4879 files changed, 269930 insertions, 41504 deletions
diff --git a/test/.clang-format b/test/.clang-format
new file mode 100644
index 000000000000..4799b66f3e9a
--- /dev/null
+++ b/test/.clang-format
@@ -0,0 +1,2 @@
+BasedOnStyle: LLVM
+ColumnLimit: 0
diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll
index 2c34fd5f615a..4de2daf6b2dc 100644
--- a/test/Analysis/BasicAA/full-store-partial-alias.ll
+++ b/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -29,7 +29,9 @@ entry:
   ret i32 %tmp5.lobit
 }
 
-!0 = metadata !{metadata !"double", metadata !1}
+!0 = metadata !{metadata !4, metadata !4, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
+!3 = metadata !{metadata !5, metadata !5, i64 0}
+!4 = metadata !{metadata !"double", metadata !1}
+!5 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll
index 9c2c7eeec38d..2c0d467003f2 100644
--- a/test/Analysis/BasicAA/gep-alias.ll
+++ b/test/Analysis/BasicAA/gep-alias.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -basicaa -gvn -instcombine -S 2>&1 | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
 ; Make sure that basicaa thinks R and r are must aliases.
 define i32 @test1(i8 * %P) {
@@ -15,7 +15,7 @@ entry:
 
 	%t = sub i32 %S, %s
 	ret i32 %t
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
@@ -32,7 +32,7 @@ entry:
 
 	%t = sub i32 %S, %s
 	ret i32 %t
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 0
 }
 
@@ -51,7 +51,7 @@ entry:
 
 	%t = sub i32 %S, %s
 	ret i32 %t
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 0
 }
 
@@ -68,7 +68,7 @@ entry:
   store i8* null, i8** %tmp3, align 8
   %tmp4 = load i32* %tmp2, align 8
 	ret i32 %tmp4
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret i32 64
 }
 
@@ -82,7 +82,34 @@ define i32 @test5(i32* %p, i64 %i) {
   %y = load i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
-; CHECK: @test5
+; CHECK-LABEL: @test5(
+; CHECK: ret i32 0
+}
+
+define i32 @test5_as1_smaller_size(i32 addrspace(1)* %p, i8 %i) {
+  %pi = getelementptr i32 addrspace(1)* %p, i8 %i
+  %i.next = add i8 %i, 1
+  %pi.next = getelementptr i32 addrspace(1)* %p, i8 %i.next
+  %x = load i32 addrspace(1)* %pi
+  store i32 42, i32 addrspace(1)* %pi.next
+  %y = load i32 addrspace(1)* %pi
+  %z = sub i32 %x, %y
+  ret i32 %z
+; CHECK-LABEL: @test5_as1_smaller_size(
+; CHECK: sext
+; CHECK: ret i32 0
+}
+
+define i32 @test5_as1_same_size(i32 addrspace(1)* %p, i16 %i) {
+  %pi = getelementptr i32 addrspace(1)* %p, i16 %i
+  %i.next = add i16 %i, 1
+  %pi.next = getelementptr i32 addrspace(1)* %p, i16 %i.next
+  %x = load i32 addrspace(1)* %pi
+  store i32 42, i32 addrspace(1)* %pi.next
+  %y = load i32 addrspace(1)* %pi
+  %z = sub i32 %x, %y
+  ret i32 %z
+; CHECK-LABEL: @test5_as1_same_size(
 ; CHECK: ret i32 0
 }
 
@@ -97,7 +124,7 @@ define i32 @test6(i32* %p, i64 %i1) {
   %y = load i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: ret i32 0
 }
 
@@ -111,7 +138,7 @@ define i32 @test7(i32* %p, i64 %i) {
   %y = load i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: ret i32 0
 }
 
@@ -128,7 +155,7 @@ define i32 @test8(i32* %p, i16 %i) {
   %y = load i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: ret i32 0
 }
 
@@ -139,7 +166,7 @@ define i8 @test9([4 x i8] *%P, i32 %i, i32 %j) {
   %P2 = getelementptr [4 x i8] *%P, i32 0, i32 %i3
 
   %j2 = shl i32 %j, 2
-  
+
   ; P4 = P + 4*j
   %P4 = getelementptr [4 x i8]* %P, i32 0, i32 %j2
 
@@ -148,7 +175,7 @@ define i8 @test9([4 x i8] *%P, i32 %i, i32 %j) {
   %y = load i8* %P2
   %z = sub i8 %x, %y
   ret i8 %z
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: ret i8 0
 }
 
@@ -157,7 +184,7 @@ define i8 @test10([4 x i8] *%P, i32 %i) {
   %i3 = add i32 %i2, 4
   ; P2 = P + 4 + 4*i
   %P2 = getelementptr [4 x i8] *%P, i32 0, i32 %i3
-  
+
   ; P4 = P + 4*i
   %P4 = getelementptr [4 x i8]* %P, i32 0, i32 %i2
 
@@ -166,7 +193,7 @@ define i8 @test10([4 x i8] *%P, i32 %i) {
   %y = load i8* %P2
   %z = sub i8 %x, %y
   ret i8 %z
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: ret i8 0
 }
 
@@ -182,8 +209,8 @@ define float @test11(i32 %indvar, [4 x [2 x float]]* %q) nounwind ssp {
   store i64 0, i64* %scevgep35, align 4
   %tmp30 = load float* %y29, align 4
   ret float %tmp30
-  ; CHECK: @test11
-  ; CHECK: ret float %tmp30
+; CHECK-LABEL: @test11(
+; CHECK: ret float %tmp30
 }
 
 ; (This was a miscompilation.)
@@ -198,6 +225,6 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind {
   store i32 0, i32* %castd
   %r = load i32* %castp
   ret i32 %r
-  ; CHECK: @test12
-  ; CHECK: ret i32 %r
+; CHECK-LABEL: @test12(
+; CHECK: ret i32 %r
 }
diff --git a/test/Analysis/BasicAA/global-size.ll b/test/Analysis/BasicAA/global-size.ll
index a7e5aab6c1f2..f081cb1e0724 100644
--- a/test/Analysis/BasicAA/global-size.ll
+++ b/test/Analysis/BasicAA/global-size.ll
@@ -2,11 +2,11 @@
 ; the global.
 
 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @B = global i16 8
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define i16 @test1(i32* %P) {
         %X = load i16* @B
         store i32 7, i32* %P
@@ -16,11 +16,23 @@ define i16 @test1(i32* %P) {
 ; CHECK: ret i16 0
 }
 
+@B_as1 = addrspace(1) global i16 8
+
+define i16 @test1_as1(i32 addrspace(1)* %P) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: ret i16 0
+  %X = load i16 addrspace(1)* @B_as1
+  store i32 7, i32 addrspace(1)* %P
+  %Y = load i16 addrspace(1)* @B_as1
+  %Z = sub i16 %Y, %X
+  ret i16 %Z
+}
+
 ; Cannot know anything about the size of this global.
 ; rdar://8813415
 @window = external global [0 x i8]
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i8 @test2(i32 %tmp79, i32 %w.2, i32 %indvar89) nounwind {
   %tmp92 = add i32 %tmp79, %indvar89
   %arrayidx412 = getelementptr [0 x i8]* @window, i32 0, i32 %tmp92
diff --git a/test/Analysis/BasicAA/noalias-geps.ll b/test/Analysis/BasicAA/noalias-geps.ll
index a93d778da074..f9ec71345739 100644
--- a/test/Analysis/BasicAA/noalias-geps.ll
+++ b/test/Analysis/BasicAA/noalias-geps.ll
@@ -4,6 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 ; Check that geps with equal base offsets of noalias base pointers stay noalias.
 define i32 @test(i32* %p, i16 %i) {
+; CHECK-LABEL: Function: test:
   %pi = getelementptr i32* %p, i32 0
   %pi.next = getelementptr i32* %p, i32 1
   %b = icmp eq i16 %i, 0
@@ -30,6 +31,7 @@ ret i32 0
 
 ; Check that geps with equal indices of noalias base pointers stay noalias.
 define i32 @test2([2 x i32]* %p, i32 %i) {
+; CHECK-LABEL: Function: test2:
   %pi = getelementptr [2 x i32]* %p, i32 0
   %pi.next = getelementptr [2 x i32]* %p, i32 1
   %b = icmp eq i32 %i, 0
diff --git a/test/Analysis/BasicAA/noalias-param.ll b/test/Analysis/BasicAA/noalias-param.ll
new file mode 100644
index 000000000000..6494771fc59f
--- /dev/null
+++ b/test/Analysis/BasicAA/noalias-param.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
+
+declare i32* @captures(i32* %cap) nounwind readonly
+
+define void @no(i32* noalias %a, i32* %b) nounwind {
+entry:
+  store i32 1, i32* %a 
+  %cap = call i32* @captures(i32* %a) nounwind readonly
+  %l = load i32* %b
+  ret void
+}
+
+; CHECK: NoAlias:      i32* %a, i32* %b
+
+define void @yes(i32* %c, i32* %d) nounwind {
+entry:
+  store i32 1, i32* %c 
+  %cap = call i32* @captures(i32* %c) nounwind readonly
+  %l = load i32* %d
+  ret void
+}
+
+; CHECK: MayAlias:     i32* %c, i32* %d
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index 540d06b1f562..ce29fb5ce1ba 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -2,12 +2,12 @@
 
 define i32 @test1(i32 %i, i32* %a) {
 ; CHECK: Printing analysis {{.*}} for function 'test1'
-; CHECK: entry = 1024
+; CHECK: entry = 1.0
 entry:
   br label %body
 
 ; Loop backedges are weighted and thus their bodies have a greater frequency.
-; CHECK: body = 31744
+; CHECK: body = 32.0
 body:
   %iv = phi i32 [ 0, %entry ], [ %next, %body ]
   %base = phi i32 [ 0, %entry ], [ %sum, %body ]
@@ -18,29 +18,29 @@ body:
   %exitcond = icmp eq i32 %next, %i
   br i1 %exitcond, label %exit, label %body
 
-; CHECK: exit = 1024
+; CHECK: exit = 1.0
 exit:
   ret i32 %sum
 }
 
 define i32 @test2(i32 %i, i32 %a, i32 %b) {
 ; CHECK: Printing analysis {{.*}} for function 'test2'
-; CHECK: entry = 1024
+; CHECK: entry = 1.0
 entry:
   %cond = icmp ult i32 %i, 42
   br i1 %cond, label %then, label %else, !prof !0
 
 ; The 'then' branch is predicted more likely via branch weight metadata.
-; CHECK: then = 963
+; CHECK: then = 0.94116
 then:
   br label %exit
 
-; CHECK: else = 60
+; CHECK: else = 0.05877
 else:
   br label %exit
 
-; FIXME: It may be a bug that we don't sum back to 1024.
-; CHECK: exit = 1023
+; FIXME: It may be a bug that we don't sum back to 1.0.
+; CHECK: exit = 0.99993
 exit:
   %result = phi i32 [ %a, %then ], [ %b, %else ]
   ret i32 %result
@@ -50,36 +50,36 @@ exit:
 
 define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 ; CHECK: Printing analysis {{.*}} for function 'test3'
-; CHECK: entry = 1024
+; CHECK: entry = 1.0
 entry:
   switch i32 %i, label %case_a [ i32 1, label %case_b
                                  i32 2, label %case_c
                                  i32 3, label %case_d
                                  i32 4, label %case_e ], !prof !1
 
-; CHECK: case_a = 51
+; CHECK: case_a = 0.04998
 case_a:
   br label %exit
 
-; CHECK: case_b = 51
+; CHECK: case_b = 0.04998
 case_b:
   br label %exit
 
 ; The 'case_c' branch is predicted more likely via branch weight metadata.
-; CHECK: case_c = 819
+; CHECK: case_c = 0.79998
 case_c:
   br label %exit
 
-; CHECK: case_d = 51
+; CHECK: case_d = 0.04998
 case_d:
   br label %exit
 
-; CHECK: case_e = 51
+; CHECK: case_e = 0.04998
 case_e:
   br label %exit
 
-; FIXME: It may be a bug that we don't sum back to 1024.
-; CHECK: exit = 1023
+; FIXME: It may be a bug that we don't sum back to 1.0.
+; CHECK: exit = 0.99993
 exit:
   %result = phi i32 [ %a, %case_a ],
                     [ %b, %case_b ],
@@ -90,3 +90,45 @@ exit:
 }
 
 !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+
+; CHECK: Printing analysis {{.*}} for function 'nested_loops'
+; CHECK: entry = 1.0
+; This test doesn't seem to be assigning sensible frequencies to nested loops.
+define void @nested_loops(i32 %a) {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  br label %for.cond4.preheader
+
+for.cond4.preheader:
+  %y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
+  %add = add i32 %y.023, %x.024
+  br label %for.body6
+
+for.body6:
+  %z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+  %add7 = add i32 %add, %z.022
+  tail call void @g(i32 %add7) #2
+  %inc = add i32 %z.022, 1
+  %cmp5 = icmp ugt i32 %inc, %a
+  br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
+
+for.inc8:
+  %inc9 = add i32 %y.023, 1
+  %cmp2 = icmp ugt i32 %inc9, %a
+  br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2
+
+for.inc11:
+  %inc12 = add i32 %x.024, 1
+  %cmp = icmp ugt i32 %inc12, %a
+  br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2
+
+for.end13:
+  ret void
+}
+
+declare void @g(i32) #1
+
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}
diff --git a/test/Analysis/BlockFrequencyInfo/lit.local.cfg b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/BlockFrequencyInfo/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
index 08adfa8a36fb..05cb31dca0ee 100644
--- a/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -115,3 +115,100 @@ return:
 }
 
 !2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
+
+declare void @coldfunc() cold
+
+define i32 @test5(i32 %a, i32 %b, i1 %flag) {
+; CHECK: Printing analysis {{.*}} for function 'test5'
+entry:
+  br i1 %flag, label %then, label %else
+; CHECK: edge entry -> then probability is 4 / 68
+; CHECK: edge entry -> else probability is 64 / 68
+
+then:
+  call void @coldfunc()
+  br label %exit
+; CHECK: edge then -> exit probability is 16 / 16 = 100%
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 16 / 16 = 100%
+
+exit:
+  %result = phi i32 [ %a, %then ], [ %b, %else ]
+  ret i32 %result
+}
+
+declare i32 @regular_function(i32 %i)
+
+define i32 @test_cold_call_sites(i32* %a) {
+; Test that edges to blocks post-dominated by cold call sites
+; are marked as not expected to be taken.
+; TODO(dnovillo) The calls to regular_function should not be merged, but
+; they are currently being merged. Convert this into a code generation test
+; after that is fixed.
+
+; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites'
+; CHECK: edge entry -> then probability is 4 / 68 = 5.88235%
+; CHECK: edge entry -> else probability is 64 / 68 = 94.1176% [HOT edge]
+
+entry:
+  %gep1 = getelementptr i32* %a, i32 1
+  %val1 = load i32* %gep1
+  %cond1 = icmp ugt i32 %val1, 1
+  br i1 %cond1, label %then, label %else
+
+then:
+  ; This function is not declared cold, but this call site is.
+  %val4 = call i32 @regular_function(i32 %val1) cold
+  br label %exit
+
+else:
+  %gep2 = getelementptr i32* %a, i32 2
+  %val2 = load i32* %gep2
+  %val3 = call i32 @regular_function(i32 %val2)
+  br label %exit
+
+exit:
+  %ret = phi i32 [ %val4, %then ], [ %val3, %else ]
+  ret i32 %ret
+}
+
+define i32 @zero1(i32 %i, i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'zero1'
+entry:
+  %cond = icmp eq i32 %i, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 12 / 32
+; CHECK: edge entry -> else probability is 20 / 32
+
+then:
+  br label %exit
+
+else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ %a, %then ], [ %b, %else ]
+  ret i32 %result
+}
+
+define i32 @zero2(i32 %i, i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'zero2'
+entry:
+  %cond = icmp ne i32 %i, -1
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 20 / 32
+; CHECK: edge entry -> else probability is 12 / 32
+
+then:
+  br label %exit
+
+else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ %a, %then ], [ %b, %else ]
+  ret i32 %result
+}
+
diff --git a/test/Analysis/BranchProbabilityInfo/lit.local.cfg b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/BranchProbabilityInfo/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/CallGraph/lit.local.cfg b/test/Analysis/CallGraph/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/CallGraph/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/CallGraph/no-intrinsics.ll b/test/Analysis/CallGraph/no-intrinsics.ll
index 450dce58e30f..d858907d7247 100644
--- a/test/Analysis/CallGraph/no-intrinsics.ll
+++ b/test/Analysis/CallGraph/no-intrinsics.ll
@@ -10,4 +10,4 @@ define void @f(i8* %out, i8* %in) {
 }
 
 ; CHECK: Call graph node for function: 'f'
-; CHECK-NOT: calls function 'llvm.memcpy.p0i8.p0i8.i32'
-\ No newline at end of file
+; CHECK-NOT: calls function 'llvm.memcpy.p0i8.p0i8.i32'
diff --git a/test/Analysis/CostModel/ARM/lit.local.cfg b/test/Analysis/CostModel/ARM/lit.local.cfg
index cb77b09ef4ad..8a3ba96497e7 100644
--- a/test/Analysis/CostModel/ARM/lit.local.cfg
+++ b/test/Analysis/CostModel/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM/select.ll b/test/Analysis/CostModel/ARM/select.ll
index 34ed1eefdaf4..21eef83c4bbe 100644
--- a/test/Analysis/CostModel/ARM/select.ll
+++ b/test/Analysis/CostModel/ARM/select.ll
@@ -63,5 +63,13 @@ define void @casts() {
   ; CHECK: cost of 1 {{.*}} select
   %v19 = select <2 x i1>  undef, <2 x double> undef, <2 x double> undef
 
+  ; odd vectors get legalized and should have similar costs
+  ; CHECK: cost of 1 {{.*}} select
+  %v20 = select <1 x i1>  undef, <1 x i32> undef, <1 x i32> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v21 = select <3 x i1>  undef, <3 x float> undef, <3 x float> undef
+  ; CHECK: cost of 4 {{.*}} select
+  %v22 = select <5 x i1>  undef, <5 x double> undef, <5 x double> undef
+
   ret void
 }
diff --git a/test/Analysis/CostModel/PowerPC/lit.local.cfg b/test/Analysis/CostModel/PowerPC/lit.local.cfg
index 4019eca0bb88..2e463005586f 100644
--- a/test/Analysis/CostModel/PowerPC/lit.local.cfg
+++ b/test/Analysis/CostModel/PowerPC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'PowerPC' in targets:
     config.unsupported = True
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index b69b3bf6304c..f3c1283c7e32 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -38,6 +38,10 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK: cost of 9 {{.*}} sext
   %S = sext <8 x i1> %in to <8 x i32>
 
+  ;CHECK: cost of 1 {{.*}} zext
+  %A1 = zext <16 x i8> undef to <16 x i16>
+  ;CHECK: cost of 1 {{.*}} sext
+  %A2 = sext <16 x i8> undef to <16 x i16>
   ;CHECK: cost of 1 {{.*}} sext
   %A = sext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} zext
@@ -51,11 +55,13 @@ define i32 @zext_sext(<8 x i1> %in) {
 
   ;CHECK: cost of 1 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
-  ;CHECK: cost of 1 {{.*}} trunc
 
+  ;CHECK: cost of 1 {{.*}} trunc
   %E = trunc <4 x i64> undef to <4 x i32>
   ;CHECK: cost of 1 {{.*}} trunc
   %F = trunc <8 x i32> undef to <8 x i16>
+  ;CHECK: cost of 2 {{.*}} trunc
+  %F1 = trunc <16 x i16> undef to <16 x i8>
 
   ;CHECK: cost of 3 {{.*}} trunc
   %G = trunc <8 x i64> undef to <8 x i32>
diff --git a/test/Analysis/CostModel/X86/div.ll b/test/Analysis/CostModel/X86/div.ll
new file mode 100644
index 000000000000..c7d6517c7f03
--- /dev/null
+++ b/test/Analysis/CostModel/X86/div.ll
@@ -0,0 +1,32 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX2 %s
+
+
+define void @div_sse() {
+  ; SSE2: div_sse
+  ; SSE2: cost of 320 {{.*}} sdiv
+  %a0 = sdiv <16 x i8> undef, undef
+  ; SSE2: cost of 160 {{.*}} sdiv
+  %a1 = sdiv <8 x i16> undef, undef
+  ; SSE2: cost of 80 {{.*}} sdiv
+  %a2 = sdiv <4 x i32> undef, undef
+  ; SSE2: cost of 40 {{.*}} sdiv
+  %a3 = sdiv <2 x i32> undef, undef
+  ret void
+}
+; SSE2: div_avx
+
+define void @div_avx() {
+  ; AVX2: div_avx
+  ; AVX2: cost of 640 {{.*}} sdiv
+  %a0 = sdiv <32 x i8> undef, undef
+  ; AVX2: cost of 320 {{.*}} sdiv
+  %a1 = sdiv <16 x i16> undef, undef
+  ; AVX2: cost of 160 {{.*}} sdiv
+  %a2 = sdiv <8 x i32> undef, undef
+  ; AVX2: cost of 80 {{.*}} sdiv
+  %a3 = sdiv <4 x i32> undef, undef
+  ret void
+}
+
+
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
index e235a36222a7..8eeee8124d9a 100644
--- a/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -30,3 +30,31 @@ for.end:                                          ; preds = %vector.body
 }
 
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)  nounwind readnone
+
+define void @test2(float* nocapture %f) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float* %f, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+  store <4 x float> %2, <4 x float>* %1, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
+; CORE2: Cost Model: Found an estimated cost of 400 for instruction:   %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+
+; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
+; COREI7: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+
+}
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)  nounwind readnone
diff --git a/test/Analysis/CostModel/X86/lit.local.cfg b/test/Analysis/CostModel/X86/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/Analysis/CostModel/X86/lit.local.cfg
+++ b/test/Analysis/CostModel/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll
index 4195b1d879a1..a53d0bd4e993 100644
--- a/test/Analysis/CostModel/X86/load_store.ll
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -59,6 +59,25 @@ define i32 @loads(i32 %arg) {
   ;CHECK: cost of 4 {{.*}} load
   load <8 x i64>* undef, align 4
 
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x float>* undef, align 4
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x double>* undef, align 4
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x i32>* undef, align 4
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x i64>* undef, align 4
+
+  ;CHECK: cost of 10 {{.*}} load
+  load <5 x i32>* undef, align 4
+
+  ;CHECK: cost of 10 {{.*}} load
+  load <5 x i64>* undef, align 4
+
   ret i32 undef
 }
 
diff --git a/test/Analysis/CostModel/X86/reduction.ll b/test/Analysis/CostModel/X86/reduction.ll
new file mode 100644
index 000000000000..78e65aee1460
--- /dev/null
+++ b/test/Analysis/CostModel/X86/reduction.ll
@@ -0,0 +1,365 @@
+; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=core2 -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=corei7 -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=SSE3
+; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=corei7-avx -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -cost-model -costmodel-reduxcost=true -analyze -mcpu=core-avx2 -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=AVX2
+
+define fastcc float @reduction_cost_float(<4 x float> %rdx) {
+  %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+  %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+
+; Check that we recognize the tree starting at the extractelement as a
+; reduction.
+; CHECK-LABEL: reduction_cost
+; CHECK:  cost of 9 {{.*}} extractelement
+
+  %r = extractelement <4 x float> %bin.rdx8, i32 0
+  ret float %r
+}
+
+define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) {
+  %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef,
+   <8 x i32> <i32 4    , i32     5, i32     6, i32     7,
+              i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx = add <8 x i32> %rdx, %rdx.shuf
+  %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,
+   <8 x i32> <i32 2    , i32 3,     i32 undef, i32 undef,
+              i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2
+  %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef,
+   <8 x i32> <i32 1    , i32 undef, i32 undef, i32 undef,
+              i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
+
+; CHECK-LABEL: reduction_cost_int
+; CHECK:  cost of 23 {{.*}} extractelement
+
+  %r = extractelement <8 x i32> %bin.rdx.3, i32 0
+  ret i32 %r
+}
+
+define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) {
+  %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+        <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+  %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+        <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+  %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+        <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+        <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+
+; CHECK-LABEL: pairwise_hadd
+; CHECK: cost of 11 {{.*}} extractelement
+
+  %r = extractelement <4 x float> %bin.rdx.1, i32 0
+  %r2 = fadd float %r, %f1
+  ret float %r2
+}
+
+define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) {
+  %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+        <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+  %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+        <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0
+  %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+        <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+        <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+
+; CHECK-LABEL: pairwise_hadd_assoc
+; CHECK: cost of 11 {{.*}} extractelement
+
+  %r = extractelement <4 x float> %bin.rdx.1, i32 0
+  %r2 = fadd float %r, %f1
+  ret float %r2
+}
+
+define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) {
+  %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+        <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+  %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+        <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+  %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+        <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
+
+; CHECK-LABEL: pairwise_hadd_skip_first
+; CHECK: cost of 11 {{.*}} extractelement
+
+  %r = extractelement <4 x float> %bin.rdx.1, i32 0
+  %r2 = fadd float %r, %f1
+  ret float %r2
+}
+
+define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1) {
+  %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+  %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf
+
+; SSE3:  cost of 2 {{.*}} extractelement
+; AVX:  cost of 2 {{.*}} extractelement
+; AVX2:  cost of 2 {{.*}} extractelement
+
+  %r = extractelement <2 x double> %bin.rdx, i32 0
+  ret double %r
+}
+
+define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) {
+  %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+  %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+
+; SSE3:  cost of 4 {{.*}} extractelement
+; AVX:  cost of 3 {{.*}} extractelement
+; AVX2:  cost of 3 {{.*}} extractelement
+
+  %r = extractelement <4 x float> %bin.rdx8, i32 0
+  ret float %r
+}
+
+define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1) {
+  %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf
+  %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7
+
+; AVX:  cost of 3 {{.*}} extractelement
+; AVX2:  cost of 3 {{.*}} extractelement
+
+  %r = extractelement <4 x double> %bin.rdx8, i32 0
+  ret double %r
+}
+
+define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) {
+  %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3
+  %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf
+  %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7
+
+; AVX:  cost of 4 {{.*}} extractelement
+; AVX2:  cost of 4 {{.*}} extractelement
+
+  %r = extractelement <8 x float> %bin.rdx8, i32 0
+  ret float %r
+}
+
+define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) {
+  %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+  %bin.rdx = add <2 x i64> %rdx, %rdx.shuf
+
+; SSE3:  cost of 2 {{.*}} extractelement
+; AVX:  cost of 1 {{.*}} extractelement
+; AVX2:  cost of 1 {{.*}} extractelement
+
+  %r = extractelement <2 x i64> %bin.rdx, i32 0
+  ret i64 %r
+}
+
+define fastcc i32 @no_pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) {
+  %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx = add <4 x i32> %rdx, %rdx.shuf
+  %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7
+
+; SSE3:  cost of 3 {{.*}} extractelement
+; AVX:  cost of 3 {{.*}} extractelement
+; AVX2:  cost of 3 {{.*}} extractelement
+
+  %r = extractelement <4 x i32> %bin.rdx8, i32 0
+  ret i32 %r
+}
+
+define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { ; folds the four lanes of %rdx into lane 0 in two shuffle+add steps; %f1 is never read
+  %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> ; lanes 0..1 = rdx[2..3]
+  %bin.rdx = add <4 x i64> %rdx, %rdx.shuf ; lanes 0..1 = rdx[0]+rdx[2], rdx[1]+rdx[3]
+  %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 ; lane 0 = sum of all four input lanes
+
+; AVX:  cost of 3 {{.*}} extractelement
+; AVX2:  cost of 3 {{.*}} extractelement
+
+  %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i64 %r
+}
+
+define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; folds the eight lanes of %rdx into lane 0 in three shuffle+add steps; %f1 is never read
+  %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = rdx[4..7]
+  %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 ; lanes 0..3 = rdx[n] + rdx[n+4]
+  %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx4[2..3]
+  %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf ; lanes 0..1 = bin.rdx4[n] + bin.rdx4[n+2]
+  %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; lane 0 = sum of all eight input lanes
+
+; SSE3:  cost of 4 {{.*}} extractelement
+; AVX:  cost of 4 {{.*}} extractelement
+; AVX2:  cost of 4 {{.*}} extractelement
+
+  %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i16 %r
+}
+
+define fastcc i32 @no_pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { ; folds the eight lanes of %rdx into lane 0 in three shuffle+add steps; %f1 is never read
+  %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = rdx[4..7]
+  %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 ; lanes 0..3 = rdx[n] + rdx[n+4]
+  %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx4[2..3]
+  %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf ; lanes 0..1 = bin.rdx4[n] + bin.rdx4[n+2]
+  %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 ; lane 0 = sum of all eight input lanes
+
+; AVX:  cost of 5 {{.*}} extractelement
+; AVX2:  cost of 5 {{.*}} extractelement
+
+  %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i32 %r
+}
+
+define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) { ; adds the two lanes of %rdx via two single-lane shuffles and one fadd; %f1 is never read
+  %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef> ; lane 0 = rdx[0]
+  %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef> ; lane 0 = rdx[1]
+  %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lane 0 = rdx[0] + rdx[1]
+
+; SSE3:  cost of 2 {{.*}} extractelement
+; AVX:  cost of 2 {{.*}} extractelement
+; AVX2:  cost of 2 {{.*}} extractelement
+
+  %r = extractelement <2 x double> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret double %r
+}
+
+define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; adds adjacent lanes of %rdx pairwise, twice, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> ; lanes 0..1 = even lanes rdx[0], rdx[2]
+  %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> ; lanes 0..1 = odd lanes rdx[1], rdx[3]
+  %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..1 = rdx[0]+rdx[1], rdx[2]+rdx[3]
+  %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[0]
+  %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lane 0 = (rdx[0]+rdx[1]) + (rdx[2]+rdx[3])
+
+; SSE3:  cost of 4 {{.*}} extractelement
+; AVX:  cost of 4 {{.*}} extractelement
+; AVX2:  cost of 4 {{.*}} extractelement
+
+  %r = extractelement <4 x float> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret float %r
+}
+
+define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) { ; adds adjacent lanes of %rdx pairwise, twice, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> ; lanes 0..1 = even lanes rdx[0], rdx[2]
+  %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> ; lanes 0..1 = odd lanes rdx[1], rdx[3]
+  %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..1 = rdx[0]+rdx[1], rdx[2]+rdx[3]
+  %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[0]
+  %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lane 0 = (rdx[0]+rdx[1]) + (rdx[2]+rdx[3])
+
+; AVX:  cost of 5 {{.*}} extractelement
+; AVX2:  cost of 5 {{.*}} extractelement
+
+  %r = extractelement <4 x double> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret double %r
+}
+
+define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; adds adjacent lanes of %rdx pairwise, three times, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = even lanes of rdx
+  %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = odd lanes of rdx
+  %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..3 = rdx[2n] + rdx[2n+1]
+  %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx[0], bin.rdx[2]
+  %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx[1], bin.rdx[3]
+  %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lanes 0..1 = bin.rdx[2n] + bin.rdx[2n+1]
+  %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx8[0]
+  %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx8[1]
+  %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; lane 0 = bin.rdx8[0] + bin.rdx8[1], covering all eight input lanes
+
+; AVX:  cost of 7 {{.*}} extractelement
+; AVX2:  cost of 7 {{.*}} extractelement
+
+  %r = extractelement <8 x float> %bin.rdx9, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret float %r
+}
+
+define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { ; adds the two lanes of %rdx via two single-lane shuffles and one add; %f1 is never read
+  %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef> ; lane 0 = rdx[0]
+  %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> ; lane 0 = rdx[1]
+  %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lane 0 = rdx[0] + rdx[1]
+
+; SSE3:  cost of 2 {{.*}} extractelement
+; AVX:  cost of 1 {{.*}} extractelement
+; AVX2:  cost of 1 {{.*}} extractelement
+
+  %r = extractelement <2 x i64> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i64 %r
+}
+
+define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { ; adds adjacent lanes of %rdx pairwise, twice, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> ; lanes 0..1 = even lanes rdx[0], rdx[2]
+  %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> ; lanes 0..1 = odd lanes rdx[1], rdx[3]
+  %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..1 = rdx[0]+rdx[1], rdx[2]+rdx[3]
+  %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[0]
+  %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lane 0 = sum of all four input lanes
+
+; SSE3:  cost of 3 {{.*}} extractelement
+; AVX:  cost of 3 {{.*}} extractelement
+; AVX2:  cost of 3 {{.*}} extractelement
+
+  %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i32 %r
+}
+
+define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { ; adds adjacent lanes of %rdx pairwise, twice, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> ; lanes 0..1 = even lanes rdx[0], rdx[2]
+  %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> ; lanes 0..1 = odd lanes rdx[1], rdx[3]
+  %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..1 = rdx[0]+rdx[1], rdx[2]+rdx[3]
+  %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[0]
+  %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx[1]
+  %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lane 0 = sum of all four input lanes
+
+; AVX:  cost of 5 {{.*}} extractelement
+; AVX2:  cost of 5 {{.*}} extractelement
+
+  %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i64 %r
+}
+
+define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; adds adjacent lanes of %rdx pairwise, three times, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = even lanes of rdx
+  %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = odd lanes of rdx
+  %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..3 = rdx[2n] + rdx[2n+1]
+  %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx[0], bin.rdx[2]
+  %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx[1], bin.rdx[3]
+  %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lanes 0..1 = bin.rdx[2n] + bin.rdx[2n+1]
+  %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx8[0]
+  %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx8[1]
+  %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; lane 0 = sum of all eight input lanes
+
+; SSE3:  cost of 5 {{.*}} extractelement
+; AVX:  cost of 5 {{.*}} extractelement
+; AVX2:  cost of 5 {{.*}} extractelement
+
+  %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i16 %r
+}
+
+define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { ; adds adjacent lanes of %rdx pairwise, three times, leaving the total in lane 0; %f1 is never read
+  %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = even lanes of rdx
+  %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..3 = odd lanes of rdx
+  %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 ; lanes 0..3 = rdx[2n] + rdx[2n+1]
+  %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx[0], bin.rdx[2]
+  %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0..1 = bin.rdx[1], bin.rdx[3]
+  %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 ; lanes 0..1 = bin.rdx[2n] + bin.rdx[2n+1]
+  %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx8[0]
+  %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = bin.rdx8[1]
+  %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; lane 0 = sum of all eight input lanes
+
+; AVX:  cost of 5 {{.*}} extractelement
+; AVX2:  cost of 5 {{.*}} extractelement
+
+  %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; read the result from lane 0 (sole extractelement in this function)
+  ret i32 %r
+}
diff --git a/test/Analysis/CostModel/lit.local.cfg b/test/Analysis/CostModel/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/CostModel/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Delinearization/a.ll b/test/Analysis/Delinearization/a.ll
new file mode 100644
index 000000000000..9308749b2792
--- /dev/null
+++ b/test/Analysis/Delinearization/a.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+;
+; void foo(long n, long m, long o, int A[n][m][o]) {
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       for (long k = 0; k < o; k++)
+;         A[2*i+3][3*j-4][5*k+7] = 1;
+; }
+
+; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(i32) bytes.
+; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
+
+; AddRec: {{(8 + ((4 + (12 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(i32) bytes.
+; CHECK: ArrayRef[{(1 + (3 * %m)),+,(2 * %m)}<%for.i>][{2,+,(3 * %o)}<%for.j>]
+
+; AddRec: {(8 + ((-8 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of 2 bytes.
+; CHECK: ArrayRef[{((1 + ((-1 + (3 * %m)) * %o)) * sizeof(i32)),+,(%m * %o * sizeof(i32))}<%for.i>]
+
+; Function Attrs: nounwind uwtable
+define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { ; three nested loops storing 1 through %A at ((2i+3)*m + 3j - 4)*o + 5k + 7; NOTE(review) - no "attributes #0" line exists in this 74-line file, confirm that is intended
+entry:
+  %cmp32 = icmp sgt i64 %n, 0 ; n > 0
+  br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end17 ; nothing to do when n <= 0
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp230 = icmp sgt i64 %m, 0 ; m > 0
+  %cmp528 = icmp sgt i64 %o, 0 ; o > 0, consumed by the branch at the end of for.i
+  br i1 %cmp230, label %for.i, label %for.end17 ; nothing to do when m <= 0
+
+for.inc15.us:                                     ; preds = %for.inc12.us.us, %for.i
+  %inc16.us = add nsw i64 %i.033.us, 1 ; i + 1
+  %exitcond55 = icmp eq i64 %inc16.us, %n ; i + 1 == n
+  br i1 %exitcond55, label %for.end17, label %for.i
+
+for.i:                     ; preds = %for.cond1.preheader.lr.ph, %for.inc15.us
+  %i.033.us = phi i64 [ %inc16.us, %for.inc15.us ], [ 0, %for.cond1.preheader.lr.ph ]
+  %mul8.us = shl i64 %i.033.us, 1 ; 2*i
+  %add9.us = add nsw i64 %mul8.us, 3 ; 2*i + 3
+  %0 = mul i64 %add9.us, %m ; (2*i + 3) * m
+  %sub.us = add i64 %0, -4 ; (2*i + 3) * m - 4
+  br i1 %cmp528, label %for.j, label %for.inc15.us ; go straight to the i increment when o <= 0
+
+for.inc12.us.us:                                  ; preds = %for.k
+  %inc13.us.us = add nsw i64 %j.031.us.us, 1 ; j + 1
+  %exitcond54 = icmp eq i64 %inc13.us.us, %m ; j + 1 == m
+  br i1 %exitcond54, label %for.inc15.us, label %for.j
+
+for.j:                            ; preds = %for.i, %for.inc12.us.us
+  %j.031.us.us = phi i64 [ %inc13.us.us, %for.inc12.us.us ], [ 0, %for.i ]
+  %mul7.us.us = mul nsw i64 %j.031.us.us, 3 ; 3*j
+  %tmp.us.us = add i64 %sub.us, %mul7.us.us ; (2*i + 3) * m + 3*j - 4
+  %tmp27.us.us = mul i64 %tmp.us.us, %o ; ((2*i + 3) * m + 3*j - 4) * o
+  br label %for.k
+
+for.k:                                  ; preds = %for.k, %for.j
+  %k.029.us.us = phi i64 [ 0, %for.j ], [ %inc.us.us, %for.k ]
+  %mul.us.us = mul nsw i64 %k.029.us.us, 5 ; 5*k
+  %arrayidx.sum.us.us = add i64 %mul.us.us, 7 ; 5*k + 7
+  %arrayidx10.sum.us.us = add i64 %arrayidx.sum.us.us, %tmp27.us.us ; full linearized element index
+  %arrayidx11.us.us = getelementptr inbounds i32* %A, i64 %arrayidx10.sum.us.us ; the address whose index expression the test output above describes
+  store i32 1, i32* %arrayidx11.us.us, align 4
+  %inc.us.us = add nsw i64 %k.029.us.us, 1 ; k + 1
+  %exitcond = icmp eq i64 %inc.us.us, %o ; k + 1 == o
+  br i1 %exitcond, label %for.inc12.us.us, label %for.k
+
+for.end17:                                        ; preds = %for.inc15.us, %for.cond1.preheader.lr.ph, %entry
+  ret void
+}
diff --git a/test/Analysis/Delinearization/himeno_1.ll b/test/Analysis/Delinearization/himeno_1.ll
new file mode 100644
index 000000000000..9458bd2e5261
--- /dev/null
+++ b/test/Analysis/Delinearization/himeno_1.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; #define MR(mt,n,r,c,d)  mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)]
+;
+; struct Mat {
+;   float* m;
+;   int mnums;
+;   int mrows;
+;   int mcols;
+;   int mdeps;
+; };
+;
+; typedef struct Mat Matrix;
+;
+; void jacobi(int nn, Matrix* a, Matrix* p)
+; {
+;   long i, j, k, p_rows_sub, p_cols_sub, p_deps_sub;
+;
+;   p_rows_sub = p->mrows - 1;
+;   p_cols_sub = p->mcols - 1;
+;   p_deps_sub = p->mdeps - 1;
+;
+;     for(i = 1; i < p_rows_sub; i++)
+;       for(j = 1; j < p_cols_sub; j++)
+;         for(k = 1; k < p_deps_sub; k++)
+;           MR(a,0,i,j,k) = 1.0f;
+; }
+
+; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k>
+; CHECK: Base offset: %a.base
+; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
+; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
+
+; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
+; CHECK: Base offset: %a.base
+; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
+; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
+
+; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
+; CHECK: Base offset: %a.base
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
+; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
+
+%struct.Mat = type { float*, i32, i32, i32, i32 }
+
+define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable { ; stores 1.0 at a.base[(i * a.cols + j) * a.deps + k] in three nested loops that all start at 1; %nn is never read
+entry:
+  %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2 ; address of field 2 of *p (mrows in the C sketch above)
+  %p.rows = load i32* %p.rows.ptr
+  %p.rows.sub = add i32 %p.rows, -1 ; p.rows - 1
+  %p.rows.sext = sext i32 %p.rows.sub to i64 ; value the i loop exit test compares against
+  %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3 ; address of field 3 of *p (mcols in the C sketch above)
+  %p.cols = load i32* %p.cols.ptr
+  %p.cols.sub = add i32 %p.cols, -1 ; p.cols - 1
+  %p.cols.sext = sext i32 %p.cols.sub to i64 ; value the j loop exit test compares against
+  %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4 ; address of field 4 of *p (mdeps in the C sketch above)
+  %p.deps = load i32* %p.deps.ptr
+  %p.deps.sub = add i32 %p.deps, -1 ; p.deps - 1
+  %p.deps.sext = sext i32 %p.deps.sub to i64 ; value the k loop exit test compares against
+  %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3 ; address of field 3 of *a
+  %a.cols = load i32* %a.cols.ptr
+  %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4 ; address of field 4 of *a
+  %a.deps = load i32* %a.deps.ptr
+  %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0 ; address of field 0 of *a, the float* data pointer
+  %a.base = load float** %a.base.ptr, align 8
+  br label %for.i
+
+for.i:                                            ; preds = %for.i.inc, %entry
+  %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ]
+  br label %for.j
+
+for.j:                                            ; preds = %for.j.inc, %for.i
+  %j = phi i64 [ %j.inc, %for.j.inc ], [ 1, %for.i ]
+  %a.cols.sext = sext i32 %a.cols to i64 ; widened inside the j loop body, not in entry
+  %a.deps.sext = sext i32 %a.deps to i64 ; widened inside the j loop body, not in entry
+  br label %for.k
+
+for.k:                                            ; preds = %for.k, %for.j
+  %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ]
+  %tmp1 = mul nsw i64 %a.cols.sext, %i ; a.cols * i
+  %tmp2 = add i64 %tmp1, %j ; a.cols * i + j
+  %tmp3 = mul i64 %tmp2, %a.deps.sext ; (a.cols * i + j) * a.deps
+  %tmp4 = add nsw i64 %k, %tmp3 ; (a.cols * i + j) * a.deps + k
+  %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4 ; element address built from the linearized index
+  store float 1.000000e+00, float* %arrayidx
+  %k.inc = add nsw i64 %k, 1
+  %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext ; k + 1 == p.deps - 1
+  br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:                                        ; preds = %for.k
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext ; j + 1 == p.cols - 1
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:                                        ; preds = %for.j.inc
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext ; i + 1 == p.rows - 1
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:                                              ; preds = %for.i.inc
+  ret void
+}
diff --git a/test/Analysis/Delinearization/himeno_2.ll b/test/Analysis/Delinearization/himeno_2.ll
new file mode 100644
index 000000000000..a29006606fab
--- /dev/null
+++ b/test/Analysis/Delinearization/himeno_2.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; #define MR(mt,n,r,c,d)  mt->m[(n) * mt->mrows * mt->mcols * mt->mdeps + (r) * mt->mcols* mt->mdeps + (c) * mt->mdeps + (d)]
+;
+; struct Mat {
+;   float* m;
+;   int mnums;
+;   int mrows;
+;   int mcols;
+;   int mdeps;
+; };
+;
+; typedef struct Mat Matrix;
+;
+; void jacobi(int nn, Matrix* a, Matrix* p)
+; {
+;   long i, j, k, p_rows_sub, p_cols_sub, p_deps_sub;
+;
+;   p_rows_sub = p->mrows - 1;
+;   p_cols_sub = p->mcols - 1;
+;   p_deps_sub = p->mdeps - 1;
+;
+;     for(i = 1; i < p_rows_sub; i++)
+;       for(j = 1; j < p_cols_sub; j++)
+;         for(k = 1; k < p_deps_sub; k++)
+;           MR(a,0,i,j,k) = 1.0f;
+; }
+
+; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k>
+; CHECK: Base offset: %a.base
+; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
+; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
+
+; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
+; CHECK: Base offset: %a.base
+; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
+; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
+
+; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
+; CHECK: Base offset: %a.base
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
+; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
+
+%struct.Mat = type { float*, i32, i32, i32, i32 }
+
+define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable { ; stores 1.0 at a.base[(i * a.cols + j) * a.deps + k] in three nested loops that all start at 1; %nn is never read
+entry:
+  %p.rows.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 2 ; address of field 2 of *p (mrows in the C sketch above)
+  %p.rows = load i32* %p.rows.ptr
+  %p.rows.sub = add i32 %p.rows, -1 ; p.rows - 1
+  %p.rows.sext = sext i32 %p.rows.sub to i64 ; value the i loop exit test compares against
+  %p.cols.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 3 ; address of field 3 of *p (mcols in the C sketch above)
+  %p.cols = load i32* %p.cols.ptr
+  %p.cols.sub = add i32 %p.cols, -1 ; p.cols - 1
+  %p.cols.sext = sext i32 %p.cols.sub to i64 ; value the j loop exit test compares against
+  %p.deps.ptr = getelementptr inbounds %struct.Mat* %p, i64 0, i32 4 ; address of field 4 of *p (mdeps in the C sketch above)
+  %p.deps = load i32* %p.deps.ptr
+  %p.deps.sub = add i32 %p.deps, -1 ; p.deps - 1
+  %p.deps.sext = sext i32 %p.deps.sub to i64 ; value the k loop exit test compares against
+  %a.cols.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 3 ; address of field 3 of *a
+  %a.cols = load i32* %a.cols.ptr
+  %a.cols.sext = sext i32 %a.cols to i64 ; widened once here in entry, outside every loop
+  %a.deps.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 4 ; address of field 4 of *a
+  %a.deps = load i32* %a.deps.ptr
+  %a.deps.sext = sext i32 %a.deps to i64 ; widened once here in entry, outside every loop
+  %a.base.ptr = getelementptr inbounds %struct.Mat* %a, i64 0, i32 0 ; address of field 0 of *a, the float* data pointer
+  %a.base = load float** %a.base.ptr, align 8
+  br label %for.i
+
+for.i:                                            ; preds = %for.i.inc, %entry
+  %i = phi i64 [ %i.inc, %for.i.inc ], [ 1, %entry ]
+  br label %for.j
+
+for.j:                                            ; preds = %for.j.inc, %for.i
+  %j = phi i64 [ %j.inc, %for.j.inc ], [ 1, %for.i ]
+  br label %for.k
+
+for.k:                                            ; preds = %for.k, %for.j
+  %k = phi i64 [ 1, %for.j ], [ %k.inc, %for.k ]
+  %tmp1 = mul nsw i64 %a.cols.sext, %i ; a.cols * i
+  %tmp2 = add i64 %tmp1, %j ; a.cols * i + j
+  %tmp3 = mul i64 %tmp2, %a.deps.sext ; (a.cols * i + j) * a.deps
+  %tmp4 = add nsw i64 %k, %tmp3 ; (a.cols * i + j) * a.deps + k
+  %arrayidx = getelementptr inbounds float* %a.base, i64 %tmp4 ; element address built from the linearized index
+  store float 1.000000e+00, float* %arrayidx
+  %k.inc = add nsw i64 %k, 1
+  %k.exitcond = icmp eq i64 %k.inc, %p.deps.sext ; k + 1 == p.deps - 1
+  br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:                                        ; preds = %for.k
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %p.cols.sext ; j + 1 == p.cols - 1
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:                                        ; preds = %for.j.inc
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %p.rows.sext ; i + 1 == p.rows - 1
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:                                              ; preds = %for.i.inc
+  ret void
+}
diff --git a/test/Analysis/BasicAA/lit.local.cfg b/test/Analysis/Delinearization/lit.local.cfg
index 19eebc0ac7ac..19eebc0ac7ac 100644
--- a/test/Analysis/BasicAA/lit.local.cfg
+++ b/test/Analysis/Delinearization/lit.local.cfg
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
new file mode 100644
index 000000000000..82cab167c74f
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -analyze -delinearize  | FileCheck %s
+
+; void foo(long n, long m, long o, double A[n][m][o]) {
+;
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       for (long k = 0; k < o; k++)
+;         A[i+3][j-4][k+7] = 1.0;
+; }
+
+; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>]
+
+; AddRec: {{(48 + ((-24 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(-3 + (3 * %m)),+,%m}<%for.i>][{6,+,%o}<%for.j>]
+
+; AddRec: {(48 + ((-32 + (32 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(6 + ((-4 + (4 * %m)) * %o)),+,(%m * %o)}<%for.i>]
+
+define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { ; three nested loops storing 1.0 through %A at ((i+3)*m + j - 4)*o + k + 7
+entry:
+  br label %for.i
+
+for.i: ; i loop header, entered without a prior n > 0 test
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+  br label %for.j
+
+for.j: ; j loop header
+  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
+  br label %for.k
+
+for.k: ; k loop header and body
+  %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
+  %offset0 = add nsw i64 %i, 3 ; i + 3
+  %subscript0 = mul i64 %offset0, %m ; (i + 3) * m
+  %offset1 = add nsw i64 %j, -4 ; j - 4
+  %subscript1 = add i64 %offset1, %subscript0 ; (i + 3) * m + j - 4
+  %subscript2 = mul i64 %subscript1, %o ; ((i + 3) * m + j - 4) * o
+  %offset2 = add nsw i64 %k, 7 ; k + 7
+  %subscript = add i64 %subscript2, %offset2 ; full linearized element index
+  %idx = getelementptr inbounds double* %A, i64 %subscript ; element address built from the linearized index
+  store double 1.0, double* %idx
+  br label %for.k.inc
+
+for.k.inc: ; k latch
+  %k.inc = add nsw i64 %k, 1
+  %k.exitcond = icmp eq i64 %k.inc, %o ; k + 1 == o
+  br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc: ; j latch
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %m ; j + 1 == m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc: ; i latch
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %n ; i + 1 == n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
new file mode 100644
index 000000000000..a1e779fff6c9
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; void foo(long n, long m, long o, long p, double A[n][m][o+p]) {
+;
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       for (long k = 0; k < o; k++)
+;         A[i+3][j-4][k+7] = 1.0;
+; }
+
+; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{3,+,1}<nw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
+
+; AddRec: {{(48 + (8 * %o) + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][(%o + %p)] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(-4 + (3 * %m)),+,%m}<%for.cond4.preheader.lr.ph.us>][{(6 + %o),+,(%o + %p)}<%for.body6.lr.ph.us.us>]
+
+; AddRec: {(48 + (8 * %o) + ((-40 + (32 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(6 + ((-5 + (4 * %m)) * (%o + %p)) + %o),+,((%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>]
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { ; three nested loops storing 1.0 through %A at ((i+3)*m + j - 4)*(o+p) + k + 7
+entry:
+  %add = add nsw i64 %p, %o ; o + p, the multiplier applied to the combined i/j subscript in for.body6.lr.ph.us.us
+  %cmp22 = icmp sgt i64 %n, 0 ; n > 0
+  br i1 %cmp22, label %for.cond1.preheader.lr.ph, label %for.end16 ; nothing to do when n <= 0
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp220 = icmp sgt i64 %m, 0 ; m > 0
+  %cmp518 = icmp sgt i64 %o, 0 ; o > 0, consumed by the branch at the end of for.cond4.preheader.lr.ph.us
+  br i1 %cmp220, label %for.cond4.preheader.lr.ph.us, label %for.end16 ; nothing to do when m <= 0
+
+for.inc14.us:                                     ; preds = %for.cond4.preheader.lr.ph.us, %for.inc11.us.us
+  %inc15.us = add nsw i64 %i.023.us, 1 ; i + 1
+  %exitcond43 = icmp eq i64 %inc15.us, %n ; i + 1 == n
+  br i1 %exitcond43, label %for.end16, label %for.cond4.preheader.lr.ph.us
+
+for.cond4.preheader.lr.ph.us:                     ; preds = %for.inc14.us, %for.cond1.preheader.lr.ph
+  %i.023.us = phi i64 [ %inc15.us, %for.inc14.us ], [ 0, %for.cond1.preheader.lr.ph ]
+  %add8.us = add nsw i64 %i.023.us, 3 ; i + 3
+  %0 = mul i64 %add8.us, %m ; (i + 3) * m
+  %sub.us = add i64 %0, -4 ; (i + 3) * m - 4
+  br i1 %cmp518, label %for.body6.lr.ph.us.us, label %for.inc14.us ; go straight to the i increment when o <= 0
+
+for.inc11.us.us:                                  ; preds = %for.body6.us.us
+  %inc12.us.us = add nsw i64 %j.021.us.us, 1 ; j + 1
+  %exitcond42 = icmp eq i64 %inc12.us.us, %m ; j + 1 == m
+  br i1 %exitcond42, label %for.inc14.us, label %for.body6.lr.ph.us.us
+
+for.body6.lr.ph.us.us:                            ; preds = %for.cond4.preheader.lr.ph.us, %for.inc11.us.us
+  %j.021.us.us = phi i64 [ %inc12.us.us, %for.inc11.us.us ], [ 0, %for.cond4.preheader.lr.ph.us ]
+  %tmp.us.us = add i64 %sub.us, %j.021.us.us ; (i + 3) * m + j - 4
+  %tmp17.us.us = mul i64 %tmp.us.us, %add ; ((i + 3) * m + j - 4) * (o + p)
+  br label %for.body6.us.us
+
+for.body6.us.us:                                  ; preds = %for.body6.us.us, %for.body6.lr.ph.us.us
+  %k.019.us.us = phi i64 [ 0, %for.body6.lr.ph.us.us ], [ %inc.us.us, %for.body6.us.us ]
+  %arrayidx.sum.us.us = add i64 %k.019.us.us, 7 ; k + 7
+  %arrayidx9.sum.us.us = add i64 %arrayidx.sum.us.us, %tmp17.us.us ; full linearized element index
+  %arrayidx10.us.us = getelementptr inbounds double* %A, i64 %arrayidx9.sum.us.us ; element address built from the linearized index
+  store double 1.000000e+00, double* %arrayidx10.us.us, align 8
+  %inc.us.us = add nsw i64 %k.019.us.us, 1 ; k + 1
+  %exitcond = icmp eq i64 %inc.us.us, %o ; k + 1 == o, so the k bound is o while the row stride is o + p
+  br i1 %exitcond, label %for.inc11.us.us, label %for.body6.us.us
+
+for.end16:                                        ; preds = %for.cond1.preheader.lr.ph, %for.inc14.us, %entry
+  ret void
+}
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
new file mode 100644
index 000000000000..a52a4c93ce23
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; void foo(long n, long m, long o, double A[n][m][o], long p, long q, long r) {
+;
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       for (long k = 0; k < o; k++)
+;         A[i+p][j+q][k+r] = 1.0;
+; }
+
+; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>]
+
+; AddRec: {{(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(1 + (%m * %p) + %q),+,%m}<%for.i>][{(-1 + %r),+,%o}<%for.j>]
+
+; AddRec: {(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(-1 + ((((1 + %p) * %m) + %q) * %o) + %r),+,(%m * %o)}<%for.i>]
+
+define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {  ; triple loop nest storing 1.0 to A[i+p][j+q][k+r]
+entry:
+  br label %for.i
+
+for.i:  ; outer loop header
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]  ; i starts at 0, advanced in %for.i.inc
+  br label %for.j
+
+for.j:  ; middle loop header
+  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]  ; j restarts at 0 for every i
+  br label %for.k
+
+for.k:  ; innermost loop header; also computes the address and performs the store
+  %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]  ; k restarts at 0 for every j
+  %offset0 = add nsw i64 %i, %p  ; i + p
+  %subscript0 = mul i64 %offset0, %m  ; (i + p) * m
+  %offset1 = add nsw i64 %j, %q  ; j + q
+  %subscript1 = add i64 %offset1, %subscript0  ; (i + p) * m + (j + q)
+  %subscript2 = mul i64 %subscript1, %o  ; ((i + p) * m + (j + q)) * o
+  %offset2 = add nsw i64 %k, %r  ; k + r
+  %subscript = add i64 %subscript2, %offset2  ; fully linearized element index
+  %idx = getelementptr inbounds double* %A, i64 %subscript  ; element address relative to %A
+  store double 1.0, double* %idx
+  br label %for.k.inc
+
+for.k.inc:  ; innermost latch
+  %k.inc = add nsw i64 %k, 1
+  %k.exitcond = icmp eq i64 %k.inc, %o  ; leave the k loop once k + 1 == o
+  br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:  ; middle latch
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %m  ; leave the j loop once j + 1 == m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:  ; outer latch
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %n  ; leave the i loop once i + 1 == n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
new file mode 100644
index 000000000000..d68a15883942
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; Derived from the following code:
+;
+; void foo(long n, long m, double A[n][m]) {
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       A[i][j] = 1.0;
+; }
+
+; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
+
+; AddRec: {(-8 + (8 * %m) + %A),+,(8 * %m)}<%for.i>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(-1 + %m),+,%m}<%for.i>]
+
+define void @foo(i64 %n, i64 %m, double* %A) {  ; double loop nest storing 1.0 to A[i][j]
+entry:
+  br label %for.i
+
+for.i:  ; outer loop header
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]  ; i starts at 0, advanced in %for.i.inc
+  %tmp = mul nsw i64 %i, %m  ; row offset i * m, computed once per outer iteration
+  br label %for.j
+
+for.j:  ; inner loop: header, body and latch in a single block
+  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]  ; j restarts at 0 for every i
+  %vlaarrayidx.sum = add i64 %j, %tmp  ; linearized index i * m + j
+  %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum  ; element address relative to %A
+  store double 1.0, double* %arrayidx
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %m  ; leave the j loop once j + 1 == m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:  ; outer latch
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %n  ; leave the i loop once i + 1 == n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
new file mode 100644
index 000000000000..7207420205aa
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; extern void bar(long n, long m, double A[n][m]);
+;
+; void foo(long a, long b) {
+;   for (long n = 1; n < a; ++n)
+;   for (long m = 1; m < b; ++m) {
+;     double A[n][m];
+;     for (long i = 0; i < n; i++)
+;       for (long j = 0; j < m; j++)
+;         A[i][j] = 1.0;
+;     bar(n, m, A);
+;   }
+; }
+
+; AddRec: {{%vla.us,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<%for.body9.us.us>
+; CHECK: Base offset: %vla.us
+; CHECK: ArrayDecl[UnknownSize][{1,+,1}<%for.cond7.preheader.lr.ph.split.us.us>] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.body9.lr.ph.us.us>][{0,+,1}<nuw><nsw><%for.body9.us.us>]
+
+define void @foo(i64 %a, i64 %b) nounwind uwtable {  ; four-deep loop nest filling a per-iteration stack array, then passing it to @bar
+entry:
+  %cmp43 = icmp sgt i64 %a, 1  ; the outermost loop only runs when a > 1
+  br i1 %cmp43, label %for.cond1.preheader.lr.ph, label %for.end19
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp224 = icmp sgt i64 %b, 1  ; b > 1, evaluated once and reused on every outer iteration
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc17, %for.cond1.preheader.lr.ph
+  %indvars.iv51 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next52, %for.inc17 ]  ; outermost counter, starts at 1 (first argument later passed to @bar)
+  br i1 %cmp224, label %for.cond7.preheader.lr.ph.split.us.us, label %for.inc17
+
+for.end13.us:                                     ; preds = %for.inc11.us.us
+  call void @bar(i64 %indvars.iv51, i64 %indvars.iv48, double* %vla.us) nounwind  ; hand the filled array and both extents to @bar
+  call void @llvm.stackrestore(i8* %1)  ; restore the stack pointer saved before the alloca
+  %indvars.iv.next49 = add i64 %indvars.iv48, 1
+  %exitcond54 = icmp eq i64 %indvars.iv.next49, %b  ; leave the second loop once its counter + 1 == b
+  br i1 %exitcond54, label %for.inc17, label %for.cond7.preheader.lr.ph.split.us.us
+
+for.inc11.us.us:                                  ; preds = %for.body9.us.us
+  %inc12.us.us = add nsw i64 %i.023.us.us, 1
+  %exitcond53 = icmp eq i64 %inc12.us.us, %indvars.iv51  ; row loop ends when its counter + 1 equals the outermost counter
+  br i1 %exitcond53, label %for.end13.us, label %for.body9.lr.ph.us.us
+
+for.body9.lr.ph.us.us:                            ; preds = %for.cond7.preheader.lr.ph.split.us.us, %for.inc11.us.us
+  %i.023.us.us = phi i64 [ 0, %for.cond7.preheader.lr.ph.split.us.us ], [ %inc12.us.us, %for.inc11.us.us ]  ; row counter, starts at 0
+  %0 = mul nsw i64 %i.023.us.us, %indvars.iv48  ; row offset: row counter * second-loop counter
+  br label %for.body9.us.us
+
+for.body9.us.us:                                  ; preds = %for.body9.us.us, %for.body9.lr.ph.us.us
+  %j.021.us.us = phi i64 [ 0, %for.body9.lr.ph.us.us ], [ %inc.us.us, %for.body9.us.us ]  ; column counter, starts at 0
+  %arrayidx.sum.us.us = add i64 %j.021.us.us, %0  ; linearized index: row offset + column
+  %arrayidx10.us.us = getelementptr inbounds double* %vla.us, i64 %arrayidx.sum.us.us  ; element address inside the stack array
+  store double 1.000000e+00, double* %arrayidx10.us.us, align 8
+  %inc.us.us = add nsw i64 %j.021.us.us, 1
+  %exitcond50 = icmp eq i64 %inc.us.us, %indvars.iv48  ; column loop ends when its counter + 1 equals the second-loop counter
+  br i1 %exitcond50, label %for.inc11.us.us, label %for.body9.us.us
+
+for.cond7.preheader.lr.ph.split.us.us:            ; preds = %for.cond1.preheader, %for.end13.us
+  %indvars.iv48 = phi i64 [ %indvars.iv.next49, %for.end13.us ], [ 1, %for.cond1.preheader ]  ; second-loop counter, starts at 1
+  %1 = call i8* @llvm.stacksave()  ; saved so the alloca below can be released each iteration
+  %2 = mul nuw i64 %indvars.iv48, %indvars.iv51  ; element count: product of the two outer counters
+  %vla.us = alloca double, i64 %2, align 16  ; variable-length array of %2 doubles
+  br label %for.body9.lr.ph.us.us
+
+for.inc17:                                        ; preds = %for.end13.us, %for.cond1.preheader
+  %indvars.iv.next52 = add i64 %indvars.iv51, 1
+  %exitcond55 = icmp eq i64 %indvars.iv.next52, %a  ; leave the outermost loop once its counter + 1 == a
+  br i1 %exitcond55, label %for.end19, label %for.cond1.preheader
+
+for.end19:                                        ; preds = %for.inc17, %entry
+  ret void
+}
+
+declare i8* @llvm.stacksave() nounwind  ; intrinsic; its result is later passed to @llvm.stackrestore in @foo
+declare void @bar(i64, i64, double*)  ; external consumer of the filled array (see the C prototype at the top of this test)
+declare void @llvm.stackrestore(i8*) nounwind  ; intrinsic; called in @foo after @bar returns
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
new file mode 100644
index 000000000000..24f95837c860
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; void foo(long n, long m, long o, double A[n][m][o]) {
+;
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++)
+;       for (long k = 0; k < o; k++)
+;         A[i][j][k] = 1.0;
+; }
+
+; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
+
+; AddRec: {{(-8 + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][(%m * %o)] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{(-1 + %o),+,%o}<%for.j>]
+
+; AddRec: {(-8 + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{(-1 + (%m * %o)),+,(%m * %o)}<%for.i>]
+
+define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {  ; triple loop nest storing 1.0 to A[i][j][k]
+entry:
+  br label %for.i
+
+for.i:  ; outer loop header
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]  ; i starts at 0, advanced in %for.i.inc
+  br label %for.j
+
+for.j:  ; middle loop header
+  %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]  ; j restarts at 0 for every i
+  br label %for.k
+
+for.k:  ; innermost loop header; also computes the address and performs the store
+  %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]  ; k restarts at 0 for every j
+  %subscript0 = mul i64 %i, %m  ; i * m
+  %subscript1 = add i64 %j, %subscript0  ; i * m + j
+  %subscript2 = mul i64 %subscript1, %o  ; (i * m + j) * o
+  %subscript = add i64 %subscript2, %k  ; linearized index (i * m + j) * o + k
+  %idx = getelementptr inbounds double* %A, i64 %subscript  ; element address relative to %A
+  store double 1.0, double* %idx
+  br label %for.k.inc
+
+for.k.inc:  ; innermost latch
+  %k.inc = add nsw i64 %k, 1
+  %k.exitcond = icmp eq i64 %k.inc, %o  ; leave the k loop once k + 1 == o
+  br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:  ; middle latch
+  %j.inc = add nsw i64 %j, 1
+  %j.exitcond = icmp eq i64 %j.inc, %m  ; leave the j loop once j + 1 == m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:  ; outer latch
+  %i.inc = add nsw i64 %i, 1
+  %i.exitcond = icmp eq i64 %i.inc, %n  ; leave the i loop once i + 1 == n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
new file mode 100644
index 000000000000..e1516104ddfc
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+; void foo(int n, int m, int o, double A[n][m][o]) {
+;
+;   for (int i = 0; i < n; i++)
+;     for (int j = 0; j < m; j++)
+;       for (int k = 0; k < o; k++)
+;         A[i][j][k] = 1.0;
+; }
+
+; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
+; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+
+; AddRec: {{((8 * (zext i32 (-1 + %o) to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][((zext i32 %m to i64) * (zext i32 %o to i64))] with elements of 8 bytes.
+; CHECK: ArrayRef[{0,+,1}<%for.i>][{(zext i32 (-1 + %o) to i64),+,(zext i32 %o to i64)}<%for.j>]
+
+; AddRec: {((8 * (zext i32 (-1 + %o) to i64)) + (8 * (zext i32 (-1 + %m) to i64) * (zext i32 %o to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize] with elements of 8 bytes.
+; CHECK: ArrayRef[{((zext i32 (-1 + %o) to i64) + ((zext i32 (-1 + %m) to i64) * (zext i32 %o to i64))),+,((zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>]
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %n, i32 %m, i32 %o, double* %A) {  ; triple loop nest storing 1.0 to A[i][j][k] with i32 extents and i64 counters
+entry:
+  %m_zext = zext i32 %m to i64  ; middle extent widened to the 64-bit index type
+  %o_zext = zext i32 %o to i64  ; innermost extent widened to the 64-bit index type
+  br label %for.i
+
+for.i:  ; outer loop header
+  %i = phi i64 [ %i.inc, %for.i.inc ], [ 0, %entry ]  ; i starts at 0, advanced in %for.i.inc
+  br label %for.j
+
+for.j:  ; middle loop header
+  %j = phi i64 [ %j.inc, %for.j.inc ], [ 0, %for.i ]  ; j restarts at 0 for every i
+  br label %for.k
+
+for.k:  ; innermost loop header; also computes the address and performs the store
+  %k = phi i64 [ %k.inc, %for.k.inc ], [ 0, %for.j ]  ; k restarts at 0 for every j
+  %tmp = mul i64 %i, %m_zext  ; i * m
+  %tmp1 = trunc i64 %j to i32  ; only feeds %mul.us.us below
+  %tmp2 = trunc i64 %i to i32  ; only feeds %mul.us.us below
+  %mul.us.us = mul nsw i32 %tmp1, %tmp2  ; result is not used by any other instruction in this function
+  %tmp.us.us = add i64 %j, %tmp  ; i * m + j
+  %tmp17.us.us = mul i64 %tmp.us.us, %o_zext  ; (i * m + j) * o
+  %subscript = add i64 %tmp17.us.us, %k  ; linearized index (i * m + j) * o + k
+  %idx = getelementptr inbounds double* %A, i64 %subscript  ; element address relative to %A
+  store double 1.0, double* %idx
+  br label %for.k.inc
+
+for.k.inc:  ; innermost latch
+  %k.inc = add i64 %k, 1
+  %k.inc.trunc = trunc i64 %k.inc to i32  ; exit test is done in 32 bits against the i32 extent
+  %k.exitcond = icmp eq i32 %k.inc.trunc, %o
+  br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:  ; middle latch
+  %j.inc = add i64 %j, 1
+  %j.inc.trunc = trunc i64 %j.inc to i32  ; exit test is done in 32 bits against the i32 extent
+  %j.exitcond = icmp eq i32 %j.inc.trunc, %m
+  br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:  ; outer latch
+  %i.inc = add i64 %i, 1
+  %i.inc.trunc = trunc i64 %i.inc to i32  ; exit test is done in 32 bits against the i32 extent
+  %i.exitcond = icmp eq i32 %i.inc.trunc, %n
+  br i1 %i.exitcond, label %end, label %for.i
+
+end:
+  ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index 003ee03ab0be..09e8fd29dcc4 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx10.6.0"
 define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
-
+; CHECK: 'Dependence Analysis' for function 'banerjee0':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<= <>]!
 ; CHECK: da analyze - confused!
@@ -65,6 +65,7 @@ entry:
   %cmp4 = icmp sgt i64 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9
 
+; CHECK: 'Dependence Analysis' for function 'banerjee1':
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [* <>]!
 ; CHECK: da analyze - confused!
@@ -131,6 +132,7 @@ define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee2':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -181,6 +183,7 @@ define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee3':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [> >]!
 ; CHECK: da analyze - confused!
@@ -231,6 +234,7 @@ define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee4':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -281,6 +285,7 @@ define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee5':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [< <]!
 ; CHECK: da analyze - confused!
@@ -331,6 +336,7 @@ define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee6':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=> <>]!
 ; CHECK: da analyze - confused!
@@ -381,6 +387,7 @@ define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee7':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [> <=]!
 ; CHECK: da analyze - confused!
@@ -431,6 +438,7 @@ define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee8':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [> <>]!
 ; CHECK: da analyze - confused!
@@ -481,6 +489,7 @@ define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee9':
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [<= =|<]!
 ; CHECK: da analyze - confused!
@@ -532,6 +541,7 @@ define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee10':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<> =]!
 ; CHECK: da analyze - confused!
@@ -582,6 +592,7 @@ define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee11':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<= <>]!
 ; CHECK: da analyze - confused!
@@ -632,6 +643,7 @@ define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'banerjee12':
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [= <>]!
 ; CHECK: da analyze - confused!
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index a42212464f86..bb31d118857d 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -14,6 +14,7 @@ define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd0'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [=> *|<]!
 ; CHECK: da analyze - confused!
@@ -66,6 +67,7 @@ define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd1'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -119,6 +121,7 @@ define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd2'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -172,6 +175,7 @@ define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd3'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [<> *]!
 ; CHECK: da analyze - confused!
@@ -223,6 +227,7 @@ define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd4'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -284,6 +289,7 @@ define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd5'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [<> *]!
 ; CHECK: da analyze - confused!
@@ -346,6 +352,7 @@ entry:
   %cmp4 = icmp sgt i64 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12
 
+; CHECK: 'Dependence Analysis' for function 'gcd6'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -417,6 +424,7 @@ entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
 
+; CHECK: 'Dependence Analysis' for function 'gcd7'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [* *|<]!
 ; CHECK: da analyze - confused!
@@ -500,6 +508,7 @@ entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
 
+; CHECK: 'Dependence Analysis' for function 'gcd8'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -578,6 +587,7 @@ entry:
   %cmp4 = icmp eq i32 %n, 0
   br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader
 
+; CHECK: 'Dependence Analysis' for function 'gcd9'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [* *|<]!
 ; CHECK: da analyze - confused!
diff --git a/test/Analysis/DependenceAnalysis/Invariant.ll b/test/Analysis/DependenceAnalysis/Invariant.ll
new file mode 100644
index 000000000000..202d8e2d68db
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/Invariant.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
+
+; Regression test for a bug that triggered an assertion failure when an
+; invalid SCEVAddRecExpr was created in addToCoefficient.
+
+; CHECK: da analyze - consistent input [S 0]!
+; CHECK: da analyze - input [* 0|<]!
+; CHECK: da analyze - none!
+
+define float @foo(float %g, [40 x float]* %rr) nounwind {  ; two nested loops over %rr carrying a float result through phis and a select
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:  ; outer loop header
+  %i.04 = phi i32 [ 0, %entry ], [ %add10, %for.inc9 ]  ; outer counter, starts at 0 and advances by 5
+  %res.03 = phi float [ 0.000000e+00, %entry ], [ %add.res.1, %for.inc9 ]  ; result carried across outer iterations, initially 0.0
+  br label %for.body3
+
+for.body3:  ; inner loop: header, body and latch in a single block
+  %j.02 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ]  ; inner counter, starts at 0 and advances by 5
+  %res.11 = phi float [ %res.03, %for.cond1.preheader ], [ %add.res.1, %for.body3 ]  ; result carried across inner iterations
+  %arrayidx4 = getelementptr inbounds [40 x float]* %rr, i32 %j.02, i32 %j.02  ; both indices are the inner counter
+  %0 = load float* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds [40 x float]* %rr, i32 %i.04, i32 %j.02  ; indexed by outer counter, then inner counter
+  %1 = load float* %arrayidx6, align 4
+  %add = fadd float %0, %1  ; sum of the two loaded elements
+  %cmp7 = fcmp ogt float %add, %g  ; ordered greater-than against the threshold %g
+  %add.res.1 = select i1 %cmp7, float %add, float %res.11  ; take the new sum when it exceeds %g, else keep the carried result
+  %add8 = add nsw i32 %j.02, 5
+  %cmp2 = icmp slt i32 %add8, 40  ; continue the inner loop while the next counter value is below 40
+  br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.inc9:  ; outer latch
+  %add10 = add nsw i32 %i.04, 5
+  %cmp = icmp slt i32 %add10, 40  ; continue the outer loop while the next counter value is below 40
+  br i1 %cmp, label %for.cond1.preheader, label %for.end11
+
+for.end11:
+  ret float %add.res.1
+}
diff --git a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index 81e61892d8e8..5443909d7ef6 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -15,6 +15,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv0'
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -77,6 +78,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond2.preheader, label %for.body.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv1'
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -141,6 +143,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv2'
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -203,6 +206,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv3'
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -263,6 +267,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv4'
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -324,6 +329,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv5'
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
@@ -385,6 +391,7 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.end7, label %for.cond1.preheader.preheader
 
+; CHECK: 'Dependence Analysis' for function 'symbolicrdiv6'
 ; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
diff --git a/test/Analysis/Dominators/lit.local.cfg b/test/Analysis/Dominators/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/Dominators/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/GlobalsModRef/lit.local.cfg b/test/Analysis/GlobalsModRef/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/GlobalsModRef/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Lint/check-zero-divide.ll b/test/Analysis/Lint/check-zero-divide.ll
new file mode 100644
index 000000000000..f4e79ed95f8c
--- /dev/null
+++ b/test/Analysis/Lint/check-zero-divide.ll
@@ -0,0 +1,78 @@
+; RUN: opt -lint -disable-output %s 2>&1 | FileCheck %s
+
+define <2 x i32> @use_vector_sdiv(<2 x i32> %a) nounwind {  ; signed vector division by a constant with no zero element
+  %b = sdiv <2 x i32> %a, <i32 5, i32 8>  ; no diagnostic is listed for this instruction in this test
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_srem(<2 x i32> %a) nounwind {  ; signed vector remainder by a constant with no zero element
+  %b = srem <2 x i32> %a, <i32 5, i32 8>  ; no diagnostic is listed for this instruction in this test
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_udiv(<2 x i32> %a) nounwind {  ; unsigned vector division by a constant with no zero element
+  %b = udiv <2 x i32> %a, <i32 5, i32 8>  ; no diagnostic is listed for this instruction in this test
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_urem(<2 x i32> %a) nounwind {  ; unsigned vector remainder by a constant with no zero element
+  %b = urem <2 x i32> %a, <i32 5, i32 8>  ; no diagnostic is listed for this instruction in this test
+  ret <2 x i32> %b
+}
+
+define i32 @use_sdiv_by_zero(i32 %a) nounwind {  ; scalar signed division by the literal 0
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv i32 %a, 0
+  %b = sdiv i32 %a, 0  ; the expected lint output above reports this instruction
+  ret i32 %b
+}
+
+define i32 @use_sdiv_by_zeroinitializer(i32 %a) nounwind {  ; scalar signed division by zeroinitializer
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv i32 %a, 0
+  %b = sdiv i32 %a, zeroinitializer  ; the expected lint output above shows the divisor printed as 0
+  ret i32 %b
+}
+
+define <2 x i32> @use_vector_sdiv_by_zero_x(<2 x i32> %a) nounwind {  ; vector signed division where only the first divisor element is 0
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 0, i32 5>
+  %b = sdiv <2 x i32> %a, <i32 0, i32 5>  ; the expected lint output above reports this instruction
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_sdiv_by_zero_y(<2 x i32> %a) nounwind {  ; vector signed division where only the second divisor element is 0
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 4, i32 0>
+  %b = sdiv <2 x i32> %a, <i32 4, i32 0>  ; the expected lint output above reports this instruction
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_sdiv_by_zero_xy(<2 x i32> %a) nounwind {  ; vector signed division where both divisor elements are 0
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv <2 x i32> %a, zeroinitializer
+  %b = sdiv <2 x i32> %a, <i32 0, i32 0>  ; the expected lint output above shows the all-zero divisor printed as zeroinitializer
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_sdiv_by_undef_x(<2 x i32> %a) nounwind {  ; vector signed division where the first divisor element is undef
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 undef, i32 5>
+  %b = sdiv <2 x i32> %a, <i32 undef, i32 5>  ; the expected lint output above reports the undef element as division by zero
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_sdiv_by_undef_y(<2 x i32> %a) nounwind {  ; vector signed division where the second divisor element is undef
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv <2 x i32> %a, <i32 5, i32 undef>
+  %b = sdiv <2 x i32> %a, <i32 5, i32 undef>  ; the expected lint output above reports the undef element as division by zero
+  ret <2 x i32> %b
+}
+
+define <2 x i32> @use_vector_sdiv_by_undef_xy(<2 x i32> %a) nounwind {  ; vector signed division where both divisor elements are undef
+; CHECK: Undefined behavior: Division by zero
+; CHECK-NEXT: %b = sdiv <2 x i32> %a, undef
+  %b = sdiv <2 x i32> %a, <i32 undef, i32 undef>  ; the expected lint output above shows the all-undef divisor printed as undef
+  ret <2 x i32> %b
+}
+
diff --git a/test/Analysis/DependenceAnalysis/lit.local.cfg b/test/Analysis/Lint/lit.local.cfg
index c6106e4746f2..c6106e4746f2 100644
--- a/test/Analysis/DependenceAnalysis/lit.local.cfg
+++ b/test/Analysis/Lint/lit.local.cfg
diff --git a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
index 7119007ffde5..a87bab7cabf2 100644
--- a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
+++ b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll
@@ -1,8 +1,9 @@
 ; This testcase was incorrectly computing that the loopentry.7 loop was
 ; not a child of the loopentry.6 loop.
 ;
-; RUN: opt < %s -analyze -loops | \
-; RUN:   grep "^            Loop at depth 4 containing: %loopentry.7<header><latch><exiting>"
+; RUN: opt < %s -analyze -loops | FileCheck %s
+
+; CHECK: Loop at depth 4 containing: %loopentry.7<header><latch><exiting>
 
 define void @getAndMoveToFrontDecode() {
 	br label %endif.2
diff --git a/test/Analysis/LoopInfo/lit.local.cfg b/test/Analysis/LoopInfo/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/LoopInfo/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg b/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Analysis/PostDominators/lit.local.cfg b/test/Analysis/PostDominators/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/PostDominators/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/PostDominators/pr1098.ll b/test/Analysis/PostDominators/pr1098.ll
index afb47769ee49..2eed21371276 100644
--- a/test/Analysis/PostDominators/pr1098.ll
+++ b/test/Analysis/PostDominators/pr1098.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -postdomtree -analyze | grep entry
+; RUN: opt < %s -postdomtree -analyze | FileCheck %s
 ; PR932
 
 define void @foo(i1 %x) {
+; CHECK: entry
 entry:
         br i1 %x, label %bb1, label %bb0
 bb0:            ; preds = %entry, bb0
diff --git a/test/Analysis/Profiling/edge-profiling.ll b/test/Analysis/Profiling/edge-profiling.ll
deleted file mode 100644
index cbaf47617fb6..000000000000
--- a/test/Analysis/Profiling/edge-profiling.ll
+++ /dev/null
@@ -1,139 +0,0 @@
-; Test the edge profiling instrumentation.
-; RUN: opt < %s -insert-edge-profiling -S | FileCheck %s
-
-; ModuleID = '<stdin>'
-
-@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
-@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1]
-@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1]
-@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1]
-@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1]
-@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1]
-; CHECK:@EdgeProfCounters
-; CHECK:[19 x i32] 
-; CHECK:zeroinitializer
-
-define void @oneblock() nounwind {
-entry:
-; CHECK:entry:
-; CHECK:%OldFuncCounter
-; CHECK:load 
-; CHECK:getelementptr
-; CHECK:@EdgeProfCounters
-; CHECK:i32 0
-; CHECK:i32 0
-; CHECK:%NewFuncCounter
-; CHECK:add
-; CHECK:%OldFuncCounter
-; CHECK:store 
-; CHECK:%NewFuncCounter
-; CHECK:getelementptr
-; CHECK:@EdgeProfCounters
-  %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  ret void
-}
-
-declare i32 @puts(i8*)
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
-; CHECK:entry:
-  %argc_addr = alloca i32                         ; <i32*> [#uses=4]
-  %argv_addr = alloca i8**                        ; <i8***> [#uses=1]
-  %retval = alloca i32                            ; <i32*> [#uses=2]
-  %j = alloca i32                                 ; <i32*> [#uses=4]
-  %i = alloca i32                                 ; <i32*> [#uses=4]
-  %0 = alloca i32                                 ; <i32*> [#uses=2]
-; CHECK:call 
-; CHECK:@llvm_start_edge_profiling
-; CHECK:@EdgeProfCounters
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  store i32 %argc, i32* %argc_addr
-  store i8** %argv, i8*** %argv_addr
-  store i32 0, i32* %i, align 4
-  br label %bb10
-
-bb:                                               ; preds = %bb10
-; CHECK:bb:
-  %1 = load i32* %argc_addr, align 4              ; <i32> [#uses=1]
-  %2 = icmp sgt i32 %1, 1                         ; <i1> [#uses=1]
-  br i1 %2, label %bb1, label %bb8
-
-bb1:                                              ; preds = %bb
-; CHECK:bb1:
-  store i32 0, i32* %j, align 4
-  br label %bb6
-
-bb2:                                              ; preds = %bb6
-; CHECK:bb2:
-  %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  %4 = load i32* %argc_addr, align 4              ; <i32> [#uses=1]
-  %5 = icmp sgt i32 %4, 2                         ; <i1> [#uses=1]
-  br i1 %5, label %bb3, label %bb4
-
-bb3:                                              ; preds = %bb2
-; CHECK:bb3:
-  %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  br label %bb5
-
-bb4:                                              ; preds = %bb2
-; CHECK:bb4:
-  %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  br label %bb11
-
-bb5:                                              ; preds = %bb3
-; CHECK:bb5:
-  %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  %9 = load i32* %j, align 4                      ; <i32> [#uses=1]
-  %10 = add nsw i32 %9, 1                         ; <i32> [#uses=1]
-  store i32 %10, i32* %j, align 4
-  br label %bb6
-
-bb6:                                              ; preds = %bb5, %bb1
-; CHECK:bb6:
-  %11 = load i32* %j, align 4                     ; <i32> [#uses=1]
-  %12 = load i32* %argc_addr, align 4             ; <i32> [#uses=1]
-  %13 = icmp slt i32 %11, %12                     ; <i1> [#uses=1]
-  br i1 %13, label %bb2, label %bb7
-
-bb7:                                              ; preds = %bb6
-; CHECK:bb7:
-  br label %bb9
-
-bb8:                                              ; preds = %bb
-; CHECK:bb8:
-  %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  br label %bb9
-
-bb9:                                              ; preds = %bb8, %bb7
-; CHECK:bb9:
-  %15 = load i32* %i, align 4                     ; <i32> [#uses=1]
-  %16 = add nsw i32 %15, 1                        ; <i32> [#uses=1]
-  store i32 %16, i32* %i, align 4
-  br label %bb10
-
-bb10:                                             ; preds = %bb9, %entry
-; CHECK:bb10:
-  %17 = load i32* %i, align 4                     ; <i32> [#uses=1]
-  %18 = icmp ne i32 %17, 3                        ; <i1> [#uses=1]
-  br i1 %18, label %bb, label %bb11
-; CHECK:br
-; CHECK:label %bb10.bb11_crit_edge
-
-; CHECK:bb10.bb11_crit_edge:
-; CHECK:br
-; CHECK:label %bb11
-
-bb11:                                             ; preds = %bb10, %bb4
-; CHECK:bb11:
-  call void @oneblock() nounwind
-  store i32 0, i32* %0, align 4
-  %19 = load i32* %0, align 4                     ; <i32> [#uses=1]
-  store i32 %19, i32* %retval, align 4
-  br label %return
-
-return:                                           ; preds = %bb11
-; CHECK:return:
-  %retval12 = load i32* %retval                   ; <i32> [#uses=1]
-  ret i32 %retval12
-}
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
deleted file mode 100644
index d40fa4f9d3ab..000000000000
--- a/test/Analysis/Profiling/lit.local.cfg
+++ /dev/null
@@ -1,11 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-if 'hexagon' in root.target_triple:
-    config.unsupported = True
diff --git a/test/Analysis/Profiling/load-branch-weights-ifs.ll b/test/Analysis/Profiling/load-branch-weights-ifs.ll
deleted file mode 100644
index 7ed090b7c366..000000000000
--- a/test/Analysis/Profiling/load-branch-weights-ifs.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; RUN: opt -insert-edge-profiling -o %t1 < %s
-; RUN: rm -f %t1.prof_data
-; RUN: lli %defaultjit -load %llvmshlibdir/libprofile_rt%shlibext %t1 \
-; RUN:     -llvmprof-output %t1.prof_data
-; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \
-; RUN:     | FileCheck %s
-; RUN: rm -f %t1.prof_data
-
-; FIXME: profile_rt.dll could be built on win32.
-; REQUIRES: loadable_module
-
-;; func_mod - Branch taken 6 times in 7.
-define i32 @func_mod(i32 %N) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %N.addr = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  %0 = load i32* %N.addr, align 4
-  %rem = srem i32 %0, 7
-  %tobool = icmp ne i32 %rem, 0
-  br i1 %tobool, label %if.then, label %if.else
-; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof !0
-
-if.then:
-  store i32 1, i32* %retval
-  br label %return
-
-if.else:
-  store i32 0, i32* %retval
-  br label %return
-
-return:
-  %1 = load i32* %retval
-  ret i32 %1
-}
-
-;; func_const_true - conditional branch which 100% taken probability.
-define i32 @func_const_true(i32 %N) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %N.addr = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  %0 = load i32* %N.addr, align 4
-  %cmp = icmp eq i32 %0, 1
-  br i1 %cmp, label %if.then, label %if.end
-; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !1
-
-if.then:
-  store i32 1, i32* %retval
-  br label %return
-
-if.end:
-  store i32 0, i32* %retval
-  br label %return
-
-return:
-  %1 = load i32* %retval
-  ret i32 %1
-}
-
-;; func_const_true - conditional branch which 100% not-taken probability.
-define i32 @func_const_false(i32 %N) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %N.addr = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  %0 = load i32* %N.addr, align 4
-  %cmp = icmp eq i32 %0, 1
-  br i1 %cmp, label %if.then, label %if.end
-; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !2
-
-if.then:
-  store i32 1, i32* %retval
-  br label %return
-
-if.end:
-  store i32 0, i32* %retval
-  br label %return
-
-return:
-  %1 = load i32* %retval
-  ret i32 %1
-}
-
-define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %argc.addr = alloca i32, align 4
-  %argv.addr = alloca i8**, align 8
-  %loop = alloca i32, align 4
-  store i32 0, i32* %retval
-  store i32 0, i32* %loop, align 4
-  br label %for.cond
-
-for.cond:
-  %0 = load i32* %loop, align 4
-  %cmp = icmp slt i32 %0, 7000
-  br i1 %cmp, label %for.body, label %for.end
-; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !3
-
-for.body:
-  %1 = load i32* %loop, align 4
-  %call = call i32 @func_mod(i32 %1)
-  br label %for.inc
-
-for.inc:
-  %2 = load i32* %loop, align 4
-  %inc = add nsw i32 %2, 1
-  store i32 %inc, i32* %loop, align 4
-  br label %for.cond
-
-for.end:
-  %call1 = call i32 @func_const_true(i32 1)
-  %call2 = call i32 @func_const_false(i32 0)
-  ret i32 0
-}
-
-; CHECK: !0 = metadata !{metadata !"branch_weights", i32 6000, i32 1000}
-; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 0}
-; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1}
-; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7000, i32 1}
-; CHECK-NOT: !4
diff --git a/test/Analysis/Profiling/load-branch-weights-loops.ll b/test/Analysis/Profiling/load-branch-weights-loops.ll
deleted file mode 100644
index 9d1925a2d701..000000000000
--- a/test/Analysis/Profiling/load-branch-weights-loops.ll
+++ /dev/null
@@ -1,188 +0,0 @@
-; RUN: opt -insert-edge-profiling -o %t1 < %s
-; RUN: rm -f %t1.prof_data
-; RUN: lli %defaultjit -load %llvmshlibdir/libprofile_rt%shlibext %t1 \
-; RUN:     -llvmprof-output %t1.prof_data
-; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \
-; RUN:     | FileCheck %s
-; RUN: rm -f %t1.prof_data
-
-; FIXME: profile_rt.dll could be built on win32.
-; REQUIRES: loadable_module
-
-;; func_for - Test branch probabilities for a vanilla for loop.
-define i32 @func_for(i32 %N) nounwind uwtable {
-entry:
-  %N.addr = alloca i32, align 4
-  %ret = alloca i32, align 4
-  %loop = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  store i32 0, i32* %ret, align 4
-  store i32 0, i32* %loop, align 4
-  br label %for.cond
-
-for.cond:
-  %0 = load i32* %loop, align 4
-  %1 = load i32* %N.addr, align 4
-  %cmp = icmp slt i32 %0, %1
-  br i1 %cmp, label %for.body, label %for.end
-; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !0
-
-for.body:
-  %2 = load i32* %N.addr, align 4
-  %3 = load i32* %ret, align 4
-  %add = add nsw i32 %3, %2
-  store i32 %add, i32* %ret, align 4
-  br label %for.inc
-
-for.inc:
-  %4 = load i32* %loop, align 4
-  %inc = add nsw i32 %4, 1
-  store i32 %inc, i32* %loop, align 4
-  br label %for.cond
-
-for.end:
-  %5 = load i32* %ret, align 4
-  ret i32 %5
-}
-
-;; func_for_odd - Test branch probabilities for a for loop with a continue and
-;; a break.
-define i32 @func_for_odd(i32 %N) nounwind uwtable {
-entry:
-  %N.addr = alloca i32, align 4
-  %ret = alloca i32, align 4
-  %loop = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  store i32 0, i32* %ret, align 4
-  store i32 0, i32* %loop, align 4
-  br label %for.cond
-
-for.cond:
-  %0 = load i32* %loop, align 4
-  %1 = load i32* %N.addr, align 4
-  %cmp = icmp slt i32 %0, %1
-  br i1 %cmp, label %for.body, label %for.end
-; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !1
-
-for.body:
-  %2 = load i32* %loop, align 4
-  %rem = srem i32 %2, 10
-  %tobool = icmp ne i32 %rem, 0
-  br i1 %tobool, label %if.then, label %if.end
-; CHECK: br i1 %tobool, label %if.then, label %if.end, !prof !2
-
-if.then:
-  br label %for.inc
-
-if.end:
-  %3 = load i32* %loop, align 4
-  %cmp1 = icmp eq i32 %3, 500
-  br i1 %cmp1, label %if.then2, label %if.end3
-; CHECK: br i1 %cmp1, label %if.then2, label %if.end3, !prof !3
-
-if.then2:
-  br label %for.end
-
-if.end3:
-  %4 = load i32* %N.addr, align 4
-  %5 = load i32* %ret, align 4
-  %add = add nsw i32 %5, %4
-  store i32 %add, i32* %ret, align 4
-  br label %for.inc
-
-for.inc:
-  %6 = load i32* %loop, align 4
-  %inc = add nsw i32 %6, 1
-  store i32 %inc, i32* %loop, align 4
-  br label %for.cond
-
-for.end:
-  %7 = load i32* %ret, align 4
-  ret i32 %7
-}
-
-;; func_while - Test branch probability in a vanilla while loop.
-define i32 @func_while(i32 %N) nounwind uwtable {
-entry:
-  %N.addr = alloca i32, align 4
-  %ret = alloca i32, align 4
-  %loop = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  store i32 0, i32* %ret, align 4
-  store i32 0, i32* %loop, align 4
-  br label %while.cond
-
-while.cond:
-  %0 = load i32* %loop, align 4
-  %1 = load i32* %N.addr, align 4
-  %cmp = icmp slt i32 %0, %1
-  br i1 %cmp, label %while.body, label %while.end
-; CHECK: br i1 %cmp, label %while.body, label %while.end, !prof !0
-
-while.body:
-  %2 = load i32* %N.addr, align 4
-  %3 = load i32* %ret, align 4
-  %add = add nsw i32 %3, %2
-  store i32 %add, i32* %ret, align 4
-  %4 = load i32* %loop, align 4
-  %inc = add nsw i32 %4, 1
-  store i32 %inc, i32* %loop, align 4
-  br label %while.cond
-
-while.end:
-  %5 = load i32* %ret, align 4
-  ret i32 %5
-}
-
-;; func_while - Test branch probability in a vanilla do-while loop.
-define i32 @func_do_while(i32 %N) nounwind uwtable {
-entry:
-  %N.addr = alloca i32, align 4
-  %ret = alloca i32, align 4
-  %loop = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  store i32 0, i32* %ret, align 4
-  store i32 0, i32* %loop, align 4
-  br label %do.body
-
-do.body:
-  %0 = load i32* %N.addr, align 4
-  %1 = load i32* %ret, align 4
-  %add = add nsw i32 %1, %0
-  store i32 %add, i32* %ret, align 4
-  %2 = load i32* %loop, align 4
-  %inc = add nsw i32 %2, 1
-  store i32 %inc, i32* %loop, align 4
-  br label %do.cond
-
-do.cond:
-  %3 = load i32* %loop, align 4
-  %4 = load i32* %N.addr, align 4
-  %cmp = icmp slt i32 %3, %4
-  br i1 %cmp, label %do.body, label %do.end
-; CHECK: br i1 %cmp, label %do.body, label %do.end, !prof !4
-
-do.end:
-  %5 = load i32* %ret, align 4
-  ret i32 %5
-}
-
-define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %argc.addr = alloca i32, align 4
-  %argv.addr = alloca i8**, align 8
-  store i32 0, i32* %retval
-  %call = call i32 @func_for(i32 1000)
-  %call1 = call i32 @func_for_odd(i32 1000)
-  %call2 = call i32 @func_while(i32 1000)
-  %call3 = call i32 @func_do_while(i32 1000)
-  ret i32 0
-}
-
-!0 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
-!1 = metadata !{metadata !"branch_weights", i32 501, i32 0}
-!2 = metadata !{metadata !"branch_weights", i32 450, i32 51}
-!3 = metadata !{metadata !"branch_weights", i32 1, i32 50}
-!4 = metadata !{metadata !"branch_weights", i32 999, i32 1}
-; CHECK-NOT: !5
diff --git a/test/Analysis/Profiling/load-branch-weights-switches.ll b/test/Analysis/Profiling/load-branch-weights-switches.ll
deleted file mode 100644
index 5587c7172bb6..000000000000
--- a/test/Analysis/Profiling/load-branch-weights-switches.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: opt -insert-edge-profiling -o %t1 < %s
-; RUN: rm -f %t1.prof_data
-; RUN: lli %defaultjit -load %llvmshlibdir/libprofile_rt%shlibext %t1 \
-; RUN:     -llvmprof-output %t1.prof_data
-; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \
-; RUN:     | FileCheck %s
-; RUN: rm -f %t1.prof_data
-
-; FIXME: profile_rt.dll could be built on win32.
-; REQUIRES: loadable_module
-
-;; func_switch - Test branch probabilities for a switch instruction with an
-;; even chance of taking each case (or no case).
-define i32 @func_switch(i32 %N) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %N.addr = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  %0 = load i32* %N.addr, align 4
-  %rem = srem i32 %0, 4
-  switch i32 %rem, label %sw.epilog [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-  ]
-; CHECK: ], !prof !0
-
-sw.bb:
-  store i32 5, i32* %retval
-  br label %return
-
-sw.bb1:
-  store i32 6, i32* %retval
-  br label %return
-
-sw.bb2:
-  store i32 7, i32* %retval
-  br label %return
-
-sw.epilog:
-  store i32 8, i32* %retval
-  br label %return
-
-return:
-  %1 = load i32* %retval
-  ret i32 %1
-}
-
-;; func_switch_switch - Test branch probabilities in a switch-instruction that
-;; leads to further switch instructions.  The first-tier switch occludes some
-;; possibilities in the second-tier switches, leading to some branches having a
-;; 0 probability.
-define i32 @func_switch_switch(i32 %N) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %N.addr = alloca i32, align 4
-  store i32 %N, i32* %N.addr, align 4
-  %0 = load i32* %N.addr, align 4
-  %rem = srem i32 %0, 2
-  switch i32 %rem, label %sw.default11 [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb5
-  ]
-; CHECK: ], !prof !1
-
-sw.bb:
-  %1 = load i32* %N.addr, align 4
-  %rem1 = srem i32 %1, 4
-  switch i32 %rem1, label %sw.default [
-    i32 0, label %sw.bb2
-    i32 1, label %sw.bb3
-    i32 2, label %sw.bb4
-  ]
-; CHECK: ], !prof !2
-
-sw.bb2:
-  store i32 5, i32* %retval
-  br label %return
-
-sw.bb3:
-  store i32 6, i32* %retval
-  br label %return
-
-sw.bb4:
-  store i32 7, i32* %retval
-  br label %return
-
-sw.default:
-  store i32 8, i32* %retval
-  br label %return
-
-sw.bb5:
-  %2 = load i32* %N.addr, align 4
-  %rem6 = srem i32 %2, 4
-  switch i32 %rem6, label %sw.default10 [
-    i32 0, label %sw.bb7
-    i32 1, label %sw.bb8
-    i32 2, label %sw.bb9
-  ]
-; CHECK: ], !prof !3
-
-sw.bb7:
-  store i32 9, i32* %retval
-  br label %return
-
-sw.bb8:
-  store i32 10, i32* %retval
-  br label %return
-
-sw.bb9:
-  store i32 11, i32* %retval
-  br label %return
-
-sw.default10:
-  store i32 12, i32* %retval
-  br label %return
-
-sw.default11:
-  store i32 13, i32* %retval
-  br label %return
-
-return:
-  %3 = load i32* %retval
-  ret i32 %3
-}
-
-define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %argc.addr = alloca i32, align 4
-  %argv.addr = alloca i8**, align 8
-  %loop = alloca i32, align 4
-  store i32 0, i32* %retval
-  store i32 0, i32* %loop, align 4
-  br label %for.cond
-
-for.cond:
-  %0 = load i32* %loop, align 4
-  %cmp = icmp slt i32 %0, 4000
-  br i1 %cmp, label %for.body, label %for.end
-; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !4
-
-for.body:
-  %1 = load i32* %loop, align 4
-  %call = call i32 @func_switch(i32 %1)
-  %2 = load i32* %loop, align 4
-  %call1 = call i32 @func_switch_switch(i32 %2)
-  br label %for.inc
-
-for.inc:
-  %3 = load i32* %loop, align 4
-  %inc = add nsw i32 %3, 1
-  store i32 %inc, i32* %loop, align 4
-  br label %for.cond
-
-for.end:
-  ret i32 0
-}
-
-; CHECK: !0 = metadata !{metadata !"branch_weights", i32 1000, i32 1000, i32 1000, i32 1000}
-; CHECK: !1 = metadata !{metadata !"branch_weights", i32 0, i32 2000, i32 2000}
-; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1000, i32 0, i32 1000}
-; CHECK: !3 = metadata !{metadata !"branch_weights", i32 1000, i32 0, i32 1000, i32 0}
-; CHECK: !4 = metadata !{metadata !"branch_weights", i32 4000, i32 1}
-; CHECK-NOT: !5
diff --git a/test/Analysis/Profiling/profiling-tool-chain.ll b/test/Analysis/Profiling/profiling-tool-chain.ll
deleted file mode 100644
index 9135a85dc3ad..000000000000
--- a/test/Analysis/Profiling/profiling-tool-chain.ll
+++ /dev/null
@@ -1,212 +0,0 @@
-; RUN: llvm-as %s -o %t1
-
-; FIXME: The RUX parts of the test are disabled for now, they aren't working on
-; llvm-gcc-x86_64-darwin10-selfhost.
-
-; Test the edge optimal profiling instrumentation.
-; RUN: opt %t1 -insert-optimal-edge-profiling -o %t2
-; RUX: llvm-dis < %t2 | FileCheck --check-prefix=INST %s
-
-; Test the creation, reading and displaying of profile
-; RUX: rm -f llvmprof.out
-; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2
-; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2 1 2
-; RUX: llvm-prof -print-all-code %t1 | FileCheck --check-prefix=PROF %s
-
-; Test the loaded profile also with verifier.
-; RUX  opt %t1 -profile-loader -profile-verifier -o %t3
-
-; Test profile estimator.
-; RUN: opt %t1 -profile-estimator -profile-verifier -o %t3
-
-; PROF:  1.     2/4 oneblock
-; PROF:  2.     2/4 main
-; PROF:  1. 15.7895%    12/76	main() - bb6
-; PROF:  2. 11.8421%     9/76	main() - bb2
-; PROF:  3. 11.8421%     9/76	main() - bb3
-; PROF:  4. 11.8421%     9/76	main() - bb5
-; PROF:  5. 10.5263%     8/76	main() - bb10
-; PROF:  6. 7.89474%     6/76	main() - bb
-; PROF:  7. 7.89474%     6/76	main() - bb9
-; PROF:  8. 3.94737%     3/76	main() - bb1
-; PROF:  9. 3.94737%     3/76	main() - bb7
-; PROF: 10. 3.94737%     3/76	main() - bb8
-; PROF: 11. 2.63158%     2/76	oneblock() - entry
-; PROF: 12. 2.63158%     2/76	main() - entry
-; PROF: 13. 2.63158%     2/76	main() - bb11
-; PROF: 14. 2.63158%     2/76	main() - return
-
-; ModuleID = '<stdin>'
-
-@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
-@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1]
-@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1]
-@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1]
-@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1]
-@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1]
-; INST:@OptEdgeProfCounters
-; INST:[21 x i32]
-; INST:[i32 0,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 0,
-; INST:i32 0,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 0,
-; INST:i32 0,
-; INST:i32 -1,
-; INST:i32 -1,
-; INST:i32 0,
-; INST:i32 -1,
-; INST:i32 -1]
-
-; PROF:;;; %oneblock called 2 times.
-; PROF:;;;
-define void @oneblock() nounwind {
-entry:
-; PROF:entry:
-; PROF:	;;; Basic block executed 2 times.
-  %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  ret void
-}
-
-declare i32 @puts(i8*)
-
-; PROF:;;; %main called 2 times.
-; PROF:;;;
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
-; PROF:entry:
-; PROF:	;;; Basic block executed 2 times.
-  %argc_addr = alloca i32                         ; <i32*> [#uses=4]
-  %argv_addr = alloca i8**                        ; <i8***> [#uses=1]
-  %retval = alloca i32                            ; <i32*> [#uses=2]
-  %j = alloca i32                                 ; <i32*> [#uses=4]
-  %i = alloca i32                                 ; <i32*> [#uses=4]
-  %0 = alloca i32                                 ; <i32*> [#uses=2]
-; INST:call 
-; INST:@llvm_start_opt_edge_profiling
-; INST:@OptEdgeProfCounters
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  store i32 %argc, i32* %argc_addr
-  store i8** %argv, i8*** %argv_addr
-  store i32 0, i32* %i, align 4
-  br label %bb10
-; PROF:	;;; Out-edge counts: [2.000000e+00 -> bb10]
-
-bb:                                               ; preds = %bb10
-; PROF:bb:
-; PROF:	;;; Basic block executed 6 times.
-  %1 = load i32* %argc_addr, align 4              ; <i32> [#uses=1]
-  %2 = icmp sgt i32 %1, 1                         ; <i1> [#uses=1]
-  br i1 %2, label %bb1, label %bb8
-; PROF:	;;; Out-edge counts: [3.000000e+00 -> bb1] [3.000000e+00 -> bb8]
-
-bb1:                                              ; preds = %bb
-; PROF:bb1:
-; PROF:	;;; Basic block executed 3 times.
-  store i32 0, i32* %j, align 4
-  br label %bb6
-; PROF:	;;; Out-edge counts: [3.000000e+00 -> bb6]
-
-bb2:                                              ; preds = %bb6
-; PROF:bb2:
-; PROF:	;;; Basic block executed 9 times.
-  %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  %4 = load i32* %argc_addr, align 4              ; <i32> [#uses=1]
-  %5 = icmp sgt i32 %4, 2                         ; <i1> [#uses=1]
-  br i1 %5, label %bb3, label %bb4
-; PROF:	;;; Out-edge counts: [9.000000e+00 -> bb3]
-
-bb3:                                              ; preds = %bb2
-; PROF:bb3:
-; PROF:	;;; Basic block executed 9 times.
-  %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  br label %bb5
-; PROF:	;;; Out-edge counts: [9.000000e+00 -> bb5]
-
-bb4:                                              ; preds = %bb2
-; PROF:bb4:
-; PROF:	;;; Never executed!
-  %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  br label %bb11
-
-bb5:                                              ; preds = %bb3
-; PROF:bb5:
-; PROF:	;;; Basic block executed 9 times.
-  %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  %9 = load i32* %j, align 4                      ; <i32> [#uses=1]
-  %10 = add nsw i32 %9, 1                         ; <i32> [#uses=1]
-  store i32 %10, i32* %j, align 4
-  br label %bb6
-; PROF:	;;; Out-edge counts: [9.000000e+00 -> bb6]
-
-bb6:                                              ; preds = %bb5, %bb1
-; PROF:bb6:
-; PROF:	;;; Basic block executed 12 times.
-  %11 = load i32* %j, align 4                     ; <i32> [#uses=1]
-  %12 = load i32* %argc_addr, align 4             ; <i32> [#uses=1]
-  %13 = icmp slt i32 %11, %12                     ; <i1> [#uses=1]
-  br i1 %13, label %bb2, label %bb7
-; PROF:	;;; Out-edge counts: [9.000000e+00 -> bb2] [3.000000e+00 -> bb7]
-
-bb7:                                              ; preds = %bb6
-; PROF:bb7:
-; PROF:	;;; Basic block executed 3 times.
-  br label %bb9
-; PROF:	;;; Out-edge counts: [3.000000e+00 -> bb9]
-
-bb8:                                              ; preds = %bb
-; PROF:bb8:
-; PROF:	;;; Basic block executed 3 times.
-  %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
-  br label %bb9
-; PROF:	;;; Out-edge counts: [3.000000e+00 -> bb9]
-
-bb9:                                              ; preds = %bb8, %bb7
-; PROF:bb9:
-; PROF:	;;; Basic block executed 6 times.
-  %15 = load i32* %i, align 4                     ; <i32> [#uses=1]
-  %16 = add nsw i32 %15, 1                        ; <i32> [#uses=1]
-  store i32 %16, i32* %i, align 4
-  br label %bb10
-; PROF:	;;; Out-edge counts: [6.000000e+00 -> bb10]
-
-bb10:                                             ; preds = %bb9, %entry
-; PROF:bb10:
-; PROF:	;;; Basic block executed 8 times.
-  %17 = load i32* %i, align 4                     ; <i32> [#uses=1]
-  %18 = icmp ne i32 %17, 3                        ; <i1> [#uses=1]
-  br i1 %18, label %bb, label %bb11
-; INST:br
-; INST:label %bb10.bb11_crit_edge
-; PROF:	;;; Out-edge counts: [6.000000e+00 -> bb] [2.000000e+00 -> bb11]
-
-; INST:bb10.bb11_crit_edge:
-; INST:br
-; INST:label %bb11
-
-bb11:                                             ; preds = %bb10, %bb4
-; PROF:bb11:
-; PROF:	;;; Basic block executed 2 times.
-  call void @oneblock() nounwind
-  store i32 0, i32* %0, align 4
-  %19 = load i32* %0, align 4                     ; <i32> [#uses=1]
-  store i32 %19, i32* %retval, align 4
-  br label %return
-; PROF:	;;; Out-edge counts: [2.000000e+00 -> return]
-
-return:                                           ; preds = %bb11
-; PROF:return:
-; PROF:	;;; Basic block executed 2 times.
-  %retval12 = load i32* %retval                   ; <i32> [#uses=1]
-  ret i32 %retval12
-}
diff --git a/test/Analysis/RegionInfo/lit.local.cfg b/test/Analysis/RegionInfo/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/RegionInfo/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
index e0c5583cbb97..b5eb9fc4878d 100644
--- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep "Loop %bb: backedge-taken count is 100"
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 ; PR1533
 
 @array = weak global [101 x i32] zeroinitializer, align 32		; <[100 x i32]*> [#uses=1]
 
+; CHECK: Loop %bb: backedge-taken count is 100
+
 define void @loop(i32 %x) {
 entry:
 	br label %bb
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll
index e67e4d00d625..fd09fd5f28c5 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -indvars -adce -simplifycfg -S | grep "icmp s"
+; RUN: opt < %s -indvars -adce -simplifycfg -S | FileCheck %s
 ; PR1598
 
+; CHECK: icmp s
+
 define i32 @f(i32 %a, i32 %b, i32 %x, i32 %y) {
 entry:
 	%tmp3 = icmp eq i32 %a, %b		; <i1> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
index 036abf5b7c16..9e19ccab6eb1 100644
--- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
+++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb: backedge-taken count is (-1 + (-1 \* %x) + %y)"
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
 ; PR1597
 
+; CHECK: Loop %bb: backedge-taken count is (-1 + (-1 * %x) + %y)
+
 define i32 @f(i32 %x, i32 %y) {
 entry:
         %tmp63 = icmp ult i32 %x, %y            ; <i1> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
index a3192b9c01fd..b65a52502447 100644
--- a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
+++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 13"
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 ; PR1706
 
+; CHECK: backedge-taken count is 13
+
 define i32 @f() {
 entry:
 	br label %bb5
diff --git a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll
index 514920f0f6fa..a2850d8c4b4f 100644
--- a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll
+++ b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -indvars -S | grep printd | grep 1206807378
+; RUN: opt < %s -indvars -S | FileCheck %s
 ; PR1798
 
+; CHECK: printd(i32 1206807378)
+
 declare void @printd(i32)
 
 define i32 @test() {
diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
index d0644f7b3f9d..6ebfa61de41d 100644
--- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %header: backedge-taken count is (0 smax %n)"
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
+
+; CHECK: Loop %header: backedge-taken count is (0 smax %n)
 
 define void @foo(i32 %n) {
 entry:
diff --git a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
index 52c7985045d0..527fd273cd25 100644
--- a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep umax
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR2003
 
+; CHECK: umax
+
 define i32 @foo(i32 %n) {
 entry:
         br label %header
diff --git a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
index 41734d70f071..9a05d88c4ce7 100644
--- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
+++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 61"
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 ; PR2364
 
+; CHECK: backedge-taken count is 61
+
 define i32 @func_6() nounwind  {
 entry:
 	br label %bb5
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
index 5cf17a210140..dcf8fc9dbdb0 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
@@ -1,6 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s
 ; PR2261
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'foo'
+; CHECK-NOT: smax
+
 @lut = common global [256 x i8] zeroinitializer, align 32		; <[256 x i8]*> [#uses=1]
 
 define void @foo(i32 %count, i32* %srcptr, i32* %dstptr) nounwind  {
diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
index 195dfaaaee95..c804bd905510 100644
--- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll
@@ -1,6 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s
 ; PR2070
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'a'
+; CHECK-NOT: smax
+
 define i32 @a(i32 %x) nounwind  {
 entry:
 	icmp sgt i32 %x, 1		; <i1>:0 [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
index 1865c059a998..ad34f6cedf61 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep Unpredictable
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 ; PR2088
 
+; CHECK: Unpredictable
+
 define void @fun() {
 entry:
         br label %loop
diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
index cbf200e40f9b..82b9d560425e 100644
--- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 113"
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 ; PR2088
 
+; CHECK: backedge-taken count is 113
+
 define void @fun() {
 entry:
         br label %loop
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
index c25e4a3b2b23..46c6c59e92c5 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \
-; RUN: grep "Loop %bb: backedge-taken count is (7 + (-1 \* %argc))"
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s
+
+; CHECK: Loop %bb: backedge-taken count is (7 + (-1 * %argc))
 
 define i32 @main(i32 %argc, i8** %argv) nounwind {
 entry:
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
index 56a83438830e..7acf90c7330c 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll
@@ -1,5 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | grep "Loop %bb: Unpredictable backedge-taken count\."
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; CHECK: Loop %bb: backedge-taken count is ((-5 + %x) /u 3)
+; CHECK: Loop %bb: max backedge-taken count is 1431655764
+
 
 ; ScalarEvolution can't compute a trip count because it doesn't know if
 ; dividing by the stride will have a remainder. This could theoretically
diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
index aaf6770676bd..2b2296a3a24f 100644
--- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
+++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll
@@ -1,5 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 3"
-; XFAIL: *
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s
+
+; CHECK: Loop %bb: backedge-taken count is ((999 + (-1 * %x)) /u 3)
+; CHECK: Loop %bb: max backedge-taken count is 334
+
 
 ; This is a tricky testcase for unsigned wrap detection which ScalarEvolution
 ; doesn't yet know how to do.
diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
index a1b3b7191658..7a7a64001a69 100644
--- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep "backedge-taken count is 255"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; CHECK: backedge-taken count is 255
 
 define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind {
 bb1.thread:
diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
index bb149193a0f4..95aa1fc85e20 100644
--- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \
-; RUN: grep "(((-1 * %i0) + (100005 smax %i0)) /u 5)"
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 |  FileCheck %s
 ; XFAIL: *
 
+; CHECK: (((-1 * %i0) + (100005 smax %i0)) /u 5)
+
 define i32 @foo0(i32 %i0) nounwind {
 entry:
 	br label %bb1
diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
index 70006260cb9e..70588bc0574e 100644
--- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
+++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 5"
-; XFAIL: *
+; RUN: opt < %s -analyze -scalar-evolution 2>&1 | FileCheck %s
+
+; CHECK: /u 5
 
 define i8 @foo0(i8 %i0) nounwind {
 entry:
diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
index 82f2608e5747..f19d18c72e69 100644
--- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
+++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
@@ -1,6 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution | not grep "/u -1"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR3275
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'func_15'
+; CHECK-NOT: /u -1
+
 @g_16 = external global i16		; <i16*> [#uses=3]
 @.str = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
 
diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
index ebd9f7377d0c..3dacfbb0a8d4 100644
--- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
+++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll
@@ -1,35 +1,53 @@
 ; RUN: opt < %s -analyze -scalar-evolution | grep "(trunc i" | not grep ext
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test1'
+; CHECK-NOT: (trunc i{{.*}}ext
+
 define i16 @test1(i8 %x) {
   %A = sext i8 %x to i32
   %B = trunc i32 %A to i16
   ret i16 %B
 }
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test2'
+; CHECK-NOT: (trunc i{{.*}}ext
+
 define i8 @test2(i16 %x) {
   %A = sext i16 %x to i32
   %B = trunc i32 %A to i8
   ret i8 %B
 }
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test3'
+; CHECK-NOT: (trunc i{{.*}}ext
+
 define i16 @test3(i16 %x) {
   %A = sext i16 %x to i32
   %B = trunc i32 %A to i16
   ret i16 %B
 }
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test4'
+; CHECK-NOT: (trunc i{{.*}}ext
+
 define i16 @test4(i8 %x) {
   %A = zext i8 %x to i32
   %B = trunc i32 %A to i16
   ret i16 %B
 }
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test5'
+; CHECK-NOT: (trunc i{{.*}}ext
+
 define i8 @test5(i16 %x) {
   %A = zext i16 %x to i32
   %B = trunc i32 %A to i8
   ret i8 %B
 }
 
+; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test6'
+; CHECK-NOT: (trunc i{{.*}}ext
+
 define i16 @test6(i16 %x) {
   %A = zext i16 %x to i32
   %B = trunc i32 %A to i16
diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
index 8a780431345e..5d1502da179f 100644
--- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
+++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll
@@ -1,5 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep "count is 2"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR3171
+
+; CHECK: count is 2
+
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 	%struct.Foo = type { i32 }
diff --git a/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll
index 1600d5f05a18..973dd7d6dd0d 100644
--- a/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll
+++ b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll
@@ -30,4 +30,4 @@ for.inc9:
 if.then:
   ret i8 0
 
-}
-\ No newline at end of file
+}
diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
index e946d7a64bc9..2cb8c5bf46ff 100644
--- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
+++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | FileCheck %s
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 
 ; PR1101
 
diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll
index 06f4a8582fa1..404ab91e269d 100644
--- a/test/Analysis/ScalarEvolution/and-xor.ll
+++ b/test/Analysis/ScalarEvolution/and-xor.ll
@@ -1,5 +1,8 @@
-; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:   | grep "\-->  (zext" | count 2
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
+
+; CHECK: -->  (zext
+; CHECK: -->  (zext
+; CHECK-NOT: -->  (zext
 
 define i32 @foo(i32 %x) {
   %n = and i32 %x, 255
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
index 3d15c787fcfe..8abb43074c5e 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb3: backedge-taken count is (-1 + %n)"
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
+
+; CHECK: Loop %bb3: backedge-taken count is (-1 + %n)
 
 ; We don't want to use a max in the trip count expression in
 ; this testcase.
diff --git a/test/Analysis/ScalarEvolution/div-overflow.ll b/test/Analysis/ScalarEvolution/div-overflow.ll
index 28467975606f..aca964ae62b6 100644
--- a/test/Analysis/ScalarEvolution/div-overflow.ll
+++ b/test/Analysis/ScalarEvolution/div-overflow.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep "\-->  ((-128 \* %a) /u -128)"
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
+
+; CHECK: -->  ((-128 * %a) /u -128)
 
 ; Don't let ScalarEvolution fold this div away.
 
diff --git a/test/Analysis/ScalarEvolution/do-loop.ll b/test/Analysis/ScalarEvolution/do-loop.ll
index 6e3295a920b0..e35ea7d57e3a 100644
--- a/test/Analysis/ScalarEvolution/do-loop.ll
+++ b/test/Analysis/ScalarEvolution/do-loop.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep smax
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR1614
 
+; CHECK: smax
+
 define i32 @f(i32 %x, i32 %y) {
 entry:
 	br label %bb
diff --git a/test/Analysis/ScalarEvolution/lit.local.cfg b/test/Analysis/ScalarEvolution/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Analysis/ScalarEvolution/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll b/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll
new file mode 100644
index 000000000000..aa5254c758bf
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to understand the loop and eliminate the casts.
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-p4:64:64:64-n16:32:64"
+
+; CHECK:  {%d,+,4}<%bb>		Exits: ((4 * (trunc i32 (-1 + %n) to i16)) + %d)
+
+
+define void @foo(i32 addrspace(1)* nocapture %d, i32 %n) nounwind {
+; CHECK: @foo
+entry:
+	%0 = icmp sgt i32 %n, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb.nph, label %return
+
+bb.nph:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb1, %bb.nph
+	%i.02 = phi i32 [ %5, %bb1 ], [ 0, %bb.nph ]		; <i32> [#uses=2]
+	%p.01 = phi i8 [ %4, %bb1 ], [ -1, %bb.nph ]		; <i8> [#uses=2]
+	%1 = sext i8 %p.01 to i32		; <i32> [#uses=1]
+	%2 = sext i32 %i.02 to i64		; <i64> [#uses=1]
+	%3 = getelementptr i32 addrspace(1)* %d, i64 %2		; <i32*> [#uses=1]
+	store i32 %1, i32 addrspace(1)* %3, align 4
+	%4 = add i8 %p.01, 1		; <i8> [#uses=1]
+	%5 = add i32 %i.02, 1		; <i32> [#uses=2]
+	br label %bb1
+
+bb1:		; preds = %bb
+	%6 = icmp slt i32 %5, %n		; <i1> [#uses=1]
+	br i1 %6, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge:		; preds = %bb1
+	br label %return
+
+return:		; preds = %bb1.return_crit_edge, %entry
+	ret void
+}
+
+define void @test(i8 addrspace(1)* %a, i32 %n) nounwind {
+; CHECK: @test
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %tmp = zext i32 %n to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
+  %arrayidx = getelementptr i8 addrspace(1)* %a, i64 %indvar
+  store i8 0, i8 addrspace(1)* %arrayidx, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp ne i64 %indvar.next, %tmp
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+; CHECK: Determining loop execution counts for: @test
+; CHECK-NEXT: backedge-taken count is
+; CHECK-NEXT: max backedge-taken count is -1
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index 659cf4f8da97..05992eadbac0 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -62,11 +62,11 @@ for.body.lr.ph.i.i:                               ; preds = %entry
 for.body.i.i:                                     ; preds = %for.body.i.i, %for.body.lr.ph.i.i
   %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ]
 ; CHECK: %__first.addr.02.i.i
-; CHECK-NEXT: -->  {%begin,+,4}<nw><%for.body.i.i>
+; CHECK-NEXT: -->  {%begin,+,4}<nuw><%for.body.i.i>
   store i32 0, i32* %__first.addr.02.i.i, align 4
   %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1
 ; CHECK: %ptrincdec.i.i
-; CHECK-NEXT: -->  {(4 + %begin),+,4}<nw><%for.body.i.i>
+; CHECK-NEXT: -->  {(4 + %begin),+,4}<nuw><%for.body.i.i>
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i
 
@@ -122,3 +122,39 @@ exit:
   %result = phi i32 [ %a, %entry ], [ %tmp2, %greater ]
   ret i32 %result
 }
+
+; TODO: This could fold down to '1'
+; CHECK-LABEL: PR12375
+; CHECK: -->  {(4 + %arg),+,4}<nuw><%bb1>		Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax (8 + %arg)<nsw>)) /u 4)) + %arg)
+define i32 @PR12375(i32* readnone %arg) {
+bb:
+  %tmp = getelementptr inbounds i32* %arg, i64 2
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i32* [ %arg, %bb ], [ %tmp5, %bb1 ]
+  %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ]
+  %tmp4 = add nsw i32 %tmp3, 1
+  %tmp5 = getelementptr inbounds i32* %tmp2, i64 1
+  %tmp6 = icmp ult i32* %tmp5, %tmp
+  br i1 %tmp6, label %bb1, label %bb7
+
+bb7:                                              ; preds = %bb1
+  ret i32 %tmp4
+}
+
+; CHECK-LABEL: PR12376
+; CHECK: -->  {(4 + %arg),+,4}<nuw><%bb2>		Exits: (4 + (4 * ((3 + (-1 * %arg) + (%arg umax %arg1)) /u 4)) + %arg)
+define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1)  {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %bb
+  %tmp = phi i32* [ %arg, %bb ], [ %tmp4, %bb2 ]
+  %tmp3 = icmp ult i32* %tmp, %arg1
+  %tmp4 = getelementptr inbounds i32* %tmp, i64 1
+  br i1 %tmp3, label %bb2, label %bb5
+
+bb5:                                              ; preds = %bb2
+  ret void
+}
diff --git a/test/Analysis/ScalarEvolution/smax.ll b/test/Analysis/ScalarEvolution/smax.ll
index eceb4298fd50..122e9e47e56f 100644
--- a/test/Analysis/ScalarEvolution/smax.ll
+++ b/test/Analysis/ScalarEvolution/smax.ll
@@ -1,8 +1,10 @@
-; RUN: opt < %s -analyze -scalar-evolution | grep smax | count 2
-; RUN: opt < %s -analyze -scalar-evolution | grep \
-; RUN:     "%. smax %. smax %."
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR1614
 
+; CHECK: -->  (%a smax %b)
+; CHECK: -->  (%a smax %b smax %c)
+; CHECK-NOT: smax
+
 define i32 @x(i32 %a, i32 %b, i32 %c) {
   %A = icmp sgt i32 %a, %b
   %B = select i1 %A, i32 %a, i32 %b
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index 94f6882c0c91..f89125aeb29b 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 10000"
+; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
 ; PR1101
 
 @A = weak global [1000 x i32] zeroinitializer, align 32         
 
+; CHECK: backedge-taken count is 10000
 
 define void @test(i32 %N) {
 entry:
diff --git a/test/Analysis/ScalarEvolution/trip-count10.ll b/test/Analysis/ScalarEvolution/trip-count10.ll
index 546e1dc7d8f7..ead80b950409 100644
--- a/test/Analysis/ScalarEvolution/trip-count10.ll
+++ b/test/Analysis/ScalarEvolution/trip-count10.ll
@@ -3,7 +3,7 @@
 ; Trip counts with trivial exit conditions.
 
 ; CHECK: Determining loop execution counts for: @a
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
 ; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 
 ; CHECK: Determining loop execution counts for: @b
@@ -15,8 +15,8 @@
 ; CHECK: Loop %loop: max backedge-taken count is false
 
 ; CHECK: Determining loop execution counts for: @d
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 
 define void @a(i64 %n) nounwind {
 entry:
@@ -124,3 +124,28 @@ loop:
 return:
   ret void
 }
+
+; PR16130: Loop exit depends on an 'or' expression.
+; One side of the expression tests against a value that will be skipped.
+; We can't assume undefined behavior just because we have an NSW flag.
+;
+; CHECK: Determining loop execution counts for: @exit_orcond_nsw
+; CHECK: Loop %for.body.i: Unpredictable backedge-taken count.
+; CHECK: Loop %for.body.i: max backedge-taken count is 1
+define void @exit_orcond_nsw(i32 *%a) nounwind {
+entry:
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %entry
+  %b.01.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  %tobool.i = icmp ne i32 %b.01.i, 0
+  %add.i = add nsw i32 %b.01.i, 8
+  %cmp.i = icmp eq i32 %add.i, 13
+  %or.cond = or i1 %tobool.i, %cmp.i
+  br i1 %or.cond, label %exit, label %for.body.i
+
+exit:                                     ; preds = %for.body.i
+  %b.01.i.lcssa = phi i32 [ %b.01.i, %for.body.i ]
+  store i32 %b.01.i.lcssa, i32* %a, align 4
+  ret void
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count11.ll b/test/Analysis/ScalarEvolution/trip-count11.ll
index 71915037ec8b..e14af08e33f8 100644
--- a/test/Analysis/ScalarEvolution/trip-count11.ll
+++ b/test/Analysis/ScalarEvolution/trip-count11.ll
@@ -1,9 +1,11 @@
 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 @foo.a = internal constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 16
+@foo.a_as1 = internal addrspace(1) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 16
+
 
 define i32 @foo() nounwind uwtable noinline {
 entry:
@@ -27,3 +29,27 @@ for.inc:                                          ; preds = %for.cond
 for.end:                                          ; preds = %for.cond
   ret i32 %sum.0
 }
+
+define i32 @foo_as1() nounwind uwtable noinline {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+; CHECK: --> %sum.0 Exits: 28
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ult i32 %i.0, 8
+  br i1 %cmp, label %for.inc, label %for.end
+
+for.inc:                                          ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds [8 x i32] addrspace(1)* @foo.a_as1, i64 0, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i32 %sum.0
+}
+
diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll
index d84e99f6e7ca..e76488abfca5 100644
--- a/test/Analysis/ScalarEvolution/trip-count2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count2.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution | \
-; RUN:   grep "backedge-taken count is 4"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR1101
 
 @A = weak global [1000 x i32] zeroinitializer, align 32         
 
+; CHECK: backedge-taken count is 4
 
 define void @test(i32 %N) {
 entry:
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 0cb6c952b893..850e035e7c6b 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -1,10 +1,12 @@
-; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep "Loop %bb3\.i: Unpredictable backedge-taken count\."
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
 
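-; ScalarEvolution can't compute a trip count because it doesn't know if
-; dividing by the stride will have a remainder. This could theoretically
-; be teaching it how to use a more elaborate trip count computation.
+; ScalarEvolution used to report an unpredictable trip count here because it
+; did not know if dividing by the stride would leave a remainder. It now
+; computes the "/u 64" backedge-taken count checked below.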
 
+; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64)
+; CHECK: Loop %bb3.i: max backedge-taken count is 33554431
+
 %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
 %struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
 %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll
index c02ae145268f..b7184a48fe85 100644
--- a/test/Analysis/ScalarEvolution/trip-count4.ll
+++ b/test/Analysis/ScalarEvolution/trip-count4.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   | grep "sext.*trunc.*Exits: 11"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
 ; ScalarEvolution should be able to compute a loop exit value for %indvar.i8.
 
+; CHECK: sext{{.*}}trunc{{.*}}Exits: 11
+
 define void @another_count_down_signed(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
diff --git a/test/Analysis/ScalarEvolution/trip-count6.ll b/test/Analysis/ScalarEvolution/trip-count6.ll
index 882f5526da4c..0f394a09d156 100644
--- a/test/Analysis/ScalarEvolution/trip-count6.ll
+++ b/test/Analysis/ScalarEvolution/trip-count6.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | grep "max backedge-taken count is 1$"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
 @mode_table = global [4 x i32] zeroinitializer          ; <[4 x i32]*> [#uses=1]
 
+; CHECK: max backedge-taken count is 1{{$}}
+
 define i8 @f() {
 entry:
   tail call i32 @fegetround( )          ; <i32>:0 [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll
index 2bcb9e92abcc..d01a18a468f0 100644
--- a/test/Analysis/ScalarEvolution/trip-count7.ll
+++ b/test/Analysis/ScalarEvolution/trip-count7.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:   | grep "Loop %bb7.i: Unpredictable backedge-taken count\."
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
+; CHECK: Loop %bb7.i: Unpredictable backedge-taken count.
+
 	%struct.complex = type { float, float }
 	%struct.element = type { i32, i32 }
 	%struct.node = type { %struct.node*, %struct.node*, i32 }
diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll
index 005162b79212..a1777bc969c9 100644
--- a/test/Analysis/ScalarEvolution/trip-count8.ll
+++ b/test/Analysis/ScalarEvolution/trip-count8.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | grep "Loop %for\.body: backedge-taken count is (-1 + [%]ecx)"
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR4599
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
+; CHECK: Loop %for.body: backedge-taken count is (-1 + {{%?}}ecx)
+
 define i32 @foo(i32 %ecx) nounwind {
 entry:
 	%cmp2 = icmp eq i32 %ecx, 0		; <i1> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/trip-count9.ll b/test/Analysis/ScalarEvolution/trip-count9.ll
index 9180f2b8dd7f..9a080b34743f 100644
--- a/test/Analysis/ScalarEvolution/trip-count9.ll
+++ b/test/Analysis/ScalarEvolution/trip-count9.ll
@@ -25,8 +25,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -57,8 +57,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @start1_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -89,8 +89,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: Unpredictable backedge-taken count.
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
 define void @startx_step2(i4 %n, i4 %x) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -120,12 +120,18 @@ exit:
   ret void
 }
 
-; Be careful with this one. If %n is INT4_MAX, %i.next will wrap. The nsw bit
-; says that the result is undefined, but ScalarEvolution must respect that
-; subsequent passes may result the undefined behavior in predictable ways.
+; If %n is INT4_MAX, %i.next will wrap. The nsw bit says that the
+; result is undefined. Therefore, after the loop's second iteration,
+; we are free to assume that the loop exits. This is valid because:
+; (a) %i.next is a poison value after the second iteration, which can
+; also be considered an undef value.
+; (b) the return instruction enacts a side effect that is control
+; dependent on the poison value.
+;
+; CHECK-LABEL: nsw_step2
 ; CHECK: Determining loop execution counts for: @nsw_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + %n) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 2
 define void @nsw_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -139,6 +145,7 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: nsw_start1
 ; CHECK: Determining loop execution counts for: @nsw_start1
 ; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax %n))
 ; CHECK: Loop %loop: max backedge-taken count is 5
@@ -156,8 +163,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax %n)) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 2
 define void @nsw_start1_step2(i4 %n) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -188,8 +195,8 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
-; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax %n)) /u 2)
+; CHECK: Loop %loop: max backedge-taken count is 7
 define void @nsw_startx_step2(i4 %n, i4 %x) {
 entry:
   %s = icmp sgt i4 %n, 0
@@ -221,7 +228,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (2 * %n)) /u 2)
 ; CHECK: Loop %loop: max backedge-taken count is 2
 define void @even_step2(i4 %n) {
 entry:
@@ -255,7 +262,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_start1_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2)
 ; CHECK: Loop %loop: max backedge-taken count is 2
 define void @even_start1_step2(i4 %n) {
 entry:
@@ -273,7 +280,7 @@ exit:
 
 ; CHECK: Determining loop execution counts for: @even_startx
 ; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n)))
-; CHECK: Loop %loop: max backedge-taken count is -1
+; CHECK: Loop %loop: max backedge-taken count is -2
 define void @even_startx(i4 %n, i4 %x) {
 entry:
   %m = shl i4 %n, 1
@@ -289,7 +296,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_startx_step2
-; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax (2 * %n))) /u 2)
 ; CHECK: Loop %loop: max backedge-taken count is 7
 define void @even_startx_step2(i4 %n, i4 %x) {
 entry:
@@ -375,7 +382,7 @@ exit:
 
 ; CHECK: Determining loop execution counts for: @even_nsw_startx
 ; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n)))
-; CHECK: Loop %loop: max backedge-taken count is -1
+; CHECK: Loop %loop: max backedge-taken count is -2
 define void @even_nsw_startx(i4 %n, i4 %x) {
 entry:
   %m = shl i4 %n, 1
diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll
index 4ab2f39a2810..2616ea928a49 100644
--- a/test/Analysis/ScalarEvolution/xor-and.ll
+++ b/test/Analysis/ScalarEvolution/xor-and.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:   | grep "\-->  (zext i4 (-8 + (trunc i64 (8 \* %x) to i4)) to i64)"
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
 
 ; ScalarEvolution shouldn't try to analyze %z into something like
 ;   -->  (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64)
 
+; CHECK: -->  (zext i4 (-8 + (trunc i64 (8 * %x) to i4)) to i64)
+
 define i64 @foo(i64 %x) {
   %a = shl i64 %x, 3
   %t = and i64 %a, 8
diff --git a/test/Analysis/ScalarEvolution/zext-wrap.ll b/test/Analysis/ScalarEvolution/zext-wrap.ll
index 38d15ffbd880..104ed41010af 100644
--- a/test/Analysis/ScalarEvolution/zext-wrap.ll
+++ b/test/Analysis/ScalarEvolution/zext-wrap.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN:  | FileCheck %s
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 ; PR4569
 
 define i16 @main() nounwind {
diff --git a/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll b/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
new file mode 100644
index 000000000000..905113995c30
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -tbaa -gvn -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%structA = type { %structB }
+%structB = type { i32*, %classT }
+%classT = type { %classO, %classJ*, i8 }
+%classO = type { i32 }
+%classJ = type { i8 }
+%classA = type { %classB }
+%classB = type { i8 }
+%classC = type { %classD, %structA }
+%classD = type { %structA* }
+
+; Function Attrs: ssp uwtable
+define %structA** @test(%classA* %this, i32** %p1) #0 align 2 {
+entry:
+; CHECK-LABEL: @test
+; CHECK: load i32** %p1, align 8, !tbaa
+; CHECK: load i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa
+; CHECK: call void @callee
+  %0 = load i32** %p1, align 8, !tbaa !1
+  %1 = load i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa !5
+  call void @callee(i32* %0, i32* %1)
+  unreachable
+}
+
+declare void @callee(i32*, i32*) #1
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.4"}
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!5 = metadata !{metadata !6, metadata !2, i64 8}
+!6 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11RINS_1FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS2_S3_EEEEE1GEPSD_EE", metadata !7, i64 8}
+!7 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS1_S2_EEEEE1GE", metadata !8, i64 0}
+!8 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11DIKPi8TreeIterEE", metadata !2, i64 0, metadata !9, i64 8}
+!9 = metadata !{metadata !"_ZTS8TreeIter", metadata !2, i64 8, metadata !10, i64 16}
+!10 = metadata !{metadata !"bool", metadata !3, i64 0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
index d59e3924acd3..76a88c859a6b 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
@@ -47,16 +47,21 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
 ; Root note.
 !0 = metadata !{ }
 ; Some type.
-!1 = metadata !{ metadata !"foo", metadata !0 }
+!1 = metadata !{metadata !7, metadata !7, i64 0}
 ; Some other non-aliasing type.
-!2 = metadata !{ metadata !"bar", metadata !0 }
+!2 = metadata !{metadata !8, metadata !8, i64 0}
 
 ; Some type.
-!3 = metadata !{ metadata !"foo", metadata !0 }
+!3 = metadata !{metadata !9, metadata !9, i64 0}
 ; Some type in a different type system.
-!4 = metadata !{ metadata !"bar", metadata !"different" }
+!4 = metadata !{metadata !10, metadata !10, i64 0}
 
 ; Invariant memory.
-!5 = metadata !{ metadata !"qux", metadata !0, i1 1 }
+!5 = metadata !{metadata !11, metadata !11, i64 0, i1 1}
 ; Not invariant memory.
-!6 = metadata !{ metadata !"qux", metadata !0, i1 0 }
+!6 = metadata !{metadata !11, metadata !11, i64 0, i1 0}
+!7 = metadata !{ metadata !"foo", metadata !0 }
+!8 = metadata !{ metadata !"bar", metadata !0 }
+!9 = metadata !{ metadata !"foo", metadata !0 }
+!10 = metadata !{ metadata !"bar", metadata !"different" }
+!11 = metadata !{ metadata !"qux", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
index 3b5211e5999d..14bbeac14d69 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
@@ -1,7 +1,9 @@
-; RUN: opt < %s -tbaa -basicaa -argpromotion -mem2reg -S | not grep alloca
+; RUN: opt < %s -tbaa -basicaa -argpromotion -mem2reg -S | FileCheck %s
 
 target datalayout = "E-p:64:64:64"
 
+; CHECK: test
+; CHECK-NOT: alloca
 define internal i32 @test(i32* %X, i32* %Y, i32* %Q) {
   store i32 77, i32* %Q, !tbaa !2
   %A = load i32* %X, !tbaa !1
@@ -10,6 +12,8 @@ define internal i32 @test(i32* %X, i32* %Y, i32* %Q) {
   ret i32 %C
 }
 
+; CHECK: caller
+; CHECK-NOT: alloca
 define internal i32 @caller(i32* %B, i32* %Q) {
   %A = alloca i32
   store i32 78, i32* %Q, !tbaa !2
@@ -18,6 +22,8 @@ define internal i32 @caller(i32* %B, i32* %Q) {
   ret i32 %C
 }
 
+; CHECK: callercaller
+; CHECK-NOT: alloca
 define i32 @callercaller(i32* %Q) {
   %B = alloca i32
   store i32 2, i32* %B, !tbaa !1
@@ -27,5 +33,7 @@ define i32 @callercaller(i32* %Q) {
 }
 
 !0 = metadata !{metadata !"test"}
-!1 = metadata !{metadata !"green", metadata !0}
-!2 = metadata !{metadata !"blue", metadata !0}
+!1 = metadata !{metadata !3, metadata !3, i64 0}
+!2 = metadata !{metadata !4, metadata !4, i64 0}
+!3 = metadata !{metadata !"green", metadata !0}
+!4 = metadata !{metadata !"blue", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
index 6b44eb638423..bcf1f2c5275d 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dse.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -51,16 +51,21 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
 ; Root note.
 !0 = metadata !{ }
 ; Some type.
-!1 = metadata !{ metadata !"foo", metadata !0 }
+!1 = metadata !{metadata !7, metadata !7, i64 0}
 ; Some other non-aliasing type.
-!2 = metadata !{ metadata !"bar", metadata !0 }
+!2 = metadata !{metadata !8, metadata !8, i64 0}
 
 ; Some type.
-!3 = metadata !{ metadata !"foo", metadata !0 }
+!3 = metadata !{metadata !9, metadata !9, i64 0}
 ; Some type in a different type system.
-!4 = metadata !{ metadata !"bar", metadata !"different" }
+!4 = metadata !{metadata !10, metadata !10, i64 0}
 
 ; Invariant memory.
-!5 = metadata !{ metadata !"qux", metadata !0, i1 1 }
+!5 = metadata !{metadata !11, metadata !11, i64 0, i1 1}
 ; Not invariant memory.
-!6 = metadata !{ metadata !"qux", metadata !0, i1 0 }
+!6 = metadata !{metadata !11, metadata !11, i64 0, i1 0}
+!7 = metadata !{ metadata !"foo", metadata !0 }
+!8 = metadata !{ metadata !"bar", metadata !0 }
+!9 = metadata !{ metadata !"foo", metadata !0 }
+!10 = metadata !{ metadata !"bar", metadata !"different" }
+!11 = metadata !{ metadata !"qux", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
index 52e394b2d09c..4dc40739edfb 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -13,7 +13,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK: for.end:
 ; CHECK:   %arrayidx31 = getelementptr inbounds %union.vector_t* %t, i64 0, i32 0, i64 1
-; CHECK:   %tmp32 = load i64* %arrayidx31, align 8, !tbaa !3
+; CHECK:   %tmp32 = load i64* %arrayidx31, align 8, !tbaa [[TAG:!.*]]
 
 define void @vrlh(%union.vector_t* %va, %union.vector_t* %vb, %union.vector_t* %vd) nounwind {
 entry:
@@ -123,9 +123,15 @@ for.end:                                          ; preds = %for.body
   ret float %tmp10
 }
 
-!0 = metadata !{metadata !"short", metadata !1}
+; CHECK: [[TAG]] = metadata !{metadata [[TYPE_LL:!.*]], metadata [[TYPE_LL]], i64 0}
+; CHECK: [[TYPE_LL]] = metadata !{metadata !"long long", metadata {{!.*}}}
+!0 = metadata !{metadata !6, metadata !6, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"long long", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
-!5 = metadata !{metadata !"float", metadata !1}
+!3 = metadata !{metadata !7, metadata !7, i64 0}
+!4 = metadata !{metadata !8, metadata !8, i64 0}
+!5 = metadata !{metadata !9, metadata !9, i64 0}
+!6 = metadata !{metadata !"short", metadata !1}
+!7 = metadata !{metadata !"long long", metadata !1}
+!8 = metadata !{metadata !"int", metadata !1}
+!9 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index c6cc26a24106..e9fb9418e704 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -49,7 +49,7 @@ define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind {
   ret void
 }
 
-; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) #1 {
+; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture readonly %q, i64 %n) #1 {
 define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2
   ret void
@@ -80,6 +80,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
 !0 = metadata !{ }
 
 ; Invariant memory.
-!1 = metadata !{ metadata !"foo", metadata !0, i1 1 }
+!1 = metadata !{metadata !3, metadata !3, i64 0, i1 1 }
 ; Not invariant memory.
-!2 = metadata !{ metadata !"foo", metadata !0, i1 0 }
+!2 = metadata !{metadata !3, metadata !3, i64 0, i1 0 }
+!3 = metadata !{ metadata !"foo", metadata !0 }
diff --git a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
index eceaa2cf02d3..90e1abbb673a 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -85,7 +85,11 @@ if.else:
 }
 
 !0 = metadata !{}
-!1 = metadata !{metadata !"red", metadata !0}
-!2 = metadata !{metadata !"blu", metadata !0}
-!3 = metadata !{metadata !"outer space"}
-!4 = metadata !{metadata !"brick red", metadata !1}
+!1 = metadata !{metadata !5, metadata !5, i64 0}
+!2 = metadata !{metadata !6, metadata !6, i64 0}
+!3 = metadata !{metadata !7, metadata !7, i64 0}
+!4 = metadata !{metadata !8, metadata !8, i64 0}
+!5 = metadata !{metadata !"red", metadata !0}
+!6 = metadata !{metadata !"blu", metadata !0}
+!7 = metadata !{metadata !"outer space"}
+!8 = metadata !{metadata !"brick red", metadata !5}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 6f1c22da3ac5..93b8e503b855 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -26,5 +26,7 @@ declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
 ; CHECK: attributes [[NUW]] = { nounwind }
 
 !0 = metadata !{metadata !"tbaa root", null}
-!1 = metadata !{metadata !"A", metadata !0}
-!2 = metadata !{metadata !"B", metadata !0}
+!1 = metadata !{metadata !3, metadata !3, i64 0}
+!2 = metadata !{metadata !4, metadata !4, i64 0}
+!3 = metadata !{metadata !"A", metadata !0}
+!4 = metadata !{metadata !"B", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
index 12a9c1dc5649..e45fc85478d3 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/licm.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -30,8 +30,8 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 !0 = metadata !{metadata !"root", null}
-!1 = metadata !{metadata !"pointer", metadata !0}
-!2 = metadata !{metadata !"double", metadata !0}
+!1 = metadata !{metadata !6, metadata !6, i64 0}
+!2 = metadata !{metadata !7, metadata !7, i64 0}
 
 ; LICM shouldn't hoist anything here.
 
@@ -56,6 +56,10 @@ loop:
   br label %loop
 }
 
-!3 = metadata !{metadata !"pointer", metadata !4}
-!4 = metadata !{metadata !"char", metadata !5}
-!5 = metadata !{metadata !"root", null}
+!3 = metadata !{metadata !"pointer", metadata !8}
+!4 = metadata !{metadata !8, metadata !8, i64 0}
+!5 = metadata !{metadata !9, metadata !9, i64 0}
+!6 = metadata !{metadata !"pointer", metadata !0}
+!7 = metadata !{metadata !"double", metadata !0}
+!8 = metadata !{metadata !"char", metadata !9}
+!9 = metadata !{metadata !"root", null}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
index c2407dfd4c89..6fd6eaca012e 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64"
 
 ; CHECK: @foo
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0
-; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !2
+; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]]
 ; CHECK-NEXT: ret void
 define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2
@@ -18,6 +18,10 @@ define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
+; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0}
+; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
 !0 = metadata !{metadata !"tbaa root", null}
-!1 = metadata !{metadata !"A", metadata !0}
-!2 = metadata !{metadata !"B", metadata !0}
+!1 = metadata !{metadata !3, metadata !3, i64 0}
+!2 = metadata !{metadata !4, metadata !4, i64 0}
+!3 = metadata !{metadata !"A", metadata !0}
+!4 = metadata !{metadata !"B", metadata !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
index f1edb4482cf1..609e87c2313f 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
@@ -18,7 +18,7 @@
 
 ; Basic AA says MayAlias, TBAA says NoAlias
 ; CHECK: MayAlias: i64* %i5, i8** %p
-; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !4 <->   store i8* null, i8** %p, align 8, !tbaa !3
+; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !6 <->   store i8* null, i8** %p, align 8, !tbaa !9
 
 %struct.Foo = type { i64 }
 %struct.Bar = type { i8* }
@@ -32,10 +32,10 @@ entry:
   store i32 %n, i32* %n.addr, align 4, !tbaa !0
   %call = call noalias i8* @_Znwm(i64 8)
   %0 = bitcast i8* %call to %struct.Foo*
-  store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !3
-  %1 = load %struct.Foo** %f, align 8, !tbaa !3
+  store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !4
+  %1 = load %struct.Foo** %f, align 8, !tbaa !4
   %i = getelementptr inbounds %struct.Foo* %1, i32 0, i32 0
-  store i64 1, i64* %i, align 8, !tbaa !4
+  store i64 1, i64* %i, align 8, !tbaa !6
   store i32 0, i32* %i1, align 4, !tbaa !0
   br label %for.cond
 
@@ -46,7 +46,7 @@ for.cond:
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:
-  %4 = load %struct.Foo** %f, align 8, !tbaa !3
+  %4 = load %struct.Foo** %f, align 8, !tbaa !4
   %5 = bitcast %struct.Foo* %4 to i8*
   %new.isnull = icmp eq i8* %5, null
   br i1 %new.isnull, label %new.cont, label %new.notnull
@@ -57,11 +57,11 @@ new.notnull:
 
 new.cont:
   %7 = phi %struct.Bar* [ %6, %new.notnull ], [ null, %for.body ]
-  store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !3
-  %8 = load %struct.Bar** %b, align 8, !tbaa !3
+  store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !4
+  %8 = load %struct.Bar** %b, align 8, !tbaa !4
   %p = getelementptr inbounds %struct.Bar* %8, i32 0, i32 0
-  store i8* null, i8** %p, align 8, !tbaa !3
-  %9 = load %struct.Foo** %f, align 8, !tbaa !3
+  store i8* null, i8** %p, align 8, !tbaa !9
+  %9 = load %struct.Foo** %f, align 8, !tbaa !4
   %10 = bitcast %struct.Foo* %9 to i8*
   %new.isnull2 = icmp eq i8* %10, null
   br i1 %new.isnull2, label %new.cont4, label %new.notnull3
@@ -72,12 +72,12 @@ new.notnull3:
 
 new.cont4:
   %12 = phi %struct.Foo* [ %11, %new.notnull3 ], [ null, %new.cont ]
-  store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !3
+  store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !4
   %13 = load i32* %i1, align 4, !tbaa !0
   %conv = sext i32 %13 to i64
-  %14 = load %struct.Foo** %f, align 8, !tbaa !3
+  %14 = load %struct.Foo** %f, align 8, !tbaa !4
   %i5 = getelementptr inbounds %struct.Foo* %14, i32 0, i32 0
-  store i64 %conv, i64* %i5, align 8, !tbaa !4
+  store i64 %conv, i64* %i5, align 8, !tbaa !6
   br label %for.inc
 
 for.inc:
@@ -87,9 +87,9 @@ for.inc:
   br label %for.cond
 
 for.end:
-  %16 = load %struct.Foo** %f, align 8, !tbaa !3
+  %16 = load %struct.Foo** %f, align 8, !tbaa !4
   %i6 = getelementptr inbounds %struct.Foo* %16, i32 0, i32 0
-  %17 = load i64* %i6, align 8, !tbaa !4
+  %17 = load i64* %i6, align 8, !tbaa !6
   ret i64 %17
 }
 
@@ -97,8 +97,14 @@ declare noalias i8* @_Znwm(i64)
 
 attributes #0 = { nounwind }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
-!4 = metadata !{metadata !"long", metadata !1}
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"int", metadata !2, i64 0}
+!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"any pointer", metadata !2, i64 0}
+!6 = metadata !{metadata !7, metadata !8, i64 0}
+!7 = metadata !{metadata !"_ZTS3Foo", metadata !8, i64 0}
+!8 = metadata !{metadata !"long", metadata !2, i64 0}
+!9 = metadata !{metadata !10, metadata !5, i64 0}
+!10 = metadata !{metadata !"_ZTS3Bar", metadata !5, i64 0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
index 47cb5f2256d3..b219ef19284e 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
@@ -39,8 +39,12 @@ entry:
   ret i64 %tmp3
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !2, metadata !2, i64 0}
 !1 = metadata !{metadata !"simple"}
-!3 = metadata !{metadata !"float", metadata !1}
-!4 = metadata !{metadata !"long", metadata !1}
-!5 = metadata !{metadata !"small", metadata !1}
+!2 = metadata !{metadata !"int", metadata !1}
+!3 = metadata !{metadata !6, metadata !6, i64 0}
+!4 = metadata !{metadata !7, metadata !7, i64 0}
+!5 = metadata !{metadata !8, metadata !8, i64 0}
+!6 = metadata !{metadata !"float", metadata !1}
+!7 = metadata !{metadata !"long", metadata !1}
+!8 = metadata !{metadata !"small", metadata !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/sink.ll b/test/Analysis/TypeBasedAliasAnalysis/sink.ll
index fd32d6a7a58e..726da6ce1e81 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/sink.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/sink.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -tbaa -sink -S < %s | FileCheck %s
 
 ; CHECK: a:
-; CHECK:   %f = load float* %p, !tbaa !2
+; CHECK:   %f = load float* %p, !tbaa [[TAGA:!.*]]
 ; CHECK:   store float %f, float* %q
 
 define void @foo(float* %p, i1 %c, float* %q, float* %r) {
@@ -15,6 +15,10 @@ b:
   ret void
 }
 
-!0 = metadata !{metadata !"A", metadata !2}
-!1 = metadata !{metadata !"B", metadata !2}
+; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0}
+; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
+!0 = metadata !{metadata !3, metadata !3, i64 0}
+!1 = metadata !{metadata !4, metadata !4, i64 0}
 !2 = metadata !{metadata !"test"}
+!3 = metadata !{metadata !"A", metadata !2}
+!4 = metadata !{metadata !"B", metadata !2}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
index ee527639b3d1..0cd5c301842a 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
-; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -gvn -S | FileCheck %s --check-prefix=OPT
+; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -tbaa -basicaa -gvn -S | FileCheck %s --check-prefix=OPT
 ; Generated from clang/test/CodeGen/tbaa.cpp with "-O1 -struct-path-tbaa -disable-llvm-optzns".
 
 %struct.StructA = type { i16, i32, i16, i32 }
diff --git a/test/Archive/README.txt b/test/Archive/README.txt
deleted file mode 100644
index 6810befc5857..000000000000
--- a/test/Archive/README.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-test/Regression/Archive
-=======================
-
-This directory contains various tests of llvm-ar and llvm-ranlib to ensure 
-compatibility reading other ar(1) formats. It also provides a basic
-functionality test for these tools.
-
-There are four archives accompanying these tests: 
-
-GNU.a    - constructed on Linux with GNU ar
-MacOSX.a - constructed on Mac OS X with its native BSD4.4 ar
-SVR4.a   - constructed on Solaris with /usr/ccs/bin/ar
-xpg4.a   - constructed on Solaris with /usr/xpg4/bin/ar
-
-Each type of test is run on each of these archive files.  These archives each 
-contain four members:
-
-oddlen - a member with an odd lengthed name and content
-evenlen - a member with an even lengthed name and content
-IsNAN.o - a Linux native binary
-very_long_bytecode_file_name.bc - LLVM bytecode file with really long name
-
-These files test different aspects of the archiver that should cause failures
-in llvm-ar if regressions are introduced.
diff --git a/test/Archive/check_binary_output.ll b/test/Archive/check_binary_output.ll
deleted file mode 100644
index 60ab5caac453..000000000000
--- a/test/Archive/check_binary_output.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; This is not an assembly file, this is just to run the test.
-; The test verifies that llvm-ar produces a binary output.
-
-;RUN: llvm-ar p %p/GNU.a very_long_bytecode_file_name.bc | cmp -s %p/very_long_bytecode_file_name.bc -
diff --git a/test/Archive/extract.ll b/test/Archive/extract.ll
deleted file mode 100644
index 5c0f508319b9..000000000000
--- a/test/Archive/extract.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; This isn't really an assembly file, its just here to run the test.
-
-; This test just makes sure that llvm-ar can extract bytecode members
-; from various style archives.
-
-; RUN: llvm-ar p %p/GNU.a very_long_bytecode_file_name.bc | \
-; RUN:   cmp -s %p/very_long_bytecode_file_name.bc -
-
-; RUN: llvm-ar p %p/MacOSX.a very_long_bytecode_file_name.bc | \
-; RUN:   cmp -s %p/very_long_bytecode_file_name.bc -
-
-; RUN: llvm-ar p %p/SVR4.a very_long_bytecode_file_name.bc | \
-; RUN:   cmp -s %p/very_long_bytecode_file_name.bc -
-
-; RUN: llvm-ar p %p/xpg4.a very_long_bytecode_file_name.bc |\
-; RUN:   cmp -s %p/very_long_bytecode_file_name.bc -
diff --git a/test/Archive/lit.local.cfg b/test/Archive/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Archive/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Archive/toc_GNU.ll b/test/Archive/toc_GNU.ll
deleted file mode 100644
index 9ed7d8eb8cbd..000000000000
--- a/test/Archive/toc_GNU.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-;This isn't really an assembly file, its just here to run the test.
-;This test just makes sure that llvm-ar can generate a table of contents for
-;GNU style archives
-;RUN: llvm-ar t %p/GNU.a | FileCheck %s
-;CHECK:      evenlen
-;CHECK-NEXT: oddlen
-;CHECK-NEXT: very_long_bytecode_file_name.bc
-;CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/toc_MacOSX.ll b/test/Archive/toc_MacOSX.ll
deleted file mode 100644
index 6dbc9d2ea4a6..000000000000
--- a/test/Archive/toc_MacOSX.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;This isn't really an assembly file, its just here to run the test.
-;This test just makes sure that llvm-ar can generate a table of contents for
-;MacOSX style archives
-;RUN: llvm-ar t %p/MacOSX.a | FileCheck %s
-;CHECK:      __.SYMDEF SORTED
-;CHECK-NEXT: evenlen
-;CHECK-NEXT: oddlen
-;CHECK-NEXT: very_long_bytecode_file_name.bc
-;CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/toc_SVR4.ll b/test/Archive/toc_SVR4.ll
deleted file mode 100644
index d447b9219990..000000000000
--- a/test/Archive/toc_SVR4.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-;This isn't really an assembly file, its just here to run the test.
-;This test just makes sure that llvm-ar can generate a table of contents for
-;SVR4 style archives
-;RUN: llvm-ar t %p/SVR4.a | FileCheck %s
-;CHECK:      evenlen
-;CHECK-NEXT: oddlen
-;CHECK-NEXT: very_long_bytecode_file_name.bc
-;CHECK-NEXT: IsNAN.o
diff --git a/test/Archive/toc_xpg4.ll b/test/Archive/toc_xpg4.ll
deleted file mode 100644
index fd875eebdaab..000000000000
--- a/test/Archive/toc_xpg4.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-;This isn't really an assembly file, its just here to run the test.
-;This test just makes sure that llvm-ar can generate a table of contents for
-;xpg4 style archives
-;RUN: llvm-ar t %p/xpg4.a | FileCheck %s
-CHECK:      evenlen
-CHECK-NEXT: oddlen
-CHECK-NEXT: very_long_bytecode_file_name.bc
-CHECK-NEXT: IsNAN.o
diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
index df70149a33f6..17dd745682b0 100644
--- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
+++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
@@ -1,4 +1,4 @@
-; RUN: opt -std-compile-opts < %s | llvm-dis | not grep badref 
+; RUN: opt -std-compile-opts < %s | llvm-dis | not grep badref
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.2"
@@ -23,10 +23,12 @@ define i32 @main() nounwind readonly {
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !7 = metadata !{metadata !1}
-!6 = metadata !{i32 786449, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !7, null, null} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !7, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !0 = metadata !{i32 786688, metadata !1, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786478, metadata !8, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !6, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, null, metadata !6, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
+!9 = metadata !{i32 0}
diff --git a/test/Assembler/ConstantExprFoldCast.ll b/test/Assembler/ConstantExprFoldCast.ll
index 0ce6e84626a3..161a4ca35757 100644
--- a/test/Assembler/ConstantExprFoldCast.ll
+++ b/test/Assembler/ConstantExprFoldCast.ll
@@ -12,3 +12,5 @@
 @F = global i32* inttoptr (i32 add (i32 5, i32 -5) to i32*)
 @G = global i32* inttoptr (i32 sub (i32 5, i32 5) to i32*)
 
+; Address space cast AS0 null -> AS1 null
+@H = global i32 addrspace(1)* addrspacecast(i32* null to i32 addrspace(1)*)
diff --git a/test/Assembler/ConstantExprNoFold.ll b/test/Assembler/ConstantExprNoFold.ll
index 83e8909b5ebd..b41959f494fd 100644
--- a/test/Assembler/ConstantExprNoFold.ll
+++ b/test/Assembler/ConstantExprNoFold.ll
@@ -21,3 +21,6 @@ target datalayout = "p:32:32"
 
 ; CHECK: @D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2))
 @D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2))
+
+; CHECK: @E = global i64 addrspace(1)* addrspacecast (i64* @A to i64 addrspace(1)*)
+@E = global i64 addrspace(1)* addrspacecast(i64* @A to i64 addrspace(1)*)
diff --git a/test/Assembler/attribute-builtin.ll b/test/Assembler/attribute-builtin.ll
new file mode 100644
index 000000000000..01c8a6bd8653
--- /dev/null
+++ b/test/Assembler/attribute-builtin.ll
@@ -0,0 +1,50 @@
+
+; Make sure that llvm-as/llvm-dis properly assemble/disassemble the 'builtin'
+; attribute.
+;
+; rdar://13727199
+
+; RUN: llvm-as -disable-verify < %s | \
+; RUN: llvm-dis | \
+; RUN: llvm-as -disable-verify | \
+; RUN: llvm-dis | \
+; RUN: FileCheck -check-prefix=CHECK-ASSEMBLES %s
+
+; CHECK-ASSEMBLES: declare i8* @foo(i8*) [[NOBUILTIN:#[0-9]+]]
+; CHECK-ASSEMBLES: call i8* @foo(i8* %x) [[BUILTIN:#[0-9]+]]
+; CHECK-ASSEMBLES: attributes [[NOBUILTIN]] = { nobuiltin }
+; CHECK-ASSEMBLES: attributes [[BUILTIN]] = { builtin }
+
+declare i8* @foo(i8*) #1
+define i8* @bar(i8* %x) {
+  %y = call i8* @foo(i8* %x) #0
+  ret i8* %y
+}
+
+; Make sure that we do not accept the 'builtin' attribute on function
+; definitions, function declarations, and on call sites that call functions
+; which do not have nobuiltin on them.
+; rdar://13727199
+
+; RUN: not llvm-as <%s 2>&1  | FileCheck -check-prefix=CHECK-BAD %s
+
+; CHECK-BAD: Attribute 'builtin' can only be applied to a callsite.
+; CHECK-BAD-NEXT: i8* (i8*)* @car
+; CHECK-BAD: Attribute 'builtin' can only be applied to a callsite.
+; CHECK-BAD-NEXT: i8* (i8*)* @mar
+
+declare i8* @lar(i8*)
+
+define i8* @har(i8* %x) {
+  %y = call i8* @lar(i8* %x) #0
+  ret i8* %y
+}
+
+define i8* @car(i8* %x) #0 {
+  ret i8* %x
+}
+
+declare i8* @mar(i8*) #0
+
+attributes #0 = { builtin }
+attributes #1 = { nobuiltin }
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll
index 7ad5cc30fa71..8f655cec9eb2 100644
--- a/test/Assembler/auto_upgrade_intrinsics.ll
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -6,6 +6,10 @@ declare i16 @llvm.ctlz.i16(i16)
 declare i32 @llvm.ctlz.i32(i32)
 declare i42 @llvm.ctlz.i42(i42)  ; Not a power-of-2
 
+
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+
+
 define void @test.ctlz(i8 %a, i16 %b, i32 %c, i42 %d) {
 ; CHECK: @test.ctlz
 
@@ -42,3 +46,14 @@ entry:
 
   ret void
 }
+
+
+@a = private global [60 x i8] zeroinitializer, align 1
+
+define i32 @test.objectsize() {
+; CHECK-LABEL: @test.objectsize(
+; CHECK: @llvm.objectsize.i32.p0i8
+; CHECK-DAG: declare i32 @llvm.objectsize.i32.p0i8
+  %s = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  ret i32 %s
+}
diff --git a/test/Assembler/functionlocal-metadata.ll b/test/Assembler/functionlocal-metadata.ll
index 216587d98a72..0d93bfdb275d 100644
--- a/test/Assembler/functionlocal-metadata.ll
+++ b/test/Assembler/functionlocal-metadata.ll
@@ -2,8 +2,8 @@
 
 define void @Foo(i32 %a, i32 %b) {
 entry:
-  call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !"bar")
-; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !"bar")
+  call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !2)
+; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID2:[0-9]+]])
   %0 = add i32 %a, 1                              ; <i32> [#uses=1]
   %two = add i32 %b, %0                           ; <i32> [#uses=0]
   %1 = alloca i32                                 ; <i32*> [#uses=1]
@@ -19,26 +19,38 @@ entry:
   call void @llvm.dbg.declare(metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"})
 ; CHECK: metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"}
   call void @llvm.dbg.declare(metadata !{i32 %b}, metadata !{metadata !0, i32 %two})
-; CHECK: metadata !{i32 %b}, metadata !{metadata !0, i32 %two}
+; CHECK: metadata !{i32 %b}, metadata !{metadata ![[ID0:[0-9]+]], i32 %two}
 
   call void @llvm.dbg.value(metadata !{ i32 %a }, i64 0, metadata !1)
-; CHECK: metadata !{i32 %a}, i64 0, metadata !1
+; CHECK: metadata !{i32 %a}, i64 0, metadata ![[ID1:[0-9]+]]
   call void @llvm.dbg.value(metadata !{ i32 %0 }, i64 25, metadata !0)
-; CHECK: metadata !{i32 %0}, i64 25, metadata !0
-  call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !"foo")
-; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !"foo")
-  call void @llvm.dbg.value(metadata !"foo", i64 12, metadata !"bar")
-; CHECK: metadata !"foo", i64 12, metadata !"bar"
+; CHECK: metadata !{i32 %0}, i64 25, metadata ![[ID0]]
+  call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !3)
+; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID3:[0-9]+]])
+  call void @llvm.dbg.value(metadata !3, i64 12, metadata !2)
+; CHECK: metadata ![[ID3]], i64 12, metadata ![[ID2]]
 
   ret void, !foo !0, !bar !1
-; CHECK: ret void, !foo !0, !bar !1
+; CHECK: ret void, !foo ![[FOO:[0-9]+]], !bar ![[BAR:[0-9]+]]
 }
 
+!llvm.module.flags = !{!4}
+
 !0 = metadata !{i32 662302, i32 26, metadata !1, null}
 !1 = metadata !{i32 4, metadata !"foo"}
+!2 = metadata !{metadata !"bar"}
+!3 = metadata !{metadata !"foo"}
+!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !foo = !{ !0 }
 !bar = !{ !1 }
+
+; CHECK: !foo = !{![[FOO]]}
+; CHECK: !bar = !{![[BAR]]}
+; CHECK: ![[ID0]] = metadata !{i32 662302, i32 26, metadata ![[ID1]], null}
+; CHECK: ![[ID1]] = metadata !{i32 4, metadata !"foo"}
+; CHECK: ![[ID2]] = metadata !{metadata !"bar"}
+; CHECK: ![[ID3]] = metadata !{metadata !"foo"}
diff --git a/test/Assembler/lit.local.cfg b/test/Assembler/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Assembler/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml
index 7df8e21203a9..c02645c0bce3 100644
--- a/test/Bindings/Ocaml/analysis.ml
+++ b/test/Bindings/Ocaml/analysis.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %t.builddir/analysis.ml -o %t
  * RUN: %t
  * XFAIL: vg_leak
  *)
diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml
index e5beccd118bd..f1d202ab0490 100644
--- a/test/Bindings/Ocaml/bitreader.ml
+++ b/test/Bindings/Ocaml/bitreader.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %t.builddir/bitreader.ml -o %t
  * RUN: %t %t.bc
  * RUN: llvm-dis < %t.bc
  * XFAIL: vg_leak
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
index 138876001244..ae456cf785c8 100644
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ b/test/Bindings/Ocaml/bitwriter.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %t.builddir/bitwriter.ml -o %t
  * RUN: %t %t.bc
  * RUN: llvm-dis < %t.bc
  * XFAIL: vg_leak
diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml
index f7a49bb284c0..8e2494952a2b 100644
--- a/test/Bindings/Ocaml/executionengine.ml
+++ b/test/Bindings/Ocaml/executionengine.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %t.builddir/executionengine.ml -o %t
  * RUN: %t
  * XFAIL: vg_leak
  *)
@@ -100,11 +103,11 @@ let test_executionengine () =
   (* run_static_dtors *)
   ExecutionEngine.run_static_dtors ee;
 
-  (* Show that the target data binding links and runs.*)
-  let td = ExecutionEngine.target_data ee in
+  (* Show that the data layout binding links and runs.*)
+  let dl = ExecutionEngine.data_layout ee in
 
   (* Demonstrate that a garbage pointer wasn't returned. *)
-  let ty = intptr_type td in
+  let ty = DataLayout.intptr_type context dl in
   if ty != i32_type && ty != i64_type then bomb "target_data did not work";
   
   (* dispose *)
diff --git a/test/Bindings/Ocaml/ext_exc.ml b/test/Bindings/Ocaml/ext_exc.ml
index b4d2e6dc6414..9afc3c3ab4b2 100644
--- a/test/Bindings/Ocaml/ext_exc.ml
+++ b/test/Bindings/Ocaml/ext_exc.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_executionengine.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_executionengine.cmxa %t.builddir/ext_exc.ml -o %t
  * RUN: %t </dev/null
  * XFAIL: vg_leak
  *)
diff --git a/test/Bindings/Ocaml/ipo_opts.ml b/test/Bindings/Ocaml/ipo_opts.ml
index d4537e4413fb..e0bcbe5f561e 100644
--- a/test/Bindings/Ocaml/ipo_opts.ml
+++ b/test/Bindings/Ocaml/ipo_opts.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_ipo.cmxa llvm_target.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_ipo.cmxa llvm_target.cmxa %t.builddir/ipo_opts.ml -o %t
  * RUN: %t %t.bc
  * XFAIL: vg_leak
  *)
@@ -43,15 +46,13 @@ let test_transforms () =
       ignore (build_ret (build_call fn [| |] "" b) b);
   end;
 
-  let td = DataLayout.create (target_triple m) in
-  
   ignore (PassManager.create ()
-           ++ DataLayout.add td
            ++ add_argument_promotion
            ++ add_constant_merge
            ++ add_dead_arg_elimination
            ++ add_function_attrs
            ++ add_function_inlining
+           ++ add_always_inliner
            ++ add_global_dce
            ++ add_global_optimizer
            ++ add_ipc_propagation
@@ -61,9 +62,7 @@ let test_transforms () =
            ++ add_strip_dead_prototypes
            ++ add_strip_symbols
            ++ PassManager.run_module m
-           ++ PassManager.dispose);
-
-  DataLayout.dispose td
+           ++ PassManager.dispose)
 
 
 (*===-- Driver ------------------------------------------------------------===*)
diff --git a/test/Bindings/Ocaml/irreader.ml b/test/Bindings/Ocaml/irreader.ml
new file mode 100644
index 000000000000..3511c2b23646
--- /dev/null
+++ b/test/Bindings/Ocaml/irreader.ml
@@ -0,0 +1,59 @@
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -g -warn-error A llvm.cmxa llvm_irreader.cmxa %t.builddir/irreader.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_irreader
+
+let context = global_context ()
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+let _ =
+  Printexc.record_backtrace true
+
+let insist cond =
+  if not cond then failwith "insist"
+
+
+(*===-- IR Reader ---------------------------------------------------------===*)
+
+let test_irreader () =
+  begin
+    let buf = MemoryBuffer.of_string "@foo = global i32 42" in
+    let m   = parse_ir context buf in
+    match lookup_global "foo" m with
+    | Some foo ->
+        insist ((global_initializer foo) = (const_int (i32_type context) 42))
+    | None ->
+        failwith "global"
+  end;
+
+  begin
+    let buf = MemoryBuffer.of_string "@foo = global garble" in
+    try
+      ignore (parse_ir context buf);
+      failwith "parsed"
+    with Llvm_irreader.Error _ ->
+      ()
+  end
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "irreader" test_irreader
diff --git a/test/Bindings/Ocaml/linker.ml b/test/Bindings/Ocaml/linker.ml
new file mode 100644
index 000000000000..9359ae9f2c48
--- /dev/null
+++ b/test/Bindings/Ocaml/linker.ml
@@ -0,0 +1,63 @@
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_linker.cmxa %t.builddir/linker.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_linker
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Linker -----------------------------------------------------------===*)
+
+let test_linker () =
+  let fty = function_type void_type [| |] in
+
+  let make_module name =
+    let m = create_module context name in
+    let fn = define_function ("fn_" ^ name) fty m in
+    ignore (build_ret_void (builder_at_end context (entry_block fn)));
+    m
+  in
+
+  let m1 = make_module "one"
+  and m2 = make_module "two" in
+  link_modules m1 m2 Mode.PreserveSource;
+  dispose_module m1;
+  dispose_module m2;
+
+  let m1 = make_module "one"
+  and m2 = make_module "two" in
+  link_modules m1 m2 Mode.DestroySource;
+  dispose_module m1;
+
+  let m1 = make_module "one"
+  and m2 = make_module "one" in
+  try
+    link_modules m1 m2 Mode.PreserveSource;
+    failwith "must raise"
+  with Error _ ->
+    dispose_module m1;
+    dispose_module m2
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "linker" test_linker
diff --git a/test/Bindings/Ocaml/lit.local.cfg b/test/Bindings/Ocaml/lit.local.cfg
index 640c58d2f3d6..c38d89ab09e9 100644
--- a/test/Bindings/Ocaml/lit.local.cfg
+++ b/test/Bindings/Ocaml/lit.local.cfg
@@ -1,6 +1,5 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.ml']
+config.suffixes = ['.ml']
 
 bindings = set([s.strip() for s in config.root.llvm_bindings.split(',')])
 if not 'ocaml' in bindings:
     config.unsupported = True
-
diff --git a/test/Bindings/Ocaml/passmgr_builder.ml b/test/Bindings/Ocaml/passmgr_builder.ml
new file mode 100644
index 000000000000..1a3102f70a34
--- /dev/null
+++ b/test/Bindings/Ocaml/passmgr_builder.ml
@@ -0,0 +1,64 @@
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_passmgr_builder.cmxa %t.builddir/passmgr_builder.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_passmgr_builder
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Pass Manager Builder ----------------------------------------------===*)
+
+let test_pmbuilder () =
+  let (++) x f = ignore (f x); x in
+
+  let module_passmgr = PassManager.create () in
+  let func_passmgr   = PassManager.create_function m in
+  let lto_passmgr    = PassManager.create () in
+
+  ignore (Llvm_passmgr_builder.create ()
+           ++ set_opt_level 3
+           ++ set_size_level 1
+           ++ set_disable_unit_at_a_time false
+           ++ set_disable_unroll_loops false
+           ++ use_inliner_with_threshold 10
+           ++ populate_function_pass_manager func_passmgr
+           ++ populate_module_pass_manager module_passmgr
+           ++ populate_lto_pass_manager lto_passmgr
+                  ~internalize:false ~run_inliner:false);
+  Gc.compact ();
+
+  PassManager.dispose module_passmgr;
+  PassManager.dispose func_passmgr;
+  PassManager.dispose lto_passmgr
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "pass manager builder" test_pmbuilder;
+  dispose_module m
diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml
index 0760dad4ad02..39913e43119d 100644
--- a/test/Bindings/Ocaml/scalar_opts.ml
+++ b/test/Bindings/Ocaml/scalar_opts.ml
@@ -1,4 +1,7 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %t.builddir/scalar_opts.ml -o %t
  * RUN: %t %t.bc
  * XFAIL: vg_leak
  *)
@@ -38,10 +41,7 @@ let test_transforms () =
   let fn = define_function "fn" fty m in
   ignore (build_ret_void (builder_at_end context (entry_block fn)));
   
-  let td = DataLayout.create (target_triple m) in
-  
   ignore (PassManager.create_function m
-           ++ DataLayout.add td
            ++ add_verifier
            ++ add_constant_propagation
            ++ add_sccp
@@ -72,13 +72,12 @@ let test_transforms () =
            ++ add_lower_expect_intrinsic
            ++ add_type_based_alias_analysis
            ++ add_basic_alias_analysis
+           ++ add_partially_inline_lib_calls
            ++ add_verifier
            ++ PassManager.initialize
            ++ PassManager.run_function fn
            ++ PassManager.finalize
-           ++ PassManager.dispose);
-  
-  DataLayout.dispose td
+           ++ PassManager.dispose)
 
 
 (*===-- Driver ------------------------------------------------------------===*)
diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml
index 7a35a790ab3a..d69fb0e664fd 100644
--- a/test/Bindings/Ocaml/target.ml
+++ b/test/Bindings/Ocaml/target.ml
@@ -1,5 +1,9 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -g -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %t.builddir/target.ml -o %t
  * RUN: %t %t.bc
+ * REQUIRES: native, object-emission
  * XFAIL: vg_leak
  *)
 
@@ -10,6 +14,7 @@
 open Llvm
 open Llvm_target
 
+let _ = Llvm_executionengine.initialize_native_target ()
 
 let context = global_context ()
 let i32_type = Llvm.i32_type context
@@ -18,10 +23,11 @@ let i64_type = Llvm.i64_type context
 (* Tiny unit test framework - really just to help find which line is busted *)
 let print_checkpoints = false
 
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
+let _ =
+  Printexc.record_backtrace true
+
+let assert_equal a b =
+  if a <> b then failwith "assert_equal"
 
 
 (*===-- Fixture -----------------------------------------------------------===*)
@@ -29,31 +35,83 @@ let suite name f =
 let filename = Sys.argv.(1)
 let m = create_module context filename
 
+let target = Target.by_triple (Target.default_triple ())
 
-(*===-- Target Data -------------------------------------------------------===*)
+let machine = TargetMachine.create (Target.default_triple ()) target
+
+(*===-- Data Layout -------------------------------------------------------===*)
 
 let test_target_data () =
-  let td = DataLayout.create (target_triple m) in
-  let sty = struct_type context [| i32_type; i64_type |] in
-  
-  ignore (DataLayout.as_string td);
-  ignore (byte_order td);
-  ignore (pointer_size td);
-  ignore (intptr_type td);
-  ignore (size_in_bits td sty);
-  ignore (store_size td sty);
-  ignore (abi_size td sty);
-  ignore (stack_align td sty);
-  ignore (preferred_align td sty);
-  ignore (preferred_align_of_global td (declare_global sty "g" m));
-  ignore (element_at_offset td sty (Int64.of_int 1));
-  ignore (offset_of_element td sty 1);
+  let module DL = DataLayout in
+  let layout = "e-p:32:32:32-S32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-" ^
+               "f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:32:64-v128:32:128-" ^
+               "a0:0:64-n32" in
+  let dl     = DL.of_string layout in
+  let sty    = struct_type context [| i32_type; i64_type |] in
   
-  DataLayout.dispose td
+  assert_equal (DL.as_string dl) layout;
+  assert_equal (DL.byte_order dl) Endian.Little;
+  assert_equal (DL.pointer_size dl) 4;
+  assert_equal (DL.intptr_type context dl) i32_type;
+  assert_equal (DL.qualified_pointer_size 0 dl) 4;
+  assert_equal (DL.qualified_intptr_type context 0 dl) i32_type;
+  assert_equal (DL.size_in_bits sty dl) (Int64.of_int 96);
+  assert_equal (DL.store_size sty dl) (Int64.of_int 12);
+  assert_equal (DL.abi_size sty dl) (Int64.of_int 12);
+  assert_equal (DL.stack_align sty dl) 4;
+  assert_equal (DL.preferred_align sty dl) 8;
+  assert_equal (DL.preferred_align_of_global (declare_global sty "g" m) dl) 8;
+  assert_equal (DL.element_at_offset sty (Int64.of_int 1) dl) 0;
+  assert_equal (DL.offset_of_element sty 1 dl) (Int64.of_int 4);
+
+  let pm = PassManager.create () in
+  ignore (DL.add_to_pass_manager pm dl)
+
+
+(*===-- Target ------------------------------------------------------------===*)
+
+let test_target () =
+  let module T = Target in
+  ignore (T.succ target);
+  ignore (T.name target);
+  ignore (T.description target);
+  ignore (T.has_jit target);
+  ignore (T.has_target_machine target);
+  ignore (T.has_asm_backend target)
+
+
+(*===-- Target Machine ----------------------------------------------------===*)
+
+let test_target_machine () =
+  let module TM = TargetMachine in
+  assert_equal (TM.target machine) target;
+  assert_equal (TM.triple machine) (Target.default_triple ());
+  assert_equal (TM.cpu machine) "";
+  assert_equal (TM.features machine) "";
+  ignore (TM.data_layout machine)
+
+
+(*===-- Code Emission -----------------------------------------------------===*)
+
+let test_code_emission () =
+  TargetMachine.emit_to_file m CodeGenFileType.ObjectFile filename machine;
+  try
+    TargetMachine.emit_to_file m CodeGenFileType.ObjectFile
+                               "/nonexistent/file" machine;
+    failwith "must raise"
+  with Llvm_target.Error _ ->
+    ();
+
+  let buf = TargetMachine.emit_to_memory_buffer m CodeGenFileType.ObjectFile
+                                                machine in
+  Llvm.MemoryBuffer.dispose buf
 
 
 (*===-- Driver ------------------------------------------------------------===*)
 
 let _ =
-  suite "target data" test_target_data;
+  test_target_data ();
+  test_target ();
+  test_target_machine ();
+  (* test_code_emission (); *) (* broken without AsmParser support *)
   dispose_module m
diff --git a/test/Bindings/Ocaml/vectorize_opts.ml b/test/Bindings/Ocaml/vectorize_opts.ml
new file mode 100644
index 000000000000..5ef985d5dc18
--- /dev/null
+++ b/test/Bindings/Ocaml/vectorize_opts.ml
@@ -0,0 +1,56 @@
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_vectorize.cmxa llvm_target.cmxa %t.builddir/vectorize_opts.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_vectorize
+open Llvm_target
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Transforms --------------------------------------------------------===*)
+
+let test_transforms () =
+  let (++) x f = ignore (f x); x in
+
+  let fty = function_type void_type [| |] in
+  let fn = define_function "fn" fty m in
+  ignore (build_ret_void (builder_at_end context (entry_block fn)));
+
+  ignore (PassManager.create ()
+           ++ add_bb_vectorize
+           ++ add_loop_vectorize
+           ++ add_slp_vectorize
+           ++ PassManager.run_module m
+           ++ PassManager.dispose)
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "transforms" test_transforms;
+  dispose_module m
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index b49bab9ab17b..167efce0b2b1 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -1,6 +1,12 @@
-(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t
+(* RUN: rm -rf %t.builddir
+ * RUN: mkdir -p %t.builddir
+ * RUN: cp %s %t.builddir
+ * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %t.builddir/vmcore.ml -o %t
  * RUN: %t %t.bc
  * RUN: llvm-dis < %t.bc > %t.ll
+ * RUN: FileCheck %s < %t.ll
+ * Do a second pass for things that shouldn't be anywhere.
+ * RUN: FileCheck -check-prefix=CHECK-NOWHERE %s < %t.ll
  * XFAIL: vg_leak
  *)
 
@@ -61,30 +67,37 @@ let filename = Sys.argv.(1)
 let m = create_module context filename
 
 
+(*===-- Conversion --------------------------------------------------------===*)
+
+let test_conversion () =
+  insist ("i32" = (string_of_lltype i32_type));
+  let c = const_int i32_type 42 in
+  insist ("i32 42" = (string_of_llvalue c))
+
+
 (*===-- Target ------------------------------------------------------------===*)
 
 let test_target () =
   begin group "triple";
-    (* RUN: grep "i686-apple-darwin8" < %t.ll
-     *)
     let trip = "i686-apple-darwin8" in
     set_target_triple trip m;
     insist (trip = target_triple m)
   end;
   
   begin group "layout";
-    (* RUN: grep "bogus" < %t.ll
-     *)
     let layout = "bogus" in
     set_data_layout layout m;
     insist (layout = data_layout m)
   end
+  (* CHECK: target datalayout = "bogus"
+   * CHECK: target triple = "i686-apple-darwin8"
+   *)
 
 
 (*===-- Constants ---------------------------------------------------------===*)
 
 let test_constants () =
-  (* RUN: grep "const_int.*i32.*-1" < %t.ll
+  (* CHECK: const_int{{.*}}i32{{.*}}-1
    *)
   group "int";
   let c = const_int i32_type (-1) in
@@ -92,44 +105,44 @@ let test_constants () =
   insist (i32_type = type_of c);
   insist (is_constant c);
 
-  (* RUN: grep "const_sext_int.*i64.*-1" < %t.ll
+  (* CHECK: const_sext_int{{.*}}i64{{.*}}-1
    *)
   group "sext int";
   let c = const_int i64_type (-1) in
   ignore (define_global "const_sext_int" c m);
   insist (i64_type = type_of c);
 
-  (* RUN: grep "const_zext_int64.*i64.*4294967295" < %t.ll
+  (* CHECK: const_zext_int64{{.*}}i64{{.*}}4294967295
    *)
   group "zext int64";
   let c = const_of_int64 i64_type (Int64.of_string "4294967295") false in
   ignore (define_global "const_zext_int64" c m);
   insist (i64_type = type_of c);
 
-  (* RUN: grep "const_int_string.*i32.*-1" < %t.ll
+  (* CHECK: const_int_string{{.*}}i32{{.*}}-1
    *)
   group "int string";
   let c = const_int_of_string i32_type "-1" 10 in
   ignore (define_global "const_int_string" c m);
   insist (i32_type = type_of c);
 
-  (* RUN: grep 'const_string.*"cruel\\00world"' < %t.ll
+  (* CHECK: @const_string = global {{.*}}c"cruel\00world"
    *)
   group "string";
   let c = const_string context "cruel\000world" in
   ignore (define_global "const_string" c m);
   insist ((array_type i8_type 11) = type_of c);
 
-  (* RUN: grep 'const_stringz.*"hi\\00again\\00"' < %t.ll
+  (* CHECK: const_stringz{{.*}}"hi\00again\00"
    *)
   group "stringz";
   let c = const_stringz context "hi\000again" in
   ignore (define_global "const_stringz" c m);
   insist ((array_type i8_type 9) = type_of c);
 
-  (* RUN: grep "const_single.*2.75" < %t.ll
-   * RUN: grep "const_double.*3.1459" < %t.ll
-   * RUN: grep "const_double_string.*1.25" < %t.ll
+  (* CHECK: const_single{{.*}}2.75
+   * CHECK: const_double{{.*}}3.1459
+   * CHECK: const_double_string{{.*}}1.25
    *)
   begin group "real";
     let cs = const_float float_type 2.75 in
@@ -150,14 +163,14 @@ let test_constants () =
   let three = const_int i32_type 3 in
   let four = const_int i32_type 4 in
   
-  (* RUN: grep "const_array.*[i32 3, i32 4]" < %t.ll
+  (* CHECK: const_array{{.*}}[i32 3, i32 4]
    *)
   group "array";
   let c = const_array i32_type [| three; four |] in
   ignore (define_global "const_array" c m);
   insist ((array_type i32_type 2) = (type_of c));
   
-  (* RUN: grep "const_vector.*<i16 1, i16 2.*>" < %t.ll
+  (* CHECK: const_vector{{.*}}<i16 1, i16 2{{.*}}>
    *)
   group "vector";
   let c = const_vector [| one; two; one; two;
@@ -165,7 +178,7 @@ let test_constants () =
   ignore (define_global "const_vector" c m);
   insist ((vector_type i16_type 8) = (type_of c));
 
-  (* RUN: grep "const_structure.*.i16 1, i16 2, i32 3, i32 4" < %t.ll
+  (* CHECK: const_structure{{.*.}}i16 1, i16 2, i32 3, i32 4
    *)
   group "structure";
   let c = const_struct context [| one; two; three; four |] in
@@ -173,27 +186,27 @@ let test_constants () =
   insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |])
         = (type_of c));
 
-  (* RUN: grep "const_null.*zeroinit" < %t.ll
+  (* CHECK: const_null{{.*}}zeroinit
    *)
   group "null";
   let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type;
                                                     double_type |]) in
   ignore (define_global "const_null" c m);
   
-  (* RUN: grep "const_all_ones.*-1" < %t.ll
+  (* CHECK: const_all_ones{{.*}}-1
    *)
   group "all ones";
   let c = const_all_ones i64_type in
   ignore (define_global "const_all_ones" c m);
 
   group "pointer null"; begin
-    (* RUN: grep "const_pointer_null = global i64\* null" < %t.ll
+    (* CHECK: const_pointer_null = global i64* null
      *)
     let c = const_pointer_null (pointer_type i64_type) in
     ignore (define_global "const_pointer_null" c m);
   end;
   
-  (* RUN: grep "const_undef.*undef" < %t.ll
+  (* CHECK: const_undef{{.*}}undef
    *)
   group "undef";
   let c = undef i1_type in
@@ -202,35 +215,35 @@ let test_constants () =
   insist (is_undef c);
   
   group "constant arithmetic";
-  (* RUN: grep "@const_neg = global i64 sub" < %t.ll
-   * RUN: grep "@const_nsw_neg = global i64 sub nsw " < %t.ll
-   * RUN: grep "@const_nuw_neg = global i64 sub nuw " < %t.ll
-   * RUN: grep "@const_fneg = global double fsub " < %t.ll
-   * RUN: grep "@const_not = global i64 xor " < %t.ll
-   * RUN: grep "@const_add = global i64 add " < %t.ll
-   * RUN: grep "@const_nsw_add = global i64 add nsw " < %t.ll
-   * RUN: grep "@const_nuw_add = global i64 add nuw " < %t.ll
-   * RUN: grep "@const_fadd = global double fadd " < %t.ll
-   * RUN: grep "@const_sub = global i64 sub " < %t.ll
-   * RUN: grep "@const_nsw_sub = global i64 sub nsw " < %t.ll
-   * RUN: grep "@const_nuw_sub = global i64 sub nuw " < %t.ll
-   * RUN: grep "@const_fsub = global double fsub " < %t.ll
-   * RUN: grep "@const_mul = global i64 mul " < %t.ll
-   * RUN: grep "@const_nsw_mul = global i64 mul nsw " < %t.ll
-   * RUN: grep "@const_nuw_mul = global i64 mul nuw " < %t.ll
-   * RUN: grep "@const_fmul = global double fmul " < %t.ll
-   * RUN: grep "@const_udiv = global i64 udiv " < %t.ll
-   * RUN: grep "@const_sdiv = global i64 sdiv " < %t.ll
-   * RUN: grep "@const_exact_sdiv = global i64 sdiv exact " < %t.ll
-   * RUN: grep "@const_fdiv = global double fdiv " < %t.ll
-   * RUN: grep "@const_urem = global i64 urem " < %t.ll
-   * RUN: grep "@const_srem = global i64 srem " < %t.ll
-   * RUN: grep "@const_frem = global double frem " < %t.ll
-   * RUN: grep "@const_and = global i64 and " < %t.ll
-   * RUN: grep "@const_or = global i64 or " < %t.ll
-   * RUN: grep "@const_xor = global i64 xor " < %t.ll
-   * RUN: grep "@const_icmp = global i1 icmp sle " < %t.ll
-   * RUN: grep "@const_fcmp = global i1 fcmp ole " < %t.ll
+  (* CHECK: @const_neg = global i64 sub
+   * CHECK: @const_nsw_neg = global i64 sub nsw
+   * CHECK: @const_nuw_neg = global i64 sub nuw
+   * CHECK: @const_fneg = global double fsub
+   * CHECK: @const_not = global i64 xor
+   * CHECK: @const_add = global i64 add
+   * CHECK: @const_nsw_add = global i64 add nsw
+   * CHECK: @const_nuw_add = global i64 add nuw
+   * CHECK: @const_fadd = global double fadd
+   * CHECK: @const_sub = global i64 sub
+   * CHECK: @const_nsw_sub = global i64 sub nsw
+   * CHECK: @const_nuw_sub = global i64 sub nuw
+   * CHECK: @const_fsub = global double fsub
+   * CHECK: @const_mul = global i64 mul
+   * CHECK: @const_nsw_mul = global i64 mul nsw
+   * CHECK: @const_nuw_mul = global i64 mul nuw
+   * CHECK: @const_fmul = global double fmul
+   * CHECK: @const_udiv = global i64 udiv
+   * CHECK: @const_sdiv = global i64 sdiv
+   * CHECK: @const_exact_sdiv = global i64 sdiv exact
+   * CHECK: @const_fdiv = global double fdiv
+   * CHECK: @const_urem = global i64 urem
+   * CHECK: @const_srem = global i64 srem
+   * CHECK: @const_frem = global double frem
+   * CHECK: @const_and = global i64 and
+   * CHECK: @const_or = global i64 or
+   * CHECK: @const_xor = global i64 xor
+   * CHECK: @const_icmp = global i1 icmp sle
+   * CHECK: @const_fcmp = global i1 fcmp ole
    *)
   let void_ptr = pointer_type i8_type in
   let five = const_int i64_type 5 in
@@ -269,18 +282,19 @@ let test_constants () =
   ignore (define_global "const_fcmp" (const_fcmp Fcmp.Ole ffoldbomb ffive) m);
   
   group "constant casts";
-  (* RUN: grep "const_trunc.*trunc" < %t.ll
-   * RUN: grep "const_sext.*sext" < %t.ll
-   * RUN: grep "const_zext.*zext" < %t.ll
-   * RUN: grep "const_fptrunc.*fptrunc" < %t.ll
-   * RUN: grep "const_fpext.*fpext" < %t.ll
-   * RUN: grep "const_uitofp.*uitofp" < %t.ll
-   * RUN: grep "const_sitofp.*sitofp" < %t.ll
-   * RUN: grep "const_fptoui.*fptoui" < %t.ll
-   * RUN: grep "const_fptosi.*fptosi" < %t.ll
-   * RUN: grep "const_ptrtoint.*ptrtoint" < %t.ll
-   * RUN: grep "const_inttoptr.*inttoptr" < %t.ll
-   * RUN: grep "const_bitcast.*bitcast" < %t.ll
+  (* CHECK: const_trunc{{.*}}trunc
+   * CHECK: const_sext{{.*}}sext
+   * CHECK: const_zext{{.*}}zext
+   * CHECK: const_fptrunc{{.*}}fptrunc
+   * CHECK: const_fpext{{.*}}fpext
+   * CHECK: const_uitofp{{.*}}uitofp
+   * CHECK: const_sitofp{{.*}}sitofp
+   * CHECK: const_fptoui{{.*}}fptoui
+   * CHECK: const_fptosi{{.*}}fptosi
+   * CHECK: const_ptrtoint{{.*}}ptrtoint
+   * CHECK: const_inttoptr{{.*}}inttoptr
+   * CHECK: const_bitcast{{.*}}bitcast
+   * CHECK: const_intcast{{.*}}zext
    *)
   let i128_type = integer_type context 128 in
   ignore (define_global "const_trunc" (const_trunc (const_add foldbomb five)
@@ -300,14 +314,16 @@ let test_constants () =
   ignore (define_global "const_inttoptr" (const_inttoptr (const_add foldbomb five)
                                                   void_ptr) m);
   ignore (define_global "const_bitcast" (const_bitcast ffoldbomb i64_type) m);
+  ignore (define_global "const_intcast"
+          (const_intcast foldbomb i128_type ~is_signed:false) m);
   
   group "misc constants";
-  (* RUN: grep "const_size_of.*getelementptr.*null" < %t.ll
-   * RUN: grep "const_gep.*getelementptr" < %t.ll
-   * RUN: grep "const_select.*select" < %t.ll
-   * RUN: grep "const_extractelement.*extractelement" < %t.ll
-   * RUN: grep "const_insertelement.*insertelement" < %t.ll
-   * RUN: grep "const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>" < %t.ll
+  (* CHECK: const_size_of{{.*}}getelementptr{{.*}}null
+   * CHECK: const_gep{{.*}}getelementptr
+   * CHECK: const_select{{.*}}select
+   * CHECK: const_extractelement{{.*}}extractelement
+   * CHECK: const_insertelement{{.*}}insertelement
+   * CHECK: const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>
    *)
   ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m);
   ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m);
@@ -356,7 +372,7 @@ let test_global_values () =
   let (++) x f = f x; x in
   let zero32 = const_null i32_type in
 
-  (* RUN: grep "GVal01" < %t.ll
+  (* CHECK: GVal01
    *)
   group "naming";
   let g = define_global "TEMPORARY" zero32 m in
@@ -364,28 +380,28 @@ let test_global_values () =
   set_value_name "GVal01" g;
   insist ("GVal01" = value_name g);
 
-  (* RUN: grep "GVal02.*linkonce" < %t.ll
+  (* CHECK: GVal02{{.*}}linkonce
    *)
   group "linkage";
   let g = define_global "GVal02" zero32 m ++
           set_linkage Linkage.Link_once in
   insist (Linkage.Link_once = linkage g);
 
-  (* RUN: grep "GVal03.*Hanalei" < %t.ll
+  (* CHECK: GVal03{{.*}}Hanalei
    *)
   group "section";
   let g = define_global "GVal03" zero32 m ++
           set_section "Hanalei" in
   insist ("Hanalei" = section g);
   
-  (* RUN: grep "GVal04.*hidden" < %t.ll
+  (* CHECK: GVal04{{.*}}hidden
    *)
   group "visibility";
   let g = define_global "GVal04" zero32 m ++
           set_visibility Visibility.Hidden in
   insist (Visibility.Hidden = visibility g);
   
-  (* RUN: grep "GVal05.*align 128" < %t.ll
+  (* CHECK: GVal05{{.*}}align 128
    *)
   group "alignment";
   let g = define_global "GVal05" zero32 m ++
@@ -400,7 +416,8 @@ let test_global_variables () =
   let fourty_two32 = const_int i32_type 42 in
 
   group "declarations"; begin
-    (* RUN: grep "GVar01.*external" < %t.ll
+    (* CHECK: @GVar01 = external global i32
+     * CHECK: @QGVar01 = external addrspace(3) global i32
      *)
     insist (None == lookup_global "GVar01" m);
     let g = declare_global i32_type "GVar01" m in
@@ -422,8 +439,10 @@ let test_global_variables () =
   end;
   
   group "definitions"; begin
-    (* RUN: grep "GVar02.*42" < %t.ll
-     * RUN: grep "GVar03.*42" < %t.ll
+    (* CHECK: @GVar02 = global i32 42
+     * CHECK: @GVar03 = global i32 42
+     * CHECK: @QGVar02 = addrspace(3) global i32 42
+     * CHECK: @QGVar03 = addrspace(3) global i32 42
      *)
     let g = define_global "GVar02" fourty_two32 m in
     let g2 = declare_global i32_type "GVar03" m ++
@@ -440,20 +459,34 @@ let test_global_variables () =
     insist ((global_initializer g) == (global_initializer g2));
   end;
 
-  (* RUN: grep "GVar04.*thread_local" < %t.ll
+  (* CHECK: GVar04{{.*}}thread_local
    *)
   group "threadlocal";
   let g = define_global "GVar04" fourty_two32 m ++
           set_thread_local true in
   insist (is_thread_local g);
 
-  (* RUN: grep -v "GVar05" < %t.ll
+  (* CHECK: GVar05{{.*}}thread_local(initialexec)
+   *)
+  group "threadlocal_mode";
+  let g = define_global "GVar05" fourty_two32 m ++
+          set_thread_local_mode ThreadLocalMode.InitialExec in
+  insist ((thread_local_mode g) = ThreadLocalMode.InitialExec);
+
+  (* CHECK: GVar06{{.*}}externally_initialized
+   *)
+  group "externally_initialized";
+  let g = define_global "GVar06" fourty_two32 m ++
+          set_externally_initialized true in
+  insist (is_externally_initialized g);
+
+  (* CHECK-NOWHERE-NOT: GVar07
    *)
   group "delete";
-  let g = define_global "GVar05" fourty_two32 m in
+  let g = define_global "GVar07" fourty_two32 m in
   delete_global g;
 
-  (* RUN: grep -v "ConstGlobalVar.*constant" < %t.ll
+  (* CHECK: ConstGlobalVar{{.*}}constant
    *)
   group "constant";
   let g = define_global "ConstGlobalVar" fourty_two32 m in
@@ -487,6 +520,10 @@ let test_global_variables () =
     dispose_module m
   end
 
+(* String globals built below are emitted here.
+ * CHECK: build_global_string{{.*}}stringval
+ *)
+
 
 (*===-- Uses --------------------------------------------------------------===*)
 
@@ -542,7 +579,7 @@ let test_users () =
 (*===-- Aliases -----------------------------------------------------------===*)
 
 let test_aliases () =
-  (* RUN: grep "@alias = alias i32\* @aliasee" < %t.ll
+  (* CHECK: @alias = alias i32* @aliasee
    *)
   let v = declare_global i32_type "aliasee" m in
   ignore (add_alias m (pointer_type i32_type) v "alias")
@@ -554,7 +591,7 @@ let test_functions () =
   let ty = function_type i32_type [| i32_type; i64_type |] in
   let ty2 = function_type i8_type [| i8_type; i64_type |] in
   
-  (* RUN: grep 'declare i32 @Fn1(i32, i64)' < %t.ll
+  (* CHECK: declare i32 @Fn1(i32, i64)
    *)
   begin group "declare";
     insist (None = lookup_function "Fn1" m);
@@ -570,13 +607,13 @@ let test_functions () =
     insist (m == global_parent fn)
   end;
   
-  (* RUN: grep -v "Fn2" < %t.ll
+  (* CHECK-NOWHERE-NOT: Fn2
    *)
   group "delete";
   let fn = declare_function "Fn2" ty m in
   delete_function fn;
   
-  (* RUN: grep "define.*Fn3" < %t.ll
+  (* CHECK: define{{.*}}Fn3
    *)
   group "define";
   let fn = define_function "Fn3" ty m in
@@ -584,7 +621,7 @@ let test_functions () =
   insist (1 = Array.length (basic_blocks fn));
   ignore (build_unreachable (builder_at_end context (entry_block fn)));
   
-  (* RUN: grep "define.*Fn4.*Param1.*Param2" < %t.ll
+  (* CHECK: define{{.*}}Fn4{{.*}}Param1{{.*}}Param2
    *)
   group "params";
   let fn = define_function "Fn4" ty m in
@@ -598,7 +635,7 @@ let test_functions () =
   set_value_name "Param2" params.(1);
   ignore (build_unreachable (builder_at_end context (entry_block fn)));
   
-  (* RUN: grep "fastcc.*Fn5" < %t.ll
+  (* CHECK: fastcc{{.*}}Fn5
    *)
   group "callconv";
   let fn = define_function "Fn5" ty m in
@@ -608,7 +645,7 @@ let test_functions () =
   ignore (build_unreachable (builder_at_end context (entry_block fn)));
   
   begin group "gc";
-    (* RUN: grep "Fn6.*gc.*shadowstack" < %t.ll
+    (* CHECK: Fn6{{.*}}gc{{.*}}shadowstack
      *)
     let fn = define_function "Fn6" ty m in
     insist (None = gc fn);
@@ -694,7 +731,7 @@ let test_params () =
 let test_basic_blocks () =
   let ty = function_type void_type [| |] in
   
-  (* RUN: grep "Bb1" < %t.ll
+  (* CHECK: Bb1
    *)
   group "entry";
   let fn = declare_function "X" ty m in
@@ -702,7 +739,7 @@ let test_basic_blocks () =
   insist (bb = entry_block fn);
   ignore (build_unreachable (builder_at_end context bb));
   
-  (* RUN: grep -v Bb2 < %t.ll
+  (* CHECK-NOWHERE-NOT: Bb2
    *)
   group "delete";
   let fn = declare_function "X2" ty m in
@@ -717,7 +754,7 @@ let test_basic_blocks () =
   ignore (build_unreachable (builder_at_end context bba));
   ignore (build_unreachable (builder_at_end context bbb));
   
-  (* RUN: grep Bb3 < %t.ll
+  (* CHECK: Bb3
    *)
   group "name/value";
   let fn = define_function "X4" ty m in
@@ -825,7 +862,7 @@ let test_builder () =
   
   group "ret void";
   begin
-    (* RUN: grep "ret void" < %t.ll
+    (* CHECK: ret void
      *)
     let fty = function_type void_type [| |] in
     let fn = declare_function "X6" fty m in
@@ -835,7 +872,7 @@ let test_builder () =
 
   group "ret aggregate";
   begin
-      (* RUN: grep "ret { i8, i64 } { i8 4, i64 5 }" < %t.ll
+      (* CHECK: ret { i8, i64 } { i8 4, i64 5 }
        *)
       let sty = struct_type context [| i8_type; i64_type |] in
       let fty = function_type sty [| |] in
@@ -860,12 +897,202 @@ let test_builder () =
   group "function attribute";
   begin
       ignore (add_function_attr fn Attribute.UWTable);
-      (* RUN: grep "X7.*#0" < %t.ll
-       * RUN: grep "attributes #0 = .*uwtable.*" < %t.ll
+      (* CHECK: X7{{.*}}#0
+       * #0 is uwtable, defined at EOF.
        *)
       insist ([Attribute.UWTable] = function_attr fn);
   end;
 
+  group "casts"; begin
+    let void_ptr = pointer_type i8_type in
+
+    (* CHECK-DAG: %build_trunc = trunc i32 %P1 to i8
+     * CHECK-DAG: %build_trunc2 = trunc i32 %P1 to i8
+     * CHECK-DAG: %build_trunc3 = trunc i32 %P1 to i8
+     * CHECK-DAG: %build_zext = zext i8 %build_trunc to i32
+     * CHECK-DAG: %build_zext2 = zext i8 %build_trunc to i32
+     * CHECK-DAG: %build_sext = sext i32 %build_zext to i64
+     * CHECK-DAG: %build_sext2 = sext i32 %build_zext to i64
+     * CHECK-DAG: %build_sext3 = sext i32 %build_zext to i64
+     * CHECK-DAG: %build_uitofp = uitofp i64 %build_sext to float
+     * CHECK-DAG: %build_sitofp = sitofp i32 %build_zext to double
+     * CHECK-DAG: %build_fptoui = fptoui float %build_uitofp to i32
+     * CHECK-DAG: %build_fptosi = fptosi double %build_sitofp to i64
+     * CHECK-DAG: %build_fptrunc = fptrunc double %build_sitofp to float
+     * CHECK-DAG: %build_fptrunc2 = fptrunc double %build_sitofp to float
+     * CHECK-DAG: %build_fpext = fpext float %build_fptrunc to double
+     * CHECK-DAG: %build_fpext2 = fpext float %build_fptrunc to double
+     * CHECK-DAG: %build_inttoptr = inttoptr i32 %P1 to i8*
+     * CHECK-DAG: %build_ptrtoint = ptrtoint i8* %build_inttoptr to i64
+     * CHECK-DAG: %build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64
+     * CHECK-DAG: %build_bitcast = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_bitcast2 = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_bitcast3 = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_bitcast4 = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_pointercast = bitcast i8* %build_inttoptr to i16*
+     *)
+    let inst28 = build_trunc p1 i8_type "build_trunc" atentry in
+    let inst29 = build_zext inst28 i32_type "build_zext" atentry in
+    let inst30 = build_sext inst29 i64_type "build_sext" atentry in
+    let inst31 = build_uitofp inst30 float_type "build_uitofp" atentry in
+    let inst32 = build_sitofp inst29 double_type "build_sitofp" atentry in
+    ignore(build_fptoui inst31 i32_type "build_fptoui" atentry);
+    ignore(build_fptosi inst32 i64_type "build_fptosi" atentry);
+    let inst35 = build_fptrunc inst32 float_type "build_fptrunc" atentry in
+    ignore(build_fpext inst35 double_type "build_fpext" atentry);
+    let inst37 = build_inttoptr p1 void_ptr "build_inttoptr" atentry in
+    let inst38 = build_ptrtoint inst37 i64_type "build_ptrtoint" atentry in
+    ignore(build_bitcast inst38 double_type "build_bitcast" atentry);
+    ignore(build_zext_or_bitcast inst38 double_type "build_bitcast2" atentry);
+    ignore(build_sext_or_bitcast inst38 double_type "build_bitcast3" atentry);
+    ignore(build_trunc_or_bitcast inst38 double_type "build_bitcast4" atentry);
+    ignore(build_pointercast inst37 (pointer_type i16_type) "build_pointercast" atentry);
+
+    ignore(build_zext_or_bitcast inst28 i32_type "build_zext2" atentry);
+    ignore(build_sext_or_bitcast inst29 i64_type "build_sext2" atentry);
+    ignore(build_trunc_or_bitcast p1 i8_type "build_trunc2" atentry);
+    ignore(build_pointercast inst37 i64_type "build_ptrtoint2" atentry);
+    ignore(build_intcast inst29 i64_type "build_sext3" atentry);
+    ignore(build_intcast p1 i8_type "build_trunc3" atentry);
+    ignore(build_fpcast inst35 double_type "build_fpext2" atentry);
+    ignore(build_fpcast inst32 float_type "build_fptrunc2" atentry);
+  end;
+
+  group "comparisons"; begin
+    (* CHECK: %build_icmp_ne = icmp ne i32 %P1, %P2
+     * CHECK: %build_icmp_sle = icmp sle i32 %P2, %P1
+     * CHECK: %build_fcmp_false = fcmp false float %F1, %F2
+     * CHECK: %build_fcmp_true = fcmp true float %F2, %F1
+     * CHECK: %build_is_null{{.*}}= icmp eq{{.*}}%X0,{{.*}}null
+     * CHECK: %build_is_not_null = icmp ne i8* %X1, null
+     * CHECK: %build_ptrdiff
+     *)
+    ignore (build_icmp Icmp.Ne    p1 p2 "build_icmp_ne" atentry);
+    ignore (build_icmp Icmp.Sle   p2 p1 "build_icmp_sle" atentry);
+    ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry);
+    ignore (build_fcmp Fcmp.True  f2 f1 "build_fcmp_true" atentry);
+    let g0 = declare_global (pointer_type i8_type) "g0" m in
+    let g1 = declare_global (pointer_type i8_type) "g1" m in
+    let p0 = build_load g0 "X0" atentry in
+    let p1 = build_load g1 "X1" atentry in
+    ignore (build_is_null p0 "build_is_null" atentry);
+    ignore (build_is_not_null p1 "build_is_not_null" atentry);
+    ignore (build_ptrdiff p1 p0 "build_ptrdiff" atentry);
+  end;
+
+  group "miscellaneous"; begin
+    (* CHECK: %build_call = tail call cc63 i32 @{{.*}}(i32 signext %P2, i32 %P1)
+     * CHECK: %build_select = select i1 %build_icmp, i32 %P1, i32 %P2
+     * CHECK: %build_va_arg = va_arg i8** null, i32
+     * CHECK: %build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2
+     * CHECK: %build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2
+     * CHECK: %build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+     * CHECK: %build_insertvalue0 = insertvalue{{.*}}%bl, i32 1, 0
+     * CHECK: %build_extractvalue = extractvalue{{.*}}%build_insertvalue1, 1
+     *)
+    let ci = build_call fn [| p2; p1 |] "build_call" atentry in
+    insist (CallConv.c = instruction_call_conv ci);
+    set_instruction_call_conv 63 ci;
+    insist (63 = instruction_call_conv ci);
+    insist (not (is_tail_call ci));
+    set_tail_call true ci;
+    insist (is_tail_call ci);
+    add_instruction_param_attr ci 1 Attribute.Sext;
+    add_instruction_param_attr ci 2 Attribute.Noalias;
+    remove_instruction_param_attr ci 2 Attribute.Noalias;
+
+    let inst46 = build_icmp Icmp.Eq p1 p2 "build_icmp" atentry in
+    ignore (build_select inst46 p1 p2 "build_select" atentry);
+    ignore (build_va_arg
+      (const_null (pointer_type (pointer_type i8_type)))
+      i32_type "build_va_arg" atentry);
+
+    (* Set up some vector vregs. *)
+    let one  = const_int i32_type 1 in
+    let zero = const_int i32_type 0 in
+    let t1 = const_vector [| one; zero; one; zero |] in
+    let t2 = const_vector [| zero; one; zero; one |] in
+    let t3 = const_vector [| one; one; zero; zero |] in
+    let vec1 = build_insertelement t1 p1 p2 "Vec1" atentry in
+    let vec2 = build_insertelement t2 p1 p2 "Vec2" atentry in
+    let sty = struct_type context [| i32_type; i8_type |] in
+
+    ignore (build_extractelement vec1 p2 "build_extractelement" atentry);
+    ignore (build_insertelement vec1 p1 p2 "build_insertelement" atentry);
+    ignore (build_shufflevector vec1 vec2 t3 "build_shufflevector" atentry);
+
+    let p = build_alloca sty "ba" atentry in
+    let agg = build_load p "bl" atentry in
+    let agg0 = build_insertvalue agg (const_int i32_type 1) 0
+                 "build_insertvalue0" atentry in
+    let agg1 = build_insertvalue agg0 (const_int i8_type 2) 1
+                 "build_insertvalue1" atentry in
+    ignore (build_extractvalue agg1 1 "build_extractvalue" atentry)
+  end;
+
+  group "metadata"; begin
+    (* CHECK: %metadata = add i32 %P1, %P2, !test !1
+     * !1 is metadata emitted at EOF.
+     *)
+    let i = build_add p1 p2 "metadata" atentry in
+    insist ((has_metadata i) = false);
+
+    let m1 = const_int i32_type 1 in
+    let m2 = mdstring context "metadata test" in
+    let md = mdnode context [| m1; m2 |] in
+
+    let kind = mdkind_id context "test" in
+    set_metadata i kind md;
+
+    insist ((has_metadata i) = true);
+    insist ((metadata i kind) = Some md);
+
+    clear_metadata i kind;
+
+    insist ((has_metadata i) = false);
+    insist ((metadata i kind) = None);
+
+    set_metadata i kind md
+  end;
+
+  group "named metadata"; begin
+    (* !llvm.module.flags is emitted at EOF. *)
+    let n1 = const_int i32_type 1 in
+    let n2 = mdstring context "Debug Info Version" in
+    let md = mdnode context [| n1; n2; n1 |] in
+    add_named_metadata_operand m "llvm.module.flags" md;
+
+    insist ((get_named_metadata m "llvm.module.flags") = [| md |])
+  end;
+
+  group "dbg"; begin
+    (* CHECK: %dbg = add i32 %P1, %P2, !dbg !2
+     * !2 is metadata emitted at EOF.
+     *)
+    insist ((current_debug_location atentry) = None);
+
+    let m_line = const_int i32_type 2 in
+    let m_col = const_int i32_type 3 in
+    let m_scope = mdnode context [| |] in
+    let m_inlined = mdnode context [| |] in
+    let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in
+    set_current_debug_location atentry md;
+
+    insist ((current_debug_location atentry) = Some md);
+
+    let i = build_add p1 p2 "dbg" atentry in
+    insist ((has_metadata i) = true);
+
+    clear_current_debug_location atentry
+  end;
+
+  group "ret"; begin
+    (* CHECK: ret{{.*}}P1
+     *)
+    let ret = build_ret p1 atentry in
+    position_before ret atentry
+  end;
+
   (* see test/Feature/exception.ll *)
   let bblpad = append_block context "Bblpad" fn in
   let rt = struct_type context [| pointer_type i8_type; i32_type |] in
@@ -887,23 +1114,16 @@ let test_builder () =
            add_clause lp (const_array ety [| ztipkc; ztid |]);
            ignore (build_resume lp (builder_at_end context bblpad));
       end;
-      (* RUN: grep "landingpad.*personality.*__gxx_personality_v0" < %t.ll
-       * RUN: grep "cleanup" < %t.ll
-       * RUN: grep "catch.*i8\*\*.*@_ZTIc" < %t.ll
-       * RUN: grep "filter.*@_ZTIPKc.*@_ZTId" < %t.ll
-       * RUN: grep "resume " < %t.ll
+      (* CHECK: landingpad{{.*}}personality{{.*}}__gxx_personality_v0
+       * CHECK: cleanup
+       * CHECK: catch{{.*}}i8**{{.*}}@_ZTIc
+       * CHECK: filter{{.*}}@_ZTIPKc{{.*}}@_ZTId
+       * CHECK: resume
        * *)
   end;
 
-  group "ret"; begin
-    (* RUN: grep "ret.*P1" < %t.ll
-     *)
-    let ret = build_ret p1 atentry in
-    position_before ret atentry
-  end;
-  
   group "br"; begin
-    (* RUN: grep "br.*Bb02" < %t.ll
+    (* CHECK: br{{.*}}Bb02
      *)
     let bb02 = append_block context "Bb02" fn in
     let b = builder_at_end context bb02 in
@@ -911,7 +1131,7 @@ let test_builder () =
   end;
   
   group "cond_br"; begin
-    (* RUN: grep "br.*build_br.*Bb03.*Bb00" < %t.ll
+    (* CHECK: br{{.*}}build_br{{.*}}Bb03{{.*}}Bb00
      *)
     let bb03 = append_block context "Bb03" fn in
     let b = builder_at_end context bb03 in
@@ -920,8 +1140,8 @@ let test_builder () =
   end;
   
   group "switch"; begin
-    (* RUN: grep "switch.*P1.*SwiBlock3" < %t.ll
-     * RUN: grep "2,.*SwiBlock2" < %t.ll
+    (* CHECK: switch{{.*}}P1{{.*}}SwiBlock3
+     * CHECK: 2,{{.*}}SwiBlock2
      *)
     let bb1 = append_block context "SwiBlock1" fn in
     let bb2 = append_block context "SwiBlock2" fn in
@@ -935,9 +1155,9 @@ let test_builder () =
   end;
 
   group "malloc/free"; begin
-      (* RUN: grep "call.*@malloc(i32 ptrtoint" < %t.ll
-       * RUN: grep "call.*@free(i8\*" < %t.ll
-       * RUN: grep "call.*@malloc(i32 %" < %t.ll
+      (* CHECK: call{{.*}}@malloc(i32 ptrtoint
+       * CHECK: call{{.*}}@free(i8*
+       * CHECK: call{{.*}}@malloc(i32 %
        *)
       let bb1 = append_block context "MallocBlock1" fn in
       let m1 = (build_malloc (pointer_type i32_type) "m1"
@@ -948,7 +1168,7 @@ let test_builder () =
   end;
 
   group "indirectbr"; begin
-    (* RUN: grep "indirectbr i8\* blockaddress(@X7, %IBRBlock2), \[label %IBRBlock2, label %IBRBlock3\]" < %t.ll
+    (* CHECK: indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]
      *)
     let bb1 = append_block context "IBRBlock1" fn in
 
@@ -965,8 +1185,8 @@ let test_builder () =
   end;
   
   group "invoke"; begin
-    (* RUN: grep "build_invoke.*invoke.*P1.*P2" < %t.ll
-     * RUN: grep "to.*Bb04.*unwind.*Bblpad" < %t.ll
+    (* CHECK: build_invoke{{.*}}invoke{{.*}}P1{{.*}}P2
+     * CHECK: to{{.*}}Bb04{{.*}}unwind{{.*}}Bblpad
      *)
     let bb04 = append_block context "Bb04" fn in
     let b = builder_at_end context bb04 in
@@ -974,7 +1194,7 @@ let test_builder () =
   end;
   
   group "unreachable"; begin
-    (* RUN: grep "unreachable" < %t.ll
+    (* CHECK: unreachable
      *)
     let bb06 = append_block context "Bb06" fn in
     let b = builder_at_end context bb06 in
@@ -985,36 +1205,36 @@ let test_builder () =
     let bb07 = append_block context "Bb07" fn in
     let b = builder_at_end context bb07 in
     
-    (* RUN: grep "%build_add = add i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_nsw_add = add nsw i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_nuw_add = add nuw i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_fadd = fadd float %F1, %F2" < %t.ll
-     * RUN: grep "%build_sub = sub i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_nsw_sub = sub nsw i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_nuw_sub = sub nuw i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_fsub = fsub float %F1, %F2" < %t.ll
-     * RUN: grep "%build_mul = mul i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_nsw_mul = mul nsw i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_nuw_mul = mul nuw i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_fmul = fmul float %F1, %F2" < %t.ll
-     * RUN: grep "%build_udiv = udiv i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_sdiv = sdiv i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_exact_sdiv = sdiv exact i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_fdiv = fdiv float %F1, %F2" < %t.ll
-     * RUN: grep "%build_urem = urem i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_srem = srem i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_frem = frem float %F1, %F2" < %t.ll
-     * RUN: grep "%build_shl = shl i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_lshl = lshr i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_ashl = ashr i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_and = and i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_or = or i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_xor = xor i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_neg = sub i32 0, %P1" < %t.ll
-     * RUN: grep "%build_nsw_neg = sub nsw i32 0, %P1" < %t.ll
-     * RUN: grep "%build_nuw_neg = sub nuw i32 0, %P1" < %t.ll
-     * RUN: grep "%build_fneg = fsub float .*0.*, %F1" < %t.ll
-     * RUN: grep "%build_not = xor i32 %P1, -1" < %t.ll
+    (* CHECK: %build_add = add i32 %P1, %P2
+     * CHECK: %build_nsw_add = add nsw i32 %P1, %P2
+     * CHECK: %build_nuw_add = add nuw i32 %P1, %P2
+     * CHECK: %build_fadd = fadd float %F1, %F2
+     * CHECK: %build_sub = sub i32 %P1, %P2
+     * CHECK: %build_nsw_sub = sub nsw i32 %P1, %P2
+     * CHECK: %build_nuw_sub = sub nuw i32 %P1, %P2
+     * CHECK: %build_fsub = fsub float %F1, %F2
+     * CHECK: %build_mul = mul i32 %P1, %P2
+     * CHECK: %build_nsw_mul = mul nsw i32 %P1, %P2
+     * CHECK: %build_nuw_mul = mul nuw i32 %P1, %P2
+     * CHECK: %build_fmul = fmul float %F1, %F2
+     * CHECK: %build_udiv = udiv i32 %P1, %P2
+     * CHECK: %build_sdiv = sdiv i32 %P1, %P2
+     * CHECK: %build_exact_sdiv = sdiv exact i32 %P1, %P2
+     * CHECK: %build_fdiv = fdiv float %F1, %F2
+     * CHECK: %build_urem = urem i32 %P1, %P2
+     * CHECK: %build_srem = srem i32 %P1, %P2
+     * CHECK: %build_frem = frem float %F1, %F2
+     * CHECK: %build_shl = shl i32 %P1, %P2
+     * CHECK: %build_lshl = lshr i32 %P1, %P2
+     * CHECK: %build_ashl = ashr i32 %P1, %P2
+     * CHECK: %build_and = and i32 %P1, %P2
+     * CHECK: %build_or = or i32 %P1, %P2
+     * CHECK: %build_xor = xor i32 %P1, %P2
+     * CHECK: %build_neg = sub i32 0, %P1
+     * CHECK: %build_nsw_neg = sub nsw i32 0, %P1
+     * CHECK: %build_nuw_neg = sub nuw i32 0, %P1
+     * CHECK: %build_fneg = fsub float {{.*}}0{{.*}}, %F1
+     * CHECK: %build_not = xor i32 %P1, -1
      *)
     ignore (build_add p1 p2 "build_add" b);
     ignore (build_nsw_add p1 p2 "build_nsw_add" b);
@@ -1053,18 +1273,29 @@ let test_builder () =
     let bb08 = append_block context "Bb08" fn in
     let b = builder_at_end context bb08 in
 
-    (* RUN: grep "%build_alloca = alloca i32" < %t.ll
-     * RUN: grep "%build_array_alloca = alloca i32, i32 %P2" < %t.ll
-     * RUN: grep "%build_load = load i32\* %build_array_alloca" < %t.ll
-     * RUN: grep "store i32 %P2, i32\* %build_alloca" < %t.ll
-     * RUN: grep "%build_gep = getelementptr i32\* %build_array_alloca, i32 %P2" < %t.ll
-     * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32\* %build_array_alloca, i32 %P2" < %t.ll
-     * RUN: grep "%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1" < %t.ll
+    (* CHECK: %build_alloca = alloca i32
+     * CHECK: %build_array_alloca = alloca i32, i32 %P2
+     * CHECK: %build_load = load volatile i32* %build_array_alloca, align 4
+     * CHECK: store volatile i32 %P2, i32* %build_alloca, align 4
+     * CHECK: %build_gep = getelementptr i32* %build_array_alloca, i32 %P2
+     * CHECK: %build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2
+     * CHECK: %build_struct_gep = getelementptr inbounds{{.*}}%build_alloca2, i32 0, i32 1
+     * CHECK: %build_atomicrmw = atomicrmw xchg i8* %p, i8 42 seq_cst
      *)
     let alloca = build_alloca i32_type "build_alloca" b in
     let array_alloca = build_array_alloca i32_type p2 "build_array_alloca" b in
-    ignore(build_load array_alloca "build_load" b);
-    ignore(build_store p2 alloca b);
+
+    let load = build_load array_alloca "build_load" b in
+    ignore(set_alignment 4 load);
+    ignore(set_volatile true load);
+    insist(true = is_volatile load);
+    insist(4 = alignment load);
+
+    let store = build_store p2 alloca b in
+    ignore(set_volatile true store);
+    ignore(set_alignment 4 store);
+    insist(true = is_volatile store);
+    insist(4 = alignment store);
     ignore(build_gep array_alloca [| p2 |] "build_gep" b);
     ignore(build_in_bounds_gep array_alloca [| p2 |] "build_in_bounds_gep" b);
 
@@ -1072,6 +1303,11 @@ let test_builder () =
     let alloca2 = build_alloca sty "build_alloca2" b in
     ignore(build_struct_gep alloca2 1 "build_struct_gep" b);
 
+    let p = build_alloca i8_type "p" b in
+    ignore(build_atomicrmw AtomicRMWBinOp.Xchg p (const_int i8_type 42)
+              AtomicOrdering.SequentiallyConsistent false "build_atomicrmw"
+              b);
+
     ignore(build_unreachable b)
   end;
 
@@ -1079,8 +1315,8 @@ let test_builder () =
     let bb09 = append_block context "Bb09" fn in
     let b = builder_at_end context bb09 in
     let p = build_alloca (pointer_type i8_type) "p" b in
-    (* RUN: grep "build_global_string.*stringval" < %t.ll
-     * RUN: grep "store.*build_global_string1.*p" < %t.ll
+    (* build_global_string is emitted above.
+     * CHECK: store{{.*}}build_global_string1{{.*}}p
      * *)
     ignore (build_global_string "stringval" "build_global_string" b);
     let g = build_global_stringptr "stringval" "build_global_string1" b in
@@ -1088,181 +1324,8 @@ let test_builder () =
     ignore(build_unreachable b);
   end;
 
-  group "casts"; begin
-    let void_ptr = pointer_type i8_type in
-    
-    (* RUN: grep "%build_trunc = trunc i32 %P1 to i8" < %t.ll
-     * RUN: grep "%build_trunc2 = trunc i32 %P1 to i8" < %t.ll
-     * RUN: grep "%build_trunc3 = trunc i32 %P1 to i8" < %t.ll
-     * RUN: grep "%build_zext = zext i8 %build_trunc to i32" < %t.ll
-     * RUN: grep "%build_zext2 = zext i8 %build_trunc to i32" < %t.ll
-     * RUN: grep "%build_sext = sext i32 %build_zext to i64" < %t.ll
-     * RUN: grep "%build_sext2 = sext i32 %build_zext to i64" < %t.ll
-     * RUN: grep "%build_sext3 = sext i32 %build_zext to i64" < %t.ll
-     * RUN: grep "%build_uitofp = uitofp i64 %build_sext to float" < %t.ll
-     * RUN: grep "%build_sitofp = sitofp i32 %build_zext to double" < %t.ll
-     * RUN: grep "%build_fptoui = fptoui float %build_uitofp to i32" < %t.ll
-     * RUN: grep "%build_fptosi = fptosi double %build_sitofp to i64" < %t.ll
-     * RUN: grep "%build_fptrunc = fptrunc double %build_sitofp to float" < %t.ll
-     * RUN: grep "%build_fptrunc2 = fptrunc double %build_sitofp to float" < %t.ll
-     * RUN: grep "%build_fpext = fpext float %build_fptrunc to double" < %t.ll
-     * RUN: grep "%build_fpext2 = fpext float %build_fptrunc to double" < %t.ll
-     * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8\*" < %t.ll
-     * RUN: grep "%build_ptrtoint = ptrtoint i8\* %build_inttoptr to i64" < %t.ll
-     * RUN: grep "%build_ptrtoint2 = ptrtoint i8\* %build_inttoptr to i64" < %t.ll
-     * RUN: grep "%build_bitcast = bitcast i64 %build_ptrtoint to double" < %t.ll
-     * RUN: grep "%build_bitcast2 = bitcast i64 %build_ptrtoint to double" < %t.ll
-     * RUN: grep "%build_bitcast3 = bitcast i64 %build_ptrtoint to double" < %t.ll
-     * RUN: grep "%build_bitcast4 = bitcast i64 %build_ptrtoint to double" < %t.ll
-     * RUN: grep "%build_pointercast = bitcast i8\* %build_inttoptr to i16*" < %t.ll
-     *)
-    let inst28 = build_trunc p1 i8_type "build_trunc" atentry in
-    let inst29 = build_zext inst28 i32_type "build_zext" atentry in
-    let inst30 = build_sext inst29 i64_type "build_sext" atentry in
-    let inst31 = build_uitofp inst30 float_type "build_uitofp" atentry in
-    let inst32 = build_sitofp inst29 double_type "build_sitofp" atentry in
-    ignore(build_fptoui inst31 i32_type "build_fptoui" atentry);
-    ignore(build_fptosi inst32 i64_type "build_fptosi" atentry);
-    let inst35 = build_fptrunc inst32 float_type "build_fptrunc" atentry in
-    ignore(build_fpext inst35 double_type "build_fpext" atentry);
-    let inst37 = build_inttoptr p1 void_ptr "build_inttoptr" atentry in
-    let inst38 = build_ptrtoint inst37 i64_type "build_ptrtoint" atentry in
-    ignore(build_bitcast inst38 double_type "build_bitcast" atentry);
-    ignore(build_zext_or_bitcast inst38 double_type "build_bitcast2" atentry);
-    ignore(build_sext_or_bitcast inst38 double_type "build_bitcast3" atentry);
-    ignore(build_trunc_or_bitcast inst38 double_type "build_bitcast4" atentry);
-    ignore(build_pointercast inst37 (pointer_type i16_type) "build_pointercast" atentry);
-
-    ignore(build_zext_or_bitcast inst28 i32_type "build_zext2" atentry);
-    ignore(build_sext_or_bitcast inst29 i64_type "build_sext2" atentry);
-    ignore(build_trunc_or_bitcast p1 i8_type "build_trunc2" atentry);
-    ignore(build_pointercast inst37 i64_type "build_ptrtoint2" atentry);
-    ignore(build_intcast inst29 i64_type "build_sext3" atentry);
-    ignore(build_intcast p1 i8_type "build_trunc3" atentry);
-    ignore(build_fpcast inst35 double_type "build_fpext2" atentry);
-    ignore(build_fpcast inst32 float_type "build_fptrunc2" atentry);
-  end;
-  
-  group "comparisons"; begin
-    (* RUN: grep "%build_icmp_ne = icmp ne i32 %P1, %P2" < %t.ll
-     * RUN: grep "%build_icmp_sle = icmp sle i32 %P2, %P1" < %t.ll
-     * RUN: grep "%build_fcmp_false = fcmp false float %F1, %F2" < %t.ll
-     * RUN: grep "%build_fcmp_true = fcmp true float %F2, %F1" < %t.ll
-     * RUN: grep "%build_is_null.*= icmp eq.*%X0,.*null" < %t.ll
-     * RUN: grep "%build_is_not_null = icmp ne i8\* %X1, null" < %t.ll
-     * RUN: grep "%build_ptrdiff" < %t.ll
-     *)
-    ignore (build_icmp Icmp.Ne    p1 p2 "build_icmp_ne" atentry);
-    ignore (build_icmp Icmp.Sle   p2 p1 "build_icmp_sle" atentry);
-    ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry);
-    ignore (build_fcmp Fcmp.True  f2 f1 "build_fcmp_true" atentry);
-    let g0 = declare_global (pointer_type i8_type) "g0" m in
-    let g1 = declare_global (pointer_type i8_type) "g1" m in
-    let p0 = build_load g0 "X0" atentry in
-    let p1 = build_load g1 "X1" atentry in
-    ignore (build_is_null p0 "build_is_null" atentry);
-    ignore (build_is_not_null p1 "build_is_not_null" atentry);
-    ignore (build_ptrdiff p1 p0 "build_ptrdiff" atentry);
-  end;
-  
-  group "miscellaneous"; begin
-    (* RUN: grep "%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)" < %t.ll
-     * RUN: grep "%build_select = select i1 %build_icmp, i32 %P1, i32 %P2" < %t.ll
-     * RUN: grep "%build_va_arg = va_arg i8\*\* null, i32" < %t.ll
-     * RUN: grep "%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2" < %t.ll
-     * RUN: grep "%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2" < %t.ll
-     * RUN: grep "%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>" < %t.ll
-     * RUN: grep "%build_insertvalue0 = insertvalue.*%bl, i32 1, 0" < %t.ll
-     * RUN: grep "%build_extractvalue = extractvalue.*%build_insertvalue1, 1" < %t.ll
-     *)
-    let ci = build_call fn [| p2; p1 |] "build_call" atentry in
-    insist (CallConv.c = instruction_call_conv ci);
-    set_instruction_call_conv 63 ci;
-    insist (63 = instruction_call_conv ci);
-    insist (not (is_tail_call ci));
-    set_tail_call true ci;
-    insist (is_tail_call ci);
-    add_instruction_param_attr ci 1 Attribute.Sext;
-    add_instruction_param_attr ci 2 Attribute.Noalias;
-    remove_instruction_param_attr ci 2 Attribute.Noalias;
-    
-    let inst46 = build_icmp Icmp.Eq p1 p2 "build_icmp" atentry in
-    ignore (build_select inst46 p1 p2 "build_select" atentry);
-    ignore (build_va_arg
-      (const_null (pointer_type (pointer_type i8_type)))
-      i32_type "build_va_arg" atentry);
-    
-    (* Set up some vector vregs. *)
-    let one  = const_int i32_type 1 in
-    let zero = const_int i32_type 0 in
-    let t1 = const_vector [| one; zero; one; zero |] in
-    let t2 = const_vector [| zero; one; zero; one |] in
-    let t3 = const_vector [| one; one; zero; zero |] in
-    let vec1 = build_insertelement t1 p1 p2 "Vec1" atentry in
-    let vec2 = build_insertelement t2 p1 p2 "Vec2" atentry in
-    let sty = struct_type context [| i32_type; i8_type |] in
-    
-    ignore (build_extractelement vec1 p2 "build_extractelement" atentry);
-    ignore (build_insertelement vec1 p1 p2 "build_insertelement" atentry);
-    ignore (build_shufflevector vec1 vec2 t3 "build_shufflevector" atentry);
-
-    let p = build_alloca sty "ba" atentry in
-    let agg = build_load p "bl" atentry in
-    let agg0 = build_insertvalue agg (const_int i32_type 1) 0
-                 "build_insertvalue0" atentry in
-    let agg1 = build_insertvalue agg0 (const_int i8_type 2) 1
-                 "build_insertvalue1" atentry in
-    ignore (build_extractvalue agg1 1 "build_extractvalue" atentry)
-  end;
-
-  group "metadata"; begin
-    (* RUN: grep '%metadata = add i32 %P1, %P2, !test !0' < %t.ll
-     * RUN: grep '!0 = metadata !{i32 1, metadata !"metadata test"}' < %t.ll
-     *)
-    let i = build_add p1 p2 "metadata" atentry in
-    insist ((has_metadata i) = false);
-
-    let m1 = const_int i32_type 1 in
-    let m2 = mdstring context "metadata test" in
-    let md = mdnode context [| m1; m2 |] in
-
-    let kind = mdkind_id context "test" in
-    set_metadata i kind md;
-
-    insist ((has_metadata i) = true);
-    insist ((metadata i kind) = Some md);
-
-    clear_metadata i kind;
-
-    insist ((has_metadata i) = false);
-    insist ((metadata i kind) = None);
-
-    set_metadata i kind md
-  end;
-
-  group "dbg"; begin
-    (* RUN: grep '%dbg = add i32 %P1, %P2, !dbg !1' < %t.ll
-     * RUN: grep '!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}' < %t.ll
-     *)
-    insist ((current_debug_location atentry) = None);
-
-    let m_line = const_int i32_type 2 in
-    let m_col = const_int i32_type 3 in
-    let m_scope = mdnode context [| |] in
-    let m_inlined = mdnode context [| |] in
-    let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in
-    set_current_debug_location atentry md;
-
-    insist ((current_debug_location atentry) = Some md);
-
-    let i = build_add p1 p2 "dbg" atentry in
-    insist ((has_metadata i) = true);
-
-    clear_current_debug_location atentry
-  end;
-  
   group "phi"; begin
-    (* RUN: grep "PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2" < %t.ll
+    (* CHECK: PhiNode{{.*}}P1{{.*}}PhiBlock1{{.*}}P2{{.*}}PhiBlock2
      *)
     let b1 = append_block context "PhiBlock1" fn in
     let b2 = append_block context "PhiBlock2" fn in
@@ -1281,6 +1344,13 @@ let test_builder () =
     ignore (build_unreachable at_jb);
   end
 
+(* End-of-file checks for things like metadata and attributes.
+ * CHECK: attributes #0 = {{.*}}uwtable{{.*}}
+ * CHECK: !llvm.module.flags = !{!0}
+ * CHECK: !0 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+ * CHECK: !1 = metadata !{i32 1, metadata !"metadata test"}
+ * CHECK: !2 = metadata !{i32 2, i32 3, metadata !3, metadata !3}
+ *)
 
 (*===-- Pass Managers -----------------------------------------------------===*)
 
@@ -1306,6 +1376,14 @@ let test_pass_manager () =
   end
 
 
+(*===-- Memory Buffer -----------------------------------------------------===*)
+
+let test_memory_buffer () =
+  group "memory buffer";
+  let buf = MemoryBuffer.of_string "foobar" in
+  insist ((MemoryBuffer.as_string buf) = "foobar")
+
+
 (*===-- Writer ------------------------------------------------------------===*)
 
 let test_writer () =
@@ -1323,6 +1401,7 @@ let test_writer () =
 (*===-- Driver ------------------------------------------------------------===*)
 
 let _ =
+  suite "conversion"       test_conversion;
   suite "target"           test_target;
   suite "constants"        test_constants;
   suite "global values"    test_global_values;
@@ -1336,5 +1415,6 @@ let _ =
   suite "instructions"     test_instructions;
   suite "builder"          test_builder;
   suite "pass manager"     test_pass_manager;
+  suite "memory buffer"    test_memory_buffer;
   suite "writer"           test_writer; (* Keep this last; it disposes m. *)
   exit !exit_status
diff --git a/test/Bindings/llvm-c/calc.test b/test/Bindings/llvm-c/calc.test
new file mode 100644
index 000000000000..36a76e660330
--- /dev/null
+++ b/test/Bindings/llvm-c/calc.test
@@ -0,0 +1,15 @@
+; RUN: llvm-c-test --calc <%s | FileCheck %s
+
+; constant folding
+test 100 200 +
+;CHECK: ModuleID = 'test'
+;CHECK: define i64 @test
+;CHECK: {
+;CHECK: ret i64 300
+;CHECK: }
+
+arg1 0 @ 0 @ * 1 @ 1 @ * +
+;CHECK: ModuleID = 'arg1'
+;CHECK: getelementptr
+;CHECK: load
+;CHECK: ret
diff --git a/test/Bindings/llvm-c/disassemble.test b/test/Bindings/llvm-c/disassemble.test
new file mode 100644
index 000000000000..201e914587d4
--- /dev/null
+++ b/test/Bindings/llvm-c/disassemble.test
@@ -0,0 +1,29 @@
+; RUN: llvm-c-test --disassemble < %s | FileCheck %s
+
+
+arm-linux-android    44 26 1f e5 0c 10 4b e2 02 20 81 e0
+;CHECK: triple: arm-linux-android
+;CHECK: ldr	r2, [pc, #-1604]
+;CHECK: sub	r1, r11, #12
+;CHECK: 02 20 81 e0
+;CHECK: add	r2, r1, r2
+
+x86_64-linux-unknown 48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3
+;CHECK: triple: x86_64-linux-unknown
+;CHECK: addq	$56, %rsp
+;CHECK: popq	%rbx
+;CHECK: popq	%rbp
+;CHECK: popq	%r12
+;CHECK: popq	%r13
+;CHECK: popq	%r14
+;CHECK: popq	%r15
+;CHECK: ret
+
+i686-apple-darwin    0f b7 4c 24 0a e8 29 ce ff ff
+;CHECK: movzwl	10(%esp), %ecx
+;CHECK: calll	-12759
+
+i686-linux-unknown   dd 44 24 04 d9 e1 c3
+;CHECK: fldl	4(%esp)
+;CHECK: fabs
+;CHECK: ret
diff --git a/test/Bindings/llvm-c/functions.ll b/test/Bindings/llvm-c/functions.ll
new file mode 100644
index 000000000000..4503fb17315f
--- /dev/null
+++ b/test/Bindings/llvm-c/functions.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llvm-c-test --module-list-functions | FileCheck %s
+
+define i32 @X() {
+entry:
+  br label %l1
+
+l1:
+  br label %l2
+
+l2:
+  br label %l3
+
+l3:
+  ret i32 1234
+}
+;CHECK: FunctionDefinition: X [#bb=4]
+
+
+define i32 @Z(i32 %a) {
+entry:
+  %0 = tail call i32 @Y(i32 %a)
+  ret i32 %0
+}
+
+;CHECK: FunctionDefinition: Z [#bb=1]
+;CHECK:  calls: Y
+;CHECK:  #isn: 2
+
+declare i32 @Y(i32)
+;CHECK: FunctionDeclaration: Y
+
diff --git a/test/Bindings/llvm-c/globals.ll b/test/Bindings/llvm-c/globals.ll
new file mode 100644
index 000000000000..a38f08b93ee9
--- /dev/null
+++ b/test/Bindings/llvm-c/globals.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llvm-c-test --module-list-globals | FileCheck %s
+
+@foo = constant [7 x i8] c"foobar\00", align 1
+;CHECK: GlobalDefinition: foo [7 x i8]*
+
+@bar = common global i32 0, align 4
+;CHECK: GlobalDefinition: bar i32*
diff --git a/test/Bindings/llvm-c/lit.local.cfg b/test/Bindings/llvm-c/lit.local.cfg
new file mode 100644
index 000000000000..d83ebeed8e1c
--- /dev/null
+++ b/test/Bindings/llvm-c/lit.local.cfg
@@ -0,0 +1,5 @@
+targets = set(config.root.targets_to_build.split())
+if not "X86" in targets:
+    config.unsupported = True
+if not "ARM" in targets:
+    config.unsupported = True
diff --git a/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll b/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll
deleted file mode 100644
index 583b9a853bde..000000000000
--- a/test/Bitcode/2012-05-07-SwitchInstRangesSupport.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: rm -f %t.bc
-; RUN: rm -f %t.ll
-; RUN: rm -f %t2.bc
-; RUN: rm -f %t2.ll
-; RUN: llvm-as %s -o %t.bc
-; RUN: llvm-dis %t.bc -o - | tail -n +2 > %t.ll
-; RUN: llvm-as %t.ll -o %t2.bc
-; RUN: llvm-dis %t2.bc -o - | tail -n +2 > %t2.ll
-; RUN: llvm-diff %t.ll %t2.ll
-
-define void @test() {
-  %mem = alloca i32
-  store i32 2, i32* %mem
-  %c = load i32* %mem
-  switch i32 %c, label %exit [
-      i32 1, label %exit
-      i32 2, label %exit
-  ]
-exit:
-  ret void
-}
-define void @test_wide() {
-  %mem = alloca i256
-  store i256 2, i256* %mem
-  %c = load i256* %mem
-  switch i256 %c, label %exit [
-      i256 123456789012345678901234567890, label %exit
-      i256 2, label %exit
-  ]
-exit:
-  ret void
-}
-
diff --git a/test/Bitcode/attributes-3.3.ll b/test/Bitcode/attributes-3.3.ll
new file mode 100644
index 000000000000..cd70ba1a749a
--- /dev/null
+++ b/test/Bitcode/attributes-3.3.ll
@@ -0,0 +1,236 @@
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; attributes-3.3.ll.bc was generated by passing this file to llvm-as-3.3.
+; The test checks that LLVM does not silently misread attributes of
+; older bitcode files.
+
+define void @f1(i8 zeroext)
+; CHECK: define void @f1(i8 zeroext)
+{
+        ret void;
+}
+
+define void @f2(i8 signext)
+; CHECK: define void @f2(i8 signext)
+{
+        ret void;
+}
+
+define void @f3() noreturn
+; CHECK: define void @f3() #0
+{
+        ret void;
+}
+
+define void @f4(i8 inreg)
+; CHECK: define void @f4(i8 inreg)
+{
+        ret void;
+}
+
+define void @f5(i8* sret)
+; CHECK: define void @f5(i8* sret)
+{
+        ret void;
+}
+
+define void @f6() nounwind
+; CHECK: define void @f6() #1
+{
+        ret void;
+}
+
+define void @f7(i8* noalias)
+; CHECK: define void @f7(i8* noalias)
+{
+        ret void;
+}
+
+define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval)
+{
+        ret void;
+}
+
+define void @f9(i8* nest)
+; CHECK: define void @f9(i8* nest)
+{
+        ret void;
+}
+
+define void @f10() readnone
+; CHECK: define void @f10() #2
+{
+        ret void;
+}
+
+define void @f11() readonly
+; CHECK: define void @f11() #3
+{
+        ret void;
+}
+
+define void @f12() noinline
+; CHECK: define void @f12() #4
+{
+        ret void;
+}
+
+define void @f13() alwaysinline
+; CHECK: define void @f13() #5
+{
+        ret void;
+}
+
+define void @f14() optsize
+; CHECK: define void @f14() #6
+{
+        ret void;
+}
+
+define void @f15() ssp
+; CHECK: define void @f15() #7
+{
+        ret void;
+}
+
+define void @f16() sspreq
+; CHECK: define void @f16() #8
+{
+        ret void;
+}
+
+define void @f17(i8 align 4)
+; CHECK: define void @f17(i8 align 4)
+{
+        ret void;
+}
+
+define void @f18(i8* nocapture)
+; CHECK: define void @f18(i8* nocapture)
+{
+        ret void;
+}
+
+define void @f19() noredzone
+; CHECK: define void @f19() #9
+{
+        ret void;
+}
+
+define void @f20() noimplicitfloat
+; CHECK: define void @f20() #10
+{
+        ret void;
+}
+
+define void @f21() naked
+; CHECK: define void @f21() #11
+{
+        ret void;
+}
+
+define void @f22() inlinehint
+; CHECK: define void @f22() #12
+{
+        ret void;
+}
+
+define void @f23() alignstack(4)
+; CHECK: define void @f23() #13
+{
+        ret void;
+}
+
+define void @f24() returns_twice
+; CHECK: define void @f24() #14
+{
+        ret void;
+}
+
+define void @f25() uwtable
+; CHECK: define void @f25() #15
+{
+        ret void;
+}
+
+define void @f26() nonlazybind
+; CHECK: define void @f26() #16
+{
+        ret void;
+}
+
+define void @f27() sanitize_address
+; CHECK: define void @f27() #17
+{
+        ret void;
+}
+define void @f28() sanitize_thread
+; CHECK: define void @f28() #18
+{
+        ret void;
+}
+define void @f29() sanitize_memory
+; CHECK: define void @f29() #19
+{
+        ret void;
+}
+
+define void @f30() "cpu"="cortex-a8"
+; CHECK: define void @f30() #20
+{
+        ret void;
+}
+
+define i8 @f31(i8 returned %A)
+; CHECK: define i8 @f31(i8 returned %A)
+{
+        ret i8 %A;
+}
+
+define void @f32() sspstrong
+; CHECK: define void @f32() #21
+{
+        ret void;
+}
+
+define void @f33() minsize
+; CHECK: define void @f33() #22
+{
+        ret void;
+}
+
+declare void @nobuiltin()
+
+define void @f34()
+; CHECK: define void @f34()
+{
+        call void @nobuiltin() nobuiltin
+; CHECK: call void @nobuiltin() #23
+        ret void;
+}
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes #1 = { nounwind }
+; CHECK: attributes #2 = { readnone }
+; CHECK: attributes #3 = { readonly }
+; CHECK: attributes #4 = { noinline }
+; CHECK: attributes #5 = { alwaysinline }
+; CHECK: attributes #6 = { optsize }
+; CHECK: attributes #7 = { ssp }
+; CHECK: attributes #8 = { sspreq }
+; CHECK: attributes #9 = { noredzone }
+; CHECK: attributes #10 = { noimplicitfloat }
+; CHECK: attributes #11 = { naked }
+; CHECK: attributes #12 = { inlinehint }
+; CHECK: attributes #13 = { alignstack=4 }
+; CHECK: attributes #14 = { returns_twice }
+; CHECK: attributes #15 = { uwtable }
+; CHECK: attributes #16 = { nonlazybind }
+; CHECK: attributes #17 = { sanitize_address }
+; CHECK: attributes #18 = { sanitize_thread }
+; CHECK: attributes #19 = { sanitize_memory }
+; CHECK: attributes #20 = { "cpu"="cortex-a8" }
+; CHECK: attributes #21 = { sspstrong }
+; CHECK: attributes #22 = { minsize }
+; CHECK: attributes #23 = { nobuiltin }
diff --git a/test/Bitcode/attributes-3.3.ll.bc b/test/Bitcode/attributes-3.3.ll.bc
new file mode 100644
index 000000000000..5dd71864ea4f
--- /dev/null
+++ b/test/Bitcode/attributes-3.3.ll.bc
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 6c46e94012a5..1789878e9f50 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -179,6 +179,40 @@ define void @f30() "cpu"="cortex-a8"
         ret void;
 }
 
+define i8 @f31(i8 returned %A)
+; CHECK: define i8 @f31(i8 returned %A)
+{
+        ret i8 %A;
+}
+
+define void @f32() sspstrong
+; CHECK: define void @f32() #21
+{
+        ret void;
+}
+
+define void @f33() minsize
+; CHECK: define void @f33() #22
+{
+        ret void;
+}
+
+declare void @nobuiltin()
+
+define void @f34()
+; CHECK: define void @f34()
+{
+        call void @nobuiltin() nobuiltin
+; CHECK: call void @nobuiltin() #24
+        ret void;
+}
+
+define void @f35() optnone noinline
+; CHECK: define void @f35() #23
+{
+        ret void;
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -200,3 +234,8 @@ define void @f30() "cpu"="cortex-a8"
 ; CHECK: attributes #18 = { sanitize_thread }
 ; CHECK: attributes #19 = { sanitize_memory }
 ; CHECK: attributes #20 = { "cpu"="cortex-a8" }
+; CHECK: attributes #21 = { sspstrong }
+; CHECK: attributes #22 = { minsize }
+; CHECK: attributes #23 = { noinline optnone }
+; CHECK: attributes #24 = { nobuiltin }
+
diff --git a/test/Bitcode/case-ranges-3.3.ll b/test/Bitcode/case-ranges-3.3.ll
new file mode 100644
index 000000000000..6e1d0a69a591
--- /dev/null
+++ b/test/Bitcode/case-ranges-3.3.ll
@@ -0,0 +1,67 @@
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; case-ranges-3.3.ll.bc was generated by passing this file to llvm-as from the
+; 3.3 release of LLVM. This tests that the bitcode for switches from that
+; release can still be read.
+
+define i32 @foo(i32 %x) nounwind ssp uwtable {
+; CHECK: define i32 @foo
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  store i32 %x, i32* %2, align 4
+  %3 = load i32* %2, align 4
+  switch i32 %3, label %9 [
+; CHECK: switch i32 %3, label %9
+    i32 -3, label %4
+; CHECK-NEXT: i32 -3, label %4
+    i32 -2, label %4
+; CHECK-NEXT: i32 -2, label %4
+    i32 -1, label %4
+; CHECK-NEXT: i32 -1, label %4
+    i32 0, label %4
+; CHECK-NEXT: i32 0, label %4
+    i32 1, label %4
+; CHECK-NEXT: i32 1, label %4
+    i32 2, label %4
+; CHECK-NEXT: i32 2, label %4
+    i32 4, label %5
+; CHECK-NEXT: i32 4, label %5
+    i32 5, label %6
+; CHECK-NEXT: i32 5, label %6
+    i32 6, label %7
+; CHECK-NEXT: i32 6, label %7
+    i32 7, label %8
+; CHECK-NEXT: i32 7, label %8
+  ]
+
+; <label>:4
+  store i32 -1, i32* %1
+  br label %11
+
+; <label>:5
+  store i32 2, i32* %1
+  br label %11
+
+; <label>:6
+  store i32 1, i32* %1
+  br label %11
+
+; <label>:7
+  store i32 4, i32* %1
+  br label %11
+
+; <label>:8
+  store i32 3, i32* %1
+  br label %11
+
+; <label>:9
+  br label %10
+
+; <label>:10
+  store i32 0, i32* %1
+  br label %11
+
+; <label>:11
+  %12 = load i32* %1
+  ret i32 %12
+}
diff --git a/test/Bitcode/case-ranges-3.3.ll.bc b/test/Bitcode/case-ranges-3.3.ll.bc
new file mode 100644
index 000000000000..998f7475a4d4
--- /dev/null
+++ b/test/Bitcode/case-ranges-3.3.ll.bc
diff --git a/test/Bitcode/drop-debug-info.ll b/test/Bitcode/drop-debug-info.ll
new file mode 100644
index 000000000000..da4ae0c541eb
--- /dev/null
+++ b/test/Bitcode/drop-debug-info.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !12
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"../llvm/tools/clang/test/CodeGen/debug-info-version.c", metadata !"/Users/manmanren/llvm_gmail/release"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!12 = metadata !{i32 4, i32 0, metadata !4, null}
+
+; CHECK-NOT: !dbg
+; CHECK-NOT: !llvm.dbg.cu
diff --git a/test/Bitcode/extractelement.ll b/test/Bitcode/extractelement.ll
index d88f811e8e9c..8999c656fce9 100644
--- a/test/Bitcode/extractelement.ll
+++ b/test/Bitcode/extractelement.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -constprop | llvm-dis
+; RUN: opt < %s -constprop | llvm-dis -disable-output
 ; PR3465
 
 define double @test() {
diff --git a/test/Bitcode/invalid.ll b/test/Bitcode/invalid.ll
new file mode 100644
index 000000000000..1d4a82bf2b61
--- /dev/null
+++ b/test/Bitcode/invalid.ll
@@ -0,0 +1,7 @@
+; RUN:  not llvm-dis < %s.bc 2>&1 | FileCheck %s
+
+; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: Invalid value
+
+; invalid.ll.bc has an invalid attribute number.
+; The test checks that LLVM reports the error and doesn't access freed memory
+; in doing so.
diff --git a/test/Bitcode/invalid.ll.bc b/test/Bitcode/invalid.ll.bc
new file mode 100644
index 000000000000..a85c3644b3ab
--- /dev/null
+++ b/test/Bitcode/invalid.ll.bc
diff --git a/test/Bitcode/lit.local.cfg b/test/Bitcode/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Bitcode/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll
index dbf46b06e198..4055f921c330 100644
--- a/test/Bitcode/metadata-2.ll
+++ b/test/Bitcode/metadata-2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis -o /dev/null
+; RUN: llvm-as < %s | llvm-dis -disable-output
 	%0 = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* }		; type %0
 	%1 = type { i64, %object.ModuleInfo* }		; type %1
 	%2 = type { i32, void ()* }		; type %2
diff --git a/test/Bitcode/metadata.ll b/test/Bitcode/metadata.ll
index 19db3eac2160..fc8a622252bf 100644
--- a/test/Bitcode/metadata.ll
+++ b/test/Bitcode/metadata.ll
@@ -1,6 +1,5 @@
-; RUN: llvm-as < %s | llvm-dis -o /dev/null
+; RUN: llvm-as < %s | llvm-dis -disable-output
 
 !llvm.foo = !{!0}
 !0 = metadata !{i32 42}
 @my.str = internal constant [4 x i8] c"foo\00"
-
diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll
index 8502b0d55a33..a620fab8cc3a 100644
--- a/test/Bitcode/null-type.ll
+++ b/test/Bitcode/null-type.ll
@@ -1,5 +1,4 @@
-; RUN: not llvm-dis < %s.bc > /dev/null 2> %t
-; RUN: FileCheck %s < %t
+; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s
 ; PR8494
 
-; CHECK: Invalid MODULE_CODE_FUNCTION record
+; CHECK: Invalid record
diff --git a/test/Bitcode/select.ll b/test/Bitcode/select.ll
new file mode 100644
index 000000000000..71e669a90cdc
--- /dev/null
+++ b/test/Bitcode/select.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define <2 x i32> @main() {
+  ret <2 x i32> select (<2 x i1> <i1 false, i1 undef>, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 undef>)
+}
+
+; CHECK: define <2 x i32> @main() {
+; CHECK:   ret <2 x i32> select (<2 x i1> <i1 false, i1 undef>, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 undef>)
+; CHECK: }
diff --git a/test/Bitcode/shuffle.ll b/test/Bitcode/shuffle.ll
index c3c01c6692c4..1495d8eebf9c 100644
--- a/test/Bitcode/shuffle.ll
+++ b/test/Bitcode/shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llvm-dis
+; RUN: llvm-as < %s | llvm-dis -disable-output
 
 ; <rdar://problem/8622574>
 ; tests the bitcodereader can handle the case where the reader will initially
diff --git a/test/Bitcode/upgrade-tbaa.ll b/test/Bitcode/upgrade-tbaa.ll
new file mode 100644
index 000000000000..e7389095b8c1
--- /dev/null
+++ b/test/Bitcode/upgrade-tbaa.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Function Attrs: nounwind
+define void @_Z4testPiPf(i32* nocapture %pI, float* nocapture %pF) #0 {
+entry:
+  store i32 0, i32* %pI, align 4, !tbaa !{metadata !"int", metadata !0}
+  ; CHECK: store i32 0, i32* %pI, align 4, !tbaa [[TAG_INT:!.*]]
+  store float 1.000000e+00, float* %pF, align 4, !tbaa !2
+  ; CHECK: store float 1.000000e+00, float* %pF, align 4, !tbaa [[TAG_FLOAT:!.*]]
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+!2 = metadata !{metadata !"float", metadata !0}
+
+; CHECK: [[TAG_INT]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0}
+; CHECK: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR:!.*]]}
+; CHECK: [[TYPE_CHAR]] = metadata !{metadata !"omnipotent char", metadata !{{.*}}
+; CHECK: [[TAG_FLOAT]] = metadata !{metadata [[TYPE_FLOAT:!.*]], metadata [[TYPE_FLOAT]], i64 0}
+; CHECK: [[TYPE_FLOAT]] = metadata !{metadata !"float", metadata [[TYPE_CHAR]]}
diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll
index c81283695731..d080d9dd4b0c 100644
--- a/test/BugPoint/crash-narrowfunctiontest.ll
+++ b/test/BugPoint/crash-narrowfunctiontest.ll
@@ -2,7 +2,6 @@
 ;
 ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
 ; REQUIRES: loadable_module
-; XFAIL: lto_on_osx
 
 define i32 @foo() { ret i32 1 }
 
diff --git a/test/BugPoint/lit.local.cfg b/test/BugPoint/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/BugPoint/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll
index 6dc9574bbe4b..cc043f084feb 100644
--- a/test/BugPoint/metadata.ll
+++ b/test/BugPoint/metadata.ll
@@ -1,14 +1,15 @@
 ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
 ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
 ; REQUIRES: loadable_module
-; XFAIL: lto_on_osx
 
 ; Bugpoint should keep the call's metadata attached to the call.
 
-; CHECK: call void @foo(), !dbg !0, !attach !2
-; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1}
-; CHECK: !1 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0}
-; CHECK: !2 = metadata !{metadata !"the call to foo"}
+; CHECK: call void @foo(), !dbg ![[LOC:[0-9]+]], !attach ![[CALL:[0-9]+]]
+; CHECK: ![[LOC]] = metadata !{i32 104, i32 105, metadata ![[SCOPE:[0-9]+]], metadata ![[SCOPE]]}
+; CHECK: ![[SCOPE]] = metadata !{i32 458769, metadata ![[FILE:[0-9]+]], i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata ![[LIST:[0-9]+]], metadata ![[LIST]], null, null, null, metadata !""}
+; CHECK: ![[FILE]] = metadata !{metadata !"source.c", metadata !"/dir"}
+; CHECK: ![[LIST]] = metadata !{i32 0}
+; CHECK: ![[CALL]] = metadata !{metadata !"the call to foo"}
 
 %rust_task = type {}
 define void @test(i32* %a, i8* %b) {
@@ -22,15 +23,20 @@ define void @test(i32* %a, i8* %b) {
 
 declare void @foo()
 
+!llvm.module.flags = !{!17}
+
 !0 = metadata !{metadata !"boring"}
 !1 = metadata !{metadata !"uninteresting"}
 !2 = metadata !{metadata !"the call to foo"}
 !3 = metadata !{metadata !"noise"}
 !4 = metadata !{metadata !"filler"}
 
-!9 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0}
+!9 = metadata !{i32 458769, metadata !15, i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata !16, metadata !16, null, null, null, metadata !""}
 !10 = metadata !{i32 100, i32 101, metadata !9, metadata !9}
 !11 = metadata !{i32 102, i32 103, metadata !9, metadata !9}
 !12 = metadata !{i32 104, i32 105, metadata !9, metadata !9}
 !13 = metadata !{i32 106, i32 107, metadata !9, metadata !9}
 !14 = metadata !{i32 108, i32 109, metadata !9, metadata !9}
+!15 = metadata !{metadata !"source.c", metadata !"/dir"}
+!16 = metadata !{i32 0}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index 5a45f846e103..29a03b831077 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -1,7 +1,6 @@
 ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes
 ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
 ; REQUIRES: loadable_module
-; XFAIL: lto_on_osx
 
 ; Test to make sure that arguments are removed from the function if they are 
 ; unnecessary. And clean up any types that that frees up too.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 728213f6130a..d6f7dab1287e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -13,12 +13,25 @@ if(NOT LLVM_BUILD_TOOLS)
 endif()
 
 # Set the depends list as a variable so that it can grow conditionally.
-set(LLVM_TEST_DEPENDS UnitTests
-          BugpointPasses LLVMHello
-          llc lli llvm-ar llvm-as
-          llvm-bcanalyzer llvm-diff
-          llvm-dis llvm-extract llvm-dwarfdump
+# NOTE: Sync the substitutions in test/lit.cfg when adding to this list.
+set(LLVM_TEST_DEPENDS
+          UnitTests
+          BugpointPasses
+          LLVMHello
+          llc
+          lli
+          lli-child-target
+          llvm-ar
+          llvm-as
+          llvm-bcanalyzer
+          llvm-c-test
+          llvm-cov
+          llvm-diff
+          llvm-dis
+          llvm-extract
+          llvm-dwarfdump
           llvm-link
+          llvm-lto
           llvm-mc
           llvm-mcmarkup
           llvm-nm
@@ -26,10 +39,15 @@ set(LLVM_TEST_DEPENDS UnitTests
           llvm-readobj
           llvm-rtdyld
           llvm-symbolizer
-          macho-dump opt
+          macho-dump
+          opt
           profile_rt-shared
-          FileCheck count not
-          yaml2obj obj2yaml)
+          FileCheck
+          count
+          not
+          yaml2obj
+          obj2yaml
+        )
 
 # If Intel JIT events are supported, depend on a tool that tests the listener.
 if( LLVM_USE_INTEL_JITEVENTS )
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll
index 7cb373232a2c..26fd3e66b798 100644
--- a/test/CodeGen/AArch64/adc.ll
+++ b/test/CodeGen/AArch64/adc.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
-; CHECK: test_simple:
+; CHECK-LABEL: test_simple:
 
   %valadd = add i128 %a, %b
 ; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2
@@ -16,7 +16,7 @@ define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
 }
 
 define i128 @test_imm(i128 %a) {
-; CHECK: test_imm:
+; CHECK-LABEL: test_imm:
 
   %val = add i128 %a, 12
 ; CHECK: adds x0, x0, #12
@@ -27,7 +27,7 @@ define i128 @test_imm(i128 %a) {
 }
 
 define i128 @test_shifted(i128 %a, i128 %b) {
-; CHECK: test_shifted:
+; CHECK-LABEL: test_shifted:
 
   %rhs = shl i128 %b, 45
 
@@ -40,7 +40,7 @@ define i128 @test_shifted(i128 %a, i128 %b) {
 }
 
 define i128 @test_extended(i128 %a, i16 %b) {
-; CHECK: test_extended:
+; CHECK-LABEL: test_extended:
 
   %ext = sext i16 %b to i128
   %rhs = shl i128 %ext, 3
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
index f2c74f6952b0..269c1e8143b2 100644
--- a/test/CodeGen/AArch64/addsub-shifted.ll
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -4,7 +4,7 @@
 @var64 = global i64 0
 
 define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
-; CHECK: test_lsl_arith:
+; CHECK-LABEL: test_lsl_arith:
 
   %rhs1 = load volatile i32* @var32
   %shift1 = shl i32 %rhs1, 18
@@ -73,7 +73,7 @@ define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
 }
 
 define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
-; CHECK: test_lsr_arith:
+; CHECK-LABEL: test_lsr_arith:
 
   %shift1 = lshr i32 %rhs32, 18
   %val1 = add i32 %lhs32, %shift1
@@ -132,7 +132,7 @@ define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
 }
 
 define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
-; CHECK: test_asr_arith:
+; CHECK-LABEL: test_asr_arith:
 
   %shift1 = ashr i32 %rhs32, 18
   %val1 = add i32 %lhs32, %shift1
@@ -191,7 +191,7 @@ define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
 }
 
 define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
-; CHECK: test_cmp:
+; CHECK-LABEL: test_cmp:
 
   %shift1 = shl i32 %rhs32, 13
   %tst1 = icmp uge i32 %lhs32, %shift1
@@ -237,7 +237,7 @@ end:
 }
 
 define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
-; CHECK: test_cmn:
+; CHECK-LABEL: test_cmn:
 
   %shift1 = shl i32 %rhs32, 13
   %val1 = sub i32 0, %shift1
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
index 5148807163c9..4d46d04b80f1 100644
--- a/test/CodeGen/AArch64/addsub.ll
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -9,7 +9,7 @@
 
 ; Add pure 12-bit immediates:
 define void @add_small() {
-; CHECK: add_small:
+; CHECK-LABEL: add_small:
 
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
   %val32 = load i32* @var_i32
@@ -26,7 +26,7 @@ define void @add_small() {
 
 ; Add 12-bit immediates, shifted left by 12 bits
 define void @add_med() {
-; CHECK: add_med:
+; CHECK-LABEL: add_med:
 
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
   %val32 = load i32* @var_i32
@@ -43,7 +43,7 @@ define void @add_med() {
 
 ; Subtract 12-bit immediates
 define void @sub_small() {
-; CHECK: sub_small:
+; CHECK-LABEL: sub_small:
 
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
   %val32 = load i32* @var_i32
@@ -60,7 +60,7 @@ define void @sub_small() {
 
 ; Subtract 12-bit immediates, shifted left by 12 bits
 define void @sub_med() {
-; CHECK: sub_med:
+; CHECK-LABEL: sub_med:
 
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
   %val32 = load i32* @var_i32
@@ -76,7 +76,7 @@ define void @sub_med() {
 }
 
 define void @testing() {
-; CHECK: testing:
+; CHECK-LABEL: testing:
   %val = load i32* @var_i32
 
 ; CHECK: cmp {{w[0-9]+}}, #4095
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index 2dd16626ea9f..f0e11c652240 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -6,7 +6,7 @@
 @var64 = global i64 0
 
 define void @addsub_i8rhs() {
-; CHECK: addsub_i8rhs:
+; CHECK-LABEL: addsub_i8rhs:
     %val8_tmp = load i8* @var8
     %lhs32 = load i32* @var32
     %lhs64 = load i64* @var64
@@ -81,7 +81,7 @@ end:
 }
 
 define void @addsub_i16rhs() {
-; CHECK: addsub_i16rhs:
+; CHECK-LABEL: addsub_i16rhs:
     %val16_tmp = load i16* @var16
     %lhs32 = load i32* @var32
     %lhs64 = load i64* @var64
@@ -159,7 +159,7 @@ end:
 ; example), but the remaining instructions are probably not idiomatic
 ; in the face of "add/sub (shifted register)" so I don't intend to.
 define void @addsub_i32rhs() {
-; CHECK: addsub_i32rhs:
+; CHECK-LABEL: addsub_i32rhs:
     %val32_tmp = load i32* @var32
     %lhs64 = load i64* @var64
 
@@ -186,4 +186,4 @@ define void @addsub_i32rhs() {
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
 
     ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll
deleted file mode 100644
index cf411166a3a0..000000000000
--- a/test/CodeGen/AArch64/adrp-relocation.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | llvm-readobj -s -r | FileCheck %s
-
-define i64 @testfn() nounwind {
-entry:
-  ret i64 0
-}
-
-define i64 @foo() nounwind {
-entry:
-  %bar = alloca i64 ()*, align 8
-  store i64 ()* @testfn, i64 ()** %bar, align 8
-  %call = call i64 @testfn()
-  ret i64 %call
-}
-
-; The above should produce an ADRP/ADD pair to calculate the address of
-; testfn. The important point is that LLVM shouldn't think it can deal with the
-; relocation on the ADRP itself (even though it knows everything about the
-; relative offsets of testfn and foo) because its value depends on where this
-; object file's .text section gets relocated in memory.
-
-; CHECK:      Relocations [
-; CHECK-NEXT:   Section (1) .text {
-; CHECK-NEXT:     0x10 R_AARCH64_ADR_PREL_PG_HI21 testfn 0x0
-; CHECK-NEXT:     0x14 R_AARCH64_ADD_ABS_LO12_NC testfn 0x0
-; CHECK-NEXT:   }
-; CHECK-NEXT: ]
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
index c62edf6503c6..1d3c0a02ac87 100644
--- a/test/CodeGen/AArch64/alloca.ll
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -1,19 +1,20 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP %s
 
 declare void @use_addr(i8*)
 
 define void @test_simple_alloca(i64 %n) {
-; CHECK: test_simple_alloca:
+; CHECK-LABEL: test_simple_alloca:
 
   %buf = alloca i8, i64 %n
   ; Make sure we align the stack change to 16 bytes:
-; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
-; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+; CHECK-DAG: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK-DAG: and x0, [[SPDELTA]], #0xfffffffffffffff0
 
   ; Make sure we change SP. It would be surprising if anything but x0 were used
   ; for the final sp, but it could be if it was then moved into x0.
-; CHECK: mov [[TMP:x[0-9]+]], sp
-; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK-DAG: mov [[TMP:x[0-9]+]], sp
+; CHECK-DAG: sub x0, [[TMP]], [[SPDELTA]]
 ; CHECK: mov sp, x0
 
   call void @use_addr(i8* %buf)
@@ -30,20 +31,20 @@ define void @test_simple_alloca(i64 %n) {
 declare void @use_addr_loc(i8*, i64*)
 
 define i64 @test_alloca_with_local(i64 %n) {
-; CHECK: test_alloca_with_local:
+; CHECK-LABEL: test_alloca_with_local:
 ; CHECK: sub sp, sp, #32
 ; CHECK: stp x29, x30, [sp, #16]
 
   %loc = alloca i64
   %buf = alloca i8, i64 %n
   ; Make sure we align the stack change to 16 bytes:
-; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
-; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+; CHECK-DAG: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK-DAG: and x0, [[SPDELTA]], #0xfffffffffffffff0
 
   ; Make sure we change SP. It would be surprising if anything but x0 were used
   ; for the final sp, but it could be if it was then moved into x0.
-; CHECK: mov [[TMP:x[0-9]+]], sp
-; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK-DAG: mov [[TMP:x[0-9]+]], sp
+; CHECK-DAG: sub x0, [[TMP]], [[SPDELTA]]
 ; CHECK: mov sp, x0
 
   ; Obviously suboptimal code here, but it to get &local in x1
@@ -73,9 +74,15 @@ define void @test_variadic_alloca(i64 %n, ...) {
 ; CHECK: add     x29, sp, #192
 ; CHECK: sub     [[TMP:x[0-9]+]], x29, #192
 ; CHECK: add     x8, [[TMP]], #0
 ; CHECK: str     q7, [x8, #112]
 ; [...]
 ; CHECK: str     q1, [x8, #16]
+
+; CHECK-NOFP: sub     sp, sp, #80
+; CHECK-NOFP: stp     x29, x30, [sp, #64]
+; CHECK-NOFP: add     x29, sp, #64
+; CHECK-NOFP: sub     [[TMP:x[0-9]+]], x29, #64
+; CHECK-NOFP: add     x8, [[TMP]], #0
 
   %addr = alloca i8, i64 %n
 
@@ -86,10 +93,14 @@ define void @test_variadic_alloca(i64 %n, ...) {
 ; CHECK: sub sp, x29, #192
 ; CHECK: ldp x29, x30, [sp, #192]
 ; CHECK: add sp, sp, #208
+
+; CHECK-NOFP: sub sp, x29, #64
+; CHECK-NOFP: ldp x29, x30, [sp, #64]
+; CHECK-NOFP: add sp, sp, #80
 }
 
 define void @test_alloca_large_frame(i64 %n) {
-; CHECK: test_alloca_large_frame:
+; CHECK-LABEL: test_alloca_large_frame:
 
 ; CHECK: sub sp, sp, #496
 ; CHECK: stp x29, x30, [sp, #480]
@@ -112,16 +123,16 @@ declare i8* @llvm.stacksave()
 declare void @llvm.stackrestore(i8*)
 
 define void @test_scoped_alloca(i64 %n) {
-; CHECK: test_scoped_alloca
+; CHECK-LABEL: test_scoped_alloca:
 ; CHECK: sub sp, sp, #32
 
   %sp = call i8* @llvm.stacksave()
 ; CHECK: mov [[SAVED_SP:x[0-9]+]], sp
+; CHECK: mov [[OLDSP:x[0-9]+]], sp
 
   %addr = alloca i8, i64 %n
 ; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
-; CHECK: mov [[OLDSP:x[0-9]+]], sp
-; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]]
+; CHECK-DAG: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]]
 ; CHECK: mov sp, [[NEWSP]]
 
   call void @use_addr(i8* %addr)
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
index e10bbb0f8691..36bc2e00d238 100644
--- a/test/CodeGen/AArch64/analyze-branch.ll
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -11,7 +11,7 @@ declare void @test_false()
 !1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
 
 define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
-; CHECK: test_Bcc_fallthrough_taken:
+; CHECK-LABEL: test_Bcc_fallthrough_taken:
   %tst = icmp eq i32 %in, 42
   br i1 %tst, label %true, label %false, !prof !0
 
@@ -34,7 +34,7 @@ false:
 }
 
 define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind {
-; CHECK: test_Bcc_fallthrough_nottaken:
+; CHECK-LABEL: test_Bcc_fallthrough_nottaken:
   %tst = icmp eq i32 %in, 42
   br i1 %tst, label %true, label %false, !prof !1
 
@@ -57,7 +57,7 @@ false:
 }
 
 define void @test_CBZ_fallthrough_taken(i32 %in) nounwind {
-; CHECK: test_CBZ_fallthrough_taken:
+; CHECK-LABEL: test_CBZ_fallthrough_taken:
   %tst = icmp eq i32 %in, 0
   br i1 %tst, label %true, label %false, !prof !0
 
@@ -78,7 +78,7 @@ false:
 }
 
 define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind {
-; CHECK: test_CBZ_fallthrough_nottaken:
+; CHECK-LABEL: test_CBZ_fallthrough_nottaken:
   %tst = icmp eq i64 %in, 0
   br i1 %tst, label %true, label %false, !prof !1
 
@@ -99,7 +99,7 @@ false:
 }
 
 define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind {
-; CHECK: test_CBNZ_fallthrough_taken:
+; CHECK-LABEL: test_CBNZ_fallthrough_taken:
   %tst = icmp ne i32 %in, 0
   br i1 %tst, label %true, label %false, !prof !0
 
@@ -120,7 +120,7 @@ false:
 }
 
 define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind {
-; CHECK: test_CBNZ_fallthrough_nottaken:
+; CHECK-LABEL: test_CBNZ_fallthrough_nottaken:
   %tst = icmp ne i64 %in, 0
   br i1 %tst, label %true, label %false, !prof !1
 
@@ -141,7 +141,7 @@ false:
 }
 
 define void @test_TBZ_fallthrough_taken(i32 %in) nounwind {
-; CHECK: test_TBZ_fallthrough_taken:
+; CHECK-LABEL: test_TBZ_fallthrough_taken:
   %bit = and i32 %in, 32768
   %tst = icmp eq i32 %bit, 0
   br i1 %tst, label %true, label %false, !prof !0
@@ -163,7 +163,7 @@ false:
 }
 
 define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind {
-; CHECK: test_TBZ_fallthrough_nottaken:
+; CHECK-LABEL: test_TBZ_fallthrough_nottaken:
   %bit = and i64 %in, 32768
   %tst = icmp eq i64 %bit, 0
   br i1 %tst, label %true, label %false, !prof !1
@@ -186,7 +186,7 @@ false:
 
 
 define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind {
-; CHECK: test_TBNZ_fallthrough_taken:
+; CHECK-LABEL: test_TBNZ_fallthrough_taken:
   %bit = and i32 %in, 32768
   %tst = icmp ne i32 %bit, 0
   br i1 %tst, label %true, label %false, !prof !0
@@ -208,7 +208,7 @@ false:
 }
 
 define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind {
-; CHECK: test_TBNZ_fallthrough_nottaken:
+; CHECK-LABEL: test_TBNZ_fallthrough_nottaken:
   %bit = and i64 %in, 32768
   %tst = icmp ne i64 %bit, 0
   br i1 %tst, label %true, label %false, !prof !1
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index 9888a742e32b..da095a0a42c5 100644
--- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 define i32 @foo(i32* %var, i1 %cond) {
-; CHECK: foo:
+; CHECK-LABEL: foo:
   br i1 %cond, label %atomic_ver, label %simple_ver
 simple_ver:
   %oldval = load i32* %var
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 5e87f21a217d..de84ff46ec3b 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -6,7 +6,7 @@
 @var64 = global i64 0
 
 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_add_i8:
+; CHECK-LABEL: test_atomic_load_add_i8:
    %old = atomicrmw add i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -26,7 +26,7 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_add_i16:
+; CHECK-LABEL: test_atomic_load_add_i16:
    %old = atomicrmw add i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -46,7 +46,7 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_add_i32:
+; CHECK-LABEL: test_atomic_load_add_i32:
    %old = atomicrmw add i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -66,7 +66,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_add_i64:
+; CHECK-LABEL: test_atomic_load_add_i64:
    %old = atomicrmw add i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -86,7 +86,7 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_sub_i8:
+; CHECK-LABEL: test_atomic_load_sub_i8:
    %old = atomicrmw sub i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -106,7 +106,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_sub_i16:
+; CHECK-LABEL: test_atomic_load_sub_i16:
    %old = atomicrmw sub i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -126,7 +126,7 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_sub_i32:
+; CHECK-LABEL: test_atomic_load_sub_i32:
    %old = atomicrmw sub i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -146,7 +146,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_sub_i64:
+; CHECK-LABEL: test_atomic_load_sub_i64:
    %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -166,7 +166,7 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_and_i8:
+; CHECK-LABEL: test_atomic_load_and_i8:
    %old = atomicrmw and i8* @var8, i8 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -186,7 +186,7 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_and_i16:
+; CHECK-LABEL: test_atomic_load_and_i16:
    %old = atomicrmw and i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -206,7 +206,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_and_i32:
+; CHECK-LABEL: test_atomic_load_and_i32:
    %old = atomicrmw and i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -226,7 +226,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_and_i64:
+; CHECK-LABEL: test_atomic_load_and_i64:
    %old = atomicrmw and i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -246,7 +246,7 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_or_i8:
+; CHECK-LABEL: test_atomic_load_or_i8:
    %old = atomicrmw or i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -266,7 +266,7 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_or_i16:
+; CHECK-LABEL: test_atomic_load_or_i16:
    %old = atomicrmw or i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -286,7 +286,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_or_i32:
+; CHECK-LABEL: test_atomic_load_or_i32:
    %old = atomicrmw or i32* @var32, i32 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -306,7 +306,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_or_i64:
+; CHECK-LABEL: test_atomic_load_or_i64:
    %old = atomicrmw or i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -326,7 +326,7 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_xor_i8:
+; CHECK-LABEL: test_atomic_load_xor_i8:
    %old = atomicrmw xor i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -346,7 +346,7 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_xor_i16:
+; CHECK-LABEL: test_atomic_load_xor_i16:
    %old = atomicrmw xor i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -366,7 +366,7 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_xor_i32:
+; CHECK-LABEL: test_atomic_load_xor_i32:
    %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -386,7 +386,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_xor_i64:
+; CHECK-LABEL: test_atomic_load_xor_i64:
    %old = atomicrmw xor i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -406,7 +406,7 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_xchg_i8:
+; CHECK-LABEL: test_atomic_load_xchg_i8:
    %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -425,7 +425,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_xchg_i16:
+; CHECK-LABEL: test_atomic_load_xchg_i16:
    %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -444,7 +444,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_xchg_i32:
+; CHECK-LABEL: test_atomic_load_xchg_i32:
    %old = atomicrmw xchg i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -463,7 +463,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_xchg_i64:
+; CHECK-LABEL: test_atomic_load_xchg_i64:
    %old = atomicrmw xchg i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -483,7 +483,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 
 
 define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_min_i8:
+; CHECK-LABEL: test_atomic_load_min_i8:
    %old = atomicrmw min i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -504,7 +504,7 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_min_i16:
+; CHECK-LABEL: test_atomic_load_min_i16:
    %old = atomicrmw min i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -525,7 +525,7 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_min_i32:
+; CHECK-LABEL: test_atomic_load_min_i32:
    %old = atomicrmw min i32* @var32, i32 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -546,7 +546,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_min_i64:
+; CHECK-LABEL: test_atomic_load_min_i64:
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -567,7 +567,7 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_max_i8:
+; CHECK-LABEL: test_atomic_load_max_i8:
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -588,7 +588,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_max_i16:
+; CHECK-LABEL: test_atomic_load_max_i16:
    %old = atomicrmw max i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -609,7 +609,7 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_max_i32:
+; CHECK-LABEL: test_atomic_load_max_i32:
    %old = atomicrmw max i32* @var32, i32 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -630,7 +630,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_max_i64:
+; CHECK-LABEL: test_atomic_load_max_i64:
    %old = atomicrmw max i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -651,7 +651,7 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_umin_i8:
+; CHECK-LABEL: test_atomic_load_umin_i8:
    %old = atomicrmw umin i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -672,7 +672,7 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_umin_i16:
+; CHECK-LABEL: test_atomic_load_umin_i16:
    %old = atomicrmw umin i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -693,7 +693,7 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_umin_i32:
+; CHECK-LABEL: test_atomic_load_umin_i32:
    %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -714,7 +714,7 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_umin_i64:
+; CHECK-LABEL: test_atomic_load_umin_i64:
    %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -735,7 +735,7 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
-; CHECK: test_atomic_load_umax_i8:
+; CHECK-LABEL: test_atomic_load_umax_i8:
    %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -756,7 +756,7 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 }
 
 define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
-; CHECK: test_atomic_load_umax_i16:
+; CHECK-LABEL: test_atomic_load_umax_i16:
    %old = atomicrmw umax i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -777,7 +777,7 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 }
 
 define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
-; CHECK: test_atomic_load_umax_i32:
+; CHECK-LABEL: test_atomic_load_umax_i32:
    %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -798,7 +798,7 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 }
 
 define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
-; CHECK: test_atomic_load_umax_i64:
+; CHECK-LABEL: test_atomic_load_umax_i64:
    %old = atomicrmw umax i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -819,7 +819,7 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 }
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
-; CHECK: test_atomic_cmpxchg_i8:
+; CHECK-LABEL: test_atomic_cmpxchg_i8:
    %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -841,7 +841,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 }
 
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
-; CHECK: test_atomic_cmpxchg_i16:
+; CHECK-LABEL: test_atomic_cmpxchg_i16:
    %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
@@ -863,7 +863,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 }
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
-; CHECK: test_atomic_cmpxchg_i32:
+; CHECK-LABEL: test_atomic_cmpxchg_i32:
    %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
@@ -885,7 +885,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 }
 
 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
-; CHECK: test_atomic_cmpxchg_i64:
+; CHECK-LABEL: test_atomic_cmpxchg_i64:
    %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
@@ -907,7 +907,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 }
 
 define i8 @test_atomic_load_monotonic_i8() nounwind {
-; CHECK: test_atomic_load_monotonic_i8:
+; CHECK-LABEL: test_atomic_load_monotonic_i8:
   %val = load atomic i8* @var8 monotonic, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var8
@@ -918,7 +918,7 @@ define i8 @test_atomic_load_monotonic_i8() nounwind {
 }
 
 define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
-; CHECK: test_atomic_load_monotonic_regoff_i8:
+; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8:
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i8*
 
@@ -931,7 +931,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
 }
 
 define i8 @test_atomic_load_acquire_i8() nounwind {
-; CHECK: test_atomic_load_acquire_i8:
+; CHECK-LABEL: test_atomic_load_acquire_i8:
   %val = load atomic i8* @var8 acquire, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
@@ -944,7 +944,7 @@ define i8 @test_atomic_load_acquire_i8() nounwind {
 }
 
 define i8 @test_atomic_load_seq_cst_i8() nounwind {
-; CHECK: test_atomic_load_seq_cst_i8:
+; CHECK-LABEL: test_atomic_load_seq_cst_i8:
   %val = load atomic i8* @var8 seq_cst, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
@@ -957,7 +957,7 @@ define i8 @test_atomic_load_seq_cst_i8() nounwind {
 }
 
 define i16 @test_atomic_load_monotonic_i16() nounwind {
-; CHECK: test_atomic_load_monotonic_i16:
+; CHECK-LABEL: test_atomic_load_monotonic_i16:
   %val = load atomic i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
@@ -969,7 +969,7 @@ define i16 @test_atomic_load_monotonic_i16() nounwind {
 }
 
 define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
-; CHECK: test_atomic_load_monotonic_regoff_i32:
+; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32:
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i32*
 
@@ -982,7 +982,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
 }
 
 define i64 @test_atomic_load_seq_cst_i64() nounwind {
-; CHECK: test_atomic_load_seq_cst_i64:
+; CHECK-LABEL: test_atomic_load_seq_cst_i64:
   %val = load atomic i64* @var64 seq_cst, align 8
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
@@ -995,7 +995,7 @@ define i64 @test_atomic_load_seq_cst_i64() nounwind {
 }
 
 define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
-; CHECK: test_atomic_store_monotonic_i8:
+; CHECK-LABEL: test_atomic_store_monotonic_i8:
   store atomic i8 %val, i8* @var8 monotonic, align 1
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var8
 ; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8]
@@ -1004,7 +1004,7 @@ define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
 }
 
 define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
-; CHECK: test_atomic_store_monotonic_regoff_i8:
+; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8:
 
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i8*
@@ -1015,7 +1015,7 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
   ret void
 }
 define void @test_atomic_store_release_i8(i8 %val) nounwind {
-; CHECK: test_atomic_store_release_i8:
+; CHECK-LABEL: test_atomic_store_release_i8:
   store atomic i8 %val, i8* @var8 release, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
@@ -1028,7 +1028,7 @@ define void @test_atomic_store_release_i8(i8 %val) nounwind {
 }
 
 define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
-; CHECK: test_atomic_store_seq_cst_i8:
+; CHECK-LABEL: test_atomic_store_seq_cst_i8:
   store atomic i8 %val, i8* @var8 seq_cst, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
@@ -1042,7 +1042,7 @@ define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 }
 
 define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
-; CHECK: test_atomic_store_monotonic_i16:
+; CHECK-LABEL: test_atomic_store_monotonic_i16:
   store atomic i16 %val, i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
@@ -1053,7 +1053,7 @@ define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
 }
 
 define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
-; CHECK: test_atomic_store_monotonic_regoff_i32:
+; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32:
 
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i32*
@@ -1067,7 +1067,7 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
 }
 
 define void @test_atomic_store_release_i64(i64 %val) nounwind {
-; CHECK: test_atomic_store_release_i64:
+; CHECK-LABEL: test_atomic_store_release_i64:
   store atomic i64 %val, i64* @var64 release, align 8
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll
index da94041c95ff..682b7ba69d95 100644
--- a/test/CodeGen/AArch64/basic-pic.ll
+++ b/test/CodeGen/AArch64/basic-pic.ll
@@ -1,70 +1,54 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o -| llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
 
 @var = global i32 0
 
-; CHECK-ELF: RELOCATION RECORDS FOR [.text]
-
 define i32 @get_globalvar() {
-; CHECK: get_globalvar:
+; CHECK-LABEL: get_globalvar:
 
   %val = load i32* @var
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
 ; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var]
 ; CHECK: ldr w0, [x[[GOTLOC]]]
 
-; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
-; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
   ret i32 %val
 }
 
 define i32* @get_globalvaraddr() {
-; CHECK: get_globalvaraddr:
+; CHECK-LABEL: get_globalvaraddr:
 
   %val = load i32* @var
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
 ; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var]
 
-; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
-; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
   ret i32* @var
 }
 
 @hiddenvar = hidden global i32 0
 
 define i32 @get_hiddenvar() {
-; CHECK: get_hiddenvar:
+; CHECK-LABEL: get_hiddenvar:
 
   %val = load i32* @hiddenvar
 ; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
 ; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar]
 
-; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
-; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar
   ret i32 %val
 }
 
 define i32* @get_hiddenvaraddr() {
-; CHECK: get_hiddenvaraddr:
+; CHECK-LABEL: get_hiddenvaraddr:
 
   %val = load i32* @hiddenvar
 ; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
 ; CHECK: add x0, [[HI]], #:lo12:hiddenvar
 
-; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
-; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar
   ret i32* @hiddenvar
 }
 
 define void()* @get_func() {
-; CHECK: get_func:
+; CHECK-LABEL: get_func:
 
   ret void()* bitcast(void()*()* @get_func to void()*)
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
 ; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func]
-
-  ; Particularly important that the ADRP gets a relocation, LLVM tends to think
-  ; it can relax it because it knows where get_func is. It can't!
-; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func
-; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll
index d1191f6aaa8a..37a18b7fb613 100644
--- a/test/CodeGen/AArch64/bitfield-insert-0.ll
+++ b/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -16,4 +16,4 @@ define void @test_bfi0(i32* %existing, i32* %new) {
   store volatile i32 %combined, i32* %existing
 
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
index 3e871b9a6d27..1f046087abc0 100644
--- a/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -6,7 +6,7 @@
 %struct.foo = type { i8, [2 x i8], i8 }
 
 define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
-; CHECK: from_clang:
+; CHECK-LABEL: from_clang:
 ; CHECK: bfi w0, w1, #3, #4
 ; CHECK-NEXT: ret
 
@@ -25,7 +25,7 @@ entry:
 }
 
 define void @test_whole32(i32* %existing, i32* %new) {
-; CHECK: test_whole32:
+; CHECK-LABEL: test_whole32:
 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
 
   %oldval = load volatile i32* %existing
@@ -42,7 +42,7 @@ define void @test_whole32(i32* %existing, i32* %new) {
 }
 
 define void @test_whole64(i64* %existing, i64* %new) {
-; CHECK: test_whole64:
+; CHECK-LABEL: test_whole64:
 ; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14
 ; CHECK-NOT: and
 ; CHECK: ret
@@ -61,7 +61,7 @@ define void @test_whole64(i64* %existing, i64* %new) {
 }
 
 define void @test_whole32_from64(i64* %existing, i64* %new) {
-; CHECK: test_whole32_from64:
+; CHECK-LABEL: test_whole32_from64:
 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16
 ; CHECK-NOT: and
 ; CHECK: ret
@@ -79,7 +79,7 @@ define void @test_whole32_from64(i64* %existing, i64* %new) {
 }
 
 define void @test_32bit_masked(i32 *%existing, i32 *%new) {
-; CHECK: test_32bit_masked:
+; CHECK-LABEL: test_32bit_masked:
 ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
 ; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff
 
@@ -97,7 +97,7 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) {
 }
 
 define void @test_64bit_masked(i64 *%existing, i64 *%new) {
-; CHECK: test_64bit_masked:
+; CHECK-LABEL: test_64bit_masked:
 ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
 ; CHECK: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000
 
@@ -116,7 +116,7 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) {
 
 ; Mask is too complicated for literal ANDwwi, make sure other avenues are tried.
 define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
-; CHECK: test_32bit_complexmask:
+; CHECK-LABEL: test_32bit_complexmask:
 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
 ; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
 
@@ -135,7 +135,7 @@ define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
 
 ; Neither mask is is a contiguous set of 1s. BFI can't be used
 define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
-; CHECK: test_32bit_badmask:
+; CHECK-LABEL: test_32bit_badmask:
 ; CHECK-NOT: bfi
 ; CHECK: ret
 
@@ -154,7 +154,7 @@ define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
 
 ; Ditto
 define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
-; CHECK: test_64bit_badmask:
+; CHECK-LABEL: test_64bit_badmask:
 ; CHECK-NOT: bfi
 ; CHECK: ret
 
@@ -174,7 +174,7 @@ define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
 ; Bitfield insert where there's a left-over shr needed at the beginning
 ; (e.g. result of str.bf1 = str.bf2)
 define void @test_32bit_with_shr(i32* %existing, i32* %new) {
-; CHECK: test_32bit_with_shr:
+; CHECK-LABEL: test_32bit_with_shr:
 
   %oldval = load volatile i32* %existing
   %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
index 36d337ef05ef..1c84f5d57854 100644
--- a/test/CodeGen/AArch64/bitfield.ll
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -5,7 +5,7 @@
 @var64 = global i64 0
 
 define void @test_extendb(i8 %var) {
-; CHECK: test_extendb:
+; CHECK-LABEL: test_extendb:
 
   %sxt32 = sext i8 %var to i32
   store volatile i32 %sxt32, i32* @var32
@@ -29,7 +29,7 @@ define void @test_extendb(i8 %var) {
 }
 
 define void @test_extendh(i16 %var) {
-; CHECK: test_extendh:
+; CHECK-LABEL: test_extendh:
 
   %sxt32 = sext i16 %var to i32
   store volatile i32 %sxt32, i32* @var32
@@ -53,7 +53,7 @@ define void @test_extendh(i16 %var) {
 }
 
 define void @test_extendw(i32 %var) {
-; CHECK: test_extendw:
+; CHECK-LABEL: test_extendw:
 
   %sxt64 = sext i32 %var to i64
   store volatile i64 %sxt64, i64* @var64
@@ -66,7 +66,7 @@ define void @test_extendw(i32 %var) {
 }
 
 define void @test_shifts(i32 %val32, i64 %val64) {
-; CHECK: test_shifts:
+; CHECK-LABEL: test_shifts:
 
   %shift1 = ashr i32 %val32, 31
   store volatile i32 %shift1, i32* @var32
@@ -114,7 +114,7 @@ define void @test_shifts(i32 %val32, i64 %val64) {
 ; LLVM can produce in-register extensions taking place entirely with
 ; 64-bit registers too.
 define void @test_sext_inreg_64(i64 %in) {
-; CHECK: test_sext_inreg_64:
+; CHECK-LABEL: test_sext_inreg_64:
 
 ; i1 doesn't have an official alias, but crops up and is handled by
 ; the bitfield ops.
@@ -143,7 +143,7 @@ define void @test_sext_inreg_64(i64 %in) {
 ; These instructions don't actually select to official bitfield
 ; operations, but it's important that we select them somehow:
 define void @test_zext_inreg_64(i64 %in) {
-; CHECK: test_zext_inreg_64:
+; CHECK-LABEL: test_zext_inreg_64:
 
   %trunc_i8 = trunc i64 %in to i8
   %zext_i8 = zext i8 %trunc_i8 to i64
@@ -164,7 +164,7 @@ define void @test_zext_inreg_64(i64 %in) {
 }
 
 define i64 @test_sext_inreg_from_32(i32 %in) {
-; CHECK: test_sext_inreg_from_32:
+; CHECK-LABEL: test_sext_inreg_from_32:
 
   %small = trunc i32 %in to i1
   %ext = sext i1 %small to i64
@@ -178,7 +178,7 @@ define i64 @test_sext_inreg_from_32(i32 %in) {
 
 
 define i32 @test_ubfx32(i32* %addr) {
-; CHECK: test_ubfx32:
+; CHECK-LABEL: test_ubfx32:
 ; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3
 
    %fields = load i32* %addr
@@ -188,7 +188,7 @@ define i32 @test_ubfx32(i32* %addr) {
 }
 
 define i64 @test_ubfx64(i64* %addr) {
-; CHECK: test_ubfx64:
+; CHECK-LABEL: test_ubfx64:
 ; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
 
    %fields = load i64* %addr
@@ -198,7 +198,7 @@ define i64 @test_ubfx64(i64* %addr) {
 }
 
 define i32 @test_sbfx32(i32* %addr) {
-; CHECK: test_sbfx32:
+; CHECK-LABEL: test_sbfx32:
 ; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3
 
    %fields = load i32* %addr
@@ -208,7 +208,7 @@ define i32 @test_sbfx32(i32* %addr) {
 }
 
 define i64 @test_sbfx64(i64* %addr) {
-; CHECK: test_sbfx64:
+; CHECK-LABEL: test_sbfx64:
 ; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63
 
    %fields = load i64* %addr
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index 5e85057a3c3b..8cda431b8e92 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -4,7 +4,7 @@
 @addr = global i8* null
 
 define void @test_blockaddress() {
-; CHECK: test_blockaddress:
+; CHECK-LABEL: test_blockaddress:
   store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
   %val = load volatile i8** @addr
   indirectbr i8* %val, [label %block]
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
index 38ed4734e1b4..1ed5b9b755dd 100644
--- a/test/CodeGen/AArch64/breg.ll
+++ b/test/CodeGen/AArch64/breg.ll
@@ -3,7 +3,7 @@
 @stored_label = global i8* null
 
 define void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
   %lab = load i8** @stored_label
   indirectbr i8* %lab, [label  %otherlab, label %retlab]
 ; CHECK: adrp {{x[0-9]+}}, stored_label
diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll
index c66aa5bfc510..52243b05b4b9 100644
--- a/test/CodeGen/AArch64/callee-save.ll
+++ b/test/CodeGen/AArch64/callee-save.ll
@@ -3,7 +3,7 @@
 @var = global float 0.0
 
 define void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 
 ; CHECK: stp d14, d15, [sp
 ; CHECK: stp d12, d13, [sp
diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll
index a365568e11ee..b387f285d1d4 100644
--- a/test/CodeGen/AArch64/code-model-large-abs.ll
+++ b/test/CodeGen/AArch64/code-model-large-abs.ll
@@ -6,7 +6,7 @@
 @var64 = global i64 0
 
 define i8* @global_addr() {
-; CHECK: global_addr:
+; CHECK-LABEL: global_addr:
   ret i8* @var8
   ; The movz/movk calculation should end up returned directly in x0.
 ; CHECK: movz x0, #:abs_g3:var8
@@ -17,7 +17,7 @@ define i8* @global_addr() {
 }
 
 define i8 @global_i8() {
-; CHECK: global_i8:
+; CHECK-LABEL: global_i8:
   %val = load i8* @var8
   ret i8 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
@@ -28,7 +28,7 @@ define i8 @global_i8() {
 }
 
 define i16 @global_i16() {
-; CHECK: global_i16:
+; CHECK-LABEL: global_i16:
   %val = load i16* @var16
   ret i16 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
@@ -39,7 +39,7 @@ define i16 @global_i16() {
 }
 
 define i32 @global_i32() {
-; CHECK: global_i32:
+; CHECK-LABEL: global_i32:
   %val = load i32* @var32
   ret i32 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
@@ -50,7 +50,7 @@ define i32 @global_i32() {
 }
 
 define i64 @global_i64() {
-; CHECK: global_i64:
+; CHECK-LABEL: global_i64:
   %val = load i64* @var64
   ret i64 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
index 4213110497d3..75efd9d4a0d6 100644
--- a/test/CodeGen/AArch64/compare-branch.ll
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -4,7 +4,7 @@
 @var64 = global i64 0
 
 define void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 
   %val1 = load volatile i32* @var32
   %tst1 = icmp eq i32 %val1, 0
@@ -35,4 +35,4 @@ test5:
 
 end:
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/complex-copy-noneon.ll b/test/CodeGen/AArch64/complex-copy-noneon.ll
new file mode 100644
index 000000000000..4ae547856ecd
--- /dev/null
+++ b/test/CodeGen/AArch64/complex-copy-noneon.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s
+
+; The DAG combiner decided to use a vector load/store for this struct copy
+; previously. This probably shouldn't happen without NEON, but the most
+; important thing is that it compiles.
+
+define void @store_combine() nounwind {
+  %src = alloca { double, double }, align 8
+  %dst = alloca { double, double }, align 8
+
+  %src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
+  %src.real = load double* %src.realp
+  %src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
+  %src.imag = load double* %src.imagp
+
+  %dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
+  %dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
+  store double %src.real, double* %dst.realp
+  store double %src.imag, double* %dst.imagp
+  ret void
+}
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
index 3051cf53fdf8..9c1dfeb3c8d3 100644
--- a/test/CodeGen/AArch64/cond-sel.ll
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -1,24 +1,25 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var32 = global i32 0
 @var64 = global i64 0
 
 define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
-; CHECK: test_csel:
+; CHECK-LABEL: test_csel:
 
   %tst1 = icmp ugt i32 %lhs32, %rhs32
   %val1 = select i1 %tst1, i32 42, i32 52
   store i32 %val1, i32* @var32
-; CHECK: movz [[W52:w[0-9]+]], #52
-; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK-DAG: movz [[W52:w[0-9]+]], #52
+; CHECK-DAG: movz [[W42:w[0-9]+]], #42
 ; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi
 
   %rhs64 = sext i32 %rhs32 to i64
   %tst2 = icmp sle i64 %lhs64, %rhs64
   %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64
   store i64 %val2, i64* @var64
-; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw
-; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]]
+; CHECK-DAG: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw
+; CHECK-DAG: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]]
 ; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le
 
   ret void
@@ -26,10 +27,11 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
 }
 
 define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %rhs64) {
-; CHECK: test_floatcsel:
+; CHECK-LABEL: test_floatcsel:
 
   %tst1 = fcmp one float %lhs32, %rhs32
 ; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFP-NOT: fcmp
   %val1 = select i1 %tst1, i32 42, i32 52
   store i32 %val1, i32* @var32
 ; CHECK: movz [[W52:w[0-9]+]], #52
@@ -40,6 +42,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r
 
   %tst2 = fcmp ueq double %lhs64, %rhs64
 ; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFP-NOT: fcmp
   %val2 = select i1 %tst2, i64 9, i64 15
   store i64 %val2, i64* @var64
 ; CHECK: movz [[CONST15:x[0-9]+]], #15
@@ -53,7 +56,7 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r
 
 
 define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
-; CHECK: test_csinc:
+; CHECK-LABEL: test_csinc:
 
 ; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
   %tst1 = icmp ugt i32 %lhs32, %rhs32
@@ -93,7 +96,7 @@ define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
 }
 
 define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
-; CHECK: test_csinv:
+; CHECK-LABEL: test_csinv:
 
 ; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
   %tst1 = icmp ugt i32 %lhs32, %rhs32
@@ -133,7 +136,7 @@ define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
 }
 
 define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
-; CHECK: test_csneg:
+; CHECK-LABEL: test_csneg:
 
 ; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
   %tst1 = icmp ugt i32 %lhs32, %rhs32
@@ -173,7 +176,7 @@ define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
 }
 
 define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) {
-; CHECK: test_cset:
+; CHECK-LABEL: test_cset:
 
 ; N.b. code is not optimal here (32-bit csinc would be better) but
 ; incoming DAG is too complex
@@ -194,7 +197,7 @@ define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) {
 }
 
 define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) {
-; CHECK: test_csetm:
+; CHECK-LABEL: test_csetm:
 
   %tst1 = icmp eq i32 %lhs, %rhs
   %val1 = sext i1 %tst1 to i32
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
index f5d57593bfad..12c7b6aed643 100644
--- a/test/CodeGen/AArch64/directcond.ll
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
-; CHECK: test_select_i32:
+; CHECK-LABEL: test_select_i32:
   %val = select i1 %bit, i32 %a, i32 %b
 ; CHECK: movz [[ONE:w[0-9]+]], #1
 ; CHECK: tst w0, [[ONE]]
@@ -11,7 +12,7 @@ define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
 }
 
 define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
-; CHECK: test_select_i64:
+; CHECK-LABEL: test_select_i64:
   %val = select i1 %bit, i64 %a, i64 %b
 ; CHECK: movz [[ONE:w[0-9]+]], #1
 ; CHECK: tst w0, [[ONE]]
@@ -21,27 +22,28 @@ define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
 }
 
 define float @test_select_float(i1 %bit, float %a, float %b) {
-; CHECK: test_select_float:
+; CHECK-LABEL: test_select_float:
   %val = select i1 %bit, float %a, float %b
 ; CHECK: movz [[ONE:w[0-9]+]], #1
 ; CHECK: tst w0, [[ONE]]
 ; CHECK-NEXT: fcsel s0, s0, s1, ne
-
+; CHECK-NOFP-NOT: fcsel
   ret float %val
 }
 
 define double @test_select_double(i1 %bit, double %a, double %b) {
-; CHECK: test_select_double:
+; CHECK-LABEL: test_select_double:
   %val = select i1 %bit, double %a, double %b
 ; CHECK: movz [[ONE:w[0-9]+]], #1
 ; CHECK: tst w0, [[ONE]]
 ; CHECK-NEXT: fcsel d0, d0, d1, ne
+; CHECK-NOFP-NOT: fcsel
 
   ret double %val
 }
 
 define i32 @test_brcond(i1 %bit) {
-; CHECK: test_brcond:
+; CHECK-LABEL: test_brcond:
   br i1 %bit, label %true, label %false
 ; CHECK: tbz {{w[0-9]+}}, #0, .LBB
 
@@ -56,6 +58,7 @@ define i1 @test_setcc_float(float %lhs, float %rhs) {
   %val = fcmp oeq float %lhs, %rhs
 ; CHECK: fcmp s0, s1
 ; CHECK: csinc w0, wzr, wzr, ne
+; CHECK-NOFP-NOT: fcmp
   ret i1 %val
 }
 
@@ -64,6 +67,7 @@ define i1 @test_setcc_double(double %lhs, double %rhs) {
   %val = fcmp oeq double %lhs, %rhs
 ; CHECK: fcmp d0, d1
 ; CHECK: csinc w0, wzr, wzr, ne
+; CHECK-NOFP-NOT: fcmp
   ret i1 %val
 }
 
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
index c40d3933b44b..81d9e15532fa 100644
--- a/test/CodeGen/AArch64/dp-3source.ll
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
-; CHECK: test_madd32:
+; CHECK-LABEL: test_madd32:
   %mid = mul i32 %val1, %val2
   %res = add i32 %val0, %mid
 ; CHECK: madd {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -9,7 +9,7 @@ define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
 }
 
 define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) {
-; CHECK: test_madd64:
+; CHECK-LABEL: test_madd64:
   %mid = mul i64 %val1, %val2
   %res = add i64 %val0, %mid
 ; CHECK: madd {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
@@ -17,7 +17,7 @@ define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) {
 }
 
 define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) {
-; CHECK: test_msub32:
+; CHECK-LABEL: test_msub32:
   %mid = mul i32 %val1, %val2
   %res = sub i32 %val0, %mid
 ; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -25,7 +25,7 @@ define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) {
 }
 
 define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) {
-; CHECK: test_msub64:
+; CHECK-LABEL: test_msub64:
   %mid = mul i64 %val1, %val2
   %res = sub i64 %val0, %mid
 ; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
@@ -33,7 +33,7 @@ define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) {
 }
 
 define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) {
-; CHECK: test_smaddl:
+; CHECK-LABEL: test_smaddl:
   %ext1 = sext i32 %val1 to i64
   %ext2 = sext i32 %val2 to i64
   %prod = mul i64 %ext1, %ext2
@@ -43,7 +43,7 @@ define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) {
 }
 
 define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) {
-; CHECK: test_smsubl:
+; CHECK-LABEL: test_smsubl:
   %ext1 = sext i32 %val1 to i64
   %ext2 = sext i32 %val2 to i64
   %prod = mul i64 %ext1, %ext2
@@ -53,7 +53,7 @@ define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) {
 }
 
 define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) {
-; CHECK: test_umaddl:
+; CHECK-LABEL: test_umaddl:
   %ext1 = zext i32 %val1 to i64
   %ext2 = zext i32 %val2 to i64
   %prod = mul i64 %ext1, %ext2
@@ -63,7 +63,7 @@ define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) {
 }
 
 define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) {
-; CHECK: test_umsubl:
+; CHECK-LABEL: test_umsubl:
   %ext1 = zext i32 %val1 to i64
   %ext2 = zext i32 %val2 to i64
   %prod = mul i64 %ext1, %ext2
@@ -73,7 +73,7 @@ define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) {
 }
 
 define i64 @test_smulh(i64 %lhs, i64 %rhs) {
-; CHECK: test_smulh:
+; CHECK-LABEL: test_smulh:
   %ext1 = sext i64 %lhs to i128
   %ext2 = sext i64 %rhs to i128
   %res = mul i128 %ext1, %ext2
@@ -84,7 +84,7 @@ define i64 @test_smulh(i64 %lhs, i64 %rhs) {
 }
 
 define i64 @test_umulh(i64 %lhs, i64 %rhs) {
-; CHECK: test_umulh:
+; CHECK-LABEL: test_umulh:
   %ext1 = zext i64 %lhs to i128
   %ext2 = zext i64 %rhs to i128
   %res = mul i128 %ext1, %ext2
@@ -95,21 +95,21 @@ define i64 @test_umulh(i64 %lhs, i64 %rhs) {
 }
 
 define i32 @test_mul32(i32 %lhs, i32 %rhs) {
-; CHECK: test_mul32:
+; CHECK-LABEL: test_mul32:
   %res = mul i32 %lhs, %rhs
 ; CHECK: mul {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
   ret i32 %res
 }
 
 define i64 @test_mul64(i64 %lhs, i64 %rhs) {
-; CHECK: test_mul64:
+; CHECK-LABEL: test_mul64:
   %res = mul i64 %lhs, %rhs
 ; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
   ret i64 %res
 }
 
 define i32 @test_mneg32(i32 %lhs, i32 %rhs) {
-; CHECK: test_mneg32:
+; CHECK-LABEL: test_mneg32:
   %prod = mul i32 %lhs, %rhs
   %res = sub i32 0, %prod
 ; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -117,7 +117,7 @@ define i32 @test_mneg32(i32 %lhs, i32 %rhs) {
 }
 
 define i64 @test_mneg64(i64 %lhs, i64 %rhs) {
-; CHECK: test_mneg64:
+; CHECK-LABEL: test_mneg64:
   %prod = mul i64 %lhs, %rhs
   %res = sub i64 0, %prod
 ; CHECK: mneg {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
@@ -125,7 +125,7 @@ define i64 @test_mneg64(i64 %lhs, i64 %rhs) {
 }
 
 define i64 @test_smull(i32 %lhs, i32 %rhs) {
-; CHECK: test_smull:
+; CHECK-LABEL: test_smull:
   %ext1 = sext i32 %lhs to i64
   %ext2 = sext i32 %rhs to i64
   %res = mul i64 %ext1, %ext2
@@ -134,7 +134,7 @@ define i64 @test_smull(i32 %lhs, i32 %rhs) {
 }
 
 define i64 @test_umull(i32 %lhs, i32 %rhs) {
-; CHECK: test_umull:
+; CHECK-LABEL: test_umull:
   %ext1 = zext i32 %lhs to i64
   %ext2 = zext i32 %rhs to i64
   %res = mul i64 %ext1, %ext2
@@ -143,7 +143,7 @@ define i64 @test_umull(i32 %lhs, i32 %rhs) {
 }
 
 define i64 @test_smnegl(i32 %lhs, i32 %rhs) {
-; CHECK: test_smnegl:
+; CHECK-LABEL: test_smnegl:
   %ext1 = sext i32 %lhs to i64
   %ext2 = sext i32 %rhs to i64
   %prod = mul i64 %ext1, %ext2
@@ -153,7 +153,7 @@ define i64 @test_smnegl(i32 %lhs, i32 %rhs) {
 }
 
 define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
-; CHECK: test_umnegl:
+; CHECK-LABEL: test_umnegl:
   %ext1 = zext i32 %lhs to i64
   %ext2 = zext i32 %rhs to i64
   %prod = mul i64 %ext1, %ext2
diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll
index 83aa8b4f6631..6a8d55cdc7ea 100644
--- a/test/CodeGen/AArch64/dp1.ll
+++ b/test/CodeGen/AArch64/dp1.ll
@@ -4,7 +4,7 @@
 @var64 = global i64 0
 
 define void @rev_i32() {
-; CHECK: rev_i32:
+; CHECK-LABEL: rev_i32:
     %val0_tmp = load i32* @var32
     %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
 ; CHECK: rev	{{w[0-9]+}}, {{w[0-9]+}}
@@ -13,7 +13,7 @@ define void @rev_i32() {
 }
 
 define void @rev_i64() {
-; CHECK: rev_i64:
+; CHECK-LABEL: rev_i64:
     %val0_tmp = load i64* @var64
     %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
 ; CHECK: rev	{{x[0-9]+}}, {{x[0-9]+}}
@@ -22,7 +22,7 @@ define void @rev_i64() {
 }
 
 define void @rev32_i64() {
-; CHECK: rev32_i64:
+; CHECK-LABEL: rev32_i64:
     %val0_tmp = load i64* @var64
     %val1_tmp = shl i64 %val0_tmp, 32
     %val5_tmp = sub i64 64, 32
@@ -35,7 +35,7 @@ define void @rev32_i64() {
 }
 
 define void @rev16_i32() {
-; CHECK: rev16_i32:
+; CHECK-LABEL: rev16_i32:
     %val0_tmp = load i32* @var32
     %val1_tmp = shl i32 %val0_tmp, 16
     %val2_tmp = lshr i32 %val0_tmp, 16
@@ -47,7 +47,7 @@ define void @rev16_i32() {
 }
 
 define void @clz_zerodef_i32() {
-; CHECK: clz_zerodef_i32:
+; CHECK-LABEL: clz_zerodef_i32:
     %val0_tmp = load i32* @var32
     %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
 ; CHECK: clz	{{w[0-9]+}}, {{w[0-9]+}}
@@ -56,7 +56,7 @@ define void @clz_zerodef_i32() {
 }
 
 define void @clz_zerodef_i64() {
-; CHECK: clz_zerodef_i64:
+; CHECK-LABEL: clz_zerodef_i64:
     %val0_tmp = load i64* @var64
     %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
 ; CHECK: clz	{{x[0-9]+}}, {{x[0-9]+}}
@@ -65,7 +65,7 @@ define void @clz_zerodef_i64() {
 }
 
 define void @clz_zeroundef_i32() {
-; CHECK: clz_zeroundef_i32:
+; CHECK-LABEL: clz_zeroundef_i32:
     %val0_tmp = load i32* @var32
     %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
 ; CHECK: clz	{{w[0-9]+}}, {{w[0-9]+}}
@@ -74,7 +74,7 @@ define void @clz_zeroundef_i32() {
 }
 
 define void @clz_zeroundef_i64() {
-; CHECK: clz_zeroundef_i64:
+; CHECK-LABEL: clz_zeroundef_i64:
     %val0_tmp = load i64* @var64
     %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
 ; CHECK: clz	{{x[0-9]+}}, {{x[0-9]+}}
@@ -83,7 +83,7 @@ define void @clz_zeroundef_i64() {
 }
 
 define void @cttz_zerodef_i32() {
-; CHECK: cttz_zerodef_i32:
+; CHECK-LABEL: cttz_zerodef_i32:
     %val0_tmp = load i32* @var32
     %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
 ; CHECK: rbit   [[REVERSED:w[0-9]+]], {{w[0-9]+}}
@@ -93,7 +93,7 @@ define void @cttz_zerodef_i32() {
 }
 
 define void @cttz_zerodef_i64() {
-; CHECK: cttz_zerodef_i64:
+; CHECK-LABEL: cttz_zerodef_i64:
     %val0_tmp = load i64* @var64
     %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
 ; CHECK: rbit   [[REVERSED:x[0-9]+]], {{x[0-9]+}}
@@ -103,7 +103,7 @@ define void @cttz_zerodef_i64() {
 }
 
 define void @cttz_zeroundef_i32() {
-; CHECK: cttz_zeroundef_i32:
+; CHECK-LABEL: cttz_zeroundef_i32:
     %val0_tmp = load i32* @var32
     %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
 ; CHECK: rbit   [[REVERSED:w[0-9]+]], {{w[0-9]+}}
@@ -113,7 +113,7 @@ define void @cttz_zeroundef_i32() {
 }
 
 define void @cttz_zeroundef_i64() {
-; CHECK: cttz_zeroundef_i64:
+; CHECK-LABEL: cttz_zeroundef_i64:
     %val0_tmp = load i64* @var64
     %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
 ; CHECK: rbit   [[REVERSED:x[0-9]+]], {{x[0-9]+}}
@@ -125,7 +125,7 @@ define void @cttz_zeroundef_i64() {
 ; These two are just compilation tests really: the operation's set to Expand in
 ; ISelLowering.
 define void @ctpop_i32() {
-; CHECK: ctpop_i32:
+; CHECK-LABEL: ctpop_i32:
     %val0_tmp = load i32* @var32
     %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
     store volatile i32 %val4_tmp, i32* @var32
@@ -133,7 +133,7 @@ define void @ctpop_i32() {
 }
 
 define void @ctpop_i64() {
-; CHECK: ctpop_i64:
+; CHECK-LABEL: ctpop_i64:
     %val0_tmp = load i64* @var64
     %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp)
     store volatile i64 %val4_tmp, i64* @var64
diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll
index 4c740f6b8623..48b0701ad1fa 100644
--- a/test/CodeGen/AArch64/dp2.ll
+++ b/test/CodeGen/AArch64/dp2.ll
@@ -6,7 +6,7 @@
 @var64_1 = global i64 0
 
 define void @rorv_i64() {
-; CHECK: rorv_i64:
+; CHECK-LABEL: rorv_i64:
     %val0_tmp = load i64* @var64_0
     %val1_tmp = load i64* @var64_1
     %val2_tmp = sub i64 64, %val1_tmp
@@ -19,7 +19,7 @@ define void @rorv_i64() {
 }
 
 define void @asrv_i64() {
-; CHECK: asrv_i64:
+; CHECK-LABEL: asrv_i64:
     %val0_tmp = load i64* @var64_0
     %val1_tmp = load i64* @var64_1
     %val4_tmp = ashr i64 %val0_tmp, %val1_tmp
@@ -29,7 +29,7 @@ define void @asrv_i64() {
 }
 
 define void @lsrv_i64() {
-; CHECK: lsrv_i64:
+; CHECK-LABEL: lsrv_i64:
     %val0_tmp = load i64* @var64_0
     %val1_tmp = load i64* @var64_1
     %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
@@ -39,7 +39,7 @@ define void @lsrv_i64() {
 }
 
 define void @lslv_i64() {
-; CHECK: lslv_i64:
+; CHECK-LABEL: lslv_i64:
     %val0_tmp = load i64* @var64_0
     %val1_tmp = load i64* @var64_1
     %val4_tmp = shl i64 %val0_tmp, %val1_tmp
@@ -49,7 +49,7 @@ define void @lslv_i64() {
 }
 
 define void @udiv_i64() {
-; CHECK: udiv_i64:
+; CHECK-LABEL: udiv_i64:
     %val0_tmp = load i64* @var64_0
     %val1_tmp = load i64* @var64_1
     %val4_tmp = udiv i64 %val0_tmp, %val1_tmp
@@ -59,7 +59,7 @@ define void @udiv_i64() {
 }
 
 define void @sdiv_i64() {
-; CHECK: sdiv_i64:
+; CHECK-LABEL: sdiv_i64:
     %val0_tmp = load i64* @var64_0
     %val1_tmp = load i64* @var64_1
     %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp
@@ -70,7 +70,7 @@ define void @sdiv_i64() {
 
 
 define void @lsrv_i32() {
-; CHECK: lsrv_i32:
+; CHECK-LABEL: lsrv_i32:
     %val0_tmp = load i32* @var32_0
     %val1_tmp = load i32* @var32_1
     %val2_tmp = add i32 1, %val1_tmp
@@ -81,7 +81,7 @@ define void @lsrv_i32() {
 }
 
 define void @lslv_i32() {
-; CHECK: lslv_i32:
+; CHECK-LABEL: lslv_i32:
     %val0_tmp = load i32* @var32_0
     %val1_tmp = load i32* @var32_1
     %val2_tmp = add i32 1, %val1_tmp
@@ -92,7 +92,7 @@ define void @lslv_i32() {
 }
 
 define void @rorv_i32() {
-; CHECK: rorv_i32:
+; CHECK-LABEL: rorv_i32:
     %val0_tmp = load i32* @var32_0
     %val6_tmp = load i32* @var32_1
     %val1_tmp = add i32 1, %val6_tmp
@@ -106,7 +106,7 @@ define void @rorv_i32() {
 }
 
 define void @asrv_i32() {
-; CHECK: asrv_i32:
+; CHECK-LABEL: asrv_i32:
     %val0_tmp = load i32* @var32_0
     %val1_tmp = load i32* @var32_1
     %val2_tmp = add i32 1, %val1_tmp
@@ -117,7 +117,7 @@ define void @asrv_i32() {
 }
 
 define void @sdiv_i32() {
-; CHECK: sdiv_i32:
+; CHECK-LABEL: sdiv_i32:
     %val0_tmp = load i32* @var32_0
     %val1_tmp = load i32* @var32_1
     %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp
@@ -127,7 +127,7 @@ define void @sdiv_i32() {
 }
 
 define void @udiv_i32() {
-; CHECK: udiv_i32:
+; CHECK-LABEL: udiv_i32:
     %val0_tmp = load i32* @var32_0
     %val1_tmp = load i32* @var32_1
     %val4_tmp = udiv i32 %val0_tmp, %val1_tmp
@@ -139,7 +139,7 @@ define void @udiv_i32() {
 ; The point of this test is that we may not actually see (shl GPR32:$Val, (zext GPR32:$Val2))
 ; in the DAG (the RHS may be natively 64-bit), but we should still use the lsl instructions.
 define i32 @test_lsl32() {
-; CHECK: test_lsl32:
+; CHECK-LABEL: test_lsl32:
 
   %val = load i32* @var32_0
   %ret = shl i32 1, %val
@@ -149,7 +149,7 @@ define i32 @test_lsl32() {
 }
 
 define i32 @test_lsr32() {
-; CHECK: test_lsr32:
+; CHECK-LABEL: test_lsr32:
 
   %val = load i32* @var32_0
   %ret = lshr i32 1, %val
@@ -159,7 +159,7 @@ define i32 @test_lsr32() {
 }
 
 define i32 @test_asr32(i32 %in) {
-; CHECK: test_asr32:
+; CHECK-LABEL: test_asr32:
 
   %val = load i32* @var32_0
   %ret = ashr i32 %in, %val
diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll
deleted file mode 100644
index 8bf1b2ff4fa9..000000000000
--- a/test/CodeGen/AArch64/elf-extern.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s
-
-; External symbols are a different concept to global variables but should still
-; get relocations and so on when used.
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
-
-define i32 @check_extern() {
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
-  ret i32 0
-}
-
-; CHECK: Relocations [
-; CHECK:   Section (1) .text {
-; CHECK:     0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy
-; CHECK:   }
-; CHECK: ]
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
index bc0acc253388..322b3f4522d6 100644
--- a/test/CodeGen/AArch64/extern-weak.ll
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -51,4 +51,4 @@ define i32* @wibble() {
 ; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g0_nc:defined_weak_var
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll
index 06267816a4e1..62d9ed2fc9d9 100644
--- a/test/CodeGen/AArch64/extract.ll
+++ b/test/CodeGen/AArch64/extract.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i64 @ror_i64(i64 %in) {
-; CHECK: ror_i64:
+; CHECK-LABEL: ror_i64:
     %left = shl i64 %in, 19
     %right = lshr i64 %in, 45
     %val5 = or i64 %left, %right
@@ -10,7 +10,7 @@ define i64 @ror_i64(i64 %in) {
 }
 
 define i32 @ror_i32(i32 %in) {
-; CHECK: ror_i32:
+; CHECK-LABEL: ror_i32:
     %left = shl i32 %in, 9
     %right = lshr i32 %in, 23
     %val5 = or i32 %left, %right
@@ -19,7 +19,7 @@ define i32 @ror_i32(i32 %in) {
 }
 
 define i32 @extr_i32(i32 %lhs, i32 %rhs) {
-; CHECK: extr_i32:
+; CHECK-LABEL: extr_i32:
   %left = shl i32 %lhs, 6
   %right = lshr i32 %rhs, 26
   %val = or i32 %left, %right
@@ -31,7 +31,7 @@ define i32 @extr_i32(i32 %lhs, i32 %rhs) {
 }
 
 define i64 @extr_i64(i64 %lhs, i64 %rhs) {
-; CHECK: extr_i64:
+; CHECK-LABEL: extr_i64:
   %right = lshr i64 %rhs, 40
   %left = shl i64 %lhs, 24
   %val = or i64 %right, %left
@@ -45,7 +45,7 @@ define i64 @extr_i64(i64 %lhs, i64 %rhs) {
 ; Regression test: a bad experimental pattern crept into git which optimised
 ; this pattern to a single EXTR.
 define i32 @extr_regress(i32 %a, i32 %b) {
-; CHECK: extr_regress:
+; CHECK-LABEL: extr_regress:
 
     %sh1 = shl i32 %a, 14
     %sh2 = lshr i32 %b, 14
diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll
index e40aa3033bde..c6c050570dd6 100644
--- a/test/CodeGen/AArch64/fastcc-reserved.ll
+++ b/test/CodeGen/AArch64/fastcc-reserved.ll
@@ -7,7 +7,7 @@
 declare fastcc void @will_pop([8 x i32], i32 %val)
 
 define fastcc void @foo(i32 %in) {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 
   %addr = alloca i8, i32 %in
 
@@ -34,7 +34,7 @@ define fastcc void @foo(i32 %in) {
 declare void @wont_pop([8 x i32], i32 %val)
 
 define void @foo1(i32 %in) {
-; CHECK: foo1:
+; CHECK-LABEL: foo1:
 
   %addr = alloca i8, i32 %in
 ; Normal frame setup again
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
index 41cde94edc1c..a4cd37858ee4 100644
--- a/test/CodeGen/AArch64/fastcc.ll
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -5,10 +5,10 @@
 ; stack, so try to make sure this is respected.
 
 define fastcc void @func_stack0() {
-; CHECK: func_stack0:
+; CHECK-LABEL: func_stack0:
 ; CHECK: sub sp, sp, #48
 
-; CHECK-TAIL: func_stack0:
+; CHECK-TAIL-LABEL: func_stack0:
 ; CHECK-TAIL: sub sp, sp, #48
 
 
@@ -45,10 +45,10 @@ define fastcc void @func_stack0() {
 }
 
 define fastcc void @func_stack8([8 x i32], i32 %stacked) {
-; CHECK: func_stack8:
+; CHECK-LABEL: func_stack8:
 ; CHECK: sub sp, sp, #48
 
-; CHECK-TAIL: func_stack8:
+; CHECK-TAIL-LABEL: func_stack8:
 ; CHECK-TAIL: sub sp, sp, #48
 
 
@@ -84,10 +84,10 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
 }
 
 define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
-; CHECK: func_stack32:
+; CHECK-LABEL: func_stack32:
 ; CHECK: sub sp, sp, #48
 
-; CHECK-TAIL: func_stack32:
+; CHECK-TAIL-LABEL: func_stack32:
 ; CHECK-TAIL: sub sp, sp, #48
 
 
diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll
index ad4a903c9b25..a9518eabb754 100644
--- a/test/CodeGen/AArch64/fcmp.ll
+++ b/test/CodeGen/AArch64/fcmp.ll
@@ -3,7 +3,7 @@
 declare void @bar(i32)
 
 define void @test_float(float %a, float %b) {
-; CHECK: test_float:
+; CHECK-LABEL: test_float:
 
   %tst1 = fcmp oeq float %a, %b
   br i1 %tst1, label %end, label %t2
@@ -42,7 +42,7 @@ end:
 }
 
 define void @test_double(double %a, double %b) {
-; CHECK: test_double:
+; CHECK-LABEL: test_double:
 
   %tst1 = fcmp oeq double %a, %b
   br i1 %tst1, label %end, label %t2
diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll
index 0f7b95b2a48f..9d66da49437b 100644
--- a/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -4,7 +4,7 @@
 @var64 = global i64 0
 
 define void @test_fcvtzs(float %flt, double %dbl) {
-; CHECK: test_fcvtzs:
+; CHECK-LABEL: test_fcvtzs:
 
   %fix1 = fmul float %flt, 128.0
   %cvt1 = fptosi float %fix1 to i32
@@ -50,7 +50,7 @@ define void @test_fcvtzs(float %flt, double %dbl) {
 }
 
 define void @test_fcvtzu(float %flt, double %dbl) {
-; CHECK: test_fcvtzu:
+; CHECK-LABEL: test_fcvtzu:
 
   %fix1 = fmul float %flt, 128.0
   %cvt1 = fptoui float %fix1 to i32
@@ -99,7 +99,7 @@ define void @test_fcvtzu(float %flt, double %dbl) {
 @vardouble = global double 0.0
 
 define void @test_scvtf(i32 %int, i64 %long) {
-; CHECK: test_scvtf:
+; CHECK-LABEL: test_scvtf:
 
   %cvt1 = sitofp i32 %int to float
   %fix1 = fdiv float %cvt1, 128.0
@@ -145,7 +145,7 @@ define void @test_scvtf(i32 %int, i64 %long) {
 }
 
 define void @test_ucvtf(i32 %int, i64 %long) {
-; CHECK: test_ucvtf:
+; CHECK-LABEL: test_ucvtf:
 
   %cvt1 = uitofp i32 %int to float
   %fix1 = fdiv float %cvt1, 128.0
diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll
index c771d683a99c..b28eb3ea1bef 100644
--- a/test/CodeGen/AArch64/fcvt-int.ll
+++ b/test/CodeGen/AArch64/fcvt-int.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i32 @test_floattoi32(float %in) {
-; CHECK: test_floattoi32:
+; CHECK-LABEL: test_floattoi32:
 
   %signed = fptosi float %in to i32
   %unsigned = fptoui float %in to i32
-; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}}
-; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}}
+; CHECK-DAG: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}}
+; CHECK-DAG: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}}
 
   %res = sub i32 %signed, %unsigned
 ; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -16,12 +16,12 @@ define i32 @test_floattoi32(float %in) {
 }
 
 define i32 @test_doubletoi32(double %in) {
-; CHECK: test_doubletoi32:
+; CHECK-LABEL: test_doubletoi32:
 
   %signed = fptosi double %in to i32
   %unsigned = fptoui double %in to i32
-; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}}
-; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}}
+; CHECK-DAG: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}}
+; CHECK-DAG: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}}
 
   %res = sub i32 %signed, %unsigned
 ; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -31,12 +31,12 @@ define i32 @test_doubletoi32(double %in) {
 }
 
 define i64 @test_floattoi64(float %in) {
-; CHECK: test_floattoi64:
+; CHECK-LABEL: test_floattoi64:
 
   %signed = fptosi float %in to i64
   %unsigned = fptoui float %in to i64
-; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}}
-; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}}
+; CHECK-DAG: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}}
+; CHECK-DAG: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}}
 
   %res = sub i64 %signed, %unsigned
 ; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -46,12 +46,12 @@ define i64 @test_floattoi64(float %in) {
 }
 
 define i64 @test_doubletoi64(double %in) {
-; CHECK: test_doubletoi64:
+; CHECK-LABEL: test_doubletoi64:
 
   %signed = fptosi double %in to i64
   %unsigned = fptoui double %in to i64
-; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}}
-; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}}
+; CHECK-DAG: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}}
+; CHECK-DAG: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}}
 
   %res = sub i64 %signed, %unsigned
 ; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -61,12 +61,12 @@ define i64 @test_doubletoi64(double %in) {
 }
 
 define float @test_i32tofloat(i32 %in) {
-; CHECK: test_i32tofloat:
+; CHECK-LABEL: test_i32tofloat:
 
   %signed = sitofp i32 %in to float
   %unsigned = uitofp i32 %in to float
-; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}}
-; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}}
+; CHECK-DAG: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}}
+; CHECK-DAG: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}}
 
   %res = fsub float %signed, %unsigned
-; CHECL: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -75,12 +75,12 @@ define float @test_i32tofloat(i32 %in) {
 }
 
 define double @test_i32todouble(i32 %in) {
-; CHECK: test_i32todouble:
+; CHECK-LABEL: test_i32todouble:
 
   %signed = sitofp i32 %in to double
   %unsigned = uitofp i32 %in to double
-; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}}
-; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}}
+; CHECK-DAG: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}}
+; CHECK-DAG: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}}
 
   %res = fsub double %signed, %unsigned
 ; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -89,12 +89,12 @@ define double @test_i32todouble(i32 %in) {
 }
 
 define float @test_i64tofloat(i64 %in) {
-; CHECK: test_i64tofloat:
+; CHECK-LABEL: test_i64tofloat:
 
   %signed = sitofp i64 %in to float
   %unsigned = uitofp i64 %in to float
-; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}}
-; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}}
+; CHECK-DAG: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}}
+; CHECK-DAG: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}}
 
   %res = fsub float %signed, %unsigned
 ; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -103,12 +103,12 @@ define float @test_i64tofloat(i64 %in) {
 }
 
 define double @test_i64todouble(i64 %in) {
-; CHECK: test_i64todouble:
+; CHECK-LABEL: test_i64todouble:
 
   %signed = sitofp i64 %in to double
   %unsigned = uitofp i64 %in to double
-; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}}
-; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}}
+; CHECK-DAG: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}}
+; CHECK-DAG: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}}
 
   %res = fsub double %signed, %unsigned
-; CHECK: sub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
@@ -117,7 +117,7 @@ define double @test_i64todouble(i64 %in) {
 }
 
 define i32 @test_bitcastfloattoi32(float %in) {
-; CHECK: test_bitcastfloattoi32:
+; CHECK-LABEL: test_bitcastfloattoi32:
 
    %res = bitcast float %in to i32
 ; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
@@ -125,7 +125,7 @@ define i32 @test_bitcastfloattoi32(float %in) {
 }
 
 define i64 @test_bitcastdoubletoi64(double %in) {
-; CHECK: test_bitcastdoubletoi64:
+; CHECK-LABEL: test_bitcastdoubletoi64:
 
    %res = bitcast double %in to i64
 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
@@ -133,7 +133,7 @@ define i64 @test_bitcastdoubletoi64(double %in) {
 }
 
 define float @test_bitcasti32tofloat(i32 %in) {
-; CHECK: test_bitcasti32tofloat:
+; CHECK-LABEL: test_bitcasti32tofloat:
 
    %res = bitcast i32 %in to float
 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
@@ -142,7 +142,7 @@ define float @test_bitcasti32tofloat(i32 %in) {
 }
 
 define double @test_bitcasti64todouble(i64 %in) {
-; CHECK: test_bitcasti64todouble:
+; CHECK-LABEL: test_bitcasti64todouble:
 
    %res = bitcast i64 %in to double
 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
index 940c146f0a9f..e99c72833997 100644
--- a/test/CodeGen/AArch64/flags-multiuse.ll
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -9,7 +9,7 @@ declare void @bar()
 @var = global i32 0
 
 define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
-; CHECK: test_multiflag:
+; CHECK-LABEL: test_multiflag:
 
   %test = icmp ne i32 %n, %m
 ; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll
index c94ba9b57b5a..3d7f8f0369fc 100644
--- a/test/CodeGen/AArch64/floatdp_1source.ll
+++ b/test/CodeGen/AArch64/floatdp_1source.ll
@@ -26,7 +26,7 @@ declare float @nearbyintf(float) readonly
 declare double @nearbyint(double) readonly
 
 define void @simple_float() {
-; CHECK: simple_float:
+; CHECK-LABEL: simple_float:
   %val1 = load volatile float* @varfloat
 
   %valabs = call float @fabsf(float %val1)
@@ -65,7 +65,7 @@ define void @simple_float() {
 }
 
 define void @simple_double() {
-; CHECK: simple_double:
+; CHECK-LABEL: simple_double:
   %val1 = load volatile double* @vardouble
 
   %valabs = call double @fabs(double %val1)
@@ -104,7 +104,7 @@ define void @simple_double() {
 }
 
 define void @converts() {
-; CHECK: converts:
+; CHECK-LABEL: converts:
 
   %val16 = load volatile half* @varhalf
   %val32 = load volatile float* @varfloat
diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll
index b2256b342acf..bb655285ac54 100644
--- a/test/CodeGen/AArch64/floatdp_2source.ll
+++ b/test/CodeGen/AArch64/floatdp_2source.ll
@@ -4,7 +4,7 @@
 @vardouble = global double 0.0
 
 define void @testfloat() {
-; CHECK: testfloat:
+; CHECK-LABEL: testfloat:
   %val1 = load float* @varfloat
 
   %val2 = fadd float %val1, %val1
@@ -32,7 +32,7 @@ define void @testfloat() {
 }
 
 define void @testdouble() {
-; CHECK: testdouble:
+; CHECK-LABEL: testdouble:
   %val1 = load double* @vardouble
 
   %val2 = fadd double %val1, %val1
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
index 56e8f16f9b36..572f42e210b1 100644
--- a/test/CodeGen/AArch64/fp-cond-sel.ll
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -4,7 +4,7 @@
 @vardouble = global double 0.0
 
 define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
-; CHECK: test_csel:
+; CHECK-LABEL: test_csel:
 
   %tst1 = icmp ugt i32 %lhs32, %rhs32
   %val1 = select i1 %tst1, float 0.0, float 1.0
diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll
index 39db9be15771..590557f1e8ed 100644
--- a/test/CodeGen/AArch64/fp-dp3.ll
+++ b/test/CodeGen/AArch64/fp-dp3.ll
@@ -1,102 +1,137 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST
 
 declare float @llvm.fma.f32(float, float, float)
 declare double @llvm.fma.f64(double, double, double)
 
 define float @test_fmadd(float %a, float %b, float %c) {
-; CHECK: test_fmadd:
+; CHECK-LABEL: test_fmadd:
+; CHECK-NOFAST-LABEL: test_fmadd:
   %val = call float @llvm.fma.f32(float %a, float %b, float %c)
 ; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define float @test_fmsub(float %a, float %b, float %c) {
-; CHECK: test_fmsub:
+; CHECK-LABEL: test_fmsub:
+; CHECK-NOFAST-LABEL: test_fmsub:
   %nega = fsub float -0.0, %a
   %val = call float @llvm.fma.f32(float %nega, float %b, float %c)
 ; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define float @test_fnmadd(float %a, float %b, float %c) {
-; CHECK: test_fnmadd:
+; CHECK-LABEL: test_fnmadd:
+; CHECK-NOFAST-LABEL: test_fnmadd:
   %negc = fsub float -0.0, %c
   %val = call float @llvm.fma.f32(float %a, float %b, float %negc)
 ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define float @test_fnmsub(float %a, float %b, float %c) {
-; CHECK: test_fnmsub:
+; CHECK-LABEL: test_fnmsub:
+; CHECK-NOFAST-LABEL: test_fnmsub:
   %nega = fsub float -0.0, %a
   %negc = fsub float -0.0, %c
   %val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
 ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define double @testd_fmadd(double %a, double %b, double %c) {
-; CHECK: testd_fmadd:
+; CHECK-LABEL: testd_fmadd:
+; CHECK-NOFAST-LABEL: testd_fmadd:
   %val = call double @llvm.fma.f64(double %a, double %b, double %c)
 ; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define double @testd_fmsub(double %a, double %b, double %c) {
-; CHECK: testd_fmsub:
+; CHECK-LABEL: testd_fmsub:
+; CHECK-NOFAST-LABEL: testd_fmsub:
   %nega = fsub double -0.0, %a
   %val = call double @llvm.fma.f64(double %nega, double %b, double %c)
 ; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define double @testd_fnmadd(double %a, double %b, double %c) {
-; CHECK: testd_fnmadd:
+; CHECK-LABEL: testd_fnmadd:
+; CHECK-NOFAST-LABEL: testd_fnmadd:
   %negc = fsub double -0.0, %c
   %val = call double @llvm.fma.f64(double %a, double %b, double %negc)
 ; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define double @testd_fnmsub(double %a, double %b, double %c) {
-; CHECK: testd_fnmsub:
+; CHECK-LABEL: testd_fnmsub:
+; CHECK-NOFAST-LABEL: testd_fnmsub:
   %nega = fsub double -0.0, %a
   %negc = fsub double -0.0, %c
   %val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
 ; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define float @test_fmadd_unfused(float %a, float %b, float %c) {
-; CHECK: test_fmadd_unfused:
+; CHECK-LABEL: test_fmadd_unfused:
+; CHECK-NOFAST-LABEL: test_fmadd_unfused:
   %prod = fmul float %b, %c
   %sum = fadd float %a, %prod
 ; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %sum
 }
 
 define float @test_fmsub_unfused(float %a, float %b, float %c) {
-; CHECK: test_fmsub_unfused:
+; CHECK-LABEL: test_fmsub_unfused:
+; CHECK-NOFAST-LABEL: test_fmsub_unfused:
   %prod = fmul float %b, %c
   %diff = fsub float %a, %prod
 ; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %diff
 }
 
 define float @test_fnmadd_unfused(float %a, float %b, float %c) {
-; CHECK: test_fnmadd_unfused:
+; CHECK-LABEL: test_fnmadd_unfused:
+; CHECK-NOFAST-LABEL: test_fnmadd_unfused:
   %nega = fsub float -0.0, %a
   %prod = fmul float %b, %c
   %sum = fadd float %nega, %prod
 ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %sum
 }
 
 define float @test_fnmsub_unfused(float %a, float %b, float %c) {
-; CHECK: test_fnmsub_unfused:
+; CHECK-LABEL: test_fnmsub_unfused:
+; CHECK-NOFAST-LABEL: test_fnmsub_unfused:
   %nega = fsub float -0.0, %a
   %prod = fmul float %b, %c
   %diff = fsub float %nega, %prod
 ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-DAG: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-DAG: fneg {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-DAG: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: ret
   ret float %diff
 }
diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll
index b5bdcf4f37b4..b1c560d2b648 100644
--- a/test/CodeGen/AArch64/fp128-folding.ll
+++ b/test/CodeGen/AArch64/fp128-folding.ll
@@ -5,7 +5,7 @@ declare void @bar(i8*, i8*, i32*)
 ; which is not supported.
 
 define fp128 @test_folding() {
-; CHECK: test_folding:
+; CHECK-LABEL: test_folding:
   %l = alloca i32
   store i32 42, i32* %l
   %val = load i32* %l
@@ -14,4 +14,4 @@ define fp128 @test_folding() {
   ; successfully.
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI
   ret fp128 %fpval
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll
index 258d34b8f81f..c312bb1917ab 100644
--- a/test/CodeGen/AArch64/fp128.ll
+++ b/test/CodeGen/AArch64/fp128.ll
@@ -4,7 +4,7 @@
 @rhs = global fp128 zeroinitializer
 
 define fp128 @test_add() {
-; CHECK: test_add:
+; CHECK-LABEL: test_add:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -17,7 +17,7 @@ define fp128 @test_add() {
 }
 
 define fp128 @test_sub() {
-; CHECK: test_sub:
+; CHECK-LABEL: test_sub:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -30,7 +30,7 @@ define fp128 @test_sub() {
 }
 
 define fp128 @test_mul() {
-; CHECK: test_mul:
+; CHECK-LABEL: test_mul:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -43,7 +43,7 @@ define fp128 @test_mul() {
 }
 
 define fp128 @test_div() {
-; CHECK: test_div:
+; CHECK-LABEL: test_div:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -59,7 +59,7 @@ define fp128 @test_div() {
 @var64 = global i64 0
 
 define void @test_fptosi() {
-; CHECK: test_fptosi:
+; CHECK-LABEL: test_fptosi:
   %val = load fp128* @lhs
 
   %val32 = fptosi fp128 %val to i32
@@ -74,7 +74,7 @@ define void @test_fptosi() {
 }
 
 define void @test_fptoui() {
-; CHECK: test_fptoui:
+; CHECK-LABEL: test_fptoui:
   %val = load fp128* @lhs
 
   %val32 = fptoui fp128 %val to i32
@@ -89,7 +89,7 @@ define void @test_fptoui() {
 }
 
 define void @test_sitofp() {
-; CHECK: test_sitofp:
+; CHECK-LABEL: test_sitofp:
 
   %src32 = load i32* @var32
   %val32 = sitofp i32 %src32 to fp128
@@ -105,7 +105,7 @@ define void @test_sitofp() {
 }
 
 define void @test_uitofp() {
-; CHECK: test_uitofp:
+; CHECK-LABEL: test_uitofp:
 
   %src32 = load i32* @var32
   %val32 = uitofp i32 %src32 to fp128
@@ -121,7 +121,7 @@ define void @test_uitofp() {
 }
 
 define i1 @test_setcc1() {
-; CHECK: test_setcc1:
+; CHECK-LABEL: test_setcc1:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -140,7 +140,7 @@ define i1 @test_setcc1() {
 }
 
 define i1 @test_setcc2() {
-; CHECK: test_setcc2:
+; CHECK-LABEL: test_setcc2:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -150,14 +150,14 @@ define i1 @test_setcc2() {
 ; Technically, everything after the call to __letf2 is redundant, but we'll let
 ; LLVM have its fun for now.
   %val = fcmp ugt fp128 %lhs, %rhs
-; CHECK: bl      __unordtf2
-; CHECK: mov     x[[UNORDERED:[0-9]+]], x0
-
 ; CHECK: bl      __gttf2
 ; CHECK: cmp w0, #0
 ; CHECK: csinc   [[GT:w[0-9]+]], wzr, wzr, le
-; CHECK: cmp w[[UNORDERED]], #0
+
+; CHECK: bl      __unordtf2
+; CHECK: cmp w0, #0
 ; CHECK: csinc   [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+
 ; CHECK: orr     w0, [[UNORDERED]], [[GT]]
 
   ret i1 %val
@@ -165,7 +165,7 @@ define i1 @test_setcc2() {
 }
 
 define i32 @test_br_cc() {
-; CHECK: test_br_cc:
+; CHECK-LABEL: test_br_cc:
 
   %lhs = load fp128* @lhs
   %rhs = load fp128* @rhs
@@ -174,15 +174,14 @@ define i32 @test_br_cc() {
 
   ; olt == !uge, which LLVM unfortunately "optimizes" this to.
   %cond = fcmp olt fp128 %lhs, %rhs
-; CHECK: bl      __unordtf2
-; CHECK: mov     x[[UNORDERED:[0-9]+]], x0
-
 ; CHECK: bl      __getf2
 ; CHECK: cmp w0, #0
-
 ; CHECK: csinc   [[OGE:w[0-9]+]], wzr, wzr, lt
-; CHECK: cmp w[[UNORDERED]], #0
+
+; CHECK: bl      __unordtf2
+; CHECK: cmp w0, #0
 ; CHECK: csinc   [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+
 ; CHECK: orr     [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
 ; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
   br i1 %cond, label %iftrue, label %iffalse
@@ -202,7 +201,7 @@ iffalse:
 }
 
 define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
-; CHECK: test_select:
+; CHECK-LABEL: test_select:
 
   %val = select i1 %cond, fp128 %lhs, fp128 %rhs
   store fp128 %val, fp128* @lhs
@@ -222,7 +221,7 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
 @vardouble = global double 0.0
 
 define void @test_round() {
-; CHECK: test_round:
+; CHECK-LABEL: test_round:
 
   %val = load fp128* @lhs
 
@@ -240,7 +239,7 @@ define void @test_round() {
 }
 
 define void @test_extend() {
-; CHECK: test_extend:
+; CHECK-LABEL: test_extend:
 
   %val = load fp128* @lhs
 
@@ -265,7 +264,7 @@ define fp128 @test_neg(fp128 %in) {
 ; Make sure the weird hex constant below *is* -0.0
 ; CHECK-NEXT: fp128 -0
 
-; CHECK: test_neg:
+; CHECK-LABEL: test_neg:
 
   ; Could in principle be optimized to fneg which we can't select, this makes
   ; sure that doesn't happen.
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index fd28aeef9291..b8f716959449 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -4,31 +4,33 @@
 @varf64 = global double 0.0
 
 define void @check_float() {
-; CHECK: check_float:
+; CHECK-LABEL: check_float:
 
   %val = load float* @varf32
   %newval1 = fadd float %val, 8.5
   store volatile float %newval1, float* @varf32
-; CHECK: fmov {{s[0-9]+}}, #8.5
+; CHECK-DAG: fmov [[EIGHT5:s[0-9]+]], #8.5
 
   %newval2 = fadd float %val, 128.0
   store volatile float %newval2, float* @varf32
-; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI0_0
+; CHECK-DAG: ldr [[HARD:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI0_0
 
+; CHECK: ret
   ret void
 }
 
 define void @check_double() {
-; CHECK: check_double:
+; CHECK-LABEL: check_double:
 
   %val = load double* @varf64
   %newval1 = fadd double %val, 8.5
   store volatile double %newval1, double* @varf64
-; CHECK: fmov {{d[0-9]+}}, #8.5
+; CHECK-DAG: fmov {{d[0-9]+}}, #8.5
 
   %newval2 = fadd double %val, 128.0
   store volatile double %newval2, double* @varf64
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0
+; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0
 
+; CHECK: ret
   ret void
 }
diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll
new file mode 100644
index 000000000000..182704bd6541
--- /dev/null
+++ b/test/CodeGen/AArch64/frameaddr.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu  | FileCheck %s
+
+define i8* @t() nounwind {
+entry:
+; CHECK-LABEL: t:
+; CHECK: mov x0, x29
+  %frame = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %frame
+}
+
+define i8* @t2() nounwind {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: ldr x[[reg:[0-9]+]], [x29]
+; CHECK: ldr x[[reg]], [x[[reg]]]
+  %frame = call i8* @llvm.frameaddress(i32 2)
+  ret i8* %frame
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index 78fde6a3c33a..430d77f9e932 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -11,7 +12,7 @@
 @varstruct = global %myStruct zeroinitializer
 
 define void @take_i8s(i8 %val1, i8 %val2) {
-; CHECK: take_i8s:
+; CHECK-LABEL: take_i8s:
     store i8 %val2, i8* @var8
     ; Not using w1 may be technically allowed, but it would indicate a
     ; problem in itself.
@@ -20,9 +21,10 @@ define void @take_i8s(i8 %val1, i8 %val2) {
 }
 
 define void @add_floats(float %val1, float %val2) {
-; CHECK: add_floats:
+; CHECK-LABEL: add_floats:
     %newval = fadd float %val1, %val2
 ; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1
+; CHECK-NOFP-NOT: fadd
     store float %newval, float* @varfloat
 ; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat]
     ret void
@@ -31,19 +33,19 @@ define void @add_floats(float %val1, float %val2) {
 ; byval pointers should be allocated to the stack and copied as if
 ; with memcpy.
 define void @take_struct(%myStruct* byval %structval) {
-; CHECK: take_struct:
+; CHECK-LABEL: take_struct:
     %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
     %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
 
-    %val0 = load i32* %addr0
+    %val0 = load volatile i32* %addr0
     ; Some weird move means x0 is used for one access
 ; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
-    store i32 %val0, i32* @var32
+    store volatile i32 %val0, i32* @var32
 ; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
 
-    %val1 = load i64* %addr1
+    %val1 = load volatile i64* %addr1
 ; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
-    store i64 %val1, i64* @var64
+    store volatile i64 %val1, i64* @var64
 ; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
 
     ret void
@@ -51,19 +53,19 @@ define void @take_struct(%myStruct* byval %structval) {
 
 ; %structval should be at sp + 16
 define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) {
-; CHECK: check_byval_align:
+; CHECK-LABEL: check_byval_align:
 
     %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
     %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
 
-    %val0 = load i32* %addr0
+    %val0 = load volatile i32* %addr0
     ; Some weird move means x0 is used for one access
 ; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16
 ; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12]
     store i32 %val0, i32* @var32
 ; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
 
-    %val1 = load i64* %addr1
+    %val1 = load volatile i64* %addr1
 ; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
     store i64 %val1, i64* @var64
 ; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
@@ -72,7 +74,7 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st
 }
 
 define i32 @return_int() {
-; CHECK: return_int:
+; CHECK-LABEL: return_int:
     %val = load i32* @var32
     ret i32 %val
 ; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32]
@@ -81,16 +83,17 @@ define i32 @return_int() {
 }
 
 define double @return_double() {
-; CHECK: return_double:
+; CHECK-LABEL: return_double:
     ret double 3.14
 ; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK-NOFP-NOT: ldr d0,
 }
 
 ; This is the kind of IR clang will produce for returning a struct
 ; small enough to go into registers. Not all that pretty, but it
 ; works.
 define [2 x i64] @return_struct() {
-; CHECK: return_struct:
+; CHECK-LABEL: return_struct:
     %addr = bitcast %myStruct* @varstruct to [2 x i64]*
     %val = load [2 x i64]* %addr
     ret [2 x i64] %val
@@ -107,7 +110,7 @@ define [2 x i64] @return_struct() {
 ; structs larger than 16 bytes, but C semantics can still be provided
 ; if LLVM does it to %myStruct too. So this is the simplest check
 define void @return_large_struct(%myStruct* sret %retval) {
-; CHECK: return_large_struct:
+; CHECK-LABEL: return_large_struct:
     %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0
     %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1
     %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2
@@ -128,19 +131,20 @@ define void @return_large_struct(%myStruct* sret %retval) {
 define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
                           i32* %var6, %myStruct* byval %struct, i32* byval %stacked,
                           double %notstacked) {
-; CHECK: struct_on_stack:
+; CHECK-LABEL: struct_on_stack:
     %addr = getelementptr %myStruct* %struct, i64 0, i32 0
-    %val64 = load i64* %addr
-    store i64 %val64, i64* @var64
+    %val64 = load volatile i64* %addr
+    store volatile i64 %val64, i64* @var64
     ; Currently nothing on local stack, so struct should be at sp
 ; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
 ; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64]
 
-    store double %notstacked, double* @vardouble
+    store volatile double %notstacked, double* @vardouble
 ; CHECK-NOT: ldr d0
 ; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble
+; CHECK-NOFP-NOT: str d0,
 
-    %retval = load i32* %stacked
+    %retval = load volatile i32* %stacked
     ret i32 %retval
 ; CHECK: ldr w0, [sp, #16]
 }
@@ -148,7 +152,7 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
 define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
                          float %var4, float %var5, float %var6, float %var7,
                          float %var8) {
-; CHECK: stacked_fpu:
+; CHECK-LABEL: stacked_fpu:
     store float %var8, float* @varfloat
     ; Beware as above: the offset would be different on big-endian
     ; machines if the first ldr were changed to use s-registers.
@@ -176,17 +180,17 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
 ; CHECK: check_i128_stackalign
     store i128 %stack2, i128* @var128
     ; Nothing local on stack in current codegen, so first stack is 16 away
-; CHECK: ldr {{x[0-9]+}}, [sp, #16]
+; CHECK: add     x[[REG:[0-9]+]], sp, #16
+; CHECK: ldr {{x[0-9]+}}, [x[[REG]], #8]
     ; Important point is that we address sp+24 for second dword
-; CHECK: add     [[REG:x[0-9]+]], sp, #16
-; CHECK: ldr     {{x[0-9]+}}, {{\[}}[[REG]], #8]
+; CHECK: ldr     {{x[0-9]+}}, [sp, #16]
     ret void
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 
 define i32 @test_extern() {
-; CHECK: test_extern:
+; CHECK-LABEL: test_extern:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
 ; CHECK: bl memcpy
   ret i32 0
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index 13b689c40886..ac188bb3bb57 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -17,20 +18,22 @@ declare void @take_i8s(i8 %val1, i8 %val2)
 declare void @take_floats(float %val1, float %val2)
 
 define void @simple_args() {
-; CHECK: simple_args:
+; CHECK-LABEL: simple_args:
   %char1 = load i8* @var8
   %char2 = load i8* @var8_2
   call void @take_i8s(i8 %char1, i8 %char2)
-; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8]
-; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2]
+; CHECK-DAG: ldrb w0, [{{x[0-9]+}}, #:lo12:var8]
+; CHECK-DAG: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2]
 ; CHECK: bl take_i8s
 
   %float1 = load float* @varfloat
   %float2 = load float* @varfloat_2
   call void @take_floats(float %float1, float %float2)
-; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2]
-; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK-DAG: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2]
+; CHECK-DAG: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat]
 ; CHECK: bl take_floats
+; CHECK-NOFP-NOT: ldr s1,
+; CHECK-NOFP-NOT: ldr s0,
 
   ret void
 }
@@ -41,7 +44,7 @@ declare [2 x i64] @return_smallstruct()
 declare void @return_large_struct(%myStruct* sret %retval)
 
 define void @simple_rets() {
-; CHECK: simple_rets:
+; CHECK-LABEL: simple_rets:
 
   %int = call i32 @return_int()
   store i32 %int, i32* @var32
@@ -52,6 +55,7 @@ define void @simple_rets() {
   store double %dbl, double* @vardouble
 ; CHECK: bl return_double
 ; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+; CHECK-NOFP-NOT: str d0,
 
   %arr = call [2 x i64] @return_smallstruct()
   store [2 x i64] %arr, [2 x i64]* @varsmallstruct
@@ -75,17 +79,19 @@ declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
                           float %var8)
 
 define void @check_stack_args() {
+; CHECK-LABEL: check_stack_args:
   call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1,
                             i32* @var32, %myStruct* byval @varstruct,
                             i32 999, double 1.0)
   ; Want to check that the final double is passed in registers and
   ; that varstruct is passed on the stack. Rather dependent on how a
   ; memcpy gets created, but the following works for now.
-; CHECK: mov x0, sp
-; CHECK: str {{w[0-9]+}}, [x0]
-; CHECK: str {{w[0-9]+}}, [x0, #12]
-; CHECK: fmov d0,
+; CHECK: mov x[[SPREG:[0-9]+]], sp
+; CHECK-DAG: str {{w[0-9]+}}, [x[[SPREG]]]
+; CHECK-DAG: str {{w[0-9]+}}, [x[[SPREG]], #12]
+; CHECK-DAG: fmov d0,
 ; CHECK: bl struct_on_stack
+; CHECK-NOFP-NOT: fmov
 
   call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0,
                          float -2.0, float -8.0, float 16.0, float 1.0,
@@ -106,7 +112,7 @@ declare void @check_i128_regalign(i32 %val0, i128 %val1)
 
 
 define void @check_i128_align() {
-; CHECK: check_i128_align:
+; CHECK-LABEL: check_i128_align:
   %val = load i128* @var128
   call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7,
@@ -130,7 +136,7 @@ define void @check_i128_align() {
 @fptr = global void()* null
 
 define void @check_indirect_call() {
-; CHECK: check_indirect_call:
+; CHECK-LABEL: check_indirect_call:
   %func = load void()** @fptr
   call void %func()
 ; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr]
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
index 8ed6e551cdeb..56e5cba519c1 100644
--- a/test/CodeGen/AArch64/global-alignment.ll
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -5,7 +5,7 @@
 @var32_align64 = global [3 x i32] zeroinitializer, align 8
 
 define i64 @test_align32() {
-; CHECK: test_align32:
+; CHECK-LABEL: test_align32:
   %addr = bitcast [3 x i32]* @var32 to i64*
 
   ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
@@ -19,7 +19,7 @@ define i64 @test_align32() {
 }
 
 define i64 @test_align64() {
-; CHECK: test_align64:
+; CHECK-LABEL: test_align64:
   %addr = bitcast [3 x i64]* @var64 to i64*
 
   ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be
@@ -33,7 +33,7 @@ define i64 @test_align64() {
 }
 
 define i64 @test_var32_align64() {
-; CHECK: test_var32_align64:
+; CHECK-LABEL: test_var32_align64:
   %addr = bitcast [3 x i32]* @var32_align64 to i64*
 
   ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
@@ -49,7 +49,7 @@ define i64 @test_var32_align64() {
 @yet_another_var = external global {i32, i32}
 
 define i64 @test_yet_another_var() {
-; CHECK: test_yet_another_var:
+; CHECK-LABEL: test_yet_another_var:
 
   ; @yet_another_var has a preferred alignment of 8, but that's not enough if
   ; we're going to be linking against other things. Its ABI alignment is only 4
@@ -62,7 +62,7 @@ define i64 @test_yet_another_var() {
 }
 
 define i64()* @test_functions() {
-; CHECK: test_functions:
+; CHECK-LABEL: test_functions:
   ret i64()* @test_yet_another_var
 ; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var
 ; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var
diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll
index c474e5845a64..8b06031c88f7 100644
--- a/test/CodeGen/AArch64/got-abuse.ll
+++ b/test/CodeGen/AArch64/got-abuse.ll
@@ -13,7 +13,7 @@ declare void @consume(i32)
 declare void @func()
 
 define void @foo() nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 entry:
   call void @consume(i32 ptrtoint (void ()* @func to i32))
 ; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func
diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll
index f019ea0a6706..21ca7eda66bb 100644
--- a/test/CodeGen/AArch64/i128-align.ll
+++ b/test/CodeGen/AArch64/i128-align.ll
@@ -5,7 +5,7 @@
 @var = global %struct zeroinitializer
 
 define i64 @check_size() {
-; CHECK: check_size:
+; CHECK-LABEL: check_size:
   %starti = ptrtoint %struct* @var to i64
 
   %endp = getelementptr %struct* @var, i64 1
@@ -17,7 +17,7 @@ define i64 @check_size() {
 }
 
 define i64 @check_field() {
-; CHECK: check_field:
+; CHECK-LABEL: check_field:
   %starti = ptrtoint %struct* @var to i64
 
   %endp = getelementptr %struct* @var, i64 0, i32 1
@@ -26,4 +26,4 @@ define i64 @check_field() {
   %diff = sub i64 %endi, %starti
   ret i64 %diff
 ; CHECK: movz x0, #16
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll
index 446151b8ffac..03c6d8d10087 100644
--- a/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -9,7 +9,7 @@ declare double @llvm.cos.f64(double)
 declare fp128 @llvm.cos.f128(fp128)
 
 define void @test_cos(float %float, double %double, fp128 %fp128) {
-; CHECK: test_cos:
+; CHECK-LABEL: test_cos:
 
    %cosfloat = call float @llvm.cos.f32(float %float)
    store float %cosfloat, float* @varfloat
@@ -31,7 +31,7 @@ declare double @llvm.exp.f64(double)
 declare fp128 @llvm.exp.f128(fp128)
 
 define void @test_exp(float %float, double %double, fp128 %fp128) {
-; CHECK: test_exp:
+; CHECK-LABEL: test_exp:
 
    %expfloat = call float @llvm.exp.f32(float %float)
    store float %expfloat, float* @varfloat
@@ -53,7 +53,7 @@ declare double @llvm.exp2.f64(double)
 declare fp128 @llvm.exp2.f128(fp128)
 
 define void @test_exp2(float %float, double %double, fp128 %fp128) {
-; CHECK: test_exp2:
+; CHECK-LABEL: test_exp2:
 
    %exp2float = call float @llvm.exp2.f32(float %float)
    store float %exp2float, float* @varfloat
@@ -75,7 +75,7 @@ declare double @llvm.log.f64(double)
 declare fp128 @llvm.log.f128(fp128)
 
 define void @test_log(float %float, double %double, fp128 %fp128) {
-; CHECK: test_log:
+; CHECK-LABEL: test_log:
 
    %logfloat = call float @llvm.log.f32(float %float)
    store float %logfloat, float* @varfloat
@@ -97,7 +97,7 @@ declare double @llvm.log2.f64(double)
 declare fp128 @llvm.log2.f128(fp128)
 
 define void @test_log2(float %float, double %double, fp128 %fp128) {
-; CHECK: test_log2:
+; CHECK-LABEL: test_log2:
 
    %log2float = call float @llvm.log2.f32(float %float)
    store float %log2float, float* @varfloat
@@ -119,7 +119,7 @@ declare double @llvm.log10.f64(double)
 declare fp128 @llvm.log10.f128(fp128)
 
 define void @test_log10(float %float, double %double, fp128 %fp128) {
-; CHECK: test_log10:
+; CHECK-LABEL: test_log10:
 
    %log10float = call float @llvm.log10.f32(float %float)
    store float %log10float, float* @varfloat
@@ -141,7 +141,7 @@ declare double @llvm.sin.f64(double)
 declare fp128 @llvm.sin.f128(fp128)
 
 define void @test_sin(float %float, double %double, fp128 %fp128) {
-; CHECK: test_sin:
+; CHECK-LABEL: test_sin:
 
    %sinfloat = call float @llvm.sin.f32(float %float)
    store float %sinfloat, float* @varfloat
@@ -163,7 +163,7 @@ declare double @llvm.pow.f64(double, double)
 declare fp128 @llvm.pow.f128(fp128, fp128)
 
 define void @test_pow(float %float, double %double, fp128 %fp128) {
-; CHECK: test_pow:
+; CHECK-LABEL: test_pow:
 
    %powfloat = call float @llvm.pow.f32(float %float, float %float)
    store float %powfloat, float* @varfloat
@@ -185,7 +185,7 @@ declare double @llvm.powi.f64(double, i32)
 declare fp128 @llvm.powi.f128(fp128, i32)
 
 define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128) {
-; CHECK: test_powi:
+; CHECK-LABEL: test_powi:
 
    %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent)
    store float %powifloat, float* @varfloat
@@ -203,7 +203,7 @@ define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128
 }
 
 define void @test_frem(float %float, double %double, fp128 %fp128) {
-; CHECK: test_frem:
+; CHECK-LABEL: test_frem:
 
   %fremfloat = frem float %float, %float
   store float %fremfloat, float* @varfloat
@@ -219,3 +219,29 @@ define void @test_frem(float %float, double %double, fp128 %fp128) {
 
   ret void
 }
+
+declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
+
+define void @test_fma(fp128 %fp128) {
+; CHECK-LABEL: test_fma:
+; The fp128 fma intrinsic is expected to show up as a call to the fmal routine.
+  %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+  store fp128 %fmafp128, fp128* @varfp128
+; CHECK: bl fmal
+
+  ret void
+}
+
+declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
+
+define void @test_fmuladd(fp128 %fp128) {
+; CHECK-LABEL: test_fmuladd:
+; Unlike test_fma, no fmal call may appear here: separate __multf3 and __addtf3 calls are expected.
+  %fmuladdfp128 = call fp128 @llvm.fmuladd.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+  store fp128 %fmuladdfp128, fp128* @varfp128
+; CHECK-NOT: bl fmal
+; CHECK: bl __multf3
+; CHECK: bl __addtf3
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
index d80be8f3a639..3ff1c1a86ec6 100644
--- a/test/CodeGen/AArch64/init-array.ll
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -6,4 +6,4 @@ define internal void @_GLOBAL__I_a() section ".text.startup" {
 
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
 
-; CHECK: .section .init_array
-\ No newline at end of file
+; CHECK: .section .init_array
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
index c39c57f05822..61bbfc201354 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
@@ -4,4 +4,4 @@ define void @foo() {
   ; Out of range immediate for I.
   call void asm sideeffect "add x0, x0, $0", "I"(i32 4096)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
index 47c5f98bf009..40746e1528ce 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
@@ -4,4 +4,4 @@ define void @foo() {
   ; 32-bit bitpattern ending in 1101 can't be produced.
   call void asm sideeffect "and w0, w0, $0", "K"(i32 13)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
index 7a5b99e23b3d..2c5338191fde 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
@@ -4,4 +4,4 @@ define void @foo() {
   ; 32-bit bitpattern ending in 1101 can't be produced.
   call void asm sideeffect "and w0, w0, $0", "K"(i64 4294967296)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
index 4f0039865a35..d82d5a2ee4d0 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
@@ -4,4 +4,4 @@ define void @foo() {
   ; 32-bit bitpattern ending in 1101 can't be produced.
   call void asm sideeffect "and x0, x0, $0", "L"(i32 13)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll
index c232f3208cfa..18a3b37b41d1 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints.ll
@@ -1,21 +1,21 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
-; CHECK: test_inline_constraint_r:
+; CHECK-LABEL: test_inline_constraint_r:
   %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset)
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
   ret i64 %val
 }
 
 define i16 @test_small_reg(i16 %lhs, i16 %rhs) {
-; CHECK: test_small_reg:
+; CHECK-LABEL: test_small_reg:
   %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs)
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
   ret i16 %val
 }
 
 define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) {
-; CHECK: test_inline_constraint_r_imm:
+; CHECK-LABEL: test_inline_constraint_r_imm:
   %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12)
 ; CHECK: movz [[FOUR:x[0-9]+]], #4
 ; CHECK: movz [[TWELVE:w[0-9]+]], #12
@@ -26,7 +26,7 @@ define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) {
 ; m is permitted to have a base/offset form. We don't do that
 ; currently though.
 define i32 @test_inline_constraint_m(i32 *%ptr) {
-; CHECK: test_inline_constraint_m:
+; CHECK-LABEL: test_inline_constraint_m:
   %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr)
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
   ret i32 %val
@@ -36,7 +36,7 @@ define i32 @test_inline_constraint_m(i32 *%ptr) {
 
 ; Q should *never* have base/offset form even if given the chance.
 define i32 @test_inline_constraint_Q(i32 *%ptr) {
-; CHECK: test_inline_constraint_Q:
+; CHECK-LABEL: test_inline_constraint_Q:
   %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1))
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
   ret i32 %val
@@ -44,8 +44,28 @@ define i32 @test_inline_constraint_Q(i32 *%ptr) {
 
 @dump = global fp128 zeroinitializer
 
+define void @test_inline_constraint_w(<8 x i8> %vec64, <4 x float> %vec128, half %hlf, float %flt, double %dbl, fp128 %quad) {
+; CHECK-LABEL: test_inline_constraint_w:
+  call <8 x i8> asm sideeffect "add $0.8b, $1.8b, $1.8b", "=w,w"(<8 x i8> %vec64)
+  call <8 x i8> asm sideeffect "fadd $0.4s, $1.4s, $1.4s", "=w,w"(<4 x float> %vec128)
+; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+
+  ; Arguably semantically dodgy to output "vN", but it's what GCC does
+  ; so purely for compatibility we want vector registers to be output.
+  call float asm sideeffect "fcvt ${0:s}, ${1:h}", "=w,w"(half undef)
+  call float asm sideeffect "fadd $0.2s, $0.2s, $0.2s", "=w,w"(float %flt)
+  call double asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(double %dbl)
+  call fp128 asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(fp128 %quad)
+; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
+; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  ret void
+}
+
 define void @test_inline_constraint_I() {
-; CHECK: test_inline_constraint_I:
+; CHECK-LABEL: test_inline_constraint_I:
   call void asm sideeffect "add x0, x0, $0", "I"(i32 0)
   call void asm sideeffect "add x0, x0, $0", "I"(i64 4095)
 ; CHECK: add x0, x0, #0
@@ -57,7 +77,7 @@ define void @test_inline_constraint_I() {
 ; Skip J because it's useless
 
 define void @test_inline_constraint_K() {
-; CHECK: test_inline_constraint_K:
+; CHECK-LABEL: test_inline_constraint_K:
   call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa
   call void asm sideeffect "and w0, w0, $0", "K"(i32 65535)
 ; CHECK: and w0, w0, #-1431655766
@@ -67,7 +87,7 @@ define void @test_inline_constraint_K() {
 }
 
 define void @test_inline_constraint_L() {
-; CHECK: test_inline_constraint_L:
+; CHECK-LABEL: test_inline_constraint_L:
   call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0xaaaaaaaa
   call void asm sideeffect "and x0, x0, $0", "L"(i64 65535)
 ; CHECK: and x0, x0, #4294967296
@@ -81,7 +101,7 @@ define void @test_inline_constraint_L() {
 @var = global i32 0
 
 define void @test_inline_constraint_S() {
-; CHECK: test_inline_constraint_S:
+; CHECK-LABEL: test_inline_constraint_S:
   call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
   call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var)
   call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var)
@@ -92,7 +112,7 @@ define void @test_inline_constraint_S() {
 }
 
 define i32 @test_inline_constraint_S_label(i1 %in) {
-; CHECK: test_inline_constraint_S_label:
+; CHECK-LABEL: test_inline_constraint_S_label:
   call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
 ; CHECK: adr x0, .Ltmp{{[0-9]+}}
   br i1 %in, label %loc, label %loc2
@@ -103,15 +123,15 @@ loc2:
 }
 
 define void @test_inline_constraint_Y() {
-; CHECK: test_inline_constraint_Y:
+; CHECK-LABEL: test_inline_constraint_Y:
   call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0)
 ; CHECK: fcmp s0, #0.0
   ret void
 }
 
 define void @test_inline_constraint_Z() {
-; CHECK: test_inline_constraint_Z:
+; CHECK-LABEL: test_inline_constraint_Z:
   call void asm sideeffect "cmp w0, $0", "Z"(i32 0)
 ; CHECK: cmp w0, #0
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll
index 3b55945561eb..b7f4d3c57ba3 100644
--- a/test/CodeGen/AArch64/inline-asm-modifiers.ll
+++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll
@@ -1,5 +1,4 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
 
 @var_simple = hidden global i32 0
 @var_got = global i32 0
@@ -9,7 +8,7 @@
 @var_tlsle = thread_local(localexec) global i32 0
 
 define void @test_inline_modifier_L() nounwind {
-; CHECK: test_inline_modifier_L:
+; CHECK-LABEL: test_inline_modifier_L:
   call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple)
   call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got)
   call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd)
@@ -23,31 +22,28 @@ define void @test_inline_modifier_L() nounwind {
 ; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie]
 ; CHECK: add x0, x0, #:tprel_lo12:var_tlsle
 
-; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple
-; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got
-; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd
-; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld
-; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie
-; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle
+  call void asm sideeffect "add x0, x0, ${0:L}", "Si,~{x0}"(i32 64)
+  call void asm sideeffect "ldr x0, [x0, ${0:L}]", "Si,~{x0}"(i32 64)
+; CHECK: add x0, x0, #64
+; CHECK: ldr x0, [x0, #64]
 
   ret void
 }
 
 define void @test_inline_modifier_G() nounwind {
-; CHECK: test_inline_modifier_G:
+; CHECK-LABEL: test_inline_modifier_G:
   call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld)
   call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle)
 ; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
 ; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
 
-; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld
-; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle
-
+  call void asm sideeffect "add x0, x0, ${0:G}", "Si,~{x0}"(i32 42)
+; CHECK: add x0, x0, #42
   ret void
 }
 
 define void @test_inline_modifier_A() nounwind {
-; CHECK: test_inline_modifier_A:
+; CHECK-LABEL: test_inline_modifier_A:
   call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple)
   call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got)
   call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd)
@@ -58,16 +54,14 @@ define void @test_inline_modifier_A() nounwind {
 ; CHECK: adrp x0, :tlsdesc:var_tlsgd
 ; CHECK: adrp x0, :gottprel:var_tlsie
 
-; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple
-; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got
-; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd
-; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie
+  call void asm sideeffect "adrp x0, ${0:A}", "Si,~{x0}"(i32 40)
+; CHECK: adrp x0, #40
 
   ret void
 }
 
 define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind {
-; CHECK: test_inline_modifier_wx:
+; CHECK-LABEL: test_inline_modifier_wx:
   call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small)
   call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small)
   call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small)
@@ -87,11 +81,17 @@ define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind {
   call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0)
 ; CHECK: add {{w[0-9]+}}, wzr, wzr
 ; CHECK: add {{x[0-9]+}}, xzr, xzr
+
+  call i32 asm sideeffect "add ${0:w}, ${0:w}, ${1:w}", "=r,Ir,0"(i32 123, i32 %small)
+  call i64 asm sideeffect "add ${0:x}, ${0:x}, ${1:x}", "=r,Ir,0"(i32 456, i64 %big)
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #123
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #456
+
   ret void
 }
 
 define void @test_inline_modifier_bhsdq() nounwind {
-; CHECK: test_inline_modifier_bhsdq:
+; CHECK-LABEL: test_inline_modifier_bhsdq:
   call float asm sideeffect "ldr ${0:b}, [sp]", "=w"()
   call float asm sideeffect "ldr ${0:h}, [sp]", "=w"()
   call float asm sideeffect "ldr ${0:s}, [sp]", "=w"()
@@ -113,13 +113,35 @@ define void @test_inline_modifier_bhsdq() nounwind {
 ; CHECK: ldr s0, [sp]
 ; CHECK: ldr d0, [sp]
 ; CHECK: ldr q0, [sp]
+
+  call void asm sideeffect "fcmp b0, ${0:b}", "Yw"(float 0.0)
+  call void asm sideeffect "fcmp h0, ${0:h}", "Yw"(float 0.0)
+  call void asm sideeffect "fcmp s0, ${0:s}", "Yw"(float 0.0)
+  call void asm sideeffect "fcmp d0, ${0:d}", "Yw"(float 0.0)
+  call void asm sideeffect "fcmp q0, ${0:q}", "Yw"(float 0.0)
+; CHECK: fcmp b0, #0
+; CHECK: fcmp h0, #0
+; CHECK: fcmp s0, #0
+; CHECK: fcmp d0, #0
+; CHECK: fcmp q0, #0
+
   ret void
 }
 
 define void @test_inline_modifier_c() nounwind {
-; CHECK: test_inline_modifier_c:
+; CHECK-LABEL: test_inline_modifier_c:
   call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3)
 ; CHECK: adr x0, 3
 
   ret void
-}
-\ No newline at end of file
+}
+
+define void @test_inline_modifier_a() nounwind {
+; CHECK-LABEL: test_inline_modifier_a:
+  call void asm sideeffect "prfm pldl1keep, ${0:a}", "r"(i32* @var_simple)
+; CHECK: adrp [[VARHI:x[0-9]+]], var_simple
+; CHECK: add x[[VARADDR:[0-9]+]], [[VARHI]], #:lo12:var_simple
+; CHECK: prfm pldl1keep, [x[[VARADDR]]]
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index 3c7f5f9ec1b0..4bb094217af3 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
@@ -48,19 +47,3 @@ lbl4:
 ; CHECK-NEXT: .xword
 ; CHECK-NEXT: .xword
 ; CHECK-NEXT: .xword
-
-; ELF tests:
-
-; First make sure we get a page/lo12 pair in .text to pick up the jump-table
-
-; CHECK-ELF:      Relocations [
-; CHECK-ELF:        Section ({{[0-9]+}}) .text {
-; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 .rodata
-; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC .rodata
-; CHECK-ELF:        }
-
-; Also check the targets in .rodata are relocated
-; CHECK-ELF:        Section ({{[0-9]+}}) .rodata {
-; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ABS64 .text
-; CHECK-ELF:        }
-; CHECK-ELF:      ]
diff --git a/test/CodeGen/AArch64/large-consts.ll b/test/CodeGen/AArch64/large-consts.ll
new file mode 100644
index 000000000000..1b769c6e350d
--- /dev/null
+++ b/test/CodeGen/AArch64/large-consts.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s
+
+; Make sure the shift amount is encoded into the instructions by LLVM because
+; it's not the linker's job to put it there.
+
+define double @foo() {
+; CHECK: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0   // encoding: [A,A,0xe0'A',0xd2'A']
+; CHECK: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [A,A,0xc0'A',0xf2'A']
+; CHECK: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [A,A,0xa0'A',0xf2'A']
+; CHECK: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [A,A,0x80'A',0xf2'A']
+
+  ret double 3.14159
+}
diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll
index 2b2e1295c4f6..fde3036aef4a 100644
--- a/test/CodeGen/AArch64/large-frame.ll
+++ b/test/CodeGen/AArch64/large-frame.ll
@@ -4,17 +4,21 @@ declare void @use_addr(i8*)
 @addr = global i8* null
 
 define void @test_bigframe() {
-; CHECK: test_bigframe:
+; CHECK-LABEL: test_bigframe:
+; CHECK: .cfi_startproc
 
   %var1 = alloca i8, i32 20000000
   %var2 = alloca i8, i32 16
   %var3 = alloca i8, i32 20000000
 ; CHECK: sub sp, sp, #496
+; CHECK: .cfi_def_cfa sp, 496
 ; CHECK: str x30, [sp, #488]
   ; Total adjust is 39999536
 ; CHECK: movz [[SUBCONST:x[0-9]+]], #22576
 ; CHECK: movk [[SUBCONST]], #610, lsl #16
 ; CHECK: sub sp, sp, [[SUBCONST]]
+; CHECK: .cfi_def_cfa sp, 40000032
+; CHECK: .cfi_offset x30, -8
 
   ; Total offset is 20000024
 ; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544
@@ -41,11 +45,12 @@ define void @test_bigframe() {
 ; CHECK: movz [[ADDCONST:x[0-9]+]], #22576
 ; CHECK: movk [[ADDCONST]], #610, lsl #16
 ; CHECK: add sp, sp, [[ADDCONST]]
+; CHECK: .cfi_endproc
   ret void
 }
 
 define void @test_mediumframe() {
-; CHECK: test_mediumframe:
+; CHECK-LABEL: test_mediumframe:
   %var1 = alloca i8, i32 1000000
   %var2 = alloca i8, i32 16
   %var3 = alloca i8, i32 1000000
@@ -88,7 +93,7 @@ define void @test_mediumframe() {
 ; If temporary registers are allocated for adjustment, they should *not* clobber
 ; argument registers.
 define void @test_tempallocation([8 x i64] %val) nounwind {
-; CHECK: test_tempallocation:
+; CHECK-LABEL: test_tempallocation:
   %var = alloca i8, i32 1000000
 ; CHECK: sub sp, sp,
 
diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll
index 45935129fd7e..db30fd915fb0 100644
--- a/test/CodeGen/AArch64/ldst-regoffset.ll
+++ b/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var_8bit = global i8 0
 @var_16bit = global i16 0
@@ -9,7 +10,7 @@
 @var_double = global double 0.0
 
 define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_8bit:
+; CHECK-LABEL: ldst_8bit:
 
    %addr8_sxtw = getelementptr i8* %base, i32 %off32
    %val8_sxtw = load volatile i8* %addr8_sxtw
@@ -37,7 +38,7 @@ define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) {
 
 
 define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_16bit:
+; CHECK-LABEL: ldst_16bit:
 
    %addr8_sxtwN = getelementptr i16* %base, i32 %off32
    %val8_sxtwN = load volatile i16* %addr8_sxtwN
@@ -91,7 +92,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
 }
 
 define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_32bit:
+; CHECK-LABEL: ldst_32bit:
 
    %addr_sxtwN = getelementptr i32* %base, i32 %off32
    %val_sxtwN = load volatile i32* %addr_sxtwN
@@ -143,7 +144,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
 }
 
 define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_64bit:
+; CHECK-LABEL: ldst_64bit:
 
    %addr_sxtwN = getelementptr i64* %base, i32 %off32
    %val_sxtwN = load volatile i64* %addr_sxtwN
@@ -191,17 +192,19 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
 }
 
 define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_float:
+; CHECK-LABEL: ldst_float:
 
    %addr_sxtwN = getelementptr float* %base, i32 %off32
    %val_sxtwN = load volatile float* %addr_sxtwN
    store volatile float %val_sxtwN, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   %addr_lslN = getelementptr float* %base, i64 %off64
   %val_lslN = load volatile float* %addr_lslN
   store volatile float %val_lslN, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   %addrint_uxtw = ptrtoint float* %base to i64
   %offset_uxtw = zext i32 %off32 to i64
@@ -210,6 +213,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
   %val_uxtw = load volatile float* %addr_uxtw
   store volatile float %val_uxtw, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   %base_sxtw = ptrtoint float* %base to i64
   %offset_sxtw = sext i32 %off32 to i64
@@ -218,6 +222,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
   %val64_sxtw = load volatile float* %addr_sxtw
   store volatile float %val64_sxtw, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   %base_lsl = ptrtoint float* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
@@ -225,6 +230,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
   %val64_lsl = load volatile float* %addr_lsl
   store volatile float %val64_lsl, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   %base_uxtwN = ptrtoint float* %base to i64
   %offset_uxtwN = zext i32 %off32 to i64
@@ -234,21 +240,24 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
   %val64 = load volatile float* @var_float
   store volatile float %val64, float* %addr_uxtwN
 ; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+; CHECK-NOFP-NOT: str {{s[0-9]+}},
    ret void
 }
 
 define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_double:
+; CHECK-LABEL: ldst_double:
 
    %addr_sxtwN = getelementptr double* %base, i32 %off32
    %val_sxtwN = load volatile double* %addr_sxtwN
    store volatile double %val_sxtwN, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   %addr_lslN = getelementptr double* %base, i64 %off64
   %val_lslN = load volatile double* %addr_lslN
   store volatile double %val_lslN, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   %addrint_uxtw = ptrtoint double* %base to i64
   %offset_uxtw = zext i32 %off32 to i64
@@ -257,6 +266,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
   %val_uxtw = load volatile double* %addr_uxtw
   store volatile double %val_uxtw, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   %base_sxtw = ptrtoint double* %base to i64
   %offset_sxtw = sext i32 %off32 to i64
@@ -265,6 +275,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
   %val64_sxtw = load volatile double* %addr_sxtw
   store volatile double %val64_sxtw, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   %base_lsl = ptrtoint double* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
@@ -272,6 +283,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
   %val64_lsl = load volatile double* %addr_lsl
   store volatile double %val64_lsl, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   %base_uxtwN = ptrtoint double* %base to i64
   %offset_uxtwN = zext i32 %off32 to i64
@@ -281,22 +293,25 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
   %val64 = load volatile double* @var_double
   store volatile double %val64, double* %addr_uxtwN
 ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+; CHECK-NOFP-NOT: str {{d[0-9]+}},
    ret void
 }
 
 
 define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
-; CHECK: ldst_128bit:
+; CHECK-LABEL: ldst_128bit:
 
    %addr_sxtwN = getelementptr fp128* %base, i32 %off32
    %val_sxtwN = load volatile fp128* %addr_sxtwN
    store volatile fp128 %val_sxtwN, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}},
 
   %addr_lslN = getelementptr fp128* %base, i64 %off64
   %val_lslN = load volatile fp128* %addr_lslN
   store volatile fp128 %val_lslN, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}},
 
   %addrint_uxtw = ptrtoint fp128* %base to i64
   %offset_uxtw = zext i32 %off32 to i64
@@ -305,6 +320,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
   %val_uxtw = load volatile fp128* %addr_uxtw
   store volatile fp128 %val_uxtw, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}},
 
   %base_sxtw = ptrtoint fp128* %base to i64
   %offset_sxtw = sext i32 %off32 to i64
@@ -313,6 +329,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
   %val64_sxtw = load volatile fp128* %addr_sxtw
   store volatile fp128 %val64_sxtw, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}},
 
   %base_lsl = ptrtoint fp128* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
@@ -320,6 +337,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
   %val64_lsl = load volatile fp128* %addr_lsl
   store volatile fp128 %val64_lsl, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}},
 
   %base_uxtwN = ptrtoint fp128* %base to i64
   %offset_uxtwN = zext i32 %off32 to i64
@@ -329,5 +347,6 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
   %val64 = load volatile fp128* %base
   store volatile fp128 %val64, fp128* %addr_uxtwN
 ; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4]
+; CHECK-NOFP-NOT: str {{q[0-9]+}},
    ret void
 }
diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll
index 78a3c83c3dd8..bea5bb5d6dd6 100644
--- a/test/CodeGen/AArch64/ldst-unscaledimm.ll
+++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var_8bit = global i8 0
 @var_16bit = global i16 0
@@ -11,7 +12,7 @@
 @varptr = global i8* null
 
 define void @ldst_8bit() {
-; CHECK: ldst_8bit:
+; CHECK-LABEL: ldst_8bit:
 
 ; No architectural support for loads to 16-bit or 8-bit since we
 ; promote i8 during lowering.
@@ -72,7 +73,7 @@ define void @ldst_8bit() {
 }
 
 define void @ldst_16bit() {
-; CHECK: ldst_16bit:
+; CHECK-LABEL: ldst_16bit:
 
 ; No architectural support for loads to 16-bit or 16-bit since we
 ; promote i16 during lowering.
@@ -140,7 +141,7 @@ define void @ldst_16bit() {
 }
 
 define void @ldst_32bit() {
-; CHECK: ldst_32bit:
+; CHECK-LABEL: ldst_32bit:
 
   %addr_8bit = load i8** @varptr
 
@@ -186,7 +187,7 @@ define void @ldst_32bit() {
 }
 
 define void @ldst_float() {
-; CHECK: ldst_float:
+; CHECK-LABEL: ldst_float:
 
   %addr_8bit = load i8** @varptr
   %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5
@@ -194,15 +195,17 @@ define void @ldst_float() {
 
   %valfp = load volatile float* %addrfp
 ; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+; CHECK-NOFP-NOT: ldur {{s[0-9]+}},
 
   store volatile float %valfp, float* %addrfp
 ; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+; CHECK-NOFP-NOT: stur {{s[0-9]+}},
 
   ret void
 }
 
 define void @ldst_double() {
-; CHECK: ldst_double:
+; CHECK-LABEL: ldst_double:
 
   %addr_8bit = load i8** @varptr
   %addrfp_8 = getelementptr i8* %addr_8bit, i64 4
@@ -210,9 +213,11 @@ define void @ldst_double() {
 
   %valfp = load volatile double* %addrfp
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+; CHECK-NOFP-NOT: ldur {{d[0-9]+}},
 
   store volatile double %valfp, double* %addrfp
 ; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+; CHECK-NOFP-NOT: stur {{d[0-9]+}},
 
    ret void
 }
diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll
index 1e7540d9be0a..44c1586e1ec7 100644
--- a/test/CodeGen/AArch64/ldst-unsignedimm.ll
+++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 @var_8bit = global i8 0
 @var_16bit = global i16 0
@@ -9,7 +10,7 @@
 @var_double = global double 0.0
 
 define void @ldst_8bit() {
-; CHECK: ldst_8bit:
+; CHECK-LABEL: ldst_8bit:
 
 ; No architectural support for loads to 16-bit or 8-bit since we
 ; promote i8 during lowering.
@@ -63,7 +64,7 @@ define void @ldst_8bit() {
 }
 
 define void @ldst_16bit() {
-; CHECK: ldst_16bit:
+; CHECK-LABEL: ldst_16bit:
 
 ; No architectural support for load volatiles to 16-bit promote i16 during
 ; lowering.
@@ -117,7 +118,7 @@ define void @ldst_16bit() {
 }
 
 define void @ldst_32bit() {
-; CHECK: ldst_32bit:
+; CHECK-LABEL: ldst_32bit:
 
 ; Straight 32-bit load/store
   %val32_noext = load volatile i32* @var_32bit
@@ -225,27 +226,31 @@ define void @ldst_complex_offsets() {
 }
 
 define void @ldst_float() {
-; CHECK: ldst_float:
+; CHECK-LABEL: ldst_float:
 
    %valfp = load volatile float* @var_float
 ; CHECK: adrp {{x[0-9]+}}, var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   store volatile float %valfp, float* @var_float
 ; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+; CHECK-NOFP-NOT: str {{s[0-9]+}},
 
    ret void
 }
 
 define void @ldst_double() {
-; CHECK: ldst_double:
+; CHECK-LABEL: ldst_double:
 
    %valfp = load volatile double* @var_double
 ; CHECK: adrp {{x[0-9]+}}, var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   store volatile double %valfp, double* @var_double
 ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+; CHECK-NOFP-NOT: str {{d[0-9]+}},
 
    ret void
 }
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index c5ce2411ed48..9a66a00189ea 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'AArch64' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll
index 9cfa8c5426e4..fc33aee10d84 100644
--- a/test/CodeGen/AArch64/literal_pools.ll
+++ b/test/CodeGen/AArch64/literal_pools.ll
@@ -1,11 +1,13 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
 
 @var32 = global i32 0
 @var64 = global i64 0
 
 define void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
     %val32 = load i32* @var32
     %val64 = load i64* @var64
 
@@ -60,13 +62,13 @@ define void @foo() {
 @vardouble = global double 0.0
 
 define void @floating_lits() {
-; CHECK: floating_lits:
+; CHECK-LABEL: floating_lits:
 
   %floatval = load float* @varfloat
   %newfloat = fadd float %floatval, 128.0
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
-; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
-; CHECK: fadd
+; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], #:lo12:[[CURLIT]]]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
 ; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]]
 ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
@@ -74,20 +76,26 @@ define void @floating_lits() {
 ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
 ; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]]
 ; CHECK-LARGE: fadd
+; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}},
+; CHECK-NOFP-LARGE-NOT: fadd
 
   store float %newfloat, float* @varfloat
 
   %doubleval = load double* @vardouble
   %newdouble = fadd double %doubleval, 129.0
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
-; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
-; CHECK: fadd
+; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], #:lo12:[[CURLIT]]]
+; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, [[LIT128]]
+; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, [[LIT129]]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
+; CHECK-NOFP-NOT: fadd
 
 ; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]]
 ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
 ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
 ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
 ; CHECK-LARGE: ldr {{d[0-9]+}}, [x[[LITADDR]]]
+; CHECK-NOFP-LARGE-NOT: ldr {{d[0-9]+}},
 
   store double %newdouble, double* @vardouble
 
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
index 5cbf5a37ec54..b5cef859e35f 100644
--- a/test/CodeGen/AArch64/local_vars.ll
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -24,7 +24,7 @@ define void @trivial_func() nounwind {
 }
 
 define void @trivial_fp_func() {
-; CHECK-WITHFP: trivial_fp_func:
+; CHECK-WITHFP-LABEL: trivial_fp_func:
 
 ; CHECK-WITHFP: sub sp, sp, #16
 ; CHECK-WITHFP: stp x29, x30, [sp]
@@ -43,7 +43,7 @@ define void @trivial_fp_func() {
 
 define void @stack_local() {
   %local_var = alloca i64
-; CHECK: stack_local:
+; CHECK-LABEL: stack_local:
 ; CHECK: sub sp, sp, #16
 
   %val = load i64* @var
diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll
index 5f3f4da0cdad..e04bb510ebf2 100644
--- a/test/CodeGen/AArch64/logical-imm.ll
+++ b/test/CodeGen/AArch64/logical-imm.ll
@@ -4,7 +4,7 @@
 @var64 = global i64 0
 
 define void @test_and(i32 %in32, i64 %in64) {
-; CHECK: test_and:
+; CHECK-LABEL: test_and:
 
   %val0 = and i32 %in32, 2863311530
   store volatile i32 %val0, i32* @var32
@@ -26,7 +26,7 @@ define void @test_and(i32 %in32, i64 %in64) {
 }
 
 define void @test_orr(i32 %in32, i64 %in64) {
-; CHECK: test_orr:
+; CHECK-LABEL: test_orr:
 
   %val0 = or i32 %in32, 2863311530
   store volatile i32 %val0, i32* @var32
@@ -48,7 +48,7 @@ define void @test_orr(i32 %in32, i64 %in64) {
 }
 
 define void @test_eor(i32 %in32, i64 %in64) {
-; CHECK: test_eor:
+; CHECK-LABEL: test_eor:
 
   %val0 = xor i32 %in32, 2863311530
   store volatile i32 %val0, i32* @var32
@@ -70,7 +70,7 @@ define void @test_eor(i32 %in32, i64 %in64) {
 }
 
 define void @test_mov(i32 %in32, i64 %in64) {
-; CHECK: test_mov:
+; CHECK-LABEL: test_mov:
   %val0 = add i32 %in32, 2863311530
   store i32 %val0, i32* @var32
 ; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
index bbbfcc1b9118..a08ba20c7f11 100644
--- a/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -7,7 +7,7 @@
 @var2_64 = global i64 0
 
 define void @logical_32bit() {
-; CHECK: logical_32bit:
+; CHECK-LABEL: logical_32bit:
   %val1 = load i32* @var1_32
   %val2 = load i32* @var2_32
 
@@ -97,7 +97,7 @@ define void @logical_32bit() {
 }
 
 define void @logical_64bit() {
-; CHECK: logical_64bit:
+; CHECK-LABEL: logical_64bit:
   %val1 = load i64* @var1_64
   %val2 = load i64* @var2_64
 
@@ -190,7 +190,7 @@ define void @logical_64bit() {
 }
 
 define void @flag_setting() {
-; CHECK: flag_setting:
+; CHECK-LABEL: flag_setting:
   %val1 = load i64* @var1_64
   %val2 = load i64* @var2_64
 
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.s b/test/CodeGen/AArch64/logical_shifted_reg.s
deleted file mode 100644
index 89aea580119b..000000000000
--- a/test/CodeGen/AArch64/logical_shifted_reg.s
+++ /dev/null
@@ -1,208 +0,0 @@
-	.file	"/home/timnor01/a64-trunk/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll"
-	.text
-	.globl	logical_32bit
-	.type	logical_32bit,@function
-logical_32bit:                          // @logical_32bit
-	.cfi_startproc
-// BB#0:
-	adrp	x0, var1_32
-	ldr	w1, [x0, #:lo12:var1_32]
-	adrp	x0, var2_32
-	ldr	w2, [x0, #:lo12:var2_32]
-	and	w3, w1, w2
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	bic	w3, w1, w2
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	orr	w3, w1, w2
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	orn	w3, w1, w2
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eor	w3, w1, w2
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eon	w3, w2, w1
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	and	w3, w1, w2, lsl #31
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	bic	w3, w1, w2, lsl #31
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	orr	w3, w1, w2, lsl #31
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	orn	w3, w1, w2, lsl #31
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eor	w3, w1, w2, lsl #31
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eon	w3, w1, w2, lsl #31
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	bic	w3, w1, w2, asr #10
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eor	w3, w1, w2, asr #10
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	orn	w3, w1, w2, lsr #1
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eor	w3, w1, w2, lsr #1
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	eon	w3, w1, w2, ror #20
-	adrp	x0, var1_32
-	str	w3, [x0, #:lo12:var1_32]
-	and	w1, w1, w2, ror #20
-	adrp	x0, var1_32
-	str	w1, [x0, #:lo12:var1_32]
-	ret
-.Ltmp0:
-	.size	logical_32bit, .Ltmp0-logical_32bit
-	.cfi_endproc
-
-	.globl	logical_64bit
-	.type	logical_64bit,@function
-logical_64bit:                          // @logical_64bit
-	.cfi_startproc
-// BB#0:
-	adrp	x0, var1_64
-	ldr	x0, [x0, #:lo12:var1_64]
-	adrp	x1, var2_64
-	ldr	x1, [x1, #:lo12:var2_64]
-	and	x2, x0, x1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	bic	x2, x0, x1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	orr	x2, x0, x1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	orn	x2, x0, x1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eor	x2, x0, x1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eon	x2, x1, x0
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	and	x2, x0, x1, lsl #63
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	bic	x2, x0, x1, lsl #63
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	orr	x2, x0, x1, lsl #63
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	orn	x2, x0, x1, lsl #63
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eor	x2, x0, x1, lsl #63
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eon	x2, x0, x1, lsl #63
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	bic	x2, x0, x1, asr #10
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eor	x2, x0, x1, asr #10
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	orn	x2, x0, x1, lsr #1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eor	x2, x0, x1, lsr #1
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	eon	x2, x0, x1, ror #20
-	adrp	x3, var1_64
-	str	x2, [x3, #:lo12:var1_64]
-	and	x0, x0, x1, ror #20
-	adrp	x1, var1_64
-	str	x0, [x1, #:lo12:var1_64]
-	ret
-.Ltmp1:
-	.size	logical_64bit, .Ltmp1-logical_64bit
-	.cfi_endproc
-
-	.globl	flag_setting
-	.type	flag_setting,@function
-flag_setting:                           // @flag_setting
-	.cfi_startproc
-// BB#0:
-	sub	sp, sp, #16
-	adrp	x0, var1_64
-	ldr	x0, [x0, #:lo12:var1_64]
-	adrp	x1, var2_64
-	ldr	x1, [x1, #:lo12:var2_64]
-	tst	x0, x1
-	str	x0, [sp, #8]            // 8-byte Folded Spill
-	str	x1, [sp]                // 8-byte Folded Spill
-	b.gt .LBB2_4
-	b	.LBB2_1
-.LBB2_1:                                // %test2
-	ldr	x0, [sp, #8]            // 8-byte Folded Reload
-	ldr	x1, [sp]                // 8-byte Folded Reload
-	tst	x0, x1, lsl #63
-	b.lt .LBB2_4
-	b	.LBB2_2
-.LBB2_2:                                // %test3
-	ldr	x0, [sp, #8]            // 8-byte Folded Reload
-	ldr	x1, [sp]                // 8-byte Folded Reload
-	tst	x0, x1, asr #12
-	b.gt .LBB2_4
-	b	.LBB2_3
-.LBB2_3:                                // %other_exit
-	adrp	x0, var1_64
-	ldr	x1, [sp, #8]            // 8-byte Folded Reload
-	str	x1, [x0, #:lo12:var1_64]
-	add	sp, sp, #16
-	ret
-.LBB2_4:                                // %ret
-	add	sp, sp, #16
-	ret
-.Ltmp2:
-	.size	flag_setting, .Ltmp2-flag_setting
-	.cfi_endproc
-
-	.type	var1_32,@object         // @var1_32
-	.bss
-	.globl	var1_32
-	.align	2
-var1_32:
-	.word	0                       // 0x0
-	.size	var1_32, 4
-
-	.type	var2_32,@object         // @var2_32
-	.globl	var2_32
-	.align	2
-var2_32:
-	.word	0                       // 0x0
-	.size	var2_32, 4
-
-	.type	var1_64,@object         // @var1_64
-	.globl	var1_64
-	.align	3
-var1_64:
-	.xword	0                       // 0x0
-	.size	var1_64, 8
-
-	.type	var2_64,@object         // @var2_64
-	.globl	var2_64
-	.align	3
-var2_64:
-	.xword	0                       // 0x0
-	.size	var2_64, 8
-
-
diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll
index b8a5fb932202..38e37db7b58c 100644
--- a/test/CodeGen/AArch64/movw-consts.ll
+++ b/test/CodeGen/AArch64/movw-consts.ll
@@ -1,50 +1,50 @@
 ; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i64 @test0() {
-; CHECK: test0:
+; CHECK-LABEL: test0:
 ; Not produced by move wide instructions, but good to make sure we can return 0 anyway:
 ; CHECK: mov x0, xzr
   ret i64 0
 }
 
 define i64 @test1() {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: movz x0, #1
   ret i64 1
 }
 
 define i64 @test2() {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movz x0, #65535
   ret i64 65535
 }
 
 define i64 @test3() {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: movz x0, #1, lsl #16
   ret i64 65536
 }
 
 define i64 @test4() {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movz x0, #65535, lsl #16
   ret i64 4294901760
 }
 
 define i64 @test5() {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: movz x0, #1, lsl #32
   ret i64 4294967296
 }
 
 define i64 @test6() {
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: movz x0, #65535, lsl #32
   ret i64 281470681743360
 }
 
 define i64 @test7() {
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: movz x0, #1, lsl #48
   ret i64 281474976710656
 }
@@ -52,19 +52,19 @@ define i64 @test7() {
 ; A 32-bit MOVN can generate some 64-bit patterns that a 64-bit one
 ; couldn't. Useful even for i64
 define i64 @test8() {
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: movn w0, #60875
   ret i64 4294906420
 }
 
 define i64 @test9() {
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: movn x0, #0
   ret i64 -1
 }
 
 define i64 @test10() {
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: movn x0, #60875, lsl #16
   ret i64 18446744069720047615
 }
@@ -74,49 +74,49 @@ define i64 @test10() {
 @var32 = global i32 0
 
 define void @test11() {
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: mov {{w[0-9]+}}, wzr
   store i32 0, i32* @var32
   ret void
 }
 
 define void @test12() {
-; CHECK: test12:
+; CHECK-LABEL: test12:
 ; CHECK: movz {{w[0-9]+}}, #1
   store i32 1, i32* @var32
   ret void
 }
 
 define void @test13() {
-; CHECK: test13:
+; CHECK-LABEL: test13:
 ; CHECK: movz {{w[0-9]+}}, #65535
   store i32 65535, i32* @var32
   ret void
 }
 
 define void @test14() {
-; CHECK: test14:
+; CHECK-LABEL: test14:
 ; CHECK: movz {{w[0-9]+}}, #1, lsl #16
   store i32 65536, i32* @var32
   ret void
 }
 
 define void @test15() {
-; CHECK: test15:
+; CHECK-LABEL: test15:
 ; CHECK: movz {{w[0-9]+}}, #65535, lsl #16
   store i32 4294901760, i32* @var32
   ret void
 }
 
 define void @test16() {
-; CHECK: test16:
+; CHECK-LABEL: test16:
 ; CHECK: movn {{w[0-9]+}}, #0
   store i32 -1, i32* @var32
   ret void
 }
 
 define i64 @test17() {
-; CHECK: test17:
+; CHECK-LABEL: test17:
 
   ; Mustn't MOVN w0 here.
 ; CHECK: movn x0, #2
diff --git a/test/CodeGen/AArch64/movw-shift-encoding.ll b/test/CodeGen/AArch64/movw-shift-encoding.ll
new file mode 100644
index 000000000000..ec133bd706b1
--- /dev/null
+++ b/test/CodeGen/AArch64/movw-shift-encoding.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s
+
+@var = global i32 0
+
+; CodeGen should ensure that the correct shift bits are set, because the linker
+; isn't going to!
+
+define i32* @get_var() {
+  ret i32* @var
+; CHECK: movz    x0, #:abs_g3:var        // encoding: [A,A,0xe0'A',0xd2'A']
+; CHECK: movk    x0, #:abs_g2_nc:var     // encoding: [A,A,0xc0'A',0xf2'A']
+; CHECK: movk    x0, #:abs_g1_nc:var     // encoding: [A,A,0xa0'A',0xf2'A']
+; CHECK: movk    x0, #:abs_g0_nc:var     // encoding: [A,A,0x80'A',0xf2'A']
+}
diff --git a/test/CodeGen/AArch64/neon-2velem-high.ll b/test/CodeGen/AArch64/neon-2velem-high.ll
new file mode 100644
index 000000000000..97031d98b7c0
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-2velem-high.ll
@@ -0,0 +1,331 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
+
+define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_vmull_high_n_s16:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:  ; vmulls(lanes 4-7 of %a, %b splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
+  %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  ret <4 x i32> %vmull15.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_vmull_high_n_s32:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:  ; vmulls(lanes 2-3 of %a, %b splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
+  %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  ret <2 x i64> %vmull9.i.i
+}
+
+define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_vmull_high_n_u16:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:  ; vmullu(lanes 4-7 of %a, %b splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
+  %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  ret <4 x i32> %vmull15.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_vmull_high_n_u32:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:  ; vmullu(lanes 2-3 of %a, %b splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
+  %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  ret <2 x i64> %vmull9.i.i
+}
+
+define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_vqdmull_high_n_s16:
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:  ; vqdmull(lanes 4-7 of %a, %b splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
+  %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  ret <4 x i32> %vqdmull15.i.i
+}
+
+define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_vqdmull_high_n_s32:
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:  ; vqdmull(lanes 2-3 of %a, %b splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
+  %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  ret <2 x i64> %vqdmull9.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlal_high_n_s16:
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
+entry:  ; %a + vmulls(lanes 4-7 of %b, %c splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlal_high_n_s32:
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; %a + vmulls(lanes 2-3 of %b, %c splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlal_high_n_u16:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
+entry:  ; %a + vmullu(lanes 4-7 of %b, %c splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlal_high_n_u32:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; %a + vmullu(lanes 2-3 of %b, %c splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vqdmlal_high_n_s16:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
+entry:  ; vqadds(%a, vqdmull(lanes 4-7 of %b, %c splatted to 4 lanes))
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+  %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
+  ret <4 x i32> %vqdmlal17.i.i
+}
+
+define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vqdmlal_high_n_s32:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; vqadds(%a, vqdmull(lanes 2-3 of %b, %c splatted to 2 lanes))
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+  %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
+  ret <2 x i64> %vqdmlal11.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
+entry:  ; %a - vmulls(lanes 4-7 of %b, %c splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+  ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; %a - vmulls(lanes 2-3 of %b, %c splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+  ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
+entry:  ; %a - vmullu(lanes 4-7 of %b, %c splatted to 4 lanes)
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+  ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; %a - vmullu(lanes 2-3 of %b, %c splatted to 2 lanes)
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+  ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vqdmlsl_high_n_s16:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
+entry:  ; vqsubs(%a, vqdmull(lanes 4-7 of %b, %c splatted to 4 lanes))
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
+  ret <4 x i32> %vqdmlsl17.i.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vqdmlsl_high_n_s32:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; vqsubs(%a, vqdmull(lanes 2-3 of %b, %c splatted to 2 lanes))
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
+  ret <2 x i64> %vqdmlsl11.i.i
+}
+
+define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
+; CHECK-LABEL: test_vmul_n_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:  ; fmul of (%b splatted to 2 lanes) and %a
+  %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
+  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
+  %mul.i = fmul <2 x float> %vecinit1.i, %a
+  ret <2 x float> %mul.i
+}
+
+define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
+; CHECK-LABEL: test_vmulq_n_f32:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:  ; fmul of (%b splatted to 4 lanes) and %a
+  %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
+  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
+  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
+  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
+  %mul.i = fmul <4 x float> %vecinit3.i, %a
+  ret <4 x float> %mul.i
+}
+
+define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
+; CHECK-LABEL: test_vmulq_n_f64:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:  ; fmul of (%b splatted to 2 lanes) and %a
+  %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
+  %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
+  %mul.i = fmul <2 x double> %vecinit1.i, %a
+  ret <2 x double> %mul.i
+}
+
+define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
+; CHECK-LABEL: test_vfma_n_f32:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; fma(%b, %n splatted to 2 lanes, %a)
+  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
+  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
+; CHECK-LABEL: test_vfmaq_n_f32:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; fma(%b, %n splatted to 4 lanes, %a)
+  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
+  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
+  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
+  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
+; CHECK-LABEL: test_vfms_n_f32:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; fma(-0.0 - %b, %n splatted to 2 lanes, %a)
+  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
+  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
+  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
+  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
+  ret <2 x float> %1
+}
+
+define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
+; CHECK-LABEL: test_vfmsq_n_f32:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:  ; fma(-0.0 - %b, %n splatted to 4 lanes, %a)
+  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
+  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
+  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
+  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
+  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
+  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
+  ret <4 x float> %1
+}
diff --git a/test/CodeGen/AArch64/neon-2velem.ll b/test/CodeGen/AArch64/neon-2velem.ll
new file mode 100644
index 000000000000..9d6184243713
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-2velem.ll
@@ -0,0 +1,2550 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
+
+declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
+
+declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
+
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmla_lane_s16:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:  ; %a + %b * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i16> %shuffle, %b
+  %add = add <4 x i16> %mul, %a
+  ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s16:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:  ; %a + %b * (lane 3 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <8 x i16> %shuffle, %b
+  %add = add <8 x i16> %mul, %a
+  ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmla_lane_s32:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:  ; %a + %b * (lane 1 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %mul = mul <2 x i32> %shuffle, %b
+  %add = add <2 x i32> %mul, %a
+  ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s32:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:  ; %a + %b * (lane 1 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %mul = mul <4 x i32> %shuffle, %b
+  %add = add <4 x i32> %mul, %a
+  ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmla_laneq_s16:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry:  ; %a + %b * (lane 7 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <4 x i16> %shuffle, %b
+  %add = add <4 x i16> %mul, %a
+  ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s16:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry:  ; %a + %b * (lane 7 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <8 x i16> %shuffle, %b
+  %add = add <8 x i16> %mul, %a
+  ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmla_laneq_s32:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:  ; %a + %b * (lane 3 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %mul = mul <2 x i32> %shuffle, %b
+  %add = add <2 x i32> %mul, %a
+  ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s32:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:  ; %a + %b * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i32> %shuffle, %b
+  %add = add <4 x i32> %mul, %a
+  ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmls_lane_s16:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:  ; %a - %b * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i16> %shuffle, %b
+  %sub = sub <4 x i16> %a, %mul
+  ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s16:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:  ; %a - %b * (lane 3 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <8 x i16> %shuffle, %b
+  %sub = sub <8 x i16> %a, %mul
+  ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmls_lane_s32:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:  ; %a - %b * (lane 1 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %mul = mul <2 x i32> %shuffle, %b
+  %sub = sub <2 x i32> %a, %mul
+  ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s32:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:  ; %a - %b * (lane 1 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %mul = mul <4 x i32> %shuffle, %b
+  %sub = sub <4 x i32> %a, %mul
+  ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmls_laneq_s16:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry:  ; %a - %b * (lane 7 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <4 x i16> %shuffle, %b
+  %sub = sub <4 x i16> %a, %mul
+  ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s16:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry:  ; %a - %b * (lane 7 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <8 x i16> %shuffle, %b
+  %sub = sub <8 x i16> %a, %mul
+  ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmls_laneq_s32:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:  ; %a - %b * (lane 3 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %mul = mul <2 x i32> %shuffle, %b
+  %sub = sub <2 x i32> %a, %mul
+  ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s32:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:  ; %a - %b * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i32> %shuffle, %b
+  %sub = sub <4 x i32> %a, %mul
+  ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_s16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:  ; %a * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_s16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:  ; %a * (lane 3 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_s32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:  ; %a * (lane 1 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_s32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:  ; %a * (lane 1 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_u16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:  ; %a * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_u16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:  ; %a * (lane 3 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_u32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:  ; %a * (lane 1 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_u32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:  ; %a * (lane 1 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_s16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry:  ; %a * (lane 7 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry:  ; %a * (lane 7 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_s32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:  ; %a * (lane 3 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:  ; %a * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_u16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry:  ; %a * (lane 7 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry:  ; %a * (lane 7 of %v broadcast to 8 lanes)
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_u32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:  ; %a * (lane 3 of %v broadcast to 2 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:  ; %a * (lane 3 of %v broadcast to 4 lanes)
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfma_lane_f32:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:  ; fma(lane 1 of %v broadcast to 2 lanes, %b, %a)
+  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+
+define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmaq_lane_f32:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:  ; fma(lane 1 of %v broadcast to 4 lanes, %b, %a)
+  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfma_laneq_f32:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:  ; fma(lane 3 of %v broadcast to 2 lanes, %b, %a)
+  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f32:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:  ; fma(lane 3 of %v broadcast to 4 lanes, %b, %a)
+  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfms_lane_f32:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; negate %v via fsub from -0.0, splat lane 1 into llvm.fma; expect by-element fmls
+  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmsq_lane_f32:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; negate the 2-lane %v via fsub from -0.0, splat lane 1 to 4 lanes into llvm.fma; expect fmls
+  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfms_laneq_f32:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; negate the 4-lane %v via fsub from -0.0, splat lane 3 to 2 lanes into llvm.fma; expect fmls
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f32:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; negate %v via fsub from -0.0, splat lane 3 into llvm.fma; expect by-element fmls
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmaq_lane_f64:
+; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry: ; splat the single lane of %v to 2 lanes and feed it to llvm.fma; expect by-element fmla
+  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+  ret <2 x double> %0
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f64:
+; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+entry: ; splat lane 1 of %v and feed it to llvm.fma with %b and %a; expect by-element fmla
+  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+  ret <2 x double> %0
+}
+
+define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmsq_lane_f64:
+; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry: ; negate the 1-lane %v via fsub from -0.0, splat it to 2 lanes into llvm.fma; expect fmls
+  %sub = fsub <1 x double> <double -0.000000e+00>, %v
+  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+  ret <2 x double> %0
+}
+
+define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f64:
+; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+entry: ; negate %v via fsub from -0.0, splat lane 1 into llvm.fma; expect by-element fmls
+  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
+  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+  ret <2 x double> %0
+}
+
+define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_s16:
+; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vmulls of %b by a splat of lane 3 of %v, added to %a; expect the signed smlal
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_s32:
+; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vmulls of %b by a splat of lane 1 of %v, added to %a; expect the signed smlal
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s16:
+; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry: ; vmulls of %b by a splat of lane 7 of the 8-lane %v, added to %a; expect the signed smlal
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s32:
+; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; vmulls of %b by a splat of lane 3 of the 4-lane %v, added to %a; expect the signed smlal
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s16:
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vmulls of the upper half of %b by a splat of lane 3 of %v, added to %a; expect signed smlal2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s32:
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vmulls of the upper half of %b by a splat of lane 1 of %v, added to %a; expect signed smlal2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s16:
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry: ; vmulls of the upper half of %b by a splat of lane 7 of %v, added to %a; expect signed smlal2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s32:
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; vmulls of the upper half of %b by a splat of lane 3 of %v, added to %a; expect signed smlal2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s16:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vmulls of %b by a splat of lane 3 of %v, subtracted from %a; expect the signed smlsl
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s32:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vmulls of %b by a splat of lane 1 of %v, subtracted from %a; expect the signed smlsl
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s16:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry: ; vmulls of %b by a splat of lane 7 of the 8-lane %v, subtracted from %a; expect signed smlsl
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s32:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; vmulls of %b by a splat of lane 3 of the 4-lane %v, subtracted from %a; expect signed smlsl
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vmulls of the upper half of %b by a splat of lane 3 of %v, subtracted from %a; expect smlsl2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vmulls of the upper half of %b by a splat of lane 1 of %v, subtracted from %a; expect smlsl2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry: ; vmulls of the upper half of %b by a splat of lane 7 of %v, subtracted from %a; expect smlsl2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; vmulls of the upper half of %b by a splat of lane 3 of %v, subtracted from %a; expect smlsl2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_u16:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vmullu of %b by a splat of lane 3 of %v, added to %a; expect the unsigned umlal
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_u32:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vmullu of %b by a splat of lane 1 of %v, added to %a; expect the unsigned umlal
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u16:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry: ; vmullu of %b by a splat of lane 7 of the 8-lane %v, added to %a; expect the unsigned umlal
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u32:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; vmullu of %b by a splat of lane 3 of the 4-lane %v, added to %a; expect the unsigned umlal
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u16:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vmullu of the upper half of %b by a splat of lane 3 of %v, added to %a; expect unsigned umlal2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u32:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vmullu of the upper half of %b by a splat of lane 1 of %v, added to %a; expect unsigned umlal2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u16:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry: ; vmullu of the upper half of %b by a splat of lane 7 of %v, added to %a; expect unsigned umlal2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u32:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; vmullu of the upper half of %b by a splat of lane 3 of %v, added to %a; expect unsigned umlal2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u16:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vmullu of %b by a splat of lane 3 of %v, subtracted from %a; expect the unsigned umlsl
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u32:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vmullu of %b by a splat of lane 1 of %v, subtracted from %a; expect the unsigned umlsl
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u16:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry: ; vmullu of %b by a splat of lane 7 of the 8-lane %v, subtracted from %a; expect unsigned umlsl
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u32:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; vmullu of %b by a splat of lane 3 of the 4-lane %v, subtracted from %a; expect unsigned umlsl
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vmullu of the upper half of %b by a splat of lane 3 of %v, subtracted from %a; expect umlsl2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vmullu of the upper half of %b by a splat of lane 1 of %v, subtracted from %a; expect umlsl2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry: ; vmullu of the upper half of %b by a splat of lane 7 of %v, subtracted from %a; expect umlsl2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; vmullu of the upper half of %b by a splat of lane 3 of %v, subtracted from %a; expect umlsl2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_s16:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vmulls of %a by a splat of lane 3 of %v; expect the signed by-element smull
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_s32:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vmulls of %a by a splat of lane 1 of %v; expect the signed by-element smull
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_u16:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vmullu of %a by a splat of lane 3 of %v; expect the unsigned by-element umull
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_u32:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vmullu of %a by a splat of lane 1 of %v; expect the unsigned by-element umull
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s16:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vmulls of the upper half of %a by a splat of lane 3 of %v; expect the signed smull2
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s32:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vmulls of the upper half of %a by a splat of lane 1 of %v; expect the signed smull2
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u16:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vmullu of the upper half of %a by a splat of lane 3 of %v; expect the unsigned umull2
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u32:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vmullu of the upper half of %a by a splat of lane 1 of %v; expect the unsigned umull2
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_s16:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry: ; vmulls of %a by a splat of lane 7 of the 8-lane %v; expect the signed by-element smull
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_s32:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; vmulls of %a by a splat of lane 3 of the 4-lane %v; expect the signed by-element smull
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_u16:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+entry: ; vmullu of %a by a splat of lane 7 of the 8-lane %v; expect the unsigned by-element umull
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_u32:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry: ; vmullu of %a by a splat of lane 3 of the 4-lane %v; expect the unsigned by-element umull
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s16:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry: ; vmulls of the upper half of %a by a splat of lane 7 of %v; expect the signed smull2
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s32:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; vmulls of the upper half of %a by a splat of lane 3 of %v; expect the signed smull2
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u16:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry: ; vmullu of the upper half of %a by a splat of lane 7 of %v; expect the unsigned umull2
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u32:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry: ; vmullu of the upper half of %a by a splat of lane 3 of %v; expect the unsigned umull2
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s16:
+; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vqdmull of %b by a splat of lane 3 of %v, accumulated into %a with vqadds; expect sqdmlal
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+  ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s32:
+; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vqdmull of %b by a splat of lane 1 of %v, accumulated into %a with vqadds; expect sqdmlal
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+  ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vqdmull of the upper half of %b by a splat of lane 3 of %v, vqadds into %a; expect sqdmlal2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+  ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vqdmull of the upper half of %b by a splat of lane 1 of %v, vqadds into %a; expect sqdmlal2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+  ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s16:
+; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vqdmull of %b by a splat of lane 3 of %v, removed from %a with vqsubs; expect sqdmlsl
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+  ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s32:
+; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry: ; vqdmull of %b by a splat of lane 1 of %v, removed from %a with vqsubs; expect sqdmlsl
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+  ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry: ; vqdmull of the upper half of %b by a splat of lane 3 of %v, vqsubs from %a; expect sqdmlsl2
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+  ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry: ; vqdmull of the upper half of %b by a splat of lane 1 of %v, vqsubs from %a; expect sqdmlsl2
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+  ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s16:
+; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry: ; vqdmull of %a by a splat of lane 3 of %v; expect the by-element sqdmull
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s32:
+; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s16:
+; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s32:
+; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> ; broadcast element 3 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s16:
+; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %a
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s32:
+; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; elements 2-3 of %a
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s16:
+; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %a
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> ; broadcast element 7 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s32:
+; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; elements 2-3 of %a
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> ; broadcast element 3 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s16:
+; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqdmulh2.i
+}
+
+define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s16:
+; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqdmulh2.i
+}
+
+define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s32:
+; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqdmulh2.i
+}
+
+define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s32:
+; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; broadcast element 1 of %v
+  %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqdmulh2.i
+}
+
+define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s16:
+; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqrdmulh2.i
+}
+
+define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s16:
+; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqrdmulh2.i
+}
+
+define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s32:
+; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqrdmulh2.i
+}
+
+define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s32:
+; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; broadcast element 1 of %v
+  %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqrdmulh2.i
+}
+
+define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmul_lane_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %mul = fmul <2 x float> %shuffle, %a
+  ret <2 x float> %mul
+}
+
+define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmul_lane_f64:
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+entry:
+  %0 = bitcast <1 x double> %a to <8 x i8>
+  %1 = bitcast <8 x i8> %0 to double ; %a reinterpreted as a scalar double
+  %extract = extractelement <1 x double> %v, i32 0 ; element 0 of %v
+  %2 = fmul double %1, %extract
+  %3 = insertelement <1 x double> undef, double %2, i32 0 ; wrap the scalar product back into <1 x double>
+  ret <1 x double> %3
+}
+
+define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulq_lane_f32:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; broadcast element 1 of %v
+  %mul = fmul <4 x float> %shuffle, %a
+  ret <4 x float> %mul
+}
+
+define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmulq_lane_f64:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = fmul <2 x double> %shuffle, %a
+  ret <2 x double> %mul
+}
+
+define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmul_laneq_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> ; broadcast element 3 of %v
+  %mul = fmul <2 x float> %shuffle, %a
+  ret <2 x float> %mul
+}
+
+define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmul_laneq_f64:
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+entry:
+  %0 = bitcast <1 x double> %a to <8 x i8>
+  %1 = bitcast <8 x i8> %0 to double ; %a reinterpreted as a scalar double
+  %extract = extractelement <2 x double> %v, i32 1 ; element 1 of %v
+  %2 = fmul double %1, %extract
+  %3 = insertelement <1 x double> undef, double %2, i32 0 ; wrap the scalar product back into <1 x double>
+  ret <1 x double> %3
+}
+
+define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f32:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %mul = fmul <4 x float> %shuffle, %a
+  ret <4 x float> %mul
+}
+
+define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f64:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+entry:
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %mul = fmul <2 x double> %shuffle, %a
+  ret <2 x double> %mul
+}
+
+define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulx_lane_f32:
+; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+  ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f32:
+; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; broadcast element 1 of %v
+  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+  ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f64:
+; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+  ret <2 x double> %vmulx2.i
+}
+
+define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulx_laneq_f32:
+; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> ; broadcast element 3 of %v
+  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+  ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f32:
+; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; broadcast element 3 of %v
+  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+  ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f64:
+; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+entry:
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> ; broadcast element 1 of %v
+  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+  ret <2 x double> %vmulx2.i
+}
+
+define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmla_lane_s16_0:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %b
+  %add = add <4 x i16> %mul, %a
+  ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s16_0:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %b
+  %add = add <8 x i16> %mul, %a
+  ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmla_lane_s32_0:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %b
+  %add = add <2 x i32> %mul, %a
+  ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s32_0:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %b
+  %add = add <4 x i32> %mul, %a
+  ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmla_laneq_s16_0:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %b
+  %add = add <4 x i16> %mul, %a
+  ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s16_0:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %b
+  %add = add <8 x i16> %mul, %a
+  ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmla_laneq_s32_0:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %b
+  %add = add <2 x i32> %mul, %a
+  ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s32_0:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %b
+  %add = add <4 x i32> %mul, %a
+  ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmls_lane_s16_0:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %b
+  %sub = sub <4 x i16> %a, %mul
+  ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s16_0:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %b
+  %sub = sub <8 x i16> %a, %mul
+  ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmls_lane_s32_0:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %b
+  %sub = sub <2 x i32> %a, %mul
+  ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s32_0:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %b
+  %sub = sub <4 x i32> %a, %mul
+  ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmls_laneq_s16_0:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %b
+  %sub = sub <4 x i16> %a, %mul
+  ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s16_0:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %b
+  %sub = sub <8 x i16> %a, %mul
+  ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmls_laneq_s32_0:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %b
+  %sub = sub <2 x i32> %a, %mul
+  ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s32_0:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %b
+  %sub = sub <4 x i32> %a, %mul
+  ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_s16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_s16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_s32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_s32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_u16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_u16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_u32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_u32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_s16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_s32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_u16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i16> %shuffle, %a
+  ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <8 x i16> %shuffle, %a
+  ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_u32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <2 x i32> %shuffle, %a
+  ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %mul = mul <4 x i32> %shuffle, %a
+  ret <4 x i32> %mul
+}
+
+define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfma_lane_f32_0:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmaq_lane_f32_0:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfma_laneq_f32_0:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f32_0:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfms_lane_f32_0:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v ; -0.0 - %v, i.e. %v negated
+  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer ; broadcast element 0 of the negated vector
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmsq_lane_f32_0:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v ; -0.0 - %v, i.e. %v negated
+  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer ; broadcast element 0 of the negated vector
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfms_laneq_f32_0:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v ; -0.0 - %v, i.e. %v negated
+  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer ; broadcast element 0 of the negated vector
+  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+  ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f32_0:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v ; -0.0 - %v, i.e. %v negated
+  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer ; broadcast element 0 of the negated vector
+  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f64_0:
+; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+  ret <2 x double> %0
+}
+
+define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f64_0:
+; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v ; -0.0 - %v, i.e. %v negated
+  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer ; broadcast element 0 of the negated vector
+  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+  ret <2 x double> %0
+}
+
+define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_s16_0:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_s32_0:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s16_0:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s32_0:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s16_0:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %b
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s32_0:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; elements 2-3 of %b
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %b
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; elements 2-3 of %b
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s16_0:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; broadcast element 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s32_0:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s16_0:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s32_0:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_u16_0:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_u32_0:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u16_0:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u32_0:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u16_0:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u32_0:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %add = add <4 x i32> %vmull2.i, %a
+  ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %add = add <2 x i64> %vmull2.i, %a
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u16_0:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u32_0:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u16_0:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u32_0:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %sub = sub <4 x i32> %a, %vmull2.i
+  ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %sub = sub <2 x i64> %a, %vmull2.i
+  ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_s16_0:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_s32_0:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_u16_0:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_u32_0:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s16_0:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %a
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s32_0:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %a
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u16_0:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %a
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u32_0:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %a
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_s16_0:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_s32_0:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_u16_0:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_u32_0:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s16_0:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %a
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s32_0:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %a
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u16_0:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %a
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u32_0:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %a
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s16_0:
+; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+  ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s32_0:
+; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+  ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+  ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+  ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
+; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+  ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
+; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+  ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %b
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+  ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %b
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+  ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s16_0:
+; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s32_0:
+; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s16_0:
+; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s32_0:
+; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %a
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %a
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; upper half of %a
+  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; upper half of %a
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s16_0:
+; CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqdmulh2.i
+}
+
+define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
+; CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqdmulh2.i
+}
+
+define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s32_0:
+; CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqdmulh2.i
+}
+
+define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
+; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqdmulh2.i
+}
+
+define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqrdmulh2.i
+}
+
+define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqrdmulh2.i
+}
+
+define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqrdmulh2.i
+}
+
+define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqrdmulh2.i
+}
+
+define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmul_lane_f32_0:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %mul = fmul <2 x float> %shuffle, %a
+  ret <2 x float> %mul
+}
+
+define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulq_lane_f32_0:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %mul = fmul <4 x float> %shuffle, %a
+  ret <4 x float> %mul
+}
+
+define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmul_laneq_f32_0:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %mul = fmul <2 x float> %shuffle, %a
+  ret <2 x float> %mul
+}
+
+define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmul_laneq_f64_0:
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+entry:
+  %0 = bitcast <1 x double> %a to <8 x i8>
+  %1 = bitcast <8 x i8> %0 to double ; %a reinterpreted as a scalar double
+  %extract = extractelement <2 x double> %v, i32 0 ; lane 0 of %v
+  %2 = fmul double %1, %extract
+  %3 = insertelement <1 x double> undef, double %2, i32 0
+  ret <1 x double> %3
+}
+
+define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f32_0:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %mul = fmul <4 x float> %shuffle, %a
+  ret <4 x float> %mul
+}
+
+define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f64_0:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %mul = fmul <2 x double> %shuffle, %a
+  ret <2 x double> %mul
+}
+
+define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulx_lane_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+  ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+  ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f64_0:
+; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+  ret <2 x double> %vmulx2.i
+}
+
+define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulx_laneq_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+  ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+  ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f64_0:
+; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer ; splat lane 0 of %v
+  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+  ret <2 x double> %vmulx2.i
+}
+
diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll
new file mode 100644
index 000000000000..171e2b2edad0
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-3vdiff.ll
@@ -0,0 +1,1806 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+; llvm.arm.neon.* intrinsic declarations, grouped by intrinsic name and
+; listed narrowest-element result first within each group.
+; NOTE(review): presumably each one is called by a test in this file; only
+; the vraddhn.* calls were visible during review - confirm the rest.
+
+; -- vraddhn --
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>)
+
+; -- vrsubhn --
+declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>)
+
+; -- vabds --
+declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
+declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
+declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
+
+; -- vabdu --
+declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
+declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
+declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
+
+; -- vmulls --
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>)
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
+
+; -- vmullu --
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>)
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
+
+; -- vmullp --
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>)
+
+; -- vqdmull --
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+; -- vqadds --
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
+
+; -- vqsubs --
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
+
+define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddl_s8:
+; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:  ; sign-extend both operands, then add; expected to select one saddl
+  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddl_s16:
+; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:  ; sign-extend both operands, then add; expected to select one saddl
+  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
+  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
+  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddl_s32:
+; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:  ; sign-extend both operands, then add; expected to select one saddl
+  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
+  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
+  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddl_u8:
+; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:  ; zero-extend both operands, then add; expected to select one uaddl
+  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddl_u16:
+; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:  ; zero-extend both operands, then add; expected to select one uaddl
+  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddl_u32:
+; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:  ; zero-extend both operands, then add; expected to select one uaddl
+  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddl_high_s8:
+; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:  ; sign-extend lanes 8-15 of each input, then add; expected to select saddl2
+  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+  %add.i = add <8 x i16> %0, %1
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddl_high_s16:
+; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; sign-extend lanes 4-7 of each input, then add; expected to select saddl2
+  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+  %add.i = add <4 x i32> %0, %1
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddl_high_s32:
+; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; sign-extend lanes 2-3 of each input, then add; expected to select saddl2
+  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+  %add.i = add <2 x i64> %0, %1
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddl_high_u8:
+; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:  ; zero-extend lanes 8-15 of each input, then add; expected to select uaddl2
+  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+  %add.i = add <8 x i16> %0, %1
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddl_high_u16:
+; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; zero-extend lanes 4-7 of each input, then add; expected to select uaddl2
+  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+  %add.i = add <4 x i32> %0, %1
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddl_high_u32:
+; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; zero-extend lanes 2-3 of each input, then add; expected to select uaddl2
+  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+  %add.i = add <2 x i64> %0, %1
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddw_s8:
+; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:  ; sign-extend %b and add it to the already-wide %a; expected to select saddw
+  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
+  %add.i = add <8 x i16> %vmovl.i.i, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddw_s16:
+; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:  ; sign-extend %b and add it to the already-wide %a; expected to select saddw
+  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
+  %add.i = add <4 x i32> %vmovl.i.i, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddw_s32:
+; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:  ; sign-extend %b and add it to the already-wide %a; expected to select saddw
+  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
+  %add.i = add <2 x i64> %vmovl.i.i, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddw_u8:
+; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:  ; zero-extend %b and add it to the already-wide %a; expected to select uaddw
+  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+  %add.i = add <8 x i16> %vmovl.i.i, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddw_u16:
+; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:  ; zero-extend %b and add it to the already-wide %a; expected to select uaddw
+  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+  %add.i = add <4 x i32> %vmovl.i.i, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddw_u32:
+; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:  ; zero-extend %b and add it to the already-wide %a; expected to select uaddw
+  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+  %add.i = add <2 x i64> %vmovl.i.i, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddw_high_s8:
+; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:  ; sign-extend lanes 8-15 of %b and add to %a; expected to select saddw2
+  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %add.i = add <8 x i16> %0, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddw_high_s16:
+; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:  ; sign-extend lanes 4-7 of %b and add to %a; expected to select saddw2
+  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %add.i = add <4 x i32> %0, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddw_high_s32:
+; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:  ; sign-extend lanes 2-3 of %b and add to %a; expected to select saddw2
+  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %add.i = add <2 x i64> %0, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddw_high_u8:
+; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:  ; zero-extend lanes 8-15 of %b and add to %a; expected to select uaddw2
+  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %add.i = add <8 x i16> %0, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddw_high_u16:
+; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:  ; zero-extend lanes 4-7 of %b and add to %a; expected to select uaddw2
+  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %add.i = add <4 x i32> %0, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddw_high_u32:
+; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:  ; zero-extend lanes 2-3 of %b and add to %a; expected to select uaddw2
+  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %add.i = add <2 x i64> %0, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubl_s8:
+; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:  ; sign-extend both operands, then subtract; expected to select one ssubl
+  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubl_s16:
+; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:  ; sign-extend both operands, then subtract; expected to select one ssubl
+  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
+  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
+  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubl_s32:
+; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:  ; sign-extend both operands, then subtract; expected to select one ssubl
+  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
+  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
+  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubl_u8:
+; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:  ; zero-extend both operands, then subtract; expected to select one usubl
+  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubl_u16:
+; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:  ; zero-extend both operands, then subtract; expected to select one usubl
+  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubl_u32:
+; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:  ; zero-extend both operands, then subtract; expected to select one usubl
+  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubl_high_s8:
+; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:  ; sign-extend lanes 8-15 of each input, then subtract; expected to select ssubl2
+  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+  %sub.i = sub <8 x i16> %0, %1
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubl_high_s16:
+; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; sign-extend lanes 4-7 of each input, then subtract; expected to select ssubl2
+  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+  %sub.i = sub <4 x i32> %0, %1
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubl_high_s32:
+; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; sign-extend lanes 2-3 of each input, then subtract; expected to select ssubl2
+  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+  %sub.i = sub <2 x i64> %0, %1
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubl_high_u8:
+; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:  ; zero-extend lanes 8-15 of each input, then subtract; expected to select usubl2
+  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+  %sub.i = sub <8 x i16> %0, %1
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubl_high_u16:
+; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; zero-extend lanes 4-7 of each input, then subtract; expected to select usubl2
+  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+  %sub.i = sub <4 x i32> %0, %1
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubl_high_u32:
+; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; zero-extend lanes 2-3 of each input, then subtract; expected to select usubl2
+  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+  %sub.i = sub <2 x i64> %0, %1
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubw_s8:
+; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:  ; subtract the sign-extended %b from the already-wide %a; expected to select ssubw
+  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
+  %sub.i = sub <8 x i16> %a, %vmovl.i.i
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubw_s16:
+; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:  ; subtract the sign-extended %b from the already-wide %a; expected to select ssubw
+  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
+  %sub.i = sub <4 x i32> %a, %vmovl.i.i
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubw_s32:
+; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:  ; subtract the sign-extended %b from the already-wide %a; expected to select ssubw
+  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
+  %sub.i = sub <2 x i64> %a, %vmovl.i.i
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubw_u8:
+; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:  ; subtract the zero-extended %b from the already-wide %a; expected to select usubw
+  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+  %sub.i = sub <8 x i16> %a, %vmovl.i.i
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubw_u16:
+; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:  ; subtract the zero-extended %b from the already-wide %a; expected to select usubw
+  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+  %sub.i = sub <4 x i32> %a, %vmovl.i.i
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubw_u32:
+; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:  ; subtract the zero-extended %b from the already-wide %a; expected to select usubw
+  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+  %sub.i = sub <2 x i64> %a, %vmovl.i.i
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubw_high_s8:
+; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:  ; subtract sign-extended lanes 8-15 of %b from %a; expected to select ssubw2
+  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %sub.i = sub <8 x i16> %a, %0
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubw_high_s16:
+; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:  ; subtract sign-extended lanes 4-7 of %b from %a; expected to select ssubw2
+  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %sub.i = sub <4 x i32> %a, %0
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubw_high_s32:
+; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:  ; subtract sign-extended lanes 2-3 of %b from %a; expected to select ssubw2
+  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %sub.i = sub <2 x i64> %a, %0
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubw_high_u8:
+; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:  ; subtract zero-extended lanes 8-15 of %b from %a; expected to select usubw2
+  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+  %sub.i = sub <8 x i16> %a, %0
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubw_high_u16:
+; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:  ; subtract zero-extended lanes 4-7 of %b from %a; expected to select usubw2
+  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+  %sub.i = sub <4 x i32> %a, %0
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubw_high_u32:
+; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:  ; subtract zero-extended lanes 2-3 of %b from %a; expected to select usubw2
+  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+  %sub.i = sub <2 x i64> %a, %0
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_s16:
+; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; add, shift right by 8 (half the lane width), truncate; expected to select addhn
+  %vaddhn.i = add <8 x i16> %a, %b
+  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
+  ret <8 x i8> %vaddhn2.i
+}
+
+define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_s32:
+; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; add, shift right by 16 (half the lane width), truncate; expected to select addhn
+  %vaddhn.i = add <4 x i32> %a, %b
+  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
+  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
+  ret <4 x i16> %vaddhn2.i
+}
+
+define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_s64:
+; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; add, shift right by 32 (half the lane width), truncate; expected to select addhn
+  %vaddhn.i = add <2 x i64> %a, %b
+  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
+  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
+  ret <2 x i32> %vaddhn2.i
+}
+
+define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_u16:
+; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; add, shift right by 8 (half the lane width), truncate; expected to select addhn
+  %vaddhn.i = add <8 x i16> %a, %b
+  %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
+  ret <8 x i8> %vaddhn2.i
+}
+
+define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_u32:
+; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; add, shift right by 16 (half the lane width), truncate; expected to select addhn
+  %vaddhn.i = add <4 x i32> %a, %b
+  %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
+  %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
+  ret <4 x i16> %vaddhn2.i
+}
+
+define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_u64:
+; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; add, shift right by 32 (half the lane width), truncate; expected to select addhn
+  %vaddhn.i = add <2 x i64> %a, %b
+  %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
+  %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
+  ret <2 x i32> %vaddhn2.i
+}
+
+define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_high_s16:
+; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; add, lshr 8, trunc, then concatenate after %r as the second i64 lane; expected to select addhn2
+  %vaddhn.i.i = add <8 x i16> %a, %b
+  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_high_s32:
+; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; add, lshr 16, trunc, then concatenate after %r as the second i64 lane; expected to select addhn2
+  %vaddhn.i.i = add <4 x i32> %a, %b
+  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_high_s64:
+; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; add, lshr 32, trunc, then concatenate after %r as the second i64 lane; expected to select addhn2
+  %vaddhn.i.i = add <2 x i64> %a, %b
+  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
+  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_high_u16:
+; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; add, lshr 8, trunc, then concatenate after %r as the second i64 lane; expected to select addhn2
+  %vaddhn.i.i = add <8 x i16> %a, %b
+  %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_high_u32:
+; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; add, lshr 16, trunc, then concatenate after %r as the second i64 lane; expected to select addhn2
+  %vaddhn.i.i = add <4 x i32> %a, %b
+  %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+  %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_high_u64:
+; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; add, lshr 32, trunc, then concatenate after %r as the second i64 lane; expected to select addhn2
+  %vaddhn.i.i = add <2 x i64> %a, %b
+  %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
+  %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_s16:
+; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; direct call to the vraddhn.v8i8 intrinsic; expected to select raddhn
+  %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i8> %vraddhn2.i
+}
+
+define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_s32:
+; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; direct call to the vraddhn.v4i16 intrinsic; expected to select raddhn
+  %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i16> %vraddhn2.i
+}
+
+define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_s64:
+; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; direct call to the vraddhn.v2i32 intrinsic; expected to select raddhn
+  %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i32> %vraddhn2.i
+}
+
+define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_u16:
+; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; direct call to the vraddhn.v8i8 intrinsic; expected to select raddhn
+  %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i8> %vraddhn2.i
+}
+
+define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_u32:
+; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; direct call to the vraddhn.v4i16 intrinsic; expected to select raddhn
+  %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i16> %vraddhn2.i
+}
+
+define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_u64:
+; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; direct call to the vraddhn.v2i32 intrinsic; expected to select raddhn
+  %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i32> %vraddhn2.i
+}
+
+define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_high_s16:
+; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; vraddhn intrinsic result concatenated after %r as the second i64 lane; expected to select raddhn2
+  %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_high_s32:
+; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; vraddhn intrinsic result concatenated after %r as the second i64 lane; expected to select raddhn2
+  %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_high_s64:
+; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; vraddhn intrinsic result concatenated after %r as the second i64 lane; expected to select raddhn2
+  %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_high_u16:
+; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; vraddhn intrinsic result concatenated after %r as the second i64 lane; expected to select raddhn2
+  %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_high_u32:
+; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; vraddhn intrinsic result concatenated after %r as the second i64 lane; expected to select raddhn2
+  %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_high_u64:
+; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:  ; vraddhn intrinsic result concatenated after %r as the second i64 lane; expected to select raddhn2
+  %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_s16:
+; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:  ; subtract, shift right by 8 (half the lane width), truncate; expected to select subhn
+  %vsubhn.i = sub <8 x i16> %a, %b
+  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
+  ret <8 x i8> %vsubhn2.i
+}
+
+define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_s32:
+; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:  ; subtract, shift right by 16 (half the lane width), truncate; expected to select subhn
+  %vsubhn.i = sub <4 x i32> %a, %b
+  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
+  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
+  ret <4 x i16> %vsubhn2.i
+}
+
+define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_s64:
+; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; (%a - %b) logically shifted right by 32, truncated to i32 lanes
+  %vsubhn.i = sub <2 x i64> %a, %b
+  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
+  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
+  ret <2 x i32> %vsubhn2.i
+}
+
+define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_u16:
+; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; (%a - %b) logically shifted right by 8, truncated to i8 lanes
+  %vsubhn.i = sub <8 x i16> %a, %b
+  %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
+  ret <8 x i8> %vsubhn2.i
+}
+
+define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_u32:
+; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; (%a - %b) logically shifted right by 16, truncated to i16 lanes
+  %vsubhn.i = sub <4 x i32> %a, %b
+  %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
+  %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
+  ret <4 x i16> %vsubhn2.i
+}
+
+define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_u64:
+; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; (%a - %b) logically shifted right by 32, truncated to i32 lanes
+  %vsubhn.i = sub <2 x i64> %a, %b
+  %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
+  %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
+  ret <2 x i32> %vsubhn2.i
+}
+
+define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_high_s16:
+; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; trunc((%a - %b) >> 8), then pack %r (i64 lane 0) with that value (lane 1)
+  %vsubhn.i.i = sub <8 x i16> %a, %b
+  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_high_s32:
+; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; trunc((%a - %b) >> 16), then pack %r (i64 lane 0) with that value (lane 1)
+  %vsubhn.i.i = sub <4 x i32> %a, %b
+  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_high_s64:
+; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; trunc((%a - %b) >> 32), then pack %r (i64 lane 0) with that value (lane 1)
+  %vsubhn.i.i = sub <2 x i64> %a, %b
+  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
+  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_high_u16:
+; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; trunc((%a - %b) >> 8), then pack %r (i64 lane 0) with that value (lane 1)
+  %vsubhn.i.i = sub <8 x i16> %a, %b
+  %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_high_u32:
+; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; trunc((%a - %b) >> 16), then pack %r (i64 lane 0) with that value (lane 1)
+  %vsubhn.i.i = sub <4 x i32> %a, %b
+  %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+  %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_high_u64:
+; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; trunc((%a - %b) >> 32), then pack %r (i64 lane 0) with that value (lane 1)
+  %vsubhn.i.i = sub <2 x i64> %a, %b
+  %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
+  %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_s16:
+; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; single call to the vrsubhn intrinsic on %a, %b
+  %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i8> %vrsubhn2.i
+}
+
+define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_s32:
+; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; single call to the vrsubhn intrinsic on %a, %b
+  %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i16> %vrsubhn2.i
+}
+
+define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_s64:
+; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; single call to the vrsubhn intrinsic on %a, %b
+  %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i32> %vrsubhn2.i
+}
+
+define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_u16:
+; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; single call to the vrsubhn intrinsic on %a, %b
+  %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i8> %vrsubhn2.i
+}
+
+define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_u32:
+; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; single call to the vrsubhn intrinsic on %a, %b
+  %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i16> %vrsubhn2.i
+}
+
+define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_u64:
+; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; single call to the vrsubhn intrinsic on %a, %b
+  %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i32> %vrsubhn2.i
+}
+
+define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_high_s16:
+; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; vrsubhn(%a, %b), then pack %r (i64 lane 0) with the intrinsic result (lane 1)
+  %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_high_s32:
+; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; vrsubhn(%a, %b), then pack %r (i64 lane 0) with the intrinsic result (lane 1)
+  %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_high_s64:
+; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; vrsubhn(%a, %b), then pack %r (i64 lane 0) with the intrinsic result (lane 1)
+  %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_high_u16:
+; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; vrsubhn(%a, %b), then pack %r (i64 lane 0) with the intrinsic result (lane 1)
+  %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+  %0 = bitcast <8 x i8> %r to <1 x i64>
+  %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_high_u32:
+; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; vrsubhn(%a, %b), then pack %r (i64 lane 0) with the intrinsic result (lane 1)
+  %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+  %0 = bitcast <4 x i16> %r to <1 x i64>
+  %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_high_u64:
+; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry: ; vrsubhn(%a, %b), then pack %r (i64 lane 0) with the intrinsic result (lane 1)
+  %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+  %0 = bitcast <2 x i32> %r to <1 x i64>
+  %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
+  %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+  ret <4 x i32> %2
+}
+
+define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vabdl_s8:
+; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; zext(vabds(%a, %b)) to i16 lanes
+  %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
+  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
+  ret <8 x i16> %vmovl.i.i
+}
+
+define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vabdl_s16:
+; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; zext(vabds(%a, %b)) to i32 lanes
+  %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
+  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
+  ret <4 x i32> %vmovl.i.i
+}
+
+define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vabdl_s32:
+; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; zext(vabds(%a, %b)) to i64 lanes
+  %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
+  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
+  ret <2 x i64> %vmovl.i.i
+}
+
+define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vabdl_u8:
+; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; zext(vabdu(%a, %b)) to i16 lanes
+  %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
+  %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
+  ret <8 x i16> %vmovl.i.i
+}
+
+define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vabdl_u16:
+; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; zext(vabdu(%a, %b)) to i32 lanes
+  %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
+  %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
+  ret <4 x i32> %vmovl.i.i
+}
+
+define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vabdl_u32:
+; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; zext(vabdu(%a, %b)) to i64 lanes
+  %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
+  %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
+  ret <2 x i64> %vmovl.i.i
+}
+
+define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vabal_s8:
+; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; %a + zext(vabds(%b, %c))
+  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
+  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+  %add.i = add <8 x i16> %vmovl.i.i.i, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vabal_s16:
+; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; %a + zext(vabds(%b, %c))
+  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
+  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+  %add.i = add <4 x i32> %vmovl.i.i.i, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vabal_s32:
+; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; %a + zext(vabds(%b, %c))
+  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
+  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+  %add.i = add <2 x i64> %vmovl.i.i.i, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vabal_u8:
+; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; %a + zext(vabdu(%b, %c))
+  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
+  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+  %add.i = add <8 x i16> %vmovl.i.i.i, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vabal_u16:
+; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; %a + zext(vabdu(%b, %c))
+  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
+  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+  %add.i = add <4 x i32> %vmovl.i.i.i, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vabal_u32:
+; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; %a + zext(vabdu(%b, %c))
+  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
+  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+  %add.i = add <2 x i64> %vmovl.i.i.i, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vabdl_high_s8:
+; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; zext(vabds(lanes 8-15 of %a, lanes 8-15 of %b))
+  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+  ret <8 x i16> %vmovl.i.i.i
+}
+
+define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vabdl_high_s16:
+; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; zext(vabds(lanes 4-7 of %a, lanes 4-7 of %b))
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+  ret <4 x i32> %vmovl.i.i.i
+}
+
+define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdl_high_s32:
+; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; zext(vabds(lanes 2-3 of %a, lanes 2-3 of %b))
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+  ret <2 x i64> %vmovl.i.i.i
+}
+
+define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vabdl_high_u8:
+; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; zext(vabdu(lanes 8-15 of %a, lanes 8-15 of %b))
+  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+  ret <8 x i16> %vmovl.i.i.i
+}
+
+define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vabdl_high_u16:
+; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; zext(vabdu(lanes 4-7 of %a, lanes 4-7 of %b))
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+  ret <4 x i32> %vmovl.i.i.i
+}
+
+define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdl_high_u32:
+; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; zext(vabdu(lanes 2-3 of %a, lanes 2-3 of %b))
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+  ret <2 x i64> %vmovl.i.i.i
+}
+
+define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vabal_high_s8:
+; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; %a + zext(vabds(lanes 8-15 of %b, lanes 8-15 of %c))
+  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
+  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
+  ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vabal_high_s16:
+; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; %a + zext(vabds(lanes 4-7 of %b, lanes 4-7 of %c))
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
+  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vabal_high_s32:
+; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; %a + zext(vabds(lanes 2-3 of %b, lanes 2-3 of %c))
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
+  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vabal_high_u8:
+; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; %a + zext(vabdu(lanes 8-15 of %b, lanes 8-15 of %c))
+  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
+  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
+  ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vabal_high_u16:
+; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; %a + zext(vabdu(lanes 4-7 of %b, lanes 4-7 of %c))
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
+  %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vabal_high_u32:
+; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; %a + zext(vabdu(lanes 2-3 of %b, lanes 2-3 of %c))
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
+  %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vmull_s8:
+; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; single call to the vmulls intrinsic on %a, %b
+  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i16> %vmull.i
+}
+
+define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vmull_s16:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; single call to the vmulls intrinsic on %a, %b
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vmull_s32:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; single call to the vmulls intrinsic on %a, %b
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vmull_u8:
+; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; single call to the vmullu intrinsic on %a, %b
+  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i16> %vmull.i
+}
+
+define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vmull_u16:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; single call to the vmullu intrinsic on %a, %b
+  %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vmull_u32:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; single call to the vmullu intrinsic on %a, %b
+  %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmull_high_s8:
+; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; vmulls(lanes 8-15 of %a, lanes 8-15 of %b)
+  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmull_high_s16:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; vmulls(lanes 4-7 of %a, lanes 4-7 of %b)
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmull_high_s32:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; vmulls(lanes 2-3 of %a, lanes 2-3 of %b)
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmull_high_u8:
+; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; vmullu(lanes 8-15 of %a, lanes 8-15 of %b)
+  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmull_high_u16:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; vmullu(lanes 4-7 of %a, lanes 4-7 of %b)
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmull_high_u32:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; vmullu(lanes 2-3 of %a, lanes 2-3 of %b)
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vmlal_s8:
+; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; %a + vmulls(%b, %c)
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
+  %add.i = add <8 x i16> %vmull.i.i, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vmlal_s16:
+; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; %a + vmulls(%b, %c)
+  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
+  %add.i = add <4 x i32> %vmull2.i.i, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vmlal_s32:
+; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; %a + vmulls(%b, %c)
+  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
+  %add.i = add <2 x i64> %vmull2.i.i, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vmlal_u8:
+; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; %a + vmullu(%b, %c)
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
+  %add.i = add <8 x i16> %vmull.i.i, %a
+  ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vmlal_u16:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; %a + vmullu(%b, %c)
+  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
+  %add.i = add <4 x i32> %vmull2.i.i, %a
+  ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vmlal_u32:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry: ; %a + vmullu(%b, %c)
+  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
+  %add.i = add <2 x i64> %vmull2.i.i, %a
+  ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmlal_high_s8:
+; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; %a + vmulls(lanes 8-15 of %b, lanes 8-15 of %c)
+  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
+  ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmlal_high_s16:
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; %a + vmulls(lanes 4-7 of %b, lanes 4-7 of %c)
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmlal_high_s32:
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; %a + vmulls(lanes 2-3 of %b, lanes 2-3 of %c)
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmlal_high_u8:
+; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; %a + vmullu(lanes 8-15 of %b, lanes 8-15 of %c)
+  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %add.i.i = add <8 x i16> %vmull.i.i.i, %a
+  ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmlal_high_u16:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; %a + vmullu(lanes 4-7 of %b, lanes 4-7 of %c)
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+  ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { ; %a + vmullu(%b[2..3], %c[2..3])
+; CHECK-LABEL: test_vmlal_high_u32:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+  ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; %a - vmulls(%b, %c)
+; CHECK-LABEL: test_vmlsl_s8:
+; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
+  %sub.i = sub <8 x i16> %a, %vmull.i.i
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; %a - vmulls(%b, %c)
+; CHECK-LABEL: test_vmlsl_s16:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
+  %sub.i = sub <4 x i32> %a, %vmull2.i.i
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; %a - vmulls(%b, %c)
+; CHECK-LABEL: test_vmlsl_s32:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
+  %sub.i = sub <2 x i64> %a, %vmull2.i.i
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { ; %a - vmullu(%b, %c)
+; CHECK-LABEL: test_vmlsl_u8:
+; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
+  %sub.i = sub <8 x i16> %a, %vmull.i.i
+  ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; %a - vmullu(%b, %c)
+; CHECK-LABEL: test_vmlsl_u16:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
+  %sub.i = sub <4 x i32> %a, %vmull2.i.i
+  ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; %a - vmullu(%b, %c)
+; CHECK-LABEL: test_vmlsl_u32:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+  %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
+  %sub.i = sub <2 x i64> %a, %vmull2.i.i
+  ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { ; %a - vmulls(%b[8..15], %c[8..15])
+; CHECK-LABEL: test_vmlsl_high_s8:
+; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
+  ret <8 x i16> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { ; %a - vmulls(%b[4..7], %c[4..7])
+; CHECK-LABEL: test_vmlsl_high_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+  ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { ; %a - vmulls(%b[2..3], %c[2..3])
+; CHECK-LABEL: test_vmlsl_high_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+  ret <2 x i64> %sub.i.i
+}
+
+define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { ; %a - vmullu(%b[8..15], %c[8..15])
+; CHECK-LABEL: test_vmlsl_high_u8:
+; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
+  ret <8 x i16> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { ; %a - vmullu(%b[4..7], %c[4..7])
+; CHECK-LABEL: test_vmlsl_high_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+  ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { ; %a - vmullu(%b[2..3], %c[2..3])
+; CHECK-LABEL: test_vmlsl_high_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+  ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) { ; vqdmull(%a, %b)
+; CHECK-LABEL: test_vqdmull_s16:
+; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
+  ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) { ; vqdmull(%a, %b)
+; CHECK-LABEL: test_vqdmull_s32:
+; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+  %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
+  ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; vqadds(%a, vqdmull(%b, %c))
+; CHECK-LABEL: test_vqdmlal_s16:
+; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
+  %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+  ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; vqadds(%a, vqdmull(%b, %c))
+; CHECK-LABEL: test_vqdmlal_s32:
+; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+  %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
+  %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+  ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { ; vqsubs(%a, vqdmull(%b, %c))
+; CHECK-LABEL: test_vqdmlsl_s16:
+; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
+  %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+  ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { ; vqsubs(%a, vqdmull(%b, %c))
+; CHECK-LABEL: test_vqdmlsl_s32:
+; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+  %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
+  %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+  ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { ; vqdmull(%a[4..7], %b[4..7])
+; CHECK-LABEL: test_vqdmull_high_s16:
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  ret <4 x i32> %vqdmull2.i.i
+}
+
+define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { ; vqdmull(%a[2..3], %b[2..3])
+; CHECK-LABEL: test_vqdmull_high_s32:
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  ret <2 x i64> %vqdmull2.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { ; vqadds(%a, vqdmull(%b[4..7], %c[4..7]))
+; CHECK-LABEL: test_vqdmlal_high_s16:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
+  ret <4 x i32> %vqdmlal4.i.i
+}
+
+define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { ; vqadds(%a, vqdmull(%b[2..3], %c[2..3]))
+; CHECK-LABEL: test_vqdmlal_high_s32:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
+  ret <2 x i64> %vqdmlal4.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { ; vqsubs(%a, vqdmull(%b[4..7], %c[4..7]))
+; CHECK-LABEL: test_vqdmlsl_high_s16:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+  %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
+  ret <4 x i32> %vqdmlsl4.i.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { ; vqsubs(%a, vqdmull(%b[2..3], %c[2..3]))
+; CHECK-LABEL: test_vqdmlsl_high_s32:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+  %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
+  ret <2 x i64> %vqdmlsl4.i.i
+}
+
+define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) { ; vmullp(%a, %b)
+; CHECK-LABEL: test_vmull_p8:
+; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
+  ret <8 x i16> %vmull.i
+}
+
+define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) { ; vmullp(%a[8..15], %b[8..15])
+; CHECK-LABEL: test_vmull_high_p8:
+; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+  ret <8 x i16> %vmull.i.i
+}
+
diff --git a/test/CodeGen/AArch64/neon-aba-abd.ll b/test/CodeGen/AArch64/neon-aba-abd.ll
new file mode 100644
index 000000000000..54009849ef60
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-aba-abd.ll
@@ -0,0 +1,236 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uabd_v8i8:
+  %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uabd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %abd
+}
+
+define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; %lhs + vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uaba_v8i8:
+  %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+  %aba = add <8 x i8> %lhs, %abd
+; CHECK: uaba v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %aba
+}
+
+define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vabds(%lhs, %rhs)
+; CHECK-LABEL: test_sabd_v8i8:
+  %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sabd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %abd
+}
+
+define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; %lhs + vabds(%lhs, %rhs)
+; CHECK-LABEL: test_saba_v8i8:
+  %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+  %aba = add <8 x i8> %lhs, %abd
+; CHECK: saba v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %aba
+}
+
+declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uabd_v16i8:
+  %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uabd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %abd
+}
+
+define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; %lhs + vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uaba_v16i8:
+  %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+  %aba = add <16 x i8> %lhs, %abd
+; CHECK: uaba v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %aba
+}
+
+define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vabds(%lhs, %rhs)
+; CHECK-LABEL: test_sabd_v16i8:
+  %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sabd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %abd
+}
+
+define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; %lhs + vabds(%lhs, %rhs)
+; CHECK-LABEL: test_saba_v16i8:
+  %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+  %aba = add <16 x i8> %lhs, %abd
+; CHECK: saba v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %aba
+}
+
+declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uabd_v4i16:
+  %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uabd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %abd
+}
+
+define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; %lhs + vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uaba_v4i16:
+  %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+  %aba = add <4 x i16> %lhs, %abd
+; CHECK: uaba v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %aba
+}
+
+define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vabds(%lhs, %rhs)
+; CHECK-LABEL: test_sabd_v4i16:
+  %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sabd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %abd
+}
+
+define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; %lhs + vabds(%lhs, %rhs)
+; CHECK-LABEL: test_saba_v4i16:
+  %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+  %aba = add <4 x i16> %lhs, %abd
+; CHECK: saba v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %aba
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uabd_v8i16:
+  %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uabd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %abd
+}
+
+define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; %lhs + vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uaba_v8i16:
+  %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+  %aba = add <8 x i16> %lhs, %abd
+; CHECK: uaba v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %aba
+}
+
+define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vabds(%lhs, %rhs)
+; CHECK-LABEL: test_sabd_v8i16:
+  %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sabd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %abd
+}
+
+define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; %lhs + vabds(%lhs, %rhs)
+; CHECK-LABEL: test_saba_v8i16:
+  %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+  %aba = add <8 x i16> %lhs, %abd
+; CHECK: saba v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %aba
+}
+
+declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uabd_v2i32:
+  %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uabd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %abd
+}
+
+define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; %lhs + vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uaba_v2i32:
+  %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+  %aba = add <2 x i32> %lhs, %abd
+; CHECK: uaba v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %aba
+}
+
+define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vabds(%lhs, %rhs)
+; CHECK-LABEL: test_sabd_v2i32:
+  %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sabd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %abd
+}
+
+define <2 x i32> @test_sabd_v2i32_const() { ; vabds applied to two constant vectors
+; CHECK-LABEL: test_sabd_v2i32_const:
+; CHECK: movi     d1, #0xffffffff0000
+; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
+  %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(
+    <2 x i32> <i32 -2147483648, i32 2147450880>,
+    <2 x i32> <i32 -65536, i32 65535>)
+  ret <2 x i32> %1
+}
+
+define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; %lhs + vabds(%lhs, %rhs)
+; CHECK-LABEL: test_saba_v2i32:
+  %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+  %aba = add <2 x i32> %lhs, %abd
+; CHECK: saba v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %aba
+}
+
+declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uabd_v4i32:
+  %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uabd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %abd
+}
+
+define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; %lhs + vabdu(%lhs, %rhs)
+; CHECK-LABEL: test_uaba_v4i32:
+  %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+  %aba = add <4 x i32> %lhs, %abd
+; CHECK: uaba v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %aba
+}
+
+define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vabds(%lhs, %rhs)
+; CHECK-LABEL: test_sabd_v4i32:
+  %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sabd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %abd
+}
+
+define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; %lhs + vabds(%lhs, %rhs)
+; CHECK-LABEL: test_saba_v4i32:
+  %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+  %aba = add <4 x i32> %lhs, %abd
+; CHECK: saba v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %aba
+}
+
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>)
+
+define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) { ; vabds.v2f32(%lhs, %rhs) on float lanes
+; CHECK-LABEL: test_fabd_v2f32:
+  %abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fabd v0.2s, v0.2s, v1.2s
+  ret <2 x float> %abd
+}
+
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>)
+
+define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) { ; vabds.v4f32(%lhs, %rhs) on float lanes
+; CHECK-LABEL: test_fabd_v4f32:
+  %abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fabd v0.4s, v0.4s, v1.4s
+  ret <4 x float> %abd
+}
+
+declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>)
+
+define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ; vabds.v2f64(%lhs, %rhs) on double lanes
+; CHECK-LABEL: test_fabd_v2f64:
+  %abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fabd v0.2d, v0.2d, v1.2d
+  ret <2 x double> %abd
+}
diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll
new file mode 100644
index 000000000000..733db970cf33
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-across.ll
@@ -0,0 +1,476 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float>)
+
+declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float>)
+
+declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float>)
+
+declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float>)
+
+declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>)
+
+declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>)
+
+declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>)
+
+declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>)
+
+declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>)
+
+declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>)
+
+declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>)
+
+declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>)
+
+declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>)
+
+declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>)
+
+declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>)
+
+declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>)
+
+declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>)
+
+declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>)
+
+declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>)
+
+declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>)
+
+declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>)
+
+declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>)
+
+declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>)
+
+declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>)
+
+declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>)
+
+declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>)
+
+declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>)
+
+declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>)
+
+declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>)
+
+declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>)
+
+define i16 @test_vaddlv_s8(<8 x i8> %a) { ; lane 0 of saddlv(%a)
+; CHECK-LABEL: test_vaddlv_s8:
+; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i16> %saddlv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vaddlv_s16(<4 x i16> %a) { ; lane 0 of saddlv(%a)
+; CHECK-LABEL: test_vaddlv_s16:
+; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i32> %saddlv.i, i32 0
+  ret i32 %0
+}
+
+define i16 @test_vaddlv_u8(<8 x i8> %a) { ; lane 0 of uaddlv(%a)
+; CHECK-LABEL: test_vaddlv_u8:
+; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i16> %uaddlv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vaddlv_u16(<4 x i16> %a) { ; lane 0 of uaddlv(%a)
+; CHECK-LABEL: test_vaddlv_u16:
+; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i32> %uaddlv.i, i32 0
+  ret i32 %0
+}
+
+define i16 @test_vaddlvq_s8(<16 x i8> %a) { ; lane 0 of saddlv(%a)
+; CHECK-LABEL: test_vaddlvq_s8:
+; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i16> %saddlv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vaddlvq_s16(<8 x i16> %a) { ; lane 0 of saddlv(%a)
+; CHECK-LABEL: test_vaddlvq_s16:
+; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i32> %saddlv.i, i32 0
+  ret i32 %0
+}
+
+define i64 @test_vaddlvq_s32(<4 x i32> %a) { ; lane 0 of saddlv(%a)
+; CHECK-LABEL: test_vaddlvq_s32:
+; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i64> %saddlv.i, i32 0
+  ret i64 %0
+}
+
+define i16 @test_vaddlvq_u8(<16 x i8> %a) { ; lane 0 of uaddlv(%a)
+; CHECK-LABEL: test_vaddlvq_u8:
+; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i16> %uaddlv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vaddlvq_u16(<8 x i16> %a) { ; lane 0 of uaddlv(%a)
+; CHECK-LABEL: test_vaddlvq_u16:
+; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i32> %uaddlv.i, i32 0
+  ret i32 %0
+}
+
+define i64 @test_vaddlvq_u32(<4 x i32> %a) { ; lane 0 of uaddlv(%a)
+; CHECK-LABEL: test_vaddlvq_u32:
+; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i64> %uaddlv.i, i32 0
+  ret i64 %0
+}
+
+define i8 @test_vmaxv_s8(<8 x i8> %a) { ; lane 0 of smaxv(%a)
+; CHECK-LABEL: test_vmaxv_s8:
+; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i8> %smaxv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vmaxv_s16(<4 x i16> %a) { ; lane 0 of smaxv(%a)
+; CHECK-LABEL: test_vmaxv_s16:
+; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i16> %smaxv.i, i32 0
+  ret i16 %0
+}
+
+define i8 @test_vmaxv_u8(<8 x i8> %a) { ; lane 0 of umaxv(%a)
+; CHECK-LABEL: test_vmaxv_u8:
+; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i8> %umaxv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vmaxv_u16(<4 x i16> %a) { ; lane 0 of umaxv(%a)
+; CHECK-LABEL: test_vmaxv_u16:
+; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i16> %umaxv.i, i32 0
+  ret i16 %0
+}
+
+define i8 @test_vmaxvq_s8(<16 x i8> %a) { ; lane 0 of smaxv(%a)
+; CHECK-LABEL: test_vmaxvq_s8:
+; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i8> %smaxv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vmaxvq_s16(<8 x i16> %a) { ; lane 0 of smaxv(%a)
+; CHECK-LABEL: test_vmaxvq_s16:
+; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i16> %smaxv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vmaxvq_s32(<4 x i32> %a) { ; lane 0 of smaxv(%a)
+; CHECK-LABEL: test_vmaxvq_s32:
+; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i32> %smaxv.i, i32 0
+  ret i32 %0
+}
+
+define i8 @test_vmaxvq_u8(<16 x i8> %a) { ; lane 0 of umaxv(%a)
+; CHECK-LABEL: test_vmaxvq_u8:
+; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i8> %umaxv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vmaxvq_u16(<8 x i16> %a) { ; lane 0 of umaxv(%a)
+; CHECK-LABEL: test_vmaxvq_u16:
+; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i16> %umaxv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vmaxvq_u32(<4 x i32> %a) { ; lane 0 of umaxv(%a)
+; CHECK-LABEL: test_vmaxvq_u32:
+; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i32> %umaxv.i, i32 0
+  ret i32 %0
+}
+
+define i8 @test_vminv_s8(<8 x i8> %a) { ; lane 0 of sminv(%a)
+; CHECK-LABEL: test_vminv_s8:
+; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i8> %sminv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vminv_s16(<4 x i16> %a) { ; lane 0 of sminv(%a)
+; CHECK-LABEL: test_vminv_s16:
+; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i16> %sminv.i, i32 0
+  ret i16 %0
+}
+
+define i8 @test_vminv_u8(<8 x i8> %a) { ; lane 0 of uminv(%a)
+; CHECK-LABEL: test_vminv_u8:
+; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i8> %uminv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vminv_u16(<4 x i16> %a) { ; lane 0 of uminv(%a)
+; CHECK-LABEL: test_vminv_u16:
+; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i16> %uminv.i, i32 0
+  ret i16 %0
+}
+
+define i8 @test_vminvq_s8(<16 x i8> %a) { ; lane 0 of sminv(%a)
+; CHECK-LABEL: test_vminvq_s8:
+; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i8> %sminv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vminvq_s16(<8 x i16> %a) { ; lane 0 of sminv(%a)
+; CHECK-LABEL: test_vminvq_s16:
+; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i16> %sminv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vminvq_s32(<4 x i32> %a) { ; lane 0 of sminv(%a)
+; CHECK-LABEL: test_vminvq_s32:
+; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i32> %sminv.i, i32 0
+  ret i32 %0
+}
+
+define i8 @test_vminvq_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vminvq_u8:
+; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i8> %uminv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vminvq_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vminvq_u16:
+; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i16> %uminv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vminvq_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vminvq_u32:
+; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i32> %uminv.i, i32 0
+  ret i32 %0
+}
+
+define i8 @test_vaddv_s8(<8 x i8> %a) {
+; CHECK-LABEL: test_vaddv_s8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i8> %vaddv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vaddv_s16(<4 x i16> %a) {
+; CHECK-LABEL: test_vaddv_s16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i16> %vaddv.i, i32 0
+  ret i16 %0
+}
+
+define i8 @test_vaddv_u8(<8 x i8> %a) {
+; CHECK-LABEL: test_vaddv_u8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
+  %0 = extractelement <1 x i8> %vaddv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vaddv_u16(<4 x i16> %a) {
+; CHECK-LABEL: test_vaddv_u16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
+  %0 = extractelement <1 x i16> %vaddv.i, i32 0
+  ret i16 %0
+}
+
+define i8 @test_vaddvq_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vaddvq_s8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i8> %vaddv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vaddvq_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vaddvq_s16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i16> %vaddv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vaddvq_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vaddvq_s32:
+; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i32> %vaddv.i, i32 0
+  ret i32 %0
+}
+
+define i8 @test_vaddvq_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vaddvq_u8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+  %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
+  %0 = extractelement <1 x i8> %vaddv.i, i32 0
+  ret i8 %0
+}
+
+define i16 @test_vaddvq_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vaddvq_u16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+  %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
+  %0 = extractelement <1 x i16> %vaddv.i, i32 0
+  ret i16 %0
+}
+
+define i32 @test_vaddvq_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vaddvq_u32:
+; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
+  %0 = extractelement <1 x i32> %vaddv.i, i32 0
+  ret i32 %0
+}
+
+define float @test_vmaxvq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vmaxvq_f32:
+; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %vmaxv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float> %a)
+  %0 = extractelement <1 x float> %vmaxv.i, i32 0
+  ret float %0
+}
+
+define float @test_vminvq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vminvq_f32:
+; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %vminv.i = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float> %a)
+  %0 = extractelement <1 x float> %vminv.i, i32 0
+  ret float %0
+}
+
+define float @test_vmaxnmvq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vmaxnmvq_f32:
+; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %vmaxnmv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float> %a)
+  %0 = extractelement <1 x float> %vmaxnmv.i, i32 0
+  ret float %0
+}
+
+define float @test_vminnmvq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vminnmvq_f32:
+; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %vminnmv.i = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float> %a)
+  %0 = extractelement <1 x float> %vminnmv.i, i32 0
+  ret float %0
+}
+
diff --git a/test/CodeGen/AArch64/neon-add-pairwise.ll b/test/CodeGen/AArch64/neon-add-pairwise.ll
new file mode 100644
index 000000000000..1abfed31908c
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-add-pairwise.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; Using registers other than v0, v1 is possible, but would be odd.
+; CHECK-LABEL: test_addp_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: addp v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK-LABEL: test_addp_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: addp v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_addp_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: addp v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_addp_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: addp v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_addp_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: addp v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_addp_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: addp v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+
+declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-LABEL: test_addp_v2i64:
+        %val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: addp v0.2d, v0.2d, v1.2d
+        ret <2 x i64> %val
+}
+
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK-LABEL: test_faddp_v2f32:
+        %val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: faddp v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK-LABEL: test_faddp_v4f32:
+        %val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: faddp v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-LABEL: test_faddp_v2f64:
+        %val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: faddp v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll
new file mode 100644
index 000000000000..078ba14bd87a
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-add-sub.ll
@@ -0,0 +1,237 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp3 = add <8 x i8> %A, %B;
+	ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+	%tmp3 = add <16 x i8> %A, %B;
+	ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+	%tmp3 = add <4 x i16> %A, %B;
+	ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+	%tmp3 = add <8 x i16> %A, %B;
+	ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = add <2 x i32> %A, %B;
+	ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = add <4 x i32> %A, %B;
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = add <2 x i64> %A, %B;
+	ret <2 x i64> %tmp3
+}
+
+define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = fadd <2 x float> %A, %B;
+	ret <2 x float> %tmp3
+}
+
+define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = fadd <4 x float> %A, %B;
+	ret <4 x float> %tmp3
+}
+define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = fadd <2 x double> %A, %B;
+	ret <2 x double> %tmp3
+}
+
+define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp3 = sub <8 x i8> %A, %B;
+	ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+	%tmp3 = sub <16 x i8> %A, %B;
+	ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+	%tmp3 = sub <4 x i16> %A, %B;
+	ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+	%tmp3 = sub <8 x i16> %A, %B;
+	ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = sub <2 x i32> %A, %B;
+	ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = sub <4 x i32> %A, %B;
+	ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = sub <2 x i64> %A, %B;
+	ret <2 x i64> %tmp3
+}
+
+define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = fsub <2 x float> %A, %B;
+	ret <2 x float> %tmp3
+}
+
+define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = fsub <4 x float> %A, %B;
+	ret <4 x float> %tmp3
+}
+define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = fsub <2 x double> %A, %B;
+	ret <2 x double> %tmp3
+}
+
+define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vadd_f64
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %sum = fadd <1 x double> %a, %b
+  ret <1 x double> %sum
+}
+
+define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmul_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %prod = fmul <1 x double> %a, %b
+  ret <1 x double> %prod
+}
+
+define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vdiv_f64
+; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %quot = fdiv <1 x double> %a, %b
+  ret <1 x double> %quot
+}
+
+define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmla_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %prod = fmul <1 x double> %b, %c
+  %acc = fadd <1 x double> %prod, %a
+  ret <1 x double> %acc
+}
+
+define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmls_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %prod = fmul <1 x double> %b, %c
+  %diff = fsub <1 x double> %a, %prod
+  ret <1 x double> %diff
+}
+
+define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfms_f64
+; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %neg.b = fsub <1 x double> <double -0.000000e+00>, %b
+  %fms = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %neg.b, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %fms
+}
+
+define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfma_f64
+; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %fma = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %fma
+}
+
+define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vsub_f64
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %diff = fsub <1 x double> %a, %b
+  ret <1 x double> %diff
+}
+
+define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vabd_f64
+; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %abd = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %abd
+}
+
+define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmax_f64
+; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %max = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %max
+}
+
+define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmin_f64
+; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %min = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %min
+}
+
+define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmaxnm_f64
+; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %maxnm = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %maxnm
+}
+
+define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vminnm_f64
+; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %minnm = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %minnm
+}
+
+define <1 x double> @test_vabs_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vabs_f64
+; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+  %abs = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+  ret <1 x double> %abs
+}
+
+define <1 x double> @test_vneg_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vneg_f64
+; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+  %neg = fsub <1 x double> <double -0.000000e+00>, %a
+  ret <1 x double> %neg
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll
new file mode 100644
index 000000000000..f9ec70484024
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-bitcast.ll
@@ -0,0 +1,574 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+
+; From <8 x i8>
+
+define <1 x i64> @test_v8i8_to_v1i64(<8 x i8> %in) nounwind {
+; CHECK-LABEL: test_v8i8_to_v1i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i8> %in to <1 x i64>
+  ret <1 x i64> %val
+}
+
+define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind {
+; CHECK-LABEL: test_v8i8_to_v2i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i8> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v8i8_to_v1f32(<8 x i8> %in) nounwind{
+; CHECK-LABEL: test_v8i8_to_v1f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+; NOTE(review): name says v1f32 but the cast target is <2 x float> (v2f32).
+  %val = bitcast <8 x i8> %in to <2 x float>
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v8i8_to_v4i16(<8 x i8> %in) nounwind{
+; CHECK-LABEL: test_v8i8_to_v4i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i8> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <8 x i8> @test_v8i8_to_v8i8(<8 x i8> %in) nounwind{
+; CHECK-LABEL: test_v8i8_to_v8i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i8> %in to <8 x i8>
+  ret <8 x i8> %val
+}
+
+; From <4 x i16>
+
+define <1 x i64> @test_v4i16_to_v1i64(<4 x i16> %in) nounwind {
+; CHECK-LABEL: test_v4i16_to_v1i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i16> %in to <1 x i64>
+  ret <1 x i64> %val
+}
+
+define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind {
+; CHECK-LABEL: test_v4i16_to_v2i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i16> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v4i16_to_v1f32(<4 x i16> %in) nounwind{
+; CHECK-LABEL: test_v4i16_to_v1f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+; NOTE(review): name says v1f32 but the cast target is <2 x float> (v2f32).
+  %val = bitcast <4 x i16> %in to <2 x float>
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v4i16_to_v4i16(<4 x i16> %in) nounwind{
+; CHECK-LABEL: test_v4i16_to_v4i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i16> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <8 x i8> @test_v4i16_to_v8i8(<4 x i16> %in) nounwind{
+; CHECK-LABEL: test_v4i16_to_v8i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i16> %in to <8 x i8>
+  ret <8 x i8> %val
+}
+
+; From <2 x i32>
+
+define <1 x i64> @test_v2i32_to_v1i64(<2 x i32> %in) nounwind {
+; CHECK-LABEL: test_v2i32_to_v1i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i32> %in to <1 x i64>
+  ret <1 x i64> %val
+}
+
+define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind {
+; CHECK-LABEL: test_v2i32_to_v2i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i32> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v2i32_to_v1f32(<2 x i32> %in) nounwind{
+; CHECK-LABEL: test_v2i32_to_v1f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+; NOTE(review): name says v1f32 but the cast target is <2 x float> (v2f32).
+  %val = bitcast <2 x i32> %in to <2 x float>
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v2i32_to_v4i16(<2 x i32> %in) nounwind{
+; CHECK-LABEL: test_v2i32_to_v4i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i32> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <8 x i8> @test_v2i32_to_v8i8(<2 x i32> %in) nounwind{
+; CHECK-LABEL: test_v2i32_to_v8i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i32> %in to <8 x i8>
+  ret <8 x i8> %val
+}
+
+; From <2 x float>
+
+define <1 x i64> @test_v2f32_to_v1i64(<2 x float> %in) nounwind {
+; CHECK-LABEL: test_v2f32_to_v1i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x float> %in to <1 x i64>
+  ret <1 x i64> %val
+}
+
+define <2 x i32> @test_v2f32_to_v2i32(<2 x float> %in) nounwind {
+; CHECK-LABEL: test_v2f32_to_v2i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x float> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v2f32_to_v2f32(<2 x float> %in) nounwind{
+; CHECK-LABEL: test_v2f32_to_v2f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x float> %in to <2 x float>
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v2f32_to_v4i16(<2 x float> %in) nounwind{
+; CHECK-LABEL: test_v2f32_to_v4i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x float> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <8 x i8> @test_v2f32_to_v8i8(<2 x float> %in) nounwind{
+; CHECK-LABEL: test_v2f32_to_v8i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x float> %in to <8 x i8>
+  ret <8 x i8> %val
+}
+
+; From <1 x i64>
+
+define <1 x i64> @test_v1i64_to_v1i64(<1 x i64> %in) nounwind {
+; CHECK-LABEL: test_v1i64_to_v1i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <1 x i64> %in to <1 x i64>
+  ret <1 x i64> %val
+}
+
+define <2 x i32> @test_v1i64_to_v2i32(<1 x i64> %in) nounwind {
+; CHECK-LABEL: test_v1i64_to_v2i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <1 x i64> %in to <2 x i32>
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v1i64_to_v2f32(<1 x i64> %in) nounwind{
+; CHECK-LABEL: test_v1i64_to_v2f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <1 x i64> %in to <2 x float>
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v1i64_to_v4i16(<1 x i64> %in) nounwind{
+; CHECK-LABEL: test_v1i64_to_v4i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <1 x i64> %in to <4 x i16>
+  ret <4 x i16> %val
+}
+
+define <8 x i8> @test_v1i64_to_v8i8(<1 x i64> %in) nounwind{
+; CHECK-LABEL: test_v1i64_to_v8i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <1 x i64> %in to <8 x i8>
+  ret <8 x i8> %val
+}
+
+
+; From <16 x i8>
+
+define <2 x double> @test_v16i8_to_v2f64(<16 x i8> %in) nounwind {
+; CHECK-LABEL: test_v16i8_to_v2f64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <16 x i8> %in to <2 x double>
+  ret <2 x double> %val
+}
+
+define <2 x i64> @test_v16i8_to_v2i64(<16 x i8> %in) nounwind {
+; CHECK-LABEL: test_v16i8_to_v2i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <16 x i8> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <4 x i32> @test_v16i8_to_v4i32(<16 x i8> %in) nounwind {
+; CHECK-LABEL: test_v16i8_to_v4i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <16 x i8> %in to <4 x i32>
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v16i8_to_v2f32(<16 x i8> %in) nounwind{
+; CHECK-LABEL: test_v16i8_to_v2f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+; NOTE(review): name says v2f32 but the cast target is <4 x float> (v4f32).
+  %val = bitcast <16 x i8> %in to <4 x float>
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v16i8_to_v8i16(<16 x i8> %in) nounwind{
+; CHECK-LABEL: test_v16i8_to_v8i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <16 x i8> %in to <8 x i16>
+  ret <8 x i16> %val
+}
+
+define <16 x i8> @test_v16i8_to_v16i8(<16 x i8> %in) nounwind{
+; CHECK-LABEL: test_v16i8_to_v16i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <16 x i8> %in to <16 x i8>
+  ret <16 x i8> %val
+}
+
+; From <8 x i16>
+
+define <2 x double> @test_v8i16_to_v2f64(<8 x i16> %in) nounwind {
+; CHECK-LABEL: test_v8i16_to_v2f64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i16> %in to <2 x double>
+  ret <2 x double> %val
+}
+
+define <2 x i64> @test_v8i16_to_v2i64(<8 x i16> %in) nounwind {
+; CHECK-LABEL: test_v8i16_to_v2i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i16> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <4 x i32> @test_v8i16_to_v4i32(<8 x i16> %in) nounwind {
+; CHECK-LABEL: test_v8i16_to_v4i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i16> %in to <4 x i32>
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v8i16_to_v2f32(<8 x i16> %in) nounwind{
+; CHECK-LABEL: test_v8i16_to_v2f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+; NOTE(review): name says v2f32 but the cast target is <4 x float> (v4f32).
+  %val = bitcast <8 x i16> %in to <4 x float>
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v8i16_to_v8i16(<8 x i16> %in) nounwind{
+; CHECK-LABEL: test_v8i16_to_v8i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i16> %in to <8 x i16>
+  ret <8 x i16> %val
+}
+
+define <16 x i8> @test_v8i16_to_v16i8(<8 x i16> %in) nounwind{
+; CHECK-LABEL: test_v8i16_to_v16i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <8 x i16> %in to <16 x i8>
+  ret <16 x i8> %val
+}
+
+; From <4 x i32>
+
+define <2 x double> @test_v4i32_to_v2f64(<4 x i32> %in) nounwind {
+; CHECK-LABEL: test_v4i32_to_v2f64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i32> %in to <2 x double>
+  ret <2 x double> %val
+}
+
+define <2 x i64> @test_v4i32_to_v2i64(<4 x i32> %in) nounwind {
+; CHECK-LABEL: test_v4i32_to_v2i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i32> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <4 x i32> @test_v4i32_to_v4i32(<4 x i32> %in) nounwind {
+; CHECK-LABEL: test_v4i32_to_v4i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i32> %in to <4 x i32>
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v4i32_to_v2f32(<4 x i32> %in) nounwind{
+; CHECK-LABEL: test_v4i32_to_v2f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+; NOTE(review): name says v2f32 but the cast target is <4 x float> (v4f32).
+  %val = bitcast <4 x i32> %in to <4 x float>
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v4i32_to_v8i16(<4 x i32> %in) nounwind{
+; CHECK-LABEL: test_v4i32_to_v8i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i32> %in to <8 x i16>
+  ret <8 x i16> %val
+}
+
+define <16 x i8> @test_v4i32_to_v16i8(<4 x i32> %in) nounwind{
+; CHECK-LABEL: test_v4i32_to_v16i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x i32> %in to <16 x i8>
+  ret <16 x i8> %val
+}
+
+; From <4 x float>
+
+define <2 x double> @test_v4f32_to_v2f64(<4 x float> %in) nounwind {
+; CHECK-LABEL: test_v4f32_to_v2f64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x float> %in to <2 x double>
+  ret <2 x double> %val
+}
+
+define <2 x i64> @test_v4f32_to_v2i64(<4 x float> %in) nounwind {
+; CHECK-LABEL: test_v4f32_to_v2i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x float> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <4 x i32> @test_v4f32_to_v4i32(<4 x float> %in) nounwind {
+; CHECK-LABEL: test_v4f32_to_v4i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x float> %in to <4 x i32>
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v4f32_to_v4f32(<4 x float> %in) nounwind{
+; CHECK-LABEL: test_v4f32_to_v4f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x float> %in to <4 x float>
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v4f32_to_v8i16(<4 x float> %in) nounwind{
+; CHECK-LABEL: test_v4f32_to_v8i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x float> %in to <8 x i16>
+  ret <8 x i16> %val
+}
+
+define <16 x i8> @test_v4f32_to_v16i8(<4 x float> %in) nounwind{
+; CHECK-LABEL: test_v4f32_to_v16i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <4 x float> %in to <16 x i8>
+  ret <16 x i8> %val
+}
+
+; From <2 x i64>
+
+define <2 x double> @test_v2i64_to_v2f64(<2 x i64> %in) nounwind {
+; CHECK-LABEL: test_v2i64_to_v2f64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i64> %in to <2 x double>
+  ret <2 x double> %val
+}
+
+define <2 x i64> @test_v2i64_to_v2i64(<2 x i64> %in) nounwind {
+; CHECK-LABEL: test_v2i64_to_v2i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i64> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <4 x i32> @test_v2i64_to_v4i32(<2 x i64> %in) nounwind {
+; CHECK-LABEL: test_v2i64_to_v4i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i64> %in to <4 x i32>
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v2i64_to_v4f32(<2 x i64> %in) nounwind{
+; CHECK-LABEL: test_v2i64_to_v4f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i64> %in to <4 x float>
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v2i64_to_v8i16(<2 x i64> %in) nounwind{
+; CHECK-LABEL: test_v2i64_to_v8i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i64> %in to <8 x i16>
+  ret <8 x i16> %val
+}
+
+define <16 x i8> @test_v2i64_to_v16i8(<2 x i64> %in) nounwind{
+; CHECK-LABEL: test_v2i64_to_v16i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x i64> %in to <16 x i8>
+  ret <16 x i8> %val
+}
+
+; From <2 x double>
+
+define <2 x double> @test_v2f64_to_v2f64(<2 x double> %in) nounwind {
+; CHECK-LABEL: test_v2f64_to_v2f64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x double> %in to <2 x double>
+  ret <2 x double> %val
+}
+
+define <2 x i64> @test_v2f64_to_v2i64(<2 x double> %in) nounwind {
+; CHECK-LABEL: test_v2f64_to_v2i64:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x double> %in to <2 x i64>
+  ret <2 x i64> %val
+}
+
+define <4 x i32> @test_v2f64_to_v4i32(<2 x double> %in) nounwind {
+; CHECK-LABEL: test_v2f64_to_v4i32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x double> %in to <4 x i32>
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v2f64_to_v4f32(<2 x double> %in) nounwind{
+; CHECK-LABEL: test_v2f64_to_v4f32:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x double> %in to <4 x float>
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v2f64_to_v8i16(<2 x double> %in) nounwind{
+; CHECK-LABEL: test_v2f64_to_v8i16:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x double> %in to <8 x i16>
+  ret <8 x i16> %val
+}
+
+define <16 x i8> @test_v2f64_to_v16i8(<2 x double> %in) nounwind{
+; CHECK-LABEL: test_v2f64_to_v16i8:
+; CHECK-NEXT: // BB#0:
+; CHECK-NEXT: ret
+
+  %val = bitcast <2 x double> %in to <16 x i8>
+  ret <16 x i8> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll
new file mode 100644
index 000000000000..1c43b979fc44
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -0,0 +1,594 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+
+define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) { ; register-register AND, 64-bit vector
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = and <8 x i8> %a, %b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @and16xi8(<16 x i8> %a, <16 x i8> %b) { ; register-register AND, 128-bit vector
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = and <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+
+define <8 x i8> @orr8xi8(<8 x i8> %a, <8 x i8> %b) { ; register-register OR, 64-bit vector
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = or <8 x i8> %a, %b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @orr16xi8(<16 x i8> %a, <16 x i8> %b) { ; register-register OR, 128-bit vector
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = or <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+
+define <8 x i8> @xor8xi8(<8 x i8> %a, <8 x i8> %b) { ; IR xor is expected as EOR, 64-bit vector
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = xor <8 x i8> %a, %b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) { ; IR xor is expected as EOR, 128-bit vector
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = xor <16 x i8> %a, %b
+  ret <16 x i8> %res
+}
+
+define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %keep_a = and <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %keep_b = and <8 x i8> %b, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  %res = or <8 x i8> %keep_a, %keep_b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %keep_a = and <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %keep_b = and <16 x i8> %b, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  %res = or <16 x i8> %keep_a, %keep_b
+  ret <16 x i8> %res
+}
+
+define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) { ; a | ~b, expects ORN
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = or <8 x i8> %a, %not_b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) { ; a | ~b, expects ORN
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = or <16 x i8> %a, %not_b
+  ret <16 x i8> %res
+}
+
+define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) { ; a & ~b, expects BIC
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = and <8 x i8> %a, %not_b
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) { ; a & ~b, expects BIC
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = and <16 x i8> %a, %not_b
+  ret <16 x i8> %res
+}
+
+define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) { ; OR with splat 0x000000ff
+; CHECK: orr {{v[0-9]+}}.2s, #0xff
+  %res = or <2 x i32> %a, <i32 255, i32 255>
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) { ; OR with splat 0x0000ff00
+; CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8
+  %res = or <2 x i32> %a, <i32 65280, i32 65280>
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) { ; OR with splat 0x00ff0000
+; CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16
+  %res = or <2 x i32> %a, <i32 16711680, i32 16711680>
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) { ; OR with splat 0xff000000
+; CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24
+  %res = or <2 x i32> %a, <i32 4278190080, i32 4278190080>
+  ret <2 x i32> %res
+}
+
+define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) { ; OR with splat 0x000000ff
+; CHECK: orr {{v[0-9]+}}.4s, #0xff
+  %res = or <4 x i32> %a, <i32 255, i32 255, i32 255, i32 255>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) { ; OR with splat 0x0000ff00
+; CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8
+  %res = or <4 x i32> %a, <i32 65280, i32 65280, i32 65280, i32 65280>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) { ; OR with splat 0x00ff0000
+; CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16
+  %res = or <4 x i32> %a, <i32 16711680, i32 16711680, i32 16711680, i32 16711680>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) { ; OR with splat 0xff000000
+; CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24
+  %res = or <4 x i32> %a, <i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080>
+  ret <4 x i32> %res
+}
+
+define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) { ; OR with splat 0x00ff
+; CHECK: orr {{v[0-9]+}}.4h, #0xff
+  %res = or <4 x i16> %a, <i16 255, i16 255, i16 255, i16 255>
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) { ; OR with splat 0xff00
+; CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+  %res = or <4 x i16> %a, <i16 65280, i16 65280, i16 65280, i16 65280>
+  ret <4 x i16> %res
+}
+
+define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) { ; OR with splat 0x00ff
+; CHECK: orr {{v[0-9]+}}.8h, #0xff
+  %res = or <8 x i16> %a, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) { ; OR with splat 0xff00
+; CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+  %res = or <8 x i16> %a, <i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280>
+  ret <8 x i16> %res
+}
+
+define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) { ; AND with splat 0xffffffef (~0x10)
+; CHECK: bic {{v[0-9]+}}.2s, #0x10
+  %res = and <2 x i32> %a, <i32 4294967279, i32 4294967279>
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) { ; AND with splat 0xffffefff (~0x1000), written in-width
+; CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #8
+  %res = and <2 x i32> %a, <i32 4294963199, i32 4294963199>
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) { ; AND with splat 0xffefffff (~0x100000), written in-width
+; CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #16
+  %res = and <2 x i32> %a, <i32 4293918719, i32 4293918719>
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) { ; AND with splat 0xefffffff; name says lsl124, shift expected is 24
+; CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #24
+  %res = and <2 x i32> %a, <i32 4026531839, i32 4026531839>
+  ret <2 x i32> %res
+}
+
+define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) { ; AND with splat 0xffffffef (~0x10)
+; CHECK: bic {{v[0-9]+}}.4s, #0x10
+  %res = and <4 x i32> %a, <i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) { ; AND with splat 0xffffefff (~0x1000), written in-width
+; CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #8
+  %res = and <4 x i32> %a, <i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) { ; AND with splat 0xffefffff (~0x100000), written in-width
+; CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #16
+  %res = and <4 x i32> %a, <i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) { ; AND with splat 0xefffffff; name says lsl124, shift expected is 24
+; CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #24
+  %res = and <4 x i32> %a, <i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839>
+  ret <4 x i32> %res
+}
+
+define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) { ; AND with splat 0xffef (~0x10), written in-width
+; CHECK: bic {{v[0-9]+}}.4h, #0x10
+  %res = and <4 x i16> %a, <i16 65519, i16 65519, i16 65519, i16 65519>
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) { ; AND with splat 0xff00
+; CHECK: bic {{v[0-9]+}}.4h, #0x0
+  %res = and <4 x i16> %a, <i16 65280, i16 65280, i16 65280, i16 65280>
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) { ; AND with splat 0xefff (~0x1000), written in-width
+; CHECK: bic {{v[0-9]+}}.4h, #0x10, lsl #8
+  %res = and <4 x i16> %a, <i16 61439, i16 61439, i16 61439, i16 61439>
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) { ; AND with splat 0x00ff
+; CHECK: bic {{v[0-9]+}}.4h, #0x0, lsl #8
+  %res = and <4 x i16> %a, <i16 255, i16 255, i16 255, i16 255>
+  ret <4 x i16> %res
+}
+
+define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) { ; AND with splat 0xffef (~0x10), written in-width
+; CHECK: bic {{v[0-9]+}}.8h, #0x10
+  %res = and <8 x i16> %a, <i16 65519, i16 65519, i16 65519, i16 65519,
+                            i16 65519, i16 65519, i16 65519, i16 65519>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) { ; AND with splat 0xff00
+; CHECK: bic {{v[0-9]+}}.8h, #0x0
+  %res = and <8 x i16> %a, <i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) { ; AND with splat 0xefff (~0x1000), written in-width
+; CHECK: bic {{v[0-9]+}}.8h, #0x10, lsl #8
+  %res = and <8 x i16> %a, <i16 61439, i16 61439, i16 61439, i16 61439,
+                            i16 61439, i16 61439, i16 61439, i16 61439>
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) { ; AND with splat 0x00ff
+; CHECK: bic {{v[0-9]+}}.8h, #0x0, lsl #8
+  %res = and <8 x i16> %a, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  ret <8 x i16> %res
+}
+
+define <2 x i32> @and2xi32(<2 x i32> %a, <2 x i32> %b) { ; AND on <2 x i32>, expected in the .8b form
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = and <2 x i32> %a, %b
+  ret <2 x i32> %res
+}
+
+define <4 x i16> @and4xi16(<4 x i16> %a, <4 x i16> %b) { ; AND on <4 x i16>, expected in the .8b form
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = and <4 x i16> %a, %b
+  ret <4 x i16> %res
+}
+
+define <1 x i64> @and1xi64(<1 x i64> %a, <1 x i64> %b) { ; AND on <1 x i64>, expected in the .8b form
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = and <1 x i64> %a, %b
+  ret <1 x i64> %res
+}
+
+define <4 x i32> @and4xi32(<4 x i32> %a, <4 x i32> %b) { ; AND on <4 x i32>, expected in the .16b form
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = and <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @and8xi16(<8 x i16> %a, <8 x i16> %b) { ; AND on <8 x i16>, expected in the .16b form
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = and <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <2 x i64> @and2xi64(<2 x i64> %a, <2 x i64> %b) { ; AND on <2 x i64>, expected in the .16b form
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = and <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <2 x i32> @orr2xi32(<2 x i32> %a, <2 x i32> %b) { ; OR on <2 x i32>, expected in the .8b form
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = or <2 x i32> %a, %b
+  ret <2 x i32> %res
+}
+
+define <4 x i16> @orr4xi16(<4 x i16> %a, <4 x i16> %b) { ; OR on <4 x i16>, expected in the .8b form
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = or <4 x i16> %a, %b
+  ret <4 x i16> %res
+}
+
+define <1 x i64> @orr1xi64(<1 x i64> %a, <1 x i64> %b) { ; OR on <1 x i64>, expected in the .8b form
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = or <1 x i64> %a, %b
+  ret <1 x i64> %res
+}
+
+define <4 x i32> @orr4xi32(<4 x i32> %a, <4 x i32> %b) { ; OR on <4 x i32>, expected in the .16b form
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = or <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @orr8xi16(<8 x i16> %a, <8 x i16> %b) { ; OR on <8 x i16>, expected in the .16b form
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = or <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <2 x i64> @orr2xi64(<2 x i64> %a, <2 x i64> %b) { ; OR on <2 x i64>, expected in the .16b form
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = or <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+define <2 x i32> @eor2xi32(<2 x i32> %a, <2 x i32> %b) { ; XOR on <2 x i32>, expected in the .8b form
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = xor <2 x i32> %a, %b
+  ret <2 x i32> %res
+}
+
+define <4 x i16> @eor4xi16(<4 x i16> %a, <4 x i16> %b) { ; XOR on <4 x i16>, expected in the .8b form
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = xor <4 x i16> %a, %b
+  ret <4 x i16> %res
+}
+
+define <1 x i64> @eor1xi64(<1 x i64> %a, <1 x i64> %b) { ; XOR on <1 x i64>, expected in the .8b form
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = xor <1 x i64> %a, %b
+  ret <1 x i64> %res
+}
+
+define <4 x i32> @eor4xi32(<4 x i32> %a, <4 x i32> %b) { ; XOR on <4 x i32>, expected in the .16b form
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = xor <4 x i32> %a, %b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @eor8xi16(<8 x i16> %a, <8 x i16> %b) { ; XOR on <8 x i16>, expected in the .16b form
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = xor <8 x i16> %a, %b
+  ret <8 x i16> %res
+}
+
+define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) { ; XOR on <2 x i64>, expected in the .16b form
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = xor <2 x i64> %a, %b
+  ret <2 x i64> %res
+}
+
+
+define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) { ; a & ~b on <2 x i32>, expects BIC .8b
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <2 x i32> %b, <i32 -1, i32 -1>
+  %res = and <2 x i32> %a, %not_b
+  ret <2 x i32> %res
+}
+
+define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) { ; a & ~b on <4 x i16>, expects BIC .8b
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %res = and <4 x i16> %a, %not_b
+  ret <4 x i16> %res
+}
+
+define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) { ; a & ~b on <1 x i64>, expects BIC .8b
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <1 x i64> %b, <i64 -1>
+  %res = and <1 x i64> %a, %not_b
+  ret <1 x i64> %res
+}
+
+define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) { ; a & ~b on <4 x i32>, expects BIC .16b
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = and <4 x i32> %a, %not_b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) { ; a & ~b on <8 x i16>, expects BIC .16b
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %res = and <8 x i16> %a, %not_b
+  ret <8 x i16> %res
+}
+
+define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) { ; a & ~b on <2 x i64>, expects BIC .16b
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <2 x i64> %b, <i64 -1, i64 -1>
+  %res = and <2 x i64> %a, %not_b
+  ret <2 x i64> %res
+}
+
+define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) { ; a | ~b on <2 x i32>, expects ORN .8b
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <2 x i32> %b, <i32 -1, i32 -1>
+  %res = or <2 x i32> %a, %not_b
+  ret <2 x i32> %res
+}
+
+define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) { ; a | ~b on <4 x i16>, expects ORN .8b
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %res = or <4 x i16> %a, %not_b
+  ret <4 x i16> %res
+}
+
+define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) { ; a | ~b on <1 x i64>, expects ORN .8b
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %not_b = xor <1 x i64> %b, <i64 -1>
+  %res = or <1 x i64> %a, %not_b
+  ret <1 x i64> %res
+}
+
+define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) { ; a | ~b on <4 x i32>, expects ORN .16b
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = or <4 x i32> %a, %not_b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) { ; a | ~b on <8 x i16>, expects ORN .16b
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %res = or <8 x i16> %a, %not_b
+  ret <8 x i16> %res
+}
+
+define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) { ; a | ~b on <2 x i64>, expects ORN .16b
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %not_b = xor <2 x i64> %b, <i64 -1, i64 -1>
+  %res = or <2 x i64> %a, %not_b
+  ret <2 x i64> %res
+}
+define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %keep_a = and <2 x i32> %a, <i32 -1, i32 -1>
+  %keep_b = and <2 x i32> %b, <i32 0, i32 0>
+  %res = or <2 x i32> %keep_a, %keep_b
+  ret <2 x i32> %res
+}
+
+
+define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %keep_a = and <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %keep_b = and <4 x i16> %b, <i16 0, i16 0, i16 0, i16 0>
+  %res = or <4 x i16> %keep_a, %keep_b
+  ret <4 x i16> %res
+}
+
+define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %keep_a = and <1 x i64> %a, <i64 -1>
+  %keep_b = and <1 x i64> %b, <i64 0>
+  %res = or <1 x i64> %keep_a, %keep_b
+  ret <1 x i64> %res
+}
+
+define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %keep_a = and <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %keep_b = and <4 x i32> %b, <i32 0, i32 0, i32 0, i32 0>
+  %res = or <4 x i32> %keep_a, %keep_b
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %keep_a = and <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %keep_b = and <8 x i16> %b, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  %res = or <8 x i16> %keep_a, %keep_b
+  ret <8 x i16> %res
+}
+
+define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { ; (a & all-ones) | (b & zero), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %keep_a = and <2 x i64> %a, <i64 -1, i64 -1>
+  %keep_b = and <2 x i64> %b, <i64 0, i64 0>
+  %res = or <2 x i64> %keep_a, %keep_b
+  ret <2 x i64> %res
+}
+
+
+define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %from_v2 = and <8 x i8> %v1, %v2
+  %not_v1 = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %from_v3 = and <8 x i8> %not_v1, %v3
+  %res = or <8 x i8> %from_v2, %from_v3
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %from_v2 = and <4 x i16> %v1, %v2
+  %not_v1 = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %from_v3 = and <4 x i16> %not_v1, %v3
+  %res = or <4 x i16> %from_v2, %from_v3
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %from_v2 = and <2 x i32> %v1, %v2
+  %not_v1 = xor <2 x i32> %v1, <i32 -1, i32 -1>
+  %from_v3 = and <2 x i32> %not_v1, %v3
+  %res = or <2 x i32> %from_v2, %from_v3
+  ret <2 x i32> %res
+}
+
+define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %from_v2 = and <1 x i64> %v1, %v2
+  %not_v1 = xor <1 x i64> %v1, <i64 -1>
+  %from_v3 = and <1 x i64> %not_v1, %v3
+  %res = or <1 x i64> %from_v2, %from_v3
+  ret <1 x i64> %res
+}
+
+define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %from_v2 = and <16 x i8> %v1, %v2
+  %not_v1 = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %from_v3 = and <16 x i8> %not_v1, %v3
+  %res = or <16 x i8> %from_v2, %from_v3
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %from_v2 = and <8 x i16> %v1, %v2
+  %not_v1 = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %from_v3 = and <8 x i16> %not_v1, %v3
+  %res = or <8 x i16> %from_v2, %from_v3
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %from_v2 = and <4 x i32> %v1, %v2
+  %not_v1 = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %from_v3 = and <4 x i32> %not_v1, %v3
+  %res = or <4 x i32> %from_v2, %from_v3
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { ; (v1 & v2) | (~v1 & v3), expects BSL
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %from_v2 = and <2 x i64> %v1, %v2
+  %not_v1 = xor <2 x i64> %v1, <i64 -1, i64 -1>
+  %from_v3 = and <2 x i64> %not_v1, %v3
+  %res = or <2 x i64> %from_v2, %from_v3
+  ret <2 x i64> %res
+}
+
+define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) { ; byte pattern ff,00 repeated; expected as a .4h ORR
+; CHECK: orr {{v[0-9]+}}.4h, #0xff
+  %res = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) { ; byte pattern 00,ff repeated; expected as a .4h ORR, lsl #8
+; CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+  %res = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) { ; byte pattern ff,00 repeated; expected as a .8h ORR
+; CHECK: orr {{v[0-9]+}}.8h, #0xff
+  %res = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) { ; byte pattern 00,ff repeated; expected as a .8h ORR, lsl #8
+; CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+  %res = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
+  ret <16 x i8> %res
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-bsl.ll b/test/CodeGen/AArch64/neon-bsl.ll
new file mode 100644
index 000000000000..6bd923dc2cca
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-bsl.ll
@@ -0,0 +1,222 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+
+declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+
+declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
+
+declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
+
+declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>)
+
+declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>)
+
+declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>)
+
+declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+
+define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; vbsl intrinsic on <8 x i8>
+; CHECK-LABEL: test_vbsl_s8:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
+  ret <8 x i8> %sel
+}
+
+define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { ; vbsl on <4 x i16>, result returned as <8 x i8>
+; CHECK-LABEL: test_vbsl_s16:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
+  %as_bytes = bitcast <4 x i16> %sel to <8 x i8>
+  ret <8 x i8> %as_bytes
+}
+
+define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; vbsl intrinsic on <2 x i32>
+; CHECK-LABEL: test_vbsl_s32:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
+  ret <2 x i32> %sel
+}
+
+define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { ; vbsl intrinsic on <1 x i64>
+; CHECK-LABEL: test_vbsl_s64:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
+  ret <1 x i64> %sel
+}
+
+define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; vbsl intrinsic on <8 x i8>
+; CHECK-LABEL: test_vbsl_u8:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
+  ret <8 x i8> %sel
+}
+
+define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { ; vbsl intrinsic on <4 x i16>
+; CHECK-LABEL: test_vbsl_u16:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
+  ret <4 x i16> %sel
+}
+
+define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; vbsl intrinsic on <2 x i32>
+; CHECK-LABEL: test_vbsl_u32:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
+  ret <2 x i32> %sel
+}
+
+define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { ; vbsl intrinsic on <1 x i64>
+; CHECK-LABEL: test_vbsl_u64:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
+  ret <1 x i64> %sel
+}
+
+define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) { ; vbsl intrinsic on <2 x float>
+; CHECK-LABEL: test_vbsl_f32:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3)
+  ret <2 x float> %sel
+}
+
+define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) { ; integer first operand is bitcast before the vbsl call
+; CHECK-LABEL: test_vbsl_f64:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %mask = bitcast <1 x i64> %v1 to <1 x double>
+  %sel = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %mask, <1 x double> %v2, <1 x double> %v3)
+  ret <1 x double> %sel
+}
+
+define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { ; vbsl intrinsic on <8 x i8>
+; CHECK-LABEL: test_vbsl_p8:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
+  ret <8 x i8> %sel
+}
+
+define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { ; vbsl intrinsic on <4 x i16>
+; CHECK-LABEL: test_vbsl_p16:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %sel = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
+  ret <4 x i16> %sel
+}
+
+define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { ; vbsl intrinsic on <16 x i8>
+; CHECK-LABEL: test_vbslq_s8:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
+  ret <16 x i8> %sel
+}
+
+define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { ; vbsl intrinsic on <8 x i16>
+; CHECK-LABEL: test_vbslq_s16:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
+  ret <8 x i16> %sel
+}
+
+define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ; vbsl intrinsic on <4 x i32>
+; CHECK-LABEL: test_vbslq_s32:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
+  ret <4 x i32> %sel
+}
+
+define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { ; vbsl intrinsic on <2 x i64>
+; CHECK-LABEL: test_vbslq_s64:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
+  ret <2 x i64> %sel
+}
+
+define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { ; vbsl intrinsic on <16 x i8>
+; CHECK-LABEL: test_vbslq_u8:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
+  ret <16 x i8> %sel
+}
+
+define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { ; vbsl intrinsic on <8 x i16>
+; CHECK-LABEL: test_vbslq_u16:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
+  ret <8 x i16> %sel
+}
+
+define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { ; vbsl intrinsic on <4 x i32>
+; CHECK-LABEL: test_vbslq_u32:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
+  ret <4 x i32> %sel
+}
+
+define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { ; vbsl intrinsic on <2 x i64>
+; CHECK-LABEL: test_vbslq_u64:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
+  ret <2 x i64> %sel
+}
+
+define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) { ; integer first operand is bitcast before the vbsl call
+; CHECK-LABEL: test_vbslq_f32:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %mask = bitcast <4 x i32> %v1 to <4 x float>
+  %sel = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %mask, <4 x float> %v2, <4 x float> %v3)
+  ret <4 x float> %sel
+}
+
+define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { ; vbsl intrinsic on <16 x i8>
+; CHECK-LABEL: test_vbslq_p8:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
+  ret <16 x i8> %sel
+}
+
+define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { ; vbsl intrinsic on <8 x i16>
+; CHECK-LABEL: test_vbslq_p16:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %sel = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
+  ret <8 x i16> %sel
+}
+
+define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) { ; integer first operand is bitcast before the vbsl call
+; CHECK-LABEL: test_vbslq_f64:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %mask = bitcast <2 x i64> %v1 to <2 x double>
+  %sel = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %mask, <2 x double> %v2, <2 x double> %v3)
+  ret <2 x double> %sel
+}
+
diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll
new file mode 100644
index 000000000000..68f03425b276
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -0,0 +1,1926 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp eq <8 x i8> %A, %B  ; per-lane A == B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp eq <16 x i8> %A, %B  ; per-lane A == B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %cmp = icmp eq <4 x i16> %A, %B  ; per-lane A == B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %cmp = icmp eq <8 x i16> %A, %B  ; per-lane A == B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %cmp = icmp eq <2 x i32> %A, %B  ; per-lane A == B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %cmp = icmp eq <4 x i32> %A, %B  ; per-lane A == B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %cmp = icmp eq <2 x i64> %A, %B  ; per-lane A == B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <8 x i8> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <16 x i8> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <4 x i16> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <8 x i16> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <2 x i32> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <4 x i32> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <2 x i64> %A, %B  ; per-lane A != B, expected as an inverted equality compare
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp sgt <8 x i8> %A, %B  ; per-lane signed A > B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp sgt <16 x i8> %A, %B  ; per-lane signed A > B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %cmp = icmp sgt <4 x i16> %A, %B  ; per-lane signed A > B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %cmp = icmp sgt <8 x i16> %A, %B  ; per-lane signed A > B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %cmp = icmp sgt <2 x i32> %A, %B  ; per-lane signed A > B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %cmp = icmp sgt <4 x i32> %A, %B  ; per-lane signed A > B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %cmp = icmp sgt <2 x i64> %A, %B  ; per-lane signed A > B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmlt8xi8:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
+  %cmp = icmp slt <8 x i8> %A, %B  ; per-lane signed A < B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmlt16xi8:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
+  %cmp = icmp slt <16 x i8> %A, %B  ; per-lane signed A < B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmlt4xi16:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
+  %cmp = icmp slt <4 x i16> %A, %B  ; per-lane signed A < B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmlt8xi16:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
+  %cmp = icmp slt <8 x i16> %A, %B  ; per-lane signed A < B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmlt2xi32:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+  %cmp = icmp slt <2 x i32> %A, %B  ; per-lane signed A < B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmlt4xi32:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+  %cmp = icmp slt <4 x i32> %A, %B  ; per-lane signed A < B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmlt2xi64:
+; LT is implemented as GT with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+  %cmp = icmp slt <2 x i64> %A, %B  ; per-lane signed A < B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp sge <8 x i8> %A, %B  ; per-lane signed A >= B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp sge <16 x i8> %A, %B  ; per-lane signed A >= B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %cmp = icmp sge <4 x i16> %A, %B  ; per-lane signed A >= B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %cmp = icmp sge <8 x i16> %A, %B  ; per-lane signed A >= B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %cmp = icmp sge <2 x i32> %A, %B  ; per-lane signed A >= B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %cmp = icmp sge <4 x i32> %A, %B  ; per-lane signed A >= B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %cmp = icmp sge <2 x i64> %A, %B  ; per-lane signed A >= B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmle8xi8:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
+  %cmp = icmp sle <8 x i8> %A, %B  ; per-lane signed A <= B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmle16xi8:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
+  %cmp = icmp sle <16 x i8> %A, %B  ; per-lane signed A <= B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmle4xi16:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
+  %cmp = icmp sle <4 x i16> %A, %B  ; per-lane signed A <= B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmle8xi16:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
+  %cmp = icmp sle <8 x i16> %A, %B  ; per-lane signed A <= B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmle2xi32:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+  %cmp = icmp sle <2 x i32> %A, %B  ; per-lane signed A <= B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmle4xi32:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+  %cmp = icmp sle <4 x i32> %A, %B  ; per-lane signed A <= B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmle2xi64:
+; LE is implemented as GE with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+  %cmp = icmp sle <2 x i64> %A, %B  ; per-lane signed A <= B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ugt <8 x i8> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ugt <16 x i8> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %cmp = icmp ugt <4 x i16> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %cmp = icmp ugt <8 x i16> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %cmp = icmp ugt <2 x i32> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %cmp = icmp ugt <4 x i32> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %cmp = icmp ugt <2 x i64> %A, %B  ; per-lane unsigned A > B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmlo8xi8:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
+  %cmp = icmp ult <8 x i8> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmlo16xi8:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+  %cmp = icmp ult <16 x i8> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmlo4xi16:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+  %cmp = icmp ult <4 x i16> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmlo8xi16:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+  %cmp = icmp ult <8 x i16> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmlo2xi32:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+  %cmp = icmp ult <2 x i32> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmlo4xi32:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+  %cmp = icmp ult <4 x i32> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmlo2xi64:
+; LO is implemented as HI with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+  %cmp = icmp ult <2 x i64> %A, %B  ; per-lane unsigned A < B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp uge <8 x i8> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp uge <16 x i8> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %cmp = icmp uge <4 x i16> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %cmp = icmp uge <8 x i16> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %cmp = icmp uge <2 x i32> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %cmp = icmp uge <4 x i32> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %cmp = icmp uge <2 x i64> %A, %B  ; per-lane unsigned A >= B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmls8xi8:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+  %cmp = icmp ule <8 x i8> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmls16xi8:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+  %cmp = icmp ule <16 x i8> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmls4xi16:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+  %cmp = icmp ule <4 x i16> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmls8xi16:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+  %cmp = icmp ule <8 x i16> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmls2xi32:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+  %cmp = icmp ule <2 x i32> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmls4xi32:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+  %cmp = icmp ule <4 x i32> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmls2xi64:
+; LS is implemented as HS with the operands reversed, so v1 comes before v0 (other registers are possible, but would be odd).
+; CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+  %cmp = icmp ule <2 x i64> %A, %B  ; per-lane unsigned A <= B
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %bits = and <8 x i8> %A, %B  ; bits set in both operands
+  %nz = icmp ne <8 x i8> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <8 x i1> %nz to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %bits = and <16 x i8> %A, %B  ; bits set in both operands
+  %nz = icmp ne <16 x i8> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <16 x i1> %nz to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %bits = and <4 x i16> %A, %B  ; bits set in both operands
+  %nz = icmp ne <4 x i16> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <4 x i1> %nz to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %bits = and <8 x i16> %A, %B  ; bits set in both operands
+  %nz = icmp ne <8 x i16> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <8 x i1> %nz to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %bits = and <2 x i32> %A, %B  ; bits set in both operands
+  %nz = icmp ne <2 x i32> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <2 x i1> %nz to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %bits = and <4 x i32> %A, %B  ; bits set in both operands
+  %nz = icmp ne <4 x i32> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <4 x i1> %nz to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmtst2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %bits = and <2 x i64> %A, %B  ; bits set in both operands
+  %nz = icmp ne <2 x i64> %bits, zeroinitializer  ; per-lane: any common bit set
+  %mask = sext <2 x i1> %nz to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+
+
+define <8 x i8> @cmeqz8xi8(<8 x i8> %A) {
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+  %cmp = icmp eq <8 x i8> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmeqz16xi8(<16 x i8> %A) {
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+  %cmp = icmp eq <16 x i8> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmeqz4xi16(<4 x i16> %A) {
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+  %cmp = icmp eq <4 x i16> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmeqz8xi16(<8 x i16> %A) {
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+  %cmp = icmp eq <8 x i16> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmeqz2xi32(<2 x i32> %A) {
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+  %cmp = icmp eq <2 x i32> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmeqz4xi32(<4 x i32> %A) {
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+  %cmp = icmp eq <4 x i32> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+  %cmp = icmp eq <2 x i64> %A, zeroinitializer  ; per-lane A == 0
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+
+define <8 x i8> @cmgez8xi8(<8 x i8> %A) {
+; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+  %cmp = icmp sge <8 x i8> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmgez16xi8(<16 x i8> %A) {
+; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+  %cmp = icmp sge <16 x i8> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmgez4xi16(<4 x i16> %A) {
+; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+  %cmp = icmp sge <4 x i16> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmgez8xi16(<8 x i16> %A) {
+; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+  %cmp = icmp sge <8 x i16> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmgez2xi32(<2 x i32> %A) {
+; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+  %cmp = icmp sge <2 x i32> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmgez4xi32(<4 x i32> %A) {
+; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+  %cmp = icmp sge <4 x i32> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
+; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+  %cmp = icmp sge <2 x i64> %A, zeroinitializer  ; per-lane signed A >= 0
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+
+define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
+; CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+  %cmp = icmp sgt <8 x i8> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmgtz16xi8(<16 x i8> %A) {
+; CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+  %cmp = icmp sgt <16 x i8> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmgtz4xi16(<4 x i16> %A) {
+; CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+  %cmp = icmp sgt <4 x i16> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmgtz8xi16(<8 x i16> %A) {
+; CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+  %cmp = icmp sgt <8 x i16> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmgtz2xi32(<2 x i32> %A) {
+; CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+  %cmp = icmp sgt <2 x i32> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmgtz4xi32(<4 x i32> %A) {
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+  %cmp = icmp sgt <4 x i32> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmgtz2xi64(<2 x i64> %A) {
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+  %cmp = icmp sgt <2 x i64> %A, zeroinitializer  ; per-lane signed A > 0
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmlez8xi8(<8 x i8> %A) {
+; CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+  %cmp = icmp sle <8 x i8> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmlez16xi8(<16 x i8> %A) {
+; CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+  %cmp = icmp sle <16 x i8> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmlez4xi16(<4 x i16> %A) {
+; CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+  %cmp = icmp sle <4 x i16> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmlez8xi16(<8 x i16> %A) {
+; CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+  %cmp = icmp sle <8 x i16> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmlez2xi32(<2 x i32> %A) {
+; CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+  %cmp = icmp sle <2 x i32> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmlez4xi32(<4 x i32> %A) {
+; CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+  %cmp = icmp sle <4 x i32> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmlez2xi64(<2 x i64> %A) {
+; CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+  %cmp = icmp sle <2 x i64> %A, zeroinitializer  ; per-lane signed A <= 0
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmltz8xi8(<8 x i8> %A) {
+; CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+  %cmp = icmp slt <8 x i8> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmltz16xi8(<16 x i8> %A) {
+; CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+  %cmp = icmp slt <16 x i8> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmltz4xi16(<4 x i16> %A) {
+; CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+  %cmp = icmp slt <4 x i16> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmltz8xi16(<8 x i16> %A) {
+; CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+  %cmp = icmp slt <8 x i16> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmltz2xi32(<2 x i32> %A) {
+; CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+  %cmp = icmp slt <2 x i32> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmltz4xi32(<4 x i32> %A) {
+; CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+  %cmp = icmp slt <4 x i32> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
+; CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+  %cmp = icmp slt <2 x i64> %A, zeroinitializer  ; per-lane signed A < 0
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <8 x i8> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <16 x i8> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <4 x i16> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <8 x i16> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <2 x i32> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <4 x i32> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <4 x i1> %cmp to <4 x i32>  ; i1 true -> all-ones lane
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp ne <2 x i64> %A, zeroinitializer  ; per-lane A != 0, expected as an inverted compare-with-zero
+  %mask = sext <2 x i1> %cmp to <2 x i64>  ; i1 true -> all-ones lane
+  ret <2 x i64> %mask
+}
+
+define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
+; CHECK: movi {{v[0-9]+}}.8b, #0x0
+; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp uge <8 x i8> %A, zeroinitializer  ; per-lane unsigned A >= 0, expected against a materialized zero vector
+  %mask = sext <8 x i1> %cmp to <8 x i8>  ; i1 true -> all-ones lane
+  ret <8 x i8> %mask
+}
+
+define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
+; CHECK: movi {{v[0-9]+}}.16b, #0x0
+; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = icmp uge <16 x i8> %A, zeroinitializer  ; per-lane unsigned A >= 0, expected against a materialized zero vector
+  %mask = sext <16 x i1> %cmp to <16 x i8>  ; i1 true -> all-ones lane
+  ret <16 x i8> %mask
+}
+
+define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
+; CHECK: movi {{v[0-9]+}}.8b, #0x0
+; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %cmp = icmp uge <4 x i16> %A, zeroinitializer  ; per-lane unsigned A >= 0, expected against a materialized zero vector
+  %mask = sext <4 x i1> %cmp to <4 x i16>  ; i1 true -> all-ones lane
+  ret <4 x i16> %mask
+}
+
+define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
+; CHECK: movi {{v[0-9]+}}.16b, #0x0
+; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %cmp = icmp uge <8 x i16> %A, zeroinitializer  ; per-lane unsigned A >= 0, expected against a materialized zero vector
+  %mask = sext <8 x i1> %cmp to <8 x i16>  ; i1 true -> all-ones lane
+  ret <8 x i16> %mask
+}
+
+define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
+; CHECK: movi {{v[0-9]+}}.8b, #0x0
+; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %cmp = icmp uge <2 x i32> %A, zeroinitializer  ; per-lane unsigned A >= 0, expected against a materialized zero vector
+  %mask = sext <2 x i1> %cmp to <2 x i32>  ; i1 true -> all-ones lane
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
+;CHECK: movi {{v[0-9]+}}.16b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
+;CHECK: movi {{v[0-9]+}}.16b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+
+define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
+;CHECK: movi {{v[0-9]+}}.8b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
+   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
+;CHECK: movi {{v[0-9]+}}.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+	%tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
+   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
+;CHECK: movi {{v[0-9]+}}.8b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+	%tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
+;CHECK: movi {{v[0-9]+}}.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+	%tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
+   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
+;CHECK: movi {{v[0-9]+}}.8b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
+;CHECK: movi {{v[0-9]+}}.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
+;CHECK: movi {{v[0-9]+}}.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.8b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+	%tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
+   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+	%tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
+   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.8b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+	%tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+	%tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
+   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.8b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+	%tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+	%tmp3 = icmp ule <4 x i32> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+	%tmp3 = icmp ule <2 x i64> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands (pinned like the other cmloz tests).
+;CHECK: movi v1.8b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
+	%tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
+   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+	%tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
+   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v1.8b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+	%tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+	%tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
+   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v1.8b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+	%tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+	%tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v1.16b, #0x0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+	%tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+
+define <2 x i32> @fcmoeq2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+   %tmp3 = fcmp oeq <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoeq4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+   %tmp3 = fcmp oeq <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmoeq2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+   %tmp3 = fcmp oeq <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmoge2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+   %tmp3 = fcmp oge <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoge4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+   %tmp3 = fcmp oge <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmoge2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+   %tmp3 = fcmp oge <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmogt2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+   %tmp3 = fcmp ogt <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmogt4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+   %tmp3 = fcmp ogt <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmogt2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+   %tmp3 = fcmp ogt <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmole2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; OLE implemented as OGE, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+   %tmp3 = fcmp ole <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmole4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; OLE implemented as OGE, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+   %tmp3 = fcmp ole <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmole2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; OLE implemented as OGE, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+   %tmp3 = fcmp ole <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmolt2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; OLT implemented as OGT (fcmgt), so check reversed operands.
+;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+   %tmp3 = fcmp olt <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmolt4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; OLT implemented as OGT (fcmgt), so check reversed operands.
+;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+   %tmp3 = fcmp olt <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmolt2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; OLT implemented as OGT (fcmgt), so check reversed operands.
+;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+   %tmp3 = fcmp olt <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmone2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ONE = OGT | OLT, OLT implemented as OGT so check reversed operands
+;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp one <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmone4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ONE = OGT | OLT, OLT implemented as OGT so check reversed operands
+;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp one <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmone2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; ONE = OGT | OLT, OLT implemented as OGT so check reversed operands
+;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; Both operand orders (v0, v1 and v1, v0) are pinned by the two fcmgt lines above.
+   %tmp3 = fcmp one <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+
+define <2 x i32> @fcmord2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ORD = OGE | OLT, OLT implemented as OGT, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ord <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+
+define <4 x i32> @fcmord4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ORD = OGE | OLT, OLT implemented as OGT, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ord <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ORD = OGE | OLT, OLT implemented as OGT, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ord <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+
+define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp uno <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp uno <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
+;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp uno <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
+;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ueq <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
+;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ueq <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands
+;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ueq <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UGE = ULE with swapped operands, ULE implemented as !OGT.
+;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp uge <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UGE = ULE with swapped operands, ULE implemented as !OGT.
+;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp uge <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UGE = ULE with swapped operands, ULE implemented as !OGT.
+;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp uge <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UGT = ULT with swapped operands, ULT implemented as !OGE.
+;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ugt <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UGT = ULT with swapped operands, ULT implemented as !OGE.
+;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ugt <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ugt <2 x double> %A, %B ; UGT = ULT with swapped operands, ULT implemented as !OGE.
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ULE implemented as !OGT.
+;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ule <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ULE implemented as !OGT.
+;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ule <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ULE implemented as !OGT.
+;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ule <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ULT implemented as !OGE.
+;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ult <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmult4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ULT implemented as !OGE.
+;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ult <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; ULT implemented as !OGE.
+;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ult <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UNE = !OEQ.
+;CHECK: fcmeq {{v[0-9]+}}.2s, v0.2s, v1.2s
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp une <2 x float> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UNE = !OEQ.
+;CHECK: fcmeq {{v[0-9]+}}.4s, v0.4s, v1.4s
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp une <4 x float> %A, %B
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
+; Using registers other than v0, v1 are possible, but would be odd.
+; UNE = !OEQ.
+;CHECK: fcmeq {{v[0-9]+}}.2d, v0.2d, v1.2d
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp une <2 x double> %A, %B
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
+;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+   %tmp3 = fcmp oeq <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoeqz4xfloat(<4 x float> %A) {
+;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+   %tmp3 = fcmp oeq <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmoeqz2xdouble(<2 x double> %A) {
+;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+   %tmp3 = fcmp oeq <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+
+define <2 x i32> @fcmogez2xfloat(<2 x float> %A) {
+;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+   %tmp3 = fcmp oge <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmogez4xfloat(<4 x float> %A) {
+;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+   %tmp3 = fcmp oge <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmogez2xdouble(<2 x double> %A) {
+;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+   %tmp3 = fcmp oge <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmogtz2xfloat(<2 x float> %A) {
+;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+   %tmp3 = fcmp ogt <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmogtz4xfloat(<4 x float> %A) {
+;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+   %tmp3 = fcmp ogt <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmogtz2xdouble(<2 x double> %A) {
+;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+   %tmp3 = fcmp ogt <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmoltz2xfloat(<2 x float> %A) {
+;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+   %tmp3 = fcmp olt <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmoltz4xfloat(<4 x float> %A) {
+;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+   %tmp3 = fcmp olt <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmoltz2xdouble(<2 x double> %A) {
+;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+   %tmp3 = fcmp olt <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmolez2xfloat(<2 x float> %A) {
+;CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+   %tmp3 = fcmp ole <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmolez4xfloat(<4 x float> %A) {
+;CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+   %tmp3 = fcmp ole <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmolez2xdouble(<2 x double> %A) {
+;CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+   %tmp3 = fcmp ole <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmonez2xfloat(<2 x float> %A) {
+; ONE with zero = OLT | OGT
+;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp one <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmonez4xfloat(<4 x float> %A) {
+; ONE with zero = OLT | OGT
+;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp one <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmonez2xdouble(<2 x double> %A) {
+; ONE with zero = OLT | OGT
+;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp one <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmordz2xfloat(<2 x float> %A) {
+; ORD with zero = OLT | OGE
+;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ord <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmordz4xfloat(<4 x float> %A) {
+; ORD with zero = OLT | OGE
+;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ord <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
+; ORD with zero = OLT | OGE
+;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ord <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
+; UEQ with zero = !ONE = !(OLT |OGT)
+;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
+; UEQ with zero = !ONE = !(OLT |OGT)
+;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
+; UEQ with zero = !ONE = !(OLT |OGT)
+;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmugez2xfloat(<2 x float> %A) {
+; UGE with zero = !OLT
+;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+   %tmp3 = fcmp uge <2 x float> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmugez4xfloat(<4 x float> %A) {
+; UGE with zero = !OLT
+;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp uge <4 x float> %A, zeroinitializer
+   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmugez2xdouble(<2 x double> %A) {
+; UGE with zero = !OLT
+;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+   %tmp3 = fcmp uge <2 x double> %A, zeroinitializer
+   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+	ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) {
+; UGT against zero is checked as NOT(OLE): fcmle #0.0, then invert.
+; CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = fcmp ugt <2 x float> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) {
+; UGT against zero is checked as NOT(OLE): fcmle #0.0, then invert.
+; CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp ugt <4 x float> %A, zeroinitializer
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) {
+; UGT against zero is checked as NOT(OLE): fcmle #0.0, then invert.
+; CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp ugt <2 x double> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %mask
+}
+
+define <2 x i32> @fcmultz2xfloat(<2 x float> %A) {
+; ULT against zero is checked as NOT(OGE): fcmge #0.0, then invert.
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = fcmp ult <2 x float> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @fcmultz4xfloat(<4 x float> %A) {
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp ult <4 x float> %A, zeroinitializer
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @fcmultz2xdouble(<2 x double> %A) {
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp ult <2 x double> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %mask
+}
+
+
+define <2 x i32> @fcmulez2xfloat(<2 x float> %A) {
+; ULE against zero is checked as NOT(OGT): fcmgt #0.0, then invert.
+; CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = fcmp ule <2 x float> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @fcmulez4xfloat(<4 x float> %A) {
+; ULE against zero is checked as NOT(OGT): fcmgt #0.0, then invert.
+; CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp ule <4 x float> %A, zeroinitializer
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @fcmulez2xdouble(<2 x double> %A) {
+; ULE against zero is checked as NOT(OGT): fcmgt #0.0, then invert.
+; CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp ule <2 x double> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %mask
+}
+
+define <2 x i32> @fcmunez2xfloat(<2 x float> %A) {
+; UNE against zero is checked as NOT(OEQ): fcmeq #0.0, then invert.
+; CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = fcmp une <2 x float> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @fcmunez4xfloat(<4 x float> %A) {
+; UNE against zero is checked as NOT(OEQ): fcmeq #0.0, then invert.
+; CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp une <4 x float> %A, zeroinitializer
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+define <2 x i64> @fcmunez2xdouble(<2 x double> %A) {
+; UNE against zero is checked as NOT(OEQ): fcmeq #0.0, then invert.
+; CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp une <2 x double> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %mask
+}
+
+
+define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) {
+; UNO against zero is the complement of ORD, i.e. NOT(OGE | OLT).
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = fcmp uno <2 x float> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i32>
+  ret <2 x i32> %mask
+}
+
+define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) {
+; UNO against zero is the complement of ORD, i.e. NOT(OGE | OLT).
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp uno <4 x float> %A, zeroinitializer
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) {
+; UNO against zero is the complement of ORD, i.e. NOT(OGE | OLT).
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %cmp = fcmp uno <2 x double> %A, zeroinitializer
+  %mask = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %mask
+
+}
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
new file mode 100644
index 000000000000..e18530e6ff8e
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -0,0 +1,615 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+
+define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
+  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
+  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
+  ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
+  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
+  ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
+  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
+  ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
+  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
+  ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
+  ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+  ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
+  ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
+  ret <2 x i64> %tmp4
+}
+
+define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x float> %tmp1, i32 2
+  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
+  ret <4 x float> %tmp4
+}
+
+define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x double> %tmp1, i32 0
+  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
+  ret <2 x double> %tmp4
+}
+
+define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
+  ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
+  ret <2 x i64> %tmp4
+}
+
+define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
+  %tmp3 = extractelement <2 x float> %tmp1, i32 1
+  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
+  ret <4 x float> %tmp4
+}
+
+define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <1 x double> %tmp1, i32 0
+  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
+  ret <2 x double> %tmp4
+}
+
+define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
+  ret <1 x i64> %tmp4
+}
+
+define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x float> %tmp1, i32 2
+  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
+  ret <2 x float> %tmp4
+}
+
+define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x double> %tmp1, i32 0
+  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
+  ret <1 x double> %tmp4
+}
+
+define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
+  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
+  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
+  ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
+  ret <1 x i64> %tmp4
+}
+
+define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+  %tmp3 = extractelement <2 x float> %tmp1, i32 0
+  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
+  ret <2 x float> %tmp4
+}
+
+define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
+;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <1 x double> %tmp1, i32 0
+  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
+  ret <1 x double> %tmp4
+}
+
+define i32 @umovw16b(<16 x i8> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = zext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw8h(<8 x i16> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = zext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw4s(<4 x i32> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  ret i32 %tmp3
+}
+
+define i64 @umovx2d(<2 x i64> %tmp1) {
+;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+  ret i64 %tmp3
+}
+
+define i32 @umovw8b(<8 x i8> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
+  %tmp4 = zext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw4h(<4 x i16> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = zext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @umovw2s(<2 x i32> %tmp1) {
+;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  ret i32 %tmp3
+}
+
+define i64 @umovx1d(<1 x i64> %tmp1) {
+;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+  ret i64 %tmp3
+}
+
+define i32 @smovw16b(<16 x i8> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = sext i8 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovw8h(<8 x i16> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovx16b(<16 x i8> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
+  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+  %tmp4 = sext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @smovx8h(<8 x i16> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i64 @smovx4s(<4 x i32> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
+  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+  %tmp4 = sext i32 %tmp3 to i64
+  ret i64 %tmp4
+}
+
+define i32 @smovw8b(<8 x i8> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
+  %tmp4 = sext i8 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovw4h(<4 x i16> %tmp1) {
+;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  %tmp5 = add i32 5, %tmp4
+  ret i32 %tmp5
+}
+
+define i32 @smovx8b(<8 x i8> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6]
+  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
+  %tmp4 = sext i8 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i32 @smovx4h(<4 x i16> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
+  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+  %tmp4 = sext i16 %tmp3 to i32
+  ret i32 %tmp4
+}
+
+define i64 @smovx2s(<2 x i32> %tmp1) {
+;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+  %tmp4 = sext i32 %tmp3 to i64
+  ret i64 %tmp4
+}
+
+define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
+; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+  %copied = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
+  ret <8 x i8> %copied
+}
+
+define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
+; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
+  %copied = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
+  ret <16 x i8> %copied
+}
+
+define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
+; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
+  %copied = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
+  ret <8 x i8> %copied
+}
+
+define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
+; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
+  %copied = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <16 x i8> %copied
+}
+
+define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+  %lane0 = insertelement <8 x i8> undef, i8 %v1, i32 0
+  %lane1 = insertelement <8 x i8> %lane0, i8 %v1, i32 1
+  %lane2 = insertelement <8 x i8> %lane1, i8 %v1, i32 2
+  %lane3 = insertelement <8 x i8> %lane2, i8 %v1, i32 3
+  %lane4 = insertelement <8 x i8> %lane3, i8 %v1, i32 4
+  %lane5 = insertelement <8 x i8> %lane4, i8 %v1, i32 5
+  %lane6 = insertelement <8 x i8> %lane5, i8 %v1, i32 6
+  %lane7 = insertelement <8 x i8> %lane6, i8 %v1, i32 7
+  ret <8 x i8> %lane7
+}
+
+define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+  %lane0 = insertelement <4 x i16> undef, i16 %v1, i32 0
+  %lane1 = insertelement <4 x i16> %lane0, i16 %v1, i32 1
+  %lane2 = insertelement <4 x i16> %lane1, i16 %v1, i32 2
+  %lane3 = insertelement <4 x i16> %lane2, i16 %v1, i32 3
+  ret <4 x i16> %lane3
+}
+
+define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
+  %lane0 = insertelement <2 x i32> undef, i32 %v1, i32 0
+  %lane1 = insertelement <2 x i32> %lane0, i32 %v1, i32 1
+  ret <2 x i32> %lane1
+}
+
+define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %lane0 = insertelement <1 x i64> undef, i64 %v1, i32 0
+  ret <1 x i64> %lane0
+}
+
+define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+  %lane0 = insertelement <16 x i8> undef, i8 %v1, i32 0
+  %lane1 = insertelement <16 x i8> %lane0, i8 %v1, i32 1
+  %lane2 = insertelement <16 x i8> %lane1, i8 %v1, i32 2
+  %lane3 = insertelement <16 x i8> %lane2, i8 %v1, i32 3
+  %lane4 = insertelement <16 x i8> %lane3, i8 %v1, i32 4
+  %lane5 = insertelement <16 x i8> %lane4, i8 %v1, i32 5
+  %lane6 = insertelement <16 x i8> %lane5, i8 %v1, i32 6
+  %lane7 = insertelement <16 x i8> %lane6, i8 %v1, i32 7
+  %lane8 = insertelement <16 x i8> %lane7, i8 %v1, i32 8
+  %lane9 = insertelement <16 x i8> %lane8, i8 %v1, i32 9
+  %lane10 = insertelement <16 x i8> %lane9, i8 %v1, i32 10
+  %lane11 = insertelement <16 x i8> %lane10, i8 %v1, i32 11
+  %lane12 = insertelement <16 x i8> %lane11, i8 %v1, i32 12
+  %lane13 = insertelement <16 x i8> %lane12, i8 %v1, i32 13
+  %lane14 = insertelement <16 x i8> %lane13, i8 %v1, i32 14
+  %lane15 = insertelement <16 x i8> %lane14, i8 %v1, i32 15
+  ret <16 x i8> %lane15
+}
+
+define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+  %lane0 = insertelement <8 x i16> undef, i16 %v1, i32 0
+  %lane1 = insertelement <8 x i16> %lane0, i16 %v1, i32 1
+  %lane2 = insertelement <8 x i16> %lane1, i16 %v1, i32 2
+  %lane3 = insertelement <8 x i16> %lane2, i16 %v1, i32 3
+  %lane4 = insertelement <8 x i16> %lane3, i16 %v1, i32 4
+  %lane5 = insertelement <8 x i16> %lane4, i16 %v1, i32 5
+  %lane6 = insertelement <8 x i16> %lane5, i16 %v1, i32 6
+  %lane7 = insertelement <8 x i16> %lane6, i16 %v1, i32 7
+  ret <8 x i16> %lane7
+}
+
+define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+  %lane0 = insertelement <4 x i32> undef, i32 %v1, i32 0
+  %lane1 = insertelement <4 x i32> %lane0, i32 %v1, i32 1
+  %lane2 = insertelement <4 x i32> %lane1, i32 %v1, i32 2
+  %lane3 = insertelement <4 x i32> %lane2, i32 %v1, i32 3
+  ret <4 x i32> %lane3
+}
+
+define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+  %lane0 = insertelement <2 x i64> undef, i64 %v1, i32 0
+  %lane1 = insertelement <2 x i64> %lane0, i64 %v1, i32 1
+  ret <2 x i64> %lane1
+}
+
+define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+  %splat = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i8> %splat
+}
+
+define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+  %splat = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i16> %splat
+}
+
+define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  %splat = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i32> %splat
+}
+
+define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %shuffle
+}
+
+define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+  %splat = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i8> %splat
+}
+
+define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+  %splat = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i16> %splat
+}
+
+define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
+; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  %splat = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i32> %splat
+}
+
+define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
+;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %shuffle
+}
+
+define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
+; CHECK-LABEL: test_bitcastv8i8toi64:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %bits = bitcast <8 x i8> %in to i64
+  ret i64 %bits
+}
+
+define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
+; CHECK-LABEL: test_bitcastv4i16toi64:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %bits = bitcast <4 x i16> %in to i64
+  ret i64 %bits
+}
+
+define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
+; CHECK-LABEL: test_bitcastv2i32toi64:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %bits = bitcast <2 x i32> %in to i64
+  ret i64 %bits
+}
+
+define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
+; CHECK-LABEL: test_bitcastv2f32toi64:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %bits = bitcast <2 x float> %in to i64
+  ret i64 %bits
+}
+
+define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
+; CHECK-LABEL: test_bitcastv1i64toi64:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %bits = bitcast <1 x i64> %in to i64
+  ret i64 %bits
+}
+
+define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
+; CHECK-LABEL: test_bitcastv1f64toi64:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+  %bits = bitcast <1 x double> %in to i64
+  ret i64 %bits
+}
+
+define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov8i8:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %vec = bitcast i64 %in to <8 x i8>
+  ret <8 x i8> %vec
+}
+
+define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov4i16:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %vec = bitcast i64 %in to <4 x i16>
+  ret <4 x i16> %vec
+}
+
+define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov2i32:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %vec = bitcast i64 %in to <2 x i32>
+  ret <2 x i32> %vec
+}
+
+define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov2f32:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %vec = bitcast i64 %in to <2 x float>
+  ret <2 x float> %vec
+}
+
+define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov1i64:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %vec = bitcast i64 %in to <1 x i64>
+  ret <1 x i64> %vec
+}
+
+define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov1f64:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+  %vec = bitcast i64 %in to <1 x double>
+  ret <1 x double> %vec
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll
new file mode 100644
index 000000000000..0283e0e7ca2e
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-crypto.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
+; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
+
+declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32>, <1 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32>, <1 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32>, <1 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) #1
+
+declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) #1
+
+declare <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32>) #1
+
+declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) #1
+
+declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) #1
+
+declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) #1
+
+declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) #1
+
+define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: test_vaeseq_u8:
+; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese
+entry:
+  %aese.i = tail call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %data, <16 x i8> %key)
+  ret <16 x i8> %aese.i
+}
+
+define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: test_vaesdq_u8:
+; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %data, <16 x i8> %key)
+  ret <16 x i8> %aesd.i
+}
+
+define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) {
+; CHECK-LABEL: test_vaesmcq_u8:
+; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %data)
+  ret <16 x i8> %aesmc.i
+}
+
+define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) {
+; CHECK-LABEL: test_vaesimcq_u8:
+; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %data)
+  ret <16 x i8> %aesimc.i
+}
+
+define i32 @test_vsha1h_u32(i32 %hash_e) {
+; CHECK-LABEL: test_vsha1h_u32:
+; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %sha1h.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
+  %sha1h1.i = tail call <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32> %sha1h.i)
+  %0 = extractelement <1 x i32> %sha1h1.i, i32 0
+  ret i32 %0
+}
+
+define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) {
+; CHECK-LABEL: test_vsha1su1q_u32:
+; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w12_15)
+  ret <4 x i32> %sha1su12.i
+}
+
+define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) {
+; CHECK-LABEL: test_vsha256su0q_u32:
+; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7)
+  ret <4 x i32> %sha256su02.i
+}
+
+define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_vsha1cq_u32:
+; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %sha1c.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
+  %sha1c1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32> %hash_abcd, <1 x i32> %sha1c.i, <4 x i32> %wk)
+  ret <4 x i32> %sha1c1.i
+}
+
+define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_vsha1pq_u32:
+; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %sha1p.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
+  %sha1p1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32> %hash_abcd, <1 x i32> %sha1p.i, <4 x i32> %wk)
+  ret <4 x i32> %sha1p1.i
+}
+
+define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_vsha1mq_u32:
+; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %sha1m.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
+  %sha1m1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32> %hash_abcd, <1 x i32> %sha1m.i, <4 x i32> %wk)
+  ret <4 x i32> %sha1m1.i
+}
+
+define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) {
+; CHECK-LABEL: test_vsha1su0q_u32:
+; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
+  ret <4 x i32> %sha1su03.i
+}
+
+define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) {
+; CHECK-LABEL: test_vsha256hq_u32:
+; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
+  ret <4 x i32> %sha256h3.i
+}
+
+define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) {
+; CHECK-LABEL: test_vsha256h2q_u32:
+; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+  %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
+  ret <4 x i32> %sha256h23.i
+}
+
+define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) {
+; CHECK-LABEL: test_vsha256su1q_u32:
+; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
+  ret <4 x i32> %sha256su13.i
+}
+
diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll
new file mode 100644
index 000000000000..f546aa7d3341
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-diagnostics.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK: test_vfma_lane_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+  %mul = fmul <2 x float> %shuffle, %b
+  %add = fadd <2 x float> %mul, %a
+  ret <2 x float> %add
+}
+
+define <4 x i32> @test_vshrn_not_match(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vshrn_not_match:
+; CHECK-NOT: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #35
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %2 = ashr <2 x i64> %b, <i64 35, i64 35> ; shift amount 35 must not be folded into a shrn2 (negative match above)
+  %vshrn_n = trunc <2 x i64> %2 to <2 x i32> ; narrow each 64-bit lane to 32 bits
+  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> ; %a in the low half, narrowed %b in the high half
+  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %4
+}
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll
new file mode 100644
index 000000000000..5c52cd30676a
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-extract.ll
@@ -0,0 +1,190 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+entry:
+  %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  ret <8 x i8> %vext
+}
+
+define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+entry:
+  %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i16> %vext
+}
+
+define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vext_s32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+entry:
+  %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i32> %vext
+}
+
+define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK: test_vext_s64:
+entry:
+  %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
+  ret <1 x i64> %vext
+}
+
+define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+entry:
+  %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  ret <16 x i8> %vext
+}
+
+define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i16> %vext
+}
+
+define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vextq_s32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+entry:
+  %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %vext
+}
+
+define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vextq_s64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+entry:
+  %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %vext
+}
+
+define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vext_u8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+entry:
+  %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  ret <8 x i8> %vext
+}
+
+define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vext_u16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+entry:
+  %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i16> %vext
+}
+
+define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vext_u32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+entry:
+  %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i32> %vext
+}
+
+define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK: test_vext_u64:
+entry:
+  %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
+  ret <1 x i64> %vext
+}
+
+define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vextq_u8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+entry:
+  %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  ret <16 x i8> %vext
+}
+
+define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vextq_u16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i16> %vext
+}
+
+define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vextq_u32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+entry:
+  %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %vext
+}
+
+define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vextq_u64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+entry:
+  %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %vext
+}
+
+define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK: test_vext_f32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+entry:
+  %vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x float> %vext
+}
+
+define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK: test_vext_f64:
+entry:
+  %vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> <i32 0>
+  ret <1 x double> %vext
+}
+
+define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK: test_vextq_f32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+entry:
+  %vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x float> %vext
+}
+
+define <2 x double> @test_vextq_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK: test_vextq_f64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+entry:
+  %vext = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %vext
+}
+
+define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vext_p8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+entry:
+  %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  ret <8 x i8> %vext
+}
+
+define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vext_p16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+entry:
+  %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i16> %vext
+}
+
+define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vextq_p8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+entry:
+  %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  ret <16 x i8> %vext
+}
+
+define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vextq_p16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i16> %vext
+}
diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll
new file mode 100644
index 000000000000..146256e4be11
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-facge-facgt.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>)
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>)
+declare <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double>, <2 x double>)
+
+define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; Registers are matched by pattern, so the allocation is not pinned to v0, v1 and v2.
+; CHECK: facge_from_intr_v2i32:
+  %val = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %A, <2 x float> %B)
+; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  ret <2 x i32> %val
+}
+define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
+; Registers are matched by pattern, so the allocation is not pinned to v0, v1 and v2.
+; CHECK: facge_from_intr_v4i32:
+  %val = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %A, <4 x float> %B)
+; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  ret <4 x i32> %val
+}
+
+define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
+; Registers are matched by pattern, so the allocation is not pinned to v0, v1 and v2.
+; CHECK: facge_from_intr_v2i64:
+  %val = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %A, <2 x double> %B)
+; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  ret <2 x i64> %val
+}
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>)
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>)
+declare <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double>, <2 x double>)
+
+define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; Registers are matched by pattern, so the allocation is not pinned to v0, v1 and v2.
+; CHECK: facgt_from_intr_v2i32:
+  %val = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %A, <2 x float> %B)
+; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  ret <2 x i32> %val
+}
+define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
+; Registers are matched by pattern, so the allocation is not pinned to v0, v1 and v2.
+; CHECK: facgt_from_intr_v4i32:
+  %val = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %A, <4 x float> %B)
+; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  ret <4 x i32> %val
+}
+
+define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
+; Registers are matched by pattern, so the allocation is not pinned to v0, v1 and v2.
+; CHECK: facgt_from_intr_v2i64:
+  %val = call <2 x i64> @llvm.aarch64.neon.vacgtq(<2 x double> %A, <2 x double> %B)
+; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  ret <2 x i64> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll
new file mode 100644
index 000000000000..dcf4e2878068
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-fma.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul <2 x float> %A, %B; product A * B
+	%tmp2 = fadd <2 x float> %C, %tmp1; C + A * B, expected to contract into fmla (-fp-contract=fast)
+	ret <2 x float> %tmp2
+}
+
+define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp1 = fmul <4 x float> %A, %B; product A * B
+	%tmp2 = fadd <4 x float> %C, %tmp1; C + A * B, expected to contract into fmla (-fp-contract=fast)
+	ret <4 x float> %tmp2
+}
+
+define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
+;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp1 = fmul <2 x double> %A, %B; product A * B
+	%tmp2 = fadd <2 x double> %C, %tmp1; C + A * B, expected to contract into fmla (-fp-contract=fast)
+	ret <2 x double> %tmp2
+}
+
+
+define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul <2 x float> %A, %B; product A * B
+	%tmp2 = fsub <2 x float> %C, %tmp1; C - A * B, expected to contract into fmls (-fp-contract=fast)
+	ret <2 x float> %tmp2
+}
+
+define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp1 = fmul <4 x float> %A, %B; product A * B
+	%tmp2 = fsub <4 x float> %C, %tmp1; C - A * B, expected to contract into fmls (-fp-contract=fast)
+	ret <4 x float> %tmp2
+}
+
+define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
+;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp1 = fmul <2 x double> %A, %B; product A * B
+	%tmp2 = fsub <2 x double> %C, %tmp1; C - A * B, expected to contract into fmls (-fp-contract=fast)
+	ret <2 x double> %tmp2
+}
+
+
+; Another set of tests for when the intrinsic is used.
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+        %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C) ; fused A * B + C
+	ret <2 x float> %val
+}
+
+define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+        %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C) ; fused A * B + C
+	ret <4 x float> %val
+}
+
+define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
+;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+        %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C) ; fused A * B + C
+	ret <2 x double> %val
+}
+
+define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+        %negA = fsub <2 x float> <float -0.0, float -0.0>, %A ; negA = -A
+        %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C) ; (-A) * B + C, expected to select fmls
+	ret <2 x float> %val
+}
+
+define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+        %negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A ; negA = -A
+        %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C) ; (-A) * B + C, expected to select fmls
+	ret <4 x float> %val
+}
+
+define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
+;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+        %negA = fsub <2 x double> <double -0.0, double -0.0>, %A ; negA = -A
+        %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C) ; (-A) * B + C, expected to select fmls
+	ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+        %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C) ; A * B + C via llvm.fmuladd, expected to select fmla
+	ret <2 x float> %val
+}
+
+define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+        %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C) ; A * B + C via llvm.fmuladd, expected to select fmla
+	ret <4 x float> %val
+}
+
+define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
+;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+        %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C) ; A * B + C via llvm.fmuladd, expected to select fmla
+	ret <2 x double> %val
+}
diff --git a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
new file mode 100644
index 000000000000..46fe25d74d9d
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon  | FileCheck %s
+
+; Set of tests for when the intrinsic is used.
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd, so the expected line names them literally.
+; CHECK: frsqrts v0.2s, v0.2s, v1.2s
+        %val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+        ret <2 x float> %val
+}
+
+define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd, so the expected line names them literally.
+; CHECK: frsqrts v0.4s, v0.4s, v1.4s
+        %val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+        ret <4 x float> %val
+}
+
+define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd, so the expected line names them literally.
+; CHECK: frsqrts v0.2d, v0.2d, v1.2d
+        %val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+        ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd, so the expected line names them literally.
+; CHECK: frecps v0.2s, v0.2s, v1.2s
+        %val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+        ret <2 x float> %val
+}
+
+define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd, so the expected line names them literally.
+; CHECK: frecps v0.4s, v0.4s, v1.4s
+        %val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+        ret <4 x float> %val
+}
+
+define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd, so the expected line names them literally.
+; CHECK: frecps v0.2d, v0.2d, v1.2d
+        %val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+        ret <2 x double> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-halving-add-sub.ll b/test/CodeGen/AArch64/neon-halving-add-sub.ll
new file mode 100644
index 000000000000..a8f59dbdb0ad
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-halving-add-sub.ll
@@ -0,0 +1,207 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_uhadd_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uhadd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_shadd_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: shadd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_uhadd_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uhadd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_shadd_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: shadd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_uhadd_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uhadd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_shadd_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: shadd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_uhadd_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uhadd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_shadd_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: shadd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_uhadd_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uhadd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_shadd_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: shadd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_uhadd_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uhadd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_shadd_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: shadd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+
+declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_uhsub_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uhsub v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_shsub_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: shsub v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_uhsub_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uhsub v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_shsub_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: shsub v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_uhsub_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uhsub v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_shsub_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: shsub v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_uhsub_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uhsub v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_shsub_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: shsub v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_uhsub_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uhsub v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_shsub_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: shsub v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_uhsub_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uhsub v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_shsub_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: shsub v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll
new file mode 100644
index 000000000000..d757aca86a69
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-max-min-pairwise.ll
@@ -0,0 +1,310 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; CHECK-LABEL: test_smaxp_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: smaxp v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: test_umaxp_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: umaxp v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_smaxp_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: smaxp v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_umaxp_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: umaxp v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_smaxp_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: smaxp v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_umaxp_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: umaxp v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+
+declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_smaxp_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: smaxp v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_umaxp_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: umaxp v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_smaxp_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: smaxp v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_umaxp_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: umaxp v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_smaxp_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: smaxp v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_umaxp_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: umaxp v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; CHECK-LABEL: test_sminp_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sminp v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: test_uminp_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uminp v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_sminp_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sminp v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_uminp_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uminp v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_sminp_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sminp v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_uminp_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uminp v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+
+declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_sminp_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sminp v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_uminp_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uminp v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_sminp_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sminp v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_uminp_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uminp v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_sminp_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sminp v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_uminp_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uminp v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fmaxp_v2f32:
+        %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fmaxp v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fmaxp_v4f32:
+        %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fmaxp v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fmaxp_v2f64:
+        %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fmaxp v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fminp_v2f32:
+        %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fminp v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fminp_v4f32:
+        %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fminp v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fminp_v2f64:
+        %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fminp v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fmaxnmp_v2f32:
+        %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fmaxnmp_v4f32:
+        %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fmaxnmp_v2f64:
+        %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fminnmp_v2f32:
+        %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fminnmp v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fminnmp_v4f32:
+        %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fminnmp v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fminnmp_v2f64:
+        %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fminnmp v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
diff --git a/test/CodeGen/AArch64/neon-max-min.ll b/test/CodeGen/AArch64/neon-max-min.ll
new file mode 100644
index 000000000000..7889c77e37f1
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-max-min.ll
@@ -0,0 +1,310 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; CHECK-LABEL: test_smax_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: smax v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: test_umax_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: umax v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_smax_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: smax v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_umax_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: umax v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_smax_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: smax v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_umax_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: umax v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_smax_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: smax v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_umax_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: umax v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_smax_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: smax v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_umax_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: umax v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_smax_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: smax v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_umax_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: umax v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; CHECK-LABEL: test_smin_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: smin v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: test_umin_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: umin v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_smin_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: smin v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_umin_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: umin v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_smin_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: smin v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_umin_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: umin v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+
+declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_smin_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: smin v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_umin_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: umin v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+
+declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_smin_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: smin v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_umin_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: umin v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_smin_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: smin v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_umin_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: umin v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fmax_v2f32:
+        %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fmax v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fmax_v4f32:
+        %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fmax v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fmax_v2f64:
+        %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fmax v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fmin_v2f32:
+        %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fmin v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fmin_v4f32:
+        %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fmin v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fmin_v2f64:
+        %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fmin v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
+
+declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fmaxnm_v2f32:
+        %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fmaxnm_v4f32:
+        %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fmaxnm_v2f64:
+        %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
+
+declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fminnm_v2f32:
+        %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fminnm v0.2s, v0.2s, v1.2s
+        ret <2 x float> %val
+}
+
+define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fminnm_v4f32:
+        %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fminnm v0.4s, v0.4s, v1.4s
+        ret <4 x float> %val
+}
+
+define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fminnm_v2f64:
+        %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fminnm v0.2d, v0.2d, v1.2d
+        ret <2 x double> %val
+}
diff --git a/test/CodeGen/AArch64/neon-misc-scalar.ll b/test/CodeGen/AArch64/neon-misc-scalar.ll
new file mode 100644
index 000000000000..cca8deb45cba
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-misc-scalar.ll
@@ -0,0 +1,60 @@
+;RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)
+
+declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)
+
+declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>)
+
+declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>)
+
+declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) {
+  ; CHECK-LABEL: test_vuqadd_s64:
+entry:
+  %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
+  ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}}
+  ret <1 x i64> %vuqadd2.i
+}
+
+define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) {
+  ; CHECK-LABEL: test_vsqadd_u64:
+entry:
+  %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
+  ; CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}}
+  ret <1 x i64> %vsqadd2.i
+}
+
+define <1 x i64> @test_vabs_s64(<1 x i64> %a) {
+  ; CHECK-LABEL: test_vabs_s64:
+entry:
+  %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a)
+  ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}}
+  ret <1 x i64> %vabs1.i
+}
+
+define <1 x i64> @test_vqabs_s64(<1 x i64> %a) {
+  ; CHECK-LABEL: test_vqabs_s64:
+entry:
+  %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a)
+  ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}}
+  ret <1 x i64> %vqabs1.i
+}
+
+define <1 x i64> @test_vqneg_s64(<1 x i64> %a) {
+  ; CHECK-LABEL: test_vqneg_s64:
+entry:
+  %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a)
+  ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}}
+  ret <1 x i64> %vqneg1.i
+}
+
+define <1 x i64> @test_vneg_s64(<1 x i64> %a) {
+  ; CHECK-LABEL: test_vneg_s64:
+entry:
+  %sub.i = sub <1 x i64> zeroinitializer, %a
+  ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
+  ret <1 x i64> %sub.i
+}
+
diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll
new file mode 100644
index 000000000000..9660bf2c7a30
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-misc.ll
@@ -0,0 +1,1799 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+
+define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
+; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %rev = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i8> %rev
+}
+
+define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
+; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %rev = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  ret <16 x i8> %rev
+}
+
+define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
+; CHECK: rev32 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %rev = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i8> %rev
+}
+
+define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
+; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %rev = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i16> %rev
+}
+
+define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
+; CHECK: rev32 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %rev = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+  ret <16 x i8> %rev
+}
+
+define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
+; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %rev = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i16> %rev
+}
+
+define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %rev = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i8> %rev
+}
+
+define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %rev = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i16> %rev
+}
+
+define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %rev = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x i32> %rev
+}
+
+define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %rev = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x float> %rev
+}
+
+define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %rev = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+  ret <16 x i8> %rev
+}
+
+define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %rev = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i16> %rev
+}
+
+define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %rev = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %rev
+}
+
+define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
+; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %rev = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x float> %rev
+}
+
+define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
+; CHECK: saddlp {{v[0-9]+}}.4h, {{v[0-9]+}}.8b
+  %sum = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4
+  ret <4 x i16> %sum
+}
+
+define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
+; CHECK: saddlp {{v[0-9]+}}.2s, {{v[0-9]+}}.4h
+  %sum = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4
+  ret <2 x i32> %sum
+}
+
+define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
+; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %sum = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4
+  ret <1 x i64> %sum
+}
+
+define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
+; CHECK: uaddlp {{v[0-9]+}}.4h, {{v[0-9]+}}.8b
+  %sum = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4
+  ret <4 x i16> %sum
+}
+
+define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
+; CHECK: uaddlp {{v[0-9]+}}.2s, {{v[0-9]+}}.4h
+  %sum = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4
+  ret <2 x i32> %sum
+}
+
+define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
+; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %sum = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4
+  ret <1 x i64> %sum
+}
+
+define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
+; CHECK: saddlp {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+  %sum = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4
+  ret <8 x i16> %sum
+}
+
+define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
+; CHECK: saddlp {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+  %sum = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4
+  ret <4 x i32> %sum
+}
+
+define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
+; CHECK: saddlp {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+  %sum = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4
+  ret <2 x i64> %sum
+}
+
+define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
+; CHECK: uaddlp {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+  %sum = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4
+  ret <8 x i16> %sum
+}
+
+define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
+; CHECK: uaddlp {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+  %sum = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4
+  ret <4 x i32> %sum
+}
+
+define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
+; CHECK: uaddlp {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+  %sum = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4
+  ret <2 x i64> %sum
+}
+
+define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
+; CHECK: sadalp {{v[0-9]+}}.4h, {{v[0-9]+}}.8b
+  %acc = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
+  ret <4 x i16> %acc
+}
+
+define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
+; CHECK: sadalp {{v[0-9]+}}.2s, {{v[0-9]+}}.4h
+  %acc = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
+  ret <2 x i32> %acc
+}
+
+define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
+; CHECK: sadalp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %acc = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
+  ret <1 x i64> %acc
+}
+
+define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
+; CHECK: uadalp {{v[0-9]+}}.4h, {{v[0-9]+}}.8b
+  %acc = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
+  ret <4 x i16> %acc
+}
+
+define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
+; CHECK: uadalp {{v[0-9]+}}.2s, {{v[0-9]+}}.4h
+  %acc = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
+  ret <2 x i32> %acc
+}
+
+define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
+; CHECK: uadalp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
+  %acc = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
+  ret <1 x i64> %acc
+}
+
+define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
+; CHECK: sadalp {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+  %acc = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
+  ret <8 x i16> %acc
+}
+
+define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK: sadalp {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+  %acc = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
+  ret <4 x i32> %acc
+}
+
+define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK: sadalp {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+  %acc = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
+  ret <2 x i64> %acc
+}
+
+define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
+; CHECK: uadalp {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+  %acc = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
+  ret <8 x i16> %acc
+}
+
+define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
+; CHECK: uadalp {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+  %acc = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
+  ret <4 x i32> %acc
+}
+
+define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
+; CHECK: uadalp {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+  %acc = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
+  ret <2 x i64> %acc
+}
+
+define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %qabs = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4
+  ret <8 x i8> %qabs
+}
+
+define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %qabs = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4
+  ret <16 x i8> %qabs
+}
+
+define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %qabs = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4
+  ret <4 x i16> %qabs
+}
+
+define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %qabs = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4
+  ret <8 x i16> %qabs
+}
+
+define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %qabs = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4
+  ret <2 x i32> %qabs
+}
+
+define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %qabs = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4
+  ret <4 x i32> %qabs
+}
+
+define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
+; CHECK: sqabs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %qabs = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4
+  ret <2 x i64> %qabs
+}
+
+define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %qneg = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4
+  ret <8 x i8> %qneg
+}
+
+define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %qneg = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4
+  ret <16 x i8> %qneg
+}
+
+define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %qneg = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4
+  ret <4 x i16> %qneg
+}
+
+define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %qneg = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4
+  ret <8 x i16> %qneg
+}
+
+define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %qneg = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4
+  ret <2 x i32> %qneg
+}
+
+define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %qneg = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4
+  ret <4 x i32> %qneg
+}
+
+define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
+; CHECK: sqneg {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %qneg = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4
+  ret <2 x i64> %qneg
+}
+
+define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %negated = sub <8 x i8> zeroinitializer, %a
+  ret <8 x i8> %negated
+}
+
+define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %negated = sub <16 x i8> zeroinitializer, %a
+  ret <16 x i8> %negated
+}
+
+define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %negated = sub <4 x i16> zeroinitializer, %a
+  ret <4 x i16> %negated
+}
+
+define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %negated = sub <8 x i16> zeroinitializer, %a
+  ret <8 x i16> %negated
+}
+
+define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %negated = sub <2 x i32> zeroinitializer, %a
+  ret <2 x i32> %negated
+}
+
+define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %negated = sub <4 x i32> zeroinitializer, %a
+  ret <4 x i32> %negated
+}
+
+define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
+; CHECK: neg {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %negated = sub <2 x i64> zeroinitializer, %a
+  ret <2 x i64> %negated
+}
+
+define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
+; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %negated = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
+  ret <2 x float> %negated
+}
+
+define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
+; CHECK: fneg {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %negated = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+  ret <4 x float> %negated
+}
+
+define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
+; CHECK: fneg {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %negated = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
+  ret <2 x double> %negated
+}
+
+define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %absval = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4
+  ret <8 x i8> %absval
+}
+
+define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %absval = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4
+  ret <16 x i8> %absval
+}
+
+define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %absval = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4
+  ret <4 x i16> %absval
+}
+
+define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %absval = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4
+  ret <8 x i16> %absval
+}
+
+define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %absval = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4
+  ret <2 x i32> %absval
+}
+
+define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %absval = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4
+  ret <4 x i32> %absval
+}
+
+define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
+; CHECK: abs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %absval = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4
+  ret <2 x i64> %absval
+}
+
+define <2 x float> @test_vabs_f32(<2 x float> %a) #1 {
+; CHECK: fabs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %absval = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4
+  ret <2 x float> %absval
+}
+
+define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 {
+; CHECK: fabs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %absval = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4
+  ret <4 x float> %absval
+}
+
+define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 {
+; CHECK: fabs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %absval = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4
+  ret <2 x double> %absval
+}
+
+define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %res = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %res = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
+  ret <16 x i8> %res
+}
+
+define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %res = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
+  ret <4 x i16> %res
+}
+
+define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %res = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
+  ret <8 x i16> %res
+}
+
+define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %res = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
+  ret <2 x i32> %res
+}
+
+define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %res = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK: suqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %res = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
+  ret <2 x i64> %res
+}
+
+define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
+; CHECK: cls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %signbits = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4
+  ret <8 x i8> %signbits
+}
+
+define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
+; CHECK: cls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %signbits = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4
+  ret <16 x i8> %signbits
+}
+
+define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
+; CHECK: cls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %signbits = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4
+  ret <4 x i16> %signbits
+}
+
+define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
+; CHECK: cls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %signbits = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4
+  ret <8 x i16> %signbits
+}
+
+define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
+; CHECK: cls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %signbits = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4
+  ret <2 x i32> %signbits
+}
+
+define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
+; CHECK: cls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %signbits = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4
+  ret <4 x i32> %signbits
+}
+
+define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
+; CHECK: clz {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %lz = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
+  ret <8 x i8> %lz
+}
+
+define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
+; CHECK: clz {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %lz = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
+  ret <16 x i8> %lz
+}
+
+define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
+; CHECK: clz {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %lz = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4
+  ret <4 x i16> %lz
+}
+
+define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
+; CHECK: clz {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %lz = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4
+  ret <8 x i16> %lz
+}
+
+define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
+; CHECK: clz {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4
+  ret <2 x i32> %lz
+}
+
+define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
+; CHECK: clz {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  %lz = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4
+  ret <4 x i32> %lz
+}
+
+define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
+; CHECK: cnt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %popcnt = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
+  ret <8 x i8> %popcnt
+}
+
+define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
+; CHECK: cnt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %popcnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
+  ret <16 x i8> %popcnt
+}
+
+define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
+; CHECK: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %inv = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <8 x i8> %inv
+}
+
+define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
+; CHECK: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %inv = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <16 x i8> %inv
+}
+
+define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
+; CHECK: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %inv = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <4 x i16> %inv
+}
+
+define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
+; CHECK: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %inv = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <8 x i16> %inv
+}
+
+define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
+; CHECK: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %inv = xor <2 x i32> %a, <i32 -1, i32 -1>
+  ret <2 x i32> %inv
+}
+
+define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
+; CHECK: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %inv = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %inv
+}
+
+define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
+; CHECK: rbit {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %bitrev = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4
+  ret <8 x i8> %bitrev
+}
+
+define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
+; CHECK: rbit {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+  %bitrev = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4
+  ret <16 x i8> %bitrev
+}
+
+define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
+; CHECK: xtn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h
+  %narrow = trunc <8 x i16> %a to <8 x i8>
+  ret <8 x i8> %narrow
+}
+
+define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
+; CHECK: xtn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s
+  %narrow = trunc <4 x i32> %a to <4 x i16>
+  ret <4 x i16> %narrow
+}
+
+define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
+; CHECK: xtn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d
+  %narrow = trunc <2 x i64> %a to <2 x i32>
+  ret <2 x i32> %narrow
+}
+
+define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+; CHECK: xtn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h
+  %narrow = trunc <8 x i16> %b to <8 x i8>
+  %joined = shufflevector <8 x i8> %a, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %joined
+}
+
+define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+; CHECK: xtn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s
+  %narrow = trunc <4 x i32> %b to <4 x i16>
+  %joined = shufflevector <4 x i16> %a, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %joined
+}
+
+define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+; CHECK: xtn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d
+  %narrow = trunc <2 x i64> %b to <2 x i32>
+  %joined = shufflevector <2 x i32> %a, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %joined
+}
+
+define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
+; CHECK: sqxtun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h
+  %narrow = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4
+  ret <8 x i8> %narrow
+}
+
+define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
+; CHECK: sqxtun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s
+  %narrow = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4
+  ret <4 x i16> %narrow
+}
+
+define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
+; CHECK: sqxtun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d
+  %narrow = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4
+  ret <2 x i32> %narrow
+}
+
+define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+; CHECK: sqxtun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h
+  %narrow = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4
+  %joined = shufflevector <8 x i8> %a, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %joined
+}
+
+define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
+; CHECK: sqxtun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s
+  %narrow = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4
+  %joined = shufflevector <4 x i16> %a, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %joined
+}
+
+define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
+; CHECK: sqxtun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d
+  %narrow = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4
+  %joined = shufflevector <2 x i32> %a, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %joined
+}
+
+define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
+; CHECK: sqxtn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h
+  %narrow = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4
+  ret <8 x i8> %narrow
+}
+
+define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
+; CHECK: sqxtn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s
+  %narrow = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4
+  ret <4 x i16> %narrow
+}
+
+define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
+; CHECK: sqxtn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d
+  %narrow = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4
+  ret <2 x i32> %narrow
+}
+
+define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
+; CHECK: sqxtn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h
+  %narrow = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4
+  %joined = shufflevector <8 x i8> %a, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %joined
+}
+
+define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { ; checks the instruction, as the s16/u32 variants do
+; CHECK: sqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
+  %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle.i
+}
+
+define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { ; checks the instruction, as the s16/u64 variants do
+; CHECK: sqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
+  %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuffle.i
+}
+
+define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
+; CHECK: uqxtn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h
+  %narrow = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4
+  ret <8 x i8> %narrow
+}
+
+define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
+; CHECK: uqxtn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s
+  %narrow = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4
+  ret <4 x i16> %narrow
+}
+
+define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
+; CHECK: uqxtn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d
+  %narrow = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4
+  ret <2 x i32> %narrow
+}
+
+define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
+; CHECK: uqxtn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h
+  %narrow = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4
+  %joined = shufflevector <8 x i8> %a, <8 x i8> %narrow, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %joined
+}
+
+define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
+; CHECK: uqxtn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s
+  %narrow = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4
+  %joined = shufflevector <4 x i16> %a, <4 x i16> %narrow, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %joined
+}
+
+define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
+; CHECK: uqxtn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d
+  %narrow = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4
+  %joined = shufflevector <2 x i32> %a, <2 x i32> %narrow, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %joined
+}
+
+define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
+; CHECK: shll v{{[0-9]+}}.8h, v{{[0-9]+}}.8b, #8
+  %wide = sext <8 x i8> %a to <8 x i16>
+  %shifted = shl <8 x i16> %wide, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <8 x i16> %shifted
+}
+
+define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
+; CHECK: shll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #16
+  %wide = sext <4 x i16> %a to <4 x i32>
+  %shifted = shl <4 x i32> %wide, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %shifted
+}
+
+define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
+; CHECK: shll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #32
+  %wide = sext <2 x i32> %a to <2 x i64>
+  %shifted = shl <2 x i64> %wide, <i64 32, i64 32>
+  ret <2 x i64> %shifted
+}
+
+define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
+; CHECK: shll v{{[0-9]+}}.8h, v{{[0-9]+}}.8b, #8
+  %wide = zext <8 x i8> %a to <8 x i16>
+  %shifted = shl <8 x i16> %wide, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <8 x i16> %shifted
+}
+
+define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
+; CHECK: shll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #16
+  %wide = zext <4 x i16> %a to <4 x i32>
+  %shifted = shl <4 x i32> %wide, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %shifted
+}
+
+define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
+; CHECK: shll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #32
+  %wide = zext <2 x i32> %a to <2 x i64>
+  %shifted = shl <2 x i64> %wide, <i64 32, i64 32>
+  ret <2 x i64> %shifted
+}
+
+define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
+; CHECK: shll2 v{{[0-9]+}}.8h, v{{[0-9]+}}.16b, #8
+  %hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %wide = sext <8 x i8> %hi to <8 x i16>
+  %shifted = shl <8 x i16> %wide, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <8 x i16> %shifted
+}
+
+define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
+; CHECK: shll2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h, #16
+  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %wide = sext <4 x i16> %hi to <4 x i32>
+  %shifted = shl <4 x i32> %wide, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %shifted
+}
+
+define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
+; CHECK: shll2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s, #32
+  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %wide = sext <2 x i32> %hi to <2 x i64>
+  %shifted = shl <2 x i64> %wide, <i64 32, i64 32>
+  ret <2 x i64> %shifted
+}
+
+define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
+; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = zext <8 x i8> %shuffle.i to <8 x i16>
+  %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <8 x i16> %vshll_n
+}
+
+define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
+; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %1 = zext <4 x i16> %shuffle.i to <4 x i32>
+  %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %vshll_n
+}
+
+define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
+; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %1 = zext <2 x i32> %shuffle.i to <2 x i64>
+  %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
+  ret <2 x i64> %vshll_n
+}
+
+define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 {
+; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
+  %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4
+  ret <4 x i16> %vcvt1.i
+}
+
+define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 {
+; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
+  %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle.i
+}
+
+define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 {
+; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h
+  %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4
+  ret <4 x float> %vcvt1.i
+}
+
+define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 {
+; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
+  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4
+  ret <4 x float> %vcvt1.i.i
+}
+
+define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
+; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
+  %vcvt.i = fptrunc <2 x double> %a to <2 x float>
+  ret <2 x float> %vcvt.i
+}
+
+define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
+; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
+  %vcvt.i.i = fptrunc <2 x double> %b to <2 x float>
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
+; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
+  %vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4
+  ret <2 x float> %vcvtx_f32_f641.i
+}
+
+define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
+; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
+  %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
+; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
+  %vcvt.i = fpext <2 x float> %a to <2 x double>
+  ret <2 x double> %vcvt.i
+}
+
+define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
+; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
+  %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double>
+  ret <2 x double> %vcvt.i.i
+}
+
+define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
+; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrndn1.i
+}
+
+define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
+; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrndn1.i
+}
+
+define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
+; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrndn1.i
+}
+
+define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
+; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrnda1.i
+}
+
+define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { ; llvm.round on <4 x float>; the check line expects frinta on .4s
+; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrnda1.i
+}
+
+define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
+; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrnda1.i
+}
+
+define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
+; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrndp1.i
+}
+
+define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { ; llvm.ceil on <4 x float>; the check line expects frintp on .4s
+; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrndp1.i
+}
+
+define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
+; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrndp1.i
+}
+
+define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
+; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrndm1.i
+}
+
+define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
+; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrndm1.i
+}
+
+define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 { ; llvm.floor on <2 x double>; the check line expects frintm on .2d
+; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrndm1.i
+}
+
+define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
+; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrndx1.i
+}
+
+define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
+; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrndx1.i
+}
+
+define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
+; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrndx1.i
+}
+
+define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { ; llvm.trunc on <2 x float>; the check line expects frintz on .2s
+; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrnd1.i
+}
+
+define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
+; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrnd1.i
+}
+
+define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
+; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrnd1.i
+}
+
+define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
+; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrndi1.i
+}
+
+define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
+; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrndi1.i
+}
+
+define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
+; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrndi1.i
+}
+
+define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvt.i = fptosi <2 x float> %a to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvt.i = fptosi <4 x float> %a to <4 x i32>
+  ret <4 x i32> %vcvt.i
+}
+
+define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = fptosi <2 x double> %a to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvt.i = fptoui <2 x float> %a to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvt.i = fptoui <4 x float> %a to <4 x i32>
+  ret <4 x i32> %vcvt.i
+}
+
+define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = fptoui <2 x double> %a to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtns_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtns_f321.i
+}
+
+define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtns_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtns_f321.i
+}
+
+define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtns_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtns_f641.i
+}
+
+define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtnu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtnu_f321.i
+}
+
+define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtnu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtnu_f321.i
+}
+
+define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtnu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtnu_f641.i
+}
+
+define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtps_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtps_f321.i
+}
+
+define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtps_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtps_f321.i
+}
+
+define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtps_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtps_f641.i
+}
+
+define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtpu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtpu_f321.i
+}
+
+define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtpu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtpu_f321.i
+}
+
+define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtpu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtpu_f641.i
+}
+
+define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtms_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtms_f321.i
+}
+
+define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtms_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtms_f321.i
+}
+
+define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtms_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtms_f641.i
+}
+
+define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtmu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtmu_f321.i
+}
+
+define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtmu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtmu_f321.i
+}
+
+define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtmu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtmu_f641.i
+}
+
+define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtas_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtas_f321.i
+}
+
+define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtas_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtas_f321.i
+}
+
+define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtas_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtas_f641.i
+}
+
+define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
+; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvtau_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #4
+  ret <2 x i32> %vcvtau_f321.i
+}
+
+define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
+; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvtau_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #4
+  ret <4 x i32> %vcvtau_f321.i
+}
+
+define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 {
+; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvtau_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #4
+  ret <2 x i64> %vcvtau_f641.i
+}
+
+define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
+; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrsqrte1.i
+}
+
+define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
+; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrsqrte1.i
+}
+
+define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
+; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrsqrte1.i
+}
+
+define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
+; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vrecpe1.i
+}
+
+define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
+; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vrecpe1.i
+}
+
+define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
+; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vrecpe1.i
+}
+
+define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
+; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4
+  ret <2 x i32> %vrecpe1.i
+}
+
+define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
+; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4
+  ret <4 x i32> %vrecpe1.i
+}
+
+define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
+; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
+  ret <2 x float> %vsqrt1.i
+}
+
+define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
+; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
+  ret <4 x float> %vsqrt1.i
+}
+
+define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
+; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
+  ret <2 x double> %vsqrt1.i
+}
+
+define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
+; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvt.i = sitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %vcvt.i
+}
+
+define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
+; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %vcvt.i = uitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %vcvt.i
+}
+
+define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
+; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvt.i = sitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %vcvt.i
+}
+
+define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
+; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvt.i = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %vcvt.i
+}
+
+define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
+; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = sitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %vcvt.i
+}
+
+define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
+; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = uitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %vcvt.i
+}
+
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
+
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
+
+declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2
+
+declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2
+
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2
+
+declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2
+
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2
+
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) #2
+
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3
+
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3
+
+declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3
+
+declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3
+
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3
+
+declare <2 x double> @llvm.round.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.round.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.round.v2f32(<2 x float>) #3
+
+declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2
+
+declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2
+
+declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
+
+declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2
+
+declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
+
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2
+
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2
+
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2
+
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2
+
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2
+
+declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2
+
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2
+
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2
+
+declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2
+
+declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2
+
+declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2
+
+declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2
+
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2
+
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2
+
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2
+
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2
+
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2
+
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2
+
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2
+
+declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2
+
+declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2
+
+declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2
+
+declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2
+
+declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2
+
+declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2
+
+declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2
+
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3
+
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3
+
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3
+
+declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2
+
+declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2
+
+declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2
+
+declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2
+
+declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2
+
+declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2
+
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2
+
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2
+
+declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2
+
+declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2
+
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2
+
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2
+
+declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2
+
+declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
+
+declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
+
+declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
+
+declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
+
+declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
+
+declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
+
+declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
+
+declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
+
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2
+
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2
+
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2
+
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2
+
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2
+
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2
+
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2
+
+declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2
+
+declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2
+
+
+define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) { ; one-lane fptosi; scalar d-register fcvtzs expected
+; CHECK-LABEL: test_vcvt_s64_f64:
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fptosi <1 x double> %a to <1 x i64>
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { ; one-lane fptoui; scalar d-register fcvtzu expected
+; CHECK-LABEL: test_vcvt_u64_f64:
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fptoui <1 x double> %a to <1 x i64>
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { ; one-lane fcvtns intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvtn_s64_f64:
+; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { ; one-lane fcvtnu intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvtn_u64_f64:
+; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { ; one-lane fcvtps intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvtp_s64_f64:
+; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { ; one-lane fcvtpu intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvtp_u64_f64:
+; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { ; one-lane fcvtms intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvtm_s64_f64:
+; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { ; one-lane fcvtmu intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvtm_u64_f64:
+; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { ; one-lane fcvtas intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvta_s64_f64:
+; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { ; one-lane fcvtau intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vcvta_u64_f64:
+; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) { ; one-lane sitofp; scalar d-register scvtf expected
+; CHECK-LABEL: test_vcvt_f64_s64:
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = sitofp <1 x i64> %a to <1 x double>
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { ; one-lane uitofp; scalar d-register ucvtf expected
+; CHECK-LABEL: test_vcvt_f64_u64:
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = uitofp <1 x i64> %a to <1 x double>
+  ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)
+
+define <1 x double> @test_vrndn_f64(<1 x double> %a) { ; one-lane frintn intrinsic; scalar d-register form expected
+; CHECK-LABEL: test_vrndn_f64:
+; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrnda_f64(<1 x double> %a) { ; one-lane llvm.round; scalar d-register frinta expected
+; CHECK-LABEL: test_vrnda_f64:
+; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndp_f64(<1 x double> %a) { ; one-lane llvm.ceil; scalar d-register frintp expected
+; CHECK-LABEL: test_vrndp_f64:
+; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndm_f64(<1 x double> %a) { ; one-lane llvm.floor; scalar d-register frintm expected
+; CHECK-LABEL: test_vrndm_f64:
+; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndx_f64(<1 x double> %a) { ; one-lane llvm.rint; scalar d-register frintx expected
+; CHECK-LABEL: test_vrndx_f64:
+; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrnd_f64(<1 x double> %a) { ; one-lane llvm.trunc; scalar d-register frintz expected
+; CHECK-LABEL: test_vrnd_f64:
+; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndi_f64(<1 x double> %a) { ; one-lane llvm.nearbyint; scalar d-register frinti expected
+; CHECK-LABEL: test_vrndi_f64:
+; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
+declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
+declare <1 x double> @llvm.rint.v1f64(<1 x double>)
+declare <1 x double> @llvm.floor.v1f64(<1 x double>)
+declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
+declare <1 x double> @llvm.round.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>)
+
+define <1 x double> @test_vrsqrte_f64(<1 x double> %a) { ; one-lane vrsqrte intrinsic; scalar d-register frsqrte expected
+; CHECK-LABEL: test_vrsqrte_f64:
+; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrecpe_f64(<1 x double> %a) { ; one-lane vrecpe intrinsic; scalar d-register frecpe expected
+; CHECK-LABEL: test_vrecpe_f64:
+; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vsqrt_f64(<1 x double> %a) { ; one-lane llvm.sqrt; scalar d-register fsqrt expected
+; CHECK-LABEL: test_vsqrt_f64:
+; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) { ; one-lane vrecps intrinsic; three-operand d-register frecps expected
+; CHECK-LABEL: test_vrecps_f64:
+; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) { ; one-lane vrsqrts intrinsic; three-operand d-register frsqrts expected
+; CHECK-LABEL: test_vrsqrts_f64:
+; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
+declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll
new file mode 100644
index 000000000000..23e9223a8b7b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-mla-mls.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+
+define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { ; C + A*B: expects an mla on .8b
+;CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp1 = mul <8 x i8> %A, %B
+	%tmp2 = add <8 x i8> %C, %tmp1
+	ret <8 x i8> %tmp2
+}
+
+define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { ; C + A*B: expects an mla on .16b
+;CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+	%tmp1 = mul <16 x i8> %A, %B
+	%tmp2 = add <16 x i8> %C, %tmp1
+	ret <16 x i8> %tmp2
+}
+
+define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { ; C + A*B: expects an mla on .4h
+;CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+	%tmp1 = mul <4 x i16> %A, %B
+	%tmp2 = add <4 x i16> %C, %tmp1
+	ret <4 x i16> %tmp2
+}
+
+define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { ; C + A*B: expects an mla on .8h
+;CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+	%tmp1 = mul <8 x i16> %A, %B
+	%tmp2 = add <8 x i16> %C, %tmp1
+	ret <8 x i16> %tmp2
+}
+
+define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { ; C + A*B: expects an mla on .2s
+;CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = mul <2 x i32> %A, %B
+	%tmp2 = add <2 x i32> %C, %tmp1
+	ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { ; C + A*B: expects an mla on .4s
+;CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp1 = mul <4 x i32> %A, %B
+	%tmp2 = add <4 x i32> %C, %tmp1
+	ret <4 x i32> %tmp2
+}
+
+define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { ; C - A*B: expects an mls on .8b
+;CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp1 = mul <8 x i8> %A, %B
+	%tmp2 = sub <8 x i8> %C, %tmp1
+	ret <8 x i8> %tmp2
+}
+
+define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { ; C - A*B: expects an mls on .16b
+;CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+	%tmp1 = mul <16 x i8> %A, %B
+	%tmp2 = sub <16 x i8> %C, %tmp1
+	ret <16 x i8> %tmp2
+}
+
+define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { ; C - A*B: expects an mls on .4h
+;CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+	%tmp1 = mul <4 x i16> %A, %B
+	%tmp2 = sub <4 x i16> %C, %tmp1
+	ret <4 x i16> %tmp2
+}
+
+define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { ; C - A*B: expects an mls on .8h
+;CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+	%tmp1 = mul <8 x i16> %A, %B
+	%tmp2 = sub <8 x i16> %C, %tmp1
+	ret <8 x i16> %tmp2
+}
+
+define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { ; C - A*B: expects an mls on .2s
+;CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = mul <2 x i32> %A, %B
+	%tmp2 = sub <2 x i32> %C, %tmp1
+	ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { ; C - A*B: expects an mls on .4s
+;CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp1 = mul <4 x i32> %A, %B
+	%tmp2 = sub <4 x i32> %C, %tmp1
+	ret <4 x i32> %tmp2
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll
new file mode 100644
index 000000000000..60b13b8b9a0e
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-mov.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @movi8b() { ; splat of i8 8
+;CHECK:  movi {{v[0-9]+}}.8b, #0x8
+   ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+}
+
+define <16 x i8> @movi16b() { ; splat of i8 8
+;CHECK:  movi {{v[0-9]+}}.16b, #0x8
+   ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+}
+
+define <2 x i32> @movi2s_lsl0() { ; splat of 0xff
+;CHECK:  movi {{v[0-9]+}}.2s, #0xff
+   ret <2 x i32> < i32 255, i32 255 >
+}
+
+define <2 x i32> @movi2s_lsl8() { ; splat of 0xff << 8
+;CHECK:  movi {{v[0-9]+}}.2s, #0xff, lsl #8
+   ret <2 x i32> < i32 65280, i32 65280 >
+}
+
+define <2 x i32> @movi2s_lsl16() { ; splat of 0xff << 16
+;CHECK:  movi {{v[0-9]+}}.2s, #0xff, lsl #16
+   ret <2 x i32> < i32 16711680, i32 16711680 >
+
+}
+
+define <2 x i32> @movi2s_lsl24() { ; splat of 0xff << 24
+;CHECK:  movi {{v[0-9]+}}.2s, #0xff, lsl #24
+   ret <2 x i32> < i32 4278190080, i32 4278190080 >
+}
+
+define <4 x i32> @movi4s_lsl0() { ; splat of 0xff
+;CHECK:  movi {{v[0-9]+}}.4s, #0xff
+   ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 >
+}
+
+define <4 x i32> @movi4s_lsl8() { ; splat of 0xff << 8
+;CHECK:  movi {{v[0-9]+}}.4s, #0xff, lsl #8
+   ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 >
+}
+
+define <4 x i32> @movi4s_lsl16() { ; splat of 0xff << 16
+;CHECK:  movi {{v[0-9]+}}.4s, #0xff, lsl #16
+   ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 >
+
+}
+
+define <4 x i32> @movi4s_lsl24() { ; splat of 0xff << 24
+;CHECK:  movi {{v[0-9]+}}.4s, #0xff, lsl #24
+   ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 >
+}
+
+define <4 x i16> @movi4h_lsl0() { ; splat of 0xff
+;CHECK:  movi {{v[0-9]+}}.4h, #0xff
+   ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 >
+}
+
+define <4 x i16> @movi4h_lsl8() { ; splat of 0xff << 8
+;CHECK:  movi {{v[0-9]+}}.4h, #0xff, lsl #8
+   ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 >
+}
+
+define <8 x i16> @movi8h_lsl0() { ; splat of 0xff
+;CHECK:  movi {{v[0-9]+}}.8h, #0xff
+   ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
+}
+
+define <8 x i16> @movi8h_lsl8() { ; splat of 0xff << 8
+;CHECK:  movi {{v[0-9]+}}.8h, #0xff, lsl #8
+   ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
+}
+
+
+define <2 x i32> @mvni2s_lsl0() { ; splat of ~0x10
+;CHECK:  mvni {{v[0-9]+}}.2s, #0x10
+   ret <2 x i32> < i32 4294967279, i32 4294967279 >
+}
+
+define <2 x i32> @mvni2s_lsl8() { ; splat of ~(0x10 << 8)
+;CHECK:  mvni {{v[0-9]+}}.2s, #0x10, lsl #8
+   ret <2 x i32> < i32 4294963199, i32 4294963199 >
+}
+
+define <2 x i32> @mvni2s_lsl16() { ; splat of ~(0x10 << 16)
+;CHECK:  mvni {{v[0-9]+}}.2s, #0x10, lsl #16
+   ret <2 x i32> < i32 4293918719, i32 4293918719 >
+}
+
+define <2 x i32> @mvni2s_lsl24() { ; splat of ~(0x10 << 24)
+;CHECK:  mvni {{v[0-9]+}}.2s, #0x10, lsl #24
+   ret <2 x i32> < i32 4026531839, i32 4026531839 >
+}
+
+define <4 x i32> @mvni4s_lsl0() { ; splat of ~0x10
+;CHECK:  mvni {{v[0-9]+}}.4s, #0x10
+   ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
+}
+
+define <4 x i32> @mvni4s_lsl8() { ; splat of ~(0x10 << 8)
+;CHECK:  mvni {{v[0-9]+}}.4s, #0x10, lsl #8
+   ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
+}
+
+define <4 x i32> @mvni4s_lsl16() { ; splat of ~(0x10 << 16)
+;CHECK:  mvni {{v[0-9]+}}.4s, #0x10, lsl #16
+   ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
+
+}
+
+define <4 x i32> @mvni4s_lsl24() { ; splat of ~(0x10 << 24)
+;CHECK:  mvni {{v[0-9]+}}.4s, #0x10, lsl #24
+   ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 >
+}
+
+
+define <4 x i16> @mvni4h_lsl0() { ; splat of 16-bit ~0x10
+;CHECK:  mvni {{v[0-9]+}}.4h, #0x10
+   ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
+}
+
+define <4 x i16> @mvni4h_lsl8() { ; splat of 16-bit ~(0x10 << 8)
+;CHECK:  mvni {{v[0-9]+}}.4h, #0x10, lsl #8
+   ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
+}
+
+define <8 x i16> @mvni8h_lsl0() { ; splat of 16-bit ~0x10
+;CHECK:  mvni {{v[0-9]+}}.8h, #0x10
+   ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 >
+}
+
+define <8 x i16> @mvni8h_lsl8() { ; splat of 16-bit ~(0x10 << 8)
+;CHECK:  mvni {{v[0-9]+}}.8h, #0x10, lsl #8
+   ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 >
+}
+
+
+define <2 x i32> @movi2s_msl8(<2 x i32> %a) { ; splat of 0x0000ffff; %a is unused
+;CHECK:  movi {{v[0-9]+}}.2s, #0xff, msl #8
+	ret <2 x i32> < i32 65535, i32 65535 >
+}
+
+define <2 x i32> @movi2s_msl16() { ; splat of 0x00ffffff
+;CHECK:  movi {{v[0-9]+}}.2s, #0xff, msl #16
+   ret <2 x i32> < i32 16777215, i32 16777215 >
+}
+
+
+define <4 x i32> @movi4s_msl8() { ; splat of 0x0000ffff
+;CHECK:  movi {{v[0-9]+}}.4s, #0xff, msl #8
+   ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 >
+}
+
+define <4 x i32> @movi4s_msl16() { ; splat of 0x00ffffff
+;CHECK:  movi {{v[0-9]+}}.4s, #0xff, msl #16
+   ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 >
+}
+
+define <2 x i32> @mvni2s_msl8() { ; splat of 0xffffef00, written as an in-range i32 literal
+;CHECK:  mvni {{v[0-9]+}}.2s, #0x10, msl #8
+   ret <2 x i32> < i32 4294962944, i32 4294962944 >
+}
+
+define <2 x i32> @mvni2s_msl16() { ; splat of 0xffef0000, written as an in-range i32 literal
+;CHECK:  mvni {{v[0-9]+}}.2s, #0x10, msl #16
+   ret <2 x i32> < i32 4293853184, i32 4293853184 >
+}
+
+define <4 x i32> @mvni4s_msl8() { ; splat of 0xffffef00, written as an in-range i32 literal
+;CHECK:  mvni {{v[0-9]+}}.4s, #0x10, msl #8
+   ret <4 x i32> < i32 4294962944, i32 4294962944, i32 4294962944, i32 4294962944 >
+}
+
+define <4 x i32> @mvni4s_msl16() { ; splat of 0xffef0000, written as an in-range i32 literal
+;CHECK:  mvni {{v[0-9]+}}.4s, #0x10, msl #16
+   ret <4 x i32> < i32 4293853184, i32 4293853184, i32 4293853184, i32 4293853184 >
+}
+
+define <2 x i64> @movi2d() { ; splat of 0xff0000ff0000ffff
+;CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff
+	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
+}
+
+define <1 x i64> @movid() { ; single lane 0xff0000ff0000ffff, expected on a d register
+;CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff
+	ret <1 x i64> < i64 18374687574888349695 >
+}
+
+define <2 x float> @fmov2s() { ; splat of float -12.0
+;CHECK:  fmov {{v[0-9]+}}.2s, #-12.00000000
+	ret <2 x float> < float -1.2e1, float -1.2e1>
+}
+
+define <4 x float> @fmov4s() { ; splat of float -12.0
+;CHECK:  fmov {{v[0-9]+}}.4s, #-12.00000000
+	ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1>
+}
+
+define <2 x double> @fmov2d() { ; splat of double -12.0
+;CHECK:  fmov {{v[0-9]+}}.2d, #-12.00000000
+	ret <2 x double> < double -1.2e1, double -1.2e1>
+}
+
+define <2 x i32> @movi1d_1() { ; lanes 0xffff0000 and 0x0000ffff, expected as one 64-bit movi
+; CHECK: movi    d0, #0xffffffff0000
+  ret <2 x i32> <i32 -65536, i32 65535>
+}
+
+
+declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>)
+define <2 x i32> @movi1d() { ; the second call argument is the constant under test
+; CHECK: movi     d1, #0xffffffff0000
+  %ret = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
+  ret <2 x i32> %ret
+}
+
diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll
new file mode 100644
index 000000000000..e1be31326638
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-mul-div.ll
@@ -0,0 +1,181 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+
+define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { ; integer vector multiply on .8b
+;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp3 = mul <8 x i8> %A, %B
+	ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { ; integer vector multiply on .16b
+;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+	%tmp3 = mul <16 x i8> %A, %B
+	ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { ; integer vector multiply on .4h
+;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+	%tmp3 = mul <4 x i16> %A, %B
+	ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { ; integer vector multiply on .8h
+;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+	%tmp3 = mul <8 x i16> %A, %B
+	ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { ; integer vector multiply on .2s
+;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = mul <2 x i32> %A, %B
+	ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { ; integer vector multiply on .4s
+;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = mul <4 x i32> %A, %B
+	ret <4 x i32> %tmp3
+}
+
+define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { ; floating-point vector multiply on .2s
+;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = fmul <2 x float> %A, %B
+	ret <2 x float> %tmp3
+}
+
+define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { ; floating-point vector multiply on .4s
+;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = fmul <4 x float> %A, %B
+	ret <4 x float> %tmp3
+}
+define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { ; floating-point vector multiply on .2d
+;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = fmul <2 x double> %A, %B
+	ret <2 x double> %tmp3
+}
+
+
+define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { ; floating-point vector divide on .2s
+;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp3 = fdiv <2 x float> %A, %B
+	ret <2 x float> %tmp3
+}
+
+define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { ; floating-point vector divide on .4s
+;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+	%tmp3 = fdiv <4 x float> %A, %B
+	ret <4 x float> %tmp3
+}
+define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { ; floating-point vector divide on .2d
+;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+	%tmp3 = fdiv <2 x double> %A, %B
+	ret <2 x double> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>)
+declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>)
+
+define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vmulp intrinsic is expected as pmul
+; CHECK-LABEL: poly_mulv8i8:
+   %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: pmul v0.8b, v0.8b, v1.8b
+   ret <8 x i8> %prod
+}
+
+define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vmulp intrinsic is expected as pmul
+; CHECK-LABEL: poly_mulv16i8:
+   %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: pmul v0.16b, v0.16b, v1.16b
+   ret <16 x i8> %prod
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqdmulh intrinsic is expected as sqdmulh
+; CHECK-LABEL: test_sqdmulh_v4i16:
+   %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
+   ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqdmulh intrinsic is expected as sqdmulh
+; CHECK-LABEL: test_sqdmulh_v8i16:
+   %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
+   ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqdmulh intrinsic is expected as sqdmulh
+; CHECK-LABEL: test_sqdmulh_v2i32:
+   %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
+   ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqdmulh intrinsic is expected as sqdmulh
+; CHECK-LABEL: test_sqdmulh_v4i32:
+   %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
+   ret <4 x i32> %prod
+}
+
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqrdmulh intrinsic is expected as sqrdmulh
+; CHECK-LABEL: test_sqrdmulh_v4i16:
+   %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
+   ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqrdmulh intrinsic is expected as sqrdmulh
+; CHECK-LABEL: test_sqrdmulh_v8i16:
+   %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
+   ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqrdmulh intrinsic is expected as sqrdmulh
+; CHECK-LABEL: test_sqrdmulh_v2i32:
+   %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
+   ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqrdmulh intrinsic is expected as sqrdmulh
+; CHECK-LABEL: test_sqrdmulh_v4i32:
+   %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
+   ret <4 x i32> %prod
+}
+
+declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; Registers other than v0 and v1 are possible, but would be odd.
+; CHECK: fmulx v0.2s, v0.2s, v1.2s
+  %prod = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+  ret <2 x float> %prod
+}
+
+define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; Registers other than v0 and v1 are possible, but would be odd.
+; CHECK: fmulx v0.4s, v0.4s, v1.4s
+  %prod = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+  ret <4 x float> %prod
+}
+
+define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; Registers other than v0 and v1 are possible, but would be odd.
+; CHECK: fmulx v0.2d, v0.2d, v1.2d
+  %prod = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+  ret <2 x double> %prod
+}
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
new file mode 100644
index 000000000000..fa4d54dc745f
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -0,0 +1,1693 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
+%struct.uint16x4x2_t = type { [2 x <4 x i16>] }
+%struct.uint32x2x2_t = type { [2 x <2 x i32>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.poly8x8x2_t = type { [2 x <8 x i8>] }
+%struct.poly16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int8x16x2_t = type { [2 x <16 x i8>] }
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
+%struct.uint16x8x2_t = type { [2 x <8 x i16>] }
+%struct.uint32x4x2_t = type { [2 x <4 x i32>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
+%struct.poly16x8x2_t = type { [2 x <8 x i16>] }
+
+define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { ; {a[0], b[0]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { ; {a[0], b[0]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { ; {a[0], b[0]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { ; {a[0], b[0]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { ; {a[0], b[0]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { ; {a[0], b[0]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) { ; even lanes of a:b
+; CHECK-LABEL: test_vuzp1q_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2_s8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_s8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2_s16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_s16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { ; {a[1], b[1]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_s32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { ; {a[1], b[1]}: lane insert followed by an orr
+; CHECK-LABEL: test_vuzp2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2_u8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_u8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2_u16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_u16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { ; {a[1], b[1]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_u32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { ; {a[1], b[1]}: lane insert followed by an orr
+; CHECK-LABEL: test_vuzp2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { ; {a[1], b[1]}: expected as a lane insert
+; CHECK-LABEL: test_vuzp2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_f32:
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { ; {a[1], b[1]}: lane insert followed by an orr
+; CHECK-LABEL: test_vuzp2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2_p8:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_p8:
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2_p16:
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) { ; odd lanes of a:b
+; CHECK-LABEL: test_vuzp2q_p16:
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) { ; interleave the low halves of a and b
+; CHECK-LABEL: test_vzip1_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) { ; interleave the low halves of a and b
+; CHECK-LABEL: test_vzip1q_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) { ; interleave the low halves of a and b
+; CHECK-LABEL: test_vzip1_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) { ; interleave the low halves of a and b
+; CHECK-LABEL: test_vzip1q_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vzip1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vzip1q_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vzip1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vzip1_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vzip1q_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vzip1_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vzip1q_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vzip1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vzip1q_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vzip1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_vzip1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vzip1q_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vzip1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vzip1_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vzip1q_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vzip1_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vzip1q_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask interleaves the low halves of a and b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vzip2_s8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vzip2q_s8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vzip2_s16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vzip2q_s16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vzip2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vzip2q_s32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vzip2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vzip2_u8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vzip2q_u8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vzip2_u16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vzip2q_u16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vzip2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vzip2q_u32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vzip2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_vzip2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vzip2q_f32:
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vzip2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vzip2_p8:
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vzip2q_p8:
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vzip2_p16:
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vzip2q_p16:
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask interleaves the high halves of a and b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtrn1_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtrn1q_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vtrn1_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vtrn1q_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vtrn1_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vtrn1q_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vtrn1q_s64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtrn1_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtrn1q_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vtrn1_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vtrn1q_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vtrn1_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vtrn1q_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vtrn1q_u64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_vtrn1_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vtrn1q_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vtrn1q_f64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry: ; two-lane mask <a[0], b[0]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtrn1_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtrn1q_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vtrn1_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vtrn1q_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask pairs the even-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i16> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtrn2_s8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtrn2q_s8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vtrn2_s16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vtrn2q_s16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vtrn2_s32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vtrn2q_s32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vtrn2q_s64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtrn2_u8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtrn2q_u8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vtrn2_u16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vtrn2q_u16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vtrn2_u32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle.i
+}
+
+define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vtrn2q_u32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i32> %shuffle.i
+}
+
+define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vtrn2q_u64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle.i
+}
+
+define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_vtrn2_f32:
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuffle.i
+}
+
+define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vtrn2q_f32:
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x float> %shuffle.i
+}
+
+define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vtrn2q_f64:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+entry: ; two-lane mask <a[1], b[1]>; the expected lowering is a lane insert
+  %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtrn2_p8:
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i8> %shuffle.i
+}
+
+define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtrn2q_p8:
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vtrn2_p16:
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i16> %shuffle.i
+}
+
+define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vtrn2q_p16:
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; mask pairs the odd-numbered lanes of a with those of b
+  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i16> %shuffle.i
+}
+
+define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vuzp_s8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vuzp_s16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vuzp_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; <a[0], b[0]> and <a[1], b[1]>, returned together in one struct
+  %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vuzp_u8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vuzp_u16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+  ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vuzp_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; <a[0], b[0]> and <a[1], b[1]>, returned together in one struct
+  %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
+  %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1
+  ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_vuzp_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry: ; <a[0], b[0]> and <a[1], b[1]>, returned together in one struct
+  %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
+  %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vuzp1.i, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vuzp_p8:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vuzp_p16:
+; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1
+  ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vuzpq_s8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vuzpq_s16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry: ; even-lane and odd-lane shuffles of a:b, returned together in one struct
+  %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_s32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_u8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+  ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_u16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+  ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_u32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1
+  ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_f32:
+; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vuzp1.i, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_p8:
+; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1
+  ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) { ; de-interleave %a:%b; expects uzp1 then uzp2
+; CHECK-LABEL: test_vuzpq_p16:
+; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed lanes of %a:%b
+  %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed lanes of %a:%b
+  %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1
+  ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzip_s8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzip_s16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { ; 2-lane interleave; expects a pair of ins
+; CHECK-LABEL: test_vzip_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> ; lane 0 of %a, lane 0 of %b
+  %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> ; lane 1 of %a, lane 1 of %b
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzip_u8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzip_u16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+  ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { ; 2-lane interleave; expects a pair of ins
+; CHECK-LABEL: test_vzip_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> ; lane 0 of %a, lane 0 of %b
+  %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> ; lane 1 of %a, lane 1 of %b
+  %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1
+  ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { ; 2-lane interleave; expects a pair of ins
+; CHECK-LABEL: test_vzip_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> ; lane 0 of %a, lane 0 of %b
+  %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> ; lane 1 of %a, lane 1 of %b
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vzip1.i, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzip_p8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1
+  ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzip_p16:
+; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1
+  ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_s8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_s16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_s32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_u8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+  ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_u16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+  ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_u32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1
+  ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_f32:
+; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vzip1.i, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_p8:
+; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1
+  ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) { ; interleave %a with %b; expects zip1 then zip2
+; CHECK-LABEL: test_vzipq_p16:
+; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> ; interleave low halves of %a and %b
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> ; interleave high halves of %a and %b
+  %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vzip.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1
+  ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrn_s8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrn_s16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { ; 2-lane transpose; expects a pair of ins
+; CHECK-LABEL: test_vtrn_s32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> ; lane 0 of %a, lane 0 of %b
+  %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> ; lane 1 of %a, lane 1 of %b
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrn_u8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrn_u16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+  ret %struct.uint16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { ; 2-lane transpose; expects a pair of ins
+; CHECK-LABEL: test_vtrn_u32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> ; lane 0 of %a, lane 0 of %b
+  %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> ; lane 1 of %a, lane 1 of %b
+  %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1
+  ret %struct.uint32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { ; 2-lane transpose; expects a pair of ins
+; CHECK-LABEL: test_vtrn_f32:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+entry:
+  %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> ; lane 0 of %a, lane 0 of %b
+  %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> ; lane 1 of %a, lane 1 of %b
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vtrn1.i, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrn_p8:
+; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+  %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1
+  ret %struct.poly8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrn_p16:
+; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+  %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1
+  ret %struct.poly16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_s8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_s16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_s32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_u8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+  ret %struct.uint8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_u16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+  ret %struct.uint16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_u32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1
+  ret %struct.uint32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_f32:
+; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+  %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vtrn1.i, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_p8:
+; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+  %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1
+  ret %struct.poly8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) { ; transpose lane pairs; expects trn1 then trn2
+; CHECK-LABEL: test_vtrnq_p16:
+; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+  %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; even lanes of %a paired with even lanes of %b
+  %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; odd lanes of %a paired with odd lanes of %b
+  %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
+  ret %struct.poly16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) { ; de-interleave one 16-lane input into two 8-lane results
+; CHECK-LABEL: test_uzp:
+
+  %vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; even-indexed lanes of %y
+  %vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed lanes of %y
+  %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+  ret %struct.uint8x8x2_t %.fca.0.1.insert ; the directives below expect dup from .d[1], then uzp1 and uzp2 on consecutive lines
+
+; CHECK: dup	{{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: uzp1	{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: uzp2	{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
diff --git a/test/CodeGen/AArch64/neon-rounding-halving-add.ll b/test/CodeGen/AArch64/neon-rounding-halving-add.ll
new file mode 100644
index 000000000000..009da3b51a83
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-rounding-halving-add.ll
@@ -0,0 +1,105 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vrhaddu.v8i8 must select URHADD (8b)
+; CHECK-LABEL: test_urhadd_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: urhadd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vrhadds.v8i8 must select SRHADD (8b)
+; CHECK-LABEL: test_srhadd_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: srhadd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vrhaddu.v16i8 must select URHADD (16b)
+; CHECK-LABEL: test_urhadd_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: urhadd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vrhadds.v16i8 must select SRHADD (16b)
+; CHECK-LABEL: test_srhadd_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: srhadd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vrhaddu.v4i16 must select URHADD (4h)
+; CHECK-LABEL: test_urhadd_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: urhadd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vrhadds.v4i16 must select SRHADD (4h)
+; CHECK-LABEL: test_srhadd_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: srhadd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vrhaddu.v8i16 must select URHADD (8h)
+; CHECK-LABEL: test_urhadd_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: urhadd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vrhadds.v8i16 must select SRHADD (8h)
+; CHECK-LABEL: test_srhadd_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: srhadd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vrhaddu.v2i32 must select URHADD (2s)
+; CHECK-LABEL: test_urhadd_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: urhadd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vrhadds.v2i32 must select SRHADD (2s)
+; CHECK-LABEL: test_srhadd_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: srhadd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vrhaddu.v4i32 must select URHADD (4s)
+; CHECK-LABEL: test_urhadd_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: urhadd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vrhadds.v4i32 must select SRHADD (4s)
+; CHECK-LABEL: test_srhadd_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: srhadd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll
new file mode 100644
index 000000000000..5b4ec2862c79
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-rounding-shift.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vrshiftu.v8i8 must select URSHL (8b)
+; CHECK-LABEL: test_urshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: urshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vrshifts.v8i8 must select SRSHL (8b)
+; CHECK-LABEL: test_srshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: srshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vrshiftu.v16i8 must select URSHL (16b)
+; CHECK-LABEL: test_urshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: urshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vrshifts.v16i8 must select SRSHL (16b)
+; CHECK-LABEL: test_srshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: srshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vrshiftu.v4i16 must select URSHL (4h)
+; CHECK-LABEL: test_urshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: urshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vrshifts.v4i16 must select SRSHL (4h)
+; CHECK-LABEL: test_srshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: srshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vrshiftu.v8i16 must select URSHL (8h)
+; CHECK-LABEL: test_urshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: urshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vrshifts.v8i16 must select SRSHL (8h)
+; CHECK-LABEL: test_srshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: srshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vrshiftu.v2i32 must select URSHL (2s)
+; CHECK-LABEL: test_urshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: urshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vrshifts.v2i32 must select SRSHL (2s)
+; CHECK-LABEL: test_srshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: srshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vrshiftu.v4i32 must select URSHL (4s)
+; CHECK-LABEL: test_urshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: urshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vrshifts.v4i32 must select SRSHL (4s)
+; CHECK-LABEL: test_srshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: srshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vrshiftu.v2i64 must select URSHL (2d)
+; CHECK-LABEL: test_urshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: urshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vrshifts.v2i64 must select SRSHL (2d)
+; CHECK-LABEL: test_srshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: srshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
new file mode 100644
index 000000000000..fc60d900e4db
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
@@ -0,0 +1,241 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqaddu.v8i8 must select UQADD (8b)
+; CHECK-LABEL: test_uqadd_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uqadd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqadds.v8i8 must select SQADD (8b)
+; CHECK-LABEL: test_sqadd_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sqadd v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqaddu.v16i8 must select UQADD (16b)
+; CHECK-LABEL: test_uqadd_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uqadd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqadds.v16i8 must select SQADD (16b)
+; CHECK-LABEL: test_sqadd_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sqadd v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqaddu.v4i16 must select UQADD (4h)
+; CHECK-LABEL: test_uqadd_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uqadd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqadds.v4i16 must select SQADD (4h)
+; CHECK-LABEL: test_sqadd_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqadd v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqaddu.v8i16 must select UQADD (8h)
+; CHECK-LABEL: test_uqadd_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uqadd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqadds.v8i16 must select SQADD (8h)
+; CHECK-LABEL: test_sqadd_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqadd v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqaddu.v2i32 must select UQADD (2s)
+; CHECK-LABEL: test_uqadd_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uqadd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqadds.v2i32 must select SQADD (2s)
+; CHECK-LABEL: test_sqadd_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqadd v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqaddu.v4i32 must select UQADD (4s)
+; CHECK-LABEL: test_uqadd_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uqadd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqadds.v4i32 must select SQADD (4s)
+; CHECK-LABEL: test_sqadd_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqadd v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+
+
+declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqaddu.v2i64 must select UQADD (2d)
+; CHECK-LABEL: test_uqadd_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: uqadd v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqadds.v2i64 must select SQADD (2d)
+; CHECK-LABEL: test_sqadd_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: sqadd v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqsubu.v8i8 must select UQSUB (8b)
+; CHECK-LABEL: test_uqsub_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uqsub v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqsubs.v8i8 must select SQSUB (8b)
+; CHECK-LABEL: test_sqsub_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sqsub v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqsubu.v16i8 must select UQSUB (16b)
+; CHECK-LABEL: test_uqsub_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uqsub v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqsubs.v16i8 must select SQSUB (16b)
+; CHECK-LABEL: test_sqsub_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sqsub v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqsubu.v4i16 must select UQSUB (4h)
+; CHECK-LABEL: test_uqsub_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uqsub v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqsubs.v4i16 must select SQSUB (4h)
+; CHECK-LABEL: test_sqsub_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqsub v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqsubu.v8i16 must select UQSUB (8h)
+; CHECK-LABEL: test_uqsub_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uqsub v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqsubs.v8i16 must select SQSUB (8h)
+; CHECK-LABEL: test_sqsub_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqsub v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqsubu.v2i32 must select UQSUB (2s)
+; CHECK-LABEL: test_uqsub_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uqsub v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqsubs.v2i32 must select SQSUB (2s)
+; CHECK-LABEL: test_sqsub_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqsub v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqsubu.v4i32 must select UQSUB (4s)
+; CHECK-LABEL: test_uqsub_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uqsub v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqsubs.v4i32 must select SQSUB (4s)
+; CHECK-LABEL: test_sqsub_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqsub v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqsubu.v2i64 must select UQSUB (2d)
+; CHECK-LABEL: test_uqsub_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: uqsub v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqsubs.v2i64 must select SQSUB (2d)
+; CHECK-LABEL: test_sqsub_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: sqsub v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
new file mode 100644
index 000000000000..d89262c2abaa
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqrshiftu.v8i8 must select UQRSHL (8b)
+; CHECK-LABEL: test_uqrshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uqrshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqrshifts.v8i8 must select SQRSHL (8b)
+; CHECK-LABEL: test_sqrshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sqrshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqrshiftu.v16i8 must select UQRSHL (16b)
+; CHECK-LABEL: test_uqrshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uqrshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqrshifts.v16i8 must select SQRSHL (16b)
+; CHECK-LABEL: test_sqrshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sqrshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqrshiftu.v4i16 must select UQRSHL (4h)
+; CHECK-LABEL: test_uqrshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uqrshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqrshifts.v4i16 must select SQRSHL (4h)
+; CHECK-LABEL: test_sqrshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqrshiftu.v8i16 must select UQRSHL (8h)
+; CHECK-LABEL: test_uqrshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uqrshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqrshifts.v8i16 must select SQRSHL (8h)
+; CHECK-LABEL: test_sqrshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqrshiftu.v2i32 must select UQRSHL (2s)
+; CHECK-LABEL: test_uqrshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uqrshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqrshifts.v2i32 must select SQRSHL (2s)
+; CHECK-LABEL: test_sqrshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqrshiftu.v4i32 must select UQRSHL (4s)
+; CHECK-LABEL: test_uqrshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uqrshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqrshifts.v4i32 must select SQRSHL (4s)
+; CHECK-LABEL: test_sqrshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqrshiftu.v2i64 must select UQRSHL (2d)
+; CHECK-LABEL: test_uqrshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: uqrshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqrshifts.v2i64 must select SQRSHL (2d)
+; CHECK-LABEL: test_sqrshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: sqrshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll
new file mode 100644
index 000000000000..11009fba7511
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-saturating-shift.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqshiftu.v8i8 must select UQSHL (8b)
+; CHECK-LABEL: test_uqshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uqshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vqshifts.v8i8 must select SQSHL (8b)
+; CHECK-LABEL: test_sqshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sqshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqshiftu.v16i8 must select UQSHL (16b)
+; CHECK-LABEL: test_uqshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uqshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vqshifts.v16i8 must select SQSHL (16b)
+; CHECK-LABEL: test_sqshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sqshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqshiftu.v4i16 must select UQSHL (4h)
+; CHECK-LABEL: test_uqshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uqshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqshifts.v4i16 must select SQSHL (4h)
+; CHECK-LABEL: test_sqshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqshiftu.v8i16 must select UQSHL (8h)
+; CHECK-LABEL: test_uqshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uqshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqshifts.v8i16 must select SQSHL (8h)
+; CHECK-LABEL: test_sqshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqshiftu.v2i32 must select UQSHL (2s)
+; CHECK-LABEL: test_uqshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uqshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqshifts.v2i32 must select SQSHL (2s)
+; CHECK-LABEL: test_sqshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqshiftu.v4i32 must select UQSHL (4s)
+; CHECK-LABEL: test_uqshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uqshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqshifts.v4i32 must select SQSHL (4s)
+; CHECK-LABEL: test_sqshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqshiftu.v2i64 must select UQSHL (2d)
+; CHECK-LABEL: test_uqshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: uqshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { ; vqshifts.v2i64 must select SQSHL (2d)
+; CHECK-LABEL: test_sqshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: sqshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
diff --git a/test/CodeGen/AArch64/neon-scalar-abs.ll b/test/CodeGen/AArch64/neon-scalar-abs.ll
new file mode 100644
index 000000000000..03a89e043e50
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-abs.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define i64 @test_vabsd_s64(i64 %a) { ; i64 routed through <1 x i64> llvm.aarch64.neon.vabs must select ABS on D registers
+; CHECK-LABEL: test_vabsd_s64:
+; CHECK: abs {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vabs1.i = tail call <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64> %vabs.i)
+  %0 = extractelement <1 x i64> %vabs1.i, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64>)
+
+define i8 @test_vqabsb_s8(i8 %a) { ; i8 routed through <1 x i8> vqabs must select SQABS on B registers
+; CHECK-LABEL: test_vqabsb_s8:
+; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}}
+entry:
+  %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0
+  %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i)
+  %0 = extractelement <1 x i8> %vqabs1.i, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>)
+
+define i16 @test_vqabsh_s16(i16 %a) { ; i16 routed through <1 x i16> vqabs must select SQABS on H registers
+; CHECK-LABEL: test_vqabsh_s16:
+; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0
+  %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i)
+  %0 = extractelement <1 x i16> %vqabs1.i, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>)
+
+define i32 @test_vqabss_s32(i32 %a) { ; i32 routed through <1 x i32> vqabs must select SQABS on S registers
+; CHECK-LABEL: test_vqabss_s32:
+; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0
+  %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i)
+  %0 = extractelement <1 x i32> %vqabs1.i, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>)
+
+define i64 @test_vqabsd_s64(i64 %a) { ; i64 routed through <1 x i64> vqabs must select SQABS on D registers
+; CHECK-LABEL: test_vqabsd_s64:
+; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i)
+  %0 = extractelement <1 x i64> %vqabs1.i, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
new file mode 100644
index 000000000000..09ca880c8053
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { ; plain <1 x i64> add must select ADD on D registers
+; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+	%tmp3 = add <1 x i64> %A, %B
+	ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { ; plain <1 x i64> sub must select SUB on D registers
+; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+	%tmp3 = sub <1 x i64> %A, %B
+	ret <1 x i64> %tmp3
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; llvm.aarch64.neon.vaddds must select ADD on D registers
+; CHECK-LABEL: test_add_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; llvm.aarch64.neon.vadddu must select ADD on D registers
+; CHECK-LABEL: test_uadd_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; llvm.aarch64.neon.vsubds must select SUB on D registers
+; CHECK-LABEL: test_sub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; llvm.aarch64.neon.vsubdu must select SUB on D registers
+; CHECK-LABEL: test_usub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
new file mode 100644
index 000000000000..8ce42def409a
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+
+define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { ; fma(b, v[3], a); expects fmla by element
+  ; CHECK: test_fmla_ss4S
+  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { ; lane operand first: fma(v[3], b, a); expects fmla by element
+  ; CHECK: test_fmla_ss4S_swap
+  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { ; fma(b, v[1], a); expects fmla by element
+  ; CHECK: test_fmla_ss2S
+  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1
+  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %tmp2
+}
+
+define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { ; fma(b, v[0], a); expects fmla by element
+  ; CHECK: test_fmla_ddD
+  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) { ; fma(b, v[1], a); expects fmla by element
+  ; CHECK: test_fmla_dd2D
+  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { ; lane operand first: fma(v[1], b, a); expects fmla by element
+  ; CHECK: test_fmla_dd2D_swap
+  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
+  ret double %tmp2
+}
+
+define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) { ; fma(-v[3], v[3], a); expects fmls by element
+  ; CHECK: test_fmls_ss4S
+  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = fsub float -0.0, %tmp1
+  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
+  ret float %tmp3
+}
+
+define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { ; fma(v[3], -v[3], a); expects fmls by element
+  ; CHECK: test_fmls_ss4S_swap
+  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = fsub float -0.0, %tmp1
+  %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
+  ret float %tmp3
+}
+
+
+define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { ; fma(-v[1], v[1], a); expects fmls by element
+  ; CHECK: test_fmls_ss2S
+  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1
+  %tmp2 = fsub float -0.0, %tmp1
+  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
+  ret float %tmp3
+}
+
+define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { ; fma(-v[0], v[0], a); expects fmls by element
+  ; CHECK: test_fmls_ddD
+  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = fsub double -0.0, %tmp1
+  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
+  ret double %tmp3
+}
+
+define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) { ; fma(-v[1], v[1], a); expects fmls by element
+  ; CHECK: test_fmls_dd2D
+  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = fsub double -0.0, %tmp1
+  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
+  ret double %tmp3
+}
+
+define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) { ; fma(v[1], -v[1], a); expects fmls by element
+  ; CHECK: test_fmls_dd2D_swap
+  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = fsub double -0.0, %tmp1
+  %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
+  ret double %tmp3
+}
+
diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
new file mode 100644
index 000000000000..968ad3e8cf71
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { ; a * v[1]; expects fmul by element
+  ; CHECK: test_fmul_lane_ss2S
+  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1
+  %tmp2 = fmul float %a, %tmp1
+  ret float %tmp2
+}
+
+define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { ; v[1] * a (lane operand first); expects fmul by element
+  ; CHECK: test_fmul_lane_ss2S_swap
+  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1
+  %tmp2 = fmul float %tmp1, %a
+  ret float %tmp2
+}
+
+
+define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { ; a * v[3]; expects fmul by element
+  ; CHECK: test_fmul_lane_ss4S
+  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = fmul float %a, %tmp1
+  ret float %tmp2
+}
+
+define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { ; v[3] * a (lane operand first); expects fmul by element
+  ; CHECK: test_fmul_lane_ss4S_swap
+  ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = fmul float %tmp1, %a
+  ret float %tmp2
+}
+
+
+define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { ; a * v[0]; expects fmul by element
+  ; CHECK: test_fmul_lane_ddD
+  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = fmul double %a, %tmp1
+  ret double %tmp2
+}
+
+
+
+define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { ; a * v[1]; expects fmul by element
+  ; CHECK: test_fmul_lane_dd2D
+  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = fmul double %a, %tmp1
+  ret double %tmp2
+}
+
+
+define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { ; v[1] * a (lane operand first); expects fmul by element
+  ; CHECK: test_fmul_lane_dd2D_swap
+  ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = fmul double %tmp1, %a
+  ret double %tmp2
+}
+
+declare float @llvm.aarch64.neon.vmulx.f32(float, float)
+
+define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { ; vmulx.f32(a, v[1]); expects fmulx by element
+  ; CHECK: test_fmulx_lane_f32
+  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1
+  %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
+  ret float %tmp2
+}
+
+define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { ; vmulx.f32(a, v[3]); expects fmulx by element
+  ; CHECK: test_fmulx_laneq_f32
+  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
+  ret float %tmp2
+}
+
+define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { ; vmulx.f32(v[3], a), lane operand first; expects fmulx by element
+  ; CHECK: test_fmulx_laneq_f32_swap
+  ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3
+  %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a)
+  ret float %tmp2
+}
+
+declare double @llvm.aarch64.neon.vmulx.f64(double, double)
+
+define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { ; vmulx.f64(a, v[0]); expects fmulx by element
+  ; CHECK: test_fmulx_lane_f64
+  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp1 = extractelement <1 x double> %v, i32 0
+  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
+  ret double %tmp2
+}
+
+define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { ; vmulx.f64(a, v[0]); expects fmulx by element
+  ; CHECK: test_fmulx_laneq_f64_0
+  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+  %tmp1 = extractelement <2 x double> %v, i32 0
+  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
+  ret double %tmp2
+}
+
+
+define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { ; vmulx.f64(a, v[1]); expects fmulx by element
+  ; CHECK: test_fmulx_laneq_f64_1
+  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
+  ret double %tmp2
+}
+
+define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { ; vmulx.f64(v[1], a), lane operand first; expects fmulx by element
+  ; CHECK: test_fmulx_laneq_f64_1_swap
+  ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1
+  %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a)
+  ret double %tmp2
+}
+
diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll
new file mode 100644
index 000000000000..5f10cbbab2a6
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-compare.ll
@@ -0,0 +1,343 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+;; Scalar Integer Compare
+
+define i64 @test_vceqd(i64 %a, i64 %b) { ; vceq intrinsic on <1 x i64> built from (a, b); expects scalar cmeq
+; CHECK: test_vceqd
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vceq.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vceq1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceq.i, <1 x i64> %vceq1.i)
+  %0 = extractelement <1 x i64> %vceq2.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vceqzd(i64 %a) { ; vceq intrinsic against zeroinitializer; expects cmeq with #0x0
+; CHECK: test_vceqzd
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+entry:
+  %vceqz.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vceqz1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceqz.i, <1 x i64> zeroinitializer)
+  %0 = extractelement <1 x i64> %vceqz1.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcged(i64 %a, i64 %b) { ; vcge intrinsic on <1 x i64> built from (a, b); expects scalar cmge
+; CHECK: test_vcged
+; CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcge.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcge1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i)
+  %0 = extractelement <1 x i64> %vcge2.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcgezd(i64 %a) { ; vcge intrinsic against zeroinitializer; expects cmge with #0x0
+; CHECK: test_vcgezd
+; CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+entry:
+  %vcgez.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcgez1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcgez.i, <1 x i64> zeroinitializer)
+  %0 = extractelement <1 x i64> %vcgez1.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcgtd(i64 %a, i64 %b) { ; vcgt intrinsic on <1 x i64> built from (a, b); expects scalar cmgt
+; CHECK: test_vcgtd
+; CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcgt.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcgt1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i)
+  %0 = extractelement <1 x i64> %vcgt2.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcgtzd(i64 %a) { ; vcgt intrinsic against zeroinitializer; expects cmgt with #0x0
+; CHECK: test_vcgtzd
+; CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+entry:
+  %vcgtz.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcgtz1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgtz.i, <1 x i64> zeroinitializer)
+  %0 = extractelement <1 x i64> %vcgtz1.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcled(i64 %a, i64 %b) { ; vcgt intrinsic with operands swapped to (b, a); expects scalar cmgt
+; CHECK: test_vcled
+; CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcgt.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vcgt1.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i)
+  %0 = extractelement <1 x i64> %vcgt2.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vclezd(i64 %a) { ; vclez intrinsic with a zeroinitializer operand; expects cmle with #0x0
+; CHECK: test_vclezd
+; CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+entry:
+  %vclez.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vclez1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64> %vclez.i, <1 x i64> zeroinitializer)
+  %0 = extractelement <1 x i64> %vclez1.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcltd(i64 %a, i64 %b) { ; vcge intrinsic with operands swapped to (b, a); expects scalar cmge
+; CHECK: test_vcltd
+; CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcge.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vcge1.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i)
+  %0 = extractelement <1 x i64> %vcge2.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vcltzd(i64 %a) { ; vcltz intrinsic with a zeroinitializer operand; expects cmlt with #0x0
+; CHECK: test_vcltzd
+; CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+entry:
+  %vcltz.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vcltz1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64> %vcltz.i, <1 x i64> zeroinitializer)
+  %0 = extractelement <1 x i64> %vcltz1.i, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vtstd(i64 %a, i64 %b) { ; vtstd intrinsic on <1 x i64> built from (a, b); expects scalar cmtst
+; CHECK: test_vtstd
+; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vtst.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vtst1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vtst2.i = call <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64> %vtst.i, <1 x i64> %vtst1.i)
+  %0 = extractelement <1 x i64> %vtst2.i, i32 0
+  ret i64 %0
+}
+
+
+define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) { ; vcage intrinsic on (a, b); expects scalar facge
+; CHECK: test_vcage_f64
+; CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x i64> %vcage2.i
+}
+
+define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) { ; vcagt intrinsic on (a, b); expects scalar facgt
+; CHECK: test_vcagt_f64
+; CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x i64> %vcagt2.i
+}
+
+define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) { ; vcage intrinsic with operands swapped to (b, a); expects scalar facge
+; CHECK: test_vcale_f64
+; CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a)
+  ret <1 x i64> %vcage2.i
+}
+
+define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) { ; vcagt intrinsic with operands swapped to (b, a); expects scalar facgt
+; CHECK: test_vcalt_f64
+; CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a)
+  ret <1 x i64> %vcagt2.i
+}
+
+define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp eq on <1 x i64>; expects scalar cmeq
+; CHECK: test_vceq_s64
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp eq <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp eq on <1 x i64>; expects scalar cmeq
+; CHECK: test_vceq_u64
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp eq <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) { ; sign-extended fcmp oeq on <1 x double>; expects scalar fcmeq
+; CHECK: test_vceq_f64
+; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = fcmp oeq <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp sge on <1 x i64>; expects scalar cmge
+; CHECK: test_vcge_s64
+; CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp sge <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp uge on <1 x i64>; expects scalar cmhs
+; CHECK: test_vcge_u64
+; CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp uge <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) { ; sign-extended fcmp oge on <1 x double>; expects scalar fcmge
+; CHECK: test_vcge_f64
+; CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = fcmp oge <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp sle on <1 x i64>; expects scalar cmge
+; CHECK: test_vcle_s64
+; CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp sle <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp ule on <1 x i64>; expects scalar cmhs
+; CHECK: test_vcle_u64
+; CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp ule <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) { ; sign-extended fcmp ole on <1 x double>; expects scalar fcmge
+; CHECK: test_vcle_f64
+; CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = fcmp ole <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp sgt on <1 x i64>; expects scalar cmgt
+; CHECK: test_vcgt_s64
+; CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp sgt <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp ugt on <1 x i64>; expects scalar cmhi
+; CHECK: test_vcgt_u64
+; CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp ugt <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) { ; sign-extended fcmp ogt on <1 x double>; expects scalar fcmgt
+; CHECK: test_vcgt_f64
+; CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = fcmp ogt <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp slt on <1 x i64>; expects scalar cmgt
+; CHECK: test_vclt_s64
+; CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp slt <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) { ; sign-extended icmp ult on <1 x i64>; expects scalar cmhi
+; CHECK: test_vclt_u64
+; CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = icmp ult <1 x i64> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) { ; sign-extended fcmp olt on <1 x double>; expects scalar fcmgt
+; CHECK: test_vclt_f64
+; CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %cmp.i = fcmp olt <1 x double> %a, %b
+  %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
+  ret <1 x i64> %sext.i
+}
+
+define <1 x i64> @test_vceqz_s64(<1 x i64> %a) { ; zero-extended icmp eq against zero; expects cmeq with #0x0
+; CHECK: test_vceqz_s64
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  %1 = icmp eq <1 x i64> %a, zeroinitializer
+  %vceqz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define <1 x i64> @test_vceqz_u64(<1 x i64> %a) { ; zero-extended icmp eq against zero; expects cmeq with #0x0
+; CHECK: test_vceqz_u64
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  %1 = icmp eq <1 x i64> %a, zeroinitializer
+  %vceqz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define <1 x i64> @test_vceqz_p64(<1 x i64> %a) { ; zero-extended icmp eq against zero; expects cmeq with #0x0
+; CHECK: test_vceqz_p64
+; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  %1 = icmp eq <1 x i64> %a, zeroinitializer
+  %vceqz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) { ; zero-extended icmp eq against zero on <2 x i64>; expects cmeq on .2d vectors
+; CHECK: test_vceqzq_p64
+; CHECK: cmeq  {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+  %1 = icmp eq <2 x i64> %a, zeroinitializer
+  %vceqz.i = zext <2 x i1> %1 to <2 x i64>
+  ret <2 x i64> %vceqz.i
+}
+
+define <1 x i64> @test_vcgez_s64(<1 x i64> %a) { ; zero-extended icmp sge against zero; expects cmge with #0x0
+; CHECK: test_vcgez_s64
+; CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  %1 = icmp sge <1 x i64> %a, zeroinitializer
+  %vcgez.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vcgez.i
+}
+
+define <1 x i64> @test_vclez_s64(<1 x i64> %a) { ; zero-extended icmp sle against zero; expects cmle with #0x0
+; CHECK: test_vclez_s64
+; CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  %1 = icmp sle <1 x i64> %a, zeroinitializer
+  %vclez.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vclez.i
+}
+
+define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) { ; zero-extended icmp sgt against zero; expects cmgt with #0x0
+; CHECK: test_vcgtz_s64
+; CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0
+  %1 = icmp sgt <1 x i64> %a, zeroinitializer
+  %vcgtz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vcgtz.i
+}
+
+define <1 x i64> @test_vcltz_s64(<1 x i64> %a) { ; zero-extended icmp slt against zero; expects cmlt with a zero immediate
+; CHECK: test_vcltz_s64
+; CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0
+  %1 = icmp slt <1 x i64> %a, zeroinitializer
+  %vcltz.i = zext <1 x i1> %1 to <1 x i64>
+  ret <1 x i64> %vcltz.i
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vchi.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll
new file mode 100644
index 000000000000..d433ff595d1c
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define float @test_dup_sv2S(<2 x float> %v) { ; extract lane 1 of <2 x float>; expects dup from .s[1]
+ ;CHECK: test_dup_sv2S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1
+ ret float  %tmp1
+}
+
+define float @test_dup_sv4S(<4 x float> %v) { ; extract lane 0 of <4 x float>; expects dup from .s[0]
+ ;CHECK: test_dup_sv4S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0]
+ %tmp1 = extractelement <4 x float> %v, i32 0
+ ret float  %tmp1
+}
+
+define double @test_dup_dvD(<1 x double> %v) { ; extract lane 0 of <1 x double>; no dup from .d[0] may appear before ret
+ ;CHECK: test_dup_dvD
+ ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+ ;CHECK: ret
+ %tmp1 = extractelement <1 x double> %v, i32 0
+ ret double  %tmp1
+}
+
+define double @test_dup_dv2D(<2 x double> %v) { ; extract lane 1 of <2 x double>; expects dup from .d[1]
+ ;CHECK: test_dup_dv2D
+ ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ ret double  %tmp1
+}
+
+define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { ; one-element shuffle picking lane 14; expects dup from .b[14]
+ ;CHECK: test_vector_dup_bv16B
+ ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]
+ %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14>
+ ret <1 x i8> %shuffle.i
+}
+
+define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { ; one-element shuffle picking lane 7; expects dup from .b[7]
+ ;CHECK: test_vector_dup_bv8B
+ ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7]
+ %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7>
+ ret <1 x i8> %shuffle.i
+}
+
+define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { ; one-element shuffle picking lane 7; expects dup from .h[7]
+ ;CHECK: test_vector_dup_hv8H
+ ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+ %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7>
+ ret <1 x i16> %shuffle.i
+}
+
+define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { ; one-element shuffle picking lane 3; expects dup from .h[3]
+ ;CHECK: test_vector_dup_hv4H
+ ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+ %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3>
+ ret <1 x i16> %shuffle.i
+}
+
+define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { ; one-element shuffle picking lane 3; expects dup from .s[3]
+ ;CHECK: test_vector_dup_sv4S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3>
+ ret <1 x i32> %shuffle
+}
+
+define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { ; one-element shuffle picking lane 1; expects dup from .s[1]
+ ;CHECK: test_vector_dup_sv2S
+ ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1>
+ ret <1 x i32> %shuffle
+}
+
+define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ; one-element shuffle picking lane 1; expects dup from .d[1]
+ ;CHECK: test_vector_dup_dv2D
+ ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1>
+ ret <1 x i64> %shuffle.i
+}
+
+define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) { ; lane 1 of %c reinserted into a <1 x i64>; expects dup from .d[1]
+  ;CHECK: test_vector_copy_dup_dv2D
+  ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %vget_lane = extractelement <2 x i64> %c, i32 1
+  %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
+  ret <1 x i64> %vset_lane
+}
+
diff --git a/test/CodeGen/AArch64/neon-scalar-cvt.ll b/test/CodeGen/AArch64/neon-scalar-cvt.ll
new file mode 100644
index 000000000000..a06d5d60a85b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-cvt.ll
@@ -0,0 +1,137 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define float @test_vcvts_f32_s32(i32 %a) { ; i32 wrapped in <1 x i32> and converted via vcvtf32.s32; expects scvtf on s registers
+; CHECK: test_vcvts_f32_s32
+; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %src.vec = insertelement <1 x i32> undef, i32 %a, i32 0
+  %cvt = call float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32> %src.vec)
+  ret float %cvt
+}
+
+declare float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32>)
+
+define double @test_vcvtd_f64_s64(i64 %a) { ; i64 wrapped in <1 x i64> and converted via vcvtf64.s64; expects scvtf on d registers
+; CHECK: test_vcvtd_f64_s64
+; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %src.vec = insertelement <1 x i64> undef, i64 %a, i32 0
+  %cvt = call double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64> %src.vec)
+  ret double %cvt
+}
+
+declare double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64>)
+
+define float @test_vcvts_f32_u32(i32 %a) { ; i32 wrapped in <1 x i32> and converted via vcvtf32.u32; expects ucvtf on s registers
+; CHECK: test_vcvts_f32_u32
+; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %src.vec = insertelement <1 x i32> undef, i32 %a, i32 0
+  %cvt = call float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32> %src.vec)
+  ret float %cvt
+}
+
+declare float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32>)
+
+define double @test_vcvtd_f64_u64(i64 %a) { ; i64 wrapped in <1 x i64> and converted via vcvtf64.u64; expects ucvtf on d registers
+; CHECK: test_vcvtd_f64_u64
+; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %src.vec = insertelement <1 x i64> undef, i64 %a, i32 0
+  %cvt = call double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64> %src.vec)
+  ret double %cvt
+}
+
+declare double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64>)
+
+define float @test_vcvts_n_f32_s32(i32 %a) { ; vcvtf32.n.s32 with immediate 1; expects scvtf on s registers with #1
+; CHECK: test_vcvts_n_f32_s32
+; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1
+entry:
+  %src.vec = insertelement <1 x i32> undef, i32 %a, i32 0
+  %cvt = call float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32> %src.vec, i32 1)
+  ret float %cvt
+}
+
+declare float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32>, i32)
+
+define double @test_vcvtd_n_f64_s64(i64 %a) { ; vcvtf64.n.s64 with immediate 1; expects scvtf on d registers with #1
+; CHECK: test_vcvtd_n_f64_s64
+; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1
+entry:
+  %src.vec = insertelement <1 x i64> undef, i64 %a, i32 0
+  %cvt = call double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64> %src.vec, i32 1)
+  ret double %cvt
+}
+
+declare double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64>, i32)
+
+define float @test_vcvts_n_f32_u32(i32 %a) { ; vcvtf32.n.u32 with immediate 1; expects ucvtf on s registers with #1
+; CHECK: test_vcvts_n_f32_u32
+; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1
+entry:
+  %src.vec = insertelement <1 x i32> undef, i32 %a, i32 0
+  %cvt = call float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32> %src.vec, i32 1)
+  ret float %cvt
+}
+
+declare float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32>, i32)
+
+define double @test_vcvtd_n_f64_u64(i64 %a) { ; vcvtf64.n.u64 with immediate 1; expects ucvtf on d registers with #1
+; CHECK: test_vcvtd_n_f64_u64
+; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1
+entry:
+  %src.vec = insertelement <1 x i64> undef, i64 %a, i32 0
+  %cvt = call double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64> %src.vec, i32 1)
+  ret double %cvt
+}
+
+declare double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64>, i32)
+
+define i32 @test_vcvts_n_s32_f32(float %a) { ; vcvts.n.s32.f32 with immediate 1; expects fcvtzs on s registers with #1
+; CHECK: test_vcvts_n_s32_f32
+; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1
+entry:
+  %src.vec = insertelement <1 x float> undef, float %a, i32 0
+  %cvt.vec = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %src.vec, i32 1)
+  %res = extractelement <1 x i32> %cvt.vec, i32 0
+  ret i32 %res
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float>, i32)
+
+define i64 @test_vcvtd_n_s64_f64(double %a) { ; vcvtd.n.s64.f64 with immediate 1; expects fcvtzs on d registers with #1
+; CHECK: test_vcvtd_n_s64_f64
+; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1
+entry:
+  %src.vec = insertelement <1 x double> undef, double %a, i32 0
+  %cvt.vec = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %src.vec, i32 1)
+  %res = extractelement <1 x i64> %cvt.vec, i32 0
+  ret i64 %res
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double>, i32)
+
+define i32 @test_vcvts_n_u32_f32(float %a) { ; vcvts.n.u32.f32 with immediate 32; expects fcvtzu on s registers with #32
+; CHECK: test_vcvts_n_u32_f32
+; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32
+entry:
+  %src.vec = insertelement <1 x float> undef, float %a, i32 0
+  %cvt.vec = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %src.vec, i32 32)
+  %res = extractelement <1 x i32> %cvt.vec, i32 0
+  ret i32 %res
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float>, i32)
+
+define i64 @test_vcvtd_n_u64_f64(double %a) { ; vcvtd.n.u64.f64 with immediate 64; expects fcvtzu on d registers with #64
+; CHECK: test_vcvtd_n_u64_f64
+; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64
+entry:
+  %src.vec = insertelement <1 x double> undef, double %a, i32 0
+  %cvt.vec = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %src.vec, i32 64)
+  %res = extractelement <1 x i64> %cvt.vec, i32 0
+  ret i64 %res
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double>, i32)
diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
new file mode 100644
index 000000000000..faf521bc889a
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
@@ -0,0 +1,104 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define i8 @test_vqmovunh_s16(i16 %a) { ; vqmovnsu intrinsic, <1 x i16> to <1 x i8>; expects sqxtun from h to b
+; CHECK: test_vqmovunh_s16
+; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}}
+entry:
+  %wide = insertelement <1 x i16> undef, i16 %a, i32 0
+  %narrow = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %wide)
+  %res = extractelement <1 x i8> %narrow, i32 0
+  ret i8 %res
+}
+
+define i16 @test_vqmovuns_s32(i32 %a) { ; vqmovnsu intrinsic, <1 x i32> to <1 x i16>; expects sqxtun from s to h
+; CHECK: test_vqmovuns_s32
+; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}}
+entry:
+  %wide = insertelement <1 x i32> undef, i32 %a, i32 0
+  %narrow = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %wide)
+  %res = extractelement <1 x i16> %narrow, i32 0
+  ret i16 %res
+}
+
+define i32 @test_vqmovund_s64(i64 %a) { ; vqmovnsu intrinsic, <1 x i64> to <1 x i32>; expects sqxtun from d to s
+; CHECK: test_vqmovund_s64
+; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+  %wide = insertelement <1 x i64> undef, i64 %a, i32 0
+  %narrow = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %wide)
+  %res = extractelement <1 x i32> %narrow, i32 0
+  ret i32 %res
+}
+
+declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>)
+
+define i8 @test_vqmovnh_s16(i16 %a) { ; vqmovns intrinsic, <1 x i16> to <1 x i8>; expects sqxtn from h to b
+; CHECK: test_vqmovnh_s16
+; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}}
+entry:
+  %wide = insertelement <1 x i16> undef, i16 %a, i32 0
+  %narrow = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %wide)
+  %res = extractelement <1 x i8> %narrow, i32 0
+  ret i8 %res
+}
+
+define i16 @test_vqmovns_s32(i32 %a) { ; vqmovns intrinsic, <1 x i32> to <1 x i16>; expects sqxtn from s to h
+; CHECK: test_vqmovns_s32
+; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}}
+entry:
+  %wide = insertelement <1 x i32> undef, i32 %a, i32 0
+  %narrow = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %wide)
+  %res = extractelement <1 x i16> %narrow, i32 0
+  ret i16 %res
+}
+
+define i32 @test_vqmovnd_s64(i64 %a) { ; vqmovns intrinsic, <1 x i64> to <1 x i32>; expects sqxtn from d to s
+; CHECK: test_vqmovnd_s64
+; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+  %wide = insertelement <1 x i64> undef, i64 %a, i32 0
+  %narrow = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %wide)
+  %res = extractelement <1 x i32> %narrow, i32 0
+  ret i32 %res
+}
+
+declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>)
+
+define i8 @test_vqmovnh_u16(i16 %a) { ; vqmovnu intrinsic, <1 x i16> to <1 x i8>; expects uqxtn from h to b
+; CHECK: test_vqmovnh_u16
+; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}}
+entry:
+  %wide = insertelement <1 x i16> undef, i16 %a, i32 0
+  %narrow = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %wide)
+  %res = extractelement <1 x i8> %narrow, i32 0
+  ret i8 %res
+}
+
+
+define i16 @test_vqmovns_u32(i32 %a) { ; vqmovnu intrinsic, <1 x i32> to <1 x i16>; expects uqxtn from s to h
+; CHECK: test_vqmovns_u32
+; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}}
+entry:
+  %wide = insertelement <1 x i32> undef, i32 %a, i32 0
+  %narrow = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %wide)
+  %res = extractelement <1 x i16> %narrow, i32 0
+  ret i16 %res
+}
+
+define i32 @test_vqmovnd_u64(i64 %a) { ; vqmovnu intrinsic, <1 x i64> to <1 x i32>; expects uqxtn from d to s
+; CHECK: test_vqmovnd_u64
+; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+  %wide = insertelement <1 x i64> undef, i64 %a, i32 0
+  %narrow = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %wide)
+  %res = extractelement <1 x i32> %narrow, i32 0
+  ret i32 %res
+}
+
+declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-fabd.ll b/test/CodeGen/AArch64/neon-scalar-fabd.ll
new file mode 100644
index 000000000000..75686d32064b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-fabd.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define float @test_vabds_f32(float %a, float %b) {
+; CHECK-LABEL: test_vabds_f32
+; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+body: ; both scalars are wrapped in <1 x float>; the intrinsic result is unwrapped again
+  %lhs.v1 = insertelement <1 x float> undef, float %a, i32 0
+  %rhs.v1 = insertelement <1 x float> undef, float %b, i32 0
+  %fabd.v1 = call <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float> %lhs.v1, <1 x float> %rhs.v1)
+  %fabd = extractelement <1 x float> %fabd.v1, i32 0
+  ret float %fabd
+}
+
+define double @test_vabdd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vabdd_f64
+; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+body: ; both scalars are wrapped in <1 x double>; the intrinsic result is unwrapped again
+  %lhs.v1 = insertelement <1 x double> undef, double %a, i32 0
+  %rhs.v1 = insertelement <1 x double> undef, double %b, i32 0
+  %fabd.v1 = call <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double> %lhs.v1, <1 x double> %rhs.v1)
+  %fabd = extractelement <1 x double> %fabd.v1, i32 0
+  ret double %fabd
+}
+
+declare <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double>, <1 x double>)
+declare <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float>, <1 x float>)
diff --git a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll
new file mode 100644
index 000000000000..d7b84fae7375
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-fcvt.ll
@@ -0,0 +1,255 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+;; Scalar Floating-point Convert
+
+define float @test_vcvtxn(double %a) {
+; CHECK-LABEL: test_vcvtxn:
+; CHECK: fcvtxn {{s[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i)
+  %0 = extractelement <1 x float> %vcvtf1.i, i32 0 ; lane 0 is the scalar result
+  ret float %0
+}
+
+declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>)
+
+define i32 @test_vcvtass(float %a) {
+; CHECK-LABEL: test_vcvtass:
+; CHECK: fcvtas {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i)
+  %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>)
+
+define i64 @test_test_vcvtasd(double %a) {
+; CHECK-LABEL: test_test_vcvtasd:
+; CHECK: fcvtas {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i)
+  %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtaus(float %a) {
+; CHECK-LABEL: test_vcvtaus:
+; CHECK: fcvtau {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i)
+  %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtaud(double %a) {
+; CHECK-LABEL: test_vcvtaud:
+; CHECK: fcvtau {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i)
+  %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) 
+
+define i32 @test_vcvtmss(float %a) {
+; CHECK-LABEL: test_vcvtmss:
+; CHECK: fcvtms {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float> %vcvtms.i)
+  %0 = extractelement <1 x i32> %vcvtms1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtmd_s64_f64(double %a) {
+; CHECK-LABEL: test_vcvtmd_s64_f64:
+; CHECK: fcvtms {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i)
+  %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtmus(float %a) {
+; CHECK-LABEL: test_vcvtmus:
+; CHECK: fcvtmu {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i)
+  %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtmud(double %a) {
+; CHECK-LABEL: test_vcvtmud:
+; CHECK: fcvtmu {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i)
+  %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtnss(float %a) {
+; CHECK-LABEL: test_vcvtnss:
+; CHECK: fcvtns {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i)
+  %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtnd_s64_f64(double %a) {
+; CHECK-LABEL: test_vcvtnd_s64_f64:
+; CHECK: fcvtns {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i)
+  %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtnus(float %a) {
+; CHECK-LABEL: test_vcvtnus:
+; CHECK: fcvtnu {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i)
+  %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtnud(double %a) {
+; CHECK-LABEL: test_vcvtnud:
+; CHECK: fcvtnu {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i)
+  %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtpss(float %a) {
+; CHECK-LABEL: test_vcvtpss:
+; CHECK: fcvtps {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i)
+  %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtpd_s64_f64(double %a) {
+; CHECK-LABEL: test_vcvtpd_s64_f64:
+; CHECK: fcvtps {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i)
+  %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtpus(float %a) {
+; CHECK-LABEL: test_vcvtpus:
+; CHECK: fcvtpu {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i)
+  %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtpud(double %a) {
+; CHECK-LABEL: test_vcvtpud:
+; CHECK: fcvtpu {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i)
+  %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtss(float %a) {
+; CHECK-LABEL: test_vcvtss:
+; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i)
+  %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtd_s64_f64(double %a) {
+; CHECK-LABEL: test_vcvtd_s64_f64:
+; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i)
+  %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>)
+
+define i32 @test_vcvtus(float %a) {
+; CHECK-LABEL: test_vcvtus:
+; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i)
+  %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>)
+
+define i64 @test_vcvtud(double %a) {
+; CHECK-LABEL: test_vcvtud:
+; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i)
+  %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>)
diff --git a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
new file mode 100644
index 000000000000..a6e58599acdb
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
@@ -0,0 +1,328 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+;; Scalar Floating-point Compare
+
+define i32 @test_vceqs_f32(float %a, float %b) {
+; CHECK-LABEL: test_vceqs_f32:
+; CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vceq.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap each scalar in a one-element vector
+  %vceq1.i = insertelement <1 x float> undef, float %b, i32 0
+  %vceq2.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> %vceq1.i)
+  %0 = extractelement <1 x i32> %vceq2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vceqd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vceqd_f64:
+; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vceq.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap each scalar in a one-element vector
+  %vceq1.i = insertelement <1 x double> undef, double %b, i32 0
+  %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double> %vceq.i, <1 x double> %vceq1.i)
+  %0 = extractelement <1 x i64> %vceq2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define <1 x i64> @test_vceqz_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vceqz_f64:
+; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+entry:
+  %0 = fcmp oeq <1 x double> %a, zeroinitializer ; plain IR compare against zero, no intrinsic call
+  %vceqz.i = zext <1 x i1> %0 to <1 x i64>
+  ret <1 x i64> %vceqz.i
+}
+
+define i32 @test_vceqzs_f32(float %a) {
+; CHECK-LABEL: test_vceqzs_f32:
+; CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+entry:
+  %vceq.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vceq1.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i32> %vceq1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vceqzd_f64(double %a) {
+; CHECK-LABEL: test_vceqzd_f64:
+; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+entry: ; NOTE(review): the zero operand is typed <1 x float> although %a is double - confirm intended
+  %vceq.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vceq1.i = tail call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double> %vceq.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i64> %vceq1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcges_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcges_f32:
+; CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcge.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcge1.i = insertelement <1 x float> undef, float %b, i32 0
+  %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i)
+  %0 = extractelement <1 x i32> %vcge2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcged_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcged_f64:
+; CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcge.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcge1.i = insertelement <1 x double> undef, double %b, i32 0
+  %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i)
+  %0 = extractelement <1 x i64> %vcge2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcgezs_f32(float %a) {
+; CHECK-LABEL: test_vcgezs_f32:
+; CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+entry:
+  %vcge.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcge1.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i32> %vcge1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcgezd_f64(double %a) {
+; CHECK-LABEL: test_vcgezd_f64:
+; CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+entry: ; NOTE(review): the zero operand is typed <1 x float> although %a is double - confirm intended
+  %vcge.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcge1.i = tail call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double> %vcge.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i64> %vcge1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcgts_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcgts_f32:
+; CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcgt1.i = insertelement <1 x float> undef, float %b, i32 0
+  %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i)
+  %0 = extractelement <1 x i32> %vcgt2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcgtd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcgtd_f64:
+; CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcgt1.i = insertelement <1 x double> undef, double %b, i32 0
+  %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i)
+  %0 = extractelement <1 x i64> %vcgt2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcgtzs_f32(float %a) {
+; CHECK-LABEL: test_vcgtzs_f32:
+; CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+entry:
+  %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcgt1.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i32> %vcgt1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcgtzd_f64(double %a) {
+; CHECK-LABEL: test_vcgtzd_f64:
+; CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+entry: ; NOTE(review): the zero operand is typed <1 x float> although %a is double - confirm intended
+  %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcgt1.i = tail call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double> %vcgt.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i64> %vcgt1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcles_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcles_f32:
+; CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry: ; NOTE(review): operands stay in (%a, %b) order here, while test_vclts_f32 swaps them - confirm intended
+  %vcge.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcge1.i = insertelement <1 x float> undef, float %b, i32 0
+  %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i)
+  %0 = extractelement <1 x i32> %vcge2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcled_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcled_f64:
+; CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry: ; NOTE(review): operands stay in (%a, %b) order here, while test_vcltd_f64 swaps them - confirm intended
+  %vcge.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcge1.i = insertelement <1 x double> undef, double %b, i32 0
+  %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i)
+  %0 = extractelement <1 x i64> %vcge2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vclezs_f32(float %a) {
+; CHECK-LABEL: test_vclezs_f32:
+; CHECK: fcmle {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+entry:
+  %vcle.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcle1.i = call <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float> %vcle.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i32> %vcle1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vclezd_f64(double %a) {
+; CHECK-LABEL: test_vclezd_f64:
+; CHECK: fcmle {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+entry: ; NOTE(review): the zero operand is typed <1 x float> although %a is double - confirm intended
+  %vcle.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vcle1.i = tail call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double> %vcle.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i64> %vcle1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vclts_f32(float %a, float %b) {
+; CHECK-LABEL: test_vclts_f32:
+; CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcgt.i = insertelement <1 x float> undef, float %b, i32 0 ; operands swapped: %b is passed first, %a second
+  %vcgt1.i = insertelement <1 x float> undef, float %a, i32 0
+  %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i)
+  %0 = extractelement <1 x i32> %vcgt2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcltd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcltd_f64:
+; CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcgt.i = insertelement <1 x double> undef, double %b, i32 0 ; operands swapped: %b is passed first, %a second
+  %vcgt1.i = insertelement <1 x double> undef, double %a, i32 0
+  %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i)
+  %0 = extractelement <1 x i64> %vcgt2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcltzs_f32(float %a) {
+; CHECK-LABEL: test_vcltzs_f32:
+; CHECK: fcmlt {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+entry:
+  %vclt.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap the scalar in a one-element vector
+  %vclt1.i = call <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float> %vclt.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i32> %vclt1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcltzd_f64(double %a) {
+; CHECK-LABEL: test_vcltzd_f64:
+; CHECK: fcmlt {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+entry: ; NOTE(review): the zero operand is typed <1 x float> although %a is double - confirm intended
+  %vclt.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap the scalar in a one-element vector
+  %vclt1.i = tail call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double> %vclt.i, <1 x float> zeroinitializer)
+  %0 = extractelement <1 x i64> %vclt1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcages_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcages_f32:
+; CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcage.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcage1.i = insertelement <1 x float> undef, float %b, i32 0
+  %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i)
+  %0 = extractelement <1 x i32> %vcage2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcaged_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcaged_f64:
+; CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcage.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcage1.i = insertelement <1 x double> undef, double %b, i32 0
+  %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i)
+  %0 = extractelement <1 x i64> %vcage2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcagts_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcagts_f32:
+; CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcagt.i = insertelement <1 x float> undef, float %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcagt1.i = insertelement <1 x float> undef, float %b, i32 0
+  %vcagt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcagt.i, <1 x float> %vcagt1.i)
+  %0 = extractelement <1 x i32> %vcagt2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcagtd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcagtd_f64:
+; CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcagt.i = insertelement <1 x double> undef, double %a, i32 0 ; wrap each scalar in a one-element vector
+  %vcagt1.i = insertelement <1 x double> undef, double %b, i32 0
+  %vcagt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcagt.i, <1 x double> %vcagt1.i)
+  %0 = extractelement <1 x i64> %vcagt2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcales_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcales_f32:
+; CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcage.i = insertelement <1 x float> undef, float %b, i32 0 ; operands swapped: %b is passed first, %a second
+  %vcage1.i = insertelement <1 x float> undef, float %a, i32 0
+  %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i)
+  %0 = extractelement <1 x i32> %vcage2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcaled_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcaled_f64:
+; CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcage.i = insertelement <1 x double> undef, double %b, i32 0 ; operands swapped: %b is passed first, %a second
+  %vcage1.i = insertelement <1 x double> undef, double %a, i32 0
+  %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i)
+  %0 = extractelement <1 x i64> %vcage2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+define i32 @test_vcalts_f32(float %a, float %b) {
+; CHECK-LABEL: test_vcalts_f32:
+; CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vcalt.i = insertelement <1 x float> undef, float %b, i32 0 ; operands swapped: %b is passed first, %a second
+  %vcalt1.i = insertelement <1 x float> undef, float %a, i32 0
+  %vcalt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcalt.i, <1 x float> %vcalt1.i)
+  %0 = extractelement <1 x i32> %vcalt2.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vcaltd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vcaltd_f64:
+; CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vcalt.i = insertelement <1 x double> undef, double %b, i32 0 ; operands swapped: %b is passed first, %a second
+  %vcalt1.i = insertelement <1 x double> undef, double %a, i32 0
+  %vcalt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcalt.i, <1 x double> %vcalt1.i)
+  %0 = extractelement <1 x i64> %vcalt2.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f32(<1 x double>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f32(<1 x double>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f32(<1 x double>, <1 x float>)
+declare <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f32(<1 x double>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f32(<1 x double>, <1 x float>)
+declare <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
+declare <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>)
+declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>)
diff --git a/test/CodeGen/AArch64/neon-scalar-mul.ll b/test/CodeGen/AArch64/neon-scalar-mul.ll
new file mode 100644
index 000000000000..991037f6cb88
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-mul.ll
@@ -0,0 +1,143 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
+; CHECK-LABEL: test_vqdmulhh_s16:
+; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  %1 = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap each scalar in a one-element vector
+  %2 = insertelement <1 x i16> undef, i16 %b, i32 0
+  %3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
+  %4 = extractelement <1 x i16> %3, i32 0 ; lane 0 is the scalar result
+  ret i16 %4
+}
+
+define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: test_vqdmulhs_s32:
+; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap each scalar in a one-element vector
+  %2 = insertelement <1 x i32> undef, i32 %b, i32 0
+  %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
+  %4 = extractelement <1 x i32> %3, i32 0 ; lane 0 is the scalar result
+  ret i32 %4
+}
+
+declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>)
+
+define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) {
+; CHECK-LABEL: test_vqrdmulhh_s16:
+; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  %1 = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap each scalar in a one-element vector
+  %2 = insertelement <1 x i16> undef, i16 %b, i32 0
+  %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
+  %4 = extractelement <1 x i16> %3, i32 0 ; lane 0 is the scalar result
+  ret i16 %4
+}
+
+define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: test_vqrdmulhs_s32:
+; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap each scalar in a one-element vector
+  %2 = insertelement <1 x i32> undef, i32 %b, i32 0
+  %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
+  %4 = extractelement <1 x i32> %3, i32 0 ; lane 0 is the scalar result
+  ret i32 %4
+}
+
+declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>)
+
+define float @test_vmulxs_f32(float %a, float %b) {
+; CHECK-LABEL: test_vmulxs_f32:
+; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b) ; intrinsic takes plain scalars, no <1 x T> wrapping
+  ret float %1
+}
+
+define double @test_vmulxd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vmulxd_f64:
+; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b) ; intrinsic takes plain scalars, no <1 x T> wrapping
+  ret double %1
+}
+
+declare float @llvm.aarch64.neon.vmulx.f32(float, float)
+declare double @llvm.aarch64.neon.vmulx.f64(double, double)
+
+define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: test_vqdmlalh_s16:
+; CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vqdmlal.i = insertelement <1 x i32> undef, i32 %a, i32 0 ; %a is the wide (i32) operand; %b and %c are i16
+  %vqdmlal1.i = insertelement <1 x i16> undef, i16 %b, i32 0
+  %vqdmlal2.i = insertelement <1 x i16> undef, i16 %c, i32 0
+  %vqdmlal3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32> %vqdmlal.i, <1 x i16> %vqdmlal1.i, <1 x i16> %vqdmlal2.i)
+  %0 = extractelement <1 x i32> %vqdmlal3.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_vqdmlals_s32:
+; CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vqdmlal.i = insertelement <1 x i64> undef, i64 %a, i32 0 ; %a is the wide (i64) operand; %b and %c are i32
+  %vqdmlal1.i = insertelement <1 x i32> undef, i32 %b, i32 0
+  %vqdmlal2.i = insertelement <1 x i32> undef, i32 %c, i32 0
+  %vqdmlal3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64> %vqdmlal.i, <1 x i32> %vqdmlal1.i, <1 x i32> %vqdmlal2.i)
+  %0 = extractelement <1 x i64> %vqdmlal3.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32>, <1 x i16>, <1 x i16>)
+declare <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64>, <1 x i32>, <1 x i32>)
+
+define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: test_vqdmlslh_s16:
+; CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vqdmlsl.i = insertelement <1 x i32> undef, i32 %a, i32 0 ; %a is the wide (i32) operand; %b and %c are i16
+  %vqdmlsl1.i = insertelement <1 x i16> undef, i16 %b, i32 0
+  %vqdmlsl2.i = insertelement <1 x i16> undef, i16 %c, i32 0
+  %vqdmlsl3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32> %vqdmlsl.i, <1 x i16> %vqdmlsl1.i, <1 x i16> %vqdmlsl2.i)
+  %0 = extractelement <1 x i32> %vqdmlsl3.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_vqdmlsls_s32:
+; CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vqdmlsl.i = insertelement <1 x i64> undef, i64 %a, i32 0 ; %a is the wide (i64) operand; %b and %c are i32
+  %vqdmlsl1.i = insertelement <1 x i32> undef, i32 %b, i32 0
+  %vqdmlsl2.i = insertelement <1 x i32> undef, i32 %c, i32 0
+  %vqdmlsl3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64> %vqdmlsl.i, <1 x i32> %vqdmlsl1.i, <1 x i32> %vqdmlsl2.i)
+  %0 = extractelement <1 x i64> %vqdmlsl3.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32>, <1 x i16>, <1 x i16>)
+declare <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64>, <1 x i32>, <1 x i32>)
+
+define i32 @test_vqdmullh_s16(i16 %a, i16 %b) {
+; CHECK-LABEL: test_vqdmullh_s16:
+; CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap each scalar in a one-element vector
+  %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0
+  %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
+  %0 = extractelement <1 x i32> %vqdmull2.i, i32 0 ; lane 0 is the scalar result (i32 from i16 inputs)
+  ret i32 %0
+}
+
+define i64 @test_vqdmulls_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: test_vqdmulls_s32:
+; CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap each scalar in a one-element vector
+  %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0
+  %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
+  %0 = extractelement <1 x i64> %vqdmull2.i, i32 0 ; lane 0 is the scalar result (i64 from i32 inputs)
+  ret i64 %0
+}
+
+declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
+declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)
diff --git a/test/CodeGen/AArch64/neon-scalar-neg.ll b/test/CodeGen/AArch64/neon-scalar-neg.ll
new file mode 100644
index 000000000000..4dc9d519783d
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-neg.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define i64 @test_vnegd_s64(i64 %a) {
+; CHECK-LABEL: test_vnegd_s64:
+; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar in a one-element vector
+  %vneg1.i = tail call <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64> %vneg.i)
+  %0 = extractelement <1 x i64> %vneg1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64>)
+
+define i8 @test_vqnegb_s8(i8 %a) {
+; CHECK-LABEL: test_vqnegb_s8:
+; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}}
+entry:
+  %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0 ; wrap the scalar in a one-element vector
+  %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i)
+  %0 = extractelement <1 x i8> %vqneg1.i, i32 0 ; lane 0 is the scalar result
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>)
+
+define i16 @test_vqnegh_s16(i16 %a) {
+; CHECK-LABEL: test_vqnegh_s16:
+; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar in a one-element vector
+  %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i)
+  %0 = extractelement <1 x i16> %vqneg1.i, i32 0 ; lane 0 is the scalar result
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>)
+
+define i32 @test_vqnegs_s32(i32 %a) {
+; CHECK-LABEL: test_vqnegs_s32:
+; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar in a one-element vector
+  %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i)
+  %0 = extractelement <1 x i32> %vqneg1.i, i32 0 ; lane 0 is the scalar result
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>)
+
+define i64 @test_vqnegd_s64(i64 %a) {
+; CHECK-LABEL: test_vqnegd_s64:
+; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar in a one-element vector
+  %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i)
+  %0 = extractelement <1 x i64> %vqneg1.i, i32 0 ; lane 0 is the scalar result
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll
new file mode 100644
index 000000000000..f21c27bee435
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-recip.ll
@@ -0,0 +1,116 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define float @test_vrecpss_f32(float %a, float %b) {
+; CHECK-LABEL: test_vrecpss_f32:
+; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x float> undef, float %a, i32 0
+  %2 = insertelement <1 x float> undef, float %b, i32 0
+  %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2)
+  %4 = extractelement <1 x float> %3, i32 0
+  ret float %4
+}
+
+define double @test_vrecpsd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vrecpsd_f64:
+; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = insertelement <1 x double> undef, double %a, i32 0
+  %2 = insertelement <1 x double> undef, double %b, i32 0
+  %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2)
+  %4 = extractelement <1 x double> %3, i32 0
+  ret double %4
+}
+
+declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>)
+declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
+
+define float @test_vrsqrtss_f32(float %a, float %b) {
+; CHECK-LABEL: test_vrsqrtss_f32:
+; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x float> undef, float %a, i32 0
+  %2 = insertelement <1 x float> undef, float %b, i32 0
+  %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2)
+  %4 = extractelement <1 x float> %3, i32 0
+  ret float %4
+}
+
+define double @test_vrsqrtsd_f64(double %a, double %b) {
+; CHECK-LABEL: test_vrsqrtsd_f64:
+; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = insertelement <1 x double> undef, double %a, i32 0
+  %2 = insertelement <1 x double> undef, double %b, i32 0
+  %3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2)
+  %4 = extractelement <1 x double> %3, i32 0
+  ret double %4
+}
+
+declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>)
+declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
+
+define float @test_vrecpes_f32(float %a) {
+; CHECK-LABEL: test_vrecpes_f32:
+; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vrecpe.i = insertelement <1 x float> undef, float %a, i32 0
+  %vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i)
+  %0 = extractelement <1 x float> %vrecpe1.i, i32 0
+  ret float %0
+}
+
+define double @test_vrecped_f64(double %a) {
+; CHECK-LABEL: test_vrecped_f64:
+; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vrecpe.i = insertelement <1 x double> undef, double %a, i32 0
+  %vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i)
+  %0 = extractelement <1 x double> %vrecpe1.i, i32 0
+  ret double %0
+}
+
+declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>)
+declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
+
+define float @test_vrecpxs_f32(float %a) {
+; CHECK-LABEL: test_vrecpxs_f32:
+; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vrecpx.i = insertelement <1 x float> undef, float %a, i32 0
+  %vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i)
+  %0 = extractelement <1 x float> %vrecpx1.i, i32 0
+  ret float %0
+}
+
+define double @test_vrecpxd_f64(double %a) {
+; CHECK-LABEL: test_vrecpxd_f64:
+; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vrecpx.i = insertelement <1 x double> undef, double %a, i32 0
+  %vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i)
+  %0 = extractelement <1 x double> %vrecpx1.i, i32 0
+  ret double %0
+}
+
+declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>)
+declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>)
+
+define float @test_vrsqrtes_f32(float %a) {
+; CHECK-LABEL: test_vrsqrtes_f32:
+; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0
+  %vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i)
+  %0 = extractelement <1 x float> %vrsqrte1.i, i32 0
+  ret float %0
+}
+
+define double @test_vrsqrted_f64(double %a) {
+; CHECK-LABEL: test_vrsqrted_f64:
+; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0
+  %vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i)
+  %0 = extractelement <1 x double> %vrsqrte1.i, i32 0
+  ret double %0
+}
+
+declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>)
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
new file mode 100644
index 000000000000..80e8dc339d68
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
@@ -0,0 +1,247 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
+
+define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
+; CHECK-LABEL: test_addp_v1i64:
+        %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
+; CHECK: addp d0, v0.2d
+        ret <1 x i64> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>)
+
+define <1 x float> @test_faddp_v1f32(<2 x float> %a) {
+; CHECK-LABEL: test_faddp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a)
+; CHECK: faddp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>)
+
+define <1 x double> @test_faddp_v1f64(<2 x double> %a) {
+; CHECK-LABEL: test_faddp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a)
+; CHECK: faddp d0, v0.2d
+        ret <1 x double> %val
+}
+
+
+declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>)
+
+define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) {
+; CHECK-LABEL: test_fmaxp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a)
+; CHECK: fmaxp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>)
+
+define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) {
+; CHECK-LABEL: test_fmaxp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a)
+; CHECK: fmaxp d0, v0.2d
+        ret <1 x double> %val
+}
+
+
+declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>)
+
+define <1 x float> @test_fminp_v1f32(<2 x float> %a) {
+; CHECK-LABEL: test_fminp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a)
+; CHECK: fminp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>)
+
+define <1 x double> @test_fminp_v1f64(<2 x double> %a) {
+; CHECK-LABEL: test_fminp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a)
+; CHECK: fminp d0, v0.2d
+        ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>)
+
+define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) {
+; CHECK-LABEL: test_fmaxnmp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a)
+; CHECK: fmaxnmp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>)
+
+define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) {
+; CHECK-LABEL: test_fmaxnmp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a)
+; CHECK: fmaxnmp d0, v0.2d
+        ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>)
+
+define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) {
+; CHECK-LABEL: test_fminnmp_v1f32:
+        %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a)
+; CHECK: fminnmp s0, v0.2s
+        ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>)
+
+define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) {
+; CHECK-LABEL: test_fminnmp_v1f64:
+        %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a)
+; CHECK: fminnmp d0, v0.2d
+        ret <1 x double> %val
+}
+
+define float @test_vaddv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vaddv_f32
+; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %vaddv = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float> %a)
+  %lane0 = extractelement <1 x float> %vaddv, i32 0
+  ret float %lane0
+}
+
+define float @test_vaddvq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vaddvq_f32
+; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %vaddv = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float> %a)
+  %lane0 = extractelement <1 x float> %vaddv, i32 0
+  ret float %lane0
+}
+
+define double @test_vaddvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vaddvq_f64
+; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vaddv = tail call <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double> %a)
+  %lane0 = extractelement <1 x double> %vaddv, i32 0
+  ret double %lane0
+}
+
+define float @test_vmaxv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vmaxv_f32
+; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %vmaxv = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float> %a)
+  %lane0 = extractelement <1 x float> %vmaxv, i32 0
+  ret float %lane0
+}
+
+define double @test_vmaxvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vmaxvq_f64
+; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vmaxv = tail call <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double> %a)
+  %lane0 = extractelement <1 x double> %vmaxv, i32 0
+  ret double %lane0
+}
+
+define float @test_vminv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vminv_f32
+; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %vminv = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float> %a)
+  %lane0 = extractelement <1 x float> %vminv, i32 0
+  ret float %lane0
+}
+
+define double @test_vminvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vminvq_f64
+; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vminv = tail call <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double> %a)
+  %lane0 = extractelement <1 x double> %vminv, i32 0
+  ret double %lane0
+}
+
+define double @test_vmaxnmvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vmaxnmvq_f64
+; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vmaxnmv = tail call <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double> %a)
+  %lane0 = extractelement <1 x double> %vmaxnmv, i32 0
+  ret double %lane0
+}
+
+define float @test_vmaxnmv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vmaxnmv_f32
+; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %vmaxnmv = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float> %a)
+  %lane0 = extractelement <1 x float> %vmaxnmv, i32 0
+  ret float %lane0
+}
+
+define double @test_vminnmvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vminnmvq_f64
+; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vminnmv = tail call <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double> %a)
+  %lane0 = extractelement <1 x double> %vminnmv, i32 0
+  ret double %lane0
+}
+
+define float @test_vminnmv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vminnmv_f32
+; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %vminnmv = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float> %a)
+  %lane0 = extractelement <1 x float> %vminnmv, i32 0
+  ret float %lane0
+}
+
+define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpaddq_s64
+; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %vpadd = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %vpadd
+}
+
+define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpaddq_u64
+; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %vpadd = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %vpadd
+}
+
+define i64 @test_vaddvq_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vaddvq_s64
+; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vaddv = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
+  %lane0 = extractelement <1 x i64> %vaddv, i32 0
+  ret i64 %lane0
+}
+
+define i64 @test_vaddvq_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vaddvq_u64
+; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %vaddv = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
+  %lane0 = extractelement <1 x i64> %vaddv, i32 0
+  ret i64 %lane0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>)
+
+declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
+
+declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double>)
+
+declare <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float>)
+
+declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float>)
diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
new file mode 100644
index 000000000000..83ceb4ebdad5
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_urshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_srshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_urshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_srshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
new file mode 100644
index 000000000000..bd66f80cebb6
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
@@ -0,0 +1,242 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_uqadd_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_sqadd_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_uqsub_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_sqsub_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_uqadd_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_sqadd_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_uqsub_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_sqsub_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_uqadd_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_sqadd_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_uqsub_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+
+define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_sqsub_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_uqadd_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sqadd_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_uqsub_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sqsub_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define i8 @test_vuqaddb_s8(i8 %a, i8 %b) {
+; CHECK-LABEL: test_vuqaddb_s8:
+; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}}
+entry:
+  %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
+  %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
+  %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i)
+  %0 = extractelement <1 x i8> %vuqadd2.i, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
+
+define i16 @test_vuqaddh_s16(i16 %a, i16 %b) {
+; CHECK-LABEL: test_vuqaddh_s16:
+; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
+  %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
+  %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i)
+  %0 = extractelement <1 x i16> %vuqadd2.i, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
+
+define i32 @test_vuqadds_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: test_vuqadds_s32:
+; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
+  %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
+  %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i)
+  %0 = extractelement <1 x i32> %vuqadd2.i, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>)
+
+define i64 @test_vuqaddd_s64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vuqaddd_s64:
+; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i)
+  %0 = extractelement <1 x i64> %vuqadd2.i, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>)
+
+define i8 @test_vsqaddb_u8(i8 %a, i8 %b) {
+; CHECK-LABEL: test_vsqaddb_u8:
+; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}}
+entry:
+  %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
+  %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
+  %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i)
+  %0 = extractelement <1 x i8> %vsqadd2.i, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>)
+
+define i16 @test_vsqaddh_u16(i16 %a, i16 %b) {
+; CHECK-LABEL: test_vsqaddh_u16:
+; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}}
+entry:
+  %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
+  %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
+  %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i)
+  %0 = extractelement <1 x i16> %vsqadd2.i, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>)
+
+define i32 @test_vsqadds_u32(i32 %a, i32 %b) {
+; CHECK-LABEL: test_vsqadds_u32:
+; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
+entry:
+  %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
+  %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
+  %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i)
+  %0 = extractelement <1 x i32> %vsqadd2.i, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>)
+
+define i64 @test_vsqaddd_u64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vsqaddd_u64:
+; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i)
+  %0 = extractelement <1 x i64> %vsqadd2.i, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
new file mode 100644
index 000000000000..0fd67dfa901c
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_uqrshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sqrshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_uqrshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_sqrshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_uqrshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_sqrshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_uqrshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_sqrshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_uqrshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sqrshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
new file mode 100644
index 000000000000..8fdea24a36d7
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_uqshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sqshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)
+
+define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_uqshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
+; CHECK-LABEL: test_sqshl_v1i8_aarch64:
+  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
+  ret <1 x i8> %tmp1
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)
+
+define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_uqshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
+; CHECK-LABEL: test_sqshl_v1i16_aarch64:
+  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  ret <1 x i16> %tmp1
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)
+
+define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_uqshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
+; CHECK-LABEL: test_sqshl_v1i32_aarch64:
+  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  ret <1 x i32> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_uqshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sqshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
new file mode 100644
index 000000000000..62243618171a
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
@@ -0,0 +1,531 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define i64 @test_vshrd_n_s64(i64 %a) {
+; CHECK-LABEL: test_vshrd_n_s64:
+; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsshr1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32)
+
+define i64 @test_vshrd_n_u64(i64 %a) {
+; CHECK-LABEL: test_vshrd_n_u64:
+; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vushr = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vushr1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32)
+
+define i64 @test_vrshrd_n_s64(i64 %a) {
+; CHECK-LABEL: test_vrshrd_n_s64:
+; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsrshr1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32)
+
+define i64 @test_vrshrd_n_u64(i64 %a) {
+; CHECK-LABEL: test_vrshrd_n_u64:
+; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vurshr1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32)
+
+define i64 @test_vsrad_n_s64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vsrad_n_s64:
+; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vssra = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vssra2, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vsrad_n_u64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vsrad_n_u64:
+; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vusra = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vusra2, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vrsrad_n_s64:
+; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsrsra2, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vrsrad_n_u64:
+; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vursra = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vursra2, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vshld_n_s64(i64 %a) {
+; CHECK-LABEL: test_vshld_n_s64:
+; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vshl1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32)
+
+define i64 @test_vshld_n_u64(i64 %a) {
+; CHECK-LABEL: test_vshld_n_u64:
+; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) ; same intrinsic and expected output as test_vshld_n_s64
+  %0 = extractelement <1 x i64> %vshl1, i32 0
+  ret i64 %0
+}
+
+define i8 @test_vqshlb_n_s8(i8 %a) {
+; CHECK-LABEL: test_vqshlb_n_s8:
+; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
+entry:
+  %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0 ; wrap the scalar as <1 x i8> for the intrinsic
+  %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7) ; the i32 7 must be emitted as #7
+  %0 = extractelement <1 x i8> %vsqshl1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32)
+
+define i16 @test_vqshlh_n_s16(i16 %a) {
+; CHECK-LABEL: test_vqshlh_n_s16:
+; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+  %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15) ; the i32 15 must be emitted as #15
+  %0 = extractelement <1 x i16> %vsqshl1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32)
+
+define i32 @test_vqshls_n_s32(i32 %a) {
+; CHECK-LABEL: test_vqshls_n_s32:
+; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+  %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31) ; the i32 31 must be emitted as #31
+  %0 = extractelement <1 x i32> %vsqshl1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32)
+
+define i64 @test_vqshld_n_s64(i64 %a) {
+; CHECK-LABEL: test_vqshld_n_s64:
+; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsqshl1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32)
+
+define i8 @test_vqshlb_n_u8(i8 %a) {
+; CHECK-LABEL: test_vqshlb_n_u8:
+; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
+entry:
+  %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0 ; wrap the scalar as <1 x i8> for the intrinsic
+  %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7) ; the i32 7 must be emitted as #7
+  %0 = extractelement <1 x i8> %vuqshl1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32)
+
+define i16 @test_vqshlh_n_u16(i16 %a) {
+; CHECK-LABEL: test_vqshlh_n_u16:
+; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+  %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15) ; the i32 15 must be emitted as #15
+  %0 = extractelement <1 x i16> %vuqshl1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32)
+
+define i32 @test_vqshls_n_u32(i32 %a) {
+; CHECK-LABEL: test_vqshls_n_u32:
+; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+  %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31) ; the i32 31 must be emitted as #31
+  %0 = extractelement <1 x i32> %vuqshl1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32)
+
+define i64 @test_vqshld_n_u64(i64 %a) {
+; CHECK-LABEL: test_vqshld_n_u64:
+; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vuqshl1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32)
+
+define i8 @test_vqshlub_n_s8(i8 %a) {
+; CHECK-LABEL: test_vqshlub_n_s8:
+; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7
+entry:
+  %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0 ; wrap the scalar as <1 x i8> for the intrinsic
+  %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7) ; the i32 7 must be emitted as #7
+  %0 = extractelement <1 x i8> %vsqshlu1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32)
+
+define i16 @test_vqshluh_n_s16(i16 %a) {
+; CHECK-LABEL: test_vqshluh_n_s16:
+; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15
+entry:
+  %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15) ; the i32 15 must be emitted as #15
+  %0 = extractelement <1 x i16> %vsqshlu1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16>, i32)
+
+define i32 @test_vqshlus_n_s32(i32 %a) {
+; CHECK-LABEL: test_vqshlus_n_s32:
+; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31
+entry:
+  %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31) ; the i32 31 must be emitted as #31
+  %0 = extractelement <1 x i32> %vsqshlu1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32)
+
+define i64 @test_vqshlud_n_s64(i64 %a) {
+; CHECK-LABEL: test_vqshlud_n_s64:
+; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsqshlu1, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32)
+
+define i64 @test_vsrid_n_s64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vsrid_n_s64:
+; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsri2, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vsrid_n_u64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vsrid_n_u64:
+; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) ; same intrinsic and expected output as test_vsrid_n_s64
+  %0 = extractelement <1 x i64> %vsri2, i32 0
+  ret i64 %0
+}
+
+define i64 @test_vslid_n_s64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vslid_n_s64:
+; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) ; the i32 63 must be emitted as #63
+  %0 = extractelement <1 x i64> %vsli2, i32 0
+  ret i64 %0
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32)
+
+define i64 @test_vslid_n_u64(i64 %a, i64 %b) {
+; CHECK-LABEL: test_vslid_n_u64:
+; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
+entry:
+  %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 ; both scalars are wrapped as <1 x i64>
+  %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) ; same intrinsic and expected output as test_vslid_n_s64
+  %0 = extractelement <1 x i64> %vsli2, i32 0
+  ret i64 %0
+}
+
+define i8 @test_vqshrnh_n_s16(i16 %a) {
+; CHECK-LABEL: test_vqshrnh_n_s16:
+; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
+entry:
+  %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8) ; narrowing: i16 source, i8 result, shift #8
+  %0 = extractelement <1 x i8> %vsqshrn1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqshrns_n_s32(i32 %a) {
+; CHECK-LABEL: test_vqshrns_n_s32:
+; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
+entry:
+  %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16) ; narrowing: i32 source, i16 result, shift #16
+  %0 = extractelement <1 x i16> %vsqshrn1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqshrnd_n_s64(i64 %a) {
+; CHECK-LABEL: test_vqshrnd_n_s64:
+; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
+entry:
+  %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32) ; narrowing: i64 source, i32 result, shift #32
+  %0 = extractelement <1 x i32> %vsqshrn1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqshrnh_n_u16(i16 %a) {
+; CHECK-LABEL: test_vqshrnh_n_u16:
+; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
+entry:
+  %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8) ; narrowing: i16 source, i8 result, shift #8
+  %0 = extractelement <1 x i8> %vuqshrn1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqshrns_n_u32(i32 %a) {
+; CHECK-LABEL: test_vqshrns_n_u32:
+; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
+entry:
+  %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16) ; narrowing: i32 source, i16 result, shift #16
+  %0 = extractelement <1 x i16> %vuqshrn1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqshrnd_n_u64(i64 %a) {
+; CHECK-LABEL: test_vqshrnd_n_u64:
+; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
+entry:
+  %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32) ; narrowing: i64 source, i32 result, shift #32
+  %0 = extractelement <1 x i32> %vuqshrn1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqrshrnh_n_s16(i16 %a) {
+; CHECK-LABEL: test_vqrshrnh_n_s16:
+; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
+entry:
+  %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8) ; narrowing: i16 source, i8 result, shift #8
+  %0 = extractelement <1 x i8> %vsqrshrn1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqrshrns_n_s32(i32 %a) {
+; CHECK-LABEL: test_vqrshrns_n_s32:
+; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
+entry:
+  %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16) ; narrowing: i32 source, i16 result, shift #16
+  %0 = extractelement <1 x i16> %vsqrshrn1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqrshrnd_n_s64(i64 %a) {
+; CHECK-LABEL: test_vqrshrnd_n_s64:
+; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
+entry:
+  %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32) ; narrowing: i64 source, i32 result, shift #32
+  %0 = extractelement <1 x i32> %vsqrshrn1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqrshrnh_n_u16(i16 %a) {
+; CHECK-LABEL: test_vqrshrnh_n_u16:
+; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
+entry:
+  %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8) ; narrowing: i16 source, i8 result, shift #8
+  %0 = extractelement <1 x i8> %vuqrshrn1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqrshrns_n_u32(i32 %a) {
+; CHECK-LABEL: test_vqrshrns_n_u32:
+; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
+entry:
+  %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16) ; narrowing: i32 source, i16 result, shift #16
+  %0 = extractelement <1 x i16> %vuqrshrn1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqrshrnd_n_u64(i64 %a) {
+; CHECK-LABEL: test_vqrshrnd_n_u64:
+; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
+entry:
+  %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32) ; narrowing: i64 source, i32 result, shift #32
+  %0 = extractelement <1 x i32> %vuqrshrn1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqshrunh_n_s16(i16 %a) {
+; CHECK-LABEL: test_vqshrunh_n_s16:
+; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
+entry:
+  %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8) ; narrowing: i16 source, i8 result, shift #8
+  %0 = extractelement <1 x i8> %vsqshrun1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqshruns_n_s32(i32 %a) {
+; CHECK-LABEL: test_vqshruns_n_s32:
+; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
+entry:
+  %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16) ; narrowing: i32 source, i16 result, shift #16
+  %0 = extractelement <1 x i16> %vsqshrun1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqshrund_n_s64(i64 %a) {
+; CHECK-LABEL: test_vqshrund_n_s64:
+; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
+entry:
+  %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32) ; narrowing: i64 source, i32 result, shift #32
+  %0 = extractelement <1 x i32> %vsqshrun1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)
+
+define i8 @test_vqrshrunh_n_s16(i16 %a) {
+; CHECK-LABEL: test_vqrshrunh_n_s16:
+; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
+entry:
+  %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0 ; wrap the scalar as <1 x i16> for the intrinsic
+  %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8) ; narrowing: i16 source, i8 result, shift #8
+  %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
+  ret i8 %0
+}
+
+declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)
+
+define i16 @test_vqrshruns_n_s32(i32 %a) {
+; CHECK-LABEL: test_vqrshruns_n_s32:
+; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
+entry:
+  %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0 ; wrap the scalar as <1 x i32> for the intrinsic
+  %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16) ; narrowing: i32 source, i16 result, shift #16
+  %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
+  ret i16 %0
+}
+
+declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)
+
+define i32 @test_vqrshrund_n_s64(i64 %a) {
+; CHECK-LABEL: test_vqrshrund_n_s64:
+; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
+entry:
+  %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0 ; wrap the scalar as <1 x i64> for the intrinsic
+  %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32) ; narrowing: i64 source, i32 result, shift #32
+  %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
+  ret i32 %0
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32)
diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll
new file mode 100644
index 000000000000..1222be50cf4b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-shift.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_ushl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) ; llvm.arm.* intrinsic must select scalar ushl on d registers
+; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sshl_v1i64:
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) ; llvm.arm.* intrinsic must select scalar sshl on d registers
+; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_ushl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) ; llvm.aarch64.* intrinsic must select scalar ushl on d registers
+; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_sshl_v1i64_aarch64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) ; llvm.aarch64.* intrinsic must select scalar sshl on d registers
+; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  ret <1 x i64> %tmp1
+}
+
+
diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll
new file mode 100644
index 000000000000..d45c47685b0f
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_sshll_v8i8:
+; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+  %1 = sext <8 x i8> %a to <8 x i16> ; sext followed by the splat shl should select a single sshll
+  %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_sshll_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_sshll_v4i16:
+; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+  %1 = sext <4 x i16> %a to <4 x i32> ; sext followed by the splat shl should select a single sshll
+  %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_sshll_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_sshll_v2i32:
+; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+  %1 = sext <2 x i32> %a to <2 x i64> ; sext followed by the splat shl should select a single sshll
+  %tmp = shl <2 x i64> %1, <i64 19, i64 19>
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_ushll_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_ushll_v8i8:
+; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+  %1 = zext <8 x i8> %a to <8 x i16> ; zext followed by the splat shl should select a single ushll
+  %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_ushll_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_ushll_v4i16:
+; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+  %1 = zext <4 x i16> %a to <4 x i32> ; zext followed by the splat shl should select a single ushll
+  %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_ushll_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_ushll_v2i32:
+; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+  %1 = zext <2 x i32> %a to <2 x i64> ; zext followed by the splat shl should select a single ushll
+  %tmp = shl <2 x i64> %1, <i64 19, i64 19>
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_sshll2_v16i8:
+; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %2 = sext <8 x i8> %1 to <8 x i16> ; the shuffle above selects lanes 8-15, i.e. the high half of %a
+  %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_sshll2_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_sshll2_v8i16:
+; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %2 = sext <4 x i16> %1 to <4 x i32> ; the shuffle above selects lanes 4-7, i.e. the high half of %a
+  %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_sshll2_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_sshll2_v4i32:
+; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %2 = sext <2 x i32> %1 to <2 x i64> ; the shuffle above selects lanes 2-3, i.e. the high half of %a
+  %tmp = shl <2 x i64> %2, <i64 19, i64 19>
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_ushll2_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_ushll2_v16i8:
+; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %2 = zext <8 x i8> %1 to <8 x i16> ; the shuffle above selects lanes 8-15, i.e. the high half of %a
+  %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_ushll2_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_ushll2_v8i16:
+; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %2 = zext <4 x i16> %1 to <4 x i32> ; the shuffle above selects lanes 4-7, i.e. the high half of %a
+  %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_ushll2_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_ushll2_v4i32:
+; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %2 = zext <2 x i32> %1 to <2 x i64> ; the shuffle above selects lanes 2-3, i.e. the high half of %a
+  %tmp = shl <2 x i64> %2, <i64 19, i64 19>
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_sshll_shl0_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_sshll_shl0_v8i8:
+; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+  %tmp = sext <8 x i8> %a to <8 x i16> ; a bare sext is expected as sshll with shift #0
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_sshll_shl0_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_sshll_shl0_v4i16:
+; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
+  %tmp = sext <4 x i16> %a to <4 x i32> ; a bare sext is expected as sshll with shift #0
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_sshll_shl0_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_sshll_shl0_v2i32:
+; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
+  %tmp = sext <2 x i32> %a to <2 x i64> ; a bare sext is expected as sshll with shift #0
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_ushll_shl0_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_ushll_shl0_v8i8:
+; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+  %tmp = zext <8 x i8> %a to <8 x i16> ; a bare zext is expected as ushll with shift #0
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_ushll_shl0_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_ushll_shl0_v4i16:
+; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
+  %tmp = zext <4 x i16> %a to <4 x i32> ; a bare zext is expected as ushll with shift #0
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_ushll_shl0_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_ushll_shl0_v2i32:
+; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
+  %tmp = zext <2 x i32> %a to <2 x i64> ; a bare zext is expected as ushll with shift #0
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_sshll2_shl0_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_sshll2_shl0_v16i8:
+; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
+  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %tmp = sext <8 x i8> %1 to <8 x i16> ; sext of the high half is expected as sshll2 with shift #0
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_sshll2_shl0_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_sshll2_shl0_v8i16:
+; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %tmp = sext <4 x i16> %1 to <4 x i32> ; sext of the high half is expected as sshll2 with shift #0
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_sshll2_shl0_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_sshll2_shl0_v4i32:
+; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %tmp = sext <2 x i32> %1 to <2 x i64> ; sext of the high half is expected as sshll2 with shift #0
+  ret <2 x i64> %tmp
+}
+
+define <8 x i16> @test_ushll2_shl0_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_ushll2_shl0_v16i8:
+; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
+  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %tmp = zext <8 x i8> %1 to <8 x i16> ; zext of the high half is expected as ushll2 with shift #0
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_ushll2_shl0_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_ushll2_shl0_v8i16:
+; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %tmp = zext <4 x i16> %1 to <4 x i32> ; zext of the high half is expected as ushll2 with shift #0
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_ushll2_shl0_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_ushll2_shl0_v4i32:
+; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %tmp = zext <2 x i32> %1 to <2 x i64> ; zext of the high half is expected as ushll2 with shift #0
+  ret <2 x i64> %tmp
+}
diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll
new file mode 100644
index 000000000000..33b04ceb4895
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-shift.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: test_uqshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; NOTE(review): name says uqshl, but this exercises vshiftu and expects ushl like the test_ushl_* siblings
+; CHECK: ushl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: test_sqshl_v8i8:
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; NOTE(review): name says sqshl, but this exercises vshifts and expects sshl like the test_sshl_* siblings
+; CHECK: sshl v0.8b, v0.8b, v1.8b
+  ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK-LABEL: test_ushl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: ushl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK-LABEL: test_sshl_v16i8:
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: sshl v0.16b, v0.16b, v1.16b
+  ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_ushl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: ushl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sshl_v4i16:
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: sshl v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_ushl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: ushl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sshl_v8i16:
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: sshl v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_ushl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: ushl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sshl_v2i32:
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: sshl v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_ushl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: ushl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sshl_v4i32:
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: sshl v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %tmp1
+}
+
+declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-LABEL: test_ushl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: ushl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-LABEL: test_sshl_v2i64:
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ; operands and result are pinned to v0/v1 by the pattern below
+; CHECK: sshl v0.2d, v0.2d, v1.2d
+  ret <2 x i64> %tmp1
+}
+
+
+define <8 x i8> @test_shl_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_shl_v8i8:
+; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %tmp = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ; constant-splat shift amount should select the immediate form
+  ret <8 x i8> %tmp
+}
+
+define <4 x i16> @test_shl_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_shl_v4i16:
+; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %tmp = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> ; constant-splat shift amount should select the immediate form
+  ret <4 x i16> %tmp
+}
+
+define <2 x i32> @test_shl_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_shl_v2i32:
+; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %tmp = shl <2 x i32> %a, <i32 3, i32 3> ; constant-splat shift amount should select the immediate form
+  ret <2 x i32> %tmp
+}
+
+define <16 x i8> @test_shl_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_shl_v16i8:
+; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %tmp = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ; constant-splat shift amount should select the immediate form
+  ret <16 x i8> %tmp
+}
+
+define <8 x i16> @test_shl_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_shl_v8i16:
+; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %tmp = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ; constant-splat shift amount should select the immediate form
+  ret <8 x i16> %tmp
+}
+
+define <4 x i32> @test_shl_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_shl_v4i32:
+; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %tmp = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> ; constant-splat shift amount should select the immediate form
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_shl_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_shl_v2i64:
+; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63
+  %tmp = shl <2 x i64> %a, <i64 63, i64 63> ; constant-splat shift amount should select the immediate form
+  ret <2 x i64> %tmp
+}
+
diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
new file mode 100644
index 000000000000..d5557c0c8562
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
@@ -0,0 +1,2314 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) { ; plain <16 x i8> load + store should select ld1/st1 (.16b)
+; CHECK-LABEL: test_ldst1_v16i8:
+; CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %tmp = load <16 x i8>* %ptr
+  store <16 x i8> %tmp, <16 x i8>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) { ; plain <8 x i16> load + store should select ld1/st1 (.8h)
+; CHECK-LABEL: test_ldst1_v8i16:
+; CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %tmp = load <8 x i16>* %ptr
+  store <8 x i16> %tmp, <8 x i16>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) { ; plain <4 x i32> load + store should select ld1/st1 (.4s)
+; CHECK-LABEL: test_ldst1_v4i32:
+; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %tmp = load <4 x i32>* %ptr
+  store <4 x i32> %tmp, <4 x i32>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) { ; plain <2 x i64> load + store should select ld1/st1 (.2d)
+; CHECK-LABEL: test_ldst1_v2i64:
+; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %tmp = load <2 x i64>* %ptr
+  store <2 x i64> %tmp, <2 x i64>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) { ; plain <8 x i8> load + store should select ld1/st1 (.8b)
+; CHECK-LABEL: test_ldst1_v8i8:
+; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %tmp = load <8 x i8>* %ptr
+  store <8 x i8> %tmp, <8 x i8>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) { ; plain <4 x i16> load + store should select ld1/st1 (.4h)
+; CHECK-LABEL: test_ldst1_v4i16:
+; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %tmp = load <4 x i16>* %ptr
+  store <4 x i16> %tmp, <4 x i16>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) { ; plain <2 x i32> load + store should select ld1/st1 (.2s)
+; CHECK-LABEL: test_ldst1_v2i32:
+; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %tmp = load <2 x i32>* %ptr
+  store <2 x i32> %tmp, <2 x i32>* %ptr2
+  ret void
+}
+
+define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) { ; plain <1 x i64> load + store should select ld1/st1 (.1d)
+; CHECK-LABEL: test_ldst1_v1i64:
+; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %tmp = load <1 x i64>* %ptr
+  store <1 x i64> %tmp, <1 x i64>* %ptr2
+  ret void
+}
+
+%struct.int8x16x2_t = type { [2 x <16 x i8>] } ; x2 types: one field, an array of two vectors
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.int64x2x2_t = type { [2 x <2 x i64>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.float64x2x2_t = type { [2 x <2 x double>] }
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.int64x1x2_t = type { [2 x <1 x i64>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.float64x1x2_t = type { [2 x <1 x double>] }
+%struct.int8x16x3_t = type { [3 x <16 x i8>] } ; x3 types: one field, an array of three vectors
+%struct.int16x8x3_t = type { [3 x <8 x i16>] }
+%struct.int32x4x3_t = type { [3 x <4 x i32>] }
+%struct.int64x2x3_t = type { [3 x <2 x i64>] }
+%struct.float32x4x3_t = type { [3 x <4 x float>] }
+%struct.float64x2x3_t = type { [3 x <2 x double>] }
+%struct.int8x8x3_t = type { [3 x <8 x i8>] }
+%struct.int16x4x3_t = type { [3 x <4 x i16>] }
+%struct.int32x2x3_t = type { [3 x <2 x i32>] }
+%struct.int64x1x3_t = type { [3 x <1 x i64>] }
+%struct.float32x2x3_t = type { [3 x <2 x float>] }
+%struct.float64x1x3_t = type { [3 x <1 x double>] }
+%struct.int8x16x4_t = type { [4 x <16 x i8>] } ; x4 types: one field, an array of four vectors
+%struct.int16x8x4_t = type { [4 x <8 x i16>] }
+%struct.int32x4x4_t = type { [4 x <4 x i32>] }
+%struct.int64x2x4_t = type { [4 x <2 x i64>] }
+%struct.float32x4x4_t = type { [4 x <4 x float>] }
+%struct.float64x2x4_t = type { [4 x <2 x double>] }
+%struct.int8x8x4_t = type { [4 x <8 x i8>] }
+%struct.int16x4x4_t = type { [4 x <4 x i16>] }
+%struct.int32x2x4_t = type { [4 x <2 x i32>] }
+%struct.int64x1x4_t = type { [4 x <1 x i64>] }
+%struct.float32x2x4_t = type { [4 x <2 x float>] }
+%struct.float64x1x4_t = type { [4 x <1 x double>] }
+
+
+define <16 x i8> @test_vld1q_s8(i8* readonly %a) { ; vld1 intrinsic returning <16 x i8> should select ld1 (.16b)
+; CHECK-LABEL: test_vld1q_s8:
+; CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1)
+  ret <16 x i8> %vld1
+}
+
+define <8 x i16> @test_vld1q_s16(i16* readonly %a) { ; vld1 intrinsic returning <8 x i16> should select ld1 (.8h)
+; CHECK-LABEL: test_vld1q_s16:
+; CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2)
+  ret <8 x i16> %vld1
+}
+
+define <4 x i32> @test_vld1q_s32(i32* readonly %a) { ; vld1 intrinsic returning <4 x i32> should select ld1 (.4s)
+; CHECK-LABEL: test_vld1q_s32:
+; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4)
+  ret <4 x i32> %vld1
+}
+
+define <2 x i64> @test_vld1q_s64(i64* readonly %a) { ; vld1 intrinsic returning <2 x i64> should select ld1 (.2d)
+; CHECK-LABEL: test_vld1q_s64:
+; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8)
+  ret <2 x i64> %vld1
+}
+
+define <4 x float> @test_vld1q_f32(float* readonly %a) { ; vld1 intrinsic returning <4 x float> should select ld1 (.4s)
+; CHECK-LABEL: test_vld1q_f32:
+; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4)
+  ret <4 x float> %vld1
+}
+
+define <2 x double> @test_vld1q_f64(double* readonly %a) { ; vld1 intrinsic returning <2 x double> should select ld1 (.2d)
+; CHECK-LABEL: test_vld1q_f64:
+; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8)
+  ret <2 x double> %vld1
+}
+
+define <8 x i8> @test_vld1_s8(i8* readonly %a) { ; vld1 intrinsic returning <8 x i8> should select ld1 (.8b)
+; CHECK-LABEL: test_vld1_s8:
+; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
+  ret <8 x i8> %vld1
+}
+
+define <4 x i16> @test_vld1_s16(i16* readonly %a) { ; vld1 intrinsic returning <4 x i16> should select ld1 (.4h)
+; CHECK-LABEL: test_vld1_s16:
+; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
+  ret <4 x i16> %vld1
+}
+
+define <2 x i32> @test_vld1_s32(i32* readonly %a) { ; vld1 intrinsic returning <2 x i32> should select ld1 (.2s)
+; CHECK-LABEL: test_vld1_s32:
+; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4)
+  ret <2 x i32> %vld1
+}
+
+define <1 x i64> @test_vld1_s64(i64* readonly %a) { ; vld1 intrinsic returning <1 x i64> should select ld1 (.1d)
+; CHECK-LABEL: test_vld1_s64:
+; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8)
+  ret <1 x i64> %vld1
+}
+
+define <2 x float> @test_vld1_f32(float* readonly %a) { ; vld1 intrinsic returning <2 x float> should select ld1 (.2s)
+; CHECK-LABEL: test_vld1_f32:
+; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4)
+  ret <2 x float> %vld1
+}
+
+define <1 x double> @test_vld1_f64(double* readonly %a) { ; vld1 intrinsic returning <1 x double> should select ld1 (.1d)
+; CHECK-LABEL: test_vld1_f64:
+; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8)
+  ret <1 x double> %vld1
+}
+
+define <8 x i8> @test_vld1_p8(i8* readonly %a) { ; same IR as the s8 case: vld1 returning <8 x i8> should select ld1 (.8b)
+; CHECK-LABEL: test_vld1_p8:
+; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
+  ret <8 x i8> %vld1
+}
+
+define <4 x i16> @test_vld1_p16(i16* readonly %a) { ; same IR as the s16 case: vld1 returning <4 x i16> should select ld1 (.4h)
+; CHECK-LABEL: test_vld1_p16:
+; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
+  ret <4 x i16> %vld1
+}
+
+define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) { ; vld2 intrinsic, 2 x <16 x i8>, should select ld2 (.16b)
+; CHECK-LABEL: test_vld2q_s8:
+; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1)
+  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) { ; vld2 intrinsic, 2 x <8 x i16>, should select ld2 (.8h)
+; CHECK-LABEL: test_vld2q_s16:
+; CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2)
+  %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) { ; vld2 intrinsic, 2 x <4 x i32>, should select ld2 (.4s)
+; CHECK-LABEL: test_vld2q_s32:
+; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4)
+  %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) { ; vld2 intrinsic, 2 x <2 x i64>, should select ld2 (.2d)
+; CHECK-LABEL: test_vld2q_s64:
+; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8)
+  %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1
+  ret %struct.int64x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) { ; vld2 intrinsic, 2 x <4 x float>, should select ld2 (.4s)
+; CHECK-LABEL: test_vld2q_f32:
+; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+  %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) { ; vld2 intrinsic, 2 x <2 x double>, should select ld2 (.2d)
+; CHECK-LABEL: test_vld2q_f64:
+; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8)
+  %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1
+  ret %struct.float64x2x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) { ; vld2 intrinsic, 2 x <8 x i8>, should select ld2 (.8b)
+; CHECK-LABEL: test_vld2_s8:
+; CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1)
+  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) { ; vld2 intrinsic, 2 x <4 x i16>, should select ld2 (.4h)
+; CHECK-LABEL: test_vld2_s16:
+; CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2)
+  %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) { ; vld2 intrinsic, 2 x <2 x i32>, should select ld2 (.2s)
+; CHECK-LABEL: test_vld2_s32:
+; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4)
+  %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) { ; vld2 intrinsic, 2 x <1 x i64>: the expected instruction is a two-register ld1 (.1d), not ld2
+; CHECK-LABEL: test_vld2_s64:
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8)
+  %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1
+  ret %struct.int64x1x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) { ; vld2 intrinsic, 2 x <2 x float>, should select ld2 (.2s)
+; CHECK-LABEL: test_vld2_f32:
+; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4)
+  %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) { ; vld2 intrinsic, 2 x <1 x double>: the expected instruction is a two-register ld1 (.1d), not ld2
+; CHECK-LABEL: test_vld2_f64:
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8)
+  %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1
+  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1
+  ret %struct.float64x1x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) { ; vld3 intrinsic, 3 x <16 x i8>, should select ld3 (.16b)
+; CHECK-LABEL: test_vld3q_s8:
+; CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1)
+  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2
+  ret %struct.int8x16x3_t %.fca.0.2.insert
+}
+
+define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) { ; vld3 intrinsic, 3 x <8 x i16>, should select ld3 (.8h)
+; CHECK-LABEL: test_vld3q_s16:
+; CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2)
+  %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2
+  ret %struct.int16x8x3_t %.fca.0.2.insert
+}
+
+define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) { ; vld3 intrinsic, 3 x <4 x i32>, should select ld3 (.4s)
+; CHECK-LABEL: test_vld3q_s32:
+; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4)
+  %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2
+  ret %struct.int32x4x3_t %.fca.0.2.insert
+}
+
+define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) { ; vld3 intrinsic, 3 x <2 x i64>, should select ld3 (.2d)
+; CHECK-LABEL: test_vld3q_s64:
+; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8)
+  %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2
+  ret %struct.int64x2x3_t %.fca.0.2.insert
+}
+
+define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) { ; vld3 intrinsic, 3 x <4 x float>, should select ld3 (.4s)
+; CHECK-LABEL: test_vld3q_f32:
+; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4)
+  %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2
+  ret %struct.float32x4x3_t %.fca.0.2.insert
+}
+
+define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) { ; vld3 intrinsic, 3 x <2 x double>, should select ld3 (.2d)
+; CHECK-LABEL: test_vld3q_f64:
+; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8)
+  %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2
+  ret %struct.float64x2x3_t %.fca.0.2.insert
+}
+
+define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) { ; vld3 intrinsic, 3 x <8 x i8>, should select ld3 (.8b)
+; CHECK-LABEL: test_vld3_s8:
+; CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1)
+  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2
+  ret %struct.int8x8x3_t %.fca.0.2.insert
+}
+
+define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) { ; vld3 intrinsic, 3 x <4 x i16>, should select ld3 (.4h)
+; CHECK-LABEL: test_vld3_s16:
+; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2)
+  %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2
+  ret %struct.int16x4x3_t %.fca.0.2.insert
+}
+
+define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) { ; vld3 intrinsic, 3 x <2 x i32>, should select ld3 (.2s)
+; CHECK-LABEL: test_vld3_s32:
+; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4)
+  %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2
+  ret %struct.int32x2x3_t %.fca.0.2.insert
+}
+
+define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) { ; vld3 intrinsic, 3 x <1 x i64>: the expected instruction is a three-register ld1 (.1d), not ld3
+; CHECK-LABEL: test_vld3_s64:
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8)
+  %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2
+  ret %struct.int64x1x3_t %.fca.0.2.insert
+}
+
+define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) { ; vld3 intrinsic, 3 x <2 x float>, should select ld3 (.2s)
+; CHECK-LABEL: test_vld3_f32:
+; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4)
+  %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2
+  ret %struct.float32x2x3_t %.fca.0.2.insert
+}
+
+define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) { ; vld3 intrinsic, 3 x <1 x double>: the expected instruction is a three-register ld1 (.1d), not ld3
+; CHECK-LABEL: test_vld3_f64:
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8)
+  %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2
+  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2
+  ret %struct.float64x1x3_t %.fca.0.2.insert
+}
+
+define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) { ; vld4 intrinsic, 4 x <16 x i8>, should select ld4 (.16b)
+; CHECK-LABEL: test_vld4q_s8:
+; CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1)
+  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3
+  ret %struct.int8x16x4_t %.fca.0.3.insert
+}
+
+define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) { ; vld4 intrinsic, 4 x <8 x i16>, should select ld4 (.8h)
+; CHECK-LABEL: test_vld4q_s16:
+; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8*
+  %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2)
+  %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3
+  ret %struct.int16x8x4_t %.fca.0.3.insert
+}
+
+define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) { ; vld4 intrinsic, 4 x <4 x i32>, should select ld4 (.4s)
+; CHECK-LABEL: test_vld4q_s32:
+; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4)
+  %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3
+  ret %struct.int32x4x4_t %.fca.0.3.insert
+}
+
+define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) { ; vld4 intrinsic, 4 x <2 x i64>, should select ld4 (.2d)
+; CHECK-LABEL: test_vld4q_s64:
+; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8)
+  %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3
+  ret %struct.int64x2x4_t %.fca.0.3.insert
+}
+
+define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) { ; vld4 intrinsic, 4 x <4 x float>, should select ld4 (.4s)
+; CHECK-LABEL: test_vld4q_f32:
+; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
+  %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3
+  ret %struct.float32x4x4_t %.fca.0.3.insert
+}
+
+define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) { ; vld4 of four <2 x double> vectors; one ld4 of .2d registers based on xN or sp is expected
+; CHECK-LABEL: test_vld4q_f64
+; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8)
+  %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3
+  ret %struct.float64x2x4_t %.fca.0.3.insert
+}
+
+define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) { ; vld4 of four <8 x i8> vectors; one ld4 of .8b registers based on xN or sp is expected
+; CHECK-LABEL: test_vld4_s8
+; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1) ; %a is already i8*, so no bitcast is needed
+  %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3
+  ret %struct.int8x8x4_t %.fca.0.3.insert
+}
+
+define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) { ; vld4 of four <4 x i16> vectors; one ld4 of .4h registers based on xN or sp is expected
+; CHECK-LABEL: test_vld4_s16
+; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2)
+  %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3
+  ret %struct.int16x4x4_t %.fca.0.3.insert
+}
+
+define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) { ; vld4 of four <2 x i32> vectors; one ld4 of .2s registers based on xN or sp is expected
+; CHECK-LABEL: test_vld4_s32
+; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4)
+  %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3
+  ret %struct.int32x2x4_t %.fca.0.3.insert
+}
+
+define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) { ; vld4 of four <1 x i64> vectors; a four-register ld1 (.1d), not ld4, based on xN or sp is expected
+; CHECK-LABEL: test_vld4_s64
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8)
+  %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3
+  ret %struct.int64x1x4_t %.fca.0.3.insert
+}
+
+define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) { ; vld4 of four <2 x float> vectors; one ld4 of .2s registers based on xN or sp is expected
+; CHECK-LABEL: test_vld4_f32
+; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4)
+  %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3
+  ret %struct.float32x2x4_t %.fca.0.3.insert
+}
+
+define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) { ; vld4 of four <1 x double> vectors; a four-register ld1 (.1d), not ld4, based on xN or sp is expected
+; CHECK-LABEL: test_vld4_f64
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8)
+  %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0 ; unpack the four loaded vectors
+  %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3
+  %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0 ; repack into elements 0..3 of field 0 of the named return struct
+  %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3
+  ret %struct.float64x1x4_t %.fca.0.3.insert
+}
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) ; NEON vld1..vld4 intrinsic declarations, grouped by vector type
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32)
+declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32)
+declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32)
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32)
+declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32)
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32)
+declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32)
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32)
+declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32)
+declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32)
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32)
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32)
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32)
+declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
+declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
+declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32)
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32)
+declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32)
+declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32)
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32)
+declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32)
+declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
+
+define void @test_vst1q_s8(i8* %a, <16 x i8> %b) { ; stores one <16 x i8>; a single-register st1 (.16b) is expected
+; CHECK-LABEL: test_vst1q_s8
+; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1) ; %a is already i8*, so no bitcast is needed
+  ret void
+}
+
+define void @test_vst1q_s16(i16* %a, <8 x i16> %b) { ; stores one <8 x i16>; a single-register st1 (.8h) is expected
+; CHECK-LABEL: test_vst1q_s16
+; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v8i16(i8* %addr, <8 x i16> %b, i32 2)
+  ret void
+}
+
+define void @test_vst1q_s32(i32* %a, <4 x i32> %b) { ; stores one <4 x i32>; a single-register st1 (.4s) is expected
+; CHECK-LABEL: test_vst1q_s32
+; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v4i32(i8* %addr, <4 x i32> %b, i32 4)
+  ret void
+}
+
+define void @test_vst1q_s64(i64* %a, <2 x i64> %b) { ; stores one <2 x i64>; a single-register st1 (.2d) is expected
+; CHECK-LABEL: test_vst1q_s64
+; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v2i64(i8* %addr, <2 x i64> %b, i32 8)
+  ret void
+}
+
+define void @test_vst1q_f32(float* %a, <4 x float> %b) { ; stores one <4 x float>; a single-register st1 (.4s) is expected
+; CHECK-LABEL: test_vst1q_f32
+; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* %addr, <4 x float> %b, i32 4)
+  ret void
+}
+
+define void @test_vst1q_f64(double* %a, <2 x double> %b) { ; stores one <2 x double>; a single-register st1 (.2d) is expected
+; CHECK-LABEL: test_vst1q_f64
+; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v2f64(i8* %addr, <2 x double> %b, i32 8)
+  ret void
+}
+
+define void @test_vst1_s8(i8* %a, <8 x i8> %b) { ; stores one <8 x i8>; a single-register st1 (.8b) is expected
+; CHECK-LABEL: test_vst1_s8
+; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1) ; %a is already i8*, so no bitcast is needed
+  ret void
+}
+
+define void @test_vst1_s16(i16* %a, <4 x i16> %b) { ; stores one <4 x i16>; a single-register st1 (.4h) is expected
+; CHECK-LABEL: test_vst1_s16
+; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* %addr, <4 x i16> %b, i32 2)
+  ret void
+}
+
+define void @test_vst1_s32(i32* %a, <2 x i32> %b) { ; stores one <2 x i32>; a single-register st1 (.2s) is expected
+; CHECK-LABEL: test_vst1_s32
+; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v2i32(i8* %addr, <2 x i32> %b, i32 4)
+  ret void
+}
+
+define void @test_vst1_s64(i64* %a, <1 x i64> %b) { ; stores one <1 x i64>; a single-register st1 (.1d) is expected
+; CHECK-LABEL: test_vst1_s64
+; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v1i64(i8* %addr, <1 x i64> %b, i32 8)
+  ret void
+}
+
+define void @test_vst1_f32(float* %a, <2 x float> %b) { ; stores one <2 x float>; a single-register st1 (.2s) is expected
+; CHECK-LABEL: test_vst1_f32
+; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v2f32(i8* %addr, <2 x float> %b, i32 4)
+  ret void
+}
+
+define void @test_vst1_f64(double* %a, <1 x double> %b) { ; stores one <1 x double>; a single-register st1 (.1d) is expected
+; CHECK-LABEL: test_vst1_f64
+; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.arm.neon.vst1.v1f64(i8* %addr, <1 x double> %b, i32 8)
+  ret void
+}
+
+define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { ; stores two <16 x i8> vectors passed as an array; one st2 (.16b) is expected
+; CHECK-LABEL: test_vst2q_s8
+; CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [2 x <16 x i8>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, i32 1)
+  ret void
+}
+
+define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { ; stores two <8 x i16> vectors passed as an array; one st2 (.8h) is expected
+; CHECK-LABEL: test_vst2q_s16
+; CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <8 x i16>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <8 x i16>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, i32 2)
+  ret void
+}
+
+define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { ; stores two <4 x i32> vectors passed as an array; one st2 (.4s) is expected
+; CHECK-LABEL: test_vst2q_s32
+; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <4 x i32>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <4 x i32>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, i32 4)
+  ret void
+}
+
+define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { ; stores two <2 x i64> vectors passed as an array; one st2 (.2d) is expected
+; CHECK-LABEL: test_vst2q_s64
+; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <2 x i64>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <2 x i64>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, i32 8)
+  ret void
+}
+
+define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) { ; stores two <4 x float> vectors passed as an array; one st2 (.4s) is expected
+; CHECK-LABEL: test_vst2q_f32
+; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <4 x float>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <4 x float>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v4f32(i8* %addr, <4 x float> %vec0, <4 x float> %vec1, i32 4)
+  ret void
+}
+
+define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) { ; stores two <2 x double> vectors passed as an array; one st2 (.2d) is expected
+; CHECK-LABEL: test_vst2q_f64
+; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <2 x double>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <2 x double>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v2f64(i8* %addr, <2 x double> %vec0, <2 x double> %vec1, i32 8)
+  ret void
+}
+
+define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { ; stores two <8 x i8> vectors passed as an array; one st2 (.8b) is expected
+; CHECK-LABEL: test_vst2_s8
+; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [2 x <8 x i8>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <8 x i8>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, i32 1)
+  ret void
+}
+
+define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { ; stores two <4 x i16> vectors passed as an array; one st2 (.4h) is expected
+; CHECK-LABEL: test_vst2_s16
+; CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <4 x i16>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <4 x i16>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v4i16(i8* %addr, <4 x i16> %vec0, <4 x i16> %vec1, i32 2)
+  ret void
+}
+
+define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { ; stores two <2 x i32> vectors passed as an array; one st2 (.2s) is expected
+; CHECK-LABEL: test_vst2_s32
+; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <2 x i32>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <2 x i32>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v2i32(i8* %addr, <2 x i32> %vec0, <2 x i32> %vec1, i32 4)
+  ret void
+}
+
+define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { ; stores two <1 x i64> vectors passed as an array; a two-register st1 (.1d), not st2, is expected
+; CHECK-LABEL: test_vst2_s64
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <1 x i64>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <1 x i64>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v1i64(i8* %addr, <1 x i64> %vec0, <1 x i64> %vec1, i32 8)
+  ret void
+}
+
+define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) { ; stores two <2 x float> vectors passed as an array; one st2 (.2s) is expected
+; CHECK-LABEL: test_vst2_f32
+; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <2 x float>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <2 x float>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, i32 4)
+  ret void
+}
+
+define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) { ; stores two <1 x double> vectors passed as an array; a two-register st1 (.1d), not st2, is expected
+; CHECK-LABEL: test_vst2_f64
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [2 x <1 x double>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [2 x <1 x double>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, i32 8)
+  ret void
+}
+
+define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { ; stores three <16 x i8> vectors passed as an array; one st3 (.16b) is expected
+; CHECK-LABEL: test_vst3q_s8
+; CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [3 x <16 x i8>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i32 1)
+  ret void
+}
+
+define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { ; stores three <8 x i16> vectors passed as an array; one st3 (.8h) is expected
+; CHECK-LABEL: test_vst3q_s16
+; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <8 x i16>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <8 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <8 x i16>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, i32 2)
+  ret void
+}
+
+define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { ; stores three <4 x i32> vectors passed as an array; one st3 (.4s) is expected
+; CHECK-LABEL: test_vst3q_s32
+; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <4 x i32>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <4 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <4 x i32>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, i32 4)
+  ret void
+}
+
+define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { ; stores three <2 x i64> vectors passed as an array; one st3 (.2d) is expected
+; CHECK-LABEL: test_vst3q_s64
+; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <2 x i64>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <2 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x i64>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, i32 8)
+  ret void
+}
+
+define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) { ; stores three <4 x float> vectors passed as an array; one st3 (.4s) is expected
+; CHECK-LABEL: test_vst3q_f32
+; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <4 x float>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <4 x float>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <4 x float>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v4f32(i8* %addr, <4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, i32 4)
+  ret void
+}
+
+define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) { ; stores three <2 x double> vectors passed as an array; one st3 (.2d) is expected
+; CHECK-LABEL: test_vst3q_f64
+; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <2 x double>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <2 x double>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x double>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v2f64(i8* %addr, <2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, i32 8)
+  ret void
+}
+
+define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { ; stores three <8 x i8> vectors passed as an array; one st3 (.8b) is expected
+; CHECK-LABEL: test_vst3_s8
+; CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [3 x <8 x i8>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <8 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <8 x i8>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i32 1)
+  ret void
+}
+
+define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { ; stores three <4 x i16> vectors passed as an array; one st3 (.4h) is expected
+; CHECK-LABEL: test_vst3_s16
+; CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <4 x i16>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <4 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <4 x i16>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v4i16(i8* %addr, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, i32 2)
+  ret void
+}
+
+define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { ; stores three <2 x i32> vectors passed as an array; one st3 (.2s) is expected
+; CHECK-LABEL: test_vst3_s32
+; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <2 x i32>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <2 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x i32>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v2i32(i8* %addr, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, i32 4)
+  ret void
+}
+
+define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { ; stores three <1 x i64> vectors passed as an array; a three-register st1 (.1d), not st3, is expected
+; CHECK-LABEL: test_vst3_s64
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <1 x i64>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <1 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <1 x i64>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v1i64(i8* %addr, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, i32 8)
+  ret void
+}
+
+define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) { ; stores three <2 x float> vectors passed as an array; one st3 (.2s) is expected
+; CHECK-LABEL: test_vst3_f32
+; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <2 x float>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <2 x float>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x float>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, i32 4)
+  ret void
+}
+
+define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) { ; stores three <1 x double> vectors passed as an array; a three-register st1 (.1d), not st3, is expected
+; CHECK-LABEL: test_vst3_f64
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [3 x <1 x double>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [3 x <1 x double>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <1 x double>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, i32 8)
+  ret void
+}
+
+define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { ; stores four <16 x i8> vectors passed as an array; one st4 (.16b) is expected
+; CHECK-LABEL: test_vst4q_s8
+; CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <16 x i8>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i32 1)
+  ret void
+}
+
+define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { ; stores four <8 x i16> vectors passed as an array; one st4 (.8h) is expected
+; CHECK-LABEL: test_vst4q_s16
+; CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [4 x <8 x i16>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [4 x <8 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <8 x i16>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <8 x i16>] %b.coerce, 3
+  tail call void @llvm.arm.neon.vst4.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, i32 2)
+  ret void
+}
+
+define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { ; stores four <4 x i32> vectors passed as an array; one st4 (.4s) is expected
+; CHECK-LABEL: test_vst4q_s32
+; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [4 x <4 x i32>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [4 x <4 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <4 x i32>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <4 x i32>] %b.coerce, 3
+  tail call void @llvm.arm.neon.vst4.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, i32 4)
+  ret void
+}
+
+define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { ; stores four <2 x i64> vectors passed as an array; one st4 (.2d) is expected
+; CHECK-LABEL: test_vst4q_s64
+; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %vec0 = extractvalue [4 x <2 x i64>] %b.coerce, 0 ; split the array argument into its vectors
+  %vec1 = extractvalue [4 x <2 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x i64>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x i64>] %b.coerce, 3
+  tail call void @llvm.arm.neon.vst4.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, <2 x i64> %vec3, i32 8)
+  ret void
+}
+
+define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) { ; four <4 x float> vectors, expected as one st4 of .4s registers
+; CHECK-LABEL: test_vst4q_f32
+; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <4 x float>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <4 x float>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <4 x float>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <4 x float>] %b.coerce, 3
+  %dst = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v4f32(i8* %dst, <4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, i32 4)
+  ret void
+}
+
+define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) { ; four <2 x double> vectors, expected as one st4 of .2d registers
+; CHECK-LABEL: test_vst4q_f64
+; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <2 x double>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x double>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x double>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x double>] %b.coerce, 3
+  %dst = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v2f64(i8* %dst, <2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, i32 8)
+  ret void
+}
+
+define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { ; four <8 x i8> vectors, expected as one st4 of .8b registers (pointer is already i8*)
+; CHECK-LABEL: test_vst4_s8
+; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <8 x i8>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <8 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <8 x i8>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <8 x i8>] %b.coerce, 3
+  tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3, i32 1)
+  ret void
+}
+
+define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { ; four <4 x i16> vectors, expected as one st4 of .4h registers
+; CHECK-LABEL: test_vst4_s16
+; CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <4 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <4 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <4 x i16>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <4 x i16>] %b.coerce, 3
+  %dst = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v4i16(i8* %dst, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3, i32 2)
+  ret void
+}
+
+define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { ; four <2 x i32> vectors, expected as one st4 of .2s registers
+; CHECK-LABEL: test_vst4_s32
+; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <2 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x i32>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x i32>] %b.coerce, 3
+  %dst = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v2i32(i8* %dst, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3, i32 4)
+  ret void
+}
+
+define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { ; four <1 x i64> vectors: the pattern below expects a four-register st1 (.1d), not st4
+; CHECK-LABEL: test_vst4_s64
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <1 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <1 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <1 x i64>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <1 x i64>] %b.coerce, 3
+  %dst = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v1i64(i8* %dst, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3, i32 8)
+  ret void
+}
+
+define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) { ; four <2 x float> vectors, expected as one st4 of .2s registers
+; CHECK-LABEL: test_vst4_f32
+; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <2 x float>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x float>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x float>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x float>] %b.coerce, 3
+  %dst = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v2f32(i8* %dst, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, <2 x float> %vec3, i32 4)
+  ret void
+}
+
+define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) { ; four <1 x double> vectors: the pattern below expects a four-register st1 (.1d), not st4
+; CHECK-LABEL: test_vst4_f64
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %vec0 = extractvalue [4 x <1 x double>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <1 x double>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <1 x double>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <1 x double>] %b.coerce, 3
+  %dst = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst4.v1f64(i8* %dst, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, <1 x double> %vec3, i32 8)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) ; vst1 family: i8* address, one vector, trailing i32 (presumably alignment -- TODO confirm)
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32)
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32)
+declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32)
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32)
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32)
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32)
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32)
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
+declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32)
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) ; vst2 family: same shape with two vectors
+declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32)
+declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32)
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32)
+declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32)
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) ; vst3 family: same shape with three vectors
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
+declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
+declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) ; vst4 family: four vectors; the vst4 tests in this file pass the element size in bytes as the i32
+declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
+declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
+declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
+
+define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a)  { ; vld1x2 of <16 x i8>, expected as one two-register ld1 (.16b)
+; CHECK-LABEL: test_vld1q_s8_x2
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %pair = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
+  %elt0 = extractvalue { <16 x i8>, <16 x i8> } %pair, 0
+  %elt1 = extractvalue { <16 x i8>, <16 x i8> } %pair, 1
+  %agg0 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int8x16x2_t %agg0, <16 x i8> %elt1, 0, 1
+  ret %struct.int8x16x2_t %agg1
+}
+
+define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a)  { ; vld1x2 of <8 x i16>, expected as one two-register ld1 (.8h)
+; CHECK-LABEL: test_vld1q_s16_x2
+; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %src = bitcast i16* %a to i8*
+  %pair = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %src, i32 2)
+  %elt0 = extractvalue { <8 x i16>, <8 x i16> } %pair, 0
+  %elt1 = extractvalue { <8 x i16>, <8 x i16> } %pair, 1
+  %agg0 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int16x8x2_t %agg0, <8 x i16> %elt1, 0, 1
+  ret %struct.int16x8x2_t %agg1
+}
+
+define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a)  { ; vld1x2 of <4 x i32>, expected as one two-register ld1 (.4s)
+; CHECK-LABEL: test_vld1q_s32_x2
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %src = bitcast i32* %a to i8*
+  %pair = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %src, i32 4)
+  %elt0 = extractvalue { <4 x i32>, <4 x i32> } %pair, 0
+  %elt1 = extractvalue { <4 x i32>, <4 x i32> } %pair, 1
+  %agg0 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int32x4x2_t %agg0, <4 x i32> %elt1, 0, 1
+  ret %struct.int32x4x2_t %agg1
+}
+
+define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a)  { ; vld1x2 of <2 x i64>, expected as one two-register ld1 (.2d)
+; CHECK-LABEL: test_vld1q_s64_x2
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %src = bitcast i64* %a to i8*
+  %pair = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %src, i32 8)
+  %elt0 = extractvalue { <2 x i64>, <2 x i64> } %pair, 0
+  %elt1 = extractvalue { <2 x i64>, <2 x i64> } %pair, 1
+  %agg0 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int64x2x2_t %agg0, <2 x i64> %elt1, 0, 1
+  ret %struct.int64x2x2_t %agg1
+}
+
+define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a)  { ; vld1x2 of <4 x float>, expected as one two-register ld1 (.4s)
+; CHECK-LABEL: test_vld1q_f32_x2
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %src = bitcast float* %a to i8*
+  %pair = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %src, i32 4)
+  %elt0 = extractvalue { <4 x float>, <4 x float> } %pair, 0
+  %elt1 = extractvalue { <4 x float>, <4 x float> } %pair, 1
+  %agg0 = insertvalue %struct.float32x4x2_t undef, <4 x float> %elt0, 0, 0
+  %agg1 = insertvalue %struct.float32x4x2_t %agg0, <4 x float> %elt1, 0, 1
+  ret %struct.float32x4x2_t %agg1
+}
+
+
+define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a)  { ; vld1x2 of <2 x double>, expected as one two-register ld1 (.2d)
+; CHECK-LABEL: test_vld1q_f64_x2
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %src = bitcast double* %a to i8*
+  %pair = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %src, i32 8)
+  %elt0 = extractvalue { <2 x double>, <2 x double> } %pair, 0
+  %elt1 = extractvalue { <2 x double>, <2 x double> } %pair, 1
+  %agg0 = insertvalue %struct.float64x2x2_t undef, <2 x double> %elt0, 0, 0
+  %agg1 = insertvalue %struct.float64x2x2_t %agg0, <2 x double> %elt1, 0, 1
+  ret %struct.float64x2x2_t %agg1
+}
+
+define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a)  { ; vld1x2 of <8 x i8>, expected as one two-register ld1 (.8b)
+; CHECK-LABEL: test_vld1_s8_x2
+; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %pair = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1)
+  %elt0 = extractvalue { <8 x i8>, <8 x i8> } %pair, 0
+  %elt1 = extractvalue { <8 x i8>, <8 x i8> } %pair, 1
+  %agg0 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int8x8x2_t %agg0, <8 x i8> %elt1, 0, 1
+  ret %struct.int8x8x2_t %agg1
+}
+
+define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a)  { ; vld1x2 of <4 x i16>, expected as one two-register ld1 (.4h)
+; CHECK-LABEL: test_vld1_s16_x2
+; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %src = bitcast i16* %a to i8*
+  %pair = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %src, i32 2)
+  %elt0 = extractvalue { <4 x i16>, <4 x i16> } %pair, 0
+  %elt1 = extractvalue { <4 x i16>, <4 x i16> } %pair, 1
+  %agg0 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int16x4x2_t %agg0, <4 x i16> %elt1, 0, 1
+  ret %struct.int16x4x2_t %agg1
+}
+
+define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a)  { ; vld1x2 of <2 x i32>, expected as one two-register ld1 (.2s)
+; CHECK-LABEL: test_vld1_s32_x2
+; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %src = bitcast i32* %a to i8*
+  %pair = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %src, i32 4)
+  %elt0 = extractvalue { <2 x i32>, <2 x i32> } %pair, 0
+  %elt1 = extractvalue { <2 x i32>, <2 x i32> } %pair, 1
+  %agg0 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int32x2x2_t %agg0, <2 x i32> %elt1, 0, 1
+  ret %struct.int32x2x2_t %agg1
+}
+
+define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a)  { ; vld1x2 of <1 x i64>, expected as one two-register ld1 (.1d)
+; CHECK-LABEL: test_vld1_s64_x2
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %src = bitcast i64* %a to i8*
+  %pair = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %src, i32 8)
+  %elt0 = extractvalue { <1 x i64>, <1 x i64> } %pair, 0
+  %elt1 = extractvalue { <1 x i64>, <1 x i64> } %pair, 1
+  %agg0 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %elt0, 0, 0
+  %agg1 = insertvalue %struct.int64x1x2_t %agg0, <1 x i64> %elt1, 0, 1
+  ret %struct.int64x1x2_t %agg1
+}
+
+define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a)  { ; vld1x2 of <2 x float>, expected as one two-register ld1 (.2s)
+; CHECK-LABEL: test_vld1_f32_x2
+; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %src = bitcast float* %a to i8*
+  %pair = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %src, i32 4)
+  %elt0 = extractvalue { <2 x float>, <2 x float> } %pair, 0
+  %elt1 = extractvalue { <2 x float>, <2 x float> } %pair, 1
+  %agg0 = insertvalue %struct.float32x2x2_t undef, <2 x float> %elt0, 0, 0
+  %agg1 = insertvalue %struct.float32x2x2_t %agg0, <2 x float> %elt1, 0, 1
+  ret %struct.float32x2x2_t %agg1
+}
+
+define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a)  { ; vld1x2 of <1 x double>, expected as one two-register ld1 (.1d)
+; CHECK-LABEL: test_vld1_f64_x2
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %src = bitcast double* %a to i8*
+  %pair = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %src, i32 8)
+  %elt0 = extractvalue { <1 x double>, <1 x double> } %pair, 0
+  %elt1 = extractvalue { <1 x double>, <1 x double> } %pair, 1
+  %agg0 = insertvalue %struct.float64x1x2_t undef, <1 x double> %elt0, 0, 0
+  %agg1 = insertvalue %struct.float64x1x2_t %agg0, <1 x double> %elt1, 0, 1
+  ret %struct.float64x1x2_t %agg1
+}
+
+define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a)  {
+; CHECK-LABEL: test_vld1q_s8_x3
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1)
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
+  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
+  %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0
+  %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1
+  %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2
+  ret %struct.int8x16x3_t %7
+}
+
+define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a)  {
+; CHECK-LABEL: test_vld1q_s16_x3
+; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
+  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
+  %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
+  %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
+  %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0
+  %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1
+  %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2
+  ret %struct.int16x8x3_t %8
+}
+
+define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a)  {
+; CHECK-LABEL: test_vld1q_s32_x3
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i32* %a to i8*
+  %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4)
+  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
+  %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
+  %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
+  %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0
+  %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1
+  %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2
+  ret %struct.int32x4x3_t %8
+}
+
+define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a)  {
+; CHECK-LABEL: test_vld1q_s64_x3
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i64* %a to i8*
+  %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
+  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
+  %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
+  %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
+  %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0
+  %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1
+  %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2
+  ret %struct.int64x2x3_t %8
+}
+
+define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a)  {
+; CHECK-LABEL: test_vld1q_f32_x3
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast float* %a to i8*
+  %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4)
+  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0
+  %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1
+  %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2
+  %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0
+  %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1
+  %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2
+  ret %struct.float32x4x3_t %8
+}
+
+
+define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a)  {
+; CHECK-LABEL: test_vld1q_f64_x3
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast double* %a to i8*
+  %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8)
+  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0
+  %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1
+  %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2
+  %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0
+  %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1
+  %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2
+  ret %struct.float64x2x3_t %8
+}
+
+define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a)  {
+; CHECK-LABEL: test_vld1_s8_x3
+; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1)
+  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
+  %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
+  %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
+  %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0
+  %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1
+  %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2
+  ret %struct.int8x8x3_t %7
+}
+
+define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a)  {
+; CHECK-LABEL: test_vld1_s16_x3
+; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2)
+  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
+  %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
+  %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
+  %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0
+  %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1
+  %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2
+  ret %struct.int16x4x3_t %8
+}
+
+define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a)  {
+; CHECK-LABEL: test_vld1_s32_x3
+; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i32* %a to i8*
+  %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4)
+  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
+  %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
+  %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
+  %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0
+  %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1
+  %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2
+  ret %struct.int32x2x3_t %8
+}
+
+define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a)  {
+; CHECK-LABEL: test_vld1_s64_x3
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i64* %a to i8*
+  %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8)
+  %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
+  %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
+  %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
+  %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0
+  %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1
+  %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2
+  ret %struct.int64x1x3_t %8
+}
+
+define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a)  {
+; CHECK-LABEL: test_vld1_f32_x3
+; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast float* %a to i8*
+  %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4)
+  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0
+  %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1
+  %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2
+  %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0
+  %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1
+  %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2
+  ret %struct.float32x2x3_t %8
+}
+
+
+define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a)  {
+; CHECK-LABEL: test_vld1_f64_x3
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast double* %a to i8*
+  %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8)
+  %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0
+  %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1
+  %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2
+  %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0
+  %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1
+  %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2
+  ret %struct.float64x1x3_t %8
+}
+
+define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a)  {
+; CHECK-LABEL: test_vld1q_s8_x4
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1)
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
+  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
+  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
+  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
+  %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0
+  %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1
+  %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2
+  %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3
+  ret %struct.int8x16x4_t %9
+}
+
+define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a)  {
+; CHECK-LABEL: test_vld1q_s16_x4
+; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2)
+  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
+  %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
+  %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
+  %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3
+  %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0
+  %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1
+  %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2
+  %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3
+  ret %struct.int16x8x4_t %10
+}
+
+define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a)  {
+; CHECK-LABEL: test_vld1q_s32_x4
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i32* %a to i8*
+  %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4)
+  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
+  %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
+  %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
+  %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3
+  %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0
+  %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1
+  %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2
+  %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3
+  ret %struct.int32x4x4_t %10
+}
+
+define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a)  {
+; CHECK-LABEL: test_vld1q_s64_x4
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i64* %a to i8*
+  %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8)
+  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
+  %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
+  %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
+  %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3
+  %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0
+  %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1
+  %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2
+  %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3
+  ret %struct.int64x2x4_t %10
+}
+
+define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a)  {
+; CHECK-LABEL: test_vld1q_f32_x4
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast float* %a to i8*
+  %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
+  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
+  %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
+  %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2
+  %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
+  %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0
+  %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1
+  %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2
+  %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3
+  ret %struct.float32x4x4_t %10
+}
+
+define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a)  {
+; CHECK-LABEL: test_vld1q_f64_x4
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched (it hid a .4s typo here).
+  %1 = bitcast double* %a to i8*
+  %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8)
+  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
+  %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
+  %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
+  %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
+  %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0
+  %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1
+  %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2
+  %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3
+  ret %struct.float64x2x4_t %10
+}
+
+define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a)  {
+; CHECK-LABEL: test_vld1_s8_x4
+; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
+  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
+  %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
+  %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
+  %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3
+  %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0
+  %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1
+  %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2
+  %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3
+  ret %struct.int8x8x4_t %9
+}
+
+define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a)  {
+; CHECK-LABEL: test_vld1_s16_x4
+; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+; Keep the whole pattern on the line above: a wrapped tail is an ordinary comment and is never matched.
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2)
+  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
+  %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
+  %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
+  %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3
+  %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0
+  %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1
+  %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2
+  %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3
+  ret %struct.int16x4x4_t %10
+}
+
+define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a)  {
+; CHECK-LABEL: test_vld1_s32_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i32* %a to i8*
+  %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4)
+  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
+  %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
+  %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
+  %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
+  %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0
+  %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1
+  %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2
+  %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3
+  ret %struct.int32x2x4_t %10
+}
+
+define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a)  {
+; CHECK-LABEL: test_vld1_s64_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast i64* %a to i8*
+  %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8)
+  %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
+  %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
+  %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
+  %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3
+  %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0
+  %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1
+  %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2
+  %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3
+  ret %struct.int64x1x4_t %10
+}
+
+define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a)  {
+; CHECK-LABEL: test_vld1_f32_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = bitcast float* %a to i8*
+  %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4)
+  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0
+  %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1
+  %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2
+  %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3
+  %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0
+  %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1
+  %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2
+  %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3
+  ret %struct.float32x2x4_t %10
+}
+
+
+define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a)  {
+; CHECK-LABEL: test_vld1_f64_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = bitcast double* %a to i8*
+  %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8)
+  %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0
+  %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1
+  %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2
+  %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3
+  %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0
+  %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1
+  %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2
+  %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3
+  ret %struct.float64x1x4_t %10
+}
+
+define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b)  {
+; CHECK-LABEL: test_vst1q_s8_x2
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <16 x i8>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <16 x i8>] %b, 1 ; element 1 of the vector pair
+  tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %first, <16 x i8> %second, i32 1) ; store both vectors through %a
+  ret void
+}
+
+define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b)  {
+; CHECK-LABEL: test_vst1q_s16_x2
+; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <8 x i16>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <8 x i16>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %dst, <8 x i16> %first, <8 x i16> %second, i32 2)
+  ret void
+}
+
+define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b)  {
+; CHECK-LABEL: test_vst1q_s32_x2
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <4 x i32>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <4 x i32>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %dst, <4 x i32> %first, <4 x i32> %second, i32 4)
+  ret void
+}
+
+define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b)  {
+; CHECK-LABEL: test_vst1q_s64_x2
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <2 x i64>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <2 x i64>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %dst, <2 x i64> %first, <2 x i64> %second, i32 8)
+  ret void
+}
+
+define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b)  {
+; CHECK-LABEL: test_vst1q_f32_x2
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <4 x float>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <4 x float>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %dst, <4 x float> %first, <4 x float> %second, i32 4)
+  ret void
+}
+
+
+define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b)  {
+; CHECK-LABEL: test_vst1q_f64_x2
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <2 x double>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <2 x double>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %dst, <2 x double> %first, <2 x double> %second, i32 8)
+  ret void
+}
+
+define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b)  {
+; CHECK-LABEL: test_vst1_s8_x2
+; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <8 x i8>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <8 x i8>] %b, 1 ; element 1 of the vector pair
+  tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %first, <8 x i8> %second, i32 1) ; store both vectors through %a
+  ret void
+}
+
+define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b)  {
+; CHECK-LABEL: test_vst1_s16_x2
+; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <4 x i16>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <4 x i16>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %dst, <4 x i16> %first, <4 x i16> %second, i32 2)
+  ret void
+}
+
+define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b)  {
+; CHECK-LABEL: test_vst1_s32_x2
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <2 x i32>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <2 x i32>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %dst, <2 x i32> %first, <2 x i32> %second, i32 4)
+  ret void
+}
+
+define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b)  {
+; CHECK-LABEL: test_vst1_s64_x2
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <1 x i64>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <1 x i64>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %dst, <1 x i64> %first, <1 x i64> %second, i32 8)
+  ret void
+}
+
+define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b)  {
+; CHECK-LABEL: test_vst1_f32_x2
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <2 x float>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <2 x float>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %dst, <2 x float> %first, <2 x float> %second, i32 4)
+  ret void
+}
+
+define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b)  {
+; CHECK-LABEL: test_vst1_f64_x2
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %first = extractvalue [2 x <1 x double>] %b, 0 ; element 0 of the vector pair
+  %second = extractvalue [2 x <1 x double>] %b, 1 ; element 1 of the vector pair
+  %dst = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %dst, <1 x double> %first, <1 x double> %second, i32 8)
+  ret void
+}
+
+define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b)  {
+; CHECK-LABEL: test_vst1q_s8_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <16 x i8>] %b, 0
+  %2 = extractvalue [3 x <16 x i8>] %b, 1
+  %3 = extractvalue [3 x <16 x i8>] %b, 2
+  tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1)
+  ret void
+}
+
+define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b)  {
+; CHECK-LABEL: test_vst1q_s16_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <8 x i16>] %b, 0
+  %2 = extractvalue [3 x <8 x i16>] %b, 1
+  %3 = extractvalue [3 x <8 x i16>] %b, 2
+  %4 = bitcast i16* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2)
+  ret void
+}
+
+define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b)  {
+; CHECK-LABEL: test_vst1q_s32_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <4 x i32>] %b, 0
+  %2 = extractvalue [3 x <4 x i32>] %b, 1
+  %3 = extractvalue [3 x <4 x i32>] %b, 2
+  %4 = bitcast i32* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4)
+  ret void
+}
+
+define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b)  {
+; CHECK-LABEL: test_vst1q_s64_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <2 x i64>] %b, 0
+  %2 = extractvalue [3 x <2 x i64>] %b, 1
+  %3 = extractvalue [3 x <2 x i64>] %b, 2
+  %4 = bitcast i64* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8)
+  ret void
+}
+
+define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b)  {
+; CHECK-LABEL: test_vst1q_f32_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <4 x float>] %b, 0
+  %2 = extractvalue [3 x <4 x float>] %b, 1
+  %3 = extractvalue [3 x <4 x float>] %b, 2
+  %4 = bitcast float* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4)
+  ret void
+}
+
+define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b)  {
+; CHECK-LABEL: test_vst1q_f64_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <2 x double>] %b, 0
+  %2 = extractvalue [3 x <2 x double>] %b, 1
+  %3 = extractvalue [3 x <2 x double>] %b, 2
+  %4 = bitcast double* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8)
+  ret void
+}
+
+define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b)  {
+; CHECK-LABEL: test_vst1_s8_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <8 x i8>] %b, 0
+  %2 = extractvalue [3 x <8 x i8>] %b, 1
+  %3 = extractvalue [3 x <8 x i8>] %b, 2
+  tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
+  ret void
+}
+
+define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b)  {
+; CHECK-LABEL: test_vst1_s16_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <4 x i16>] %b, 0
+  %2 = extractvalue [3 x <4 x i16>] %b, 1
+  %3 = extractvalue [3 x <4 x i16>] %b, 2
+  %4 = bitcast i16* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
+  ret void
+}
+
+define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b)  {
+; CHECK-LABEL: test_vst1_s32_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <2 x i32>] %b, 0
+  %2 = extractvalue [3 x <2 x i32>] %b, 1
+  %3 = extractvalue [3 x <2 x i32>] %b, 2
+  %4 = bitcast i32* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
+  ret void
+}
+
+define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b)  {
+; CHECK-LABEL: test_vst1_s64_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <1 x i64>] %b, 0
+  %2 = extractvalue [3 x <1 x i64>] %b, 1
+  %3 = extractvalue [3 x <1 x i64>] %b, 2
+  %4 = bitcast i64* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
+  ret void
+}
+
+define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b)  {
+; CHECK-LABEL: test_vst1_f32_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <2 x float>] %b, 0
+  %2 = extractvalue [3 x <2 x float>] %b, 1
+  %3 = extractvalue [3 x <2 x float>] %b, 2
+  %4 = bitcast float* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
+  ret void
+}
+
+define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b)  {
+; CHECK-LABEL: test_vst1_f64_x3
+; The three-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [3 x <1 x double>] %b, 0
+  %2 = extractvalue [3 x <1 x double>] %b, 1
+  %3 = extractvalue [3 x <1 x double>] %b, 2
+  %4 = bitcast double* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8)
+  ret void
+}
+
+define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b)  {
+; CHECK-LABEL: test_vst1q_s8_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <16 x i8>] %b, 0
+  %2 = extractvalue [4 x <16 x i8>] %b, 1
+  %3 = extractvalue [4 x <16 x i8>] %b, 2
+  %4 = extractvalue [4 x <16 x i8>] %b, 3
+  tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1)
+  ret void
+}
+
+define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b)  {
+; CHECK-LABEL: test_vst1q_s16_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <8 x i16>] %b, 0
+  %2 = extractvalue [4 x <8 x i16>] %b, 1
+  %3 = extractvalue [4 x <8 x i16>] %b, 2
+  %4 = extractvalue [4 x <8 x i16>] %b, 3
+  %5 = bitcast i16* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2)
+  ret void
+}
+
+define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b)  {
+; CHECK-LABEL: test_vst1q_s32_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <4 x i32>] %b, 0
+  %2 = extractvalue [4 x <4 x i32>] %b, 1
+  %3 = extractvalue [4 x <4 x i32>] %b, 2
+  %4 = extractvalue [4 x <4 x i32>] %b, 3
+  %5 = bitcast i32* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4)
+  ret void
+}
+
+define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b)  {
+; CHECK-LABEL: test_vst1q_s64_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <2 x i64>] %b, 0
+  %2 = extractvalue [4 x <2 x i64>] %b, 1
+  %3 = extractvalue [4 x <2 x i64>] %b, 2
+  %4 = extractvalue [4 x <2 x i64>] %b, 3
+  %5 = bitcast i64* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8)
+  ret void
+}
+
+define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b)  {
+; CHECK-LABEL: test_vst1q_f32_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <4 x float>] %b, 0
+  %2 = extractvalue [4 x <4 x float>] %b, 1
+  %3 = extractvalue [4 x <4 x float>] %b, 2
+  %4 = extractvalue [4 x <4 x float>] %b, 3
+  %5 = bitcast float* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
+  ret void
+}
+
+define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b)  {
+; CHECK-LABEL: test_vst1q_f64_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <2 x double>] %b, 0
+  %2 = extractvalue [4 x <2 x double>] %b, 1
+  %3 = extractvalue [4 x <2 x double>] %b, 2
+  %4 = extractvalue [4 x <2 x double>] %b, 3
+  %5 = bitcast double* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
+  ret void
+}
+
+define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b)  {
+; CHECK-LABEL: test_vst1_s8_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <8 x i8>] %b, 0
+  %2 = extractvalue [4 x <8 x i8>] %b, 1
+  %3 = extractvalue [4 x <8 x i8>] %b, 2
+  %4 = extractvalue [4 x <8 x i8>] %b, 3
+  tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1)
+  ret void
+}
+
+define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b)  {
+; CHECK-LABEL: test_vst1_s16_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <4 x i16>] %b, 0
+  %2 = extractvalue [4 x <4 x i16>] %b, 1
+  %3 = extractvalue [4 x <4 x i16>] %b, 2
+  %4 = extractvalue [4 x <4 x i16>] %b, 3
+  %5 = bitcast i16* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2)
+  ret void
+}
+
+define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b)  {
+; CHECK-LABEL: test_vst1_s32_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <2 x i32>] %b, 0
+  %2 = extractvalue [4 x <2 x i32>] %b, 1
+  %3 = extractvalue [4 x <2 x i32>] %b, 2
+  %4 = extractvalue [4 x <2 x i32>] %b, 3
+  %5 = bitcast i32* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4)
+  ret void
+}
+
+define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b)  {
+; CHECK-LABEL: test_vst1_s64_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <1 x i64>] %b, 0
+  %2 = extractvalue [4 x <1 x i64>] %b, 1
+  %3 = extractvalue [4 x <1 x i64>] %b, 2
+  %4 = extractvalue [4 x <1 x i64>] %b, 3
+  %5 = bitcast i64* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8)
+  ret void
+}
+
+define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b)  {
+; CHECK-LABEL: test_vst1_f32_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <2 x float>] %b, 0
+  %2 = extractvalue [4 x <2 x float>] %b, 1
+  %3 = extractvalue [4 x <2 x float>] %b, 2
+  %4 = extractvalue [4 x <2 x float>] %b, 3
+  %5 = bitcast float* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4)
+  ret void
+}
+
+define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b)  {
+; CHECK-LABEL: test_vst1_f64_x4
+; The four-register list and the base address are matched by a single directive so FileCheck verifies all of them.
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  %1 = extractvalue [4 x <1 x double>] %b, 0
+  %2 = extractvalue [4 x <1 x double>] %b, 1
+  %3 = extractvalue [4 x <1 x double>] %b, 2
+  %4 = extractvalue [4 x <1 x double>] %b, 3
+  %5 = bitcast double* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8)
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
+declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32)
+declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32)
+declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32)
+declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32)
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32)
+declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32)
+declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32)
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32)
+declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32)
+declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
+declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll
new file mode 100644
index 000000000000..3f28320f23d5
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-ldst-one.ll
@@ -0,0 +1,2113 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+%struct.int8x16x2_t = type { [2 x <16 x i8>] }
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.int64x2x2_t = type { [2 x <2 x i64>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.float64x2x2_t = type { [2 x <2 x double>] }
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.int64x1x2_t = type { [2 x <1 x i64>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.float64x1x2_t = type { [2 x <1 x double>] }
+%struct.int8x16x3_t = type { [3 x <16 x i8>] }
+%struct.int16x8x3_t = type { [3 x <8 x i16>] }
+%struct.int32x4x3_t = type { [3 x <4 x i32>] }
+%struct.int64x2x3_t = type { [3 x <2 x i64>] }
+%struct.float32x4x3_t = type { [3 x <4 x float>] }
+%struct.float64x2x3_t = type { [3 x <2 x double>] }
+%struct.int8x8x3_t = type { [3 x <8 x i8>] }
+%struct.int16x4x3_t = type { [3 x <4 x i16>] }
+%struct.int32x2x3_t = type { [3 x <2 x i32>] }
+%struct.int64x1x3_t = type { [3 x <1 x i64>] }
+%struct.float32x2x3_t = type { [3 x <2 x float>] }
+%struct.float64x1x3_t = type { [3 x <1 x double>] }
+%struct.int8x16x4_t = type { [4 x <16 x i8>] }
+%struct.int16x8x4_t = type { [4 x <8 x i16>] }
+%struct.int32x4x4_t = type { [4 x <4 x i32>] }
+%struct.int64x2x4_t = type { [4 x <2 x i64>] }
+%struct.float32x4x4_t = type { [4 x <4 x float>] }
+%struct.float64x2x4_t = type { [4 x <2 x double>] }
+%struct.int8x8x4_t = type { [4 x <8 x i8>] }
+%struct.int16x4x4_t = type { [4 x <4 x i16>] }
+%struct.int32x2x4_t = type { [4 x <2 x i32>] }
+%struct.int64x1x4_t = type { [4 x <1 x i64>] }
+%struct.float32x2x4_t = type { [4 x <2 x float>] }
+%struct.float64x1x4_t = type { [4 x <1 x double>] }
+
+define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld1q_dup_s8
+; CHECK: ld1r {{{v[0-9]+}}.16b}, [x0]
+entry:
+  %0 = load i8* %a, align 1
+  %1 = insertelement <16 x i8> undef, i8 %0, i32 0
+  %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %lane
+}
+
+define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld1q_dup_s16
+; CHECK: ld1r {{{v[0-9]+}}.8h}, [x0]
+entry:
+  %0 = load i16* %a, align 2
+  %1 = insertelement <8 x i16> undef, i16 %0, i32 0
+  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %lane
+}
+
+define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld1q_dup_s32
+; CHECK: ld1r {{{v[0-9]+}}.4s}, [x0]
+entry:
+  %0 = load i32* %a, align 4
+  %1 = insertelement <4 x i32> undef, i32 %0, i32 0
+  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %lane
+}
+
+define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld1q_dup_s64
+; CHECK: ld1r {{{v[0-9]+}}.2d}, [x0]
+entry:
+  %0 = load i64* %a, align 8
+  %1 = insertelement <2 x i64> undef, i64 %0, i32 0
+  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %lane
+}
+
+define <4 x float> @test_vld1q_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld1q_dup_f32
+; CHECK: ld1r {{{v[0-9]+}}.4s}, [x0]
+entry:
+  %0 = load float* %a, align 4
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %lane
+}
+
+define <2 x double> @test_vld1q_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld1q_dup_f64
+; CHECK: ld1r {{{v[0-9]+}}.2d}, [x0]
+entry:
+  %0 = load double* %a, align 8
+  %1 = insertelement <2 x double> undef, double %0, i32 0
+  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %lane
+}
+
+define <8 x i8> @test_vld1_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld1_dup_s8
+; CHECK: ld1r {{{v[0-9]+}}.8b}, [x0]
+entry:
+  %0 = load i8* %a, align 1
+  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  ret <8 x i8> %lane
+}
+
+define <4 x i16> @test_vld1_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld1_dup_s16
+; CHECK: ld1r {{{v[0-9]+}}.4h}, [x0]
+entry:
+  %0 = load i16* %a, align 2
+  %1 = insertelement <4 x i16> undef, i16 %0, i32 0
+  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
+  ret <4 x i16> %lane
+}
+
+define <2 x i32> @test_vld1_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld1_dup_s32
+; CHECK: ld1r {{{v[0-9]+}}.2s}, [x0]
+entry:
+  %0 = load i32* %a, align 4
+  %1 = insertelement <2 x i32> undef, i32 %0, i32 0
+  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
+  ret <2 x i32> %lane
+}
+
+define <1 x i64> @test_vld1_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld1_dup_s64
+; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
+entry:
+  %0 = load i64* %a, align 8
+  %1 = insertelement <1 x i64> undef, i64 %0, i32 0
+  ret <1 x i64> %1
+}
+
+define <2 x float> @test_vld1_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld1_dup_f32
+; CHECK: ld1r {{{v[0-9]+}}.2s}, [x0]
+entry:
+  %0 = load float* %a, align 4
+  %1 = insertelement <2 x float> undef, float %0, i32 0
+  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
+  ret <2 x float> %lane
+}
+
+define <1 x double> @test_vld1_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld1_dup_f64
+; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
+entry:
+  %0 = load double* %a, align 8
+  %1 = insertelement <1 x double> undef, double %0, i32 0
+  ret <1 x double> %1
+}
+
+define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld2q_dup_s8
+; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
+entry:
+  %vld_dup = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
+  %0 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 0
+  %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
+  %1 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 1
+  %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
+  ret %struct.int8x16x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld2q_dup_s16
+; CHECK: ld2r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0]
+entry:
+  %0 = bitcast i16* %a to i8*
+  %vld_dup = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
+  %1 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 0
+  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
+  %2 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 1
+  %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
+  ret %struct.int16x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld2q_dup_s32
+; CHECK: ld2r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
+entry:
+  %0 = bitcast i32* %a to i8*
+  %vld_dup = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
+  %1 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 0
+  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %2 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 1
+  %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
+  ret %struct.int32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld2q_dup_s64
+; CHECK: ld2r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
+entry:
+  %0 = bitcast i64* %a to i8*
+  %vld_dup = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
+  %1 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 0
+  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 1
+  %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
+  ret %struct.int64x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld2q_dup_f32
+; CHECK: ld2r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
+entry:
+  %0 = bitcast float* %a to i8*
+  %vld_dup = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
+  %1 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 0
+  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+  %2 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 1
+  %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
+  ret %struct.float32x4x2_t %.fca.0.1.insert
+}
+
+define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld2q_dup_f64
+; CHECK: ld2r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
+entry:
+  %0 = bitcast double* %a to i8*
+  %vld_dup = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
+  %1 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 0
+  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 1
+  %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
+  ret %struct.float64x2x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld2_dup_s8
+; CHECK: ld2r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0]
+entry:
+  %vld_dup = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+  %0 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0
+  %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
+  %1 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 1
+  %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
+  ret %struct.int8x8x2_t %.fca.0.1.insert
+}
+
+define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld2_dup_s16
+; CHECK: ld2r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0]
+entry:
+  %0 = bitcast i16* %a to i8*
+  %vld_dup = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+  %1 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 0
+  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
+  %2 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 1
+  %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
+  ret %struct.int16x4x2_t %.fca.0.1.insert
+}
+
+define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld2_dup_s32
+; CHECK: ld2r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
+entry:
+  %0 = bitcast i32* %a to i8*
+  %vld_dup = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
+  %1 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 0
+  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 1
+  %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
+  ret %struct.int32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld2_dup_s64
+; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
+entry:
+  %0 = bitcast i64* %a to i8*
+  %vld_dup = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %0, i32 8)
+  %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 0
+  %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 1
+  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
+  ret %struct.int64x1x2_t %.fca.0.1.insert
+}
+
+define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld2_dup_f32
+; CHECK: ld2r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
+entry:
+  %0 = bitcast float* %a to i8*
+  %vld_dup = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
+  %1 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 0
+  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 1
+  %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
+  ret %struct.float32x2x2_t %.fca.0.1.insert
+}
+
+define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld2_dup_f64
+; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
+entry:
+  %0 = bitcast double* %a to i8*
+  %vld_dup = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %0, i32 8)
+  %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 0
+  %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 1
+  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
+  ret %struct.float64x1x2_t %.fca.0.1.insert
+}
+
+define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld3q_dup_s8
+; CHECK: ld3r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
+entry:
+  %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
+  %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0
+  %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
+  %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1
+  %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2
+  %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2
+  ret %struct.int8x16x3_t %.fca.0.2.insert
+}
+
+define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld3q_dup_s16
+; CHECK: ld3r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0]
+entry:
+  %0 = bitcast i16* %a to i8*
+  %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
+  %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0
+  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
+  %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1
+  %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
+  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2
+  %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2
+  ret %struct.int16x8x3_t %.fca.0.2.insert
+}
+
+define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld3q_dup_s32
+; CHECK: ld3r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
+entry:
+  %0 = bitcast i32* %a to i8*
+  %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
+  %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0
+  %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1
+  %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
+  %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2
+  %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2
+  ret %struct.int32x4x3_t %.fca.0.2.insert
+}
+
+define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld3q_dup_s64
+; CHECK: ld3r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
+entry:
+  %0 = bitcast i64* %a to i8*
+  %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
+  %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0
+  %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1
+  %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
+  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2
+  %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2
+  ret %struct.int64x2x3_t %.fca.0.2.insert
+}
+
+define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld3q_dup_f32
+; CHECK: ld3r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
+entry:
+  %0 = bitcast float* %a to i8*
+  %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
+  %1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0
+  %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+  %2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1
+  %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
+  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2
+  %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2
+  ret %struct.float32x4x3_t %.fca.0.2.insert
+}
+
+define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld3q_dup_f64
+; CHECK: ld3r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
+entry:
+  %0 = bitcast double* %a to i8*
+  %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
+  %1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0
+  %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1
+  %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
+  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2
+  %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2
+  ret %struct.float64x2x3_t %.fca.0.2.insert
+}
+
+define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld3_dup_s8
+; CHECK: ld3r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0]
+entry:
+  %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+  %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0
+  %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
+  %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1
+  %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2
+  %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2
+  ret %struct.int8x8x3_t %.fca.0.2.insert
+}
+
+define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld3_dup_s16
+; CHECK: ld3r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0]
+entry:
+  %0 = bitcast i16* %a to i8*
+  %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+  %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0
+  %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
+  %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1
+  %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
+  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2
+  %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2
+  ret %struct.int16x4x3_t %.fca.0.2.insert
+}
+
+define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld3_dup_s32
+; CHECK: ld3r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
+entry:
+  %0 = bitcast i32* %a to i8*
+  %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
+  %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0
+  %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1
+  %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
+  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2
+  %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2
+  ret %struct.int32x2x3_t %.fca.0.2.insert
+}
+
+define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld3_dup_s64
+; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
+entry:
+  %0 = bitcast i64* %a to i8*
+  %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %0, i32 8)
+  %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0
+  %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1
+  %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2
+  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2
+  ret %struct.int64x1x3_t %.fca.0.2.insert
+}
+
+define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld3_dup_f32
+; CHECK: ld3r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
+entry:
+  %0 = bitcast float* %a to i8*
+  %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
+  %1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0
+  %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
+  %2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1
+  %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
+  %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2
+  %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2
+  ret %struct.float32x2x3_t %.fca.0.2.insert
+}
+
+define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld3_dup_f64
+; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
+entry:
+  %0 = bitcast double* %a to i8*
+  %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %0, i32 8)
+  %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0
+  %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1
+  %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2
+  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2
+  ret %struct.float64x1x3_t %.fca.0.2.insert
+}
+
+define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld4q_dup_s8
+; CHECK: ld4r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
+entry:
+  %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
+  %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0
+  %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
+  %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1
+  %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
+  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2
+  %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
+  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 3
+  %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %lane3, 0, 3
+  ret %struct.int8x16x4_t %.fca.0.3.insert
+}
+
+define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld4q_dup_s16
+; CHECK: ld4r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0]
+entry:
+  %0 = bitcast i16* %a to i8*
+  %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
+  %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0
+  %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
+  %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1
+  %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
+  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2
+  %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
+  %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 3
+  %lane3 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> zeroinitializer
+  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %lane, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
+  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2
+  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %lane3, 0, 3
+  ret %struct.int16x8x4_t %.fca.0.3.insert
+}
+
+define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld4q_dup_s32
+; CHECK: ld4r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast i32* %a to i8*
+  %quad = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %addr, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
+  %elt0 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad, 0
+  %dup0 = shufflevector <4 x i32> %elt0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %elt1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad, 1
+  %dup1 = shufflevector <4 x i32> %elt1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %elt2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad, 2
+  %dup2 = shufflevector <4 x i32> %elt2, <4 x i32> undef, <4 x i32> zeroinitializer
+  %elt3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad, 3
+  %dup3 = shufflevector <4 x i32> %elt3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %dup0, 0, 0
+  %ret1 = insertvalue %struct.int32x4x4_t %ret0, <4 x i32> %dup1, 0, 1
+  %ret2 = insertvalue %struct.int32x4x4_t %ret1, <4 x i32> %dup2, 0, 2
+  %ret3 = insertvalue %struct.int32x4x4_t %ret2, <4 x i32> %dup3, 0, 3
+  ret %struct.int32x4x4_t %ret3
+}
+
+define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld4q_dup_s64
+; CHECK: ld4r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast i64* %a to i8*
+  %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %addr, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
+  %elt0 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad, 0
+  %dup0 = shufflevector <2 x i64> %elt0, <2 x i64> undef, <2 x i32> zeroinitializer
+  %elt1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad, 1
+  %dup1 = shufflevector <2 x i64> %elt1, <2 x i64> undef, <2 x i32> zeroinitializer
+  %elt2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad, 2
+  %dup2 = shufflevector <2 x i64> %elt2, <2 x i64> undef, <2 x i32> zeroinitializer
+  %elt3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad, 3
+  %dup3 = shufflevector <2 x i64> %elt3, <2 x i64> undef, <2 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %dup0, 0, 0
+  %ret1 = insertvalue %struct.int64x2x4_t %ret0, <2 x i64> %dup1, 0, 1
+  %ret2 = insertvalue %struct.int64x2x4_t %ret1, <2 x i64> %dup2, 0, 2
+  %ret3 = insertvalue %struct.int64x2x4_t %ret2, <2 x i64> %dup3, 0, 3
+  ret %struct.int64x2x4_t %ret3
+}
+
+define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld4q_dup_f32
+; CHECK: ld4r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast float* %a to i8*
+  %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %addr, <4 x float> undef, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
+  %elt0 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad, 0
+  %dup0 = shufflevector <4 x float> %elt0, <4 x float> undef, <4 x i32> zeroinitializer
+  %elt1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad, 1
+  %dup1 = shufflevector <4 x float> %elt1, <4 x float> undef, <4 x i32> zeroinitializer
+  %elt2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad, 2
+  %dup2 = shufflevector <4 x float> %elt2, <4 x float> undef, <4 x i32> zeroinitializer
+  %elt3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad, 3
+  %dup3 = shufflevector <4 x float> %elt3, <4 x float> undef, <4 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.float32x4x4_t undef, <4 x float> %dup0, 0, 0
+  %ret1 = insertvalue %struct.float32x4x4_t %ret0, <4 x float> %dup1, 0, 1
+  %ret2 = insertvalue %struct.float32x4x4_t %ret1, <4 x float> %dup2, 0, 2
+  %ret3 = insertvalue %struct.float32x4x4_t %ret2, <4 x float> %dup3, 0, 3
+  ret %struct.float32x4x4_t %ret3
+}
+
+define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld4q_dup_f64
+; CHECK: ld4r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast double* %a to i8*
+  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %addr, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
+  %elt0 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad, 0
+  %dup0 = shufflevector <2 x double> %elt0, <2 x double> undef, <2 x i32> zeroinitializer
+  %elt1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad, 1
+  %dup1 = shufflevector <2 x double> %elt1, <2 x double> undef, <2 x i32> zeroinitializer
+  %elt2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad, 2
+  %dup2 = shufflevector <2 x double> %elt2, <2 x double> undef, <2 x i32> zeroinitializer
+  %elt3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad, 3
+  %dup3 = shufflevector <2 x double> %elt3, <2 x double> undef, <2 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.float64x2x4_t undef, <2 x double> %dup0, 0, 0
+  %ret1 = insertvalue %struct.float64x2x4_t %ret0, <2 x double> %dup1, 0, 1
+  %ret2 = insertvalue %struct.float64x2x4_t %ret1, <2 x double> %dup2, 0, 2
+  %ret3 = insertvalue %struct.float64x2x4_t %ret2, <2 x double> %dup3, 0, 3
+  ret %struct.float64x2x4_t %ret3
+}
+
+define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld4_dup_s8
+; CHECK: ld4r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0]
+entry: ; %a is already i8*, so no bitcast precedes the lane-0 vld4lane and the zero-mask splats
+  %quad = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+  %elt0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad, 0
+  %dup0 = shufflevector <8 x i8> %elt0, <8 x i8> undef, <8 x i32> zeroinitializer
+  %elt1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad, 1
+  %dup1 = shufflevector <8 x i8> %elt1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %elt2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad, 2
+  %dup2 = shufflevector <8 x i8> %elt2, <8 x i8> undef, <8 x i32> zeroinitializer
+  %elt3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad, 3
+  %dup3 = shufflevector <8 x i8> %elt3, <8 x i8> undef, <8 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %dup0, 0, 0
+  %ret1 = insertvalue %struct.int8x8x4_t %ret0, <8 x i8> %dup1, 0, 1
+  %ret2 = insertvalue %struct.int8x8x4_t %ret1, <8 x i8> %dup2, 0, 2
+  %ret3 = insertvalue %struct.int8x8x4_t %ret2, <8 x i8> %dup3, 0, 3
+  ret %struct.int8x8x4_t %ret3
+}
+
+define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld4_dup_s16
+; CHECK: ld4r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast i16* %a to i8*
+  %quad = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %addr, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+  %elt0 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad, 0
+  %dup0 = shufflevector <4 x i16> %elt0, <4 x i16> undef, <4 x i32> zeroinitializer
+  %elt1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad, 1
+  %dup1 = shufflevector <4 x i16> %elt1, <4 x i16> undef, <4 x i32> zeroinitializer
+  %elt2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad, 2
+  %dup2 = shufflevector <4 x i16> %elt2, <4 x i16> undef, <4 x i32> zeroinitializer
+  %elt3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad, 3
+  %dup3 = shufflevector <4 x i16> %elt3, <4 x i16> undef, <4 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %dup0, 0, 0
+  %ret1 = insertvalue %struct.int16x4x4_t %ret0, <4 x i16> %dup1, 0, 1
+  %ret2 = insertvalue %struct.int16x4x4_t %ret1, <4 x i16> %dup2, 0, 2
+  %ret3 = insertvalue %struct.int16x4x4_t %ret2, <4 x i16> %dup3, 0, 3
+  ret %struct.int16x4x4_t %ret3
+}
+
+define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld4_dup_s32
+; CHECK: ld4r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast i32* %a to i8*
+  %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %addr, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
+  %elt0 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad, 0
+  %dup0 = shufflevector <2 x i32> %elt0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %elt1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad, 1
+  %dup1 = shufflevector <2 x i32> %elt1, <2 x i32> undef, <2 x i32> zeroinitializer
+  %elt2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad, 2
+  %dup2 = shufflevector <2 x i32> %elt2, <2 x i32> undef, <2 x i32> zeroinitializer
+  %elt3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad, 3
+  %dup3 = shufflevector <2 x i32> %elt3, <2 x i32> undef, <2 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %dup0, 0, 0
+  %ret1 = insertvalue %struct.int32x2x4_t %ret0, <2 x i32> %dup1, 0, 1
+  %ret2 = insertvalue %struct.int32x2x4_t %ret1, <2 x i32> %dup2, 0, 2
+  %ret3 = insertvalue %struct.int32x2x4_t %ret2, <2 x i32> %dup3, 0, 3
+  ret %struct.int32x2x4_t %ret3
+}
+
+define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld4_dup_s64
+; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
+entry: ; one-element vectors: a plain vld4 is called and no splat shuffles follow
+  %addr = bitcast i64* %a to i8*
+  %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %addr, i32 8)
+  %out0 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad, 0
+  %out1 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad, 1
+  %out2 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad, 2
+  %out3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad, 3
+  %ret0 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %out0, 0, 0
+  %ret1 = insertvalue %struct.int64x1x4_t %ret0, <1 x i64> %out1, 0, 1
+  %ret2 = insertvalue %struct.int64x1x4_t %ret1, <1 x i64> %out2, 0, 2
+  %ret3 = insertvalue %struct.int64x1x4_t %ret2, <1 x i64> %out3, 0, 3
+  ret %struct.int64x1x4_t %ret3
+}
+
+define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld4_dup_f32
+; CHECK: ld4r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
+entry: ; vld4lane with lane index 0 on undef vectors, then a zero-mask splat of every result
+  %addr = bitcast float* %a to i8*
+  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %addr, <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
+  %elt0 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad, 0
+  %dup0 = shufflevector <2 x float> %elt0, <2 x float> undef, <2 x i32> zeroinitializer
+  %elt1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad, 1
+  %dup1 = shufflevector <2 x float> %elt1, <2 x float> undef, <2 x i32> zeroinitializer
+  %elt2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad, 2
+  %dup2 = shufflevector <2 x float> %elt2, <2 x float> undef, <2 x i32> zeroinitializer
+  %elt3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad, 3
+  %dup3 = shufflevector <2 x float> %elt3, <2 x float> undef, <2 x i32> zeroinitializer
+  %ret0 = insertvalue %struct.float32x2x4_t undef, <2 x float> %dup0, 0, 0
+  %ret1 = insertvalue %struct.float32x2x4_t %ret0, <2 x float> %dup1, 0, 1
+  %ret2 = insertvalue %struct.float32x2x4_t %ret1, <2 x float> %dup2, 0, 2
+  %ret3 = insertvalue %struct.float32x2x4_t %ret2, <2 x float> %dup3, 0, 3
+  ret %struct.float32x2x4_t %ret3
+}
+
+define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld4_dup_f64
+; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
+entry: ; one-element vectors: a plain vld4 is called and no splat shuffles follow
+  %addr = bitcast double* %a to i8*
+  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %addr, i32 8)
+  %out0 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad, 0
+  %out1 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad, 1
+  %out2 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad, 2
+  %out3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad, 3
+  %ret0 = insertvalue %struct.float64x1x4_t undef, <1 x double> %out0, 0, 0
+  %ret1 = insertvalue %struct.float64x1x4_t %ret0, <1 x double> %out1, 0, 1
+  %ret2 = insertvalue %struct.float64x1x4_t %ret1, <1 x double> %out2, 0, 2
+  %ret3 = insertvalue %struct.float64x1x4_t %ret2, <1 x double> %out3, 0, 3
+  ret %struct.float64x1x4_t %ret3
+}
+
+define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vld1q_lane_s8
+; CHECK: ld1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 15 of %b
+  %elt = load i8* %a, align 1
+  %result = insertelement <16 x i8> %b, i8 %elt, i32 15
+  ret <16 x i8> %result
+}
+
+define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vld1q_lane_s16
+; CHECK: ld1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 7 of %b
+  %elt = load i16* %a, align 2
+  %result = insertelement <8 x i16> %b, i16 %elt, i32 7
+  ret <8 x i16> %result
+}
+
+define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vld1q_lane_s32
+; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 3 of %b
+  %elt = load i32* %a, align 4
+  %result = insertelement <4 x i32> %b, i32 %elt, i32 3
+  ret <4 x i32> %result
+}
+
+define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vld1q_lane_s64
+; CHECK: ld1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 1 of %b
+  %elt = load i64* %a, align 8
+  %result = insertelement <2 x i64> %b, i64 %elt, i32 1
+  ret <2 x i64> %result
+}
+
+define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
+; CHECK-LABEL: test_vld1q_lane_f32
+; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 3 of %b
+  %elt = load float* %a, align 4
+  %result = insertelement <4 x float> %b, float %elt, i32 3
+  ret <4 x float> %result
+}
+
+define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
+; CHECK-LABEL: test_vld1q_lane_f64
+; CHECK: ld1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 1 of %b
+  %elt = load double* %a, align 8
+  %result = insertelement <2 x double> %b, double %elt, i32 1
+  ret <2 x double> %result
+}
+
+define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vld1_lane_s8
+; CHECK: ld1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 7 of %b
+  %elt = load i8* %a, align 1
+  %result = insertelement <8 x i8> %b, i8 %elt, i32 7
+  ret <8 x i8> %result
+}
+
+define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vld1_lane_s16
+; CHECK: ld1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 3 of %b
+  %elt = load i16* %a, align 2
+  %result = insertelement <4 x i16> %b, i16 %elt, i32 3
+  ret <4 x i16> %result
+}
+
+define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vld1_lane_s32
+; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 1 of %b
+  %elt = load i32* %a, align 4
+  %result = insertelement <2 x i32> %b, i32 %elt, i32 1
+  ret <2 x i32> %result
+}
+
+define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
+; CHECK-LABEL: test_vld1_lane_s64
+; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
+entry: ; one-element vector: the loaded scalar is inserted into undef and %b is not read
+  %elt = load i64* %a, align 8
+  %result = insertelement <1 x i64> undef, i64 %elt, i32 0
+  ret <1 x i64> %result
+}
+
+define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
+; CHECK-LABEL: test_vld1_lane_f32
+; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; scalar load from %a inserted at index 1 of %b
+  %elt = load float* %a, align 4
+  %result = insertelement <2 x float> %b, float %elt, i32 1
+  ret <2 x float> %result
+}
+
+define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
+; CHECK-LABEL: test_vld1_lane_f64
+; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
+entry: ; one-element vector: the loaded scalar is inserted into undef and %b is not read
+  %elt = load double* %a, align 8
+  %result = insertelement <1 x double> undef, double %elt, i32 0
+  ret <1 x double> %result
+}
+
+define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vld2q_lane_s16
+; CHECK: ld2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 7 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <8 x i16>] %b.coerce, 0
+  %src1 = extractvalue [2 x <8 x i16>] %b.coerce, 1
+  %addr = bitcast i16* %a to i8*
+  %pair = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %addr, <8 x i16> %src0, <8 x i16> %src1, i32 7, i32 2)
+  %out0 = extractvalue { <8 x i16>, <8 x i16> } %pair, 0
+  %out1 = extractvalue { <8 x i16>, <8 x i16> } %pair, 1
+  %ret0 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %out0, 0, 0
+  %ret1 = insertvalue %struct.int16x8x2_t %ret0, <8 x i16> %out1, 0, 1
+  ret %struct.int16x8x2_t %ret1
+}
+
+define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vld2q_lane_s32
+; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 3 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <4 x i32>] %b.coerce, 0
+  %src1 = extractvalue [2 x <4 x i32>] %b.coerce, 1
+  %addr = bitcast i32* %a to i8*
+  %pair = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %addr, <4 x i32> %src0, <4 x i32> %src1, i32 3, i32 4)
+  %out0 = extractvalue { <4 x i32>, <4 x i32> } %pair, 0
+  %out1 = extractvalue { <4 x i32>, <4 x i32> } %pair, 1
+  %ret0 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %out0, 0, 0
+  %ret1 = insertvalue %struct.int32x4x2_t %ret0, <4 x i32> %out1, 0, 1
+  ret %struct.int32x4x2_t %ret1
+}
+
+define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vld2q_lane_s64
+; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 1 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <2 x i64>] %b.coerce, 0
+  %src1 = extractvalue [2 x <2 x i64>] %b.coerce, 1
+  %addr = bitcast i64* %a to i8*
+  %pair = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %addr, <2 x i64> %src0, <2 x i64> %src1, i32 1, i32 8)
+  %out0 = extractvalue { <2 x i64>, <2 x i64> } %pair, 0
+  %out1 = extractvalue { <2 x i64>, <2 x i64> } %pair, 1
+  %ret0 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %out0, 0, 0
+  %ret1 = insertvalue %struct.int64x2x2_t %ret0, <2 x i64> %out1, 0, 1
+  ret %struct.int64x2x2_t %ret1
+}
+
+define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) {
+; CHECK-LABEL: test_vld2q_lane_f32
+; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 3 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <4 x float>] %b.coerce, 0
+  %src1 = extractvalue [2 x <4 x float>] %b.coerce, 1
+  %addr = bitcast float* %a to i8*
+  %pair = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %addr, <4 x float> %src0, <4 x float> %src1, i32 3, i32 4)
+  %out0 = extractvalue { <4 x float>, <4 x float> } %pair, 0
+  %out1 = extractvalue { <4 x float>, <4 x float> } %pair, 1
+  %ret0 = insertvalue %struct.float32x4x2_t undef, <4 x float> %out0, 0, 0
+  %ret1 = insertvalue %struct.float32x4x2_t %ret0, <4 x float> %out1, 0, 1
+  ret %struct.float32x4x2_t %ret1
+}
+
+define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) {
+; CHECK-LABEL: test_vld2q_lane_f64
+; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 1 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <2 x double>] %b.coerce, 0
+  %src1 = extractvalue [2 x <2 x double>] %b.coerce, 1
+  %addr = bitcast double* %a to i8*
+  %pair = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %addr, <2 x double> %src0, <2 x double> %src1, i32 1, i32 8)
+  %out0 = extractvalue { <2 x double>, <2 x double> } %pair, 0
+  %out1 = extractvalue { <2 x double>, <2 x double> } %pair, 1
+  %ret0 = insertvalue %struct.float64x2x2_t undef, <2 x double> %out0, 0, 0
+  %ret1 = insertvalue %struct.float64x2x2_t %ret0, <2 x double> %out1, 0, 1
+  ret %struct.float64x2x2_t %ret1
+}
+
+define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vld2_lane_s8
+; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry: ; %a is already i8*, so it feeds vld2lane (lane index 7) directly
+  %src0 = extractvalue [2 x <8 x i8>] %b.coerce, 0
+  %src1 = extractvalue [2 x <8 x i8>] %b.coerce, 1
+  %pair = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %src0, <8 x i8> %src1, i32 7, i32 1)
+  %out0 = extractvalue { <8 x i8>, <8 x i8> } %pair, 0
+  %out1 = extractvalue { <8 x i8>, <8 x i8> } %pair, 1
+  %ret0 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %out0, 0, 0
+  %ret1 = insertvalue %struct.int8x8x2_t %ret0, <8 x i8> %out1, 0, 1
+  ret %struct.int8x8x2_t %ret1
+}
+
+define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vld2_lane_s16
+; CHECK: ld2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 3 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <4 x i16>] %b.coerce, 0
+  %src1 = extractvalue [2 x <4 x i16>] %b.coerce, 1
+  %addr = bitcast i16* %a to i8*
+  %pair = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %addr, <4 x i16> %src0, <4 x i16> %src1, i32 3, i32 2)
+  %out0 = extractvalue { <4 x i16>, <4 x i16> } %pair, 0
+  %out1 = extractvalue { <4 x i16>, <4 x i16> } %pair, 1
+  %ret0 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %out0, 0, 0
+  %ret1 = insertvalue %struct.int16x4x2_t %ret0, <4 x i16> %out1, 0, 1
+  ret %struct.int16x4x2_t %ret1
+}
+
+define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vld2_lane_s32
+; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 1 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <2 x i32>] %b.coerce, 0
+  %src1 = extractvalue [2 x <2 x i32>] %b.coerce, 1
+  %addr = bitcast i32* %a to i8*
+  %pair = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %addr, <2 x i32> %src0, <2 x i32> %src1, i32 1, i32 4)
+  %out0 = extractvalue { <2 x i32>, <2 x i32> } %pair, 0
+  %out1 = extractvalue { <2 x i32>, <2 x i32> } %pair, 1
+  %ret0 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %out0, 0, 0
+  %ret1 = insertvalue %struct.int32x2x2_t %ret0, <2 x i32> %out1, 0, 1
+  ret %struct.int32x2x2_t %ret1
+}
+
+define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vld2_lane_s64
+; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 0 on the two one-element vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <1 x i64>] %b.coerce, 0
+  %src1 = extractvalue [2 x <1 x i64>] %b.coerce, 1
+  %addr = bitcast i64* %a to i8*
+  %pair = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8* %addr, <1 x i64> %src0, <1 x i64> %src1, i32 0, i32 8)
+  %out0 = extractvalue { <1 x i64>, <1 x i64> } %pair, 0
+  %out1 = extractvalue { <1 x i64>, <1 x i64> } %pair, 1
+  %ret0 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %out0, 0, 0
+  %ret1 = insertvalue %struct.int64x1x2_t %ret0, <1 x i64> %out1, 0, 1
+  ret %struct.int64x1x2_t %ret1
+}
+
+define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) {
+; CHECK-LABEL: test_vld2_lane_f32
+; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 1 on the two vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <2 x float>] %b.coerce, 0
+  %src1 = extractvalue [2 x <2 x float>] %b.coerce, 1
+  %addr = bitcast float* %a to i8*
+  %pair = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %addr, <2 x float> %src0, <2 x float> %src1, i32 1, i32 4)
+  %out0 = extractvalue { <2 x float>, <2 x float> } %pair, 0
+  %out1 = extractvalue { <2 x float>, <2 x float> } %pair, 1
+  %ret0 = insertvalue %struct.float32x2x2_t undef, <2 x float> %out0, 0, 0
+  %ret1 = insertvalue %struct.float32x2x2_t %ret0, <2 x float> %out1, 0, 1
+  ret %struct.float32x2x2_t %ret1
+}
+
+define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) {
+; CHECK-LABEL: test_vld2_lane_f64
+; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; vld2lane with lane index 0 on the two one-element vectors unpacked from %b.coerce
+  %src0 = extractvalue [2 x <1 x double>] %b.coerce, 0
+  %src1 = extractvalue [2 x <1 x double>] %b.coerce, 1
+  %addr = bitcast double* %a to i8*
+  %pair = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8* %addr, <1 x double> %src0, <1 x double> %src1, i32 0, i32 8)
+  %out0 = extractvalue { <1 x double>, <1 x double> } %pair, 0
+  %out1 = extractvalue { <1 x double>, <1 x double> } %pair, 1
+  %ret0 = insertvalue %struct.float64x1x2_t undef, <1 x double> %out0, 0, 0
+  %ret1 = insertvalue %struct.float64x1x2_t %ret0, <1 x double> %out1, 0, 1
+  ret %struct.float64x1x2_t %ret1
+}
+
+define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vld3q_lane_s16
+; CHECK: ld3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry: ; vld3lane with lane index 7 on the three vectors unpacked from %b.coerce
+  %src0 = extractvalue [3 x <8 x i16>] %b.coerce, 0
+  %src1 = extractvalue [3 x <8 x i16>] %b.coerce, 1
+  %src2 = extractvalue [3 x <8 x i16>] %b.coerce, 2
+  %addr = bitcast i16* %a to i8*
+  %triple = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %addr, <8 x i16> %src0, <8 x i16> %src1, <8 x i16> %src2, i32 7, i32 2)
+  %out0 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %triple, 0
+  %out1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %triple, 1
+  %out2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %triple, 2
+  %ret0 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %out0, 0, 0
+  %ret1 = insertvalue %struct.int16x8x3_t %ret0, <8 x i16> %out1, 0, 1
+  %ret2 = insertvalue %struct.int16x8x3_t %ret1, <8 x i16> %out2, 0, 2
+  ret %struct.int16x8x3_t %ret2
+}
+
+define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vld3q_lane_s32
+; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry: ; vld3lane with lane index 3 on the three vectors unpacked from %b.coerce
+  %src0 = extractvalue [3 x <4 x i32>] %b.coerce, 0
+  %src1 = extractvalue [3 x <4 x i32>] %b.coerce, 1
+  %src2 = extractvalue [3 x <4 x i32>] %b.coerce, 2
+  %addr = bitcast i32* %a to i8*
+  %triple = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %addr, <4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2, i32 3, i32 4)
+  %out0 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %triple, 0
+  %out1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %triple, 1
+  %out2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %triple, 2
+  %ret0 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %out0, 0, 0
+  %ret1 = insertvalue %struct.int32x4x3_t %ret0, <4 x i32> %out1, 0, 1
+  %ret2 = insertvalue %struct.int32x4x3_t %ret1, <4 x i32> %out2, 0, 2
+  ret %struct.int32x4x3_t %ret2
+}
+
+define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vld3q_lane_s64
+; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry: ; vld3lane with lane index 1 on the three vectors unpacked from %b.coerce
+  %src0 = extractvalue [3 x <2 x i64>] %b.coerce, 0
+  %src1 = extractvalue [3 x <2 x i64>] %b.coerce, 1
+  %src2 = extractvalue [3 x <2 x i64>] %b.coerce, 2
+  %addr = bitcast i64* %a to i8*
+  %triple = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %addr, <2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2, i32 1, i32 8)
+  %out0 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %triple, 0
+  %out1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %triple, 1
+  %out2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %triple, 2
+  %ret0 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %out0, 0, 0
+  %ret1 = insertvalue %struct.int64x2x3_t %ret0, <2 x i64> %out1, 0, 1
+  %ret2 = insertvalue %struct.int64x2x3_t %ret1, <2 x i64> %out2, 0, 2
+  ret %struct.int64x2x3_t %ret2
+}
+
+define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) {
+; CHECK-LABEL: test_vld3q_lane_f32
+; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <4 x float>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <4 x float>] %b.coerce, 1
+  %src2 = extractvalue [3 x <4 x float>] %b.coerce, 2
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %addr, <4 x float> %src0, <4 x float> %src1, <4 x float> %src2, i32 3, i32 4)
+  %ld0 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %ld, 1
+  %ld2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %ld, 2
+  %ret0 = insertvalue %struct.float32x4x3_t undef, <4 x float> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float32x4x3_t %ret0, <4 x float> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float32x4x3_t %ret1, <4 x float> %ld2, 0, 2
+  ret %struct.float32x4x3_t %ret2
+}
+
+define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) {
+; CHECK-LABEL: test_vld3q_lane_f64
+; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <2 x double>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <2 x double>] %b.coerce, 1
+  %src2 = extractvalue [3 x <2 x double>] %b.coerce, 2
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %addr, <2 x double> %src0, <2 x double> %src1, <2 x double> %src2, i32 1, i32 8)
+  %ld0 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %ld, 1
+  %ld2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %ld, 2
+  %ret0 = insertvalue %struct.float64x2x3_t undef, <2 x double> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float64x2x3_t %ret0, <2 x double> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float64x2x3_t %ret1, <2 x double> %ld2, 0, 2
+  ret %struct.float64x2x3_t %ret2
+}
+
+define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vld3_lane_s8
+; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <8 x i8>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <8 x i8>] %b.coerce, 1
+  %src2 = extractvalue [3 x <8 x i8>] %b.coerce, 2
+  %ld = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> %src0, <8 x i8> %src1, <8 x i8> %src2, i32 7, i32 1) ; %a is already i8*, so it is passed without a cast
+  %ld0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %ld, 1
+  %ld2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %ld, 2
+  %ret0 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int8x8x3_t %ret0, <8 x i8> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int8x8x3_t %ret1, <8 x i8> %ld2, 0, 2
+  ret %struct.int8x8x3_t %ret2
+}
+
+define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vld3_lane_s16
+; CHECK: ld3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <4 x i16>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <4 x i16>] %b.coerce, 1
+  %src2 = extractvalue [3 x <4 x i16>] %b.coerce, 2
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %addr, <4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2, i32 3, i32 2)
+  %ld0 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %ld, 1
+  %ld2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %ld, 2
+  %ret0 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int16x4x3_t %ret0, <4 x i16> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int16x4x3_t %ret1, <4 x i16> %ld2, 0, 2
+  ret %struct.int16x4x3_t %ret2
+}
+
+define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vld3_lane_s32
+; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <2 x i32>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <2 x i32>] %b.coerce, 1
+  %src2 = extractvalue [3 x <2 x i32>] %b.coerce, 2
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %addr, <2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2, i32 1, i32 4)
+  %ld0 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %ld, 1
+  %ld2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %ld, 2
+  %ret0 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int32x2x3_t %ret0, <2 x i32> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int32x2x3_t %ret1, <2 x i32> %ld2, 0, 2
+  ret %struct.int32x2x3_t %ret2
+}
+
+define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vld3_lane_s64
+; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <1 x i64>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <1 x i64>] %b.coerce, 1
+  %src2 = extractvalue [3 x <1 x i64>] %b.coerce, 2
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8* %addr, <1 x i64> %src0, <1 x i64> %src1, <1 x i64> %src2, i32 0, i32 8)
+  %ld0 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %ld, 1
+  %ld2 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %ld, 2
+  %ret0 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int64x1x3_t %ret0, <1 x i64> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int64x1x3_t %ret1, <1 x i64> %ld2, 0, 2
+  ret %struct.int64x1x3_t %ret2
+}
+
+define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) {
+; CHECK-LABEL: test_vld3_lane_f32
+; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <2 x float>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <2 x float>] %b.coerce, 1
+  %src2 = extractvalue [3 x <2 x float>] %b.coerce, 2
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %addr, <2 x float> %src0, <2 x float> %src1, <2 x float> %src2, i32 1, i32 4)
+  %ld0 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %ld, 1
+  %ld2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %ld, 2
+  %ret0 = insertvalue %struct.float32x2x3_t undef, <2 x float> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float32x2x3_t %ret0, <2 x float> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float32x2x3_t %ret1, <2 x float> %ld2, 0, 2
+  ret %struct.float32x2x3_t %ret2
+}
+
+define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) {
+; CHECK-LABEL: test_vld3_lane_f64
+; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [3 x <1 x double>] %b.coerce, 0 ; unpack the three vectors from the coerced array argument
+  %src1 = extractvalue [3 x <1 x double>] %b.coerce, 1
+  %src2 = extractvalue [3 x <1 x double>] %b.coerce, 2
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8* %addr, <1 x double> %src0, <1 x double> %src1, <1 x double> %src2, i32 0, i32 8)
+  %ld0 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %ld, 1
+  %ld2 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %ld, 2
+  %ret0 = insertvalue %struct.float64x1x3_t undef, <1 x double> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float64x1x3_t %ret0, <1 x double> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float64x1x3_t %ret1, <1 x double> %ld2, 0, 2
+  ret %struct.float64x1x3_t %ret2
+}
+
+define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vld4q_lane_s8
+; CHECK: ld4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <16 x i8>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %src2 = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %src3 = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %ld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> %src0, <16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3, i32 15, i32 1) ; %a is already i8*, so it is passed without a cast
+  %ld0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld, 1
+  %ld2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld, 2
+  %ld3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld, 3
+  %ret0 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int8x16x4_t %ret0, <16 x i8> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int8x16x4_t %ret1, <16 x i8> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int8x16x4_t %ret2, <16 x i8> %ld3, 0, 3
+  ret %struct.int8x16x4_t %ret3
+}
+
+define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vld4q_lane_s16
+; CHECK: ld4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <8 x i16>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <8 x i16>] %b.coerce, 1
+  %src2 = extractvalue [4 x <8 x i16>] %b.coerce, 2
+  %src3 = extractvalue [4 x <8 x i16>] %b.coerce, 3
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %addr, <8 x i16> %src0, <8 x i16> %src1, <8 x i16> %src2, <8 x i16> %src3, i32 7, i32 2)
+  %ld0 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld, 1
+  %ld2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld, 2
+  %ld3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld, 3
+  %ret0 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int16x8x4_t %ret0, <8 x i16> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int16x8x4_t %ret1, <8 x i16> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int16x8x4_t %ret2, <8 x i16> %ld3, 0, 3
+  ret %struct.int16x8x4_t %ret3
+}
+
+define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vld4q_lane_s32
+; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <4 x i32>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <4 x i32>] %b.coerce, 1
+  %src2 = extractvalue [4 x <4 x i32>] %b.coerce, 2
+  %src3 = extractvalue [4 x <4 x i32>] %b.coerce, 3
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %addr, <4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2, <4 x i32> %src3, i32 3, i32 4)
+  %ld0 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld, 1
+  %ld2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld, 2
+  %ld3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld, 3
+  %ret0 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int32x4x4_t %ret0, <4 x i32> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int32x4x4_t %ret1, <4 x i32> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int32x4x4_t %ret2, <4 x i32> %ld3, 0, 3
+  ret %struct.int32x4x4_t %ret3
+}
+
+define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vld4q_lane_s64
+; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <2 x i64>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <2 x i64>] %b.coerce, 1
+  %src2 = extractvalue [4 x <2 x i64>] %b.coerce, 2
+  %src3 = extractvalue [4 x <2 x i64>] %b.coerce, 3
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %addr, <2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2, <2 x i64> %src3, i32 1, i32 8)
+  %ld0 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld, 1
+  %ld2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld, 2
+  %ld3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld, 3
+  %ret0 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int64x2x4_t %ret0, <2 x i64> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int64x2x4_t %ret1, <2 x i64> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int64x2x4_t %ret2, <2 x i64> %ld3, 0, 3
+  ret %struct.int64x2x4_t %ret3
+}
+
+define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) {
+; CHECK-LABEL: test_vld4q_lane_f32
+; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <4 x float>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <4 x float>] %b.coerce, 1
+  %src2 = extractvalue [4 x <4 x float>] %b.coerce, 2
+  %src3 = extractvalue [4 x <4 x float>] %b.coerce, 3
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %addr, <4 x float> %src0, <4 x float> %src1, <4 x float> %src2, <4 x float> %src3, i32 3, i32 4)
+  %ld0 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld, 1
+  %ld2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld, 2
+  %ld3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld, 3
+  %ret0 = insertvalue %struct.float32x4x4_t undef, <4 x float> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float32x4x4_t %ret0, <4 x float> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float32x4x4_t %ret1, <4 x float> %ld2, 0, 2
+  %ret3 = insertvalue %struct.float32x4x4_t %ret2, <4 x float> %ld3, 0, 3
+  ret %struct.float32x4x4_t %ret3
+}
+
+define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) {
+; CHECK-LABEL: test_vld4q_lane_f64
+; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <2 x double>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <2 x double>] %b.coerce, 1
+  %src2 = extractvalue [4 x <2 x double>] %b.coerce, 2
+  %src3 = extractvalue [4 x <2 x double>] %b.coerce, 3
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %addr, <2 x double> %src0, <2 x double> %src1, <2 x double> %src2, <2 x double> %src3, i32 1, i32 8)
+  %ld0 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld, 1
+  %ld2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld, 2
+  %ld3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld, 3
+  %ret0 = insertvalue %struct.float64x2x4_t undef, <2 x double> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float64x2x4_t %ret0, <2 x double> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float64x2x4_t %ret1, <2 x double> %ld2, 0, 2
+  %ret3 = insertvalue %struct.float64x2x4_t %ret2, <2 x double> %ld3, 0, 3
+  ret %struct.float64x2x4_t %ret3
+}
+
+define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vld4_lane_s8
+; CHECK: ld4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <8 x i8>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <8 x i8>] %b.coerce, 1
+  %src2 = extractvalue [4 x <8 x i8>] %b.coerce, 2
+  %src3 = extractvalue [4 x <8 x i8>] %b.coerce, 3
+  %ld = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> %src0, <8 x i8> %src1, <8 x i8> %src2, <8 x i8> %src3, i32 7, i32 1) ; %a is already i8*, so it is passed without a cast
+  %ld0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld, 1
+  %ld2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld, 2
+  %ld3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld, 3
+  %ret0 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int8x8x4_t %ret0, <8 x i8> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int8x8x4_t %ret1, <8 x i8> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int8x8x4_t %ret2, <8 x i8> %ld3, 0, 3
+  ret %struct.int8x8x4_t %ret3
+}
+
+define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vld4_lane_s16
+; CHECK: ld4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <4 x i16>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <4 x i16>] %b.coerce, 1
+  %src2 = extractvalue [4 x <4 x i16>] %b.coerce, 2
+  %src3 = extractvalue [4 x <4 x i16>] %b.coerce, 3
+  %addr = bitcast i16* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %addr, <4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2, <4 x i16> %src3, i32 3, i32 2)
+  %ld0 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld, 1
+  %ld2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld, 2
+  %ld3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld, 3
+  %ret0 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int16x4x4_t %ret0, <4 x i16> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int16x4x4_t %ret1, <4 x i16> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int16x4x4_t %ret2, <4 x i16> %ld3, 0, 3
+  ret %struct.int16x4x4_t %ret3
+}
+
+define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vld4_lane_s32
+; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <2 x i32>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <2 x i32>] %b.coerce, 1
+  %src2 = extractvalue [4 x <2 x i32>] %b.coerce, 2
+  %src3 = extractvalue [4 x <2 x i32>] %b.coerce, 3
+  %addr = bitcast i32* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %addr, <2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2, <2 x i32> %src3, i32 1, i32 4)
+  %ld0 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld, 1
+  %ld2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld, 2
+  %ld3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld, 3
+  %ret0 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int32x2x4_t %ret0, <2 x i32> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int32x2x4_t %ret1, <2 x i32> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int32x2x4_t %ret2, <2 x i32> %ld3, 0, 3
+  ret %struct.int32x2x4_t %ret3
+}
+
+define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vld4_lane_s64
+; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <1 x i64>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <1 x i64>] %b.coerce, 1
+  %src2 = extractvalue [4 x <1 x i64>] %b.coerce, 2
+  %src3 = extractvalue [4 x <1 x i64>] %b.coerce, 3
+  %addr = bitcast i64* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8* %addr, <1 x i64> %src0, <1 x i64> %src1, <1 x i64> %src2, <1 x i64> %src3, i32 0, i32 8)
+  %ld0 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld, 1
+  %ld2 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld, 2
+  %ld3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld, 3
+  %ret0 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.int64x1x4_t %ret0, <1 x i64> %ld1, 0, 1
+  %ret2 = insertvalue %struct.int64x1x4_t %ret1, <1 x i64> %ld2, 0, 2
+  %ret3 = insertvalue %struct.int64x1x4_t %ret2, <1 x i64> %ld3, 0, 3
+  ret %struct.int64x1x4_t %ret3
+}
+
+define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) {
+; CHECK-LABEL: test_vld4_lane_f32
+; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <2 x float>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <2 x float>] %b.coerce, 1
+  %src2 = extractvalue [4 x <2 x float>] %b.coerce, 2
+  %src3 = extractvalue [4 x <2 x float>] %b.coerce, 3
+  %addr = bitcast float* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %addr, <2 x float> %src0, <2 x float> %src1, <2 x float> %src2, <2 x float> %src3, i32 1, i32 4)
+  %ld0 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld, 1
+  %ld2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld, 2
+  %ld3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld, 3
+  %ret0 = insertvalue %struct.float32x2x4_t undef, <2 x float> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float32x2x4_t %ret0, <2 x float> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float32x2x4_t %ret1, <2 x float> %ld2, 0, 2
+  %ret3 = insertvalue %struct.float32x2x4_t %ret2, <2 x float> %ld3, 0, 3
+  ret %struct.float32x2x4_t %ret3
+}
+
+define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) {
+; CHECK-LABEL: test_vld4_lane_f64
+; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:
+  %src0 = extractvalue [4 x <1 x double>] %b.coerce, 0 ; unpack the four vectors from the coerced array argument
+  %src1 = extractvalue [4 x <1 x double>] %b.coerce, 1
+  %src2 = extractvalue [4 x <1 x double>] %b.coerce, 2
+  %src3 = extractvalue [4 x <1 x double>] %b.coerce, 3
+  %addr = bitcast double* %a to i8* ; the intrinsic takes its address as i8*
+  %ld = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8* %addr, <1 x double> %src0, <1 x double> %src1, <1 x double> %src2, <1 x double> %src3, i32 0, i32 8)
+  %ld0 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld, 0 ; split the intrinsic's aggregate result
+  %ld1 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld, 1
+  %ld2 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld, 2
+  %ld3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld, 3
+  %ret0 = insertvalue %struct.float64x1x4_t undef, <1 x double> %ld0, 0, 0 ; rebuild the return struct field by field
+  %ret1 = insertvalue %struct.float64x1x4_t %ret0, <1 x double> %ld1, 0, 1
+  %ret2 = insertvalue %struct.float64x1x4_t %ret1, <1 x double> %ld2, 0, 2
+  %ret3 = insertvalue %struct.float64x1x4_t %ret2, <1 x double> %ld3, 0, 3
+  ret %struct.float64x1x4_t %ret3
+}
+
+define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vst1q_lane_s8
+; CHECK: st1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 15 of %b through %a
+  %lane.val = extractelement <16 x i8> %b, i32 15
+  store i8 %lane.val, i8* %a, align 1
+  ret void
+}
+
+define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vst1q_lane_s16
+; CHECK: st1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 7 of %b through %a
+  %lane.val = extractelement <8 x i16> %b, i32 7
+  store i16 %lane.val, i16* %a, align 2
+  ret void
+}
+
+define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vst1q_lane_s32
+; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 3 of %b through %a
+  %lane.val = extractelement <4 x i32> %b, i32 3
+  store i32 %lane.val, i32* %a, align 4
+  ret void
+}
+
+define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vst1q_lane_s64
+; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 1 of %b through %a
+  %lane.val = extractelement <2 x i64> %b, i32 1
+  store i64 %lane.val, i64* %a, align 8
+  ret void
+}
+
+define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
+; CHECK-LABEL: test_vst1q_lane_f32
+; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 3 of %b through %a
+  %lane.val = extractelement <4 x float> %b, i32 3
+  store float %lane.val, float* %a, align 4
+  ret void
+}
+
+define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
+; CHECK-LABEL: test_vst1q_lane_f64
+; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 1 of %b through %a
+  %lane.val = extractelement <2 x double> %b, i32 1
+  store double %lane.val, double* %a, align 8
+  ret void
+}
+
+define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vst1_lane_s8
+; CHECK: st1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 7 of %b through %a
+  %lane.val = extractelement <8 x i8> %b, i32 7
+  store i8 %lane.val, i8* %a, align 1
+  ret void
+}
+
+define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vst1_lane_s16
+; CHECK: st1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 3 of %b through %a
+  %lane.val = extractelement <4 x i16> %b, i32 3
+  store i16 %lane.val, i16* %a, align 2
+  ret void
+}
+
+define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vst1_lane_s32
+; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 1 of %b through %a
+  %lane.val = extractelement <2 x i32> %b, i32 1
+  store i32 %lane.val, i32* %a, align 4
+  ret void
+}
+
+define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
+; CHECK-LABEL: test_vst1_lane_s64
+; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 0 (the only one) of %b through %a
+  %lane.val = extractelement <1 x i64> %b, i32 0
+  store i64 %lane.val, i64* %a, align 8
+  ret void
+}
+
+define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
+; CHECK-LABEL: test_vst1_lane_f32
+; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 1 of %b through %a
+  %lane.val = extractelement <2 x float> %b, i32 1
+  store float %lane.val, float* %a, align 4
+  ret void
+}
+
+define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
+; CHECK-LABEL: test_vst1_lane_f64
+; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; store element 0 (the only one) of %b through %a
+  %lane.val = extractelement <1 x double> %b, i32 0
+  store double %lane.val, double* %a, align 8
+  ret void
+}
+
+define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vst2q_lane_s8
+; CHECK: st2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 15, align arg 1)
+  %vec0 = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2lane.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, i32 15, i32 1)
+  ret void
+}
+
+define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vst2q_lane_s16
+; CHECK: st2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 7, align arg 2)
+  %vec0 = extractvalue [2 x <8 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <8 x i16>] %b.coerce, 1
+  %addr = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, i32 7, i32 2)
+  ret void
+}
+
+define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vst2q_lane_s32
+; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 3, align arg 4)
+  %vec0 = extractvalue [2 x <4 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <4 x i32>] %b.coerce, 1
+  %addr = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, i32 3, i32 4)
+  ret void
+}
+
+define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vst2q_lane_s64
+; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 1, align arg 8)
+  %vec0 = extractvalue [2 x <2 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <2 x i64>] %b.coerce, 1
+  %addr = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, i32 1, i32 8)
+  ret void
+}
+
+define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) {
+; CHECK-LABEL: test_vst2q_lane_f32
+; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 3, align arg 4)
+  %vec0 = extractvalue [2 x <4 x float>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <4 x float>] %b.coerce, 1
+  %addr = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v4f32(i8* %addr, <4 x float> %vec0, <4 x float> %vec1, i32 3, i32 4)
+  ret void
+}
+
+define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) {
+; CHECK-LABEL: test_vst2q_lane_f64
+; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 1, align arg 8)
+  %vec0 = extractvalue [2 x <2 x double>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <2 x double>] %b.coerce, 1
+  %addr = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v2f64(i8* %addr, <2 x double> %vec0, <2 x double> %vec1, i32 1, i32 8)
+  ret void
+}
+
+define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vst2_lane_s8
+; CHECK: st2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 7, align arg 1)
+  %vec0 = extractvalue [2 x <8 x i8>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <8 x i8>] %b.coerce, 1
+  tail call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, i32 7, i32 1)
+  ret void
+}
+
+define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vst2_lane_s16
+; CHECK: st2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 3, align arg 2)
+  %vec0 = extractvalue [2 x <4 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <4 x i16>] %b.coerce, 1
+  %addr = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v4i16(i8* %addr, <4 x i16> %vec0, <4 x i16> %vec1, i32 3, i32 2)
+  ret void
+}
+
+define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vst2_lane_s32
+; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 1, align arg 4)
+  %vec0 = extractvalue [2 x <2 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <2 x i32>] %b.coerce, 1
+  %addr = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %addr, <2 x i32> %vec0, <2 x i32> %vec1, i32 1, i32 4)
+  ret void
+}
+
+define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vst2_lane_s64
+; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 0, align arg 8)
+  %vec0 = extractvalue [2 x <1 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <1 x i64>] %b.coerce, 1
+  %addr = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %addr, <1 x i64> %vec0, <1 x i64> %vec1, i32 0, i32 8)
+  ret void
+}
+
+define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) {
+; CHECK-LABEL: test_vst2_lane_f32
+; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 1, align arg 4)
+  %vec0 = extractvalue [2 x <2 x float>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <2 x float>] %b.coerce, 1
+  %addr = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, i32 1, i32 4)
+  ret void
+}
+
+define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) {
+; CHECK-LABEL: test_vst2_lane_f64
+; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack both vectors, then vst2lane (lane arg 0, align arg 8)
+  %vec0 = extractvalue [2 x <1 x double>] %b.coerce, 0
+  %vec1 = extractvalue [2 x <1 x double>] %b.coerce, 1
+  %addr = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, i32 0, i32 8)
+  ret void
+}
+
+define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vst3q_lane_s8
+; CHECK: st3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 15, align arg 1)
+  %vec0 = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3lane.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i32 15, i32 1)
+  ret void
+}
+
+define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vst3q_lane_s16
+; CHECK: st3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 7, align arg 2)
+  %vec0 = extractvalue [3 x <8 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <8 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <8 x i16>] %b.coerce, 2
+  %addr = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, i32 7, i32 2)
+  ret void
+}
+
+define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vst3q_lane_s32
+; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 3, align arg 4)
+  %vec0 = extractvalue [3 x <4 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <4 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <4 x i32>] %b.coerce, 2
+  %addr = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, i32 3, i32 4)
+  ret void
+}
+
+define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vst3q_lane_s64
+; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 1, align arg 8)
+  %vec0 = extractvalue [3 x <2 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <2 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x i64>] %b.coerce, 2
+  %addr = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, i32 1, i32 8)
+  ret void
+}
+
+define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) {
+; CHECK-LABEL: test_vst3q_lane_f32
+; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 3, align arg 4)
+  %vec0 = extractvalue [3 x <4 x float>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <4 x float>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <4 x float>] %b.coerce, 2
+  %addr = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v4f32(i8* %addr, <4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, i32 3, i32 4)
+  ret void
+}
+
+define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) {
+; CHECK-LABEL: test_vst3q_lane_f64
+; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 1, align arg 8)
+  %vec0 = extractvalue [3 x <2 x double>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <2 x double>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x double>] %b.coerce, 2
+  %addr = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v2f64(i8* %addr, <2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, i32 1, i32 8)
+  ret void
+}
+
+define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vst3_lane_s8
+; CHECK: st3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 7, align arg 1)
+  %vec0 = extractvalue [3 x <8 x i8>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <8 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <8 x i8>] %b.coerce, 2
+  tail call void @llvm.arm.neon.vst3lane.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i32 7, i32 1)
+  ret void
+}
+
+define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vst3_lane_s16
+; CHECK: st3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 3, align arg 2)
+  %vec0 = extractvalue [3 x <4 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <4 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <4 x i16>] %b.coerce, 2
+  %addr = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %addr, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, i32 3, i32 2)
+  ret void
+}
+
+define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vst3_lane_s32
+; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 1, align arg 4)
+  %vec0 = extractvalue [3 x <2 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <2 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x i32>] %b.coerce, 2
+  %addr = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v2i32(i8* %addr, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, i32 1, i32 4)
+  ret void
+}
+
+define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vst3_lane_s64
+; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 0, align arg 8)
+  %vec0 = extractvalue [3 x <1 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <1 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <1 x i64>] %b.coerce, 2
+  %addr = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %addr, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, i32 0, i32 8)
+  ret void
+}
+
+define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) {
+; CHECK-LABEL: test_vst3_lane_f32
+; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 1, align arg 4)
+  %vec0 = extractvalue [3 x <2 x float>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <2 x float>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <2 x float>] %b.coerce, 2
+  %addr = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, i32 1, i32 4)
+  ret void
+}
+
+define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) {
+; CHECK-LABEL: test_vst3_lane_f64
+; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack three vectors, then vst3lane (lane arg 0, align arg 8)
+  %vec0 = extractvalue [3 x <1 x double>] %b.coerce, 0
+  %vec1 = extractvalue [3 x <1 x double>] %b.coerce, 1
+  %vec2 = extractvalue [3 x <1 x double>] %b.coerce, 2
+  %addr = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, i32 0, i32 8)
+  ret void
+}
+
+define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vst4q_lane_s8
+; CHECK: st4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 15, align arg 1)
+  %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  ; %a is already i8* (as in the other *_lane_s8 tests), so no pointer bitcast is needed and the alignment operand is 1.
+  tail call void @llvm.arm.neon.vst4lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 1)
+  ret void
+}
+
+define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vst4q_lane_s16
+; CHECK: st4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 7, align arg 2)
+  %vec0 = extractvalue [4 x <8 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <8 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <8 x i16>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <8 x i16>] %b.coerce, 3
+  %addr = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v8i16(i8* %addr, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, i32 7, i32 2)
+  ret void
+}
+
+define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vst4q_lane_s32
+; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 3, align arg 4)
+  %vec0 = extractvalue [4 x <4 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <4 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <4 x i32>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <4 x i32>] %b.coerce, 3
+  %addr = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v4i32(i8* %addr, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, i32 3, i32 4)
+  ret void
+}
+
+define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vst4q_lane_s64
+; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 1, align arg 8)
+  %vec0 = extractvalue [4 x <2 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x i64>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x i64>] %b.coerce, 3
+  %addr = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v2i64(i8* %addr, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, <2 x i64> %vec3, i32 1, i32 8)
+  ret void
+}
+
+define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) {
+; CHECK-LABEL: test_vst4q_lane_f32
+; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 3, align arg 4)
+  %vec0 = extractvalue [4 x <4 x float>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <4 x float>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <4 x float>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <4 x float>] %b.coerce, 3
+  %addr = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v4f32(i8* %addr, <4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, i32 3, i32 4)
+  ret void
+}
+
+define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) {
+; CHECK-LABEL: test_vst4q_lane_f64
+; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 1, align arg 8)
+  %vec0 = extractvalue [4 x <2 x double>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x double>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x double>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x double>] %b.coerce, 3
+  %addr = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %addr, <2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, i32 1, i32 8)
+  ret void
+}
+
+define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
+; CHECK-LABEL: test_vst4_lane_s8
+; CHECK: st4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 7, align arg 1)
+  %vec0 = extractvalue [4 x <8 x i8>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <8 x i8>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <8 x i8>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <8 x i8>] %b.coerce, 3
+  tail call void @llvm.arm.neon.vst4lane.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3, i32 7, i32 1)
+  ret void
+}
+
+define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
+; CHECK-LABEL: test_vst4_lane_s16
+; CHECK: st4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 3, align arg 2)
+  %vec0 = extractvalue [4 x <4 x i16>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <4 x i16>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <4 x i16>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <4 x i16>] %b.coerce, 3
+  %addr = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v4i16(i8* %addr, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3, i32 3, i32 2)
+  ret void
+}
+
+define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
+; CHECK-LABEL: test_vst4_lane_s32
+; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 1, align arg 4)
+  %vec0 = extractvalue [4 x <2 x i32>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x i32>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x i32>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x i32>] %b.coerce, 3
+  %addr = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v2i32(i8* %addr, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3, i32 1, i32 4)
+  ret void
+}
+
+define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
+; CHECK-LABEL: test_vst4_lane_s64
+; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 0, align arg 8)
+  %vec0 = extractvalue [4 x <1 x i64>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <1 x i64>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <1 x i64>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <1 x i64>] %b.coerce, 3
+  %addr = bitcast i64* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %addr, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3, i32 0, i32 8)
+  ret void
+}
+
+define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) {
+; CHECK-LABEL: test_vst4_lane_f32
+; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 1, align arg 4)
+  %vec0 = extractvalue [4 x <2 x float>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <2 x float>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <2 x float>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <2 x float>] %b.coerce, 3
+  %addr = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %addr, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, <2 x float> %vec3, i32 1, i32 4)
+  ret void
+}
+
+define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) {
+; CHECK-LABEL: test_vst4_lane_f64
+; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
+entry:                                            ; unpack four vectors, then vst4lane (lane arg 0, align arg 8)
+  %vec0 = extractvalue [4 x <1 x double>] %b.coerce, 0
+  %vec1 = extractvalue [4 x <1 x double>] %b.coerce, 1
+  %vec2 = extractvalue [4 x <1 x double>] %b.coerce, 2
+  %vec3 = extractvalue [4 x <1 x double>] %b.coerce, 3
+  %addr = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v1f64(i8* %addr, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, <1 x double> %vec3, i32 0, i32 8)
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
+declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
+declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
+declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32)
+declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32)
+declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32)
+declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
+declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32)
+declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
+declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
+declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32)
+declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32)
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
+declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
+declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32)
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll
new file mode 100644
index 000000000000..afc0901bbc0b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-ldst.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define void @test_ldstq_4v(i8* noalias %io, i32 %count) { ; ld4 then st4 of four <16 x i8> at %io, repeated %count times; expected output pins v0-v3 and x0
+; CHECK-LABEL: test_ldstq_4v
+; CHECK: ld4     {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+; CHECK: st4     {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+entry:
+  %tobool62 = icmp eq i32 %count, 0
+  br i1 %tobool62, label %while.end, label %while.body ; a zero count bypasses the loop
+
+while.body:                                       ; preds = %entry, %while.body
+  %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+  %dec = add i32 %count.addr.063, -1
+  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1)
+  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
+  tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1) ; stores the loaded vectors back unmodified to the same address
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
+
+define void @test_ldstq_3v(i8* noalias %io, i32 %count) { ; ld3 then st3 of three <16 x i8> at %io, repeated %count times; expected output pins v0-v2 and x0
+; CHECK-LABEL: test_ldstq_3v
+; CHECK: ld3     {v0.16b, v1.16b, v2.16b}, [x0]
+; CHECK: st3     {v0.16b, v1.16b, v2.16b}, [x0]
+entry:
+  %tobool47 = icmp eq i32 %count, 0
+  br i1 %tobool47, label %while.end, label %while.body ; a zero count bypasses the loop
+
+while.body:                                       ; preds = %entry, %while.body
+  %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+  %dec = add i32 %count.addr.048, -1
+  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1)
+  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
+  tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1) ; stores the loaded vectors back unmodified to the same address
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
+
+define void @test_ldstq_2v(i8* noalias %io, i32 %count) { ; ld2 then st2 of two <16 x i8> at %io, repeated %count times; expected output pins v0-v1 and x0
+; CHECK-LABEL: test_ldstq_2v
+; CHECK: ld2     {v0.16b, v1.16b}, [x0]
+; CHECK: st2     {v0.16b, v1.16b}, [x0]
+entry:
+  %tobool22 = icmp eq i32 %count, 0
+  br i1 %tobool22, label %while.end, label %while.body ; a zero count bypasses the loop
+
+while.body:                                       ; preds = %entry, %while.body
+  %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+  %dec = add i32 %count.addr.023, -1
+  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1)
+  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
+  tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1) ; stores the loaded vectors back unmodified to the same address
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
+
+define void @test_ldst_4v(i8* noalias %io, i32 %count) { ; 64-bit variant: ld4 then st4 of four <8 x i8> at %io, repeated %count times; expected output pins v0-v3 and x0
+; CHECK-LABEL: test_ldst_4v
+; CHECK: ld4     {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+; CHECK: st4     {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+entry:
+  %tobool42 = icmp eq i32 %count, 0
+  br i1 %tobool42, label %while.end, label %while.body ; a zero count bypasses the loop
+
+while.body:                                       ; preds = %entry, %while.body
+  %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+  %dec = add i32 %count.addr.043, -1
+  %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1)
+  %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
+  %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
+  %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
+  %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
+  tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1) ; stores the loaded vectors back unmodified to the same address
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+
+define void @test_ldst_3v(i8* noalias %io, i32 %count) { ; 64-bit variant: ld3 then st3 of three <8 x i8> at %io, repeated %count times; expected output pins v0-v2 and x0
+; CHECK-LABEL: test_ldst_3v
+; CHECK: ld3     {v0.8b, v1.8b, v2.8b}, [x0]
+; CHECK: st3     {v0.8b, v1.8b, v2.8b}, [x0]
+entry:
+  %tobool32 = icmp eq i32 %count, 0
+  br i1 %tobool32, label %while.end, label %while.body ; a zero count bypasses the loop
+
+while.body:                                       ; preds = %entry, %while.body
+  %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+  %dec = add i32 %count.addr.033, -1
+  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1)
+  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
+  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
+  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
+  tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1) ; stores the loaded vectors back unmodified to the same address
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+
+define void @test_ldst_2v(i8* noalias %io, i32 %count) { ; 64-bit variant: ld2 then st2 of two <8 x i8> at %io, repeated %count times; expected output pins v0-v1 and x0
+; CHECK-LABEL: test_ldst_2v
+; CHECK: ld2     {v0.8b, v1.8b}, [x0]
+; CHECK: st2     {v0.8b, v1.8b}, [x0]
+entry:
+  %tobool22 = icmp eq i32 %count, 0
+  br i1 %tobool22, label %while.end, label %while.body ; a zero count bypasses the loop
+
+while.body:                                       ; preds = %entry, %while.body
+  %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] ; iterations still to run
+  %dec = add i32 %count.addr.023, -1
+  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1)
+  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
+  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
+  tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1) ; stores the loaded vectors back unmodified to the same address
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
+
diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
new file mode 100644
index 000000000000..156fe1db0ff5
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
@@ -0,0 +1,354 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+; Post-increment ld1 with an immediate: the pointer stored back to %ptr is the load address plus the 8 bytes loaded.
+define <4 x i16> @test_vld1_fx_update(i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_fx_update:
+; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}], #8
+  %A = load i16** %ptr
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 2)
+  %tmp2 = getelementptr i16* %A, i32 4 ; 4 x i16 = 8 bytes, i.e. the expected #8 write-back
+  store i16* %tmp2, i16** %ptr
+  ret <4 x i16> %tmp1
+}
+
+; Post-increment ld1 where the write-back step is the run-time value %inc, so a register post-index is expected.
+define <2 x i32> @test_vld1_reg_update(i32** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_reg_update:
+; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i32** %ptr
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 4)
+  %tmp2 = getelementptr i32* %A, i32 %inc ; step is not a constant, hence the x-register operand
+  store i32* %tmp2, i32** %ptr
+  ret <2 x i32> %tmp1
+}
+
+define <2 x float> @test_vld2_fx_update(float** %ptr) nounwind { ; ld2 with immediate write-back
+; CHECK-LABEL: test_vld2_fx_update:
+; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #16
+  %A = load float** %ptr
+  %tmp0 = bitcast float* %A to i8*
+  %tmp1 = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 4)
+  %tmp2 = extractvalue { <2 x float>, <2 x float> } %tmp1, 0
+  %tmp3 = getelementptr float* %A, i32 4 ; 4 x float = 16 bytes, the size of both vectors loaded
+  store float* %tmp3, float** %ptr
+  ret <2 x float> %tmp2
+}
+
+define <16 x i8> @test_vld2_reg_update(i8** %ptr, i32 %inc) nounwind { ; ld2 with register write-back
+; CHECK-LABEL: test_vld2_reg_update:
+; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i8** %ptr
+  %tmp0 = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1)
+  %tmp1 = extractvalue { <16 x i8>, <16 x i8> } %tmp0, 0
+  %tmp2 = getelementptr i8* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i8* %tmp2, i8** %ptr
+  ret <16 x i8> %tmp1
+}
+
+define <4 x i32> @test_vld3_fx_update(i32** %ptr) nounwind { ; ld3 with immediate write-back
+; CHECK-LABEL: test_vld3_fx_update:
+; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], #48
+  %A = load i32** %ptr
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 4)
+  %tmp2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %tmp1, 0
+  %tmp3 = getelementptr i32* %A, i32 12 ; 12 x i32 = 48 bytes, the size of the three vectors loaded
+  store i32* %tmp3, i32** %ptr
+  ret <4 x i32> %tmp2
+}
+
+define <4 x i16> @test_vld3_reg_update(i16** %ptr, i32 %inc) nounwind { ; ld3 with register write-back
+; CHECK-LABEL: test_vld3_reg_update:
+; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i16** %ptr
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 2)
+  %tmp2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %tmp1, 0
+  %tmp3 = getelementptr i16* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i16* %tmp3, i16** %ptr
+  ret <4 x i16> %tmp2
+}
+
+define <8 x i16> @test_vld4_fx_update(i16** %ptr) nounwind { ; ld4 with immediate write-back
+; CHECK-LABEL: test_vld4_fx_update:
+; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], #64
+  %A = load i16** %ptr
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
+  %tmp2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %tmp1, 0
+  %tmp3 = getelementptr i16* %A, i32 32 ; 32 x i16 = 64 bytes, the size of the four vectors loaded
+  store i16* %tmp3, i16** %ptr
+  ret <8 x i16> %tmp2
+}
+
+define <8 x i8> @test_vld4_reg_update(i8** %ptr, i32 %inc) nounwind { ; ld4 with register write-back
+; CHECK-LABEL: test_vld4_reg_update:
+; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i8** %ptr
+  %tmp0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1)
+  %tmp1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %tmp0, 0
+  %tmp2 = getelementptr i8* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i8* %tmp2, i8** %ptr
+  ret <8 x i8> %tmp1
+}
+
+define void @test_vst1_fx_update(float** %ptr, <2 x float> %B) nounwind { ; st1 with immediate write-back
+; CHECK-LABEL: test_vst1_fx_update:
+; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #8
+  %A = load float** %ptr
+  %tmp0 = bitcast float* %A to i8*
+  call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %B, i32 4)
+  %tmp2 = getelementptr float* %A, i32 2 ; 2 x float = 8 bytes, the size of the vector stored
+  store float* %tmp2, float** %ptr
+  ret void
+}
+
+define void @test_vst1_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { ; st1 with register write-back
+; CHECK-LABEL: test_vst1_reg_update:
+; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i16** %ptr
+  %tmp0 = bitcast i16* %A to i8*
+  call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %B, i32 2)
+  %tmp1 = getelementptr i16* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i16* %tmp1, i16** %ptr
+  ret void
+}
+
+define void @test_vst2_fx_update(i64** %ptr, <1 x i64> %B) nounwind { ; vst2 of <1 x i64> pairs; note the expected mnemonic is st1 with a .1d register pair, not st2
+; CHECK-LABEL: test_vst2_fx_update:
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}], #16
+  %A = load i64** %ptr
+  %tmp0 = bitcast i64* %A to i8*
+  call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %B, <1 x i64> %B, i32 8)
+  %tmp1 = getelementptr i64* %A, i32 2 ; 2 x i64 = 16 bytes, the size of both vectors stored
+  store i64* %tmp1, i64** %ptr
+  ret void
+}
+
+define void @test_vst2_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { ; st2 with register write-back
+; CHECK-LABEL: test_vst2_reg_update:
+; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i8** %ptr
+  call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, i32 4)
+  %tmp0 = getelementptr i8* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i8* %tmp0, i8** %ptr
+  ret void
+}
+
+define void @test_vst3_fx_update(i32** %ptr, <2 x i32> %B) nounwind { ; st3 with immediate write-back
+; CHECK-LABEL: test_vst3_fx_update:
+; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #24
+  %A = load i32** %ptr
+  %tmp0 = bitcast i32* %A to i8*
+  call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %B, <2 x i32> %B, <2 x i32> %B, i32 4)
+  %tmp1 = getelementptr i32* %A, i32 6 ; 6 x i32 = 24 bytes, the size of the three vectors stored
+  store i32* %tmp1, i32** %ptr
+  ret void
+}
+
+define void @test_vst3_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { ; st3 with register write-back
+; CHECK-LABEL: test_vst3_reg_update:
+; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i16** %ptr
+  %tmp0 = bitcast i16* %A to i8*
+  call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %B, <8 x i16> %B, <8 x i16> %B, i32 2)
+  %tmp1 = getelementptr i16* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i16* %tmp1, i16** %ptr
+  ret void
+}
+
+define void @test_vst4_fx_update(float** %ptr, <4 x float> %B) nounwind { ; st4 with immediate write-back
+; CHECK-LABEL: test_vst4_fx_update:
+; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], #64
+  %A = load float** %ptr
+  %tmp0 = bitcast float* %A to i8*
+  call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %B, <4 x float> %B, <4 x float> %B, <4 x float> %B, i32 4)
+  %tmp1 = getelementptr float* %A, i32 16 ; 16 x float = 64 bytes, the size of the four vectors stored
+  store float* %tmp1, float** %ptr
+  ret void
+}
+
+define void @test_vst4_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { ; st4 with register write-back
+; CHECK-LABEL: test_vst4_reg_update:
+; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %A = load i8** %ptr
+  call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, i32 1)
+  %tmp0 = getelementptr i8* %A, i32 %inc ; run-time step, hence the x-register operand
+  store i8* %tmp0, i8** %ptr
+  ret void
+}
+
+
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
+declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
+
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+
+define <16 x i8> @test_vld1x2_fx_update(i8* %a, i8** %ptr) { ; two-register ld1 with immediate write-back
+; CHECK-LABEL: test_vld1x2_fx_update:
+; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}], #32
+  %1 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
+  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
+  %tmp1 = getelementptr i8* %a, i32 32 ; 32 bytes, the size of both vectors loaded
+  store i8* %tmp1, i8** %ptr
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vld1x2_reg_update(i16* %a, i16** %ptr, i32 %inc) { ; two-register ld1 with register write-back
+; CHECK-LABEL: test_vld1x2_reg_update:
+; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2)
+  %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0
+  %tmp1 = getelementptr i16* %a, i32 %inc ; run-time step, hence the x-register operand
+  store i16* %tmp1, i16** %ptr
+  ret <8 x i16> %3
+}
+
+define <2 x i64> @test_vld1x3_fx_update(i64* %a, i64** %ptr) { ; three-register ld1 with immediate write-back
+; CHECK-LABEL: test_vld1x3_fx_update:
+; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}], #48
+  %1 = bitcast i64* %a to i8*
+  %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
+  %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
+  %tmp1 = getelementptr i64* %a, i32 6 ; 6 x i64 = 48 bytes, the size of the three vectors loaded
+  store i64* %tmp1, i64** %ptr
+  ret <2 x i64> %3
+}
+
+define <8 x i16> @test_vld1x3_reg_update(i16* %a, i16** %ptr, i32 %inc) { ; three-register ld1 with register write-back
+; CHECK-LABEL: test_vld1x3_reg_update:
+; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
+  %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
+  %tmp1 = getelementptr i16* %a, i32 %inc ; run-time step, hence the x-register operand
+  store i16* %tmp1, i16** %ptr
+  ret <8 x i16> %3
+}
+
+define <4 x float> @test_vld1x4_fx_update(float* %a, float** %ptr) { ; four-register ld1 with immediate write-back
+; CHECK-LABEL: test_vld1x4_fx_update:
+; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], #64
+  %1 = bitcast float* %a to i8*
+  %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
+  %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
+  %tmp1 = getelementptr float* %a, i32 16 ; 16 x float = 64 bytes, the size of the four vectors loaded
+  store float* %tmp1, float** %ptr
+  ret <4 x float> %3
+}
+
+define <8 x i8> @test_vld1x4_reg_update(i8* readonly %a, i8** %ptr, i32 %inc) { ; four-register ld1 with register write-back
+; CHECK-LABEL: test_vld1x4_reg_update:
+; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
+  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
+  %tmp1 = getelementptr i8* %a, i32 %inc ; run-time step, hence the x-register operand
+  store i8* %tmp1, i8** %ptr
+  ret <8 x i8> %2
+}
+
+define void @test_vst1x2_fx_update(i8* %a, [2 x <16 x i8>] %b.coerce, i8** %ptr) { ; two-register st1 with immediate write-back
+; CHECK-LABEL: test_vst1x2_fx_update:
+; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}], #32
+  %1 = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %2 = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1)
+  %tmp1 = getelementptr i8* %a, i32 32 ; 32 bytes, the size of both vectors stored
+  store i8* %tmp1, i8** %ptr
+  ret void
+}
+
+define void @test_vst1x2_reg_update(i16* %a, [2 x <8 x i16>] %b.coerce, i16** %ptr, i32 %inc) { ; two-register st1 with register write-back
+; CHECK-LABEL: test_vst1x2_reg_update:
+; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [2 x <8 x i16>] %b.coerce, 0
+  %2 = extractvalue [2 x <8 x i16>] %b.coerce, 1
+  %3 = bitcast i16* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2)
+  %tmp1 = getelementptr i16* %a, i32 %inc ; run-time step, hence the x-register operand
+  store i16* %tmp1, i16** %ptr
+  ret void
+}
+
+define void @test_vst1x3_fx_update(i32* %a, [3 x <2 x i32>] %b.coerce, i32** %ptr) { ; three-register st1 with immediate write-back
+; CHECK-LABEL: test_vst1x3_fx_update:
+; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #24
+  %1 = extractvalue [3 x <2 x i32>] %b.coerce, 0
+  %2 = extractvalue [3 x <2 x i32>] %b.coerce, 1
+  %3 = extractvalue [3 x <2 x i32>] %b.coerce, 2
+  %4 = bitcast i32* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
+  %tmp1 = getelementptr i32* %a, i32 6 ; 6 x i32 = 24 bytes, the size of the three vectors stored
+  store i32* %tmp1, i32** %ptr
+  ret void
+}
+
+define void @test_vst1x3_reg_update(i64* %a, [3 x <1 x i64>] %b.coerce, i64** %ptr, i32 %inc) { ; three-register st1 with register write-back
+; CHECK-LABEL: test_vst1x3_reg_update:
+; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [3 x <1 x i64>] %b.coerce, 0
+  %2 = extractvalue [3 x <1 x i64>] %b.coerce, 1
+  %3 = extractvalue [3 x <1 x i64>] %b.coerce, 2
+  %4 = bitcast i64* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
+  %tmp1 = getelementptr i64* %a, i32 %inc ; run-time step, hence the x-register operand
+  store i64* %tmp1, i64** %ptr
+  ret void
+}
+
+define void @test_vst1x4_fx_update(float* %a, [4 x <4 x float>] %b.coerce, float** %ptr) { ; four-register st1 with immediate write-back
+; CHECK-LABEL: test_vst1x4_fx_update:
+; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], #64
+  %1 = extractvalue [4 x <4 x float>] %b.coerce, 0
+  %2 = extractvalue [4 x <4 x float>] %b.coerce, 1
+  %3 = extractvalue [4 x <4 x float>] %b.coerce, 2
+  %4 = extractvalue [4 x <4 x float>] %b.coerce, 3
+  %5 = bitcast float* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
+  %tmp1 = getelementptr float* %a, i32 16 ; 16 x float = 64 bytes, the size of the four vectors stored
+  store float* %tmp1, float** %ptr
+  ret void
+}
+
+define void @test_vst1x4_reg_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr, i32 %inc) { ; four-register st1 with register write-back
+; CHECK-LABEL: test_vst1x4_reg_update:
+; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [4 x <2 x double>] %b.coerce, 0
+  %2 = extractvalue [4 x <2 x double>] %b.coerce, 1
+  %3 = extractvalue [4 x <2 x double>] %b.coerce, 2
+  %4 = extractvalue [4 x <2 x double>] %b.coerce, 3
+  %5 = bitcast double* %a to i8*
+  tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
+  %tmp1 = getelementptr double* %a, i32 %inc ; run-time step, hence the x-register operand
+  store double* %tmp1, double** %ptr
+  ret void
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) ; multi-register ld1/st1 intrinsics called by the vld1x*/vst1x* tests in this file
+declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
+declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
+declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
+declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
+declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
new file mode 100644
index 000000000000..80a934700c6b
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
@@ -0,0 +1,319 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) { ; lane-0 load + splat of lane 0 should select ld2r with an immediate post-index
+; CHECK-LABEL: test_vld2q_dup_fx_update
+; CHECK: ld2r  {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}], #2
+  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
+  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
+  %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
+  %4 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
+  %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer
+  %6 = insertvalue { [2 x <16 x i8>] } undef, <16 x i8> %3, 0, 0
+  %7 = insertvalue { [2 x <16 x i8>] } %6, <16 x i8> %5, 0, 1
+  %tmp1 = getelementptr i8* %a, i32 2 ; 2 x i8 = 2 bytes, the "#2" write-back amount
+  store i8* %tmp1, i8** %ptr
+  ret { [2 x <16 x i8>] } %7
+}
+
+define { [2 x <4 x i32>] } @test_vld2q_dup_reg_update(i32* %a, i32** %ptr, i32 %inc) { ; lane-0 load + splat of lane 0 should select ld2r with a register post-index
+; CHECK-LABEL: test_vld2q_dup_reg_update
+; CHECK: ld2r  {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = bitcast i32* %a to i8*
+  %2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %1, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
+  %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %5 = extractvalue { <4 x i32>, <4 x i32> } %2, 1
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> zeroinitializer
+  %7 = insertvalue { [2 x <4 x i32>] } undef, <4 x i32> %4, 0, 0
+  %8 = insertvalue { [2 x <4 x i32>] } %7, <4 x i32> %6, 0, 1
+  %tmp1 = getelementptr i32* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store i32* %tmp1, i32** %ptr
+  ret { [2 x <4 x i32>] } %8
+}
+
+define { [3 x <4 x i16>] } @test_vld3_dup_fx_update(i16* %a, i16** %ptr) { ; lane-0 load + splat of lane 0 should select ld3r with an immediate post-index
+; CHECK-LABEL: test_vld3_dup_fx_update
+; CHECK: ld3r  {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}], #6
+  %1 = bitcast i16* %a to i8*
+  %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %1, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+  %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
+  %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
+  %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
+  %6 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> zeroinitializer
+  %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
+  %8 = shufflevector <4 x i16> %7, <4 x i16> undef, <4 x i32> zeroinitializer
+  %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %4, 0, 0
+  %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %6, 0, 1
+  %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2
+  %tmp1 = getelementptr i16* %a, i32 3 ; 3 x i16 = 6 bytes, the "#6" write-back amount
+  store i16* %tmp1, i16** %ptr
+  ret { [3 x <4 x i16>] } %11
+}
+
+define { [3 x <8 x i8>] } @test_vld3_dup_reg_update(i8* %a, i8** %ptr, i32 %inc) { ; lane-0 load + splat of lane 0 should select ld3r with a register post-index
+; CHECK-LABEL: test_vld3_dup_reg_update
+; CHECK: ld3r  {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+  %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
+  %3 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
+  %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
+  %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <8 x i32> zeroinitializer
+  %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
+  %7 = shufflevector <8 x i8> %6, <8 x i8> undef, <8 x i32> zeroinitializer
+  %8 = insertvalue { [3 x <8 x i8>] } undef, <8 x i8> %3, 0, 0
+  %9 = insertvalue { [3 x <8 x i8>] } %8, <8 x i8> %5, 0, 1
+  %10 = insertvalue { [3 x <8 x i8>] } %9, <8 x i8> %7, 0, 2
+  %tmp1 = getelementptr i8* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store i8* %tmp1, i8** %ptr
+  ret { [3 x <8 x i8>] } %10
+}
+
+define { [4 x <2 x i32>] } @test_vld4_dup_fx_update(i32* %a, i32** %ptr) { ; lane-0 load + splat of lane 0 should select ld4r with an immediate post-index
+; CHECK-LABEL: test_vld4_dup_fx_update
+; CHECK: ld4r  {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #16
+  %1 = bitcast i32* %a to i8*
+  %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %1, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
+  %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
+  %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
+  %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
+  %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <2 x i32> zeroinitializer
+  %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
+  %8 = shufflevector <2 x i32> %7, <2 x i32> undef, <2 x i32> zeroinitializer
+  %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
+  %10 = shufflevector <2 x i32> %9, <2 x i32> undef, <2 x i32> zeroinitializer
+  %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %4, 0, 0
+  %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %6, 0, 1
+  %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %8, 0, 2
+  %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3
+  %tmp1 = getelementptr i32* %a, i32 4 ; 4 x i32 = 16 bytes, the "#16" write-back amount
+  store i32* %tmp1, i32** %ptr
+  ret { [4 x <2 x i32>] } %14
+}
+
+define { [4 x <2 x double>] } @test_vld4_dup_reg_update(double* %a, double** %ptr, i32 %inc) { ; lane-0 load + splat of lane 0 should select ld4r with a register post-index
+; CHECK-LABEL: test_vld4_dup_reg_update
+; CHECK: ld4r  {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = bitcast double* %a to i8*
+  %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %1, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
+  %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
+  %4 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
+  %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
+  %6 = shufflevector <2 x double> %5, <2 x double> undef, <2 x i32> zeroinitializer
+  %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
+  %8 = shufflevector <2 x double> %7, <2 x double> undef, <2 x i32> zeroinitializer
+  %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
+  %10 = shufflevector <2 x double> %9, <2 x double> undef, <2 x i32> zeroinitializer
+  %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %4, 0, 0
+  %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %6, 0, 1
+  %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %8, 0, 2
+  %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3
+  %tmp1 = getelementptr double* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store double* %tmp1, double** %ptr
+  ret { [4 x <2 x double>] } %14
+}
+
+define { [2 x <8 x i8>] } @test_vld2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { ; lane-7 load into %b should select single-lane ld2 with an immediate post-index
+; CHECK-LABEL: test_vld2_lane_fx_update
+; CHECK: ld2  {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}], #2
+  %1 = extractvalue [2 x <8 x i8>] %b, 0
+  %2 = extractvalue [2 x <8 x i8>] %b, 1
+  %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+  %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0
+  %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1
+  %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0
+  %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1
+  %tmp1 = getelementptr i8* %a, i32 2 ; 2 x i8 = 2 bytes, the "#2" write-back amount
+  store i8* %tmp1, i8** %ptr
+  ret { [2 x <8 x i8>] } %7
+}
+
+define { [2 x <8 x i8>] } @test_vld2_lane_reg_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr, i32 %inc) { ; lane-6 load into %b should select single-lane ld2 with a register post-index
+; CHECK-LABEL: test_vld2_lane_reg_update
+; CHECK: ld2  {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[6], [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [2 x <8 x i8>] %b, 0
+  %2 = extractvalue [2 x <8 x i8>] %b, 1
+  %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 6, i32 1)
+  %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0
+  %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1
+  %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0
+  %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1
+  %tmp1 = getelementptr i8* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store i8* %tmp1, i8** %ptr
+  ret { [2 x <8 x i8>] } %7
+}
+
+define { [3 x <2 x float>] } @test_vld3_lane_fx_update(float* %a, [3 x <2 x float>] %b, float** %ptr) { ; lane-1 load into %b should select single-lane ld3 with an immediate post-index
+; CHECK-LABEL: test_vld3_lane_fx_update
+; CHECK: ld3  {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], #12
+  %1 = extractvalue [3 x <2 x float>] %b, 0
+  %2 = extractvalue [3 x <2 x float>] %b, 1
+  %3 = extractvalue [3 x <2 x float>] %b, 2
+  %4 = bitcast float* %a to i8*
+  %5 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4)
+  %6 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 0
+  %7 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 1
+  %8 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 2
+  %9 = insertvalue { [3 x <2 x float>] } undef, <2 x float> %6, 0, 0
+  %10 = insertvalue { [3 x <2 x float>] } %9, <2 x float> %7, 0, 1
+  %11 = insertvalue { [3 x <2 x float>] } %10, <2 x float> %8, 0, 2
+  %tmp1 = getelementptr float* %a, i32 3 ; 3 x float = 12 bytes, the "#12" write-back amount
+  store float* %tmp1, float** %ptr
+  ret { [3 x <2 x float>] } %11
+}
+
+define { [3 x <4 x i16>] } @test_vld3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { ; lane-3 load into %b should select single-lane ld3 with a register post-index
+; CHECK-LABEL: test_vld3_lane_reg_update
+; CHECK: ld3  {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [3 x <4 x i16>] %b, 0
+  %2 = extractvalue [3 x <4 x i16>] %b, 1
+  %3 = extractvalue [3 x <4 x i16>] %b, 2
+  %4 = bitcast i16* %a to i8*
+  %5 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
+  %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 0
+  %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 1
+  %8 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 2
+  %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %6, 0, 0
+  %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %7, 0, 1
+  %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2
+  %tmp1 = getelementptr i16* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store i16* %tmp1, i16** %ptr
+  ret { [3 x <4 x i16>] } %11
+}
+
+define { [4 x <2 x i32>] } @test_vld4_lane_fx_update(i32* readonly %a, [4 x <2 x i32>] %b, i32** %ptr) { ; lane-1 load into %b should select single-lane ld4 with an immediate post-index
+; CHECK-LABEL: test_vld4_lane_fx_update
+; CHECK: ld4  {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], #16
+  %1 = extractvalue [4 x <2 x i32>] %b, 0
+  %2 = extractvalue [4 x <2 x i32>] %b, 1
+  %3 = extractvalue [4 x <2 x i32>] %b, 2
+  %4 = extractvalue [4 x <2 x i32>] %b, 3
+  %5 = bitcast i32* %a to i8*
+  %6 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 1, i32 4)
+  %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 0
+  %8 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 1
+  %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 2
+  %10 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 3
+  %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %7, 0, 0
+  %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %8, 0, 1
+  %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %9, 0, 2
+  %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3
+  %tmp1 = getelementptr i32* %a, i32 4 ; 4 x i32 = 16 bytes, the "#16" write-back amount
+  store i32* %tmp1, i32** %ptr
+  ret { [4 x <2 x i32>] } %14
+}
+
+define { [4 x <2 x double>] } @test_vld4_lane_reg_update(double* readonly %a, [4 x <2 x double>] %b, double** %ptr, i32 %inc) { ; lane-1 load into %b should select single-lane ld4 with a register post-index
+; CHECK-LABEL: test_vld4_lane_reg_update
+; CHECK: ld4  {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [4 x <2 x double>] %b, 0
+  %2 = extractvalue [4 x <2 x double>] %b, 1
+  %3 = extractvalue [4 x <2 x double>] %b, 2
+  %4 = extractvalue [4 x <2 x double>] %b, 3
+  %5 = bitcast double* %a to i8*
+  %6 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8)
+  %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 0
+  %8 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 1
+  %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 2
+  %10 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 3
+  %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %7, 0, 0
+  %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %8, 0, 1
+  %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %9, 0, 2
+  %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3
+  %tmp1 = getelementptr double* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store double* %tmp1, double** %ptr
+  ret { [4 x <2 x double>] } %14
+}
+
+define void @test_vst2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { ; lane-7 store should select single-lane st2 with an immediate post-index
+; CHECK-LABEL: test_vst2_lane_fx_update
+; CHECK: st2  {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [{{x[0-9]+|sp}}], #2
+  %1 = extractvalue [2 x <8 x i8>] %b, 0
+  %2 = extractvalue [2 x <8 x i8>] %b, 1
+  call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
+  %tmp1 = getelementptr i8* %a, i32 2 ; 2 x i8 = 2 bytes, the "#2" write-back amount
+  store i8* %tmp1, i8** %ptr
+  ret void
+}
+
+define void @test_vst2_lane_reg_update(i32* %a, [2 x <2 x i32>] %b.coerce, i32** %ptr, i32 %inc) { ; lane-1 store should select single-lane st2 with a register post-index
+; CHECK-LABEL: test_vst2_lane_reg_update
+; CHECK: st2  {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [2 x <2 x i32>] %b.coerce, 0
+  %2 = extractvalue [2 x <2 x i32>] %b.coerce, 1
+  %3 = bitcast i32* %a to i8*
+  tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
+  %tmp1 = getelementptr i32* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store i32* %tmp1, i32** %ptr
+  ret void
+}
+
+define void @test_vst3_lane_fx_update(float* %a, [3 x <4 x float>] %b, float** %ptr) { ; lane-3 store should select single-lane st3 with an immediate post-index
+; CHECK-LABEL: test_vst3_lane_fx_update
+; CHECK: st3  {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [{{x[0-9]+|sp}}], #12
+  %1 = extractvalue [3 x <4 x float>] %b, 0
+  %2 = extractvalue [3 x <4 x float>] %b, 1
+  %3 = extractvalue [3 x <4 x float>] %b, 2
+  %4 = bitcast float* %a to i8*
+  call void @llvm.arm.neon.vst3lane.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 3, i32 4)
+  %tmp1 = getelementptr float* %a, i32 3 ; 3 x float = 12 bytes, the "#12" write-back amount
+  store float* %tmp1, float** %ptr
+  ret void
+}
+
+; Lane-3 store of three <4 x i16> vectors; should select single-lane st3 with a register post-index.
+define void @test_vst3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) {
+; CHECK-LABEL: test_vst3_lane_reg_update
+; CHECK: st3  {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [3 x <4 x i16>] %b, 0
+  %2 = extractvalue [3 x <4 x i16>] %b, 1
+  %3 = extractvalue [3 x <4 x i16>] %b, 2
+  %4 = bitcast i16* %a to i8*
+  tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
+  %tmp1 = getelementptr i16* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store i16* %tmp1, i16** %ptr
+  ret void
+}
+
+define void @test_vst4_lane_fx_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr) { ; lane-1 store should select single-lane st4 with an immediate post-index
+; CHECK-LABEL: test_vst4_lane_fx_update
+; CHECK: st4  {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [{{x[0-9]+|sp}}], #32
+  %1 = extractvalue [4 x <2 x double>] %b.coerce, 0
+  %2 = extractvalue [4 x <2 x double>] %b.coerce, 1
+  %3 = extractvalue [4 x <2 x double>] %b.coerce, 2
+  %4 = extractvalue [4 x <2 x double>] %b.coerce, 3
+  %5 = bitcast double* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8)
+  %tmp1 = getelementptr double* %a, i32 4 ; 4 x double = 32 bytes, the "#32" write-back amount
+  store double* %tmp1, double** %ptr
+  ret void
+}
+
+
+define void @test_vst4_lane_reg_update(float* %a, [4 x <2 x float>] %b.coerce, float** %ptr, i32 %inc) { ; lane-1 store should select single-lane st4 with a register post-index
+; CHECK-LABEL: test_vst4_lane_reg_update
+; CHECK: st4  {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [{{x[0-9]+|sp}}], x{{[0-9]+}}
+  %1 = extractvalue [4 x <2 x float>] %b.coerce, 0
+  %2 = extractvalue [4 x <2 x float>] %b.coerce, 1
+  %3 = extractvalue [4 x <2 x float>] %b.coerce, 2
+  %4 = extractvalue [4 x <2 x float>] %b.coerce, 3
+  %5 = bitcast float* %a to i8*
+  tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 1, i32 4)
+  %tmp1 = getelementptr float* %a, i32 %inc ; stride only known at run time, hence the x-register write-back
+  store float* %tmp1, float** %ptr
+  ret void
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
+declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
+declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
+declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
+declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll
new file mode 100644
index 000000000000..fd762656e56e
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-shift.ll
@@ -0,0 +1,1556 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshr_n_s8:
+; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <8 x i8> %vshr_n
+}
+
+define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshr_n_s16:
+; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %vshr_n
+}
+
+define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshr_n_s32:
+; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
+  ret <2 x i32> %vshr_n
+}
+
+define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshrq_n_s8:
+; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %vshr_n
+}
+
+define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshrq_n_s16:
+; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %vshr_n
+}
+
+define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshrq_n_s32:
+; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %vshr_n
+}
+
+define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) { ; splat ashr by 3 should select sshr #3
+; CHECK-LABEL: test_vshrq_n_s64:
+; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
+  ret <2 x i64> %vshr_n
+}
+
+define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshr_n_u8:
+; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <8 x i8> %vshr_n
+}
+
+define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshr_n_u16:
+; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %vshr_n
+}
+
+define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshr_n_u32:
+; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
+  ret <2 x i32> %vshr_n
+}
+
+define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshrq_n_u8:
+; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %vshr_n
+}
+
+define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshrq_n_u16:
+; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %vshr_n
+}
+
+define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshrq_n_u32:
+; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %vshr_n
+}
+
+define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) { ; splat lshr by 3 should select ushr #3
+; CHECK-LABEL: test_vshrq_n_u64:
+; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
+  ret <2 x i64> %vshr_n
+}
+
+define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsra_n_s8:
+; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <8 x i8> %vsra_n, %a
+  ret <8 x i8> %1
+}
+
+define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsra_n_s16:
+; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
+  %1 = add <4 x i16> %vsra_n, %a
+  ret <4 x i16> %1
+}
+
+define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsra_n_s32:
+; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
+  %1 = add <2 x i32> %vsra_n, %a
+  ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsraq_n_s8:
+; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <16 x i8> %vsra_n, %a
+  ret <16 x i8> %1
+}
+
+define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsraq_n_s16:
+; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = add <8 x i16> %vsra_n, %a
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsraq_n_s32:
+; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
+  %1 = add <4 x i32> %vsra_n, %a
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { ; ashr by 3 followed by add should fuse into ssra #3
+; CHECK-LABEL: test_vsraq_n_s64:
+; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
+  %1 = add <2 x i64> %vsra_n, %a
+  ret <2 x i64> %1
+}
+
+define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsra_n_u8:
+; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <8 x i8> %vsra_n, %a
+  ret <8 x i8> %1
+}
+
+define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsra_n_u16:
+; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
+  %1 = add <4 x i16> %vsra_n, %a
+  ret <4 x i16> %1
+}
+
+define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsra_n_u32:
+; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
+  %1 = add <2 x i32> %vsra_n, %a
+  ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsraq_n_u8:
+; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %1 = add <16 x i8> %vsra_n, %a
+  ret <16 x i8> %1
+}
+
+define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsraq_n_u16:
+; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = add <8 x i16> %vsra_n, %a
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsraq_n_u32:
+; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
+  %1 = add <4 x i32> %vsra_n, %a
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { ; lshr by 3 followed by add should fuse into usra #3
+; CHECK-LABEL: test_vsraq_n_u64:
+; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
+  %1 = add <2 x i64> %vsra_n, %a
+  ret <2 x i64> %1
+}
+
+define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshr_n_s8:
+; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3)
+  ret <8 x i8> %vrshr_n
+}
+
+
+define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshr_n_s16:
+; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3)
+  ret <4 x i16> %vrshr_n
+}
+
+
+define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshr_n_s32:
+; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3)
+  ret <2 x i32> %vrshr_n
+}
+
+
+define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshrq_n_s8:
+; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3)
+  ret <16 x i8> %vrshr_n
+}
+
+
+define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshrq_n_s16:
+; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3)
+  ret <8 x i16> %vrshr_n
+}
+
+
+define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshrq_n_s32:
+; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3)
+  ret <4 x i32> %vrshr_n
+}
+
+
+define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) { ; vsrshr intrinsic with shift 3 should select srshr #3
+; CHECK-LABEL: test_vrshrq_n_s64:
+; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3)
+  ret <2 x i64> %vrshr_n
+}
+
+
+define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshr_n_u8:
+; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3)
+  ret <8 x i8> %vrshr_n
+}
+
+
+define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshr_n_u16:
+; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3)
+  ret <4 x i16> %vrshr_n
+}
+
+
+define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshr_n_u32:
+; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3)
+  ret <2 x i32> %vrshr_n
+}
+
+
+define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshrq_n_u8:
+; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3)
+  ret <16 x i8> %vrshr_n
+}
+
+
+define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshrq_n_u16:
+; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3)
+  ret <8 x i16> %vrshr_n
+}
+
+
+define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshrq_n_u32:
+; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3)
+  ret <4 x i32> %vrshr_n
+}
+
+
+define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) { ; vurshr intrinsic with shift 3 should select urshr #3
+; CHECK-LABEL: test_vrshrq_n_u64:
+; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3)
+  ret <2 x i64> %vrshr_n
+}
+
+
+define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsra_n_s8:
+; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3)
+  %vrsra_n = add <8 x i8> %1, %a
+  ret <8 x i8> %vrsra_n
+}
+
+define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsra_n_s16:
+; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3)
+  %vrsra_n = add <4 x i16> %1, %a
+  ret <4 x i16> %vrsra_n
+}
+
+define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsra_n_s32:
+; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3)
+  %vrsra_n = add <2 x i32> %1, %a
+  ret <2 x i32> %vrsra_n
+}
+
+define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsraq_n_s8:
+; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3)
+  %vrsra_n = add <16 x i8> %1, %a
+  ret <16 x i8> %vrsra_n
+}
+
+define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsraq_n_s16:
+; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3)
+  %vrsra_n = add <8 x i16> %1, %a
+  ret <8 x i16> %vrsra_n
+}
+
+define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsraq_n_s32:
+; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3)
+  %vrsra_n = add <4 x i32> %1, %a
+  ret <4 x i32> %vrsra_n
+}
+
+define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { ; vsrshr by 3 followed by add should fuse into srsra #3
+; CHECK-LABEL: test_vrsraq_n_s64:
+; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3)
+  %vrsra_n = add <2 x i64> %1, %a
+  ret <2 x i64> %vrsra_n
+}
+
+define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) { ; vurshr by 3 followed by add should fuse into ursra #3
+; CHECK-LABEL: test_vrsra_n_u8:
+; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3)
+  %vrsra_n = add <8 x i8> %1, %a
+  ret <8 x i8> %vrsra_n
+}
+
+define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) { ; vurshr by 3 followed by add should fuse into ursra #3
+; CHECK-LABEL: test_vrsra_n_u16:
+; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3)
+  %vrsra_n = add <4 x i16> %1, %a
+  ret <4 x i16> %vrsra_n
+}
+
+define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) { ; vurshr(%b, 3) + %a should fold into ursra
+; CHECK-LABEL: test_vrsra_n_u32:
+; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3)
+  %vrsra_n = add <2 x i32> %1, %a
+  ret <2 x i32> %vrsra_n
+}
+
+define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { ; vurshr(%b, 3) + %a should fold into ursra
+; CHECK-LABEL: test_vrsraq_n_u8:
+; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3)
+  %vrsra_n = add <16 x i8> %1, %a
+  ret <16 x i8> %vrsra_n
+}
+
+define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { ; vurshr(%b, 3) + %a should fold into ursra
+; CHECK-LABEL: test_vrsraq_n_u16:
+; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3)
+  %vrsra_n = add <8 x i16> %1, %a
+  ret <8 x i16> %vrsra_n
+}
+
+define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { ; vurshr(%b, 3) + %a should fold into ursra
+; CHECK-LABEL: test_vrsraq_n_u32:
+; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3)
+  %vrsra_n = add <4 x i32> %1, %a
+  ret <4 x i32> %vrsra_n
+}
+
+define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { ; vurshr(%b, 3) + %a should fold into ursra
+; CHECK-LABEL: test_vrsraq_n_u64:
+; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3)
+  %vrsra_n = add <2 x i64> %1, %a
+  ret <2 x i64> %vrsra_n
+}
+
+define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsri_n_s8:
+; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+  ret <8 x i8> %vsri_n
+}
+
+
+define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsri_n_s16:
+; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
+  ret <4 x i16> %vsri
+}
+
+
+define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsri_n_s32:
+; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
+  ret <2 x i32> %vsri
+}
+
+
+define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsriq_n_s8:
+; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+  ret <16 x i8> %vsri_n
+}
+
+
+define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsriq_n_s16:
+; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
+  ret <8 x i16> %vsri
+}
+
+
+define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsriq_n_s32:
+; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
+  ret <4 x i32> %vsri
+}
+
+
+define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsriq_n_s64:
+; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
+  ret <2 x i64> %vsri
+}
+
+define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsri_n_p8:
+; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+  ret <8 x i8> %vsri_n
+}
+
+define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) { ; vsri(%a, %b, 15) should select sri #15
+; CHECK-LABEL: test_vsri_n_p16:
+; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
+  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
+  ret <4 x i16> %vsri
+}
+
+define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) { ; vsri(%a, %b, 3) should select sri #3
+; CHECK-LABEL: test_vsriq_n_p8:
+; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+  ret <16 x i8> %vsri_n
+}
+
+define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) { ; vsri(%a, %b, 15) should select sri #15
+; CHECK-LABEL: test_vsriq_n_p16:
+; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
+  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
+  ret <8 x i16> %vsri
+}
+
+define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsli_n_s8:
+; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+  ret <8 x i8> %vsli_n
+}
+
+define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsli_n_s16:
+; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
+  ret <4 x i16> %vsli
+}
+
+define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsli_n_s32:
+; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
+  ret <2 x i32> %vsli
+}
+
+define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsliq_n_s8:
+; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+  ret <16 x i8> %vsli_n
+}
+
+define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsliq_n_s16:
+; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
+  ret <8 x i16> %vsli
+}
+
+define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsliq_n_s32:
+; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
+  ret <4 x i32> %vsli
+}
+
+define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsliq_n_s64:
+; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
+  ret <2 x i64> %vsli
+}
+
+define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsli_n_p8:
+; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+  ret <8 x i8> %vsli_n
+}
+
+define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) { ; vsli(%a, %b, 15) should select sli #15
+; CHECK-LABEL: test_vsli_n_p16:
+; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
+  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
+  ret <4 x i16> %vsli
+}
+
+define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) { ; vsli(%a, %b, 3) should select sli #3
+; CHECK-LABEL: test_vsliq_n_p8:
+; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+  ret <16 x i8> %vsli_n
+}
+
+define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) { ; vsli(%a, %b, 15) should select sli #15
+; CHECK-LABEL: test_vsliq_n_p16:
+; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
+  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
+  ret <8 x i16> %vsli
+}
+
+define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshl_n_s8:
+; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <8 x i8> %vqshl
+}
+
+
+define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshl_n_s16:
+; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
+  ret <4 x i16> %vqshl
+}
+
+
+define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshl_n_s32:
+; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
+  ret <2 x i32> %vqshl
+}
+
+
+define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshlq_n_s8:
+; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <16 x i8> %vqshl_n
+}
+
+
+define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshlq_n_s16:
+; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  ret <8 x i16> %vqshl
+}
+
+
+define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshlq_n_s32:
+; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret <4 x i32> %vqshl
+}
+
+
+define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { ; vqshifts by a splat of 3 should select immediate-form sqshl #3
+; CHECK-LABEL: test_vqshlq_n_s64:
+; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
+  ret <2 x i64> %vqshl
+}
+
+
+define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshl_n_u8:
+; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <8 x i8> %vqshl_n
+}
+
+
+define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshl_n_u16:
+; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
+  ret <4 x i16> %vqshl
+}
+
+
+define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshl_n_u32:
+; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
+  ret <2 x i32> %vqshl
+}
+
+
+define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshlq_n_u8:
+; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <16 x i8> %vqshl_n
+}
+
+
+define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshlq_n_u16:
+; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  ret <8 x i16> %vqshl
+}
+
+
+define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshlq_n_u32:
+; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret <4 x i32> %vqshl
+}
+
+
+define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { ; vqshiftu by a splat of 3 should select immediate-form uqshl #3
+; CHECK-LABEL: test_vqshlq_n_u64:
+; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
+  ret <2 x i64> %vqshl
+}
+
+define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshlu_n_s8:
+; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+  %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3)
+  ret <8 x i8> %vqshlu
+}
+
+
+define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshlu_n_s16:
+; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+  %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3)
+  ret <4 x i16> %vqshlu
+}
+
+
+define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshlu_n_s32:
+; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+  %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3)
+  ret <2 x i32> %vqshlu
+}
+
+
+define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshluq_n_s8:
+; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+  %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3)
+  ret <16 x i8> %vqshlu
+}
+
+
+define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshluq_n_s16:
+; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+  %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3)
+  ret <8 x i16> %vqshlu
+}
+
+
+define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshluq_n_s32:
+; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+  %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3)
+  ret <4 x i32> %vqshlu
+}
+
+
+define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) { ; vsqshlu(%a, 3) should select sqshlu #3
+; CHECK-LABEL: test_vqshluq_n_s64:
+; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+  %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3)
+  ret <2 x i64> %vqshlu
+}
+
+
+define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) { ; ashr by 3 followed by trunc should select shrn #3
+; CHECK-LABEL: test_vshrn_n_s16:
+; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  ret <8 x i8> %vshrn_n
+}
+
+define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) { ; ashr by 9 followed by trunc should select shrn #9
+; CHECK-LABEL: test_vshrn_n_s32:
+; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  ret <4 x i16> %vshrn_n
+}
+
+define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) { ; ashr by 19 followed by trunc should select shrn #19
+; CHECK-LABEL: test_vshrn_n_s64:
+; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %vshrn_n
+}
+
+define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) { ; lshr by 3 followed by trunc should select shrn #3
+; CHECK-LABEL: test_vshrn_n_u16:
+; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  ret <8 x i8> %vshrn_n
+}
+
+define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) { ; lshr by 9 followed by trunc should select shrn #9
+; CHECK-LABEL: test_vshrn_n_u32:
+; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  ret <4 x i16> %vshrn_n
+}
+
+define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) { ; lshr by 19 followed by trunc should select shrn #19
+; CHECK-LABEL: test_vshrn_n_u64:
+; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
+  ret <2 x i32> %vshrn_n
+}
+
+define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; ashr+trunc of %b concatenated after %a should select shrn2
+; CHECK-LABEL: test_vshrn_high_n_s16:
+; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  %2 = bitcast <8 x i8> %a to <1 x i64>
+  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %4
+}
+
+define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; ashr+trunc of %b concatenated after %a should select shrn2
+; CHECK-LABEL: test_vshrn_high_n_s32:
+; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  %2 = bitcast <4 x i16> %a to <1 x i64>
+  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %4
+}
+
+define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; ashr+trunc of %b concatenated after %a should select shrn2
+; CHECK-LABEL: test_vshrn_high_n_s64:
+; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
+  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %4
+}
+
+define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; lshr+trunc of %b concatenated after %a should select shrn2
+; CHECK-LABEL: test_vshrn_high_n_u16:
+; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+  %2 = bitcast <8 x i8> %a to <1 x i64>
+  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %4
+}
+
+define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; lshr+trunc of %b concatenated after %a should select shrn2
+; CHECK-LABEL: test_vshrn_high_n_u32:
+; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
+  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+  %2 = bitcast <4 x i16> %a to <1 x i64>
+  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %4
+}
+
+define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; lshr+trunc of %b concatenated after %a should select shrn2
+; CHECK-LABEL: test_vshrn_high_n_u64:
+; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
+  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
+  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %4
+}
+
+define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) { ; vsqshrun(%a, 3) should select sqshrun #3
+; CHECK-LABEL: test_vqshrun_n_s16:
+; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vqshrun
+}
+
+
+define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) { ; vsqshrun(%a, 9) should select sqshrun #9
+; CHECK-LABEL: test_vqshrun_n_s32:
+; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vqshrun
+}
+
+define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) { ; vsqshrun(%a, 19) should select sqshrun #19
+; CHECK-LABEL: test_vqshrun_n_s64:
+; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vqshrun
+}
+
+define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; vsqshrun(%b) concatenated after %a should select sqshrun2
+; CHECK-LABEL: test_vqshrun_high_n_s16:
+; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; vsqshrun(%b) concatenated after %a should select sqshrun2
+; CHECK-LABEL: test_vqshrun_high_n_s32:
+; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; vsqshrun(%b) concatenated after %a should select sqshrun2
+; CHECK-LABEL: test_vqshrun_high_n_s64:
+; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) { ; vrshrn(%a, 3) should select rshrn #3
+; CHECK-LABEL: test_vrshrn_n_s16:
+; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vrshrn
+}
+
+
+define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) { ; vrshrn(%a, 9) should select rshrn #9
+; CHECK-LABEL: test_vrshrn_n_s32:
+; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vrshrn
+}
+
+
+define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) { ; vrshrn(%a, 19) should select rshrn #19
+; CHECK-LABEL: test_vrshrn_n_s64:
+; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vrshrn
+}
+
+define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; vrshrn(%b) concatenated after %a should select rshrn2
+; CHECK-LABEL: test_vrshrn_high_n_s16:
+; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; vrshrn(%b) concatenated after %a should select rshrn2
+; CHECK-LABEL: test_vrshrn_high_n_s32:
+; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; vrshrn(%b) concatenated after %a should select rshrn2
+; CHECK-LABEL: test_vrshrn_high_n_s64:
+; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) { ; vsqrshrun(%a, 3) should select sqrshrun #3
+; CHECK-LABEL: test_vqrshrun_n_s16:
+; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vqrshrun
+}
+
+define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) { ; vsqrshrun(%a, 9) should select sqrshrun #9
+; CHECK-LABEL: test_vqrshrun_n_s32:
+; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vqrshrun
+}
+
+define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) { ; vsqrshrun(%a, 19) should select sqrshrun #19
+; CHECK-LABEL: test_vqrshrun_n_s64:
+; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vqrshrun
+}
+
+define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; vsqrshrun(%b) concatenated after %a should select sqrshrun2
+; CHECK-LABEL: test_vqrshrun_high_n_s16:
+; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; vsqrshrun(%b) concatenated after %a should select sqrshrun2
+; CHECK-LABEL: test_vqrshrun_high_n_s32:
+; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; vsqrshrun(%b) concatenated after %a should select sqrshrun2
+; CHECK-LABEL: test_vqrshrun_high_n_s64:
+; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) { ; vsqshrn(%a, 3) should select sqshrn #3
+; CHECK-LABEL: test_vqshrn_n_s16:
+; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vqshrn
+}
+
+
+define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) { ; vsqshrn(%a, 9) should select sqshrn #9
+; CHECK-LABEL: test_vqshrn_n_s32:
+; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vqshrn
+}
+
+
+define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) { ; vsqshrn(%a, 19) should select sqshrn #19
+; CHECK-LABEL: test_vqshrn_n_s64:
+; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vqshrn
+}
+
+
+define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) { ; vuqshrn(%a, 3) should select uqshrn #3
+; CHECK-LABEL: test_vqshrn_n_u16:
+; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vqshrn
+}
+
+
+define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) { ; vuqshrn(%a, 9) should select uqshrn #9
+; CHECK-LABEL: test_vqshrn_n_u32:
+; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vqshrn
+}
+
+
+define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) { ; vuqshrn(%a, 19) should select uqshrn #19
+; CHECK-LABEL: test_vqshrn_n_u64:
+; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vqshrn
+}
+
+
+define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; vsqshrn(%b) concatenated after %a should select sqshrn2
+; CHECK-LABEL: test_vqshrn_high_n_s16:
+; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; vsqshrn(%b) concatenated after %a should select sqshrn2
+; CHECK-LABEL: test_vqshrn_high_n_s32:
+; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; vsqshrn(%b) concatenated after %a should select sqshrn2
+; CHECK-LABEL: test_vqshrn_high_n_s64:
+; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; vuqshrn(%b) concatenated after %a should select uqshrn2
+; CHECK-LABEL: test_vqshrn_high_n_u16:
+; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; vuqshrn(%b) concatenated after %a should select uqshrn2
+; CHECK-LABEL: test_vqshrn_high_n_u32:
+; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; vuqshrn(%b) concatenated after %a should select uqshrn2
+; CHECK-LABEL: test_vqshrn_high_n_u64:
+; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) { ; vsqrshrn(%a, 3) should select sqrshrn #3
+; CHECK-LABEL: test_vqrshrn_n_s16:
+; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vqrshrn
+}
+
+
+define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) { ; vsqrshrn(%a, 9) should select sqrshrn #9
+; CHECK-LABEL: test_vqrshrn_n_s32:
+; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vqrshrn
+}
+
+
+define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) { ; vsqrshrn(%a, 19) should select sqrshrn #19
+; CHECK-LABEL: test_vqrshrn_n_s64:
+; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vqrshrn
+}
+
+
+define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) { ; vuqrshrn(%a, 3) should select uqrshrn #3
+; CHECK-LABEL: test_vqrshrn_n_u16:
+; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3)
+  ret <8 x i8> %vqrshrn
+}
+
+
+define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) { ; vuqrshrn(%a, 9) should select uqrshrn #9
+; CHECK-LABEL: test_vqrshrn_n_u32:
+; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9)
+  ret <4 x i16> %vqrshrn
+}
+
+
+define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) { ; vuqrshrn(%a, 19) should select uqrshrn #19
+; CHECK-LABEL: test_vqrshrn_n_u64:
+; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19)
+  ret <2 x i32> %vqrshrn
+}
+
+
+define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { ; vsqrshrn(%b) concatenated after %a should select sqrshrn2
+; CHECK-LABEL: test_vqrshrn_high_n_s16:
+; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3)
+  %1 = bitcast <8 x i8> %a to <1 x i64>
+  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { ; vsqrshrn(%b) concatenated after %a should select sqrshrn2
+; CHECK-LABEL: test_vqrshrn_high_n_s32:
+; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9)
+  %1 = bitcast <4 x i16> %a to <1 x i64>
+  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { ; vsqrshrn(%b) concatenated after %a should select sqrshrn2
+; CHECK-LABEL: test_vqrshrn_high_n_s64:
+; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %1 = bitcast <2 x i32> %a to <1 x i64>
+  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19)
+  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
+  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { ; %a as element 0, vuqrshrn(%b, 3) as element 1; expects uqrshrn2
+; CHECK: test_vqrshrn_high_n_u16
+; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+  %narrow = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3)
+  %lo64 = bitcast <8 x i8> %a to <1 x i64>
+  %hi64 = bitcast <8 x i8> %narrow to <1 x i64>
+  %joined = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
+  %wide = bitcast <2 x i64> %joined to <16 x i8>
+  ret <16 x i8> %wide
+}
+
+define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { ; %a as element 0, vuqrshrn(%b, 9) as element 1; expects uqrshrn2
+; CHECK: test_vqrshrn_high_n_u32
+; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+  %narrow = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9)
+  %lo64 = bitcast <4 x i16> %a to <1 x i64>
+  %hi64 = bitcast <4 x i16> %narrow to <1 x i64>
+  %joined = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
+  %wide = bitcast <2 x i64> %joined to <8 x i16>
+  ret <8 x i16> %wide
+}
+
+define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { ; %a as element 0, vuqrshrn(%b, 19) as element 1; expects uqrshrn2
+; CHECK: test_vqrshrn_high_n_u64
+; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+  %lo64 = bitcast <2 x i32> %a to <1 x i64>
+  %narrow = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19)
+  %hi64 = bitcast <2 x i32> %narrow to <1 x i64>
+  %joined = shufflevector <1 x i64> %lo64, <1 x i64> %hi64, <2 x i32> <i32 0, i32 1>
+  %wide = bitcast <2 x i64> %joined to <4 x i32>
+  ret <4 x i32> %wide
+}
+
+define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) { ; vcvtfxs2fp on <2 x i32> with immediate 31; expects scvtf .2s, #31
+; CHECK: test_vcvt_n_f32_s32
+; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+  %conv = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
+  ret <2 x float> %conv
+}
+
+define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) { ; vcvtfxs2fp on <4 x i32> with immediate 31; expects scvtf .4s, #31
+; CHECK: test_vcvtq_n_f32_s32
+; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+  %conv = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
+  ret <4 x float> %conv
+}
+
+define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) { ; vcvtfxs2fp on <2 x i64> with immediate 50; expects scvtf .2d, #50
+; CHECK: test_vcvtq_n_f64_s64
+; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+  %conv = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
+  ret <2 x double> %conv
+}
+
+define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) { ; vcvtfxu2fp on <2 x i32> with immediate 31; expects ucvtf .2s, #31
+; CHECK: test_vcvt_n_f32_u32
+; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+  %conv = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
+  ret <2 x float> %conv
+}
+
+define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) { ; vcvtfxu2fp on <4 x i32> with immediate 31; expects ucvtf .4s, #31
+; CHECK: test_vcvtq_n_f32_u32
+; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+  %conv = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
+  ret <4 x float> %conv
+}
+
+define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) { ; vcvtfxu2fp on <2 x i64> with immediate 50; expects ucvtf .2d, #50
+; CHECK: test_vcvtq_n_f64_u64
+; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+  %conv = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
+  ret <2 x double> %conv
+}
+
+define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) { ; vcvtfp2fxs on <2 x float> with immediate 31; expects fcvtzs .2s, #31
+; CHECK: test_vcvt_n_s32_f32
+; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+  %conv = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31)
+  ret <2 x i32> %conv
+}
+
+define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) { ; vcvtfp2fxs on <4 x float> with immediate 31; expects fcvtzs .4s, #31
+; CHECK: test_vcvtq_n_s32_f32
+; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+  %conv = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31)
+  ret <4 x i32> %conv
+}
+
+define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) { ; vcvtfp2fxs on <2 x double> with immediate 50; expects fcvtzs .2d, #50
+; CHECK: test_vcvtq_n_s64_f64
+; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+  %conv = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50)
+  ret <2 x i64> %conv
+}
+
+define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) { ; vcvtfp2fxu on <2 x float> with immediate 31; expects fcvtzu .2s, #31
+; CHECK: test_vcvt_n_u32_f32
+; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
+  %conv = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31)
+  ret <2 x i32> %conv
+}
+
+define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) { ; vcvtfp2fxu on <4 x float> with immediate 31; expects fcvtzu .4s, #31
+; CHECK: test_vcvtq_n_u32_f32
+; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
+  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31)
+  ret <4 x i32> %vcvt
+}
+
+define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) { ; vcvtfp2fxu on <2 x double> with immediate 50; expects fcvtzu .2d, #50
+; CHECK: test_vcvtq_n_u64_f64
+; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
+  %conv = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50)
+  ret <2 x i64> %conv
+}
+
+declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32)
+
+declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32)
+
+declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
+
+declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
+
+declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
+
+declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
+
+declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
+
+declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
+
+declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
+
+declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
+
+declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
+
+declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
+
+declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
+
+declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
+
+declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32)
+
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
+
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
+
+declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
+
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
+
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
+
+declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
+
+declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
+
+declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
+
+define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) { ; vcvtfp2fxs on <1 x double> with immediate 64; expects scalar fcvtzs d, d, #64
+; CHECK-LABEL: test_vcvt_n_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %conv = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
+  ret <1 x i64> %conv
+}
+
+define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) { ; vcvtfp2fxu on <1 x double> with immediate 64; expects scalar fcvtzu d, d, #64
+; CHECK-LABEL: test_vcvt_n_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %conv = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
+  ret <1 x i64> %conv
+}
+
+define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) { ; vcvtfxs2fp on <1 x i64> with immediate 64; expects scalar scvtf d, d, #64
+; CHECK-LABEL: test_vcvt_n_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %conv = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+  ret <1 x double> %conv
+}
+
+define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) { ; vcvtfxu2fp on <1 x i64> with immediate 64; expects scalar ucvtf d, d, #64
+; CHECK-LABEL: test_vcvt_n_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %conv = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+  ret <1 x double> %conv
+}
+
+declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
+declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
+declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
+declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
+\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll
new file mode 100644
index 000000000000..8eac1e88c4a5
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-tbl.ll
@@ -0,0 +1,828 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8>, <16 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8>, <16 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8>, <8 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+
+declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8>, <16 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
+
+define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) { ; %a padded with zeros to 16 bytes, then vtbl1 indexed by %b
+; CHECK: test_vtbl1_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %table = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %table, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) { ; vtbl1 on a 16-byte table %a indexed by %b
+; CHECK: test_vqtbl1_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { ; two 8-byte halves concatenated into one 16-byte table for vtbl1
+; CHECK: test_vtbl2_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %half0 = extractvalue [2 x <8 x i8>] %a.coerce, 0
+  %half1 = extractvalue [2 x <8 x i8>] %a.coerce, 1
+  %table = shufflevector <8 x i8> %half0, <8 x i8> %half1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %table, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { ; vtbl2 on two 16-byte tables indexed by %b
+; CHECK: test_vqtbl2_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %tab0 = extractvalue [2 x <16 x i8>] %a.coerce, 0
+  %tab1 = extractvalue [2 x <16 x i8>] %a.coerce, 1
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %tab0, <16 x i8> %tab1, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { ; parts 0+1 form table one, part 2 padded with zeros forms table two, then vtbl2
+; CHECK: test_vtbl3_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %part0 = extractvalue [3 x <8 x i8>] %a.coerce, 0
+  %part1 = extractvalue [3 x <8 x i8>] %a.coerce, 1
+  %part2 = extractvalue [3 x <8 x i8>] %a.coerce, 2
+  %table01 = shufflevector <8 x i8> %part0, <8 x i8> %part1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %table2z = shufflevector <8 x i8> %part2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %table01, <16 x i8> %table2z, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { ; vtbl3 on three 16-byte tables indexed by %b
+; CHECK: test_vqtbl3_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %tab0 = extractvalue [3 x <16 x i8>] %a.coerce, 0
+  %tab1 = extractvalue [3 x <16 x i8>] %a.coerce, 1
+  %tab2 = extractvalue [3 x <16 x i8>] %a.coerce, 2
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { ; parts 0+1 and 2+3 are concatenated into two 16-byte tables, then vtbl2
+; CHECK: test_vtbl4_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %part0 = extractvalue [4 x <8 x i8>] %a.coerce, 0
+  %part1 = extractvalue [4 x <8 x i8>] %a.coerce, 1
+  %part2 = extractvalue [4 x <8 x i8>] %a.coerce, 2
+  %part3 = extractvalue [4 x <8 x i8>] %a.coerce, 3
+  %table01 = shufflevector <8 x i8> %part0, <8 x i8> %part1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %table23 = shufflevector <8 x i8> %part2, <8 x i8> %part3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %table01, <16 x i8> %table23, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { ; vtbl4 on four 16-byte tables indexed by %b
+; CHECK: test_vqtbl4_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %tab0 = extractvalue [4 x <16 x i8>] %a.coerce, 0
+  %tab1 = extractvalue [4 x <16 x i8>] %a.coerce, 1
+  %tab2 = extractvalue [4 x <16 x i8>] %a.coerce, 2
+  %tab3 = extractvalue [4 x <16 x i8>] %a.coerce, 3
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <16 x i8> %tab3, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) { ; vtbl1 with a 16-byte table and 16-byte index vector
+; CHECK: test_vqtbl1q_s8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { ; vtbl2 with two 16-byte tables and 16-byte index vector
+; CHECK: test_vqtbl2q_s8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %tab0 = extractvalue [2 x <16 x i8>] %a.coerce, 0
+  %tab1 = extractvalue [2 x <16 x i8>] %a.coerce, 1
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %b)
+  ret <16 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { ; vtbl3 with three 16-byte tables and 16-byte index vector
+; CHECK: test_vqtbl3q_s8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %tab0 = extractvalue [3 x <16 x i8>] %a.coerce, 0
+  %tab1 = extractvalue [3 x <16 x i8>] %a.coerce, 1
+  %tab2 = extractvalue [3 x <16 x i8>] %a.coerce, 2
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <16 x i8> %b)
+  ret <16 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { ; vtbl4 with four 16-byte tables and 16-byte index vector
+; CHECK: test_vqtbl4q_s8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %tab0 = extractvalue [4 x <16 x i8>] %a.coerce, 0
+  %tab1 = extractvalue [4 x <16 x i8>] %a.coerce, 1
+  %tab2 = extractvalue [4 x <16 x i8>] %a.coerce, 2
+  %tab3 = extractvalue [4 x <16 x i8>] %a.coerce, 3
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <16 x i8> %tab3, <16 x i8> %b)
+  ret <16 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; vtbl1 on zero-padded %b, then vbsl with mask (%c >= 8), %a and the lookup
+; CHECK: test_vtbx1_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %table = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %table, <8 x i8> %c)
+  %oob = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  %oob.mask = sext <8 x i1> %oob to <8 x i8>
+  %merged = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %oob.mask, <8 x i8> %a, <8 x i8> %lookup)
+  ret <8 x i8> %merged
+}
+
+define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { ; two 8-byte halves concatenated into one 16-byte table for vtbx1
+; CHECK: test_vtbx2_s8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %half0 = extractvalue [2 x <8 x i8>] %b.coerce, 0
+  %half1 = extractvalue [2 x <8 x i8>] %b.coerce, 1
+  %table = shufflevector <8 x i8> %half0, <8 x i8> %half1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %table, <8 x i8> %c)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { ; vtbl2 on parts 0+1 and zero-padded part 2, then vbsl with mask (%c >= 24), %a and the lookup
+; CHECK: test_vtbx3_s8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %part0 = extractvalue [3 x <8 x i8>] %b.coerce, 0
+  %part1 = extractvalue [3 x <8 x i8>] %b.coerce, 1
+  %part2 = extractvalue [3 x <8 x i8>] %b.coerce, 2
+  %table01 = shufflevector <8 x i8> %part0, <8 x i8> %part1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %table2z = shufflevector <8 x i8> %part2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %table01, <16 x i8> %table2z, <8 x i8> %c)
+  %oob = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
+  %oob.mask = sext <8 x i1> %oob to <8 x i8>
+  %merged = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %oob.mask, <8 x i8> %a, <8 x i8> %lookup)
+  ret <8 x i8> %merged
+}
+
+define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { ; parts 0+1 and 2+3 are concatenated into two 16-byte tables, then vtbx2
+; CHECK: test_vtbx4_s8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %part0 = extractvalue [4 x <8 x i8>] %b.coerce, 0
+  %part1 = extractvalue [4 x <8 x i8>] %b.coerce, 1
+  %part2 = extractvalue [4 x <8 x i8>] %b.coerce, 2
+  %part3 = extractvalue [4 x <8 x i8>] %b.coerce, 3
+  %table01 = shufflevector <8 x i8> %part0, <8 x i8> %part1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %table23 = shufflevector <8 x i8> %part2, <8 x i8> %part3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %table01, <16 x i8> %table23, <8 x i8> %c)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { ; vtbx1 with fallback %a, 16-byte table %b, indices %c
+; CHECK: test_vqtbx1_s8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { ; vtbx2 with two 16-byte tables, indices %c
+; CHECK: test_vqtbx2_s8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %tab0 = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %tab1 = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %tab0, <16 x i8> %tab1, <8 x i8> %c)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { ; vtbx3 with three 16-byte tables, indices %c
+; CHECK: test_vqtbx3_s8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %tab0 = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %tab1 = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %tab2 = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <8 x i8> %c)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { ; vtbx4 with four 16-byte tables, indices %c
+; CHECK: test_vqtbx4_s8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %tab0 = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %tab1 = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %tab2 = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %tab3 = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <16 x i8> %tab3, <8 x i8> %c)
+  ret <8 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; vtbx1 with 16-byte fallback, table and index vectors
+; CHECK: test_vqtbx1q_s8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { ; vtbx2 with two 16-byte tables and 16-byte index vector
+; CHECK: test_vqtbx2q_s8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %tab0 = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %tab1 = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %c)
+  ret <16 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { ; vtbx3 with three 16-byte tables and 16-byte index vector
+; CHECK: test_vqtbx3q_s8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %tab0 = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %tab1 = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %tab2 = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <16 x i8> %c)
+  ret <16 x i8> %lookup
+}
+
+define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { ; vtbx4 with four 16-byte tables and 16-byte index vector
+; CHECK: test_vqtbx4q_s8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:
+  %tab0 = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %tab1 = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %tab2 = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %tab3 = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %lookup = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %tab0, <16 x i8> %tab1, <16 x i8> %tab2, <16 x i8> %tab3, <16 x i8> %c)
+  ret <16 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) { ; %a padded with zeros to 16 bytes, then vtbl1 indexed by %b
+; CHECK: test_vtbl1_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %table = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %table, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) { ; vtbl1 on a 16-byte table %a indexed by %b
+; CHECK: test_vqtbl1_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { ; two 8-byte halves concatenated into one 16-byte table for vtbl1
+; CHECK: test_vtbl2_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:
+  %half0 = extractvalue [2 x <8 x i8>] %a.coerce, 0
+  %half1 = extractvalue [2 x <8 x i8>] %a.coerce, 1
+  %table = shufflevector <8 x i8> %half0, <8 x i8> %half1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %lookup = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %table, <8 x i8> %b)
+  ret <8 x i8> %lookup
+}
+
+define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl2_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
+  %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl2.i
+}
+
+define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vtbl3_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack three 8-byte tables into two 16-byte registers (upper half of the second is zero), then two-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
+  %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl212.i
+}
+
+define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl3_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
+  %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl3.i
+}
+
+define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vtbl4_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack four 8-byte tables into two 16-byte registers, then two-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
+  %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
+  %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl216.i
+}
+
+define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl4_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
+  %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
+  %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl4.i
+}
+
+define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl1q_u8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; one-register lookup into the 16-byte table %a with the 16 indices in %b
+  %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %vtbl1.i
+}
+
+define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl2q_u8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register lookup with the 16 indices in %b
+  %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
+  %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
+  ret <16 x i8> %vtbl2.i
+}
+
+define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl3q_u8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register lookup with the 16 indices in %b
+  %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
+  %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
+  ret <16 x i8> %vtbl3.i
+}
+
+define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl4q_u8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register lookup with the 16 indices in %b
+  %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
+  %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
+  %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
+  ret <16 x i8> %vtbl4.i
+}
+
+define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx1_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; zero-padded one-register lookup of %b by %c, then vbsl merges the result with %a under the mask (%c >= 8)
+  %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
+  %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  %1 = sext <8 x i1> %0 to <8 x i8>
+  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
+  ret <8 x i8> %vbsl.i
+}
+
+define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx2_u8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; join the two 8-byte tables into one 16-byte register, then one-register vtbx1 call on (%a, table, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
+  %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx17.i
+}
+
+define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx3_u8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack three 8-byte tables into two registers (zero-padded), two-register lookup by %c, then vbsl merges with %a under the mask (%c >= 24)
+  %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
+  %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
+  %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
+  %1 = sext <8 x i1> %0 to <8 x i8>
+  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
+  ret <8 x i8> %vbsl.i
+}
+
+define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx4_u8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack four 8-byte tables into two 16-byte registers, then two-register vtbx2 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
+  %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
+  %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx216.i
+}
+
+define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx1_u8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; one-register vtbx1 call on (%a, 16-byte table %b, indices %c)
+  %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
+  ret <8 x i8> %vtbx1.i
+}
+
+define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx2_u8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register vtbx2 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx2.i
+}
+
+define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx3_u8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register vtbx3 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx3.i
+}
+
+define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx4_u8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register vtbx4 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx4.i
+}
+
+define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx1q_u8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; one-register vtbx1 call on (%a, 16-byte table %b, 16 indices %c)
+  %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %vtbx1.i
+}
+
+define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx2q_u8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register vtbx2 call on (%a, tables, 16 indices %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
+  ret <16 x i8> %vtbx2.i
+}
+
+define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx3q_u8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register vtbx3 call on (%a, tables, 16 indices %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
+  ret <16 x i8> %vtbx3.i
+}
+
+define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx4q_u8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register vtbx4 call on (%a, tables, 16 indices %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
+  ret <16 x i8> %vtbx4.i
+}
+
+define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vtbl1_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; widen the 8-byte table %a to 16 bytes with zero padding, then one-register lookup by %b
+  %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl11.i
+}
+
+define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl1_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; one-register lookup into the 16-byte table %a, indexed by %b
+  %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
+  ret <8 x i8> %vtbl1.i
+}
+
+define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vtbl2_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; join the two 8-byte tables into one 16-byte register, then one-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
+  %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl17.i
+}
+
+define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl2_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
+  %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl2.i
+}
+
+define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vtbl3_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack three 8-byte tables into two 16-byte registers (upper half of the second is zero), then two-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
+  %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl212.i
+}
+
+define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl3_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
+  %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl3.i
+}
+
+define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vtbl4_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack four 8-byte tables into two 16-byte registers, then two-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
+  %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
+  %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl216.i
+}
+
+define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
+; CHECK-LABEL: test_vqtbl4_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register lookup by %b
+  %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
+  %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
+  %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
+  ret <8 x i8> %vtbl4.i
+}
+
+define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl1q_p8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; one-register lookup into the 16-byte table %a with the 16 indices in %b
+  %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %vtbl1.i
+}
+
+define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl2q_p8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register lookup with the 16 indices in %b
+  %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
+  %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
+  ret <16 x i8> %vtbl2.i
+}
+
+define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl3q_p8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register lookup with the 16 indices in %b
+  %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
+  %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
+  ret <16 x i8> %vtbl3.i
+}
+
+define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
+; CHECK-LABEL: test_vqtbl4q_p8:
+; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register lookup with the 16 indices in %b
+  %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
+  %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
+  %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
+  %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
+  %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
+  ret <16 x i8> %vtbl4.i
+}
+
+define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx1_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; zero-padded one-register lookup of %b by %c, then vbsl merges the result with %a under the mask (%c >= 8)
+  %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
+  %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  %1 = sext <8 x i1> %0 to <8 x i8>
+  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
+  ret <8 x i8> %vbsl.i
+}
+
+define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx2_p8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; join the two 8-byte tables into one 16-byte register, then one-register vtbx1 call on (%a, table, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
+  %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx17.i
+}
+
+define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx3_p8:
+; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack three 8-byte tables into two registers (zero-padded), two-register lookup by %c, then vbsl merges with %a under the mask (%c >= 24)
+  %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
+  %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
+  %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
+  %1 = sext <8 x i1> %0 to <8 x i8>
+  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
+  ret <8 x i8> %vbsl.i
+}
+
+define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vtbx4_p8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; pack four 8-byte tables into two 16-byte registers, then two-register vtbx2 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
+  %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
+  %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx216.i
+}
+
+define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx1_p8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; one-register vtbx1 call on (%a, 16-byte table %b, indices %c)
+  %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
+  ret <8 x i8> %vtbx1.i
+}
+
+define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx2_p8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register vtbx2 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx2.i
+}
+
+define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx3_p8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register vtbx3 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx3.i
+}
+
+define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
+; CHECK-LABEL: test_vqtbx4_p8:
+; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register vtbx4 call on (%a, tables, %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
+  ret <8 x i8> %vtbx4.i
+}
+
+define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx1q_p8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; one-register vtbx1 call on (%a, 16-byte table %b, 16 indices %c)
+  %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %vtbx1.i
+}
+
+define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx2q_p8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its two 16-byte tables, then two-register vtbx2 call on (%a, tables, 16 indices %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
+  %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
+  ret <16 x i8> %vtbx2.i
+}
+
+define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx3q_p8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its three 16-byte tables, then three-register vtbx3 call on (%a, tables, 16 indices %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
+  %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
+  ret <16 x i8> %vtbx3.i
+}
+
+define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
+; CHECK-LABEL: test_vqtbx4q_p8:
+; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
+entry:  ; split the aggregate into its four 16-byte tables, then four-register vtbx4 call on (%a, tables, 16 indices %c)
+  %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
+  %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
+  %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
+  %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
+  %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
+  ret <16 x i8> %vtbx4.i
+}
+
diff --git a/test/CodeGen/AArch64/neon-simd-vget.ll b/test/CodeGen/AArch64/neon-simd-vget.ll
new file mode 100644
index 000000000000..6474499e4ff1
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-simd-vget.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_s8:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 8..15 of %a (its upper half) as an 8 x i8 vector
+  %hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %hi
+}
+
+define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_s16:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 4..7 of %a (its upper half) as a 4 x i16 vector
+  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %hi
+}
+
+define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_high_s32:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 2..3 of %a (its upper half) as a 2 x i32 vector
+  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x i32> %hi
+}
+
+define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_s64:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lane 1 of %a (its upper half) as a 1 x i64 vector
+  %hi = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+  ret <1 x i64> %hi
+}
+
+define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_u8:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 8..15 of %a (its upper half) as an 8 x i8 vector
+  %hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %hi
+}
+
+define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_u16:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 4..7 of %a (its upper half) as a 4 x i16 vector
+  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %hi
+}
+
+define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_high_u32:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 2..3 of %a (its upper half) as a 2 x i32 vector
+  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x i32> %hi
+}
+
+define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_u64:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lane 1 of %a (its upper half) as a 1 x i64 vector
+  %hi = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+  ret <1 x i64> %hi
+}
+
+define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_p64:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lane 1 of %a (its upper half) as a 1 x i64 vector
+  %hi = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+  ret <1 x i64> %hi
+}
+
+define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_f16:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 4..7 of %a (its upper half); the operand is typed as 8 x i16 here
+  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %hi
+}
+
+define <2 x float> @test_vget_high_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vget_high_f32:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 2..3 of %a (its upper half) as a 2 x float vector
+  %hi = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  ret <2 x float> %hi
+}
+
+define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_p8:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 8..15 of %a (its upper half) as an 8 x i8 vector
+  %hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %hi
+}
+
+define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_p16:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lanes 4..7 of %a (its upper half) as a 4 x i16 vector
+  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i16> %hi
+}
+
+define <1 x double> @test_vget_high_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vget_high_f64:
+; CHECK: dup d0, {{v[0-9]+}}.d[1]
+entry:  ; take lane 1 of %a (its upper half) as a 1 x double vector
+  %hi = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
+  ret <1 x double> %hi
+}
+
+define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_s8:
+; CHECK: ret
+entry:  ; take lanes 0..7 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %lo
+}
+
+define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_s16:
+; CHECK: ret
+entry:  ; take lanes 0..3 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %lo
+}
+
+define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_low_s32:
+; CHECK: ret
+entry:  ; take lanes 0..1 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %lo
+}
+
+define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_s64:
+; CHECK: ret
+entry:  ; the all-zero mask picks lane 0 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+  ret <1 x i64> %lo
+}
+
+define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_u8:
+; CHECK: ret
+entry:  ; take lanes 0..7 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %lo
+}
+
+define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_u16:
+; CHECK: ret
+entry:  ; take lanes 0..3 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %lo
+}
+
+define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_low_u32:
+; CHECK: ret
+entry:  ; take lanes 0..1 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %lo
+}
+
+define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_u64:
+; CHECK: ret
+entry:  ; the all-zero mask picks lane 0 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+  ret <1 x i64> %lo
+}
+
+define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_p64:
+; CHECK: ret
+entry:  ; the all-zero mask picks lane 0 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+  ret <1 x i64> %lo
+}
+
+define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_f16:
+; CHECK: ret
+entry:  ; take lanes 0..3 of %a (its lower half); the operand is typed as 8 x i16 here
+  %lo = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %lo
+}
+
+define <2 x float> @test_vget_low_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vget_low_f32:
+; CHECK: ret
+entry:  ; take lanes 0..1 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %lo
+}
+
+define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_p8:
+; CHECK: ret
+entry:  ; take lanes 0..7 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %lo
+}
+
+define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_p16:
+; CHECK: ret
+entry:  ; take lanes 0..3 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %lo
+}
+
+define <1 x double> @test_vget_low_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vget_low_f64:
+; CHECK: ret
+entry:  ; the all-zero mask picks lane 0 of %a (its lower half); only the ret is matched
+  %lo = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer
+  ret <1 x double> %lo
+}
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
index 77bf691cbcbd..6ec4b19a1204 100644
--- a/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -57,4 +57,4 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
 
 declare i8* @__cxa_begin_catch(i8*)
 
-declare void @__cxa_end_catch()
-\ No newline at end of file
+declare void @__cxa_end_catch()
diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll
index 28dc9a7e2515..9655f90d826d 100644
--- a/test/CodeGen/AArch64/regress-bitcast-formals.ll
+++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll
@@ -4,7 +4,7 @@
 ; actually capable of that (the test was omitted from LowerFormalArguments).
 
 define void @test_bitcast_lower(<2 x i32> %a) {
-; CHECK: test_bitcast_lower:
+; CHECK-LABEL: test_bitcast_lower:
 
   ret void
 ; CHECK: ret
diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll
new file mode 100644
index 000000000000..cb8432a7e4e4
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-fp128-livein.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s
+
+; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB,
+; causing a crash during live range calc.
+define void @fp128_livein(i64 %a) {
+  %tobool = icmp ne i64 %a, 0                ; i1 flag: is %a non-zero?
+  %conv = zext i1 %tobool to i32             ; widen the flag to an i32 holding 0 or 1
+  %conv2 = sitofp i32 %conv to fp128         ; that 0/1 value converted to fp128
+  %conv6 = sitofp i32 %conv to double        ; the same 0/1 value converted to double
+  %call3 = tail call i32 @g(fp128 %conv2)    ; the i32 results of both calls are never used
+  %call8 = tail call i32 @h(double %conv6)
+  ret void
+}
+
+declare i32 @f()
+declare i32 @g(fp128)
+declare i32 @h(double)
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
index 8d5485cae4c8..053249c6855f 100644
--- a/test/CodeGen/AArch64/regress-tail-livereg.ll
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -4,7 +4,7 @@
 declare void @bar()
 
 define void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
        %func = load void()** @var
 
        ; Calling a function encourages @foo to use a callee-saved register,
@@ -16,4 +16,4 @@ define void @foo() {
        tail call void %func()
 ; CHECK: br {{x([0-79]|1[0-8])}}
        ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index e54552fd8edf..ff77fb4e48f7 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -12,7 +12,7 @@
 declare void @bar(i8*)
 
 define i64 @test_chains() {
-; CHECK: test_chains:
+; CHECK-LABEL: test_chains:
 
   %locvar = alloca i8
 
diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
index 980e2ffef901..0ef981819ec3 100644
--- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
+++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -4,8 +4,23 @@
 declare void @bar()
 
 define void @test_w29_reserved() {
-; CHECK: test_w29_reserved:
+; CHECK-LABEL: test_w29_reserved:
+; CHECK: .cfi_startproc
+; CHECK: .cfi_def_cfa sp, 96
 ; CHECK: add x29, sp, #{{[0-9]+}}
+; CHECK: .cfi_def_cfa x29, 16
+; CHECK: .cfi_offset x30, -8
+; CHECK: .cfi_offset x29, -16
+; CHECK: .cfi_offset x28, -24
+; CHECK: .cfi_offset x27, -32
+; CHECK: .cfi_offset x26, -40
+; CHECK: .cfi_offset x25, -48
+; CHECK: .cfi_offset x24, -56
+; CHECK: .cfi_offset x23, -64
+; CHECK: .cfi_offset x22, -72
+; CHECK: .cfi_offset x21, -80
+; CHECK: .cfi_offset x20, -88
+; CHECK: .cfi_offset x19, -96
 
   %val1 = load volatile i32* @var
   %val2 = load volatile i32* @var
diff --git a/test/CodeGen/AArch64/returnaddr.ll b/test/CodeGen/AArch64/returnaddr.ll
new file mode 100644
index 000000000000..c85f9ec4ffd5
--- /dev/null
+++ b/test/CodeGen/AArch64/returnaddr.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu  | FileCheck %s
+
+define i8* @rt0(i32 %x) nounwind readnone {
+entry:  ; depth 0 request; the directives below expect x30 to be copied into x0
+; CHECK-LABEL: rt0:
+; CHECK: mov x0, x30
+  %retaddr = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %retaddr
+}
+
+define i8* @rt2() nounwind readnone {
+entry:  ; depth 2 request; the directives expect two chained loads starting at [x29], then a load from offset #8
+; CHECK-LABEL: rt2:
+; CHECK: ldr x[[reg:[0-9]+]], [x29]
+; CHECK: ldr x[[reg]], [x[[reg]]]
+; CHECK: ldr x0, [x[[reg]], #8]
+  %retaddr = tail call i8* @llvm.returnaddress(i32 2)
+  ret i8* %retaddr
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll
index d2eb77ab1b54..bd79685d34b4 100644
--- a/test/CodeGen/AArch64/setcc-takes-i32.ll
+++ b/test/CodeGen/AArch64/setcc-takes-i32.ll
@@ -12,11 +12,11 @@
 declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64)
 
 define i64 @test_select(i64 %lhs, i64 %rhs) {
-; CHECK: test_select:
+; CHECK-LABEL: test_select:
 
   %res = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs)
   %flag = extractvalue {i64, i1} %res, 1
   %retval = select i1 %flag, i64 %lhs, i64 %rhs
   ret i64 %retval
 ; CHECK: ret
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
index a1ec618b03ba..20f1062a44dc 100644
--- a/test/CodeGen/AArch64/sibling-call.ll
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -5,7 +5,7 @@ declare void @callee_stack8([8 x i32], i64)
 declare void @callee_stack16([8 x i32], i64, i64)
 
 define void @caller_to0_from0() nounwind {
-; CHECK: caller_to0_from0:
+; CHECK-LABEL: caller_to0_from0:
 ; CHECK-NEXT: // BB
   tail call void @callee_stack0()
   ret void
@@ -13,7 +13,7 @@ define void @caller_to0_from0() nounwind {
 }
 
 define void @caller_to0_from8([8 x i32], i64) nounwind{
-; CHECK: caller_to0_from8:
+; CHECK-LABEL: caller_to0_from8:
 ; CHECK-NEXT: // BB
 
   tail call void @callee_stack0()
@@ -22,7 +22,7 @@ define void @caller_to0_from8([8 x i32], i64) nounwind{
 }
 
 define void @caller_to8_from0() {
-; CHECK: caller_to8_from0:
+; CHECK-LABEL: caller_to8_from0:
 
 ; Caller isn't going to clean up any extra stack we allocate, so it
 ; can't be a tail call.
@@ -32,7 +32,7 @@ define void @caller_to8_from0() {
 }
 
 define void @caller_to8_from8([8 x i32], i64 %a) {
-; CHECK: caller_to8_from8:
+; CHECK-LABEL: caller_to8_from8:
 ; CHECK-NOT: sub sp, sp,
 
 ; This should reuse our stack area for the 42
@@ -43,7 +43,7 @@ define void @caller_to8_from8([8 x i32], i64 %a) {
 }
 
 define void @caller_to16_from8([8 x i32], i64 %a) {
-; CHECK: caller_to16_from8:
+; CHECK-LABEL: caller_to16_from8:
 
 ; Shouldn't be a tail call: we can't use SP+8 because our caller might
 ; have something there. This may sound obvious but implementation does
@@ -54,7 +54,7 @@ define void @caller_to16_from8([8 x i32], i64 %a) {
 }
 
 define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
-; CHECK: caller_to8_from24:
+; CHECK-LABEL: caller_to8_from24:
 ; CHECK-NOT: sub sp, sp
 
 ; Reuse our area, putting "42" at incoming sp
@@ -65,7 +65,7 @@ define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
 }
 
 define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
-; CHECK: caller_to16_from16:
+; CHECK-LABEL: caller_to16_from16:
 ; CHECK-NOT: sub sp, sp,
 
 ; Here we want to make sure that both loads happen before the stores:
@@ -85,13 +85,13 @@ define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
 @func = global void(i32)* null
 
 define void @indirect_tail() {
-; CHECK: indirect_tail:
+; CHECK-LABEL: indirect_tail:
 ; CHECK-NOT: sub sp, sp
 
   %fptr = load void(i32)** @func
   tail call void %fptr(i32 42)
   ret void
-; CHECK: movz w0, #42
 ; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func]
+; CHECK: movz w0, #42
 ; CHECK: br [[FPTR]]
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll
index c7a392b78c24..4cd44494d545 100644
--- a/test/CodeGen/AArch64/sincos-expansion.ll
+++ b/test/CodeGen/AArch64/sincos-expansion.ll
@@ -3,8 +3,8 @@
 define float @test_sincos_f32(float %f) {
   %sin = call float @sinf(float %f) readnone
   %cos = call float @cosf(float %f) readnone
-; CHECK: bl cosf
 ; CHECK: bl sinf
+; CHECK: bl cosf
   %val = fadd float %sin, %cos
   ret float %val
 }
@@ -13,8 +13,8 @@ define double @test_sincos_f64(double %f) {
   %sin = call double @sin(double %f) readnone
   %cos = call double @cos(double %f) readnone
   %val = fadd double %sin, %cos
-; CHECK: bl cos
 ; CHECK: bl sin
+; CHECK: bl cos
   ret double %val
 }
 
@@ -22,8 +22,8 @@ define fp128 @test_sincos_f128(fp128 %f) {
   %sin = call fp128 @sinl(fp128 %f) readnone
   %cos = call fp128 @cosl(fp128 %f) readnone
   %val = fadd fp128 %sin, %cos
-; CHECK: bl cosl
 ; CHECK: bl sinl
+; CHECK: bl cosl
   ret fp128 %val
 }
 
@@ -32,4 +32,4 @@ declare double @sin(double) readonly
 declare fp128 @sinl(fp128) readonly
 declare float @cosf(float) readonly
 declare double @cos(double) readonly
-declare fp128 @cosl(fp128) readonly
-\ No newline at end of file
+declare fp128 @cosl(fp128) readonly
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
index f323b151ad1e..81885f108512 100644
--- a/test/CodeGen/AArch64/tail-call.ll
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -5,7 +5,7 @@ declare fastcc void @callee_stack8([8 x i32], i64)
 declare fastcc void @callee_stack16([8 x i32], i64, i64)
 
 define fastcc void @caller_to0_from0() nounwind {
-; CHECK: caller_to0_from0:
+; CHECK-LABEL: caller_to0_from0:
 ; CHECK-NEXT: // BB
   tail call fastcc void @callee_stack0()
   ret void
@@ -13,7 +13,7 @@ define fastcc void @caller_to0_from0() nounwind {
 }
 
 define fastcc void @caller_to0_from8([8 x i32], i64) {
-; CHECK: caller_to0_from8:
+; CHECK-LABEL: caller_to0_from8:
 
   tail call fastcc void @callee_stack0()
   ret void
@@ -22,7 +22,7 @@ define fastcc void @caller_to0_from8([8 x i32], i64) {
 }
 
 define fastcc void @caller_to8_from0() {
-; CHECK: caller_to8_from0:
+; CHECK-LABEL: caller_to8_from0:
 ; CHECK: sub sp, sp, #32
 
 ; Key point is that the "42" should go #16 below incoming stack
@@ -35,7 +35,7 @@ define fastcc void @caller_to8_from0() {
 }
 
 define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
-; CHECK: caller_to8_from8:
+; CHECK-LABEL: caller_to8_from8:
 ; CHECK: sub sp, sp, #16
 
 ; Key point is that the "%a" should go where at SP on entry.
@@ -47,7 +47,7 @@ define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
 }
 
 define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
-; CHECK: caller_to16_from8:
+; CHECK-LABEL: caller_to16_from8:
 ; CHECK: sub sp, sp, #16
 
 ; Important point is that the call reuses the "dead" argument space
@@ -63,7 +63,7 @@ define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
 
 
 define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
-; CHECK: caller_to8_from24:
+; CHECK-LABEL: caller_to8_from24:
 ; CHECK: sub sp, sp, #16
 
 ; Key point is that the "%a" should go where at #16 above SP on entry.
@@ -76,7 +76,7 @@ define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
 
 
 define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
-; CHECK: caller_to16_from16:
+; CHECK-LABEL: caller_to16_from16:
 ; CHECK: sub sp, sp, #16
 
 ; Here we want to make sure that both loads happen before the stores:
diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll
index bad2298c8a65..b5d7d8938444 100644
--- a/test/CodeGen/AArch64/tls-dynamic-together.ll
+++ b/test/CodeGen/AArch64/tls-dynamic-together.ll
@@ -8,7 +8,7 @@
 @general_dynamic_var = external thread_local global i32
 
 define i32 @test_generaldynamic() {
-; CHECK: test_generaldynamic:
+; CHECK-LABEL: test_generaldynamic:
 
   %val = load i32* @general_dynamic_var
   ret i32 %val
diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll
index cdfd11783c23..68c481ce98b6 100644
--- a/test/CodeGen/AArch64/tls-dynamics.ll
+++ b/test/CodeGen/AArch64/tls-dynamics.ll
@@ -4,14 +4,14 @@
 @general_dynamic_var = external thread_local global i32
 
 define i32 @test_generaldynamic() {
-; CHECK: test_generaldynamic:
+; CHECK-LABEL: test_generaldynamic:
 
   %val = load i32* @general_dynamic_var
   ret i32 %val
 
 ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
 ; CHECK: .tlsdesccall general_dynamic_var
 ; CHECK-NEXT: blr [[CALLEE]]
 
@@ -19,20 +19,20 @@ define i32 @test_generaldynamic() {
 ; CHECK: ldr w0, [x[[TP]], x0]
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
 
 define i32* @test_generaldynamic_addr() {
-; CHECK: test_generaldynamic_addr:
+; CHECK-LABEL: test_generaldynamic_addr:
 
   ret i32* @general_dynamic_var
 
 ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
 ; CHECK: .tlsdesccall general_dynamic_var
 ; CHECK-NEXT: blr [[CALLEE]]
 
@@ -40,8 +40,8 @@ define i32* @test_generaldynamic_addr() {
 ; CHECK: add x0, [[TP]], x0
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
@@ -49,14 +49,14 @@ define i32* @test_generaldynamic_addr() {
 @local_dynamic_var = external thread_local(localdynamic) global i32
 
 define i32 @test_localdynamic() {
-; CHECK: test_localdynamic:
+; CHECK-LABEL: test_localdynamic:
 
   %val = load i32* @local_dynamic_var
   ret i32 %val
 
 ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
 ; CHECK: .tlsdesccall _TLS_MODULE_BASE_
 ; CHECK-NEXT: blr [[CALLEE]]
 
@@ -66,20 +66,20 @@ define i32 @test_localdynamic() {
 ; CHECK: ldr w0, [x0, [[DTP_OFFSET]]]
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
 
 define i32* @test_localdynamic_addr() {
-; CHECK: test_localdynamic_addr:
+; CHECK-LABEL: test_localdynamic_addr:
 
   ret i32* @local_dynamic_var
 
 ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
 ; CHECK: .tlsdesccall _TLS_MODULE_BASE_
 ; CHECK-NEXT: blr [[CALLEE]]
 
@@ -89,8 +89,8 @@ define i32* @test_localdynamic_addr() {
 ; CHECK: add x0, x0, [[DTP_OFFSET]]
 
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
 
 }
@@ -101,7 +101,7 @@ define i32* @test_localdynamic_addr() {
 @local_dynamic_var2 = external thread_local(localdynamic) global i32
 
 define i32 @test_localdynamic_deduplicate() {
-; CHECK: test_localdynamic_deduplicate:
+; CHECK-LABEL: test_localdynamic_deduplicate:
 
   %val = load i32* @local_dynamic_var
   %val2 = load i32* @local_dynamic_var2
@@ -110,8 +110,8 @@ define i32 @test_localdynamic_deduplicate() {
   ret i32 %sum
 
 ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
 ; CHECK: .tlsdesccall _TLS_MODULE_BASE_
 ; CHECK-NEXT: blr [[CALLEE]]
 
diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll
index a66588422793..39ceb9a4795c 100644
--- a/test/CodeGen/AArch64/tls-execs.ll
+++ b/test/CodeGen/AArch64/tls-execs.ll
@@ -1,10 +1,10 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
 
 @initial_exec_var = external thread_local(initialexec) global i32
 
 define i32 @test_initial_exec() {
-; CHECK: test_initial_exec:
+; CHECK-LABEL: test_initial_exec:
   %val = load i32* @initial_exec_var
 
 ; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
@@ -19,7 +19,7 @@ define i32 @test_initial_exec() {
 }
 
 define i32* @test_initial_exec_addr() {
-; CHECK: test_initial_exec_addr:
+; CHECK-LABEL: test_initial_exec_addr:
   ret i32* @initial_exec_var
 
 ; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
@@ -35,10 +35,10 @@ define i32* @test_initial_exec_addr() {
 @local_exec_var = thread_local(initialexec) global i32 0
 
 define i32 @test_local_exec() {
-; CHECK: test_local_exec:
+; CHECK-LABEL: test_local_exec:
   %val = load i32* @local_exec_var
 
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [A,A,0xa0'A',0x92'A']
 ; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
 ; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
 ; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
@@ -50,7 +50,7 @@ define i32 @test_local_exec() {
 }
 
 define i32* @test_local_exec_addr() {
-; CHECK: test_local_exec_addr:
+; CHECK-LABEL: test_local_exec_addr:
   ret i32* @local_exec_var
 
 ; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
index 65c1fda49e2d..154bc08c144c 100644
--- a/test/CodeGen/AArch64/tst-br.ll
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -7,7 +7,7 @@
 @var64 = global i64 0
 
 define i32 @test_tbz() {
-; CHECK: test_tbz:
+; CHECK-LABEL: test_tbz:
 
   %val = load i32* @var32
   %val64 = load i64* @var64
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
index c5d319eb112b..f3d376beeb28 100644
--- a/test/CodeGen/AArch64/variadic.ll
+++ b/test/CodeGen/AArch64/variadic.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck --check-prefix=CHECK-NOFP %s
 
 %va_list = type {i8*, i8*, i8*, i32, i32}
 
@@ -7,21 +8,30 @@
 declare void @llvm.va_start(i8*)
 
 define void @test_simple(i32 %n, ...) {
-; CHECK: test_simple:
+; CHECK-LABEL: test_simple:
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
 ; CHECK: mov x[[FPRBASE:[0-9]+]], sp
 ; CHECK: str q7, [x[[FPRBASE]], #112]
 ; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
 ; CHECK: str x7, [x[[GPRBASE]], #48]
 
+; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK-NOFP: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK-NOFP: str x7, [x[[GPRBASE]], #48]
+; CHECK-NOFP-NOT: str q7,
+; CHECK-NOFP: str x1, [sp, #[[GPRFROMSP]]]
+
 ; Omit the middle ones
 
 ; CHECK: str q0, [sp]
 ; CHECK: str x1, [sp, #[[GPRFROMSP]]]
 
+; CHECK-NOFP-NOT: str q0, [sp]
+
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
 ; CHECK: movn [[VR_OFFS:w[0-9]+]], #127
 ; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
 ; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
@@ -33,22 +43,38 @@ define void @test_simple(i32 %n, ...) {
 ; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
 ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
 
+; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
+; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #55
+; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56
+; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
   ret void
 }
 
 define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
-; CHECK: test_fewargs:
+; CHECK-LABEL: test_fewargs:
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
 ; CHECK: mov x[[FPRBASE:[0-9]+]], sp
 ; CHECK: str q7, [x[[FPRBASE]], #96]
 ; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
 ; CHECK: str x7, [x[[GPRBASE]], #32]
 
+; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK-NOFP-NOT: str q7,
+; CHECK-NOFP: mov x[[GPRBASE:[0-9]+]], sp
+; CHECK-NOFP: str x7, [x[[GPRBASE]], #24]
+
 ; Omit the middle ones
 
 ; CHECK: str q1, [sp]
 ; CHECK: str x3, [sp, #[[GPRFROMSP]]]
 
+; CHECK-NOFP-NOT: str q1, [sp]
+; CHECK-NOFP: str x4, [sp]
+
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
 ; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
@@ -63,11 +89,20 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
 ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
 
+; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
+; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #31
+; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #32
+; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
   ret void
 }
 
 define void @test_nospare([8 x i64], [8 x float], ...) {
-; CHECK: test_nospare:
+; CHECK-LABEL: test_nospare:
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
@@ -75,18 +110,25 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
 ; CHECK: mov [[STACK:x[0-9]+]], sp
 ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
 
+; CHECK-NOFP-NOT: sub sp, sp
+; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #64
+; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
   ret void
 }
 
 ; If there are non-variadic arguments on the stack (here two i64s) then the
 ; __stack field should point just past them.
 define void @test_offsetstack([10 x i64], [3 x float], ...) {
-; CHECK: test_offsetstack:
+; CHECK-LABEL: test_offsetstack:
 ; CHECK: sub sp, sp, #80
 ; CHECK: mov x[[FPRBASE:[0-9]+]], sp
 ; CHECK: str q7, [x[[FPRBASE]], #64]
 
 ; CHECK-NOT: str x{{[0-9]+}},
+
+; CHECK-NOFP-NOT: str q7,
+; CHECK-NOFP-NOT: str x7,
+
 ; Omit the middle ones
 
 ; CHECK: str q3, [sp]
@@ -102,20 +144,27 @@ define void @test_offsetstack([10 x i64], [3 x float], ...) {
 ; CHECK: add [[STACK:x[0-9]+]], sp, #96
 ; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
 
+; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #40
+; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
+; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24]
   ret void
 }
 
 declare void @llvm.va_end(i8*)
 
 define void @test_va_end() nounwind {
-; CHECK: test_va_end:
+; CHECK-LABEL: test_va_end:
 ; CHECK-NEXT: BB#0
+; CHECK-NOFP: BB#0
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_end(i8* %addr)
 
   ret void
 ; CHECK-NEXT: ret
+; CHECK-NOFP-NEXT: ret
 }
 
 declare void @llvm.va_copy(i8* %dest, i8* %src)
@@ -123,7 +172,7 @@ declare void @llvm.va_copy(i8* %dest, i8* %src)
 @second_list = global %va_list zeroinitializer
 
 define void @test_va_copy() {
-; CHECK: test_va_copy:
+; CHECK-LABEL: test_va_copy:
   %srcaddr = bitcast %va_list* @var to i8*
   %dstaddr = bitcast %va_list* @second_list to i8*
   call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
@@ -131,14 +180,25 @@ define void @test_va_copy() {
 ; Check beginning and end again:
 
 ; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+
 ; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
 
+; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
 ; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
-; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
 
-; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
 ; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
 
+; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
+
+; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
+
+; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24]
+
   ret void
 ; CHECK: ret
+; CHECK-NOFP: ret
 }
diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll
index fef0437ae7f3..9b1e52770ce4 100644
--- a/test/CodeGen/AArch64/zero-reg.ll
+++ b/test/CodeGen/AArch64/zero-reg.ll
@@ -4,7 +4,7 @@
 @var64 = global i64 0
 
 define void @test_zr() {
-; CHECK: test_zr:
+; CHECK-LABEL: test_zr:
 
   store i32 0, i32* @var32
 ; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32]
@@ -16,7 +16,7 @@ define void @test_zr() {
 }
 
 define void @test_sp(i32 %val) {
-; CHECK: test_sp:
+; CHECK-LABEL: test_sp:
 
 ; Important correctness point here is that LLVM doesn't try to use xzr
 ; as an addressing register: "str w0, [xzr]" is not a valid A64
@@ -28,4 +28,4 @@ define void @test_sp(i32 %val) {
 
   ret void
 ; CHECK: ret
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 0bfe33175196..e7c0129a7752 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -4,7 +4,7 @@
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @A = external global [4 x [4 x i32]]		; <[4 x [4 x i32]]*> [#uses=1]
 
-; CHECK: dct_luma_sp:
+; CHECK-LABEL: dct_luma_sp:
 define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
 entry:
 ; Make sure to use base-updating stores for saving callee-saved registers.
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
index e1e60e6317a6..ee99c70ff0e6 100644
--- a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 ; pr4843
 define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
-;CHECK: v2regbug:
+;CHECK-LABEL: v2regbug:
 ;CHECK: vzip.16
 	%tmp1 = load <4 x i16>* %B
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
index 0fe3b39a622d..e2ff164502ce 100644
--- a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -4,7 +4,7 @@
 %0 = type { double, double }
 
 define void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: bl __aeabi_dadd
 ; CHECK-NOT: strd
 ; CHECK: mov
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index a2e7ff718b4a..570fcf96e641 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -23,10 +23,12 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare i32 @foo(i32) ssp
 
 !0 = metadata !{i32 5, i32 2, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2, i32 1, i32 1}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", metadata !3, i32 4, null, i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"genmodes.i", metadata !"/Users/yash/Downloads", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!1 = metadata !{i32 458763, null, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, metadata !8, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, null, metadata !9, null, null, null, metadata !""}; [DW_TAG_compile_unit ]
 !4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ]
-!5 = metadata !{i32 458763, metadata !1, i32 1, i32 1}; [DW_TAG_lexical_block ]
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+!5 = metadata !{i32 458763, null, metadata !1, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
+!6 = metadata !{i32 458788, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
 !7 = metadata !{i32 6, i32 1, metadata !2, null}
+!8 = metadata !{metadata !"genmodes.i", metadata !"/Users/yash/Downloads"}
+!9 = metadata !{i32 0}
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index a8afc20bc130..4fb2be02ce9a 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -12,7 +12,7 @@ entry:
   %3 = fmul float %0, %1                          ; <float> [#uses=1]
   %4 = fadd float 0.000000e+00, %3                ; <float> [#uses=1]
   %5 = fsub float 1.000000e+00, %4                ; <float> [#uses=1]
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: vmov.f32 s{{[0-9]+}}, #1.000000e+00
   %6 = fsub float 1.000000e+00, undef             ; <float> [#uses=2]
   %7 = fsub float %2, undef                       ; <float> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 05581c3f16cf..35739d76eae0 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -12,15 +12,21 @@ entry:
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", metadata !2, i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!15}
+!0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, metadata !12, null, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
+!12 = metadata !{metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc"}
+!3 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, metadata !14, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !6, metadata !6}
-!6 = metadata !{i32 524310, metadata !2, metadata !"SItype", metadata !7, i32 152, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 524310, metadata !12, null, metadata !"SItype", i32 152, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
 !7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ]
-!8 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 524324, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !9 = metadata !{i32 95, i32 0, metadata !10, null}
-!10 = metadata !{i32 524299, metadata !1, i32 94, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 524299, metadata !12, metadata !1, i32 94, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 100, i32 0, metadata !10, null}
+!13 = metadata !{i32 0}
+!14 = metadata !{metadata !1}
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index 0ae7f84f3ef3..35995b77c5bc 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -6,10 +6,10 @@
 
 define zeroext i8 @t(%struct.foo* %this) noreturn optsize {
 entry:
-; ARM:       t:
+; ARM-LABEL:       t:
 ; ARM:       str r2, [r1], r0
 
-; THUMB:     t:
+; THUMB-LABEL:     t:
 ; THUMB-NOT: str r0, [r1], r0
 ; THUMB:     str r1, [r0]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
index cdb11c71fc0e..a53200e72c3f 100644
--- a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
+++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -48,19 +48,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.gv = !{!14}
 
 !0 = metadata !{i32 524545, metadata !1, metadata !"buf", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"x0", metadata !"x0", metadata !"x0", metadata !2, i32 5, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"t.c", metadata !"/private/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!1 = metadata !{i32 524334, metadata !26, null, metadata !"x0", metadata !"x0", metadata !"x0", i32 5, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !26} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 524305, i32 0, i32 12, metadata !"t.c", metadata !".", metadata !"clang 2.0", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 524309, metadata !26, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{null}
-!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524324, metadata !2, metadata !"unsigned char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524303, metadata !26, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 524324, metadata !26, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 524545, metadata !1, metadata !"nbytes", metadata !2, i32 4, metadata !9} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 524324, metadata !2, metadata !"unsigned long", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524324, metadata !26, metadata !2, metadata !"unsigned long", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 524544, metadata !11, metadata !"nread", metadata !2, i32 6, metadata !9} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 524299, metadata !1, i32 5, i32 1} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 524299, metadata !26, metadata !1, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 524544, metadata !11, metadata !"c", metadata !2, i32 7, metadata !13} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 524324, metadata !26, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 524340, i32 0, metadata !2, metadata !"length", metadata !"length", metadata !"length", metadata !2, i32 1, metadata !13, i1 false, i1 true, i32* @length} ; [ DW_TAG_variable ]
 !15 = metadata !{i32 4, i32 24, metadata !1, null}
 !16 = metadata !{i32 4, i32 43, metadata !1, null}
@@ -69,7 +69,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !19 = metadata !{i32 10, i32 2, metadata !11, null}
 !20 = metadata !{i32 11, i32 2, metadata !11, null}
 !21 = metadata !{i32 12, i32 3, metadata !22, null}
-!22 = metadata !{i32 524299, metadata !11, i32 11, i32 45} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 524299, metadata !26, metadata !11, i32 11, i32 45, i32 0} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 13, i32 3, metadata !22, null}
 !24 = metadata !{i32 14, i32 2, metadata !22, null}
 !25 = metadata !{i32 15, i32 1, metadata !11, null}
+!26 = metadata !{metadata !"t.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 112512ff59a5..7aacd1aa70ca 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -75,48 +75,49 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
+!0 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786451, metadata !48, null, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
 !2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, metadata !48, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
-!5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
-!8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786445, metadata !48, metadata !1, metadata !"Data", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 786447, metadata !48, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786445, metadata !48, metadata !1, metadata !"Kind", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
+!8 = metadata !{i32 786468, metadata !48, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !11 = metadata !{null, metadata !12, metadata !13}
-!12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786447, metadata !48, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !48, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
-!18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786478, metadata !48, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!20 = metadata !{i32 786478, metadata !48, metadata !2, metadata !"main", metadata !"main", metadata !"main", i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !22 = metadata !{metadata !13}
 !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 16, i32 0, metadata !17, null}
 !25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!26 = metadata !{i32 786448, metadata !48, metadata !2, metadata !"SVal", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
 !27 = metadata !{i32 17, i32 0, metadata !28, null}
 !28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 18, i32 0, metadata !28, null}
 !30 = metadata !{i32 20, i32 0, metadata !28, null}
 !31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!32 = metadata !{i32 786470, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
-!33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786470, metadata !48, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
+!33 = metadata !{i32 786447, metadata !48, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
 !34 = metadata !{i32 11, i32 0, metadata !16, null}
 !35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !48, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 786443, metadata !48, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
+!39 = metadata !{i32 786443, metadata !48, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !48, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
 !41 = metadata !{i32 24, i32 0, metadata !39, null}
 !42 = metadata !{i32 25, i32 0, metadata !39, null}
 !43 = metadata !{i32 26, i32 0, metadata !39, null}
@@ -125,3 +126,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
 !47 = metadata !{i32 0}
 !48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
+!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
index bda14bcb1520..305369435138 100644
--- a/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
+++ b/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
@@ -1,12 +1,284 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
-; This tests that MC/asm header conversion is smooth
-;
-; CHECK:      .syntax unified
-; CHECK: .eabi_attribute 20, 1
-; CHECK: .eabi_attribute 21, 1
-; CHECK: .eabi_attribute 23, 3
-; CHECK: .eabi_attribute 24, 1
-; CHECK: .eabi_attribute 25, 1
+; This tests that MC/asm header conversion is smooth and that the
+; build attributes are correct
+
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57
+
+; V6:   .eabi_attribute 6, 6
+; V6:   .eabi_attribute 8, 1
+; V6:   .eabi_attribute 24, 1
+; V6:   .eabi_attribute 25, 1
+; V6-NOT:   .eabi_attribute 27
+; V6-NOT:   .eabi_attribute 28
+; V6-NOT:    .eabi_attribute 36
+; V6-NOT:    .eabi_attribute 42
+; V6-NOT:    .eabi_attribute 68
+
+; V6M:  .eabi_attribute 6, 12
+; V6M:  .eabi_attribute 7, 77
+; V6M:  .eabi_attribute 8, 0
+; V6M:  .eabi_attribute 9, 1
+; V6M:  .eabi_attribute 24, 1
+; V6M:  .eabi_attribute 25, 1
+; V6M-NOT:  .eabi_attribute 27
+; V6M-NOT:  .eabi_attribute 28
+; V6M-NOT:  .eabi_attribute 36
+; V6M-NOT:  .eabi_attribute 42
+; V6M-NOT:  .eabi_attribute 68
+
+; ARM1156T2F-S: .cpu arm1156t2f-s
+; ARM1156T2F-S: .eabi_attribute 6, 8
+; ARM1156T2F-S: .eabi_attribute 8, 1
+; ARM1156T2F-S: .eabi_attribute 9, 2
+; ARM1156T2F-S: .fpu vfpv2
+; ARM1156T2F-S: .eabi_attribute 20, 1
+; ARM1156T2F-S: .eabi_attribute 21, 1
+; ARM1156T2F-S: .eabi_attribute 23, 3
+; ARM1156T2F-S: .eabi_attribute 24, 1
+; ARM1156T2F-S: .eabi_attribute 25, 1
+; ARM1156T2F-S-NOT: .eabi_attribute 27
+; ARM1156T2F-S-NOT: .eabi_attribute 28
+; ARM1156T2F-S-NOT: .eabi_attribute 36
+; ARM1156T2F-S-NOT:    .eabi_attribute 42
+; ARM1156T2F-S-NOT:    .eabi_attribute 68
+
+; V7M:  .eabi_attribute 6, 10
+; V7M:  .eabi_attribute 7, 77
+; V7M:  .eabi_attribute 8, 0
+; V7M:  .eabi_attribute 9, 2
+; V7M:  .eabi_attribute 24, 1
+; V7M:  .eabi_attribute 25, 1
+; V7M-NOT:  .eabi_attribute 27
+; V7M-NOT:  .eabi_attribute 28
+; V7M-NOT:  .eabi_attribute 36
+; V7M-NOT:  .eabi_attribute 42
+; V7M:  .eabi_attribute 44, 0
+; V7M-NOT:  .eabi_attribute 68
+
+; V7:      .syntax unified
+; V7: .eabi_attribute 6, 10
+; V7: .eabi_attribute 20, 1
+; V7: .eabi_attribute 21, 1
+; V7: .eabi_attribute 23, 3
+; V7: .eabi_attribute 24, 1
+; V7: .eabi_attribute 25, 1
+; V7-NOT: .eabi_attribute 27
+; V7-NOT: .eabi_attribute 28
+; V7-NOT: .eabi_attribute 36
+; V7-NOT:    .eabi_attribute 42
+; V7-NOT:    .eabi_attribute 68
+
+; V8:      .syntax unified
+; V8: .eabi_attribute 6, 14
+
+; Vt8:     .syntax unified
+; Vt8: .eabi_attribute 6, 14
+
+; V8-FPARMv8:      .syntax unified
+; V8-FPARMv8: .eabi_attribute 6, 14
+; V8-FPARMv8: .fpu fp-armv8
+
+; V8-NEON:      .syntax unified
+; V8-NEON: .eabi_attribute 6, 14
+; V8-NEON: .fpu neon
+; V8-NEON: .eabi_attribute 12, 3
+
+; V8-FPARMv8-NEON:      .syntax unified
+; V8-FPARMv8-NEON: .eabi_attribute 6, 14
+; V8-FPARMv8-NEON: .fpu neon-fp-armv8
+; V8-FPARMv8-NEON: .eabi_attribute 12, 3
+
+; V8-FPARMv8-NEON-CRYPTO:      .syntax unified
+; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 6, 14
+; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8
+; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3
+
+; CORTEX-A9-SOFT:  .cpu cortex-a9
+; CORTEX-A9-SOFT:  .eabi_attribute 6, 10
+; CORTEX-A9-SOFT:  .eabi_attribute 7, 65
+; CORTEX-A9-SOFT:  .eabi_attribute 8, 1
+; CORTEX-A9-SOFT:  .eabi_attribute 9, 2
+; CORTEX-A9-SOFT:  .fpu neon
+; CORTEX-A9-SOFT:  .eabi_attribute 20, 1
+; CORTEX-A9-SOFT:  .eabi_attribute 21, 1
+; CORTEX-A9-SOFT:  .eabi_attribute 23, 3
+; CORTEX-A9-SOFT:  .eabi_attribute 24, 1
+; CORTEX-A9-SOFT:  .eabi_attribute 25, 1
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 27
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 28
+; CORTEX-A9-SOFT:  .eabi_attribute 36, 1
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 42
+; CORTEX-A9-SOFT:  .eabi_attribute 68, 1
+
+; CORTEX-A9-HARD:  .cpu cortex-a9
+; CORTEX-A9-HARD:  .eabi_attribute 6, 10
+; CORTEX-A9-HARD:  .eabi_attribute 7, 65
+; CORTEX-A9-HARD:  .eabi_attribute 8, 1
+; CORTEX-A9-HARD:  .eabi_attribute 9, 2
+; CORTEX-A9-HARD:  .fpu neon
+; CORTEX-A9-HARD:  .eabi_attribute 20, 1
+; CORTEX-A9-HARD:  .eabi_attribute 21, 1
+; CORTEX-A9-HARD:  .eabi_attribute 23, 3
+; CORTEX-A9-HARD:  .eabi_attribute 24, 1
+; CORTEX-A9-HARD:  .eabi_attribute 25, 1
+; CORTEX-A9-HARD-NOT:  .eabi_attribute 27
+; CORTEX-A9-HARD:  .eabi_attribute 28, 1
+; CORTEX-A9-HARD:  .eabi_attribute 36, 1
+; CORTEX-A9-HARD-NOT:  .eabi_attribute 42
+; CORTEX-A9-HARD:  .eabi_attribute 68, 1
+
+; CORTEX-A9-MP:  .cpu cortex-a9-mp
+; CORTEX-A9-MP:  .eabi_attribute 6, 10
+; CORTEX-A9-MP:  .eabi_attribute 7, 65
+; CORTEX-A9-MP:  .eabi_attribute 8, 1
+; CORTEX-A9-MP:  .eabi_attribute 9, 2
+; CORTEX-A9-MP:  .fpu neon
+; CORTEX-A9-MP:  .eabi_attribute 20, 1
+; CORTEX-A9-MP:  .eabi_attribute 21, 1
+; CORTEX-A9-MP:  .eabi_attribute 23, 3
+; CORTEX-A9-MP:  .eabi_attribute 24, 1
+; CORTEX-A9-MP:  .eabi_attribute 25, 1
+; CORTEX-A9-MP-NOT:  .eabi_attribute 27
+; CORTEX-A9-MP-NOT:  .eabi_attribute 28
+; CORTEX-A9-MP:  .eabi_attribute 36, 1
+; CORTEX-A9-MP:  .eabi_attribute 42, 1
+; CORTEX-A9-MP:  .eabi_attribute 68, 1
+
+; CORTEX-A15: .cpu cortex-a15
+; CORTEX-A15: .eabi_attribute 6, 10
+; CORTEX-A15: .eabi_attribute 7, 65
+; CORTEX-A15: .eabi_attribute 8, 1
+; CORTEX-A15: .eabi_attribute 9, 2
+; CORTEX-A15: .fpu neon-vfpv4
+; CORTEX-A15: .eabi_attribute 20, 1
+; CORTEX-A15: .eabi_attribute 21, 1
+; CORTEX-A15: .eabi_attribute 23, 3
+; CORTEX-A15: .eabi_attribute 24, 1
+; CORTEX-A15: .eabi_attribute 25, 1
+; CORTEX-A15-NOT: .eabi_attribute 27
+; CORTEX-A15-NOT: .eabi_attribute 28
+; CORTEX-A15: .eabi_attribute 36, 1
+; CORTEX-A15: .eabi_attribute 42, 1
+; CORTEX-A15: .eabi_attribute 44, 2
+; CORTEX-A15: .eabi_attribute 68, 3
+
+; CORTEX-M0:  .cpu cortex-m0
+; CORTEX-M0:  .eabi_attribute 6, 12
+; CORTEX-M0:  .eabi_attribute 7, 77
+; CORTEX-M0:  .eabi_attribute 8, 0
+; CORTEX-M0:  .eabi_attribute 9, 1
+; CORTEX-M0:  .eabi_attribute 24, 1
+; CORTEX-M0:  .eabi_attribute 25, 1
+; CORTEX-M0-NOT:  .eabi_attribute 27
+; CORTEX-M0-NOT:  .eabi_attribute 28
+; CORTEX-M0-NOT:  .eabi_attribute 36
+; CORTEX-M0-NOT:  .eabi_attribute 42
+; CORTEX-M0-NOT:  .eabi_attribute 68
+
+; CORTEX-M4-SOFT:  .cpu cortex-m4
+; CORTEX-M4-SOFT:  .eabi_attribute 6, 13
+; CORTEX-M4-SOFT:  .eabi_attribute 7, 77
+; CORTEX-M4-SOFT:  .eabi_attribute 8, 0
+; CORTEX-M4-SOFT:  .eabi_attribute 9, 2
+; CORTEX-M4-SOFT:  .fpu vfpv4-d16
+; CORTEX-M4-SOFT:  .eabi_attribute 20, 1
+; CORTEX-M4-SOFT:  .eabi_attribute 21, 1
+; CORTEX-M4-SOFT:  .eabi_attribute 23, 3
+; CORTEX-M4-SOFT:  .eabi_attribute 24, 1
+; CORTEX-M4-SOFT:  .eabi_attribute 25, 1
+; CORTEX-M4-SOFT:  .eabi_attribute 27, 1
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 28
+; CORTEX-M4-SOFT:  .eabi_attribute 36, 1
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 42
+; CORTEX-M4-SOFT:  .eabi_attribute 44, 0
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 68
+
+; CORTEX-M4-HARD:  .cpu cortex-m4
+; CORTEX-M4-HARD:  .eabi_attribute 6, 13
+; CORTEX-M4-HARD:  .eabi_attribute 7, 77
+; CORTEX-M4-HARD:  .eabi_attribute 8, 0
+; CORTEX-M4-HARD:  .eabi_attribute 9, 2
+; CORTEX-M4-HARD:  .fpu vfpv4-d16
+; CORTEX-M4-HARD:  .eabi_attribute 20, 1
+; CORTEX-M4-HARD:  .eabi_attribute 21, 1
+; CORTEX-M4-HARD:  .eabi_attribute 23, 3
+; CORTEX-M4-HARD:  .eabi_attribute 24, 1
+; CORTEX-M4-HARD:  .eabi_attribute 25, 1
+; CORTEX-M4-HARD:  .eabi_attribute 27, 1
+; CORTEX-M4-HARD:  .eabi_attribute 28, 1
+; CORTEX-M4-HARD:  .eabi_attribute 36, 1
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 42
+; CORTEX-M4-HARD:  .eabi_attribute 44, 0
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 68
+
+; CORTEX-R5:  .cpu cortex-r5
+; CORTEX-R5:  .eabi_attribute 6, 10
+; CORTEX-R5:  .eabi_attribute 7, 82
+; CORTEX-R5:  .eabi_attribute 8, 1
+; CORTEX-R5:  .eabi_attribute 9, 2
+; CORTEX-R5:  .fpu vfpv3-d16
+; CORTEX-R5:  .eabi_attribute 20, 1
+; CORTEX-R5:  .eabi_attribute 21, 1
+; CORTEX-R5:  .eabi_attribute 23, 3
+; CORTEX-R5:  .eabi_attribute 24, 1
+; CORTEX-R5:  .eabi_attribute 25, 1
+; CORTEX-R5:  .eabi_attribute 27, 1
+; CORTEX-R5-NOT:  .eabi_attribute 28
+; CORTEX-R5-NOT:  .eabi_attribute 36
+; CORTEX-R5-NOT:  .eabi_attribute 42
+; CORTEX-R5:  .eabi_attribute 44, 2
+; CORTEX-R5-NOT:  .eabi_attribute 68
+
+; CORTEX-A53:  .cpu cortex-a53
+; CORTEX-A53:  .eabi_attribute 6, 14
+; CORTEX-A53:  .eabi_attribute 7, 65
+; CORTEX-A53:  .eabi_attribute 8, 1
+; CORTEX-A53:  .eabi_attribute 9, 2
+; CORTEX-A53:  .fpu crypto-neon-fp-armv8
+; CORTEX-A53:  .eabi_attribute 12, 3
+; CORTEX-A53:  .eabi_attribute 24, 1
+; CORTEX-A53:  .eabi_attribute 25, 1
+; CORTEX-A53-NOT:  .eabi_attribute 27
+; CORTEX-A53-NOT:  .eabi_attribute 28
+; CORTEX-A53:  .eabi_attribute 36, 1
+; CORTEX-A53:  .eabi_attribute 42, 1
+; CORTEX-A53:  .eabi_attribute 44, 2
+; CORTEX-A53:  .eabi_attribute 68, 3
+
+; CORTEX-A57:  .cpu cortex-a57
+; CORTEX-A57:  .eabi_attribute 6, 14
+; CORTEX-A57:  .eabi_attribute 7, 65
+; CORTEX-A57:  .eabi_attribute 8, 1
+; CORTEX-A57:  .eabi_attribute 9, 2
+; CORTEX-A57:  .fpu crypto-neon-fp-armv8
+; CORTEX-A57:  .eabi_attribute 12, 3
+; CORTEX-A57:  .eabi_attribute 24, 1
+; CORTEX-A57:  .eabi_attribute 25, 1
+; CORTEX-A57-NOT:  .eabi_attribute 27
+; CORTEX-A57-NOT:  .eabi_attribute 28
+; CORTEX-A57:  .eabi_attribute 36, 1
+; CORTEX-A57:  .eabi_attribute 42, 1
+; CORTEX-A57:  .eabi_attribute 44, 2
+; CORTEX-A57:  .eabi_attribute 68, 3
 
 define i32 @f(i64 %z) {
 	ret i32 0
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
deleted file mode 100644
index b253fefe87c4..000000000000
--- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: llc  %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
-; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=BASIC %s 
-; RUN: llc  %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
-; RUN:    -mattr=-neon,-vfp3,+vfp2 \
-; RUN:    -arm-reserve-r9 -filetype=obj -o - | \
-; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=CORTEXA8 %s
-
-
-; This tests that the extpected ARM attributes are emitted.
-;
-; BASIC:        Section {
-; BASIC:          Name: .ARM.attributes
-; BASIC-NEXT:     Type: SHT_ARM_ATTRIBUTES
-; BASIC-NEXT:     Flags [ (0x0)
-; BASIC-NEXT:     ]
-; BASIC-NEXT:     Address: 0x0
-; BASIC-NEXT:     Offset: 0x3C
-; BASIC-NEXT:     Size: 34
-; BASIC-NEXT:     Link: 0
-; BASIC-NEXT:     Info: 0
-; BASIC-NEXT:     AddressAlignment: 1
-; BASIC-NEXT:     EntrySize: 0
-; BASIC-NEXT:     SectionData (
-; BASIC-NEXT:       0000: 41210000 00616561 62690001 17000000
-; BASIC-NEXT:       0010: 060A0741 08010902 14011501 17031801
-; BASIC-NEXT:       0020: 1901
-; BASIC-NEXT:     )
-
-; CORTEXA8:        Name: .ARM.attributes
-; CORTEXA8-NEXT:     Type: SHT_ARM_ATTRIBUTES
-; CORTEXA8-NEXT:     Flags [ (0x0)
-; CORTEXA8-NEXT:     ]
-; CORTEXA8-NEXT:     Address: 0x0
-; CORTEXA8-NEXT:     Offset: 0x3C
-; CORTEXA8-NEXT:     Size: 47
-; CORTEXA8-NEXT:     Link: 0
-; CORTEXA8-NEXT:     Info: 0
-; CORTEXA8-NEXT:     AddressAlignment: 1
-; CORTEXA8-NEXT:     EntrySize: 0
-; CORTEXA8-NEXT:     SectionData (
-; CORTEXA8-NEXT:       0000: 412E0000 00616561 62690001 24000000
-; CORTEXA8-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
-; CORTEXA8-NEXT:       0020: 0109020A 02140115 01170318 011901
-; CORTEXA8-NEXT:     )
-
-define i32 @f(i64 %z) {
-       ret i32 0
-}
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index da4d15771f48..4179d8c99d6a 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -4,7 +4,7 @@
 
 define i32* @t(i32* %x) nounwind {
 entry:
-; ARM: t:
+; ARM-LABEL: t:
 ; ARM: push
 ; ARM: mov r7, sp
 ; ARM: bl _foo
@@ -12,7 +12,7 @@ entry:
 ; ARM: bl _foo
 ; ARM: pop {r7, pc}
 
-; THUMB2: t:
+; THUMB2-LABEL: t:
 ; THUMB2: push
 ; THUMB2: mov r7, sp
 ; THUMB2: blx _foo
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
deleted file mode 100644
index 9eecd045bfa0..000000000000
--- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    llvm-readobj -s -sr -sd | FileCheck  -check-prefix=OBJ %s
-
-target triple = "armv7-none-linux-gnueabi"
-
-@a = external global i8
-
-define arm_aapcs_vfpcc i32 @barf() nounwind {
-entry:
-  %0 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
-  ret i32 %0
-; OBJ:        Section {
-; OBJ:          Name: .text
-; OBJ:          Relocations [
-; OBJ-NEXT:       0x4 R_ARM_MOVW_ABS_NC a
-; OBJ-NEXT:       0x8 R_ARM_MOVT_ABS
-; OBJ-NEXT:       0xC R_ARM_CALL foo
-; OBJ-NEXT:     ]
-; OBJ-NEXT:     SectionData (
-; OBJ-NEXT:       0000: 00482DE9 000000E3 000040E3 FEFFFFEB
-; OBJ-NEXT:       0010: 0088BDE8
-; OBJ-NEXT:     )
-
-}
-
-declare arm_aapcs_vfpcc i32 @foo(i8*)
-
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index 4879f4e10bac..eef6abd96451 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -3,7 +3,7 @@
 
 define hidden void @foo() nounwind ssp {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: mov r7, sp
 ; CHECK-NEXT: vpush {d8}
 ; CHECK-NEXT: vpush {d10, d11}
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 98c0af35ef9a..f57411bb2c56 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -76,20 +76,21 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41,  metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !47, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786478, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786468, metadata !47, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786443, metadata !47, metadata !0, i32 4, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1, null} ; [ DW_TAG_variable ]
 !14 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2, null} ; [ DW_TAG_variable ]
 !15 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3, null} ; [ DW_TAG_variable ]
@@ -97,16 +98,16 @@ entry:
 !17 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5, null} ; [ DW_TAG_variable ]
 !18 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786688, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 786443, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 786443, metadata !47, metadata !6, i32 7, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
 !21 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 786688, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 786443, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !47, metadata !7, i32 10, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !25 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 786443, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786443, metadata !47, metadata !8, i32 13, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786443, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 786443, metadata !47, metadata !9, i32 16, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
 !30 = metadata !{i32 4, i32 0, metadata !0, null}
 !31 = metadata !{i32 4, i32 0, metadata !12, null}
 !32 = metadata !{i32 7, i32 0, metadata !6, null}
@@ -125,3 +126,5 @@ entry:
 !45 = metadata !{metadata !24, metadata !25}
 !46 = metadata !{metadata !27, metadata !28}
 !47 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
+!48 = metadata !{i32 0}
+!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index e84ce0e2394d..bc72e126b407 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -9,7 +9,7 @@
 @oStruct = external global %struct.Outer, align 4
 
 define void @main() nounwind {
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK-NOT: ldrd
 ; CHECK: mul
 for.body.lr.ph:
@@ -21,8 +21,8 @@ for.body:                                         ; preds = %_Z14printIsNotZeroi
   %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
   %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
   %inc = add i32 %i.022, 1
-  %tmp8 = load i32* %x, align 4, !tbaa !0
-  %tmp11 = load i32* %y, align 4, !tbaa !0
+  %tmp8 = load i32* %x, align 4
+  %tmp11 = load i32* %y, align 4
   %mul = mul nsw i32 %tmp11, %tmp8
   %tobool.i14 = icmp eq i32 %mul, 0
   br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16
@@ -35,15 +35,10 @@ _Z14printIsNotZeroi.exit17:                       ; preds = %_Z14printIsNotZeroi
 
 _Z14printIsNotZeroi.exit17.for.body_crit_edge:    ; preds = %_Z14printIsNotZeroi.exit17
   %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
-  %tmp3.pre = load i8* %b.phi.trans.insert, align 1, !tbaa !3
+  %tmp3.pre = load i8* %b.phi.trans.insert, align 1
   %phitmp27 = icmp eq i8 undef, 0
   br label %for.body
 
 for.end:                                          ; preds = %_Z14printIsNotZeroi.exit17
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"bool", metadata !1}
diff --git a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
index 0fe88bd0ed7e..caa0be56578c 100644
--- a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
+++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
@@ -8,7 +8,7 @@
 ; rdar://9172742
 
 define i32 @t() nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 entry:
   br label %bb2
 
diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll
index 19f756f51364..f3dd3dd5811e 100644
--- a/test/CodeGen/ARM/2011-04-07-schediv.ll
+++ b/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -12,7 +12,7 @@ entry:
 
 ; Make sure the scheduler schedules all uses of the preincrement
 ; induction variable before defining the postincrement value.
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: %bb
 ; CHECK-NOT: mov
 bb:                                               ; preds = %entry, %bb
diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
index 568718c91127..e30c9c615053 100644
--- a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
+++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -4,7 +4,7 @@
 ; rdar://9266679
 
 define zeroext i1 @t(i32* nocapture %A, i32 %size, i32 %value) nounwind readonly ssp {
-; CHECK: t:
+; CHECK-LABEL: t:
 entry:
   br label %for.cond
 
@@ -15,15 +15,14 @@ for.cond:
 
 for.body:
 ; CHECK: %for.
-; CHECK: movs r{{[0-9]+}}, #{{[01]}}
+; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}}
+; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}}
+; CHECK-NOT: mov r{{[0-9]+}}, #{{[01]}}
   %arrayidx = getelementptr i32* %A, i32 %0
   %tmp4 = load i32* %arrayidx, align 4
   %cmp6 = icmp eq i32 %tmp4, %value
   br i1 %cmp6, label %return, label %for.inc
 
-; CHECK: %for.
-; CHECK: movs r{{[0-9]+}}, #{{[01]}}
-
 for.inc:
   %inc = add i32 %0, 1
   br label %for.cond
diff --git a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
index ed7dd0332046..057c19948c35 100644
--- a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
+++ b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
@@ -15,7 +15,7 @@
 
 define i32 @test() nounwind optsize ssp {
 entry:
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: push
 ; CHECK-NOT: push
   %block_size = alloca i32, align 4
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 7a7ca8e0d8d9..bb7870764c50 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -73,29 +73,30 @@ define i32 @get5(i32 %a) nounwind optsize ssp {
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41,  metadata !41, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !41, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [get1]
 !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786478, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get2", metadata !"get2", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [get2]
+!7 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get3", metadata !"get3", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [get3]
+!8 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get4", metadata !"get4", metadata !"", i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [get4]
+!9 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get5", metadata !"get5", metadata !"", i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ] [line 17] [def] [get5]
 !10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786443, metadata !47, metadata !1, i32 5, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !14 = metadata !{i32 786688, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !6, i32 8, i32 17, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 786443, metadata !47, metadata !6, i32 8, i32 17, i32 1} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786688, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !7, i32 11, i32 19, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !47, metadata !7, i32 11, i32 19, i32 2} ; [ DW_TAG_lexical_block ]
 !19 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !20 = metadata !{i32 786688, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!21 = metadata !{i32 786443, metadata !8, i32 14, i32 19, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 786443, metadata !47, metadata !8, i32 14, i32 19, i32 3} ; [ DW_TAG_lexical_block ]
 !22 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5, null} ; [ DW_TAG_variable ]
 !23 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4, null} ; [ DW_TAG_variable ]
 !24 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3, null} ; [ DW_TAG_variable ]
@@ -103,7 +104,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !26 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1, null} ; [ DW_TAG_variable ]
 !27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786443, metadata !9, i32 17, i32 19, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 786443, metadata !47, metadata !9, i32 17, i32 19, i32 4} ; [ DW_TAG_lexical_block ]
 !30 = metadata !{i32 5, i32 16, metadata !1, null}
 !31 = metadata !{i32 5, i32 32, metadata !12, null}
 !32 = metadata !{i32 8, i32 14, metadata !6, null}
@@ -122,3 +123,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !45 = metadata !{metadata !19, metadata !20}
 !46 = metadata !{metadata !27, metadata !28}
 !47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"}
+!48 = metadata !{i32 0}
+!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 216057a31385..9163166177c1 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -42,7 +42,7 @@ if.then:                                          ; preds = %land.lhs.true
 ; If-convert the return
 ; CHECK: it	ne
 ; Fold the CSR+return into a pop
-; CHECK: popne	{r4, r5, r7, pc}
+; CHECK: pop {r4, r5, r7, pc}
 sw.bb18:
   %call20 = tail call i32 @bar(i32 %in2) nounwind
   switch i32 %call20, label %sw.default56 [
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
index ff049c89860d..03614eddbf70 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -10,8 +10,8 @@ target triple = "thumbv7-apple-ios5.0.0"
 ; CHECK-GENERIT-NEXT: strb
 ; CHECK-GENERIT-NEXT: strb
 ; CHECK-GENERIT-NEXT: strb
-; CHECK-UNALIGNED:      strb
-; CHECK-UNALIGNED-NEXT: str 
+; CHECK-UNALIGNED:    strb
+; CHECK-UNALIGNED:    str
 define void @foo(i8* nocapture %c) nounwind optsize {
 entry:
   call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false)
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
index f563eeef0180..850c51133f3e 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
 
 ; Trigger multiple NEON stores.
-; CHECK:      vst1.64
-; CHECK-NEXT: vst1.64
+; CHECK: vst1.64
+; CHECK: vst1.64
 define void @f_0_40(i8* nocapture %c) nounwind optsize {
 entry:
   call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
index 113cbfe39620..8a65f2e82b75 100644
--- a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -6,7 +6,7 @@
 @i8_src2 = global <2 x i8> <i8 2, i8 1>
 
 define void @test_neon_vector_add_2xi8() nounwind {
-; CHECK: test_neon_vector_add_2xi8:
+; CHECK-LABEL: test_neon_vector_add_2xi8:
   %1 = load <2 x i8>* @i8_src1
   %2 = load <2 x i8>* @i8_src2
   %3 = add <2 x i8> %1, %2
@@ -15,7 +15,7 @@ define void @test_neon_vector_add_2xi8() nounwind {
 }
 
 define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
-; CHECK: test_neon_ld_st_volatile_with_ashr_2xi8:
+; CHECK-LABEL: test_neon_ld_st_volatile_with_ashr_2xi8:
   %1 = load volatile <2 x i8>* @i8_src1
   %2 = load volatile <2 x i8>* @i8_src2
   %3 = ashr <2 x i8> %1, %2
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
index 2ab6a4fcc9b4..42eb32d14c74 100644
--- a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -7,7 +7,7 @@
 declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind
 
 define void @test_neon_call_return_v2i16() {
-; CHECK: test_neon_call_return_v2i16:
+; CHECK-LABEL: test_neon_call_return_v2i16:
   %1 = load <2 x i16>* @src1_v2i16
   %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
   store <2 x i16> %2, <2 x i16>* @res_v2i16
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
index 5409f8c60887..bc496b99f4a6 100644
--- a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -10,7 +10,7 @@
 @infoBlock = external global %struct.InformationBlock
 
 define hidden void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: ldr.w
 ; CHECK: ldr.w
 ; CHECK-NOT: ldm
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 0d0d03b23e86..a263c9c8d678 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -4,7 +4,7 @@
 
 define void @test_sqrt(<4 x float>* %X) nounwind {
 
-; CHECK: test_sqrt:
+; CHECK-LABEL: test_sqrt:
 
 ; CHECK:      movw    r1, :lower16:{{.*}}
 ; CHECK:      movt    r1, :upper16:{{.*}}
@@ -27,7 +27,7 @@ declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
 
 define void @test_cos(<4 x float>* %X) nounwind {
 
-; CHECK: test_cos:
+; CHECK-LABEL: test_cos:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -58,7 +58,7 @@ declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
 
 define void @test_exp(<4 x float>* %X) nounwind {
 
-; CHECK: test_exp:
+; CHECK-LABEL: test_exp:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -89,7 +89,7 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
 
 define void @test_exp2(<4 x float>* %X) nounwind {
 
-; CHECK: test_exp2:
+; CHECK-LABEL: test_exp2:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -120,7 +120,7 @@ declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
 
 define void @test_log10(<4 x float>* %X) nounwind {
 
-; CHECK: test_log10:
+; CHECK-LABEL: test_log10:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -151,7 +151,7 @@ declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
 
 define void @test_log(<4 x float>* %X) nounwind {
 
-; CHECK: test_log:
+; CHECK-LABEL: test_log:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -182,7 +182,7 @@ declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
 
 define void @test_log2(<4 x float>* %X) nounwind {
 
-; CHECK: test_log2:
+; CHECK-LABEL: test_log2:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -214,7 +214,7 @@ declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
 
 define void @test_pow(<4 x float>* %X) nounwind {
 
-; CHECK: test_pow:
+; CHECK-LABEL: test_pow:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -248,7 +248,7 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
 
 define void @test_powi(<4 x float>* %X) nounwind {
 
-; CHECK: test_powi:
+; CHECK-LABEL: test_powi:
 
 ; CHECK:       movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:       movt  [[reg0]], :upper16:{{.*}}
@@ -271,7 +271,7 @@ declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
 
 define void @test_sin(<4 x float>* %X) nounwind {
 
-; CHECK: test_sin:
+; CHECK-LABEL: test_sin:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
@@ -302,7 +302,7 @@ declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
 
 define void @test_floor(<4 x float>* %X) nounwind {
 
-; CHECK: test_floor:
+; CHECK-LABEL: test_floor:
 
 ; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:{{.*}}
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
index 0ff4f510eb3e..e795ec55fe5b 100644
--- a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
+++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -23,7 +23,7 @@
 ;
 ; rdar://11116189
 define i64 @t(i64 %aInput) nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movs [[REG:(r[0-9]+)]], #0
 ; CHECK: movt [[REG]], #46540
 ; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
diff --git a/test/CodeGen/ARM/2012-05-04-vmov.ll b/test/CodeGen/ARM/2012-05-04-vmov.ll
index d52ef2cc5a1c..14dbf7ff4ac9 100644
--- a/test/CodeGen/ARM/2012-05-04-vmov.ll
+++ b/test/CodeGen/ARM/2012-05-04-vmov.ll
@@ -7,5 +7,8 @@ entry:
   %div = udiv <2 x i32> %A, %B
   ret <2 x i32> %div
 ; A9-CHECK: vmov.32
-; SWIFT-CHECK-NOT: vmov.32
+; vmov.32 should not be used to read a lane, i.e. in the form
+; vmov.32 <dst>, <src>[<lane>],
+; but the lane-writing form vmov.32 <dst>[<lane>], <src> is fine.
+; SWIFT-CHECK-NOT: vmov.32 {{r[0-9]+}}, {{d[0-9]\[[0-9]+\]}}
 }
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
index 764c58f2e159..a7108253cb62 100644
--- a/test/CodeGen/ARM/2012-08-09-neon-extload.ll
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -12,7 +12,7 @@
 @var_v2i64 = global <2 x i64> zeroinitializer
 
 define void @test_v2i8tov2i32() {
-; CHECK: test_v2i8tov2i32:
+; CHECK-LABEL: test_v2i8tov2i32:
 
   %i8val = load <2 x i8>* @var_v2i8
 
@@ -26,7 +26,7 @@ define void @test_v2i8tov2i32() {
 }
 
 define void @test_v2i8tov2i64() {
-; CHECK: test_v2i8tov2i64:
+; CHECK-LABEL: test_v2i8tov2i64:
 
   %i8val = load <2 x i8>* @var_v2i8
 
@@ -44,7 +44,7 @@ define void @test_v2i8tov2i64() {
 }
 
 define void @test_v4i8tov4i16() {
-; CHECK: test_v4i8tov4i16:
+; CHECK-LABEL: test_v4i8tov4i16:
 
   %i8val = load <4 x i8>* @var_v4i8
 
@@ -59,7 +59,7 @@ define void @test_v4i8tov4i16() {
 }
 
 define void @test_v4i8tov4i32() {
-; CHECK: test_v4i8tov4i32:
+; CHECK-LABEL: test_v4i8tov4i32:
 
   %i8val = load <4 x i8>* @var_v4i8
 
@@ -73,7 +73,7 @@ define void @test_v4i8tov4i32() {
 }
 
 define void @test_v2i16tov2i32() {
-; CHECK: test_v2i16tov2i32:
+; CHECK-LABEL: test_v2i16tov2i32:
 
   %i16val = load <2 x i16>* @var_v2i16
 
@@ -88,7 +88,7 @@ define void @test_v2i16tov2i32() {
 }
 
 define void @test_v2i16tov2i64() {
-; CHECK: test_v2i16tov2i64:
+; CHECK-LABEL: test_v2i16tov2i64:
 
   %i16val = load <2 x i16>* @var_v2i16
 
diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
index 2f55204aa407..647ebd6bdfd4 100644
--- a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
+++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
@@ -13,7 +13,7 @@
 ; v4i8
 ;
 define void @sextload_v4i8_c(<4 x i8>* %v) nounwind {
-;CHECK: sextload_v4i8_c:
+;CHECK-LABEL: sextload_v4i8_c:
 entry:
   %0 = load <4 x i8>* %v, align 8
   %v0  = sext <4 x i8> %0 to <4 x i32>
@@ -26,7 +26,7 @@ entry:
 ; v2i8
 ;
 define void @sextload_v2i8_c(<2 x i8>* %v) nounwind {
-;CHECK: sextload_v2i8_c:
+;CHECK-LABEL: sextload_v2i8_c:
 entry:
   %0   = load <2 x i8>* %v, align 8
   %v0  = sext <2 x i8>  %0 to <2 x i64>
@@ -39,7 +39,7 @@ entry:
 ; v2i16
 ;
 define void @sextload_v2i16_c(<2 x i16>* %v) nounwind {
-;CHECK: sextload_v2i16_c:
+;CHECK-LABEL: sextload_v2i16_c:
 entry:
   %0   = load <2 x i16>* %v, align 8
   %v0  = sext <2 x i16>  %0 to <2 x i64>
@@ -54,7 +54,7 @@ entry:
 ; v4i8
 ;
 define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind {
-;CHECK: sextload_v4i8_v:
+;CHECK-LABEL: sextload_v4i8_v:
 entry:
   %0 = load <4 x i8>* %v, align 8
   %v0  = sext <4 x i8> %0 to <4 x i32>
@@ -70,7 +70,7 @@ entry:
 ; v2i8
 ;
 define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind {
-;CHECK: sextload_v2i8_v:
+;CHECK-LABEL: sextload_v2i8_v:
 entry:
   %0 = load <2 x i8>* %v, align 8
   %v0  = sext <2 x i8> %0 to <2 x i64>
@@ -86,7 +86,7 @@ entry:
 ; v2i16
 ;
 define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind {
-;CHECK: sextload_v2i16_v:
+;CHECK-LABEL: sextload_v2i16_v:
 entry:
   %0 = load <2 x i16>* %v, align 8
   %v0  = sext <2 x i16> %0 to <2 x i64>
@@ -104,7 +104,7 @@ entry:
 ; v4i8 x v4i16
 ;
 define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind {
-;CHECK: sextload_v4i8_vs:
+;CHECK-LABEL: sextload_v4i8_vs:
 entry:
   %0 = load <4 x i8>* %v, align 8
   %v0  = sext <4 x i8> %0 to <4 x i32>
@@ -120,7 +120,7 @@ entry:
 ; v2i8
 ; v2i8 x v2i16
 define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind {
-;CHECK: sextload_v2i8_vs:
+;CHECK-LABEL: sextload_v2i8_vs:
 entry:
   %0 = load <2 x i8>* %v, align 8
   %v0  = sext <2 x i8> %0 to <2 x i64>
@@ -136,7 +136,7 @@ entry:
 ; v2i16
 ; v2i16 x v2i32
 define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind {
-;CHECK: sextload_v2i16_vs:
+;CHECK-LABEL: sextload_v2i16_vs:
 entry:
   %0 = load <2 x i16>* %v, align 8
   %v0  = sext <2 x i16> %0 to <2 x i64>
diff --git a/test/CodeGen/ARM/2012-08-30-select.ll b/test/CodeGen/ARM/2012-08-30-select.ll
index 8471be5330b8..e78bbdea01f2 100644
--- a/test/CodeGen/ARM/2012-08-30-select.ll
+++ b/test/CodeGen/ARM/2012-08-30-select.ll
@@ -1,18 +1,15 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 ; rdar://12201387
 
-;CHECK: select_s_v_v
+;CHECK-LABEL: select_s_v_v:
 ;CHECK: it  ne
 ;CHECK-NEXT: vmovne.i32
 ;CHECK: bx
-define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) {
+define <16 x i8> @select_s_v_v(<16 x i8> %vec, i32 %avail) {
 entry:
-  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
   %and = and i32 %avail, 1
   %tobool = icmp eq i32 %and, 0
-  %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer
-  ret <16 x i8> %vld1.
+  %ret = select i1 %tobool, <16 x i8> %vec, <16 x i8> zeroinitializer
+  ret <16 x i8> %ret
 }
 
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 )
-
diff --git a/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll b/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
index e761ffe72c13..3bdbb3cf5801 100644
--- a/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
+++ b/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
@@ -4,7 +4,7 @@
 ; rdar://12300648
 
 define i32 @t(i32 %x) {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK-NOT: movw
   %tmp = add i32 %x, -65535
   ret i32 %tmp
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
index 75766099a220..38624e0641f2 100644
--- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
 
 ; Check for error message:
 ; CHECK: non-trivial scalar-to-vector conversion, possible invalid constraint for vector type
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
index 6fa1391474bb..7ba693d6df4a 100644
--- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
 
 ; Check for error message:
 ; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index b0644d17431d..f864c8cbfcb5 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -7,7 +7,7 @@
 declare void @llvm.va_start(i8*) nounwind
 declare void @llvm.va_end(i8*) nounwind
 
-; CHECK: test_byval_8_bytes_alignment:
+; CHECK-LABEL: test_byval_8_bytes_alignment:
 define void @test_byval_8_bytes_alignment(i32 %i, ...) {
 entry:
 ; CHECK: stm     r0, {r1, r2, r3}
@@ -23,8 +23,12 @@ entry:
   ret void
 }
 
-; CHECK: main:
-; CHECK: ldm     r0, {r2, r3}
+; CHECK-LABEL: main:
+; CHECK: movw [[BASE:r[0-9]+]], :lower16:static_val
+; CHECK: movt [[BASE]], :upper16:static_val
+; ldm is not formed when the coalescer fails to coalesce everything.
+; CHECK: ldrd    r2, [[TMP:r[0-9]+]], {{\[}}[[BASE]]{{\]}}
+; CHECK: movw r0, #555
 define i32 @main() {
 entry:
   call void (i32, ...)* @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
@@ -33,7 +37,7 @@ entry:
 
 declare void @f(double);
 
-; CHECK:     test_byval_8_bytes_alignment_fixed_arg:
+; CHECK-LABEL:     test_byval_8_bytes_alignment_fixed_arg:
 ; CHECK-NOT:   str     r1
 ; CHECK:       str     r3, [sp, #12]
 ; CHECK:       str     r2, [sp, #8]
@@ -46,11 +50,14 @@ entry:
   ret void
 }
 
-; CHECK: main_fixed_arg:
-; CHECK: ldm     r0, {r2, r3}
+; CHECK-LABEL: main_fixed_arg:
+; CHECK: movw [[BASE:r[0-9]+]], :lower16:static_val
+; CHECK: movt [[BASE]], :upper16:static_val
+; ldm is not formed when the coalescer fails to coalesce everything.
+; CHECK: ldrd     r2, [[TMP:r[0-9]+]], {{\[}}[[BASE]]{{\]}}
+; CHECK: movw r0, #555
 define i32 @main_fixed_arg() {
 entry:
   call void (i32, %struct_t*)* @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
   ret i32 0
 }
-
diff --git a/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll b/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
index 478048d09600..c9ccc103e2fa 100644
--- a/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
+++ b/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
@@ -6,7 +6,7 @@
 
 declare i32 @printf(i8*, ...)
 
-; CHECK:     test_byval_usage_scheduling:
+; CHECK-LABEL:     test_byval_usage_scheduling:
 ; CHECK:       str     r3, [sp, #12]
 ; CHECK:       str     r2, [sp, #8]
 ; CHECK:       vldr    d16, [sp, #8]
diff --git a/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll b/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
index f2395107d426..a59533c4a85d 100644
--- a/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
+++ b/test/CodeGen/ARM/2012-10-04-LDRB_POST_IMM-Crash.ll
@@ -6,7 +6,7 @@
 
 declare void @f(i32 %n1, i32 %n2, i32 %n3, %my_struct_t* byval %val);
 
-; CHECK: main:
+; CHECK-LABEL: main:
 define i32 @main() nounwind {
 entry:
 ; CHECK: ldrb	{{(r[0-9]+)}}, {{(\[r[0-9]+\])}}, #1
diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
index fcc6a7f7e96f..0028eec80f44 100644
--- a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
+++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
@@ -5,7 +5,7 @@
 
 declare void @f(%struct.s* %p);
 
-; CHECK: t:
+; CHECK-LABEL: t:
 define void @t(i32 %a, %struct.s* byval %s) nounwind {
 entry:
 
@@ -20,7 +20,7 @@ entry:
   ret void
 }
 
-; CHECK: caller:
+; CHECK-LABEL: caller:
 define void @caller() {
 
 ; CHECK:      ldm     r0, {r1, r2, r3}
diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
index 38700f3a8d10..8df295a2f658 100644
--- a/test/CodeGen/ARM/2012-11-14-subs_carry.ll
+++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
-;CHECK: foo
+;CHECK-LABEL: foo:
 ;CHECK: adds
 ;CHECK-NEXT: adc
 ;CHECK-NEXT: bx
diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll
index 05abdeda0f19..014686feee0e 100644
--- a/test/CodeGen/ARM/2013-01-21-PR14992.ll
+++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -2,8 +2,8 @@
 ;RUN: llc -mtriple=thumbv7 < %s  | FileCheck -check-prefix=EXPECTED %s
 ;RUN: llc -mtriple=thumbv7 < %s  | FileCheck %s
 
-;EXPECTED: foo:
-;CHECK: foo:
+;EXPECTED-LABEL: foo:
+;CHECK-LABEL: foo:
 define i32 @foo(i32* %a) nounwind optsize {
 entry:
   %0 = load i32* %a, align 4
diff --git a/test/CodeGen/ARM/2013-02-27-expand-vfma.ll b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll
index 0e3bf2371061..f81211811860 100644
--- a/test/CodeGen/ARM/2013-02-27-expand-vfma.ll
+++ b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=armv7s-apple-darwin | FileCheck %s -check-prefix=VFP4
+; RUN: llc < %s -mtriple=armv7s-apple-darwin | FileCheck %s -check-prefix=CHECK-VFP4
 
 define <4 x float> @muladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind {
-; CHECK: muladd:
+; CHECK-LABEL: muladd:
 ; CHECK: fmaf
 ; CHECK: fmaf
 ; CHECK: fmaf
@@ -17,7 +17,7 @@ define <4 x float> @muladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounw
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
 
 define <2 x float> @muladd2(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind {
-; CHECK: muladd2:
+; CHECK-LABEL: muladd2:
 ; CHECK: fmaf
 ; CHECK: fmaf
 ; CHECK-NOT: fmaf
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
index 4a5ca9db0e50..127429bc31e3 100644
--- a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -1,30 +1,30 @@
 ;PR15293: ARM codegen ice - expected larger existing stack allocation
 ;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
 
-;CHECK: foo:
+;CHECK-LABEL: foo:
 ;CHECK: 	sub	sp, sp, #8
 ;CHECK: 	push	{r11, lr}
-;CHECK: 	str	r0, [sp, #12]
-;CHECK: 	add	r0, sp, #12
+;CHECK: 	str	r0, [sp, #8]
+;CHECK: 	add	r0, sp, #8
 ;CHECK: 	bl	fooUseParam
 ;CHECK: 	pop	{r11, lr}
 ;CHECK: 	add	sp, sp, #8
 ;CHECK: 	mov	pc, lr
 
-;CHECK: foo2:
-;CHECK: 	sub	sp, sp, #16
+;CHECK-LABEL: foo2:
+;CHECK: 	sub	sp, sp, #8
 ;CHECK: 	push	{r11, lr}
-;CHECK: 	str	r0, [sp, #12]
-;CHECK: 	add	r0, sp, #12
-;CHECK: 	str	r2, [sp, #16]
+;CHECK: 	str	r0, [sp, #8]
+;CHECK: 	add	r0, sp, #8
+;CHECK: 	str	r2, [sp, #12]
 ;CHECK: 	bl	fooUseParam
-;CHECK: 	add	r0, sp, #16
+;CHECK: 	add	r0, sp, #12
 ;CHECK: 	bl	fooUseParam
 ;CHECK: 	pop	{r11, lr}
-;CHECK: 	add	sp, sp, #16
+;CHECK: 	add	sp, sp, #8
 ;CHECK: 	mov	pc, lr
 
-;CHECK: doFoo:
+;CHECK-LABEL: doFoo:
 ;CHECK: 	push	{r11, lr}
 ;CHECK: 	ldr	r0,
 ;CHECK: 	ldr	r0, [r0]
@@ -33,7 +33,7 @@
 ;CHECK: 	mov	pc, lr
 
 
-;CHECK: doFoo2:
+;CHECK-LABEL: doFoo2:
 ;CHECK: 	push	{r11, lr}
 ;CHECK: 	ldr	r0,
 ;CHECK: 	mov	r1, #0
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
index 38d515f9227f..08bf99b31f54 100644
--- a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
@@ -53,11 +53,11 @@
 
 ;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
 ;
-;CHECK:     foo:
+;CHECK-LABEL:     foo:
 ;CHECK-NOT:     mov r0
 ;CHECK-NOT:     ldr r0
 ;CHECK:         bl fooUseI32
-;CHECK:     doFoo:
+;CHECK-LABEL:     doFoo:
 ;CHECK:         movs    r0, #43
 ;CHECK:         bl      foo
 
diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
index de5fd31e2f2d..0e0537ec5bfc 100644
--- a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
+++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -9,7 +9,7 @@
 
 @.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1
 
-;CHECK: printfn:
+;CHECK-LABEL: printfn:
 define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) {
 entry:
   %conv = sext i16 %b to i32
diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
index abc6e0d11144..c4f5f54c3af0 100644
--- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
+++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
@@ -1,8 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=CHECK-V8 %s
+; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck -check-prefix=CHECK-V8 %s
 ; rdar://13782395
 
 define i32 @t1(i32 %a, i32 %b, i8** %retaddr) {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: Block address taken
 ; CHECK-NOT: Address of block that was removed by CodeGen
   store i8* blockaddress(@t1, %cond_true), i8** %retaddr
@@ -19,7 +21,7 @@ cond_false:
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d, i8** %retaddr) {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: Block address taken
 ; CHECK: %cond_true
 ; CHECK: add
@@ -41,7 +43,7 @@ UnifiedReturnBlock:
 }
 
 define hidden fastcc void @t3(i8** %retaddr) {
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: Block address taken
 ; CHECK-NOT: Address of block that was removed by CodeGen
 bb:
@@ -69,3 +71,83 @@ bb6.i350:                                         ; preds = %bb2.i
 KBBlockZero.exit:                                 ; preds = %bb2.i
   indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0]
 }
+
+
+; The if-converter was checking for the wrong predicate-subsumption pattern
+; when handling nested predicates.
+; E.g., let A be a basic block that flows conditionally into B, and let B be a
+; predicated block.
+; B can be predicated with A.BrToBPredicate into A iff B.Predicate is less
+; "permissive" than A.BrToBPredicate, i.e., iff A.BrToBPredicate subsumes
+; B.Predicate.
+; <rdar://problem/14379453>
+
+; Hard-coded registers come from the ABI.
+; CHECK-LABEL: wrapDistance:
+; CHECK: cmp r1, #59
+; CHECK-NEXT: itt le
+; CHECK-NEXT: suble r0, r2, #1
+; CHECK-NEXT: bxle lr
+; CHECK-NEXT: subs [[REG:r[0-9]+]], #120
+; CHECK-NEXT: cmp [[REG]], r1
+; CHECK-NOT: it lt
+; CHECK-NEXT: bge [[LABEL:.+]]
+; Next BB
+; CHECK-NOT: cmplt
+; CHECK: cmp r0, #119
+; CHECK-NEXT: itt le
+; CHECK-NEXT: addle r0, r1, #1
+; CHECK-NEXT: bxle lr
+; Next BB
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: subs r0, r1, r0
+; CHECK-NEXT: bx lr
+
+; CHECK-V8-LABEL: wrapDistance:
+; CHECK-V8: cmp r1, #59
+; CHECK-V8-NEXT: bgt
+; CHECK-V8-NEXT: %if.then
+; CHECK-V8-NEXT: subs r0, r2, #1
+; CHECK-V8-NEXT: bx lr
+; CHECK-V8-NEXT: %if.else
+; CHECK-V8-NEXT: subs [[REG:r[0-9]+]], #120
+; CHECK-V8-NEXT: cmp [[REG]], r1
+; CHECK-V8-NEXT: bge
+; CHECK-V8-NEXT: %if.else
+; CHECK-V8-NEXT: cmp r0, #119
+; CHECK-V8-NEXT: bgt
+; CHECK-V8-NEXT: %if.then4
+; CHECK-V8-NEXT: adds r0, r1, #1
+; CHECK-V8-NEXT: bx lr
+; CHECK-V8-NEXT: %if.end5
+; CHECK-V8-NEXT: subs r0, r1, r0
+; CHECK-V8-NEXT: bx lr
+
+define i32 @wrapDistance(i32 %tx, i32 %sx, i32 %w) {
+entry:
+  %cmp = icmp slt i32 %sx, 60
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %sub = add nsw i32 %w, -1
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %sub1 = add nsw i32 %w, -120
+  %cmp2 = icmp slt i32 %sub1, %sx
+  %cmp3 = icmp slt i32 %tx, 120
+  %or.cond = and i1 %cmp2, %cmp3
+  br i1 %or.cond, label %if.then4, label %if.end5
+
+if.then4:                                         ; preds = %if.else
+  %add = add nsw i32 %sx, 1
+  br label %return
+
+if.end5:                                          ; preds = %if.else
+  %sub6 = sub nsw i32 %sx, %tx
+  br label %return
+
+return:                                           ; preds = %if.end5, %if.then4, %if.then
+  %retval.0 = phi i32 [ %sub, %if.then ], [ %add, %if.then4 ], [ %sub6, %if.end5 ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
new file mode 100644
index 000000000000..defb94601141
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=thumb -mattr=+v7,+thumb2 | FileCheck %s
+
+define i8 @f1(i8* %call1, i8* %call3, i32 %h, i32 %w, i32 %Width) {
+; CHECK: f1:
+entry:
+        %mul17 = mul nsw i32 %Width, %h
+        %add = add nsw i32 %mul17, %w
+        %sub19 = sub i32 %add, %Width
+        %sub20 = add i32 %sub19, -1
+        %arrayidx21 = getelementptr inbounds i8* %call1, i32 %sub20
+        %0 = load i8* %arrayidx21, align 1
+        %conv22 = zext i8 %0 to i32
+        %arrayidx25 = getelementptr inbounds i8* %call1, i32 %sub19
+        %1 = load i8* %arrayidx25, align 1
+        %conv26 = zext i8 %1 to i32
+        %mul23189 = add i32 %conv26, %conv22
+        %add30 = add i32 %sub19, 1
+        %arrayidx31 = getelementptr inbounds i8* %call1, i32 %add30
+        %2 = load i8* %arrayidx31, align 1
+        %conv32 = zext i8 %2 to i32
+; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
+; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
+        %add28190 = add i32 %mul23189, %conv32
+        %sub35 = add i32 %add, -1
+        %arrayidx36 = getelementptr inbounds i8* %call1, i32 %sub35
+        %3 = load i8* %arrayidx36, align 1
+        %conv37 = zext i8 %3 to i32
+        %add34191 = add i32 %add28190, %conv37
+        %arrayidx40 = getelementptr inbounds i8* %call1, i32 %add
+        %4 = load i8* %arrayidx40, align 1
+        %conv41 = zext i8 %4 to i32
+        %mul42 = mul nsw i32 %conv41, 255
+        %add44 = add i32 %add, 1
+        %arrayidx45 = getelementptr inbounds i8* %call1, i32 %add44
+        %5 = load i8* %arrayidx45, align 1
+        %conv46 = zext i8 %5 to i32
+; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
+; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
+        %add49 = add i32 %add, %Width
+        %sub50 = add i32 %add49, -1
+        %arrayidx51 = getelementptr inbounds i8* %call1, i32 %sub50
+        %6 = load i8* %arrayidx51, align 1
+        %conv52 = zext i8 %6 to i32
+        %arrayidx56 = getelementptr inbounds i8* %call1, i32 %add49
+        %7 = load i8* %arrayidx56, align 1
+        %conv57 = zext i8 %7 to i32
+        %add61 = add i32 %add49, 1
+        %arrayidx62 = getelementptr inbounds i8* %call1, i32 %add61
+        %8 = load i8* %arrayidx62, align 1
+        %conv63 = zext i8 %8 to i32
+; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
+; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1]
+        %tmp = add i32 %add34191, %conv46
+        %tmp193 = add i32 %tmp, %conv52
+        %tmp194 = add i32 %tmp193, %conv57
+        %tmp195 = add i32 %tmp194, %conv63
+        %tmp196 = mul i32 %tmp195, -28
+        %add65 = add i32 %tmp196, %mul42
+        %9 = lshr i32 %add65, 8
+        %conv68 = trunc i32 %9 to i8
+        %arrayidx69 = getelementptr inbounds i8* %call3, i32 %add
+        store i8 %conv68, i8* %arrayidx69, align 1
+        ret i8 %conv68
+}
diff --git a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
new file mode 100644
index 000000000000..7bf03a16c6fb
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
@@ -0,0 +1,31 @@
+;PR15293: ARM codegen ice - expected larger existing stack allocation
+;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+%struct.S227 = type { [49 x i32], i32 }
+
+define void @check227(
+                      i32 %b,                              
+                      %struct.S227* byval nocapture %arg0,
+                      %struct.S227* %arg1) {
+; b --> R0
+; arg0 --> [R1, R2, R3, SP+0 .. SP+188)
+; arg1 --> SP+188
+
+entry:
+
+;CHECK:  sub   sp, sp, #16
+;CHECK:  push  {r11, lr}
+;CHECK:  add   r0, sp, #12
+;CHECK:  stm   r0, {r1, r2, r3}
+;CHECK:  ldr   r0, [sp, #212]
+;CHECK:  bl    useInt
+;CHECK:  pop   {r11, lr}
+;CHECK:  add   sp, sp, #16
+
+  %0 = ptrtoint %struct.S227* %arg1 to i32
+  tail call void @useInt(i32 %0)
+  ret void
+}
+
+declare void @useInt(i32)
+
diff --git a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
new file mode 100644
index 000000000000..438b021a040b
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
@@ -0,0 +1,25 @@
+;PR15293: ARM codegen ICE - expected larger existing stack allocation
+;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+%struct4bytes = type { i32 }
+%struct20bytes = type { i32, i32, i32, i32, i32 }
+
+define void @foo(%struct4bytes* byval %p0, ; --> R0
+                 %struct20bytes* byval %p1 ; --> R1,R2,R3, [SP+0 .. SP+8)
+) {
+;CHECK:  sub  sp, sp, #16
+;CHECK:  push  {r11, lr}
+;CHECK:  add  r11, sp, #8
+;CHECK:  stm  r11, {r0, r1, r2, r3}
+;CHECK:  add  r0, sp, #12
+;CHECK:  bl  useInt
+;CHECK:  pop  {r11, lr}
+;CHECK:  add  sp, sp, #16
+
+  %1 = ptrtoint %struct20bytes* %p1 to i32
+  tail call void @useInt(i32 %1)
+  ret void
+}
+
+declare void @useInt(i32)
+
diff --git a/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll
new file mode 100644
index 000000000000..8f6709ec5e7b
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s
+target triple = "armv7-none-linux-gnueabi"
+
+define <3 x i64> @shuffle(i1 %dec1, i1 %dec0, <3 x i64> %b) {
+entry:
+  %.sink = select i1 %dec1, <3 x i64> %b, <3 x i64> zeroinitializer
+  %.sink15 = select i1 %dec0, <3 x i64> %b, <3 x i64> zeroinitializer
+  %vecinit7 = shufflevector <3 x i64> %.sink, <3 x i64> %.sink15, <3 x i32> <i32 0, i32 4, i32 undef>
+  ret <3 x i64> %vecinit7
+}
diff --git a/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll b/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
new file mode 100644
index 000000000000..0130f7ab68f5
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -O0 -mtriple=armv4t--linux-eabi-android
+; RUN: llc < %s -O0 -mtriple=armv4t-unknown-linux
+; RUN: llc < %s -O0 -mtriple=armv5-unknown-linux
+
+; See http://llvm.org/bugs/show_bug.cgi?id=16178
+; ARMFastISel used to fail emitting sext/zext in pre-ARMv6.
+
+; Function Attrs: nounwind
+define arm_aapcscc void @f2(i8 signext %a) #0 {
+entry:
+  %a.addr = alloca i8, align 1
+  store i8 %a, i8* %a.addr, align 1
+  %0 = load i8* %a.addr, align 1
+  %conv = sext i8 %0 to i32
+  %shr = ashr i32 %conv, 56
+  %conv1 = trunc i32 %shr to i8
+  call arm_aapcscc void @f1(i8 signext %conv1)
+  ret void
+}
+
+declare arm_aapcscc void @f1(i8 signext) #1
diff --git a/test/CodeGen/ARM/2013-06-03-ByVal-2Kbytes.ll b/test/CodeGen/ARM/2013-06-03-ByVal-2Kbytes.ll
new file mode 100644
index 000000000000..1c1380070219
--- /dev/null
+++ b/test/CodeGen/ARM/2013-06-03-ByVal-2Kbytes.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mcpu=cortex-a15 | FileCheck %s
+; ModuleID = 'attri_16.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv4t--linux-gnueabihf"
+
+%big_struct0 = type { [517 x i32] }
+%big_struct1 = type { [516 x i32] }
+
+;CHECK-LABEL: f:
+define void @f(%big_struct0* %p0, %big_struct1* %p1) {
+
+;CHECK: sub sp, sp, #8
+;CHECK: sub sp, sp, #2048
+;CHECK: bl callme0
+  call void @callme0(%big_struct0* byval %p0)
+
+;CHECK: add sp, sp, #8
+;CHECK: add sp, sp, #2048
+;CHECK: sub sp, sp, #2048
+;CHECK: bl callme1
+  call void @callme1(%big_struct1* byval %p1)
+
+;CHECK: add sp, sp, #2048
+
+  ret void
+}
+
+declare void @callme0(%big_struct0* byval)
+declare void @callme1(%big_struct1* byval)
+
diff --git a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
new file mode 100644
index 000000000000..a438c1f4556a
--- /dev/null
+++ b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabi"
+
+; CHECK-LABEL: function
+define void @function() {
+; CHECK: cmp r0, #0
+; CHECK: bxne lr
+; CHECK: vmov.i32 q8, #0xff0000
+entry:
+  br i1 undef, label %vector.body, label %for.end
+
+; CHECK: vld1.32 {d18, d19}, [r0]
+; CHECK: vand q10, q9, q8
+; CHECK: vbic.i16 q9, #0xff
+; CHECK: vorr q9, q9, q10
+; CHECK: vst1.32 {d18, d19}, [r0]
+vector.body:
+  %wide.load = load <4 x i32>* undef, align 4
+  %0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
+  %1 = sub <4 x i32> %wide.load, zeroinitializer
+  %2 = and <4 x i32> %1, <i32 16711680, i32 16711680, i32 16711680, i32 16711680>
+  %3 = or <4 x i32> undef, %0
+  %4 = or <4 x i32> %3, %2
+  store <4 x i32> %4, <4 x i32>* undef, align 4
+  br label %vector.body
+
+for.end:
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
new file mode 100644
index 000000000000..33c0587226a8
--- /dev/null
+++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
@@ -0,0 +1,16 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls"
+; Evaluate the two vld1.8 instructions in separate MBBs,
+; instead of stalling on one and conditionally overwriting its result.
+
+define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
+entry:
+  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
+  %vld2 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
+  %and = and i32 %avail, 1
+  %tobool = icmp eq i32 %and, 0
+  %retv = select i1 %tobool, <16 x i8> %vld1, <16 x i8> %vld2
+  ret <16 x i8> %retv
+}
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 )
diff --git a/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll b/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll
new file mode 100644
index 000000000000..5a864772faef
--- /dev/null
+++ b/test/CodeGen/ARM/2013-11-08-inline-asm-neon-array.ll
@@ -0,0 +1,16 @@
+;RUN:  not llc -mtriple=arm-linux-gnueabihf < %s 2>&1 | FileCheck %s
+
+; ModuleID = 'bug.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--"
+
+%struct.uint8x8x4_t = type { [4 x <8 x i8>] }
+
+define void @foo() #0 {
+  %vsrc = alloca %struct.uint8x8x4_t, align 8
+  %ptr = alloca i8;
+  %1 = call i8* asm sideeffect "vld4.u8 ${0:h}, [$1], $2", "=*w,=r,r,1"(%struct.uint8x8x4_t* %vsrc, i32 0, i8* %ptr)
+  ret void
+}
+
+; CHECK: error: couldn't allocate output register for constraint 'w'
diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll
index a52468e5be9e..019ff6129b00 100644
--- a/test/CodeGen/ARM/a15-SD-dep.ll
+++ b/test/CodeGen/ARM/a15-SD-dep.ll
@@ -1,8 +1,8 @@
-; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s  | FileCheck -check-prefix=DISABLED %s
-; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=ENABLED %s
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s  | FileCheck -check-prefix=CHECK-DISABLED %s
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-ENABLED %s
 
-; CHECK-ENABLED: t1:
-; CHECK-DISABLED: t1:
+; CHECK-ENABLED-LABEL: t1:
+; CHECK-DISABLED-LABEL: t1:
 define <2 x float> @t1(float %f) {
   ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
   ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -11,8 +11,8 @@ define <2 x float> @t1(float %f) {
   ret <2 x float> %i2
 }
 
-; CHECK-ENABLED: t2:
-; CHECK-DISABLED: t2:
+; CHECK-ENABLED-LABEL: t2:
+; CHECK-DISABLED-LABEL: t2:
 define <4 x float> @t2(float %g, float %f) {
   ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
   ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -21,8 +21,8 @@ define <4 x float> @t2(float %g, float %f) {
   ret <4 x float> %i2
 }
 
-; CHECK-ENABLED: t3:
-; CHECK-DISABLED: t3:
+; CHECK-ENABLED-LABEL: t3:
+; CHECK-DISABLED-LABEL: t3:
 define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
   ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] 
   ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
@@ -31,8 +31,8 @@ define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
   ret <2 x float> %i2
 }
 
-; CHECK-ENABLED: t4:
-; CHECK-DISABLED: t4:
+; CHECK-ENABLED-LABEL: t4:
+; CHECK-DISABLED-LABEL: t4:
 define <2 x float> @t4(float %f) {
   ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
   ; CHECK-DISABLED-NOT: vdup
@@ -45,8 +45,8 @@ b:
   ret <2 x float> %i2
 }
 
-; CHECK-ENABLED: t5:
-; CHECK-DISABLED: t5:
+; CHECK-ENABLED-LABEL: t5:
+; CHECK-DISABLED-LABEL: t5:
 define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
   ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
   ; CHECK-ENABLED: vadd.f32
diff --git a/test/CodeGen/ARM/a15-mla.ll b/test/CodeGen/ARM/a15-mla.ll
index 25f6de4762d5..b233cc27c4ba 100644
--- a/test/CodeGen/ARM/a15-mla.ll
+++ b/test/CodeGen/ARM/a15-mla.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s  -march=arm -float-abi=hard -mcpu=cortex-a15 -mattr=+neon,+neonfp | FileCheck %s
 
 ; This test checks that the VMLxForwarting feature is disabled for A15.
-; CHECK: fun_a
+; CHECK: fun_a:
 define <4 x i32> @fun_a(<4 x i32> %x, <4 x i32> %y) nounwind{
   %1 = add <4 x i32> %x, %y
 ; CHECK-NOT: vmul
@@ -10,3 +10,27 @@ define <4 x i32> @fun_a(<4 x i32> %x, <4 x i32> %y) nounwind{
   %3 = add <4 x i32> %y, %2
   ret <4 x i32> %3
 }
+
+; This test checks that VMLA FP patterns can be matched in instruction selection when targeting
+; Cortex-A15.
+; CHECK: fun_b:
+define <4 x float> @fun_b(<4 x float> %x, <4 x float> %y, <4 x float> %z) nounwind{
+; CHECK: vmla.f32
+  %t = fmul <4 x float> %x, %y
+  %r = fadd <4 x float> %t, %z
+  ret <4 x float> %r
+}
+
+; This test checks that FP VMLA instructions are not expanded into separate multiply/addition
+; operations when targeting Cortex-A15.
+; CHECK: fun_c:
+define <4 x float> @fun_c(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %u, <4 x float> %v) nounwind{
+; CHECK: vmla.f32
+  %t1 = fmul <4 x float> %x, %y
+  %r1 = fadd <4 x float> %t1, %z
+; CHECK: vmla.f32
+  %t2 = fmul <4 x float> %u, %v
+  %r2 = fadd <4 x float> %t2, %r1
+  ret <4 x float> %r2
+}
+
diff --git a/test/CodeGen/ARM/a15-partial-update.ll b/test/CodeGen/ARM/a15-partial-update.ll
index 6306790d15f0..5747253d56b7 100644
--- a/test/CodeGen/ARM/a15-partial-update.ll
+++ b/test/CodeGen/ARM/a15-partial-update.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s  | FileCheck %s
 
-; CHECK: t1:
+; CHECK-LABEL: t1:
 define <2 x float> @t1(float* %A, <2 x float> %B) {
 ; The generated code for this test uses a vld1.32 instruction
 ; to write the lane 1 of a D register containing the value of
@@ -15,7 +15,7 @@ define <2 x float> @t1(float* %A, <2 x float> %B) {
   ret <2 x float> %tmp3
 }
 
-; CHECK: t2:
+; CHECK-LABEL: t2:
 define void @t2(<4 x i8> *%in, <4 x i8> *%out, i32 %n) {
 entry:
   br label %loop
diff --git a/test/CodeGen/ARM/addrspacecast.ll b/test/CodeGen/ARM/addrspacecast.ll
new file mode 100644
index 000000000000..2e98ba53c67a
--- /dev/null
+++ b/test/CodeGen/ARM/addrspacecast.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm
+
+; Check that codegen for an addrspace cast succeeds without error.
+define <4 x i32 addrspace(1)*> @f (<4 x i32*> %x) {
+  %1 = addrspacecast <4 x i32*> %x to <4 x i32 addrspace(1)*>
+  ret <4 x i32 addrspace(1)*> %1
+}
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index d668334f8d6a..f55ae10b247d 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,15 +1,30 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
-; RUN: grep " = " %t   | count 5
-; RUN: grep globl %t | count 4
-; RUN: grep weak %t  | count 1
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
 
-@bar = external global i32
+; CHECK: .globl	test
+
+; CHECK: .globl	foo1
+; CHECK: foo1 = bar
+
+; CHECK: .globl	foo2
+; CHECK: foo2 = bar
+
+; CHECK: .weak	bar_f
+; CHECK: bar_f = foo_f
+
+; CHECK: bar_i = bar
+
+; CHECK: .globl	A
+; CHECK: A = bar
+
+@bar = global i32 42
 @foo1 = alias i32* @bar
 @foo2 = alias i32* @bar
 
 %FunTy = type i32()
 
-declare i32 @foo_f()
+define i32 @foo_f() {
+  ret i32 0
+}
 @bar_f = alias weak %FunTy* @foo_f
 
 @bar_i = alias internal i32* @bar
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 273041dee34e..6e6311d4d34f 100644
--- a/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -1,30 +1,14 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -O0 -realign-stack=0 | FileCheck %s -check-prefix=NO-REALIGN
-; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN
 
 ; rdar://12713765
 ; When realign-stack is set to false, make sure we are not creating stack
 ; objects that are assumed to be 64-byte aligned.
 @T3_retval = common global <16 x float> zeroinitializer, align 16
 
-define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp {
+define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
 entry:
-; CHECK: test
-; CHECK: bic sp, sp, #63
-; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
-; CHECK: vst1.64
-; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
-; CHECK: vst1.64
-; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
-; CHECK: vst1.64
-; CHECK: vst1.64
-; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
-; CHECK: vst1.64
-; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
-; CHECK: vst1.64
-; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
-; CHECK: vst1.64
-; CHECK: vst1.64
-; NO-REALIGN: test
+; NO-REALIGN: test1
 ; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
 ; NO-REALIGN: vst1.64
 ; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
@@ -46,3 +30,29 @@ entry:
  store <16 x float> %1, <16 x float>* %agg.result, align 16
  ret void
 }
+
+define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
+entry:
+; REALIGN: test2
+; REALIGN: bic sp, sp, #63
+; REALIGN: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; REALIGN: vst1.64
+; REALIGN: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; REALIGN: vst1.64
+; REALIGN: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; REALIGN: vst1.64
+; REALIGN: vst1.64
+; REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; REALIGN: vst1.64
+; REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; REALIGN: vst1.64
+; REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; REALIGN: vst1.64
+; REALIGN: vst1.64
+ %retval = alloca <16 x float>, align 16
+ %0 = load <16 x float>* @T3_retval, align 16
+ store <16 x float> %0, <16 x float>* %retval
+ %1 = load <16 x float>* %retval
+ store <16 x float> %1, <16 x float>* %agg.result, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index a8b42e63b71f..e7fbf9f28eff 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 | FileCheck %s -check-prefix=DARWIN
 
 define i32 @f1(i32 %a, i64 %b) {
-; ELF: f1:
+; ELF-LABEL: f1:
 ; ELF: mov r0, r2
-; DARWIN: f1:
+; DARWIN-LABEL: f1:
 ; DARWIN: mov r0, r1
         %tmp = call i32 @g1(i64 %b)
         ret i32 %tmp
@@ -12,10 +12,10 @@ define i32 @f1(i32 %a, i64 %b) {
 
 ; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
 define i32 @f2() nounwind optsize {
-; ELF: f2:
+; ELF-LABEL: f2:
 ; ELF: mov  [[REGISTER:(r[0-9]+)]], #128
 ; ELF: str  [[REGISTER]], [
-; DARWIN: f2:
+; DARWIN-LABEL: f2:
 ; DARWIN: mov	r3, #128
 entry:
   %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1]
@@ -26,10 +26,10 @@ entry:
 
 ; test that on gnueabi a 64 bit value at this position will cause r3 to go
 ; unused and the value stored in [sp]
-; ELF: f3:
+; ELF-LABEL: f3:
 ; ELF: ldr r0, [sp]
 ; ELF-NEXT: mov pc, lr
-; DARWIN: f3:
+; DARWIN-LABEL: f3:
 ; DARWIN: mov r0, r3
 ; DARWIN-NEXT: mov pc, lr
 define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) {
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 07620700aedb..88d797e83648 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s
 ; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=V8 %s
 
 ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
 
@@ -39,6 +40,17 @@ tailrecurse:                                      ; preds = %sw.bb, %entry
   br i1 %tst, label %sw.bb, label %tailrecurse.switch
 
 tailrecurse.switch:                               ; preds = %tailrecurse
+; V8-LABEL: %tailrecurse.switch
+; V8: cmp
+; V8-NEXT: beq
+; V8-NEXT: %tailrecurse.switch
+; V8: cmp
+; V8-NEXT: beq
+; V8-NEXT: %tailrecurse.switch
+; V8: cmp
+; V8-NEXT: beq
+; V8-NEXT: b	
+; The trailing space in the last line checks that the branch is unconditional
   switch i32 %and, label %sw.epilog [
     i32 1, label %sw.bb
     i32 3, label %sw.bb6
@@ -73,6 +85,7 @@ sw.epilog:                                        ; preds = %tailrecurse.switch
 ; ARM: bar
 ; THUMB: bar
 ; T2: bar
+; V8-LABEL: bar:
 define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
 entry:
   %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
@@ -81,22 +94,32 @@ entry:
 ; ARM: ands
 ; THUMB: ands
 ; T2: ands
+; V8: ands
+; V8-NEXT: beq
   %3 = and i32 %2, 112
   %4 = icmp eq i32 %3, 0
   br i1 %4, label %return, label %bb
 
 bb:                                               ; preds = %entry
+; V8-NEXT: %bb
   %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
   %6 = load i8* %5, align 1
   %7 = zext i8 %6 to i32
 ; ARM: andsne
 ; THUMB: ands
 ; T2: andsne
+; V8: ands
+; V8-NEXT: beq
   %8 = and i32 %7, 112
   %9 = icmp eq i32 %8, 0
   br i1 %9, label %return, label %bb2
 
 bb2:                                              ; preds = %bb
+; V8-NEXT: %bb2
+; V8-NEXT: cmp
+; V8-NEXT: it	ne
+; V8-NEXT: cmpne
+; V8-NEXT: bne
   %10 = icmp eq i32 %3, 16
   %11 = icmp eq i32 %8, 16
   %or.cond = or i1 %10, %11
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
index 2cf1422c66a9..9c4173ef0ce2 100644
--- a/test/CodeGen/ARM/arm-frameaddr.ll
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -5,10 +5,10 @@
 
 define i8* @t() nounwind {
 entry:
-; DARWIN: t:
+; DARWIN-LABEL: t:
 ; DARWIN: mov r0, r7
 
-; LINUX: t:
+; LINUX-LABEL: t:
 ; LINUX: mov r0, r11
 	%0 = call i8* @llvm.frameaddress(i32 0)
         ret i8* %0
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index c74701663459..854864277720 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -60,8 +60,14 @@ ret void
 
 define i64 @f4(i64* %val) nounwind {
 entry:
-  ;CHECK: f4
+  ;CHECK-LABEL: f4:
   ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
   %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind
   ret i64 %0
 }
+
+; PR16490
+define void @f5(i64 %__pu_val) {
+  call void asm sideeffect "$1", "r,i"(i64 %__pu_val, i32 -14)
+  ret void
+}
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 1272e8efc26b..4266572b077f 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -7,7 +7,7 @@
 
 define i8* @rt0(i32 %x) nounwind readnone {
 entry:
-; CHECK: rt0:
+; CHECK-LABEL: rt0:
 ; CHECK: {r7, lr}
 ; CHECK: mov r0, lr
   %0 = tail call i8* @llvm.returnaddress(i32 0)
@@ -16,7 +16,7 @@ entry:
 
 define i8* @rt2() nounwind readnone {
 entry:
-; CHECK: rt2:
+; CHECK-LABEL: rt2:
 ; CHECK: {r7, lr}
 ; CHECK: ldr r[[R0:[0-9]+]], [r7]
 ; CHECK: ldr r0, [r0]
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index f2c7305ff33a..0477d4f40160 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,155 +1,155 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-THUMB
 
 define i64 @test1(i64* %ptr, i64 %val) {
-; CHECK: test1:
-; CHECK: dmb ish
+; CHECK-LABEL: test1:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: adds [[REG3:(r[0-9]?[02468])]], [[REG1]]
 ; CHECK: adc [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test1:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test1:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
 ; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw add i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test2(i64* %ptr, i64 %val) {
-; CHECK: test2:
-; CHECK: dmb ish
+; CHECK-LABEL: test2:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs [[REG3:(r[0-9]?[02468])]], [[REG1]]
 ; CHECK: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test2:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test2:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
 ; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test3(i64* %ptr, i64 %val) {
-; CHECK: test3:
-; CHECK: dmb ish
+; CHECK-LABEL: test3:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
 ; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test3:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test3:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
 ; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw and i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test4(i64* %ptr, i64 %val) {
-; CHECK: test4:
-; CHECK: dmb ish
+; CHECK-LABEL: test4:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
 ; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test4:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test4:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
 ; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw or i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test5(i64* %ptr, i64 %val) {
-; CHECK: test5:
-; CHECK: dmb ish
+; CHECK-LABEL: test5:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
 ; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test5:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test5:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
 ; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test6(i64* %ptr, i64 %val) {
-; CHECK: test6:
-; CHECK: dmb ish
+; CHECK-LABEL: test6:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test6:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test6:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
-; CHECK: test7:
-; CHECK: dmb ish
+; CHECK-LABEL: test7:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: cmp [[REG1]]
 ; CHECK: cmpeq [[REG2]]
@@ -157,10 +157,10 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test7:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test7:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: cmp [[REG1]]
 ; CHECK-THUMB: it eq
@@ -169,35 +169,21 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
   ret i64 %r
 }
 
-; Compiles down to cmpxchg
-; FIXME: Should compile to a single ldrexd
+; Compiles down to a single ldrexd
 define i64 @test8(i64* %ptr) {
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: cmp [[REG1]]
-; CHECK: cmpeq [[REG2]]
-; CHECK: bne
-; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
-; CHECK: cmp
-; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test8:
+; CHECK-THUMB-LABEL: test8:
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: cmp [[REG1]]
-; CHECK-THUMB: it eq
-; CHECK-THUMB: cmpeq [[REG2]]
-; CHECK-THUMB: bne
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
-; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = load atomic i64* %ptr seq_cst, align 8
   ret i64 %r
@@ -206,29 +192,29 @@ define i64 @test8(i64* %ptr) {
 ; Compiles down to atomicrmw xchg; there really isn't any more efficient
 ; way to write it.
 define void @test9(i64* %ptr, i64 %val) {
-; CHECK: test9:
-; CHECK: dmb ish
+; CHECK-LABEL: test9:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test9:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test9:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
 }
 
 define i64 @test10(i64* %ptr, i64 %val) {
-; CHECK: test10:
-; CHECK: dmb ish
+; CHECK-LABEL: test10:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
 ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
@@ -236,10 +222,10 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test10:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test10:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
 ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
@@ -247,15 +233,15 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw min i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test11(i64* %ptr, i64 %val) {
-; CHECK: test11:
-; CHECK: dmb ish
+; CHECK-LABEL: test11:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
 ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
@@ -263,11 +249,11 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
 
-; CHECK-THUMB: test11:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test11:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
 ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
@@ -275,15 +261,15 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test12(i64* %ptr, i64 %val) {
-; CHECK: test12:
-; CHECK: dmb ish
+; CHECK-LABEL: test12:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
 ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
@@ -291,10 +277,10 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test12:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test12:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
 ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
@@ -302,15 +288,15 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
 
   %r = atomicrmw max i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
 
 define i64 @test13(i64* %ptr, i64 %val) {
-; CHECK: test13:
-; CHECK: dmb ish
+; CHECK-LABEL: test13:
+; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
 ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
@@ -318,10 +304,10 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
-; CHECK: dmb ish
+; CHECK: dmb {{ish$}}
 
-; CHECK-THUMB: test13:
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB-LABEL: test13:
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
 ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
@@ -329,7 +315,7 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
-; CHECK-THUMB: dmb ish
+; CHECK-THUMB: dmb {{ish$}}
   %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
index 82726daebca3..51ada693d0b8 100644
--- a/test/CodeGen/ARM/atomic-cmp.ll
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -3,11 +3,11 @@
 ; rdar://8964854
 
 define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
-; ARM: t:
+; ARM-LABEL: t:
 ; ARM: ldrexb
 ; ARM: strexb
 
-; T2: t:
+; T2-LABEL: t:
 ; T2: ldrexb
 ; T2: strexb
   %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic
diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll
index 12a8fe4cd884..53c7184d2a84 100644
--- a/test/CodeGen/ARM/atomic-load-store.ll
+++ b/test/CodeGen/ARM/atomic-load-store.ll
@@ -1,19 +1,20 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=ARM
 ; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s -check-prefix=THUMBTWO
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO
 ; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE
+; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4
 
 define void @test1(i32* %ptr, i32 %val1) {
 ; ARM: test1
-; ARM: dmb ish
+; ARM: dmb {{ish$}}
 ; ARM-NEXT: str
-; ARM-NEXT: dmb ish
+; ARM-NEXT: dmb {{ish$}}
 ; THUMBONE: test1
 ; THUMBONE: __sync_lock_test_and_set_4
 ; THUMBTWO: test1
-; THUMBTWO: dmb ish
+; THUMBTWO: dmb {{ish$}}
 ; THUMBTWO-NEXT: str
-; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: dmb {{ish$}}
   store atomic i32 %val1, i32* %ptr seq_cst, align 4
   ret void
 }
@@ -21,12 +22,12 @@ define void @test1(i32* %ptr, i32 %val1) {
 define i32 @test2(i32* %ptr) {
 ; ARM: test2
 ; ARM: ldr
-; ARM-NEXT: dmb ish
+; ARM-NEXT: dmb {{ish$}}
 ; THUMBONE: test2
 ; THUMBONE: __sync_val_compare_and_swap_4
 ; THUMBTWO: test2
 ; THUMBTWO: ldr
-; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: dmb {{ish$}}
   %val = load atomic i32* %ptr seq_cst, align 4
   ret i32 %val
 }
@@ -54,3 +55,17 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
   store atomic i8 %val, i8* %ptr2 seq_cst, align 1
   ret void
 }
+
+define i64 @test_old_load_64bit(i64* %p) {
+; ARMV4: test_old_load_64bit
+; ARMV4: ___sync_val_compare_and_swap_8
+  %1 = load atomic i64* %p seq_cst, align 8
+  ret i64 %1
+}
+
+define void @test_old_store_64bit(i64* %p, i64 %v) {
+; ARMV4: test_old_store_64bit
+; ARMV4: ___sync_lock_test_and_set_8
+  store atomic i64 %v, i64* %p seq_cst, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 6e6b36377fde..9a79c9fd7b1b 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1
 
 define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
@@ -24,78 +26,93 @@ entry:
   ; CHECK: ldrex
   ; CHECK: add
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_add_4
   %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
 	store i32 %0, i32* %old
   ; CHECK: ldrex
   ; CHECK: sub
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_sub_4
   %1 = atomicrmw sub i32* %val2, i32 30 monotonic
 	store i32 %1, i32* %old
   ; CHECK: ldrex
   ; CHECK: add
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_add_4
   %2 = atomicrmw add i32* %val2, i32 1 monotonic
 	store i32 %2, i32* %old
   ; CHECK: ldrex
   ; CHECK: sub
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_sub_4
   %3 = atomicrmw sub i32* %val2, i32 1 monotonic
 	store i32 %3, i32* %old
   ; CHECK: ldrex
   ; CHECK: and
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_and_4
   %4 = atomicrmw and i32* %andt, i32 4080 monotonic
 	store i32 %4, i32* %old
   ; CHECK: ldrex
   ; CHECK: or
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_or_4
   %5 = atomicrmw or i32* %ort, i32 4080 monotonic
 	store i32 %5, i32* %old
   ; CHECK: ldrex
   ; CHECK: eor
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_xor_4
   %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
 	store i32 %6, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_min_4
   %7 = atomicrmw min i32* %val2, i32 16 monotonic
 	store i32 %7, i32* %old
 	%neg = sub i32 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_min_4
   %8 = atomicrmw min i32* %val2, i32 %neg monotonic
 	store i32 %8, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_max_4
   %9 = atomicrmw max i32* %val2, i32 1 monotonic
 	store i32 %9, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_max_4
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umin_4
   %11 = atomicrmw umin i32* %val2, i32 16 monotonic
 	store i32 %11, i32* %old
 	%uneg = sub i32 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umin_4
   %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
 	store i32 %12, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umax_4
   %13 = atomicrmw umax i32* %val2, i32 1 monotonic
 	store i32 %13, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umax_4
   %14 = atomicrmw umax i32* %val2, i32 0 monotonic
 	store i32 %14, i32* %old
 
@@ -110,22 +127,26 @@ entry:
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umin_2
   %0 = atomicrmw umin i16* %val, i16 16 monotonic
   store i16 %0, i16* %old
   %uneg = sub i16 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umin_2
   %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
   store i16 %1, i16* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umax_2
   %2 = atomicrmw umax i16* %val, i16 1 monotonic
   store i16 %2, i16* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umax_2
   %3 = atomicrmw umax i16* %val, i16 0 monotonic
   store i16 %3, i16* %old
   ret void
@@ -139,22 +160,26 @@ entry:
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umin_1
   %0 = atomicrmw umin i8* %val, i8 16 monotonic
   store i8 %0, i8* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umin_1
   %uneg = sub i8 0, 1
   %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
   store i8 %1, i8* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umax_1
   %2 = atomicrmw umax i8* %val, i8 1 monotonic
   store i8 %2, i8* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
+  ; CHECK-T1: blx ___sync_fetch_and_umax_1
   %3 = atomicrmw umax i8* %val, i8 0 monotonic
   store i8 %3, i8* %old
   ret void
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
new file mode 100644
index 000000000000..3f93929fd19d
--- /dev/null
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -0,0 +1,1344 @@
+; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8:
+   %old = atomicrmw add i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16:
+   %old = atomicrmw add i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32:
+   %old = atomicrmw add i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64:
+   %old = atomicrmw add i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: adc{{(\.w)?}}  [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8:
+   %old = atomicrmw sub i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16:
+   %old = atomicrmw sub i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32:
+   %old = atomicrmw sub i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64:
+   %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbc{{(\.w)?}}  [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8:
+   %old = atomicrmw and i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16:
+   %old = atomicrmw and i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32:
+   %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64:
+   %old = atomicrmw and i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i8:
+   %old = atomicrmw or i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i16:
+   %old = atomicrmw or i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i32:
+   %old = atomicrmw or i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i64:
+   %old = atomicrmw or i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8:
+   %old = atomicrmw xor i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16:
+   %old = atomicrmw xor i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32:
+   %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64:
+   %old = atomicrmw xor i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8:
+   %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16:
+   %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32:
+   %old = atomicrmw xchg i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64:
+   %old = atomicrmw xchg i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i8:
+   %old = atomicrmw min i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ge
+; CHECK:      movge r[[OLDX]], r0
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i16:
+   %old = atomicrmw min i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ge
+; CHECK:      movge r[[OLDX]], r0
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i32:
+   %old = atomicrmw min i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it lt
+; CHECK:      movlt r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i64:
+   %old = atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
+  ; function there.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: blt .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i8:
+   %old = atomicrmw max i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
+  ; r0 below is a reasonable guess but could change: it certainly comes into the
+  ;  function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it le
+; CHECK:      movle r[[OLDX]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i16:
+   %old = atomicrmw max i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode inserts "it le" here, hence the plain (non-NEXT) match below.
+; CHECK:      movle r[[OLDX]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i32:
+   %old = atomicrmw max i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it gt" here, hence the plain (non-NEXT) match below.
+; CHECK:      movgt r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i64:
+   %old = atomicrmw max i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below are a reasonable guess that could change: %offset certainly
+  ; arrives in r0 and r1, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: bge .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8:
+   %old = atomicrmw umin i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it lo" here, hence the plain (non-NEXT) match below.
+; CHECK:      movlo r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16:
+   %old = atomicrmw umin i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it lo" here, hence the plain (non-NEXT) match below.
+; CHECK:      movlo r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32:
+   %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it lo" here, hence the plain (non-NEXT) match below.
+; CHECK:      movlo r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64:
+   %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below are a reasonable guess that could change: %offset certainly
+  ; arrives in r0 and r1, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: blo .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8:
+   %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it hi" here, hence the plain (non-NEXT) match below.
+; CHECK:      movhi r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16:
+   %old = atomicrmw umax i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it hi" here, hence the plain (non-NEXT) match below.
+; CHECK:      movhi r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32:
+   %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %offset certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode inserts "it hi" here, hence the plain (non-NEXT) match below.
+; CHECK:      movhi r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64:
+   %old = atomicrmw umax i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+  ; r0, r1 below are a reasonable guess that could change: %offset certainly
+  ; arrives in r0 and r1, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8:
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %wanted certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+  ; As above, r1 is a reasonable guess: %new arrives there.
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16:
+   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %wanted certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+  ; As above, r1 is a reasonable guess: %new arrives there.
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i32:
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+  ; r0 below is a reasonable guess that could change: %wanted certainly arrives in
+  ; r0, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+  ; As above, r1 is a reasonable guess: %new arrives there.
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+   ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i64:
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
+  ; r0, r1 below are a reasonable guess that could change: %wanted certainly
+  ; arrives in r0 and r1, but nothing guarantees it is still there at this point.
+; CHECK-NEXT: cmp   [[OLD1]], r0
+; Thumb mode inserts "it eq" here, hence the plain (non-NEXT) match below.
+; CHECK:      cmpeq [[OLD2]], r1
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+  ; As above, r2 and r3 are a reasonable guess: %new arrives there.
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, [[OLD1]]
+; CHECK-NEXT: mov r1, [[OLD2]]
+   ret i64 %old
+}
+
+define i8 @test_atomic_load_monotonic_i8() nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_i8:
+  %val = load atomic i8* @var8 monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: ldrb r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+  ret i8 %val
+}
+
+define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8:
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i8*
+
+  %val = load atomic i8* %addr monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldrb r0, [r0, r2]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+  ret i8 %val
+}
+
+define i8 @test_atomic_load_acquire_i8() nounwind {
+; CHECK-LABEL: test_atomic_load_acquire_i8:
+  %val = load atomic i8* @var8 acquire, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldab r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+  ret i8 %val
+}
+
+define i8 @test_atomic_load_seq_cst_i8() nounwind {
+; CHECK-LABEL: test_atomic_load_seq_cst_i8:
+  %val = load atomic i8* @var8 seq_cst, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldab r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+  ret i8 %val
+}
+
+define i16 @test_atomic_load_monotonic_i16() nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_i16:
+  %val = load atomic i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldrh r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+  ret i16 %val
+}
+
+define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32:
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i32*
+
+  %val = load atomic i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldr r0, [r0, r2]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+  ret i32 %val
+}
+
+define i64 @test_atomic_load_seq_cst_i64() nounwind {
+; CHECK-LABEL: test_atomic_load_seq_cst_i64:
+  %val = load atomic i64* @var64 seq_cst, align 8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var64
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldaexd r0, r1, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+  ret i64 %val
+}
+
+define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_i8:
+  store atomic i8 %val, i8* @var8 monotonic, align 1
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: strb r0, [r[[ADDR]]]
+
+  ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8:
+
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i8*
+
+  store atomic i8 %val, i8* %addr monotonic, align 1
+; CHECK: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
+; CHECK: strb [[VAL]], [r0, r2]
+
+  ret void
+}
+
+define void @test_atomic_store_release_i8(i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_release_i8:
+  store atomic i8 %val, i8* @var8 release, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: stlb r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+  ret void
+}
+
+define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_seq_cst_i8:
+  store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: stlb r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+  ret void
+}
+
+define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_i16:
+  store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: strh r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+  ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32:
+
+  %addr_int = add i64 %base, %off
+  %addr = inttoptr i64 %addr_int to i32*
+
+  store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldr [[VAL:r[0-9]+]], [sp]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: str [[VAL]], [r0, r2]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+  ret void
+}
+
+define void @test_atomic_store_release_i64(i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_release_i64:
+  store atomic i64 %val, i64* @var64 release, align 8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+  ; r0, r1 below are a reasonable guess that could change: %val certainly
+  ; arrives in r0 and r1, but nothing guarantees it is still there at this point.
+; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+  ret void
+}
+
+define i32 @not.barriers(i32* %var, i1 %cond) {
+; CHECK-LABEL: not.barriers:
+  br i1 %cond, label %atomic_ver, label %simple_ver
+simple_ver:
+  %oldval = load i32* %var
+  %newval = add nsw i32 %oldval, -1
+  store i32 %newval, i32* %var
+  br label %somewhere
+atomic_ver:
+  fence seq_cst
+  %val = atomicrmw add i32* %var, i32 -1 monotonic
+  fence seq_cst
+  br label %somewhere
+; CHECK: dmb
+; CHECK: ldrex
+; CHECK: dmb
+  ; The key point is that the second dmb must not be immediately followed by the
+  ; simple_ver basic block, a placement LLVM attempted when DMB had been marked
+  ; with isBarrier. For now, look for something that looks like "somewhere".
+; CHECK-NEXT: mov
+somewhere:
+  %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
+  ret i32 %combined
+}
diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll
index 69f1384e125c..5befc228e03c 100644
--- a/test/CodeGen/ARM/atomicrmw_minmax.ll
+++ b/test/CodeGen/ARM/atomicrmw_minmax.ll
@@ -1,6 +1,6 @@
 ;  RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
 
-;  CHECK: max:
+;  CHECK-LABEL: max:
 define i32 @max(i8 %ctx, i32* %ptr, i32 %val)
 {
 ;  CHECK: ldrex
@@ -10,7 +10,7 @@ define i32 @max(i8 %ctx, i32* %ptr, i32 %val)
   ret i32 %old
 }
 
-;  CHECK: min:
+;  CHECK-LABEL: min:
 define i32 @min(i8 %ctx, i32* %ptr, i32 %val)
 {
 ;  CHECK: ldrex
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index c14f5302d311..0217a4a8fb83 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -6,7 +6,7 @@
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
 ; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
 ; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
@@ -20,7 +20,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
 ; rdar://10357570
 define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
   %tobool7 = icmp eq i32* %ptr2, null
   br i1 %tobool7, label %while.end, label %while.body
 
@@ -54,7 +54,7 @@ while.end:
 ; rdar://12878928
 define void @t3(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind minsize {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
   %tobool7 = icmp eq i32* %ptr2, null
   br i1 %tobool7, label %while.end, label %while.body
 
diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll
index c4a44b4472d1..3a17d2b8cf99 100644
--- a/test/CodeGen/ARM/bfc.ll
+++ b/test/CodeGen/ARM/bfc.ll
@@ -2,7 +2,7 @@
 
 ; 4278190095 = 0xff00000f
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: bfc
     %tmp = and i32 %a, 4278190095
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 
 ; 4286578688 = 0xff800000
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: bfc
     %tmp = and i32 %a, 4286578688
     ret i32 %tmp
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a) {
 
 ; 4095 = 0x00000fff
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: bfc
     %tmp = and i32 %a, 4095
     ret i32 %tmp
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 84f3813975a9..72a467809978 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -52,7 +52,7 @@ define i32 @f4(i32 %a) nounwind {
 ; rdar://8458663
 define i32 @f5(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: bfc
 ; CHECK: bfi r0, r1, #20, #4
   %0 = and i32 %a, -15728641
@@ -65,7 +65,7 @@ entry:
 ; rdar://9609030
 define i32 @f6(i32 %a, i32 %b) nounwind readnone {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: bic
 ; CHECK: bfi r0, r1, #8, #9
   %and = and i32 %a, -130817
diff --git a/test/CodeGen/ARM/bswap-inline-asm.ll b/test/CodeGen/ARM/bswap-inline-asm.ll
index 472213d5f85f..31f9d729cf6e 100644
--- a/test/CodeGen/ARM/bswap-inline-asm.ll
+++ b/test/CodeGen/ARM/bswap-inline-asm.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 | FileCheck %s
 
 define i32 @t1(i32 %x) nounwind {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK-NOT: InlineAsm
 ; CHECK: rev
   %asmtmp = tail call i32 asm "rev $0, $1\0A", "=l,l"(i32 %x) nounwind
diff --git a/test/CodeGen/ARM/build-attributes-encoding.s b/test/CodeGen/ARM/build-attributes-encoding.s
new file mode 100644
index 000000000000..5ad51b284113
--- /dev/null
+++ b/test/CodeGen/ARM/build-attributes-encoding.s
@@ -0,0 +1,85 @@
+// This tests that ARM attributes are properly encoded.
+
+// RUN: llvm-mc < %s -triple=arm-linux-gnueabi -filetype=obj -o - \
+// RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+// Tag_CPU_name (=5)
+.cpu Cortex-A8
+
+// Tag_CPU_arch (=6)
+.eabi_attribute 6, 10
+
+// Tag_arch_profile (=7)
+.eabi_attribute 7, 'A'
+
+// Tag_ARM_ISA_use (=8)
+.eabi_attribute 8, 1
+
+// Tag_THUMB_ISA_use (=9)
+.eabi_attribute 9, 2
+
+// Tag_FP_arch (=10)
+.fpu vfpv3
+
+// Tag_Advanced_SIMD_arch (=12)
+.eabi_attribute 12, 2
+
+// Tag_ABI_FP_denormal (=20)
+.eabi_attribute 20, 1
+
+// Tag_ABI_FP_exceptions (=21)
+.eabi_attribute 21, 1
+
+// Tag_ABI_FP_number_model (=23)
+.eabi_attribute 23, 1
+
+// Tag_ABI_align_needed (=24)
+.eabi_attribute 24, 1
+
+// Tag_ABI_align_preserved (=25)
+.eabi_attribute 25, 1
+
+// Tag_ABI_HardFP_use (=27)
+.eabi_attribute 27, 0
+
+// Tag_ABI_VFP_args (=28)
+.eabi_attribute 28, 1
+
+// Tag_FP_HP_extension (=36)
+.eabi_attribute 36, 1
+
+// Tag_MPextension_use (=42)
+.eabi_attribute 42, 1
+
+// Tag_DIV_use (=44)
+.eabi_attribute 44, 2
+
+// Tag_Virtualization_use (=68)
+.eabi_attribute 68, 3
+
+// Check that values >= 128 (multi-byte ULEB128) are encoded properly
+.eabi_attribute 110, 160
+
+// Check that tags >= 128 (multi-byte ULEB128) are encoded properly
+.eabi_attribute 129, 1
+.eabi_attribute 250, 1
+
+// CHECK:        Section {
+// CHECK:          Name: .ARM.attributes
+// CHECK-NEXT:     Type: SHT_ARM_ATTRIBUTES
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x34
+// CHECK-NEXT:     Size: 70
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 41450000 00616561 62690001 3B000000
+// CHECK-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
+// CHECK-NEXT:       0020: 0109020A 030C0214 01150117 01180119
+// CHECK-NEXT:       0030: 011B001C 0124012A 012C0244 036EA001
+// CHECK-NEXT:       0040: 810101FA 0101
+// CHECK-NEXT:     )
diff --git a/test/CodeGen/ARM/byval_load_align.ll b/test/CodeGen/ARM/byval_load_align.ll
new file mode 100644
index 000000000000..2c0910c71d2f
--- /dev/null
+++ b/test/CodeGen/ARM/byval_load_align.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple thumbv7-apple-ios -O1 | FileCheck %s
+
+; rdar://15144402
+; Make sure we don't assume 4-byte alignment when loading from a byval argument
+; with alignment of 2.
+; CHECK: ldr r1, [r[[REG:[0-9]+]]]
+; CHECK: ldr r2, [r[[REG]], #4]
+; CHECK: ldr r3, [r[[REG]], #8]
+; CHECK-NOT: ldm
+; CHECK: .align	1 @ @sID
+
+%struct.ModuleID = type { [32 x i8], [32 x i8], i16 }
+
+@sID = internal constant %struct.ModuleID { [32 x i8] c"TEST\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [32 x i8] c"1.0\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", i16 23 }, align 2
+
+; Function Attrs: nounwind ssp
+define void @Client() #0 {
+entry:
+  tail call void @Logger(i8 signext 97, %struct.ModuleID* byval @sID) #2
+  ret void
+}
+
+declare void @Logger(i8 signext, %struct.ModuleID* byval) #1
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/ARM/call-noret-minsize.ll b/test/CodeGen/ARM/call-noret-minsize.ll
index df3c19eca6a0..e610d29d77fc 100644
--- a/test/CodeGen/ARM/call-noret-minsize.ll
+++ b/test/CodeGen/ARM/call-noret-minsize.ll
@@ -4,10 +4,10 @@
 
 define void @t1() noreturn minsize nounwind ssp {
 entry:
-; ARM: t1:
+; ARM-LABEL: t1:
 ; ARM: bl _bar
 
-; SWIFT: t1:
+; SWIFT-LABEL: t1:
 ; SWIFT: bl _bar
   tail call void @bar() noreturn nounwind
   unreachable
@@ -15,10 +15,10 @@ entry:
 
 define void @t2() noreturn minsize nounwind ssp {
 entry:
-; ARM: t2:
+; ARM-LABEL: t2:
 ; ARM: bl _t1
 
-; SWIFT: t2:
+; SWIFT-LABEL: t2:
 ; SWIFT: bl _t1
   tail call void @t1() noreturn nounwind
   unreachable
diff --git a/test/CodeGen/ARM/call-noret.ll b/test/CodeGen/ARM/call-noret.ll
index 27062dca38dc..bb56e8b86336 100644
--- a/test/CodeGen/ARM/call-noret.ll
+++ b/test/CodeGen/ARM/call-noret.ll
@@ -4,11 +4,11 @@
 
 define void @t1() noreturn nounwind ssp {
 entry:
-; ARM: t1:
+; ARM-LABEL: t1:
 ; ARM: mov lr, pc
 ; ARM: b _bar
 
-; SWIFT: t1:
+; SWIFT-LABEL: t1:
 ; SWIFT: mov lr, pc
 ; SWIFT: b _bar
   tail call void @bar() noreturn nounwind
@@ -17,11 +17,11 @@ entry:
 
 define void @t2() noreturn nounwind ssp {
 entry:
-; ARM: t2:
+; ARM-LABEL: t2:
 ; ARM: mov lr, pc
 ; ARM: b _t1
 
-; SWIFT: t2:
+; SWIFT-LABEL: t2:
 ; SWIFT: mov lr, pc
 ; SWIFT: b _t1
   tail call void @t1() noreturn nounwind
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index 58fbbda0f6bd..d4636021b599 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -11,16 +11,16 @@
 declare void @g(i32, i32, i32, i32)
 
 define void @t1() {
-; CHECKELF: t1:
+; CHECKELF-LABEL: t1:
 ; CHECKELF: bl g(PLT)
         call void @g( i32 1, i32 2, i32 3, i32 4 )
         ret void
 }
 
 define void @t2() {
-; CHECKV6: t2:
+; CHECKV6-LABEL: t2:
 ; CHECKV6: bx r0
-; CHECKT2D: t2:
+; CHECKT2D-LABEL: t2:
 ; CHECKT2D: ldr
 ; CHECKT2D-NEXT: ldr
 ; CHECKT2D-NEXT: bx r0
@@ -30,11 +30,11 @@ define void @t2() {
 }
 
 define void @t3() {
-; CHECKV6: t3:
+; CHECKV6-LABEL: t3:
 ; CHECKV6: b _t2
-; CHECKELF: t3:
+; CHECKELF-LABEL: t3:
 ; CHECKELF: b t2(PLT)
-; CHECKT2D: t3:
+; CHECKT2D-LABEL: t3:
 ; CHECKT2D: b.w _t2
 
         tail call void @t2( )            ; <i32> [#uses=0]
@@ -44,9 +44,9 @@ define void @t3() {
 ; Sibcall optimization of expanded libcalls. rdar://8707777
 define double @t4(double %a) nounwind readonly ssp {
 entry:
-; CHECKV6: t4:
+; CHECKV6-LABEL: t4:
 ; CHECKV6: b _sin
-; CHECKELF: t4:
+; CHECKELF-LABEL: t4:
 ; CHECKELF: b sin(PLT)
   %0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1]
   ret double %0
@@ -54,9 +54,9 @@ entry:
 
 define float @t5(float %a) nounwind readonly ssp {
 entry:
-; CHECKV6: t5:
+; CHECKV6-LABEL: t5:
 ; CHECKV6: b _sinf
-; CHECKELF: t5:
+; CHECKELF-LABEL: t5:
 ; CHECKELF: b sinf(PLT)
   %0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1]
   ret float %0
@@ -68,9 +68,9 @@ declare double @sin(double) nounwind readonly
 
 define i32 @t6(i32 %a, i32 %b) nounwind readnone {
 entry:
-; CHECKV6: t6:
+; CHECKV6-LABEL: t6:
 ; CHECKV6: b ___divsi3
-; CHECKELF: t6:
+; CHECKELF-LABEL: t6:
 ; CHECKELF: b __aeabi_idiv(PLT)
   %0 = sdiv i32 %a, %b
   ret i32 %0
@@ -82,7 +82,7 @@ declare void @foo() nounwind
 
 define void @t7() nounwind {
 entry:
-; CHECKT2D: t7:
+; CHECKT2D-LABEL: t7:
 ; CHECKT2D: blxeq _foo
 ; CHECKT2D-NEXT: pop.w
 ; CHECKT2D-NEXT: b.w _foo
@@ -101,7 +101,7 @@ bb:
 ; rdar://11140249
 define i32 @t8(i32 %x) nounwind ssp {
 entry:
-; CHECKT2D: t8:
+; CHECKT2D-LABEL: t8:
 ; CHECKT2D-NOT: push
   %and = and i32 %x, 1
   %tobool = icmp eq i32 %and, 0
@@ -147,7 +147,7 @@ declare i32 @c(i32)
 @x = external global i32, align 4
 
 define i32 @t9() nounwind {
-; CHECKT2D: t9:
+; CHECKT2D-LABEL: t9:
 ; CHECKT2D: blx __ZN9MutexLockC1Ev
 ; CHECKT2D: blx __ZN9MutexLockD1Ev
 ; CHECKT2D: b.w ___divsi3
@@ -162,3 +162,20 @@ define i32 @t9() nounwind {
 declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
 
 declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+; rdar://13827621
+; Correctly preserve the input chain for the tailcall node in the bitcast case,
+; otherwise the call to floorf is lost.
+define float @libcall_tc_test2(float* nocapture %a, float %b) {
+; CHECKT2D-LABEL: libcall_tc_test2:
+; CHECKT2D: blx _floorf
+; CHECKT2D: b.w _truncf
+  %1 = load float* %a, align 4
+  %call = tail call float @floorf(float %1)
+  store float %call, float* %a, align 4
+  %call1 = tail call float @truncf(float %b)
+  ret float %call1
+}
+
+declare float @floorf(float) readnone
+declare float @truncf(float) readnone
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index 5ec7f74a605f..48fa3a62ffb0 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -7,7 +7,7 @@
 @numi = external global i32		; <i32*> [#uses=1]
 @counter = external global [2 x i32]		; <[2 x i32]*> [#uses=1]
 
-; CHECK: main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i:
+; CHECK-LABEL: main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i:
 ; CHECK-NOT: bx lr
 
 define void @main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i() {
@@ -56,7 +56,7 @@ define void @PR15520(void ()* %fn) {
   call void %fn()
   ret void
 
-; CHECK: PR15520:
+; CHECK-LABEL: PR15520:
 ; CHECK: mov lr, pc
 ; CHECK: mov pc, r0
 }
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index bf51cd627b3c..f67987f8eb61 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: subs r
 ; CHECK: sbc r
 entry:
@@ -10,7 +10,7 @@ entry:
 }
 
 define i64 @f2(i64 %a, i64 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: adc r
 ; CHECK: subs r
 ; CHECK: sbc r
@@ -22,7 +22,7 @@ entry:
 
 ; add with live carry
 define i64 @f3(i32 %al, i32 %bl) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: adds r
 ; CHECK: adc r
 entry:
@@ -39,7 +39,7 @@ entry:
 ; rdar://10073745
 define i64 @f4(i64 %x) nounwind readnone {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: rsbs r
 ; CHECK: rsc r
   %0 = sub nsw i64 0, %x
@@ -49,7 +49,7 @@ entry:
 ; rdar://12559385
 define i64 @f5(i32 %vi) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: movw [[REG:r[0-9]+]], #36102
 ; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
     %v0 = zext i32 %vi to i64
diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll
new file mode 100644
index 000000000000..86106a045201
--- /dev/null
+++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -verify-machineinstrs
+; PR16110
+;
+; This test case contains a value that is split into two connected components
+; by rematerialization during coalescing. It also contains a DBG_VALUE
+; instruction which must be updated during
+; ConnectedVNInfoEqClasses::Distribute().
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@a = common global i64 0, align 8
+@d = common global i32 0, align 4
+
+; Function Attrs: nounwind ssp
+define i32 @pr16110() #0 {
+for.cond1.preheader:
+  store i32 0, i32* @c, align 4, !dbg !21
+  br label %for.cond1.outer, !dbg !26
+
+for.cond1:                                        ; preds = %for.end9, %for.cond1.outer
+  %storemerge11 = phi i32 [ 0, %for.end9 ], [ %storemerge11.ph, %for.cond1.outer ]
+  %cmp = icmp slt i32 %storemerge11, 1, !dbg !26
+  br i1 %cmp, label %for.body2, label %for.end9, !dbg !26
+
+for.body2:                                        ; preds = %for.cond1
+  store i32 %storemerge11, i32* @b, align 4, !dbg !26
+  tail call void @llvm.dbg.value(metadata !27, i64 0, metadata !11), !dbg !28
+  %0 = load i64* @a, align 8, !dbg !29
+  %xor = xor i64 %0, %e.1.ph, !dbg !29
+  %conv3 = trunc i64 %xor to i32, !dbg !29
+  tail call void @llvm.dbg.value(metadata !{i32 %conv3}, i64 0, metadata !10), !dbg !29
+  %tobool4 = icmp eq i32 %conv3, 0, !dbg !29
+  br i1 %tobool4, label %land.end, label %land.rhs, !dbg !29
+
+land.rhs:                                         ; preds = %for.body2
+  %call = tail call i32 bitcast (i32 (...)* @fn3 to i32 ()*)() #3, !dbg !29
+  %tobool5 = icmp ne i32 %call, 0, !dbg !29
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %for.body2
+  %1 = phi i1 [ false, %for.body2 ], [ %tobool5, %land.rhs ]
+  %land.ext = zext i1 %1 to i32
+  %call6 = tail call i32 bitcast (i32 (...)* @fn2 to i32 (i32, i32*)*)(i32 %land.ext, i32* null) #3
+  %2 = load i32* @b, align 4, !dbg !26
+  %inc8 = add nsw i32 %2, 1, !dbg !26
+  %phitmp = and i64 %xor, 4294967295, !dbg !26
+  br label %for.cond1.outer, !dbg !26
+
+for.cond1.outer:                                  ; preds = %land.end, %for.cond1.preheader
+  %storemerge11.ph = phi i32 [ %inc8, %land.end ], [ 0, %for.cond1.preheader ]
+  %e.1.ph = phi i64 [ %phitmp, %land.end ], [ 0, %for.cond1.preheader ]
+  %3 = load i32* @d, align 4, !dbg !31
+  %tobool10 = icmp eq i32 %3, 0, !dbg !31
+  br label %for.cond1
+
+for.end9:                                         ; preds = %for.cond1
+  br i1 %tobool10, label %if.end, label %for.cond1, !dbg !31
+
+if.end:                                           ; preds = %for.end9
+  store i32 %storemerge11, i32* @b, align 4, !dbg !26
+  ret i32 0, !dbg !32
+}
+
+declare i32 @fn2(...) #1
+
+declare i32 @fn3(...) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 182024) (llvm/trunk 182023)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"pr16110.c", metadata !"/d/b"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"pr16110", metadata !"pr16110", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @pr16110, null, null, metadata !9, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/d/b/pr16110.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10, metadata !11}
+!10 = metadata !{i32 786688, metadata !4, metadata !"e", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 8]
+!11 = metadata !{i32 786688, metadata !12, metadata !"f", metadata !5, i32 13, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 13]
+!12 = metadata !{i32 786443, metadata !1, metadata !13, i32 12, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
+!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
+!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !20}
+!16 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !5, i32 1, metadata !17, i32 0, i32 1, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!17 = metadata !{i32 786468, null, null, metadata !"long long int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long long int] [line 0, size 64, align 32, offset 0, enc DW_ATE_signed]
+!18 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !5, i32 2, metadata !8, i32 0, i32 1, i32* @b, null} ; [ DW_TAG_variable ] [b] [line 2] [def]
+!19 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !5, i32 3, metadata !8, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ] [c] [line 3] [def]
+!20 = metadata !{i32 786484, i32 0, null, metadata !"d", metadata !"d", metadata !"", metadata !5, i32 4, metadata !8, i32 0, i32 1, i32* @d, null} ; [ DW_TAG_variable ] [d] [line 4] [def]
+!21 = metadata !{i32 10, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !1, metadata !4, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
+!26 = metadata !{i32 12, i32 0, metadata !13, null}
+!27 = metadata !{i32* null}
+!28 = metadata !{i32 13, i32 0, metadata !12, null}
+!29 = metadata !{i32 14, i32 0, metadata !12, null}
+!31 = metadata !{i32 16, i32 0, metadata !4, null}
+!32 = metadata !{i32 18, i32 0, metadata !4, null}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 487ec690ea5d..70d85c91c8ca 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -7,7 +7,7 @@
 
 define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
   %0 = icmp eq %struct.list_head* %list, null
   br i1 %0, label %bb2, label %bb
 
@@ -33,7 +33,7 @@ bb2:
 ; rdar://8117827
 define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: beq LBB1_[[RET:.]]
   %0 = icmp eq i32 %passes, 0                     ; <i1> [#uses=1]
   br i1 %0, label %bb5, label %bb.nph15
diff --git a/test/CodeGen/ARM/constantfp.ll b/test/CodeGen/ARM/constantfp.ll
new file mode 100644
index 000000000000..974bdd729efc
--- /dev/null
+++ b/test/CodeGen/ARM/constantfp.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s
+; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEON %s
+
+; Checks how floating-point constants are materialised under three
+; configurations: swift with NEON (CHECK), cortex-a8 with NEON
+; (CHECK-NONEONFP) and cortex-a8 without NEON (CHECK-NONEON).
+
+define arm_aapcs_vfpcc float @test_vmov_f32() {
+; CHECK-LABEL: test_vmov_f32:
+; CHECK: vmov.f32 d0, #1.0
+
+; CHECK-NONEONFP: vmov.f32 s0, #1.0
+  ret float 1.0
+}
+
+define arm_aapcs_vfpcc float @test_vmov_imm() {
+; CHECK-LABEL: test_vmov_imm:
+; CHECK: vmov.i32 d0, #0
+
+; CHECK-NONEON-LABEL: test_vmov_imm:
+; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret float 0.0
+}
+
+define arm_aapcs_vfpcc float @test_vmvn_imm() {
+; CHECK-LABEL: test_vmvn_imm:
+; CHECK: vmvn.i32 d0, #0xb0000000
+
+; CHECK-NONEON-LABEL: test_vmvn_imm:
+; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret float 8589934080.0
+}
+
+define arm_aapcs_vfpcc double @test_vmov_f64() {
+; CHECK-LABEL: test_vmov_f64:
+; CHECK: vmov.f64 d0, #1.0
+
+; CHECK-NONEON-LABEL: test_vmov_f64:
+; CHECK-NONEON: vmov.f64 d0, #1.0
+
+  ret double 1.0
+}
+
+define arm_aapcs_vfpcc double @test_vmov_double_imm() {
+; CHECK-LABEL: test_vmov_double_imm:
+; CHECK: vmov.i32 d0, #0
+
+; CHECK-NONEON-LABEL: test_vmov_double_imm:
+; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret double 0.0
+}
+
+define arm_aapcs_vfpcc double @test_vmvn_double_imm() {
+; CHECK-LABEL: test_vmvn_double_imm:
+; CHECK: vmvn.i32 d0, #0xb0000000
+
+; CHECK-NONEON-LABEL: test_vmvn_double_imm:
+; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret double 0x4fffffff4fffffff
+}
+
+; Make sure we don't ignore the high half of 64-bit values when deciding whether
+; a vmov/vmvn is possible.
+define arm_aapcs_vfpcc double @test_notvmvn_double_imm() {
+; CHECK-LABEL: test_notvmvn_double_imm:
+; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+
+; CHECK-NONEON-LABEL: test_notvmvn_double_imm:
+; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
+  ret double 0x4fffffffffffffff
+}
diff --git a/test/CodeGen/ARM/copy-paired-reg.ll b/test/CodeGen/ARM/copy-paired-reg.ll
new file mode 100644
index 000000000000..17a4461c682b
--- /dev/null
+++ b/test/CodeGen/ARM/copy-paired-reg.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -verify-machineinstrs
+
+define void @f() {
+  %a = alloca i8, i32 8, align 8
+  %b = alloca i8, i32 8, align 8
+
+  %c = bitcast i8* %a to i64*
+  %d = bitcast i8* %b to i64*
+
+  store atomic i64 0, i64* %c seq_cst, align 8
+  store atomic i64 0, i64* %d seq_cst, align 8
+
+  %e = load atomic i64* %d seq_cst, align 8
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/crash-greedy-v6.ll b/test/CodeGen/ARM/crash-greedy-v6.ll
index fd42254767d3..e165dbdf087a 100644
--- a/test/CodeGen/ARM/crash-greedy-v6.ll
+++ b/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -disable-fp-elim -relocation-model=pic < %s
+; RUN: llc -disable-fp-elim -relocation-model=pic -O0 -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
 target triple = "armv6-apple-ios"
 
 ; Reduced from 177.mesa. This test causes a live range split before an LDR_POST instruction.
@@ -11,6 +12,25 @@ for.body.lr.ph:                                   ; preds = %entry
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: add
+; SOURCE-SCHED: str
+; SOURCE-SCHED: str
+; SOURCE-SCHED: str
+; SOURCE-SCHED: str
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: bl
+; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
+; SOURCE-SCHED: cmp
+; SOURCE-SCHED: bne
   %i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ]
   %arrayidx11 = getelementptr float* %t, i32 %i.031
   %arrayidx15 = getelementptr float* %u, i32 %i.031
diff --git a/test/CodeGen/ARM/crash-shufflevector.ll b/test/CodeGen/ARM/crash-shufflevector.ll
index bdc0e0ea4db0..0ae866800c8c 100644
--- a/test/CodeGen/ARM/crash-shufflevector.ll
+++ b/test/CodeGen/ARM/crash-shufflevector.ll
@@ -7,4 +7,4 @@ define void @f(<4 x i8> %param1, <4 x i8> %param2) {
    %z = shufflevector <16 x i8> %y1, <16 x i8> %y2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
    call void @g(<16 x i8> %z)
    ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
index 5ebca53b4692..2c7efc7c5da5 100644
--- a/test/CodeGen/ARM/ctz.ll
+++ b/test/CodeGen/ARM/ctz.ll
@@ -3,7 +3,7 @@
 declare i32 @llvm.cttz.i32(i32, i1)
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: rbit
 ; CHECK: clz
   %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true )
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 18f57ea41cd8..8950abdef6a3 100644
--- a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
 
-; CHECK: f:
+; CHECK-LABEL: f:
 define float @f(<4 x i16>* nocapture %in) {
   ; CHECK: vldr
   ; CHECK: vmovl.u16
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
index e9e0fe3239a7..2927ea2f3ca9 100644
--- a/test/CodeGen/ARM/dagcombine-concatvector.ll
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 -mcpu=generic | FileCheck %s
 
 ; PR15525
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: ldr.w	[[REG:r[0-9]+]], [sp]
 ; CHECK-NEXT: vmov	{{d[0-9]+}}, r1, r2
 ; CHECK-NEXT: vmov	{{d[0-9]+}}, r3, [[REG]]
diff --git a/test/CodeGen/ARM/darwin-eabi.ll b/test/CodeGen/ARM/darwin-eabi.ll
new file mode 100644
index 000000000000..f2cde71dd496
--- /dev/null
+++ b/test/CodeGen/ARM/darwin-eabi.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=thumbv7m-apple-darwin -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-M3
+; RUN: llc -mtriple=thumbv7em-apple-darwin -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-M4
+; RUN: llc -mtriple=thumbv7-apple-darwin -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-M3
+; RUN: llc -mtriple=thumbv7-apple-darwin -mcpu=cortex-m4 < %s | FileCheck %s --check-prefix=CHECK-M4
+
+define float @float_op(float %lhs, float %rhs) {
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+; CHECK-M3-LABEL: float_op:
+; CHECK-M3: blx ___addsf3
+
+; CHECK-M4-LABEL: float_op:
+; CHECK-M4: vadd.f32
+}
+
+define double @double_op(double %lhs, double %rhs) {
+  %sum = fadd double %lhs, %rhs
+  ret double %sum
+; CHECK-M3-LABEL: double_op:
+; CHECK-M3: blx ___adddf3
+
+; CHECK-M4-LABEL: double_op:
+; CHECK-M4: blx ___adddf3
+}
diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll
index a66a9d1292f0..da70178225eb 100644
--- a/test/CodeGen/ARM/data-in-code-annotations.ll
+++ b/test/CodeGen/ARM/data-in-code-annotations.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
 
 define double @f1() nounwind {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: .data_region
 ; CHECK: .long 1413754129
 ; CHECK: .long 1074340347
@@ -11,7 +11,7 @@ define double @f1() nounwind {
 
 
 define i32 @f2()  {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: .data_region jt32
 ; CHECK: .end_data_region
 
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index c162260dcd0c..e8bf3ba9d61f 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -11,7 +11,7 @@ define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64
   tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %c}, i64 0, metadata !13), !dbg !21
   tail call void @llvm.dbg.value(metadata !{i64 %x}, i64 0, metadata !14), !dbg !22
   tail call void @llvm.dbg.value(metadata !{i64 %y}, i64 0, metadata !17), !dbg !23
-;CHECK:	@DEBUG_VALUE: foo:y <- R7+4294967295
+;CHECK:	@DEBUG_VALUE: foo:y <- [R7+8]
   tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %ptr1}, i64 0, metadata !18), !dbg !24
   tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %ptr2}, i64 0, metadata !19), !dbg !25
   %1 = icmp eq %struct.tag_s* %c, null, !dbg !26
@@ -30,15 +30,16 @@ declare void @foobar(i64, i64)
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !30, null,  null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [foo]
 !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [tag_s] [line 5, size 96, align 32, offset 0] [def] [from ]
 !8 = metadata !{metadata !9, metadata !11, metadata !12}
 !9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
 !10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -64,3 +65,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !30 = metadata !{metadata !1}
 !31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19}
 !32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index d0bfecc5af41..6cbe4b4727cd 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 < %s | FileCheck %s
-; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
+; CHECK: @DEBUG_VALUE: foobar_func_block_invoke_0:mydata <- [SP+{{[0-9]+}}]
 ; Radar 9331779
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-ios"
@@ -93,37 +93,38 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!162}
 
-!0 = metadata !{i32 786449, i32 16, metadata !40, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!0 = metadata !{i32 786449, metadata !153, i32 16, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, metadata !26, metadata !148, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 248, size 32, align 32, offset 0] [def] [from ]
 !2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
 !3 = metadata !{metadata !4}
 !4 = metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
-!5 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!5 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [Mode] [line 79, size 32, align 32, offset 0] [def] [from ]
 !6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
 !7 = metadata !{metadata !8}
 !8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
-!9 = metadata !{i32 786433, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!9 = metadata !{i32 786436, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 15, size 32, align 32, offset 0] [def] [from ]
 !10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ]
 !11 = metadata !{metadata !12, metadata !13}
 !12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
 !13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
-!14 = metadata !{i32 786433, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!14 = metadata !{i32 786436, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
 !15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ]
 !16 = metadata !{metadata !17, metadata !18}
 !17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
 !18 = metadata !{i32 786472, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
-!19 = metadata !{i32 786433, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!19 = metadata !{i32 786436, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 14, size 32, align 32, offset 0] [def] [from ]
 !20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ]
 !21 = metadata !{metadata !22}
 !22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
-!23 = metadata !{i32 786478, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 786478, metadata !152, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] [line 609] [local] [def] [foobar_func_block_invoke_0]
 !24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ]
-!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !26 = metadata !{null}
 !27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64, null} ; [ DW_TAG_arg_variable ]
 !28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
-!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_14] [line 609, size 256, align 32, offset 0] [def] [from ]
 !30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124}
 !31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
 !32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
@@ -133,7 +134,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
 !37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
 !38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
-!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 307, size 128, align 32, offset 0] [def] [from ]
 !40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ]
 !41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47}
 !42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
@@ -144,7 +145,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
 !48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
 !49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
-!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, null, metadata !51, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 0, size 224, align 0, offset 0] [def] [from ]
 !51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58}
 !52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
 !53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
@@ -154,27 +155,27 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !57 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
 !58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
 !59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
-!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, null, metadata !62, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [UIMydata] [line 26, size 128, align 32, offset 0] [def] [from ]
 !61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ]
 !62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79}
 !63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, null, metadata !66, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSO] [line 66, size 32, align 32, offset 0] [def] [from ]
 !65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ]
 !66 = metadata !{metadata !67}
 !67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!68 = metadata !{i32 786454, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
+!68 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"Class", i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
 !69 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
-!70 = metadata !{i32 786451, metadata !40, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!70 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
 !71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!72 = metadata !{i32 786454, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
+!72 = metadata !{i32 786454, metadata !152, metadata !0, metadata !"CFTypeRef", i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
 !73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
 !74 = metadata !{i32 786470, null, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
 !75 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_scale", i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!76 = metadata !{i32 786454, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
+!76 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"Float", i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
 !77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ]
 !78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, null, metadata !81, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 30, size 8, align 8, offset 0] [def] [from ]
 !81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88}
 !82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
 !83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
@@ -185,43 +186,43 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
 !89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
 !90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
-!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, null, metadata !92, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [MyWork] [line 36, size 384, align 32, offset 0] [def] [from ]
 !92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123}
-!93 = metadata !{i32 786460, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
-!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!93 = metadata !{i32 786460, metadata !152, metadata !91, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
+!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, null, metadata !96, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [twork] [line 43, size 32, align 32, offset 0] [def] [from ]
 !95 = metadata !{i32 786473, metadata !157} ; [ DW_TAG_file_type ]
 !96 = metadata !{metadata !97}
 !97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
 !98 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_itemID", i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!99 = metadata !{i32 786454, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
+!99 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"uint64_t", i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
 !100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
-!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, null, metadata !105, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [MyLibrary2] [line 22, size 32, align 32, offset 0] [def] [from ]
 !104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ]
 !105 = metadata !{metadata !106}
 !106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
 !107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!108 = metadata !{i32 786454, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
-!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!108 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"CR", i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
+!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, null, metadata !110, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [CR] [line 29, size 128, align 32, offset 0] [def] [from ]
 !110 = metadata !{metadata !111, metadata !117}
 !111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
-!112 = metadata !{i32 786454, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
-!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!112 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"CP", i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
+!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, null, metadata !114, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [CP] [line 13, size 64, align 32, offset 0] [def] [from ]
 !114 = metadata !{metadata !115, metadata !116}
 !115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
 !116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
 !117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
-!118 = metadata !{i32 786454, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
-!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!118 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"Size", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
+!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, null, metadata !120, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Size] [line 21, size 64, align 32, offset 0] [def] [from ]
 !120 = metadata !{metadata !121, metadata !122}
 !121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
 !122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
 !123 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_data", i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
 !124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
-!125 = metadata !{i32 786454, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
+!125 = metadata !{i32 786454, metadata !152, metadata !0, metadata !"d_t", i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
 !126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
-!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [my_struct] [line 49, size 0, align 0, offset 0] [decl] [from ]
 !128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ]
 !129 = metadata !{i32 609, i32 144, metadata !23, null}
 !130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -236,7 +237,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !139 = metadata !{i32 786688, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, null, i64 1, i64 28} ; [ DW_TAG_auto_variable ]
 !140 = metadata !{i32 607, i32 30, metadata !23, null}
 !141 = metadata !{i32 610, i32 17, metadata !142, null}
-!142 = metadata !{i32 786443, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ]
+!142 = metadata !{i32 786443, metadata !152, metadata !23, i32 609, i32 200, i32 94} ; [ DW_TAG_lexical_block ]
 !143 = metadata !{i32 611, i32 17, metadata !142, null}
 !144 = metadata !{i32 612, i32 17, metadata !142, null}
 !145 = metadata !{i32 613, i32 17, metadata !142, null}
@@ -256,3 +257,4 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"}
 !160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"}
 !161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"}
+!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 38945ac2ea7b..8505f5365567 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -5,8 +5,8 @@ target triple = "thumbv7-apple-macosx10.6.7"
 ;CHECK: 	vadd.f32	q4, q8, q8
 ;CHECK-NEXT: LBB0_1
 
-;CHECK:@DEBUG_VALUE: x <- Q4+0
-;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0
+;CHECK:@DEBUG_VALUE: x <- Q4{{$}}
+;CHECK-NEXT:@DEBUG_VALUE: y <- Q4{{$}}
 
 
 @.str = external constant [13 x i8]
@@ -38,23 +38,25 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
+!llvm.module.flags = !{!56}
+
+!0 = metadata !{i32 786478, metadata !54, null, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!6 = metadata !{i32 786433, metadata !54, metadata !2, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
 !7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786465, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !54, null, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 0] [main]
+!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 786478, metadata !55, null, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [scope 0] [printFV]
 !15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !17 = metadata !{null}
 !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -94,3 +96,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !53 = metadata !{metadata !30}
 !54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"}
 !55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"}
+!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index e4040fa02caa..30a3e2dcdc2c 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -57,23 +57,24 @@ entry:
 declare i32 @puts(i8* nocapture) nounwind
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!48}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !46, i32 1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, metadata !47, metadata !47, metadata !42, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
-!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786468, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786468, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"double", i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !5, metadata !5, metadata !13}
-!13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !16 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -83,14 +84,14 @@ declare i32 @puts(i8* nocapture) nounwind
 !22 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !23 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 786688, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !46, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 4, i32 0, metadata !9, null}
 !27 = metadata !{i32 6, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !1, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !46, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 7, i32 0, metadata !28, null}
 !30 = metadata !{i32 11, i32 0, metadata !0, null}
 !31 = metadata !{i32 13, i32 0, metadata !32, null}
-!32 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!32 = metadata !{i32 786443, metadata !46, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !33 = metadata !{i32 14, i32 0, metadata !32, null}
 !34 = metadata !{i32 17, i32 0, metadata !10, null}
 !35 = metadata !{i32 19, i32 0, metadata !25, null}
@@ -105,3 +106,5 @@ declare i32 @puts(i8* nocapture) nounwind
 !44 = metadata !{metadata !19, metadata !20, metadata !21}
 !45 = metadata !{metadata !22, metadata !23, metadata !24}
 !46 = metadata !{metadata !"a.c", metadata !"/tmp/"}
+!47 = metadata !{i32 0}
+!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 1de6ffaeec7d..ee515fd55c81 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -36,24 +36,25 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!56}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [test0001]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!6 = metadata !{i32 786433, metadata !2, null, metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
 !7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786465, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] [line 59] [def] [main]
+!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786478, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 786478, metadata !55, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [printFV]
 !15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !17 = metadata !{null}
 !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -62,7 +63,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
 !23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !54, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
 !27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
 !28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
@@ -70,21 +71,21 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
 !31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
 !32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, null} ; [ DW_TAG_union_type ]
 !34 = metadata !{metadata !35, metadata !37}
 !35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
 !36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
 !37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
 !38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !54, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 786443, metadata !54, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 786443, metadata !54, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 786443, metadata !54, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
 !44 = metadata !{i32 75, i32 5, metadata !42, null}
 !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
+!46 = metadata !{i32 786443, metadata !55, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 786443, metadata !55, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
 !48 = metadata !{i32 95, i32 3, metadata !25, null}
 !49 = metadata !{i32 99, i32 3, metadata !25, null}
 !50 = metadata !{metadata !0, metadata !10, metadata !14}
@@ -93,3 +94,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !53 = metadata !{metadata !30}
 !54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"}
 !55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"}
+!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 186894232eaf..e92d9776db8c 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -62,42 +62,43 @@ declare i32 @puts(i8* nocapture) nounwind optsize
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!53}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [inlineprinter]
 !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !51, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !52, metadata !52, metadata !47, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !51, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"printer", metadata !"printer", metadata !"", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [printer]
+!7 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
 !8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !10 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786689, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 786689, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
 !16 = metadata !{i32 786689, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786689, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 786689, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!19 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !22 = metadata !{i32 786688, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0, null} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 786443, metadata !1, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !51, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{i32 4, i32 22, metadata !0, null}
 !25 = metadata !{i32 4, i32 33, metadata !0, null}
 !26 = metadata !{i32 4, i32 52, metadata !0, null}
 !27 = metadata !{i32 6, i32 3, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !51, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 7, i32 3, metadata !28, null}
 !30 = metadata !{i32 11, i32 42, metadata !6, null}
 !31 = metadata !{i32 11, i32 53, metadata !6, null}
 !32 = metadata !{i32 11, i32 72, metadata !6, null}
 !33 = metadata !{i32 13, i32 3, metadata !34, null}
-!34 = metadata !{i32 786443, metadata !1, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 786443, metadata !51, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
 !35 = metadata !{i32 14, i32 3, metadata !34, null}
 !36 = metadata !{i32 17, i32 15, metadata !7, null}
 !37 = metadata !{i32 17, i32 28, metadata !7, null}
@@ -115,3 +116,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !49 = metadata !{metadata !14, metadata !15, metadata !16}
 !50 = metadata !{metadata !17, metadata !18, metadata !22}
 !51 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
+!52 = metadata !{i32 0}
+!53 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ba83f797e2ce..854fcabbae87 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -40,18 +40,19 @@ declare float @_Z2f3f(float) optsize
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !18, i32 4, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null,  null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 786443, metadata !2, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786443, metadata !18, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 786688, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 786443, metadata !2, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786443, metadata !18, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !18, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 6, i32 18, metadata !6, null}
 !12 = metadata !{i32 7, i32 3, metadata !6, null}
 !13 = metadata !{i32 8, i32 20, metadata !9, null}
@@ -60,3 +61,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !16 = metadata !{metadata !1}
 !17 = metadata !{metadata !5, metadata !8}
 !18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"}
+!19 = metadata !{i32 0}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 82cfca182b80..a339c816c578 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,13 +1,14 @@
 ; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-ARM
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift     | FileCheck %s -check-prefix=CHECK-SWIFT
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift     | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
 ; CHECK-ARM: f1
 ; CHECK-ARM: __divsi3
 
-; CHECK-SWIFT: f1
-; CHECK-SWIFT: sdiv
+; CHECK-HWDIV: f1
+; CHECK-HWDIV: sdiv
         %tmp1 = sdiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
@@ -17,8 +18,8 @@ entry:
 ; CHECK-ARM: f2
 ; CHECK-ARM: __udivsi3
 
-; CHECK-SWIFT: f2
-; CHECK-SWIFT: udiv
+; CHECK-HWDIV: f2
+; CHECK-HWDIV: udiv
         %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
@@ -28,9 +29,9 @@ entry:
 ; CHECK-ARM: f3
 ; CHECK-ARM: __modsi3
 
-; CHECK-SWIFT: f3
-; CHECK-SWIFT: sdiv
-; CHECK-SWIFT: mls
+; CHECK-HWDIV: f3
+; CHECK-HWDIV: sdiv
+; CHECK-HWDIV: mls
         %tmp1 = srem i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
@@ -40,9 +41,9 @@ entry:
 ; CHECK-ARM: f4
 ; CHECK-ARM: __umodsi3
 
-; CHECK-SWIFT: f4
-; CHECK-SWIFT: udiv
-; CHECK-SWIFT: mls
+; CHECK-HWDIV: f4
+; CHECK-HWDIV: udiv
+; CHECK-HWDIV: mls
         %tmp1 = urem i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll
new file mode 100644
index 000000000000..404cae0da2b2
--- /dev/null
+++ b/test/CodeGen/ARM/divmod-eabi.ll
@@ -0,0 +1,202 @@
+; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
+; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU
+; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
+
+define signext i16 @f16(i16 signext %a, i16 signext %b) {
+; EABI-LABEL: f16:
+; GNU-LABEL: f16:
+; DARWIN-LABEL: f16:
+entry:
+  %conv = sext i16 %a to i32
+  %conv1 = sext i16 %b to i32
+  %div = sdiv i32 %conv, %conv1
+  %rem = srem i32 %conv, %conv1
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; EABI: mov [[rem:r[0-9]+]], r1
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem8 = srem i32 %conv1, %conv
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+  %add13 = add nsw i32 %add, %rem8
+  %conv14 = trunc i32 %add13 to i16
+; EABI: add r0{{.*}}r1
+; EABI: sxth r0, r0
+; GNU: add r0{{.*}}[[sum]]
+; GNU: sxth r0, r0
+; DARWIN: add r0{{.*}}[[sum]]
+; DARWIN: sxth r0, r0
+  ret i16 %conv14
+}
+
+define i32 @f32(i32 %a, i32 %b) {
+; EABI-LABEL: f32:
+; GNU-LABEL: f32:
+; DARWIN-LABEL: f32:
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; EABI: mov [[rem:r[0-9]+]], r1
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem1 = srem i32 %b, %a
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+  %add2 = add nsw i32 %add, %rem1
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add2
+}
+
+define i32 @uf(i32 %a, i32 %b) {
+; EABI-LABEL: uf:
+; GNU-LABEL: uf:
+; DARWIN-LABEL: uf:
+entry:
+  %div = udiv i32 %a, %b
+  %rem = urem i32 %a, %b
+; EABI: __aeabi_uidivmod
+; GNU: __aeabi_uidiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __umodsi3
+; GNU: add [[sum]]{{.*}}r0
+; DARWIN: ___udivsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __umodsi3
+; DARWIN: add [[sum]]{{.*}}r0
+  %rem1 = urem i32 %b, %a
+; EABI: __aeabi_uidivmod
+; GNU: __umodsi3
+; DARWIN: __umodsi3
+  %add = add nuw i32 %rem, %div
+  %add2 = add nuw i32 %add, %rem1
+; EABI: add r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add2
+}
+
+; FIXME: AEABI is not lowering long u/srem into u/ldivmod
+define i64 @longf(i64 %a, i64 %b) {
+; EABI-LABEL: longf:
+; GNU-LABEL: longf:
+; DARWIN-LABEL: longf:
+entry:
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %b
+; EABI: __aeabi_ldivmod
+; GNU: __aeabi_ldivmod
+; GNU: mov [[div1:r[0-9]+]], r0
+; GNU: mov [[div2:r[0-9]+]], r1
+; DARWIN: ___divdi3
+; DARWIN: mov [[div1:r[0-9]+]], r0
+; DARWIN: mov [[div2:r[0-9]+]], r1
+; DARWIN: __moddi3
+  %add = add nsw i64 %rem, %div
+; GNU: adds r0{{.*}}[[div1]]
+; GNU: adc r1{{.*}}[[div2]]
+; DARWIN: adds r0{{.*}}[[div1]]
+; DARWIN: adc r1{{.*}}[[div2]]
+  ret i64 %add
+}
+
+define i32 @g1(i32 %a, i32 %b) {
+; EABI-LABEL: g1:
+; GNU-LABEL: g1:
+; DARWIN-LABEL: g1:
+entry:
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; GNU: __modsi3
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+; EABI:	add	r0{{.*}}r1
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
+
+; On both Darwin and Gnu, this is just a call to __modsi3
+define i32 @g2(i32 %a, i32 %b) {
+; EABI-LABEL: g2:
+; GNU-LABEL: g2:
+; DARWIN-LABEL: g2:
+entry:
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  ret i32 %rem
+; EABI:	mov	r0, r1
+}
+
+define i32 @g3(i32 %a, i32 %b) {
+; EABI-LABEL: g3:
+; GNU-LABEL: g3:
+; DARWIN-LABEL: g3:
+entry:
+  %rem = srem i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[mod:r[0-9]+]], r1
+; GNU: __modsi3
+; GNU: mov [[sum:r[0-9]+]], r0
+; DARWIN: __modsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+  %rem1 = srem i32 %b, %rem
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem1, %rem
+; EABI: add r0, r1, [[mod]]
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
+
+define i32 @g4(i32 %a, i32 %b) {
+; EABI-LABEL: g4:
+; GNU-LABEL: g4:
+; DARWIN-LABEL: g4:
+entry:
+  %div = sdiv i32 %a, %b
+; EABI: __aeabi_idivmod
+; EABI: mov [[div:r[0-9]+]], r0
+; GNU: __aeabi_idiv
+; GNU: mov [[sum:r[0-9]+]], r0
+; DARWIN: ___divsi3
+; DARWIN: mov [[sum:r[0-9]+]], r0
+  %rem = srem i32 %b, %div
+; EABI: __aeabi_idivmod
+; GNU: __modsi3
+; DARWIN: __modsi3
+  %add = add nsw i32 %rem, %div
+; EABI: add r0, r1, [[div]]
+; GNU: add r0{{.*}}[[sum]]
+; DARWIN: add r0{{.*}}[[sum]]
+  ret i32 %add
+}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 577f8aa7d39b..7be0c796bd21 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -5,11 +5,11 @@
 
 define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
 entry:
-; A8: foo:
+; A8-LABEL: foo:
 ; A8: bl ___divmodsi4
 ; A8-NOT: bl ___divmodsi4
 
-; SWIFT: foo:
+; SWIFT-LABEL: foo:
 ; SWIFT: sdiv
 ; SWIFT: mls
 ; SWIFT-NOT: bl __divmodsi4
@@ -23,11 +23,11 @@ entry:
 
 define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
 entry:
-; A8: bar:
+; A8-LABEL: bar:
 ; A8: bl ___udivmodsi4
 ; A8-NOT: bl ___udivmodsi4
 
-; SWIFT: bar:
+; SWIFT-LABEL: bar:
 ; SWIFT: udiv
 ; SWIFT: mls
 ; SWIFT-NOT: bl __udivmodsi4
@@ -45,8 +45,8 @@ entry:
 
 define void @do_indent(i32 %cols) nounwind {
 entry:
-; A8: do_indent:
-; SWIFT: do_indent:
+; A8-LABEL: do_indent:
+; SWIFT-LABEL: do_indent:
   %0 = load i32* @flags, align 4
   %1 = and i32 %0, 67108864
   %2 = icmp eq i32 %1, 0
@@ -60,7 +60,7 @@ bb:
   %3 = load i32* @tabsize, align 4
   %4 = srem i32 %cols, %3
   %5 = sdiv i32 %cols, %3
-  %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false)
+  %6 = tail call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false)
   %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind
   br label %bb1
 
@@ -71,17 +71,17 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readnone
 declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
 
 ; rdar://11714607
 define i32 @howmany(i32 %x, i32 %y) nounwind {
 entry:
-; A8: howmany:
+; A8-LABEL: howmany:
 ; A8: bl ___udivmodsi4
 ; A8-NOT: ___udivsi3
 
-; SWIFT: howmany:
+; SWIFT-LABEL: howmany:
 ; SWIFT: udiv
 ; SWIFT: mls
 ; SWIFT-NOT: bl __udivmodsi4
diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll
index b5586cc99fc1..d6528db741cd 100644
--- a/test/CodeGen/ARM/domain-conv-vmovs.ll
+++ b/test/CodeGen/ARM/domain-conv-vmovs.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
 
 define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane0to0:
+; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
   %vec = insertelement <2 x float> %in, float %arg, i32 0
   %res = fadd <2 x float> %vec, %vec
 
@@ -13,7 +13,7 @@ define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
 }
 
 define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane0to1:
+; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
   %vec = insertelement <2 x float> %in, float %arg, i32 1
   %res = fadd <2 x float> %vec, %vec
 
@@ -25,7 +25,7 @@ define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
 }
 
 define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane1to0:
+; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
   %vec = insertelement <2 x float> %in, float %arg, i32 0
   %res = fadd <2 x float> %vec, %vec
 
@@ -37,7 +37,7 @@ define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float>
 }
 
 define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
-; CHECK: test_vmovs_via_vext_lane1to1:
+; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
   %vec = insertelement <2 x float> %in, float %arg, i32 1
   %res = fadd <2 x float> %vec, %vec
 
@@ -50,7 +50,7 @@ define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float>
 
 
 define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
-; CHECK: test_vmovs_via_vdup:
+; CHECK-LABEL: test_vmovs_via_vdup:
 
   ; Do an operation (which will end up NEON because of +neonfp) to convince the
   ; execution-domain pass that NEON is a good thing to use.
@@ -68,7 +68,7 @@ declare void @bar()
 
 ; This is a comp
 define float @test_ineligible(float, float %in) {
-; CHECK: test_ineligible:
+; CHECK-LABEL: test_ineligible:
 
   %sqrt = call float @llvm.sqrt.f32(float %in)
   %val = fadd float %sqrt, %sqrt
@@ -85,7 +85,7 @@ define float @test_ineligible(float, float %in) {
 }
 
 define i32 @test_vmovs_no_sreg(i32 %in) {
-; CHECK: test_vmovs_no_sreg:
+; CHECK-LABEL: test_vmovs_no_sreg:
 
   ; Check that the movement to and from GPRs takes place in the NEON domain.
 ; CHECK: vmov.32 d
diff --git a/test/CodeGen/ARM/eh-dispcont.ll b/test/CodeGen/ARM/eh-dispcont.ll
index 935965bbdf8b..57ab15feca5e 100644
--- a/test/CodeGen/ARM/eh-dispcont.ll
+++ b/test/CodeGen/ARM/eh-dispcont.ll
@@ -65,10 +65,10 @@ attributes #2 = { noreturn }
 
 ; THUMB1-PIC: cxa_throw
 ; THUMB1-PIC: trap
-; THUMB1-PIC: adr [[REG0:r[0-9]+]], [[LJTI:.*]]
-; THUMB1-PIC: adds [[REG1:r[0-9]+]], [[REG1]], [[REG0]]
-; THUMB1-PIC: ldr [[REG1]]
-; THUMB1-PIC: adds [[REG0]], [[REG1]], [[REG0]]
+; THUMB1-PIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]]
+; THUMB1-PIC: adds [[REG0:r[0-9]+]], [[REG0]], [[REG1]]
+; THUMB1-PIC: ldr [[REG0]]
+; THUMB1-PIC: adds [[REG0]], [[REG0]], [[REG1]]
 ; THUMB1-PIC: mov pc, [[REG0]]
 ; THUMB1-PIC: [[LJTI]]
 ; THUMB1-PIC: .data_region jt32
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
index 4c92a2975d39..cb5291b20e62 100644
--- a/test/CodeGen/ARM/ehabi-filters.ll
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -15,7 +15,7 @@ declare void @__cxa_throw(i8*, i8*, i8*)
 declare void @__cxa_call_unexpected(i8*)
 
 define i32 @main() {
-; CHECK: main:
+; CHECK-LABEL: main:
 entry:
   %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
   %0 = bitcast i8* %exception.i to i32*
diff --git a/test/CodeGen/ARM/ehabi-mc-cantunwind.ll b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll
deleted file mode 100644
index 698d76e56580..000000000000
--- a/test/CodeGen/ARM/ehabi-mc-cantunwind.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
-
-define void @test() nounwind {
-entry:
-  ret void
-}
-
-; CHECK: section .text
-; CHECK: section .ARM.exidx
-; CHECK-NEXT: 0000 00000000 01000000
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
deleted file mode 100644
index 11f3e6db0fe5..000000000000
--- a/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -disable-fp-elim -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -disable-fp-elim -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -r - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -r - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
-
-define void @_Z4testv() {
-entry:
-  tail call void @_Z15throw_exceptionv()
-  ret void
-}
-
-declare void @_Z15throw_exceptionv()
-
-; CHECK-NOT: section .ARM.extab
-; CHECK: section .text
-; CHECK-NOT: section .ARM.extab
-; CHECK: section .ARM.exidx
-; CHECK-NEXT: 0000 00000000 80849b80
-; CHECK-NOT: section .ARM.extab
-
-; CHECK-FP-ELIM-NOT: section .ARM.extab
-; CHECK-FP-ELIM: section .text
-; CHECK-FP-ELIM-NOT: section .ARM.extab
-; CHECK-FP-ELIM: section .ARM.exidx
-; CHECK-FP-ELIM-NEXT: 0000 00000000 b0808480
-; CHECK-FP-ELIM-NOT: section .ARM.extab
-
-; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
-; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
-; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
deleted file mode 100644
index 79dba084c044..000000000000
--- a/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -disable-fp-elim -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -disable-fp-elim -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -r - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -r - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM-RELOC
-
-define i32 @_Z3addiiiiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
-entry:
-  %add = add nsw i32 %b, %a
-  %add1 = add nsw i32 %add, %c
-  %add2 = add nsw i32 %add1, %d
-  tail call void @_Z15throw_exceptioni(i32 %add2)
-  %add3 = add nsw i32 %f, %e
-  %add4 = add nsw i32 %add3, %g
-  %add5 = add nsw i32 %add4, %h
-  tail call void @_Z15throw_exceptioni(i32 %add5)
-  %add6 = add nsw i32 %add5, %add2
-  ret i32 %add6
-}
-
-declare void @_Z15throw_exceptioni(i32)
-
-; CHECK-NOT: section .ARM.extab
-; CHECK: section .text
-; CHECK: section .ARM.extab
-; CHECK-NEXT: 0000 419b0181 b0b08384
-; CHECK: section .ARM.exidx
-; CHECK-NEXT: 0000 00000000 00000000
-; CHECK-NOT: section .ARM.extab
-
-; CHECK-FP-ELIM-NOT: section .ARM.extab
-; CHECK-FP-ELIM: section .text
-; CHECK-FP-ELIM-NOT: section .ARM.extab
-; CHECK-FP-ELIM: section .ARM.exidx
-; CHECK-FP-ELIM-NEXT: 0000 00000000 b0838480
-; CHECK-FP-ELIM-NOT: section .ARM.extab
-
-; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
-; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
-; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr1
-
-; CHECK-FP-ELIM-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
-; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_PREL31 .text
-; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll
deleted file mode 100644
index 616aa1ba46e7..000000000000
--- a/test/CodeGen/ARM/ehabi-mc-section-group.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; Test section group of the function with linkonce_odr
-
-; The instantiation of C++ function template will come with linkonce_odr,
-; which indicates that the linker can remove the duplicated instantiation.
-; However, to make this feature work, we have to group the section properly.
-; .text, .ARM.extab, and .ARM.exidx should be grouped together.
-
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-readobj -s -sd \
-; RUN:   | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv4t--linux-gnueabi"
-
-define void @_Z11instantiatev() {
-entry:
-  tail call void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 1, i32 2, i32 3, i32 4, i32 5, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01)
-  ret void
-}
-
-define linkonce_odr void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
-entry:
-  invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
-          to label %try.cont unwind label %lpad
-
-lpad:                                             ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  %1 = extractvalue { i8*, i32 } %0, 0
-  %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
-  invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
-          to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2:                                     ; preds = %lpad
-  tail call void @__cxa_end_catch()
-  br label %try.cont
-
-try.cont:                                         ; preds = %entry, %invoke.cont2
-  ret void
-
-lpad1:                                            ; preds = %lpad
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          cleanup
-  invoke void @__cxa_end_catch()
-          to label %eh.resume unwind label %terminate.lpad
-
-eh.resume:                                        ; preds = %lpad1
-  resume { i8*, i32 } %3
-
-terminate.lpad:                                   ; preds = %lpad1
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  tail call void @_ZSt9terminatev() noreturn nounwind
-  unreachable
-}
-
-declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @_Z5printddddd(double, double, double, double, double)
-
-declare void @__cxa_end_catch()
-
-declare void @_ZSt9terminatev()
-
-; CHECK:        Section {
-; CHECK:          Index: 1
-; CHECK-NEXT:     Name: .group (47)
-; CHECK:          SectionData (
-; CHECK-NEXT:       0000: 01000000 09000000 0B000000 0D000000
-; CHECK-NEXT:     )
-
-; CHECK:        Section {
-; CHECK:          Index: 9
-; CHECK-NEXT:     Name: .text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (214)
-
-; CHECK:        Section {
-; CHECK:          Index: 11
-; CHECK-NEXT:     Name: .ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (204)
-
-; CHECK:        Section {
-; CHECK:          Index: 13
-; CHECK-NEXT:     Name: .ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (90)
diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll
deleted file mode 100644
index 4e6e46829148..000000000000
--- a/test/CodeGen/ARM/ehabi-mc-section.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -disable-fp-elim -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
-
-define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" {
-entry:
-  invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
-          to label %try.cont unwind label %lpad
-
-lpad:                                             ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  %1 = extractvalue { i8*, i32 } %0, 0
-  %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
-  invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
-          to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2:                                     ; preds = %lpad
-  tail call void @__cxa_end_catch()
-  br label %try.cont
-
-try.cont:                                         ; preds = %entry, %invoke.cont2
-  ret void
-
-lpad1:                                            ; preds = %lpad
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          cleanup
-  invoke void @__cxa_end_catch()
-          to label %eh.resume unwind label %terminate.lpad
-
-eh.resume:                                        ; preds = %lpad1
-  resume { i8*, i32 } %3
-
-terminate.lpad:                                   ; preds = %lpad1
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  tail call void @_ZSt9terminatev() noreturn nounwind
-  unreachable
-}
-
-declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @_Z5printddddd(double, double, double, double, double)
-
-declare void @__cxa_end_catch()
-
-declare void @_ZSt9terminatev()
-
-; CHECK: section .test_section
-; CHECK: section .ARM.extab.test_section
-; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
-; CHECK: section .ARM.exidx.test_section
-; CHECK-NEXT: 0000 00000000 00000000
-
-; CHECK-FP-ELIM: section .test_section
-; CHECK-FP-ELIM: section .ARM.extab.test_section
-; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
-; CHECK-FP-ELIM: section .ARM.exidx.test_section
-; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
deleted file mode 100644
index ac0a0fc9309a..000000000000
--- a/test/CodeGen/ARM/ehabi-mc-sh_link.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; Test the sh_link in Elf32_Shdr.
-
-; The .ARM.exidx section should be linked with corresponding text section.
-; The sh_link in Elf32_Shdr should be filled with the section index of
-; the text section.
-
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-readobj -s \
-; RUN:   | FileCheck %s
-
-define void @test1() nounwind {
-entry:
-  ret void
-}
-
-define void @test2() nounwind section ".test_section" {
-entry:
-  ret void
-}
-
-; CHECK:      Sections [
-; CHECK:        Section {
-; CHECK:          Index: 1
-; CHECK-NEXT:     Name: .text (16)
-
-; CHECK:        Section {
-; CHECK:          Name: .ARM.exidx (5)
-; CHECK-NEXT:     Type: SHT_ARM_EXIDX
-; CHECK-NEXT:     Flags [ (0x82)
-; CHECK-NEXT:       SHF_ALLOC
-; CHECK-NEXT:       SHF_LINK_ORDER
-; CHECK-NEXT:     ]
-; CHECK-NEXT:     Address: 0x0
-; CHECK-NEXT:     Offset: 0x5C
-; CHECK-NEXT:     Size: 8
-; CHECK-NEXT:     Link: 1
-; CHECK-NEXT:     Info: 0
-; CHECK-NEXT:     AddressAlignment: 4
-
-; CHECK:        Section {
-; CHECK:          Index: 7
-; CHECK-NEXT:     Name: .test_section (57)
-
-; CHECK:        Section {
-; CHECK:          Name: .ARM.exidx.test_section (47)
-; CHECK-NEXT:     Type: SHT_ARM_EXIDX
-; CHECK-NEXT:     Flags [ (0x82)
-; CHECK-NEXT:       SHF_ALLOC
-; CHECK-NEXT:       SHF_LINK_ORDER
-; CHECK-NEXT:     ]
-; CHECK-NEXT:     Address: 0x0
-; CHECK-NEXT:     Offset: 0x68
-; CHECK-NEXT:     Size: 8
-; CHECK-NEXT:     Link: 7
-; CHECK-NEXT:     Info: 0
-; CHECK-NEXT:     AddressAlignment: 4
diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll
deleted file mode 100644
index 83b8425af7c4..000000000000
--- a/test/CodeGen/ARM/ehabi-mc.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -disable-fp-elim -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK
-
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
-; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
-; RUN:     -filetype=obj -o - %s \
-; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
-
-define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
-entry:
-  invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
-          to label %try.cont unwind label %lpad
-
-lpad:                                             ; preds = %entry
-  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  %1 = extractvalue { i8*, i32 } %0, 0
-  %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
-  invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
-          to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2:                                     ; preds = %lpad
-  tail call void @__cxa_end_catch()
-  br label %try.cont
-
-try.cont:                                         ; preds = %entry, %invoke.cont2
-  ret void
-
-lpad1:                                            ; preds = %lpad
-  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          cleanup
-  invoke void @__cxa_end_catch()
-          to label %eh.resume unwind label %terminate.lpad
-
-eh.resume:                                        ; preds = %lpad1
-  resume { i8*, i32 } %3
-
-terminate.lpad:                                   ; preds = %lpad1
-  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* null
-  tail call void @_ZSt9terminatev() noreturn nounwind
-  unreachable
-}
-
-declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @_Z5printddddd(double, double, double, double, double)
-
-declare void @__cxa_end_catch()
-
-declare void @_ZSt9terminatev()
-
-; CHECK: section .text
-; CHECK: section .ARM.extab
-; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
-; CHECK: section .ARM.exidx
-; CHECK-NEXT: 0000 00000000 00000000
-
-; CHECK-FP-ELIM: section .text
-; CHECK-FP-ELIM: section .ARM.extab
-; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
-; CHECK-FP-ELIM: section .ARM.exidx
-; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll
new file mode 100644
index 000000000000..66446528c31a
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi.ll
@@ -0,0 +1,298 @@
+; ARM EHABI integrated test
+
+; This test case checks whether the ARM unwind directives are properly
+; generated or not.
+
+; The purpose of the test:
+; (1) .fnstart and .fnend directives should wrap the function.
+; (2) .setfp directive should be available if frame pointer is not eliminated.
+; (3) .save directive should come with push instruction.
+; (4) .vsave directive should come with vpush instruction.
+; (5) .pad directive should come with stack pointer adjustment.
+; (6) .cantunwind directive should be available if the function is marked with
+;     the nounwind function attribute.
+
+; We have to check several cases:
+; (1) arm with -disable-fp-elim
+; (2) arm without -disable-fp-elim
+; (3) armv7 with -disable-fp-elim
+; (4) armv7 without -disable-fp-elim
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=asm -o - %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
+
+;-------------------------------------------------------------------------------
+; Test 1
+;-------------------------------------------------------------------------------
+; This is the LLVM assembly generated from the following C++ code:
+;
+;   extern void print(int, int, int, int, int);
+;   extern void print(double, double, double, double, double);
+;
+;   void test(int a, int b, int c, int d, int e,
+;             double m, double n, double p, double q, double r) {
+;     try {
+;       print(a, b, c, d, e);
+;     } catch (...) {
+;       print(m, n, p, q, r);
+;     }
+;   }
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+define void @_Z4testiiiiiddddd(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
+                               double %m, double %n, double %p,
+                               double %q, double %r) {
+entry:
+  invoke void @_Z5printiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1)
+  invoke void @_Z5printddddd(double %m, double %n, double %p,
+                             double %q, double %r)
+          to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  ret void
+
+lpad1:
+  %3 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @__cxa_end_catch()
+          to label %eh.resume unwind label %terminate.lpad
+
+eh.resume:
+  resume { i8*, i32 } %3
+
+terminate.lpad:
+  %4 = landingpad { i8*, i32 }
+          personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  %5 = extractvalue { i8*, i32 } %4, 0
+  tail call void @__clang_call_terminate(i8* %5)
+  unreachable
+}
+
+declare void @__clang_call_terminate(i8*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-FP:   .fnstart
+; CHECK-FP:   .save  {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP:   push   {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP:   .setfp r11, sp, #28
+; CHECK-FP:   add    r11, sp, #28
+; CHECK-FP:   .pad   #28
+; CHECK-FP:   sub    sp, sp, #28
+; CHECK-FP:   .personality __gxx_personality_v0
+; CHECK-FP:   .handlerdata
+; CHECK-FP:   .fnend
+
+; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-FP-ELIM:   .fnstart
+; CHECK-FP-ELIM:   .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP-ELIM:   push  {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP-ELIM:   .pad  #28
+; CHECK-FP-ELIM:   sub   sp, sp, #28
+; CHECK-FP-ELIM:   .personality __gxx_personality_v0
+; CHECK-FP-ELIM:   .handlerdata
+; CHECK-FP-ELIM:   .fnend
+
+; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd:
+; CHECK-V7-FP:   .fnstart
+; CHECK-V7-FP:   .save  {r4, r11, lr}
+; CHECK-V7-FP:   push   {r4, r11, lr}
+; CHECK-V7-FP:   .setfp r11, sp, #4
+; CHECK-V7-FP:   add    r11, sp, #4
+; CHECK-V7-FP:   .vsave {d8, d9, d10, d11, d12}
+; CHECK-V7-FP:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-V7-FP:   .pad   #28
+; CHECK-V7-FP:   sub    sp, sp, #28
+; CHECK-V7-FP:   .personality __gxx_personality_v0
+; CHECK-V7-FP:   .handlerdata
+; CHECK-V7-FP:   .fnend
+
+; CHECK-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; CHECK-V7-FP-ELIM:   .fnstart
+; CHECK-V7-FP-ELIM:   .save  {r4, lr}
+; CHECK-V7-FP-ELIM:   push   {r4, lr}
+; CHECK-V7-FP-ELIM:   .vsave {d8, d9, d10, d11, d12}
+; CHECK-V7-FP-ELIM:   vpush  {d8, d9, d10, d11, d12}
+; CHECK-V7-FP-ELIM:   .pad   #24
+; CHECK-V7-FP-ELIM:   sub    sp, sp, #24
+; CHECK-V7-FP-ELIM:   .personality __gxx_personality_v0
+; CHECK-V7-FP-ELIM:   .handlerdata
+; CHECK-V7-FP-ELIM:   .fnend
+
+
+;-------------------------------------------------------------------------------
+; Test 2
+;-------------------------------------------------------------------------------
+
+declare void @throw_exception_2()
+
+define void @test2() {
+entry:
+  tail call void @throw_exception_2()
+  ret void
+}
+
+; CHECK-FP-LABEL: test2:
+; CHECK-FP:   .fnstart
+; CHECK-FP:   .save  {r11, lr}
+; CHECK-FP:   push   {r11, lr}
+; CHECK-FP:   .setfp r11, sp
+; CHECK-FP:   mov    r11, sp
+; CHECK-FP:   pop    {r11, lr}
+; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   .fnend
+
+; CHECK-FP-ELIM-LABEL: test2:
+; CHECK-FP-ELIM:   .fnstart
+; CHECK-FP-ELIM:   .save {r11, lr}
+; CHECK-FP-ELIM:   push  {r11, lr}
+; CHECK-FP-ELIM:   pop   {r11, lr}
+; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   .fnend
+
+; CHECK-V7-FP-LABEL: test2:
+; CHECK-V7-FP:   .fnstart
+; CHECK-V7-FP:   .save  {r11, lr}
+; CHECK-V7-FP:   push   {r11, lr}
+; CHECK-V7-FP:   .setfp r11, sp
+; CHECK-V7-FP:   mov    r11, sp
+; CHECK-V7-FP:   pop    {r11, pc}
+; CHECK-V7-FP:   .fnend
+
+; CHECK-V7-FP-ELIM-LABEL: test2:
+; CHECK-V7-FP-ELIM:   .fnstart
+; CHECK-V7-FP-ELIM:   .save {r11, lr}
+; CHECK-V7-FP-ELIM:   push  {r11, lr}
+; CHECK-V7-FP-ELIM:   pop   {r11, pc}
+; CHECK-V7-FP-ELIM:   .fnend
+
+
+;-------------------------------------------------------------------------------
+; Test 3
+;-------------------------------------------------------------------------------
+
+declare void @throw_exception_3(i32)
+
+define i32 @test3(i32 %a, i32 %b, i32 %c, i32 %d,
+                  i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  %add2 = add nsw i32 %add1, %d
+  tail call void @throw_exception_3(i32 %add2)
+  %add3 = add nsw i32 %f, %e
+  %add4 = add nsw i32 %add3, %g
+  %add5 = add nsw i32 %add4, %h
+  tail call void @throw_exception_3(i32 %add5)
+  %add6 = add nsw i32 %add5, %add2
+  ret i32 %add6
+}
+
+; CHECK-FP-LABEL: test3:
+; CHECK-FP:   .fnstart
+; CHECK-FP:   .save  {r4, r5, r11, lr}
+; CHECK-FP:   push   {r4, r5, r11, lr}
+; CHECK-FP:   .setfp r11, sp, #8
+; CHECK-FP:   add    r11, sp, #8
+; CHECK-FP:   pop    {r4, r5, r11, lr}
+; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   .fnend
+
+; CHECK-FP-ELIM-LABEL: test3:
+; CHECK-FP-ELIM:   .fnstart
+; CHECK-FP-ELIM:   .save {r4, r5, r11, lr}
+; CHECK-FP-ELIM:   push  {r4, r5, r11, lr}
+; CHECK-FP-ELIM:   pop   {r4, r5, r11, lr}
+; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   .fnend
+
+; CHECK-V7-FP-LABEL: test3:
+; CHECK-V7-FP:   .fnstart
+; CHECK-V7-FP:   .save  {r4, r5, r11, lr}
+; CHECK-V7-FP:   push   {r4, r5, r11, lr}
+; CHECK-V7-FP:   .setfp r11, sp, #8
+; CHECK-V7-FP:   add    r11, sp, #8
+; CHECK-V7-FP:   pop    {r4, r5, r11, pc}
+; CHECK-V7-FP:   .fnend
+
+; CHECK-V7-FP-ELIM-LABEL: test3:
+; CHECK-V7-FP-ELIM:   .fnstart
+; CHECK-V7-FP-ELIM:   .save {r4, r5, r11, lr}
+; CHECK-V7-FP-ELIM:   push  {r4, r5, r11, lr}
+; CHECK-V7-FP-ELIM:   pop   {r4, r5, r11, pc}
+; CHECK-V7-FP-ELIM:   .fnend
+
+
+;-------------------------------------------------------------------------------
+; Test 4
+;-------------------------------------------------------------------------------
+
+define void @test4() nounwind {
+entry:
+  ret void
+}
+
+; CHECK-FP-LABEL: test4:
+; CHECK-FP:   .fnstart
+; CHECK-FP:   mov pc, lr
+; CHECK-FP:   .cantunwind
+; CHECK-FP:   .fnend
+
+; CHECK-FP-ELIM-LABEL: test4:
+; CHECK-FP-ELIM:   .fnstart
+; CHECK-FP-ELIM:   mov pc, lr
+; CHECK-FP-ELIM:   .cantunwind
+; CHECK-FP-ELIM:   .fnend
+
+; CHECK-V7-FP-LABEL: test4:
+; CHECK-V7-FP:   .fnstart
+; CHECK-V7-FP:   bx lr
+; CHECK-V7-FP:   .cantunwind
+; CHECK-V7-FP:   .fnend
+
+; CHECK-V7-FP-ELIM-LABEL: test4:
+; CHECK-V7-FP-ELIM:   .fnstart
+; CHECK-V7-FP-ELIM:   bx lr
+; CHECK-V7-FP-ELIM:   .cantunwind
+; CHECK-V7-FP-ELIM:   .fnend
diff --git a/test/CodeGen/ARM/emit-big-cst.ll b/test/CodeGen/ARM/emit-big-cst.ll
new file mode 100644
index 000000000000..9a3367dab1a1
--- /dev/null
+++ b/test/CodeGen/ARM/emit-big-cst.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=thumbv7-unknown-unknown < %s | FileCheck %s
+; Check assembly printing of oddly sized integer constants (an i82 here).
+
+; CHECK: bigCst:
+; CHECK-NEXT: .long 1694510592
+; CHECK-NEXT: .long 2960197
+; CHECK-NEXT: .long 26220
+; CHECK-NEXT: .size bigCst, 12
+
+@bigCst = internal constant i82 483673642326615442599424
+
+define void @accessBig(i64* %storage) {
+  %addr = bitcast i64* %storage to i82*
+  %bigLoadedCst = load volatile i82* @bigCst
+  %tmp = add i82 %bigLoadedCst, 1
+  store i82 %tmp, i82* %addr
+  ret void
+}
diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll
index 8fd6b6bd777a..8ccf58c39170 100644
--- a/test/CodeGen/ARM/extload-knownzero.ll
+++ b/test/CodeGen/ARM/extload-knownzero.ll
@@ -3,7 +3,7 @@
 
 define void @foo(i16* %ptr, i32 %a) nounwind {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
   %tmp1 = icmp ult i32 %a, 100
   br i1 %tmp1, label %bb1, label %bb2
 bb1:
diff --git a/test/CodeGen/ARM/fabs-neon.ll b/test/CodeGen/ARM/fabs-neon.ll
index 614117ff7bca..e3094aaf57d0 100644
--- a/test/CodeGen/ARM/fabs-neon.ll
+++ b/test/CodeGen/ARM/fabs-neon.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=armv7-eabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s
 
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK:         vabs.f32        q0, q0
 define <4 x float> @test(<4 x float> %a) {
   %foo = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
@@ -8,7 +8,7 @@ define <4 x float> @test(<4 x float> %a) {
 }
 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK:        vabs.f32        d0, d0
 define <2 x float> @test2(<2 x float> %a) {
   %foo = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index c3e00ce47019..77c21c5be91a 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -13,17 +13,17 @@ entry:
 
 declare float @fabsf(float)
 
-; VFP2: test:
+; VFP2-LABEL: test:
 ; VFP2: 	vabs.f32	s
 
-; NFP1: test:
+; NFP1-LABEL: test:
 ; NFP1: 	vabs.f32	d
-; NFP0: test:
+; NFP0-LABEL: test:
 ; NFP0: 	vabs.f32	s
 
-; CORTEXA8: test:
+; CORTEXA8-LABEL: test:
 ; CORTEXA8:     vadd.f32        [[D1:d[0-9]+]]
 ; CORTEXA8: 	vabs.f32	{{d[0-9]+}}, [[D1]]
 
-; CORTEXA9: test:
+; CORTEXA9-LABEL: test:
 ; CORTEXA9: 	vabs.f32	s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index c7e2f5d094b8..21219ce18e26 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -11,17 +11,17 @@ entry:
 	ret float %0
 }
 
-; VFP2: test:
+; VFP2-LABEL: test:
 ; VFP2: 	vadd.f32	s
 
-; NFP1: test:
+; NFP1-LABEL: test:
 ; NFP1: 	vadd.f32	d
-; NFP0: test:
+; NFP0-LABEL: test:
 ; NFP0: 	vadd.f32	s
 
-; CORTEXA8: test:
+; CORTEXA8-LABEL: test:
 ; CORTEXA8: 	vadd.f32	s
-; CORTEXA8U: test:
+; CORTEXA8U-LABEL: test:
 ; CORTEXA8U: 	vadd.f32	d
-; CORTEXA9: test:
+; CORTEXA9-LABEL: test:
 ; CORTEXA9: 	vadd.f32	s
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
index 60bc6a62f5d3..05a6bab99dbf 100644
--- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
 
 %struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
@@ -26,8 +27,8 @@ entry:
 ; THUMB: t2
   %addr = alloca i32*, align 4
   store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
-; ARM: movw r1, #1148
-; ARM: add r0, r0, r1
+; ARM: movw [[R:r[0-9]+]], #1148
+; ARM: add r0, r{{[0-9]+}}, [[R]]
 ; THUMB: addw r0, r0, #1148
   %0 = load i32** %addr, align 4
   ret i32* %0
diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll
new file mode 100644
index 000000000000..9c9a18858289
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-align.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+
+; RUN: llc < %s -O0  -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0  -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+
+; Check unaligned stores
+%struct.anon = type <{ float }>
+
+@a = common global %struct.anon* null, align 4
+
+define void @unaligned_store(float %x, float %y) nounwind {
+entry:
+; ARM: @unaligned_store
+; ARM: vmov r1, s0
+; ARM: str r1, [r0]
+
+; THUMB: @unaligned_store
+; THUMB: vmov r1, s0
+; THUMB: str r1, [r0]
+
+  %add = fadd float %x, %y
+  %0 = load %struct.anon** @a, align 4
+  %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+  store float %add, float* %x1, align 1
+  ret void
+}
+
+; Doublewords require only word-alignment.
+; rdar://10528060
+%struct.anon.0 = type { double }
+
+@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
+
+define void @word_aligned_f64_store(double %a, double %b) nounwind {
+entry:
+; ARM: @word_aligned_f64_store
+; THUMB: @word_aligned_f64_store
+  %add = fadd double %a, %b
+  store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
+; ARM: vstr d16, [r0]
+; THUMB: vstr d16, [r0]
+  ret void
+}
+
+; Check unaligned loads of floats
+%class.TAlignTest = type <{ i16, float }>
+
+define zeroext i1 @unaligned_f32_load(%class.TAlignTest* %this) nounwind align 2 {
+entry:
+; ARM: @unaligned_f32_load
+; THUMB: @unaligned_f32_load
+  %0 = alloca %class.TAlignTest*, align 4
+  store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
+  %1 = load %class.TAlignTest** %0
+  %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
+  %3 = load float* %2, align 1
+  %4 = fcmp une float %3, 0.000000e+00
+; ARM: ldr r[[R:[0-9]+]], [r0, #2]
+; ARM: vmov s0, r[[R]]
+; ARM: vcmpe.f32 s0, #0
+; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2]
+; THUMB: vmov s0, r[[R]]
+; THUMB: vcmpe.f32 s0, #0
+  ret i1 %4
+}
+
+define void @unaligned_i16_store(i16 %x, i16* %y) nounwind {
+entry:
+; ARM-STRICT-ALIGN: @unaligned_i16_store
+; ARM-STRICT-ALIGN: strb
+; ARM-STRICT-ALIGN: strb
+
+; THUMB-STRICT-ALIGN: @unaligned_i16_store
+; THUMB-STRICT-ALIGN: strb
+; THUMB-STRICT-ALIGN: strb
+
+  store i16 %x, i16* %y, align 1
+  ret void
+}
+
+define i16 @unaligned_i16_load(i16* %x) nounwind {
+entry:
+; ARM-STRICT-ALIGN: @unaligned_i16_load
+; ARM-STRICT-ALIGN: ldrb
+; ARM-STRICT-ALIGN: ldrb
+
+; THUMB-STRICT-ALIGN: @unaligned_i16_load
+; THUMB-STRICT-ALIGN: ldrb
+; THUMB-STRICT-ALIGN: ldrb
+
+  %0 = load i16* %x, align 1
+  ret i16 %0
+}
+
+define void @unaligned_i32_store(i32 %x, i32* %y) nounwind {
+entry:
+; ARM-STRICT-ALIGN: @unaligned_i32_store
+; ARM-STRICT-ALIGN: strb
+; ARM-STRICT-ALIGN: strb
+; ARM-STRICT-ALIGN: strb
+; ARM-STRICT-ALIGN: strb
+
+; THUMB-STRICT-ALIGN: @unaligned_i32_store
+; THUMB-STRICT-ALIGN: strb
+; THUMB-STRICT-ALIGN: strb
+; THUMB-STRICT-ALIGN: strb
+; THUMB-STRICT-ALIGN: strb
+
+  store i32 %x, i32* %y, align 1
+  ret void
+}
+
+define i32 @unaligned_i32_load(i32* %x) nounwind {
+entry:
+; ARM-STRICT-ALIGN: @unaligned_i32_load
+; ARM-STRICT-ALIGN: ldrb
+; ARM-STRICT-ALIGN: ldrb
+; ARM-STRICT-ALIGN: ldrb
+; ARM-STRICT-ALIGN: ldrb
+
+; THUMB-STRICT-ALIGN: @unaligned_i32_load
+; THUMB-STRICT-ALIGN: ldrb
+; THUMB-STRICT-ALIGN: ldrb
+; THUMB-STRICT-ALIGN: ldrb
+; THUMB-STRICT-ALIGN: ldrb
+
+  %0 = load i32* %x, align 1
+  ret i32 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
index 723383e04b8e..e1a2a4f33835 100644
--- a/test/CodeGen/ARM/fast-isel-binary.ll
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Test add with non-legal types
 
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index 4e6efd248997..2e28b08fc8d6 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,14 +1,15 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
 entry:
-; THUMB: t1:
-; ARM: t1:
+; THUMB-LABEL: t1:
+; ARM-LABEL: t1:
   %x = add i32 %a, %b  
   br i1 1, label %if.then, label %if.else
-; THUMB-NOT: b LBB0_1
-; ARM-NOT:  b LBB0_1
+; THUMB-NOT: b {{\.?}}LBB0_1
+; ARM-NOT:  b {{\.?}}LBB0_1
 
 if.then:                                          ; preds = %entry
   call void @foo1()
@@ -16,8 +17,8 @@ if.then:                                          ; preds = %entry
 
 if.else:                                          ; preds = %entry
   br i1 0, label %if.then2, label %if.else3
-; THUMB: b LBB0_4
-; ARM:  b LBB0_4
+; THUMB: b {{\.?}}LBB0_4
+; ARM:  b {{\.?}}LBB0_4
 
 if.then2:                                         ; preds = %if.else
   call void @foo2()
@@ -26,8 +27,8 @@ if.then2:                                         ; preds = %if.else
 if.else3:                                         ; preds = %if.else
   %y = sub i32 %a, %b
   br i1 1, label %if.then5, label %if.end
-; THUMB-NOT: b LBB0_5
-; ARM-NOT:  b LBB0_5
+; THUMB-NOT: b {{\.?}}LBB0_5
+; ARM-NOT:  b {{\.?}}LBB0_5
 
 if.then5:                                         ; preds = %if.else3
   call void @foo1()
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
index a0aba694e43c..3b9d4652b755 100644
--- a/test/CodeGen/ARM/fast-isel-br-phi.ll
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
 
 ; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
 ; non-legal integer types (i.e., i1, i8, i16).
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
index b6f201728c2b..da829e929ef0 100644
--- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Fast-isel can't handle non-double multi-reg retvals.
 ; This test just check to make sure we don't hit the assert in FinishCall.
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index b6c9098613fe..917a15d28bd7 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,9 +1,18 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
+
+; XFAIL: vg_leak
+
+; Note that some of these tests assume that relocations are either
+; movw/movt or constant pool loads. Different platforms will select
+; different approaches.
 
 define i32 @t0(i1 zeroext %a) nounwind {
   %1 = zext i1 %a to i32
@@ -44,9 +53,9 @@ define void @foo(i8 %a, i16 %b) nounwind {
 ; THUMB: sxtb	r2, r1
 ; THUMB: mov r0, r2
   %2 = call i32 @t1(i8 signext %a)
-; ARM: uxtb	r2, r1
+; ARM: and	r2, r1, #255
 ; ARM: mov r0, r2
-; THUMB: uxtb	r2, r1
+; THUMB: and	r2, r1, #255
 ; THUMB: mov r0, r2
   %3 = call i32 @t2(i8 zeroext %a)
 ; ARM: sxth	r2, r1
@@ -85,56 +94,56 @@ declare signext i8 @t7();
 declare zeroext i8 @t8();
 declare zeroext i1 @t9();
 
-define i32 @t10(i32 %argc, i8** nocapture %argv) {
+define i32 @t10() {
 entry:
 ; ARM: @t10
-; ARM: movw r0, #0
-; ARM: movw r1, #248
-; ARM: movw r2, #187
-; ARM: movw r3, #28
-; ARM: movw r9, #40
-; ARM: movw r12, #186
-; ARM: uxtb r0, r0
-; ARM: uxtb r1, r1
-; ARM: uxtb r2, r2
-; ARM: uxtb r3, r3
-; ARM: uxtb r9, r9
-; ARM: str r9, [sp]
-; ARM: uxtb r9, r12
-; ARM: str r9, [sp, #4]
-; ARM: bl _bar
+; ARM: movw [[R0:l?r[0-9]*]], #0
+; ARM: movw [[R1:l?r[0-9]*]], #248
+; ARM: movw [[R2:l?r[0-9]*]], #187
+; ARM: movw [[R3:l?r[0-9]*]], #28
+; ARM: movw [[R4:l?r[0-9]*]], #40
+; ARM: movw [[R5:l?r[0-9]*]], #186
+; ARM: and [[R0]], [[R0]], #255
+; ARM: and [[R1]], [[R1]], #255
+; ARM: and [[R2]], [[R2]], #255
+; ARM: and [[R3]], [[R3]], #255
+; ARM: and [[R4]], [[R4]], #255
+; ARM: str [[R4]], [sp]
+; ARM: and [[R4]], [[R5]], #255
+; ARM: str [[R4]], [sp, #4]
+; ARM: bl {{_?}}bar
 ; ARM-LONG: @t10
-; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
-; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
-; ARM-LONG: ldr lr, [lr]
-; ARM-LONG: blx lr
+; ARM-LONG: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; ARM-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
+; ARM-LONG: ldr [[R]], {{\[}}[[R]]{{\]}}
+; ARM-LONG: blx [[R]]
 ; THUMB: @t10
-; THUMB: movs r0, #0
-; THUMB: movt r0, #0
-; THUMB: movs r1, #248
-; THUMB: movt r1, #0
-; THUMB: movs r2, #187
-; THUMB: movt r2, #0
-; THUMB: movs r3, #28
-; THUMB: movt r3, #0
-; THUMB: movw r9, #40
-; THUMB: movt r9, #0
-; THUMB: movw r12, #186
-; THUMB: movt r12, #0
-; THUMB: uxtb r0, r0
-; THUMB: uxtb r1, r1
-; THUMB: uxtb r2, r2
-; THUMB: uxtb r3, r3
-; THUMB: uxtb.w r9, r9
-; THUMB: str.w r9, [sp]
-; THUMB: uxtb.w r9, r12
-; THUMB: str.w r9, [sp, #4]
-; THUMB: bl _bar
+; THUMB: movs [[R0:l?r[0-9]*]], #0
+; THUMB: movt [[R0]], #0
+; THUMB: movs [[R1:l?r[0-9]*]], #248
+; THUMB: movt [[R1]], #0
+; THUMB: movs [[R2:l?r[0-9]*]], #187
+; THUMB: movt [[R2]], #0
+; THUMB: movs [[R3:l?r[0-9]*]], #28
+; THUMB: movt [[R3]], #0
+; THUMB: movw [[R4:l?r[0-9]*]], #40
+; THUMB: movt [[R4]], #0
+; THUMB: movw [[R5:l?r[0-9]*]], #186
+; THUMB: movt [[R5]], #0
+; THUMB: and [[R0]], [[R0]], #255
+; THUMB: and [[R1]], [[R1]], #255
+; THUMB: and [[R2]], [[R2]], #255
+; THUMB: and [[R3]], [[R3]], #255
+; THUMB: and [[R4]], [[R4]], #255
+; THUMB: str.w [[R4]], [sp]
+; THUMB: and [[R4]], [[R5]], #255
+; THUMB: str.w [[R4]], [sp, #4]
+; THUMB: bl {{_?}}bar
 ; THUMB-LONG: @t10
-; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
-; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
-; THUMB-LONG: ldr.w lr, [lr]
-; THUMB-LONG: blx lr
+; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
+; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}}
+; THUMB-LONG: blx [[R]]
   %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
   ret i32 0
 }
@@ -147,12 +156,12 @@ define i32 @bar0(i32 %i) nounwind {
 
 define void @foo3() uwtable {
 ; ARM: movw    r0, #0
-; ARM: movw    r1, :lower16:_bar0
-; ARM: movt    r1, :upper16:_bar0
+; ARM: {{(movw r1, :lower16:_?bar0)|(ldr r1, .LCPI)}}
+; ARM: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}}
 ; ARM: blx     r1
 ; THUMB: movs    r0, #0
-; THUMB: movw    r1, :lower16:_bar0
-; THUMB: movt    r1, :upper16:_bar0
+; THUMB: {{(movw r1, :lower16:_?bar0)|(ldr.n r1, .LCPI)}}
+; THUMB: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}}
 ; THUMB: blx     r1
   %fptr = alloca i32 (i32)*, align 8
   store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
@@ -164,66 +173,23 @@ define void @foo3() uwtable {
 define i32 @LibCall(i32 %a, i32 %b) {
 entry:
 ; ARM: LibCall
-; ARM: bl ___udivsi3
+; ARM: bl {{___udivsi3|__aeabi_uidiv}}
 ; ARM-LONG: LibCall
-; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
-; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}}
+; ARM-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r2, [r2]
 ; ARM-LONG: blx r2
 ; THUMB: LibCall
-; THUMB: bl ___udivsi3
+; THUMB: bl {{___udivsi3|__aeabi_uidiv}}
 ; THUMB-LONG: LibCall
-; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
-; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}}
+; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
 ; THUMB-LONG: ldr r2, [r2]
 ; THUMB-LONG: blx r2
         %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
-define i32 @VarArg() nounwind {
-entry:
-  %i = alloca i32, align 4
-  %j = alloca i32, align 4
-  %k = alloca i32, align 4
-  %m = alloca i32, align 4
-  %n = alloca i32, align 4
-  %tmp = alloca i32, align 4
-  %0 = load i32* %i, align 4
-  %1 = load i32* %j, align 4
-  %2 = load i32* %k, align 4
-  %3 = load i32* %m, align 4
-  %4 = load i32* %n, align 4
-; ARM: VarArg
-; ARM: mov r7, sp
-; ARM: movw r0, #5
-; ARM: ldr r1, [r7, #-4]
-; ARM: ldr r2, [r7, #-8]
-; ARM: ldr r3, [r7, #-12]
-; ARM: ldr r9, [sp, #16]
-; ARM: ldr r12, [sp, #12]
-; ARM: str r9, [sp]
-; ARM: str r12, [sp, #4]
-; ARM: bl _CallVariadic
-; THUMB: mov r7, sp
-; THUMB: movs r0, #5
-; THUMB: movt r0, #0
-; THUMB: ldr r1, [sp, #28]
-; THUMB: ldr r2, [sp, #24]
-; THUMB: ldr r3, [sp, #20]
-; THUMB: ldr.w r9, [sp, #16]
-; THUMB: ldr.w r12, [sp, #12]
-; THUMB: str.w r9, [sp]
-; THUMB: str.w r12, [sp, #4]
-; THUMB: bl _CallVariadic
-  %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
-  store i32 %call, i32* %tmp, align 4
-  %5 = load i32* %tmp, align 4
-  ret i32 %5
-}
-
-declare i32 @CallVariadic(i32, ...)
-
 ; Test fastcc
 
 define fastcc void @fast_callee(float %i) ssp {
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index 660156aa48bd..55baf488a425 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
 
 define void @t1a(float %a) uwtable ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
index 686ccad029d8..5983493a818b 100644
--- a/test/CodeGen/ARM/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Test sitofp
 
@@ -130,11 +131,11 @@ entry:
 define void @uitofp_single_i8(i8 %a) nounwind ssp {
 entry:
 ; ARM: uitofp_single_i8
-; ARM: uxtb r0, r0
+; ARM: and r0, r0, #255
 ; ARM: vmov s0, r0
 ; ARM: vcvt.f32.u32 s0, s0
 ; THUMB: uitofp_single_i8
-; THUMB: uxtb r0, r0
+; THUMB: and r0, r0, #255
 ; THUMB: vmov s0, r0
 ; THUMB: vcvt.f32.u32 s0, s0
   %b.addr = alloca float, align 4
@@ -176,11 +177,11 @@ entry:
 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
 entry:
 ; ARM: uitofp_double_i8
-; ARM: uxtb r0, r0
+; ARM: and r0, r0, #255
 ; ARM: vmov s0, r0
 ; ARM: vcvt.f64.u32 d16, s0
 ; THUMB: uitofp_double_i8
-; THUMB: uxtb r0, r0
+; THUMB: and r0, r0, #255
 ; THUMB: vmov s0, r0
 ; THUMB: vcvt.f64.u32 d16, s0
   %b.addr = alloca double, align 8
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
index 8fb4b66b7dd4..ec9cf8d95019 100644
--- a/test/CodeGen/ARM/fast-isel-crash.ll
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi
 
 %union.anon = type { <16 x i32> }
 
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
index f245168a8e30..d606877673dc 100644
--- a/test/CodeGen/ARM/fast-isel-crash2.ll
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi
 ; rdar://9515076
 ; (Make sure this doesn't crash.)
 
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 3a943d854b4a..5e6666c47d3e 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Target-specific selector can't properly handle the double because it isn't
 ; being passed via a register, so the materialized arguments become dead code.
diff --git a/test/CodeGen/ARM/fast-isel-ext.ll b/test/CodeGen/ARM/fast-isel-ext.ll
new file mode 100644
index 000000000000..de0dd1917eb7
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ext.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=v7
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
+
+; Can't test pre-ARMv6 Thumb because ARM FastISel currently only supports
+; Thumb2. The ARMFastISel::ARMEmitIntExt code should work for Thumb by always
+; using two shifts.
+
+; Note that lsl, asr and lsr in Thumb are all encoded as 16-bit instructions
+; and therefore must set flags. {{s?}} below denotes this, instead of
+; duplicating tests.
+
+; zext
+
+define i8 @zext_1_8(i1 %a) nounwind ssp {
+; v7-LABEL: zext_1_8:
+; v7: and r0, r0, #1
+; prev6-LABEL: zext_1_8:
+; prev6: and r0, r0, #1
+  %r = zext i1 %a to i8
+  ret i8 %r
+}
+
+define i16 @zext_1_16(i1 %a) nounwind ssp {
+; v7-LABEL: zext_1_16:
+; v7: and r0, r0, #1
+; prev6-LABEL: zext_1_16:
+; prev6: and r0, r0, #1
+  %r = zext i1 %a to i16
+  ret i16 %r
+}
+
+define i32 @zext_1_32(i1 %a) nounwind ssp {
+; v7-LABEL: zext_1_32:
+; v7: and r0, r0, #1
+; prev6-LABEL: zext_1_32:
+; prev6: and r0, r0, #1
+  %r = zext i1 %a to i32
+  ret i32 %r
+}
+
+define i16 @zext_8_16(i8 %a) nounwind ssp {
+; v7-LABEL: zext_8_16:
+; v7: and r0, r0, #255
+; prev6-LABEL: zext_8_16:
+; prev6: and r0, r0, #255
+  %r = zext i8 %a to i16
+  ret i16 %r
+}
+
+define i32 @zext_8_32(i8 %a) nounwind ssp {
+; v7-LABEL: zext_8_32:
+; v7: and r0, r0, #255
+; prev6-LABEL: zext_8_32:
+; prev6: and r0, r0, #255
+  %r = zext i8 %a to i32
+  ret i32 %r
+}
+
+define i32 @zext_16_32(i16 %a) nounwind ssp {
+; v7-LABEL: zext_16_32:
+; v7: uxth r0, r0
+; prev6-LABEL: zext_16_32:
+; prev6: lsl{{s?}} r0, r0, #16
+; prev6: lsr{{s?}} r0, r0, #16
+  %r = zext i16 %a to i32
+  ret i32 %r
+}
+
+; sext
+
+define i8 @sext_1_8(i1 %a) nounwind ssp {
+; v7-LABEL: sext_1_8:
+; v7: lsl{{s?}} r0, r0, #31
+; v7: asr{{s?}} r0, r0, #31
+; prev6-LABEL: sext_1_8:
+; prev6: lsl{{s?}} r0, r0, #31
+; prev6: asr{{s?}} r0, r0, #31
+  %r = sext i1 %a to i8
+  ret i8 %r
+}
+
+define i16 @sext_1_16(i1 %a) nounwind ssp {
+; v7-LABEL: sext_1_16:
+; v7: lsl{{s?}} r0, r0, #31
+; v7: asr{{s?}} r0, r0, #31
+; prev6-LABEL: sext_1_16:
+; prev6: lsl{{s?}} r0, r0, #31
+; prev6: asr{{s?}} r0, r0, #31
+  %r = sext i1 %a to i16
+  ret i16 %r
+}
+
+define i32 @sext_1_32(i1 %a) nounwind ssp {
+; v7-LABEL: sext_1_32:
+; v7: lsl{{s?}} r0, r0, #31
+; v7: asr{{s?}} r0, r0, #31
+; prev6-LABEL: sext_1_32:
+; prev6: lsl{{s?}} r0, r0, #31
+; prev6: asr{{s?}} r0, r0, #31
+  %r = sext i1 %a to i32
+  ret i32 %r
+}
+
+define i16 @sext_8_16(i8 %a) nounwind ssp {
+; v7-LABEL: sext_8_16:
+; v7: sxtb r0, r0
+; prev6-LABEL: sext_8_16:
+; prev6: lsl{{s?}} r0, r0, #24
+; prev6: asr{{s?}} r0, r0, #24
+  %r = sext i8 %a to i16
+  ret i16 %r
+}
+
+define i32 @sext_8_32(i8 %a) nounwind ssp {
+; v7-LABEL: sext_8_32:
+; v7: sxtb r0, r0
+; prev6-LABEL: sext_8_32:
+; prev6: lsl{{s?}} r0, r0, #24
+; prev6: asr{{s?}} r0, r0, #24
+  %r = sext i8 %a to i32
+  ret i32 %r
+}
+
+define i32 @sext_16_32(i16 %a) nounwind ssp {
+; v7-LABEL: sext_16_32:
+; v7: sxth r0, r0
+; prev6-LABEL: sext_16_32:
+; prev6: lsl{{s?}} r0, r0, #16
+; prev6: asr{{s?}} r0, r0, #16
+  %r = sext i16 %a to i32
+  ret i32 %r
+}
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
index 7a65295f01b6..e8ed8cbf34e9 100644
--- a/test/CodeGen/ARM/fast-isel-fold.ll
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
 
 @a = global i8 1, align 1
@@ -8,9 +9,11 @@ define void @t1() nounwind uwtable ssp {
 ; ARM: t1
 ; ARM: ldrb
 ; ARM-NOT: uxtb
+; ARM-NOT: and{{.*}}, #255
 ; THUMB: t1
 ; THUMB: ldrb
 ; THUMB-NOT: uxtb
+; THUMB-NOT: and{{.*}}, #255
   %1 = load i8* @a, align 1
   call void @foo1(i8 zeroext %1)
   ret void
@@ -35,9 +38,11 @@ define i32 @t3() nounwind uwtable ssp {
 ; ARM: t3
 ; ARM: ldrb
 ; ARM-NOT: uxtb
+; ARM-NOT: and{{.*}}, #255
 ; THUMB: t3
 ; THUMB: ldrb
 ; THUMB-NOT: uxtb
+; THUMB-NOT: and{{.*}}, #255
   %1 = load i8* @a, align 1
   %2 = zext i8 %1 to i32
   ret i32 %2
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
index c256e73ab98c..8542bb5e27d2 100644
--- a/test/CodeGen/ARM/fast-isel-frameaddr.ll
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -5,22 +5,22 @@
 
 define i8* @frameaddr_index0() nounwind {
 entry:
-; DARWIN-ARM: frameaddr_index0:
+; DARWIN-ARM-LABEL: frameaddr_index0:
 ; DARWIN-ARM: push {r7}
 ; DARWIN-ARM: mov r7, sp
 ; DARWIN-ARM: mov r0, r7
 
-; DARWIN-THUMB2: frameaddr_index0:
+; DARWIN-THUMB2-LABEL: frameaddr_index0:
 ; DARWIN-THUMB2: str r7, [sp, #-4]!
 ; DARWIN-THUMB2: mov r7, sp
 ; DARWIN-THUMB2: mov r0, r7
 
-; LINUX-ARM: frameaddr_index0:
+; LINUX-ARM-LABEL: frameaddr_index0:
 ; LINUX-ARM: push {r11}
 ; LINUX-ARM: mov r11, sp
 ; LINUX-ARM: mov r0, r11
 
-; LINUX-THUMB2: frameaddr_index0:
+; LINUX-THUMB2-LABEL: frameaddr_index0:
 ; LINUX-THUMB2: str r7, [sp, #-4]!
 ; LINUX-THUMB2: mov r7, sp
 ; LINUX-THUMB2: mov r0, r7
@@ -31,25 +31,24 @@ entry:
 
 define i8* @frameaddr_index1() nounwind {
 entry:
-; DARWIN-ARM: frameaddr_index1:
+; DARWIN-ARM-LABEL: frameaddr_index1:
 ; DARWIN-ARM: push {r7}
 ; DARWIN-ARM: mov r7, sp
 ; DARWIN-ARM: mov r0, r7
 ; DARWIN-ARM: ldr r0, [r0]
 
-; DARWIN-THUMB2: frameaddr_index1:
+; DARWIN-THUMB2-LABEL: frameaddr_index1:
 ; DARWIN-THUMB2: str r7, [sp, #-4]!
 ; DARWIN-THUMB2: mov r7, sp
 ; DARWIN-THUMB2: mov r0, r7
 ; DARWIN-THUMB2: ldr r0, [r0]
 
-; LINUX-ARM: frameaddr_index1:
+; LINUX-ARM-LABEL: frameaddr_index1:
 ; LINUX-ARM: push {r11}
 ; LINUX-ARM: mov r11, sp
-; LINUX-ARM: mov r0, r11
-; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r11]
 
-; LINUX-THUMB2: frameaddr_index1:
+; LINUX-THUMB2-LABEL: frameaddr_index1:
 ; LINUX-THUMB2: str r7, [sp, #-4]!
 ; LINUX-THUMB2: mov r7, sp
 ; LINUX-THUMB2: mov r0, r7
@@ -61,7 +60,7 @@ entry:
 
 define i8* @frameaddr_index3() nounwind {
 entry:
-; DARWIN-ARM: frameaddr_index3:
+; DARWIN-ARM-LABEL: frameaddr_index3:
 ; DARWIN-ARM: push {r7}
 ; DARWIN-ARM: mov r7, sp
 ; DARWIN-ARM: mov r0, r7
@@ -69,7 +68,7 @@ entry:
 ; DARWIN-ARM: ldr r0, [r0]
 ; DARWIN-ARM: ldr r0, [r0]
 
-; DARWIN-THUMB2: frameaddr_index3:
+; DARWIN-THUMB2-LABEL: frameaddr_index3:
 ; DARWIN-THUMB2: str r7, [sp, #-4]!
 ; DARWIN-THUMB2: mov r7, sp
 ; DARWIN-THUMB2: mov r0, r7
@@ -77,15 +76,14 @@ entry:
 ; DARWIN-THUMB2: ldr r0, [r0]
 ; DARWIN-THUMB2: ldr r0, [r0]
 
-; LINUX-ARM: frameaddr_index3:
+; LINUX-ARM-LABEL: frameaddr_index3:
 ; LINUX-ARM: push {r11}
 ; LINUX-ARM: mov r11, sp
-; LINUX-ARM: mov r0, r11
-; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r11]
 ; LINUX-ARM: ldr r0, [r0]
 ; LINUX-ARM: ldr r0, [r0]
 
-; LINUX-THUMB2: frameaddr_index3:
+; LINUX-THUMB2-LABEL: frameaddr_index3:
 ; LINUX-THUMB2: str r7, [sp, #-4]!
 ; LINUX-THUMB2: mov r7, sp
 ; LINUX-THUMB2: mov r0, r7
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index 8357ed5c549c..85f449e3d71d 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
 
 define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind {
 entry:
@@ -49,12 +50,12 @@ entry:
 define i32 @icmp_i8_unsigned(i8 %a, i8 %b) nounwind {
 entry:
 ; ARM: icmp_i8_unsigned
-; ARM: uxtb r0, r0
-; ARM: uxtb r1, r1
+; ARM: and r0, r0, #255
+; ARM: and r1, r1, #255
 ; ARM: cmp r0, r1
 ; THUMB: icmp_i8_unsigned
-; THUMB: uxtb r0, r0
-; THUMB: uxtb r1, r1
+; THUMB: and r0, r0, #255
+; THUMB: and r1, r1, #255
 ; THUMB: cmp r0, r1
   %cmp = icmp ugt i8 %a, %b
   %conv2 = zext i1 %cmp to i32
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
index ebc0e8426d55..2456ef442040 100644
--- a/test/CodeGen/ARM/fast-isel-indirectbr.ll
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define void @t1(i8* %x) {
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 48105dd3893b..b08b72baa61e 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,35 +1,43 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG
+
+; XFAIL: vg_leak
+
+; Note that some of these tests assume that symbol addresses are materialized
+; with either a movw/movt pair or a constant pool load. Different platforms
+; will select different approaches.
 
 @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
 @temp = common global [60 x i8] zeroinitializer, align 1
 
 define void @t1() nounwind ssp {
 ; ARM: t1
-; ARM: movw r0, :lower16:_message1
-; ARM: movt r0, :upper16:_message1
+; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
 ; ARM: add r0, r0, #5
 ; ARM: movw r1, #64
 ; ARM: movw r2, #10
-; ARM: uxtb r1, r1
-; ARM: bl _memset
+; ARM: and r1, r1, #255
+; ARM: bl {{_?}}memset
 ; ARM-LONG: t1
-; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
-; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; ARM-LONG: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}}
+; ARM-LONG: {{(movt r3, :upper16:L_memset\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r3, [r3]
 ; ARM-LONG: blx r3
 ; THUMB: t1
-; THUMB: movw r0, :lower16:_message1
-; THUMB: movt r0, :upper16:_message1
+; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
 ; THUMB: adds r0, #5
 ; THUMB: movs r1, #64
 ; THUMB: movt r1, #0
 ; THUMB: movs r2, #10
 ; THUMB: movt r2, #0
-; THUMB: uxtb r1, r1
-; THUMB: bl _memset
+; THUMB: and r1, r1, #255
+; THUMB: bl {{_?}}memset
 ; THUMB-LONG: t1
 ; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
 ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
@@ -43,31 +51,33 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @t2() nounwind ssp {
 ; ARM: t2
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
 ; ARM: add r1, r0, #4
 ; ARM: add r0, r0, #16
 ; ARM: movw r2, #17
-; ARM: str r0, [sp]                @ 4-byte Spill
+; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
 ; ARM: mov r0, r1
-; ARM: ldr r1, [sp]                @ 4-byte Reload
-; ARM: bl _memcpy
+; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
+; ARM: bl {{_?}}memcpy
 ; ARM-LONG: t2
-; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
-; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; ARM-LONG: {{(movw r3, :lower16:L_memcpy\$non_lazy_ptr)|(ldr r3, .LCPI)}}
+; ARM-LONG: {{(movt r3, :upper16:L_memcpy\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r3, [r3]
 ; ARM-LONG: blx r3
 ; THUMB: t2
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
 ; THUMB: movs r2, #17
 ; THUMB: movt r2, #0
+; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
 ; THUMB: mov r0, r1
-; THUMB: bl _memcpy
+; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
+; THUMB: bl {{_?}}memcpy
 ; THUMB-LONG: t2
 ; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
 ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
@@ -81,29 +91,31 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 
 define void @t3() nounwind ssp {
 ; ARM: t3
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
 ; ARM: add r1, r0, #4
 ; ARM: add r0, r0, #16
 ; ARM: movw r2, #10
 ; ARM: mov r0, r1
-; ARM: bl _memmove
+; ARM: bl {{_?}}memmove
 ; ARM-LONG: t3
-; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
-; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; ARM-LONG: {{(movw r3, :lower16:L_memmove\$non_lazy_ptr)|(ldr r3, .LCPI)}}
+; ARM-LONG: {{(movt r3, :upper16:L_memmove\$non_lazy_ptr)?}}
 ; ARM-LONG: ldr r3, [r3]
 ; ARM-LONG: blx r3
 ; THUMB: t3
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
 ; THUMB: movs r2, #10
 ; THUMB: movt r2, #0
+; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
 ; THUMB: mov r0, r1
-; THUMB: bl _memmove
+; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
+; THUMB: bl {{_?}}memmove
 ; THUMB-LONG: t3
 ; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
 ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
@@ -115,8 +127,8 @@ define void @t3() nounwind ssp {
 
 define void @t4() nounwind ssp {
 ; ARM: t4
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
 ; ARM: ldr r1, [r0, #16]
 ; ARM: str r1, [r0, #4]
@@ -126,8 +138,8 @@ define void @t4() nounwind ssp {
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
 ; THUMB: t4
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
 ; THUMB: ldr r1, [r0, #16]
 ; THUMB: str r1, [r0, #4]
@@ -144,8 +156,8 @@ declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 
 define void @t5() nounwind ssp {
 ; ARM: t5
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
 ; ARM: ldrh r1, [r0, #16]
 ; ARM: strh r1, [r0, #4]
@@ -159,8 +171,8 @@ define void @t5() nounwind ssp {
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
 ; THUMB: t5
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
 ; THUMB: ldrh r1, [r0, #16]
 ; THUMB: strh r1, [r0, #4]
@@ -179,8 +191,8 @@ define void @t5() nounwind ssp {
 
 define void @t6() nounwind ssp {
 ; ARM: t6
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
 ; ARM: ldrb r1, [r0, #16]
 ; ARM: strb r1, [r0, #4]
@@ -204,8 +216,8 @@ define void @t6() nounwind ssp {
 ; ARM: strb r1, [r0, #13]
 ; ARM: bx lr
 ; THUMB: t6
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
 ; THUMB: ldrb r1, [r0, #16]
 ; THUMB: strb r1, [r0, #4]
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
index dfb8c53735a3..cf294bcfbece 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
 
 define i32 @t1(i32* nocapture %ptr) nounwind readonly {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index 2a88678da767..d9c9cc459c7e 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i32* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t1
   %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
-  %0 = load i32* %add.ptr, align 4, !tbaa !0
+  %0 = load i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0, #-4]
   ret i32 %0
 }
@@ -13,7 +13,7 @@ define i32 @t2(i32* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t2
   %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
-  %0 = load i32* %add.ptr, align 4, !tbaa !0
+  %0 = load i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0, #-252]
   ret i32 %0
 }
@@ -22,7 +22,7 @@ define i32 @t3(i32* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t3
   %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
-  %0 = load i32* %add.ptr, align 4, !tbaa !0
+  %0 = load i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0]
   ret i32 %0
 }
@@ -31,7 +31,7 @@ define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t4
   %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
-  %0 = load i16* %add.ptr, align 2, !tbaa !3
+  %0 = load i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
   ret i16 %0
 }
@@ -40,7 +40,7 @@ define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t5
   %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
-  %0 = load i16* %add.ptr, align 2, !tbaa !3
+  %0 = load i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
   ret i16 %0
 }
@@ -49,7 +49,7 @@ define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t6
   %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
-  %0 = load i16* %add.ptr, align 2, !tbaa !3
+  %0 = load i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0]
   ret i16 %0
 }
@@ -58,7 +58,7 @@ define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t7
   %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
-  %0 = load i8* %add.ptr, align 1, !tbaa !1
+  %0 = load i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
   ret i8 %0
 }
@@ -67,7 +67,7 @@ define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t8
   %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
-  %0 = load i8* %add.ptr, align 1, !tbaa !1
+  %0 = load i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
   ret i8 %0
 }
@@ -76,7 +76,7 @@ define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
 entry:
 ; THUMB: t9
   %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
-  %0 = load i8* %add.ptr, align 1, !tbaa !1
+  %0 = load i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0]
   ret i8 %0
 }
@@ -85,7 +85,7 @@ define void @t10(i32* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t10
   %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
-  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+  store i32 0, i32* %add.ptr, align 4
 ; THUMB: str r{{[0-9]}}, [r0, #-4]
   ret void
 }
@@ -94,7 +94,7 @@ define void @t11(i32* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t11
   %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
-  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+  store i32 0, i32* %add.ptr, align 4
 ; THUMB: str r{{[0-9]}}, [r0, #-252]
   ret void
 }
@@ -103,7 +103,7 @@ define void @t12(i32* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t12
   %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
-  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+  store i32 0, i32* %add.ptr, align 4
 ; THUMB: str r{{[0-9]}}, [r0]
   ret void
 }
@@ -112,7 +112,7 @@ define void @t13(i16* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t13
   %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
-  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+  store i16 0, i16* %add.ptr, align 2
 ; THUMB: strh r{{[0-9]}}, [r0, #-2]
   ret void
 }
@@ -121,7 +121,7 @@ define void @t14(i16* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t14
   %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
-  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+  store i16 0, i16* %add.ptr, align 2
 ; THUMB: strh r{{[0-9]}}, [r0, #-254]
   ret void
 }
@@ -130,7 +130,7 @@ define void @t15(i16* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t15
   %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
-  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+  store i16 0, i16* %add.ptr, align 2
 ; THUMB: strh r{{[0-9]}}, [r0]
   ret void
 }
@@ -139,7 +139,7 @@ define void @t16(i8* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t16
   %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
-  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+  store i8 0, i8* %add.ptr, align 1
 ; THUMB: strb r{{[0-9]}}, [r0, #-1]
   ret void
 }
@@ -148,7 +148,7 @@ define void @t17(i8* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t17
   %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
-  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+  store i8 0, i8* %add.ptr, align 1
 ; THUMB: strb r{{[0-9]}}, [r0, #-255]
   ret void
 }
@@ -157,12 +157,7 @@ define void @t18(i8* nocapture %ptr) nounwind {
 entry:
 ; THUMB: t18
   %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
-  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+  store i8 0, i8* %add.ptr, align 1
 ; THUMB: strb r{{[0-9]}}, [r0]
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index 0b5267ddc973..c05ea398d72e 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
 ; rdar://10418009
 
 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
diff --git a/test/CodeGen/ARM/fast-isel-load-store-verify.ll b/test/CodeGen/ARM/fast-isel-load-store-verify.ll
new file mode 100644
index 000000000000..710d88b3158c
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-load-store-verify.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ALL
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ALL
+
+; FIXME: Add tests for thumbv7; they currently fail MI verification because
+;        of a mismatch in register classes in uses.
+
+; This test verifies that load/store instructions are properly generated,
+; and that they pass MI verification (which was not the case until 2013-06-08).
+
+@a = global i8 1, align 1
+@b = global i16 2, align 2
+@c = global i32 4, align 4
+
+; ldr
+
+define i8 @t1() nounwind uwtable ssp {
+; ALL: @t1
+; ALL: ldrb
+; ALL: add
+  %1 = load i8* @a, align 1
+  %2 = add nsw i8 %1, 1
+  ret i8 %2
+}
+
+define i16 @t2() nounwind uwtable ssp {
+; ALL: @t2
+; ALL: ldrh
+; ALL: add
+  %1 = load i16* @b, align 2
+  %2 = add nsw i16 %1, 1
+  ret i16 %2
+}
+
+define i32 @t3() nounwind uwtable ssp {
+; ALL: @t3
+; ALL: ldr
+; ALL: add
+  %1 = load i32* @c, align 4
+  %2 = add nsw i32 %1, 1
+  ret i32 %2
+}
+
+; str
+
+define void @t4(i8 %v) nounwind uwtable ssp {
+; ALL: @t4
+; ALL: add
+; ALL: strb
+  %1 = add nsw i8 %v, 1
+  store i8 %1, i8* @a, align 1
+  ret void
+}
+
+define void @t5(i16 %v) nounwind uwtable ssp {
+; ALL: @t5
+; ALL: add
+; ALL: strh
+  %1 = add nsw i16 %v, 1
+  store i16 %1, i16* @b, align 2
+  ret void
+}
+
+define void @t6(i32 %v) nounwind uwtable ssp {
+; ALL: @t6
+; ALL: add
+; ALL: str
+  %1 = add nsw i32 %v, 1
+  store i32 %1, i32* @c, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index b180e439dd6f..0bc9395e2d78 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 ; rdar://10412592
 
 ; Note: The Thumb code is being generated by the target-independent selector.
diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll
index 867d53f973db..838c103e7c09 100644
--- a/test/CodeGen/ARM/fast-isel-pic.ll
+++ b/test/CodeGen/ARM/fast-isel-pic.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
 
 @g = global i32 0, align 4
 
@@ -13,9 +13,9 @@ entry:
 ; THUMB: movt [[reg0]],
 ; THUMB: add  [[reg0]], pc
 ; THUMB-ELF: LoadGV
-; THUMB-ELF: ldr.n r[[reg0:[0-9]+]],
-; THUMB-ELF: ldr.n r[[reg1:[0-9]+]],
-; THUMB-ELF: ldr r[[reg0]], [r[[reg1]], r[[reg0]]]
+; THUMB-ELF: ldr r[[reg0:[0-9]+]],
+; THUMB-ELF: ldr r[[reg1:[0-9]+]],
+; THUMB-ELF: ldr r[[reg0]], [r[[reg0]], r[[reg1]]]
 ; ARM: LoadGV
 ; ARM: ldr [[reg1:r[0-9]+]],
 ; ARM: add [[reg1]], pc, [[reg1]]
@@ -25,6 +25,8 @@ entry:
 ; ARMv7: add  [[reg2]], pc, [[reg2]]
 ; ARMv7-ELF: LoadGV
 ; ARMv7-ELF: ldr r[[reg2:[0-9]+]],
+; ARMv7-ELF: .LPC
+; ARMv7-ELF-NEXT: add r[[reg2]], pc
 ; ARMv7-ELF: ldr r[[reg3:[0-9]+]],
 ; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]]
   %tmp = load i32* @g
@@ -41,9 +43,9 @@ entry:
 ; THUMB: add  r[[reg3]], pc
 ; THUMB: ldr  r[[reg3]], [r[[reg3]]]
 ; THUMB-ELF: LoadIndirectSymbol
-; THUMB-ELF: ldr.n r[[reg3:[0-9]+]],
-; THUMB-ELF: ldr.n r[[reg4:[0-9]+]],
-; THUMB-ELF: ldr r[[reg3]], [r[[reg4]], r[[reg3]]]
+; THUMB-ELF: ldr r[[reg3:[0-9]+]],
+; THUMB-ELF: ldr r[[reg4:[0-9]+]],
+; THUMB-ELF: ldr r[[reg3]], [r[[reg3]], r[[reg4]]]
 ; ARM: LoadIndirectSymbol
 ; ARM: ldr [[reg4:r[0-9]+]],
 ; ARM: ldr [[reg4]], [pc, [[reg4]]]
@@ -54,6 +56,8 @@ entry:
 ; ARMv7: ldr  r[[reg5]], [r[[reg5]]]
 ; ARMv7-ELF: LoadIndirectSymbol
 ; ARMv7-ELF: ldr r[[reg5:[0-9]+]],
+; ARMv7-ELF: .LPC
+; ARMv7-ELF-NEXT: add r[[reg5]], pc
 ; ARMv7-ELF: ldr r[[reg6:[0-9]+]],
 ; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]]
   %tmp = load i32* @i
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
index 27731def1f57..48f93225b6b8 100644
--- a/test/CodeGen/ARM/fast-isel-pred.ll
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi < %s
 
 define i32 @main() nounwind ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 563880dab0a9..ee150facac96 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic < %s
 ; This isn't exactly a useful set of command-line options, but check that it
 ; doesn't crash.  (It was crashing because a register was getting redefined.)
 
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
index 689b169ee32f..8a68309dc831 100644
--- a/test/CodeGen/ARM/fast-isel-ret.ll
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
 
 ; Sign-extend of i1 currently not supported by fast-isel
 ;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
@@ -26,7 +27,7 @@ entry:
 define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
 entry:
 ; CHECK: ret3
-; CHECK: uxtb r0, r0
+; CHECK: and r0, r0, #255
 ; CHECK: bx lr
   ret i8 %a
 }
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index b83a73366948..40f88075039e 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv8-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i1 %c) nounwind readnone {
 entry:
@@ -38,15 +40,16 @@ define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
 entry:
 ; ARM: t3
 ; ARM: cmp r0, #0
-; ARM: movne r{{[1-9]}}, r{{[1-9]}}
-; ARM: mov r0, r{{[1-9]}}
+; ARM: movne r2, r1
+; ARM: add r0, r2, r1
 ; THUMB: t3
 ; THUMB: cmp r0, #0
 ; THUMB: it ne
-; THUMB: movne r{{[1-9]}}, r{{[1-9]}}
-; THUMB: mov r0, r{{[1-9]}}
+; THUMB: movne r2, r1
+; THUMB: add.w r0, r2, r1
   %0 = select i1 %c, i32 %a, i32 %b
-  ret i32 %0
+  %1 = add i32 %0, %a
+  ret i32 %1
 }
 
 define i32 @t4(i1 %c) nounwind readnone {
diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll
index 111818b289e8..eb4b2b2ce0ae 100644
--- a/test/CodeGen/ARM/fast-isel-shifter.ll
+++ b/test/CodeGen/ARM/fast-isel-shifter.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
 
 define i32 @shl() nounwind ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index e8759a7fc4ce..93c14a09205e 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
 
 define void @myadd(float* %sum, float* %addend) nounwind {
 entry:
@@ -24,7 +26,7 @@ entry:
   store float 0.000000e+00, float* %ztot, align 4
   store float 1.000000e+00, float* %z, align 4
 ; CHECK-LONG: blx     r
-; CHECK-NORM: bl      _myadd
+; CHECK-NORM: bl      {{_?}}myadd
   call void @myadd(float* %ztot, float* %z)
   ret i32 0
 }
diff --git a/test/CodeGen/ARM/fast-isel-vararg.ll b/test/CodeGen/ARM/fast-isel-vararg.ll
new file mode 100644
index 000000000000..0b7b0bd1c6f0
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @VarArg() nounwind {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i32, align 4
+  %k = alloca i32, align 4
+  %m = alloca i32, align 4
+  %n = alloca i32, align 4
+  %tmp = alloca i32, align 4 ; slot that receives the call result below
+  %0 = load i32* %i, align 4 ; the five slots are loaded without ever being stored to
+  %1 = load i32* %j, align 4
+  %2 = load i32* %k, align 4
+  %3 = load i32* %m, align 4
+  %4 = load i32* %n, align 4
+; ARM: VarArg
+; ARM: mov [[FP:r[0-9]+]], sp
+; ARM: sub sp, sp, #32
+; ARM: movw r0, #5
+; ARM: ldr r1, {{\[}}[[FP]], #-4]
+; ARM: ldr r2, {{\[}}[[FP]], #-8]
+; ARM: ldr r3, {{\[}}[[FP]], #-12]
+; ARM: ldr [[Ra:r[0-9]+]], [sp, #16]
+; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12]
+; ARM: str [[Ra]], [sp]
+; ARM: str [[Rb]], [sp, #4]
+; ARM: bl {{_?CallVariadic}}
+; THUMB: sub sp, #32
+; THUMB: movs r0, #5
+; THUMB: movt r0, #0
+; THUMB: ldr r1, [sp, #28]
+; THUMB: ldr r2, [sp, #24]
+; THUMB: ldr r3, [sp, #20]
+; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16]
+; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #12]
+; THUMB: str.w {{[a-z0-9]+}}, [sp]
+; THUMB: str.w {{[a-z0-9]+}}, [sp, #4]
+; THUMB: bl {{_?}}CallVariadic
+  %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) ; six arguments; the expected output above places the last two at [sp] and [sp, #4]
+  store i32 %call, i32* %tmp, align 4
+  %5 = load i32* %tmp, align 4
+  ret i32 %5
+}
+
+declare i32 @CallVariadic(i32, ...) ; variadic callee, declaration only
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 41fda4132632..5981cab7dcb1 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,10 +1,9 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
 
 ; Very basic fast-isel functionality.
-define i32 @add(i32 %a, i32 %b) nounwind {
+define i32 @test0(i32 %a, i32 %b) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %b.addr = alloca i32, align 4
@@ -28,16 +27,16 @@ br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
 ret void
-; ARM: test1:
+; ARM-LABEL: test1:
 ; ARM: tst r0, #1
-; THUMB: test1:
+; THUMB-LABEL: test1:
 ; THUMB: tst.w r0, #1
 }
 
 ; Check some simple operations with immediates
 define void @test2(i32 %tmp, i32* %ptr) nounwind {
-; THUMB: test2:
-; ARM: test2:
+; THUMB-LABEL: test2:
+; ARM-LABEL: test2:
 
 b1:
   %a = add i32 %tmp, 4096
@@ -65,8 +64,8 @@ b3:
 }
 
 define void @test3(i32 %tmp, i32* %ptr1, i16* %ptr2, i8* %ptr3) nounwind {
-; THUMB: test3:
-; ARM: test3:
+; THUMB-LABEL: test3:
+; ARM-LABEL: test3:
 
 bb1:
   %a1 = trunc i32 %tmp to i16
@@ -82,12 +81,12 @@ bb1:
 
 ; THUMB: and
 ; THUMB: strb
-; THUMB: uxtb
+; THUMB: and{{.*}}, #255
 ; THUMB: strh
 ; THUMB: uxth
 ; ARM: and
 ; ARM: strb
-; ARM: uxtb
+; ARM: and{{.*}}, #255
 ; ARM: strh
 ; ARM: uxth
 
@@ -123,13 +122,13 @@ bb3:
 
 ; THUMB: ldrb
 ; THUMB: ldrh
-; THUMB: uxtb
+; THUMB: and{{.*}}, #255
 ; THUMB: sxth
 ; THUMB: add
 ; THUMB: sub
 ; ARM: ldrb
 ; ARM: ldrh
-; ARM: uxtb
+; ARM: and{{.*}}, #255
 ; ARM: sxth
 ; ARM: add
 ; ARM: sub
@@ -144,82 +143,25 @@ define void @test4() {
   store i32 %b, i32* @test4g
   ret void
 
-; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
+
+; Note that relocations are either movw/movt or constant pool
+; loads. Different platforms will select different approaches.
+
+; THUMB: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}}
 ; THUMB: ldr r0, [r0]
 ; THUMB: ldr r1, [r0]
 ; THUMB: adds r1, #1
 ; THUMB: str r1, [r0]
 
-; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
-; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}}
 ; ARM: ldr r0, [r0]
 ; ARM: ldr r1, [r0]
 ; ARM: add r1, r1, #1
 ; ARM: str r1, [r0]
 }
 
-; Check unaligned stores
-%struct.anon = type <{ float }>
-
-@a = common global %struct.anon* null, align 4
-
-define void @unaligned_store(float %x, float %y) nounwind {
-entry:
-; ARM: @unaligned_store
-; ARM: vmov r1, s0
-; ARM: str r1, [r0]
-
-; THUMB: @unaligned_store
-; THUMB: vmov r1, s0
-; THUMB: str r1, [r0]
-
-  %add = fadd float %x, %y
-  %0 = load %struct.anon** @a, align 4
-  %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
-  store float %add, float* %x1, align 1
-  ret void
-}
-
-; Doublewords require only word-alignment.
-; rdar://10528060
-%struct.anon.0 = type { double }
-
-@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
-
-define void @test5(double %a, double %b) nounwind {
-entry:
-; ARM: @test5
-; THUMB: @test5
-  %add = fadd double %a, %b
-  store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
-; ARM: vstr d16, [r0]
-; THUMB: vstr d16, [r0]
-  ret void
-}
-
-; Check unaligned loads of floats
-%class.TAlignTest = type <{ i16, float }>
-
-define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
-entry:
-; ARM: @test6
-; THUMB: @test6
-  %0 = alloca %class.TAlignTest*, align 4
-  store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
-  %1 = load %class.TAlignTest** %0
-  %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
-  %3 = load float* %2, align 1
-  %4 = fcmp une float %3, 0.000000e+00
-; ARM: ldr r0, [r0, #2]
-; ARM: vmov s0, r0
-; ARM: vcmpe.f32 s0, #0
-; THUMB: ldr.w r0, [r0, #2]
-; THUMB: vmov s0, r0
-; THUMB: vcmpe.f32 s0, #0
-  ret i1 %4
-}
-
 ; ARM: @urem_fold
 ; THUMB: @urem_fold
 ; ARM: and r0, r0, #31
@@ -229,10 +171,10 @@ define i32 @urem_fold(i32 %a) nounwind {
   ret i32 %rem
 }
 
-define i32 @test7() noreturn nounwind  {
+define i32 @trap_intrinsic() noreturn nounwind  {
 entry:
-; ARM: @test7
-; THUMB: @test7
+; ARM: @trap_intrinsic
+; THUMB: @trap_intrinsic
 ; ARM: trap
 ; THUMB: trap
   tail call void @llvm.trap( )
@@ -240,67 +182,3 @@ entry:
 }
 
 declare void @llvm.trap() nounwind
-
-define void @unaligned_i16_store(i16 %x, i16* %y) nounwind {
-entry:
-; ARM-STRICT-ALIGN: @unaligned_i16_store
-; ARM-STRICT-ALIGN: strb
-; ARM-STRICT-ALIGN: strb
-
-; THUMB-STRICT-ALIGN: @unaligned_i16_store
-; THUMB-STRICT-ALIGN: strb
-; THUMB-STRICT-ALIGN: strb
-
-  store i16 %x, i16* %y, align 1
-  ret void
-}
-
-define i16 @unaligned_i16_load(i16* %x) nounwind {
-entry:
-; ARM-STRICT-ALIGN: @unaligned_i16_load
-; ARM-STRICT-ALIGN: ldrb
-; ARM-STRICT-ALIGN: ldrb
-
-; THUMB-STRICT-ALIGN: @unaligned_i16_load
-; THUMB-STRICT-ALIGN: ldrb
-; THUMB-STRICT-ALIGN: ldrb
-
-  %0 = load i16* %x, align 1
-  ret i16 %0
-}
-
-define void @unaligned_i32_store(i32 %x, i32* %y) nounwind {
-entry:
-; ARM-STRICT-ALIGN: @unaligned_i32_store
-; ARM-STRICT-ALIGN: strb
-; ARM-STRICT-ALIGN: strb
-; ARM-STRICT-ALIGN: strb
-; ARM-STRICT-ALIGN: strb
-
-; THUMB-STRICT-ALIGN: @unaligned_i32_store
-; THUMB-STRICT-ALIGN: strb
-; THUMB-STRICT-ALIGN: strb
-; THUMB-STRICT-ALIGN: strb
-; THUMB-STRICT-ALIGN: strb
-
-  store i32 %x, i32* %y, align 1
-  ret void
-}
-
-define i32 @unaligned_i32_load(i32* %x) nounwind {
-entry:
-; ARM-STRICT-ALIGN: @unaligned_i32_load
-; ARM-STRICT-ALIGN: ldrb
-; ARM-STRICT-ALIGN: ldrb
-; ARM-STRICT-ALIGN: ldrb
-; ARM-STRICT-ALIGN: ldrb
-
-; THUMB-STRICT-ALIGN: @unaligned_i32_load
-; THUMB-STRICT-ALIGN: ldrb
-; THUMB-STRICT-ALIGN: ldrb
-; THUMB-STRICT-ALIGN: ldrb
-; THUMB-STRICT-ALIGN: ldrb
-
-  %0 = load i32* %x, align 1
-  ret i32 %0
-}
diff --git a/test/CodeGen/ARM/fast-tail-call.ll b/test/CodeGen/ARM/fast-tail-call.ll
new file mode 100644
index 000000000000..9fbdc9d24b01
--- /dev/null
+++ b/test/CodeGen/ARM/fast-tail-call.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=thumbv7-linux-gnueabi -O0 -arm-tail-calls < %s | FileCheck %s
+
+; Primarily a non-crash test: Thumbv7 Linux does not have FastISel support,
+; which led (via a convoluted route) to DAG nodes after a TC_RETURN that
+; couldn't possibly work.
+
+declare i8* @g(i8*)
+
+define i8* @f(i8* %a) {
+entry:
+  %0 = tail call i8* @g(i8* %a) ; argument is forwarded and the result returned unchanged, so a plain branch to g is expected
+  ret i8* %0
+; CHECK: b g
+; CHECK-NOT: ldr
+; CHECK-NOT: str
+}
diff --git a/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll b/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
new file mode 100644
index 000000000000..a32ab6d09317
--- /dev/null
+++ b/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
@@ -0,0 +1,18 @@
+; fastisel should not fold add with non-pointer bitwidth
+; sext(a) + sext(b) != sext(a + b)
+; RUN: llc -mtriple=armv7-apple-ios %s -O0 -o - | FileCheck %s
+
+define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
+entry:
+  %ptr.addr = alloca i8*, align 8
+  %add = add i8 64, 64 ; 0x40 + 0x40 = 0x80, which no longer fits in a signed i8
+  %0 = load i8** %ptr.addr, align 8
+
+  ; CHECK-LABEL: _gep_promotion:
+  ; CHECK: ldrb {{r[0-9]+}}, {{\[r[0-9]+\]}}
+  %arrayidx = getelementptr inbounds i8* %0, i8 %add ; index type is i8, narrower than the pointer
+
+  %1 = load i8* %arrayidx, align 1 ; the ldrb expected above uses a bare [rN] address, i.e. no folded immediate offset
+  ret i8 %1
+}
+
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 5511d24cb280..1de057208ce3 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -4,11 +4,11 @@
 ; rdar://8984306
 define float @test1(float %x, float %y) nounwind {
 entry:
-; SOFT: test1:
+; SOFT-LABEL: test1:
 ; SOFT: lsr r1, r1, #31
 ; SOFT: bfi r0, r1, #31, #1
 
-; HARD: test1:
+; HARD-LABEL: test1:
 ; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
 ; HARD: vbsl [[REG1]], d
   %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
@@ -17,11 +17,11 @@ entry:
 
 define double @test2(double %x, double %y) nounwind {
 entry:
-; SOFT: test2:
+; SOFT-LABEL: test2:
 ; SOFT: lsr r2, r3, #31
 ; SOFT: bfi r1, r2, #31, #1
 
-; HARD: test2:
+; HARD-LABEL: test2:
 ; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
 ; HARD: vshl.i64 [[REG2]], [[REG2]], #32
 ; HARD: vbsl [[REG2]], d1, d0
@@ -31,7 +31,7 @@ entry:
 
 define double @test3(double %x, double %y, double %z) nounwind {
 entry:
-; SOFT: test3:
+; SOFT-LABEL: test3:
 ; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000
 ; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
 ; SOFT: vbsl [[REG3]],
@@ -43,7 +43,7 @@ entry:
 ; rdar://9287902
 define float @test4() nounwind {
 entry:
-; SOFT: test4:
+; SOFT-LABEL: test4:
 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
 ; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 8f13f395e078..a4fecfe14588 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -9,15 +9,15 @@ entry:
 	ret float %0
 }
 
-; VFP2: test:
+; VFP2-LABEL: test:
 ; VFP2: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
 
-; NFP1: test:
+; NFP1-LABEL: test:
 ; NFP1: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
-; NFP0: test:
+; NFP0-LABEL: test:
 ; NFP0: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
 
-; CORTEXA8: test:
+; CORTEXA8-LABEL: test:
 ; CORTEXA8: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
-; CORTEXA9: test:
+; CORTEXA9-LABEL: test:
 ; CORTEXA9: 	vdiv.f32	s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll
index b63f609e755a..f2486c65d3a2 100644
--- a/test/CodeGen/ARM/fmacs.ll
+++ b/test/CodeGen/ARM/fmacs.ll
@@ -6,13 +6,13 @@
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
-; VFP2: t1:
+; VFP2-LABEL: t1:
 ; VFP2: vmla.f32
 
-; NEON: t1:
+; NEON-LABEL: t1:
 ; NEON: vmla.f32
 
-; A8: t1:
+; A8-LABEL: t1:
 ; A8: vmul.f32
 ; A8: vadd.f32
 	%0 = fmul float %a, %b
@@ -22,13 +22,13 @@ entry:
 
 define double @t2(double %acc, double %a, double %b) {
 entry:
-; VFP2: t2:
+; VFP2-LABEL: t2:
 ; VFP2: vmla.f64
 
-; NEON: t2:
+; NEON-LABEL: t2:
 ; NEON: vmla.f64
 
-; A8: t2:
+; A8-LABEL: t2:
 ; A8: vmul.f64
 ; A8: vadd.f64
 	%0 = fmul double %a, %b
@@ -38,13 +38,13 @@ entry:
 
 define float @t3(float %acc, float %a, float %b) {
 entry:
-; VFP2: t3:
+; VFP2-LABEL: t3:
 ; VFP2: vmla.f32
 
-; NEON: t3:
+; NEON-LABEL: t3:
 ; NEON: vmla.f32
 
-; A8: t3:
+; A8-LABEL: t3:
 ; A8: vmul.f32
 ; A8: vadd.f32
 	%0 = fmul float %a, %b
@@ -56,18 +56,18 @@ entry:
 ; rdar://8659675
 define void @t4(float %acc1, float %a, float %b, float %acc2, float %c, float* %P1, float* %P2) {
 entry:
-; A8: t4:
+; A8-LABEL: t4:
 ; A8: vmul.f32
 ; A8: vmul.f32
 ; A8: vadd.f32
 ; A8: vadd.f32
 
 ; Two vmla with now RAW hazard
-; A9: t4:
+; A9-LABEL: t4:
 ; A9: vmla.f32
 ; A9: vmla.f32
 
-; HARD: t4:
+; HARD-LABEL: t4:
 ; HARD: vmla.f32 s0, s1, s2
 ; HARD: vmla.f32 s3, s1, s4
   %0 = fmul float %a, %b
@@ -81,18 +81,18 @@ entry:
 
 define float @t5(float %a, float %b, float %c, float %d, float %e) {
 entry:
-; A8: t5:
+; A8-LABEL: t5:
 ; A8: vmul.f32
 ; A8: vmul.f32
 ; A8: vadd.f32
 ; A8: vadd.f32
 
-; A9: t5:
+; A9-LABEL: t5:
 ; A9: vmla.f32
 ; A9: vmul.f32
 ; A9: vadd.f32
 
-; HARD: t5:
+; HARD-LABEL: t5:
 ; HARD: vmla.f32 s4, s0, s1
 ; HARD: vmul.f32 s0, s2, s3
 ; HARD: vadd.f32 s0, s4, s0
diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll
index a182833a7a2c..f16ec172cb70 100644
--- a/test/CodeGen/ARM/fmscs.ll
+++ b/test/CodeGen/ARM/fmscs.ll
@@ -4,13 +4,13 @@
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
-; VFP2: t1:
+; VFP2-LABEL: t1:
 ; VFP2: vnmls.f32
 
-; NEON: t1:
+; NEON-LABEL: t1:
 ; NEON: vnmls.f32
 
-; A8: t1:
+; A8-LABEL: t1:
 ; A8: vmul.f32
 ; A8: vsub.f32
 	%0 = fmul float %a, %b
@@ -20,13 +20,13 @@ entry:
 
 define double @t2(double %acc, double %a, double %b) {
 entry:
-; VFP2: t2:
+; VFP2-LABEL: t2:
 ; VFP2: vnmls.f64
 
-; NEON: t2:
+; NEON-LABEL: t2:
 ; NEON: vnmls.f64
 
-; A8: t2:
+; A8-LABEL: t2:
 ; A8: vmul.f64
 ; A8: vsub.f64
 	%0 = fmul double %a, %b
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index f5245c946398..d11f6bd1bd99 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -11,19 +11,19 @@ entry:
 	ret float %0
 }
 
-; VFP2: test:
+; VFP2-LABEL: test:
 ; VFP2: 	vmul.f32	s
 
-; NFP1: test:
+; NFP1-LABEL: test:
 ; NFP1: 	vmul.f32	d
-; NFP0: test:
+; NFP0-LABEL: test:
 ; NFP0: 	vmul.f32	s
 
-; CORTEXA8: test:
+; CORTEXA8-LABEL: test:
 ; CORTEXA8: 	vmul.f32	s
-; CORTEXA8U: test:
+; CORTEXA8U-LABEL: test:
 ; CORTEXA8U: 	vmul.f32	d
-; CORTEXA9: test:
+; CORTEXA9-LABEL: test:
 ; CORTEXA9: 	vmul.f32	s
 
 ; VFP2: test2
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index d84690ba4e4b..dc4c2e33e491 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -14,22 +14,22 @@ entry:
 	%retval = select i1 %3, float %1, float %0		; <float> [#uses=1]
 	ret float %retval
 }
-; VFP2: test1:
+; VFP2-LABEL: test1:
 ; VFP2: 	vneg.f32	s{{.*}}, s{{.*}}
 
-; NFP1: test1:
+; NFP1-LABEL: test1:
 ; NFP1: 	vneg.f32	d{{.*}}, d{{.*}}
 
-; NFP0: test1:
+; NFP0-LABEL: test1:
 ; NFP0: 	vneg.f32	s{{.*}}, s{{.*}}
 
-; CORTEXA8: test1:
+; CORTEXA8-LABEL: test1:
 ; CORTEXA8: 	vneg.f32	s{{.*}}, s{{.*}}
 
-; CORTEXA8U: test1:
+; CORTEXA8U-LABEL: test1:
 ; CORTEXA8U: 	vneg.f32	d{{.*}}, d{{.*}}
 
-; CORTEXA9: test1:
+; CORTEXA9-LABEL: test1:
 ; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
 
 define float @test2(float* %a) {
@@ -41,21 +41,21 @@ entry:
 	%retval = select i1 %3, float %1, float %0		; <float> [#uses=1]
 	ret float %retval
 }
-; VFP2: test2:
+; VFP2-LABEL: test2:
 ; VFP2: 	vneg.f32	s{{.*}}, s{{.*}}
 
-; NFP1: test2:
+; NFP1-LABEL: test2:
 ; NFP1: 	vneg.f32	d{{.*}}, d{{.*}}
 
-; NFP0: test2:
+; NFP0-LABEL: test2:
 ; NFP0: 	vneg.f32	s{{.*}}, s{{.*}}
 
-; CORTEXA8: test2:
+; CORTEXA8-LABEL: test2:
 ; CORTEXA8: 	vneg.f32	s{{.*}}, s{{.*}}
 
-; CORTEXA8U: test2:
+; CORTEXA8U-LABEL: test2:
 ; CORTEXA8U: 	vneg.f32	d{{.*}}, d{{.*}}
 
-; CORTEXA9: test2:
+; CORTEXA9-LABEL: test2:
 ; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
 
diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll
index 1763d46e06c4..825feaa0453f 100644
--- a/test/CodeGen/ARM/fnmacs.ll
+++ b/test/CodeGen/ARM/fnmacs.ll
@@ -4,13 +4,13 @@
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
-; VFP2: t1:
+; VFP2-LABEL: t1:
 ; VFP2: vmls.f32
 
-; NEON: t1:
+; NEON-LABEL: t1:
 ; NEON: vmls.f32
 
-; A8: t1:
+; A8-LABEL: t1:
 ; A8: vmul.f32
 ; A8: vsub.f32
 	%0 = fmul float %a, %b
@@ -20,13 +20,13 @@ entry:
 
 define double @t2(double %acc, double %a, double %b) {
 entry:
-; VFP2: t2:
+; VFP2-LABEL: t2:
 ; VFP2: vmls.f64
 
-; NEON: t2:
+; NEON-LABEL: t2:
 ; NEON: vmls.f64
 
-; A8: t2:
+; A8-LABEL: t2:
 ; A8: vmul.f64
 ; A8: vsub.f64
 	%0 = fmul double %a, %b
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index c30806173428..78ccb6095e05 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -7,17 +7,17 @@
 
 define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
-; VFP2: t1:
+; VFP2-LABEL: t1:
 ; VFP2: vnmla.f32
 
-; NEON: t1:
+; NEON-LABEL: t1:
 ; NEON: vnmla.f32
 
-; A8U: t1:
+; A8U-LABEL: t1:
 ; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 ; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 
-; A8: t1:
+; A8-LABEL: t1:
 ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 ; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 	%0 = fmul float %a, %b
@@ -28,17 +28,17 @@ entry:
 
 define float @t2(float %acc, float %a, float %b) nounwind {
 entry:
-; VFP2: t2:
+; VFP2-LABEL: t2:
 ; VFP2: vnmla.f32
 
-; NEON: t2:
+; NEON-LABEL: t2:
 ; NEON: vnmla.f32
 
-; A8U: t2:
+; A8U-LABEL: t2:
 ; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
 ; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 
-; A8: t2:
+; A8-LABEL: t2:
 ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
 ; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 	%0 = fmul float %a, %b
@@ -49,17 +49,17 @@ entry:
 
 define double @t3(double %acc, double %a, double %b) nounwind {
 entry:
-; VFP2: t3:
+; VFP2-LABEL: t3:
 ; VFP2: vnmla.f64
 
-; NEON: t3:
+; NEON-LABEL: t3:
 ; NEON: vnmla.f64
 
-; A8U: t3:
+; A8U-LABEL: t3:
 ; A8U: vnmul.f64 d
 ; A8U: vsub.f64 d
 
-; A8: t3:
+; A8-LABEL: t3:
 ; A8: vnmul.f64 d
 ; A8: vsub.f64 d
 	%0 = fmul double %a, %b
@@ -70,17 +70,17 @@ entry:
 
 define double @t4(double %acc, double %a, double %b) nounwind {
 entry:
-; VFP2: t4:
+; VFP2-LABEL: t4:
 ; VFP2: vnmla.f64
 
-; NEON: t4:
+; NEON-LABEL: t4:
 ; NEON: vnmla.f64
 
-; A8U: t4:
+; A8U-LABEL: t4:
 ; A8U: vnmul.f64 d
 ; A8U: vsub.f64 d
 
-; A8: t4:
+; A8-LABEL: t4:
 ; A8: vnmul.f64 d
 ; A8: vsub.f64 d
 	%0 = fmul double %a, %b
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
new file mode 100644
index 000000000000..67fd129fd1c9
--- /dev/null
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -0,0 +1,164 @@
+; RUN: llc -mtriple=thumbv7-apple-darwin-eabi < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1
+; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS
+
+
+declare void @bar(i8*)
+
+%bigVec = type [2 x double]
+
+@var = global %bigVec zeroinitializer
+
+define void @check_simple() minsize {
+; CHECK-LABEL: check_simple:
+; CHECK: push.w {r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp, sp,
+; ...
+; CHECK-NOT: add sp, sp,
+; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
+
+; CHECK-T1-LABEL: check_simple:
+; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
+; CHECK-T1: add r7, sp, #16
+; CHECK-T1-NOT: sub sp, sp,
+; ...
+; CHECK-T1-NOT: add sp, sp,
+; CHECK-T1: pop {r0, r1, r2, r3, r7, pc}
+
+  ; iOS always has a frame pointer and messing with the push affects
+  ; how it's set in the prologue. Make sure we get that right.
+; CHECK-IOS-LABEL: check_simple:
+; CHECK-IOS: push {r3, r4, r5, r6, r7, lr}
+; CHECK-IOS-NOT: sub sp,
+; CHECK-IOS: add r7, sp, #16
+; CHECK-IOS-NOT: sub sp,
+; ...
+; CHECK-IOS-NOT: add sp,
+; CHECK-IOS: pop {r0, r1, r2, r3, r7, pc}
+
+  %var = alloca i8, i32 16 ; 16 bytes of locals; every variant above expects them to be covered by extra registers in the push/pop lists
+  call void @bar(i8* %var) ; passing the address to an external function keeps the alloca in use
+  ret void
+}
+
+define void @check_simple_too_big() minsize {
+; CHECK-LABEL: check_simple_too_big:
+; CHECK: push.w {r11, lr}
+; CHECK: sub sp,
+; ...
+; CHECK: add sp,
+; CHECK: pop.w {r11, pc}
+  %var = alloca i8, i32 64 ; 64 bytes of locals; the expected output keeps an explicit sub/add of sp around the body
+  call void @bar(i8* %var)
+  ret void
+}
+
+define void @check_vfp_fold() minsize {
+; CHECK-LABEL: check_vfp_fold:
+; CHECK: push {r[[GLOBREG:[0-9]+]], lr}
+; CHECK: vpush {d6, d7, d8, d9}
+; CHECK-NOT: sub sp,
+; ...
+; CHECK: vldmia r[[GLOBREG]], {d8, d9}
+; ...
+; CHECK-NOT: add sp,
+; CHECK: vpop {d6, d7, d8, d9}
+; CHECK: pop {r[[GLOBREG]], pc}
+
+  ; iOS uses aligned NEON stores here, which is convenient since we
+  ; want to make sure that works too.
+; CHECK-IOS-LABEL: check_vfp_fold:
+; CHECK-IOS: push {r0, r1, r2, r3, r4, r7, lr}
+; CHECK-IOS: sub.w r4, sp, #16
+; CHECK-IOS: bic r4, r4, #15
+; CHECK-IOS: mov sp, r4
+; CHECK-IOS: vst1.64 {d8, d9}, [r4:128]
+; ...
+; CHECK-IOS: add r4, sp, #16
+; CHECK-IOS: vld1.64 {d8, d9}, [r4:128]
+; CHECK-IOS: mov sp, r4
+; CHECK-IOS: pop {r4, r7, pc}
+
+  %var = alloca i8, i32 16 ; 16 bytes of locals; the first variant above expects no explicit sub/add of sp for them
+
+  %tmp = load %bigVec* @var ; loaded before the call and stored after it, so the value stays live across @bar
+  call void @bar(i8* %var)
+  store %bigVec %tmp, %bigVec* @var
+
+  ret void
+}
+
+; This function should use just enough space that the "add sp, sp, ..." could be
+; folded in except that doing so would clobber the value being returned.
+define i64 @check_no_return_clobber() minsize {
+; CHECK-LABEL: check_no_return_clobber:
+; CHECK: push.w {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp,
+; ...
+; CHECK: add sp, #40
+; CHECK: pop.w {r11, pc}
+
+  ; Just to keep iOS FileCheck within previous function:
+; CHECK-IOS-LABEL: check_no_return_clobber:
+
+  %var = alloca i8, i32 40 ; 40 bytes of locals; expected to be absorbed by the push but released by an explicit "add sp, #40"
+  call void @bar(i8* %var)
+  ret i64 0 ; the i64 result is the value the epilogue must leave intact
+}
+
+define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize {
+; CHECK-LABEL: check_vfp_no_return_clobber:
+; CHECK: push {r[[GLOBREG:[0-9]+]], lr}
+; CHECK: vpush {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9}
+; CHECK-NOT: sub sp,
+; ...
+; CHECK: add sp, #64
+; CHECK: vpop {d8, d9}
+; CHECK: pop {r[[GLOBREG]], pc}
+
+  %var = alloca i8, i32 64 ; 64 bytes of locals; expected to be absorbed by the vpush but released by an explicit "add sp, #64"
+
+  %tmp = load %bigVec* @var ; loaded before the call and stored after it, so the value stays live across @bar
+  call void @bar(i8* %var)
+  store %bigVec %tmp, %bigVec* @var
+
+  ret double 1.0 ; double result returned under arm_aapcs_vfpcc; the expected vpop above restores only d8 and d9
+}
+
+@dbl = global double 0.0
+
+; PR18136: there was a bug determining where the first eligible pop in a
+; basic-block was when the entire block was epilogue code.
+define void @test_fold_point(i1 %tst) minsize {
+; CHECK-LABEL: test_fold_point:
+
+  ; Important to check for beginning of basic block, because if it gets
+  ; if-converted the test is probably no longer checking what it should.
+; CHECK: {{LBB[0-9]+_2}}:
+; CHECK-NEXT: vpop {d7, d8}
+; CHECK-NEXT: pop {r4, pc}
+
+  ; With a guaranteed frame-pointer, we want to make sure that its offset in the
+  ; push block is correct, even if a few registers have been tacked onto a later
+  ; vpush (PR18160).
+; CHECK-IOS-LABEL: test_fold_point:
+; CHECK-IOS: push {r4, r7, lr}
+; CHECK-IOS-NEXT: add r7, sp, #4
+; CHECK-IOS-NEXT: vpush {d7, d8}
+
+  ; We want some memory so there's a stack adjustment to fold...
+  %var = alloca i8, i32 8
+
+  ; We want a long-lived floating register so that a callee-saved dN is used and
+  ; there's both a vpop and a pop.
+  %live_val = load double* @dbl
+  br i1 %tst, label %true, label %end
+true:
+  call void @bar(i8* %var)
+  store double %live_val, double* @dbl ; this use after the call is what keeps %live_val live across @bar
+  br label %end
+end:
+  ; We want the epilogue to be the only thing in a basic block so that we hit
+  ; the correct edge-case (first inst in block is correct one to adjust).
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 93601cf9d6c9..fbf3a4a56ad5 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define float @f(i32 %a) {
-;CHECK: f:
+;CHECK-LABEL: f:
 ;CHECK: vmov
 ;CHECK-NEXT: vcvt.f32.s32
 ;CHECK-NEXT: vmov
@@ -11,7 +11,7 @@ entry:
 }
 
 define double @g(i32 %a) {
-;CHECK: g:
+;CHECK-LABEL: g:
 ;CHECK: vmov
 ;CHECK-NEXT: vcvt.f64.s32
 ;CHECK-NEXT: vmov
@@ -21,7 +21,7 @@ entry:
 }
 
 define double @uint_to_double(i32 %a) {
-;CHECK: uint_to_double:
+;CHECK-LABEL: uint_to_double:
 ;CHECK: vmov
 ;CHECK-NEXT: vcvt.f64.u32
 ;CHECK-NEXT: vmov
@@ -31,7 +31,7 @@ entry:
 }
 
 define float @uint_to_float(i32 %a) {
-;CHECK: uint_to_float:
+;CHECK-LABEL: uint_to_float:
 ;CHECK: vmov
 ;CHECK-NEXT: vcvt.f32.u32
 ;CHECK-NEXT: vmov
@@ -41,7 +41,7 @@ entry:
 }
 
 define double @h(double* %v) {
-;CHECK: h:
+;CHECK-LABEL: h:
 ;CHECK: vldr
 ;CHECK-NEXT: vmov
 entry:
@@ -50,20 +50,20 @@ entry:
 }
 
 define float @h2() {
-;CHECK: h2:
+;CHECK-LABEL: h2:
 ;CHECK: mov r0, #1065353216
 entry:
         ret float 1.000000e+00
 }
 
 define double @f2(double %a) {
-;CHECK: f2:
+;CHECK-LABEL: f2:
 ;CHECK-NOT: vmov
         ret double %a
 }
 
 define void @f3() {
-;CHECK: f3:
+;CHECK-LABEL: f3:
 ;CHECK-NOT: vmov
 ;CHECK: f4
 entry:
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll
index 1261ea502129..a5c1aed277bb 100644
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -8,8 +8,8 @@ target triple = "armv7-eabi"
 @z = common global i16 0
 
 define arm_aapcs_vfpcc void @foo() nounwind {
-; CHECK: foo:
-; CHECK-FP6: foo:
+; CHECK-LABEL: foo:
+; CHECK-FP6-LABEL: foo:
 entry:
   %0 = load i16* @x, align 2
   %1 = load i16* @y, align 2
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 3c47eb580ff1..f0d910052a4d 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -6,9 +6,9 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
 
 define i32 @test1(float %a, float %b) {
-; VFP2: test1:
+; VFP2-LABEL: test1:
 ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
-; NEON: test1:
+; NEON-LABEL: test1:
 ; NEON: vadd.f32 [[D0:d[0-9]+]]
 ; NEON: vcvt.s32.f32 d0, [[D0]]
 entry:
@@ -18,9 +18,9 @@ entry:
 }
 
 define i32 @test2(float %a, float %b) {
-; VFP2: test2:
+; VFP2-LABEL: test2:
 ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
-; NEON: test2:
+; NEON-LABEL: test2:
 ; NEON: vadd.f32 [[D0:d[0-9]+]]
 ; NEON: vcvt.u32.f32 d0, [[D0]]
 entry:
@@ -30,9 +30,9 @@ entry:
 }
 
 define float @test3(i32 %a, i32 %b) {
-; VFP2: test3:
+; VFP2-LABEL: test3:
 ; VFP2: vcvt.f32.u32 s{{.}}, s{{.}}
-; NEON: test3:
+; NEON-LABEL: test3:
 ; NEON: vcvt.f32.u32 d
 entry:
         %0 = add i32 %a, %b
@@ -41,9 +41,9 @@ entry:
 }
 
 define float @test4(i32 %a, i32 %b) {
-; VFP2: test4:
+; VFP2-LABEL: test4:
 ; VFP2: vcvt.f32.s32 s{{.}}, s{{.}}
-; NEON: test4:
+; NEON-LABEL: test4:
 ; NEON: vcvt.f32.s32 d
 entry:
         %0 = add i32 %a, %b
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index 40ea33becebb..cc880148da85 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s
 
 define float @f1(float %a, float %b) {
-;CHECK: f1:
+;CHECK-LABEL: f1:
 ;CHECK: vadd.f32
 entry:
 	%tmp = fadd float %a, %b		; <float> [#uses=1]
@@ -9,7 +9,7 @@ entry:
 }
 
 define double @f2(double %a, double %b) {
-;CHECK: f2:
+;CHECK-LABEL: f2:
 ;CHECK: vadd.f64
 entry:
 	%tmp = fadd double %a, %b		; <double> [#uses=1]
@@ -17,7 +17,7 @@ entry:
 }
 
 define float @f3(float %a, float %b) {
-;CHECK: f3:
+;CHECK-LABEL: f3:
 ;CHECK: vmul.f32
 entry:
 	%tmp = fmul float %a, %b		; <float> [#uses=1]
@@ -25,7 +25,7 @@ entry:
 }
 
 define double @f4(double %a, double %b) {
-;CHECK: f4:
+;CHECK-LABEL: f4:
 ;CHECK: vmul.f64
 entry:
 	%tmp = fmul double %a, %b		; <double> [#uses=1]
@@ -33,7 +33,7 @@ entry:
 }
 
 define float @f5(float %a, float %b) {
-;CHECK: f5:
+;CHECK-LABEL: f5:
 ;CHECK: vsub.f32
 entry:
 	%tmp = fsub float %a, %b		; <float> [#uses=1]
@@ -41,7 +41,7 @@ entry:
 }
 
 define double @f6(double %a, double %b) {
-;CHECK: f6:
+;CHECK-LABEL: f6:
 ;CHECK: vsub.f64
 entry:
 	%tmp = fsub double %a, %b		; <double> [#uses=1]
@@ -49,7 +49,7 @@ entry:
 }
 
 define float @f7(float %a) {
-;CHECK: f7:
+;CHECK-LABEL: f7:
 ;CHECK: eor
 entry:
 	%tmp1 = fsub float -0.000000e+00, %a		; <float> [#uses=1]
@@ -57,7 +57,7 @@ entry:
 }
 
 define double @f8(double %a) {
-;CHECK: f8:
+;CHECK-LABEL: f8:
 ;CHECK: vneg.f64
 entry:
 	%tmp1 = fsub double -0.000000e+00, %a		; <double> [#uses=1]
@@ -65,7 +65,7 @@ entry:
 }
 
 define float @f9(float %a, float %b) {
-;CHECK: f9:
+;CHECK-LABEL: f9:
 ;CHECK: vdiv.f32
 entry:
 	%tmp1 = fdiv float %a, %b		; <float> [#uses=1]
@@ -73,7 +73,7 @@ entry:
 }
 
 define double @f10(double %a, double %b) {
-;CHECK: f10:
+;CHECK-LABEL: f10:
 ;CHECK: vdiv.f64
 entry:
 	%tmp1 = fdiv double %a, %b		; <double> [#uses=1]
@@ -81,7 +81,7 @@ entry:
 }
 
 define float @f11(float %a) {
-;CHECK: f11:
+;CHECK-LABEL: f11:
 ;CHECK: bic
 entry:
 	%tmp1 = call float @fabsf( float %a ) readnone	; <float> [#uses=1]
@@ -91,7 +91,7 @@ entry:
 declare float @fabsf(float)
 
 define double @f12(double %a) {
-;CHECK: f12:
+;CHECK-LABEL: f12:
 ;CHECK: vabs.f64
 entry:
 	%tmp1 = call double @fabs( double %a ) readnone	; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 2d8f7108e0ec..3a0af16bf6d6 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -5,7 +5,7 @@
 ; Disable this optimization unless we know one of them is zero.
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: vldr [[S0:s[0-9]+]],
 ; CHECK: vldr [[S1:s[0-9]+]],
 ; CHECK: vcmpe.f32 [[S1]], [[S0]]
@@ -29,13 +29,12 @@ bb2:
 ; +0.0 == -0.0
 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK-NOT: vldr
-; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
-; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK: ldrd [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], [r0]
 ; CHECK-NOT: b LBB
-; CHECK: cmp [[REG1]], #0
 ; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmp [[REG1]], #0
 ; CHECK: cmpeq [[REG2]], #0
 ; CHECK-NOT: vcmpe.f32
 ; CHECK-NOT: vmrs
@@ -55,7 +54,7 @@ bb2:
 
 define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK-NOT: vldr
 ; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
 ; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll
index 260ec49cd86b..916a1ae4952a 100644
--- a/test/CodeGen/ARM/fpcmp.ll
+++ b/test/CodeGen/ARM/fpcmp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define i32 @f1(float %a) {
-;CHECK: f1:
+;CHECK-LABEL: f1:
 ;CHECK: vcmpe.f32
 ;CHECK: movmi
 entry:
@@ -11,7 +11,7 @@ entry:
 }
 
 define i32 @f2(float %a) {
-;CHECK: f2:
+;CHECK-LABEL: f2:
 ;CHECK: vcmpe.f32
 ;CHECK: moveq
 entry:
@@ -21,7 +21,7 @@ entry:
 }
 
 define i32 @f3(float %a) {
-;CHECK: f3:
+;CHECK-LABEL: f3:
 ;CHECK: vcmpe.f32
 ;CHECK: movgt
 entry:
@@ -31,7 +31,7 @@ entry:
 }
 
 define i32 @f4(float %a) {
-;CHECK: f4:
+;CHECK-LABEL: f4:
 ;CHECK: vcmpe.f32
 ;CHECK: movge
 entry:
@@ -41,7 +41,7 @@ entry:
 }
 
 define i32 @f5(float %a) {
-;CHECK: f5:
+;CHECK-LABEL: f5:
 ;CHECK: vcmpe.f32
 ;CHECK: movls
 entry:
@@ -51,7 +51,7 @@ entry:
 }
 
 define i32 @f6(float %a) {
-;CHECK: f6:
+;CHECK-LABEL: f6:
 ;CHECK: vcmpe.f32
 ;CHECK: movne
 entry:
@@ -61,7 +61,7 @@ entry:
 }
 
 define i32 @g1(double %a) {
-;CHECK: g1:
+;CHECK-LABEL: g1:
 ;CHECK: vcmpe.f64
 ;CHECK: movmi
 entry:
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 4a4c5b1c8b05..d84c7ae82eca 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -3,7 +3,7 @@
 
 define i32 @f7(float %a, float %b) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: vcmpe.f32
 ; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: movweq
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
index 638dde9d8a0f..0679a47ded7b 100644
--- a/test/CodeGen/ARM/fpconsts.ll
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -2,7 +2,7 @@
 
 define float @t1(float %x) nounwind readnone optsize {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: vmov.f32 s{{.*}}, #4.000000e+00
   %0 = fadd float %x, 4.000000e+00
   ret float %0
@@ -10,7 +10,7 @@ entry:
 
 define double @t2(double %x) nounwind readnone optsize {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: vmov.f64 d{{.*}}, #3.000000e+00
   %0 = fadd double %x, 3.000000e+00
   ret double %0
@@ -18,7 +18,7 @@ entry:
 
 define double @t3(double %x) nounwind readnone optsize {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: vmov.f64 d{{.*}}, #-1.300000e+01
   %0 = fmul double %x, -1.300000e+01
   ret double %0
@@ -26,7 +26,7 @@ entry:
 
 define float @t4(float %x) nounwind readnone optsize {
 entry:
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01
   %0 = fmul float %x, -2.400000e+01
   ret float %0
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index 1b4c008bb775..326e0628b4e5 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -2,9 +2,9 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
 
 define float @f1(double %x) {
-;CHECK-VFP: f1:
+;CHECK-VFP-LABEL: f1:
 ;CHECK-VFP: vcvt.f32.f64
-;CHECK: f1:
+;CHECK-LABEL: f1:
 ;CHECK: truncdfsf2
 entry:
 	%tmp1 = fptrunc double %x to float		; <float> [#uses=1]
@@ -12,9 +12,9 @@ entry:
 }
 
 define double @f2(float %x) {
-;CHECK-VFP: f2:
+;CHECK-VFP-LABEL: f2:
 ;CHECK-VFP: vcvt.f64.f32
-;CHECK: f2:
+;CHECK-LABEL: f2:
 ;CHECK: extendsfdf2
 entry:
 	%tmp1 = fpext float %x to double		; <double> [#uses=1]
@@ -22,9 +22,9 @@ entry:
 }
 
 define i32 @f3(float %x) {
-;CHECK-VFP: f3:
+;CHECK-VFP-LABEL: f3:
 ;CHECK-VFP: vcvt.s32.f32
-;CHECK: f3:
+;CHECK-LABEL: f3:
 ;CHECK: fixsfsi
 entry:
 	%tmp = fptosi float %x to i32		; <i32> [#uses=1]
@@ -32,9 +32,9 @@ entry:
 }
 
 define i32 @f4(float %x) {
-;CHECK-VFP: f4:
+;CHECK-VFP-LABEL: f4:
 ;CHECK-VFP: vcvt.u32.f32
-;CHECK: f4:
+;CHECK-LABEL: f4:
 ;CHECK: fixunssfsi
 entry:
 	%tmp = fptoui float %x to i32		; <i32> [#uses=1]
@@ -42,9 +42,9 @@ entry:
 }
 
 define i32 @f5(double %x) {
-;CHECK-VFP: f5:
+;CHECK-VFP-LABEL: f5:
 ;CHECK-VFP: vcvt.s32.f64
-;CHECK: f5:
+;CHECK-LABEL: f5:
 ;CHECK: fixdfsi
 entry:
 	%tmp = fptosi double %x to i32		; <i32> [#uses=1]
@@ -52,9 +52,9 @@ entry:
 }
 
 define i32 @f6(double %x) {
-;CHECK-VFP: f6:
+;CHECK-VFP-LABEL: f6:
 ;CHECK-VFP: vcvt.u32.f64
-;CHECK: f6:
+;CHECK-LABEL: f6:
 ;CHECK: fixunsdfsi
 entry:
 	%tmp = fptoui double %x to i32		; <i32> [#uses=1]
@@ -62,9 +62,9 @@ entry:
 }
 
 define float @f7(i32 %a) {
-;CHECK-VFP: f7:
+;CHECK-VFP-LABEL: f7:
 ;CHECK-VFP: vcvt.f32.s32
-;CHECK: f7:
+;CHECK-LABEL: f7:
 ;CHECK: floatsisf
 entry:
 	%tmp = sitofp i32 %a to float		; <float> [#uses=1]
@@ -72,9 +72,9 @@ entry:
 }
 
 define double @f8(i32 %a) {
-;CHECK-VFP: f8:
+;CHECK-VFP-LABEL: f8:
 ;CHECK-VFP: vcvt.f64.s32
-;CHECK: f8:
+;CHECK-LABEL: f8:
 ;CHECK: floatsidf
 entry:
 	%tmp = sitofp i32 %a to double		; <double> [#uses=1]
@@ -82,9 +82,9 @@ entry:
 }
 
 define float @f9(i32 %a) {
-;CHECK-VFP: f9:
+;CHECK-VFP-LABEL: f9:
 ;CHECK-VFP: vcvt.f32.u32
-;CHECK: f9:
+;CHECK-LABEL: f9:
 ;CHECK: floatunsisf
 entry:
 	%tmp = uitofp i32 %a to float		; <float> [#uses=1]
@@ -92,9 +92,9 @@ entry:
 }
 
 define double @f10(i32 %a) {
-;CHECK-VFP: f10:
+;CHECK-VFP-LABEL: f10:
 ;CHECK-VFP: vcvt.f64.u32
-;CHECK: f10:
+;CHECK-LABEL: f10:
 ;CHECK: floatunsidf
 entry:
 	%tmp = uitofp i32 %a to double		; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 8faa57896a8d..8fbd1d805840 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -1,13 +1,13 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define float @f1(float %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mov r0, #0
         ret float 0.000000e+00
 }
 
 define float @f2(float* %v, float %u) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: vldr{{.*}}[
         %tmp = load float* %v           ; <float> [#uses=1]
         %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
@@ -15,7 +15,7 @@ define float @f2(float* %v, float %u) {
 }
 
 define float @f2offset(float* %v, float %u) {
-; CHECK: f2offset:
+; CHECK-LABEL: f2offset:
 ; CHECK: vldr{{.*}}, #4]
         %addr = getelementptr float* %v, i32 1
         %tmp = load float* %addr
@@ -24,7 +24,7 @@ define float @f2offset(float* %v, float %u) {
 }
 
 define float @f2noffset(float* %v, float %u) {
-; CHECK: f2noffset:
+; CHECK-LABEL: f2noffset:
 ; CHECK: vldr{{.*}}, #-4]
         %addr = getelementptr float* %v, i32 -1
         %tmp = load float* %addr
@@ -33,7 +33,7 @@ define float @f2noffset(float* %v, float %u) {
 }
 
 define void @f3(float %a, float %b, float* %v) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: vstr{{.*}}[
         %tmp = fadd float %a, %b         ; <float> [#uses=1]
         store float %tmp, float* %v
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index 299cb8f81503..740868725e90 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -44,6 +44,6 @@ define void @foo9(double %x) {
 	store i16 %tmp, i16* null
 	ret void
 }
-; CHECK: foo9:
+; CHECK-LABEL: foo9:
 ; CHECK: 	vmov	r0, s0
 
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index 303d165de0b6..e29f291dc2c5 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -2,7 +2,7 @@
 ; Check generated fused MAC and MLS.
 
 define double @fusedMACTest1(double %d1, double %d2, double %d3) {
-;CHECK: fusedMACTest1:
+;CHECK-LABEL: fusedMACTest1:
 ;CHECK: vfma.f64
   %1 = fmul double %d1, %d2
   %2 = fadd double %1, %d3
@@ -10,7 +10,7 @@ define double @fusedMACTest1(double %d1, double %d2, double %d3) {
 }
 
 define float @fusedMACTest2(float %f1, float %f2, float %f3) {
-;CHECK: fusedMACTest2:
+;CHECK-LABEL: fusedMACTest2:
 ;CHECK: vfma.f32
   %1 = fmul float %f1, %f2
   %2 = fadd float %1, %f3
@@ -18,7 +18,7 @@ define float @fusedMACTest2(float %f1, float %f2, float %f3) {
 }
 
 define double @fusedMACTest3(double %d1, double %d2, double %d3) {
-;CHECK: fusedMACTest3:
+;CHECK-LABEL: fusedMACTest3:
 ;CHECK: vfms.f64
   %1 = fmul double %d2, %d3
   %2 = fsub double %d1, %1
@@ -26,7 +26,7 @@ define double @fusedMACTest3(double %d1, double %d2, double %d3) {
 }
 
 define float @fusedMACTest4(float %f1, float %f2, float %f3) {
-;CHECK: fusedMACTest4:
+;CHECK-LABEL: fusedMACTest4:
 ;CHECK: vfms.f32
   %1 = fmul float %f2, %f3
   %2 = fsub float %f1, %1
@@ -34,7 +34,7 @@ define float @fusedMACTest4(float %f1, float %f2, float %f3) {
 }
 
 define double @fusedMACTest5(double %d1, double %d2, double %d3) {
-;CHECK: fusedMACTest5:
+;CHECK-LABEL: fusedMACTest5:
 ;CHECK: vfnma.f64
   %1 = fmul double %d1, %d2
   %2 = fsub double -0.0, %1
@@ -43,7 +43,7 @@ define double @fusedMACTest5(double %d1, double %d2, double %d3) {
 }
 
 define float @fusedMACTest6(float %f1, float %f2, float %f3) {
-;CHECK: fusedMACTest6:
+;CHECK-LABEL: fusedMACTest6:
 ;CHECK: vfnma.f32
   %1 = fmul float %f1, %f2
   %2 = fsub float -0.0, %1
@@ -52,7 +52,7 @@ define float @fusedMACTest6(float %f1, float %f2, float %f3) {
 }
 
 define double @fusedMACTest7(double %d1, double %d2, double %d3) {
-;CHECK: fusedMACTest7:
+;CHECK-LABEL: fusedMACTest7:
 ;CHECK: vfnms.f64
   %1 = fmul double %d1, %d2
   %2 = fsub double %1, %d3
@@ -60,7 +60,7 @@ define double @fusedMACTest7(double %d1, double %d2, double %d3) {
 }
 
 define float @fusedMACTest8(float %f1, float %f2, float %f3) {
-;CHECK: fusedMACTest8:
+;CHECK-LABEL: fusedMACTest8:
 ;CHECK: vfnms.f32
   %1 = fmul float %f1, %f2
   %2 = fsub float %1, %f3
@@ -68,7 +68,7 @@ define float @fusedMACTest8(float %f1, float %f2, float %f3) {
 }
 
 define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
-;CHECK: fusedMACTest9:
+;CHECK-LABEL: fusedMACTest9:
 ;CHECK: vfma.f32
   %mul = fmul <2 x float> %a, %b
   %add = fadd <2 x float> %mul, %a
@@ -76,7 +76,7 @@ define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
 }
 
 define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
-;CHECK: fusedMACTest10:
+;CHECK-LABEL: fusedMACTest10:
 ;CHECK: vfms.f32
   %mul = fmul <2 x float> %a, %b
   %sub = fsub <2 x float> %a, %mul
@@ -84,7 +84,7 @@ define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
 }
 
 define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
-;CHECK: fusedMACTest11:
+;CHECK-LABEL: fusedMACTest11:
 ;CHECK: vfma.f32
   %mul = fmul <4 x float> %a, %b
   %add = fadd <4 x float> %mul, %a
@@ -92,7 +92,7 @@ define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
 }
 
 define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
-;CHECK: fusedMACTest12:
+;CHECK-LABEL: fusedMACTest12:
 ;CHECK: vfms.f32
   %mul = fmul <4 x float> %a, %b
   %sub = fsub <4 x float> %a, %mul
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index eb71149d83a9..3101500f2ca8 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -57,7 +57,7 @@ define i32 @test1() {
 
 
 
-; LinuxPIC: test1:
+; LinuxPIC-LABEL: test1:
 ; LinuxPIC: 	ldr r0, .LCPI0_0
 ; LinuxPIC: 	ldr r1, .LCPI0_1
 	
diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll
index 8bb2c6e0c915..18d38d40072c 100644
--- a/test/CodeGen/ARM/hidden-vis-2.ll
+++ b/test/CodeGen/ARM/hidden-vis-2.ll
@@ -4,7 +4,7 @@
 
 define i32 @t() nounwind readonly {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: ldr
 ; CHECK-NEXT: ldr
 	%0 = load i32* @x, align 4		; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/hidden-vis.ll b/test/CodeGen/ARM/hidden-vis.ll
index 3544ae81a0a4..ce2ce2c1de54 100644
--- a/test/CodeGen/ARM/hidden-vis.ll
+++ b/test/CodeGen/ARM/hidden-vis.ll
@@ -6,18 +6,18 @@
 
 define weak hidden void @t1() nounwind {
 ; LINUX: .hidden t1
-; LINUX: t1:
+; LINUX-LABEL: t1:
 
 ; DARWIN: .private_extern _t1
-; DARWIN: t1:
+; DARWIN-LABEL: t1:
   ret void
 }
 
 define weak void @t2() nounwind {
-; LINUX: t2:
+; LINUX-LABEL: t2:
 ; LINUX: .hidden a
 
-; DARWIN: t2:
+; DARWIN-LABEL: t2:
 ; DARWIN: .private_extern _a
   ret void
 }
diff --git a/test/CodeGen/ARM/ifconv-kills.ll b/test/CodeGen/ARM/ifconv-kills.ll
new file mode 100644
index 000000000000..bf54ba2f730c
--- /dev/null
+++ b/test/CodeGen/ARM/ifconv-kills.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march arm -mcpu swift -verify-machineinstrs
+
+declare i32 @f(i32 %p0, i32 %p1)
+
+define i32 @foo(i32* %ptr) {
+entry:
+  %cmp = icmp ne i32* %ptr, null
+  br i1 %cmp, label %if.then, label %if.else
+
+; present something which can be easily if-converted
+if.then:
+  ; %R0 should be killed here
+  %valt = load i32* %ptr, align 4
+  br label %return
+
+if.else:
+  ; %R0 should be killed here; however, after if-conversion the %R0 kill
+  ; has to be removed because if.then will follow after this and still
+  ; read it.
+  %addr = getelementptr inbounds i32* %ptr, i32 4
+  %vale = load i32* %addr, align 4
+  br label %return
+
+return:
+  %phival = phi i32 [ %valt, %if.then ], [ %vale, %if.else ]
+  ; suggest to bring %phival/%valt/%vale into %R1 (because otherwise there
+  ; will be no kills in if.then/if.else)
+  %retval = call i32 @f (i32 0, i32 %phival)
+  ret i32 %retval
+}
diff --git a/test/CodeGen/ARM/ifconv-regmask.ll b/test/CodeGen/ARM/ifconv-regmask.ll
new file mode 100644
index 000000000000..d45f65f9567f
--- /dev/null
+++ b/test/CodeGen/ARM/ifconv-regmask.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs
+
+%union.opcode = type { i32 }
+
+@opcode = external global %union.opcode, align 4
+
+; Function Attrs: nounwind ssp
+define i32 @sfu() {
+entry:
+  %bf.load = load i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4
+  %bf.lshr = lshr i32 %bf.load, 26
+  %bf.clear = and i32 %bf.lshr, 7
+  switch i32 %bf.clear, label %return [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %call = tail call i32 @func0()
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+  %call2 = tail call i32 @func1()
+  br label %return
+
+return:                                           ; preds = %sw.bb1, %sw.bb, %entry
+  %retval.0 = phi i32 [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ -1, %entry ]
+  ret i32 %retval.0
+}
+
+; Function Attrs: nounwind ssp
+declare i32 @func0()
+
+; Function Attrs: nounwind ssp
+declare i32 @func1()
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index fd831442c14b..5a55653239d1 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -2,8 +2,8 @@
 ; RUN: llc < %s -march=arm -mcpu=swift     | FileCheck %s -check-prefix=SWIFT
 
 define i32 @t1(i32 %a, i32 %b) {
-; A8: t1:
-; SWIFT: t1:
+; A8-LABEL: t1:
+; SWIFT-LABEL: t1:
 	%tmp2 = icmp eq i32 %a, 0
 	br i1 %tmp2, label %cond_false, label %cond_true
 
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index a5082d836587..26c72723b287 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -6,7 +6,7 @@
 
 define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: vpop {d8}
 ; CHECK-NOT: vpopne
 ; CHECK: pop {r7, pc}
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
index 0f142eef7a3c..dba8a3f1a6af 100644
--- a/test/CodeGen/ARM/ifcvt11.ll
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -6,7 +6,7 @@
 %struct.xyz_t = type { double, double, double }
 
 define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
-; CHECK: effie:
+; CHECK-LABEL: effie:
 entry:
   %0 = icmp sgt i32 %tsets, 0
   br i1 %0, label %bb.nph, label %bb6
diff --git a/test/CodeGen/ARM/ifcvt12.ll b/test/CodeGen/ARM/ifcvt12.ll
index 77bdca57e555..b61f4e1bb51b 100644
--- a/test/CodeGen/ARM/ifcvt12.ll
+++ b/test/CodeGen/ARM/ifcvt12.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mlsne r0, r0, r1, r2
     %tmp1 = icmp eq i32 %a, 0
     br i1 %tmp1, label %cond_false, label %cond_true
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index 1bca10a7c646..e34edecf57ee 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: bxlt lr
 	%tmp2 = icmp sgt i32 %c, 10
 	%tmp5 = icmp slt i32 %d, 4
@@ -19,7 +19,7 @@ UnifiedReturnBlock:
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: bxgt lr
 ; CHECK: cmp
 ; CHECK: addge
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index eef4de050b35..fa7d61887d9d 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: cmp r2, #1
 ; CHECK: cmpne r2, #7
 	switch i32 %c, label %cond_next [
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
index d247f14d91ce..53c789d184f6 100644
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 ; Do not if-convert when branches go to the different loops.
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK-NOT: subgt
 ; CHECK-NOT: suble
 ; Don't use
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 5081791bc257..31e3e00c468e 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -12,10 +12,10 @@ entry:
 }
 
 define i32 @t1(i32 %a, i32 %b) {
-; A8: t1:
+; A8-LABEL: t1:
 ; A8: poplt {r7, pc}
 
-; SWIFT: t1:
+; SWIFT-LABEL: t1:
 ; SWIFT: pop {r7, pc}
 ; SWIFT: pop {r7, pc}
 entry:
diff --git a/test/CodeGen/ARM/indirect-reg-input.ll b/test/CodeGen/ARM/indirect-reg-input.ll
index 86728fa61934..b936455975c6 100644
--- a/test/CodeGen/ARM/indirect-reg-input.ll
+++ b/test/CodeGen/ARM/indirect-reg-input.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: not llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
 
 ; Check for error message:
 ; CHECK: error: inline asm not supported yet: don't know how to handle tied indirect register inputs
diff --git a/test/CodeGen/ARM/indirectbr-2.ll b/test/CodeGen/ARM/indirectbr-2.ll
index 084f520a8ee5..0c41da658009 100644
--- a/test/CodeGen/ARM/indirectbr-2.ll
+++ b/test/CodeGen/ARM/indirectbr-2.ll
@@ -8,7 +8,7 @@
 ; The indirect branch has the two destinations as successors. The lone PHI
 ; statement shouldn't be implicitly defined.
 
-; CHECK:      func:
+; CHECK-LABEL:      func:
 ; CHECK:      Ltmp1:    @ Block address taken
 ; CHECK-NOT:            @ implicit-def: R0
 ; CHECK:                @ 4-byte Reload
diff --git a/test/CodeGen/ARM/indirectbr-3.ll b/test/CodeGen/ARM/indirectbr-3.ll
new file mode 100644
index 000000000000..5a9c45902edc
--- /dev/null
+++ b/test/CodeGen/ARM/indirectbr-3.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; If ARMBaseInstrInfo::AnalyzeBranch returns the wrong value, which was possible
+; for blocks with indirect branches, the IfConverter could end up deleting
+; blocks that were the destinations of indirect branches, leaving branches to
+; nowhere.
+; <rdar://problem/14464830>
+
+define i32 @preserve_blocks(i32 %x) {
+; preserve_blocks:
+; CHECK: Block address taken
+; CHECK: movs r0, #2
+; CHECK: movs r0, #1
+; CHECK-NOT: Address of block that was removed by CodeGen
+entry:
+  %c2 = icmp slt i32 %x, 3
+  %blockaddr = select i1 %c2, i8* blockaddress(@preserve_blocks, %ibt1), i8* blockaddress(@preserve_blocks, %ibt2)
+  %c1 = icmp eq i32 %x, 0
+  br i1 %c1, label %pre_ib, label %nextblock
+
+nextblock:
+  ret i32 3
+
+ibt1:
+  ret i32 2
+
+ibt2:
+  ret i32 1
+
+pre_ib:
+  indirectbr i8* %blockaddr, [ label %ibt1, label %ibt2 ]
+}
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 341c33f84ff3..1aeeb916e489 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -1,14 +1,15 @@
 ; RUN: llc < %s -relocation-model=pic -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=ARM
 ; RUN: llc < %s -relocation-model=pic -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=THUMB
 ; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -relocation-model=static -mtriple=thumbv8-apple-darwin | FileCheck %s -check-prefix=THUMB2
 
 @nextaddr = global i8* null                       ; <i8**> [#uses=2]
 @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
 
 define internal i32 @foo(i32 %i) nounwind {
-; ARM: foo:
-; THUMB: foo:
-; THUMB2: foo:
+; ARM-LABEL: foo:
+; THUMB-LABEL: foo:
+; THUMB2-LABEL: foo:
 entry:
   %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
@@ -48,15 +49,18 @@ L2:                                               ; preds = %L3, %bb2
 
 L1:                                               ; preds = %L2, %bb2
   %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ]  ; <i32> [#uses=1]
+; ARM-LABEL: %L1
 ; ARM: ldr [[R1:r[0-9]+]], LCPI
 ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
 ; ARM: str [[R1b]]
-; THUMB: ldr.n
+; THUMB-LABEL: %L1
+; THUMB: ldr
 ; THUMB: add
-; THUMB: ldr.n [[R2:r[0-9]+]], LCPI
+; THUMB: ldr [[R2:r[0-9]+]], LCPI
 ; THUMB: add [[R2]], pc
 ; THUMB: str [[R2]]
-; THUMB2: ldr.n [[R2:r[0-9]+]], LCPI
+; THUMB2-LABEL: %L1
+; THUMB2: ldr [[R2:r[0-9]+]], LCPI
 ; THUMB2-NEXT: str{{(.w)?}} [[R2]]
   store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
   ret i32 %res.3
diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll
index be5eb8157317..683a0c4b7d30 100644
--- a/test/CodeGen/ARM/inlineasm-64bit.ll
+++ b/test/CodeGen/ARM/inlineasm-64bit.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -O3  -mtriple=arm-linux-gnueabi | FileCheck %s
-
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s
 ; check if regs are passing correctly
 define void @i64_write(i64* %p, i64 %val) nounwind {
-; CHECK: i64_write:
+; CHECK-LABEL: i64_write:
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
 ; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
   %1 = tail call i64 asm sideeffect "1: ldrexd $0, ${0:H}, [$2]\0A strexd $0, $3, ${3:H}, [$2]\0A teq $0, #0\0A bne 1b", "=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %val) nounwind
@@ -12,7 +12,7 @@ define void @i64_write(i64* %p, i64 %val) nounwind {
 ; check if register allocation can reuse the registers
 define void @multi_writes(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind {
 entry:
-; CHECK: multi_writes:
+; CHECK-LABEL: multi_writes:
 ; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
 ; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
 ; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
@@ -44,11 +44,63 @@ entry:
 
 ; check if callee-saved registers used by inline asm are saved/restored
 define void @foo(i64* %p, i64 %i) nounwind {
-; CHECK:foo:
-; CHECK: push {{{r[4-9]|r10|r11}}
+; CHECK-LABEL:foo:
+; CHECK: {{push|push.w}} {{{r[4-9]|r10|r11}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
 ; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
-; CHECK: pop {{{r[4-9]|r10|r11}}
+; CHECK: {{pop|pop.w}} {{{r[4-9]|r10|r11}}
   %1 = tail call { i64, i64 } asm sideeffect "@ atomic64_set\0A1: ldrexd $0, ${0:H}, [$3]\0Aldrexd $1, ${1:H}, [$3]\0A strexd $0, $4, ${4:H}, [$3]\0A teq $0, #0\0A bne 1b", "=&r,=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %i) nounwind
   ret void
 }
+
+; return *p;
+define i64 @ldrd_test(i64* %p) nounwind {
+; CHECK-LABEL: ldrd_test:
+  %1 = tail call i64 asm "ldrd $0, ${0:H}, [$1]", "=r,r"(i64* %p) nounwind
+  ret i64 %1
+}
+
+define i64 @QR_test(i64* %p) nounwind {
+; CHECK-LABEL: QR_test:
+; CHECK: ldrd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
+  %1 = tail call i64 asm "ldrd ${0:Q}, ${0:R}, [$1]", "=r,r"(i64* %p) nounwind
+  ret i64 %1
+}
+
+define i64 @defuse_test(i64 %p) nounwind {
+; CHECK-LABEL: defuse_test:
+; CHECK: add {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, #1
+  %1 = tail call i64 asm "add $0, ${0:H}, #1", "=r,0"(i64 %p) nounwind
+  ret i64 %1
+}
+
+; *p = (hi << 32) | lo;
+define void @strd_test(i64* %p, i32 %lo, i32 %hi) nounwind {
+; CHECK-LABEL: strd_test:
+; CHECK: strd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
+  %1 = zext i32 %hi to i64
+  %2 = shl nuw i64 %1, 32
+  %3 = sext i32 %lo to i64
+  %4 = or i64 %2, %3
+  tail call void asm sideeffect "strd $0, ${0:H}, [$1]", "r,r"(i64 %4, i64* %p) nounwind
+  ret void
+}
+
+; Make sure we don't untie operands by mistake.
+define i64 @tied_64bit_test(i64 %in) nounwind {
+; CHECK-LABEL: tied_64bit_test:
+; CHECK: OUT([[OUTREG:r[0-9]+]]), IN([[OUTREG]])
+  %addr = alloca i64
+  call void asm "OUT($0), IN($1)", "=*rm,0"(i64* %addr, i64 %in)
+  ret i64 %in
+}
+
+; If we explicitly name a tied operand, then the code should look up the operand
+; it is tied to for information about register class and so on.
+define i64 @tied_64bit_lookback_test(i64 %in) nounwind {
+; CHECK-LABEL: tied_64bit_lookback_test:
+; CHECK: OUTLO([[LO:r[0-9]+]]) OUTHI([[HI:r[0-9]+]]) INLO([[LO]]) INHI([[HI]])
+  %vars = call {i64, i32, i64} asm "OUTLO(${2:Q}) OUTHI(${2:R}) INLO(${3:Q}) INHI(${3:R})", "=r,=r,=r,2"(i64 %in)
+  %res = extractvalue {i64, i32, i64} %vars, 2
+  ret i64 %res
+}
diff --git a/test/CodeGen/ARM/inlineasm4.ll b/test/CodeGen/ARM/inlineasm4.ll
index 9ed4b997a634..4a1bccaf61c5 100644
--- a/test/CodeGen/ARM/inlineasm4.ll
+++ b/test/CodeGen/ARM/inlineasm4.ll
@@ -4,7 +4,7 @@ define double @f(double %x) {
 entry:
   %0 = tail call double asm "mov     ${0:R}, #4\0A", "=&r"()
   ret double %0
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK:	mov     r1, #4
 }
 
@@ -12,6 +12,6 @@ define double @g(double %x) {
 entry:
   %0 = tail call double asm "mov     ${0:Q}, #4\0A", "=&r"()
   ret double %0
-; CHECK: g:
+; CHECK-LABEL: g:
 ; CHECK:	mov     r0, #4
 }
diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll
new file mode 100644
index 000000000000..217fd696237e
--- /dev/null
+++ b/test/CodeGen/ARM/interrupt-attr.ll
@@ -0,0 +1,130 @@
+; RUN: llc -mtriple=arm-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s
+; RUN: llc -mtriple=thumb-none-none-eabi -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -o - %s | FileCheck --check-prefix=CHECK-M %s
+
+declare arm_aapcscc void @bar()
+
+@bigvar = global [16 x i32] zeroinitializer
+
+define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
+  ; Must save all registers except banked sp and lr (we save lr anyway because
+  ; we actually need it at the end to execute the return ourselves).
+
+  ; Also need special function return setting pc and CPSR simultaneously.
+; CHECK-A-LABEL: irq_fn:
+; CHECK-A: push {r0, r1, r2, r3, r11, lr}
+; CHECK-A: add r11, sp, #16
+; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: bic sp, sp, #7
+; CHECK-A: bl bar
+; CHECK-A: sub sp, r11, #16
+; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
+; CHECK-A: subs pc, lr, #4
+
+; CHECK-A-THUMB-LABEL: irq_fn:
+; CHECK-A-THUMB: push {r0, r1, r2, r3, r4, r7, lr}
+; CHECK-A-THUMB: mov r4, sp
+; CHECK-A-THUMB: add r7, sp, #20
+; CHECK-A-THUMB: bic r4, r4, #7
+; CHECK-A-THUMB: bl bar
+; CHECK-A-THUMB: sub.w r4, r7,  #20
+; CHECK-A-THUMB: mov sp, r4
+; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r7, lr}
+; CHECK-A-THUMB: subs pc, lr, #4
+
+  ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
+  ; appropriate sentinel so no special return needed).
+; CHECK-M: push {r4, r7, lr}
+; CHECK-M: add r7, sp, #4
+; CHECK-M: sub sp, #4
+; CHECK-M: mov r4, sp
+; CHECK-M: mov sp, r4
+; CHECK-M: blx _bar
+; CHECK-M: subs r4, r7, #4
+; CHECK-M: mov sp, r4
+; CHECK-M: pop {r4, r7, pc}
+
+  call arm_aapcscc void @bar()
+  ret void
+}
+
+define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
+; CHECK-A-LABEL: fiq_fn:
+; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; 32 to get past r0, r1, ..., r7
+; CHECK-A: add r11, sp, #32
+; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: bic sp, sp, #7
+; [...]
+  ; 32 must match above
+; CHECK-A: sub sp, r11, #32
+; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+; CHECK-A: subs pc, lr, #4
+
+  %val = load volatile [16 x i32]* @bigvar
+  store volatile [16 x i32] %val, [16 x i32]* @bigvar
+  ret void
+}
+
+define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
+; CHECK-A-LABEL: swi_fn:
+; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-A: add r11, sp, #44
+; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: bic sp, sp, #7
+; [...]
+; CHECK-A: sub sp, r11, #44
+; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-A: subs pc, lr, #0
+
+  %val = load volatile [16 x i32]* @bigvar
+  store volatile [16 x i32] %val, [16 x i32]* @bigvar
+  ret void
+}
+
+define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
+; CHECK-A-LABEL: undef_fn:
+; CHECK-A: push {r0, r1, r2, r3, r11, lr}
+; CHECK-A: add r11, sp, #16
+; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: bic sp, sp, #7
+; [...]
+; CHECK-A: sub sp, r11, #16
+; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
+; CHECK-A: subs pc, lr, #0
+
+  call void @bar()
+  ret void
+}
+
+define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" {
+; CHECK-A-LABEL: abort_fn:
+; CHECK-A: push {r0, r1, r2, r3, r11, lr}
+; CHECK-A: add r11, sp, #16
+; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: bic sp, sp, #7
+; [...]
+; CHECK-A: sub sp, r11, #16
+; CHECK-A: pop {r0, r1, r2, r3, r11, lr}
+; CHECK-A: subs pc, lr, #4
+
+  call void @bar()
+  ret void
+}
+
+@var = global double 0.0
+
+; We don't save VFP regs, since it would be a massive overhead in the general
+; case.
+define arm_aapcscc void @floating_fn() alignstack(8) "interrupt"="IRQ" {
+; CHECK-A-LABEL: floating_fn:
+; CHECK-A-NOT: vpush
+; CHECK-A-NOT: vstr
+; CHECK-A-NOT: vstm
+; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %lhs = load volatile double* @var
+  %rhs = load volatile double* @var
+  %sum = fadd double %lhs, %rhs
+  store double %sum, double* @var
+  ret void
+}
diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll
new file mode 100644
index 000000000000..c038fe6da84a
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics-crypto.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
+  %tmp = load <16 x i8>* %a
+  %tmp2 = load <16 x i8>* %b
+  %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2)
+  ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2)
+  ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4)
+  ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5)
+  ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}}
+  ret <16 x i8> %tmp6
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) {
+  %tmp = load <4 x i32>* %a
+  %tmp2 = load <4 x i32>* %b
+  %tmp3 = load <4 x i32>* %c
+  %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp)
+  ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}}
+  %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1)
+  ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}}
+  %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
+  ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3)
+  ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}}
+  ret <4 x i32> %res10
+}
+
+declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>)
+declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>)
+declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/ARM/intrinsics-v8.ll b/test/CodeGen/ARM/intrinsics-v8.ll
new file mode 100644
index 000000000000..247bfc1e5884
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics-v8.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=armv8 -mattr=+db | FileCheck %s
+
+define void @test() {
+  ; CHECK: dmb sy
+  call void @llvm.arm.dmb(i32 15)
+  ; CHECK: dmb osh
+  call void @llvm.arm.dmb(i32 3)
+  ; CHECK: dsb sy
+  call void @llvm.arm.dsb(i32 15)
+  ; CHECK: dsb ishld
+  call void @llvm.arm.dsb(i32 9)
+  ; CHECK: sevl
+  tail call void @llvm.arm.sevl() nounwind
+  ret void
+}
+
+declare void @llvm.arm.dmb(i32)
+declare void @llvm.arm.dsb(i32)
+declare void @llvm.arm.sevl() nounwind
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index db78fd06ab2d..d5b805c721b7 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -4,9 +4,9 @@
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
 define i32 @t1() {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: pop
-; V4T: t1:
+; V4T-LABEL: t1:
 ; V4T: pop
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
@@ -15,9 +15,9 @@ define i32 @t1() {
 }
 
 define i32 @t2() {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: pop
-; V4T: t2:
+; V4T-LABEL: t2:
 ; V4T: pop
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
         %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
@@ -27,10 +27,10 @@ define i32 @t2() {
 }
 
 define i32 @t3() {
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: ldmib
 ; CHECK: pop
-; V4T: t3:
+; V4T-LABEL: t3:
 ; V4T: ldmib
 ; V4T: pop
 ; V4T-NEXT: bx lr
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index 011e61caea96..e4c695b87bec 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 define i32 @f1(i32* %v) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldr r0
 entry:
         %tmp = load i32* %v
@@ -9,7 +9,7 @@ entry:
 }
 
 define i32 @f2(i32* %v) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldr r0
 entry:
         %tmp2 = getelementptr i32* %v, i32 1023
@@ -18,7 +18,7 @@ entry:
 }
 
 define i32 @f3(i32* %v) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov
 ; CHECK: ldr r0
 entry:
@@ -28,7 +28,7 @@ entry:
 }
 
 define i32 @f4(i32 %base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: mvn
 ; CHECK: ldr r0
 entry:
@@ -39,7 +39,7 @@ entry:
 }
 
 define i32 @f5(i32 %base, i32 %offset) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldr r0
 entry:
         %tmp1 = add i32 %base, %offset
@@ -49,7 +49,7 @@ entry:
 }
 
 define i32 @f6(i32 %base, i32 %offset) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ldr r0{{.*}}lsl{{.*}}
 entry:
         %tmp1 = shl i32 %offset, 2
@@ -60,7 +60,7 @@ entry:
 }
 
 define i32 @f7(i32 %base, i32 %offset) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ldr r0{{.*}}lsr{{.*}}
 entry:
         %tmp1 = lshr i32 %offset, 2
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index a6ca43448380..f5ff7dda5e04 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 ; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: ldr {{.*, \[.*]}}, -r2
 ; CHECK-NOT: ldr
 define i32 @test1(i32 %a, i32 %b, i32 %c) {
@@ -13,7 +13,7 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) {
         ret i32 %tmp5
 }
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: ldr {{.*, \[.*\]}}, #-16
 ; CHECK-NOT: ldr
 define i32 @test2(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index 6c40ad7326b6..82818272cf22 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 ; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: ldr {{.*!}}
 ; CHECK-NOT: ldr
 define i32* @test1(i32* %X, i32* %dest) {
@@ -11,7 +11,7 @@ define i32* @test1(i32* %X, i32* %dest) {
         ret i32* %Y
 }
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: ldr {{.*!}}
 ; CHECK-NOT: ldr
 define i32 @test2(i32 %a, i32 %b, i32 %c) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 73b546d021d5..864d18a88ae6 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -13,10 +13,10 @@
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-; A8: t:
+; A8-LABEL: t:
 ; A8:   ldrd r2, r3, [r2]
 
-; M3: t:
+; M3-LABEL: t:
 ; M3-NOT: ldrd
 
 	%0 = load i64** @b, align 4
@@ -67,3 +67,31 @@ bb:                                               ; preds = %bb, %entry
 return:                                           ; preds = %bb, %entry
   ret void
 }
+
+; rdar://13978317
+; Regression test: the pair of loads was not merged into an ldrd when lifetime markers were present.
+%struct.Test = type { i32, i32, i32 }
+
+@TestVar = external global %struct.Test
+
+define void @Func1() nounwind ssp {
+; A8-LABEL: Func1:
+entry:
+; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
+; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}}
+; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4]
+; A8-NEXT: add [[FIELD1]], [[FIELD2]]
+; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
+  %orig_blocks = alloca [256 x i16], align 2
+  %0 = bitcast [256 x i16]* %orig_blocks to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %0) nounwind
+  %tmp1 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 1), align 4
+  %tmp2 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 2), align 4
+  %add = add nsw i32 %tmp2, %tmp1
+  store i32 %add, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 0), align 4
+  call void @llvm.lifetime.end(i64 512, i8* %0) nounwind
+  ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
index 1c69e15bbbfb..61c459c7435e 100644
--- a/test/CodeGen/ARM/ldst-f32-2-i32.ll
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -3,7 +3,7 @@
 ; rdar://8944252
 
 define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 entry:
   %src6 = bitcast float* %src to i8*
   %0 = icmp eq i32 %width, 0
diff --git a/test/CodeGen/ARM/ldstrex.ll b/test/CodeGen/ARM/ldstrex.ll
new file mode 100644
index 000000000000..5eaae53da994
--- /dev/null
+++ b/test/CodeGen/ARM/ldstrex.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin > %t
+; RUN: FileCheck %s < %t
+; RUN: FileCheck %s < %t --check-prefix=CHECK-T2ADDRMODE
+
+%0 = type { i32, i32 }
+
+; CHECK-LABEL: f0:
+; CHECK: ldrexd
+define i64 @f0(i8* %p) nounwind readonly {
+entry:
+  %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p)
+  %0 = extractvalue %0 %ldrexd, 1
+  %1 = extractvalue %0 %ldrexd, 0
+  %2 = zext i32 %0 to i64
+  %3 = zext i32 %1 to i64
+  %shl = shl nuw i64 %2, 32
+  %4 = or i64 %shl, %3
+  ret i64 %4
+}
+
+; CHECK-LABEL: f1:
+; CHECK: strexd
+define i32 @f1(i8* %ptr, i64 %val) nounwind {
+entry:
+  %tmp4 = trunc i64 %val to i32
+  %tmp6 = lshr i64 %val, 32
+  %tmp7 = trunc i64 %tmp6 to i32
+  %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
+  ret i32 %strexd
+}
+
+declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly
+declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
+
+; CHECK-LABEL: test_load_i8:
+; CHECK: ldrexb r0, [r0]
+; CHECK-NOT: uxtb
+define i32 @test_load_i8(i8* %addr) {
+  %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr)
+  ret i32 %val
+}
+
+; CHECK-LABEL: test_load_i16:
+; CHECK: ldrexh r0, [r0]
+; CHECK-NOT: uxth
+define i32 @test_load_i16(i16* %addr) {
+  %val = call i32 @llvm.arm.ldrex.p0i16(i16* %addr)
+  ret i32 %val
+}
+
+; CHECK-LABEL: test_load_i32:
+; CHECK: ldrex r0, [r0]
+define i32 @test_load_i32(i32* %addr) {
+  %val = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+  ret i32 %val
+}
+
+declare i32 @llvm.arm.ldrex.p0i8(i8*) nounwind readonly
+declare i32 @llvm.arm.ldrex.p0i16(i16*) nounwind readonly
+declare i32 @llvm.arm.ldrex.p0i32(i32*) nounwind readonly
+
+; CHECK-LABEL: test_store_i8:
+; CHECK-NOT: uxtb
+; CHECK: strexb r0, r1, [r2]
+define i32 @test_store_i8(i32, i8 %val, i8* %addr) {
+  %extval = zext i8 %val to i32
+  %res = call i32 @llvm.arm.strex.p0i8(i32 %extval, i8* %addr)
+  ret i32 %res
+}
+
+; CHECK-LABEL: test_store_i16:
+; CHECK-NOT: uxth
+; CHECK: strexh r0, r1, [r2]
+define i32 @test_store_i16(i32, i16 %val, i16* %addr) {
+  %extval = zext i16 %val to i32
+  %res = call i32 @llvm.arm.strex.p0i16(i32 %extval, i16* %addr)
+  ret i32 %res
+}
+
+; CHECK-LABEL: test_store_i32:
+; CHECK: strex r0, r1, [r2]
+define i32 @test_store_i32(i32, i32 %val, i32* %addr) {
+  %res = call i32 @llvm.arm.strex.p0i32(i32 %val, i32* %addr)
+  ret i32 %res
+}
+
+declare i32 @llvm.arm.strex.p0i8(i32, i8*) nounwind
+declare i32 @llvm.arm.strex.p0i16(i32, i16*) nounwind
+declare i32 @llvm.arm.strex.p0i32(i32, i32*) nounwind
+
+; CHECK-LABEL: test_clear:
+; CHECK: clrex
+define void @test_clear() {
+  call void @llvm.arm.clrex()
+  ret void
+}
+
+declare void @llvm.arm.clrex() nounwind
+
+@base = global i32* null
+
+define void @excl_addrmode() {
+; CHECK-T2ADDRMODE-LABEL: excl_addrmode:
+  %base1020 = load i32** @base
+  %offset1020 = getelementptr i32* %base1020, i32 255
+  call i32 @llvm.arm.ldrex.p0i32(i32* %offset1020)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1020)
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
+
+  %base1024 = load i32** @base
+  %offset1024 = getelementptr i32* %base1024, i32 256
+  call i32 @llvm.arm.ldrex.p0i32(i32* %offset1024)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1024)
+; CHECK-T2ADDRMODE: add.w r[[ADDR:[0-9]+]], {{r[0-9]+}}, #1024
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
+
+  %base1 = load i32** @base
+  %addr8 = bitcast i32* %base1 to i8*
+  %offset1_8 = getelementptr i8* %addr8, i32 1
+  %offset1 = bitcast i8* %offset1_8 to i32*
+  call i32 @llvm.arm.ldrex.p0i32(i32* %offset1)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1)
+; CHECK-T2ADDRMODE: adds r[[ADDR:[0-9]+]], #1
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
+
+  %local = alloca i8, i32 1024
+  %local32 = bitcast i8* %local to i32*
+  call i32 @llvm.arm.ldrex.p0i32(i32* %local32)
+  call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32)
+; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp
+; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
+; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/ldstrexd.ll b/test/CodeGen/ARM/ldstrexd.ll
deleted file mode 100644
index 0c0911a86e72..000000000000
--- a/test/CodeGen/ARM/ldstrexd.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
-
-%0 = type { i32, i32 }
-
-; CHECK: f0:
-; CHECK: ldrexd
-define i64 @f0(i8* %p) nounwind readonly {
-entry:
-  %ldrexd = tail call %0 @llvm.arm.ldrexd(i8* %p)
-  %0 = extractvalue %0 %ldrexd, 1
-  %1 = extractvalue %0 %ldrexd, 0
-  %2 = zext i32 %0 to i64
-  %3 = zext i32 %1 to i64
-  %shl = shl nuw i64 %2, 32
-  %4 = or i64 %shl, %3
-  ret i64 %4
-}
-
-; CHECK: f1:
-; CHECK: strexd
-define i32 @f1(i8* %ptr, i64 %val) nounwind {
-entry:
-  %tmp4 = trunc i64 %val to i32
-  %tmp6 = lshr i64 %val, 32
-  %tmp7 = trunc i64 %tmp6 to i32
-  %strexd = tail call i32 @llvm.arm.strexd(i32 %tmp4, i32 %tmp7, i8* %ptr)
-  ret i32 %strexd
-}
-
-declare %0 @llvm.arm.ldrexd(i8*) nounwind readonly
-declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
-
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
index 4d75f581a1d2..8a3ba96497e7 100644
--- a/test/CodeGen/ARM/lit.local.cfg
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/ARM/load-address-masked.ll b/test/CodeGen/ARM/load-address-masked.ll
new file mode 100644
index 000000000000..65cc31104bc9
--- /dev/null
+++ b/test/CodeGen/ARM/load-address-masked.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv4t-unknown-linux-gnueabi -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv4t-unknown-linux-gnueabi"
+
+@a = global i32 0, align 4
+
+define i32 @foo() {
+entry:
+  ret i32 and (i32 ptrtoint (i32* @a to i32), i32 255)
+}
+
+; CHECK-LABEL: foo:
+; CHECK: ldrb    r0, .LCPI0_0
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
index 15a415df731d..00ca2e8b1b75 100644
--- a/test/CodeGen/ARM/load-global.ll
+++ b/test/CodeGen/ARM/load-global.ll
@@ -26,7 +26,7 @@ define i32 @test1() {
 ; PIC: .long L_G$non_lazy_ptr-(LPC0_0+8)
 
 ; PIC_T: _test1
-; PIC_T: ldr.n r0, LCPI0_0
+; PIC_T: ldr r0, LCPI0_0
 ; PIC_T: add r0, pc
 ; PIC_T: ldr r0, [r0]
 ; PIC_T: ldr r0, [r0]
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
index bdd408164992..7a208ea41752 100644
--- a/test/CodeGen/ARM/load_i1_select.ll
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-ios0.0.0"
 ; Codegen should only compare one bit of the loaded value.
 ; rdar://10887484
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: ldrb r[[R0:[0-9]+]], [r0]
 ; CHECK: tst.w r[[R0]], #1
 define void @foo(i8* %call, double* %p) nounwind {
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index 0f1c7be6a3d2..7fffc81797cb 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -1,33 +1,33 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 entry:
         ret i64 0
 }
 
 define i64 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 entry:
         ret i64 1
 }
 
 define i64 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mvn r0, #-2147483648
 entry:
         ret i64 2147483647
 }
 
 define i64 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mov r0, #-2147483648
 entry:
         ret i64 2147483648
 }
 
 define i64 @f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvn r0, #0
 ; CHECK: mvn r1, #-2147483648
 entry:
@@ -35,7 +35,7 @@ entry:
 }
 
 define i64 @f6(i64 %x, i64 %y) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: adds
 ; CHECK: adc
 entry:
@@ -44,7 +44,7 @@ entry:
 }
 
 define void @f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 entry:
         %tmp = call i64 @f8( )          ; <i64> [#uses=0]
         ret void
@@ -53,7 +53,7 @@ entry:
 declare i64 @f8()
 
 define i64 @f9(i64 %a, i64 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: subs r
 ; CHECK: sbc
 entry:
@@ -62,7 +62,7 @@ entry:
 }
 
 define i64 @f(i32 %a, i32 %b) {
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: smull
 entry:
         %tmp = sext i32 %a to i64               ; <i64> [#uses=1]
@@ -72,7 +72,7 @@ entry:
 }
 
 define i64 @g(i32 %a, i32 %b) {
-; CHECK: g:
+; CHECK-LABEL: g:
 ; CHECK: umull
 entry:
         %tmp = zext i32 %a to i64               ; <i64> [#uses=1]
@@ -82,7 +82,7 @@ entry:
 }
 
 define i64 @f10() {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 entry:
         %a = alloca i64, align 8                ; <i64*> [#uses=1]
         %retval = load i64* %a          ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll
index e4a00e9ac303..2cf91c32bc1a 100644
--- a/test/CodeGen/ARM/longMAC.ll
+++ b/test/CodeGen/ARM/longMAC.ll
@@ -2,7 +2,7 @@
 ; Check generated signed and unsigned multiply accumulate long.
 
 define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
-;CHECK: MACLongTest1:
+;CHECK-LABEL: MACLongTest1:
 ;CHECK: umlal
   %conv = zext i32 %a to i64
   %conv1 = zext i32 %b to i64
@@ -12,7 +12,7 @@ define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
 }
 
 define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c)  {
-;CHECK: MACLongTest2:
+;CHECK-LABEL: MACLongTest2:
 ;CHECK: smlal
   %conv = sext i32 %a to i64
   %conv1 = sext i32 %b to i64
@@ -22,7 +22,7 @@ define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c)  {
 }
 
 define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
-;CHECK: MACLongTest3:
+;CHECK-LABEL: MACLongTest3:
 ;CHECK: umlal
   %conv = zext i32 %b to i64
   %conv1 = zext i32 %a to i64
@@ -33,7 +33,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
 }
 
 define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
-;CHECK: MACLongTest4:
+;CHECK-LABEL: MACLongTest4:
 ;CHECK: smlal
   %conv = sext i32 %b to i64
   %conv1 = sext i32 %a to i64
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index a99a7ec86c1e..3e986d802d81 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 define i64 @f0(i64 %A, i64 %B) {
-; CHECK: f0
+; CHECK-LABEL: f0:
 ; CHECK:      lsrs    r3, r3, #1
 ; CHECK-NEXT: rrx     r2, r2
 ; CHECK-NEXT: subs    r0, r0, r2
@@ -13,7 +13,7 @@ define i64 @f0(i64 %A, i64 %B) {
 }
 
 define i32 @f1(i64 %x, i64 %y) {
-; CHECK: f1
+; CHECK-LABEL: f1:
 ; CHECK: lsl{{.*}}r2
 	%a = shl i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -21,7 +21,7 @@ define i32 @f1(i64 %x, i64 %y) {
 }
 
 define i32 @f2(i64 %x, i64 %y) {
-; CHECK: f2
+; CHECK-LABEL: f2:
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
@@ -34,7 +34,7 @@ define i32 @f2(i64 %x, i64 %y) {
 }
 
 define i32 @f3(i64 %x, i64 %y) {
-; CHECK: f3
+; CHECK-LABEL: f3:
 ; CHECK:      lsr{{.*}}r2
 ; CHECK-NEXT: rsb     r3, r2, #32
 ; CHECK-NEXT: sub     r2, r2, #32
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
index 248c4bd1beea..103642b8b72f 100644
--- a/test/CodeGen/ARM/lsr-icmp-imm.ll
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -4,7 +4,7 @@
 ; LSR should compare against the post-incremented induction variable.
 ; In this case, the immediate value is -2 which requires a cmn instruction.
 ;
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: %for.body
 ; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
 ; CHECK: cmn{{.*}}[[IV]], #2
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 9b0f3e54e88a..26d4be2e06ff 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -7,8 +7,7 @@
 ; CHECK: sub sp, #{{40|32|28|24}}
 
 ; CHECK: %for.inc
-; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
-; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
+; CHECK-NOT: ldr
 ; CHECK: add
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index 03abd762a261..7e4b309fd9d1 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -6,7 +6,7 @@
 
 define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
@@ -25,7 +25,7 @@ entry:
 ; rdar://10660865
 define void @f2() nounwind ssp {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cmp
 ; CHECK: poplt
 ; CHECK-NOT: cmp
@@ -49,7 +49,7 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 ; rdar://12462006
 define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: sub
 ; CHECK: cmp
 ; CHECK: blt
diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll
index 8656c5bbd72c..fc9b22614d6d 100644
--- a/test/CodeGen/ARM/machine-licm.ll
+++ b/test/CodeGen/ARM/machine-licm.ll
@@ -12,7 +12,7 @@
 
 define void @t(i32* nocapture %vals, i32 %c) nounwind {
 entry:
-; ARM: t:
+; ARM-LABEL: t:
 ; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0
 ; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool.
 ; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy
@@ -23,14 +23,14 @@ entry:
 ; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]]
 ; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
 
-; MOVT: t:
+; MOVT-LABEL: t:
 ; MOVT: movw [[REGISTER_2:r[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+8))
 ; MOVT: movt [[REGISTER_2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+8))
 ; MOVT: LPC0_0:
 ; MOVT: ldr r{{[0-9]+}}, [pc, [[REGISTER_2]]]
 ; MOVT: ldr r{{[0-9]+}}, [r{{[0-9]+}}]
 
-; THUMB: t:
+; THUMB-LABEL: t:
   %0 = icmp eq i32 %c, 0                          ; <i1> [#uses=1]
   br i1 %0, label %return, label %bb.nph
 
@@ -40,7 +40,7 @@ bb.nph:                                           ; preds = %entry
 ; ARM: .section
 
 ; THUMB: BB#1
-; THUMB: ldr.n r2, LCPI0_0
+; THUMB: ldr r2, LCPI0_0
 ; THUMB: add r2, pc
 ; THUMB: ldr r{{[0-9]+}}, [r2]
 ; THUMB: LBB0_2
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index d846e5cb268b..946c63ed40c8 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -15,7 +15,7 @@
 
 define i32 @t0() {
 entry:
-; CHECK: t0:
+; CHECK-LABEL: t0:
 ; CHECK: vldr [[REG1:d[0-9]+]],
 ; CHECK: vstr [[REG1]], 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
@@ -24,7 +24,7 @@ entry:
 
 define void @t1(i8* nocapture %C) nounwind {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
 ; CHECK: adds r0, #15
@@ -37,7 +37,7 @@ entry:
 
 define void @t2(i8* nocapture %C) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: ldr [[REG2:r[0-9]+]], [r1, #32]
 ; CHECK: str [[REG2]], [r0, #32]
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
@@ -52,7 +52,7 @@ entry:
 
 define void @t3(i8* nocapture %C) nounwind {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
 ; CHECK: adds r0, #16
@@ -65,7 +65,7 @@ entry:
 
 define void @t4(i8* nocapture %C) nounwind {
 entry:
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
 ; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
@@ -74,7 +74,7 @@ entry:
 
 define void @t5(i8* nocapture %C) nounwind {
 entry:
-; CHECK: t5:
+; CHECK-LABEL: t5:
 ; CHECK: movs [[REG5:r[0-9]+]], #0
 ; CHECK: strb [[REG5]], [r0, #6]
 ; CHECK: movw [[REG6:r[0-9]+]], #21587
@@ -87,7 +87,7 @@ entry:
 
 define void @t6() nounwind {
 entry:
-; CHECK: t6:
+; CHECK-LABEL: t6:
 ; CHECK: vld1.8 {[[REG8:d[0-9]+]]}, [r0]
 ; CHECK: vstr [[REG8]], [r1]
 ; CHECK: adds r1, #6
diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll
index ee8c36433885..4e86d05b0a1c 100644
--- a/test/CodeGen/ARM/memset-inline.ll
+++ b/test/CodeGen/ARM/memset-inline.ll
@@ -2,7 +2,7 @@
 
 define void @t1(i8* nocapture %c) nounwind optsize {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: movs r1, #0
 ; CHECK: str r1, [r0]
 ; CHECK: str r1, [r0, #4]
@@ -13,7 +13,7 @@ entry:
 
 define void @t2() nounwind ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: add.w r1, r0, #10
 ; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
 ; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll
index 4b15326008a4..5da335fa2030 100644
--- a/test/CodeGen/ARM/misched-copy-arm.ll
+++ b/test/CodeGen/ARM/misched-copy-arm.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=thumb -mcpu=swift -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
 ;
 ; Loop counter copies should be eliminated.
 ; There is also a MUL here, but we don't care where it is scheduled.
@@ -28,3 +28,52 @@ for.end:                                          ; preds = %for.body, %entry
   %s.0.lcssa = phi i32 [ 0, %entry ], [ %mul, %for.body ]
   ret i32 %s.0.lcssa
 }
+
+
+; This case was a crasher in constrainLocalCopy.
+; The problem was the t2LDR_PRE defining both the global and local lrg.
+; CHECK-LABEL: *** Final schedule for BB#5 ***
+; CHECK: %[[R4:vreg[0-9]+]]<def>, %[[R1:vreg[0-9]+]]<def,tied2> = t2LDR_PRE %[[R1]]<tied1>
+; CHECK: %vreg{{[0-9]+}}<def> = COPY %[[R1]]
+; CHECK: %vreg{{[0-9]+}}<def> = COPY %[[R4]]
+; CHECK-LABEL: MACHINEINSTRS
+%struct.rtx_def = type { [4 x i8], [1 x %union.rtunion_def] }
+%union.rtunion_def = type { i64 }
+
+; Function Attrs: nounwind ssp
+declare hidden fastcc void @df_ref_record(i32* nocapture, %struct.rtx_def*, %struct.rtx_def**, %struct.rtx_def*, i32, i32) #0
+
+; Function Attrs: nounwind ssp
+define hidden fastcc void @df_def_record_1(i32* nocapture %df, %struct.rtx_def* %x, %struct.rtx_def* %insn) #0 {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.end28, %entry
+  %loc.0 = phi %struct.rtx_def** [ %rtx31, %if.end28 ], [ undef, %entry ]
+  %dst.0 = phi %struct.rtx_def* [ %0, %if.end28 ], [ undef, %entry ]
+  switch i32 undef, label %if.end47 [
+    i32 61, label %if.then46
+    i32 64, label %if.then24
+    i32 132, label %if.end28
+    i32 133, label %if.end28
+  ]
+
+if.then24:                                        ; preds = %while.cond
+  br label %if.end28
+
+if.end28:                                         ; preds = %if.then24, %while.cond, %while.cond
+  %dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ]
+  %arrayidx30 = getelementptr inbounds %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0
+  %rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def**
+  %0 = load %struct.rtx_def** %rtx31, align 4
+  br label %while.cond
+
+if.then46:                                        ; preds = %while.cond
+  tail call fastcc void @df_ref_record(i32* %df, %struct.rtx_def* %dst.0, %struct.rtx_def** %loc.0, %struct.rtx_def* %insn, i32 0, i32 undef)
+  unreachable
+
+if.end47:                                         ; preds = %while.cond
+  ret void
+}
+
+attributes #0 = { nounwind ssp }
diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll
index 066bf98de651..8f0d3a89a30a 100644
--- a/test/CodeGen/ARM/mls.ll
+++ b/test/CodeGen/ARM/mls.ll
@@ -14,15 +14,15 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     ret i32 %tmp2
 }
 
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mls	r0, r0, r1, r2
-; NO_MULOPS: f1:
+; NO_MULOPS-LABEL: f1:
 ; NO_MULOPS: mul r0, r0, r1
 ; NO_MULOPS-NEXT: sub r0, r2, r0
 
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mul r0, r0, r1
 ; CHECK-NEXT: sub r0, r0, r2
-; NO_MULOPS: f2:
+; NO_MULOPS-LABEL: f2:
 ; NO_MULOPS: mul r0, r0, r1
 ; NO_MULOPS-NEXT: sub r0, r0, r2
diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll
index e82aca0e9c69..25c1bfe32044 100644
--- a/test/CodeGen/ARM/movt.ll
+++ b/test/CodeGen/ARM/movt.ll
@@ -2,7 +2,7 @@
 ; rdar://7317664
 
 define i32 @t(i32 %X) nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movt r0, #65535
 entry:
 	%0 = or i32 %X, -65536
@@ -10,7 +10,7 @@ entry:
 }
 
 define i32 @t2(i32 %X) nounwind {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: movt r0, #65534
 entry:
 	%0 = or i32 %X, -131072
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index c50a23354678..482d8f2888ce 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -2,7 +2,7 @@
 
 define i32 @t9(i32 %v) nounwind readnone {
 entry:
-; CHECK: t9:
+; CHECK-LABEL: t9:
 ; CHECK: add r0, r0, r0, lsl #3
 	%0 = mul i32 %v, 9
 	ret i32 %0
@@ -10,7 +10,7 @@ entry:
 
 define i32 @t7(i32 %v) nounwind readnone {
 entry:
-; CHECK: t7:
+; CHECK-LABEL: t7:
 ; CHECK: rsb r0, r0, r0, lsl #3
 	%0 = mul i32 %v, 7
 	ret i32 %0
@@ -18,7 +18,7 @@ entry:
 
 define i32 @t5(i32 %v) nounwind readnone {
 entry:
-; CHECK: t5:
+; CHECK-LABEL: t5:
 ; CHECK: add r0, r0, r0, lsl #2
         %0 = mul i32 %v, 5
         ret i32 %0
@@ -26,7 +26,7 @@ entry:
 
 define i32 @t3(i32 %v) nounwind readnone {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: add r0, r0, r0, lsl #1
         %0 = mul i32 %v, 3
         ret i32 %0
@@ -34,7 +34,7 @@ entry:
 
 define i32 @t12288(i32 %v) nounwind readnone {
 entry:
-; CHECK: t12288:
+; CHECK-LABEL: t12288:
 ; CHECK: add r0, r0, r0, lsl #1
 ; CHECK: lsl{{.*}}#12
         %0 = mul i32 %v, 12288
@@ -43,7 +43,7 @@ entry:
 
 define i32 @tn9(i32 %v) nounwind readnone {
 entry:
-; CHECK: tn9:
+; CHECK-LABEL: tn9:
 ; CHECK: add	r0, r0, r0, lsl #3
 ; CHECK: rsb	r0, r0, #0
         %0 = mul i32 %v, -9
@@ -52,7 +52,7 @@ entry:
 
 define i32 @tn7(i32 %v) nounwind readnone {
 entry:
-; CHECK: tn7:
+; CHECK-LABEL: tn7:
 ; CHECK: sub r0, r0, r0, lsl #3
 	%0 = mul i32 %v, -7
 	ret i32 %0
@@ -60,7 +60,7 @@ entry:
 
 define i32 @tn5(i32 %v) nounwind readnone {
 entry:
-; CHECK: tn5:
+; CHECK-LABEL: tn5:
 ; CHECK: add r0, r0, r0, lsl #2
 ; CHECK: rsb r0, r0, #0
         %0 = mul i32 %v, -5
@@ -69,7 +69,7 @@ entry:
 
 define i32 @tn3(i32 %v) nounwind readnone {
 entry:
-; CHECK: tn3:
+; CHECK-LABEL: tn3:
 ; CHECK: sub r0, r0, r0, lsl #2
         %0 = mul i32 %v, -3
         ret i32 %0
@@ -77,7 +77,7 @@ entry:
 
 define i32 @tn12288(i32 %v) nounwind readnone {
 entry:
-; CHECK: tn12288:
+; CHECK-LABEL: tn12288:
 ; CHECK: sub r0, r0, r0, lsl #2
 ; CHECK: lsl{{.*}}#12
         %0 = mul i32 %v, -12288
diff --git a/test/CodeGen/ARM/mulhi.ll b/test/CodeGen/ARM/mulhi.ll
index 932004c5dd85..63705c502779 100644
--- a/test/CodeGen/ARM/mulhi.ll
+++ b/test/CodeGen/ARM/mulhi.ll
@@ -3,13 +3,13 @@
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=M3
 
 define i32 @smulhi(i32 %x, i32 %y) nounwind {
-; V6: smulhi:
+; V6-LABEL: smulhi:
 ; V6: smmul
 
-; V4: smulhi:
+; V4-LABEL: smulhi:
 ; V4: smull
 
-; M3: smulhi:
+; M3-LABEL: smulhi:
 ; M3: smull
         %tmp = sext i32 %x to i64               ; <i64> [#uses=1]
         %tmp1 = sext i32 %y to i64              ; <i64> [#uses=1]
@@ -20,13 +20,13 @@ define i32 @smulhi(i32 %x, i32 %y) nounwind {
 }
 
 define i32 @umulhi(i32 %x, i32 %y) nounwind {
-; V6: umulhi:
+; V6-LABEL: umulhi:
 ; V6: umull
 
-; V4: umulhi:
+; V4-LABEL: umulhi:
 ; V4: umull
 
-; M3: umulhi:
+; M3-LABEL: umulhi:
 ; M3: umull
         %tmp = zext i32 %x to i64               ; <i64> [#uses=1]
         %tmp1 = zext i32 %y to i64              ; <i64> [#uses=1]
@@ -38,13 +38,13 @@ define i32 @umulhi(i32 %x, i32 %y) nounwind {
 
 ; rdar://r10152911
 define i32 @t3(i32 %a) nounwind {
-; V6: t3:
+; V6-LABEL: t3:
 ; V6: smmla
 
-; V4: t3:
+; V4-LABEL: t3:
 ; V4: smull
 
-; M3: t3:
+; M3-LABEL: t3:
 ; M3-NOT: smmla
 ; M3: smull
 entry:
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
index 571c21a833ec..2c5ccd7442e0 100644
--- a/test/CodeGen/ARM/mvn.ll
+++ b/test/CodeGen/ARM/mvn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep mvn | count 8
+; RUN: llc < %s -march=arm | grep mvn | count 9
 
 define i32 @f1() {
 entry:
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
index c00f0d17c9f5..dd2e67fe7753 100644
--- a/test/CodeGen/ARM/neon-spfp.ll
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -1,41 +1,41 @@
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-LINUXA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-LINUXA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-LINUXA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-LINUXA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=CHECK-LINUXSWIFT
 
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15
-; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT
 
-; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5
-; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8
-; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9
-; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15
-; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-DARWINA5
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-DARWINA8
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-DARWINA9
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-DARWINA15
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=CHECK-DARWINSWIFT
 
 ; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since
 ; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected.
 
 @.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
 
-; CHECK-LINUXA5: main:
-; CHECK-LINUXA8: main:
-; CHECK-LINUXA9: main:
-; CHECK-LINUXA15: main:
-; CHECK-LINUXSWIFT: main:
-; CHECK-UNSAFEA5: main:
-; CHECK-UNSAFEA8: main:
-; CHECK-UNSAFEA9: main:
-; CHECK-UNSAFEA15: main:
-; CHECK-UNSAFESWIFT: main:
-; CHECK-DARWINA5: main:
-; CHECK-DARWINA8: main:
-; CHECK-DARWINA9: main:
-; CHECK-DARWINA15: main:
-; CHECK-DARWINSWIFT: main:
+; CHECK-LINUXA5-LABEL: main:
+; CHECK-LINUXA8-LABEL: main:
+; CHECK-LINUXA9-LABEL: main:
+; CHECK-LINUXA15-LABEL: main:
+; CHECK-LINUXSWIFT-LABEL: main:
+; CHECK-UNSAFEA5-LABEL: main:
+; CHECK-UNSAFEA8-LABEL: main:
+; CHECK-UNSAFEA9-LABEL: main:
+; CHECK-UNSAFEA15-LABEL: main:
+; CHECK-UNSAFESWIFT-LABEL: main:
+; CHECK-DARWINA5-LABEL: main:
+; CHECK-DARWINA8-LABEL: main:
+; CHECK-DARWINA9-LABEL: main:
+; CHECK-DARWINA15-LABEL: main:
+; CHECK-DARWINSWIFT-LABEL: main:
 define i32 @main() {
 entry:
   br label %for.body
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index 0a7c8b2b6aae..2e45919e7790 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
 
 define float @fmin_ole(float %x) nounwind {
-;CHECK: fmin_ole:
+;CHECK-LABEL: fmin_ole:
 ;CHECK: vmin.f32
   %cond = fcmp ole float 1.0, %x
   %min1 = select i1 %cond, float 1.0, float %x
@@ -9,7 +9,7 @@ define float @fmin_ole(float %x) nounwind {
 }
 
 define float @fmin_ole_zero(float %x) nounwind {
-;CHECK: fmin_ole_zero:
+;CHECK-LABEL: fmin_ole_zero:
 ;CHECK-NOT: vmin.f32
   %cond = fcmp ole float 0.0, %x
   %min1 = select i1 %cond, float 0.0, float %x
@@ -17,7 +17,7 @@ define float @fmin_ole_zero(float %x) nounwind {
 }
 
 define float @fmin_ult(float %x) nounwind {
-;CHECK: fmin_ult:
+;CHECK-LABEL: fmin_ult:
 ;CHECK: vmin.f32
   %cond = fcmp ult float %x, 1.0
   %min1 = select i1 %cond, float %x, float 1.0
@@ -25,7 +25,7 @@ define float @fmin_ult(float %x) nounwind {
 }
 
 define float @fmax_ogt(float %x) nounwind {
-;CHECK: fmax_ogt:
+;CHECK-LABEL: fmax_ogt:
 ;CHECK: vmax.f32
   %cond = fcmp ogt float 1.0, %x
   %max1 = select i1 %cond, float 1.0, float %x
@@ -33,7 +33,7 @@ define float @fmax_ogt(float %x) nounwind {
 }
 
 define float @fmax_uge(float %x) nounwind {
-;CHECK: fmax_uge:
+;CHECK-LABEL: fmax_uge:
 ;CHECK: vmax.f32
   %cond = fcmp uge float %x, 1.0
   %max1 = select i1 %cond, float %x, float 1.0
@@ -41,7 +41,7 @@ define float @fmax_uge(float %x) nounwind {
 }
 
 define float @fmax_uge_zero(float %x) nounwind {
-;CHECK: fmax_uge_zero:
+;CHECK-LABEL: fmax_uge_zero:
 ;CHECK-NOT: vmax.f32
   %cond = fcmp uge float %x, 0.0
   %max1 = select i1 %cond, float %x, float 0.0
@@ -49,7 +49,7 @@ define float @fmax_uge_zero(float %x) nounwind {
 }
 
 define float @fmax_olt_reverse(float %x) nounwind {
-;CHECK: fmax_olt_reverse:
+;CHECK-LABEL: fmax_olt_reverse:
 ;CHECK: vmax.f32
   %cond = fcmp olt float %x, 1.0
   %max1 = select i1 %cond, float 1.0, float %x
@@ -57,7 +57,7 @@ define float @fmax_olt_reverse(float %x) nounwind {
 }
 
 define float @fmax_ule_reverse(float %x) nounwind {
-;CHECK: fmax_ule_reverse:
+;CHECK-LABEL: fmax_ule_reverse:
 ;CHECK: vmax.f32
   %cond = fcmp ult float 1.0, %x
   %max1 = select i1 %cond, float %x, float 1.0
@@ -65,7 +65,7 @@ define float @fmax_ule_reverse(float %x) nounwind {
 }
 
 define float @fmin_oge_reverse(float %x) nounwind {
-;CHECK: fmin_oge_reverse:
+;CHECK-LABEL: fmin_oge_reverse:
 ;CHECK: vmin.f32
   %cond = fcmp oge float %x, 1.0
   %min1 = select i1 %cond, float 1.0, float %x
@@ -73,7 +73,7 @@ define float @fmin_oge_reverse(float %x) nounwind {
 }
 
 define float @fmin_ugt_reverse(float %x) nounwind {
-;CHECK: fmin_ugt_reverse:
+;CHECK-LABEL: fmin_ugt_reverse:
 ;CHECK: vmin.f32
   %cond = fcmp ugt float 1.0, %x
   %min1 = select i1 %cond, float %x, float 1.0
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
index 277bd05ba3b6..d286d16486c1 100644
--- a/test/CodeGen/ARM/neon_spill.ll
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -24,7 +24,7 @@ declare arm_aapcs_vfpcc %2** @func4()
 define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
   call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
   %2 = call arm_aapcs_vfpcc  %0** @func2() nounwind
-  %3 = load %0** %2, align 4, !tbaa !0
+  %3 = load %0** %2, align 4
   store float 0.000000e+00, float* undef, align 4
   %4 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
   call arm_aapcs_vfpcc  void @func1(%0* %3, float* undef, float* undef, %2* undef)
@@ -35,11 +35,11 @@ define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
   %6 = call arm_aapcs_vfpcc  %2** @func4() nounwind
   %7 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
   %8 = fadd float undef, -1.000000e+05
-  store float %8, float* undef, align 16, !tbaa !3
+  store float %8, float* undef, align 16
   %9 = call arm_aapcs_vfpcc  i32 @rand() nounwind
   %10 = fmul float undef, 2.000000e+05
   %11 = fadd float %10, -1.000000e+05
-  store float %11, float* undef, align 4, !tbaa !3
+  store float %11, float* undef, align 4
   call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
   ret void
 }
@@ -47,8 +47,3 @@ define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
 declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
 
 declare arm_aapcs_vfpcc i32 @rand()
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll
index bf2770b15b01..76b604423986 100644
--- a/test/CodeGen/ARM/neon_vabs.ll
+++ b/test/CodeGen/ARM/neon_vabs.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <4 x i32> @test1(<4 x i32> %a) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: vabs.s32 q
         %tmp1neg = sub <4 x i32> zeroinitializer, %a
         %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -10,7 +10,7 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind {
 }
 
 define <4 x i32> @test2(<4 x i32> %a) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: vabs.s32 q
         %tmp1neg = sub <4 x i32> zeroinitializer, %a
         %b = icmp sge <4 x i32> %a, zeroinitializer
@@ -19,7 +19,7 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind {
 }
 
 define <8 x i16> @test3(<8 x i16> %a) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: vabs.s16 q
         %tmp1neg = sub <8 x i16> zeroinitializer, %a
         %b = icmp sgt <8 x i16> %a, zeroinitializer
@@ -28,7 +28,7 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind {
 }
 
 define <16 x i8> @test4(<16 x i8> %a) nounwind {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: vabs.s8 q
         %tmp1neg = sub <16 x i8> zeroinitializer, %a
         %b = icmp slt <16 x i8> %a, zeroinitializer
@@ -37,7 +37,7 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind {
 }
 
 define <4 x i32> @test5(<4 x i32> %a) nounwind {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: vabs.s32 q
         %tmp1neg = sub <4 x i32> zeroinitializer, %a
         %b = icmp sle <4 x i32> %a, zeroinitializer
@@ -46,7 +46,7 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind {
 }
 
 define <2 x i32> @test6(<2 x i32> %a) nounwind {
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: vabs.s32 d
         %tmp1neg = sub <2 x i32> zeroinitializer, %a
         %b = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
@@ -55,7 +55,7 @@ define <2 x i32> @test6(<2 x i32> %a) nounwind {
 }
 
 define <2 x i32> @test7(<2 x i32> %a) nounwind {
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: vabs.s32 d
         %tmp1neg = sub <2 x i32> zeroinitializer, %a
         %b = icmp sge <2 x i32> %a, zeroinitializer
@@ -64,7 +64,7 @@ define <2 x i32> @test7(<2 x i32> %a) nounwind {
 }
 
 define <4 x i16> @test8(<4 x i16> %a) nounwind {
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: vabs.s16 d
         %tmp1neg = sub <4 x i16> zeroinitializer, %a
         %b = icmp sgt <4 x i16> %a, zeroinitializer
@@ -73,7 +73,7 @@ define <4 x i16> @test8(<4 x i16> %a) nounwind {
 }
 
 define <8 x i8> @test9(<8 x i8> %a) nounwind {
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: vabs.s8 d
         %tmp1neg = sub <8 x i8> zeroinitializer, %a
         %b = icmp slt <8 x i8> %a, zeroinitializer
@@ -82,7 +82,7 @@ define <8 x i8> @test9(<8 x i8> %a) nounwind {
 }
 
 define <2 x i32> @test10(<2 x i32> %a) nounwind {
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: vabs.s32 d
         %tmp1neg = sub <2 x i32> zeroinitializer, %a
         %b = icmp sle <2 x i32> %a, zeroinitializer
diff --git a/test/CodeGen/ARM/no-fpu.ll b/test/CodeGen/ARM/no-fpu.ll
new file mode 100644
index 000000000000..fff4bccb80e9
--- /dev/null
+++ b/test/CodeGen/ARM/no-fpu.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,-vfp2 | FileCheck --check-prefix=NONEON-NOVFP %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon | FileCheck --check-prefix=NONEON %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-vfp2 | FileCheck --check-prefix=NOVFP %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,+vfp2 | FileCheck --check-prefix=NONEON-VFP %s
+
+; Check that no NEON instructions are selected when the feature is disabled.
+define void @neonop(i64* nocapture readonly %a, i64* nocapture %b) #0 {
+  %1 = bitcast i64* %a to <2 x i64>*
+  %wide.load = load <2 x i64>* %1, align 8
+  ; NONEON-NOVFP-NOT: vld1.64
+  ; NONEON-NOT: vld1.64
+  %add = add <2 x i64> %wide.load, %wide.load
+  ; NONEON-NOVFP-NOT: vadd.i64
+  ; NONEON-NOT: vadd.i64
+  %2 = bitcast i64* %b to <2 x i64>*
+  store <2 x i64> %add, <2 x i64>* %2, align 8
+  ; NONEON-NOVFP-NOT: vst1.64
+  ; NONEON-NOT: vst1.64
+  ret void
+}
+
+; Likewise with VFP instructions.
+define double @fpmult(double %a, double %b) {
+  %res = fmul double %a, %b
+  ; NONEON-NOVFP-NOT: vmov
+  ; NONEON-NOVFP-NOT: vmul.f64
+  ; NOVFP-NOT: vmov
+  ; NOVFP-NOT: vmul.f64
+  ; NONEON-VFP: vmov
+  ; NONEON-VFP: vmul.f64
+  ret double %res
+}
+
diff --git a/test/CodeGen/ARM/noreturn.ll b/test/CodeGen/ARM/noreturn.ll
new file mode 100644
index 000000000000..4c876cec9c10
--- /dev/null
+++ b/test/CodeGen/ARM/noreturn.ll
@@ -0,0 +1,50 @@
+; RUN: llc -O3 -o - %s | FileCheck %s
+; Test case from PR16882.
+target triple = "thumbv7s-apple-ios"
+
+define i32 @test1() {
+; CHECK-LABEL: @test1
+; CHECK-NOT: push
+entry:
+  tail call void @overflow() #0
+  unreachable
+}
+
+; Function Attrs: noreturn nounwind
+declare void @overflow() #0
+
+define i32 @test2(i32 %x, i32 %y) {
+; CHECK-LABEL: @test2
+; CHECK-NOT: push
+; CHECK-NOT: pop
+entry:
+  %conv = sext i32 %x to i64
+  %conv1 = sext i32 %y to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %conv2 = trunc i64 %mul to i32
+  %conv3 = sext i32 %conv2 to i64
+  %cmp = icmp eq i64 %mul, %conv3
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void @overflow() #0
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ret i32 %conv2
+}
+
+; Test case for PR17825.
+define i32 @test3() {
+; CHECK-LABEL: @test3
+; CHECK: push
+entry:
+  tail call void @overflow_with_unwind() #1
+  unreachable
+}
+
+; Function Attrs: noreturn
+declare void @overflow_with_unwind() #1
+
+attributes #0 = { noreturn nounwind }
+attributes #1 = { noreturn }
diff --git a/test/CodeGen/ARM/optselect-regclass.ll b/test/CodeGen/ARM/optselect-regclass.ll
new file mode 100644
index 000000000000..1aa452089646
--- /dev/null
+++ b/test/CodeGen/ARM/optselect-regclass.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs
+%union.opcode.0.2.5.8.15.28 = type { i32 }
+
+@opcode = external global %union.opcode.0.2.5.8.15.28, align 4
+@operands = external hidden global [50 x i8], align 4
+@.str86 = external hidden unnamed_addr constant [13 x i8], align 1
+
+; Function Attrs: nounwind ssp
+define void @xfr() {
+entry:
+  %bf.load4 = load i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4
+  %bf.clear10 = and i32 %bf.load4, 65535
+  %and11 = and i32 %bf.load4, 32768
+  %tobool12 = icmp ne i32 %and11, 0
+  %cond13 = select i1 %tobool12, i32 1073676288, i32 0
+  %or = or i32 %cond13, %bf.clear10
+  %shl = shl nuw i32 %or, 2
+  %add = add i32 0, %shl
+  tail call void (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* getelementptr inbounds ([50 x i8]* @operands, i32 0, i32 0), i32 0, i32 50, i8* getelementptr inbounds ([13 x i8]* @.str86, i32 0, i32 0), i32 undef, i32 undef, i32 %add)
+  ret void
+}
+
+declare void @__sprintf_chk(i8*, i32, i32, i8*, ...)
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 90151767b919..fbc115518f88 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -78,11 +78,34 @@ define i32 @test7(i32 %X, i32 %Y) {
 	ret i32 %tmp57
 }
 
+; Arithmetic and logical right shifts do not have the same semantics when
+; shifting by more than 16 in this context.
+
 ; CHECK: test8
-; CHECK: pkhtb   r0, r0, r1, asr #22
+; CHECK-NOT: pkhtb   r0, r0, r1, asr #22
 define i32 @test8(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, -65536
 	%tmp3 = lshr i32 %Y, 22
 	%tmp57 = or i32 %tmp3, %tmp1
 	ret i32 %tmp57
 }
+
+; CHECK-LABEL: test9:
+; CHECK: pkhtb r0, r0, r1, asr #16
+define i32 @test9(i32 %src1, i32 %src2) {
+entry:
+    %tmp = and i32 %src1, -65536
+    %tmp2 = lshr i32 %src2, 16
+    %tmp3 = or i32 %tmp, %tmp2
+    ret i32 %tmp3
+}
+
+; CHECK-LABEL: test10:
+; CHECK: pkhtb r0, r0, r1, asr #17
+define i32 @test10(i32 %src1, i32 %src2) {
+entry:
+    %tmp = and i32 %src1, -65536
+    %tmp2 = ashr i32 %src2, 17
+    %tmp3 = or i32 %tmp, %tmp2
+    ret i32 %tmp3
+}
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
index e72d51f06d4c..3c6a187d99a0 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -10,7 +10,7 @@
 
 define void @t(float %x) nounwind ssp {
 entry:
-; CHECK:     t:
+; CHECK-LABEL:     t:
 ; CHECK-NOT: vmov
 ; CHECK:     bl
   %0 = bitcast float %x to i32
diff --git a/test/CodeGen/ARM/pic.ll b/test/CodeGen/ARM/pic.ll
new file mode 100644
index 000000000000..9fc7a63bd687
--- /dev/null
+++ b/test/CodeGen/ARM/pic.ll
@@ -0,0 +1,23 @@
+; Check the function call in PIC relocation model.
+
+; If the relocation model is PIC, then the "bl" instruction for the function
+; call to the external function should come with PLT fixup type.
+
+; RUN:  llc < %s -mtriple=armv7-unknown-linux-gnueabi \
+; RUN:           -relocation-model=pic -fast-isel -verify-machineinstrs \
+; RUN:    | FileCheck %s
+
+define void @test() {
+entry:
+
+  %0 = call i32 @get()
+; CHECK: bl get(PLT)
+
+  call void @put(i32 %0)
+; CHECK: bl put(PLT)
+
+  ret void
+}
+
+declare i32 @get()
+declare void @put(i32)
diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll
index 0b9c9467c206..bdf793d91b0a 100644
--- a/test/CodeGen/ARM/popcnt.ll
+++ b/test/CodeGen/ARM/popcnt.ll
@@ -2,7 +2,7 @@
 ; Implement ctpop with vcnt
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
-;CHECK: vcnt8:
+;CHECK-LABEL: vcnt8:
 ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
@@ -10,7 +10,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
 }
 
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
-;CHECK: vcntQ8:
+;CHECK-LABEL: vcntQ8:
 ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
@@ -18,7 +18,7 @@ define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind {
-; CHECK: vcnt16:
+; CHECK-LABEL: vcnt16:
 ; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
@@ -30,7 +30,7 @@ define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind {
 }
 
 define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind {
-; CHECK: vcntQ16:
+; CHECK-LABEL: vcntQ16:
 ; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
@@ -42,7 +42,7 @@ define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind {
-; CHECK: vcnt32:
+; CHECK-LABEL: vcnt32:
 ; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
@@ -57,7 +57,7 @@ define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind {
 }
 
 define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind {
-; CHECK: vcntQ32:
+; CHECK-LABEL: vcntQ32:
 ; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
@@ -79,7 +79,7 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
 
 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
-;CHECK: vclz8:
+;CHECK-LABEL: vclz8:
 ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
@@ -87,7 +87,7 @@ define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
-;CHECK: vclz16:
+;CHECK-LABEL: vclz16:
 ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
@@ -95,7 +95,7 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
-;CHECK: vclz32:
+;CHECK-LABEL: vclz32:
 ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
@@ -103,7 +103,7 @@ define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
 }
 
 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
-;CHECK: vclzQ8:
+;CHECK-LABEL: vclzQ8:
 ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
@@ -111,7 +111,7 @@ define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
-;CHECK: vclzQ16:
+;CHECK-LABEL: vclzQ16:
 ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
@@ -119,7 +119,7 @@ define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
-;CHECK: vclzQ32:
+;CHECK-LABEL: vclzQ32:
 ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
@@ -135,7 +135,7 @@ declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
 
 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
-;CHECK: vclss8:
+;CHECK-LABEL: vclss8:
 ;CHECK: vcls.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
@@ -143,7 +143,7 @@ define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
-;CHECK: vclss16:
+;CHECK-LABEL: vclss16:
 ;CHECK: vcls.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
@@ -151,7 +151,7 @@ define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
-;CHECK: vclss32:
+;CHECK-LABEL: vclss32:
 ;CHECK: vcls.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
@@ -159,7 +159,7 @@ define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
 }
 
 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
-;CHECK: vclsQs8:
+;CHECK-LABEL: vclsQs8:
 ;CHECK: vcls.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
@@ -167,7 +167,7 @@ define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
-;CHECK: vclsQs16:
+;CHECK-LABEL: vclsQs16:
 ;CHECK: vcls.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
@@ -175,7 +175,7 @@ define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
-;CHECK: vclsQs32:
+;CHECK-LABEL: vclsQs32:
 ;CHECK: vcls.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
diff --git a/test/CodeGen/ARM/prefetch-thumb.ll b/test/CodeGen/ARM/prefetch-thumb.ll
new file mode 100644
index 000000000000..e6f6ae8d18b2
--- /dev/null
+++ b/test/CodeGen/ARM/prefetch-thumb.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=thumb -mattr=+v7         | FileCheck %s -check-prefix=THUMB2
+; TODO: Merge this test back into prefetch.ll once the ARM-mode issue is solved (the ARM-prefixed checks below are not run here).
+
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
+
+define void @t6() {                                          ; pld of a stack object should be addressed off sp
+entry:
+;ARM-LABEL: t6:
+;ARM: pld [sp]
+;ARM: pld [sp, #50]
+
+;THUMB2-LABEL: t6:
+;THUMB2: pld [sp]
+;THUMB2: pld [sp, #50]
+
+%red = alloca [100 x i8], align 1                            ; 100-byte stack buffer
+%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0   ; address of byte 0
+%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50  ; address of byte 50
+call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1)
+call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
+ret void
+}
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 9c8ff2b40962..5badb3114814 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -6,15 +6,15 @@
 
 define void @t1(i8* %ptr) nounwind  {
 entry:
-; ARM: t1:
+; ARM-LABEL: t1:
 ; ARM-NOT: pldw [r0]
 ; ARM: pld [r0]
 
-; ARM-MP: t1:
+; ARM-MP-LABEL: t1:
 ; ARM-MP: pldw [r0]
 ; ARM-MP: pld [r0]
 
-; THUMB2: t1:
+; THUMB2-LABEL: t1:
 ; THUMB2-NOT: pldw [r0]
 ; THUMB2: pld [r0]
   tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
@@ -24,10 +24,10 @@ entry:
 
 define void @t2(i8* %ptr) nounwind  {
 entry:
-; ARM: t2:
+; ARM-LABEL: t2:
 ; ARM: pld [r0, #1023]
 
-; THUMB2: t2:
+; THUMB2-LABEL: t2:
 ; THUMB2: pld [r0, #1023]
   %tmp = getelementptr i8* %ptr, i32 1023
   tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 )
@@ -36,10 +36,10 @@ entry:
 
 define void @t3(i32 %base, i32 %offset) nounwind  {
 entry:
-; ARM: t3:
+; ARM-LABEL: t3:
 ; ARM: pld [r0, r1, lsr #2]
 
-; THUMB2: t3:
+; THUMB2-LABEL: t3:
 ; THUMB2: lsrs r1, r1, #2
 ; THUMB2: pld [r0, r1]
   %tmp1 = lshr i32 %offset, 2
@@ -51,10 +51,10 @@ entry:
 
 define void @t4(i32 %base, i32 %offset) nounwind  {
 entry:
-; ARM: t4:
+; ARM-LABEL: t4:
 ; ARM: pld [r0, r1, lsl #2]
 
-; THUMB2: t4:
+; THUMB2-LABEL: t4:
 ; THUMB2: pld [r0, r1, lsl #2]
   %tmp1 = shl i32 %offset, 2
   %tmp2 = add i32 %base, %tmp1
@@ -67,10 +67,10 @@ declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
 
 define void @t5(i8* %ptr) nounwind  {
 entry:
-; ARM: t5:
+; ARM-LABEL: t5:
 ; ARM: pli [r0]
 
-; THUMB2: t5:
+; THUMB2-LABEL: t5:
 ; THUMB2: pli [r0]
   tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 0 )
   ret void
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index 94578d82fddc..e48c292db466 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -2,7 +2,7 @@
 ;
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
 ; CHECK: .Lfoo:
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: bl .Lfoo
 ; CHECK: .long .Lbaz
 ; CHECK: .Lbaz:
diff --git a/test/CodeGen/ARM/readcyclecounter.ll b/test/CodeGen/ARM/readcyclecounter.ll
new file mode 100644
index 000000000000..db47ad355d09
--- /dev/null
+++ b/test/CodeGen/ARM/readcyclecounter.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -mattr=-perfmon < %s | FileCheck %s --check-prefix=CHECK-NO-PERFMON
+; RUN: llc -mtriple=armv6-none-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK-NO-PERFMON
+
+; The performance monitor we're looking for is an ARMv7 extension. It should be
+; possible to disable it, but realistically it is present on at least every
+; v7-A processor (though not on v6, at least by default).
+
+declare i64 @llvm.readcyclecounter()
+
+define i64 @get_count() {
+  %val = call i64 @llvm.readcyclecounter()
+  ret i64 %val
+
+  ; As usual, exact registers only sort of matter but the cycle-count had better
+  ; end up in r0 in the end.
+
+; CHECK: mrc p15, #0, r0, c9, c13, #0
+; CHECK: {{movs?}} r1, #0
+
+; CHECK-NO-PERFMON: {{movs?}} r0, #0
+; CHECK-NO-PERFMON: {{movs?}} r1, #0
+}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index fd2083cf9f41..25484f484853 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -11,7 +11,7 @@
 
 define void @t1(i16* %i_ptr, i16* %o_ptr, %struct.int32x4_t* nocapture %vT0ptr, %struct.int32x4_t* nocapture %vT1ptr) nounwind {
 entry:
-; CHECK:        t1:
+; CHECK-LABEL:        t1:
 ; CHECK:        vld1.16
 ; CHECK-NOT:    vmov d
 ; CHECK:        vmovl.s16
@@ -44,7 +44,7 @@ entry:
 
 define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, %struct.int16x8_t* nocapture %vT1ptr) nounwind {
 entry:
-; CHECK:        t2:
+; CHECK-LABEL:        t2:
 ; CHECK:        vld1.16
 ; CHECK-NOT:    vmov
 ; CHECK:        vmul.i16
@@ -73,7 +73,7 @@ entry:
 }
 
 define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
-; CHECK:        t3:
+; CHECK-LABEL:        t3:
 ; CHECK:        vld3.8
 ; CHECK:        vmul.i8
 ; CHECK:        vmov r
@@ -92,7 +92,7 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
 
 define void @t4(i32* %in, i32* %out) nounwind {
 entry:
-; CHECK:        t4:
+; CHECK-LABEL:        t4:
 ; CHECK:        vld2.32
 ; CHECK-NOT:    vmov
 ; CHECK:        vld2.32
@@ -135,7 +135,7 @@ return2:
 }
 
 define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
-; CHECK:        t5:
+; CHECK-LABEL:        t5:
 ; CHECK:        vld1.32
 ; How can FileCheck match Q and D registers? We need a lisp interpreter.
 ; CHECK:        vorr {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
@@ -153,7 +153,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
-; CHECK:        t6:
+; CHECK-LABEL:        t6:
 ; CHECK:        vldr
 ; CHECK:        vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
 ; CHECK-NEXT:   vld2.8 {d[[D1]][1], d[[D0]][1]}
@@ -167,7 +167,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
 
 define void @t7(i32* %iptr, i32* %optr) nounwind {
 entry:
-; CHECK:        t7:
+; CHECK-LABEL:        t7:
 ; CHECK:        vld2.32
 ; CHECK:        vst2.32
 ; CHECK:        vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}},
@@ -189,7 +189,7 @@ entry:
 
 ; PR7156
 define arm_aapcs_vfpcc i32 @t8() nounwind {
-; CHECK: t8:
+; CHECK-LABEL: t8:
 ; CHECK: vrsqrte.f32 q8, q8
 bb.nph55.bb.nph55.split_crit_edge:
   br label %bb3
@@ -238,11 +238,10 @@ bb14:                                             ; preds = %bb6
 
 ; PR7157
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
-; CHECK:        t9:
-; CHECK:        vldr
-; CHECK-NOT:    vmov d{{.*}}, d16
-; CHECK:        vmov.i32 d17
+; CHECK-LABEL:        t9:
+; CHECK: vmov.i32 d16, #0x0
 ; CHECK-NEXT:   vst1.64 {d16, d17}, [r0:128]
+; CHECK-NEXT:   vorr d17, d16, d16
 ; CHECK-NEXT:   vst1.64 {d16, d17}, [r0:128]
   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
   %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
@@ -270,7 +269,7 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; PR7162
 define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
-; CHECK: t10:
+; CHECK-LABEL: t10:
 ; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
 ; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
 ; CHECK: vadd.f32 q8, q8, q8
diff --git a/test/CodeGen/ARM/ret_sret_vector.ll b/test/CodeGen/ARM/ret_sret_vector.ll
index 9bb3519555e8..f9c46262b6a6 100644
--- a/test/CodeGen/ARM/ret_sret_vector.ll
+++ b/test/CodeGen/ARM/ret_sret_vector.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-ios3.0.0"
 define <4 x double> @PR14337(<4 x double> %a, <4 x double> %b) {
   %foo = fadd <4 x double>  %a, %b
   ret <4 x double> %foo
-; CHECK: PR14337:
+; CHECK-LABEL: PR14337:
 ; CHECK: vst1.64
 ; CHECK: vst1.64
 }
diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll
index 670b12f249d4..d2cdeb096a88 100644
--- a/test/CodeGen/ARM/returned-ext.ll
+++ b/test/CodeGen/ARM/returned-ext.ll
@@ -10,13 +10,13 @@ declare zeroext i16 @bothzext16(i16 zeroext returned %x)
 ; The zeroext param attribute below is meant to have no effect
 define i16 @test_identity(i16 zeroext %x) {
 entry:
-; CHECKELF: test_identity:
+; CHECKELF-LABEL: test_identity:
 ; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
 ; CHECKELF: bl identity16
 ; CHECKELF: uxth r0, r0
 ; CHECKELF: bl identity32
 ; CHECKELF: mov r0, [[SAVEX]]
-; CHECKT2D: test_identity:
+; CHECKT2D-LABEL: test_identity:
 ; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
 ; CHECKT2D: blx _identity16
 ; CHECKT2D: uxth r0, r0
@@ -32,7 +32,7 @@ entry:
 ; x is not considered equal to %call (see SelectionDAGBuilder.cpp)
 define i16 @test_matched_ret(i16 %x) {
 entry:
-; CHECKELF: test_matched_ret:
+; CHECKELF-LABEL: test_matched_ret:
 
 ; This shouldn't be required
 ; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
@@ -44,7 +44,7 @@ entry:
 ; This shouldn't be required
 ; CHECKELF: mov r0, [[SAVEX]]
 
-; CHECKT2D: test_matched_ret:
+; CHECKT2D-LABEL: test_matched_ret:
 
 ; This shouldn't be required
 ; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
@@ -64,13 +64,13 @@ entry:
 
 define i16 @test_mismatched_ret(i16 %x) {
 entry:
-; CHECKELF: test_mismatched_ret:
+; CHECKELF-LABEL: test_mismatched_ret:
 ; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
 ; CHECKELF: bl retzext16
 ; CHECKELF: sxth r0, {{r[0-9]+}}
 ; CHECKELF: bl identity32
 ; CHECKELF: mov r0, [[SAVEX]]
-; CHECKT2D: test_mismatched_ret:
+; CHECKT2D-LABEL: test_mismatched_ret:
 ; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
 ; CHECKT2D: blx _retzext16
 ; CHECKT2D: sxth r0, {{r[0-9]+}}
@@ -84,13 +84,13 @@ entry:
 
 define i16 @test_matched_paramext(i16 %x) {
 entry:
-; CHECKELF: test_matched_paramext:
+; CHECKELF-LABEL: test_matched_paramext:
 ; CHECKELF: uxth r0, r0
 ; CHECKELF: bl paramzext16
 ; CHECKELF: uxth r0, r0
 ; CHECKELF: bl identity32
 ; CHECKELF: b paramzext16
-; CHECKT2D: test_matched_paramext:
+; CHECKT2D-LABEL: test_matched_paramext:
 ; CHECKT2D: uxth r0, r0
 ; CHECKT2D: blx _paramzext16
 ; CHECKT2D: uxth r0, r0
@@ -113,11 +113,11 @@ entry:
 ; optimization, don't bother checking: just verify that the calls are made
 ; in the correct order as a basic sanity check
 
-; CHECKELF: test_matched_paramext2:
+; CHECKELF-LABEL: test_matched_paramext2:
 ; CHECKELF: bl paramzext16
 ; CHECKELF: bl identity32
 ; CHECKELF: b paramzext16
-; CHECKT2D: test_matched_paramext2:
+; CHECKT2D-LABEL: test_matched_paramext2:
 ; CHECKT2D: blx _paramzext16
 ; CHECKT2D: blx _identity32
 ; CHECKT2D: b.w _paramzext16
@@ -133,7 +133,7 @@ entry:
 
 define i16 @test_matched_bothext(i16 %x) {
 entry:
-; CHECKELF: test_matched_bothext:
+; CHECKELF-LABEL: test_matched_bothext:
 ; CHECKELF: uxth r0, r0
 ; CHECKELF: bl bothzext16
 ; CHECKELF-NOT: uxth r0, r0
@@ -141,7 +141,7 @@ entry:
 ; FIXME: Tail call should be OK here
 ; CHECKELF: bl identity32
 
-; CHECKT2D: test_matched_bothext:
+; CHECKT2D-LABEL: test_matched_bothext:
 ; CHECKT2D: uxth r0, r0
 ; CHECKT2D: blx _bothzext16
 ; CHECKT2D-NOT: uxth r0, r0
@@ -157,14 +157,14 @@ entry:
 
 define i16 @test_mismatched_bothext(i16 %x) {
 entry:
-; CHECKELF: test_mismatched_bothext:
+; CHECKELF-LABEL: test_mismatched_bothext:
 ; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
 ; CHECKELF: uxth r0, {{r[0-9]+}}
 ; CHECKELF: bl bothzext16
 ; CHECKELF: sxth r0, [[SAVEX]]
 ; CHECKELF: bl identity32
 ; CHECKELF: mov r0, [[SAVEX]]
-; CHECKT2D: test_mismatched_bothext:
+; CHECKT2D-LABEL: test_mismatched_bothext:
 ; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
 ; CHECKT2D: uxth r0, {{r[0-9]+}}
 ; CHECKT2D: blx _bothzext16
diff --git a/test/CodeGen/ARM/returned-trunc-tail-calls.ll b/test/CodeGen/ARM/returned-trunc-tail-calls.ll
new file mode 100644
index 000000000000..59467271a7a7
--- /dev/null
+++ b/test/CodeGen/ARM/returned-trunc-tail-calls.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -mtriple=armv7 -arm-tail-calls | FileCheck %s
+
+declare i16 @ret16(i16 returned)
+declare i32 @ret32(i32 returned)
+
+define i32 @test1(i32 %val) {
+; CHECK-LABEL: test1:
+; CHECK: bl {{_?}}ret16
+  %in = trunc i32 %val to i16
+  tail call i16 @ret16(i16 returned %in)
+  ret i32 %val
+}
+
+define i16 @test2(i32 %val) {
+; CHECK-LABEL: test2:
+; CHECK: b {{_?}}ret16
+  %in = trunc i32 %val to i16
+  tail call i16 @ret16(i16 returned %in)
+  ret i16 %in
+}
+
+declare {i32, i8} @take_i32_i8({i32, i8} returned)
+define { i8, i8 } @test_nocommon_value({i32, i32} %in) {
+; CHECK-LABEL: test_nocommon_value:
+; CHECK: b {{_?}}take_i32_i8
+
+  %first = extractvalue {i32, i32} %in, 0
+  %first.trunc = trunc i32 %first to i8
+
+  %second = extractvalue {i32, i32} %in, 1
+  %second.trunc = trunc i32 %second to i8
+
+  %tmp = insertvalue {i32, i8} undef, i32 %first, 0
+  %callval = insertvalue {i32, i8} %tmp, i8 %second.trunc, 1
+  tail call {i32, i8} @take_i32_i8({i32, i8} returned %callval)
+
+  %restmp = insertvalue {i8, i8} undef, i8 %first.trunc, 0
+  %res = insertvalue {i8, i8} %restmp, i8 %second.trunc, 1
+  ret {i8, i8} %res
+}
+
+declare {i32, {i32, i32}} @give_i32_i32_i32()
+define {{i32, i32}, i32} @test_structs_different_shape() {
+; CHECK-LABEL: test_structs_different_shape:
+; CHECK: b {{_?}}give_i32_i32_i32
+  %val = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %first = extractvalue {i32, {i32, i32}} %val, 0
+  %second = extractvalue {i32, {i32, i32}} %val, 1, 0
+  %third = extractvalue {i32, {i32, i32}} %val, 1, 1
+
+  %restmp = insertvalue {{i32, i32}, i32} undef, i32 %first, 0, 0
+  %reseventmper = insertvalue {{i32, i32}, i32} %restmp, i32 %second, 0, 1
+  %res = insertvalue {{i32, i32}, i32} %reseventmper, i32 %third, 1
+
+  ret {{i32, i32}, i32} %res
+}
+
+define i32 @test_undef_asymmetry() { ; passes undef to @ret32 but returns the constant 2
+; CHECK-LABEL: test_undef_asymmetry:
+; CHECK: bl {{_?}}ret32
+; CHECK-NOT: b {{_?}}ret32
+  tail call i32 @ret32(i32 returned undef)
+  ret i32 2 ; the checks above expect a regular call (bl), not a tail branch (b), to @ret32
+}
+
+define {{}, {{}, i32, {}}, [1 x i32]} @evil_empty_aggregates() {
+; CHECK-LABEL: evil_empty_aggregates:
+; CHECK: b {{_?}}give_i32_i32_i32
+  %agg = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %first = extractvalue {i32, {i32, i32}} %agg, 0
+  %second = extractvalue {i32, {i32, i32}} %agg, 1, 0
+
+  %restmp = insertvalue {{}, {{}, i32, {}}, [1 x i32]} undef, i32 %first, 1, 1
+  %res = insertvalue {{}, {{}, i32, {}}, [1 x i32]} %restmp, i32 %second, 2, 0
+  ret {{}, {{}, i32, {}}, [1 x i32]} %res
+}
+
+define i32 @structure_is_unimportant() {
+; CHECK-LABEL: structure_is_unimportant:
+; CHECK: b {{_?}}give_i32_i32_i32
+  %val = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %res = extractvalue {i32, {i32, i32}} %val, 0
+  ret i32 %res
+}
+
+declare i64 @give_i64()
+define i64 @direct_i64_ok() {
+; CHECK-LABEL: direct_i64_ok:
+; CHECK: b {{_?}}give_i64
+  %val = tail call i64 @give_i64()
+  ret i64 %val
+}
+
+declare {i64, i32} @give_i64_i32()
+define {i32, i32} @trunc_i64_not_ok() {
+; CHECK-LABEL: trunc_i64_not_ok:
+; CHECK: bl {{_?}}give_i64_i32
+  %agg = tail call {i64, i32} @give_i64_i32()
+
+  %first = extractvalue {i64, i32} %agg, 0
+  %second = extractvalue {i64, i32} %agg, 1
+  %first.trunc = trunc i64 %first to i32
+
+  %tmp = insertvalue {i32, i32} undef, i32 %first.trunc, 0
+  %ret = insertvalue {i32, i32} %tmp, i32 %second, 1
+
+  ret {i32, i32} %ret
+}
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 6bb67431198a..6c380aee3d93 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -32,7 +32,7 @@ define i32 @test2(i32 %X) nounwind {
 ; rdar://9147637
 define i32 @test3(i16 zeroext %a) nounwind {
 entry:
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: revsh r0, r0
   %0 = tail call i16 @llvm.bswap.i16(i16 %a)
   %1 = sext i16 %0 to i32
@@ -43,7 +43,7 @@ declare i16 @llvm.bswap.i16(i16) nounwind readnone
 
 define i32 @test4(i16 zeroext %a) nounwind {
 entry:
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: revsh r0, r0
   %conv = zext i16 %a to i32
   %shr9 = lshr i16 %a, 8
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
index d29693e4cf92..36fbd1939c55 100644
--- a/test/CodeGen/ARM/sbfx.ll
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(i32 %a) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sbfx r0, r0, #0, #20
     %tmp = shl i32 %a, 12
     %tmp2 = ashr i32 %tmp, 12
@@ -11,7 +11,7 @@ entry:
 
 define i32 @f2(i32 %a) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: bfc	r0, #20, #12
     %tmp = shl i32 %a, 12
     %tmp2 = lshr i32 %tmp, 12
@@ -20,7 +20,7 @@ entry:
 
 define i32 @f3(i32 %a) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sbfx r0, r0, #5, #3
     %tmp = shl i32 %a, 24
     %tmp2 = ashr i32 %tmp, 29
@@ -29,7 +29,7 @@ entry:
 
 define i32 @f4(i32 %a) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ubfx r0, r0, #5, #3
     %tmp = shl i32 %a, 24
     %tmp2 = lshr i32 %tmp, 29
@@ -38,7 +38,7 @@ entry:
 
 define i32 @f5(i32 %a) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: sbfx
 ; CHECK: bx
     %tmp = shl i32 %a, 3
diff --git a/test/CodeGen/ARM/section-name.ll b/test/CodeGen/ARM/section-name.ll
new file mode 100644
index 000000000000..a0aad4733bc8
--- /dev/null
+++ b/test/CodeGen/ARM/section-name.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi | FileCheck %s
+
+; CHECK: .text
+; CHECK: .globl test1
+; CHECK: .type test1,%function
+define void @test1() {
+entry:
+  ret void
+}
+
+; CHECK: .section .test2,"ax",%progbits
+; CHECK: .globl test2
+; CHECK: .type test2,%function
+define void @test2() section ".test2" {
+entry:
+  ret void
+}
+
+; CHECK: .section .text.test3,"axG",%progbits,test3,comdat
+; CHECK: .weak test3
+; CHECK: .type test3,%function
+define linkonce_odr void @test3() {
+entry:
+  ret void
+}
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index c9ac66acbfd8..6f4bfb81d51b 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -4,18 +4,18 @@
 
 define i32 @t1(i32 %c) nounwind readnone {
 entry:
-; ARM: t1:
+; ARM-LABEL: t1:
 ; ARM: mov [[R1:r[0-9]+]], #101
 ; ARM: orr [[R1b:r[0-9]+]], [[R1]], #256
-; ARM: movgt r0, #123
+; ARM: movgt {{r[0-1]}}, #123
 
-; ARMT2: t1:
-; ARMT2: movw r0, #357
-; ARMT2: movgt r0, #123
+; ARMT2-LABEL: t1:
+; ARMT2: movw [[R:r[0-1]]], #357
+; ARMT2: movwgt [[R]], #123
 
-; THUMB2: t1:
-; THUMB2: movw r0, #357
-; THUMB2: movgt r0, #123
+; THUMB2-LABEL: t1:
+; THUMB2: movw [[R:r[0-1]]], #357
+; THUMB2: movgt [[R]], #123
 
   %0 = icmp sgt i32 %c, 1
   %1 = select i1 %0, i32 123, i32 357
@@ -24,18 +24,18 @@ entry:
 
 define i32 @t2(i32 %c) nounwind readnone {
 entry:
-; ARM: t2:
-; ARM: mov r0, #123
-; ARM: movgt r0, #101
-; ARM: orrgt r0, r0, #256
+; ARM-LABEL: t2:
+; ARM: mov [[R:r[0-9]+]], #101
+; ARM: orr [[R]], [[R]], #256
+; ARM: movle [[R]], #123
 
-; ARMT2: t2:
-; ARMT2: mov r0, #123
-; ARMT2: movwgt r0, #357
+; ARMT2-LABEL: t2:
+; ARMT2: mov [[R:r[0-1]]], #123
+; ARMT2: movwgt [[R]], #357
 
-; THUMB2: t2:
-; THUMB2: mov{{(s|\.w)}} r0, #123
-; THUMB2: movwgt r0, #357
+; THUMB2-LABEL: t2:
+; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #123
+; THUMB2: movwgt [[R]], #357
 
   %0 = icmp sgt i32 %c, 1
   %1 = select i1 %0, i32 357, i32 123
@@ -44,17 +44,17 @@ entry:
 
 define i32 @t3(i32 %a) nounwind readnone {
 entry:
-; ARM: t3:
-; ARM: mov r0, #0
-; ARM: moveq r0, #1
+; ARM-LABEL: t3:
+; ARM: mov [[R:r[0-1]]], #0
+; ARM: moveq [[R]], #1
 
-; ARMT2: t3:
-; ARMT2: mov r0, #0
-; ARMT2: moveq r0, #1
+; ARMT2-LABEL: t3:
+; ARMT2: mov [[R:r[0-1]]], #0
+; ARMT2: movweq [[R]], #1
 
-; THUMB2: t3:
-; THUMB2: mov{{(s|\.w)}} r0, #0
-; THUMB2: moveq r0, #1
+; THUMB2-LABEL: t3:
+; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #0
+; THUMB2: moveq [[R]], #1
   %0 = icmp eq i32 %a, 160
   %1 = zext i1 %0 to i32
   ret i32 %1
@@ -62,15 +62,15 @@ entry:
 
 define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
 entry:
-; ARM: t4:
+; ARM-LABEL: t4:
 ; ARM: ldr
 ; ARM: mov{{lt|ge}}
 
-; ARMT2: t4:
+; ARMT2-LABEL: t4:
 ; ARMT2: movwlt [[R0:r[0-9]+]], #65365
 ; ARMT2: movtlt [[R0]], #65365
 
-; THUMB2: t4:
+; THUMB2-LABEL: t4:
 ; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290
   %0 = icmp slt i32 %a, %b
   %1 = select i1 %0, i32 4283826005, i32 %x
@@ -80,13 +80,13 @@ entry:
 ; rdar://9758317
 define i32 @t5(i32 %a) nounwind {
 entry:
-; ARM: t5:
+; ARM-LABEL: t5:
 ; ARM-NOT: mov
 ; ARM: cmp r0, #1
 ; ARM-NOT: mov
 ; ARM: movne r0, #0
 
-; THUMB2: t5:
+; THUMB2-LABEL: t5:
 ; THUMB2-NOT: mov
 ; THUMB2: cmp r0, #1
 ; THUMB2: it ne
@@ -98,12 +98,12 @@ entry:
 
 define i32 @t6(i32 %a) nounwind {
 entry:
-; ARM: t6:
+; ARM-LABEL: t6:
 ; ARM-NOT: mov
 ; ARM: cmp r0, #0
 ; ARM: movne r0, #1
 
-; THUMB2: t6:
+; THUMB2-LABEL: t6:
 ; THUMB2-NOT: mov
 ; THUMB2: cmp r0, #0
 ; THUMB2: it ne
diff --git a/test/CodeGen/ARM/select-undef.ll b/test/CodeGen/ARM/select-undef.ll
new file mode 100644
index 000000000000..23f7eb8b352f
--- /dev/null
+++ b/test/CodeGen/ARM/select-undef.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mcpu=swift -verify-machineinstrs
+define i32 @func(i32 %arg0, i32 %arg1) {
+entry:
+  %cmp = icmp slt i32 %arg0, 10
+  %v = select i1 %cmp, i32 undef, i32 %arg1
+  ret i32 %v
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 62708ed53d05..ed006d643f87 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
 
 define i32 @f1(i32 %a.s) {
-;CHECK: f1:
+;CHECK-LABEL: f1:
 ;CHECK: moveq
 entry:
     %tmp = icmp eq i32 %a.s, 4
@@ -12,7 +12,7 @@ entry:
 }
 
 define i32 @f2(i32 %a.s) {
-;CHECK: f2:
+;CHECK-LABEL: f2:
 ;CHECK: movgt
 entry:
     %tmp = icmp sgt i32 %a.s, 4
@@ -21,7 +21,7 @@ entry:
 }
 
 define i32 @f3(i32 %a.s, i32 %b.s) {
-;CHECK: f3:
+;CHECK-LABEL: f3:
 ;CHECK: movlt
 entry:
     %tmp = icmp slt i32 %a.s, %b.s
@@ -30,7 +30,7 @@ entry:
 }
 
 define i32 @f4(i32 %a.s, i32 %b.s) {
-;CHECK: f4:
+;CHECK-LABEL: f4:
 ;CHECK: movle
 entry:
     %tmp = icmp sle i32 %a.s, %b.s
@@ -39,7 +39,7 @@ entry:
 }
 
 define i32 @f5(i32 %a.u, i32 %b.u) {
-;CHECK: f5:
+;CHECK-LABEL: f5:
 ;CHECK: movls
 entry:
     %tmp = icmp ule i32 %a.u, %b.u
@@ -48,7 +48,7 @@ entry:
 }
 
 define i32 @f6(i32 %a.u, i32 %b.u) {
-;CHECK: f6:
+;CHECK-LABEL: f6:
 ;CHECK: movhi
 entry:
     %tmp = icmp ugt i32 %a.u, %b.u
@@ -57,10 +57,10 @@ entry:
 }
 
 define double @f7(double %a, double %b) {
-;CHECK: f7:
+;CHECK-LABEL: f7:
 ;CHECK: movlt
-;CHECK: movlt
-;CHECK-VFP: f7:
+;CHECK: movge
+;CHECK-VFP-LABEL: f7:
 ;CHECK-VFP: vmovmi
     %tmp = fcmp olt double %a, 1.234e+00
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
@@ -75,7 +75,7 @@ define double @f7(double %a, double %b) {
 ; into the constant pool based on the value of the "icmp". If we have one "it"
 ; block generated, odds are good that we have close to the ideal code for this:
 ;
-; CHECK-NEON:      _f8:
+; CHECK-NEON-LABEL: f8:
 ; CHECK-NEON:      movw    [[R3:r[0-9]+]], #1123
 ; CHECK-NEON:      adr     [[R2:r[0-9]+]], LCPI7_0
 ; CHECK-NEON-NEXT: cmp     r0, [[R3]]
@@ -94,7 +94,7 @@ define arm_apcscc float @f8(i32 %a) nounwind {
 ; Glue values can only have a single use, but the following test exposed a
 ; case where a SELECT was lowered with 2 uses of a comparison, causing the
 ; scheduler to assert.
-; CHECK-VFP: f9:
+; CHECK-VFP-LABEL: f9:
 
 declare i8* @objc_msgSend(i8*, i8*, ...)
 define void @f9() optsize {
@@ -113,7 +113,7 @@ entry:
   ret void
 }
 
-; CHECK: f10
+; CHECK-LABEL: f10:
 define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ; CHECK-NOT: floatsisf
   %1 = icmp eq i32 %a, %b
@@ -122,7 +122,7 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
   ret float %3
 }
 
-; CHECK: f11
+; CHECK-LABEL: f11:
 define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ; CHECK-NOT: floatsisf
   %1 = icmp eq i32 %a, %b
@@ -130,7 +130,7 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
   ret float %2
 }
 
-; CHECK: f12
+; CHECK-LABEL: f12:
 define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 ; CHECK-NOT: floatunsisf
   %1 = icmp eq i32 %a, %b
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 750780891261..e13504a42a16 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -3,11 +3,11 @@
 ; rdar://8662825
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
-; ARM: t1:
+; ARM-LABEL: t1:
 ; ARM: suble r1, r1, #-2147483647
 ; ARM: mov r0, r1
 
-; T2: t1:
+; T2-LABEL: t1:
 ; T2: mvn r0, #-2147483648
 ; T2: addle r1, r0
 ; T2: mov r0, r1
@@ -18,11 +18,11 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-; ARM: t2:
+; ARM-LABEL: t2:
 ; ARM: suble r1, r1, #10
 ; ARM: mov r0, r1
 
-; T2: t2:
+; T2-LABEL: t2:
 ; T2: suble r1, #10
 ; T2: mov r0, r1
   %tmp1 = icmp sgt i32 %c, 10
@@ -32,11 +32,11 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 }
 
 define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
-; ARM: t3:
+; ARM-LABEL: t3:
 ; ARM: andge r3, r3, r2
 ; ARM: mov r0, r3
 
-; T2: t3:
+; T2-LABEL: t3:
 ; T2: andge r3, r2
 ; T2: mov r0, r3
   %cond = icmp slt i32 %a, %b
@@ -46,11 +46,11 @@ define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
 }
 
 define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
-; ARM: t4:
+; ARM-LABEL: t4:
 ; ARM: orrge r3, r3, r2
 ; ARM: mov r0, r3
 
-; T2: t4:
+; T2-LABEL: t4:
 ; T2: orrge r3, r2
 ; T2: mov r0, r3
   %cond = icmp slt i32 %a, %b
@@ -61,11 +61,11 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
 
 define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
 entry:
-; ARM: t5:
+; ARM-LABEL: t5:
 ; ARM-NOT: moveq
 ; ARM: orreq r2, r2, #1
 
-; T2: t5:
+; T2-LABEL: t5:
 ; T2-NOT: moveq
 ; T2: orreq r2, r2, #1
   %tmp1 = icmp eq i32 %a, %b
@@ -75,11 +75,11 @@ entry:
 }
 
 define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-; ARM: t6:
+; ARM-LABEL: t6:
 ; ARM-NOT: movge
 ; ARM: eorlt r3, r3, r2
 
-; T2: t6:
+; T2-LABEL: t6:
 ; T2-NOT: movge
 ; T2: eorlt r3, r2
   %cond = icmp slt i32 %a, %b
@@ -90,11 +90,11 @@ define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 
 define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
 entry:
-; ARM: t7:
+; ARM-LABEL: t7:
 ; ARM-NOT: lsleq
 ; ARM: andeq r2, r2, r2, lsl #1
 
-; T2: t7:
+; T2-LABEL: t7:
 ; T2-NOT: lsleq.w
 ; T2: andeq.w r2, r2, r2, lsl #1
   %tmp1 = shl i32 %c, 1
@@ -106,11 +106,11 @@ entry:
 
 ; Fold ORRri into movcc.
 define i32 @t8(i32 %a, i32 %b) nounwind {
-; ARM: t8:
+; ARM-LABEL: t8:
 ; ARM: cmp r0, r1
 ; ARM: orrge r0, r1, #1
 
-; T2: t8:
+; T2-LABEL: t8:
 ; T2: cmp r0, r1
 ; T2: orrge r0, r1, #1
   %x = or i32 %b, 1
@@ -121,11 +121,11 @@ define i32 @t8(i32 %a, i32 %b) nounwind {
 
 ; Fold ANDrr into movcc.
 define i32 @t9(i32 %a, i32 %b, i32 %c) nounwind {
-; ARM: t9:
+; ARM-LABEL: t9:
 ; ARM: cmp r0, r1
 ; ARM: andge r0, r1, r2
 
-; T2: t9:
+; T2-LABEL: t9:
 ; T2: cmp r0, r1
 ; T2: andge.w r0, r1, r2
   %x = and i32 %b, %c
@@ -136,11 +136,11 @@ define i32 @t9(i32 %a, i32 %b, i32 %c) nounwind {
 
 ; Fold EORrs into movcc.
 define i32 @t10(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-; ARM: t10:
+; ARM-LABEL: t10:
 ; ARM: cmp r0, r1
 ; ARM: eorge r0, r1, r2, lsl #7
 
-; T2: t10:
+; T2-LABEL: t10:
 ; T2: cmp r0, r1
 ; T2: eorge.w r0, r1, r2, lsl #7
   %s = shl i32 %c, 7
@@ -152,11 +152,11 @@ define i32 @t10(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 
 ; Fold ORRri into movcc, reversing the condition.
 define i32 @t11(i32 %a, i32 %b) nounwind {
-; ARM: t11:
+; ARM-LABEL: t11:
 ; ARM: cmp r0, r1
 ; ARM: orrlt r0, r1, #1
 
-; T2: t11:
+; T2-LABEL: t11:
 ; T2: cmp r0, r1
 ; T2: orrlt r0, r1, #1
   %x = or i32 %b, 1
@@ -167,11 +167,11 @@ define i32 @t11(i32 %a, i32 %b) nounwind {
 
 ; Fold ADDri12 into movcc
 define i32 @t12(i32 %a, i32 %b) nounwind {
-; ARM: t12:
+; ARM-LABEL: t12:
 ; ARM: cmp r0, r1
 ; ARM: addge r0, r1,
 
-; T2: t12:
+; T2-LABEL: t12:
 ; T2: cmp r0, r1
 ; T2: addwge r0, r1, #3000
   %x = add i32 %b, 3000
diff --git a/test/CodeGen/ARM/setcc-sentinals.ll b/test/CodeGen/ARM/setcc-sentinals.ll
new file mode 100644
index 000000000000..8878f9bf22df
--- /dev/null
+++ b/test/CodeGen/ARM/setcc-sentinals.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mcpu=cortex-a8 -march=arm -asm-verbose=false | FileCheck %s
+
+define zeroext i1 @test0(i32 %x) nounwind {
+; CHECK-LABEL: test0:
+; CHECK-NEXT: add [[REG:(r[0-9]+)|(lr)]], r0, #1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: cmp [[REG]], #1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: bx  lr
+  %cmp1 = icmp ne i32 %x, -1
+  %not.cmp = icmp ne i32 %x, 0
+  %.cmp1 = and i1 %cmp1, %not.cmp
+  ret i1 %.cmp1
+}
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index eb971ff72e74..f14adcae663c 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -4,10 +4,10 @@
 
 
 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-; A8: test1:
+; A8-LABEL: test1:
 ; A8: add r0, r0, r1, lsl r2
 
-; A9: test1:
+; A9-LABEL: test1:
 ; A9: add r0, r0, r1, lsl r2
         %shift.upgrd.1 = zext i8 %sh to i32
         %A = shl i32 %Y, %shift.upgrd.1
@@ -16,10 +16,10 @@ define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
 }
 
 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-; A8: test2:
+; A8-LABEL: test2:
 ; A8: bic r0, r0, r1, asr r2
 
-; A9: test2:
+; A9-LABEL: test2:
 ; A9: bic r0, r0, r1, asr r2
         %shift.upgrd.2 = zext i8 %sh to i32
         %A = ashr i32 %Y, %shift.upgrd.2
@@ -30,12 +30,12 @@ define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
 
 define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
 entry:
-; A8: test3:
+; A8-LABEL: test3:
 ; A8: ldr r0, [r0, r2, lsl #2]
 ; A8: ldr r1, [r1, r2, lsl #2]
 
 ; lsl #2 is free
-; A9: test3:
+; A9-LABEL: test3:
 ; A9: ldr r0, [r0, r2, lsl #2]
 ; A9: ldr r1, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
@@ -53,13 +53,13 @@ declare i8* @malloc(...)
 
 define fastcc void @test4(i16 %addr) nounwind {
 entry:
-; A8: test4:
+; A8-LABEL: test4:
 ; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
 ; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
 ; A8: str [[REG]], [r0, r1, lsl #2]
 ; A8-NOT: str [[REG]], [r0]
 
-; A9: test4:
+; A9-LABEL: test4:
 ; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
 ; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
 ; A9: str [[REG]], [r0, r1, lsl #2]
diff --git a/test/CodeGen/ARM/sincos.ll b/test/CodeGen/ARM/sincos.ll
new file mode 100644
index 000000000000..30b2664e3726
--- /dev/null
+++ b/test/CodeGen/ARM/sincos.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT
+; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS
+
+; Combine sin / cos into a single call.
+; rdar://12856873
+
+define float @test1(float %x) nounwind {
+entry:
+; SINCOS-LABEL: test1:
+; SINCOS: bl ___sincosf_stret
+
+; NOOPT-LABEL: test1:
+; NOOPT: bl _sinf
+; NOOPT: bl _cosf
+  %call = tail call float @sinf(float %x) nounwind readnone
+  %call1 = tail call float @cosf(float %x) nounwind readnone
+  %add = fadd float %call, %call1
+  ret float %add
+}
+
+define double @test2(double %x) nounwind {
+entry:
+; SINCOS-LABEL: test2:
+; SINCOS: bl ___sincos_stret
+
+; NOOPT-LABEL: test2:
+; NOOPT: bl _sin
+; NOOPT: bl _cos
+  %call = tail call double @sin(double %x) nounwind readnone
+  %call1 = tail call double @cos(double %x) nounwind readnone
+  %add = fadd double %call, %call1
+  ret double %add
+}
+
+declare float  @sinf(float) readonly
+declare double @sin(double) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index e93cdbc10a46..b9246635e408 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -10,7 +10,7 @@
 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
-; CHECK: aaa:
+; CHECK-LABEL: aaa:
 ; CHECK: bic {{.*}}, #15
 ; CHECK: vst1.64 {{.*}}sp:128
 ; CHECK: vld1.64 {{.*}}sp:128
diff --git a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
new file mode 100644
index 000000000000..f5cda14861af
--- /dev/null
+++ b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
@@ -0,0 +1,32 @@
+; RUN: llc -O3 -mcpu=swift -mtriple=armv7s-apple-ios6.0.0 %s -o /dev/null
+; rdar://14811848
+
+; Make sure that we do not emit the BMOVPCB_CALL instruction for now or, if we
+; fix the assumptions in its implementation, that we do not crash when emitting it.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv7s-apple-ios6.0.0"
+
+@main.title = private unnamed_addr constant [15 x i8] c"foo and stuff\0A\00", align 1
+@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+; Function Attrs: nounwind optsize ssp
+define i32 @main() #0 {
+entry:
+  %title = alloca [15 x i8], align 1
+  %0 = getelementptr inbounds [15 x i8]* %title, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false)
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0) #3
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #2
+
+attributes #0 = { nounwind optsize ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind optsize }
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index 97916f169b0f..32e3b856c03c 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 
 define i16 @test1(i32* %X, i16* %A) {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: strh {{.*}}[{{.*}}], #-4
         %Y = load i32* %X               ; <i32> [#uses=1]
         %tmp1 = trunc i32 %Y to i16             ; <i16> [#uses=1]
@@ -12,7 +12,7 @@ define i16 @test1(i32* %X, i16* %A) {
 }
 
 define i32 @test2(i32* %X, i32* %A) {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: str {{.*}}[{{.*}}],
         %Y = load i32* %X               ; <i32> [#uses=1]
         store i32 %Y, i32* %A
diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll
new file mode 100644
index 000000000000..465ee1218fda
--- /dev/null
+++ b/test/CodeGen/ARM/struct-byval-frame-index.ll
@@ -0,0 +1,219 @@
+; RUN: llc < %s -mcpu=cortex-a15 -verify-machineinstrs | FileCheck %s
+
+; Check that a spill right after a function call with a large byval struct is
+; generated correctly.
+; PR16393
+
+; CHECK: set_stored_macroblock_parameters
+; CHECK: str r{{.*}}, [sp, [[SLOT:#[0-9]+]]] @ 4-byte Spill
+; CHECK: bl RestoreMVBlock8x8
+; CHECK: bl RestoreMVBlock8x8
+; CHECK: bl RestoreMVBlock8x8
+; CHECK: ldr r{{.*}}, [sp, [[SLOT]]] @ 4-byte Reload
+
+target triple = "armv7l-unknown-linux-gnueabihf"
+
+%structA = type { double, [16 x [16 x i16]], [16 x [16 x i16]], [16 x [16 x i16]], i32****, i32***, i32, i16, [4 x i32], [4 x i32], i8**, [16 x i8], [16 x i8], i32, i64, i32, i16******, i16******, [2 x [4 x [4 x i8]]], i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%structB = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %structC*, %structD*, %structK*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %structL*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 }
+%structC = type { i32, i32, [100 x %structD*], i32, float, float, float }
+%structD = type { i32, i32, i32, i32, i32, i32, %structE*, %structH*, %structJ*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] }
+%structE = type { %structF*, %structG, %structG }
+%structF = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
+%structG = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 }
+%structH = type { [3 x [11 x %structI]], [2 x [9 x %structI]], [2 x [10 x %structI]], [2 x [6 x %structI]], [4 x %structI], [4 x %structI], [3 x %structI] }
+%structI = type { i16, i8, i32 }
+%structJ = type { [2 x %structI], [4 x %structI], [3 x [4 x %structI]], [10 x [4 x %structI]], [10 x [15 x %structI]], [10 x [15 x %structI]], [10 x [5 x %structI]], [10 x [5 x %structI]], [10 x [15 x %structI]], [10 x [15 x %structI]] }
+%structK = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %structK*, %structK*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%structL = type { i32, i32, i32, i32, i32, %structL* }
+%structM = type { i32, i32, i32, i32, i32, i32, [6 x [33 x i64]], [6 x [33 x i64]], [6 x [33 x i64]], [6 x [33 x i64]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16**, i16****, i16****, i16*****, i16***, i8*, i8***, i64***, i64***, i16****, i8**, i8**, %structM*, %structM*, %structM*, i32, i32, i32, i32, i32, i32, i32 }
+%structN = type { i32, [16 x [16 x i32]], [16 x [16 x i32]], [16 x [16 x i32]], [3 x [16 x [16 x i32]]], [4 x i16], [4 x i8], [4 x i8], [4 x i8], [16 x [16 x i16]], [16 x [16 x i16]], [16 x [16 x i32]] }
+
+@cofAC = external global i32****, align 4
+@cofDC = external global i32***, align 4
+@rdopt = external global %structA*, align 4
+@img = external global %structB*
+@enc_picture = external global %structM*
+@si_frame_indicator = external global i32, align 4
+@sp2_frame_indicator = external global i32, align 4
+@lrec = external global i32**, align 4
+@tr8x8 = external global %structN, align 4
+@best_mode = external global i16, align 2
+@best_c_imode = external global i32, align 4
+@best_i16offset = external global i32, align 4
+@bi_pred_me = external global i16, align 2
+@b8mode = external global [4 x i32], align 4
+@b8pdir = external global [4 x i32], align 4
+@b4_intra_pred_modes = external global [16 x i8], align 1
+@b8_intra_pred_modes8x8 = external global [16 x i8], align 1
+@b4_ipredmode = external global [16 x i8], align 1
+@b8_ipredmode8x8 = external global [4 x [4 x i8]], align 1
+@rec_mbY = external global [16 x [16 x i16]], align 2
+@lrec_rec = external global [16 x [16 x i32]], align 4
+@rec_mbU = external global [16 x [16 x i16]], align 2
+@rec_mbV = external global [16 x [16 x i16]], align 2
+@lrec_rec_U = external global [16 x [16 x i32]], align 4
+@lrec_uv = external global i32***, align 4
+@lrec_rec_V = external global [16 x [16 x i32]], align 4
+@cbp = external global i32, align 4
+@cbp_blk = external global i64, align 8
+@luma_transform_size_8x8_flag = external global i32, align 4
+@frefframe = external global [4 x [4 x i8]], align 1
+@brefframe = external global [4 x [4 x i8]], align 1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+; Function Attrs: nounwind
+declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1
+
+; Function Attrs: nounwind
+define void @set_stored_macroblock_parameters() #1 {
+entry:
+  %0 = load %structB** @img, align 4
+  %1 = load i32* undef, align 4
+  %mb_data = getelementptr inbounds %structB* %0, i32 0, i32 61
+  %2 = load %structK** %mb_data, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  br i1 undef, label %for.body20, label %if.end
+
+for.body20:                                       ; preds = %for.end
+  unreachable
+
+if.end:                                           ; preds = %for.end
+  br i1 undef, label %if.end40, label %for.cond31.preheader
+
+for.cond31.preheader:                             ; preds = %if.end
+  unreachable
+
+if.end40:                                         ; preds = %if.end
+  br i1 undef, label %if.end43, label %if.then42
+
+if.then42:                                        ; preds = %if.end40
+  br label %if.end43
+
+if.end43:                                         ; preds = %if.then42, %if.end40
+  br i1 undef, label %if.end164, label %for.cond47.preheader
+
+for.cond47.preheader:                             ; preds = %if.end43
+  br i1 undef, label %for.body119, label %if.end164
+
+for.body119:                                      ; preds = %for.body119, %for.cond47.preheader
+  br i1 undef, label %for.body119, label %if.end164
+
+if.end164:                                        ; preds = %for.body119, %for.cond47.preheader, %if.end43
+  store i32*** null, i32**** @cofDC, align 4
+  %mb_type = getelementptr inbounds %structK* %2, i32 %1, i32 8
+  br i1 undef, label %if.end230, label %if.then169
+
+if.then169:                                       ; preds = %if.end164
+  br i1 undef, label %for.cond185.preheader, label %for.cond210.preheader
+
+for.cond185.preheader:                            ; preds = %if.then169
+  unreachable
+
+for.cond210.preheader:                            ; preds = %if.then169
+  unreachable
+
+if.end230:                                        ; preds = %if.end164
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i32 4, i1 false)
+  %b8pdir = getelementptr inbounds %structK* %2, i32 %1, i32 15
+  %3 = bitcast [4 x i32]* %b8pdir to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i32 4, i1 false)
+  br i1 undef, label %if.end236, label %if.then233
+
+if.then233:                                       ; preds = %if.end230
+  unreachable
+
+if.end236:                                        ; preds = %if.end230
+  %cmp242 = icmp ne i16 undef, 8
+  %4 = load i32* @luma_transform_size_8x8_flag, align 4
+  %tobool245 = icmp ne i32 %4, 0
+  %or.cond812 = or i1 %cmp242, %tobool245
+  br i1 %or.cond812, label %if.end249, label %land.lhs.true246
+
+land.lhs.true246:                                 ; preds = %if.end236
+  br i1 undef, label %if.end249, label %if.then248
+
+if.then248:                                       ; preds = %land.lhs.true246
+  tail call void asm sideeffect "", "~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"() nounwind
+  tail call void @RestoreMVBlock8x8(i32 1, i32 0, %structN* byval @tr8x8, i32 0) #0
+  tail call void @RestoreMVBlock8x8(i32 1, i32 2, %structN* byval @tr8x8, i32 0) #0
+  tail call void @RestoreMVBlock8x8(i32 1, i32 3, %structN* byval @tr8x8, i32 0) #0
+  br label %if.end249
+
+if.end249:                                        ; preds = %if.then248, %land.lhs.true246, %if.end236
+  %5 = load i32* @luma_transform_size_8x8_flag, align 4
+  %6 = load %structA** @rdopt, align 4
+  %luma_transform_size_8x8_flag264 = getelementptr inbounds %structA* %6, i32 0, i32 21
+  store i32 %5, i32* %luma_transform_size_8x8_flag264, align 4
+  %7 = load i32* undef, align 4
+  %add281 = add nsw i32 %7, 0
+  br label %for.body285
+
+for.body285:                                      ; preds = %for.inc503, %if.end249
+  %8 = phi %structB* [ undef, %if.end249 ], [ %.pre1155, %for.inc503 ]
+  %i.21103 = phi i32 [ 0, %if.end249 ], [ %inc504, %for.inc503 ]
+  %block_x286 = getelementptr inbounds %structB* %8, i32 0, i32 37
+  %9 = load i32* %block_x286, align 4
+  %add287 = add nsw i32 %9, %i.21103
+  %shr289 = ashr i32 %i.21103, 1
+  %add290 = add nsw i32 %shr289, 0
+  %arrayidx292 = getelementptr inbounds %structK* %2, i32 %1, i32 15, i32 %add290
+  %10 = load %structM** @enc_picture, align 4
+  %ref_idx = getelementptr inbounds %structM* %10, i32 0, i32 35
+  %11 = load i8**** %ref_idx, align 4
+  %12 = load i8*** %11, align 4
+  %arrayidx313 = getelementptr inbounds i8** %12, i32 %add281
+  %13 = load i8** %arrayidx313, align 4
+  %arrayidx314 = getelementptr inbounds i8* %13, i32 %add287
+  store i8 -1, i8* %arrayidx314, align 1
+  %14 = load %structB** @img, align 4
+  %MbaffFrameFlag327 = getelementptr inbounds %structB* %14, i32 0, i32 100
+  %15 = load i32* %MbaffFrameFlag327, align 4
+  %tobool328 = icmp eq i32 %15, 0
+  br i1 %tobool328, label %if.end454, label %if.then329
+
+if.then329:                                       ; preds = %for.body285
+  %16 = load %structA** @rdopt, align 4
+  br label %if.end454
+
+if.end454:                                        ; preds = %if.then329, %for.body285
+  %17 = load i32* %arrayidx292, align 4
+  %cmp457 = icmp eq i32 %17, 0
+  br i1 %cmp457, label %if.then475, label %lor.lhs.false459
+
+lor.lhs.false459:                                 ; preds = %if.end454
+  %18 = load i32* %mb_type, align 4
+  switch i32 %18, label %for.inc503 [
+    i32 9, label %if.then475
+    i32 10, label %if.then475
+    i32 13, label %if.then475
+    i32 14, label %if.then475
+  ]
+
+if.then475:                                       ; preds = %lor.lhs.false459, %lor.lhs.false459, %lor.lhs.false459, %lor.lhs.false459, %if.end454
+  store i16 0, i16* undef, align 2
+  br label %for.inc503
+
+for.inc503:                                       ; preds = %if.then475, %lor.lhs.false459
+  %inc504 = add nsw i32 %i.21103, 1
+  %.pre1155 = load %structB** @img, align 4
+  br label %for.body285
+}
+
+; Function Attrs: nounwind
+declare void @update_offset_params(i32, i32) #1
+
+; Function Attrs: nounwind
+declare void @RestoreMVBlock8x8(i32, i32, %structN* byval nocapture, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
index e9541c278803..130925a0c237 100644
--- a/test/CodeGen/ARM/struct_byval.ll
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s -check-prefix=THUMB
 
 ; rdar://9877866
 %struct.SmallStruct = type { i32, [8 x i32], [37 x i8] }
@@ -6,10 +7,14 @@
 
 define i32 @f() nounwind ssp {
 entry:
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: ldr
 ; CHECK: str
 ; CHECK-NOT:bne
+; THUMB-LABEL: f:
+; THUMB: ldr
+; THUMB: str
+; THUMB-NOT:bne
   %st = alloca %struct.SmallStruct, align 4
   %call = call i32 @e1(%struct.SmallStruct* byval %st)
   ret i32 0
@@ -18,11 +23,16 @@ entry:
 ; Generate a loop for large struct byval
 define i32 @g() nounwind ssp {
 entry:
-; CHECK: g:
+; CHECK-LABEL: g:
 ; CHECK: ldr
 ; CHECK: sub
 ; CHECK: str
 ; CHECK: bne
+; THUMB-LABEL: g:
+; THUMB: ldr
+; THUMB: sub
+; THUMB: str
+; THUMB: bne
   %st = alloca %struct.LargeStruct, align 4
   %call = call i32 @e2(%struct.LargeStruct* byval %st)
   ret i32 0
@@ -31,11 +41,16 @@ entry:
 ; Generate a loop using NEON instructions
 define i32 @h() nounwind ssp {
 entry:
-; CHECK: h:
+; CHECK-LABEL: h:
 ; CHECK: vld1
 ; CHECK: sub
 ; CHECK: vst1
 ; CHECK: bne
+; THUMB-LABEL: h:
+; THUMB: vld1
+; THUMB: sub
+; THUMB: vst1
+; THUMB: bne
   %st = alloca %struct.LargeStruct, align 16
   %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st)
   ret i32 0
@@ -49,8 +64,10 @@ declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
 ; We can't do tail call since address of s is passed to the callee and part of
 ; s is in caller's local frame.
 define void @f3(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
-; CHECK: f3
+; CHECK-LABEL: f3
 ; CHECK: bl _consumestruct
+; THUMB-LABEL: f3
+; THUMB: blx _consumestruct
 entry:
   %0 = bitcast %struct.SmallStruct* %s to i8*
   tail call void @consumestruct(i8* %0, i32 80) optsize
@@ -58,8 +75,10 @@ entry:
 }
 
 define void @f4(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
-; CHECK: f4
+; CHECK-LABEL: f4
 ; CHECK: bl _consumestruct
+; THUMB-LABEL: f4
+; THUMB: blx _consumestruct
 entry:
   %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
   %0 = bitcast i32* %addr to i8*
@@ -69,8 +88,10 @@ entry:
 
 ; We can do tail call here since s is in the incoming argument area.
 define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize {
-; CHECK: f5
+; CHECK-LABEL: f5
 ; CHECK: b _consumestruct
+; THUMB-LABEL: f5
+; THUMB: b.w _consumestruct
 entry:
   %0 = bitcast %struct.SmallStruct* %s to i8*
   tail call void @consumestruct(i8* %0, i32 80) optsize
@@ -78,8 +99,10 @@ entry:
 }
 
 define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize {
-; CHECK: f6
+; CHECK-LABEL: f6
 ; CHECK: b _consumestruct
+; THUMB-LABEL: f6
+; THUMB: b.w _consumestruct
 entry:
   %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
   %0 = bitcast i32* %addr to i8*
@@ -88,3 +111,19 @@ entry:
 }
 
 declare void @consumestruct(i8* nocapture %structp, i32 %structsize) nounwind
+
+; PR17309
+%struct.I.8 = type { [10 x i32], [3 x i8] }
+
+declare void @use_I(%struct.I.8* byval)
+define void @test_I_16() {
+; CHECK-LABEL: test_I_16
+; CHECK: ldrb
+; CHECK: strb
+; THUMB-LABEL: test_I_16
+; THUMB: ldrb
+; THUMB: strb
+entry:
+  call void @use_I(%struct.I.8* byval align 16 undef)
+  ret void
+}
diff --git a/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
new file mode 100644
index 000000000000..189926941eb2
--- /dev/null
+++ b/test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
@@ -0,0 +1,1523 @@
+;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi   -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi   -disassemble - | FileCheck %s --check-prefix=ARM
+;RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple thumbv7-none-linux-gnueabi -disassemble - | FileCheck %s --check-prefix=THUMB2
+;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi   -mattr=-neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi   -disassemble - | FileCheck %s --check-prefix=NO_NEON
+;We want to have both positive and negative checks for thumb1. These checks
+;are not easy to do in a single pass, so we generate the output once to a
+;temp file and run FileCheck twice with different prefixes.
+;RUN: llc < %s -mtriple=thumbv5-none-linux-gnueabi              -verify-machineinstrs -filetype=obj | llvm-objdump -triple thumbv5-none-linux-gnueabi -disassemble - > %t
+;RUN: cat %t | FileCheck %s --check-prefix=THUMB1
+;RUN: cat %t | FileCheck %s --check-prefix=T1POST
+
+;This file contains auto generated tests for the lowering of passing structs
+;byval in the arm backend. We have tests for both packed and unpacked
+;structs at varying alignments. Each test is run for arm, thumb2 and thumb1.
+;We check for the strings in the generated object code using llvm-objdump
+;because it provides better assurance that we are generating instructions
+;for the correct architecture. Otherwise we could accidentally generate an
+;ARM instruction for THUMB1 and wouldn't detect it because the assembly
+;code representation is the same, but the object code would be generated
+;incorrectly. For each test we check for the label, a load instruction of the
+;correct form, a branch if it will be generated with a loop, and the leftover
+;cleanup if the number of bytes is not evenly divisible by the store size.
+
+%struct.A = type <{ [ 10 x i32 ] }> ; 40 bytes
+declare void @use_A(%struct.A* byval)
+%struct.B = type <{ [ 10 x i32 ], i8 }> ; 41 bytes
+declare void @use_B(%struct.B* byval)
+%struct.C = type <{ [ 10 x i32 ], [ 3 x i8 ] }> ; 43 bytes
+declare void @use_C(%struct.C* byval)
+%struct.D = type <{ [ 100 x i32 ] }> ; 400 bytes
+declare void @use_D(%struct.D* byval)
+%struct.E = type <{ [ 100 x i32 ], i8 }> ; 401 bytes
+declare void @use_E(%struct.E* byval)
+%struct.F = type <{ [ 100 x i32 ], [ 3 x i8 ] }> ; 403 bytes
+declare void @use_F(%struct.F* byval)
+%struct.G = type  { [ 10 x i32 ] }  ; 40 bytes
+declare void @use_G(%struct.G* byval)
+%struct.H = type  { [ 10 x i32 ], i8 }  ; 41 bytes
+declare void @use_H(%struct.H* byval)
+%struct.I = type  { [ 10 x i32 ], [ 3 x i8 ] }  ; 43 bytes
+declare void @use_I(%struct.I* byval)
+%struct.J = type  { [ 100 x i32 ] }  ; 400 bytes
+declare void @use_J(%struct.J* byval)
+%struct.K = type  { [ 100 x i32 ], i8 }  ; 401 bytes
+declare void @use_K(%struct.K* byval)
+%struct.L = type  { [ 100 x i32 ], [ 3 x i8 ] }  ; 403 bytes
+declare void @use_L(%struct.L* byval)
+
+;ARM-LABEL:    test_A_1:
+;THUMB2-LABEL: test_A_1:
+;NO_NEON-LABEL:test_A_1:
+;THUMB1-LABEL: test_A_1:
+;T1POST-LABEL: test_A_1:
+  define void @test_A_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.A, align 1
+    call void @use_A(%struct.A* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_A_2:
+;THUMB2-LABEL: test_A_2:
+;NO_NEON-LABEL:test_A_2:
+;THUMB1-LABEL: test_A_2:
+;T1POST-LABEL: test_A_2:
+  define void @test_A_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.A, align 2
+    call void @use_A(%struct.A* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_A_4:
+;THUMB2-LABEL: test_A_4:
+;NO_NEON-LABEL:test_A_4:
+;THUMB1-LABEL: test_A_4:
+;T1POST-LABEL: test_A_4:
+  define void @test_A_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.A, align 4
+    call void @use_A(%struct.A* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_A_8:
+;THUMB2-LABEL: test_A_8:
+;NO_NEON-LABEL:test_A_8:
+;THUMB1-LABEL: test_A_8:
+;T1POST-LABEL: test_A_8:
+  define void @test_A_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.A, align 8
+    call void @use_A(%struct.A* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_A_16:
+;THUMB2-LABEL: test_A_16:
+;NO_NEON-LABEL:test_A_16:
+;THUMB1-LABEL: test_A_16:
+;T1POST-LABEL: test_A_16:
+  define void @test_A_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.A, align 16
+    call void @use_A(%struct.A* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_B_1:
+;THUMB2-LABEL: test_B_1:
+;NO_NEON-LABEL:test_B_1:
+;THUMB1-LABEL: test_B_1:
+;T1POST-LABEL: test_B_1:
+  define void @test_B_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.B, align 1
+    call void @use_B(%struct.B* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_B_2:
+;THUMB2-LABEL: test_B_2:
+;NO_NEON-LABEL:test_B_2:
+;THUMB1-LABEL: test_B_2:
+;T1POST-LABEL: test_B_2:
+  define void @test_B_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.B, align 2
+    call void @use_B(%struct.B* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_B_4:
+;THUMB2-LABEL: test_B_4:
+;NO_NEON-LABEL:test_B_4:
+;THUMB1-LABEL: test_B_4:
+;T1POST-LABEL: test_B_4:
+  define void @test_B_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.B, align 4
+    call void @use_B(%struct.B* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_B_8:
+;THUMB2-LABEL: test_B_8:
+;NO_NEON-LABEL:test_B_8:
+;THUMB1-LABEL: test_B_8:
+;T1POST-LABEL: test_B_8:
+  define void @test_B_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.B, align 8
+    call void @use_B(%struct.B* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_B_16:
+;THUMB2-LABEL: test_B_16:
+;NO_NEON-LABEL:test_B_16:
+;THUMB1-LABEL: test_B_16:
+;T1POST-LABEL: test_B_16:
+  define void @test_B_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.B, align 16
+    call void @use_B(%struct.B* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_C_1:
+;THUMB2-LABEL: test_C_1:
+;NO_NEON-LABEL:test_C_1:
+;THUMB1-LABEL: test_C_1:
+;T1POST-LABEL: test_C_1:
+  define void @test_C_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.C, align 1
+    call void @use_C(%struct.C* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_C_2:
+;THUMB2-LABEL: test_C_2:
+;NO_NEON-LABEL:test_C_2:
+;THUMB1-LABEL: test_C_2:
+;T1POST-LABEL: test_C_2:
+  define void @test_C_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.C, align 2
+    call void @use_C(%struct.C* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_C_4:
+;THUMB2-LABEL: test_C_4:
+;NO_NEON-LABEL:test_C_4:
+;THUMB1-LABEL: test_C_4:
+;T1POST-LABEL: test_C_4:
+  define void @test_C_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.C, align 4
+    call void @use_C(%struct.C* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_C_8:
+;THUMB2-LABEL: test_C_8:
+;NO_NEON-LABEL:test_C_8:
+;THUMB1-LABEL: test_C_8:
+;T1POST-LABEL: test_C_8:
+  define void @test_C_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.C, align 8
+    call void @use_C(%struct.C* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_C_16:
+;THUMB2-LABEL: test_C_16:
+;NO_NEON-LABEL:test_C_16:
+;THUMB1-LABEL: test_C_16:
+;T1POST-LABEL: test_C_16:
+  define void @test_C_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.C, align 16
+    call void @use_C(%struct.C* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_D_1:
+;THUMB2-LABEL: test_D_1:
+;NO_NEON-LABEL:test_D_1:
+;THUMB1-LABEL: test_D_1:
+;T1POST-LABEL: test_D_1:
+  define void @test_D_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;ARM:         bne
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;THUMB2:      bne
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON:     bne
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.D, align 1
+    call void @use_D(%struct.D* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_D_2:
+;THUMB2-LABEL: test_D_2:
+;NO_NEON-LABEL:test_D_2:
+;THUMB1-LABEL: test_D_2:
+;T1POST-LABEL: test_D_2:
+  define void @test_D_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         bne
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      bne
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     bne
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.D, align 2
+    call void @use_D(%struct.D* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_D_4:
+;THUMB2-LABEL: test_D_4:
+;NO_NEON-LABEL:test_D_4:
+;THUMB1-LABEL: test_D_4:
+;T1POST-LABEL: test_D_4:
+  define void @test_D_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         bne
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.D, align 4
+    call void @use_D(%struct.D* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_D_8:
+;THUMB2-LABEL: test_D_8:
+;NO_NEON-LABEL:test_D_8:
+;THUMB1-LABEL: test_D_8:
+;T1POST-LABEL: test_D_8:
+  define void @test_D_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.D, align 8
+    call void @use_D(%struct.D* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_D_16:
+;THUMB2-LABEL: test_D_16:
+;NO_NEON-LABEL:test_D_16:
+;THUMB1-LABEL: test_D_16:
+;T1POST-LABEL: test_D_16:
+  define void @test_D_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.D, align 16
+    call void @use_D(%struct.D* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_E_1:
+;THUMB2-LABEL: test_E_1:
+;NO_NEON-LABEL:test_E_1:
+;THUMB1-LABEL: test_E_1:
+;T1POST-LABEL: test_E_1:
+  define void @test_E_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;ARM:         bne
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;THUMB2:      bne
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON:     bne
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.E, align 1
+    call void @use_E(%struct.E* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_E_2:
+;THUMB2-LABEL: test_E_2:
+;NO_NEON-LABEL:test_E_2:
+;THUMB1-LABEL: test_E_2:
+;T1POST-LABEL: test_E_2:
+  define void @test_E_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.E, align 2
+    call void @use_E(%struct.E* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_E_4:
+;THUMB2-LABEL: test_E_4:
+;NO_NEON-LABEL:test_E_4:
+;THUMB1-LABEL: test_E_4:
+;T1POST-LABEL: test_E_4:
+  define void @test_E_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.E, align 4
+    call void @use_E(%struct.E* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_E_8:
+;THUMB2-LABEL: test_E_8:
+;NO_NEON-LABEL:test_E_8:
+;THUMB1-LABEL: test_E_8:
+;T1POST-LABEL: test_E_8:
+  define void @test_E_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.E, align 8
+    call void @use_E(%struct.E* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_E_16:
+;THUMB2-LABEL: test_E_16:
+;NO_NEON-LABEL:test_E_16:
+;THUMB1-LABEL: test_E_16:
+;T1POST-LABEL: test_E_16:
+  define void @test_E_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.E, align 16
+    call void @use_E(%struct.E* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_F_1:
+;THUMB2-LABEL: test_F_1:
+;NO_NEON-LABEL:test_F_1:
+;THUMB1-LABEL: test_F_1:
+;T1POST-LABEL: test_F_1:
+  define void @test_F_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;ARM:         bne
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;THUMB2:      bne
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON:     bne
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.F, align 1
+    call void @use_F(%struct.F* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_F_2:
+;THUMB2-LABEL: test_F_2:
+;NO_NEON-LABEL:test_F_2:
+;THUMB1-LABEL: test_F_2:
+;T1POST-LABEL: test_F_2:
+  define void @test_F_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.F, align 2
+    call void @use_F(%struct.F* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_F_4:
+;THUMB2-LABEL: test_F_4:
+;NO_NEON-LABEL:test_F_4:
+;THUMB1-LABEL: test_F_4:
+;T1POST-LABEL: test_F_4:
+  define void @test_F_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.F, align 4
+    call void @use_F(%struct.F* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_F_8:
+;THUMB2-LABEL: test_F_8:
+;NO_NEON-LABEL:test_F_8:
+;THUMB1-LABEL: test_F_8:
+;T1POST-LABEL: test_F_8:
+  define void @test_F_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.F, align 8
+    call void @use_F(%struct.F* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_F_16:
+;THUMB2-LABEL: test_F_16:
+;NO_NEON-LABEL:test_F_16:
+;THUMB1-LABEL: test_F_16:
+;T1POST-LABEL: test_F_16:
+  define void @test_F_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.F, align 16
+    call void @use_F(%struct.F* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_G_1:
+;THUMB2-LABEL: test_G_1:
+;NO_NEON-LABEL:test_G_1:
+;THUMB1-LABEL: test_G_1:
+;T1POST-LABEL: test_G_1:
+  define void @test_G_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.G, align 1
+    call void @use_G(%struct.G* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_G_2:
+;THUMB2-LABEL: test_G_2:
+;NO_NEON-LABEL:test_G_2:
+;THUMB1-LABEL: test_G_2:
+;T1POST-LABEL: test_G_2:
+  define void @test_G_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.G, align 2
+    call void @use_G(%struct.G* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_G_4:
+;THUMB2-LABEL: test_G_4:
+;NO_NEON-LABEL:test_G_4:
+;THUMB1-LABEL: test_G_4:
+;T1POST-LABEL: test_G_4:
+  define void @test_G_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.G, align 4
+    call void @use_G(%struct.G* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_G_8:
+;THUMB2-LABEL: test_G_8:
+;NO_NEON-LABEL:test_G_8:
+;THUMB1-LABEL: test_G_8:
+;T1POST-LABEL: test_G_8:
+  define void @test_G_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.G, align 8
+    call void @use_G(%struct.G* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_G_16:
+;THUMB2-LABEL: test_G_16:
+;NO_NEON-LABEL:test_G_16:
+;THUMB1-LABEL: test_G_16:
+;T1POST-LABEL: test_G_16:
+  define void @test_G_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.G, align 16
+    call void @use_G(%struct.G* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_H_1:
+;THUMB2-LABEL: test_H_1:
+;NO_NEON-LABEL:test_H_1:
+;THUMB1-LABEL: test_H_1:
+;T1POST-LABEL: test_H_1:
+  define void @test_H_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.H, align 1
+    call void @use_H(%struct.H* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_H_2:
+;THUMB2-LABEL: test_H_2:
+;NO_NEON-LABEL:test_H_2:
+;THUMB1-LABEL: test_H_2:
+;T1POST-LABEL: test_H_2:
+  define void @test_H_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.H, align 2
+    call void @use_H(%struct.H* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_H_4:
+;THUMB2-LABEL: test_H_4:
+;NO_NEON-LABEL:test_H_4:
+;THUMB1-LABEL: test_H_4:
+;T1POST-LABEL: test_H_4:
+  define void @test_H_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.H, align 4
+    call void @use_H(%struct.H* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_H_8:
+;THUMB2-LABEL: test_H_8:
+;NO_NEON-LABEL:test_H_8:
+;THUMB1-LABEL: test_H_8:
+;T1POST-LABEL: test_H_8:
+  define void @test_H_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.H, align 8
+    call void @use_H(%struct.H* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_H_16:
+;THUMB2-LABEL: test_H_16:
+;NO_NEON-LABEL:test_H_16:
+;THUMB1-LABEL: test_H_16:
+;T1POST-LABEL: test_H_16:
+  define void @test_H_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.H, align 16
+    call void @use_H(%struct.H* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_I_1:
+;THUMB2-LABEL: test_I_1:
+;NO_NEON-LABEL:test_I_1:
+;THUMB1-LABEL: test_I_1:
+;T1POST-LABEL: test_I_1:
+  define void @test_I_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.I, align 1
+    call void @use_I(%struct.I* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_I_2:
+;THUMB2-LABEL: test_I_2:
+;NO_NEON-LABEL:test_I_2:
+;THUMB1-LABEL: test_I_2:
+;T1POST-LABEL: test_I_2:
+  define void @test_I_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.I, align 2
+    call void @use_I(%struct.I* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_I_4:
+;THUMB2-LABEL: test_I_4:
+;NO_NEON-LABEL:test_I_4:
+;THUMB1-LABEL: test_I_4:
+;T1POST-LABEL: test_I_4:
+  define void @test_I_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.I, align 4
+    call void @use_I(%struct.I* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_I_8:
+;THUMB2-LABEL: test_I_8:
+;NO_NEON-LABEL:test_I_8:
+;THUMB1-LABEL: test_I_8:
+;T1POST-LABEL: test_I_8:
+  define void @test_I_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.I, align 8
+    call void @use_I(%struct.I* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_I_16:
+;THUMB2-LABEL: test_I_16:
+;NO_NEON-LABEL:test_I_16:
+;THUMB1-LABEL: test_I_16:
+;T1POST-LABEL: test_I_16:
+  define void @test_I_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.I, align 16
+    call void @use_I(%struct.I* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_J_1:
+;THUMB2-LABEL: test_J_1:
+;NO_NEON-LABEL:test_J_1:
+;THUMB1-LABEL: test_J_1:
+;T1POST-LABEL: test_J_1:
+  define void @test_J_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;ARM:         bne
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;THUMB2:      bne
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON:     bne
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.J, align 1
+    call void @use_J(%struct.J* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_J_2:
+;THUMB2-LABEL: test_J_2:
+;NO_NEON-LABEL:test_J_2:
+;THUMB1-LABEL: test_J_2:
+;T1POST-LABEL: test_J_2:
+  define void @test_J_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         bne
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      bne
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     bne
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.J, align 2
+    call void @use_J(%struct.J* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_J_4:
+;THUMB2-LABEL: test_J_4:
+;NO_NEON-LABEL:test_J_4:
+;THUMB1-LABEL: test_J_4:
+;T1POST-LABEL: test_J_4:
+  define void @test_J_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         bne
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.J, align 4
+    call void @use_J(%struct.J* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_J_8:
+;THUMB2-LABEL: test_J_8:
+;NO_NEON-LABEL:test_J_8:
+;THUMB1-LABEL: test_J_8:
+;T1POST-LABEL: test_J_8:
+  define void @test_J_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.J, align 8
+    call void @use_J(%struct.J* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_J_16:
+;THUMB2-LABEL: test_J_16:
+;NO_NEON-LABEL:test_J_16:
+;THUMB1-LABEL: test_J_16:
+;T1POST-LABEL: test_J_16:
+  define void @test_J_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.J, align 16
+    call void @use_J(%struct.J* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_K_1:
+;THUMB2-LABEL: test_K_1:
+;NO_NEON-LABEL:test_K_1:
+;THUMB1-LABEL: test_K_1:
+;T1POST-LABEL: test_K_1:
+  define void @test_K_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;ARM:         bne
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;THUMB2:      bne
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON:     bne
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.K, align 1
+    call void @use_K(%struct.K* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_K_2:
+;THUMB2-LABEL: test_K_2:
+;NO_NEON-LABEL:test_K_2:
+;THUMB1-LABEL: test_K_2:
+;T1POST-LABEL: test_K_2:
+  define void @test_K_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         bne
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      bne
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     bne
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.K, align 2
+    call void @use_K(%struct.K* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_K_4:
+;THUMB2-LABEL: test_K_4:
+;NO_NEON-LABEL:test_K_4:
+;THUMB1-LABEL: test_K_4:
+;T1POST-LABEL: test_K_4:
+  define void @test_K_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         bne
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.K, align 4
+    call void @use_K(%struct.K* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_K_8:
+;THUMB2-LABEL: test_K_8:
+;NO_NEON-LABEL:test_K_8:
+;THUMB1-LABEL: test_K_8:
+;T1POST-LABEL: test_K_8:
+  define void @test_K_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.K, align 8
+    call void @use_K(%struct.K* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_K_16:
+;THUMB2-LABEL: test_K_16:
+;NO_NEON-LABEL:test_K_16:
+;THUMB1-LABEL: test_K_16:
+;T1POST-LABEL: test_K_16:
+  define void @test_K_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.K, align 16
+    call void @use_K(%struct.K* byval align 16 %a)
+    ret void
+  }
+;ARM-LABEL:    test_L_1:
+;THUMB2-LABEL: test_L_1:
+;NO_NEON-LABEL:test_L_1:
+;THUMB1-LABEL: test_L_1:
+;T1POST-LABEL: test_L_1:
+  define void @test_L_1() {
+;ARM:         ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;ARM:         bne
+
+;THUMB2:      ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;THUMB2:      bne
+
+;NO_NEON:     ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;NO_NEON:     bne
+
+;THUMB1:      ldrb    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #1
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrb    r{{[0-9]+}}, [{{.*}}], #1
+  entry:
+    %a = alloca %struct.L, align 1
+    call void @use_L(%struct.L* byval align 1 %a)
+    ret void
+  }
+;ARM-LABEL:    test_L_2:
+;THUMB2-LABEL: test_L_2:
+;NO_NEON-LABEL:test_L_2:
+;THUMB1-LABEL: test_L_2:
+;T1POST-LABEL: test_L_2:
+  define void @test_L_2() {
+;ARM:         ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;ARM:         bne
+
+;THUMB2:      ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;THUMB2:      bne
+
+;NO_NEON:     ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;NO_NEON:     bne
+
+;THUMB1:      ldrh    r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #2
+;THUMB1:      bne
+
+;T1POST-NOT:  ldrh    r{{[0-9]+}}, [{{.*}}], #2
+  entry:
+    %a = alloca %struct.L, align 2
+    call void @use_L(%struct.L* byval align 2 %a)
+    ret void
+  }
+;ARM-LABEL:    test_L_4:
+;THUMB2-LABEL: test_L_4:
+;NO_NEON-LABEL:test_L_4:
+;THUMB1-LABEL: test_L_4:
+;T1POST-LABEL: test_L_4:
+  define void @test_L_4() {
+;ARM:         ldr     r{{[0-9]+}}, [{{.*}}], #4
+;ARM:         bne
+
+;THUMB2:      ldr     r{{[0-9]+}}, [{{.*}}], #4
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  ldr     r{{[0-9]+}}, [{{.*}}], #4
+  entry:
+    %a = alloca %struct.L, align 4
+    call void @use_L(%struct.L* byval align 4 %a)
+    ret void
+  }
+;ARM-LABEL:    test_L_8:
+;THUMB2-LABEL: test_L_8:
+;NO_NEON-LABEL:test_L_8:
+;THUMB1-LABEL: test_L_8:
+;T1POST-LABEL: test_L_8:
+  define void @test_L_8() {
+;ARM:         vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.L, align 8
+    call void @use_L(%struct.L* byval align 8 %a)
+    ret void
+  }
+;ARM-LABEL:    test_L_16:
+;THUMB2-LABEL: test_L_16:
+;NO_NEON-LABEL:test_L_16:
+;THUMB1-LABEL: test_L_16:
+;T1POST-LABEL: test_L_16:
+  define void @test_L_16() {
+;ARM:         vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;ARM:         bne
+
+;THUMB2:      vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+;THUMB2:      bne
+
+;NO_NEON:     ldr     r{{[0-9]+}}, [{{.*}}], #4
+;NO_NEON:     bne
+;NO_NEON-NOT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+
+;THUMB1:      ldr     r{{[0-9]+}}, {{\[}}[[BASE:r[0-9]+]]{{\]}}
+;THUMB1:      adds    [[BASE]], #4
+;THUMB1:      bne
+
+;T1POST-NOT:  vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r{{.*}}]!
+  entry:
+    %a = alloca %struct.L, align 16
+    call void @use_L(%struct.L* byval align 16 %a)
+    ret void
+  }
diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll
index 2961b94d2c1e..19727dabf09e 100644
--- a/test/CodeGen/ARM/sub-cmp-peephole.ll
+++ b/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -1,8 +1,11 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s --check-prefix=V7
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s -check-prefix=V8
+
 
 define i32 @f(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: subs
 ; CHECK-NOT: cmp
   %cmp = icmp sgt i32 %a, %b
@@ -13,7 +16,7 @@ entry:
 
 define i32 @g(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: g:
+; CHECK-LABEL: g:
 ; CHECK: subs
 ; CHECK-NOT: cmp
   %cmp = icmp slt i32 %a, %b
@@ -24,7 +27,7 @@ entry:
 
 define i32 @h(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: h:
+; CHECK-LABEL: h:
 ; CHECK: subs
 ; CHECK-NOT: cmp
   %cmp = icmp sgt i32 %a, 3
@@ -36,7 +39,7 @@ entry:
 ; rdar://11725965
 define i32 @i(i32 %a, i32 %b) nounwind readnone ssp {
 entry:
-; CHECK: i:
+; CHECK-LABEL: i:
 ; CHECK: subs
 ; CHECK-NOT: cmp
   %cmp = icmp ult i32 %a, %b
@@ -48,7 +51,7 @@ entry:
 ; a swapped sub.
 define i32 @j(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: j:
+; CHECK-LABEL: j:
 ; CHECK: sub
 ; CHECK: cmp
   %cmp = icmp eq i32 %b, %a
@@ -84,3 +87,60 @@ land.lhs.true:                                    ; preds = %num2long.exit
 if.end11:                                         ; preds = %num2long.exit
   ret i32 23
 }
+
+define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
+entry:
+; CHECK-LABEL: float_sel:
+; CHECK-NOT: cmp
+; V8-LABEL: float_sel:
+; V8-NOT: cmp
+; V8: vseleq.f32
+  %sub = sub i32 %a, %b
+  %cmp = icmp eq i32 %sub, 0
+  %ret = select i1 %cmp, float %x, float %y
+  ret float %ret
+}
+
+define double @double_sel(i32 %a, i32 %b, double %x, double %y) {
+entry:
+; CHECK-LABEL: double_sel:
+; CHECK-NOT: cmp
+; V8-LABEL: double_sel:
+; V8-NOT: cmp
+; V8: vseleq.f64
+  %sub = sub i32 %a, %b
+  %cmp = icmp eq i32 %sub, 0
+  %ret = select i1 %cmp, double %x, double %y
+  ret double %ret
+}
+
+@t = common global i32 0
+define double @double_sub(i32 %a, i32 %b, double %x, double %y) {
+entry:
+; CHECK-LABEL: double_sub:
+; CHECK: subs
+; CHECK-NOT: cmp
+; V8-LABEL: double_sub:
+; V8: vsel
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub i32 %a, %b
+  store i32 %sub, i32* @t
+  %ret = select i1 %cmp, double %x, double %y
+  ret double %ret
+}
+
+define double @double_sub_swap(i32 %a, i32 %b, double %x, double %y) {
+entry:
+; V7-LABEL: double_sub_swap:
+; V7-NOT: cmp
+; V7: subs
+; V8-LABEL: double_sub_swap:
+; V8-NOT: subs
+; V8: cmp
+; V8: vsel
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub i32 %b, %a
+  %ret = select i1 %cmp, double %x, double %y
+  store i32 %sub, i32* @t
+  ret double %ret
+}
diff --git a/test/CodeGen/ARM/swift-atomics.ll b/test/CodeGen/ARM/swift-atomics.ll
new file mode 100644
index 000000000000..1d7181557100
--- /dev/null
+++ b/test/CodeGen/ARM/swift-atomics.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=armv7-apple-ios6.0 -mcpu=swift < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios6.0 < %s | FileCheck %s --check-prefix=CHECK-STRICT-ATOMIC
+
+; With -mcpu=swift, release operations only need the store barrier provided by a "dmb ishst".
+
+define void @test_store_release(i32* %p, i32 %v) {
+; CHECK-LABEL: test_store_release:
+; CHECK: dmb ishst
+; CHECK: str
+
+; CHECK-STRICT-ATOMIC: dmb {{ish$}}
+  store atomic i32 %v, i32* %p release, align 4   ; release store under test; the run without -mcpu=swift expects a plain "dmb ish" instead
+  ret void
+}
+
+; However, if sequential consistency is needed *something* must ensure a release
+; followed by an acquire does not get reordered. In that case a "dmb ishst" is
+; not adequate.
+define i32 @test_seq_cst(i32* %p, i32 %v) {
+; CHECK-LABEL: test_seq_cst:
+; CHECK: dmb ishst
+; CHECK: str
+; CHECK: dmb {{ish$}}
+; CHECK: ldr
+; CHECK: dmb {{ish$}}
+
+; CHECK-STRICT-ATOMIC: dmb {{ish$}}
+; CHECK-STRICT-ATOMIC: dmb {{ish$}}
+
+  store atomic i32 %v, i32* %p seq_cst, align 4   ; with the load below, the -mcpu=swift checks expect in order: dmb ishst, str, dmb ish, ldr, dmb ish
+  %val = load atomic i32* %p seq_cst, align 4     ; seq_cst load of the location just stored to
+  ret i32 %val
+}
+
+; Also, pure acquire operations should definitely not have an ishst barrier.
+
+define i32 @test_acq(i32* %addr) {
+; CHECK-LABEL: test_acq:
+; CHECK: ldr
+; CHECK: dmb {{ish$}}
+
+; CHECK-STRICT-ATOMIC: dmb {{ish$}}
+  %val = load atomic i32* %addr acquire, align 4   ; NOTE(review): the checks above only require ldr then "dmb ish"; none of them rejects a "dmb ishst" -- confirm whether a negative check is wanted
+  ret i32 %val
+}
diff --git a/test/CodeGen/ARM/swift-vldm.ll b/test/CodeGen/ARM/swift-vldm.ll
new file mode 100644
index 000000000000..67ae00ad7db8
--- /dev/null
+++ b/test/CodeGen/ARM/swift-vldm.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mcpu=swift -mtriple=armv7s-apple-ios | FileCheck %s
+
+; Check that we avoid producing vldm instructions using d registers that
+; begin in the most-significant half of a q register. These require more
+; micro-ops on swift and so aren't worth combining.
+
+; CHECK-LABEL: test_vldm
+; CHECK: vldmia r{{[0-9]+}}, {d2, d3, d4}
+; CHECK-NOT: vldmia r{{[0-9]+}}, {d1, d2, d3, d4}
+
+declare fastcc void @force_register(double %d0, double %d1, double %d2, double %d3, double %d4) 
+
+define void @test_vldm(double* %x, double * %y) {
+entry:
+  %addr1 = getelementptr double * %x, i32 1   ; &x[1]
+  %addr2 = getelementptr double * %x, i32 2   ; &x[2]
+  %addr3 = getelementptr double * %x, i32 3   ; &x[3]
+  %d0 = load double * %y                      ; loaded from %y, not %x; passed as the first argument below
+  %d1 = load double * %x                      ; x[0]
+  %d2 = load double * %addr1                  ; x[1]
+  %d3 = load double * %addr2                  ; x[2]
+  %d4 = load double * %addr3                  ; x[3]
+  ; We are trying to force x[0-3] in registers d1 to d4 so that we can test we
+  ; don't form a "vldmia rX, {d1, d2, d3, d4}".
+  ; We are relying on the calling convention and that register allocation
+  ; properly coalesces registers.
+  call fastcc void @force_register(double %d0, double %d1, double %d2, double %d3, double %d4)
+  ret void
+}
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
index eb4d0bab929e..d654056eaf3d 100644
--- a/test/CodeGen/ARM/tail-dup.ll
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -2,7 +2,7 @@
 
 ; We should be able to tail-duplicate the basic block containing the indirectbr
 ; into all of its predecessors.
-; CHECK: fn:
+; CHECK-LABEL: fn:
 ; CHECK: mov pc
 ; CHECK: mov pc
 ; CHECK: mov pc
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
index 220b0f173739..37e9a4af3be5 100644
--- a/test/CodeGen/ARM/tail-opts.ll
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -14,7 +14,7 @@ declare i8* @choose(i8*, i8*)
 ; BranchFolding should tail-duplicate the indirect jump to avoid
 ; redundant branching.
 
-; CHECK: tail_duplicate_me:
+; CHECK-LABEL: tail_duplicate_me:
 ; CHECK:      qux
 ; CHECK:      movw r{{[0-9]+}}, :lower16:_GHJK
 ; CHECK:      movt r{{[0-9]+}}, :upper16:_GHJK
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
index 93340c300cd4..9203f166ffa9 100644
--- a/test/CodeGen/ARM/test-sharedidx.ll
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -14,7 +14,7 @@
 ; rdar://10674430
 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
 entry:
-; CHECK: sharedidx:
+; CHECK-LABEL: sharedidx:
   %cmp8 = icmp eq i32 %len, 0
   br i1 %cmp8, label %for.end, label %for.body
 
diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll
index f06e4a4f8ddc..cb42de69f0aa 100644
--- a/test/CodeGen/ARM/this-return.ll
+++ b/test/CodeGen/ARM/this-return.ll
@@ -17,12 +17,12 @@ declare %struct.B* @B_ctor_complete_nothisret(%struct.B*, i32)
 
 define %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x) {
 entry:
-; CHECKELF: C_ctor_base:
+; CHECKELF-LABEL: C_ctor_base:
 ; CHECKELF-NOT: mov {{r[0-9]+}}, r0
 ; CHECKELF: bl A_ctor_base
 ; CHECKELF-NOT: mov r0, {{r[0-9]+}}
 ; CHECKELF: b B_ctor_base
-; CHECKT2D: C_ctor_base:
+; CHECKT2D-LABEL: C_ctor_base:
 ; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
 ; CHECKT2D: blx _A_ctor_base
 ; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
@@ -36,12 +36,12 @@ entry:
 
 define %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) {
 entry:
-; CHECKELF: C_ctor_base_nothisret:
+; CHECKELF-LABEL: C_ctor_base_nothisret:
 ; CHECKELF: mov [[SAVETHIS:r[0-9]+]], r0
 ; CHECKELF: bl A_ctor_base_nothisret
 ; CHECKELF: mov r0, [[SAVETHIS]]
 ; CHECKELF-NOT: b B_ctor_base_nothisret
-; CHECKT2D: C_ctor_base_nothisret:
+; CHECKT2D-LABEL: C_ctor_base_nothisret:
 ; CHECKT2D: mov [[SAVETHIS:r[0-9]+]], r0
 ; CHECKT2D: blx _A_ctor_base_nothisret
 ; CHECKT2D: mov r0, [[SAVETHIS]]
@@ -55,9 +55,9 @@ entry:
 
 define %struct.C* @C_ctor_complete(%struct.C* %this, i32 %x) {
 entry:
-; CHECKELF: C_ctor_complete:
+; CHECKELF-LABEL: C_ctor_complete:
 ; CHECKELF: b C_ctor_base
-; CHECKT2D: C_ctor_complete:
+; CHECKT2D-LABEL: C_ctor_complete:
 ; CHECKT2D: b.w _C_ctor_base
   %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
   ret %struct.C* %this
@@ -65,9 +65,9 @@ entry:
 
 define %struct.C* @C_ctor_complete_nothisret(%struct.C* %this, i32 %x) {
 entry:
-; CHECKELF: C_ctor_complete_nothisret:
+; CHECKELF-LABEL: C_ctor_complete_nothisret:
 ; CHECKELF-NOT: b C_ctor_base_nothisret
-; CHECKT2D: C_ctor_complete_nothisret:
+; CHECKT2D-LABEL: C_ctor_complete_nothisret:
 ; CHECKT2D-NOT: b.w _C_ctor_base_nothisret
   %call = tail call %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x)
   ret %struct.C* %this
@@ -75,12 +75,12 @@ entry:
 
 define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) {
 entry:
-; CHECKELF: D_ctor_base:
+; CHECKELF-LABEL: D_ctor_base:
 ; CHECKELF-NOT: mov {{r[0-9]+}}, r0
 ; CHECKELF: bl B_ctor_complete
 ; CHECKELF-NOT: mov r0, {{r[0-9]+}}
 ; CHECKELF: b B_ctor_complete
-; CHECKT2D: D_ctor_base:
+; CHECKT2D-LABEL: D_ctor_base:
 ; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
 ; CHECKT2D: blx _B_ctor_complete
 ; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
@@ -93,9 +93,9 @@ entry:
 
 define %struct.E* @E_ctor_base(%struct.E* %this, i32 %x) {
 entry:
-; CHECKELF: E_ctor_base:
+; CHECKELF-LABEL: E_ctor_base:
 ; CHECKELF-NOT: b B_ctor_complete
-; CHECKT2D: E_ctor_base:
+; CHECKT2D-LABEL: E_ctor_base:
 ; CHECKT2D-NOT: b.w _B_ctor_complete
   %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0
   %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index aa88ae0c1a86..e07e8aab77aa 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -39,4 +39,4 @@ bb3:
 }
 
 declare noalias i8* @strdup(i8* nocapture) nounwind
-declare i32 @_called_func(i8*, i32*) nounwind
-\ No newline at end of file
+declare i32 @_called_func(i8*, i32*) nounwind
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index a25352c0f03d..47c5dccd6fee 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,14 +1,15 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
 ; PR11107
 
 define i32 @test(i32 %a, i32 %b) {
 entry:
 ; CHECK:        cmp
 ; CHECK-NEXT:   it    mi
-; CHECK-NEXT:   rsbmi
+; CHECK-NEXT:   rsb{{s?}}mi
 ; CHECK-NEXT:   cmp
 ; CHECK-NEXT:   it    mi
-; CHECK-NEXT:   rsbmi
+; CHECK-NEXT:   rsb{{s?}}mi
  %cmp1 = icmp slt i32 %a, 0
  %sub1 = sub nsw i32 0, %a
  %abs1 = select i1 %cmp1, i32 %sub1, i32 %a
diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll
index a5f3c9005af0..ccc9032313b8 100644
--- a/test/CodeGen/ARM/tls-models.ll
+++ b/test/CodeGen/ARM/tls-models.ll
@@ -21,9 +21,9 @@ entry:
   ret i32* @external_gd
 
   ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
-  ; CHECK-NONPIC:   f1:
+  ; CHECK-NONPIC-LABEL:   f1:
   ; CHECK-NONPIC:   external_gd(gottpoff)
-  ; CHECK-PIC:      f1:
+  ; CHECK-PIC-LABEL:      f1:
   ; CHECK-PIC:      external_gd(tlsgd)
 }
 
@@ -33,9 +33,9 @@ entry:
 
   ; Non-PIC code can use local exec, PIC code can use local dynamic,
   ; but that is not implemented, so falls back to general dynamic.
-  ; CHECK-NONPIC:   f2:
+  ; CHECK-NONPIC-LABEL:   f2:
   ; CHECK-NONPIC:   internal_gd(tpoff)
-  ; CHECK-PIC:      f2:
+  ; CHECK-PIC-LABEL:      f2:
   ; CHECK-PIC:      internal_gd(tlsgd)
 }
 
@@ -48,9 +48,9 @@ entry:
 
   ; Non-PIC code can use initial exec, PIC should use local dynamic,
   ; but that is not implemented, so falls back to general dynamic.
-  ; CHECK-NONPIC:   f3:
+  ; CHECK-NONPIC-LABEL:   f3:
   ; CHECK-NONPIC:   external_ld(gottpoff)
-  ; CHECK-PIC:      f3:
+  ; CHECK-PIC-LABEL:      f3:
   ; CHECK-PIC:      external_ld(tlsgd)
 }
 
@@ -60,9 +60,9 @@ entry:
 
   ; Non-PIC code can use local exec, PIC code can use local dynamic,
   ; but that is not implemented, so it falls back to general dynamic.
-  ; CHECK-NONPIC:   f4:
+  ; CHECK-NONPIC-LABEL:   f4:
   ; CHECK-NONPIC:   internal_ld(tpoff)
-  ; CHECK-PIC:      f4:
+  ; CHECK-PIC-LABEL:      f4:
   ; CHECK-PIC:      internal_ld(tlsgd)
 }
 
@@ -74,9 +74,9 @@ entry:
   ret i32* @external_ie
 
   ; Non-PIC and PIC code will use initial exec as specified.
-  ; CHECK-NONPIC:   f5:
+  ; CHECK-NONPIC-LABEL:   f5:
   ; CHECK-NONPIC:   external_ie(gottpoff)
-  ; CHECK-PIC:      f5:
+  ; CHECK-PIC-LABEL:      f5:
   ; CHECK-PIC:      external_ie(gottpoff)
 }
 
@@ -85,9 +85,9 @@ entry:
   ret i32* @internal_ie
 
   ; Non-PIC code can use local exec, PIC code use initial exec as specified.
-  ; CHECK-NONPIC:   f6:
+  ; CHECK-NONPIC-LABEL:   f6:
   ; CHECK-NONPIC:   internal_ie(tpoff)
-  ; CHECK-PIC:      f6:
+  ; CHECK-PIC-LABEL:      f6:
   ; CHECK-PIC:      internal_ie(gottpoff)
 }
 
@@ -99,9 +99,9 @@ entry:
   ret i32* @external_le
 
   ; Non-PIC and PIC code will use local exec as specified.
-  ; CHECK-NONPIC:   f7:
+  ; CHECK-NONPIC-LABEL:   f7:
   ; CHECK-NONPIC:   external_le(tpoff)
-  ; CHECK-PIC:      f7:
+  ; CHECK-PIC-LABEL:      f7:
   ; CHECK-PIC:      external_le(tpoff)
 }
 
@@ -110,8 +110,8 @@ entry:
   ret i32* @internal_le
 
   ; Non-PIC and PIC code will use local exec as specified.
-  ; CHECK-NONPIC:   f8:
+  ; CHECK-NONPIC-LABEL:   f8:
   ; CHECK-NONPIC:   internal_le(tpoff)
-  ; CHECK-PIC:      f8:
+  ; CHECK-PIC-LABEL:      f8:
   ; CHECK-PIC:      internal_le(tpoff)
 }
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 57370c4de1c2..f04812583114 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -6,10 +6,10 @@
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
 define i32 @f() {
-; CHECK-NONPIC: f:
+; CHECK-NONPIC-LABEL: f:
 ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}]
 ; CHECK-NONPIC: i(gottpoff)
-; CHECK-PIC: f:
+; CHECK-PIC-LABEL: f:
 ; CHECK-PIC: __tls_get_addr
 entry:
 	%tmp1 = load i32* @i		; <i32> [#uses=1]
@@ -17,10 +17,10 @@ entry:
 }
 
 define i32* @g() {
-; CHECK-NONPIC: g:
+; CHECK-NONPIC-LABEL: g:
 ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}]
 ; CHECK-NONPIC: i(gottpoff)
-; CHECK-PIC: g:
+; CHECK-PIC-LABEL: g:
 ; CHECK-PIC: __tls_get_addr
 entry:
 	ret i32* @i
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
index a4e3c3c0efa9..6cb26e331ba0 100644
--- a/test/CodeGen/ARM/trap.ll
+++ b/test/CodeGen/ARM/trap.ll
@@ -9,13 +9,13 @@
 ; RUN: llc -mtriple=armv7 -mattr=+nacl-trap -filetype=obj %s -o - \
 ; RUN:  | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \
 ; RUN:  | FileCheck %s -check-prefix=ENCODING-NACL
-; RUN: llc -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN: llc -verify-machineinstrs -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
 ; RUN:  | llvm-objdump -disassemble -triple armv7-unknown-nacl - \
 ; RUN:  | FileCheck %s -check-prefix=ENCODING-NACL
 ; RUN: llc -mtriple=armv7 -filetype=obj %s -o - \
 ; RUN:  | llvm-objdump -disassemble -triple armv7 - \
 ; RUN:  | FileCheck %s -check-prefix=ENCODING-ALL
-; RUN: llc -fast-isel -mtriple=armv7 -filetype=obj %s -o - \
+; RUN: llc -verify-machineinstrs -fast-isel -mtriple=armv7 -filetype=obj %s -o - \
 ; RUN:  | llvm-objdump -disassemble -triple armv7 - \
 ; RUN:  | FileCheck %s -check-prefix=ENCODING-ALL
 ; rdar://7961298
@@ -23,10 +23,10 @@
 
 define void @t() nounwind {
 entry:
-; INSTR: t:
+; INSTR-LABEL: t:
 ; INSTR: trap
 
-; FUNC: t:
+; FUNC-LABEL: t:
 ; FUNC: bl __trap
 
 ; ENCODING-NACL: f0 de fe e7
@@ -39,10 +39,10 @@ entry:
 
 define void @t2() nounwind {
 entry:
-; INSTR: t2:
+; INSTR-LABEL: t2:
 ; INSTR: trap
 
-; FUNC: t2:
+; FUNC-LABEL: t2:
 ; FUNC: bl __trap
 
 ; ENCODING-NACL: f0 de fe e7
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
index fc2aa1e568e2..2172f6b9a6cd 100644
--- a/test/CodeGen/ARM/twoaddrinstr.ll
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -3,7 +3,7 @@
 
 define void @PR13378() nounwind {
 ; This was orriginally a crasher trying to schedule the instructions.
-; CHECK:      PR13378:
+; CHECK-LABEL:      PR13378:
 ; CHECK:        vld1.32
 ; CHECK-NEXT:   vst1.32
 ; CHECK-NEXT:   vst1.32
diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll
index fa5c0168fefe..19875ce94071 100644
--- a/test/CodeGen/ARM/umulo-32.ll
+++ b/test/CodeGen/ARM/umulo-32.ll
@@ -2,8 +2,8 @@
 
 %umul.ty = type { i32, i1 }
 
-define i32 @func(i32 %a) nounwind {
-; CHECK: func
+define i32 @test1(i32 %a) nounwind {
+; CHECK: test1:
 ; CHECK: muldi3
   %tmp0 = tail call %umul.ty @llvm.umul.with.overflow.i32(i32 %a, i32 37)
   %tmp1 = extractvalue %umul.ty %tmp0, 0
@@ -13,8 +13,8 @@ define i32 @func(i32 %a) nounwind {
 
 declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 
-define i32 @f(i32 %argc, i8** %argv) ssp {
-; CHECK: func
+define i32 @test2(i32 %argc, i8** %argv) ssp {
+; CHECK: test2:
 ; CHECK: str     r0
 ; CHECK: movs    r2
 ; CHECK: mov     r1
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index 3064202eb3fe..e7ff63f8dbb0 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -7,7 +7,7 @@
 
 define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
-; EXPANDED: t:
+; EXPANDED-LABEL: t:
 ; EXPANDED: ldrb [[R2:r[0-9]+]]
 ; EXPANDED: ldrb [[R3:r[0-9]+]]
 ; EXPANDED: ldrb [[R12:r[0-9]+]]
@@ -17,7 +17,7 @@ entry:
 ; EXPANDED: strb [[R3]]
 ; EXPANDED: strb [[R2]]
 
-; UNALIGNED: t:
+; UNALIGNED-LABEL: t:
 ; UNALIGNED: ldr r1
 ; UNALIGNED: str r1
 
@@ -30,13 +30,13 @@ entry:
 
 define void @hword(double* %a, double* %b) nounwind {
 entry:
-; EXPANDED: hword:
+; EXPANDED-LABEL: hword:
 ; EXPANDED-NOT: vld1
 ; EXPANDED: ldrh
 ; EXPANDED-NOT: str1
 ; EXPANDED: strh
 
-; UNALIGNED: hword:
+; UNALIGNED-LABEL: hword:
 ; UNALIGNED: vld1.16
 ; UNALIGNED: vst1.16
   %tmp = load double* %a, align 2
@@ -46,13 +46,13 @@ entry:
 
 define void @byte(double* %a, double* %b) nounwind {
 entry:
-; EXPANDED: byte:
+; EXPANDED-LABEL: byte:
 ; EXPANDED-NOT: vld1
 ; EXPANDED: ldrb
 ; EXPANDED-NOT: str1
 ; EXPANDED: strb
 
-; UNALIGNED: byte:
+; UNALIGNED-LABEL: byte:
 ; UNALIGNED: vld1.8
 ; UNALIGNED: vst1.8
   %tmp = load double* %a, align 1
@@ -62,11 +62,11 @@ entry:
 
 define void @byte_word_ops(i32* %a, i32* %b) nounwind {
 entry:
-; EXPANDED: byte_word_ops:
+; EXPANDED-LABEL: byte_word_ops:
 ; EXPANDED: ldrb
 ; EXPANDED: strb
 
-; UNALIGNED: byte_word_ops:
+; UNALIGNED-LABEL: byte_word_ops:
 ; UNALIGNED-NOT: ldrb
 ; UNALIGNED: ldr
 ; UNALIGNED-NOT: strb
diff --git a/test/CodeGen/ARM/unaligned_load_store_vector.ll b/test/CodeGen/ARM/unaligned_load_store_vector.ll
index 25ae6517937b..968a2c7ad0bb 100644
--- a/test/CodeGen/ARM/unaligned_load_store_vector.ll
+++ b/test/CodeGen/ARM/unaligned_load_store_vector.ll
@@ -4,7 +4,7 @@
 ;SIZE  = 64
 ;TYPE  = <8 x i8>
 define void @v64_v8i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v8i8_1:
+;CHECK-LABEL: v64_v8i8_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -22,7 +22,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <4 x i16>
 define void @v64_v4i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v4i16_1:
+;CHECK-LABEL: v64_v4i16_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -40,7 +40,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <2 x i32>
 define void @v64_v2i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v2i32_1:
+;CHECK-LABEL: v64_v2i32_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -58,7 +58,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <2 x float>
 define void @v64_v2f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v2f32_1:
+;CHECK-LABEL: v64_v2f32_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -76,7 +76,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <16 x i8>
 define void @v128_v16i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v16i8_1:
+;CHECK-LABEL: v128_v16i8_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -94,7 +94,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <8 x i16>
 define void @v128_v8i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v8i16_1:
+;CHECK-LABEL: v128_v8i16_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -112,7 +112,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <4 x i32>
 define void @v128_v4i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v4i32_1:
+;CHECK-LABEL: v128_v4i32_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -130,7 +130,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <2 x i64>
 define void @v128_v2i64_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v2i64_1:
+;CHECK-LABEL: v128_v2i64_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -148,7 +148,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <4 x float>
 define void @v128_v4f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v4f32_1:
+;CHECK-LABEL: v128_v4f32_1:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -166,7 +166,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <8 x i8>
 define void @v64_v8i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v8i8_2:
+;CHECK-LABEL: v64_v8i8_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -184,7 +184,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <4 x i16>
 define void @v64_v4i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v4i16_2:
+;CHECK-LABEL: v64_v4i16_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -202,7 +202,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <2 x i32>
 define void @v64_v2i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v2i32_2:
+;CHECK-LABEL: v64_v2i32_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -220,7 +220,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <2 x float>
 define void @v64_v2f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v2f32_2:
+;CHECK-LABEL: v64_v2f32_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -238,7 +238,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <16 x i8>
 define void @v128_v16i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v16i8_2:
+;CHECK-LABEL: v128_v16i8_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -256,7 +256,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <8 x i16>
 define void @v128_v8i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v8i16_2:
+;CHECK-LABEL: v128_v8i16_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -274,7 +274,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <4 x i32>
 define void @v128_v4i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v4i32_2:
+;CHECK-LABEL: v128_v4i32_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -292,7 +292,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <2 x i64>
 define void @v128_v2i64_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v2i64_2:
+;CHECK-LABEL: v128_v2i64_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -310,7 +310,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <4 x float>
 define void @v128_v4f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v4f32_2:
+;CHECK-LABEL: v128_v4f32_2:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -328,7 +328,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <8 x i8>
 define void @v64_v8i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v8i8_4:
+;CHECK-LABEL: v64_v8i8_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -346,7 +346,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <4 x i16>
 define void @v64_v4i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v4i16_4:
+;CHECK-LABEL: v64_v4i16_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -364,7 +364,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <2 x i32>
 define void @v64_v2i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v2i32_4:
+;CHECK-LABEL: v64_v2i32_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -382,7 +382,7 @@ entry:
 ;SIZE  = 64
 ;TYPE  = <2 x float>
 define void @v64_v2f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v64_v2f32_4:
+;CHECK-LABEL: v64_v2f32_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -400,7 +400,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <16 x i8>
 define void @v128_v16i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v16i8_4:
+;CHECK-LABEL: v128_v16i8_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -418,7 +418,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <8 x i16>
 define void @v128_v8i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v8i16_4:
+;CHECK-LABEL: v128_v8i16_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -436,7 +436,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <4 x i32>
 define void @v128_v4i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v4i32_4:
+;CHECK-LABEL: v128_v4i32_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -454,7 +454,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <2 x i64>
 define void @v128_v2i64_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v2i64_4:
+;CHECK-LABEL: v128_v2i64_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
@@ -472,7 +472,7 @@ entry:
 ;SIZE  = 128
 ;TYPE  = <4 x float>
 define void @v128_v4f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
-;CHECK: v128_v4f32_4:
+;CHECK-LABEL: v128_v4f32_4:
 entry:
   %po = getelementptr i8* %out, i32 0
   %pi = getelementptr i8* %in,  i32 0
diff --git a/test/CodeGen/ARM/undef-sext.ll b/test/CodeGen/ARM/undef-sext.ll
index 2c28da3b6461..c6d76d0017df 100644
--- a/test/CodeGen/ARM/undef-sext.ll
+++ b/test/CodeGen/ARM/undef-sext.ll
@@ -4,7 +4,7 @@
 
 define i32 @t(i32* %a) nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: ldr r0, [r0]
 ; CHECK: bx lr
   %0 = sext i16 undef to i32
diff --git a/test/CodeGen/ARM/unwind-init.ll b/test/CodeGen/ARM/unwind-init.ll
new file mode 100644
index 000000000000..1e12f5510823
--- /dev/null
+++ b/test/CodeGen/ARM/unwind-init.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=armv7-unknown-linux-gnueabi < %s | FileCheck %s
+; Check that all callee-saved registers are saved and restored in functions
+; that call __builtin_unwind_init(). This is its undocumented behavior in gcc,
+; and it is used in compiling libgcc_eh.
+; See also PR8541
+
+declare void @llvm.eh.unwind.init()
+
+define void @calls_unwind_init() {
+  call void @llvm.eh.unwind.init()   ; intrinsic under test; the checks below expect r4-r11 plus lr/pc and d8-d15 to be pushed and popped
+  ret void
+}
+
+; CHECK-LABEL: calls_unwind_init:
+; CHECK: push    {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: vpush   {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK: vpop    {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK: pop     {r4, r5, r6, r7, r8, r9, r10, r11, pc}
diff --git a/test/CodeGen/ARM/v1-constant-fold.ll b/test/CodeGen/ARM/v1-constant-fold.ll
index b86d5db29c4b..eb49a81ab763 100644
--- a/test/CodeGen/ARM/v1-constant-fold.ll
+++ b/test/CodeGen/ARM/v1-constant-fold.ll
@@ -2,7 +2,7 @@
 
 ; PR15611. Check that we don't crash when constant folding v1i32 types.
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 define void @foo(i32 %arg) {
 bb:
   %tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll
index af477b40a781..f18b49822847 100644
--- a/test/CodeGen/ARM/va_arg.ll
+++ b/test/CodeGen/ARM/va_arg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s
 ; Test that we correctly align elements when using va_arg
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: bfc
 ; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
 ; CHECK: bfc	[[REG]], #0, #3
@@ -17,7 +17,7 @@ entry:
   ret i64 %0
 }
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK-NOT: bfc
 ; CHECK: add	[[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
 ; CHECK: bfc	[[REG]], #0, #3
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 4fe1c434799d..97139e9b6ccc 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vabas8:
+;CHECK-LABEL: vabas8:
 ;CHECK: vaba.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -12,7 +12,7 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vabas16:
+;CHECK-LABEL: vabas16:
 ;CHECK: vaba.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -23,7 +23,7 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vabas32:
+;CHECK-LABEL: vabas32:
 ;CHECK: vaba.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -34,7 +34,7 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vabau8:
+;CHECK-LABEL: vabau8:
 ;CHECK: vaba.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -45,7 +45,7 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vabau16:
+;CHECK-LABEL: vabau16:
 ;CHECK: vaba.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -56,7 +56,7 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vabau32:
+;CHECK-LABEL: vabau32:
 ;CHECK: vaba.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -67,7 +67,7 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK: vabaQs8:
+;CHECK-LABEL: vabaQs8:
 ;CHECK: vaba.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -78,7 +78,7 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK: vabaQs16:
+;CHECK-LABEL: vabaQs16:
 ;CHECK: vaba.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -89,7 +89,7 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK: vabaQs32:
+;CHECK-LABEL: vabaQs32:
 ;CHECK: vaba.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -100,7 +100,7 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK: vabaQu8:
+;CHECK-LABEL: vabaQu8:
 ;CHECK: vaba.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -111,7 +111,7 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK: vabaQu16:
+;CHECK-LABEL: vabaQu16:
 ;CHECK: vaba.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -122,7 +122,7 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK: vabaQu32:
+;CHECK-LABEL: vabaQu32:
 ;CHECK: vaba.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -149,7 +149,7 @@ declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind read
 declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vabals8:
+;CHECK-LABEL: vabals8:
 ;CHECK: vabal.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -161,7 +161,7 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vabals16:
+;CHECK-LABEL: vabals16:
 ;CHECK: vabal.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -173,7 +173,7 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vabals32:
+;CHECK-LABEL: vabals32:
 ;CHECK: vabal.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -185,7 +185,7 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vabalu8:
+;CHECK-LABEL: vabalu8:
 ;CHECK: vabal.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -197,7 +197,7 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vabalu16:
+;CHECK-LABEL: vabalu16:
 ;CHECK: vabal.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -209,7 +209,7 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vabalu32:
+;CHECK-LABEL: vabalu32:
 ;CHECK: vabal.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 9ec734fa7641..2eb6d935de83 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vabds8:
+;CHECK-LABEL: vabds8:
 ;CHECK: vabd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vabds16:
+;CHECK-LABEL: vabds16:
 ;CHECK: vabd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vabds32:
+;CHECK-LABEL: vabds32:
 ;CHECK: vabd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vabdu8:
+;CHECK-LABEL: vabdu8:
 ;CHECK: vabd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -37,7 +37,7 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vabdu16:
+;CHECK-LABEL: vabdu16:
 ;CHECK: vabd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vabdu32:
+;CHECK-LABEL: vabdu32:
 ;CHECK: vabd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -55,7 +55,7 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vabdf32:
+;CHECK-LABEL: vabdf32:
 ;CHECK: vabd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -64,7 +64,7 @@ define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vabdQs8:
+;CHECK-LABEL: vabdQs8:
 ;CHECK: vabd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -73,7 +73,7 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vabdQs16:
+;CHECK-LABEL: vabdQs16:
 ;CHECK: vabd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -82,7 +82,7 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vabdQs32:
+;CHECK-LABEL: vabdQs32:
 ;CHECK: vabd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -91,7 +91,7 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vabdQu8:
+;CHECK-LABEL: vabdQu8:
 ;CHECK: vabd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -100,7 +100,7 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vabdQu16:
+;CHECK-LABEL: vabdQu16:
 ;CHECK: vabd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -109,7 +109,7 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vabdQu32:
+;CHECK-LABEL: vabdQu32:
 ;CHECK: vabd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -118,7 +118,7 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vabdQf32:
+;CHECK-LABEL: vabdQf32:
 ;CHECK: vabd.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -147,7 +147,7 @@ declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind read
 declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vabdls8:
+;CHECK-LABEL: vabdls8:
 ;CHECK: vabdl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -157,7 +157,7 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vabdls16:
+;CHECK-LABEL: vabdls16:
 ;CHECK: vabdl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -167,7 +167,7 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vabdls32:
+;CHECK-LABEL: vabdls32:
 ;CHECK: vabdl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -177,7 +177,7 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vabdlu8:
+;CHECK-LABEL: vabdlu8:
 ;CHECK: vabdl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -187,7 +187,7 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vabdlu16:
+;CHECK-LABEL: vabdlu16:
 ;CHECK: vabdl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -197,7 +197,7 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vabdlu32:
+;CHECK-LABEL: vabdlu32:
 ;CHECK: vabdl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 18ba61f81e65..96dd38ec2e68 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
-;CHECK: vabss8:
+;CHECK-LABEL: vabss8:
 ;CHECK: vabs.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
@@ -9,7 +9,7 @@ define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
-;CHECK: vabss16:
+;CHECK-LABEL: vabss16:
 ;CHECK: vabs.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
@@ -17,7 +17,7 @@ define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
-;CHECK: vabss32:
+;CHECK-LABEL: vabss32:
 ;CHECK: vabs.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
@@ -25,7 +25,7 @@ define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
-;CHECK: vabsf32:
+;CHECK-LABEL: vabsf32:
 ;CHECK: vabs.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
@@ -33,7 +33,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
 }
 
 define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
-;CHECK: vabsQs8:
+;CHECK-LABEL: vabsQs8:
 ;CHECK: vabs.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
@@ -41,7 +41,7 @@ define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
-;CHECK: vabsQs16:
+;CHECK-LABEL: vabsQs16:
 ;CHECK: vabs.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
@@ -49,7 +49,7 @@ define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
-;CHECK: vabsQs32:
+;CHECK-LABEL: vabsQs32:
 ;CHECK: vabs.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
@@ -57,7 +57,7 @@ define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
 }
 
 define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
-;CHECK: vabsQf32:
+;CHECK-LABEL: vabsQf32:
 ;CHECK: vabs.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
@@ -75,7 +75,7 @@ declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
 declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
 
 define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
-;CHECK: vqabss8:
+;CHECK-LABEL: vqabss8:
 ;CHECK: vqabs.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
@@ -83,7 +83,7 @@ define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
-;CHECK: vqabss16:
+;CHECK-LABEL: vqabss16:
 ;CHECK: vqabs.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
@@ -91,7 +91,7 @@ define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
-;CHECK: vqabss32:
+;CHECK-LABEL: vqabss32:
 ;CHECK: vqabs.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
@@ -99,7 +99,7 @@ define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
 }
 
 define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
-;CHECK: vqabsQs8:
+;CHECK-LABEL: vqabsQs8:
 ;CHECK: vqabs.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
@@ -107,7 +107,7 @@ define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
-;CHECK: vqabsQs16:
+;CHECK-LABEL: vqabsQs16:
 ;CHECK: vqabs.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
@@ -115,7 +115,7 @@ define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
-;CHECK: vqabsQs32:
+;CHECK-LABEL: vqabsQs32:
 ;CHECK: vqabs.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index a830e968ff78..fcb5408272f4 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddi8:
+;CHECK-LABEL: vaddi8:
 ;CHECK: vadd.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddi16:
+;CHECK-LABEL: vaddi16:
 ;CHECK: vadd.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddi32:
+;CHECK-LABEL: vaddi32:
 ;CHECK: vadd.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vaddi64:
+;CHECK-LABEL: vaddi64:
 ;CHECK: vadd.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vaddf32:
+;CHECK-LABEL: vaddf32:
 ;CHECK: vadd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -46,7 +46,7 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vaddQi8:
+;CHECK-LABEL: vaddQi8:
 ;CHECK: vadd.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -55,7 +55,7 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vaddQi16:
+;CHECK-LABEL: vaddQi16:
 ;CHECK: vadd.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -64,7 +64,7 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vaddQi32:
+;CHECK-LABEL: vaddQi32:
 ;CHECK: vadd.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -73,7 +73,7 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vaddQi64:
+;CHECK-LABEL: vaddQi64:
 ;CHECK: vadd.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -82,7 +82,7 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vaddQf32:
+;CHECK-LABEL: vaddQf32:
 ;CHECK: vadd.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -90,39 +90,8 @@ define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 	ret <4 x float> %tmp3
 }
 
-define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vaddhni16:
-;CHECK: vaddhn.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
-	ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vaddhni32:
-;CHECK: vaddhn.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
-	ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vaddhni64:
-;CHECK: vaddhn.i64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
-	%tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
-	ret <2 x i32> %tmp3
-}
-
-declare <8 x i8>  @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
 define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vraddhni16:
+;CHECK-LABEL: vraddhni16:
 ;CHECK: vraddhn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -131,7 +100,7 @@ define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vraddhni32:
+;CHECK-LABEL: vraddhni32:
 ;CHECK: vraddhn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -140,7 +109,7 @@ define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vraddhni64:
+;CHECK-LABEL: vraddhni64:
 ;CHECK: vraddhn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -152,8 +121,35 @@ declare <8 x i8>  @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind rea
 declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
 
+define <8 x i8> @vaddhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind { ; plain-IR form (add, lshr, trunc; no intrinsic call) that the directive below expects llc to emit as vaddhn.i16
+; CHECK-LABEL: vaddhni16_natural:
+; CHECK: vaddhn.i16
+  %sum = add <8 x i16> %A, %B ; lane-wise 16-bit sum of the two vector arguments
+  %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; logical shift by 8 = half the lane width, moving each lane's upper half into its lower half
+  %trunc = trunc <8 x i16> %shift to <8 x i8> ; keep the low 8 bits of each lane, i.e. the upper half of each sum
+  ret <8 x i8> %trunc
+}
+
+define <4 x i16> @vaddhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind { ; plain-IR form (add, lshr, trunc; no intrinsic call) that the directive below expects llc to emit as vaddhn.i32
+; CHECK-LABEL: vaddhni32_natural:
+; CHECK: vaddhn.i32
+  %sum = add <4 x i32> %A, %B ; lane-wise 32-bit sum of the two vector arguments
+  %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> ; logical shift by 16 = half the lane width, moving each lane's upper half into its lower half
+  %trunc = trunc <4 x i32> %shift to <4 x i16> ; keep the low 16 bits of each lane, i.e. the upper half of each sum
+  ret <4 x i16> %trunc
+}
+
+define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind { ; plain-IR form (add, lshr, trunc; no intrinsic call) that the directive below expects llc to emit as vaddhn.i64
+; CHECK-LABEL: vaddhni64_natural:
+; CHECK: vaddhn.i64
+  %sum = add <2 x i64> %A, %B ; lane-wise 64-bit sum of the two vector arguments
+  %shift = lshr <2 x i64> %sum, <i64 32, i64 32> ; logical shift by 32 = half the lane width, moving each lane's upper half into its lower half
+  %trunc = trunc <2 x i64> %shift to <2 x i32> ; keep the low 32 bits of each lane, i.e. the upper half of each sum
+  ret <2 x i32> %trunc
+}
+
 define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddls8:
+;CHECK-LABEL: vaddls8:
 ;CHECK: vaddl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -164,7 +160,7 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddls16:
+;CHECK-LABEL: vaddls16:
 ;CHECK: vaddl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -175,7 +171,7 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddls32:
+;CHECK-LABEL: vaddls32:
 ;CHECK: vaddl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -186,7 +182,7 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddlu8:
+;CHECK-LABEL: vaddlu8:
 ;CHECK: vaddl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -197,7 +193,7 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddlu16:
+;CHECK-LABEL: vaddlu16:
 ;CHECK: vaddl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -208,7 +204,7 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddlu32:
+;CHECK-LABEL: vaddlu32:
 ;CHECK: vaddl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -219,7 +215,7 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddws8:
+;CHECK-LABEL: vaddws8:
 ;CHECK: vaddw.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -229,7 +225,7 @@ define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddws16:
+;CHECK-LABEL: vaddws16:
 ;CHECK: vaddw.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -239,7 +235,7 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddws32:
+;CHECK-LABEL: vaddws32:
 ;CHECK: vaddw.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -249,7 +245,7 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddwu8:
+;CHECK-LABEL: vaddwu8:
 ;CHECK: vaddw.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -259,7 +255,7 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddwu16:
+;CHECK-LABEL: vaddwu16:
 ;CHECK: vaddw.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -269,7 +265,7 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddwu32:
+;CHECK-LABEL: vaddwu32:
 ;CHECK: vaddw.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index 51f9bdf9718b..7b48441958f6 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a8 | FileCheck %s
 
 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_andi8:
+;CHECK-LABEL: v_andi8:
 ;CHECK: vand
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_andi16:
+;CHECK-LABEL: v_andi16:
 ;CHECK: vand
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_andi32:
+;CHECK-LABEL: v_andi32:
 ;CHECK: vand
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_andi64:
+;CHECK-LABEL: v_andi64:
 ;CHECK: vand
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_andQi8:
+;CHECK-LABEL: v_andQi8:
 ;CHECK: vand
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -46,7 +46,7 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_andQi16:
+;CHECK-LABEL: v_andQi16:
 ;CHECK: vand
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -55,7 +55,7 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_andQi32:
+;CHECK-LABEL: v_andQi32:
 ;CHECK: vand
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -64,7 +64,7 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_andQi64:
+;CHECK-LABEL: v_andQi64:
 ;CHECK: vand
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -73,7 +73,7 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_bici8:
+;CHECK-LABEL: v_bici8:
 ;CHECK: vbic
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -83,7 +83,7 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_bici16:
+;CHECK-LABEL: v_bici16:
 ;CHECK: vbic
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -93,7 +93,7 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_bici32:
+;CHECK-LABEL: v_bici32:
 ;CHECK: vbic
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -103,7 +103,7 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_bici64:
+;CHECK-LABEL: v_bici64:
 ;CHECK: vbic
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -113,7 +113,7 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_bicQi8:
+;CHECK-LABEL: v_bicQi8:
 ;CHECK: vbic
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -123,7 +123,7 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_bicQi16:
+;CHECK-LABEL: v_bicQi16:
 ;CHECK: vbic
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -133,7 +133,7 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_bicQi32:
+;CHECK-LABEL: v_bicQi32:
 ;CHECK: vbic
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -143,7 +143,7 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_bicQi64:
+;CHECK-LABEL: v_bicQi64:
 ;CHECK: vbic
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -153,7 +153,7 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_eori8:
+;CHECK-LABEL: v_eori8:
 ;CHECK: veor
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -162,7 +162,7 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_eori16:
+;CHECK-LABEL: v_eori16:
 ;CHECK: veor
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -171,7 +171,7 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_eori32:
+;CHECK-LABEL: v_eori32:
 ;CHECK: veor
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -180,7 +180,7 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_eori64:
+;CHECK-LABEL: v_eori64:
 ;CHECK: veor
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -189,7 +189,7 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_eorQi8:
+;CHECK-LABEL: v_eorQi8:
 ;CHECK: veor
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -198,7 +198,7 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_eorQi16:
+;CHECK-LABEL: v_eorQi16:
 ;CHECK: veor
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -207,7 +207,7 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_eorQi32:
+;CHECK-LABEL: v_eorQi32:
 ;CHECK: veor
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -216,7 +216,7 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_eorQi64:
+;CHECK-LABEL: v_eorQi64:
 ;CHECK: veor
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -225,7 +225,7 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
-;CHECK: v_mvni8:
+;CHECK-LABEL: v_mvni8:
 ;CHECK: vmvn
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -233,7 +233,7 @@ define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
-;CHECK: v_mvni16:
+;CHECK-LABEL: v_mvni16:
 ;CHECK: vmvn
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -241,7 +241,7 @@ define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
-;CHECK: v_mvni32:
+;CHECK-LABEL: v_mvni32:
 ;CHECK: vmvn
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
@@ -249,7 +249,7 @@ define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
-;CHECK: v_mvni64:
+;CHECK-LABEL: v_mvni64:
 ;CHECK: vmvn
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
@@ -257,7 +257,7 @@ define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
-;CHECK: v_mvnQi8:
+;CHECK-LABEL: v_mvnQi8:
 ;CHECK: vmvn
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -265,7 +265,7 @@ define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
-;CHECK: v_mvnQi16:
+;CHECK-LABEL: v_mvnQi16:
 ;CHECK: vmvn
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
@@ -273,7 +273,7 @@ define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
-;CHECK: v_mvnQi32:
+;CHECK-LABEL: v_mvnQi32:
 ;CHECK: vmvn
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
@@ -281,7 +281,7 @@ define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
-;CHECK: v_mvnQi64:
+;CHECK-LABEL: v_mvnQi64:
 ;CHECK: vmvn
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
@@ -289,7 +289,7 @@ define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_orri8:
+;CHECK-LABEL: v_orri8:
 ;CHECK: vorr
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -298,7 +298,7 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_orri16:
+;CHECK-LABEL: v_orri16:
 ;CHECK: vorr
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -307,7 +307,7 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_orri32:
+;CHECK-LABEL: v_orri32:
 ;CHECK: vorr
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -316,7 +316,7 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_orri64:
+;CHECK-LABEL: v_orri64:
 ;CHECK: vorr
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -325,7 +325,7 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_orrQi8:
+;CHECK-LABEL: v_orrQi8:
 ;CHECK: vorr
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -334,7 +334,7 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_orrQi16:
+;CHECK-LABEL: v_orrQi16:
 ;CHECK: vorr
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -343,7 +343,7 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_orrQi32:
+;CHECK-LABEL: v_orrQi32:
 ;CHECK: vorr
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -352,7 +352,7 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_orrQi64:
+;CHECK-LABEL: v_orrQi64:
 ;CHECK: vorr
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -361,7 +361,7 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_orni8:
+;CHECK-LABEL: v_orni8:
 ;CHECK: vorn
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -371,7 +371,7 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_orni16:
+;CHECK-LABEL: v_orni16:
 ;CHECK: vorn
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -381,7 +381,7 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_orni32:
+;CHECK-LABEL: v_orni32:
 ;CHECK: vorn
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -391,7 +391,7 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_orni64:
+;CHECK-LABEL: v_orni64:
 ;CHECK: vorn
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -401,7 +401,7 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_ornQi8:
+;CHECK-LABEL: v_ornQi8:
 ;CHECK: vorn
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -411,7 +411,7 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_ornQi16:
+;CHECK-LABEL: v_ornQi16:
 ;CHECK: vorn
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -421,7 +421,7 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_ornQi32:
+;CHECK-LABEL: v_ornQi32:
 ;CHECK: vorn
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -431,7 +431,7 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_ornQi64:
+;CHECK-LABEL: v_ornQi64:
 ;CHECK: vorn
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -441,7 +441,7 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vtsti8:
+;CHECK-LABEL: vtsti8:
 ;CHECK: vtst.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -452,7 +452,7 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vtsti16:
+;CHECK-LABEL: vtsti16:
 ;CHECK: vtst.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -463,7 +463,7 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vtsti32:
+;CHECK-LABEL: vtsti32:
 ;CHECK: vtst.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -474,7 +474,7 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vtstQi8:
+;CHECK-LABEL: vtstQi8:
 ;CHECK: vtst.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -485,7 +485,7 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vtstQi16:
+;CHECK-LABEL: vtstQi16:
 ;CHECK: vtst.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -496,7 +496,7 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vtstQi32:
+;CHECK-LABEL: vtstQi32:
 ;CHECK: vtst.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -507,7 +507,7 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
-; CHECK: v_orrimm:
+; CHECK-LABEL: v_orrimm:
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vorr
@@ -527,7 +527,7 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
-; CHECK: v_bicimm:
+; CHECK-LABEL: v_bicimm:
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vbic
@@ -537,7 +537,7 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
 }
 
 define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
-; CHECK: v_bicimmQ:
+; CHECK-LABEL: v_bicimmQ:
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vbic
diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll
index ffda0a51bdd0..5e033fe2a647 100644
--- a/test/CodeGen/ARM/vbsl-constant.ll
+++ b/test/CodeGen/ARM/vbsl-constant.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: v_bsli8:
+;CHECK-LABEL: v_bsli8:
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
@@ -15,7 +15,7 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: v_bsli16:
+;CHECK-LABEL: v_bsli16:
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
@@ -29,7 +29,7 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: v_bsli32:
+;CHECK-LABEL: v_bsli32:
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
@@ -43,7 +43,7 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
-;CHECK: v_bsli64:
+;CHECK-LABEL: v_bsli64:
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vldr
@@ -58,7 +58,7 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
 }
 
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK: v_bslQi8:
+;CHECK-LABEL: v_bslQi8:
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vbsl
@@ -72,7 +72,7 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK: v_bslQi16:
+;CHECK-LABEL: v_bslQi16:
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vbsl
@@ -86,7 +86,7 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
 }
 
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK: v_bslQi32:
+;CHECK-LABEL: v_bslQi32:
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vbsl
@@ -100,7 +100,7 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
 }
 
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
-;CHECK: v_bslQi64:
+;CHECK-LABEL: v_bslQi64:
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vld1.64
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 750fb0de5383..1e53e51f8bb0 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -3,7 +3,7 @@
 ; rdar://12471808
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: v_bsli8:
+;CHECK-LABEL: v_bsli8:
 ;CHECK: vbsl
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -16,7 +16,7 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: v_bsli16:
+;CHECK-LABEL: v_bsli16:
 ;CHECK: vbsl
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -29,7 +29,7 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: v_bsli32:
+;CHECK-LABEL: v_bsli32:
 ;CHECK: vbsl
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -42,7 +42,7 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
-;CHECK: v_bsli64:
+;CHECK-LABEL: v_bsli64:
 ;CHECK: vbsl
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -55,7 +55,7 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
 }
 
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK: v_bslQi8:
+;CHECK-LABEL: v_bslQi8:
 ;CHECK: vbsl
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -68,7 +68,7 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK: v_bslQi16:
+;CHECK-LABEL: v_bslQi16:
 ;CHECK: vbsl
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -81,7 +81,7 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
 }
 
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK: v_bslQi32:
+;CHECK-LABEL: v_bslQi32:
 ;CHECK: vbsl
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -94,7 +94,7 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
 }
 
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
-;CHECK: v_bslQi64:
+;CHECK-LABEL: v_bslQi64:
 ;CHECK: vbsl
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -107,84 +107,84 @@ define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwin
 }
 
 define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: vbsl
   %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
   ret <8 x i8> %vbsl.i
 }
 
 define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: vbsl
   %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
   ret <4 x i16> %vbsl3.i
 }
 
 define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: vbsl
   %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
   ret <2 x i32> %vbsl3.i
 }
 
 define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: vbsl
   %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
   ret <2 x float> %vbsl4.i
 }
 
 define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
-; CHECK: g1:
+; CHECK-LABEL: g1:
 ; CHECK: vbsl
   %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
   ret <16 x i8> %vbsl.i
 }
 
 define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
-; CHECK: g2:
+; CHECK-LABEL: g2:
 ; CHECK: vbsl
   %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
   ret <8 x i16> %vbsl3.i
 }
 
 define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK: g3:
+; CHECK-LABEL: g3:
 ; CHECK: vbsl
   %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
   ret <4 x i32> %vbsl3.i
 }
 
 define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
-; CHECK: g4:
+; CHECK-LABEL: g4:
 ; CHECK: vbsl
   %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
   ret <4 x float> %vbsl4.i
 }
 
 define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
-; CHECK: test_vbsl_s64:
+; CHECK-LABEL: test_vbsl_s64:
 ; CHECK: vbsl d
   %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
   ret <1 x i64> %vbsl3.i
 }
 
 define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
-; CHECK: test_vbsl_u64:
+; CHECK-LABEL: test_vbsl_u64:
 ; CHECK: vbsl d
   %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
   ret <1 x i64> %vbsl3.i
 }
 
 define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK: test_vbslq_s64:
+; CHECK-LABEL: test_vbslq_s64:
 ; CHECK: vbsl q
   %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
   ret <2 x i64> %vbsl3.i
 }
 
 define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK: test_vbslq_u64:
+; CHECK-LABEL: test_vbslq_u64:
 ; CHECK: vbsl q
   %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
   ret <2 x i64> %vbsl3.i
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index 051c349a06a4..0a1f2ebe4f83 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vceqi8:
+;CHECK-LABEL: vceqi8:
 ;CHECK: vceq.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -11,7 +11,7 @@ define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vceqi16:
+;CHECK-LABEL: vceqi16:
 ;CHECK: vceq.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -21,7 +21,7 @@ define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vceqi32:
+;CHECK-LABEL: vceqi32:
 ;CHECK: vceq.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -31,7 +31,7 @@ define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vceqf32:
+;CHECK-LABEL: vceqf32:
 ;CHECK: vceq.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -41,7 +41,7 @@ define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vceqQi8:
+;CHECK-LABEL: vceqQi8:
 ;CHECK: vceq.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -51,7 +51,7 @@ define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vceqQi16:
+;CHECK-LABEL: vceqQi16:
 ;CHECK: vceq.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -61,7 +61,7 @@ define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vceqQi32:
+;CHECK-LABEL: vceqQi32:
 ;CHECK: vceq.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -71,7 +71,7 @@ define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vceqQf32:
+;CHECK-LABEL: vceqQf32:
 ;CHECK: vceq.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -81,7 +81,7 @@ define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
-;CHECK: vceqi8Z:
+;CHECK-LABEL: vceqi8Z:
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vceq.i8
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index bf5f0b9efb2f..81a59dbdfe90 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vcges8:
+;CHECK-LABEL: vcges8:
 ;CHECK: vcge.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -11,7 +11,7 @@ define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcges16:
+;CHECK-LABEL: vcges16:
 ;CHECK: vcge.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -21,7 +21,7 @@ define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vcges32:
+;CHECK-LABEL: vcges32:
 ;CHECK: vcge.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -31,7 +31,7 @@ define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vcgeu8:
+;CHECK-LABEL: vcgeu8:
 ;CHECK: vcge.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -41,7 +41,7 @@ define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcgeu16:
+;CHECK-LABEL: vcgeu16:
 ;CHECK: vcge.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -51,7 +51,7 @@ define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vcgeu32:
+;CHECK-LABEL: vcgeu32:
 ;CHECK: vcge.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -61,7 +61,7 @@ define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcgef32:
+;CHECK-LABEL: vcgef32:
 ;CHECK: vcge.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -71,7 +71,7 @@ define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vcgeQs8:
+;CHECK-LABEL: vcgeQs8:
 ;CHECK: vcge.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -81,7 +81,7 @@ define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vcgeQs16:
+;CHECK-LABEL: vcgeQs16:
 ;CHECK: vcge.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -91,7 +91,7 @@ define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vcgeQs32:
+;CHECK-LABEL: vcgeQs32:
 ;CHECK: vcge.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -101,7 +101,7 @@ define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vcgeQu8:
+;CHECK-LABEL: vcgeQu8:
 ;CHECK: vcge.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -111,7 +111,7 @@ define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vcgeQu16:
+;CHECK-LABEL: vcgeQu16:
 ;CHECK: vcge.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -121,7 +121,7 @@ define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vcgeQu32:
+;CHECK-LABEL: vcgeQu32:
 ;CHECK: vcge.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -131,7 +131,7 @@ define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vcgeQf32:
+;CHECK-LABEL: vcgeQf32:
 ;CHECK: vcge.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -141,7 +141,7 @@ define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vacgef32:
+;CHECK-LABEL: vacgef32:
 ;CHECK: vacge.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -150,7 +150,7 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vacgeQf32:
+;CHECK-LABEL: vacgeQf32:
 ;CHECK: vacge.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -162,7 +162,7 @@ declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readn
 declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
 
 define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
-;CHECK: vcgei8Z:
+;CHECK-LABEL: vcgei8Z:
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vcge.s8
@@ -173,7 +173,7 @@ define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
 }
 
 define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
-;CHECK: vclei8Z:
+;CHECK-LABEL: vclei8Z:
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vcle.s8
@@ -187,7 +187,7 @@ define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
 ; Floating-point comparisons against zero produce results with integer
 ; elements, not floating-point elements.
 define void @test_vclez_fp() nounwind optsize {
-;CHECK: test_vclez_fp
+;CHECK-LABEL: test_vclez_fp:
 ;CHECK: vcle.f32
 entry:
   %0 = fcmp ole <4 x float> undef, zeroinitializer
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 2243bac91fb1..056866fe994b 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vcgts8:
+;CHECK-LABEL: vcgts8:
 ;CHECK: vcgt.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -12,7 +12,7 @@ define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcgts16:
+;CHECK-LABEL: vcgts16:
 ;CHECK: vcgt.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -22,7 +22,7 @@ define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vcgts32:
+;CHECK-LABEL: vcgts32:
 ;CHECK: vcgt.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -32,7 +32,7 @@ define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vcgtu8:
+;CHECK-LABEL: vcgtu8:
 ;CHECK: vcgt.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -42,7 +42,7 @@ define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcgtu16:
+;CHECK-LABEL: vcgtu16:
 ;CHECK: vcgt.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -52,7 +52,7 @@ define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vcgtu32:
+;CHECK-LABEL: vcgtu32:
 ;CHECK: vcgt.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -62,7 +62,7 @@ define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcgtf32:
+;CHECK-LABEL: vcgtf32:
 ;CHECK: vcgt.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -72,7 +72,7 @@ define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vcgtQs8:
+;CHECK-LABEL: vcgtQs8:
 ;CHECK: vcgt.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -82,7 +82,7 @@ define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vcgtQs16:
+;CHECK-LABEL: vcgtQs16:
 ;CHECK: vcgt.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -92,7 +92,7 @@ define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vcgtQs32:
+;CHECK-LABEL: vcgtQs32:
 ;CHECK: vcgt.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -102,7 +102,7 @@ define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vcgtQu8:
+;CHECK-LABEL: vcgtQu8:
 ;CHECK: vcgt.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -112,7 +112,7 @@ define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vcgtQu16:
+;CHECK-LABEL: vcgtQu16:
 ;CHECK: vcgt.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -122,7 +122,7 @@ define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vcgtQu32:
+;CHECK-LABEL: vcgtQu32:
 ;CHECK: vcgt.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -132,7 +132,7 @@ define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vcgtQf32:
+;CHECK-LABEL: vcgtQf32:
 ;CHECK: vcgt.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -142,7 +142,7 @@ define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vacgtf32:
+;CHECK-LABEL: vacgtf32:
 ;CHECK: vacgt.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -151,7 +151,7 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vacgtQf32:
+;CHECK-LABEL: vacgtQf32:
 ;CHECK: vacgt.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -161,7 +161,7 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 
 ; rdar://7923010
 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vcgt_zext:
+;CHECK-LABEL: vcgt_zext:
 ;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
 ;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
 ;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
@@ -176,7 +176,7 @@ declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readn
 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
 
 define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
-;CHECK: vcgti8Z:
+;CHECK-LABEL: vcgti8Z:
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vcgt.s8
@@ -187,7 +187,7 @@ define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
 }
 
 define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
-;CHECK: vclti8Z:
+;CHECK-LABEL: vclti8Z:
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vclt.s8
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 9f55c24b4029..0b539799833d 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -2,7 +2,7 @@
 ; NB: this tests vcnt, vclz, and vcls
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
-;CHECK: vcnt8:
+;CHECK-LABEL: vcnt8:
 ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
@@ -10,7 +10,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
 }
 
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
-;CHECK: vcntQ8:
+;CHECK-LABEL: vcntQ8:
 ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
@@ -21,7 +21,7 @@ declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
 
 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
-;CHECK: vclz8:
+;CHECK-LABEL: vclz8:
 ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
@@ -29,7 +29,7 @@ define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
-;CHECK: vclz16:
+;CHECK-LABEL: vclz16:
 ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
@@ -37,7 +37,7 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
-;CHECK: vclz32:
+;CHECK-LABEL: vclz32:
 ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
@@ -45,7 +45,7 @@ define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
 }
 
 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
-;CHECK: vclzQ8:
+;CHECK-LABEL: vclzQ8:
 ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
@@ -53,7 +53,7 @@ define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
-;CHECK: vclzQ16:
+;CHECK-LABEL: vclzQ16:
 ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
@@ -61,7 +61,7 @@ define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
-;CHECK: vclzQ32:
+;CHECK-LABEL: vclzQ32:
 ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
@@ -77,7 +77,7 @@ declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
 
 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
-;CHECK: vclss8:
+;CHECK-LABEL: vclss8:
 ;CHECK: vcls.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
@@ -85,7 +85,7 @@ define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
-;CHECK: vclss16:
+;CHECK-LABEL: vclss16:
 ;CHECK: vcls.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
@@ -93,7 +93,7 @@ define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
-;CHECK: vclss32:
+;CHECK-LABEL: vclss32:
 ;CHECK: vcls.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
@@ -101,7 +101,7 @@ define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
 }
 
 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
-;CHECK: vclsQs8:
+;CHECK-LABEL: vclsQs8:
 ;CHECK: vcls.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
@@ -109,7 +109,7 @@ define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
-;CHECK: vclsQs16:
+;CHECK-LABEL: vclsQs16:
 ;CHECK: vcls.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
@@ -117,7 +117,7 @@ define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
-;CHECK: vclsQs32:
+;CHECK-LABEL: vclsQs32:
 ;CHECK: vcls.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
diff --git a/test/CodeGen/ARM/vcvt-cost.ll b/test/CodeGen/ARM/vcvt-cost.ll
index 0d45c40b8814..5e56a5b34cf0 100644
--- a/test/CodeGen/ARM/vcvt-cost.ll
+++ b/test/CodeGen/ARM/vcvt-cost.ll
@@ -4,7 +4,7 @@
 ; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
 %T0_5 = type <8 x i8>
 %T1_5 = type <8 x i32>
-; CHECK: func_cvt5:
+; CHECK-LABEL: func_cvt5:
 define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
 ; CHECK: vmovl.s8
 ; CHECK: vmovl.s16
@@ -20,7 +20,7 @@ define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
 ;; is improved the cost needs to change.
 %TA0_5 = type <8 x i8>
 %TA1_5 = type <8 x i32>
-; CHECK: func_cvt1:
+; CHECK-LABEL: func_cvt1:
 define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
 ; CHECK: vmovl.u8
 ; CHECK: vmovl.u16
@@ -35,7 +35,7 @@ define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
 
 %T0_51 = type <8 x i32>
 %T1_51 = type <8 x i8>
-; CHECK: func_cvt51:
+; CHECK-LABEL: func_cvt51:
 define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
 ; CHECK: vmovn.i32
 ; CHECK: vmovn.i32
@@ -50,7 +50,7 @@ define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
 
 %TT0_5 = type <16 x i8>
 %TT1_5 = type <16 x i32>
-; CHECK: func_cvt52:
+; CHECK-LABEL: func_cvt52:
 define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
 ; CHECK: vmovl.s16
 ; CHECK: vmovl.s16
@@ -67,7 +67,7 @@ define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
 ;; is improved the cost needs to change.
 %TTA0_5 = type <16 x i8>
 %TTA1_5 = type <16 x i32>
-; CHECK: func_cvt12:
+; CHECK-LABEL: func_cvt12:
 define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
 ; CHECK: vmovl.u16
 ; CHECK: vmovl.u16
@@ -83,7 +83,7 @@ define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
 
 %TT0_51 = type <16 x i32>
 %TT1_51 = type <16 x i8>
-; CHECK: func_cvt512:
+; CHECK-LABEL: func_cvt512:
 define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
 ; CHECK: vmovn.i32
 ; CHECK: vmovn.i32
@@ -99,7 +99,7 @@ define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
   ret void
 }
 
-; CHECK: sext_v4i16_v4i64:
+; CHECK-LABEL: sext_v4i16_v4i64:
 define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
 ; CHECK: vmovl.s32
 ; CHECK: vmovl.s32
@@ -111,7 +111,7 @@ define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
   ret void
 }
 
-; CHECK: zext_v4i16_v4i64:
+; CHECK-LABEL: zext_v4i16_v4i64:
 define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
 ; CHECK: vmovl.u32
 ; CHECK: vmovl.u32
@@ -123,7 +123,7 @@ define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
   ret void
 }
 
-; CHECK: sext_v8i16_v8i64:
+; CHECK-LABEL: sext_v8i16_v8i64:
 define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
 ; CHECK: vmovl.s32
 ; CHECK: vmovl.s32
@@ -137,7 +137,7 @@ define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
   ret void
 }
 
-; CHECK: zext_v8i16_v8i64:
+; CHECK-LABEL: zext_v8i16_v8i64:
 define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
 ; CHECK: vmovl.u32
 ; CHECK: vmovl.u32
diff --git a/test/CodeGen/ARM/vcvt-v8.ll b/test/CodeGen/ARM/vcvt-v8.ll
new file mode 100644
index 000000000000..c449009e1e1f
--- /dev/null
+++ b/test/CodeGen/ARM/vcvt-v8.ll
@@ -0,0 +1,151 @@
+; RUN: llc < %s -mtriple=armv8 -mattr=+neon | FileCheck %s
+
+; Each function below loads a float vector, passes it to one
+; llvm.arm.neon.vcvt{a,n,p,m}{s,u} intrinsic and expects the matching
+; vcvt{a,n,p,m}.{s32,u32}.f32 instruction in the output: on q registers for
+; <4 x float> operands and on d registers for <2 x float> operands.
+
+define <4 x i32> @vcvtasq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtasq:
+; CHECK: vcvta.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtasd(<2 x float>* %A) {
+; CHECK-LABEL: vcvtasd:
+; CHECK: vcvta.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtnsq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtnsq:
+; CHECK: vcvtn.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtnsd(<2 x float>* %A) {
+; CHECK-LABEL: vcvtnsd:
+; CHECK: vcvtn.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtpsq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtpsq:
+; CHECK: vcvtp.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtpsd(<2 x float>* %A) {
+; CHECK-LABEL: vcvtpsd:
+; CHECK: vcvtp.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtmsq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtmsq:
+; CHECK: vcvtm.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtmsd(<2 x float>* %A) {
+; CHECK-LABEL: vcvtmsd:
+; CHECK: vcvtm.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtauq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtauq:
+; CHECK: vcvta.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtaud(<2 x float>* %A) {
+; CHECK-LABEL: vcvtaud:
+; CHECK: vcvta.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtnuq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtnuq:
+; CHECK: vcvtn.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtnud(<2 x float>* %A) {
+; CHECK-LABEL: vcvtnud:
+; CHECK: vcvtn.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtpuq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtpuq:
+; CHECK: vcvtp.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtpud(<2 x float>* %A) {
+; CHECK-LABEL: vcvtpud:
+; CHECK: vcvtp.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtmuq(<4 x float>* %A) {
+; CHECK-LABEL: vcvtmuq:
+; CHECK: vcvtm.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %tmp1)
+  ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @vcvtmud(<2 x float>* %A) {
+; CHECK-LABEL: vcvtmud:
+; CHECK: vcvtm.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %tmp1)
+  ret <2 x i32> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index c078f493094b..4f17dc559480 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s
 
 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
-;CHECK: vcvt_f32tos32:
+;CHECK-LABEL: vcvt_f32tos32:
 ;CHECK: vcvt.s32.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
@@ -9,7 +9,7 @@ define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
 }
 
 define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
-;CHECK: vcvt_f32tou32:
+;CHECK-LABEL: vcvt_f32tou32:
 ;CHECK: vcvt.u32.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
@@ -17,7 +17,7 @@ define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
 }
 
 define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
-;CHECK: vcvt_s32tof32:
+;CHECK-LABEL: vcvt_s32tof32:
 ;CHECK: vcvt.f32.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
@@ -25,7 +25,7 @@ define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
-;CHECK: vcvt_u32tof32:
+;CHECK-LABEL: vcvt_u32tof32:
 ;CHECK: vcvt.f32.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
@@ -33,7 +33,7 @@ define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
 }
 
 define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
-;CHECK: vcvtQ_f32tos32:
+;CHECK-LABEL: vcvtQ_f32tos32:
 ;CHECK: vcvt.s32.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
@@ -41,7 +41,7 @@ define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
 }
 
 define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
-;CHECK: vcvtQ_f32tou32:
+;CHECK-LABEL: vcvtQ_f32tou32:
 ;CHECK: vcvt.u32.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
@@ -49,7 +49,7 @@ define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
 }
 
 define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
-;CHECK: vcvtQ_s32tof32:
+;CHECK-LABEL: vcvtQ_s32tof32:
 ;CHECK: vcvt.f32.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
@@ -57,7 +57,7 @@ define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
 }
 
 define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
-;CHECK: vcvtQ_u32tof32:
+;CHECK-LABEL: vcvtQ_u32tof32:
 ;CHECK: vcvt.f32.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
@@ -65,7 +65,7 @@ define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
-;CHECK: vcvt_n_f32tos32:
+;CHECK-LABEL: vcvt_n_f32tos32:
 ;CHECK: vcvt.s32.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
@@ -73,7 +73,7 @@ define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
 }
 
 define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
-;CHECK: vcvt_n_f32tou32:
+;CHECK-LABEL: vcvt_n_f32tou32:
 ;CHECK: vcvt.u32.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
@@ -81,7 +81,7 @@ define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
 }
 
 define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
-;CHECK: vcvt_n_s32tof32:
+;CHECK-LABEL: vcvt_n_s32tof32:
 ;CHECK: vcvt.f32.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
@@ -89,7 +89,7 @@ define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
-;CHECK: vcvt_n_u32tof32:
+;CHECK-LABEL: vcvt_n_u32tof32:
 ;CHECK: vcvt.f32.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
@@ -102,7 +102,7 @@ declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwi
 declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
 
 define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
-;CHECK: vcvtQ_n_f32tos32:
+;CHECK-LABEL: vcvtQ_n_f32tos32:
 ;CHECK: vcvt.s32.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
@@ -110,7 +110,7 @@ define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
 }
 
 define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
-;CHECK: vcvtQ_n_f32tou32:
+;CHECK-LABEL: vcvtQ_n_f32tou32:
 ;CHECK: vcvt.u32.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
@@ -118,7 +118,7 @@ define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
 }
 
 define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
-;CHECK: vcvtQ_n_s32tof32:
+;CHECK-LABEL: vcvtQ_n_s32tof32:
 ;CHECK: vcvt.f32.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
@@ -126,7 +126,7 @@ define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
 }
 
 define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
-;CHECK: vcvtQ_n_u32tof32:
+;CHECK-LABEL: vcvtQ_n_u32tof32:
 ;CHECK: vcvt.f32.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
@@ -139,7 +139,7 @@ declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwi
 declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
 
 define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
-;CHECK: vcvt_f16tof32:
+;CHECK-LABEL: vcvt_f16tof32:
 ;CHECK: vcvt.f32.f16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
@@ -147,7 +147,7 @@ define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
-;CHECK: vcvt_f32tof16:
+;CHECK-LABEL: vcvt_f32tof16:
 ;CHECK: vcvt.f16.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
@@ -156,3 +156,44 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
 
 declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
+
+
+define <4 x i16> @fix_float_to_i16(<4 x float> %in) {
+; CHECK-LABEL: fix_float_to_i16:
+; CHECK: vcvt.u32.f32 [[TMP:q[0-9]+]], {{q[0-9]+}}, #1
+; CHECK: vmovn.i32 {{d[0-9]+}}, [[TMP]]
+; The fmul by 2.0 and the fptoui should select one vcvt.u32.f32 with #1, then a vmovn.
+  %doubled = fmul <4 x float> %in, <float 2.0, float 2.0, float 2.0, float 2.0>
+  %narrowed = fptoui <4 x float> %doubled to <4 x i16>
+  ret <4 x i16> %narrowed
+}
+
+define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
+; CHECK-LABEL: fix_float_to_i64:
+; CHECK: bl
+; CHECK: bl
+; Two bl instructions are expected - presumably one conversion call per lane (TODO confirm).
+  %doubled = fmul <2 x float> %in, <float 2.0, float 2.0>
+  %wide = fptoui <2 x float> %doubled to <2 x i64>
+  ret <2 x i64> %wide
+}
+
+define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
+; CHECK-LABEL: fix_double_to_i16:
+; CHECK: vcvt.s32.f64
+; CHECK: vcvt.s32.f64
+; Note the signed vcvt.s32.f64 form is expected even though the IR uses fptoui.
+  %doubled = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
+  %narrowed = fptoui <4 x double> %doubled to <4 x i16>
+  ret <4 x i16> %narrowed
+}
+
+define <2 x i64> @fix_double_to_i64(<2 x double> %in) {
+; CHECK-LABEL: fix_double_to_i64:
+; CHECK: bl
+; CHECK: bl
+; Two bl instructions are expected - presumably one conversion call per lane (TODO confirm).
+  %doubled = fmul <2 x double> %in, <double 2.0, double 2.0>
+  %wide = fptoui <2 x double> %doubled to <2 x i64>
+  ret <2 x i64> %wide
+}
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index e6f1338b8539..96807f7280f8 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -95,3 +95,44 @@ entry:
 }
 
 declare void @foo_float32x4_t(<4 x float>)
+
+define <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
+; CHECK-LABEL: fix_unsigned_i16_to_float:
+; CHECK: vmovl.u16 [[TMP:q[0-9]+]], {{d[0-9]+}}
+; CHECK: vcvt.f32.u32 {{q[0-9]+}}, [[TMP]], #1
+; Expect a vmovl.u16 widening, then uitofp + fdiv by 2.0 as one vcvt.f32.u32 with #1.
+    %asfp = uitofp <4 x i16> %in to <4 x float>
+    %halved = fdiv <4 x float> %asfp, <float 2.0, float 2.0, float 2.0, float 2.0>
+    ret <4 x float> %halved
+}
+
+define <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
+; CHECK-LABEL: fix_signed_i16_to_float:
+; CHECK: vmovl.s16 [[TMP:q[0-9]+]], {{d[0-9]+}}
+; CHECK: vcvt.f32.s32 {{q[0-9]+}}, [[TMP]], #1
+; Expect a vmovl.s16 widening, then sitofp + fdiv by 2.0 as one vcvt.f32.s32 with #1.
+    %asfp = sitofp <4 x i16> %in to <4 x float>
+    %halved = fdiv <4 x float> %asfp, <float 2.0, float 2.0, float 2.0, float 2.0>
+    ret <4 x float> %halved
+}
+
+define <2 x float> @fix_i64_to_float(<2 x i64> %in) {
+; CHECK-LABEL: fix_i64_to_float:
+; CHECK: bl
+; CHECK: bl
+; Two bl instructions are expected - presumably one conversion call per lane (TODO confirm).
+    %asfp = uitofp <2 x i64> %in to <2 x float>
+    %halved = fdiv <2 x float> %asfp, <float 2.0, float 2.0>
+    ret <2 x float> %halved
+}
+
+define <2 x double> @fix_i64_to_double(<2 x i64> %in) {
+; CHECK-LABEL: fix_i64_to_double:
+; CHECK: bl
+; CHECK: bl
+; Two bl instructions are expected - presumably one conversion call per lane (TODO confirm).
+    %asfp = uitofp <2 x i64> %in to <2 x double>
+    %halved = fdiv <2 x double> %asfp, <double 2.0, double 2.0>
+    ret <2 x double> %halved
+}
+
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 2cf94d63ca14..b24be2654dfc 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
-;CHECK: v_dup8:
+;CHECK-LABEL: v_dup8:
 ;CHECK: vdup.8
 	%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
@@ -15,7 +15,7 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
 }
 
 define <4 x i16> @v_dup16(i16 %A) nounwind {
-;CHECK: v_dup16:
+;CHECK-LABEL: v_dup16:
 ;CHECK: vdup.16
 	%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
@@ -25,7 +25,7 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
 }
 
 define <2 x i32> @v_dup32(i32 %A) nounwind {
-;CHECK: v_dup32:
+;CHECK-LABEL: v_dup32:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
@@ -33,7 +33,7 @@ define <2 x i32> @v_dup32(i32 %A) nounwind {
 }
 
 define <2 x float> @v_dupfloat(float %A) nounwind {
-;CHECK: v_dupfloat:
+;CHECK-LABEL: v_dupfloat:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
 	%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
@@ -41,7 +41,7 @@ define <2 x float> @v_dupfloat(float %A) nounwind {
 }
 
 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
-;CHECK: v_dupQ8:
+;CHECK-LABEL: v_dupQ8:
 ;CHECK: vdup.8
 	%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
@@ -63,7 +63,7 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
 }
 
 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
-;CHECK: v_dupQ16:
+;CHECK-LABEL: v_dupQ16:
 ;CHECK: vdup.16
 	%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
@@ -77,7 +77,7 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
 }
 
 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
-;CHECK: v_dupQ32:
+;CHECK-LABEL: v_dupQ32:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
@@ -87,7 +87,7 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
 }
 
 define <4 x float> @v_dupQfloat(float %A) nounwind {
-;CHECK: v_dupQfloat:
+;CHECK-LABEL: v_dupQfloat:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
 	%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
@@ -99,7 +99,7 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
 ; Check to make sure it works with shuffles, too.
 
 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
-;CHECK: v_shuffledup8:
+;CHECK-LABEL: v_shuffledup8:
 ;CHECK: vdup.8
 	%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
@@ -107,7 +107,7 @@ define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
 }
 
 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
-;CHECK: v_shuffledup16:
+;CHECK-LABEL: v_shuffledup16:
 ;CHECK: vdup.16
 	%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -115,7 +115,7 @@ define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
 }
 
 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
-;CHECK: v_shuffledup32:
+;CHECK-LABEL: v_shuffledup32:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -123,7 +123,7 @@ define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
 }
 
 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
-;CHECK: v_shuffledupfloat:
+;CHECK-LABEL: v_shuffledupfloat:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <2 x float> undef, float %A, i32 0
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -131,7 +131,7 @@ define <2 x float> @v_shuffledupfloat(float %A) nounwind {
 }
 
 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
-;CHECK: v_shuffledupQ8:
+;CHECK-LABEL: v_shuffledupQ8:
 ;CHECK: vdup.8
 	%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -139,7 +139,7 @@ define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
 }
 
 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
-;CHECK: v_shuffledupQ16:
+;CHECK-LABEL: v_shuffledupQ16:
 ;CHECK: vdup.16
 	%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -147,7 +147,7 @@ define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
 }
 
 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
-;CHECK: v_shuffledupQ32:
+;CHECK-LABEL: v_shuffledupQ32:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -155,7 +155,7 @@ define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
 }
 
 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
-;CHECK: v_shuffledupQfloat:
+;CHECK-LABEL: v_shuffledupQfloat:
 ;CHECK: vdup.32
 	%tmp1 = insertelement <4 x float> undef, float %A, i32 0
 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@@ -163,7 +163,7 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
 }
 
 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
-;CHECK: vduplane8:
+;CHECK-LABEL: vduplane8:
 ;CHECK: vdup.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -171,7 +171,7 @@ define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
-;CHECK: vduplane16:
+;CHECK-LABEL: vduplane16:
 ;CHECK: vdup.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -179,7 +179,7 @@ define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
-;CHECK: vduplane32:
+;CHECK-LABEL: vduplane32:
 ;CHECK: vdup.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
@@ -187,7 +187,7 @@ define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
-;CHECK: vduplanefloat:
+;CHECK-LABEL: vduplanefloat:
 ;CHECK: vdup.32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
@@ -195,7 +195,7 @@ define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
 }
 
 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
-;CHECK: vduplaneQ8:
+;CHECK-LABEL: vduplaneQ8:
 ;CHECK: vdup.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -203,7 +203,7 @@ define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
-;CHECK: vduplaneQ16:
+;CHECK-LABEL: vduplaneQ16:
 ;CHECK: vdup.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -211,7 +211,7 @@ define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
-;CHECK: vduplaneQ32:
+;CHECK-LABEL: vduplaneQ32:
 ;CHECK: vdup.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -219,7 +219,7 @@ define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
 }
 
 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
-;CHECK: vduplaneQfloat:
+;CHECK-LABEL: vduplaneQfloat:
 ;CHECK: vdup.32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -251,7 +251,7 @@ entry:
 }
 
 ; Radar 7373643
-;CHECK: redundantVdup:
+;CHECK-LABEL: redundantVdup:
 ;CHECK: vmov.i8
 ;CHECK-NOT: vdup.8
 ;CHECK: vstr
@@ -263,7 +263,7 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind {
 }
 
 define <4 x i32> @tdupi(i32 %x, i32 %y) {
-;CHECK: tdupi
+;CHECK-LABEL: tdupi:
 ;CHECK: vdup.32
   %1 = insertelement <4 x i32> undef, i32 %x, i32 0
   %2 = insertelement <4 x i32> %1, i32 %x, i32 1
@@ -273,7 +273,7 @@ define <4 x i32> @tdupi(i32 %x, i32 %y) {
 }
 
 define <4 x float> @tdupf(float %x, float %y) {
-;CHECK: tdupf
+;CHECK-LABEL: tdupf:
 ;CHECK: vdup.32
   %1 = insertelement <4 x float> undef, float %x, i32 0
   %2 = insertelement <4 x float> %1, float %x, i32 1
@@ -285,7 +285,7 @@ define <4 x float> @tdupf(float %x, float %y) {
 ; This test checks that when splatting an element from a vector into another,
 ; the value isn't moved out to GPRs first.
 define <4 x i32> @tduplane(<4 x i32> %invec) {
-;CHECK: tduplane
+;CHECK-LABEL: tduplane:
 ;CHECK-NOT: vmov {{.*}}, d16[1]
 ;CHECK: vdup.32 {{.*}}, d16[1]
   %in = extractelement <4 x i32> %invec, i32 1
@@ -297,7 +297,7 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
 }
 
 define <2 x float> @check_f32(<4 x float> %v) nounwind {
-;CHECK: check_f32:
+;CHECK-LABEL: check_f32:
 ;CHECK: vdup.32 {{.*}}, d{{..}}[1]
   %x = extractelement <4 x float> %v, i32 3
   %1 = insertelement  <2 x float> undef, float %x, i32 0
@@ -306,7 +306,7 @@ define <2 x float> @check_f32(<4 x float> %v) nounwind {
 }
 
 define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
-;CHECK: check_i32:
+;CHECK-LABEL: check_i32:
 ;CHECK: vdup.32 {{.*}}, d{{..}}[1]
   %x = extractelement <4 x i32> %v, i32 3
   %1 = insertelement  <2 x i32> undef, i32 %x, i32 0
@@ -315,7 +315,7 @@ define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
 }
 
 define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
-;CHECK: check_i16:
+;CHECK-LABEL: check_i16:
 ;CHECK: vdup.16 {{.*}}, d{{..}}[3]
   %x = extractelement <8 x i16> %v, i32 3
   %1 = insertelement  <4 x i16> undef, i16 %x, i32 0
@@ -324,7 +324,7 @@ define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
 }
 
 define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
-;CHECK: check_i8:
+;CHECK-LABEL: check_i8:
 ;CHECK: vdup.8 {{.*}}, d{{..}}[3]
   %x = extractelement <16 x i8> %v, i32 3
   %1 = insertelement  <8  x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 42964deb0b5e..759da2235e41 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -29,7 +29,7 @@ entry:
 
 ; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is
 ; converted back to be used as a vector type.
-; CHECK: test_vmovrrd_combine
+; CHECK-LABEL: test_vmovrrd_combine:
 define <4 x i32> @test_vmovrrd_combine() nounwind {
 entry:
   br i1 undef, label %bb1, label %bb2
@@ -136,7 +136,7 @@ define i16 @foldBuildVectors() {
 
 ; Test that we are generating vrev and vext for reverse shuffles of v8i16
 ; shuffles.
-; CHECK: reverse_v8i16
+; CHECK-LABEL: reverse_v8i16:
 define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
   %v0 = load <8 x i16>* %loadaddr
   ; CHECK: vrev64.16
@@ -149,7 +149,7 @@ define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
 
 ; Test that we are generating vrev and vext for reverse shuffles of v16i8
 ; shuffles.
-; CHECK: reverse_v16i8
+; CHECK-LABEL: reverse_v16i8:
 define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
   %v0 = load <16 x i8>* %loadaddr
   ; CHECK: vrev64.8
@@ -160,3 +160,87 @@ define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
   store <16 x i8> %v1, <16 x i8>* %storeaddr
   ret void
 }
+
+; <rdar://problem/14170854>.
+; The two i32 loads below are only 1-byte aligned, which vldr cannot handle.
+; Expect vld1.32 lane loads straight into the vector register rather than
+; general purpose loads plus a costly sequence to assemble the vector.
+; CHECK-LABEL: t3:
+; CHECK: vld1.32 {[[REG:d[0-9]+]][0]}
+; CHECK: vld1.32 {[[REG]][1]}
+; CHECK: vmull.u8 q{{[0-9]+}}, [[REG]], [[REG]]
+define <8 x i16> @t3(i8 zeroext %xf, i8* nocapture %sp0, i8* nocapture %sp1, i32* nocapture %outp) {
+entry:
+  %ptr0 = bitcast i8* %sp0 to i32*
+  %word0 = load i32* %ptr0, align 1
+  %ptr1 = bitcast i8* %sp1 to i32*
+  %word1 = load i32* %ptr1, align 1
+  %lane0 = insertelement <2 x i32> undef, i32 %word0, i32 0
+  %lanes = insertelement <2 x i32> %lane0, i32 %word1, i32 1
+  %bytes = bitcast <2 x i32> %lanes to <8 x i8>
+  %product = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %bytes, <8 x i8> %bytes)
+  ret <8 x i16> %product
+}
+
+; Function Attrs: nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>)
+
+; Check that (insert_vector_elt (load)) is turned into a vector load, and
+; that scalar_to_vector does not interfere with that combine.
+define <8 x i16> @t4(i8* nocapture %sp0) {
+; CHECK-LABEL: t4:
+; CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r0]
+entry:
+  %ptr0 = bitcast i8* %sp0 to i32*
+  %word0 = load i32* %ptr0, align 1
+  %lane0 = insertelement <2 x i32> undef, i32 %word0, i32 0
+  %bytes = bitcast <2 x i32> %lane0 to <8 x i8>
+  %product = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %bytes, <8 x i8> %bytes)
+  ret <8 x i16> %product
+}
+
+; All three loads must be selected as vector (lane) loads.
+; Lowering to a build vector used to break the single-use property of the
+; load feeding lane 1 (%word0), which both multiplicands share.
+; CHECK-LABEL: t5:
+; CHECK: vld1.32 {[[REG1:d[0-9]+]][1]}, [r0]
+; CHECK: vorr [[REG2:d[0-9]+]], [[REG1]], [[REG1]]
+; CHECK: vld1.32 {[[REG1]][0]}, [r1]
+; CHECK: vld1.32 {[[REG2]][0]}, [r2]
+; CHECK: vmull.u8 q{{[0-9]+}}, [[REG1]], [[REG2]]
+define <8 x i16> @t5(i8* nocapture %sp0, i8* nocapture %sp1, i8* nocapture %sp2) {
+entry:
+  %ptr0 = bitcast i8* %sp0 to i32*
+  %word0 = load i32* %ptr0, align 1
+  %ptr1 = bitcast i8* %sp1 to i32*
+  %word1 = load i32* %ptr1, align 1
+  %ptr2 = bitcast i8* %sp2 to i32*
+  %word2 = load i32* %ptr2, align 1
+  %shared = insertelement <2 x i32> undef, i32 %word0, i32 1
+  %lhs.words = insertelement <2 x i32> %shared, i32 %word1, i32 0
+  %rhs.words = insertelement <2 x i32> %shared, i32 %word2, i32 0
+  %lhs = bitcast <2 x i32> %lhs.words to <8 x i8>
+  %rhs = bitcast <2 x i32> %rhs.words to <8 x i8>
+  %product = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %lhs, <8 x i8> %rhs)
+  ret <8 x i16> %product
+}
+
+; <rdar://problem/14989896> Truncating a vector from an illegal type to a
+; legal type must be handled.
+define <2 x i8> @test_truncate(<2 x i128> %in) {
+; CHECK-LABEL: test_truncate:
+; CHECK: mov [[BASE:r[0-9]+]], sp
+; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
+; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
+; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
+; REG2 is expected to be the other half of the Q register holding REG1 (i.e.
+; REG2 = REG1 - 1), which the patterns here cannot express.
+; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0
+; CHECK-NEXT: vmov.32 [[REG2]][1], r1
+; Likewise the Q register below should be floor(REG1/2); that cannot be expressed either.
+; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}
+; CHECK-NEXT: vmov r0, r1, [[RES]]
+entry:
+  %narrowed = trunc <2 x i128> %in to <2 x i8>
+  ret <2 x i8> %narrowed
+}
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
index 22af79762128..f3218969c78e 100644
--- a/test/CodeGen/ARM/vector-extend-narrow.ll
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
 
-; CHECK: f:
+; CHECK-LABEL: f:
 define float @f(<4 x i16>* nocapture %in) {
   ; CHECK: vldr
   ; CHECK: vmovl.u16
@@ -18,7 +18,7 @@ define float @f(<4 x i16>* nocapture %in) {
   ret float %7
 }
 
-; CHECK: g:
+; CHECK-LABEL: g:
 define float @g(<4 x i8>* nocapture %in) {
 ; Note: vld1 here is reasonably important. Mixing VFP and NEON
 ; instructions is bad on some cores
@@ -39,7 +39,7 @@ define float @g(<4 x i8>* nocapture %in) {
   ret float %7
 }
 
-; CHECK: h:
+; CHECK-LABEL: h:
 define <4 x i8> @h(<4 x float> %v) {
   ; CHECK: vcvt.{{[us]}}32.f32
   ; CHECK: vmovn.i32
@@ -47,7 +47,7 @@ define <4 x i8> @h(<4 x float> %v) {
   ret <4 x i8> %1
 }
 
-; CHECK: i:
+; CHECK-LABEL: i:
 define <4 x i8> @i(<4 x i8>* %x) {
 ; Note: vld1 here is reasonably important. Mixing VFP and NEON
 ; instructions is bad on some cores
@@ -62,7 +62,7 @@ define <4 x i8> @i(<4 x i8>* %x) {
   %2 = sdiv <4 x i8> zeroinitializer, %1
   ret <4 x i8> %2
 }
-; CHECK: j:
+; CHECK-LABEL: j:
 define <4 x i32> @j(<4 x i8>* %in) nounwind {
   ; CHECK: vld1
   ; CHECK: vmovl.u8
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index f404eb8be5b7..5555a4759b00 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: test_vextd:
+;CHECK-LABEL: test_vextd:
 ;CHECK: vext
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: test_vextRd:
+;CHECK-LABEL: test_vextRd:
 ;CHECK: vext
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -19,7 +19,7 @@ define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: test_vextq:
+;CHECK-LABEL: test_vextq:
 ;CHECK: vext
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -28,7 +28,7 @@ define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: test_vextRq:
+;CHECK-LABEL: test_vextRq:
 ;CHECK: vext
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -37,7 +37,7 @@ define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: test_vextd16:
+;CHECK-LABEL: test_vextd16:
 ;CHECK: vext
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: test_vextq32:
+;CHECK-LABEL: test_vextq32:
 ;CHECK: vext
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -57,7 +57,7 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ; Undef shuffle indices should not prevent matching to VEXT:
 
 define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: test_vextd_undef:
+;CHECK-LABEL: test_vextd_undef:
 ;CHECK: vext
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -66,7 +66,7 @@ define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: test_vextRq_undef:
+;CHECK-LABEL: test_vextRq_undef:
 ;CHECK: vext
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -75,7 +75,7 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @test_vextq_undef_op2(<16 x i8> %a) nounwind {
-;CHECK: test_vextq_undef_op2:
+;CHECK-LABEL: test_vextq_undef_op2:
 ;CHECK: vext
 entry:
   %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
@@ -83,7 +83,7 @@ entry:
 }
 
 define <8 x i8> @test_vextd_undef_op2(<8 x i8> %a) nounwind {
-;CHECK: test_vextd_undef_op2:
+;CHECK-LABEL: test_vextd_undef_op2:
 ;CHECK: vext
 entry:
   %tmp1 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
@@ -92,7 +92,7 @@ entry:
 
 
 define <16 x i8> @test_vextq_undef_op2_undef(<16 x i8> %a) nounwind {
-;CHECK: test_vextq_undef_op2_undef:
+;CHECK-LABEL: test_vextq_undef_op2_undef:
 ;CHECK: vext
 entry:
   %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1>
@@ -100,7 +100,7 @@ entry:
 }
 
 define <8 x i8> @test_vextd_undef_op2_undef(<8 x i8> %a) nounwind {
-;CHECK: test_vextd_undef_op2_undef:
+;CHECK-LABEL: test_vextd_undef_op2_undef:
 ;CHECK: vext
 entry:
   %tmp1 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1>
@@ -114,7 +114,7 @@ entry:
 ; Also checks interleaving of sources is handled correctly.
 ; Essence: a vext is used on %A and something saner than stack load/store for final result.
 define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: test_interleaved:
+;CHECK-LABEL: test_interleaved:
 ;CHECK: vext.16
 ;CHECK-NOT: vext.16
 ;CHECK: vzip.16
@@ -126,7 +126,7 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 
 ; An undef in the shuffle list should still be optimizable
 define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: test_undef:
+;CHECK-LABEL: test_undef:
 ;CHECK: vzip.16
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
@@ -136,20 +136,26 @@ define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 
 ; We should ignore a build_vector with more than two sources.
 ; Use illegal <32 x i16> type to produce such a shuffle after legalizing types.
-; Try to look for fallback to stack expansion.
+; Check for the fallback to by-element inserts.
 define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
-;CHECK: test_multisource:
-;CHECK: vst1.16
+;CHECK-LABEL: test_multisource:
+;CHECK: vmov.16 [[REG:d[0-9]+]][0]
+;CHECK: vmov.16 [[REG]][1]
+;CHECK: vmov.16 [[REG]][2]
+;CHECK: vmov.16 [[REG]][3]
         %tmp1 = load <32 x i16>* %B
         %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
         ret <4 x i16> %tmp2
 }
 
 ; We don't handle shuffles using more than half of a 128-bit vector.
-; Again, test for fallback to stack expansion
+; Again, test for fallback to by-element inserts.
 define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
-;CHECK: test_largespan:
-;CHECK: vst1.16
+;CHECK-LABEL: test_largespan:
+;CHECK: vmov.16 [[REG:d[0-9]+]][0]
+;CHECK: vmov.16 [[REG]][1]
+;CHECK: vmov.16 [[REG]][2]
+;CHECK: vmov.16 [[REG]][3]
         %tmp1 = load <8 x i16>* %B
         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
         ret <4 x i16> %tmp2
@@ -159,8 +165,15 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
 ; this rather than blindly emitting a VECTOR_SHUFFLE (infinite
 ; lowering loop can result otherwise).
 define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: test_illegal:
-;CHECK: vst1.16
+;CHECK-LABEL: test_illegal:
+;CHECK: vmov.16 [[REG:d[0-9]+]][0]
+;CHECK: vmov.16 [[REG]][1]
+;CHECK: vmov.16 [[REG]][2]
+;CHECK: vmov.16 [[REG]][3]
+;CHECK: vmov.16 [[REG2:d[0-9]+]][0]
+;CHECK: vmov.16 [[REG2]][1]
+;CHECK: vmov.16 [[REG2]][2]
+;CHECK: vmov.16 [[REG2]][3]
        %tmp1 = load <8 x i16>* %A
        %tmp2 = load <8 x i16>* %B
        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
@@ -170,7 +183,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ; PR11129
 ; Make sure this doesn't crash
 define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind {
-; CHECK: test_elem_mismatch:
+; CHECK-LABEL: test_elem_mismatch:
 ; CHECK: vstr
   %tmp0 = load <2 x i64>* %src, align 16
   %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32>
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 6946d02637ea..a23db7be7615 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -4,7 +4,7 @@
 
 ; une is implemented with VCEQ/VMVN
 define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcunef32:
+;CHECK-LABEL: vcunef32:
 ;CHECK: vceq.f32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
@@ -16,7 +16,7 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; olt is implemented with VCGT
 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcoltf32:
+;CHECK-LABEL: vcoltf32:
 ;CHECK: vcgt.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -27,7 +27,7 @@ define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; ole is implemented with VCGE
 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcolef32:
+;CHECK-LABEL: vcolef32:
 ;CHECK: vcge.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -38,7 +38,7 @@ define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; uge is implemented with VCGT/VMVN
 define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcugef32:
+;CHECK-LABEL: vcugef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
@@ -50,7 +50,7 @@ define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; ule is implemented with VCGT/VMVN
 define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vculef32:
+;CHECK-LABEL: vculef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
@@ -62,7 +62,7 @@ define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; ugt is implemented with VCGE/VMVN
 define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcugtf32:
+;CHECK-LABEL: vcugtf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
@@ -74,7 +74,7 @@ define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; ult is implemented with VCGE/VMVN
 define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcultf32:
+;CHECK-LABEL: vcultf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x float>* %A
@@ -86,7 +86,7 @@ define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; ueq is implemented with VCGT/VCGT/VORR/VMVN
 define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcueqf32:
+;CHECK-LABEL: vcueqf32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
@@ -100,7 +100,7 @@ define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; one is implemented with VCGT/VCGT/VORR
 define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vconef32:
+;CHECK-LABEL: vconef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
@@ -113,7 +113,7 @@ define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; uno is implemented with VCGT/VCGE/VORR/VMVN
 define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcunof32:
+;CHECK-LABEL: vcunof32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
@@ -127,7 +127,7 @@ define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
 
 ; ord is implemented with VCGT/VCGE/VORR
 define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vcordf32:
+;CHECK-LABEL: vcordf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 7a4b34f4a3f0..5d2943cbfd2f 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -14,7 +14,7 @@ declare float @fabsf(float)
 declare double @fabs(double)
 
 define void @test_abs(float* %P, double* %D) {
-;CHECK: test_abs:
+;CHECK-LABEL: test_abs:
 	%a = load float* %P		; <float> [#uses=1]
 ;CHECK: vabs.f32
 	%b = call float @fabsf( float %a ) readnone	; <float> [#uses=1]
@@ -27,7 +27,7 @@ define void @test_abs(float* %P, double* %D) {
 }
 
 define void @test_add(float* %P, double* %D) {
-;CHECK: test_add:
+;CHECK-LABEL: test_add:
 	%a = load float* %P		; <float> [#uses=2]
 	%b = fadd float %a, %a		; <float> [#uses=1]
 	store float %b, float* %P
@@ -38,7 +38,7 @@ define void @test_add(float* %P, double* %D) {
 }
 
 define void @test_ext_round(float* %P, double* %D) {
-;CHECK: test_ext_round:
+;CHECK-LABEL: test_ext_round:
 	%a = load float* %P		; <float> [#uses=1]
 ;CHECK: vcvt.f64.f32
 ;CHECK: vcvt.f32.f64
@@ -51,7 +51,7 @@ define void @test_ext_round(float* %P, double* %D) {
 }
 
 define void @test_fma(float* %P1, float* %P2, float* %P3) {
-;CHECK: test_fma:
+;CHECK-LABEL: test_fma:
 	%a1 = load float* %P1		; <float> [#uses=1]
 	%a2 = load float* %P2		; <float> [#uses=1]
 	%a3 = load float* %P3		; <float> [#uses=1]
@@ -63,7 +63,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 }
 
 define i32 @test_ftoi(float* %P1) {
-;CHECK: test_ftoi:
+;CHECK-LABEL: test_ftoi:
 	%a1 = load float* %P1		; <float> [#uses=1]
 ;CHECK: vcvt.s32.f32
 	%b1 = fptosi float %a1 to i32		; <i32> [#uses=1]
@@ -71,7 +71,7 @@ define i32 @test_ftoi(float* %P1) {
 }
 
 define i32 @test_ftou(float* %P1) {
-;CHECK: test_ftou:
+;CHECK-LABEL: test_ftou:
 	%a1 = load float* %P1		; <float> [#uses=1]
 ;CHECK: vcvt.u32.f32
 	%b1 = fptoui float %a1 to i32		; <i32> [#uses=1]
@@ -79,7 +79,7 @@ define i32 @test_ftou(float* %P1) {
 }
 
 define i32 @test_dtoi(double* %P1) {
-;CHECK: test_dtoi:
+;CHECK-LABEL: test_dtoi:
 	%a1 = load double* %P1		; <double> [#uses=1]
 ;CHECK: vcvt.s32.f64
 	%b1 = fptosi double %a1 to i32		; <i32> [#uses=1]
@@ -87,7 +87,7 @@ define i32 @test_dtoi(double* %P1) {
 }
 
 define i32 @test_dtou(double* %P1) {
-;CHECK: test_dtou:
+;CHECK-LABEL: test_dtou:
 	%a1 = load double* %P1		; <double> [#uses=1]
 ;CHECK: vcvt.u32.f64
 	%b1 = fptoui double %a1 to i32		; <i32> [#uses=1]
@@ -95,7 +95,7 @@ define i32 @test_dtou(double* %P1) {
 }
 
 define void @test_utod(double* %P1, i32 %X) {
-;CHECK: test_utod:
+;CHECK-LABEL: test_utod:
 ;CHECK: vcvt.f64.u32
 	%b1 = uitofp i32 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
@@ -103,7 +103,7 @@ define void @test_utod(double* %P1, i32 %X) {
 }
 
 define void @test_utod2(double* %P1, i8 %X) {
-;CHECK: test_utod2:
+;CHECK-LABEL: test_utod2:
 ;CHECK: vcvt.f64.u32
 	%b1 = uitofp i8 %X to double		; <double> [#uses=1]
 	store double %b1, double* %P1
@@ -111,7 +111,7 @@ define void @test_utod2(double* %P1, i8 %X) {
 }
 
 define void @test_cmp(float* %glob, i32 %X) {
-;CHECK: test_cmp:
+;CHECK-LABEL: test_cmp:
 entry:
 	%tmp = load float* %glob		; <float> [#uses=2]
 	%tmp3 = getelementptr float* %glob, i32 2		; <float*> [#uses=1]
@@ -139,7 +139,7 @@ declare i32 @bar(...)
 declare i32 @baz(...)
 
 define void @test_cmpfp0(float* %glob, i32 %X) {
-;CHECK: test_cmpfp0:
+;CHECK-LABEL: test_cmpfp0:
 entry:
 	%tmp = load float* %glob		; <float> [#uses=1]
 ;CHECK: vcmpe.f32
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index c9ce3b7450b6..2518ee2278cc 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-elf"
 
 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
-;CHECK: vget_lanes8:
+;CHECK-LABEL: vget_lanes8:
 ;CHECK: vmov.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 1
@@ -12,7 +12,7 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
 }
 
 define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
-;CHECK: vget_lanes16:
+;CHECK-LABEL: vget_lanes16:
 ;CHECK: vmov.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = extractelement <4 x i16> %tmp1, i32 1
@@ -21,7 +21,7 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
 }
 
 define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
-;CHECK: vget_laneu8:
+;CHECK-LABEL: vget_laneu8:
 ;CHECK: vmov.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 1
@@ -30,7 +30,7 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
 }
 
 define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
-;CHECK: vget_laneu16:
+;CHECK-LABEL: vget_laneu16:
 ;CHECK: vmov.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = extractelement <4 x i16> %tmp1, i32 1
@@ -40,7 +40,7 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
 
 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
-;CHECK: vget_lanei32:
+;CHECK-LABEL: vget_lanei32:
 ;CHECK: vmov.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = add <2 x i32> %tmp1, %tmp1
@@ -49,7 +49,7 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
 }
 
 define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
-;CHECK: vgetQ_lanes8:
+;CHECK-LABEL: vgetQ_lanes8:
 ;CHECK: vmov.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = extractelement <16 x i8> %tmp1, i32 1
@@ -58,7 +58,7 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
 }
 
 define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
-;CHECK: vgetQ_lanes16:
+;CHECK-LABEL: vgetQ_lanes16:
 ;CHECK: vmov.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = extractelement <8 x i16> %tmp1, i32 1
@@ -67,7 +67,7 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
 }
 
 define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
-;CHECK: vgetQ_laneu8:
+;CHECK-LABEL: vgetQ_laneu8:
 ;CHECK: vmov.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = extractelement <16 x i8> %tmp1, i32 1
@@ -76,7 +76,7 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
 }
 
 define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
-;CHECK: vgetQ_laneu16:
+;CHECK-LABEL: vgetQ_laneu16:
 ;CHECK: vmov.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = extractelement <8 x i16> %tmp1, i32 1
@@ -86,7 +86,7 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
 
 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
-;CHECK: vgetQ_lanei32:
+;CHECK-LABEL: vgetQ_lanei32:
 ;CHECK: vmov.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = add <4 x i32> %tmp1, %tmp1
@@ -159,7 +159,7 @@ return:                                           ; preds = %entry
 }
 
 define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
-;CHECK: vset_lane8:
+;CHECK-LABEL: vset_lane8:
 ;CHECK: vmov.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
@@ -167,7 +167,7 @@ define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
 }
 
 define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
-;CHECK: vset_lane16:
+;CHECK-LABEL: vset_lane16:
 ;CHECK: vmov.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
@@ -175,7 +175,7 @@ define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
 }
 
 define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
-;CHECK: vset_lane32:
+;CHECK-LABEL: vset_lane32:
 ;CHECK: vmov.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
@@ -183,7 +183,7 @@ define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
 }
 
 define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
-;CHECK: vsetQ_lane8:
+;CHECK-LABEL: vsetQ_lane8:
 ;CHECK: vmov.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
@@ -191,7 +191,7 @@ define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
 }
 
 define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
-;CHECK: vsetQ_lane16:
+;CHECK-LABEL: vsetQ_lane16:
 ;CHECK: vmov.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
@@ -199,7 +199,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
 }
 
 define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
-;CHECK: vsetQ_lane32:
+;CHECK-LABEL: vsetQ_lane32:
 ;CHECK: vmov.32 d{{.*}}[1], r1
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
@@ -207,7 +207,7 @@ define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
 }
 
 define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
-;CHECK: test_vset_lanef32:
+;CHECK-LABEL: test_vset_lanef32:
 ;CHECK: vmov.f32 s3, s0
 ;CHECK: vmov.f64 d0, d1
 entry:
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 379e062838f6..9c2ed579c98e 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vhadds8:
+;CHECK-LABEL: vhadds8:
 ;CHECK: vhadd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vhadds16:
+;CHECK-LABEL: vhadds16:
 ;CHECK: vhadd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vhadds32:
+;CHECK-LABEL: vhadds32:
 ;CHECK: vhadd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vhaddu8:
+;CHECK-LABEL: vhaddu8:
 ;CHECK: vhadd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -37,7 +37,7 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vhaddu16:
+;CHECK-LABEL: vhaddu16:
 ;CHECK: vhadd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vhaddu32:
+;CHECK-LABEL: vhaddu32:
 ;CHECK: vhadd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -55,7 +55,7 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vhaddQs8:
+;CHECK-LABEL: vhaddQs8:
 ;CHECK: vhadd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -64,7 +64,7 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vhaddQs16:
+;CHECK-LABEL: vhaddQs16:
 ;CHECK: vhadd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -73,7 +73,7 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vhaddQs32:
+;CHECK-LABEL: vhaddQs32:
 ;CHECK: vhadd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -82,7 +82,7 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vhaddQu8:
+;CHECK-LABEL: vhaddQu8:
 ;CHECK: vhadd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -91,7 +91,7 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vhaddQu16:
+;CHECK-LABEL: vhaddQu16:
 ;CHECK: vhadd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -100,7 +100,7 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vhaddQu32:
+;CHECK-LABEL: vhaddQu32:
 ;CHECK: vhadd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -125,7 +125,7 @@ declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind rea
 declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrhadds8:
+;CHECK-LABEL: vrhadds8:
 ;CHECK: vrhadd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -134,7 +134,7 @@ define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrhadds16:
+;CHECK-LABEL: vrhadds16:
 ;CHECK: vrhadd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -143,7 +143,7 @@ define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrhadds32:
+;CHECK-LABEL: vrhadds32:
 ;CHECK: vrhadd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -152,7 +152,7 @@ define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrhaddu8:
+;CHECK-LABEL: vrhaddu8:
 ;CHECK: vrhadd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -161,7 +161,7 @@ define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrhaddu16:
+;CHECK-LABEL: vrhaddu16:
 ;CHECK: vrhadd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -170,7 +170,7 @@ define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrhaddu32:
+;CHECK-LABEL: vrhaddu32:
 ;CHECK: vrhadd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -179,7 +179,7 @@ define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrhaddQs8:
+;CHECK-LABEL: vrhaddQs8:
 ;CHECK: vrhadd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -188,7 +188,7 @@ define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrhaddQs16:
+;CHECK-LABEL: vrhaddQs16:
 ;CHECK: vrhadd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -197,7 +197,7 @@ define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrhaddQs32:
+;CHECK-LABEL: vrhaddQs32:
 ;CHECK: vrhadd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -206,7 +206,7 @@ define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrhaddQu8:
+;CHECK-LABEL: vrhaddQu8:
 ;CHECK: vrhadd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -215,7 +215,7 @@ define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrhaddQu16:
+;CHECK-LABEL: vrhaddQu16:
 ;CHECK: vrhadd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -224,7 +224,7 @@ define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrhaddQu32:
+;CHECK-LABEL: vrhaddQu32:
 ;CHECK: vrhadd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index 0f0d0279a521..4bc2e87ab577 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vhsubs8:
+;CHECK-LABEL: vhsubs8:
 ;CHECK: vhsub.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vhsubs16:
+;CHECK-LABEL: vhsubs16:
 ;CHECK: vhsub.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vhsubs32:
+;CHECK-LABEL: vhsubs32:
 ;CHECK: vhsub.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vhsubu8:
+;CHECK-LABEL: vhsubu8:
 ;CHECK: vhsub.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -37,7 +37,7 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vhsubu16:
+;CHECK-LABEL: vhsubu16:
 ;CHECK: vhsub.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vhsubu32:
+;CHECK-LABEL: vhsubu32:
 ;CHECK: vhsub.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -55,7 +55,7 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vhsubQs8:
+;CHECK-LABEL: vhsubQs8:
 ;CHECK: vhsub.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -64,7 +64,7 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vhsubQs16:
+;CHECK-LABEL: vhsubQs16:
 ;CHECK: vhsub.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -73,7 +73,7 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vhsubQs32:
+;CHECK-LABEL: vhsubQs32:
 ;CHECK: vhsub.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -82,7 +82,7 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vhsubQu8:
+;CHECK-LABEL: vhsubQu8:
 ;CHECK: vhsub.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -91,7 +91,7 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vhsubQu16:
+;CHECK-LABEL: vhsubQu16:
 ;CHECK: vhsub.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -100,7 +100,7 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vhsubQu32:
+;CHECK-LABEL: vhsubQu32:
 ;CHECK: vhsub.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
index 2d8cb893bd86..0a8f103102b1 100644
--- a/test/CodeGen/ARM/vicmp.ll
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -7,7 +7,7 @@
 ; the other operations.
 
 define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vcnei8:
+;CHECK-LABEL: vcnei8:
 ;CHECK: vceq.i8
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <8 x i8>* %A
@@ -18,7 +18,7 @@ define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcnei16:
+;CHECK-LABEL: vcnei16:
 ;CHECK: vceq.i16
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <4 x i16>* %A
@@ -29,7 +29,7 @@ define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vcnei32:
+;CHECK-LABEL: vcnei32:
 ;CHECK: vceq.i32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x i32>* %A
@@ -40,7 +40,7 @@ define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vcneQi8:
+;CHECK-LABEL: vcneQi8:
 ;CHECK: vceq.i8
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <16 x i8>* %A
@@ -51,7 +51,7 @@ define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vcneQi16:
+;CHECK-LABEL: vcneQi16:
 ;CHECK: vceq.i16
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <8 x i16>* %A
@@ -62,7 +62,7 @@ define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vcneQi32:
+;CHECK-LABEL: vcneQi32:
 ;CHECK: vceq.i32
 ;CHECK-NEXT: vmvn
 	%tmp1 = load <4 x i32>* %A
@@ -73,7 +73,7 @@ define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vcltQs8:
+;CHECK-LABEL: vcltQs8:
 ;CHECK: vcgt.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -83,7 +83,7 @@ define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcles16:
+;CHECK-LABEL: vcles16:
 ;CHECK: vcge.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -93,7 +93,7 @@ define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vcltu16:
+;CHECK-LABEL: vcltu16:
 ;CHECK: vcgt.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -103,7 +103,7 @@ define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vcleQu32:
+;CHECK-LABEL: vcleQu32:
 ;CHECK: vcge.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index 994f05dacb84..444d0d5b5edc 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vld1i8(i8* %A) nounwind {
-;CHECK: vld1i8:
+;CHECK-LABEL: vld1i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld1.8 {d16}, [r0:64]
 	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
@@ -10,7 +10,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind {
 }
 
 define <4 x i16> @vld1i16(i16* %A) nounwind {
-;CHECK: vld1i16:
+;CHECK-LABEL: vld1i16:
 ;CHECK: vld1.16
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
@@ -19,7 +19,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
 
 ;Check for a post-increment updating load. 
 define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
-;CHECK: vld1i16_update:
+;CHECK-LABEL: vld1i16_update:
 ;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
@@ -30,7 +30,7 @@ define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
 }
 
 define <2 x i32> @vld1i32(i32* %A) nounwind {
-;CHECK: vld1i32:
+;CHECK-LABEL: vld1i32:
 ;CHECK: vld1.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
@@ -39,7 +39,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
 
 ;Check for a post-increment updating load with register increment.
 define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
-;CHECK: vld1i32_update:
+;CHECK-LABEL: vld1i32_update:
 ;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
@@ -50,7 +50,7 @@ define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
 }
 
 define <2 x float> @vld1f(float* %A) nounwind {
-;CHECK: vld1f:
+;CHECK-LABEL: vld1f:
 ;CHECK: vld1.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1)
@@ -58,7 +58,7 @@ define <2 x float> @vld1f(float* %A) nounwind {
 }
 
 define <1 x i64> @vld1i64(i64* %A) nounwind {
-;CHECK: vld1i64:
+;CHECK-LABEL: vld1i64:
 ;CHECK: vld1.64
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1)
@@ -66,7 +66,7 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
 }
 
 define <16 x i8> @vld1Qi8(i8* %A) nounwind {
-;CHECK: vld1Qi8:
+;CHECK-LABEL: vld1Qi8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vld1.8 {d16, d17}, [r0:64]
 	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
@@ -75,7 +75,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 
 ;Check for a post-increment updating load.
 define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
-;CHECK: vld1Qi8_update:
+;CHECK-LABEL: vld1Qi8_update:
 ;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
 	%A = load i8** %ptr
 	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
@@ -85,7 +85,7 @@ define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
 }
 
 define <8 x i16> @vld1Qi16(i16* %A) nounwind {
-;CHECK: vld1Qi16:
+;CHECK-LABEL: vld1Qi16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vld1.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -94,7 +94,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind {
 }
 
 define <4 x i32> @vld1Qi32(i32* %A) nounwind {
-;CHECK: vld1Qi32:
+;CHECK-LABEL: vld1Qi32:
 ;CHECK: vld1.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1)
@@ -102,7 +102,7 @@ define <4 x i32> @vld1Qi32(i32* %A) nounwind {
 }
 
 define <4 x float> @vld1Qf(float* %A) nounwind {
-;CHECK: vld1Qf:
+;CHECK-LABEL: vld1Qf:
 ;CHECK: vld1.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1)
@@ -110,7 +110,7 @@ define <4 x float> @vld1Qf(float* %A) nounwind {
 }
 
 define <2 x i64> @vld1Qi64(i64* %A) nounwind {
-;CHECK: vld1Qi64:
+;CHECK-LABEL: vld1Qi64:
 ;CHECK: vld1.64
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1)
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index caa016e929d8..fddafeab91cc 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -12,7 +12,7 @@
 %struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
 
 define <8 x i8> @vld2i8(i8* %A) nounwind {
-;CHECK: vld2i8:
+;CHECK-LABEL: vld2i8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vld2.8 {d16, d17}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
@@ -23,7 +23,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
 }
 
 define <4 x i16> @vld2i16(i16* %A) nounwind {
-;CHECK: vld2i16:
+;CHECK-LABEL: vld2i16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vld2.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -35,7 +35,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
 }
 
 define <2 x i32> @vld2i32(i32* %A) nounwind {
-;CHECK: vld2i32:
+;CHECK-LABEL: vld2i32:
 ;CHECK: vld2.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
@@ -46,7 +46,7 @@ define <2 x i32> @vld2i32(i32* %A) nounwind {
 }
 
 define <2 x float> @vld2f(float* %A) nounwind {
-;CHECK: vld2f:
+;CHECK-LABEL: vld2f:
 ;CHECK: vld2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
@@ -58,7 +58,7 @@ define <2 x float> @vld2f(float* %A) nounwind {
 
 ;Check for a post-increment updating load. 
 define <2 x float> @vld2f_update(float** %ptr) nounwind {
-;CHECK: vld2f_update:
+;CHECK-LABEL: vld2f_update:
 ;CHECK: vld2.32 {d16, d17}, [r1]!
 	%A = load float** %ptr
 	%tmp0 = bitcast float* %A to i8*
@@ -72,7 +72,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {
 }
 
 define <1 x i64> @vld2i64(i64* %A) nounwind {
-;CHECK: vld2i64:
+;CHECK-LABEL: vld2i64:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vld1.64 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i64* %A to i8*
@@ -84,7 +84,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
 }
 
 define <16 x i8> @vld2Qi8(i8* %A) nounwind {
-;CHECK: vld2Qi8:
+;CHECK-LABEL: vld2Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
@@ -96,7 +96,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
 
 ;Check for a post-increment updating load with register increment.
 define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
-;CHECK: vld2Qi8_update:
+;CHECK-LABEL: vld2Qi8_update:
 ;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
 	%A = load i8** %ptr
 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
@@ -109,7 +109,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
 }
 
 define <8 x i16> @vld2Qi16(i16* %A) nounwind {
-;CHECK: vld2Qi16:
+;CHECK-LABEL: vld2Qi16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -121,7 +121,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
 }
 
 define <4 x i32> @vld2Qi32(i32* %A) nounwind {
-;CHECK: vld2Qi32:
+;CHECK-LABEL: vld2Qi32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
@@ -133,7 +133,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind {
 }
 
 define <4 x float> @vld2Qf(float* %A) nounwind {
-;CHECK: vld2Qf:
+;CHECK-LABEL: vld2Qf:
 ;CHECK: vld2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1)
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index ad63e1f716b2..400541fb90a2 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -13,7 +13,7 @@
 %struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
 
 define <8 x i8> @vld3i8(i8* %A) nounwind {
-;CHECK: vld3i8:
+;CHECK-LABEL: vld3i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld3.8 {d16, d17, d18}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
@@ -24,7 +24,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind {
 }
 
 define <4 x i16> @vld3i16(i16* %A) nounwind {
-;CHECK: vld3i16:
+;CHECK-LABEL: vld3i16:
 ;CHECK: vld3.16
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
@@ -36,7 +36,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
 
 ;Check for a post-increment updating load with register increment.
 define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
-;CHECK: vld3i16_update:
+;CHECK-LABEL: vld3i16_update:
 ;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
@@ -50,7 +50,7 @@ define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
 }
 
 define <2 x i32> @vld3i32(i32* %A) nounwind {
-;CHECK: vld3i32:
+;CHECK-LABEL: vld3i32:
 ;CHECK: vld3.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1)
@@ -61,7 +61,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind {
 }
 
 define <2 x float> @vld3f(float* %A) nounwind {
-;CHECK: vld3f:
+;CHECK-LABEL: vld3f:
 ;CHECK: vld3.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1)
@@ -72,7 +72,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
 }
 
 define <1 x i64> @vld3i64(i64* %A) nounwind {
-;CHECK: vld3i64:
+;CHECK-LABEL: vld3i64:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld1.64 {d16, d17, d18}, [r0:64]
 	%tmp0 = bitcast i64* %A to i8*
@@ -84,7 +84,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
 }
 
 define <16 x i8> @vld3Qi8(i8* %A) nounwind {
-;CHECK: vld3Qi8:
+;CHECK-LABEL: vld3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
 ;CHECK: vld3.8 {d17, d19, d21}, [r0:64]
@@ -96,7 +96,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind {
 }
 
 define <8 x i16> @vld3Qi16(i16* %A) nounwind {
-;CHECK: vld3Qi16:
+;CHECK-LABEL: vld3Qi16:
 ;CHECK: vld3.16
 ;CHECK: vld3.16
 	%tmp0 = bitcast i16* %A to i8*
@@ -108,7 +108,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind {
 }
 
 define <4 x i32> @vld3Qi32(i32* %A) nounwind {
-;CHECK: vld3Qi32:
+;CHECK-LABEL: vld3Qi32:
 ;CHECK: vld3.32
 ;CHECK: vld3.32
 	%tmp0 = bitcast i32* %A to i8*
@@ -121,7 +121,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
 
 ;Check for a post-increment updating load. 
 define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
-;CHECK: vld3Qi32_update:
+;CHECK-LABEL: vld3Qi32_update:
 ;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
 ;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
 	%A = load i32** %ptr
@@ -136,7 +136,7 @@ define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
 }
 
 define <4 x float> @vld3Qf(float* %A) nounwind {
-;CHECK: vld3Qf:
+;CHECK-LABEL: vld3Qf:
 ;CHECK: vld3.32
 ;CHECK: vld3.32
 	%tmp0 = bitcast float* %A to i8*
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 9ee5fe46eea2..f7376b503a30 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -12,7 +12,7 @@
 %struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
 
 define <8 x i8> @vld4i8(i8* %A) nounwind {
-;CHECK: vld4i8:
+;CHECK-LABEL: vld4i8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
@@ -24,7 +24,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
 
 ;Check for a post-increment updating load with register increment.
 define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
-;CHECK: vld4i8_update:
+;CHECK-LABEL: vld4i8_update:
 ;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
 	%A = load i8** %ptr
 	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
@@ -37,7 +37,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
 }
 
 define <4 x i16> @vld4i16(i16* %A) nounwind {
-;CHECK: vld4i16:
+;CHECK-LABEL: vld4i16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -49,7 +49,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
 }
 
 define <2 x i32> @vld4i32(i32* %A) nounwind {
-;CHECK: vld4i32:
+;CHECK-LABEL: vld4i32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
@@ -61,7 +61,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
 }
 
 define <2 x float> @vld4f(float* %A) nounwind {
-;CHECK: vld4f:
+;CHECK-LABEL: vld4f:
 ;CHECK: vld4.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1)
@@ -72,7 +72,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
 }
 
 define <1 x i64> @vld4i64(i64* %A) nounwind {
-;CHECK: vld4i64:
+;CHECK-LABEL: vld4i64:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i64* %A to i8*
@@ -84,7 +84,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
 }
 
 define <16 x i8> @vld4Qi8(i8* %A) nounwind {
-;CHECK: vld4Qi8:
+;CHECK-LABEL: vld4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
 ;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]
@@ -96,7 +96,7 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
 }
 
 define <8 x i16> @vld4Qi16(i16* %A) nounwind {
-;CHECK: vld4Qi16:
+;CHECK-LABEL: vld4Qi16:
 ;Check for no alignment specifier.
 ;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
 ;CHECK: vld4.16 {d17, d19, d21, d23}, [r0]
@@ -110,7 +110,7 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
 
 ;Check for a post-increment updating load. 
 define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
-;CHECK: vld4Qi16_update:
+;CHECK-LABEL: vld4Qi16_update:
 ;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
 ;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
 	%A = load i16** %ptr
@@ -125,7 +125,7 @@ define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
 }
 
 define <4 x i32> @vld4Qi32(i32* %A) nounwind {
-;CHECK: vld4Qi32:
+;CHECK-LABEL: vld4Qi32:
 ;CHECK: vld4.32
 ;CHECK: vld4.32
 	%tmp0 = bitcast i32* %A to i8*
@@ -137,7 +137,7 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind {
 }
 
 define <4 x float> @vld4Qf(float* %A) nounwind {
-;CHECK: vld4Qf:
+;CHECK-LABEL: vld4Qf:
 ;CHECK: vld4.32
 ;CHECK: vld4.32
 	%tmp0 = bitcast float* %A to i8*
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 7c7319c090ba..5509f3e0a0da 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vld1dupi8(i8* %A) nounwind {
-;CHECK: vld1dupi8:
+;CHECK-LABEL: vld1dupi8:
 ;Check the (default) alignment value.
 ;CHECK: vld1.8 {d16[]}, [r0]
 	%tmp1 = load i8* %A, align 8
@@ -11,7 +11,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {
 }
 
 define <4 x i16> @vld1dupi16(i16* %A) nounwind {
-;CHECK: vld1dupi16:
+;CHECK-LABEL: vld1dupi16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vld1.16 {d16[]}, [r0:16]
 	%tmp1 = load i16* %A, align 8
@@ -21,7 +21,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
 }
 
 define <2 x i32> @vld1dupi32(i32* %A) nounwind {
-;CHECK: vld1dupi32:
+;CHECK-LABEL: vld1dupi32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld1.32 {d16[]}, [r0:32]
 	%tmp1 = load i32* %A, align 8
@@ -31,7 +31,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
 }
 
 define <2 x float> @vld1dupf(float* %A) nounwind {
-;CHECK: vld1dupf:
+;CHECK-LABEL: vld1dupf:
 ;CHECK: vld1.32 {d16[]}, [r0:32]
 	%tmp0 = load float* %A
         %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
@@ -40,7 +40,7 @@ define <2 x float> @vld1dupf(float* %A) nounwind {
 }
 
 define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
-;CHECK: vld1dupQi8:
+;CHECK-LABEL: vld1dupQi8:
 ;Check the (default) alignment value.
 ;CHECK: vld1.8 {d16[], d17[]}, [r0]
 	%tmp1 = load i8* %A, align 8
@@ -50,7 +50,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
 }
 
 define <4 x float> @vld1dupQf(float* %A) nounwind {
-;CHECK: vld1dupQf:
+;CHECK-LABEL: vld1dupQf:
 ;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
         %tmp0 = load float* %A
         %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
@@ -63,7 +63,7 @@ define <4 x float> @vld1dupQf(float* %A) nounwind {
 %struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }
 
 define <8 x i8> @vld2dupi8(i8* %A) nounwind {
-;CHECK: vld2dupi8:
+;CHECK-LABEL: vld2dupi8:
 ;Check the (default) alignment value.
 ;CHECK: vld2.8 {d16[], d17[]}, [r0]
 	%tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
@@ -76,7 +76,7 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind {
 }
 
 define <4 x i16> @vld2dupi16(i8* %A) nounwind {
-;CHECK: vld2dupi16:
+;CHECK-LABEL: vld2dupi16:
 ;Check that a power-of-two alignment smaller than the total size of the memory
 ;being loaded is ignored.
 ;CHECK: vld2.16 {d16[], d17[]}, [r0]
@@ -91,7 +91,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
 
 ;Check for a post-increment updating load. 
 define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
-;CHECK: vld2dupi16_update:
+;CHECK-LABEL: vld2dupi16_update:
 ;CHECK: vld2.16 {d16[], d17[]}, [r1]!
 	%A = load i16** %ptr
         %A2 = bitcast i16* %A to i8*
@@ -107,7 +107,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
 }
 
 define <2 x i32> @vld2dupi32(i8* %A) nounwind {
-;CHECK: vld2dupi32:
+;CHECK-LABEL: vld2dupi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
 	%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
@@ -128,7 +128,7 @@ declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>,
 
 ;Check for a post-increment updating load with register increment.
 define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
-;CHECK: vld3dupi8_update:
+;CHECK-LABEL: vld3dupi8_update:
 ;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
 	%A = load i8** %ptr
 	%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
@@ -146,7 +146,7 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
 }
 
 define <4 x i16> @vld3dupi16(i8* %A) nounwind {
-;CHECK: vld3dupi16:
+;CHECK-LABEL: vld3dupi16:
 ;Check the (default) alignment value. VLD3 does not support alignment.
 ;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
 	%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
@@ -169,7 +169,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>,
 
 ;Check for a post-increment updating load.
 define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
-;CHECK: vld4dupi16_update:
+;CHECK-LABEL: vld4dupi16_update:
 ;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
 	%A = load i16** %ptr
         %A2 = bitcast i16* %A to i8*
@@ -191,7 +191,7 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
 }
 
 define <2 x i32> @vld4dupi32(i8* %A) nounwind {
-;CHECK: vld4dupi32:
+;CHECK-LABEL: vld4dupi32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
 ;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index f35fa92f5dc7..7a83a4c0cac6 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vld1lanei8:
+;CHECK-LABEL: vld1lanei8:
 ;Check the (default) alignment value.
 ;CHECK: vld1.8 {d16[3]}, [r0]
 	%tmp1 = load <8 x i8>* %B
@@ -12,7 +12,7 @@ define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vld1lanei16:
+;CHECK-LABEL: vld1lanei16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vld1.16 {d16[2]}, [r0:16]
 	%tmp1 = load <4 x i16>* %B
@@ -22,7 +22,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vld1lanei32:
+;CHECK-LABEL: vld1lanei32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x i32>* %B
@@ -32,7 +32,7 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vld1lanei32a32:
+;CHECK-LABEL: vld1lanei32a32:
 ;Check the alignment value.  Legal values are none or :32.
 ;CHECK: vld1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x i32>* %B
@@ -42,7 +42,7 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vld1lanef:
+;CHECK-LABEL: vld1lanef:
 ;CHECK: vld1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x float>* %B
 	%tmp2 = load float* %A, align 4
@@ -51,7 +51,7 @@ define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
-;CHECK: vld1laneQi8:
+;CHECK-LABEL: vld1laneQi8:
 ;CHECK: vld1.8 {d17[1]}, [r0]
 	%tmp1 = load <16 x i8>* %B
 	%tmp2 = load i8* %A, align 8
@@ -60,7 +60,7 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vld1laneQi16:
+;CHECK-LABEL: vld1laneQi16:
 ;CHECK: vld1.16 {d17[1]}, [r0:16]
 	%tmp1 = load <8 x i16>* %B
 	%tmp2 = load i16* %A, align 8
@@ -69,7 +69,7 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vld1laneQi32:
+;CHECK-LABEL: vld1laneQi32:
 ;CHECK: vld1.32 {d17[1]}, [r0:32]
 	%tmp1 = load <4 x i32>* %B
 	%tmp2 = load i32* %A, align 8
@@ -78,7 +78,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vld1laneQf:
+;CHECK-LABEL: vld1laneQf:
 ;CHECK: vld1.32 {d16[0]}, [r0:32]
 	%tmp1 = load <4 x float>* %B
 	%tmp2 = load float* %A
@@ -96,7 +96,7 @@ define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
 %struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
 
 define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vld2lanei8:
+;CHECK-LABEL: vld2lanei8:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
 	%tmp1 = load <8 x i8>* %B
@@ -108,7 +108,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vld2lanei16:
+;CHECK-LABEL: vld2lanei16:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
 	%tmp0 = bitcast i16* %A to i8*
@@ -121,7 +121,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vld2lanei32:
+;CHECK-LABEL: vld2lanei32:
 ;CHECK: vld2.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -134,7 +134,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
 
 ;Check for a post-increment updating load.
 define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
-;CHECK: vld2lanei32_update:
+;CHECK-LABEL: vld2lanei32_update:
 ;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
@@ -149,7 +149,7 @@ define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vld2lanef:
+;CHECK-LABEL: vld2lanef:
 ;CHECK: vld2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -161,7 +161,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vld2laneQi16:
+;CHECK-LABEL: vld2laneQi16:
 ;Check the (default) alignment.
 ;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
@@ -174,7 +174,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vld2laneQi32:
+;CHECK-LABEL: vld2laneQi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i32* %A to i8*
@@ -187,7 +187,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vld2laneQf:
+;CHECK-LABEL: vld2laneQf:
 ;CHECK: vld2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -217,7 +217,7 @@ declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x flo
 %struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
 
 define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vld3lanei8:
+;CHECK-LABEL: vld3lanei8:
 ;CHECK: vld3.8
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
@@ -230,7 +230,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vld3lanei16:
+;CHECK-LABEL: vld3lanei16:
 ;Check the (default) alignment value.  VLD3 does not support alignment.
 ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
@@ -245,7 +245,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vld3lanei32:
+;CHECK-LABEL: vld3lanei32:
 ;CHECK: vld3.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -259,7 +259,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vld3lanef:
+;CHECK-LABEL: vld3lanef:
 ;CHECK: vld3.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -273,7 +273,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vld3laneQi16:
+;CHECK-LABEL: vld3laneQi16:
 ;Check the (default) alignment value.  VLD3 does not support alignment.
 ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
@@ -289,7 +289,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 
 ;Check for a post-increment updating load with register increment.
 define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
-;CHECK: vld3laneQi16_update:
+;CHECK-LABEL: vld3laneQi16_update:
 ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
@@ -306,7 +306,7 @@ define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounw
 }
 
 define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vld3laneQi32:
+;CHECK-LABEL: vld3laneQi32:
 ;CHECK: vld3.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
@@ -320,7 +320,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vld3laneQf:
+;CHECK-LABEL: vld3laneQf:
 ;CHECK: vld3.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -352,7 +352,7 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
 %struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
 
 define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vld4lanei8:
+;CHECK-LABEL: vld4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
 	%tmp1 = load <8 x i8>* %B
@@ -369,7 +369,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 ;Check for a post-increment updating load.
 define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
-;CHECK: vld4lanei8_update:
+;CHECK-LABEL: vld4lanei8_update:
 ;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
@@ -387,7 +387,7 @@ define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vld4lanei16:
+;CHECK-LABEL: vld4lanei16:
 ;Check that a power-of-two alignment smaller than the total size of the memory
 ;being loaded is ignored.
 ;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
@@ -405,7 +405,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vld4lanei32:
+;CHECK-LABEL: vld4lanei32:
 ;Check the alignment value.  An 8-byte alignment is allowed here even though
 ;it is smaller than the total size of the memory being loaded.
 ;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
@@ -423,7 +423,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vld4lanef:
+;CHECK-LABEL: vld4lanef:
 ;CHECK: vld4.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -439,7 +439,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vld4laneQi16:
+;CHECK-LABEL: vld4laneQi16:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i16* %A to i8*
@@ -456,7 +456,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vld4laneQi32:
+;CHECK-LABEL: vld4laneQi32:
 ;Check the (default) alignment.
 ;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i32* %A to i8*
@@ -473,7 +473,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vld4laneQf:
+;CHECK-LABEL: vld4laneQf:
 ;CHECK: vld4.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -502,7 +502,7 @@ declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x flo
 ; we don't currently have a QQQQ_VFP2 super-regclass.  (The "0" for the low
 ; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.)
 define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
-;CHECK: test_qqqq_regsequence_subreg
+;CHECK-LABEL: test_qqqq_regsequence_subreg:
 ;CHECK: vld3.16
   %tmp63 = extractvalue [6 x i64] %b, 5
   %tmp64 = zext i64 %tmp63 to i128
diff --git a/test/CodeGen/ARM/vldm-liveness.ll b/test/CodeGen/ARM/vldm-liveness.ll
new file mode 100644
index 000000000000..751f447077be
--- /dev/null
+++ b/test/CodeGen/ARM/vldm-liveness.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
+
+; ARM load store optimizer was dealing with a sequence like:
+;     s1 = VLDRS [r0, 1], Q0<imp-def>
+;     s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
+;     s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
+;     s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
+;
+; It decided to combine the {s0, s1} loads into a single instruction in the
+; third position. However, this leaves the instruction defining s3 with a stray
+; imp-use of Q0, which is undefined.
+;
+; The verifier catches this, so this test just makes sure that appropriate
+; liveness flags are added.
+;
+; I believe the change will be tested as long as the vldmia is not the first of
+; the loads. Earlier optimisations may perturb the output over time, but
+; fiddling the indices should be sufficient to restore the test.
+
+define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {   ; four scalar float loads from %ptr packed into one <4 x float>
+; CHECK-LABEL: foo:
+; CHECK: vldr s3, [r0, #8]
+; CHECK: vldmia r0, {s0, s1}
+; CHECK: vldr s2, [r0, #16]
+   %off0 = getelementptr float* %ptr, i32 0   ; element 0, inserted into lane 0 below
+   %val0 = load float* %off0
+   %off1 = getelementptr float* %ptr, i32 1   ; element 1, inserted into lane 1 below
+   %val1 = load float* %off1
+   %off4 = getelementptr float* %ptr, i32 4   ; element 4 (not adjacent to 0..2), inserted into lane 2; expected above as [r0, #16]
+   %val4 = load float* %off4
+   %off2 = getelementptr float* %ptr, i32 2   ; element 2, inserted into lane 3; expected above as [r0, #8]
+   %val2 = load float* %off2
+
+   %vec1 = insertelement <4 x float> undef, float %val0, i32 0
+   %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
+   %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2   ; lane order deliberately differs from memory order
+   %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3
+
+   ret <4 x float> %vec4
+}
diff --git a/test/CodeGen/ARM/vldm-sched-a9.ll b/test/CodeGen/ARM/vldm-sched-a9.ll
new file mode 100644
index 000000000000..d0a9ac6d2b56
--- /dev/null
+++ b/test/CodeGen/ARM/vldm-sched-a9.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=arm -mtriple=armv7-linux-gnueabihf -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64"
+
+; This test will generate spills/fills using vldmia instructions that access 64 bytes of memory.
+; Check that we don't crash when we generate these instructions on Cortex-A9.
+
+; CHECK-LABEL: test:
+; CHECK: vstmia
+; CHECK: vldmia
+define void @test(i64* %src) #0 {
+entry:
+  %arrayidx39 = getelementptr inbounds i64* %src, i32 13
+  %vecinit285 = shufflevector <16 x i64> undef, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  store <16 x i64> %vecinit285, <16 x i64>* undef, align 128
+  %0 = load i64* undef, align 8
+  %vecinit379 = insertelement <16 x i64> undef, i64 %0, i32 9
+  %1 = load i64* undef, align 8
+  %vecinit419 = insertelement <16 x i64> undef, i64 %1, i32 15
+  store <16 x i64> %vecinit419, <16 x i64>* undef, align 128
+  %vecinit579 = insertelement <16 x i64> undef, i64 0, i32 4
+  %vecinit582 = shufflevector <16 x i64> %vecinit579, <16 x i64> <i64 6, i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit584 = insertelement <16 x i64> %vecinit582, i64 undef, i32 9
+  %vecinit586 = insertelement <16 x i64> %vecinit584, i64 0, i32 10
+  %vecinit589 = shufflevector <16 x i64> %vecinit586, <16 x i64> <i64 12, i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 18, i32 19, i32 undef>
+  %2 = load i64* undef, align 8
+  %vecinit591 = insertelement <16 x i64> %vecinit589, i64 %2, i32 15
+  store <16 x i64> %vecinit591, <16 x i64>* undef, align 128
+  %vecinit694 = shufflevector <16 x i64> undef, <16 x i64> <i64 13, i64 14, i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+  store <16 x i64> %vecinit694, <16 x i64>* undef, align 128
+  %3 = load i64* undef, align 8
+  %vecinit1331 = insertelement <16 x i64> undef, i64 %3, i32 14
+  %4 = load i64* undef, align 8
+  %vecinit1468 = insertelement <16 x i64> undef, i64 %4, i32 11
+  %vecinit1471 = shufflevector <16 x i64> %vecinit1468, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef>
+  %vecinit1474 = shufflevector <16 x i64> %vecinit1471, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  store <16 x i64> %vecinit1474, <16 x i64>* undef, align 128
+  %vecinit1552 = shufflevector <16 x i64> undef, <16 x i64> <i64 10, i64 11, i64 12, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit1555 = shufflevector <16 x i64> %vecinit1552, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef>
+  %vecinit1558 = shufflevector <16 x i64> %vecinit1555, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  store <16 x i64> %vecinit1558, <16 x i64>* undef, align 128
+  %vecinit1591 = shufflevector <16 x i64> undef, <16 x i64> <i64 3, i64 4, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit1594 = shufflevector <16 x i64> %vecinit1591, <16 x i64> <i64 5, i64 6, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit1597 = shufflevector <16 x i64> %vecinit1594, <16 x i64> <i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit1599 = insertelement <16 x i64> %vecinit1597, i64 undef, i32 8
+  %vecinit1601 = insertelement <16 x i64> %vecinit1599, i64 undef, i32 9
+  %vecinit1603 = insertelement <16 x i64> %vecinit1601, i64 undef, i32 10
+  %5 = load i64* undef, align 8
+  %vecinit1605 = insertelement <16 x i64> %vecinit1603, i64 %5, i32 11
+  %vecinit1608 = shufflevector <16 x i64> %vecinit1605, <16 x i64> <i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 undef>
+  %6 = load i64* undef, align 8
+  %vecinit1610 = insertelement <16 x i64> %vecinit1608, i64 %6, i32 15
+  store <16 x i64> %vecinit1610, <16 x i64>* undef, align 128
+  %vecinit2226 = shufflevector <16 x i64> undef, <16 x i64> <i64 6, i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %7 = load i64* undef, align 8
+  %vecinit2228 = insertelement <16 x i64> %vecinit2226, i64 %7, i32 8
+  %vecinit2230 = insertelement <16 x i64> %vecinit2228, i64 undef, i32 9
+  %vecinit2233 = shufflevector <16 x i64> %vecinit2230, <16 x i64> <i64 11, i64 12, i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef>
+  %vecinit2236 = shufflevector <16 x i64> %vecinit2233, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  store <16 x i64> %vecinit2236, <16 x i64>* undef, align 128
+  %vecinit2246 = shufflevector <16 x i64> undef, <16 x i64> <i64 4, i64 5, i64 6, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit2249 = shufflevector <16 x i64> %vecinit2246, <16 x i64> <i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit2252 = shufflevector <16 x i64> %vecinit2249, <16 x i64> <i64 10, i64 11, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %vecinit2255 = shufflevector <16 x i64> %vecinit2252, <16 x i64> <i64 12, i64 13, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 undef, i32 undef, i32 undef>
+  %8 = load i64* %arrayidx39, align 8
+  %vecinit2257 = insertelement <16 x i64> %vecinit2255, i64 %8, i32 13
+  %vecinit2260 = shufflevector <16 x i64> %vecinit2257, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+  store <16 x i64> %vecinit2260, <16 x i64>* null, align 128
+  ret void
+}
+attributes #0 = { noredzone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
index e3527c1a4d9b..81f45782a96f 100644
--- a/test/CodeGen/ARM/vminmax.ll
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmins8:
+;CHECK-LABEL: vmins8:
 ;CHECK: vmin.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmins16:
+;CHECK-LABEL: vmins16:
 ;CHECK: vmin.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmins32:
+;CHECK-LABEL: vmins32:
 ;CHECK: vmin.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vminu8:
+;CHECK-LABEL: vminu8:
 ;CHECK: vmin.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -37,7 +37,7 @@ define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vminu16:
+;CHECK-LABEL: vminu16:
 ;CHECK: vmin.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vminu32:
+;CHECK-LABEL: vminu32:
 ;CHECK: vmin.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -55,7 +55,7 @@ define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vminf32:
+;CHECK-LABEL: vminf32:
 ;CHECK: vmin.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -64,7 +64,7 @@ define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vminQs8:
+;CHECK-LABEL: vminQs8:
 ;CHECK: vmin.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -73,7 +73,7 @@ define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vminQs16:
+;CHECK-LABEL: vminQs16:
 ;CHECK: vmin.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -82,7 +82,7 @@ define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vminQs32:
+;CHECK-LABEL: vminQs32:
 ;CHECK: vmin.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -91,7 +91,7 @@ define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vminQu8:
+;CHECK-LABEL: vminQu8:
 ;CHECK: vmin.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -100,7 +100,7 @@ define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vminQu16:
+;CHECK-LABEL: vminQu16:
 ;CHECK: vmin.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -109,7 +109,7 @@ define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vminQu32:
+;CHECK-LABEL: vminQu32:
 ;CHECK: vmin.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -118,7 +118,7 @@ define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vminQf32:
+;CHECK-LABEL: vminQf32:
 ;CHECK: vmin.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -147,7 +147,7 @@ declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind read
 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmaxs8:
+;CHECK-LABEL: vmaxs8:
 ;CHECK: vmax.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -156,7 +156,7 @@ define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmaxs16:
+;CHECK-LABEL: vmaxs16:
 ;CHECK: vmax.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -165,7 +165,7 @@ define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmaxs32:
+;CHECK-LABEL: vmaxs32:
 ;CHECK: vmax.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -174,7 +174,7 @@ define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmaxu8:
+;CHECK-LABEL: vmaxu8:
 ;CHECK: vmax.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -183,7 +183,7 @@ define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmaxu16:
+;CHECK-LABEL: vmaxu16:
 ;CHECK: vmax.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -192,7 +192,7 @@ define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmaxu32:
+;CHECK-LABEL: vmaxu32:
 ;CHECK: vmax.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -201,7 +201,7 @@ define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vmaxf32:
+;CHECK-LABEL: vmaxf32:
 ;CHECK: vmax.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -210,7 +210,7 @@ define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vmaxQs8:
+;CHECK-LABEL: vmaxQs8:
 ;CHECK: vmax.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -219,7 +219,7 @@ define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vmaxQs16:
+;CHECK-LABEL: vmaxQs16:
 ;CHECK: vmax.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -228,7 +228,7 @@ define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vmaxQs32:
+;CHECK-LABEL: vmaxQs32:
 ;CHECK: vmax.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -237,7 +237,7 @@ define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vmaxQu8:
+;CHECK-LABEL: vmaxQu8:
 ;CHECK: vmax.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -246,7 +246,7 @@ define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vmaxQu16:
+;CHECK-LABEL: vmaxQu16:
 ;CHECK: vmax.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -255,7 +255,7 @@ define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vmaxQu32:
+;CHECK-LABEL: vmaxQu32:
 ;CHECK: vmax.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -264,7 +264,7 @@ define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vmaxQf32:
+;CHECK-LABEL: vmaxQf32:
 ;CHECK: vmax.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll
new file mode 100644
index 000000000000..f6ce64c54a39
--- /dev/null
+++ b/test/CodeGen/ARM/vminmaxnm.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
+
+define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {   ; 4 x float vmaxnm intrinsic; Q-register operands expected
+; CHECK-LABEL: vmaxnmq:
+; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = load <4 x float>* %B
+  %tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)   ; intrinsic under test
+  ret <4 x float> %tmp3
+}
+
+define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {   ; 2 x float vmaxnm intrinsic; D-register operands expected
+; CHECK-LABEL: vmaxnmd:
+; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = load <2 x float>* %B
+  %tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)   ; intrinsic under test
+  ret <2 x float> %tmp3
+}
+
+define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {   ; 4 x float vminnm intrinsic; Q-register operands expected
+; CHECK-LABEL: vminnmq:
+; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+  %tmp1 = load <4 x float>* %A
+  %tmp2 = load <4 x float>* %B
+  %tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)   ; intrinsic under test
+  ret <4 x float> %tmp3
+}
+
+define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {   ; 2 x float vminnm intrinsic; D-register operands expected
+; CHECK-LABEL: vminnmd:
+; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %tmp1 = load <2 x float>* %A
+  %tmp2 = load <2 x float>* %B
+  %tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)   ; intrinsic under test
+  ret <2 x float> %tmp3
+}
+
+define float @fp-armv8_vminnm_o(float %a, float %b) {   ; scalar min as compare+select; vminnm.f32 expected only in the -enable-unsafe-fp-math run
+; CHECK-FAST: fp-armv8_vminnm_o
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK: fp-armv8_vminnm_o
+; CHECK-NOT: vminnm.f32
+  %cmp = fcmp olt float %a, %b   ; ordered less-than
+  %cond = select i1 %cmp, float %a, float %b   ; %a when the compare is true, otherwise %b
+  ret float %cond
+}
+
+define float @fp-armv8_vminnm_u(float %a, float %b) {   ; scalar min as compare+select; vminnm.f32 expected only in the -enable-unsafe-fp-math run
+; CHECK-FAST: fp-armv8_vminnm_u
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK: fp-armv8_vminnm_u
+; CHECK-NOT: vminnm.f32
+  %cmp = fcmp ult float %a, %b   ; unordered-or-less-than
+  %cond = select i1 %cmp, float %a, float %b   ; %a when the compare is true, otherwise %b
+  ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_o(float %a, float %b) {   ; scalar max as compare+select; vmaxnm.f32 expected only in the -enable-unsafe-fp-math run
+; CHECK-FAST: fp-armv8_vmaxnm_o
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK: fp-armv8_vmaxnm_o
+; CHECK-NOT: vmaxnm.f32
+  %cmp = fcmp ogt float %a, %b   ; ordered greater-than
+  %cond = select i1 %cmp, float %a, float %b   ; %a when the compare is true, otherwise %b
+  ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_u(float %a, float %b) {   ; scalar max as compare+select; vmaxnm.f32 expected only in the -enable-unsafe-fp-math run
+; CHECK-FAST: fp-armv8_vmaxnm_u
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK: fp-armv8_vmaxnm_u
+; CHECK-NOT: vmaxnm.f32
+  %cmp = fcmp ugt float %a, %b   ; unordered-or-greater-than
+  %cond = select i1 %cmp, float %a, float %b   ; %a when the compare is true, otherwise %b
+  ret float %cond
+}
+
+
+declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index 9c6b210be797..caf655609c2b 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
-;CHECK: vmlai8:
+;CHECK-LABEL: vmlai8:
 ;CHECK: vmla.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -12,7 +12,7 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 }
 
 define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlai16:
+;CHECK-LABEL: vmlai16:
 ;CHECK: vmla.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -23,7 +23,7 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlai32:
+;CHECK-LABEL: vmlai32:
 ;CHECK: vmla.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -34,7 +34,7 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
-;CHECK: vmlaf32:
+;CHECK-LABEL: vmlaf32:
 ;CHECK: vmla.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -45,7 +45,7 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
 }
 
 define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
-;CHECK: vmlaQi8:
+;CHECK-LABEL: vmlaQi8:
 ;CHECK: vmla.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -56,7 +56,7 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
 }
 
 define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK: vmlaQi16:
+;CHECK-LABEL: vmlaQi16:
 ;CHECK: vmla.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -67,7 +67,7 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK: vmlaQi32:
+;CHECK-LABEL: vmlaQi32:
 ;CHECK: vmla.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -78,7 +78,7 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-;CHECK: vmlaQf32:
+;CHECK-LABEL: vmlaQf32:
 ;CHECK: vmla.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -89,7 +89,7 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
 }
 
 define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlals8:
+;CHECK-LABEL: vmlals8:
 ;CHECK: vmlal.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -102,7 +102,7 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlals16:
+;CHECK-LABEL: vmlals16:
 ;CHECK: vmlal.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -115,7 +115,7 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlals32:
+;CHECK-LABEL: vmlals32:
 ;CHECK: vmlal.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -128,7 +128,7 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlalu8:
+;CHECK-LABEL: vmlalu8:
 ;CHECK: vmlal.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -141,7 +141,7 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlalu16:
+;CHECK-LABEL: vmlalu16:
 ;CHECK: vmlal.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -154,7 +154,7 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlalu32:
+;CHECK-LABEL: vmlalu32:
 ;CHECK: vmlal.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index 65e7fe41bb3a..61f3424909e3 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
-;CHECK: vmlsi8:
+;CHECK-LABEL: vmlsi8:
 ;CHECK: vmls.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -12,7 +12,7 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 }
 
 define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlsi16:
+;CHECK-LABEL: vmlsi16:
 ;CHECK: vmls.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -23,7 +23,7 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlsi32:
+;CHECK-LABEL: vmlsi32:
 ;CHECK: vmls.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -34,7 +34,7 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
-;CHECK: vmlsf32:
+;CHECK-LABEL: vmlsf32:
 ;CHECK: vmls.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -45,7 +45,7 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
 }
 
 define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
-;CHECK: vmlsQi8:
+;CHECK-LABEL: vmlsQi8:
 ;CHECK: vmls.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -56,7 +56,7 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
 }
 
 define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK: vmlsQi16:
+;CHECK-LABEL: vmlsQi16:
 ;CHECK: vmls.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -67,7 +67,7 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK: vmlsQi32:
+;CHECK-LABEL: vmlsQi32:
 ;CHECK: vmls.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -78,7 +78,7 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-;CHECK: vmlsQf32:
+;CHECK-LABEL: vmlsQf32:
 ;CHECK: vmls.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -89,7 +89,7 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
 }
 
 define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlsls8:
+;CHECK-LABEL: vmlsls8:
 ;CHECK: vmlsl.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -102,7 +102,7 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlsls16:
+;CHECK-LABEL: vmlsls16:
 ;CHECK: vmlsl.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -115,7 +115,7 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlsls32:
+;CHECK-LABEL: vmlsls32:
 ;CHECK: vmlsl.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -128,7 +128,7 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlslu8:
+;CHECK-LABEL: vmlslu8:
 ;CHECK: vmlsl.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -141,7 +141,7 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlslu16:
+;CHECK-LABEL: vmlslu16:
 ;CHECK: vmlsl.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -154,7 +154,7 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlslu32:
+;CHECK-LABEL: vmlslu32:
 ;CHECK: vmlsl.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 0c2387960b4e..8b63138bda81 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -1,169 +1,169 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_movi8() nounwind {
-;CHECK: v_movi8:
+;CHECK-LABEL: v_movi8:
 ;CHECK: vmov.i8 d{{.*}}, #0x8
 	ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <4 x i16> @v_movi16a() nounwind {
-;CHECK: v_movi16a:
+;CHECK-LABEL: v_movi16a:
 ;CHECK: vmov.i16 d{{.*}}, #0x10
 	ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
 }
 
 define <4 x i16> @v_movi16b() nounwind {
-;CHECK: v_movi16b:
+;CHECK-LABEL: v_movi16b:
 ;CHECK: vmov.i16 d{{.*}}, #0x1000
 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <4 x i16> @v_mvni16a() nounwind {
-;CHECK: v_mvni16a:
+;CHECK-LABEL: v_mvni16a:
 ;CHECK: vmvn.i16 d{{.*}}, #0x10
 	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
 }
 
 define <4 x i16> @v_mvni16b() nounwind {
-;CHECK: v_mvni16b:
+;CHECK-LABEL: v_mvni16b:
 ;CHECK: vmvn.i16 d{{.*}}, #0x1000
 	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
 }
 
 define <2 x i32> @v_movi32a() nounwind {
-;CHECK: v_movi32a:
+;CHECK-LABEL: v_movi32a:
 ;CHECK: vmov.i32 d{{.*}}, #0x20
 	ret <2 x i32> < i32 32, i32 32 >
 }
 
 define <2 x i32> @v_movi32b() nounwind {
-;CHECK: v_movi32b:
+;CHECK-LABEL: v_movi32b:
 ;CHECK: vmov.i32 d{{.*}}, #0x2000
 	ret <2 x i32> < i32 8192, i32 8192 >
 }
 
 define <2 x i32> @v_movi32c() nounwind {
-;CHECK: v_movi32c:
+;CHECK-LABEL: v_movi32c:
 ;CHECK: vmov.i32 d{{.*}}, #0x200000
 	ret <2 x i32> < i32 2097152, i32 2097152 >
 }
 
 define <2 x i32> @v_movi32d() nounwind {
-;CHECK: v_movi32d:
+;CHECK-LABEL: v_movi32d:
 ;CHECK: vmov.i32 d{{.*}}, #0x20000000
 	ret <2 x i32> < i32 536870912, i32 536870912 >
 }
 
 define <2 x i32> @v_movi32e() nounwind {
-;CHECK: v_movi32e:
+;CHECK-LABEL: v_movi32e:
 ;CHECK: vmov.i32 d{{.*}}, #0x20ff
 	ret <2 x i32> < i32 8447, i32 8447 >
 }
 
 define <2 x i32> @v_movi32f() nounwind {
-;CHECK: v_movi32f:
+;CHECK-LABEL: v_movi32f:
 ;CHECK: vmov.i32 d{{.*}}, #0x20ffff
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
 define <2 x i32> @v_mvni32a() nounwind {
-;CHECK: v_mvni32a:
+;CHECK-LABEL: v_mvni32a:
 ;CHECK: vmvn.i32 d{{.*}}, #0x20
 	ret <2 x i32> < i32 4294967263, i32 4294967263 >
 }
 
 define <2 x i32> @v_mvni32b() nounwind {
-;CHECK: v_mvni32b:
+;CHECK-LABEL: v_mvni32b:
 ;CHECK: vmvn.i32 d{{.*}}, #0x2000
 	ret <2 x i32> < i32 4294959103, i32 4294959103 >
 }
 
 define <2 x i32> @v_mvni32c() nounwind {
-;CHECK: v_mvni32c:
+;CHECK-LABEL: v_mvni32c:
 ;CHECK: vmvn.i32 d{{.*}}, #0x200000
 	ret <2 x i32> < i32 4292870143, i32 4292870143 >
 }
 
 define <2 x i32> @v_mvni32d() nounwind {
-;CHECK: v_mvni32d:
+;CHECK-LABEL: v_mvni32d:
 ;CHECK: vmvn.i32 d{{.*}}, #0x20000000
 	ret <2 x i32> < i32 3758096383, i32 3758096383 >
 }
 
 define <2 x i32> @v_mvni32e() nounwind {
-;CHECK: v_mvni32e:
+;CHECK-LABEL: v_mvni32e:
 ;CHECK: vmvn.i32 d{{.*}}, #0x20ff
 	ret <2 x i32> < i32 4294958848, i32 4294958848 >
 }
 
 define <2 x i32> @v_mvni32f() nounwind {
-;CHECK: v_mvni32f:
+;CHECK-LABEL: v_mvni32f:
 ;CHECK: vmvn.i32 d{{.*}}, #0x20ffff
 	ret <2 x i32> < i32 4292804608, i32 4292804608 >
 }
 
 define <1 x i64> @v_movi64() nounwind {
-;CHECK: v_movi64:
+;CHECK-LABEL: v_movi64:
 ;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff
 	ret <1 x i64> < i64 18374687574888349695 >
 }
 
 define <16 x i8> @v_movQi8() nounwind {
-;CHECK: v_movQi8:
+;CHECK-LABEL: v_movQi8:
 ;CHECK: vmov.i8 q{{.*}}, #0x8
 	ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <8 x i16> @v_movQi16a() nounwind {
-;CHECK: v_movQi16a:
+;CHECK-LABEL: v_movQi16a:
 ;CHECK: vmov.i16 q{{.*}}, #0x10
 	ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
 }
 
 define <8 x i16> @v_movQi16b() nounwind {
-;CHECK: v_movQi16b:
+;CHECK-LABEL: v_movQi16b:
 ;CHECK: vmov.i16 q{{.*}}, #0x1000
 	ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <4 x i32> @v_movQi32a() nounwind {
-;CHECK: v_movQi32a:
+;CHECK-LABEL: v_movQi32a:
 ;CHECK: vmov.i32 q{{.*}}, #0x20
 	ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
 }
 
 define <4 x i32> @v_movQi32b() nounwind {
-;CHECK: v_movQi32b:
+;CHECK-LABEL: v_movQi32b:
 ;CHECK: vmov.i32 q{{.*}}, #0x2000
 	ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
 }
 
 define <4 x i32> @v_movQi32c() nounwind {
-;CHECK: v_movQi32c:
+;CHECK-LABEL: v_movQi32c:
 ;CHECK: vmov.i32 q{{.*}}, #0x200000
 	ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
 }
 
 define <4 x i32> @v_movQi32d() nounwind {
-;CHECK: v_movQi32d:
+;CHECK-LABEL: v_movQi32d:
 ;CHECK: vmov.i32 q{{.*}}, #0x20000000
 	ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
 }
 
 define <4 x i32> @v_movQi32e() nounwind {
-;CHECK: v_movQi32e:
+;CHECK-LABEL: v_movQi32e:
 ;CHECK: vmov.i32 q{{.*}}, #0x20ff
 	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
 }
 
 define <4 x i32> @v_movQi32f() nounwind {
-;CHECK: v_movQi32f:
+;CHECK-LABEL: v_movQi32f:
 ;CHECK: vmov.i32 q{{.*}}, #0x20ffff
 	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
 }
 
 define <2 x i64> @v_movQi64() nounwind {
-;CHECK: v_movQi64:
+;CHECK-LABEL: v_movQi64:
 ;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
@@ -172,7 +172,7 @@ define <2 x i64> @v_movQi64() nounwind {
 %struct.int8x8_t = type { <8 x i8> }
 define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
 entry:
-;CHECK: vdupn128:
+;CHECK-LABEL: vdupn128:
 ;CHECK: vmov.i8 d{{.*}}, #0x80
   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
   store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
@@ -181,7 +181,7 @@ entry:
 
 define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
 entry:
-;CHECK: vdupnneg75:
+;CHECK-LABEL: vdupnneg75:
 ;CHECK: vmov.i8 d{{.*}}, #0xb5
   %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
   store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
@@ -189,7 +189,7 @@ entry:
 }
 
 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
-;CHECK: vmovls8:
+;CHECK-LABEL: vmovls8:
 ;CHECK: vmovl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
@@ -197,7 +197,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
-;CHECK: vmovls16:
+;CHECK-LABEL: vmovls16:
 ;CHECK: vmovl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -205,7 +205,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
-;CHECK: vmovls32:
+;CHECK-LABEL: vmovls32:
 ;CHECK: vmovl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
@@ -213,7 +213,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
 }
 
 define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
-;CHECK: vmovlu8:
+;CHECK-LABEL: vmovlu8:
 ;CHECK: vmovl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
@@ -221,7 +221,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
-;CHECK: vmovlu16:
+;CHECK-LABEL: vmovlu16:
 ;CHECK: vmovl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -229,7 +229,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
-;CHECK: vmovlu32:
+;CHECK-LABEL: vmovlu32:
 ;CHECK: vmovl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
@@ -237,7 +237,7 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
 }
 
 define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
-;CHECK: vmovni16:
+;CHECK-LABEL: vmovni16:
 ;CHECK: vmovn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
@@ -245,7 +245,7 @@ define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
-;CHECK: vmovni32:
+;CHECK-LABEL: vmovni32:
 ;CHECK: vmovn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
@@ -253,7 +253,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
-;CHECK: vmovni64:
+;CHECK-LABEL: vmovni64:
 ;CHECK: vmovn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
@@ -261,7 +261,7 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
-;CHECK: vqmovns16:
+;CHECK-LABEL: vqmovns16:
 ;CHECK: vqmovn.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
@@ -269,7 +269,7 @@ define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
-;CHECK: vqmovns32:
+;CHECK-LABEL: vqmovns32:
 ;CHECK: vqmovn.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
@@ -277,7 +277,7 @@ define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
-;CHECK: vqmovns64:
+;CHECK-LABEL: vqmovns64:
 ;CHECK: vqmovn.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
@@ -285,7 +285,7 @@ define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
-;CHECK: vqmovnu16:
+;CHECK-LABEL: vqmovnu16:
 ;CHECK: vqmovn.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
@@ -293,7 +293,7 @@ define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
-;CHECK: vqmovnu32:
+;CHECK-LABEL: vqmovnu32:
 ;CHECK: vqmovn.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
@@ -301,7 +301,7 @@ define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
-;CHECK: vqmovnu64:
+;CHECK-LABEL: vqmovnu64:
 ;CHECK: vqmovn.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
@@ -309,7 +309,7 @@ define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
-;CHECK: vqmovuns16:
+;CHECK-LABEL: vqmovuns16:
 ;CHECK: vqmovun.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
@@ -317,7 +317,7 @@ define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
-;CHECK: vqmovuns32:
+;CHECK-LABEL: vqmovuns32:
 ;CHECK: vqmovun.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
@@ -325,7 +325,7 @@ define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
-;CHECK: vqmovuns64:
+;CHECK-LABEL: vqmovuns64:
 ;CHECK: vqmovun.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
@@ -358,7 +358,7 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
 ; rdar://10437054
 define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
 entry:
-;CHECK: v_mov_v2f32:
+;CHECK-LABEL: v_mov_v2f32:
 ;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01
   store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
   ret void
@@ -366,7 +366,7 @@ entry:
 
 define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
 entry:
-;CHECK: v_mov_v4f32:
+;CHECK-LABEL: v_mov_v4f32:
 ;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
   store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
   ret void
@@ -374,7 +374,7 @@ entry:
 
 define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
 entry:
-;CHECK: v_mov_v4f32_undef:
+;CHECK-LABEL: v_mov_v4f32_undef:
 ;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
   %a = load <4 x float> *%p
   %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
@@ -386,7 +386,7 @@ entry:
 ; rdar://10723651
 define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
 entry:
-;CHECK: any_extend
+;CHECK-LABEL: any_extend:
 ;CHECK: vmovl
   %and.i186 = zext <4 x i1> %x to <4 x i32>
   %add.i185 = sub <4 x i32> %and.i186, %y
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index eb5ad8f0c3d0..de329acdf3c7 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmuli8:
+;CHECK-LABEL: vmuli8:
 ;CHECK: vmul.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmuli16:
+;CHECK-LABEL: vmuli16:
 ;CHECK: vmul.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmuli32:
+;CHECK-LABEL: vmuli32:
 ;CHECK: vmul.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vmulf32:
+;CHECK-LABEL: vmulf32:
 ;CHECK: vmul.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -37,7 +37,7 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmulp8:
+;CHECK-LABEL: vmulp8:
 ;CHECK: vmul.p8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -46,7 +46,7 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vmulQi8:
+;CHECK-LABEL: vmulQi8:
 ;CHECK: vmul.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -55,7 +55,7 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vmulQi16:
+;CHECK-LABEL: vmulQi16:
 ;CHECK: vmul.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -64,7 +64,7 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vmulQi32:
+;CHECK-LABEL: vmulQi32:
 ;CHECK: vmul.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -73,7 +73,7 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vmulQf32:
+;CHECK-LABEL: vmulQf32:
 ;CHECK: vmul.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -82,7 +82,7 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vmulQp8:
+;CHECK-LABEL: vmulQp8:
 ;CHECK: vmul.p8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -95,7 +95,7 @@ declare <16 x i8>  @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind rea
 
 define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
 entry:
-; CHECK: test_vmul_lanef32:
+; CHECK-LABEL: test_vmul_lanef32:
 ; CHECK: vmul.f32 d0, d0, d1[0]
   %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
   %1 = fmul <2 x float> %0, %arg0_float32x2_t     ; <<2 x float>> [#uses=1]
@@ -104,7 +104,7 @@ entry:
 
 define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
 entry:
-; CHECK: test_vmul_lanes16:
+; CHECK-LABEL: test_vmul_lanes16:
 ; CHECK: vmul.i16 d0, d0, d1[1]
   %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses$
   %1 = mul <4 x i16> %0, %arg0_int16x4_t          ; <<4 x i16>> [#uses=1]
@@ -113,7 +113,7 @@ entry:
 
 define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
 entry:
-; CHECK: test_vmul_lanes32:
+; CHECK-LABEL: test_vmul_lanes32:
 ; CHECK: vmul.i32 d0, d0, d1[1]
   %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
   %1 = mul <2 x i32> %0, %arg0_int32x2_t          ; <<2 x i32>> [#uses=1]
@@ -122,7 +122,7 @@ entry:
 
 define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
 entry:
-; CHECK: test_vmulQ_lanef32:
+; CHECK-LABEL: test_vmulQ_lanef32:
 ; CHECK: vmul.f32 q0, q0, d2[1]
   %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>$
   %1 = fmul <4 x float> %0, %arg0_float32x4_t     ; <<4 x float>> [#uses=1]
@@ -131,7 +131,7 @@ entry:
 
 define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
 entry:
-; CHECK: test_vmulQ_lanes16:
+; CHECK-LABEL: test_vmulQ_lanes16:
 ; CHECK: vmul.i16 q0, q0, d2[1]
   %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %1 = mul <8 x i16> %0, %arg0_int16x8_t          ; <<8 x i16>> [#uses=1]
@@ -140,7 +140,7 @@ entry:
 
 define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
 entry:
-; CHECK: test_vmulQ_lanes32:
+; CHECK-LABEL: test_vmulQ_lanes32:
 ; CHECK: vmul.i32 q0, q0, d2[1]
   %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses$
   %1 = mul <4 x i32> %0, %arg0_int32x4_t          ; <<4 x i32>> [#uses=1]
@@ -148,7 +148,7 @@ entry:
 }
 
 define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmulls8:
+;CHECK-LABEL: vmulls8:
 ;CHECK: vmull.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -159,7 +159,7 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmulls8_int:
+;CHECK-LABEL: vmulls8_int:
 ;CHECK: vmull.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -168,7 +168,7 @@ define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmulls16:
+;CHECK-LABEL: vmulls16:
 ;CHECK: vmull.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -179,7 +179,7 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmulls16_int:
+;CHECK-LABEL: vmulls16_int:
 ;CHECK: vmull.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -188,7 +188,7 @@ define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmulls32:
+;CHECK-LABEL: vmulls32:
 ;CHECK: vmull.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -199,7 +199,7 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmulls32_int:
+;CHECK-LABEL: vmulls32_int:
 ;CHECK: vmull.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -208,7 +208,7 @@ define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmullu8:
+;CHECK-LABEL: vmullu8:
 ;CHECK: vmull.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -219,7 +219,7 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmullu8_int:
+;CHECK-LABEL: vmullu8_int:
 ;CHECK: vmull.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -228,7 +228,7 @@ define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmullu16:
+;CHECK-LABEL: vmullu16:
 ;CHECK: vmull.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -239,7 +239,7 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmullu16_int:
+;CHECK-LABEL: vmullu16_int:
 ;CHECK: vmull.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -248,7 +248,7 @@ define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmullu32:
+;CHECK-LABEL: vmullu32:
 ;CHECK: vmull.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -259,7 +259,7 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmullu32_int:
+;CHECK-LABEL: vmullu32_int:
 ;CHECK: vmull.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -268,7 +268,7 @@ define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmullp8:
+;CHECK-LABEL: vmullp8:
 ;CHECK: vmull.p8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -441,7 +441,7 @@ define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
 ; rdar://9197392
 define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
 entry:
-; CHECK: distribute:
+; CHECK-LABEL: distribute:
 ; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]]
 ; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]]
   %0 = trunc i32 %mul to i8
@@ -515,6 +515,17 @@ entry:
   ret void
 }
 
+define <8 x i8> @no_distribute(<8 x i8> %a, <8 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: no_distribute:
+; CHECK: vadd.i8
+; CHECK: vmul.i8
+; CHECK-NOT: vmla.i8
+  %0 = add <8 x i8> %a, %b
+  %1 = mul <8 x i8> %0, %0 ; the sum is both multiplicands: expect vadd + vmul, no vmla
+  ret <8 x i8> %1
+}
+
 ; If one operand has a zero-extend and the other a sign-extend, vmull
 ; cannot be used.
 define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
@@ -623,3 +634,21 @@ entry:
   store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
   ret void
 }
+
+define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind {
+; CHECK-LABEL: foo:
+;   Look for a normal scalar FP load, e.g. "vldr s0, [r2]", rather than a
+;   to-all-lanes load such as "vld1.32  {d18[], d19[]}, [r2:32]".  Then check
+;   that the multiply folded the splat and used a vector * scalar instruction.
+; CHECK: vldr  {{s[0-9]+}}, [r2]
+; CHECK: vmul.f32  q8, q8, d0[0]
+  %tmp = load float* %src, align 4
+  %tmp5 = load <4 x float>* %a, align 4
+  %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0
+  %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2
+  %tmp9 = insertelement <4 x float> %tmp8, float %tmp, i32 3
+  %tmp10 = fmul <4 x float> %tmp9, %tmp5
+  store <4 x float> %tmp10, <4 x float>* %dst, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 4a10732458e8..1be4f748213a 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
-;CHECK: vnegs8:
+;CHECK-LABEL: vnegs8:
 ;CHECK: vneg.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = sub <8 x i8> zeroinitializer, %tmp1
@@ -9,7 +9,7 @@ define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
-;CHECK: vnegs16:
+;CHECK-LABEL: vnegs16:
 ;CHECK: vneg.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = sub <4 x i16> zeroinitializer, %tmp1
@@ -17,7 +17,7 @@ define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
-;CHECK: vnegs32:
+;CHECK-LABEL: vnegs32:
 ;CHECK: vneg.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = sub <2 x i32> zeroinitializer, %tmp1
@@ -25,7 +25,7 @@ define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
-;CHECK: vnegf32:
+;CHECK-LABEL: vnegf32:
 ;CHECK: vneg.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
@@ -33,7 +33,7 @@ define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
 }
 
 define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
-;CHECK: vnegQs8:
+;CHECK-LABEL: vnegQs8:
 ;CHECK: vneg.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = sub <16 x i8> zeroinitializer, %tmp1
@@ -41,7 +41,7 @@ define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
-;CHECK: vnegQs16:
+;CHECK-LABEL: vnegQs16:
 ;CHECK: vneg.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = sub <8 x i16> zeroinitializer, %tmp1
@@ -49,7 +49,7 @@ define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
-;CHECK: vnegQs32:
+;CHECK-LABEL: vnegQs32:
 ;CHECK: vneg.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = sub <4 x i32> zeroinitializer, %tmp1
@@ -57,7 +57,7 @@ define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
 }
 
 define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
-;CHECK: vnegQf32:
+;CHECK-LABEL: vnegQf32:
 ;CHECK: vneg.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
@@ -65,7 +65,7 @@ define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
 }
 
 define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
-;CHECK: vqnegs8:
+;CHECK-LABEL: vqnegs8:
 ;CHECK: vqneg.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
@@ -73,7 +73,7 @@ define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
-;CHECK: vqnegs16:
+;CHECK-LABEL: vqnegs16:
 ;CHECK: vqneg.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
@@ -81,7 +81,7 @@ define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
-;CHECK: vqnegs32:
+;CHECK-LABEL: vqnegs32:
 ;CHECK: vqneg.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
@@ -89,7 +89,7 @@ define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
 }
 
 define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
-;CHECK: vqnegQs8:
+;CHECK-LABEL: vqnegQs8:
 ;CHECK: vqneg.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
@@ -97,7 +97,7 @@ define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
-;CHECK: vqnegQs16:
+;CHECK-LABEL: vqnegQs16:
 ;CHECK: vqneg.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
@@ -105,7 +105,7 @@ define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
-;CHECK: vqnegQs32:
+;CHECK-LABEL: vqnegQs32:
 ;CHECK: vqneg.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index 7296e936cd73..a616a8d270a7 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpadals8:
+;CHECK-LABEL: vpadals8:
 ;CHECK: vpadal.s8
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpadals16:
+;CHECK-LABEL: vpadals16:
 ;CHECK: vpadal.s16
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpadals32:
+;CHECK-LABEL: vpadals32:
 ;CHECK: vpadal.s32
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpadalu8:
+;CHECK-LABEL: vpadalu8:
 ;CHECK: vpadal.u8
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -37,7 +37,7 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpadalu16:
+;CHECK-LABEL: vpadalu16:
 ;CHECK: vpadal.u16
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpadalu32:
+;CHECK-LABEL: vpadalu32:
 ;CHECK: vpadal.u32
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -55,7 +55,7 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vpadalQs8:
+;CHECK-LABEL: vpadalQs8:
 ;CHECK: vpadal.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -64,7 +64,7 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vpadalQs16:
+;CHECK-LABEL: vpadalQs16:
 ;CHECK: vpadal.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -73,7 +73,7 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vpadalQs32:
+;CHECK-LABEL: vpadalQs32:
 ;CHECK: vpadal.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -82,7 +82,7 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vpadalQu8:
+;CHECK-LABEL: vpadalQu8:
 ;CHECK: vpadal.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -91,7 +91,7 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vpadalQu16:
+;CHECK-LABEL: vpadalQu16:
 ;CHECK: vpadal.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -100,7 +100,7 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vpadalQu32:
+;CHECK-LABEL: vpadalQu32:
 ;CHECK: vpadal.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <4 x i32>* %B
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index 1ba68f552385..f84721f996cd 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpaddi8:
+;CHECK-LABEL: vpaddi8:
 ;CHECK: vpadd.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpaddi16:
+;CHECK-LABEL: vpaddi16:
 ;CHECK: vpadd.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpaddi32:
+;CHECK-LABEL: vpaddi32:
 ;CHECK: vpadd.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vpaddf32:
+;CHECK-LABEL: vpaddf32:
 ;CHECK: vpadd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -43,7 +43,7 @@ declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind read
 declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
 define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
-;CHECK: vpaddls8:
+;CHECK-LABEL: vpaddls8:
 ;CHECK: vpaddl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
@@ -51,7 +51,7 @@ define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
 }
 
 define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
-;CHECK: vpaddls16:
+;CHECK-LABEL: vpaddls16:
 ;CHECK: vpaddl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
@@ -59,7 +59,7 @@ define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
 }
 
 define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
-;CHECK: vpaddls32:
+;CHECK-LABEL: vpaddls32:
 ;CHECK: vpaddl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
@@ -67,7 +67,7 @@ define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
 }
 
 define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
-;CHECK: vpaddlu8:
+;CHECK-LABEL: vpaddlu8:
 ;CHECK: vpaddl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
@@ -75,7 +75,7 @@ define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
 }
 
 define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
-;CHECK: vpaddlu16:
+;CHECK-LABEL: vpaddlu16:
 ;CHECK: vpaddl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
@@ -83,7 +83,7 @@ define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
 }
 
 define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
-;CHECK: vpaddlu32:
+;CHECK-LABEL: vpaddlu32:
 ;CHECK: vpaddl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
@@ -91,7 +91,7 @@ define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
 }
 
 define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
-;CHECK: vpaddlQs8:
+;CHECK-LABEL: vpaddlQs8:
 ;CHECK: vpaddl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
@@ -99,7 +99,7 @@ define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
-;CHECK: vpaddlQs16:
+;CHECK-LABEL: vpaddlQs16:
 ;CHECK: vpaddl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
@@ -107,7 +107,7 @@ define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
-;CHECK: vpaddlQs32:
+;CHECK-LABEL: vpaddlQs32:
 ;CHECK: vpaddl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
@@ -115,7 +115,7 @@ define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
 }
 
 define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
-;CHECK: vpaddlQu8:
+;CHECK-LABEL: vpaddlQu8:
 ;CHECK: vpaddl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
@@ -123,7 +123,7 @@ define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
-;CHECK: vpaddlQu16:
+;CHECK-LABEL: vpaddlQu16:
 ;CHECK: vpaddl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
@@ -131,7 +131,7 @@ define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
-;CHECK: vpaddlQu32:
+;CHECK-LABEL: vpaddlQu32:
 ;CHECK: vpaddl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
index b75bcc99f6b6..c68b3193c19a 100644
--- a/test/CodeGen/ARM/vpminmax.ll
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpmins8:
+;CHECK-LABEL: vpmins8:
 ;CHECK: vpmin.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpmins16:
+;CHECK-LABEL: vpmins16:
 ;CHECK: vpmin.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpmins32:
+;CHECK-LABEL: vpmins32:
 ;CHECK: vpmin.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpminu8:
+;CHECK-LABEL: vpminu8:
 ;CHECK: vpmin.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -37,7 +37,7 @@ define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpminu16:
+;CHECK-LABEL: vpminu16:
 ;CHECK: vpmin.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -46,7 +46,7 @@ define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpminu32:
+;CHECK-LABEL: vpminu32:
 ;CHECK: vpmin.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -55,7 +55,7 @@ define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vpminf32:
+;CHECK-LABEL: vpminf32:
 ;CHECK: vpmin.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -74,7 +74,7 @@ declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind rea
 declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
 define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpmaxs8:
+;CHECK-LABEL: vpmaxs8:
 ;CHECK: vpmax.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -83,7 +83,7 @@ define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpmaxs16:
+;CHECK-LABEL: vpmaxs16:
 ;CHECK: vpmax.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -92,7 +92,7 @@ define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpmaxs32:
+;CHECK-LABEL: vpmaxs32:
 ;CHECK: vpmax.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -101,7 +101,7 @@ define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpmaxu8:
+;CHECK-LABEL: vpmaxu8:
 ;CHECK: vpmax.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -110,7 +110,7 @@ define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpmaxu16:
+;CHECK-LABEL: vpmaxu16:
 ;CHECK: vpmax.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -119,7 +119,7 @@ define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpmaxu32:
+;CHECK-LABEL: vpmaxu32:
 ;CHECK: vpmax.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -128,7 +128,7 @@ define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vpmaxf32:
+;CHECK-LABEL: vpmaxf32:
 ;CHECK: vpmax.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index a1669b60ab56..784076685462 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqadds8:
+;CHECK-LABEL: vqadds8:
 ;CHECK: vqadd.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqadds16:
+;CHECK-LABEL: vqadds16:
 ;CHECK: vqadd.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqadds32:
+;CHECK-LABEL: vqadds32:
 ;CHECK: vqadd.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqadds64:
+;CHECK-LABEL: vqadds64:
 ;CHECK: vqadd.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqaddu8:
+;CHECK-LABEL: vqaddu8:
 ;CHECK: vqadd.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -46,7 +46,7 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqaddu16:
+;CHECK-LABEL: vqaddu16:
 ;CHECK: vqadd.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -55,7 +55,7 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqaddu32:
+;CHECK-LABEL: vqaddu32:
 ;CHECK: vqadd.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -64,7 +64,7 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqaddu64:
+;CHECK-LABEL: vqaddu64:
 ;CHECK: vqadd.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -73,7 +73,7 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqaddQs8:
+;CHECK-LABEL: vqaddQs8:
 ;CHECK: vqadd.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -82,7 +82,7 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqaddQs16:
+;CHECK-LABEL: vqaddQs16:
 ;CHECK: vqadd.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -91,7 +91,7 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqaddQs32:
+;CHECK-LABEL: vqaddQs32:
 ;CHECK: vqadd.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -100,7 +100,7 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqaddQs64:
+;CHECK-LABEL: vqaddQs64:
 ;CHECK: vqadd.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -109,7 +109,7 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqaddQu8:
+;CHECK-LABEL: vqaddQu8:
 ;CHECK: vqadd.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -118,7 +118,7 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqaddQu16:
+;CHECK-LABEL: vqaddQu16:
 ;CHECK: vqadd.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -127,7 +127,7 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqaddQu32:
+;CHECK-LABEL: vqaddQu32:
 ;CHECK: vqadd.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -136,7 +136,7 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqaddQu64:
+;CHECK-LABEL: vqaddQu64:
 ;CHECK: vqadd.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
index 08e7d2b2c0d4..d298167d3a91 100644
--- a/test/CodeGen/ARM/vqdmul.ll
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-elf"
 
 define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqdmulhs16:
+;CHECK-LABEL: vqdmulhs16:
 ;CHECK: vqdmulh.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -12,7 +12,7 @@ define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqdmulhs32:
+;CHECK-LABEL: vqdmulhs32:
 ;CHECK: vqdmulh.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -21,7 +21,7 @@ define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqdmulhQs16:
+;CHECK-LABEL: vqdmulhQs16:
 ;CHECK: vqdmulh.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -30,7 +30,7 @@ define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqdmulhQs32:
+;CHECK-LABEL: vqdmulhQs32:
 ;CHECK: vqdmulh.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -81,7 +81,7 @@ declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind re
 declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqrdmulhs16:
+;CHECK-LABEL: vqrdmulhs16:
 ;CHECK: vqrdmulh.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -90,7 +90,7 @@ define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqrdmulhs32:
+;CHECK-LABEL: vqrdmulhs32:
 ;CHECK: vqrdmulh.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -99,7 +99,7 @@ define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqrdmulhQs16:
+;CHECK-LABEL: vqrdmulhQs16:
 ;CHECK: vqrdmulh.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -108,7 +108,7 @@ define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqrdmulhQs32:
+;CHECK-LABEL: vqrdmulhQs32:
 ;CHECK: vqrdmulh.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -159,7 +159,7 @@ declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind r
 declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqdmulls16:
+;CHECK-LABEL: vqdmulls16:
 ;CHECK: vqdmull.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -168,7 +168,7 @@ define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqdmulls32:
+;CHECK-LABEL: vqdmulls32:
 ;CHECK: vqdmull.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -197,84 +197,92 @@ entry:
 declare <4 x i32>  @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i64>  @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
 
-define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vqdmlals16:
+define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK-LABEL: vqdmlals16_natural:
 ;CHECK: vqdmlal.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
-	%tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
-	ret <4 x i32> %tmp4
+        %tmp1 = load <4 x i32>* %A
+        %tmp2 = load <4 x i16>* %B
+        %tmp3 = load <4 x i16>* %C
+        %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
+        %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
+        ret <4 x i32> %tmp5
 }
 
-define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vqdmlals32:
+define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK-LABEL: vqdmlals32_natural:
 ;CHECK: vqdmlal.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
-	%tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
-	ret <2 x i64> %tmp4
+        %tmp1 = load <2 x i64>* %A
+        %tmp2 = load <2 x i32>* %B
+        %tmp3 = load <2 x i32>* %C
+        %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
+        %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
+        ret <2 x i64> %tmp5
 }
 
-define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16_natural(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
 entry:
-; CHECK: test_vqdmlal_lanes16
+; CHECK-LABEL: test_vqdmlal_lanes16_natural:
 ; CHECK: vqdmlal.s16 q0, d2, d3[1]
   %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
-  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %1
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0)
+  %2 = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1)
+  ret <4 x i32> %2
 }
 
-define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32_natural(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
 entry:
-; CHECK: test_vqdmlal_lanes32
+; CHECK-LABEL: test_vqdmlal_lanes32_natural:
 ; CHECK: vqdmlal.s32 q0, d2, d3[1]
   %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
-  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %1
+  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0)
+  %2 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1)
+  ret <2 x i64> %2
 }
 
-declare <4 x i32>  @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64>  @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32>  @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64>  @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
-define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vqdmlsls16:
+define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK-LABEL: vqdmlsls16_natural:
 ;CHECK: vqdmlsl.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
-	%tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
-	ret <4 x i32> %tmp4
+        %tmp1 = load <4 x i32>* %A
+        %tmp2 = load <4 x i16>* %B
+        %tmp3 = load <4 x i16>* %C
+        %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
+        %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
+        ret <4 x i32> %tmp5
 }
 
-define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vqdmlsls32:
+define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK-LABEL: vqdmlsls32_natural:
 ;CHECK: vqdmlsl.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
-	%tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
-	ret <2 x i64> %tmp4
+        %tmp1 = load <2 x i64>* %A
+        %tmp2 = load <2 x i32>* %B
+        %tmp3 = load <2 x i32>* %C
+        %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
+        %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
+        ret <2 x i64> %tmp5
 }
 
-define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16_natural(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
 entry:
-; CHECK: test_vqdmlsl_lanes16
+; CHECK-LABEL: test_vqdmlsl_lanes16_natural:
 ; CHECK: vqdmlsl.s16 q0, d2, d3[1]
   %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
-  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %1
+  %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0)
+  %2 = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1)
+  ret <4 x i32> %2
 }
 
-define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32_natural(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
 entry:
-; CHECK: test_vqdmlsl_lanes32
+; CHECK-LABEL: test_vqdmlsl_lanes32_natural:
 ; CHECK: vqdmlsl.s32 q0, d2, d3[1]
   %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
-  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %1
+  %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0)
+  %2 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1)
+  ret <2 x i64> %2
 }
 
-declare <4 x i32>  @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64>  @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32>  @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64>  @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index e4d29a337cf0..b5cd71613d4a 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqshls8:
+;CHECK-LABEL: vqshls8:
 ;CHECK: vqshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqshls16:
+;CHECK-LABEL: vqshls16:
 ;CHECK: vqshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqshls32:
+;CHECK-LABEL: vqshls32:
 ;CHECK: vqshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqshls64:
+;CHECK-LABEL: vqshls64:
 ;CHECK: vqshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqshlu8:
+;CHECK-LABEL: vqshlu8:
 ;CHECK: vqshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -46,7 +46,7 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqshlu16:
+;CHECK-LABEL: vqshlu16:
 ;CHECK: vqshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -55,7 +55,7 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqshlu32:
+;CHECK-LABEL: vqshlu32:
 ;CHECK: vqshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -64,7 +64,7 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqshlu64:
+;CHECK-LABEL: vqshlu64:
 ;CHECK: vqshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -73,7 +73,7 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqshlQs8:
+;CHECK-LABEL: vqshlQs8:
 ;CHECK: vqshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -82,7 +82,7 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqshlQs16:
+;CHECK-LABEL: vqshlQs16:
 ;CHECK: vqshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -91,7 +91,7 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqshlQs32:
+;CHECK-LABEL: vqshlQs32:
 ;CHECK: vqshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -100,7 +100,7 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqshlQs64:
+;CHECK-LABEL: vqshlQs64:
 ;CHECK: vqshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -109,7 +109,7 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqshlQu8:
+;CHECK-LABEL: vqshlQu8:
 ;CHECK: vqshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -118,7 +118,7 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqshlQu16:
+;CHECK-LABEL: vqshlQu16:
 ;CHECK: vqshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -127,7 +127,7 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqshlQu32:
+;CHECK-LABEL: vqshlQu32:
 ;CHECK: vqshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -136,7 +136,7 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqshlQu64:
+;CHECK-LABEL: vqshlQu64:
 ;CHECK: vqshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -145,7 +145,7 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
-;CHECK: vqshls_n8:
+;CHECK-LABEL: vqshls_n8:
 ;CHECK: vqshl.s8{{.*#7}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -153,7 +153,7 @@ define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
-;CHECK: vqshls_n16:
+;CHECK-LABEL: vqshls_n16:
 ;CHECK: vqshl.s16{{.*#15}}
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -161,7 +161,7 @@ define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
-;CHECK: vqshls_n32:
+;CHECK-LABEL: vqshls_n32:
 ;CHECK: vqshl.s32{{.*#31}}
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
@@ -169,7 +169,7 @@ define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
-;CHECK: vqshls_n64:
+;CHECK-LABEL: vqshls_n64:
 ;CHECK: vqshl.s64{{.*#63}}
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
@@ -177,7 +177,7 @@ define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
-;CHECK: vqshlu_n8:
+;CHECK-LABEL: vqshlu_n8:
 ;CHECK: vqshl.u8{{.*#7}}
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -185,7 +185,7 @@ define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
-;CHECK: vqshlu_n16:
+;CHECK-LABEL: vqshlu_n16:
 ;CHECK: vqshl.u16{{.*#15}}
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -193,7 +193,7 @@ define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
-;CHECK: vqshlu_n32:
+;CHECK-LABEL: vqshlu_n32:
 ;CHECK: vqshl.u32{{.*#31}}
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
@@ -201,7 +201,7 @@ define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
-;CHECK: vqshlu_n64:
+;CHECK-LABEL: vqshlu_n64:
 ;CHECK: vqshl.u64{{.*#63}}
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
@@ -209,7 +209,7 @@ define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
-;CHECK: vqshlsu_n8:
+;CHECK-LABEL: vqshlsu_n8:
 ;CHECK: vqshlu.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -217,7 +217,7 @@ define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
-;CHECK: vqshlsu_n16:
+;CHECK-LABEL: vqshlsu_n16:
 ;CHECK: vqshlu.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -225,7 +225,7 @@ define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
-;CHECK: vqshlsu_n32:
+;CHECK-LABEL: vqshlsu_n32:
 ;CHECK: vqshlu.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
@@ -233,7 +233,7 @@ define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
-;CHECK: vqshlsu_n64:
+;CHECK-LABEL: vqshlsu_n64:
 ;CHECK: vqshlu.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
@@ -241,7 +241,7 @@ define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
-;CHECK: vqshlQs_n8:
+;CHECK-LABEL: vqshlQs_n8:
 ;CHECK: vqshl.s8{{.*#7}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -249,7 +249,7 @@ define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
-;CHECK: vqshlQs_n16:
+;CHECK-LABEL: vqshlQs_n16:
 ;CHECK: vqshl.s16{{.*#15}}
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -257,7 +257,7 @@ define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
-;CHECK: vqshlQs_n32:
+;CHECK-LABEL: vqshlQs_n32:
 ;CHECK: vqshl.s32{{.*#31}}
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -265,7 +265,7 @@ define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
-;CHECK: vqshlQs_n64:
+;CHECK-LABEL: vqshlQs_n64:
 ;CHECK: vqshl.s64{{.*#63}}
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
@@ -273,7 +273,7 @@ define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
-;CHECK: vqshlQu_n8:
+;CHECK-LABEL: vqshlQu_n8:
 ;CHECK: vqshl.u8{{.*#7}}
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -281,7 +281,7 @@ define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
-;CHECK: vqshlQu_n16:
+;CHECK-LABEL: vqshlQu_n16:
 ;CHECK: vqshl.u16{{.*#15}}
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -289,7 +289,7 @@ define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
-;CHECK: vqshlQu_n32:
+;CHECK-LABEL: vqshlQu_n32:
 ;CHECK: vqshl.u32{{.*#31}}
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -297,7 +297,7 @@ define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
-;CHECK: vqshlQu_n64:
+;CHECK-LABEL: vqshlQu_n64:
 ;CHECK: vqshl.u64{{.*#63}}
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
@@ -305,7 +305,7 @@ define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
-;CHECK: vqshlQsu_n8:
+;CHECK-LABEL: vqshlQsu_n8:
 ;CHECK: vqshlu.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -313,7 +313,7 @@ define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
-;CHECK: vqshlQsu_n16:
+;CHECK-LABEL: vqshlQsu_n16:
 ;CHECK: vqshlu.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -321,7 +321,7 @@ define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
-;CHECK: vqshlQsu_n32:
+;CHECK-LABEL: vqshlQsu_n32:
 ;CHECK: vqshlu.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -329,7 +329,7 @@ define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
-;CHECK: vqshlQsu_n64:
+;CHECK-LABEL: vqshlQsu_n64:
 ;CHECK: vqshlu.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
@@ -367,7 +367,7 @@ declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind
 declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqrshls8:
+;CHECK-LABEL: vqrshls8:
 ;CHECK: vqrshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -376,7 +376,7 @@ define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqrshls16:
+;CHECK-LABEL: vqrshls16:
 ;CHECK: vqrshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -385,7 +385,7 @@ define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqrshls32:
+;CHECK-LABEL: vqrshls32:
 ;CHECK: vqrshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -394,7 +394,7 @@ define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqrshls64:
+;CHECK-LABEL: vqrshls64:
 ;CHECK: vqrshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -403,7 +403,7 @@ define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqrshlu8:
+;CHECK-LABEL: vqrshlu8:
 ;CHECK: vqrshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -412,7 +412,7 @@ define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqrshlu16:
+;CHECK-LABEL: vqrshlu16:
 ;CHECK: vqrshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -421,7 +421,7 @@ define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqrshlu32:
+;CHECK-LABEL: vqrshlu32:
 ;CHECK: vqrshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -430,7 +430,7 @@ define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqrshlu64:
+;CHECK-LABEL: vqrshlu64:
 ;CHECK: vqrshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -439,7 +439,7 @@ define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqrshlQs8:
+;CHECK-LABEL: vqrshlQs8:
 ;CHECK: vqrshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -448,7 +448,7 @@ define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqrshlQs16:
+;CHECK-LABEL: vqrshlQs16:
 ;CHECK: vqrshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -457,7 +457,7 @@ define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqrshlQs32:
+;CHECK-LABEL: vqrshlQs32:
 ;CHECK: vqrshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -466,7 +466,7 @@ define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqrshlQs64:
+;CHECK-LABEL: vqrshlQs64:
 ;CHECK: vqrshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -475,7 +475,7 @@ define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqrshlQu8:
+;CHECK-LABEL: vqrshlQu8:
 ;CHECK: vqrshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -484,7 +484,7 @@ define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqrshlQu16:
+;CHECK-LABEL: vqrshlQu16:
 ;CHECK: vqrshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -493,7 +493,7 @@ define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqrshlQu32:
+;CHECK-LABEL: vqrshlQu32:
 ;CHECK: vqrshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -502,7 +502,7 @@ define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqrshlQu64:
+;CHECK-LABEL: vqrshlQu64:
 ;CHECK: vqrshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
index 5da79432bb42..4abae700f877 100644
--- a/test/CodeGen/ARM/vqshrn.ll
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
-;CHECK: vqshrns8:
+;CHECK-LABEL: vqshrns8:
 ;CHECK: vqshrn.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -9,7 +9,7 @@ define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
-;CHECK: vqshrns16:
+;CHECK-LABEL: vqshrns16:
 ;CHECK: vqshrn.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -17,7 +17,7 @@ define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
-;CHECK: vqshrns32:
+;CHECK-LABEL: vqshrns32:
 ;CHECK: vqshrn.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
@@ -25,7 +25,7 @@ define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
-;CHECK: vqshrnu8:
+;CHECK-LABEL: vqshrnu8:
 ;CHECK: vqshrn.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -33,7 +33,7 @@ define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
-;CHECK: vqshrnu16:
+;CHECK-LABEL: vqshrnu16:
 ;CHECK: vqshrn.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -41,7 +41,7 @@ define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
-;CHECK: vqshrnu32:
+;CHECK-LABEL: vqshrnu32:
 ;CHECK: vqshrn.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
@@ -49,7 +49,7 @@ define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
-;CHECK: vqshruns8:
+;CHECK-LABEL: vqshruns8:
 ;CHECK: vqshrun.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -57,7 +57,7 @@ define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
-;CHECK: vqshruns16:
+;CHECK-LABEL: vqshruns16:
 ;CHECK: vqshrun.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -65,7 +65,7 @@ define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
-;CHECK: vqshruns32:
+;CHECK-LABEL: vqshruns32:
 ;CHECK: vqshrun.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
@@ -85,7 +85,7 @@ declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind
 declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
-;CHECK: vqrshrns8:
+;CHECK-LABEL: vqrshrns8:
 ;CHECK: vqrshrn.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -93,7 +93,7 @@ define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
-;CHECK: vqrshrns16:
+;CHECK-LABEL: vqrshrns16:
 ;CHECK: vqrshrn.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -101,7 +101,7 @@ define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
-;CHECK: vqrshrns32:
+;CHECK-LABEL: vqrshrns32:
 ;CHECK: vqrshrn.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
@@ -109,7 +109,7 @@ define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
-;CHECK: vqrshrnu8:
+;CHECK-LABEL: vqrshrnu8:
 ;CHECK: vqrshrn.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -117,7 +117,7 @@ define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
-;CHECK: vqrshrnu16:
+;CHECK-LABEL: vqrshrnu16:
 ;CHECK: vqrshrn.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -125,7 +125,7 @@ define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
-;CHECK: vqrshrnu32:
+;CHECK-LABEL: vqrshrnu32:
 ;CHECK: vqrshrn.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
@@ -133,7 +133,7 @@ define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
-;CHECK: vqrshruns8:
+;CHECK-LABEL: vqrshruns8:
 ;CHECK: vqrshrun.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -141,7 +141,7 @@ define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
-;CHECK: vqrshruns16:
+;CHECK-LABEL: vqrshruns16:
 ;CHECK: vqrshrun.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -149,7 +149,7 @@ define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
-;CHECK: vqrshruns32:
+;CHECK-LABEL: vqrshruns32:
 ;CHECK: vqrshrun.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
index 4231fca37e37..90bc3492fc53 100644
--- a/test/CodeGen/ARM/vqsub.ll
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqsubs8:
+;CHECK-LABEL: vqsubs8:
 ;CHECK: vqsub.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqsubs16:
+;CHECK-LABEL: vqsubs16:
 ;CHECK: vqsub.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqsubs32:
+;CHECK-LABEL: vqsubs32:
 ;CHECK: vqsub.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqsubs64:
+;CHECK-LABEL: vqsubs64:
 ;CHECK: vqsub.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqsubu8:
+;CHECK-LABEL: vqsubu8:
 ;CHECK: vqsub.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -46,7 +46,7 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqsubu16:
+;CHECK-LABEL: vqsubu16:
 ;CHECK: vqsub.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -55,7 +55,7 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqsubu32:
+;CHECK-LABEL: vqsubu32:
 ;CHECK: vqsub.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -64,7 +64,7 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqsubu64:
+;CHECK-LABEL: vqsubu64:
 ;CHECK: vqsub.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -73,7 +73,7 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqsubQs8:
+;CHECK-LABEL: vqsubQs8:
 ;CHECK: vqsub.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -82,7 +82,7 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqsubQs16:
+;CHECK-LABEL: vqsubQs16:
 ;CHECK: vqsub.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -91,7 +91,7 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqsubQs32:
+;CHECK-LABEL: vqsubQs32:
 ;CHECK: vqsub.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -100,7 +100,7 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqsubQs64:
+;CHECK-LABEL: vqsubQs64:
 ;CHECK: vqsub.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -109,7 +109,7 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqsubQu8:
+;CHECK-LABEL: vqsubQu8:
 ;CHECK: vqsub.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -118,7 +118,7 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqsubQu16:
+;CHECK-LABEL: vqsubQu16:
 ;CHECK: vqsub.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -127,7 +127,7 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqsubQu32:
+;CHECK-LABEL: vqsubQu32:
 ;CHECK: vqsub.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -136,7 +136,7 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqsubQu64:
+;CHECK-LABEL: vqsubQu64:
 ;CHECK: vqsub.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
index 99989e9d6144..c0deca995764 100644
--- a/test/CodeGen/ARM/vrec.ll
+++ b/test/CodeGen/ARM/vrec.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
-;CHECK: vrecpei32:
+;CHECK-LABEL: vrecpei32:
 ;CHECK: vrecpe.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
@@ -9,7 +9,7 @@ define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
 }
 
 define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
-;CHECK: vrecpeQi32:
+;CHECK-LABEL: vrecpeQi32:
 ;CHECK: vrecpe.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
@@ -17,7 +17,7 @@ define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
-;CHECK: vrecpef32:
+;CHECK-LABEL: vrecpef32:
 ;CHECK: vrecpe.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
@@ -25,7 +25,7 @@ define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
 }
 
 define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
-;CHECK: vrecpeQf32:
+;CHECK-LABEL: vrecpeQf32:
 ;CHECK: vrecpe.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
@@ -39,7 +39,7 @@ declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
 
 define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vrecpsf32:
+;CHECK-LABEL: vrecpsf32:
 ;CHECK: vrecps.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -48,7 +48,7 @@ define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vrecpsQf32:
+;CHECK-LABEL: vrecpsQf32:
 ;CHECK: vrecps.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -60,7 +60,7 @@ declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwi
 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
-;CHECK: vrsqrtei32:
+;CHECK-LABEL: vrsqrtei32:
 ;CHECK: vrsqrte.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
@@ -68,7 +68,7 @@ define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
 }
 
 define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
-;CHECK: vrsqrteQi32:
+;CHECK-LABEL: vrsqrteQi32:
 ;CHECK: vrsqrte.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
@@ -76,7 +76,7 @@ define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
-;CHECK: vrsqrtef32:
+;CHECK-LABEL: vrsqrtef32:
 ;CHECK: vrsqrte.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
@@ -84,7 +84,7 @@ define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
 }
 
 define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
-;CHECK: vrsqrteQf32:
+;CHECK-LABEL: vrsqrteQf32:
 ;CHECK: vrsqrte.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
@@ -98,7 +98,7 @@ declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
 declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
 
 define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vrsqrtsf32:
+;CHECK-LABEL: vrsqrtsf32:
 ;CHECK: vrsqrts.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -107,7 +107,7 @@ define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vrsqrtsQf32:
+;CHECK-LABEL: vrsqrtsQf32:
 ;CHECK: vrsqrts.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index 122ec0357fbe..b6da694e1805 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
-;CHECK: test_vrev64D8:
+;CHECK-LABEL: test_vrev64D8:
 ;CHECK: vrev64.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -9,7 +9,7 @@ define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
-;CHECK: test_vrev64D16:
+;CHECK-LABEL: test_vrev64D16:
 ;CHECK: vrev64.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -17,7 +17,7 @@ define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
-;CHECK: test_vrev64D32:
+;CHECK-LABEL: test_vrev64D32:
 ;CHECK: vrev64.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
@@ -25,7 +25,7 @@ define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
 }
 
 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
-;CHECK: test_vrev64Df:
+;CHECK-LABEL: test_vrev64Df:
 ;CHECK: vrev64.32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
@@ -33,7 +33,7 @@ define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
 }
 
 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
-;CHECK: test_vrev64Q8:
+;CHECK-LABEL: test_vrev64Q8:
 ;CHECK: vrev64.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
@@ -41,7 +41,7 @@ define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
-;CHECK: test_vrev64Q16:
+;CHECK-LABEL: test_vrev64Q16:
 ;CHECK: vrev64.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
@@ -49,7 +49,7 @@ define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
-;CHECK: test_vrev64Q32:
+;CHECK-LABEL: test_vrev64Q32:
 ;CHECK: vrev64.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
@@ -57,7 +57,7 @@ define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
 }
 
 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
-;CHECK: test_vrev64Qf:
+;CHECK-LABEL: test_vrev64Qf:
 ;CHECK: vrev64.32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
@@ -65,7 +65,7 @@ define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
 }
 
 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
-;CHECK: test_vrev32D8:
+;CHECK-LABEL: test_vrev32D8:
 ;CHECK: vrev32.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
@@ -73,7 +73,7 @@ define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
-;CHECK: test_vrev32D16:
+;CHECK-LABEL: test_vrev32D16:
 ;CHECK: vrev32.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
@@ -81,7 +81,7 @@ define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
 }
 
 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
-;CHECK: test_vrev32Q8:
+;CHECK-LABEL: test_vrev32Q8:
 ;CHECK: vrev32.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -89,7 +89,7 @@ define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
-;CHECK: test_vrev32Q16:
+;CHECK-LABEL: test_vrev32Q16:
 ;CHECK: vrev32.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -97,7 +97,7 @@ define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
 }
 
 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
-;CHECK: test_vrev16D8:
+;CHECK-LABEL: test_vrev16D8:
 ;CHECK: vrev16.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -105,7 +105,7 @@ define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
 }
 
 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
-;CHECK: test_vrev16Q8:
+;CHECK-LABEL: test_vrev16Q8:
 ;CHECK: vrev16.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -115,7 +115,7 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
 ; Undef shuffle indices should not prevent matching to VREV:
 
 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
-;CHECK: test_vrev64D8_undef:
+;CHECK-LABEL: test_vrev64D8_undef:
 ;CHECK: vrev64.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -123,7 +123,7 @@ define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
-;CHECK: test_vrev32Q16_undef:
+;CHECK-LABEL: test_vrev32Q16_undef:
 ;CHECK: vrev32.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
@@ -133,7 +133,7 @@ define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
 ; A vcombine feeding a VREV should not obscure things.  Radar 8597007.
 
 define void @test_with_vcombine(<4 x float>* %v) nounwind {
-;CHECK: test_with_vcombine:
+;CHECK-LABEL: test_with_vcombine:
 ;CHECK-NOT: vext
 ;CHECK: vrev64.32
   %tmp1 = load <4 x float>* %v, align 16
@@ -151,7 +151,7 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
 ; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored
 ; to <2 x i16> when stored to memory.
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
-; CHECK: test_vrev64:
+; CHECK-LABEL: test_vrev64:
 ; CHECK: vst1.32
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll
new file mode 100644
index 000000000000..7e1f7146fd1c
--- /dev/null
+++ b/test/CodeGen/ARM/vsel.ll
@@ -0,0 +1,309 @@
+; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+define void @test_vsel32sgt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32sgt
+  %tst1 = icmp sgt i32 %lhs32, %rhs32 ; signed: lhs > rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f32 s0, s0, s1
+  ret void
+}
+define void @test_vsel64sgt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64sgt
+  %tst1 = icmp sgt i32 %lhs32, %rhs32 ; signed: lhs > rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f64 d16, d0, d1
+  ret void
+}
+define void @test_vsel32sge(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32sge
+  %tst1 = icmp sge i32 %lhs32, %rhs32 ; signed: lhs >= rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselge.f32 s0, s0, s1
+  ret void
+}
+define void @test_vsel64sge(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64sge
+  %tst1 = icmp sge i32 %lhs32, %rhs32 ; signed: lhs >= rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselge.f64 d16, d0, d1
+  ret void
+}
+define void @test_vsel32eq(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32eq
+  %tst1 = icmp eq i32 %lhs32, %rhs32 ; integer equality
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vseleq.f32 s0, s0, s1
+  ret void
+}
+define void @test_vsel64eq(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64eq
+  %tst1 = icmp eq i32 %lhs32, %rhs32 ; integer equality
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vseleq.f64 d16, d0, d1
+  ret void
+}
+define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32slt
+  %tst1 = icmp slt i32 %lhs32, %rhs32 ; signed: lhs < rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f32 s0, s1, s0
+  ret void
+}
+define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64slt
+  %tst1 = icmp slt i32 %lhs32, %rhs32 ; signed: lhs < rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f64 d16, d1, d0
+  ret void
+}
+define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32sle
+  %tst1 = icmp sle i32 %lhs32, %rhs32 ; signed: lhs <= rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselge.f32 s0, s1, s0
+  ret void
+}
+define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64sle
+  %tst1 = icmp sle i32 %lhs32, %rhs32 ; signed: lhs <= rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselge.f64 d16, d1, d0
+  ret void
+}
+define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32ogt
+  %tst1 = fcmp ogt float %lhs32, %rhs32 ; ordered (no NaN) and lhs > rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64ogt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64ogt
+  %tst1 = fcmp ogt float %lhs32, %rhs32 ; ordered (no NaN) and lhs > rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32oge(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32oge
+  %tst1 = fcmp oge float %lhs32, %rhs32 ; ordered (no NaN) and lhs >= rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64oge(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64oge
+  %tst1 = fcmp oge float %lhs32, %rhs32 ; ordered (no NaN) and lhs >= rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32oeq
+  %tst1 = fcmp oeq float %lhs32, %rhs32 ; ordered (no NaN) and lhs == rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64oeq
+  %tst1 = fcmp oeq float %lhs32, %rhs32 ; ordered (no NaN) and lhs == rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32ugt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32ugt
+  %tst1 = fcmp ugt float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs > rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ugt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64ugt
+  %tst1 = fcmp ugt float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs > rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32uge(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32uge
+  %tst1 = fcmp uge float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs >= rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64uge(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64uge
+  %tst1 = fcmp uge float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs >= rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32olt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32olt
+  %tst1 = fcmp olt float %lhs32, %rhs32 ; ordered (no NaN) and lhs < rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64olt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64olt
+  %tst1 = fcmp olt float %lhs32, %rhs32 ; ordered (no NaN) and lhs < rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32ult(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32ult
+  %tst1 = fcmp ult float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs < rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ult(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64ult
+  %tst1 = fcmp ult float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs < rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32ole(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32ole
+  %tst1 = fcmp ole float %lhs32, %rhs32 ; ordered (no NaN) and lhs <= rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64ole(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64ole
+  %tst1 = fcmp ole float %lhs32, %rhs32 ; ordered (no NaN) and lhs <= rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32ule(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32ule
+  %tst1 = fcmp ule float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs <= rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ule(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64ule
+  %tst1 = fcmp ule float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs <= rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32ord(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32ord
+  %tst1 = fcmp ord float %lhs32, %rhs32 ; ordered: neither operand is NaN
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ord(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64ord
+  %tst1 = fcmp ord float %lhs32, %rhs32 ; ordered: neither operand is NaN
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32une
+  %tst1 = fcmp une float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs != rhs
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64une
+  %tst1 = fcmp une float %lhs32, %rhs32 ; unordered (a NaN operand) or lhs != rhs
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32uno(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK-LABEL: test_vsel32uno
+  %tst1 = fcmp uno float %lhs32, %rhs32 ; unordered: at least one operand is NaN
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64uno(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK-LABEL: test_vsel64uno
+  %tst1 = fcmp uno float %lhs32, %rhs32 ; unordered: at least one operand is NaN
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f64 d16, d1, d2
+  ret void
+}
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
index 7e79d6c68c2b..9ea56a47bd23 100644
--- a/test/CodeGen/ARM/vselect_imax.ll
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -1,3 +1,4 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 ; Make sure that ARM backend with NEON handles vselect.
 
@@ -14,17 +15,14 @@ define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
 ; lowering we also need to adjust the cost.
 %T0_10 = type <16 x i16>
 %T1_10 = type <16 x i1>
-; CHECK: func_blend10:
+; CHECK-LABEL: func_blend10:
 define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                            %T1_10* %blend, %T0_10* %storeaddr) {
   %v0 = load %T0_10* %loadaddr
   %v1 = load %T0_10* %loadaddr2
   %c = icmp slt %T0_10 %v0, %v1
-; CHECK: vst1
-; CHECK: vst1
-; CHECK: vst1
-; CHECK: vst1
-; CHECK: vld
+; CHECK: vbsl
+; CHECK: vbsl
 ; COST: func_blend10
 ; COST: cost of 40 {{.*}} select
   %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
@@ -33,16 +31,14 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
 }
 %T0_14 = type <8 x i32>
 %T1_14 = type <8 x i1>
-; CHECK: func_blend14:
+; CHECK-LABEL: func_blend14:
 define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                            %T1_14* %blend, %T0_14* %storeaddr) {
   %v0 = load %T0_14* %loadaddr
   %v1 = load %T0_14* %loadaddr2
   %c = icmp slt %T0_14 %v0, %v1
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
+; CHECK: vbsl
+; CHECK: vbsl
 ; COST: func_blend14
 ; COST: cost of 41 {{.*}} select
   %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
@@ -51,16 +47,14 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
 }
 %T0_15 = type <16 x i32>
 %T1_15 = type <16 x i1>
-; CHECK: func_blend15:
+; CHECK-LABEL: func_blend15:
 define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
                            %T1_15* %blend, %T0_15* %storeaddr) {
+; CHECK: vbsl
+; CHECK: vbsl
   %v0 = load %T0_15* %loadaddr
   %v1 = load %T0_15* %loadaddr2
   %c = icmp slt %T0_15 %v0, %v1
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
 ; COST: func_blend15
 ; COST: cost of 82 {{.*}} select
   %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
@@ -69,16 +63,14 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
 }
 %T0_18 = type <4 x i64>
 %T1_18 = type <4 x i1>
-; CHECK: func_blend18:
+; CHECK-LABEL: func_blend18:
 define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                            %T1_18* %blend, %T0_18* %storeaddr) {
+; CHECK: vbsl
+; CHECK: vbsl
   %v0 = load %T0_18* %loadaddr
   %v1 = load %T0_18* %loadaddr2
   %c = icmp slt %T0_18 %v0, %v1
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
-; CHECK: strh
 ; COST: func_blend18
 ; COST: cost of 19 {{.*}} select
   %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
@@ -87,16 +79,16 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
 }
 %T0_19 = type <8 x i64>
 %T1_19 = type <8 x i1>
-; CHECK: func_blend19:
+; CHECK-LABEL: func_blend19:
 define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                            %T1_19* %blend, %T0_19* %storeaddr) {
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
   %v0 = load %T0_19* %loadaddr
   %v1 = load %T0_19* %loadaddr2
   %c = icmp slt %T0_19 %v0, %v1
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
 ; COST: func_blend19
 ; COST: cost of 50 {{.*}} select
   %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
@@ -105,16 +97,20 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
 }
 %T0_20 = type <16 x i64>
 %T1_20 = type <16 x i1>
-; CHECK: func_blend20:
+; CHECK-LABEL: func_blend20:
 define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                            %T1_20* %blend, %T0_20* %storeaddr) {
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
+; CHECK: vbsl
   %v0 = load %T0_20* %loadaddr
   %v1 = load %T0_20* %loadaddr2
   %c = icmp slt %T0_20 %v0, %v1
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
 ; COST: func_blend20
 ; COST: cost of 100 {{.*}} select
   %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index f3cbec7457d0..de380d3d12b3 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vshls8:
+;CHECK-LABEL: vshls8:
 ;CHECK: vshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vshls16:
+;CHECK-LABEL: vshls16:
 ;CHECK: vshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vshls32:
+;CHECK-LABEL: vshls32:
 ;CHECK: vshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vshls64:
+;CHECK-LABEL: vshls64:
 ;CHECK: vshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
-;CHECK: vshli8:
+;CHECK-LABEL: vshli8:
 ;CHECK: vshl.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
@@ -45,7 +45,7 @@ define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
-;CHECK: vshli16:
+;CHECK-LABEL: vshli16:
 ;CHECK: vshl.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
@@ -53,7 +53,7 @@ define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
-;CHECK: vshli32:
+;CHECK-LABEL: vshli32:
 ;CHECK: vshl.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
@@ -61,7 +61,7 @@ define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
-;CHECK: vshli64:
+;CHECK-LABEL: vshli64:
 ;CHECK: vshl.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
@@ -69,7 +69,7 @@ define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vshlQs8:
+;CHECK-LABEL: vshlQs8:
 ;CHECK: vshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -78,7 +78,7 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vshlQs16:
+;CHECK-LABEL: vshlQs16:
 ;CHECK: vshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -87,7 +87,7 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vshlQs32:
+;CHECK-LABEL: vshlQs32:
 ;CHECK: vshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -96,7 +96,7 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vshlQs64:
+;CHECK-LABEL: vshlQs64:
 ;CHECK: vshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -105,7 +105,7 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
-;CHECK: vshlQi8:
+;CHECK-LABEL: vshlQi8:
 ;CHECK: vshl.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
@@ -113,7 +113,7 @@ define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
-;CHECK: vshlQi16:
+;CHECK-LABEL: vshlQi16:
 ;CHECK: vshl.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
@@ -121,7 +121,7 @@ define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
-;CHECK: vshlQi32:
+;CHECK-LABEL: vshlQi32:
 ;CHECK: vshl.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
@@ -129,7 +129,7 @@ define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
-;CHECK: vshlQi64:
+;CHECK-LABEL: vshlQi64:
 ;CHECK: vshl.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
@@ -137,7 +137,7 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vlshru8:
+;CHECK-LABEL: vlshru8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.u8
 	%tmp1 = load <8 x i8>* %A
@@ -147,7 +147,7 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vlshru16:
+;CHECK-LABEL: vlshru16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.u16
 	%tmp1 = load <4 x i16>* %A
@@ -157,7 +157,7 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vlshru32:
+;CHECK-LABEL: vlshru32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.u32
 	%tmp1 = load <2 x i32>* %A
@@ -167,7 +167,7 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vlshru64:
+;CHECK-LABEL: vlshru64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.u64
 	%tmp1 = load <1 x i64>* %A
@@ -177,7 +177,7 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
-;CHECK: vlshri8:
+;CHECK-LABEL: vlshri8:
 ;CHECK: vshr.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -185,7 +185,7 @@ define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
-;CHECK: vlshri16:
+;CHECK-LABEL: vlshri16:
 ;CHECK: vshr.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
@@ -193,7 +193,7 @@ define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
-;CHECK: vlshri32:
+;CHECK-LABEL: vlshri32:
 ;CHECK: vshr.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 >
@@ -201,7 +201,7 @@ define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
-;CHECK: vlshri64:
+;CHECK-LABEL: vlshri64:
 ;CHECK: vshr.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = lshr <1 x i64> %tmp1, < i64 64 >
@@ -209,7 +209,7 @@ define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vlshrQu8:
+;CHECK-LABEL: vlshrQu8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.u8
 	%tmp1 = load <16 x i8>* %A
@@ -219,7 +219,7 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vlshrQu16:
+;CHECK-LABEL: vlshrQu16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.u16
 	%tmp1 = load <8 x i16>* %A
@@ -229,7 +229,7 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vlshrQu32:
+;CHECK-LABEL: vlshrQu32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.u32
 	%tmp1 = load <4 x i32>* %A
@@ -239,7 +239,7 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vlshrQu64:
+;CHECK-LABEL: vlshrQu64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.u64
 	%tmp1 = load <2 x i64>* %A
@@ -249,7 +249,7 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
-;CHECK: vlshrQi8:
+;CHECK-LABEL: vlshrQi8:
 ;CHECK: vshr.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -257,7 +257,7 @@ define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
-;CHECK: vlshrQi16:
+;CHECK-LABEL: vlshrQi16:
 ;CHECK: vshr.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -265,7 +265,7 @@ define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
-;CHECK: vlshrQi32:
+;CHECK-LABEL: vlshrQi32:
 ;CHECK: vshr.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
@@ -273,7 +273,7 @@ define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
-;CHECK: vlshrQi64:
+;CHECK-LABEL: vlshrQi64:
 ;CHECK: vshr.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 >
@@ -288,7 +288,7 @@ entry:
 }
 
 define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vashrs8:
+;CHECK-LABEL: vashrs8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.s8
 	%tmp1 = load <8 x i8>* %A
@@ -298,7 +298,7 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vashrs16:
+;CHECK-LABEL: vashrs16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.s16
 	%tmp1 = load <4 x i16>* %A
@@ -308,7 +308,7 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vashrs32:
+;CHECK-LABEL: vashrs32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.s32
 	%tmp1 = load <2 x i32>* %A
@@ -318,7 +318,7 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vashrs64:
+;CHECK-LABEL: vashrs64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.s64
 	%tmp1 = load <1 x i64>* %A
@@ -328,7 +328,7 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
-;CHECK: vashri8:
+;CHECK-LABEL: vashri8:
 ;CHECK: vshr.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -336,7 +336,7 @@ define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
-;CHECK: vashri16:
+;CHECK-LABEL: vashri16:
 ;CHECK: vshr.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 >
@@ -344,7 +344,7 @@ define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
-;CHECK: vashri32:
+;CHECK-LABEL: vashri32:
 ;CHECK: vshr.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 >
@@ -352,7 +352,7 @@ define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
-;CHECK: vashri64:
+;CHECK-LABEL: vashri64:
 ;CHECK: vshr.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = ashr <1 x i64> %tmp1, < i64 64 >
@@ -360,7 +360,7 @@ define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vashrQs8:
+;CHECK-LABEL: vashrQs8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.s8
 	%tmp1 = load <16 x i8>* %A
@@ -370,7 +370,7 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vashrQs16:
+;CHECK-LABEL: vashrQs16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.s16
 	%tmp1 = load <8 x i16>* %A
@@ -380,7 +380,7 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vashrQs32:
+;CHECK-LABEL: vashrQs32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.s32
 	%tmp1 = load <4 x i32>* %A
@@ -390,7 +390,7 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vashrQs64:
+;CHECK-LABEL: vashrQs64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.s64
 	%tmp1 = load <2 x i64>* %A
@@ -400,7 +400,7 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
-;CHECK: vashrQi8:
+;CHECK-LABEL: vashrQi8:
 ;CHECK: vshr.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
@@ -408,7 +408,7 @@ define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
-;CHECK: vashrQi16:
+;CHECK-LABEL: vashrQi16:
 ;CHECK: vshr.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
@@ -416,7 +416,7 @@ define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
-;CHECK: vashrQi32:
+;CHECK-LABEL: vashrQi32:
 ;CHECK: vshr.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 >
@@ -424,7 +424,7 @@ define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
-;CHECK: vashrQi64:
+;CHECK-LABEL: vashrQi64:
 ;CHECK: vshr.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 >
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
index 3a4f8574e397..27610bfa677d 100644
--- a/test/CodeGen/ARM/vshiftins.ll
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsli8:
+;CHECK-LABEL: vsli8:
 ;CHECK: vsli.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsli16:
+;CHECK-LABEL: vsli16:
 ;CHECK: vsli.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsli32:
+;CHECK-LABEL: vsli32:
 ;CHECK: vsli.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vsli64:
+;CHECK-LABEL: vsli64:
 ;CHECK: vsli.64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vsliQ8:
+;CHECK-LABEL: vsliQ8:
 ;CHECK: vsli.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -46,7 +46,7 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsliQ16:
+;CHECK-LABEL: vsliQ16:
 ;CHECK: vsli.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -55,7 +55,7 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsliQ32:
+;CHECK-LABEL: vsliQ32:
 ;CHECK: vsli.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -64,7 +64,7 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsliQ64:
+;CHECK-LABEL: vsliQ64:
 ;CHECK: vsli.64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -73,7 +73,7 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsri8:
+;CHECK-LABEL: vsri8:
 ;CHECK: vsri.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -82,7 +82,7 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsri16:
+;CHECK-LABEL: vsri16:
 ;CHECK: vsri.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -91,7 +91,7 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsri32:
+;CHECK-LABEL: vsri32:
 ;CHECK: vsri.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -100,7 +100,7 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vsri64:
+;CHECK-LABEL: vsri64:
 ;CHECK: vsri.64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -109,7 +109,7 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vsriQ8:
+;CHECK-LABEL: vsriQ8:
 ;CHECK: vsri.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -118,7 +118,7 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsriQ16:
+;CHECK-LABEL: vsriQ16:
 ;CHECK: vsri.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -127,7 +127,7 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsriQ32:
+;CHECK-LABEL: vsriQ32:
 ;CHECK: vsri.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -136,7 +136,7 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsriQ64:
+;CHECK-LABEL: vsriQ64:
 ;CHECK: vsri.64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
index 818e71b8ff89..462f7fe7fb05 100644
--- a/test/CodeGen/ARM/vshl.ll
+++ b/test/CodeGen/ARM/vshl.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vshls8:
+;CHECK-LABEL: vshls8:
 ;CHECK: vshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vshls16:
+;CHECK-LABEL: vshls16:
 ;CHECK: vshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vshls32:
+;CHECK-LABEL: vshls32:
 ;CHECK: vshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vshls64:
+;CHECK-LABEL: vshls64:
 ;CHECK: vshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vshlu8:
+;CHECK-LABEL: vshlu8:
 ;CHECK: vshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -46,7 +46,7 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vshlu16:
+;CHECK-LABEL: vshlu16:
 ;CHECK: vshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -55,7 +55,7 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vshlu32:
+;CHECK-LABEL: vshlu32:
 ;CHECK: vshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -64,7 +64,7 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vshlu64:
+;CHECK-LABEL: vshlu64:
 ;CHECK: vshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -73,7 +73,7 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vshlQs8:
+;CHECK-LABEL: vshlQs8:
 ;CHECK: vshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -82,7 +82,7 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vshlQs16:
+;CHECK-LABEL: vshlQs16:
 ;CHECK: vshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -91,7 +91,7 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vshlQs32:
+;CHECK-LABEL: vshlQs32:
 ;CHECK: vshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -100,7 +100,7 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vshlQs64:
+;CHECK-LABEL: vshlQs64:
 ;CHECK: vshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -109,7 +109,7 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vshlQu8:
+;CHECK-LABEL: vshlQu8:
 ;CHECK: vshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -118,7 +118,7 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vshlQu16:
+;CHECK-LABEL: vshlQu16:
 ;CHECK: vshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -127,7 +127,7 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vshlQu32:
+;CHECK-LABEL: vshlQu32:
 ;CHECK: vshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -136,7 +136,7 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vshlQu64:
+;CHECK-LABEL: vshlQu64:
 ;CHECK: vshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -148,7 +148,7 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ; Test a mix of both signed and unsigned intrinsics.
 
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
-;CHECK: vshli8:
+;CHECK-LABEL: vshli8:
 ;CHECK: vshl.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -156,7 +156,7 @@ define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
-;CHECK: vshli16:
+;CHECK-LABEL: vshli16:
 ;CHECK: vshl.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -164,7 +164,7 @@ define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
-;CHECK: vshli32:
+;CHECK-LABEL: vshli32:
 ;CHECK: vshl.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
@@ -172,7 +172,7 @@ define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
-;CHECK: vshli64:
+;CHECK-LABEL: vshli64:
 ;CHECK: vshl.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
@@ -180,7 +180,7 @@ define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
-;CHECK: vshlQi8:
+;CHECK-LABEL: vshlQi8:
 ;CHECK: vshl.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -188,7 +188,7 @@ define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
-;CHECK: vshlQi16:
+;CHECK-LABEL: vshlQi16:
 ;CHECK: vshl.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
@@ -196,7 +196,7 @@ define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
-;CHECK: vshlQi32:
+;CHECK-LABEL: vshlQi32:
 ;CHECK: vshl.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
@@ -204,7 +204,7 @@ define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
-;CHECK: vshlQi64:
+;CHECK-LABEL: vshlQi64:
 ;CHECK: vshl.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
@@ -214,7 +214,7 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
 ; Right shift by immediate:
 
 define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
-;CHECK: vshrs8:
+;CHECK-LABEL: vshrs8:
 ;CHECK: vshr.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -222,7 +222,7 @@ define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
-;CHECK: vshrs16:
+;CHECK-LABEL: vshrs16:
 ;CHECK: vshr.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -230,7 +230,7 @@ define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
-;CHECK: vshrs32:
+;CHECK-LABEL: vshrs32:
 ;CHECK: vshr.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
@@ -238,7 +238,7 @@ define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
-;CHECK: vshrs64:
+;CHECK-LABEL: vshrs64:
 ;CHECK: vshr.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
@@ -246,7 +246,7 @@ define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
-;CHECK: vshru8:
+;CHECK-LABEL: vshru8:
 ;CHECK: vshr.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -254,7 +254,7 @@ define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
-;CHECK: vshru16:
+;CHECK-LABEL: vshru16:
 ;CHECK: vshr.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -262,7 +262,7 @@ define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
-;CHECK: vshru32:
+;CHECK-LABEL: vshru32:
 ;CHECK: vshr.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
@@ -270,7 +270,7 @@ define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
-;CHECK: vshru64:
+;CHECK-LABEL: vshru64:
 ;CHECK: vshr.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
@@ -278,7 +278,7 @@ define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
-;CHECK: vshrQs8:
+;CHECK-LABEL: vshrQs8:
 ;CHECK: vshr.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -286,7 +286,7 @@ define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
-;CHECK: vshrQs16:
+;CHECK-LABEL: vshrQs16:
 ;CHECK: vshr.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -294,7 +294,7 @@ define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
-;CHECK: vshrQs32:
+;CHECK-LABEL: vshrQs32:
 ;CHECK: vshr.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -302,7 +302,7 @@ define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
-;CHECK: vshrQs64:
+;CHECK-LABEL: vshrQs64:
 ;CHECK: vshr.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
@@ -310,7 +310,7 @@ define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
-;CHECK: vshrQu8:
+;CHECK-LABEL: vshrQu8:
 ;CHECK: vshr.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -318,7 +318,7 @@ define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
-;CHECK: vshrQu16:
+;CHECK-LABEL: vshrQu16:
 ;CHECK: vshr.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -326,7 +326,7 @@ define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
-;CHECK: vshrQu32:
+;CHECK-LABEL: vshrQu32:
 ;CHECK: vshr.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -334,7 +334,7 @@ define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
-;CHECK: vshrQu64:
+;CHECK-LABEL: vshrQu64:
 ;CHECK: vshr.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
@@ -362,7 +362,7 @@ declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind re
 declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrshls8:
+;CHECK-LABEL: vrshls8:
 ;CHECK: vrshl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -371,7 +371,7 @@ define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrshls16:
+;CHECK-LABEL: vrshls16:
 ;CHECK: vrshl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -380,7 +380,7 @@ define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrshls32:
+;CHECK-LABEL: vrshls32:
 ;CHECK: vrshl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -389,7 +389,7 @@ define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vrshls64:
+;CHECK-LABEL: vrshls64:
 ;CHECK: vrshl.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -398,7 +398,7 @@ define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrshlu8:
+;CHECK-LABEL: vrshlu8:
 ;CHECK: vrshl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -407,7 +407,7 @@ define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrshlu16:
+;CHECK-LABEL: vrshlu16:
 ;CHECK: vrshl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -416,7 +416,7 @@ define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrshlu32:
+;CHECK-LABEL: vrshlu32:
 ;CHECK: vrshl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -425,7 +425,7 @@ define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vrshlu64:
+;CHECK-LABEL: vrshlu64:
 ;CHECK: vrshl.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -434,7 +434,7 @@ define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrshlQs8:
+;CHECK-LABEL: vrshlQs8:
 ;CHECK: vrshl.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -443,7 +443,7 @@ define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrshlQs16:
+;CHECK-LABEL: vrshlQs16:
 ;CHECK: vrshl.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -452,7 +452,7 @@ define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrshlQs32:
+;CHECK-LABEL: vrshlQs32:
 ;CHECK: vrshl.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -461,7 +461,7 @@ define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrshlQs64:
+;CHECK-LABEL: vrshlQs64:
 ;CHECK: vrshl.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -470,7 +470,7 @@ define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrshlQu8:
+;CHECK-LABEL: vrshlQu8:
 ;CHECK: vrshl.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -479,7 +479,7 @@ define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrshlQu16:
+;CHECK-LABEL: vrshlQu16:
 ;CHECK: vrshl.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -488,7 +488,7 @@ define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrshlQu32:
+;CHECK-LABEL: vrshlQu32:
 ;CHECK: vrshl.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -497,7 +497,7 @@ define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrshlQu64:
+;CHECK-LABEL: vrshlQu64:
 ;CHECK: vrshl.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -506,7 +506,7 @@ define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
-;CHECK: vrshrs8:
+;CHECK-LABEL: vrshrs8:
 ;CHECK: vrshr.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -514,7 +514,7 @@ define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
-;CHECK: vrshrs16:
+;CHECK-LABEL: vrshrs16:
 ;CHECK: vrshr.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -522,7 +522,7 @@ define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
-;CHECK: vrshrs32:
+;CHECK-LABEL: vrshrs32:
 ;CHECK: vrshr.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
@@ -530,7 +530,7 @@ define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
-;CHECK: vrshrs64:
+;CHECK-LABEL: vrshrs64:
 ;CHECK: vrshr.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
@@ -538,7 +538,7 @@ define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
 }
 
 define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
-;CHECK: vrshru8:
+;CHECK-LABEL: vrshru8:
 ;CHECK: vrshr.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -546,7 +546,7 @@ define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
-;CHECK: vrshru16:
+;CHECK-LABEL: vrshru16:
 ;CHECK: vrshr.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -554,7 +554,7 @@ define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
-;CHECK: vrshru32:
+;CHECK-LABEL: vrshru32:
 ;CHECK: vrshr.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
@@ -562,7 +562,7 @@ define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
 }
 
 define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
-;CHECK: vrshru64:
+;CHECK-LABEL: vrshru64:
 ;CHECK: vrshr.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
@@ -570,7 +570,7 @@ define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
-;CHECK: vrshrQs8:
+;CHECK-LABEL: vrshrQs8:
 ;CHECK: vrshr.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -578,7 +578,7 @@ define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
-;CHECK: vrshrQs16:
+;CHECK-LABEL: vrshrQs16:
 ;CHECK: vrshr.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -586,7 +586,7 @@ define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
-;CHECK: vrshrQs32:
+;CHECK-LABEL: vrshrQs32:
 ;CHECK: vrshr.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -594,7 +594,7 @@ define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
-;CHECK: vrshrQs64:
+;CHECK-LABEL: vrshrQs64:
 ;CHECK: vrshr.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
@@ -602,7 +602,7 @@ define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
 }
 
 define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
-;CHECK: vrshrQu8:
+;CHECK-LABEL: vrshrQu8:
 ;CHECK: vrshr.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
@@ -610,7 +610,7 @@ define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
 }
 
 define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
-;CHECK: vrshrQu16:
+;CHECK-LABEL: vrshrQu16:
 ;CHECK: vrshr.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
@@ -618,7 +618,7 @@ define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
-;CHECK: vrshrQu32:
+;CHECK-LABEL: vrshrQu32:
 ;CHECK: vrshr.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
@@ -626,7 +626,7 @@ define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
-;CHECK: vrshrQu64:
+;CHECK-LABEL: vrshrQu64:
 ;CHECK: vrshr.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
index 8e85b98f49b1..ae806641480b 100644
--- a/test/CodeGen/ARM/vshll.ll
+++ b/test/CodeGen/ARM/vshll.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
-;CHECK: vshlls8:
+;CHECK-LABEL: vshlls8:
 ;CHECK: vshll.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -9,7 +9,7 @@ define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
-;CHECK: vshlls16:
+;CHECK-LABEL: vshlls16:
 ;CHECK: vshll.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -17,7 +17,7 @@ define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
-;CHECK: vshlls32:
+;CHECK-LABEL: vshlls32:
 ;CHECK: vshll.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
@@ -25,7 +25,7 @@ define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
 }
 
 define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
-;CHECK: vshllu8:
+;CHECK-LABEL: vshllu8:
 ;CHECK: vshll.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
@@ -33,7 +33,7 @@ define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
-;CHECK: vshllu16:
+;CHECK-LABEL: vshllu16:
 ;CHECK: vshll.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
@@ -41,7 +41,7 @@ define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
-;CHECK: vshllu32:
+;CHECK-LABEL: vshllu32:
 ;CHECK: vshll.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
@@ -51,7 +51,7 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
 ; The following tests use the maximum shift count, so the signedness is
 ; irrelevant.  Test both signed and unsigned versions.
 define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
-;CHECK: vshlli8:
+;CHECK-LABEL: vshlli8:
 ;CHECK: vshll.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >)
@@ -59,7 +59,7 @@ define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
 }
 
 define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
-;CHECK: vshlli16:
+;CHECK-LABEL: vshlli16:
 ;CHECK: vshll.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >)
@@ -67,7 +67,7 @@ define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
 }
 
 define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
-;CHECK: vshlli32:
+;CHECK-LABEL: vshlli32:
 ;CHECK: vshll.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >)
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index e2544f424a2c..40a94fee0d78 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
-;CHECK: vshrns8:
+;CHECK-LABEL: vshrns8:
 ;CHECK: vshrn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -9,7 +9,7 @@ define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
-;CHECK: vshrns16:
+;CHECK-LABEL: vshrns16:
 ;CHECK: vshrn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -17,7 +17,7 @@ define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
-;CHECK: vshrns32:
+;CHECK-LABEL: vshrns32:
 ;CHECK: vshrn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
@@ -29,7 +29,7 @@ declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind re
 declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
-;CHECK: vrshrns8:
+;CHECK-LABEL: vrshrns8:
 ;CHECK: vrshrn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
@@ -37,7 +37,7 @@ define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
 }
 
 define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
-;CHECK: vrshrns16:
+;CHECK-LABEL: vrshrns16:
 ;CHECK: vrshrn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
@@ -45,7 +45,7 @@ define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
 }
 
 define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
-;CHECK: vrshrns32:
+;CHECK-LABEL: vrshrns32:
 ;CHECK: vrshrn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
index acb672d00fa2..7a211c31ac0c 100644
--- a/test/CodeGen/ARM/vsra.ll
+++ b/test/CodeGen/ARM/vsra.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsras8:
+;CHECK-LABEL: vsras8:
 ;CHECK: vsra.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -11,7 +11,7 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsras16:
+;CHECK-LABEL: vsras16:
 ;CHECK: vsra.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -21,7 +21,7 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsras32:
+;CHECK-LABEL: vsras32:
 ;CHECK: vsra.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -31,7 +31,7 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vsras64:
+;CHECK-LABEL: vsras64:
 ;CHECK: vsra.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -41,7 +41,7 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vsraQs8:
+;CHECK-LABEL: vsraQs8:
 ;CHECK: vsra.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -51,7 +51,7 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsraQs16:
+;CHECK-LABEL: vsraQs16:
 ;CHECK: vsra.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -61,7 +61,7 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsraQs32:
+;CHECK-LABEL: vsraQs32:
 ;CHECK: vsra.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -71,7 +71,7 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsraQs64:
+;CHECK-LABEL: vsraQs64:
 ;CHECK: vsra.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -81,7 +81,7 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsrau8:
+;CHECK-LABEL: vsrau8:
 ;CHECK: vsra.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -91,7 +91,7 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsrau16:
+;CHECK-LABEL: vsrau16:
 ;CHECK: vsra.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -101,7 +101,7 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsrau32:
+;CHECK-LABEL: vsrau32:
 ;CHECK: vsra.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -111,7 +111,7 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vsrau64:
+;CHECK-LABEL: vsrau64:
 ;CHECK: vsra.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -121,7 +121,7 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vsraQu8:
+;CHECK-LABEL: vsraQu8:
 ;CHECK: vsra.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -131,7 +131,7 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsraQu16:
+;CHECK-LABEL: vsraQu16:
 ;CHECK: vsra.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -141,7 +141,7 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsraQu32:
+;CHECK-LABEL: vsraQu32:
 ;CHECK: vsra.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -151,7 +151,7 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsraQu64:
+;CHECK-LABEL: vsraQu64:
 ;CHECK: vsra.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -161,7 +161,7 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrsras8:
+;CHECK-LABEL: vrsras8:
 ;CHECK: vrsra.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -171,7 +171,7 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrsras16:
+;CHECK-LABEL: vrsras16:
 ;CHECK: vrsra.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -181,7 +181,7 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrsras32:
+;CHECK-LABEL: vrsras32:
 ;CHECK: vrsra.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -191,7 +191,7 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vrsras64:
+;CHECK-LABEL: vrsras64:
 ;CHECK: vrsra.s64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -201,7 +201,7 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrsrau8:
+;CHECK-LABEL: vrsrau8:
 ;CHECK: vrsra.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -211,7 +211,7 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrsrau16:
+;CHECK-LABEL: vrsrau16:
 ;CHECK: vrsra.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -221,7 +221,7 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrsrau32:
+;CHECK-LABEL: vrsrau32:
 ;CHECK: vrsra.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -231,7 +231,7 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vrsrau64:
+;CHECK-LABEL: vrsrau64:
 ;CHECK: vrsra.u64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -241,7 +241,7 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrsraQs8:
+;CHECK-LABEL: vrsraQs8:
 ;CHECK: vrsra.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -251,7 +251,7 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrsraQs16:
+;CHECK-LABEL: vrsraQs16:
 ;CHECK: vrsra.s16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -261,7 +261,7 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrsraQs32:
+;CHECK-LABEL: vrsraQs32:
 ;CHECK: vrsra.s32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -271,7 +271,7 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrsraQs64:
+;CHECK-LABEL: vrsraQs64:
 ;CHECK: vrsra.s64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -281,7 +281,7 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrsraQu8:
+;CHECK-LABEL: vrsraQu8:
 ;CHECK: vrsra.u8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -291,7 +291,7 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrsraQu16:
+;CHECK-LABEL: vrsraQu16:
 ;CHECK: vrsra.u16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -301,7 +301,7 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrsraQu32:
+;CHECK-LABEL: vrsraQu32:
 ;CHECK: vrsra.u32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -311,7 +311,7 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrsraQu64:
+;CHECK-LABEL: vrsraQu64:
 ;CHECK: vrsra.u64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index e1f3e8890724..36439fd7adf2 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst1i8:
+;CHECK-LABEL: vst1i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vst1.8 {d16}, [r0:64]
 	%tmp1 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst1i16:
+;CHECK-LABEL: vst1i16:
 ;CHECK: vst1.16
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst1i32:
+;CHECK-LABEL: vst1i32:
 ;CHECK: vst1.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst1f(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst1f:
+;CHECK-LABEL: vst1f:
 ;CHECK: vst1.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -38,7 +38,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
-;CHECK: vst1f_update:
+;CHECK-LABEL: vst1f_update:
 ;CHECK: vst1.32 {d16}, [r1]!
 	%A = load float** %ptr
 	%tmp0 = bitcast float* %A to i8*
@@ -50,7 +50,7 @@ define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
 }
 
 define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
-;CHECK: vst1i64:
+;CHECK-LABEL: vst1i64:
 ;CHECK: vst1.64
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <1 x i64>* %B
@@ -59,7 +59,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
 }
 
 define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
-;CHECK: vst1Qi8:
+;CHECK-LABEL: vst1Qi8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst1.8 {d16, d17}, [r0:64]
 	%tmp1 = load <16 x i8>* %B
@@ -68,7 +68,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
 }
 
 define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst1Qi16:
+;CHECK-LABEL: vst1Qi16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst1.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -79,7 +79,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 
 ;Check for a post-increment updating store with register increment.
 define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
-;CHECK: vst1Qi16_update:
+;CHECK-LABEL: vst1Qi16_update:
 ;CHECK: vst1.16 {d16, d17}, [r1:64], r2
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
@@ -91,7 +91,7 @@ define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 }
 
 define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst1Qi32:
+;CHECK-LABEL: vst1Qi32:
 ;CHECK: vst1.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
@@ -100,7 +100,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst1Qf:
+;CHECK-LABEL: vst1Qf:
 ;CHECK: vst1.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -109,7 +109,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
 }
 
 define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
-;CHECK: vst1Qi64:
+;CHECK-LABEL: vst1Qi64:
 ;CHECK: vst1.64
 	%tmp0 = bitcast i64* %A to i8*
 	%tmp1 = load <2 x i64>* %B
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index a31f8635fe3b..7551a562cf0e 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst2i8:
+;CHECK-LABEL: vst2i8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst2.8 {d16, d17}, [r0:64]
 	%tmp1 = load <8 x i8>* %B
@@ -11,7 +11,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
 
 ;Check for a post-increment updating store with register increment.
 define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
-;CHECK: vst2i8_update:
+;CHECK-LABEL: vst2i8_update:
 ;CHECK: vst2.8 {d16, d17}, [r1], r2
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
@@ -22,7 +22,7 @@ define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 }
 
 define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst2i16:
+;CHECK-LABEL: vst2i16:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst2.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -32,7 +32,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst2i32:
+;CHECK-LABEL: vst2i32:
 ;CHECK: vst2.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -41,7 +41,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst2f(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst2f:
+;CHECK-LABEL: vst2f:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -50,7 +50,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
-;CHECK: vst2i64:
+;CHECK-LABEL: vst2i64:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst1.64 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i64* %A to i8*
@@ -61,7 +61,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
-;CHECK: vst2i64_update:
+;CHECK-LABEL: vst2i64_update:
 ;CHECK: vst1.64 {d16, d17}, [r1:64]!
 	%A = load i64** %ptr
 	%tmp0 = bitcast i64* %A to i8*
@@ -73,7 +73,7 @@ define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
 }
 
 define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
-;CHECK: vst2Qi8:
+;CHECK-LABEL: vst2Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = load <16 x i8>* %B
@@ -82,7 +82,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
 }
 
 define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst2Qi16:
+;CHECK-LABEL: vst2Qi16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -92,7 +92,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst2Qi32:
+;CHECK-LABEL: vst2Qi32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
@@ -102,7 +102,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst2Qf:
+;CHECK-LABEL: vst2Qf:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -111,7 +111,7 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
 }
 
 define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
-;CHECK: vst2update
+;CHECK-LABEL: vst2update:
 ;CHECK: vst2.16 {d16, d17}, [r0]!
 	%tmp1 = load <4 x i16>* %B
 	tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
@@ -120,7 +120,7 @@ define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
 }
 
 define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
-;CHECK: vst2update2
+;CHECK-LABEL: vst2update2:
 ;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
   %tmp1 = load <4 x float>* %this
   call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 281bb730feb7..91eb7fce2b74 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s
 
 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst3i8:
+;CHECK-LABEL: vst3i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
@@ -11,7 +11,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst3i16:
+;CHECK-LABEL: vst3i16:
 ;CHECK: vst3.16
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
@@ -20,7 +20,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst3i32:
+;CHECK-LABEL: vst3i32:
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -30,7 +30,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
-;CHECK: vst3i32_update:
+;CHECK-LABEL: vst3i32_update:
 ;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
@@ -42,7 +42,7 @@ define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 }
 
 define void @vst3f(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst3f:
+;CHECK-LABEL: vst3f:
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -51,7 +51,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
-;CHECK: vst3i64:
+;CHECK-LABEL: vst3i64:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
 ;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
@@ -62,7 +62,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
 }
 
 define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
-;CHECK: vst3Qi8:
+;CHECK-LABEL: vst3Qi8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
@@ -73,7 +73,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
 }
 
 define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst3Qi16:
+;CHECK-LABEL: vst3Qi16:
 ;CHECK: vst3.16
 ;CHECK: vst3.16
 	%tmp0 = bitcast i16* %A to i8*
@@ -84,7 +84,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
-;CHECK: vst3Qi16_update:
+;CHECK-LABEL: vst3Qi16_update:
 ;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
 ;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
 	%A = load i16** %ptr
@@ -97,7 +97,7 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
 }
 
 define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst3Qi32:
+;CHECK-LABEL: vst3Qi32:
 ;CHECK: vst3.32
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
@@ -107,7 +107,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst3Qf:
+;CHECK-LABEL: vst3Qf:
 ;CHECK: vst3.32
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index 7dedb2fafee2..ef5c83a57dbb 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst4i8:
+;CHECK-LABEL: vst4i8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
 	%tmp1 = load <8 x i8>* %B
@@ -11,7 +11,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
 
 ;Check for a post-increment updating store with register increment.
 define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
-;CHECK: vst4i8_update:
+;CHECK-LABEL: vst4i8_update:
 ;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
@@ -22,7 +22,7 @@ define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 }
 
 define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst4i16:
+;CHECK-LABEL: vst4i16:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
@@ -32,7 +32,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst4i32:
+;CHECK-LABEL: vst4i32:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
@@ -42,7 +42,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst4f(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst4f:
+;CHECK-LABEL: vst4f:
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -51,7 +51,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
-;CHECK: vst4i64:
+;CHECK-LABEL: vst4i64:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i64* %A to i8*
@@ -61,7 +61,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
 }
 
 define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
-;CHECK: vst4Qi8:
+;CHECK-LABEL: vst4Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
 ;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
@@ -71,7 +71,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
 }
 
 define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst4Qi16:
+;CHECK-LABEL: vst4Qi16:
 ;Check for no alignment specifier.
 ;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
 ;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
@@ -82,7 +82,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst4Qi32:
+;CHECK-LABEL: vst4Qi32:
 ;CHECK: vst4.32
 ;CHECK: vst4.32
 	%tmp0 = bitcast i32* %A to i8*
@@ -92,7 +92,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst4Qf:
+;CHECK-LABEL: vst4Qf:
 ;CHECK: vst4.32
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
@@ -103,7 +103,7 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
-;CHECK: vst4Qf_update:
+;CHECK-LABEL: vst4Qf_update:
 ;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
 ;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
 	%A = load float** %ptr
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 67f251f70689..34c5c70fffa3 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst1lanei8:
+;CHECK-LABEL: vst1lanei8:
 ;Check the (default) alignment.
 ;CHECK: vst1.8 {d16[3]}, [r0]
 	%tmp1 = load <8 x i8>* %B
@@ -12,8 +12,8 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
-;CHECK: vst1lanei8_update:
-;CHECK: vst1.8 {d16[3]}, [r2]!
+;CHECK-LABEL: vst1lanei8_update:
+;CHECK: vst1.8 {d16[3]}, [{{r[0-9]}}]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 3
@@ -24,7 +24,7 @@ define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 }
 
 define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst1lanei16:
+;CHECK-LABEL: vst1lanei16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vst1.16 {d16[2]}, [r0:16]
 	%tmp1 = load <4 x i16>* %B
@@ -34,7 +34,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst1lanei32:
+;CHECK-LABEL: vst1lanei32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vst1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x i32>* %B
@@ -44,7 +44,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst1lanef:
+;CHECK-LABEL: vst1lanef:
 ;CHECK: vst1.32 {d16[1]}, [r0:32]
 	%tmp1 = load <2 x float>* %B
         %tmp2 = extractelement <2 x float> %tmp1, i32 1
@@ -53,7 +53,7 @@ define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
-;CHECK: vst1laneQi8:
+;CHECK-LABEL: vst1laneQi8:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.8 {d17[1]}, [r0]
 	%tmp1 = load <16 x i8>* %B
@@ -63,7 +63,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 }
 
 define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst1laneQi16:
+;CHECK-LABEL: vst1laneQi16:
 ;CHECK: vst1.16 {d17[1]}, [r0:16]
 	%tmp1 = load <8 x i16>* %B
         %tmp2 = extractelement <8 x i16> %tmp1, i32 5
@@ -72,7 +72,7 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst1laneQi32:
+;CHECK-LABEL: vst1laneQi32:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
 	%tmp1 = load <4 x i32>* %B
@@ -83,7 +83,7 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
-;CHECK: vst1laneQi32_update:
+;CHECK-LABEL: vst1laneQi32_update:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
 	%A = load i32** %ptr
@@ -96,7 +96,7 @@ define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 }
 
 define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst1laneQf:
+;CHECK-LABEL: vst1laneQf:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.32 {d17[1]}, [r0]
 	%tmp1 = load <4 x float>* %B
@@ -106,7 +106,7 @@ define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
 }
 
 define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst2lanei8:
+;CHECK-LABEL: vst2lanei8:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
 	%tmp1 = load <8 x i8>* %B
@@ -115,7 +115,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst2lanei16:
+;CHECK-LABEL: vst2lanei16:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
 	%tmp0 = bitcast i16* %A to i8*
@@ -126,7 +126,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
 
 ;Check for a post-increment updating store with register increment.
 define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
-;CHECK: vst2lanei16_update:
+;CHECK-LABEL: vst2lanei16_update:
 ;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
 	%A = load i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
@@ -138,7 +138,7 @@ define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
 }
 
 define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst2lanei32:
+;CHECK-LABEL: vst2lanei32:
 ;CHECK: vst2.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -147,7 +147,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst2lanef:
+;CHECK-LABEL: vst2lanef:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -156,7 +156,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst2laneQi16:
+;CHECK-LABEL: vst2laneQi16:
 ;Check the (default) alignment.
 ;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
@@ -166,7 +166,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst2laneQi32:
+;CHECK-LABEL: vst2laneQi32:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
 	%tmp0 = bitcast i32* %A to i8*
@@ -176,7 +176,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst2laneQf:
+;CHECK-LABEL: vst2laneQf:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -194,7 +194,7 @@ declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
 declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind
 
 define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst3lanei8:
+;CHECK-LABEL: vst3lanei8:
 ;CHECK: vst3.8
 	%tmp1 = load <8 x i8>* %B
 	call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
@@ -202,7 +202,7 @@ define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 }
 
 define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst3lanei16:
+;CHECK-LABEL: vst3lanei16:
 ;Check the (default) alignment value.  VST3 does not support alignment.
 ;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
@@ -212,7 +212,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst3lanei32:
+;CHECK-LABEL: vst3lanei32:
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <2 x i32>* %B
@@ -221,7 +221,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst3lanef:
+;CHECK-LABEL: vst3lanef:
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -230,7 +230,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst3laneQi16:
+;CHECK-LABEL: vst3laneQi16:
 ;Check the (default) alignment value.  VST3 does not support alignment.
 ;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
@@ -240,7 +240,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst3laneQi32:
+;CHECK-LABEL: vst3laneQi32:
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = load <4 x i32>* %B
@@ -250,7 +250,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
-;CHECK: vst3laneQi32_update:
+;CHECK-LABEL: vst3laneQi32_update:
 ;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
 	%A = load i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
@@ -262,7 +262,7 @@ define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 }
 
 define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst3laneQf:
+;CHECK-LABEL: vst3laneQf:
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -281,7 +281,7 @@ declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x f
 
 
 define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
-;CHECK: vst4lanei8:
+;CHECK-LABEL: vst4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
 	%tmp1 = load <8 x i8>* %B
@@ -291,7 +291,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 ;Check for a post-increment updating store.
 define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
-;CHECK: vst4lanei8_update:
+;CHECK-LABEL: vst4lanei8_update:
 ;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
 	%A = load i8** %ptr
 	%tmp1 = load <8 x i8>* %B
@@ -302,7 +302,7 @@ define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 }
 
 define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
-;CHECK: vst4lanei16:
+;CHECK-LABEL: vst4lanei16:
 ;CHECK: vst4.16
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = load <4 x i16>* %B
@@ -311,7 +311,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
 }
 
 define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
-;CHECK: vst4lanei32:
+;CHECK-LABEL: vst4lanei32:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 	%tmp0 = bitcast i32* %A to i8*
@@ -321,7 +321,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 }
 
 define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
-;CHECK: vst4lanef:
+;CHECK-LABEL: vst4lanef:
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <2 x float>* %B
@@ -330,7 +330,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
 }
 
 define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
-;CHECK: vst4laneQi16:
+;CHECK-LABEL: vst4laneQi16:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
 	%tmp0 = bitcast i16* %A to i8*
@@ -340,7 +340,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 }
 
 define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
-;CHECK: vst4laneQi32:
+;CHECK-LABEL: vst4laneQi32:
 ;Check the (default) alignment.
 ;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
 	%tmp0 = bitcast i32* %A to i8*
@@ -350,7 +350,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 }
 
 define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
-;CHECK: vst4laneQf:
+;CHECK-LABEL: vst4laneQf:
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = load <4 x float>* %B
@@ -360,7 +360,7 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
 
 ; Make sure this doesn't crash; PR10258
 define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
-;CHECK: variable_insertelement:
+;CHECK-LABEL: variable_insertelement:
     %r = insertelement <8 x i16> %a, i16 %b, i32 %c
     ret <8 x i16> %r
 }
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index df77bb31fc8b..6b95b97378e0 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubi8:
+;CHECK-LABEL: vsubi8:
 ;CHECK: vsub.i8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -10,7 +10,7 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubi16:
+;CHECK-LABEL: vsubi16:
 ;CHECK: vsub.i16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -19,7 +19,7 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubi32:
+;CHECK-LABEL: vsubi32:
 ;CHECK: vsub.i32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -28,7 +28,7 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vsubi64:
+;CHECK-LABEL: vsubi64:
 ;CHECK: vsub.i64
 	%tmp1 = load <1 x i64>* %A
 	%tmp2 = load <1 x i64>* %B
@@ -37,7 +37,7 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vsubf32:
+;CHECK-LABEL: vsubf32:
 ;CHECK: vsub.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
@@ -46,7 +46,7 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vsubQi8:
+;CHECK-LABEL: vsubQi8:
 ;CHECK: vsub.i8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
@@ -55,7 +55,7 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsubQi16:
+;CHECK-LABEL: vsubQi16:
 ;CHECK: vsub.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -64,7 +64,7 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsubQi32:
+;CHECK-LABEL: vsubQi32:
 ;CHECK: vsub.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -73,7 +73,7 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsubQi64:
+;CHECK-LABEL: vsubQi64:
 ;CHECK: vsub.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -82,7 +82,7 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vsubQf32:
+;CHECK-LABEL: vsubQf32:
 ;CHECK: vsub.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
@@ -90,39 +90,35 @@ define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 	ret <4 x float> %tmp3
 }
 
-define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsubhni16:
-;CHECK: vsubhn.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
-	ret <8 x i8> %tmp3
+define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind {
+; CHECK-LABEL: vsubhni16_natural:
+; CHECK: vsubhn.i16
+  %sum = sub <8 x i16> %A, %B
+  %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %trunc = trunc <8 x i16> %shift to <8 x i8>
+  ret <8 x i8> %trunc
 }
 
-define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsubhni32:
-;CHECK: vsubhn.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
-	ret <4 x i16> %tmp3
+define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK-LABEL: vsubhni32_natural:
+; CHECK: vsubhn.i32
+  %sum = sub <4 x i32> %A, %B
+  %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+  %trunc = trunc <4 x i32> %shift to <4 x i16>
+  ret <4 x i16> %trunc
 }
 
-define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsubhni64:
-;CHECK: vsubhn.i64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
-	%tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
-	ret <2 x i32> %tmp3
+define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK-LABEL: vsubhni64_natural:
+; CHECK: vsubhn.i64
+  %sum = sub <2 x i64> %A, %B
+  %shift = lshr <2 x i64> %sum, <i64 32, i64 32>
+  %trunc = trunc <2 x i64> %shift to <2 x i32>
+  ret <2 x i32> %trunc
 }
 
-declare <8 x i8>  @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
 define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrsubhni16:
+;CHECK-LABEL: vrsubhni16:
 ;CHECK: vrsubhn.i16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
@@ -131,7 +127,7 @@ define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrsubhni32:
+;CHECK-LABEL: vrsubhni32:
 ;CHECK: vrsubhn.i32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
@@ -140,7 +136,7 @@ define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrsubhni64:
+;CHECK-LABEL: vrsubhni64:
 ;CHECK: vrsubhn.i64
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i64>* %B
@@ -153,7 +149,7 @@ declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind re
 declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
 
 define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubls8:
+;CHECK-LABEL: vsubls8:
 ;CHECK: vsubl.s8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -164,7 +160,7 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubls16:
+;CHECK-LABEL: vsubls16:
 ;CHECK: vsubl.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -175,7 +171,7 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubls32:
+;CHECK-LABEL: vsubls32:
 ;CHECK: vsubl.s32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -186,7 +182,7 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsublu8:
+;CHECK-LABEL: vsublu8:
 ;CHECK: vsubl.u8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -197,7 +193,7 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsublu16:
+;CHECK-LABEL: vsublu16:
 ;CHECK: vsubl.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -208,7 +204,7 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsublu32:
+;CHECK-LABEL: vsublu32:
 ;CHECK: vsubl.u32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -219,7 +215,7 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubws8:
+;CHECK-LABEL: vsubws8:
 ;CHECK: vsubw.s8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -229,7 +225,7 @@ define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubws16:
+;CHECK-LABEL: vsubws16:
 ;CHECK: vsubw.s16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -239,7 +235,7 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubws32:
+;CHECK-LABEL: vsubws32:
 ;CHECK: vsubw.s32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
@@ -249,7 +245,7 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubwu8:
+;CHECK-LABEL: vsubwu8:
 ;CHECK: vsubw.u8
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -259,7 +255,7 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubwu16:
+;CHECK-LABEL: vsubwu16:
 ;CHECK: vsubw.u16
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i16>* %B
@@ -269,7 +265,7 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubwu32:
+;CHECK-LABEL: vsubwu32:
 ;CHECK: vsubw.u32
 	%tmp1 = load <2 x i64>* %A
 	%tmp2 = load <2 x i32>* %B
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
index 926498739e8a..21614b044f9a 100644
--- a/test/CodeGen/ARM/vtbl.ll
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -5,7 +5,7 @@
 %struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>,  <8 x i8>, <8 x i8> }
 
 define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vtbl1:
+;CHECK-LABEL: vtbl1:
 ;CHECK: vtbl.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -14,7 +14,7 @@ define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
-;CHECK: vtbl2:
+;CHECK-LABEL: vtbl2:
 ;CHECK: vtbl.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load %struct.__neon_int8x8x2_t* %B
@@ -25,7 +25,7 @@ define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
 }
 
 define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
-;CHECK: vtbl3:
+;CHECK-LABEL: vtbl3:
 ;CHECK: vtbl.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load %struct.__neon_int8x8x3_t* %B
@@ -37,7 +37,7 @@ define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
 }
 
 define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
-;CHECK: vtbl4:
+;CHECK-LABEL: vtbl4:
 ;CHECK: vtbl.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load %struct.__neon_int8x8x4_t* %B
@@ -50,7 +50,7 @@ define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
 }
 
 define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vtbx1:
+;CHECK-LABEL: vtbx1:
 ;CHECK: vtbx.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
@@ -60,7 +60,7 @@ define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
-;CHECK: vtbx2:
+;CHECK-LABEL: vtbx2:
 ;CHECK: vtbx.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load %struct.__neon_int8x8x2_t* %B
@@ -72,7 +72,7 @@ define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C
 }
 
 define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
-;CHECK: vtbx3:
+;CHECK-LABEL: vtbx3:
 ;CHECK: vtbx.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load %struct.__neon_int8x8x3_t* %B
@@ -85,7 +85,7 @@ define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C
 }
 
 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
-;CHECK: vtbx4:
+;CHECK-LABEL: vtbx4:
 ;CHECK: vtbx.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load %struct.__neon_int8x8x4_t* %B
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index b1c2f93b47c6..7d101bc61952 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vtrni8:
+;CHECK-LABEL: vtrni8:
 ;CHECK: vtrn.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <8 x i8>* %A
@@ -13,7 +13,7 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vtrni16:
+;CHECK-LABEL: vtrni16:
 ;CHECK: vtrn.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <4 x i16>* %A
@@ -25,7 +25,7 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vtrni32:
+;CHECK-LABEL: vtrni32:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.i32
 	%tmp1 = load <2 x i32>* %A
@@ -37,7 +37,7 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vtrnf:
+;CHECK-LABEL: vtrnf:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.f32
 	%tmp1 = load <2 x float>* %A
@@ -49,7 +49,7 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vtrnQi8:
+;CHECK-LABEL: vtrnQi8:
 ;CHECK: vtrn.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <16 x i8>* %A
@@ -61,7 +61,7 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vtrnQi16:
+;CHECK-LABEL: vtrnQi16:
 ;CHECK: vtrn.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <8 x i16>* %A
@@ -73,7 +73,7 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vtrnQi32:
+;CHECK-LABEL: vtrnQi32:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.i32
 	%tmp1 = load <4 x i32>* %A
@@ -85,7 +85,7 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vtrnQf:
+;CHECK-LABEL: vtrnQf:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.f32
 	%tmp1 = load <4 x float>* %A
@@ -99,7 +99,7 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; Undef shuffle indices should not prevent matching to VTRN:
 
 define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vtrni8_undef:
+;CHECK-LABEL: vtrni8_undef:
 ;CHECK: vtrn.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <8 x i8>* %A
@@ -111,7 +111,7 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vtrnQi16_undef:
+;CHECK-LABEL: vtrnQi16_undef:
 ;CHECK: vtrn.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <8 x i16>* %A
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 9130f628919a..2d193c114192 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vuzpi8:
+;CHECK-LABEL: vuzpi8:
 ;CHECK: vuzp.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <8 x i8>* %A
@@ -13,7 +13,7 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vuzpi16:
+;CHECK-LABEL: vuzpi16:
 ;CHECK: vuzp.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <4 x i16>* %A
@@ -27,7 +27,7 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.
 
 define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vuzpQi8:
+;CHECK-LABEL: vuzpQi8:
 ;CHECK: vuzp.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <16 x i8>* %A
@@ -39,7 +39,7 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vuzpQi16:
+;CHECK-LABEL: vuzpQi16:
 ;CHECK: vuzp.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <8 x i16>* %A
@@ -51,7 +51,7 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vuzpQi32:
+;CHECK-LABEL: vuzpQi32:
 ;CHECK: vuzp.32
 ;CHECK-NEXT: vadd.i32
 	%tmp1 = load <4 x i32>* %A
@@ -63,7 +63,7 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vuzpQf:
+;CHECK-LABEL: vuzpQf:
 ;CHECK: vuzp.32
 ;CHECK-NEXT: vadd.f32
 	%tmp1 = load <4 x float>* %A
@@ -77,7 +77,7 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; Undef shuffle indices should not prevent matching to VUZP:
 
 define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vuzpi8_undef:
+;CHECK-LABEL: vuzpi8_undef:
 ;CHECK: vuzp.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <8 x i8>* %A
@@ -89,7 +89,7 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vuzpQi16_undef:
+;CHECK-LABEL: vuzpQi16_undef:
 ;CHECK: vuzp.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <8 x i16>* %A
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index 926970aeb29b..f71aef7ef139 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vzipi8:
+;CHECK-LABEL: vzipi8:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <8 x i8>* %A
@@ -13,7 +13,7 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vzipi16:
+;CHECK-LABEL: vzipi16:
 ;CHECK: vzip.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <4 x i16>* %A
@@ -27,7 +27,7 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
 
 define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vzipQi8:
+;CHECK-LABEL: vzipQi8:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <16 x i8>* %A
@@ -39,7 +39,7 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vzipQi16:
+;CHECK-LABEL: vzipQi16:
 ;CHECK: vzip.16
 ;CHECK-NEXT: vadd.i16
 	%tmp1 = load <8 x i16>* %A
@@ -51,7 +51,7 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vzipQi32:
+;CHECK-LABEL: vzipQi32:
 ;CHECK: vzip.32
 ;CHECK-NEXT: vadd.i32
 	%tmp1 = load <4 x i32>* %A
@@ -63,7 +63,7 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vzipQf:
+;CHECK-LABEL: vzipQf:
 ;CHECK: vzip.32
 ;CHECK-NEXT: vadd.f32
 	%tmp1 = load <4 x float>* %A
@@ -77,7 +77,7 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; Undef shuffle indices should not prevent matching to VZIP:
 
 define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vzipi8_undef:
+;CHECK-LABEL: vzipi8_undef:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <8 x i8>* %A
@@ -89,7 +89,7 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vzipQi8_undef:
+;CHECK-LABEL: vzipQi8_undef:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
 	%tmp1 = load <16 x i8>* %A
diff --git a/test/CodeGen/ARM/warn-stack.ll b/test/CodeGen/ARM/warn-stack.ll
new file mode 100644
index 000000000000..9538bbf10488
--- /dev/null
+++ b/test/CodeGen/ARM/warn-stack.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple thumbv7-apple-ios3.0.0 -warn-stack-size=80 < %s 2>&1 >/dev/null | FileCheck %s
+; Check the internal option that warns when the stack size exceeds the
+; given amount.
+; <rdar://13987214>
+
+; CHECK-NOT: nowarn
+define void @nowarn() nounwind ssp {
+entry:
+  %buffer = alloca [12 x i8], align 1
+  %arraydecay = getelementptr inbounds [12 x i8]* %buffer, i64 0, i64 0
+  call void @doit(i8* %arraydecay) nounwind
+  ret void
+}
+
+; CHECK: warning: Stack size limit exceeded (96) in warn.
+define void @warn() nounwind ssp {
+entry:
+  %buffer = alloca [80 x i8], align 1
+  %arraydecay = getelementptr inbounds [80 x i8]* %buffer, i64 0, i64 0
+  call void @doit(i8* %arraydecay) nounwind
+  ret void
+}
+
+declare void @doit(i8*)
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
index 4d4b4a4a7e21..4063dd1b8612 100644
--- a/test/CodeGen/CPP/lit.local.cfg
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'CppBackend' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
index 6281ada73fc6..3f17ce1e0b16 100644
--- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
+++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -63,30 +63,58 @@ bb47:		; preds = %bb46, %bb44
 	br label %bb44
 }
 
-declare i32 @pthread_once(i32*, void ()*)
+define i32 @pthread_once(i32*, void ()*) {
+  ret i32 0
+}
 
-declare i8* @pthread_getspecific(i32)
+define i8* @pthread_getspecific(i32) {
+  ret i8* null
+}
 
-declare i32 @pthread_setspecific(i32, i8*)
+define i32 @pthread_setspecific(i32, i8*) {
+  ret i32 0
+}
 
-declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
+define i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) {
+  ret i32 0
+}
 
-declare i32 @pthread_cancel(i64)
+define i32 @pthread_cancel(i64) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
+define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
+define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
+define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.Alignment*)
+define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.Alignment*) {
+  ret i32 0
+}
 
-declare i32 @pthread_key_create(i32*, void (i8*)*)
+define i32 @pthread_key_create(i32*, void (i8*)*) {
+  ret i32 0
+}
 
-declare i32 @pthread_key_delete(i32)
+define i32 @pthread_key_delete(i32) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutexattr_init(%struct.Alignment*)
+define i32 @pthread_mutexattr_init(%struct.Alignment*) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutexattr_settype(%struct.Alignment*, i32)
+define i32 @pthread_mutexattr_settype(%struct.Alignment*, i32) {
+  ret i32 0
+}
 
-declare i32 @pthread_mutexattr_destroy(%struct.Alignment*)
+define i32 @pthread_mutexattr_destroy(%struct.Alignment*) {
+  ret i32 0
+}
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index d3fc20467aa8..8de6b0d4bd31 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -23,7 +23,7 @@ bb32:                                             ; preds = %bb6
 %3 = load double* %1, align 4
 %4 = load double* %0, align 4
 call void @Parse_Vector(double* %0) nounwind
-%5 = call i32 @llvm.objectsize.i32(i8* undef, i1 false)
+%5 = call i32 @llvm.objectsize.i32.p0i8(i8* undef, i1 false)
 %6 = icmp eq i32 %5, -1
 br i1 %6, label %bb34, label %bb33
 
@@ -36,7 +36,7 @@ unreachable
 }
 
 declare void @Parse_Vector(double*)
-declare i32 @llvm.objectsize.i32(i8*, i1)
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
 
 
 ; PR9578
diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll
index ce3364d45ed8..840eeb0cbf31 100644
--- a/test/CodeGen/Generic/dbg_value.ll
+++ b/test/CodeGen/Generic/dbg_value.ll
@@ -10,4 +10,5 @@ define void @t(%0*, i32, i32, i32, i32) nounwind {
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
-!0 = metadata !{i32 0} ;
+; !0 should conform to the format of DIVariable.
+!0 = metadata !{i32 786689, null, metadata !"a", null, i32 0, null, i32 0, i32 0} ;
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/CodeGen/Generic/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CodeGen/Hexagon/BranchPredict.ll b/test/CodeGen/Hexagon/BranchPredict.ll
new file mode 100644
index 000000000000..4ab1966bf04d
--- /dev/null
+++ b/test/CodeGen/Hexagon/BranchPredict.ll
@@ -0,0 +1,76 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check if the branch probabilities are reflected in the instructions:
+; The basic block placement pass should place the more probable successor
+; block as the fall-through block. The conditional jump in the predecessor
+; should then get the right hint (not_taken or ":nt").
+
+
+@j = external global i32
+
+define i32 @foo(i32 %a) nounwind {
+; CHECK: if{{ *}}(!p{{[0-3]}}.new) jump:nt
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.else, label %if.then, !prof !0
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %a, 10
+  %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 4) nounwind
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+  ret i32 %retval.0
+}
+
+declare i32 @foobar(...)
+
+define i32 @bar(i32 %a) nounwind {
+; CHECK: if{{ *}}(p{{[0-3]}}.new) jump:nt
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.else, label %if.then, !prof !1
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %a, 10
+  %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 4) nounwind
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+  ret i32 %retval.0
+}
+
+define i32 @foo_bar(i32 %a, i16 signext %b) nounwind {
+; CHECK: if{{ *}}(!cmp.eq(r{{[0-9]*}}.new, #0)) jump:nt
+entry:
+  %0 = load i32* @j, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.else, label %if.then, !prof !0
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %a, 10
+  %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %add1 = add nsw i32 %a, 4
+  %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add1) nounwind
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+  ret i32 %retval.0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
index 9cee3e215d62..6d060c1b9e26 100644
--- a/test/CodeGen/Hexagon/adde.ll
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -31,4 +31,4 @@ entry:
         %tmp2122 = trunc i128 %tmp21 to i64
         store i64 %tmp2122, i64* %RH
         ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index f8c9e44c831d..aea4ffe2eee5 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,11 +1,8 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
 ; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
-; CHECK: r0 = #1
-; CHECK: r1 = #2
-; CHECK: r2 = #3
-; CHECK: r3 = #4
-; CHECK: r4 = #5
-; CHECK: r5 = #6
+; CHECK: r1:0 = combine(#2, #1)
+; CHECK: r3:2 = combine(#4, #3)
+; CHECK: r5:4 = combine(#6, #5)
 
 
 define void @foo() nounwind {
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
index 8b99ef715546..e100cf7196f1 100644
--- a/test/CodeGen/Hexagon/combine_ir.ll
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -4,7 +4,7 @@
 
 define void @word(i32* nocapture %a) nounwind {
 entry:
-  %0 = load i32* %a, align 4, !tbaa !0
+  %0 = load i32* %a, align 4
   %1 = zext i32 %0 to i64
   tail call void @bar(i64 %1) nounwind
   ret void
@@ -17,10 +17,10 @@ declare void @bar(i64)
 
 define void @halfword(i16* nocapture %a) nounwind {
 entry:
-  %0 = load i16* %a, align 2, !tbaa !3
+  %0 = load i16* %a, align 2
   %1 = zext i16 %0 to i64
   %add.ptr = getelementptr inbounds i16* %a, i32 1
-  %2 = load i16* %add.ptr, align 2, !tbaa !3
+  %2 = load i16* %add.ptr, align 2
   %3 = zext i16 %2 to i64
   %4 = shl nuw nsw i64 %3, 16
   %ins = or i64 %4, %1
@@ -33,18 +33,13 @@ entry:
 
 define void @byte(i8* nocapture %a) nounwind {
 entry:
-  %0 = load i8* %a, align 1, !tbaa !1
+  %0 = load i8* %a, align 1
   %1 = zext i8 %0 to i64
   %add.ptr = getelementptr inbounds i8* %a, i32 1
-  %2 = load i8* %add.ptr, align 1, !tbaa !1
+  %2 = load i8* %add.ptr, align 1
   %3 = zext i8 %2 to i64
   %4 = shl nuw nsw i64 %3, 8
   %ins = or i64 %4, %1
   tail call void @bar(i64 %ins) nounwind
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Hexagon/extload-combine.ll b/test/CodeGen/Hexagon/extload-combine.ll
new file mode 100644
index 000000000000..b3b8bf07032a
--- /dev/null
+++ b/test/CodeGen/Hexagon/extload-combine.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; Check that the combine/sxtw instructions are being generated.
+; In the combine case, one operand should be 0 and the other should be
+; the output of an absolute-addressing load instruction.
+
+@a = external global i16
+@b = external global i16
+@c = external global i16
+@char_a = external global i8
+@char_b = external global i8
+@char_c = external global i8
+@int_a = external global i32
+@int_b = external global i32
+@int_c = external global i32
+
+; Function Attrs: nounwind
+define i64 @short_test1() #0 {
+; CHECK: [[VAR:r[0-9]+]]{{ *}}={{ *}}memuh(##
+; CHECK: combine(#0, [[VAR]])
+entry:
+  store i16 0, i16* @a, align 2
+  %0 = load i16* @b, align 2
+  %conv2 = zext i16 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @short_test2() #0 {
+; CHECK: [[VAR1:r[0-9]+]]{{ *}}={{ *}}memh(##
+; CHECK: sxtw([[VAR1]])
+entry:
+  store i16 0, i16* @a, align 2
+  %0 = load i16* @c, align 2
+  %conv2 = sext i16 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @char_test1() #0 {
+; CHECK: [[VAR2:r[0-9]+]]{{ *}}={{ *}}memub(##
+; CHECK: combine(#0, [[VAR2]])
+entry:
+  store i8 0, i8* @char_a, align 1
+  %0 = load i8* @char_b, align 1
+  %conv2 = zext i8 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @char_test2() #0 {
+; CHECK: [[VAR3:r[0-9]+]]{{ *}}={{ *}}memb(##
+; CHECK: sxtw([[VAR3]])
+entry:
+  store i8 0, i8* @char_a, align 1
+  %0 = load i8* @char_c, align 1
+  %conv2 = sext i8 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @int_test1() #0 {
+; CHECK: [[VAR4:r[0-9]+]]{{ *}}={{ *}}memw(##
+; CHECK: combine(#0, [[VAR4]])
+entry:
+  store i32 0, i32* @int_a, align 4
+  %0 = load i32* @int_b, align 4
+  %conv = zext i32 %0 to i64
+  ret i64 %conv
+}
+
+; Function Attrs: nounwind
+define i64 @int_test2() #0 {
+; CHECK: [[VAR5:r[0-9]+]]{{ *}}={{ *}}memw(##
+; CHECK: sxtw([[VAR5]])
+entry:
+  store i32 0, i32* @int_a, align 4
+  %0 = load i32* @int_c, align 4
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index 17fe7b982d8f..bfdd8130d5bf 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -34,28 +34,31 @@ for.end:                                          ; preds = %for.body
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29}
+
+!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786473, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !28, null, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9, metadata !9}
-!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !11 = metadata !{metadata !12}
 !12 = metadata !{metadata !13, metadata !14, metadata !15}
 !13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
 !14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
 !15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
-!16 = metadata !{i32 786443, metadata !5, i32 1, i32 26, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!16 = metadata !{i32 786443, metadata !28, metadata !5, i32 1, i32 26, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
 !17 = metadata !{i32 1, i32 15, metadata !5, null}
 !18 = metadata !{i32 1, i32 23, metadata !5, null}
 !19 = metadata !{i32 3, i32 8, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !16, i32 3, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!20 = metadata !{i32 786443, metadata !28, metadata !16, i32 3, i32 3, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
 !21 = metadata !{i32 4, i32 5, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !20, i32 3, i32 28, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!22 = metadata !{i32 786443, metadata !28, metadata !20, i32 3, i32 28, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
 !26 = metadata !{i32 3, i32 23, metadata !20, null}
 !27 = metadata !{i32 6, i32 1, metadata !16, null}
+!28 = metadata !{metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t"}
+!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll
index eb44c2905c9d..c5d05a5e6ed8 100644
--- a/test/CodeGen/Hexagon/i16_VarArg.ll
+++ b/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -37,4 +37,4 @@ define i32 @main() {
         %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
         call i32 (i8*, ...)* @printf( i8* %lt_s, i16 %val1 )
         ret i32 0
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll
index 7dbfb25cd2b7..37f27787c186 100644
--- a/test/CodeGen/Hexagon/i1_VarArg.ll
+++ b/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -41,4 +41,4 @@ define i32 @main() {
         call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r )
         call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r )
         ret i32 0
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll
index 687b178824ce..6f056ff417af 100644
--- a/test/CodeGen/Hexagon/i8_VarArg.ll
+++ b/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -37,4 +37,4 @@ define i32 @main() {
         %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
         call i32 (i8*, ...)* @printf( i8* %lt_s, i8 %val1 )
         ret i32 0
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Hexagon/indirect-br.ll b/test/CodeGen/Hexagon/indirect-br.ll
index 919e50189160..188eebff5c73 100644
--- a/test/CodeGen/Hexagon/indirect-br.ll
+++ b/test/CodeGen/Hexagon/indirect-br.ll
@@ -11,4 +11,4 @@ test_label:
 
 ret:
         ret i32 -1
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
index 24324b2792e3..e96bab818a3c 100644
--- a/test/CodeGen/Hexagon/lit.local.cfg
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Hexagon' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll
index 5498848d8560..fca1a73811a9 100644
--- a/test/CodeGen/Hexagon/memops.ll
+++ b/test/CodeGen/Hexagon/memops.ll
@@ -4,11 +4,11 @@
 define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  store i8 %conv1, i8* %p, align 1
   ret void
 }
 
@@ -16,11 +16,11 @@ define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv1 = zext i8 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i8
-  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  store i8 %conv2, i8* %p, align 1
   ret void
 }
 
@@ -28,51 +28,51 @@ define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv1 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  store i8 %conv2, i8* %p, align 1
   ret void
 }
 
 define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %or3 = or i8 %0, %x
-  store i8 %or3, i8* %p, align 1, !tbaa !0
+  store i8 %or3, i8* %p, align 1
   ret void
 }
 
 define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %and3 = and i8 %0, %x
-  store i8 %and3, i8* %p, align 1, !tbaa !0
+  store i8 %and3, i8* %p, align 1
   ret void
 }
 
 define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv = zext i8 %0 to i32
   %and = and i32 %conv, 223
   %conv1 = trunc i32 %and to i8
-  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  store i8 %conv1, i8* %p, align 1
   ret void
 }
 
 define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i8
-  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  store i8 %conv1, i8* %p, align 1
   ret void
 }
 
@@ -80,11 +80,11 @@ define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -93,11 +93,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -106,11 +106,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -118,9 +118,9 @@ define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext %
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
-  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %or3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -128,9 +128,9 @@ define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
-  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %and3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -138,11 +138,11 @@ define void @memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %and = and i32 %conv, 223
   %conv1 = trunc i32 %and to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -150,11 +150,11 @@ define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -162,11 +162,11 @@ define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -175,11 +175,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -188,11 +188,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -200,9 +200,9 @@ define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) noun
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
-  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %or3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -210,9 +210,9 @@ define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nou
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
-  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %and3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -220,11 +220,11 @@ define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %and = and i32 %conv, 223
   %conv1 = trunc i32 %and to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -232,22 +232,22 @@ define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
 define void @memop_signed_char_add5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  store i8 %conv1, i8* %p, align 1
   ret void
 }
 
@@ -255,11 +255,11 @@ define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv13 = zext i8 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i8
-  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  store i8 %conv2, i8* %p, align 1
   ret void
 }
 
@@ -267,51 +267,51 @@ define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv13 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %p, align 1, !tbaa !0
+  store i8 %conv2, i8* %p, align 1
   ret void
 }
 
 define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %or3 = or i8 %0, %x
-  store i8 %or3, i8* %p, align 1, !tbaa !0
+  store i8 %or3, i8* %p, align 1
   ret void
 }
 
 define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %and3 = and i8 %0, %x
-  store i8 %and3, i8* %p, align 1, !tbaa !0
+  store i8 %and3, i8* %p, align 1
   ret void
 }
 
 define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 223
   %conv1 = trunc i32 %and to i8
-  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  store i8 %conv1, i8* %p, align 1
   ret void
 }
 
 define void @memop_signed_char_setbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i8* %p, align 1, !tbaa !0
+  %0 = load i8* %p, align 1
   %conv2 = zext i8 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i8
-  store i8 %conv1, i8* %p, align 1, !tbaa !0
+  store i8 %conv1, i8* %p, align 1
   ret void
 }
 
@@ -319,11 +319,11 @@ define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -332,11 +332,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -345,11 +345,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -357,9 +357,9 @@ define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x)
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
-  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %or3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -367,9 +367,9 @@ define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
-  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %and3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -377,11 +377,11 @@ define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 223
   %conv1 = trunc i32 %and to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -389,11 +389,11 @@ define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -401,11 +401,11 @@ define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -414,11 +414,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -427,11 +427,11 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr, align 1
   ret void
 }
 
@@ -439,9 +439,9 @@ define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwi
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
-  store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %or3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -449,9 +449,9 @@ define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounw
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
-  store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %and3, i8* %add.ptr, align 1
   ret void
 }
 
@@ -459,11 +459,11 @@ define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 223
   %conv1 = trunc i32 %and to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -471,22 +471,22 @@ define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
 define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv = zext i16 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i16
-  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  store i16 %conv1, i16* %p, align 2
   ret void
 }
 
@@ -494,11 +494,11 @@ define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwin
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv1 = zext i16 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i16
-  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  store i16 %conv2, i16* %p, align 2
   ret void
 }
 
@@ -506,51 +506,51 @@ define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwin
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv1 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  store i16 %conv2, i16* %p, align 2
   ret void
 }
 
 define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %or3 = or i16 %0, %x
-  store i16 %or3, i16* %p, align 2, !tbaa !2
+  store i16 %or3, i16* %p, align 2
   ret void
 }
 
 define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %and3 = and i16 %0, %x
-  store i16 %and3, i16* %p, align 2, !tbaa !2
+  store i16 %and3, i16* %p, align 2
   ret void
 }
 
 define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv = zext i16 %0 to i32
   %and = and i32 %conv, 65503
   %conv1 = trunc i32 %and to i16
-  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  store i16 %conv1, i16* %p, align 2
   ret void
 }
 
 define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv = zext i16 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i16
-  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  store i16 %conv1, i16* %p, align 2
   ret void
 }
 
@@ -558,11 +558,11 @@ define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -571,11 +571,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -584,11 +584,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -596,9 +596,9 @@ define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroex
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
-  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %or3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -606,9 +606,9 @@ define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroe
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
-  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %and3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -616,11 +616,11 @@ define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwi
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %and = and i32 %conv, 65503
   %conv1 = trunc i32 %and to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -628,11 +628,11 @@ define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) nounwi
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -640,11 +640,11 @@ define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -653,11 +653,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -666,11 +666,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -678,9 +678,9 @@ define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) n
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
-  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %or3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -688,9 +688,9 @@ define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x)
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
-  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %and3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -698,11 +698,11 @@ define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %and = and i32 %conv, 65503
   %conv1 = trunc i32 %and to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -710,22 +710,22 @@ define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
 define void @memop_signed_short_add5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv2 = zext i16 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i16
-  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  store i16 %conv1, i16* %p, align 2
   ret void
 }
 
@@ -733,11 +733,11 @@ define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv13 = zext i16 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i16
-  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  store i16 %conv2, i16* %p, align 2
   ret void
 }
 
@@ -745,51 +745,51 @@ define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv13 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %p, align 2, !tbaa !2
+  store i16 %conv2, i16* %p, align 2
   ret void
 }
 
 define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %or3 = or i16 %0, %x
-  store i16 %or3, i16* %p, align 2, !tbaa !2
+  store i16 %or3, i16* %p, align 2
   ret void
 }
 
 define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %and3 = and i16 %0, %x
-  store i16 %and3, i16* %p, align 2, !tbaa !2
+  store i16 %and3, i16* %p, align 2
   ret void
 }
 
 define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv2 = zext i16 %0 to i32
   %and = and i32 %conv2, 65503
   %conv1 = trunc i32 %and to i16
-  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  store i16 %conv1, i16* %p, align 2
   ret void
 }
 
 define void @memop_signed_short_setbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i16* %p, align 2, !tbaa !2
+  %0 = load i16* %p, align 2
   %conv2 = zext i16 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i16
-  store i16 %conv1, i16* %p, align 2, !tbaa !2
+  store i16 %conv1, i16* %p, align 2
   ret void
 }
 
@@ -797,11 +797,11 @@ define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -810,11 +810,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -823,11 +823,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -835,9 +835,9 @@ define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
-  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %or3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -845,9 +845,9 @@ define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
-  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %and3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -855,11 +855,11 @@ define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %and = and i32 %conv2, 65503
   %conv1 = trunc i32 %and to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -867,11 +867,11 @@ define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -879,11 +879,11 @@ define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -892,11 +892,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -905,11 +905,11 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv2, i16* %add.ptr, align 2
   ret void
 }
 
@@ -917,9 +917,9 @@ define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nou
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
-  store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %or3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -927,9 +927,9 @@ define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) no
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
-  store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %and3, i16* %add.ptr, align 2
   ret void
 }
 
@@ -937,11 +937,11 @@ define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %and = and i32 %conv2, 65503
   %conv1 = trunc i32 %and to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -949,74 +949,74 @@ define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2, !tbaa !2
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
 define void @memop_signed_int_add5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %add = add i32 %0, 5
-  store i32 %add, i32* %p, align 4, !tbaa !3
+  store i32 %add, i32* %p, align 4
   ret void
 }
 
 define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %add = add i32 %0, %x
-  store i32 %add, i32* %p, align 4, !tbaa !3
+  store i32 %add, i32* %p, align 4
   ret void
 }
 
 define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %sub = sub i32 %0, %x
-  store i32 %sub, i32* %p, align 4, !tbaa !3
+  store i32 %sub, i32* %p, align 4
   ret void
 }
 
 define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %or = or i32 %0, %x
-  store i32 %or, i32* %p, align 4, !tbaa !3
+  store i32 %or, i32* %p, align 4
   ret void
 }
 
 define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %and = and i32 %0, %x
-  store i32 %and, i32* %p, align 4, !tbaa !3
+  store i32 %and, i32* %p, align 4
   ret void
 }
 
 define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %and = and i32 %0, -33
-  store i32 %and, i32* %p, align 4, !tbaa !3
+  store i32 %and, i32* %p, align 4
   ret void
 }
 
 define void @memop_signed_int_setbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %or = or i32 %0, 128
-  store i32 %or, i32* %p, align 4, !tbaa !3
+  store i32 %or, i32* %p, align 4
   ret void
 }
 
@@ -1024,9 +1024,9 @@ define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add i32 %0, 5
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1034,9 +1034,9 @@ define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounw
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add i32 %0, %x
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1044,9 +1044,9 @@ define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounw
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %sub = sub i32 %0, %x
-  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %sub, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1054,9 +1054,9 @@ define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwi
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, %x
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1064,9 +1064,9 @@ define void @memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounw
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, %x
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1074,9 +1074,9 @@ define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, -33
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1084,9 +1084,9 @@ define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, 128
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1094,9 +1094,9 @@ define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add i32 %0, 5
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1104,9 +1104,9 @@ define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add i32 %0, %x
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1114,9 +1114,9 @@ define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %sub = sub i32 %0, %x
-  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %sub, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1124,9 +1124,9 @@ define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, %x
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1134,9 +1134,9 @@ define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, %x
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1144,9 +1144,9 @@ define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, -33
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1154,72 +1154,72 @@ define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, 128
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
 define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %add = add nsw i32 %0, 5
-  store i32 %add, i32* %p, align 4, !tbaa !3
+  store i32 %add, i32* %p, align 4
   ret void
 }
 
 define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %add = add nsw i32 %0, %x
-  store i32 %add, i32* %p, align 4, !tbaa !3
+  store i32 %add, i32* %p, align 4
   ret void
 }
 
 define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %sub = sub nsw i32 %0, %x
-  store i32 %sub, i32* %p, align 4, !tbaa !3
+  store i32 %sub, i32* %p, align 4
   ret void
 }
 
 define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %or = or i32 %0, %x
-  store i32 %or, i32* %p, align 4, !tbaa !3
+  store i32 %or, i32* %p, align 4
   ret void
 }
 
 define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %and = and i32 %0, %x
-  store i32 %and, i32* %p, align 4, !tbaa !3
+  store i32 %and, i32* %p, align 4
   ret void
 }
 
 define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %and = and i32 %0, -33
-  store i32 %and, i32* %p, align 4, !tbaa !3
+  store i32 %and, i32* %p, align 4
   ret void
 }
 
 define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i32* %p, align 4, !tbaa !3
+  %0 = load i32* %p, align 4
   %or = or i32 %0, 128
-  store i32 %or, i32* %p, align 4, !tbaa !3
+  store i32 %or, i32* %p, align 4
   ret void
 }
 
@@ -1227,9 +1227,9 @@ define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add nsw i32 %0, 5
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1237,9 +1237,9 @@ define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nou
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add nsw i32 %0, %x
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1247,9 +1247,9 @@ define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nou
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %sub = sub nsw i32 %0, %x
-  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %sub, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1257,9 +1257,9 @@ define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) noun
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, %x
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1267,9 +1267,9 @@ define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nou
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, %x
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1277,9 +1277,9 @@ define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, -33
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1287,9 +1287,9 @@ define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, 128
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1297,9 +1297,9 @@ define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add nsw i32 %0, 5
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1307,9 +1307,9 @@ define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %add = add nsw i32 %0, %x
-  store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %add, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1317,9 +1317,9 @@ define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %sub = sub nsw i32 %0, %x
-  store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %sub, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1327,9 +1327,9 @@ define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, %x
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1337,9 +1337,9 @@ define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, %x
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1347,9 +1347,9 @@ define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %and = and i32 %0, -33
-  store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %and, i32* %add.ptr, align 4
   ret void
 }
 
@@ -1357,13 +1357,8 @@ define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4, !tbaa !3
+  %0 = load i32* %add.ptr, align 4
   %or = or i32 %0, 128
-  store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+  store i32 %or, i32* %add.ptr, align 4
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
-!2 = metadata !{metadata !"short", metadata !0}
-!3 = metadata !{metadata !"int", metadata !0}
diff --git a/test/CodeGen/Hexagon/packetize_cond_inst.ll b/test/CodeGen/Hexagon/packetize_cond_inst.ll
new file mode 100644
index 000000000000..a48a9f62ec61
--- /dev/null
+++ b/test/CodeGen/Hexagon/packetize_cond_inst.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mcpu=hexagonv4 -tail-dup-size=1 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown--elf"
+
+; Make sure we put the two conditionally executed adds in a packet.
+; ifcnv_add:
+;     {
+;       p0 = cmp.gt(r2, r1)
+;       if (!p0.new) r0 = add(r2, r1)
+;       if (p0.new) r0 = add(r0, #10)
+;     }
+; CHECK: cmp
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+define i32 @ifcnv_add(i32, i32, i32) nounwind readnone {
+  %4 = icmp sgt i32 %2, %1
+  br i1 %4, label %5, label %7
+
+; <label>:5                                       ; preds = %3
+  %6 = add nsw i32 %0, 10
+  br label %9
+
+; <label>:7                                       ; preds = %3
+  %8 = add nsw i32 %2, %1
+  br label %9
+
+; <label>:9                                       ; preds = %7, %5
+  %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
+  %11 = add nsw i32 %10, 1
+  ret i32 %11
+}
diff --git a/test/CodeGen/Hexagon/pred-gp.ll b/test/CodeGen/Hexagon/pred-gp.ll
new file mode 100644
index 000000000000..299bd8679dad
--- /dev/null
+++ b/test/CodeGen/Hexagon/pred-gp.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we are able to predicate instructions with gp-relative
+; addressing mode.
+
+@d = external global i32
+@c = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @test2(i8 zeroext %a, i8 zeroext %b) #0 {
+; CHECK: if{{ *}}({{!*}}p{{[0-3]+}}{{[.new]*}}){{ *}}r{{[0-9]+}}{{ *}}={{ *}}memw(##{{[cd]}})
+; CHECK: if{{ *}}({{!*}}p{{[0-3]+}}){{ *}}r{{[0-9]+}}{{ *}}={{ *}}memw(##{{[cd]}})
+entry:
+  %cmp = icmp eq i8 %a, %b
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:
+  %.pre = load i32* @c, align 4
+  br label %if.end
+
+if.then:
+  %0 = load i32* @d, align 4
+  store i32 %0, i32* @c, align 4
+  br label %if.end
+
+if.end:
+  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %0, %if.then ]
+  ret i32 %1
+}
diff --git a/test/CodeGen/Hexagon/pred-instrs.ll b/test/CodeGen/Hexagon/pred-instrs.ll
new file mode 100644
index 000000000000..800073e49b03
--- /dev/null
+++ b/test/CodeGen/Hexagon/pred-instrs.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we are able to predicate instructions.
+
+; CHECK: if{{ *}}({{!*}}p{{[0-3]}}{{[.new]*}}){{ *}}r{{[0-9]+}}{{ *}}={{ *}}{{and|aslh}}
+; CHECK: if{{ *}}({{!*}}p{{[0-3]}}{{[.new]*}}){{ *}}r{{[0-9]+}}{{ *}}={{ *}}{{and|aslh}}
+@a = external global i32
+@d = external global i32
+
+; Function Attrs: nounwind
+define i32 @test1(i8 zeroext %la, i8 zeroext %lb) {
+entry:
+  %cmp = icmp eq i8 %la, %lb
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %conv1 = zext i8 %la to i32
+  %shl = shl nuw nsw i32 %conv1, 16
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %and8 = and i8 %lb, %la
+  %and = zext i8 %and8 to i32
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge = phi i32 [ %and, %if.else ], [ %shl, %if.then ]
+  store i32 %storemerge, i32* @a, align 4
+  %0 = load i32* @d, align 4
+  ret i32 %0
+}
diff --git a/test/CodeGen/Hexagon/split-const32-const64.ll b/test/CodeGen/Hexagon/split-const32-const64.ll
new file mode 100644
index 000000000000..2815253545c5
--- /dev/null
+++ b/test/CodeGen/Hexagon/split-const32-const64.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-small-data-threshold=0 < %s | FileCheck %s
+
+; Check that CONST32/CONST64 instructions are 'not' generated when
+; small-data-threshold is set to 0.
+
+; Case: stores of immediate values to globals.
+@a = external global i32
+@b = external global i32
+@la = external global i64
+@lb = external global i64
+
+define void @test1() nounwind {
+; CHECK-NOT: CONST32 
+entry:
+  store i32 12345670, i32* @a, align 4
+  store i32 12345670, i32* @b, align 4
+  ret void
+}
+
+define void @test2() nounwind {
+; CHECK-NOT: CONST64
+entry:
+  store i64 1234567890123, i64* @la, align 8
+  store i64 1234567890123, i64* @lb, align 8
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
index 84172e957d04..735ac9eb82e4 100644
--- a/test/CodeGen/Hexagon/sube.ll
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -26,4 +26,4 @@ entry:
         %tmp2122 = trunc i128 %tmp21 to i64
         store i64 %tmp2122, i64* %RH
         ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Hexagon/tail-call-trunc.ll b/test/CodeGen/Hexagon/tail-call-trunc.ll
new file mode 100644
index 000000000000..98214c7b1e97
--- /dev/null
+++ b/test/CodeGen/Hexagon/tail-call-trunc.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+declare i32 @ret_i32()
+
+define i8 @test_i8() {
+; CHECK-LABEL: test_i8:
+; CHECK: jump ret_i32
+  %res = tail call i32 @ret_i32()
+  %val = trunc i32 %res to i8
+  ret i8 %val
+}
+
+define i16 @test_i16() {
+; CHECK-LABEL: test_i16:
+; CHECK: jump ret_i32
+  %res = tail call i32 @ret_i32()
+  %val = trunc i32 %res to i16
+  ret i16 %val
+}
+
+declare i64 @ret_i64()
+define i32 @test_i32() {
+; CHECK-LABEL: test_i32:
+; CHECK: call ret_i64
+  %res = tail call i64 @ret_i64()
+  %val = trunc i64 %res to i32
+  ret i32 42
+}
diff --git a/test/CodeGen/Hexagon/tfr-to-combine.ll b/test/CodeGen/Hexagon/tfr-to-combine.ll
new file mode 100644
index 000000000000..e3057cd1611d
--- /dev/null
+++ b/test/CodeGen/Hexagon/tfr-to-combine.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  -O3 < %s | FileCheck %s
+
+; Check that we combine TFRs and TFRIs into COMBINEs.
+
+@a = external global i16
+@b = external global i16
+@c = external global i16
+
+; Function Attrs: nounwind
+define i64 @test1() #0 {
+; CHECK: combine(#10, #0)
+entry:
+  store i16 0, i16* @a, align 2
+  store i16 10, i16* @b, align 2
+  ret i64 10
+}
+
+; Function Attrs: nounwind
+define i64 @test2() #0 {
+; CHECK: combine(#0, r{{[0-9]+}})
+entry:
+  store i16 0, i16* @a, align 2
+  %0 = load i16* @c, align 2
+  %conv2 = zext i16 %0 to i64
+  ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @test4() #0 {
+; CHECK: combine(#0, ##100)
+entry:
+  store i16 100, i16* @b, align 2
+  store i16 0, i16* @a, align 2
+  ret i64 0
+}
diff --git a/test/CodeGen/Hexagon/union-1.ll b/test/CodeGen/Hexagon/union-1.ll
index 7c6da744ec51..fe79f9510fe8 100644
--- a/test/CodeGen/Hexagon/union-1.ll
+++ b/test/CodeGen/Hexagon/union-1.ll
@@ -5,10 +5,10 @@
 
 define void @word(i32* nocapture %a) nounwind {
 entry:
-  %0 = load i32* %a, align 4, !tbaa !0
+  %0 = load i32* %a, align 4
   %1 = zext i32 %0 to i64
   %add.ptr = getelementptr inbounds i32* %a, i32 1
-  %2 = load i32* %add.ptr, align 4, !tbaa !0
+  %2 = load i32* %add.ptr, align 4
   %3 = zext i32 %2 to i64
   %4 = shl nuw i64 %3, 32
   %ins = or i64 %4, %1
@@ -17,7 +17,3 @@ entry:
 }
 
 declare void @bar(i64)
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/zextloadi1.ll b/test/CodeGen/Hexagon/zextloadi1.ll
index cb6e6fdf84a5..b58d9332695d 100644
--- a/test/CodeGen/Hexagon/zextloadi1.ll
+++ b/test/CodeGen/Hexagon/zextloadi1.ll
@@ -22,4 +22,4 @@ define void @i65_ls() nounwind  {
         %tmp = load i65* @i65_l
         store i65 %tmp, i65* @i65_s
         ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
index d5162b964a08..953e576af85c 100644
--- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -12,17 +12,19 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, metadata !6, metadata !6, metadata !11, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 0}
 !7 = metadata !{i32 786688, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 786443, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786443, metadata !12, metadata !0, i32 2, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !9 = metadata !{i32 3, i32 11, metadata !8, null}
 !10 = metadata !{i32 4, i32 2, metadata !8, null}
 !11 = metadata !{metadata !0}
 !12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/MBlaze/DbgValueOtherTargets.test b/test/CodeGen/MBlaze/DbgValueOtherTargets.test
deleted file mode 100644
index 8b850f51105b..000000000000
--- a/test/CodeGen/MBlaze/DbgValueOtherTargets.test
+++ /dev/null
@@ -1 +0,0 @@
-RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/MBlaze/brind.ll b/test/CodeGen/MBlaze/brind.ll
deleted file mode 100644
index 2229a873827a..000000000000
--- a/test/CodeGen/MBlaze/brind.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; Ensure that the select instruction is supported and is lowered to 
-; some sort of branch instruction.
-;
-; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s
-
-declare i32 @printf(i8*, ...)
-@MSG = internal constant [13 x i8] c"Message: %d\0A\00"
-
-@BLKS = private constant [5 x i8*]
-    [ i8* blockaddress(@brind, %L1),
-      i8* blockaddress(@brind, %L2),
-      i8* blockaddress(@brind, %L3),
-      i8* blockaddress(@brind, %L4),
-      i8* blockaddress(@brind, %L5) ]
-
-define i32 @brind(i32 %a, i32 %b)
-{
-    ; CHECK:        brind:
-entry:
-    br label %loop
-
-loop:
-    %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.8, %finish ]
-    %dst.0 = getelementptr [5 x i8*]* @BLKS, i32 0, i32 %tmp.0
-    %dst.1 = load i8** %dst.0
-    indirectbr i8* %dst.1, [ label %L1,
-                             label %L2,
-                             label %L3,
-                             label %L4,
-                             label %L5 ]
-    ; CHECK:        brad {{r[0-9]*}}
-
-L1:
-    %tmp.1 = add i32 %a, %b
-    br label %finish
-    ; CHECK:        brid
-
-L2:
-    %tmp.2 = sub i32 %a, %b
-    br label %finish
-    ; CHECK:        brid
-
-L3:
-    %tmp.3 = mul i32 %a, %b
-    br label %finish
-    ; CHECK:        brid
-
-L4:
-    %tmp.4 = sdiv i32 %a, %b
-    br label %finish
-    ; CHECK:        brid
-
-L5:
-    %tmp.5 = srem i32 %a, %b
-    br label %finish
-
-finish:
-    %tmp.6 = phi i32 [ %tmp.1, %L1 ],
-                     [ %tmp.2, %L2 ],
-                     [ %tmp.3, %L3 ],
-                     [ %tmp.4, %L4 ],
-                     [ %tmp.5, %L5 ]
-
-    call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0),
-                                 i32 %tmp.6)
-
-    %tmp.7 = add i32 %tmp.0, 1
-    %tmp.8 = urem i32 %tmp.7, 5
-
-    br label %loop
-    ; CHECK:        brad {{r[0-9]*}}
-}
diff --git a/test/CodeGen/MBlaze/callind.ll b/test/CodeGen/MBlaze/callind.ll
deleted file mode 100644
index bfc8d001fd5d..000000000000
--- a/test/CodeGen/MBlaze/callind.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; Ensure that indirect calls work and that they are lowered to some
-; sort of branch and link instruction.
-;
-; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s
-
-declare i32 @printf(i8*, ...)
-@MSG = internal constant [13 x i8] c"Message: %d\0A\00"
-
-@FUNS = private constant [5 x i32 (i32,i32)*]
-    [ i32 (i32,i32)* @doadd,
-      i32 (i32,i32)* @dosub,
-      i32 (i32,i32)* @domul,
-      i32 (i32,i32)* @dodiv,
-      i32 (i32,i32)* @dorem ]
-
-define i32 @doadd(i32 %a, i32 %b)
-{
-    ; CHECK:        doadd:
-    %tmp.0 = add i32 %a, %b
-    ret i32 %tmp.0
-    ; CHECK:        rtsd
-}
-
-define i32 @dosub(i32 %a, i32 %b)
-{
-    ; CHECK:        dosub:
-    %tmp.0 = sub i32 %a, %b
-    ret i32 %tmp.0
-    ; CHECK:        rtsd
-}
-
-define i32 @domul(i32 %a, i32 %b)
-{
-    ; CHECK:        domul:
-    %tmp.0 = mul i32 %a, %b
-    ret i32 %tmp.0
-    ; CHECK:        rtsd
-}
-
-define i32 @dodiv(i32 %a, i32 %b)
-{
-    ; CHECK:        dodiv:
-    %tmp.0 = sdiv i32 %a, %b
-    ret i32 %tmp.0
-    ; CHECK:        rtsd
-}
-
-define i32 @dorem(i32 %a, i32 %b)
-{
-    ; CHECK:        dorem:
-    %tmp.0 = srem i32 %a, %b
-    ret i32 %tmp.0
-    ; CHECK:        rtsd
-}
-
-define i32 @callind(i32 %a, i32 %b)
-{
-    ; CHECK:        callind:
-entry:
-    br label %loop
-
-loop:
-    %tmp.0 = phi i32 [ 0, %entry ], [ %tmp.3, %loop ]
-    %dst.0 = getelementptr [5 x i32 (i32,i32)*]* @FUNS, i32 0, i32 %tmp.0
-    %dst.1 = load i32 (i32,i32)** %dst.0
-    %tmp.1 = call i32 %dst.1(i32 %a, i32 %b)
-    ; CHECK-NOT:    brli
-    ; CHECK-NOT:    brlai
-    ; CHECK:        brl
-
-    call i32 (i8*,...)* @printf( i8* getelementptr([13 x i8]* @MSG,i32 0,i32 0),
-                                 i32 %tmp.1)
-    ; CHECK:        brl
-
-    %tmp.2 = add i32 %tmp.0, 1
-    %tmp.3 = urem i32 %tmp.2, 5
-
-    br label %loop
-    ; CHECK:        br
-}
diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll
deleted file mode 100644
index 827fd3272bd4..000000000000
--- a/test/CodeGen/MBlaze/cc.ll
+++ /dev/null
@@ -1,266 +0,0 @@
-; Test some of the calling convention lowering done by the MBlaze backend.
-; We test that integer values are passed in the correct registers and
-; returned in the correct registers. Additionally, we test that the stack
-; is used as appropriate for passing arguments that cannot be placed into
-; registers.
-;
-; RUN: llc < %s -march=mblaze | FileCheck %s
-
-declare i32 @printf(i8*, ...)
-@MSG = internal constant [13 x i8] c"Message: %d\0A\00"
-
-define void @params0_noret() {
-    ; CHECK:        params0_noret:
-    ret void
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-}
-
-define i8 @params0_8bitret() {
-    ; CHECK:        params0_8bitret:
-    ret i8 1
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r0, 1}}
-}
-
-define i16 @params0_16bitret() {
-    ; CHECK:        params0_16bitret:
-    ret i16 1
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r0, 1}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-}
-
-define i32 @params0_32bitret() {
-    ; CHECK:        params0_32bitret:
-    ret i32 1
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r0, 1}}
-}
-
-define i64 @params0_64bitret() {
-    ; CHECK:        params0_64bitret:
-    ret i64 1
-    ; CHECK:        {{.* r3, r0, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r4, r0, 1}}
-}
-
-define i32 @params1_32bitret(i32 %a) {
-    ; CHECK:        params1_32bitret:
-    ret i32 %a
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r5, r0}}
-}
-
-define i32 @params2_32bitret(i32 %a, i32 %b) {
-    ; CHECK:        params2_32bitret:
-    ret i32 %b
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r6, r0}}
-}
-
-define i32 @params3_32bitret(i32 %a, i32 %b, i32 %c) {
-    ; CHECK:        params3_32bitret:
-    ret i32 %c
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r7, r0}}
-}
-
-define i32 @params4_32bitret(i32 %a, i32 %b, i32 %c, i32 %d) {
-    ; CHECK:        params4_32bitret:
-    ret i32 %d
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r8, r0}}
-}
-
-define i32 @params5_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
-    ; CHECK:        params5_32bitret:
-    ret i32 %e
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r9, r0}}
-}
-
-define i32 @params6_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
-    ; CHECK:        params6_32bitret:
-    ret i32 %f
-    ; CHECK-NOT:    {{.* r3, .*, .*}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-    ; CHECK:        {{.* r3, r10, r0}}
-}
-
-define i32 @params7_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
-                             i32 %g) {
-    ; CHECK:        params7_32bitret:
-    ret i32 %g
-    ; CHECK:        {{lwi? r3, r1, 32}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-}
-
-define i32 @params8_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
-                             i32 %g, i32 %h) {
-    ; CHECK:        params8_32bitret:
-    ret i32 %h
-    ; CHECK:        {{lwi? r3, r1, 36}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-}
-
-define i32 @params9_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
-                             i32 %g, i32 %h, i32 %i) {
-    ; CHECK:        params9_32bitret:
-    ret i32 %i
-    ; CHECK:        {{lwi? r3, r1, 40}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-}
-
-define i32 @params10_32bitret(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
-                              i32 %g, i32 %h, i32 %i, i32 %j) {
-    ; CHECK:        params10_32bitret:
-    ret i32 %j
-    ; CHECK:        {{lwi? r3, r1, 44}}
-    ; CHECK-NOT:    {{.* r4, .*, .*}}
-    ; CHECK:        rtsd
-}
-
-define void @testing() {
-    %MSG.1 = getelementptr [13 x i8]* @MSG, i32 0, i32 0
-
-    call void @params0_noret()
-    ; CHECK:        brlid
-
-    %tmp.1 = call i8 @params0_8bitret()
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i8 %tmp.1)
-
-    %tmp.2 = call i16 @params0_16bitret()
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i16 %tmp.2)
-
-    %tmp.3 = call i32 @params0_32bitret()
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.3)
-
-    %tmp.4 = call i64 @params0_64bitret()
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i64 %tmp.4)
-
-    %tmp.5 = call i32 @params1_32bitret(i32 1)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.5)
-
-    %tmp.6 = call i32 @params2_32bitret(i32 1, i32 2)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.6)
-
-    %tmp.7 = call i32 @params3_32bitret(i32 1, i32 2, i32 3)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.7)
-
-    %tmp.8 = call i32 @params4_32bitret(i32 1, i32 2, i32 3, i32 4)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.8)
-
-    %tmp.9 = call i32 @params5_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        {{.* r9, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.9)
-
-    %tmp.10 = call i32 @params6_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                         i32 6)
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        {{.* r9, .*, .*}}
-    ; CHECK:        {{.* r10, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.10)
-
-    %tmp.11 = call i32 @params7_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                         i32 6, i32 7)
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        {{.* r9, .*, .*}}
-    ; CHECK:        {{.* r10, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.11)
-
-    %tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                         i32 6, i32 7, i32 8)
-    ; CHECK:        {{swi? .*, r1, 32}}
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        {{.* r9, .*, .*}}
-    ; CHECK:        {{.* r10, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.12)
-
-    %tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                         i32 6, i32 7, i32 8, i32 9)
-    ; CHECK:        {{swi? .*, r1, 36}}
-    ; CHECK:        {{swi? .*, r1, 32}}
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        {{.* r9, .*, .*}}
-    ; CHECK:        {{.* r10, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.13)
-
-    %tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
-                                          i32 6, i32 7, i32 8, i32 9, i32 10)
-    ; CHECK:        {{swi? .*, r1, 40}}
-    ; CHECK:        {{swi? .*, r1, 36}}
-    ; CHECK:        {{swi? .*, r1, 32}}
-    ; CHECK:        {{swi? .*, r1, 28}}
-    ; CHECK:        {{.* r5, .*, .*}}
-    ; CHECK:        {{.* r6, .*, .*}}
-    ; CHECK:        {{.* r7, .*, .*}}
-    ; CHECK:        {{.* r8, .*, .*}}
-    ; CHECK:        {{.* r9, .*, .*}}
-    ; CHECK:        {{.* r10, .*, .*}}
-    ; CHECK:        brlid
-    call i32 (i8*,...)* @printf(i8* %MSG.1, i32 %tmp.14)
-
-    ret void
-}
diff --git a/test/CodeGen/MBlaze/div.ll b/test/CodeGen/MBlaze/div.ll
deleted file mode 100644
index 621784a4f21c..000000000000
--- a/test/CodeGen/MBlaze/div.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; Ensure that multiplication is lowered to function calls when the multiplier
-; unit is not available in the hardware and that function calls are not used
-; when the multiplier unit is available in the hardware.
-;
-; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s
-; RUN: llc < %s -march=mblaze -mattr=+div | FileCheck -check-prefix=DIV %s
-
-define i8 @test_i8(i8 %a, i8 %b) {
-    ; FUN:        test_i8:
-    ; DIV:        test_i8:
-
-    %tmp.1 = udiv i8 %a, %b
-    ; FUN-NOT:    idiv
-    ; FUN:        brlid
-    ; DIV-NOT:    brlid
-    ; DIV:        idiv
-
-    %tmp.2 = sdiv i8 %a, %b
-    ; FUN-NOT:    idiv
-    ; FUN:        brlid
-    ; DIV-NOT:    brlid
-    ; DIV-NOT:    idiv
-    ; DIV:        idivu
-
-    %tmp.3 = add i8 %tmp.1, %tmp.2
-    ret i8 %tmp.3
-    ; FUN:        rtsd
-    ; DIV:        rtsd
-}
-
-define i16 @test_i16(i16 %a, i16 %b) {
-    ; FUN:        test_i16:
-    ; DIV:        test_i16:
-
-    %tmp.1 = udiv i16 %a, %b
-    ; FUN-NOT:    idiv
-    ; FUN:        brlid
-    ; DIV-NOT:    brlid
-    ; DIV:        idiv
-
-    %tmp.2 = sdiv i16 %a, %b
-    ; FUN-NOT:    idiv
-    ; FUN:        brlid
-    ; DIV-NOT:    brlid
-    ; DIV-NOT:    idiv
-    ; DIV:        idivu
-
-    %tmp.3 = add i16 %tmp.1, %tmp.2
-    ret i16 %tmp.3
-    ; FUN:        rtsd
-    ; DIV:        rtsd
-}
-
-define i32 @test_i32(i32 %a, i32 %b) {
-    ; FUN:        test_i32:
-    ; DIV:        test_i32:
-
-    %tmp.1 = udiv i32 %a, %b
-    ; FUN-NOT:    idiv
-    ; FUN:        brlid
-    ; DIV-NOT:    brlid
-    ; DIV:        idiv
-
-    %tmp.2 = sdiv i32 %a, %b
-    ; FUN-NOT:    idiv
-    ; FUN:        brlid
-    ; DIV-NOT:    brlid
-    ; DIV-NOT:    idiv
-    ; DIV:        idivu
-
-    %tmp.3 = add i32 %tmp.1, %tmp.2
-    ret i32 %tmp.3
-    ; FUN:        rtsd
-    ; DIV:        rtsd
-}
diff --git a/test/CodeGen/MBlaze/fpu.ll b/test/CodeGen/MBlaze/fpu.ll
deleted file mode 100644
index 2aef4fd64105..000000000000
--- a/test/CodeGen/MBlaze/fpu.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; Ensure that floating point operations are lowered to function calls when the
-; FPU is not available in the hardware and that function calls are not used
-; when the FPU is available in the hardware.
-;
-; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s
-; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s
-
-define float @test_add(float %a, float %b) {
-    ; FUN:        test_add:
-    ; FPU:        test_add:
-
-    %tmp.1 = fadd float %a, %b
-    ; FUN:        brlid
-    ; FPU-NOT:    brlid
-
-    ret float %tmp.1
-    ; FUN:        rtsd
-    ; FPU:        rtsd
-    ; FUN-NOT:    fadd
-    ; FPU-NEXT:   fadd
-}
-
-define float @test_sub(float %a, float %b) {
-    ; FUN:        test_sub:
-    ; FPU:        test_sub:
-
-    %tmp.1 = fsub float %a, %b
-    ; FUN:        brlid
-    ; FPU-NOT:    brlid
-
-    ret float %tmp.1
-    ; FUN:        rtsd
-    ; FPU:        rtsd
-    ; FUN-NOT:    frsub
-    ; FPU-NEXT:   frsub
-}
-
-define float @test_mul(float %a, float %b) {
-    ; FUN:        test_mul:
-    ; FPU:        test_mul:
-
-    %tmp.1 = fmul float %a, %b
-    ; FUN:        brlid
-    ; FPU-NOT:    brlid
-
-    ret float %tmp.1
-    ; FUN:        rtsd
-    ; FPU:        rtsd
-    ; FUN-NOT:    fmul
-    ; FPU-NEXT:   fmul
-}
-
-define float @test_div(float %a, float %b) {
-    ; FUN:        test_div:
-    ; FPU:        test_div:
-
-    %tmp.1 = fdiv float %a, %b
-    ; FUN:        brlid
-    ; FPU-NOT:    brlid
-
-    ret float %tmp.1
-    ; FUN:        rtsd
-    ; FPU:        rtsd
-    ; FUN-NOT:    fdiv
-    ; FPU-NEXT:   fdiv
-}
diff --git a/test/CodeGen/MBlaze/fsl.ll b/test/CodeGen/MBlaze/fsl.ll
deleted file mode 100644
index 5444f82dd63c..000000000000
--- a/test/CodeGen/MBlaze/fsl.ll
+++ /dev/null
@@ -1,319 +0,0 @@
-; Ensure that the FSL instrinsic instruction generate single FSL instructions
-; at the machine level. Additionally, ensure that dynamic values use the
-; dynamic version of the instructions and that constant values use the
-; constant version of the instructions.
-;
-; RUN: llc -O3 < %s -march=mblaze | FileCheck %s
-
-declare i32 @llvm.mblaze.fsl.get(i32 %port)
-declare i32 @llvm.mblaze.fsl.aget(i32 %port)
-declare i32 @llvm.mblaze.fsl.cget(i32 %port)
-declare i32 @llvm.mblaze.fsl.caget(i32 %port)
-declare i32 @llvm.mblaze.fsl.eget(i32 %port)
-declare i32 @llvm.mblaze.fsl.eaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.ecget(i32 %port)
-declare i32 @llvm.mblaze.fsl.ecaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.nget(i32 %port)
-declare i32 @llvm.mblaze.fsl.naget(i32 %port)
-declare i32 @llvm.mblaze.fsl.ncget(i32 %port)
-declare i32 @llvm.mblaze.fsl.ncaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.neget(i32 %port)
-declare i32 @llvm.mblaze.fsl.neaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.necget(i32 %port)
-declare i32 @llvm.mblaze.fsl.necaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tget(i32 %port)
-declare i32 @llvm.mblaze.fsl.taget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tcget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tcaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.teget(i32 %port)
-declare i32 @llvm.mblaze.fsl.teaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tecget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tecaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tnget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tnaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tncget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tncaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tneget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tneaget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tnecget(i32 %port)
-declare i32 @llvm.mblaze.fsl.tnecaget(i32 %port)
-
-declare void @llvm.mblaze.fsl.put(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.aput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.cput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.caput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.nput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.naput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port)
-declare void @llvm.mblaze.fsl.tput(i32 %port)
-declare void @llvm.mblaze.fsl.taput(i32 %port)
-declare void @llvm.mblaze.fsl.tcput(i32 %port)
-declare void @llvm.mblaze.fsl.tcaput(i32 %port)
-declare void @llvm.mblaze.fsl.tnput(i32 %port)
-declare void @llvm.mblaze.fsl.tnaput(i32 %port)
-declare void @llvm.mblaze.fsl.tncput(i32 %port)
-declare void @llvm.mblaze.fsl.tncaput(i32 %port)
-
-define void @fsl_get(i32 %port) {
-    ; CHECK:        fsl_get:
-    %v0  = call i32 @llvm.mblaze.fsl.get(i32 %port)
-    ; CHECK:        getd
-    %v1  = call i32 @llvm.mblaze.fsl.aget(i32 %port)
-    ; CHECK-NEXT:   agetd
-    %v2  = call i32 @llvm.mblaze.fsl.cget(i32 %port)
-    ; CHECK-NEXT:   cgetd
-    %v3  = call i32 @llvm.mblaze.fsl.caget(i32 %port)
-    ; CHECK-NEXT:   cagetd
-    %v4  = call i32 @llvm.mblaze.fsl.eget(i32 %port)
-    ; CHECK-NEXT:   egetd
-    %v5  = call i32 @llvm.mblaze.fsl.eaget(i32 %port)
-    ; CHECK-NEXT:   eagetd
-    %v6  = call i32 @llvm.mblaze.fsl.ecget(i32 %port)
-    ; CHECK-NEXT:   ecgetd
-    %v7  = call i32 @llvm.mblaze.fsl.ecaget(i32 %port)
-    ; CHECK-NEXT:   ecagetd
-    %v8  = call i32 @llvm.mblaze.fsl.nget(i32 %port)
-    ; CHECK-NEXT:   ngetd
-    %v9  = call i32 @llvm.mblaze.fsl.naget(i32 %port)
-    ; CHECK-NEXT:   nagetd
-    %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 %port)
-    ; CHECK-NEXT:   ncgetd
-    %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 %port)
-    ; CHECK-NEXT:   ncagetd
-    %v12 = call i32 @llvm.mblaze.fsl.neget(i32 %port)
-    ; CHECK-NEXT:   negetd
-    %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 %port)
-    ; CHECK-NEXT:   neagetd
-    %v14 = call i32 @llvm.mblaze.fsl.necget(i32 %port)
-    ; CHECK-NEXT:   necgetd
-    %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 %port)
-    ; CHECK-NEXT:   necagetd
-    %v16 = call i32 @llvm.mblaze.fsl.tget(i32 %port)
-    ; CHECK-NEXT:   tgetd
-    %v17 = call i32 @llvm.mblaze.fsl.taget(i32 %port)
-    ; CHECK-NEXT:   tagetd
-    %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 %port)
-    ; CHECK-NEXT:   tcgetd
-    %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 %port)
-    ; CHECK-NEXT:   tcagetd
-    %v20 = call i32 @llvm.mblaze.fsl.teget(i32 %port)
-    ; CHECK-NEXT:   tegetd
-    %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 %port)
-    ; CHECK-NEXT:   teagetd
-    %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 %port)
-    ; CHECK-NEXT:   tecgetd
-    %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 %port)
-    ; CHECK-NEXT:   tecagetd
-    %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 %port)
-    ; CHECK-NEXT:   tngetd
-    %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 %port)
-    ; CHECK-NEXT:   tnagetd
-    %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 %port)
-    ; CHECK-NEXT:   tncgetd
-    %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 %port)
-    ; CHECK-NEXT:   tncagetd
-    %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 %port)
-    ; CHECK-NEXT:   tnegetd
-    %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 %port)
-    ; CHECK-NEXT:   tneagetd
-    %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 %port)
-    ; CHECK-NEXT:   tnecgetd
-    %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port)
-    ; CHECK-NEXT:   tnecagetd
-    ret void
-    ; CHECK:        rtsd
-}
-
-define void @fslc_get() {
-    ; CHECK:        fslc_get:
-    %v0  = call i32 @llvm.mblaze.fsl.get(i32 1)
-    ; CHECK:        get
-    %v1  = call i32 @llvm.mblaze.fsl.aget(i32 1)
-    ; CHECK-NOT:    agetd
-    ; CHECK:        aget
-    %v2  = call i32 @llvm.mblaze.fsl.cget(i32 1)
-    ; CHECK-NOT:    cgetd
-    ; CHECK:        cget
-    %v3  = call i32 @llvm.mblaze.fsl.caget(i32 1)
-    ; CHECK-NOT:    cagetd
-    ; CHECK:        caget
-    %v4  = call i32 @llvm.mblaze.fsl.eget(i32 1)
-    ; CHECK-NOT:    egetd
-    ; CHECK:        eget
-    %v5  = call i32 @llvm.mblaze.fsl.eaget(i32 1)
-    ; CHECK-NOT:    eagetd
-    ; CHECK:        eaget
-    %v6  = call i32 @llvm.mblaze.fsl.ecget(i32 1)
-    ; CHECK-NOT:    ecgetd
-    ; CHECK:        ecget
-    %v7  = call i32 @llvm.mblaze.fsl.ecaget(i32 1)
-    ; CHECK-NOT:    ecagetd
-    ; CHECK:        ecaget
-    %v8  = call i32 @llvm.mblaze.fsl.nget(i32 1)
-    ; CHECK-NOT:    ngetd
-    ; CHECK:        nget
-    %v9  = call i32 @llvm.mblaze.fsl.naget(i32 1)
-    ; CHECK-NOT:    nagetd
-    ; CHECK:        naget
-    %v10 = call i32 @llvm.mblaze.fsl.ncget(i32 1)
-    ; CHECK-NOT:    ncgetd
-    ; CHECK:        ncget
-    %v11 = call i32 @llvm.mblaze.fsl.ncaget(i32 1)
-    ; CHECK-NOT:    ncagetd
-    ; CHECK:        ncaget
-    %v12 = call i32 @llvm.mblaze.fsl.neget(i32 1)
-    ; CHECK-NOT:    negetd
-    ; CHECK:        neget
-    %v13 = call i32 @llvm.mblaze.fsl.neaget(i32 1)
-    ; CHECK-NOT:    neagetd
-    ; CHECK:        neaget
-    %v14 = call i32 @llvm.mblaze.fsl.necget(i32 1)
-    ; CHECK-NOT:    necgetd
-    ; CHECK:        necget
-    %v15 = call i32 @llvm.mblaze.fsl.necaget(i32 1)
-    ; CHECK-NOT:    necagetd
-    ; CHECK:        necaget
-    %v16 = call i32 @llvm.mblaze.fsl.tget(i32 1)
-    ; CHECK-NOT:    tgetd
-    ; CHECK:        tget
-    %v17 = call i32 @llvm.mblaze.fsl.taget(i32 1)
-    ; CHECK-NOT:    tagetd
-    ; CHECK:        taget
-    %v18 = call i32 @llvm.mblaze.fsl.tcget(i32 1)
-    ; CHECK-NOT:    tcgetd
-    ; CHECK:        tcget
-    %v19 = call i32 @llvm.mblaze.fsl.tcaget(i32 1)
-    ; CHECK-NOT:    tcagetd
-    ; CHECK:        tcaget
-    %v20 = call i32 @llvm.mblaze.fsl.teget(i32 1)
-    ; CHECK-NOT:    tegetd
-    ; CHECK:        teget
-    %v21 = call i32 @llvm.mblaze.fsl.teaget(i32 1)
-    ; CHECK-NOT:    teagetd
-    ; CHECK:        teaget
-    %v22 = call i32 @llvm.mblaze.fsl.tecget(i32 1)
-    ; CHECK-NOT:    tecgetd
-    ; CHECK:        tecget
-    %v23 = call i32 @llvm.mblaze.fsl.tecaget(i32 1)
-    ; CHECK-NOT:    tecagetd
-    ; CHECK:        tecaget
-    %v24 = call i32 @llvm.mblaze.fsl.tnget(i32 1)
-    ; CHECK-NOT:    tngetd
-    ; CHECK:        tnget
-    %v25 = call i32 @llvm.mblaze.fsl.tnaget(i32 1)
-    ; CHECK-NOT:    tnagetd
-    ; CHECK:        tnaget
-    %v26 = call i32 @llvm.mblaze.fsl.tncget(i32 1)
-    ; CHECK-NOT:    tncgetd
-    ; CHECK:        tncget
-    %v27 = call i32 @llvm.mblaze.fsl.tncaget(i32 1)
-    ; CHECK-NOT:    tncagetd
-    ; CHECK:        tncaget
-    %v28 = call i32 @llvm.mblaze.fsl.tneget(i32 1)
-    ; CHECK-NOT:    tnegetd
-    ; CHECK:        tneget
-    %v29 = call i32 @llvm.mblaze.fsl.tneaget(i32 1)
-    ; CHECK-NOT:    tneagetd
-    ; CHECK:        tneaget
-    %v30 = call i32 @llvm.mblaze.fsl.tnecget(i32 1)
-    ; CHECK-NOT:    tnecgetd
-    ; CHECK:        tnecget
-    %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1)
-    ; CHECK-NOT:    tnecagetd
-    ; CHECK:        tnecaget
-    ret void
-    ; CHECK:        rtsd
-}
-
-define void @putfsl(i32 %value, i32 %port) {
-    ; CHECK:        putfsl:
-    call void @llvm.mblaze.fsl.put(i32 %value, i32 %port)
-    ; CHECK:        putd
-    call void @llvm.mblaze.fsl.aput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   aputd
-    call void @llvm.mblaze.fsl.cput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   cputd
-    call void @llvm.mblaze.fsl.caput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   caputd
-    call void @llvm.mblaze.fsl.nput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   nputd
-    call void @llvm.mblaze.fsl.naput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   naputd
-    call void @llvm.mblaze.fsl.ncput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   ncputd
-    call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 %port)
-    ; CHECK-NEXT:   ncaputd
-    call void @llvm.mblaze.fsl.tput(i32 %port)
-    ; CHECK-NEXT:   tputd
-    call void @llvm.mblaze.fsl.taput(i32 %port)
-    ; CHECK-NEXT:   taputd
-    call void @llvm.mblaze.fsl.tcput(i32 %port)
-    ; CHECK-NEXT:   tcputd
-    call void @llvm.mblaze.fsl.tcaput(i32 %port)
-    ; CHECK-NEXT:   tcaputd
-    call void @llvm.mblaze.fsl.tnput(i32 %port)
-    ; CHECK-NEXT:   tnputd
-    call void @llvm.mblaze.fsl.tnaput(i32 %port)
-    ; CHECK-NEXT:   tnaputd
-    call void @llvm.mblaze.fsl.tncput(i32 %port)
-    ; CHECK-NEXT:   tncputd
-    call void @llvm.mblaze.fsl.tncaput(i32 %port)
-    ; CHECK-NEXT:   tncaputd
-    ret void
-    ; CHECK:        rtsd
-}
-
-define void @putfsl_const(i32 %value) {
-    ; CHECK:        putfsl_const:
-    call void @llvm.mblaze.fsl.put(i32 %value, i32 1)
-    ; CHECK-NOT:    putd
-    ; CHECK:        put
-    call void @llvm.mblaze.fsl.aput(i32 %value, i32 1)
-    ; CHECK-NOT:    aputd
-    ; CHECK:        aput
-    call void @llvm.mblaze.fsl.cput(i32 %value, i32 1)
-    ; CHECK-NOT:    cputd
-    ; CHECK:        cput
-    call void @llvm.mblaze.fsl.caput(i32 %value, i32 1)
-    ; CHECK-NOT:    caputd
-    ; CHECK:        caput
-    call void @llvm.mblaze.fsl.nput(i32 %value, i32 1)
-    ; CHECK-NOT:    nputd
-    ; CHECK:        nput
-    call void @llvm.mblaze.fsl.naput(i32 %value, i32 1)
-    ; CHECK-NOT:    naputd
-    ; CHECK:        naput
-    call void @llvm.mblaze.fsl.ncput(i32 %value, i32 1)
-    ; CHECK-NOT:    ncputd
-    ; CHECK:        ncput
-    call void @llvm.mblaze.fsl.ncaput(i32 %value, i32 1)
-    ; CHECK-NOT:    ncaputd
-    ; CHECK:        ncaput
-    call void @llvm.mblaze.fsl.tput(i32 1)
-    ; CHECK-NOT:    tputd
-    ; CHECK:        tput
-    call void @llvm.mblaze.fsl.taput(i32 1)
-    ; CHECK-NOT:    taputd
-    ; CHECK:        taput
-    call void @llvm.mblaze.fsl.tcput(i32 1)
-    ; CHECK-NOT:    tcputd
-    ; CHECK:        tcput
-    call void @llvm.mblaze.fsl.tcaput(i32 1)
-    ; CHECK-NOT:    tcaputd
-    ; CHECK:        tcaput
-    call void @llvm.mblaze.fsl.tnput(i32 1)
-    ; CHECK-NOT:    tnputd
-    ; CHECK:        tnput
-    call void @llvm.mblaze.fsl.tnaput(i32 1)
-    ; CHECK-NOT:    tnaputd
-    ; CHECK:        tnaput
-    call void @llvm.mblaze.fsl.tncput(i32 1)
-    ; CHECK-NOT:    tncputd
-    ; CHECK:        tncput
-    call void @llvm.mblaze.fsl.tncaput(i32 1)
-    ; CHECK-NOT:    tncaputd
-    ; CHECK:        tncaput
-    ret void
-    ; CHECK:        rtsd
-}
diff --git a/test/CodeGen/MBlaze/imm.ll b/test/CodeGen/MBlaze/imm.ll
deleted file mode 100644
index 6effd3e09a24..000000000000
--- a/test/CodeGen/MBlaze/imm.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; Ensure that all immediate values that are 32-bits or less can be loaded 
-; using a single instruction and that immediate values 64-bits or less can
-; be loaded using two instructions.
-;
-; RUN: llc < %s -march=mblaze | FileCheck %s
-; RUN: llc < %s -march=mblaze -mattr=+fpu | FileCheck -check-prefix=FPU %s
-
-define i8 @retimm_i8() {
-    ; CHECK:        retimm_i8:
-    ; CHECK:        rtsd
-    ; CHECK-NEXT:   add
-    ; FPU:          retimm_i8:
-    ; FPU:          rtsd
-    ; FPU-NEXT:     add
-    ret i8 123
-}
-
-define i16 @retimm_i16() {
-    ; CHECK:        retimm_i16:
-    ; CHECK:        rtsd
-    ; CHECK-NEXT:   add
-    ; FPU:          retimm_i16:
-    ; FPU:          rtsd
-    ; FPU-NEXT:     add
-    ret i16 31212
-}
-
-define i32 @retimm_i32() {
-    ; CHECK:        retimm_i32:
-    ; CHECK:        add
-    ; CHECK-NEXT:   rtsd
-    ; FPU:          retimm_i32:
-    ; FPU:          add
-    ; FPU-NEXT:     rtsd
-    ret i32 2938128
-}
-
-define i64 @retimm_i64() {
-    ; CHECK:        retimm_i64:
-    ; CHECK:        add
-    ; CHECK-NEXT:   rtsd
-    ; CHECK-NEXT:   add
-    ; FPU:          retimm_i64:
-    ; FPU:          add
-    ; FPU-NEXT:     rtsd
-    ; FPU-NEXT:     add
-    ret i64 94581823
-}
-
-define float @retimm_float() {
-    ; CHECK:        retimm_float:
-    ; CHECK:        add
-    ; CHECK-NEXT:   rtsd
-    ; FPU:          retimm_float:
-    ; FPU:          or
-    ; FPU-NEXT:     rtsd
-    ret float 12.0
-}
-
-define double @retimm_double() {
-    ; CHECK:        retimm_double:
-    ; CHECK:        add
-    ; CHECK-NEXT:   add
-    ; CHECK-NEXT:   rtsd
-    ; FPU:          retimm_double:
-    ; FPU:          add
-    ; FPU-NEXT:     add
-    ; FPU-NEXT:     rtsd
-    ret double 598382.39283873
-}
diff --git a/test/CodeGen/MBlaze/intr.ll b/test/CodeGen/MBlaze/intr.ll
deleted file mode 100644
index 79c6bffd00cb..000000000000
--- a/test/CodeGen/MBlaze/intr.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; Ensure that the MBlaze interrupt_handler calling convention (cc73) is handled
-; correctly correctly by the MBlaze backend.
-;
-; RUN: llc < %s -march=mblaze | FileCheck %s
-
-@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00"
-@_interrupt_handler = alias void ()* @myintr
-
-define cc73 void @myintr() nounwind noinline {
-  ; CHECK:        myintr:
-  ; CHECK:        swi   r3, r1
-  ; CHECK:        swi   r4, r1
-  ; CHECK:        swi   r5, r1
-  ; CHECK:        swi   r6, r1
-  ; CHECK:        swi   r7, r1
-  ; CHECK:        swi   r8, r1
-  ; CHECK:        swi   r9, r1
-  ; CHECK:        swi   r10, r1
-  ; CHECK:        swi   r11, r1
-  ; CHECK:        swi   r12, r1
-  ; CHECK:        swi   r17, r1
-  ; CHECK:        swi   r18, r1
-  ; CHECK:        mfs   r11, rmsr
-  ; CHECK:        swi   r11, r1
-  entry:
-    %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0))
-      ret void
-
-  ; CHECK:        lwi   r11, r1
-  ; CHECK:        mts   rmsr, r11
-  ; CHECK:        lwi   r18, r1
-  ; CHECK:        lwi   r17, r1
-  ; CHECK:        lwi   r12, r1
-  ; CHECK:        lwi   r11, r1
-  ; CHECK:        lwi   r10, r1
-  ; CHECK:        lwi   r9, r1
-  ; CHECK:        lwi   r8, r1
-  ; CHECK:        lwi   r7, r1
-  ; CHECK:        lwi   r6, r1
-  ; CHECK:        lwi   r5, r1
-  ; CHECK:        lwi   r4, r1
-  ; CHECK:        lwi   r3, r1
-  ; CHECK:        rtid  r14, 0
-}
-
-  ; CHECK:    .globl  _interrupt_handler
-  ; CHECK:    _interrupt_handler = myintr
-declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/MBlaze/jumptable.ll b/test/CodeGen/MBlaze/jumptable.ll
deleted file mode 100644
index 299084d0ed23..000000000000
--- a/test/CodeGen/MBlaze/jumptable.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; Ensure that jump tables can be handled by the mblaze backend. The
-; jump table should be lowered to a "br" instruction using one of the
-; available registers.
-;
-; RUN: llc < %s -march=mblaze | FileCheck %s
-
-define i32 @jmptable(i32 %arg)
-{
-    ; CHECK:        jmptable:
-    switch i32 %arg, label %DEFAULT [ i32 0, label %L0
-                                      i32 1, label %L1
-                                      i32 2, label %L2
-                                      i32 3, label %L3
-                                      i32 4, label %L4
-                                      i32 5, label %L5
-                                      i32 6, label %L6
-                                      i32 7, label %L7
-                                      i32 8, label %L8
-                                      i32 9, label %L9 ]
-
-    ; CHECK:        lw   [[REG:r[0-9]*]]
-    ; CHECK:        brad [[REG]]
-L0:
-    %var0 = add i32 %arg, 0
-    br label %DONE
-
-L1:
-    %var1 = add i32 %arg, 1
-    br label %DONE
-
-L2:
-    %var2 = add i32 %arg, 2
-    br label %DONE
-
-L3:
-    %var3 = add i32 %arg, 3
-    br label %DONE
-
-L4:
-    %var4 = add i32 %arg, 4
-    br label %DONE
-
-L5:
-    %var5 = add i32 %arg, 5
-    br label %DONE
-
-L6:
-    %var6 = add i32 %arg, 6
-    br label %DONE
-
-L7:
-    %var7 = add i32 %arg, 7
-    br label %DONE
-
-L8:
-    %var8 = add i32 %arg, 8
-    br label %DONE
-
-L9:
-    %var9 = add i32 %arg, 9
-    br label %DONE
-
-DEFAULT:
-    unreachable
-
-DONE:
-    %rval = phi i32 [ %var0, %L0 ],
-                    [ %var1, %L1 ],
-                    [ %var2, %L2 ],
-                    [ %var3, %L3 ],
-                    [ %var4, %L4 ],
-                    [ %var5, %L5 ],
-                    [ %var6, %L6 ],
-                    [ %var7, %L7 ],
-                    [ %var8, %L8 ],
-                    [ %var9, %L9 ]
-    ret i32 %rval
-    ; CHECK:        rtsd
-}
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
deleted file mode 100644
index ff4928de4b9c..000000000000
--- a/test/CodeGen/MBlaze/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
-targets = set(config.root.targets_to_build.split())
-if not 'MBlaze' in targets:
-    config.unsupported = True
-
diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll
deleted file mode 100644
index 7439d0b6fe22..000000000000
--- a/test/CodeGen/MBlaze/loop.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; Test some complicated looping constructs to ensure that they
-; compile successfully and that some sort of branching is used
-; in the resulting code.
-;
-; RUN: llc < %s -march=mblaze -mattr=+mul,+fpu,+barrel | FileCheck %s
-
-declare i32 @printf(i8*, ...)
-@MSG = internal constant [19 x i8] c"Message: %d %d %d\0A\00"
-
-define i32 @loop(i32 %a, i32 %b)
-{
-    ; CHECK:        loop:
-entry:
-    br label %loop_outer
-
-loop_outer:
-    %outer.0 = phi i32 [ 0, %entry ], [ %outer.2, %loop_outer_finish ]
-    br label %loop_inner
-
-loop_inner:
-    %inner.0 = phi i32 [ %a, %loop_outer ], [ %inner.3, %loop_inner_finish ]
-    %inner.1 = phi i32 [ %b, %loop_outer ], [ %inner.4, %loop_inner_finish ]
-    %inner.2 = phi i32 [  0, %loop_outer ], [ %inner.5, %loop_inner_finish ]
-    %inner.3 = add i32 %inner.0, %inner.1
-    %inner.4 = mul i32 %inner.2, 11
-    br label %loop_inner_finish
-
-loop_inner_finish:
-    %inner.5 = add i32 %inner.2, 1
-    call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0),
-                                 i32 %inner.0, i32 %inner.1, i32 %inner.2 )
-
-    %inner.6 = icmp eq i32 %inner.5, 100
-    ; CHECK:        cmp [[REG:r[0-9]*]]
-
-    br i1 %inner.6, label %loop_inner, label %loop_outer_finish
-    ; CHECK:        {{beqid|bneid}} [[REG]]
-
-loop_outer_finish:
-    %outer.1 = add i32 %outer.0, 1
-    %outer.2 = urem i32 %outer.1, 1500
-    br label %loop_outer
-    ; CHECK:        br
-}
diff --git a/test/CodeGen/MBlaze/mul.ll b/test/CodeGen/MBlaze/mul.ll
deleted file mode 100644
index cefdb8d56f21..000000000000
--- a/test/CodeGen/MBlaze/mul.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; Ensure that multiplication is lowered to function calls when the multiplier
-; unit is not available in the hardware and that function calls are not used
-; when the multiplier unit is available in the hardware.
-;
-; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s
-; RUN: llc < %s -march=mblaze -mattr=+mul | FileCheck -check-prefix=MUL %s
-
-define i8 @test_i8(i8 %a, i8 %b) {
-    ; FUN:        test_i8:
-    ; MUL:        test_i8:
-
-    %tmp.1 = mul i8 %a, %b
-    ; FUN-NOT:    mul
-    ; FUN:        brlid
-    ; MUL-NOT:    brlid
-
-    ret i8 %tmp.1
-    ; FUN:        rtsd
-    ; MUL:        rtsd
-    ; MUL:        mul
-}
-
-define i16 @test_i16(i16 %a, i16 %b) {
-    ; FUN:        test_i16:
-    ; MUL:        test_i16:
-
-    %tmp.1 = mul i16 %a, %b
-    ; FUN-NOT:    mul
-    ; FUN:        brlid
-    ; MUL-NOT:    brlid
-
-    ret i16 %tmp.1
-    ; FUN:        rtsd
-    ; MUL:        rtsd
-    ; MUL:        mul
-}
-
-define i32 @test_i32(i32 %a, i32 %b) {
-    ; FUN:        test_i32:
-    ; MUL:        test_i32:
-
-    %tmp.1 = mul i32 %a, %b
-    ; FUN-NOT:    mul
-    ; FUN:        brlid
-    ; MUL-NOT:    brlid
-
-    ret i32 %tmp.1
-    ; FUN:        rtsd
-    ; MUL:        rtsd
-    ; MUL:        mul
-}
diff --git a/test/CodeGen/MBlaze/mul64.ll b/test/CodeGen/MBlaze/mul64.ll
deleted file mode 100644
index e0ef4138af74..000000000000
--- a/test/CodeGen/MBlaze/mul64.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Ensure that multiplication is lowered to function calls when the 64-bit
-; multiplier unit is not available in the hardware and that function calls
-; are not used when the 64-bit multiplier unit is available in the hardware.
-;
-; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s
-; RUN: llc < %s -march=mblaze -mattr=+mul,+mul64 | \
-; RUN:      FileCheck -check-prefix=MUL %s
-
-define i64 @test_i64(i64 %a, i64 %b) {
-    ; FUN:        test_i64:
-    ; MUL:        test_i64:
-
-    %tmp.1 = mul i64 %a, %b
-    ; FUN-NOT:    mul
-    ; FUN:        brlid
-    ; MUL-NOT:    brlid
-    ; MUL:        mulh
-    ; MUL:        mul
-
-    ret i64 %tmp.1
-    ; FUN:        rtsd
-    ; MUL:        rtsd
-}
diff --git a/test/CodeGen/MBlaze/select.ll b/test/CodeGen/MBlaze/select.ll
deleted file mode 100644
index 47a88a1e3c25..000000000000
--- a/test/CodeGen/MBlaze/select.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Ensure that the select instruction is supported and is lowered to 
-; some sort of branch instruction.
-;
-; RUN: llc < %s -march=mblaze | FileCheck %s
-
-define i32 @testsel(i32 %a, i32 %b)
-{
-    ; CHECK:        testsel:
-    %tmp.1 = icmp eq i32 %a, %b
-    ; CHECK:        cmp
-    %tmp.2 = select i1 %tmp.1, i32 %a, i32 %b
-    ; CHECK:        {{bne|beq}}
-    ret i32 %tmp.2
-    ; CHECK:        rtsd
-}
diff --git a/test/CodeGen/MBlaze/shift.ll b/test/CodeGen/MBlaze/shift.ll
deleted file mode 100644
index 99f0519c020c..000000000000
--- a/test/CodeGen/MBlaze/shift.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; Ensure that shifts are lowered to loops when the barrel shifter unit is
-; not available in the hardware and that loops are not used when the
-; barrel shifter unit is available in the hardware.
-;
-; RUN: llc < %s -march=mblaze | FileCheck -check-prefix=FUN %s
-; RUN: llc < %s -march=mblaze -mattr=+barrel | FileCheck -check-prefix=SHT %s
-
-define i8 @test_i8(i8 %a, i8 %b) {
-    ; FUN:        test_i8:
-    ; SHT:        test_i8:
-
-    %tmp.1 = shl i8 %a, %b
-    ; FUN:        andi
-    ; FUN:        add
-    ; FUN:        bnei
-    ; SHT-NOT:    bnei
-
-    ret i8 %tmp.1
-    ; FUN:        rtsd
-    ; SHT:        rtsd
-    ; FUN-NOT:    bsll
-    ; SHT-NEXT:   bsll
-}
-
-define i8 @testc_i8(i8 %a, i8 %b) {
-    ; FUN:        testc_i8:
-    ; SHT:        testc_i8:
-
-    %tmp.1 = shl i8 %a, 5
-    ; FUN:        andi
-    ; FUN:        add
-    ; FUN:        bnei
-    ; SHT-NOT:    andi
-    ; SHT-NOT:    add
-    ; SHT-NOT:    bnei
-
-    ret i8 %tmp.1
-    ; FUN:        rtsd
-    ; SHT:        rtsd
-    ; FUN-NOT:    bsll
-    ; SHT-NEXT:   bslli
-}
-
-define i16 @test_i16(i16 %a, i16 %b) {
-    ; FUN:        test_i16:
-    ; SHT:        test_i16:
-
-    %tmp.1 = shl i16 %a, %b
-    ; FUN:        andi
-    ; FUN:        add
-    ; FUN:        bnei
-    ; SHT-NOT:    bnei
-
-    ret i16 %tmp.1
-    ; FUN:        rtsd
-    ; SHT:        rtsd
-    ; FUN-NOT:    bsll
-    ; SHT-NEXT:   bsll
-}
-
-define i16 @testc_i16(i16 %a, i16 %b) {
-    ; FUN:        testc_i16:
-    ; SHT:        testc_i16:
-
-    %tmp.1 = shl i16 %a, 5
-    ; FUN:        andi
-    ; FUN:        add
-    ; FUN:        bnei
-    ; SHT-NOT:    andi
-    ; SHT-NOT:    add
-    ; SHT-NOT:    bnei
-
-    ret i16 %tmp.1
-    ; FUN:        rtsd
-    ; SHT:        rtsd
-    ; FUN-NOT:    bsll
-    ; SHT-NEXT:   bslli
-}
-
-define i32 @test_i32(i32 %a, i32 %b) {
-    ; FUN:        test_i32:
-    ; SHT:        test_i32:
-
-    %tmp.1 = shl i32 %a, %b
-    ; FUN:        andi
-    ; FUN:        add
-    ; FUN:        bnei
-    ; SHT-NOT:    andi
-    ; SHT-NOT:    bnei
-
-    ret i32 %tmp.1
-    ; FUN:        rtsd
-    ; SHT:        rtsd
-    ; FUN-NOT:    bsll
-    ; SHT-NEXT:   bsll
-}
-
-define i32 @testc_i32(i32 %a, i32 %b) {
-    ; FUN:        testc_i32:
-    ; SHT:        testc_i32:
-
-    %tmp.1 = shl i32 %a, 5
-    ; FUN:        andi
-    ; FUN:        add
-    ; FUN:        bnei
-    ; SHT-NOT:    andi
-    ; SHT-NOT:    add
-    ; SHT-NOT:    bnei
-
-    ret i32 %tmp.1
-    ; FUN:        rtsd
-    ; SHT:        rtsd
-    ; FUN-NOT:    bsll
-    ; SHT-NEXT:   bslli
-}
diff --git a/test/CodeGen/MBlaze/svol.ll b/test/CodeGen/MBlaze/svol.ll
deleted file mode 100644
index c1e96202845a..000000000000
--- a/test/CodeGen/MBlaze/svol.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; Ensure that the MBlaze save_volatiles calling convention (cc74) is handled
-; correctly correctly by the MBlaze backend.
-;
-; RUN: llc < %s -march=mblaze | FileCheck %s
-
-@.str = private constant [28 x i8] c"The interrupt has gone off\0A\00"
-
-define cc74 void @mysvol() nounwind noinline {
-  ; CHECK:        mysvol:
-  ; CHECK:        swi   r3, r1
-  ; CHECK:        swi   r4, r1
-  ; CHECK:        swi   r5, r1
-  ; CHECK:        swi   r6, r1
-  ; CHECK:        swi   r7, r1
-  ; CHECK:        swi   r8, r1
-  ; CHECK:        swi   r9, r1
-  ; CHECK:        swi   r10, r1
-  ; CHECK:        swi   r11, r1
-  ; CHECK:        swi   r12, r1
-  ; CHECK:        swi   r17, r1
-  ; CHECK:        swi   r18, r1
-  ; CHECK-NOT:    mfs   r11, rmsr
-  entry:
-    %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i32 0, i32 0))
-      ret void
-
-  ; CHECK-NOT:    mts   rmsr, r11
-  ; CHECK:        lwi   r18, r1
-  ; CHECK:        lwi   r17, r1
-  ; CHECK:        lwi   r12, r1
-  ; CHECK:        lwi   r11, r1
-  ; CHECK:        lwi   r10, r1
-  ; CHECK:        lwi   r9, r1
-  ; CHECK:        lwi   r8, r1
-  ; CHECK:        lwi   r7, r1
-  ; CHECK:        lwi   r6, r1
-  ; CHECK:        lwi   r5, r1
-  ; CHECK:        lwi   r4, r1
-  ; CHECK:        lwi   r3, r1
-  ; CHECK:        rtsd  r15, 8
-}
-
-define cc74 void @mysvol2() nounwind noinline {
-  ; CHECK:        mysvol2:
-  ; CHECK-NOT:    swi   r3, r1
-  ; CHECK-NOT:    swi   r4, r1
-  ; CHECK-NOT:    swi   r5, r1
-  ; CHECK-NOT:    swi   r6, r1
-  ; CHECK-NOT:    swi   r7, r1
-  ; CHECK-NOT:    swi   r8, r1
-  ; CHECK-NOT:    swi   r9, r1
-  ; CHECK-NOT:    swi   r10, r1
-  ; CHECK-NOT:    swi   r11, r1
-  ; CHECK-NOT:    swi   r12, r1
-  ; CHECK:        swi   r17, r1
-  ; CHECK:        swi   r18, r1
-  ; CHECK-NOT:    mfs   r11, rmsr
-entry:
-
-  ; CHECK-NOT:    mts   rmsr, r11
-  ; CHECK:        lwi   r18, r1
-  ; CHECK:        lwi   r17, r1
-  ; CHECK-NOT:    lwi   r12, r1
-  ; CHECK-NOT:    lwi   r11, r1
-  ; CHECK-NOT:    lwi   r10, r1
-  ; CHECK-NOT:    lwi   r9, r1
-  ; CHECK-NOT:    lwi   r8, r1
-  ; CHECK-NOT:    lwi   r7, r1
-  ; CHECK-NOT:    lwi   r6, r1
-  ; CHECK-NOT:    lwi   r5, r1
-  ; CHECK-NOT:    lwi   r4, r1
-  ; CHECK-NOT:    lwi   r3, r1
-  ; CHECK:        rtsd  r15, 8
-  ret void
-}
-
-  ; CHECK-NOT:    .globl  _interrupt_handler
-  ; CHECK-NOT:    _interrupt_handler = mysvol
-  ; CHECK-NOT:    _interrupt_handler = mysvol2
-declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll b/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
index 94fe5c70e84c..dce9d25ca87a 100644
--- a/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
+++ b/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
@@ -7,7 +7,7 @@ target triple = "msp430-elf"
 
 define signext i8 @foo(i8 signext %_si1, i8 signext %_si2) nounwind readnone {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: call #__mulqi3
   %mul = mul i8 %_si2, %_si1                      ; <i8> [#uses=1]
   ret i8 %mul
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index c7ecb5ab853f..44c92ebc82cc 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -7,7 +7,7 @@ define i16 @am1(i16 %x, i16* %a) nounwind {
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
-; CHECK: am1:
+; CHECK-LABEL: am1:
 ; CHECK:		bis.w	0(r14), r15
 
 @foo = external global i16
@@ -17,7 +17,7 @@ define i16 @am2(i16 %x) nounwind {
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
-; CHECK: am2:
+; CHECK-LABEL: am2:
 ; CHECK:		bis.w	&foo, r15
 
 @bar = internal constant [2 x i8] [ i8 32, i8 64 ]
@@ -28,7 +28,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind {
 	%3 = or i8 %2,%x
 	ret i8 %3
 }
-; CHECK: am3:
+; CHECK-LABEL: am3:
 ; CHECK:		bis.b	bar(r14), r15
 
 define i16 @am4(i16 %x) nounwind {
@@ -36,7 +36,7 @@ define i16 @am4(i16 %x) nounwind {
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
-; CHECK: am4:
+; CHECK-LABEL: am4:
 ; CHECK:		bis.w	&32, r15
 
 define i16 @am5(i16 %x, i16* %a) nounwind {
@@ -45,7 +45,7 @@ define i16 @am5(i16 %x, i16* %a) nounwind {
 	%3 = or i16 %2,%x
 	ret i16 %3
 }
-; CHECK: am5:
+; CHECK-LABEL: am5:
 ; CHECK:		bis.w	4(r14), r15
 
 %S = type { i16, i16 }
@@ -56,7 +56,7 @@ define i16 @am6(i16 %x) nounwind {
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
-; CHECK: am6:
+; CHECK-LABEL: am6:
 ; CHECK:		bis.w	&baz+2, r15
 
 %T = type { i16, [2 x i8] }
@@ -69,6 +69,6 @@ define i8 @am7(i8 %x, i16 %n) nounwind {
 	%4 = or i8 %3,%x
 	ret i8 %4
 }
-; CHECK: am7:
+; CHECK-LABEL: am7:
 ; CHECK:		bis.b	duh+2(r14), r15
 
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index 727c29fc082b..06a3d32d8aa4 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -8,7 +8,7 @@ define void @am1(i16* %a, i16 %x) nounwind {
 	store i16 %2, i16* %a
 	ret void
 }
-; CHECK: am1:
+; CHECK-LABEL: am1:
 ; CHECK:		bis.w	r14, 0(r15)
 
 @foo = external global i16
@@ -19,7 +19,7 @@ define void @am2(i16 %x) nounwind {
 	store i16 %2, i16* @foo
 	ret void
 }
-; CHECK: am2:
+; CHECK-LABEL: am2:
 ; CHECK:		bis.w	r15, &foo
 
 @bar = external global [2 x i8]
@@ -31,7 +31,7 @@ define void @am3(i16 %i, i8 %x) nounwind {
 	store i8 %3, i8* %1
 	ret void
 }
-; CHECK: am3:
+; CHECK-LABEL: am3:
 ; CHECK:		bis.b	r14, bar(r15)
 
 define void @am4(i16 %x) nounwind {
@@ -40,7 +40,7 @@ define void @am4(i16 %x) nounwind {
 	store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
 	ret void
 }
-; CHECK: am4:
+; CHECK-LABEL: am4:
 ; CHECK:		bis.w	r15, &32
 
 define void @am5(i16* %a, i16 %x) readonly {
@@ -50,7 +50,7 @@ define void @am5(i16* %a, i16 %x) readonly {
 	store i16 %3, i16* %1
 	ret void
 }
-; CHECK: am5:
+; CHECK-LABEL: am5:
 ; CHECK:		bis.w	r14, 4(r15)
 
 %S = type { i16, i16 }
@@ -62,7 +62,7 @@ define void @am6(i16 %x) nounwind {
 	store i16 %2, i16* getelementptr (%S* @baz, i32 0, i32 1)
 	ret void
 }
-; CHECK: am6:
+; CHECK-LABEL: am6:
 ; CHECK:		bis.w	r15, &baz+2
 
 %T = type { i16, [2 x i8] }
@@ -76,6 +76,6 @@ define void @am7(i16 %n, i8 %x) nounwind {
 	store i8 %4, i8* %2
 	ret void
 }
-; CHECK: am7:
+; CHECK-LABEL: am7:
 ; CHECK:		bis.b	r14, duh+2(r15)
 
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 7cd345bd8f88..378b7ae58ff6 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -6,7 +6,7 @@ define i16 @am1(i16* %a) nounwind {
 	%1 = load i16* %a
 	ret i16 %1
 }
-; CHECK: am1:
+; CHECK-LABEL: am1:
 ; CHECK:		mov.w	0(r15), r15
 
 @foo = external global i16
@@ -15,7 +15,7 @@ define i16 @am2() nounwind {
 	%1 = load i16* @foo
 	ret i16 %1
 }
-; CHECK: am2:
+; CHECK-LABEL: am2:
 ; CHECK:		mov.w	&foo, r15
 
 @bar = internal constant [2 x i8] [ i8 32, i8 64 ]
@@ -25,14 +25,14 @@ define i8 @am3(i16 %n) nounwind {
 	%2 = load i8* %1
 	ret i8 %2
 }
-; CHECK: am3:
+; CHECK-LABEL: am3:
 ; CHECK:		mov.b	bar(r15), r15
 
 define i16 @am4() nounwind {
 	%1 = load volatile i16* inttoptr(i16 32 to i16*)
 	ret i16 %1
 }
-; CHECK: am4:
+; CHECK-LABEL: am4:
 ; CHECK:		mov.w	&32, r15
 
 define i16 @am5(i16* %a) nounwind {
@@ -40,7 +40,7 @@ define i16 @am5(i16* %a) nounwind {
 	%2 = load i16* %1
 	ret i16 %2
 }
-; CHECK: am5:
+; CHECK-LABEL: am5:
 ; CHECK:		mov.w	4(r15), r15
 
 %S = type { i16, i16 }
@@ -50,7 +50,7 @@ define i16 @am6() nounwind {
 	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
 	ret i16 %1
 }
-; CHECK: am6:
+; CHECK-LABEL: am6:
 ; CHECK:		mov.w	&baz+2, r15
 
 %T = type { i16, [2 x i8] }
@@ -62,6 +62,6 @@ define i8 @am7(i16 %n) nounwind {
 	%3= load i8* %2
 	ret i8 %3
 }
-; CHECK: am7:
+; CHECK-LABEL: am7:
 ; CHECK:		mov.b	duh+2(r15), r15
 
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
index 5eeb02f72913..f55fd542645c 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -6,7 +6,7 @@ define void @am1(i16* %a, i16 %b) nounwind {
 	store i16 %b, i16* %a
 	ret void
 }
-; CHECK: am1:
+; CHECK-LABEL: am1:
 ; CHECK:		mov.w	r14, 0(r15)
 
 @foo = external global i16
@@ -15,7 +15,7 @@ define void @am2(i16 %a) nounwind {
 	store i16 %a, i16* @foo
 	ret void
 }
-; CHECK: am2:
+; CHECK-LABEL: am2:
 ; CHECK:		mov.w	r15, &foo
 
 @bar = external global [2 x i8]
@@ -25,14 +25,14 @@ define void @am3(i16 %i, i8 %a) nounwind {
 	store i8 %a, i8* %1
 	ret void
 }
-; CHECK: am3:
+; CHECK-LABEL: am3:
 ; CHECK:		mov.b	r14, bar(r15)
 
 define void @am4(i16 %a) nounwind {
 	store volatile i16 %a, i16* inttoptr(i16 32 to i16*)
 	ret void
 }
-; CHECK: am4:
+; CHECK-LABEL: am4:
 ; CHECK:		mov.w	r15, &32
 
 define void @am5(i16* nocapture %p, i16 %a) nounwind readonly {
@@ -40,7 +40,7 @@ define void @am5(i16* nocapture %p, i16 %a) nounwind readonly {
 	store i16 %a, i16* %1
 	ret void
 }
-; CHECK: am5:
+; CHECK-LABEL: am5:
 ; CHECK:		mov.w	r14, 4(r15)
 
 %S = type { i16, i16 }
@@ -50,7 +50,7 @@ define void @am6(i16 %a) nounwind {
 	store i16 %a, i16* getelementptr (%S* @baz, i32 0, i32 1)
 	ret void
 }
-; CHECK: am6:
+; CHECK-LABEL: am6:
 ; CHECK:		mov.w	r15, &baz+2
 
 %T = type { i16, [2 x i8] }
@@ -62,6 +62,6 @@ define void @am7(i16 %n, i8 %a) nounwind {
 	store i8 %a, i8* %2
 	ret void
 }
-; CHECK: am7:
+; CHECK-LABEL: am7:
 ; CHECK:		mov.b	r14, duh+2(r15)
 
diff --git a/test/CodeGen/MSP430/Inst16mi.ll b/test/CodeGen/MSP430/Inst16mi.ll
index 33d7aa495d3c..e9ab75cc80bc 100644
--- a/test/CodeGen/MSP430/Inst16mi.ll
+++ b/test/CodeGen/MSP430/Inst16mi.ll
@@ -5,14 +5,14 @@ target triple = "msp430-generic-generic"
 @foo = common global i16 0, align 2
 
 define void @mov() nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.w	#2, &foo
 	store i16 2, i16 * @foo
 	ret void
 }
 
 define void @add() nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w	#2, &foo
 	%1 = load i16* @foo
 	%2 = add i16 %1, 2
@@ -21,7 +21,7 @@ define void @add() nounwind {
 }
 
 define void @and() nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	#2, &foo
 	%1 = load i16* @foo
 	%2 = and i16 %1, 2
@@ -30,7 +30,7 @@ define void @and() nounwind {
 }
 
 define void @bis() nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	#2, &foo
 	%1 = load i16* @foo
 	%2 = or i16 %1, 2
@@ -39,7 +39,7 @@ define void @bis() nounwind {
 }
 
 define void @xor() nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	#2, &foo
 	%1 = load i16* @foo
 	%2 = xor i16 %1, 2
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
index d4ae811ac8f0..5c93e37bfa96 100644
--- a/test/CodeGen/MSP430/Inst16mm.ll
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -5,7 +5,7 @@ target triple = "msp430-generic-generic"
 @bar = common global i16 0, align 2
 
 define void @mov() nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.w	&bar, &foo
         %1 = load i16* @bar
         store i16 %1, i16* @foo
@@ -13,7 +13,7 @@ define void @mov() nounwind {
 }
 
 define void @add() nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w	&bar, &foo
 	%1 = load i16* @bar
 	%2 = load i16* @foo
@@ -23,7 +23,7 @@ define void @add() nounwind {
 }
 
 define void @and() nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	&bar, &foo
 	%1 = load i16* @bar
 	%2 = load i16* @foo
@@ -33,7 +33,7 @@ define void @and() nounwind {
 }
 
 define void @bis() nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	&bar, &foo
 	%1 = load i16* @bar
 	%2 = load i16* @foo
@@ -43,7 +43,7 @@ define void @bis() nounwind {
 }
 
 define void @xor() nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	&bar, &foo
 	%1 = load i16* @bar
 	%2 = load i16* @foo
@@ -63,7 +63,7 @@ entry:
  store i16 0, i16* %retval
  %0 = load i16* %retval                          ; <i16> [#uses=1]
  ret i16 %0
-; CHECK: mov2:
+; CHECK-LABEL: mov2:
 ; CHECK:	mov.w	2(r1), 6(r1)
 ; CHECK:	mov.w	0(r1), 4(r1)
 }
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
index 2613f0195855..201004893684 100644
--- a/test/CodeGen/MSP430/Inst16mr.ll
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -4,14 +4,14 @@ target triple = "msp430-generic-generic"
 @foo = common global i16 0, align 2
 
 define void @mov(i16 %a) nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.w	r15, &foo
 	store i16 %a, i16* @foo
 	ret void
 }
 
 define void @add(i16 %a) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w	r15, &foo
 	%1 = load i16* @foo
 	%2 = add i16 %a, %1
@@ -20,7 +20,7 @@ define void @add(i16 %a) nounwind {
 }
 
 define void @and(i16 %a) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	r15, &foo
 	%1 = load i16* @foo
 	%2 = and i16 %a, %1
@@ -29,7 +29,7 @@ define void @and(i16 %a) nounwind {
 }
 
 define void @bis(i16 %a) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	r15, &foo
 	%1 = load i16* @foo
 	%2 = or i16 %a, %1
@@ -38,7 +38,7 @@ define void @bis(i16 %a) nounwind {
 }
 
 define void @bic(i16 zeroext %m) nounwind {
-; CHECK: bic:
+; CHECK-LABEL: bic:
 ; CHECK: bic.w   r15, &foo
         %1 = xor i16 %m, -1
         %2 = load i16* @foo
@@ -48,7 +48,7 @@ define void @bic(i16 zeroext %m) nounwind {
 }
 
 define void @xor(i16 %a) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	r15, &foo
 	%1 = load i16* @foo
 	%2 = xor i16 %a, %1
diff --git a/test/CodeGen/MSP430/Inst16ri.ll b/test/CodeGen/MSP430/Inst16ri.ll
index 5115a236929c..f89f686ab567 100644
--- a/test/CodeGen/MSP430/Inst16ri.ll
+++ b/test/CodeGen/MSP430/Inst16ri.ll
@@ -3,34 +3,34 @@ target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
 target triple = "msp430-generic-generic"
 
 define i16 @mov() nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.w	#1, r15
 	ret i16 1
 }
 
 define i16 @add(i16 %a, i16 %b) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w	#1, r15
 	%1 = add i16 %a, 1
 	ret i16 %1
 }
 
 define i16 @and(i16 %a, i16 %b) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	#1, r15
 	%1 = and i16 %a, 1
 	ret i16 %1
 }
 
 define i16 @bis(i16 %a, i16 %b) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	#1, r15
 	%1 = or i16 %a, 1
 	ret i16 %1
 }
 
 define i16 @xor(i16 %a, i16 %b) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	#1, r15
 	%1 = xor i16 %a, 1
 	ret i16 %1
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
index 02e89c7cac7f..e6c52616c8f6 100644
--- a/test/CodeGen/MSP430/Inst16rm.ll
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -4,7 +4,7 @@ target triple = "msp430-generic-generic"
 @foo = common global i16 0, align 2
 
 define i16 @add(i16 %a) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w	&foo, r15
 	%1 = load i16* @foo
 	%2 = add i16 %a, %1
@@ -12,7 +12,7 @@ define i16 @add(i16 %a) nounwind {
 }
 
 define i16 @and(i16 %a) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	&foo, r15
 	%1 = load i16* @foo
 	%2 = and i16 %a, %1
@@ -20,7 +20,7 @@ define i16 @and(i16 %a) nounwind {
 }
 
 define i16 @bis(i16 %a) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	&foo, r15
 	%1 = load i16* @foo
 	%2 = or i16 %a, %1
@@ -28,7 +28,7 @@ define i16 @bis(i16 %a) nounwind {
 }
 
 define i16  @bic(i16 %a) nounwind {
-; CHECK: bic:
+; CHECK-LABEL: bic:
 ; CHECK: bic.w	&foo, r15
         %1 = load i16* @foo
         %2 = xor i16 %1, -1
@@ -37,7 +37,7 @@ define i16  @bic(i16 %a) nounwind {
 }
 
 define i16 @xor(i16 %a) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	&foo, r15
 	%1 = load i16* @foo
 	%2 = xor i16 %a, %1
diff --git a/test/CodeGen/MSP430/Inst16rr.ll b/test/CodeGen/MSP430/Inst16rr.ll
index 2f1ba5b4f131..d74bfae9b938 100644
--- a/test/CodeGen/MSP430/Inst16rr.ll
+++ b/test/CodeGen/MSP430/Inst16rr.ll
@@ -3,34 +3,34 @@ target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
 target triple = "msp430-generic-generic"
 
 define i16 @mov(i16 %a, i16 %b) nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.w	r14, r15
 	ret i16 %b
 }
 
 define i16 @add(i16 %a, i16 %b) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w	r14, r15
 	%1 = add i16 %a, %b
 	ret i16 %1
 }
 
 define i16 @and(i16 %a, i16 %b) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	r14, r15
 	%1 = and i16 %a, %b
 	ret i16 %1
 }
 
 define i16 @bis(i16 %a, i16 %b) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	r14, r15
 	%1 = or i16 %a, %b
 	ret i16 %1
 }
 
 define i16 @bic(i16 %a, i16 %b) nounwind {
-; CHECK: bic:
+; CHECK-LABEL: bic:
 ; CHECK: bic.w	r14, r15
         %1 = xor i16 %b, -1
         %2 = and i16 %a, %1
@@ -38,7 +38,7 @@ define i16 @bic(i16 %a, i16 %b) nounwind {
 }
 
 define i16 @xor(i16 %a, i16 %b) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	r14, r15
 	%1 = xor i16 %a, %b
 	ret i16 %1
diff --git a/test/CodeGen/MSP430/Inst8mi.ll b/test/CodeGen/MSP430/Inst8mi.ll
index ef318ce1590d..a2c7b71d66dc 100644
--- a/test/CodeGen/MSP430/Inst8mi.ll
+++ b/test/CodeGen/MSP430/Inst8mi.ll
@@ -4,14 +4,14 @@ target triple = "msp430-generic-generic"
 @foo = common global i8 0, align 1
 
 define void @mov() nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.b	#2, &foo
 	store i8 2, i8 * @foo
 	ret void
 }
 
 define void @add() nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.b	#2, &foo
 	%1 = load i8* @foo
 	%2 = add i8 %1, 2
@@ -20,7 +20,7 @@ define void @add() nounwind {
 }
 
 define void @and() nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.b	#2, &foo
 	%1 = load i8* @foo
 	%2 = and i8 %1, 2
@@ -29,7 +29,7 @@ define void @and() nounwind {
 }
 
 define void @bis() nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.b	#2, &foo
 	%1 = load i8* @foo
 	%2 = or i8 %1, 2
@@ -38,7 +38,7 @@ define void @bis() nounwind {
 }
 
 define void @xor() nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.b	#2, &foo
 	%1 = load i8* @foo
 	%2 = xor i8 %1, 2
diff --git a/test/CodeGen/MSP430/Inst8mm.ll b/test/CodeGen/MSP430/Inst8mm.ll
index a2987ac9b46d..d1ce8bc66b93 100644
--- a/test/CodeGen/MSP430/Inst8mm.ll
+++ b/test/CodeGen/MSP430/Inst8mm.ll
@@ -6,7 +6,7 @@ target triple = "msp430-generic-generic"
 @bar = common global i8 0, align 1
 
 define void @mov() nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.b	&bar, &foo
         %1 = load i8* @bar
         store i8 %1, i8* @foo
@@ -14,7 +14,7 @@ define void @mov() nounwind {
 }
 
 define void @add() nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.b	&bar, &foo
 	%1 = load i8* @bar
 	%2 = load i8* @foo
@@ -24,7 +24,7 @@ define void @add() nounwind {
 }
 
 define void @and() nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.b	&bar, &foo
 	%1 = load i8* @bar
 	%2 = load i8* @foo
@@ -34,7 +34,7 @@ define void @and() nounwind {
 }
 
 define void @bis() nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.b	&bar, &foo
 	%1 = load i8* @bar
 	%2 = load i8* @foo
@@ -44,7 +44,7 @@ define void @bis() nounwind {
 }
 
 define void @xor() nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.b	&bar, &foo
 	%1 = load i8* @bar
 	%2 = load i8* @foo
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
index 428d1fa38d1a..0b3566770cf0 100644
--- a/test/CodeGen/MSP430/Inst8mr.ll
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -4,14 +4,14 @@ target triple = "msp430-generic-generic"
 @foo = common global i8 0, align 1
 
 define void @mov(i8 %a) nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.b	r15, &foo
 	store i8 %a, i8* @foo
 	ret void
 }
 
 define void @and(i8 %a) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.b	r15, &foo
 	%1 = load i8* @foo
 	%2 = and i8 %a, %1
@@ -20,7 +20,7 @@ define void @and(i8 %a) nounwind {
 }
 
 define void @add(i8 %a) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.b	r15, &foo
 	%1 = load i8* @foo
 	%2 = add i8 %a, %1
@@ -29,7 +29,7 @@ define void @add(i8 %a) nounwind {
 }
 
 define void @bis(i8 %a) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.b	r15, &foo
 	%1 = load i8* @foo
 	%2 = or i8 %a, %1
@@ -38,7 +38,7 @@ define void @bis(i8 %a) nounwind {
 }
 
 define void @bic(i8 zeroext %m) nounwind {
-; CHECK: bic:
+; CHECK-LABEL: bic:
 ; CHECK: bic.b   r15, &foo
         %1 = xor i8 %m, -1
         %2 = load i8* @foo
@@ -48,7 +48,7 @@ define void @bic(i8 zeroext %m) nounwind {
 }
 
 define void @xor(i8 %a) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.b	r15, &foo
 	%1 = load i8* @foo
 	%2 = xor i8 %a, %1
diff --git a/test/CodeGen/MSP430/Inst8ri.ll b/test/CodeGen/MSP430/Inst8ri.ll
index ac3418aa6c7b..ec0dff9c563e 100644
--- a/test/CodeGen/MSP430/Inst8ri.ll
+++ b/test/CodeGen/MSP430/Inst8ri.ll
@@ -3,34 +3,34 @@ target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
 target triple = "msp430-generic-generic"
 
 define i8 @mov() nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.b	#1, r15
 	ret i8 1
 }
 
 define i8 @add(i8 %a, i8 %b) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.b	#1, r15
 	%1 = add i8 %a, 1
 	ret i8 %1
 }
 
 define i8 @and(i8 %a, i8 %b) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.b	#1, r15
 	%1 = and i8 %a, 1
 	ret i8 %1
 }
 
 define i8 @bis(i8 %a, i8 %b) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.b	#1, r15
 	%1 = or i8 %a, 1
 	ret i8 %1
 }
 
 define i8 @xor(i8 %a, i8 %b) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.b	#1, r15
 	%1 = xor i8 %a, 1
 	ret i8 %1
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
index c062f04c6b46..308163ed7307 100644
--- a/test/CodeGen/MSP430/Inst8rm.ll
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -4,7 +4,7 @@ target triple = "msp430-generic-generic"
 @foo = common global i8 0, align 1
 
 define i8 @add(i8 %a) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.b	&foo, r15
 	%1 = load i8* @foo
 	%2 = add i8 %a, %1
@@ -12,7 +12,7 @@ define i8 @add(i8 %a) nounwind {
 }
 
 define i8 @and(i8 %a) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.b	&foo, r15
 	%1 = load i8* @foo
 	%2 = and i8 %a, %1
@@ -20,7 +20,7 @@ define i8 @and(i8 %a) nounwind {
 }
 
 define i8 @bis(i8 %a) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.b	&foo, r15
 	%1 = load i8* @foo
 	%2 = or i8 %a, %1
@@ -28,7 +28,7 @@ define i8 @bis(i8 %a) nounwind {
 }
 
 define i8  @bic(i8 %a) nounwind {
-; CHECK: bic:
+; CHECK-LABEL: bic:
 ; CHECK: bic.b  &foo, r15
         %1 = load i8* @foo
         %2 = xor i8 %1, -1
@@ -37,7 +37,7 @@ define i8  @bic(i8 %a) nounwind {
 }
 
 define i8 @xor(i8 %a) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.b	&foo, r15
 	%1 = load i8* @foo
 	%2 = xor i8 %a, %1
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll
index b9c17d91ef5b..76e8d1911282 100644
--- a/test/CodeGen/MSP430/Inst8rr.ll
+++ b/test/CodeGen/MSP430/Inst8rr.ll
@@ -3,34 +3,34 @@ target datalayout = "e-p:16:8:8-i8:8:8-i8:8:8-i32:8:8"
 target triple = "msp430-generic-generic"
 
 define i8 @mov(i8 %a, i8 %b) nounwind {
-; CHECK: mov:
+; CHECK-LABEL: mov:
 ; CHECK: mov.{{[bw]}} r14, r15
 	ret i8 %b
 }
 
 define i8 @add(i8 %a, i8 %b) nounwind {
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.b
 	%1 = add i8 %a, %b
 	ret i8 %1
 }
 
 define i8 @and(i8 %a, i8 %b) nounwind {
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w	r14, r15
 	%1 = and i8 %a, %b
 	ret i8 %1
 }
 
 define i8 @bis(i8 %a, i8 %b) nounwind {
-; CHECK: bis:
+; CHECK-LABEL: bis:
 ; CHECK: bis.w	r14, r15
 	%1 = or i8 %a, %b
 	ret i8 %1
 }
 
 define i8 @bic(i8 %a, i8 %b) nounwind {
-; CHECK: bic:
+; CHECK-LABEL: bic:
 ; CHECK: bic.b  r14, r15
         %1 = xor i8 %b, -1
         %2 = and i8 %a, %1
@@ -38,7 +38,7 @@ define i8 @bic(i8 %a, i8 %b) nounwind {
 }
 
 define i8 @xor(i8 %a, i8 %b) nounwind {
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w	r14, r15
 	%1 = xor i8 %a, %b
 	ret i8 %1
diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll
index 03d672bcbe6a..2ffc191695f0 100644
--- a/test/CodeGen/MSP430/bit.ll
+++ b/test/CodeGen/MSP430/bit.ll
@@ -11,7 +11,7 @@ define i8 @bitbrr(i8 %a, i8 %b) nounwind {
 	%t3 = zext i1 %t2 to i8
 	ret i8 %t3
 }
-; CHECK: bitbrr:
+; CHECK-LABEL: bitbrr:
 ; CHECK: bit.b	r14, r15
 
 define i8 @bitbri(i8 %a) nounwind {
@@ -20,7 +20,7 @@ define i8 @bitbri(i8 %a) nounwind {
 	%t3 = zext i1 %t2 to i8
 	ret i8 %t3
 }
-; CHECK: bitbri:
+; CHECK-LABEL: bitbri:
 ; CHECK: bit.b	#15, r15
 
 define i8 @bitbir(i8 %a) nounwind {
@@ -29,7 +29,7 @@ define i8 @bitbir(i8 %a) nounwind {
 	%t3 = zext i1 %t2 to i8
 	ret i8 %t3
 }
-; CHECK: bitbir:
+; CHECK-LABEL: bitbir:
 ; CHECK: bit.b	#15, r15
 
 define i8 @bitbmi() nounwind {
@@ -39,7 +39,7 @@ define i8 @bitbmi() nounwind {
 	%t4 = zext i1 %t3 to i8
 	ret i8 %t4
 }
-; CHECK: bitbmi:
+; CHECK-LABEL: bitbmi:
 ; CHECK: bit.b	#15, &foo8
 
 define i8 @bitbim() nounwind {
@@ -49,7 +49,7 @@ define i8 @bitbim() nounwind {
 	%t4 = zext i1 %t3 to i8
 	ret i8 %t4
 }
-; CHECK: bitbim:
+; CHECK-LABEL: bitbim:
 ; CHECK: bit.b	#15, &foo8
 
 define i8 @bitbrm(i8 %a) nounwind {
@@ -59,7 +59,7 @@ define i8 @bitbrm(i8 %a) nounwind {
 	%t4 = zext i1 %t3 to i8
 	ret i8 %t4
 }
-; CHECK: bitbrm:
+; CHECK-LABEL: bitbrm:
 ; CHECK: bit.b	&foo8, r15
 
 define i8 @bitbmr(i8 %a) nounwind {
@@ -69,7 +69,7 @@ define i8 @bitbmr(i8 %a) nounwind {
 	%t4 = zext i1 %t3 to i8
 	ret i8 %t4
 }
-; CHECK: bitbmr:
+; CHECK-LABEL: bitbmr:
 ; CHECK: bit.b	r15, &foo8
 
 define i8 @bitbmm() nounwind {
@@ -80,7 +80,7 @@ define i8 @bitbmm() nounwind {
 	%t5 = zext i1 %t4 to i8
 	ret i8 %t5
 }
-; CHECK: bitbmm:
+; CHECK-LABEL: bitbmm:
 ; CHECK: bit.b	&bar8, &foo8
 
 @foo16 = external global i16
@@ -92,7 +92,7 @@ define i16 @bitwrr(i16 %a, i16 %b) nounwind {
 	%t3 = zext i1 %t2 to i16
 	ret i16 %t3
 }
-; CHECK: bitwrr:
+; CHECK-LABEL: bitwrr:
 ; CHECK: bit.w	r14, r15
 
 define i16 @bitwri(i16 %a) nounwind {
@@ -101,7 +101,7 @@ define i16 @bitwri(i16 %a) nounwind {
 	%t3 = zext i1 %t2 to i16
 	ret i16 %t3
 }
-; CHECK: bitwri:
+; CHECK-LABEL: bitwri:
 ; CHECK: bit.w	#4080, r15
 
 define i16 @bitwir(i16 %a) nounwind {
@@ -110,7 +110,7 @@ define i16 @bitwir(i16 %a) nounwind {
 	%t3 = zext i1 %t2 to i16
 	ret i16 %t3
 }
-; CHECK: bitwir:
+; CHECK-LABEL: bitwir:
 ; CHECK: bit.w	#4080, r15
 
 define i16 @bitwmi() nounwind {
@@ -120,7 +120,7 @@ define i16 @bitwmi() nounwind {
 	%t4 = zext i1 %t3 to i16
 	ret i16 %t4
 }
-; CHECK: bitwmi:
+; CHECK-LABEL: bitwmi:
 ; CHECK: bit.w	#4080, &foo16
 
 define i16 @bitwim() nounwind {
@@ -130,7 +130,7 @@ define i16 @bitwim() nounwind {
 	%t4 = zext i1 %t3 to i16
 	ret i16 %t4
 }
-; CHECK: bitwim:
+; CHECK-LABEL: bitwim:
 ; CHECK: bit.w	#4080, &foo16
 
 define i16 @bitwrm(i16 %a) nounwind {
@@ -140,7 +140,7 @@ define i16 @bitwrm(i16 %a) nounwind {
 	%t4 = zext i1 %t3 to i16
 	ret i16 %t4
 }
-; CHECK: bitwrm:
+; CHECK-LABEL: bitwrm:
 ; CHECK: bit.w	&foo16, r15
 
 define i16 @bitwmr(i16 %a) nounwind {
@@ -150,7 +150,7 @@ define i16 @bitwmr(i16 %a) nounwind {
 	%t4 = zext i1 %t3 to i16
 	ret i16 %t4
 }
-; CHECK: bitwmr:
+; CHECK-LABEL: bitwmr:
 ; CHECK: bit.w	r15, &foo16
 
 define i16 @bitwmm() nounwind {
@@ -161,6 +161,6 @@ define i16 @bitwmm() nounwind {
 	%t5 = zext i1 %t4 to i16
 	ret i16 %t5
 }
-; CHECK: bitwmm:
+; CHECK-LABEL: bitwmm:
 ; CHECK: bit.w	&bar16, &foo16
 
diff --git a/test/CodeGen/MSP430/byval.ll b/test/CodeGen/MSP430/byval.ll
index 9dda0a097b56..bd38e95554df 100644
--- a/test/CodeGen/MSP430/byval.ll
+++ b/test/CodeGen/MSP430/byval.ll
@@ -8,7 +8,7 @@ target triple = "msp430---elf"
 
 define i16 @callee(%struct.Foo* byval %f) nounwind {
 entry:
-; CHECK: callee:
+; CHECK-LABEL: callee:
 ; CHECK: mov.w 2(r1), r15
   %0 = getelementptr inbounds %struct.Foo* %f, i32 0, i32 0
   %1 = load i16* %0, align 2
@@ -17,7 +17,7 @@ entry:
 
 define void @caller() nounwind {
 entry:
-; CHECK: caller:
+; CHECK-LABEL: caller:
 ; CHECK: mov.w &foo+4, 4(r1)
 ; CHECK-NEXT: mov.w &foo+2, 2(r1)
 ; CHECK-NEXT: mov.w &foo, 0(r1)
diff --git a/test/CodeGen/MSP430/cc_args.ll b/test/CodeGen/MSP430/cc_args.ll
new file mode 100644
index 000000000000..39e99e263744
--- /dev/null
+++ b/test/CodeGen/MSP430/cc_args.ll
@@ -0,0 +1,118 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16-a0:16:16"
+target triple = "msp430---elf"
+
+define void @test() #0 {   ; caller side: verifies where each argument is materialized before every call
+entry:
+; CHECK-LABEL: test:
+
+; CHECK: mov.w #1, r15
+; CHECK: call #f_i16
+  call void @f_i16(i16 1)   ; a lone i16 argument is expected in r15
+
+; CHECK: mov.w #772, r14
+; CHECK: mov.w #258, r15
+; CHECK: call #f_i32
+  call void @f_i32(i32 16909060)   ; 0x01020304: high word 0x0102 = 258 in r15, low word 0x0304 = 772 in r14
+
+; CHECK: mov.w #1800, r12
+; CHECK: mov.w #1286, r13
+; CHECK: mov.w #772, r14
+; CHECK: mov.w #258, r15
+; CHECK: call #f_i64
+  call void @f_i64(i64 72623859790382856)   ; 0x0102030405060708: 16-bit words 258, 772, 1286, 1800 in r15..r12
+
+; CHECK: mov.w #772, r14
+; CHECK: mov.w #258, r15
+; CHECK: mov.w #1800, r12
+; CHECK: mov.w #1286, r13
+; CHECK: call #f_i32_i32
+  call void @f_i32_i32(i32 16909060, i32 84281096)   ; 0x01020304 in r15:r14, 0x05060708 in r13:r12
+
+; CHECK: mov.w #1, r15
+; CHECK: mov.w #772, r13
+; CHECK: mov.w #258, r14
+; CHECK: mov.w #2, r12
+; CHECK: call #f_i16_i32_i16
+  call void @f_i16_i32_i16(i16 1, i32 16909060, i16 2)   ; i16 in r15, i32 in r14:r13, trailing i16 in r12
+
+; CHECK: mov.w #2, 8(r1)
+; CHECK: mov.w #258, 6(r1)
+; CHECK: mov.w #772, 4(r1)
+; CHECK: mov.w #1286, 2(r1)
+; CHECK: mov.w #1800, 0(r1)
+; CHECK: mov.w #1, r15
+; CHECK: call #f_i16_i64_i16
+  call void @f_i16_i64_i16(i16 1, i64 72623859790382856, i16 2)   ; i16 in r15; i64 at 0(r1)..6(r1), low word first; last i16 at 8(r1)
+
+  ret void
+}
+
+@g_i16 = common global i16 0, align 2
+@g_i32 = common global i32 0, align 2
+@g_i64 = common global i64 0, align 2
+
+define void @f_i16(i16 %a) #0 {   ; callee side: the single i16 argument must arrive in r15
+; CHECK-LABEL: f_i16:
+; CHECK: mov.w r15, &g_i16
+  store volatile i16 %a, i16* @g_i16, align 2   ; volatile so the store (and its source register) stays visible
+  ret void
+}
+
+define void @f_i32(i32 %a) #0 {   ; callee side: i32 argument arrives with its high word in r15 and low word in r14
+; CHECK-LABEL: f_i32:
+; CHECK: mov.w r15, &g_i32+2
+; CHECK: mov.w r14, &g_i32
+  store volatile i32 %a, i32* @g_i32, align 2   ; volatile so both 16-bit stores stay visible
+  ret void
+}
+
+define void @f_i64(i64 %a) #0 {   ; callee side: i64 argument arrives in r15..r12, most significant word in r15
+; CHECK-LABEL: f_i64:
+; CHECK: mov.w r15, &g_i64+6
+; CHECK: mov.w r14, &g_i64+4
+; CHECK: mov.w r13, &g_i64+2
+; CHECK: mov.w r12, &g_i64
+  store volatile i64 %a, i64* @g_i64, align 2   ; volatile so all four 16-bit stores stay visible
+  ret void
+}
+
+define void @f_i32_i32(i32 %a, i32 %b) #0 {   ; callee side: %a arrives in r15:r14, %b in r13:r12
+; CHECK-LABEL: f_i32_i32:
+; CHECK: mov.w r15, &g_i32+2
+; CHECK: mov.w r14, &g_i32
+  store volatile i32 %a, i32* @g_i32, align 2   ; first argument
+; CHECK: mov.w r13, &g_i32+2
+; CHECK: mov.w r12, &g_i32
+  store volatile i32 %b, i32* @g_i32, align 2   ; second argument, same global; volatile keeps both stores
+  ret void
+}
+
+define void @f_i16_i32_i16(i16 %a, i32 %b, i16 %c) #0 {   ; callee side: %a in r15, %b in r14:r13, %c in r12
+; CHECK-LABEL: f_i16_i32_i16:
+; CHECK: mov.w r15, &g_i16
+  store volatile i16 %a, i16* @g_i16, align 2   ; first argument
+; CHECK: mov.w r14, &g_i32+2
+; CHECK: mov.w r13, &g_i32
+  store volatile i32 %b, i32* @g_i32, align 2   ; second argument, high word in r14
+; CHECK: mov.w r12, &g_i16
+  store volatile i16 %c, i16* @g_i16, align 2   ; third argument takes the last argument register
+  ret void
+}
+
+define void @f_i16_i64_i16(i16 %a, i64 %b, i16 %c) #0 {   ; callee side: only %a is in a register (r15); %b and %c are read from the stack
+; CHECK-LABEL: f_i16_i64_i16:
+; CHECK: mov.w r15, &g_i16
+  store volatile i16 %a, i16* @g_i16, align 2   ; first argument
+; CHECK: mov.w 10(r4), &g_i64+6
+; CHECK: mov.w 8(r4), &g_i64+4
+; CHECK: mov.w 6(r4), &g_i64+2
+; CHECK: mov.w 4(r4), &g_i64
+  store volatile i64 %b, i64* @g_i64, align 2   ; i64 is loaded r4-relative (attribute group #0 keeps the frame pointer)
+; CHECK: mov.w 12(r4), &g_i16
+  store volatile i16 %c, i16* @g_i16, align 2   ; trailing i16 sits just above the i64 on the stack
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/MSP430/cc_ret.ll b/test/CodeGen/MSP430/cc_ret.ll
new file mode 100644
index 000000000000..c2a9ae664509
--- /dev/null
+++ b/test/CodeGen/MSP430/cc_ret.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16-a0:16:16"
+target triple = "msp430---elf"
+
+define void @test() #0 {   ; caller side: verifies which registers each return value is read from after the call
+entry:
+; CHECK-LABEL: test:
+
+; CHECK: call #f_i16
+; CHECK: mov.w r15, &g_i16
+  %0 = call i16 @f_i16()   ; i16 result is expected in r15
+  store volatile i16 %0, i16* @g_i16
+
+; CHECK: call #f_i32
+; CHECK: mov.w r15, &g_i32+2
+; CHECK: mov.w r14, &g_i32
+  %1 = call i32 @f_i32()   ; i32 result: high word in r15, low word in r14
+  store volatile i32 %1, i32* @g_i32
+
+; CHECK: call #f_i64
+; CHECK: mov.w r15, &g_i64+6
+; CHECK: mov.w r14, &g_i64+4
+; CHECK: mov.w r13, &g_i64+2
+; CHECK: mov.w r12, &g_i64
+  %2 = call i64 @f_i64()   ; i64 result: r15..r12, most significant word in r15
+  store volatile i64 %2, i64* @g_i64
+
+  ret void
+}
+
+@g_i16 = common global i16 0, align 2
+@g_i32 = common global i32 0, align 2
+@g_i64 = common global i64 0, align 2
+
+define i16 @f_i16() #0 {   ; callee side: an i16 result is returned in r15
+; CHECK-LABEL: f_i16:
+; CHECK: mov.w #1, r15
+; CHECK: ret
+  ret i16 1
+}
+
+define i32 @f_i32() #0 {   ; callee side: an i32 result is returned with high word in r15, low word in r14
+; CHECK-LABEL: f_i32:
+; CHECK: mov.w #772, r14
+; CHECK: mov.w #258, r15
+; CHECK: ret
+  ret i32 16909060   ; 0x01020304: 0x0102 = 258, 0x0304 = 772
+}
+
+define i64 @f_i64() #0 {   ; callee side: an i64 result is returned in r15..r12, most significant word in r15
+; CHECK-LABEL: f_i64:
+; CHECK: mov.w #1800, r12
+; CHECK: mov.w #1286, r13
+; CHECK: mov.w #772, r14
+; CHECK: mov.w #258, r15
+; CHECK: ret
+  ret i64 72623859790382856   ; 0x0102030405060708: 16-bit words 258, 772, 1286, 1800
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll
index c3273eff05cb..018090566f18 100644
--- a/test/CodeGen/MSP430/fp.ll
+++ b/test/CodeGen/MSP430/fp.ll
@@ -5,7 +5,7 @@ target triple = "msp430---elf"
 
 define void @fp() nounwind {
 entry:
-; CHECK: fp:
+; CHECK-LABEL: fp:
 ; CHECK: push.w r4
 ; CHECK: mov.w r1, r4
 ; CHECK: sub.w #2, r1
diff --git a/test/CodeGen/MSP430/indirectbr2.ll b/test/CodeGen/MSP430/indirectbr2.ll
index dc2abf5cd0ff..93788b696553 100644
--- a/test/CodeGen/MSP430/indirectbr2.ll
+++ b/test/CodeGen/MSP430/indirectbr2.ll
@@ -5,7 +5,7 @@ define internal i16 @foo(i16 %i) nounwind {
 entry:
   %tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
   %gotovar.4.0 = load i8** %tmp1, align 4        ; <i8*> [#uses=1]
-; CHECK: mov.w   .LC.0.2070(r12), pc
+; CHECK: br .LC.0.2070(r12)
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 L5:                                               ; preds = %bb2
diff --git a/test/CodeGen/MSP430/jumptable.ll b/test/CodeGen/MSP430/jumptable.ll
new file mode 100644
index 000000000000..239d79ed9cba
--- /dev/null
+++ b/test/CodeGen/MSP430/jumptable.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+; Function Attrs: nounwind -- NOTE(review): this file has no "attributes #0" line; confirm nounwind is really applied
+define i16 @test(i16 %i) #0 {   ; returns a per-case constant selected by a 4-case switch on %i
+entry:
+; CHECK-LABEL: test:
+  %retval = alloca i16, align 2
+  %i.addr = alloca i16, align 2
+  store i16 %i, i16* %i.addr, align 2
+  %0 = load i16* %i.addr, align 2
+; CHECK: mov.w #2, r14
+; CHECK: call #__mulhi3hw_noint
+; CHECK: br .LJTI0_0(r15)
+  switch i16 %0, label %sw.default [   ; expected lowering: multiply libcall by 2 (presumably the table entry size), then indexed branch via .LJTI0_0
+    i16 0, label %sw.bb
+    i16 1, label %sw.bb1
+    i16 2, label %sw.bb2
+    i16 3, label %sw.bb3
+  ]
+
+sw.bb:                                            ; preds = %entry
+  store i16 0, i16* %retval
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+  store i16 1, i16* %retval
+  br label %return
+
+sw.bb2:                                           ; preds = %entry
+  store i16 2, i16* %retval
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+  store i16 3, i16* %retval
+  br label %return
+
+sw.default:                                       ; preds = %entry
+  store i16 2, i16* %retval
+  br label %return
+
+return:                                           ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+  %1 = load i16* %retval
+  ret i16 %1
+; CHECK: ret
+}
+
+; CHECK: .LJTI0_0:
+; CHECK-NEXT: .short .LBB0_2
+; CHECK-NEXT: .short .LBB0_4
+; CHECK-NEXT: .short .LBB0_3
+; CHECK-NEXT: .short .LBB0_5
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
index 0ca9fc9c6912..a18fe6f927d8 100644
--- a/test/CodeGen/MSP430/lit.local.cfg
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'MSP430' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/MSP430/postinc.ll b/test/CodeGen/MSP430/postinc.ll
index 8f01b832588d..8d55fd3f8031 100644
--- a/test/CodeGen/MSP430/postinc.ll
+++ b/test/CodeGen/MSP430/postinc.ll
@@ -11,7 +11,7 @@ for.body:                                         ; preds = %for.body, %entry
   %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
   %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
   %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
-; CHECK: add:
+; CHECK-LABEL: add:
 ; CHECK: add.w @r{{[0-9]+}}+, r{{[0-9]+}}
   %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
   %add = add i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
@@ -33,7 +33,7 @@ for.body:                                         ; preds = %for.body, %entry
   %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
   %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
   %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
-; CHECK: sub:
+; CHECK-LABEL: sub:
 ; CHECK: sub.w @r{{[0-9]+}}+, r{{[0-9]+}}
   %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
   %add = sub i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
@@ -55,7 +55,7 @@ for.body:                                         ; preds = %for.body, %entry
   %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
   %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
   %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
-; CHECK: or:
+; CHECK-LABEL: or:
 ; CHECK: bis.w @r{{[0-9]+}}+, r{{[0-9]+}}
   %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
   %add = or i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
@@ -77,7 +77,7 @@ for.body:                                         ; preds = %for.body, %entry
   %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
   %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
   %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
-; CHECK: xor:
+; CHECK-LABEL: xor:
 ; CHECK: xor.w @r{{[0-9]+}}+, r{{[0-9]+}}
   %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
   %add = xor i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
@@ -99,7 +99,7 @@ for.body:                                         ; preds = %for.body, %entry
   %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
   %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
   %arrayidx = getelementptr i16* %a, i16 %i.010   ; <i16*> [#uses=1]
-; CHECK: and:
+; CHECK-LABEL: and:
 ; CHECK: and.w @r{{[0-9]+}}+, r{{[0-9]+}}
   %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
   %add = and i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
diff --git a/test/CodeGen/MSP430/setcc.ll b/test/CodeGen/MSP430/setcc.ll
index c99b17e1436e..d5a8057ddd6c 100644
--- a/test/CodeGen/MSP430/setcc.ll
+++ b/test/CodeGen/MSP430/setcc.ll
@@ -8,7 +8,7 @@ define i16 @sccweqand(i16 %a, i16 %b) nounwind {
 	%t3 = zext i1 %t2 to i16
 	ret i16 %t3
 }
-; CHECK: sccweqand:
+; CHECK-LABEL: sccweqand:
 ; CHECK:	bit.w	r14, r15
 ; CHECK:	mov.w	r2, r15
 ; CHECK:	rra.w   r15
@@ -20,7 +20,7 @@ define i16 @sccwneand(i16 %a, i16 %b) nounwind {
 	%t3 = zext i1 %t2 to i16
 	ret i16 %t3
 }
-; CHECK: sccwneand:
+; CHECK-LABEL: sccwneand:
 ; CHECK: 	bit.w	r14, r15
 ; CHECK:	mov.w	r2, r15
 ; CHECK:	and.w	#1, r15
@@ -30,19 +30,19 @@ define i16 @sccwne(i16 %a, i16 %b) nounwind {
 	%t2 = zext i1 %t1 to i16
 	ret i16 %t2
 }
-; CHECK:sccwne:
+; CHECK-LABEL:sccwne:
 ; CHECK:	cmp.w	r14, r15
-; CHECK:	mov.w	r2, r15
-; CHECK:	rra.w	r15
-; CHECK:	and.w	#1, r15
-; CHECK:	xor.w   #1, r15
+; CHECK:	mov.w	r2, r12
+; CHECK:	rra.w	r12
+; CHECK:	mov.w	#1, r15
+; CHECK:	bic.w	r12, r15
 
 define i16 @sccweq(i16 %a, i16 %b) nounwind {
 	%t1 = icmp eq i16 %a, %b
 	%t2 = zext i1 %t1 to i16
 	ret i16 %t2
 }
-; CHECK:sccweq:
+; CHECK-LABEL:sccweq:
 ; CHECK:	cmp.w	r14, r15
 ; CHECK:	mov.w	r2, r15
 ; CHECK:	rra.w	r15
@@ -53,18 +53,17 @@ define i16 @sccwugt(i16 %a, i16 %b) nounwind {
 	%t2 = zext i1 %t1 to i16
 	ret i16 %t2
 }
-; CHECK:sccwugt:
+; CHECK-LABEL:sccwugt:
 ; CHECK:	cmp.w	r15, r14
-; CHECK:	mov.w	r2, r15
-; CHECK:	and.w	#1, r15
-; CHECK:	xor.w	#1, r15
+; CHECK:	mov.w	#1, r15
+; CHECK:	bic.w	r2, r15
 
 define i16 @sccwuge(i16 %a, i16 %b) nounwind {
 	%t1 = icmp uge i16 %a, %b
 	%t2 = zext i1 %t1 to i16
 	ret i16 %t2
 }
-; CHECK:sccwuge:
+; CHECK-LABEL:sccwuge:
 ; CHECK:	cmp.w	r14, r15
 ; CHECK:	mov.w	r2, r15
 ; CHECK:	and.w	#1, r15
@@ -74,18 +73,17 @@ define i16 @sccwult(i16 %a, i16 %b) nounwind {
 	%t2 = zext i1 %t1 to i16
 	ret i16 %t2
 }
-; CHECK:sccwult:
+; CHECK-LABEL:sccwult:
 ; CHECK:	cmp.w	r14, r15
-; CHECK:	mov.w	r2, r15
-; CHECK:	and.w	#1, r15
-; CHECK:	xor.w	#1, r15
+; CHECK:	mov.w	#1, r15
+; CHECK:	bic.w	r2, r15
 
 define i16 @sccwule(i16 %a, i16 %b) nounwind {
 	%t1 = icmp ule i16 %a, %b
 	%t2 = zext i1 %t1 to i16
 	ret i16 %t2
 }
-; CHECK:sccwule:
+; CHECK-LABEL:sccwule:
 ; CHECK:	cmp.w	r15, r14
 ; CHECK:	mov.w	r2, r15
 ; CHECK:	and.w	#1, r15
diff --git a/test/CodeGen/MSP430/shifts.ll b/test/CodeGen/MSP430/shifts.ll
index b5b3054b9621..22ae59ef4b0f 100644
--- a/test/CodeGen/MSP430/shifts.ll
+++ b/test/CodeGen/MSP430/shifts.ll
@@ -4,7 +4,7 @@ target triple = "msp430-elf"
 
 define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone {
 entry:
-; CHECK: lshr8:
+; CHECK-LABEL: lshr8:
 ; CHECK: rrc.b
   %shr = lshr i8 %a, %cnt
   ret i8 %shr
@@ -12,7 +12,7 @@ entry:
 
 define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone {
 entry:
-; CHECK: ashr8:
+; CHECK-LABEL: ashr8:
 ; CHECK: rra.b
   %shr = ashr i8 %a, %cnt
   ret i8 %shr
@@ -28,7 +28,7 @@ entry:
 
 define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone {
 entry:
-; CHECK: lshr16:
+; CHECK-LABEL: lshr16:
 ; CHECK: rrc.w
   %shr = lshr i16 %a, %cnt
   ret i16 %shr
@@ -36,7 +36,7 @@ entry:
 
 define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone {
 entry:
-; CHECK: ashr16:
+; CHECK-LABEL: ashr16:
 ; CHECK: rra.w
   %shr = ashr i16 %a, %cnt
   ret i16 %shr
@@ -44,7 +44,7 @@ entry:
 
 define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone {
 entry:
-; CHECK: shl16:
+; CHECK-LABEL: shl16:
 ; CHECK: rla.w
   %shl = shl i16 %a, %cnt
   ret i16 %shl
diff --git a/test/CodeGen/MSP430/transient-stack-alignment.ll b/test/CodeGen/MSP430/transient-stack-alignment.ll
new file mode 100644
index 000000000000..cca83509cf4c
--- /dev/null
+++ b/test/CodeGen/MSP430/transient-stack-alignment.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16-a0:16:16"
+target triple = "msp430---elf"
+
+define void @test() #0 {                          ; one i8 local plus a single store; expected output moves r1 by 2 around it
+; CHECK-LABEL: test:
+; CHECK: sub.w #2, r1
+  %1 = alloca i8, align 1                         ; only one byte is requested, yet the expectations reserve two
+; CHECK-NEXT: mov.b #0, 1(r1)
+  store i8 0, i8* %1, align 1                     ; expected to land at offset 1 from r1
+; CHECK-NEXT: add.w #2, r1
+; CHECK-NEXT: ret
+  ret void
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
diff --git a/test/CodeGen/MSP430/vararg.ll b/test/CodeGen/MSP430/vararg.ll
index 603d3ec6b686..9e511fce956f 100644
--- a/test/CodeGen/MSP430/vararg.ll
+++ b/test/CodeGen/MSP430/vararg.ll
@@ -9,7 +9,7 @@ declare void @llvm.va_copy(i8*, i8*) nounwind
 
 define void @va_start(i16 %a, ...) nounwind {
 entry:
-; CHECK: va_start:
+; CHECK-LABEL: va_start:
 ; CHECK: sub.w #2, r1
   %vl = alloca i8*, align 2
   %vl1 = bitcast i8** %vl to i8*
@@ -23,7 +23,7 @@ entry:
 
 define i16 @va_arg(i8* %vl) nounwind {
 entry:
-; CHECK: va_arg:
+; CHECK-LABEL: va_arg:
   %vl.addr = alloca i8*, align 2
 ; CHECK: mov.w r15, 0(r1)
   store i8* %vl, i8** %vl.addr, align 2
@@ -37,7 +37,7 @@ entry:
 
 define void @va_copy(i8* %vl) nounwind {
 entry:
-; CHECK: va_copy:
+; CHECK-LABEL: va_copy:
   %vl.addr = alloca i8*, align 2
   %vl2 = alloca i8*, align 2
 ; CHECK: mov.w r15, 2(r1)
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index 8479ad222d30..3381143c761d 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s 
 ; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float   < %s | FileCheck %s 
 
 define signext i8 @A(i8 %e.0, i8 signext %sum)  nounwind {
 entry:
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index dbde742ad3fe..e274bc0e14f0 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -51,3 +51,21 @@ entry:
   ret void
 }
 
+; Check that RA doesn't allocate registers in the clobber list.
+; CHECK-LABEL: foo4:
+; CHECK: #APP
+; CHECK-NOT: ulh $2
+; CHECK: #NO_APP
+; CHECK: #APP
+; CHECK-NOT: $f0
+; CHECK: #NO_APP
+
+define void @foo4() {                             ; two inline-asm statements, each listing one register as clobbered
+entry:
+  %0 = tail call i32 asm sideeffect "ulh $0,16($$sp)\0A\09", "=r,~{$2}"()   ; "=r" output with $2 in the clobber list
+  store i32 %0, i32* @gi2, align 4
+  %1 = load float* @gf0, align 4                  ; float input for the second asm statement
+  %2 = tail call double asm sideeffect "cvt.d.s $0, $1\0A\09", "=f,f,~{$f0}"(float %1)   ; "=f" output, "f" input, $f0 clobbered
+  store double %2, double* @gd0, align 8
+  ret void
+}
diff --git a/test/CodeGen/Mips/2013-11-18-fp64-const0.ll b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
new file mode 100644
index 000000000000..f8390d9a1ca7
--- /dev/null
+++ b/test/CodeGen/Mips/2013-11-18-fp64-const0.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mips -mattr=-fp64 < %s | FileCheck -check-prefix=CHECK-FP32 %s
+; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck -check-prefix=CHECK-FP64 %s
+
+; This test case is a simplified version of an llvm-stress generated test with
+; seed=3718491962.
+; It originally failed on MIPS32 with FP64 with the following error:
+;     LLVM ERROR: ran out of registers during register allocation
+; This was due to impossible register class restrictions resulting from the
+; use of BuildPairF64 instead of BuildPairF64_64.
+
+define void @autogen_SD3718491962() {             ; two fcmp results feed a select that drives a self-looping branch
+BB:
+  ; CHECK-FP32: mtc1 $zero, $f{{[0-3]*[02468]}}
+  ; CHECK-FP32: mtc1 $zero, $f{{[0-3]*[13579]}}
+
+  ; CHECK-FP64: mtc1 $zero, $f{{[0-9]+}}
+  ; CHECK-FP64-NOT: mtc1 $zero,
+  ; FIXME: A redundant mthc1 is currently emitted. Add a -NOT when it is
+  ;        eliminated
+
+  %Cmp = fcmp ule double 0.000000e+00, undef      ; presumably the 0.0 constant behind the expected mtc1 $zero -- confirm
+  %Cmp11 = fcmp ueq double 0xFDBD965CF1BB7FDA, undef
+  br label %CF88
+
+CF88:                                             ; preds = %CF88, %BB
+  %Sl18 = select i1 %Cmp, i1 %Cmp11, i1 %Cmp
+  br i1 %Sl18, label %CF88, label %CF85           ; loops back to this block while %Sl18 is true
+
+CF85:                                             ; preds = %CF88
+  ret void
+}
diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll
index 99139abbe848..267cff54291d 100644
--- a/test/CodeGen/Mips/align16.ll
+++ b/test/CodeGen/Mips/align16.ll
@@ -25,7 +25,7 @@ entry:
   call void @p(i32* %arrayidx1)
   ret void
 }
-; 16:	save	$ra, $s0, $s1, 2040
-; 16:	addiu	$sp, -48 # 16 bit inst
-; 16:	addiu	$sp, 48 # 16 bit inst
-; 16:	restore	$ra,  $s0, $s1, 2040
-\ No newline at end of file
+; 16:	save	$ra, $s0, $s1, $s2, 2040
+; 16:	addiu	$sp, -56 # 16 bit inst
+; 16:	addiu	$sp, 56 # 16 bit inst
+; 16:	restore	$ra,  $s0, $s1, $s2, 2040
diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll
index 5ae9a847917b..017665f00bd4 100644
--- a/test/CodeGen/Mips/alloca16.ll
+++ b/test/CodeGen/Mips/alloca16.ll
@@ -19,8 +19,8 @@ entry:
 
 define void @test() nounwind {
 entry:
-; 16: 	.frame	$16,24,$ra
-; 16: 	save 	$ra, $s0, $s1, 24
+; 16: 	.frame	$sp,24,$ra
+; 16: 	save 	$ra, $s0, $s1, $s2, 24
 ; 16: 	move	$16, $sp
 ; 16:	move	${{[0-9]+}}, $sp
 ; 16:	subu	$[[REGISTER:[0-9]+]], ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index 819f258c2a40..0e60fe1fbfbc 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=mipsel --disable-machine-licm < %s | FileCheck %s
+; RUN: llc -march=mipsel --disable-machine-licm < %s | FileCheck %s -check-prefix=CHECK-EL
+; RUN: llc -march=mips   --disable-machine-licm < %s | FileCheck %s -check-prefix=CHECK-EB
 
 @x = common global i32 0, align 4
 
@@ -7,13 +8,21 @@ entry:
   %0 = atomicrmw add i32* @x, i32 %incr monotonic
   ret i32 %0
 
-; CHECK:   AtomicLoadAdd32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
-; CHECK:   sc      $[[R2]], 0($[[R0]])
-; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+; CHECK-EL-LABEL:   AtomicLoadAdd32:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK-EL:   addu    $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK-EL:   sc      $[[R2]], 0($[[R0]])
+; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
+
+; CHECK-EB-LABEL:   AtomicLoadAdd32:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK-EB:   addu    $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK-EB:   sc      $[[R2]], 0($[[R0]])
+; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicLoadNand32(i32 %incr) nounwind {
@@ -21,14 +30,23 @@ entry:
   %0 = atomicrmw nand i32* @x, i32 %incr monotonic
   ret i32 %0
 
-; CHECK:   AtomicLoadNand32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK:   and     $[[R3:[0-9]+]], $[[R1]], $4
-; CHECK:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
-; CHECK:   sc      $[[R2]], 0($[[R0]])
-; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+; CHECK-EL-LABEL:   AtomicLoadNand32:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK-EL:   and     $[[R3:[0-9]+]], $[[R1]], $4
+; CHECK-EL:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
+; CHECK-EL:   sc      $[[R2]], 0($[[R0]])
+; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
+
+; CHECK-EB-LABEL:   AtomicLoadNand32:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK-EB:   and     $[[R3:[0-9]+]], $[[R1]], $4
+; CHECK-EB:   nor     $[[R2:[0-9]+]], $zero, $[[R3]]
+; CHECK-EB:   sc      $[[R2]], 0($[[R0]])
+; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicSwap32(i32 %newval) nounwind {
@@ -39,12 +57,19 @@ entry:
   %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic
   ret i32 %0
 
-; CHECK:   AtomicSwap32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      ${{[0-9]+}}, 0($[[R0]])
-; CHECK:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
+; CHECK-EL-LABEL:   AtomicSwap32:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      ${{[0-9]+}}, 0($[[R0]])
+; CHECK-EL:   sc      $[[R2:[0-9]+]], 0($[[R0]])
+; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
+
+; CHECK-EB-LABEL:   AtomicSwap32:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      ${{[0-9]+}}, 0($[[R0]])
+; CHECK-EB:   sc      $[[R2:[0-9]+]], 0($[[R0]])
+; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
@@ -55,14 +80,23 @@ entry:
   %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic
   ret i32 %0
 
-; CHECK:   AtomicCmpSwap32:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(x)
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $2, 0($[[R0]])
-; CHECK:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
-; CHECK:   sc      $[[R2:[0-9]+]], 0($[[R0]])
-; CHECK:   beq     $[[R2]], $zero, $[[BB0]]
-; CHECK:   $[[BB1]]:
+; CHECK-EL-LABEL:   AtomicCmpSwap32:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $2, 0($[[R0]])
+; CHECK-EL:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
+; CHECK-EL:   sc      $[[R2:[0-9]+]], 0($[[R0]])
+; CHECK-EL:   beqz    $[[R2]], $[[BB0]]
+; CHECK-EL:   $[[BB1]]:
+
+; CHECK-EB-LABEL:   AtomicCmpSwap32:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(x)
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $2, 0($[[R0]])
+; CHECK-EB:   bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
+; CHECK-EB:   sc      $[[R2:[0-9]+]], 0($[[R0]])
+; CHECK-EB:   beqz    $[[R2]], $[[BB0]]
+; CHECK-EB:   $[[BB1]]:
 }
 
 
@@ -74,30 +108,56 @@ entry:
   %0 = atomicrmw add i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK:   AtomicLoadAdd8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK:   sc      $[[R14]], 0($[[R2]])
-; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
-
-; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK:   sra     $2, $[[R17]], 24
+; CHECK-EL-LABEL:   AtomicLoadAdd8:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
+
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EL:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EL:   sra     $2, $[[R17]], 24
+
+; CHECK-EB-LABEL:   AtomicLoadAdd8:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
+; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EB:   addu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EB:   sra     $2, $[[R17]], 24
 }
 
 define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
@@ -105,30 +165,56 @@ entry:
   %0 = atomicrmw sub i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK:   AtomicLoadSub8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK:   sllv     $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK:   sc      $[[R14]], 0($[[R2]])
-; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
-
-; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK:   sra     $2, $[[R17]], 24
+; CHECK-EL-LABEL:   AtomicLoadSub8:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK-EL:   sllv     $[[R9:[0-9]+]], $4, $[[R4]]
+
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EL:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EL:   sra     $2, $[[R17]], 24
+
+; CHECK-EB-LABEL:   AtomicLoadSub8:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
+; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EB:   subu    $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EB:   sra     $2, $[[R17]], 24
 }
 
 define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
@@ -136,31 +222,58 @@ entry:
   %0 = atomicrmw nand i8* @y, i8 %incr monotonic
   ret i8 %0
 
-; CHECK:   AtomicLoadNand8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
-; CHECK:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
-; CHECK:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
-; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
-; CHECK:   sc      $[[R14]], 0($[[R2]])
-; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
-
-; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK:   sra     $2, $[[R17]], 24
+; CHECK-EL-LABEL:   AtomicLoadNand8:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
+
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EL:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK-EL:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK-EL:   and     $[[R12:[0-9]+]], $[[R11]], $[[R6]]
+; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EL:   sra     $2, $[[R17]], 24
+
+; CHECK-EB-LABEL:   AtomicLoadNand8:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
+; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EB:   and     $[[R18:[0-9]+]], $[[R10]], $[[R9]]
+; CHECK-EB:   nor     $[[R11:[0-9]+]], $zero, $[[R18]]
+; CHECK-EB:   and     $[[R12:[0-9]+]], $[[R11]], $[[R7]]
+; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
+; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EB:   sra     $2, $[[R17]], 24
 }
 
 define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
@@ -168,29 +281,54 @@ entry:
   %0 = atomicrmw xchg i8* @y, i8 %newval monotonic
   ret i8 %0
 
-; CHECK:   AtomicSwap8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
-
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R10:[0-9]+]], 0($[[R2]])
-; CHECK:   and     $[[R18:[0-9]+]], $[[R9]], $[[R6]]
-; CHECK:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
-; CHECK:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
-; CHECK:   sc      $[[R14]], 0($[[R2]])
-; CHECK:   beq     $[[R14]], $zero, $[[BB0]]
-
-; CHECK:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
-; CHECK:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
-; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK:   sra     $2, $[[R17]], 24
+; CHECK-EL-LABEL:   AtomicSwap8:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK-EL:   sllv    $[[R9:[0-9]+]], $4, $[[R4]]
+
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EL:   and     $[[R18:[0-9]+]], $[[R9]], $[[R6]]
+; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EL:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
+; CHECK-EL:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EL:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EL:   and     $[[R15:[0-9]+]], $[[R10]], $[[R6]]
+; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R4]]
+; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EL:   sra     $2, $[[R17]], 24
+
+; CHECK-EB-LABEL:   AtomicSwap8:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
+; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; CHECK-EB:   sllv    $[[R9:[0-9]+]], $4, $[[R5]]
+
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R10:[0-9]+]], 0($[[R2]])
+; CHECK-EB:   and     $[[R18:[0-9]+]], $[[R9]], $[[R7]]
+; CHECK-EB:   and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
+; CHECK-EB:   or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
+; CHECK-EB:   sc      $[[R14]], 0($[[R2]])
+; CHECK-EB:   beqz    $[[R14]], $[[BB0]]
+
+; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
+; CHECK-EB:   srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
+; CHECK-EB:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EB:   sra     $2, $[[R17]], 24
 }
 
 define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
@@ -198,34 +336,64 @@ entry:
   %0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic
   ret i8 %0
 
-; CHECK:   AtomicCmpSwap8:
-; CHECK:   lw      $[[R0:[0-9]+]], %got(y)
-; CHECK:   addiu   $[[R1:[0-9]+]], $zero, -4
-; CHECK:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; CHECK:   andi    $[[R3:[0-9]+]], $[[R0]], 3
-; CHECK:   sll     $[[R4:[0-9]+]], $[[R3]], 3
-; CHECK:   ori     $[[R5:[0-9]+]], $zero, 255
-; CHECK:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
-; CHECK:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
-; CHECK:   andi    $[[R8:[0-9]+]], $4, 255
-; CHECK:   sllv    $[[R9:[0-9]+]], $[[R8]], $[[R4]]
-; CHECK:   andi    $[[R10:[0-9]+]], $5, 255
-; CHECK:   sllv    $[[R11:[0-9]+]], $[[R10]], $[[R4]]
-
-; CHECK:   $[[BB0:[A-Z_0-9]+]]:
-; CHECK:   ll      $[[R12:[0-9]+]], 0($[[R2]])
-; CHECK:   and     $[[R13:[0-9]+]], $[[R12]], $[[R6]]
-; CHECK:   bne     $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
-
-; CHECK:   and     $[[R14:[0-9]+]], $[[R12]], $[[R7]]
-; CHECK:   or      $[[R15:[0-9]+]], $[[R14]], $[[R11]]
-; CHECK:   sc      $[[R15]], 0($[[R2]])
-; CHECK:   beq     $[[R15]], $zero, $[[BB0]]
-
-; CHECK:   $[[BB1]]:
-; CHECK:   srlv    $[[R16:[0-9]+]], $[[R13]], $[[R4]]
-; CHECK:   sll     $[[R17:[0-9]+]], $[[R16]], 24
-; CHECK:   sra     $2, $[[R17]], 24
+; CHECK-EL-LABEL:   AtomicCmpSwap8:
+; CHECK-EL:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EL:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EL:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EL:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EL:   sll     $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EL:   ori     $[[R5:[0-9]+]], $zero, 255
+; CHECK-EL:   sllv    $[[R6:[0-9]+]], $[[R5]], $[[R4]]
+; CHECK-EL:   nor     $[[R7:[0-9]+]], $zero, $[[R6]]
+; CHECK-EL:   andi    $[[R8:[0-9]+]], $4, 255
+; CHECK-EL:   sllv    $[[R9:[0-9]+]], $[[R8]], $[[R4]]
+; CHECK-EL:   andi    $[[R10:[0-9]+]], $5, 255
+; CHECK-EL:   sllv    $[[R11:[0-9]+]], $[[R10]], $[[R4]]
+
+; CHECK-EL:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EL:   ll      $[[R12:[0-9]+]], 0($[[R2]])
+; CHECK-EL:   and     $[[R13:[0-9]+]], $[[R12]], $[[R6]]
+; CHECK-EL:   bne     $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
+
+; CHECK-EL:   and     $[[R14:[0-9]+]], $[[R12]], $[[R7]]
+; CHECK-EL:   or      $[[R15:[0-9]+]], $[[R14]], $[[R11]]
+; CHECK-EL:   sc      $[[R15]], 0($[[R2]])
+; CHECK-EL:   beqz    $[[R15]], $[[BB0]]
+
+; CHECK-EL:   $[[BB1]]:
+; CHECK-EL:   srlv    $[[R16:[0-9]+]], $[[R13]], $[[R4]]
+; CHECK-EL:   sll     $[[R17:[0-9]+]], $[[R16]], 24
+; CHECK-EL:   sra     $2, $[[R17]], 24
+
+; CHECK-EB-LABEL:   AtomicCmpSwap8:
+; CHECK-EB:   lw      $[[R0:[0-9]+]], %got(y)
+; CHECK-EB:   addiu   $[[R1:[0-9]+]], $zero, -4
+; CHECK-EB:   and     $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+; CHECK-EB:   andi    $[[R3:[0-9]+]], $[[R0]], 3
+; CHECK-EB:   xori    $[[R4:[0-9]+]], $[[R3]], 3
+; CHECK-EB:   sll     $[[R5:[0-9]+]], $[[R4]], 3
+; CHECK-EB:   ori     $[[R6:[0-9]+]], $zero, 255
+; CHECK-EB:   sllv    $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; CHECK-EB:   nor     $[[R8:[0-9]+]], $zero, $[[R7]]
+; CHECK-EB:   andi    $[[R9:[0-9]+]], $4, 255
+; CHECK-EB:   sllv    $[[R10:[0-9]+]], $[[R9]], $[[R5]]
+; CHECK-EB:   andi    $[[R11:[0-9]+]], $5, 255
+; CHECK-EB:   sllv    $[[R12:[0-9]+]], $[[R11]], $[[R5]]
+
+; CHECK-EB:   $[[BB0:[A-Z_0-9]+]]:
+; CHECK-EB:   ll      $[[R13:[0-9]+]], 0($[[R2]])
+; CHECK-EB:   and     $[[R14:[0-9]+]], $[[R13]], $[[R7]]
+; CHECK-EB:   bne     $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]]
+
+; CHECK-EB:   and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
+; CHECK-EB:   or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
+; CHECK-EB:   sc      $[[R16]], 0($[[R2]])
+; CHECK-EB:   beqz    $[[R16]], $[[BB0]]
+
+; CHECK-EB:   $[[BB1]]:
+; CHECK-EB:   srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
+; CHECK-EB:   sll     $[[R18:[0-9]+]], $[[R17]], 24
+; CHECK-EB:   sra     $2, $[[R18]], 24
 }
 
 @countsint = common global i32 0, align 4
@@ -235,12 +403,19 @@ entry:
   %0 = atomicrmw add i32* @countsint, i32 %v seq_cst
   ret i32 %0 
 
-; CHECK:   CheckSync:
-; CHECK:   sync 0
-; CHECK:   ll
-; CHECK:   sc
-; CHECK:   beq
-; CHECK:   sync 0
+; CHECK-EL-LABEL:   CheckSync:
+; CHECK-EL:   sync 0
+; CHECK-EL:   ll
+; CHECK-EL:   sc
+; CHECK-EL:   beq
+; CHECK-EL:   sync 0
+
+; CHECK-EB-LABEL:   CheckSync:
+; CHECK-EB:   sync 0
+; CHECK-EB:   ll
+; CHECK-EB:   sc
+; CHECK-EB:   beq
+; CHECK-EB:   sync 0
 }
 
 ; make sure that this assertion in
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index b9c3804e0d72..0f0f01afc142 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -7,7 +7,7 @@ entry:
   %0 = atomicrmw add i32* %mem, i32 %val seq_cst
   %add = add nsw i32 %0, %c
   ret i32 %add
-; 16: foo:
+; 16-LABEL: foo:
 ; 16:	lw	${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
 ; 16: 	lw	${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
 }
@@ -26,7 +26,7 @@ entry:
   %4 = atomicrmw xchg i32* %x, i32 1 seq_cst
   %5 = load volatile i32* %x, align 4
   %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
-; 16: main:
+; 16-LABEL: main:
 ; 16:	lw	${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
 ; 16: 	lw	${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
 ; 16:	lw	${{[0-9]+}}, %call16(__sync_val_compare_and_swap_4)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/beqzc.ll b/test/CodeGen/Mips/beqzc.ll
new file mode 100644
index 000000000000..4a294c2d817e
--- /dev/null
+++ b/test/CodeGen/Mips/beqzc.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=cond-b-short
+
+@i = global i32 0, align 4
+@j = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define i32 @main() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %cmp = icmp eq i32 %0, 0
+  %. = select i1 %cmp, i32 10, i32 55
+  store i32 %., i32* @j, align 4
+; cond-b-short: 	beqz	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}}  # 16 bit inst
+  ret i32 0
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+
+
diff --git a/test/CodeGen/Mips/beqzc1.ll b/test/CodeGen/Mips/beqzc1.ll
new file mode 100644
index 000000000000..8f929a8e3541
--- /dev/null
+++ b/test/CodeGen/Mips/beqzc1.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=cond-b-short
+
+@i = global i32 0, align 4
+@j = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define i32 @main() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; cond-b-short: 	bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}}  # 16 bit inst
+if.then:                                          ; preds = %entry
+  store i32 10, i32* @j, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 0
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+
diff --git a/test/CodeGen/Mips/biggot.ll b/test/CodeGen/Mips/biggot.ll
index c4ad851c8258..da287eea6fd1 100644
--- a/test/CodeGen/Mips/biggot.ll
+++ b/test/CodeGen/Mips/biggot.ll
@@ -31,12 +31,12 @@ declare void @foo0(i32)
 
 define void @foo2(i32* nocapture %d, i32* nocapture %s, i32 %n) nounwind {
 entry:
-; O32: foo2:
+; O32-LABEL: foo2:
 ; O32: lui $[[R2:[0-9]+]], %call_hi(memcpy)
 ; O32: addu  $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
 ; O32: lw  ${{[0-9]+}}, %call_lo(memcpy)($[[R3]])
 
-; N64: foo2:
+; N64-LABEL: foo2:
 ; N64: lui $[[R2:[0-9]+]], %call_hi(memcpy)
 ; N64: daddu  $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
 ; N64: ld  ${{[0-9]+}}, %call_lo(memcpy)($[[R3]])
diff --git a/test/CodeGen/Mips/blez_bgez.ll b/test/CodeGen/Mips/blez_bgez.ll
new file mode 100644
index 000000000000..f6a5e4f47a5a
--- /dev/null
+++ b/test/CodeGen/Mips/blez_bgez.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+; CHECK-LABEL: test_blez:
+; CHECK: blez ${{[0-9]+}}, $BB
+
+define void @test_blez(i32 %a) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo1()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+declare void @foo1()
+
+; CHECK-LABEL: test_bgez:
+; CHECK: bgez ${{[0-9]+}}, $BB
+
+define void @test_bgez(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo1()
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index 7de7fa6f6bdb..beab65f47196 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -4,6 +4,8 @@
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static   < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static   < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2
 
 @reg = common global i8* null, align 4
 
@@ -36,6 +38,14 @@ entry:
 ; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]])
 ; STATIC-N64: ld  $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]])
 ; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]])
+; STATIC-MIPS16-1: .ent	f
+; STATIC-MIPS16-2: .ent	f
+; STATIC-MIPS16-1: li  $[[R1_16:[0-9]+]], %hi($tmp[[TI_16:[0-9]+]])
+; STATIC-MIPS16-1: sll ${{[0-9]+}},  $[[R1_16]], 16
+; STATIC-MIPS16-2: li  ${{[0-9]+}}, %lo($tmp{{[0-9]+}})
+; STATIC-MIPS16-1: jal	dummy
+; STATIC-MIPS16-2: jal	dummy
+
 define void @f() nounwind {
 entry:
   %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index 2deb037c9c39..68341c1ba25b 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -31,7 +31,7 @@ declare void @foo2(i32)
 ;
 define void @foo3(i32 %a) nounwind {
 entry:
-; Default:     foo3:
+; Default-LABEL:     foo3:
 ; Default:     jalr
 ; Default:     cvt.d.w
 
@@ -49,7 +49,7 @@ declare void @foo4(double)
 ; Check that branch delay slot can be filled with an instruction with operand
 ; $1.
 ;
-; Default:     foo5:
+; Default-LABEL:     foo5:
 ; Default-NOT: nop
 
 define void @foo5(i32 %a) nounwind {
@@ -76,7 +76,7 @@ if.end:
 
 ; Check that delay slot filler can place mov.s or mov.d in delay slot.
 ;
-; Default:     foo6:
+; Default-LABEL:     foo6:
 ; Default-NOT: nop
 ; Default:     .end foo6
 
@@ -90,7 +90,7 @@ declare void @foo7(double, float)
 
 ; Check that a store can move past other memory instructions.
 ;
-; STATICO1:      foo8:
+; STATICO1-LABEL:      foo8:
 ; STATICO1:      jalr ${{[0-9]+}}
 ; STATICO1-NEXT: sw ${{[0-9]+}}, %lo(g1)
 
@@ -109,7 +109,7 @@ entry:
 ; Test searchForward. Check that the second jal's slot is filled with another
 ; instruction in the same block.
 ;
-; FORWARD:     foo10:
+; FORWARD-LABEL:     foo10:
 ; FORWARD:     jal foo11
 ; FORWARD:     jal foo11
 ; FORWARD-NOT: nop
@@ -130,10 +130,10 @@ declare void @foo11()
 ; Check that delay slots of branches in both the entry block and loop body are
 ; filled.
 ;
-; SUCCBB:      succbbs_loop1:
-; SUCCBB:      bne ${{[0-9]+}}, $zero, $BB
+; SUCCBB-LABEL:      succbbs_loop1:
+; SUCCBB:      blez $5, $BB
 ; SUCCBB-NEXT: addiu
-; SUCCBB:      bne ${{[0-9]+}}, $zero, $BB
+; SUCCBB:      bnez ${{[0-9]+}}, $BB
 ; SUCCBB-NEXT: addiu
 
 define i32 @succbbs_loop1(i32* nocapture %a, i32 %n) {
@@ -158,9 +158,16 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; Check that the first branch has its slot filled.
 ;
-; SUCCBB:      succbbs_br1:
-; SUCCBB:      beq ${{[0-9]+}}, $zero, $BB
-; SUCCBB-NEXT: lw $25, %call16(foo100)
+; SUCCBB-LABEL:      succbbs_br1:
+; SUCCBB:      beqz ${{[0-9]+}}, $BB
+; SUCCBB-NEXT: lw ${{[0-9]+}}, %got(foo101)(${{[0-9]+}})
+
+define internal fastcc void @foo101() {
+entry:
+  tail call void @foo100()
+  tail call void @foo100()
+  ret void
+}
 
 define void @succbbs_br1(i32 %a) {
 entry:
@@ -168,7 +175,7 @@ entry:
   br i1 %tobool, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  tail call void @foo100() #1
+  tail call fastcc void @foo101()
   br label %if.end
 
 if.end:                                           ; preds = %entry, %if.then
diff --git a/test/CodeGen/Mips/brsize3.ll b/test/CodeGen/Mips/brsize3.ll
new file mode 100644
index 000000000000..7b1f44001a9a
--- /dev/null
+++ b/test/CodeGen/Mips/brsize3.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=b-no-short
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=b-long
+
+; ModuleID = 'brsize3.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+; Function Attrs: noreturn nounwind optsize
+define void @foo() #0 {
+entry:
+  br label %x
+
+x:                                                ; preds = %x, %entry
+  tail call void asm sideeffect ".space 60000", ""() #1, !srcloc !1
+  br label %x
+; b-long: $BB0_1:
+; b-long:	#APP
+; b-long:	.space 60000
+; b-long:	#NO_APP
+; b-long:	b	$BB0_1
+; b-no-short: $BB0_1:
+; b-no-short:	#APP
+; b-no-short:	.space 60000
+; b-no-short:	#NO_APP
+; b-no-short-NOT:	b	$BB0_1 # 16 bit inst
+
+}
+
+attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind }
+
+!1 = metadata !{i32 45}
diff --git a/test/CodeGen/Mips/brsize3a.ll b/test/CodeGen/Mips/brsize3a.ll
new file mode 100644
index 000000000000..6382fa228e19
--- /dev/null
+++ b/test/CodeGen/Mips/brsize3a.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=b-short
+
+; ModuleID = 'brsize3.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+; Function Attrs: noreturn nounwind optsize
+define void @foo() #0 {
+entry:
+  br label %x
+
+x:                                                ; preds = %x, %entry
+  tail call void asm sideeffect ".space 200", ""() #1, !srcloc !1
+  br label %x
+; b-short: $BB0_1:
+; b-short:	#APP
+; b-short:	.space 200
+; b-short:	#NO_APP
+; b-short:	b	$BB0_1 # 16 bit inst
+
+}
+
+attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind }
+
+!1 = metadata !{i32 45}
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
index a8fc2cdc7431..f17b91aab802 100644
--- a/test/CodeGen/Mips/bswap.ll
+++ b/test/CodeGen/Mips/bswap.ll
@@ -1,20 +1,23 @@
 ; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+; RUN: llc  < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float   | FileCheck %s -check-prefix=mips16 
 
 define i32 @bswap32(i32 %x) nounwind readnone {
 entry:
-; MIPS32: bswap32:
+; MIPS32-LABEL: bswap32:
 ; MIPS32: wsbh $[[R0:[0-9]+]]
 ; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; mips16: .ent bswap32
   %or.3 = call i32 @llvm.bswap.i32(i32 %x)
   ret i32 %or.3
 }
 
 define i64 @bswap64(i64 %x) nounwind readnone {
 entry:
-; MIPS64: bswap64:
+; MIPS64-LABEL: bswap64:
 ; MIPS64: dsbh $[[R0:[0-9]+]]
 ; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
+; mips16: .ent bswap64
   %or.7 = call i64 @llvm.bswap.i64(i64 %x)
   ret i64 %or.7
 }
diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll
index 585bc250fb8c..490d4273c5b6 100644
--- a/test/CodeGen/Mips/buildpairextractelementf64.ll
+++ b/test/CodeGen/Mips/buildpairextractelementf64.ll
@@ -1,20 +1,35 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s
-; RUN: llc  < %s -march=mips   | FileCheck %s
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=FP32
+; RUN: llc -march=mips  < %s | FileCheck %s -check-prefix=FP32
+; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64
+; RUN: llc -march=mips -mattr=+fp64 < %s | FileCheck %s -check-prefix=FP64
+
 @a = external global i32
 
+; None of the RUN lines enables the default check prefix, so each function
+; label is anchored once per prefix that is actually in use.
+; FP32-LABEL: f:
+; FP64-LABEL: f:
+; FP32: mtc1
+; FP32: mtc1
+; FP64-DAG: mtc1
+; FP64-DAG: mthc1
+
 define double @f(i32 %a1, double %d) nounwind {
 entry:
-; CHECK: mtc1
-; CHECK: mtc1
   store i32 %a1, i32* @a, align 4
   %add = fadd double %d, 2.000000e+00
   ret double %add
 }
 
+; FP32-LABEL: f3:
+; FP64-LABEL: f3:
+; FP32: mfc1
+; FP32: mfc1
+; FP64-DAG: mfc1
+; FP64-DAG: mfhc1
+
 define void @f3(double %d, i32 %a1) nounwind {
 entry:
-; CHECK: mfc1
-; CHECK: mfc1
   tail call void @f2(i32 %a1, double %d) nounwind
   ret void
 }
diff --git a/test/CodeGen/Mips/check-noat.ll b/test/CodeGen/Mips/check-noat.ll
index bfeff677b34d..cfcd367e87af 100644
--- a/test/CodeGen/Mips/check-noat.ll
+++ b/test/CodeGen/Mips/check-noat.ll
@@ -2,7 +2,7 @@
 
 define void @f() nounwind readnone {
 entry:
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: .set  noat
 ; CHECK: .set  at
 
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 81925a4953ce..c24c5ac26ae1 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -5,12 +5,12 @@
 @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
 @i3 = common global i32* null, align 4
 
-; O32:  lw $[[R0:[0-9]+]], %got(i3)
-; O32:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1) 
-; O32:  movn $[[R0]], $[[R1]], ${{[0-9]+}} 
-; N64:  ldr $[[R0:[0-9]+]] 
-; N64:  ld $[[R1:[0-9]+]], %got_disp(i1)
-; N64:  movn $[[R0]], $[[R1]], ${{[0-9]+}} 
+; O32-DAG:  lw $[[R0:[0-9]+]], %got(i3)
+; O32-DAG:  addiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got(i1)
+; O32:      movn $[[R0]], $[[R1]], ${{[0-9]+}}
+; N64-DAG:  ldr $[[R0:[0-9]+]]
+; N64-DAG:  ld $[[R1:[0-9]+]], %got_disp(i1)
+; N64:      movn $[[R0]], $[[R1]], ${{[0-9]+}}
 define i32* @cmov1(i32 %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
@@ -22,11 +22,11 @@ entry:
 @c = global i32 1, align 4
 @d = global i32 0, align 4
 
-; O32: cmov2:
+; O32-LABEL: cmov2:
 ; O32: addiu $[[R1:[0-9]+]], ${{[a-z0-9]+}}, %got(d)
 ; O32: addiu $[[R0:[0-9]+]], ${{[a-z0-9]+}}, %got(c)
 ; O32: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
-; N64: cmov2:
+; N64-LABEL: cmov2:
 ; N64: daddiu $[[R1:[0-9]+]], ${{[0-9]+}}, %got_disp(d)
 ; N64: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %got_disp(c)
 ; N64: movn  $[[R1]], $[[R0]], ${{[0-9]+}}
@@ -39,7 +39,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: cmov3:
+; O32-LABEL: cmov3:
 ; O32: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
 ; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
@@ -49,7 +49,7 @@ entry:
   ret i32 %cond
 }
 
-; N64: cmov4:
+; N64-LABEL: cmov4:
 ; N64: xori $[[R0:[0-9]+]], ${{[0-9]+}}, 234
 ; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
@@ -67,7 +67,7 @@ entry:
 ;  (movz t, (setlt a, N + 1), f)
 ; if N + 1 fits in 16-bit.
 
-; O32: slti0:
+; O32-LABEL: slti0:
 ; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
 ; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 
@@ -78,7 +78,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: slti1:
+; O32-LABEL: slti1:
 ; O32: slt ${{[0-9]+}}
 
 define i32 @slti1(i32 %a) {
@@ -88,7 +88,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: slti2:
+; O32-LABEL: slti2:
 ; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
 ; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 
@@ -99,7 +99,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: slti3:
+; O32-LABEL: slti3:
 ; O32: slt ${{[0-9]+}}
 
 define i32 @slti3(i32 %a) {
@@ -111,7 +111,7 @@ entry:
 
 ; 64-bit patterns.
 
-; N64: slti64_0:
+; N64-LABEL: slti64_0:
 ; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
 ; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 
@@ -122,7 +122,7 @@ entry:
   ret i64 %conv
 }
 
-; N64: slti64_1:
+; N64-LABEL: slti64_1:
 ; N64: slt ${{[0-9]+}}
 
 define i64 @slti64_1(i64 %a) {
@@ -132,7 +132,7 @@ entry:
   ret i64 %conv
 }
 
-; N64: slti64_2:
+; N64-LABEL: slti64_2:
 ; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
 ; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 
@@ -143,7 +143,7 @@ entry:
   ret i64 %conv
 }
 
-; N64: slti64_3:
+; N64-LABEL: slti64_3:
 ; N64: slt ${{[0-9]+}}
 
 define i64 @slti64_3(i64 %a) {
@@ -155,7 +155,7 @@ entry:
 
 ; sltiu instructions.
 
-; O32: sltiu0:
+; O32-LABEL: sltiu0:
 ; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
 ; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 
@@ -166,7 +166,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: sltiu1:
+; O32-LABEL: sltiu1:
 ; O32: sltu ${{[0-9]+}}
 
 define i32 @sltiu1(i32 %a) {
@@ -176,7 +176,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: sltiu2:
+; O32-LABEL: sltiu2:
 ; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
 ; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
 
@@ -187,7 +187,7 @@ entry:
   ret i32 %cond
 }
 
-; O32: sltiu3:
+; O32-LABEL: sltiu3:
 ; O32: sltu ${{[0-9]+}}
 
 define i32 @sltiu3(i32 %a) {
diff --git a/test/CodeGen/Mips/cmplarge.ll b/test/CodeGen/Mips/cmplarge.ll
new file mode 100644
index 000000000000..2a3d30a95492
--- /dev/null
+++ b/test/CodeGen/Mips/cmplarge.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=cmp16
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mipsel--linux-gnu"
+
+%struct.StorablePicture = type { i32, i32, i32, i32 }
+
+
+
+define void @getSubImagesLuma(%struct.StorablePicture* nocapture %s) #0 {
+entry:
+  %size_y = getelementptr inbounds %struct.StorablePicture* %s, i32 0, i32 1
+  %0 = load i32* %size_y, align 4
+  %sub = add nsw i32 %0, -1
+  %add5 = add nsw i32 %0, 20
+  %cmp6 = icmp sgt i32 %add5, -20
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %j.07 = phi i32 [ %inc, %for.body ], [ -20, %entry ]
+  %call = tail call i32 bitcast (i32 (...)* @iClip3 to i32 (i32, i32, i32)*)(i32 0, i32 %sub, i32 %j.07) #2
+  %inc = add nsw i32 %j.07, 1
+  %1 = load i32* %size_y, align 4
+  %add = add nsw i32 %1, 20
+  %cmp = icmp slt i32 %inc, %add
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; cmp16: 	.ent	getSubImagesLuma
+; cmp16:	.end	getSubImagesLuma
+declare i32 @iClip3(...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll
new file mode 100644
index 000000000000..8c0cbe3396b7
--- /dev/null
+++ b/test/CodeGen/Mips/const-mult.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=CHECK64
+
+; CHECK-LABEL: mul5_32:
+; CHECK: sll $[[R0:[0-9]+]], $4, 2
+; CHECK: addu ${{[0-9]+}}, $[[R0]], $4
+
+define i32 @mul5_32(i32 %a) {
+entry:
+  %mul = mul nsw i32 %a, 5
+  ret i32 %mul
+}
+
+; CHECK-LABEL:     mul27_32:
+; CHECK-DAG: sll $[[R0:[0-9]+]], $4, 2
+; CHECK-DAG: addu $[[R1:[0-9]+]], $[[R0]], $4
+; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5
+; CHECK:     subu ${{[0-9]+}}, $[[R2]], $[[R1]]
+
+define i32 @mul27_32(i32 %a) {
+entry:
+  %mul = mul nsw i32 %a, 27
+  ret i32 %mul
+}
+
+; CHECK-LABEL:     muln2147483643_32:
+; CHECK-DAG: sll $[[R0:[0-9]+]], $4, 2
+; CHECK-DAG: addu $[[R1:[0-9]+]], $[[R0]], $4
+; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31
+; CHECK:     addu ${{[0-9]+}}, $[[R2]], $[[R1]]
+
+define i32 @muln2147483643_32(i32 %a) {
+entry:
+  %mul = mul nsw i32 %a, -2147483643
+  ret i32 %mul
+}
+
+; CHECK64-LABEL:     muln9223372036854775805_64:
+; CHECK64-DAG: dsll $[[R0:[0-9]+]], $4, 1
+; CHECK64-DAG: daddu $[[R1:[0-9]+]], $[[R0]], $4
+; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63
+; CHECK64:     daddu ${{[0-9]+}}, $[[R2]], $[[R1]]
+
+define i64 @muln9223372036854775805_64(i64 %a) {
+entry:
+  %mul = mul nsw i64 %a, -9223372036854775805
+  ret i64 %mul
+}
diff --git a/test/CodeGen/Mips/const1.ll b/test/CodeGen/Mips/const1.ll
new file mode 100644
index 000000000000..cb2bacaf17a5
--- /dev/null
+++ b/test/CodeGen/Mips/const1.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s 
+
+; ModuleID = 'const1.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mipsel-unknown-linux"
+
+@i = common global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+@l = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @t() #0 {
+entry:
+  store i32 -559023410, i32* @i, align 4
+  store i32 -559023410, i32* @j, align 4
+  store i32 -87105875, i32* @k, align 4
+  store i32 262991277, i32* @l, align 4
+  ret void
+; CHECK: 	lw	${{[0-9]+}}, $CPI0_0
+; CHECK:	lw	${{[0-9]+}}, $CPI0_1
+; CHECK: 	lw	${{[0-9]+}}, $CPI0_2
+; CHECK: $CPI0_0:
+; CHECK:	.4byte	3735943886
+; CHECK: $CPI0_1:
+; CHECK:	.4byte	4207861421
+; CHECK: $CPI0_2:
+; CHECK:	.4byte	262991277
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b754974ec32ab712ea7d8b52cd8037b24e7d6ed3) (gitosis@dmz-portal.mips.com:llvm.git 8e211187b501bc73edb938fde0019c9a20bcffd5)"}
diff --git a/test/CodeGen/Mips/const4a.ll b/test/CodeGen/Mips/const4a.ll
new file mode 100644
index 000000000000..0332327cec69
--- /dev/null
+++ b/test/CodeGen/Mips/const4a.ll
@@ -0,0 +1,180 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation  < %s | FileCheck %s -check-prefix=no-load-relax
+
+; ModuleID = 'const4.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+@i = common global i32 0, align 4
+@b = common global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+@l = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @t() #0 {
+entry:
+  store i32 -559023410, i32* @i, align 4
+  %0 = load i32* @b, align 4
+; no-load-relax:	lw	${{[0-9]+}}, $CPI0_1	# 16 bit inst
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.else
+; no-load-relax:	beqz	${{[0-9]+}}, $BB0_3
+; no-load-relax:	lw	${{[0-9]+}}, %call16(foo)(${{[0-9]+}})
+; no-load-relax:	b	$BB0_4
+; no-load-relax:	.align	2
+; no-load-relax: $CPI0_0:
+; no-load-relax:	.4byte	3735943886
+; no-load-relax: $BB0_3:
+; no-load-relax:	lw	${{[0-9]+}}, %call16(goo)(${{[0-9]+}})
+if.then:                                          ; preds = %entry
+  call void bitcast (void (...)* @foo to void ()*)()
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  call void bitcast (void (...)* @goo to void ()*)()
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  ret void
+}
+
+declare void @foo(...) #1
+
+declare void @goo(...) #1
+
+declare void @hoo(...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"}
diff --git a/test/CodeGen/Mips/const6.ll b/test/CodeGen/Mips/const6.ll
new file mode 100644
index 000000000000..20cdc09f7be1
--- /dev/null
+++ b/test/CodeGen/Mips/const6.ll
@@ -0,0 +1,164 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=load-relax
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation  < %s | FileCheck %s -check-prefix=no-load-relax
+
+; ModuleID = 'const6.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+@i = common global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+@l = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @t() #0 {
+entry:
+  store i32 -559023410, i32* @i, align 4
+; load-relax: 	lw	${{[0-9]+}}, $CPI0_0
+; load-relax:	jrc	 $ra
+; load-relax:	.align	2
+; load-relax: $CPI0_0:
+; load-relax:	.4byte	3735943886
+; load-relax:	.end	t
+
+; no-load-relax: lw	${{[0-9]+}}, $CPI0_1	# 16 bit inst
+; no-load-relax:	jalrc 	${{[0-9]+}}
+; no-load-relax:	b	$BB0_2
+; no-load-relax:	.align	2
+; no-load-relax: $CPI0_0:
+; no-load-relax:	.4byte	3735943886
+; no-load-relax: $BB0_2:
+
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  call void bitcast (void (...)* @hoo to void ()*)()
+  ret void
+}
+
+declare void @hoo(...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"}
+
+
diff --git a/test/CodeGen/Mips/const6a.ll b/test/CodeGen/Mips/const6a.ll
new file mode 100644
index 000000000000..8b402accc7de
--- /dev/null
+++ b/test/CodeGen/Mips/const6a.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=load-relax1
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=load-relax
+
+; ModuleID = 'const6a.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+@i = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @t() #0 {
+entry:
+  store i32 -559023410, i32* @i, align 4 ; -559023410 is 3735943886 (0xDEADFACE) as unsigned 32-bit, the .4byte value expected after $CPI0_0 below
+; load-relax-NOT: 	lw	${{[0-9]+}}, $CPI0_0 # 16 bit inst
+; load-relax1: lw	${{[0-9]+}}, $CPI0_0
+; load-relax:	jrc	 $ra
+; load-relax:	.align	2
+; load-relax: $CPI0_0:
+; load-relax:	.4byte	3735943886
+; load-relax:	.end	t
+  call void asm sideeffect ".space 40000", ""() #1, !srcloc !1 ; inline asm emits a 40000-byte gap; NOTE(review): presumably sized to put $CPI0_0 out of reach of the 16-bit lw form -- confirm
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind }
+
+!1 = metadata !{i32 121}
diff --git a/test/CodeGen/Mips/ctlz.ll b/test/CodeGen/Mips/ctlz.ll
new file mode 100644
index 000000000000..2ddb72755ac8
--- /dev/null
+++ b/test/CodeGen/Mips/ctlz.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static
+
+@x = global i32 28912, align 4
+@y = common global i32 0, align 4
+
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @x, align 4 ; @x is initialised to 28912 at module scope
+  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 true) ; count leading zeros; the i1 true flag makes a zero input yield an undefined result
+  store i32 %1, i32* @y, align 4 ; the count is published through global @y
+  ret i32 0
+}
+
+; static: .end main
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) #1
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind readnone }
+
diff --git a/test/CodeGen/Mips/disable-tail-merge.ll b/test/CodeGen/Mips/disable-tail-merge.ll
new file mode 100644
index 000000000000..b4c093aa8528
--- /dev/null
+++ b/test/CodeGen/Mips/disable-tail-merge.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@g0 = common global i32 0, align 4
+@g1 = common global i32 0, align 4
+
+; CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 23
+; CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 23
+
+define i32 @test1(i32 %a) {
+entry:
+  %tobool = icmp eq i32 %a, 0
+  %0 = load i32* @g0, align 4
+  br i1 %tobool, label %if.else, label %if.then ; %a == 0 goes to if.else, any other value to if.then
+
+if.then:
+  %add = add nsw i32 %0, 1 ; this arm stores @g0 + 1 back to @g0
+  store i32 %add, i32* @g0, align 4
+  %1 = load i32* @g1, align 4
+  %add1 = add nsw i32 %1, 23 ; same "+ 23" tail as if.else; the two addiu-by-23 expectations above this function need both copies to be emitted
+  br label %if.end
+
+if.else:
+  %add2 = add nsw i32 %0, 11 ; this arm stores @g0 + 11 back to @g0
+  store i32 %add2, i32* @g0, align 4
+  %2 = load i32* @g1, align 4
+  %add3 = add nsw i32 %2, 23 ; duplicate of the %add1 computation in if.then
+  br label %if.end
+
+if.end:
+  %storemerge = phi i32 [ %add3, %if.else ], [ %add1, %if.then ] ; picks whichever arm's @g1 + 23 was computed
+  store i32 %storemerge, i32* @g1, align 4
+  ret i32 %storemerge
+}
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index c470d1ce2ce5..b631c3b279f4 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -1,34 +1,60 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips -verify-machineinstrs < %s |\
+; RUN: FileCheck %s -check-prefix=TRAP
+; RUN: llc -march=mips -mno-check-zero-division < %s |\
+; RUN: FileCheck %s -check-prefix=NOCHECK
+
+; TRAP-LABEL: sdiv1:
+; TRAP: div $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; TRAP: teq $[[R0]], $zero, 7
+; TRAP: mflo
+
+; NOCHECK-LABEL: sdiv1:
+; NOCHECK-NOT: teq
+; NOCHECK: .end sdiv1
+
+@g0 = common global i32 0, align 4
+@g1 = common global i32 0, align 4
 
-; CHECK: div $zero,
 define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
   %div = sdiv i32 %a0, %a1
   ret i32 %div
 }
 
-; CHECK: div $zero,
+; TRAP-LABEL: srem1:
+; TRAP: div $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; TRAP: teq $[[R0]], $zero, 7
+; TRAP: mfhi
+
 define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
   %rem = srem i32 %a0, %a1
   ret i32 %rem
 }
 
-; CHECK: divu $zero,
+; TRAP-LABEL: udiv1:
+; TRAP: divu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; TRAP: teq $[[R0]], $zero, 7
+; TRAP: mflo
+
 define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
 
-; CHECK: divu $zero,
+; TRAP-LABEL: urem1:
+; TRAP: divu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; TRAP: teq $[[R0]], $zero, 7
+; TRAP: mfhi
+
 define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
 entry:
   %rem = urem i32 %a0, %a1
   ret i32 %rem
 }
 
-; CHECK: div $zero,
+; TRAP: div $zero,
 define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
   %rem = srem i32 %a0, %a1
@@ -37,7 +63,7 @@ entry:
   ret i32 %div
 }
 
-; CHECK: divu $zero,
+; TRAP: divu $zero,
 define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
   %rem = urem i32 %a0, %a1
@@ -45,3 +71,11 @@ entry:
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
+
+define i32 @killFlags() {
+entry:
+  %0 = load i32* @g0, align 4
+  %1 = load i32* @g1, align 4
+  %div = sdiv i32 %0, %1 ; NOTE(review): no FileCheck expectation targets this function; presumably it exists for the -verify-machineinstrs run (register kill flags around the divide) -- confirm
+  ret i32 %div
+}
diff --git a/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll b/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll
index 9f2f0661f997..a5fe34c1f684 100644
--- a/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll
+++ b/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s
 
-; CHECK: select_v2q15_eq_:
+; CHECK-LABEL: select_v2q15_eq_:
 ; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.ph ${{[0-9]+}}, $6, $7
 
@@ -17,7 +17,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2q15_lt_:
+; CHECK-LABEL: select_v2q15_lt_:
 ; CHECK: cmp.lt.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, $6, $7
 
@@ -34,7 +34,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2q15_le_:
+; CHECK-LABEL: select_v2q15_le_:
 ; CHECK: cmp.le.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, $6, $7
 
@@ -51,7 +51,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2q15_ne_:
+; CHECK-LABEL: select_v2q15_ne_:
 ; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.ph ${{[0-9]+}}, $7, $6
 
@@ -68,7 +68,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2q15_gt_:
+; CHECK-LABEL: select_v2q15_gt_:
 ; CHECK: cmp.le.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, $7, $6
 
@@ -85,7 +85,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2q15_ge_:
+; CHECK-LABEL: select_v2q15_ge_:
 ; CHECK: cmp.lt.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, $7, $6
 
@@ -102,7 +102,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4ui8_eq_:
+; CHECK-LABEL: select_v4ui8_eq_:
 ; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.qb ${{[0-9]+}}, $6, $7
 
@@ -119,7 +119,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4ui8_lt_:
+; CHECK-LABEL: select_v4ui8_lt_:
 ; CHECK: cmpu.lt.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, $6, $7
 
@@ -136,7 +136,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4ui8_le_:
+; CHECK-LABEL: select_v4ui8_le_:
 ; CHECK: cmpu.le.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, $6, $7
 
@@ -153,7 +153,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4ui8_ne_:
+; CHECK-LABEL: select_v4ui8_ne_:
 ; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.qb ${{[0-9]+}}, $7, $6
 
@@ -170,7 +170,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4ui8_gt_:
+; CHECK-LABEL: select_v4ui8_gt_:
 ; CHECK: cmpu.le.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, $7, $6
 
@@ -187,7 +187,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4ui8_ge_:
+; CHECK-LABEL: select_v4ui8_ge_:
 ; CHECK: cmpu.lt.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, $7, $6
 
@@ -204,7 +204,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2ui16_lt_:
+; CHECK-LABEL: select_v2ui16_lt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -221,7 +221,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2ui16_le_:
+; CHECK-LABEL: select_v2ui16_le_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -238,7 +238,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2ui16_gt_:
+; CHECK-LABEL: select_v2ui16_gt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -255,7 +255,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v2ui16_ge_:
+; CHECK-LABEL: select_v2ui16_ge_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -272,7 +272,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4i8_lt_:
+; CHECK-LABEL: select_v4i8_lt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -289,7 +289,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4i8_le_:
+; CHECK-LABEL: select_v4i8_le_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -306,7 +306,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4i8_gt_:
+; CHECK-LABEL: select_v4i8_gt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -323,7 +323,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: select_v4i8_ge_:
+; CHECK-LABEL: select_v4i8_ge_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -340,7 +340,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2q15_eq_:
+; CHECK-LABEL: compare_v2q15_eq_:
 ; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -355,7 +355,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2q15_lt_:
+; CHECK-LABEL: compare_v2q15_lt_:
 ; CHECK: cmp.lt.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -370,7 +370,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2q15_le_:
+; CHECK-LABEL: compare_v2q15_le_:
 ; CHECK: cmp.le.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -385,7 +385,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2q15_ne_:
+; CHECK-LABEL: compare_v2q15_ne_:
 ; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -400,7 +400,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2q15_gt_:
+; CHECK-LABEL: compare_v2q15_gt_:
 ; CHECK: cmp.le.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -415,7 +415,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2q15_ge_:
+; CHECK-LABEL: compare_v2q15_ge_:
 ; CHECK: cmp.lt.ph $4, $5
 ; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -430,7 +430,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4ui8_eq_:
+; CHECK-LABEL: compare_v4ui8_eq_:
 ; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -445,7 +445,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4ui8_lt_:
+; CHECK-LABEL: compare_v4ui8_lt_:
 ; CHECK: cmpu.lt.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -460,7 +460,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4ui8_le_:
+; CHECK-LABEL: compare_v4ui8_le_:
 ; CHECK: cmpu.le.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -475,7 +475,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4ui8_ne_:
+; CHECK-LABEL: compare_v4ui8_ne_:
 ; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
 ; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -490,7 +490,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4ui8_gt_:
+; CHECK-LABEL: compare_v4ui8_gt_:
 ; CHECK: cmpu.le.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -505,7 +505,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4ui8_ge_:
+; CHECK-LABEL: compare_v4ui8_ge_:
 ; CHECK: cmpu.lt.qb $4, $5
 ; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
 
@@ -520,7 +520,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2ui16_lt_:
+; CHECK-LABEL: compare_v2ui16_lt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -535,7 +535,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2ui16_le_:
+; CHECK-LABEL: compare_v2ui16_le_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -550,7 +550,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2ui16_gt_:
+; CHECK-LABEL: compare_v2ui16_gt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -565,7 +565,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v2ui16_ge_:
+; CHECK-LABEL: compare_v2ui16_ge_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -580,7 +580,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4i8_lt_:
+; CHECK-LABEL: compare_v4i8_lt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -595,7 +595,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4i8_le_:
+; CHECK-LABEL: compare_v4i8_le_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -610,7 +610,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4i8_gt_:
+; CHECK-LABEL: compare_v4i8_gt_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
@@ -625,7 +625,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; CHECK: compare_v4i8_ge_:
+; CHECK-LABEL: compare_v4i8_ge_:
 ; CHECK-NOT: cmp
 ; CHECK-NOT: pick
 
diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll
index eeb7140ca2cb..f5bb3abed90e 100644
--- a/test/CodeGen/Mips/dsp-patterns.ll
+++ b/test/CodeGen/Mips/dsp-patterns.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=R1
 ; RUN: llc -march=mips -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2
 
-; R1: test_lbux:
+; R1-LABEL: test_lbux:
 ; R1: lbux ${{[0-9]+}}
 
 define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
@@ -11,7 +11,7 @@ entry:
   ret i8 %0
 }
 
-; R1: test_lhx:
+; R1-LABEL: test_lhx:
 ; R1: lhx ${{[0-9]+}}
 
 define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
@@ -21,7 +21,7 @@ entry:
   ret i16 %0
 }
 
-; R1: test_lwx:
+; R1-LABEL: test_lwx:
 ; R1: lwx ${{[0-9]+}}
 
 define i32 @test_lwx(i32* nocapture %b, i32 %i) {
@@ -31,7 +31,7 @@ entry:
   ret i32 %0
 }
 
-; R1: test_add_v2q15_:
+; R1-LABEL: test_add_v2q15_:
 ; R1: addq.ph ${{[0-9]+}}
 
 define { i32 } @test_add_v2q15_(i32 %a.coerce, i32 %b.coerce) {
@@ -44,7 +44,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: test_sub_v2q15_:
+; R1-LABEL: test_sub_v2q15_:
 ; R1: subq.ph ${{[0-9]+}}
 
 define { i32 } @test_sub_v2q15_(i32 %a.coerce, i32 %b.coerce) {
@@ -57,11 +57,11 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R2: test_mul_v2q15_:
+; R2-LABEL: test_mul_v2q15_:
 ; R2: mul.ph ${{[0-9]+}}
 
 ; mul.ph is an R2 instruction. Check that multiply node gets expanded.
-; R1: test_mul_v2q15_:
+; R1-LABEL: test_mul_v2q15_:
 ; R1: mul ${{[0-9]+}}
 ; R1: mul ${{[0-9]+}}
 
@@ -75,7 +75,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: test_add_v4i8_:
+; R1-LABEL: test_add_v4i8_:
 ; R1: addu.qb ${{[0-9]+}}
 
 define { i32 } @test_add_v4i8_(i32 %a.coerce, i32 %b.coerce) {
@@ -88,7 +88,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: test_sub_v4i8_:
+; R1-LABEL: test_sub_v4i8_:
 ; R1: subu.qb ${{[0-9]+}}
 
 define { i32 } @test_sub_v4i8_(i32 %a.coerce, i32 %b.coerce) {
@@ -102,7 +102,7 @@ entry:
 }
 
 ; DSP-ASE doesn't have a v4i8 multiply instruction. Check that multiply node gets expanded.
-; R2: test_mul_v4i8_:
+; R2-LABEL: test_mul_v4i8_:
 ; R2: mul ${{[0-9]+}}
 ; R2: mul ${{[0-9]+}}
 ; R2: mul ${{[0-9]+}}
@@ -118,7 +118,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: test_addsc:
+; R1-LABEL: test_addsc:
 ; R1: addsc ${{[0-9]+}}
 ; R1: addwc ${{[0-9]+}}
 
@@ -128,7 +128,7 @@ entry:
   ret i64 %add
 }
 
-; R1: shift1_v2i16_shl_:
+; R1-LABEL: shift1_v2i16_shl_:
 ; R1: shll.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
 
 define { i32 } @shift1_v2i16_shl_(i32 %a0.coerce) {
@@ -140,7 +140,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: shift1_v2i16_sra_:
+; R1-LABEL: shift1_v2i16_sra_:
 ; R1: shra.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
 
 define { i32 } @shift1_v2i16_sra_(i32 %a0.coerce) {
@@ -152,9 +152,9 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: shift1_v2ui16_srl_:
+; R1-LABEL: shift1_v2ui16_srl_:
 ; R1-NOT: shrl.ph
-; R2: shift1_v2ui16_srl_:
+; R2-LABEL: shift1_v2ui16_srl_:
 ; R2: shrl.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
 
 define { i32 } @shift1_v2ui16_srl_(i32 %a0.coerce) {
@@ -166,7 +166,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: shift1_v4i8_shl_:
+; R1-LABEL: shift1_v4i8_shl_:
 ; R1: shll.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
 
 define { i32 } @shift1_v4i8_shl_(i32 %a0.coerce) {
@@ -178,9 +178,9 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: shift1_v4i8_sra_:
+; R1-LABEL: shift1_v4i8_sra_:
 ; R1-NOT: shra.qb
-; R2: shift1_v4i8_sra_:
+; R2-LABEL: shift1_v4i8_sra_:
 ; R2: shra.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
 
 define { i32 } @shift1_v4i8_sra_(i32 %a0.coerce) {
@@ -192,7 +192,7 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
-; R1: shift1_v4ui8_srl_:
+; R1-LABEL: shift1_v4ui8_srl_:
 ; R1: shrl.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
 
 define { i32 } @shift1_v4ui8_srl_(i32 %a0.coerce) {
@@ -206,7 +206,7 @@ entry:
 
 ; Check that shift node is expanded if splat element size is not 16-bit.
 ;
-; R1: test_vector_splat_imm_v2q15:
+; R1-LABEL: test_vector_splat_imm_v2q15:
 ; R1-NOT: shll.ph
 
 define { i32 } @test_vector_splat_imm_v2q15(i32 %a.coerce) {
@@ -220,7 +220,7 @@ entry:
 
 ; Check that shift node is expanded if splat element size is not 8-bit.
 ;
-; R1: test_vector_splat_imm_v4i8:
+; R1-LABEL: test_vector_splat_imm_v4i8:
 ; R1-NOT: shll.qb
 
 define { i32 } @test_vector_splat_imm_v4i8(i32 %a.coerce) {
@@ -234,7 +234,7 @@ entry:
 
 ; Check that shift node is expanded if shift amount doesn't fit in 4-bit sa field.
 ;
-; R1: test_shift_amount_v2q15:
+; R1-LABEL: test_shift_amount_v2q15:
 ; R1-NOT: shll.ph
 
 define { i32 } @test_shift_amount_v2q15(i32 %a.coerce) {
@@ -248,7 +248,7 @@ entry:
 
 ; Check that shift node is expanded if shift amount doesn't fit in 3-bit sa field.
 ;
-; R1: test_shift_amount_v4i8:
+; R1-LABEL: test_shift_amount_v4i8:
 ; R1-NOT: shll.qb
 
 define { i32 } @test_shift_amount_v4i8(i32 %a.coerce) {
diff --git a/test/CodeGen/Mips/dsp-vec-load-store.ll b/test/CodeGen/Mips/dsp-vec-load-store.ll
new file mode 100644
index 000000000000..7e4a8fedaa8c
--- /dev/null
+++ b/test/CodeGen/Mips/dsp-vec-load-store.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s
+
+@g1 = common global <2 x i8> zeroinitializer, align 2
+@g0 = common global <2 x i8> zeroinitializer, align 2
+
+define void @extend_load_trunc_store_v2i8() {
+entry:
+  %0 = load <2 x i8>* @g1, align 2
+  store <2 x i8> %0, <2 x i8>* @g0, align 2
+  ret void
+}
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 373a9a114453..32fc5e61899a 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -52,7 +52,9 @@ entry:
   unreachable
 
 ; CHECK:        f2
+; CHECK:        .cfi_startproc
 ; CHECK:        daddiu  $sp, $sp, -[[spoffset:[0-9]+]]
+; CHECK:        .cfi_def_cfa_offset [[spoffset]]
 
 ; check that $a0-$a3 are saved on stack.
 ; CHECK:        sd      $4, [[offset0:[0-9]+]]($sp)
@@ -61,10 +63,10 @@ entry:
 ; CHECK:        sd      $7, [[offset3:[0-9]+]]($sp)
 
 ; check that .cfi_offset directives are emitted for $a0-$a3.
-; CHECK:        .cfi_offset 4,
-; CHECK:        .cfi_offset 5,
-; CHECK:        .cfi_offset 6,
-; CHECK:        .cfi_offset 7,
+; CHECK:        .cfi_offset 4, -8
+; CHECK:        .cfi_offset 5, -16
+; CHECK:        .cfi_offset 6, -24
+; CHECK:        .cfi_offset 7, -32
 
 ; check that stack adjustment and handler are put in $v1 and $v0.
 ; CHECK:        move    $3, $4
@@ -83,5 +85,5 @@ entry:
 ; CHECK:        move    $ra, $2
 ; CHECK:        jr      $ra
 ; CHECK:        daddu   $sp, $sp, $3
-
+; CHECK:        .cfi_endproc
 }
diff --git a/test/CodeGen/Mips/emit-big-cst.ll b/test/CodeGen/Mips/emit-big-cst.ll
new file mode 100644
index 000000000000..a168743859a3
--- /dev/null
+++ b/test/CodeGen/Mips/emit-big-cst.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+; Check assembly printing of odd constants.
+
+; CHECK: bigCst:
+; CHECK-NEXT: .8byte 1845068520838224192
+; CHECK-NEXT: .8byte 11776
+; CHECK-NEXT: .size bigCst, 16
+
+@bigCst = internal constant i82 483673642326615442599424
+
+define void @accessBig(i64* %storage) {
+  %addr = bitcast i64* %storage to i82*
+  %bigLoadedCst = load volatile i82* @bigCst
+  %tmp = add i82 %bigLoadedCst, 1
+  store i82 %tmp, i82* %addr
+  ret void
+}
diff --git a/test/CodeGen/Mips/ex2.ll b/test/CodeGen/Mips/ex2.ll
index 67d19e4b84ca..c5535e7661a7 100644
--- a/test/CodeGen/Mips/ex2.ll
+++ b/test/CodeGen/Mips/ex2.ll
@@ -4,12 +4,15 @@
 @_ZTIPKc = external constant i8*
 
 define i32 @main() {
-; 16: main:
+; 16-LABEL: main:
 ; 16: 	.cfi_startproc
-; 16: 	save	$ra, $s0, $s1, 32
-; 16:   .cfi_offset 17, -8
-; 16: 	.cfi_offset 16, -12
+; 16: 	save	$ra, $s0, $s1, $s2, 40
+; 16:   .cfi_def_cfa_offset 40
+; 16:   .cfi_offset 18, -8
+; 16:   .cfi_offset 17, -12
+; 16: 	.cfi_offset 16, -16
 ; 16: 	.cfi_offset 31, -4
+; 16:   .cfi_endproc
 entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index a164f7047b5c..efaeeea96a5e 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -1,8 +1,10 @@
-; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc  < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=16
 
 define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone {
 entry:
-; CHECK: ext ${{[0-9]+}}, $4, 5, 9
+; 32R2: ext ${{[0-9]+}}, $4, 5, 9
+; 16-NOT: ext ${{[0-9]+}}
   %shr = lshr i32 %s, 5
   %and = and i32 %shr, 511
   ret i32 %and
@@ -10,7 +12,8 @@ entry:
 
 define void @ins2_5_9(i32 %s, i32* nocapture %d) nounwind {
 entry:
-; CHECK: ins ${{[0-9]+}}, $4, 5, 9
+; 32R2: ins ${{[0-9]+}}, $4, 5, 9
+; 16-NOT: ins ${{[0-9]+}}
   %and = shl i32 %s, 5
   %shl = and i32 %and, 16352
   %tmp3 = load i32* %d, align 4
diff --git a/test/CodeGen/Mips/f16abs.ll b/test/CodeGen/Mips/f16abs.ll
new file mode 100644
index 000000000000..928914f067dd
--- /dev/null
+++ b/test/CodeGen/Mips/f16abs.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static
+
+@y = global double -1.450000e+00, align 8
+@x = common global double 0.000000e+00, align 8
+
+@y1 = common global float 0.000000e+00, align 4
+@x1 = common global float 0.000000e+00, align 4
+
+
+
+; Function Attrs: nounwind optsize
+define i32 @main() #0 {
+entry:
+  %0 = load double* @y, align 8
+  %call = tail call double @fabs(double %0) #2
+  store double %call, double* @x, align 8
+; static-NOT: 	.ent	__call_stub_fp_fabs
+; static-NOT: 	jal fabs
+  %1 = load float* @y1, align 4
+  %call2 = tail call float @fabsf(float %1) #2
+  store float %call2, float* @x1, align 4
+; static-NOT: 	.ent	__call_stub_fp_fabsf
+; static-NOT: 	jal fabsf
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize readnone
+declare double @fabs(double) #1
+
+declare float @fabsf(float) #1
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind optsize readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #2 = { nounwind optsize readnone }
+
+
+
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
index b36473d6f57a..9f88d0c956b1 100644
--- a/test/CodeGen/Mips/fcopysign-f32-f64.ll
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -7,14 +7,15 @@ declare float @copysignf(float, float) nounwind readnone
 
 define float @func2(float %d, double %f) nounwind readnone {
 entry:
-; 64: func2
-; 64: lui  $[[T0:[0-9]+]], 32767
-; 64: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
-; 64: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 64: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 63
-; 64: sll  $[[SLL:[0-9]+]], ${{[0-9]+}}, 31
-; 64: or   $[[OR:[0-9]+]], $[[AND0]], $[[SLL]]
-; 64: mtc1 $[[OR]], $f0
+; 64:     func2
+; 64-DAG: lui  $[[T0:[0-9]+]], 32767
+; 64-DAG: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 64-DAG: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64-DAG: dsrl $[[DSRL:[0-9]+]], ${{[0-9]+}}, 63
+; 64-DAG: sll  $[[SLL0:[0-9]+]], $[[DSRL]], 0
+; 64-DAG: sll  $[[SLL1:[0-9]+]], $[[SLL0]], 31
+; 64:     or   $[[OR:[0-9]+]], $[[AND0]], $[[SLL1]]
+; 64:     mtc1 $[[OR]], $f0
 
 ; 64R2: dext ${{[0-9]+}}, ${{[0-9]+}}, 63, 1
 ; 64R2: ins  $[[INS:[0-9]+]], ${{[0-9]+}}, 31, 1
@@ -29,14 +30,16 @@ entry:
 define double @func3(double %d, float %f) nounwind readnone {
 entry:
 
-; 64: daddiu $[[T0:[0-9]+]], $zero, 1
-; 64: dsll   $[[T1:[0-9]+]], $[[T0]], 63
-; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
-; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 64: srl    ${{[0-9]+}}, ${{[0-9]+}}, 31
-; 64: dsll   $[[DSLL:[0-9]+]], ${{[0-9]+}}, 63
-; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
-; 64: dmtc1  $[[OR]], $f0
+; 64:     func3
+; 64-DAG: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64-DAG: dsll   $[[T1:[0-9]+]], $[[T0]], 63
+; 64-DAG: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64-DAG: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64-DAG: srl    $[[SRL:[0-9]+]], ${{[0-9]+}}, 31
+; 64-DAG: sll    $[[SLL:[0-9]+]], $[[SRL]], 0
+; 64-DAG: dsll   $[[DSLL:[0-9]+]], $[[SLL]], 63
+; 64:     or     $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
+; 64:     dmtc1  $[[OR]], $f0
 
 ; 64R2: ext   ${{[0-9]+}}, ${{[0-9]+}}, 31, 1
 ; 64R2: dins  $[[INS:[0-9]+]], ${{[0-9]+}}, 63, 1
diff --git a/test/CodeGen/Mips/fixdfsf.ll b/test/CodeGen/Mips/fixdfsf.ll
new file mode 100644
index 000000000000..b08eefd71235
--- /dev/null
+++ b/test/CodeGen/Mips/fixdfsf.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2
+
+@x = common global double 0.000000e+00, align 8
+@y = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define void @foo()  {
+entry:
+  %0 = load double* @x, align 8
+  %conv = fptoui double %0 to i32
+  store i32 %conv, i32* @y, align 4
+; pic1:	lw	${{[0-9]+}}, %call16(__fixunsdfsi)(${{[0-9]+}})
+; pic2:	lw	${{[0-9]+}}, %got(__mips16_call_stub_2)(${{[0-9]+}})
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/fp16instrinsmc.ll b/test/CodeGen/Mips/fp16instrinsmc.ll
new file mode 100644
index 000000000000..bb43d2711c26
--- /dev/null
+++ b/test/CodeGen/Mips/fp16instrinsmc.ll
@@ -0,0 +1,391 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask 
+
+@x = global float 1.500000e+00, align 4
+@xn = global float -1.900000e+01, align 4
+@negone = global float -1.000000e+00, align 4
+@one = global float 1.000000e+00, align 4
+@xd = global double 0x40048B0A8EA4481E, align 8
+@xdn = global double 0xC0311F9ADD373963, align 8
+@negoned = global double -1.000000e+00, align 8
+@oned = global float 1.000000e+00, align 4
+@y = common global float 0.000000e+00, align 4
+@yd = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @foo1() #0 {
+; fmask: .ent foo1
+; fmask: .set	noreorder
+; fmask: .set	nomacro
+; fmask: .set	noat
+; fmask: .set	at
+; fmask: .set	macro
+; fmask: .set	reorder
+; fmask: .end	foo1
+entry:
+  %0 = load float* @x, align 4
+  %1 = load float* @one, align 4
+  %call = call float @copysignf(float %0, float %1) #2
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @copysignf(float, float) #1
+
+; Function Attrs: nounwind
+define void @foo2() #0 {
+; fmask:	.ent	foo2
+; fmask:	save	{{.*}}
+; fmask:	.end	foo2
+entry:
+  %0 = load float* @x, align 4
+  %1 = load float* @negone, align 4
+  %call = call float @copysignf(float %0, float %1) #2
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @foo3() #0 {
+entry:
+; fmask: .ent foo3
+; fmask: .set	noreorder
+; fmask: .set	nomacro
+; fmask: .set	noat
+; fmask: .set	at
+; fmask: .set	macro
+; fmask: .set	reorder
+; fmask: .end	foo3
+  %0 = load double* @xd, align 8
+  %1 = load float* @oned, align 4
+  %conv = fpext float %1 to double
+  %call = call double @copysign(double %0, double %conv) #2
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @copysign(double, double) #1
+
+; Function Attrs: nounwind
+define void @foo4() #0 {
+entry:
+; fmask:	.ent	foo4
+; fmask:	save	{{.*}}
+; fmask:	.end	foo4
+  %0 = load double* @xd, align 8
+  %1 = load double* @negoned, align 8
+  %call = call double @copysign(double %0, double %1) #2
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @foo5() #0 {
+entry:
+  %0 = load float* @xn, align 4
+  %call = call float @fabsf(float %0) #2
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @fabsf(float) #1
+
+; Function Attrs: nounwind
+define void @foo6() #0 {
+entry:
+  %0 = load double* @xdn, align 8
+  %call = call double @fabs(double %0) #2
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @fabs(double) #1
+
+; Function Attrs: nounwind
+define void @foo7() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @sinf(float %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(sinf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+declare float @sinf(float) #0
+
+; Function Attrs: nounwind
+define void @foo8() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @sin(double %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(sin)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @sin(double) #0
+
+; Function Attrs: nounwind
+define void @foo9() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @cosf(float %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(cosf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+declare float @cosf(float) #0
+
+; Function Attrs: nounwind
+define void @foo10() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @cos(double %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(cos)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @cos(double) #0
+
+; Function Attrs: nounwind
+define void @foo11() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @sqrtf(float %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(sqrtf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+declare float @sqrtf(float) #0
+
+; Function Attrs: nounwind
+define void @foo12() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @sqrt(double %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(sqrt)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @sqrt(double) #0
+
+; Function Attrs: nounwind
+define void @foo13() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @floorf(float %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(floorf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @floorf(float) #1
+
+; Function Attrs: nounwind
+define void @foo14() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @floor(double %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(floor)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @floor(double) #1
+
+; Function Attrs: nounwind
+define void @foo15() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @nearbyintf(float %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(nearbyintf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @nearbyintf(float) #1
+
+; Function Attrs: nounwind
+define void @foo16() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @nearbyint(double %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(nearbyint)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @nearbyint(double) #1
+
+; Function Attrs: nounwind
+define void @foo17() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @ceilf(float %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(ceilf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @ceilf(float) #1
+
+; Function Attrs: nounwind
+define void @foo18() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @ceil(double %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(ceil)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @ceil(double) #1
+
+; Function Attrs: nounwind
+define void @foo19() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @rintf(float %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(rintf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @rintf(float) #1
+
+; Function Attrs: nounwind
+define void @foo20() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @rint(double %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(rint)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @rint(double) #1
+
+; Function Attrs: nounwind
+define void @foo21() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @truncf(float %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(truncf)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @truncf(float) #1
+
+; Function Attrs: nounwind
+define void @foo22() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @trunc(double %0) #2
+;pic:	lw	${{[0-9]+}}, %call16(trunc)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @trunc(double) #1
+
+; Function Attrs: nounwind
+define void @foo23() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @log2f(float %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(log2f)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+declare float @log2f(float) #0
+
+; Function Attrs: nounwind
+define void @foo24() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @log2(double %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(log2)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @log2(double) #0
+
+; Function Attrs: nounwind
+define void @foo25() #0 {
+entry:
+  %0 = load float* @x, align 4
+  %call = call float @exp2f(float %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(exp2f)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+  store float %call, float* @y, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+declare float @exp2f(float) #0
+
+; Function Attrs: nounwind
+define void @foo26() #0 {
+entry:
+  %0 = load double* @xd, align 8
+  %call = call double @exp2(double %0) #3
+;pic:	lw	${{[0-9]+}}, %call16(exp2)(${{[0-9]+}})
+;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+  store double %call, double* @yd, align 8
+  ret void
+}
+
+; Function Attrs: nounwind
+declare double @exp2(double) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
diff --git a/test/CodeGen/Mips/fp16mix.ll b/test/CodeGen/Mips/fp16mix.ll
new file mode 100644
index 000000000000..8d85099ba9f2
--- /dev/null
+++ b/test/CodeGen/Mips/fp16mix.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2 
+
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr
+
+; Function Attrs: nounwind optsize readnone
+define void @foo1()  {
+entry:
+  ret void
+; fmask1: .ent foo1
+; fmask1: .set	noreorder
+; fmask1: .set	nomacro
+; fmask1: .set	noat
+; fmask1: .set	at
+; fmask1: .set	macro
+; fmask1: .set	reorder
+; fmask1: .end	foo1
+; fmask2: .ent	foo1
+; fmask2: save	{{.*}}
+; fmask2: .end	foo1
+; fmask1nr: .ent foo1
+; fmask1nr: .set	noreorder
+; fmask1nr: .set	nomacro
+; fmask1nr: .set	noat
+; fmask1nr: .set	at
+; fmask1nr: .set	macro
+; fmask1nr: .set	reorder
+; fmask1nr: .end	foo1
+}
+
+; Function Attrs: nounwind optsize readnone
+define void @foo2()  {
+entry:
+  ret void
+; fmask2: .ent foo2
+; fmask2: .set	noreorder
+; fmask2: .set	nomacro
+; fmask2: .set	noat
+; fmask2: .set	at
+; fmask2: .set	macro
+; fmask2: .set	reorder
+; fmask2: .end	foo2
+; fmask1: .ent	foo2
+; fmask1: save	{{.*}}
+; fmask1: .end	foo2
+; fmask1nr: .ent	foo2
+; fmask1nr: save	{{.*}}
+; fmask1nr: .end	foo2
+}
+
+; Function Attrs: nounwind optsize readnone
+define void @foo3()  {
+entry:
+  ret void
+; fmask1: .ent foo3
+; fmask1: .set	noreorder
+; fmask1: .set	nomacro
+; fmask1: .set	noat
+; fmask1: .set	at
+; fmask1: .set	macro
+; fmask1: .set	reorder
+; fmask1: .end	foo3
+; fmask2:  .ent	foo3
+; fmask2:  save	{{.*}}
+; fmask2:  .end	foo3
+; fmask1nr:  .ent	foo3
+; fmask1nr:  save	{{.*}}
+; fmask1nr:  .end	foo3
+}
+
+; Function Attrs: nounwind optsize readnone
+define void @foo4()  {
+entry:
+  ret void
+; fmask2: .ent foo4
+; fmask2: .set	noreorder
+; fmask2: .set	nomacro
+; fmask2: .set	noat
+; fmask2: .set	at
+; fmask2: .set	macro
+; fmask2: .set	reorder
+; fmask2: .end	foo4
+; fmask1: .ent	foo4
+; fmask1: save	{{.*}}
+; fmask1: .end	foo4
+; fmask1nr: .ent	foo4
+; fmask1nr: save	{{.*}}
+; fmask1nr: .end	foo4
+}
+
+
diff --git a/test/CodeGen/Mips/fpneeded.ll b/test/CodeGen/Mips/fpneeded.ll
index 623883a0d5c0..dcdebb92e40e 100644
--- a/test/CodeGen/Mips/fpneeded.ll
+++ b/test/CodeGen/Mips/fpneeded.ll
@@ -131,7 +131,7 @@ entry:
 ; 32:	.set	reorder
 ; 32:	.end	foo3
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 define void @vv() #0 {
 entry:
diff --git a/test/CodeGen/Mips/fpnotneeded.ll b/test/CodeGen/Mips/fpnotneeded.ll
index dc2ec10817f3..b4fab6414223 100644
--- a/test/CodeGen/Mips/fpnotneeded.ll
+++ b/test/CodeGen/Mips/fpnotneeded.ll
@@ -57,7 +57,7 @@ entry:
 ; 32:	restore	{{.+}} 
 ; 32:	.end	foo
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 
 define float @fv() #0 {
diff --git a/test/CodeGen/Mips/fptr2.ll b/test/CodeGen/Mips/fptr2.ll
new file mode 100644
index 000000000000..77028dbde9aa
--- /dev/null
+++ b/test/CodeGen/Mips/fptr2.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static  < %s | FileCheck %s -check-prefix=static16
+
+; Function Attrs: nounwind
+define double @my_mul(double %a, double %b) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  %b.addr = alloca double, align 8
+  store double %a, double* %a.addr, align 8
+  store double %b, double* %b.addr, align 8
+  %0 = load double* %a.addr, align 8
+  %1 = load double* %b.addr, align 8
+  %mul = fmul double %0, %1
+  ret double %mul
+}
+
+; static16: 	        .ent	__fn_stub_my_mul
+; static16:     	.set reorder
+; static16-NEXT:	#NO_APP
+; static16: 	        .end __fn_stub_my_mul
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
diff --git a/test/CodeGen/Mips/frame-address.ll b/test/CodeGen/Mips/frame-address.ll
index 92946d9ffd68..0ab7da30e785 100644
--- a/test/CodeGen/Mips/frame-address.ll
+++ b/test/CodeGen/Mips/frame-address.ll
@@ -2,11 +2,16 @@
 
 declare i8* @llvm.frameaddress(i32) nounwind readnone
 
-define i8* @f() nounwind {
+define i8* @f() nounwind uwtable {
 entry:
   %0 = call i8* @llvm.frameaddress(i32 0)
   ret i8* %0
 
+; CHECK: .cfi_startproc
+; CHECK: .cfi_def_cfa_offset 8
+; CHECK: .cfi_offset 30, -4
 ; CHECK:   move    $fp, $sp
+; CHECK: .cfi_def_cfa_register 30
 ; CHECK:   move    $2, $fp
+; CHECK: .cfi_endproc
 }
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index 56ee60785f46..058a041c16a9 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -1,11 +1,11 @@
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2
 ;
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
-; RUN: llc  -march=mipsel -mcpu=mips32  -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32  -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32
 
 
 @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
@@ -25,17 +25,19 @@ entry:
 ; SR32:  .set noreorder
 ; SR32:  .set nomacro
 ; SR32:  .set noat
-; SR:	save 	$ra, $s0, $s1, [[FS:[0-9]+]]
-; PE:	li	$[[T1:[0-9]+]], %hi(_gp_disp)
-; PE: 	addiu	$[[T2:[0-9]+]], $pc, %lo(_gp_disp)
-; PE:	sll	$[[T3:[0-9]+]], $[[T1]], 16
+; SR:	save 	$ra, $s0, $s1, $s2, [[FS:[0-9]+]]
+; PE:    .ent main
+; PE:    .align  2
+; PE-NEXT:	li	$[[T1:[0-9]+]], %hi(_gp_disp)
+; PE-NEXT: 	addiu	$[[T2:[0-9]+]], $pc, %lo(_gp_disp)
+; PE:	        sll	$[[T3:[0-9]+]], $[[T1]], 16
 ; C1:	lw	${{[0-9]+}}, %got($.str)(${{[0-9]+}})
 ; C2:	lw	${{[0-9]+}}, %call16(printf)(${{[0-9]+}})
 ; C1:	addiu	${{[0-9]+}}, %lo($.str)
 ; C2:	move	$25, ${{[0-9]+}}
 ; C1:	move 	$gp, ${{[0-9]+}}
 ; C1:	jalrc 	${{[0-9]+}}
-; SR:	restore 	$ra, $s0, $s1, [[FS]]
+; SR:	restore 	$ra, $s0, $s1, $s2, [[FS]]
 ; PE:	li	$2, 0
 ; PE:	jrc 	$ra
 
diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll
new file mode 100644
index 000000000000..461438e8bec0
--- /dev/null
+++ b/test/CodeGen/Mips/hf16call32.ll
@@ -0,0 +1,1030 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+
+@x = common global float 0.000000e+00, align 4 ; x, y, xd, yd: @main compares these with lx, ly, lxd, lyd after a call (presumably written by the external callee -- TODO confirm)
+@y = common global float 0.000000e+00, align 4
+@xd = common global double 0.000000e+00, align 8
+@yd = common global double 0.000000e+00, align 8
+@xy = common global { float, float } zeroinitializer, align 4 ; two-field aggregate; presumably a complex value like ret_sc -- confirm
+@xyd = common global { double, double } zeroinitializer, align 8
+@ret_sf = common global float 0.000000e+00, align 4 ; ret_*: @main stores a value here before a call, then compares it with the matching lret_* slot
+@ret_df = common global double 0.000000e+00, align 8
+@ret_sc = common global { float, float } zeroinitializer, align 4 ; field 0 is loaded as %ret_sc.real and field 1 as %ret_sc.imag in @main
+@ret_dc = common global { double, double } zeroinitializer, align 8
+@lx = common global float 0.000000e+00, align 4 ; lx, ly, lxd, lyd: @main stores each argument here, reloads it, and passes it to the call
+@ly = common global float 0.000000e+00, align 4
+@lxd = common global double 0.000000e+00, align 8
+@lyd = common global double 0.000000e+00, align 8
+@lxy = common global { float, float } zeroinitializer, align 4
+@lxyd = common global { double, double } zeroinitializer, align 8
+@lret_sf = common global float 0.000000e+00, align 4 ; lret_*: @main stores the value returned by each call here
+@lret_df = common global double 0.000000e+00, align 8
+@lret_sc = common global { float, float } zeroinitializer, align 4
+@lret_dc = common global { double, double } zeroinitializer, align 8
+@.str = private unnamed_addr constant [10 x i8] c"%f %f %i\0A\00", align 1 ; printf format: two doubles, then an int flag
+@.str1 = private unnamed_addr constant [16 x i8] c"%f=%f %f=%f %i\0A\00", align 1 ; printf format: two a=b pairs, then an int flag
+@.str2 = private unnamed_addr constant [22 x i8] c"%f=%f %f=%f %f=%f %i\0A\00", align 1 ; printf format: three a=b pairs, then an int flag
+@.str3 = private unnamed_addr constant [18 x i8] c"%f+%fi=%f+%fi %i\0A\00", align 1 ; printf format: re+imi = re+imi, then an int flag
+@.str4 = private unnamed_addr constant [24 x i8] c"%f+%fi=%f+%fi %f=%f %i\0A\00", align 1 ; printf format: re+imi = re+imi, one a=b pair, then an int flag
+
+; Function Attrs: nounwind
+define void @clear() #0 { ; Resets every shared test global to a fixed baseline; @main calls this before each call it exercises.
+entry:
+  store float 1.000000e+00, float* @x, align 4 ; x, y, xd, yd := 1.0
+  store float 1.000000e+00, float* @y, align 4
+  store double 1.000000e+00, double* @xd, align 8
+  store double 1.000000e+00, double* @yd, align 8
+  store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0) ; xy := { 1.0, 0.0 }
+  store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+  store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0) ; xyd := { 1.0, 0.0 }
+  store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+  store float 1.000000e+00, float* @ret_sf, align 4 ; ret_sf, ret_df := 1.0
+  store double 1.000000e+00, double* @ret_df, align 8
+  store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0) ; ret_sc := { 1.0, 0.0 }
+  store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0) ; ret_dc := { 1.0, 0.0 }
+  store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  store float 0.000000e+00, float* @lx, align 4 ; from here on: every l*-prefixed global := 0.0, so it differs from its 1.0 counterpart above
+  store float 0.000000e+00, float* @ly, align 4
+  store double 0.000000e+00, double* @lxd, align 8
+  store double 0.000000e+00, double* @lyd, align 8
+  store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 0) ; lxy := { 0.0, 0.0 }
+  store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 1)
+  store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 0) ; lxyd := { 0.0, 0.0 }
+  store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 1)
+  store float 0.000000e+00, float* @lret_sf, align 4 ; lret_sf, lret_df := 0.0
+  store double 0.000000e+00, double* @lret_df, align 8
+  store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0) ; lret_sc := { 0.0, 0.0 }
+  store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0) ; lret_dc := { 0.0, 0.0 }
+  store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  ret void
+}
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void @clear()
+  store float 1.500000e+00, float* @lx, align 4
+  %0 = load float* @lx, align 4
+  call void @v_sf(float %0)
+  %1 = load float* @x, align 4
+  %conv = fpext float %1 to double
+  %2 = load float* @lx, align 4
+  %conv1 = fpext float %2 to double
+  %3 = load float* @x, align 4
+  %4 = load float* @lx, align 4
+  %cmp = fcmp oeq float %3, %4
+  %conv2 = zext i1 %cmp to i32
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
+  call void @clear()
+  store double 0x41678C29C0000000, double* @lxd, align 8
+  %5 = load double* @lxd, align 8
+  call void @v_df(double %5)
+  %6 = load double* @xd, align 8
+  %7 = load double* @lxd, align 8
+  %8 = load double* @xd, align 8
+  %9 = load double* @lxd, align 8
+  %cmp3 = fcmp oeq double %8, %9
+  %conv4 = zext i1 %cmp3 to i32
+  %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
+  call void @clear()
+  store float 9.000000e+00, float* @lx, align 4
+  store float 1.000000e+01, float* @ly, align 4
+  %10 = load float* @lx, align 4
+  %11 = load float* @ly, align 4
+  call void @v_sf_sf(float %10, float %11)
+  %12 = load float* @x, align 4
+  %conv6 = fpext float %12 to double
+  %13 = load float* @lx, align 4
+  %conv7 = fpext float %13 to double
+  %14 = load float* @y, align 4
+  %conv8 = fpext float %14 to double
+  %15 = load float* @ly, align 4
+  %conv9 = fpext float %15 to double
+  %16 = load float* @x, align 4
+  %17 = load float* @lx, align 4
+  %cmp10 = fcmp oeq float %16, %17
+  br i1 %cmp10, label %land.rhs, label %land.end
+
+land.rhs:                                         ; preds = %entry
+  %18 = load float* @y, align 4
+  %19 = load float* @ly, align 4
+  %cmp12 = fcmp oeq float %18, %19
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %20 = phi i1 [ false, %entry ], [ %cmp12, %land.rhs ]
+  %land.ext = zext i1 %20 to i32
+  %call14 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext)
+  call void @clear()
+  store float 0x3FFE666660000000, float* @lx, align 4
+  store double 0x4007E613249FF279, double* @lyd, align 8
+  %21 = load float* @lx, align 4
+  %22 = load double* @lyd, align 8
+  call void @v_sf_df(float %21, double %22)
+  %23 = load float* @x, align 4
+  %conv15 = fpext float %23 to double
+  %24 = load float* @lx, align 4
+  %conv16 = fpext float %24 to double
+  %25 = load double* @yd, align 8
+  %26 = load double* @lyd, align 8
+  %27 = load float* @x, align 4
+  %28 = load float* @lx, align 4
+  %cmp17 = fcmp oeq float %27, %28
+  %conv18 = zext i1 %cmp17 to i32
+  %29 = load double* @yd, align 8
+  %30 = load double* @lyd, align 8
+  %cmp19 = fcmp oeq double %29, %30
+  %conv20 = zext i1 %cmp19 to i32
+  %and = and i32 %conv18, %conv20
+  %call21 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv15, double %conv16, double %25, double %26, i32 %and)
+  call void @clear()
+  store double 0x4194E54F94000000, double* @lxd, align 8
+  store float 7.600000e+01, float* @ly, align 4
+  %31 = load double* @lxd, align 8
+  %32 = load float* @ly, align 4
+  call void @v_df_sf(double %31, float %32)
+  %33 = load double* @xd, align 8
+  %34 = load double* @lxd, align 8
+  %35 = load float* @y, align 4
+  %conv22 = fpext float %35 to double
+  %36 = load float* @ly, align 4
+  %conv23 = fpext float %36 to double
+  %37 = load double* @xd, align 8
+  %38 = load double* @lxd, align 8
+  %cmp24 = fcmp oeq double %37, %38
+  %conv25 = zext i1 %cmp24 to i32
+  %39 = load float* @y, align 4
+  %40 = load float* @ly, align 4
+  %cmp26 = fcmp oeq float %39, %40
+  %conv27 = zext i1 %cmp26 to i32
+  %and28 = and i32 %conv25, %conv27
+  %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28)
+  call void @clear()
+  store double 7.365198e+07, double* @lxd, align 8
+  store double 0x416536CD80000000, double* @lyd, align 8
+  %41 = load double* @lxd, align 8
+  %42 = load double* @lyd, align 8
+  call void @v_df_df(double %41, double %42)
+  %43 = load double* @xd, align 8
+  %44 = load double* @lxd, align 8
+  %45 = load double* @yd, align 8
+  %46 = load double* @lyd, align 8
+  %47 = load double* @xd, align 8
+  %48 = load double* @lxd, align 8
+  %cmp30 = fcmp oeq double %47, %48
+  %conv31 = zext i1 %cmp30 to i32
+  %49 = load double* @yd, align 8
+  %50 = load double* @lyd, align 8
+  %cmp32 = fcmp oeq double %49, %50
+  %conv33 = zext i1 %cmp32 to i32
+  %and34 = and i32 %conv31, %conv33
+  %call35 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34)
+  call void @clear()
+  store float 0x4016666660000000, float* @ret_sf, align 4
+  %call36 = call float @sf_v()
+  store float %call36, float* @lret_sf, align 4
+  %51 = load float* @ret_sf, align 4
+  %conv37 = fpext float %51 to double
+  %52 = load float* @lret_sf, align 4
+  %conv38 = fpext float %52 to double
+  %53 = load float* @ret_sf, align 4
+  %54 = load float* @lret_sf, align 4
+  %cmp39 = fcmp oeq float %53, %54
+  %conv40 = zext i1 %cmp39 to i32
+  %call41 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
+  call void @clear()
+  store float 4.587300e+06, float* @ret_sf, align 4
+  store float 3.420000e+02, float* @lx, align 4
+  %55 = load float* @lx, align 4
+  %call42 = call float @sf_sf(float %55)
+  store float %call42, float* @lret_sf, align 4
+  %56 = load float* @ret_sf, align 4
+  %conv43 = fpext float %56 to double
+  %57 = load float* @lret_sf, align 4
+  %conv44 = fpext float %57 to double
+  %58 = load float* @x, align 4
+  %conv45 = fpext float %58 to double
+  %59 = load float* @lx, align 4
+  %conv46 = fpext float %59 to double
+  %60 = load float* @ret_sf, align 4
+  %61 = load float* @lret_sf, align 4
+  %cmp47 = fcmp oeq float %60, %61
+  %conv48 = zext i1 %cmp47 to i32
+  %62 = load float* @x, align 4
+  %63 = load float* @lx, align 4
+  %cmp49 = fcmp oeq float %62, %63
+  %conv50 = zext i1 %cmp49 to i32
+  %and51 = and i32 %conv48, %conv50
+  %call52 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51)
+  call void @clear()
+  store float 4.445910e+06, float* @ret_sf, align 4
+  store double 0x419A7DB294000000, double* @lxd, align 8
+  %64 = load double* @lxd, align 8
+  %call53 = call float @sf_df(double %64)
+  store float %call53, float* @lret_sf, align 4
+  %65 = load float* @ret_sf, align 4
+  %conv54 = fpext float %65 to double
+  %66 = load float* @lret_sf, align 4
+  %conv55 = fpext float %66 to double
+  %67 = load double* @xd, align 8
+  %68 = load double* @lxd, align 8
+  %69 = load float* @ret_sf, align 4
+  %70 = load float* @lret_sf, align 4
+  %cmp56 = fcmp oeq float %69, %70
+  %conv57 = zext i1 %cmp56 to i32
+  %71 = load double* @xd, align 8
+  %72 = load double* @lxd, align 8
+  %cmp58 = fcmp oeq double %71, %72
+  %conv59 = zext i1 %cmp58 to i32
+  %and60 = and i32 %conv57, %conv59
+  %call61 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60)
+  call void @clear()
+  store float 0x3FFF4BC6A0000000, float* @ret_sf, align 4
+  store float 4.445500e+03, float* @lx, align 4
+  store float 0x4068ACCCC0000000, float* @ly, align 4
+  %73 = load float* @lx, align 4
+  %74 = load float* @ly, align 4
+  %call62 = call float @sf_sf_sf(float %73, float %74)
+  store float %call62, float* @lret_sf, align 4
+  %75 = load float* @ret_sf, align 4
+  %conv63 = fpext float %75 to double
+  %76 = load float* @lret_sf, align 4
+  %conv64 = fpext float %76 to double
+  %77 = load float* @x, align 4
+  %conv65 = fpext float %77 to double
+  %78 = load float* @lx, align 4
+  %conv66 = fpext float %78 to double
+  %79 = load float* @y, align 4
+  %conv67 = fpext float %79 to double
+  %80 = load float* @ly, align 4
+  %conv68 = fpext float %80 to double
+  %81 = load float* @ret_sf, align 4
+  %82 = load float* @lret_sf, align 4
+  %cmp69 = fcmp oeq float %81, %82
+  br i1 %cmp69, label %land.lhs.true, label %land.end76
+
+land.lhs.true:                                    ; preds = %land.end
+  %83 = load float* @x, align 4
+  %84 = load float* @lx, align 4
+  %cmp71 = fcmp oeq float %83, %84
+  br i1 %cmp71, label %land.rhs73, label %land.end76
+
+land.rhs73:                                       ; preds = %land.lhs.true
+  %85 = load float* @y, align 4
+  %86 = load float* @ly, align 4
+  %cmp74 = fcmp oeq float %85, %86
+  br label %land.end76
+
+land.end76:                                       ; preds = %land.rhs73, %land.lhs.true, %land.end
+  %87 = phi i1 [ false, %land.lhs.true ], [ false, %land.end ], [ %cmp74, %land.rhs73 ]
+  %land.ext77 = zext i1 %87 to i32
+  %call78 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77)
+  call void @clear()
+  store float 9.991300e+04, float* @ret_sf, align 4
+  store float 1.114500e+04, float* @lx, align 4
+  store double 9.994445e+07, double* @lyd, align 8
+  %88 = load float* @lx, align 4
+  %89 = load double* @lyd, align 8
+  %call79 = call float @sf_sf_df(float %88, double %89)
+  store float %call79, float* @lret_sf, align 4
+  %90 = load float* @ret_sf, align 4
+  %conv80 = fpext float %90 to double
+  %91 = load float* @lret_sf, align 4
+  %conv81 = fpext float %91 to double
+  %92 = load float* @x, align 4
+  %conv82 = fpext float %92 to double
+  %93 = load float* @lx, align 4
+  %conv83 = fpext float %93 to double
+  %94 = load double* @yd, align 8
+  %95 = load double* @lyd, align 8
+  %96 = load float* @ret_sf, align 4
+  %97 = load float* @lret_sf, align 4
+  %cmp84 = fcmp oeq float %96, %97
+  br i1 %cmp84, label %land.lhs.true86, label %land.end92
+
+land.lhs.true86:                                  ; preds = %land.end76
+  %98 = load float* @x, align 4
+  %99 = load float* @lx, align 4
+  %cmp87 = fcmp oeq float %98, %99
+  br i1 %cmp87, label %land.rhs89, label %land.end92
+
+land.rhs89:                                       ; preds = %land.lhs.true86
+  %100 = load double* @yd, align 8
+  %101 = load double* @lyd, align 8
+  %cmp90 = fcmp oeq double %100, %101
+  br label %land.end92
+
+land.end92:                                       ; preds = %land.rhs89, %land.lhs.true86, %land.end76
+  %102 = phi i1 [ false, %land.lhs.true86 ], [ false, %land.end76 ], [ %cmp90, %land.rhs89 ]
+  %land.ext93 = zext i1 %102 to i32
+  %call94 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93)
+  call void @clear()
+  store float 0x417CCC7A00000000, float* @ret_sf, align 4
+  store double 0x4172034530000000, double* @lxd, align 8
+  store float 4.456200e+04, float* @ly, align 4
+  %103 = load double* @lxd, align 8
+  %104 = load float* @ly, align 4
+  %call95 = call float @sf_df_sf(double %103, float %104)
+  store float %call95, float* @lret_sf, align 4
+  %105 = load float* @ret_sf, align 4
+  %conv96 = fpext float %105 to double
+  %106 = load float* @lret_sf, align 4
+  %conv97 = fpext float %106 to double
+  %107 = load double* @xd, align 8
+  %108 = load double* @lxd, align 8
+  %109 = load float* @y, align 4
+  %conv98 = fpext float %109 to double
+  %110 = load float* @ly, align 4
+  %conv99 = fpext float %110 to double
+  %111 = load float* @ret_sf, align 4
+  %112 = load float* @lret_sf, align 4
+  %cmp100 = fcmp oeq float %111, %112
+  br i1 %cmp100, label %land.lhs.true102, label %land.end108
+
+land.lhs.true102:                                 ; preds = %land.end92
+  %113 = load double* @xd, align 8
+  %114 = load double* @lxd, align 8
+  %cmp103 = fcmp oeq double %113, %114
+  br i1 %cmp103, label %land.rhs105, label %land.end108
+
+land.rhs105:                                      ; preds = %land.lhs.true102
+  %115 = load float* @y, align 4
+  %116 = load float* @ly, align 4
+  %cmp106 = fcmp oeq float %115, %116
+  br label %land.end108
+
+land.end108:                                      ; preds = %land.rhs105, %land.lhs.true102, %land.end92
+  %117 = phi i1 [ false, %land.lhs.true102 ], [ false, %land.end92 ], [ %cmp106, %land.rhs105 ]
+  %land.ext109 = zext i1 %117 to i32
+  %call110 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109)
+  call void @clear()
+  store float 3.987721e+06, float* @ret_sf, align 4
+  store double 0x3FF1F49F6DDDC2D8, double* @lxd, align 8
+  store double 0x409129F306A2B170, double* @lyd, align 8
+  %118 = load double* @lxd, align 8
+  %119 = load double* @lyd, align 8
+  %call111 = call float @sf_df_df(double %118, double %119)
+  store float %call111, float* @lret_sf, align 4
+  %120 = load float* @ret_sf, align 4
+  %conv112 = fpext float %120 to double
+  %121 = load float* @lret_sf, align 4
+  %conv113 = fpext float %121 to double
+  %122 = load double* @xd, align 8
+  %123 = load double* @lxd, align 8
+  %124 = load double* @yd, align 8
+  %125 = load double* @lyd, align 8
+  %126 = load float* @ret_sf, align 4
+  %127 = load float* @lret_sf, align 4
+  %cmp114 = fcmp oeq float %126, %127
+  br i1 %cmp114, label %land.lhs.true116, label %land.end122
+
+land.lhs.true116:                                 ; preds = %land.end108
+  %128 = load double* @xd, align 8
+  %129 = load double* @lxd, align 8
+  %cmp117 = fcmp oeq double %128, %129
+  br i1 %cmp117, label %land.rhs119, label %land.end122
+
+land.rhs119:                                      ; preds = %land.lhs.true116
+  %130 = load double* @yd, align 8
+  %131 = load double* @lyd, align 8
+  %cmp120 = fcmp oeq double %130, %131
+  br label %land.end122
+
+land.end122:                                      ; preds = %land.rhs119, %land.lhs.true116, %land.end108
+  %132 = phi i1 [ false, %land.lhs.true116 ], [ false, %land.end108 ], [ %cmp120, %land.rhs119 ]
+  %land.ext123 = zext i1 %132 to i32
+  %call124 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123)
+  call void @clear()
+  store double 1.561234e+01, double* @ret_df, align 8
+  %call125 = call double @df_v()
+  store double %call125, double* @lret_df, align 8
+  %133 = load double* @ret_df, align 8
+  %134 = load double* @lret_df, align 8
+  %135 = load double* @ret_df, align 8
+  %136 = load double* @lret_df, align 8
+  %cmp126 = fcmp oeq double %135, %136
+  %conv127 = zext i1 %cmp126 to i32
+  %call128 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
+  call void @clear()
+  store double 1.345873e+01, double* @ret_df, align 8
+  store float 3.434520e+05, float* @lx, align 4
+  %137 = load float* @lx, align 4
+  %call129 = call double @df_sf(float %137)
+  store double %call129, double* @lret_df, align 8
+  %138 = load double* @ret_df, align 8
+  %139 = load double* @lret_df, align 8
+  %140 = load float* @x, align 4
+  %conv130 = fpext float %140 to double
+  %141 = load float* @lx, align 4
+  %conv131 = fpext float %141 to double
+  %142 = load double* @ret_df, align 8
+  %143 = load double* @lret_df, align 8
+  %cmp132 = fcmp oeq double %142, %143
+  %conv133 = zext i1 %cmp132 to i32
+  %144 = load float* @x, align 4
+  %145 = load float* @lx, align 4
+  %cmp134 = fcmp oeq float %144, %145
+  %conv135 = zext i1 %cmp134 to i32
+  %and136 = and i32 %conv133, %conv135
+  %call137 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136)
+  call void @clear()
+  store double 0x4084F3AB7AA25D8D, double* @ret_df, align 8
+  store double 0x4114F671D2F1A9FC, double* @lxd, align 8
+  %146 = load double* @lxd, align 8
+  %call138 = call double @df_df(double %146)
+  store double %call138, double* @lret_df, align 8
+  %147 = load double* @ret_df, align 8
+  %148 = load double* @lret_df, align 8
+  %149 = load double* @xd, align 8
+  %150 = load double* @lxd, align 8
+  %151 = load double* @ret_df, align 8
+  %152 = load double* @lret_df, align 8
+  %cmp139 = fcmp oeq double %151, %152
+  %conv140 = zext i1 %cmp139 to i32
+  %153 = load double* @xd, align 8
+  %154 = load double* @lxd, align 8
+  %cmp141 = fcmp oeq double %153, %154
+  %conv142 = zext i1 %cmp141 to i32
+  %and143 = and i32 %conv140, %conv142
+  %call144 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143)
+  call void @clear()
+  store double 6.781956e+03, double* @ret_df, align 8
+  store float 4.445500e+03, float* @lx, align 4
+  store float 0x4068ACCCC0000000, float* @ly, align 4
+  %155 = load float* @lx, align 4
+  %156 = load float* @ly, align 4
+  %call145 = call double @df_sf_sf(float %155, float %156)
+  store double %call145, double* @lret_df, align 8
+  %157 = load double* @ret_df, align 8
+  %158 = load double* @lret_df, align 8
+  %159 = load float* @x, align 4
+  %conv146 = fpext float %159 to double
+  %160 = load float* @lx, align 4
+  %conv147 = fpext float %160 to double
+  %161 = load float* @y, align 4
+  %conv148 = fpext float %161 to double
+  %162 = load float* @ly, align 4
+  %conv149 = fpext float %162 to double
+  %163 = load double* @ret_df, align 8
+  %164 = load double* @lret_df, align 8
+  %cmp150 = fcmp oeq double %163, %164
+  br i1 %cmp150, label %land.lhs.true152, label %land.end158
+
+land.lhs.true152:                                 ; preds = %land.end122
+  %165 = load float* @x, align 4
+  %166 = load float* @lx, align 4
+  %cmp153 = fcmp oeq float %165, %166
+  br i1 %cmp153, label %land.rhs155, label %land.end158
+
+land.rhs155:                                      ; preds = %land.lhs.true152
+  %167 = load float* @y, align 4
+  %168 = load float* @ly, align 4
+  %cmp156 = fcmp oeq float %167, %168
+  br label %land.end158
+
+land.end158:                                      ; preds = %land.rhs155, %land.lhs.true152, %land.end122
+  %169 = phi i1 [ false, %land.lhs.true152 ], [ false, %land.end122 ], [ %cmp156, %land.rhs155 ]
+  %land.ext159 = zext i1 %169 to i32
+  %call160 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159)
+  call void @clear()
+  store double 1.889130e+05, double* @ret_df, align 8
+  store float 9.111450e+05, float* @lx, align 4
+  store double 0x4185320A58000000, double* @lyd, align 8
+  %170 = load float* @lx, align 4
+  %171 = load double* @lyd, align 8
+  %call161 = call double @df_sf_df(float %170, double %171)
+  store double %call161, double* @lret_df, align 8
+  %172 = load double* @ret_df, align 8
+  %173 = load double* @lret_df, align 8
+  %174 = load float* @x, align 4
+  %conv162 = fpext float %174 to double
+  %175 = load float* @lx, align 4
+  %conv163 = fpext float %175 to double
+  %176 = load double* @yd, align 8
+  %177 = load double* @lyd, align 8
+  %178 = load double* @ret_df, align 8
+  %179 = load double* @lret_df, align 8
+  %cmp164 = fcmp oeq double %178, %179
+  br i1 %cmp164, label %land.lhs.true166, label %land.end172
+
+land.lhs.true166:                                 ; preds = %land.end158
+  %180 = load float* @x, align 4
+  %181 = load float* @lx, align 4
+  %cmp167 = fcmp oeq float %180, %181
+  br i1 %cmp167, label %land.rhs169, label %land.end172
+
+land.rhs169:                                      ; preds = %land.lhs.true166
+  %182 = load double* @yd, align 8
+  %183 = load double* @lyd, align 8
+  %cmp170 = fcmp oeq double %182, %183
+  br label %land.end172
+
+land.end172:                                      ; preds = %land.rhs169, %land.lhs.true166, %land.end158
+  %184 = phi i1 [ false, %land.lhs.true166 ], [ false, %land.end158 ], [ %cmp170, %land.rhs169 ]
+  %land.ext173 = zext i1 %184 to i32
+  %call174 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173)
+  call void @clear()
+  store double 0x418B2DB900000000, double* @ret_df, align 8
+  store double 0x41B1EF2ED3000000, double* @lxd, align 8
+  store float 1.244562e+06, float* @ly, align 4
+  %185 = load double* @lxd, align 8
+  %186 = load float* @ly, align 4
+  %call175 = call double @df_df_sf(double %185, float %186)
+  store double %call175, double* @lret_df, align 8
+  %187 = load double* @ret_df, align 8
+  %188 = load double* @lret_df, align 8
+  %189 = load double* @xd, align 8
+  %190 = load double* @lxd, align 8
+  %191 = load float* @y, align 4
+  %conv176 = fpext float %191 to double
+  %192 = load float* @ly, align 4
+  %conv177 = fpext float %192 to double
+  %193 = load double* @ret_df, align 8
+  %194 = load double* @lret_df, align 8
+  %cmp178 = fcmp oeq double %193, %194
+  br i1 %cmp178, label %land.lhs.true180, label %land.end186
+
+land.lhs.true180:                                 ; preds = %land.end172
+  %195 = load double* @xd, align 8
+  %196 = load double* @lxd, align 8
+  %cmp181 = fcmp oeq double %195, %196
+  br i1 %cmp181, label %land.rhs183, label %land.end186
+
+land.rhs183:                                      ; preds = %land.lhs.true180
+  %197 = load float* @y, align 4
+  %198 = load float* @ly, align 4
+  %cmp184 = fcmp oeq float %197, %198
+  br label %land.end186
+
+land.end186:                                      ; preds = %land.rhs183, %land.lhs.true180, %land.end172
+  %199 = phi i1 [ false, %land.lhs.true180 ], [ false, %land.end172 ], [ %cmp184, %land.rhs183 ]
+  %land.ext187 = zext i1 %199 to i32
+  %call188 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187)
+  call void @clear()
+  store double 3.987721e+06, double* @ret_df, align 8
+  store double 5.223560e+00, double* @lxd, align 8
+  store double 0x40B7D37CC1A8AC5C, double* @lyd, align 8
+  %200 = load double* @lxd, align 8
+  %201 = load double* @lyd, align 8
+  %call189 = call double @df_df_df(double %200, double %201)
+  store double %call189, double* @lret_df, align 8
+  %202 = load double* @ret_df, align 8
+  %203 = load double* @lret_df, align 8
+  %204 = load double* @xd, align 8
+  %205 = load double* @lxd, align 8
+  %206 = load double* @yd, align 8
+  %207 = load double* @lyd, align 8
+  %208 = load double* @ret_df, align 8
+  %209 = load double* @lret_df, align 8
+  %cmp190 = fcmp oeq double %208, %209
+  br i1 %cmp190, label %land.lhs.true192, label %land.end198
+
+land.lhs.true192:                                 ; preds = %land.end186
+  %210 = load double* @xd, align 8
+  %211 = load double* @lxd, align 8
+  %cmp193 = fcmp oeq double %210, %211
+  br i1 %cmp193, label %land.rhs195, label %land.end198
+
+land.rhs195:                                      ; preds = %land.lhs.true192
+  %212 = load double* @yd, align 8
+  %213 = load double* @lyd, align 8
+  %cmp196 = fcmp oeq double %212, %213
+  br label %land.end198
+
+land.end198:                                      ; preds = %land.rhs195, %land.lhs.true192, %land.end186
+  %214 = phi i1 [ false, %land.lhs.true192 ], [ false, %land.end186 ], [ %cmp196, %land.rhs195 ]
+  %land.ext199 = zext i1 %214 to i32
+  %call200 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199)
+  call void @clear()
+  store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %call201 = call { float, float } @sc_v()
+  %215 = extractvalue { float, float } %call201, 0
+  %216 = extractvalue { float, float } %call201, 1
+  store float %215, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  store float %216, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %ret_sc.real = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %conv202 = fpext float %ret_sc.real to double
+  %conv203 = fpext float %ret_sc.imag to double
+  %ret_sc.real204 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag205 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %conv206 = fpext float %ret_sc.real204 to double
+  %conv207 = fpext float %ret_sc.imag205 to double
+  %lret_sc.real = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %conv208 = fpext float %lret_sc.real to double
+  %conv209 = fpext float %lret_sc.imag to double
+  %lret_sc.real210 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag211 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %conv212 = fpext float %lret_sc.real210 to double
+  %conv213 = fpext float %lret_sc.imag211 to double
+  %ret_sc.real214 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag215 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %lret_sc.real216 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag217 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %cmp.r = fcmp oeq float %ret_sc.real214, %lret_sc.real216
+  %cmp.i = fcmp oeq float %ret_sc.imag215, %lret_sc.imag217
+  %and.ri = and i1 %cmp.r, %cmp.i
+  %conv218 = zext i1 %and.ri to i32
+  %call219 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218)
+  call void @clear()
+  store float 0x3FF7A99300000000, float* @lx, align 4
+  store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %217 = load float* @lx, align 4
+  %call220 = call { float, float } @sc_sf(float %217)
+  %218 = extractvalue { float, float } %call220, 0
+  %219 = extractvalue { float, float } %call220, 1
+  store float %218, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  store float %219, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %ret_sc.real221 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag222 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %conv223 = fpext float %ret_sc.real221 to double
+  %conv224 = fpext float %ret_sc.imag222 to double
+  %ret_sc.real225 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag226 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %conv227 = fpext float %ret_sc.real225 to double
+  %conv228 = fpext float %ret_sc.imag226 to double
+  %lret_sc.real229 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag230 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %conv231 = fpext float %lret_sc.real229 to double
+  %conv232 = fpext float %lret_sc.imag230 to double
+  %lret_sc.real233 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag234 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %conv235 = fpext float %lret_sc.real233 to double
+  %conv236 = fpext float %lret_sc.imag234 to double
+  %220 = load float* @x, align 4
+  %conv237 = fpext float %220 to double
+  %221 = load float* @lx, align 4
+  %conv238 = fpext float %221 to double
+  %ret_sc.real239 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag240 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %lret_sc.real241 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag242 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %cmp.r243 = fcmp oeq float %ret_sc.real239, %lret_sc.real241
+  %cmp.i244 = fcmp oeq float %ret_sc.imag240, %lret_sc.imag242
+  %and.ri245 = and i1 %cmp.r243, %cmp.i244
+  br i1 %and.ri245, label %land.rhs247, label %land.end250
+
+land.rhs247:                                      ; preds = %land.end198
+  %222 = load float* @x, align 4
+  %223 = load float* @lx, align 4
+  %cmp248 = fcmp oeq float %222, %223
+  br label %land.end250
+
+land.end250:                                      ; preds = %land.rhs247, %land.end198
+  %224 = phi i1 [ false, %land.end198 ], [ %cmp248, %land.rhs247 ]
+  %land.ext251 = zext i1 %224 to i32
+  %call252 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251)
+  call void @clear()
+  store double 1.234500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  store double 7.677000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %call253 = call { double, double } @dc_v()
+  %225 = extractvalue { double, double } %call253, 0
+  %226 = extractvalue { double, double } %call253, 1
+  store double %225, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  store double %226, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %ret_dc.real = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %ret_dc.real254 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag255 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %lret_dc.real256 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag257 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %ret_dc.real258 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag259 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real260 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag261 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %cmp.r262 = fcmp oeq double %ret_dc.real258, %lret_dc.real260
+  %cmp.i263 = fcmp oeq double %ret_dc.imag259, %lret_dc.imag261
+  %and.ri264 = and i1 %cmp.r262, %cmp.i263
+  %conv265 = zext i1 %and.ri264 to i32
+  %call266 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double %lret_dc.imag257, i32 %conv265)
+  call void @clear()
+  store double 0x40AAF6F532617C1C, double* @lxd, align 8
+  store double 4.444500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  store double 7.888000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %227 = load float* @lx, align 4
+  %call267 = call { double, double } @dc_sf(float %227)
+  %228 = extractvalue { double, double } %call267, 0
+  %229 = extractvalue { double, double } %call267, 1
+  store double %228, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  store double %229, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %ret_dc.real268 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag269 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %ret_dc.real270 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag271 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real272 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag273 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %lret_dc.real274 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag275 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %230 = load float* @x, align 4
+  %conv276 = fpext float %230 to double
+  %231 = load float* @lx, align 4
+  %conv277 = fpext float %231 to double
+  %ret_dc.real278 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag279 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real280 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag281 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %cmp.r282 = fcmp oeq double %ret_dc.real278, %lret_dc.real280
+  %cmp.i283 = fcmp oeq double %ret_dc.imag279, %lret_dc.imag281
+  %and.ri284 = and i1 %cmp.r282, %cmp.i283
+  br i1 %and.ri284, label %land.rhs286, label %land.end289
+
+land.rhs286:                                      ; preds = %land.end250
+  %232 = load float* @x, align 4
+  %233 = load float* @lx, align 4
+  %cmp287 = fcmp oeq float %232, %233
+  br label %land.end289
+
+land.end289:                                      ; preds = %land.rhs286, %land.end250
+  %234 = phi i1 [ false, %land.end250 ], [ %cmp287, %land.rhs286 ]
+  %land.ext290 = zext i1 %234 to i32
+  %call291 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
+  %235 = load i32* %retval
+  ret i32 %235
+}
+
+declare void @v_sf(float) #1
+; stel: .section	.mips16.call.fp.v_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_v_sf
+; stel:	mtc1 $4,$f12
+; stel:	lui  $25,%hi(v_sf)
+; stel:	addiu  $25,$25,%lo(v_sf)
+; stel:	jr $25
+; stel:	.end	__call_stub_fp_v_sf
+
+declare i32 @printf(i8*, ...) #1
+
+declare void @v_df(double) #1
+; stel: .section	.mips16.call.fp.v_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_v_df
+; stel: #APP
+; stel: .set reorder
+; stel:	mtc1 $4,$f12
+; stel:	mtc1 $5,$f13
+; stel:	lui  $25,%hi(v_df)
+; stel:	addiu  $25,$25,%lo(v_df)
+; stel:	jr $25
+; stel:	.end	__call_stub_fp_v_df
+
+declare void @v_sf_sf(float, float) #1
+; stel: .section	.mips16.call.fp.v_sf_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_v_sf_sf
+; stel:	mtc1 $4,$f12
+; stel:	mtc1 $5,$f14
+; stel:	lui  $25,%hi(v_sf_sf)
+; stel:	addiu  $25,$25,%lo(v_sf_sf)
+; stel:	jr $25
+; stel:	.end	__call_stub_fp_v_sf_sf
+
+declare void @v_sf_df(float, double) #1
+; stel: .section	.mips16.call.fp.v_sf_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_v_sf_df
+; stel:	mtc1 $4,$f12
+; stel:	mtc1 $6,$f14
+; stel:	mtc1 $7,$f15
+; stel:	lui  $25,%hi(v_sf_df)
+; stel:	addiu  $25,$25,%lo(v_sf_df)
+; stel:	jr $25
+; stel:	.end	__call_stub_fp_v_sf_df
+
+declare void @v_df_sf(double, float) #1
+; stel: .section	.mips16.call.fp.v_df_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_v_df_sf
+; stel:	mtc1 $4,$f12
+; stel:	mtc1 $5,$f13
+; stel:	mtc1 $6,$f14
+; stel:	lui  $25,%hi(v_df_sf)
+; stel:	addiu  $25,$25,%lo(v_df_sf)
+; stel:	jr $25
+; stel:	.end	__call_stub_fp_v_df_sf
+
+declare void @v_df_df(double, double) #1
+; stel: .section	.mips16.call.fp.v_df_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_v_df_df
+; stel:	mtc1 $4,$f12
+; stel:	mtc1 $5,$f13
+; stel:	mtc1 $6,$f14
+; stel:	mtc1 $7,$f15
+; stel:	lui  $25,%hi(v_df_df)
+; stel:	addiu  $25,$25,%lo(v_df_df)
+; stel:	jr $25
+; stel:	.end	__call_stub_fp_v_df_df
+
+declare float @sf_v() #1
+; stel: .section	.mips16.call.fp.sf_v,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_v
+; stel: move $18, $31
+; stel: jal sf_v
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_v
+
+declare float @sf_sf(float) #1
+; stel: .section	.mips16.call.fp.sf_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal sf_sf
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_sf
+
+declare float @sf_df(double) #1
+; stel: .section	.mips16.call.fp.sf_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: move $18, $31
+; stel: jal sf_df
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_df
+
+declare float @sf_sf_sf(float, float) #1
+; stel: .section	.mips16.call.fp.sf_sf_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_sf_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f14
+; stel: move $18, $31
+; stel: jal sf_sf_sf
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_sf_sf
+
+declare float @sf_sf_df(float, double) #1
+; stel: .section	.mips16.call.fp.sf_sf_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal sf_sf_df
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_sf_df
+
+declare float @sf_df_sf(double, float) #1
+; stel: .section	.mips16.call.fp.sf_df_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_df_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: move $18, $31
+; stel: jal sf_df_sf
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_df_sf
+
+declare float @sf_df_df(double, double) #1
+; stel: .section	.mips16.call.fp.sf_df_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sf_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal sf_df_df
+; stel:	mfc1 $2,$f0
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sf_df_df
+
+declare double @df_v() #1
+; stel: .section	.mips16.call.fp.df_v,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_v
+; stel: move $18, $31
+; stel: jal df_v
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_v
+
+declare double @df_sf(float) #1
+; stel: .section	.mips16.call.fp.df_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal df_sf
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_sf
+
+declare double @df_df(double) #1
+; stel: .section	.mips16.call.fp.df_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: move $18, $31
+; stel: jal df_df
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_df
+
+declare double @df_sf_sf(float, float) #1
+; stel: .section	.mips16.call.fp.df_sf_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_sf_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f14
+; stel: move $18, $31
+; stel: jal df_sf_sf
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_sf_sf
+
+declare double @df_sf_df(float, double) #1
+; stel: .section	.mips16.call.fp.df_sf_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal df_sf_df
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_sf_df
+
+declare double @df_df_sf(double, float) #1
+; stel: .section	.mips16.call.fp.df_df_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_df_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: move $18, $31
+; stel: jal df_df_sf
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_df_sf
+
+declare double @df_df_df(double, double) #1
+; stel: .section	.mips16.call.fp.df_df_df,"ax",@progbits
+; stel:	.ent	__call_stub_fp_df_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal df_df_df
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_df_df_df
+
+declare { float, float } @sc_v() #1
+; stel: .section	.mips16.call.fp.sc_v,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sc_v
+; stel: move $18, $31
+; stel: jal sc_v
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f2
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sc_v
+
+declare { float, float } @sc_sf(float) #1
+; stel: .section	.mips16.call.fp.sc_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_sc_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal sc_sf
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f2
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_sc_sf
+
+declare { double, double } @dc_v() #1
+; stel: .section	.mips16.call.fp.dc_v,"ax",@progbits
+; stel:	.ent	__call_stub_fp_dc_v
+; stel: move $18, $31
+; stel: jal dc_v
+; stel:	mfc1 $4,$f2
+; stel:	mfc1 $5,$f3
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_dc_v
+
+declare { double, double } @dc_sf(float) #1
+; stel: .section	.mips16.call.fp.dc_sf,"ax",@progbits
+; stel:	.ent	__call_stub_fp_dc_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal dc_sf
+; stel:	mfc1 $4,$f2
+; stel:	mfc1 $5,$f3
+; stel:	mfc1 $2,$f0
+; stel:	mfc1 $3,$f1
+; stel:	jr $18
+; stel:	.end	__call_stub_fp_dc_sf
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/hf16call32_body.ll b/test/CodeGen/Mips/hf16call32_body.ll
new file mode 100644
index 000000000000..34bae26f85f3
--- /dev/null
+++ b/test/CodeGen/Mips/hf16call32_body.ll
@@ -0,0 +1,294 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+
+@x = external global float
+@xd = external global double
+@y = external global float
+@yd = external global double
+@ret_sf = external global float
+@ret_df = external global double
+@ret_sc = external global { float, float }
+@ret_dc = external global { double, double }
+
+; Function Attrs: nounwind -- takes one float and copies it into global @x.
+define void @v_sf(float %p) #0 {
+entry:
+  %p.addr = alloca float, align 4                 ; stack slot for the incoming argument
+  store float %p, float* %p.addr, align 4
+  %0 = load float* %p.addr, align 4
+  store float %0, float* @x, align 4              ; publish the argument in @x
+  ret void
+}
+; stel: .section	.mips16.fn.v_sf,"ax",@progbits
+; stel:	.ent	__fn_stub_v_sf
+; stel:		la $25,v_sf
+; stel:		mfc1 $4,$f12
+; stel:		jr $25
+; stel:		__fn_local_v_sf = v_sf
+; stel:	.end	__fn_stub_v_sf
+
+declare i32 @printf(i8*, ...) #1
+
+; Function Attrs: nounwind -- takes one double and copies it into global @xd.
+define void @v_df(double %p) #0 {
+entry:
+  %p.addr = alloca double, align 8                ; stack slot for the incoming argument
+  store double %p, double* %p.addr, align 8
+  %0 = load double* %p.addr, align 8
+  store double %0, double* @xd, align 8           ; publish the argument in @xd
+  ret void
+}
+
+; stel: .section	.mips16.fn.v_df,"ax",@progbits
+; stel:	.ent	__fn_stub_v_df
+; stel:		la $25,v_df
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f13
+; stel:		jr $25
+; stel:		__fn_local_v_df = v_df
+; stel:	.end	__fn_stub_v_df
+
+; Function Attrs: nounwind -- takes (float, float); stores %p1 to @x and %p2 to @y.
+define void @v_sf_sf(float %p1, float %p2) #0 {
+entry:
+  %p1.addr = alloca float, align 4
+  %p2.addr = alloca float, align 4
+  store float %p1, float* %p1.addr, align 4       ; spill both arguments to their stack slots
+  store float %p2, float* %p2.addr, align 4
+  %0 = load float* %p1.addr, align 4
+  store float %0, float* @x, align 4              ; first argument -> @x
+  %1 = load float* %p2.addr, align 4
+  store float %1, float* @y, align 4              ; second argument -> @y
+  ret void
+}
+
+; stel: .section	.mips16.fn.v_sf_sf,"ax",@progbits
+; stel:	.ent	__fn_stub_v_sf_sf
+; stel:		la $25,v_sf_sf
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f14
+; stel:		jr $25
+; stel:		__fn_local_v_sf_sf = v_sf_sf
+; stel:	.end	__fn_stub_v_sf_sf
+
+; Function Attrs: nounwind -- takes (float, double); stores %p1 to @x and %p2 to @yd.
+define void @v_sf_df(float %p1, double %p2) #0 {
+entry:
+  %p1.addr = alloca float, align 4
+  %p2.addr = alloca double, align 8
+  store float %p1, float* %p1.addr, align 4       ; spill both arguments to their stack slots
+  store double %p2, double* %p2.addr, align 8
+  %0 = load float* %p1.addr, align 4
+  store float %0, float* @x, align 4              ; float argument -> @x
+  %1 = load double* %p2.addr, align 8
+  store double %1, double* @yd, align 8           ; double argument -> @yd
+  ret void
+}
+
+; stel: .section	.mips16.fn.v_sf_df,"ax",@progbits
+; stel:	.ent	__fn_stub_v_sf_df
+; stel:		la $25,v_sf_df
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $6,$f14
+; stel:		mfc1 $7,$f15
+; stel:		jr $25
+; stel:		__fn_local_v_sf_df = v_sf_df
+; stel:	.end	__fn_stub_v_sf_df
+
+; Function Attrs: nounwind -- takes (double, float); stores %p1 to @xd and %p2 to @y.
+define void @v_df_sf(double %p1, float %p2) #0 {
+entry:
+  %p1.addr = alloca double, align 8
+  %p2.addr = alloca float, align 4
+  store double %p1, double* %p1.addr, align 8     ; spill both arguments to their stack slots
+  store float %p2, float* %p2.addr, align 4
+  %0 = load double* %p1.addr, align 8
+  store double %0, double* @xd, align 8           ; double argument -> @xd
+  %1 = load float* %p2.addr, align 4
+  store float %1, float* @y, align 4              ; float argument -> @y
+  ret void
+}
+
+; stel: .section	.mips16.fn.v_df_sf,"ax",@progbits
+; stel:	.ent	__fn_stub_v_df_sf
+; stel:		la $25,v_df_sf
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f13
+; stel:		mfc1 $6,$f14
+; stel:		jr $25
+; stel:		__fn_local_v_df_sf = v_df_sf
+; stel:	.end	__fn_stub_v_df_sf
+
+; Function Attrs: nounwind -- takes (double, double); stores %p1 to @xd and %p2 to @yd.
+define void @v_df_df(double %p1, double %p2) #0 {
+entry:
+  %p1.addr = alloca double, align 8
+  %p2.addr = alloca double, align 8
+  store double %p1, double* %p1.addr, align 8     ; spill both arguments to their stack slots
+  store double %p2, double* %p2.addr, align 8
+  %0 = load double* %p1.addr, align 8
+  store double %0, double* @xd, align 8           ; first argument -> @xd
+  %1 = load double* %p2.addr, align 8
+  store double %1, double* @yd, align 8           ; second argument -> @yd
+  ret void
+}
+
+; stel: .section	.mips16.fn.v_df_df,"ax",@progbits
+; stel:	.ent	__fn_stub_v_df_df
+; stel:		la $25,v_df_df
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f13
+; stel:		mfc1 $6,$f14
+; stel:		mfc1 $7,$f15
+; stel:		jr $25
+; stel:		__fn_local_v_df_df = v_df_df
+; stel:	.end	__fn_stub_v_df_df
+
+; Function Attrs: nounwind -- no parameters; returns the float currently held in @ret_sf. No stub checks follow this function in the file.
+define float @sf_v() #0 {
+entry:
+  %0 = load float* @ret_sf, align 4               ; value to return comes straight from the global
+  ret float %0
+}
+
+; Function Attrs: nounwind -- takes one float, stores it to @x, and returns the float held in @ret_sf.
+define float @sf_sf(float %p) #0 {
+entry:
+  %p.addr = alloca float, align 4                 ; stack slot for the incoming argument
+  store float %p, float* %p.addr, align 4
+  %0 = load float* %p.addr, align 4
+  store float %0, float* @x, align 4              ; argument -> @x
+  %1 = load float* @ret_sf, align 4               ; return value is independent of the argument
+  ret float %1
+}
+
+
+; stel: .section	.mips16.fn.sf_sf,"ax",@progbits
+; stel:	.ent	__fn_stub_sf_sf
+; stel:		la $25,sf_sf
+; stel:		mfc1 $4,$f12
+; stel:		jr $25
+; stel:		__fn_local_sf_sf = sf_sf
+; stel:	.end	__fn_stub_sf_sf
+
+
+; Function Attrs: nounwind -- takes one double, stores it to @xd, and returns the float held in @ret_sf.
+define float @sf_df(double %p) #0 {
+entry:
+  %p.addr = alloca double, align 8                ; stack slot for the incoming argument
+  store double %p, double* %p.addr, align 8
+  %0 = load double* %p.addr, align 8
+  store double %0, double* @xd, align 8           ; argument -> @xd
+  %1 = load float* @ret_sf, align 4               ; return value is independent of the argument
+  ret float %1
+}
+
+; stel: .section	.mips16.fn.sf_df,"ax",@progbits
+; stel:	.ent	__fn_stub_sf_df
+; stel:		la $25,sf_df
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f13
+; stel:		jr $25
+; stel:		__fn_local_sf_df = sf_df
+; stel:	.end	__fn_stub_sf_df
+
+; Function Attrs: nounwind -- takes (float, float); stores %p1 to @x, %p2 to @y, and returns the float held in @ret_sf.
+define float @sf_sf_sf(float %p1, float %p2) #0 {
+entry:
+  %p1.addr = alloca float, align 4
+  %p2.addr = alloca float, align 4
+  store float %p1, float* %p1.addr, align 4       ; spill both arguments to their stack slots
+  store float %p2, float* %p2.addr, align 4
+  %0 = load float* %p1.addr, align 4
+  store float %0, float* @x, align 4              ; first argument -> @x
+  %1 = load float* %p2.addr, align 4
+  store float %1, float* @y, align 4              ; second argument -> @y
+  %2 = load float* @ret_sf, align 4               ; return value is independent of the arguments
+  ret float %2
+}
+
+; stel: .section	.mips16.fn.sf_sf_sf,"ax",@progbits
+; stel:	.ent	__fn_stub_sf_sf_sf
+; stel:		la $25,sf_sf_sf
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f14
+; stel:		jr $25
+; stel:		__fn_local_sf_sf_sf = sf_sf_sf
+; stel:	.end	__fn_stub_sf_sf_sf
+
+; Function Attrs: nounwind -- takes (float, double); stores %p1 to @x, %p2 to @yd, and returns the float held in @ret_sf.
+define float @sf_sf_df(float %p1, double %p2) #0 {
+entry:
+  %p1.addr = alloca float, align 4
+  %p2.addr = alloca double, align 8
+  store float %p1, float* %p1.addr, align 4       ; spill both arguments to their stack slots
+  store double %p2, double* %p2.addr, align 8
+  %0 = load float* %p1.addr, align 4
+  store float %0, float* @x, align 4              ; float argument -> @x
+  %1 = load double* %p2.addr, align 8
+  store double %1, double* @yd, align 8           ; double argument -> @yd
+  %2 = load float* @ret_sf, align 4               ; return value is independent of the arguments
+  ret float %2
+}
+
+; stel: .section	.mips16.fn.sf_sf_df,"ax",@progbits
+; stel:	.ent	__fn_stub_sf_sf_df
+; stel:		la $25,sf_sf_df
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $6,$f14
+; stel:		mfc1 $7,$f15
+; stel:		jr $25
+; stel:		__fn_local_sf_sf_df = sf_sf_df
+; stel:	.end	__fn_stub_sf_sf_df
+
+; Function Attrs: nounwind -- takes (double, float); stores %p1 to @xd, %p2 to @y, and returns the float held in @ret_sf.
+define float @sf_df_sf(double %p1, float %p2) #0 {
+entry:
+  %p1.addr = alloca double, align 8
+  %p2.addr = alloca float, align 4
+  store double %p1, double* %p1.addr, align 8     ; spill both arguments to their stack slots
+  store float %p2, float* %p2.addr, align 4
+  %0 = load double* %p1.addr, align 8
+  store double %0, double* @xd, align 8           ; double argument -> @xd
+  %1 = load float* %p2.addr, align 4
+  store float %1, float* @y, align 4              ; float argument -> @y
+  %2 = load float* @ret_sf, align 4               ; return value is independent of the arguments
+  ret float %2
+}
+
+; stel: .section	.mips16.fn.sf_df_sf,"ax",@progbits
+; stel:	.ent	__fn_stub_sf_df_sf
+; stel:		la $25,sf_df_sf
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f13
+; stel:		mfc1 $6,$f14
+; stel:		jr $25
+; stel:		__fn_local_sf_df_sf = sf_df_sf
+; stel:	.end	__fn_stub_sf_df_sf
+
+; Function Attrs: nounwind -- takes (double, double); stores %p1 to @xd, %p2 to @yd, and returns the float held in @ret_sf.
+define float @sf_df_df(double %p1, double %p2) #0 {
+entry:
+  %p1.addr = alloca double, align 8
+  %p2.addr = alloca double, align 8
+  store double %p1, double* %p1.addr, align 8     ; spill both arguments to their stack slots
+  store double %p2, double* %p2.addr, align 8
+  %0 = load double* %p1.addr, align 8
+  store double %0, double* @xd, align 8           ; first argument -> @xd
+  %1 = load double* %p2.addr, align 8
+  store double %1, double* @yd, align 8           ; second argument -> @yd
+  %2 = load float* @ret_sf, align 4               ; return value is independent of the arguments
+  ret float %2
+}
+
+; stel: .section	.mips16.fn.sf_df_df,"ax",@progbits
+; stel:	.ent	__fn_stub_sf_df_df
+; stel:		la $25,sf_df_df
+; stel:		mfc1 $4,$f12
+; stel:		mfc1 $5,$f13
+; stel:		mfc1 $6,$f14
+; stel:		mfc1 $7,$f15
+; stel:		jr $25
+; stel:		__fn_local_sf_df_df = sf_df_df
+; stel:	.end	__fn_stub_sf_df_df
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/hf1_body.ll b/test/CodeGen/Mips/hf1_body.ll
new file mode 100644
index 000000000000..b2cce92aa1a4
--- /dev/null
+++ b/test/CodeGen/Mips/hf1_body.ll
@@ -0,0 +1,21 @@
+; RUN: llc  -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16  -relocation-model=pic -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=picfp16
+
+@x = external global float
+
+; Function Attrs: nounwind -- takes one float and copies it into global @x; the picfp16 checks below expect a __fn_stub_v_sf stub for it.
+define void @v_sf(float %p) #0 {
+entry:
+  %p.addr = alloca float, align 4                 ; stack slot for the incoming argument
+  store float %p, float* %p.addr, align 4
+  %0 = load float* %p.addr, align 4
+  store float %0, float* @x, align 4              ; publish the argument in @x
+  ret void
+}
+; picfp16:	.ent	__fn_stub_v_sf
+; picfp16:	.cpload  $25
+; picfp16:	.set reorder
+; picfp16:	.reloc 0,R_MIPS_NONE,v_sf
+; picfp16: 	la $25,$__fn_local_v_sf
+; picfp16: 	mfc1 $4,$f12
+; picfp16: 	jr $25
+; picfp16: 	.end	__fn_stub_v_sf
diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll
new file mode 100644
index 000000000000..25639dad63a8
--- /dev/null
+++ b/test/CodeGen/Mips/hfptrcall.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=picel
+
+@ptrsv = global float ()* @sv, align 4
+@ptrdv = global double ()* @dv, align 4
+@ptrscv = global { float, float } ()* @scv, align 4
+@ptrdcv = global { double, double } ()* @dcv, align 4
+@x = common global float 0.000000e+00, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1
+@xd = common global double 0.000000e+00, align 8
+@xy = common global { float, float } zeroinitializer, align 4
+@.str1 = private unnamed_addr constant [10 x i8] c"%f + %fi\0A\00", align 1
+@xyd = common global { double, double } zeroinitializer, align 8
+
+; Function Attrs: nounwind -- returns the float constant 1.000000e+01; the picel checks below expect a %call16 load of __mips16_ret_sf inside it.
+define float @sv() #0 {
+entry:
+  ret float 1.000000e+01
+}
+; picel: 	.ent	sv
+; picel: 	lw	${{[0-9]+}}, %call16(__mips16_ret_sf)(${{[0-9]+}})
+; picel:	.end	sv
+
+; Function Attrs: nounwind -- returns the double constant 1.500000e+01; the picel checks below expect a %call16 load of __mips16_ret_df inside it.
+define double @dv() #0 {
+entry:
+  ret double 1.500000e+01
+}
+
+; picel: 	.ent	dv
+; picel: 	lw	${{[0-9]+}}, %call16(__mips16_ret_df)(${{[0-9]+}})
+; picel:	.end	dv
+
+; Function Attrs: nounwind -- builds a { float, float } pair in a stack temporary and returns it by value; the picel checks below expect a %call16 load of __mips16_ret_sc.
+define { float, float } @scv() #0 {
+entry:
+  %retval = alloca { float, float }, align 4
+  %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0   ; address of element 0
+  %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1   ; address of element 1
+  store float 5.000000e+00, float* %real
+  store float 9.900000e+01, float* %imag
+  %0 = load { float, float }* %retval             ; reload the whole pair as the return value
+  ret { float, float } %0
+}
+
+; picel: 	.ent	scv
+; picel: 	lw	${{[0-9]+}}, %call16(__mips16_ret_sc)(${{[0-9]+}})
+; picel:	.end	scv
+
+; Function Attrs: nounwind -- builds a { double, double } pair in a stack temporary and returns it by value; the picel checks below expect a %call16 load of __mips16_ret_dc.
+define { double, double } @dcv() #0 {
+entry:
+  %retval = alloca { double, double }, align 8
+  %real = getelementptr inbounds { double, double }* %retval, i32 0, i32 0   ; address of element 0
+  %imag = getelementptr inbounds { double, double }* %retval, i32 0, i32 1   ; address of element 1
+  store double 0x416BC8B0A0000000, double* %real
+  store double 0x41CDCCB763800000, double* %imag
+  %0 = load { double, double }* %retval           ; reload the whole pair as the return value
+  ret { double, double } %0
+}
+
+; picel: 	.ent	dcv
+; picel: 	lw	${{[0-9]+}}, %call16(__mips16_ret_dc)(${{[0-9]+}})
+; picel:	.end	dcv
+
+; Function Attrs: nounwind -- calls through the four function pointers @ptrsv, @ptrdv, @ptrscv, @ptrdcv in turn, saves each result to a global, and prints it; always returns 0.
+define i32 @main() #0 {
+entry:
+  %0 = load float ()** @ptrsv, align 4            ; indirect call returning float
+  %call = call float %0()
+  store float %call, float* @x, align 4
+  %1 = load float* @x, align 4
+  %conv = fpext float %1 to double                ; widened to double for the variadic printf call
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %conv)
+  %2 = load double ()** @ptrdv, align 4           ; indirect call returning double
+  %call2 = call double %2()
+  store double %call2, double* @xd, align 8
+  %3 = load double* @xd, align 8
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %3)
+  %4 = load { float, float } ()** @ptrscv, align 4   ; indirect call returning { float, float }
+  %call4 = call { float, float } %4()
+  %5 = extractvalue { float, float } %call4, 0
+  %6 = extractvalue { float, float } %call4, 1
+  store float %5, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
+  store float %6, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+  %xy.real = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
+  %xy.imag = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+  %conv5 = fpext float %xy.real to double
+  %conv6 = fpext float %xy.imag to double         ; not used by any later instruction in this function
+  %xy.real7 = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
+  %xy.imag8 = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+  %conv9 = fpext float %xy.real7 to double        ; not used by any later instruction in this function
+  %conv10 = fpext float %xy.imag8 to double
+  %call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str1, i32 0, i32 0), double %conv5, double %conv10)
+  %7 = load { double, double } ()** @ptrdcv, align 4   ; indirect call returning { double, double }
+  %call12 = call { double, double } %7()
+  %8 = extractvalue { double, double } %call12, 0
+  %9 = extractvalue { double, double } %call12, 1
+  store double %8, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
+  store double %9, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+  %xyd.real = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
+  %xyd.imag = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+  %xyd.real13 = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
+  %xyd.imag14 = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+  %call15 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str1, i32 0, i32 0), double %xyd.real, double %xyd.imag14)
+  ret i32 0
+}
+
+; picel: 	.ent	main
+
+; picel:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_0)(${{[0-9]+}})
+
+; picel:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_0)(${{[0-9]+}})
+
+; picel:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sc_0)(${{[0-9]+}})
+
+; picel:	lw	${{[0-9]+}}, %got(__mips16_call_stub_dc_0)(${{[0-9]+}})
+
+
+declare i32 @printf(i8*, ...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+
+
diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll
index c6da8b1ac9a0..f4dd1eb78a1d 100644
--- a/test/CodeGen/Mips/i32k.ll
+++ b/test/CodeGen/Mips/i32k.ll
@@ -1,16 +1,23 @@
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16a
-; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16b
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
 
 @.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
 
 define i32 @main() nounwind {
 entry:
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind
-; 16a:	li	${{[0-9]+}}, 29905
-; 16b:	li	${{[0-9]+}}, 16408
+; 16:	lw	${{[0-9]+}}, 1f
+; 16:	b	2f
+; 16:	.align	2
+; 16: 1: 	.word	1075344593
+; 16: 2:
+; Above, the printf argument is expected as an aligned inline .word that is loaded by local label and branched over.
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind
-; 16a:	li	${{[0-9]+}}, 49127
-; 16b:	li	${{[0-9]+}}, 35631
+; The negated constant passed to the second printf call is expected to be emitted the same way.
+; 16:	lw	${{[0-9]+}}, 1f
+; 16:	b	2f
+; 16:	.align	2
+; 16: 1: 	.word	-1075344593
+; 16: 2:
   ret i32 0
 }
 
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index 704014cba010..5b2d13518035 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -2,27 +2,27 @@
 
 define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
 entry:
-; CHECK: move $[[R1:[0-9]+]], $5
-; CHECK: move $[[R0:[0-9]+]], $4
-; CHECK: ori $6, ${{[0-9]+}}, 3855
-; CHECK: ori $7, ${{[0-9]+}}, 22136
-; CHECK: lw  $25, %call16(ff1)
+; CHECK-DAG: lw $[[R2:[0-9]+]], 80($sp)
+; CHECK-DAG: lw $[[R3:[0-9]+]], 84($sp)
+; CHECK-DAG: move $[[R1:[0-9]+]], $5
+; CHECK-DAG: move $[[R0:[0-9]+]], $4
+; CHECK-DAG: ori $6, ${{[0-9]+}}, 3855
+; CHECK-DAG: ori $7, ${{[0-9]+}}, 22136
+; CHECK-DAG: lw  $25, %call16(ff1)
 ; CHECK: jalr
   tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
-; CHECK: lw $25, %call16(ff2)
-; CHECK: lw $[[R2:[0-9]+]], 80($sp)
-; CHECK: lw $[[R3:[0-9]+]], 84($sp)
-; CHECK: move $4, $[[R2]]
-; CHECK: move $5, $[[R3]]
+; CHECK-DAG: lw $25, %call16(ff2)
+; CHECK-DAG: move $4, $[[R2]]
+; CHECK-DAG: move $5, $[[R3]]
 ; CHECK: jalr $25
   tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
   %sub = add nsw i32 %i, -1
-; CHECK: lw $25, %call16(ff3)
-; CHECK: sw $[[R1]], 28($sp)
-; CHECK: sw $[[R0]], 24($sp)
-; CHECK: move $6, $[[R2]]
-; CHECK: move $7, $[[R3]]
-; CHECK: jalr $25
+; CHECK-DAG: lw $25, %call16(ff3)
+; CHECK-DAG: sw $[[R1]], 28($sp)
+; CHECK-DAG: sw $[[R0]], 24($sp)
+; CHECK-DAG: move $6, $[[R2]]
+; CHECK-DAG: move $7, $[[R3]]
+; CHECK:     jalr $25
   tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind
   ret void
 }
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index 01978994b2f5..7bb4adc31bd8 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -10,7 +10,7 @@
 ; X with -3
 define i32 @constraint_X() nounwind {
 entry:
-;CHECK_LITTLE_32:   constraint_X:
+;CHECK_LITTLE_32-LABEL:   constraint_X:
 ;CHECK_LITTLE_32: #APP
 ;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
 ;CHECK_LITTLE_32: #NO_APP
@@ -21,7 +21,7 @@ entry:
 ; x with -3
 define i32 @constraint_x() nounwind {
 entry:
-;CHECK_LITTLE_32:   constraint_x:
+;CHECK_LITTLE_32-LABEL:   constraint_x:
 ;CHECK_LITTLE_32: #APP
 ;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffd
 ;CHECK_LITTLE_32: #NO_APP
@@ -32,7 +32,7 @@ entry:
 ; d with -3
 define i32 @constraint_d() nounwind {
 entry:
-;CHECK_LITTLE_32:   constraint_d:
+;CHECK_LITTLE_32-LABEL:   constraint_d:
 ;CHECK_LITTLE_32:   #APP
 ;CHECK_LITTLE_32:   addi ${{[0-9]+}},${{[0-9]+}},-3
 ;CHECK_LITTLE_32:   #NO_APP
@@ -43,7 +43,7 @@ entry:
 ; m with -3
 define i32 @constraint_m() nounwind {
 entry:
-;CHECK_LITTLE_32:   constraint_m:
+;CHECK_LITTLE_32-LABEL:   constraint_m:
 ;CHECK_LITTLE_32:   #APP
 ;CHECK_LITTLE_32:   addi ${{[0-9]+}},${{[0-9]+}},-4
 ;CHECK_LITTLE_32:   #NO_APP
@@ -54,7 +54,7 @@ entry:
 ; z with -3
 define i32 @constraint_z() nounwind {
 entry:
-;CHECK_LITTLE_32: constraint_z:
+;CHECK_LITTLE_32-LABEL: constraint_z:
 ;CHECK_LITTLE_32:    #APP
 ;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},-3
 ;CHECK_LITTLE_32:    #NO_APP
@@ -71,7 +71,7 @@ entry:
 ; a long long in 32 bit mode (use to assert)
 define i32 @constraint_longlong() nounwind {
 entry:
-;CHECK_LITTLE_32: constraint_longlong:
+;CHECK_LITTLE_32-LABEL: constraint_longlong:
 ;CHECK_LITTLE_32:    #APP
 ;CHECK_LITTLE_32:    addi ${{[0-9]+}},${{[0-9]+}},3
 ;CHECK_LITTLE_32:    #NO_APP
@@ -82,7 +82,7 @@ entry:
 ; D, in little endian the source reg will be 4 bytes into the long long
 define i32 @constraint_D() nounwind {
 entry:
-;CHECK_LITTLE_32: constraint_D:
+;CHECK_LITTLE_32-LABEL: constraint_D:
 ;CHECK_LITTLE_32:    lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
 ;CHECK_LITTLE_32:    lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
 ;CHECK_LITTLE_32:    lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
@@ -91,7 +91,7 @@ entry:
 ;CHECK_LITTLE_32:    #NO_APP
 
 ; D, in big endian the source reg will also be 4 bytes into the long long
-;CHECK_BIG_32:    constraint_D:
+;CHECK_BIG_32-LABEL:    constraint_D:
 ;CHECK_BIG_32:       lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
 ;CHECK_BIG_32:       lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
 ;CHECK_BIG_32:       lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
@@ -107,7 +107,7 @@ entry:
 ; L, in little endian the source reg will be 0 bytes into the long long
 define i32 @constraint_L() nounwind {
 entry:
-;CHECK_LITTLE_32: constraint_L:
+;CHECK_LITTLE_32-LABEL: constraint_L:
 ;CHECK_LITTLE_32:    lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
 ;CHECK_LITTLE_32:    lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
 ;CHECK_LITTLE_32:    lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
@@ -115,7 +115,7 @@ entry:
 ;CHECK_LITTLE_32:    or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
 ;CHECK_LITTLE_32:    #NO_APP
 ; L, in big endian the source reg will be 4 bytes into the long long
-;CHECK_BIG_32: constraint_L:
+;CHECK_BIG_32-LABEL: constraint_L:
 ;CHECK_BIG_32:       lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
 ;CHECK_BIG_32:       lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
 ;CHECK_BIG_32:       lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
@@ -131,7 +131,7 @@ entry:
 ; M, in little endian the source reg will be 4 bytes into the long long
 define i32 @constraint_M() nounwind {
 entry:
-;CHECK_LITTLE_32: constraint_M:
+;CHECK_LITTLE_32-LABEL: constraint_M:
 ;CHECK_LITTLE_32:    lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
 ;CHECK_LITTLE_32:    lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
 ;CHECK_LITTLE_32:    lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
@@ -139,7 +139,7 @@ entry:
 ;CHECK_LITTLE_32:    or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
 ;CHECK_LITTLE_32:    #NO_APP
 ; M, in big endian the source reg will be 0 bytes into the long long
-;CHECK_BIG_32:    constraint_M:
+;CHECK_BIG_32-LABEL:    constraint_M:
 ;CHECK_BIG_32:       lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
 ;CHECK_BIG_32:       lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
 ;CHECK_BIG_32:       lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/int-to-float-conversion.ll b/test/CodeGen/Mips/int-to-float-conversion.ll
new file mode 100644
index 000000000000..c2baf442f4ae
--- /dev/null
+++ b/test/CodeGen/Mips/int-to-float-conversion.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
+
+@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
+@i3 = common global i32* null, align 4
+
+; 32-LABEL: test_float_int_:
+; 32: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 32: cvt.s.w $f{{[0-9]+}}, $f[[R0]]
+; Signed i32 to float. Only the mipsel RUN line has expectations here; cvt.s.w must read the FPU register that mtc1 wrote.
+define float @test_float_int_(i32 %a) {
+entry:
+  %conv = sitofp i32 %a to float
+  ret float %conv
+}
+
+; 32-LABEL: test_double_int_:
+; 32: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 32: cvt.d.w $f{{[0-9]+}}, $f[[R0]]
+; 64-LABEL: test_double_int_:
+; 64: mtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64: cvt.d.w $f{{[0-9]+}}, $f[[R0]]
+; Signed i32 to double. Both RUN lines expect mtc1 followed by cvt.d.w reading the FPU register that mtc1 wrote.
+define double @test_double_int_(i32 %a) {
+entry:
+  %conv = sitofp i32 %a to double
+  ret double %conv
+}
+
+; 64-LABEL: test_float_LL_:
+; 64: dmtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64: cvt.s.l $f{{[0-9]+}}, $f[[R0]]
+; Signed i64 to float. Only the mips64el RUN line has expectations here; cvt.s.l must read the FPU register that dmtc1 wrote.
+define float @test_float_LL_(i64 %a) {
+entry:
+  %conv = sitofp i64 %a to float
+  ret float %conv
+}
+
+; 64-LABEL: test_double_LL_:
+; 64: dmtc1 ${{[0-9]+}}, $f[[R0:[0-9]+]]
+; 64: cvt.d.l $f{{[0-9]+}}, $f[[R0]]
+; Signed i64 to double. Only the mips64el RUN line has expectations here; cvt.d.l must read the FPU register that dmtc1 wrote.
+define double @test_double_LL_(i64 %a) {
+entry:
+  %conv = sitofp i64 %a to double
+  ret double %conv
+}
diff --git a/test/CodeGen/Mips/largefr1.ll b/test/CodeGen/Mips/largefr1.ll
index 0fe89f71d9f3..9a5fd08d17ac 100644
--- a/test/CodeGen/Mips/largefr1.ll
+++ b/test/CodeGen/Mips/largefr1.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=1
 
+
 @i = common global i32 0, align 4
 @j = common global i32 0, align 4
 @.str = private unnamed_addr constant [8 x i8] c"%i %i \0A\00", align 1
@@ -22,22 +23,34 @@ entry:
 
 define i32 @main() nounwind {
 entry:
-; 1: main: 
-; 1: 1: 	.word	-797992
-; 1:            li ${{[0-9]+}}, 12
-; 1:            sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 1-LABEL: main:
+; 1: 1: 	.word	-798000
+; 1:            lw ${{[0-9]+}}, 1f
+; 1:            b 2f
+; 1:            .align 2
+; 1:            .word	800020
+
+; 1:            b 2f
+; 1:            .align 2
+; 1:            .word	400020
+
+; 1:            move ${{[0-9]+}}, $sp
 ; 1:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 2:            move $sp, ${{[0-9]+}}
-; 2:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 1:            li ${{[0-9]+}}, 6
-; 1:            sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 1:            addiu ${{[0-9]+}}, ${{[0-9]+}}, 0
+
+
+
+; 1:            b 2f
+; 1:            .align 2
+; 1:            .word	400220
+
+; 1:            move ${{[0-9]+}}, $sp
 ; 1:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 2:            move $sp, ${{[0-9]+}}
-; 2:            addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
-; 1:          	addiu	${{[0-9]+}}, ${{[0-9]+}}, 6800
-; 1: 	        li	${{[0-9]+}}, 1
-; 1:	        sll	${{[0-9]+}}, ${{[0-9]+}}, 16
-; 2: 	        li	${{[0-9]+}}, 34463
+; 1:           	lw	${{[0-9]+}}, 0(${{[0-9]+}})
+
+
+
+
   %retval = alloca i32, align 4
   %one = alloca [100000 x i32], align 4
   %two = alloca [100000 x i32], align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 1e96346d1dd7..09fee3d9063f 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -18,11 +18,11 @@ entry:
 ; 64:  dsll  $[[R0]], $[[R0]], 48
 ; 64:  daddiu  $[[R0]], $[[R0]], -1
 ; 64:  dsll  $[[R0]], $[[R0]], 16
-; 64:  daddiu  $[[R0]], $[[R0]], -48
+; 64:  daddiu  $[[R0]], $[[R0]], -32
 ; 64:  daddu $sp, $sp, $[[R0]]
 ; 64:  lui $[[R1:[0-9]+]], 1
 ; 64:  daddu $[[R1]], $sp, $[[R1]]
-; 64:  sd  $ra, 40($[[R1]])
+; 64:  sd  $ra, 24($[[R1]])
 
   %agg.tmp = alloca %struct.S1, align 1
   %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
diff --git a/test/CodeGen/Mips/lazy-binding.ll b/test/CodeGen/Mips/lazy-binding.ll
new file mode 100644
index 000000000000..839155adad9a
--- /dev/null
+++ b/test/CodeGen/Mips/lazy-binding.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+; CHECK-LABEL: foo6:
+; CHECK: %while.body
+; CHECK: lw  $25, %call16(foo2)(${{[0-9]+}})
+; CHECK: jalr $25
+; CHECK: %while.end
+; Loop calling foo2 once per iteration: the %call16(foo2) load into $25 and the jalr must appear between the while.body and while.end markers.
+define void @foo6(i32 %n) {
+entry:
+  %tobool1 = icmp eq i32 %n, 0
+  br i1 %tobool1, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+  %dec = add nsw i32 %n.addr.02, -1
+  tail call void @foo2()
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
+declare void @foo2()
+
+; CHECK-LABEL: foo1:
+; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
+; CHECK: jalr $25
+; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
+; CHECK: jalr $25
+; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
+; CHECK: jalr $25
+; Three back-to-back calls to foo2: each call is expected to have its own %call16(foo2) load into $25 ahead of its jalr.
+define void @foo1() {
+entry:
+  tail call void @foo2()
+  tail call void @foo2()
+  tail call void @foo2()
+  ret void
+}
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
index e157c540b538..1fa54b428cd9 100644
--- a/test/CodeGen/Mips/lit.local.cfg
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Mips' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index 1a4f79c191e1..af192d0e9217 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -1,13 +1,17 @@
-; RUN: llc -march=mipsel -force-mips-long-branch < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=N64
 
 @g0 = external global i32
 
 define void @foo1(i32 %s) nounwind {
 entry:
+; O32: nop
+; O32: addiu $sp, $sp, -8
 ; O32: bal
 ; O32: lui $1, 0
 ; O32: addiu $1, $1, {{[0-9]+}} 
+; N64: nop
+; N64: daddiu $sp, $sp, -16
 ; N64: lui $1, 0
 ; N64: daddiu $1, $1, 0
 ; N64: dsll $1, $1, 16
diff --git a/test/CodeGen/Mips/mips16_32_1.ll b/test/CodeGen/Mips/mips16_32_1.ll
index 6f4826ea9600..e156641d4e50 100644
--- a/test/CodeGen/Mips/mips16_32_1.ll
+++ b/test/CodeGen/Mips/mips16_32_1.ll
@@ -11,4 +11,4 @@ entry:
 ; CHECK:	save	{{.+}}
 ; CHECK:	restore	{{.+}} 
 ; CHECK:	.end	foo
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_10.ll b/test/CodeGen/Mips/mips16_32_10.ll
index 330dbfec63b9..7c017b8e4b75 100644
--- a/test/CodeGen/Mips/mips16_32_10.ll
+++ b/test/CodeGen/Mips/mips16_32_10.ll
@@ -54,6 +54,6 @@ entry:
 
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_3.ll b/test/CodeGen/Mips/mips16_32_3.ll
index 8874a8872534..dd94ec1ce80a 100644
--- a/test/CodeGen/Mips/mips16_32_3.ll
+++ b/test/CodeGen/Mips/mips16_32_3.ll
@@ -65,6 +65,6 @@ entry:
 ; 32:	.set	reorder
 ; 32:	.end	main
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_4.ll b/test/CodeGen/Mips/mips16_32_4.ll
index cdaed6c71be0..5e4907139445 100644
--- a/test/CodeGen/Mips/mips16_32_4.ll
+++ b/test/CodeGen/Mips/mips16_32_4.ll
@@ -60,6 +60,6 @@ entry:
 ; 32:	.end	main
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_5.ll b/test/CodeGen/Mips/mips16_32_5.ll
index 45e0bf49ddd2..17900a2dc75f 100644
--- a/test/CodeGen/Mips/mips16_32_5.ll
+++ b/test/CodeGen/Mips/mips16_32_5.ll
@@ -75,6 +75,6 @@ entry:
 
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_6.ll b/test/CodeGen/Mips/mips16_32_6.ll
index f4b8e7a91adc..a77031af8be6 100644
--- a/test/CodeGen/Mips/mips16_32_6.ll
+++ b/test/CodeGen/Mips/mips16_32_6.ll
@@ -81,6 +81,6 @@ entry:
 
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_7.ll b/test/CodeGen/Mips/mips16_32_7.ll
index f8726eadc70c..895b5d4346a8 100644
--- a/test/CodeGen/Mips/mips16_32_7.ll
+++ b/test/CodeGen/Mips/mips16_32_7.ll
@@ -71,6 +71,6 @@ entry:
 
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll
index e51f296f9df3..4152d687093e 100644
--- a/test/CodeGen/Mips/mips16_32_8.ll
+++ b/test/CodeGen/Mips/mips16_32_8.ll
@@ -68,7 +68,7 @@ entry:
 ; 32:	.set	reorder
 ; 32:	.end	main
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_9.ll b/test/CodeGen/Mips/mips16_32_9.ll
index f5ff36849015..c9b494f2a890 100644
--- a/test/CodeGen/Mips/mips16_32_9.ll
+++ b/test/CodeGen/Mips/mips16_32_9.ll
@@ -46,6 +46,6 @@ entry:
 
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_fpret.ll b/test/CodeGen/Mips/mips16_fpret.ll
new file mode 100644
index 000000000000..c132f63cfb01
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_fpret.ll
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=2
+; RUN: llc -mtriple=mipsel-linux-gnu  -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=3
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=4
+
+
+@x = global float 0x41F487E980000000, align 4
+@dx = global double 0x41CDCC8BC4800000, align 8
+@cx = global { float, float } { float 1.000000e+00, float 9.900000e+01 }, align 4
+@dcx = global { double, double } { double 0x42CE5E14A412B480, double 0x423AA4C580DB0000 }, align 8
+; Returns the float global @x: expects the value loaded into $2 via %lo(x), then a jal to __mips16_ret_sf.
+define float @foox()  {
+entry:
+  %0 = load float* @x, align 4
+  ret float %0
+; 1: 	.ent	foox
+; 1:	lw	$2, %lo(x)(${{[0-9]+}})
+; 1:	jal	__mips16_ret_sf
+}
+; Returns the double global @dx: expects loads into $2 (via %lo(dx)) and $3 (offset 4), then a jal to __mips16_ret_df.
+define double @foodx()  {
+entry:
+  %0 = load double* @dx, align 8
+  ret double %0
+; 1: 	.ent	foodx
+; 1: 	lw	$2, %lo(dx)(${{[0-9]+}})
+; 1:	jal	__mips16_ret_df
+; 2: 	.ent	foodx
+; 2:	lw	$3, 4(${{[0-9]+}})
+; 2:	jal	__mips16_ret_df
+; NOTE(review): each load is verified under its own FileCheck prefix, presumably so their relative order is not pinned; confirm.
+}
+; Returns the { float, float } global @cx via a stack temporary: expects loads into $2 (via %lo(cx)) and $3 (offset 4), then a jal to __mips16_ret_sc.
+define { float, float } @foocx()  {
+entry:
+  %retval = alloca { float, float }, align 4
+  %cx.real = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 0)
+  %cx.imag = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 1)
+  %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+  %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+  store float %cx.real, float* %real
+  store float %cx.imag, float* %imag
+  %0 = load { float, float }* %retval
+  ret { float, float } %0
+; 1: 	.ent	foocx
+; 1: 	lw	$2, %lo(cx)(${{[0-9]+}})
+; 1:	jal	__mips16_ret_sc
+; 2: 	.ent	foocx
+; 2:	lw	$3, 4(${{[0-9]+}})
+; 2:	jal	__mips16_ret_sc
+}
+; Returns the { double, double } global @dcx via a stack temporary: expects four word loads (via %lo(dcx), then offsets 4, 8 and 12; the last two into $4 and $5) and a jal to __mips16_ret_dc.
+define { double, double } @foodcx()  {
+entry:
+  %retval = alloca { double, double }, align 8
+  %dcx.real = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 0)
+  %dcx.imag = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 1)
+  %real = getelementptr inbounds { double, double }* %retval, i32 0, i32 0
+  %imag = getelementptr inbounds { double, double }* %retval, i32 0, i32 1
+  store double %dcx.real, double* %real
+  store double %dcx.imag, double* %imag
+  %0 = load { double, double }* %retval
+  ret { double, double } %0
+; 1: 	.ent	foodcx
+; 1: 	lw	${{[0-9]+}}, %lo(dcx)(${{[0-9]+}})
+; 1:	jal	__mips16_ret_dc
+; 2: 	.ent	foodcx
+; 2:	lw	${{[0-9]+}}, 4(${{[0-9]+}})
+; 2:	jal	__mips16_ret_dc
+; 3: 	.ent	foodcx
+; 3:	lw	$4, 8(${{[0-9]+}})
+; 3:	jal	__mips16_ret_dc
+; 4: 	.ent	foodcx
+; 4:	lw	$5, 12(${{[0-9]+}})
+; 4:	jal	__mips16_ret_dc
+}
diff --git a/test/CodeGen/Mips/mips16fpe.ll b/test/CodeGen/Mips/mips16fpe.ll
index 433543607967..10c5163f7fd0 100644
--- a/test/CodeGen/Mips/mips16fpe.ll
+++ b/test/CodeGen/Mips/mips16fpe.ll
@@ -41,7 +41,7 @@
 
 define void @test_addsf3() nounwind {
 entry:
-;16hf: test_addsf3:
+;16hf-LABEL: test_addsf3:
   %0 = load float* @x, align 4
   %1 = load float* @y, align 4
   %add = fadd float %0, %1
@@ -52,7 +52,7 @@ entry:
 
 define void @test_adddf3() nounwind {
 entry:
-;16hf: test_adddf3:
+;16hf-LABEL: test_adddf3:
   %0 = load double* @xd, align 8
   %1 = load double* @yd, align 8
   %add = fadd double %0, %1
@@ -63,7 +63,7 @@ entry:
 
 define void @test_subsf3() nounwind {
 entry:
-;16hf: test_subsf3:
+;16hf-LABEL: test_subsf3:
   %0 = load float* @x, align 4
   %1 = load float* @y, align 4
   %sub = fsub float %0, %1
@@ -74,7 +74,7 @@ entry:
 
 define void @test_subdf3() nounwind {
 entry:
-;16hf: test_subdf3:
+;16hf-LABEL: test_subdf3:
   %0 = load double* @xd, align 8
   %1 = load double* @yd, align 8
   %sub = fsub double %0, %1
@@ -85,7 +85,7 @@ entry:
 
 define void @test_mulsf3() nounwind {
 entry:
-;16hf: test_mulsf3:
+;16hf-LABEL: test_mulsf3:
   %0 = load float* @x, align 4
   %1 = load float* @y, align 4
   %mul = fmul float %0, %1
@@ -96,7 +96,7 @@ entry:
 
 define void @test_muldf3() nounwind {
 entry:
-;16hf: test_muldf3:
+;16hf-LABEL: test_muldf3:
   %0 = load double* @xd, align 8
   %1 = load double* @yd, align 8
   %mul = fmul double %0, %1
@@ -107,7 +107,7 @@ entry:
 
 define void @test_divsf3() nounwind {
 entry:
-;16hf: test_divsf3:
+;16hf-LABEL: test_divsf3:
   %0 = load float* @y, align 4
   %1 = load float* @x, align 4
   %div = fdiv float %0, %1
@@ -118,7 +118,7 @@ entry:
 
 define void @test_divdf3() nounwind {
 entry:
-;16hf: test_divdf3:
+;16hf-LABEL: test_divdf3:
   %0 = load double* @yd, align 8
   %mul = fmul double %0, 2.000000e+00
   %1 = load double* @xd, align 8
@@ -130,7 +130,7 @@ entry:
 
 define void @test_extendsfdf2() nounwind {
 entry:
-;16hf: test_extendsfdf2:
+;16hf-LABEL: test_extendsfdf2:
   %0 = load float* @x, align 4
   %conv = fpext float %0 to double
   store double %conv, double* @extendsfdf2_result, align 8
@@ -140,7 +140,7 @@ entry:
 
 define void @test_truncdfsf2() nounwind {
 entry:
-;16hf: test_truncdfsf2:
+;16hf-LABEL: test_truncdfsf2:
   %0 = load double* @xd2, align 8
   %conv = fptrunc double %0 to float
   store float %conv, float* @truncdfsf2_result, align 4
@@ -150,7 +150,7 @@ entry:
 
 define void @test_fix_truncsfsi() nounwind {
 entry:
-;16hf: test_fix_truncsfsi:
+;16hf-LABEL: test_fix_truncsfsi:
   %0 = load float* @x, align 4
   %conv = fptosi float %0 to i32
   store i32 %conv, i32* @fix_truncsfsi_result, align 4
@@ -160,7 +160,7 @@ entry:
 
 define void @test_fix_truncdfsi() nounwind {
 entry:
-;16hf: test_fix_truncdfsi:
+;16hf-LABEL: test_fix_truncdfsi:
   %0 = load double* @xd, align 8
   %conv = fptosi double %0 to i32
   store i32 %conv, i32* @fix_truncdfsi_result, align 4
@@ -170,7 +170,7 @@ entry:
 
 define void @test_floatsisf() nounwind {
 entry:
-;16hf: test_floatsisf:
+;16hf-LABEL: test_floatsisf:
   %0 = load i32* @si, align 4
   %conv = sitofp i32 %0 to float
   store float %conv, float* @floatsisf_result, align 4
@@ -180,7 +180,7 @@ entry:
 
 define void @test_floatsidf() nounwind {
 entry:
-;16hf: test_floatsidf:
+;16hf-LABEL: test_floatsidf:
   %0 = load i32* @si, align 4
   %conv = sitofp i32 %0 to double
   store double %conv, double* @floatsidf_result, align 8
@@ -190,7 +190,7 @@ entry:
 
 define void @test_floatunsisf() nounwind {
 entry:
-;16hf: test_floatunsisf:
+;16hf-LABEL: test_floatunsisf:
   %0 = load i32* @ui, align 4
   %conv = uitofp i32 %0 to float
   store float %conv, float* @floatunsisf_result, align 4
@@ -200,7 +200,7 @@ entry:
 
 define void @test_floatunsidf() nounwind {
 entry:
-;16hf: test_floatunsidf:
+;16hf-LABEL: test_floatunsidf:
   %0 = load i32* @ui, align 4
   %conv = uitofp i32 %0 to double
   store double %conv, double* @floatunsidf_result, align 8
@@ -210,7 +210,7 @@ entry:
 
 define void @test_eqsf2() nounwind {
 entry:
-;16hf: test_eqsf2:
+;16hf-LABEL: test_eqsf2:
   %0 = load float* @x, align 4
   %1 = load float* @xx, align 4
   %cmp = fcmp oeq float %0, %1
@@ -222,7 +222,7 @@ entry:
 
 define void @test_eqdf2() nounwind {
 entry:
-;16hf: test_eqdf2:
+;16hf-LABEL: test_eqdf2:
   %0 = load double* @xd, align 8
   %1 = load double* @xxd, align 8
   %cmp = fcmp oeq double %0, %1
@@ -234,7 +234,7 @@ entry:
 
 define void @test_nesf2() nounwind {
 entry:
-;16hf: test_nesf2:
+;16hf-LABEL: test_nesf2:
   %0 = load float* @x, align 4
   %1 = load float* @y, align 4
   %cmp = fcmp une float %0, %1
@@ -246,7 +246,7 @@ entry:
 
 define void @test_nedf2() nounwind {
 entry:
-;16hf: test_nedf2:
+;16hf-LABEL: test_nedf2:
   %0 = load double* @xd, align 8
   %1 = load double* @yd, align 8
   %cmp = fcmp une double %0, %1
@@ -258,7 +258,7 @@ entry:
 
 define void @test_gesf2() nounwind {
 entry:
-;16hf: test_gesf2:
+;16hf-LABEL: test_gesf2:
   %0 = load float* @x, align 4
   %1 = load float* @xx, align 4
   %cmp = fcmp oge float %0, %1
@@ -273,7 +273,7 @@ entry:
 
 define void @test_gedf2() nounwind {
 entry:
-;16hf: test_gedf2:
+;16hf-LABEL: test_gedf2:
   %0 = load double* @xd, align 8
   %1 = load double* @xxd, align 8
   %cmp = fcmp oge double %0, %1
@@ -288,7 +288,7 @@ entry:
 
 define void @test_ltsf2() nounwind {
 entry:
-;16hf: test_ltsf2:
+;16hf-LABEL: test_ltsf2:
   %0 = load float* @x, align 4
   %1 = load float* @xx, align 4
   %lnot = fcmp uge float %0, %1
@@ -304,7 +304,7 @@ entry:
 
 define void @test_ltdf2() nounwind {
 entry:
-;16hf: test_ltdf2:
+;16hf-LABEL: test_ltdf2:
   %0 = load double* @xd, align 8
   %1 = load double* @xxd, align 8
   %lnot = fcmp uge double %0, %1
@@ -320,7 +320,7 @@ entry:
 
 define void @test_lesf2() nounwind {
 entry:
-;16hf: test_lesf2:
+;16hf-LABEL: test_lesf2:
   %0 = load float* @x, align 4
   %1 = load float* @xx, align 4
   %cmp = fcmp ole float %0, %1
@@ -335,7 +335,7 @@ entry:
 
 define void @test_ledf2() nounwind {
 entry:
-;16hf: test_ledf2:
+;16hf-LABEL: test_ledf2:
   %0 = load double* @xd, align 8
   %1 = load double* @xxd, align 8
   %cmp = fcmp ole double %0, %1
@@ -350,7 +350,7 @@ entry:
 
 define void @test_gtsf2() nounwind {
 entry:
-;16hf: test_gtsf2:
+;16hf-LABEL: test_gtsf2:
   %0 = load float* @x, align 4
   %1 = load float* @xx, align 4
   %lnot = fcmp ule float %0, %1
@@ -365,7 +365,7 @@ entry:
 
 define void @test_gtdf2() nounwind {
 entry:
-;16hf: test_gtdf2:
+;16hf-LABEL: test_gtdf2:
   %0 = load double* @xd, align 8
   %1 = load double* @xxd, align 8
   %lnot = fcmp ule double %0, %1
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index 5892cab4f8ea..dc8bbfdd5baf 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -7,7 +7,7 @@
 @gf1 = external global float
 @gd1 = external global double
 
-; CHECK: addLD:
+; CHECK-LABEL: addLD:
 ; CHECK: ld $25, %call16(__addtf3)
 
 define fp128 @addLD() {
@@ -18,7 +18,7 @@ entry:
   ret fp128 %add
 }
 
-; CHECK: subLD:
+; CHECK-LABEL: subLD:
 ; CHECK: ld $25, %call16(__subtf3)
 
 define fp128 @subLD() {
@@ -29,7 +29,7 @@ entry:
   ret fp128 %sub
 }
 
-; CHECK: mulLD:
+; CHECK-LABEL: mulLD:
 ; CHECK: ld $25, %call16(__multf3)
 
 define fp128 @mulLD() {
@@ -40,7 +40,7 @@ entry:
   ret fp128 %mul
 }
 
-; CHECK: divLD:
+; CHECK-LABEL: divLD:
 ; CHECK: ld $25, %call16(__divtf3)
 
 define fp128 @divLD() {
@@ -51,7 +51,7 @@ entry:
   ret fp128 %div
 }
 
-; CHECK: conv_LD_char:
+; CHECK-LABEL: conv_LD_char:
 ; CHECK: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_char(i8 signext %a) {
@@ -60,7 +60,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_short:
+; CHECK-LABEL: conv_LD_short:
 ; CHECK: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_short(i16 signext %a) {
@@ -69,7 +69,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_int:
+; CHECK-LABEL: conv_LD_int:
 ; CHECK: ld $25, %call16(__floatsitf)
 
 define fp128 @conv_LD_int(i32 %a) {
@@ -78,7 +78,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_LL:
+; CHECK-LABEL: conv_LD_LL:
 ; CHECK: ld $25, %call16(__floatditf)
 
 define fp128 @conv_LD_LL(i64 %a) {
@@ -87,7 +87,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_UChar:
+; CHECK-LABEL: conv_LD_UChar:
 ; CHECK: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UChar(i8 zeroext %a) {
@@ -96,7 +96,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_UShort:
+; CHECK-LABEL: conv_LD_UShort:
 ; CHECK: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UShort(i16 zeroext %a) {
@@ -105,7 +105,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_UInt:
+; CHECK-LABEL: conv_LD_UInt:
 ; CHECK: ld $25, %call16(__floatunsitf)
 
 define fp128 @conv_LD_UInt(i32 %a) {
@@ -114,7 +114,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_ULL:
+; CHECK-LABEL: conv_LD_ULL:
 ; CHECK: ld $25, %call16(__floatunditf)
 
 define fp128 @conv_LD_ULL(i64 %a) {
@@ -123,7 +123,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_char_LD:
+; CHECK-LABEL: conv_char_LD:
 ; CHECK: ld $25, %call16(__fixtfsi)
 
 define signext i8 @conv_char_LD(fp128 %a) {
@@ -132,7 +132,7 @@ entry:
   ret i8 %conv
 }
 
-; CHECK: conv_short_LD:
+; CHECK-LABEL: conv_short_LD:
 ; CHECK: ld $25, %call16(__fixtfsi)
 
 define signext i16 @conv_short_LD(fp128 %a) {
@@ -141,7 +141,7 @@ entry:
   ret i16 %conv
 }
 
-; CHECK: conv_int_LD:
+; CHECK-LABEL: conv_int_LD:
 ; CHECK: ld $25, %call16(__fixtfsi)
 
 define i32 @conv_int_LD(fp128 %a) {
@@ -150,7 +150,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: conv_LL_LD:
+; CHECK-LABEL: conv_LL_LD:
 ; CHECK: ld $25, %call16(__fixtfdi)
 
 define i64 @conv_LL_LD(fp128 %a) {
@@ -159,7 +159,7 @@ entry:
   ret i64 %conv
 }
 
-; CHECK: conv_UChar_LD:
+; CHECK-LABEL: conv_UChar_LD:
 ; CHECK: ld $25, %call16(__fixtfsi)
 
 define zeroext i8 @conv_UChar_LD(fp128 %a) {
@@ -168,7 +168,7 @@ entry:
   ret i8 %conv
 }
 
-; CHECK: conv_UShort_LD:
+; CHECK-LABEL: conv_UShort_LD:
 ; CHECK: ld $25, %call16(__fixtfsi)
 
 define zeroext i16 @conv_UShort_LD(fp128 %a) {
@@ -177,7 +177,7 @@ entry:
   ret i16 %conv
 }
 
-; CHECK: conv_UInt_LD:
+; CHECK-LABEL: conv_UInt_LD:
 ; CHECK: ld $25, %call16(__fixunstfsi)
 
 define i32 @conv_UInt_LD(fp128 %a) {
@@ -186,7 +186,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: conv_ULL_LD:
+; CHECK-LABEL: conv_ULL_LD:
 ; CHECK: ld $25, %call16(__fixunstfdi)
 
 define i64 @conv_ULL_LD(fp128 %a) {
@@ -195,7 +195,7 @@ entry:
   ret i64 %conv
 }
 
-; CHECK: conv_LD_float:
+; CHECK-LABEL: conv_LD_float:
 ; CHECK: ld $25, %call16(__extendsftf2)
 
 define fp128 @conv_LD_float(float %a) {
@@ -204,7 +204,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_LD_double:
+; CHECK-LABEL: conv_LD_double:
 ; CHECK: ld $25, %call16(__extenddftf2)
 
 define fp128 @conv_LD_double(double %a) {
@@ -213,7 +213,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: conv_float_LD:
+; CHECK-LABEL: conv_float_LD:
 ; CHECK: ld $25, %call16(__trunctfsf2)
 
 define float @conv_float_LD(fp128 %a) {
@@ -222,7 +222,7 @@ entry:
   ret float %conv
 }
 
-; CHECK: conv_double_LD:
+; CHECK-LABEL: conv_double_LD:
 ; CHECK: ld $25, %call16(__trunctfdf2)
 
 define double @conv_double_LD(fp128 %a) {
@@ -231,13 +231,13 @@ entry:
   ret double %conv
 }
 
-; CHECK: libcall1_fabsl:
-; CHECK: ld      $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
-; CHECK: daddiu  $[[R1:[0-9]+]], $zero, 1
-; CHECK: dsll    $[[R2:[0-9]+]], $[[R1]], 63
-; CHECK: daddiu  $[[R3:[0-9]+]], $[[R2]], -1
-; CHECK: and     $4, $[[R0]], $[[R3]]
-; CHECK: ld      $2, 0($[[R4]])
+; CHECK-LABEL:             libcall1_fabsl:
+; CHECK-DAG: ld      $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
+; CHECK-DAG: daddiu  $[[R1:[0-9]+]], $zero, 1
+; CHECK-DAG: dsll    $[[R2:[0-9]+]], $[[R1]], 63
+; CHECK-DAG: daddiu  $[[R3:[0-9]+]], $[[R2]], -1
+; CHECK-DAG: and     $4, $[[R0]], $[[R3]]
+; CHECK-DAG: ld      $2, 0($[[R4]])
 
 define fp128 @libcall1_fabsl() {
 entry:
@@ -248,7 +248,7 @@ entry:
 
 declare fp128 @fabsl(fp128) #1
 
-; CHECK: libcall1_ceill:
+; CHECK-LABEL: libcall1_ceill:
 ; CHECK: ld $25, %call16(ceill)
 
 define fp128 @libcall1_ceill() {
@@ -260,7 +260,7 @@ entry:
 
 declare fp128 @ceill(fp128) #1
 
-; CHECK: libcall1_sinl:
+; CHECK-LABEL: libcall1_sinl:
 ; CHECK: ld $25, %call16(sinl)
 
 define fp128 @libcall1_sinl() {
@@ -272,7 +272,7 @@ entry:
 
 declare fp128 @sinl(fp128) #2
 
-; CHECK: libcall1_cosl:
+; CHECK-LABEL: libcall1_cosl:
 ; CHECK: ld $25, %call16(cosl)
 
 define fp128 @libcall1_cosl() {
@@ -284,7 +284,7 @@ entry:
 
 declare fp128 @cosl(fp128) #2
 
-; CHECK: libcall1_expl:
+; CHECK-LABEL: libcall1_expl:
 ; CHECK: ld $25, %call16(expl)
 
 define fp128 @libcall1_expl() {
@@ -296,7 +296,7 @@ entry:
 
 declare fp128 @expl(fp128) #2
 
-; CHECK: libcall1_exp2l:
+; CHECK-LABEL: libcall1_exp2l:
 ; CHECK: ld $25, %call16(exp2l)
 
 define fp128 @libcall1_exp2l() {
@@ -308,7 +308,7 @@ entry:
 
 declare fp128 @exp2l(fp128) #2
 
-; CHECK: libcall1_logl:
+; CHECK-LABEL: libcall1_logl:
 ; CHECK: ld $25, %call16(logl)
 
 define fp128 @libcall1_logl() {
@@ -320,7 +320,7 @@ entry:
 
 declare fp128 @logl(fp128) #2
 
-; CHECK: libcall1_log2l:
+; CHECK-LABEL: libcall1_log2l:
 ; CHECK: ld $25, %call16(log2l)
 
 define fp128 @libcall1_log2l() {
@@ -332,7 +332,7 @@ entry:
 
 declare fp128 @log2l(fp128) #2
 
-; CHECK: libcall1_log10l:
+; CHECK-LABEL: libcall1_log10l:
 ; CHECK: ld $25, %call16(log10l)
 
 define fp128 @libcall1_log10l() {
@@ -344,7 +344,7 @@ entry:
 
 declare fp128 @log10l(fp128) #2
 
-; CHECK: libcall1_nearbyintl:
+; CHECK-LABEL: libcall1_nearbyintl:
 ; CHECK: ld $25, %call16(nearbyintl)
 
 define fp128 @libcall1_nearbyintl() {
@@ -356,7 +356,7 @@ entry:
 
 declare fp128 @nearbyintl(fp128) #1
 
-; CHECK: libcall1_floorl:
+; CHECK-LABEL: libcall1_floorl:
 ; CHECK: ld $25, %call16(floorl)
 
 define fp128 @libcall1_floorl() {
@@ -368,7 +368,7 @@ entry:
 
 declare fp128 @floorl(fp128) #1
 
-; CHECK: libcall1_sqrtl:
+; CHECK-LABEL: libcall1_sqrtl:
 ; CHECK: ld $25, %call16(sqrtl)
 
 define fp128 @libcall1_sqrtl() {
@@ -380,7 +380,7 @@ entry:
 
 declare fp128 @sqrtl(fp128) #2
 
-; CHECK: libcall1_rintl:
+; CHECK-LABEL: libcall1_rintl:
 ; CHECK: ld $25, %call16(rintl)
 
 define fp128 @libcall1_rintl() {
@@ -392,7 +392,7 @@ entry:
 
 declare fp128 @rintl(fp128) #1
 
-; CHECK: libcall_powil:
+; CHECK-LABEL: libcall_powil:
 ; CHECK: ld $25, %call16(__powitf2)
 
 define fp128 @libcall_powil(fp128 %a, i32 %b) {
@@ -403,18 +403,18 @@ entry:
 
 declare fp128 @llvm.powi.f128(fp128, i32) #3
 
-; CHECK: libcall2_copysignl:
-; CHECK: daddiu $[[R2:[0-9]+]], $zero, 1
-; CHECK: dsll   $[[R3:[0-9]+]], $[[R2]], 63
-; CHECK: ld     $[[R0:[0-9]+]], %got_disp(gld1)
-; CHECK: ld     $[[R1:[0-9]+]], 8($[[R0]])
-; CHECK: and    $[[R4:[0-9]+]], $[[R1]], $[[R3]]
-; CHECK: ld     $[[R5:[0-9]+]], %got_disp(gld0)
-; CHECK: ld     $[[R6:[0-9]+]], 8($[[R5]])
-; CHECK: daddiu $[[R7:[0-9]+]], $[[R3]], -1
-; CHECK: and    $[[R8:[0-9]+]], $[[R6]], $[[R7]]
-; CHECK: or     $4, $[[R8]], $[[R4]]
-; CHECK: ld     $2, 0($[[R5]])
+; CHECK-LABEL:     libcall2_copysignl:
+; CHECK-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
+; CHECK-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 63
+; CHECK-DAG: ld     $[[R0:[0-9]+]], %got_disp(gld1)
+; CHECK-DAG: ld     $[[R1:[0-9]+]], 8($[[R0]])
+; CHECK-DAG: and    $[[R4:[0-9]+]], $[[R1]], $[[R3]]
+; CHECK-DAG: ld     $[[R5:[0-9]+]], %got_disp(gld0)
+; CHECK-DAG: ld     $[[R6:[0-9]+]], 8($[[R5]])
+; CHECK-DAG: daddiu $[[R7:[0-9]+]], $[[R3]], -1
+; CHECK-DAG: and    $[[R8:[0-9]+]], $[[R6]], $[[R7]]
+; CHECK-DAG: or     $4, $[[R8]], $[[R4]]
+; CHECK-DAG: ld     $2, 0($[[R5]])
 
 define fp128 @libcall2_copysignl() {
 entry:
@@ -426,7 +426,7 @@ entry:
 
 declare fp128 @copysignl(fp128, fp128) #1
 
-; CHECK: libcall2_powl:
+; CHECK-LABEL: libcall2_powl:
 ; CHECK: ld $25, %call16(powl)
 
 define fp128 @libcall2_powl() {
@@ -439,7 +439,7 @@ entry:
 
 declare fp128 @powl(fp128, fp128) #2
 
-; CHECK: libcall2_fmodl:
+; CHECK-LABEL: libcall2_fmodl:
 ; CHECK: ld $25, %call16(fmodl)
 
 define fp128 @libcall2_fmodl() {
@@ -452,7 +452,7 @@ entry:
 
 declare fp128 @fmodl(fp128, fp128) #2
 
-; CHECK: libcall3_fmal:
+; CHECK-LABEL: libcall3_fmal:
 ; CHECK: ld $25, %call16(fmal)
 
 define fp128 @libcall3_fmal() {
@@ -466,7 +466,7 @@ entry:
 
 declare fp128 @llvm.fma.f128(fp128, fp128, fp128) #4
 
-; CHECK: cmp_lt:
+; CHECK-LABEL: cmp_lt:
 ; CHECK: ld $25, %call16(__lttf2)
 
 define i32 @cmp_lt(fp128 %a, fp128 %b) {
@@ -476,7 +476,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: cmp_le:
+; CHECK-LABEL: cmp_le:
 ; CHECK: ld $25, %call16(__letf2)
 
 define i32 @cmp_le(fp128 %a, fp128 %b) {
@@ -486,7 +486,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: cmp_gt:
+; CHECK-LABEL: cmp_gt:
 ; CHECK: ld $25, %call16(__gttf2)
 
 define i32 @cmp_gt(fp128 %a, fp128 %b) {
@@ -496,7 +496,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: cmp_ge:
+; CHECK-LABEL: cmp_ge:
 ; CHECK: ld $25, %call16(__getf2)
 
 define i32 @cmp_ge(fp128 %a, fp128 %b) {
@@ -506,7 +506,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: cmp_eq:
+; CHECK-LABEL: cmp_eq:
 ; CHECK: ld $25, %call16(__eqtf2)
 
 define i32 @cmp_eq(fp128 %a, fp128 %b) {
@@ -516,7 +516,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: cmp_ne:
+; CHECK-LABEL: cmp_ne:
 ; CHECK: ld $25, %call16(__netf2)
 
 define i32 @cmp_ne(fp128 %a, fp128 %b) {
@@ -526,7 +526,7 @@ entry:
   ret i32 %conv
 }
 
-; CHECK: load_LD_LD:
+; CHECK-LABEL: load_LD_LD:
 ; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
 ; CHECK: ld $2, 0($[[R0]])
 ; CHECK: ld $4, 8($[[R0]])
@@ -537,7 +537,7 @@ entry:
   ret fp128 %0
 }
 
-; CHECK: load_LD_float:
+; CHECK-LABEL: load_LD_float:
 ; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gf1)
 ; CHECK: lw   $4, 0($[[R0]])
 ; CHECK: ld   $25, %call16(__extendsftf2)
@@ -550,7 +550,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: load_LD_double:
+; CHECK-LABEL: load_LD_double:
 ; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gd1)
 ; CHECK: ld   $4, 0($[[R0]])
 ; CHECK: ld   $25, %call16(__extenddftf2)
@@ -563,7 +563,7 @@ entry:
   ret fp128 %conv
 }
 
-; CHECK: store_LD_LD:
+; CHECK-LABEL: store_LD_LD:
 ; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
 ; CHECK: ld $[[R1:[0-9]+]], 0($[[R0]])
 ; CHECK: ld $[[R2:[0-9]+]], 8($[[R0]])
@@ -578,7 +578,7 @@ entry:
   ret void
 }
 
-; CHECK: store_LD_float:
+; CHECK-LABEL: store_LD_float:
 ; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gld1)
 ; CHECK: ld   $4, 0($[[R0]])
 ; CHECK: ld   $5, 8($[[R0]])
@@ -595,7 +595,7 @@ entry:
   ret void
 }
 
-; CHECK: store_LD_double:
+; CHECK-LABEL: store_LD_double:
 ; CHECK: ld   $[[R0:[0-9]+]], %got_disp(gld1)
 ; CHECK: ld   $4, 0($[[R0]])
 ; CHECK: ld   $5, 8($[[R0]])
@@ -612,7 +612,7 @@ entry:
   ret void
 }
 
-; CHECK: select_LD:
+; CHECK-LABEL: select_LD:
 ; CHECK: movn $8, $6, $4
 ; CHECK: movn $9, $7, $4
 ; CHECK: move $2, $8
@@ -625,7 +625,7 @@ entry:
   ret fp128 %cond
 }
 
-; CHECK: selectCC_LD:
+; CHECK-LABEL: selectCC_LD:
 ; CHECK: move $[[R0:[0-9]+]], $11
 ; CHECK: move $[[R1:[0-9]+]], $10
 ; CHECK: move $[[R2:[0-9]+]], $9
diff --git a/test/CodeGen/Mips/mips64-libcall.ll b/test/CodeGen/Mips/mips64-libcall.ll
index d54598be70d8..290baafd18b9 100644
--- a/test/CodeGen/Mips/mips64-libcall.ll
+++ b/test/CodeGen/Mips/mips64-libcall.ll
@@ -5,7 +5,7 @@
 
 ; Check that %add is not passed in an integer register.
 ;
-; HARD: callfloor:
+; HARD-LABEL: callfloor:
 ; HARD-NOT: dmfc1 $4
 
 define double @callfloor(double %d) nounwind readnone {
@@ -19,7 +19,7 @@ declare double @floor(double) nounwind readnone
 
 ; Check call16.
 ;
-; SOFT: f64add:
+; SOFT-LABEL: f64add:
 ; SOFT: ld $25, %call16(__adddf3)
 
 define double @f64add(double %a, double %b) {
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index 041831149057..2894d698adcc 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,4 +1,7 @@
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck %s
+
+@gll0 = common global i64 0, align 8
+@gll1 = common global i64 0, align 8
 
 define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
@@ -86,23 +89,33 @@ entry:
 
 define i64 @f14(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: ddiv $zero
+; CHECK-LABEL: f14:
+; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; CHECK: teq $[[R0]], $zero, 7
 ; CHECK: mflo
-  %div = sdiv i64 %a, %b
+  %0 = load i64* @gll0, align 8
+  %1 = load i64* @gll1, align 8
+  %div = sdiv i64 %0, %1
   ret i64 %div
 }
 
-define i64 @f15(i64 %a, i64 %b) nounwind readnone {
+define i64 @f15() nounwind readonly {  ; body reads @gll0/@gll1, so readnone would be incorrect
 entry:
-; CHECK: ddivu $zero
+; CHECK-LABEL: f15:
+; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; CHECK: teq $[[R0]], $zero, 7
 ; CHECK: mflo
-  %div = udiv i64 %a, %b
+  %0 = load i64* @gll0, align 8  ; dividend
+  %1 = load i64* @gll1, align 8  ; divisor, the register the teq expectation refers to
+  %div = udiv i64 %0, %1
   ret i64 %div
 }
 
 define i64 @f16(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: ddiv $zero
+; CHECK-LABEL: f16:
+; CHECK: ddiv $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; CHECK: teq $[[R0]], $zero, 7
 ; CHECK: mfhi
   %rem = srem i64 %a, %b
   ret i64 %rem
@@ -110,7 +123,9 @@ entry:
 
 define i64 @f17(i64 %a, i64 %b) nounwind readnone {
 entry:
-; CHECK: ddivu $zero
+; CHECK-LABEL: f17:
+; CHECK: ddivu $zero, ${{[0-9]+}}, $[[R0:[0-9]+]]
+; CHECK: teq $[[R0]], $zero, 7
 ; CHECK: mfhi
   %rem = urem i64 %a, %b
   ret i64 %rem
@@ -140,4 +155,3 @@ entry:
   %neg = xor i64 %or, -1
   ret i64 %neg
 }
-
diff --git a/test/CodeGen/Mips/misha.ll b/test/CodeGen/Mips/misha.ll
index 80637edb1674..65d3b7b5d874 100644
--- a/test/CodeGen/Mips/misha.ll
+++ b/test/CodeGen/Mips/misha.ll
@@ -25,10 +25,10 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %inc = add nsw i32 %i.010, 1
   %cmp = icmp eq i32 %inc, %conv
   br i1 %cmp, label %for.end, label %for.body
-; 16: sumc:
+; 16-LABEL: sumc:
 ; 16: 	lbu	${{[0-9]+}}, 0(${{[0-9]+}})
 ; 16: 	lbu	${{[0-9]+}}, 0(${{[0-9]+}})
-; 16: sum:
+; 16-LABEL: sum:
 ; 16: 	lhu	${{[0-9]+}}, 0(${{[0-9]+}})
 ; 16: 	lhu	${{[0-9]+}}, 0(${{[0-9]+}})
 
diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
new file mode 100644
index 000000000000..f4854f880542
--- /dev/null
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -0,0 +1,93 @@
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s -check-prefix=LE-PIC
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
+; RUN: FileCheck %s -check-prefix=LE-STATIC
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
+; RUN: FileCheck %s -check-prefix=BE-PIC
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1
+
+@g0 = common global double 0.000000e+00, align 8
+
+; LE-PIC-LABEL: test_ldc1:
+; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; LE-PIC-DAG: mtc1 $[[R0]], $f0
+; LE-PIC-DAG: mtc1 $[[R1]], $f1
+; LE-STATIC-LABEL: test_ldc1:
+; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; LE-STATIC-DAG: mtc1 $[[R3]], $f1
+; BE-PIC-LABEL: test_ldc1:
+; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; BE-PIC-DAG: mtc1 $[[R1]], $f0
+; BE-PIC-DAG: mtc1 $[[R0]], $f1
+; CHECK-LDC1-SDC1-LABEL: test_ldc1:
+; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}
+
+define double @test_ldc1() {  ; returns the double stored in @g0
+entry:
+  %0 = load double* @g0, align 8  ; the 64-bit load that the expectations above match as lw+mtc1 pairs or as one ldc1
+  ret double %0
+}
+
+; LE-PIC-LABEL: test_sdc1:
+; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
+; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
+; LE-STATIC-LABEL: test_sdc1:
+; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
+; BE-PIC-LABEL: test_sdc1:
+; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}})
+; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}})
+; CHECK-LDC1-SDC1-LABEL: test_sdc1:
+; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}
+
+define void @test_sdc1(double %a) {  ; writes %a into @g0
+entry:
+  store double %a, double* @g0, align 8  ; the 64-bit store that the expectations above match as mfc1+sw pairs or as one sdc1
+  ret void
+}
+
+
+; LE-PIC-LABEL: test_ldxc1:
+; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; LE-PIC-DAG: mtc1 $[[R0]], $f0
+; LE-PIC-DAG: mtc1 $[[R1]], $f1
+; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
+; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
+
+define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {  ; returns a[i]
+entry:
+  %arrayidx = getelementptr inbounds double* %a, i32 %i  ; address of element %i of %a
+  %0 = load double* %arrayidx, align 8  ; the load that the expectations above match as lw+mtc1 pairs or as ldxc1
+  ret double %0
+}
+
+; LE-PIC-LABEL: test_sdxc1:
+; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
+; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
+; CHECK-LDC1-SDC1-LABEL: test_sdxc1:
+; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}}
+
+define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {  ; performs a[i] = b
+entry:
+  %arrayidx = getelementptr inbounds double* %a, i32 %i  ; address of element %i of %a
+  store double %b, double* %arrayidx, align 8  ; the store that the expectations above match as mfc1+sw pairs or as sdxc1
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/2r.ll b/test/CodeGen/Mips/msa/2r.ll
new file mode 100644
index 000000000000..da35ad82cad1
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2r.ll
@@ -0,0 +1,257 @@
+; Test the MSA intrinsics that are encoded with the 2R instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_nloc_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_nloc_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_nloc_b_test() nounwind {  ; loads ARG1, applies llvm.mips.nloc.b, stores to RES
+entry:
+  %0 = load <16 x i8>* @llvm_mips_nloc_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.nloc.b(<16 x i8> %0)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_nloc_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.nloc.b(<16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_nloc_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_b_ARG1)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nloc.b [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nloc_b_test
+;
+@llvm_mips_nloc_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_nloc_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_nloc_h_test() nounwind {  ; loads ARG1, applies llvm.mips.nloc.h, stores to RES
+entry:
+  %0 = load <8 x i16>* @llvm_mips_nloc_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.nloc.h(<8 x i16> %0)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_nloc_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.nloc.h(<8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_nloc_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_h_ARG1)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nloc.h [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nloc_h_test
+;
+@llvm_mips_nloc_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_nloc_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_nloc_w_test() nounwind {  ; loads ARG1, applies llvm.mips.nloc.w, stores to RES
+entry:
+  %0 = load <4 x i32>* @llvm_mips_nloc_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.nloc.w(<4 x i32> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_nloc_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.nloc.w(<4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_nloc_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nloc.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nloc_w_test
+;
+@llvm_mips_nloc_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_nloc_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_nloc_d_test() nounwind {  ; loads ARG1, applies llvm.mips.nloc.d, stores to RES
+entry:
+  %0 = load <2 x i64>* @llvm_mips_nloc_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.nloc.d(<2 x i64> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_nloc_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.nloc.d(<2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_nloc_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nloc_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nloc.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nloc_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nloc_d_test
+;
+@llvm_mips_nlzc_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_nlzc_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_nlzc_b_test() nounwind {  ; loads ARG1, applies llvm.mips.nlzc.b, stores to RES
+entry:
+  %0 = load <16 x i8>* @llvm_mips_nlzc_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.nlzc.b(<16 x i8> %0)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_nlzc_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.nlzc.b(<16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_nlzc_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_b_ARG1)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nlzc.b [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nlzc_b_test
+;
+@llvm_mips_nlzc_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_nlzc_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_nlzc_h_test() nounwind {  ; loads ARG1, applies llvm.mips.nlzc.h, stores to RES
+entry:
+  %0 = load <8 x i16>* @llvm_mips_nlzc_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.nlzc.h(<8 x i16> %0)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_nlzc_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.nlzc.h(<8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_nlzc_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_h_ARG1)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nlzc.h [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nlzc_h_test
+;
+@llvm_mips_nlzc_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_nlzc_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_nlzc_w_test() nounwind {  ; loads ARG1, applies llvm.mips.nlzc.w, stores to RES
+entry:
+  %0 = load <4 x i32>* @llvm_mips_nlzc_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.nlzc.w(<4 x i32> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_nlzc_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.nlzc.w(<4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_nlzc_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nlzc.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nlzc_w_test
+;
+@llvm_mips_nlzc_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_nlzc_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_nlzc_d_test() nounwind {  ; loads ARG1, applies llvm.mips.nlzc.d, stores to RES
+entry:
+  %0 = load <2 x i64>* @llvm_mips_nlzc_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.nlzc.d(<2 x i64> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_nlzc_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.nlzc.d(<2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_nlzc_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_nlzc_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: nlzc.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_nlzc_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_nlzc_d_test
+;
+@llvm_mips_pcnt_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_pcnt_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_pcnt_b_test() nounwind {  ; loads ARG1, applies llvm.mips.pcnt.b, stores to RES
+entry:
+  %0 = load <16 x i8>* @llvm_mips_pcnt_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.pcnt.b(<16 x i8> %0)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_pcnt_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.pcnt.b(<16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_pcnt_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_b_ARG1)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: pcnt.b [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_pcnt_b_test
+;
+@llvm_mips_pcnt_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_pcnt_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_pcnt_h_test() nounwind {  ; loads ARG1, applies llvm.mips.pcnt.h, stores to RES
+entry:
+  %0 = load <8 x i16>* @llvm_mips_pcnt_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.pcnt.h(<8 x i16> %0)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_pcnt_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.pcnt.h(<8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_pcnt_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_h_ARG1)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: pcnt.h [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_pcnt_h_test
+;
+@llvm_mips_pcnt_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_pcnt_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_pcnt_w_test() nounwind {  ; loads ARG1, applies llvm.mips.pcnt.w, stores to RES
+entry:
+  %0 = load <4 x i32>* @llvm_mips_pcnt_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.pcnt.w(<4 x i32> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_pcnt_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.pcnt.w(<4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_pcnt_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: pcnt.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_pcnt_w_test
+;
+@llvm_mips_pcnt_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_pcnt_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_pcnt_d_test() nounwind {  ; loads ARG1, applies llvm.mips.pcnt.d, stores to RES
+entry:
+  %0 = load <2 x i64>* @llvm_mips_pcnt_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.pcnt.d(<2 x i64> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_pcnt_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.pcnt.d(<2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_pcnt_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_pcnt_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: pcnt.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_pcnt_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_pcnt_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2r_vector_scalar.ll b/test/CodeGen/Mips/msa/2r_vector_scalar.ll
new file mode 100644
index 000000000000..6f6e1b9ce2f8
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2r_vector_scalar.ll
@@ -0,0 +1,87 @@
+; Test the MSA intrinsics that are encoded with the 2R instruction format and
+; convert scalars to vectors.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; llvm.mips.fill.b: load an i32 scalar, fill a <16 x i8> vector from it, store to RES; expect lw, fill.b, st.b.
+@llvm_mips_fill_b_ARG1 = global i32 23, align 16
+@llvm_mips_fill_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_fill_b_test() nounwind {
+entry:
+  %0 = load i32* @llvm_mips_fill_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.fill.b(i32 %0)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_fill_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.fill.b(i32) nounwind
+
+; CHECK: llvm_mips_fill_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]],
+; CHECK-DAG: fill.b [[R2:\$w[0-9]+]], [[R1]]
+; CHECK-DAG: st.b [[R2]],
+; CHECK: .size llvm_mips_fill_b_test
+; llvm.mips.fill.h: load an i32 scalar, fill a <8 x i16> vector from it, store to RES; expect lw, fill.h, st.h.
+@llvm_mips_fill_h_ARG1 = global i32 23, align 16
+@llvm_mips_fill_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_fill_h_test() nounwind {
+entry:
+  %0 = load i32* @llvm_mips_fill_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.fill.h(i32 %0)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_fill_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.fill.h(i32) nounwind
+
+; CHECK: llvm_mips_fill_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]],
+; CHECK-DAG: fill.h [[R2:\$w[0-9]+]], [[R1]]
+; CHECK-DAG: st.h [[R2]],
+; CHECK: .size llvm_mips_fill_h_test
+; llvm.mips.fill.w: load an i32 scalar, fill a <4 x i32> vector from it, store to RES; expect lw, fill.w, st.w.
+@llvm_mips_fill_w_ARG1 = global i32 23, align 16
+@llvm_mips_fill_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fill_w_test() nounwind {
+entry:
+  %0 = load i32* @llvm_mips_fill_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.fill.w(i32 %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_fill_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fill.w(i32) nounwind
+
+; CHECK: llvm_mips_fill_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]],
+; CHECK-DAG: fill.w [[R2:\$w[0-9]+]], [[R1]]
+; CHECK-DAG: st.w [[R2]],
+; CHECK: .size llvm_mips_fill_w_test
+; llvm.mips.fill.d with an i64 scalar: the checks expect two lw loads (offsets 0 and 4), ldi.b of 0, four insert.w and st.w.
+@llvm_mips_fill_d_ARG1 = global i64 23, align 16
+@llvm_mips_fill_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fill_d_test() nounwind {
+entry:
+  %0 = load i64* @llvm_mips_fill_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.fill.d(i64 %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_fill_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fill.d(i64) nounwind
+
+; CHECK: llvm_mips_fill_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], 4(
+; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 0
+; CHECK-DAG: insert.w [[R3]][0], [[R1]]
+; CHECK-DAG: insert.w [[R3]][1], [[R2]]
+; CHECK-DAG: insert.w [[R3]][2], [[R1]]
+; CHECK-DAG: insert.w [[R3]][3], [[R2]]
+; CHECK-DAG: st.w [[R3]],
+; CHECK: .size llvm_mips_fill_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2rf.ll b/test/CodeGen/Mips/msa/2rf.ll
new file mode 100644
index 000000000000..b361ef5eae21
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2rf.ll
@@ -0,0 +1,323 @@
+; Test the MSA intrinsics that are encoded with the 2RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; llvm.mips.flog2.w on <4 x float>: load ARG1, call the intrinsic, store to RES; expect ld.w, flog2.w, st.w.
+@llvm_mips_flog2_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_flog2_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_flog2_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.flog2.w(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.flog2.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_flog2_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: flog2.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_flog2_w_test
+; llvm.mips.flog2.d on <2 x double>: load ARG1, call the intrinsic, store to RES; expect ld.d, flog2.d, st.d.
+@llvm_mips_flog2_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_flog2_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_flog2_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.flog2.d(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.flog2.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_flog2_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: flog2.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_flog2_d_test
+; Generic llvm.log2.v4f32 call on <4 x float>; the checks expect it to be selected as flog2.w (ld.w, flog2.w, st.w).
+define void @flog2_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1
+  %1 = tail call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float> %val)
+
+; CHECK: flog2_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: flog2.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size flog2_w_test
+; Generic llvm.log2.v2f64 call on <2 x double>; the checks expect it to be selected as flog2.d (ld.d, flog2.d, st.d).
+define void @flog2_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1
+  %1 = tail call <2 x double> @llvm.log2.v2f64(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.log2.v2f64(<2 x double> %val)
+
+; CHECK: flog2_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_flog2_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: flog2.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_flog2_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size flog2_d_test
+; llvm.mips.frint.w on <4 x float>: load ARG1, call the intrinsic, store to RES; expect ld.w, frint.w, st.w.
+@llvm_mips_frint_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_frint_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_frint_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_frint_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.frint.w(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.frint.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_frint_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frint.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_frint_w_test
+; llvm.mips.frint.d on <2 x double>: load ARG1, call the intrinsic, store to RES; expect ld.d, frint.d, st.d.
+@llvm_mips_frint_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_frint_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_frint_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_frint_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.frint.d(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.frint.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_frint_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frint.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_frint_d_test
+; Generic llvm.rint.v4f32 call on <4 x float>; the checks expect it to be selected as frint.w (ld.w, frint.w, st.w).
+define void @frint_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_frint_w_ARG1
+  %1 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind
+
+; CHECK: frint_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frint.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size frint_w_test
+; Generic llvm.rint.v2f64 call on <2 x double>; the checks expect it to be selected as frint.d (ld.d, frint.d, st.d).
+define void @frint_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_frint_d_ARG1
+  %1 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.rint.v2f64(<2 x double>) nounwind
+
+; CHECK: frint_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frint_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frint.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frint_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size frint_d_test
+; llvm.mips.frcp.w on <4 x float>: load ARG1, call the intrinsic, store to RES; expect ld.w, frcp.w, st.w.
+@llvm_mips_frcp_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_frcp_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_frcp_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_frcp_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.frcp.w(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_frcp_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.frcp.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_frcp_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frcp_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frcp.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frcp_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_frcp_w_test
+; llvm.mips.frcp.d on <2 x double>: load ARG1, call the intrinsic, store to RES; expect ld.d, frcp.d, st.d.
+@llvm_mips_frcp_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_frcp_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_frcp_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_frcp_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.frcp.d(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_frcp_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.frcp.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_frcp_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frcp_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frcp.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frcp_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_frcp_d_test
+; llvm.mips.frsqrt.w on <4 x float>: load ARG1, call the intrinsic, store to RES; expect ld.w, frsqrt.w, st.w.
+@llvm_mips_frsqrt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_frsqrt_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_frsqrt_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_frsqrt_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.frsqrt.w(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_frsqrt_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.frsqrt.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_frsqrt_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frsqrt_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frsqrt.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frsqrt_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_frsqrt_w_test
+; llvm.mips.frsqrt.d on <2 x double>: load ARG1, call the intrinsic, store to RES; expect ld.d, frsqrt.d, st.d.
+@llvm_mips_frsqrt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_frsqrt_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_frsqrt_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_frsqrt_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.frsqrt.d(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_frsqrt_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.frsqrt.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_frsqrt_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_frsqrt_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: frsqrt.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_frsqrt_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_frsqrt_d_test
+; llvm.mips.fsqrt.w on <4 x float>: load ARG1, call the intrinsic, store to RES; expect ld.w, fsqrt.w, st.w.
+@llvm_mips_fsqrt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsqrt_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fsqrt_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.fsqrt.w(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_fsqrt_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: fsqrt.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_fsqrt_w_test
+; llvm.mips.fsqrt.d on <2 x double>: load ARG1, call the intrinsic, store to RES; expect ld.d, fsqrt.d, st.d.
+@llvm_mips_fsqrt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsqrt_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fsqrt_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.fsqrt.d(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_fsqrt_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: fsqrt.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_fsqrt_d_test
+; Generic llvm.sqrt.v4f32 call on <4 x float>; the checks expect it to be selected as fsqrt.w (ld.w, fsqrt.w, st.w).
+define void @fsqrt_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1
+  %1 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind
+
+; CHECK: fsqrt_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: fsqrt.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size fsqrt_w_test
+; Generic llvm.sqrt.v2f64 call on <2 x double>; the checks expect it to be selected as fsqrt.d (ld.d, fsqrt.d, st.d).
+define void @fsqrt_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1
+  %1 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind
+
+; CHECK: fsqrt_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fsqrt_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: fsqrt.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fsqrt_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size fsqrt_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2rf_exup.ll b/test/CodeGen/Mips/msa/2rf_exup.ll
new file mode 100644
index 000000000000..8d7cc367040a
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2rf_exup.ll
@@ -0,0 +1,82 @@
+; Test the MSA floating point conversion intrinsics (e.g. float->double) that
+; are encoded with the 2RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; llvm.mips.fexupl.w takes <8 x half> and returns <4 x float>; expect ld.h, fexupl.w, st.w in that order.
+@llvm_mips_fexupl_w_ARG1 = global <8 x half> <half 0.000000e+00, half 1.000000e+00, half 2.000000e+00, half 3.000000e+00, half 4.000000e+00, half 5.000000e+00, half 6.000000e+00, half 7.000000e+00>, align 16
+@llvm_mips_fexupl_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fexupl_w_test() nounwind {
+entry:
+  %0 = load <8 x half>* @llvm_mips_fexupl_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.fexupl.w(<8 x half> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_fexupl_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fexupl.w(<8 x half>) nounwind
+
+; CHECK: llvm_mips_fexupl_w_test:
+; CHECK: ld.h
+; CHECK: fexupl.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fexupl_w_test
+; llvm.mips.fexupl.d takes <4 x float> and returns <2 x double>; expect ld.w, fexupl.d, st.d in that order.
+@llvm_mips_fexupl_d_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fexupl_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fexupl_d_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fexupl_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.fexupl.d(<4 x float> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_fexupl_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fexupl.d(<4 x float>) nounwind
+
+; CHECK: llvm_mips_fexupl_d_test:
+; CHECK: ld.w
+; CHECK: fexupl.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fexupl_d_test
+; llvm.mips.fexupr.w takes <8 x half> and returns <4 x float>; expect ld.h, fexupr.w, st.w in that order.
+@llvm_mips_fexupr_w_ARG1 = global <8 x half> <half 0.000000e+00, half 1.000000e+00, half 2.000000e+00, half 3.000000e+00, half 4.000000e+00, half 5.000000e+00, half 6.000000e+00, half 7.000000e+00>, align 16
+@llvm_mips_fexupr_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fexupr_w_test() nounwind {
+entry:
+  %0 = load <8 x half>* @llvm_mips_fexupr_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.fexupr.w(<8 x half> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_fexupr_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fexupr.w(<8 x half>) nounwind
+
+; CHECK: llvm_mips_fexupr_w_test:
+; CHECK: ld.h
+; CHECK: fexupr.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fexupr_w_test
+; llvm.mips.fexupr.d takes <4 x float> and returns <2 x double>; expect ld.w, fexupr.d, st.d in that order.
+@llvm_mips_fexupr_d_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fexupr_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fexupr_d_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fexupr_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.fexupr.d(<4 x float> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_fexupr_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fexupr.d(<4 x float>) nounwind
+
+; CHECK: llvm_mips_fexupr_d_test:
+; CHECK: ld.w
+; CHECK: fexupr.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fexupr_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2rf_float_int.ll b/test/CodeGen/Mips/msa/2rf_float_int.ll
new file mode 100644
index 000000000000..3b5dfda2d1e6
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2rf_float_int.ll
@@ -0,0 +1,90 @@
+; Test the MSA integer to floating point conversion intrinsics that are encoded
+; with the 2RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; llvm.mips.ffint.s.w takes <4 x i32> and returns <4 x float>; expect ld.w, ffint_s.w, st.w.
+@llvm_mips_ffint_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ffint_s_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_ffint_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ffint_s_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.ffint.s.w(<4 x i32> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_ffint_s_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.ffint.s.w(<4 x i32>) nounwind
+
+; CHECK: llvm_mips_ffint_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_s_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ffint_s.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ffint_s_w_test
+; llvm.mips.ffint.s.d takes <2 x i64> and returns <2 x double>; expect ld.d, ffint_s.d, st.d.
+@llvm_mips_ffint_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ffint_s_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_ffint_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ffint_s_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.ffint.s.d(<2 x i64> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_ffint_s_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.ffint.s.d(<2 x i64>) nounwind
+
+; CHECK: llvm_mips_ffint_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_s_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ffint_s.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ffint_s_d_test
+; llvm.mips.ffint.u.w takes <4 x i32> and returns <4 x float>; expect ld.w, ffint_u.w, st.w.
+@llvm_mips_ffint_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ffint_u_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_ffint_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ffint_u_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.ffint.u.w(<4 x i32> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_ffint_u_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.ffint.u.w(<4 x i32>) nounwind
+
+; CHECK: llvm_mips_ffint_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_u_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ffint_u.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ffint_u_w_test
+; llvm.mips.ffint.u.d takes <2 x i64> and returns <2 x double>; expect ld.d, ffint_u.d, st.d.
+@llvm_mips_ffint_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ffint_u_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_ffint_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ffint_u_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.ffint.u.d(<2 x i64> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_ffint_u_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.ffint.u.d(<2 x i64>) nounwind
+
+; CHECK: llvm_mips_ffint_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ffint_u_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ffint_u.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ffint_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ffint_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2rf_fq.ll b/test/CodeGen/Mips/msa/2rf_fq.ll
new file mode 100644
index 000000000000..021dd937fad3
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2rf_fq.ll
@@ -0,0 +1,82 @@
+; Test the MSA fixed-point to floating point conversion intrinsics that are
+; encoded with the 2RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; llvm.mips.ffql.w takes <8 x i16> and returns <4 x float>; expect ld.h, ffql.w, st.w in that order.
+@llvm_mips_ffql_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ffql_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_ffql_w_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ffql_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.ffql.w(<8 x i16> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_ffql_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.ffql.w(<8 x i16>) nounwind
+
+; CHECK: llvm_mips_ffql_w_test:
+; CHECK: ld.h
+; CHECK: ffql.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ffql_w_test
+; llvm.mips.ffql.d takes <4 x i32> and returns <2 x double>; expect ld.w, ffql.d, st.d in that order.
+@llvm_mips_ffql_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ffql_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_ffql_d_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ffql_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.ffql.d(<4 x i32> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_ffql_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.ffql.d(<4 x i32>) nounwind
+
+; CHECK: llvm_mips_ffql_d_test:
+; CHECK: ld.w
+; CHECK: ffql.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ffql_d_test
+; llvm.mips.ffqr.w takes <8 x i16> and returns <4 x float>; expect ld.h, ffqr.w, st.w in that order.
+@llvm_mips_ffqr_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ffqr_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_ffqr_w_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ffqr_w_ARG1
+  %1 = tail call <4 x float> @llvm.mips.ffqr.w(<8 x i16> %0)
+  store <4 x float> %1, <4 x float>* @llvm_mips_ffqr_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.ffqr.w(<8 x i16>) nounwind
+
+; CHECK: llvm_mips_ffqr_w_test:
+; CHECK: ld.h
+; CHECK: ffqr.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ffqr_w_test
+; llvm.mips.ffqr.d takes <4 x i32> and returns <2 x double>; expect ld.w, ffqr.d, st.d in that order.
+@llvm_mips_ffqr_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ffqr_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_ffqr_d_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ffqr_d_ARG1
+  %1 = tail call <2 x double> @llvm.mips.ffqr.d(<4 x i32> %0)
+  store <2 x double> %1, <2 x double>* @llvm_mips_ffqr_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.ffqr.d(<4 x i32>) nounwind
+
+; CHECK: llvm_mips_ffqr_d_test:
+; CHECK: ld.w
+; CHECK: ffqr.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ffqr_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2rf_int_float.ll b/test/CodeGen/Mips/msa/2rf_int_float.ll
new file mode 100644
index 000000000000..4665ae066a4f
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2rf_int_float.ll
@@ -0,0 +1,217 @@
+; Test the MSA floating point to integer intrinsics that are encoded with the
+; 2RF instruction format. This includes conversions but other instructions such
+; as fclass are also here.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; llvm.mips.fclass.w takes <4 x float> and returns <4 x i32>; expect ld.w, fclass.w, st.w.
+@llvm_mips_fclass_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fclass_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fclass_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fclass_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.fclass.w(<4 x float> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_fclass_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fclass.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_fclass_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fclass_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: fclass.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fclass_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_fclass_w_test
+; llvm.mips.fclass.d takes <2 x double> and returns <2 x i64>; expect ld.d, fclass.d, st.d.
+@llvm_mips_fclass_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fclass_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fclass_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fclass_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.fclass.d(<2 x double> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_fclass_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fclass.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_fclass_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_fclass_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: fclass.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_fclass_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_fclass_d_test
+; llvm.mips.ftrunc.s.w takes <4 x float> and returns <4 x i32>; expect ld.w, ftrunc_s.w, st.w.
+@llvm_mips_ftrunc_s_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_ftrunc_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ftrunc_s_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_ftrunc_s_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_ftrunc_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_s_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftrunc_s.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftrunc_s_w_test
+; llvm.mips.ftrunc.s.d takes <2 x double> and returns <2 x i64>; expect ld.d, ftrunc_s.d, st.d.
+@llvm_mips_ftrunc_s_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_ftrunc_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ftrunc_s_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_ftrunc_s_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_ftrunc_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_s_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftrunc_s.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftrunc_s_d_test
+; llvm.mips.ftrunc.u.w takes <4 x float> and returns <4 x i32>; expect ld.w, ftrunc_u.w, st.w.
+@llvm_mips_ftrunc_u_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_ftrunc_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ftrunc_u_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_ftrunc_u_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_ftrunc_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_u_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftrunc_u.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftrunc_u_w_test
+; llvm.mips.ftrunc.u.d takes <2 x double> and returns <2 x i64>; expect ld.d, ftrunc_u.d, st.d.
+@llvm_mips_ftrunc_u_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_ftrunc_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ftrunc_u_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_ftrunc_u_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_ftrunc_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftrunc_u_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftrunc_u.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftrunc_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftrunc_u_d_test
+; llvm.mips.ftint.s.w takes <4 x float> and returns <4 x i32>; expect ld.w, ftint_s.w, st.w.
+@llvm_mips_ftint_s_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_ftint_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ftint_s_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_ftint_s_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.ftint.s.w(<4 x float> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ftint.s.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_ftint_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_s_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftint_s.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftint_s_w_test
+; llvm.mips.ftint.s.d takes <2 x double> and returns <2 x i64>; expect ld.d, ftint_s.d, st.d.
+@llvm_mips_ftint_s_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_ftint_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ftint_s_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_ftint_s_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.ftint.s.d(<2 x double> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ftint.s.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_ftint_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_s_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftint_s.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftint_s_d_test
+; llvm.mips.ftint.u.w takes <4 x float> and returns <4 x i32>; expect ld.w, ftint_u.w, st.w.
+@llvm_mips_ftint_u_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_ftint_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ftint_u_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_ftint_u_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.ftint.u.w(<4 x float> %0)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ftint.u.w(<4 x float>) nounwind
+
+; CHECK: llvm_mips_ftint_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_u_w_ARG1)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftint_u.w [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftint_u_w_test
+; llvm.mips.ftint.u.d takes <2 x double> and returns <2 x i64>; expect ld.d, ftint_u.d, st.d.
+@llvm_mips_ftint_u_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_ftint_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ftint_u_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_ftint_u_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.ftint.u.d(<2 x double> %0)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ftint.u.d(<2 x double>) nounwind
+
+; CHECK: llvm_mips_ftint_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ftint_u_d_ARG1)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ftint_u.d [[WD:\$w[0-9]+]], [[WS]]
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ftint_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R2]])
+; CHECK: .size llvm_mips_ftint_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/2rf_tq.ll b/test/CodeGen/Mips/msa/2rf_tq.ll
new file mode 100644
index 000000000000..6f3c508f5b8c
--- /dev/null
+++ b/test/CodeGen/Mips/msa/2rf_tq.ll
@@ -0,0 +1,50 @@
+; Test the MSA floating-point to fixed-point conversion intrinsics that are
+; encoded with the 2RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_ftq_h_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_ftq_h_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_ftq_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.ftq.h on two <4 x float> operands loaded from globals is expected to select ftq.h between two ld.w loads and an st.h store.
+define void @llvm_mips_ftq_h_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_ftq_h_ARG1
+  %1 = load <4 x float>* @llvm_mips_ftq_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.ftq.h(<4 x float> %0, <4 x float> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_ftq_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ftq.h(<4 x float>, <4 x float>) nounwind
+
+; CHECK-LABEL: llvm_mips_ftq_h_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ftq.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ftq_h_test
+;
+@llvm_mips_ftq_w_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_ftq_w_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_ftq_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.ftq.w on two <2 x double> operands loaded from globals is expected to select ftq.w between two ld.d loads and an st.w store.
+define void @llvm_mips_ftq_w_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_ftq_w_ARG1
+  %1 = load <2 x double>* @llvm_mips_ftq_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.ftq.w(<2 x double> %0, <2 x double> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_ftq_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ftq.w(<2 x double>, <2 x double>) nounwind
+
+; CHECK-LABEL: llvm_mips_ftq_w_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ftq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ftq_w_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-a.ll b/test/CodeGen/Mips/msa/3r-a.ll
new file mode 100644
index 000000000000..dab15b66b7ce
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-a.ll
@@ -0,0 +1,1191 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these so this covers those beginning with 'a'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+; It should fail to compile without fp64.
+; RUN: not llc -march=mips -mattr=+msa < %s 2>&1 | \
+; RUN:    FileCheck -check-prefix=FP32ERROR %s
+; FP32ERROR: LLVM ERROR: MSA requires a 64-bit FPU register file (FR=1 mode).
+
+@llvm_mips_add_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_add_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_add_a_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; @llvm.mips.add.a.b on two <16 x i8> operands loaded from globals is expected to select add_a.b, with the result stored to the RES global.
+define void @llvm_mips_add_a_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_add_a_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_add_a_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.add.a.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_add_a_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.add.a.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_add_a_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: add_a.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_add_a_b_test
+;
+@llvm_mips_add_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_add_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_add_a_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.add.a.h on two <8 x i16> operands loaded from globals is expected to select add_a.h, with the result stored to the RES global.
+define void @llvm_mips_add_a_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_add_a_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_add_a_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.add.a.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_add_a_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.add.a.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_add_a_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: add_a.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_add_a_h_test
+;
+@llvm_mips_add_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_add_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_add_a_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.add.a.w on two <4 x i32> operands loaded from globals is expected to select add_a.w, with the result stored to the RES global.
+define void @llvm_mips_add_a_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_add_a_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_add_a_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.add.a.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_add_a_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.add.a.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_add_a_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: add_a.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_add_a_w_test
+;
+@llvm_mips_add_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_add_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_add_a_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; @llvm.mips.add.a.d on two <2 x i64> operands loaded from globals is expected to select add_a.d, with the result stored to the RES global.
+define void @llvm_mips_add_a_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_add_a_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_add_a_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.add.a.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_add_a_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.add.a.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_add_a_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_add_a_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_add_a_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: add_a.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_add_a_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_add_a_d_test
+;
+@llvm_mips_adds_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_adds_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_adds_a_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; @llvm.mips.adds.a.b on two <16 x i8> operands loaded from globals is expected to select adds_a.b, with the result stored to the RES global.
+define void @llvm_mips_adds_a_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_adds_a_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_adds_a_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.adds.a.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_a_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.adds.a.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_a_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_a.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_a_b_test
+;
+@llvm_mips_adds_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_adds_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_adds_a_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.adds.a.h on two <8 x i16> operands loaded from globals is expected to select adds_a.h, with the result stored to the RES global.
+define void @llvm_mips_adds_a_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_adds_a_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_adds_a_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.adds.a.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_a_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.adds.a.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_a_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_a.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_a_h_test
+;
+@llvm_mips_adds_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_adds_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_adds_a_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.adds.a.w on two <4 x i32> operands loaded from globals is expected to select adds_a.w, with the result stored to the RES global.
+define void @llvm_mips_adds_a_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_adds_a_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_adds_a_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.adds.a.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_a_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.adds.a.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_a_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_a.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_a_w_test
+;
+@llvm_mips_adds_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_adds_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_adds_a_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; @llvm.mips.adds.a.d on two <2 x i64> operands loaded from globals is expected to select adds_a.d, with the result stored to the RES global.
+define void @llvm_mips_adds_a_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_adds_a_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_adds_a_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.adds.a.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_a_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.adds.a.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_a_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_a_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_a_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_a.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_a_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_a_d_test
+;
+@llvm_mips_adds_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_adds_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_adds_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; @llvm.mips.adds.s.b on two <16 x i8> operands loaded from globals is expected to select adds_s.b, with the result stored to the RES global.
+define void @llvm_mips_adds_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_adds_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_adds_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.adds.s.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.adds.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_s_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_s_b_test
+;
+@llvm_mips_adds_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_adds_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_adds_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.adds.s.h on two <8 x i16> operands loaded from globals is expected to select adds_s.h, with the result stored to the RES global.
+define void @llvm_mips_adds_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_adds_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_adds_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.adds.s.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.adds.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_s_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_s_h_test
+;
+@llvm_mips_adds_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_adds_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_adds_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.adds.s.w on two <4 x i32> operands loaded from globals is expected to select adds_s.w, with the result stored to the RES global.
+define void @llvm_mips_adds_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_adds_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_adds_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.adds.s.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.adds.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_s_w_test
+;
+@llvm_mips_adds_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_adds_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_adds_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; @llvm.mips.adds.s.d on two <2 x i64> operands loaded from globals is expected to select adds_s.d, with the result stored to the RES global.
+define void @llvm_mips_adds_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_adds_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_adds_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.adds.s.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.adds.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_s_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_s_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_s_d_test
+;
+@llvm_mips_adds_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_adds_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_adds_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; @llvm.mips.adds.u.b on two <16 x i8> operands loaded from globals is expected to select adds_u.b, with the result stored to the RES global.
+define void @llvm_mips_adds_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_adds_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_adds_u_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.adds.u.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.adds.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_u_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_u_b_test
+;
+@llvm_mips_adds_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_adds_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_adds_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.adds.u.h on two <8 x i16> operands loaded from globals is expected to select adds_u.h, with the result stored to the RES global.
+define void @llvm_mips_adds_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_adds_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_adds_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.adds.u.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.adds.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_u_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_u_h_test
+;
+@llvm_mips_adds_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_adds_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_adds_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.adds.u.w on two <4 x i32> operands loaded from globals is expected to select adds_u.w, with the result stored to the RES global.
+define void @llvm_mips_adds_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_adds_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_adds_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.adds.u.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.adds.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_u_w_test
+;
+@llvm_mips_adds_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_adds_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_adds_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; @llvm.mips.adds.u.d on two <2 x i64> operands loaded from globals is expected to select adds_u.d, with the result stored to the RES global.
+define void @llvm_mips_adds_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_adds_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_adds_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.adds.u.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.adds.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_adds_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_adds_u_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_adds_u_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: adds_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_adds_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_adds_u_d_test
+;
+@llvm_mips_addv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_addv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_addv_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; @llvm.mips.addv.b on two <16 x i8> operands loaded from globals is expected to select addv.b, with the result stored to the RES global.
+define void @llvm_mips_addv_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_addv_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_addv_b_test
+;
+@llvm_mips_addv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_addv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_addv_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.addv.h on two <8 x i16> operands loaded from globals is expected to select addv.h, with the result stored to the RES global.
+define void @llvm_mips_addv_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_addv_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_addv_h_test
+;
+@llvm_mips_addv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_addv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_addv_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.addv.w on two <4 x i32> operands loaded from globals is expected to select addv.w, with the result stored to the RES global.
+define void @llvm_mips_addv_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_addv_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_addv_w_test
+;
+@llvm_mips_addv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_addv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_addv_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; @llvm.mips.addv.d on two <2 x i64> operands loaded from globals is expected to select addv.d, with the result stored to the RES global.
+define void @llvm_mips_addv_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_addv_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_addv_d_test
+;
+; A plain IR 'add' of two <16 x i8> vectors (no intrinsic call) is expected to select addv.b too; it reuses the llvm_mips_addv_b_* globals.
+define void @addv_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2
+  %2 = add <16 x i8> %0, %1
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES
+  ret void
+}
+
+; CHECK-LABEL: {{^}}addv_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size addv_b_test
+;
+; A plain IR 'add' of two <8 x i16> vectors (no intrinsic call) is expected to select addv.h too; it reuses the llvm_mips_addv_h_* globals.
+define void @addv_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2
+  %2 = add <8 x i16> %0, %1
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES
+  ret void
+}
+
+; CHECK-LABEL: {{^}}addv_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size addv_h_test
+;
+; A plain IR 'add' of two <4 x i32> vectors (no intrinsic call) is expected to select addv.w too; it reuses the llvm_mips_addv_w_* globals.
+define void @addv_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2
+  %2 = add <4 x i32> %0, %1
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES
+  ret void
+}
+
+; CHECK-LABEL: {{^}}addv_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size addv_w_test
+;
+; A plain IR 'add' of two <2 x i64> vectors (no intrinsic call) is expected to select addv.d too; it reuses the llvm_mips_addv_d_* globals.
+define void @addv_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2
+  %2 = add <2 x i64> %0, %1
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES
+  ret void
+}
+
+; CHECK-LABEL: {{^}}addv_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_addv_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_addv_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: addv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_addv_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size addv_d_test
+;
+@llvm_mips_asub_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_asub_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_asub_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; @llvm.mips.asub.s.b on two <16 x i8> operands loaded from globals is expected to select asub_s.b, with the result stored to the RES global.
+define void @llvm_mips_asub_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_asub_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_asub_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.asub.s.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.asub.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK-LABEL: llvm_mips_asub_s_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_s_b_test
+;
+@llvm_mips_asub_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_asub_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_asub_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; @llvm.mips.asub.s.h on two <8 x i16> operands loaded from globals is expected to select asub_s.h, with the result stored to the RES global.
+define void @llvm_mips_asub_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_asub_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_asub_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.asub.s.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.asub.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK-LABEL: llvm_mips_asub_s_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_s_h_test
+;
+@llvm_mips_asub_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_asub_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_asub_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; @llvm.mips.asub.s.w on two <4 x i32> operands loaded from globals is expected to select asub_s.w, with the result stored to the RES global.
+define void @llvm_mips_asub_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_asub_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_asub_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.asub.s.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.asub.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK-LABEL: llvm_mips_asub_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_s_w_test
+;
+@llvm_mips_asub_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_asub_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_asub_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; @llvm.mips.asub.s.d on two <2 x i64> operands loaded from globals is expected to select asub_s.d, with the result stored to the RES global.
+define void @llvm_mips_asub_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_asub_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_asub_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.asub.s.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.asub.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK-LABEL: llvm_mips_asub_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_s_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_s_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_s_d_test
+;
+@llvm_mips_asub_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_asub_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_asub_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_asub_u_b_test() nounwind { ; run @llvm.mips.asub.u.b on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_asub_u_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_asub_u_b_ARG2 ; second operand
+  %res = tail call <16 x i8> @llvm.mips.asub.u.b(<16 x i8> %arg1, <16 x i8> %arg2)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_asub_u_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.asub.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_asub_u_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_u_b_test
+;
+@llvm_mips_asub_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_asub_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_asub_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_asub_u_h_test() nounwind { ; run @llvm.mips.asub.u.h on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_asub_u_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_asub_u_h_ARG2 ; second operand
+  %res = tail call <8 x i16> @llvm.mips.asub.u.h(<8 x i16> %arg1, <8 x i16> %arg2)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_asub_u_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.asub.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_asub_u_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_u_h_test
+;
+@llvm_mips_asub_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_asub_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_asub_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_asub_u_w_test() nounwind { ; run @llvm.mips.asub.u.w on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_asub_u_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_asub_u_w_ARG2 ; second operand
+  %res = tail call <4 x i32> @llvm.mips.asub.u.w(<4 x i32> %arg1, <4 x i32> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_asub_u_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.asub.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_asub_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_u_w_test
+;
+@llvm_mips_asub_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_asub_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_asub_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_asub_u_d_test() nounwind { ; run @llvm.mips.asub.u.d on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_asub_u_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_asub_u_d_ARG2 ; second operand
+  %res = tail call <2 x i64> @llvm.mips.asub.u.d(<2 x i64> %arg1, <2 x i64> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_asub_u_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.asub.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_asub_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_asub_u_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_asub_u_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: asub_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_asub_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_asub_u_d_test
+;
+@llvm_mips_ave_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ave_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ave_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ave_s_b_test() nounwind { ; run @llvm.mips.ave.s.b on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_ave_s_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_ave_s_b_ARG2 ; second operand
+  %res = tail call <16 x i8> @llvm.mips.ave.s.b(<16 x i8> %arg1, <16 x i8> %arg2)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_ave_s_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ave.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ave_s_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_s_b_test
+;
+@llvm_mips_ave_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ave_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ave_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ave_s_h_test() nounwind { ; run @llvm.mips.ave.s.h on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_ave_s_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_ave_s_h_ARG2 ; second operand
+  %res = tail call <8 x i16> @llvm.mips.ave.s.h(<8 x i16> %arg1, <8 x i16> %arg2)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_ave_s_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ave.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ave_s_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_s_h_test
+;
+@llvm_mips_ave_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ave_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ave_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ave_s_w_test() nounwind { ; run @llvm.mips.ave.s.w on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_ave_s_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_ave_s_w_ARG2 ; second operand
+  %res = tail call <4 x i32> @llvm.mips.ave.s.w(<4 x i32> %arg1, <4 x i32> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_ave_s_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ave.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ave_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_s_w_test
+;
+@llvm_mips_ave_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ave_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ave_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ave_s_d_test() nounwind { ; run @llvm.mips.ave.s.d on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_ave_s_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_ave_s_d_ARG2 ; second operand
+  %res = tail call <2 x i64> @llvm.mips.ave.s.d(<2 x i64> %arg1, <2 x i64> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_ave_s_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ave.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ave_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_s_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_s_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_s_d_test
+;
+@llvm_mips_ave_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ave_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ave_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ave_u_b_test() nounwind { ; run @llvm.mips.ave.u.b on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_ave_u_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_ave_u_b_ARG2 ; second operand
+  %res = tail call <16 x i8> @llvm.mips.ave.u.b(<16 x i8> %arg1, <16 x i8> %arg2)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_ave_u_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ave.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ave_u_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_u_b_test
+;
+@llvm_mips_ave_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ave_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ave_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ave_u_h_test() nounwind { ; run @llvm.mips.ave.u.h on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_ave_u_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_ave_u_h_ARG2 ; second operand
+  %res = tail call <8 x i16> @llvm.mips.ave.u.h(<8 x i16> %arg1, <8 x i16> %arg2)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_ave_u_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ave.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ave_u_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_u_h_test
+;
+@llvm_mips_ave_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ave_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ave_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ave_u_w_test() nounwind { ; run @llvm.mips.ave.u.w on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_ave_u_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_ave_u_w_ARG2 ; second operand
+  %res = tail call <4 x i32> @llvm.mips.ave.u.w(<4 x i32> %arg1, <4 x i32> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_ave_u_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ave.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ave_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_u_w_test
+;
+@llvm_mips_ave_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ave_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ave_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ave_u_d_test() nounwind { ; run @llvm.mips.ave.u.d on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_ave_u_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_ave_u_d_ARG2 ; second operand
+  %res = tail call <2 x i64> @llvm.mips.ave.u.d(<2 x i64> %arg1, <2 x i64> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_ave_u_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ave.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ave_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_ave_u_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_ave_u_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ave_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_ave_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_ave_u_d_test
+;
+@llvm_mips_aver_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_aver_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_aver_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_aver_s_b_test() nounwind { ; run @llvm.mips.aver.s.b on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_aver_s_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_aver_s_b_ARG2 ; second operand
+  %res = tail call <16 x i8> @llvm.mips.aver.s.b(<16 x i8> %arg1, <16 x i8> %arg2)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_aver_s_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.aver.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_aver_s_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_s_b_test
+;
+@llvm_mips_aver_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_aver_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_aver_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_aver_s_h_test() nounwind { ; run @llvm.mips.aver.s.h on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_aver_s_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_aver_s_h_ARG2 ; second operand
+  %res = tail call <8 x i16> @llvm.mips.aver.s.h(<8 x i16> %arg1, <8 x i16> %arg2)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_aver_s_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.aver.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_aver_s_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_s_h_test
+;
+@llvm_mips_aver_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_aver_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_aver_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_aver_s_w_test() nounwind { ; run @llvm.mips.aver.s.w on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_aver_s_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_aver_s_w_ARG2 ; second operand
+  %res = tail call <4 x i32> @llvm.mips.aver.s.w(<4 x i32> %arg1, <4 x i32> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_aver_s_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.aver.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_aver_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_s_w_test
+;
+@llvm_mips_aver_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_aver_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_aver_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_aver_s_d_test() nounwind { ; run @llvm.mips.aver.s.d on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_aver_s_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_aver_s_d_ARG2 ; second operand
+  %res = tail call <2 x i64> @llvm.mips.aver.s.d(<2 x i64> %arg1, <2 x i64> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_aver_s_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.aver.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_aver_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_s_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_s_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_s_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_s_d_test
+;
+@llvm_mips_aver_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_aver_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_aver_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_aver_u_b_test() nounwind { ; run @llvm.mips.aver.u.b on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_aver_u_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_aver_u_b_ARG2 ; second operand
+  %res = tail call <16 x i8> @llvm.mips.aver.u.b(<16 x i8> %arg1, <16 x i8> %arg2)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_aver_u_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.aver.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_aver_u_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_b_RES)
+; CHECK-DAG: st.b [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_u_b_test
+;
+@llvm_mips_aver_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_aver_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_aver_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_aver_u_h_test() nounwind { ; run @llvm.mips.aver.u.h on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_aver_u_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_aver_u_h_ARG2 ; second operand
+  %res = tail call <8 x i16> @llvm.mips.aver.u.h(<8 x i16> %arg1, <8 x i16> %arg2)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_aver_u_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.aver.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_aver_u_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_h_RES)
+; CHECK-DAG: st.h [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_u_h_test
+;
+@llvm_mips_aver_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_aver_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_aver_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_aver_u_w_test() nounwind { ; run @llvm.mips.aver.u.w on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_aver_u_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_aver_u_w_ARG2 ; second operand
+  %res = tail call <4 x i32> @llvm.mips.aver.u.w(<4 x i32> %arg1, <4 x i32> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_aver_u_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.aver.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_aver_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_w_RES)
+; CHECK-DAG: st.w [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_u_w_test
+;
+@llvm_mips_aver_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_aver_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_aver_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_aver_u_d_test() nounwind { ; run @llvm.mips.aver.u.d on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_aver_u_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_aver_u_d_ARG2 ; second operand
+  %res = tail call <2 x i64> @llvm.mips.aver.u.d(<2 x i64> %arg1, <2 x i64> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_aver_u_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.aver.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_aver_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_aver_u_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_aver_u_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: aver_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_aver_u_d_RES)
+; CHECK-DAG: st.d [[WD]], 0([[R3]])
+; CHECK: .size llvm_mips_aver_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll
new file mode 100644
index 000000000000..a05d19b4d490
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-b.ll
@@ -0,0 +1,494 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these, so this file covers those beginning with 'b'.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bclr_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bclr_b_test() nounwind { ; run @llvm.mips.bclr.b on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_bclr_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_bclr_b_ARG2 ; second operand
+  %res = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %arg1, <16 x i8> %arg2)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_bclr_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bclr.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_bclr_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: bclr.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_bclr_b_test
+;
+@llvm_mips_bclr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bclr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bclr_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bclr_h_test() nounwind { ; run @llvm.mips.bclr.h on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_bclr_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_bclr_h_ARG2 ; second operand
+  %res = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %arg1, <8 x i16> %arg2)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_bclr_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.bclr.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_bclr_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: bclr.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_bclr_h_test
+;
+@llvm_mips_bclr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bclr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bclr_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bclr_w_test() nounwind { ; run @llvm.mips.bclr.w on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_bclr_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_bclr_w_ARG2 ; second operand
+  %res = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %arg1, <4 x i32> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_bclr_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_bclr_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: bclr.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_bclr_w_test
+;
+@llvm_mips_bclr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bclr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bclr_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bclr_d_test() nounwind { ; run @llvm.mips.bclr.d on the ARG1/ARG2 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_bclr_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_bclr_d_ARG2 ; second operand
+  %res = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %arg1, <2 x i64> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_bclr_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.bclr.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_bclr_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: bclr.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_bclr_d_test
+
+@llvm_mips_binsl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_binsl_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_binsl_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_binsl_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_binsl_b_test() nounwind { ; run @llvm.mips.binsl.b on the ARG1/ARG2/ARG3 globals and store the result to RES
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_binsl_b_ARG1 ; first operand
+  %arg2 = load <16 x i8>* @llvm_mips_binsl_b_ARG2 ; second operand
+  %arg3 = load <16 x i8>* @llvm_mips_binsl_b_ARG3 ; third operand
+  %res = tail call <16 x i8> @llvm.mips.binsl.b(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_binsl_b_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.binsl.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_binsl_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_b_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_b_ARG3)(
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsl.b [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.b [[R4]], 0(
+; CHECK: .size llvm_mips_binsl_b_test
+
+@llvm_mips_binsl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_binsl_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_binsl_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_binsl_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_binsl_h_test() nounwind { ; run @llvm.mips.binsl.h on the ARG1/ARG2/ARG3 globals and store the result to RES
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_binsl_h_ARG1 ; first operand
+  %arg2 = load <8 x i16>* @llvm_mips_binsl_h_ARG2 ; second operand
+  %arg3 = load <8 x i16>* @llvm_mips_binsl_h_ARG3 ; third operand
+  %res = tail call <8 x i16> @llvm.mips.binsl.h(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_binsl_h_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.binsl.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_binsl_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_h_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_h_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_h_ARG3)(
+; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.h [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsl.h [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.h [[R4]], 0(
+; CHECK: .size llvm_mips_binsl_h_test
+
+@llvm_mips_binsl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_binsl_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_binsl_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_binsl_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_binsl_w_test() nounwind { ; run @llvm.mips.binsl.w on the ARG1/ARG2/ARG3 globals and store the result to RES
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_binsl_w_ARG1 ; first operand
+  %arg2 = load <4 x i32>* @llvm_mips_binsl_w_ARG2 ; second operand
+  %arg3 = load <4 x i32>* @llvm_mips_binsl_w_ARG3 ; third operand
+  %res = tail call <4 x i32> @llvm.mips.binsl.w(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_binsl_w_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.binsl.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_binsl_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_w_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_w_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_w_ARG3)(
+; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.w [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsl.w [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.w [[R4]], 0(
+; CHECK: .size llvm_mips_binsl_w_test
+
+@llvm_mips_binsl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_binsl_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_binsl_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_binsl_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_binsl_d_test() nounwind { ; run @llvm.mips.binsl.d on the ARG1/ARG2/ARG3 globals and store the result to RES
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_binsl_d_ARG1 ; first operand
+  %arg2 = load <2 x i64>* @llvm_mips_binsl_d_ARG2 ; second operand
+  %arg3 = load <2 x i64>* @llvm_mips_binsl_d_ARG3 ; third operand
+  %res = tail call <2 x i64> @llvm.mips.binsl.d(<2 x i64> %arg1, <2 x i64> %arg2, <2 x i64> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_binsl_d_RES ; write the intrinsic's result to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.binsl.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_binsl_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsl_d_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsl_d_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsl_d_ARG3)(
+; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.d [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsl.d [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.d [[R4]], 0(
+; CHECK: .size llvm_mips_binsl_d_test
+
+@llvm_mips_binsr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_binsr_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_binsr_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_binsr_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_binsr_b_test() nounwind { ; loads ARG1, ARG2 and ARG3, calls llvm.mips.binsr.b, stores the result to RES
+entry:
+  %0 = load <16 x i8>* @llvm_mips_binsr_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_binsr_b_ARG2
+  %2 = load <16 x i8>* @llvm_mips_binsr_b_ARG3
+  %3 = tail call <16 x i8> @llvm.mips.binsr.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) ; intrinsic under test; expected output names the register loaded from ARG1 first in binsr.b and stores that same register
+  store <16 x i8> %3, <16 x i8>* @llvm_mips_binsr_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.binsr.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_binsr_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_b_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_b_ARG3)(
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsr.b [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.b [[R4]], 0(
+; CHECK: .size llvm_mips_binsr_b_test
+
+@llvm_mips_binsr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_binsr_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_binsr_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_binsr_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_binsr_h_test() nounwind { ; loads ARG1, ARG2 and ARG3, calls llvm.mips.binsr.h, stores the result to RES
+entry:
+  %0 = load <8 x i16>* @llvm_mips_binsr_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_binsr_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_binsr_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.binsr.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) ; intrinsic under test; expected output names the register loaded from ARG1 first in binsr.h and stores that same register
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_binsr_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.binsr.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_binsr_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_h_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_h_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_h_ARG3)(
+; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.h [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsr.h [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.h [[R4]], 0(
+; CHECK: .size llvm_mips_binsr_h_test
+
+@llvm_mips_binsr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_binsr_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_binsr_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_binsr_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_binsr_w_test() nounwind { ; loads ARG1, ARG2 and ARG3, calls llvm.mips.binsr.w, stores the result to RES
+entry:
+  %0 = load <4 x i32>* @llvm_mips_binsr_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_binsr_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_binsr_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.binsr.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) ; intrinsic under test; expected output names the register loaded from ARG1 first in binsr.w and stores that same register
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_binsr_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.binsr.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_binsr_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_w_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_w_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_w_ARG3)(
+; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.w [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsr.w [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.w [[R4]], 0(
+; CHECK: .size llvm_mips_binsr_w_test
+
+@llvm_mips_binsr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_binsr_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_binsr_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_binsr_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_binsr_d_test() nounwind { ; loads ARG1, ARG2 and ARG3, calls llvm.mips.binsr.d, stores the result to RES
+entry:
+  %0 = load <2 x i64>* @llvm_mips_binsr_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_binsr_d_ARG2
+  %2 = load <2 x i64>* @llvm_mips_binsr_d_ARG3
+  %3 = tail call <2 x i64> @llvm.mips.binsr.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) ; intrinsic under test; expected output names the register loaded from ARG1 first in binsr.d and stores that same register
+  store <2 x i64> %3, <2 x i64>* @llvm_mips_binsr_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.binsr.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_binsr_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsr_d_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsr_d_ARG2)(
+; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_binsr_d_ARG3)(
+; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[R5:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: ld.d [[R6:\$w[0-9]+]], 0([[R3]])
+; CHECK-DAG: binsr.d [[R4]], [[R5]], [[R6]]
+; CHECK-DAG: st.d [[R4]], 0(
+; CHECK: .size llvm_mips_binsr_d_test
+
+@llvm_mips_bneg_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bneg_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bneg_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bneg_b_test() nounwind { ; loads ARG1 and ARG2, calls llvm.mips.bneg.b, stores the result to RES
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bneg_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_bneg_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test; expected output is ld.b, ld.b, bneg.b, st.b in that order
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_bneg_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bneg.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_bneg_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: bneg.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_bneg_b_test
+; bneg.h: loads ARG1 and ARG2, calls llvm.mips.bneg.h, stores the result to RES; expected output is ld.h, ld.h, bneg.h, st.h in that order.
+@llvm_mips_bneg_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bneg_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bneg_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bneg_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_bneg_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_bneg_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.bneg.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_bneg_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: bneg.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_bneg_h_test
+; bneg.w: loads ARG1 and ARG2, calls llvm.mips.bneg.w, stores the result to RES; expected output is ld.w, ld.w, bneg.w, st.w in that order.
+@llvm_mips_bneg_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bneg_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bneg_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bneg_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_bneg_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_bneg_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_bneg_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: bneg.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_bneg_w_test
+; bneg.d: loads ARG1 and ARG2, calls llvm.mips.bneg.d, stores the result to RES; expected output is ld.d, ld.d, bneg.d, st.d in that order.
+@llvm_mips_bneg_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bneg_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bneg_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bneg_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_bneg_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_bneg_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.bneg.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_bneg_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: bneg.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_bneg_d_test
+; bset.b: loads ARG1 and ARG2, calls llvm.mips.bset.b, stores the result to RES; expected output is ld.b, ld.b, bset.b, st.b in that order.
+@llvm_mips_bset_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bset_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bset_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bset_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bset_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_bset_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bset.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_bset_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: bset.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_bset_b_test
+; bset.h: loads ARG1 and ARG2, calls llvm.mips.bset.h, stores the result to RES; expected output is ld.h, ld.h, bset.h, st.h in that order.
+@llvm_mips_bset_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bset_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bset_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bset_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_bset_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_bset_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.bset.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_bset_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: bset.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_bset_h_test
+; bset.w: loads ARG1 and ARG2, calls llvm.mips.bset.w, stores the result to RES; expected output is ld.w, ld.w, bset.w, st.w in that order.
+@llvm_mips_bset_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bset_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bset_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bset_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_bset_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_bset_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_bset_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: bset.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_bset_w_test
+; bset.d: loads ARG1 and ARG2, calls llvm.mips.bset.d, stores the result to RES; expected output is ld.d, ld.d, bset.d, st.d in that order.
+@llvm_mips_bset_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bset_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bset_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bset_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_bset_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_bset_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.bset.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_bset_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: bset.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_bset_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-c.ll b/test/CodeGen/Mips/msa/3r-c.ll
new file mode 100644
index 000000000000..6ec92c284fec
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-c.ll
@@ -0,0 +1,446 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these so this covers those beginning with 'c'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_ceq_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ceq_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ceq_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ceq_b_test() nounwind { ; loads ARG1 and ARG2, calls llvm.mips.ceq.b, stores the result to RES
+entry:
+  %0 = load <16 x i8>* @llvm_mips_ceq_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_ceq_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.ceq.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test; expected output is ld.b, ld.b, ceq.b, st.b in that order
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_ceq_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ceq.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ceq_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ceq.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ceq_b_test
+; ceq.h: loads ARG1 and ARG2, calls llvm.mips.ceq.h, stores the result to RES; expected output is ld.h, ld.h, ceq.h, st.h in that order.
+@llvm_mips_ceq_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ceq_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ceq_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ceq_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ceq_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_ceq_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.ceq.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_ceq_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ceq.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ceq_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ceq.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ceq_h_test
+; ceq.w: loads ARG1 and ARG2, calls llvm.mips.ceq.w, stores the result to RES; expected output is ld.w, ld.w, ceq.w, st.w in that order.
+@llvm_mips_ceq_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ceq_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ceq_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ceq_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ceq_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_ceq_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.ceq.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_ceq_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ceq.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ceq_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ceq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ceq_w_test
+; ceq.d: loads ARG1 and ARG2, calls llvm.mips.ceq.d, stores the result to RES; expected output is ld.d, ld.d, ceq.d, st.d in that order.
+@llvm_mips_ceq_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ceq_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ceq_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ceq_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ceq_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_ceq_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.ceq.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_ceq_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ceq.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ceq_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ceq.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ceq_d_test
+; cle_s.b: loads ARG1 and ARG2, calls llvm.mips.cle.s.b, stores the result to RES; expected output is ld.b, ld.b, cle_s.b, st.b in that order.
+@llvm_mips_cle_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_cle_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_cle_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_cle_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_cle_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_cle_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.cle.s.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.cle.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_cle_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: cle_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_cle_s_b_test
+; cle_s.h: loads ARG1 and ARG2, calls llvm.mips.cle.s.h, stores the result to RES; expected output is ld.h, ld.h, cle_s.h, st.h in that order.
+@llvm_mips_cle_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_cle_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_cle_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_cle_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_cle_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_cle_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.cle.s.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.cle.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_cle_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: cle_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_cle_s_h_test
+; cle_s.w: loads ARG1 and ARG2, calls llvm.mips.cle.s.w, stores the result to RES; expected output is ld.w, ld.w, cle_s.w, st.w in that order.
+@llvm_mips_cle_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_cle_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_cle_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_cle_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_cle_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_cle_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.cle.s.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.cle.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_cle_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: cle_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_cle_s_w_test
+; cle_s.d: loads ARG1 and ARG2, calls llvm.mips.cle.s.d, stores the result to RES; expected output is ld.d, ld.d, cle_s.d, st.d in that order.
+@llvm_mips_cle_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_cle_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_cle_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_cle_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_cle_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_cle_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.cle.s.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.cle.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_cle_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: cle_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_cle_s_d_test
+; cle_u.b: loads ARG1 and ARG2, calls llvm.mips.cle.u.b, stores the result to RES; expected output is ld.b, ld.b, cle_u.b, st.b in that order.
+@llvm_mips_cle_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_cle_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_cle_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_cle_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_cle_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_cle_u_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.cle.u.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.cle.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_cle_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: cle_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_cle_u_b_test
+; cle_u.h: loads ARG1 and ARG2, calls llvm.mips.cle.u.h, stores the result to RES; expected output is ld.h, ld.h, cle_u.h, st.h in that order.
+@llvm_mips_cle_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_cle_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_cle_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_cle_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_cle_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_cle_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.cle.u.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.cle.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_cle_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: cle_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_cle_u_h_test
+; cle_u.w: loads ARG1 and ARG2, calls llvm.mips.cle.u.w, stores the result to RES; expected output is ld.w, ld.w, cle_u.w, st.w in that order.
+@llvm_mips_cle_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_cle_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_cle_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_cle_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_cle_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_cle_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.cle.u.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.cle.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_cle_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: cle_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_cle_u_w_test
+; cle_u.d: loads ARG1 and ARG2, calls llvm.mips.cle.u.d, stores the result to RES; expected output is ld.d, ld.d, cle_u.d, st.d in that order.
+@llvm_mips_cle_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_cle_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_cle_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_cle_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_cle_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_cle_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.cle.u.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.cle.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_cle_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: cle_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_cle_u_d_test
+; clt_s.b: loads ARG1 and ARG2, calls llvm.mips.clt.s.b, stores the result to RES; expected output is ld.b, ld.b, clt_s.b, st.b in that order.
+@llvm_mips_clt_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clt_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_clt_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clt_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_clt_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_clt_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.clt.s.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.clt.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_clt_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: clt_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clt_s_b_test
+; clt_s.h: loads ARG1 and ARG2, calls llvm.mips.clt.s.h, stores the result to RES; expected output is ld.h, ld.h, clt_s.h, st.h in that order.
+@llvm_mips_clt_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clt_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_clt_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clt_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_clt_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_clt_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.clt.s.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.clt.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_clt_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: clt_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clt_s_h_test
+; clt_s.w: loads ARG1 and ARG2, calls llvm.mips.clt.s.w, stores the result to RES; expected output is ld.w, ld.w, clt_s.w, st.w in that order.
+@llvm_mips_clt_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clt_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_clt_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clt_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_clt_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_clt_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.clt.s.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.clt.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_clt_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: clt_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clt_s_w_test
+; clt_s.d: loads ARG1 and ARG2, calls llvm.mips.clt.s.d, stores the result to RES; expected output is ld.d, ld.d, clt_s.d, st.d in that order.
+@llvm_mips_clt_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clt_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_clt_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clt_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_clt_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_clt_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.clt.s.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.clt.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_clt_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: clt_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clt_s_d_test
+; clt_u.b: loads ARG1 and ARG2, calls llvm.mips.clt.u.b, stores the result to RES; expected output is ld.b, ld.b, clt_u.b, st.b in that order.
+@llvm_mips_clt_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clt_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_clt_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clt_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_clt_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_clt_u_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.clt.u.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.clt.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_clt_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: clt_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clt_u_b_test
+; clt_u.h: loads ARG1 and ARG2, calls llvm.mips.clt.u.h, stores the result to RES; expected output is ld.h, ld.h, clt_u.h, st.h in that order.
+@llvm_mips_clt_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clt_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_clt_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clt_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_clt_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_clt_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.clt.u.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.clt.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_clt_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: clt_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clt_u_h_test
+; clt_u.w: loads ARG1 and ARG2, calls llvm.mips.clt.u.w, stores the result to RES; expected output is ld.w, ld.w, clt_u.w, st.w in that order.
+@llvm_mips_clt_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clt_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_clt_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clt_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_clt_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_clt_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.clt.u.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.clt.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_clt_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: clt_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clt_u_w_test
+; clt_u.d: loads ARG1 and ARG2, calls llvm.mips.clt.u.d, stores the result to RES; expected output is ld.d, ld.d, clt_u.d, st.d in that order.
+@llvm_mips_clt_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clt_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_clt_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clt_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_clt_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_clt_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.clt.u.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.clt.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_clt_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: clt_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clt_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-d.ll b/test/CodeGen/Mips/msa/3r-d.ll
new file mode 100644
index 000000000000..0099554a8eea
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-d.ll
@@ -0,0 +1,478 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these, so this file covers those beginning with 'd'.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_div_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_div_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_div_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_div_s_b_test() nounwind { ; exercises the llvm.mips.div.s.b intrinsic on <16 x i8> operands
+entry:
+  %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.div.s.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test; the checks below expect div_s.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.div.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_div_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: div_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_div_s_b_test
+; Test of the llvm.mips.div.s.h intrinsic on <8 x i16> operands; the checks below expect ld.h, ld.h, div_s.h, st.h, in that order.
+@llvm_mips_div_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_div_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_div_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_div_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.div.s.h(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.div.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_div_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: div_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_div_s_h_test
+; Test of the llvm.mips.div.s.w intrinsic on <4 x i32> operands; the checks below expect ld.w, ld.w, div_s.w, st.w, in that order.
+@llvm_mips_div_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_div_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_div_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_div_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.div.s.w(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.div.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_div_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: div_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_div_s_w_test
+; Test of the llvm.mips.div.s.d intrinsic on <2 x i64> operands; the checks below expect ld.d, ld.d, div_s.d, st.d, in that order.
+@llvm_mips_div_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_div_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_div_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_div_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.div.s.d(<2 x i64> %0, <2 x i64> %1) ; intrinsic under test
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.div.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_div_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: div_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_div_s_d_test
+; Generic IR sdiv on <16 x i8>, reusing the llvm_mips_div_s_b globals; the checks below expect div_s.b, as for the intrinsic.
+
+define void @div_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+  %2 = sdiv <16 x i8> %0, %1 ; plain IR instruction rather than the intrinsic call
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
+  ret void
+}
+
+; CHECK: div_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: div_s.b
+; CHECK: st.b
+; CHECK: .size div_s_b_test
+
+define void @div_s_h_test() nounwind { ; generic IR sdiv on <8 x i16>, reusing the llvm_mips_div_s_h globals
+entry:
+  %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+  %2 = sdiv <8 x i16> %0, %1 ; plain IR instruction; the checks below expect div_s.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
+  ret void
+}
+
+; CHECK: div_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: div_s.h
+; CHECK: st.h
+; CHECK: .size div_s_h_test
+
+define void @div_s_w_test() nounwind { ; generic IR sdiv on <4 x i32>, reusing the llvm_mips_div_s_w globals
+entry:
+  %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+  %2 = sdiv <4 x i32> %0, %1 ; plain IR instruction; the checks below expect div_s.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
+  ret void
+}
+
+; CHECK: div_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: div_s.w
+; CHECK: st.w
+; CHECK: .size div_s_w_test
+
+define void @div_s_d_test() nounwind { ; generic IR sdiv on <2 x i64>, reusing the llvm_mips_div_s_d globals
+entry:
+  %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+  %2 = sdiv <2 x i64> %0, %1 ; plain IR instruction; the checks below expect div_s.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
+  ret void
+}
+
+; CHECK: div_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: div_s.d
+; CHECK: st.d
+; CHECK: .size div_s_d_test
+; Test of the llvm.mips.div.u.b intrinsic on <16 x i8> operands; the checks below expect ld.b, ld.b, div_u.b, st.b, in that order.
+@llvm_mips_div_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_div_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_div_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_div_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.div.u.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.div.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_div_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: div_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_div_u_b_test
+; Test of the llvm.mips.div.u.h intrinsic on <8 x i16> operands; the checks below expect ld.h, ld.h, div_u.h, st.h, in that order.
+@llvm_mips_div_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_div_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_div_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_div_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.div.u.h(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.div.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_div_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: div_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_div_u_h_test
+; Test of the llvm.mips.div.u.w intrinsic on <4 x i32> operands; the checks below expect ld.w, ld.w, div_u.w, st.w, in that order.
+@llvm_mips_div_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_div_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_div_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_div_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.div.u.w(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.div.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_div_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: div_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_div_u_w_test
+; Test of the llvm.mips.div.u.d intrinsic on <2 x i64> operands; the checks below expect ld.d, ld.d, div_u.d, st.d, in that order.
+@llvm_mips_div_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_div_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_div_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_div_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.div.u.d(<2 x i64> %0, <2 x i64> %1) ; intrinsic under test
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.div.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_div_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: div_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_div_u_d_test
+; Generic IR udiv on <16 x i8>, reusing the llvm_mips_div_u_b globals; the checks below expect div_u.b, as for the intrinsic.
+
+define void @div_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+  %2 = udiv <16 x i8> %0, %1 ; plain IR instruction rather than the intrinsic call
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
+  ret void
+}
+
+; CHECK: div_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: div_u.b
+; CHECK: st.b
+; CHECK: .size div_u_b_test
+
+define void @div_u_h_test() nounwind { ; generic IR udiv on <8 x i16>, reusing the llvm_mips_div_u_h globals
+entry:
+  %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+  %2 = udiv <8 x i16> %0, %1 ; plain IR instruction; the checks below expect div_u.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
+  ret void
+}
+
+; CHECK: div_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: div_u.h
+; CHECK: st.h
+; CHECK: .size div_u_h_test
+
+define void @div_u_w_test() nounwind { ; generic IR udiv on <4 x i32>, reusing the llvm_mips_div_u_w globals
+entry:
+  %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+  %2 = udiv <4 x i32> %0, %1 ; plain IR instruction; the checks below expect div_u.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
+  ret void
+}
+
+; CHECK: div_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: div_u.w
+; CHECK: st.w
+; CHECK: .size div_u_w_test
+
+define void @div_u_d_test() nounwind { ; generic IR udiv on <2 x i64>, reusing the llvm_mips_div_u_d globals
+entry:
+  %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+  %2 = udiv <2 x i64> %0, %1 ; plain IR instruction; the checks below expect div_u.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
+  ret void
+}
+
+; CHECK: div_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: div_u.d
+; CHECK: st.d
+; CHECK: .size div_u_d_test
+; Test of the llvm.mips.dotp.s.h intrinsic: <16 x i8> operands and an <8 x i16> result, so the checks below expect ld.b loads, dotp_s.h and an st.h store.
+@llvm_mips_dotp_s_h_ARG1 = global <16 x i8> <i8  0, i8  1, i8  2, i8  3,
+                                             i8  4, i8  5, i8  6, i8  7,
+                                             i8  8, i8  9, i8 10, i8 11,
+                                             i8 12, i8 13, i8 14, i8 15>,
+                                            align 16
+@llvm_mips_dotp_s_h_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19,
+                                             i8 20, i8 21, i8 22, i8 23,
+                                             i8 24, i8 25, i8 26, i8 27,
+                                             i8 28, i8 29, i8 30, i8 31>,
+                                            align 16
+@llvm_mips_dotp_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0,
+                                             i16 0, i16 0, i16 0, i16 0>,
+                                            align 16
+
+define void @llvm_mips_dotp_s_h_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG1
+  %1 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.dotp.s.h(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test; result type differs from the operand type
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.dotp.s.h(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dotp_s_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: dotp_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dotp_s_h_test
+; Test of the llvm.mips.dotp.s.w intrinsic: <8 x i16> operands and a <4 x i32> result, so the checks below expect ld.h loads, dotp_s.w and an st.w store.
+@llvm_mips_dotp_s_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3,
+                                             i16 4, i16 5, i16 6, i16 7>,
+                                            align 16
+@llvm_mips_dotp_s_w_ARG2 = global <8 x i16> <i16  4, i16  5, i16  6, i16  7,
+                                             i16  8, i16  9, i16 10, i16 11>,
+                                            align 16
+@llvm_mips_dotp_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+                                            align 16
+
+define void @llvm_mips_dotp_s_w_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG1
+  %1 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.dotp.s.w(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test; result type differs from the operand type
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.dotp.s.w(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dotp_s_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: dotp_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dotp_s_w_test
+; Test of the llvm.mips.dotp.s.d intrinsic: <4 x i32> operands and a <2 x i64> result, so the checks below expect ld.w loads, dotp_s.d and an st.d store.
+@llvm_mips_dotp_s_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 0, i32 1>,
+                                            align 16
+@llvm_mips_dotp_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 2, i32 3>,
+                                            align 16
+@llvm_mips_dotp_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dotp_s_d_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG1
+  %1 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.dotp.s.d(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test; result type differs from the operand type
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.dotp.s.d(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dotp_s_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: dotp_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dotp_s_d_test
+; Test of the llvm.mips.dotp.u.h intrinsic: <16 x i8> operands and an <8 x i16> result, so the checks below expect ld.b loads, dotp_u.h and an st.h store.
+@llvm_mips_dotp_u_h_ARG1 = global <16 x i8> <i8  0, i8  1, i8  2, i8  3,
+                                             i8  4, i8  5, i8  6, i8  7,
+                                             i8  8, i8  9, i8 10, i8 11,
+                                             i8 12, i8 13, i8 14, i8 15>,
+                                            align 16
+@llvm_mips_dotp_u_h_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19,
+                                             i8 20, i8 21, i8 22, i8 23,
+                                             i8 24, i8 25, i8 26, i8 27,
+                                             i8 28, i8 29, i8 30, i8 31>,
+                                            align 16
+@llvm_mips_dotp_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0,
+                                             i16 0, i16 0, i16 0, i16 0>,
+                                            align 16
+
+define void @llvm_mips_dotp_u_h_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG1
+  %1 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.dotp.u.h(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test; result type differs from the operand type
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.dotp.u.h(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dotp_u_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: dotp_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dotp_u_h_test
+; Test of the llvm.mips.dotp.u.w intrinsic: <8 x i16> operands and a <4 x i32> result, so the checks below expect ld.h loads, dotp_u.w and an st.w store.
+@llvm_mips_dotp_u_w_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3,
+                                             i16 4, i16 5, i16 6, i16 7>,
+                                            align 16
+@llvm_mips_dotp_u_w_ARG2 = global <8 x i16> <i16  4, i16  5, i16  6, i16  7,
+                                             i16  8, i16  9, i16 10, i16 11>,
+                                            align 16
+@llvm_mips_dotp_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+                                            align 16
+
+define void @llvm_mips_dotp_u_w_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG1
+  %1 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.dotp.u.w(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test; result type differs from the operand type
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.dotp.u.w(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dotp_u_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: dotp_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dotp_u_w_test
+; Test of the llvm.mips.dotp.u.d intrinsic: <4 x i32> operands and a <2 x i64> result, so the checks below expect ld.w loads, dotp_u.d and an st.d store.
+@llvm_mips_dotp_u_d_ARG1 = global <4 x i32> <i32 0, i32 1, i32 0, i32 1>,
+                                            align 16
+@llvm_mips_dotp_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 2, i32 3>,
+                                            align 16
+@llvm_mips_dotp_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_dotp_u_d_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG1
+  %1 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.dotp.u.d(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test; result type differs from the operand type
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.dotp.u.d(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dotp_u_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: dotp_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dotp_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-i.ll b/test/CodeGen/Mips/msa/3r-i.ll
new file mode 100644
index 000000000000..2ef30471b026
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-i.ll
@@ -0,0 +1,358 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these, so this file covers those beginning with 'i'.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_ilvev_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvev_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvev_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvev_b_test() nounwind { ; exercises the llvm.mips.ilvev.b intrinsic on <16 x i8> operands
+entry:
+  %0 = load <16 x i8>* @llvm_mips_ilvev_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_ilvev_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.ilvev.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test; the checks below expect ilvev.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvev_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvev.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvev_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvev.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvev_b_test
+; Test of the llvm.mips.ilvev.h intrinsic on <8 x i16> operands; the checks below expect ld.h, ld.h, ilvev.h, st.h, in that order.
+@llvm_mips_ilvev_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvev_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvev_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvev_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ilvev_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_ilvev_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.ilvev.h(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvev_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvev.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvev_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvev.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvev_h_test
+; Test of the llvm.mips.ilvev.w intrinsic on <4 x i32> operands; the checks below expect ld.w, ld.w, ilvev.w, st.w, in that order.
+@llvm_mips_ilvev_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvev_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvev_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvev_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ilvev_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_ilvev_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.ilvev.w(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvev_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvev.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvev_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvev.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvev_w_test
+; Test of the llvm.mips.ilvev.d intrinsic on <2 x i64> operands; the checks below expect ld.d, ld.d, ilvev.d, st.d, in that order.
+@llvm_mips_ilvev_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvev_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvev_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvev_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ilvev_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_ilvev_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.ilvev.d(<2 x i64> %0, <2 x i64> %1) ; intrinsic under test
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvev_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvev.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvev_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvev.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvev_d_test
+; Test of the llvm.mips.ilvl.b intrinsic on <16 x i8> operands; the checks below expect ld.b, ld.b, ilvl.b, st.b, in that order.
+@llvm_mips_ilvl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvl_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvl_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_ilvl_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_ilvl_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.ilvl.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvl_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvl.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvl_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvl.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvl_b_test
+; Test of the llvm.mips.ilvl.h intrinsic on <8 x i16> operands; the checks below expect ld.h, ld.h, ilvl.h, st.h, in that order.
+@llvm_mips_ilvl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvl_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvl_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ilvl_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_ilvl_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.ilvl.h(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvl_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvl.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvl_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvl.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvl_h_test
+; Test of the llvm.mips.ilvl.w intrinsic on <4 x i32> operands; the checks below expect ld.w, ld.w, ilvl.w, st.w, in that order.
+@llvm_mips_ilvl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvl_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvl_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ilvl_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_ilvl_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.ilvl.w(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvl_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvl.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvl_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvl.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvl_w_test
+; Test of the llvm.mips.ilvl.d intrinsic on <2 x i64> operands; the checks below expect ld.d, ld.d, ilvl.d, st.d, in that order.
+@llvm_mips_ilvl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvl_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvl_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ilvl_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_ilvl_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.ilvl.d(<2 x i64> %0, <2 x i64> %1) ; intrinsic under test
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvl_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvl.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvl_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvl.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvl_d_test
+; Test of the llvm.mips.ilvod.b intrinsic on <16 x i8> operands; the checks below expect ld.b, ld.b, ilvod.b, st.b, in that order.
+@llvm_mips_ilvod_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvod_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvod_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvod_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_ilvod_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_ilvod_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.ilvod.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvod_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvod.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvod_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvod.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvod_b_test
+; Test of the llvm.mips.ilvod.h intrinsic on <8 x i16> operands; the checks below expect ld.h, ld.h, ilvod.h, st.h, in that order.
+@llvm_mips_ilvod_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvod_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvod_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvod_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ilvod_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_ilvod_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.ilvod.h(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvod_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvod.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvod_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvod.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvod_h_test
+; Test of the llvm.mips.ilvod.w intrinsic on <4 x i32> operands; the checks below expect ld.w, ld.w, ilvod.w, st.w, in that order.
+@llvm_mips_ilvod_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvod_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvod_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvod_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ilvod_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_ilvod_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.ilvod.w(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvod_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvod.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvod_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvod.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvod_w_test
+; Test of the llvm.mips.ilvod.d intrinsic on <2 x i64> operands; the checks below expect ld.d, ld.d, ilvod.d, st.d, in that order.
+@llvm_mips_ilvod_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvod_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvod_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvod_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ilvod_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_ilvod_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.ilvod.d(<2 x i64> %0, <2 x i64> %1) ; intrinsic under test
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvod_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvod.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvod_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvod.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvod_d_test
+; Test of the llvm.mips.ilvr.b intrinsic on <16 x i8> operands; the checks below expect ld.b, ld.b, ilvr.b, st.b, in that order.
+@llvm_mips_ilvr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ilvr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_ilvr_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ilvr_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_ilvr_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_ilvr_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.ilvr.b(<16 x i8> %0, <16 x i8> %1) ; intrinsic under test
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvr_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ilvr.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_ilvr_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ilvr.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ilvr_b_test
+; Test of the llvm.mips.ilvr.h intrinsic on <8 x i16> operands; the checks below expect ld.h, ld.h, ilvr.h, st.h, in that order.
+@llvm_mips_ilvr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ilvr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_ilvr_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ilvr_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_ilvr_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_ilvr_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.ilvr.h(<8 x i16> %0, <8 x i16> %1) ; intrinsic under test
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvr_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ilvr.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_ilvr_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ilvr.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ilvr_h_test
+; Test of the llvm.mips.ilvr.w intrinsic on <4 x i32> operands; the checks below expect ld.w, ld.w, ilvr.w, st.w, in that order.
+@llvm_mips_ilvr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ilvr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_ilvr_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ilvr_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_ilvr_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_ilvr_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.ilvr.w(<4 x i32> %0, <4 x i32> %1) ; intrinsic under test
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvr_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ilvr.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_ilvr_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ilvr.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ilvr_w_test
+; Test of the llvm.mips.ilvr.d intrinsic on <2 x i64> operands; the checks below expect ld.d, ld.d, ilvr.d, st.d, in that order.
+@llvm_mips_ilvr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ilvr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_ilvr_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ilvr_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_ilvr_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_ilvr_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.ilvr.d(<2 x i64> %0, <2 x i64> %1) ; intrinsic under test
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvr_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ilvr.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_ilvr_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ilvr.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ilvr_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-m.ll b/test/CodeGen/Mips/msa/3r-m.ll
new file mode 100644
index 000000000000..ddfd720a2f84
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-m.ll
@@ -0,0 +1,862 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these so this covers those beginning with 'm'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_max_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_max_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_max_a_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_max_a_b_test() nounwind { ; <16 x i8> test of @llvm.mips.max.a.b; the FileCheck lines below expect max_a.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_max_a_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_max_a_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.max.a.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_max_a_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.max.a.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_max_a_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: max_a.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_max_a_b_test
+;
+@llvm_mips_max_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_max_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_max_a_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_max_a_h_test() nounwind { ; <8 x i16> test of @llvm.mips.max.a.h; the FileCheck lines below expect max_a.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_max_a_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_max_a_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.max.a.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_max_a_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.max.a.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_max_a_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: max_a.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_max_a_h_test
+;
+@llvm_mips_max_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_max_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_max_a_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_max_a_w_test() nounwind { ; <4 x i32> test of @llvm.mips.max.a.w; the FileCheck lines below expect max_a.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_max_a_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_max_a_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.max.a.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_max_a_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.max.a.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_max_a_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: max_a.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_max_a_w_test
+;
+@llvm_mips_max_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_max_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_max_a_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_max_a_d_test() nounwind { ; <2 x i64> test of @llvm.mips.max.a.d; the FileCheck lines below expect max_a.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_max_a_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_max_a_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.max.a.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_max_a_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.max.a.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_max_a_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: max_a.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_max_a_d_test
+;
+@llvm_mips_max_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_max_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_max_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_max_s_b_test() nounwind { ; <16 x i8> test of @llvm.mips.max.s.b; the FileCheck lines below expect max_s.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_max_s_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_max_s_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.max.s.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_max_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.max.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_max_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: max_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_max_s_b_test
+;
+@llvm_mips_max_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_max_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_max_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_max_s_h_test() nounwind { ; <8 x i16> test of @llvm.mips.max.s.h; the FileCheck lines below expect max_s.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_max_s_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_max_s_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.max.s.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_max_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.max.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_max_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: max_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_max_s_h_test
+;
+@llvm_mips_max_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_max_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_max_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_max_s_w_test() nounwind { ; <4 x i32> test of @llvm.mips.max.s.w; the FileCheck lines below expect max_s.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_max_s_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_max_s_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.max.s.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_max_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.max.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_max_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: max_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_max_s_w_test
+;
+@llvm_mips_max_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_max_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_max_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_max_s_d_test() nounwind { ; <2 x i64> test of @llvm.mips.max.s.d; the FileCheck lines below expect max_s.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_max_s_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_max_s_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.max.s.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_max_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.max.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_max_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: max_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_max_s_d_test
+;
+@llvm_mips_max_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_max_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_max_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_max_u_b_test() nounwind { ; <16 x i8> test of @llvm.mips.max.u.b; the FileCheck lines below expect max_u.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_max_u_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_max_u_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.max.u.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_max_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.max.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_max_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: max_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_max_u_b_test
+;
+@llvm_mips_max_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_max_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_max_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_max_u_h_test() nounwind { ; <8 x i16> test of @llvm.mips.max.u.h; the FileCheck lines below expect max_u.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_max_u_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_max_u_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.max.u.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_max_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.max.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_max_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: max_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_max_u_h_test
+;
+@llvm_mips_max_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_max_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_max_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_max_u_w_test() nounwind { ; <4 x i32> test of @llvm.mips.max.u.w; the FileCheck lines below expect max_u.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_max_u_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_max_u_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.max.u.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_max_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.max.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_max_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: max_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_max_u_w_test
+;
+@llvm_mips_max_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_max_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_max_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_max_u_d_test() nounwind { ; <2 x i64> test of @llvm.mips.max.u.d; the FileCheck lines below expect max_u.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_max_u_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_max_u_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.max.u.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_max_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.max.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_max_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: max_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_max_u_d_test
+;
+@llvm_mips_min_a_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_min_a_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_min_a_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_min_a_b_test() nounwind { ; <16 x i8> test of @llvm.mips.min.a.b; the FileCheck lines below expect min_a.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_min_a_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_min_a_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.min.a.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_min_a_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.min.a.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_min_a_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: min_a.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_min_a_b_test
+;
+@llvm_mips_min_a_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_min_a_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_min_a_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_min_a_h_test() nounwind { ; <8 x i16> test of @llvm.mips.min.a.h; the FileCheck lines below expect min_a.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_min_a_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_min_a_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.min.a.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_min_a_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.min.a.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_min_a_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: min_a.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_min_a_h_test
+;
+@llvm_mips_min_a_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_min_a_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_min_a_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_min_a_w_test() nounwind { ; <4 x i32> test of @llvm.mips.min.a.w; the FileCheck lines below expect min_a.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_min_a_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_min_a_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.min.a.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_min_a_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.min.a.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_min_a_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: min_a.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_min_a_w_test
+;
+@llvm_mips_min_a_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_min_a_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_min_a_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_min_a_d_test() nounwind { ; <2 x i64> test of @llvm.mips.min.a.d; the FileCheck lines below expect min_a.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_min_a_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_min_a_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.min.a.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_min_a_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.min.a.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_min_a_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: min_a.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_min_a_d_test
+;
+@llvm_mips_min_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_min_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_min_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_min_s_b_test() nounwind { ; <16 x i8> test of @llvm.mips.min.s.b; the FileCheck lines below expect min_s.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_min_s_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_min_s_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.min.s.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_min_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.min.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_min_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: min_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_min_s_b_test
+;
+@llvm_mips_min_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_min_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_min_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_min_s_h_test() nounwind { ; <8 x i16> test of @llvm.mips.min.s.h; the FileCheck lines below expect min_s.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_min_s_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_min_s_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.min.s.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_min_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.min.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_min_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: min_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_min_s_h_test
+;
+@llvm_mips_min_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_min_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_min_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_min_s_w_test() nounwind { ; <4 x i32> test of @llvm.mips.min.s.w; the FileCheck lines below expect min_s.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_min_s_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_min_s_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.min.s.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_min_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.min.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_min_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: min_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_min_s_w_test
+;
+@llvm_mips_min_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_min_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_min_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_min_s_d_test() nounwind { ; <2 x i64> test of @llvm.mips.min.s.d; the FileCheck lines below expect min_s.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_min_s_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_min_s_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.min.s.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_min_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.min.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_min_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: min_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_min_s_d_test
+;
+@llvm_mips_min_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_min_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_min_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_min_u_b_test() nounwind { ; <16 x i8> test of @llvm.mips.min.u.b; the FileCheck lines below expect min_u.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_min_u_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_min_u_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.min.u.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_min_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.min.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_min_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: min_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_min_u_b_test
+;
+@llvm_mips_min_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_min_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_min_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_min_u_h_test() nounwind { ; <8 x i16> test of @llvm.mips.min.u.h; the FileCheck lines below expect min_u.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_min_u_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_min_u_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.min.u.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_min_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.min.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_min_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: min_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_min_u_h_test
+;
+@llvm_mips_min_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_min_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_min_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_min_u_w_test() nounwind { ; <4 x i32> test of @llvm.mips.min.u.w; the FileCheck lines below expect min_u.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_min_u_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_min_u_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.min.u.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_min_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.min.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_min_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: min_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_min_u_w_test
+;
+@llvm_mips_min_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_min_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_min_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_min_u_d_test() nounwind { ; <2 x i64> test of @llvm.mips.min.u.d; the FileCheck lines below expect min_u.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_min_u_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_min_u_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.min.u.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_min_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.min.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_min_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: min_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_min_u_d_test
+;
+@llvm_mips_mod_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_mod_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_mod_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_mod_s_b_test() nounwind { ; <16 x i8> test of @llvm.mips.mod.s.b; the FileCheck lines below expect mod_s.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_mod_s_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_mod_s_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.mod.s.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_mod_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.mod.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_mod_s_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: mod_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_mod_s_b_test
+;
+@llvm_mips_mod_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mod_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_mod_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_mod_s_h_test() nounwind { ; <8 x i16> test of @llvm.mips.mod.s.h; the FileCheck lines below expect mod_s.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_mod_s_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_mod_s_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.mod.s.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_mod_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mod.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_mod_s_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: mod_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mod_s_h_test
+;
+@llvm_mips_mod_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mod_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_mod_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_mod_s_w_test() nounwind { ; <4 x i32> test of @llvm.mips.mod.s.w; the FileCheck lines below expect mod_s.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_mod_s_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_mod_s_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.mod.s.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_mod_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mod.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_mod_s_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: mod_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mod_s_w_test
+;
+@llvm_mips_mod_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_mod_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_mod_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_mod_s_d_test() nounwind { ; <2 x i64> test of @llvm.mips.mod.s.d; the FileCheck lines below expect mod_s.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_mod_s_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_mod_s_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.mod.s.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_mod_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.mod.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_mod_s_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: mod_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_mod_s_d_test
+;
+@llvm_mips_mod_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_mod_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_mod_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_mod_u_b_test() nounwind { ; <16 x i8> test of @llvm.mips.mod.u.b; the FileCheck lines below expect mod_u.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_mod_u_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_mod_u_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.mod.u.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_mod_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.mod.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_mod_u_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: mod_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_mod_u_b_test
+;
+@llvm_mips_mod_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mod_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_mod_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_mod_u_h_test() nounwind { ; <8 x i16> test of @llvm.mips.mod.u.h; the FileCheck lines below expect mod_u.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_mod_u_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_mod_u_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.mod.u.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_mod_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mod.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_mod_u_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: mod_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mod_u_h_test
+;
+@llvm_mips_mod_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mod_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_mod_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_mod_u_w_test() nounwind { ; <4 x i32> test of @llvm.mips.mod.u.w; the FileCheck lines below expect mod_u.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_mod_u_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_mod_u_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.mod.u.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_mod_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mod.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_mod_u_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: mod_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mod_u_w_test
+;
+@llvm_mips_mod_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_mod_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_mod_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_mod_u_d_test() nounwind { ; <2 x i64> test of @llvm.mips.mod.u.d; the FileCheck lines below expect mod_u.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_mod_u_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_mod_u_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.mod.u.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_mod_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.mod.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_mod_u_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: mod_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_mod_u_d_test
+;
+@llvm_mips_mulv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_mulv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_mulv_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_mulv_b_test() nounwind { ; <16 x i8> test of @llvm.mips.mulv.b; the FileCheck lines below expect mulv.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_mulv_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_mulv_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.mulv.b(<16 x i8> %lhs, <16 x i8> %rhs)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_mulv_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.mulv.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_mulv_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: mulv.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_mulv_b_test
+;
+@llvm_mips_mulv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mulv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_mulv_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_mulv_h_test() nounwind { ; <8 x i16> test of @llvm.mips.mulv.h; the FileCheck lines below expect mulv.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_mulv_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_mulv_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.mulv.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_mulv_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mulv.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_mulv_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: mulv.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mulv_h_test
+;
+@llvm_mips_mulv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mulv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_mulv_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_mulv_w_test() nounwind { ; <4 x i32> test of @llvm.mips.mulv.w; the FileCheck lines below expect mulv.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_mulv_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_mulv_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.mulv.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_mulv_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mulv.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_mulv_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: mulv.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mulv_w_test
+;
+@llvm_mips_mulv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_mulv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_mulv_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_mulv_d_test() nounwind { ; <2 x i64> test of @llvm.mips.mulv.d; the FileCheck lines below expect mulv.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_mulv_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_mulv_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.mulv.d(<2 x i64> %lhs, <2 x i64> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_mulv_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.mulv.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_mulv_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: mulv.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_mulv_d_test
+
+define void @mulv_b_test() nounwind { ; plain IR mul on <16 x i8> (no intrinsic); the FileCheck lines below expect mulv.b
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_mulv_b_ARG1
+  %rhs = load <16 x i8>* @llvm_mips_mulv_b_ARG2
+  %prod = mul <16 x i8> %lhs, %rhs
+  store <16 x i8> %prod, <16 x i8>* @llvm_mips_mulv_b_RES
+  ret void
+}
+
+; CHECK: mulv_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: mulv.b
+; CHECK: st.b
+; CHECK: .size mulv_b_test
+
+define void @mulv_h_test() nounwind { ; plain IR mul on <8 x i16> (no intrinsic); the FileCheck lines below expect mulv.h
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_mulv_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_mulv_h_ARG2
+  %prod = mul <8 x i16> %lhs, %rhs
+  store <8 x i16> %prod, <8 x i16>* @llvm_mips_mulv_h_RES
+  ret void
+}
+
+; CHECK: mulv_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: mulv.h
+; CHECK: st.h
+; CHECK: .size mulv_h_test
+
+define void @mulv_w_test() nounwind { ; plain IR mul on <4 x i32> (no intrinsic); the FileCheck lines below expect mulv.w
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_mulv_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_mulv_w_ARG2
+  %prod = mul <4 x i32> %lhs, %rhs
+  store <4 x i32> %prod, <4 x i32>* @llvm_mips_mulv_w_RES
+  ret void
+}
+
+; CHECK: mulv_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: mulv.w
+; CHECK: st.w
+; CHECK: .size mulv_w_test
+
+define void @mulv_d_test() nounwind { ; plain IR mul on <2 x i64> (no intrinsic); the FileCheck lines below expect mulv.d
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_mulv_d_ARG1
+  %rhs = load <2 x i64>* @llvm_mips_mulv_d_ARG2
+  %prod = mul <2 x i64> %lhs, %rhs
+  store <2 x i64> %prod, <2 x i64>* @llvm_mips_mulv_d_RES
+  ret void
+}
+
+; CHECK: mulv_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: mulv.d
+; CHECK: st.d
+; CHECK: .size mulv_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-p.ll b/test/CodeGen/Mips/msa/3r-p.ll
new file mode 100644
index 000000000000..852023b0824a
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-p.ll
@@ -0,0 +1,182 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these so this covers those beginning with 'p'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_pckev_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_pckev_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_pckev_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_pckev_b_test() nounwind { ; exercises llvm.mips.pckev.b on the <16 x i8> globals above
+entry:
+  %0 = load <16 x i8>* @llvm_mips_pckev_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_pckev_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.pckev.b(<16 x i8> %0, <16 x i8> %1) ; FileCheck lines below expect pckev.b after the two ld.b and before st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_pckev_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.pckev.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_pckev_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: pckev.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_pckev_b_test
+;
+@llvm_mips_pckev_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_pckev_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_pckev_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_pckev_h_test() nounwind { ; exercises llvm.mips.pckev.h on the <8 x i16> globals above
+entry:
+  %0 = load <8 x i16>* @llvm_mips_pckev_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_pckev_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.pckev.h(<8 x i16> %0, <8 x i16> %1) ; FileCheck lines below expect pckev.h after the two ld.h and before st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_pckev_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.pckev.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_pckev_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: pckev.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_pckev_h_test
+;
+@llvm_mips_pckev_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_pckev_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_pckev_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_pckev_w_test() nounwind { ; exercises llvm.mips.pckev.w on the <4 x i32> globals above
+entry:
+  %0 = load <4 x i32>* @llvm_mips_pckev_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_pckev_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.pckev.w(<4 x i32> %0, <4 x i32> %1) ; FileCheck lines below expect pckev.w after the two ld.w and before st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_pckev_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.pckev.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_pckev_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: pckev.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_pckev_w_test
+;
+@llvm_mips_pckev_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_pckev_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_pckev_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_pckev_d_test() nounwind { ; exercises llvm.mips.pckev.d on the <2 x i64> globals above
+entry:
+  %0 = load <2 x i64>* @llvm_mips_pckev_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_pckev_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.pckev.d(<2 x i64> %0, <2 x i64> %1) ; FileCheck lines below expect pckev.d after the two ld.d and before st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_pckev_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.pckev.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_pckev_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: pckev.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_pckev_d_test
+;
+@llvm_mips_pckod_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_pckod_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_pckod_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_pckod_b_test() nounwind { ; exercises llvm.mips.pckod.b on the <16 x i8> globals above
+entry:
+  %0 = load <16 x i8>* @llvm_mips_pckod_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_pckod_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.pckod.b(<16 x i8> %0, <16 x i8> %1) ; FileCheck lines below expect pckod.b after the two ld.b and before st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_pckod_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.pckod.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_pckod_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: pckod.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_pckod_b_test
+;
+@llvm_mips_pckod_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_pckod_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_pckod_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_pckod_h_test() nounwind { ; exercises llvm.mips.pckod.h on the <8 x i16> globals above
+entry:
+  %0 = load <8 x i16>* @llvm_mips_pckod_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_pckod_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.pckod.h(<8 x i16> %0, <8 x i16> %1) ; FileCheck lines below expect pckod.h after the two ld.h and before st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_pckod_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.pckod.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_pckod_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: pckod.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_pckod_h_test
+;
+@llvm_mips_pckod_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_pckod_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_pckod_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_pckod_w_test() nounwind { ; exercises llvm.mips.pckod.w on the <4 x i32> globals above
+entry:
+  %0 = load <4 x i32>* @llvm_mips_pckod_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_pckod_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.pckod.w(<4 x i32> %0, <4 x i32> %1) ; FileCheck lines below expect pckod.w after the two ld.w and before st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_pckod_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.pckod.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_pckod_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: pckod.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_pckod_w_test
+;
+@llvm_mips_pckod_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_pckod_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_pckod_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_pckod_d_test() nounwind { ; exercises llvm.mips.pckod.d on the <2 x i64> globals above
+entry:
+  %0 = load <2 x i64>* @llvm_mips_pckod_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_pckod_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.pckod.d(<2 x i64> %0, <2 x i64> %1) ; FileCheck lines below expect pckod.d after the two ld.d and before st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_pckod_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.pckod.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_pckod_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: pckod.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_pckod_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-s.ll b/test/CodeGen/Mips/msa/3r-s.ll
new file mode 100644
index 000000000000..30cf265233e5
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-s.ll
@@ -0,0 +1,1353 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these so this covers those beginning with 's'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_sld_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sld_b_ARG2 = global i32 10, align 16
+@llvm_mips_sld_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sld_b_test() nounwind { ; exercises llvm.mips.sld.b with a <16 x i8> vector and an i32 second operand
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1
+  %1 = load i32* @llvm_mips_sld_b_ARG2 ; scalar operand is loaded from a global, not given as an immediate
+  %2 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, i32 %1) ; expected as sld.b with the loaded i32 as the bracketed register operand
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_sld_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_sld_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
+; CHECK-DAG: sld.b [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_sld_b_test
+;
+@llvm_mips_sld_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sld_h_ARG2 = global i32 10, align 16
+@llvm_mips_sld_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sld_h_test() nounwind { ; exercises llvm.mips.sld.h with a <8 x i16> vector and an i32 second operand
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1
+  %1 = load i32* @llvm_mips_sld_h_ARG2 ; address is captured as R2 below, the loaded value as RT
+  %2 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, i32 %1) ; expected as sld.h with the loaded i32 as the bracketed register operand
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_sld_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_sld_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
+; CHECK-DAG: sld.h [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_sld_h_test
+;
+@llvm_mips_sld_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sld_w_ARG2 = global i32 10, align 16
+@llvm_mips_sld_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sld_w_test() nounwind { ; exercises llvm.mips.sld.w with a <4 x i32> vector and an i32 second operand
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1
+  %1 = load i32* @llvm_mips_sld_w_ARG2 ; address is captured as R2 below, the loaded value as RT
+  %2 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, i32 %1) ; expected as sld.w with the loaded i32 as the bracketed register operand
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_sld_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_sld_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
+; CHECK-DAG: sld.w [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_sld_w_test
+;
+@llvm_mips_sld_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sld_d_ARG2 = global i32 10, align 16
+@llvm_mips_sld_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sld_d_test() nounwind { ; exercises llvm.mips.sld.d with a <2 x i64> vector and an i32 second operand
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1
+  %1 = load i32* @llvm_mips_sld_d_ARG2 ; address is captured as R2 below, the loaded value as RT
+  %2 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, i32 %1) ; expected as sld.d with the loaded i32 as the bracketed register operand
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_sld_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_sld_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]])
+; CHECK-DAG: sld.d [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}}
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_sld_d_test
+;
+@llvm_mips_sll_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sll_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_sll_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sll_b_test() nounwind { ; exercises llvm.mips.sll.b on the <16 x i8> globals above
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.sll.b(<16 x i8> %0, <16 x i8> %1) ; expected as sll.b fed by the two ld.b results, its result feeding st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.sll.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_sll_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_sll_b_test
+;
+@llvm_mips_sll_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sll_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_sll_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sll_h_test() nounwind { ; exercises llvm.mips.sll.h on the <8 x i16> globals above
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.sll.h(<8 x i16> %0, <8 x i16> %1) ; expected as sll.h fed by the two ld.h results, its result feeding st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.sll.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_sll_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_sll_h_test
+;
+@llvm_mips_sll_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sll_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_sll_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sll_w_test() nounwind { ; exercises llvm.mips.sll.w on the <4 x i32> globals above
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> %0, <4 x i32> %1) ; expected as sll.w fed by the two ld.w results, its result feeding st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_sll_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_sll_w_test
+;
+@llvm_mips_sll_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sll_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_sll_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sll_d_test() nounwind { ; exercises llvm.mips.sll.d on the <2 x i64> globals above
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.sll.d(<2 x i64> %0, <2 x i64> %1) ; expected as sll.d fed by the two ld.d results, its result feeding st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.sll.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_sll_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_sll_d_test
+
+define void @sll_b_test() nounwind { ; generic IR shl on <16 x i8>; operands come from the llvm_mips_sll_b_* globals
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2
+  %2 = shl <16 x i8> %0, %1 ; expected to select sll.b fed by the two ld.b results, its result feeding st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
+  ret void
+}
+
+; CHECK: sll_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size sll_b_test
+
+define void @sll_h_test() nounwind { ; generic IR shl on <8 x i16>; operands come from the llvm_mips_sll_h_* globals
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2
+  %2 = shl <8 x i16> %0, %1 ; expected to select sll.h fed by the two ld.h results, its result feeding st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
+  ret void
+}
+
+; CHECK: sll_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size sll_h_test
+
+define void @sll_w_test() nounwind { ; generic IR shl on <4 x i32>; operands come from the llvm_mips_sll_w_* globals
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2
+  %2 = shl <4 x i32> %0, %1 ; expected to select sll.w fed by the two ld.w results, its result feeding st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
+  ret void
+}
+
+; CHECK: sll_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size sll_w_test
+
+define void @sll_d_test() nounwind { ; generic IR shl on <2 x i64>; operands come from the llvm_mips_sll_d_* globals
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2
+  %2 = shl <2 x i64> %0, %1 ; expected to select sll.d fed by the two ld.d results, its result feeding st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
+  ret void
+}
+
+; CHECK: sll_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sll_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sll_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sll.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size sll_d_test
+;
+@llvm_mips_sra_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sra_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_sra_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sra_b_test() nounwind { ; exercises llvm.mips.sra.b on the <16 x i8> globals above
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.sra.b(<16 x i8> %0, <16 x i8> %1) ; expected as sra.b fed by the two ld.b results, its result feeding st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.sra.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_sra_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_sra_b_test
+;
+@llvm_mips_sra_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sra_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_sra_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sra_h_test() nounwind { ; exercises llvm.mips.sra.h on the <8 x i16> globals above
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.sra.h(<8 x i16> %0, <8 x i16> %1) ; expected as sra.h fed by the two ld.h results, its result feeding st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.sra.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_sra_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_sra_h_test
+;
+@llvm_mips_sra_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sra_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_sra_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sra_w_test() nounwind { ; exercises llvm.mips.sra.w on the <4 x i32> globals above
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> %0, <4 x i32> %1) ; expected as sra.w fed by the two ld.w results, its result feeding st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_sra_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_sra_w_test
+;
+@llvm_mips_sra_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sra_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_sra_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sra_d_test() nounwind { ; exercises llvm.mips.sra.d on the <2 x i64> globals above
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.sra.d(<2 x i64> %0, <2 x i64> %1) ; expected as sra.d fed by the two ld.d results, its result feeding st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.sra.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_sra_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_sra_d_test
+;
+
+define void @sra_b_test() nounwind { ; generic IR ashr on <16 x i8>; operands come from the llvm_mips_sra_b_* globals
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2
+  %2 = ashr <16 x i8> %0, %1 ; expected to select sra.b fed by the two ld.b results, its result feeding st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
+  ret void
+}
+
+; CHECK: sra_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size sra_b_test
+
+define void @sra_h_test() nounwind { ; generic IR ashr on <8 x i16>; operands come from the llvm_mips_sra_h_* globals
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2
+  %2 = ashr <8 x i16> %0, %1 ; expected to select sra.h fed by the two ld.h results, its result feeding st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
+  ret void
+}
+
+; CHECK: sra_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size sra_h_test
+
+define void @sra_w_test() nounwind { ; generic IR ashr on <4 x i32>; operands come from the llvm_mips_sra_w_* globals
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2
+  %2 = ashr <4 x i32> %0, %1 ; expected to select sra.w fed by the two ld.w results, its result feeding st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
+  ret void
+}
+
+; CHECK: sra_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size sra_w_test
+
+define void @sra_d_test() nounwind { ; generic IR ashr on <2 x i64>; operands come from the llvm_mips_sra_d_* globals
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2
+  %2 = ashr <2 x i64> %0, %1 ; expected to select sra.d fed by the two ld.d results, its result feeding st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
+  ret void
+}
+
+; CHECK: sra_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sra_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sra_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: sra.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size sra_d_test
+
+@llvm_mips_srar_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srar_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_srar_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srar_b_test() nounwind { ; exercises llvm.mips.srar.b on the <16 x i8> globals above
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srar_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_srar_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.srar.b(<16 x i8> %0, <16 x i8> %1) ; expected as srar.b fed by the two ld.b results, its result feeding st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_srar_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srar.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_srar_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srar.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_srar_b_test
+;
+@llvm_mips_srar_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srar_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_srar_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srar_h_test() nounwind { ; exercises llvm.mips.srar.h on the <8 x i16> globals above
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srar_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_srar_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.srar.h(<8 x i16> %0, <8 x i16> %1) ; expected as srar.h fed by the two ld.h results, its result feeding st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_srar_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srar.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_srar_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srar.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_srar_h_test
+;
+@llvm_mips_srar_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srar_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_srar_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srar_w_test() nounwind { ; exercises llvm.mips.srar.w on the <4 x i32> globals above
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srar_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_srar_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.srar.w(<4 x i32> %0, <4 x i32> %1) ; expected as srar.w fed by the two ld.w results, its result feeding st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_srar_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srar.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_srar_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srar.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_srar_w_test
+;
+@llvm_mips_srar_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srar_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_srar_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_srar_d_test() nounwind { ; exercises llvm.mips.srar.d on the <2 x i64> globals above
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srar_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_srar_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.srar.d(<2 x i64> %0, <2 x i64> %1) ; expected as srar.d fed by the two ld.d results, its result feeding st.d
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_srar_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srar.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_srar_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srar_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srar_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srar.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_srar_d_test
+;
+@llvm_mips_srl_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srl_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_srl_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srl_b_test() nounwind { ; exercises llvm.mips.srl.b on the <16 x i8> globals above
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.srl.b(<16 x i8> %0, <16 x i8> %1) ; expected as srl.b fed by the two ld.b results, its result feeding st.b
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srl.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_srl_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_srl_b_test
+;
+@llvm_mips_srl_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srl_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_srl_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srl_h_test() nounwind { ; exercises llvm.mips.srl.h on the <8 x i16> globals above
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.srl.h(<8 x i16> %0, <8 x i16> %1) ; expected as srl.h fed by the two ld.h results, its result feeding st.h
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srl.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_srl_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_srl_h_test
+;
+@llvm_mips_srl_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srl_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_srl_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srl_w_test() nounwind { ; exercises llvm.mips.srl.w on the <4 x i32> globals above
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> %0, <4 x i32> %1) ; expected as srl.w fed by the two ld.w results, its result feeding st.w
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_srl_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_srl_w_test
+;
+@llvm_mips_srl_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srl_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_srl_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_srl_d_ARG1/_ARG2, calls llvm.mips.srl.d on them, and stores the result to @llvm_mips_srl_d_RES.
+define void @llvm_mips_srl_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.srl.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srl.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_srl_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_srl_d_test
+;
+@llvm_mips_srlr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srlr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_srlr_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Loads @llvm_mips_srlr_b_ARG1/_ARG2, calls llvm.mips.srlr.b on them, and stores the result to @llvm_mips_srlr_b_RES.
+define void @llvm_mips_srlr_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srlr_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_srlr_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.srlr.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_srlr_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srlr.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_srlr_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srlr.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_srlr_b_test
+;
+@llvm_mips_srlr_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srlr_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_srlr_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Loads @llvm_mips_srlr_h_ARG1/_ARG2, calls llvm.mips.srlr.h on them, and stores the result to @llvm_mips_srlr_h_RES.
+define void @llvm_mips_srlr_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srlr_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_srlr_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.srlr.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_srlr_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srlr.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_srlr_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srlr.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_srlr_h_test
+;
+@llvm_mips_srlr_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srlr_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_srlr_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Loads @llvm_mips_srlr_w_ARG1/_ARG2, calls llvm.mips.srlr.w on them, and stores the result to @llvm_mips_srlr_w_RES.
+define void @llvm_mips_srlr_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srlr_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_srlr_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.srlr.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_srlr_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srlr.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_srlr_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srlr.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_srlr_w_test
+;
+@llvm_mips_srlr_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srlr_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_srlr_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_srlr_d_ARG1/_ARG2, calls llvm.mips.srlr.d on them, and stores the result to @llvm_mips_srlr_d_RES.
+define void @llvm_mips_srlr_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srlr_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_srlr_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.srlr.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_srlr_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srlr.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_srlr_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srlr_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srlr_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srlr.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_srlr_d_test
+;
+; Plain-IR variant: lshr on the @llvm_mips_srl_b_* globals; the CHECK lines after the function expect srl.b.
+define void @srl_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2
+  %2 = lshr <16 x i8> %0, %1
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
+  ret void
+}
+
+; CHECK: srl_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size srl_b_test
+; Plain-IR variant: lshr on the @llvm_mips_srl_h_* globals; the CHECK lines after the function expect srl.h.
+define void @srl_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2
+  %2 = lshr <8 x i16> %0, %1
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
+  ret void
+}
+
+; CHECK: srl_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size srl_h_test
+; Plain-IR variant: lshr on the @llvm_mips_srl_w_* globals; the CHECK lines after the function expect srl.w.
+define void @srl_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2
+  %2 = lshr <4 x i32> %0, %1
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
+  ret void
+}
+
+; CHECK: srl_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size srl_w_test
+; Plain-IR variant: lshr on the @llvm_mips_srl_d_* globals; the CHECK lines after the function expect srl.d.
+define void @srl_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2
+  %2 = lshr <2 x i64> %0, %1
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
+  ret void
+}
+
+; CHECK: srl_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_srl_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_srl_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: srl.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size srl_d_test
+
+@llvm_mips_subs_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_subs_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_subs_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Loads @llvm_mips_subs_s_b_ARG1/_ARG2, calls llvm.mips.subs.s.b on them, and stores the result to @llvm_mips_subs_s_b_RES.
+define void @llvm_mips_subs_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subs_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_subs_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.subs.s.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.subs.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_subs_s_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_subs_s_b_test
+;
+@llvm_mips_subs_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_subs_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_subs_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Loads @llvm_mips_subs_s_h_ARG1/_ARG2, calls llvm.mips.subs.s.h on them, and stores the result to @llvm_mips_subs_s_h_RES.
+define void @llvm_mips_subs_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subs_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_subs_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.subs.s.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.subs.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_subs_s_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_subs_s_h_test
+;
+@llvm_mips_subs_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_subs_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_subs_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Loads @llvm_mips_subs_s_w_ARG1/_ARG2, calls llvm.mips.subs.s.w on them, and stores the result to @llvm_mips_subs_s_w_RES.
+define void @llvm_mips_subs_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subs_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_subs_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.subs.s.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.subs.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_subs_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_subs_s_w_test
+;
+@llvm_mips_subs_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_subs_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_subs_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_subs_s_d_ARG1/_ARG2, calls llvm.mips.subs.s.d on them, and stores the result to @llvm_mips_subs_s_d_RES.
+define void @llvm_mips_subs_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subs_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_subs_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.subs.s.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.subs.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_subs_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_s_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_s_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_subs_s_d_test
+;
+@llvm_mips_subs_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_subs_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_subs_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Loads @llvm_mips_subs_u_b_ARG1/_ARG2, calls llvm.mips.subs.u.b on them, and stores the result to @llvm_mips_subs_u_b_RES.
+define void @llvm_mips_subs_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subs_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_subs_u_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.subs.u.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.subs.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_subs_u_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_subs_u_b_test
+;
+@llvm_mips_subs_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_subs_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_subs_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Loads @llvm_mips_subs_u_h_ARG1/_ARG2, calls llvm.mips.subs.u.h on them, and stores the result to @llvm_mips_subs_u_h_RES.
+define void @llvm_mips_subs_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subs_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_subs_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.subs.u.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.subs.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_subs_u_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_subs_u_h_test
+;
+@llvm_mips_subs_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_subs_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_subs_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Loads @llvm_mips_subs_u_w_ARG1/_ARG2, calls llvm.mips.subs.u.w on them, and stores the result to @llvm_mips_subs_u_w_RES.
+define void @llvm_mips_subs_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subs_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_subs_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.subs.u.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.subs.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_subs_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_subs_u_w_test
+;
+@llvm_mips_subs_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_subs_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_subs_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_subs_u_d_ARG1/_ARG2, calls llvm.mips.subs.u.d on them, and stores the result to @llvm_mips_subs_u_d_RES.
+define void @llvm_mips_subs_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subs_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_subs_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.subs.u.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.subs.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_subs_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subs_u_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subs_u_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subs_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_subs_u_d_test
+;
+@llvm_mips_subsus_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_subsus_u_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_subsus_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Loads @llvm_mips_subsus_u_b_ARG1/_ARG2, calls llvm.mips.subsus.u.b on them, and stores the result to @llvm_mips_subsus_u_b_RES.
+define void @llvm_mips_subsus_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.subsus.u.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_subsus_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.subsus.u.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_subsus_u_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsus_u.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_subsus_u_b_test
+;
+@llvm_mips_subsus_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_subsus_u_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_subsus_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Loads @llvm_mips_subsus_u_h_ARG1/_ARG2, calls llvm.mips.subsus.u.h on them, and stores the result to @llvm_mips_subsus_u_h_RES.
+define void @llvm_mips_subsus_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.subsus.u.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_subsus_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.subsus.u.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_subsus_u_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsus_u.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_subsus_u_h_test
+;
+@llvm_mips_subsus_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_subsus_u_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_subsus_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Loads @llvm_mips_subsus_u_w_ARG1/_ARG2, calls llvm.mips.subsus.u.w on them, and stores the result to @llvm_mips_subsus_u_w_RES.
+define void @llvm_mips_subsus_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.subsus.u.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_subsus_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.subsus.u.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_subsus_u_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsus_u.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_subsus_u_w_test
+;
+@llvm_mips_subsus_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_subsus_u_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_subsus_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_subsus_u_d_ARG1/_ARG2, calls llvm.mips.subsus.u.d on them, and stores the result to @llvm_mips_subsus_u_d_RES.
+define void @llvm_mips_subsus_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.subsus.u.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_subsus_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.subsus.u.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_subsus_u_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsus_u_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsus_u_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsus_u.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_subsus_u_d_test
+;
+@llvm_mips_subsuu_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_subsuu_s_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_subsuu_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Loads @llvm_mips_subsuu_s_b_ARG1/_ARG2, calls llvm.mips.subsuu.s.b on them, and stores the result to @llvm_mips_subsuu_s_b_RES.
+define void @llvm_mips_subsuu_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_subsuu_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_subsuu_s_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsuu_s.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_subsuu_s_b_test
+;
+@llvm_mips_subsuu_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_subsuu_s_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_subsuu_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Loads @llvm_mips_subsuu_s_h_ARG1/_ARG2, calls llvm.mips.subsuu.s.h on them, and stores the result to @llvm_mips_subsuu_s_h_RES.
+define void @llvm_mips_subsuu_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_subsuu_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_subsuu_s_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsuu_s.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_subsuu_s_h_test
+;
+@llvm_mips_subsuu_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_subsuu_s_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_subsuu_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Loads @llvm_mips_subsuu_s_w_ARG1/_ARG2, calls llvm.mips.subsuu.s.w on them, and stores the result to @llvm_mips_subsuu_s_w_RES.
+define void @llvm_mips_subsuu_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_subsuu_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_subsuu_s_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsuu_s.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_subsuu_s_w_test
+;
+@llvm_mips_subsuu_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_subsuu_s_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_subsuu_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_subsuu_s_d_ARG1/_ARG2, calls llvm.mips.subsuu.s.d on them, and stores the result to @llvm_mips_subsuu_s_d_RES.
+define void @llvm_mips_subsuu_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_subsuu_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_subsuu_s_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subsuu_s_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subsuu_s_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subsuu_s.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_subsuu_s_d_test
+;
+@llvm_mips_subv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_subv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_subv_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Loads @llvm_mips_subv_b_ARG1/_ARG2, calls llvm.mips.subv.b on them, and stores the result to @llvm_mips_subv_b_RES.
+define void @llvm_mips_subv_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2
+  %2 = tail call <16 x i8> @llvm.mips.subv.b(<16 x i8> %0, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.subv.b(<16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_subv_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size llvm_mips_subv_b_test
+;
+@llvm_mips_subv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_subv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_subv_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Loads @llvm_mips_subv_h_ARG1/_ARG2, calls llvm.mips.subv.h on them, and stores the result to @llvm_mips_subv_h_RES.
+define void @llvm_mips_subv_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2
+  %2 = tail call <8 x i16> @llvm.mips.subv.h(<8 x i16> %0, <8 x i16> %1)
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.subv.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_subv_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size llvm_mips_subv_h_test
+;
+@llvm_mips_subv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_subv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_subv_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Loads @llvm_mips_subv_w_ARG1/_ARG2, calls llvm.mips.subv.w on them, and stores the result to @llvm_mips_subv_w_RES.
+define void @llvm_mips_subv_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2
+  %2 = tail call <4 x i32> @llvm.mips.subv.w(<4 x i32> %0, <4 x i32> %1)
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.subv.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_subv_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size llvm_mips_subv_w_test
+;
+@llvm_mips_subv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_subv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_subv_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads @llvm_mips_subv_d_ARG1/_ARG2, calls llvm.mips.subv.d on them, and stores the result to @llvm_mips_subv_d_RES.
+define void @llvm_mips_subv_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2
+  %2 = tail call <2 x i64> @llvm.mips.subv.d(<2 x i64> %0, <2 x i64> %1)
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.subv.d(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_subv_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size llvm_mips_subv_d_test
+;
+; Plain-IR variant: sub on the @llvm_mips_subv_b_* globals; the CHECK lines after the function expect subv.b.
+define void @subv_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2
+  %2 = sub <16 x i8> %0, %1
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES
+  ret void
+}
+
+; CHECK: subv_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_b_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_b_ARG2)
+; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.b [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.b [[WD]]
+; CHECK: .size subv_b_test
+; Plain-IR variant: sub on the @llvm_mips_subv_h_* globals; the CHECK lines after the function expect subv.h.
+define void @subv_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2
+  %2 = sub <8 x i16> %0, %1
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES
+  ret void
+}
+
+; CHECK: subv_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_h_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_h_ARG2)
+; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.h [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.h [[WD]]
+; CHECK: .size subv_h_test
+; Plain-IR variant: sub on the @llvm_mips_subv_w_* globals; the CHECK lines after the function expect subv.w.
+define void @subv_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2
+  %2 = sub <4 x i32> %0, %1
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES
+  ret void
+}
+
+; CHECK: subv_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_w_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_w_ARG2)
+; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.w [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.w [[WD]]
+; CHECK: .size subv_w_test
+; Plain-IR variant: sub on the @llvm_mips_subv_d_* globals; the CHECK lines after the function expect subv.d.
+define void @subv_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2
+  %2 = sub <2 x i64> %0, %1
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES
+  ret void
+}
+
+; CHECK: subv_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_subv_d_ARG1)
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_subv_d_ARG2)
+; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[WT:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: subv.d [[WD:\$w[0-9]+]], [[WS]], [[WT]]
+; CHECK-DAG: st.d [[WD]]
+; CHECK: .size subv_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r-v.ll b/test/CodeGen/Mips/msa/3r-v.ll
new file mode 100644
index 000000000000..c9693f90d556
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r-v.ll
@@ -0,0 +1,105 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format.
+; There are lots of these so this covers those beginning with 'v'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_vshf_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_vshf_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_vshf_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_vshf_b_RES  = global <16 x i8> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.vshf.b, and stores the result in RES.
+define void @llvm_mips_vshf_b_test() nounwind {
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_vshf_b_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_vshf_b_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_vshf_b_ARG3
+  %res = tail call <16 x i8> @llvm.mips.vshf.b(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_vshf_b_RES
+  ret void
+}
+; Intrinsic under test: three <16 x i8> operands, one <16 x i8> result.
+declare <16 x i8> @llvm.mips.vshf.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_vshf_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: vshf.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_vshf_b_test
+;
+@llvm_mips_vshf_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_vshf_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_vshf_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_vshf_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.vshf.h, and stores the result in RES.
+define void @llvm_mips_vshf_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_vshf_h_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_vshf_h_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_vshf_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.vshf.h(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_vshf_h_RES
+  ret void
+}
+; Intrinsic under test: three <8 x i16> operands, one <8 x i16> result.
+declare <8 x i16> @llvm.mips.vshf.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_vshf_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: vshf.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_vshf_h_test
+;
+@llvm_mips_vshf_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_vshf_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_vshf_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_vshf_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.vshf.w, and stores the result in RES.
+define void @llvm_mips_vshf_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_vshf_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_vshf_w_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_vshf_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.vshf.w(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_vshf_w_RES
+  ret void
+}
+; Intrinsic under test: three <4 x i32> operands, one <4 x i32> result.
+declare <4 x i32> @llvm.mips.vshf.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_vshf_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: vshf.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_vshf_w_test
+;
+@llvm_mips_vshf_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_vshf_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_vshf_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_vshf_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.vshf.d, and stores the result in RES.
+define void @llvm_mips_vshf_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_vshf_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_vshf_d_ARG2
+  %2 = load <2 x i64>* @llvm_mips_vshf_d_ARG3
+  %3 = tail call <2 x i64> @llvm.mips.vshf.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
+  store <2 x i64> %3, <2 x i64>* @llvm_mips_vshf_d_RES
+  ret void
+}
+; Intrinsic under test: three <2 x i64> operands, one <2 x i64> result.
+declare <2 x i64> @llvm.mips.vshf.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_vshf_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: vshf.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_vshf_d_test
diff --git a/test/CodeGen/Mips/msa/3r_4r.ll b/test/CodeGen/Mips/msa/3r_4r.ll
new file mode 100644
index 000000000000..b7fd7283788c
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r_4r.ll
@@ -0,0 +1,206 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format and
+; use the result as a third operand.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_maddv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_maddv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_maddv_b_ARG3 = global <16 x i8> <i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47>, align 16
+@llvm_mips_maddv_b_RES  = global <16 x i8> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.maddv.b, and stores the result in RES.
+define void @llvm_mips_maddv_b_test() nounwind {
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_maddv_b_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_maddv_b_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_maddv_b_ARG3
+  %res = tail call <16 x i8> @llvm.mips.maddv.b(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_maddv_b_RES
+  ret void
+}
+; Intrinsic under test: three <16 x i8> operands, one <16 x i8> result.
+declare <16 x i8> @llvm.mips.maddv.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_maddv_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: maddv.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_maddv_b_test
+;
+@llvm_mips_maddv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_maddv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_maddv_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16
+@llvm_mips_maddv_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.maddv.h, and stores the result in RES.
+define void @llvm_mips_maddv_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_maddv_h_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_maddv_h_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_maddv_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.maddv.h(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_maddv_h_RES
+  ret void
+}
+; Intrinsic under test: three <8 x i16> operands, one <8 x i16> result.
+declare <8 x i16> @llvm.mips.maddv.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_maddv_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: maddv.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_maddv_h_test
+;
+@llvm_mips_maddv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_maddv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_maddv_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16
+@llvm_mips_maddv_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.maddv.w, and stores the result in RES.
+define void @llvm_mips_maddv_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_maddv_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_maddv_w_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_maddv_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.maddv.w(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_maddv_w_RES
+  ret void
+}
+; Intrinsic under test: three <4 x i32> operands, one <4 x i32> result.
+declare <4 x i32> @llvm.mips.maddv.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_maddv_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: maddv.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_maddv_w_test
+;
+@llvm_mips_maddv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_maddv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_maddv_d_ARG3 = global <2 x i64> <i64 4, i64 5>, align 16
+@llvm_mips_maddv_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.maddv.d, and stores the result in RES.
+define void @llvm_mips_maddv_d_test() nounwind {
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_maddv_d_ARG1
+  %arg2 = load <2 x i64>* @llvm_mips_maddv_d_ARG2
+  %arg3 = load <2 x i64>* @llvm_mips_maddv_d_ARG3
+  %res = tail call <2 x i64> @llvm.mips.maddv.d(<2 x i64> %arg1, <2 x i64> %arg2, <2 x i64> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_maddv_d_RES
+  ret void
+}
+; Intrinsic under test: three <2 x i64> operands, one <2 x i64> result.
+declare <2 x i64> @llvm.mips.maddv.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_maddv_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: maddv.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_maddv_d_test
+;
+@llvm_mips_msubv_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_msubv_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_msubv_b_ARG3 = global <16 x i8> <i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47>, align 16
+@llvm_mips_msubv_b_RES  = global <16 x i8> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.msubv.b, and stores the result in RES.
+define void @llvm_mips_msubv_b_test() nounwind {
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_msubv_b_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_msubv_b_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_msubv_b_ARG3
+  %res = tail call <16 x i8> @llvm.mips.msubv.b(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_msubv_b_RES
+  ret void
+}
+; Intrinsic under test: three <16 x i8> operands, one <16 x i8> result.
+declare <16 x i8> @llvm.mips.msubv.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_msubv_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: msubv.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_msubv_b_test
+;
+@llvm_mips_msubv_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_msubv_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_msubv_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16
+@llvm_mips_msubv_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.msubv.h, and stores the result in RES.
+define void @llvm_mips_msubv_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_msubv_h_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_msubv_h_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_msubv_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.msubv.h(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_msubv_h_RES
+  ret void
+}
+; Intrinsic under test: three <8 x i16> operands, one <8 x i16> result.
+declare <8 x i16> @llvm.mips.msubv.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_msubv_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: msubv.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_msubv_h_test
+;
+@llvm_mips_msubv_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_msubv_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_msubv_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16
+@llvm_mips_msubv_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.msubv.w, and stores the result in RES.
+define void @llvm_mips_msubv_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_msubv_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_msubv_w_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_msubv_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.msubv.w(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_msubv_w_RES
+  ret void
+}
+; Intrinsic under test: three <4 x i32> operands, one <4 x i32> result.
+declare <4 x i32> @llvm.mips.msubv.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_msubv_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: msubv.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_msubv_w_test
+;
+@llvm_mips_msubv_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_msubv_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_msubv_d_ARG3 = global <2 x i64> <i64 4, i64 5>, align 16
+@llvm_mips_msubv_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1..ARG3, passes them to llvm.mips.msubv.d, and stores the result in RES.
+define void @llvm_mips_msubv_d_test() nounwind {
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_msubv_d_ARG1
+  %arg2 = load <2 x i64>* @llvm_mips_msubv_d_ARG2
+  %arg3 = load <2 x i64>* @llvm_mips_msubv_d_ARG3
+  %res = tail call <2 x i64> @llvm.mips.msubv.d(<2 x i64> %arg1, <2 x i64> %arg2, <2 x i64> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_msubv_d_RES
+  ret void
+}
+; Intrinsic under test: three <2 x i64> operands, one <2 x i64> result.
+declare <2 x i64> @llvm.mips.msubv.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_msubv_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: msubv.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_msubv_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r_4r_widen.ll b/test/CodeGen/Mips/msa/3r_4r_widen.ll
new file mode 100644
index 000000000000..7063e4566a78
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r_4r_widen.ll
@@ -0,0 +1,307 @@
+; Test the MSA intrinsics that are encoded with the 3R instruction format,
+; use the result as a third operand, and produce results with wider elements
+; than the operands had.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_dpadd_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpadd_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpadd_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpadd_s_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1 (<8 x i16>) and ARG2/ARG3 (<16 x i8>), calls llvm.mips.dpadd.s.h, stores to RES.
+define void @llvm_mips_dpadd_s_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_dpadd_s_h_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <8 x i16>; the other two are <16 x i8>.
+declare <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpadd_s_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpadd_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpadd_s_h_test
+;
+@llvm_mips_dpadd_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpadd_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpadd_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpadd_s_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1 (<4 x i32>) and ARG2/ARG3 (<8 x i16>), calls llvm.mips.dpadd.s.w, stores to RES.
+define void @llvm_mips_dpadd_s_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_dpadd_s_w_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <4 x i32>; the other two are <8 x i16>.
+declare <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpadd_s_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpadd_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpadd_s_w_test
+;
+@llvm_mips_dpadd_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpadd_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpadd_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpadd_s_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1 (<2 x i64>) and ARG2/ARG3 (<4 x i32>), calls llvm.mips.dpadd.s.d, stores to RES.
+define void @llvm_mips_dpadd_s_d_test() nounwind {
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
+  %res = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_dpadd_s_d_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <2 x i64>; the other two are <4 x i32>.
+declare <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpadd_s_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpadd_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpadd_s_d_test
+;
+@llvm_mips_dpadd_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpadd_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpadd_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpadd_u_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1 (<8 x i16>) and ARG2/ARG3 (<16 x i8>), calls llvm.mips.dpadd.u.h, stores to RES.
+define void @llvm_mips_dpadd_u_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_dpadd_u_h_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <8 x i16>; the other two are <16 x i8>.
+declare <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpadd_u_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpadd_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpadd_u_h_test
+;
+@llvm_mips_dpadd_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpadd_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpadd_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpadd_u_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1 (<4 x i32>) and ARG2/ARG3 (<8 x i16>), calls llvm.mips.dpadd.u.w, stores to RES.
+define void @llvm_mips_dpadd_u_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_dpadd_u_w_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <4 x i32>; the other two are <8 x i16>.
+declare <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpadd_u_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpadd_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpadd_u_w_test
+;
+@llvm_mips_dpadd_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpadd_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpadd_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpadd_u_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1 (<2 x i64>) and ARG2/ARG3 (<4 x i32>), calls llvm.mips.dpadd.u.d, stores to RES.
+define void @llvm_mips_dpadd_u_d_test() nounwind {
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
+  %res = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_dpadd_u_d_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <2 x i64>; the other two are <4 x i32>.
+declare <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpadd_u_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpadd_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpadd_u_d_test
+;
+@llvm_mips_dpsub_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpsub_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpsub_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpsub_s_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1 (<8 x i16>) and ARG2/ARG3 (<16 x i8>), calls llvm.mips.dpsub.s.h, stores to RES.
+define void @llvm_mips_dpsub_s_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_dpsub_s_h_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_dpsub_s_h_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <8 x i16>; the other two are <16 x i8>.
+declare <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpsub_s_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpsub_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpsub_s_h_test
+;
+@llvm_mips_dpsub_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpsub_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpsub_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpsub_s_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1 (<4 x i32>) and ARG2/ARG3 (<8 x i16>), calls llvm.mips.dpsub.s.w, stores to RES.
+define void @llvm_mips_dpsub_s_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_dpsub_s_w_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_dpsub_s_w_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <4 x i32>; the other two are <8 x i16>.
+declare <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpsub_s_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpsub_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpsub_s_w_test
+;
+@llvm_mips_dpsub_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpsub_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpsub_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpsub_s_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1 (<2 x i64>) and ARG2/ARG3 (<4 x i32>), calls llvm.mips.dpsub.s.d, stores to RES.
+define void @llvm_mips_dpsub_s_d_test() nounwind {
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_dpsub_s_d_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG3
+  %res = tail call <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_dpsub_s_d_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <2 x i64>; the other two are <4 x i32>.
+declare <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpsub_s_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpsub_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpsub_s_d_test
+;
+@llvm_mips_dpsub_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_dpsub_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
+@llvm_mips_dpsub_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
+@llvm_mips_dpsub_u_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1 (<8 x i16>) and ARG2/ARG3 (<16 x i8>), calls llvm.mips.dpsub.u.h, stores to RES.
+define void @llvm_mips_dpsub_u_h_test() nounwind {
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_dpsub_u_h_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG2
+  %arg3 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG3
+  %res = tail call <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16> %arg1, <16 x i8> %arg2, <16 x i8> %arg3)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_dpsub_u_h_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <8 x i16>; the other two are <16 x i8>.
+declare <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_dpsub_u_h_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: ld.h
+; CHECK: dpsub_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_dpsub_u_h_test
+;
+@llvm_mips_dpsub_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_dpsub_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
+@llvm_mips_dpsub_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
+@llvm_mips_dpsub_u_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1 (<4 x i32>) and ARG2/ARG3 (<8 x i16>), calls llvm.mips.dpsub.u.w, stores to RES.
+define void @llvm_mips_dpsub_u_w_test() nounwind {
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_dpsub_u_w_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG2
+  %arg3 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG3
+  %res = tail call <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32> %arg1, <8 x i16> %arg2, <8 x i16> %arg3)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_dpsub_u_w_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <4 x i32>; the other two are <8 x i16>.
+declare <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_dpsub_u_w_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.w
+; CHECK: dpsub_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_dpsub_u_w_test
+;
+@llvm_mips_dpsub_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_dpsub_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
+@llvm_mips_dpsub_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
+@llvm_mips_dpsub_u_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1 (<2 x i64>) and ARG2/ARG3 (<4 x i32>), calls llvm.mips.dpsub.u.d, stores to RES.
+define void @llvm_mips_dpsub_u_d_test() nounwind {
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_dpsub_u_d_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG2
+  %arg3 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG3
+  %res = tail call <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64> %arg1, <4 x i32> %arg2, <4 x i32> %arg3)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_dpsub_u_d_RES
+  ret void
+}
+; Intrinsic under test: the first operand and the result are <2 x i64>; the other two are <4 x i32>.
+declare <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_dpsub_u_d_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.d
+; CHECK: dpsub_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_dpsub_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3r_splat.ll b/test/CodeGen/Mips/msa/3r_splat.ll
new file mode 100644
index 000000000000..6b0cb26f8c81
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3r_splat.ll
@@ -0,0 +1,94 @@
+; Test the MSA splat intrinsics that are encoded with the 3R instruction
+; format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | \
+; RUN:     FileCheck -check-prefix=MIPS32 %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | \
+; RUN:     FileCheck -check-prefix=MIPS32 %s
+
+@llvm_mips_splat_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_splat_b_RES  = global <16 x i8> zeroinitializer, align 16
+; Loads ARG1, passes it with the i32 argument %a to llvm.mips.splat.b, stores to RES.
+define void @llvm_mips_splat_b_test(i32 %a) nounwind {
+entry:
+  %vec = load <16 x i8>* @llvm_mips_splat_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.splat.b(<16 x i8> %vec, i32 %a)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_splat_b_RES
+  ret void
+}
+; Intrinsic under test: a <16 x i8> vector plus an i32, returning <16 x i8>.
+declare <16 x i8> @llvm.mips.splat.b(<16 x i8>, i32) nounwind
+
+; MIPS32: llvm_mips_splat_b_test:
+; MIPS32-DAG: lw   [[R1:\$[0-9]+]], %got(llvm_mips_splat_b_ARG1)(
+; MIPS32-DAG: lw   [[R2:\$[0-9]+]], %got(llvm_mips_splat_b_RES)(
+; MIPS32-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: splat.b [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS32-DAG: st.b [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_b_test
+
+@llvm_mips_splat_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_splat_h_RES  = global <8 x i16> zeroinitializer, align 16
+; Loads ARG1, passes it with the i32 argument %a to llvm.mips.splat.h, stores to RES.
+define void @llvm_mips_splat_h_test(i32 %a) nounwind {
+entry:
+  %vec = load <8 x i16>* @llvm_mips_splat_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.splat.h(<8 x i16> %vec, i32 %a)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_splat_h_RES
+  ret void
+}
+; Intrinsic under test: a <8 x i16> vector plus an i32, returning <8 x i16>.
+declare <8 x i16> @llvm.mips.splat.h(<8 x i16>, i32) nounwind
+
+; MIPS32: llvm_mips_splat_h_test:
+; MIPS32-DAG: lw   [[R1:\$[0-9]+]], %got(llvm_mips_splat_h_ARG1)(
+; MIPS32-DAG: lw   [[R2:\$[0-9]+]], %got(llvm_mips_splat_h_RES)(
+; MIPS32-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: splat.h [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS32-DAG: st.h [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_h_test
+
+@llvm_mips_splat_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_splat_w_RES  = global <4 x i32> zeroinitializer, align 16
+; Loads ARG1, passes it with the i32 argument %a to llvm.mips.splat.w, stores to RES.
+define void @llvm_mips_splat_w_test(i32 %a) nounwind {
+entry:
+  %vec = load <4 x i32>* @llvm_mips_splat_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.splat.w(<4 x i32> %vec, i32 %a)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_splat_w_RES
+  ret void
+}
+; Intrinsic under test: a <4 x i32> vector plus an i32, returning <4 x i32>.
+declare <4 x i32> @llvm.mips.splat.w(<4 x i32>, i32) nounwind
+
+; MIPS32: llvm_mips_splat_w_test:
+; MIPS32-DAG: lw   [[R1:\$[0-9]+]], %got(llvm_mips_splat_w_ARG1)(
+; MIPS32-DAG: lw   [[R2:\$[0-9]+]], %got(llvm_mips_splat_w_RES)(
+; MIPS32-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS32-DAG: splat.w [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS32-DAG: st.w [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_w_test
+
+@llvm_mips_splat_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_splat_d_RES  = global <2 x i64> zeroinitializer, align 16
+; Loads ARG1, passes it with the i32 argument %a to llvm.mips.splat.d, stores to RES.
+define void @llvm_mips_splat_d_test(i32 %a) nounwind {
+entry:
+  %vec = load <2 x i64>* @llvm_mips_splat_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.splat.d(<2 x i64> %vec, i32 %a)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_splat_d_RES
+  ret void
+}
+; Intrinsic under test: a <2 x i64> vector plus an i32, returning <2 x i64>.
+declare <2 x i64> @llvm.mips.splat.d(<2 x i64>, i32) nounwind
+
+; MIPS32: llvm_mips_splat_d_test:
+; FIXME: The body checks below are disabled: they use the MIPS64 prefix, which
+;        no RUN line in this file enables. Matching the indices on MIPS32 is
+;        difficult because 64-bit values cannot be stored in GPR32.
+; MIPS64-DAG: lw   [[R1:\$[0-9]+]], %got(llvm_mips_splat_d_ARG1)(
+; MIPS64-DAG: lw   [[R2:\$[0-9]+]], %got(llvm_mips_splat_d_RES)(
+; MIPS64-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
+; MIPS64-DAG: splat.d [[R4:\$w[0-9]+]], [[R3]][$4]
+; MIPS64-DAG: st.d [[R4]], 0([[R2]])
+; MIPS32: .size llvm_mips_splat_d_test
diff --git a/test/CodeGen/Mips/msa/3rf.ll b/test/CodeGen/Mips/msa/3rf.ll
new file mode 100644
index 000000000000..ae665afcc950
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf.ll
@@ -0,0 +1,485 @@
+; Test the MSA intrinsics that are encoded with the 3RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_fadd_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fadd_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fadd_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fadd_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fadd.w, then st.w.
+; CHECK-LABEL: llvm_mips_fadd_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fadd.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fadd_w_test
+;
+@llvm_mips_fadd_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fadd_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fadd_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fadd_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fadd.d, then st.d.
+; CHECK-LABEL: llvm_mips_fadd_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fadd.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fadd_d_test
+
+define void @fadd_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2
+  %2 = fadd <4 x float> %0, %1
+  store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES
+  ret void
+}
+; Plain IR fadd: the checks expect, in order, two ld.w, fadd.w, then st.w. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fadd_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fadd.w
+; CHECK: st.w
+; CHECK: .size fadd_w_test
+
+define void @fadd_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2
+  %2 = fadd <2 x double> %0, %1
+  store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES
+  ret void
+}
+; Plain IR fadd: the checks expect, in order, two ld.d, fadd.d, then st.d. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fadd_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fadd.d
+; CHECK: st.d
+; CHECK: .size fadd_d_test
+;
+@llvm_mips_fdiv_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fdiv_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fdiv_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fdiv_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fdiv.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fdiv.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fdiv.w, then st.w.
+; CHECK-LABEL: llvm_mips_fdiv_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fdiv.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fdiv_w_test
+;
+@llvm_mips_fdiv_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fdiv_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fdiv_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fdiv_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fdiv.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fdiv.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fdiv.d, then st.d.
+; CHECK-LABEL: llvm_mips_fdiv_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fdiv.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fdiv_d_test
+
+define void @fdiv_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2
+  %2 = fdiv <4 x float> %0, %1
+  store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES
+  ret void
+}
+; Plain IR fdiv: the checks expect, in order, two ld.w, fdiv.w, then st.w. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fdiv_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fdiv.w
+; CHECK: st.w
+; CHECK: .size fdiv_w_test
+
+define void @fdiv_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2
+  %2 = fdiv <2 x double> %0, %1
+  store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES
+  ret void
+}
+; Plain IR fdiv: the checks expect, in order, two ld.d, fdiv.d, then st.d. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fdiv_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fdiv.d
+; CHECK: st.d
+; CHECK: .size fdiv_d_test
+;
+@llvm_mips_fmin_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmin_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmin_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmin_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmin_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmin_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fmin_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fmin.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmin_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmin.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmin_w_test
+;
+@llvm_mips_fmin_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmin_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmin_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmin_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmin_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmin_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fmin_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fmin.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmin_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmin.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmin_d_test
+;
+@llvm_mips_fmin_a_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmin_a_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmin_a_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmin_a_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmin_a_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmin_a_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fmin.a.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fmin_a_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmin.a.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fmin_a.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmin_a_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmin_a.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmin_a_w_test
+;
+@llvm_mips_fmin_a_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmin_a_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmin_a_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmin_a_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmin_a_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmin_a_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fmin.a.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fmin_a_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmin.a.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fmin_a.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmin_a_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmin_a.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmin_a_d_test
+;
+@llvm_mips_fmax_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmax_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmax_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmax_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmax_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmax_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fmax_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fmax.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmax_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmax.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmax_w_test
+;
+@llvm_mips_fmax_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmax_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmax_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmax_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmax_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmax_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fmax_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fmax.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmax_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmax.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmax_d_test
+;
+@llvm_mips_fmax_a_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmax_a_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmax_a_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmax_a_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmax_a_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmax_a_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fmax.a.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fmax_a_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmax.a.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fmax_a.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmax_a_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmax_a.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmax_a_w_test
+;
+@llvm_mips_fmax_a_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmax_a_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmax_a_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmax_a_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmax_a_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmax_a_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fmax.a.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fmax_a_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmax.a.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fmax_a.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmax_a_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmax_a.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmax_a_d_test
+;
+@llvm_mips_fmul_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmul_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmul_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmul_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fmul.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmul_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmul.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmul_w_test
+;
+@llvm_mips_fmul_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmul_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmul_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmul_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fmul.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmul.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fmul.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmul_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmul.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmul_d_test
+
+define void @fmul_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2
+  %2 = fmul <4 x float> %0, %1
+  store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES
+  ret void
+}
+; Plain IR fmul: the checks expect, in order, two ld.w, fmul.w, then st.w. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fmul_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmul.w
+; CHECK: st.w
+; CHECK: .size fmul_w_test
+
+define void @fmul_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2
+  %2 = fmul <2 x double> %0, %1
+  store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES
+  ret void
+}
+; Plain IR fmul: the checks expect, in order, two ld.d, fmul.d, then st.d. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fmul_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmul.d
+; CHECK: st.d
+; CHECK: .size fmul_d_test
+;
+@llvm_mips_fsub_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsub_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsub_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fsub_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2
+  %2 = tail call <4 x float> @llvm.mips.fsub.w(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: two ld.w, one fsub.w, then st.w.
+; CHECK-LABEL: llvm_mips_fsub_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsub.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsub_w_test
+;
+@llvm_mips_fsub_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsub_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsub_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fsub_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2
+  %2 = tail call <2 x double> @llvm.mips.fsub.d(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fsub.d(<2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: two ld.d, one fsub.d, then st.d.
+; CHECK-LABEL: llvm_mips_fsub_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsub.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsub_d_test
+;
+
+define void @fsub_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2
+  %2 = fsub <4 x float> %0, %1
+  store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES
+  ret void
+}
+; Plain IR fsub: the checks expect, in order, two ld.w, fsub.w, then st.w. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fsub_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsub.w
+; CHECK: st.w
+; CHECK: .size fsub_w_test
+
+define void @fsub_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2
+  %2 = fsub <2 x double> %0, %1
+  store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES
+  ret void
+}
+; Plain IR fsub: the checks expect, in order, two ld.d, fsub.d, then st.d. The label is anchored at line start.
+; CHECK-LABEL: {{^}}fsub_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsub.d
+; CHECK: st.d
+; CHECK: .size fsub_d_test
diff --git a/test/CodeGen/Mips/msa/3rf_4rf.ll b/test/CodeGen/Mips/msa/3rf_4rf.ll
new file mode 100644
index 000000000000..67ef7fd2bae1
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf_4rf.ll
@@ -0,0 +1,106 @@
+; Test the MSA intrinsics that are encoded with the 3RF instruction format and
+; use the result as a third operand.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_fmadd_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmadd_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmadd_w_ARG3 = global <4 x float> <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>, align 16
+@llvm_mips_fmadd_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmadd_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmadd_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmadd_w_ARG2
+  %2 = load <4 x float>* @llvm_mips_fmadd_w_ARG3
+  %3 = tail call <4 x float> @llvm.mips.fmadd.w(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+  store <4 x float> %3, <4 x float>* @llvm_mips_fmadd_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmadd.w(<4 x float>, <4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: three ld.w, one fmadd.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmadd_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmadd.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmadd_w_test
+;
+@llvm_mips_fmadd_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmadd_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmadd_d_ARG3 = global <2 x double> <double 4.000000e+00, double 5.000000e+00>, align 16
+@llvm_mips_fmadd_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmadd_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmadd_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmadd_d_ARG2
+  %2 = load <2 x double>* @llvm_mips_fmadd_d_ARG3
+  %3 = tail call <2 x double> @llvm.mips.fmadd.d(<2 x double> %0, <2 x double> %1, <2 x double> %2)
+  store <2 x double> %3, <2 x double>* @llvm_mips_fmadd_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmadd.d(<2 x double>, <2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: three ld.d, one fmadd.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmadd_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmadd.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmadd_d_test
+;
+@llvm_mips_fmsub_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fmsub_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fmsub_w_ARG3 = global <4 x float> <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>, align 16
+@llvm_mips_fmsub_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fmsub_w_test() nounwind {
+entry:
+  %0 = load <4 x float>* @llvm_mips_fmsub_w_ARG1
+  %1 = load <4 x float>* @llvm_mips_fmsub_w_ARG2
+  %2 = load <4 x float>* @llvm_mips_fmsub_w_ARG3
+  %3 = tail call <4 x float> @llvm.mips.fmsub.w(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+  store <4 x float> %3, <4 x float>* @llvm_mips_fmsub_w_RES
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmsub.w(<4 x float>, <4 x float>, <4 x float>) nounwind
+; The checks below expect, in order: three ld.w, one fmsub.w, then st.w.
+; CHECK-LABEL: llvm_mips_fmsub_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fmsub.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fmsub_w_test
+;
+@llvm_mips_fmsub_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fmsub_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fmsub_d_ARG3 = global <2 x double> <double 4.000000e+00, double 5.000000e+00>, align 16
+@llvm_mips_fmsub_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fmsub_d_test() nounwind {
+entry:
+  %0 = load <2 x double>* @llvm_mips_fmsub_d_ARG1
+  %1 = load <2 x double>* @llvm_mips_fmsub_d_ARG2
+  %2 = load <2 x double>* @llvm_mips_fmsub_d_ARG3
+  %3 = tail call <2 x double> @llvm.mips.fmsub.d(<2 x double> %0, <2 x double> %1, <2 x double> %2)
+  store <2 x double> %3, <2 x double>* @llvm_mips_fmsub_d_RES
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmsub.d(<2 x double>, <2 x double>, <2 x double>) nounwind
+; The checks below expect, in order: three ld.d, one fmsub.d, then st.d.
+; CHECK-LABEL: llvm_mips_fmsub_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fmsub.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fmsub_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3rf_4rf_q.ll b/test/CodeGen/Mips/msa/3rf_4rf_q.ll
new file mode 100644
index 000000000000..de28be0b1c22
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf_4rf_q.ll
@@ -0,0 +1,206 @@
+; Test the MSA intrinsics that are encoded with the 3RF instruction format and
+; use the result as a third operand and perform fixed-point operations.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_madd_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_madd_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_madd_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16
+@llvm_mips_madd_q_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_madd_q_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_madd_q_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_madd_q_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_madd_q_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.madd.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_madd_q_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.madd.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+; The checks below expect, in order: three ld.h, one madd_q.h, then st.h.
+; CHECK-LABEL: llvm_mips_madd_q_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: madd_q.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_madd_q_h_test
+;
+@llvm_mips_madd_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_madd_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_madd_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16
+@llvm_mips_madd_q_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_madd_q_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_madd_q_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_madd_q_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_madd_q_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.madd.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_madd_q_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.madd.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+; The checks below expect, in order: three ld.w, one madd_q.w, then st.w.
+; CHECK-LABEL: llvm_mips_madd_q_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: madd_q.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_madd_q_w_test
+;
+@llvm_mips_maddr_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_maddr_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_maddr_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16
+@llvm_mips_maddr_q_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_maddr_q_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.maddr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_maddr_q_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.maddr.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+; The checks below expect, in order: three ld.h, one maddr_q.h, then st.h.
+; CHECK-LABEL: llvm_mips_maddr_q_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: maddr_q.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_maddr_q_h_test
+;
+@llvm_mips_maddr_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_maddr_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_maddr_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16
+@llvm_mips_maddr_q_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_maddr_q_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.maddr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_maddr_q_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.maddr.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+; The checks below expect, in order: three ld.w, one maddr_q.w, then st.w.
+; CHECK-LABEL: llvm_mips_maddr_q_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: maddr_q.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_maddr_q_w_test
+;
+@llvm_mips_msub_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_msub_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_msub_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16
+@llvm_mips_msub_q_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_msub_q_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_msub_q_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_msub_q_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_msub_q_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.msub.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_msub_q_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.msub.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+; The checks below expect, in order: three ld.h, one msub_q.h, then st.h.
+; CHECK-LABEL: llvm_mips_msub_q_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: msub_q.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_msub_q_h_test
+;
+@llvm_mips_msub_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_msub_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_msub_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16
+@llvm_mips_msub_q_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_msub_q_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_msub_q_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_msub_q_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_msub_q_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.msub.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_msub_q_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.msub.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+; The checks below expect, in order: three ld.w, one msub_q.w, then st.w.
+; CHECK-LABEL: llvm_mips_msub_q_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: msub_q.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_msub_q_w_test
+;
+@llvm_mips_msubr_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_msubr_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_msubr_q_h_ARG3 = global <8 x i16> <i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23>, align 16
+@llvm_mips_msubr_q_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_msubr_q_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG3
+  %3 = tail call <8 x i16> @llvm.mips.msubr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
+  store <8 x i16> %3, <8 x i16>* @llvm_mips_msubr_q_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.msubr.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
+; The checks below expect, in order: three ld.h, one msubr_q.h, then st.h.
+; CHECK-LABEL: llvm_mips_msubr_q_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: msubr_q.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_msubr_q_h_test
+;
+@llvm_mips_msubr_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_msubr_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_msubr_q_w_ARG3 = global <4 x i32> <i32 8, i32 9, i32 10, i32 11>, align 16
+@llvm_mips_msubr_q_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_msubr_q_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG3
+  %3 = tail call <4 x i32> @llvm.mips.msubr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* @llvm_mips_msubr_q_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.msubr.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
+; The checks below expect, in order: three ld.w, one msubr_q.w, then st.w.
+; CHECK-LABEL: llvm_mips_msubr_q_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: msubr_q.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_msubr_q_w_test
+;
diff --git a/test/CodeGen/Mips/msa/3rf_exdo.ll b/test/CodeGen/Mips/msa/3rf_exdo.ll
new file mode 100644
index 000000000000..8a7f268a5069
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf_exdo.ll
@@ -0,0 +1,50 @@
+; Test the MSA floating-point conversion intrinsics that are encoded with the
+; 3RF instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_fexdo_h_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fexdo_h_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fexdo_h_RES  = global <8 x half> <half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00, half 0.000000e+00>, align 16
+
+define void @llvm_mips_fexdo_h_test() nounwind { ; drives @llvm.mips.fexdo.h: two <4 x float> inputs, one <8 x half> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fexdo_h_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fexdo_h_ARG2
+  %res = tail call <8 x half> @llvm.mips.fexdo.h(<4 x float> %arg1, <4 x float> %arg2)
+  store <8 x half> %res, <8 x half>* @llvm_mips_fexdo_h_RES ; result is written to the RES global
+  ret void
+}
+
+declare <8 x half> @llvm.mips.fexdo.h(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fexdo_h_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fexdo.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_fexdo_h_test
+;
+@llvm_mips_fexdo_w_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fexdo_w_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fexdo_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fexdo_w_test() nounwind { ; drives @llvm.mips.fexdo.w: two <2 x double> inputs, one <4 x float> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fexdo_w_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fexdo_w_ARG2
+  %res = tail call <4 x float> @llvm.mips.fexdo.w(<2 x double> %arg1, <2 x double> %arg2)
+  store <4 x float> %res, <4 x float>* @llvm_mips_fexdo_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fexdo.w(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fexdo_w_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fexdo.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fexdo_w_test
+;
diff --git a/test/CodeGen/Mips/msa/3rf_float_int.ll b/test/CodeGen/Mips/msa/3rf_float_int.ll
new file mode 100644
index 000000000000..7b01e1721db9
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf_float_int.ll
@@ -0,0 +1,50 @@
+; Test the MSA intrinsics that are encoded with the 3RF instruction format and
+; take an integer as an operand.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_fexp2_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fexp2_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_fexp2_w_RES  = global <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, align 16
+
+define void @llvm_mips_fexp2_w_test() nounwind { ; drives @llvm.mips.fexp2.w: <4 x float> and <4 x i32> inputs, <4 x float> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fexp2_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_fexp2_w_ARG2
+  %res = tail call <4 x float> @llvm.mips.fexp2.w(<4 x float> %arg1, <4 x i32> %arg2)
+  store <4 x float> %res, <4 x float>* @llvm_mips_fexp2_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fexp2.w(<4 x float>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_fexp2_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fexp2.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fexp2_w_test
+;
+@llvm_mips_fexp2_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fexp2_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_fexp2_d_RES  = global <2 x double> <double 0.000000e+00, double 0.000000e+00>, align 16
+
+define void @llvm_mips_fexp2_d_test() nounwind { ; drives @llvm.mips.fexp2.d: <2 x double> and <2 x i64> inputs, <2 x double> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fexp2_d_ARG1
+  %arg2 = load <2 x i64>* @llvm_mips_fexp2_d_ARG2
+  %res = tail call <2 x double> @llvm.mips.fexp2.d(<2 x double> %arg1, <2 x i64> %arg2)
+  store <2 x double> %res, <2 x double>* @llvm_mips_fexp2_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fexp2.d(<2 x double>, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_fexp2_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fexp2.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fexp2_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3rf_int_float.ll b/test/CodeGen/Mips/msa/3rf_int_float.ll
new file mode 100644
index 000000000000..5624771b8357
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf_int_float.ll
@@ -0,0 +1,974 @@
+; Test the MSA intrinsics that are encoded with the 3RF instruction format and
+; produce an integer as a result.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_fcaf_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcaf_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcaf_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcaf_w_test() nounwind { ; drives @llvm.mips.fcaf.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcaf_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcaf_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcaf.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcaf_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcaf.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcaf_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcaf.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcaf_w_test
+;
+@llvm_mips_fcaf_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcaf_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcaf_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcaf_d_test() nounwind { ; drives @llvm.mips.fcaf.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcaf_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcaf_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcaf.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcaf_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcaf.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcaf_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcaf.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcaf_d_test
+;
+@llvm_mips_fceq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fceq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fceq_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fceq_w_test() nounwind { ; drives @llvm.mips.fceq.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fceq_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fceq_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fceq.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fceq_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fceq.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fceq_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fceq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fceq_w_test
+;
+@llvm_mips_fceq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fceq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fceq_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fceq_d_test() nounwind { ; drives @llvm.mips.fceq.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fceq_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fceq_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fceq.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fceq_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fceq.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fceq_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fceq.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fceq_d_test
+;
+@llvm_mips_fcle_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcle_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcle_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcle_w_test() nounwind { ; drives @llvm.mips.fcle.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcle_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcle_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcle.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcle_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcle.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcle_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcle.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcle_w_test
+;
+@llvm_mips_fcle_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcle_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcle_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcle_d_test() nounwind { ; drives @llvm.mips.fcle.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcle_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcle_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcle.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcle_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcle.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcle_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcle.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcle_d_test
+;
+@llvm_mips_fclt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fclt_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fclt_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fclt_w_test() nounwind { ; drives @llvm.mips.fclt.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fclt_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fclt_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fclt.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fclt_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fclt.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fclt_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fclt.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fclt_w_test
+;
+@llvm_mips_fclt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fclt_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fclt_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fclt_d_test() nounwind { ; drives @llvm.mips.fclt.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fclt_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fclt_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fclt.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fclt_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fclt.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fclt_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fclt.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fclt_d_test
+;
+@llvm_mips_fcor_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcor_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcor_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcor_w_test() nounwind { ; drives @llvm.mips.fcor.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcor_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcor_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcor.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcor_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcor.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcor_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcor.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcor_w_test
+;
+@llvm_mips_fcor_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcor_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcor_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcor_d_test() nounwind { ; drives @llvm.mips.fcor.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcor_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcor_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcor.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcor_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcor.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcor_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcor.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcor_d_test
+;
+@llvm_mips_fcne_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcne_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcne_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcne_w_test() nounwind { ; drives @llvm.mips.fcne.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcne_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcne_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcne.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcne_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcne.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcne_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcne.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcne_w_test
+;
+@llvm_mips_fcne_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcne_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcne_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcne_d_test() nounwind { ; drives @llvm.mips.fcne.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcne_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcne_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcne.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcne_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcne.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcne_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcne.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcne_d_test
+;
+@llvm_mips_fcueq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcueq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcueq_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcueq_w_test() nounwind { ; drives @llvm.mips.fcueq.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcueq_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcueq_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcueq.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcueq_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcueq.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcueq_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcueq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcueq_w_test
+;
+@llvm_mips_fcueq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcueq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcueq_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcueq_d_test() nounwind { ; drives @llvm.mips.fcueq.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcueq_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcueq_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcueq.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcueq_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcueq.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcueq_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcueq.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcueq_d_test
+;
+@llvm_mips_fcult_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcult_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcult_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcult_w_test() nounwind { ; drives @llvm.mips.fcult.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcult_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcult_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcult.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcult_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcult.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcult_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcult.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcult_w_test
+;
+@llvm_mips_fcult_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcult_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcult_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcult_d_test() nounwind { ; drives @llvm.mips.fcult.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcult_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcult_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcult.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcult_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcult.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcult_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcult.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcult_d_test
+;
+@llvm_mips_fcule_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcule_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcule_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcule_w_test() nounwind { ; drives @llvm.mips.fcule.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcule_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcule_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcule.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcule_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcule.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcule_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcule.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcule_w_test
+;
+@llvm_mips_fcule_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcule_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcule_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcule_d_test() nounwind { ; drives @llvm.mips.fcule.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcule_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcule_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcule.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcule_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcule.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcule_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcule.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcule_d_test
+;
+@llvm_mips_fcun_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcun_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcun_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcun_w_test() nounwind { ; drives @llvm.mips.fcun.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcun_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcun_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcun.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcun_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcun.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcun_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcun.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcun_w_test
+;
+@llvm_mips_fcun_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcun_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcun_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcun_d_test() nounwind { ; drives @llvm.mips.fcun.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcun_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcun_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcun.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcun_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcun.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcun_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcun.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcun_d_test
+;
+@llvm_mips_fcune_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fcune_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fcune_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fcune_w_test() nounwind { ; drives @llvm.mips.fcune.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fcune_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fcune_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fcune.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fcune_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fcune.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fcune_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fcune.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fcune_w_test
+;
+@llvm_mips_fcune_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fcune_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fcune_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fcune_d_test() nounwind { ; drives @llvm.mips.fcune.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fcune_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fcune_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fcune.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fcune_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fcune.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fcune_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fcune.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fcune_d_test
+;
+@llvm_mips_fsaf_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsaf_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsaf_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fsaf_w_test() nounwind { ; drives @llvm.mips.fsaf.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fsaf_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fsaf_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsaf.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsaf_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsaf.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsaf_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsaf.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsaf_w_test
+;
+@llvm_mips_fsaf_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsaf_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsaf_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fsaf_d_test() nounwind { ; drives @llvm.mips.fsaf.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fsaf_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fsaf_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsaf.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsaf_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsaf.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsaf_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsaf.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsaf_d_test
+;
+@llvm_mips_fseq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fseq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fseq_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fseq_w_test() nounwind { ; drives @llvm.mips.fseq.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fseq_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fseq_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fseq.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fseq_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fseq.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fseq_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fseq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fseq_w_test
+;
+@llvm_mips_fseq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fseq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fseq_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fseq_d_test() nounwind { ; drives @llvm.mips.fseq.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fseq_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fseq_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fseq.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fseq_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fseq.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fseq_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fseq.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fseq_d_test
+;
+@llvm_mips_fsle_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsle_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsle_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fsle_w_test() nounwind { ; drives @llvm.mips.fsle.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fsle_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fsle_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsle.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsle_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsle.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsle_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsle.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsle_w_test
+;
+@llvm_mips_fsle_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsle_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsle_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fsle_d_test() nounwind { ; drives @llvm.mips.fsle.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fsle_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fsle_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsle.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsle_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsle.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsle_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsle.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsle_d_test
+;
+@llvm_mips_fslt_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fslt_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fslt_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fslt_w_test() nounwind { ; drives @llvm.mips.fslt.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fslt_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fslt_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fslt.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fslt_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fslt.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fslt_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fslt.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fslt_w_test
+;
+@llvm_mips_fslt_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fslt_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fslt_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fslt_d_test() nounwind { ; drives @llvm.mips.fslt.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fslt_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fslt_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fslt.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fslt_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fslt.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fslt_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fslt.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fslt_d_test
+;
+@llvm_mips_fsor_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsor_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsor_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fsor_w_test() nounwind { ; drives @llvm.mips.fsor.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fsor_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fsor_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsor.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsor_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsor.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsor_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsor.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsor_w_test
+;
+@llvm_mips_fsor_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsor_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsor_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_fsor_d_test() nounwind { ; drives @llvm.mips.fsor.d: two <2 x double> inputs, one <2 x i64> result
+entry:
+  %arg1 = load <2 x double>* @llvm_mips_fsor_d_ARG1
+  %arg2 = load <2 x double>* @llvm_mips_fsor_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsor.d(<2 x double> %arg1, <2 x double> %arg2)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsor_d_RES ; result is written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsor.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsor_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsor.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsor_d_test
+;
+@llvm_mips_fsne_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsne_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsne_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_fsne_w_test() nounwind { ; drives @llvm.mips.fsne.w: two <4 x float> inputs, one <4 x i32> result
+entry:
+  %arg1 = load <4 x float>* @llvm_mips_fsne_w_ARG1
+  %arg2 = load <4 x float>* @llvm_mips_fsne_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsne.w(<4 x float> %arg1, <4 x float> %arg2)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsne_w_RES ; result is written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsne.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsne_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsne.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsne_w_test
+;
+@llvm_mips_fsne_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsne_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsne_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Applies llvm.mips.fsne.d to the two operand globals and stores the <2 x i64> result.
+define void @llvm_mips_fsne_d_test() nounwind {
+entry:
+  %lhs = load <2 x double>* @llvm_mips_fsne_d_ARG1
+  %rhs = load <2 x double>* @llvm_mips_fsne_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsne.d(<2 x double> %lhs, <2 x double> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsne_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsne.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsne_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsne.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsne_d_test
+;
+@llvm_mips_fsueq_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsueq_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsueq_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.fsueq.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_fsueq_w_test() nounwind {
+entry:
+  %lhs = load <4 x float>* @llvm_mips_fsueq_w_ARG1
+  %rhs = load <4 x float>* @llvm_mips_fsueq_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsueq.w(<4 x float> %lhs, <4 x float> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsueq_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsueq.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsueq_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsueq.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsueq_w_test
+;
+@llvm_mips_fsueq_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsueq_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsueq_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Applies llvm.mips.fsueq.d to the two operand globals and stores the <2 x i64> result.
+define void @llvm_mips_fsueq_d_test() nounwind {
+entry:
+  %lhs = load <2 x double>* @llvm_mips_fsueq_d_ARG1
+  %rhs = load <2 x double>* @llvm_mips_fsueq_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsueq.d(<2 x double> %lhs, <2 x double> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsueq_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsueq.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsueq_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsueq.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsueq_d_test
+;
+@llvm_mips_fsult_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsult_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsult_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.fsult.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_fsult_w_test() nounwind {
+entry:
+  %lhs = load <4 x float>* @llvm_mips_fsult_w_ARG1
+  %rhs = load <4 x float>* @llvm_mips_fsult_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsult.w(<4 x float> %lhs, <4 x float> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsult_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsult.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsult_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsult.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsult_w_test
+;
+@llvm_mips_fsult_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsult_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsult_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Applies llvm.mips.fsult.d to the two operand globals and stores the <2 x i64> result.
+define void @llvm_mips_fsult_d_test() nounwind {
+entry:
+  %lhs = load <2 x double>* @llvm_mips_fsult_d_ARG1
+  %rhs = load <2 x double>* @llvm_mips_fsult_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsult.d(<2 x double> %lhs, <2 x double> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsult_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsult.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsult_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsult.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsult_d_test
+;
+@llvm_mips_fsule_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsule_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsule_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.fsule.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_fsule_w_test() nounwind {
+entry:
+  %lhs = load <4 x float>* @llvm_mips_fsule_w_ARG1
+  %rhs = load <4 x float>* @llvm_mips_fsule_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsule.w(<4 x float> %lhs, <4 x float> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsule_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsule.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsule_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsule.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsule_w_test
+;
+@llvm_mips_fsule_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsule_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsule_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Applies llvm.mips.fsule.d to the two operand globals and stores the <2 x i64> result.
+define void @llvm_mips_fsule_d_test() nounwind {
+entry:
+  %lhs = load <2 x double>* @llvm_mips_fsule_d_ARG1
+  %rhs = load <2 x double>* @llvm_mips_fsule_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsule.d(<2 x double> %lhs, <2 x double> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsule_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsule.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsule_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsule.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsule_d_test
+;
+@llvm_mips_fsun_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsun_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsun_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.fsun.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_fsun_w_test() nounwind {
+entry:
+  %lhs = load <4 x float>* @llvm_mips_fsun_w_ARG1
+  %rhs = load <4 x float>* @llvm_mips_fsun_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsun.w(<4 x float> %lhs, <4 x float> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsun_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsun.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsun_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsun.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsun_w_test
+;
+@llvm_mips_fsun_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsun_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsun_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Applies llvm.mips.fsun.d to the two operand globals and stores the <2 x i64> result.
+define void @llvm_mips_fsun_d_test() nounwind {
+entry:
+  %lhs = load <2 x double>* @llvm_mips_fsun_d_ARG1
+  %rhs = load <2 x double>* @llvm_mips_fsun_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsun.d(<2 x double> %lhs, <2 x double> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsun_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsun.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsun_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsun.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsun_d_test
+;
+@llvm_mips_fsune_w_ARG1 = global <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, align 16
+@llvm_mips_fsune_w_ARG2 = global <4 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>, align 16
+@llvm_mips_fsune_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.fsune.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_fsune_w_test() nounwind {
+entry:
+  %lhs = load <4 x float>* @llvm_mips_fsune_w_ARG1
+  %rhs = load <4 x float>* @llvm_mips_fsune_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.fsune.w(<4 x float> %lhs, <4 x float> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_fsune_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.fsune.w(<4 x float>, <4 x float>) nounwind
+
+; CHECK: llvm_mips_fsune_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: fsune.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_fsune_w_test
+;
+@llvm_mips_fsune_d_ARG1 = global <2 x double> <double 0.000000e+00, double 1.000000e+00>, align 16
+@llvm_mips_fsune_d_ARG2 = global <2 x double> <double 2.000000e+00, double 3.000000e+00>, align 16
+@llvm_mips_fsune_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Applies llvm.mips.fsune.d to the two operand globals and stores the <2 x i64> result.
+define void @llvm_mips_fsune_d_test() nounwind {
+entry:
+  %lhs = load <2 x double>* @llvm_mips_fsune_d_ARG1
+  %rhs = load <2 x double>* @llvm_mips_fsune_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.fsune.d(<2 x double> %lhs, <2 x double> %rhs)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_fsune_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.fsune.d(<2 x double>, <2 x double>) nounwind
+
+; CHECK: llvm_mips_fsune_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: fsune.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_fsune_d_test
+;
diff --git a/test/CodeGen/Mips/msa/3rf_q.ll b/test/CodeGen/Mips/msa/3rf_q.ll
new file mode 100644
index 000000000000..f7000ee913a6
--- /dev/null
+++ b/test/CodeGen/Mips/msa/3rf_q.ll
@@ -0,0 +1,94 @@
+; Test the MSA fixed-point intrinsics that are encoded with the 3RF instruction
+; format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_mul_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mul_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_mul_q_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Applies llvm.mips.mul.q.h to the two operand globals and stores the <8 x i16> result.
+define void @llvm_mips_mul_q_h_test() nounwind {
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_mul_q_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_mul_q_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.mul.q.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_mul_q_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mul.q.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_mul_q_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: mul_q.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mul_q_h_test
+;
+@llvm_mips_mul_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mul_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_mul_q_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.mul.q.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_mul_q_w_test() nounwind {
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_mul_q_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_mul_q_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.mul.q.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_mul_q_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mul.q.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_mul_q_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: mul_q.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mul_q_w_test
+;
+@llvm_mips_mulr_q_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mulr_q_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_mulr_q_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Applies llvm.mips.mulr.q.h to the two operand globals and stores the <8 x i16> result.
+define void @llvm_mips_mulr_q_h_test() nounwind {
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_mulr_q_h_ARG1
+  %rhs = load <8 x i16>* @llvm_mips_mulr_q_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.mulr.q.h(<8 x i16> %lhs, <8 x i16> %rhs)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_mulr_q_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mulr.q.h(<8 x i16>, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_mulr_q_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: mulr_q.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mulr_q_h_test
+;
+@llvm_mips_mulr_q_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mulr_q_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_mulr_q_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Applies llvm.mips.mulr.q.w to the two operand globals and stores the <4 x i32> result.
+define void @llvm_mips_mulr_q_w_test() nounwind {
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_mulr_q_w_ARG1
+  %rhs = load <4 x i32>* @llvm_mips_mulr_q_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.mulr.q.w(<4 x i32> %lhs, <4 x i32> %rhs)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_mulr_q_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mulr.q.w(<4 x i32>, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_mulr_q_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: mulr_q.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mulr_q_w_test
+;
diff --git a/test/CodeGen/Mips/msa/arithmetic.ll b/test/CodeGen/Mips/msa/arithmetic.ll
new file mode 100644
index 000000000000..09ee5023c7b1
--- /dev/null
+++ b/test/CodeGen/Mips/msa/arithmetic.ll
@@ -0,0 +1,726 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: add_v16i8:
+  ; Vector add of *%a and *%b written to *%c; addv.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %sum = add <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %sum, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v16i8
+}
+
+define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: add_v8i16:
+  ; Vector add of *%a and *%b written to *%c; addv.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %sum = add <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %sum, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v8i16
+}
+
+define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: add_v4i32:
+  ; Vector add of *%a and *%b written to *%c; addv.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %sum = add <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %sum, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v4i32
+}
+
+define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: add_v2i64:
+  ; Vector add of *%a and *%b written to *%c; addv.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %sum = add <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %sum, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v2i64
+}
+
+define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: add_v16i8_i:
+  ; Adds an all-ones-valued constant vector to *%a into *%c; addvi.b with immediate 1 is expected.
+  %vec = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %sum = add <16 x i8> %vec, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                              i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %sum, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v16i8_i
+}
+
+define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: add_v8i16_i:
+  ; Adds an all-ones-valued constant vector to *%a into *%c; addvi.h with immediate 1 is expected.
+  %vec = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %sum = add <8 x i16> %vec, <i16 1, i16 1, i16 1, i16 1,
+                              i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %sum, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v8i16_i
+}
+
+define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: add_v4i32_i:
+  ; Adds an all-ones-valued constant vector to *%a into *%c; addvi.w with immediate 1 is expected.
+  %vec = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %sum = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %sum, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v4i32_i
+}
+
+define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: add_v2i64_i:
+  ; Adds an all-ones-valued constant vector to *%a into *%c; addvi.d with immediate 1 is expected.
+  %vec = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %sum = add <2 x i64> %vec, <i64 1, i64 1>
+  ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %sum, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v2i64_i
+}
+
+define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: sub_v16i8:
+  ; Vector subtract (*%a - *%b) written to *%c; subv.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %diff = sub <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %diff, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v16i8
+}
+
+define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: sub_v8i16:
+  ; Vector subtract (*%a - *%b) written to *%c; subv.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %diff = sub <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %diff, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v8i16
+}
+
+define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: sub_v4i32:
+  ; Vector subtract (*%a - *%b) written to *%c; subv.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %diff = sub <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %diff, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v4i32
+}
+
+define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: sub_v2i64:
+  ; Vector subtract (*%a - *%b) written to *%c; subv.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %diff = sub <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %diff, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v2i64
+}
+
+define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: sub_v16i8_i:
+  ; Subtracts an all-ones-valued constant vector from *%a into *%c; subvi.b with immediate 1 is expected.
+  %vec = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %diff = sub <16 x i8> %vec, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %diff, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v16i8_i
+}
+
+define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: sub_v8i16_i:
+  ; Subtracts an all-ones-valued constant vector from *%a into *%c; subvi.h with immediate 1 is expected.
+  %vec = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %diff = sub <8 x i16> %vec, <i16 1, i16 1, i16 1, i16 1,
+                               i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %diff, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v8i16_i
+}
+
+define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: sub_v4i32_i:
+  ; Subtracts an all-ones-valued constant vector from *%a into *%c; subvi.w with immediate 1 is expected.
+  %vec = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %diff = sub <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %diff, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v4i32_i
+}
+
+define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: sub_v2i64_i:
+  ; Subtracts an all-ones-valued constant vector from *%a into *%c; subvi.d with immediate 1 is expected.
+  %vec = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %diff = sub <2 x i64> %vec, <i64 1, i64 1>
+  ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %diff, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v2i64_i
+}
+
+define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: mul_v16i8:
+  ; Vector multiply of *%a and *%b written to *%c; mulv.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %prod = mul <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %prod, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mul_v16i8
+}
+
+define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: mul_v8i16:
+  ; Vector multiply of *%a and *%b written to *%c; mulv.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %prod = mul <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %prod, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mul_v8i16
+}
+
+define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: mul_v4i32:
+  ; Vector multiply of *%a and *%b written to *%c; mulv.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %prod = mul <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %prod, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mul_v4i32
+}
+
+define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: mul_v2i64:
+  ; Vector multiply of *%a and *%b written to *%c; mulv.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %prod = mul <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %prod, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mul_v2i64
+}
+
+define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
+                         <16 x i8>* %c) nounwind {
+  ; CHECK: maddv_v16i8:
+  ; Stores (*%b * *%c) + *%a to *%d; the mul/add pair is expected to fold into maddv.b.
+  %acc = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <16 x i8>* %c
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <16 x i8> %mul_lhs, %mul_rhs
+  %res = add <16 x i8> %prod, %acc
+  ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
+  store <16 x i8> %res, <16 x i8>* %d
+  ; CHECK-DAG: st.b [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size maddv_v16i8
+}
+
+define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
+                         <8 x i16>* %c) nounwind {
+  ; CHECK: maddv_v8i16:
+  ; Stores (*%b * *%c) + *%a to *%d; the mul/add pair is expected to fold into maddv.h.
+  %acc = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <8 x i16>* %c
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <8 x i16> %mul_lhs, %mul_rhs
+  %res = add <8 x i16> %prod, %acc
+  ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
+  store <8 x i16> %res, <8 x i16>* %d
+  ; CHECK-DAG: st.h [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size maddv_v8i16
+}
+
+define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
+                         <4 x i32>* %c) nounwind {
+  ; CHECK: maddv_v4i32:
+  ; Stores (*%b * *%c) + *%a to *%d; the mul/add pair is expected to fold into maddv.w.
+  %acc = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <4 x i32>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <4 x i32> %mul_lhs, %mul_rhs
+  %res = add <4 x i32> %prod, %acc
+  ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
+  store <4 x i32> %res, <4 x i32>* %d
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size maddv_v4i32
+}
+
+define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
+                         <2 x i64>* %c) nounwind {
+  ; CHECK: maddv_v2i64:
+  ; Stores (*%b * *%c) + *%a to *%d; the mul/add pair is expected to fold into maddv.d.
+  %acc = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <2 x i64>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <2 x i64> %mul_lhs, %mul_rhs
+  %res = add <2 x i64> %prod, %acc
+  ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
+  store <2 x i64> %res, <2 x i64>* %d
+  ; CHECK-DAG: st.d [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size maddv_v2i64
+}
+
+define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
+                         <16 x i8>* %c) nounwind {
+  ; CHECK: msubv_v16i8:
+  ; Stores *%a - (*%b * *%c) to *%d; the mul/sub pair is expected to fold into msubv.b.
+  %acc = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <16 x i8>* %c
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <16 x i8> %mul_lhs, %mul_rhs
+  %res = sub <16 x i8> %acc, %prod
+  ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
+  store <16 x i8> %res, <16 x i8>* %d
+  ; CHECK-DAG: st.b [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size msubv_v16i8
+}
+
+define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
+                         <8 x i16>* %c) nounwind {
+  ; CHECK: msubv_v8i16:
+  ; Stores *%a - (*%b * *%c) to *%d; the mul/sub pair is expected to fold into msubv.h.
+  %acc = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <8 x i16>* %c
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <8 x i16> %mul_lhs, %mul_rhs
+  %res = sub <8 x i16> %acc, %prod
+  ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
+  store <8 x i16> %res, <8 x i16>* %d
+  ; CHECK-DAG: st.h [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size msubv_v8i16
+}
+
+define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
+                         <4 x i32>* %c) nounwind {
+  ; CHECK: msubv_v4i32:
+  ; Stores *%a - (*%b * *%c) to *%d; the mul/sub pair is expected to fold into msubv.w.
+  %acc = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <4 x i32>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <4 x i32> %mul_lhs, %mul_rhs
+  %res = sub <4 x i32> %acc, %prod
+  ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
+  store <4 x i32> %res, <4 x i32>* %d
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size msubv_v4i32
+}
+
+define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
+                         <2 x i64>* %c) nounwind {
+  ; CHECK: msubv_v2i64:
+  ; Stores *%a - (*%b * *%c) to *%d; the mul/sub pair is expected to fold into msubv.d.
+  %acc = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %mul_lhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %mul_rhs = load <2 x i64>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %prod = mul <2 x i64> %mul_lhs, %mul_rhs
+  %res = sub <2 x i64> %acc, %prod
+  ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
+  store <2 x i64> %res, <2 x i64>* %d
+  ; CHECK-DAG: st.d [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size msubv_v2i64
+}
+
+define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: div_s_v16i8:
+  ; Signed vector divide (*%a / *%b) written to *%c; div_s.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %quot = sdiv <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %quot, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_s_v16i8
+}
+
+define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: div_s_v8i16:
+  ; Signed vector divide (*%a / *%b) written to *%c; div_s.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %quot = sdiv <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %quot, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_s_v8i16
+}
+
+define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: div_s_v4i32:
+  ; Signed vector divide (*%a / *%b) written to *%c; div_s.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %quot = sdiv <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %quot, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_s_v4i32
+}
+
+define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: div_s_v2i64:
+  ; Signed vector divide (*%a / *%b) written to *%c; div_s.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %quot = sdiv <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %quot, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_s_v2i64
+}
+
+define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: div_u_v16i8:
+  ; Unsigned vector divide (*%a / *%b) written to *%c; div_u.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %quot = udiv <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %quot, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_u_v16i8
+}
+
+define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: div_u_v8i16:
+  ; Unsigned vector divide (*%a / *%b) written to *%c; div_u.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %quot = udiv <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %quot, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_u_v8i16
+}
+
+define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: div_u_v4i32:
+  ; Unsigned vector divide (*%a / *%b) written to *%c; div_u.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %quot = udiv <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %quot, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_u_v4i32
+}
+
+define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: div_u_v2i64:
+  ; Unsigned vector divide (*%a / *%b) written to *%c; div_u.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %quot = udiv <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %quot, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size div_u_v2i64
+}
+
+define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: mod_s_v16i8:
+  ; Signed vector remainder (*%a rem *%b) written to *%c; mod_s.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %rem = srem <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %rem, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_s_v16i8
+}
+
+define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: mod_s_v8i16:
+  ; Signed vector remainder (*%a rem *%b) written to *%c; mod_s.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %rem = srem <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %rem, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_s_v8i16
+}
+
+define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: mod_s_v4i32:
+  ; Signed vector remainder (*%a rem *%b) written to *%c; mod_s.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %rem = srem <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %rem, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_s_v4i32
+}
+
+define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: mod_s_v2i64:
+  ; Signed vector remainder (*%a rem *%b) written to *%c; mod_s.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %rem = srem <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %rem, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_s_v2i64
+}
+
+define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: mod_u_v16i8:
+  ; Unsigned vector remainder (*%a rem *%b) written to *%c; mod_u.b is the expected instruction.
+  %lhs = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %rem = urem <16 x i8> %lhs, %rhs
+  ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %rem, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_u_v16i8
+}
+
+define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: mod_u_v8i16:
+  ; Unsigned vector remainder (*%a rem *%b) written to *%c; mod_u.h is the expected instruction.
+  %lhs = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %rem = urem <8 x i16> %lhs, %rhs
+  ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %rem, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_u_v8i16
+}
+
+define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: mod_u_v4i32:
+  ; Unsigned vector remainder (*%a rem *%b) written to *%c; mod_u.w is the expected instruction.
+  %lhs = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %rem = urem <4 x i32> %lhs, %rhs
+  ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %rem, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_u_v4i32
+}
+
+define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: mod_u_v2i64:
+  ; Unsigned vector remainder (*%a rem *%b) written to *%c; mod_u.d is the expected instruction.
+  %lhs = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %rem = urem <2 x i64> %lhs, %rhs
+  ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %rem, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mod_u_v2i64
+}
diff --git a/test/CodeGen/Mips/msa/arithmetic_float.ll b/test/CodeGen/Mips/msa/arithmetic_float.ll
new file mode 100644
index 000000000000..dc3872129205
--- /dev/null
+++ b/test/CodeGen/Mips/msa/arithmetic_float.ll
@@ -0,0 +1,456 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: add_v4f32:
+
+  %lhs = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %sum = fadd <4 x float> %lhs, %rhs ; lane-wise single-precision add
+  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x float> %sum, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v4f32
+}
+
+define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: add_v2f64:
+
+  %lhs = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %sum = fadd <2 x double> %lhs, %rhs ; lane-wise double-precision add
+  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x double> %sum, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size add_v2f64
+}
+
+define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: sub_v4f32:
+
+  %lhs = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %diff = fsub <4 x float> %lhs, %rhs ; lane-wise single-precision subtract
+  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x float> %diff, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v4f32
+}
+
+define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: sub_v2f64:
+
+  %lhs = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %diff = fsub <2 x double> %lhs, %rhs ; lane-wise double-precision subtract
+  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x double> %diff, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sub_v2f64
+}
+
+define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: mul_v4f32:
+
+  %lhs = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %prod = fmul <4 x float> %lhs, %rhs ; lane-wise single-precision multiply
+  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x float> %prod, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mul_v4f32
+}
+
+define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: mul_v2f64:
+
+  %lhs = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %rhs = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %prod = fmul <2 x double> %lhs, %rhs ; lane-wise double-precision multiply
+  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x double> %prod, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mul_v2f64
+}
+
+define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
+                       <4 x float>* %c) nounwind {
+  ; CHECK: fma_v4f32:
+
+  %va = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %vb = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %vc = load <4 x float>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %fused = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %va, <4 x float> %vb,
+                                              <4 x float> %vc) ; NOTE(review): fma(a, b, c) is a*b + c; confirm the fmadd operand order expected below
+  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
+  store <4 x float> %fused, <4 x float>* %d
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size fma_v4f32
+}
+
+define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
+                       <2 x double>* %c) nounwind {
+  ; CHECK: fma_v2f64:
+
+  %va = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %vb = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %vc = load <2 x double>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %fused = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %va, <2 x double> %vb,
+                                               <2 x double> %vc) ; NOTE(review): fma(a, b, c) is a*b + c; confirm the fmadd operand order expected below
+  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
+  store <2 x double> %fused, <2 x double>* %d
+  ; CHECK-DAG: st.d [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size fma_v2f64
+}
+
+define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
+                       <4 x float>* %c) nounwind {
+  ; CHECK: fmsub_v4f32:
+
+  %va = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %vb = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %vc = load <4 x float>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %prod = fmul <4 x float> %vb, %vc
+  %diff = fsub <4 x float> %va, %prod ; a - b*c, expected to fuse into a single fmsub
+  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
+  store <4 x float> %diff, <4 x float>* %d
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size fmsub_v4f32
+}
+
+define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
+                       <2 x double>* %c) nounwind {
+  ; CHECK: fmsub_v2f64:
+
+  %va = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %vb = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %vc = load <2 x double>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %prod = fmul <2 x double> %vb, %vc
+  %diff = fsub <2 x double> %va, %prod ; a - b*c, expected to fuse into a single fmsub
+  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
+  store <2 x double> %diff, <2 x double>* %d
+  ; CHECK-DAG: st.d [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size fmsub_v2f64
+}
+
+define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: fdiv_v4f32:
+
+  %num = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %den = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %quot = fdiv <4 x float> %num, %den ; lane-wise single-precision divide
+  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x float> %quot, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size fdiv_v4f32
+}
+
+define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: fdiv_v2f64:
+
+  %num = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %den = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %quot = fdiv <2 x double> %num, %den ; lane-wise double-precision divide
+  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x double> %quot, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size fdiv_v2f64
+}
+
+define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
+  ; CHECK: fabs_v4f32:
+
+  %in = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %abs = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %in) ; expected as fmax_a of the input with itself
+  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  store <4 x float> %abs, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size fabs_v4f32
+}
+
+define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
+  ; CHECK: fabs_v2f64:
+
+  %in = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %abs = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %in) ; expected as fmax_a of the input with itself
+  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  store <2 x double> %abs, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size fabs_v2f64
+}
+
+define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
+  ; CHECK: fexp2_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1) ; expected as fexp2 scaling a splat of 1.0 (ldi 1 converted by ffint_u)
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
+  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
+  store <4 x float> %2, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R5]], 0($4)
+
+  ret void
+  ; CHECK: .size fexp2_v4f32
+}
+
+define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
+  ; CHECK: fexp2_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1) ; expected as fexp2 scaling a splat of 1.0 (ldi 1 converted by ffint_u)
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
+  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
+  store <2 x double> %2, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R5]], 0($4)
+
+  ret void
+  ; CHECK: .size fexp2_v2f64
+}
+
+define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
+  ; CHECK: fexp2_v4f32_2:
+
+  %in = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %pow = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %in)
+  %scaled = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %pow ; 2.0 * exp2(x) should fold into one fexp2
+  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
+  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
+  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
+  store <4 x float> %scaled, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R5]], 0($4)
+
+  ret void
+  ; CHECK: .size fexp2_v4f32_2
+}
+
+define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
+  ; CHECK:      .8byte 4611686018427387904
+  ; CHECK-NEXT: .8byte 4611686018427387904
+  ; CHECK: fexp2_v2f64_2:
+
+  %in = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %pow = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %in)
+  %scaled = fmul <2 x double> <double 2.0, double 2.0>, %pow ; 2.0 * exp2(x) should fold into one fexp2, with the 2.0 splat loaded from memory
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo(
+  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
+  store <2 x double> %scaled, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size fexp2_v2f64_2
+}
+
+define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
+  ; CHECK: fsqrt_v4f32:
+
+  %in = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %root = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %in) ; lane-wise square root
+  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x float> %root, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size fsqrt_v4f32
+}
+
+define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
+  ; CHECK: fsqrt_v2f64:
+
+  %in = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %root = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %in) ; lane-wise square root
+  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x double> %root, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size fsqrt_v2f64
+}
+
+define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: ffint_u_v4f32:
+
+  %ints = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %conv = uitofp <4 x i32> %ints to <4 x float> ; unsigned integer -> float, per lane
+  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x float> %conv, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ffint_u_v4f32
+}
+
+define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: ffint_u_v2f64:
+
+  %ints = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %conv = uitofp <2 x i64> %ints to <2 x double> ; unsigned integer -> double, per lane
+  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x double> %conv, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ffint_u_v2f64
+}
+
+define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: ffint_s_v4f32:
+
+  %ints = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %conv = sitofp <4 x i32> %ints to <4 x float> ; signed integer -> float, per lane
+  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x float> %conv, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ffint_s_v4f32
+}
+
+define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: ffint_s_v2f64:
+
+  %ints = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %conv = sitofp <2 x i64> %ints to <2 x double> ; signed integer -> double, per lane
+  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x double> %conv, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ffint_s_v2f64
+}
+
+define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
+  ; CHECK: ftrunc_u_v4f32:
+
+  %floats = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %conv = fptoui <4 x float> %floats to <4 x i32> ; float -> unsigned integer, per lane
+  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x i32> %conv, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ftrunc_u_v4f32
+}
+
+define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
+  ; CHECK: ftrunc_u_v2f64:
+
+  %floats = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %conv = fptoui <2 x double> %floats to <2 x i64> ; double -> unsigned integer, per lane
+  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x i64> %conv, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ftrunc_u_v2f64
+}
+
+define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
+  ; CHECK: ftrunc_s_v4f32:
+
+  %floats = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %conv = fptosi <4 x float> %floats to <4 x i32> ; float -> signed integer, per lane
+  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x i32> %conv, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ftrunc_s_v4f32
+}
+
+define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
+  ; CHECK: ftrunc_s_v2f64:
+
+  %floats = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %conv = fptosi <2 x double> %floats to <2 x i64> ; double -> signed integer, per lane
+  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x i64> %conv, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ftrunc_s_v2f64
+}
+
+declare <4 x float>  @llvm.fabs.v4f32(<4 x float>  %Val)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
+declare <4 x float>  @llvm.exp2.v4f32(<4 x float>  %val)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
+declare <4 x float>  @llvm.fma.v4f32(<4 x float>  %a, <4 x float>  %b,
+                                     <4 x float>  %c)
+declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
+                                     <2 x double> %c)
+declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>  %Val)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)
diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll
new file mode 100644
index 000000000000..0169a0780d36
--- /dev/null
+++ b/test/CodeGen/Mips/msa/basic_operations.ll
@@ -0,0 +1,481 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s
+
+@v4i8 = global <4 x i8> <i8 0, i8 0, i8 0, i8 0>
+@v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+@v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+@v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+@v2i64 = global <2 x i64> <i64 0, i64 0>
+@i64 = global i64 0
+
+define void @const_v16i8() nounwind {
+  ; MIPS32-AE: const_v16i8:
+
+  store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8 ; all-zero splat
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>*@v16i8 ; byte splat of 1
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
+
+  store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8 ; not a splat: the last lane differs
+  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8 ; no repeating pattern
+  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8 ; repeats every 2 bytes, so the halfword value depends on endianness
+  ; MIPS32-BE: ldi.h [[R1:\$w[0-9]+]], 256
+  ; MIPS32-LE: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>, <16 x i8>*@v16i8 ; repeats every 4 bytes: 0x01020304 big-endian, 0x04030201 little-endian
+  ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 258
+  ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 1027
+  ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 772
+  ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 513
+  ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
+
+  store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8 ; repeats every 8 bytes; expected to be loaded from memory
+  ; MIPS32-AE: ld.b  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32-AE: .size const_v16i8
+}
+
+define void @const_v8i16() nounwind {
+  ; MIPS32-AE: const_v8i16:
+
+  store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16 ; all-zero splat
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>*@v8i16 ; halfword splat of 1
+  ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16 ; not a splat
+  ; MIPS32-AE: ld.h  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16 ; 1028 = 0x0404, i.e. a byte splat of 4
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 4
+
+  store volatile <8 x i16> <i16 1, i16 2, i16 1, i16 2, i16 1, i16 2, i16 1, i16 2>, <8 x i16>*@v8i16 ; repeats every 32 bits: 0x00010002 big-endian, 0x00020001 little-endian
+  ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 1
+  ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 2
+  ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 2
+  ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 1
+  ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
+
+  store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16 ; repeats every 64 bits; expected to be loaded from memory
+  ; MIPS32-AE: ld.h  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32-AE: .size const_v8i16
+}
+
+define void @const_v4i32() nounwind {
+  ; MIPS32-AE: const_v4i32:
+
+  store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32 ; all-zero splat
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>*@v4i32 ; word splat of 1
+  ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1
+
+  store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32 ; not a splat: the last lane differs
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32 ; 16843009 = 0x01010101, i.e. a byte splat of 1
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
+
+  store volatile <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>, <4 x i32>*@v4i32 ; 65537 = 0x00010001, i.e. a halfword splat of 1
+  ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32 ; repeats every 64 bits; expected to be loaded from memory
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32 ; no repeating pattern
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32-AE: .size const_v4i32
+}
+
+define void @const_v2i64() nounwind {
+  ; MIPS32-AE: const_v2i64:
+
+  store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64 ; all-zero splat
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+
+  store volatile <2 x i64> <i64 72340172838076673, i64 72340172838076673>, <2 x i64>*@v2i64 ; 0x0101010101010101, i.e. a byte splat of 1
+  ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 281479271743489, i64 281479271743489>, <2 x i64>*@v2i64 ; 0x0001000100010001, i.e. a halfword splat of 1
+  ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 4294967297, i64 4294967297>, <2 x i64>*@v2i64 ; 0x0000000100000001, i.e. a word splat of 1
+  ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 1, i64 1>, <2 x i64>*@v2i64 ; doubleword splat of 1
+  ; MIPS32-AE: ldi.d [[R1:\$w[0-9]+]], 1
+
+  store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64 ; not a splat; expected to be loaded from memory as words
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64 ; not a splat; expected to be loaded from memory as words
+  ; MIPS32-AE: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32-AE: .size const_v2i64
+}
+
+define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) nounwind {
+  ; MIPS32-AE: nonconst_v16i8:
+
+  %lane0 = insertelement <16 x i8> undef, i8 %a, i32 0
+  %lane1 = insertelement <16 x i8> %lane0, i8 %b, i32 1
+  %lane2 = insertelement <16 x i8> %lane1, i8 %c, i32 2
+  %lane3 = insertelement <16 x i8> %lane2, i8 %d, i32 3
+  %lane4 = insertelement <16 x i8> %lane3, i8 %e, i32 4
+  %lane5 = insertelement <16 x i8> %lane4, i8 %f, i32 5
+  %lane6 = insertelement <16 x i8> %lane5, i8 %g, i32 6
+  %lane7 = insertelement <16 x i8> %lane6, i8 %h, i32 7
+  %lane8 = insertelement <16 x i8> %lane7, i8 %h, i32 8 ; lanes 8-15 all reuse %h
+  %lane9 = insertelement <16 x i8> %lane8, i8 %h, i32 9
+  %lane10 = insertelement <16 x i8> %lane9, i8 %h, i32 10
+  %lane11 = insertelement <16 x i8> %lane10, i8 %h, i32 11
+  %lane12 = insertelement <16 x i8> %lane11, i8 %h, i32 12
+  %lane13 = insertelement <16 x i8> %lane12, i8 %h, i32 13
+  %lane14 = insertelement <16 x i8> %lane13, i8 %h, i32 14
+  %lane15 = insertelement <16 x i8> %lane14, i8 %h, i32 15
+  ; MIPS32-AE-DAG: insert.b [[R1:\$w[0-9]+]][0], $4
+  ; MIPS32-AE-DAG: insert.b [[R1]][1], $5
+  ; MIPS32-AE-DAG: insert.b [[R1]][2], $6
+  ; MIPS32-AE-DAG: insert.b [[R1]][3], $7
+  ; MIPS32-BE-DAG: lbu [[R2:\$[0-9]+]], 19($sp)
+  ; MIPS32-LE-DAG: lbu [[R2:\$[0-9]+]], 16($sp)
+  ; MIPS32-AE-DAG: insert.b [[R1]][4], [[R2]]
+  ; MIPS32-BE-DAG: lbu [[R3:\$[0-9]+]], 23($sp)
+  ; MIPS32-LE-DAG: lbu [[R3:\$[0-9]+]], 20($sp)
+  ; MIPS32-AE-DAG: insert.b [[R1]][5], [[R3]]
+  ; MIPS32-BE-DAG: lbu [[R4:\$[0-9]+]], 27($sp)
+  ; MIPS32-LE-DAG: lbu [[R4:\$[0-9]+]], 24($sp)
+  ; MIPS32-AE-DAG: insert.b [[R1]][6], [[R4]]
+  ; MIPS32-BE-DAG: lbu [[R5:\$[0-9]+]], 31($sp)
+  ; MIPS32-LE-DAG: lbu [[R5:\$[0-9]+]], 28($sp)
+  ; MIPS32-AE-DAG: insert.b [[R1]][7], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][8], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][9], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][10], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][11], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][12], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][13], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][14], [[R5]]
+  ; MIPS32-AE-DAG: insert.b [[R1]][15], [[R5]]
+
+  store volatile <16 x i8> %lane15, <16 x i8>*@v16i8
+
+  ret void
+  ; MIPS32-AE: .size nonconst_v16i8
+}
+
+define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h) nounwind {
+  ; MIPS32-AE: nonconst_v8i16:
+
+  %lane0 = insertelement <8 x i16> undef, i16 %a, i32 0
+  %lane1 = insertelement <8 x i16> %lane0, i16 %b, i32 1
+  %lane2 = insertelement <8 x i16> %lane1, i16 %c, i32 2
+  %lane3 = insertelement <8 x i16> %lane2, i16 %d, i32 3
+  %lane4 = insertelement <8 x i16> %lane3, i16 %e, i32 4 ; the expected output loads %e..%h from the stack
+  %lane5 = insertelement <8 x i16> %lane4, i16 %f, i32 5
+  %lane6 = insertelement <8 x i16> %lane5, i16 %g, i32 6
+  %lane7 = insertelement <8 x i16> %lane6, i16 %h, i32 7
+  ; MIPS32-AE-DAG: insert.h [[R1:\$w[0-9]+]][0], $4
+  ; MIPS32-AE-DAG: insert.h [[R1]][1], $5
+  ; MIPS32-AE-DAG: insert.h [[R1]][2], $6
+  ; MIPS32-AE-DAG: insert.h [[R1]][3], $7
+  ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 18($sp)
+  ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 16($sp)
+  ; MIPS32-AE-DAG: insert.h [[R1]][4], [[R2]]
+  ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 22($sp)
+  ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 20($sp)
+  ; MIPS32-AE-DAG: insert.h [[R1]][5], [[R2]]
+  ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 26($sp)
+  ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 24($sp)
+  ; MIPS32-AE-DAG: insert.h [[R1]][6], [[R2]]
+  ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 30($sp)
+  ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 28($sp)
+  ; MIPS32-AE-DAG: insert.h [[R1]][7], [[R2]]
+
+  store volatile <8 x i16> %lane7, <8 x i16>*@v8i16
+
+  ret void
+  ; MIPS32-AE: .size nonconst_v8i16
+}
+
+define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+  ; MIPS32-AE: nonconst_v4i32:
+
+  %lane0 = insertelement <4 x i32> undef, i32 %a, i32 0 ; one insert per register argument
+  %lane1 = insertelement <4 x i32> %lane0, i32 %b, i32 1
+  %lane2 = insertelement <4 x i32> %lane1, i32 %c, i32 2
+  %lane3 = insertelement <4 x i32> %lane2, i32 %d, i32 3
+  ; MIPS32-AE: insert.w [[R1:\$w[0-9]+]][0], $4
+  ; MIPS32-AE: insert.w [[R1]][1], $5
+  ; MIPS32-AE: insert.w [[R1]][2], $6
+  ; MIPS32-AE: insert.w [[R1]][3], $7
+
+  store volatile <4 x i32> %lane3, <4 x i32>*@v4i32
+
+  ret void
+  ; MIPS32-AE: .size nonconst_v4i32
+}
+
+define void @nonconst_v2i64(i64 %a, i64 %b) nounwind {
+  ; MIPS32-AE: nonconst_v2i64:
+
+  %lane0 = insertelement <2 x i64> undef, i64 %a, i32 0 ; each i64 is expected to be inserted as two 32-bit words
+  %lane1 = insertelement <2 x i64> %lane0, i64 %b, i32 1
+  ; MIPS32-AE: insert.w [[R1:\$w[0-9]+]][0], $4
+  ; MIPS32-AE: insert.w [[R1]][1], $5
+  ; MIPS32-AE: insert.w [[R1]][2], $6
+  ; MIPS32-AE: insert.w [[R1]][3], $7
+
+  store volatile <2 x i64> %lane1, <2 x i64>*@v2i64
+
+  ret void
+  ; MIPS32-AE: .size nonconst_v2i64
+}
+
+define i32 @extract_sext_v16i8() nounwind {
+  ; MIPS32-AE: extract_sext_v16i8:
+
+  %1 = load <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+
+  %2 = add <16 x i8> %1, %1
+  ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <16 x i8> %2, i32 1 ; reads the sum %2, so the copy must use the addv result
+  %4 = sext i8 %3 to i32 ; the signed copy already sign-extends, so no sll/sra pair is expected
+  ; MIPS32-AE-DAG: copy_s.b [[R3:\$[0-9]+]], [[R2]][1]
+  ; MIPS32-AE-NOT: sll
+  ; MIPS32-AE-NOT: sra
+
+  ret i32 %4
+  ; MIPS32-AE: .size extract_sext_v16i8
+}
+
+define i32 @extract_sext_v8i16() nounwind {
+  ; MIPS32-AE: extract_sext_v8i16:
+
+  %1 = load <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+
+  %2 = add <8 x i16> %1, %1
+  ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <8 x i16> %2, i32 1 ; reads the sum %2, so the copy must use the addv result
+  %4 = sext i16 %3 to i32 ; the signed copy already sign-extends, so no sll/sra pair is expected
+  ; MIPS32-AE-DAG: copy_s.h [[R3:\$[0-9]+]], [[R2]][1]
+  ; MIPS32-AE-NOT: sll
+  ; MIPS32-AE-NOT: sra
+
+  ret i32 %4
+  ; MIPS32-AE: .size extract_sext_v8i16
+}
+
+define i32 @extract_sext_v4i32() nounwind {
+  ; MIPS32-AE: extract_sext_v4i32:
+
+  %1 = load <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = add <4 x i32> %1, %1
+  ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <4 x i32> %2, i32 1 ; reads the sum %2, so the copy must use the addv result
+  ; MIPS32-AE-DAG: copy_s.w [[R3:\$[0-9]+]], [[R2]][1]
+
+  ret i32 %3
+  ; MIPS32-AE: .size extract_sext_v4i32
+}
+
+define i64 @extract_sext_v2i64() nounwind {
+  ; MIPS32-AE: extract_sext_v2i64:
+
+  %1 = load <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
+
+  %2 = add <2 x i64> %1, %1
+  ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <2 x i64> %2, i32 1 ; reads the sum %2; the i64 is expected as two word copies (lanes 2 and 3)
+  ; MIPS32-AE-DAG: copy_s.w [[R3:\$[0-9]+]], [[R2]][2]
+  ; MIPS32-AE-DAG: copy_s.w [[R4:\$[0-9]+]], [[R2]][3]
+  ; MIPS32-AE-NOT: sll
+  ; MIPS32-AE-NOT: sra
+
+  ret i64 %3
+  ; MIPS32-AE: .size extract_sext_v2i64
+}
+
+define i32 @extract_zext_v16i8() nounwind {
+  ; MIPS32-AE: extract_zext_v16i8:
+
+  %1 = load <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+
+  %2 = add <16 x i8> %1, %1
+  ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <16 x i8> %2, i32 1 ; reads the sum %2, so the copy must use the addv result
+  %4 = zext i8 %3 to i32 ; the unsigned copy already zero-extends, so no andi is expected
+  ; MIPS32-AE-DAG: copy_u.b [[R3:\$[0-9]+]], [[R2]][1]
+  ; MIPS32-AE-NOT: andi
+
+  ret i32 %4
+  ; MIPS32-AE: .size extract_zext_v16i8
+}
+
+define i32 @extract_zext_v8i16() nounwind {
+  ; MIPS32-AE: extract_zext_v8i16:
+
+  %1 = load <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+
+  %2 = add <8 x i16> %1, %1
+  ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <8 x i16> %2, i32 1 ; reads the sum %2, so the copy must use the addv result
+  %4 = zext i16 %3 to i32 ; the unsigned copy already zero-extends, so no andi is expected
+  ; MIPS32-AE-DAG: copy_u.h [[R3:\$[0-9]+]], [[R2]][1]
+  ; MIPS32-AE-NOT: andi
+
+  ret i32 %4
+  ; MIPS32-AE: .size extract_zext_v8i16
+}
+
+define i32 @extract_zext_v4i32() nounwind {
+  ; MIPS32-AE: extract_zext_v4i32:
+
+  %1 = load <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = add <4 x i32> %1, %1
+  ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <4 x i32> %2, i32 1 ; reads the sum %2; either copy_s or copy_u is accepted for a full word
+  ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R2]][1]
+
+  ret i32 %3
+  ; MIPS32-AE: .size extract_zext_v4i32
+}
+
+define i64 @extract_zext_v2i64() nounwind {
+  ; MIPS32-AE: extract_zext_v2i64:
+
+  %1 = load <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
+
+  %2 = add <2 x i64> %1, %1
+  ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <2 x i64> %2, i32 1 ; reads the sum %2; the i64 is expected as two word copies (lanes 2 and 3)
+  ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R2]][2]
+  ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R2]][3]
+  ; MIPS32-AE-NOT: andi
+
+  ret i64 %3
+  ; MIPS32-AE: .size extract_zext_v2i64
+}
+
+define void @insert_v16i8(i32 %a) nounwind {
+  ; MIPS32-AE: insert_v16i8:
+
+  %vec = load <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+
+  %narrow = trunc i32 %a to i8
+  %widened = sext i8 %narrow to i32 ; the trunc/sext/trunc round-trip should leave no andi or sra behind
+  %elt = trunc i32 %widened to i8
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %updated = insertelement <16 x i8> %vec, i8 %elt, i32 1
+  ; MIPS32-AE-DAG: insert.b [[R1]][1], $4
+
+  store <16 x i8> %updated, <16 x i8>* @v16i8
+  ; MIPS32-AE-DAG: st.b [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v16i8
+}
+
+define void @insert_v8i16(i32 %a) nounwind {
+  ; MIPS32-AE: insert_v8i16:
+
+  %vec = load <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+
+  %narrow = trunc i32 %a to i16
+  %widened = sext i16 %narrow to i32 ; the trunc/sext/trunc round-trip should leave no andi or sra behind
+  %elt = trunc i32 %widened to i16
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %updated = insertelement <8 x i16> %vec, i16 %elt, i32 1
+  ; MIPS32-AE-DAG: insert.h [[R1]][1], $4
+
+  store <8 x i16> %updated, <8 x i16>* @v8i16
+  ; MIPS32-AE-DAG: st.h [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v8i16
+}
+
+define void @insert_v4i32(i32 %a) nounwind {
+  ; MIPS32-AE: insert_v4i32:
+
+  %vec = load <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %updated = insertelement <4 x i32> %vec, i32 %a, i32 1 ; replaces lane 1 with the argument
+  ; MIPS32-AE-DAG: insert.w [[R1]][1], $4
+
+  store <4 x i32> %updated, <4 x i32>* @v4i32
+  ; MIPS32-AE-DAG: st.w [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v4i32
+}
+
+define void @insert_v2i64(i64 %a) nounwind {
+  ; MIPS32-AE: insert_v2i64:
+
+  %vec = load <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  ; MIPS32-AE-NOT: andi
+  ; MIPS32-AE-NOT: sra
+
+  %updated = insertelement <2 x i64> %vec, i64 %a, i32 1 ; the i64 is expected as two word inserts (lanes 2 and 3)
+  ; MIPS32-AE-DAG: insert.w [[R1]][2], $4
+  ; MIPS32-AE-DAG: insert.w [[R1]][3], $5
+
+  store <2 x i64> %updated, <2 x i64>* @v2i64
+  ; MIPS32-AE-DAG: st.w [[R1]]
+
+  ret void
+  ; MIPS32-AE: .size insert_v2i64
+}
+
+define void @truncstore() nounwind {
+  ; MIPS32-AE: truncstore:
+
+  store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8 ; 32-bit vector, narrower than the 128-bit vectors used elsewhere in this file
+  ; TODO: What code should be emitted?
+
+  ret void
+  ; MIPS32-AE: .size truncstore
+}
diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll
new file mode 100644
index 000000000000..1f538108a1fa
--- /dev/null
+++ b/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -0,0 +1,207 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32 %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32 %s
+
+@v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+@v2f64 = global <2 x double> <double 0.0, double 0.0>
+@f32 = global float 0.0
+@f64 = global double 0.0
+
+define void @const_v4f32() nounwind {
+  ; MIPS32: const_v4f32:
+  ; Each volatile store of a <4 x float> constant is followed by the instruction(s) expected to build it.
+  store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
+  ; MIPS32: ldi.b  [[R1:\$w[0-9]+]], 0
+
+  store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float>*@v4f32
+  ; MIPS32: lui     [[R1:\$[0-9]+]], 16256
+  ; MIPS32: fill.w  [[R2:\$w[0-9]+]], [[R1]]
+
+  store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
+  ; MIPS32: lui     [[R1:\$[0-9]+]], 18304
+  ; MIPS32: ori     [[R2:\$[0-9]+]], [[R1]], 128
+  ; MIPS32: fill.w  [[R3:\$w[0-9]+]], [[R2]]
+
+  store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
+  ; MIPS32: ld.w  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v4f32
+}
+
+define void @const_v2f64() nounwind {
+  ; MIPS32: const_v2f64:
+  ; Every constant except all-zeros is expected to be fetched with ld.d from a %lo( address.
+  store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
+  ; MIPS32: ldi.b  [[R1:\$w[0-9]+]], 0
+
+  store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
+  ; MIPS32: ld.d  [[R1:\$w[0-9]+]], %lo(
+
+  ret void
+  ; MIPS32: .size const_v2f64
+}
+
+define void @nonconst_v4f32() nounwind {
+  ; MIPS32: nonconst_v4f32:
+  ; Fills all four lanes with the scalar loaded from @f32 and stores the vector to @v4f32.
+  %1 = load float *@f32
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %3 = insertelement <4 x float> %2, float %1, i32 1
+  %4 = insertelement <4 x float> %3, float %1, i32 2
+  %5 = insertelement <4 x float> %4, float %1, i32 3
+  store volatile <4 x float> %5, <4 x float>*@v4f32
+  ; MIPS32: lwc1 $f[[R1:[0-9]+]], 0(
+  ; MIPS32: splati.w [[R2:\$w[0-9]+]], $w[[R1]]
+  ; R1 captures only the register number, so the splat source must be the $w register numbered like the $f load target.
+  ret void
+  ; MIPS32: .size nonconst_v4f32
+}
+
+define void @nonconst_v2f64() nounwind {
+  ; MIPS32: nonconst_v2f64:
+  ; Fills both lanes with the scalar loaded from @f64 and stores the vector to @v2f64.
+  %1 = load double *@f64
+  %2 = insertelement <2 x double> undef, double %1, i32 0
+  %3 = insertelement <2 x double> %2, double %1, i32 1
+  store volatile <2 x double> %3, <2 x double>*@v2f64
+  ; MIPS32: ldc1 $f[[R1:[0-9]+]], 0(
+  ; MIPS32: splati.d [[R2:\$w[0-9]+]], $w[[R1]]
+  ; R1 captures only the register number, so the splat source must be the $w register numbered like the $f load target.
+  ret void
+  ; MIPS32: .size nonconst_v2f64
+}
+
+define float @extract_v4f32() nounwind {
+  ; MIPS32: extract_v4f32:
+  ; Returns element 1 of (@v4f32 + @v4f32).
+  %1 = load <4 x float>* @v4f32
+  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = fadd <4 x float> %1, %1
+  ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <4 x float> %2, i32 1
+  ; Element 1 of the fadd result can be obtained by splatting it across the
+  ; vector and extracting $w0:sub_lo
+  ; MIPS32-DAG: splati.w $w0, [[R2]][1]
+
+  ret float %3
+  ; MIPS32: .size extract_v4f32
+}
+
+define float @extract_v4f32_elt0() nounwind {
+  ; MIPS32: extract_v4f32_elt0:
+  ; Returns element 0 of (@v4f32 + @v4f32); the fadd is expected to write $w0 directly.
+  %1 = load <4 x float>* @v4f32
+  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = fadd <4 x float> %1, %1
+  ; MIPS32-DAG: fadd.w $w0, [[R1]], [[R1]]
+
+  %3 = extractelement <4 x float> %2, i32 0
+  ; Element 0 can be obtained by extracting $w0:sub_lo ($f0)
+  ; MIPS32-NOT: copy_u.w
+  ; MIPS32-NOT: mtc1
+
+  ret float %3
+  ; MIPS32: .size extract_v4f32_elt0
+}
+
+define double @extract_v2f64() nounwind {
+  ; MIPS32: extract_v2f64:
+  ; Returns element 1 of (@v2f64 + @v2f64).
+  %1 = load <2 x double>* @v2f64
+  ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
+
+  %2 = fadd <2 x double> %1, %1
+  ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+  %3 = extractelement <2 x double> %2, i32 1
+  ; Element 1 of the fadd result can be obtained by splatting it across the
+  ; vector and extracting $w0:sub_64
+  ; MIPS32-DAG: splati.d $w0, [[R2]][1]
+  ; MIPS32-NOT: copy_u.w
+  ; MIPS32-NOT: mtc1
+  ; MIPS32-NOT: mthc1
+  ; MIPS32-NOT: sll
+  ; MIPS32-NOT: sra
+
+  ret double %3
+  ; MIPS32: .size extract_v2f64
+}
+
+define double @extract_v2f64_elt0() nounwind {
+  ; MIPS32: extract_v2f64_elt0:
+  ; Returns element 0 of (@v2f64 + @v2f64); the fadd is expected to write $w0 directly.
+  %1 = load <2 x double>* @v2f64
+  ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
+
+  %2 = fadd <2 x double> %1, %1
+  ; MIPS32-DAG: fadd.d $w0, [[R1]], [[R1]]
+
+  %3 = extractelement <2 x double> %2, i32 0
+  ; Element 0 can be obtained by extracting $w0:sub_64 ($f0)
+  ; MIPS32-NOT: copy_u.w
+  ; MIPS32-NOT: mtc1
+  ; MIPS32-NOT: mthc1
+  ; MIPS32-NOT: sll
+  ; MIPS32-NOT: sra
+
+  ret double %3
+  ; MIPS32: .size extract_v2f64_elt0
+}
+
+define void @insert_v4f32(float %a) nounwind {
+  ; MIPS32: insert_v4f32:
+  ; Inserts float %a into lane 1 of @v4f32 and stores the vector back.
+  %1 = load <4 x float>* @v4f32
+  ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+
+  %2 = insertelement <4 x float> %1, float %a, i32 1
+  ; float argument passed in $f12
+  ; MIPS32-DAG: insve.w [[R1]][1], $w12[0]
+  ; The store is expected to use the same register that was loaded and modified.
+  store <4 x float> %2, <4 x float>* @v4f32
+  ; MIPS32-DAG: st.w [[R1]]
+
+  ret void
+  ; MIPS32: .size insert_v4f32
+}
+
+define void @insert_v2f64(double %a) nounwind {
+  ; MIPS32: insert_v2f64:
+  ; Inserts double %a into lane 1 of @v2f64 and stores the vector back.
+  %1 = load <2 x double>* @v2f64
+  ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
+
+  %2 = insertelement <2 x double> %1, double %a, i32 1
+  ; double argument passed in $f12
+  ; MIPS32-DAG: insve.d [[R1]][1], $w12[0]
+  ; The store is expected to use the same register that was loaded and modified.
+  store <2 x double> %2, <2 x double>* @v2f64
+  ; MIPS32-DAG: st.d [[R1]]
+
+  ret void
+  ; MIPS32: .size insert_v2f64
+}
diff --git a/test/CodeGen/Mips/msa/bit.ll b/test/CodeGen/Mips/msa/bit.ll
new file mode 100644
index 000000000000..59ddbe17a33f
--- /dev/null
+++ b/test/CodeGen/Mips/msa/bit.ll
@@ -0,0 +1,537 @@
+; Test the MSA intrinsics that are encoded with the BIT instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_sat_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sat_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sat_s_b_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.s.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sat_s_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.sat.s.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.sat.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_sat_s_b_test:
+; CHECK: ld.b
+; CHECK: sat_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_sat_s_b_test
+;
+@llvm_mips_sat_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sat_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sat_s_h_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.s.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sat_s_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.sat.s.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.sat.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_sat_s_h_test:
+; CHECK: ld.h
+; CHECK: sat_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_sat_s_h_test
+;
+@llvm_mips_sat_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sat_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sat_s_w_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.s.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sat_s_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.sat.s.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.sat.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_sat_s_w_test:
+; CHECK: ld.w
+; CHECK: sat_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_sat_s_w_test
+;
+@llvm_mips_sat_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sat_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sat_s_d_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.s.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sat_s_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.sat.s.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.sat.s.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_sat_s_d_test:
+; CHECK: ld.d
+; CHECK: sat_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_sat_s_d_test
+;
+@llvm_mips_sat_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sat_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sat_u_b_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.u.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sat_u_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.sat.u.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.sat.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_sat_u_b_test:
+; CHECK: ld.b
+; CHECK: sat_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_sat_u_b_test
+;
+@llvm_mips_sat_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sat_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sat_u_h_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.u.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sat_u_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.sat.u.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.sat.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_sat_u_h_test:
+; CHECK: ld.h
+; CHECK: sat_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_sat_u_h_test
+;
+@llvm_mips_sat_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sat_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sat_u_w_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.u.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sat_u_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.sat.u.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.sat.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_sat_u_w_test:
+; CHECK: ld.w
+; CHECK: sat_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_sat_u_w_test
+;
+@llvm_mips_sat_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sat_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sat_u_d_test() nounwind { ; Loads ARG1, applies llvm.mips.sat.u.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sat_u_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.sat.u.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.sat.u.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_sat_u_d_test:
+; CHECK: ld.d
+; CHECK: sat_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_sat_u_d_test
+;
+@llvm_mips_slli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_slli_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_slli_b_test() nounwind { ; Loads ARG1, applies llvm.mips.slli.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_slli_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.slli.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_slli_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.slli.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_slli_b_test:
+; CHECK: ld.b
+; CHECK: slli.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_slli_b_test
+;
+@llvm_mips_slli_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_slli_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_slli_h_test() nounwind { ; Loads ARG1, applies llvm.mips.slli.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_slli_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.slli.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_slli_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.slli.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_slli_h_test:
+; CHECK: ld.h
+; CHECK: slli.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_slli_h_test
+;
+@llvm_mips_slli_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_slli_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_slli_w_test() nounwind { ; Loads ARG1, applies llvm.mips.slli.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_slli_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_slli_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_slli_w_test:
+; CHECK: ld.w
+; CHECK: slli.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_slli_w_test
+;
+@llvm_mips_slli_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_slli_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_slli_d_test() nounwind { ; Loads ARG1, applies llvm.mips.slli.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_slli_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_slli_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_slli_d_test:
+; CHECK: ld.d
+; CHECK: slli.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_slli_d_test
+;
+@llvm_mips_srai_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srai_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srai_b_test() nounwind { ; Loads ARG1, applies llvm.mips.srai.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srai_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.srai.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_srai_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srai.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_srai_b_test:
+; CHECK: ld.b
+; CHECK: srai.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_srai_b_test
+;
+@llvm_mips_srai_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srai_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srai_h_test() nounwind { ; Loads ARG1, applies llvm.mips.srai.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srai_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.srai.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_srai_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srai.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_srai_h_test:
+; CHECK: ld.h
+; CHECK: srai.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_srai_h_test
+;
+@llvm_mips_srai_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srai_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srai_w_test() nounwind { ; Loads ARG1, applies llvm.mips.srai.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srai_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.srai.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_srai_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srai.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_srai_w_test:
+; CHECK: ld.w
+; CHECK: srai.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_srai_w_test
+;
+@llvm_mips_srai_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srai_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_srai_d_test() nounwind { ; Loads ARG1, applies llvm.mips.srai.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srai_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.srai.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_srai_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srai.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_srai_d_test:
+; CHECK: ld.d
+; CHECK: srai.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_srai_d_test
+;
+@llvm_mips_srari_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srari_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srari_b_test() nounwind { ; Loads ARG1, applies llvm.mips.srari.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srari_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.srari.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_srari_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srari.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_srari_b_test:
+; CHECK: ld.b
+; CHECK: srari.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_srari_b_test
+;
+@llvm_mips_srari_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srari_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srari_h_test() nounwind { ; Loads ARG1, applies llvm.mips.srari.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srari_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.srari.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_srari_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srari.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_srari_h_test:
+; CHECK: ld.h
+; CHECK: srari.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_srari_h_test
+;
+@llvm_mips_srari_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srari_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srari_w_test() nounwind { ; Loads ARG1, applies llvm.mips.srari.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srari_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.srari.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_srari_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srari.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_srari_w_test:
+; CHECK: ld.w
+; CHECK: srari.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_srari_w_test
+;
+@llvm_mips_srari_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srari_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_srari_d_test() nounwind { ; Loads ARG1, applies llvm.mips.srari.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srari_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.srari.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_srari_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srari.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_srari_d_test:
+; CHECK: ld.d
+; CHECK: srari.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_srari_d_test
+;
+@llvm_mips_srli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srli_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srli_b_test() nounwind { ; Loads ARG1, applies llvm.mips.srli.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srli_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.srli.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_srli_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srli.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_srli_b_test:
+; CHECK: ld.b
+; CHECK: srli.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_srli_b_test
+;
+@llvm_mips_srli_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srli_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srli_h_test() nounwind { ; Loads ARG1, applies llvm.mips.srli.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srli_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.srli.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_srli_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srli.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_srli_h_test:
+; CHECK: ld.h
+; CHECK: srli.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_srli_h_test
+;
+@llvm_mips_srli_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srli_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srli_w_test() nounwind { ; Loads ARG1, applies llvm.mips.srli.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srli_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_srli_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_srli_w_test:
+; CHECK: ld.w
+; CHECK: srli.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_srli_w_test
+;
+@llvm_mips_srli_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srli_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_srli_d_test() nounwind { ; Loads ARG1, applies llvm.mips.srli.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srli_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_srli_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_srli_d_test:
+; CHECK: ld.d
+; CHECK: srli.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_srli_d_test
+;
+@llvm_mips_srlri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_srlri_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_srlri_b_test() nounwind { ; Loads ARG1, applies llvm.mips.srlri.b with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_srlri_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.srlri.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_srlri_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.srlri.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_srlri_b_test:
+; CHECK: ld.b
+; CHECK: srlri.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_srlri_b_test
+;
+@llvm_mips_srlri_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_srlri_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_srlri_h_test() nounwind { ; Loads ARG1, applies llvm.mips.srlri.h with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_srlri_h_ARG1
+  %1 = tail call <8 x i16> @llvm.mips.srlri.h(<8 x i16> %0, i32 7)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_srlri_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.srlri.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_srlri_h_test:
+; CHECK: ld.h
+; CHECK: srlri.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_srlri_h_test
+;
+@llvm_mips_srlri_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_srlri_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_srlri_w_test() nounwind { ; Loads ARG1, applies llvm.mips.srlri.w with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_srlri_w_ARG1
+  %1 = tail call <4 x i32> @llvm.mips.srlri.w(<4 x i32> %0, i32 7)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_srlri_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.srlri.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_srlri_w_test:
+; CHECK: ld.w
+; CHECK: srlri.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_srlri_w_test
+;
+@llvm_mips_srlri_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_srlri_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_srlri_d_test() nounwind { ; Loads ARG1, applies llvm.mips.srlri.d with immediate 7, stores to RES; the checks that follow match mnemonics only.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_srlri_d_ARG1
+  %1 = tail call <2 x i64> @llvm.mips.srlri.d(<2 x i64> %0, i32 7)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_srlri_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.srlri.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_srlri_d_test:
+; CHECK: ld.d
+; CHECK: srlri.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_srlri_d_test
+;
diff --git a/test/CodeGen/Mips/msa/bitcast.ll b/test/CodeGen/Mips/msa/bitcast.ll
new file mode 100644
index 000000000000..8e880ecd9afb
--- /dev/null
+++ b/test/CodeGen/Mips/msa/bitcast.ll
@@ -0,0 +1,1210 @@
+; Test the bitcast operation for big-endian and little-endian.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s
+
+define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
+  store <16 x i8> %3, <16 x i8>* %dst
+  ret void
+}
+; Same-type bitcast: both endiannesses use identical checks (ld.b, two chained addv.b, st.b).
+; LITENDIAN: v16i8_to_v16i8:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v16i8_to_v16i8
+
+; BIGENDIAN: v16i8_to_v16i8:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.b [[R3]],
+; BIGENDIAN: .size v16i8_to_v16i8
+
+define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <8 x i16>
+  %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
+  store <8 x i16> %3, <8 x i16>* %dst
+  ret void
+}
+; Little-endian checks feed the addv.b result straight into addv.h; big-endian checks expect a shf.b with immediate 177 in between.
+; LITENDIAN: v16i8_to_v8i16:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.h [[R3]],
+; LITENDIAN: .size v16i8_to_v8i16
+
+; BIGENDIAN: v16i8_to_v8i16:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.h [[R4]],
+; BIGENDIAN: .size v16i8_to_v8i16
+
+; We can't prevent the (store (bitcast X), Y) DAG Combine here because there
+; are no operations for v8f16 to put in the way.
+define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <8 x half>
+  store <8 x half> %2, <8 x half>* %dst
+  ret void
+}
+; Both endiannesses therefore expect the addv.b result to be stored directly with st.b.
+; LITENDIAN: v16i8_to_v8f16:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.b [[R2]],
+; LITENDIAN: .size v16i8_to_v8f16
+
+; BIGENDIAN: v16i8_to_v8f16:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.b [[R2]],
+; BIGENDIAN: .size v16i8_to_v8f16
+
+define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <4 x i32>
+  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* %dst
+  ret void
+}
+; Little-endian checks feed the addv.b result straight into addv.w; big-endian checks expect a shf.b with immediate 27 in between.
+; LITENDIAN: v16i8_to_v4i32:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v16i8_to_v4i32
+
+; BIGENDIAN: v16i8_to_v4i32:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v16i8_to_v4i32
+
+define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <4 x float>
+  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
+  store <4 x float> %3, <4 x float>* %dst
+  ret void
+}
+; Little-endian checks feed the addv.b result straight into fadd.w; big-endian checks expect a shf.b with immediate 27 in between.
+; LITENDIAN: v16i8_to_v4f32:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v16i8_to_v4f32
+
+; BIGENDIAN: v16i8_to_v4f32:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v16i8_to_v4f32
+
+define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <2 x i64>
+  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
+  store <2 x i64> %3, <2 x i64>* %dst
+  ret void
+}
+; Little-endian checks feed the addv.b result straight into addv.d.
+; LITENDIAN: v16i8_to_v2i64:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v16i8_to_v2i64
+; Big-endian checks expect shf.b 27 then shf.w 177; each step gets its own variable so every use names the previous result.
+; BIGENDIAN: v16i8_to_v2i64:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: shf.w [[R4:\$w[0-9]+]], [[R3]], 177
+; BIGENDIAN: addv.d [[R5:\$w[0-9]+]], [[R4]], [[R4]]
+; BIGENDIAN: st.d [[R5]],
+; BIGENDIAN: .size v16i8_to_v2i64
+
+define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
+entry:
+  %0 = load volatile <16 x i8>* %src
+  %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
+  %2 = bitcast <16 x i8> %1 to <2 x double>
+  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
+  store <2 x double> %3, <2 x double>* %dst
+  ret void
+}
+; Little-endian checks feed the addv.b result straight into fadd.d.
+; LITENDIAN: v16i8_to_v2f64:
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v16i8_to_v2f64
+; Big-endian checks expect shf.b 27 then shf.w 177; each step gets its own variable so every use names the previous result.
+; BIGENDIAN: v16i8_to_v2f64:
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: shf.w [[R4:\$w[0-9]+]], [[R3]], 177
+; BIGENDIAN: fadd.d [[R5:\$w[0-9]+]], [[R4]], [[R4]]
+; BIGENDIAN: st.d [[R5]],
+; BIGENDIAN: .size v16i8_to_v2f64
+
+define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
+entry:
+  %0 = load volatile <8 x i16>* %src
+  %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
+  %2 = bitcast <8 x i16> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
+  store <16 x i8> %3, <16 x i8>* %dst
+  ret void
+}
+; Little-endian checks feed the addv.h result straight into addv.b; big-endian checks require addv.b to consume the shf.b result.
+; LITENDIAN: v8i16_to_v16i8:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v8i16_to_v16i8
+
+; BIGENDIAN: v8i16_to_v16i8:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.b [[R4]],
+; BIGENDIAN: .size v8i16_to_v16i8
+
+define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
+entry: ; identity <8 x i16> bitcast between two llvm.mips.addv.h calls
+  %in = load volatile <8 x i16>* %src
+  %pre = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %in, <8 x i16> %in)
+  %cast = bitcast <8 x i16> %pre to <8 x i16>
+  %post = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %cast, <8 x i16> %cast)
+  store <8 x i16> %post, <8 x i16>* %dst
+  ret void
+}
+
+; LITENDIAN: v8i16_to_v8i16:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.h [[R3]],
+; LITENDIAN: .size v8i16_to_v8i16
+
+; BIGENDIAN: v8i16_to_v8i16:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.h [[R3]],
+; BIGENDIAN: .size v8i16_to_v8i16
+
+; No v8f16 operation exists to place between the bitcast and the store, so
+; the (store (bitcast X), Y) DAG combine cannot be blocked in this test.
+define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
+entry: ; <8 x i16> -> <8 x half> bitcast whose result is stored directly
+  %in = load volatile <8 x i16>* %src
+  %pre = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %in, <8 x i16> %in)
+  %cast = bitcast <8 x i16> %pre to <8 x half>
+  store <8 x half> %cast, <8 x half>* %dst
+  ret void
+}
+
+; LITENDIAN: v8i16_to_v8f16:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.h [[R2]],
+; LITENDIAN: .size v8i16_to_v8f16
+
+; BIGENDIAN: v8i16_to_v8f16:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.h [[R2]],
+; BIGENDIAN: .size v8i16_to_v8f16
+
+define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
+entry: ; <8 x i16> -> <4 x i32> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <8 x i16>* %src
+  %pre = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %in, <8 x i16> %in)
+  %cast = bitcast <8 x i16> %pre to <4 x i32>
+  %post = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %cast, <4 x i32> %cast)
+  store <4 x i32> %post, <4 x i32>* %dst
+  ret void
+}
+
+; LITENDIAN: v8i16_to_v4i32:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v8i16_to_v4i32
+
+; BIGENDIAN: v8i16_to_v4i32:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v8i16_to_v4i32
+
+define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
+entry: ; <8 x i16> -> <4 x float> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <8 x i16>* %src
+  %pre = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %in, <8 x i16> %in)
+  %cast = bitcast <8 x i16> %pre to <4 x float>
+  %post = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %cast, <4 x float> %cast)
+  store <4 x float> %post, <4 x float>* %dst
+  ret void
+}
+
+; LITENDIAN: v8i16_to_v4f32:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v8i16_to_v4f32
+
+; BIGENDIAN: v8i16_to_v4f32:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v8i16_to_v4f32
+
+define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
+entry: ; <8 x i16> -> <2 x i64> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <8 x i16>* %src
+  %pre = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %in, <8 x i16> %in)
+  %cast = bitcast <8 x i16> %pre to <2 x i64>
+  %post = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %cast, <2 x i64> %cast)
+  store <2 x i64> %post, <2 x i64>* %dst
+  ret void
+}
+
+; LITENDIAN: v8i16_to_v2i64:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v8i16_to_v2i64
+
+; BIGENDIAN: v8i16_to_v2i64:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.d [[R4]],
+; BIGENDIAN: .size v8i16_to_v2i64
+
+define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
+entry: ; <8 x i16> -> <2 x double> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <8 x i16>* %src
+  %pre = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %in, <8 x i16> %in)
+  %cast = bitcast <8 x i16> %pre to <2 x double>
+  %post = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %cast, <2 x double> %cast)
+  store <2 x double> %post, <2 x double>* %dst
+  ret void
+}
+
+; LITENDIAN: v8i16_to_v2f64:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v8i16_to_v2f64
+
+; BIGENDIAN: v8i16_to_v2f64:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.d [[R4]],
+; BIGENDIAN: .size v8i16_to_v2f64
+
+;----
+; We can't prevent the (bitcast (load X)) DAG Combine here because there
+; are no operations for v8f16 to put in the way.
+define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
+entry: ; <8 x half> -> <16 x i8> bitcast applied directly to the loaded value
+  %0 = load volatile <8 x half>* %src
+  %1 = bitcast <8 x half> %0 to <16 x i8>
+  %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
+  store <16 x i8> %2, <16 x i8>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v16i8:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v8f16_to_v16i8
+; addv.b must read the shf.b result; these checks bind no other register after R1.
+; BIGENDIAN: v8f16_to_v16i8:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.b [[R4]],
+; BIGENDIAN: .size v8f16_to_v16i8
+
+; No v8f16 operation exists to place between the load and the bitcast, so
+; the (bitcast (load X)) DAG combine cannot be blocked in this test.
+define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
+entry: ; <8 x half> -> <8 x i16> bitcast applied directly to the loaded value
+  %in = load volatile <8 x half>* %src
+  %cast = bitcast <8 x half> %in to <8 x i16>
+  %post = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %cast, <8 x i16> %cast)
+  store <8 x i16> %post, <8 x i16>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v8i16:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.h [[R2]],
+; LITENDIAN: .size v8f16_to_v8i16
+
+; BIGENDIAN: v8f16_to_v8i16:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.h [[R2]],
+; BIGENDIAN: .size v8f16_to_v8i16
+
+; No v8f16 operation exists to place between the load and the bitcast, so
+; the (bitcast (load X)) DAG combine cannot be blocked in this test.
+; Likewise nothing can be placed between the bitcast and the store, so
+; the (store (bitcast X), Y) DAG combine cannot be blocked either.
+define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
+entry: ; identity <8 x half> bitcast sitting directly between the load and the store
+  %in = load volatile <8 x half>* %src
+  %cast = bitcast <8 x half> %in to <8 x half>
+  store <8 x half> %cast, <8 x half>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v8f16:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: st.h [[R1]],
+; LITENDIAN: .size v8f16_to_v8f16
+
+; BIGENDIAN: v8f16_to_v8f16:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: st.h [[R1]],
+; BIGENDIAN: .size v8f16_to_v8f16
+
+; No v8f16 operation exists to place between the load and the bitcast, so
+; the (bitcast (load X)) DAG combine cannot be blocked in this test.
+define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
+entry: ; <8 x half> -> <4 x i32> bitcast applied directly to the loaded value
+  %in = load volatile <8 x half>* %src
+  %cast = bitcast <8 x half> %in to <4 x i32>
+  %post = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %cast, <4 x i32> %cast)
+  store <4 x i32> %post, <4 x i32>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v4i32:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.w [[R2]],
+; LITENDIAN: .size v8f16_to_v4i32
+
+; BIGENDIAN: v8f16_to_v4i32:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.w [[R3]],
+; BIGENDIAN: .size v8f16_to_v4i32
+
+; No v8f16 operation exists to place between the load and the bitcast, so
+; the (bitcast (load X)) DAG combine cannot be blocked in this test.
+define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
+entry: ; <8 x half> -> <4 x float> bitcast applied directly to the loaded value
+  %in = load volatile <8 x half>* %src
+  %cast = bitcast <8 x half> %in to <4 x float>
+  %post = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %cast, <4 x float> %cast)
+  store <4 x float> %post, <4 x float>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v4f32:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.w [[R2]],
+; LITENDIAN: .size v8f16_to_v4f32
+
+; BIGENDIAN: v8f16_to_v4f32:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.w [[R3]],
+; BIGENDIAN: .size v8f16_to_v4f32
+
+; No v8f16 operation exists to place between the load and the bitcast, so
+; the (bitcast (load X)) DAG combine cannot be blocked in this test.
+define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
+entry: ; <8 x half> -> <2 x i64> bitcast applied directly to the loaded value
+  %in = load volatile <8 x half>* %src
+  %cast = bitcast <8 x half> %in to <2 x i64>
+  %post = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %cast, <2 x i64> %cast)
+  store <2 x i64> %post, <2 x i64>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v2i64:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.d [[R2]],
+; LITENDIAN: .size v8f16_to_v2i64
+
+; BIGENDIAN: v8f16_to_v2i64:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.d [[R3]],
+; BIGENDIAN: .size v8f16_to_v2i64
+
+; No v8f16 operation exists to place between the load and the bitcast, so
+; the (bitcast (load X)) DAG combine cannot be blocked in this test.
+define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
+entry: ; <8 x half> -> <2 x double> bitcast applied directly to the loaded value
+  %in = load volatile <8 x half>* %src
+  %cast = bitcast <8 x half> %in to <2 x double>
+  %post = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %cast, <2 x double> %cast)
+  store <2 x double> %post, <2 x double>* %dst
+  ret void
+}
+
+; LITENDIAN: v8f16_to_v2f64:
+; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.d [[R2]],
+; LITENDIAN: .size v8f16_to_v2f64
+
+; BIGENDIAN: v8f16_to_v2f64:
+; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
+; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.d [[R3]],
+; BIGENDIAN: .size v8f16_to_v2f64
+;----
+
+define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
+entry: ; <4 x i32> -> <16 x i8> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <16 x i8>
+  %post = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %cast, <16 x i8> %cast)
+  store <16 x i8> %post, <16 x i8>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v16i8:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v4i32_to_v16i8
+
+; BIGENDIAN: v4i32_to_v16i8:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.b [[R4]],
+; BIGENDIAN: .size v4i32_to_v16i8
+
+define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
+entry: ; <4 x i32> -> <8 x i16> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <8 x i16>
+  %post = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %cast, <8 x i16> %cast)
+  store <8 x i16> %post, <8 x i16>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v8i16:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.h [[R3]],
+; LITENDIAN: .size v4i32_to_v8i16
+
+; BIGENDIAN: v4i32_to_v8i16:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.h [[R4]],
+; BIGENDIAN: .size v4i32_to_v8i16
+
+; No v8f16 operation exists to place between the bitcast and the store, so
+; the (store (bitcast X), Y) DAG combine cannot be blocked in this test.
+define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
+entry: ; <4 x i32> -> <8 x half> bitcast whose result is stored directly
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <8 x half>
+  store <8 x half> %cast, <8 x half>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v8f16:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.w [[R2]],
+; LITENDIAN: .size v4i32_to_v8f16
+
+; BIGENDIAN: v4i32_to_v8f16:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.w [[R2]],
+; BIGENDIAN: .size v4i32_to_v8f16
+
+define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
+entry: ; identity <4 x i32> bitcast between two llvm.mips.addv.w calls
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <4 x i32>
+  %post = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %cast, <4 x i32> %cast)
+  store <4 x i32> %post, <4 x i32>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v4i32:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v4i32_to_v4i32
+
+; BIGENDIAN: v4i32_to_v4i32:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.w [[R3]],
+; BIGENDIAN: .size v4i32_to_v4i32
+
+define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
+entry: ; <4 x i32> -> <4 x float> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <4 x float>
+  %post = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %cast, <4 x float> %cast)
+  store <4 x float> %post, <4 x float>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v4f32:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v4i32_to_v4f32
+
+; BIGENDIAN: v4i32_to_v4f32:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.w [[R3]],
+; BIGENDIAN: .size v4i32_to_v4f32
+
+define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
+entry: ; <4 x i32> -> <2 x i64> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <2 x i64>
+  %post = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %cast, <2 x i64> %cast)
+  store <2 x i64> %post, <2 x i64>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v2i64:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v4i32_to_v2i64
+
+; BIGENDIAN: v4i32_to_v2i64:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.d [[R4]],
+; BIGENDIAN: .size v4i32_to_v2i64
+
+define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
+entry: ; <4 x i32> -> <2 x double> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x i32>* %src
+  %pre = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %in, <4 x i32> %in)
+  %cast = bitcast <4 x i32> %pre to <2 x double>
+  %post = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %cast, <2 x double> %cast)
+  store <2 x double> %post, <2 x double>* %dst
+  ret void
+}
+
+; LITENDIAN: v4i32_to_v2f64:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v4i32_to_v2f64
+
+; BIGENDIAN: v4i32_to_v2f64:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.d [[R4]],
+; BIGENDIAN: .size v4i32_to_v2f64
+
+define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
+entry: ; <4 x float> -> <16 x i8> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <16 x i8>
+  %post = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %cast, <16 x i8> %cast)
+  store <16 x i8> %post, <16 x i8>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v16i8:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v4f32_to_v16i8
+
+; BIGENDIAN: v4f32_to_v16i8:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.b [[R4]],
+; BIGENDIAN: .size v4f32_to_v16i8
+
+define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
+entry: ; <4 x float> -> <8 x i16> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <8 x i16>
+  %post = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %cast, <8 x i16> %cast)
+  store <8 x i16> %post, <8 x i16>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v8i16:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.h [[R3]],
+; LITENDIAN: .size v4f32_to_v8i16
+
+; BIGENDIAN: v4f32_to_v8i16:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.h [[R4]],
+; BIGENDIAN: .size v4f32_to_v8i16
+
+; No v8f16 operation exists to place between the bitcast and the store, so
+; the (store (bitcast X), Y) DAG combine cannot be blocked in this test.
+define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
+entry: ; <4 x float> -> <8 x half> bitcast whose result is stored directly
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <8 x half>
+  store <8 x half> %cast, <8 x half>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v8f16:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.w [[R2]],
+; LITENDIAN: .size v4f32_to_v8f16
+
+; BIGENDIAN: v4f32_to_v8f16:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.w [[R2]],
+; BIGENDIAN: .size v4f32_to_v8f16
+
+define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
+entry: ; <4 x float> -> <4 x i32> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <4 x i32>
+  %post = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %cast, <4 x i32> %cast)
+  store <4 x i32> %post, <4 x i32>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v4i32:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v4f32_to_v4i32
+
+; BIGENDIAN: v4f32_to_v4i32:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.w [[R3]],
+; BIGENDIAN: .size v4f32_to_v4i32
+
+define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
+entry: ; identity <4 x float> bitcast between two llvm.mips.fadd.w calls
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <4 x float>
+  %post = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %cast, <4 x float> %cast)
+  store <4 x float> %post, <4 x float>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v4f32:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v4f32_to_v4f32
+
+; BIGENDIAN: v4f32_to_v4f32:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.w [[R3]],
+; BIGENDIAN: .size v4f32_to_v4f32
+
+define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
+entry: ; <4 x float> -> <2 x i64> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <2 x i64>
+  %post = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %cast, <2 x i64> %cast)
+  store <2 x i64> %post, <2 x i64>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v2i64:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v4f32_to_v2i64
+
+; BIGENDIAN: v4f32_to_v2i64:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.d [[R4]],
+; BIGENDIAN: .size v4f32_to_v2i64
+
+define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
+entry: ; <4 x float> -> <2 x double> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <4 x float>* %src
+  %pre = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %in, <4 x float> %in)
+  %cast = bitcast <4 x float> %pre to <2 x double>
+  %post = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %cast, <2 x double> %cast)
+  store <2 x double> %post, <2 x double>* %dst
+  ret void
+}
+
+; LITENDIAN: v4f32_to_v2f64:
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v4f32_to_v2f64
+
+; BIGENDIAN: v4f32_to_v2f64:
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: fadd.d [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.d [[R4]],
+; BIGENDIAN: .size v4f32_to_v2f64
+
+define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
+entry: ; <2 x i64> -> <16 x i8> bitcast, with an llvm.mips intrinsic call on each side
+  %0 = load volatile <2 x i64>* %src
+  %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
+  %2 = bitcast <2 x i64> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
+  store <16 x i8> %3, <16 x i8>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v16i8:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v2i64_to_v16i8
+; Each shuffle binds a fresh variable so that shf.w is tied to the shf.b result.
+; BIGENDIAN: v2i64_to_v16i8:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: shf.w [[R4:\$w[0-9]+]], [[R3]], 177
+; BIGENDIAN: addv.b [[R5:\$w[0-9]+]], [[R4]], [[R4]]
+; BIGENDIAN: st.b [[R5]],
+; BIGENDIAN: .size v2i64_to_v16i8
+
+define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
+entry: ; <2 x i64> -> <8 x i16> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <2 x i64>* %src
+  %pre = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %in, <2 x i64> %in)
+  %cast = bitcast <2 x i64> %pre to <8 x i16>
+  %post = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %cast, <8 x i16> %cast)
+  store <8 x i16> %post, <8 x i16>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v8i16:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.h [[R3]],
+; LITENDIAN: .size v2i64_to_v8i16
+
+; BIGENDIAN: v2i64_to_v8i16:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.h [[R4]],
+; BIGENDIAN: .size v2i64_to_v8i16
+
+; No v8f16 operation exists to place between the bitcast and the store, so
+; the (store (bitcast X), Y) DAG combine cannot be blocked in this test.
+define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
+entry: ; <2 x i64> -> <8 x half> bitcast whose result is stored directly
+  %in = load volatile <2 x i64>* %src
+  %pre = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %in, <2 x i64> %in)
+  %cast = bitcast <2 x i64> %pre to <8 x half>
+  store <8 x half> %cast, <8 x half>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v8f16:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.d [[R2]],
+; LITENDIAN: .size v2i64_to_v8f16
+
+; BIGENDIAN: v2i64_to_v8f16:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.d [[R2]],
+; BIGENDIAN: .size v2i64_to_v8f16
+
+define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
+entry: ; <2 x i64> -> <4 x i32> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <2 x i64>* %src
+  %pre = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %in, <2 x i64> %in)
+  %cast = bitcast <2 x i64> %pre to <4 x i32>
+  %post = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %cast, <4 x i32> %cast)
+  store <4 x i32> %post, <4 x i32>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v4i32:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v2i64_to_v4i32
+
+; BIGENDIAN: v2i64_to_v4i32:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v2i64_to_v4i32
+
+define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
+entry: ; <2 x i64> -> <4 x float> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <2 x i64>* %src
+  %pre = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %in, <2 x i64> %in)
+  %cast = bitcast <2 x i64> %pre to <4 x float>
+  %post = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %cast, <4 x float> %cast)
+  store <4 x float> %post, <4 x float>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v4f32:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v2i64_to_v4f32
+
+; BIGENDIAN: v2i64_to_v4f32:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v2i64_to_v4f32
+
+define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
+entry: ; identity <2 x i64> bitcast between two llvm.mips.addv.d calls
+  %in = load volatile <2 x i64>* %src
+  %pre = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %in, <2 x i64> %in)
+  %cast = bitcast <2 x i64> %pre to <2 x i64>
+  %post = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %cast, <2 x i64> %cast)
+  store <2 x i64> %post, <2 x i64>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v2i64:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v2i64_to_v2i64
+
+; BIGENDIAN: v2i64_to_v2i64:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.d [[R3]],
+; BIGENDIAN: .size v2i64_to_v2i64
+
+define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
+entry: ; <2 x i64> -> <2 x double> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <2 x i64>* %src
+  %pre = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %in, <2 x i64> %in)
+  %cast = bitcast <2 x i64> %pre to <2 x double>
+  %post = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %cast, <2 x double> %cast)
+  store <2 x double> %post, <2 x double>* %dst
+  ret void
+}
+
+; LITENDIAN: v2i64_to_v2f64:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v2i64_to_v2f64
+
+; BIGENDIAN: v2i64_to_v2f64:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.d [[R3]],
+; BIGENDIAN: .size v2i64_to_v2f64
+
+define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
+entry: ; <2 x double> -> <16 x i8> bitcast, with an llvm.mips intrinsic call on each side
+  %0 = load volatile <2 x double>* %src
+  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
+  %2 = bitcast <2 x double> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
+  store <16 x i8> %3, <16 x i8>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v16i8:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.b [[R3]],
+; LITENDIAN: .size v2f64_to_v16i8
+; Each shuffle binds a fresh variable so that shf.w is tied to the shf.b result.
+; BIGENDIAN: v2f64_to_v16i8:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: shf.w [[R4:\$w[0-9]+]], [[R3]], 177
+; BIGENDIAN: addv.b [[R5:\$w[0-9]+]], [[R4]], [[R4]]
+; BIGENDIAN: st.b [[R5]],
+; BIGENDIAN: .size v2f64_to_v16i8
+
+define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
+entry: ; <2 x double> -> <8 x i16> bitcast, with an llvm.mips intrinsic call on each side
+  %in = load volatile <2 x double>* %src
+  %pre = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %in, <2 x double> %in)
+  %cast = bitcast <2 x double> %pre to <8 x i16>
+  %post = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %cast, <8 x i16> %cast)
+  store <8 x i16> %post, <8 x i16>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v8i16:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.h [[R3]],
+; LITENDIAN: .size v2f64_to_v8i16
+
+; BIGENDIAN: v2f64_to_v8i16:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.h [[R3:\$w[0-9]+]], [[R2]], 27
+; BIGENDIAN: addv.h [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.h [[R4]],
+; BIGENDIAN: .size v2f64_to_v8i16
+
+; No v8f16 operation exists to place between the bitcast and the store, so
+; the (store (bitcast X), Y) DAG combine cannot be blocked in this test.
+define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
+entry: ; <2 x double> -> <8 x half> bitcast whose result is stored directly
+  %in = load volatile <2 x double>* %src
+  %pre = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %in, <2 x double> %in)
+  %cast = bitcast <2 x double> %pre to <8 x half>
+  store <8 x half> %cast, <8 x half>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v8f16:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: st.d [[R2]],
+; LITENDIAN: .size v2f64_to_v8f16
+
+; BIGENDIAN: v2f64_to_v8f16:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: st.d [[R2]],
+; BIGENDIAN: .size v2f64_to_v8f16
+
+define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
+entry:
+  %0 = load volatile <2 x double>* %src
+  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
+  %2 = bitcast <2 x double> %1 to <4 x i32>
+  %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
+  store <4 x i32> %3, <4 x i32>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v4i32:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v2f64_to_v4i32
+
+; BIGENDIAN: v2f64_to_v4i32:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: addv.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v2f64_to_v4i32
+
+define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
+entry:
+  %0 = load volatile <2 x double>* %src
+  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
+  %2 = bitcast <2 x double> %1 to <4 x float>
+  %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
+  store <4 x float> %3, <4 x float>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v4f32:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.w [[R3]],
+; LITENDIAN: .size v2f64_to_v4f32
+
+; BIGENDIAN: v2f64_to_v4f32:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: shf.w [[R3:\$w[0-9]+]], [[R2]], 177
+; BIGENDIAN: fadd.w [[R4:\$w[0-9]+]], [[R3]], [[R3]]
+; BIGENDIAN: st.w [[R4]],
+; BIGENDIAN: .size v2f64_to_v4f32
+
+define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
+entry:
+  %0 = load volatile <2 x double>* %src
+  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
+  %2 = bitcast <2 x double> %1 to <2 x i64>
+  %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
+  store <2 x i64> %3, <2 x i64>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v2i64:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v2f64_to_v2i64
+
+; BIGENDIAN: v2f64_to_v2i64:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.d [[R3]],
+; BIGENDIAN: .size v2f64_to_v2i64
+
+define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
+entry:
+  %0 = load volatile <2 x double>* %src
+  %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
+  %2 = bitcast <2 x double> %1 to <2 x double>
+  %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
+  store <2 x double> %3, <2 x double>* %dst
+  ret void
+}
+
+; LITENDIAN: v2f64_to_v2f64:
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
+; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; LITENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; LITENDIAN: st.d [[R3]],
+; LITENDIAN: .size v2f64_to_v2f64
+
+; BIGENDIAN: v2f64_to_v2f64:
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
+; BIGENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+; BIGENDIAN: st.d [[R3]],
+; BIGENDIAN: .size v2f64_to_v2f64
+
+declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
+declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
+declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
+declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
+declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
+declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll
new file mode 100644
index 000000000000..9a88c47b7e1f
--- /dev/null
+++ b/test/CodeGen/Mips/msa/bitwise.ll
@@ -0,0 +1,1639 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: and_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <16 x i8> %1, %2
+  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v16i8
+}
+
+define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: and_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <8 x i16> %1, %2
+  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v8i16
+}
+
+define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: and_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <4 x i32> %1, %2
+  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v4i32
+}
+
+define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: and_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <2 x i64> %1, %2
+  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v2i64
+}
+
+define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: and_v16i8_i:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v16i8_i
+}
+
+define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: and_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v8i16_i
+}
+
+define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: and_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v4i32_i
+}
+
+define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: and_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = and <2 x i64> %1, <i64 1, i64 1>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size and_v2i64_i
+}
+
+define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: or_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <16 x i8> %1, %2
+  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v16i8
+}
+
+define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: or_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <8 x i16> %1, %2
+  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v8i16
+}
+
+define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: or_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <4 x i32> %1, %2
+  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v4i32
+}
+
+define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: or_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <2 x i64> %1, %2
+  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v2i64
+}
+
+define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: or_v16i8_i:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v16i8_i
+}
+
+define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: or_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
+  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v8i16_i
+}
+
+define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: or_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
+  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v4i32_i
+}
+
+define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: or_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <2 x i64> %1, <i64 3, i64 3>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
+  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size or_v2i64_i
+}
+
+define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: nor_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <16 x i8> %1, %2
+  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v16i8
+}
+
+define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: nor_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <8 x i16> %1, %2
+  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v8i16
+}
+
+define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: nor_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <4 x i32> %1, %2
+  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v4i32
+}
+
+define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: nor_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = or <2 x i64> %1, %2
+  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
+  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v2i64
+}
+
+define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: nor_v16i8_i:
+  ; or + xor-with-all-ones must select nori.b; a bare "ori.b" pattern would also accept plain ori.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v16i8_i
+}
+
+define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: nor_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v8i16_i
+}
+
+define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: nor_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v4i32_i
+}
+
+define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: nor_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <2 x i64> %1, <i64 1, i64 1>
+  %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size nor_v2i64_i
+}
+
+define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: xor_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = xor <16 x i8> %1, %2
+  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v16i8
+}
+
+define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: xor_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = xor <8 x i16> %1, %2
+  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v8i16
+}
+
+define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: xor_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = xor <4 x i32> %1, %2
+  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v4i32
+}
+
+define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: xor_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = xor <2 x i64> %1, %2
+  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v2i64
+}
+
+define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: xor_v16i8_i:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v16i8_i
+}
+
+define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: xor_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
+  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v8i16_i
+}
+
+define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: xor_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
+  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v4i32_i
+}
+
+define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: xor_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <2 x i64> %1, <i64 3, i64 3>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
+  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size xor_v2i64_i
+}
+
+define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: sll_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <16 x i8> %1, %2
+  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v16i8
+}
+
+define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: sll_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <8 x i16> %1, %2
+  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v8i16
+}
+
+define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: sll_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <4 x i32> %1, %2
+  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v4i32
+}
+
+define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: sll_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <2 x i64> %1, %2
+  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v2i64
+}
+
+define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: sll_v16i8_i:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v16i8_i
+}
+
+define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: sll_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v8i16_i
+}
+
+define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: sll_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v4i32_i
+}
+
+define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: sll_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shl <2 x i64> %1, <i64 1, i64 1>
+  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sll_v2i64_i
+}
+
+define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: sra_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = ashr <16 x i8> %1, %2
+  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v16i8
+}
+
+define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: sra_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = ashr <8 x i16> %1, %2
+  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v8i16
+}
+
+define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: sra_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = ashr <4 x i32> %1, %2
+  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v4i32
+}
+
+define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: sra_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = ashr <2 x i64> %1, %2
+  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v2i64
+}
+
+define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: sra_v16i8_i:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v16i8_i
+}
+
+define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: sra_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v8i16_i
+}
+
+define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: sra_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v4i32_i
+}
+
+define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: sra_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
+  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size sra_v2i64_i
+}
+
+define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: srl_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = lshr <16 x i8> %1, %2
+  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v16i8
+}
+
+define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: srl_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = lshr <8 x i16> %1, %2
+  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v8i16
+}
+
+define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: srl_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = lshr <4 x i32> %1, %2
+  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v4i32
+}
+
+define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: srl_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = lshr <2 x i64> %1, %2
+  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v2i64
+}
+
+define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: srl_v16i8_i:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v16i8_i
+}
+
+define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: srl_v8i16_i:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v8i16_i
+}
+
+define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: srl_v4i32_i:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v4i32_i
+}
+
+define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: srl_v2i64_i:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
+  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size srl_v2i64_i
+}
+
+define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: ctpop_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
+  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctpop_v16i8
+}
+
+define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: ctpop_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
+  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctpop_v8i16
+}
+
+define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: ctpop_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
+  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctpop_v4i32
+}
+
+define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: ctpop_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
+  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctpop_v2i64
+}
+
+define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK-LABEL: ctlz_v16i8:
+  ; llvm.ctlz on <16 x i8> loaded from %a is expected to select nlzc.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
+  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctlz_v16i8
+}
+
+define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK-LABEL: ctlz_v8i16:
+  ; llvm.ctlz on <8 x i16> loaded from %a is expected to select nlzc.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
+  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctlz_v8i16
+}
+
+define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK-LABEL: ctlz_v4i32:
+  ; llvm.ctlz on <4 x i32> loaded from %a is expected to select nlzc.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
+  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctlz_v4i32
+}
+
+define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK-LABEL: ctlz_v2i64:
+  ; llvm.ctlz on <2 x i64> loaded from %a is expected to select nlzc.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
+  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ctlz_v2i64
+}
+
+define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
+  ; CHECK-LABEL: bsel_v16i8:
+  ; Bit select with a loaded mask: stores (a & m) | (b & ~m) to %c.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <16 x i8>* %m
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
+  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
+                          i8 -1, i8 -1, i8 -1, i8 -1,
+                          i8 -1, i8 -1, i8 -1, i8 -1,
+                          i8 -1, i8 -1, i8 -1, i8 -1>
+  %5 = and <16 x i8> %1, %3
+  %6 = and <16 x i8> %2, %4
+  %7 = or <16 x i8> %5, %6
+  ; bmnz is the same operation
+  ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]]
+  store <16 x i8> %7, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R1]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v16i8
+}
+
+define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
+  ; CHECK-LABEL: bsel_v16i8_i:
+  ; Bit select against a splat of 6: (a & ~m) | (6 & m) should select bseli.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %m
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
+  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
+                          i8 -1, i8 -1, i8 -1, i8 -1,
+                          i8 -1, i8 -1, i8 -1, i8 -1,
+                          i8 -1, i8 -1, i8 -1, i8 -1>
+  %4 = and <16 x i8> %1, %3
+  %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
+                      i8 6, i8 6, i8 6, i8 6,
+                      i8 6, i8 6, i8 6, i8 6,
+                      i8 6, i8 6, i8 6, i8 6>, %2
+  %6 = or <16 x i8> %4, %5
+  ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
+  store <16 x i8> %6, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v16i8_i
+}
+
+define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: bsel_v8i16:
+  ; (a & 6) | (b & ~6): the constant mask is built with ldi.h, then bsel.v.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
+                          i16 6, i16 6, i16 6, i16 6>
+  %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529,
+                          i16 65529, i16 65529, i16 65529, i16 65529>
+  %5 = or <8 x i16> %3, %4
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
+  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
+  store <8 x i16> %5, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v8i16
+}
+
+define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: bsel_v4i32:
+  ; (a & 6) | (b & ~6): the constant mask is built with ldi.w, then bsel.v.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
+  %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
+  %5 = or <4 x i32> %3, %4
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
+  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
+  store <4 x i32> %5, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v4i32
+}
+
+define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: bsel_v2i64:
+  ; (a & 6) | (b & ~6): the constant mask is built with ldi.d, then bsel.v.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <2 x i64> %1, <i64 6, i64 6>
+  %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
+  %5 = or <2 x i64> %3, %4
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
+  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
+  store <2 x i64> %5, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v2i64
+}
+
+define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: binsl_v16i8_i:
+  ; (a & 0xC0) | (b & 0x3F): top two bits of each lane come from a (binsli.b).
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
+                          i8 192, i8 192, i8 192, i8 192,
+                          i8 192, i8 192, i8 192, i8 192,
+                          i8 192, i8 192, i8 192, i8 192>
+  %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63,
+                          i8 63, i8 63, i8 63, i8 63,
+                          i8 63, i8 63, i8 63, i8 63,
+                          i8 63, i8 63, i8 63, i8 63>
+  %5 = or <16 x i8> %3, %4
+  ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2
+  store <16 x i8> %5, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsl_v16i8_i
+}
+
+define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: binsl_v8i16_i:
+  ; (a & 0xC000) | (b & 0x3FFF): top two bits of each lane come from a.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
+                          i16 49152, i16 49152, i16 49152, i16 49152>
+  %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383,
+                          i16 16383, i16 16383, i16 16383, i16 16383>
+  %5 = or <8 x i16> %3, %4
+  ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2
+  store <8 x i16> %5, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsl_v8i16_i
+}
+
+define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: binsl_v4i32_i:
+  ; (a & 0xC0000000) | (b & 0x3FFFFFFF): top two bits of each lane come from a.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
+  %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+  %5 = or <4 x i32> %3, %4
+  ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2
+  store <4 x i32> %5, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsl_v4i32_i
+}
+
+define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: binsl_v2i64_i:
+  ; (a & ~7) | (b & 7): all but the low three bits of each lane come from a.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
+  %4 = and <2 x i64> %2, <i64 7, i64 7>
+  %5 = or <2 x i64> %3, %4
+  ; TODO: We use a particularly wide mask here to work around a legalization
+  ;       issue. If the mask doesn't fit within a 10-bit immediate, it gets
+  ;       legalized into a constant pool. We should add a test to cover the
+  ;       other cases once they correctly select binsli.d.
+  ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61
+  store <2 x i64> %5, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsl_v2i64_i
+}
+
+define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: binsr_v16i8_i:
+  ; (a & 3) | (b & ~3): low two bits of each lane come from a (binsri.b).
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
+                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252,
+                          i8 252, i8 252, i8 252, i8 252,
+                          i8 252, i8 252, i8 252, i8 252,
+                          i8 252, i8 252, i8 252, i8 252>
+  %5 = or <16 x i8> %3, %4
+  ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2
+  store <16 x i8> %5, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsr_v16i8_i
+}
+
+define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: binsr_v8i16_i:
+  ; (a & 3) | (b & ~3): low two bits of each lane come from a (binsri.h).
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
+                          i16 3, i16 3, i16 3, i16 3>
+  %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532,
+                          i16 65532, i16 65532, i16 65532, i16 65532>
+  %5 = or <8 x i16> %3, %4
+  ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2
+  store <8 x i16> %5, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsr_v8i16_i
+}
+
+define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: binsr_v4i32_i:
+  ; (a & 3) | (b & ~3): low two bits of each lane come from a (binsri.w).
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
+  %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
+  %5 = or <4 x i32> %3, %4
+  ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2
+  store <4 x i32> %5, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsr_v4i32_i
+}
+
+define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: binsr_v2i64_i:
+  ; (a & 3) | (b & ~3): low two bits of each lane come from a (binsri.d).
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = and <2 x i64> %1, <i64 3, i64 3>
+  %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
+  %5 = or <2 x i64> %3, %4
+  ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2
+  store <2 x i64> %5, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R2]], 0($4)
+
+  ret void
+  ; CHECK: .size binsr_v2i64_i
+}
+
+define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: bclr_v16i8:
+  ; a & ~(1 << b) per lane is expected to select bclr.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
+  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %5 = and <16 x i8> %1, %4
+  ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %5, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v16i8
+}
+
+define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: bclr_v8i16:
+  ; a & ~(1 << b) per lane is expected to select bclr.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
+  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %5 = and <8 x i16> %1, %4
+  ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %5, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v8i16
+}
+
+define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: bclr_v4i32:
+  ; a & ~(1 << b) per lane is expected to select bclr.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
+  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %5 = and <4 x i32> %1, %4
+  ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %5, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v4i32
+}
+
+define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: bclr_v2i64:
+  ; a & ~(1 << b) per lane is expected to select bclr.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <2 x i64> <i64 1, i64 1>, %2
+  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
+  %5 = and <2 x i64> %1, %4
+  ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %5, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v2i64
+}
+
+define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: bset_v16i8:
+  ; a | (1 << b) per lane is expected to select bset.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
+  %4 = or <16 x i8> %1, %3
+  ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bset_v16i8
+}
+
+define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: bset_v8i16:
+  ; a | (1 << b) per lane is expected to select bset.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
+  %4 = or <8 x i16> %1, %3
+  ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bset_v8i16
+}
+
+define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: bset_v4i32:
+  ; a | (1 << b) per lane is expected to select bset.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
+  %4 = or <4 x i32> %1, %3
+  ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bset_v4i32
+}
+
+define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: bset_v2i64:
+  ; a | (1 << b) per lane is expected to select bset.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <2 x i64> <i64 1, i64 1>, %2
+  %4 = or <2 x i64> %1, %3
+  ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bset_v2i64
+}
+
+define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: bneg_v16i8:
+  ; a ^ (1 << b) per lane is expected to select bneg.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
+  %4 = xor <16 x i8> %1, %3
+  ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bneg_v16i8
+}
+
+define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: bneg_v8i16:
+  ; a ^ (1 << b) per lane is expected to select bneg.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
+  %4 = xor <8 x i16> %1, %3
+  ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bneg_v8i16
+}
+
+define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: bneg_v4i32:
+  ; a ^ (1 << b) per lane is expected to select bneg.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
+  %4 = xor <4 x i32> %1, %3
+  ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bneg_v4i32
+}
+
+define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: bneg_v2i64:
+  ; a ^ (1 << b) per lane is expected to select bneg.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <2 x i64> <i64 1, i64 1>, %2
+  %4 = xor <2 x i64> %1, %3
+  ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bneg_v2i64
+}
+
+define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK-LABEL: bclri_v16i8:
+  ; a & ~8 per lane (clear bit 3); for bytes this is matched as andi.b 247.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <16 x i8> <i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8>,
+                     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %3 = and <16 x i8> %1, %2
+  ; bclri.b and andi.b are exactly equivalent.
+  ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v16i8
+}
+
+define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK-LABEL: bclri_v8i16:
+  ; a & ~8 per lane (clear bit 3) is expected to select bclri.h with index 3.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <8 x i16> <i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8>,
+                     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %3 = and <8 x i16> %1, %2
+  ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v8i16
+}
+
+define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK-LABEL: bclri_v4i32:
+  ; a & ~8 per lane (clear bit 3) is expected to select bclri.w with index 3.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <4 x i32> <i32  8, i32  8, i32  8, i32  8>,
+                     <i32 -1, i32 -1, i32 -1, i32 -1>
+  %3 = and <4 x i32> %1, %2
+  ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v4i32
+}
+
+define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK-LABEL: bclri_v2i64:
+  ; a & ~8 per lane (clear bit 3) is expected to select bclri.d with index 3.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <2 x i64> <i64  8, i64  8>,
+                     <i64 -1, i64 -1>
+  %3 = and <2 x i64> %1, %2
+  ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v2i64
+}
+
+define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK-LABEL: bseti_v16i8:
+  ; a | 8 per lane (set bit 3) is expected to select bseti.b with index 3.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bseti_v16i8
+}
+
+define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK-LABEL: bseti_v8i16:
+  ; a | 8 per lane (set bit 3) is expected to select bseti.h with index 3.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bseti_v8i16
+}
+
+define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK-LABEL: bseti_v4i32:
+  ; a | 8 per lane (set bit 3) is expected to select bseti.w with index 3.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
+  ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bseti_v4i32
+}
+
+define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK-LABEL: bseti_v2i64:
+  ; a | 8 per lane (set bit 3) is expected to select bseti.d with index 3.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = or <2 x i64> %1, <i64 8, i64 8>
+  ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bseti_v2i64
+}
+
+define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK-LABEL: bnegi_v16i8:
+  ; a ^ 8 per lane (flip bit 3) is expected to select bnegi.b with index 3.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bnegi_v16i8
+}
+
+define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK-LABEL: bnegi_v8i16:
+  ; a ^ 8 per lane (flip bit 3) is expected to select bnegi.h with index 3.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bnegi_v8i16
+}
+
+define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK-LABEL: bnegi_v4i32:
+  ; a ^ 8 per lane (flip bit 3) is expected to select bnegi.w with index 3.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
+  ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bnegi_v4i32
+}
+
+define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK-LABEL: bnegi_v2i64:
+  ; a ^ 8 per lane (flip bit 3) is expected to select bnegi.d with index 3.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <2 x i64> %1, <i64 8, i64 8>
+  ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bnegi_v2i64
+}
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)
diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll
new file mode 100644
index 000000000000..6408d7ba09f4
--- /dev/null
+++ b/test/CodeGen/Mips/msa/compare.ll
@@ -0,0 +1,2079 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: ceq_v16i8:
+  ; sext(icmp eq a, b) on <16 x i8> is expected to select ceq.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp eq <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceq_v16i8
+}
+
+define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: ceq_v8i16:
+  ; sext(icmp eq a, b) on <8 x i16> is expected to select ceq.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp eq <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceq_v8i16
+}
+
+define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: ceq_v4i32:
+  ; sext(icmp eq a, b) on <4 x i32> is expected to select ceq.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp eq <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceq_v4i32
+}
+
+define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: ceq_v2i64:
+  ; sext(icmp eq a, b) on <2 x i64> is expected to select ceq.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp eq <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceq_v2i64
+}
+
+define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: cle_s_v16i8:
+  ; sext(icmp sle a, b) on <16 x i8> is expected to select cle_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: cle_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_s_v16i8
+}
+
+define void @cle_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: cle_s_v8i16:
+  ; sext(icmp sle a, b) on <8 x i16> is expected to select cle_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: cle_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_s_v8i16
+}
+
+define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: cle_s_v4i32:
+  ; sext(icmp sle a, b) on <4 x i32> is expected to select cle_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: cle_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_s_v4i32
+}
+
+define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: cle_s_v2i64:
+  ; sext(icmp sle a, b) on <2 x i64> is expected to select cle_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: cle_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_s_v2i64
+}
+
+define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: cle_u_v16i8:
+  ; sext(icmp ule a, b) on <16 x i8> is expected to select cle_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: cle_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_u_v16i8
+}
+
+define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: cle_u_v8i16:
+  ; sext(icmp ule a, b) on <8 x i16> is expected to select cle_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: cle_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_u_v8i16
+}
+
+define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: cle_u_v4i32:
+  ; sext(icmp ule a, b) on <4 x i32> is expected to select cle_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: cle_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_u_v4i32
+}
+
+define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: cle_u_v2i64:
+  ; sext(icmp ule a, b) on <2 x i64> is expected to select cle_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: cle_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cle_u_v2i64
+}
+
+define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: clt_s_v16i8:
+  ; sext(icmp slt a, b) on <16 x i8> is expected to select clt_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: clt_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_s_v16i8
+}
+
+define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: clt_s_v8i16:
+  ; sext(icmp slt a, b) on <8 x i16> is expected to select clt_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: clt_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_s_v8i16
+}
+
+define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: clt_s_v4i32:
+  ; sext(icmp slt a, b) on <4 x i32> is expected to select clt_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: clt_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_s_v4i32
+}
+
+define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: clt_s_v2i64:
+  ; sext(icmp slt a, b) on <2 x i64> is expected to select clt_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: clt_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_s_v2i64
+}
+
+define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: clt_u_v16i8:
+  ; sext(icmp ult a, b) on <16 x i8> is expected to select clt_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: clt_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_u_v16i8
+}
+
+define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: clt_u_v8i16:
+  ; Unsigned less-than of two <8 x i16> loads, sign-extended to a lane mask: expected as clt_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: clt_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_u_v8i16
+}
+
+define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: clt_u_v4i32:
+  ; Unsigned less-than of two <4 x i32> loads, sign-extended to a lane mask: expected as clt_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: clt_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_u_v4i32
+}
+
+define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: clt_u_v2i64:
+  ; Unsigned less-than of two <2 x i64> loads, sign-extended to a lane mask: expected as clt_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: clt_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clt_u_v2i64
+}
+
+; There is no != comparison instruction, but test it anyway since we've had
+; legalizer issues in this area. Expected as ceq.b followed by xori.b 255 to invert the mask.
+define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: cne_v16i8:
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: xori.b [[R3]], [[R3]], 255
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v16i8
+}
+
+; There is no != comparison instruction, but test it anyway since we've had
+; legalizer issues in this area.
+define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: cne_v8i16:
+  ; Not-equal of two <8 x i16> loads as a lane mask: expected as ceq.h plus an inversion (ldi.b -1, xor.v).
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
+  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v8i16
+}
+
+; There is no != comparison instruction, but test it anyway since we've had
+; legalizer issues in this area.
+define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: cne_v4i32:
+  ; Not-equal of two <4 x i32> loads as a lane mask: expected as ceq.w plus an inversion (ldi.b -1, xor.v).
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
+  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v4i32
+}
+
+; There is no != comparison instruction, but test it anyway since we've had
+; legalizer issues in this area.
+define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: cne_v2i64:
+  ; Not-equal of two <2 x i64> loads as a lane mask: expected as ceq.d plus an inversion (ldi.b -1, xor.v).
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
+  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v2i64
+}
+
+define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: ceqi_v16i8:
+  ; Equality of a <16 x i8> load against a splat of 1, as a lane mask: expected as the immediate form ceqi.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp eq <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = sext <16 x i1> %2 to <16 x i8>
+  ; CHECK-DAG: ceqi.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceqi_v16i8
+}
+
+define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: ceqi_v8i16:
+  ; Equality of a <8 x i16> load against a splat of 1, as a lane mask: expected as the immediate form ceqi.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp eq <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ; CHECK-DAG: ceqi.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceqi_v8i16
+}
+
+define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: ceqi_v4i32:
+  ; Equality of a <4 x i32> load against a splat of 1, as a lane mask: expected as the immediate form ceqi.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp eq <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = sext <4 x i1> %2 to <4 x i32>
+  ; CHECK-DAG: ceqi.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceqi_v4i32
+}
+
+define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: ceqi_v2i64:
+  ; Equality of a <2 x i64> load against a splat of 1, as a lane mask: expected as the immediate form ceqi.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
+  %3 = sext <2 x i1> %2 to <2 x i64>
+  ; CHECK-DAG: ceqi.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ceqi_v2i64
+}
+
+define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: clei_s_v16i8:
+  ; Signed less-or-equal of a <16 x i8> load against a splat of 1, as a lane mask: expected as clei_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = sext <16 x i1> %2 to <16 x i8>
+  ; CHECK-DAG: clei_s.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_s_v16i8
+}
+
+define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: clei_s_v8i16:
+  ; Signed less-or-equal of a <8 x i16> load against a splat of 1, as a lane mask: expected as clei_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ; CHECK-DAG: clei_s.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_s_v8i16
+}
+
+define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: clei_s_v4i32:
+  ; Signed less-or-equal of a <4 x i32> load against a splat of 1, as a lane mask: expected as clei_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = sext <4 x i1> %2 to <4 x i32>
+  ; CHECK-DAG: clei_s.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_s_v4i32
+}
+
+define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: clei_s_v2i64:
+  ; Signed less-or-equal of a <2 x i64> load against a splat of 1, as a lane mask: expected as clei_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
+  %3 = sext <2 x i1> %2 to <2 x i64>
+  ; CHECK-DAG: clei_s.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_s_v2i64
+}
+
+define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: clei_u_v16i8:
+  ; Unsigned less-or-equal of a <16 x i8> load against a splat of 1, as a lane mask: expected as clei_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = sext <16 x i1> %2 to <16 x i8>
+  ; CHECK-DAG: clei_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_u_v16i8
+}
+
+define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: clei_u_v8i16:
+  ; Unsigned less-or-equal of a <8 x i16> load against a splat of 1, as a lane mask: expected as clei_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ; CHECK-DAG: clei_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_u_v8i16
+}
+
+define void @clei_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: clei_u_v4i32:
+  ; Unsigned less-or-equal of a <4 x i32> load against a splat of 1, as a lane mask: expected as clei_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = sext <4 x i1> %2 to <4 x i32>
+  ; CHECK-DAG: clei_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_u_v4i32
+}
+
+define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: clei_u_v2i64:
+  ; Unsigned less-or-equal of a <2 x i64> load against a splat of 1, as a lane mask: expected as clei_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
+  %3 = sext <2 x i1> %2 to <2 x i64>
+  ; CHECK-DAG: clei_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clei_u_v2i64
+}
+
+define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: clti_s_v16i8:
+  ; Signed less-than of a <16 x i8> load against a splat of 1, as a lane mask: expected as clti_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = sext <16 x i1> %2 to <16 x i8>
+  ; CHECK-DAG: clti_s.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_s_v16i8
+}
+
+define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: clti_s_v8i16:
+  ; Signed less-than of a <8 x i16> load against a splat of 1, as a lane mask: expected as clti_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ; CHECK-DAG: clti_s.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_s_v8i16
+}
+
+define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: clti_s_v4i32:
+  ; Signed less-than of a <4 x i32> load against a splat of 1, as a lane mask: expected as clti_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = sext <4 x i1> %2 to <4 x i32>
+  ; CHECK-DAG: clti_s.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_s_v4i32
+}
+
+define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: clti_s_v2i64:
+  ; Signed less-than of a <2 x i64> load against a splat of 1, as a lane mask: expected as clti_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
+  %3 = sext <2 x i1> %2 to <2 x i64>
+  ; CHECK-DAG: clti_s.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_s_v2i64
+}
+
+define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: clti_u_v16i8:
+  ; Unsigned less-than of a <16 x i8> load against a splat of 1, as a lane mask: expected as clti_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = sext <16 x i1> %2 to <16 x i8>
+  ; CHECK-DAG: clti_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_u_v16i8
+}
+
+define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: clti_u_v8i16:
+  ; Unsigned less-than of a <8 x i16> load against a splat of 1, as a lane mask: expected as clti_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = sext <8 x i1> %2 to <8 x i16>
+  ; CHECK-DAG: clti_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_u_v8i16
+}
+
+define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: clti_u_v4i32:
+  ; Unsigned less-than of a <4 x i32> load against a splat of 1, as a lane mask: expected as clti_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = sext <4 x i1> %2 to <4 x i32>
+  ; CHECK-DAG: clti_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_u_v4i32
+}
+
+define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: clti_u_v2i64:
+  ; Unsigned less-than of a <2 x i64> load against a splat of 1, as a lane mask: expected as clti_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
+  %3 = sext <2 x i1> %2 to <2 x i64>
+  ; CHECK-DAG: clti_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size clti_u_v2i64
+}
+
+define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
+                        <16 x i8>* %c) nounwind {
+  ; CHECK: bsel_s_v16i8:
+  ; Per-lane select of %1 over %3 where %1 > %2 (signed); the compare is expected as clt_s.b with swapped operands.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <16 x i8>* %c
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp sgt <16 x i8> %1, %2
+  ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
+  ; bmnz.v is the same operation
+  ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
+  store <16 x i8> %5, <16 x i8>* %d
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_s_v16i8
+}
+
+define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
+                        <8 x i16>* %c) nounwind {
+  ; CHECK: bsel_s_v8i16:
+  ; Per-lane select of %1 over %3 where %1 > %2 (signed): expected as clt_s.h (swapped operands) + bsel.v.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <8 x i16>* %c
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp sgt <8 x i16> %1, %2
+  ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <8 x i16> %5, <8 x i16>* %d
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+  ; NOTE(review): bsel_s_v16i8 expects bmnz.v with the mask last; confirm this bsel.v source order picks %1 on true.
+  ret void
+  ; CHECK: .size bsel_s_v8i16
+}
+
+define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
+                        <4 x i32>* %c) nounwind {
+  ; CHECK: bsel_s_v4i32:
+  ; Per-lane select of %1 over %3 where %1 > %2 (signed): expected as clt_s.w (swapped operands) + bsel.v.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <4 x i32>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp sgt <4 x i32> %1, %2
+  ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <4 x i32> %5, <4 x i32>* %d
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+  ; NOTE(review): bsel_s_v16i8 expects bmnz.v with the mask last; confirm this bsel.v source order picks %1 on true.
+  ret void
+  ; CHECK: .size bsel_s_v4i32
+}
+
+define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
+                        <2 x i64>* %c) nounwind {
+  ; CHECK: bsel_s_v2i64:
+  ; Per-lane select of %1 over %3 where %1 > %2 (signed): expected as clt_s.d (swapped operands) + bsel.v.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <2 x i64>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp sgt <2 x i64> %1, %2
+  ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <2 x i64> %5, <2 x i64>* %d
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; NOTE(review): bsel_s_v16i8 expects bmnz.v with the mask last; confirm this bsel.v source order picks %1 on true.
+  ret void
+  ; CHECK: .size bsel_s_v2i64
+}
+
+define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
+                        <16 x i8>* %c) nounwind {
+  ; CHECK: bsel_u_v16i8:
+  ; Per-lane select of %1 over %3 where %1 > %2 (unsigned); the compare is expected as clt_u.b with swapped operands.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <16 x i8>* %c
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp ugt <16 x i8> %1, %2
+  ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
+  ; bmnz.v is the same operation
+  ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
+  store <16 x i8> %5, <16 x i8>* %d
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_u_v16i8
+}
+
+define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
+                        <8 x i16>* %c) nounwind {
+  ; CHECK: bsel_u_v8i16:
+  ; Per-lane select of %1 over %3 where %1 > %2 (unsigned): expected as clt_u.h (swapped operands) + bsel.v.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <8 x i16>* %c
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp ugt <8 x i16> %1, %2
+  ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <8 x i16> %5, <8 x i16>* %d
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+  ; NOTE(review): bsel_u_v16i8 expects bmnz.v with the mask last; confirm this bsel.v source order picks %1 on true.
+  ret void
+  ; CHECK: .size bsel_u_v8i16
+}
+
+define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
+                        <4 x i32>* %c) nounwind {
+  ; CHECK: bsel_u_v4i32:
+  ; Per-lane select of %1 over %3 where %1 > %2 (unsigned): expected as clt_u.w (swapped operands) + bsel.v.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <4 x i32>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp ugt <4 x i32> %1, %2
+  ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <4 x i32> %5, <4 x i32>* %d
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+  ; NOTE(review): bsel_u_v16i8 expects bmnz.v with the mask last; confirm this bsel.v source order picks %1 on true.
+  ret void
+  ; CHECK: .size bsel_u_v4i32
+}
+
+define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
+                        <2 x i64>* %c) nounwind {
+  ; CHECK: bsel_u_v2i64:
+  ; Per-lane select of %1 over %3 where %1 > %2 (unsigned): expected as clt_u.d (swapped operands) + bsel.v.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <2 x i64>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %4 = icmp ugt <2 x i64> %1, %2
+  ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <2 x i64> %5, <2 x i64>* %d
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; NOTE(review): bsel_u_v16i8 expects bmnz.v with the mask last; confirm this bsel.v source order picks %1 on true.
+  ret void
+  ; CHECK: .size bsel_u_v2i64
+}
+
+define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
+                        <16 x i8>* %c) nounwind {
+  ; CHECK: bseli_s_v16i8:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (signed); %c is unused. Expected as clt_s.b + bseli.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <16 x i8> %1, %2
+  ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
+  store <16 x i8> %4, <16 x i8>* %d
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+  ; NOTE(review): confirm this bseli.b operand order yields %1 (not the immediate) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_s_v16i8
+}
+
+define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
+                        <8 x i16>* %c) nounwind {
+  ; CHECK: bseli_s_v8i16:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (signed); %c is unused. Expected as clt_s.h, ldi.h 1, bsel.v.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <8 x i16> %1, %2
+  ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <8 x i16> %4, <8 x i16>* %d
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+  ; NOTE(review): confirm this bsel.v source operand order yields %1 (not the splat) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_s_v8i16
+}
+
+define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
+                        <4 x i32>* %c) nounwind {
+  ; CHECK: bseli_s_v4i32:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (signed); %c is unused. Expected as clt_s.w, ldi.w 1, bsel.v.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <4 x i32> %1, %2
+  ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <4 x i32> %4, <4 x i32>* %d
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+  ; NOTE(review): confirm this bsel.v source operand order yields %1 (not the splat) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_s_v4i32
+}
+
+define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
+                        <2 x i64>* %c) nounwind {
+  ; CHECK: bseli_s_v2i64:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (signed); %c is unused. Expected as clt_s.d, ldi.d 1, bsel.v.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <2 x i64> %1, %2
+  ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <2 x i64> %4, <2 x i64>* %d
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; NOTE(review): confirm this bsel.v source operand order yields %1 (not the splat) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_s_v2i64
+}
+
+define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
+                        <16 x i8>* %c) nounwind {
+  ; CHECK: bseli_u_v16i8:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (unsigned); %c is unused. Expected as clt_u.b + bseli.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <16 x i8> %1, %2
+  ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
+  store <16 x i8> %4, <16 x i8>* %d
+  ; CHECK-DAG: st.b [[R4]], 0($4)
+  ; NOTE(review): confirm this bseli.b operand order yields %1 (not the immediate) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_u_v16i8
+}
+
+define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
+                        <8 x i16>* %c) nounwind {
+  ; CHECK: bseli_u_v8i16:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (unsigned); %c is unused. Expected as clt_u.h, ldi.h 1, bsel.v.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <8 x i16> %1, %2
+  ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <8 x i16> %4, <8 x i16>* %d
+  ; CHECK-DAG: st.h [[R4]], 0($4)
+  ; NOTE(review): confirm this bsel.v source operand order yields %1 (not the splat) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_u_v8i16
+}
+
+define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
+                        <4 x i32>* %c) nounwind {
+  ; CHECK: bseli_u_v4i32:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (unsigned); %c is unused. Expected as clt_u.w, ldi.w 1, bsel.v.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <4 x i32> %1, %2
+  ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <4 x i32> %4, <4 x i32>* %d
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+  ; NOTE(review): confirm this bsel.v source operand order yields %1 (not the splat) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_u_v4i32
+}
+
+define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
+                        <2 x i64>* %c) nounwind {
+  ; CHECK: bseli_u_v2i64:
+  ; Per-lane select of %1 over a splat of 1 where %1 > %2 (unsigned); %c is unused. Expected as clt_u.d, ldi.d 1, bsel.v.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <2 x i64> %1, %2
+  ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <2 x i64> %4, <2 x i64>* %d
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; NOTE(review): confirm this bsel.v source operand order yields %1 (not the splat) where the compare is true.
+  ret void
+  ; CHECK: .size bseli_u_v2i64
+}
+
+define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: max_s_v16i8:
+  ; icmp sgt + select over the same two <16 x i8> loads: expected to be matched as max_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_v16i8
+}
+
+define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: max_s_v8i16:
+  ; icmp sgt + select over the same two <8 x i16> loads: expected to be matched as max_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_v8i16
+}
+
+define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: max_s_v4i32:
+  ; icmp sgt + select over the same two <4 x i32> loads: expected to be matched as max_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_v4i32
+}
+
+define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: max_s_v2i64:
+  ; icmp sgt + select over the same two <2 x i64> loads: expected to be matched as max_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sgt <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_v2i64
+}
+
+define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: max_u_v16i8:
+  ; icmp ugt + select over the same two <16 x i8> loads: expected to be matched as max_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_v16i8
+}
+
+define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: max_u_v8i16:
+  ; icmp ugt + select over the same two <8 x i16> loads: expected to be matched as max_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_v8i16
+}
+
+define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: max_u_v4i32:
+  ; icmp ugt + select over the same two <4 x i32> loads: expected to be matched as max_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_v4i32
+}
+
+define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: max_u_v2i64:
+  ; icmp ugt + select over the same two <2 x i64> loads: expected to be matched as max_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ugt <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_v2i64
+}
+
+define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: max_s_eq_v16i8:
+  ; icmp sge + select over the same two <16 x i8> loads: expected to be matched as max_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sge <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_eq_v16i8
+}
+
+define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: max_s_eq_v8i16:
+  ; icmp sge + select over the same two <8 x i16> loads: expected to be matched as max_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sge <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_eq_v8i16
+}
+
+define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: max_s_eq_v4i32:
+  ; icmp sge + select over the same two <4 x i32> loads: expected to be matched as max_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sge <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_eq_v4i32
+}
+
+define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: max_s_eq_v2i64:
+  ; icmp sge + select over the same two <2 x i64> loads: expected to be matched as max_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sge <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_s_eq_v2i64
+}
+
+define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: max_u_eq_v16i8:
+  ; icmp uge + select over the same two <16 x i8> loads: expected to be matched as max_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp uge <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_eq_v16i8
+}
+
+define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: max_u_eq_v8i16:
+  ; icmp uge + select over the same two <8 x i16> loads: expected to be matched as max_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp uge <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_eq_v8i16
+}
+
+define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: max_u_eq_v4i32:
+  ; select (icmp uge a, b), a, b on <4 x i32>: expected to lower to one max_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp uge <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_eq_v4i32
+}
+
+define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: max_u_eq_v2i64:
+  ; select (icmp uge a, b), a, b on <2 x i64>: expected to lower to one max_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp uge <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_u_eq_v2i64
+}
+
+define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: maxi_s_v16i8:
+  ; select (icmp sgt a, splat 1), a, splat 1 on <16 x i8>: expected to lower to maxi_s.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sgt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_v16i8
+}
+
+define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: maxi_s_v8i16:
+  ; select (icmp sgt a, splat 1), a, splat 1 on <8 x i16>: expected to lower to maxi_s.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sgt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_v8i16
+}
+
+define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: maxi_s_v4i32:
+  ; select (icmp sgt a, splat 1), a, splat 1 on <4 x i32>: expected to lower to maxi_s.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sgt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_v4i32
+}
+
+define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: maxi_s_v2i64:
+  ; select (icmp sgt a, splat 1), a, splat 1 on <2 x i64>: expected to lower to maxi_s.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sgt <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_v2i64
+}
+
+define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: maxi_u_v16i8:
+  ; select (icmp ugt a, splat 1), a, splat 1 on <16 x i8>: expected to lower to maxi_u.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ugt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_v16i8
+}
+
+define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: maxi_u_v8i16:
+  ; select (icmp ugt a, splat 1), a, splat 1 on <8 x i16>: expected to lower to maxi_u.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ugt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_v8i16
+}
+
+define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: maxi_u_v4i32:
+  ; select (icmp ugt a, splat 1), a, splat 1 on <4 x i32>: expected to lower to maxi_u.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ugt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_v4i32
+}
+
+define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: maxi_u_v2i64:
+  ; select (icmp ugt a, splat 1), a, splat 1 on <2 x i64>: expected to lower to maxi_u.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_v2i64
+}
+
+define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: maxi_s_eq_v16i8:
+  ; select (icmp sge a, splat 1), a, splat 1 on <16 x i8>: expected to lower to maxi_s.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_eq_v16i8
+}
+
+define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: maxi_s_eq_v8i16:
+  ; select (icmp sge a, splat 1), a, splat 1 on <8 x i16>: expected to lower to maxi_s.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_eq_v8i16
+}
+
+define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: maxi_s_eq_v4i32:
+  ; select (icmp sge a, splat 1), a, splat 1 on <4 x i32>: expected to lower to maxi_s.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_eq_v4i32
+}
+
+define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: maxi_s_eq_v2i64:
+  ; select (icmp sge a, splat 1), a, splat 1 on <2 x i64>: expected to lower to maxi_s.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sge <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_s_eq_v2i64
+}
+
+define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: maxi_u_eq_v16i8:
+  ; select (icmp uge a, splat 1), a, splat 1 on <16 x i8>: expected to lower to maxi_u.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp uge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_eq_v16i8
+}
+
+define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: maxi_u_eq_v8i16:
+  ; select (icmp uge a, splat 1), a, splat 1 on <8 x i16>: expected to lower to maxi_u.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp uge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_eq_v8i16
+}
+
+define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: maxi_u_eq_v4i32:
+  ; select (icmp uge a, splat 1), a, splat 1 on <4 x i32>: expected to lower to maxi_u.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp uge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_eq_v4i32
+}
+
+define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: maxi_u_eq_v2i64:
+  ; select (icmp uge a, splat 1), a, splat 1 on <2 x i64>: expected to lower to maxi_u.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp uge <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size maxi_u_eq_v2i64
+}
+
+define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: min_s_v16i8:
+  ; select (icmp slt a, b), a, b on <16 x i8>: expected to lower to one min_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <16 x i8> %1, %2 ; strict predicate here; sle is exercised by @min_s_eq_v16i8
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_v16i8
+}
+
+define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: min_s_v8i16:
+  ; select (icmp slt a, b), a, b on <8 x i16>: expected to lower to one min_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_v8i16
+}
+
+define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: min_s_v4i32:
+  ; select (icmp slt a, b), a, b on <4 x i32>: expected to lower to one min_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_v4i32
+}
+
+define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: min_s_v2i64:
+  ; select (icmp slt a, b), a, b on <2 x i64>: expected to lower to one min_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp slt <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_v2i64
+}
+
+define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: min_u_v16i8:
+  ; select (icmp ult a, b), a, b on <16 x i8>: expected to lower to one min_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_v16i8
+}
+
+define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: min_u_v8i16:
+  ; select (icmp ult a, b), a, b on <8 x i16>: expected to lower to one min_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_v8i16
+}
+
+define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: min_u_v4i32:
+  ; select (icmp ult a, b), a, b on <4 x i32>: expected to lower to one min_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_v4i32
+}
+
+define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: min_u_v2i64:
+  ; select (icmp ult a, b), a, b on <2 x i64>: expected to lower to one min_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ult <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_v2i64
+}
+
+define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: min_s_eq_v16i8:
+  ; select (icmp sle a, b), a, b on <16 x i8>: expected to lower to one min_s.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_eq_v16i8
+}
+
+define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: min_s_eq_v8i16:
+  ; select (icmp sle a, b), a, b on <8 x i16>: expected to lower to one min_s.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_eq_v8i16
+}
+
+define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: min_s_eq_v4i32:
+  ; select (icmp sle a, b), a, b on <4 x i32>: expected to lower to one min_s.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_eq_v4i32
+}
+
+define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: min_s_eq_v2i64:
+  ; select (icmp sle a, b), a, b on <2 x i64>: expected to lower to one min_s.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp sle <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_s_eq_v2i64
+}
+
+define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: min_u_eq_v16i8:
+  ; select (icmp ule a, b), a, b on <16 x i8>: expected to lower to one min_u.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <16 x i8> %1, %2
+  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+  ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_eq_v16i8
+}
+
+define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: min_u_eq_v8i16:
+  ; select (icmp ule a, b), a, b on <8 x i16>: expected to lower to one min_u.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <8 x i16> %1, %2
+  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+  ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_eq_v8i16
+}
+
+define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: min_u_eq_v4i32:
+  ; select (icmp ule a, b), a, b on <4 x i32>: expected to lower to one min_u.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <4 x i32> %1, %2
+  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+  ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_eq_v4i32
+}
+
+define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: min_u_eq_v2i64:
+  ; select (icmp ule a, b), a, b on <2 x i64>: expected to lower to one min_u.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ule <2 x i64> %1, %2
+  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+  ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_u_eq_v2i64
+}
+
+define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: mini_s_v16i8:
+  ; select (icmp slt a, splat 1), a, splat 1 on <16 x i8>: expected to lower to mini_s.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_v16i8
+}
+
+define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: mini_s_v8i16:
+  ; select (icmp slt a, splat 1), a, splat 1 on <8 x i16>: expected to lower to mini_s.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_v8i16
+}
+
+define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: mini_s_v4i32:
+  ; select (icmp slt a, splat 1), a, splat 1 on <4 x i32>: expected to lower to mini_s.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_v4i32
+}
+
+define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: mini_s_v2i64:
+  ; select (icmp slt a, splat 1), a, splat 1 on <2 x i64>: expected to lower to mini_s.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_v2i64
+}
+
+define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: mini_u_v16i8:
+  ; select (icmp ult a, splat 1), a, splat 1 on <16 x i8>: expected to lower to mini_u.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_v16i8
+}
+
+define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: mini_u_v8i16:
+  ; select (icmp ult a, splat 1), a, splat 1 on <8 x i16>: expected to lower to mini_u.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_v8i16
+}
+
+define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: mini_u_v4i32:
+  ; select (icmp ult a, splat 1), a, splat 1 on <4 x i32>: expected to lower to mini_u.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_v4i32
+}
+
+define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: mini_u_v2i64:
+  ; select (icmp ult a, splat 1), a, splat 1 on <2 x i64>: expected to lower to mini_u.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_v2i64
+}
+
+define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: mini_s_eq_v16i8:
+  ; select (icmp sle a, splat 1), a, splat 1 on <16 x i8>: expected to lower to mini_s.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_eq_v16i8
+}
+
+define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: mini_s_eq_v8i16:
+  ; select (icmp sle a, splat 1), a, splat 1 on <8 x i16>: expected to lower to mini_s.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_eq_v8i16
+}
+
+define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: mini_s_eq_v4i32:
+  ; select (icmp sle a, splat 1), a, splat 1 on <4 x i32>: expected to lower to mini_s.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_eq_v4i32
+}
+
+define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: mini_s_eq_v2i64:
+  ; select (icmp sle a, splat 1), a, splat 1 on <2 x i64>: expected to lower to mini_s.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_s_eq_v2i64
+}
+
+define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: mini_u_eq_v16i8:
+  ; select (icmp ule a, splat 1), a, splat 1 on <16 x i8>: expected to lower to mini_u.b with immediate 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_eq_v16i8
+}
+
+define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: mini_u_eq_v8i16:
+  ; select (icmp ule a, splat 1), a, splat 1 on <8 x i16>: expected to lower to mini_u.h with immediate 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_eq_v8i16
+}
+
+define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: mini_u_eq_v4i32:
+  ; select (icmp ule a, splat 1), a, splat 1 on <4 x i32>: expected to lower to mini_u.w with immediate 1.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_eq_v4i32
+}
+
+define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: mini_u_eq_v2i64:
+  ; select (icmp ule a, splat 1), a, splat 1 on <2 x i64>: expected to lower to mini_u.d with immediate 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
+  %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
+  ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size mini_u_eq_v2i64
+}
diff --git a/test/CodeGen/Mips/msa/compare_float.ll b/test/CodeGen/Mips/msa/compare_float.ll
new file mode 100644
index 000000000000..2fc61f89c7fa
--- /dev/null
+++ b/test/CodeGen/Mips/msa/compare_float.ll
@@ -0,0 +1,663 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind
+declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind
+declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind
+declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind
+
+define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: false_v4f32:
+
+  %1 = load <4 x float>* %a
+  %2 = load <4 x float>* %b
+  %3 = fcmp false <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  store <4 x i32> %4, <4 x i32>* %c
+  ret void
+
+  ; (setcc $a, $b, SETFALSE) is always folded, so we won't get fcaf:
+  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+  ; CHECK: .size false_v4f32
+}
+
+define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: false_v2f64:
+
+  %1 = load <2 x double>* %a
+  %2 = load <2 x double>* %b
+  %3 = fcmp false <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  store <2 x i64> %4, <2 x i64>* %c
+  ret void
+
+  ; FIXME: This code is correct, but poor. Ideally it would be similar to
+  ;        the code in @false_v4f32
+  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0
+  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
+  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; CHECK: .size false_v2f64
+}
+
+define void @oeq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: oeq_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp oeq <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size oeq_v4f32
+}
+
+define void @oeq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: oeq_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp oeq <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size oeq_v2f64
+}
+
+define void @oge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: oge_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp oge <4 x float> %1, %2  ; %1 >= %2 is expected as fcle with the operands swapped (%2 <= %1)
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size oge_v4f32
+}
+
+define void @oge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: oge_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp oge <2 x double> %1, %2  ; %1 >= %2 is expected as fcle with the operands swapped (%2 <= %1)
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size oge_v2f64
+}
+
+define void @ogt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ogt_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ogt <4 x float> %1, %2  ; %1 > %2 is expected as fclt with the operands swapped (%2 < %1)
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ogt_v4f32
+}
+
+define void @ogt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ogt_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ogt <2 x double> %1, %2  ; %1 > %2 is expected as fclt with the operands swapped (%2 < %1)
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ogt_v2f64
+}
+
+define void @ole_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ole_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ole <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ole_v4f32
+}
+
+define void @ole_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ole_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ole <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ole_v2f64
+}
+
+define void @olt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: olt_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp olt <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size olt_v4f32
+}
+
+define void @olt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: olt_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp olt <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size olt_v2f64
+}
+
+define void @one_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: one_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp one <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcne.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size one_v4f32
+}
+
+define void @one_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: one_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp one <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcne.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size one_v2f64
+}
+
+define void @ord_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ord_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ord <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcor.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ord_v4f32
+}
+
+define void @ord_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ord_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ord <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcor.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ord_v2f64
+}
+
+define void @ueq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ueq_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ueq <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcueq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ueq_v4f32
+}
+
+define void @ueq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ueq_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ueq <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcueq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ueq_v2f64
+}
+
+define void @uge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: uge_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp uge <4 x float> %1, %2  ; unordered-or %1 >= %2 is expected as fcule with the operands swapped
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size uge_v4f32
+}
+
+define void @uge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: uge_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp uge <2 x double> %1, %2  ; unordered-or %1 >= %2 is expected as fcule with the operands swapped
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size uge_v2f64
+}
+
+define void @ugt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ugt_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ugt <4 x float> %1, %2  ; unordered-or %1 > %2 is expected as fcult with the operands swapped
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ugt_v4f32
+}
+
+define void @ugt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ugt_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ugt <2 x double> %1, %2  ; unordered-or %1 > %2 is expected as fcult with the operands swapped
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ugt_v2f64
+}
+
+define void @ule_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ule_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ule <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ule_v4f32
+}
+
+define void @ule_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ule_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ule <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ule_v2f64
+}
+
+define void @ult_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: ult_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ult <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ult_v4f32
+}
+
+define void @ult_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: ult_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ult <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ult_v2f64
+}
+
+define void @uno_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: uno_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp uno <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: fcun.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size uno_v4f32
+}
+
+define void @uno_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: uno_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp uno <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: fcun.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size uno_v2f64
+}
+
+define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: true_v4f32:
+
+  %1 = load <4 x float>* %a
+  %2 = load <4 x float>* %b
+  %3 = fcmp true <4 x float> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  store <4 x i32> %4, <4 x i32>* %c
+  ret void
+
+  ; (setcc $a, $b, SETTRUE) is always folded, so we expect a splat of -1, not a compare:
+  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+  ; CHECK: .size true_v4f32
+}
+
+define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: true_v2f64:
+
+  %1 = load <2 x double>* %a
+  %2 = load <2 x double>* %b
+  %3 = fcmp true <2 x double> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  store <2 x i64> %4, <2 x i64>* %c
+  ret void
+
+  ; FIXME: This code is correct, but poor. Ideally it would be similar to
+  ;        the code in @true_v4f32
+  ; CHECK-DAG: ldi.d [[R1:\$w[0-9]+]], 1
+  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
+  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+  ; CHECK: .size true_v2f64
+}
+
+define void @bsel_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
+                          <4 x float>* %c) nounwind {
+  ; CHECK: bsel_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <4 x float>* %c
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
+  %4 = fcmp ogt <4 x float> %1, %2  ; %1 > %2 is expected as fclt with the operands swapped; its result register doubles as the bsel.v mask/destination
+  ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <4 x i1> %4, <4 x float> %1, <4 x float> %3  ; NOTE(review): confirm the bsel.v operand order expected below against the MSA spec (a set mask bit should pick %1)
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <4 x float> %5, <4 x float>* %d
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v4f32
+}
+
+define void @bsel_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
+                          <2 x double>* %c) nounwind {
+  ; CHECK: bsel_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = load <2 x double>* %c
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
+  %4 = fcmp ogt <2 x double> %1, %2  ; %1 > %2 is expected as fclt with the operands swapped; its result register doubles as the bsel.v mask/destination
+  ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %5 = select <2 x i1> %4, <2 x double> %1, <2 x double> %3  ; NOTE(review): confirm the bsel.v operand order expected below against the MSA spec (a set mask bit should pick %1)
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+  store <2 x double> %5, <2 x double>* %d
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size bsel_v2f64
+}
+
+define void @bseli_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
+                          <4 x float>* %c) nounwind {
+  ; CHECK: bseli_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ogt <4 x float> %1, %2  ; %1 > %2 is expected as fclt with the operands swapped
+  ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <4 x i1> %3, <4 x float> %1, <4 x float> zeroinitializer  ; %c is never loaded; false operand is all-zero. NOTE(review): confirm the bsel.v operand order expected below (a set mask bit should pick %1)
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]]
+  store <4 x float> %4, <4 x float>* %d
+  ; CHECK-DAG: st.w [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size bseli_v4f32
+}
+
+define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
+                          <2 x double>* %c) nounwind {
+  ; CHECK: bseli_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = fcmp ogt <2 x double> %1, %2  ; %1 > %2 is expected as fclt with the operands swapped
+  ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
+  %4 = select <2 x i1> %3, <2 x double> %1, <2 x double> zeroinitializer  ; %c is never loaded; false operand is all-zero. NOTE(review): confirm the bsel.v operand order expected below (a set mask bit should pick %1)
+  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]]
+  store <2 x double> %4, <2 x double>* %d
+  ; CHECK-DAG: st.d [[R4]], 0($4)
+
+  ret void
+  ; CHECK: .size bseli_v2f64
+}
+
+define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: max_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, <4 x float> %2)
+  ; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x float> %3, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_v4f32
+}
+
+define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: max_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2)
+  ; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x double> %3, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size max_v2f64
+}
+
+define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
+  ; CHECK: min_v4f32:
+
+  %1 = load <4 x float>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x float>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2)
+  ; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x float> %3, <4 x float>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_v4f32
+}
+
+define void @min_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
+  ; CHECK: min_v2f64:
+
+  %1 = load <2 x double>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x double>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 x double> %2)
+  ; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x double> %3, <2 x double>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size min_v2f64
+}
diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll
new file mode 100644
index 000000000000..ed3e52cbffc2
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_copy.ll
@@ -0,0 +1,162 @@
+; Test the MSA intrinsics that are encoded with the ELM instruction format and
+; are element extraction operations.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_copy_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_copy_s_b_RES  = global i32 0, align 16
+
+define void @llvm_mips_copy_s_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_copy_s_b_ARG1
+  %1 = tail call i32 @llvm.mips.copy.s.b(<16 x i8> %0, i32 1)
+  store i32 %1, i32* @llvm_mips_copy_s_b_RES
+  ret void
+}
+
+declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_b_test:
+; CHECK: ld.b
+; CHECK: copy_s.b
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_b_test
+;
+@llvm_mips_copy_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_copy_s_h_RES  = global i32 0, align 16
+
+define void @llvm_mips_copy_s_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_copy_s_h_ARG1
+  %1 = tail call i32 @llvm.mips.copy.s.h(<8 x i16> %0, i32 1)
+  store i32 %1, i32* @llvm_mips_copy_s_h_RES
+  ret void
+}
+
+declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_h_test:
+; CHECK: ld.h
+; CHECK: copy_s.h
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_h_test
+;
+@llvm_mips_copy_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_copy_s_w_RES  = global i32 0, align 16
+
+define void @llvm_mips_copy_s_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_copy_s_w_ARG1
+  %1 = tail call i32 @llvm.mips.copy.s.w(<4 x i32> %0, i32 1)
+  store i32 %1, i32* @llvm_mips_copy_s_w_RES
+  ret void
+}
+
+declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_w_test:
+; CHECK: ld.w
+; CHECK: copy_s.w
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_w_test
+;
+@llvm_mips_copy_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_copy_s_d_RES  = global i64 0, align 16
+
+define void @llvm_mips_copy_s_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_copy_s_d_ARG1
+  %1 = tail call i64 @llvm.mips.copy.s.d(<2 x i64> %0, i32 1)  ; i64 result: the expected output below is ld.w, two copy_s.w and two sw
+  store i64 %1, i64* @llvm_mips_copy_s_d_RES
+  ret void
+}
+
+declare i64 @llvm.mips.copy.s.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_copy_s_d_test:
+; CHECK: ld.w
+; CHECK: copy_s.w
+; CHECK: copy_s.w
+; CHECK: sw
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_s_d_test
+;
+@llvm_mips_copy_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_copy_u_b_RES  = global i32 0, align 16
+
+define void @llvm_mips_copy_u_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_copy_u_b_ARG1
+  %1 = tail call i32 @llvm.mips.copy.u.b(<16 x i8> %0, i32 1)
+  store i32 %1, i32* @llvm_mips_copy_u_b_RES
+  ret void
+}
+
+declare i32 @llvm.mips.copy.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_b_test:
+; CHECK: ld.b
+; CHECK: copy_u.b
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_b_test
+;
+@llvm_mips_copy_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_copy_u_h_RES  = global i32 0, align 16
+
+define void @llvm_mips_copy_u_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_copy_u_h_ARG1
+  %1 = tail call i32 @llvm.mips.copy.u.h(<8 x i16> %0, i32 1)
+  store i32 %1, i32* @llvm_mips_copy_u_h_RES
+  ret void
+}
+
+declare i32 @llvm.mips.copy.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_h_test:
+; CHECK: ld.h
+; CHECK: copy_u.h
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_h_test
+;
+@llvm_mips_copy_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_copy_u_w_RES  = global i32 0, align 16
+
+define void @llvm_mips_copy_u_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_copy_u_w_ARG1
+  %1 = tail call i32 @llvm.mips.copy.u.w(<4 x i32> %0, i32 1)
+  store i32 %1, i32* @llvm_mips_copy_u_w_RES
+  ret void
+}
+
+declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_w_test:
+; CHECK: ld.w
+; CHECK: copy_u.w
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_w_test
+;
+@llvm_mips_copy_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_copy_u_d_RES  = global i64 0, align 16
+
+define void @llvm_mips_copy_u_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_copy_u_d_ARG1
+  %1 = tail call i64 @llvm.mips.copy.u.d(<2 x i64> %0, i32 1)  ; i64 result: the expected output below uses copy_s.w twice (not a copy_u form) and two sw
+  store i64 %1, i64* @llvm_mips_copy_u_d_RES
+  ret void
+}
+
+declare i64 @llvm.mips.copy.u.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_copy_u_d_test:
+; CHECK: ld.w
+; CHECK: copy_s.w
+; CHECK: copy_s.w
+; CHECK: sw
+; CHECK: sw
+; CHECK: .size llvm_mips_copy_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/elm_cxcmsa.ll b/test/CodeGen/Mips/msa/elm_cxcmsa.ll
new file mode 100644
index 000000000000..8d6b0ee20ab8
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_cxcmsa.ll
@@ -0,0 +1,168 @@
+; Test the MSA ctcmsa and cfcmsa intrinsics (which are encoded with the ELM
+; instruction format).
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define i32 @msa_ir_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 0)
+  ret i32 %0
+}
+
+; CHECK: msa_ir_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $0
+; CHECK: .size msa_ir_cfcmsa_test
+;
+define i32 @msa_csr_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 1)
+  ret i32 %0
+}
+
+; CHECK: msa_csr_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $1
+; CHECK: .size msa_csr_cfcmsa_test
+;
+define i32 @msa_access_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 2)
+  ret i32 %0
+}
+
+; CHECK: msa_access_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $2
+; CHECK: .size msa_access_cfcmsa_test
+;
+define i32 @msa_save_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 3)
+  ret i32 %0
+}
+
+; CHECK: msa_save_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $3
+; CHECK: .size msa_save_cfcmsa_test
+;
+define i32 @msa_modify_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 4)
+  ret i32 %0
+}
+
+; CHECK: msa_modify_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $4
+; CHECK: .size msa_modify_cfcmsa_test
+;
+define i32 @msa_request_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 5)
+  ret i32 %0
+}
+
+; CHECK: msa_request_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $5
+; CHECK: .size msa_request_cfcmsa_test
+;
+define i32 @msa_map_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 6)
+  ret i32 %0
+}
+
+; CHECK: msa_map_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $6
+; CHECK: .size msa_map_cfcmsa_test
+;
+define i32 @msa_unmap_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 7)
+  ret i32 %0
+}
+
+; CHECK: msa_unmap_cfcmsa_test:
+; CHECK: cfcmsa $[[R1:[0-9]+]], $7
+; CHECK: .size msa_unmap_cfcmsa_test
+;
+define void @msa_ir_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 0, i32 1)
+  ret void
+}
+
+; CHECK: msa_ir_ctcmsa_test:
+; CHECK: ctcmsa $0
+; CHECK: .size msa_ir_ctcmsa_test
+;
+define void @msa_csr_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 1, i32 1)
+  ret void
+}
+
+; CHECK: msa_csr_ctcmsa_test:
+; CHECK: ctcmsa $1
+; CHECK: .size msa_csr_ctcmsa_test
+;
+define void @msa_access_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 2, i32 1)
+  ret void
+}
+
+; CHECK: msa_access_ctcmsa_test:
+; CHECK: ctcmsa $2
+; CHECK: .size msa_access_ctcmsa_test
+;
+define void @msa_save_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 3, i32 1)
+  ret void
+}
+
+; CHECK: msa_save_ctcmsa_test:
+; CHECK: ctcmsa $3
+; CHECK: .size msa_save_ctcmsa_test
+;
+define void @msa_modify_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 4, i32 1)
+  ret void
+}
+
+; CHECK: msa_modify_ctcmsa_test:
+; CHECK: ctcmsa $4
+; CHECK: .size msa_modify_ctcmsa_test
+;
+define void @msa_request_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 5, i32 1)
+  ret void
+}
+
+; CHECK: msa_request_ctcmsa_test:
+; CHECK: ctcmsa $5
+; CHECK: .size msa_request_ctcmsa_test
+;
+define void @msa_map_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 6, i32 1)
+  ret void
+}
+
+; CHECK: msa_map_ctcmsa_test:
+; CHECK: ctcmsa $6
+; CHECK: .size msa_map_ctcmsa_test
+;
+define void @msa_unmap_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 7, i32 1)
+  ret void
+}
+
+; CHECK: msa_unmap_ctcmsa_test:
+; CHECK: ctcmsa $7
+; CHECK: .size msa_unmap_ctcmsa_test
+;
+declare i32 @llvm.mips.cfcmsa(i32) nounwind
+declare void @llvm.mips.ctcmsa(i32, i32) nounwind
diff --git a/test/CodeGen/Mips/msa/elm_insv.ll b/test/CodeGen/Mips/msa/elm_insv.ll
new file mode 100644
index 000000000000..fa7ceaf0c6bf
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_insv.ll
@@ -0,0 +1,192 @@
+; Test the MSA element insertion intrinsics that are encoded with the ELM
+; instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_insert_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_insert_b_ARG3 = global i32 27, align 16
+@llvm_mips_insert_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_insert_b_test() nounwind { ; insert.b: put a scalar into element 1 of a <16 x i8>
+entry:
+  %0 = load <16 x i8>* @llvm_mips_insert_b_ARG1 ; vector to be modified
+  %1 = load i32* @llvm_mips_insert_b_ARG3 ; scalar to insert (loaded with lw in the expected output)
+  %2 = tail call <16 x i8> @llvm.mips.insert.b(<16 x i8> %0, i32 1, i32 %1) ; constant 1 is the element index, expected as "[1]"
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_insert_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.insert.b(<16 x i8>, i32, i32) nounwind
+
+; CHECK: llvm_mips_insert_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
+; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0(
+; CHECK-DAG: insert.b [[R2]][1], [[R1]]
+; CHECK-DAG: st.b [[R2]], 0(
+; CHECK: .size llvm_mips_insert_b_test
+;
+@llvm_mips_insert_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_insert_h_ARG3 = global i32 27, align 16
+@llvm_mips_insert_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_insert_h_test() nounwind { ; insert.h: put a scalar into element 1 of a <8 x i16>
+entry:
+  %0 = load <8 x i16>* @llvm_mips_insert_h_ARG1 ; vector to be modified
+  %1 = load i32* @llvm_mips_insert_h_ARG3 ; scalar to insert (loaded with lw in the expected output)
+  %2 = tail call <8 x i16> @llvm.mips.insert.h(<8 x i16> %0, i32 1, i32 %1) ; constant 1 is the element index, expected as "[1]"
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_insert_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.insert.h(<8 x i16>, i32, i32) nounwind
+
+; CHECK: llvm_mips_insert_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
+; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0(
+; CHECK-DAG: insert.h [[R2]][1], [[R1]]
+; CHECK-DAG: st.h [[R2]], 0(
+; CHECK: .size llvm_mips_insert_h_test
+;
+@llvm_mips_insert_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_insert_w_ARG3 = global i32 27, align 16
+@llvm_mips_insert_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_insert_w_test() nounwind { ; insert.w: put a scalar into element 1 of a <4 x i32>
+entry:
+  %0 = load <4 x i32>* @llvm_mips_insert_w_ARG1 ; vector to be modified
+  %1 = load i32* @llvm_mips_insert_w_ARG3 ; scalar to insert (loaded with lw in the expected output)
+  %2 = tail call <4 x i32> @llvm.mips.insert.w(<4 x i32> %0, i32 1, i32 %1) ; constant 1 is the element index, expected as "[1]"
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_insert_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.insert.w(<4 x i32>, i32, i32) nounwind
+
+; CHECK: llvm_mips_insert_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
+; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0(
+; CHECK-DAG: insert.w [[R2]][1], [[R1]]
+; CHECK-DAG: st.w [[R2]], 0(
+; CHECK: .size llvm_mips_insert_w_test
+;
+@llvm_mips_insert_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_insert_d_ARG3 = global i64 27, align 16
+@llvm_mips_insert_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_insert_d_test() nounwind { ; insert.d with an i64 scalar: expected output is two insert.w writes (elements 2 and 3)
+entry:
+  %0 = load <2 x i64>* @llvm_mips_insert_d_ARG1 ; vector to be modified (expected to be loaded with ld.w)
+  %1 = load i64* @llvm_mips_insert_d_ARG3 ; 64-bit scalar; expected as two lw loads at offsets 0 and 4
+  %2 = tail call <2 x i64> @llvm.mips.insert.d(<2 x i64> %0, i32 1, i64 %1) ; constant 1 is the 64-bit element index
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_insert_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.insert.d(<2 x i64>, i32, i64) nounwind
+
+; CHECK: llvm_mips_insert_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], 0(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], 4(
+; CHECK-DAG: ld.w [[R3:\$w[0-9]+]],
+; CHECK-DAG: insert.w [[R3]][2], [[R1]]
+; CHECK-DAG: insert.w [[R3]][3], [[R2]]
+; CHECK-DAG: st.w [[R3]],
+; CHECK: .size llvm_mips_insert_d_test
+;
+@llvm_mips_insve_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_insve_b_ARG3 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_insve_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_insve_b_test() nounwind { ; insve.b: expected as "insve.b dst[1], src[0]" on two <16 x i8> vectors
+entry:
+  %0 = load <16 x i8>* @llvm_mips_insve_b_ARG1 ; destination vector
+  %1 = load <16 x i8>* @llvm_mips_insve_b_ARG3 ; source vector
+  %2 = tail call <16 x i8> @llvm.mips.insve.b(<16 x i8> %0, i32 1, <16 x i8> %1) ; constant 1 is the destination element index
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_insve_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.insve.b(<16 x i8>, i32, <16 x i8>) nounwind
+
+; CHECK: llvm_mips_insve_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_b_ARG3)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: insve.b [[R3]][1], [[R4]][0]
+; CHECK-DAG: st.b [[R3]],
+; CHECK: .size llvm_mips_insve_b_test
+;
+@llvm_mips_insve_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_insve_h_ARG3 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_insve_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_insve_h_test() nounwind { ; insve.h: expected as "insve.h dst[1], src[0]" on two <8 x i16> vectors
+entry:
+  %0 = load <8 x i16>* @llvm_mips_insve_h_ARG1 ; destination vector
+  %1 = load <8 x i16>* @llvm_mips_insve_h_ARG3 ; source vector
+  %2 = tail call <8 x i16> @llvm.mips.insve.h(<8 x i16> %0, i32 1, <8 x i16> %1) ; constant 1 is the destination element index
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_insve_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.insve.h(<8 x i16>, i32, <8 x i16>) nounwind
+
+; CHECK: llvm_mips_insve_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_h_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_h_ARG3)(
+; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: insve.h [[R3]][1], [[R4]][0]
+; CHECK-DAG: st.h [[R3]],
+; CHECK: .size llvm_mips_insve_h_test
+;
+@llvm_mips_insve_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_insve_w_ARG3 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_insve_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_insve_w_test() nounwind { ; insve.w: expected as "insve.w dst[1], src[0]" on two <4 x i32> vectors
+entry:
+  %0 = load <4 x i32>* @llvm_mips_insve_w_ARG1 ; destination vector
+  %1 = load <4 x i32>* @llvm_mips_insve_w_ARG3 ; source vector
+  %2 = tail call <4 x i32> @llvm.mips.insve.w(<4 x i32> %0, i32 1, <4 x i32> %1) ; constant 1 is the destination element index
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_insve_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.insve.w(<4 x i32>, i32, <4 x i32>) nounwind
+
+; CHECK: llvm_mips_insve_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_w_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_w_ARG3)(
+; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: insve.w [[R3]][1], [[R4]][0]
+; CHECK-DAG: st.w [[R3]],
+; CHECK: .size llvm_mips_insve_w_test
+;
+@llvm_mips_insve_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_insve_d_ARG3 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_insve_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_insve_d_test() nounwind { ; insve.d: expected as "insve.d dst[1], src[0]" on two <2 x i64> vectors
+entry:
+  %0 = load <2 x i64>* @llvm_mips_insve_d_ARG1 ; destination vector
+  %1 = load <2 x i64>* @llvm_mips_insve_d_ARG3 ; source vector
+  %2 = tail call <2 x i64> @llvm.mips.insve.d(<2 x i64> %0, i32 1, <2 x i64> %1) ; constant 1 is the destination element index
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_insve_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.insve.d(<2 x i64>, i32, <2 x i64>) nounwind
+
+; CHECK: llvm_mips_insve_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_insve_d_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_insve_d_ARG3)(
+; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: insve.d [[R3]][1], [[R4]][0]
+; CHECK-DAG: st.d [[R3]],
+; CHECK: .size llvm_mips_insve_d_test
+;
diff --git a/test/CodeGen/Mips/msa/elm_move.ll b/test/CodeGen/Mips/msa/elm_move.ll
new file mode 100644
index 000000000000..98c06c732c36
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_move.ll
@@ -0,0 +1,25 @@
+; Test the MSA move intrinsics (which are encoded with the ELM instruction
+; format).
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_move_vb_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_move_vb_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_move_vb_test() nounwind { ; move.v on a <16 x i8>: expected sequence is ld.b, move.v, st.b
+entry:
+  %0 = load <16 x i8>* @llvm_mips_move_vb_ARG1 ; source vector
+  %1 = tail call <16 x i8> @llvm.mips.move.v(<16 x i8> %0) ; single-operand intrinsic under test
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_move_vb_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.move.v(<16 x i8>) nounwind
+
+; CHECK: llvm_mips_move_vb_test:
+; CHECK: ld.b
+; CHECK: move.v
+; CHECK: st.b
+; CHECK: .size llvm_mips_move_vb_test
+;
diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll
new file mode 100644
index 000000000000..39d670dac841
--- /dev/null
+++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll
@@ -0,0 +1,158 @@
+; Test the MSA intrinsics that are encoded with the ELM instruction format and
+; are either slides (sldi) or splats (splati).
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_sldi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_sldi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_sldi_b_test() nounwind { ; sldi.b on a <16 x i8>: expected sequence is ld.b, sldi.b, st.b
+entry:
+  %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1 ; input vector
+  %1 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_sldi_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_sldi_b_test:
+; CHECK: ld.b
+; CHECK: sldi.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_sldi_b_test
+;
+@llvm_mips_sldi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_sldi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_sldi_h_test() nounwind { ; sldi.h on a <8 x i16>: expected sequence is ld.h, sldi.h, st.h
+entry:
+  %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1 ; input vector
+  %1 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_sldi_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_sldi_h_test:
+; CHECK: ld.h
+; CHECK: sldi.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_sldi_h_test
+;
+@llvm_mips_sldi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_sldi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_sldi_w_test() nounwind { ; sldi.w on a <4 x i32>: expected sequence is ld.w, sldi.w, st.w
+entry:
+  %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1 ; input vector
+  %1 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_sldi_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_sldi_w_test:
+; CHECK: ld.w
+; CHECK: sldi.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_sldi_w_test
+;
+@llvm_mips_sldi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_sldi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_sldi_d_test() nounwind { ; sldi.d on a <2 x i64>: expected sequence is ld.d, sldi.d, st.d
+entry:
+  %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1 ; input vector
+  %1 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_sldi_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_sldi_d_test:
+; CHECK: ld.d
+; CHECK: sldi.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_sldi_d_test
+;
+@llvm_mips_splati_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_splati_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_splati_b_test() nounwind { ; splati.b on a <16 x i8>: expected sequence is ld.b, splati.b, st.b
+entry:
+  %0 = load <16 x i8>* @llvm_mips_splati_b_ARG1 ; input vector
+  %1 = tail call <16 x i8> @llvm.mips.splati.b(<16 x i8> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_splati_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.splati.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_splati_b_test:
+; CHECK: ld.b
+; CHECK: splati.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_splati_b_test
+;
+@llvm_mips_splati_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_splati_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_splati_h_test() nounwind { ; splati.h on a <8 x i16>: expected sequence is ld.h, splati.h, st.h
+entry:
+  %0 = load <8 x i16>* @llvm_mips_splati_h_ARG1 ; input vector
+  %1 = tail call <8 x i16> @llvm.mips.splati.h(<8 x i16> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_splati_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.splati.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_splati_h_test:
+; CHECK: ld.h
+; CHECK: splati.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_splati_h_test
+;
+@llvm_mips_splati_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_splati_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_splati_w_test() nounwind { ; splati.w on a <4 x i32>: expected sequence is ld.w, splati.w, st.w
+entry:
+  %0 = load <4 x i32>* @llvm_mips_splati_w_ARG1 ; input vector
+  %1 = tail call <4 x i32> @llvm.mips.splati.w(<4 x i32> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_splati_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.splati.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_splati_w_test:
+; CHECK: ld.w
+; CHECK: splati.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_splati_w_test
+;
+@llvm_mips_splati_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_splati_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_splati_d_test() nounwind { ; splati.d on a <2 x i64>: expected sequence is ld.d, splati.d, st.d
+entry:
+  %0 = load <2 x i64>* @llvm_mips_splati_d_ARG1 ; input vector
+  %1 = tail call <2 x i64> @llvm.mips.splati.d(<2 x i64> %0, i32 1) ; immediate operand is 1 (its value is not verified by the expectations)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_splati_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.splati.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_splati_d_test:
+; CHECK: ld.d
+; CHECK: splati.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_splati_d_test
+;
diff --git a/test/CodeGen/Mips/msa/endian.ll b/test/CodeGen/Mips/msa/endian.ll
new file mode 100644
index 000000000000..44d1925f1cff
--- /dev/null
+++ b/test/CodeGen/Mips/msa/endian.ll
@@ -0,0 +1,107 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=BIGENDIAN %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=LITENDIAN %s
+
+@v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+@v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+@v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+@v2i64 = global <2 x i64> <i64 0, i64 0>
+
+define void @const_v16i8() nounwind { ; expects .byte 0..15 in ascending order before the function label, for both byte orders
+  ; LITENDIAN: .byte 0
+  ; LITENDIAN: .byte 1
+  ; LITENDIAN: .byte 2
+  ; LITENDIAN: .byte 3
+  ; LITENDIAN: .byte 4
+  ; LITENDIAN: .byte 5
+  ; LITENDIAN: .byte 6
+  ; LITENDIAN: .byte 7
+  ; LITENDIAN: .byte 8
+  ; LITENDIAN: .byte 9
+  ; LITENDIAN: .byte 10
+  ; LITENDIAN: .byte 11
+  ; LITENDIAN: .byte 12
+  ; LITENDIAN: .byte 13
+  ; LITENDIAN: .byte 14
+  ; LITENDIAN: .byte 15
+  ; LITENDIAN: const_v16i8:
+  ; BIGENDIAN: .byte 0
+  ; BIGENDIAN: .byte 1
+  ; BIGENDIAN: .byte 2
+  ; BIGENDIAN: .byte 3
+  ; BIGENDIAN: .byte 4
+  ; BIGENDIAN: .byte 5
+  ; BIGENDIAN: .byte 6
+  ; BIGENDIAN: .byte 7
+  ; BIGENDIAN: .byte 8
+  ; BIGENDIAN: .byte 9
+  ; BIGENDIAN: .byte 10
+  ; BIGENDIAN: .byte 11
+  ; BIGENDIAN: .byte 12
+  ; BIGENDIAN: .byte 13
+  ; BIGENDIAN: .byte 14
+  ; BIGENDIAN: .byte 15
+  ; BIGENDIAN: const_v16i8:
+
+  store volatile <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8>*@v16i8 ; the constant whose emitted data is verified above; volatile presumably keeps the store from being dropped
+
+  ret void
+}
+
+define void @const_v8i16() nounwind { ; expects .2byte 0..7 in ascending order before the function label, for both byte orders
+  ; LITENDIAN: .2byte 0
+  ; LITENDIAN: .2byte 1
+  ; LITENDIAN: .2byte 2
+  ; LITENDIAN: .2byte 3
+  ; LITENDIAN: .2byte 4
+  ; LITENDIAN: .2byte 5
+  ; LITENDIAN: .2byte 6
+  ; LITENDIAN: .2byte 7
+  ; LITENDIAN: const_v8i16:
+  ; BIGENDIAN: .2byte 0
+  ; BIGENDIAN: .2byte 1
+  ; BIGENDIAN: .2byte 2
+  ; BIGENDIAN: .2byte 3
+  ; BIGENDIAN: .2byte 4
+  ; BIGENDIAN: .2byte 5
+  ; BIGENDIAN: .2byte 6
+  ; BIGENDIAN: .2byte 7
+  ; BIGENDIAN: const_v8i16:
+
+  store volatile <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16>*@v8i16 ; the constant whose emitted data is verified above; volatile presumably keeps the store from being dropped
+
+  ret void
+}
+
+define void @const_v4i32() nounwind { ; expects .4byte 0..3 in ascending order before the function label, for both byte orders
+  ; LITENDIAN: .4byte 0
+  ; LITENDIAN: .4byte 1
+  ; LITENDIAN: .4byte 2
+  ; LITENDIAN: .4byte 3
+  ; LITENDIAN: const_v4i32:
+  ; BIGENDIAN: .4byte 0
+  ; BIGENDIAN: .4byte 1
+  ; BIGENDIAN: .4byte 2
+  ; BIGENDIAN: .4byte 3
+  ; BIGENDIAN: const_v4i32:
+
+  store volatile <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>*@v4i32 ; the constant whose emitted data is verified above; volatile presumably keeps the store from being dropped
+
+  ret void
+}
+
+define void @const_v2i64() nounwind { ; each i64 is expected as two .4byte words: 1,0,2,0 for mipsel and 0,1,0,2 for mips
+  ; LITENDIAN: .4byte 1
+  ; LITENDIAN: .4byte 0
+  ; LITENDIAN: .4byte 2
+  ; LITENDIAN: .4byte 0
+  ; LITENDIAN: const_v2i64:
+  ; BIGENDIAN: .4byte 0
+  ; BIGENDIAN: .4byte 1
+  ; BIGENDIAN: .4byte 0
+  ; BIGENDIAN: .4byte 2
+  ; BIGENDIAN: const_v2i64:
+
+  store volatile <2 x i64> <i64 1, i64 2>, <2 x i64>*@v2i64 ; values 1 and 2 make the word order within each i64 observable
+
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll
new file mode 100644
index 000000000000..3088e1ba9893
--- /dev/null
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@@ -0,0 +1,85 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s
+
+define void @loadstore_v16i8_near() nounwind { ; lone vector slot, so ld.b and st.b are expected to address 0($sp) directly
+  ; MIPS32-AE: loadstore_v16i8_near:
+
+  %1 = alloca <16 x i8>
+  %2 = load volatile <16 x i8>* %1
+  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp)
+  store volatile <16 x i8> %2, <16 x i8>* %1
+  ; MIPS32-AE: st.b [[R1]], 0($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v16i8_near
+}
+
+define void @loadstore_v16i8_just_under_simm10() nounwind { ; vector slot at offset 496: ld.b and st.b are still expected to use the offset directly
+  ; MIPS32-AE: loadstore_v16i8_just_under_simm10:
+
+  %1 = alloca <16 x i8>
+  %2 = alloca [496 x i8] ; Push the frame right up to 512 bytes
+
+  %3 = load volatile <16 x i8>* %1
+  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
+  store volatile <16 x i8> %3, <16 x i8>* %1
+  ; MIPS32-AE: st.b [[R1]], 496($sp)
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v16i8_just_under_simm10
+}
+
+define void @loadstore_v16i8_just_over_simm10() nounwind { ; vector slot at offset 512: the address is expected to be formed with addiu, then used with offset 0
+  ; MIPS32-AE: loadstore_v16i8_just_over_simm10:
+
+  %1 = alloca <16 x i8>
+  %2 = alloca [497 x i8] ; Push the frame just over 512 bytes
+
+  %3 = load volatile <16 x i8>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <16 x i8> %3, <16 x i8>* %1
+  ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+  ; MIPS32-AE: st.b [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v16i8_just_over_simm10
+}
+
+define void @loadstore_v16i8_just_under_simm16() nounwind { ; vector slot at offset 32768: the address is expected to be formed with ori + addu, then used with offset 0
+  ; MIPS32-AE: loadstore_v16i8_just_under_simm16:
+
+  %1 = alloca <16 x i8>
+  %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
+
+  %3 = load volatile <16 x i8>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <16 x i8> %3, <16 x i8>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.b [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v16i8_just_under_simm16
+}
+
+define void @loadstore_v16i8_just_over_simm16() nounwind { ; one more byte of padding than the just_under_simm16 case; the same ori + addu sequence with 32768 is expected
+  ; MIPS32-AE: loadstore_v16i8_just_over_simm16:
+
+  %1 = alloca <16 x i8>
+  %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
+
+  %3 = load volatile <16 x i8>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
+  store volatile <16 x i8> %3, <16 x i8>* %1
+  ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
+  ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+  ; MIPS32-AE: st.b [[R1]], 0([[BASE]])
+
+  ret void
+  ; MIPS32-AE: .size loadstore_v16i8_just_over_simm16
+}
diff --git a/test/CodeGen/Mips/msa/i10.ll b/test/CodeGen/Mips/msa/i10.ll
new file mode 100644
index 000000000000..c5a96174a734
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i10.ll
@@ -0,0 +1,89 @@
+; Test the MSA intrinsics that are encoded with the I10 instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_bnz_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+
+define i32 @llvm_mips_bnz_b_test() nounwind { ; bnz.b: branch on the intrinsic's i32 result; expects ld.b feeding bnz.b
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bnz_b_ARG1 ; vector under test
+  %1 = tail call i32 @llvm.mips.bnz.b(<16 x i8> %0)
+  %2 = icmp eq i32 %1, 0 ; a zero result goes to %true (returns 2), anything else to %false (returns 3)
+  br i1 %2, label %true, label %false
+true:
+  ret i32 2
+false:
+  ret i32 3
+}
+
+declare i32 @llvm.mips.bnz.b(<16 x i8>) nounwind
+
+; CHECK: llvm_mips_bnz_b_test:
+; CHECK-DAG: ld.b [[R0:\$w[0-9]+]]
+; CHECK-DAG: bnz.b [[R0]]
+; CHECK: .size llvm_mips_bnz_b_test
+
+@llvm_mips_bnz_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+
+define i32 @llvm_mips_bnz_h_test() nounwind { ; bnz.h: branch on the intrinsic's i32 result; expects ld.h feeding bnz.h
+entry:
+  %0 = load <8 x i16>* @llvm_mips_bnz_h_ARG1 ; vector under test
+  %1 = tail call i32 @llvm.mips.bnz.h(<8 x i16> %0)
+  %2 = icmp eq i32 %1, 0 ; a zero result goes to %true (returns 2), anything else to %false (returns 3)
+  br i1 %2, label %true, label %false
+true:
+  ret i32 2
+false:
+  ret i32 3
+}
+
+declare i32 @llvm.mips.bnz.h(<8 x i16>) nounwind
+
+; CHECK: llvm_mips_bnz_h_test:
+; CHECK-DAG: ld.h [[R0:\$w[0-9]+]]
+; CHECK-DAG: bnz.h [[R0]]
+; CHECK: .size llvm_mips_bnz_h_test
+
+@llvm_mips_bnz_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+
+define i32 @llvm_mips_bnz_w_test() nounwind { ; bnz.w: branch on the intrinsic's i32 result; expects ld.w feeding bnz.w
+entry:
+  %0 = load <4 x i32>* @llvm_mips_bnz_w_ARG1 ; vector under test
+  %1 = tail call i32 @llvm.mips.bnz.w(<4 x i32> %0)
+  %2 = icmp eq i32 %1, 0 ; a zero result goes to %true (returns 2), anything else to %false (returns 3)
+  br i1 %2, label %true, label %false
+true:
+  ret i32 2
+false:
+  ret i32 3
+}
+
+declare i32 @llvm.mips.bnz.w(<4 x i32>) nounwind
+
+; CHECK: llvm_mips_bnz_w_test:
+; CHECK-DAG: ld.w [[R0:\$w[0-9]+]]
+; CHECK-DAG: bnz.w [[R0]]
+; CHECK: .size llvm_mips_bnz_w_test
+
+@llvm_mips_bnz_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+
+define i32 @llvm_mips_bnz_d_test() nounwind { ; bnz.d: branch on the intrinsic's i32 result; expects ld.d feeding bnz.d
+entry:
+  %0 = load <2 x i64>* @llvm_mips_bnz_d_ARG1 ; vector under test
+  %1 = tail call i32 @llvm.mips.bnz.d(<2 x i64> %0)
+  %2 = icmp eq i32 %1, 0 ; a zero result goes to %true (returns 2), anything else to %false (returns 3)
+  br i1 %2, label %true, label %false
+true:
+  ret i32 2
+false:
+  ret i32 3
+}
+
+declare i32 @llvm.mips.bnz.d(<2 x i64>) nounwind
+
+; CHECK: llvm_mips_bnz_d_test:
+; CHECK-DAG: ld.d [[R0:\$w[0-9]+]]
+; CHECK-DAG: bnz.d [[R0]]
+; CHECK: .size llvm_mips_bnz_d_test
+
diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll
new file mode 100644
index 000000000000..0b507208f429
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i5-a.ll
@@ -0,0 +1,82 @@
+; Test the MSA intrinsics that are encoded with the I5 instruction format.
+; There are lots of these so this covers those beginning with 'a'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_addvi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_addvi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_addvi_b_test() nounwind { ; addvi.b on a <16 x i8>: expected sequence is ld.b, addvi.b, st.b
+entry:
+  %0 = load <16 x i8>* @llvm_mips_addvi_b_ARG1 ; input vector
+  %1 = tail call <16 x i8> @llvm.mips.addvi.b(<16 x i8> %0, i32 14) ; immediate operand is 14 (its value is not verified by the expectations)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_addvi_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.addvi.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_addvi_b_test:
+; CHECK: ld.b
+; CHECK: addvi.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_addvi_b_test
+;
+@llvm_mips_addvi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_addvi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_addvi_h_test() nounwind { ; addvi.h on a <8 x i16>: expected sequence is ld.h, addvi.h, st.h
+entry:
+  %0 = load <8 x i16>* @llvm_mips_addvi_h_ARG1 ; input vector
+  %1 = tail call <8 x i16> @llvm.mips.addvi.h(<8 x i16> %0, i32 14) ; immediate operand is 14 (its value is not verified by the expectations)
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_addvi_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.addvi.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_addvi_h_test:
+; CHECK: ld.h
+; CHECK: addvi.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_addvi_h_test
+;
+@llvm_mips_addvi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_addvi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_addvi_w_test() nounwind { ; addvi.w on a <4 x i32>: expected sequence is ld.w, addvi.w, st.w
+entry:
+  %0 = load <4 x i32>* @llvm_mips_addvi_w_ARG1 ; input vector
+  %1 = tail call <4 x i32> @llvm.mips.addvi.w(<4 x i32> %0, i32 14) ; immediate operand is 14 (its value is not verified by the expectations)
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_addvi_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.addvi.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_addvi_w_test:
+; CHECK: ld.w
+; CHECK: addvi.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_addvi_w_test
+;
+@llvm_mips_addvi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_addvi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_addvi_d_test() nounwind { ; addvi.d on a <2 x i64>: expected sequence is ld.d, addvi.d, st.d
+entry:
+  %0 = load <2 x i64>* @llvm_mips_addvi_d_ARG1 ; input vector
+  %1 = tail call <2 x i64> @llvm.mips.addvi.d(<2 x i64> %0, i32 14) ; immediate operand is 14 (its value is not verified by the expectations)
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_addvi_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.addvi.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_addvi_d_test:
+; CHECK: ld.d
+; CHECK: addvi.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_addvi_d_test
+;
diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll
new file mode 100644
index 000000000000..da6be669f0dd
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i5-b.ll
@@ -0,0 +1,439 @@
+; Test the MSA intrinsics that are encoded with the I5 instruction format.
+; There are lots of these so this covers those beginning with 'b'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_bclri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bclri_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bclri_b_test() nounwind { ; RES = llvm.mips.bclri.b(ARG1, 7)
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bclri_b_ARG1
+  %1 = tail call <16 x i8> @llvm.mips.bclri.b(<16 x i8> %0, i32 7)
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_bclri_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bclri.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_bclri_b_test:
+; CHECK: ld.b
+; andi.b with 127 is equivalent to bclri.b with 7; accept any $w register number.
+; CHECK: andi.b {{\$w[0-9]+}}, {{\$w[0-9]+}}, 127
+; CHECK: st.b
+; CHECK: .size llvm_mips_bclri_b_test
+;
+@llvm_mips_bclri_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bclri_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bclri_h_test() nounwind { ; RES = llvm.mips.bclri.h(ARG1, 7)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_bclri_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.bclri.h(<8 x i16> %arg, i32 7)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_bclri_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.bclri.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_bclri_h_test:
+; CHECK: ld.h
+; CHECK: bclri.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_bclri_h_test
+;
+@llvm_mips_bclri_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bclri_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bclri_w_test() nounwind { ; RES = llvm.mips.bclri.w(ARG1, 7)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_bclri_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.bclri.w(<4 x i32> %arg, i32 7)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_bclri_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.bclri.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_bclri_w_test:
+; CHECK: ld.w
+; CHECK: bclri.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_bclri_w_test
+;
+@llvm_mips_bclri_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bclri_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bclri_d_test() nounwind { ; RES = llvm.mips.bclri.d(ARG1, 7)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_bclri_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.bclri.d(<2 x i64> %arg, i32 7)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_bclri_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.bclri.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_bclri_d_test:
+; CHECK: ld.d
+; CHECK: bclri.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_bclri_d_test
+;
+@llvm_mips_binsli_b_ARG1 = global <16 x i8> zeroinitializer, align 16
+@llvm_mips_binsli_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_binsli_b_RES  = global <16 x i8> zeroinitializer, align 16
+
+define void @llvm_mips_binsli_b_test() nounwind { ; RES = llvm.mips.binsli.b(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_binsli_b_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_binsli_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %arg1, <16 x i8> %arg2, i32 7)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_binsli_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.binsli.b(<16 x i8>, <16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_binsli_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsli.b [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_b_RES)(
+; CHECK-DAG: st.b [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsli_b_test
+
+@llvm_mips_binsli_h_ARG1 = global <8 x i16> zeroinitializer, align 16
+@llvm_mips_binsli_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_binsli_h_RES  = global <8 x i16> zeroinitializer, align 16
+
+define void @llvm_mips_binsli_h_test() nounwind { ; RES = llvm.mips.binsli.h(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_binsli_h_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_binsli_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %arg1, <8 x i16> %arg2, i32 7)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_binsli_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.binsli.h(<8 x i16>, <8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_binsli_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_h_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_h_ARG2)(
+; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsli.h [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_h_RES)(
+; CHECK-DAG: st.h [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsli_h_test
+
+@llvm_mips_binsli_w_ARG1 = global <4 x i32> zeroinitializer, align 16
+@llvm_mips_binsli_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_binsli_w_RES  = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_binsli_w_test() nounwind { ; RES = llvm.mips.binsli.w(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_binsli_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_binsli_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %arg1, <4 x i32> %arg2, i32 7)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_binsli_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.binsli.w(<4 x i32>, <4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_binsli_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_w_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_w_ARG2)(
+; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsli.w [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_w_RES)(
+; CHECK-DAG: st.w [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsli_w_test
+
+@llvm_mips_binsli_d_ARG1 = global <2 x i64> zeroinitializer, align 16
+@llvm_mips_binsli_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_binsli_d_RES  = global <2 x i64> zeroinitializer, align 16
+
+define void @llvm_mips_binsli_d_test() nounwind { ; RES = llvm.mips.binsli.d(ARG1, ARG2, 61)
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_binsli_d_ARG1
+  %arg2 = load <2 x i64>* @llvm_mips_binsli_d_ARG2
+  ; TODO: The immediate is deliberately large to sidestep a legalization
+  ;       problem: a mask that does not fit in a 10-bit immediate is
+  ;       legalized into a constant pool. Cover the remaining immediates
+  ;       with a test once they correctly select binsli.d.
+  %res = tail call <2 x i64> @llvm.mips.binsli.d(<2 x i64> %arg1, <2 x i64> %arg2, i32 61)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_binsli_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.binsli.d(<2 x i64>, <2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_binsli_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_d_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_d_ARG2)(
+; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsli.d [[R3]], [[R4]], 61
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_d_RES)(
+; CHECK-DAG: st.d [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsli_d_test
+
+@llvm_mips_binsri_b_ARG1 = global <16 x i8> zeroinitializer, align 16
+@llvm_mips_binsri_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_binsri_b_RES  = global <16 x i8> zeroinitializer, align 16
+
+define void @llvm_mips_binsri_b_test() nounwind { ; RES = llvm.mips.binsri.b(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_binsri_b_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_binsri_b_ARG2
+  %res = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %arg1, <16 x i8> %arg2, i32 7)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_binsri_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.binsri.b(<16 x i8>, <16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_binsri_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsri.b [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_b_RES)(
+; CHECK-DAG: st.b [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsri_b_test
+
+@llvm_mips_binsri_h_ARG1 = global <8 x i16> zeroinitializer, align 16
+@llvm_mips_binsri_h_ARG2 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_binsri_h_RES  = global <8 x i16> zeroinitializer, align 16
+
+define void @llvm_mips_binsri_h_test() nounwind { ; RES = llvm.mips.binsri.h(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_binsri_h_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_binsri_h_ARG2
+  %res = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %arg1, <8 x i16> %arg2, i32 7)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_binsri_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.binsri.h(<8 x i16>, <8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_binsri_h_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_h_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_h_ARG2)(
+; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsri.h [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_h_RES)(
+; CHECK-DAG: st.h [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsri_h_test
+
+@llvm_mips_binsri_w_ARG1 = global <4 x i32> zeroinitializer, align 16
+@llvm_mips_binsri_w_ARG2 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_binsri_w_RES  = global <4 x i32> zeroinitializer, align 16
+
+define void @llvm_mips_binsri_w_test() nounwind { ; RES = llvm.mips.binsri.w(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_binsri_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_binsri_w_ARG2
+  %res = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %arg1, <4 x i32> %arg2, i32 7)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_binsri_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.binsri.w(<4 x i32>, <4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_binsri_w_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_w_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_w_ARG2)(
+; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsri.w [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_w_RES)(
+; CHECK-DAG: st.w [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsri_w_test
+
+@llvm_mips_binsri_d_ARG1 = global <2 x i64> zeroinitializer, align 16
+@llvm_mips_binsri_d_ARG2 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_binsri_d_RES  = global <2 x i64> zeroinitializer, align 16
+
+define void @llvm_mips_binsri_d_test() nounwind { ; RES = llvm.mips.binsri.d(ARG1, ARG2, 7)
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_binsri_d_ARG1
+  %arg2 = load <2 x i64>* @llvm_mips_binsri_d_ARG2
+  %res = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %arg1, <2 x i64> %arg2, i32 7)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_binsri_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.binsri.d(<2 x i64>, <2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_binsri_d_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_d_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_d_ARG2)(
+; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: binsri.d [[R3]], [[R4]], 7
+; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_d_RES)(
+; CHECK-DAG: st.d [[R3]], 0([[R5]])
+; CHECK: .size llvm_mips_binsri_d_test
+
+@llvm_mips_bnegi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bnegi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bnegi_b_test() nounwind { ; RES = llvm.mips.bnegi.b(ARG1, 7)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_bnegi_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.bnegi.b(<16 x i8> %arg, i32 7)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_bnegi_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bnegi.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_bnegi_b_test:
+; CHECK: ld.b
+; CHECK: bnegi.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_bnegi_b_test
+;
+@llvm_mips_bnegi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bnegi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bnegi_h_test() nounwind { ; RES = llvm.mips.bnegi.h(ARG1, 7)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_bnegi_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.bnegi.h(<8 x i16> %arg, i32 7)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_bnegi_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.bnegi.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_bnegi_h_test:
+; CHECK: ld.h
+; CHECK: bnegi.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_bnegi_h_test
+;
+@llvm_mips_bnegi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bnegi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bnegi_w_test() nounwind { ; RES = llvm.mips.bnegi.w(ARG1, 7)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_bnegi_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.bnegi.w(<4 x i32> %arg, i32 7)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_bnegi_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.bnegi.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_bnegi_w_test:
+; CHECK: ld.w
+; CHECK: bnegi.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_bnegi_w_test
+;
+@llvm_mips_bnegi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bnegi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bnegi_d_test() nounwind { ; RES = llvm.mips.bnegi.d(ARG1, 7)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_bnegi_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.bnegi.d(<2 x i64> %arg, i32 7)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_bnegi_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.bnegi.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_bnegi_d_test:
+; CHECK: ld.d
+; CHECK: bnegi.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_bnegi_d_test
+;
+@llvm_mips_bseti_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bseti_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bseti_b_test() nounwind { ; RES = llvm.mips.bseti.b(ARG1, 7)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_bseti_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.bseti.b(<16 x i8> %arg, i32 7)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_bseti_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bseti.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_bseti_b_test:
+; CHECK: ld.b
+; CHECK: bseti.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_bseti_b_test
+;
+@llvm_mips_bseti_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bseti_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_bseti_h_test() nounwind { ; RES = llvm.mips.bseti.h(ARG1, 7)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_bseti_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.bseti.h(<8 x i16> %arg, i32 7)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_bseti_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.bseti.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_bseti_h_test:
+; CHECK: ld.h
+; CHECK: bseti.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_bseti_h_test
+;
+@llvm_mips_bseti_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bseti_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_bseti_w_test() nounwind { ; RES = llvm.mips.bseti.w(ARG1, 7)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_bseti_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.bseti.w(<4 x i32> %arg, i32 7)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_bseti_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.bseti.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_bseti_w_test:
+; CHECK: ld.w
+; CHECK: bseti.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_bseti_w_test
+;
+@llvm_mips_bseti_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bseti_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_bseti_d_test() nounwind { ; RES = llvm.mips.bseti.d(ARG1, 7)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_bseti_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.bseti.d(<2 x i64> %arg, i32 7)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_bseti_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.bseti.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_bseti_d_test:
+; CHECK: ld.d
+; CHECK: bseti.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_bseti_d_test
+;
diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll
new file mode 100644
index 000000000000..bf1578f30f32
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i5-c.ll
@@ -0,0 +1,386 @@
+; Test the MSA intrinsics that are encoded with the I5 instruction format.
+; There are lots of these so this covers those beginning with 'c'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_ceqi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ceqi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ceqi_b_test() nounwind { ; RES = llvm.mips.ceqi.b(ARG1, 14)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_ceqi_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.ceqi.b(<16 x i8> %arg, i32 14)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_ceqi_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ceqi.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_ceqi_b_test:
+; CHECK: ld.b
+; CHECK: ceqi.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ceqi_b_test
+;
+@llvm_mips_ceqi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ceqi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ceqi_h_test() nounwind { ; RES = llvm.mips.ceqi.h(ARG1, 14)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_ceqi_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.ceqi.h(<8 x i16> %arg, i32 14)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_ceqi_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ceqi.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_ceqi_h_test:
+; CHECK: ld.h
+; CHECK: ceqi.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_ceqi_h_test
+;
+@llvm_mips_ceqi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ceqi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ceqi_w_test() nounwind { ; RES = llvm.mips.ceqi.w(ARG1, 14)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_ceqi_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.ceqi.w(<4 x i32> %arg, i32 14)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_ceqi_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ceqi.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_ceqi_w_test:
+; CHECK: ld.w
+; CHECK: ceqi.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_ceqi_w_test
+;
+@llvm_mips_ceqi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ceqi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ceqi_d_test() nounwind { ; RES = llvm.mips.ceqi.d(ARG1, 14)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_ceqi_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.ceqi.d(<2 x i64> %arg, i32 14)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_ceqi_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ceqi.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_ceqi_d_test:
+; CHECK: ld.d
+; CHECK: ceqi.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_ceqi_d_test
+;
+@llvm_mips_clei_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clei_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clei_s_b_test() nounwind { ; RES = llvm.mips.clei.s.b(ARG1, 14)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_clei_s_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.clei.s.b(<16 x i8> %arg, i32 14)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_clei_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.clei.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_clei_s_b_test:
+; CHECK: ld.b
+; CHECK: clei_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clei_s_b_test
+;
+@llvm_mips_clei_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clei_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clei_s_h_test() nounwind { ; RES = llvm.mips.clei.s.h(ARG1, 14)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_clei_s_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.clei.s.h(<8 x i16> %arg, i32 14)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_clei_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.clei.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_clei_s_h_test:
+; CHECK: ld.h
+; CHECK: clei_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clei_s_h_test
+;
+@llvm_mips_clei_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clei_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clei_s_w_test() nounwind { ; RES = llvm.mips.clei.s.w(ARG1, 14)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_clei_s_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.clei.s.w(<4 x i32> %arg, i32 14)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_clei_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.clei.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_clei_s_w_test:
+; CHECK: ld.w
+; CHECK: clei_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clei_s_w_test
+;
+@llvm_mips_clei_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clei_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clei_s_d_test() nounwind { ; RES = llvm.mips.clei.s.d(ARG1, 14)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_clei_s_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.clei.s.d(<2 x i64> %arg, i32 14)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_clei_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.clei.s.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_clei_s_d_test:
+; CHECK: ld.d
+; CHECK: clei_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clei_s_d_test
+;
+@llvm_mips_clei_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clei_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clei_u_b_test() nounwind { ; RES = llvm.mips.clei.u.b(ARG1, 14)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_clei_u_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.clei.u.b(<16 x i8> %arg, i32 14)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_clei_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.clei.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_clei_u_b_test:
+; CHECK: ld.b
+; CHECK: clei_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clei_u_b_test
+;
+@llvm_mips_clei_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clei_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clei_u_h_test() nounwind { ; RES = llvm.mips.clei.u.h(ARG1, 14)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_clei_u_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.clei.u.h(<8 x i16> %arg, i32 14)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_clei_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.clei.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_clei_u_h_test:
+; CHECK: ld.h
+; CHECK: clei_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clei_u_h_test
+;
+@llvm_mips_clei_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clei_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clei_u_w_test() nounwind { ; RES = llvm.mips.clei.u.w(ARG1, 14)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_clei_u_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.clei.u.w(<4 x i32> %arg, i32 14)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_clei_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.clei.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_clei_u_w_test:
+; CHECK: ld.w
+; CHECK: clei_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clei_u_w_test
+;
+@llvm_mips_clei_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clei_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clei_u_d_test() nounwind { ; RES = llvm.mips.clei.u.d(ARG1, 14)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_clei_u_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.clei.u.d(<2 x i64> %arg, i32 14)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_clei_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.clei.u.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_clei_u_d_test:
+; CHECK: ld.d
+; CHECK: clei_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clei_u_d_test
+;
+@llvm_mips_clti_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clti_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clti_s_b_test() nounwind { ; RES = llvm.mips.clti.s.b(ARG1, 14)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_clti_s_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.clti.s.b(<16 x i8> %arg, i32 14)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_clti_s_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.clti.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_clti_s_b_test:
+; CHECK: ld.b
+; CHECK: clti_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clti_s_b_test
+;
+@llvm_mips_clti_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clti_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clti_s_h_test() nounwind { ; RES = llvm.mips.clti.s.h(ARG1, 14)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_clti_s_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.clti.s.h(<8 x i16> %arg, i32 14)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_clti_s_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.clti.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_clti_s_h_test:
+; CHECK: ld.h
+; CHECK: clti_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clti_s_h_test
+;
+@llvm_mips_clti_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clti_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clti_s_w_test() nounwind { ; RES = llvm.mips.clti.s.w(ARG1, 14)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_clti_s_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.clti.s.w(<4 x i32> %arg, i32 14)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_clti_s_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.clti.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_clti_s_w_test:
+; CHECK: ld.w
+; CHECK: clti_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clti_s_w_test
+;
+@llvm_mips_clti_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clti_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clti_s_d_test() nounwind { ; RES = llvm.mips.clti.s.d(ARG1, 14)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_clti_s_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.clti.s.d(<2 x i64> %arg, i32 14)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_clti_s_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.clti.s.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_clti_s_d_test:
+; CHECK: ld.d
+; CHECK: clti_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clti_s_d_test
+;
+@llvm_mips_clti_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_clti_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_clti_u_b_test() nounwind { ; RES = llvm.mips.clti.u.b(ARG1, 14)
+entry:
+  %arg = load <16 x i8>* @llvm_mips_clti_u_b_ARG1
+  %res = tail call <16 x i8> @llvm.mips.clti.u.b(<16 x i8> %arg, i32 14)
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_clti_u_b_RES
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.clti.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_clti_u_b_test:
+; CHECK: ld.b
+; CHECK: clti_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_clti_u_b_test
+;
+@llvm_mips_clti_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_clti_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_clti_u_h_test() nounwind { ; RES = llvm.mips.clti.u.h(ARG1, 14)
+entry:
+  %arg = load <8 x i16>* @llvm_mips_clti_u_h_ARG1
+  %res = tail call <8 x i16> @llvm.mips.clti.u.h(<8 x i16> %arg, i32 14)
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_clti_u_h_RES
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.clti.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_clti_u_h_test:
+; CHECK: ld.h
+; CHECK: clti_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_clti_u_h_test
+;
+@llvm_mips_clti_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_clti_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_clti_u_w_test() nounwind { ; RES = llvm.mips.clti.u.w(ARG1, 14)
+entry:
+  %arg = load <4 x i32>* @llvm_mips_clti_u_w_ARG1
+  %res = tail call <4 x i32> @llvm.mips.clti.u.w(<4 x i32> %arg, i32 14)
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_clti_u_w_RES
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.clti.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_clti_u_w_test:
+; CHECK: ld.w
+; CHECK: clti_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_clti_u_w_test
+;
+@llvm_mips_clti_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_clti_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_clti_u_d_test() nounwind { ; RES = llvm.mips.clti.u.d(ARG1, 14)
+entry:
+  %arg = load <2 x i64>* @llvm_mips_clti_u_d_ARG1
+  %res = tail call <2 x i64> @llvm.mips.clti.u.d(<2 x i64> %arg, i32 14)
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_clti_u_d_RES
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.clti.u.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_clti_u_d_test:
+; CHECK: ld.d
+; CHECK: clti_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_clti_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/i5-m.ll b/test/CodeGen/Mips/msa/i5-m.ll
new file mode 100644
index 000000000000..27663494324d
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i5-m.ll
@@ -0,0 +1,310 @@
+; Test the MSA intrinsics that are encoded with the I5 instruction format.
+; There are lots of these so this covers those beginning with 'm'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_maxi_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_maxi_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_maxi_s_b_test() nounwind { ; Applies llvm.mips.maxi.s.b to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_maxi_s_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.maxi.s.b(<16 x i8> %0, i32 14) ; second operand is the literal constant 14
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_s_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.maxi.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_s_b_test:
+; CHECK: ld.b
+; CHECK: maxi_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_maxi_s_b_test
+;
+@llvm_mips_maxi_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_maxi_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_maxi_s_h_test() nounwind { ; Applies llvm.mips.maxi.s.h to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_maxi_s_h_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <8 x i16> @llvm.mips.maxi.s.h(<8 x i16> %0, i32 14) ; second operand is the literal constant 14
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_s_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.maxi.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_s_h_test:
+; CHECK: ld.h
+; CHECK: maxi_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_maxi_s_h_test
+;
+@llvm_mips_maxi_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_maxi_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_maxi_s_w_test() nounwind { ; Applies llvm.mips.maxi.s.w to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_maxi_s_w_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <4 x i32> @llvm.mips.maxi.s.w(<4 x i32> %0, i32 14) ; second operand is the literal constant 14
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_s_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.maxi.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_s_w_test:
+; CHECK: ld.w
+; CHECK: maxi_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_maxi_s_w_test
+;
+@llvm_mips_maxi_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_maxi_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_maxi_s_d_test() nounwind { ; Applies llvm.mips.maxi.s.d to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_maxi_s_d_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <2 x i64> @llvm.mips.maxi.s.d(<2 x i64> %0, i32 14) ; second operand is the literal constant 14
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_s_d_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.maxi.s.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_s_d_test:
+; CHECK: ld.d
+; CHECK: maxi_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_maxi_s_d_test
+;
+@llvm_mips_maxi_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_maxi_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_maxi_u_b_test() nounwind { ; Applies llvm.mips.maxi.u.b to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_maxi_u_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.maxi.u.b(<16 x i8> %0, i32 14) ; second operand is the literal constant 14
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_u_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.maxi.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_u_b_test:
+; CHECK: ld.b
+; CHECK: maxi_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_maxi_u_b_test
+;
+@llvm_mips_maxi_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_maxi_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_maxi_u_h_test() nounwind { ; Applies llvm.mips.maxi.u.h to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_maxi_u_h_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <8 x i16> @llvm.mips.maxi.u.h(<8 x i16> %0, i32 14) ; second operand is the literal constant 14
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_u_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.maxi.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_u_h_test:
+; CHECK: ld.h
+; CHECK: maxi_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_maxi_u_h_test
+;
+@llvm_mips_maxi_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_maxi_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_maxi_u_w_test() nounwind { ; Applies llvm.mips.maxi.u.w to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_maxi_u_w_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <4 x i32> @llvm.mips.maxi.u.w(<4 x i32> %0, i32 14) ; second operand is the literal constant 14
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_u_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.maxi.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_u_w_test:
+; CHECK: ld.w
+; CHECK: maxi_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_maxi_u_w_test
+;
+@llvm_mips_maxi_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_maxi_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_maxi_u_d_test() nounwind { ; Applies llvm.mips.maxi.u.d to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_maxi_u_d_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <2 x i64> @llvm.mips.maxi.u.d(<2 x i64> %0, i32 14) ; second operand is the literal constant 14
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_u_d_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.maxi.u.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_maxi_u_d_test:
+; CHECK: ld.d
+; CHECK: maxi_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_maxi_u_d_test
+;
+@llvm_mips_mini_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_mini_s_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_mini_s_b_test() nounwind { ; Applies llvm.mips.mini.s.b to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_mini_s_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.mini.s.b(<16 x i8> %0, i32 14) ; second operand is the literal constant 14
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_s_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.mini.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_mini_s_b_test:
+; CHECK: ld.b
+; CHECK: mini_s.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_mini_s_b_test
+;
+@llvm_mips_mini_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mini_s_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_mini_s_h_test() nounwind { ; Applies llvm.mips.mini.s.h to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_mini_s_h_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <8 x i16> @llvm.mips.mini.s.h(<8 x i16> %0, i32 14) ; second operand is the literal constant 14
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_s_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mini.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_mini_s_h_test:
+; CHECK: ld.h
+; CHECK: mini_s.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mini_s_h_test
+;
+@llvm_mips_mini_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mini_s_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_mini_s_w_test() nounwind { ; Applies llvm.mips.mini.s.w to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_mini_s_w_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <4 x i32> @llvm.mips.mini.s.w(<4 x i32> %0, i32 14) ; second operand is the literal constant 14
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_s_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mini.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_mini_s_w_test:
+; CHECK: ld.w
+; CHECK: mini_s.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mini_s_w_test
+;
+@llvm_mips_mini_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_mini_s_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_mini_s_d_test() nounwind { ; Applies llvm.mips.mini.s.d to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_mini_s_d_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <2 x i64> @llvm.mips.mini.s.d(<2 x i64> %0, i32 14) ; second operand is the literal constant 14
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_s_d_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.mini.s.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_mini_s_d_test:
+; CHECK: ld.d
+; CHECK: mini_s.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_mini_s_d_test
+;
+@llvm_mips_mini_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_mini_u_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_mini_u_b_test() nounwind { ; Applies llvm.mips.mini.u.b to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_mini_u_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.mini.u.b(<16 x i8> %0, i32 14) ; second operand is the literal constant 14
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_u_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.mini.u.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_mini_u_b_test:
+; CHECK: ld.b
+; CHECK: mini_u.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_mini_u_b_test
+;
+@llvm_mips_mini_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_mini_u_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_mini_u_h_test() nounwind { ; Applies llvm.mips.mini.u.h to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_mini_u_h_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <8 x i16> @llvm.mips.mini.u.h(<8 x i16> %0, i32 14) ; second operand is the literal constant 14
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_u_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.mini.u.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_mini_u_h_test:
+; CHECK: ld.h
+; CHECK: mini_u.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_mini_u_h_test
+;
+@llvm_mips_mini_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_mini_u_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_mini_u_w_test() nounwind { ; Applies llvm.mips.mini.u.w to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_mini_u_w_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <4 x i32> @llvm.mips.mini.u.w(<4 x i32> %0, i32 14) ; second operand is the literal constant 14
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_u_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.mini.u.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_mini_u_w_test:
+; CHECK: ld.w
+; CHECK: mini_u.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_mini_u_w_test
+;
+@llvm_mips_mini_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_mini_u_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_mini_u_d_test() nounwind { ; Applies llvm.mips.mini.u.d to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_mini_u_d_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <2 x i64> @llvm.mips.mini.u.d(<2 x i64> %0, i32 14) ; second operand is the literal constant 14
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_u_d_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.mini.u.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_mini_u_d_test:
+; CHECK: ld.d
+; CHECK: mini_u.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_mini_u_d_test
+;
diff --git a/test/CodeGen/Mips/msa/i5-s.ll b/test/CodeGen/Mips/msa/i5-s.ll
new file mode 100644
index 000000000000..184172f63b85
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i5-s.ll
@@ -0,0 +1,82 @@
+; Test the MSA intrinsics that are encoded with the I5 instruction format.
+; There are lots of these so this covers those beginning with 's'
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_subvi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_subvi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_subvi_b_test() nounwind { ; Applies llvm.mips.subvi.b to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_subvi_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.subvi.b(<16 x i8> %0, i32 14) ; second operand is the literal constant 14
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_subvi_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.subvi.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_subvi_b_test:
+; CHECK: ld.b
+; CHECK: subvi.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_subvi_b_test
+;
+@llvm_mips_subvi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_subvi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_subvi_h_test() nounwind { ; Applies llvm.mips.subvi.h to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_subvi_h_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <8 x i16> @llvm.mips.subvi.h(<8 x i16> %0, i32 14) ; second operand is the literal constant 14
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_subvi_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.subvi.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_subvi_h_test:
+; CHECK: ld.h
+; CHECK: subvi.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_subvi_h_test
+;
+@llvm_mips_subvi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_subvi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_subvi_w_test() nounwind { ; Applies llvm.mips.subvi.w to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_subvi_w_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <4 x i32> @llvm.mips.subvi.w(<4 x i32> %0, i32 14) ; second operand is the literal constant 14
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_subvi_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.subvi.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_subvi_w_test:
+; CHECK: ld.w
+; CHECK: subvi.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_subvi_w_test
+;
+@llvm_mips_subvi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_subvi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_subvi_d_test() nounwind { ; Applies llvm.mips.subvi.d to ARG1 with the constant 14 and stores the result to RES.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_subvi_d_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <2 x i64> @llvm.mips.subvi.d(<2 x i64> %0, i32 14) ; second operand is the literal constant 14
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_subvi_d_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.subvi.d(<2 x i64>, i32) nounwind
+
+; CHECK: llvm_mips_subvi_d_test:
+; CHECK: ld.d
+; CHECK: subvi.d
+; CHECK: st.d
+; CHECK: .size llvm_mips_subvi_d_test
+;
diff --git a/test/CodeGen/Mips/msa/i5_ld_st.ll b/test/CodeGen/Mips/msa/i5_ld_st.ll
new file mode 100644
index 000000000000..7cc55f2904be
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i5_ld_st.ll
@@ -0,0 +1,150 @@
+; Test the MSA intrinsics that are encoded with the I5 instruction format and
+; are loads or stores.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_ld_b_ARG = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ld_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ld_b_test() nounwind { ; Obtains a vector via llvm.mips.ld.b from the ARG global and stores it to RES.
+entry:
+  %0 = bitcast <16 x i8>* @llvm_mips_ld_b_ARG to i8* ; the intrinsic is declared with an i8* address parameter
+  %1 = tail call <16 x i8> @llvm.mips.ld.b(i8* %0, i32 16) ; i32 operand is the literal constant 16
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_ld_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ld.b(i8*, i32) nounwind
+
+; CHECK: llvm_mips_ld_b_test:
+; CHECK: ld.b [[R1:\$w[0-9]+]], 16(
+; CHECK: st.b
+; CHECK: .size llvm_mips_ld_b_test
+;
+@llvm_mips_ld_h_ARG = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_ld_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_ld_h_test() nounwind { ; Obtains a vector via llvm.mips.ld.h from the ARG global and stores it to RES.
+entry:
+  %0 = bitcast <8 x i16>* @llvm_mips_ld_h_ARG to i8* ; the intrinsic is declared with an i8* address parameter
+  %1 = tail call <8 x i16> @llvm.mips.ld.h(i8* %0, i32 16) ; i32 operand is the literal constant 16
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_ld_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.ld.h(i8*, i32) nounwind
+
+; CHECK: llvm_mips_ld_h_test:
+; CHECK: ld.h [[R1:\$w[0-9]+]], 16(
+; CHECK: st.h
+; CHECK: .size llvm_mips_ld_h_test
+;
+@llvm_mips_ld_w_ARG = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_ld_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_ld_w_test() nounwind { ; Obtains a vector via llvm.mips.ld.w from the ARG global and stores it to RES.
+entry:
+  %0 = bitcast <4 x i32>* @llvm_mips_ld_w_ARG to i8* ; the intrinsic is declared with an i8* address parameter
+  %1 = tail call <4 x i32> @llvm.mips.ld.w(i8* %0, i32 16) ; i32 operand is the literal constant 16
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_ld_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ld.w(i8*, i32) nounwind
+
+; CHECK: llvm_mips_ld_w_test:
+; CHECK: ld.w [[R1:\$w[0-9]+]], 16(
+; CHECK: st.w
+; CHECK: .size llvm_mips_ld_w_test
+;
+@llvm_mips_ld_d_ARG = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_ld_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_ld_d_test() nounwind { ; Obtains a vector via llvm.mips.ld.d from the ARG global and stores it to RES.
+entry:
+  %0 = bitcast <2 x i64>* @llvm_mips_ld_d_ARG to i8* ; the intrinsic is declared with an i8* address parameter
+  %1 = tail call <2 x i64> @llvm.mips.ld.d(i8* %0, i32 16) ; i32 operand is the literal constant 16
+  store <2 x i64> %1, <2 x i64>* @llvm_mips_ld_d_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ld.d(i8*, i32) nounwind
+
+; CHECK: llvm_mips_ld_d_test:
+; CHECK: ld.d [[R1:\$w[0-9]+]], 16(
+; CHECK: st.d
+; CHECK: .size llvm_mips_ld_d_test
+;
+@llvm_mips_st_b_ARG = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_st_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_st_b_test() nounwind { ; Passes the ARG vector and the RES address to llvm.mips.st.b.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_st_b_ARG ; vector operand read from the ARG global
+  %1 = bitcast <16 x i8>* @llvm_mips_st_b_RES to i8* ; the intrinsic is declared with an i8* address parameter
+  tail call void @llvm.mips.st.b(<16 x i8> %0, i8* %1, i32 16) ; i32 operand is the literal constant 16
+  ret void
+}
+
+declare void @llvm.mips.st.b(<16 x i8>, i8*, i32) nounwind
+
+; CHECK: llvm_mips_st_b_test:
+; CHECK: ld.b
+; CHECK: st.b [[R1:\$w[0-9]+]], 16(
+; CHECK: .size llvm_mips_st_b_test
+;
+@llvm_mips_st_h_ARG = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_st_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_st_h_test() nounwind { ; Passes the ARG vector and the RES address to llvm.mips.st.h.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_st_h_ARG ; vector operand read from the ARG global
+  %1 = bitcast <8 x i16>* @llvm_mips_st_h_RES to i8* ; the intrinsic is declared with an i8* address parameter
+  tail call void @llvm.mips.st.h(<8 x i16> %0, i8* %1, i32 16) ; i32 operand is the literal constant 16
+  ret void
+}
+
+declare void @llvm.mips.st.h(<8 x i16>, i8*, i32) nounwind
+
+; CHECK: llvm_mips_st_h_test:
+; CHECK: ld.h
+; CHECK: st.h [[R1:\$w[0-9]+]], 16(
+; CHECK: .size llvm_mips_st_h_test
+;
+@llvm_mips_st_w_ARG = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_st_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_st_w_test() nounwind { ; Passes the ARG vector and the RES address to llvm.mips.st.w.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_st_w_ARG ; vector operand read from the ARG global
+  %1 = bitcast <4 x i32>* @llvm_mips_st_w_RES to i8* ; the intrinsic is declared with an i8* address parameter
+  tail call void @llvm.mips.st.w(<4 x i32> %0, i8* %1, i32 16) ; i32 operand is the literal constant 16
+  ret void
+}
+
+declare void @llvm.mips.st.w(<4 x i32>, i8*, i32) nounwind
+
+; CHECK: llvm_mips_st_w_test:
+; CHECK: ld.w
+; CHECK: st.w [[R1:\$w[0-9]+]], 16(
+; CHECK: .size llvm_mips_st_w_test
+;
+@llvm_mips_st_d_ARG = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_st_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_st_d_test() nounwind { ; Passes the ARG vector and the RES address to llvm.mips.st.d.
+entry:
+  %0 = load <2 x i64>* @llvm_mips_st_d_ARG ; vector operand read from the ARG global
+  %1 = bitcast <2 x i64>* @llvm_mips_st_d_RES to i8* ; the intrinsic is declared with an i8* address parameter
+  tail call void @llvm.mips.st.d(<2 x i64> %0, i8* %1, i32 16) ; i32 operand is the literal constant 16
+  ret void
+}
+
+declare void @llvm.mips.st.d(<2 x i64>, i8*, i32) nounwind
+
+; CHECK: llvm_mips_st_d_test:
+; CHECK: ld.d
+; CHECK: st.d [[R1:\$w[0-9]+]], 16(
+; CHECK: .size llvm_mips_st_d_test
+;
diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll
new file mode 100644
index 000000000000..d2931a72feaa
--- /dev/null
+++ b/test/CodeGen/Mips/msa/i8.ll
@@ -0,0 +1,211 @@
+; Test the MSA intrinsics that are encoded with the I8 instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_andi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_andi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_andi_b_test() nounwind { ; Applies llvm.mips.andi.b to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_andi_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.andi.b(<16 x i8> %0, i32 25) ; second operand is the literal constant 25
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_andi_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.andi.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_andi_b_test:
+; CHECK: ld.b
+; CHECK: andi.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_andi_b_test
+
+@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+@llvm_mips_bmnzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bmnzi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bmnzi_b_test() nounwind { ; Applies llvm.mips.bmnzi.b to ARG1 and ARG2 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1 ; first vector operand, read from the ARG1 global
+  %1 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG2 ; second vector operand, read from the ARG2 global
+  %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 25) ; third operand is the literal constant 25
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, <16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_bmnzi_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: bmnzi.b [[R3]], [[R4]], 25
+; CHECK-DAG: st.b [[R3]], 0(
+; CHECK: .size llvm_mips_bmnzi_b_test
+
+@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+@llvm_mips_bmzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bmzi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bmzi_b_test() nounwind { ; Applies llvm.mips.bmzi.b to ARG1 and ARG2 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1 ; first vector operand, read from the ARG1 global
+  %1 = load <16 x i8>* @llvm_mips_bmzi_b_ARG2 ; second vector operand, read from the ARG2 global
+  %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 25) ; third operand is the literal constant 25
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_bmzi_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, <16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_bmzi_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; bmzi.b is the same as bmnzi.b with ws and wd_in swapped, so bmnzi.b with swapped operands is expected
+; CHECK-DAG: bmnzi.b [[R4]], [[R3]], 25
+; CHECK-DAG: st.b [[R4]], 0(
+; CHECK: .size llvm_mips_bmzi_b_test
+
+@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+@llvm_mips_bseli_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bseli_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_bseli_b_test() nounwind { ; Applies llvm.mips.bseli.b to ARG1 and ARG2 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1 ; first vector operand, read from the ARG1 global
+  %1 = load <16 x i8>* @llvm_mips_bseli_b_ARG2 ; second vector operand, read from the ARG2 global
+  %2 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %1, i32 25) ; third operand is the literal constant 25
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_bseli_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, <16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_bseli_b_test:
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: bseli.b [[R3]], [[R4]], 25
+; CHECK-DAG: st.b [[R3]], 0(
+; CHECK: .size llvm_mips_bseli_b_test
+
+@llvm_mips_nori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_nori_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_nori_b_test() nounwind { ; Applies llvm.mips.nori.b to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_nori_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.nori.b(<16 x i8> %0, i32 25) ; second operand is the literal constant 25
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_nori_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.nori.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_nori_b_test:
+; CHECK: ld.b
+; CHECK: nori.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_nori_b_test
+;
+@llvm_mips_ori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_ori_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_ori_b_test() nounwind { ; Applies llvm.mips.ori.b to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_ori_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.ori.b(<16 x i8> %0, i32 25) ; second operand is the literal constant 25
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_ori_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.ori.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_ori_b_test:
+; CHECK: ld.b
+; CHECK: ori.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_ori_b_test
+;
+@llvm_mips_shf_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_shf_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_shf_b_test() nounwind { ; Applies llvm.mips.shf.b to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_shf_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.shf.b(<16 x i8> %0, i32 25) ; second operand is the literal constant 25
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_shf_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.shf.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_shf_b_test:
+; CHECK: ld.b
+; CHECK: shf.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_shf_b_test
+;
+@llvm_mips_shf_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_shf_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_shf_h_test() nounwind { ; Applies llvm.mips.shf.h to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <8 x i16>* @llvm_mips_shf_h_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <8 x i16> @llvm.mips.shf.h(<8 x i16> %0, i32 25) ; second operand is the literal constant 25
+  store <8 x i16> %1, <8 x i16>* @llvm_mips_shf_h_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <8 x i16> @llvm.mips.shf.h(<8 x i16>, i32) nounwind
+
+; CHECK: llvm_mips_shf_h_test:
+; CHECK: ld.h
+; CHECK: shf.h
+; CHECK: st.h
+; CHECK: .size llvm_mips_shf_h_test
+;
+@llvm_mips_shf_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_shf_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_shf_w_test() nounwind { ; Applies llvm.mips.shf.w to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <4 x i32>* @llvm_mips_shf_w_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <4 x i32> @llvm.mips.shf.w(<4 x i32> %0, i32 25) ; second operand is the literal constant 25
+  store <4 x i32> %1, <4 x i32>* @llvm_mips_shf_w_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.shf.w(<4 x i32>, i32) nounwind
+
+; CHECK: llvm_mips_shf_w_test:
+; CHECK: ld.w
+; CHECK: shf.w
+; CHECK: st.w
+; CHECK: .size llvm_mips_shf_w_test
+;
+@llvm_mips_xori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_xori_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_xori_b_test() nounwind { ; Applies llvm.mips.xori.b to ARG1 with the constant 25 and stores the result to RES.
+entry:
+  %0 = load <16 x i8>* @llvm_mips_xori_b_ARG1 ; vector operand read from the ARG1 global
+  %1 = tail call <16 x i8> @llvm.mips.xori.b(<16 x i8> %0, i32 25) ; second operand is the literal constant 25
+  store <16 x i8> %1, <16 x i8>* @llvm_mips_xori_b_RES ; intrinsic result written to the RES global
+  ret void
+}
+
+declare <16 x i8> @llvm.mips.xori.b(<16 x i8>, i32) nounwind
+
+; CHECK: llvm_mips_xori_b_test:
+; CHECK: ld.b
+; CHECK: xori.b
+; CHECK: st.b
+; CHECK: .size llvm_mips_xori_b_test
+;
diff --git a/test/CodeGen/Mips/msa/inline-asm.ll b/test/CodeGen/Mips/msa/inline-asm.ll
new file mode 100644
index 000000000000..4a34273f3c00
--- /dev/null
+++ b/test/CodeGen/Mips/msa/inline-asm.ll
@@ -0,0 +1,34 @@
+; A basic inline assembly test
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@v4i32_r  = global <4 x i32> zeroinitializer, align 16
+
+define void @test1() nounwind { ; Inline asm with one "=f" output and no inputs; the result is stored to @v4i32_r.
+entry:
+  ; CHECK-LABEL: test1:
+  %0 = call <4 x i32> asm "ldi.w ${0:w}, 1", "=f"() ; ${0:w} is the single "=f" output operand
+  ; CHECK: ldi.w $w{{[1-3]?[0-9]}}, 1
+  store <4 x i32> %0, <4 x i32>* @v4i32_r ; asm result written to the @v4i32_r global
+  ret void
+}
+
+define void @test2() nounwind { ; Inline asm with one "=f" output and one "f" input, both taken from and stored to @v4i32_r.
+entry:
+  ; CHECK-LABEL: test2:
+  %0 = load <4 x i32>* @v4i32_r ; input vector read from the @v4i32_r global
+  %1 = call <4 x i32> asm "addvi.w ${0:w}, ${1:w}, 1", "=f,f"(<4 x i32> %0) ; ${0:w} is the output operand, ${1:w} the input %0
+  ; CHECK: addvi.w $w{{[1-3]?[0-9]}}, $w{{[1-3]?[0-9]}}, 1
+  store <4 x i32> %1, <4 x i32>* @v4i32_r ; asm result written back to the @v4i32_r global
+  ret void
+}
+
+define void @test3() nounwind { ; Same asm string as test2, but marked sideeffect and with an added "~{$w0}" clobber.
+entry:
+  ; CHECK-LABEL: test3:
+  %0 = load <4 x i32>* @v4i32_r ; input vector read from the @v4i32_r global
+  %1 = call <4 x i32> asm sideeffect "addvi.w ${0:w}, ${1:w}, 1", "=f,f,~{$w0}"(<4 x i32> %0) ; $w0 is listed as clobbered; the pattern below cannot match register number 0
+  ; CHECK: addvi.w $w{{([1-9]|[1-3][0-9])}}, $w{{([1-9]|[1-3][0-9])}}, 1
+  store <4 x i32> %1, <4 x i32>* @v4i32_r ; asm result written back to the @v4i32_r global
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
new file mode 100644
index 000000000000..4beaaa9c1841
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
@@ -0,0 +1,134 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Unexpected illegal type!" assertion.
+; It should at least successfully build.
+
+define void @autogen_SD1704963983(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca <4 x double>
+  %A3 = alloca <8 x i64>
+  %A2 = alloca <1 x double>
+  %A1 = alloca double
+  %A = alloca i32
+  %L = load i8* %0
+  store i8 77, i8* %0
+  %E = extractelement <8 x i64> zeroinitializer, i32 2
+  %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3>
+  %I = insertelement <8 x i64> zeroinitializer, i64 %E, i32 7
+  %Sl = select i1 false, i8* %0, i8* %0
+  %Cmp = icmp eq i32 434069, 272505
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF78, %BB
+  %L5 = load i8* %Sl
+  store i8 %L, i8* %Sl
+  %E6 = extractelement <8 x i32> zeroinitializer, i32 2
+  %Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
+  %I8 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 7
+  %B = shl <1 x i16> zeroinitializer, zeroinitializer
+  %FC = sitofp <8 x i64> zeroinitializer to <8 x float>
+  %Sl9 = select i1 %Cmp, i8 77, i8 77
+  %Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer
+  %L11 = load i8* %0
+  store i8 %Sl9, i8* %0
+  %E12 = extractelement <1 x i16> zeroinitializer, i32 0
+  %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7>
+  %I14 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 3
+  %B15 = udiv <1 x i16> %B, zeroinitializer
+  %Tr = trunc <8 x i64> %Shuff to <8 x i32>
+  %Sl16 = select i1 %Cmp, i8 77, i8 %5
+  %Cmp17 = icmp ult <8 x i1> %Cmp10, %Cmp10
+  %L18 = load i8* %Sl
+  store i8 -1, i8* %Sl
+  %E19 = extractelement <8 x i32> zeroinitializer, i32 3
+  %Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef>
+  %I21 = insertelement <8 x i64> %Shuff13, i64 %E, i32 0
+  %B22 = urem <8 x i64> %Shuff7, %I21
+  %FC23 = sitofp i32 50347 to float
+  %Sl24 = select i1 %Cmp, double 0.000000e+00, double 0.000000e+00
+  %Cmp25 = icmp ugt i32 465489, 47533
+  br i1 %Cmp25, label %CF, label %CF78
+
+CF78:                                             ; preds = %CF
+  %L26 = load i8* %Sl
+  store i32 50347, i32* %A
+  %E27 = extractelement <8 x i1> %Cmp10, i32 2
+  br i1 %E27, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF81, %CF78
+  %Shuff28 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
+  %I29 = insertelement <1 x i16> zeroinitializer, i16 -1, i32 0
+  %B30 = urem <8 x i32> %Tr, zeroinitializer
+  %Tr31 = trunc i32 0 to i16
+  %Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
+  %L33 = load i8* %Sl
+  store i8 %L26, i8* %Sl
+  %E34 = extractelement <4 x i32> zeroinitializer, i32 0
+  %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef
+  %I36 = insertelement <8 x i64> %Shuff28, i64 %E, i32 7
+  %B37 = srem <1 x i16> %I29, zeroinitializer
+  %FC38 = sitofp <8 x i32> %B30 to <8 x double>
+  %Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24
+  %L40 = load i8* %Sl
+  store i8 %Sl16, i8* %Sl
+  %E41 = extractelement <1 x i16> zeroinitializer, i32 0
+  %Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12>
+  %I43 = insertelement <4 x i32> zeroinitializer, i32 272505, i32 0
+  %B44 = urem <8 x i32> %B30, %Tr
+  %PC = bitcast i8* %0 to i64*
+  %Sl45 = select i1 %Cmp, <8 x i1> %Cmp10, <8 x i1> %Shuff42
+  %Cmp46 = fcmp ugt float 0xB856238A00000000, 0x47DA795E40000000
+  br i1 %Cmp46, label %CF77, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF77
+  %L47 = load i64* %PC
+  store i8 77, i8* %Sl
+  %E48 = extractelement <8 x i64> zeroinitializer, i32 2
+  %Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3>
+  %I50 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7
+  %B51 = fdiv float 0x46CC2D8000000000, %FC23
+  %PC52 = bitcast <8 x i64>* %A3 to i64*
+  %Sl53 = select i1 %Cmp, <8 x i64> %Shuff, <8 x i64> %Shuff
+  %Cmp54 = fcmp ole float 0x47DA795E40000000, 0xB856238A00000000
+  br i1 %Cmp54, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF80
+  %L55 = load i8* %Sl
+  store i8 %Sl16, i8* %Sl
+  %E56 = extractelement <1 x i16> %B, i32 0
+  %Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1>
+  %I58 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7
+  %B59 = srem i32 %E19, %E19
+  %Sl60 = select i1 %Cmp, i8 77, i8 77
+  %Cmp61 = icmp ult <1 x i16> zeroinitializer, %B
+  %L62 = load i8* %Sl
+  store i64 %L47, i64* %PC52
+  %E63 = extractelement <4 x i32> %I43, i32 2
+  %Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3>
+  %I65 = insertelement <8 x i64> %B22, i64 %L47, i32 7
+  %B66 = add <8 x i64> %I50, %I65
+  %FC67 = uitofp i16 %E12 to float
+  %Sl68 = select i1 %Cmp, <8 x i32> %B30, <8 x i32> zeroinitializer
+  %Cmp69 = fcmp ord double 0.000000e+00, 0.000000e+00
+  br i1 %Cmp69, label %CF77, label %CF79
+
+CF79:                                             ; preds = %CF81
+  %L70 = load i32* %A
+  store i64 %4, i64* %PC
+  %E71 = extractelement <4 x i32> zeroinitializer, i32 0
+  %Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 undef, i32 7, i32 9>
+  %I73 = insertelement <8 x i16> zeroinitializer, i16 %E12, i32 5
+  %B74 = fsub double 0.000000e+00, 0.000000e+00
+  %Sl75 = select i1 %Cmp46, i32 %E6, i32 %E19
+  %Cmp76 = icmp ugt <4 x i32> %I43, zeroinitializer
+  store i8 %L, i8* %Sl
+  store i64 %L47, i64* %PC
+  store i64 %L47, i64* %PC
+  store i8 %L5, i8* %Sl
+  store i8 %L5, i8* %0
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll b/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
new file mode 100644
index 000000000000..f9cab037e7cc
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
@@ -0,0 +1,138 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; `Opc && "Cannot copy registers"' assertion.
+; It should at least successfully build.
+
+define void @autogen_SD1935737938(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca i64
+  %A3 = alloca <4 x i32>
+  %A2 = alloca i64
+  %A1 = alloca i32
+  %A = alloca <2 x i64>
+  %L = load i8* %0
+  store i8 -1, i8* %0
+  %E = extractelement <2 x i32> zeroinitializer, i32 0
+  %Shuff = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %I = insertelement <1 x i64> <i64 -1>, i64 286689, i32 0
+  %B = lshr i8 %L, -69
+  %ZE = fpext float 0xBF2AA5FE80000000 to double
+  %Sl = select i1 true, <1 x i64> <i64 -1>, <1 x i64> <i64 -1>
+  %L5 = load i8* %0
+  store i8 -69, i8* %0
+  %E6 = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14
+  %Shuff7 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %I8 = insertelement <2 x i32> zeroinitializer, i32 135673, i32 1
+  %B9 = udiv i8 %B, %B
+  %FC = uitofp i32 %3 to double
+  %Sl10 = select i1 true, <1 x i1> zeroinitializer, <1 x i1> zeroinitializer
+  %Cmp = icmp ne <1 x i64> %I, <i64 -1>
+  %L11 = load i8* %0
+  store i8 %L11, i8* %0
+  %E12 = extractelement <1 x i64> <i64 -1>, i32 0
+  %Shuff13 = shufflevector <1 x i64> %Sl, <1 x i64> <i64 -1>, <1 x i32> <i32 1>
+  %I14 = insertelement <1 x i64> %I, i64 303290, i32 0
+  %B15 = frem float 0.000000e+00, 0.000000e+00
+  %Sl16 = select i1 true, <1 x i1> %Cmp, <1 x i1> zeroinitializer
+  %Cmp17 = fcmp one float 0xBD946F9840000000, %B15
+  br label %CF74
+
+CF74:                                             ; preds = %CF74, %CF80, %CF76, %BB
+  %L18 = load i8* %0
+  store i8 -69, i8* %0
+  %E19 = extractelement <1 x i64> %Sl, i32 0
+  %Shuff20 = shufflevector <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10>
+  %I21 = insertelement <2 x i32> %Shuff, i32 135673, i32 0
+  %B22 = urem i32 135673, %3
+  %FC23 = sitofp i8 %L to float
+  %Sl24 = select i1 true, i8 %B, i8 %L18
+  %L25 = load i8* %0
+  store i8 %L, i8* %0
+  %E26 = extractelement <2 x i32> %Shuff, i32 1
+  %Shuff27 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  %I28 = insertelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i64 %E12, i32 8
+  %B29 = frem double %ZE, 0x235104F0E94F406E
+  %Tr = trunc i64 286689 to i8
+  %Sl30 = select i1 true, float 0x45B13EA500000000, float %B15
+  %Cmp31 = icmp eq i32 %B22, %B22
+  br i1 %Cmp31, label %CF74, label %CF80
+
+CF80:                                             ; preds = %CF74
+  %L32 = load i8* %0
+  store i8 -1, i8* %0
+  %E33 = extractelement <2 x i32> zeroinitializer, i32 1
+  %Shuff34 = shufflevector <1 x i64> %Shuff13, <1 x i64> <i64 -1>, <1 x i32> zeroinitializer
+  %I35 = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 -1, i32 0
+  %FC36 = sitofp <1 x i1> %Cmp to <1 x float>
+  %Sl37 = select i1 true, <8 x i8> %Shuff20, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %Cmp38 = icmp sgt <2 x i32> %I21, %Shuff27
+  %L39 = load i8* %0
+  store i8 %Sl24, i8* %0
+  %E40 = extractelement <8 x i64> zeroinitializer, i32 1
+  %Shuff41 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp38, <2 x i32> <i32 0, i32 2>
+  %I42 = insertelement <4 x i32> zeroinitializer, i32 414573, i32 2
+  %B43 = srem i8 %L5, %L39
+  %Sl44 = select i1 %Cmp17, i8 %L, i8 %L
+  %Cmp45 = fcmp une float 0x3AFCE1A0C0000000, 0.000000e+00
+  br i1 %Cmp45, label %CF74, label %CF76
+
+CF76:                                             ; preds = %CF80
+  %L46 = load i8* %0
+  store i8 %L39, i8* %0
+  %E47 = extractelement <2 x i32> %Shuff27, i32 0
+  %Shuff48 = shufflevector <1 x i1> %Sl10, <1 x i1> %Sl10, <1 x i32> <i32 1>
+  %I49 = insertelement <1 x i64> <i64 -1>, i64 %E12, i32 0
+  %FC50 = fptosi double 0x235104F0E94F406E to i32
+  %Sl51 = select i1 %Cmp17, <16 x i64> %I28, <16 x i64> %I28
+  %Cmp52 = icmp ne i8 %Tr, %Sl24
+  br i1 %Cmp52, label %CF74, label %CF75
+
+CF75:                                             ; preds = %CF75, %CF76
+  %L53 = load i8* %0
+  store i8 %L18, i8* %0
+  %E54 = extractelement <8 x i8> %Shuff20, i32 5
+  %Shuff55 = shufflevector <2 x i32> %Shuff, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  %I56 = insertelement <4 x i32> %I42, i32 %B22, i32 2
+  %B57 = sub i64 %E40, %E6
+  %Sl58 = select i1 true, i64 303290, i64 %E40
+  %Cmp59 = icmp slt i64 %E40, %E6
+  br i1 %Cmp59, label %CF75, label %CF78
+
+CF78:                                             ; preds = %CF75
+  %L60 = load i8* %0
+  store i8 -69, i8* %0
+  %E61 = extractelement <2 x i32> zeroinitializer, i32 0
+  %Shuff62 = shufflevector <2 x i32> %Shuff7, <2 x i32> %I21, <2 x i32> <i32 1, i32 3>
+  %I63 = insertelement <1 x i1> %Sl16, i1 %Cmp45, i32 0
+  %B64 = and i8 %Sl44, -69
+  %ZE65 = zext <1 x i1> %Shuff48 to <1 x i64>
+  %Sl66 = select i1 true, <1 x i64> %I, <1 x i64> %I49
+  %Cmp67 = icmp ugt i64 286689, %E40
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF78
+  %L68 = load i8* %0
+  store i64 %B57, i64* %2
+  %E69 = extractelement <2 x i1> %Shuff41, i32 1
+  br i1 %E69, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF
+  %Shuff70 = shufflevector <1 x i64> %Shuff34, <1 x i64> <i64 -1>, <1 x i32> zeroinitializer
+  %I71 = insertelement <2 x i32> %Shuff, i32 %E26, i32 0
+  %Se = sext i8 %L60 to i32
+  %Sl72 = select i1 %Cmp45, <2 x i32> %Shuff62, <2 x i32> %I71
+  %Cmp73 = fcmp ugt double 0x235104F0E94F406E, 0x235104F0E94F406E
+  br i1 %Cmp73, label %CF77, label %CF79
+
+CF79:                                             ; preds = %CF77
+  store i8 %L18, i8* %0
+  store i8 %E54, i8* %0
+  store i8 %L39, i8* %0
+  store i8 %L39, i8* %0
+  store i8 %B, i8* %0
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2090927243-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s2090927243-simplified.ll
new file mode 100644
index 000000000000..38113143e6d5
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s2090927243-simplified.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a "Cannot select ..." error.
+; This was because undef's are ignored when checking if a vector constant is a
+; splat, but are legalized to zero if left in the DAG which changes the constant
+; into a non-splat.
+;
+; It should at least successfully build.
+
+define void @autogen_SD2090927243() {
+BB:
+  br label %CF77
+
+CF77:                                             ; preds = %CF77, %BB
+  %Shuff27 = shufflevector <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>,
+                           <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>,
+                           <16 x i32> <i32 27, i32 29, i32 31, i32 1, i32 3, i32 5, i32 undef, i32 9, i32 11, i32 13, i32 undef, i32 17, i32 19, i32 21, i32 23, i32 undef>
+  %ZE30 = zext <16 x i8> %Shuff27 to <16 x i32>
+  %Cmp32 = fcmp ueq float undef, 0x3CDA6E5E40000000
+  br i1 %Cmp32, label %CF77, label %CF
+
+CF:                                               ; preds = %CF, %CF77
+  %E48 = extractelement <16 x i32> %ZE30, i32 14
+  br i1 undef, label %CF, label %CF78
+
+CF78:                                             ; preds = %CF
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2501752154-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s2501752154-simplified.ll
new file mode 100644
index 000000000000..564ad7436d30
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s2501752154-simplified.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a "Cannot select ..." error.
+; This happened because the legalizer treated undef's in the <4 x float>
+; constant as equivalent to the defined elements when checking if it is a constant
+; splat, but then proceeded to legalize the undef's to zero, leaving it as a
+; non-splat that cannot be selected. It should have eliminated the undef's by
+; rewriting the splat constant.
+
+; It should at least successfully build.
+
+define void @autogen_SD2501752154() {
+BB:
+  %BC = bitcast <4 x i32> <i32 -1, i32 -1, i32 undef, i32 undef> to <4 x float>
+  br label %CF74
+
+CF74:                                             ; preds = %CF74, %BB
+  %E54 = extractelement <1 x i1> undef, i32 0
+  br i1 %E54, label %CF74, label %CF79
+
+CF79:                                             ; preds = %CF74
+  %I63 = insertelement <4 x float> %BC, float undef, i32 0
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll b/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
new file mode 100644
index 000000000000..e14f405320cb
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
@@ -0,0 +1,141 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA after dereferencing a null this pointer.
+; It should at least successfully build.
+
+define void @autogen_SD2704903805(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca i32
+  %A3 = alloca i32
+  %A2 = alloca i8
+  %A1 = alloca i32
+  %A = alloca i8
+  %L = load i8* %0
+  store i8 %5, i8* %0
+  %E = extractelement <2 x i16> zeroinitializer, i32 0
+  %Shuff = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> undef
+  %I = insertelement <1 x i8> <i8 -1>, i8 85, i32 0
+  %B = lshr <2 x i16> zeroinitializer, zeroinitializer
+  %FC = sitofp <4 x i16> zeroinitializer to <4 x float>
+  %Sl = select i1 true, float 0.000000e+00, float 0x401E76A240000000
+  %Cmp = icmp ule i16 -25210, %E
+  br label %CF83
+
+CF83:                                             ; preds = %BB
+  %L5 = load i8* %0
+  store i8 85, i8* %0
+  %E6 = extractelement <1 x i8> <i8 -1>, i32 0
+  %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %I8 = insertelement <4 x i16> zeroinitializer, i16 %E, i32 3
+  %B9 = ashr <2 x i16> %Shuff7, zeroinitializer
+  %FC10 = sitofp i32 -1 to float
+  %Sl11 = select i1 %Cmp, i32 -1, i32 -1
+  %Cmp12 = icmp sgt i32 -1, -1
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF81, %CF83
+  %L13 = load i8* %0
+  store i8 0, i8* %0
+  %E14 = extractelement <2 x i64> zeroinitializer, i32 0
+  %Shuff15 = shufflevector <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
+  %I16 = insertelement <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i64 81222, i32 1
+  %B17 = lshr <2 x i16> zeroinitializer, %B
+  %Tr = trunc i32 272597 to i1
+  br i1 %Tr, label %CF, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF
+  %Sl18 = select i1 %Cmp, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer
+  %Cmp19 = icmp ne i1 %Cmp12, %Cmp
+  br i1 %Cmp19, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF80
+  %L20 = load i8* %0
+  store i8 85, i8* %0
+  %E21 = extractelement <1 x i8> <i8 -1>, i32 0
+  %Shuff22 = shufflevector <1 x i8> <i8 -1>, <1 x i8> %Shuff, <1 x i32> zeroinitializer
+  %I23 = insertelement <1 x i8> <i8 -1>, i8 %L5, i32 0
+  %FC24 = fptoui <4 x float> %FC to <4 x i16>
+  %Sl25 = select i1 %Cmp, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 -1>
+  %Cmp26 = icmp ult <4 x i64> %I16, %Shuff15
+  %L27 = load i8* %0
+  store i8 %L, i8* %0
+  %E28 = extractelement <1 x i8> <i8 -1>, i32 0
+  %Shuff29 = shufflevector <8 x i16> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 5, i32 undef, i32 9>
+  %I30 = insertelement <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i64 %E14, i32 1
+  %B31 = mul i8 %E28, 85
+  %PC = bitcast i32* %A3 to i32*
+  %Sl32 = select i1 %Cmp12, float %FC10, float 0x4712BFE680000000
+  %L33 = load i32* %PC
+  store i32 %L33, i32* %PC
+  %E34 = extractelement <2 x i16> zeroinitializer, i32 1
+  %Shuff35 = shufflevector <1 x i8> %Shuff, <1 x i8> <i8 -1>, <1 x i32> zeroinitializer
+  %I36 = insertelement <1 x i8> <i8 -1>, i8 %L13, i32 0
+  %B37 = xor i8 %L27, %L
+  %Sl38 = select i1 %Cmp, i16 %E34, i16 %E
+  %Cmp39 = icmp eq i1 %Cmp19, %Cmp
+  br i1 %Cmp39, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF81
+  %L40 = load i32* %PC
+  store i32 %3, i32* %PC
+  %E41 = extractelement <2 x i32> zeroinitializer, i32 0
+  %Shuff42 = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %I43 = insertelement <1 x i8> <i8 -1>, i8 0, i32 0
+  %B44 = or i16 %E, -25210
+  %Se = sext i32 %3 to i64
+  %Sl45 = select i1 true, <1 x i8> %Shuff, <1 x i8> %I43
+  %Cmp46 = icmp sge <1 x i8> %I36, %Shuff
+  %L47 = load i32* %PC
+  store i32 %L33, i32* %PC
+  %E48 = extractelement <2 x i16> zeroinitializer, i32 0
+  %Shuff49 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1>
+  %I50 = insertelement <2 x i32> %Sl25, i32 47963, i32 1
+  %B51 = srem <1 x i8> %I, %Shuff22
+  %FC52 = sitofp i8 %5 to double
+  %Sl53 = select i1 %Cmp39, i8 %L27, i8 85
+  %Cmp54 = icmp slt i16 %E34, %E34
+  br i1 %Cmp54, label %CF77, label %CF78
+
+CF78:                                             ; preds = %CF78, %CF77
+  %L55 = load i32* %PC
+  store i32 %L33, i32* %PC
+  %E56 = extractelement <8 x i16> %Shuff29, i32 4
+  %Shuff57 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1>
+  %I58 = insertelement <1 x i8> %B51, i8 %Sl53, i32 0
+  %ZE = fpext float %FC10 to double
+  %Sl59 = select i1 %Cmp12, <2 x i16> %B9, <2 x i16> zeroinitializer
+  %Cmp60 = fcmp ult double 0.000000e+00, 0.000000e+00
+  br i1 %Cmp60, label %CF78, label %CF79
+
+CF79:                                             ; preds = %CF79, %CF78
+  %L61 = load i32* %PC
+  store i32 %L33, i32* %A3
+  %E62 = extractelement <4 x i64> %Shuff15, i32 1
+  %Shuff63 = shufflevector <8 x i16> %Shuff29, <8 x i16> %Shuff29, <8 x i32> <i32 undef, i32 10, i32 12, i32 undef, i32 undef, i32 undef, i32 4, i32 6>
+  %I64 = insertelement <2 x i64> zeroinitializer, i64 %Se, i32 0
+  %B65 = shl i8 %5, 85
+  %ZE66 = zext <4 x i1> %Cmp26 to <4 x i32>
+  %Sl67 = select i1 %Tr, <1 x i8> %Shuff, <1 x i8> %I23
+  %Cmp68 = fcmp olt float 0x4712BFE680000000, 0x4712BFE680000000
+  br i1 %Cmp68, label %CF79, label %CF82
+
+CF82:                                             ; preds = %CF79
+  %L69 = load i32* %PC
+  store i32 %L33, i32* %PC
+  %E70 = extractelement <8 x i16> zeroinitializer, i32 3
+  %Shuff71 = shufflevector <4 x i64> %Shuff15, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 6, i32 undef, i32 2, i32 4>
+  %I72 = insertelement <1 x i8> <i8 -1>, i8 %L, i32 0
+  %B73 = srem i64 %E62, %Se
+  %ZE74 = zext <4 x i1> %Cmp26 to <4 x i32>
+  %Sl75 = select i1 %Cmp, i32 463279, i32 %L61
+  %Cmp76 = icmp sgt <1 x i8> %Shuff49, %Shuff22
+  store i8 %B31, i8* %0
+  store i8 85, i8* %0
+  store i32 %L33, i32* %PC
+  store i8 %B65, i8* %0
+  store i8 %L5, i8* %0
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
new file mode 100644
index 000000000000..1a03e55d9d54
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
@@ -0,0 +1,149 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Don't know how to expand this condition!" unreachable.
+; It should at least successfully build.
+
+define void @autogen_SD3861334421(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca <2 x i32>
+  %A3 = alloca <2 x double>
+  %A2 = alloca i64
+  %A1 = alloca i64
+  %A = alloca double
+  %L = load i8* %0
+  store i8 -101, i8* %0
+  %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
+  %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1>
+  %I = insertelement <8 x i64> zeroinitializer, i64 %4, i32 5
+  %B = and i64 116376, 57247
+  %FC = uitofp i8 7 to double
+  %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %L5 = load i8* %0
+  store i8 %L, i8* %0
+  %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3
+  %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
+  %I8 = insertelement <8 x i8> %Sl, i8 7, i32 4
+  %B9 = or <8 x i64> zeroinitializer, zeroinitializer
+  %Sl10 = select i1 false, i64 116376, i64 380809
+  %Cmp = icmp sgt i32 394647, 17081
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %L11 = load i8* %0
+  store i8 -87, i8* %0
+  %E12 = extractelement <4 x i64> zeroinitializer, i32 0
+  %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5>
+  %I14 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 1
+  %B15 = srem i64 %Sl10, 380809
+  %FC16 = sitofp i64 57247 to float
+  %Sl17 = select i1 false, double 0x87A9374869A78EC6, double 0.000000e+00
+  %Cmp18 = icmp uge i8 %L, %5
+  br i1 %Cmp18, label %CF, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF88, %CF
+  %L19 = load i8* %0
+  store i8 -101, i8* %0
+  %E20 = extractelement <4 x i64> zeroinitializer, i32 0
+  %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+  %I22 = insertelement <4 x i64> zeroinitializer, i64 127438, i32 1
+  %B23 = fdiv double %Sl17, 0.000000e+00
+  %Sl24 = select i1 %Cmp18, i32 420510, i32 492085
+  %Cmp25 = icmp ugt i1 %Cmp18, false
+  br i1 %Cmp25, label %CF80, label %CF83
+
+CF83:                                             ; preds = %CF83, %CF80
+  %L26 = load i8* %0
+  store i8 -87, i8* %0
+  %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
+  %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+  %I29 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 492085, i32 1
+  %B30 = lshr <8 x i8> %I8, %I8
+  %FC31 = sitofp <4 x i32> %Shuff28 to <4 x double>
+  %Sl32 = select i1 false, <8 x i8> %I8, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %Cmp33 = icmp eq i64 %B, 116376
+  br i1 %Cmp33, label %CF83, label %CF88
+
+CF88:                                             ; preds = %CF83
+  %L34 = load i8* %0
+  store i8 -87, i8* %0
+  %E35 = extractelement <8 x i64> %Shuff, i32 7
+  %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
+  %I37 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 0
+  %B38 = xor <8 x i64> %B9, %B9
+  %ZE = zext i32 0 to i64
+  %Sl39 = select i1 %Cmp33, i8 %L11, i8 %L5
+  %Cmp40 = icmp sgt i1 %Cmp, false
+  br i1 %Cmp40, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF81, %CF85, %CF87, %CF88
+  %L41 = load i8* %0
+  store i8 %L34, i8* %0
+  %E42 = extractelement <8 x i64> %Shuff13, i32 6
+  %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7>
+  %I44 = insertelement <4 x i64> zeroinitializer, i64 116376, i32 3
+  %B45 = fsub float %FC16, 0x3AC86DCC40000000
+  %Tr = trunc <4 x i64> %I14 to <4 x i32>
+  %Sl46 = select i1 false, <8 x i64> %B38, <8 x i64> zeroinitializer
+  %Cmp47 = icmp sgt i1 %Cmp18, %Cmp18
+  br i1 %Cmp47, label %CF81, label %CF85
+
+CF85:                                             ; preds = %CF81
+  %L48 = load i8* %0
+  store i8 -101, i8* %0
+  %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2
+  %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+  %I51 = insertelement <4 x i64> zeroinitializer, i64 %E20, i32 3
+  %B52 = or i32 336955, %Sl24
+  %FC53 = uitofp i8 %L48 to double
+  %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24
+  %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer
+  %L56 = load i8* %0
+  store i8 %L11, i8* %0
+  %E57 = extractelement <4 x i64> %Shuff21, i32 1
+  %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2>
+  %I59 = insertelement <4 x i64> zeroinitializer, i64 %E42, i32 2
+  %B60 = udiv <8 x i8> %Sl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %Tr61 = trunc i8 49 to i1
+  br i1 %Tr61, label %CF81, label %CF84
+
+CF84:                                             ; preds = %CF84, %CF85
+  %Sl62 = select i1 false, i8 %L, i8 %L48
+  %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer
+  %L64 = load i8* %0
+  store i8 %5, i8* %0
+  %E65 = extractelement <8 x i1> %Cmp55, i32 0
+  br i1 %E65, label %CF84, label %CF87
+
+CF87:                                             ; preds = %CF84
+  %Shuff66 = shufflevector <4 x i64> %Shuff21, <4 x i64> %I14, <4 x i32> <i32 3, i32 undef, i32 7, i32 1>
+  %I67 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %Sl54, i32 1
+  %B68 = frem double %B23, %Sl17
+  %ZE69 = zext <8 x i8> %Sl32 to <8 x i64>
+  %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12
+  %Cmp71 = icmp slt <8 x i64> %I, %Shuff
+  %L72 = load i8* %0
+  store i8 %L72, i8* %0
+  %E73 = extractelement <8 x i1> %Cmp55, i32 6
+  br i1 %E73, label %CF81, label %CF82
+
+CF82:                                             ; preds = %CF82, %CF87
+  %Shuff74 = shufflevector <4 x i32> %I67, <4 x i32> %I29, <4 x i32> <i32 1, i32 3, i32 undef, i32 7>
+  %I75 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 3
+  %B76 = fsub double 0.000000e+00, %FC53
+  %Tr77 = trunc i32 %E to i8
+  %Sl78 = select i1 %Cmp18, i64* %A2, i64* %2
+  %Cmp79 = icmp eq i32 394647, 492085
+  br i1 %Cmp79, label %CF82, label %CF86
+
+CF86:                                             ; preds = %CF82
+  store i64 %Sl70, i64* %Sl78
+  store i64 %E57, i64* %Sl78
+  store i64 %Sl70, i64* %Sl78
+  store i64 %B, i64* %Sl78
+  store i64 %Sl10, i64* %Sl78
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
new file mode 100644
index 000000000000..96547d90cb40
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
@@ -0,0 +1,143 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Type for zero vector elements is not legal" assertion.
+; It should at least successfully build.
+
+define void @autogen_SD3926023935(i8*, i32*, i64*, i32, i64, i8) { ; unnamed arguments are numbered %0..%5; body is presumably llvm-stress output (per the file name) and only needs to compile, so keep it verbatim
+BB:
+  %A4 = alloca i1
+  %A3 = alloca float
+  %A2 = alloca double
+  %A1 = alloca float
+  %A = alloca double
+  %L = load i8* %0
+  store i8 -123, i8* %0
+  %E = extractelement <4 x i64> zeroinitializer, i32 1
+  %Shuff = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %I = insertelement <2 x i1> zeroinitializer, i1 false, i32 0
+  %BC = bitcast i64 181325 to double
+  %Sl = select i1 false, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
+  %Cmp = icmp ne <4 x i64> zeroinitializer, zeroinitializer
+  %L5 = load i8* %0
+  store i8 %L, i8* %0
+  %E6 = extractelement <4 x i64> zeroinitializer, i32 3
+  %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  %I8 = insertelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i64 498254, i32 4
+  %B = shl i32 0, 364464
+  %Sl9 = select i1 false, i64 %E, i64 498254
+  %Cmp10 = icmp sge i8 -123, %5
+  br label %CF80
+
+CF80:                                             ; preds = %BB
+  %L11 = load i8* %0
+  store i8 -123, i8* %0
+  %E12 = extractelement <2 x i16> zeroinitializer, i32 1
+  %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %I14 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %B, i32 2
+  %B15 = sdiv i64 334618, -1
+  %PC = bitcast i1* %A4 to i64*
+  %Sl16 = select i1 %Cmp10, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  %Cmp17 = icmp ule <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %Sl16
+  %L18 = load double* %A2
+  store i64 498254, i64* %PC
+  %E19 = extractelement <4 x i64> zeroinitializer, i32 0
+  %Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1>
+  %I21 = insertelement <2 x i1> zeroinitializer, i1 false, i32 1
+  %B22 = fadd double 0.000000e+00, %BC
+  %ZE = zext <2 x i1> %Shuff20 to <2 x i32>
+  %Sl23 = select i1 %Cmp10, <2 x i1> %Shuff20, <2 x i1> zeroinitializer
+  %Cmp24 = icmp ult <2 x i32> zeroinitializer, zeroinitializer
+  %L25 = load i8* %0
+  store i8 %L25, i8* %0
+  %E26 = extractelement <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, i32 3
+  %Shuff27 = shufflevector <4 x i32> %Shuff, <4 x i32> %I14, <4 x i32> <i32 6, i32 0, i32 undef, i32 4>
+  %I28 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 0
+  %B29 = lshr i8 %E26, -43
+  %Tr = trunc i8 %L5 to i1
+  br label %CF79
+
+CF79:                                             ; preds = %CF80
+  %Sl30 = select i1 false, i8 %B29, i8 -123
+  %Cmp31 = icmp sge <2 x i1> %I, %I
+  %L32 = load i64* %PC
+  store i8 -123, i8* %0
+  %E33 = extractelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 2
+  %Shuff34 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+  %I35 = insertelement <4 x i64> zeroinitializer, i64 498254, i32 3
+  %B36 = sub <8 x i64> %I8, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  %PC37 = bitcast i8* %0 to i1*
+  %Sl38 = select i1 %Cmp10, i8 -43, i8 %L5
+  %Cmp39 = icmp eq i64 498254, %B15
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF79
+  %L40 = load double* %A
+  store i1 %Cmp39, i1* %PC37
+  %E41 = extractelement <4 x i64> zeroinitializer, i32 3
+  %Shuff42 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %ZE, <2 x i32> <i32 2, i32 undef>
+  %I43 = insertelement <4 x i32> %Shuff, i32 %3, i32 0
+  %B44 = shl i64 %E41, -1
+  %Se = sext <2 x i1> %I to <2 x i32>
+  %Sl45 = select i1 %Cmp10, i1 false, i1 false
+  br i1 %Sl45, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF
+  %Cmp46 = fcmp uno double 0.000000e+00, 0.000000e+00
+  br i1 %Cmp46, label %CF77, label %CF78
+
+CF78:                                             ; preds = %CF78, %CF83, %CF82, %CF77
+  %L47 = load i64* %PC
+  store i8 -123, i8* %0
+  %E48 = extractelement <4 x i64> zeroinitializer, i32 3
+  %Shuff49 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 undef>
+  %I50 = insertelement <2 x i1> zeroinitializer, i1 %Cmp10, i32 0
+  %B51 = sdiv i64 %E19, 463132
+  %Tr52 = trunc i64 %E48 to i32
+  %Sl53 = select i1 %Tr, i1 %Cmp46, i1 %Cmp10
+  br i1 %Sl53, label %CF78, label %CF83
+
+CF83:                                             ; preds = %CF78
+  %Cmp54 = fcmp uge double %L40, %L40
+  br i1 %Cmp54, label %CF78, label %CF82
+
+CF82:                                             ; preds = %CF83
+  %L55 = load i64* %PC
+  store i64 %L32, i64* %PC
+  %E56 = extractelement <2 x i16> %Shuff7, i32 1
+  %Shuff57 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
+  %I58 = insertelement <2 x i32> %Sl, i32 %Tr52, i32 0
+  %B59 = or i32 %B, %3
+  %FC = sitofp i64 498254 to double
+  %Sl60 = select i1 false, i64 %E6, i64 -1
+  %Cmp61 = icmp sgt <4 x i32> %Shuff27, %I43
+  %L62 = load i64* %PC
+  store i64 %Sl9, i64* %PC
+  %E63 = extractelement <2 x i32> %ZE, i32 0
+  %Shuff64 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 1, i32 3, i32 undef, i32 7>
+  %I65 = insertelement <4 x i32> %Shuff, i32 %3, i32 3
+  %B66 = sub i64 %L47, 53612
+  %Tr67 = trunc i64 %4 to i32
+  %Sl68 = select i1 %Cmp39, i1 %Cmp39, i1 false
+  br i1 %Sl68, label %CF78, label %CF81
+
+CF81:                                             ; preds = %CF82
+  %Cmp69 = icmp ne <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36
+  %L70 = load i8* %0
+  store i64 %L55, i64* %PC
+  %E71 = extractelement <4 x i32> %Shuff49, i32 1
+  %Shuff72 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff34, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %I73 = insertelement <4 x i64> %Shuff64, i64 %E, i32 2
+  %B74 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36
+  %Sl75 = select i1 %Sl68, i64 %B51, i64 %L55
+  %Cmp76 = icmp sgt <8 x i64> %B74, %B36
+  store i1 %Cmp39, i1* %PC37
+  store i64 %E41, i64* %PC
+  store i64 %L32, i64* %PC
+  store i64 %Sl75, i64* %2
+  store i64 %L32, i64* %PC
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll b/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
new file mode 100644
index 000000000000..bef75f3645c8
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
@@ -0,0 +1,152 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed to select instructions for extract_vector_elt for
+; v4f32 on MSA.
+; It should at least successfully build.
+
+define void @autogen_SD3997499501(i8*, i32*, i64*, i32, i64, i8) { ; unnamed arguments are numbered %0..%5; body is presumably llvm-stress output (per the file name) and only needs to compile, so keep it verbatim
+BB:
+  %A4 = alloca <1 x double>
+  %A3 = alloca double
+  %A2 = alloca float
+  %A1 = alloca double
+  %A = alloca double
+  %L = load i8* %0
+  store i8 97, i8* %0
+  %E = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14
+  %Shuff = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %I = insertelement <4 x i64> zeroinitializer, i64 0, i32 3
+  %Tr = trunc <1 x i64> zeroinitializer to <1 x i8>
+  %Sl = select i1 false, double* %A1, double* %A
+  %Cmp = icmp ne <2 x i64> zeroinitializer, zeroinitializer
+  %L5 = load double* %Sl
+  store float -4.374162e+06, float* %A2
+  %E6 = extractelement <4 x i64> zeroinitializer, i32 3
+  %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I, <4 x i32> <i32 2, i32 4, i32 6, i32 undef>
+  %I8 = insertelement <2 x i1> %Shuff, i1 false, i32 0
+  %B = ashr <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %PC = bitcast float* %A2 to float*
+  %Sl9 = select i1 false, i32 82299, i32 0
+  %Cmp10 = icmp slt i8 97, %5
+  br label %CF72
+
+CF72:                                             ; preds = %CF72, %CF80, %CF78, %BB
+  %L11 = load double* %Sl
+  store double 0.000000e+00, double* %Sl
+  %E12 = extractelement <2 x i1> zeroinitializer, i32 0
+  br i1 %E12, label %CF72, label %CF80
+
+CF80:                                             ; preds = %CF72
+  %Shuff13 = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 3, i32 1>
+  %I14 = insertelement <2 x i64> zeroinitializer, i64 %4, i32 1
+  %B15 = fadd double %L5, 0.000000e+00
+  %BC = bitcast i32 0 to float
+  %Sl16 = select i1 %E12, float 0xC7957ED940000000, float %BC
+  %Cmp17 = icmp eq i32 136082, 471909
+  br i1 %Cmp17, label %CF72, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF80
+  %L18 = load double* %Sl
+  store double 0.000000e+00, double* %Sl
+  %E19 = extractelement <2 x i1> zeroinitializer, i32 0
+  br i1 %E19, label %CF77, label %CF78
+
+CF78:                                             ; preds = %CF77
+  %Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %I21 = insertelement <8 x i1> zeroinitializer, i1 %Cmp10, i32 7
+  %B22 = sdiv <4 x i64> %Shuff7, zeroinitializer
+  %FC = uitofp i8 97 to double
+  %Sl23 = select i1 %Cmp10, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
+  %L24 = load double* %Sl
+  store float %Sl16, float* %PC
+  %E25 = extractelement <2 x i1> %Shuff, i32 1
+  br i1 %E25, label %CF72, label %CF76
+
+CF76:                                             ; preds = %CF78
+  %Shuff26 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %B22, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
+  %I27 = insertelement <4 x i64> zeroinitializer, i64 %E, i32 2
+  %B28 = mul <4 x i64> %I27, zeroinitializer
+  %ZE = zext <8 x i1> zeroinitializer to <8 x i64>
+  %Sl29 = select i1 %Cmp17, float -4.374162e+06, float -4.374162e+06
+  %L30 = load i8* %0
+  store double %L5, double* %Sl
+  %E31 = extractelement <8 x i1> zeroinitializer, i32 5
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF81, %CF76
+  %Shuff32 = shufflevector <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <16 x i32> <i32 8, i32 undef, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 undef, i32 26, i32 28, i32 30, i32 undef, i32 2, i32 4, i32 6>
+  %I33 = insertelement <8 x i1> zeroinitializer, i1 false, i32 2
+  %BC34 = bitcast <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> to <4 x float>
+  %Sl35 = select i1 %E12, <2 x i1> %I8, <2 x i1> zeroinitializer
+  %Cmp36 = fcmp oge double 0xC2C3BAE2D5C18360, 0xC2C3BAE2D5C18360
+  br i1 %Cmp36, label %CF, label %CF74
+
+CF74:                                             ; preds = %CF74, %CF
+  %L37 = load float* %PC
+  store double 0.000000e+00, double* %Sl
+  %E38 = extractelement <2 x i1> %Sl23, i32 1
+  br i1 %E38, label %CF74, label %CF75
+
+CF75:                                             ; preds = %CF75, %CF82, %CF74
+  %Shuff39 = shufflevector <2 x i1> %Shuff13, <2 x i1> zeroinitializer, <2 x i32> <i32 undef, i32 2>
+  %I40 = insertelement <4 x i64> zeroinitializer, i64 %4, i32 2
+  %Sl41 = select i1 %Cmp10, i32 0, i32 %3
+  %Cmp42 = icmp ne <1 x i64> zeroinitializer, zeroinitializer
+  %L43 = load double* %Sl
+  store i64 %4, i64* %2
+  %E44 = extractelement <2 x i1> %Shuff20, i32 1
+  br i1 %E44, label %CF75, label %CF82
+
+CF82:                                             ; preds = %CF75
+  %Shuff45 = shufflevector <2 x i1> %Sl23, <2 x i1> %Sl23, <2 x i32> <i32 2, i32 0>
+  %I46 = insertelement <4 x i64> zeroinitializer, i64 0, i32 0
+  %B47 = sub i64 %E, %E6
+  %Sl48 = select i1 %Cmp10, double %L5, double %L43
+  %Cmp49 = icmp uge i64 %4, %B47
+  br i1 %Cmp49, label %CF75, label %CF81
+
+CF81:                                             ; preds = %CF82
+  %L50 = load i8* %0
+  store double %L43, double* %Sl
+  %E51 = extractelement <4 x i64> %Shuff7, i32 3
+  %Shuff52 = shufflevector <4 x float> %BC34, <4 x float> %BC34, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
+  %I53 = insertelement <2 x i1> %Cmp, i1 %E25, i32 0
+  %B54 = fdiv double %L24, %L43
+  %BC55 = bitcast <4 x i64> zeroinitializer to <4 x double>
+  %Sl56 = select i1 false, i8 %5, i8 97
+  %L57 = load i8* %0
+  store i8 %L50, i8* %0
+  %E58 = extractelement <2 x i1> %Shuff20, i32 1
+  br i1 %E58, label %CF, label %CF73
+
+CF73:                                             ; preds = %CF73, %CF81
+  %Shuff59 = shufflevector <2 x i1> %Shuff13, <2 x i1> %Shuff45, <2 x i32> <i32 undef, i32 0>
+  %I60 = insertelement <4 x float> %Shuff52, float -4.374162e+06, i32 0
+  %B61 = mul <4 x i64> %I46, zeroinitializer
+  %PC62 = bitcast double* %A3 to float*
+  %Sl63 = select i1 %Cmp10, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer
+  %Cmp64 = icmp ne <2 x i1> %Cmp, %Shuff
+  %L65 = load double* %A1
+  store float -4.374162e+06, float* %PC62
+  %E66 = extractelement <8 x i1> %I21, i32 3
+  br i1 %E66, label %CF73, label %CF79
+
+CF79:                                             ; preds = %CF79, %CF73
+  %Shuff67 = shufflevector <8 x i1> %I21, <8 x i1> %I21, <8 x i32> <i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 undef, i32 4>
+  %I68 = insertelement <1 x i1> %Cmp42, i1 %E25, i32 0
+  %B69 = sdiv <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  %Sl70 = select i1 %Cmp49, <2 x i1> %Sl23, <2 x i1> %Shuff45
+  %Cmp71 = icmp ne i1 false, false
+  br i1 %Cmp71, label %CF79, label %CF83
+
+CF83:                                             ; preds = %CF79
+  store double 0.000000e+00, double* %Sl
+  store float %BC, float* %PC62
+  store double %Sl48, double* %Sl
+  store double %FC, double* %Sl
+  store float %BC, float* %PC62
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll b/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll
new file mode 100644
index 000000000000..24e27cbf14b8
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s449609655-simplified.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test is based on an llvm-stress generated test case with seed=449609655
+
+; This test originally failed for MSA with a
+; "Comparison requires equal bit widths" assertion.
+; The legalizer legalized the <4 x i8>'s into <4 x i32>'s, then a call to
+; isVSplat() returned the splat value for <i8 -1, i8 -1, ...> as a 32-bit APInt
+; (255), but the zeroinitializer splat value as an 8-bit APInt (0). The
+; assertion occurred when trying to check the values were bitwise inverses of
+; each other.
+;
+; It should at least successfully build.
+
+define void @autogen_SD449609655(i8) { ; the single unnamed i8 argument is referenced as %0
+BB:
+  %Cmp = icmp ult i8 -3, %0 ; condition for the select in CF78; depends on the argument
+  br label %CF78
+
+CF78:                                             ; preds = %CF81, %CF78, %BB
+  %Sl31 = select i1 %Cmp, <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8> zeroinitializer ; chooses between an all-ones <4 x i8> splat and a zero splat, the two constants named in the file header
+  br i1 undef, label %CF78, label %CF81
+
+CF81:                                             ; preds = %CF78
+  br i1 undef, label %CF78, label %CF80
+
+CF80:                                             ; preds = %CF81
+  %I59 = insertelement <4 x i8> %Sl31, i8 undef, i32 1 ; the only use of %Sl31
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll b/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
new file mode 100644
index 000000000000..697871df797d
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
@@ -0,0 +1,139 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; `Num < NumOperands && "Invalid child # of SDNode!"' assertion.
+; It should at least successfully build.
+
+define void @autogen_SD525530439(i8*, i32*, i64*, i32, i64, i8) { ; unnamed arguments are numbered %0..%5; body is presumably llvm-stress output (per the file name) and only needs to compile, so keep it verbatim
+BB:
+  %A4 = alloca i32
+  %A3 = alloca double
+  %A2 = alloca <1 x double>
+  %A1 = alloca <8 x double>
+  %A = alloca i64
+  %L = load i8* %0
+  store i64 33695, i64* %A
+  %E = extractelement <4 x i32> zeroinitializer, i32 3
+  %Shuff = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 2, i32 0>
+  %I = insertelement <4 x i16> zeroinitializer, i16 -11642, i32 0
+  %B = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %ZE = fpext float 0x3B64A2B880000000 to double
+  %Sl = select i1 true, i16 -1, i16 -11642
+  %L5 = load i8* %0
+  store i8 0, i8* %0
+  %E6 = extractelement <4 x i32> zeroinitializer, i32 2
+  %Shuff7 = shufflevector <8 x i1> zeroinitializer, <8 x i1> zeroinitializer, <8 x i32> <i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 undef>
+  %I8 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 3
+  %B9 = sub i32 71140, 439732
+  %BC = bitcast <2 x i32> <i32 -1, i32 -1> to <2 x float>
+  %Sl10 = select i1 true, i32* %1, i32* %1
+  %Cmp = icmp sge <8 x i64> zeroinitializer, zeroinitializer
+  %L11 = load i32* %Sl10
+  store <1 x double> zeroinitializer, <1 x double>* %A2
+  %E12 = extractelement <4 x i16> zeroinitializer, i32 0
+  %Shuff13 = shufflevector <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i32> undef
+  %I14 = insertelement <1 x i16> zeroinitializer, i16 %Sl, i32 0
+  %B15 = or i16 -1, %E12
+  %BC16 = bitcast <4 x i32> zeroinitializer to <4 x float>
+  %Sl17 = select i1 true, i64 %4, i64 %4
+  %Cmp18 = fcmp ugt float 0xC5ABB1BF80000000, 0x3EEF3D6300000000
+  br label %CF75
+
+CF75:                                             ; preds = %CF75, %BB
+  %L19 = load i32* %Sl10
+  store i32 %L11, i32* %Sl10
+  %E20 = extractelement <4 x i32> zeroinitializer, i32 1
+  %Shuff21 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %I8, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+  %I22 = insertelement <4 x float> %BC16, float 0x3EEF3D6300000000, i32 2
+  %B23 = shl i32 71140, 439732
+  %ZE24 = fpext <4 x float> %I22 to <4 x double>
+  %Sl25 = select i1 %Cmp18, i32 %L11, i32 %L11
+  %Cmp26 = icmp ne i32 %E20, %L19
+  br i1 %Cmp26, label %CF75, label %CF76
+
+CF76:                                             ; preds = %CF75
+  %L27 = load i32* %Sl10
+  store i32 439732, i32* %Sl10
+  %E28 = extractelement <4 x i32> %Shuff21, i32 3
+  %Shuff29 = shufflevector <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0>
+  %I30 = insertelement <8 x i1> %Shuff7, i1 %Cmp18, i32 4
+  %Sl31 = select i1 %Cmp18, i32 %3, i32 %B23
+  %Cmp32 = icmp ugt i32 0, %3
+  br label %CF74
+
+CF74:                                             ; preds = %CF74, %CF80, %CF78, %CF76
+  %L33 = load i64* %2
+  store i32 71140, i32* %Sl10
+  %E34 = extractelement <4 x i32> zeroinitializer, i32 1
+  %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> undef
+  %I36 = insertelement <4 x i16> zeroinitializer, i16 -11642, i32 0
+  %B37 = mul <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %Shuff29
+  %Sl38 = select i1 %Cmp18, double 0.000000e+00, double 0x2BA9DB480DA732C6
+  %Cmp39 = icmp sgt i16 -11642, %Sl
+  br i1 %Cmp39, label %CF74, label %CF80
+
+CF80:                                             ; preds = %CF74
+  %L40 = load i8* %0
+  store i32 0, i32* %Sl10
+  %E41 = extractelement <8 x i64> zeroinitializer, i32 1
+  %Shuff42 = shufflevector <1 x i16> %I14, <1 x i16> %I14, <1 x i32> undef
+  %I43 = insertelement <4 x i16> %I36, i16 -11642, i32 0
+  %FC = fptoui float 0x455CA2B080000000 to i16
+  %Sl44 = select i1 %Cmp18, i1 %Cmp18, i1 %Cmp39
+  br i1 %Sl44, label %CF74, label %CF78
+
+CF78:                                             ; preds = %CF80
+  %L45 = load i32* %Sl10
+  store i8 %L5, i8* %0
+  %E46 = extractelement <8 x i1> %Shuff7, i32 2
+  br i1 %E46, label %CF74, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF78
+  %Shuff47 = shufflevector <4 x i16> %I43, <4 x i16> zeroinitializer, <4 x i32> <i32 5, i32 undef, i32 1, i32 3>
+  %I48 = insertelement <1 x i16> %Shuff42, i16 %Sl, i32 0
+  %B49 = mul i8 0, %L40
+  %FC50 = uitofp i32 %3 to double
+  %Sl51 = select i1 %Sl44, i32 %L27, i32 0
+  %Cmp52 = icmp sge i8 %B49, 0
+  br i1 %Cmp52, label %CF77, label %CF79
+
+CF79:                                             ; preds = %CF77
+  %L53 = load i32* %Sl10
+  store i8 %L40, i8* %0
+  %E54 = extractelement <4 x i32> zeroinitializer, i32 1
+  %Shuff55 = shufflevector <4 x i32> %Shuff21, <4 x i32> %I8, <4 x i32> <i32 4, i32 6, i32 undef, i32 2>
+  %I56 = insertelement <4 x i32> zeroinitializer, i32 %Sl51, i32 2
+  %Tr = trunc <1 x i64> %Shuff13 to <1 x i16>
+  %Sl57 = select i1 %Cmp18, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>
+  %Cmp58 = icmp uge <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %I56
+  %L59 = load i8* %0
+  store <1 x double> zeroinitializer, <1 x double>* %A2
+  %E60 = extractelement <4 x i32> zeroinitializer, i32 0
+  %Shuff61 = shufflevector <4 x i32> %I8, <4 x i32> %I8, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
+  %I62 = insertelement <4 x i16> zeroinitializer, i16 %E12, i32 1
+  %B63 = and <4 x i32> %Shuff61, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %PC = bitcast double* %A3 to i32*
+  %Sl64 = select i1 %Cmp18, <4 x i32> %Shuff61, <4 x i32> %Shuff55
+  %Cmp65 = icmp sgt i32 439732, %3
+  br label %CF
+
+CF:                                               ; preds = %CF79
+  %L66 = load i32* %Sl10
+  store i32 %E6, i32* %PC
+  %E67 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2
+  %Shuff68 = shufflevector <4 x i32> %Sl64, <4 x i32> %I8, <4 x i32> <i32 5, i32 undef, i32 1, i32 undef>
+  %I69 = insertelement <4 x i16> %Shuff47, i16 %Sl, i32 3
+  %B70 = sdiv <4 x i64> zeroinitializer, zeroinitializer
+  %FC71 = sitofp i32 %L66 to double
+  %Sl72 = select i1 %Cmp18, i64 %4, i64 %4
+  %Cmp73 = icmp eq <4 x i64> zeroinitializer, %B70
+  store i32 %B23, i32* %PC
+  store i32 %3, i32* %PC
+  store i32 %3, i32* %Sl10
+  store i32 %L27, i32* %1
+  store i32 0, i32* %PC
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll b/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
new file mode 100644
index 000000000000..dc4200ad4285
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
@@ -0,0 +1,143 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed to select instructions for extract_vector_elt for
+; v2f64 on MSA.
+; It should at least successfully build.
+
+define void @autogen_SD997348632(i8*, i32*, i64*, i32, i64, i8) { ; unnamed arguments are numbered %0..%5; body is presumably llvm-stress output (per the file name) and only needs to compile, so keep it verbatim
+BB:
+  %A4 = alloca <2 x i32>
+  %A3 = alloca <16 x i16>
+  %A2 = alloca <4 x i1>
+  %A1 = alloca <4 x i16>
+  %A = alloca <2 x i32>
+  %L = load i8* %0
+  store i8 %L, i8* %0
+  %E = extractelement <4 x i32> zeroinitializer, i32 0
+  %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 1, i32 3, i32 5>
+  %I = insertelement <2 x i1> zeroinitializer, i1 false, i32 1
+  %FC = sitofp <4 x i32> zeroinitializer to <4 x double>
+  %Sl = select i1 false, <4 x i64> %Shuff, <4 x i64> %Shuff
+  %L5 = load i8* %0
+  store i8 %5, i8* %0
+  %E6 = extractelement <1 x i16> zeroinitializer, i32 0
+  %Shuff7 = shufflevector <2 x i1> %I, <2 x i1> %I, <2 x i32> <i32 1, i32 undef>
+  %I8 = insertelement <1 x i16> zeroinitializer, i16 0, i32 0
+  %B = xor i32 376034, %3
+  %FC9 = fptoui float 0x406DB70180000000 to i64
+  %Sl10 = select i1 false, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %Cmp = icmp ult <4 x i64> zeroinitializer, zeroinitializer
+  %L11 = load i8* %0
+  store i8 %L, i8* %0
+  %E12 = extractelement <4 x i64> zeroinitializer, i32 2
+  %Shuff13 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 undef, i32 3>
+  %I14 = insertelement <8 x i32> zeroinitializer, i32 -1, i32 7
+  %B15 = fdiv <4 x double> %FC, %FC
+  %Tr = trunc i32 376034 to i16
+  %Sl16 = select i1 false, <8 x i32> %Sl10, <8 x i32> zeroinitializer
+  %Cmp17 = icmp uge i32 233658, %E
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF79, %CF84, %BB
+  %L18 = load i8* %0
+  store i8 %L, i8* %0
+  %E19 = extractelement <4 x i64> %Sl, i32 3
+  %Shuff20 = shufflevector <2 x i1> %Shuff7, <2 x i1> %I, <2 x i32> <i32 2, i32 0>
+  %I21 = insertelement <4 x i64> zeroinitializer, i64 %FC9, i32 0
+  %B22 = xor <8 x i32> %I14, %I14
+  %Tr23 = trunc i16 0 to i8
+  %Sl24 = select i1 false, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> zeroinitializer
+  %Cmp25 = icmp eq i1 false, false
+  br i1 %Cmp25, label %CF, label %CF79
+
+CF79:                                             ; preds = %CF
+  %L26 = load i8* %0
+  store i8 %L26, i8* %0
+  %E27 = extractelement <1 x i16> zeroinitializer, i32 0
+  %Shuff28 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
+  %I29 = insertelement <16 x i32> %Shuff28, i32 %B, i32 15
+  %B30 = fdiv float 0.000000e+00, -6.749110e+06
+  %Sl31 = select i1 false, i32 %3, i32 %3
+  %Cmp32 = fcmp uno float 0.000000e+00, 0x406DB70180000000
+  br i1 %Cmp32, label %CF, label %CF78
+
+CF78:                                             ; preds = %CF78, %CF79
+  %L33 = load i8* %0
+  store i8 %L, i8* %0
+  %E34 = extractelement <16 x i32> %Shuff28, i32 1
+  %Shuff35 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I21, <4 x i32> <i32 undef, i32 6, i32 0, i32 2>
+  %I36 = insertelement <4 x double> %FC, double 0xA4A57F449CA36CC2, i32 2
+  %Se = sext <4 x i1> %Cmp to <4 x i32>
+  %Sl37 = select i1 %Cmp17, i32 0, i32 0
+  %Cmp38 = icmp ne i32 440284, 376034
+  br i1 %Cmp38, label %CF78, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF82, %CF78
+  %L39 = load i8* %0
+  store i8 %L, i8* %0
+  %E40 = extractelement <2 x i1> %Shuff20, i32 1
+  br i1 %E40, label %CF80, label %CF82
+
+CF82:                                             ; preds = %CF80
+  %Shuff41 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Shuff20, <2 x i32> <i32 2, i32 0>
+  %I42 = insertelement <2 x i1> %Shuff41, i1 false, i32 0
+  %B43 = sub i32 %E, 0
+  %Sl44 = select i1 %Cmp32, <16 x i32> %Shuff28, <16 x i32> %Shuff28
+  %Cmp45 = icmp sgt <4 x i64> zeroinitializer, %I21
+  %L46 = load i8* %0
+  store i8 %L11, i8* %0
+  %E47 = extractelement <8 x i32> %Sl16, i32 4
+  %Shuff48 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Shuff7, <2 x i32> <i32 undef, i32 1>
+  %I49 = insertelement <2 x i1> %Shuff48, i1 %Cmp17, i32 1
+  %B50 = and <8 x i32> %I14, %Sl10
+  %FC51 = fptoui float -6.749110e+06 to i1
+  br i1 %FC51, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF81, %CF82
+  %Sl52 = select i1 false, float -6.749110e+06, float 0x406DB70180000000
+  %Cmp53 = icmp uge <2 x i32> <i32 -1, i32 -1>, <i32 -1, i32 -1>
+  %L54 = load i8* %0
+  store i8 %L5, i8* %0
+  %E55 = extractelement <8 x i32> zeroinitializer, i32 7
+  %Shuff56 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 4, i32 6, i32 0>
+  %I57 = insertelement <2 x i1> %Shuff7, i1 false, i32 0
+  %B58 = fmul <4 x double> %FC, %FC
+  %FC59 = fptoui <4 x double> %I36 to <4 x i16>
+  %Sl60 = select i1 %Cmp17, <2 x i1> %I, <2 x i1> %I57
+  %Cmp61 = icmp ule <8 x i32> %B50, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %L62 = load i8* %0
+  store i8 %L33, i8* %0
+  %E63 = extractelement <4 x i64> %Shuff, i32 2
+  %Shuff64 = shufflevector <4 x i64> %Shuff56, <4 x i64> %Shuff56, <4 x i32> <i32 5, i32 7, i32 1, i32 undef>
+  %I65 = insertelement <2 x i1> zeroinitializer, i1 false, i32 1
+  %B66 = sdiv i32 %B, %E55
+  %Tr67 = trunc i8 %L54 to i1
+  br i1 %Tr67, label %CF81, label %CF83
+
+CF83:                                             ; preds = %CF83, %CF81
+  %Sl68 = select i1 %Cmp17, i1 %Cmp25, i1 %Tr67
+  br i1 %Sl68, label %CF83, label %CF84
+
+CF84:                                             ; preds = %CF83
+  %Cmp69 = icmp uge i32 %E, %E34
+  br i1 %Cmp69, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF84
+  %L70 = load i8* %0
+  store i8 %L, i8* %0
+  %E71 = extractelement <4 x i64> %Shuff, i32 0
+  %Shuff72 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1>
+  %I73 = insertelement <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 %B66, i32 1
+  %FC74 = uitofp i1 %Cmp32 to double
+  %Sl75 = select i1 %FC51, i16 9704, i16 0
+  %Cmp76 = icmp ugt <1 x i16> %I8, %I8
+  store i8 %L39, i8* %0
+  store i8 %5, i8* %0
+  store i8 %Tr23, i8* %0
+  store i8 %L, i8* %0
+  store i8 %5, i8* %0
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/llvm-stress-sz1-s742806235.ll b/test/CodeGen/Mips/msa/llvm-stress-sz1-s742806235.ll
new file mode 100644
index 000000000000..8c4fcbad65b4
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-sz1-s742806235.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed to select code for a truncstore of a
+; build_vector.
+; It should at least successfully build.
+
+define void @autogen_SD742806235(i8*, i32*, i64*, i32, i64, i8) { ; unnamed arguments are numbered %0..%5; only %0 and %5 are used
+BB:
+  %A4 = alloca double ; of the five allocas, only %A2 is referenced below
+  %A3 = alloca double
+  %A2 = alloca <8 x i8>
+  %A1 = alloca <4 x float>
+  %A = alloca i1
+  store i8 %5, i8* %0
+  store i8 %5, i8* %0
+  store i8 %5, i8* %0
+  store <8 x i8> <i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1>, <8 x i8>* %A2 ; the only vector store: a constant alternating 0/-1 <8 x i8>, presumably the build_vector store the file header refers to
+  store i8 %5, i8* %0
+  ret void
+}
diff --git a/test/CodeGen/Mips/msa/shift-dagcombine.ll b/test/CodeGen/Mips/msa/shift-dagcombine.ll
new file mode 100644
index 000000000000..0d809fb4fbf1
--- /dev/null
+++ b/test/CodeGen/Mips/msa/shift-dagcombine.ll
@@ -0,0 +1,70 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define void @ashr_v4i32(<4 x i32>* %c) nounwind {
+  ; CHECK-LABEL: ashr_v4i32:
+  ; Both ashr operations take constant operands; ldi.w is expected and sra is not.
+  %1 = ashr <4 x i32> <i32 1, i32 2, i32 4, i32 8>,
+                      <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK-NOT: sra
+  ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], 1
+  ; CHECK-NOT: sra
+  store volatile <4 x i32> %1, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  %2 = ashr <4 x i32> <i32 -2, i32 -4, i32 -8, i32 -16>,
+                      <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK-NOT: sra
+  ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], -2
+  ; CHECK-NOT: sra
+  store volatile <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ashr_v4i32
+}
+
+define void @lshr_v4i32(<4 x i32>* %c) nounwind {
+  ; CHECK-LABEL: lshr_v4i32:
+  ; Both lshr operations take constant operands; ldi.w then a %lo load are expected, srl is not.
+  %1 = lshr <4 x i32> <i32 1, i32 2, i32 4, i32 8>,
+                      <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK-NOT: srl
+  ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], 1
+  ; CHECK-NOT: srl
+  store volatile <4 x i32> %1, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  %2 = lshr <4 x i32> <i32 -2, i32 -4, i32 -8, i32 -16>,
+                      <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK-NOT: srl
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], %lo
+  ; CHECK-NOT: srl
+  store volatile <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size lshr_v4i32
+}
+
+define void @shl_v4i32(<4 x i32>* %c) nounwind {
+  ; CHECK-LABEL: shl_v4i32:
+  ; Both shl operations take constant operands; ldi.w is expected and sll is not.
+  %1 = shl <4 x i32> <i32 8, i32 4, i32 2, i32 1>,
+                     <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK-NOT: sll
+  ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], 8
+  ; CHECK-NOT: sll
+  store volatile <4 x i32> %1, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  %2 = shl <4 x i32> <i32 -8, i32 -4, i32 -2, i32 -1>,
+                     <i32 0, i32 1, i32 2, i32 3>
+  ; CHECK-NOT: sll
+  ; CHECK-DAG: ldi.w [[R1:\$w[0-9]+]], -8
+  ; CHECK-NOT: sll
+  store volatile <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R1]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size shl_v4i32
+}
diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll
new file mode 100644
index 000000000000..316c669c3ac6
--- /dev/null
+++ b/test/CodeGen/Mips/msa/shuffle.ll
@@ -0,0 +1,803 @@
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v16i8_0:
+  ; Reverses all 16 bytes of %a; expects vshf.b driven by a mask loaded via %lo.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v16i8_0
+}
+
+define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v16i8_1:
+  ; Broadcasts element 1 of %a to every lane; expects splati.b with index 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v16i8_1
+}
+
+define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v16i8_2:
+  ; Every mask index selects from %b; expects vshf.b with %b as both sources.
+  %1 = load <16 x i8>* %a
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v16i8_2
+}
+
+define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v16i8_3:
+  ; Mask mixes elements of %a and %b; expects vshf.b on both loaded vectors.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
+  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v16i8_3
+}
+
+define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v16i8_4:
+  ; Both operands are %a, so indices 1 and 17 name the same element; expects splati.b index 1.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v16i8_4
+}
+
+define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v8i16_0:
+  ; Reverses all 8 halfwords of %a; expects vshf.h driven by a mask loaded via %lo.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v8i16_0
+}
+
+define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v8i16_1:
+  ; Broadcasts element 1 of %a to every lane; expects splati.h with index 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v8i16_1
+}
+
+define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v8i16_2:
+  ; Every mask index selects from %b; expects vshf.h with %b as both sources.
+  %1 = load <8 x i16>* %a
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v8i16_2
+}
+
+define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v8i16_3:
+  ; Mask mixes elements of %a and %b; expects vshf.h on both loaded vectors.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
+  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v8i16_3
+}
+
+define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v8i16_4:
+  ; Both operands are %a, so indices 1 and 9 name the same element; expects splati.h index 1.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v8i16_4
+}
+
+; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
+; instruction when using a single vector.
+
+define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v4i32_0:
+  ; Reverses the four words of %a; expects shf.w with immediate 27.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v4i32_0
+}
+
+define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v4i32_1:
+  ; Broadcasts element 1 of %a to every lane; expects shf.w with immediate 85.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v4i32_1
+}
+
+define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v4i32_2:
+  ; Every mask index selects from %b; expects shf.w on %b with immediate 36.
+  %1 = load <4 x i32>* %a
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v4i32_2
+}
+
+define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v4i32_3:
+  ; Mask mixes elements of %a and %b; expects vshf.w on both loaded vectors.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
+  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v4i32_3
+}
+
+define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v4i32_4:
+  ; Both operands are %a, so indices 1 and 5 name the same element; expects shf.w immediate 85.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v4i32_4
+}
+
+define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v2i64_0:
+  ; Swaps the two doublewords of %a; expects vshf.d driven by a mask loaded via %lo.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v2i64_0
+}
+
+define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v2i64_1:
+  ; Broadcasts element 1 of %a to both lanes; expects splati.d with index 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v2i64_1
+}
+
+define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v2i64_2:
+  ; Every mask index selects from %b; expects vshf.d with %b as both sources.
+  %1 = load <2 x i64>* %a
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v2i64_2
+}
+
+define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v2i64_3:
+  ; Mask takes element 1 of %a and element 0 of %b; expects vshf.d on both loaded vectors.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
+  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
+  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v2i64_3
+}
+
+define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: vshf_v2i64_4:
+  ; Both operands are %a, so indices 1 and 3 name the same element; expects splati.d index 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size vshf_v2i64_4
+}
+
+define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: {{^}}shf_v16i8_0:
+  ; Label is line-anchored so it cannot match inside "vshf_v16i8_0:"; expects shf.b immediate 45.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
+  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size shf_v16i8_0
+}
+
+define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: {{^}}shf_v8i16_0:
+  ; Label is line-anchored so it cannot match inside "vshf_v8i16_0:"; expects shf.h immediate 27.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size shf_v8i16_0
+}
+
+define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: {{^}}shf_v4i32_0:
+  ; Label is line-anchored so it cannot match inside "vshf_v4i32_0:"; expects shf.w immediate 27.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size shf_v4i32_0
+}
+
+; shf.d does not exist
+
+define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: ilvev_v16i8_0:
+  ; Mask interleaves the even-indexed elements of %a and %b; expects ilvev.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvev_v16i8_0
+}
+
+define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: ilvev_v8i16_0:
+  ; Mask interleaves the even-indexed elements of %a and %b; expects ilvev.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvev_v8i16_0
+}
+
+define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: ilvev_v4i32_0:
+  ; Mask interleaves the even-indexed elements of %a and %b; expects ilvev.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvev_v4i32_0
+}
+
+define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: ilvev_v2i64_0:
+  ; Mask takes element 0 of %a then element 0 of %b; expects ilvev.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
+  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvev_v2i64_0
+}
+
+define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: ilvod_v16i8_0:
+  ; Mask interleaves the odd-indexed elements of %a and %b; expects ilvod.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvod_v16i8_0
+}
+
+define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: ilvod_v8i16_0:
+  ; Mask interleaves the odd-indexed elements of %a and %b; expects ilvod.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvod_v8i16_0
+}
+
+define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: ilvod_v4i32_0:
+  ; Mask interleaves the odd-indexed elements of %a and %b; expects ilvod.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvod_v4i32_0
+}
+
+define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: ilvod_v2i64_0:
+  ; Mask takes element 1 of %a then element 1 of %b; expects ilvod.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvod_v2i64_0
+}
+
+define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: ilvl_v16i8_0:
+  ; Mask interleaves elements 0-7 of %a with elements 0-7 of %b; expects ilvl.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvl_v16i8_0
+}
+
+define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: ilvl_v8i16_0:
+  ; Mask interleaves elements 0-3 of %a with elements 0-3 of %b; expects ilvl.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvl_v8i16_0
+}
+
+define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: ilvl_v4i32_0:
+  ; Mask interleaves elements 0-1 of %a with elements 0-1 of %b; expects ilvl.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvl_v4i32_0
+}
+
+define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: ilvl_v2i64_0:
+  ; Mask takes element 0 of %a then element 0 of %b; the expected output is ilvev.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
+  ; ilvl.d and ilvev.d are equivalent for v2i64
+  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvl_v2i64_0
+}
+
+define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: ilvr_v16i8_0:
+  ; Mask interleaves elements 8-15 of %a with elements 8-15 of %b; expects ilvr.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvr_v16i8_0
+}
+
+define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: ilvr_v8i16_0:
+  ; Mask interleaves elements 4-7 of %a with elements 4-7 of %b; expects ilvr.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvr_v8i16_0
+}
+
+define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: ilvr_v4i32_0:
+  ; Mask interleaves elements 2-3 of %a with elements 2-3 of %b; expects ilvr.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvr_v4i32_0
+}
+
+define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: ilvr_v2i64_0:
+  ; Mask takes element 1 of %a then element 1 of %b; the expected output is ilvod.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
+  ; ilvr.d and ilvod.d are equivalent for v2i64
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size ilvr_v2i64_0
+}
+
+define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: pckev_v16i8_0:
+  ; Mask takes the even-indexed elements of %a followed by those of %b; expects pckev.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckev_v16i8_0
+}
+
+define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: pckev_v8i16_0:
+  ; Mask takes the even-indexed elements of %a followed by those of %b; expects pckev.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckev_v8i16_0
+}
+
+define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: pckev_v4i32_0:
+  ; Mask takes the even-indexed elements of %a followed by those of %b; expects pckev.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckev_v4i32_0
+}
+
+define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: pckev_v2i64_0:
+  ; Mask takes element 0 of %a then element 0 of %b; the expected output is ilvev.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
+  ; pckev.d and ilvev.d are equivalent for v2i64
+  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckev_v2i64_0
+}
+
+define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK-LABEL: pckod_v16i8_0:
+  ; Mask takes the odd-indexed elements of %a followed by those of %b; expects pckod.b.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckod_v16i8_0
+}
+
+define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK-LABEL: pckod_v8i16_0:
+  ; Mask takes the odd-indexed elements of %a followed by those of %b; expects pckod.h.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckod_v8i16_0
+}
+
+define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK-LABEL: pckod_v4i32_0:
+  ; Mask takes the odd-indexed elements of %a followed by those of %b; expects pckod.w.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckod_v4i32_0
+}
+
+define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK-LABEL: pckod_v2i64_0:
+  ; Mask takes element 1 of %a then element 1 of %b; the expected output is ilvod.d.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
+  ; pckod.d and ilvod.d are equivalent for v2i64
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size pckod_v2i64_0
+}
+
+define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK-LABEL: splati_v16i8_0:
+  ; Broadcasts element 4 of %a to every lane; expects splati.b with index 4.
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
+                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size splati_v16i8_0
+}
+
+define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK-LABEL: splati_v8i16_0:
+  ; Broadcasts element 4 of %a to every lane; expects splati.h with index 4.
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size splati_v8i16_0
+}
+
+define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK-LABEL: splati_v4i32_0:
+  ; Broadcasts element 3 of %a to every lane; the expected output is shf.w immediate 255.
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  ; shf.w and splati.w are equivalent
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size splati_v4i32_0
+}
+
+define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK-LABEL: splati_v2i64_0:
+  ; Broadcasts element 1 of %a to both lanes; expects splati.d with index 1.
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK-LABEL: .size splati_v2i64_0
+}
diff --git a/test/CodeGen/Mips/msa/special.ll b/test/CodeGen/Mips/msa/special.ll
new file mode 100644
index 000000000000..60a4369dfb1c
--- /dev/null
+++ b/test/CodeGen/Mips/msa/special.ll
@@ -0,0 +1,26 @@
+; Test the MSA intrinsics that are encoded with the SPECIAL instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define i32 @llvm_mips_lsa_test(i32 %a, i32 %b) nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.lsa(i32 %a, i32 %b, i32 2)
+  ret i32 %0
+}
+
+declare i32 @llvm.mips.lsa(i32, i32, i32) nounwind
+; The intrinsic call with shift amount 2 is expected to be emitted as an lsa instruction.
+; CHECK-LABEL: llvm_mips_lsa_test:
+; CHECK: lsa {{\$[0-9]+}}, {{\$[0-9]+}}, {{\$[0-9]+}}, 2
+; CHECK-LABEL: .size llvm_mips_lsa_test
+
+define i32 @lsa_test(i32 %a, i32 %b) nounwind {
+entry:
+  %0 = shl i32 %b, 2
+  %1 = add i32 %a, %0
+  ret i32 %1
+}
+; shl by 2 plus add is expected to become one lsa; the label is line-anchored to avoid "llvm_mips_lsa_test:".
+; CHECK-LABEL: {{^}}lsa_test:
+; CHECK: lsa {{\$[0-9]+}}, {{\$[0-9]+}}, {{\$[0-9]+}}, 2
+; CHECK-LABEL: .size lsa_test
diff --git a/test/CodeGen/Mips/msa/spill.ll b/test/CodeGen/Mips/msa/spill.ll
new file mode 100644
index 000000000000..66f896ac4684
--- /dev/null
+++ b/test/CodeGen/Mips/msa/spill.ll
@@ -0,0 +1,601 @@
+; Test that the correct instruction is chosen for spill and reload by keeping
+; 33 vector values live simultaneously (MSA has only 32 vector registers).
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+define i32 @test_i8(<16 x i8>* %p0, <16 x i8>* %q1) nounwind { ; byte-element spill test. NOTE(review): %q1 is never referenced in the body
+entry:
+  %p1  = getelementptr <16 x i8>* %p0, i32 1 ; %p1..%p33: addresses of the 33 vectors that follow %p0
+  %p2  = getelementptr <16 x i8>* %p0, i32 2
+  %p3  = getelementptr <16 x i8>* %p0, i32 3
+  %p4  = getelementptr <16 x i8>* %p0, i32 4
+  %p5  = getelementptr <16 x i8>* %p0, i32 5
+  %p6  = getelementptr <16 x i8>* %p0, i32 6
+  %p7  = getelementptr <16 x i8>* %p0, i32 7
+  %p8  = getelementptr <16 x i8>* %p0, i32 8
+  %p9  = getelementptr <16 x i8>* %p0, i32 9
+  %p10 = getelementptr <16 x i8>* %p0, i32 10
+  %p11 = getelementptr <16 x i8>* %p0, i32 11
+  %p12 = getelementptr <16 x i8>* %p0, i32 12
+  %p13 = getelementptr <16 x i8>* %p0, i32 13
+  %p14 = getelementptr <16 x i8>* %p0, i32 14
+  %p15 = getelementptr <16 x i8>* %p0, i32 15
+  %p16 = getelementptr <16 x i8>* %p0, i32 16
+  %p17 = getelementptr <16 x i8>* %p0, i32 17
+  %p18 = getelementptr <16 x i8>* %p0, i32 18
+  %p19 = getelementptr <16 x i8>* %p0, i32 19
+  %p20 = getelementptr <16 x i8>* %p0, i32 20
+  %p21 = getelementptr <16 x i8>* %p0, i32 21
+  %p22 = getelementptr <16 x i8>* %p0, i32 22
+  %p23 = getelementptr <16 x i8>* %p0, i32 23
+  %p24 = getelementptr <16 x i8>* %p0, i32 24
+  %p25 = getelementptr <16 x i8>* %p0, i32 25
+  %p26 = getelementptr <16 x i8>* %p0, i32 26
+  %p27 = getelementptr <16 x i8>* %p0, i32 27
+  %p28 = getelementptr <16 x i8>* %p0, i32 28
+  %p29 = getelementptr <16 x i8>* %p0, i32 29
+  %p30 = getelementptr <16 x i8>* %p0, i32 30
+  %p31 = getelementptr <16 x i8>* %p0, i32 31
+  %p32 = getelementptr <16 x i8>* %p0, i32 32
+  %p33 = getelementptr <16 x i8>* %p0, i32 33
+  %0  = load <16 x i8>* %p0, align 16 ; all 34 vectors (%0..%33) are loaded before any arithmetic
+  %1  = load <16 x i8>* %p1, align 16
+  %2  = load <16 x i8>* %p2, align 16
+  %3  = load <16 x i8>* %p3, align 16
+  %4  = load <16 x i8>* %p4, align 16
+  %5  = load <16 x i8>* %p5, align 16
+  %6  = load <16 x i8>* %p6, align 16
+  %7  = load <16 x i8>* %p7, align 16
+  %8  = load <16 x i8>* %p8, align 16
+  %9  = load <16 x i8>* %p9, align 16
+  %10 = load <16 x i8>* %p10, align 16
+  %11 = load <16 x i8>* %p11, align 16
+  %12 = load <16 x i8>* %p12, align 16
+  %13 = load <16 x i8>* %p13, align 16
+  %14 = load <16 x i8>* %p14, align 16
+  %15 = load <16 x i8>* %p15, align 16
+  %16 = load <16 x i8>* %p16, align 16
+  %17 = load <16 x i8>* %p17, align 16
+  %18 = load <16 x i8>* %p18, align 16
+  %19 = load <16 x i8>* %p19, align 16
+  %20 = load <16 x i8>* %p20, align 16
+  %21 = load <16 x i8>* %p21, align 16
+  %22 = load <16 x i8>* %p22, align 16
+  %23 = load <16 x i8>* %p23, align 16
+  %24 = load <16 x i8>* %p24, align 16
+  %25 = load <16 x i8>* %p25, align 16
+  %26 = load <16 x i8>* %p26, align 16
+  %27 = load <16 x i8>* %p27, align 16
+  %28 = load <16 x i8>* %p28, align 16
+  %29 = load <16 x i8>* %p29, align 16
+  %30 = load <16 x i8>* %p30, align 16
+  %31 = load <16 x i8>* %p31, align 16
+  %32 = load <16 x i8>* %p32, align 16
+  %33 = load <16 x i8>* %p33, align 16
+  %r1  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0,   <16 x i8> %1) ; first chain: fold %0..%33 into one running sum
+  %r2  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r1,  <16 x i8> %2)
+  %r3  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r2,  <16 x i8> %3)
+  %r4  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r3,  <16 x i8> %4)
+  %r5  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r4,  <16 x i8> %5)
+  %r6  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r5,  <16 x i8> %6)
+  %r7  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r6,  <16 x i8> %7)
+  %r8  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r7,  <16 x i8> %8)
+  %r9  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r8,  <16 x i8> %9)
+  %r10 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r9,  <16 x i8> %10)
+  %r11 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r10, <16 x i8> %11)
+  %r12 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r11, <16 x i8> %12)
+  %r13 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r12, <16 x i8> %13)
+  %r14 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r13, <16 x i8> %14)
+  %r15 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r14, <16 x i8> %15)
+  %r16 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r15, <16 x i8> %16)
+  %r17 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r16, <16 x i8> %17)
+  %r18 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r17, <16 x i8> %18)
+  %r19 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r18, <16 x i8> %19)
+  %r20 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r19, <16 x i8> %20)
+  %r21 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r20, <16 x i8> %21)
+  %r22 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r21, <16 x i8> %22)
+  %r23 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r22, <16 x i8> %23)
+  %r24 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r23, <16 x i8> %24)
+  %r25 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r24, <16 x i8> %25)
+  %r26 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r25, <16 x i8> %26)
+  %r27 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r26, <16 x i8> %27)
+  %r28 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r27, <16 x i8> %28)
+  %r29 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r28, <16 x i8> %29)
+  %r30 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r29, <16 x i8> %30)
+  %r31 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r30, <16 x i8> %31)
+  %r32 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r31, <16 x i8> %32)
+  %r33 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r32, <16 x i8> %33)
+  %rx1  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r33,   <16 x i8> %1) ; second chain reuses %1..%33, keeping them live across the whole first chain
+  %rx2  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx1,  <16 x i8> %2)
+  %rx3  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx2,  <16 x i8> %3)
+  %rx4  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx3,  <16 x i8> %4)
+  %rx5  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx4,  <16 x i8> %5)
+  %rx6  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx5,  <16 x i8> %6)
+  %rx7  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx6,  <16 x i8> %7)
+  %rx8  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx7,  <16 x i8> %8)
+  %rx9  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx8,  <16 x i8> %9)
+  %rx10 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx9,  <16 x i8> %10)
+  %rx11 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx10, <16 x i8> %11)
+  %rx12 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx11, <16 x i8> %12)
+  %rx13 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx12, <16 x i8> %13)
+  %rx14 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx13, <16 x i8> %14)
+  %rx15 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx14, <16 x i8> %15)
+  %rx16 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx15, <16 x i8> %16)
+  %rx17 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx16, <16 x i8> %17)
+  %rx18 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx17, <16 x i8> %18)
+  %rx19 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx18, <16 x i8> %19)
+  %rx20 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx19, <16 x i8> %20)
+  %rx21 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx20, <16 x i8> %21)
+  %rx22 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx21, <16 x i8> %22)
+  %rx23 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx22, <16 x i8> %23)
+  %rx24 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx23, <16 x i8> %24)
+  %rx25 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx24, <16 x i8> %25)
+  %rx26 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx25, <16 x i8> %26)
+  %rx27 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx26, <16 x i8> %27)
+  %rx28 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx27, <16 x i8> %28)
+  %rx29 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx28, <16 x i8> %29)
+  %rx30 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx29, <16 x i8> %30)
+  %rx31 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx30, <16 x i8> %31)
+  %rx32 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx31, <16 x i8> %32)
+  %rx33 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %rx32, <16 x i8> %33)
+  %res = call i32 @llvm.mips.copy.s.b(<16 x i8> %rx33, i32 0) ; turn the final vector into the i32 return value via copy.s.b with index operand 0
+  ret i32 %res
+}
+
+declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
+declare i32       @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
+
+; CHECK: test_i8:
+; CHECK: st.b {{.*}} Spill
+; CHECK: st.b {{.*}} Spill
+; CHECK: ld.b {{.*}} Reload
+; CHECK: ld.b {{.*}} Reload
+; CHECK: .size
+
+define i32 @test_i16(<8 x i16>* %p0, <8 x i16>* %q1) nounwind { ; halfword-element spill test. NOTE(review): %q1 is never referenced in the body
+entry:
+  %p1  = getelementptr <8 x i16>* %p0, i32 1 ; %p1..%p33: addresses of the 33 vectors that follow %p0
+  %p2  = getelementptr <8 x i16>* %p0, i32 2
+  %p3  = getelementptr <8 x i16>* %p0, i32 3
+  %p4  = getelementptr <8 x i16>* %p0, i32 4
+  %p5  = getelementptr <8 x i16>* %p0, i32 5
+  %p6  = getelementptr <8 x i16>* %p0, i32 6
+  %p7  = getelementptr <8 x i16>* %p0, i32 7
+  %p8  = getelementptr <8 x i16>* %p0, i32 8
+  %p9  = getelementptr <8 x i16>* %p0, i32 9
+  %p10 = getelementptr <8 x i16>* %p0, i32 10
+  %p11 = getelementptr <8 x i16>* %p0, i32 11
+  %p12 = getelementptr <8 x i16>* %p0, i32 12
+  %p13 = getelementptr <8 x i16>* %p0, i32 13
+  %p14 = getelementptr <8 x i16>* %p0, i32 14
+  %p15 = getelementptr <8 x i16>* %p0, i32 15
+  %p16 = getelementptr <8 x i16>* %p0, i32 16
+  %p17 = getelementptr <8 x i16>* %p0, i32 17
+  %p18 = getelementptr <8 x i16>* %p0, i32 18
+  %p19 = getelementptr <8 x i16>* %p0, i32 19
+  %p20 = getelementptr <8 x i16>* %p0, i32 20
+  %p21 = getelementptr <8 x i16>* %p0, i32 21
+  %p22 = getelementptr <8 x i16>* %p0, i32 22
+  %p23 = getelementptr <8 x i16>* %p0, i32 23
+  %p24 = getelementptr <8 x i16>* %p0, i32 24
+  %p25 = getelementptr <8 x i16>* %p0, i32 25
+  %p26 = getelementptr <8 x i16>* %p0, i32 26
+  %p27 = getelementptr <8 x i16>* %p0, i32 27
+  %p28 = getelementptr <8 x i16>* %p0, i32 28
+  %p29 = getelementptr <8 x i16>* %p0, i32 29
+  %p30 = getelementptr <8 x i16>* %p0, i32 30
+  %p31 = getelementptr <8 x i16>* %p0, i32 31
+  %p32 = getelementptr <8 x i16>* %p0, i32 32
+  %p33 = getelementptr <8 x i16>* %p0, i32 33
+  %0  = load <8 x i16>* %p0, align 16 ; all 34 vectors (%0..%33) are loaded before any arithmetic
+  %1  = load <8 x i16>* %p1, align 16
+  %2  = load <8 x i16>* %p2, align 16
+  %3  = load <8 x i16>* %p3, align 16
+  %4  = load <8 x i16>* %p4, align 16
+  %5  = load <8 x i16>* %p5, align 16
+  %6  = load <8 x i16>* %p6, align 16
+  %7  = load <8 x i16>* %p7, align 16
+  %8  = load <8 x i16>* %p8, align 16
+  %9  = load <8 x i16>* %p9, align 16
+  %10 = load <8 x i16>* %p10, align 16
+  %11 = load <8 x i16>* %p11, align 16
+  %12 = load <8 x i16>* %p12, align 16
+  %13 = load <8 x i16>* %p13, align 16
+  %14 = load <8 x i16>* %p14, align 16
+  %15 = load <8 x i16>* %p15, align 16
+  %16 = load <8 x i16>* %p16, align 16
+  %17 = load <8 x i16>* %p17, align 16
+  %18 = load <8 x i16>* %p18, align 16
+  %19 = load <8 x i16>* %p19, align 16
+  %20 = load <8 x i16>* %p20, align 16
+  %21 = load <8 x i16>* %p21, align 16
+  %22 = load <8 x i16>* %p22, align 16
+  %23 = load <8 x i16>* %p23, align 16
+  %24 = load <8 x i16>* %p24, align 16
+  %25 = load <8 x i16>* %p25, align 16
+  %26 = load <8 x i16>* %p26, align 16
+  %27 = load <8 x i16>* %p27, align 16
+  %28 = load <8 x i16>* %p28, align 16
+  %29 = load <8 x i16>* %p29, align 16
+  %30 = load <8 x i16>* %p30, align 16
+  %31 = load <8 x i16>* %p31, align 16
+  %32 = load <8 x i16>* %p32, align 16
+  %33 = load <8 x i16>* %p33, align 16
+  %r1  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0,   <8 x i16> %1) ; first chain: fold %0..%33 into one running sum
+  %r2  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r1,  <8 x i16> %2)
+  %r3  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r2,  <8 x i16> %3)
+  %r4  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r3,  <8 x i16> %4)
+  %r5  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r4,  <8 x i16> %5)
+  %r6  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r5,  <8 x i16> %6)
+  %r7  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r6,  <8 x i16> %7)
+  %r8  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r7,  <8 x i16> %8)
+  %r9  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r8,  <8 x i16> %9)
+  %r10 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r9,  <8 x i16> %10)
+  %r11 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r10, <8 x i16> %11)
+  %r12 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r11, <8 x i16> %12)
+  %r13 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r12, <8 x i16> %13)
+  %r14 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r13, <8 x i16> %14)
+  %r15 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r14, <8 x i16> %15)
+  %r16 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r15, <8 x i16> %16)
+  %r17 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r16, <8 x i16> %17)
+  %r18 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r17, <8 x i16> %18)
+  %r19 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r18, <8 x i16> %19)
+  %r20 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r19, <8 x i16> %20)
+  %r21 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r20, <8 x i16> %21)
+  %r22 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r21, <8 x i16> %22)
+  %r23 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r22, <8 x i16> %23)
+  %r24 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r23, <8 x i16> %24)
+  %r25 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r24, <8 x i16> %25)
+  %r26 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r25, <8 x i16> %26)
+  %r27 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r26, <8 x i16> %27)
+  %r28 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r27, <8 x i16> %28)
+  %r29 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r28, <8 x i16> %29)
+  %r30 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r29, <8 x i16> %30)
+  %r31 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r30, <8 x i16> %31)
+  %r32 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r31, <8 x i16> %32)
+  %r33 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r32, <8 x i16> %33)
+  %rx1  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r33,   <8 x i16> %1) ; second chain reuses %1..%33, keeping them live across the whole first chain
+  %rx2  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx1,  <8 x i16> %2)
+  %rx3  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx2,  <8 x i16> %3)
+  %rx4  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx3,  <8 x i16> %4)
+  %rx5  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx4,  <8 x i16> %5)
+  %rx6  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx5,  <8 x i16> %6)
+  %rx7  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx6,  <8 x i16> %7)
+  %rx8  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx7,  <8 x i16> %8)
+  %rx9  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx8,  <8 x i16> %9)
+  %rx10 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx9,  <8 x i16> %10)
+  %rx11 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx10, <8 x i16> %11)
+  %rx12 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx11, <8 x i16> %12)
+  %rx13 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx12, <8 x i16> %13)
+  %rx14 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx13, <8 x i16> %14)
+  %rx15 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx14, <8 x i16> %15)
+  %rx16 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx15, <8 x i16> %16)
+  %rx17 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx16, <8 x i16> %17)
+  %rx18 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx17, <8 x i16> %18)
+  %rx19 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx18, <8 x i16> %19)
+  %rx20 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx19, <8 x i16> %20)
+  %rx21 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx20, <8 x i16> %21)
+  %rx22 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx21, <8 x i16> %22)
+  %rx23 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx22, <8 x i16> %23)
+  %rx24 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx23, <8 x i16> %24)
+  %rx25 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx24, <8 x i16> %25)
+  %rx26 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx25, <8 x i16> %26)
+  %rx27 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx26, <8 x i16> %27)
+  %rx28 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx27, <8 x i16> %28)
+  %rx29 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx28, <8 x i16> %29)
+  %rx30 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx29, <8 x i16> %30)
+  %rx31 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx30, <8 x i16> %31)
+  %rx32 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx31, <8 x i16> %32)
+  %rx33 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %rx32, <8 x i16> %33)
+  %res = call i32 @llvm.mips.copy.s.h(<8 x i16> %rx33, i32 0) ; turn the final vector into the i32 return value via copy.s.h with index operand 0
+  ret i32 %res
+}
+
+declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
+declare i32       @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
+
+; CHECK: test_i16:
+; CHECK: st.h {{.*}} Spill
+; CHECK: st.h {{.*}} Spill
+; CHECK: ld.h {{.*}} Reload
+; CHECK: ld.h {{.*}} Reload
+; CHECK: .size
+
+define i32 @test_i32(<4 x i32>* %p0, <4 x i32>* %q1) nounwind { ; word-element spill test. NOTE(review): %q1 is never referenced in the body
+entry:
+  %p1  = getelementptr <4 x i32>* %p0, i32 1 ; %p1..%p33: addresses of the 33 vectors that follow %p0
+  %p2  = getelementptr <4 x i32>* %p0, i32 2
+  %p3  = getelementptr <4 x i32>* %p0, i32 3
+  %p4  = getelementptr <4 x i32>* %p0, i32 4
+  %p5  = getelementptr <4 x i32>* %p0, i32 5
+  %p6  = getelementptr <4 x i32>* %p0, i32 6
+  %p7  = getelementptr <4 x i32>* %p0, i32 7
+  %p8  = getelementptr <4 x i32>* %p0, i32 8
+  %p9  = getelementptr <4 x i32>* %p0, i32 9
+  %p10 = getelementptr <4 x i32>* %p0, i32 10
+  %p11 = getelementptr <4 x i32>* %p0, i32 11
+  %p12 = getelementptr <4 x i32>* %p0, i32 12
+  %p13 = getelementptr <4 x i32>* %p0, i32 13
+  %p14 = getelementptr <4 x i32>* %p0, i32 14
+  %p15 = getelementptr <4 x i32>* %p0, i32 15
+  %p16 = getelementptr <4 x i32>* %p0, i32 16
+  %p17 = getelementptr <4 x i32>* %p0, i32 17
+  %p18 = getelementptr <4 x i32>* %p0, i32 18
+  %p19 = getelementptr <4 x i32>* %p0, i32 19
+  %p20 = getelementptr <4 x i32>* %p0, i32 20
+  %p21 = getelementptr <4 x i32>* %p0, i32 21
+  %p22 = getelementptr <4 x i32>* %p0, i32 22
+  %p23 = getelementptr <4 x i32>* %p0, i32 23
+  %p24 = getelementptr <4 x i32>* %p0, i32 24
+  %p25 = getelementptr <4 x i32>* %p0, i32 25
+  %p26 = getelementptr <4 x i32>* %p0, i32 26
+  %p27 = getelementptr <4 x i32>* %p0, i32 27
+  %p28 = getelementptr <4 x i32>* %p0, i32 28
+  %p29 = getelementptr <4 x i32>* %p0, i32 29
+  %p30 = getelementptr <4 x i32>* %p0, i32 30
+  %p31 = getelementptr <4 x i32>* %p0, i32 31
+  %p32 = getelementptr <4 x i32>* %p0, i32 32
+  %p33 = getelementptr <4 x i32>* %p0, i32 33
+  %0  = load <4 x i32>* %p0, align 16 ; all 34 vectors (%0..%33) are loaded before any arithmetic
+  %1  = load <4 x i32>* %p1, align 16
+  %2  = load <4 x i32>* %p2, align 16
+  %3  = load <4 x i32>* %p3, align 16
+  %4  = load <4 x i32>* %p4, align 16
+  %5  = load <4 x i32>* %p5, align 16
+  %6  = load <4 x i32>* %p6, align 16
+  %7  = load <4 x i32>* %p7, align 16
+  %8  = load <4 x i32>* %p8, align 16
+  %9  = load <4 x i32>* %p9, align 16
+  %10 = load <4 x i32>* %p10, align 16
+  %11 = load <4 x i32>* %p11, align 16
+  %12 = load <4 x i32>* %p12, align 16
+  %13 = load <4 x i32>* %p13, align 16
+  %14 = load <4 x i32>* %p14, align 16
+  %15 = load <4 x i32>* %p15, align 16
+  %16 = load <4 x i32>* %p16, align 16
+  %17 = load <4 x i32>* %p17, align 16
+  %18 = load <4 x i32>* %p18, align 16
+  %19 = load <4 x i32>* %p19, align 16
+  %20 = load <4 x i32>* %p20, align 16
+  %21 = load <4 x i32>* %p21, align 16
+  %22 = load <4 x i32>* %p22, align 16
+  %23 = load <4 x i32>* %p23, align 16
+  %24 = load <4 x i32>* %p24, align 16
+  %25 = load <4 x i32>* %p25, align 16
+  %26 = load <4 x i32>* %p26, align 16
+  %27 = load <4 x i32>* %p27, align 16
+  %28 = load <4 x i32>* %p28, align 16
+  %29 = load <4 x i32>* %p29, align 16
+  %30 = load <4 x i32>* %p30, align 16
+  %31 = load <4 x i32>* %p31, align 16
+  %32 = load <4 x i32>* %p32, align 16
+  %33 = load <4 x i32>* %p33, align 16
+  %r1 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1) ; first chain: fold %0..%33 into one running sum
+  %r2 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r1, <4 x i32> %2)
+  %r3 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r2, <4 x i32> %3)
+  %r4 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r3, <4 x i32> %4)
+  %r5 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r4, <4 x i32> %5)
+  %r6 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r5, <4 x i32> %6)
+  %r7 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r6, <4 x i32> %7)
+  %r8 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r7, <4 x i32> %8)
+  %r9 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r8, <4 x i32> %9)
+  %r10 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r9, <4 x i32> %10)
+  %r11 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r10, <4 x i32> %11)
+  %r12 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r11, <4 x i32> %12)
+  %r13 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r12, <4 x i32> %13)
+  %r14 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r13, <4 x i32> %14)
+  %r15 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r14, <4 x i32> %15)
+  %r16 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r15, <4 x i32> %16)
+  %r17 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r16, <4 x i32> %17)
+  %r18 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r17, <4 x i32> %18)
+  %r19 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r18, <4 x i32> %19)
+  %r20 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r19, <4 x i32> %20)
+  %r21 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r20, <4 x i32> %21)
+  %r22 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r21, <4 x i32> %22)
+  %r23 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r22, <4 x i32> %23)
+  %r24 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r23, <4 x i32> %24)
+  %r25 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r24, <4 x i32> %25)
+  %r26 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r25, <4 x i32> %26)
+  %r27 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r26, <4 x i32> %27)
+  %r28 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r27, <4 x i32> %28)
+  %r29 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r28, <4 x i32> %29)
+  %r30 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r29, <4 x i32> %30)
+  %r31 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r30, <4 x i32> %31)
+  %r32 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r31, <4 x i32> %32)
+  %r33 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r32, <4 x i32> %33)
+  %rx1 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r33, <4 x i32> %1) ; second chain reuses %1..%33, keeping them live across the whole first chain
+  %rx2 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx1, <4 x i32> %2)
+  %rx3 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx2, <4 x i32> %3)
+  %rx4 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx3, <4 x i32> %4)
+  %rx5 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx4, <4 x i32> %5)
+  %rx6 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx5, <4 x i32> %6)
+  %rx7 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx6, <4 x i32> %7)
+  %rx8 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx7, <4 x i32> %8)
+  %rx9 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx8, <4 x i32> %9)
+  %rx10 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx9, <4 x i32> %10)
+  %rx11 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx10, <4 x i32> %11)
+  %rx12 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx11, <4 x i32> %12)
+  %rx13 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx12, <4 x i32> %13)
+  %rx14 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx13, <4 x i32> %14)
+  %rx15 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx14, <4 x i32> %15)
+  %rx16 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx15, <4 x i32> %16)
+  %rx17 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx16, <4 x i32> %17)
+  %rx18 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx17, <4 x i32> %18)
+  %rx19 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx18, <4 x i32> %19)
+  %rx20 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx19, <4 x i32> %20)
+  %rx21 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx20, <4 x i32> %21)
+  %rx22 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx21, <4 x i32> %22)
+  %rx23 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx22, <4 x i32> %23)
+  %rx24 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx23, <4 x i32> %24)
+  %rx25 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx24, <4 x i32> %25)
+  %rx26 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx25, <4 x i32> %26)
+  %rx27 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx26, <4 x i32> %27)
+  %rx28 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx27, <4 x i32> %28)
+  %rx29 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx28, <4 x i32> %29)
+  %rx30 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx29, <4 x i32> %30)
+  %rx31 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx30, <4 x i32> %31)
+  %rx32 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx31, <4 x i32> %32)
+  %rx33 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %rx32, <4 x i32> %33)
+  %res = call i32 @llvm.mips.copy.s.w(<4 x i32> %rx33, i32 0) ; turn the final vector into the i32 return value via copy.s.w with index operand 0
+  ret i32 %res
+}
+
+declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
+declare i32       @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
+
+; CHECK: test_i32:
+; CHECK: st.w {{.*}} Spill
+; CHECK: st.w {{.*}} Spill
+; CHECK: ld.w {{.*}} Reload
+; CHECK: ld.w {{.*}} Reload
+; CHECK: .size
+
+define i32 @test_i64(<2 x i64>* %p0, <2 x i64>* %q1) nounwind { ; doubleword-element spill test. NOTE(review): %q1 is never referenced in the body
+entry:
+  %p1  = getelementptr <2 x i64>* %p0, i32 1 ; %p1..%p33: addresses of the 33 vectors that follow %p0
+  %p2  = getelementptr <2 x i64>* %p0, i32 2
+  %p3  = getelementptr <2 x i64>* %p0, i32 3
+  %p4  = getelementptr <2 x i64>* %p0, i32 4
+  %p5  = getelementptr <2 x i64>* %p0, i32 5
+  %p6  = getelementptr <2 x i64>* %p0, i32 6
+  %p7  = getelementptr <2 x i64>* %p0, i32 7
+  %p8  = getelementptr <2 x i64>* %p0, i32 8
+  %p9  = getelementptr <2 x i64>* %p0, i32 9
+  %p10 = getelementptr <2 x i64>* %p0, i32 10
+  %p11 = getelementptr <2 x i64>* %p0, i32 11
+  %p12 = getelementptr <2 x i64>* %p0, i32 12
+  %p13 = getelementptr <2 x i64>* %p0, i32 13
+  %p14 = getelementptr <2 x i64>* %p0, i32 14
+  %p15 = getelementptr <2 x i64>* %p0, i32 15
+  %p16 = getelementptr <2 x i64>* %p0, i32 16
+  %p17 = getelementptr <2 x i64>* %p0, i32 17
+  %p18 = getelementptr <2 x i64>* %p0, i32 18
+  %p19 = getelementptr <2 x i64>* %p0, i32 19
+  %p20 = getelementptr <2 x i64>* %p0, i32 20
+  %p21 = getelementptr <2 x i64>* %p0, i32 21
+  %p22 = getelementptr <2 x i64>* %p0, i32 22
+  %p23 = getelementptr <2 x i64>* %p0, i32 23
+  %p24 = getelementptr <2 x i64>* %p0, i32 24
+  %p25 = getelementptr <2 x i64>* %p0, i32 25
+  %p26 = getelementptr <2 x i64>* %p0, i32 26
+  %p27 = getelementptr <2 x i64>* %p0, i32 27
+  %p28 = getelementptr <2 x i64>* %p0, i32 28
+  %p29 = getelementptr <2 x i64>* %p0, i32 29
+  %p30 = getelementptr <2 x i64>* %p0, i32 30
+  %p31 = getelementptr <2 x i64>* %p0, i32 31
+  %p32 = getelementptr <2 x i64>* %p0, i32 32
+  %p33 = getelementptr <2 x i64>* %p0, i32 33
+  %0  = load <2 x i64>* %p0, align 16 ; all 34 vectors (%0..%33) are loaded before any arithmetic
+  %1  = load <2 x i64>* %p1, align 16
+  %2  = load <2 x i64>* %p2, align 16
+  %3  = load <2 x i64>* %p3, align 16
+  %4  = load <2 x i64>* %p4, align 16
+  %5  = load <2 x i64>* %p5, align 16
+  %6  = load <2 x i64>* %p6, align 16
+  %7  = load <2 x i64>* %p7, align 16
+  %8  = load <2 x i64>* %p8, align 16
+  %9  = load <2 x i64>* %p9, align 16
+  %10 = load <2 x i64>* %p10, align 16
+  %11 = load <2 x i64>* %p11, align 16
+  %12 = load <2 x i64>* %p12, align 16
+  %13 = load <2 x i64>* %p13, align 16
+  %14 = load <2 x i64>* %p14, align 16
+  %15 = load <2 x i64>* %p15, align 16
+  %16 = load <2 x i64>* %p16, align 16
+  %17 = load <2 x i64>* %p17, align 16
+  %18 = load <2 x i64>* %p18, align 16
+  %19 = load <2 x i64>* %p19, align 16
+  %20 = load <2 x i64>* %p20, align 16
+  %21 = load <2 x i64>* %p21, align 16
+  %22 = load <2 x i64>* %p22, align 16
+  %23 = load <2 x i64>* %p23, align 16
+  %24 = load <2 x i64>* %p24, align 16
+  %25 = load <2 x i64>* %p25, align 16
+  %26 = load <2 x i64>* %p26, align 16
+  %27 = load <2 x i64>* %p27, align 16
+  %28 = load <2 x i64>* %p28, align 16
+  %29 = load <2 x i64>* %p29, align 16
+  %30 = load <2 x i64>* %p30, align 16
+  %31 = load <2 x i64>* %p31, align 16
+  %32 = load <2 x i64>* %p32, align 16
+  %33 = load <2 x i64>* %p33, align 16
+  %r1  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0,   <2 x i64> %1) ; first chain: fold %0..%33 into one running sum
+  %r2  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r1,  <2 x i64> %2)
+  %r3  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r2,  <2 x i64> %3)
+  %r4  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r3,  <2 x i64> %4)
+  %r5  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r4,  <2 x i64> %5)
+  %r6  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r5,  <2 x i64> %6)
+  %r7  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r6,  <2 x i64> %7)
+  %r8  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r7,  <2 x i64> %8)
+  %r9  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r8,  <2 x i64> %9)
+  %r10 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r9,  <2 x i64> %10)
+  %r11 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r10, <2 x i64> %11)
+  %r12 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r11, <2 x i64> %12)
+  %r13 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r12, <2 x i64> %13)
+  %r14 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r13, <2 x i64> %14)
+  %r15 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r14, <2 x i64> %15)
+  %r16 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r15, <2 x i64> %16)
+  %r17 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r16, <2 x i64> %17)
+  %r18 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r17, <2 x i64> %18)
+  %r19 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r18, <2 x i64> %19)
+  %r20 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r19, <2 x i64> %20)
+  %r21 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r20, <2 x i64> %21)
+  %r22 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r21, <2 x i64> %22)
+  %r23 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r22, <2 x i64> %23)
+  %r24 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r23, <2 x i64> %24)
+  %r25 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r24, <2 x i64> %25)
+  %r26 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r25, <2 x i64> %26)
+  %r27 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r26, <2 x i64> %27)
+  %r28 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r27, <2 x i64> %28)
+  %r29 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r28, <2 x i64> %29)
+  %r30 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r29, <2 x i64> %30)
+  %r31 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r30, <2 x i64> %31)
+  %r32 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r31, <2 x i64> %32)
+  %r33 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r32, <2 x i64> %33)
+  %rx1  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r33,  <2 x i64> %1) ; second chain reuses %1..%33, keeping them live across the whole first chain
+  %rx2  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx1,  <2 x i64> %2)
+  %rx3  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx2,  <2 x i64> %3)
+  %rx4  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx3,  <2 x i64> %4)
+  %rx5  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx4,  <2 x i64> %5)
+  %rx6  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx5,  <2 x i64> %6)
+  %rx7  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx6,  <2 x i64> %7)
+  %rx8  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx7,  <2 x i64> %8)
+  %rx9  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx8,  <2 x i64> %9)
+  %rx10 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx9,  <2 x i64> %10)
+  %rx11 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx10, <2 x i64> %11)
+  %rx12 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx11, <2 x i64> %12)
+  %rx13 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx12, <2 x i64> %13)
+  %rx14 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx13, <2 x i64> %14)
+  %rx15 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx14, <2 x i64> %15)
+  %rx16 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx15, <2 x i64> %16)
+  %rx17 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx16, <2 x i64> %17)
+  %rx18 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx17, <2 x i64> %18)
+  %rx19 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx18, <2 x i64> %19)
+  %rx20 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx19, <2 x i64> %20)
+  %rx21 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx20, <2 x i64> %21)
+  %rx22 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx21, <2 x i64> %22)
+  %rx23 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx22, <2 x i64> %23)
+  %rx24 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx23, <2 x i64> %24)
+  %rx25 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx24, <2 x i64> %25)
+  %rx26 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx25, <2 x i64> %26)
+  %rx27 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx26, <2 x i64> %27)
+  %rx28 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx27, <2 x i64> %28)
+  %rx29 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx28, <2 x i64> %29)
+  %rx30 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx29, <2 x i64> %30)
+  %rx31 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx30, <2 x i64> %31)
+  %rx32 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx31, <2 x i64> %32)
+  %rx33 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %rx32, <2 x i64> %33)
+  %res1 = bitcast <2 x i64> %rx33 to <4 x i32> ; reinterpret the result as words so the i32-returning copy.s.w can be used
+  %res = call i32 @llvm.mips.copy.s.w(<4 x i32> %res1, i32 0) ; copy.s.w with index operand 0 yields the i32 return value
+  ret i32 %res
+}
+
+declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind ; copy.s.w is already declared earlier in this file, next to test_i32
+
+; CHECK: test_i64:
+; CHECK: st.d {{.*}} Spill
+; CHECK: st.d {{.*}} Spill
+; CHECK: ld.d {{.*}} Reload
+; CHECK: ld.d {{.*}} Reload
+; CHECK: .size
diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll
new file mode 100644
index 000000000000..5bddf5aea405
--- /dev/null
+++ b/test/CodeGen/Mips/msa/vec.ll
@@ -0,0 +1,946 @@
+; Test the MSA intrinsics that are encoded with the VEC instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ANYENDIAN %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ANYENDIAN %s
+
+@llvm_mips_and_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_and_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_and_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Calls @llvm.mips.and.v on the two <16 x i8> globals and stores the result to @llvm_mips_and_v_b_RES; the same-type bitcasts do not change the type.
+define void @llvm_mips_and_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2
+  %2 = bitcast <16 x i8> %0 to <16 x i8>
+  %3 = bitcast <16 x i8> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <16 x i8>
+  store <16 x i8> %5, <16 x i8>* @llvm_mips_and_v_b_RES
+  ret void
+}
+; Expected for both the mips and mipsel runs, in order: two ld.b loads, and.v, st.b, then the function's .size directive.
+; ANYENDIAN: llvm_mips_and_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: and.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_and_v_b_test
+;
+@llvm_mips_and_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_and_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_and_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Calls @llvm.mips.and.v on two <8 x i16> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_and_v_h_RES.
+define void @llvm_mips_and_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2
+  %2 = bitcast <8 x i16> %0 to <16 x i8>
+  %3 = bitcast <8 x i16> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <8 x i16>
+  store <8 x i16> %5, <8 x i16>* @llvm_mips_and_v_h_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around and.v even though the globals are <8 x i16>.
+; ANYENDIAN: llvm_mips_and_v_h_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: and.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_and_v_h_test
+;
+@llvm_mips_and_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_and_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_and_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Calls @llvm.mips.and.v on two <4 x i32> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_and_v_w_RES.
+define void @llvm_mips_and_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2
+  %2 = bitcast <4 x i32> %0 to <16 x i8>
+  %3 = bitcast <4 x i32> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <4 x i32>
+  store <4 x i32> %5, <4 x i32>* @llvm_mips_and_v_w_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around and.v even though the globals are <4 x i32>.
+; ANYENDIAN: llvm_mips_and_v_w_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: and.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_and_v_w_test
+;
+@llvm_mips_and_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_and_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_and_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Calls @llvm.mips.and.v on two <2 x i64> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_and_v_d_RES.
+define void @llvm_mips_and_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2
+  %2 = bitcast <2 x i64> %0 to <16 x i8>
+  %3 = bitcast <2 x i64> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <2 x i64>
+  store <2 x i64> %5, <2 x i64>* @llvm_mips_and_v_d_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around and.v even though the globals are <2 x i64>.
+; ANYENDIAN: llvm_mips_and_v_d_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: and.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_and_v_d_test
+;
+define void @and_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2
+  %2 = and <16 x i8> %0, %1
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_and_v_b_RES
+  ret void
+}
+; Plain IR 'and' on <16 x i8>: expects ld.b, ld.b, and.v, st.b. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: and_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: and.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size and_v_b_test
+;
+define void @and_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2
+  %2 = and <8 x i16> %0, %1
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_and_v_h_RES
+  ret void
+}
+; Plain IR 'and' on <8 x i16>: expects ld.h, ld.h, and.v, st.h. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: and_v_h_test:
+; ANYENDIAN: ld.h
+; ANYENDIAN: ld.h
+; ANYENDIAN: and.v
+; ANYENDIAN: st.h
+; ANYENDIAN: .size and_v_h_test
+;
+
+define void @and_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2
+  %2 = and <4 x i32> %0, %1
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_and_v_w_RES
+  ret void
+}
+; Plain IR 'and' on <4 x i32>: expects ld.w, ld.w, and.v, st.w. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: and_v_w_test:
+; ANYENDIAN: ld.w
+; ANYENDIAN: ld.w
+; ANYENDIAN: and.v
+; ANYENDIAN: st.w
+; ANYENDIAN: .size and_v_w_test
+;
+
+define void @and_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2
+  %2 = and <2 x i64> %0, %1
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_and_v_d_RES
+  ret void
+}
+; Plain IR 'and' on <2 x i64>: expects ld.d, ld.d, and.v, st.d. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: and_v_d_test:
+; ANYENDIAN: ld.d
+; ANYENDIAN: ld.d
+; ANYENDIAN: and.v
+; ANYENDIAN: st.d
+; ANYENDIAN: .size and_v_d_test
+;
+@llvm_mips_bmnz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bmnz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bmnz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bmnz_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Calls @llvm.mips.bmnz.v with the <16 x i8> globals ARG1, ARG2, ARG3 (in that order; the same-type bitcasts do not change the type) and stores the result to @llvm_mips_bmnz_v_b_RES.
+define void @llvm_mips_bmnz_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG2
+  %2 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG3
+  %3 = bitcast <16 x i8> %0 to <16 x i8>
+  %4 = bitcast <16 x i8> %1 to <16 x i8>
+  %5 = bitcast <16 x i8> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <16 x i8>
+  store <16 x i8> %7, <16 x i8>* @llvm_mips_bmnz_v_b_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded from them; bmnz.v is expected with operands in argument order and R4 is the register stored.
+; ANYENDIAN: llvm_mips_bmnz_v_b_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
+; ANYENDIAN: .size llvm_mips_bmnz_v_b_test
+
+@llvm_mips_bmnz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bmnz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bmnz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bmnz_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Calls @llvm.mips.bmnz.v with the <8 x i16> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bmnz_v_h_RES.
+define void @llvm_mips_bmnz_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG3
+  %3 = bitcast <8 x i16> %0 to <16 x i8>
+  %4 = bitcast <8 x i16> %1 to <16 x i8>
+  %5 = bitcast <8 x i16> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <8 x i16>
+  store <8 x i16> %7, <8 x i16>* @llvm_mips_bmnz_v_h_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; bmnz.v is expected with operands in argument order and R4 is the register stored.
+; ANYENDIAN: llvm_mips_bmnz_v_h_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
+; ANYENDIAN: .size llvm_mips_bmnz_v_h_test
+
+@llvm_mips_bmnz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bmnz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bmnz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bmnz_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Calls @llvm.mips.bmnz.v with the <4 x i32> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bmnz_v_w_RES.
+define void @llvm_mips_bmnz_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG3
+  %3 = bitcast <4 x i32> %0 to <16 x i8>
+  %4 = bitcast <4 x i32> %1 to <16 x i8>
+  %5 = bitcast <4 x i32> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <4 x i32>
+  store <4 x i32> %7, <4 x i32>* @llvm_mips_bmnz_v_w_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; bmnz.v is expected with operands in argument order and R4 is the register stored.
+; ANYENDIAN: llvm_mips_bmnz_v_w_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
+; ANYENDIAN: .size llvm_mips_bmnz_v_w_test
+
+@llvm_mips_bmnz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bmnz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bmnz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bmnz_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Calls @llvm.mips.bmnz.v with the <2 x i64> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bmnz_v_d_RES.
+define void @llvm_mips_bmnz_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG2
+  %2 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG3
+  %3 = bitcast <2 x i64> %0 to <16 x i8>
+  %4 = bitcast <2 x i64> %1 to <16 x i8>
+  %5 = bitcast <2 x i64> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <2 x i64>
+  store <2 x i64> %7, <2 x i64>* @llvm_mips_bmnz_v_d_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; bmnz.v is expected with operands in argument order and R4 is the register stored.
+; ANYENDIAN: llvm_mips_bmnz_v_d_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
+; ANYENDIAN: .size llvm_mips_bmnz_v_d_test
+
+@llvm_mips_bmz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bmz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bmz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bmz_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Calls @llvm.mips.bmz.v with the <16 x i8> globals ARG1, ARG2, ARG3 (in that order; the same-type bitcasts do not change the type) and stores the result to @llvm_mips_bmz_v_b_RES.
+define void @llvm_mips_bmz_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG2
+  %2 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG3
+  %3 = bitcast <16 x i8> %0 to <16 x i8>
+  %4 = bitcast <16 x i8> %1 to <16 x i8>
+  %5 = bitcast <16 x i8> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <16 x i8>
+  store <16 x i8> %7, <16 x i8>* @llvm_mips_bmz_v_b_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded from them; the checks look for a bmnz.v instruction rather than bmz.v.
+; ANYENDIAN: llvm_mips_bmz_v_b_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped, so R5 (loaded from ARG2) is expected as the destination and is the register stored.
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
+; ANYENDIAN: .size llvm_mips_bmz_v_b_test
+
+@llvm_mips_bmz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bmz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bmz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bmz_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Calls @llvm.mips.bmz.v with the <8 x i16> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bmz_v_h_RES.
+define void @llvm_mips_bmz_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG3
+  %3 = bitcast <8 x i16> %0 to <16 x i8>
+  %4 = bitcast <8 x i16> %1 to <16 x i8>
+  %5 = bitcast <8 x i16> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <8 x i16>
+  store <8 x i16> %7, <8 x i16>* @llvm_mips_bmz_v_h_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; the checks look for a bmnz.v instruction rather than bmz.v.
+; ANYENDIAN: llvm_mips_bmz_v_h_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped, so R5 (loaded from ARG2) is expected as the destination and is the register stored.
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
+; ANYENDIAN: .size llvm_mips_bmz_v_h_test
+
+@llvm_mips_bmz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bmz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bmz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bmz_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Calls @llvm.mips.bmz.v with the <4 x i32> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bmz_v_w_RES.
+define void @llvm_mips_bmz_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG3
+  %3 = bitcast <4 x i32> %0 to <16 x i8>
+  %4 = bitcast <4 x i32> %1 to <16 x i8>
+  %5 = bitcast <4 x i32> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <4 x i32>
+  store <4 x i32> %7, <4 x i32>* @llvm_mips_bmz_v_w_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; the checks look for a bmnz.v instruction rather than bmz.v.
+; ANYENDIAN: llvm_mips_bmz_v_w_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped, so R5 (loaded from ARG2) is expected as the destination and is the register stored.
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
+; ANYENDIAN: .size llvm_mips_bmz_v_w_test
+
+@llvm_mips_bmz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bmz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bmz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bmz_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Calls @llvm.mips.bmz.v with the <2 x i64> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bmz_v_d_RES.
+define void @llvm_mips_bmz_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG2
+  %2 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG3
+  %3 = bitcast <2 x i64> %0 to <16 x i8>
+  %4 = bitcast <2 x i64> %1 to <16 x i8>
+  %5 = bitcast <2 x i64> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <2 x i64>
+  store <2 x i64> %7, <2 x i64>* @llvm_mips_bmz_v_d_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; the checks look for a bmnz.v instruction rather than bmz.v.
+; ANYENDIAN: llvm_mips_bmz_v_d_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped, so R5 (loaded from ARG2) is expected as the destination and is the register stored.
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
+; ANYENDIAN: .size llvm_mips_bmz_v_d_test
+
+@llvm_mips_bsel_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bsel_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bsel_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_bsel_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Calls @llvm.mips.bsel.v with the <16 x i8> globals ARG1, ARG2, ARG3 (in that order; the same-type bitcasts do not change the type) and stores the result to @llvm_mips_bsel_v_b_RES.
+define void @llvm_mips_bsel_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2
+  %2 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG3
+  %3 = bitcast <16 x i8> %0 to <16 x i8>
+  %4 = bitcast <16 x i8> %1 to <16 x i8>
+  %5 = bitcast <16 x i8> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <16 x i8>
+  store <16 x i8> %7, <16 x i8>* @llvm_mips_bsel_v_b_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded from them; the checks look for bmnz.v with R6 as destination, and R6 is the register stored.
+; ANYENDIAN: llvm_mips_bsel_v_b_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bsel.v with wt and wd_in swapped. NOTE(review): if bsel.v = (ws & ~wd) | (wt & wd) and bmnz.v = (ws & wt) | (wd & ~wt) as in the MSA spec, the equivalent form looks like bmnz.v R5, R6, R4 rather than the order checked here - confirm.
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
+; ANYENDIAN: .size llvm_mips_bsel_v_b_test
+
+@llvm_mips_bsel_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bsel_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bsel_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_bsel_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Calls @llvm.mips.bsel.v with the <8 x i16> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bsel_v_h_RES.
+define void @llvm_mips_bsel_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2
+  %2 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG3
+  %3 = bitcast <8 x i16> %0 to <16 x i8>
+  %4 = bitcast <8 x i16> %1 to <16 x i8>
+  %5 = bitcast <8 x i16> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <8 x i16>
+  store <8 x i16> %7, <8 x i16>* @llvm_mips_bsel_v_h_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; the checks look for bmnz.v with R6 as destination, and R6 is the register stored.
+; ANYENDIAN: llvm_mips_bsel_v_h_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bsel.v with wt and wd_in swapped. NOTE(review): if bsel.v = (ws & ~wd) | (wt & wd) and bmnz.v = (ws & wt) | (wd & ~wt) as in the MSA spec, the equivalent form looks like bmnz.v R5, R6, R4 rather than the order checked here - confirm.
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
+; ANYENDIAN: .size llvm_mips_bsel_v_h_test
+
+@llvm_mips_bsel_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bsel_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bsel_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_bsel_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Calls @llvm.mips.bsel.v with the <4 x i32> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bsel_v_w_RES.
+define void @llvm_mips_bsel_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2
+  %2 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG3
+  %3 = bitcast <4 x i32> %0 to <16 x i8>
+  %4 = bitcast <4 x i32> %1 to <16 x i8>
+  %5 = bitcast <4 x i32> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <4 x i32>
+  store <4 x i32> %7, <4 x i32>* @llvm_mips_bsel_v_w_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; the checks look for bmnz.v with R6 as destination, and R6 is the register stored.
+; ANYENDIAN: llvm_mips_bsel_v_w_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bsel.v with wt and wd_in swapped. NOTE(review): if bsel.v = (ws & ~wd) | (wt & wd) and bmnz.v = (ws & wt) | (wd & ~wt) as in the MSA spec, the equivalent form looks like bmnz.v R5, R6, R4 rather than the order checked here - confirm.
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
+; ANYENDIAN: .size llvm_mips_bsel_v_w_test
+
+@llvm_mips_bsel_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bsel_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bsel_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_bsel_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Calls @llvm.mips.bsel.v with the <2 x i64> globals ARG1, ARG2, ARG3 bitcast to <16 x i8>; the result is bitcast back and stored to @llvm_mips_bsel_v_d_RES.
+define void @llvm_mips_bsel_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2
+  %2 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG3
+  %3 = bitcast <2 x i64> %0 to <16 x i8>
+  %4 = bitcast <2 x i64> %1 to <16 x i8>
+  %5 = bitcast <2 x i64> %2 to <16 x i8>
+  %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+  %7 = bitcast <16 x i8> %6 to <2 x i64>
+  store <2 x i64> %7, <2 x i64>* @llvm_mips_bsel_v_d_RES
+  ret void
+}
+; R1-R3 capture the GOT-loaded addresses of ARG1-ARG3 and R4-R6 the vectors loaded (with ld.b) from them; the checks look for bmnz.v with R6 as destination, and R6 is the register stored.
+; ANYENDIAN: llvm_mips_bsel_v_d_test:
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bsel.v with wt and wd_in swapped. NOTE(review): if bsel.v = (ws & ~wd) | (wt & wd) and bmnz.v = (ws & wt) | (wd & ~wt) as in the MSA spec, the equivalent form looks like bmnz.v R5, R6, R4 rather than the order checked here - confirm.
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
+; ANYENDIAN: .size llvm_mips_bsel_v_d_test
+
+@llvm_mips_nor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_nor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_nor_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Calls @llvm.mips.nor.v on the two <16 x i8> globals and stores the result to @llvm_mips_nor_v_b_RES; the same-type bitcasts do not change the type.
+define void @llvm_mips_nor_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_nor_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_nor_v_b_ARG2
+  %2 = bitcast <16 x i8> %0 to <16 x i8>
+  %3 = bitcast <16 x i8> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <16 x i8>
+  store <16 x i8> %5, <16 x i8>* @llvm_mips_nor_v_b_RES
+  ret void
+}
+; Expected for both the mips and mipsel runs, in order: two ld.b loads, nor.v, st.b, then the function's .size directive.
+; ANYENDIAN: llvm_mips_nor_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: nor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_nor_v_b_test
+;
+@llvm_mips_nor_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_nor_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_nor_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Calls @llvm.mips.nor.v on two <8 x i16> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_nor_v_h_RES.
+define void @llvm_mips_nor_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_nor_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_nor_v_h_ARG2
+  %2 = bitcast <8 x i16> %0 to <16 x i8>
+  %3 = bitcast <8 x i16> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <8 x i16>
+  store <8 x i16> %5, <8 x i16>* @llvm_mips_nor_v_h_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around nor.v even though the globals are <8 x i16>.
+; ANYENDIAN: llvm_mips_nor_v_h_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: nor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_nor_v_h_test
+;
+@llvm_mips_nor_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_nor_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_nor_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Calls @llvm.mips.nor.v on two <4 x i32> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_nor_v_w_RES.
+define void @llvm_mips_nor_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_nor_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_nor_v_w_ARG2
+  %2 = bitcast <4 x i32> %0 to <16 x i8>
+  %3 = bitcast <4 x i32> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <4 x i32>
+  store <4 x i32> %5, <4 x i32>* @llvm_mips_nor_v_w_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around nor.v even though the globals are <4 x i32>.
+; ANYENDIAN: llvm_mips_nor_v_w_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: nor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_nor_v_w_test
+;
+@llvm_mips_nor_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_nor_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_nor_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Calls @llvm.mips.nor.v on two <2 x i64> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_nor_v_d_RES.
+define void @llvm_mips_nor_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_nor_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_nor_v_d_ARG2
+  %2 = bitcast <2 x i64> %0 to <16 x i8>
+  %3 = bitcast <2 x i64> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <2 x i64>
+  store <2 x i64> %5, <2 x i64>* @llvm_mips_nor_v_d_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around nor.v even though the globals are <2 x i64>.
+; ANYENDIAN: llvm_mips_nor_v_d_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: nor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_nor_v_d_test
+;
+@llvm_mips_or_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_or_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_or_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+; Calls @llvm.mips.or.v on the two <16 x i8> globals and stores the result to @llvm_mips_or_v_b_RES; the same-type bitcasts do not change the type.
+define void @llvm_mips_or_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2
+  %2 = bitcast <16 x i8> %0 to <16 x i8>
+  %3 = bitcast <16 x i8> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <16 x i8>
+  store <16 x i8> %5, <16 x i8>* @llvm_mips_or_v_b_RES
+  ret void
+}
+; Expected for both the mips and mipsel runs, in order: two ld.b loads, or.v, st.b, then the function's .size directive.
+; ANYENDIAN: llvm_mips_or_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: or.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_or_v_b_test
+;
+@llvm_mips_or_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_or_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_or_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+; Calls @llvm.mips.or.v on two <8 x i16> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_or_v_h_RES.
+define void @llvm_mips_or_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2
+  %2 = bitcast <8 x i16> %0 to <16 x i8>
+  %3 = bitcast <8 x i16> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <8 x i16>
+  store <8 x i16> %5, <8 x i16>* @llvm_mips_or_v_h_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around or.v even though the globals are <8 x i16>.
+; ANYENDIAN: llvm_mips_or_v_h_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: or.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_or_v_h_test
+;
+@llvm_mips_or_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_or_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_or_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+; Calls @llvm.mips.or.v on two <4 x i32> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_or_v_w_RES.
+define void @llvm_mips_or_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2
+  %2 = bitcast <4 x i32> %0 to <16 x i8>
+  %3 = bitcast <4 x i32> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <4 x i32>
+  store <4 x i32> %5, <4 x i32>* @llvm_mips_or_v_w_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around or.v even though the globals are <4 x i32>.
+; ANYENDIAN: llvm_mips_or_v_w_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: or.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_or_v_w_test
+;
+@llvm_mips_or_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_or_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_or_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+; Calls @llvm.mips.or.v on two <2 x i64> globals: operands are bitcast to <16 x i8> for the intrinsic and the result is bitcast back before the store to @llvm_mips_or_v_d_RES.
+define void @llvm_mips_or_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2
+  %2 = bitcast <2 x i64> %0 to <16 x i8>
+  %3 = bitcast <2 x i64> %1 to <16 x i8>
+  %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
+  %5 = bitcast <16 x i8> %4 to <2 x i64>
+  store <2 x i64> %5, <2 x i64>* @llvm_mips_or_v_d_RES
+  ret void
+}
+; The checks expect byte-element ld.b/st.b around or.v even though the globals are <2 x i64>.
+; ANYENDIAN: llvm_mips_or_v_d_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: or.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_or_v_d_test
+;
+define void @or_v_b_test() nounwind {
+entry:
+  %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1
+  %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2
+  %2 = or <16 x i8> %0, %1
+  store <16 x i8> %2, <16 x i8>* @llvm_mips_or_v_b_RES
+  ret void
+}
+; Plain IR 'or' on <16 x i8>: expects ld.b, ld.b, or.v, st.b. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: or_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: or.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size or_v_b_test
+;
+define void @or_v_h_test() nounwind {
+entry:
+  %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1
+  %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2
+  %2 = or <8 x i16> %0, %1
+  store <8 x i16> %2, <8 x i16>* @llvm_mips_or_v_h_RES
+  ret void
+}
+; Plain IR 'or' on <8 x i16>: expects ld.h, ld.h, or.v, st.h. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: or_v_h_test:
+; ANYENDIAN: ld.h
+; ANYENDIAN: ld.h
+; ANYENDIAN: or.v
+; ANYENDIAN: st.h
+; ANYENDIAN: .size or_v_h_test
+;
+
+define void @or_v_w_test() nounwind {
+entry:
+  %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1
+  %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2
+  %2 = or <4 x i32> %0, %1
+  store <4 x i32> %2, <4 x i32>* @llvm_mips_or_v_w_RES
+  ret void
+}
+; Plain IR 'or' on <4 x i32>: expects ld.w, ld.w, or.v, st.w. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: or_v_w_test:
+; ANYENDIAN: ld.w
+; ANYENDIAN: ld.w
+; ANYENDIAN: or.v
+; ANYENDIAN: st.w
+; ANYENDIAN: .size or_v_w_test
+;
+
+define void @or_v_d_test() nounwind {
+entry:
+  %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1
+  %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2
+  %2 = or <2 x i64> %0, %1
+  store <2 x i64> %2, <2 x i64>* @llvm_mips_or_v_d_RES
+  ret void
+}
+; Plain IR 'or' on <2 x i64>: expects ld.d, ld.d, or.v, st.d. Uses the ANYENDIAN prefix because it is the only prefix the RUN lines of this file enable.
+; ANYENDIAN: or_v_d_test:
+; ANYENDIAN: ld.d
+; ANYENDIAN: ld.d
+; ANYENDIAN: or.v
+; ANYENDIAN: st.d
+; ANYENDIAN: .size or_v_d_test
+;
+@llvm_mips_xor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@llvm_mips_xor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_xor_v_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+
+define void @llvm_mips_xor_v_b_test() nounwind {  ; xor.v intrinsic applied to <16 x i8> data
+entry:
+  %arg1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1
+  %arg2 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2
+  %arg1.bytes = bitcast <16 x i8> %arg1 to <16 x i8>  ; source and destination types are identical here
+  %arg2.bytes = bitcast <16 x i8> %arg2 to <16 x i8>
+  %res.bytes = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %arg1.bytes, <16 x i8> %arg2.bytes)
+  %res = bitcast <16 x i8> %res.bytes to <16 x i8>
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_xor_v_b_RES
+  ret void
+}
+
+; ANYENDIAN: llvm_mips_xor_v_b_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_xor_v_b_test
+;
+@llvm_mips_xor_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@llvm_mips_xor_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_xor_v_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
+
+define void @llvm_mips_xor_v_h_test() nounwind {  ; xor.v intrinsic applied to <8 x i16> data
+entry:
+  %arg1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1
+  %arg2 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2
+  %arg1.bytes = bitcast <8 x i16> %arg1 to <16 x i8>  ; the call below is made on <16 x i8> operands
+  %arg2.bytes = bitcast <8 x i16> %arg2 to <16 x i8>
+  %res.bytes = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %arg1.bytes, <16 x i8> %arg2.bytes)
+  %res = bitcast <16 x i8> %res.bytes to <8 x i16>  ; back to the type of the result global
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_xor_v_h_RES
+  ret void
+}
+
+; ANYENDIAN: llvm_mips_xor_v_h_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_xor_v_h_test
+;
+@llvm_mips_xor_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@llvm_mips_xor_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_xor_v_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
+
+define void @llvm_mips_xor_v_w_test() nounwind {  ; xor.v intrinsic applied to <4 x i32> data
+entry:
+  %arg1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1
+  %arg2 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2
+  %arg1.bytes = bitcast <4 x i32> %arg1 to <16 x i8>  ; the call below is made on <16 x i8> operands
+  %arg2.bytes = bitcast <4 x i32> %arg2 to <16 x i8>
+  %res.bytes = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %arg1.bytes, <16 x i8> %arg2.bytes)
+  %res = bitcast <16 x i8> %res.bytes to <4 x i32>  ; back to the type of the result global
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_xor_v_w_RES
+  ret void
+}
+
+; ANYENDIAN: llvm_mips_xor_v_w_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_xor_v_w_test
+;
+@llvm_mips_xor_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
+@llvm_mips_xor_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_xor_v_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
+
+define void @llvm_mips_xor_v_d_test() nounwind {  ; xor.v intrinsic applied to <2 x i64> data
+entry:
+  %arg1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1
+  %arg2 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2
+  %arg1.bytes = bitcast <2 x i64> %arg1 to <16 x i8>  ; the call below is made on <16 x i8> operands
+  %arg2.bytes = bitcast <2 x i64> %arg2 to <16 x i8>
+  %res.bytes = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %arg1.bytes, <16 x i8> %arg2.bytes)
+  %res = bitcast <16 x i8> %res.bytes to <2 x i64>  ; back to the type of the result global
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_xor_v_d_RES
+  ret void
+}
+
+; ANYENDIAN: llvm_mips_xor_v_d_test:
+; ANYENDIAN: ld.b
+; ANYENDIAN: ld.b
+; ANYENDIAN: xor.v
+; ANYENDIAN: st.b
+; ANYENDIAN: .size llvm_mips_xor_v_d_test
+;
+define void @xor_v_b_test() nounwind {  ; plain IR xor on <16 x i8>, no intrinsic call
+entry:
+  %lhs = load <16 x i8>* @llvm_mips_xor_v_b_ARG1  ; reuses the globals of the intrinsic test
+  %rhs = load <16 x i8>* @llvm_mips_xor_v_b_ARG2
+  %res = xor <16 x i8> %lhs, %rhs
+  store <16 x i8> %res, <16 x i8>* @llvm_mips_xor_v_b_RES
+  ret void
+}
+
+; CHECK: xor_v_b_test:
+; CHECK: ld.b
+; CHECK: ld.b
+; CHECK: xor.v
+; CHECK: st.b
+; CHECK: .size xor_v_b_test
+;
+define void @xor_v_h_test() nounwind {  ; plain IR xor on <8 x i16>, no intrinsic call
+entry:
+  %lhs = load <8 x i16>* @llvm_mips_xor_v_h_ARG1  ; reuses the globals of the intrinsic test
+  %rhs = load <8 x i16>* @llvm_mips_xor_v_h_ARG2
+  %res = xor <8 x i16> %lhs, %rhs
+  store <8 x i16> %res, <8 x i16>* @llvm_mips_xor_v_h_RES
+  ret void
+}
+
+; CHECK: xor_v_h_test:
+; CHECK: ld.h
+; CHECK: ld.h
+; CHECK: xor.v
+; CHECK: st.h
+; CHECK: .size xor_v_h_test
+;
+
+define void @xor_v_w_test() nounwind {  ; plain IR xor on <4 x i32>, no intrinsic call
+entry:
+  %lhs = load <4 x i32>* @llvm_mips_xor_v_w_ARG1  ; reuses the globals of the intrinsic test
+  %rhs = load <4 x i32>* @llvm_mips_xor_v_w_ARG2
+  %res = xor <4 x i32> %lhs, %rhs
+  store <4 x i32> %res, <4 x i32>* @llvm_mips_xor_v_w_RES
+  ret void
+}
+
+; CHECK: xor_v_w_test:
+; CHECK: ld.w
+; CHECK: ld.w
+; CHECK: xor.v
+; CHECK: st.w
+; CHECK: .size xor_v_w_test
+;
+
+define void @xor_v_d_test() nounwind {  ; plain IR xor on <2 x i64>, no intrinsic call
+entry:
+  %lhs = load <2 x i64>* @llvm_mips_xor_v_d_ARG1  ; reuses the globals of the intrinsic test
+  %rhs = load <2 x i64>* @llvm_mips_xor_v_d_ARG2
+  %res = xor <2 x i64> %lhs, %rhs
+  store <2 x i64> %res, <2 x i64>* @llvm_mips_xor_v_d_RES
+  ret void
+}
+
+; CHECK: xor_v_d_test:
+; CHECK: ld.d
+; CHECK: ld.d
+; CHECK: xor.v
+; CHECK: st.d
+; CHECK: .size xor_v_d_test
+;
+declare <16 x i8> @llvm.mips.and.v(<16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.bsel.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.nor.v(<16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.or.v(<16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.xor.v(<16 x i8>, <16 x i8>) nounwind
diff --git a/test/CodeGen/Mips/msa/vecs10.ll b/test/CodeGen/Mips/msa/vecs10.ll
new file mode 100644
index 000000000000..e22e0755ef00
--- /dev/null
+++ b/test/CodeGen/Mips/msa/vecs10.ll
@@ -0,0 +1,47 @@
+; Test the MSA intrinsics that are encoded with the VECS10 instruction format.
+
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+
+@llvm_mips_bnz_v_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+
+define i32 @llvm_mips_bnz_v_test() nounwind {  ; branches on the i32 result of the bnz.v intrinsic
+entry:
+  %vec = load <16 x i8>* @llvm_mips_bnz_v_ARG1
+  %flag = tail call i32 @llvm.mips.bnz.v(<16 x i8> %vec)
+  %flag.is.zero = icmp eq i32 %flag, 0
+  br i1 %flag.is.zero, label %true, label %false
+true:
+  ret i32 2  ; taken when the intrinsic returned zero
+false:
+  ret i32 3  ; taken when the intrinsic returned nonzero
+}
+
+declare i32 @llvm.mips.bnz.v(<16 x i8>) nounwind
+
+; CHECK: llvm_mips_bnz_v_test:
+; CHECK-DAG: ld.b [[R0:\$w[0-9]+]]
+; CHECK-DAG: bnz.v [[R0]]
+; CHECK: .size llvm_mips_bnz_v_test
+
+@llvm_mips_bz_v_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+
+define i32 @llvm_mips_bz_v_test() nounwind {  ; branches on the i32 result of the bz.v intrinsic
+entry:
+  %vec = load <16 x i8>* @llvm_mips_bz_v_ARG1
+  %flag = tail call i32 @llvm.mips.bz.v(<16 x i8> %vec)
+  %flag.is.zero = icmp eq i32 %flag, 0
+  br i1 %flag.is.zero, label %true, label %false
+true:
+  ret i32 2  ; taken when the intrinsic returned zero
+false:
+  ret i32 3  ; taken when the intrinsic returned nonzero
+}
+
+declare i32 @llvm.mips.bz.v(<16 x i8>) nounwind
+
+; CHECK: llvm_mips_bz_v_test:
+; CHECK-DAG: ld.b [[R0:\$w[0-9]+]]
+; CHECK-DAG: bz.v [[R0]]
+; CHECK: .size llvm_mips_bz_v_test
+;
diff --git a/test/CodeGen/Mips/nomips16.ll b/test/CodeGen/Mips/nomips16.ll
new file mode 100644
index 000000000000..bf7c667d057f
--- /dev/null
+++ b/test/CodeGen/Mips/nomips16.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s 
+
+@x = global float 0.000000e+00, align 4
+@.str = private unnamed_addr constant [20 x i8] c"in main: mips16 %f\0A\00", align 1
+
+; Function Attrs: nounwind
+define void @foo() #0 {  ; updates @x in place using attribute group #0
+entry:
+  %x.val = load float* @x, align 4
+  %x.wide = fpext float %x.val to double  ; widen so the add is done in double
+  %sum = fadd double %x.wide, 1.500000e+00
+  %x.new = fptrunc double %sum to float  ; narrow back to the type of @x
+  store float %x.new, float* @x, align 4
+  ret void
+}
+; CHECK: 	.ent	foo
+; CHECK: 	jal	__mips16_extendsfdf2
+; CHECK:   	.end	foo
+
+; Function Attrs: nounwind
+define void @nofoo() #1 {  ; same shape as @foo but uses attribute group #1
+entry:
+  %x.val = load float* @x, align 4
+  %x.wide = fpext float %x.val to double  ; widen so the add is done in double
+  %sum = fadd double %x.wide, 3.900000e+00
+  %x.new = fptrunc double %sum to float  ; narrow back to the type of @x
+  store float %x.new, float* @x, align 4
+  ret void
+}
+
+; CHECK: 	.ent	nofoo
+; CHECK: 	cvt.d.s	$f{{.+}}, $f{{.+}}
+; CHECK: 	.end	nofoo
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll
index 70b66efee9a8..08e5aab4f7ac 100644
--- a/test/CodeGen/Mips/o32_cc.ll
+++ b/test/CodeGen/Mips/o32_cc.ll
@@ -1,11 +1,12 @@
-; RUN: llc -march=mips < %s | FileCheck %s
-
-; FIXME: Disabled because it unpredictably fails on certain platforms.
-; REQUIRES: disabled
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s
+; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=FP32EL %s
+; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck -check-prefix=FP64EL %s
 
 ; $f12, $f14
-; CHECK: ldc1 $f12, %lo
-; CHECK: ldc1 $f14, %lo
+; CHECK-LABEL: testlowercall0:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: ldc1 $f14, %lo
 define void @testlowercall0() nounwind {
 entry:
   tail call void @f0(double 5.000000e+00, double 6.000000e+00) nounwind
@@ -15,8 +16,9 @@ entry:
 declare void @f0(double, double)
 
 ; $f12, $f14
-; CHECK: lwc1 $f12, %lo
-; CHECK: lwc1 $f14, %lo
+; CHECK-LABEL: testlowercall1:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: lwc1 $f14, %lo
 define void @testlowercall1() nounwind {
 entry:
   tail call void @f1(float 8.000000e+00, float 9.000000e+00) nounwind
@@ -26,8 +28,9 @@ entry:
 declare void @f1(float, float)
 
 ; $f12, $f14
-; CHECK: lwc1 $f12, %lo
-; CHECK: ldc1 $f14, %lo
+; CHECK-LABEL: testlowercall2:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: ldc1 $f14, %lo
 define void @testlowercall2() nounwind {
 entry:
   tail call void @f2(float 8.000000e+00, double 6.000000e+00) nounwind
@@ -37,8 +40,9 @@ entry:
 declare void @f2(float, double)
 
 ; $f12, $f14
-; CHECK: ldc1 $f12, %lo
-; CHECK: lwc1 $f14, %lo
+; CHECK-LABEL: testlowercall3:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: lwc1 $f14, %lo
 define void @testlowercall3() nounwind {
 entry:
   tail call void @f3(double 5.000000e+00, float 9.000000e+00) nounwind
@@ -48,10 +52,11 @@ entry:
 declare void @f3(double, float)
 
 ; $4, $5, $6, $7
-; CHECK: addiu $4, $zero, 12
-; CHECK: addiu $5, $zero, 13
-; CHECK: addiu $6, $zero, 14
-; CHECK: addiu $7, $zero, 15
+; CHECK-LABEL: testlowercall4:
+; CHECK-DAG: addiu $4, $zero, 12
+; CHECK-DAG: addiu $5, $zero, 13
+; CHECK-DAG: addiu $6, $zero, 14
+; CHECK-DAG: addiu $7, $zero, 15
 define void @testlowercall4() nounwind {
 entry:
   tail call void @f4(i32 12, i32 13, i32 14, i32 15) nounwind
@@ -61,10 +66,11 @@ entry:
 declare void @f4(i32, i32, i32, i32)
 
 ; $f12, $6, stack
-; CHECK: sw
-; CHECK: sw
-; CHECK: ldc1 $f12, %lo
-; CHECK: addiu $6, $zero, 23
+; CHECK-LABEL: testlowercall5:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: addiu $6, $zero, 23
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
 define void @testlowercall5() nounwind {
 entry:
   tail call void @f5(double 1.500000e+01, i32 23, double 1.700000e+01) nounwind
@@ -74,9 +80,10 @@ entry:
 declare void @f5(double, i32, double)
 
 ; $f12, $6, $7
-; CHECK: ldc1 $f12, %lo
-; CHECK: addiu $6, $zero, 33
-; CHECK: addiu $7, $zero, 24
+; CHECK-LABEL: testlowercall6:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: addiu $6, $zero, 33
+; CHECK-DAG: addiu $7, $zero, 24
 define void @testlowercall6() nounwind {
 entry:
   tail call void @f6(double 2.500000e+01, i32 33, i32 24) nounwind
@@ -86,9 +93,10 @@ entry:
 declare void @f6(double, i32, i32)
 
 ; $f12, $5, $6
-; CHECK: lwc1 $f12, %lo
-; CHECK: addiu $5, $zero, 43
-; CHECK: addiu $6, $zero, 34
+; CHECK-LABEL: testlowercall7:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: addiu $5, $zero, 43
+; CHECK-DAG: addiu $6, $zero, 34
 define void @testlowercall7() nounwind {
 entry:
   tail call void @f7(float 1.800000e+01, i32 43, i32 34) nounwind
@@ -98,11 +106,12 @@ entry:
 declare void @f7(float, i32, i32)
 
 ; $4, $5, $6, stack
-; CHECK: sw
-; CHECK: sw
-; CHECK: addiu $4, $zero, 22
-; CHECK: addiu $5, $zero, 53
-; CHECK: addiu $6, $zero, 44
+; CHECK-LABEL: testlowercall8:
+; CHECK-DAG: addiu $4, $zero, 22
+; CHECK-DAG: addiu $5, $zero, 53
+; CHECK-DAG: addiu $6, $zero, 44
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
 define void @testlowercall8() nounwind {
 entry:
   tail call void @f8(i32 22, i32 53, i32 44, double 4.000000e+00) nounwind
@@ -112,10 +121,11 @@ entry:
 declare void @f8(i32, i32, i32, double)
 
 ; $4, $5, $6, $7
-; CHECK: addiu $4, $zero, 32
-; CHECK: addiu $5, $zero, 63
-; CHECK: addiu $6, $zero, 54
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall9:
+; CHECK-DAG: addiu $4, $zero, 32
+; CHECK-DAG: addiu $5, $zero, 63
+; CHECK-DAG: addiu $6, $zero, 54
+; CHECK-DAG: lui $7, 16688
 define void @testlowercall9() nounwind {
 entry:
   tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind
@@ -125,10 +135,15 @@ entry:
 declare void @f9(i32, i32, i32, float)
 
 ; $4, $5, ($6, $7)
-; CHECK: addiu $4, $zero, 42
-; CHECK: addiu $5, $zero, 73
-; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall10:
+; CHECK-DAG: addiu $4, $zero, 42
+; CHECK-DAG: addiu $5, $zero, 73
+; FP32EL-LABEL: testlowercall10:
+; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
+; FP64EL-LABEL: testlowercall10:
+; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall10() nounwind {
 entry:
   tail call void @f10(i32 42, i32 73, double 2.700000e+01) nounwind
@@ -138,9 +153,14 @@ entry:
 declare void @f10(i32, i32, double)
 
 ; $4, ($6, $7)
-; CHECK: addiu $4, $zero, 52
-; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall11:
+; CHECK-DAG: addiu $4, $zero, 52
+; FP32EL-LABEL: testlowercall11:
+; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
+; FP64EL-LABEL: testlowercall11:
+; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall11() nounwind {
 entry:
   tail call void @f11(i32 52, double 1.600000e+01) nounwind
@@ -150,10 +170,11 @@ entry:
 declare void @f11(i32, double)
 
 ; $f12, $f14, $6, $7
-; CHECK: lwc1 $f12, %lo
-; CHECK: lwc1 $f14, %lo
-; CHECK: ori $6
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall12:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: lwc1 $f14, %lo
+; CHECK-DAG: lui $6, 16672
+; CHECK-DAG: lui $7, 16808
 define void @testlowercall12() nounwind {
 entry:
   tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
@@ -163,10 +184,11 @@ entry:
 declare void @f12(float, float, float, float)
 
 ; $f12, $5, $6, $7
-; CHECK: lwc1 $f12, %lo
-; CHECK: addiu $5, $zero, 83
-; CHECK: ori $6
-; CHECK: addiu $7, $zero, 25
+; CHECK-LABEL: testlowercall13:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: addiu $5, $zero, 83
+; CHECK-DAG: lui $6, 16800
+; CHECK-DAG: addiu $7, $zero, 25
 define void @testlowercall13() nounwind {
 entry:
   tail call void @f13(float 3.800000e+01, i32 83, float 2.000000e+01, i32 25) nounwind
@@ -177,9 +199,10 @@ entry:
 declare void @f13(float, i32, float, i32)
 
 ; $f12, $f14, $7
-; CHECK: ldc1 $f12, %lo
-; CHECK: lwc1 $f14, %lo
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall14:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: lwc1 $f14, %lo
+; CHECK-DAG: lui $7, 16880
 define void @testlowercall14() nounwind {
 entry:
   tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind
@@ -189,10 +212,15 @@ entry:
 declare void @f14(double, float, float)
 
 ; $f12, $f14, ($6, $7)
-; CHECK: lwc1 $f12, %lo
-; CHECK: lwc1 $f14, %lo
-; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall15:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: lwc1 $f14, %lo
+; FP32EL-LABEL: testlowercall15:
+; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
+; FP64EL-LABEL: testlowercall15:
+; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall15() nounwind {
 entry:
   tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind
@@ -202,10 +230,11 @@ entry:
 declare void @f15(float, float, double)
 
 ; $4, $5, $6, $7
-; CHECK: addiu $4, $zero, 62
-; CHECK: ori $5
-; CHECK: addiu $6, $zero, 64
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall16:
+; CHECK-DAG: addiu $4, $zero, 62
+; CHECK-DAG: lui $5, 16964
+; CHECK-DAG: addiu $6, $zero, 64
+; CHECK-DAG: lui $7, 16888
 define void @testlowercall16() nounwind {
 entry:
   tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind
@@ -215,10 +244,11 @@ entry:
 declare void @f16(i32, float, i32, float)
 
 ; $4, $5, $6, $7
-; CHECK: addiu $4, $zero, 72
-; CHECK: ori $5
-; CHECK: addiu $6, $zero, 74
-; CHECK: addiu $7, $zero, 35
+; CHECK-LABEL: testlowercall17:
+; CHECK-DAG: addiu $4, $zero, 72
+; CHECK-DAG: lui $5, 17004
+; CHECK-DAG: addiu $6, $zero, 74
+; CHECK-DAG: addiu $7, $zero, 35
 define void @testlowercall17() nounwind {
 entry:
   tail call void @f17(i32 72, float 5.900000e+01, i32 74, i32 35) nounwind
@@ -228,10 +258,11 @@ entry:
 declare void @f17(i32, float, i32, i32)
 
 ; $4, $5, $6, $7
-; CHECK: addiu $4, $zero, 82
-; CHECK: addiu $5, $zero, 93
-; CHECK: ori $6
-; CHECK: addiu $7, $zero, 45
+; CHECK-LABEL: testlowercall18:
+; CHECK-DAG: addiu $4, $zero, 82
+; CHECK-DAG: addiu $5, $zero, 93
+; CHECK-DAG: lui $6, 16928
+; CHECK-DAG: addiu $7, $zero, 45
 define void @testlowercall18() nounwind {
 entry:
   tail call void @f18(i32 82, i32 93, float 4.000000e+01, i32 45) nounwind
@@ -242,11 +273,16 @@ declare void @f18(i32, i32, float, i32)
 
 
 ; $4, ($6, $7), stack
-; CHECK: sw
-; CHECK: sw
-; CHECK: addiu $4, $zero, 92
-; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall20:
+; CHECK-DAG: addiu $4, $zero, 92
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
+; FP32EL-LABEL: testlowercall20:
+; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
+; FP64EL-LABEL: testlowercall20:
+; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall20() nounwind {
 entry:
   tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind
@@ -256,8 +292,9 @@ entry:
 declare void @f20(i32, double, double)
 
 ; $f12, $5
-; CHECK: lwc1 $f12, %lo
-; CHECK: addiu $5, $zero, 103
+; CHECK-LABEL: testlowercall21:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: addiu $5, $zero, 103
 define void @testlowercall21() nounwind {
 entry:
   tail call void @f21(float 5.800000e+01, i32 103) nounwind
@@ -267,10 +304,15 @@ entry:
 declare void @f21(float, i32)
 
 ; $f12, $5, ($6, $7)
-; CHECK: lwc1 $f12, %lo
-; CHECK: addiu $5, $zero, 113
-; CHECK: addiu $6, $zero, 0
-; CHECK: ori $7
+; CHECK-LABEL: testlowercall22:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: addiu $5, $zero, 113
+; FP32EL-LABEL: testlowercall22:
+; FP32EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP32EL-DAG: mfc1 $7, $f{{[0-9]+}}
+; FP64EL-LABEL: testlowercall22:
+; FP64EL-DAG: mfc1 $6, $f{{[0-9]+}}
+; FP64EL-DAG: mfhc1 $7, $f{{[0-9]+}}
 define void @testlowercall22() nounwind {
 entry:
   tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind
@@ -280,8 +322,9 @@ entry:
 declare void @f22(float, i32, double)
 
 ; $f12, f6
-; CHECK: ldc1 $f12, %lo
-; CHECK: addiu $6, $zero, 123
+; CHECK-LABEL: testlowercall23:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: addiu $6, $zero, 123
 define void @testlowercall23() nounwind {
 entry:
   tail call void @f23(double 4.500000e+01, i32 123) nounwind
@@ -291,10 +334,11 @@ entry:
 declare void @f23(double, i32)
 
 ; $f12,$6, stack
-; CHECK: sw
-; CHECK: sw
-; CHECK: ldc1 $f12, %lo
-; CHECK: addiu $6, $zero, 133
+; CHECK-LABEL: testlowercall24:
+; CHECK-DAG: ldc1 $f12, %lo
+; CHECK-DAG: addiu $6, $zero, 133
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 16($sp)
+; CHECK-DAG: sw ${{[a-z0-9]+}}, 20($sp)
 define void @testlowercall24() nounwind {
 entry:
   tail call void @f24(double 5.500000e+01, i32 133, double 6.700000e+01) nounwind
@@ -303,19 +347,19 @@ entry:
 
 declare void @f24(double, i32, double)
 
-; CHECK: lwc1 $f12, %lo
-; lwc1 $f12, %lo
-; CHECK: lwc1 $f14, %lo
-; CHECK: ori $6
-; CHECK: ori $7
-; CHECK: lwc1 $f12, %lo
-; CHECK: addiu $5, $zero, 83
-; CHECK: ori $6
-; CHECK: addiu $7, $zero, 25
-; CHECK: addiu $4, $zero, 82
-; CHECK: addiu $5, $zero, 93
-; CHECK: ori $6
-; CHECK: addiu $7, $zero, 45
+; CHECK-LABEL: testlowercall25:
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: lwc1 $f14, %lo
+; CHECK-DAG: lui $6
+; CHECK-DAG: lui $7
+; CHECK-DAG: lwc1 $f12, %lo
+; CHECK-DAG: addiu $5, $zero, 83
+; CHECK-DAG: lui $6
+; CHECK-DAG: addiu $7, $zero, 25
+; CHECK-DAG: addiu $4, $zero, 82
+; CHECK-DAG: addiu $5, $zero, 93
+; CHECK-DAG: lui $6
+; CHECK-DAG: addiu $7, $zero, 45
 define void @testlowercall25() nounwind {
 entry:
   tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index 0a8f85f4825d..5db47acc5a85 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -10,22 +10,23 @@
 
 define void @f1() nounwind {
 entry:
-; CHECK: lw  $[[R1:[0-9]+]], %got(f1.s1)
-; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
-; CHECK: lw  $[[R7:[0-9]+]], 12($[[R0]])
-; CHECK: lw  $[[R3:[0-9]+]], 16($[[R0]])
-; CHECK: lw  $[[R4:[0-9]+]], 20($[[R0]])
-; CHECK: lw  $[[R5:[0-9]+]], 24($[[R0]])
-; CHECK: lw  $[[R6:[0-9]+]], 28($[[R0]])
-; CHECK: sw  $[[R6]], 36($sp)
-; CHECK: sw  $[[R5]], 32($sp)
-; CHECK: sw  $[[R4]], 28($sp)
-; CHECK: sw  $[[R3]], 24($sp)
-; CHECK: sw  $[[R7]], 20($sp)
-; CHECK: lw  $[[R2:[0-9]+]], 8($[[R0]])
-; CHECK: sw  $[[R2]], 16($sp)
-; CHECK: lw  $6, %lo(f1.s1)($[[R1]])
-; CHECK: lw  $7, 4($[[R0]])
+; CHECK-LABEL: f1:
+; CHECK-DAG: lw  $[[R1:[0-9]+]], %got(f1.s1)
+; CHECK-DAG: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
+; CHECK-DAG: lw  $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK-DAG: lw  $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK-DAG: lw  $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK-DAG: lw  $[[R5:[0-9]+]], 24($[[R0]])
+; CHECK-DAG: lw  $[[R6:[0-9]+]], 28($[[R0]])
+; CHECK-DAG: sw  $[[R6]], 36($sp)
+; CHECK-DAG: sw  $[[R5]], 32($sp)
+; CHECK-DAG: sw  $[[R4]], 28($sp)
+; CHECK-DAG: sw  $[[R3]], 24($sp)
+; CHECK-DAG: sw  $[[R7]], 20($sp)
+; CHECK-DAG: lw  $[[R2:[0-9]+]], 8($[[R0]])
+; CHECK-DAG: sw  $[[R2]], 16($sp)
+; CHECK-DAG: lw  $6, %lo(f1.s1)($[[R1]])
+; CHECK-DAG: lw  $7, 4($[[R0]])
   %agg.tmp10 = alloca %struct.S3, align 4
   call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
   call void @callee2(%struct.S2* byval @f1.s2) nounwind
@@ -61,17 +62,17 @@ entry:
 ; CHECK: mfc1 $6, $f[[F0]]
 
   %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
-  %tmp = load i32* %i2, align 4, !tbaa !0
+  %tmp = load i32* %i2, align 4
   %d = getelementptr inbounds %struct.S1* %s1, i32 0, i32 4
-  %tmp1 = load double* %d, align 8, !tbaa !3
+  %tmp1 = load double* %d, align 8
   %ll = getelementptr inbounds %struct.S1* %s1, i32 0, i32 3
-  %tmp2 = load i64* %ll, align 8, !tbaa !4
+  %tmp2 = load i64* %ll, align 8
   %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
-  %tmp3 = load i32* %i, align 4, !tbaa !0
+  %tmp3 = load i32* %i, align 4
   %s = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1
-  %tmp4 = load i16* %s, align 2, !tbaa !5
+  %tmp4 = load i16* %s, align 2
   %c = getelementptr inbounds %struct.S1* %s1, i32 0, i32 0
-  %tmp5 = load i8* %c, align 1, !tbaa !1
+  %tmp5 = load i8* %c, align 1
   tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind
   ret void
 }
@@ -90,9 +91,9 @@ entry:
 ; CHECK: sw  $[[R0]], 24($sp)
 
   %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
-  %tmp = load i32* %arrayidx, align 4, !tbaa !0
+  %tmp = load i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 3
-  %tmp3 = load i32* %arrayidx2, align 4, !tbaa !0
+  %tmp3 = load i32* %arrayidx2, align 4
   tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind
   ret void
 }
@@ -110,11 +111,11 @@ entry:
 ; CHECK: sw  $[[R1]], 24($sp)
 
   %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
-  %tmp = load i32* %i, align 4, !tbaa !0
+  %tmp = load i32* %i, align 4
   %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
-  %tmp1 = load i32* %i2, align 4, !tbaa !0
+  %tmp1 = load i32* %i2, align 4
   %c = getelementptr inbounds %struct.S3* %s3, i32 0, i32 0
-  %tmp2 = load i8* %c, align 1, !tbaa !1
+  %tmp2 = load i8* %c, align 1
   tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind
   ret void
 }
@@ -128,10 +129,3 @@ entry:
 }
 
 declare void @f6(%struct.S4* nocapture byval, i64)
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"double", metadata !1}
-!4 = metadata !{metadata !"long long", metadata !1}
-!5 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
index 35332b6550d8..10972e884ac3 100644
--- a/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -27,7 +27,7 @@ entry:
   %tmp = load i32* %b, align 4
   ret i32 %tmp
 
-; CHECK: va1:
+; CHECK-LABEL: va1:
 ; CHECK: addiu   $sp, $sp, -16
 ; CHECK: sw      $7, 28($sp)
 ; CHECK: sw      $6, 24($sp)
@@ -53,7 +53,7 @@ entry:
   %tmp = load double* %b, align 8
   ret double %tmp
 
-; CHECK: va2:
+; CHECK-LABEL: va2:
 ; CHECK: addiu   $sp, $sp, -16
 ; CHECK: sw      $7, 28($sp)
 ; CHECK: sw      $6, 24($sp)
@@ -81,7 +81,7 @@ entry:
   %tmp = load i32* %b, align 4
   ret i32 %tmp
 
-; CHECK: va3:
+; CHECK-LABEL: va3:
 ; CHECK: addiu   $sp, $sp, -16
 ; CHECK: sw      $7, 28($sp)
 ; CHECK: sw      $6, 24($sp)
@@ -104,7 +104,7 @@ entry:
   %tmp = load double* %b, align 8
   ret double %tmp
 
-; CHECK: va4:
+; CHECK-LABEL: va4:
 ; CHECK: addiu   $sp, $sp, -24
 ; CHECK: sw      $7, 36($sp)
 ; CHECK: sw      $6, 32($sp)
@@ -132,7 +132,7 @@ entry:
   %tmp = load i32* %d, align 4
   ret i32 %tmp
 
-; CHECK: va5:
+; CHECK-LABEL: va5:
 ; CHECK: addiu   $sp, $sp, -24
 ; CHECK: sw      $7, 36($sp)
 ; CHECK: lw      $2, 36($sp)
@@ -158,7 +158,7 @@ entry:
   %tmp = load double* %d, align 8
   ret double %tmp
 
-; CHECK: va6:
+; CHECK-LABEL: va6:
 ; CHECK: addiu   $sp, $sp, -24
 ; CHECK: sw      $7, 36($sp)
 ; CHECK: addiu   $[[R0:[0-9]+]], $sp, 36
@@ -186,7 +186,7 @@ entry:
   %tmp = load i32* %c, align 4
   ret i32 %tmp
 
-; CHECK: va7:
+; CHECK-LABEL: va7:
 ; CHECK: addiu   $sp, $sp, -24
 ; CHECK: lw      $2, 40($sp)
 }
@@ -209,7 +209,7 @@ entry:
   %tmp = load double* %c, align 8
   ret double %tmp
 
-; CHECK: va8:
+; CHECK-LABEL: va8:
 ; CHECK: addiu   $sp, $sp, -32
 ; CHECK: addiu   ${{[0-9]+}}, $sp, 48
 ; CHECK: ldc1    $f0, 48($sp)
@@ -235,7 +235,7 @@ entry:
   %tmp = load i32* %d, align 4
   ret i32 %tmp
 
-; CHECK: va9:
+; CHECK-LABEL: va9:
 ; CHECK: addiu   $sp, $sp, -32
 ; CHECK: lw      $2, 52($sp)
 }
@@ -260,7 +260,7 @@ entry:
   %tmp = load double* %d, align 8
   ret double %tmp
 
-; CHECK: va10:
+; CHECK-LABEL: va10:
 ; CHECK: addiu   $sp, $sp, -32
 ; CHECK: addiu   $[[R0:[0-9]+]], $sp, 52
 ; CHECK: addiu   $[[R1:[0-9]+]], $[[R0]], 7
diff --git a/test/CodeGen/Mips/optimize-fp-math.ll b/test/CodeGen/Mips/optimize-fp-math.ll
new file mode 100644
index 000000000000..8b71dc42344c
--- /dev/null
+++ b/test/CodeGen/Mips/optimize-fp-math.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
+
+; 32-LABEL: test_sqrtf_float_:
+; 32: sqrt.s $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 32: c.un.s $f[[R0]], $f[[R0]]
+; 64-LABEL: test_sqrtf_float_:
+; 64: sqrt.s $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 64: c.un.s $f[[R0]], $f[[R0]]
+
+define float @test_sqrtf_float_(float %a) {  ; forwards %a to the external sqrtf
+entry:
+  %root = tail call float @sqrtf(float %a)
+  ret float %root
+}
+
+declare float @sqrtf(float)
+
+; 32-LABEL: test_sqrt_double_:
+; 32: sqrt.d $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 32: c.un.d $f[[R0]], $f[[R0]]
+; 64-LABEL: test_sqrt_double_:
+; 64: sqrt.d $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 64: c.un.d $f[[R0]], $f[[R0]]
+
+define double @test_sqrt_double_(double %a) {  ; forwards %a to the external sqrt
+entry:
+  %root = tail call double @sqrt(double %a)
+  ret double %root
+}
+
+declare double @sqrt(double)
diff --git a/test/CodeGen/Mips/powif64_16.ll b/test/CodeGen/Mips/powif64_16.ll
new file mode 100644
index 000000000000..35a7ca9201e2
--- /dev/null
+++ b/test/CodeGen/Mips/powif64_16.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s 
+
+declare float     @llvm.powi.f32(float  %Val, i32 %power)
+declare double    @llvm.powi.f64(double %Val, i32 %power)
+
+define float @foo_pow_f32(float %y, i32 %p) {  ; returns powi of %y raised to %p, float variant
+  %pow = tail call float @llvm.powi.f32(float %y, i32 %p)
+; CHECK-NOT: .ent	__call_stub_fp_llvm.powi.f32
+; CHECK-NOT: {{.*}} jal llvm.powi.f32
+  ret float %pow
+}
+
+define double @foo_pow_f64(double %y, i32 %p) {  ; returns powi of %y raised to %p, double variant
+  %pow = tail call double @llvm.powi.f64(double %y, i32 %p)
+; CHECK-NOT: .ent	__call_stub_fp_llvm.powi.f64
+; CHECK-NOT: {{.*}} jal llvm.powi.f64 
+  ret double %pow
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind readonly }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index d1a67fd9f4bf..058db0bb977a 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -3,14 +3,14 @@
 ; RUN: llc -march=mips < %s | FileCheck %s
 
 define private void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
   ret void
 }
 
 @baz = private global i32 4
 
 define i32 @bar() {
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: call16($foo)
 ; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
 ; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
diff --git a/test/CodeGen/Mips/ra-allocatable.ll b/test/CodeGen/Mips/ra-allocatable.ll
index 76217886829a..afc5cb0c2556 100644
--- a/test/CodeGen/Mips/ra-allocatable.ll
+++ b/test/CodeGen/Mips/ra-allocatable.ll
@@ -98,191 +98,186 @@ entry:
 ; CHECK: lw  $ra, {{[0-9]+}}($sp)            # 4-byte Folded Reload
 ; CHECK: jr  $ra
 
-  %0 = load i32* @a0, align 4, !tbaa !0
-  %1 = load i32** @b0, align 4, !tbaa !3
-  store i32 %0, i32* %1, align 4, !tbaa !0
-  %2 = load i32* @a1, align 4, !tbaa !0
-  %3 = load i32** @b1, align 4, !tbaa !3
-  store i32 %2, i32* %3, align 4, !tbaa !0
-  %4 = load i32* @a2, align 4, !tbaa !0
-  %5 = load i32** @b2, align 4, !tbaa !3
-  store i32 %4, i32* %5, align 4, !tbaa !0
-  %6 = load i32* @a3, align 4, !tbaa !0
-  %7 = load i32** @b3, align 4, !tbaa !3
-  store i32 %6, i32* %7, align 4, !tbaa !0
-  %8 = load i32* @a4, align 4, !tbaa !0
-  %9 = load i32** @b4, align 4, !tbaa !3
-  store i32 %8, i32* %9, align 4, !tbaa !0
-  %10 = load i32* @a5, align 4, !tbaa !0
-  %11 = load i32** @b5, align 4, !tbaa !3
-  store i32 %10, i32* %11, align 4, !tbaa !0
-  %12 = load i32* @a6, align 4, !tbaa !0
-  %13 = load i32** @b6, align 4, !tbaa !3
-  store i32 %12, i32* %13, align 4, !tbaa !0
-  %14 = load i32* @a7, align 4, !tbaa !0
-  %15 = load i32** @b7, align 4, !tbaa !3
-  store i32 %14, i32* %15, align 4, !tbaa !0
-  %16 = load i32* @a8, align 4, !tbaa !0
-  %17 = load i32** @b8, align 4, !tbaa !3
-  store i32 %16, i32* %17, align 4, !tbaa !0
-  %18 = load i32* @a9, align 4, !tbaa !0
-  %19 = load i32** @b9, align 4, !tbaa !3
-  store i32 %18, i32* %19, align 4, !tbaa !0
-  %20 = load i32* @a10, align 4, !tbaa !0
-  %21 = load i32** @b10, align 4, !tbaa !3
-  store i32 %20, i32* %21, align 4, !tbaa !0
-  %22 = load i32* @a11, align 4, !tbaa !0
-  %23 = load i32** @b11, align 4, !tbaa !3
-  store i32 %22, i32* %23, align 4, !tbaa !0
-  %24 = load i32* @a12, align 4, !tbaa !0
-  %25 = load i32** @b12, align 4, !tbaa !3
-  store i32 %24, i32* %25, align 4, !tbaa !0
-  %26 = load i32* @a13, align 4, !tbaa !0
-  %27 = load i32** @b13, align 4, !tbaa !3
-  store i32 %26, i32* %27, align 4, !tbaa !0
-  %28 = load i32* @a14, align 4, !tbaa !0
-  %29 = load i32** @b14, align 4, !tbaa !3
-  store i32 %28, i32* %29, align 4, !tbaa !0
-  %30 = load i32* @a15, align 4, !tbaa !0
-  %31 = load i32** @b15, align 4, !tbaa !3
-  store i32 %30, i32* %31, align 4, !tbaa !0
-  %32 = load i32* @a16, align 4, !tbaa !0
-  %33 = load i32** @b16, align 4, !tbaa !3
-  store i32 %32, i32* %33, align 4, !tbaa !0
-  %34 = load i32* @a17, align 4, !tbaa !0
-  %35 = load i32** @b17, align 4, !tbaa !3
-  store i32 %34, i32* %35, align 4, !tbaa !0
-  %36 = load i32* @a18, align 4, !tbaa !0
-  %37 = load i32** @b18, align 4, !tbaa !3
-  store i32 %36, i32* %37, align 4, !tbaa !0
-  %38 = load i32* @a19, align 4, !tbaa !0
-  %39 = load i32** @b19, align 4, !tbaa !3
-  store i32 %38, i32* %39, align 4, !tbaa !0
-  %40 = load i32* @a20, align 4, !tbaa !0
-  %41 = load i32** @b20, align 4, !tbaa !3
-  store i32 %40, i32* %41, align 4, !tbaa !0
-  %42 = load i32* @a21, align 4, !tbaa !0
-  %43 = load i32** @b21, align 4, !tbaa !3
-  store i32 %42, i32* %43, align 4, !tbaa !0
-  %44 = load i32* @a22, align 4, !tbaa !0
-  %45 = load i32** @b22, align 4, !tbaa !3
-  store i32 %44, i32* %45, align 4, !tbaa !0
-  %46 = load i32* @a23, align 4, !tbaa !0
-  %47 = load i32** @b23, align 4, !tbaa !3
-  store i32 %46, i32* %47, align 4, !tbaa !0
-  %48 = load i32* @a24, align 4, !tbaa !0
-  %49 = load i32** @b24, align 4, !tbaa !3
-  store i32 %48, i32* %49, align 4, !tbaa !0
-  %50 = load i32* @a25, align 4, !tbaa !0
-  %51 = load i32** @b25, align 4, !tbaa !3
-  store i32 %50, i32* %51, align 4, !tbaa !0
-  %52 = load i32* @a26, align 4, !tbaa !0
-  %53 = load i32** @b26, align 4, !tbaa !3
-  store i32 %52, i32* %53, align 4, !tbaa !0
-  %54 = load i32* @a27, align 4, !tbaa !0
-  %55 = load i32** @b27, align 4, !tbaa !3
-  store i32 %54, i32* %55, align 4, !tbaa !0
-  %56 = load i32* @a28, align 4, !tbaa !0
-  %57 = load i32** @b28, align 4, !tbaa !3
-  store i32 %56, i32* %57, align 4, !tbaa !0
-  %58 = load i32* @a29, align 4, !tbaa !0
-  %59 = load i32** @b29, align 4, !tbaa !3
-  store i32 %58, i32* %59, align 4, !tbaa !0
-  %60 = load i32* @a0, align 4, !tbaa !0
-  %61 = load i32** @c0, align 4, !tbaa !3
-  store i32 %60, i32* %61, align 4, !tbaa !0
-  %62 = load i32* @a1, align 4, !tbaa !0
-  %63 = load i32** @c1, align 4, !tbaa !3
-  store i32 %62, i32* %63, align 4, !tbaa !0
-  %64 = load i32* @a2, align 4, !tbaa !0
-  %65 = load i32** @c2, align 4, !tbaa !3
-  store i32 %64, i32* %65, align 4, !tbaa !0
-  %66 = load i32* @a3, align 4, !tbaa !0
-  %67 = load i32** @c3, align 4, !tbaa !3
-  store i32 %66, i32* %67, align 4, !tbaa !0
-  %68 = load i32* @a4, align 4, !tbaa !0
-  %69 = load i32** @c4, align 4, !tbaa !3
-  store i32 %68, i32* %69, align 4, !tbaa !0
-  %70 = load i32* @a5, align 4, !tbaa !0
-  %71 = load i32** @c5, align 4, !tbaa !3
-  store i32 %70, i32* %71, align 4, !tbaa !0
-  %72 = load i32* @a6, align 4, !tbaa !0
-  %73 = load i32** @c6, align 4, !tbaa !3
-  store i32 %72, i32* %73, align 4, !tbaa !0
-  %74 = load i32* @a7, align 4, !tbaa !0
-  %75 = load i32** @c7, align 4, !tbaa !3
-  store i32 %74, i32* %75, align 4, !tbaa !0
-  %76 = load i32* @a8, align 4, !tbaa !0
-  %77 = load i32** @c8, align 4, !tbaa !3
-  store i32 %76, i32* %77, align 4, !tbaa !0
-  %78 = load i32* @a9, align 4, !tbaa !0
-  %79 = load i32** @c9, align 4, !tbaa !3
-  store i32 %78, i32* %79, align 4, !tbaa !0
-  %80 = load i32* @a10, align 4, !tbaa !0
-  %81 = load i32** @c10, align 4, !tbaa !3
-  store i32 %80, i32* %81, align 4, !tbaa !0
-  %82 = load i32* @a11, align 4, !tbaa !0
-  %83 = load i32** @c11, align 4, !tbaa !3
-  store i32 %82, i32* %83, align 4, !tbaa !0
-  %84 = load i32* @a12, align 4, !tbaa !0
-  %85 = load i32** @c12, align 4, !tbaa !3
-  store i32 %84, i32* %85, align 4, !tbaa !0
-  %86 = load i32* @a13, align 4, !tbaa !0
-  %87 = load i32** @c13, align 4, !tbaa !3
-  store i32 %86, i32* %87, align 4, !tbaa !0
-  %88 = load i32* @a14, align 4, !tbaa !0
-  %89 = load i32** @c14, align 4, !tbaa !3
-  store i32 %88, i32* %89, align 4, !tbaa !0
-  %90 = load i32* @a15, align 4, !tbaa !0
-  %91 = load i32** @c15, align 4, !tbaa !3
-  store i32 %90, i32* %91, align 4, !tbaa !0
-  %92 = load i32* @a16, align 4, !tbaa !0
-  %93 = load i32** @c16, align 4, !tbaa !3
-  store i32 %92, i32* %93, align 4, !tbaa !0
-  %94 = load i32* @a17, align 4, !tbaa !0
-  %95 = load i32** @c17, align 4, !tbaa !3
-  store i32 %94, i32* %95, align 4, !tbaa !0
-  %96 = load i32* @a18, align 4, !tbaa !0
-  %97 = load i32** @c18, align 4, !tbaa !3
-  store i32 %96, i32* %97, align 4, !tbaa !0
-  %98 = load i32* @a19, align 4, !tbaa !0
-  %99 = load i32** @c19, align 4, !tbaa !3
-  store i32 %98, i32* %99, align 4, !tbaa !0
-  %100 = load i32* @a20, align 4, !tbaa !0
-  %101 = load i32** @c20, align 4, !tbaa !3
-  store i32 %100, i32* %101, align 4, !tbaa !0
-  %102 = load i32* @a21, align 4, !tbaa !0
-  %103 = load i32** @c21, align 4, !tbaa !3
-  store i32 %102, i32* %103, align 4, !tbaa !0
-  %104 = load i32* @a22, align 4, !tbaa !0
-  %105 = load i32** @c22, align 4, !tbaa !3
-  store i32 %104, i32* %105, align 4, !tbaa !0
-  %106 = load i32* @a23, align 4, !tbaa !0
-  %107 = load i32** @c23, align 4, !tbaa !3
-  store i32 %106, i32* %107, align 4, !tbaa !0
-  %108 = load i32* @a24, align 4, !tbaa !0
-  %109 = load i32** @c24, align 4, !tbaa !3
-  store i32 %108, i32* %109, align 4, !tbaa !0
-  %110 = load i32* @a25, align 4, !tbaa !0
-  %111 = load i32** @c25, align 4, !tbaa !3
-  store i32 %110, i32* %111, align 4, !tbaa !0
-  %112 = load i32* @a26, align 4, !tbaa !0
-  %113 = load i32** @c26, align 4, !tbaa !3
-  store i32 %112, i32* %113, align 4, !tbaa !0
-  %114 = load i32* @a27, align 4, !tbaa !0
-  %115 = load i32** @c27, align 4, !tbaa !3
-  store i32 %114, i32* %115, align 4, !tbaa !0
-  %116 = load i32* @a28, align 4, !tbaa !0
-  %117 = load i32** @c28, align 4, !tbaa !3
-  store i32 %116, i32* %117, align 4, !tbaa !0
-  %118 = load i32* @a29, align 4, !tbaa !0
-  %119 = load i32** @c29, align 4, !tbaa !3
-  store i32 %118, i32* %119, align 4, !tbaa !0
-  %120 = load i32* @a0, align 4, !tbaa !0
+  %0 = load i32* @a0, align 4
+  %1 = load i32** @b0, align 4
+  store i32 %0, i32* %1, align 4
+  %2 = load i32* @a1, align 4
+  %3 = load i32** @b1, align 4
+  store i32 %2, i32* %3, align 4
+  %4 = load i32* @a2, align 4
+  %5 = load i32** @b2, align 4
+  store i32 %4, i32* %5, align 4
+  %6 = load i32* @a3, align 4
+  %7 = load i32** @b3, align 4
+  store i32 %6, i32* %7, align 4
+  %8 = load i32* @a4, align 4
+  %9 = load i32** @b4, align 4
+  store i32 %8, i32* %9, align 4
+  %10 = load i32* @a5, align 4
+  %11 = load i32** @b5, align 4
+  store i32 %10, i32* %11, align 4
+  %12 = load i32* @a6, align 4
+  %13 = load i32** @b6, align 4
+  store i32 %12, i32* %13, align 4
+  %14 = load i32* @a7, align 4
+  %15 = load i32** @b7, align 4
+  store i32 %14, i32* %15, align 4
+  %16 = load i32* @a8, align 4
+  %17 = load i32** @b8, align 4
+  store i32 %16, i32* %17, align 4
+  %18 = load i32* @a9, align 4
+  %19 = load i32** @b9, align 4
+  store i32 %18, i32* %19, align 4
+  %20 = load i32* @a10, align 4
+  %21 = load i32** @b10, align 4
+  store i32 %20, i32* %21, align 4
+  %22 = load i32* @a11, align 4
+  %23 = load i32** @b11, align 4
+  store i32 %22, i32* %23, align 4
+  %24 = load i32* @a12, align 4
+  %25 = load i32** @b12, align 4
+  store i32 %24, i32* %25, align 4
+  %26 = load i32* @a13, align 4
+  %27 = load i32** @b13, align 4
+  store i32 %26, i32* %27, align 4
+  %28 = load i32* @a14, align 4
+  %29 = load i32** @b14, align 4
+  store i32 %28, i32* %29, align 4
+  %30 = load i32* @a15, align 4
+  %31 = load i32** @b15, align 4
+  store i32 %30, i32* %31, align 4
+  %32 = load i32* @a16, align 4
+  %33 = load i32** @b16, align 4
+  store i32 %32, i32* %33, align 4
+  %34 = load i32* @a17, align 4
+  %35 = load i32** @b17, align 4
+  store i32 %34, i32* %35, align 4
+  %36 = load i32* @a18, align 4
+  %37 = load i32** @b18, align 4
+  store i32 %36, i32* %37, align 4
+  %38 = load i32* @a19, align 4
+  %39 = load i32** @b19, align 4
+  store i32 %38, i32* %39, align 4
+  %40 = load i32* @a20, align 4
+  %41 = load i32** @b20, align 4
+  store i32 %40, i32* %41, align 4
+  %42 = load i32* @a21, align 4
+  %43 = load i32** @b21, align 4
+  store i32 %42, i32* %43, align 4
+  %44 = load i32* @a22, align 4
+  %45 = load i32** @b22, align 4
+  store i32 %44, i32* %45, align 4
+  %46 = load i32* @a23, align 4
+  %47 = load i32** @b23, align 4
+  store i32 %46, i32* %47, align 4
+  %48 = load i32* @a24, align 4
+  %49 = load i32** @b24, align 4
+  store i32 %48, i32* %49, align 4
+  %50 = load i32* @a25, align 4
+  %51 = load i32** @b25, align 4
+  store i32 %50, i32* %51, align 4
+  %52 = load i32* @a26, align 4
+  %53 = load i32** @b26, align 4
+  store i32 %52, i32* %53, align 4
+  %54 = load i32* @a27, align 4
+  %55 = load i32** @b27, align 4
+  store i32 %54, i32* %55, align 4
+  %56 = load i32* @a28, align 4
+  %57 = load i32** @b28, align 4
+  store i32 %56, i32* %57, align 4
+  %58 = load i32* @a29, align 4
+  %59 = load i32** @b29, align 4
+  store i32 %58, i32* %59, align 4
+  %60 = load i32* @a0, align 4
+  %61 = load i32** @c0, align 4
+  store i32 %60, i32* %61, align 4
+  %62 = load i32* @a1, align 4
+  %63 = load i32** @c1, align 4
+  store i32 %62, i32* %63, align 4
+  %64 = load i32* @a2, align 4
+  %65 = load i32** @c2, align 4
+  store i32 %64, i32* %65, align 4
+  %66 = load i32* @a3, align 4
+  %67 = load i32** @c3, align 4
+  store i32 %66, i32* %67, align 4
+  %68 = load i32* @a4, align 4
+  %69 = load i32** @c4, align 4
+  store i32 %68, i32* %69, align 4
+  %70 = load i32* @a5, align 4
+  %71 = load i32** @c5, align 4
+  store i32 %70, i32* %71, align 4
+  %72 = load i32* @a6, align 4
+  %73 = load i32** @c6, align 4
+  store i32 %72, i32* %73, align 4
+  %74 = load i32* @a7, align 4
+  %75 = load i32** @c7, align 4
+  store i32 %74, i32* %75, align 4
+  %76 = load i32* @a8, align 4
+  %77 = load i32** @c8, align 4
+  store i32 %76, i32* %77, align 4
+  %78 = load i32* @a9, align 4
+  %79 = load i32** @c9, align 4
+  store i32 %78, i32* %79, align 4
+  %80 = load i32* @a10, align 4
+  %81 = load i32** @c10, align 4
+  store i32 %80, i32* %81, align 4
+  %82 = load i32* @a11, align 4
+  %83 = load i32** @c11, align 4
+  store i32 %82, i32* %83, align 4
+  %84 = load i32* @a12, align 4
+  %85 = load i32** @c12, align 4
+  store i32 %84, i32* %85, align 4
+  %86 = load i32* @a13, align 4
+  %87 = load i32** @c13, align 4
+  store i32 %86, i32* %87, align 4
+  %88 = load i32* @a14, align 4
+  %89 = load i32** @c14, align 4
+  store i32 %88, i32* %89, align 4
+  %90 = load i32* @a15, align 4
+  %91 = load i32** @c15, align 4
+  store i32 %90, i32* %91, align 4
+  %92 = load i32* @a16, align 4
+  %93 = load i32** @c16, align 4
+  store i32 %92, i32* %93, align 4
+  %94 = load i32* @a17, align 4
+  %95 = load i32** @c17, align 4
+  store i32 %94, i32* %95, align 4
+  %96 = load i32* @a18, align 4
+  %97 = load i32** @c18, align 4
+  store i32 %96, i32* %97, align 4
+  %98 = load i32* @a19, align 4
+  %99 = load i32** @c19, align 4
+  store i32 %98, i32* %99, align 4
+  %100 = load i32* @a20, align 4
+  %101 = load i32** @c20, align 4
+  store i32 %100, i32* %101, align 4
+  %102 = load i32* @a21, align 4
+  %103 = load i32** @c21, align 4
+  store i32 %102, i32* %103, align 4
+  %104 = load i32* @a22, align 4
+  %105 = load i32** @c22, align 4
+  store i32 %104, i32* %105, align 4
+  %106 = load i32* @a23, align 4
+  %107 = load i32** @c23, align 4
+  store i32 %106, i32* %107, align 4
+  %108 = load i32* @a24, align 4
+  %109 = load i32** @c24, align 4
+  store i32 %108, i32* %109, align 4
+  %110 = load i32* @a25, align 4
+  %111 = load i32** @c25, align 4
+  store i32 %110, i32* %111, align 4
+  %112 = load i32* @a26, align 4
+  %113 = load i32** @c26, align 4
+  store i32 %112, i32* %113, align 4
+  %114 = load i32* @a27, align 4
+  %115 = load i32** @c27, align 4
+  store i32 %114, i32* %115, align 4
+  %116 = load i32* @a28, align 4
+  %117 = load i32** @c28, align 4
+  store i32 %116, i32* %117, align 4
+  %118 = load i32* @a29, align 4
+  %119 = load i32** @c29, align 4
+  store i32 %118, i32* %119, align 4
+  %120 = load i32* @a0, align 4
   ret i32 %120
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/Mips/return-vector.ll b/test/CodeGen/Mips/return-vector.ll
index 739c43c68a55..0e0d51587541 100644
--- a/test/CodeGen/Mips/return-vector.ll
+++ b/test/CodeGen/Mips/return-vector.ll
@@ -30,7 +30,7 @@ entry:
   %add7 = add i32 %add5, %add6
   ret i32 %add7
 
-; CHECK:        call_i8:
+; CHECK-LABEL:        call_i8:
 ; CHECK:        call16(i8)
 ; CHECK:        addiu   $4, $sp, 32
 ; CHECK:        lw      $[[R0:[a-z0-9]+]], 60($sp)
@@ -56,7 +56,7 @@ entry:
   %add3 = fadd float %add1, %add2
   ret float %add3
 
-; CHECK:        call_f4:
+; CHECK-LABEL:        call_f4:
 ; CHECK:        call16(f4)
 ; CHECK:        addiu   $4, $sp, 16
 ; CHECK:        lwc1    $[[R0:[a-z0-9]+]], 28($sp)
@@ -78,7 +78,7 @@ entry:
   %add3 = fadd double %add1, %add2
   ret double %add3
 
-; CHECK:        call_d4:
+; CHECK-LABEL:        call_d4:
 ; CHECK:        call16(d4)
 ; CHECK:        addiu   $4, $sp, 32
 ; CHECK:        ldc1    $[[R0:[a-z0-9]+]], 56($sp)
@@ -109,7 +109,7 @@ entry:
   %add3 = add i32 %add1, %add2
   ret i32 %add3
 
-; CHECK:        call_i4:
+; CHECK-LABEL:        call_i4:
 ; CHECK:        call16(i4)
 ; CHECK-NOT:    lw
 ; CHECK:        addu    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
@@ -126,7 +126,7 @@ entry:
   %add1 = fadd float %v0, %v1
   ret float %add1
 
-; CHECK:        call_f2:
+; CHECK-LABEL:        call_f2:
 ; CHECK:        call16(f2)
 ; CHECK-NOT:    lwc1
 ; CHECK:        add.s    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
@@ -141,7 +141,7 @@ entry:
   %add1 = fadd double %v0, %v1
   ret double %add1
 
-; CHECK:        call_d2:
+; CHECK-LABEL:        call_d2:
 ; CHECK:        call16(d2)
 ; CHECK-NOT:    ldc1
 ; CHECK:        add.d    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
@@ -158,7 +158,7 @@ define <8 x i32> @return_i8() {
 entry:
   ret <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 
-; CHECK:        return_i8:
+; CHECK-LABEL:        return_i8:
 ; CHECK:        sw      $[[R0:[a-z0-9]+]], 28($4)
 ; CHECK:        sw      $[[R1:[a-z0-9]+]], 24($4)
 ; CHECK:        sw      $[[R2:[a-z0-9]+]], 20($4)
@@ -178,12 +178,12 @@ entry:
   %vecins4 = insertelement <4 x float> %vecins3, float %d, i32 3
   ret <4 x float> %vecins4
 
-; CHECK:        return_f4:
-; CHECK:        lwc1    $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK:        swc1    $[[R0]], 12($4)
-; CHECK:        sw      $7, 8($4)
-; CHECK:        sw      $6, 4($4)
-; CHECK:        sw      $5, 0($4)
+; CHECK-LABEL:        return_f4:
+; CHECK-DAG:    lwc1    $[[R0:[a-z0-9]+]], 16($sp)
+; CHECK-DAG:    swc1    $[[R0]], 12($4)
+; CHECK-DAG:    sw      $7, 8($4)
+; CHECK-DAG:    sw      $6, 4($4)
+; CHECK-DAG:    sw      $5, 0($4)
 }
 
 
@@ -195,11 +195,11 @@ entry:
   %vecins4 = insertelement <4 x double> %vecins3, double %d, i32 3
   ret <4 x double> %vecins4
 
-; CHECK:        return_d4:
-; CHECK:        sdc1    $[[R0:[a-z0-9]+]], 24($4)
-; CHECK:        sdc1    $[[R1:[a-z0-9]+]], 16($4)
-; CHECK:        sdc1    $[[R2:[a-z0-9]+]], 8($4)
-; CHECK:        sdc1    $[[R3:[a-z0-9]+]], 0($4)
+; CHECK-LABEL:            return_d4:
+; CHECK-DAG:        sdc1    $[[R0:[a-z0-9]+]], 24($4)
+; CHECK-DAG:        sdc1    $[[R1:[a-z0-9]+]], 16($4)
+; CHECK-DAG:        sdc1    $[[R2:[a-z0-9]+]], 8($4)
+; CHECK-DAG:        sdc1    $[[R3:[a-z0-9]+]], 0($4)
 }
 
 
@@ -212,7 +212,7 @@ define <4 x i32> @return_i4() {
 entry:
   ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 
-; CHECK:        return_i4:
+; CHECK-LABEL:        return_i4:
 ; CHECK:        addiu   $2, $zero, 0
 ; CHECK:        addiu   $3, $zero, 1
 ; CHECK:        addiu   $4, $zero, 2
@@ -226,7 +226,7 @@ entry:
   %vecins2 = insertelement <2 x float> %vecins1, float %b, i32 1
   ret <2 x float> %vecins2
 
-; CHECK:        return_f2:
+; CHECK-LABEL:        return_f2:
 ; CHECK:        mov.s   $f0, $f12
 ; CHECK:        mov.s   $f2, $f14
 }
@@ -238,7 +238,7 @@ entry:
   %vecins2 = insertelement <2 x double> %vecins1, double %b, i32 1
   ret <2 x double> %vecins2
 
-; CHECK:        return_d2:
+; CHECK-LABEL:        return_d2:
 ; CHECK:        mov.d   $f0, $f12
 ; CHECK:        mov.d   $f2, $f14
 }
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
index 4f3cfb7df41c..813bbdf18bbd 100644
--- a/test/CodeGen/Mips/rotate.ll
+++ b/test/CodeGen/Mips/rotate.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float   < %s | FileCheck %s -check-prefix=mips16 
 
 ; CHECK:  rotrv $2, $4
+; mips16: .ent rot0
 define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
 entry:
   %shl = shl i32 %a, %b
@@ -11,6 +13,7 @@ entry:
 }
 
 ; CHECK:  rotr  $2, $4, 22
+; mips16: .ent rot1
 define i32 @rot1(i32 %a) nounwind readnone {
 entry:
   %shl = shl i32 %a, 10
@@ -20,6 +23,7 @@ entry:
 }
 
 ; CHECK:  rotrv $2, $4, $5
+; mips16: .ent rot2
 define i32 @rot2(i32 %a, i32 %b) nounwind readnone {
 entry:
   %shr = lshr i32 %a, %b
@@ -30,6 +34,7 @@ entry:
 }
 
 ; CHECK:  rotr  $2, $4, 10
+; mips16: .ent rot3
 define i32 @rot3(i32 %a) nounwind readnone {
 entry:
   %shr = lshr i32 %a, 10
diff --git a/test/CodeGen/Mips/sel1c.ll b/test/CodeGen/Mips/sel1c.ll
new file mode 100644
index 000000000000..4c4784de6aa8
--- /dev/null
+++ b/test/CodeGen/Mips/sel1c.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=cond-b-short
+; Stores (i == j) ? 1 : 2 into k and expects the MIPS16 output to contain a bteqz to a $BB0_ label carrying the "# 16 bit inst" annotation.
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@k = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define void @t() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @j, align 4
+  %cmp = icmp eq i32 %0, %1
+  %cond = select i1 %cmp, i32 1, i32 2 ; equality picks 1, inequality picks 2
+  store i32 %cond, i32* @k, align 4
+  ret void
+; cond-b-short:	bteqz	$BB0_{{[0-9]+}}  # 16 bit inst
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+
diff --git a/test/CodeGen/Mips/sel2c.ll b/test/CodeGen/Mips/sel2c.ll
new file mode 100644
index 000000000000..25dfaa9ba87e
--- /dev/null
+++ b/test/CodeGen/Mips/sel2c.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands   < %s | FileCheck %s -check-prefix=cond-b-short
+; Stores (i != j) ? 1 : 2 into k and expects the MIPS16 output to contain a btnez to a $BB0_ label carrying the "# 16 bit inst" annotation.
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@k = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize
+define void @t() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %1 = load i32* @j, align 4
+  %cmp = icmp ne i32 %0, %1
+  %cond = select i1 %cmp, i32 1, i32 2 ; inequality picks 1, equality picks 2
+  store i32 %cond, i32* @k, align 4
+; cond-b-short:	btnez	$BB0_{{[0-9]+}}  # 16 bit inst
+  ret void
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+
diff --git a/test/CodeGen/Mips/selectcc.ll b/test/CodeGen/Mips/selectcc.ll
index a17517e7d145..aeef60ecb806 100644
--- a/test/CodeGen/Mips/selectcc.ll
+++ b/test/CodeGen/Mips/selectcc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s --check-prefix=SOURCE-SCHED
 
 @gf0 = external global float
 @gf1 = external global float
@@ -7,6 +8,21 @@
 
 define float @select_cc_f32(float %a, float %b) nounwind {
 entry:
+; SOURCE-SCHED: lui
+; SOURCE-SCHED: addiu
+; SOURCE-SCHED: addu
+; SOURCE-SCHED: lw
+; SOURCE-SCHED: sw
+; SOURCE-SCHED: lw
+; SOURCE-SCHED: lui
+; SOURCE-SCHED: sw
+; SOURCE-SCHED: addiu
+; SOURCE-SCHED: addiu
+; SOURCE-SCHED: c.olt.s
+; SOURCE-SCHED: movt
+; SOURCE-SCHED: mtc1
+; SOURCE-SCHED: jr
+
   store float 0.000000e+00, float* @gf0, align 4
   store float 1.000000e+00, float* @gf1, align 4
   %cmp = fcmp olt float %a, %b
diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll
index 26015523106d..64834b256fe5 100644
--- a/test/CodeGen/Mips/selnek.ll
+++ b/test/CodeGen/Mips/selnek.ll
@@ -104,4 +104,4 @@ attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
 ; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 
 ; 16:	cmpi	${{[0-9]+}}, 1000
-; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
-\ No newline at end of file
+; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
diff --git a/test/CodeGen/Mips/setcc-se.ll b/test/CodeGen/Mips/setcc-se.ll
new file mode 100644
index 000000000000..99071c42f169
--- /dev/null
+++ b/test/CodeGen/Mips/setcc-se.ll
@@ -0,0 +1,155 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; Each function below compares its argument against a constant; the checks pin which set-on-less-than form (immediate or register) appears in the output.
+@g1 = external global i32
+
+; CHECK-LABEL: seteq0:
+; CHECK: sltiu ${{[0-9]+}}, $4, 1
+; zext(a == 0): expected as a single sltiu of $4 against the immediate 1.
+define i32 @seteq0(i32 %a) {
+entry:
+  %cmp = icmp eq i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: setne0:
+; CHECK: sltu ${{[0-9]+}}, $zero, $4
+; zext(a != 0): expected as sltu with $zero as the first source operand and $4 as the second.
+define i32 @setne0(i32 %a) {
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: slti_beq0:
+; CHECK: slti $[[R0:[0-9]+]], $4, -32768
+; CHECK: beqz $[[R0]]
+; Signed a < -32768 (the lowest 16-bit signed value): expected as slti with that immediate, then beqz on its result.
+define void @slti_beq0(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, -32768
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: slti_beq1:
+; CHECK: slt ${{[0-9]+}}
+; Signed a < -32769 (one below the lowest 16-bit signed value): the check looks for the register form, slt.
+define void @slti_beq1(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, -32769
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: slti_beq2:
+; CHECK: slti $[[R0:[0-9]+]], $4, 32767
+; CHECK: beqz $[[R0]]
+; Signed a < 32767 (the highest 16-bit signed value): expected as slti with that immediate, then beqz on its result.
+define void @slti_beq2(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, 32767
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: slti_beq3:
+; CHECK: slt ${{[0-9]+}}
+; Signed a < 32768 (one above the highest 16-bit signed value): the check looks for the register form, slt.
+define void @slti_beq3(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, 32768
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: sltiu_beq0:
+; CHECK: sltiu $[[R0:[0-9]+]], $4, 32767
+; CHECK: beqz $[[R0]]
+; Unsigned a < 32767: expected as sltiu with that immediate, then beqz on its result.
+define void @sltiu_beq0(i32 %a) {
+entry:
+  %cmp = icmp ult i32 %a, 32767
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: sltiu_beq1:
+; CHECK: sltu ${{[0-9]+}}
+; Unsigned a < 32768 (one above the highest 16-bit signed value): the check looks for the register form, sltu.
+define void @sltiu_beq1(i32 %a) {
+entry:
+  %cmp = icmp ult i32 %a, 32768
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: sltiu_beq2:
+; CHECK: sltiu $[[R0:[0-9]+]], $4, -32768
+; CHECK: beqz $[[R0]]
+; Unsigned a < -32768 (0xFFFF8000 as a 32-bit pattern): expected as sltiu written with the immediate -32768, then beqz on its result.
+define void @sltiu_beq2(i32 %a) {
+entry:
+  %cmp = icmp ult i32 %a, -32768
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: sltiu_beq3:
+; CHECK: sltu ${{[0-9]+}}
+; Unsigned a < -32769 (0xFFFF7FFF as a 32-bit pattern): the check looks for the register form, sltu.
+define void @sltiu_beq3(i32 %a) {
+entry:
+  %cmp = icmp ult i32 %a, -32769
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @g1, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/Mips/simplebr.ll b/test/CodeGen/Mips/simplebr.ll
new file mode 100644
index 000000000000..a1d63671b4ee
--- /dev/null
+++ b/test/CodeGen/Mips/simplebr.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; Builds an if/else that calls goo() when @i is non-zero and hoo() otherwise, and expects an unconditional b to a $BB label carrying the "# 16 bit inst" annotation.
+; ModuleID = 'simplebr.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
+target triple = "mips--linux-gnu"
+; NOTE(review): the triple above is "mips--linux-gnu" and the datalayout starts with "E", while the RUN line passes -mtriple=mipsel-linux-gnu -march=mipsel - confirm the mismatch is intended.
+@i = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  call void bitcast (void (...)* @goo to void ()*)()
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  call void bitcast (void (...)* @hoo to void ()*)()
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; CHECK-STATIC16:	b	$BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst
+
+declare void @goo(...) #1
+
+declare void @hoo(...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+
+
diff --git a/test/CodeGen/Mips/sint-fp-store_pattern.ll b/test/CodeGen/Mips/sint-fp-store_pattern.ll
new file mode 100644
index 000000000000..c44ea080a886
--- /dev/null
+++ b/test/CodeGen/Mips/sint-fp-store_pattern.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
+; Each function converts a floating-point argument with fptosi and stores it to a global; the checks require the store (swc1 or sdc1) to use the same FPU register the trunc wrote.
+@gint_ = external global i32
+@gLL_ = external global i64
+
+; 32-LABEL: store_int_float_:
+; 32: trunc.w.s $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 32: swc1 $f[[R0]],
+; float to i32 into @gint_; checked only for the mipsel RUN line (trunc.w.s, then swc1 of the same register).
+define void @store_int_float_(float %a) {
+entry:
+  %conv = fptosi float %a to i32
+  store i32 %conv, i32* @gint_, align 4
+  ret void
+}
+
+; 32-LABEL: store_int_double_:
+; 32: trunc.w.d $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 32: swc1 $f[[R0]],
+; 64-LABEL: store_int_double_:
+; 64: trunc.w.d $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 64: swc1 $f[[R0]],
+; double to i32 into @gint_; both RUN lines expect trunc.w.d, then swc1 of the same register.
+define void @store_int_double_(double %a) {
+entry:
+  %conv = fptosi double %a to i32
+  store i32 %conv, i32* @gint_, align 4
+  ret void
+}
+
+; 64-LABEL: store_LL_float_:
+; 64: trunc.l.s $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 64: sdc1 $f[[R0]],
+; float to i64 into @gLL_; checked only for the mips64el RUN line (trunc.l.s, then sdc1 of the same register).
+define void @store_LL_float_(float %a) {
+entry:
+  %conv = fptosi float %a to i64
+  store i64 %conv, i64* @gLL_, align 8
+  ret void
+}
+
+; 64-LABEL: store_LL_double_:
+; 64: trunc.l.d $f[[R0:[0-9]+]], $f{{[0-9]+}}
+; 64: sdc1 $f[[R0]],
+; double to i64 into @gLL_; checked only for the mips64el RUN line (trunc.l.d, then sdc1 of the same register).
+define void @store_LL_double_(double %a) {
+entry:
+  %conv = fptosi double %a to i64
+  store i64 %conv, i64* @gLL_, align 8
+  ret void
+}
diff --git a/test/CodeGen/Mips/stack-alignment.ll b/test/CodeGen/Mips/stack-alignment.ll
new file mode 100644
index 000000000000..b18f96695ff5
--- /dev/null
+++ b/test/CodeGen/Mips/stack-alignment.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mipsel -mattr=+fp64 < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
+; Pins the $sp adjustment emitted for a function that only returns a constant, built with "no-frame-pointer-elim"="true".
+; 32:      addiu  $sp, $sp, -8
+; 64:      addiu  $sp, $sp, -16
+; Both mipsel RUN lines (with and without +fp64) share one prefix and expect -8; the mips64el RUN line expects -16.
+define i32 @foo1() #0 {
+entry:
+  ret i32 14
+}
+
+attributes #0 = { "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/Mips/stackcoloring.ll b/test/CodeGen/Mips/stackcoloring.ll
new file mode 100644
index 000000000000..4987dad5338b
--- /dev/null
+++ b/test/CodeGen/Mips/stackcoloring.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; A [16 x i32] alloca bracketed by llvm.lifetime.start/end is handed to foo2 inside a loop that also loads through @g1.
+@g1 = external global i32*
+
+; CHECK-LABEL: foo1:
+; CHECK: lw ${{[0-9]+}}, %got(g1)
+; CHECK: # %for.body
+; CHECK: # %for.end
+; The checks above match in order: a GOT load of g1, then the %for.body block marker, then %for.end.
+define i32 @foo1() {
+entry:
+  %b = alloca [16 x i32], align 4
+  %0 = bitcast [16 x i32]* %b to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %0)
+  %arraydecay = getelementptr inbounds [16 x i32]* %b, i32 0, i32 0
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %v.04 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %1 = load i32** @g1, align 4
+  %arrayidx = getelementptr inbounds i32* %1, i32 %i.05
+  %2 = load i32* %arrayidx, align 4
+  %call = call i32 @foo2(i32 %2, i32* %arraydecay)
+  %add = add nsw i32 %call, %v.04
+  %inc = add nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  call void @llvm.lifetime.end(i64 64, i8* %0)
+  ret i32 %add
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare i32 @foo2(i32, i32*)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture)
diff --git a/test/CodeGen/Mips/stchar.ll b/test/CodeGen/Mips/stchar.ll
index c00c9fd9d2a1..12eae3487ff1 100644
--- a/test/CodeGen/Mips/stchar.ll
+++ b/test/CodeGen/Mips/stchar.ll
@@ -50,8 +50,8 @@ entry:
   %conv1.i = sext i8 %3 to i32
   %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
   ret void
-; 16_b: test:
-; 16_h: test:
+; 16_b-LABEL: test:
+; 16_h-LABEL: test:
 ; 16_b:	sb	${{[0-9]+}}, [[offset1:[0-9]+]](${{[0-9]+}})
 ; 16_b: lb      ${{[0-9]+}}, [[offset1]](${{[0-9]+}})
 ; 16_h:	sh	${{[0-9]+}}, [[offset2:[0-9]+]](${{[0-9]+}})
diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll
index bcd33fca70ed..30f47abc06cb 100644
--- a/test/CodeGen/Mips/tailcall.ll
+++ b/test/CodeGen/Mips/tailcall.ll
@@ -243,3 +243,16 @@ entry:
   ret i32 %call
 }
 
+; Check that there is a chain edge between the load and store nodes.
+;
+; PIC32-LABEL: caller14:
+; PIC32: lw ${{[0-9]+}}, 16($sp)
+; PIC32: sw $4, 16($sp)
+
+define void @caller14(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+  tail call void @callee14(i32 %e, i32 %b, i32 %c, i32 %d, i32 %a)   ; first and fifth arguments are swapped; expected output reads 16($sp) before $4 is stored there
+  ret void
+}
+
+declare void @callee14(i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
index ce98cc826223..3c810542cca3 100644
--- a/test/CodeGen/Mips/tls-alias.ll
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -4,7 +4,7 @@
 @bar = hidden alias i32* @foo
 
 define i32* @zed() {
-; CHECK: __tls_get_addr
-; CHECK-NEXT: %tlsgd(bar)
+; CHECK-DAG: __tls_get_addr
+; CHECK-DAG: %tlsgd(bar)
        ret i32* @bar
 }
diff --git a/test/CodeGen/Mips/tls-models.ll b/test/CodeGen/Mips/tls-models.ll
index 8f5789ec7995..1a958dceaa28 100644
--- a/test/CodeGen/Mips/tls-models.ll
+++ b/test/CodeGen/Mips/tls-models.ll
@@ -20,9 +20,9 @@ entry:
   ret i32* @external_gd
 
   ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
-  ; CHECK-NONPIC:   f1:
+  ; CHECK-NONPIC-LABEL:   f1:
   ; CHECK-NONPIC:   %gottprel
-  ; CHECK-PIC:      f1:
+  ; CHECK-PIC-LABEL:      f1:
   ; CHECK-PIC:      %tlsgd
 }
 
@@ -31,9 +31,9 @@ entry:
   ret i32* @internal_gd
 
   ; Non-PIC code can use local exec, PIC code can use local dynamic.
-  ; CHECK-NONPIC:   f2:
+  ; CHECK-NONPIC-LABEL:   f2:
   ; CHECK-NONPIC:   %tprel_hi
-  ; CHECK-PIC:      f2:
+  ; CHECK-PIC-LABEL:      f2:
   ; CHECK-PIC:      %tlsldm
 }
 
@@ -45,9 +45,9 @@ entry:
   ret i32* @external_ld
 
   ; Non-PIC code can use initial exec, PIC should use local dynamic.
-  ; CHECK-NONPIC:   f3:
+  ; CHECK-NONPIC-LABEL:   f3:
   ; CHECK-NONPIC:   %gottprel
-  ; CHECK-PIC:      f3:
+  ; CHECK-PIC-LABEL:      f3:
   ; CHECK-PIC:      %tlsldm
 }
 
@@ -56,9 +56,9 @@ entry:
   ret i32* @internal_ld
 
   ; Non-PIC code can use local exec, PIC code can use local dynamic.
-  ; CHECK-NONPIC:   f4:
+  ; CHECK-NONPIC-LABEL:   f4:
   ; CHECK-NONPIC:   %tprel_hi
-  ; CHECK-PIC:      f4:
+  ; CHECK-PIC-LABEL:      f4:
   ; CHECK-PIC:      %tlsldm
 }
 
@@ -70,9 +70,9 @@ entry:
   ret i32* @external_ie
 
   ; Non-PIC and PIC code will use initial exec as specified.
-  ; CHECK-NONPIC:   f5:
+  ; CHECK-NONPIC-LABEL:   f5:
   ; CHECK-NONPIC:   %gottprel
-  ; CHECK-PIC:      f5:
+  ; CHECK-PIC-LABEL:      f5:
   ; CHECK-PIC:      %gottprel
 }
 
@@ -81,9 +81,9 @@ entry:
   ret i32* @internal_ie
 
   ; Non-PIC code can use local exec, PIC code use initial exec as specified.
-  ; CHECK-NONPIC:   f6:
+  ; CHECK-NONPIC-LABEL:   f6:
   ; CHECK-NONPIC:   %tprel_hi
-  ; CHECK-PIC:      f6:
+  ; CHECK-PIC-LABEL:      f6:
   ; CHECK-PIC:      %gottprel
 }
 
@@ -95,9 +95,9 @@ entry:
   ret i32* @external_le
 
   ; Non-PIC and PIC code will use local exec as specified.
-  ; CHECK-NONPIC:   f7:
+  ; CHECK-NONPIC-LABEL:   f7:
   ; CHECK-NONPIC:   %tprel_hi
-  ; CHECK-PIC:      f7:
+  ; CHECK-PIC-LABEL:      f7:
   ; CHECK-PIC:      %tprel_hi
 }
 
@@ -106,8 +106,8 @@ entry:
   ret i32* @internal_le
 
   ; Non-PIC and PIC code will use local exec as specified.
-  ; CHECK-NONPIC:   f8:
+  ; CHECK-NONPIC-LABEL:   f8:
   ; CHECK-NONPIC:   %tprel_hi
-  ; CHECK-PIC:      f8:
+  ; CHECK-PIC-LABEL:      f8:
   ; CHECK-PIC:      %tprel_hi
 }
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index b86d25e5e5e8..23a8f93a9d7c 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -13,14 +13,14 @@ entry:
   %tmp = load i32* @t1, align 4
   ret i32 %tmp
 
-; CHECK: f1:
-
-; PIC:   addu    $[[R0:[a-z0-9]+]], $2, $25
-; PIC:   lw      $25, %call16(__tls_get_addr)($[[R0]])
-; PIC:   addiu   $4, $[[R0]], %tlsgd(t1)
-; PIC:   jalr    $25
-; PIC:   lw      $2, 0($2)
-
+; PIC-LABEL:       f1:
+; PIC-DAG:   addu    $[[R0:[a-z0-9]+]], $2, $25
+; PIC-DAG:   lw      $25, %call16(__tls_get_addr)($[[R0]])
+; PIC-DAG:   addiu   $4, $[[R0]], %tlsgd(t1)
+; PIC-DAG:   jalr    $25
+; PIC-DAG:   lw      $2, 0($2)
+
+; STATIC-LABEL:   f1:
 ; STATIC:   lui     $[[R0:[0-9]+]], %tprel_hi(t1)
 ; STATIC:   addiu   $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
 ; STATIC:   rdhwr   $3, $29
@@ -36,17 +36,19 @@ entry:
   %tmp = load i32* @t2, align 4
   ret i32 %tmp
 
-; CHECK: f2:
-
-; PIC:   addu    $[[R0:[a-z0-9]+]], $2, $25
-; PIC:   lw      $25, %call16(__tls_get_addr)($[[R0]])
-; PIC:   addiu   $4, $[[R0]], %tlsgd(t2)
-; PIC:   jalr    $25
-; PIC:   lw      $2, 0($2)
+; PIC-LABEL:       f2:
+; PIC-DAG:   addu    $[[R0:[a-z0-9]+]], $2, $25
+; PIC-DAG:   lw      $25, %call16(__tls_get_addr)($[[R0]])
+; PIC-DAG:   addiu   $4, $[[R0]], %tlsgd(t2)
+; PIC-DAG:   jalr    $25
+; PIC-DAG:   lw      $2, 0($2)
 
+; STATICGP-LABEL: f2:
 ; STATICGP: lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
 ; STATICGP: addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
 ; STATICGP: lw      ${{[0-9]+}}, %gottprel(t2)($[[GP]])
+
+; STATIC-LABEL:   f2:
 ; STATIC:   lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
 ; STATIC:   addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
 ; STATIC:   rdhwr   $3, $29
@@ -59,7 +61,7 @@ entry:
 
 define i32 @f3() nounwind {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 
 ; PIC:   addiu   $4, ${{[a-z0-9]+}}, %tlsldm(f3.i)
 ; PIC:   jalr    $25
diff --git a/test/CodeGen/Mips/tnaked.ll b/test/CodeGen/Mips/tnaked.ll
index f5bdd915b28c..08f1ab5be86e 100644
--- a/test/CodeGen/Mips/tnaked.ll
+++ b/test/CodeGen/Mips/tnaked.ll
@@ -7,7 +7,7 @@ entry:
 }
 
 ; CHECK: 	.ent	tnaked
-; CHECK:          tnaked: 
+; CHECK-LABEL:          tnaked:
 ; CHECK-NOT:	.frame	{{.*}}
 ; CHECK-NOT:     .mask 	{{.*}}
 ; CHECK-NOT:	.fmask	{{.*}}
@@ -19,11 +19,11 @@ entry:
 }
 
 ; CHECK: 	.ent	tnonaked
-; CHECK:         tnonaked: 
+; CHECK-LABEL:         tnonaked:
 ; CHECK:	.frame	$fp,8,$ra
 ; CHECK:        .mask 	0x40000000,-4
 ; CHECK:	.fmask	0x00000000,0
 ; CHECK: 	addiu	$sp, $sp, -8
 
-attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/trap.ll b/test/CodeGen/Mips/trap.ll
new file mode 100644
index 000000000000..beb4b894632b
--- /dev/null
+++ b/test/CodeGen/Mips/trap.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+
+declare void @llvm.trap()
+
+define void @f1() {                               ; body is only a trap followed by unreachable
+entry:
+  call void @llvm.trap()                          ; the directive below expects a "break" in the output
+  unreachable
+
+; CHECK:        break
+}
diff --git a/test/CodeGen/Mips/trap1.ll b/test/CodeGen/Mips/trap1.ll
new file mode 100644
index 000000000000..bfcd7fed30d9
--- /dev/null
+++ b/test/CodeGen/Mips/trap1.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
+
+declare void @llvm.trap()
+
+; Expects llvm.trap to produce "break 0" under the mips16 / relocation-model=pic RUN line; the define carries no attributes.
+define i32 @main()  {
+entry:
+  call void @llvm.trap()
+  unreachable                                     ; terminates the entry block
+; pic: break 0
+  ret i32 0                                       ; follows a terminator, so it sits in its own unnamed block with no branch to it
+}
+
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 7f880b6fe379..19f3af7f344a 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -9,17 +9,17 @@
 
 define void @foo1() nounwind {
 entry:
-; CHECK-EL: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]])
-; CHECK-EL: lbu ${{[0-9]+}}, 3($[[R0]])
-; CHECK-EL: jalr
-; CHECK-EL: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]])
-; CHECK-EL: lwr $[[R1]], 0($[[R2]])
+; CHECK-EL-DAG: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]])
+; CHECK-EL-DAG: lbu ${{[0-9]+}}, 3($[[R0]])
+; CHECK-EL:     jalr
+; CHECK-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]])
+; CHECK-EL-DAG: lwr $[[R1]], 0($[[R2]])
 
-; CHECK-EB: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]])
-; CHECK-EB: lbu ${{[0-9]+}}, 2($[[R0]])
-; CHECK-EB: jalr
-; CHECK-EB: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]])
-; CHECK-BE: lwr $[[R1]], 3($[[R2]])
+; CHECK-EB-DAG: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]])
+; CHECK-EB-DAG: lbu ${{[0-9]+}}, 2($[[R0]])
+; CHECK-EB:     jalr
+; CHECK-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]])
+; CHECK-EB-DAG: lwr $[[R1]], 3($[[R2]])
 
   tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
   tail call void @foo4(%struct.S4* byval @s4) nounwind
diff --git a/test/CodeGen/NVPTX/add-128bit.ll b/test/CodeGen/NVPTX/add-128bit.ll
new file mode 100644
index 000000000000..29e3cdffae7b
--- /dev/null
+++ b/test/CodeGen/NVPTX/add-128bit.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+
+define void @foo(i64 %a, i64 %add, i128* %retptr) {   ; stores sext(%a) + zext(%add) as an i128
+; CHECK:        add.s64
+; CHECK:        setp.lt.u64
+; CHECK:        setp.lt.u64
+; CHECK:        selp.b64
+; CHECK:        selp.b64
+; CHECK:        add.s64
+  %t1 = sext i64 %a to i128                       ; first operand is sign-extended
+  %add2 = zext i64 %add to i128                   ; second operand is zero-extended
+  %val = add i128 %t1, %add2                      ; expected output builds this from 64-bit add/setp/selp instructions
+  store i128 %val, i128* %retptr
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/bug17709.ll b/test/CodeGen/NVPTX/bug17709.ll
new file mode 100644
index 000000000000..92f0fcb11e41
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug17709.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; ModuleID = '__kernelgen_main_module'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define linker_private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {   ; %m is unused; always returns { 1.0, 1.0 }
+entry:
+  ;unreachable
+  %t0 = insertvalue {double, double} undef, double 1.0, 0   ; element 0 = 1.0
+  %t1 = insertvalue {double, double} %t0, double 1.0, 1     ; element 1 = 1.0
+  ret { double, double } %t1
+}
+
+%struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
+%"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+@replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
+
+; CHECK: .visible .entry __kernelgen_main
+define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {   ; both parameters are unused
+entry:
+  %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)   ; callee is invoked through a bitcast to the other, identically-laid-out struct pointer type; result is discarded
+  ret void
+}
+
diff --git a/test/CodeGen/NVPTX/callchain.ll b/test/CodeGen/NVPTX/callchain.ll
new file mode 100644
index 000000000000..60b118b6a199
--- /dev/null
+++ b/test/CodeGen/NVPTX/callchain.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx"
+
+define void @foo(i8* %ptr) {                      ; calls through %ptr reinterpreted as a void() function pointer
+  %fnptr = bitcast i8* %ptr to void ()*
+; CHECK: prototype_0 : .callprototype ()_ ()
+  tail call void %fnptr()                         ; indirect call; the directive above expects a call prototype for it
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
index 16af0a336ddc..c595f215f6f1 100644
--- a/test/CodeGen/NVPTX/compare-int.ll
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -195,7 +195,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
 
 define i16 @icmp_eq_i16(i16 %a, i16 %b) {
 ; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -204,7 +204,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ne_i16(i16 %a, i16 %b) {
 ; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -213,7 +213,7 @@ define i16 @icmp_ne_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
 ; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ugt i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -222,7 +222,7 @@ define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_uge_i16(i16 %a, i16 %b) {
 ; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp uge i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -231,7 +231,7 @@ define i16 @icmp_uge_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ult_i16(i16 %a, i16 %b) {
 ; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ult i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -240,7 +240,7 @@ define i16 @icmp_ult_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_ule_i16(i16 %a, i16 %b) {
 ; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ule i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -249,7 +249,7 @@ define i16 @icmp_ule_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
 ; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sgt i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -258,7 +258,7 @@ define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_sge_i16(i16 %a, i16 %b) {
 ; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sge i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -267,7 +267,7 @@ define i16 @icmp_sge_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_slt_i16(i16 %a, i16 %b) {
 ; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp slt i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -276,7 +276,7 @@ define i16 @icmp_slt_i16(i16 %a, i16 %b) {
 
 define i16 @icmp_sle_i16(i16 %a, i16 %b) {
 ; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sle i16 %a, %b
   %ret = zext i1 %cmp to i16
@@ -288,8 +288,8 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
 
 define i8 @icmp_eq_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -298,8 +298,8 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_ne_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -308,8 +308,8 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ugt i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -318,8 +318,8 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_uge_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp uge i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -328,8 +328,8 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_ult_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ult i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -338,8 +338,8 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_ule_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ule i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -348,8 +348,8 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sgt i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -358,8 +358,8 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_sge_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sge i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -368,8 +368,8 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_slt_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp slt i8 %a, %b
   %ret = zext i1 %cmp to i8
@@ -378,8 +378,8 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_sle_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp sle i8 %a, %b
   %ret = zext i1 %cmp to i8
diff --git a/test/CodeGen/NVPTX/constant-vectors.ll b/test/CodeGen/NVPTX/constant-vectors.ll
new file mode 100644
index 000000000000..208c2d970f31
--- /dev/null
+++ b/test/CodeGen/NVPTX/constant-vectors.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-nvidia-cuda"
+
+; CHECK: .visible .global .align 16 .b8 testArray[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+@testArray = constant [2 x <4 x i8>] [<4 x i8> <i8 0, i8 1, i8 2, i8 3>, <4 x i8> <i8 4, i8 5, i8 6, i8 7>], align 16
diff --git a/test/CodeGen/NVPTX/convert-int-sm20.ll b/test/CodeGen/NVPTX/convert-int-sm20.ll
index fad240e03d2a..227cd31e11b3 100644
--- a/test/CodeGen/NVPTX/convert-int-sm20.ll
+++ b/test/CodeGen/NVPTX/convert-int-sm20.ll
@@ -8,16 +8,16 @@
 ; i16
 
 define i16 @cvt_i16_i32(i32 %x) {
-; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
-; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
 ; CHECK: ret
   %a = trunc i32 %x to i16
   ret i16 %a
 }
 
 define i16 @cvt_i16_i64(i64 %x) {
-; CHECK: ld.param.u16 %rs[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
-; CHECK: st.param.b16 [func_retval{{[0-9]+}}+0], %rs[[R0]]
+; CHECK: ld.param.u16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}]
+; CHECK: st.param.b32 [func_retval{{[0-9]+}}+0], %r[[R0]]
 ; CHECK: ret
   %a = trunc i64 %x to i16
   ret i16 %a
diff --git a/test/CodeGen/NVPTX/ctlz.ll b/test/CodeGen/NVPTX/ctlz.ll
new file mode 100644
index 000000000000..bed15a9f6a54
--- /dev/null
+++ b/test/CodeGen/NVPTX/ctlz.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+declare i16 @llvm.ctlz.i16(i16, i1) readnone
+declare i32 @llvm.ctlz.i32(i32, i1) readnone
+declare i64 @llvm.ctlz.i64(i64, i1) readnone
+
+define i32 @myctpop(i32 %a) {                     ; NOTE(review): named "ctpop" but the body calls llvm.ctlz.i32 -- name looks copy-pasted
+; CHECK: clz.b32
+  %val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone   ; flag operand is false here; the _2 variant passes true
+  ret i32 %val
+}
+
+define i16 @myctpop16(i16 %a) {                   ; NOTE(review): named "ctpop" but the body calls llvm.ctlz.i16
+; CHECK: clz.b32
+  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone   ; i16 input, yet the expected instruction is the 32-bit clz
+  ret i16 %val
+}
+
+define i64 @myctpop64(i64 %a) {                   ; NOTE(review): named "ctpop" but the body calls llvm.ctlz.i64
+; CHECK: clz.b64
+  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone   ; flag operand is false here; the _2 variant passes true
+  ret i64 %val
+}
+
+
+define i32 @myctpop_2(i32 %a) {                   ; same as @myctpop except the flag operand is true
+; CHECK: clz.b32
+  %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone   ; presumably the is_zero_undef flag -- confirm against LangRef
+  ret i32 %val
+}
+
+define i16 @myctpop16_2(i16 %a) {                 ; same as @myctpop16 except the flag operand is true
+; CHECK: clz.b32
+  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone   ; i16 input, yet the expected instruction is the 32-bit clz
+  ret i16 %val
+}
+
+define i64 @myctpop64_2(i64 %a) {                 ; same as @myctpop64 except the flag operand is true
+; CHECK: clz.b64
+  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
+  ret i64 %val
+}
diff --git a/test/CodeGen/NVPTX/ctpop.ll b/test/CodeGen/NVPTX/ctpop.ll
new file mode 100644
index 000000000000..b961d4d27bdd
--- /dev/null
+++ b/test/CodeGen/NVPTX/ctpop.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+define i32 @myctpop(i32 %a) {                     ; returns llvm.ctpop.i32 of %a
+; CHECK: popc.b32
+  %val = tail call i32 @llvm.ctpop.i32(i32 %a)
+  ret i32 %val
+}
+
+define i16 @myctpop16(i16 %a) {                   ; returns llvm.ctpop.i16 of %a
+; CHECK: popc.b32
+  %val = tail call i16 @llvm.ctpop.i16(i16 %a)    ; i16 input, yet the expected instruction is the 32-bit popc
+  ret i16 %val
+}
+
+define i64 @myctpop64(i64 %a) {                   ; returns llvm.ctpop.i64 of %a
+; CHECK: popc.b64
+  %val = tail call i64 @llvm.ctpop.i64(i64 %a)
+  ret i64 %val
+}
+
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/CodeGen/NVPTX/cttz.ll b/test/CodeGen/NVPTX/cttz.ll
new file mode 100644
index 000000000000..124ba9d1e9a7
--- /dev/null
+++ b/test/CodeGen/NVPTX/cttz.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+declare i16 @llvm.cttz.i16(i16, i1) readnone
+declare i32 @llvm.cttz.i32(i32, i1) readnone
+declare i64 @llvm.cttz.i64(i64, i1) readnone
+
+define i32 @myctpop(i32 %a) {                     ; NOTE(review): named "ctpop" but the body calls llvm.cttz.i32 -- name looks copy-pasted
+; CHECK: popc.b32
+  %val = call i32 @llvm.cttz.i32(i32 %a, i1 false) readnone   ; expected output contains a popc; presumably cttz is expanded through it -- confirm
+  ret i32 %val
+}
+
+define i16 @myctpop16(i16 %a) {                   ; NOTE(review): named "ctpop" but the body calls llvm.cttz.i16
+; CHECK: popc.b32
+  %val = call i16 @llvm.cttz.i16(i16 %a, i1 false) readnone   ; i16 input, yet the expected instruction is the 32-bit popc
+  ret i16 %val
+}
+
+define i64 @myctpop64(i64 %a) {                   ; NOTE(review): named "ctpop" but the body calls llvm.cttz.i64
+; CHECK: popc.b64
+  %val = call i64 @llvm.cttz.i64(i64 %a, i1 false) readnone   ; flag operand is false here; the _2 variant passes true
+  ret i64 %val
+}
+
+
+define i32 @myctpop_2(i32 %a) {                   ; same as @myctpop except the flag operand is true
+; CHECK: popc.b32
+  %val = call i32 @llvm.cttz.i32(i32 %a, i1 true) readnone   ; presumably the is_zero_undef flag -- confirm against LangRef
+  ret i32 %val
+}
+
+define i16 @myctpop16_2(i16 %a) {                 ; same as @myctpop16 except the flag operand is true
+; CHECK: popc.b32
+  %val = call i16 @llvm.cttz.i16(i16 %a, i1 true) readnone   ; i16 input, yet the expected instruction is the 32-bit popc
+  ret i16 %val
+}
+
+define i64 @myctpop64_2(i64 %a) {                 ; same as @myctpop64 except the flag operand is true
+; CHECK: popc.b64
+  %val = call i64 @llvm.cttz.i64(i64 %a, i1 true) readnone
+  ret i64 %val
+}
diff --git a/test/CodeGen/NVPTX/fast-math.ll b/test/CodeGen/NVPTX/fast-math.ll
new file mode 100644
index 000000000000..9da26adc1511
--- /dev/null
+++ b/test/CodeGen/NVPTX/fast-math.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+declare float @llvm.nvvm.sqrt.f(float)
+
+
+; CHECK-LABEL: sqrt_div(
+; CHECK: sqrt.rn.f32
+; CHECK: div.rn.f32
+define float @sqrt_div(float %a, float %b) {      ; no attribute group attached: the .rn forms of sqrt and div are expected
+  %t1 = tail call float @llvm.nvvm.sqrt.f(float %a)
+  %t2 = fdiv float %t1, %b                        ; result is sqrt(%a) / %b
+  ret float %t2
+}
+
+; CHECK-LABEL: sqrt_div_fast(
+; CHECK: sqrt.approx.f32
+; CHECK: div.approx.f32
+define float @sqrt_div_fast(float %a, float %b) #0 {   ; attribute group #0 sets "unsafe-fp-math": the .approx forms are expected
+  %t1 = tail call float @llvm.nvvm.sqrt.f(float %a)
+  %t2 = fdiv float %t1, %b                        ; result is sqrt(%a) / %b
+  ret float %t2
+}
+
+
+; CHECK-LABEL: fadd(
+; CHECK: add.f32
+define float @fadd(float %a, float %b) {          ; no attribute group attached: plain add.f32 is expected
+  %t1 = fadd float %a, %b
+  ret float %t1
+}
+
+; CHECK-LABEL: fadd_ftz(
+; CHECK: add.ftz.f32
+define float @fadd_ftz(float %a, float %b) #1 {   ; attribute group #1 sets "nvptx-f32ftz": the .ftz form is expected
+  %t1 = fadd float %a, %b
+  ret float %t1
+}
+
+
+
+attributes #0 = { "unsafe-fp-math" = "true" }
+attributes #1 = { "nvptx-f32ftz" = "true" }
diff --git a/test/CodeGen/NVPTX/fp-literals.ll b/test/CodeGen/NVPTX/fp-literals.ll
new file mode 100644
index 000000000000..0cc2413e009f
--- /dev/null
+++ b/test/CodeGen/NVPTX/fp-literals.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Make sure we can properly differentiate between single-precision and
+; double-precision FP literals.
+
+; CHECK-LABEL: myaddf(
+; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, 0f3F800000
+define float @myaddf(float %a) {                  ; adds the float constant 1.0; expected literal uses the 0f prefix with 8 hex digits
+  %ret = fadd float %a, 1.0
+  ret float %ret
+}
+
+; CHECK-LABEL: myaddd(
+; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, 0d3FF0000000000000
+define double @myaddd(double %a) {                ; adds the double constant 1.0; expected literal uses the 0d prefix with 16 hex digits
+  %ret = fadd double %a, 1.0
+  ret double %ret
+}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
index c9cb2f71f425..2a527989e410 100644
--- a/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx-nvidia-cuda"
 
 ; Ensure global variables in address space 0 are promoted to address space 1
 
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
index 0595325977e1..1dd8ae40db4f 100644
--- a/test/CodeGen/NVPTX/i1-global.ll
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-
+target triple = "nvptx-nvidia-cuda"
 
 ; CHECK: .visible .global .align 1 .u8 mypred
 @mypred = addrspace(1) global i1 true, align 1
diff --git a/test/CodeGen/NVPTX/i1-int-to-fp.ll b/test/CodeGen/NVPTX/i1-int-to-fp.ll
new file mode 100644
index 000000000000..3979179399ee
--- /dev/null
+++ b/test/CodeGen/NVPTX/i1-int-to-fp.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK-LABEL: foo(
+; CHECK: setp
+; CHECK: selp
+; CHECK: cvt.rn.f32.u32
+define float @foo(i1 %a) {                        ; unsigned i1 -> float conversion
+  %ret = uitofp i1 %a to float
+  ret float %ret
+}
+
+; CHECK-LABEL: foo2
+; CHECK: setp
+; CHECK: selp
+; CHECK: cvt.rn.f32.s32
+define float @foo2(i1 %a) {                       ; signed i1 -> float conversion; a signed 32-bit cvt is expected
+  %ret = sitofp i1 %a to float
+  ret float %ret
+}
+
+; CHECK-LABEL: foo3
+; CHECK: setp
+; CHECK: selp
+; CHECK: cvt.rn.f64.u32
+define double @foo3(i1 %a) {                      ; unsigned i1 -> double conversion; an unsigned 32-bit cvt is expected
+  %ret = uitofp i1 %a to double
+  ret double %ret
+}
+
+; CHECK-LABEL: foo4
+; CHECK: setp
+; CHECK: selp
+; CHECK: cvt.rn.f64.s32
+define double @foo4(i1 %a) {                      ; signed i1 -> double conversion; a signed 32-bit cvt is expected
+  %ret = sitofp i1 %a to double
+  ret double %ret
+}
diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll
index fabd61a25d2f..f4df87439322 100644
--- a/test/CodeGen/NVPTX/i1-param.ll
+++ b/test/CodeGen/NVPTX/i1-param.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx-nvidia-cuda"
 
 ; Make sure predicate (i1) operands to kernels get expanded out to .u8
 
diff --git a/test/CodeGen/NVPTX/i8-param.ll b/test/CodeGen/NVPTX/i8-param.ll
new file mode 100644
index 000000000000..84daa9f66316
--- /dev/null
+++ b/test/CodeGen/NVPTX/i8-param.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+; CHECK: .visible .func  (.param .b32 func_retval0) callee
+define i8 @callee(i8 %a) {                        ; returns %a + 42; the i8 result is expected in a .b32 return slot
+; CHECK: ld.param.u8
+  %ret = add i8 %a, 42
+; CHECK: st.param.b32
+  ret i8 %ret
+}
+
+; CHECK: .visible .func caller
+define void @caller(i8* %a) {                     ; loads *%a, passes it to @callee, stores the result back to *%a
+; CHECK: ld.u8
+  %val = load i8* %a
+  %ret = tail call i8 @callee(i8 %val)
+; CHECK: ld.param.b32
+  store i8 %ret, i8* %a                           ; the i8 result is expected to be read back from a .b32 param
+  ret void
+}
+
+
diff --git a/test/CodeGen/NVPTX/implicit-def.ll b/test/CodeGen/NVPTX/implicit-def.ll
new file mode 100644
index 000000000000..06d3d562046e
--- /dev/null
+++ b/test/CodeGen/NVPTX/implicit-def.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 -asm-verbose=1 | FileCheck %s
+
+; CHECK: // implicit-def: %f[[F0:[0-9]+]]
+; CHECK: add.f32         %f{{[0-9]+}}, %f{{[0-9]+}}, %f[[F0]];
+define float @foo(float %a) {                     ; adds undef to %a; run at -O0 with verbose asm per the RUN line
+  %ret = fadd float %a, undef                     ; the register named in the implicit-def comment is expected as the add's last operand
+  ret float %ret
+}
+
diff --git a/test/CodeGen/NVPTX/inline-asm.ll b/test/CodeGen/NVPTX/inline-asm.ll
new file mode 100644
index 000000000000..d76eb4239ee3
--- /dev/null
+++ b/test/CodeGen/NVPTX/inline-asm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+define float @test(float %x) {                    ; passes %x through a one-instruction inline-asm block
+entry:
+; CHECK: ex2.approx.ftz.f32 %f{{[0-9]+}}, %f{{[0-9]+}}
+  %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x)   ; constraints "=f,f": one f output ($0) and one f input ($1)
+  ret float %0
+}
diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll
index 53a28f333798..af91bb442412 100644
--- a/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -2,231 +2,231 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
 define ptx_device i32 @test_tid_x() {
-; CHECK: mov.u32 %r0, %tid.x;
+; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_y() {
-; CHECK: mov.u32 %r0, %tid.y;
+; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_z() {
-; CHECK: mov.u32 %r0, %tid.z;
+; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_tid_w() {
-; CHECK: mov.u32 %r0, %tid.w;
+; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.tid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_x() {
-; CHECK: mov.u32 %r0, %ntid.x;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_y() {
-; CHECK: mov.u32 %r0, %ntid.y;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_z() {
-; CHECK: mov.u32 %r0, %ntid.z;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ntid_w() {
-; CHECK: mov.u32 %r0, %ntid.w;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ntid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_laneid() {
-; CHECK: mov.u32 %r0, %laneid;
+; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.laneid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_warpid() {
-; CHECK: mov.u32 %r0, %warpid;
+; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.warpid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nwarpid() {
-; CHECK: mov.u32 %r0, %nwarpid;
+; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nwarpid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_x() {
-; CHECK: mov.u32 %r0, %ctaid.x;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_y() {
-; CHECK: mov.u32 %r0, %ctaid.y;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_z() {
-; CHECK: mov.u32 %r0, %ctaid.z;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_ctaid_w() {
-; CHECK: mov.u32 %r0, %ctaid.w;
+; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.ctaid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_x() {
-; CHECK: mov.u32 %r0, %nctaid.x;
+; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.x()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_y() {
-; CHECK: mov.u32 %r0, %nctaid.y;
+; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.y()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_z() {
-; CHECK: mov.u32 %r0, %nctaid.z;
+; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.z()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nctaid_w() {
-; CHECK: mov.u32 %r0, %nctaid.w;
+; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nctaid.w()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_smid() {
-; CHECK: mov.u32 %r0, %smid;
+; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.smid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_nsmid() {
-; CHECK: mov.u32 %r0, %nsmid;
+; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.nsmid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_gridid() {
-; CHECK: mov.u32 %r0, %gridid;
+; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.gridid()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_eq() {
-; CHECK: mov.u32 %r0, %lanemask_eq;
+; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.eq()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_le() {
-; CHECK: mov.u32 %r0, %lanemask_le;
+; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.le()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_lt() {
-; CHECK: mov.u32 %r0, %lanemask_lt;
+; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.lt()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_ge() {
-; CHECK: mov.u32 %r0, %lanemask_ge;
+; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.ge()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_lanemask_gt() {
-; CHECK: mov.u32 %r0, %lanemask_gt;
+; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.lanemask.gt()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_clock() {
-; CHECK: mov.u32 %r0, %clock;
+; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.clock()
 	ret i32 %x
 }
 
 define ptx_device i64 @test_clock64() {
-; CHECK: mov.u64 %rl0, %clock64;
+; CHECK: mov.u64 %rl{{[0-9]+}}, %clock64;
 ; CHECK: ret;
 	%x = call i64 @llvm.ptx.read.clock64()
 	ret i64 %x
 }
 
 define ptx_device i32 @test_pm0() {
-; CHECK: mov.u32 %r0, %pm0;
+; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm0()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm1() {
-; CHECK: mov.u32 %r0, %pm1;
+; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm1()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm2() {
-; CHECK: mov.u32 %r0, %pm2;
+; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm2()
 	ret i32 %x
 }
 
 define ptx_device i32 @test_pm3() {
-; CHECK: mov.u32 %r0, %pm3;
+; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
 ; CHECK: ret;
 	%x = call i32 @llvm.ptx.read.pm3()
 	ret i32 %x
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
index 1676f20643d2..78e1e7789014 100644
--- a/test/CodeGen/NVPTX/intrinsics.ll
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -2,14 +2,14 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
 
 define ptx_device float @test_fabsf(float %f) {
-; CHECK: abs.f32 %f0, %f0;
+; CHECK: abs.f32 %f{{[0-9]+}}, %f{{[0-9]+}};
 ; CHECK: ret;
 	%x = call float @llvm.fabs.f32(float %f)
 	ret float %x
 }
 
 define ptx_device double @test_fabs(double %d) {
-; CHECK: abs.f64 %fl0, %fl0;
+; CHECK: abs.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}};
 ; CHECK: ret;
 	%x = call double @llvm.fabs.f64(double %d)
 	ret double %x
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index 3265868d3c52..133ef09afdb2 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -4,27 +4,27 @@
 
 ;; i8
 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
-; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(1)* %ptr
   ret i8 %a
 }
 
 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
-; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(3)* %ptr
   ret i8 %a
 }
 
 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
-; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(5)* %ptr
   ret i8 %a
@@ -32,27 +32,27 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
 
 ;; i16
 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
-; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(1)* %ptr
   ret i16 %a
 }
 
 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
-; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(3)* %ptr
   ret i16 %a
 }
 
 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
-; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(5)* %ptr
   ret i16 %a
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
index 81a5216f963a..3728268c24d5 100644
--- a/test/CodeGen/NVPTX/ld-generic.ll
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -4,9 +4,9 @@
 
 ;; i8
 define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
-; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i8 addrspace(0)* %ptr
   ret i8 %a
@@ -14,9 +14,9 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
 
 ;; i16
 define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
-; PTX32: ld.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
-; PTX64: ld.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
 ; PTX64: ret
   %a = load i16 addrspace(0)* %ptr
   ret i16 %a
diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll
new file mode 100644
index 000000000000..81a82b2c38b5
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldu-i8.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+declare i8 @llvm.nvvm.ldu.global.i.i8(i8*)
+
+define i8 @foo(i8* %a) {
+; Ensure we properly truncate off the high-order 24 bits
+; CHECK:        ldu.global.u8
+; CHECK:        cvt.u32.u16
+; CHECK:        and.b32         %r{{[0-9]+}}, %r{{[0-9]+}}, 255
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8(i8* %a)
+  ret i8 %val
+}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
new file mode 100644
index 000000000000..26cadc401b79
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+define void @reg_plus_offset(i32* %a) {
+; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
+; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
+  %p2 = getelementptr i32* %a, i32 8
+  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p2), !align !1
+  %p3 = getelementptr i32* %a, i32 9
+  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32(i32* %p3), !align !1
+  %t3 = mul i32 %t1, %t2
+  store i32 %t3, i32* %a
+  ret void
+}
+
+!1 = metadata !{ i32 4 }
+
+declare i32 @llvm.nvvm.ldu.global.i.i32(i32*)
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
diff --git a/test/CodeGen/NVPTX/lit.local.cfg b/test/CodeGen/NVPTX/lit.local.cfg
index 7180c841d6e8..85cf8c2c8c07 100644
--- a/test/CodeGen/NVPTX/lit.local.cfg
+++ b/test/CodeGen/NVPTX/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'NVPTX' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/NVPTX/load-sext-i1.ll b/test/CodeGen/NVPTX/load-sext-i1.ll
new file mode 100644
index 000000000000..d836740eed94
--- /dev/null
+++ b/test/CodeGen/NVPTX/load-sext-i1.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx-nvidia-cuda"
+
+define void @main(i1* %a1, i32 %a2, i32* %arg3) {
+; CHECK: ld.u8
+; CHECK-NOT: ld.u1
+  %t1 = getelementptr i1* %a1, i32 %a2
+  %t2 = load i1* %t1
+  %t3 = sext i1 %t2 to i32
+  store i32 %t3, i32* %arg3
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/local-stack-frame.ll b/test/CodeGen/NVPTX/local-stack-frame.ll
new file mode 100644
index 000000000000..178dff1a5d3f
--- /dev/null
+++ b/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+; Ensure we access the local stack properly
+
+; PTX32:        mov.u32         %r{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX32:        cvta.local.u32  %SP, %r{{[0-9]+}};
+; PTX32:        ld.param.u32    %r{{[0-9]+}}, [foo_param_0];
+; PTX32:        st.u32  [%SP+0], %r{{[0-9]+}};
+; PTX64:        mov.u64         %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX64:        cvta.local.u64  %SP, %rl{{[0-9]+}};
+; PTX64:        ld.param.u32    %r{{[0-9]+}}, [foo_param_0];
+; PTX64:        st.u32  [%SP+0], %r{{[0-9]+}};
+define void @foo(i32 %a) {
+  %local = alloca i32, align 4
+  store i32 %a, i32* %local
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/module-inline-asm.ll b/test/CodeGen/NVPTX/module-inline-asm.ll
new file mode 100644
index 000000000000..cdbcf2013c00
--- /dev/null
+++ b/test/CodeGen/NVPTX/module-inline-asm.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+; CHECK: .global .b32 val;
+module asm ".global .b32 val;"
+
+define void @foo() {
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
index 779f7798d883..e7a81be01b14 100644
--- a/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -2,22 +2,22 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
 
 define ptx_kernel void @t1(i1* %a) {
-; PTX32:      mov.u16 %rc{{[0-9]+}}, 0;
-; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
-; PTX64:      mov.u16 %rc{{[0-9]+}}, 0;
-; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
+; PTX32:      mov.u16 %rs{{[0-9]+}}, 0;
+; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
+; PTX64:      mov.u16 %rs{{[0-9]+}}, 0;
+; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}};
   store i1 false, i1* %a
   ret void
 }
 
 
 define ptx_kernel void @t2(i1* %a, i8* %b) {
-; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
-; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
-; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
-; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
-; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
-; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
+; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
+; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
+; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
 
   %t1 = load i1* %a
   %t2 = select i1 %t1, i8 1, i8 2
diff --git a/test/CodeGen/NVPTX/pr16278.ll b/test/CodeGen/NVPTX/pr16278.ll
new file mode 100644
index 000000000000..5432a848442c
--- /dev/null
+++ b/test/CodeGen/NVPTX/pr16278.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+@one_f = addrspace(4) global float 1.000000e+00, align 4
+
+define float @foo() {
+; CHECK: ld.const.f32
+  %val = load float addrspace(4)* @one_f
+  ret float %val
+}
diff --git a/test/CodeGen/NVPTX/pr17529.ll b/test/CodeGen/NVPTX/pr17529.ll
new file mode 100644
index 000000000000..a16214225674
--- /dev/null
+++ b/test/CodeGen/NVPTX/pr17529.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Function Attrs: nounwind
+; CHECK: .func kernelgen_memcpy
+define ptx_device void @kernelgen_memcpy(i8* nocapture %dst) #0 {
+entry:
+  br i1 undef, label %for.end, label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %scevgep9 = getelementptr i8* %dst, i64 %index
+  %scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
+  store <4 x i8> undef, <4 x i8>* %scevgep910, align 1
+  %index.next = add i64 %index, 4
+  %0 = icmp eq i64 undef, %index.next
+  br i1 %0, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  br i1 undef, label %for.end, label %for.body.preheader1
+
+for.body.preheader1:                              ; preds = %middle.block
+  %scevgep2 = getelementptr i8* %dst, i64 0
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader1
+  %lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ]
+  store i8 undef, i8* %lsr.iv3, align 1
+  %scevgep4 = getelementptr i8* %lsr.iv3, i64 1
+  br label %for.body
+
+for.end:                                          ; preds = %middle.block, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
index 5a9dac152e41..4aeff0924955 100644
--- a/test/CodeGen/NVPTX/refl1.ll
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-nvidia-cuda"
 
 ; Function Attrs: nounwind
 ; CHECK: .entry foo
diff --git a/test/CodeGen/NVPTX/rsqrt.ll b/test/CodeGen/NVPTX/rsqrt.ll
new file mode 100644
index 000000000000..3a52a493abdd
--- /dev/null
+++ b/test/CodeGen/NVPTX/rsqrt.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=1 -nvptx-prec-sqrtf32=0 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+declare float @llvm.nvvm.sqrt.f(float)
+
+define float @foo(float %a) {
+; CHECK: rsqrt.approx.f32
+  %val = tail call float @llvm.nvvm.sqrt.f(float %a)
+  %ret = fdiv float 1.0, %val
+  ret float %ret
+}
+  
diff --git a/test/CodeGen/NVPTX/sext-in-reg.ll b/test/CodeGen/NVPTX/sext-in-reg.ll
new file mode 100644
index 000000000000..b516dfaf39a0
--- /dev/null
+++ b/test/CodeGen/NVPTX/sext-in-reg.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+define void @one(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+; CHECK: cvt.s64.s8
+; CHECK: cvt.s64.s8
+entry:
+  %sext = shl i64 %a, 56
+  %conv1 = ashr exact i64 %sext, 56
+  %sext1 = shl i64 %b, 56
+  %conv4 = ashr exact i64 %sext1, 56
+  %shr = ashr i64 %a, 16
+  %shr9 = ashr i64 %b, 16
+  %add = add nsw i64 %conv4, %conv1
+  store i64 %add, i64* %p1, align 8
+  %add17 = add nsw i64 %shr9, %shr
+  store i64 %add17, i64* %p2, align 8
+  ret void
+}
+
+
+define void @two(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+entry:
+; CHECK: cvt.s64.s32
+; CHECK: cvt.s64.s32
+  %sext = shl i64 %a, 32
+  %conv1 = ashr exact i64 %sext, 32
+  %sext1 = shl i64 %b, 32
+  %conv4 = ashr exact i64 %sext1, 32
+  %shr = ashr i64 %a, 16
+  %shr9 = ashr i64 %b, 16
+  %add = add nsw i64 %conv4, %conv1
+  store i64 %add, i64* %p1, align 8
+  %add17 = add nsw i64 %shr9, %shr
+  store i64 %add17, i64* %p2, align 8
+  ret void
+}
+
+
+define void @three(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+entry:
+; CHECK: cvt.s64.s16
+; CHECK: cvt.s64.s16
+  %sext = shl i64 %a, 48
+  %conv1 = ashr exact i64 %sext, 48
+  %sext1 = shl i64 %b, 48
+  %conv4 = ashr exact i64 %sext1, 48
+  %shr = ashr i64 %a, 16
+  %shr9 = ashr i64 %b, 16
+  %add = add nsw i64 %conv4, %conv1
+  store i64 %add, i64* %p1, align 8
+  %add17 = add nsw i64 %shr9, %shr
+  store i64 %add17, i64* %p2, align 8
+  ret void
+}
+
+
+define void @four(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+entry:
+; CHECK: cvt.s32.s8
+; CHECK: cvt.s32.s8
+  %sext = shl i32 %a, 24
+  %conv1 = ashr exact i32 %sext, 24
+  %sext1 = shl i32 %b, 24
+  %conv4 = ashr exact i32 %sext1, 24
+  %shr = ashr i32 %a, 16
+  %shr9 = ashr i32 %b, 16
+  %add = add nsw i32 %conv4, %conv1
+  store i32 %add, i32* %p1, align 4
+  %add17 = add nsw i32 %shr9, %shr
+  store i32 %add17, i32* %p2, align 4
+  ret void
+}
+
+
+define void @five(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+entry:
+; CHECK: cvt.s32.s16
+; CHECK: cvt.s32.s16
+  %sext = shl i32 %a, 16
+  %conv1 = ashr exact i32 %sext, 16
+  %sext1 = shl i32 %b, 16
+  %conv4 = ashr exact i32 %sext1, 16
+  %shr = ashr i32 %a, 16
+  %shr9 = ashr i32 %b, 16
+  %add = add nsw i32 %conv4, %conv1
+  store i32 %add, i32* %p1, align 4
+  %add17 = add nsw i32 %shr9, %shr
+  store i32 %add17, i32* %p2, align 4
+  ret void
+}
+
+
+define void @six(i16 %a, i16 %b, i16* %p1, i16* %p2) {
+entry:
+; CHECK: cvt.s16.s8
+; CHECK: cvt.s16.s8
+  %sext = shl i16 %a, 8
+  %conv1 = ashr exact i16 %sext, 8
+  %sext1 = shl i16 %b, 8
+  %conv4 = ashr exact i16 %sext1, 8
+  %shr = ashr i16 %a, 8
+  %shr9 = ashr i16 %b, 8
+  %add = add nsw i16 %conv4, %conv1
+  store i16 %add, i16* %p1, align 4
+  %add17 = add nsw i16 %shr9, %shr
+  store i16 %add17, i16* %p2, align 4
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/sext-params.ll b/test/CodeGen/NVPTX/sext-params.ll
new file mode 100644
index 000000000000..a559630f3591
--- /dev/null
+++ b/test/CodeGen/NVPTX/sext-params.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+define i8 @foo(i8 signext %a) {
+; CHECK: ld.param.s8
+  %ret = add i8 %a, 3
+  ret i8 %ret
+}
+
+define i8 @bar(i8 zeroext %a) {
+; CHECK: ld.param.u8
+  %ret = add i8 %a, 3
+  ret i8 %ret
+}
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
index 0b26d802df84..68c09fe065bc 100644
--- a/test/CodeGen/NVPTX/st-addrspace.ll
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -5,27 +5,27 @@
 ;; i8
 
 define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
-; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX32: ret
-; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX64: ret
   store i8 %a, i8 addrspace(1)* %ptr
   ret void
 }
 
 define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
-; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX32: ret
-; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX64: ret
   store i8 %a, i8 addrspace(3)* %ptr
   ret void
 }
 
 define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
-; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.local.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX32: ret
-; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX64: ret
   store i8 %a, i8 addrspace(5)* %ptr
   ret void
diff --git a/test/CodeGen/NVPTX/st-generic.ll b/test/CodeGen/NVPTX/st-generic.ll
index 59a1fe021119..b9c616fbd19e 100644
--- a/test/CodeGen/NVPTX/st-generic.ll
+++ b/test/CodeGen/NVPTX/st-generic.ll
@@ -5,9 +5,9 @@
 ;; i8
 
 define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
-; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX32: ret
-; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX64: ret
   store i8 %a, i8 addrspace(0)* %ptr
   ret void
diff --git a/test/CodeGen/NVPTX/vec-param-load.ll b/test/CodeGen/NVPTX/vec-param-load.ll
new file mode 100644
index 000000000000..a384348a6590
--- /dev/null
+++ b/test/CodeGen/NVPTX/vec-param-load.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+define <16 x float> @foo(<16 x float> %a) {
+; Make sure we index into vectors properly
+; CHECK: ld.param.v4.f32         {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0];
+; CHECK: ld.param.v4.f32         {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+16];
+; CHECK: ld.param.v4.f32         {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+32];
+; CHECK: ld.param.v4.f32         {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [foo_param_0+48];
+  ret <16 x float> %a
+}
diff --git a/test/CodeGen/NVPTX/vec8.ll b/test/CodeGen/NVPTX/vec8.ll
new file mode 100644
index 000000000000..03f5cfc6cb01
--- /dev/null
+++ b/test/CodeGen/NVPTX/vec8.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-cuda"
+
+; CHECK: .visible .func foo
+define void @foo(<8 x i8> %a, i8* %b) {
+  %t0 = extractelement <8 x i8> %a, i32 0
+; CHECK-DAG: ld.param.v4.u8
+; CHECK-DAG: ld.param.u32
+  store i8 %t0, i8* %b
+  ret void
+}
+
diff --git a/test/CodeGen/NVPTX/vector-args.ll b/test/CodeGen/NVPTX/vector-args.ll
index 80deae46935a..c6c8e73bf83e 100644
--- a/test/CodeGen/NVPTX/vector-args.ll
+++ b/test/CodeGen/NVPTX/vector-args.ll
@@ -4,8 +4,7 @@
 define float @foo(<2 x float> %a) {
 ; CHECK: .func (.param .b32 func_retval0) foo
 ; CHECK: .param .align 8 .b8 foo_param_0[8]
-; CHECK: ld.param.f32 %f{{[0-9]+}}
-; CHECK: ld.param.f32 %f{{[0-9]+}}
+; CHECK: ld.param.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
   %t1 = fmul <2 x float> %a, %a
   %t2 = extractelement <2 x float> %t1, i32 0
   %t3 = extractelement <2 x float> %t1, i32 1
@@ -17,11 +16,20 @@ define float @foo(<2 x float> %a) {
 define float @bar(<4 x float> %a) {
 ; CHECK: .func (.param .b32 func_retval0) bar
 ; CHECK: .param .align 16 .b8 bar_param_0[16]
-; CHECK: ld.param.f32 %f{{[0-9]+}}
-; CHECK: ld.param.f32 %f{{[0-9]+}}
+; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
   %t1 = fmul <4 x float> %a, %a
   %t2 = extractelement <4 x float> %t1, i32 0
   %t3 = extractelement <4 x float> %t1, i32 1
   %t4 = fadd float %t2, %t3
   ret float %t4
 }
+
+
+define <4 x float> @baz(<4 x float> %a) {
+; CHECK: .func  (.param .align 16 .b8 func_retval0[16]) baz
+; CHECK: .param .align 16 .b8 baz_param_0[16]
+; CHECK: ld.param.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+; CHECK: st.param.v4.f32 [func_retval0+0], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+  %t1 = fmul <4 x float> %a, %a
+  ret <4 x float> %t1
+}
diff --git a/test/CodeGen/NVPTX/vector-stores.ll b/test/CodeGen/NVPTX/vector-stores.ll
new file mode 100644
index 000000000000..49418122da55
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-stores.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: .visible .func foo1
+; CHECK: st.v2.f32
+define void @foo1(<2 x float> %val, <2 x float>* %ptr) {
+  store <2 x float> %val, <2 x float>* %ptr
+  ret void
+}
+
+; CHECK: .visible .func foo2
+; CHECK: st.v4.f32
+define void @foo2(<4 x float> %val, <4 x float>* %ptr) {
+  store <4 x float> %val, <4 x float>* %ptr
+  ret void
+}
+
+; CHECK: .visible .func foo3
+; CHECK: st.v2.u32
+define void @foo3(<2 x i32> %val, <2 x i32>* %ptr) {
+  store <2 x i32> %val, <2 x i32>* %ptr
+  ret void
+}
+
+; CHECK: .visible .func foo4
+; CHECK: st.v4.u32
+define void @foo4(<4 x i32> %val, <4 x i32>* %ptr) {
+  store <4 x i32> %val, <4 x i32>* %ptr
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index ecf45efee2e6..3d3728dcde12 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,9 +1,23 @@
-; RUN: llc < %s
+; RUN: llc < %s | FileCheck %s
 ;; Formerly crashed, see PR 1508
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc64-apple-darwin8"
 	%struct.Range = type { i64, i64 }
 
+; CHECK: .cfi_startproc
+; CHECK: .cfi_personality 155, L___gxx_personality_v0$non_lazy_ptr
+; CHECK: .cfi_lsda 16, Lexception0
+; CHECK: .cfi_def_cfa_offset 176
+; CHECK: .cfi_offset r31, -8
+; CHECK: .cfi_offset lr, 16
+; CHECK: .cfi_def_cfa_register r31
+; CHECK: .cfi_offset r27, -16
+; CHECK: .cfi_offset r28, -24
+; CHECK: .cfi_offset r29, -32
+; CHECK: .cfi_offset r30, -40
+; CHECK: .cfi_endproc
+
+
 define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) {
 entry:
 	%effectiveRange = alloca %struct.Range, align 8		; <%struct.Range*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2009-09-18-carrybit.ll b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
index 6c23a6162c9d..8d5ea8af0f28 100644
--- a/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
+++ b/test/CodeGen/PowerPC/2009-09-18-carrybit.ll
@@ -5,7 +5,7 @@ target triple = "powerpc-apple-darwin9.6"
 
 define i64 @foo(i64 %r.0.ph, i64 %q.0.ph, i32 %sr1.1.ph) nounwind {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: subfc
 ; CHECK: subfe
 ; CHECK: subfc
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 097611a7619c..b0c37b80ed2f 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 -break-anti-dependencies=none | FileCheck %s
 ; ModuleID = 'hh.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 target triple = "powerpc-apple-darwin9.6"
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index d1a3c9f46b57..a25ce07e83bf 100644
--- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -6,7 +6,7 @@ target triple = "powerpc-apple-darwin9.8"
 define i32 @main() nounwind {
 entry:
 ; Make sure we're generating references using the red zone
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK: stw r2, -12(r1)
   %retval = alloca i32
   %0 = alloca i32
diff --git a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
index a18829e1bce8..b1cbb36fe041 100644
--- a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
+++ b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -9,7 +9,7 @@ entry:
   store i64 %z2, i64* %xx, align 4
   ret void
 
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: sldi {{.*}}, {{.*}}, 32
 ; Note: it's okay if someday CodeGen gets smart enough to optimize out
 ; the shift.
diff --git a/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll b/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll
new file mode 100644
index 000000000000..542a766300ef
--- /dev/null
+++ b/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @test(i8* %base, i8 %val) {
+entry:
+  %arrayidx = getelementptr inbounds i8* %base, i32 -1
+  store i8 %val, i8* %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds i8* %base, i32 1
+  store i8 %val, i8* %arrayidx2, align 1
+  ret i8* %arrayidx
+}
+; CHECK: @test
+; CHECK: %entry
+; CHECK-NEXT: stbu 4, -1(3)
+; CHECK-NEXT: stb 4, 2(3)
+; CHECK-NEXT: blr
+
+define i64* @test64(i64* %base, i64 %val) {
+entry:
+  %arrayidx = getelementptr inbounds i64* %base, i32 -1
+  store i64 %val, i64* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds i64* %base, i32 1
+  store i64 %val, i64* %arrayidx2, align 8
+  ret i64* %arrayidx
+}
+; CHECK: @test64
+; CHECK: %entry
+; CHECK-NEXT: stdu 4, -8(3)
+; CHECK-NEXT: std 4, 16(3)
+; CHECK-NEXT: blr
+
diff --git a/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll b/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll
new file mode 100644
index 000000000000..9bf25c8ffe49
--- /dev/null
+++ b/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@g_51 = external global [8 x i32], align 4
+
+; CHECK: func_7
+
+; Function Attrs: nounwind
+define fastcc void @func_7() #0 {
+entry:
+  %arrayidx638 = getelementptr inbounds [3 x [1 x i32]]* undef, i64 0, i64 1, i64 0
+  br i1 undef, label %for.cond940, label %if.end1018
+
+for.cond940:                                      ; preds = %for.cond940, %entry
+  %l_655.1 = phi i32* [ getelementptr inbounds ([8 x i32]* @g_51, i64 0, i64 6), %entry ], [ %l_654.0, %for.cond940 ]
+  %l_654.0 = phi i32* [ null, %entry ], [ %arrayidx638, %for.cond940 ]
+  %exitcond = icmp eq i32 undef, 20
+  br i1 %exitcond, label %if.end1018, label %for.cond940
+
+if.end1018:                                       ; preds = %for.cond940, %entry
+  %l_655.3.ph33 = phi i32* [ %l_655.1, %for.cond940 ], [ getelementptr inbounds ([8 x i32]* @g_51, i64 0, i64 6), %entry ]
+  store i32 0, i32* %l_655.3.ph33, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 28dd08c7fed1..4588bc05352b 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
-; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
-; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32-RS
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-RS-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC32
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC64
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=CHECK-PPC32-NOFP
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=CHECK-PPC64-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=CHECK-PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=CHECK-PPC32-RS-NOFP
 
 ; CHECK-PPC32: stw r31, -4(r1)
 ; CHECK-PPC32: lwz r1, 0(r1)
diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll
index 8c928ce8bcad..500d126ebed7 100644
--- a/test/CodeGen/PowerPC/addc.ll
+++ b/test/CodeGen/PowerPC/addc.ll
@@ -5,7 +5,7 @@ define i64 @add_ll(i64 %a, i64 %b) nounwind {
 entry:
         %tmp.2 = add i64 %b, %a         ; <i64> [#uses=1]
         ret i64 %tmp.2
-; CHECK: add_ll:
+; CHECK-LABEL: add_ll:
 ; CHECK: addc r4, r6, r4
 ; CHECK: adde r3, r5, r3
 ; CHECK: blr
@@ -15,7 +15,7 @@ define i64 @add_l_5(i64 %a) nounwind {
 entry:
         %tmp.1 = add i64 %a, 5          ; <i64> [#uses=1]
         ret i64 %tmp.1
-; CHECK: add_l_5:
+; CHECK-LABEL: add_l_5:
 ; CHECK: addic r4, r4, 5
 ; CHECK: addze r3, r3
 ; CHECK: blr
@@ -25,7 +25,7 @@ define i64 @add_l_m5(i64 %a) nounwind {
 entry:
         %tmp.1 = add i64 %a, -5         ; <i64> [#uses=1]
         ret i64 %tmp.1
-; CHECK: add_l_m5:
+; CHECK-LABEL: add_l_m5:
 ; CHECK: addic r4, r4, -5
 ; CHECK: addme r3, r3
 ; CHECK: blr
diff --git a/test/CodeGen/PowerPC/addrfuncstr.ll b/test/CodeGen/PowerPC/addrfuncstr.ll
new file mode 100644
index 000000000000..6750b5cfebf6
--- /dev/null
+++ b/test/CodeGen/PowerPC/addrfuncstr.ll
@@ -0,0 +1,27 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+; Verify that a constant with an initializer that may turn into a dynamic
+; relocation is not placed in .rodata, but rather in .data.rel.ro.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.x = type { i64 (i8*, i64, i64, %struct._IO_FILE*)* }
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@_ZL1y = internal constant %struct.x { i64 (i8*, i64, i64, %struct._IO_FILE*)* @fread }, align 8
+
+; Function Attrs: nounwind
+define %struct.x* @_Z3foov() #0 {
+entry:
+  ret %struct.x* @_ZL1y
+}
+
+declare i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1
+
+; CHECK: .section .data.rel.ro
+; CHECK: .quad fread
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/altivec-ord.ll b/test/CodeGen/PowerPC/altivec-ord.ll
new file mode 100644
index 000000000000..6aea8433a1eb
--- /dev/null
+++ b/test/CodeGen/PowerPC/altivec-ord.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <4 x i16> @test(<4 x float> %f, <4 x float> %g) {
+entry:
+	%r = fcmp ord <4 x float> %f, %g
+	%s = sext <4 x i1> %r to <4 x i16>
+	ret <4 x i16> %s
+}
+
+define <4 x i16> @test2(<4 x float> %f, <4 x float> %g) {
+entry:
+	%r = fcmp one <4 x float> %f, %g
+	%s = sext <4 x i1> %r to <4 x i16>
+	ret <4 x i16> %s
+}
+
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
index 52587e2c0b87..1525e05501ee 100644
--- a/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -1,6 +1,9 @@
-; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=g4 -mtriple=powerpc-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN32 %s
+; RUN: llc -O0 -mcpu=ppc970 -mtriple=powerpc64-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN64 %s
 
 ; Test case for PR 14779: anonymous aggregates are not handled correctly.
+; Darwin bug report PR 15821 is similar.
 ; The bug is triggered by passing a byval structure after an anonymous
 ; aggregate.
 
@@ -17,13 +20,33 @@ unequal:
   ret i8* %ptr
 }
 
-; CHECK: func1:
+; CHECK-LABEL: func1:
 ; CHECK: cmpld {{[0-9]+}}, 4, 5
-; CHECK: std 4, -[[OFFSET1:[0-9]+]]
-; CHECK: std 5, -[[OFFSET2:[0-9]+]]
+; CHECK-DAG: std 4, -[[OFFSET1:[0-9]+]]
+; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]]
 ; CHECK: ld 3, -[[OFFSET1]](1)
 ; CHECK: ld 3, -[[OFFSET2]](1)
 
+; DARWIN32: _func1:
+; DARWIN32: mr
+; DARWIN32: mr r[[REG1:[0-9]+]], r[[REGA:[0-9]+]]
+; DARWIN32: mr r[[REG2:[0-9]+]], r[[REGB:[0-9]+]]
+; DARWIN32: cmplw cr{{[0-9]+}}, r[[REGA]], r[[REGB]]
+; DARWIN32: stw r[[REG1]], -[[OFFSET1:[0-9]+]]
+; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
+; DARWIN32: lwz r3, -[[OFFSET1]]
+; DARWIN32: lwz r3, -[[OFFSET2]]
+
+; DARWIN64: _func1:
+; DARWIN64: mr
+; DARWIN64: mr r[[REG1:[0-9]+]], r[[REGA:[0-9]+]]
+; DARWIN64: mr r[[REG2:[0-9]+]], r[[REGB:[0-9]+]]
+; DARWIN64: cmpld cr{{[0-9]+}}, r[[REGA]], r[[REGB]]
+; DARWIN64: std r[[REG1]], -[[OFFSET1:[0-9]+]]
+; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: ld r3, -[[OFFSET1]]
+; DARWIN64: ld r3, -[[OFFSET2]]
+
 
 define i8* @func2({ i64, i8* } %array1, %tarray* byval %array2) {
 entry:
@@ -38,15 +61,38 @@ unequal:
   ret i8* %array2_ptr
 }
 
-; CHECK: func2:
+; CHECK-LABEL: func2:
 ; CHECK: addi [[REG1:[0-9]+]], 1, 64
 ; CHECK: ld [[REG2:[0-9]+]], 8([[REG1]])
 ; CHECK: cmpld {{[0-9]+}}, 4, [[REG2]]
-; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]]
-; CHECK: std 4, -[[OFFSET2:[0-9]+]]
+; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]]
+; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]]
 ; CHECK: ld 3, -[[OFFSET2]](1)
 ; CHECK: ld 3, -[[OFFSET1]](1)
 
+; DARWIN32: _func2:
+; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36
+; DARWIN32: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]])
+; DARWIN32: mr
+; DARWIN32: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
+; DARWIN32: cmplw cr{{[0-9]+}}, r[[REGA]], r[[REG2]]
+; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
+; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
+; DARWIN32: lwz r3, -[[OFFSET1]]
+; DARWIN32: lwz r3, -[[OFFSET2]]
+
+; DARWIN64: _func2:
+; DARWIN64: addi r[[REG1:[0-9]+]], r1, 64
+; DARWIN64: ld r[[REG2:[0-9]+]], 8(r[[REG1]])
+; DARWIN64: mr
+; DARWIN64: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
+; DARWIN64: cmpld cr{{[0-9]+}}, r[[REGA]], r[[REG2]]
+; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
+; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: ld r3, -[[OFFSET1]]
+; DARWIN64: ld r3, -[[OFFSET2]]
+
+
 define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) {
 entry:
   %tmp1 = getelementptr inbounds { i64, i8* }* %array1, i32 0, i32 1
@@ -61,7 +107,7 @@ unequal:
   ret i8* %array2_ptr
 }
 
-; CHECK: func3:
+; CHECK-LABEL: func3:
 ; CHECK: addi [[REG1:[0-9]+]], 1, 64
 ; CHECK: addi [[REG2:[0-9]+]], 1, 48
 ; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
@@ -72,6 +118,29 @@ unequal:
 ; CHECK: ld 3, -[[OFFSET2]](1)
 ; CHECK: ld 3, -[[OFFSET1]](1)
 
+; DARWIN32: _func3:
+; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 40
+; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24
+; DARWIN32: lwz r[[REG3:[0-9]+]], 48(r[[REGSP]])
+; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]])
+; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
+; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
+; DARWIN32: stw r[[REG4]], -[[OFFSET2:[0-9]+]]
+; DARWIN32: lwz r3, -[[OFFSET2]]
+; DARWIN32: lwz r3, -[[OFFSET1]]
+
+; DARWIN64: _func3:
+; DARWIN64: addi r[[REG1:[0-9]+]], r1, 64
+; DARWIN64: addi r[[REG2:[0-9]+]], r1, 48
+; DARWIN64: ld r[[REG3:[0-9]+]], 8(r[[REG1]])
+; DARWIN64: ld r[[REG4:[0-9]+]], 8(r[[REG2]])
+; DARWIN64: cmpld cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
+; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
+; DARWIN64: std r[[REG4]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: ld r3, -[[OFFSET2]]
+; DARWIN64: ld r3, -[[OFFSET1]]
+
+
 define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4,
                   i64 %p5, i64 %p6, i64 %p7, i64 %p8,
                   { i64, i8* } %array1, %tarray* byval %array2) {
@@ -87,7 +156,7 @@ unequal:
   ret i8* %array2_ptr
 }
 
-; CHECK: func4:
+; CHECK-LABEL: func4:
 ; CHECK: addi [[REG1:[0-9]+]], 1, 128
 ; CHECK: ld [[REG2:[0-9]+]], 120(1)
 ; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
@@ -97,3 +166,24 @@ unequal:
 ; CHECK: ld 3, -[[OFFSET1]](1)
 ; CHECK: ld 3, -[[OFFSET2]](1)
 
+; DARWIN32: _func4:
+; DARWIN32: lwz r[[REG4:[0-9]+]], 96(r1)
+; DARWIN32: addi r[[REG1:[0-9]+]], r1, 100
+; DARWIN32: lwz r[[REG3:[0-9]+]], 108(r1)
+; DARWIN32: mr r[[REG2:[0-9]+]], r[[REG4]]
+; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
+; DARWIN32: stw r[[REG4]], -[[OFFSET1:[0-9]+]]
+; DARWIN32: stw r[[REG3]], -[[OFFSET2:[0-9]+]]
+; DARWIN32: lwz r[[REG1]], -[[OFFSET1]]
+; DARWIN32: lwz r[[REG1]], -[[OFFSET2]]
+
+; DARWIN64: _func4:
+; DARWIN64: addi r[[REG1:[0-9]+]], r1, 128
+; DARWIN64: ld r[[REG2:[0-9]+]], 120(r1)
+; DARWIN64: ld r[[REG3:[0-9]+]], 8(r[[REG1]])
+; DARWIN64: mr r[[REG4:[0-9]+]], r[[REG2]]
+; DARWIN64: cmpld cr{{[0-9]+}}, r[[REG2]], r[[REG3]]
+; DARWIN64: std r[[REG4]], -[[OFFSET1:[0-9]+]]
+; DARWIN64: std r[[REG3]], -[[OFFSET2:[0-9]+]]
+; DARWIN64: ld r3, -[[OFFSET1]]
+; DARWIN64: ld r3, -[[OFFSET2]]
diff --git a/test/CodeGen/PowerPC/ashr-neg1.ll b/test/CodeGen/PowerPC/ashr-neg1.ll
new file mode 100644
index 000000000000..28e74f4d2988
--- /dev/null
+++ b/test/CodeGen/PowerPC/ashr-neg1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD30723(i32) {
+BB:
+  br label %CF80
+
+CF80:                                             ; preds = %CF80, %BB
+  %B = ashr i32 %0, -1
+  br i1 undef, label %CF80, label %CF84
+
+CF84:                                             ; preds = %CF84, %CF80
+  %Cmp62 = icmp sge i32 undef, %B
+  br i1 %Cmp62, label %CF84, label %CF85
+
+CF85:                                             ; preds = %CF85, %CF84
+  br label %CF85
+}
diff --git a/test/CodeGen/PowerPC/asm-dialect.ll b/test/CodeGen/PowerPC/asm-dialect.ll
new file mode 100644
index 000000000000..e8fd2516b5a5
--- /dev/null
+++ b/test/CodeGen/PowerPC/asm-dialect.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin | FileCheck %s
+
+; This test verifies that we choose "assembler variant 1" (which GCC
+; uses for "new-style mnemonics" as opposed to POWER mnemonics) when
+; processing multi-variant inline asm statements, on all subtargets.
+
+; CHECK: subfe
+; CHECK-NOT: sfe
+
+define i32 @test(i32 %in1, i32 %in2) {
+entry:
+  %0 = tail call i32 asm "$(sfe$|subfe$) $0,$1,$2", "=r,r,r"(i32 %in1, i32 %in2)
+  ret i32 %0
+}
+
diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll
index d04a6c98ee19..b19125b064e7 100644
--- a/test/CodeGen/PowerPC/asym-regclass-copy.ll
+++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll
@@ -52,5 +52,5 @@ declare void @free(i8* nocapture) #0
 
 declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index 838db20ddd1b..1737916375ca 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 |  FileCheck %s
 
 define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
-; CHECK: exchange_and_add:
+; CHECK-LABEL: exchange_and_add:
 ; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}}
   %tmp = atomicrmw add i32* %mem, i32 %val monotonic
 ; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}}
@@ -9,7 +9,7 @@ define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
 }
 
 define i32 @exchange_and_cmp(i32* %mem) nounwind {
-; CHECK: exchange_and_cmp:
+; CHECK-LABEL: exchange_and_cmp:
 ; CHECK: lwarx
   %tmp = cmpxchg i32* %mem, i32 0, i32 1 monotonic
 ; CHECK: stwcx.
@@ -18,7 +18,7 @@ define i32 @exchange_and_cmp(i32* %mem) nounwind {
 }
 
 define i32 @exchange(i32* %mem, i32 %val) nounwind {
-; CHECK: exchange:
+; CHECK-LABEL: exchange:
 ; CHECK: lwarx
   %tmp = atomicrmw xchg i32* %mem, i32 1 monotonic
 ; CHECK: stwcx.
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 40b4a2eea976..e56a77966714 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc64 | FileCheck %s
 
 define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
-; CHECK: exchange_and_add:
+; CHECK-LABEL: exchange_and_add:
 ; CHECK: ldarx
   %tmp = atomicrmw add i64* %mem, i64 %val monotonic
 ; CHECK: stdcx.
@@ -9,7 +9,7 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
 }
 
 define i64 @exchange_and_cmp(i64* %mem) nounwind {
-; CHECK: exchange_and_cmp:
+; CHECK-LABEL: exchange_and_cmp:
 ; CHECK: ldarx
   %tmp = cmpxchg i64* %mem, i64 0, i64 1 monotonic
 ; CHECK: stdcx.
@@ -18,7 +18,7 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {
 }
 
 define i64 @exchange(i64* %mem, i64 %val) nounwind {
-; CHECK: exchange:
+; CHECK-LABEL: exchange:
 ; CHECK: ldarx
   %tmp = atomicrmw xchg i64* %mem, i64 1 monotonic
 ; CHECK: stdcx.
diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll
index 656a85860df0..e487558e942a 100644
--- a/test/CodeGen/PowerPC/bdzlr.ll
+++ b/test/CodeGen/PowerPC/bdzlr.ll
@@ -35,15 +35,15 @@ for.body:                                         ; preds = %for.body.for.body_c
   %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
   %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
-  %1 = load i32* %tt, align 4, !tbaa !0
-  store i32 %1, i32* undef, align 4, !tbaa !0
+  %1 = load i32* %tt, align 4
+  store i32 %1, i32* undef, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
   br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
-  %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3
+  %.pre = load %struct.lua_TValue.17.692** undef, align 8
   br label %for.body
 
 for.end:                                          ; preds = %for.body, %if.end, %entry
@@ -57,8 +57,3 @@ for.end:                                          ; preds = %for.body, %if.end,
 }
 
 attributes #0 = { nounwind }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/PowerPC/bv-pres-v8i1.ll b/test/CodeGen/PowerPC/bv-pres-v8i1.ll
new file mode 100644
index 000000000000..5bf84ed1c5c8
--- /dev/null
+++ b/test/CodeGen/PowerPC/bv-pres-v8i1.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD70() {
+BB:
+  br label %CF78
+
+CF78:                                             ; preds = %CF87, %CF78, %BB
+  br i1 undef, label %CF78, label %CF87
+
+CF87:                                             ; preds = %CF78
+  %Cmp19 = icmp sge <8 x i1> zeroinitializer, zeroinitializer
+  %Cmp26 = icmp slt i32 -1, undef
+  br i1 %Cmp26, label %CF78, label %CF79
+
+CF79:                                             ; preds = %CF79, %CF87
+  br i1 undef, label %CF79, label %CF82
+
+CF82:                                             ; preds = %CF82, %CF79
+  br i1 undef, label %CF82, label %CF84
+
+CF84:                                             ; preds = %CF82
+  br label %CF
+
+CF:                                               ; preds = %CF88, %CF, %CF84
+  br i1 undef, label %CF, label %CF85
+
+CF85:                                             ; preds = %CF85, %CF
+  %I52 = insertelement <8 x i1> %Cmp19, i1 %Cmp26, i32 6
+  %Cmp61 = icmp ult i32 477567, undef
+  br i1 %Cmp61, label %CF85, label %CF88
+
+CF88:                                             ; preds = %CF85
+  %E63 = extractelement <8 x i1> %I52, i32 5
+  br i1 %E63, label %CF, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF88
+  br label %CF80
+}
diff --git a/test/CodeGen/PowerPC/bv-widen-undef.ll b/test/CodeGen/PowerPC/bv-widen-undef.ll
new file mode 100644
index 000000000000..9e58f0d95023
--- /dev/null
+++ b/test/CodeGen/PowerPC/bv-widen-undef.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD4357(i8) {
+BB:
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  br i1 undef, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF81, %CF77, %CF
+  %Shuff12 = shufflevector <2 x i8> <i8 -1, i8 -1>, <2 x i8> <i8 -1, i8 -1>, <2 x i32> <i32 0, i32 undef>
+  br i1 undef, label %CF77, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF77
+  %B21 = mul <2 x i8> %Shuff12, <i8 -1, i8 -1>
+  %Cmp24 = fcmp une ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000
+  br i1 %Cmp24, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF80
+  %I36 = insertelement <2 x i8> %B21, i8 %0, i32 0
+  br label %CF77
+}
diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll
index f12152ff0fca..3eb30e93fd31 100644
--- a/test/CodeGen/PowerPC/complex-return.ll
+++ b/test/CodeGen/PowerPC/complex-return.ll
@@ -23,7 +23,7 @@ entry:
   ret { ppc_fp128, ppc_fp128 } %0
 }
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: lfd 3
 ; CHECK: lfd 4
 ; CHECK: lfd 2
@@ -49,7 +49,7 @@ entry:
   ret { float, float } %0
 }
 
-; CHECK: oof:
+; CHECK-LABEL: oof:
 ; CHECK: lfs 2
 ; CHECK: lfs 1
 
diff --git a/test/CodeGen/PowerPC/copysignl.ll b/test/CodeGen/PowerPC/copysignl.ll
new file mode 100644
index 000000000000..4b801b791d62
--- /dev/null
+++ b/test/CodeGen/PowerPC/copysignl.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @foo_d_ll(ppc_fp128 %a, ppc_fp128 %b) #0 {
+entry:
+  %call = tail call ppc_fp128 @copysignl(ppc_fp128 %a, ppc_fp128 %b) #0
+  %conv = fptrunc ppc_fp128 %call to double
+  ret double %conv
+
+; CHECK-LABEL: @foo_d_ll
+; CHECK: fcpsgn 1, 3, 1
+; CHECK: blr
+}
+
+declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
+
+define double @foo_dl(double %a, ppc_fp128 %b) #0 {
+entry:
+  %conv = fptrunc ppc_fp128 %b to double
+  %call = tail call double @copysign(double %a, double %conv) #0
+  ret double %call
+
+; CHECK-LABEL: @foo_dl
+; CHECK: fcpsgn 1, 2, 1
+; CHECK: blr
+}
+
+declare double @copysign(double, double) #0
+
+define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
+entry:
+  %conv = fpext double %a to ppc_fp128
+  %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %b) #0
+  ret ppc_fp128 %call
+
+; CHECK-LABEL: @foo_ll
+; CHECK: bl copysignl
+; CHECK: blr
+}
+
+define ppc_fp128 @foo_ld(double %a, double %b) #0 {
+entry:
+  %conv = fpext double %a to ppc_fp128
+  %conv1 = fpext double %b to ppc_fp128
+  %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0
+  ret ppc_fp128 %call
+
+; CHECK-LABEL: @foo_ld
+; CHECK: bl copysignl
+; CHECK: blr
+}
+
+define ppc_fp128 @foo_lf(double %a, float %b) #0 {
+entry:
+  %conv = fpext double %a to ppc_fp128
+  %conv1 = fpext float %b to ppc_fp128
+  %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0
+  ret ppc_fp128 %call
+
+; CHECK-LABEL: @foo_lf
+; CHECK: bl copysignl
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
index d6df7a237668..be0dbad6289e 100644
--- a/test/CodeGen/PowerPC/cr-spills.ll
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -53,11 +53,11 @@ for.cond286.preheader:                            ; preds = %for.body252
 
 for.cond290.preheader:                            ; preds = %for.end520, %for.cond286.preheader
   %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
-  %1 = load i32* undef, align 4, !tbaa !0
-  %2 = load i32* @weight_luma, align 4, !tbaa !0
-  %3 = load i32* @wp_luma_round, align 4, !tbaa !0
-  %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0
-  %5 = load i32* @offset_luma, align 4, !tbaa !0
+  %1 = load i32* undef, align 4
+  %2 = load i32* @weight_luma, align 4
+  %3 = load i32* @wp_luma_round, align 4
+  %4 = load i32* @luma_log_weight_denom, align 4
+  %5 = load i32* @offset_luma, align 4
   %incdec.ptr502.sum = add i64 undef, 16
   br label %for.body293
 
@@ -68,7 +68,7 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
   %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
   %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
-  %6 = load i16* %refptr.11590, align 2, !tbaa !3
+  %6 = load i16* %refptr.11590, align 2
   %conv294 = zext i16 %6 to i32
   %mul295 = mul nsw i32 %conv294, %2
   %add296 = add nsw i32 %mul295, %3
@@ -78,16 +78,16 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
   %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
   %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
-  %7 = load i16* %srcptr.41591, align 2, !tbaa !3
+  %7 = load i16* %srcptr.41591, align 2
   %conv300 = zext i16 %7 to i32
   %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
   %idxprom302 = sext i32 %sub301 to i64
   %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
-  %8 = load i32* %arrayidx303, align 4, !tbaa !0
+  %8 = load i32* %arrayidx303, align 4
   %add304 = add nsw i32 %8, %LineSadBlk0.01588
-  %9 = load i32* undef, align 4, !tbaa !0
+  %9 = load i32* undef, align 4
   %add318 = add nsw i32 %add304, %9
-  %10 = load i16* undef, align 2, !tbaa !3
+  %10 = load i16* undef, align 2
   %conv321 = zext i16 %10 to i32
   %mul322 = mul nsw i32 %conv321, %2
   %add323 = add nsw i32 %mul322, %3
@@ -100,22 +100,22 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %sub329 = sub nsw i32 %cond.i5.i1508, 0
   %idxprom330 = sext i32 %sub329 to i64
   %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
-  %11 = load i32* %arrayidx331, align 4, !tbaa !0
+  %11 = load i32* %arrayidx331, align 4
   %add332 = add nsw i32 %add318, %11
   %cmp.i.i1501 = icmp sgt i32 undef, 0
   %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
   %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
   %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
   %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
-  %12 = load i16* null, align 2, !tbaa !3
+  %12 = load i16* null, align 2
   %conv342 = zext i16 %12 to i32
   %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
   %idxprom344 = sext i32 %sub343 to i64
   %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
-  %13 = load i32* %arrayidx345, align 4, !tbaa !0
+  %13 = load i32* %arrayidx345, align 4
   %add346 = add nsw i32 %add332, %13
   %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
-  %14 = load i16* null, align 2, !tbaa !3
+  %14 = load i16* null, align 2
   %conv349 = zext i16 %14 to i32
   %mul350 = mul nsw i32 %conv349, %2
   %add351 = add nsw i32 %mul350, %3
@@ -126,15 +126,15 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
   %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
   %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
-  %15 = load i16* %incdec.ptr341, align 2, !tbaa !3
+  %15 = load i16* %incdec.ptr341, align 2
   %conv356 = zext i16 %15 to i32
   %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
   %idxprom358 = sext i32 %sub357 to i64
   %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
-  %16 = load i32* %arrayidx359, align 4, !tbaa !0
+  %16 = load i32* %arrayidx359, align 4
   %add360 = add nsw i32 %16, %LineSadBlk1.01587
   %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
-  %17 = load i16* %incdec.ptr348, align 2, !tbaa !3
+  %17 = load i16* %incdec.ptr348, align 2
   %conv363 = zext i16 %17 to i32
   %mul364 = mul nsw i32 %conv363, %2
   %add365 = add nsw i32 %mul364, %3
@@ -145,15 +145,15 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
   %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
   %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
-  %18 = load i16* %incdec.ptr355, align 2, !tbaa !3
+  %18 = load i16* %incdec.ptr355, align 2
   %conv370 = zext i16 %18 to i32
   %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
   %idxprom372 = sext i32 %sub371 to i64
   %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
-  %19 = load i32* %arrayidx373, align 4, !tbaa !0
+  %19 = load i32* %arrayidx373, align 4
   %add374 = add nsw i32 %add360, %19
   %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
-  %20 = load i16* %incdec.ptr362, align 2, !tbaa !3
+  %20 = load i16* %incdec.ptr362, align 2
   %conv377 = zext i16 %20 to i32
   %mul378 = mul nsw i32 %conv377, %2
   %add379 = add nsw i32 %mul378, %3
@@ -164,14 +164,14 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
   %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
   %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
-  %21 = load i16* %incdec.ptr369, align 2, !tbaa !3
+  %21 = load i16* %incdec.ptr369, align 2
   %conv384 = zext i16 %21 to i32
   %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
   %idxprom386 = sext i32 %sub385 to i64
   %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
-  %22 = load i32* %arrayidx387, align 4, !tbaa !0
+  %22 = load i32* %arrayidx387, align 4
   %add388 = add nsw i32 %add374, %22
-  %23 = load i16* %incdec.ptr376, align 2, !tbaa !3
+  %23 = load i16* %incdec.ptr376, align 2
   %conv391 = zext i16 %23 to i32
   %mul392 = mul nsw i32 %conv391, %2
   %add395 = add nsw i32 0, %5
@@ -180,25 +180,25 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
   %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
   %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
-  %24 = load i16* %incdec.ptr383, align 2, !tbaa !3
+  %24 = load i16* %incdec.ptr383, align 2
   %conv398 = zext i16 %24 to i32
   %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
   %idxprom400 = sext i32 %sub399 to i64
   %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
-  %25 = load i32* %arrayidx401, align 4, !tbaa !0
+  %25 = load i32* %arrayidx401, align 4
   %add402 = add nsw i32 %add388, %25
   %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
   %cmp.i4.i1483 = icmp slt i32 undef, %1
   %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
-  %26 = load i16* %incdec.ptr397, align 2, !tbaa !3
+  %26 = load i16* %incdec.ptr397, align 2
   %conv412 = zext i16 %26 to i32
   %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
   %idxprom414 = sext i32 %sub413 to i64
   %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
-  %27 = load i32* %arrayidx415, align 4, !tbaa !0
+  %27 = load i32* %arrayidx415, align 4
   %add416 = add nsw i32 %27, %LineSadBlk2.01585
   %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
-  %28 = load i16* %incdec.ptr404, align 2, !tbaa !3
+  %28 = load i16* %incdec.ptr404, align 2
   %conv419 = zext i16 %28 to i32
   %mul420 = mul nsw i32 %conv419, %2
   %add421 = add nsw i32 %mul420, %3
@@ -212,10 +212,10 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %sub427 = sub nsw i32 %cond.i5.i1480, 0
   %idxprom428 = sext i32 %sub427 to i64
   %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
-  %29 = load i32* %arrayidx429, align 4, !tbaa !0
+  %29 = load i32* %arrayidx429, align 4
   %add430 = add nsw i32 %add416, %29
   %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
-  %30 = load i16* %incdec.ptr418, align 2, !tbaa !3
+  %30 = load i16* %incdec.ptr418, align 2
   %conv433 = zext i16 %30 to i32
   %mul434 = mul nsw i32 %conv433, %2
   %add435 = add nsw i32 %mul434, %3
@@ -225,15 +225,15 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
   %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
   %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
-  %31 = load i16* %incdec.ptr425, align 2, !tbaa !3
+  %31 = load i16* %incdec.ptr425, align 2
   %conv440 = zext i16 %31 to i32
   %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
   %idxprom442 = sext i32 %sub441 to i64
   %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
-  %32 = load i32* %arrayidx443, align 4, !tbaa !0
+  %32 = load i32* %arrayidx443, align 4
   %add444 = add nsw i32 %add430, %32
   %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
-  %33 = load i16* %incdec.ptr432, align 2, !tbaa !3
+  %33 = load i16* %incdec.ptr432, align 2
   %conv447 = zext i16 %33 to i32
   %mul448 = mul nsw i32 %conv447, %2
   %add449 = add nsw i32 %mul448, %3
@@ -244,15 +244,15 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
   %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
   %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12
-  %34 = load i16* undef, align 2, !tbaa !3
+  %34 = load i16* undef, align 2
   %conv454 = zext i16 %34 to i32
   %sub455 = sub nsw i32 %cond.i5.i1472, %conv454
   %idxprom456 = sext i32 %sub455 to i64
   %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456
-  %35 = load i32* %arrayidx457, align 4, !tbaa !0
+  %35 = load i32* %arrayidx457, align 4
   %add458 = add nsw i32 %add444, %35
   %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13
-  %36 = load i16* %incdec.ptr446, align 2, !tbaa !3
+  %36 = load i16* %incdec.ptr446, align 2
   %conv461 = zext i16 %36 to i32
   %mul462 = mul nsw i32 %conv461, %2
   %add463 = add nsw i32 %mul462, %3
@@ -263,12 +263,12 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
   %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
   %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13
-  %37 = load i16* %incdec.ptr453, align 2, !tbaa !3
+  %37 = load i16* %incdec.ptr453, align 2
   %conv468 = zext i16 %37 to i32
   %sub469 = sub nsw i32 %cond.i5.i1468, %conv468
   %idxprom470 = sext i32 %sub469 to i64
   %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470
-  %38 = load i32* %arrayidx471, align 4, !tbaa !0
+  %38 = load i32* %arrayidx471, align 4
   %add472 = add nsw i32 %38, %LineSadBlk3.01586
   %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14
   %add477 = add nsw i32 0, %3
@@ -279,15 +279,15 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
   %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
   %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14
-  %39 = load i16* %incdec.ptr467, align 2, !tbaa !3
+  %39 = load i16* %incdec.ptr467, align 2
   %conv482 = zext i16 %39 to i32
   %sub483 = sub nsw i32 %cond.i5.i1464, %conv482
   %idxprom484 = sext i32 %sub483 to i64
   %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484
-  %40 = load i32* %arrayidx485, align 4, !tbaa !0
+  %40 = load i32* %arrayidx485, align 4
   %add486 = add nsw i32 %add472, %40
   %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15
-  %41 = load i16* %incdec.ptr474, align 2, !tbaa !3
+  %41 = load i16* %incdec.ptr474, align 2
   %conv489 = zext i16 %41 to i32
   %mul490 = mul nsw i32 %conv489, %2
   %add491 = add nsw i32 %mul490, %3
@@ -298,14 +298,14 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
   %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
   %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15
-  %42 = load i16* %incdec.ptr481, align 2, !tbaa !3
+  %42 = load i16* %incdec.ptr481, align 2
   %conv496 = zext i16 %42 to i32
   %sub497 = sub nsw i32 %cond.i5.i1460, %conv496
   %idxprom498 = sext i32 %sub497 to i64
   %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498
-  %43 = load i32* %arrayidx499, align 4, !tbaa !0
+  %43 = load i32* %arrayidx499, align 4
   %add500 = add nsw i32 %add486, %43
-  %44 = load i16* %incdec.ptr488, align 2, !tbaa !3
+  %44 = load i16* %incdec.ptr488, align 2
   %conv503 = zext i16 %44 to i32
   %mul504 = mul nsw i32 %conv503, %2
   %add505 = add nsw i32 %mul504, %3
@@ -315,22 +315,22 @@ for.body293:                                      ; preds = %for.body293, %for.c
   %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
   %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
   %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
-  %45 = load i16* %incdec.ptr495, align 2, !tbaa !3
+  %45 = load i16* %incdec.ptr495, align 2
   %conv510 = zext i16 %45 to i32
   %sub511 = sub nsw i32 %cond.i5.i1456, %conv510
   %idxprom512 = sext i32 %sub511 to i64
   %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512
-  %46 = load i32* %arrayidx513, align 4, !tbaa !0
+  %46 = load i32* %arrayidx513, align 4
   %add514 = add nsw i32 %add500, %46
   %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum
   %exitcond1692 = icmp eq i32 undef, 4
   br i1 %exitcond1692, label %for.end520, label %for.body293
 
 for.end520:                                       ; preds = %for.body293
-  store i32 %add346, i32* undef, align 4, !tbaa !0
-  store i32 %add402, i32* undef, align 4, !tbaa !0
-  store i32 %add458, i32* undef, align 4, !tbaa !0
-  store i32 %add514, i32* null, align 4, !tbaa !0
+  store i32 %add346, i32* undef, align 4
+  store i32 %add402, i32* undef, align 4
+  store i32 %add458, i32* undef, align 4
+  store i32 %add514, i32* null, align 4
   br i1 undef, label %for.end543, label %for.cond290.preheader
 
 for.end543:                                       ; preds = %for.end520
@@ -400,10 +400,5 @@ for.end999:                                       ; preds = %for.inc997
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll
index f1cbc5afa8ac..a9b4b3607830 100644
--- a/test/CodeGen/PowerPC/crsave.ll
+++ b/test/CodeGen/PowerPC/crsave.ll
@@ -1,9 +1,9 @@
-; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
+; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 < %s | FileCheck %s -check-prefix=PPC32
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 < %s | FileCheck %s -check-prefix=PPC64
 
 declare void @foo()
 
-define i32 @test_cr2() nounwind {
+define i32 @test_cr2() nounwind uwtable {
 entry:
   %ret = alloca i32, align 4
   %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind
@@ -18,14 +18,19 @@ entry:
 ; PPC32: mfcr 12
 ; PPC32-NEXT: stw 12, 24(31)
 ; PPC32: lwz 12, 24(31)
-; PPC32-NEXT: mtcrf 32, 12
+; PPC32-NEXT: mtocrf 32, 12
 
+; PPC64: .cfi_startproc
 ; PPC64: mfcr 12
 ; PPC64: stw 12, 8(1)
 ; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
+; PPC64: .cfi_def_cfa_offset 128
+; PPC64: .cfi_offset lr, 16
+; PPC64: .cfi_offset cr2, 8
 ; PPC64: addi 1, 1, [[AMT]]
 ; PPC64: lwz 12, 8(1)
-; PPC64: mtcrf 32, 12
+; PPC64: mtocrf 32, 12
+; PPC64: .cfi_endproc
 
 define i32 @test_cr234() nounwind {
 entry:
@@ -42,16 +47,16 @@ entry:
 ; PPC32: mfcr 12
 ; PPC32-NEXT: stw 12, 24(31)
 ; PPC32: lwz 12, 24(31)
-; PPC32-NEXT: mtcrf 32, 12
-; PPC32-NEXT: mtcrf 16, 12
-; PPC32-NEXT: mtcrf 8, 12
+; PPC32-NEXT: mtocrf 32, 12
+; PPC32-NEXT: mtocrf 16, 12
+; PPC32-NEXT: mtocrf 8, 12
 
 ; PPC64: mfcr 12
 ; PPC64: stw 12, 8(1)
 ; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
 ; PPC64: addi 1, 1, [[AMT]]
 ; PPC64: lwz 12, 8(1)
-; PPC64: mtcrf 32, 12
-; PPC64: mtcrf 16, 12
-; PPC64: mtcrf 8, 12
+; PPC64: mtocrf 32, 12
+; PPC64: mtocrf 16, 12
+; PPC64: mtocrf 8, 12
 
diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll
index 04e4ffb0d48d..1a669eb051d8 100644
--- a/test/CodeGen/PowerPC/ctr-cleanup.ll
+++ b/test/CodeGen/PowerPC/ctr-cleanup.ll
@@ -22,4 +22,4 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/ctrloop-asm.ll b/test/CodeGen/PowerPC/ctrloop-asm.ll
new file mode 100644
index 000000000000..28afbf2babcf
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-asm.ll
@@ -0,0 +1,38 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+define void @test1(i32 %c) nounwind {  ; counted loop whose body is an empty inline asm that clobbers r5
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void asm sideeffect "", "~{r5}"() nounwind  ; empty asm string; the clobber list names r5 only
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test1
+; CHECK: mtctr
+}
+
+define void @test2(i32 %c) nounwind {  ; same loop as @test1, but the inline asm clobbers ctr
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void asm sideeffect "", "~{ctr}"() nounwind  ; empty asm string; the clobber list names ctr, so mtctr must not appear
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+; CHECK: @test2
+; CHECK-NOT: mtctr
+}
+
diff --git a/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
new file mode 100644
index 000000000000..2f0440912cc9
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mcpu=ppc | FileCheck %s
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+define ppc_fp128 @foo(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly {  ; counted loop that calls @copysignl on ppc_fp128 every iteration
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %conv, %for.body ]
+  %arrayidx = getelementptr inbounds ppc_fp128* %n, i32 %i.06
+  %0 = load ppc_fp128* %arrayidx, align 8  ; loaded value has no users in this function
+  %conv = tail call ppc_fp128 @copysignl(ppc_fp128 %x.05, ppc_fp128 %d) nounwind readonly  ; the call inside the loop body that this test is about
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret ppc_fp128 %conv
+}
+
+declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128)  ; no attribute-group reference: this file defines no attribute groups
+
+; CHECK: @foo
+; CHECK-NOT: mtctr
+
diff --git a/test/CodeGen/PowerPC/ctrloop-fp64.ll b/test/CodeGen/PowerPC/ctrloop-fp64.ll
new file mode 100644
index 000000000000..77555ac58de2
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-fp64.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mcpu=ppc | FileCheck %s
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+define i64 @foo(double* nocapture %n) nounwind readonly {  ; counted loop with i64<->double conversions inside the body
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
+  %arrayidx = getelementptr inbounds double* %n, i32 %i.06
+  %0 = load double* %arrayidx, align 8
+  %conv = sitofp i64 %x.05 to double  ; i64 -> double on every iteration
+  %add = fadd double %conv, %0
+  %conv1 = fptosi double %add to i64  ; double -> i64 on every iteration; NOTE(review): presumably a runtime call on this 32-bit target - confirm
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i64 %conv1
+}
+
+; CHECK: @foo
+; CHECK-NOT: mtctr
+
+@init_value = global double 1.000000e+00, align 8
+@data64 = global [8000 x i64] zeroinitializer, align 8
+
+define i32 @main(i32 %argc, i8** nocapture %argv) {  ; the double -> i64 conversion is in the entry block, before the loop
+entry:
+  %0 = load double* @init_value, align 8
+  %conv = fptosi double %0 to i64  ; converted once, outside the loop
+  %broadcast.splatinsert.i = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %broadcast.splat.i = shufflevector <2 x i64> %broadcast.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer  ; both lanes take lane 0, i.e. %conv
+  br label %vector.body.i
+
+vector.body.i:                                    ; preds = %vector.body.i, %entry
+  %index.i = phi i32 [ 0, %entry ], [ %index.next.i, %vector.body.i ]
+  %next.gep.i = getelementptr [8000 x i64]* @data64, i32 0, i32 %index.i
+  %1 = bitcast i64* %next.gep.i to <2 x i64>*
+  store <2 x i64> %broadcast.splat.i, <2 x i64>* %1, align 8  ; writes elements %index.i and %index.i+1
+  %next.gep.sum24.i = or i32 %index.i, 2
+  %2 = getelementptr [8000 x i64]* @data64, i32 0, i32 %next.gep.sum24.i
+  %3 = bitcast i64* %2 to <2 x i64>*
+  store <2 x i64> %broadcast.splat.i, <2 x i64>* %3, align 8  ; second <2 x i64> store, at index (%index.i | 2)
+  %index.next.i = add i32 %index.i, 4  ; index advances by 4 per iteration
+  %4 = icmp eq i32 %index.next.i, 8000  ; leave the loop once the index reaches 8000
+  br i1 %4, label %_Z4fillIPxxEvT_S1_T0_.exit, label %vector.body.i
+
+_Z4fillIPxxEvT_S1_T0_.exit:                       ; preds = %vector.body.i
+  ret i32 0
+}
+
+; CHECK: @main
+; CHECK: __fixdfdi
+; CHECK: mtctr
+
diff --git a/test/CodeGen/PowerPC/ctrloop-i64.ll b/test/CodeGen/PowerPC/ctrloop-i64.ll
new file mode 100644
index 000000000000..9e01392a458f
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-i64.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mcpu=ppc | FileCheck %s
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+define i64 @foo(i64* nocapture %n, i64 %d) nounwind readonly {  ; counted loop whose body does a 64-bit udiv
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
+  %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
+  %0 = load i64* %arrayidx, align 8
+  %conv = udiv i64 %x.05, %d  ; unsigned 64-bit divide; NOTE(review): presumably a runtime call on this 32-bit target - confirm
+  %conv1 = add i64 %conv, %0
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i64 %conv1
+}
+
+; CHECK: @foo
+; CHECK-NOT: mtctr
+
+define i64 @foo2(i64* nocapture %n, i64 %d) nounwind readonly {  ; same loop as @foo with sdiv in place of udiv
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
+  %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
+  %0 = load i64* %arrayidx, align 8
+  %conv = sdiv i64 %x.05, %d  ; signed 64-bit divide
+  %conv1 = add i64 %conv, %0
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i64 %conv1
+}
+
+; CHECK: @foo2
+; CHECK-NOT: mtctr
+
+define i64 @foo3(i64* nocapture %n, i64 %d) nounwind readonly {  ; same loop as @foo with urem in place of udiv
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
+  %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
+  %0 = load i64* %arrayidx, align 8
+  %conv = urem i64 %x.05, %d  ; unsigned 64-bit remainder
+  %conv1 = add i64 %conv, %0
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i64 %conv1
+}
+
+; CHECK: @foo3
+; CHECK-NOT: mtctr
+
+define i64 @foo4(i64* nocapture %n, i64 %d) nounwind readonly {  ; same loop as @foo with srem in place of udiv
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
+  %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
+  %0 = load i64* %arrayidx, align 8
+  %conv = srem i64 %x.05, %d  ; signed 64-bit remainder
+  %conv1 = add i64 %conv, %0
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 2048  ; leave the loop once %inc reaches 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i64 %conv1
+}
+
+; CHECK: @foo4
+; CHECK-NOT: mtctr
+
diff --git a/test/CodeGen/PowerPC/ctrloop-large-ec.ll b/test/CodeGen/PowerPC/ctrloop-large-ec.ll
new file mode 100644
index 000000000000..c18bdabdb03a
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-large-ec.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=ppc32 < %s | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+define void @fn1() {  ; loop driven by a 64-bit counter on a 32-bit PowerPC target
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %inc3 = phi i64 [ %inc, %for.body ], [ undef, %entry ]  ; i64 induction variable with an undef start value
+  %inc = add nsw i64 %inc3, 1
+  %tobool = icmp eq i64 %inc, 0  ; leave the loop when the 64-bit counter reaches zero
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; On PPC32, CTR is also 32 bits, and so cannot hold a 64-bit count.
+; CHECK: @fn1
+; CHECK-NOT: mtctr
+; CHECK: blr
+
diff --git a/test/CodeGen/PowerPC/ctrloop-le.ll b/test/CodeGen/PowerPC/ctrloop-le.ll
new file mode 100644
index 000000000000..7b8185ed5261
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-le.ll
@@ -0,0 +1,441 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+; CHECK: test_pos1_ir_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 28395, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 9073, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 21956, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 16782, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 19097, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 14040
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, 14040
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 13710
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, 13710
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 9920
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, 9920
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 18924
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, 18924
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_sle
+; CHECK: bdnz
+; a < b
+define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 11812
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, 11812
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_sle
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; Guard is a <= b (sle, inclusive bound), not a < b: %a counts up to %b in steps of 1.
+define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b  ; guard: run the loop only if a <= b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp sle i32 %inc, %b  ; latch: loop again while next index <= b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_sle
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; Guard is a <= b (sle, inclusive bound), not a < b: %a counts up to %b in steps of 2.
+define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b  ; guard: run the loop only if a <= b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp sle i32 %inc, %b  ; latch: loop again while next index <= b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_sle
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; Guard is a <= b (sle, inclusive bound), not a < b: %a counts up to %b in steps of 4.
+define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b  ; guard: run the loop only if a <= b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp sle i32 %inc, %b  ; latch: loop again while next index <= b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_sle
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; Guard is a <= b (sle, inclusive bound), not a < b: %a counts up to %b in steps of 8.
+define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b  ; guard: run the loop only if a <= b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp sle i32 %inc, %b  ; latch: loop again while next index <= b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_sle
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; Guard is a <= b (sle, inclusive bound), not a < b: %a counts up to %b in steps of 16.
+define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b  ; guard: run the loop only if a <= b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp sle i32 %inc, %b  ; latch: loop again while next index <= b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/ctrloop-lt.ll b/test/CodeGen/PowerPC/ctrloop-lt.ll
new file mode 100644
index 000000000000..eaab61a826d9
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-lt.ll
@@ -0,0 +1,438 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+; CHECK: test_pos1_ir_slt
+; CHECK: bdnz
+; a < b: the constant 8531 counts up toward %b (exclusive, signed) in steps of 1.
+define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 8531, %b  ; guard: run the loop only if 8531 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_slt
+; FIXME: Support this loop! (NOTE(review): looks stale - the directive below already expects bdnz; confirm)
+; CHECK: bdnz
+; a < b: the constant 9152 counts up toward %b (exclusive, signed) in steps of 2.
+define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 9152, %b  ; guard: run the loop only if 9152 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_slt
+; FIXME: Support this loop! (NOTE(review): looks stale - the directive below already expects bdnz; confirm)
+; CHECK: bdnz
+; a < b: the constant 18851 counts up toward %b (exclusive, signed) in steps of 4.
+define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 18851, %b  ; guard: run the loop only if 18851 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_slt
+; CHECK: bdnz
+; a < b: the constant 25466 counts up toward %b (exclusive, signed) in steps of 8.
+define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 25466, %b  ; guard: run the loop only if 25466 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_slt
+; CHECK: bdnz
+; a < b: the constant 9295 counts up toward %b (exclusive, signed) in steps of 16.
+define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 9295, %b  ; guard: run the loop only if 9295 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_slt
+; CHECK: bdnz
+; a < b: %a counts up toward the constant 31236 (exclusive, signed) in steps of 1.
+define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 31236  ; guard: run the loop only if a < 31236 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp slt i32 %inc, 31236  ; latch: loop again while next index < 31236
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_slt
+; CHECK: bdnz
+; a < b: %a counts up toward the constant 22653 (exclusive, signed) in steps of 2.
+define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 22653  ; guard: run the loop only if a < 22653 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp slt i32 %inc, 22653  ; latch: loop again while next index < 22653
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_slt
+; CHECK: bdnz
+; a < b: %a counts up toward the constant 1431 (exclusive, signed) in steps of 4.
+define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1431  ; guard: run the loop only if a < 1431 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp slt i32 %inc, 1431  ; latch: loop again while next index < 1431
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_slt
+; CHECK: bdnz
+; a < b: %a counts up toward the constant 22403 (exclusive, signed) in steps of 8.
+define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 22403  ; guard: run the loop only if a < 22403 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp slt i32 %inc, 22403  ; latch: loop again while next index < 22403
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_slt
+; CHECK: bdnz
+; a < b: %a counts up toward the constant 21715 (exclusive, signed) in steps of 16.
+define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 21715  ; guard: run the loop only if a < 21715 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp slt i32 %inc, 21715  ; latch: loop again while next index < 21715
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_slt
+; CHECK: bdnz
+; a < b: %a counts up toward %b (exclusive, signed) in steps of 1.
+define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_slt
+; CHECK: bdnz
+; a < b: %a counts up toward %b (exclusive, signed) in steps of 2.
+define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_slt
+; CHECK: bdnz
+; a < b: %a counts up toward %b (exclusive, signed) in steps of 4.
+define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_slt
+; CHECK: bdnz
+; a < b: %a counts up toward %b (exclusive, signed) in steps of 8.
+define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_slt
+; CHECK: bdnz
+; a < b: %a counts up toward %b (exclusive, signed) in steps of 16.
+define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp slt i32 %inc, %b  ; latch: loop again while next index < b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/ctrloop-ne.ll b/test/CodeGen/PowerPC/ctrloop-ne.ll
new file mode 100644
index 000000000000..636030a15dd2
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-ne.ll
@@ -0,0 +1,449 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+
+; CHECK: test_pos1_ir_ne
+; CHECK: bdnz
+; a < b on entry (slt guard): the constant 32623 steps by 1 until the index equals %b exactly.
+define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 32623, %b  ; guard: run the loop only if 32623 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): the constant 29554 steps by 2 until the index equals %b exactly.
+define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 29554, %b  ; guard: run the loop only if 29554 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): the constant 15692 steps by 4 until the index equals %b exactly.
+define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 15692, %b  ; guard: run the loop only if 15692 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): the constant 10449 steps by 8 until the index equals %b exactly.
+define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 10449, %b  ; guard: run the loop only if 10449 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): the constant 32087 steps by 16 until the index equals %b exactly.
+define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 32087, %b  ; guard: run the loop only if 32087 < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_ne
+; CHECK: bdnz
+; a < b on entry (slt guard): %a steps by 1 until the index equals the constant 3472 exactly.
+define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 3472  ; guard: run the loop only if a < 3472 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp ne i32 %inc, 3472  ; latch: loop again until next index == 3472
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 2 until the index equals the constant 8730 exactly.
+define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 8730  ; guard: run the loop only if a < 8730 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp ne i32 %inc, 8730  ; latch: loop again until next index == 8730
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 4 until the index equals the constant 1493 exactly.
+define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1493  ; guard: run the loop only if a < 1493 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp ne i32 %inc, 1493  ; latch: loop again until next index == 1493
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 8 until the index equals the constant 1706 exactly.
+define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1706  ; guard: run the loop only if a < 1706 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp ne i32 %inc, 1706  ; latch: loop again until next index == 1706
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 16 until the index equals the constant 1886 exactly.
+define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1886  ; guard: run the loop only if a < 1886 (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp ne i32 %inc, 1886  ; latch: loop again until next index == 1886
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_ne
+; CHECK: bdnz
+; a < b on entry (slt guard): %a steps by 1 until the index equals %b exactly.
+define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 1  ; next index = i + 1
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 2 until the index equals %b exactly.
+define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 2  ; next index = i + 2
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 4 until the index equals %b exactly.
+define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 4  ; next index = i + 4
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 8 until the index equals %b exactly.
+define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 8  ; next index = i + 8
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_ne
+; FIXME: Support this loop!
+; CHECK-NOT: bdnz
+; a < b on entry (slt guard): %a steps by 16 until the index equals %b exactly.
+define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b  ; guard: run the loop only if a < b (signed)
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1  ; p[i] = (i8)(p[i] + 1)
+  %inc = add nsw i32 %i.04, 16  ; next index = i + 16
+  %cmp = icmp ne i32 %inc, %b  ; latch: loop again until next index == b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/ctrloops.ll b/test/CodeGen/PowerPC/ctrloops.ll
index f11e332d5fbc..ca00f687aa4e 100644
--- a/test/CodeGen/PowerPC/ctrloops.ll
+++ b/test/CodeGen/PowerPC/ctrloops.ll
@@ -22,7 +22,7 @@ for.end:                                          ; preds = %for.body
 ; CHECK: @test1
 ; CHECK-NOT: or 3, 3, 3
 ; CHECK: mtctr
-; CHECK-NOT: addi
+; CHECK-NOT: addi {{[0-9]+}}
 ; CHECK-NOT: cmplwi
 ; CHECK: bdnz
 }
@@ -45,7 +45,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK: @test2
 ; CHECK: mtctr
-; CHECK-NOT: addi
+; CHECK-NOT: addi {{[0-9]+}}
 ; CHECK-NOT: cmplwi
 ; CHECK: bdnz
 }
@@ -69,7 +69,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK: @test3
 ; CHECK: mtctr
-; CHECK-NOT: addi
+; CHECK-NOT: addi {{[0-9]+}}
 ; CHECK-NOT: cmplwi
 ; CHECK: bdnz
 }
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index 21e36618c5c1..cb93decac8e9 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=g5 | FileCheck %s
-; CHECK: main:
+; CHECK-LABEL: main:
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -15,18 +15,19 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !21, i32 12, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !"", metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 720942, metadata !21, null, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13, i32 0} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9, metadata !10}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
 !13 = metadata !{metadata !14}
 !14 = metadata !{metadata !15, metadata !16}
 !15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -34,5 +35,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !17 = metadata !{i32 1, i32 14, metadata !5, null}
 !18 = metadata !{i32 1, i32 26, metadata !5, null}
 !19 = metadata !{i32 2, i32 3, metadata !20, null}
-!20 = metadata !{i32 720907, metadata !5, i32 1, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-
+!20 = metadata !{i32 720907, metadata !21, metadata !5, i32 1, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{metadata !"dbg.c", metadata !"/src"}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/PowerPC/dyn-alloca-aligned.ll b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
new file mode 100644
index 000000000000..a5d45b8e94a0
--- /dev/null
+++ b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s = type { i32, i32 }
+
+declare void @bar(i32*, i32*) #0
+
+define void @goo(%struct.s* byval nocapture readonly %a, i32 signext %n) #0 {
+entry:
+  %0 = zext i32 %n to i64
+  %vla = alloca i32, i64 %0, align 128
+  %vla1 = alloca i32, i64 %0, align 128
+  %a2 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %1 = load i32* %a2, align 4
+  store i32 %1, i32* %vla1, align 128
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %2 = load i32* %b, align 4
+  %arrayidx3 = getelementptr inbounds i32* %vla1, i64 1
+  store i32 %2, i32* %arrayidx3, align 4
+  call void @bar(i32* %vla1, i32* %vla) #0
+  ret void
+
+; CHECK-LABEL: @goo
+
+; CHECK-DAG: li [[REG1:[0-9]+]], -128
+; CHECK-DAG: neg [[REG2:[0-9]+]],
+; CHECK: and [[REG1]], [[REG2]], [[REG1]]
+; CHECK: stdux {{[0-9]+}}, 1, [[REG1]]
+
+; CHECK: blr
+
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/emptystruct.ll b/test/CodeGen/PowerPC/emptystruct.ll
index 36b4abd2bfad..47cfadd0a7bb 100644
--- a/test/CodeGen/PowerPC/emptystruct.ll
+++ b/test/CodeGen/PowerPC/emptystruct.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false < %s | FileCheck %s
 
 ; This tests correct handling of empty aggregate parameters and return values.
 ; An empty parameter passed by value does not consume a protocol register or
@@ -25,9 +25,8 @@ entry:
   ret void
 }
 
-; CHECK: callee:
+; CHECK-LABEL: callee:
 ; CHECK: std 4,
-; CHECK: std 3,
 ; CHECK-NOT: std 5,
 ; CHECK-NOT: std 6,
 ; CHECK: blr
@@ -43,9 +42,8 @@ entry:
   ret void
 }
 
-; CHECK: caller:
+; CHECK-LABEL: caller:
 ; CHECK: addi 4,
-; CHECK: std 3,
 ; CHECK-NOT: std 5,
 ; CHECK-NOT: std 6,
 ; CHECK: bl callee
diff --git a/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll b/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
new file mode 100644
index 000000000000..7bdda0494b8f
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
+%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
+
+@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4
+@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4
+@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4
+
+define i32* @t1() nounwind {
+entry:
+; ELF64: t1
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 124
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t2() nounwind {
+entry:
+; ELF64: t2
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1148
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t3() nounwind {
+entry:
+; ELF64: t3
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 140
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
+
+define i32* @t4() nounwind {
+entry:
+; ELF64: t4
+  %addr = alloca i32*, align 4
+  store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1284
+  %0 = load i32** %addr, align 4
+  ret i32* %0
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-binary.ll b/test/CodeGen/PowerPC/fast-isel-binary.ll
new file mode 100644
index 000000000000..43a6cd085055
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-binary.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+; Test add with non-legal types
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ELF64: add_i8
+  %a.addr = alloca i8, align 4
+  %0 = add i8 %a, %b
+; ELF64: add
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @add_i8_imm(i8 %a) nounwind ssp {
+entry:
+; ELF64: add_i8_imm
+  %a.addr = alloca i8, align 4
+  %0 = add i8 %a, 22;
+; ELF64: addi
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ELF64: add_i16
+  %a.addr = alloca i16, align 4
+  %0 = add i16 %a, %b
+; ELF64: add
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+define void @add_i16_imm(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ELF64: add_i16_imm
+  %a.addr = alloca i16, align 4
+  %0 = add i16 %a, 243;
+; ELF64: addi
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ELF64: or_i8
+  %a.addr = alloca i8, align 4
+  %0 = or i8 %a, %b
+; ELF64: or
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @or_i8_imm(i8 %a) nounwind ssp {
+entry:
+; ELF64: or_i8_imm
+  %a.addr = alloca i8, align 4
+  %0 = or i8 %a, -13;
+; ELF64: ori
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ELF64: or_i16
+  %a.addr = alloca i16, align 4
+  %0 = or i16 %a, %b
+; ELF64: or
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+define void @or_i16_imm(i16 %a) nounwind ssp {
+entry:
+; ELF64: or_i16_imm
+  %a.addr = alloca i16, align 4
+  %0 = or i16 %a, 273;
+; ELF64: ori
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ELF64: sub_i8
+  %a.addr = alloca i8, align 4
+  %0 = sub i8 %a, %b
+; ELF64: subf
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i8_imm(i8 %a) nounwind ssp {
+entry:
+; ELF64: sub_i8_imm
+  %a.addr = alloca i8, align 4
+  %0 = sub i8 %a, 22;
+; ELF64: addi
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ELF64: sub_i16
+  %a.addr = alloca i16, align 4
+  %0 = sub i16 %a, %b
+; ELF64: subf
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i16_imm(i16 %a) nounwind ssp {
+entry:
+; ELF64: sub_i16_imm
+  %a.addr = alloca i16, align 4
+  %0 = sub i16 %a, 247;
+; ELF64: addi
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i16_badimm(i16 %a) nounwind ssp {
+entry:
+; ELF64: sub_i16_badimm
+  %a.addr = alloca i16, align 4
+  %0 = sub i16 %a, -32768; negating -32768 gives 32768, outside the signed 16-bit range, so subf (not addi) is expected
+; ELF64: subf
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-br-const.ll b/test/CodeGen/PowerPC/fast-isel-br-const.ll
new file mode 100644
index 000000000000..2cfb8a225745
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-br-const.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
+entry:
+; ELF64: t1
+  %x = add i32 %a, %b  
+  br i1 1, label %if.then, label %if.else
+; ELF64-NOT: b {{\.?}}LBB0_1
+
+if.then:                                          ; preds = %entry
+  call void @foo1()
+  br label %if.end7
+
+if.else:                                          ; preds = %entry
+  br i1 0, label %if.then2, label %if.else3
+; ELF64: b {{\.?}}LBB0_4
+
+if.then2:                                         ; preds = %if.else
+  call void @foo2()
+  br label %if.end6
+
+if.else3:                                         ; preds = %if.else
+  %y = sub i32 %a, %b
+  br i1 1, label %if.then5, label %if.end
+; ELF64-NOT: b {{\.?}}LBB0_5
+
+if.then5:                                         ; preds = %if.else3
+  call void @foo1()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then5, %if.else3
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.end, %if.then2
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end6, %if.then
+  ret i32 0
+}
+
+declare void @foo1()
+
+declare void @foo2()
diff --git a/test/CodeGen/PowerPC/fast-isel-call.ll b/test/CodeGen/PowerPC/fast-isel-call.ll
new file mode 100644
index 000000000000..33a8ba903e3d
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-call.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define i32 @t1(i8 signext %a) nounwind {
+  %1 = sext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t2(i8 zeroext %a) nounwind {
+  %1 = zext i8 %a to i32
+  ret i32 %1
+}
+
+define i32 @t3(i16 signext %a) nounwind {
+  %1 = sext i16 %a to i32
+  ret i32 %1
+}
+
+define i32 @t4(i16 zeroext %a) nounwind {
+  %1 = zext i16 %a to i32
+  ret i32 %1
+}
+
+define void @foo(i8 %a, i16 %b) nounwind {
+; ELF64: foo
+  %1 = call i32 @t1(i8 signext %a)
+; ELF64: extsb
+  %2 = call i32 @t2(i8 zeroext %a)
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+  %3 = call i32 @t3(i16 signext %b)
+; ELF64: extsh
+  %4 = call i32 @t4(i16 zeroext %b)
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+
+;; A few tests to check materialization
+  %5 = call i32 @t2(i8 zeroext 255)
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+  %6 = call i32 @t4(i16 zeroext 65535)
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+  ret void
+}
+
+define void @foo2() nounwind {
+  %1 = call signext i16 @t5()
+  %2 = call zeroext i16 @t6()
+  %3 = call signext i8 @t7()
+  %4 = call zeroext i8 @t8()
+  ret void
+}
+
+declare signext i16 @t5();
+declare zeroext i16 @t6();
+declare signext i8 @t7();
+declare zeroext i8 @t8();
+
+define i32 @t10(i32 %argc, i8** nocapture %argv) {
+entry:
+; ELF64: t10
+  %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
+; ELF64: li 3, 0
+; ELF64: li 4, 248
+; ELF64: li 5, 187
+; ELF64: li 6, 28
+; ELF64: li 7, 40
+; ELF64: li 8, 186
+; ELF64: rldicl 3, 3, 0, 56
+; ELF64: rldicl 4, 4, 0, 56
+; ELF64: rldicl 5, 5, 0, 56
+; ELF64: rldicl 6, 6, 0, 56
+; ELF64: rldicl 7, 7, 0, 56
+; ELF64: rldicl 8, 8, 0, 56
+  ret i32 0
+}
+
+declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
+
+define i32 @bar0(i32 %i) nounwind {
+  ret i32 0
+}
+
+; Function pointers are not yet implemented.
+;define void @foo3() uwtable {
+;  %fptr = alloca i32 (i32)*, align 8
+;  store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
+;  %1 = load i32 (i32)** %fptr, align 8
+;  %call = call i32 %1(i32 0)
+;  ret void
+;}
+
+; Intrinsic calls not yet implemented, and udiv isn't one for PPC anyway.
+;define i32 @LibCall(i32 %a, i32 %b) {
+;entry:
+;        %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
+;        ret i32 %tmp1
+;}
+
+declare void @float_foo(float %f) ssp
+
+define void @float_const() ssp {
+entry:
+; ELF64: float_const
+  call void @float_foo(float 0x401C666660000000)
+; ELF64: addis [[REG:[0-9]+]], 2, .LCPI[[SUF:[0-9_]+]]@toc@ha
+; ELF64: lfs 1, .LCPI[[SUF]]@toc@l([[REG]])
+  ret void
+}
+
+define void @float_reg(float %dummy, float %f) ssp {
+entry:
+; ELF64: float_reg
+  call void @float_foo(float %f)
+; ELF64: fmr 1, 2
+  ret void
+}
+
+declare void @double_foo(double %d) ssp
+
+define void @double_const() ssp {
+entry:
+; ELF64: double_const
+  call void @double_foo(double 0x1397723CCABD0000401C666660000000)
+; ELF64: addis [[REG2:[0-9]+]], 2, .LCPI[[SUF2:[0-9_]+]]@toc@ha
+; ELF64: lfd 1, .LCPI[[SUF2]]@toc@l([[REG2]])
+  ret void
+}
+
+define void @double_reg(double %dummy, double %d) ssp {
+entry:
+; ELF64: double_reg
+  call void @double_foo(double %d)
+; ELF64: fmr 1, 2
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
new file mode 100644
index 000000000000..33f7a79783cc
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
@@ -0,0 +1,289 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define void @t1a(float %a) uwtable ssp {
+entry:
+; ELF64: t1a
+  %cmp = fcmp oeq float %a, 0.000000e+00
+; ELF64: addis
+; ELF64: lfs
+; ELF64: fcmpu
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+declare void @foo()
+
+define void @t1b(float %a) uwtable ssp {
+entry:
+; ELF64: t1b
+  %cmp = fcmp oeq float %a, -0.000000e+00
+; ELF64: addis
+; ELF64: lfs
+; ELF64: fcmpu
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t2a(double %a) uwtable ssp {
+entry:
+; ELF64: t2a
+  %cmp = fcmp oeq double %a, 0.000000e+00
+; ELF64: addis
+; ELF64: lfd
+; ELF64: fcmpu
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t2b(double %a) uwtable ssp {
+entry:
+; ELF64: t2b
+  %cmp = fcmp oeq double %a, -0.000000e+00
+; ELF64: addis
+; ELF64: lfd
+; ELF64: fcmpu
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t4(i8 signext %a) uwtable ssp {
+entry:
+; ELF64: t4
+  %cmp = icmp eq i8 %a, -1
+; ELF64: extsb
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t5(i8 zeroext %a) uwtable ssp {
+entry:
+; ELF64: t5
+  %cmp = icmp eq i8 %a, 1
+; ELF64: extsb
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t6(i16 signext %a) uwtable ssp {
+entry:
+; ELF64: t6
+  %cmp = icmp eq i16 %a, -1
+; ELF64: extsh
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t7(i16 zeroext %a) uwtable ssp {
+entry:
+; ELF64: t7
+  %cmp = icmp eq i16 %a, 1
+; ELF64: extsh
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t8(i32 %a) uwtable ssp {
+entry:
+; ELF64: t8
+  %cmp = icmp eq i32 %a, -1
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t9(i32 %a) uwtable ssp {
+entry:
+; ELF64: t9
+  %cmp = icmp eq i32 %a, 1
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t10(i32 %a) uwtable ssp {
+entry:
+; ELF64: t10
+  %cmp = icmp eq i32 %a, 384
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t11(i32 %a) uwtable ssp {
+entry:
+; ELF64: t11
+  %cmp = icmp eq i32 %a, 4096
+; ELF64: cmpwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t12(i8 %a) uwtable ssp {
+entry:
+; ELF64: t12
+  %cmp = icmp ugt i8 %a, -113
+; ELF64: rlwinm
+; ELF64: cmplwi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t13() nounwind ssp {
+entry:
+; ELF64: t13
+  %cmp = icmp slt i32 -123, -2147483648
+; ELF64: li
+; ELF64: lis
+; ELF64: cmpw
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+}
+
+define void @t14(i64 %a) uwtable ssp {
+entry:
+; ELF64: t14
+  %cmp = icmp eq i64 %a, -1
+; ELF64: cmpdi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t15(i64 %a) uwtable ssp {
+entry:
+; ELF64: t15
+  %cmp = icmp eq i64 %a, 1
+; ELF64: cmpdi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t16(i64 %a) uwtable ssp {
+entry:
+; ELF64: t16
+  %cmp = icmp eq i64 %a, 384
+; ELF64: cmpdi
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @t17(i64 %a) uwtable ssp {
+entry:
+; ELF64: t17
+  %cmp = icmp eq i64 %a, 32768
+; Extra operand so we don't match on cmpdi.
+; ELF64: cmpd {{[0-9]+}}
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
new file mode 100644
index 000000000000..a31c31210c39
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -0,0 +1,305 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+; Test sitofp
+
+define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
+entry:
+; ELF64: sitofp_single_i64
+  %b.addr = alloca float, align 4
+  %conv = sitofp i64 %a to float
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfids
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ELF64: sitofp_single_i32
+  %b.addr = alloca float, align 4
+  %conv = sitofp i32 %a to float
+; ELF64: std
+; ELF64: lfiwax
+; ELF64: fcfids
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ELF64: sitofp_single_i16
+  %b.addr = alloca float, align 4
+  %conv = sitofp i16 %a to float
+; ELF64: extsh
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfids
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ELF64: sitofp_single_i8
+  %b.addr = alloca float, align 4
+  %conv = sitofp i8 %a to float
+; ELF64: extsb
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfids
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i32
+  %b.addr = alloca double, align 8
+  %conv = sitofp i32 %a to double
+; ELF64: std
+; ELF64: lfiwax
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i64
+  %b.addr = alloca double, align 8
+  %conv = sitofp i64 %a to double
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i16
+  %b.addr = alloca double, align 8
+  %conv = sitofp i16 %a to double
+; ELF64: extsh
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ELF64: sitofp_double_i8
+  %b.addr = alloca double, align 8
+  %conv = sitofp i8 %a to double
+; ELF64: extsb
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfid
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test uitofp
+
+define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
+entry:
+; ELF64: uitofp_single_i64
+  %b.addr = alloca float, align 4
+  %conv = uitofp i64 %a to float
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfidus
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ELF64: uitofp_single_i32
+  %b.addr = alloca float, align 4
+  %conv = uitofp i32 %a to float
+; ELF64: std
+; ELF64: lfiwzx
+; ELF64: fcfidus
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ELF64: uitofp_single_i16
+  %b.addr = alloca float, align 4
+  %conv = uitofp i16 %a to float
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfidus
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ELF64: uitofp_single_i8
+  %b.addr = alloca float, align 4
+  %conv = uitofp i8 %a to float
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfidus
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
+entry:
+; ELF64: uitofp_double_i64
+  %b.addr = alloca double, align 8
+  %conv = uitofp i64 %a to double
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfidu
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ELF64: uitofp_double_i32
+  %b.addr = alloca double, align 8
+  %conv = uitofp i32 %a to double
+; ELF64: std
+; ELF64: lfiwzx
+; ELF64: fcfidu
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ELF64: uitofp_double_i16
+  %b.addr = alloca double, align 8
+  %conv = uitofp i16 %a to double
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfidu
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ELF64: uitofp_double_i8
+  %b.addr = alloca double, align 8
+  %conv = uitofp i8 %a to double
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64: std
+; ELF64: lfd
+; ELF64: fcfidu
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float_i32(float %a) nounwind ssp {
+entry:
+; ELF64: fptosi_float_i32
+  %b.addr = alloca i32, align 4
+  %conv = fptosi float %a to i32
+; ELF64: fctiwz
+; ELF64: stfd
+; ELF64: lwa
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_float_i64(float %a) nounwind ssp {
+entry:
+; ELF64: fptosi_float_i64
+  %b.addr = alloca i64, align 4
+  %conv = fptosi float %a to i64
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_double_i32(double %a) nounwind ssp {
+entry:
+; ELF64: fptosi_double_i32
+  %b.addr = alloca i32, align 8
+  %conv = fptosi double %a to i32
+; ELF64: fctiwz
+; ELF64: stfd
+; ELF64: lwa
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+define void @fptosi_double_i64(double %a) nounwind ssp {
+entry:
+; ELF64: fptosi_double_i64
+  %b.addr = alloca i64, align 8
+  %conv = fptosi double %a to i64
+; ELF64: fctidz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 8
+  ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float_i32(float %a) nounwind ssp {
+entry:
+; ELF64: fptoui_float_i32
+  %b.addr = alloca i32, align 4
+  %conv = fptoui float %a to i32
+; ELF64: fctiwuz
+; ELF64: stfd
+; ELF64: lwz
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_float_i64(float %a) nounwind ssp {
+entry:
+; ELF64: fptoui_float_i64
+  %b.addr = alloca i64, align 4
+  %conv = fptoui float %a to i64
+; ELF64: fctiduz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_double_i32(double %a) nounwind ssp {
+entry:
+; ELF64: fptoui_double_i32
+  %b.addr = alloca i32, align 8
+  %conv = fptoui double %a to i32
+; ELF64: fctiwuz
+; ELF64: stfd
+; ELF64: lwz
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+define void @fptoui_double_i64(double %a) nounwind ssp {
+entry:
+; ELF64: fptoui_double_i64
+  %b.addr = alloca i64, align 8
+  %conv = fptoui double %a to i64
+; ELF64: fctiduz
+; ELF64: stfd
+; ELF64: ld
+  store i64 %conv, i64* %b.addr, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-crash.ll b/test/CodeGen/PowerPC/fast-isel-crash.ll
new file mode 100644
index 000000000000..1813fc96acee
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-crash.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+
+; Ensure this doesn't crash.
+
+%union.anon = type { <16 x i32> }
+
+@__md0 = external global [137 x i8]
+
+define internal void @stretch(<4 x i8> addrspace(1)* %src, <4 x i8> addrspace(1)* %dst, i32 %width, i32 %height, i32 %iLS, i32 %oLS, <2 x float> %c, <4 x float> %param) nounwind { ; empty callee; its mix of addrspace(1) pointer, i32 and vector parameters is what @wrap has to pass
+entry:
+  ret void
+}
+
+define internal i32 @_Z13get_global_idj(i32 %dim) nounwind ssp { ; internal helper that ignores %dim; not called by the other functions in this test
+entry:
+  ret i32 undef ; the returned value is unspecified
+}
+
+define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind ssp { ; externally visible caller; the RUN line only requires llc to finish (no FileCheck)
+entry:
+  call void @stretch(<4 x i8> addrspace(1)* undef, <4 x i8> addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 undef, <2 x float> undef, <4 x float> undef) ; every argument, including the vector ones, is undef
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-ext.ll b/test/CodeGen/PowerPC/fast-isel-ext.ll
new file mode 100644
index 000000000000..753305a68dda
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-ext.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+; zext
+
+define i32 @zext_8_32(i8 %a) nounwind ssp { ; zero-extend i8 -> i32
+; ELF64: zext_8_32
+  %r = zext i8 %a to i32 ; expected as rlwinm with rotate 0 and mask bits 24..31
+; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+  ret i32 %r
+}
+
+define i32 @zext_16_32(i16 %a) nounwind ssp { ; zero-extend i16 -> i32
+; ELF64: zext_16_32
+  %r = zext i16 %a to i32 ; expected as rlwinm with rotate 0 and mask bits 16..31
+; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+  ret i32 %r
+}
+
+define i64 @zext_8_64(i8 %a) nounwind ssp { ; zero-extend i8 -> i64
+; ELF64: zext_8_64
+  %r = zext i8 %a to i64 ; expected as rldicl with rotate 0, clearing the upper 56 bits
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+  ret i64 %r
+}
+
+define i64 @zext_16_64(i16 %a) nounwind ssp { ; zero-extend i16 -> i64
+; ELF64: zext_16_64
+  %r = zext i16 %a to i64 ; expected as rldicl with rotate 0, clearing the upper 48 bits
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+  ret i64 %r
+}
+
+define i64 @zext_32_64(i32 %a) nounwind ssp { ; zero-extend i32 -> i64
+; ELF64: zext_32_64
+  %r = zext i32 %a to i64 ; expected as rldicl with rotate 0, clearing the upper 32 bits
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+  ret i64 %r
+}
+
+; sext
+
+define i32 @sext_8_32(i8 %a) nounwind ssp { ; sign-extend i8 -> i32
+; ELF64: sext_8_32
+  %r = sext i8 %a to i32 ; expected as extsb
+; ELF64: extsb
+  ret i32 %r
+}
+
+define i32 @sext_16_32(i16 %a) nounwind ssp { ; sign-extend i16 -> i32
+; ELF64: sext_16_32
+  %r = sext i16 %a to i32 ; expected as extsh
+; ELF64: extsh
+  ret i32 %r
+}
+
+define i64 @sext_8_64(i8 %a) nounwind ssp { ; sign-extend i8 -> i64
+; ELF64: sext_8_64
+  %r = sext i8 %a to i64 ; expected as extsb, same mnemonic as the i32 case
+; ELF64: extsb
+  ret i64 %r
+}
+
+define i64 @sext_16_64(i16 %a) nounwind ssp { ; sign-extend i16 -> i64
+; ELF64: sext_16_64
+  %r = sext i16 %a to i64 ; expected as extsh, same mnemonic as the i32 case
+; ELF64: extsh
+  ret i64 %r
+}
+
+define i64 @sext_32_64(i32 %a) nounwind ssp { ; sign-extend i32 -> i64
+; ELF64: sext_32_64
+  %r = sext i32 %a to i64 ; expected as extsw
+; ELF64: extsw
+  ret i64 %r
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-fold.ll b/test/CodeGen/PowerPC/fast-isel-fold.ll
new file mode 100644
index 000000000000..4de345f309af
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-fold.ll
@@ -0,0 +1,129 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+@a = global i8 1, align 1
+@b = global i16 2, align 2
+@c = global i32 4, align 4
+
+define void @t1() nounwind uwtable ssp { ; i8 load passed as a zeroext argument
+; ELF64: t1
+  %1 = load i8* @a, align 1
+  call void @foo1(i8 zeroext %1) ; lbz alone should satisfy zeroext: no rldicl/rlwinm mask may follow
+; ELF64: lbz
+; ELF64-NOT: rldicl
+; ELF64-NOT: rlwinm
+  ret void
+}
+
+define void @t2() nounwind uwtable ssp { ; i16 load passed as a zeroext argument
+; ELF64: t2
+  %1 = load i16* @b, align 2
+  call void @foo2(i16 zeroext %1) ; lhz alone should satisfy zeroext: no rldicl/rlwinm mask may follow
+; ELF64: lhz
+; ELF64-NOT: rldicl
+; ELF64-NOT: rlwinm
+  ret void
+}
+
+define void @t2a() nounwind uwtable ssp { ; i32 load passed as a zeroext argument
+; ELF64: t2a
+  %1 = load i32* @c, align 4
+  call void @foo3(i32 zeroext %1) ; lwz alone should satisfy zeroext: no rldicl/rlwinm mask may follow
+; ELF64: lwz
+; ELF64-NOT: rldicl
+; ELF64-NOT: rlwinm
+  ret void
+}
+
+declare void @foo1(i8 zeroext)
+declare void @foo2(i16 zeroext)
+declare void @foo3(i32 zeroext)
+
+define i32 @t3() nounwind uwtable ssp { ; zext of an i8 load to i32
+; ELF64: t3
+  %1 = load i8* @a, align 1
+  %2 = zext i8 %1 to i32 ; should fold into the lbz: no separate rlwinm expected
+; ELF64: lbz
+; ELF64-NOT: rlwinm
+  ret i32 %2
+}
+
+define i32 @t4() nounwind uwtable ssp { ; zext of an i16 load to i32
+; ELF64: t4
+  %1 = load i16* @b, align 2
+  %2 = zext i16 %1 to i32 ; should fold into the lhz: no separate rlwinm expected
+; ELF64: lhz
+; ELF64-NOT: rlwinm
+  ret i32 %2
+}
+
+define i32 @t5() nounwind uwtable ssp { ; sext of an i16 load to i32
+; ELF64: t5
+  %1 = load i16* @b, align 2
+  %2 = sext i16 %1 to i32 ; should fold into the sign-extending lha, so no separate extsh may follow
+; ELF64: lha
+; ELF64-NOT: extsh
+  ret i32 %2
+}
+
+define i32 @t6() nounwind uwtable ssp { ; sext of an i8 load to i32
+; ELF64: t6
+  %1 = load i8* @a, align 1 ; @a is declared align 1, so the load must not claim more
+  %2 = sext i8 %1 to i32 ; the byte load used here is lbz, so an explicit extsb is required
+; ELF64: lbz
+; ELF64: extsb
+  ret i32 %2
+}
+
+define i64 @t7() nounwind uwtable ssp { ; zext of an i8 load to i64
+; ELF64: t7
+  %1 = load i8* @a, align 1
+  %2 = zext i8 %1 to i64 ; should fold into the lbz: no separate rldicl expected
+; ELF64: lbz
+; ELF64-NOT: rldicl
+  ret i64 %2
+}
+
+define i64 @t8() nounwind uwtable ssp { ; zext of an i16 load to i64
+; ELF64: t8
+  %1 = load i16* @b, align 2
+  %2 = zext i16 %1 to i64 ; should fold into the lhz: no separate rldicl expected
+; ELF64: lhz
+; ELF64-NOT: rldicl
+  ret i64 %2
+}
+
+define i64 @t9() nounwind uwtable ssp { ; sext of an i16 load to i64
+; ELF64: t9
+  %1 = load i16* @b, align 2
+  %2 = sext i16 %1 to i64 ; should fold into the sign-extending lha: no separate extsh expected
+; ELF64: lha
+; ELF64-NOT: extsh
+  ret i64 %2
+}
+
+define i64 @t10() nounwind uwtable ssp { ; sext of an i8 load to i64
+; ELF64: t10
+  %1 = load i8* @a, align 1 ; @a is declared align 1, so the load must not claim more
+  %2 = sext i8 %1 to i64 ; the byte load used here is lbz, so an explicit extsb is expected
+; ELF64: lbz
+; ELF64: extsb
+  ret i64 %2
+}
+
+define i64 @t11() nounwind uwtable ssp { ; zext of an i32 load to i64
+; ELF64: t11
+  %1 = load i32* @c, align 4
+  %2 = zext i32 %1 to i64 ; should fold into the lwz: no separate rldicl expected
+; ELF64: lwz
+; ELF64-NOT: rldicl
+  ret i64 %2
+}
+
+define i64 @t12() nounwind uwtable ssp { ; sext of an i32 load to i64
+; ELF64: t12
+  %1 = load i32* @c, align 4
+  %2 = sext i32 %1 to i64 ; should fold into the sign-extending lwa: no separate extsw expected
+; ELF64: lwa
+; ELF64-NOT: extsw
+  ret i64 %2
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
new file mode 100644
index 000000000000..88ccf918ae96
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define void @t1(i8* %x) { ; indirect branch through the incoming pointer %x
+entry:
+; ELF64: t1
+  br label %L0
+
+L0:
+  br label %L1
+
+L1:
+  indirectbr i8* %x, [ label %L0, label %L1 ] ; expected as mtctr from register 3 (presumably still holding %x) followed by bctr
+; ELF64: mtctr 3
+; ELF64: bctr
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-load-store.ll b/test/CodeGen/PowerPC/fast-isel-load-store.ll
new file mode 100644
index 000000000000..026b15fe5e4e
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-load-store.ll
@@ -0,0 +1,202 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+; This test verifies that load/store instructions are properly generated,
+; and that they pass MI verification.
+
+@a = global i8 1, align 1
+@b = global i16 2, align 2
+@c = global i32 4, align 4
+@d = global i64 8, align 8
+@e = global float 1.25, align 4
+@f = global double 3.5, align 8
+
+%struct.s = type<{ i8, i32 }>
+%struct.t = type<{ i8, i64 }>
+
+@g = global %struct.s <{ i8 1, i32 2 }>, align 1
+@h = global %struct.t <{ i8 1, i64 2 }>, align 1
+
+@i = common global [8192 x i64] zeroinitializer, align 8
+
+; load
+
+define i8 @t1() nounwind uwtable ssp { ; i8 global load followed by an add
+; ELF64: t1
+  %1 = load i8* @a, align 1 ; expected as lbz
+; ELF64: lbz
+  %2 = add nsw i8 %1, 1 ; expected as addi
+; ELF64: addi
+  ret i8 %2
+}
+
+define i16 @t2() nounwind uwtable ssp { ; i16 global load followed by an add
+; ELF64: t2
+  %1 = load i16* @b, align 2 ; expected as lhz
+; ELF64: lhz
+  %2 = add nsw i16 %1, 1 ; expected as addi
+; ELF64: addi
+  ret i16 %2
+}
+
+define i32 @t3() nounwind uwtable ssp { ; i32 global load followed by an add
+; ELF64: t3
+  %1 = load i32* @c, align 4 ; expected as lwz
+; ELF64: lwz
+  %2 = add nsw i32 %1, 1 ; expected as addi
+; ELF64: addi
+  ret i32 %2
+}
+
+define i64 @t4() nounwind uwtable ssp { ; i64 global load followed by an add
+; ELF64: t4
+  %1 = load i64* @d, align 4 ; load claims only 4-byte alignment although @d is align 8; still expected as ld
+; ELF64: ld
+  %2 = add nsw i64 %1, 1 ; expected as addi
+; ELF64: addi
+  ret i64 %2
+}
+
+define float @t5() nounwind uwtable ssp { ; float global load followed by an fadd
+; ELF64: t5
+  %1 = load float* @e, align 4 ; expected as lfs
+; ELF64: lfs
+  %2 = fadd float %1, 1.0 ; expected as the single-precision fadds
+; ELF64: fadds
+  ret float %2
+}
+
+define double @t6() nounwind uwtable ssp { ; double global load followed by an fadd
+; ELF64: t6
+  %1 = load double* @f, align 8 ; expected as lfd
+; ELF64: lfd
+  %2 = fadd double %1, 1.0 ; expected as fadd
+; ELF64: fadd
+  ret double %2
+}
+
+; store
+
+define void @t7(i8 %v) nounwind uwtable ssp { ; add then i8 store to a global
+; ELF64: t7
+  %1 = add nsw i8 %v, 1
+  store i8 %1, i8* @a, align 1 ; checks expect addis, addi, addi and finally stb
+; ELF64: addis
+; ELF64: addi
+; ELF64: addi
+; ELF64: stb
+  ret void
+}
+
+define void @t8(i16 %v) nounwind uwtable ssp { ; add then i16 store to a global
+; ELF64: t8
+  %1 = add nsw i16 %v, 1
+  store i16 %1, i16* @b, align 2 ; checks expect addis, addi, addi and finally sth
+; ELF64: addis
+; ELF64: addi
+; ELF64: addi
+; ELF64: sth
+  ret void
+}
+
+define void @t9(i32 %v) nounwind uwtable ssp { ; add then i32 store to a global
+; ELF64: t9
+  %1 = add nsw i32 %v, 1
+  store i32 %1, i32* @c, align 4 ; checks expect addis, addi, addi and finally stw
+; ELF64: addis
+; ELF64: addi
+; ELF64: addi
+; ELF64: stw
+  ret void
+}
+
+define void @t10(i64 %v) nounwind uwtable ssp { ; add then i64 store to a global
+; ELF64: t10
+  %1 = add nsw i64 %v, 1
+  store i64 %1, i64* @d, align 4 ; store claims only 4-byte alignment although @d is align 8; still expected as std
+; ELF64: addis
+; ELF64: addi
+; ELF64: addi
+; ELF64: std
+  ret void
+}
+
+define void @t11(float %v) nounwind uwtable ssp { ; fadd then float store to a global
+; ELF64: t11
+  %1 = fadd float %v, 1.0
+  store float %1, float* @e, align 4 ; checks expect fadds then stfs
+; ELF64: fadds
+; ELF64: stfs
+  ret void
+}
+
+define void @t12(double %v) nounwind uwtable ssp { ; fadd then double store to a global
+; ELF64: t12
+  %1 = fadd double %v, 1.0
+  store double %1, double* @f, align 8 ; checks expect fadd then stfd
+; ELF64: fadd
+; ELF64: stfd
+  ret void
+}
+
+;; lwa requires an offset divisible by 4, so we need lwax here.
+define i64 @t13() nounwind uwtable ssp { ; sign-extending i32 load from an odd byte offset
+; ELF64: t13
+  %1 = load i32* getelementptr inbounds (%struct.s* @g, i32 0, i32 1), align 1 ; field 1 of the packed struct sits at byte offset 1
+  %2 = sext i32 %1 to i64 ; offset is materialized with li and the load uses the indexed form lwax
+; ELF64: li
+; ELF64: lwax
+  %3 = add nsw i64 %2, 1
+; ELF64: addi
+  ret i64 %3
+}
+
+;; ld requires an offset divisible by 4, so we need ldx here.
+define i64 @t14() nounwind uwtable ssp { ; i64 load from an odd byte offset
+; ELF64: t14
+  %1 = load i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1 ; field 1 of the packed struct sits at byte offset 1; expect li + ldx
+; ELF64: li
+; ELF64: ldx
+  %2 = add nsw i64 %1, 1
+; ELF64: addi
+  ret i64 %2
+}
+
+;; std requires an offset divisible by 4, so we need stdx here.
+define void @t15(i64 %v) nounwind uwtable ssp { ; i64 store to an odd byte offset
+; ELF64: t15
+  %1 = add nsw i64 %v, 1
+  store i64 %1, i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1 ; field 1 of the packed struct sits at byte offset 1; expect li + stdx
+; ELF64: addis
+; ELF64: addi
+; ELF64: addi
+; ELF64: li
+; ELF64: stdx
+  ret void
+}
+
+;; ld requires an offset that fits in 16 bits, so we need ldx here.
+define i64 @t16() nounwind uwtable ssp { ; i64 load at a large constant offset
+; ELF64: t16
+  %1 = load i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8 ; element 5000 is byte offset 40000, beyond a signed 16-bit displacement; offset built with lis/ori
+; ELF64: lis
+; ELF64: ori
+; ELF64: ldx
+  %2 = add nsw i64 %1, 1
+; ELF64: addi
+  ret i64 %2
+}
+
+;; std requires an offset that fits in 16 bits, so we need stdx here.
+define void @t17(i64 %v) nounwind uwtable ssp { ; i64 store at a large constant offset
+; ELF64: t17
+  %1 = add nsw i64 %v, 1
+  store i64 %1, i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8 ; element 5000 is byte offset 40000, beyond a signed 16-bit displacement; expect lis/ori + stdx
+; ELF64: addis
+; ELF64: ld
+; ELF64: addi
+; ELF64: lis
+; ELF64: ori
+; ELF64: stdx
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/fast-isel-redefinition.ll b/test/CodeGen/PowerPC/fast-isel-redefinition.ll
new file mode 100644
index 000000000000..72422bda4433
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-redefinition.ll
@@ -0,0 +1,10 @@
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+; This isn't exactly a useful set of command-line options, but check that it
+; doesn't crash.  (It formerly crashed on ARM, and proved useful in
+; discovering a bug on PowerPC as well.)
+
+define i32 @f(i32* %x) nounwind ssp { ; compile-only test: the RUN line has no FileCheck
+  %y = getelementptr inbounds i32* %x, i32 5000 ; element 5000 of %x, i.e. byte offset 20000
+  %tmp103 = load i32* %y, align 4
+  ret i32 %tmp103
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
new file mode 100644
index 000000000000..fa19f8b11fd6
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp { ; signext i8 return: expects extsb before blr
+entry:
+; ELF64: ret2
+; ELF64: extsb
+; ELF64: blr
+  ret i8 %a
+}
+
+define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp { ; signext argument returned as zeroext i8: expects rldicl clearing the upper 56 bits
+entry:
+; ELF64: ret3
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64: blr
+  ret i8 %a
+}
+
+define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp { ; signext i16 return: expects extsh before blr
+entry:
+; ELF64: ret4
+; ELF64: extsh
+; ELF64: blr
+  ret i16 %a
+}
+
+define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp { ; signext argument returned as zeroext i16: expects rldicl clearing the upper 48 bits
+entry:
+; ELF64: ret5
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: blr
+  ret i16 %a
+}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp { ; no extension attribute: the checks expect the same rldicl as the zeroext i16 case
+entry:
+; ELF64: ret6
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64: blr
+  ret i16 %a
+}
+
+define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp { ; signext i32 return: expects extsw before blr
+entry:
+; ELF64: ret7
+; ELF64: extsw
+; ELF64: blr
+  ret i32 %a
+}
+
+define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp { ; signext argument returned as zeroext i32: expects rldicl clearing the upper 32 bits
+entry:
+; ELF64: ret8
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; ELF64: blr
+  ret i32 %a
+}
+
+define i32 @ret9(i32 %a) nounwind uwtable ssp { ; no extension attribute: the checks expect the same rldicl as the zeroext i32 case
+entry:
+; ELF64: ret9
+; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; ELF64: blr
+  ret i32 %a
+}
+
+define i64 @ret10(i64 %a) nounwind uwtable ssp { ; full-width i64 return: neither a sign extension nor an rldicl may appear before blr
+entry:
+; ELF64: ret10
+; ELF64-NOT: exts
+; ELF64-NOT: rldicl
+; ELF64: blr
+  ret i64 %a
+}
+
+define float @ret11(float %a) nounwind uwtable ssp { ; float passthrough return: only blr is checked
+entry:
+; ELF64: ret11
+; ELF64: blr
+  ret float %a
+}
+
+define double @ret12(double %a) nounwind uwtable ssp { ; double passthrough return: only blr is checked
+entry:
+; ELF64: ret12
+; ELF64: blr
+  ret double %a
+}
+
+define i8 @ret13() nounwind uwtable ssp { ; small constant i8 return: expects a single li
+entry:
+; ELF64: ret13
+; ELF64: li
+; ELF64: blr
+  ret i8 15;
+}
+
+define i16 @ret14() nounwind uwtable ssp { ; negative constant that fits a 16-bit immediate: expects a single li
+entry:
+; ELF64: ret14
+; ELF64: li
+; ELF64: blr
+  ret i16 -225;
+}
+
+define i32 @ret15() nounwind uwtable ssp { ; constant too large for a 16-bit immediate: expects lis + ori
+entry:
+; ELF64: ret15
+; ELF64: lis
+; ELF64: ori
+; ELF64: blr
+  ret i32 278135;
+}
+
+define i64 @ret16() nounwind uwtable ssp { ; constant wider than 32 bits: expects li, sldi, oris, ori
+entry:
+; ELF64: ret16
+; ELF64: li
+; ELF64: sldi
+; ELF64: oris
+; ELF64: ori
+; ELF64: blr
+  ret i64 27813515225;
+}
+
+define float @ret17() nounwind uwtable ssp { ; float constant return: expects an addis-formed address and an lfs load
+entry:
+; ELF64: ret17
+; ELF64: addis
+; ELF64: lfs
+; ELF64: blr
+  ret float 2.5;
+}
+
+define double @ret18() nounwind uwtable ssp { ; double constant return: expects an addis-formed address and an lfd load
+entry:
+; ELF64: ret18
+; ELF64: addis
+; ELF64: lfd
+; ELF64: blr
+  ret double 2.5e-33;
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-shifter.ll b/test/CodeGen/PowerPC/fast-isel-shifter.ll
new file mode 100644
index 000000000000..198bfbecda63
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-shifter.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define i32 @shl() nounwind ssp { ; 32-bit left shift with constant operands
+entry:
+; ELF64: shl
+; ELF64: slw
+  %shl = shl i32 -1, 2 ; both operands are constants; the check still expects slw
+  ret i32 %shl
+}
+
+define i32 @shl_reg(i32 %src1, i32 %src2) nounwind ssp { ; 32-bit left shift by a register amount
+entry:
+; ELF64: shl_reg
+; ELF64: slw
+  %shl = shl i32 %src1, %src2 ; expected as slw
+  ret i32 %shl
+}
+
+define i32 @lshr() nounwind ssp { ; 32-bit logical right shift with constant operands
+entry:
+; ELF64: lshr
+; ELF64: srw
+  %lshr = lshr i32 -1, 2 ; both operands are constants; the check still expects srw
+  ret i32 %lshr
+}
+
+define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind ssp { ; 32-bit logical right shift by a register amount
+entry:
+; ELF64: lshr_reg
+; ELF64: srw
+  %lshr = lshr i32 %src1, %src2 ; expected as srw
+  ret i32 %lshr
+}
+
+define i32 @ashr() nounwind ssp { ; 32-bit arithmetic right shift with constant operands
+entry:
+; ELF64: ashr
+; ELF64: srawi
+  %ashr = ashr i32 -1, 2 ; constant shift amount: expected as the immediate form srawi
+  ret i32 %ashr
+}
+
+define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind ssp { ; 32-bit arithmetic right shift by a register amount
+entry:
+; ELF64: ashr_reg
+; ELF64: sraw
+  %ashr = ashr i32 %src1, %src2 ; register shift amount: expected as sraw
+  ret i32 %ashr
+}
+
diff --git a/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
new file mode 100644
index 000000000000..4bcacf009746
--- /dev/null
+++ b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
@@ -0,0 +1,17 @@
+; fastisel should not fold add with non-pointer bitwidth
+; sext(a) + sext(b) != sext(a + b)
+; RUN: llc -mtriple=powerpc64-unknown-freebsd10.0 %s -O0 -o - | FileCheck %s
+
+define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp { ; GEP indexed by an i8 sum that overflows 8 bits
+entry:
+  %ptr.addr = alloca i8*, align 8
+  %add = add i8 64, 64 ; 0x40 + 0x40
+  %0 = load i8** %ptr.addr, align 8 ; %ptr is never stored to the slot, so this reads it uninitialized
+
+  ; CHECK-LABEL: gep_promotion:
+  ; CHECK: lbz {{[0-9]+}}, 0({{.*}})
+  %arrayidx = getelementptr inbounds i8* %0, i8 %add ; the expected lbz has displacement 0, i.e. the index is not folded in as a constant offset
+
+  %1 = load i8* %arrayidx, align 1
+  ret i8 %1
+}
diff --git a/test/CodeGen/PowerPC/fcpsgn.ll b/test/CodeGen/PowerPC/fcpsgn.ll
new file mode 100644
index 000000000000..f4699816340a
--- /dev/null
+++ b/test/CodeGen/PowerPC/fcpsgn.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @foo_dd(double %a, double %b) #0 { ; copysign(double, double)
+entry:
+  %call = tail call double @copysign(double %a, double %b) #0 ; expected to become fcpsgn 1, 2, 1 rather than a call
+  ret double %call
+
+; CHECK-LABEL: @foo_dd
+; CHECK: fcpsgn 1, 2, 1
+; CHECK: blr
+}
+
+declare double @copysign(double, double) #0
+
+define float @foo_ss(float %a, float %b) #0 { ; copysignf(float, float)
+entry:
+  %call = tail call float @copysignf(float %a, float %b) #0 ; expected to become fcpsgn 1, 2, 1 rather than a call
+  ret float %call
+
+; CHECK-LABEL: @foo_ss
+; CHECK: fcpsgn 1, 2, 1
+; CHECK: blr
+}
+
+declare float @copysignf(float, float) #0
+
+define float @foo_sd(float %a, double %b) #0 { ; copysignf with the sign taken from a truncated double
+entry:
+  %conv = fptrunc double %b to float ; only the sign of %conv is consumed by the call below
+  %call = tail call float @copysignf(float %a, float %conv) #0 ; the checks require fcpsgn 1, 2, 1 followed by blr
+  ret float %call
+
+; CHECK-LABEL: @foo_sd
+; CHECK: fcpsgn 1, 2, 1
+; CHECK: blr
+}
+
+define double @foo_ds(double %a, float %b) #0 { ; copysign with the sign taken from an extended float
+entry:
+  %conv = fpext float %b to double ; only the sign of %conv is consumed by the call below
+  %call = tail call double @copysign(double %a, double %conv) #0 ; the checks require fcpsgn 1, 2, 1 followed by blr
+  ret double %call
+
+; CHECK-LABEL: @foo_ds
+; CHECK: fcpsgn 1, 2, 1
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/floatPSA.ll b/test/CodeGen/PowerPC/floatPSA.ll
index b5631a160561..f14c73630a6f 100644
--- a/test/CodeGen/PowerPC/floatPSA.ll
+++ b/test/CodeGen/PowerPC/floatPSA.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s
 
 ; This verifies that single-precision floating point values that can't
 ; be passed in registers are stored in the rightmost word of the parameter
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index a173c9154041..db19761b431c 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -4,7 +4,7 @@ define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
-; CHECK: test_FMADD1:
+; CHECK-LABEL: test_FMADD1:
 ; CHECK: fmadd
 ; CHECK-NEXT: blr
 }
@@ -13,7 +13,7 @@ define double @test_FMADD2(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
-; CHECK: test_FMADD2:
+; CHECK-LABEL: test_FMADD2:
 ; CHECK: fmadd
 ; CHECK-NEXT: blr
 }
@@ -22,7 +22,7 @@ define double @test_FMSUB(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	ret double %E
-; CHECK: test_FMSUB:
+; CHECK-LABEL: test_FMSUB:
 ; CHECK: fmsub
 ; CHECK-NEXT: blr
 }
@@ -32,7 +32,7 @@ define double @test_FNMADD1(double %A, double %B, double %C) {
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
-; CHECK: test_FNMADD1:
+; CHECK-LABEL: test_FNMADD1:
 ; CHECK: fnmadd
 ; CHECK-NEXT: blr
 }
@@ -42,7 +42,7 @@ define double @test_FNMADD2(double %A, double %B, double %C) {
 	%E = fadd double %C, %D		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
-; CHECK: test_FNMADD2:
+; CHECK-LABEL: test_FNMADD2:
 ; CHECK: fnmadd
 ; CHECK-NEXT: blr
 }
@@ -51,7 +51,7 @@ define double @test_FNMSUB1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %C, %D		; <double> [#uses=1]
 	ret double %E
-; CHECK: test_FNMSUB1:
+; CHECK-LABEL: test_FNMSUB1:
 ; CHECK: fnmsub
 ; CHECK-NEXT: blr
 }
@@ -61,7 +61,7 @@ define double @test_FNMSUB2(double %A, double %B, double %C) {
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
-; CHECK: test_FNMSUB2:
+; CHECK-LABEL: test_FNMSUB2:
 ; CHECK: fnmsub
 ; CHECK-NEXT: blr
 }
@@ -71,7 +71,7 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
 	%E = fsub float %D, %C		; <float> [#uses=1]
 	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
 	ret float %F
-; CHECK: test_FNMSUBS:
+; CHECK-LABEL: test_FNMSUBS:
 ; CHECK: fnmsubs
 ; CHECK-NEXT: blr
 }
diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll
index eabd4a68aa83..4480273673f6 100644
--- a/test/CodeGen/PowerPC/frameaddr.ll
+++ b/test/CodeGen/PowerPC/frameaddr.ll
@@ -40,8 +40,8 @@ declare void @use(i8*)
 
 declare i8* @llvm.frameaddress(i32) #2
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { noreturn nounwind }
 attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
diff --git a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
new file mode 100644
index 000000000000..f97d0ff6268c
--- /dev/null
+++ b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
@@ -0,0 +1,139 @@
+; RUN: llc -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::__exception_ptr::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 }
+%"class.std::__1::__shared_count" = type { i32 (...)**, i64 }
+%"class.std::__exception_ptr::exception_ptr" = type { i8* }
+%"class.std::__1::mutex" = type { %union.pthread_mutex_t }
+%union.pthread_mutex_t = type { %"struct.<anonymous union>::__pthread_mutex_s" }
+%"struct.<anonymous union>::__pthread_mutex_s" = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_internal_list }
+%struct.__pthread_internal_list = type { %struct.__pthread_internal_list*, %struct.__pthread_internal_list* }
+%"class.std::__1::condition_variable" = type { %union.pthread_cond_t }
+%union.pthread_cond_t = type { %struct.anon }
+%struct.anon = type { i32, i32, i64, i64, i64, i8*, i32, i32 }
+%"class.std::__1::unique_lock" = type { %"class.std::__1::mutex"*, i8 }
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: optsize
+define void @_ZNSt3__117__assoc_sub_state4copyEv(%"class.std::__1::__assoc_sub_state"* %this) #0 align 2 { ; compile-only crash test: the RUN line has no FileCheck
+entry:
+  %__lk = alloca %"class.std::__1::unique_lock", align 8
+  %ref.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8
+  %tmp = alloca { i64, i64 }, align 8
+  %agg.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8
+  %__mut_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 2
+  %__m_.i.i = getelementptr inbounds %"class.std::__1::unique_lock"* %__lk, i64 0, i32 0
+  store %"class.std::__1::mutex"* %__mut_, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+  %__owns_.i.i = getelementptr inbounds %"class.std::__1::unique_lock"* %__lk, i64 0, i32 1
+  store i8 1, i8* %__owns_.i.i, align 8, !tbaa !6
+  call void @_ZNSt3__15mutex4lockEv(%"class.std::__1::mutex"* %__mut_) #4
+  invoke void @_ZNSt3__117__assoc_sub_state10__sub_waitERNS_11unique_lockINS_5mutexEEE(%"class.std::__1::__assoc_sub_state"* %this, %"class.std::__1::unique_lock"* %__lk) #4
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %__exception_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 1
+  %0 = bitcast { i64, i64 }* %tmp to i8*
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 16, i32 8, i1 false) ; zero all 16 bytes of %tmp before it is passed byval
+  call void @_ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE(%"class.std::__exception_ptr::exception_ptr"* %ref.tmp, { i64, i64 }* byval %tmp) #5
+  %call = call zeroext i1 @_ZNSt15__exception_ptrneERKNS_13exception_ptrES2_(%"class.std::__exception_ptr::exception_ptr"* %__exception_, %"class.std::__exception_ptr::exception_ptr"* %ref.tmp) #5
+  call void @_ZNSt15__exception_ptr13exception_ptrD1Ev(%"class.std::__exception_ptr::exception_ptr"* %ref.tmp) #5
+  br i1 %call, label %if.then, label %if.end
+
+if.then:                                          ; preds = %invoke.cont
+  call void @_ZNSt15__exception_ptr13exception_ptrC1ERKS0_(%"class.std::__exception_ptr::exception_ptr"* %agg.tmp, %"class.std::__exception_ptr::exception_ptr"* %__exception_) #5
+  invoke void @_ZSt17rethrow_exceptionNSt15__exception_ptr13exception_ptrE(%"class.std::__exception_ptr::exception_ptr"* %agg.tmp) #6
+          to label %invoke.cont4 unwind label %lpad3
+
+invoke.cont4:                                     ; preds = %if.then
+  unreachable
+
+lpad:                                             ; preds = %entry
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = extractvalue { i8*, i32 } %1, 1
+  br label %ehcleanup
+
+lpad3:                                            ; preds = %if.then
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %5 = extractvalue { i8*, i32 } %4, 0
+  %6 = extractvalue { i8*, i32 } %4, 1
+  call void @_ZNSt15__exception_ptr13exception_ptrD1Ev(%"class.std::__exception_ptr::exception_ptr"* %agg.tmp) #5
+  br label %ehcleanup
+
+if.end:                                           ; preds = %invoke.cont
+  %7 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
+  %tobool.i.i = icmp eq i8 %7, 0
+  br i1 %tobool.i.i, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %if.end
+  %8 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+  call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %8) #5
+  br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit
+
+_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit:      ; preds = %if.then.i.i, %if.end
+  ret void
+
+ehcleanup:                                        ; preds = %lpad3, %lpad
+  %exn.slot.0 = phi i8* [ %5, %lpad3 ], [ %2, %lpad ]
+  %ehselector.slot.0 = phi i32 [ %6, %lpad3 ], [ %3, %lpad ]
+  %9 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
+  %tobool.i.i9 = icmp eq i8 %9, 0
+  br i1 %tobool.i.i9, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12, label %if.then.i.i11
+
+if.then.i.i11:                                    ; preds = %ehcleanup
+  %10 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+  call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %10) #5
+  br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12
+
+_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12:    ; preds = %if.then.i.i11, %ehcleanup
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+  %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+  resume { i8*, i32 } %lpad.val5 ; re-raise with the exception pointer/selector pair merged in ehcleanup
+}
+
+; Function Attrs: optsize
+declare void @_ZNSt3__117__assoc_sub_state10__sub_waitERNS_11unique_lockINS_5mutexEEE(%"class.std::__1::__assoc_sub_state"*, %"class.std::__1::unique_lock"*) #0 align 2
+
+; Function Attrs: nounwind optsize
+declare zeroext i1 @_ZNSt15__exception_ptrneERKNS_13exception_ptrES2_(%"class.std::__exception_ptr::exception_ptr"*, %"class.std::__exception_ptr::exception_ptr"*) #1
+
+; Function Attrs: nounwind optsize
+declare void @_ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE(%"class.std::__exception_ptr::exception_ptr"*, { i64, i64 }* byval) #1
+
+; Function Attrs: nounwind optsize
+declare void @_ZNSt15__exception_ptr13exception_ptrD1Ev(%"class.std::__exception_ptr::exception_ptr"*) #1
+
+; Function Attrs: noreturn optsize
+declare void @_ZSt17rethrow_exceptionNSt15__exception_ptr13exception_ptrE(%"class.std::__exception_ptr::exception_ptr"*) #2
+
+; Function Attrs: nounwind optsize
+declare void @_ZNSt15__exception_ptr13exception_ptrC1ERKS0_(%"class.std::__exception_ptr::exception_ptr"*, %"class.std::__exception_ptr::exception_ptr"*) #1
+
+; Function Attrs: nounwind optsize
+declare void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"*) #1
+
+; Function Attrs: optsize
+declare void @_ZNSt3__15mutex4lockEv(%"class.std::__1::mutex"*) #0
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
+
+attributes #0 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noreturn optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+attributes #4 = { optsize }
+attributes #5 = { nounwind optsize }
+attributes #6 = { noreturn optsize }
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"bool", metadata !1}
+!4 = metadata !{i8 0, i8 2}
+!5 = metadata !{metadata !0, metadata !0, i64 0}
+!6 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/CodeGen/PowerPC/hello-reloc.s b/test/CodeGen/PowerPC/hello-reloc.s
new file mode 100644
index 000000000000..9bbfb3817890
--- /dev/null
+++ b/test/CodeGen/PowerPC/hello-reloc.s
@@ -0,0 +1,84 @@
+; This tests the basic implementation of PPCMachObjectWriter.cpp,
+; which is responsible for writing Mach-O relocation entries for (PIC)
+; PowerPC objects.
+; NOTE: Darwin PPC asm syntax is not yet supported by PPCAsmParser,
+; so this test case uses ELF PPC asm syntax to produce a Mach-O object.
+; Once PPCAsmParser supports Darwin asm syntax, this test case should
+; be updated accordingly.
+
+; RUN: llvm-mc -filetype=obj -relocation-model=pic -mcpu=g4 -triple=powerpc-apple-darwin8 %s -o - | llvm-readobj -relocations | FileCheck -check-prefix=DARWIN-G4-DUMP %s
+
+;	.machine ppc7400
+	.section	__TEXT,__textcoal_nt,coalesced,pure_instructions
+	.section	__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+	.section	__TEXT,__text,regular,pure_instructions
+	.globl	_main
+	.align	4
+_main:                                  ; @main: passes the address of L_.str to the _puts stub, then returns 0
+; BB#0:                                 ; %entry
+	mflr 0				; r0 = LR (return address)
+	stw 31, -4(1)			; save r31 just below the stack pointer
+	stw 0, 8(1)			; save the return address at 8(r1)
+	stwu 1, -80(1)			; push an 80-byte frame, storing the old r1 at its base
+	bl L0$pb			; branch-and-link to the next instruction: LR = address of L0$pb
+L0$pb:					; PIC base label used by the section-difference expressions below
+	mr 31, 1			; r31 = r1
+	li 5, 0				; r5 = 0
+	mflr 2				; r2 = address of L0$pb (PIC base)
+	stw 3, 68(31)			; spill incoming r3 (presumably argc -- TODO confirm)
+	stw 5, 72(31)			; store the zero in r5 to 72(r31)
+	stw 4, 64(31)			; spill incoming r4 (presumably argv -- TODO confirm)
+	addis 2, 2, (L_.str-L0$pb)@ha	; offset 0x2C: the test expects PPC_RELOC_HA16_SECTDIFF here
+	la 3, (L_.str-L0$pb)@l(2)	; r3 = address of L_.str; offset 0x30: expects PPC_RELOC_LO16_SECTDIFF
+	bl L_puts$stub			; call through the stub; offset 0x34: expects PPC_RELOC_BR24
+	li 3, 0				; return value 0
+	addi 1, 1, 80			; pop the 80-byte frame
+	lwz 0, 8(1)			; reload the saved return address
+	lwz 31, -4(1)			; restore r31
+	mtlr 0				; LR = saved return address
+	blr				; return
+
+	.section	__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+	.align	4
+L_puts$stub:				; PIC symbol stub for _puts: jumps through L_puts$lazy_ptr
+	.indirect_symbol	_puts
+	mflr 0				; keep the caller's LR in r0
+	bcl 20, 31, L_puts$stub$tmp	; branch-and-link to the next instruction: LR = address of L_puts$stub$tmp
+L_puts$stub$tmp:			; PIC base label for the stub
+	mflr 11				; r11 = address of L_puts$stub$tmp
+	addis 11, 11, (L_puts$lazy_ptr-L_puts$stub$tmp)@ha	; offset 0xC: the test expects PPC_RELOC_HA16_SECTDIFF here
+	mtlr 0				; restore the caller's LR
+	lwzu 12, (L_puts$lazy_ptr-L_puts$stub$tmp)@l(11)	; r12 = word at L_puts$lazy_ptr (r11 updated); offset 0x14: expects PPC_RELOC_LO16_SECTDIFF
+	mtctr 12			; CTR = loaded target address
+	bctr				; jump to the target without touching LR
+	.section	__DATA,__la_symbol_ptr,lazy_symbol_pointers
+L_puts$lazy_ptr:
+	.indirect_symbol	_puts
+	.long	dyld_stub_binding_helper
+
+.subsections_via_symbols
+	.section	__TEXT,__cstring,cstring_literals
+L_.str:                                 ; @.str
+	.asciz	 "Hello, world!"
+
+; DARWIN-G4-DUMP:Format: Mach-O 32-bit ppc
+; DARWIN-G4-DUMP:Arch: powerpc
+; DARWIN-G4-DUMP:AddressSize: 32bit
+; DARWIN-G4-DUMP:Relocations [
+; DARWIN-G4-DUMP:  Section __text {
+; DARWIN-G4-DUMP:    0x34 1 2 0 PPC_RELOC_BR24 0 -
+; DARWIN-G4-DUMP:    0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main
+; DARWIN-G4-DUMP:    0x0 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP:    0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main
+; DARWIN-G4-DUMP:    0x60 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP:  }
+; DARWIN-G4-DUMP:  Section __picsymbolstub1 {
+; DARWIN-G4-DUMP:    0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main
+; DARWIN-G4-DUMP:    0x0 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP:    0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main
+; DARWIN-G4-DUMP:    0x18 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP:  }
+; DARWIN-G4-DUMP:  Section __la_symbol_ptr {
+; DARWIN-G4-DUMP:    0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
+; DARWIN-G4-DUMP:  }
+; DARWIN-G4-DUMP:]
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index d2a3239ab865..5770d788caf7 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -22,6 +22,6 @@ entry:
 ; Also check that with -enable-unsafe-fp-math we do not get that extra
 ; code sequence.  Simply verify that there is no "isel" present.
 
-; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE
 ; CHECK-UNSAFE-NOT: isel
 
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 4b6f88bb4a00..fd06fd9b7f46 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -6,9 +6,9 @@
 @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
 
 define internal i32 @foo(i32 %i) nounwind {
-; PIC: foo:
-; STATIC: foo:
-; PPC64: foo:
+; PIC-LABEL: foo:
+; STATIC-LABEL: foo:
+; PPC64-LABEL: foo:
 entry:
   %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
new file mode 100644
index 000000000000..5e31cd58301c
--- /dev/null
+++ b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
@@ -0,0 +1,108 @@
+; RUN: llc -mtriple=powerpc64-bgq-linux -mcpu=a2 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%struct.BG_CoordinateMapping_t = type { [4 x i8] }
+
+; Function Attrs: alwaysinline inlinehint nounwind
+define zeroext i32 @Kernel_RanksToCoords(i64 %mapsize, %struct.BG_CoordinateMapping_t* %map, i64* %numentries) #0 {
+entry:                                            ; Issues an "sc" inline asm whose i64 operands are pinned to r0 and r3-r5.
+  %mapsize.addr = alloca i64, align 8
+  %map.addr = alloca %struct.BG_CoordinateMapping_t*, align 8
+  %numentries.addr = alloca i64*, align 8
+  %r0 = alloca i64, align 8                        ; %r0, %r3, %r4, %r5: stack slots for the values bound to those registers in the asm
+  %r3 = alloca i64, align 8
+  %r4 = alloca i64, align 8
+  %r5 = alloca i64, align 8
+  %tmp = alloca i64, align 8
+  store i64 %mapsize, i64* %mapsize.addr, align 8
+  store %struct.BG_CoordinateMapping_t* %map, %struct.BG_CoordinateMapping_t** %map.addr, align 8
+  store i64* %numentries, i64** %numentries.addr, align 8
+  store i64 1055, i64* %r0, align 8                ; constant fed to the asm in r0 (presumably a syscall number -- TODO confirm)
+  %0 = load i64* %mapsize.addr, align 8
+  store i64 %0, i64* %r3, align 8                  ; r3 input = %mapsize
+  %1 = load %struct.BG_CoordinateMapping_t** %map.addr, align 8
+  %2 = ptrtoint %struct.BG_CoordinateMapping_t* %1 to i64
+  store i64 %2, i64* %r4, align 8                  ; r4 input = %map as an integer
+  %3 = load i64** %numentries.addr, align 8
+  %4 = ptrtoint i64* %3 to i64
+  store i64 %4, i64* %r5, align 8                  ; r5 input = %numentries as an integer
+  %5 = load i64* %r0, align 8                      ; reload the four values for the {r0},{r3},{r4},{r5} input constraints
+  %6 = load i64* %r3, align 8
+  %7 = load i64* %r4, align 8
+  %8 = load i64* %r5, align 8
+  %9 = call { i64, i64, i64, i64 } asm sideeffect "sc", "={r0},={r3},={r4},={r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 %5, i64 %6, i64 %7, i64 %8) #1, !srcloc !0
+
+; CHECK-LABEL: @Kernel_RanksToCoords
+
+; These need to be 64-bit loads, not 32-bit loads (not lwz).
+; CHECK-NOT: lwz
+
+; CHECK: #APP
+; CHECK: sc
+; CHECK: #NO_APP
+
+; CHECK: blr
+
+  %asmresult = extractvalue { i64, i64, i64, i64 } %9, 0   ; unpack the ={r0},={r3},={r4},={r5} outputs in order
+  %asmresult1 = extractvalue { i64, i64, i64, i64 } %9, 1
+  %asmresult2 = extractvalue { i64, i64, i64, i64 } %9, 2
+  %asmresult3 = extractvalue { i64, i64, i64, i64 } %9, 3
+  store i64 %asmresult, i64* %r0, align 8
+  store i64 %asmresult1, i64* %r3, align 8
+  store i64 %asmresult2, i64* %r4, align 8
+  store i64 %asmresult3, i64* %r5, align 8
+  %10 = load i64* %r3, align 8                     ; the r3 output is the function result
+  store i64 %10, i64* %tmp
+  %11 = load i64* %tmp
+  %conv = trunc i64 %11 to i32                     ; truncated to the i32 return type
+  ret i32 %conv
+}
+
+declare void @mtrace()
+
+define signext i32 @main(i32 signext %argc, i8** %argv) {
+entry:                                            ; %argc must stay usable across an "sc" inline asm that writes r0 and r3.
+  %argc.addr = alloca i32, align 4
+  store i32 %argc, i32* %argc.addr, align 4
+  %0 = call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1076)
+  %asmresult1.i = extractvalue { i64, i64 } %0, 1 ; the ={r3} output
+  %conv.i = trunc i64 %asmresult1.i to i32
+  %cmp = icmp eq i32 %conv.i, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK-LABEL: @main
+
+; CHECK-DAG: mr [[REG:[0-9]+]], 3
+; CHECK-DAG: li 0, 1076
+; CHECK:     stw [[REG]],
+
+; CHECK:     #APP
+; CHECK:     sc
+; CHECK:     #NO_APP
+; The asm overwrites r3, so the later compare must use the copy made before it (r3 presumably held %argc -- TODO confirm).
+; CHECK:     cmpwi {{[0-9]+}}, [[REG]], 1
+
+; CHECK: blr
+
+if.then:                                          ; preds = %entry
+  call void @mtrace()
+  %.pre = load i32* %argc.addr, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %1 = phi i32 [ %.pre, %if.then ], [ %argc, %entry ]
+  %cmp1 = icmp slt i32 %1, 2
+  br i1 %cmp1, label %usage, label %if.end40
+
+usage:                                            ; preds = %if.end
+  ret i32 8
+
+if.end40:                                         ; preds = %if.end
+  ret i32 0
+}
+
+attributes #0 = { alwaysinline inlinehint nounwind }
+attributes #1 = { nounwind }
+
+!0 = metadata !{i32 -2146895770}
diff --git a/test/CodeGen/PowerPC/isel-rc-nox0.ll b/test/CodeGen/PowerPC/isel-rc-nox0.ll
new file mode 100644
index 000000000000..ac99aa408bdd
--- /dev/null
+++ b/test/CodeGen/PowerPC/isel-rc-nox0.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@g_62 = external global [1 x [9 x i32]], align 4
+
+; Function Attrs: nounwind
+define void @main() #0 {
+entry:                                            ; Reduced CFG with undef branch conditions; the RUN line only runs llc, with no output checks.
+  br i1 undef, label %cond.true, label %for.cond1.preheader.i
+
+cond.true:                                        ; preds = %entry
+  br label %for.cond1.preheader.i
+
+for.cond1.preheader.i:                            ; preds = %for.cond1.preheader.i, %cond.true, %entry
+  br i1 undef, label %crc32_gentab.exit, label %for.cond1.preheader.i
+
+crc32_gentab.exit:                                ; preds = %for.cond1.preheader.i
+  %tobool.i19.i.i = icmp eq i32 undef, 0
+  %retval.0.i.i.i = select i1 %tobool.i19.i.i, i32* getelementptr inbounds ([1 x [9 x i32]]* @g_62, i64 0, i64 0, i64 6), i32* getelementptr inbounds ([1 x [9 x i32]]* @g_62, i64 0, i64 0, i64 8)
+  br label %for.cond1.preheader.i2961.i
+
+for.cond1.preheader.i2961.i:                      ; preds = %for.inc44.i2977.i, %crc32_gentab.exit
+  call void @llvm.memset.p0i8.i64(i8* bitcast ([1 x [9 x i32]]* @g_62 to i8*), i8 -1, i64 36, i32 4, i1 false) #1 ; set all 36 bytes of @g_62 to 0xFF
+  %0 = load i32* %retval.0.i.i.i, align 4         ; load through the address chosen by the select (element 6 or 8 of @g_62)
+  %tobool.i2967.i = icmp eq i32 %0, 0
+  br label %for.body21.i2968.i
+
+for.body21.i2968.i:                               ; preds = %safe_mod_func_int32_t_s_s.exit.i2974.i, %for.cond1.preheader.i2961.i
+  br i1 %tobool.i2967.i, label %safe_mod_func_int32_t_s_s.exit.i2974.i, label %for.inc44.i2977.i ; branch on whether the loaded value was zero
+
+safe_mod_func_int32_t_s_s.exit.i2974.i:           ; preds = %for.body21.i2968.i
+  br i1 undef, label %for.body21.i2968.i, label %for.inc44.i2977.i
+
+for.inc44.i2977.i:                                ; preds = %safe_mod_func_int32_t_s_s.exit.i2974.i, %for.body21.i2968.i
+  br i1 undef, label %func_80.exit2978.i, label %for.cond1.preheader.i2961.i
+
+func_80.exit2978.i:                               ; preds = %for.inc44.i2977.i
+  unreachable
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
index a10c5ddb36fb..82d4fef10cb3 100644
--- a/test/CodeGen/PowerPC/jaggedstructs.ll
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false < %s | FileCheck %s
 
 ; This tests receiving and re-passing parameters consisting of structures
 ; of size 3, 5, 6, and 7.  They are to be found/placed right-adjusted in
@@ -18,25 +18,25 @@ entry:
   ret void
 }
 
-; CHECK: std 6, 216(1)
-; CHECK: std 5, 208(1)
-; CHECK: std 4, 200(1)
-; CHECK: std 3, 192(1)
-; CHECK: lbz {{[0-9]+}}, 199(1)
-; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: std 6, 184(1)
+; CHECK: std 5, 176(1)
+; CHECK: std 4, 168(1)
+; CHECK: std 3, 160(1)
+; CHECK: lbz {{[0-9]+}}, 167(1)
+; CHECK: lhz {{[0-9]+}}, 165(1)
 ; CHECK: stb {{[0-9]+}}, 55(1)
 ; CHECK: sth {{[0-9]+}}, 53(1)
-; CHECK: lbz {{[0-9]+}}, 207(1)
-; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: lbz {{[0-9]+}}, 175(1)
+; CHECK: lwz {{[0-9]+}}, 171(1)
 ; CHECK: stb {{[0-9]+}}, 63(1)
 ; CHECK: stw {{[0-9]+}}, 59(1)
-; CHECK: lhz {{[0-9]+}}, 214(1)
-; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: lhz {{[0-9]+}}, 182(1)
+; CHECK: lwz {{[0-9]+}}, 178(1)
 ; CHECK: sth {{[0-9]+}}, 70(1)
 ; CHECK: stw {{[0-9]+}}, 66(1)
-; CHECK: lbz {{[0-9]+}}, 223(1)
-; CHECK: lhz {{[0-9]+}}, 221(1)
-; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: lbz {{[0-9]+}}, 191(1)
+; CHECK: lhz {{[0-9]+}}, 189(1)
+; CHECK: lwz {{[0-9]+}}, 185(1)
 ; CHECK: stb {{[0-9]+}}, 79(1)
 ; CHECK: sth {{[0-9]+}}, 77(1)
 ; CHECK: stw {{[0-9]+}}, 73(1)
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index aaa31d93d5f2..2e463005586f 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'PowerPC' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll
index a57fb9dd98d0..4e31550c40d4 100644
--- a/test/CodeGen/PowerPC/mcm-1.ll
+++ b/test/CodeGen/PowerPC/mcm-1.ll
@@ -17,7 +17,7 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_external:
+; CHECK-LABEL: test_external:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
index 4bec3e16fa04..b479559b97f5 100644
--- a/test/CodeGen/PowerPC/mcm-10.ll
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -16,7 +16,7 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_fn_static:
+; CHECK-LABEL: test_fn_static:
 ; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
 ; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
 ; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
index f2bc4c9cb72c..c49e8655cf5b 100644
--- a/test/CodeGen/PowerPC/mcm-11.ll
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -16,7 +16,7 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_file_static:
+; CHECK-LABEL: test_file_static:
 ; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
 ; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
 ; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
index 911305d4355f..b31b6053fca0 100644
--- a/test/CodeGen/PowerPC/mcm-12.ll
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -13,6 +13,6 @@ entry:
 
 ; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
 ; CHECK: .quad 4562098671269285104
-; CHECK: test_double_const:
+; CHECK-LABEL: test_double_const:
 ; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
 ; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
index f0dff4c5a39c..fee98d838ff1 100644
--- a/test/CodeGen/PowerPC/mcm-2.ll
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -17,7 +17,7 @@ entry:
   ret i32 %0
 }
 
-; MEDIUM: test_fn_static:
+; MEDIUM-LABEL: test_fn_static:
 ; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
 ; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
 ; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
@@ -26,12 +26,14 @@ entry:
 ; MEDIUM: .local [[VAR]]
 ; MEDIUM: .comm [[VAR]],4,4
 
-; LARGE: test_fn_static:
+; LARGE-LABEL: test_fn_static:
 ; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
 ; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
 ; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
 ; LARGE: stw {{[0-9]+}}, 0([[REG2]])
-; LARGE: .type [[VAR]],@object
-; LARGE: .local [[VAR]]
-; LARGE: .comm [[VAR]],4,4
+; LARGE: [[VAR]]:
+; LARGE: .tc [[VAR2:[a-z0-9A-Z_.]+]][TC],[[VAR2]]
+; LARGE: .type [[VAR2]],@object
+; LARGE: .local [[VAR2]]
+; LARGE: .comm [[VAR2]],4,4
 
diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll
index b7905503f458..b6d681d580ad 100644
--- a/test/CodeGen/PowerPC/mcm-3.ll
+++ b/test/CodeGen/PowerPC/mcm-3.ll
@@ -17,7 +17,7 @@ entry:
   ret i32 %0
 }
 
-; MEDIUM: test_file_static:
+; MEDIUM-LABEL: test_file_static:
 ; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
 ; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
 ; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
@@ -28,14 +28,16 @@ entry:
 ; MEDIUM: [[VAR]]:
 ; MEDIUM: .long 5
 
-; LARGE: test_file_static:
+; LARGE-LABEL: test_file_static:
 ; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
 ; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
 ; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
 ; LARGE: stw {{[0-9]+}}, 0([[REG2]])
-; LARGE: .type [[VAR]],@object
-; LARGE: .data
-; LARGE: .globl [[VAR]]
 ; LARGE: [[VAR]]:
+; LARGE: .tc [[VAR2:[a-z0-9A-Z_.]+]][TC],[[VAR2]]
+; LARGE: .type [[VAR2]],@object
+; LARGE: .data
+; LARGE: .globl [[VAR2]]
+; LARGE: [[VAR2]]:
 ; LARGE: .long 5
 
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
index 47c60c936038..73dd902cd028 100644
--- a/test/CodeGen/PowerPC/mcm-4.ll
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
-; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false <%s | FileCheck -check-prefix=LARGE %s
 
 ; Test correct code generation for medium and large code model
 ; for loading a value from the constant pool (TOC-relative).
@@ -14,14 +14,14 @@ entry:
 
 ; MEDIUM: [[VAR:[a-z0-9A-Z_.]+]]:
 ; MEDIUM: .quad 4562098671269285104
-; MEDIUM: test_double_const:
+; MEDIUM-LABEL: test_double_const:
 ; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
 ; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
 ; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]])
 
 ; LARGE: [[VAR:[a-z0-9A-Z_.]+]]:
 ; LARGE: .quad 4562098671269285104
-; LARGE: test_double_const:
-; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
-; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE-LABEL: test_double_const:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
 ; LARGE: lfd {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
index 1be27b7e8cc0..92ddecaeb8c8 100644
--- a/test/CodeGen/PowerPC/mcm-5.ll
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -51,7 +51,7 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
   ret i32 %5
 }
 
-; CHECK: test_jump_table:
+; CHECK-LABEL: test_jump_table:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: ldx {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll
index 35efaaa5628f..f7838b4b2527 100644
--- a/test/CodeGen/PowerPC/mcm-6.ll
+++ b/test/CodeGen/PowerPC/mcm-6.ll
@@ -17,7 +17,7 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_tentative:
+; CHECK-LABEL: test_tentative:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll
index 0dd39ee4109d..7caa13bcdcf8 100644
--- a/test/CodeGen/PowerPC/mcm-7.ll
+++ b/test/CodeGen/PowerPC/mcm-7.ll
@@ -18,7 +18,7 @@ entry:
 
 declare signext i32 @foo(i32 signext)
 
-; CHECK: test_fnaddr:
+; CHECK-LABEL: test_fnaddr:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: .section .toc
diff --git a/test/CodeGen/PowerPC/mcm-8.ll b/test/CodeGen/PowerPC/mcm-8.ll
index 3ece786d6447..643548f6b125 100644
--- a/test/CodeGen/PowerPC/mcm-8.ll
+++ b/test/CodeGen/PowerPC/mcm-8.ll
@@ -16,7 +16,7 @@ entry:
   ret i8 %1
 }
 
-; CHECK: test_avext:
+; CHECK-LABEL: test_avext:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: lbz {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll
index f366f45cc863..7906b6abea6a 100644
--- a/test/CodeGen/PowerPC/mcm-9.ll
+++ b/test/CodeGen/PowerPC/mcm-9.ll
@@ -7,8 +7,7 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
-@ei = external global i32
-@a = alias i32* @ei
+@a = external global i32
 
 define signext i32 @test_external() nounwind {
 entry:
@@ -18,7 +17,7 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_external:
+; CHECK-LABEL: test_external:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-default.ll b/test/CodeGen/PowerPC/mcm-default.ll
index 19de2536aec3..8d4ff14118f3 100644
--- a/test/CodeGen/PowerPC/mcm-default.ll
+++ b/test/CodeGen/PowerPC/mcm-default.ll
@@ -16,7 +16,7 @@ entry:
   ret i32 %0
 }
 
-; CHECK: test_external:
+; CHECK-LABEL: test_external:
 ; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
 ; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
index bc60b3baf2bb..a6e985545164 100644
--- a/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -20,7 +20,7 @@ entry:
 ; accessing function-scoped variable si.
 ;
 ; CHECK: Relocations [
-; CHECK:   Section (1) .text {
+; CHECK:   Section (2) .rela.text {
 ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
 ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
 ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index 720c5fb6dd65..d3d05eb48d32 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -1,6 +1,6 @@
-; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
 ; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM %s
-; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \
+; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
 ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
 
 ; FIXME: When asm-parse is available, could make this an assembly test.
@@ -22,12 +22,12 @@ entry:
 ; accessing external variable ei.
 ;
 ; MEDIUM:      Relocations [
-; MEDIUM:        Section (1) .text {
+; MEDIUM:        Section (2) .rela.text {
 ; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
 ; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
 ;
 ; LARGE:       Relocations [
-; LARGE:         Section (1) .text {
+; LARGE:         Section (2) .rela.text {
 ; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
 ; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
 
diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll
index 8fae7ad4d1df..b259ff182c0c 100644
--- a/test/CodeGen/PowerPC/misched-inorder-latency.ll
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -6,7 +6,7 @@ target triple = "powerpc64-bgq-linux"
 
 ; %val1 is a load live out of %entry. It should be hoisted
 ; above the add.
-; CHECK: testload:
+; CHECK-LABEL: testload:
 ; CHECK: %entry
 ; CHECK: lwz
 ; CHECK: addi
@@ -34,7 +34,7 @@ end:
 ; The prefetch gets a default latency of 3 cycles and should be hoisted
 ; above the add.
 ;
-; CHECK: testprefetch:
+; CHECK-LABEL: testprefetch:
 ; CHECK: %entry
 ; CHECK: dcbt
 ; CHECK: addi
diff --git a/test/CodeGen/PowerPC/mulli64.ll b/test/CodeGen/PowerPC/mulli64.ll
new file mode 100644
index 000000000000..21bc9cc37700
--- /dev/null
+++ b/test/CodeGen/PowerPC/mulli64.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %a) #0 {
+entry:                                            ; Returns %a * 3 as an i64.
+  %mul = mul nsw i64 %a, 3                         ; the checks after this function expect a single "mulli 3, 3, 3" for this multiply
+  ret i64 %mul
+}
+
+; CHECK-LABEL: @foo
+; CHECK: mulli 3, 3, 3
+; CHECK: blr
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll
index 2f6995c65dd8..2e649930da61 100644
--- a/test/CodeGen/PowerPC/negctr.ll
+++ b/test/CodeGen/PowerPC/negctr.ll
@@ -14,9 +14,12 @@ for.body:                                         ; preds = %for.body, %entry
   %exitcond = icmp eq i32 %lftr.wideiv, 0
   br i1 %exitcond, label %for.end, label %for.body
 
-; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
 ; CHECK: @main
-; CHECK-NOT: bdnz
+; CHECK: li [[REG:[0-9]+]], 0
+; CHECK: oris [[REG2:[0-9]+]], [[REG]], 65535
+; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
+; CHECK: mtctr [[REG3]]
+; CHECK: bdnz
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
@@ -80,4 +83,4 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/optcmp.ll b/test/CodeGen/PowerPC/optcmp.ll
index 523f329303bf..35aabfa52c1d 100644
--- a/test/CodeGen/PowerPC/optcmp.ll
+++ b/test/CodeGen/PowerPC/optcmp.ll
@@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 define signext i32 @foo(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
 entry:
   %sub = sub nsw i32 %a, %b
-  store i32 %sub, i32* %c, align 4, !tbaa !0
+  store i32 %sub, i32* %c, align 4
   %cmp = icmp sgt i32 %a, %b
   %cond = select i1 %cmp, i32 %a, i32 %b
   ret i32 %cond
@@ -17,7 +17,7 @@ entry:
 define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
 entry:
   %shl = shl i32 %a, %b
-  store i32 %shl, i32* %c, align 4, !tbaa !0
+  store i32 %shl, i32* %c, align 4
   %cmp = icmp sgt i32 %shl, 0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -29,7 +29,7 @@ entry:
 define i64 @fool(i64 %a, i64 %b, i64* nocapture %c) #0 {
 entry:
   %sub = sub nsw i64 %a, %b
-  store i64 %sub, i64* %c, align 8, !tbaa !3
+  store i64 %sub, i64* %c, align 8
   %cmp = icmp sgt i64 %a, %b
   %cond = select i1 %cmp, i64 %a, i64 %b
   ret i64 %cond
@@ -43,7 +43,7 @@ entry:
 define i64 @foolb(i64 %a, i64 %b, i64* nocapture %c) #0 {
 entry:
   %sub = sub nsw i64 %a, %b
-  store i64 %sub, i64* %c, align 8, !tbaa !3
+  store i64 %sub, i64* %c, align 8
   %cmp = icmp sle i64 %a, %b
   %cond = select i1 %cmp, i64 %a, i64 %b
   ret i64 %cond
@@ -57,7 +57,7 @@ entry:
 define i64 @foolc(i64 %a, i64 %b, i64* nocapture %c) #0 {
 entry:
   %sub = sub nsw i64 %b, %a
-  store i64 %sub, i64* %c, align 8, !tbaa !3
+  store i64 %sub, i64* %c, align 8
   %cmp = icmp sgt i64 %a, %b
   %cond = select i1 %cmp, i64 %a, i64 %b
   ret i64 %cond
@@ -71,7 +71,7 @@ entry:
 define i64 @foold(i64 %a, i64 %b, i64* nocapture %c) #0 {
 entry:
   %sub = sub nsw i64 %b, %a
-  store i64 %sub, i64* %c, align 8, !tbaa !3
+  store i64 %sub, i64* %c, align 8
   %cmp = icmp eq i64 %a, %b
   %cond = select i1 %cmp, i64 %a, i64 %b
   ret i64 %cond
@@ -85,7 +85,7 @@ entry:
 define i64 @foold2(i64 %a, i64 %b, i64* nocapture %c) #0 {
 entry:
   %sub = sub nsw i64 %a, %b
-  store i64 %sub, i64* %c, align 8, !tbaa !3
+  store i64 %sub, i64* %c, align 8
   %cmp = icmp eq i64 %a, %b
   %cond = select i1 %cmp, i64 %a, i64 %b
   ret i64 %cond
@@ -99,7 +99,7 @@ entry:
 define i64 @foo2l(i64 %a, i64 %b, i64* nocapture %c) #0 {
 entry:
   %shl = shl i64 %a, %b
-  store i64 %shl, i64* %c, align 8, !tbaa !3
+  store i64 %shl, i64* %c, align 8
   %cmp = icmp sgt i64 %shl, 0
   %conv1 = zext i1 %cmp to i64
   ret i64 %conv1
@@ -112,7 +112,7 @@ entry:
 define double @food(double %a, double %b, double* nocapture %c) #0 {
 entry:
   %sub = fsub double %a, %b
-  store double %sub, double* %c, align 8, !tbaa !3
+  store double %sub, double* %c, align 8
   %cmp = fcmp ogt double %a, %b
   %cond = select i1 %cmp, double %a, double %b
   ret double %cond
@@ -125,7 +125,7 @@ entry:
 define float @foof(float %a, float %b, float* nocapture %c) #0 {
 entry:
   %sub = fsub float %a, %b
-  store float %sub, float* %c, align 4, !tbaa !3
+  store float %sub, float* %c, align 4
   %cmp = fcmp ogt float %a, %b
   %cond = select i1 %cmp, float %a, float %b
   ret float %cond
@@ -135,9 +135,18 @@ entry:
 ; CHECK: stfs 0, 0(5)
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"long", metadata !1}
-!4 = metadata !{metadata !"any pointer", metadata !1}
+declare i64 @llvm.ctpop.i64(i64);
+
+define signext i64 @fooct(i64 signext %a, i64 signext %b, i64* nocapture %c) #0 {
+entry:                                            ; Stores ctpop(%a - %b) to %c and returns %a if that count is > 0, else %b.
+  %sub = sub nsw i64 %a, %b
+  %subc = call i64 @llvm.ctpop.i64(i64 %sub)       ; population count of the difference
+  store i64 %subc, i64* %c, align 4
+  %cmp = icmp sgt i64 %subc, 0                     ; compare of the ctpop result against zero
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @fooct
+; CHECK-NOT: popcntd.
+}
 
diff --git a/test/CodeGen/PowerPC/ppc32-vacopy.ll b/test/CodeGen/PowerPC/ppc32-vacopy.ll
new file mode 100644
index 000000000000..bc394125f135
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc32-vacopy.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple="powerpc-unknown-linux-gnu" < %s | FileCheck %s
+; PR15286
+
+%va_list = type {i8, i8, i16, i8*, i8*}
+declare void @llvm.va_copy(i8*, i8*)
+
+define void @test_vacopy() nounwind {
+entry:                                            ; Copies one stack-allocated %va_list into another with llvm.va_copy.
+	%0 = alloca %va_list                      ; source va_list
+	%1 = alloca %va_list                      ; destination va_list
+	%2 = bitcast %va_list* %0 to i8*
+	%3 = bitcast %va_list* %1 to i8*
+
+	call void @llvm.va_copy(i8* %3, i8* %2)   ; destination first, source second; expected as three word loads then three word stores
+
+	ret void
+}
+; CHECK-LABEL: test_vacopy:
+; CHECK: lwz [[REG1:[0-9]+]], {{.*}}
+; CHECK: lwz [[REG2:[0-9]+]], {{.*}}
+; CHECK: lwz [[REG3:[0-9]+]], {{.*}}
+; CHECK: stw [[REG1]], {{.*}}
+; CHECK: stw [[REG2]], {{.*}}
+; CHECK: stw [[REG3]], {{.*}}
diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index 10b70d02e5cc..764d3ce5fd45 100644
--- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false < %s | FileCheck %s
 
 ; Verify internal alignment of long double in a struct.  The double
 ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll
index c382edbbce4e..1f3bb7111efd 100644
--- a/test/CodeGen/PowerPC/ppc64-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64-calls.ll
@@ -12,7 +12,7 @@ define weak void @foo_weak() nounwind {
 
 ; Calls to local function does not require the TOC restore 'nop'
 define void @test_direct() nounwind readnone {
-; CHECK: test_direct:
+; CHECK-LABEL: test_direct:
   tail call void @foo() nounwind
 ; CHECK: bl foo
 ; CHECK-NOT: nop
@@ -22,7 +22,7 @@ define void @test_direct() nounwind readnone {
 ; Calls to weak function requires a TOC restore 'nop' because they
 ; may be overridden in a different module.
 define void @test_weak() nounwind readnone {
-; CHECK: test_weak:
+; CHECK-LABEL: test_weak:
   tail call void @foo_weak() nounwind
 ; CHECK: bl foo
 ; CHECK-NEXT: nop
@@ -31,7 +31,7 @@ define void @test_weak() nounwind readnone {
 
 ; Indirect calls requires a full stub creation
 define void @test_indirect(void ()* nocapture %fp) nounwind {
-; CHECK: test_indirect:
+; CHECK-LABEL: test_indirect:
   tail call void %fp() nounwind
 ; CHECK: ld [[FP:[0-9]+]], 0(3)
 ; CHECK: ld 11, 16(3)
@@ -44,7 +44,7 @@ define void @test_indirect(void ()* nocapture %fp) nounwind {
 
 ; Absolute vales should be have the TOC restore 'nop'
 define void @test_abs() nounwind {
-; CHECK: test_abs:
+; CHECK-LABEL: test_abs:
   tail call void inttoptr (i64 1024 to void ()*)() nounwind
 ; CHECK: bla 1024
 ; CHECK-NEXT: nop
@@ -55,7 +55,7 @@ declare double @sin(double) nounwind
 
 ; External functions call should also have a 'nop'
 define double @test_external(double %x) nounwind {
-; CHECK: test_external:
+; CHECK-LABEL: test_external:
   %call = tail call double @sin(double %x) nounwind
 ; CHECK: bl sin
 ; CHECK-NEXT: nop
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
index 7f30ef883e9a..f349919b7e99 100644
--- a/test/CodeGen/PowerPC/ppc64-toc.ll
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -8,7 +8,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 
 define i64 @access_int64(i64 %a) nounwind readonly {
 entry:
-; CHECK: access_int64:
+; CHECK-LABEL: access_int64:
 ; CHECK-NEXT: .align  3
 ; CHECK-NEXT: .quad   .L.access_int64
 ; CHECK-NEXT: .quad   .TOC.@tocbase
@@ -23,7 +23,7 @@ entry:
 
 define i64 @internal_static_var(i64 %a) nounwind {
 entry:
-; CHECK: internal_static_var:
+; CHECK-LABEL: internal_static_var:
 ; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
   %0 = load i64* @internal_static_var.x, align 8
   %cmp = icmp eq i64 %0, %a
@@ -33,7 +33,7 @@ entry:
 
 define i32 @access_double(double %a) nounwind readnone {
 entry:
-; CHECK: access_double:
+; CHECK-LABEL: access_double:
 ; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
   %cmp = fcmp oeq double %a, 2.000000e+00
   %conv = zext i1 %cmp to i32 
@@ -43,7 +43,7 @@ entry:
 
 define i32 @access_double_array(double %a, i32 %i) nounwind readonly {
 entry:
-; CHECK: access_double_array:
+; CHECK-LABEL: access_double_array:
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds [32 x double]* @double_array, i64 0, i64 %idxprom
   %0 = load double* %arrayidx, align 8
diff --git a/test/CodeGen/PowerPC/pr13891.ll b/test/CodeGen/PowerPC/pr13891.ll
index 3ae73850a342..4be65dd43d6a 100644
--- a/test/CodeGen/PowerPC/pr13891.ll
+++ b/test/CodeGen/PowerPC/pr13891.ll
@@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 %struct.foo = type { i8, i8 }
 
 define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noinline {
-; CHECK: _Z5check3foos:
+; CHECK-LABEL: _Z5check3foos:
 ; CHECK: sth 3, {{[0-9]+}}(1)
 ; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
 entry:
diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll
index 5ccf941a1f16..e58ad80e139b 100644
--- a/test/CodeGen/PowerPC/pr15031.ll
+++ b/test/CodeGen/PowerPC/pr15031.ll
@@ -317,54 +317,42 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry, %if.then
   %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ]
   %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
-  %1 = load i32* %RegNo.i.i, align 4, !tbaa !0
+  %1 = load i32* %RegNo.i.i, align 4
   %cmp.i = icmp eq i32 %1, %Reg.addr.0
   br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i
 
 if.end.i:                                         ; preds = %if.end
   %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3
-  %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8, !tbaa !3
+  %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8
   %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null
   br i1 %tobool.i, label %if.end13.i, label %if.then3.i
 
 if.then3.i:                                       ; preds = %if.end.i
   %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2
-  %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8, !tbaa !3
+  %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8
   %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null
   br i1 %tobool5.i, label %if.end13.i, label %if.then6.i
 
 if.then6.i:                                       ; preds = %if.then3.i
   %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
-  %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8, !tbaa !3
+  %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8
   %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null
   br i1 %tobool8.i, label %if.end13.i, label %if.then9.i
 
 if.then9.i:                                       ; preds = %if.then6.i
   %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction"* %4, i64 0, i32 5
-  %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8, !tbaa !3
+  %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8
   tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
-  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0
+  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4
   tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
   br label %_ZN4llvm14MachineOperand6setRegEj.exit
 
 if.end13.i:                                       ; preds = %if.then6.i, %if.then3.i, %if.end.i
-  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0
+  store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4
   br label %_ZN4llvm14MachineOperand6setRegEj.exit
 
 _ZN4llvm14MachineOperand6setRegEj.exit:           ; preds = %if.end, %if.then9.i, %if.end13.i
   ret void
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
-!4 = metadata !{metadata !"vtable pointer", metadata !2}
-!5 = metadata !{metadata !"long", metadata !1}
-!6 = metadata !{i64 0, i64 8, metadata !3, i64 8, i64 8, metadata !5}
-!7 = metadata !{metadata !"short", metadata !1}
-!8 = metadata !{i64 0, i64 1, metadata !1, i64 1, i64 4, metadata !0, i64 2, i64 1, metadata !1, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 4, i64 4, metadata !0, i64 4, i64 4, metadata !0, i64 8, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !5, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 8, metadata !3, i64 16, i64 4, metadata !0, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 4, metadata !0}
-!9 = metadata !{metadata !"bool", metadata !1}
-!10 = metadata !{i8 0, i8 2}
-
 ; CHECK-NOT: lbzu 3, 1(3)
diff --git a/test/CodeGen/PowerPC/pr16556-2.ll b/test/CodeGen/PowerPC/pr16556-2.ll
new file mode 100644
index 000000000000..e2dae4573c72
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr16556-2.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s
+
+; This test formerly failed because of wrong custom lowering for
+; fptosi of ppc_fp128.
+
+target datalayout = "E-p:32:32:32-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:64:128-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+%core.time.TickDuration = type { i64 }
+
+@_D4core4time12TickDuration11ticksPerSecyl = global i64 0
+@.str5 = internal unnamed_addr constant [40 x i8] c"..\5Cldc\5Cruntime\5Cdruntime\5Csrc\5Ccore\5Ctime.d\00"
+@.str83 = internal constant [10 x i8] c"null this\00"
+@.modulefilename = internal constant { i32, i8* } { i32 39, i8* getelementptr inbounds ([40 x i8]* @.str5, i32 0, i32 0) }
+
+declare i8* @_d_assert_msg({ i32, i8* }, { i32, i8* }, i32)
+
+; No output is checked: llc only has to compile the function below; the fptosi in %noassert is the operation under test.
+define weak_odr fastcc i64 @_D4core4time12TickDuration30__T2toVAyaa7_7365636f6e6473TlZ2toMxFNaNbNfZl(%core.time.TickDuration* %.this_arg) {
+entry:
+  %unitsPerSec = alloca i64, align 8                ; never referenced again in this function
+  %tmp = icmp ne %core.time.TickDuration* %.this_arg, null
+  br i1 %tmp, label %noassert, label %assert
+
+assert:                                           ; preds = %entry
+  %tmp1 = load { i32, i8* }* @.modulefilename
+  %0 = call i8* @_d_assert_msg({ i32, i8* } { i32 9, i8* getelementptr inbounds ([10 x i8]* @.str83, i32 0, i32 0) }, { i32, i8* } %tmp1, i32 1586)
+  unreachable
+
+noassert:                                         ; preds = %entry
+  %tmp2 = getelementptr %core.time.TickDuration* %.this_arg, i32 0, i32 0
+  %tmp3 = load i64* %tmp2
+  %tmp4 = sitofp i64 %tmp3 to ppc_fp128
+  %tmp5 = load i64* @_D4core4time12TickDuration11ticksPerSecyl
+  %tmp6 = sitofp i64 %tmp5 to ppc_fp128
+  %tmp7 = fdiv ppc_fp128 %tmp6, 0xM80000000000000000000000000000000 ; divisor literal: only the top bit of the leading word is set (presumably -0.0)
+  %tmp8 = fdiv ppc_fp128 %tmp4, %tmp7
+  %tmp9 = fptosi ppc_fp128 %tmp8 to i64             ; the ppc_fp128 -> i64 conversion whose lowering used to be wrong
+  ret i64 %tmp9
+}
+
diff --git a/test/CodeGen/PowerPC/pr16556.ll b/test/CodeGen/PowerPC/pr16556.ll
new file mode 100644
index 000000000000..dc36f0b6eafc
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr16556.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s
+
+; This test formerly failed due to no handling for a ppc_fp128 undef.
+
+target datalayout = "E-p:32:32:32-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:64:128-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+%core.time.TickDuration.37.125 = type { i64 }
+
+define weak_odr fastcc i64 @_D4core4time12TickDuration30__T2toVAyaa7_7365636f6e6473TlZ2toMxFNaNbNfZl(%core.time.TickDuration.37.125* %.this_arg) {
+entry:
+  br i1 undef, label %noassert, label %assert       ; branch condition is undef; %.this_arg is never used
+
+assert:                                           ; preds = %entry
+  unreachable
+
+noassert:                                         ; preds = %entry
+  %tmp9 = fptosi ppc_fp128 undef to i64             ; undef ppc_fp128 operand: the case that formerly was not handled
+  ret i64 %tmp9
+}
diff --git a/test/CodeGen/PowerPC/pr16573.ll b/test/CodeGen/PowerPC/pr16573.ll
new file mode 100644
index 000000000000..7a7a8decc81f
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr16573.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test() {
+  %1 = fptrunc ppc_fp128 0xM818F2887B9295809800000000032D000 to double ; operand is a constant, so the truncated value appears as data in the output
+  ret double %1
+}
+; The expected .quad below is 0x818F2887B9295809 read as a signed 64-bit integer, i.e. the leading 16 hex digits of the literal.
+; CHECK: .quad -9111018957755033591
+
diff --git a/test/CodeGen/PowerPC/pr17168.ll b/test/CodeGen/PowerPC/pr17168.ll
new file mode 100644
index 000000000000..2848221e0764
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr17168.ll
@@ -0,0 +1,521 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s
+
+; This test formerly failed due to a DBG_VALUE being placed prior to a PHI
+; when fast-isel is partially successful before punting to DAG-isel.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@grid_points = external global [3 x i32], align 4
+
+; Function Attrs: nounwind
+define fastcc void @compute_rhs() #0 { ; control-flow skeleton only: there is no ret, every path ends in one of two self-branching blocks
+entry:
+  br i1 undef, label %for.cond871.preheader.for.inc960_crit_edge, label %for.end1042, !dbg !439
+
+for.cond871.preheader.for.inc960_crit_edge:       ; preds = %for.cond871.preheader.for.inc960_crit_edge, %entry
+  br i1 false, label %for.cond871.preheader.for.inc960_crit_edge, label %for.cond964.preheader, !dbg !439
+
+for.cond964.preheader:                            ; preds = %for.cond871.preheader.for.inc960_crit_edge
+  br i1 undef, label %for.cond968.preheader, label %for.end1042, !dbg !441
+
+for.cond968.preheader:                            ; preds = %for.cond968.preheader, %for.cond964.preheader
+  br i1 false, label %for.cond968.preheader, label %for.end1042, !dbg !441
+
+for.end1042:                                      ; preds = %for.cond968.preheader, %for.cond964.preheader, %entry
+  %0 = phi i32 [ undef, %for.cond964.preheader ], [ undef, %for.cond968.preheader ], [ undef, %entry ] ; all three incoming values are undef; only the presence of the PHI matters here
+  %1 = load i32* getelementptr inbounds ([3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444 ; reads element 0 of the external global @grid_points
+  tail call void @llvm.dbg.value(metadata !447, i64 0, metadata !119), !dbg !448 ; debug-value intrinsic in the same block as the PHI above (the DBG_VALUE/PHI ordering named in the file header)
+  %sub10454270 = add nsw i32 %0, -1, !dbg !448
+  %cmp10464271 = icmp sgt i32 %sub10454270, 1, !dbg !448 ; computed from the PHI result; used only by the branch in %for.cond1116.preheader
+  %sub11134263 = add nsw i32 %1, -1, !dbg !450
+  %cmp11144264 = icmp sgt i32 %sub11134263, 1, !dbg !450 ; computed from the loaded value; used only by the branch that ends this block
+  br i1 %cmp11144264, label %for.cond1116.preheader, label %for.cond1816.preheader.for.inc1898_crit_edge, !dbg !450
+
+for.cond1116.preheader:                           ; preds = %for.inc1658, %for.end1042
+  br i1 %cmp10464271, label %for.body1123, label %for.inc1658, !dbg !452
+
+for.body1123:                                     ; preds = %for.body1123, %for.cond1116.preheader
+  br label %for.body1123, !dbg !455 ; unconditional branch to itself: no exit from this block
+
+for.inc1658:                                      ; preds = %for.cond1116.preheader
+  br i1 undef, label %for.cond1116.preheader, label %for.cond1816.preheader.for.inc1898_crit_edge, !dbg !450
+
+for.cond1816.preheader.for.inc1898_crit_edge:     ; preds = %for.cond1816.preheader.for.inc1898_crit_edge, %for.inc1658, %for.end1042
+  br label %for.cond1816.preheader.for.inc1898_crit_edge, !dbg !458 ; unconditional branch to itself: no exit from this block
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!438, !464}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 190311)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !298, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"bt.c", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !82, metadata !102, metadata !114, metadata !132, metadata !145, metadata !154, metadata !155, metadata !162, metadata !183, metadata !200, metadata !201, metadata !207, metadata !208, metadata !215, metadata !221, metadata !230, metadata !238, metadata !246, metadata !255, metadata !260, metadata !261, metadata !268, metadata !274, metadata !279, metadata !280, metadata !287, metadata !293}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 74, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !12, i32 74} ; [ DW_TAG_subprogram ] [line 74] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8, metadata !9}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_unsigned_char]
+!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17, metadata !18, metadata !19, metadata !21, metadata !22, metadata !23, metadata !25, metadata !26}
+!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777290, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 74]
+!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554506, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 74]
+!15 = metadata !{i32 786688, metadata !4, metadata !"niter", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [niter] [line 76]
+!16 = metadata !{i32 786688, metadata !4, metadata !"step", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [step] [line 76]
+!17 = metadata !{i32 786688, metadata !4, metadata !"n3", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n3] [line 76]
+!18 = metadata !{i32 786688, metadata !4, metadata !"nthreads", metadata !5, i32 77, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [nthreads] [line 77]
+!19 = metadata !{i32 786688, metadata !4, metadata !"navg", metadata !5, i32 78, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [navg] [line 78]
+!20 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!21 = metadata !{i32 786688, metadata !4, metadata !"mflops", metadata !5, i32 78, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [mflops] [line 78]
+!22 = metadata !{i32 786688, metadata !4, metadata !"tmax", metadata !5, i32 80, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tmax] [line 80]
+!23 = metadata !{i32 786688, metadata !4, metadata !"verified", metadata !5, i32 81, metadata !24, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [verified] [line 81]
+!24 = metadata !{i32 786454, metadata !1, null, metadata !"boolean", i32 12, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [boolean] [line 12, size 0, align 0, offset 0] [from int]
+!25 = metadata !{i32 786688, metadata !4, metadata !"class", metadata !5, i32 82, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [class] [line 82]
+!26 = metadata !{i32 786688, metadata !4, metadata !"fp", metadata !5, i32 83, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [fp] [line 83]
+!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from FILE]
+!28 = metadata !{i32 786454, metadata !1, null, metadata !"FILE", i32 49, i64 0, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_typedef ] [FILE] [line 49, size 0, align 0, offset 0] [from _IO_FILE]
+!29 = metadata !{i32 786451, metadata !30, null, metadata !"_IO_FILE", i32 271, i64 1728, i64 64, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [_IO_FILE] [line 271, size 1728, align 64, offset 0] [def] [from ]
+!30 = metadata !{metadata !"/usr/include/libio.h", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!31 = metadata !{metadata !32, metadata !33, metadata !34, metadata !35, metadata !36, metadata !37, metadata !38, metadata !39, metadata !40, metadata !41, metadata !42, metadata !43, metadata !44, metadata !52, metadata !53, metadata !54, metadata !55, metadata !58, metadata !60, metadata !62, metadata !66, metadata !68, metadata !70, metadata !71, metadata !72, metadata !73, metadata !74, metadata !77, metadata !78}
+!32 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_flags", i32 272, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [_flags] [line 272, size 32, align 32, offset 0] [from int]
+!33 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_ptr", i32 277, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_ptr] [line 277, size 64, align 64, offset 64] [from ]
+!34 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_end", i32 278, i64 64, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_end] [line 278, size 64, align 64, offset 128] [from ]
+!35 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_base", i32 279, i64 64, i64 64, i64 192, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_base] [line 279, size 64, align 64, offset 192] [from ]
+!36 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_base", i32 280, i64 64, i64 64, i64 256, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_base] [line 280, size 64, align 64, offset 256] [from ]
+!37 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_ptr", i32 281, i64 64, i64 64, i64 320, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_ptr] [line 281, size 64, align 64, offset 320] [from ]
+!38 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_end", i32 282, i64 64, i64 64, i64 384, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_end] [line 282, size 64, align 64, offset 384] [from ]
+!39 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_buf_base", i32 283, i64 64, i64 64, i64 448, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_buf_base] [line 283, size 64, align 64, offset 448] [from ]
+!40 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_buf_end", i32 284, i64 64, i64 64, i64 512, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_buf_end] [line 284, size 64, align 64, offset 512] [from ]
+!41 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_save_base", i32 286, i64 64, i64 64, i64 576, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_save_base] [line 286, size 64, align 64, offset 576] [from ]
+!42 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_backup_base", i32 287, i64 64, i64 64, i64 640, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_backup_base] [line 287, size 64, align 64, offset 640] [from ]
+!43 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_save_end", i32 288, i64 64, i64 64, i64 704, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_save_end] [line 288, size 64, align 64, offset 704] [from ]
+!44 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_markers", i32 290, i64 64, i64 64, i64 768, i32 0, metadata !45} ; [ DW_TAG_member ] [_markers] [line 290, size 64, align 64, offset 768] [from ]
+!45 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !46} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_marker]
+!46 = metadata !{i32 786451, metadata !30, null, metadata !"_IO_marker", i32 186, i64 192, i64 64, i32 0, i32 0, null, metadata !47, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [_IO_marker] [line 186, size 192, align 64, offset 0] [def] [from ]
+!47 = metadata !{metadata !48, metadata !49, metadata !51}
+!48 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_next", i32 187, i64 64, i64 64, i64 0, i32 0, metadata !45} ; [ DW_TAG_member ] [_next] [line 187, size 64, align 64, offset 0] [from ]
+!49 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_sbuf", i32 188, i64 64, i64 64, i64 64, i32 0, metadata !50} ; [ DW_TAG_member ] [_sbuf] [line 188, size 64, align 64, offset 64] [from ]
+!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_FILE]
+!51 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_pos", i32 192, i64 32, i64 32, i64 128, i32 0, metadata !8} ; [ DW_TAG_member ] [_pos] [line 192, size 32, align 32, offset 128] [from int]
+!52 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_chain", i32 292, i64 64, i64 64, i64 832, i32 0, metadata !50} ; [ DW_TAG_member ] [_chain] [line 292, size 64, align 64, offset 832] [from ]
+!53 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_fileno", i32 294, i64 32, i64 32, i64 896, i32 0, metadata !8} ; [ DW_TAG_member ] [_fileno] [line 294, size 32, align 32, offset 896] [from int]
+!54 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_flags2", i32 298, i64 32, i64 32, i64 928, i32 0, metadata !8} ; [ DW_TAG_member ] [_flags2] [line 298, size 32, align 32, offset 928] [from int]
+!55 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_old_offset", i32 300, i64 64, i64 64, i64 960, i32 0, metadata !56} ; [ DW_TAG_member ] [_old_offset] [line 300, size 64, align 64, offset 960] [from __off_t]
+!56 = metadata !{i32 786454, metadata !30, null, metadata !"__off_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !57} ; [ DW_TAG_typedef ] [__off_t] [line 141, size 0, align 0, offset 0] [from long int]
+!57 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!58 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_cur_column", i32 304, i64 16, i64 16, i64 1024, i32 0, metadata !59} ; [ DW_TAG_member ] [_cur_column] [line 304, size 16, align 16, offset 1024] [from unsigned short]
+!59 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!60 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_vtable_offset", i32 305, i64 8, i64 8, i64 1040, i32 0, metadata !61} ; [ DW_TAG_member ] [_vtable_offset] [line 305, size 8, align 8, offset 1040] [from signed char]
+!61 = metadata !{i32 786468, null, null, metadata !"signed char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!62 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_shortbuf", i32 306, i64 8, i64 8, i64 1048, i32 0, metadata !63} ; [ DW_TAG_member ] [_shortbuf] [line 306, size 8, align 8, offset 1048] [from ]
+!63 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 8, i64 8, i32 0, i32 0, metadata !11, metadata !64, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!64 = metadata !{metadata !65}
+!65 = metadata !{i32 786465, i64 0, i64 1}        ; [ DW_TAG_subrange_type ] [0, 0]
+!66 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_lock", i32 310, i64 64, i64 64, i64 1088, i32 0, metadata !67} ; [ DW_TAG_member ] [_lock] [line 310, size 64, align 64, offset 1088] [from ]
+!67 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!68 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_offset", i32 319, i64 64, i64 64, i64 1152, i32 0, metadata !69} ; [ DW_TAG_member ] [_offset] [line 319, size 64, align 64, offset 1152] [from __off64_t]
+!69 = metadata !{i32 786454, metadata !30, null, metadata !"__off64_t", i32 142, i64 0, i64 0, i64 0, i32 0, metadata !57} ; [ DW_TAG_typedef ] [__off64_t] [line 142, size 0, align 0, offset 0] [from long int]
+!70 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad1", i32 328, i64 64, i64 64, i64 1216, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad1] [line 328, size 64, align 64, offset 1216] [from ]
+!71 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad2", i32 329, i64 64, i64 64, i64 1280, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad2] [line 329, size 64, align 64, offset 1280] [from ]
+!72 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad3", i32 330, i64 64, i64 64, i64 1344, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad3] [line 330, size 64, align 64, offset 1344] [from ]
+!73 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad4", i32 331, i64 64, i64 64, i64 1408, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad4] [line 331, size 64, align 64, offset 1408] [from ]
+!74 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad5", i32 332, i64 64, i64 64, i64 1472, i32 0, metadata !75} ; [ DW_TAG_member ] [__pad5] [line 332, size 64, align 64, offset 1472] [from size_t]
+!75 = metadata !{i32 786454, metadata !30, null, metadata !"size_t", i32 42, i64 0, i64 0, i64 0, i32 0, metadata !76} ; [ DW_TAG_typedef ] [size_t] [line 42, size 0, align 0, offset 0] [from long unsigned int]
+!76 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!77 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_mode", i32 334, i64 32, i64 32, i64 1536, i32 0, metadata !8} ; [ DW_TAG_member ] [_mode] [line 334, size 32, align 32, offset 1536] [from int]
+!78 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_unused2", i32 336, i64 160, i64 8, i64 1568, i32 0, metadata !79} ; [ DW_TAG_member ] [_unused2] [line 336, size 160, align 8, offset 1568] [from ]
+!79 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !11, metadata !80, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!80 = metadata !{metadata !81}
+!81 = metadata !{i32 786465, i64 0, i64 20}       ; [ DW_TAG_subrange_type ] [0, 19]
+!82 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"verify", metadata !"verify", metadata !"", i32 2388, metadata !83, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !86, i32 2388} ; [ DW_TAG_subprogram ] [line 2388] [local] [def] [verify]
+!83 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !84, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!84 = metadata !{null, metadata !8, metadata !10, metadata !85}
+!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from boolean]
+!86 = metadata !{metadata !87, metadata !88, metadata !89, metadata !90, metadata !94, metadata !95, metadata !96, metadata !97, metadata !98, metadata !99, metadata !100, metadata !101}
+!87 = metadata !{i32 786689, metadata !82, metadata !"no_time_steps", metadata !5, i32 16779604, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [no_time_steps] [line 2388]
+!88 = metadata !{i32 786689, metadata !82, metadata !"class", metadata !5, i32 33556820, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [class] [line 2388]
+!89 = metadata !{i32 786689, metadata !82, metadata !"verified", metadata !5, i32 50334036, metadata !85, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [verified] [line 2388]
+!90 = metadata !{i32 786688, metadata !82, metadata !"xcrref", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcrref] [line 2397]
+!91 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 320, i64 64, i32 0, i32 0, metadata !20, metadata !92, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 64, offset 0] [from double]
+!92 = metadata !{metadata !93}
+!93 = metadata !{i32 786465, i64 0, i64 5}        ; [ DW_TAG_subrange_type ] [0, 4]
+!94 = metadata !{i32 786688, metadata !82, metadata !"xceref", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xceref] [line 2397]
+!95 = metadata !{i32 786688, metadata !82, metadata !"xcrdif", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcrdif] [line 2397]
+!96 = metadata !{i32 786688, metadata !82, metadata !"xcedif", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcedif] [line 2397]
+!97 = metadata !{i32 786688, metadata !82, metadata !"epsilon", metadata !5, i32 2398, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [epsilon] [line 2398]
+!98 = metadata !{i32 786688, metadata !82, metadata !"xce", metadata !5, i32 2398, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xce] [line 2398]
+!99 = metadata !{i32 786688, metadata !82, metadata !"xcr", metadata !5, i32 2398, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcr] [line 2398]
+!100 = metadata !{i32 786688, metadata !82, metadata !"dtref", metadata !5, i32 2398, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtref] [line 2398]
+!101 = metadata !{i32 786688, metadata !82, metadata !"m", metadata !5, i32 2399, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 2399]
+!102 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"rhs_norm", metadata !"rhs_norm", metadata !"", i32 266, metadata !103, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !106, i32 266} ; [ DW_TAG_subprogram ] [line 266] [local] [def] [rhs_norm]
+!103 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !104, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!104 = metadata !{null, metadata !105}
+!105 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
+!106 = metadata !{metadata !107, metadata !108, metadata !109, metadata !110, metadata !111, metadata !112, metadata !113}
+!107 = metadata !{i32 786689, metadata !102, metadata !"rms", metadata !5, i32 16777482, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rms] [line 266]
+!108 = metadata !{i32 786688, metadata !102, metadata !"i", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 271]
+!109 = metadata !{i32 786688, metadata !102, metadata !"j", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 271]
+!110 = metadata !{i32 786688, metadata !102, metadata !"k", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 271]
+!111 = metadata !{i32 786688, metadata !102, metadata !"d", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 271]
+!112 = metadata !{i32 786688, metadata !102, metadata !"m", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 271]
+!113 = metadata !{i32 786688, metadata !102, metadata !"add", metadata !5, i32 272, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [add] [line 272]
+!114 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"compute_rhs", metadata !"compute_rhs", metadata !"", i32 1767, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @compute_rhs, null, null, metadata !117, i32 1767} ; [ DW_TAG_subprogram ] [line 1767] [local] [def] [compute_rhs]
+!115 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !116, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!116 = metadata !{null}
+!117 = metadata !{metadata !118, metadata !119, metadata !120, metadata !121, metadata !122, metadata !123, metadata !124, metadata !125, metadata !126, metadata !127, metadata !128, metadata !129, metadata !130, metadata !131}
+!118 = metadata !{i32 786688, metadata !114, metadata !"i", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1769]
+!119 = metadata !{i32 786688, metadata !114, metadata !"j", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1769]
+!120 = metadata !{i32 786688, metadata !114, metadata !"k", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1769]
+!121 = metadata !{i32 786688, metadata !114, metadata !"m", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 1769]
+!122 = metadata !{i32 786688, metadata !114, metadata !"rho_inv", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [rho_inv] [line 1770]
+!123 = metadata !{i32 786688, metadata !114, metadata !"uijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [uijk] [line 1770]
+!124 = metadata !{i32 786688, metadata !114, metadata !"up1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [up1] [line 1770]
+!125 = metadata !{i32 786688, metadata !114, metadata !"um1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [um1] [line 1770]
+!126 = metadata !{i32 786688, metadata !114, metadata !"vijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vijk] [line 1770]
+!127 = metadata !{i32 786688, metadata !114, metadata !"vp1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vp1] [line 1770]
+!128 = metadata !{i32 786688, metadata !114, metadata !"vm1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vm1] [line 1770]
+!129 = metadata !{i32 786688, metadata !114, metadata !"wijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wijk] [line 1770]
+!130 = metadata !{i32 786688, metadata !114, metadata !"wp1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wp1] [line 1770]
+!131 = metadata !{i32 786688, metadata !114, metadata !"wm1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wm1] [line 1770]
+!132 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"error_norm", metadata !"error_norm", metadata !"", i32 225, metadata !103, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !133, i32 225} ; [ DW_TAG_subprogram ] [line 225] [local] [def] [error_norm]
+!133 = metadata !{metadata !134, metadata !135, metadata !136, metadata !137, metadata !138, metadata !139, metadata !140, metadata !141, metadata !142, metadata !143, metadata !144}
+!134 = metadata !{i32 786689, metadata !132, metadata !"rms", metadata !5, i32 16777441, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rms] [line 225]
+!135 = metadata !{i32 786688, metadata !132, metadata !"i", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 232]
+!136 = metadata !{i32 786688, metadata !132, metadata !"j", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 232]
+!137 = metadata !{i32 786688, metadata !132, metadata !"k", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 232]
+!138 = metadata !{i32 786688, metadata !132, metadata !"m", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 232]
+!139 = metadata !{i32 786688, metadata !132, metadata !"d", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 232]
+!140 = metadata !{i32 786688, metadata !132, metadata !"xi", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 233]
+!141 = metadata !{i32 786688, metadata !132, metadata !"eta", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 233]
+!142 = metadata !{i32 786688, metadata !132, metadata !"zeta", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 233]
+!143 = metadata !{i32 786688, metadata !132, metadata !"u_exact", metadata !5, i32 233, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u_exact] [line 233]
+!144 = metadata !{i32 786688, metadata !132, metadata !"add", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [add] [line 233]
+!145 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"exact_solution", metadata !"exact_solution", metadata !"", i32 643, metadata !146, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !148, i32 644} ; [ DW_TAG_subprogram ] [line 643] [local] [def] [scope 644] [exact_solution]
+!146 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !147, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!147 = metadata !{null, metadata !20, metadata !20, metadata !20, metadata !105}
+!148 = metadata !{metadata !149, metadata !150, metadata !151, metadata !152, metadata !153}
+!149 = metadata !{i32 786689, metadata !145, metadata !"xi", metadata !5, i32 16777859, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [xi] [line 643]
+!150 = metadata !{i32 786689, metadata !145, metadata !"eta", metadata !5, i32 33555075, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [eta] [line 643]
+!151 = metadata !{i32 786689, metadata !145, metadata !"zeta", metadata !5, i32 50332291, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [zeta] [line 643]
+!152 = metadata !{i32 786689, metadata !145, metadata !"dtemp", metadata !5, i32 67109508, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [dtemp] [line 644]
+!153 = metadata !{i32 786688, metadata !145, metadata !"m", metadata !5, i32 653, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 653]
+!154 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"set_constants", metadata !"set_constants", metadata !"", i32 2191, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 2191} ; [ DW_TAG_subprogram ] [line 2191] [local] [def] [set_constants]
+!155 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsinit", metadata !"lhsinit", metadata !"", i32 855, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !156, i32 855} ; [ DW_TAG_subprogram ] [line 855] [local] [def] [lhsinit]
+!156 = metadata !{metadata !157, metadata !158, metadata !159, metadata !160, metadata !161}
+!157 = metadata !{i32 786688, metadata !155, metadata !"i", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 857]
+!158 = metadata !{i32 786688, metadata !155, metadata !"j", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 857]
+!159 = metadata !{i32 786688, metadata !155, metadata !"k", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 857]
+!160 = metadata !{i32 786688, metadata !155, metadata !"m", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 857]
+!161 = metadata !{i32 786688, metadata !155, metadata !"n", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 857]
+!162 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"initialize", metadata !"initialize", metadata !"", i32 669, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !163, i32 669} ; [ DW_TAG_subprogram ] [line 669] [local] [def] [initialize]
+!163 = metadata !{metadata !164, metadata !165, metadata !166, metadata !167, metadata !168, metadata !169, metadata !170, metadata !171, metadata !172, metadata !173, metadata !174, metadata !179, metadata !180, metadata !181, metadata !182}
+!164 = metadata !{i32 786688, metadata !162, metadata !"i", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 679]
+!165 = metadata !{i32 786688, metadata !162, metadata !"j", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 679]
+!166 = metadata !{i32 786688, metadata !162, metadata !"k", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 679]
+!167 = metadata !{i32 786688, metadata !162, metadata !"m", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 679]
+!168 = metadata !{i32 786688, metadata !162, metadata !"ix", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ix] [line 679]
+!169 = metadata !{i32 786688, metadata !162, metadata !"iy", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [iy] [line 679]
+!170 = metadata !{i32 786688, metadata !162, metadata !"iz", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [iz] [line 679]
+!171 = metadata !{i32 786688, metadata !162, metadata !"xi", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 680]
+!172 = metadata !{i32 786688, metadata !162, metadata !"eta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 680]
+!173 = metadata !{i32 786688, metadata !162, metadata !"zeta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 680]
+!174 = metadata !{i32 786688, metadata !162, metadata !"Pface", metadata !5, i32 680, metadata !175, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pface] [line 680]
+!175 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1920, i64 64, i32 0, i32 0, metadata !20, metadata !176, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1920, align 64, offset 0] [from double]
+!176 = metadata !{metadata !177, metadata !178, metadata !93}
+!177 = metadata !{i32 786465, i64 0, i64 2}       ; [ DW_TAG_subrange_type ] [0, 1]
+!178 = metadata !{i32 786465, i64 0, i64 3}       ; [ DW_TAG_subrange_type ] [0, 2]
+!179 = metadata !{i32 786688, metadata !162, metadata !"Pxi", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pxi] [line 680]
+!180 = metadata !{i32 786688, metadata !162, metadata !"Peta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Peta] [line 680]
+!181 = metadata !{i32 786688, metadata !162, metadata !"Pzeta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pzeta] [line 680]
+!182 = metadata !{i32 786688, metadata !162, metadata !"temp", metadata !5, i32 680, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [temp] [line 680]
+!183 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"exact_rhs", metadata !"exact_rhs", metadata !"", i32 301, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !184, i32 301} ; [ DW_TAG_subprogram ] [line 301] [local] [def] [exact_rhs]
+!184 = metadata !{metadata !185, metadata !186, metadata !187, metadata !188, metadata !189, metadata !190, metadata !191, metadata !192, metadata !193, metadata !194, metadata !195, metadata !196, metadata !197, metadata !198, metadata !199}
+!185 = metadata !{i32 786688, metadata !183, metadata !"dtemp", metadata !5, i32 310, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtemp] [line 310]
+!186 = metadata !{i32 786688, metadata !183, metadata !"xi", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 310]
+!187 = metadata !{i32 786688, metadata !183, metadata !"eta", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 310]
+!188 = metadata !{i32 786688, metadata !183, metadata !"zeta", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 310]
+!189 = metadata !{i32 786688, metadata !183, metadata !"dtpp", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtpp] [line 310]
+!190 = metadata !{i32 786688, metadata !183, metadata !"m", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 311]
+!191 = metadata !{i32 786688, metadata !183, metadata !"i", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 311]
+!192 = metadata !{i32 786688, metadata !183, metadata !"j", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 311]
+!193 = metadata !{i32 786688, metadata !183, metadata !"k", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 311]
+!194 = metadata !{i32 786688, metadata !183, metadata !"ip1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ip1] [line 311]
+!195 = metadata !{i32 786688, metadata !183, metadata !"im1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [im1] [line 311]
+!196 = metadata !{i32 786688, metadata !183, metadata !"jp1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jp1] [line 311]
+!197 = metadata !{i32 786688, metadata !183, metadata !"jm1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jm1] [line 311]
+!198 = metadata !{i32 786688, metadata !183, metadata !"km1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [km1] [line 311]
+!199 = metadata !{i32 786688, metadata !183, metadata !"kp1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [kp1] [line 311]
+!200 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"adi", metadata !"adi", metadata !"", i32 210, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 210} ; [ DW_TAG_subprogram ] [line 210] [local] [def] [adi]
+!201 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"add", metadata !"add", metadata !"", i32 187, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !202, i32 187} ; [ DW_TAG_subprogram ] [line 187] [local] [def] [add]
+!202 = metadata !{metadata !203, metadata !204, metadata !205, metadata !206}
+!203 = metadata !{i32 786688, metadata !201, metadata !"i", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 193]
+!204 = metadata !{i32 786688, metadata !201, metadata !"j", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 193]
+!205 = metadata !{i32 786688, metadata !201, metadata !"k", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 193]
+!206 = metadata !{i32 786688, metadata !201, metadata !"m", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 193]
+!207 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_solve", metadata !"z_solve", metadata !"", i32 3457, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3457} ; [ DW_TAG_subprogram ] [line 3457] [local] [def] [z_solve]
+!208 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_backsubstitute", metadata !"z_backsubstitute", metadata !"", i32 3480, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !209, i32 3480} ; [ DW_TAG_subprogram ] [line 3480] [local] [def] [z_backsubstitute]
+!209 = metadata !{metadata !210, metadata !211, metadata !212, metadata !213, metadata !214}
+!210 = metadata !{i32 786688, metadata !208, metadata !"i", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3492]
+!211 = metadata !{i32 786688, metadata !208, metadata !"j", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3492]
+!212 = metadata !{i32 786688, metadata !208, metadata !"k", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3492]
+!213 = metadata !{i32 786688, metadata !208, metadata !"m", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 3492]
+!214 = metadata !{i32 786688, metadata !208, metadata !"n", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 3492]
+!215 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_solve_cell", metadata !"z_solve_cell", metadata !"", i32 3512, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !216, i32 3512} ; [ DW_TAG_subprogram ] [line 3512] [local] [def] [z_solve_cell]
+!216 = metadata !{metadata !217, metadata !218, metadata !219, metadata !220}
+!217 = metadata !{i32 786688, metadata !215, metadata !"i", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3527]
+!218 = metadata !{i32 786688, metadata !215, metadata !"j", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3527]
+!219 = metadata !{i32 786688, metadata !215, metadata !"k", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3527]
+!220 = metadata !{i32 786688, metadata !215, metadata !"ksize", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ksize] [line 3527]
+!221 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"binvrhs", metadata !"binvrhs", metadata !"", i32 3154, metadata !222, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !225, i32 3154} ; [ DW_TAG_subprogram ] [line 3154] [local] [def] [binvrhs]
+!222 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !223, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!223 = metadata !{null, metadata !224, metadata !105}
+!224 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!225 = metadata !{metadata !226, metadata !227, metadata !228, metadata !229}
+!226 = metadata !{i32 786689, metadata !221, metadata !"lhs", metadata !5, i32 16780370, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [lhs] [line 3154]
+!227 = metadata !{i32 786689, metadata !221, metadata !"r", metadata !5, i32 33557586, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 3154]
+!228 = metadata !{i32 786688, metadata !221, metadata !"pivot", metadata !5, i32 3159, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pivot] [line 3159]
+!229 = metadata !{i32 786688, metadata !221, metadata !"coeff", metadata !5, i32 3159, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [coeff] [line 3159]
+!230 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"matmul_sub", metadata !"matmul_sub", metadata !"", i32 2841, metadata !231, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !233, i32 2842} ; [ DW_TAG_subprogram ] [line 2841] [local] [def] [scope 2842] [matmul_sub]
+!231 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !232, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!232 = metadata !{null, metadata !224, metadata !224, metadata !224}
+!233 = metadata !{metadata !234, metadata !235, metadata !236, metadata !237}
+!234 = metadata !{i32 786689, metadata !230, metadata !"ablock", metadata !5, i32 16780057, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ablock] [line 2841]
+!235 = metadata !{i32 786689, metadata !230, metadata !"bblock", metadata !5, i32 33557273, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bblock] [line 2841]
+!236 = metadata !{i32 786689, metadata !230, metadata !"cblock", metadata !5, i32 50334490, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [cblock] [line 2842]
+!237 = metadata !{i32 786688, metadata !230, metadata !"j", metadata !5, i32 2851, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2851]
+!238 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"matvec_sub", metadata !"matvec_sub", metadata !"", i32 2814, metadata !239, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !241, i32 2814} ; [ DW_TAG_subprogram ] [line 2814] [local] [def] [matvec_sub]
+!239 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !240, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!240 = metadata !{null, metadata !224, metadata !105, metadata !105}
+!241 = metadata !{metadata !242, metadata !243, metadata !244, metadata !245}
+!242 = metadata !{i32 786689, metadata !238, metadata !"ablock", metadata !5, i32 16780030, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ablock] [line 2814]
+!243 = metadata !{i32 786689, metadata !238, metadata !"avec", metadata !5, i32 33557246, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [avec] [line 2814]
+!244 = metadata !{i32 786689, metadata !238, metadata !"bvec", metadata !5, i32 50334462, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bvec] [line 2814]
+!245 = metadata !{i32 786688, metadata !238, metadata !"i", metadata !5, i32 2823, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2823]
+!246 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"binvcrhs", metadata !"binvcrhs", metadata !"", i32 2885, metadata !247, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !249, i32 2885} ; [ DW_TAG_subprogram ] [line 2885] [local] [def] [binvcrhs]
+!247 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !248, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!248 = metadata !{null, metadata !224, metadata !224, metadata !105}
+!249 = metadata !{metadata !250, metadata !251, metadata !252, metadata !253, metadata !254}
+!250 = metadata !{i32 786689, metadata !246, metadata !"lhs", metadata !5, i32 16780101, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [lhs] [line 2885]
+!251 = metadata !{i32 786689, metadata !246, metadata !"c", metadata !5, i32 33557317, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 2885]
+!252 = metadata !{i32 786689, metadata !246, metadata !"r", metadata !5, i32 50334533, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 2885]
+!253 = metadata !{i32 786688, metadata !246, metadata !"pivot", metadata !5, i32 2890, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pivot] [line 2890]
+!254 = metadata !{i32 786688, metadata !246, metadata !"coeff", metadata !5, i32 2890, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [coeff] [line 2890]
+!255 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsz", metadata !"lhsz", metadata !"", i32 1475, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !256, i32 1475} ; [ DW_TAG_subprogram ] [line 1475] [local] [def] [lhsz]
+!256 = metadata !{metadata !257, metadata !258, metadata !259}
+!257 = metadata !{i32 786688, metadata !255, metadata !"i", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1484]
+!258 = metadata !{i32 786688, metadata !255, metadata !"j", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1484]
+!259 = metadata !{i32 786688, metadata !255, metadata !"k", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1484]
+!260 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_solve", metadata !"y_solve", metadata !"", i32 3299, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3299} ; [ DW_TAG_subprogram ] [line 3299] [local] [def] [y_solve]
+!261 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_backsubstitute", metadata !"y_backsubstitute", metadata !"", i32 3323, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !262, i32 3323} ; [ DW_TAG_subprogram ] [line 3323] [local] [def] [y_backsubstitute]
+!262 = metadata !{metadata !263, metadata !264, metadata !265, metadata !266, metadata !267}
+!263 = metadata !{i32 786688, metadata !261, metadata !"i", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3335]
+!264 = metadata !{i32 786688, metadata !261, metadata !"j", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3335]
+!265 = metadata !{i32 786688, metadata !261, metadata !"k", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3335]
+!266 = metadata !{i32 786688, metadata !261, metadata !"m", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 3335]
+!267 = metadata !{i32 786688, metadata !261, metadata !"n", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 3335]
+!268 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_solve_cell", metadata !"y_solve_cell", metadata !"", i32 3355, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !269, i32 3355} ; [ DW_TAG_subprogram ] [line 3355] [local] [def] [y_solve_cell]
+!269 = metadata !{metadata !270, metadata !271, metadata !272, metadata !273}
+!270 = metadata !{i32 786688, metadata !268, metadata !"i", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3370]
+!271 = metadata !{i32 786688, metadata !268, metadata !"j", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3370]
+!272 = metadata !{i32 786688, metadata !268, metadata !"k", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3370]
+!273 = metadata !{i32 786688, metadata !268, metadata !"jsize", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jsize] [line 3370]
+!274 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsy", metadata !"lhsy", metadata !"", i32 1181, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !275, i32 1181} ; [ DW_TAG_subprogram ] [line 1181] [local] [def] [lhsy]
+!275 = metadata !{metadata !276, metadata !277, metadata !278}
+!276 = metadata !{i32 786688, metadata !274, metadata !"i", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1190]
+!277 = metadata !{i32 786688, metadata !274, metadata !"j", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1190]
+!278 = metadata !{i32 786688, metadata !274, metadata !"k", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1190]
+!279 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_solve", metadata !"x_solve", metadata !"", i32 2658, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 2658} ; [ DW_TAG_subprogram ] [line 2658] [local] [def] [x_solve]
+!280 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_backsubstitute", metadata !"x_backsubstitute", metadata !"", i32 2684, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !281, i32 2684} ; [ DW_TAG_subprogram ] [line 2684] [local] [def] [x_backsubstitute]
+!281 = metadata !{metadata !282, metadata !283, metadata !284, metadata !285, metadata !286}
+!282 = metadata !{i32 786688, metadata !280, metadata !"i", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2696]
+!283 = metadata !{i32 786688, metadata !280, metadata !"j", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2696]
+!284 = metadata !{i32 786688, metadata !280, metadata !"k", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 2696]
+!285 = metadata !{i32 786688, metadata !280, metadata !"m", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 2696]
+!286 = metadata !{i32 786688, metadata !280, metadata !"n", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 2696]
+!287 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_solve_cell", metadata !"x_solve_cell", metadata !"", i32 2716, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !288, i32 2716} ; [ DW_TAG_subprogram ] [line 2716] [local] [def] [x_solve_cell]
+!288 = metadata !{metadata !289, metadata !290, metadata !291, metadata !292}
+!289 = metadata !{i32 786688, metadata !287, metadata !"i", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2728]
+!290 = metadata !{i32 786688, metadata !287, metadata !"j", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2728]
+!291 = metadata !{i32 786688, metadata !287, metadata !"k", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 2728]
+!292 = metadata !{i32 786688, metadata !287, metadata !"isize", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [isize] [line 2728]
+!293 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsx", metadata !"lhsx", metadata !"", i32 898, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !294, i32 898} ; [ DW_TAG_subprogram ] [line 898] [local] [def] [lhsx]
+!294 = metadata !{metadata !295, metadata !296, metadata !297}
+!295 = metadata !{i32 786688, metadata !293, metadata !"i", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 907]
+!296 = metadata !{i32 786688, metadata !293, metadata !"j", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 907]
+!297 = metadata !{i32 786688, metadata !293, metadata !"k", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 907]
+!298 = metadata !{metadata !299, metadata !304, metadata !305, metadata !309, metadata !310, metadata !311, metadata !312, metadata !313, metadata !314, metadata !315, metadata !316, metadata !317, metadata !318, metadata !319, metadata !320, metadata !321, metadata !322, metadata !323, metadata !324, metadata !325, metadata !326, metadata !327, metadata !328, metadata !329, metadata !330, metadata !331, metadata !332, metadata !333, metadata !334, metadata !335, metadata !336, metadata !337, metadata !338, metadata !339, metadata !340, metadata !341, metadata !342, metadata !343, metadata !347, metadata !350, metadata !351, metadata !352, metadata !353, metadata !354, metadata !355, metadata !356, metadata !360, metadata !361, metadata !362, metadata !363, metadata !364, metadata !365, metadata !366, metadata !367, metadata !368, metadata !369, metadata !370, metadata !371, metadata !372, metadata !373, metadata !374, metadata !375, metadata !376, metadata !377, metadata !378, metadata !379, metadata !380, metadata !381, metadata !382, metadata !383, metadata !384, metadata !385, metadata !386, metadata !387, metadata !388, metadata !389, metadata !390, metadata !391, metadata !392, metadata !393, metadata !394, metadata !395, metadata !396, metadata !397, metadata !398, metadata !399, metadata !400, metadata !401, metadata !402, metadata !403, metadata !404, metadata !405, metadata !406, metadata !407, metadata !408, metadata !409, metadata !410, metadata !411, metadata !412, metadata !413, metadata !414, metadata !415, metadata !416, metadata !417, metadata !418, metadata !419, metadata !422, metadata !426, metadata !427, metadata !430, metadata !431, metadata !434, metadata !435, metadata !436, metadata !437}
+!299 = metadata !{i32 786484, i32 0, null, metadata !"grid_points", metadata !"grid_points", metadata !"", metadata !300, i32 28, metadata !302, i32 1, i32 1, [3 x i32]* @grid_points, null} ; [ DW_TAG_variable ] [grid_points] [line 28] [local] [def]
+!300 = metadata !{i32 786473, metadata !301}      ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/./header.h]
+!301 = metadata !{metadata !"./header.h", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!302 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 96, i64 32, i32 0, i32 0, metadata !8, metadata !303, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 96, align 32, offset 0] [from int]
+!303 = metadata !{metadata !178}
+!304 = metadata !{i32 786484, i32 0, null, metadata !"dt", metadata !"dt", metadata !"", metadata !300, i32 35, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dt] [line 35] [local] [def]
+!305 = metadata !{i32 786484, i32 0, null, metadata !"rhs", metadata !"rhs", metadata !"", metadata !300, i32 68, metadata !306, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [rhs] [line 68] [local] [def]
+!306 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1385839040, i64 64, i32 0, i32 0, metadata !20, metadata !307, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1385839040, align 64, offset 0] [from double]
+!307 = metadata !{metadata !308, metadata !308, metadata !308, metadata !93}
+!308 = metadata !{i32 786465, i64 0, i64 163}     ; [ DW_TAG_subrange_type ] [0, 162]
+!309 = metadata !{i32 786484, i32 0, null, metadata !"zzcon5", metadata !"zzcon5", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon5] [line 42] [local] [def]
+!310 = metadata !{i32 786484, i32 0, null, metadata !"zzcon4", metadata !"zzcon4", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon4] [line 42] [local] [def]
+!311 = metadata !{i32 786484, i32 0, null, metadata !"zzcon3", metadata !"zzcon3", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon3] [line 42] [local] [def]
+!312 = metadata !{i32 786484, i32 0, null, metadata !"dz5tz1", metadata !"dz5tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz5tz1] [line 43] [local] [def]
+!313 = metadata !{i32 786484, i32 0, null, metadata !"dz4tz1", metadata !"dz4tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz4tz1] [line 43] [local] [def]
+!314 = metadata !{i32 786484, i32 0, null, metadata !"dz3tz1", metadata !"dz3tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz3tz1] [line 43] [local] [def]
+!315 = metadata !{i32 786484, i32 0, null, metadata !"zzcon2", metadata !"zzcon2", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon2] [line 42] [local] [def]
+!316 = metadata !{i32 786484, i32 0, null, metadata !"dz2tz1", metadata !"dz2tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz2tz1] [line 43] [local] [def]
+!317 = metadata !{i32 786484, i32 0, null, metadata !"tz2", metadata !"tz2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz2] [line 31] [local] [def]
+!318 = metadata !{i32 786484, i32 0, null, metadata !"dz1tz1", metadata !"dz1tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz1tz1] [line 43] [local] [def]
+!319 = metadata !{i32 786484, i32 0, null, metadata !"yycon5", metadata !"yycon5", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon5] [line 40] [local] [def]
+!320 = metadata !{i32 786484, i32 0, null, metadata !"yycon4", metadata !"yycon4", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon4] [line 40] [local] [def]
+!321 = metadata !{i32 786484, i32 0, null, metadata !"yycon3", metadata !"yycon3", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon3] [line 40] [local] [def]
+!322 = metadata !{i32 786484, i32 0, null, metadata !"dy5ty1", metadata !"dy5ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy5ty1] [line 41] [local] [def]
+!323 = metadata !{i32 786484, i32 0, null, metadata !"dy4ty1", metadata !"dy4ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy4ty1] [line 41] [local] [def]
+!324 = metadata !{i32 786484, i32 0, null, metadata !"dy3ty1", metadata !"dy3ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy3ty1] [line 41] [local] [def]
+!325 = metadata !{i32 786484, i32 0, null, metadata !"yycon2", metadata !"yycon2", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon2] [line 40] [local] [def]
+!326 = metadata !{i32 786484, i32 0, null, metadata !"dy2ty1", metadata !"dy2ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy2ty1] [line 41] [local] [def]
+!327 = metadata !{i32 786484, i32 0, null, metadata !"ty2", metadata !"ty2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty2] [line 31] [local] [def]
+!328 = metadata !{i32 786484, i32 0, null, metadata !"dy1ty1", metadata !"dy1ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy1ty1] [line 41] [local] [def]
+!329 = metadata !{i32 786484, i32 0, null, metadata !"dssp", metadata !"dssp", metadata !"", metadata !300, i32 35, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dssp] [line 35] [local] [def]
+!330 = metadata !{i32 786484, i32 0, null, metadata !"c1", metadata !"c1", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1] [line 45] [local] [def]
+!331 = metadata !{i32 786484, i32 0, null, metadata !"xxcon5", metadata !"xxcon5", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon5] [line 38] [local] [def]
+!332 = metadata !{i32 786484, i32 0, null, metadata !"xxcon4", metadata !"xxcon4", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon4] [line 38] [local] [def]
+!333 = metadata !{i32 786484, i32 0, null, metadata !"xxcon3", metadata !"xxcon3", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon3] [line 38] [local] [def]
+!334 = metadata !{i32 786484, i32 0, null, metadata !"dx5tx1", metadata !"dx5tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx5tx1] [line 39] [local] [def]
+!335 = metadata !{i32 786484, i32 0, null, metadata !"dx4tx1", metadata !"dx4tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx4tx1] [line 39] [local] [def]
+!336 = metadata !{i32 786484, i32 0, null, metadata !"dx3tx1", metadata !"dx3tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx3tx1] [line 39] [local] [def]
+!337 = metadata !{i32 786484, i32 0, null, metadata !"c2", metadata !"c2", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2] [line 45] [local] [def]
+!338 = metadata !{i32 786484, i32 0, null, metadata !"con43", metadata !"con43", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [con43] [line 48] [local] [def]
+!339 = metadata !{i32 786484, i32 0, null, metadata !"xxcon2", metadata !"xxcon2", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon2] [line 38] [local] [def]
+!340 = metadata !{i32 786484, i32 0, null, metadata !"dx2tx1", metadata !"dx2tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx2tx1] [line 39] [local] [def]
+!341 = metadata !{i32 786484, i32 0, null, metadata !"tx2", metadata !"tx2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx2] [line 31] [local] [def]
+!342 = metadata !{i32 786484, i32 0, null, metadata !"dx1tx1", metadata !"dx1tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx1tx1] [line 39] [local] [def]
+!343 = metadata !{i32 786484, i32 0, null, metadata !"forcing", metadata !"forcing", metadata !"", metadata !300, i32 66, metadata !344, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [forcing] [line 66] [local] [def]
+!344 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1663006848, i64 64, i32 0, i32 0, metadata !20, metadata !345, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1663006848, align 64, offset 0] [from double]
+!345 = metadata !{metadata !308, metadata !308, metadata !308, metadata !346}
+!346 = metadata !{i32 786465, i64 0, i64 6}       ; [ DW_TAG_subrange_type ] [0, 5]
+!347 = metadata !{i32 786484, i32 0, null, metadata !"qs", metadata !"qs", metadata !"", metadata !300, i32 63, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [qs] [line 63] [local] [def]
+!348 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 277167808, i64 64, i32 0, i32 0, metadata !20, metadata !349, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 277167808, align 64, offset 0] [from double]
+!349 = metadata !{metadata !308, metadata !308, metadata !308}
+!350 = metadata !{i32 786484, i32 0, null, metadata !"square", metadata !"square", metadata !"", metadata !300, i32 65, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [square] [line 65] [local] [def]
+!351 = metadata !{i32 786484, i32 0, null, metadata !"ws", metadata !"ws", metadata !"", metadata !300, i32 62, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ws] [line 62] [local] [def]
+!352 = metadata !{i32 786484, i32 0, null, metadata !"vs", metadata !"vs", metadata !"", metadata !300, i32 61, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [vs] [line 61] [local] [def]
+!353 = metadata !{i32 786484, i32 0, null, metadata !"us", metadata !"us", metadata !"", metadata !300, i32 60, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [us] [line 60] [local] [def]
+!354 = metadata !{i32 786484, i32 0, null, metadata !"rho_i", metadata !"rho_i", metadata !"", metadata !300, i32 64, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [rho_i] [line 64] [local] [def]
+!355 = metadata !{i32 786484, i32 0, null, metadata !"u", metadata !"u", metadata !"", metadata !300, i32 67, metadata !306, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [u] [line 67] [local] [def]
+!356 = metadata !{i32 786484, i32 0, null, metadata !"ce", metadata !"ce", metadata !"", metadata !300, i32 36, metadata !357, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ce] [line 36] [local] [def]
+!357 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 4160, i64 64, i32 0, i32 0, metadata !20, metadata !358, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 4160, align 64, offset 0] [from double]
+!358 = metadata !{metadata !93, metadata !359}
+!359 = metadata !{i32 786465, i64 0, i64 13}      ; [ DW_TAG_subrange_type ] [0, 12]
+!360 = metadata !{i32 786484, i32 0, null, metadata !"dnzm1", metadata !"dnzm1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnzm1] [line 44] [local] [def]
+!361 = metadata !{i32 786484, i32 0, null, metadata !"dnym1", metadata !"dnym1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnym1] [line 44] [local] [def]
+!362 = metadata !{i32 786484, i32 0, null, metadata !"dnxm1", metadata !"dnxm1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnxm1] [line 44] [local] [def]
+!363 = metadata !{i32 786484, i32 0, null, metadata !"zzcon1", metadata !"zzcon1", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon1] [line 42] [local] [def]
+!364 = metadata !{i32 786484, i32 0, null, metadata !"yycon1", metadata !"yycon1", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon1] [line 40] [local] [def]
+!365 = metadata !{i32 786484, i32 0, null, metadata !"xxcon1", metadata !"xxcon1", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon1] [line 38] [local] [def]
+!366 = metadata !{i32 786484, i32 0, null, metadata !"con16", metadata !"con16", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [con16] [line 48] [local] [def]
+!367 = metadata !{i32 786484, i32 0, null, metadata !"c2iv", metadata !"c2iv", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2iv] [line 48] [local] [def]
+!368 = metadata !{i32 786484, i32 0, null, metadata !"c3c4tz3", metadata !"c3c4tz3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4tz3] [line 48] [local] [def]
+!369 = metadata !{i32 786484, i32 0, null, metadata !"c3c4ty3", metadata !"c3c4ty3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4ty3] [line 48] [local] [def]
+!370 = metadata !{i32 786484, i32 0, null, metadata !"c3c4tx3", metadata !"c3c4tx3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4tx3] [line 48] [local] [def]
+!371 = metadata !{i32 786484, i32 0, null, metadata !"comz6", metadata !"comz6", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz6] [line 47] [local] [def]
+!372 = metadata !{i32 786484, i32 0, null, metadata !"comz5", metadata !"comz5", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz5] [line 47] [local] [def]
+!373 = metadata !{i32 786484, i32 0, null, metadata !"comz4", metadata !"comz4", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz4] [line 47] [local] [def]
+!374 = metadata !{i32 786484, i32 0, null, metadata !"comz1", metadata !"comz1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz1] [line 47] [local] [def]
+!375 = metadata !{i32 786484, i32 0, null, metadata !"dtdssp", metadata !"dtdssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtdssp] [line 45] [local] [def]
+!376 = metadata !{i32 786484, i32 0, null, metadata !"c2dttz1", metadata !"c2dttz1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dttz1] [line 47] [local] [def]
+!377 = metadata !{i32 786484, i32 0, null, metadata !"c2dtty1", metadata !"c2dtty1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dtty1] [line 47] [local] [def]
+!378 = metadata !{i32 786484, i32 0, null, metadata !"c2dttx1", metadata !"c2dttx1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dttx1] [line 47] [local] [def]
+!379 = metadata !{i32 786484, i32 0, null, metadata !"dttz2", metadata !"dttz2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttz2] [line 46] [local] [def]
+!380 = metadata !{i32 786484, i32 0, null, metadata !"dttz1", metadata !"dttz1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttz1] [line 46] [local] [def]
+!381 = metadata !{i32 786484, i32 0, null, metadata !"dtty2", metadata !"dtty2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtty2] [line 46] [local] [def]
+!382 = metadata !{i32 786484, i32 0, null, metadata !"dtty1", metadata !"dtty1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtty1] [line 46] [local] [def]
+!383 = metadata !{i32 786484, i32 0, null, metadata !"dttx2", metadata !"dttx2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttx2] [line 46] [local] [def]
+!384 = metadata !{i32 786484, i32 0, null, metadata !"dttx1", metadata !"dttx1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttx1] [line 46] [local] [def]
+!385 = metadata !{i32 786484, i32 0, null, metadata !"c5dssp", metadata !"c5dssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c5dssp] [line 45] [local] [def]
+!386 = metadata !{i32 786484, i32 0, null, metadata !"c4dssp", metadata !"c4dssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c4dssp] [line 45] [local] [def]
+!387 = metadata !{i32 786484, i32 0, null, metadata !"dzmax", metadata !"dzmax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dzmax] [line 37] [local] [def]
+!388 = metadata !{i32 786484, i32 0, null, metadata !"dymax", metadata !"dymax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dymax] [line 37] [local] [def]
+!389 = metadata !{i32 786484, i32 0, null, metadata !"dxmax", metadata !"dxmax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dxmax] [line 37] [local] [def]
+!390 = metadata !{i32 786484, i32 0, null, metadata !"dz5", metadata !"dz5", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz5] [line 34] [local] [def]
+!391 = metadata !{i32 786484, i32 0, null, metadata !"dz4", metadata !"dz4", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz4] [line 34] [local] [def]
+!392 = metadata !{i32 786484, i32 0, null, metadata !"dz3", metadata !"dz3", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz3] [line 34] [local] [def]
+!393 = metadata !{i32 786484, i32 0, null, metadata !"dz2", metadata !"dz2", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz2] [line 34] [local] [def]
+!394 = metadata !{i32 786484, i32 0, null, metadata !"dz1", metadata !"dz1", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz1] [line 34] [local] [def]
+!395 = metadata !{i32 786484, i32 0, null, metadata !"dy5", metadata !"dy5", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy5] [line 33] [local] [def]
+!396 = metadata !{i32 786484, i32 0, null, metadata !"dy4", metadata !"dy4", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy4] [line 33] [local] [def]
+!397 = metadata !{i32 786484, i32 0, null, metadata !"dy3", metadata !"dy3", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy3] [line 33] [local] [def]
+!398 = metadata !{i32 786484, i32 0, null, metadata !"dy2", metadata !"dy2", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy2] [line 33] [local] [def]
+!399 = metadata !{i32 786484, i32 0, null, metadata !"dy1", metadata !"dy1", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy1] [line 33] [local] [def]
+!400 = metadata !{i32 786484, i32 0, null, metadata !"dx5", metadata !"dx5", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx5] [line 32] [local] [def]
+!401 = metadata !{i32 786484, i32 0, null, metadata !"dx4", metadata !"dx4", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx4] [line 32] [local] [def]
+!402 = metadata !{i32 786484, i32 0, null, metadata !"dx3", metadata !"dx3", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx3] [line 32] [local] [def]
+!403 = metadata !{i32 786484, i32 0, null, metadata !"dx2", metadata !"dx2", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx2] [line 32] [local] [def]
+!404 = metadata !{i32 786484, i32 0, null, metadata !"dx1", metadata !"dx1", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx1] [line 32] [local] [def]
+!405 = metadata !{i32 786484, i32 0, null, metadata !"tz3", metadata !"tz3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz3] [line 31] [local] [def]
+!406 = metadata !{i32 786484, i32 0, null, metadata !"tz1", metadata !"tz1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz1] [line 31] [local] [def]
+!407 = metadata !{i32 786484, i32 0, null, metadata !"ty3", metadata !"ty3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty3] [line 31] [local] [def]
+!408 = metadata !{i32 786484, i32 0, null, metadata !"ty1", metadata !"ty1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty1] [line 31] [local] [def]
+!409 = metadata !{i32 786484, i32 0, null, metadata !"tx3", metadata !"tx3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx3] [line 31] [local] [def]
+!410 = metadata !{i32 786484, i32 0, null, metadata !"tx1", metadata !"tx1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx1] [line 31] [local] [def]
+!411 = metadata !{i32 786484, i32 0, null, metadata !"conz1", metadata !"conz1", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [conz1] [line 45] [local] [def]
+!412 = metadata !{i32 786484, i32 0, null, metadata !"c1345", metadata !"c1345", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1345] [line 44] [local] [def]
+!413 = metadata !{i32 786484, i32 0, null, metadata !"c3c4", metadata !"c3c4", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4] [line 44] [local] [def]
+!414 = metadata !{i32 786484, i32 0, null, metadata !"c1c5", metadata !"c1c5", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1c5] [line 44] [local] [def]
+!415 = metadata !{i32 786484, i32 0, null, metadata !"c1c2", metadata !"c1c2", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1c2] [line 44] [local] [def]
+!416 = metadata !{i32 786484, i32 0, null, metadata !"c5", metadata !"c5", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c5] [line 45] [local] [def]
+!417 = metadata !{i32 786484, i32 0, null, metadata !"c4", metadata !"c4", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c4] [line 45] [local] [def]
+!418 = metadata !{i32 786484, i32 0, null, metadata !"c3", metadata !"c3", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3] [line 45] [local] [def]
+!419 = metadata !{i32 786484, i32 0, null, metadata !"lhs", metadata !"lhs", metadata !"", metadata !300, i32 69, metadata !420, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [lhs] [line 69] [local] [def]
+!420 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 20787585600, i64 64, i32 0, i32 0, metadata !20, metadata !421, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 20787585600, align 64, offset 0] [from double]
+!421 = metadata !{metadata !308, metadata !308, metadata !308, metadata !178, metadata !93, metadata !93}
+!422 = metadata !{i32 786484, i32 0, null, metadata !"q", metadata !"q", metadata !"", metadata !300, i32 73, metadata !423, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [q] [line 73] [local] [def]
+!423 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 10368, i64 64, i32 0, i32 0, metadata !20, metadata !424, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 10368, align 64, offset 0] [from double]
+!424 = metadata !{metadata !425}
+!425 = metadata !{i32 786465, i64 0, i64 162}     ; [ DW_TAG_subrange_type ] [0, 161]
+!426 = metadata !{i32 786484, i32 0, null, metadata !"cuf", metadata !"cuf", metadata !"", metadata !300, i32 72, metadata !423, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [cuf] [line 72] [local] [def]
+!427 = metadata !{i32 786484, i32 0, null, metadata !"buf", metadata !"buf", metadata !"", metadata !300, i32 75, metadata !428, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [buf] [line 75] [local] [def]
+!428 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 51840, i64 64, i32 0, i32 0, metadata !20, metadata !429, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 51840, align 64, offset 0] [from double]
+!429 = metadata !{metadata !425, metadata !93}
+!430 = metadata !{i32 786484, i32 0, null, metadata !"ue", metadata !"ue", metadata !"", metadata !300, i32 74, metadata !428, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ue] [line 74] [local] [def]
+!431 = metadata !{i32 786484, i32 0, null, metadata !"njac", metadata !"njac", metadata !"", metadata !300, i32 86, metadata !432, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [njac] [line 86] [local] [def]
+!432 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 6886684800, i64 64, i32 0, i32 0, metadata !20, metadata !433, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 6886684800, align 64, offset 0] [from double]
+!433 = metadata !{metadata !308, metadata !308, metadata !425, metadata !93, metadata !93}
+!434 = metadata !{i32 786484, i32 0, null, metadata !"fjac", metadata !"fjac", metadata !"", metadata !300, i32 84, metadata !432, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [fjac] [line 84] [local] [def]
+!435 = metadata !{i32 786484, i32 0, null, metadata !"tmp3", metadata !"tmp3", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp3] [line 88] [local] [def]
+!436 = metadata !{i32 786484, i32 0, null, metadata !"tmp2", metadata !"tmp2", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp2] [line 88] [local] [def]
+!437 = metadata !{i32 786484, i32 0, null, metadata !"tmp1", metadata !"tmp1", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp1] [line 88] [local] [def]
+!438 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!439 = metadata !{i32 1898, i32 0, metadata !440, null}
+!440 = metadata !{i32 786443, metadata !1, metadata !114, i32 1898, i32 0, i32 107} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!441 = metadata !{i32 1913, i32 0, metadata !442, null}
+!442 = metadata !{i32 786443, metadata !1, metadata !114, i32 1913, i32 0, i32 115} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!443 = metadata !{i32 1923, i32 0, metadata !114, null}
+!444 = metadata !{metadata !"int", metadata !445}
+!445 = metadata !{metadata !"omnipotent char", metadata !446}
+!446 = metadata !{metadata !"Simple C/C++ TBAA"}
+!447 = metadata !{i32 1}
+!448 = metadata !{i32 1925, i32 0, metadata !449, null}
+!449 = metadata !{i32 786443, metadata !1, metadata !114, i32 1925, i32 0, i32 121} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!450 = metadata !{i32 1939, i32 0, metadata !451, null}
+!451 = metadata !{i32 786443, metadata !1, metadata !114, i32 1939, i32 0, i32 127} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!452 = metadata !{i32 1940, i32 0, metadata !453, null}
+!453 = metadata !{i32 786443, metadata !1, metadata !454, i32 1940, i32 0, i32 129} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!454 = metadata !{i32 786443, metadata !1, metadata !451, i32 1939, i32 0, i32 128} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!455 = metadata !{i32 1941, i32 0, metadata !456, null}
+!456 = metadata !{i32 786443, metadata !1, metadata !457, i32 1941, i32 0, i32 131} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!457 = metadata !{i32 786443, metadata !1, metadata !453, i32 1940, i32 0, i32 130} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!458 = metadata !{i32 2020, i32 0, metadata !459, null}
+!459 = metadata !{i32 786443, metadata !1, metadata !460, i32 2020, i32 0, i32 149} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!460 = metadata !{i32 786443, metadata !1, metadata !461, i32 2019, i32 0, i32 148} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!461 = metadata !{i32 786443, metadata !1, metadata !462, i32 2019, i32 0, i32 147} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!462 = metadata !{i32 786443, metadata !1, metadata !463, i32 2018, i32 0, i32 146} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!463 = metadata !{i32 786443, metadata !1, metadata !114, i32 2018, i32 0, i32 145} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!464 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/PowerPC/pr17354.ll b/test/CodeGen/PowerPC/pr17354.ll
new file mode 100644
index 000000000000..dca81b1c2ca6
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr17354.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=pwr7 -relocation-model=pic <%s | FileCheck %s
+
+; Test that PR17354 is fixed.  We must generate a nop following even
+; local calls when generating code for shared libraries, to permit
+; TOC fixup.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.CS = type { i32 }
+
+@_ZL3glb = internal global [1 x %struct.CS] zeroinitializer, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define internal void @__cxx_global_var_init() section ".text.startup" {
+entry:
+  call void @_Z4funcv(%struct.CS* sret getelementptr inbounds ([1 x %struct.CS]* @_ZL3glb, i64 0, i64 0))
+  ret void
+}
+
+; CHECK-LABEL: __cxx_global_var_init:
+; CHECK: bl _Z4funcv
+; CHECK-NEXT: nop
+
+; Function Attrs: nounwind
+define void @_Z4funcv(%struct.CS* noalias sret %agg.result) #0 {
+entry:
+  %a_ = getelementptr inbounds %struct.CS* %agg.result, i32 0, i32 0
+  store i32 0, i32* %a_, align 4
+  ret void
+}
+
+define internal void @_GLOBAL__I_a() section ".text.startup" {
+entry:
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll
index 89705faa46e9..891e801dd3b5 100644
--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -14,8 +14,8 @@ entry:
   ret double %r
 
 ; CHECK: @foo
-; CHECK: frsqrte
-; CHECK: fnmsub
+; CHECK-DAG: frsqrte
+; CHECK-DAG: fnmsub
 ; CHECK: fmul
 ; CHECK: fmadd
 ; CHECK: fmul
@@ -39,8 +39,8 @@ entry:
   ret double %r
 
 ; CHECK: @foof
-; CHECK: frsqrtes
-; CHECK: fnmsubs
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
 ; CHECK: fmadds
 ; CHECK: fmuls
@@ -61,8 +61,8 @@ entry:
   ret float %r
 
 ; CHECK: @foo
-; CHECK: frsqrte
-; CHECK: fnmsub
+; CHECK-DAG: frsqrte
+; CHECK-DAG: fnmsub
 ; CHECK: fmul
 ; CHECK: fmadd
 ; CHECK: fmul
@@ -86,8 +86,8 @@ entry:
   ret float %r
 
 ; CHECK: @goo
-; CHECK: frsqrtes
-; CHECK: fnmsubs
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
 ; CHECK: fmadds
 ; CHECK: fmuls
@@ -120,8 +120,8 @@ entry:
   ret double %r
 
 ; CHECK: @foo2
-; CHECK: fre
-; CHECK: fnmsub
+; CHECK-DAG: fre
+; CHECK-DAG: fnmsub
 ; CHECK: fmadd
 ; CHECK: fnmsub
 ; CHECK: fmadd
@@ -139,8 +139,8 @@ entry:
   ret float %r
 
 ; CHECK: @goo2
-; CHECK: fres
-; CHECK: fnmsubs
+; CHECK-DAG: fres
+; CHECK-DAG: fnmsubs
 ; CHECK: fmadds
 ; CHECK: fmuls
 ; CHECK: blr
@@ -169,8 +169,9 @@ entry:
   ret double %r
 
 ; CHECK: @foo3
-; CHECK: frsqrte
-; CHECK: fnmsub
+; CHECK: fcmpu
+; CHECK-DAG: frsqrte
+; CHECK-DAG: fnmsub
 ; CHECK: fmul
 ; CHECK: fmadd
 ; CHECK: fmul
@@ -195,8 +196,9 @@ entry:
   ret float %r
 
 ; CHECK: @goo3
-; CHECK: frsqrtes
-; CHECK: fnmsubs
+; CHECK: fcmpu
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
 ; CHECK: fmadds
 ; CHECK: fmuls
@@ -217,7 +219,8 @@ entry:
 
 ; CHECK: @hoo3
 ; CHECK: vrsqrtefp
-; CHECK: vrefp
+; CHECK-DAG: vrefp
+; CHECK-DAG: vcmpeqfp
 
 ; CHECK-SAFE: @hoo3
 ; CHECK-SAFE-NOT: vrsqrtefp
diff --git a/test/CodeGen/PowerPC/reg-names.ll b/test/CodeGen/PowerPC/reg-names.ll
new file mode 100644
index 000000000000..f8fa7e4020e9
--- /dev/null
+++ b/test/CodeGen/PowerPC/reg-names.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names < %s | FileCheck -check-prefix=CHECK-FN %s
+
+define i64 @test1(i64 %a, i64 %b) {
+; CHECK-LABEL: @test1
+; CHECK-FN-LABEL: @test1
+
+entry:
+  ret i64 %b
+
+; CHECK: mr 3, 4
+; CHECK-FN: mr r3, r4
+
+; CHECK: blr
+; CHECK-FN: blr
+}
+
diff --git a/test/CodeGen/PowerPC/reloc-align.ll b/test/CodeGen/PowerPC/reloc-align.ll
new file mode 100644
index 000000000000..13d6adadfcae
--- /dev/null
+++ b/test/CodeGen/PowerPC/reloc-align.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mcpu=pwr7 -O1 < %s | FileCheck %s
+
+; This test verifies that the peephole optimization of address accesses
+; does not produce a load or store with a relocation that can't be
+; satisfied for a given instruction encoding.  Reduced from a test supplied
+; by Hal Finkel.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S1 = type { [8 x i8] }
+
+@main.l_1554 = internal global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 -1, i8 -6, i8 57, i8 62, i8 -48, i8 0, i8 58, i8 80 }, align 1
+
+; Function Attrs: nounwind readonly
+define signext i32 @main() #0 {
+entry:
+  %call = tail call fastcc signext i32 @func_90(%struct.S1* byval bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @main.l_1554 to %struct.S1*))
+; CHECK-NOT: ld {{[0-9]+}}, main.l_1554@toc@l
+  ret i32 %call
+}
+
+; Function Attrs: nounwind readonly
+define internal fastcc signext i32 @func_90(%struct.S1* byval nocapture %p_91) #0 {
+entry:
+  %0 = bitcast %struct.S1* %p_91 to i64*
+  %bf.load = load i64* %0, align 1
+  %bf.shl = shl i64 %bf.load, 26
+  %bf.ashr = ashr i64 %bf.shl, 54
+  %bf.cast = trunc i64 %bf.ashr to i32
+  ret i32 %bf.cast
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/remap-crash.ll b/test/CodeGen/PowerPC/remap-crash.ll
new file mode 100644
index 000000000000..515f720ba448
--- /dev/null
+++ b/test/CodeGen/PowerPC/remap-crash.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD13() {
+BB:
+  br label %CF78
+
+CF78:                                             ; preds = %CF87, %CF86, %CF78, %BB
+  %Cmp = icmp ule <16 x i64> zeroinitializer, zeroinitializer
+  br i1 undef, label %CF78, label %CF86
+
+CF86:                                             ; preds = %CF78
+  br i1 undef, label %CF78, label %CF84
+
+CF84:                                             ; preds = %CF84, %CF86
+  br i1 undef, label %CF84, label %CF87
+
+CF87:                                             ; preds = %CF84
+  br i1 undef, label %CF78, label %CF82
+
+CF82:                                             ; preds = %CF82, %CF87
+  br i1 undef, label %CF82, label %CF83
+
+CF83:                                             ; preds = %CF82
+  br label %CF
+
+CF:                                               ; preds = %CF80, %CF81, %CF, %CF83
+  br i1 undef, label %CF, label %CF81
+
+CF81:                                             ; preds = %CF
+  %Se = sext <16 x i1> %Cmp to <16 x i16>
+  br i1 undef, label %CF, label %CF80
+
+CF80:                                             ; preds = %CF81
+  br i1 undef, label %CF, label %CF76
+
+CF76:                                             ; preds = %CF76, %CF80
+  %Sl58 = select i1 undef, <16 x i16> %Se, <16 x i16> %Se
+  br label %CF76
+}
+
+define void @autogen_SD1067() {
+BB:
+  %FC = sitofp <4 x i32> zeroinitializer to <4 x ppc_fp128>
+  br label %CF77
+
+CF77:                                             ; preds = %CF77, %BB
+  %brmerge = or i1 false, undef
+  br i1 %brmerge, label %CF77, label %CF85
+
+CF85:                                             ; preds = %CF77
+  %Shuff19 = shufflevector <4 x ppc_fp128> %FC, <4 x ppc_fp128> %FC, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+  br label %CF75
+
+CF75:                                             ; preds = %CF75, %CF85
+  br label %CF75
+}
diff --git a/test/CodeGen/PowerPC/rlwimi-and.ll b/test/CodeGen/PowerPC/rlwimi-and.ll
new file mode 100644
index 000000000000..7963249ddf83
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi-and.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @test() align 2 {
+entry:
+  br i1 undef, label %codeRepl1, label %codeRepl31
+
+codeRepl1:                                        ; preds = %entry
+  br i1 undef, label %codeRepl4, label %codeRepl29
+
+codeRepl4:                                        ; preds = %codeRepl1
+  br i1 undef, label %codeRepl12, label %codeRepl17
+
+codeRepl12:                                       ; preds = %codeRepl4
+  unreachable
+
+codeRepl17:                                       ; preds = %codeRepl4
+  %0 = load i8* undef, align 2
+  %1 = and i8 %0, 1
+  %not.tobool.i.i.i = icmp eq i8 %1, 0
+  %2 = select i1 %not.tobool.i.i.i, i16 0, i16 256
+  %3 = load i8* undef, align 1
+  %4 = and i8 %3, 1
+  %not.tobool.i.1.i.i = icmp eq i8 %4, 0
+  %rvml38.sroa.1.1.insert.ext = select i1 %not.tobool.i.1.i.i, i16 0, i16 1
+  %rvml38.sroa.0.0.insert.insert = or i16 %rvml38.sroa.1.1.insert.ext, %2
+  store i16 %rvml38.sroa.0.0.insert.insert, i16* undef, align 2
+  unreachable
+
+; FIXME: the SLWI could be folded into the RLWIMI to give a rotate of 8.
+; CHECK: @test
+; CHECK-DAG: slwi [[R1:[0-9]+]], {{[0-9]+}}, 31
+; CHECK-DAG: rlwinm [[R2:[0-9]+]], {{[0-9]+}}, 0, 31, 31
+; CHECK: rlwimi [[R2]], [[R1]], 9, 23, 23
+
+codeRepl29:                                       ; preds = %codeRepl1
+  unreachable
+
+codeRepl31:                                       ; preds = %entry
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
index 2b5e1c9a289b..bf0a6415df67 100644
--- a/test/CodeGen/PowerPC/rounding-ops.ll
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -7,11 +6,8 @@ define float @test1(float %x) nounwind  {
   %call = tail call float @floorf(float %x) nounwind readnone
   ret float %call
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: frim 1, 1
-
-; CHECK-FM: test1:
-; CHECK-FM: frim 1, 1
 }
 
 declare float @floorf(float) nounwind readnone
@@ -20,50 +16,38 @@ define double @test2(double %x) nounwind  {
   %call = tail call double @floor(double %x) nounwind readnone
   ret double %call
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: frim 1, 1
-
-; CHECK-FM: test2:
-; CHECK-FM: frim 1, 1
 }
 
 declare double @floor(double) nounwind readnone
 
 define float @test3(float %x) nounwind  {
-  %call = tail call float @nearbyintf(float %x) nounwind readnone
+  %call = tail call float @roundf(float %x) nounwind readnone
   ret float %call
 
-; CHECK: test3:
-; CHECK-NOT: frin
-
-; CHECK-FM: test3:
-; CHECK-FM: frin 1, 1
+; CHECK-LABEL: test3:
+; CHECK: frin 1, 1
 }
 
-declare float @nearbyintf(float) nounwind readnone
+declare float @roundf(float) nounwind readnone
 
 define double @test4(double %x) nounwind  {
-  %call = tail call double @nearbyint(double %x) nounwind readnone
+  %call = tail call double @round(double %x) nounwind readnone
   ret double %call
 
-; CHECK: test4:
-; CHECK-NOT: frin
-
-; CHECK-FM: test4:
-; CHECK-FM: frin 1, 1
+; CHECK-LABEL: test4:
+; CHECK: frin 1, 1
 }
 
-declare double @nearbyint(double) nounwind readnone
+declare double @round(double) nounwind readnone
 
 define float @test5(float %x) nounwind  {
   %call = tail call float @ceilf(float %x) nounwind readnone
   ret float %call
 
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: frip 1, 1
-
-; CHECK-FM: test5:
-; CHECK-FM: frip 1, 1
 }
 
 declare float @ceilf(float) nounwind readnone
@@ -72,11 +56,8 @@ define double @test6(double %x) nounwind  {
   %call = tail call double @ceil(double %x) nounwind readnone
   ret double %call
 
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: frip 1, 1
-
-; CHECK-FM: test6:
-; CHECK-FM: frip 1, 1
 }
 
 declare double @ceil(double) nounwind readnone
@@ -85,11 +66,8 @@ define float @test9(float %x) nounwind  {
   %call = tail call float @truncf(float %x) nounwind readnone
   ret float %call
 
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: friz 1, 1
-
-; CHECK-FM: test9:
-; CHECK-FM: friz 1, 1
 }
 
 declare float @truncf(float) nounwind readnone
@@ -98,50 +76,9 @@ define double @test10(double %x) nounwind  {
   %call = tail call double @trunc(double %x) nounwind readnone
   ret double %call
 
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: friz 1, 1
-
-; CHECK-FM: test10:
-; CHECK-FM: friz 1, 1
 }
 
 declare double @trunc(double) nounwind readnone
 
-define void @test11(float %x, float* %y) nounwind  {
-  %call = tail call float @rintf(float %x) nounwind readnone
-  store float %call, float* %y
-  ret void
-
-; CHECK: test11:
-; CHECK-NOT: frin
-
-; CHECK-FM: test11:
-; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
-; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
-; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
-; CHECK-FM: mtfsb1 6
-; CHECK-FM: .LBB[[BB]]_2:
-; CHECK-FM: blr
-}
-
-declare float @rintf(float) nounwind readnone
-
-define void @test12(double %x, double* %y) nounwind  {
-  %call = tail call double @rint(double %x) nounwind readnone
-  store double %call, double* %y
-  ret void
-
-; CHECK: test12:
-; CHECK-NOT: frin
-
-; CHECK-FM: test12:
-; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
-; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
-; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
-; CHECK-FM: mtfsb1 6
-; CHECK-FM: .LBB[[BB]]_2:
-; CHECK-FM: blr
-}
-
-declare double @rint(double) nounwind readnone
-
diff --git a/test/CodeGen/PowerPC/rs-undef-use.ll b/test/CodeGen/PowerPC/rs-undef-use.ll
new file mode 100644
index 000000000000..24dd5fd9da99
--- /dev/null
+++ b/test/CodeGen/PowerPC/rs-undef-use.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD156869(i8*, i64*) {
+BB:
+  %A3 = alloca <2 x i1>
+  %A2 = alloca <8 x i32>
+  br label %CF
+
+CF:                                               ; preds = %CF85, %CF, %BB
+  br i1 undef, label %CF, label %CF82.critedge
+
+CF82.critedge:                                    ; preds = %CF
+  store i8 -59, i8* %0
+  br label %CF82
+
+CF82:                                             ; preds = %CF82, %CF82.critedge
+  %L17 = load i8* %0
+  %E18 = extractelement <2 x i64> undef, i32 0
+  %PC = bitcast <2 x i1>* %A3 to i64*
+  br i1 undef, label %CF82, label %CF84.critedge
+
+CF84.critedge:                                    ; preds = %CF82
+  store i64 455385, i64* %PC
+  br label %CF84
+
+CF84:                                             ; preds = %CF84, %CF84.critedge
+  %L40 = load i64* %PC
+  store i64 -1, i64* %PC
+  %Sl46 = select i1 undef, i1 undef, i1 false
+  br i1 %Sl46, label %CF84, label %CF85
+
+CF85:                                             ; preds = %CF84
+  %L47 = load i64* %PC
+  store i64 %E18, i64* %PC
+  %PC52 = bitcast <8 x i32>* %A2 to ppc_fp128*
+  store ppc_fp128 0xM4D436562A0416DE00000000000000000, ppc_fp128* %PC52
+  %PC59 = bitcast i64* %1 to i8*
+  %Cmp61 = icmp slt i64 %L47, %L40
+  br i1 %Cmp61, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF85
+  br i1 undef, label %CF77, label %CF81
+
+CF81:                                             ; preds = %CF77
+  store i8 %L17, i8* %PC59
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/set0-v8i16.ll b/test/CodeGen/PowerPC/set0-v8i16.ll
new file mode 100644
index 000000000000..13d51dfc8e4f
--- /dev/null
+++ b/test/CodeGen/PowerPC/set0-v8i16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD367951() {
+BB:
+  %Shuff = shufflevector <16 x i16> zeroinitializer, <16 x i16> zeroinitializer, <16 x i32> <i32 26, i32 28, i32 30, i32 undef, i32 2, i32 4, i32 undef, i32 undef, i32 10, i32 undef, i32 14, i32 16, i32 undef, i32 20, i32 undef, i32 24>
+  %Shuff7 = shufflevector <16 x i16> zeroinitializer, <16 x i16> %Shuff, <16 x i32> <i32 20, i32 undef, i32 24, i32 26, i32 28, i32 undef, i32 0, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18>
+  %Cmp11 = icmp ugt <16 x i16> %Shuff7, zeroinitializer
+  %E27 = extractelement <16 x i1> %Cmp11, i32 5
+  br label %CF76
+
+CF76:                                             ; preds = %CF80, %CF76, %BB
+  br i1 undef, label %CF76, label %CF80
+
+CF80:                                             ; preds = %CF76
+  %Sl37 = select i1 %E27, <16 x i16> undef, <16 x i16> %Shuff
+  br label %CF76
+}
diff --git a/test/CodeGen/PowerPC/sj-ctr-loop.ll b/test/CodeGen/PowerPC/sj-ctr-loop.ll
new file mode 100644
index 000000000000..1866bcd17420
--- /dev/null
+++ b/test/CodeGen/PowerPC/sj-ctr-loop.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.__jmp_buf_tag.1.15.17.21.25.49.53.55 = type { [64 x i64], i32, %struct.__sigset_t.0.14.16.20.24.48.52.54, [8 x i8] }
+%struct.__sigset_t.0.14.16.20.24.48.52.54 = type { [16 x i64] }
+
+@env_sigill = external global [1 x %struct.__jmp_buf_tag.1.15.17.21.25.49.53.55], align 16
+
+; CHECK-LABEL: @main
+; CHECK-NOT: mtctr
+
+; Function Attrs: nounwind
+define void @main() #0 {
+entry:
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %for.body.lr.ph, label %for.end.thread
+
+for.end.thread:                                   ; preds = %if.end
+  br label %return
+
+for.body.lr.ph:                                   ; preds = %if.end
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %cmp2 = icmp slt i32 %inc, undef
+  br i1 %cmp2, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond, %for.body.lr.ph
+  %i.032 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
+  %0 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag.1.15.17.21.25.49.53.55]* @env_sigill to i8*))
+  %inc = add nsw i32 %i.032, 1
+  br i1 false, label %if.else, label %for.cond
+
+if.else:                                          ; preds = %for.body
+  unreachable
+
+for.end:                                          ; preds = %for.cond
+  unreachable
+
+return:                                           ; preds = %for.end.thread, %entry
+  ret void
+}
+
+; Function Attrs: nounwind
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
index 7ea35dafc3fa..414640b2b7e3 100644
--- a/test/CodeGen/PowerPC/sjlj.ll
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -20,6 +20,7 @@ entry:
 ; CHECK: ld [[REG2:[0-9]+]], 8([[REG]])
 ; CHECK: ld 1, 16([[REG]])
 ; CHECK: mtctr [[REG2]]
+; CHECK: ld 30, 32([[REG]])
 ; CHECK: ld 2, 24([[REG]])
 ; CHECK: bctr
 
@@ -63,15 +64,16 @@ return:                                           ; preds = %if.end, %if.then
 ; CHECK: std
 ; Make sure that we're not saving VRSAVE on non-Darwin:
 ; CHECK-NOT: mfspr
-; CHECK: stfd
-; CHECK: stvx
 
-; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
-; CHECK: std 31, env_sigill@toc@l([[REG]])
-; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
-; CHECK: std [[REG]], [[OFF:[0-9]+]](31)                  # 8-byte Folded Spill
-; CHECK: std 1, 16([[REG]])
-; CHECK: std 2, 24([[REG]])
+; CHECK-DAG: stfd
+; CHECK-DAG: stvx
+
+; CHECK-DAG: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK-DAG: std 31, env_sigill@toc@l([[REG]])
+; CHECK-DAG: addi [[REGA:[0-9]+]], [[REG]], env_sigill@toc@l
+; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31)                  # 8-byte Folded Spill
+; CHECK-DAG: std 1, 16([[REGA]])
+; CHECK-DAG: std 2, 24([[REGA]])
 ; CHECK: bcl 20, 31, .LBB1_1
 ; CHECK: li 3, 1
 ; CHECK: #EH_SjLj_Setup	.LBB1_1
@@ -99,13 +101,59 @@ return:                                           ; preds = %if.end, %if.then
 ; CHECK-NOAV: blr
 }
 
+define signext i32 @main2() #0 {
+entry:
+  %a = alloca i8, align 64
+  call void @bar(i8* %a)
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+  %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+  %tobool = icmp ne i32 %2, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+if.else:                                          ; preds = %entry
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.else
+  store i32 0, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %3 = load i32* %retval
+  ret i32 %3
+
+; CHECK: @main2
+
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: std 31, env_sigill@toc@l([[REG]])
+; CHECK: addi [[REGB:[0-9]+]], [[REG]], env_sigill@toc@l
+; CHECK-DAG: std [[REGB]], [[OFF:[0-9]+]](31)                  # 8-byte Folded Spill
+; CHECK-DAG: std 1, 16([[REGB]])
+; CHECK-DAG: std 2, 24([[REGB]])
+; CHECK-DAG: std 30, 32([[REGB]])
+; CHECK: bcl 20, 31,
+
+; CHECK: blr
+}
+
+declare void @bar(i8*) #3
+
 declare i8* @llvm.frameaddress(i32) #2
 
 declare i8* @llvm.stacksave() #3
 
 declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { noreturn nounwind }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
diff --git a/test/CodeGen/PowerPC/stack-protector.ll b/test/CodeGen/PowerPC/stack-protector.ll
index 810630f6978f..b81d94181cdf 100644
--- a/test/CodeGen/PowerPC/stack-protector.ll
+++ b/test/CodeGen/PowerPC/stack-protector.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_guard"
-; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_fail"
+; RUN: llc -march=ppc32 -mtriple=ppc32-unknown-linux < %s | FileCheck %s
+; CHECK: __stack_chk_guard
+; CHECK: __stack_chk_fail
 
 @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00"		; <[11 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll
new file mode 100644
index 000000000000..1c7a36aeeabf
--- /dev/null
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@@ -0,0 +1,147 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -disable-fp-elim < %s | FileCheck -check-prefix=CHECK-FP %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s = type { i32, i32 }
+
+declare void @bar(i32*)
+
+define void @goo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 32
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 32
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx)
+  ret void
+}
+
+; CHECK-LABEL: @goo
+
+; CHECK-DAG: mflr 0
+; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-DAG: std 30, -16(1)
+; CHECK-DAG: mr 30, 1
+; CHECK-DAG: std 0, 16(1)
+; CHECK-DAG: subfic 0, [[REG]], -160
+; CHECK: stdux 1, 1, 0
+
+; CHECK: .cfi_offset r30, -16
+; CHECK: .cfi_offset lr, 16
+
+; CHECK: std 3, 48(30)
+
+; CHECK: ld 1, 0(1)
+; CHECK-DAG: ld 0, 16(1)
+; CHECK-DAG: ld 30, -16(1)
+; CHECK-DAG: mtlr 0
+; CHECK: blr
+
+; CHECK-FP-LABEL: @goo
+
+; CHECK-FP-DAG: mflr 0
+; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-FP-DAG: std 31, -8(1)
+; CHECK-FP-DAG: std 30, -16(1)
+; CHECK-FP-DAG: mr 30, 1
+; CHECK-FP-DAG: std 0, 16(1)
+; CHECK-FP-DAG: subfic 0, [[REG]], -160
+; CHECK-FP: stdux 1, 1, 0
+
+; CHECK-FP: .cfi_offset r31, -8
+; CHECK-FP: .cfi_offset r30, -16
+; CHECK-FP: .cfi_offset lr, 16
+
+; CHECK-FP: mr 31, 1
+
+; CHECK-FP: std 3, 48(30)
+
+; CHECK-FP: ld 1, 0(1)
+; CHECK-FP-DAG: ld 0, 16(1)
+; CHECK-FP-DAG: ld 31, -8(1)
+; CHECK-FP-DAG: ld 30, -16(1)
+; CHECK-FP-DAG: mtlr 0
+; CHECK-FP: blr
+
+; The large-frame-size case.
+define void @hoo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [200000 x i32], align 32
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 32
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx)
+  ret void
+}
+
+; CHECK-LABEL: @hoo
+
+; CHECK-DAG: lis [[REG1:[0-9]+]], -13
+; CHECK-DAG: rldicl [[REG3:[0-9]+]], 1, 0, 59
+; CHECK-DAG: mflr 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51808
+; CHECK-DAG: std 30, -16(1)
+; CHECK-DAG: mr 30, 1
+; CHECK-DAG: std 0, 16(1)
+; CHECK-DAG: subfc 0, [[REG3]], [[REG2]]
+; CHECK: stdux 1, 1, 0
+
+; CHECK: blr
+
+; Make sure that the FP save area is still allocated correctly relative to
+; where r30 is saved.
+define void @loo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 32
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 32
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx)
+  call void asm sideeffect "", "~{f30}"() nounwind
+  ret void
+}
+
+; CHECK-LABEL: @loo
+
+; CHECK-DAG: mflr 0
+; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-DAG: std 30, -32(1)
+; CHECK-DAG: mr 30, 1
+; CHECK-DAG: std 0, 16(1)
+; CHECK-DAG: subfic 0, [[REG]], -192
+; CHECK: stdux 1, 1, 0
+
+; CHECK: stfd 30, -16(30)
+
+; CHECK: blr
+
+; CHECK-FP-LABEL: @loo
+
+; CHECK-FP-DAG: mflr 0
+; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-FP-DAG: std 31, -24(1)
+; CHECK-FP-DAG: std 30, -32(1)
+; CHECK-FP-DAG: mr 30, 1
+; CHECK-FP-DAG: std 0, 16(1)
+; CHECK-FP-DAG: subfic 0, [[REG]], -192
+; CHECK-FP: stdux 1, 1, 0
+
+; CHECK-FP: stfd 30, -16(30)
+
+; CHECK-FP: blr
diff --git a/test/CodeGen/PowerPC/std-unal-fi.ll b/test/CodeGen/PowerPC/std-unal-fi.ll
new file mode 100644
index 000000000000..8b9606e1624f
--- /dev/null
+++ b/test/CodeGen/PowerPC/std-unal-fi.ll
@@ -0,0 +1,119 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD4932(i8) {
+BB:
+  %A4 = alloca i8
+  %A = alloca <1 x ppc_fp128>
+  %Shuff = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 undef, i32 29, i32 31, i32 1, i32 3, i32 5>
+  br label %CF
+
+CF:                                               ; preds = %CF80, %CF, %BB
+  %L5 = load i64* undef
+  store i8 %0, i8* %A4
+  %Shuff7 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> %Shuff, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26>
+  %PC10 = bitcast i8* %A4 to ppc_fp128*
+  br i1 undef, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF81, %CF83, %CF77, %CF
+  br i1 undef, label %CF77, label %CF82
+
+CF82:                                             ; preds = %CF82, %CF77
+  %L19 = load i64* undef
+  store <1 x ppc_fp128> zeroinitializer, <1 x ppc_fp128>* %A
+  store i8 -65, i8* %A4
+  br i1 undef, label %CF82, label %CF83
+
+CF83:                                             ; preds = %CF82
+  %L34 = load i64* undef
+  br i1 undef, label %CF77, label %CF81
+
+CF81:                                             ; preds = %CF83
+  %Shuff43 = shufflevector <16 x i32> %Shuff7, <16 x i32> undef, <16 x i32> <i32 15, i32 17, i32 19, i32 21, i32 23, i32 undef, i32 undef, i32 29, i32 31, i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+  store ppc_fp128 0xM00000000000000000000000000000000, ppc_fp128* %PC10
+  br i1 undef, label %CF77, label %CF78
+
+CF78:                                             ; preds = %CF78, %CF81
+  br i1 undef, label %CF78, label %CF79
+
+CF79:                                             ; preds = %CF79, %CF78
+  br i1 undef, label %CF79, label %CF80
+
+CF80:                                             ; preds = %CF79
+  store i64 %L19, i64* undef
+  %Cmp75 = icmp uge i32 206779, undef
+  br i1 %Cmp75, label %CF, label %CF76
+
+CF76:                                             ; preds = %CF80
+  store i64 %L5, i64* undef
+  store i64 %L34, i64* undef
+  ret void
+}
+
+define void @autogen_SD88042(i8*, i32*, i8) {
+BB:
+  %A4 = alloca <2 x i1>
+  %A = alloca <16 x float>
+  %L = load i8* %0
+  %Sl = select i1 false, <16 x float>* %A, <16 x float>* %A
+  %PC = bitcast <2 x i1>* %A4 to i64*
+  %Sl27 = select i1 false, i8 undef, i8 %L
+  br label %CF
+
+CF:                                               ; preds = %CF78, %CF, %BB
+  %PC33 = bitcast i32* %1 to i32*
+  br i1 undef, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF80, %CF77, %CF
+  store <16 x float> zeroinitializer, <16 x float>* %Sl
+  %L58 = load i32* %PC33
+  store i8 0, i8* %0
+  br i1 undef, label %CF77, label %CF80
+
+CF80:                                             ; preds = %CF77
+  store i64 0, i64* %PC
+  %E67 = extractelement <8 x i1> zeroinitializer, i32 1
+  br i1 %E67, label %CF77, label %CF78
+
+CF78:                                             ; preds = %CF80
+  %Cmp73 = icmp eq i32 189865, %L58
+  br i1 %Cmp73, label %CF, label %CF76
+
+CF76:                                             ; preds = %CF78
+  store i8 %2, i8* %0
+  store i8 %Sl27, i8* %0
+  ret void
+}
+
+define void @autogen_SD37497(i8*, i32*, i64*) {
+BB:
+  %A1 = alloca i1
+  %I8 = insertelement <1 x i32> <i32 -1>, i32 454855, i32 0
+  %Cmp = icmp ult <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, undef
+  %L10 = load i64* %2
+  %E11 = extractelement <4 x i1> %Cmp, i32 2
+  br label %CF72
+
+CF72:                                             ; preds = %CF74, %CF72, %BB
+  store double 0xB47BB29A53790718, double* undef
+  %E18 = extractelement <1 x i32> <i32 -1>, i32 0
+  %FC22 = sitofp <1 x i32> %I8 to <1 x float>
+  br i1 undef, label %CF72, label %CF74
+
+CF74:                                             ; preds = %CF72
+  store i8 0, i8* %0
+  %PC = bitcast i1* %A1 to i64*
+  %L31 = load i64* %PC
+  store i64 477323, i64* %PC
+  %Sl37 = select i1 false, i32* undef, i32* %1
+  %Cmp38 = icmp ugt i1 undef, undef
+  br i1 %Cmp38, label %CF72, label %CF73
+
+CF73:                                             ; preds = %CF74
+  store i64 %L31, i64* %PC
+  %B55 = fdiv <1 x float> undef, %FC22
+  %Sl63 = select i1 %E11, i32* undef, i32* %Sl37
+  store i32 %E18, i32* %Sl63
+  store i64 %L10, i64* %PC
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll
index 538ed24fbc46..7b9e8f720a17 100644
--- a/test/CodeGen/PowerPC/store-update.ll
+++ b/test/CodeGen/PowerPC/store-update.ll
@@ -3,166 +3,166 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
-define i8* @stbu(i8* %base, i8 zeroext %val) nounwind {
+define i8* @test_stbu(i8* %base, i8 zeroext %val) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i8* %base, i64 16
   store i8 %val, i8* %arrayidx, align 1
   ret i8* %arrayidx
 }
-; CHECK: @stbu
+; CHECK: @test_stbu
 ; CHECK: %entry
 ; CHECK-NEXT: stbu
 ; CHECK-NEXT: blr
 
-define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
+define i8* @test_stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i8* %base, i64 %offset
   store i8 %val, i8* %arrayidx, align 1
   ret i8* %arrayidx
 }
-; CHECK: @stbux
+; CHECK: @test_stbux
 ; CHECK: %entry
 ; CHECK-NEXT: stbux
 ; CHECK-NEXT: blr
 
-define i16* @sthu(i16* %base, i16 zeroext %val) nounwind {
+define i16* @test_sthu(i16* %base, i16 zeroext %val) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i16* %base, i64 16
   store i16 %val, i16* %arrayidx, align 2
   ret i16* %arrayidx
 }
-; CHECK: @sthu
+; CHECK: @test_sthu
 ; CHECK: %entry
 ; CHECK-NEXT: sthu
 ; CHECK-NEXT: blr
 
-define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
+define i16* @test_sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i16* %base, i64 %offset
   store i16 %val, i16* %arrayidx, align 2
   ret i16* %arrayidx
 }
-; CHECK: @sthux
+; CHECK: @test_sthux
 ; CHECK: %entry
 ; CHECK-NEXT: sldi
 ; CHECK-NEXT: sthux
 ; CHECK-NEXT: blr
 
-define i32* @stwu(i32* %base, i32 zeroext %val) nounwind {
+define i32* @test_stwu(i32* %base, i32 zeroext %val) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i32* %base, i64 16
   store i32 %val, i32* %arrayidx, align 4
   ret i32* %arrayidx
 }
-; CHECK: @stwu
+; CHECK: @test_stwu
 ; CHECK: %entry
 ; CHECK-NEXT: stwu
 ; CHECK-NEXT: blr
 
-define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
+define i32* @test_stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i32* %base, i64 %offset
   store i32 %val, i32* %arrayidx, align 4
   ret i32* %arrayidx
 }
-; CHECK: @stwux
+; CHECK: @test_stwux
 ; CHECK: %entry
 ; CHECK-NEXT: sldi
 ; CHECK-NEXT: stwux
 ; CHECK-NEXT: blr
 
-define i8* @stbu8(i8* %base, i64 %val) nounwind {
+define i8* @test_stbu8(i8* %base, i64 %val) nounwind {
 entry:
   %conv = trunc i64 %val to i8
   %arrayidx = getelementptr inbounds i8* %base, i64 16
   store i8 %conv, i8* %arrayidx, align 1
   ret i8* %arrayidx
 }
-; CHECK: @stbu
+; CHECK: @test_stbu8
 ; CHECK: %entry
 ; CHECK-NEXT: stbu
 ; CHECK-NEXT: blr
 
-define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
+define i8* @test_stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
 entry:
   %conv = trunc i64 %val to i8
   %arrayidx = getelementptr inbounds i8* %base, i64 %offset
   store i8 %conv, i8* %arrayidx, align 1
   ret i8* %arrayidx
 }
-; CHECK: @stbux
+; CHECK: @test_stbux8
 ; CHECK: %entry
 ; CHECK-NEXT: stbux
 ; CHECK-NEXT: blr
 
-define i16* @sthu8(i16* %base, i64 %val) nounwind {
+define i16* @test_sthu8(i16* %base, i64 %val) nounwind {
 entry:
   %conv = trunc i64 %val to i16
   %arrayidx = getelementptr inbounds i16* %base, i64 16
   store i16 %conv, i16* %arrayidx, align 2
   ret i16* %arrayidx
 }
-; CHECK: @sthu
+; CHECK: @test_sthu8
 ; CHECK: %entry
 ; CHECK-NEXT: sthu
 ; CHECK-NEXT: blr
 
-define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
+define i16* @test_sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
 entry:
   %conv = trunc i64 %val to i16
   %arrayidx = getelementptr inbounds i16* %base, i64 %offset
   store i16 %conv, i16* %arrayidx, align 2
   ret i16* %arrayidx
 }
-; CHECK: @sthux
+; CHECK: @test_sthux8
 ; CHECK: %entry
 ; CHECK-NEXT: sldi
 ; CHECK-NEXT: sthux
 ; CHECK-NEXT: blr
 
-define i32* @stwu8(i32* %base, i64 %val) nounwind {
+define i32* @test_stwu8(i32* %base, i64 %val) nounwind {
 entry:
   %conv = trunc i64 %val to i32
   %arrayidx = getelementptr inbounds i32* %base, i64 16
   store i32 %conv, i32* %arrayidx, align 4
   ret i32* %arrayidx
 }
-; CHECK: @stwu
+; CHECK: @test_stwu8
 ; CHECK: %entry
 ; CHECK-NEXT: stwu
 ; CHECK-NEXT: blr
 
-define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
+define i32* @test_stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
 entry:
   %conv = trunc i64 %val to i32
   %arrayidx = getelementptr inbounds i32* %base, i64 %offset
   store i32 %conv, i32* %arrayidx, align 4
   ret i32* %arrayidx
 }
-; CHECK: @stwux
+; CHECK: @test_stwux8
 ; CHECK: %entry
 ; CHECK-NEXT: sldi
 ; CHECK-NEXT: stwux
 ; CHECK-NEXT: blr
 
-define i64* @stdu(i64* %base, i64 %val) nounwind {
+define i64* @test_stdu(i64* %base, i64 %val) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i64* %base, i64 16
   store i64 %val, i64* %arrayidx, align 8
   ret i64* %arrayidx
 }
-; CHECK: @stdu
+; CHECK: @test_stdu
 ; CHECK: %entry
 ; CHECK-NEXT: stdu
 ; CHECK-NEXT: blr
 
-define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind {
+define i64* @test_stdux(i64* %base, i64 %val, i64 %offset) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i64* %base, i64 %offset
   store i64 %val, i64* %arrayidx, align 8
   ret i64* %arrayidx
 }
-; CHECK: @stdux
+; CHECK: @test_stdux
 ; CHECK: %entry
 ; CHECK-NEXT: sldi
 ; CHECK-NEXT: stdux
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index 2a17e740ea01..5b8dead16893 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index 54de6060d0f0..fb3bd7cd57e6 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/sub-bv-types.ll b/test/CodeGen/PowerPC/sub-bv-types.ll
new file mode 100644
index 000000000000..c72fae6a2194
--- /dev/null
+++ b/test/CodeGen/PowerPC/sub-bv-types.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @autogen_SD10521() {
+BB:
+  %Shuff7 = shufflevector <16 x i16> zeroinitializer, <16 x i16> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 undef, i32 22, i32 undef, i32 26, i32 undef, i32 30>
+  br label %CF
+
+CF:                                               ; preds = %CF78, %CF, %BB
+  %I27 = insertelement <16 x i16> %Shuff7, i16 1360, i32 8
+  %B28 = sub <16 x i16> %I27, %Shuff7
+  br i1 undef, label %CF, label %CF78
+
+CF78:                                             ; preds = %CF
+  %B42 = xor <16 x i16> %B28, %Shuff7
+  br label %CF
+}
diff --git a/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/test/CodeGen/PowerPC/subsumes-pred-regs.ll
new file mode 100644
index 000000000000..97ac788164ab
--- /dev/null
+++ b/test/CodeGen/PowerPC/subsumes-pred-regs.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mcpu=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define zeroext i1 @test1() unnamed_addr #0 align 2 {
+
+; CHECK-LABEL: @test1
+
+entry:
+  br i1 undef, label %lor.end, label %lor.rhs
+
+lor.rhs:                                          ; preds = %entry
+  unreachable
+
+lor.end:                                          ; preds = %entry
+  br i1 undef, label %land.rhs, label %if.then
+
+if.then:                                          ; preds = %lor.end
+  br i1 undef, label %return, label %if.end.i24
+
+if.end.i24:                                       ; preds = %if.then
+  %0 = load i32* undef, align 4
+  %lnot.i.i16.i23 = icmp eq i32 %0, 0
+  br i1 %lnot.i.i16.i23, label %if.end7.i37, label %test.exit27.i34
+
+test.exit27.i34: ; preds = %if.end.i24
+  br i1 undef, label %return, label %if.end7.i37
+
+if.end7.i37:                                      ; preds = %test.exit27.i34, %if.end.i24
+  %tobool.i.i36 = icmp eq i8 undef, 0
+  br i1 %tobool.i.i36, label %return, label %if.then9.i39
+
+if.then9.i39:                                     ; preds = %if.end7.i37
+  br i1 %lnot.i.i16.i23, label %return, label %lor.rhs.i.i49
+
+; CHECK: .LBB0_7:
+; CHECK:	beq 1, .LBB0_10
+; CHECK:	beq 0, .LBB0_10
+; CHECK: .LBB0_9:
+
+lor.rhs.i.i49:                                    ; preds = %if.then9.i39
+  %cmp.i.i.i.i48 = icmp ne i64 undef, 0
+  br label %return
+
+land.rhs:                                         ; preds = %lor.end
+  br i1 undef, label %return, label %if.end.i
+
+if.end.i:                                         ; preds = %land.rhs
+  br i1 undef, label %return, label %if.then9.i
+
+if.then9.i:                                       ; preds = %if.end.i
+  br i1 undef, label %return, label %lor.rhs.i.i
+
+lor.rhs.i.i:                                      ; preds = %if.then9.i
+  %cmp.i.i.i.i = icmp ne i64 undef, 0
+  br label %return
+
+return:                                           ; preds = %lor.rhs.i.i, %if.then9.i, %if.end.i, %land.rhs, %lor.rhs.i.i49, %if.then9.i39, %if.end7.i37, %test.exit27.i34, %if.then
+  %retval.0 = phi i1 [ false, %if.then ], [ false, %test.exit27.i34 ], [ true, %if.end7.i37 ], [ true, %if.then9.i39 ], [ %cmp.i.i.i.i48, %lor.rhs.i.i49 ], [ false, %land.rhs ], [ true, %if.end.i ], [ true, %if.then9.i ], [ %cmp.i.i.i.i, %lor.rhs.i.i ]
+  ret i1 %retval.0
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
index 91ff5797389b..7c51b67aeecb 100644
--- a/test/CodeGen/PowerPC/svr4-redzone.ll
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -7,11 +7,11 @@ entry:
 	%0 = add i32 1, 2
 	ret void
 }
-; PPC32: regalloc:
+; PPC32-LABEL: regalloc:
 ; PPC32-NOT: stwu 1, -{{[0-9]+}}(1)
 ; PPC32: blr
 
-; PPC64: regalloc:
+; PPC64-LABEL: regalloc:
 ; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
 ; PPC64: blr
 
@@ -20,10 +20,10 @@ entry:
 	%0 = alloca i8, i32 4
 	ret void
 }
-; PPC32: smallstack:
+; PPC32-LABEL: smallstack:
 ; PPC32: stwu 1, -16(1)
 
-; PPC64: smallstack:
+; PPC64-LABEL: smallstack:
 ; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
 ; PPC64: blr
 
@@ -32,8 +32,8 @@ entry:
 	%0 = alloca i8, i32 230
 	ret void
 }
-; PPC32: bigstack:
+; PPC32-LABEL: bigstack:
 ; PPC32: stwu 1, -240(1)
 
-; PPC64: bigstack:
+; PPC64-LABEL: bigstack:
 ; PPC64: stdu 1, -352(1)
diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll
index 20d8fe46ea17..c2faf9062469 100644
--- a/test/CodeGen/PowerPC/tls-2.ll
+++ b/test/CodeGen/PowerPC/tls-2.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-freebsd10.0"
 
 @a = thread_local global i32 0, align 4
 
-;CHECK:          localexec:
+;CHECK-LABEL:          localexec:
 define i32 @localexec() nounwind {
 entry:
 ;CHECK:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll
deleted file mode 100644
index ffc0db0d14cb..000000000000
--- a/test/CodeGen/PowerPC/tls-gd-obj.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: llvm-readobj -r | FileCheck %s
-
-; Test correct relocation generation for thread-local storage using
-; the general dynamic model and integrated assembly.
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-@a = thread_local global i32 0, align 4
-
-define signext i32 @main() nounwind {
-entry:
-  %retval = alloca i32, align 4
-  store i32 0, i32* %retval
-  %0 = load i32* @a, align 4
-  ret i32 %0
-}
-
-; Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO,
-; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
-; for the call to __tls_get_addr.
-;
-; CHECK: Relocations [
-; CHECK:   Section (1) .text {
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSGD          a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
-; CHECK:   }
-; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll
deleted file mode 100644
index 0f7a35295234..000000000000
--- a/test/CodeGen/PowerPC/tls-ie-obj.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
-; RUN: llvm-readobj -r | FileCheck %s
-
-; Test correct relocation generation for thread-local storage
-; using the initial-exec model and integrated assembly.
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-@a = external thread_local global i32
-
-define signext i32 @main() nounwind {
-entry:
-  %retval = alloca i32, align 4
-  store i32 0, i32* %retval
-  %0 = load i32* @a, align 4
-  ret i32 %0
-}
-
-; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
-; accessing external variable a.
-;
-; CHECK: Relocations [
-; CHECK:   Section (1) .text {
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA    a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLS               a
-; CHECK:   }
-; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll
deleted file mode 100644
index 29ee87684552..000000000000
--- a/test/CodeGen/PowerPC/tls-ld-obj.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: llvm-readobj -r | FileCheck %s
-
-; Test correct relocation generation for thread-local storage using
-; the local dynamic model.
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-@a = hidden thread_local global i32 0, align 4
-
-define signext i32 @main() nounwind {
-entry:
-  %retval = alloca i32, align 4
-  store i32 0, i32* %retval
-  %0 = load i32* @a, align 4
-  ret i32 %0
-}
-
-; Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO,
-; R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for
-; accessing external variable a, and R_PPC64_REL24 for the call to
-; __tls_get_addr.
-;
-; CHECK: Relocations [
-; CHECK:   Section (1) .text {
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSLD          a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_HA    a
-; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_LO    a
-; CHECK:   }
-; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 2daa60ab37f2..4e0a822399dd 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -5,8 +5,8 @@ target triple = "powerpc64-unknown-freebsd10.0"
 
 @a = thread_local global i32 0, align 4
 
-;OPT0:          localexec:
-;OPT1:          localexec:
+;OPT0-LABEL:          localexec:
+;OPT1-LABEL:          localexec:
 define i32 @localexec() nounwind {
 entry:
 ;OPT0:          addis [[REG1:[0-9]+]], 13, a@tprel@ha
diff --git a/test/CodeGen/PowerPC/unal-altivec.ll b/test/CodeGen/PowerPC/unal-altivec.ll
new file mode 100644
index 000000000000..7f333a1c508b
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal-altivec.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @foo(float* noalias nocapture %a, float* noalias nocapture %b) #0 {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float* %b, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>* %1, align 4
+  %.sum11 = or i64 %index, 4
+  %2 = getelementptr float* %b, i64 %.sum11
+  %3 = bitcast float* %2 to <4 x float>*
+  %wide.load8 = load <4 x float>* %3, align 4
+  %4 = fadd <4 x float> %wide.load, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %5 = fadd <4 x float> %wide.load8, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %6 = getelementptr inbounds float* %a, i64 %index
+  %7 = bitcast float* %6 to <4 x float>*
+  store <4 x float> %4, <4 x float>* %7, align 4
+  %.sum12 = or i64 %index, 4
+  %8 = getelementptr float* %a, i64 %.sum12
+  %9 = bitcast float* %8 to <4 x float>*
+  store <4 x float> %5, <4 x float>* %9, align 4
+  %index.next = add i64 %index, 8
+  %10 = icmp eq i64 %index.next, 16000
+  br i1 %10, label %for.end, label %vector.body
+
+; CHECK: @foo
+; CHECK-DAG: li [[C0:[0-9]+]], 0
+; CHECK-DAG: li [[C16:[0-9]+]], 16
+; CHECK-DAG: li [[C31:[0-9]+]], 31
+; CHECK-DAG: lvx [[CNST:[0-9]+]],
+; CHECK: .LBB0_1:
+; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[C0]]
+; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], [[C16]]
+; CHECK-DAG: lvx [[LD3:[0-9]+]], [[B3]], [[C31]]
+; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[PC]]
+; CHECK-DAG: vperm [[R2:[0-9]+]], [[LD2]], [[LD3]], [[PC]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R2]], [[CNST]]
+; CHECK: blr
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/unal-altivec2.ll b/test/CodeGen/PowerPC/unal-altivec2.ll
new file mode 100644
index 000000000000..7464675470f9
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal-altivec2.ll
@@ -0,0 +1,166 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(float* noalias nocapture %x, float* noalias nocapture readonly %y) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+; CHECK-LABEL: @foo
+; CHECK: lvsl
+; CHECK: blr
+  %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
+  %0 = getelementptr inbounds float* %y, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load)
+  %3 = getelementptr inbounds float* %x, i64 %index
+  %4 = bitcast float* %3 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %4, align 4
+  %index.next = add i64 %index, 4
+  %5 = getelementptr inbounds float* %y, i64 %index.next
+  %6 = bitcast float* %5 to <4 x float>*
+  %wide.load.1 = load <4 x float>* %6, align 4
+  %7 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.1)
+  %8 = getelementptr inbounds float* %x, i64 %index.next
+  %9 = bitcast float* %8 to <4 x float>*
+  store <4 x float> %7, <4 x float>* %9, align 4
+  %index.next.1 = add i64 %index.next, 4
+  %10 = getelementptr inbounds float* %y, i64 %index.next.1
+  %11 = bitcast float* %10 to <4 x float>*
+  %wide.load.2 = load <4 x float>* %11, align 4
+  %12 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.2)
+  %13 = getelementptr inbounds float* %x, i64 %index.next.1
+  %14 = bitcast float* %13 to <4 x float>*
+  store <4 x float> %12, <4 x float>* %14, align 4
+  %index.next.2 = add i64 %index.next.1, 4
+  %15 = getelementptr inbounds float* %y, i64 %index.next.2
+  %16 = bitcast float* %15 to <4 x float>*
+  %wide.load.3 = load <4 x float>* %16, align 4
+  %17 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.3)
+  %18 = getelementptr inbounds float* %x, i64 %index.next.2
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> %17, <4 x float>* %19, align 4
+  %index.next.3 = add i64 %index.next.2, 4
+  %20 = getelementptr inbounds float* %y, i64 %index.next.3
+  %21 = bitcast float* %20 to <4 x float>*
+  %wide.load.4 = load <4 x float>* %21, align 4
+  %22 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.4)
+  %23 = getelementptr inbounds float* %x, i64 %index.next.3
+  %24 = bitcast float* %23 to <4 x float>*
+  store <4 x float> %22, <4 x float>* %24, align 4
+  %index.next.4 = add i64 %index.next.3, 4
+  %25 = getelementptr inbounds float* %y, i64 %index.next.4
+  %26 = bitcast float* %25 to <4 x float>*
+  %wide.load.5 = load <4 x float>* %26, align 4
+  %27 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.5)
+  %28 = getelementptr inbounds float* %x, i64 %index.next.4
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> %27, <4 x float>* %29, align 4
+  %index.next.5 = add i64 %index.next.4, 4
+  %30 = getelementptr inbounds float* %y, i64 %index.next.5
+  %31 = bitcast float* %30 to <4 x float>*
+  %wide.load.6 = load <4 x float>* %31, align 4
+  %32 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.6)
+  %33 = getelementptr inbounds float* %x, i64 %index.next.5
+  %34 = bitcast float* %33 to <4 x float>*
+  store <4 x float> %32, <4 x float>* %34, align 4
+  %index.next.6 = add i64 %index.next.5, 4
+  %35 = getelementptr inbounds float* %y, i64 %index.next.6
+  %36 = bitcast float* %35 to <4 x float>*
+  %wide.load.7 = load <4 x float>* %36, align 4
+  %37 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.7)
+  %38 = getelementptr inbounds float* %x, i64 %index.next.6
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> %37, <4 x float>* %39, align 4
+  %index.next.7 = add i64 %index.next.6, 4
+  %40 = getelementptr inbounds float* %y, i64 %index.next.7
+  %41 = bitcast float* %40 to <4 x float>*
+  %wide.load.8 = load <4 x float>* %41, align 4
+  %42 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.8)
+  %43 = getelementptr inbounds float* %x, i64 %index.next.7
+  %44 = bitcast float* %43 to <4 x float>*
+  store <4 x float> %42, <4 x float>* %44, align 4
+  %index.next.8 = add i64 %index.next.7, 4
+  %45 = getelementptr inbounds float* %y, i64 %index.next.8
+  %46 = bitcast float* %45 to <4 x float>*
+  %wide.load.9 = load <4 x float>* %46, align 4
+  %47 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.9)
+  %48 = getelementptr inbounds float* %x, i64 %index.next.8
+  %49 = bitcast float* %48 to <4 x float>*
+  store <4 x float> %47, <4 x float>* %49, align 4
+  %index.next.9 = add i64 %index.next.8, 4
+  %50 = getelementptr inbounds float* %y, i64 %index.next.9
+  %51 = bitcast float* %50 to <4 x float>*
+  %wide.load.10 = load <4 x float>* %51, align 4
+  %52 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.10)
+  %53 = getelementptr inbounds float* %x, i64 %index.next.9
+  %54 = bitcast float* %53 to <4 x float>*
+  store <4 x float> %52, <4 x float>* %54, align 4
+  %index.next.10 = add i64 %index.next.9, 4
+  %55 = getelementptr inbounds float* %y, i64 %index.next.10
+  %56 = bitcast float* %55 to <4 x float>*
+  %wide.load.11 = load <4 x float>* %56, align 4
+  %57 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.11)
+  %58 = getelementptr inbounds float* %x, i64 %index.next.10
+  %59 = bitcast float* %58 to <4 x float>*
+  store <4 x float> %57, <4 x float>* %59, align 4
+  %index.next.11 = add i64 %index.next.10, 4
+  %60 = getelementptr inbounds float* %y, i64 %index.next.11
+  %61 = bitcast float* %60 to <4 x float>*
+  %wide.load.12 = load <4 x float>* %61, align 4
+  %62 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.12)
+  %63 = getelementptr inbounds float* %x, i64 %index.next.11
+  %64 = bitcast float* %63 to <4 x float>*
+  store <4 x float> %62, <4 x float>* %64, align 4
+  %index.next.12 = add i64 %index.next.11, 4
+  %65 = getelementptr inbounds float* %y, i64 %index.next.12
+  %66 = bitcast float* %65 to <4 x float>*
+  %wide.load.13 = load <4 x float>* %66, align 4
+  %67 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.13)
+  %68 = getelementptr inbounds float* %x, i64 %index.next.12
+  %69 = bitcast float* %68 to <4 x float>*
+  store <4 x float> %67, <4 x float>* %69, align 4
+  %index.next.13 = add i64 %index.next.12, 4
+  %70 = getelementptr inbounds float* %y, i64 %index.next.13
+  %71 = bitcast float* %70 to <4 x float>*
+  %wide.load.14 = load <4 x float>* %71, align 4
+  %72 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.14)
+  %73 = getelementptr inbounds float* %x, i64 %index.next.13
+  %74 = bitcast float* %73 to <4 x float>*
+  store <4 x float> %72, <4 x float>* %74, align 4
+  %index.next.14 = add i64 %index.next.13, 4
+  %75 = getelementptr inbounds float* %y, i64 %index.next.14
+  %76 = bitcast float* %75 to <4 x float>*
+  %wide.load.15 = load <4 x float>* %76, align 4
+  %77 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.15)
+  %78 = getelementptr inbounds float* %x, i64 %index.next.14
+  %79 = bitcast float* %78 to <4 x float>*
+  store <4 x float> %77, <4 x float>* %79, align 4
+  %index.next.15 = add i64 %index.next.14, 4
+  %80 = icmp eq i64 %index.next.15, 2048
+  br i1 %80, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm_cos_v4f32(<4 x float>) #1
+
+define <2 x double> @bar(double* %x) {
+entry:
+  %p = bitcast double* %x to <2 x double>*
+  %r = load <2 x double>* %p, align 8
+
+; CHECK-LABEL: @bar
+; CHECK-NOT: lvsl
+; CHECK: blr
+
+  ret <2 x double> %r
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll
index 169bd787c0c1..9f29e31cb902 100644
--- a/test/CodeGen/PowerPC/unal4-std.ll
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -24,4 +24,4 @@ if.end210:                                        ; preds = %entry
 ; CHECK: stdx {{[0-9]+}}, 0,
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/unwind-dw2-g.ll b/test/CodeGen/PowerPC/unwind-dw2-g.ll
new file mode 100644
index 000000000000..260d03664295
--- /dev/null
+++ b/test/CodeGen/PowerPC/unwind-dw2-g.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  call void @llvm.eh.unwind.init(), !dbg !9
+  ret void, !dbg !10
+}
+
+; CHECK: @foo
+; CHECK-NOT: .cfi_offset vrsave
+; CHECK: blr
+
+; Function Attrs: nounwind
+declare void @llvm.eh.unwind.init() #0
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/unwind-dw2.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"/tmp/unwind-dw2.c", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/unwind-dw2.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!9 = metadata !{i32 2, i32 0, metadata !4, null}
+!10 = metadata !{i32 3, i32 0, metadata !4, null}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/PowerPC/unwind-dw2.ll b/test/CodeGen/PowerPC/unwind-dw2.ll
new file mode 100644
index 000000000000..e58edff65d5f
--- /dev/null
+++ b/test/CodeGen/PowerPC/unwind-dw2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  call void @llvm.eh.unwind.init()
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.eh.unwind.init() #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll
index e65148aff03a..4236fabea0a6 100644
--- a/test/CodeGen/PowerPC/vaddsplat.ll
+++ b/test/CodeGen/PowerPC/vaddsplat.ll
@@ -16,7 +16,7 @@ define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
        ret void
 }
 
-; CHECK: test_v4i32_pos_even:
+; CHECK-LABEL: test_v4i32_pos_even:
 ; CHECK: vspltisw [[REG1:[0-9]+]], 9
 ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
 
@@ -27,7 +27,7 @@ define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
        ret void
 }
 
-; CHECK: test_v4i32_neg_even:
+; CHECK-LABEL: test_v4i32_neg_even:
 ; CHECK: vspltisw [[REG1:[0-9]+]], -14
 ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
 
@@ -38,7 +38,7 @@ define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
        ret void
 }
 
-; CHECK: test_v8i16_pos_even:
+; CHECK-LABEL: test_v8i16_pos_even:
 ; CHECK: vspltish [[REG1:[0-9]+]], 15
 ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
 
@@ -49,7 +49,7 @@ define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
        ret void
 }
 
-; CHECK: test_v8i16_neg_even:
+; CHECK-LABEL: test_v8i16_neg_even:
 ; CHECK: vspltish [[REG1:[0-9]+]], -16
 ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
 
@@ -60,7 +60,7 @@ define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
        ret void
 }
 
-; CHECK: test_v16i8_pos_even:
+; CHECK-LABEL: test_v16i8_pos_even:
 ; CHECK: vspltisb [[REG1:[0-9]+]], 8
 ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
 
@@ -71,7 +71,7 @@ define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
        ret void
 }
 
-; CHECK: test_v16i8_neg_even:
+; CHECK-LABEL: test_v16i8_neg_even:
 ; CHECK: vspltisb [[REG1:[0-9]+]], -9
 ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
 
@@ -82,7 +82,7 @@ define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
        ret void
 }
 
-; CHECK: test_v4i32_pos_odd:
+; CHECK-LABEL: test_v4i32_pos_odd:
 ; CHECK: vspltisw [[REG2:[0-9]+]], -16
 ; CHECK: vspltisw [[REG1:[0-9]+]], 11
 ; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
@@ -94,7 +94,7 @@ define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
        ret void
 }
 
-; CHECK: test_v4i32_neg_odd:
+; CHECK-LABEL: test_v4i32_neg_odd:
 ; CHECK: vspltisw [[REG2:[0-9]+]], -16
 ; CHECK: vspltisw [[REG1:[0-9]+]], -11
 ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
@@ -106,7 +106,7 @@ define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
        ret void
 }
 
-; CHECK: test_v8i16_pos_odd:
+; CHECK-LABEL: test_v8i16_pos_odd:
 ; CHECK: vspltish [[REG2:[0-9]+]], -16
 ; CHECK: vspltish [[REG1:[0-9]+]], 15
 ; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
@@ -118,7 +118,7 @@ define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
        ret void
 }
 
-; CHECK: test_v8i16_neg_odd:
+; CHECK-LABEL: test_v8i16_neg_odd:
 ; CHECK: vspltish [[REG2:[0-9]+]], -16
 ; CHECK: vspltish [[REG1:[0-9]+]], -15
 ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
@@ -130,7 +130,7 @@ define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
        ret void
 }
 
-; CHECK: test_v16i8_pos_odd:
+; CHECK-LABEL: test_v16i8_pos_odd:
 ; CHECK: vspltisb [[REG2:[0-9]+]], -16
 ; CHECK: vspltisb [[REG1:[0-9]+]], 1
 ; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
@@ -142,7 +142,7 @@ define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
        ret void
 }
 
-; CHECK: test_v16i8_neg_odd:
+; CHECK-LABEL: test_v16i8_neg_odd:
 ; CHECK: vspltisb [[REG2:[0-9]+]], -16
 ; CHECK: vspltisb [[REG1:[0-9]+]], -1
 ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]]
diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll
index 90f0480d6ad2..dfd205634b1f 100644
--- a/test/CodeGen/PowerPC/varargs.ll
+++ b/test/CodeGen/PowerPC/varargs.ll
@@ -7,14 +7,14 @@ define i8* @test1(i8** %foo) nounwind {
   ret i8* %A
 }
 
-; P32: test1:
+; P32-LABEL: test1:
 ; P32: lwz r2, 0(r3)
 ; P32: addi r4, r2, 4
 ; P32: stw r4, 0(r3)
 ; P32: lwz r3, 0(r2)
 ; P32: blr 
 
-; P64: test1:
+; P64-LABEL: test1:
 ; P64: ld r2, 0(r3)
 ; P64: addi r4, r2, 8
 ; P64: std r4, 0(r3)
diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll
new file mode 100644
index 000000000000..3239cf6c06ab
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s2 = type { i64, <4 x float> }
+
+@ve = external global <4 x float>
+@n = external global i64
+
+; Function Attrs: nounwind
+define void @test1(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, <4 x float> inreg %vs.coerce) #0 {
+entry:
+  store <4 x float> %vs.coerce, <4 x float>* @ve, align 16
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK: stvx 2,
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 {
+entry:
+  %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
+  %0 = load i64* %m, align 8
+  store i64 %0, i64* @n, align 8
+  %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
+  %1 = load <4 x float>* %v, align 16
+  store <4 x float> %1, <4 x float>* @ve, align 16
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: ld {{[0-9]+}}, 112(1)
+; CHECK: li [[REG16:[0-9]+]], 16
+; CHECK: addi [[REGB:[0-9]+]], 1, 112
+; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 {
+entry:
+  %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
+  %0 = load i64* %m, align 8
+  store i64 %0, i64* @n, align 8
+  %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
+  %1 = load <4 x float>* %v, align 16
+  store <4 x float> %1, <4 x float>* @ve, align 16
+  ret void
+
+; CHECK-LABEL: @test3
+; CHECK: ld {{[0-9]+}}, 128(1)
+; CHECK: li [[REG16:[0-9]+]], 16
+; CHECK: addi [[REGB:[0-9]+]], 1, 128
+; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index eb41667610cd..83e0e0263061 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -14,7 +14,7 @@ define <2 x i8> @v2si8_cmp(<2 x i8> %x, <2 x i8> %y) nounwind readnone {
   %sext = sext <2 x i1> %cmp to <2 x i8>
   ret <2 x i8> %sext
 }
-; CHECK: v2si8_cmp:
+; CHECK-LABEL: v2si8_cmp:
 ; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -23,7 +23,7 @@ define <4 x i8> @v4si8_cmp(<4 x i8> %x, <4 x i8> %y) nounwind readnone {
   %sext = sext <4 x i1> %cmp to <4 x i8>
   ret <4 x i8> %sext
 }
-; CHECK: v4si8_cmp:
+; CHECK-LABEL: v4si8_cmp:
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -32,7 +32,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
   %sext = sext <8 x i1> %cmp to <8 x i8>
   ret <8 x i8> %sext
 }
-; CHECK: v8si8_cmp:
+; CHECK-LABEL: v8si8_cmp:
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -43,7 +43,7 @@ define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK: v16si8_cmp_eq:
+; CHECK-LABEL: v16si8_cmp_eq:
 ; CHECK: vcmpequb 2, 2, 3
 
 define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
@@ -52,7 +52,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK:     v16si8_cmp_ne:
+; CHECK-LABEL:     v16si8_cmp_ne:
 ; CHECK:     vcmpequb [[RET:[0-9]+]], 2, 3
 ; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
@@ -62,7 +62,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK:      v16si8_cmp_le:
+; CHECK-LABEL:      v16si8_cmp_le:
 ; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -73,7 +73,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK:      v16ui8_cmp_le:
+; CHECK-LABEL:      v16ui8_cmp_le:
 ; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -84,7 +84,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK: v16si8_cmp_lt:
+; CHECK-LABEL: v16si8_cmp_lt:
 ; CHECK: vcmpgtsb 2, 3, 2
 
 define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
@@ -93,7 +93,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK: v16ui8_cmp_lt:
+; CHECK-LABEL: v16ui8_cmp_lt:
 ; CHECK: vcmpgtub 2, 3, 2
 
 define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
@@ -102,7 +102,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK: v16si8_cmp_gt:
+; CHECK-LABEL: v16si8_cmp_gt:
 ; CHECK: vcmpgtsb 2, 2, 3
 
 define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
@@ -111,7 +111,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK: v16ui8_cmp_gt:
+; CHECK-LABEL: v16ui8_cmp_gt:
 ; CHECK: vcmpgtub 2, 2, 3
 
 define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
@@ -120,7 +120,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK:      v16si8_cmp_ge:
+; CHECK-LABEL:      v16si8_cmp_ge:
 ; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
 ; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
@@ -131,7 +131,7 @@ entry:
   %sext = sext <16 x i1> %cmp to <16 x i8>
   ret <16 x i8> %sext
 }
-; CHECK:      v16ui8_cmp_ge:
+; CHECK-LABEL:      v16ui8_cmp_ge:
 ; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
 ; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
@@ -142,7 +142,7 @@ define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
   %sext = sext <32 x i1> %cmp to <32 x i8>
   ret <32 x i8> %sext
 }
-; CHECK: v32si8_cmp:
+; CHECK-LABEL: v32si8_cmp:
 ; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
@@ -152,7 +152,7 @@ define <2 x i16> @v2si16_cmp(<2 x i16> %x, <2 x i16> %y) nounwind readnone {
   %sext = sext <2 x i1> %cmp to <2 x i16>
   ret <2 x i16> %sext
 }
-; CHECK: v2si16_cmp:
+; CHECK-LABEL: v2si16_cmp:
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -161,7 +161,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
   %sext = sext <4 x i1> %cmp to <4 x i16>
   ret <4 x i16> %sext
 }
-; CHECK: v4si16_cmp:
+; CHECK-LABEL: v4si16_cmp:
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -173,7 +173,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK: v8si16_cmp_eq:
+; CHECK-LABEL: v8si16_cmp_eq:
 ; CHECK: vcmpequh 2, 2, 3
 
 define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
@@ -182,7 +182,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK:      v8si16_cmp_ne:
+; CHECK-LABEL:      v8si16_cmp_ne:
 ; CHECK:      vcmpequh [[RET:[0-9]+]], 2, 3
 ; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
@@ -192,7 +192,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK:      v8si16_cmp_le:
+; CHECK-LABEL:      v8si16_cmp_le:
 ; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -203,7 +203,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK:      v8ui16_cmp_le:
+; CHECK-LABEL:      v8ui16_cmp_le:
 ; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -214,7 +214,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK: v8si16_cmp_lt:
+; CHECK-LABEL: v8si16_cmp_lt:
 ; CHECK: vcmpgtsh 2, 3, 2
 
 define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
@@ -223,7 +223,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK: v8ui16_cmp_lt:
+; CHECK-LABEL: v8ui16_cmp_lt:
 ; CHECK: vcmpgtuh 2, 3, 2
 
 define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
@@ -232,7 +232,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK: v8si16_cmp_gt:
+; CHECK-LABEL: v8si16_cmp_gt:
 ; CHECK: vcmpgtsh 2, 2, 3
 
 define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
@@ -241,7 +241,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK: v8ui16_cmp_gt:
+; CHECK-LABEL: v8ui16_cmp_gt:
 ; CHECK: vcmpgtuh 2, 2, 3
 
 define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
@@ -250,7 +250,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK:      v8si16_cmp_ge:
+; CHECK-LABEL:      v8si16_cmp_ge:
 ; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
 ; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
@@ -261,7 +261,7 @@ entry:
   %sext = sext <8 x i1> %cmp to <8 x i16>
   ret <8 x i16> %sext
 }
-; CHECK:      v8ui16_cmp_ge:
+; CHECK-LABEL:      v8ui16_cmp_ge:
 ; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
 ; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
@@ -272,7 +272,7 @@ define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
   %sext = sext <16 x i1> %cmp to <16 x i16>
   ret <16 x i16> %sext
 }
-; CHECK: v16si16_cmp:
+; CHECK-LABEL: v16si16_cmp:
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
@@ -282,7 +282,7 @@ define <32 x i16> @v32si16_cmp(<32 x i16> %x, <32 x i16> %y) nounwind readnone {
   %sext = sext <32 x i1> %cmp to <32 x i16>
   ret <32 x i16> %sext
 }
-; CHECK: v32si16_cmp:
+; CHECK-LABEL: v32si16_cmp:
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
@@ -294,7 +294,7 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone {
   %sext = sext <2 x i1> %cmp to <2 x i32>
   ret <2 x i32> %sext
 }
-; CHECK: v2si32_cmp:
+; CHECK-LABEL: v2si32_cmp:
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -306,7 +306,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK: v4si32_cmp_eq:
+; CHECK-LABEL: v4si32_cmp_eq:
 ; CHECK: vcmpequw 2, 2, 3
 
 define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
@@ -315,7 +315,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK:      v4si32_cmp_ne:
+; CHECK-LABEL:      v4si32_cmp_ne:
 ; CHECK:      vcmpequw [[RCMP:[0-9]+]], 2, 3
 ; CHECK-NEXT: vnor     2, [[RCMP]], [[RCMP]]
 
@@ -325,7 +325,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK:      v4si32_cmp_le:
+; CHECK-LABEL:      v4si32_cmp_le:
 ; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -336,7 +336,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK:      v4ui32_cmp_le:
+; CHECK-LABEL:      v4ui32_cmp_le:
 ; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -347,7 +347,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK: v4si32_cmp_lt:
+; CHECK-LABEL: v4si32_cmp_lt:
 ; CHECK: vcmpgtsw 2, 3, 2
 
 define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
@@ -356,7 +356,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK: v4ui32_cmp_lt:
+; CHECK-LABEL: v4ui32_cmp_lt:
 ; CHECK: vcmpgtuw 2, 3, 2
 
 define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
@@ -365,7 +365,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK: v4si32_cmp_gt:
+; CHECK-LABEL: v4si32_cmp_gt:
 ; CHECK: vcmpgtsw 2, 2, 3
 
 define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
@@ -374,7 +374,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK: v4ui32_cmp_gt:
+; CHECK-LABEL: v4ui32_cmp_gt:
 ; CHECK: vcmpgtuw 2, 2, 3
 
 define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
@@ -383,7 +383,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK:      v4si32_cmp_ge:
+; CHECK-LABEL:      v4si32_cmp_ge:
 ; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
 ; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
@@ -394,7 +394,7 @@ entry:
   %sext = sext <4 x i1> %cmp to <4 x i32>
   ret <4 x i32> %sext
 }
-; CHECK:      v4ui32_cmp_ge:
+; CHECK-LABEL:      v4ui32_cmp_ge:
 ; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
 ; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
@@ -405,7 +405,7 @@ define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
   %sext = sext <8 x i1> %cmp to <8 x i32>
   ret <8 x i32> %sext
 }
-; CHECK: v8si32_cmp:
+; CHECK-LABEL: v8si32_cmp:
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
@@ -415,7 +415,7 @@ define <16 x i32> @v16si32_cmp(<16 x i32> %x, <16 x i32> %y) nounwind readnone {
   %sext = sext <16 x i1> %cmp to <16 x i32>
   ret <16 x i32> %sext
 }
-; CHECK: v16si32_cmp:
+; CHECK-LABEL: v16si32_cmp:
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
@@ -427,7 +427,7 @@ define <32 x i32> @v32si32_cmp(<32 x i32> %x, <32 x i32> %y) nounwind readnone {
   %sext = sext <32 x i1> %cmp to <32 x i32>
   ret <32 x i32> %sext
 }
-; CHECK: v32si32_cmp:
+; CHECK-LABEL: v32si32_cmp:
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
@@ -445,7 +445,7 @@ entry:
   %0 = bitcast <2 x i32> %sext to <2 x float>
   ret <2 x float> %0
 }
-; CHECK: v2f32_cmp:
+; CHECK-LABEL: v2f32_cmp:
 ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 
 
@@ -458,7 +458,7 @@ entry:
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK: v4f32_cmp_eq:
+; CHECK-LABEL: v4f32_cmp_eq:
 ; CHECK: vcmpeqfp 2, 2, 3
 
 define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone {
@@ -468,7 +468,7 @@ entry:
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK:      v4f32_cmp_ne:
+; CHECK-LABEL:      v4f32_cmp_ne:
 ; CHECK:      vcmpeqfp [[RET:[0-9]+]], 2, 3
 ; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
@@ -479,7 +479,7 @@ entry:
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK:      v4f32_cmp_le:
+; CHECK-LABEL:      v4f32_cmp_le:
 ; CHECK:      vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3
 ; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2
 ; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
@@ -491,7 +491,7 @@ entry:
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK: v4f32_cmp_lt:
+; CHECK-LABEL: v4f32_cmp_lt:
 ; CHECK: vcmpgtfp 2, 3, 2
 
 define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone {
@@ -501,7 +501,7 @@ entry:
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK: v4f32_cmp_ge:
+; CHECK-LABEL: v4f32_cmp_ge:
 ; CHECK: vcmpgefp 2, 2, 3
 
 define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone {
@@ -511,7 +511,7 @@ entry:
   %0 = bitcast <4 x i32> %sext to <4 x float>
   ret <4 x float> %0
 }
-; CHECK: v4f32_cmp_gt:
+; CHECK-LABEL: v4f32_cmp_gt:
 ; CHECK: vcmpgtfp 2, 2, 3
 
 
@@ -522,6 +522,6 @@ entry:
   %0 = bitcast <8 x i32> %sext to <8 x float>
   ret <8 x float> %0
 }
-; CHECK: v8f32_cmp:
+; CHECK-LABEL: v8f32_cmp:
 ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index e4799e50e6ad..f16b9f511f53 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -17,14 +17,14 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
 	store <4 x float> %tmp13, <4 x float>* %P3
 	ret void
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: CPI
 }
 
 define <4 x i32> @test_30() nounwind {
 	ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 >
 
-; CHECK: test_30:
+; CHECK-LABEL: test_30:
 ; CHECK: vspltisw
 ; CHECK-NEXT: vadduwm
 ; CHECK-NEXT: blr
@@ -33,7 +33,7 @@ define <4 x i32> @test_30() nounwind {
 define <4 x i32> @test_29() nounwind {
 	ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 >
 
-; CHECK: test_29:
+; CHECK-LABEL: test_29:
 ; CHECK: vspltisw
 ; CHECK-NEXT: vspltisw
 ; CHECK-NEXT: vsubuwm
@@ -43,7 +43,7 @@ define <4 x i32> @test_29() nounwind {
 define <8 x i16> @test_n30() nounwind {
 	ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 >
 
-; CHECK: test_n30:
+; CHECK-LABEL: test_n30:
 ; CHECK: vspltish
 ; CHECK-NEXT: vadduhm
 ; CHECK-NEXT: blr
@@ -52,7 +52,7 @@ define <8 x i16> @test_n30() nounwind {
 define <16 x i8> @test_n104() nounwind {
 	ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 >
 
-; CHECK: test_n104:
+; CHECK-LABEL: test_n104:
 ; CHECK: vspltisb
 ; CHECK-NEXT: vslb
 ; CHECK-NEXT: blr
@@ -61,7 +61,7 @@ define <16 x i8> @test_n104() nounwind {
 define <4 x i32> @test_vsldoi() nounwind {
 	ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 >
 
-; CHECK: test_vsldoi:
+; CHECK-LABEL: test_vsldoi:
 ; CHECK: vspltisw
 ; CHECK-NEXT: vsldoi
 ; CHECK-NEXT: blr
@@ -70,7 +70,7 @@ define <4 x i32> @test_vsldoi() nounwind {
 define <8 x i16> @test_vsldoi_65023() nounwind {
 	ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 >
 
-; CHECK: test_vsldoi_65023:
+; CHECK-LABEL: test_vsldoi_65023:
 ; CHECK: vspltish
 ; CHECK-NEXT: vsldoi
 ; CHECK-NEXT: blr
@@ -79,7 +79,7 @@ define <8 x i16> @test_vsldoi_65023() nounwind {
 define <4 x i32> @test_rol() nounwind {
 	ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
 
-; CHECK: test_rol:
+; CHECK-LABEL: test_rol:
 ; CHECK: vspltisw
 ; CHECK-NEXT: vrlw
 ; CHECK-NEXT: blr
diff --git a/test/CodeGen/PowerPC/vec_conv.ll b/test/CodeGen/PowerPC/vec_conv.ll
index a475e9499df2..a39ae9100355 100644
--- a/test/CodeGen/PowerPC/vec_conv.ll
+++ b/test/CodeGen/PowerPC/vec_conv.ll
@@ -17,7 +17,7 @@ entry:
   store <4 x i32> %1, <4 x i32>* %y, align 16
   ret void
 }
-;CHECK: v4f32_to_v4i32:
+;CHECK-LABEL: v4f32_to_v4i32:
 ;CHECK: vctsxs {{[0-9]+}}, {{[0-9]+}}, 0
 
 
@@ -29,7 +29,7 @@ entry:
   store <4 x i32> %1, <4 x i32>* %y, align 16
   ret void
 }
-;CHECK: v4f32_to_v4u32:
+;CHECK-LABEL: v4f32_to_v4u32:
 ;CHECK: vctuxs {{[0-9]+}}, {{[0-9]+}}, 0
 
 
@@ -41,7 +41,7 @@ entry:
   store <4 x float> %1, <4 x float>* %y, align 16
   ret void
 }
-;CHECK: v4i32_to_v4f32:
+;CHECK-LABEL: v4i32_to_v4f32:
 ;CHECK: vcfsx {{[0-9]+}}, {{[0-9]+}}, 0
 
 
@@ -53,5 +53,5 @@ entry:
   store <4 x float> %1, <4 x float>* %y, align 16
   ret void
 }
-;CHECK: v4u32_to_v4f32:
+;CHECK-LABEL: v4u32_to_v4f32:
 ;CHECK: vcfux {{[0-9]+}}, {{[0-9]+}}, 0
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
index 998645d90da6..8d16e15b8f44 100644
--- a/test/CodeGen/PowerPC/vec_extload.ll
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -5,7 +5,7 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
-; Altivec does not provides an sext intruction, so it expands
+; Altivec does not provide an sext instruction, so it expands
 ; a set of vector stores (stvx), bytes load/sign expand/store
 ; (lbz/stb), and a final vector load (lvx) to load the result
 ; extended vector.
@@ -14,7 +14,7 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
   %c = sext <16 x i4> %b to <16 x i8>
   ret <16 x i8> %c
 }
-; CHECK: v16si8_sext_in_reg:
+; CHECK-LABEL: v16si8_sext_in_reg:
 ; CHECK: vslb
 ; CHECK: vsrab
 ; CHECK: blr 
@@ -26,7 +26,7 @@ define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) {
   %c = zext <16 x i4> %b to <16 x i8>
   ret <16 x i8> %c
 }
-; CHECK:      v16si8_zext_in_reg:
+; CHECK-LABEL:      v16si8_zext_in_reg:
 ; CHECK:      vspltisb [[VMASK:[0-9]+]], 15
 ; CHECK-NEXT: vand 2, 2, [[VMASK]]
 
@@ -36,7 +36,7 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
   %c = sext <8 x i8> %b to <8 x i16>
   ret <8 x i16> %c
 }
-; CHECK: v8si16_sext_in_reg:
+; CHECK-LABEL: v8si16_sext_in_reg:
 ; CHECK: vslh
 ; CHECK: vsrah
 ; CHECK: blr 
@@ -48,7 +48,7 @@ define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) {
   %c = zext <8 x i8> %b to <8 x i16>
   ret <8 x i16> %c
 }
-; CHECK:      v8si16_zext_in_reg:
+; CHECK-LABEL:      v8si16_zext_in_reg:
 ; CHECK:      ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
 ; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
 ; CHECK-NEXT: vand 2, 2, [[VMASK]]
@@ -60,7 +60,7 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
   %c = sext <4 x i16> %b to <4 x i32>
   ret <4 x i32> %c
 }
-; CHECK: v4si32_sext_in_reg:
+; CHECK-LABEL: v4si32_sext_in_reg:
 ; CHECK: vslw
 ; CHECK: vsraw
 ; CHECK: blr 
@@ -71,7 +71,7 @@ define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
   %c = zext <4 x i16> %b to <4 x i32>
   ret <4 x i32> %c
 }
-; CHECK:      v4si32_zext_in_reg:
+; CHECK-LABEL:      v4si32_zext_in_reg:
 ; CHECK:      vspltisw [[VMASK:[0-9]+]], -16
 ; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
 ; CHECK-NEXT: vand 2, 2, [[VMASK]]
diff --git a/test/CodeGen/PowerPC/vec_fmuladd.ll b/test/CodeGen/PowerPC/vec_fmuladd.ll
new file mode 100644
index 000000000000..5683b607934c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_fmuladd.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float> %val, <2 x float>, <2 x float>)
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float> %val, <4 x float>, <4 x float>)
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float> %val, <8 x float>, <8 x float>)
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double> %val, <2 x double>, <2 x double>)
+declare <4 x double> @llvm.fmuladd.v4f64(<4 x double> %val, <4 x double>, <4 x double>)
+
+define <2 x float> @v2f32_fmuladd(<2 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <2 x float> @llvm.fmuladd.v2f32 (<2 x float> %x, <2 x float> %x, <2 x float> %x)
+  ret <2 x float> %fmuladd
+}
+; fmuladd (<2 x float>) is promoted to fmuladd (<4 x float>)
+; CHECK-LABEL: v2f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_fmuladd(<4 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <4 x float> @llvm.fmuladd.v4f32 (<4 x float> %x, <4 x float> %x, <4 x float> %x)
+  ret <4 x float> %fmuladd
+}
+; CHECK-LABEL: v4f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_fmuladd(<8 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <8 x float> @llvm.fmuladd.v8f32 (<8 x float> %x, <8 x float> %x, <8 x float> %x)
+  ret <8 x float> %fmuladd
+}
+; CHECK-LABEL: v8f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_fmuladd(<2 x double> %x) nounwind readnone {
+entry:
+  %fmuladd = call <2 x double> @llvm.fmuladd.v2f64 (<2 x double> %x, <2 x double> %x, <2 x double> %x)
+  ret <2 x double> %fmuladd
+}
+; CHECK-LABEL: v2f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_fmuladd(<4 x double> %x) nounwind readnone {
+entry:
+  %fmuladd = call <4 x double> @llvm.fmuladd.v4f64 (<4 x double> %x, <4 x double> %x, <4 x double> %x)
+  ret <4 x double> %fmuladd
+}
+; CHECK-LABEL: v4f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index 53bc75dd1078..c376751d8060 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -6,7 +6,7 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp3 = mul <4 x i32> %tmp, %tmp2		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %tmp3
 }
-; CHECK: test_v4i32:
+; CHECK-LABEL: test_v4i32:
 ; CHECK: vmsumuhm
 ; CHECK-NOT: mullw
 
@@ -16,7 +16,7 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp3 = mul <8 x i16> %tmp, %tmp2		; <<8 x i16>> [#uses=1]
 	ret <8 x i16> %tmp3
 }
-; CHECK: test_v8i16:
+; CHECK-LABEL: test_v8i16:
 ; CHECK: vmladduhm
 ; CHECK-NOT: mullw
 
@@ -26,7 +26,7 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp3 = mul <16 x i8> %tmp, %tmp2		; <<16 x i8>> [#uses=1]
 	ret <16 x i8> %tmp3
 }
-; CHECK: test_v16i8:
+; CHECK-LABEL: test_v16i8:
 ; CHECK: vmuloub
 ; CHECK: vmuleub
 ; CHECK-NOT: mullw
@@ -40,7 +40,7 @@ define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
 ; Check the creation of a negative zero float vector by creating a vector of
 ; all bits set and shifting it 31 bits to left, resulting a an vector of 
 ; 4 x 0x80000000 (-0.0 as float).
-; CHECK: test_float:
+; CHECK-LABEL: test_float:
 ; CHECK: vspltisw [[ZNEG:[0-9]+]], -1
 ; CHECK: vslw     {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
 ; CHECK: vmaddfp
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
index 7c55638620a9..ace187b3e72b 100644
--- a/test/CodeGen/PowerPC/vec_rounding.ll
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -12,7 +12,7 @@ define <2 x double> @floor_v2f64(<2 x double> %p)
   %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK: floor_v2f64:
+; CHECK-LABEL: floor_v2f64:
 ; CHECK: frim
 ; CHECK: frim
 
@@ -22,7 +22,7 @@ define <4 x double> @floor_v4f64(<4 x double> %p)
   %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK: floor_v4f64:
+; CHECK-LABEL: floor_v4f64:
 ; CHECK: frim
 ; CHECK: frim
 ; CHECK: frim
@@ -34,7 +34,7 @@ define <2 x double> @ceil_v2f64(<2 x double> %p)
   %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK: ceil_v2f64:
+; CHECK-LABEL: ceil_v2f64:
 ; CHECK: frip
 ; CHECK: frip
 
@@ -44,7 +44,7 @@ define <4 x double> @ceil_v4f64(<4 x double> %p)
   %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK: ceil_v4f64:
+; CHECK-LABEL: ceil_v4f64:
 ; CHECK: frip
 ; CHECK: frip
 ; CHECK: frip
@@ -56,7 +56,7 @@ define <2 x double> @trunc_v2f64(<2 x double> %p)
   %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK: trunc_v2f64:
+; CHECK-LABEL: trunc_v2f64:
 ; CHECK: friz
 ; CHECK: friz
 
@@ -66,7 +66,7 @@ define <4 x double> @trunc_v4f64(<4 x double> %p)
   %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK: trunc_v4f64:
+; CHECK-LABEL: trunc_v4f64:
 ; CHECK: friz
 ; CHECK: friz
 ; CHECK: friz
@@ -78,7 +78,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %p)
   %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
-; CHECK: nearbyint_v2f64:
+; CHECK-LABEL: nearbyint_v2f64:
 ; CHECK: bl nearbyint
 ; CHECK: bl nearbyint
 
@@ -88,7 +88,7 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %p)
   %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
   ret <4 x double> %t
 }
-; CHECK: nearbyint_v4f64:
+; CHECK-LABEL: nearbyint_v4f64:
 ; CHECK: bl nearbyint
 ; CHECK: bl nearbyint
 ; CHECK: bl nearbyint
@@ -101,7 +101,7 @@ define <4 x float> @floor_v4f32(<4 x float> %p)
   %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK: floor_v4f32:
+; CHECK-LABEL: floor_v4f32:
 ; CHECK: vrfim
 
 declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
@@ -110,7 +110,7 @@ define <8 x float> @floor_v8f32(<8 x float> %p)
   %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK: floor_v8f32:
+; CHECK-LABEL: floor_v8f32:
 ; CHECK: vrfim
 ; CHECK: vrfim
 
@@ -120,7 +120,7 @@ define <4 x float> @ceil_v4f32(<4 x float> %p)
   %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK: ceil_v4f32:
+; CHECK-LABEL: ceil_v4f32:
 ; CHECK: vrfip
 
 declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
@@ -129,7 +129,7 @@ define <8 x float> @ceil_v8f32(<8 x float> %p)
   %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK: ceil_v8f32:
+; CHECK-LABEL: ceil_v8f32:
 ; CHECK: vrfip
 ; CHECK: vrfip
 
@@ -139,7 +139,7 @@ define <4 x float> @trunc_v4f32(<4 x float> %p)
   %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK: trunc_v4f32:
+; CHECK-LABEL: trunc_v4f32:
 ; CHECK: vrfiz
 
 declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
@@ -148,7 +148,7 @@ define <8 x float> @trunc_v8f32(<8 x float> %p)
   %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK: trunc_v8f32:
+; CHECK-LABEL: trunc_v8f32:
 ; CHECK: vrfiz
 ; CHECK: vrfiz
 
@@ -158,7 +158,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %p)
   %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
   ret <4 x float> %t
 }
-; CHECK: nearbyint_v4f32:
+; CHECK-LABEL: nearbyint_v4f32:
 ; CHECK: vrfin
 
 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
@@ -167,6 +167,6 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %p)
   %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
   ret <8 x float> %t
 }
-; CHECK: nearbyint_v8f32:
+; CHECK-LABEL: nearbyint_v8f32:
 ; CHECK: vrfin
 ; CHECK: vrfin
diff --git a/test/CodeGen/PowerPC/vec_sqrt.ll b/test/CodeGen/PowerPC/vec_sqrt.ll
index 055da1a229d1..a85c3ffad155 100644
--- a/test/CodeGen/PowerPC/vec_sqrt.ll
+++ b/test/CodeGen/PowerPC/vec_sqrt.ll
@@ -18,7 +18,7 @@ entry:
   ret <2 x float> %sqrt
 }
 ; sqrt (<2 x float>) is promoted to sqrt (<4 x float>)
-; CHECK: v2f32_sqrt:
+; CHECK-LABEL: v2f32_sqrt:
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
@@ -29,7 +29,7 @@ entry:
   %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %x)
   ret <4 x float> %sqrt
 }
-; CHECK: v4f32_sqrt:
+; CHECK-LABEL: v4f32_sqrt:
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
@@ -40,7 +40,7 @@ entry:
   %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %x)
   ret <8 x float> %sqrt
 }
-; CHECK: v8f32_sqrt:
+; CHECK-LABEL: v8f32_sqrt:
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}}
@@ -55,7 +55,7 @@ entry:
   %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %x)
   ret <2 x double> %sqrt
 }
-; CHECK: v2f64_sqrt:
+; CHECK-LABEL: v2f64_sqrt:
 ; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
 
@@ -64,7 +64,7 @@ entry:
   %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %x)
   ret <4 x double> %sqrt
 }
-; CHECK: v4f64_sqrt:
+; CHECK-LABEL: v4f64_sqrt:
 ; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index e4c3b0db1726..859a85a14101 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -59,6 +59,14 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
         ret void
 }
 
+define void @test_rem(%f8* %P, %f8* %Q, %f8* %S) {
+        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %R = frem %f8 %p, %q            ; <%f8> [#uses=1]
+        store %f8 %R, %f8* %S
+        ret void
+}
+
 ;;; TEST VECTOR CONSTRUCTS
 
 define void @test_cst(%f4* %P, %f4* %S) {
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
index 9fb3d03477c9..c3d1bf8f1ead 100644
--- a/test/CodeGen/PowerPC/vrspill.ll
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -1,5 +1,5 @@
-; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs  < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
 
 ; This verifies that we generate correct spill/reload code for vector regs.
 
@@ -13,7 +13,6 @@ entry:
   ret void
 }
 
-; CHECK: stvx 2, 1,
-; CHECK: lvx 2, 1,
+; CHECK: stvx 2,
 
 declare void @foo(i32*)
diff --git a/test/CodeGen/PowerPC/zero-not-run.ll b/test/CodeGen/PowerPC/zero-not-run.ll
new file mode 100644
index 000000000000..9df0d6e004ef
--- /dev/null
+++ b/test/CodeGen/PowerPC/zero-not-run.ll
@@ -0,0 +1,27 @@
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define internal i32* @func_65(i32* %p_66) #0 {
+entry:
+  br i1 undef, label %for.body, label %for.end731
+
+for.body:                                         ; preds = %entry
+  %0 = load i32* undef, align 4
+  %or31 = or i32 %0, 319143828
+  store i32 %or31, i32* undef, align 4
+  %cmp32 = icmp eq i32 319143828, %or31
+  %conv33 = zext i1 %cmp32 to i32
+  %conv34 = sext i32 %conv33 to i64
+  %call35 = call i64 @safe_mod_func_uint64_t_u_u(i64 %conv34, i64 -10)
+  unreachable
+
+for.end731:                                       ; preds = %entry
+  ret i32* undef
+}
+
+; Function Attrs: nounwind
+declare i64 @safe_mod_func_uint64_t_u_u(i64, i64) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
index 114f9e74474f..3c4fcf740c39 100644
--- a/test/CodeGen/R600/128bit-kernel-args.ll
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -1,16 +1,26 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK: @v4i32_kernel_arg
-; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
+; R600-CHECK: @v4i32_kernel_arg
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
+; SI-CHECK: @v4i32_kernel_arg
+; SI-CHECK: BUFFER_STORE_DWORDX4
 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32>  %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(1)* %out
   ret void
 }
 
-; CHECK: @v4f32_kernel_arg
-; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+; R600-CHECK: @v4f32_kernel_args
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
+; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
+; SI-CHECK: @v4f32_kernel_args
+; SI-CHECK: BUFFER_STORE_DWORDX4
 define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float>  %in) {
 entry:
   store <4 x float> %in, <4 x float> addrspace(1)* %out
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
new file mode 100644
index 000000000000..7a126878bef4
--- /dev/null
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
+; the global address space(1) uses 64-bit pointers.  These tests check to make sure
+; the correct pointer size is used for the local address space.
+
+; The e{{32|64}} suffix on the instructions refers to the encoding size and not
+; the size of the operands.  The operand size is denoted in the instruction name.
+; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
+; instructions with B64, U64, and I64 take 64-bit operands.
+
+; CHECK-LABEL: @local_address_load
+; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
+; CHECK: DS_READ_B32 [[PTR]]
+define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %0 = load i32 addrspace(3)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @local_address_gep
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
+; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: DS_READ_B32 [[VPTR]]
+define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
+entry:
+  %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
+  %1 = load i32 addrspace(3)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @local_address_gep_const_offset
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
+; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; CHECK: DS_READ_B32 [[VPTR]]
+define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %0 = getelementptr i32 addrspace(3)* %in, i32 1
+  %1 = load i32 addrspace(3)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @null_32bit_lds_ptr:
+; CHECK: V_CMP_NE_I32
+; CHECK-NOT: V_CMP_NE_I32
+; CHECK: V_CNDMASK_B32
+define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
+  %cmp = icmp ne i32 addrspace(3)* %lds, null
+  %x = select i1 %cmp, i32 123, i32 456
+  store i32 %x, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @mul_32bit_ptr:
+; CHECK: V_MUL_LO_I32
+; CHECK-NEXT: V_ADD_I32_e32
+; CHECK-NEXT: DS_READ_B32
+define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
+  %ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
+  %val = load float addrspace(3)* %ptr
+  store float %val, float addrspace(1)* %out
+  ret void
+}
+
+@g_lds = addrspace(3) global float zeroinitializer, align 4
+
+; CHECK-LABEL: @infer_ptr_alignment_global_offset:
+; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
+; CHECK: DS_READ_B32 v{{[0-9]+}}, 0, [[REG]]
+define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
+  %val = load float addrspace(3)* @g_lds
+  store float %val, float addrspace(1)* %out
+  ret void
+}
+
+
+@ptr = addrspace(3) global i32 addrspace(3)* null
+@dst = addrspace(3) global [16384 x i32] zeroinitializer
+
+; CHECK-LABEL: @global_ptr:
+; CHECK: DS_WRITE_B32
+define void @global_ptr() nounwind {
+  store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
+  ret void
+}
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
new file mode 100644
index 000000000000..0d6bfb144d3d
--- /dev/null
+++ b/test/CodeGen/R600/64bit-kernel-args.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; SI-CHECK: @f64_kernel_arg
+; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}], s[0:1], 9
+; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}], s[0:1], 11
+; SI-CHECK: BUFFER_STORE_DWORDX2
+define void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
+entry:
+  store double %in, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index 185998b26095..3d5506bfa5d2 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -1,14 +1,55 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK-LABEL: @test1:
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+;SI-CHECK-LABEL: @test1:
+;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI-CHECK-NOT: [[REG]]
+;SI-CHECK: BUFFER_STORE_DWORD [[REG]],
+define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1)* %in
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = add i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @test2:
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK-LABEL: @test2:
+;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %result = add <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @test4:
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK-LABEL: @test4:
+;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32> addrspace(1)* %b_ptr
   %result = add <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
new file mode 100644
index 000000000000..303a1cb03914
--- /dev/null
+++ b/test/CodeGen/R600/add_i64.ll
@@ -0,0 +1,59 @@
+; XFAIL: *
+; This will fail until i64 add is enabled
+
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+
+
+declare i32 @llvm.SI.tid() readnone
+
+; SI-LABEL: @test_i64_vreg:
+define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
+  %tid = call i32 @llvm.SI.tid() readnone
+  %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
+  %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
+  %a = load i64 addrspace(1)* %a_ptr
+  %b = load i64 addrspace(1)* %b_ptr
+  %result = add i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; Check that the SGPR add operand is correctly moved to a VGPR.
+; SI-LABEL: @sgpr_operand:
+define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
+  %foo = load i64 addrspace(1)* %in, align 8
+  %result = add i64 %foo, %a
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; Swap the arguments. Check that the SGPR -> VGPR copy works with the
+; SGPR as the other operand.
+;
+; SI-LABEL: @sgpr_operand_reversed:
+define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
+  %foo = load i64 addrspace(1)* %in, align 8
+  %result = add i64 %a, %foo
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+
+; SI-LABEL: @test_v2i64_sreg:
+define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
+  %result = add <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: @test_v2i64_vreg:
+define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
+  %tid = call i32 @llvm.SI.tid() readnone
+  %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
+  %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
+  %a = load <2 x i64> addrspace(1)* %a_ptr
+  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %result = add <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
new file mode 100644
index 000000000000..1fc616a4ed42
--- /dev/null
+++ b/test/CodeGen/R600/address-space.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+
+; Test that codegenprepare understands address space sizes
+
+%struct.foo = type { [3 x float], [3 x float] }
+
+; CHECK-LABEL: @do_as_ptr_calcs:
+; CHECK: S_ADD_I32 {{s[0-9]+}},
+; CHECK: S_ADD_I32 [[SREG1:s[0-9]+]],
+; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
+; CHECK: DS_READ_B32 [[VREG1]],
+define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
+entry:
+  %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
+  %y = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
+  br label %bb32
+
+bb32:
+  %a = load float addrspace(3)* %x, align 4
+  %b = load float addrspace(3)* %y, align 4
+  %cmp = fcmp one float %a, %b
+  br i1 %cmp, label %bb34, label %bb33
+
+bb33:
+  unreachable
+
+bb34:
+  unreachable
+}
+
+
diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll
deleted file mode 100644
index 48496f6febf6..000000000000
--- a/test/CodeGen/R600/alu-split.ll
+++ /dev/null
@@ -1,851 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: ALU
-;CHECK: ALU
-;CHECK: ALU
-;CHECK-NOT: ALU
-;CHECK: CF_END
-
-define void @main() #0 {
-main_body:
-  %0 = call float @llvm.R600.load.input(i32 4)
-  %1 = call float @llvm.R600.load.input(i32 5)
-  %2 = call float @llvm.R600.load.input(i32 6)
-  %3 = call float @llvm.R600.load.input(i32 7)
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
-  %5 = extractelement <4 x float> %4, i32 0
-  %6 = fcmp une float 0x4016F2B020000000, %5
-  %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
-  %9 = extractelement <4 x float> %8, i32 1
-  %10 = fcmp une float 0x401FDCC640000000, %9
-  %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00
-  %12 = fsub float -0.000000e+00, %7
-  %13 = fptosi float %12 to i32
-  %14 = fsub float -0.000000e+00, %11
-  %15 = fptosi float %14 to i32
-  %16 = bitcast i32 %13 to float
-  %17 = bitcast i32 %15 to float
-  %18 = bitcast float %16 to i32
-  %19 = bitcast float %17 to i32
-  %20 = or i32 %18, %19
-  %21 = bitcast i32 %20 to float
-  %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
-  %23 = extractelement <4 x float> %22, i32 0
-  %24 = fcmp une float 0xC00574BC60000000, %23
-  %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00
-  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
-  %27 = extractelement <4 x float> %26, i32 1
-  %28 = fcmp une float 0x40210068E0000000, %27
-  %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00
-  %30 = fsub float -0.000000e+00, %25
-  %31 = fptosi float %30 to i32
-  %32 = fsub float -0.000000e+00, %29
-  %33 = fptosi float %32 to i32
-  %34 = bitcast i32 %31 to float
-  %35 = bitcast i32 %33 to float
-  %36 = bitcast float %34 to i32
-  %37 = bitcast float %35 to i32
-  %38 = or i32 %36, %37
-  %39 = bitcast i32 %38 to float
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
-  %41 = extractelement <4 x float> %40, i32 0
-  %42 = fcmp une float 0xBFC9A6B500000000, %41
-  %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
-  %45 = extractelement <4 x float> %44, i32 1
-  %46 = fcmp une float 0xC0119BDA60000000, %45
-  %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00
-  %48 = fsub float -0.000000e+00, %43
-  %49 = fptosi float %48 to i32
-  %50 = fsub float -0.000000e+00, %47
-  %51 = fptosi float %50 to i32
-  %52 = bitcast i32 %49 to float
-  %53 = bitcast i32 %51 to float
-  %54 = bitcast float %52 to i32
-  %55 = bitcast float %53 to i32
-  %56 = or i32 %54, %55
-  %57 = bitcast i32 %56 to float
-  %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
-  %59 = extractelement <4 x float> %58, i32 0
-  %60 = fcmp une float 0xC02085D640000000, %59
-  %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00
-  %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
-  %63 = extractelement <4 x float> %62, i32 1
-  %64 = fcmp une float 0xBFD7C1BDA0000000, %63
-  %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00
-  %66 = fsub float -0.000000e+00, %61
-  %67 = fptosi float %66 to i32
-  %68 = fsub float -0.000000e+00, %65
-  %69 = fptosi float %68 to i32
-  %70 = bitcast i32 %67 to float
-  %71 = bitcast i32 %69 to float
-  %72 = bitcast float %70 to i32
-  %73 = bitcast float %71 to i32
-  %74 = or i32 %72, %73
-  %75 = bitcast i32 %74 to float
-  %76 = insertelement <4 x float> undef, float %21, i32 0
-  %77 = insertelement <4 x float> %76, float %39, i32 1
-  %78 = insertelement <4 x float> %77, float %57, i32 2
-  %79 = insertelement <4 x float> %78, float %75, i32 3
-  %80 = insertelement <4 x float> undef, float %21, i32 0
-  %81 = insertelement <4 x float> %80, float %39, i32 1
-  %82 = insertelement <4 x float> %81, float %57, i32 2
-  %83 = insertelement <4 x float> %82, float %75, i32 3
-  %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83)
-  %85 = bitcast float %84 to i32
-  %86 = icmp ne i32 %85, 0
-  %87 = sext i1 %86 to i32
-  %88 = bitcast i32 %87 to float
-  %89 = bitcast float %88 to i32
-  %90 = xor i32 %89, -1
-  %91 = bitcast i32 %90 to float
-  %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
-  %93 = extractelement <4 x float> %92, i32 0
-  %94 = fcmp une float 0x401FDCC640000000, %93
-  %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
-  %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
-  %97 = extractelement <4 x float> %96, i32 1
-  %98 = fcmp une float 0xC00574BC60000000, %97
-  %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
-  %100 = fsub float -0.000000e+00, %95
-  %101 = fptosi float %100 to i32
-  %102 = fsub float -0.000000e+00, %99
-  %103 = fptosi float %102 to i32
-  %104 = bitcast i32 %101 to float
-  %105 = bitcast i32 %103 to float
-  %106 = bitcast float %104 to i32
-  %107 = bitcast float %105 to i32
-  %108 = or i32 %106, %107
-  %109 = bitcast i32 %108 to float
-  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
-  %111 = extractelement <4 x float> %110, i32 0
-  %112 = fcmp une float 0x40210068E0000000, %111
-  %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00
-  %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
-  %115 = extractelement <4 x float> %114, i32 1
-  %116 = fcmp une float 0xBFC9A6B500000000, %115
-  %117 = select i1 %116, float 1.000000e+00, float 0.000000e+00
-  %118 = fsub float -0.000000e+00, %113
-  %119 = fptosi float %118 to i32
-  %120 = fsub float -0.000000e+00, %117
-  %121 = fptosi float %120 to i32
-  %122 = bitcast i32 %119 to float
-  %123 = bitcast i32 %121 to float
-  %124 = bitcast float %122 to i32
-  %125 = bitcast float %123 to i32
-  %126 = or i32 %124, %125
-  %127 = bitcast i32 %126 to float
-  %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
-  %129 = extractelement <4 x float> %128, i32 0
-  %130 = fcmp une float 0xC0119BDA60000000, %129
-  %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00
-  %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
-  %133 = extractelement <4 x float> %132, i32 1
-  %134 = fcmp une float 0xC02085D640000000, %133
-  %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00
-  %136 = fsub float -0.000000e+00, %131
-  %137 = fptosi float %136 to i32
-  %138 = fsub float -0.000000e+00, %135
-  %139 = fptosi float %138 to i32
-  %140 = bitcast i32 %137 to float
-  %141 = bitcast i32 %139 to float
-  %142 = bitcast float %140 to i32
-  %143 = bitcast float %141 to i32
-  %144 = or i32 %142, %143
-  %145 = bitcast i32 %144 to float
-  %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
-  %147 = extractelement <4 x float> %146, i32 0
-  %148 = fcmp une float 0xBFD7C1BDA0000000, %147
-  %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00
-  %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
-  %151 = extractelement <4 x float> %150, i32 1
-  %152 = fcmp une float 0x401E1D7DC0000000, %151
-  %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00
-  %154 = fsub float -0.000000e+00, %149
-  %155 = fptosi float %154 to i32
-  %156 = fsub float -0.000000e+00, %153
-  %157 = fptosi float %156 to i32
-  %158 = bitcast i32 %155 to float
-  %159 = bitcast i32 %157 to float
-  %160 = bitcast float %158 to i32
-  %161 = bitcast float %159 to i32
-  %162 = or i32 %160, %161
-  %163 = bitcast i32 %162 to float
-  %164 = insertelement <4 x float> undef, float %109, i32 0
-  %165 = insertelement <4 x float> %164, float %127, i32 1
-  %166 = insertelement <4 x float> %165, float %145, i32 2
-  %167 = insertelement <4 x float> %166, float %163, i32 3
-  %168 = insertelement <4 x float> undef, float %109, i32 0
-  %169 = insertelement <4 x float> %168, float %127, i32 1
-  %170 = insertelement <4 x float> %169, float %145, i32 2
-  %171 = insertelement <4 x float> %170, float %163, i32 3
-  %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171)
-  %173 = bitcast float %172 to i32
-  %174 = icmp ne i32 %173, 0
-  %175 = sext i1 %174 to i32
-  %176 = bitcast i32 %175 to float
-  %177 = bitcast float %176 to i32
-  %178 = xor i32 %177, -1
-  %179 = bitcast i32 %178 to float
-  %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %181 = extractelement <4 x float> %180, i32 0
-  %182 = fcmp une float 0x401FDCC640000000, %181
-  %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00
-  %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %185 = extractelement <4 x float> %184, i32 1
-  %186 = fcmp une float 0xC00574BC60000000, %185
-  %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00
-  %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %189 = extractelement <4 x float> %188, i32 2
-  %190 = fcmp une float 0x40210068E0000000, %189
-  %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00
-  %192 = fsub float -0.000000e+00, %183
-  %193 = fptosi float %192 to i32
-  %194 = fsub float -0.000000e+00, %187
-  %195 = fptosi float %194 to i32
-  %196 = fsub float -0.000000e+00, %191
-  %197 = fptosi float %196 to i32
-  %198 = bitcast i32 %193 to float
-  %199 = bitcast i32 %195 to float
-  %200 = bitcast i32 %197 to float
-  %201 = bitcast float %199 to i32
-  %202 = bitcast float %200 to i32
-  %203 = or i32 %201, %202
-  %204 = bitcast i32 %203 to float
-  %205 = bitcast float %198 to i32
-  %206 = bitcast float %204 to i32
-  %207 = or i32 %205, %206
-  %208 = bitcast i32 %207 to float
-  %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %210 = extractelement <4 x float> %209, i32 0
-  %211 = fcmp une float 0xBFC9A6B500000000, %210
-  %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00
-  %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %214 = extractelement <4 x float> %213, i32 1
-  %215 = fcmp une float 0xC0119BDA60000000, %214
-  %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00
-  %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %218 = extractelement <4 x float> %217, i32 2
-  %219 = fcmp une float 0xC02085D640000000, %218
-  %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00
-  %221 = fsub float -0.000000e+00, %212
-  %222 = fptosi float %221 to i32
-  %223 = fsub float -0.000000e+00, %216
-  %224 = fptosi float %223 to i32
-  %225 = fsub float -0.000000e+00, %220
-  %226 = fptosi float %225 to i32
-  %227 = bitcast i32 %222 to float
-  %228 = bitcast i32 %224 to float
-  %229 = bitcast i32 %226 to float
-  %230 = bitcast float %228 to i32
-  %231 = bitcast float %229 to i32
-  %232 = or i32 %230, %231
-  %233 = bitcast i32 %232 to float
-  %234 = bitcast float %227 to i32
-  %235 = bitcast float %233 to i32
-  %236 = or i32 %234, %235
-  %237 = bitcast i32 %236 to float
-  %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %239 = extractelement <4 x float> %238, i32 0
-  %240 = fcmp une float 0xBFD7C1BDA0000000, %239
-  %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
-  %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %243 = extractelement <4 x float> %242, i32 1
-  %244 = fcmp une float 0x401E1D7DC0000000, %243
-  %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00
-  %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %247 = extractelement <4 x float> %246, i32 2
-  %248 = fcmp une float 0xC019893740000000, %247
-  %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00
-  %250 = fsub float -0.000000e+00, %241
-  %251 = fptosi float %250 to i32
-  %252 = fsub float -0.000000e+00, %245
-  %253 = fptosi float %252 to i32
-  %254 = fsub float -0.000000e+00, %249
-  %255 = fptosi float %254 to i32
-  %256 = bitcast i32 %251 to float
-  %257 = bitcast i32 %253 to float
-  %258 = bitcast i32 %255 to float
-  %259 = bitcast float %257 to i32
-  %260 = bitcast float %258 to i32
-  %261 = or i32 %259, %260
-  %262 = bitcast i32 %261 to float
-  %263 = bitcast float %256 to i32
-  %264 = bitcast float %262 to i32
-  %265 = or i32 %263, %264
-  %266 = bitcast i32 %265 to float
-  %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %268 = extractelement <4 x float> %267, i32 0
-  %269 = fcmp une float 0x40220F0D80000000, %268
-  %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00
-  %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %272 = extractelement <4 x float> %271, i32 1
-  %273 = fcmp une float 0xC018E2EB20000000, %272
-  %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00
-  %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %276 = extractelement <4 x float> %275, i32 2
-  %277 = fcmp une float 0xBFEA8DB8C0000000, %276
-  %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00
-  %279 = fsub float -0.000000e+00, %270
-  %280 = fptosi float %279 to i32
-  %281 = fsub float -0.000000e+00, %274
-  %282 = fptosi float %281 to i32
-  %283 = fsub float -0.000000e+00, %278
-  %284 = fptosi float %283 to i32
-  %285 = bitcast i32 %280 to float
-  %286 = bitcast i32 %282 to float
-  %287 = bitcast i32 %284 to float
-  %288 = bitcast float %286 to i32
-  %289 = bitcast float %287 to i32
-  %290 = or i32 %288, %289
-  %291 = bitcast i32 %290 to float
-  %292 = bitcast float %285 to i32
-  %293 = bitcast float %291 to i32
-  %294 = or i32 %292, %293
-  %295 = bitcast i32 %294 to float
-  %296 = insertelement <4 x float> undef, float %208, i32 0
-  %297 = insertelement <4 x float> %296, float %237, i32 1
-  %298 = insertelement <4 x float> %297, float %266, i32 2
-  %299 = insertelement <4 x float> %298, float %295, i32 3
-  %300 = insertelement <4 x float> undef, float %208, i32 0
-  %301 = insertelement <4 x float> %300, float %237, i32 1
-  %302 = insertelement <4 x float> %301, float %266, i32 2
-  %303 = insertelement <4 x float> %302, float %295, i32 3
-  %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303)
-  %305 = bitcast float %304 to i32
-  %306 = icmp ne i32 %305, 0
-  %307 = sext i1 %306 to i32
-  %308 = bitcast i32 %307 to float
-  %309 = bitcast float %308 to i32
-  %310 = xor i32 %309, -1
-  %311 = bitcast i32 %310 to float
-  %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %313 = extractelement <4 x float> %312, i32 0
-  %314 = fcmp une float 0xC00574BC60000000, %313
-  %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
-  %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %317 = extractelement <4 x float> %316, i32 1
-  %318 = fcmp une float 0x40210068E0000000, %317
-  %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00
-  %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %321 = extractelement <4 x float> %320, i32 2
-  %322 = fcmp une float 0xBFC9A6B500000000, %321
-  %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00
-  %324 = fsub float -0.000000e+00, %315
-  %325 = fptosi float %324 to i32
-  %326 = fsub float -0.000000e+00, %319
-  %327 = fptosi float %326 to i32
-  %328 = fsub float -0.000000e+00, %323
-  %329 = fptosi float %328 to i32
-  %330 = bitcast i32 %325 to float
-  %331 = bitcast i32 %327 to float
-  %332 = bitcast i32 %329 to float
-  %333 = bitcast float %331 to i32
-  %334 = bitcast float %332 to i32
-  %335 = or i32 %333, %334
-  %336 = bitcast i32 %335 to float
-  %337 = bitcast float %330 to i32
-  %338 = bitcast float %336 to i32
-  %339 = or i32 %337, %338
-  %340 = bitcast i32 %339 to float
-  %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %342 = extractelement <4 x float> %341, i32 0
-  %343 = fcmp une float 0xC0119BDA60000000, %342
-  %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00
-  %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %346 = extractelement <4 x float> %345, i32 1
-  %347 = fcmp une float 0xC02085D640000000, %346
-  %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00
-  %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %350 = extractelement <4 x float> %349, i32 2
-  %351 = fcmp une float 0xBFD7C1BDA0000000, %350
-  %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00
-  %353 = fsub float -0.000000e+00, %344
-  %354 = fptosi float %353 to i32
-  %355 = fsub float -0.000000e+00, %348
-  %356 = fptosi float %355 to i32
-  %357 = fsub float -0.000000e+00, %352
-  %358 = fptosi float %357 to i32
-  %359 = bitcast i32 %354 to float
-  %360 = bitcast i32 %356 to float
-  %361 = bitcast i32 %358 to float
-  %362 = bitcast float %360 to i32
-  %363 = bitcast float %361 to i32
-  %364 = or i32 %362, %363
-  %365 = bitcast i32 %364 to float
-  %366 = bitcast float %359 to i32
-  %367 = bitcast float %365 to i32
-  %368 = or i32 %366, %367
-  %369 = bitcast i32 %368 to float
-  %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %371 = extractelement <4 x float> %370, i32 0
-  %372 = fcmp une float 0x401E1D7DC0000000, %371
-  %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00
-  %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %375 = extractelement <4 x float> %374, i32 1
-  %376 = fcmp une float 0xC019893740000000, %375
-  %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00
-  %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %379 = extractelement <4 x float> %378, i32 2
-  %380 = fcmp une float 0x40220F0D80000000, %379
-  %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00
-  %382 = fsub float -0.000000e+00, %373
-  %383 = fptosi float %382 to i32
-  %384 = fsub float -0.000000e+00, %377
-  %385 = fptosi float %384 to i32
-  %386 = fsub float -0.000000e+00, %381
-  %387 = fptosi float %386 to i32
-  %388 = bitcast i32 %383 to float
-  %389 = bitcast i32 %385 to float
-  %390 = bitcast i32 %387 to float
-  %391 = bitcast float %389 to i32
-  %392 = bitcast float %390 to i32
-  %393 = or i32 %391, %392
-  %394 = bitcast i32 %393 to float
-  %395 = bitcast float %388 to i32
-  %396 = bitcast float %394 to i32
-  %397 = or i32 %395, %396
-  %398 = bitcast i32 %397 to float
-  %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %400 = extractelement <4 x float> %399, i32 0
-  %401 = fcmp une float 0xC018E2EB20000000, %400
-  %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00
-  %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %404 = extractelement <4 x float> %403, i32 1
-  %405 = fcmp une float 0xBFEA8DB8C0000000, %404
-  %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00
-  %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %408 = extractelement <4 x float> %407, i32 2
-  %409 = fcmp une float 0x4015236E20000000, %408
-  %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00
-  %411 = fsub float -0.000000e+00, %402
-  %412 = fptosi float %411 to i32
-  %413 = fsub float -0.000000e+00, %406
-  %414 = fptosi float %413 to i32
-  %415 = fsub float -0.000000e+00, %410
-  %416 = fptosi float %415 to i32
-  %417 = bitcast i32 %412 to float
-  %418 = bitcast i32 %414 to float
-  %419 = bitcast i32 %416 to float
-  %420 = bitcast float %418 to i32
-  %421 = bitcast float %419 to i32
-  %422 = or i32 %420, %421
-  %423 = bitcast i32 %422 to float
-  %424 = bitcast float %417 to i32
-  %425 = bitcast float %423 to i32
-  %426 = or i32 %424, %425
-  %427 = bitcast i32 %426 to float
-  %428 = insertelement <4 x float> undef, float %340, i32 0
-  %429 = insertelement <4 x float> %428, float %369, i32 1
-  %430 = insertelement <4 x float> %429, float %398, i32 2
-  %431 = insertelement <4 x float> %430, float %427, i32 3
-  %432 = insertelement <4 x float> undef, float %340, i32 0
-  %433 = insertelement <4 x float> %432, float %369, i32 1
-  %434 = insertelement <4 x float> %433, float %398, i32 2
-  %435 = insertelement <4 x float> %434, float %427, i32 3
-  %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435)
-  %437 = bitcast float %436 to i32
-  %438 = icmp ne i32 %437, 0
-  %439 = sext i1 %438 to i32
-  %440 = bitcast i32 %439 to float
-  %441 = bitcast float %440 to i32
-  %442 = xor i32 %441, -1
-  %443 = bitcast i32 %442 to float
-  %444 = load <4 x float> addrspace(8)* null
-  %445 = extractelement <4 x float> %444, i32 0
-  %446 = fcmp une float 0xC00574BC60000000, %445
-  %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00
-  %448 = load <4 x float> addrspace(8)* null
-  %449 = extractelement <4 x float> %448, i32 1
-  %450 = fcmp une float 0x40210068E0000000, %449
-  %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00
-  %452 = load <4 x float> addrspace(8)* null
-  %453 = extractelement <4 x float> %452, i32 2
-  %454 = fcmp une float 0xBFC9A6B500000000, %453
-  %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00
-  %456 = load <4 x float> addrspace(8)* null
-  %457 = extractelement <4 x float> %456, i32 3
-  %458 = fcmp une float 0xC0119BDA60000000, %457
-  %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00
-  %460 = fsub float -0.000000e+00, %447
-  %461 = fptosi float %460 to i32
-  %462 = fsub float -0.000000e+00, %451
-  %463 = fptosi float %462 to i32
-  %464 = fsub float -0.000000e+00, %455
-  %465 = fptosi float %464 to i32
-  %466 = fsub float -0.000000e+00, %459
-  %467 = fptosi float %466 to i32
-  %468 = bitcast i32 %461 to float
-  %469 = bitcast i32 %463 to float
-  %470 = bitcast i32 %465 to float
-  %471 = bitcast i32 %467 to float
-  %472 = bitcast float %468 to i32
-  %473 = bitcast float %469 to i32
-  %474 = or i32 %472, %473
-  %475 = bitcast i32 %474 to float
-  %476 = bitcast float %470 to i32
-  %477 = bitcast float %471 to i32
-  %478 = or i32 %476, %477
-  %479 = bitcast i32 %478 to float
-  %480 = bitcast float %475 to i32
-  %481 = bitcast float %479 to i32
-  %482 = or i32 %480, %481
-  %483 = bitcast i32 %482 to float
-  %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %485 = extractelement <4 x float> %484, i32 0
-  %486 = fcmp une float 0xC02085D640000000, %485
-  %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00
-  %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %489 = extractelement <4 x float> %488, i32 1
-  %490 = fcmp une float 0xBFD7C1BDA0000000, %489
-  %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00
-  %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %493 = extractelement <4 x float> %492, i32 2
-  %494 = fcmp une float 0x401E1D7DC0000000, %493
-  %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00
-  %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %497 = extractelement <4 x float> %496, i32 3
-  %498 = fcmp une float 0xC019893740000000, %497
-  %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00
-  %500 = fsub float -0.000000e+00, %487
-  %501 = fptosi float %500 to i32
-  %502 = fsub float -0.000000e+00, %491
-  %503 = fptosi float %502 to i32
-  %504 = fsub float -0.000000e+00, %495
-  %505 = fptosi float %504 to i32
-  %506 = fsub float -0.000000e+00, %499
-  %507 = fptosi float %506 to i32
-  %508 = bitcast i32 %501 to float
-  %509 = bitcast i32 %503 to float
-  %510 = bitcast i32 %505 to float
-  %511 = bitcast i32 %507 to float
-  %512 = bitcast float %508 to i32
-  %513 = bitcast float %509 to i32
-  %514 = or i32 %512, %513
-  %515 = bitcast i32 %514 to float
-  %516 = bitcast float %510 to i32
-  %517 = bitcast float %511 to i32
-  %518 = or i32 %516, %517
-  %519 = bitcast i32 %518 to float
-  %520 = bitcast float %515 to i32
-  %521 = bitcast float %519 to i32
-  %522 = or i32 %520, %521
-  %523 = bitcast i32 %522 to float
-  %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %525 = extractelement <4 x float> %524, i32 0
-  %526 = fcmp une float 0x40220F0D80000000, %525
-  %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00
-  %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %529 = extractelement <4 x float> %528, i32 1
-  %530 = fcmp une float 0xC018E2EB20000000, %529
-  %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00
-  %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %533 = extractelement <4 x float> %532, i32 2
-  %534 = fcmp une float 0xBFEA8DB8C0000000, %533
-  %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
-  %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %537 = extractelement <4 x float> %536, i32 3
-  %538 = fcmp une float 0x4015236E20000000, %537
-  %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00
-  %540 = fsub float -0.000000e+00, %527
-  %541 = fptosi float %540 to i32
-  %542 = fsub float -0.000000e+00, %531
-  %543 = fptosi float %542 to i32
-  %544 = fsub float -0.000000e+00, %535
-  %545 = fptosi float %544 to i32
-  %546 = fsub float -0.000000e+00, %539
-  %547 = fptosi float %546 to i32
-  %548 = bitcast i32 %541 to float
-  %549 = bitcast i32 %543 to float
-  %550 = bitcast i32 %545 to float
-  %551 = bitcast i32 %547 to float
-  %552 = bitcast float %548 to i32
-  %553 = bitcast float %549 to i32
-  %554 = or i32 %552, %553
-  %555 = bitcast i32 %554 to float
-  %556 = bitcast float %550 to i32
-  %557 = bitcast float %551 to i32
-  %558 = or i32 %556, %557
-  %559 = bitcast i32 %558 to float
-  %560 = bitcast float %555 to i32
-  %561 = bitcast float %559 to i32
-  %562 = or i32 %560, %561
-  %563 = bitcast i32 %562 to float
-  %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %565 = extractelement <4 x float> %564, i32 0
-  %566 = fcmp une float 0x4016ED5D00000000, %565
-  %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00
-  %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %569 = extractelement <4 x float> %568, i32 1
-  %570 = fcmp une float 0x402332FEC0000000, %569
-  %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00
-  %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %573 = extractelement <4 x float> %572, i32 2
-  %574 = fcmp une float 0xC01484B5E0000000, %573
-  %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00
-  %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %577 = extractelement <4 x float> %576, i32 3
-  %578 = fcmp une float 0x400179A6C0000000, %577
-  %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00
-  %580 = fsub float -0.000000e+00, %567
-  %581 = fptosi float %580 to i32
-  %582 = fsub float -0.000000e+00, %571
-  %583 = fptosi float %582 to i32
-  %584 = fsub float -0.000000e+00, %575
-  %585 = fptosi float %584 to i32
-  %586 = fsub float -0.000000e+00, %579
-  %587 = fptosi float %586 to i32
-  %588 = bitcast i32 %581 to float
-  %589 = bitcast i32 %583 to float
-  %590 = bitcast i32 %585 to float
-  %591 = bitcast i32 %587 to float
-  %592 = bitcast float %588 to i32
-  %593 = bitcast float %589 to i32
-  %594 = or i32 %592, %593
-  %595 = bitcast i32 %594 to float
-  %596 = bitcast float %590 to i32
-  %597 = bitcast float %591 to i32
-  %598 = or i32 %596, %597
-  %599 = bitcast i32 %598 to float
-  %600 = bitcast float %595 to i32
-  %601 = bitcast float %599 to i32
-  %602 = or i32 %600, %601
-  %603 = bitcast i32 %602 to float
-  %604 = insertelement <4 x float> undef, float %483, i32 0
-  %605 = insertelement <4 x float> %604, float %523, i32 1
-  %606 = insertelement <4 x float> %605, float %563, i32 2
-  %607 = insertelement <4 x float> %606, float %603, i32 3
-  %608 = insertelement <4 x float> undef, float %483, i32 0
-  %609 = insertelement <4 x float> %608, float %523, i32 1
-  %610 = insertelement <4 x float> %609, float %563, i32 2
-  %611 = insertelement <4 x float> %610, float %603, i32 3
-  %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611)
-  %613 = bitcast float %612 to i32
-  %614 = icmp ne i32 %613, 0
-  %615 = sext i1 %614 to i32
-  %616 = bitcast i32 %615 to float
-  %617 = bitcast float %616 to i32
-  %618 = xor i32 %617, -1
-  %619 = bitcast i32 %618 to float
-  %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %621 = extractelement <4 x float> %620, i32 0
-  %622 = fcmp une float 0x40210068E0000000, %621
-  %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00
-  %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %625 = extractelement <4 x float> %624, i32 1
-  %626 = fcmp une float 0xBFC9A6B500000000, %625
-  %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00
-  %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %629 = extractelement <4 x float> %628, i32 2
-  %630 = fcmp une float 0xC0119BDA60000000, %629
-  %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00
-  %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %633 = extractelement <4 x float> %632, i32 3
-  %634 = fcmp une float 0xC02085D640000000, %633
-  %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00
-  %636 = fsub float -0.000000e+00, %623
-  %637 = fptosi float %636 to i32
-  %638 = fsub float -0.000000e+00, %627
-  %639 = fptosi float %638 to i32
-  %640 = fsub float -0.000000e+00, %631
-  %641 = fptosi float %640 to i32
-  %642 = fsub float -0.000000e+00, %635
-  %643 = fptosi float %642 to i32
-  %644 = bitcast i32 %637 to float
-  %645 = bitcast i32 %639 to float
-  %646 = bitcast i32 %641 to float
-  %647 = bitcast i32 %643 to float
-  %648 = bitcast float %644 to i32
-  %649 = bitcast float %645 to i32
-  %650 = or i32 %648, %649
-  %651 = bitcast i32 %650 to float
-  %652 = bitcast float %646 to i32
-  %653 = bitcast float %647 to i32
-  %654 = or i32 %652, %653
-  %655 = bitcast i32 %654 to float
-  %656 = bitcast float %651 to i32
-  %657 = bitcast float %655 to i32
-  %658 = or i32 %656, %657
-  %659 = bitcast i32 %658 to float
-  %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %661 = extractelement <4 x float> %660, i32 0
-  %662 = fcmp une float 0xBFD7C1BDA0000000, %661
-  %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00
-  %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %665 = extractelement <4 x float> %664, i32 1
-  %666 = fcmp une float 0x401E1D7DC0000000, %665
-  %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00
-  %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %669 = extractelement <4 x float> %668, i32 2
-  %670 = fcmp une float 0xC019893740000000, %669
-  %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00
-  %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %673 = extractelement <4 x float> %672, i32 3
-  %674 = fcmp une float 0x40220F0D80000000, %673
-  %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00
-  %676 = fsub float -0.000000e+00, %663
-  %677 = fptosi float %676 to i32
-  %678 = fsub float -0.000000e+00, %667
-  %679 = fptosi float %678 to i32
-  %680 = fsub float -0.000000e+00, %671
-  %681 = fptosi float %680 to i32
-  %682 = fsub float -0.000000e+00, %675
-  %683 = fptosi float %682 to i32
-  %684 = bitcast i32 %677 to float
-  %685 = bitcast i32 %679 to float
-  %686 = bitcast i32 %681 to float
-  %687 = bitcast i32 %683 to float
-  %688 = bitcast float %684 to i32
-  %689 = bitcast float %685 to i32
-  %690 = or i32 %688, %689
-  %691 = bitcast i32 %690 to float
-  %692 = bitcast float %686 to i32
-  %693 = bitcast float %687 to i32
-  %694 = or i32 %692, %693
-  %695 = bitcast i32 %694 to float
-  %696 = bitcast float %691 to i32
-  %697 = bitcast float %695 to i32
-  %698 = or i32 %696, %697
-  %699 = bitcast i32 %698 to float
-  %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %701 = extractelement <4 x float> %700, i32 0
-  %702 = fcmp une float 0xC018E2EB20000000, %701
-  %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00
-  %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %705 = extractelement <4 x float> %704, i32 1
-  %706 = fcmp une float 0xBFEA8DB8C0000000, %705
-  %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00
-  %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %709 = extractelement <4 x float> %708, i32 2
-  %710 = fcmp une float 0x4015236E20000000, %709
-  %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00
-  %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %713 = extractelement <4 x float> %712, i32 3
-  %714 = fcmp une float 0x4016ED5D00000000, %713
-  %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00
-  %716 = fsub float -0.000000e+00, %703
-  %717 = fptosi float %716 to i32
-  %718 = fsub float -0.000000e+00, %707
-  %719 = fptosi float %718 to i32
-  %720 = fsub float -0.000000e+00, %711
-  %721 = fptosi float %720 to i32
-  %722 = fsub float -0.000000e+00, %715
-  %723 = fptosi float %722 to i32
-  %724 = bitcast i32 %717 to float
-  %725 = bitcast i32 %719 to float
-  %726 = bitcast i32 %721 to float
-  %727 = bitcast i32 %723 to float
-  %728 = bitcast float %724 to i32
-  %729 = bitcast float %725 to i32
-  %730 = or i32 %728, %729
-  %731 = bitcast i32 %730 to float
-  %732 = bitcast float %726 to i32
-  %733 = bitcast float %727 to i32
-  %734 = or i32 %732, %733
-  %735 = bitcast i32 %734 to float
-  %736 = bitcast float %731 to i32
-  %737 = bitcast float %735 to i32
-  %738 = or i32 %736, %737
-  %739 = bitcast i32 %738 to float
-  %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %741 = extractelement <4 x float> %740, i32 0
-  %742 = fcmp une float 0x402332FEC0000000, %741
-  %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00
-  %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %745 = extractelement <4 x float> %744, i32 1
-  %746 = fcmp une float 0xC01484B5E0000000, %745
-  %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00
-  %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %749 = extractelement <4 x float> %748, i32 2
-  %750 = fcmp une float 0x400179A6C0000000, %749
-  %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00
-  %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %753 = extractelement <4 x float> %752, i32 3
-  %754 = fcmp une float 0xBFEE752540000000, %753
-  %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
-  %756 = fsub float -0.000000e+00, %743
-  %757 = fptosi float %756 to i32
-  %758 = fsub float -0.000000e+00, %747
-  %759 = fptosi float %758 to i32
-  %760 = fsub float -0.000000e+00, %751
-  %761 = fptosi float %760 to i32
-  %762 = fsub float -0.000000e+00, %755
-  %763 = fptosi float %762 to i32
-  %764 = bitcast i32 %757 to float
-  %765 = bitcast i32 %759 to float
-  %766 = bitcast i32 %761 to float
-  %767 = bitcast i32 %763 to float
-  %768 = bitcast float %764 to i32
-  %769 = bitcast float %765 to i32
-  %770 = or i32 %768, %769
-  %771 = bitcast i32 %770 to float
-  %772 = bitcast float %766 to i32
-  %773 = bitcast float %767 to i32
-  %774 = or i32 %772, %773
-  %775 = bitcast i32 %774 to float
-  %776 = bitcast float %771 to i32
-  %777 = bitcast float %775 to i32
-  %778 = or i32 %776, %777
-  %779 = bitcast i32 %778 to float
-  %780 = insertelement <4 x float> undef, float %659, i32 0
-  %781 = insertelement <4 x float> %780, float %699, i32 1
-  %782 = insertelement <4 x float> %781, float %739, i32 2
-  %783 = insertelement <4 x float> %782, float %779, i32 3
-  %784 = insertelement <4 x float> undef, float %659, i32 0
-  %785 = insertelement <4 x float> %784, float %699, i32 1
-  %786 = insertelement <4 x float> %785, float %739, i32 2
-  %787 = insertelement <4 x float> %786, float %779, i32 3
-  %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787)
-  %789 = bitcast float %788 to i32
-  %790 = icmp ne i32 %789, 0
-  %791 = sext i1 %790 to i32
-  %792 = bitcast i32 %791 to float
-  %793 = bitcast float %792 to i32
-  %794 = xor i32 %793, -1
-  %795 = bitcast i32 %794 to float
-  %796 = bitcast float %91 to i32
-  %797 = bitcast float %179 to i32
-  %798 = and i32 %796, %797
-  %799 = bitcast i32 %798 to float
-  %800 = bitcast float %311 to i32
-  %801 = bitcast float %443 to i32
-  %802 = and i32 %800, %801
-  %803 = bitcast i32 %802 to float
-  %804 = bitcast float %799 to i32
-  %805 = bitcast float %803 to i32
-  %806 = and i32 %804, %805
-  %807 = bitcast i32 %806 to float
-  %808 = bitcast float %619 to i32
-  %809 = bitcast float %795 to i32
-  %810 = and i32 %808, %809
-  %811 = bitcast i32 %810 to float
-  %812 = bitcast float %807 to i32
-  %813 = bitcast float %811 to i32
-  %814 = and i32 %812, %813
-  %815 = bitcast i32 %814 to float
-  %816 = bitcast float %815 to i32
-  %817 = icmp ne i32 %816, 0
-  %. = select i1 %817, float 1.000000e+00, float 0.000000e+00
-  %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00
-  %818 = insertelement <4 x float> undef, float %0, i32 0
-  %819 = insertelement <4 x float> %818, float %1, i32 1
-  %820 = insertelement <4 x float> %819, float %2, i32 2
-  %821 = insertelement <4 x float> %820, float %3, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1)
-  %822 = insertelement <4 x float> undef, float %.32, i32 0
-  %823 = insertelement <4 x float> %822, float %., i32 1
-  %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2
-  %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2)
-  ret void
-}
-
-declare float @llvm.R600.load.input(i32) #1
-
-declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index 166af2d8d128..ee9bc836eb86 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -1,11 +1,36 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: @test2
+;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+;SI-CHECK: @test2
+;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ; ANDs the two consecutive <2 x i32> vectors starting at %in and stores the result to %out
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 ; address of the <2 x i32> immediately after the one at %in
+  %a = load <2 x i32> addrspace(1) * %in ; first operand: vector at %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr ; second operand: vector at %in + 1
+  %result = and <2 x i32> %a, %b ; lane-wise AND over two i32 lanes; the check lines above expect two AND instructions per target
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @test4
+;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test4
+;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
new file mode 100644
index 000000000000..652bbfe2a415
--- /dev/null
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -0,0 +1,18 @@
+; XFAIL: *
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+
+declare i32 @llvm.SI.tid() readnone
+
+
+; SI-LABEL: @test_array_ptr_calc(
+define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [16 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { ; stores inA[1][tid] + inB[tid] to %out
+  %tid = call i32 @llvm.SI.tid() readnone ; run-time index from the llvm.SI.tid intrinsic (presumably a thread id -- confirm)
+  %a_ptr = getelementptr [16 x i32] addrspace(1)* %inA, i32 1, i32 %tid ; &inA[1][tid]: step over one whole [16 x i32] array, then index it by %tid
+  %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid ; &inB[tid]
+  %a = load i32 addrspace(1)* %a_ptr
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = add i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out ; single i32 result written to %out
+  ret void
+}
+
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
new file mode 100644
index 000000000000..0bc48a3590b2
--- /dev/null
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @atomic_add_local
+; R600-CHECK: LDS_ADD *
+; SI-CHECK-LABEL: @atomic_add_local
+; SI-CHECK: DS_ADD_U32_RTN 0
+define void @atomic_add_local(i32 addrspace(3)* %local) { ; atomically adds 5 to the i32 at %local (address space 3) and returns nothing
+entry:
+   %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst ; %0 (the value atomicrmw yields) is never read in this function
+   ret void
+}
+
+; R600-CHECK-LABEL: @atomic_add_ret_local
+; R600-CHECK: LDS_ADD_RET *
+; SI-CHECK-LABEL: @atomic_add_ret_local
+; SI-CHECK: DS_ADD_U32_RTN 0
+define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { ; atomically adds 5 to the i32 at %local and stores the value atomicrmw yields to %out
+entry:
+  %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst ; %0 receives the value atomicrmw yields (the contents of %local before the add)
+  store i32 %0, i32 addrspace(1)* %out ; consuming %0 keeps the returned value live
+  ret void
+}
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
new file mode 100644
index 000000000000..e4a682932c82
--- /dev/null
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @atomic_sub_local
+; R600-CHECK: LDS_SUB *
+; SI-CHECK-LABEL: @atomic_sub_local
+; SI-CHECK: DS_SUB_U32_RTN 0
+define void @atomic_sub_local(i32 addrspace(3)* %local) { ; atomically subtracts 5 from the i32 at %local (address space 3) and returns nothing
+entry:
+   %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst ; %0 (the value atomicrmw yields) is never read in this function
+   ret void
+}
+
+; R600-CHECK-LABEL: @atomic_sub_ret_local
+; R600-CHECK: LDS_SUB_RET *
+; SI-CHECK-LABEL: @atomic_sub_ret_local
+; SI-CHECK: DS_SUB_U32_RTN 0
+define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { ; atomically subtracts 5 from the i32 at %local and stores the value atomicrmw yields to %out
+entry:
+  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst ; %0 receives the value atomicrmw yields (the contents of %local before the subtract)
+  store i32 %0, i32 addrspace(1)* %out ; consuming %0 keeps the returned value live
+  ret void
+}
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
index 4244dcf3c77b..bbfe856fc930 100644
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
@@ -36,10 +36,10 @@ entry:
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
 ; R600-CHECK: @bfi_sha256_ma
-; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV.x}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: V_XOR_B32_e32 [[DST:VGPR[0-9]+]], {{VGPR[0-9]+, VGPR[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{VGPR[0-9]+, VGPR[0-9]+}}
+; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
+; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
+; SI-CHECK: V_XOR_B32_e64 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
+; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
 
 define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
 entry:
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
new file mode 100644
index 000000000000..6b683769fe06
--- /dev/null
+++ b/test/CodeGen/R600/big_alu.ll
@@ -0,0 +1,1174 @@
+;RUN: llc < %s -march=r600 -mcpu=cedar
+;REQUIRES: asserts
+
+;This test ensures that the R600 backend handles ifcvt properly
+;and does not generate ALU clauses with more than 128 instructions.
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg0, i32 0
+  %1 = extractelement <4 x float> %reg0, i32 1
+  %2 = extractelement <4 x float> %reg0, i32 2
+  %3 = extractelement <4 x float> %reg0, i32 3
+  %4 = extractelement <4 x float> %reg1, i32 0
+  %5 = extractelement <4 x float> %reg9, i32 0
+  %6 = extractelement <4 x float> %reg8, i32 0
+  %7 = fcmp ugt float %6, 0.000000e+00
+  %8 = select i1 %7, float %4, float %5
+  %9 = extractelement <4 x float> %reg1, i32 1
+  %10 = extractelement <4 x float> %reg9, i32 1
+  %11 = extractelement <4 x float> %reg8, i32 0
+  %12 = fcmp ugt float %11, 0.000000e+00
+  %13 = select i1 %12, float %9, float %10
+  %14 = extractelement <4 x float> %reg1, i32 2
+  %15 = extractelement <4 x float> %reg9, i32 2
+  %16 = extractelement <4 x float> %reg8, i32 0
+  %17 = fcmp ugt float %16, 0.000000e+00
+  %18 = select i1 %17, float %14, float %15
+  %19 = extractelement <4 x float> %reg1, i32 3
+  %20 = extractelement <4 x float> %reg9, i32 3
+  %21 = extractelement <4 x float> %reg8, i32 0
+  %22 = extractelement <4 x float> %reg2, i32 0
+  %23 = extractelement <4 x float> %reg2, i32 1
+  %24 = extractelement <4 x float> %reg2, i32 2
+  %25 = extractelement <4 x float> %reg2, i32 3
+  %26 = extractelement <4 x float> %reg3, i32 0
+  %27 = extractelement <4 x float> %reg3, i32 1
+  %28 = extractelement <4 x float> %reg3, i32 2
+  %29 = extractelement <4 x float> %reg3, i32 3
+  %30 = extractelement <4 x float> %reg4, i32 0
+  %31 = extractelement <4 x float> %reg4, i32 1
+  %32 = extractelement <4 x float> %reg4, i32 2
+  %33 = extractelement <4 x float> %reg4, i32 3
+  %34 = extractelement <4 x float> %reg5, i32 0
+  %35 = extractelement <4 x float> %reg5, i32 1
+  %36 = extractelement <4 x float> %reg5, i32 2
+  %37 = extractelement <4 x float> %reg5, i32 3
+  %38 = extractelement <4 x float> %reg6, i32 0
+  %39 = extractelement <4 x float> %reg6, i32 1
+  %40 = extractelement <4 x float> %reg6, i32 2
+  %41 = extractelement <4 x float> %reg6, i32 3
+  %42 = extractelement <4 x float> %reg7, i32 0
+  %43 = extractelement <4 x float> %reg7, i32 1
+  %44 = extractelement <4 x float> %reg7, i32 2
+  %45 = extractelement <4 x float> %reg7, i32 3
+  %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %47 = extractelement <4 x float> %46, i32 0
+  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %49 = extractelement <4 x float> %48, i32 1
+  %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %51 = extractelement <4 x float> %50, i32 2
+  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %53 = extractelement <4 x float> %52, i32 0
+  %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %55 = extractelement <4 x float> %54, i32 0
+  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %57 = extractelement <4 x float> %56, i32 1
+  %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %59 = extractelement <4 x float> %58, i32 2
+  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %61 = extractelement <4 x float> %60, i32 3
+  %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %63 = extractelement <4 x float> %62, i32 0
+  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %65 = extractelement <4 x float> %64, i32 1
+  %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %67 = extractelement <4 x float> %66, i32 2
+  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %69 = extractelement <4 x float> %68, i32 0
+  %70 = fcmp oge float %69, 3.500000e+00
+  %71 = sext i1 %70 to i32
+  %72 = bitcast i32 %71 to float
+  %73 = bitcast float %72 to i32
+  %74 = icmp ne i32 %73, 0
+  %. = select i1 %74, float 0.000000e+00, float 0.000000e+00
+  %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %76 = extractelement <4 x float> %75, i32 0
+  %77 = fcmp oge float %76, 2.000000e+00
+  %78 = sext i1 %77 to i32
+  %79 = bitcast i32 %78 to float
+  %80 = bitcast float %79 to i32
+  %81 = icmp ne i32 %80, 0
+  br i1 %81, label %IF137, label %ENDIF136
+
+IF137:                                            ; preds = %main_body
+  %82 = insertelement <4 x float> undef, float %30, i32 0
+  %83 = insertelement <4 x float> %82, float %31, i32 1
+  %84 = insertelement <4 x float> %83, float %32, i32 2
+  %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3
+  %86 = insertelement <4 x float> undef, float %30, i32 0
+  %87 = insertelement <4 x float> %86, float %31, i32 1
+  %88 = insertelement <4 x float> %87, float %32, i32 2
+  %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
+  %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89)
+  %91 = call float @llvm.AMDGPU.rsq(float %90)
+  %92 = fmul float %30, %91
+  %93 = fmul float %31, %91
+  %94 = fmul float %32, %91
+  %95 = insertelement <4 x float> undef, float %92, i32 0
+  %96 = insertelement <4 x float> %95, float %93, i32 1
+  %97 = insertelement <4 x float> %96, float %94, i32 2
+  %98 = insertelement <4 x float> %97, float 0.000000e+00, i32 3
+  %99 = insertelement <4 x float> undef, float %37, i32 0
+  %100 = insertelement <4 x float> %99, float %38, i32 1
+  %101 = insertelement <4 x float> %100, float %39, i32 2
+  %102 = insertelement <4 x float> %101, float 0.000000e+00, i32 3
+  %103 = call float @llvm.AMDGPU.dp4(<4 x float> %98, <4 x float> %102)
+  %104 = insertelement <4 x float> undef, float %92, i32 0
+  %105 = insertelement <4 x float> %104, float %93, i32 1
+  %106 = insertelement <4 x float> %105, float %94, i32 2
+  %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 3
+  %108 = insertelement <4 x float> undef, float %40, i32 0
+  %109 = insertelement <4 x float> %108, float %41, i32 1
+  %110 = insertelement <4 x float> %109, float %42, i32 2
+  %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 3
+  %112 = call float @llvm.AMDGPU.dp4(<4 x float> %107, <4 x float> %111)
+  %113 = fsub float -0.000000e+00, %92
+  %114 = fsub float -0.000000e+00, %93
+  %115 = fsub float -0.000000e+00, %94
+  %116 = insertelement <4 x float> undef, float %34, i32 0
+  %117 = insertelement <4 x float> %116, float %35, i32 1
+  %118 = insertelement <4 x float> %117, float %36, i32 2
+  %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3
+  %120 = insertelement <4 x float> undef, float %113, i32 0
+  %121 = insertelement <4 x float> %120, float %114, i32 1
+  %122 = insertelement <4 x float> %121, float %115, i32 2
+  %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
+  %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
+  %125 = fdiv float 1.000000e+00, %124
+  %126 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %127 = extractelement <4 x float> %126, i32 0
+  %128 = fmul float %127, %125
+  %129 = fmul float %103, %128
+  %130 = fmul float %112, %128
+  %131 = bitcast float %. to i32
+  %132 = sitofp i32 %131 to float
+  %133 = fdiv float 1.000000e+00, %132
+  %134 = bitcast float %. to i32
+  %135 = add i32 %134, -1
+  %136 = bitcast i32 %135 to float
+  %137 = bitcast float %136 to i32
+  br label %LOOP
+
+ENDIF136:                                         ; preds = %main_body, %ENDIF154
+  %temp68.1 = phi float [ %600, %ENDIF154 ], [ 0.000000e+00, %main_body ]
+  %temp69.0 = phi float [ %602, %ENDIF154 ], [ 0.000000e+00, %main_body ]
+  %temp70.0 = phi float [ %604, %ENDIF154 ], [ 1.000000e+00, %main_body ]
+  %138 = fmul float %26, 0x3F847AE140000000
+  %139 = fmul float %27, 0x3F847AE140000000
+  %140 = fmul float %28, 0x3F847AE140000000
+  %141 = insertelement <4 x float> undef, float %138, i32 0
+  %142 = insertelement <4 x float> %141, float %139, i32 1
+  %143 = insertelement <4 x float> %142, float %140, i32 2
+  %144 = insertelement <4 x float> %143, float 0.000000e+00, i32 3
+  %145 = extractelement <4 x float> %144, i32 0
+  %146 = extractelement <4 x float> %144, i32 1
+  %147 = extractelement <4 x float> %144, i32 2
+  %148 = extractelement <4 x float> %144, i32 3
+  %149 = insertelement <4 x float> undef, float %145, i32 0
+  %150 = insertelement <4 x float> %149, float %146, i32 1
+  %151 = insertelement <4 x float> %150, float %147, i32 2
+  %152 = insertelement <4 x float> %151, float %148, i32 3
+  %153 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %152, i32 16, i32 0, i32 3)
+  %154 = extractelement <4 x float> %153, i32 0
+  %155 = extractelement <4 x float> %153, i32 1
+  %156 = extractelement <4 x float> %153, i32 2
+  %157 = extractelement <4 x float> %153, i32 3
+  %158 = fmul float %26, 0x3F45A07B40000000
+  %159 = fmul float %27, 0x3F45A07B40000000
+  %160 = fmul float %28, 0x3F45A07B40000000
+  %161 = insertelement <4 x float> undef, float %158, i32 0
+  %162 = insertelement <4 x float> %161, float %159, i32 1
+  %163 = insertelement <4 x float> %162, float %160, i32 2
+  %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 3
+  %165 = extractelement <4 x float> %164, i32 0
+  %166 = extractelement <4 x float> %164, i32 1
+  %167 = extractelement <4 x float> %164, i32 2
+  %168 = extractelement <4 x float> %164, i32 3
+  %169 = insertelement <4 x float> undef, float %165, i32 0
+  %170 = insertelement <4 x float> %169, float %166, i32 1
+  %171 = insertelement <4 x float> %170, float %167, i32 2
+  %172 = insertelement <4 x float> %171, float %168, i32 3
+  %173 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %172, i32 16, i32 0, i32 3)
+  %174 = extractelement <4 x float> %173, i32 0
+  %175 = extractelement <4 x float> %173, i32 1
+  %176 = extractelement <4 x float> %173, i32 2
+  %177 = extractelement <4 x float> %173, i32 3
+  %178 = fmul float %176, 3.000000e+03
+  %179 = fadd float %178, %28
+  %180 = fdiv float 1.000000e+00, %33
+  %181 = fmul float %32, %180
+  %182 = call float @fabs(float %181)
+  %183 = fmul float %174, 0x3FD99999A0000000
+  %184 = fadd float %183, 0x3FAEB851E0000000
+  %185 = fmul float %175, 0x3FE3333340000000
+  %186 = fadd float %185, %184
+  %187 = fmul float %176, 2.000000e+00
+  %188 = fadd float %187, %186
+  %189 = fmul float %177, 4.000000e+00
+  %190 = fadd float %189, %188
+  %191 = fmul float %154, 0x3FB99999A0000000
+  %192 = fadd float %191, %190
+  %193 = fmul float %155, 0x3FD99999A0000000
+  %194 = fadd float %193, %192
+  %195 = fmul float %156, 0x3FE99999A0000000
+  %196 = fadd float %195, %194
+  %197 = fmul float %157, 0x4000CCCCC0000000
+  %198 = fadd float %197, %196
+  %199 = fmul float 0xBE5EFB4CC0000000, %182
+  %200 = fmul float %199, %182
+  %201 = call float @llvm.AMDIL.exp.(float %200)
+  %202 = call float @llvm.AMDGPU.lrp(float %201, float %198, float 0x3FA99999A0000000)
+  %203 = fadd float %202, 0x3FF4CCCCC0000000
+  %204 = fmul float %203, 0x3FE1C71C80000000
+  %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00)
+  %206 = fadd float %202, 0x3FF4CCCCC0000000
+  %207 = fmul float %206, 0x3FE1C71C80000000
+  %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00)
+  %209 = fadd float %202, 2.000000e+00
+  %210 = fmul float %209, 0x3FD611A7A0000000
+  %211 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00)
+  %212 = fmul float 2.000000e+00, %205
+  %213 = fsub float -0.000000e+00, %212
+  %214 = fadd float 3.000000e+00, %213
+  %215 = fmul float %205, %214
+  %216 = fmul float %205, %215
+  %217 = fmul float 2.000000e+00, %208
+  %218 = fsub float -0.000000e+00, %217
+  %219 = fadd float 3.000000e+00, %218
+  %220 = fmul float %208, %219
+  %221 = fmul float %208, %220
+  %222 = fmul float 2.000000e+00, %211
+  %223 = fsub float -0.000000e+00, %222
+  %224 = fadd float 3.000000e+00, %223
+  %225 = fmul float %211, %224
+  %226 = fmul float %211, %225
+  %227 = fmul float %26, 0x3F368B5CC0000000
+  %228 = fmul float %27, 0x3F368B5CC0000000
+  %229 = insertelement <4 x float> undef, float %227, i32 0
+  %230 = insertelement <4 x float> %229, float %228, i32 1
+  %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 2
+  %232 = insertelement <4 x float> %231, float 0.000000e+00, i32 3
+  %233 = extractelement <4 x float> %232, i32 0
+  %234 = extractelement <4 x float> %232, i32 1
+  %235 = insertelement <4 x float> undef, float %233, i32 0
+  %236 = insertelement <4 x float> %235, float %234, i32 1
+  %237 = insertelement <4 x float> %236, float undef, i32 2
+  %238 = insertelement <4 x float> %237, float undef, i32 3
+  %239 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %238, i32 17, i32 1, i32 2)
+  %240 = extractelement <4 x float> %239, i32 0
+  %241 = insertelement <4 x float> undef, float %240, i32 0
+  %242 = insertelement <4 x float> %241, float %228, i32 1
+  %243 = insertelement <4 x float> %242, float 0.000000e+00, i32 2
+  %244 = insertelement <4 x float> %243, float 0.000000e+00, i32 3
+  %245 = extractelement <4 x float> %244, i32 0
+  %246 = insertelement <4 x float> undef, float %245, i32 0
+  %247 = insertelement <4 x float> %246, float undef, i32 1
+  %248 = insertelement <4 x float> %247, float undef, i32 2
+  %249 = insertelement <4 x float> %248, float undef, i32 3
+  %250 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %249, i32 18, i32 2, i32 1)
+  %251 = extractelement <4 x float> %250, i32 0
+  %252 = extractelement <4 x float> %250, i32 1
+  %253 = extractelement <4 x float> %250, i32 2
+  %254 = extractelement <4 x float> %250, i32 3
+  %255 = fmul float %251, %216
+  %256 = fmul float %252, %221
+  %257 = fmul float %253, %226
+  %258 = fmul float %254, 0.000000e+00
+  %259 = fadd float %202, 0x3FF4CCCCC0000000
+  %260 = fmul float %259, 0x3FE1C71C80000000
+  %261 = call float @llvm.AMDIL.clamp.(float %260, float 0.000000e+00, float 1.000000e+00)
+  %262 = fadd float %202, 0x3FF4CCCCC0000000
+  %263 = fmul float %262, 0x3FE1C71C80000000
+  %264 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00)
+  %265 = fadd float %202, 2.000000e+00
+  %266 = fmul float %265, 0x3FD611A7A0000000
+  %267 = call float @llvm.AMDIL.clamp.(float %266, float 0.000000e+00, float 1.000000e+00)
+  %268 = fmul float 2.000000e+00, %261
+  %269 = fsub float -0.000000e+00, %268
+  %270 = fadd float 3.000000e+00, %269
+  %271 = fmul float %261, %270
+  %272 = fmul float %261, %271
+  %273 = fmul float 2.000000e+00, %264
+  %274 = fsub float -0.000000e+00, %273
+  %275 = fadd float 3.000000e+00, %274
+  %276 = fmul float %264, %275
+  %277 = fmul float %264, %276
+  %278 = fmul float 2.000000e+00, %267
+  %279 = fsub float -0.000000e+00, %278
+  %280 = fadd float 3.000000e+00, %279
+  %281 = fmul float %267, %280
+  %282 = fmul float %267, %281
+  %283 = fmul float %26, 0x3F22DFD6A0000000
+  %284 = fmul float %27, 0x3F22DFD6A0000000
+  %285 = insertelement <4 x float> undef, float %283, i32 0
+  %286 = insertelement <4 x float> %285, float %284, i32 1
+  %287 = insertelement <4 x float> %286, float 0.000000e+00, i32 2
+  %288 = insertelement <4 x float> %287, float 0.000000e+00, i32 3
+  %289 = extractelement <4 x float> %288, i32 0
+  %290 = extractelement <4 x float> %288, i32 1
+  %291 = insertelement <4 x float> undef, float %289, i32 0
+  %292 = insertelement <4 x float> %291, float %290, i32 1
+  %293 = insertelement <4 x float> %292, float undef, i32 2
+  %294 = insertelement <4 x float> %293, float undef, i32 3
+  %295 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %294, i32 19, i32 3, i32 2)
+  %296 = extractelement <4 x float> %295, i32 0
+  %297 = extractelement <4 x float> %295, i32 1
+  %298 = extractelement <4 x float> %295, i32 2
+  %299 = extractelement <4 x float> %295, i32 3
+  %300 = fmul float %296, %272
+  %301 = fmul float %297, %277
+  %302 = fmul float %298, %282
+  %303 = fmul float %299, 0.000000e+00
+  %304 = fmul float %temp68.1, %37
+  %305 = fmul float %temp68.1, %38
+  %306 = fmul float %temp68.1, %39
+  %307 = fmul float %temp69.0, %40
+  %308 = fadd float %307, %304
+  %309 = fmul float %temp69.0, %41
+  %310 = fadd float %309, %305
+  %311 = fmul float %temp69.0, %42
+  %312 = fadd float %311, %306
+  %313 = fmul float %temp70.0, %34
+  %314 = fadd float %313, %308
+  %315 = fmul float %temp70.0, %35
+  %316 = fadd float %315, %310
+  %317 = fmul float %temp70.0, %36
+  %318 = fadd float %317, %312
+  %319 = insertelement <4 x float> undef, float %314, i32 0
+  %320 = insertelement <4 x float> %319, float %316, i32 1
+  %321 = insertelement <4 x float> %320, float %318, i32 2
+  %322 = insertelement <4 x float> %321, float 0.000000e+00, i32 3
+  %323 = insertelement <4 x float> undef, float %314, i32 0
+  %324 = insertelement <4 x float> %323, float %316, i32 1
+  %325 = insertelement <4 x float> %324, float %318, i32 2
+  %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
+  %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
+  %328 = call float @llvm.AMDGPU.rsq(float %327)
+  %329 = fmul float %314, %328
+  %330 = fmul float %316, %328
+  %331 = fmul float %318, %328
+  %332 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %333 = extractelement <4 x float> %332, i32 0
+  %334 = fsub float -0.000000e+00, %333
+  %335 = fadd float 1.000000e+00, %334
+  %336 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %337 = extractelement <4 x float> %336, i32 0
+  %338 = fsub float -0.000000e+00, %337
+  %339 = fadd float 1.000000e+00, %338
+  %340 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %341 = extractelement <4 x float> %340, i32 0
+  %342 = fsub float -0.000000e+00, %341
+  %343 = fadd float 1.000000e+00, %342
+  %344 = fsub float -0.000000e+00, %335
+  %345 = fadd float %202, %344
+  %346 = fsub float -0.000000e+00, %339
+  %347 = fadd float %202, %346
+  %348 = fadd float %347, 0xBFE3333340000000
+  %349 = fsub float -0.000000e+00, %202
+  %350 = fsub float -0.000000e+00, %343
+  %351 = fadd float %349, %350
+  %352 = insertelement <4 x float> undef, float %43, i32 0
+  %353 = insertelement <4 x float> %352, float %44, i32 1
+  %354 = insertelement <4 x float> %353, float %45, i32 2
+  %355 = insertelement <4 x float> %354, float 0.000000e+00, i32 3
+  %356 = insertelement <4 x float> undef, float %43, i32 0
+  %357 = insertelement <4 x float> %356, float %44, i32 1
+  %358 = insertelement <4 x float> %357, float %45, i32 2
+  %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
+  %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
+  %361 = call float @llvm.AMDGPU.rsq(float %360)
+  %362 = fmul float %45, %361
+  %363 = call float @fabs(float %362)
+  %364 = fmul float %176, 0x3FECCCCCC0000000
+  %365 = fadd float %364, %363
+  %366 = fadd float %365, 0xBFEFAE1480000000
+  %367 = fmul float %366, 0xC023FFFFC0000000
+  %368 = call float @llvm.AMDIL.clamp.(float %367, float 0.000000e+00, float 1.000000e+00)
+  %369 = fsub float -0.000000e+00, %335
+  %370 = fadd float %202, %369
+  %371 = fadd float %370, 0x3FBEB851E0000000
+  %372 = fsub float -0.000000e+00, %339
+  %373 = fadd float %202, %372
+  %374 = fadd float %373, 0xBFE0A3D700000000
+  %375 = fsub float -0.000000e+00, %202
+  %376 = fsub float -0.000000e+00, %343
+  %377 = fadd float %375, %376
+  %378 = insertelement <4 x float> undef, float %43, i32 0
+  %379 = insertelement <4 x float> %378, float %44, i32 1
+  %380 = insertelement <4 x float> %379, float %45, i32 2
+  %381 = insertelement <4 x float> %380, float 0.000000e+00, i32 3
+  %382 = insertelement <4 x float> undef, float %43, i32 0
+  %383 = insertelement <4 x float> %382, float %44, i32 1
+  %384 = insertelement <4 x float> %383, float %45, i32 2
+  %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
+  %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
+  %387 = call float @llvm.AMDGPU.rsq(float %386)
+  %388 = fmul float %45, %387
+  %389 = call float @fabs(float %388)
+  %390 = fmul float %176, 0x3FF51EB860000000
+  %391 = fadd float %390, %389
+  %392 = fadd float %391, 0xBFEFAE1480000000
+  %393 = fmul float %392, 0xC0490001A0000000
+  %394 = call float @llvm.AMDIL.clamp.(float %393, float 0.000000e+00, float 1.000000e+00)
+  %395 = fmul float 2.000000e+00, %368
+  %396 = fsub float -0.000000e+00, %395
+  %397 = fadd float 3.000000e+00, %396
+  %398 = fmul float %368, %397
+  %399 = fmul float %368, %398
+  %400 = call float @llvm.AMDGPU.lrp(float %399, float %255, float %345)
+  %401 = call float @llvm.AMDGPU.lrp(float %399, float %256, float %348)
+  %402 = call float @llvm.AMDGPU.lrp(float %399, float %257, float %351)
+  %403 = call float @llvm.AMDGPU.lrp(float %399, float %258, float 0.000000e+00)
+  %404 = fmul float 2.000000e+00, %394
+  %405 = fsub float -0.000000e+00, %404
+  %406 = fadd float 3.000000e+00, %405
+  %407 = fmul float %394, %406
+  %408 = fmul float %394, %407
+  %409 = call float @llvm.AMDGPU.lrp(float %408, float %255, float %371)
+  %410 = call float @llvm.AMDGPU.lrp(float %408, float %256, float %374)
+  %411 = call float @llvm.AMDGPU.lrp(float %408, float %257, float %377)
+  %412 = call float @llvm.AMDGPU.lrp(float %408, float %258, float 0x3FD3333340000000)
+  %413 = fcmp oge float 2.200000e+03, %179
+  %414 = sext i1 %413 to i32
+  %415 = bitcast i32 %414 to float
+  %416 = bitcast float %415 to i32
+  %417 = icmp ne i32 %416, 0
+  br i1 %417, label %IF161, label %ENDIF160
+
+LOOP:                                             ; preds = %ENDIF139, %IF137
+  %temp88.0 = phi float [ 0.000000e+00, %IF137 ], [ %446, %ENDIF139 ]
+  %temp92.0 = phi float [ 1.000000e+00, %IF137 ], [ %.temp92.0, %ENDIF139 ]
+  %temp96.0 = phi float [ 0.000000e+00, %IF137 ], [ %477, %ENDIF139 ]
+  %418 = bitcast float %temp96.0 to i32
+  %419 = icmp sge i32 %418, %137
+  %420 = sext i1 %419 to i32
+  %421 = bitcast i32 %420 to float
+  %422 = bitcast float %421 to i32
+  %423 = icmp ne i32 %422, 0
+  br i1 %423, label %IF140, label %ENDIF139
+
+IF140:                                            ; preds = %LOOP
+  %424 = fmul float %133, 5.000000e-01
+  %425 = fmul float %129, %temp92.0
+  %426 = fadd float %425, %22
+  %427 = fmul float %130, %temp92.0
+  %428 = fadd float %427, %23
+  %429 = insertelement <4 x float> undef, float %426, i32 0
+  %430 = insertelement <4 x float> %429, float %428, i32 1
+  %431 = insertelement <4 x float> %430, float 0.000000e+00, i32 2
+  %432 = insertelement <4 x float> %431, float 0.000000e+00, i32 3
+  %433 = extractelement <4 x float> %432, i32 0
+  %434 = extractelement <4 x float> %432, i32 1
+  %435 = insertelement <4 x float> undef, float %433, i32 0
+  %436 = insertelement <4 x float> %435, float %434, i32 1
+  %437 = insertelement <4 x float> %436, float undef, i32 2
+  %438 = insertelement <4 x float> %437, float undef, i32 3
+  %439 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %438, i32 20, i32 4, i32 2)
+  %440 = extractelement <4 x float> %439, i32 3
+  %441 = fcmp oge float %temp92.0, %440
+  %442 = sext i1 %441 to i32
+  %443 = bitcast i32 %442 to float
+  %444 = bitcast float %443 to i32
+  %445 = icmp ne i32 %444, 0
+  br i1 %445, label %IF146, label %ENDIF145
+
+ENDIF139:                                         ; preds = %LOOP
+  %446 = fadd float %temp88.0, %133
+  %447 = fmul float %129, %446
+  %448 = fadd float %447, %22
+  %449 = fmul float %130, %446
+  %450 = fadd float %449, %23
+  %451 = insertelement <4 x float> undef, float %448, i32 0
+  %452 = insertelement <4 x float> %451, float %450, i32 1
+  %453 = insertelement <4 x float> %452, float 0.000000e+00, i32 2
+  %454 = insertelement <4 x float> %453, float 0.000000e+00, i32 3
+  %455 = extractelement <4 x float> %454, i32 0
+  %456 = extractelement <4 x float> %454, i32 1
+  %457 = insertelement <4 x float> undef, float %455, i32 0
+  %458 = insertelement <4 x float> %457, float %456, i32 1
+  %459 = insertelement <4 x float> %458, float undef, i32 2
+  %460 = insertelement <4 x float> %459, float undef, i32 3
+  %461 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %460, i32 20, i32 4, i32 2)
+  %462 = extractelement <4 x float> %461, i32 3
+  %463 = fcmp olt float 0x3FEFDF3B60000000, %temp92.0
+  %464 = sext i1 %463 to i32
+  %465 = bitcast i32 %464 to float
+  %466 = fcmp oge float %446, %462
+  %467 = sext i1 %466 to i32
+  %468 = bitcast i32 %467 to float
+  %469 = bitcast float %465 to i32
+  %470 = bitcast float %468 to i32
+  %471 = and i32 %469, %470
+  %472 = bitcast i32 %471 to float
+  %473 = bitcast float %472 to i32
+  %474 = icmp ne i32 %473, 0
+  %.temp92.0 = select i1 %474, float %446, float %temp92.0
+  %475 = bitcast float %temp96.0 to i32
+  %476 = add i32 %475, 1
+  %477 = bitcast i32 %476 to float
+  br label %LOOP
+
+IF146:                                            ; preds = %IF140
+  %478 = fmul float 2.000000e+00, %424
+  %479 = fsub float -0.000000e+00, %478
+  %480 = fadd float %temp92.0, %479
+  br label %ENDIF145
+
+ENDIF145:                                         ; preds = %IF140, %IF146
+  %temp88.1 = phi float [ %480, %IF146 ], [ %temp92.0, %IF140 ]
+  %481 = fadd float %temp88.1, %424
+  %482 = fmul float %424, 5.000000e-01
+  %483 = fmul float %129, %481
+  %484 = fadd float %483, %22
+  %485 = fmul float %130, %481
+  %486 = fadd float %485, %23
+  %487 = insertelement <4 x float> undef, float %484, i32 0
+  %488 = insertelement <4 x float> %487, float %486, i32 1
+  %489 = insertelement <4 x float> %488, float 0.000000e+00, i32 2
+  %490 = insertelement <4 x float> %489, float %440, i32 3
+  %491 = extractelement <4 x float> %490, i32 0
+  %492 = extractelement <4 x float> %490, i32 1
+  %493 = insertelement <4 x float> undef, float %491, i32 0
+  %494 = insertelement <4 x float> %493, float %492, i32 1
+  %495 = insertelement <4 x float> %494, float undef, i32 2
+  %496 = insertelement <4 x float> %495, float undef, i32 3
+  %497 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %496, i32 20, i32 4, i32 2)
+  %498 = extractelement <4 x float> %497, i32 3
+  %499 = fcmp oge float %481, %498
+  %500 = sext i1 %499 to i32
+  %501 = bitcast i32 %500 to float
+  %502 = bitcast float %501 to i32
+  %503 = icmp ne i32 %502, 0
+  br i1 %503, label %IF149, label %ENDIF148
+
+IF149:                                            ; preds = %ENDIF145
+  %504 = fmul float 2.000000e+00, %482
+  %505 = fsub float -0.000000e+00, %504
+  %506 = fadd float %481, %505
+  br label %ENDIF148
+
+ENDIF148:                                         ; preds = %ENDIF145, %IF149
+  %temp88.2 = phi float [ %506, %IF149 ], [ %481, %ENDIF145 ]
+  %temp92.2 = phi float [ %481, %IF149 ], [ %temp92.0, %ENDIF145 ]
+  %507 = fadd float %temp88.2, %482
+  %508 = fmul float %482, 5.000000e-01
+  %509 = fmul float %129, %507
+  %510 = fadd float %509, %22
+  %511 = fmul float %130, %507
+  %512 = fadd float %511, %23
+  %513 = insertelement <4 x float> undef, float %510, i32 0
+  %514 = insertelement <4 x float> %513, float %512, i32 1
+  %515 = insertelement <4 x float> %514, float 0.000000e+00, i32 2
+  %516 = insertelement <4 x float> %515, float %498, i32 3
+  %517 = extractelement <4 x float> %516, i32 0
+  %518 = extractelement <4 x float> %516, i32 1
+  %519 = insertelement <4 x float> undef, float %517, i32 0
+  %520 = insertelement <4 x float> %519, float %518, i32 1
+  %521 = insertelement <4 x float> %520, float undef, i32 2
+  %522 = insertelement <4 x float> %521, float undef, i32 3
+  %523 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %522, i32 20, i32 4, i32 2)
+  %524 = extractelement <4 x float> %523, i32 3
+  %525 = fcmp oge float %507, %524
+  %526 = sext i1 %525 to i32
+  %527 = bitcast i32 %526 to float
+  %528 = bitcast float %527 to i32
+  %529 = icmp ne i32 %528, 0
+  br i1 %529, label %IF152, label %ENDIF151
+
+IF152:                                            ; preds = %ENDIF148
+  %530 = fmul float 2.000000e+00, %508
+  %531 = fsub float -0.000000e+00, %530
+  %532 = fadd float %507, %531
+  br label %ENDIF151
+
+ENDIF151:                                         ; preds = %ENDIF148, %IF152
+  %temp88.3 = phi float [ %532, %IF152 ], [ %507, %ENDIF148 ]
+  %temp92.3 = phi float [ %507, %IF152 ], [ %temp92.2, %ENDIF148 ]
+  %533 = fadd float %temp88.3, %508
+  %534 = fmul float %508, 5.000000e-01
+  %535 = fmul float %129, %533
+  %536 = fadd float %535, %22
+  %537 = fmul float %130, %533
+  %538 = fadd float %537, %23
+  %539 = insertelement <4 x float> undef, float %536, i32 0
+  %540 = insertelement <4 x float> %539, float %538, i32 1
+  %541 = insertelement <4 x float> %540, float 0.000000e+00, i32 2
+  %542 = insertelement <4 x float> %541, float %524, i32 3
+  %543 = extractelement <4 x float> %542, i32 0
+  %544 = extractelement <4 x float> %542, i32 1
+  %545 = insertelement <4 x float> undef, float %543, i32 0
+  %546 = insertelement <4 x float> %545, float %544, i32 1
+  %547 = insertelement <4 x float> %546, float undef, i32 2
+  %548 = insertelement <4 x float> %547, float undef, i32 3
+  %549 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %548, i32 20, i32 4, i32 2)
+  %550 = extractelement <4 x float> %549, i32 3
+  %551 = fcmp oge float %533, %550
+  %552 = sext i1 %551 to i32
+  %553 = bitcast i32 %552 to float
+  %554 = bitcast float %553 to i32
+  %555 = icmp ne i32 %554, 0
+  br i1 %555, label %IF155, label %ENDIF154
+
+IF155:                                            ; preds = %ENDIF151
+  %556 = fmul float 2.000000e+00, %534
+  %557 = fsub float -0.000000e+00, %556
+  %558 = fadd float %533, %557
+  br label %ENDIF154
+
+ENDIF154:                                         ; preds = %ENDIF151, %IF155
+  %temp88.4 = phi float [ %558, %IF155 ], [ %533, %ENDIF151 ]
+  %temp92.4 = phi float [ %533, %IF155 ], [ %temp92.3, %ENDIF151 ]
+  %559 = fadd float %temp88.4, %534
+  %560 = fmul float %129, %559
+  %561 = fadd float %560, %22
+  %562 = fmul float %130, %559
+  %563 = fadd float %562, %23
+  %564 = insertelement <4 x float> undef, float %561, i32 0
+  %565 = insertelement <4 x float> %564, float %563, i32 1
+  %566 = insertelement <4 x float> %565, float 0.000000e+00, i32 2
+  %567 = insertelement <4 x float> %566, float %550, i32 3
+  %568 = extractelement <4 x float> %567, i32 0
+  %569 = extractelement <4 x float> %567, i32 1
+  %570 = insertelement <4 x float> undef, float %568, i32 0
+  %571 = insertelement <4 x float> %570, float %569, i32 1
+  %572 = insertelement <4 x float> %571, float undef, i32 2
+  %573 = insertelement <4 x float> %572, float undef, i32 3
+  %574 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %573, i32 20, i32 4, i32 2)
+  %575 = extractelement <4 x float> %574, i32 3
+  %576 = fcmp oge float %559, %575
+  %577 = sext i1 %576 to i32
+  %578 = bitcast i32 %577 to float
+  %579 = bitcast float %578 to i32
+  %580 = icmp ne i32 %579, 0
+  %.temp92.4 = select i1 %580, float %559, float %temp92.4
+  %581 = fmul float %129, %.temp92.4
+  %582 = fadd float %581, %22
+  %583 = fmul float %130, %.temp92.4
+  %584 = fadd float %583, %23
+  %585 = insertelement <4 x float> undef, float %582, i32 0
+  %586 = insertelement <4 x float> %585, float %584, i32 1
+  %587 = insertelement <4 x float> %586, float 0.000000e+00, i32 2
+  %588 = insertelement <4 x float> %587, float %575, i32 3
+  %589 = extractelement <4 x float> %588, i32 0
+  %590 = extractelement <4 x float> %588, i32 1
+  %591 = insertelement <4 x float> undef, float %589, i32 0
+  %592 = insertelement <4 x float> %591, float %590, i32 1
+  %593 = insertelement <4 x float> %592, float undef, i32 2
+  %594 = insertelement <4 x float> %593, float undef, i32 3
+  %595 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %594, i32 20, i32 4, i32 2)
+  %596 = extractelement <4 x float> %595, i32 0
+  %597 = extractelement <4 x float> %595, i32 1
+  %598 = extractelement <4 x float> %595, i32 2
+  %599 = fmul float %596, 2.000000e+00
+  %600 = fadd float %599, -1.000000e+00
+  %601 = fmul float %597, 2.000000e+00
+  %602 = fadd float %601, -1.000000e+00
+  %603 = fmul float %598, 2.000000e+00
+  %604 = fadd float %603, -1.000000e+00
+  br label %ENDIF136
+
+IF161:                                            ; preds = %ENDIF136
+  %605 = fmul float %202, 0x3FB99999A0000000
+  %606 = fcmp uge float 0x3FE4CCCCC0000000, %605
+  %607 = select i1 %606, float 0x3FE4CCCCC0000000, float %605
+  %608 = fcmp uge float %607, 5.000000e-01
+  %609 = select i1 %608, float 5.000000e-01, float %607
+  %610 = call float @llvm.AMDGPU.lrp(float %609, float %400, float %300)
+  %611 = call float @llvm.AMDGPU.lrp(float %609, float %401, float %301)
+  %612 = call float @llvm.AMDGPU.lrp(float %609, float %402, float %302)
+  %613 = call float @llvm.AMDGPU.lrp(float %609, float %403, float %303)
+  %614 = insertelement <4 x float> undef, float %329, i32 0
+  %615 = insertelement <4 x float> %614, float %330, i32 1
+  %616 = insertelement <4 x float> %615, float %331, i32 2
+  %617 = insertelement <4 x float> %616, float 0.000000e+00, i32 3
+  %618 = insertelement <4 x float> undef, float %63, i32 0
+  %619 = insertelement <4 x float> %618, float %65, i32 1
+  %620 = insertelement <4 x float> %619, float %67, i32 2
+  %621 = insertelement <4 x float> %620, float 0.000000e+00, i32 3
+  %622 = call float @llvm.AMDGPU.dp4(<4 x float> %617, <4 x float> %621)
+  %623 = fcmp uge float 0x3FE6666660000000, %622
+  %624 = select i1 %623, float 0x3FE6666660000000, float %622
+  %625 = fmul float %8, %624
+  %626 = fmul float %13, %624
+  %627 = fmul float %18, %624
+  %628 = insertelement <4 x float> undef, float %34, i32 0
+  %629 = insertelement <4 x float> %628, float %35, i32 1
+  %630 = insertelement <4 x float> %629, float %36, i32 2
+  %631 = insertelement <4 x float> %630, float 0.000000e+00, i32 3
+  %632 = insertelement <4 x float> undef, float %63, i32 0
+  %633 = insertelement <4 x float> %632, float %65, i32 1
+  %634 = insertelement <4 x float> %633, float %67, i32 2
+  %635 = insertelement <4 x float> %634, float 0.000000e+00, i32 3
+  %636 = call float @llvm.AMDGPU.dp4(<4 x float> %631, <4 x float> %635)
+  %637 = fcmp uge float 0x3FECCCCCC0000000, %636
+  %638 = select i1 %637, float 0x3FECCCCCC0000000, float %636
+  %639 = fmul float %625, %638
+  %640 = fmul float %626, %638
+  %641 = fmul float %627, %638
+  br label %ENDIF160
+
+ENDIF160:                                         ; preds = %ENDIF136, %IF161
+  %temp84.0 = phi float [ %610, %IF161 ], [ %255, %ENDIF136 ]
+  %temp85.0 = phi float [ %611, %IF161 ], [ %256, %ENDIF136 ]
+  %temp86.0 = phi float [ %612, %IF161 ], [ %257, %ENDIF136 ]
+  %temp87.0 = phi float [ %613, %IF161 ], [ %258, %ENDIF136 ]
+  %temp92.6 = phi float [ %639, %IF161 ], [ %415, %ENDIF136 ]
+  %temp93.0 = phi float [ %640, %IF161 ], [ 0.000000e+00, %ENDIF136 ]
+  %temp94.0 = phi float [ %641, %IF161 ], [ 0.000000e+00, %ENDIF136 ]
+  %642 = fcmp olt float 2.200000e+03, %179
+  %643 = sext i1 %642 to i32
+  %644 = bitcast i32 %643 to float
+  %645 = fcmp olt float %179, 2.300000e+03
+  %646 = sext i1 %645 to i32
+  %647 = bitcast i32 %646 to float
+  %648 = bitcast float %644 to i32
+  %649 = bitcast float %647 to i32
+  %650 = and i32 %648, %649
+  %651 = bitcast i32 %650 to float
+  %652 = bitcast float %651 to i32
+  %653 = icmp ne i32 %652, 0
+  br i1 %653, label %IF164, label %ENDIF163
+
+IF164:                                            ; preds = %ENDIF160
+  %654 = fmul float %202, 5.000000e-01
+  %655 = fcmp uge float 0x3FE4CCCCC0000000, %654
+  %656 = select i1 %655, float 0x3FE4CCCCC0000000, float %654
+  %657 = fcmp uge float %656, 0x3FD6666660000000
+  %658 = select i1 %657, float 0x3FD6666660000000, float %656
+  %659 = call float @llvm.AMDGPU.lrp(float %658, float %400, float %300)
+  %660 = call float @llvm.AMDGPU.lrp(float %658, float %401, float %301)
+  %661 = call float @llvm.AMDGPU.lrp(float %658, float %402, float %302)
+  %662 = call float @llvm.AMDGPU.lrp(float %658, float %403, float %303)
+  %663 = insertelement <4 x float> undef, float %329, i32 0
+  %664 = insertelement <4 x float> %663, float %330, i32 1
+  %665 = insertelement <4 x float> %664, float %331, i32 2
+  %666 = insertelement <4 x float> %665, float 0.000000e+00, i32 3
+  %667 = insertelement <4 x float> undef, float %63, i32 0
+  %668 = insertelement <4 x float> %667, float %65, i32 1
+  %669 = insertelement <4 x float> %668, float %67, i32 2
+  %670 = insertelement <4 x float> %669, float 0.000000e+00, i32 3
+  %671 = call float @llvm.AMDGPU.dp4(<4 x float> %666, <4 x float> %670)
+  %672 = fcmp uge float 0x3FE6666660000000, %671
+  %673 = select i1 %672, float 0x3FE6666660000000, float %671
+  %674 = fmul float %8, %673
+  %675 = fmul float %13, %673
+  %676 = fmul float %18, %673
+  %677 = insertelement <4 x float> undef, float %34, i32 0
+  %678 = insertelement <4 x float> %677, float %35, i32 1
+  %679 = insertelement <4 x float> %678, float %36, i32 2
+  %680 = insertelement <4 x float> %679, float 0.000000e+00, i32 3
+  %681 = insertelement <4 x float> undef, float %63, i32 0
+  %682 = insertelement <4 x float> %681, float %65, i32 1
+  %683 = insertelement <4 x float> %682, float %67, i32 2
+  %684 = insertelement <4 x float> %683, float 0.000000e+00, i32 3
+  %685 = call float @llvm.AMDGPU.dp4(<4 x float> %680, <4 x float> %684)
+  %686 = fcmp uge float 0x3FECCCCCC0000000, %685
+  %687 = select i1 %686, float 0x3FECCCCCC0000000, float %685
+  %688 = fmul float %674, %687
+  %689 = fmul float %675, %687
+  %690 = fmul float %676, %687
+  br label %ENDIF163
+
+ENDIF163:                                         ; preds = %ENDIF160, %IF164
+  %temp84.1 = phi float [ %659, %IF164 ], [ %temp84.0, %ENDIF160 ]
+  %temp85.1 = phi float [ %660, %IF164 ], [ %temp85.0, %ENDIF160 ]
+  %temp86.1 = phi float [ %661, %IF164 ], [ %temp86.0, %ENDIF160 ]
+  %temp87.1 = phi float [ %662, %IF164 ], [ %temp87.0, %ENDIF160 ]
+  %temp92.7 = phi float [ %688, %IF164 ], [ %temp92.6, %ENDIF160 ]
+  %temp93.1 = phi float [ %689, %IF164 ], [ %temp93.0, %ENDIF160 ]
+  %temp94.1 = phi float [ %690, %IF164 ], [ %temp94.0, %ENDIF160 ]
+  %691 = fcmp oge float %179, 2.300000e+03
+  %692 = sext i1 %691 to i32
+  %693 = bitcast i32 %692 to float
+  %694 = fcmp olt float %179, 2.480000e+03
+  %695 = sext i1 %694 to i32
+  %696 = bitcast i32 %695 to float
+  %697 = bitcast float %693 to i32
+  %698 = bitcast float %696 to i32
+  %699 = and i32 %697, %698
+  %700 = bitcast i32 %699 to float
+  %701 = bitcast float %700 to i32
+  %702 = icmp ne i32 %701, 0
+  br i1 %702, label %IF167, label %ENDIF166
+
+IF167:                                            ; preds = %ENDIF163
+  %703 = fmul float %202, 5.000000e-01
+  %704 = fcmp uge float 0x3FE4CCCCC0000000, %703
+  %705 = select i1 %704, float 0x3FE4CCCCC0000000, float %703
+  %706 = fcmp uge float %705, 0x3FD3333340000000
+  %707 = select i1 %706, float 0x3FD3333340000000, float %705
+  %708 = call float @llvm.AMDGPU.lrp(float %707, float %409, float %300)
+  %709 = call float @llvm.AMDGPU.lrp(float %707, float %410, float %301)
+  %710 = call float @llvm.AMDGPU.lrp(float %707, float %411, float %302)
+  %711 = call float @llvm.AMDGPU.lrp(float %707, float %412, float %303)
+  %712 = insertelement <4 x float> undef, float %329, i32 0
+  %713 = insertelement <4 x float> %712, float %330, i32 1
+  %714 = insertelement <4 x float> %713, float %331, i32 2
+  %715 = insertelement <4 x float> %714, float 0.000000e+00, i32 3
+  %716 = insertelement <4 x float> undef, float %63, i32 0
+  %717 = insertelement <4 x float> %716, float %65, i32 1
+  %718 = insertelement <4 x float> %717, float %67, i32 2
+  %719 = insertelement <4 x float> %718, float 0.000000e+00, i32 3
+  %720 = call float @llvm.AMDGPU.dp4(<4 x float> %715, <4 x float> %719)
+  %721 = fcmp uge float 0x3FEB333340000000, %720
+  %722 = select i1 %721, float 0x3FEB333340000000, float %720
+  %723 = fmul float %8, %722
+  %724 = fmul float %13, %722
+  %725 = fmul float %18, %722
+  %726 = insertelement <4 x float> undef, float %34, i32 0
+  %727 = insertelement <4 x float> %726, float %35, i32 1
+  %728 = insertelement <4 x float> %727, float %36, i32 2
+  %729 = insertelement <4 x float> %728, float 0.000000e+00, i32 3
+  %730 = insertelement <4 x float> undef, float %63, i32 0
+  %731 = insertelement <4 x float> %730, float %65, i32 1
+  %732 = insertelement <4 x float> %731, float %67, i32 2
+  %733 = insertelement <4 x float> %732, float 0.000000e+00, i32 3
+  %734 = call float @llvm.AMDGPU.dp4(<4 x float> %729, <4 x float> %733)
+  %735 = fcmp uge float 0x3FECCCCCC0000000, %734
+  %736 = select i1 %735, float 0x3FECCCCCC0000000, float %734
+  %737 = fmul float %723, %736
+  %738 = fmul float %724, %736
+  %739 = fmul float %725, %736
+  br label %ENDIF166
+
+ENDIF166:                                         ; preds = %ENDIF163, %IF167
+  %temp84.2 = phi float [ %708, %IF167 ], [ %temp84.1, %ENDIF163 ]
+  %temp85.2 = phi float [ %709, %IF167 ], [ %temp85.1, %ENDIF163 ]
+  %temp86.2 = phi float [ %710, %IF167 ], [ %temp86.1, %ENDIF163 ]
+  %temp87.2 = phi float [ %711, %IF167 ], [ %temp87.1, %ENDIF163 ]
+  %temp92.8 = phi float [ %737, %IF167 ], [ %temp92.7, %ENDIF163 ]
+  %temp93.2 = phi float [ %738, %IF167 ], [ %temp93.1, %ENDIF163 ]
+  %temp94.2 = phi float [ %739, %IF167 ], [ %temp94.1, %ENDIF163 ]
+  %740 = fcmp oge float %179, 2.480000e+03
+  %741 = sext i1 %740 to i32
+  %742 = bitcast i32 %741 to float
+  %743 = fcmp olt float %179, 2.530000e+03
+  %744 = sext i1 %743 to i32
+  %745 = bitcast i32 %744 to float
+  %746 = bitcast float %742 to i32
+  %747 = bitcast float %745 to i32
+  %748 = and i32 %746, %747
+  %749 = bitcast i32 %748 to float
+  %750 = bitcast float %749 to i32
+  %751 = icmp ne i32 %750, 0
+  br i1 %751, label %IF170, label %ENDIF169
+
+IF170:                                            ; preds = %ENDIF166
+  %752 = fmul float %202, 5.000000e-01
+  %753 = fcmp uge float 0x3FE4CCCCC0000000, %752
+  %754 = select i1 %753, float 0x3FE4CCCCC0000000, float %752
+  %755 = fcmp uge float %754, 0x3FC99999A0000000
+  %756 = select i1 %755, float 0x3FC99999A0000000, float %754
+  %757 = call float @llvm.AMDGPU.lrp(float %756, float %409, float %300)
+  %758 = call float @llvm.AMDGPU.lrp(float %756, float %410, float %301)
+  %759 = call float @llvm.AMDGPU.lrp(float %756, float %411, float %302)
+  %760 = call float @llvm.AMDGPU.lrp(float %756, float %412, float %303)
+  %761 = insertelement <4 x float> undef, float %329, i32 0
+  %762 = insertelement <4 x float> %761, float %330, i32 1
+  %763 = insertelement <4 x float> %762, float %331, i32 2
+  %764 = insertelement <4 x float> %763, float 0.000000e+00, i32 3
+  %765 = insertelement <4 x float> undef, float %63, i32 0
+  %766 = insertelement <4 x float> %765, float %65, i32 1
+  %767 = insertelement <4 x float> %766, float %67, i32 2
+  %768 = insertelement <4 x float> %767, float 0.000000e+00, i32 3
+  %769 = call float @llvm.AMDGPU.dp4(<4 x float> %764, <4 x float> %768)
+  %770 = fcmp uge float 0x3FEB333340000000, %769
+  %771 = select i1 %770, float 0x3FEB333340000000, float %769
+  %772 = fmul float %8, %771
+  %773 = fmul float %13, %771
+  %774 = fmul float %18, %771
+  %775 = insertelement <4 x float> undef, float %34, i32 0
+  %776 = insertelement <4 x float> %775, float %35, i32 1
+  %777 = insertelement <4 x float> %776, float %36, i32 2
+  %778 = insertelement <4 x float> %777, float 0.000000e+00, i32 3
+  %779 = insertelement <4 x float> undef, float %63, i32 0
+  %780 = insertelement <4 x float> %779, float %65, i32 1
+  %781 = insertelement <4 x float> %780, float %67, i32 2
+  %782 = insertelement <4 x float> %781, float 0.000000e+00, i32 3
+  %783 = call float @llvm.AMDGPU.dp4(<4 x float> %778, <4 x float> %782)
+  %784 = fcmp uge float 0x3FECCCCCC0000000, %783
+  %785 = select i1 %784, float 0x3FECCCCCC0000000, float %783
+  %786 = fmul float %772, %785
+  %787 = fmul float %773, %785
+  %788 = fmul float %774, %785
+  br label %ENDIF169
+
+ENDIF169:                                         ; preds = %ENDIF166, %IF170
+  %temp84.3 = phi float [ %757, %IF170 ], [ %temp84.2, %ENDIF166 ]
+  %temp85.3 = phi float [ %758, %IF170 ], [ %temp85.2, %ENDIF166 ]
+  %temp86.3 = phi float [ %759, %IF170 ], [ %temp86.2, %ENDIF166 ]
+  %temp87.3 = phi float [ %760, %IF170 ], [ %temp87.2, %ENDIF166 ]
+  %temp92.9 = phi float [ %786, %IF170 ], [ %temp92.8, %ENDIF166 ]
+  %temp93.3 = phi float [ %787, %IF170 ], [ %temp93.2, %ENDIF166 ]
+  %temp94.3 = phi float [ %788, %IF170 ], [ %temp94.2, %ENDIF166 ]
+  %789 = fcmp oge float %179, 2.530000e+03
+  %790 = sext i1 %789 to i32
+  %791 = bitcast i32 %790 to float
+  %792 = fcmp olt float %179, 2.670000e+03
+  %793 = sext i1 %792 to i32
+  %794 = bitcast i32 %793 to float
+  %795 = bitcast float %791 to i32
+  %796 = bitcast float %794 to i32
+  %797 = and i32 %795, %796
+  %798 = bitcast i32 %797 to float
+  %799 = bitcast float %798 to i32
+  %800 = icmp ne i32 %799, 0
+  br i1 %800, label %IF173, label %ENDIF172
+
+IF173:                                            ; preds = %ENDIF169
+  %801 = fmul float %202, 5.000000e-01
+  %802 = fcmp uge float 0x3FE4CCCCC0000000, %801
+  %803 = select i1 %802, float 0x3FE4CCCCC0000000, float %801
+  %804 = fcmp uge float %803, 0x3FB99999A0000000
+  %805 = select i1 %804, float 0x3FB99999A0000000, float %803
+  %806 = call float @llvm.AMDGPU.lrp(float %805, float %400, float %300)
+  %807 = call float @llvm.AMDGPU.lrp(float %805, float %401, float %301)
+  %808 = call float @llvm.AMDGPU.lrp(float %805, float %402, float %302)
+  %809 = call float @llvm.AMDGPU.lrp(float %805, float %403, float %303)
+  %810 = insertelement <4 x float> undef, float %329, i32 0
+  %811 = insertelement <4 x float> %810, float %330, i32 1
+  %812 = insertelement <4 x float> %811, float %331, i32 2
+  %813 = insertelement <4 x float> %812, float 0.000000e+00, i32 3
+  %814 = insertelement <4 x float> undef, float %63, i32 0
+  %815 = insertelement <4 x float> %814, float %65, i32 1
+  %816 = insertelement <4 x float> %815, float %67, i32 2
+  %817 = insertelement <4 x float> %816, float 0.000000e+00, i32 3
+  %818 = call float @llvm.AMDGPU.dp4(<4 x float> %813, <4 x float> %817)
+  %819 = fcmp uge float 0x3FEB333340000000, %818
+  %820 = select i1 %819, float 0x3FEB333340000000, float %818
+  %821 = fmul float %8, %820
+  %822 = fmul float %13, %820
+  %823 = fmul float %18, %820
+  %824 = insertelement <4 x float> undef, float %34, i32 0
+  %825 = insertelement <4 x float> %824, float %35, i32 1
+  %826 = insertelement <4 x float> %825, float %36, i32 2
+  %827 = insertelement <4 x float> %826, float 0.000000e+00, i32 3
+  %828 = insertelement <4 x float> undef, float %63, i32 0
+  %829 = insertelement <4 x float> %828, float %65, i32 1
+  %830 = insertelement <4 x float> %829, float %67, i32 2
+  %831 = insertelement <4 x float> %830, float 0.000000e+00, i32 3
+  %832 = call float @llvm.AMDGPU.dp4(<4 x float> %827, <4 x float> %831)
+  %833 = fcmp uge float 0x3FECCCCCC0000000, %832
+  %834 = select i1 %833, float 0x3FECCCCCC0000000, float %832
+  %835 = fmul float %821, %834
+  %836 = fmul float %822, %834
+  %837 = fmul float %823, %834
+  br label %ENDIF172
+
+ENDIF172:                                         ; preds = %ENDIF169, %IF173
+  %temp84.4 = phi float [ %806, %IF173 ], [ %temp84.3, %ENDIF169 ]
+  %temp85.4 = phi float [ %807, %IF173 ], [ %temp85.3, %ENDIF169 ]
+  %temp86.4 = phi float [ %808, %IF173 ], [ %temp86.3, %ENDIF169 ]
+  %temp87.4 = phi float [ %809, %IF173 ], [ %temp87.3, %ENDIF169 ]
+  %temp92.10 = phi float [ %835, %IF173 ], [ %temp92.9, %ENDIF169 ]
+  %temp93.4 = phi float [ %836, %IF173 ], [ %temp93.3, %ENDIF169 ]
+  %temp94.4 = phi float [ %837, %IF173 ], [ %temp94.3, %ENDIF169 ]
+  %838 = fcmp oge float %179, 2.670000e+03
+  %839 = sext i1 %838 to i32
+  %840 = bitcast i32 %839 to float
+  %841 = bitcast float %840 to i32
+  %842 = icmp ne i32 %841, 0
+  br i1 %842, label %IF176, label %ENDIF175
+
+IF176:                                            ; preds = %ENDIF172
+  %843 = fmul float %202, 0x3FB99999A0000000
+  %844 = fcmp uge float 0.000000e+00, %843
+  %845 = select i1 %844, float 0.000000e+00, float %843
+  %846 = fcmp uge float %845, 0x3FD99999A0000000
+  %847 = select i1 %846, float 0x3FD99999A0000000, float %845
+  %848 = call float @llvm.AMDGPU.lrp(float %847, float %400, float %300)
+  %849 = call float @llvm.AMDGPU.lrp(float %847, float %401, float %301)
+  %850 = call float @llvm.AMDGPU.lrp(float %847, float %402, float %302)
+  %851 = call float @llvm.AMDGPU.lrp(float %847, float %403, float %303)
+  %852 = insertelement <4 x float> undef, float %329, i32 0
+  %853 = insertelement <4 x float> %852, float %330, i32 1
+  %854 = insertelement <4 x float> %853, float %331, i32 2
+  %855 = insertelement <4 x float> %854, float 0.000000e+00, i32 3
+  %856 = insertelement <4 x float> undef, float %63, i32 0
+  %857 = insertelement <4 x float> %856, float %65, i32 1
+  %858 = insertelement <4 x float> %857, float %67, i32 2
+  %859 = insertelement <4 x float> %858, float 0.000000e+00, i32 3
+  %860 = call float @llvm.AMDGPU.dp4(<4 x float> %855, <4 x float> %859)
+  %861 = fcmp uge float 0x3FEB333340000000, %860
+  %862 = select i1 %861, float 0x3FEB333340000000, float %860
+  %863 = fmul float %8, %862
+  %864 = fmul float %13, %862
+  %865 = fmul float %18, %862
+  %866 = insertelement <4 x float> undef, float %34, i32 0
+  %867 = insertelement <4 x float> %866, float %35, i32 1
+  %868 = insertelement <4 x float> %867, float %36, i32 2
+  %869 = insertelement <4 x float> %868, float 0.000000e+00, i32 3
+  %870 = insertelement <4 x float> undef, float %63, i32 0
+  %871 = insertelement <4 x float> %870, float %65, i32 1
+  %872 = insertelement <4 x float> %871, float %67, i32 2
+  %873 = insertelement <4 x float> %872, float 0.000000e+00, i32 3
+  %874 = call float @llvm.AMDGPU.dp4(<4 x float> %869, <4 x float> %873)
+  %875 = fcmp uge float 0x3FECCCCCC0000000, %874
+  %876 = select i1 %875, float 0x3FECCCCCC0000000, float %874
+  %877 = fmul float %863, %876
+  %878 = fmul float %864, %876
+  %879 = fmul float %865, %876
+  br label %ENDIF175
+
+ENDIF175:                                         ; preds = %ENDIF172, %IF176
+  %temp84.5 = phi float [ %848, %IF176 ], [ %temp84.4, %ENDIF172 ]
+  %temp85.5 = phi float [ %849, %IF176 ], [ %temp85.4, %ENDIF172 ]
+  %temp86.5 = phi float [ %850, %IF176 ], [ %temp86.4, %ENDIF172 ]
+  %temp87.5 = phi float [ %851, %IF176 ], [ %temp87.4, %ENDIF172 ]
+  %temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ]
+  %temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ]
+  %temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ]
+  %880 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %881 = extractelement <4 x float> %880, i32 0
+  %882 = fcmp olt float %881, %179
+  %883 = sext i1 %882 to i32
+  %884 = bitcast i32 %883 to float
+  %885 = bitcast float %884 to i32
+  %886 = icmp ne i32 %885, 0
+  br i1 %886, label %IF179, label %ENDIF178
+
+IF179:                                            ; preds = %ENDIF175
+  %887 = fadd float %202, 1.000000e+00
+  %888 = fadd float %202, 1.000000e+00
+  %889 = fadd float %202, 1.000000e+00
+  %890 = insertelement <4 x float> undef, float %43, i32 0
+  %891 = insertelement <4 x float> %890, float %44, i32 1
+  %892 = insertelement <4 x float> %891, float %45, i32 2
+  %893 = insertelement <4 x float> %892, float 0.000000e+00, i32 3
+  %894 = insertelement <4 x float> undef, float %43, i32 0
+  %895 = insertelement <4 x float> %894, float %44, i32 1
+  %896 = insertelement <4 x float> %895, float %45, i32 2
+  %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
+  %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
+  %899 = call float @llvm.AMDGPU.rsq(float %898)
+  %900 = fmul float %45, %899
+  %901 = call float @fabs(float %900)
+  %902 = fmul float %176, 0x3FECCCCCC0000000
+  %903 = fadd float %902, %901
+  %904 = fadd float %903, 0xBFEFAE1480000000
+  %905 = fmul float %904, 0xC043FFFE20000000
+  %906 = call float @llvm.AMDIL.clamp.(float %905, float 0.000000e+00, float 1.000000e+00)
+  %907 = fmul float 2.000000e+00, %906
+  %908 = fsub float -0.000000e+00, %907
+  %909 = fadd float 3.000000e+00, %908
+  %910 = fmul float %906, %909
+  %911 = fmul float %906, %910
+  %912 = call float @llvm.AMDGPU.lrp(float %911, float %temp84.5, float %887)
+  %913 = call float @llvm.AMDGPU.lrp(float %911, float %temp85.5, float %888)
+  %914 = call float @llvm.AMDGPU.lrp(float %911, float %temp86.5, float %889)
+  %915 = call float @llvm.AMDGPU.lrp(float %911, float %temp87.5, float 0.000000e+00)
+  %916 = fmul float %202, 5.000000e-01
+  %917 = fcmp uge float 0x3FE4CCCCC0000000, %916
+  %918 = select i1 %917, float 0x3FE4CCCCC0000000, float %916
+  %919 = fcmp uge float %918, 0x3FE3333340000000
+  %920 = select i1 %919, float 0x3FE3333340000000, float %918
+  %921 = call float @llvm.AMDGPU.lrp(float %920, float %912, float %temp84.5)
+  %922 = call float @llvm.AMDGPU.lrp(float %920, float %913, float %temp85.5)
+  %923 = call float @llvm.AMDGPU.lrp(float %920, float %914, float %temp86.5)
+  %924 = call float @llvm.AMDGPU.lrp(float %920, float %915, float %temp87.5)
+  %925 = insertelement <4 x float> undef, float %329, i32 0
+  %926 = insertelement <4 x float> %925, float %330, i32 1
+  %927 = insertelement <4 x float> %926, float %331, i32 2
+  %928 = insertelement <4 x float> %927, float 0.000000e+00, i32 3
+  %929 = insertelement <4 x float> undef, float %63, i32 0
+  %930 = insertelement <4 x float> %929, float %65, i32 1
+  %931 = insertelement <4 x float> %930, float %67, i32 2
+  %932 = insertelement <4 x float> %931, float 0.000000e+00, i32 3
+  %933 = call float @llvm.AMDGPU.dp4(<4 x float> %928, <4 x float> %932)
+  %934 = fcmp uge float 0x3FE99999A0000000, %933
+  %935 = select i1 %934, float 0x3FE99999A0000000, float %933
+  %936 = fmul float %8, %935
+  %937 = fmul float %13, %935
+  %938 = fmul float %18, %935
+  %939 = insertelement <4 x float> undef, float %34, i32 0
+  %940 = insertelement <4 x float> %939, float %35, i32 1
+  %941 = insertelement <4 x float> %940, float %36, i32 2
+  %942 = insertelement <4 x float> %941, float 0.000000e+00, i32 3
+  %943 = insertelement <4 x float> undef, float %63, i32 0
+  %944 = insertelement <4 x float> %943, float %65, i32 1
+  %945 = insertelement <4 x float> %944, float %67, i32 2
+  %946 = insertelement <4 x float> %945, float 0.000000e+00, i32 3
+  %947 = call float @llvm.AMDGPU.dp4(<4 x float> %942, <4 x float> %946)
+  %948 = fcmp uge float 0x3FECCCCCC0000000, %947
+  %949 = select i1 %948, float 0x3FECCCCCC0000000, float %947
+  %950 = fmul float %936, %949
+  %951 = fmul float %937, %949
+  %952 = fmul float %938, %949
+  br label %ENDIF178
+
+ENDIF178:                                         ; preds = %ENDIF175, %IF179
+  %temp84.6 = phi float [ %921, %IF179 ], [ %temp84.5, %ENDIF175 ]
+  %temp85.6 = phi float [ %922, %IF179 ], [ %temp85.5, %ENDIF175 ]
+  %temp86.6 = phi float [ %923, %IF179 ], [ %temp86.5, %ENDIF175 ]
+  %temp87.6 = phi float [ %924, %IF179 ], [ %temp87.5, %ENDIF175 ]
+  %temp92.12 = phi float [ %950, %IF179 ], [ %temp92.11, %ENDIF175 ]
+  %temp93.6 = phi float [ %951, %IF179 ], [ %temp93.5, %ENDIF175 ]
+  %temp94.6 = phi float [ %952, %IF179 ], [ %temp94.5, %ENDIF175 ]
+  %953 = fmul float %55, %temp92.12
+  %954 = fmul float %57, %temp93.6
+  %955 = fmul float %59, %temp94.6
+  %956 = fmul float %61, 0.000000e+00
+  %957 = fmul float %temp84.6, %953
+  %958 = fmul float %temp85.6, %954
+  %959 = fmul float %temp86.6, %955
+  %960 = fmul float %temp87.6, %956
+  %961 = fmul float %2, -2.000000e+00
+  %962 = fadd float %961, 1.000000e+00
+  %963 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %964 = extractelement <4 x float> %963, i32 2
+  %965 = fsub float -0.000000e+00, %964
+  %966 = fadd float %962, %965
+  %967 = fdiv float 1.000000e+00, %966
+  %968 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+  %969 = extractelement <4 x float> %968, i32 2
+  %970 = fmul float %969, %967
+  %971 = fsub float -0.000000e+00, %53
+  %972 = fmul float %971, %53
+  %973 = fmul float %972, %970
+  %974 = fmul float %973, %970
+  %975 = fmul float %974, 0x3FF7154760000000
+  %976 = call float @llvm.AMDIL.exp.(float %975)
+  %977 = fcmp oeq float %53, 1.000000e+00
+  %978 = sext i1 %977 to i32
+  %979 = bitcast i32 %978 to float
+  %980 = bitcast float %979 to i32
+  %981 = icmp ne i32 %980, 0
+  %.184 = select i1 %981, float 1.000000e+00, float %976
+  %982 = call float @llvm.AMDGPU.lrp(float %.184, float %957, float %47)
+  %983 = call float @llvm.AMDGPU.lrp(float %.184, float %958, float %49)
+  %984 = call float @llvm.AMDGPU.lrp(float %.184, float %959, float %51)
+  %985 = insertelement <4 x float> undef, float %982, i32 0
+  %986 = insertelement <4 x float> %985, float %983, i32 1
+  %987 = insertelement <4 x float> %986, float %984, i32 2
+  %988 = insertelement <4 x float> %987, float %960, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %988, i32 0, i32 0)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.rsq(float) #1
+
+; Function Attrs: readnone
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
+
+; Function Attrs: readonly
+declare float @fabs(float) #2
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.exp.(float) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.lrp(float, float, float) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.clamp.(float, float, float) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+attributes #2 = { readonly }
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
new file mode 100644
index 000000000000..bccc41638570
--- /dev/null
+++ b/test/CodeGen/R600/bitcast.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; This test just checks that the compiler doesn't crash on a <32 x i8> to <8 x i32> bitcast.
+; CHECK-LABEL: @v32i8_to_v8i32
+; CHECK: S_ENDPGM
+
+define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
+entry:
+  %1 = load <32 x i8> addrspace(2)* %0
+  %2 = bitcast <32 x i8> %1 to <8 x i32>
+  %3 = extractelement <8 x i32> %2, i32 1
+  %4 = icmp ne i32 %3, 0
+  %5 = select i1 %4, float 0.0, float 1.0
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
+  ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+
diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll
new file mode 100644
index 000000000000..8179de13e869
--- /dev/null
+++ b/test/CodeGen/R600/build_vector.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; Constant <2 x i32> store: two MOVs on R600; on SI the constants 5 and 6 go into the stored register range.
+; R600-CHECK-LABEL: @build_vector2
+; R600-CHECK: MOV
+; R600-CHECK: MOV
+; R600-CHECK-NOT: MOV
+; SI-CHECK-LABEL: @build_vector2
+; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]+]], 5
+; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]+]], 6
+; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
+define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
+entry:
+  store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Constant <4 x i32> store: four MOVs on R600; on SI the constants 5..8 go into the stored register range.
+; R600-CHECK-LABEL: @build_vector4
+; R600-CHECK: MOV
+; R600-CHECK: MOV
+; R600-CHECK: MOV
+; R600-CHECK: MOV
+; R600-CHECK-NOT: MOV
+; SI-CHECK-LABEL: @build_vector4
+; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]+]], 5
+; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]+]], 6
+; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]+]], 7
+; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]+]], 8
+; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}}
+define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
+entry:
+  store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
index e152bf6d559d..f7c4e5b22cb1 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/R600/call_fs.ll
@@ -3,8 +3,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s
 
 ; EG-CHECK: @call_fs
+; EG-CHECK: .long 257
 ; EG-CHECK: CALL_FS  ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
 ; R600-CHECK: @call_fs
+; R600-CHECK: .long 257
 ; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
 
 
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll
new file mode 100644
index 000000000000..f8ec712c1ec8
--- /dev/null
+++ b/test/CodeGen/R600/combine_vloads.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+
+;
+; kernel void combine_vloads(global char8* src, global char8* result) {
+;   for (int i = 0; i < 1024; ++i)
+;     result[i] = src[0] + src[1] + src[2] + src[3];
+; }
+;
+
+
+; Expect 128-bit reads (two VTX_READ_128) for the vector load instead of many 8-bit ones.
+; EG-LABEL: @combine_vloads:
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
+entry:
+  br label %for.body
+
+for.exit:                                         ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
+  %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
+  %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
+  %vecload2 = load <8 x i32> addrspace(1)* %0, align 32
+  %1 = bitcast <8 x i32> %vecload2 to <32 x i8>
+  %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
+  %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
+  %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %tmp17 = add nsw <8 x i8> %tmp13, %tmp16
+  %scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01
+  %2 = bitcast <8 x i8> %tmp17 to <2 x i32>
+  %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
+  store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8
+  %tmp19 = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %tmp19, 1024
+  br i1 %exitcond, label %for.exit, label %for.body
+}
diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll
new file mode 100644
index 000000000000..99f0d99b3529
--- /dev/null
+++ b/test/CodeGen/R600/complex-folding.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @main
+; CHECK-NOT: MOV
+define void @main(<4 x float> inreg %reg0) #0 {  ; the checks above require that no MOV appears after the @main match
+entry:
+  %0 = extractelement <4 x float> %reg0, i32 0
+  %1 = call float @fabs(float %0)  ; absolute value of element 0
+  %2 = fptoui float %1 to i32  ; convert the absolute value to an unsigned integer
+  %3 = bitcast i32 %2 to float  ; reinterpret the integer bits as float so they fit the float vector
+  %4 = insertelement <4 x float> undef, float %3, i32 0  ; only element 0 is defined
+  call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
+  ret void
+}
+
+declare float @fabs(float ) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/R600/dot4-folding.ll
new file mode 100644
index 000000000000..3e8330f9b3ed
--- /dev/null
+++ b/test/CodeGen/R600/dot4-folding.ll
@@ -0,0 +1,27 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Exactly one of the two constant vectors can be folded into dot4, so exactly
+; 4 MOV instructions are expected (presumably one per element of the other vector)
+; CHECK: @main
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+
+define void @main(float addrspace(1)* %out) {
+main_body:
+  %0 = load <4 x float> addrspace(8)* null  ; first constant vector (index 0 in address space 8)
+  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)  ; second constant vector (index 1)
+  %2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)  ; both dot-product operands are constant vectors
+  %3 = insertelement <4 x float> undef, float %2, i32 0  ; only element 0 is defined
+  call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index f460f13d53e0..93851504bd4e 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s
 
 ; ELF-CHECK: Format: ELF32
 ; ELF-CHECK: Name: .AMDGPU.config
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
new file mode 100644
index 000000000000..aa660b38838d
--- /dev/null
+++ b/test/CodeGen/R600/extload.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+
+; EG-LABEL: @anyext_load_i8:
+; EG: AND_INT
+; EG-NEXT: 255
+define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
+  %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
+  %load = load i32 addrspace(1)* %cast, align 1  ; 32-bit load through the byte pointer, only byte-aligned
+  %x = bitcast i32 %load to <4 x i8>  ; view the loaded word as four i8 lanes
+  %castOut = bitcast i8 addrspace(1)* %out to <4 x i8> addrspace(1)*
+  store <4 x i8> %x, <4 x i8> addrspace(1)* %castOut, align 1  ; byte-aligned store; the checks above expect an AND_INT with 255
+  ret void
+}
+
+; EG-LABEL: @anyext_load_i16:
+; EG: AND_INT
+; EG: LSHL
+; EG: 65535
+define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
+  %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
+  %load = load i32 addrspace(1)* %cast, align 1  ; 32-bit load through the i16 pointer, only byte-aligned
+  %x = bitcast i32 %load to <2 x i16>  ; view the loaded word as two i16 lanes
+  %castOut = bitcast i16 addrspace(1)* %out to <2 x i16> addrspace(1)*
+  store <2 x i16> %x, <2 x i16> addrspace(1)* %castOut, align 1  ; byte-aligned store; the checks above expect AND_INT, LSHL and 65535
+  ret void
+}
+
+; EG-LABEL: @anyext_load_lds_i8:
+; EG: AND_INT
+; EG-NEXT: 255
+define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
+  %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
+  %load = load i32 addrspace(3)* %cast, align 1  ; same pattern as @anyext_load_i8, but in address space 3
+  %x = bitcast i32 %load to <4 x i8>  ; view the loaded word as four i8 lanes
+  %castOut = bitcast i8 addrspace(3)* %out to <4 x i8> addrspace(3)*
+  store <4 x i8> %x, <4 x i8> addrspace(3)* %castOut, align 1  ; byte-aligned store; the checks above expect an AND_INT with 255
+  ret void
+}
+
+; EG-LABEL: @anyext_load_lds_i16:
+; EG: AND_INT
+; EG: LSHL
+; EG: 65535
+define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
+  %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
+  %load = load i32 addrspace(3)* %cast, align 1  ; same pattern as @anyext_load_i16, but in address space 3
+  %x = bitcast i32 %load to <2 x i16>  ; view the loaded word as two i16 lanes
+  %castOut = bitcast i16 addrspace(3)* %out to <2 x i16> addrspace(3)*
+  store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1  ; byte-aligned store; the checks above expect AND_INT, LSHL and 65535
+  ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index 85f2882289fa..a5f5df96b5d9 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -1,16 +1,54 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-;CHECK: MOV * T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}}
+; DAGCombiner will transform:
+; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
+; unless isFabsFree returns true
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @fabs( float %r0)
-   call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-   ret void
+; R600-CHECK-LABEL: @fabs_free
+; R600-CHECK-NOT: AND
+; R600-CHECK: |PV.{{[XYZW]}}|
+; SI-CHECK-LABEL: @fabs_free
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+
+define void @fabs_free(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = bitcast i32 %in to float  ; fabs of a bitcast integer: must stay a source modifier, not become an integer AND
+  %1 = call float @fabs(float %0)
+  store float %1, float addrspace(1)* %out
+  ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
+; R600-CHECK-LABEL: @fabs_v2
+; R600-CHECK: |{{(PV|T[0-9]+)\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9]+)\.[XYZW]}}|
+; SI-CHECK-LABEL: @fabs_v2
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)  ; one absolute-value operand is expected per lane (2 total)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
 
-declare void @llvm.AMDGPU.store.output(float, i32)
+; R600-CHECK-LABEL: @fabs_v4
+; R600-CHECK: |{{(PV|T[0-9]+)\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9]+)\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9]+)\.[XYZW]}}|
+; R600-CHECK: |{{(PV|T[0-9]+)\.[XYZW]}}|
+; SI-CHECK-LABEL: @fabs_v4
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 1, 0, 0, 0
+define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)  ; one absolute-value operand is expected per lane (4 total)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
 
 declare float @fabs(float ) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index 9a672329e75c..f467bb785779 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -1,26 +1,40 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @fadd_f32
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @fadd_f32() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
-   %r2 = fadd float %r0, %r1
-   call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+; R600-CHECK: @fadd_f32
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI-CHECK: @fadd_f32
+; SI-CHECK: V_ADD_F32
+define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
+entry:
+   %0 = fadd float %a, %b  ; scalar add of the two float arguments; on R600 both operands are expected in KC0[2]
+   store float %0, float addrspace(1)* %out
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
-; CHECK: @fadd_v4f32
-; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @fadd_v2f32
+; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
+; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y
+; SI-CHECK: @fadd_v2f32
+; SI-CHECK: V_ADD_F32
+; SI-CHECK: V_ADD_F32
+define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+entry:
+  %0 = fadd <2 x float> %a, %b  ; one scalar add is expected per lane (2 total)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
 
+; R600-CHECK: @fadd_v4f32
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK: @fadd_v4f32
+; SI-CHECK: V_ADD_F32
+; SI-CHECK: V_ADD_F32
+; SI-CHECK: V_ADD_F32
+; SI-CHECK: V_ADD_F32
 define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
new file mode 100644
index 000000000000..48cd3cfc8dfb
--- /dev/null
+++ b/test/CodeGen/R600/fadd64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fadd_f64
+; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+
+define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fadd double %r0, %r1  ; 64-bit add; each operand is expected in a register pair such as v[0:1]
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
index 7373a214790e..1d4e323d3abf 100644
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -2,7 +2,7 @@
 
 ;Not checking arguments 2 and 3 to CNDE, because they may change between
 ;registers and literal.x depending on what the optimizer does.
-;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index dc3a779dd609..c76a75876565 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ; CHECK: @fcmp_sext
-; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: SETE_DX10  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
new file mode 100644
index 000000000000..bcc7a8c8567a
--- /dev/null
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @flt_f64
+; CHECK: V_CMP_LT_F64_e64 {{s\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fcmp ult double %r0, %r1  ; unordered-or-less-than compare
+   %r3 = select i1 %r2, double %r0, double %r1  ; keep %r0 when the compare holds, else %r1
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; CHECK: @fle_f64
+; CHECK: V_CMP_LE_F64_e64 {{s\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fcmp ule double %r0, %r1  ; unordered-or-less-or-equal compare
+   %r3 = select i1 %r2, double %r0, double %r1  ; keep %r0 when the compare holds, else %r1
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; CHECK: @fgt_f64
+; CHECK: V_CMP_GT_F64_e64 {{s\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fcmp ugt double %r0, %r1  ; unordered-or-greater-than compare
+   %r3 = select i1 %r2, double %r0, double %r1  ; keep %r0 when the compare holds, else %r1
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; CHECK: @fge_f64
+; CHECK: V_CMP_GE_F64_e64 {{s\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fcmp uge double %r0, %r1  ; unordered-or-greater-or-equal compare
+   %r3 = select i1 %r2, double %r0, double %r1  ; keep %r0 when the compare holds, else %r1
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; CHECK: @fne_f64
+; CHECK: V_CMP_NEQ_F64_e64 {{s\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fcmp une double %r0, %r1  ; unordered-or-not-equal compare
+   %r3 = select i1 %r2, double %r0, double %r1  ; keep %r0 when the compare holds, else %r1
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; CHECK: @feq_f64
+; CHECK: V_CMP_EQ_F64_e64 {{s\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fcmp ueq double %r0, %r1  ; unordered-or-equal compare
+   %r3 = select i1 %r2, double %r0, double %r1  ; keep %r0 when the compare holds, else %r1
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
new file mode 100644
index 000000000000..5c5ee7e9091b
--- /dev/null
+++ b/test/CodeGen/R600/fconst64.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fconst_f64
+; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00
+; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00
+
+define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+   %r1 = load double addrspace(1)* %in
+   %r2 = fadd double %r1, 5.000000e+00  ; 5.0 is 0x4014000000000000: low word reads as 0.0, high word 0x40140000 reads as 2.3125 in f32
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
index 2e68e36be4d8..3d21524de0f4 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -1,15 +1,46 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; These tests check that fdiv is expanded correctly and also test that the
+; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
+; instruction groups.
 
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+; R600-CHECK: @fdiv_v2f32
+; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
+; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
+; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
+; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
+; SI-CHECK: @fdiv_v2f32
+; SI-CHECK-DAG: V_RCP_F32
+; SI-CHECK-DAG: V_MUL_F32
+; SI-CHECK-DAG: V_RCP_F32
+; SI-CHECK-DAG: V_MUL_F32
+define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+entry:
+  %0 = fdiv <2 x float> %a, %b  ; each lane is expected to expand to a reciprocal followed by a multiply
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @fdiv_v4f32
+; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
+; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
+; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
+; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
+; SI-CHECK: @fdiv_v4f32
+; SI-CHECK-DAG: V_RCP_F32
+; SI-CHECK-DAG: V_MUL_F32
+; SI-CHECK-DAG: V_RCP_F32
+; SI-CHECK-DAG: V_MUL_F32
+; SI-CHECK-DAG: V_RCP_F32
+; SI-CHECK-DAG: V_MUL_F32
+; SI-CHECK-DAG: V_RCP_F32
+; SI-CHECK-DAG: V_MUL_F32
+define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float> addrspace(1) * %in
   %b = load <4 x float> addrspace(1) * %b_ptr
diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll
new file mode 100644
index 000000000000..79b5c8bb96ee
--- /dev/null
+++ b/test/CodeGen/R600/fdiv64.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fdiv_f64
+; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+
+define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = fdiv double %r0, %r1  ; expected to be emitted as a 64-bit reciprocal followed by a 64-bit multiply
+   store double %r2, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fetch-limits.r600.ll b/test/CodeGen/R600/fetch-limits.r600.ll
new file mode 100644
index 000000000000..f78d1d968e5d
--- /dev/null
+++ b/test/CodeGen/R600/fetch-limits.r600.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=r600 -mcpu=r600 | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=rs880 | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s
+
+; R600 supports 8 fetches in a clause, so the 9 texture fetches below are expected to span two fetch clauses
+; CHECK: @fetch_limits_r600
+; CHECK: Fetch clause
+; CHECK: Fetch clause
+
+define void @fetch_limits_r600() #0 {
+entry:
+  %0 = load <4 x float> addrspace(8)* null
+  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
+  %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
+  %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
+  %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %3, i32 0, i32 0, i32 1)
+  %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %4, i32 0, i32 0, i32 1)
+  %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %5, i32 0, i32 0, i32 1)
+  %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %6, i32 0, i32 0, i32 1)
+  %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %7, i32 0, i32 0, i32 1)
+  %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)  ; 9th fetch, one more than the 8-per-clause limit
+  %a = fadd <4 x float> %res0, %res1
+  %b = fadd <4 x float> %res2, %res3
+  %c = fadd <4 x float> %res4, %res5
+  %d = fadd <4 x float> %res6, %res7
+  %e = fadd <4 x float> %res8, %a
+
+  %bc = fadd <4 x float> %b, %c
+  %de = fadd <4 x float> %d, %e
+
+  %bcde = fadd <4 x float> %bc, %de  ; sums all nine results so every fetch is used
+
+  call void @llvm.R600.store.swizzle(<4 x float> %bcde, i32 0, i32 1)
+  ret void
+}
+
+attributes #0 = { "ShaderType"="0" } ; Pixel Shader
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/fetch-limits.r700+.ll b/test/CodeGen/R600/fetch-limits.r700+.ll
new file mode 100644
index 000000000000..1a8a43fccc72
--- /dev/null
+++ b/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -march=r600 -mcpu=rv710 | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=rv730 | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=rv770 | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=sumo | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=juniper | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=barts | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=turks | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=caicos | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+
+; r700+ supports 16 fetches in a clause, so the 17 texture fetches below are expected to span two fetch clauses
+; CHECK: @fetch_limits_r700
+; CHECK: Fetch clause
+; CHECK: Fetch clause
+
+define void @fetch_limits_r700() #0 {
+entry:
+  %0 = load <4 x float> addrspace(8)* null
+  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %9 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %11 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+  %14 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %15 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
+  %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
+  %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
+  %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %3, i32 0, i32 0, i32 1)
+  %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %4, i32 0, i32 0, i32 1)
+  %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %5, i32 0, i32 0, i32 1)
+  %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %6, i32 0, i32 0, i32 1)
+  %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %7, i32 0, i32 0, i32 1)
+  %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)
+  %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %9, i32 0, i32 0, i32 1)
+  %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %10, i32 0, i32 0, i32 1)
+  %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %11, i32 0, i32 0, i32 1)
+  %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %12, i32 0, i32 0, i32 1)
+  %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %13, i32 0, i32 0, i32 1)
+  %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %14, i32 0, i32 0, i32 1)
+  %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %15, i32 0, i32 0, i32 1)
+  %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %16, i32 0, i32 0, i32 1)  ; 17th fetch, one more than the 16-per-clause limit
+  %a = fadd <4 x float> %res0, %res1
+  %b = fadd <4 x float> %res2, %res3
+  %c = fadd <4 x float> %res4, %res5
+  %d = fadd <4 x float> %res6, %res7
+  %e = fadd <4 x float> %res8, %res9
+  %f = fadd <4 x float> %res10, %res11
+  %g = fadd <4 x float> %res12, %res13
+  %h = fadd <4 x float> %res14, %res15
+  %i = fadd <4 x float> %res16, %a
+
+  %bc = fadd <4 x float> %b, %c
+  %de = fadd <4 x float> %d, %e
+  %fg = fadd <4 x float> %f, %g
+  %hi = fadd <4 x float> %h, %i
+
+  %bcde = fadd <4 x float> %bc, %de
+  %fghi = fadd <4 x float> %fg, %hi
+
+  %bcdefghi = fadd <4 x float> %bcde, %fghi  ; sums all seventeen results so every fetch is used
+  call void @llvm.R600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
+  ret void
+}
+
+attributes #0 = { "ShaderType"="0" } ; Pixel Shader
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll
index 877d69a65b43..67e86c41fdcf 100644
--- a/test/CodeGen/R600/floor.ll
+++ b/test/CodeGen/R600/floor.ll
@@ -2,15 +2,15 @@
 
 ;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
    %r1 = call float @floor(float %r0)
-   call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+   %vec = insertelement <4 x float> undef, float %r1, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
 declare float @floor(float) readonly
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll
new file mode 100644
index 000000000000..51e9d29a5ca2
--- /dev/null
+++ b/test/CodeGen/R600/fma.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fma_f32
+; CHECK: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                     float addrspace(1)* %in2, float addrspace(1)* %in3) {
+   %r0 = load float addrspace(1)* %in1
+   %r1 = load float addrspace(1)* %in2
+   %r2 = load float addrspace(1)* %in3
+   %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)  ; expected to select one V_FMA_F32 with four vector-register operands
+   store float %r3, float addrspace(1)* %out
+   ret void
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+; CHECK: @fma_f64
+; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = load double addrspace(1)* %in3
+   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)  ; expected to select one V_FMA_F64 with four register-pair operands
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+declare double @llvm.fma.f64(double, double, double)
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
index 62001edc3aa5..935e35123f45 100644
--- a/test/CodeGen/R600/fmad.ll
+++ b/test/CodeGen/R600/fmad.ll
@@ -1,19 +1,19 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: MULADD_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
-   %r2 = call float @llvm.R600.load.input(i32 2)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = extractelement <4 x float> %reg0, i32 1
+   %r2 = extractelement <4 x float> %reg0, i32 2
    %r3 = fmul float %r0, %r1
-	%r4 = fadd float %r3, %r2
-   call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+   %r4 = fadd float %r3, %r2
+   %vec = insertelement <4 x float> undef, float %r4, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
 declare float @fabs(float ) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
index 8b704e56484b..d7127f485c74 100644
--- a/test/CodeGen/R600/fmax.ll
+++ b/test/CodeGen/R600/fmax.ll
@@ -2,15 +2,16 @@
 
 ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
-   %r2 = fcmp uge float %r0, %r1
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = extractelement <4 x float> %reg0, i32 1
+   %r2 = fcmp oge float %r0, %r1
    %r3 = select i1 %r2, float %r0, float %r1
-   call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+   %vec = insertelement <4 x float> undef, float %r3, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll
index 5e34b7c8902e..defa8c09638a 100644
--- a/test/CodeGen/R600/fmin.ll
+++ b/test/CodeGen/R600/fmin.ll
@@ -2,15 +2,16 @@
 
 ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = extractelement <4 x float> %reg0, i32 1
    %r2 = fcmp uge float %r0, %r1
    %r3 = select i1 %r2, float %r1, float %r0
-   call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+   %vec = insertelement <4 x float> undef, float %r3, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index c29294632dc0..2a7825f9ecf7 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -1,26 +1,44 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @fmul_f32
-; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @fmul_f32() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
-   %r2 = fmul float %r0, %r1
-   call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-   ret void
+; R600-CHECK: @fmul_f32
+; R600-CHECK: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI-CHECK: @fmul_f32
+; SI-CHECK: V_MUL_F32
+define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
+entry:
+  %product = fmul float %a, %b               ; scalar multiply of the two float arguments
+  store float %product, float addrspace(1)* %out
+  ret void
 }
 
 declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
-; CHECK: @fmul_v4f32
-; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @fmul_v2f32
+; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+; SI-CHECK: @fmul_v2f32
+; SI-CHECK: V_MUL_F32
+; SI-CHECK: V_MUL_F32
+define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+entry:
+  %product = fmul <2 x float> %a, %b         ; one multiply is expected per vector lane
+  store <2 x float> %product, <2 x float> addrspace(1)* %out
+  ret void
+}
 
+; R600-CHECK: @fmul_v4f32
+; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK: @fmul_v4f32
+; SI-CHECK: V_MUL_F32
+; SI-CHECK: V_MUL_F32
+; SI-CHECK: V_MUL_F32
+; SI-CHECK: V_MUL_F32
 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll
deleted file mode 100644
index 74a58f74026a..000000000000
--- a/test/CodeGen/R600/fmul.v4f32.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
-  %result = fmul <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
new file mode 100644
index 000000000000..7c7bf041496b
--- /dev/null
+++ b/test/CodeGen/R600/fmul64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fmul_f64
+; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %lhs = load double addrspace(1)* %in1
+   %rhs = load double addrspace(1)* %in2
+   %product = fmul double %lhs, %rhs         ; 64-bit multiply of the two loaded values
+   store double %product, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll
new file mode 100644
index 000000000000..48944f629482
--- /dev/null
+++ b/test/CodeGen/R600/fmuladd.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fmuladd_f32
+; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                         float addrspace(1)* %in2, float addrspace(1)* %in3) {
+   %mul.lhs = load float addrspace(1)* %in1
+   %mul.rhs = load float addrspace(1)* %in2
+   %addend = load float addrspace(1)* %in3
+   %fused = tail call float @llvm.fmuladd.f32(float %mul.lhs, float %mul.rhs, float %addend)
+   store float %fused, float addrspace(1)* %out
+   ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+; CHECK: @fmuladd_f64
+; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                         double addrspace(1)* %in2, double addrspace(1)* %in3) {
+   %mul.lhs = load double addrspace(1)* %in1
+   %mul.rhs = load double addrspace(1)* %in2
+   %addend = load double addrspace(1)* %in3
+   %fused = tail call double @llvm.fmuladd.f64(double %mul.lhs, double %mul.rhs, double %addend)
+   store double %fused, double addrspace(1)* %out
+   ret void
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
new file mode 100644
index 000000000000..9446aa8ea9c3
--- /dev/null
+++ b/test/CodeGen/R600/fneg.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @fneg
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+define void @fneg(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = fsub float -0.000000e+00, %in         ; (-0.0 - x) is how this test spells a float negation
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @fneg_v2
+; R600-CHECK: -PV
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_v2
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
+entry:
+  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in   ; per-lane (-0.0 - x)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @fneg_v4
+; R600-CHECK: -PV
+; R600-CHECK: -T
+; R600-CHECK: -PV
+; R600-CHECK: -PV
+; SI-CHECK-LABEL: @fneg_v4
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+; SI-CHECK: V_ADD_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0, 0, 0, 0, 1
+define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
+entry:
+  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in   ; per-lane (-0.0 - x)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; DAGCombiner will transform:
+; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
+; unless the target returns true for isNegFree()
+
+; R600-CHECK-LABEL: @fneg_free
+; R600-CHECK-NOT: XOR
+; R600-CHECK: -KC0[2].Z
+; SI-CHECK-LABEL: @fneg_free
+; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
+; SI-CHECK: V_SUB_F32_e64 v{{[0-9]+}}, 0.000000e+00, s{{[0-9]+}}, 0, 0, 0, 0
+define void @fneg_free(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = bitcast i32 %in to float              ; the value being negated comes from an integer bitcast
+  %1 = fsub float 0.0, %0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fp64_to_sint.ll b/test/CodeGen/R600/fp64_to_sint.ll
new file mode 100644
index 000000000000..185e21c9caa3
--- /dev/null
+++ b/test/CodeGen/R600/fp64_to_sint.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+
+; CHECK: @fp64_to_sint
+; CHECK: V_CVT_I32_F64_e32
+define void @fp64_to_sint(i32 addrspace(1)* %out, double %in) {
+  %as.i32 = fptosi double %in to i32         ; double -> signed 32-bit integer
+  store i32 %as.i32, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index f5716e1d47e6..8302b4f8233e 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -1,11 +1,28 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @fp_to_sint_v4i32
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @fp_to_sint_v2i32
+; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; SI-CHECK: @fp_to_sint_v2i32
+; SI-CHECK: V_CVT_I32_F32_e32
+; SI-CHECK: V_CVT_I32_F32_e32
+define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
+  %as.ints = fptosi <2 x float> %in to <2 x i32>   ; one signed conversion is expected per lane
+  store <2 x i32> %as.ints, <2 x i32> addrspace(1)* %out
+  ret void
+}
 
+; R600-CHECK: @fp_to_sint_v4i32
+; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; SI-CHECK: @fp_to_sint_v4i32
+; SI-CHECK: V_CVT_I32_F32_e32
+; SI-CHECK: V_CVT_I32_F32_e32
+; SI-CHECK: V_CVT_I32_F32_e32
+; SI-CHECK: V_CVT_I32_F32_e32
 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %value = load <4 x float> addrspace(1) * %in
   %result = fptosi <4 x float> %value to <4 x i32>
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
index 1c3c0c62cf50..77db43b39c5f 100644
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -1,10 +1,29 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @fp_to_uint_v4i32
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @fp_to_uint_v2i32
+; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK: @fp_to_uint_v2i32
+; SI-CHECK: V_CVT_U32_F32_e32
+; SI-CHECK: V_CVT_U32_F32_e32
+
+define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
+  %as.uints = fptoui <2 x float> %in to <2 x i32>   ; one unsigned conversion is expected per lane
+  store <2 x i32> %as.uints, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @fp_to_uint_v4i32
+; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; SI-CHECK: @fp_to_uint_v4i32
+; SI-CHECK: V_CVT_U32_F32_e32
+; SI-CHECK: V_CVT_U32_F32_e32
+; SI-CHECK: V_CVT_U32_F32_e32
+; SI-CHECK: V_CVT_U32_F32_e32
 
 define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %value = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll
new file mode 100644
index 000000000000..143ee79fa151
--- /dev/null
+++ b/test/CodeGen/R600/fpext.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+
+; CHECK: @fpext
+; CHECK: V_CVT_F64_F32_e32
+define void @fpext(double addrspace(1)* %out, float %in) {
+  %widened = fpext float %in to double       ; float -> double extension
+  store double %widened, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll
new file mode 100644
index 000000000000..20a8c00ba498
--- /dev/null
+++ b/test/CodeGen/R600/fptrunc.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+
+; CHECK: @fptrunc
+; CHECK: V_CVT_F32_F64_e32
+define void @fptrunc(float addrspace(1)* %out, double %in) {
+  %narrowed = fptrunc double %in to float    ; double -> float truncation
+  store float %narrowed, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
new file mode 100644
index 000000000000..ae50b17d38fc
--- /dev/null
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fsqrt_f32
+; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
+
+define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+   %operand = load float addrspace(1)* %in
+   %root = call float @llvm.sqrt.f32(float %operand)   ; square root via the generic intrinsic
+   store float %root, float addrspace(1)* %out
+   ret void
+}
+
+; CHECK: @fsqrt_f64
+; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+
+define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+   %operand = load double addrspace(1)* %in
+   %root = call double @llvm.sqrt.f64(double %operand)   ; square root via the generic intrinsic
+   store double %root, double addrspace(1)* %out
+   ret void
+}
+
+declare float @llvm.sqrt.f32(float %Val)
+declare double @llvm.sqrt.f64(double %Val)
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index f784cde46cd2..4f74efba4d8b 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,26 +1,44 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @fsub_f32
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-
-define void @fsub_f32() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
-   %r2 = fsub float %r0, %r1
-   call void @llvm.AMDGPU.store.output(float %r2, i32 0)
-   ret void
+; R600-CHECK: @fsub_f32
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
+; SI-CHECK: @fsub_f32
+; SI-CHECK: V_SUB_F32
+define void @fsub_f32(float addrspace(1)* %out, float %a, float %b) {
+entry:
+  %difference = fsub float %a, %b            ; a - b on the two float arguments
+  store float %difference, float addrspace(1)* %out
+  ret void
 }
 
 declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
-; CHECK: @fsub_v4f32
-; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @fsub_v2f32
+; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
+; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
+; SI-CHECK: @fsub_v2f32
+; SI-CHECK: V_SUB_F32
+; SI-CHECK: V_SUB_F32
+define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+entry:
+  %difference = fsub <2 x float> %a, %b      ; one subtract is expected per vector lane
+  store <2 x float> %difference, <2 x float> addrspace(1)* %out
+  ret void
+}
 
+; R600-CHECK: @fsub_v4f32
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; SI-CHECK: @fsub_v4f32
+; SI-CHECK: V_SUB_F32
+; SI-CHECK: V_SUB_F32
+; SI-CHECK: V_SUB_F32
+; SI-CHECK: V_SUB_F32
 define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
new file mode 100644
index 000000000000..1445a20839ad
--- /dev/null
+++ b/test/CodeGen/R600/fsub64.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; CHECK: @fsub_f64
+; CHECK: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}, 0, 0, 0, 0, 2
+
+define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) {
+   %minuend = load double addrspace(1)* %in1
+   %subtrahend = load double addrspace(1)* %in2
+   %difference = fsub double %minuend, %subtrahend
+   store double %difference, double addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
new file mode 100644
index 000000000000..4ea21dde8a05
--- /dev/null
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+
+define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
+; CHECK-LABEL: @use_gep_address_space:
+; CHECK: S_ADD_I32
+  %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16   ; address of element 16, using i16 indices
+  store i32 99, i32 addrspace(3)* %p
+  ret void
+}
+
+define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
+; CHECK-LABEL: @gep_as_vector_v4:
+; CHECK: S_ADD_I32
+; CHECK: S_ADD_I32
+; CHECK: S_ADD_I32
+; CHECK: S_ADD_I32
+  %elt.ptrs = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+  %lane0 = extractelement <4 x i32 addrspace(3)*> %elt.ptrs, i32 0
+  %lane1 = extractelement <4 x i32 addrspace(3)*> %elt.ptrs, i32 1
+  %lane2 = extractelement <4 x i32 addrspace(3)*> %elt.ptrs, i32 2
+  %lane3 = extractelement <4 x i32 addrspace(3)*> %elt.ptrs, i32 3
+  store i32 99, i32 addrspace(3)* %lane0
+  store i32 99, i32 addrspace(3)* %lane1
+  store i32 99, i32 addrspace(3)* %lane2
+  store i32 99, i32 addrspace(3)* %lane3
+  ret void
+}
+
+define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
+; CHECK-LABEL: @gep_as_vector_v2:
+; CHECK: S_ADD_I32
+; CHECK: S_ADD_I32
+  %elt.ptrs = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
+  %lane0 = extractelement <2 x i32 addrspace(3)*> %elt.ptrs, i32 0
+  %lane1 = extractelement <2 x i32 addrspace(3)*> %elt.ptrs, i32 1
+  store i32 99, i32 addrspace(3)* %lane0
+  store i32 99, i32 addrspace(3)* %lane1
+  ret void
+}
+
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index e3005fe82da1..71705a64f50e 100644
--- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -3,7 +3,7 @@
 ;Test that a select with reversed True/False values is correctly lowered
 ;to a SETNE_INT.  There should only be one SETNE_INT instruction.
 
-;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK-NOT: SETNE_INT
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index 979efb00e7bd..b047315be71a 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
 ; Use a 64-bit value with lo bits that can be represented as an inline constant
 ; CHECK: @i64_imm_inline_lo
-; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 [[LO_VGPR:VGPR[0-9]+]], [[LO]]
-; CHECK: BUFFER_STORE_DWORDX2 [[LO_VGPR]]_
+; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
+; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
+; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
 define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
 entry:
   store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
@@ -13,9 +13,9 @@ entry:
 
 ; Use a 64-bit value with hi bits that can be represented as an inline constant
 ; CHECK: @i64_imm_inline_hi
-; CHECK: S_MOV_B32 [[HI:SGPR[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 [[HI_VGPR:VGPR[0-9]+]], [[HI]]
-; CHECK: BUFFER_STORE_DWORDX2 {{VGPR[0-9]+}}_[[HI_VGPR]]
+; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
+; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
+; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
 define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
 entry:
   store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
new file mode 100644
index 000000000000..169d69b7c25c
--- /dev/null
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; Tests for indirect addressing on SI, which is implemented using dynamic
+; indexing of vectors.
+
+; CHECK-LABEL: extract_w_offset
+; CHECK: S_MOV_B32 m0
+; CHECK-NEXT: V_MOVRELS_B32_e32
+define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = add i32 %in, 1                        ; run-time index plus a constant offset
+  %1 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: extract_wo_offset
+; CHECK: S_MOV_B32 m0
+; CHECK-NEXT: V_MOVRELS_B32_e32
+define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in   ; index is the raw argument
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: insert_w_offset
+; CHECK: S_MOV_B32 m0
+; CHECK-NEXT: V_MOVRELD_B32_e32
+define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = add i32 %in, 1                        ; run-time index plus a constant offset
+  %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
+  %2 = extractelement <4 x float> %1, i32 2  ; read back a fixed lane so the insert is used
+  store float %2, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: insert_wo_offset
+; CHECK: S_MOV_B32 m0
+; CHECK-NEXT: V_MOVRELD_B32_e32
+define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in   ; index is the raw argument
+  %1 = extractelement <4 x float> %0, i32 2  ; read back a fixed lane so the insert is used
+  store float %1, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
new file mode 100644
index 000000000000..05aeccebac00
--- /dev/null
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -0,0 +1,16 @@
+; XFAIL: *
+; RUN: llc < %s -march=r600 -mcpu=redwood -o %t
+
+define void @var_insert(<4 x i32> addrspace(1)* %out, <4 x i32> %x, i32 %val, i32 %idx) nounwind  {
+entry:
+  %updated = insertelement <4 x i32> %x, i32 %val, i32 %idx   ; lane is selected at run time by %idx
+  store <4 x i32> %updated, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+define void @var_extract(i32 addrspace(1)* %out, <4 x i32> %x, i32 %idx) nounwind  {
+entry:
+  %picked = extractelement <4 x i32> %x, i32 %idx   ; lane is selected at run time by %idx
+  store i32 %picked, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 3d70e4bd54aa..0baa3cd3e1a3 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -1,7 +1,7 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ; CHECK: @main1
-; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}}
+; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
 define void @main1() {
 main_body:
   %0 = load <4 x float> addrspace(8)* null
@@ -10,7 +10,7 @@ main_body:
   %3 = extractelement <4 x float> %2, i32 0
   %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %5 = extractelement <4 x float> %4, i32 0
-  %6 = fcmp ult float %1, 0.000000e+00
+  %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
   %8 = load <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
@@ -18,7 +18,7 @@ main_body:
   %11 = extractelement <4 x float> %10, i32 1
   %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
-  %14 = fcmp ult float %9, 0.000000e+00
+  %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
   %16 = load <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
@@ -26,7 +26,7 @@ main_body:
   %19 = extractelement <4 x float> %18, i32 2
   %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %21 = extractelement <4 x float> %20, i32 2
-  %22 = fcmp ult float %17, 0.000000e+00
+  %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
   %24 = load <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
@@ -34,7 +34,7 @@ main_body:
   %27 = extractelement <4 x float> %26, i32 3
   %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 3
-  %30 = fcmp ult float %25, 0.000000e+00
+  %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
   %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
   %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
@@ -58,7 +58,7 @@ main_body:
   %3 = extractelement <4 x float> %2, i32 0
   %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 1
-  %6 = fcmp ult float %1, 0.000000e+00
+  %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
   %8 = load <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
@@ -66,7 +66,7 @@ main_body:
   %11 = extractelement <4 x float> %10, i32 0
   %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
-  %14 = fcmp ult float %9, 0.000000e+00
+  %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
   %16 = load <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
@@ -74,7 +74,7 @@ main_body:
   %19 = extractelement <4 x float> %18, i32 3
   %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %21 = extractelement <4 x float> %20, i32 2
-  %22 = fcmp ult float %17, 0.000000e+00
+  %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
   %24 = load <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
@@ -82,7 +82,7 @@ main_body:
   %27 = extractelement <4 x float> %26, i32 3
   %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 2
-  %30 = fcmp ult float %25, 0.000000e+00
+  %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
   %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
   %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
new file mode 100644
index 000000000000..247e3163823f
--- /dev/null
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -0,0 +1,455 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK-LABEL: @i8_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+
+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
+entry:
+  %0 = zext i8 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i8_zext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_zext_arg
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+
+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
+entry:
+  %0 = zext i8 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i8_sext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i8_sext_arg
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+
+define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
+entry:
+  %0 = sext i8 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+
+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
+entry:
+  %0 = zext i16 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_zext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_zext_arg
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+
+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
+entry:
+  %0 = zext i16 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_sext_arg
+; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i16_sext_arg
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+
+define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
+entry:
+  %0 = sext i16 %in to i32
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @i32_arg
+; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @i32_arg
+; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
+entry:
+  store i32 %in, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @f32_arg
+; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; SI-CHECK-LABEL: @f32_arg
+; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
+entry:
+  store float %in, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v2i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
+entry:
+  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v2i16_arg
+; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
+entry:
+  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-CHECK-LABEL: @v2i32_arg
+; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11
+define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
+entry:
+  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v2f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
+; SI-CHECK-LABEL: @v2f32_arg
+; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11
+define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
+entry:
+  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v3i8_arg
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
+; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
+; SI-CHECK-LABEL: @v3i8_arg
+define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
+entry:
+  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v3i16_arg
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
+; SI-CHECK-LABEL: @v3i16_arg
+define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
+entry:
+  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
+  ret void
+}
+; EG-CHECK-LABEL: @v3i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-CHECK-LABEL: @v3i32_arg
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13
+define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
+entry:
+  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v3f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; SI-CHECK-LABEL: @v3f32_arg
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13
+define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
+entry:
+  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v4i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
+entry:
+  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v4i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
+entry:
+  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; SI-CHECK-LABEL: @v4i32_arg
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13
+define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v4f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
+; SI-CHECK-LABEL: @v4f32_arg
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13
+define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
+entry:
+  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v8i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
+entry:
+  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v8i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
+entry:
+  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; SI-CHECK-LABEL: @v8i32_arg
+; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17
+define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
+entry:
+  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v8f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+; SI-CHECK-LABEL: @v8f32_arg
+; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17
+define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
+entry:
+  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16i8_arg
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; EG-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @v16i8_arg
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
+entry:
+  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16i16_arg
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; EG-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @v16i16_arg
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
+entry:
+  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16i32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; SI-CHECK-LABEL: @v16i32_arg
+; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25
+define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
+entry:
+  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-CHECK-LABEL: @v16f32_arg
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
+; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
+; SI-CHECK-LABEL: @v16f32_arg
+; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25
+define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
+entry:
+  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll
new file mode 100644
index 000000000000..63a4332d3c41
--- /dev/null
+++ b/test/CodeGen/R600/lds-output-queue.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s
+;
+; This test checks that the lds input queue is empty at the end of
+; the ALU clause.
+
+; CHECK-LABEL: @lds_input_queue
+; CHECK: LDS_READ_RET * OQAP
+; CHECK-NOT: ALU clause
+; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
+
+@local_mem = internal addrspace(3) unnamed_addr global [2 x i32] [i32 1, i32 2], align 4
+
+define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
+entry:
+  %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+  %1 = load i32 addrspace(3)* %0
+  call void @llvm.AMDGPU.barrier.local()
+
+  ; This will start a new clause for the vertex fetch
+  %2 = load i32 addrspace(1)* %in
+  %3 = add i32 %1, %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+declare void @llvm.AMDGPU.barrier.local()
+
+; The machine scheduler does not do proper alias analysis and assumes that
+; loads from global values (Note that a global value is different than a
+; value from global memory.  A global value is a value that is declared
+; outside of a function, it can reside in any address space) alias with
+; all other loads.
+;
+; This is a problem for scheduling the reads from the local data share (lds).
+; These reads are implemented using two instructions.  The first copies the
+; data from lds into the lds output queue, and the second moves the data from
+; the output queue into a register.  These two instructions don't have to be
+; scheduled one after the other, but they do need to be scheduled in the same
+; clause.  The aliasing problem mentioned above causes problems when there is a
+; load from global memory which immediately follows a load from a global value that
+; has been declared in the local memory space:
+;
+;  %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+;  %1 = load i32 addrspace(3)* %0
+;  %2 = load i32 addrspace(1)* %in
+;
+; The instruction selection phase will generate ISA that looks like this:
+; %OQAP = LDS_READ_RET
+; %vreg0 = MOV %OQAP
+; %vreg1 = VTX_READ_32
+; %vreg2 = ADD_INT %vreg1, %vreg0
+;
+; The bottom scheduler will schedule the two ALU instructions first:
+;
+; UNSCHEDULED:
+; %OQAP = LDS_READ_RET
+; %vreg1 = VTX_READ_32
+;
+; SCHEDULED:
+;
+; %vreg0 = MOV %OQAP
+; %vreg2 = ADD_INT %vreg1, %vreg0
+;
+; The lack of proper aliasing causes a chain dependency to be assumed between
+; the local memory read (LDS_READ_RET) and the global memory read
+; (VTX_READ_32), so the global memory read will always be scheduled first.
+; This will give us a final program which looks like this:
+;
+; Alu clause:
+; %OQAP = LDS_READ_RET
+; VTX clause:
+; %vreg1 = VTX_READ_32
+; Alu clause:
+; %vreg0 = MOV %OQAP
+; %vreg2 = ADD_INT %vreg1, %vreg0
+;
+; This is an illegal program because the OQAP def and use now occur in
+; different ALU clauses.
+;
+; This test checks this scenario and makes sure it doesn't result in an
+; illegal program.  For now, we have fixed this issue by merging the
+; LDS_READ_RET and MOV together during instruction selection and then
+; expanding them after scheduling.  Once the scheduler has better alias
+; analysis, we should be able to keep these instructions separate before
+; scheduling.
+;
+; CHECK-LABEL: @local_global_alias
+; CHECK: LDS_READ_RET
+; CHECK-NOT: ALU clause
+; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
+define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
+  %1 = load i32 addrspace(3)* %0
+  %2 = load i32 addrspace(1)* %in
+  %3 = add i32 %2, %1
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll
new file mode 100644
index 000000000000..2185180fd83f
--- /dev/null
+++ b/test/CodeGen/R600/lds-size.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test makes sure we do not double count global values when they are
+; used in different basic blocks.
+
+; CHECK-LABEL: @test
+; CHECK: .long   166120
+; CHECK-NEXT: .long   1
+@lds = internal addrspace(3) unnamed_addr global i32 zeroinitializer, align 4
+
+define void @test(i32 addrspace(1)* %out, i32 %cond) {
+entry:
+  %0 = icmp eq i32 %cond, 0
+  br i1 %0, label %if, label %else
+
+if:
+  store i32 1, i32 addrspace(3)* @lds
+  br label %endif
+
+else:
+  store i32 2, i32 addrspace(3)* @lds
+  br label %endif
+
+endif:
+  ret void
+}
diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg
index 36ee493e5945..2d8930ad0e88 100644
--- a/test/CodeGen/R600/lit.local.cfg
+++ b/test/CodeGen/R600/lit.local.cfg
@@ -1,13 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'R600' in targets:
     config.unsupported = True
-
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index 21e5d4c4de9a..47191e0a27fb 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -2,12 +2,13 @@
 
 ; Test using an integer literal constant.
 ; Generated ASM should be:
-; ADD_INT REG literal.x, 5
+; ADD_INT KC0[2].Z literal.x, 5
 ; or
-; ADD_INT literal.x REG, 5
+; ADD_INT literal.x KC0[2].Z, 5
 
 ; CHECK: @i32_literal
-; CHECK: ADD_INT * {{[A-Z0-9,. ]*}}literal.x
+; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5
 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -18,12 +19,13 @@ entry:
 
 ; Test using a float literal constant.
 ; Generated ASM should be:
-; ADD REG literal.x, 5.0
+; ADD KC0[2].Z literal.x, 5.0
 ; or
-; ADD literal.x REG, 5.0
+; ADD literal.x KC0[2].Z, 5.0
 
 ; CHECK: @float_literal
-; CHECK: ADD * {{[A-Z0-9,. ]*}}literal.x
+; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.0
 define void @float_literal(float addrspace(1)* %out, float %in) {
 entry:
@@ -32,168 +34,31 @@ entry:
   ret void
 }
 
-; CHECK: @main
-; CHECK: -2147483648
-; CHECK-NEXT-NOT: -2147483648
+; Make sure inline literals are folded into REG_SEQUENCE instructions.
+; CHECK: @inline_literal_reg_sequence
+; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0
+; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0
+; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0
+; CHECK-NEXT: MOV {{\** *}}T[[GPR]].W, 0.0
 
-define void @main() #0 {
-main_body:
-  %0 = call float @llvm.R600.load.input(i32 4)
-  %1 = call float @llvm.R600.load.input(i32 5)
-  %2 = call float @llvm.R600.load.input(i32 6)
-  %3 = call float @llvm.R600.load.input(i32 7)
-  %4 = call float @llvm.R600.load.input(i32 8)
-  %5 = call float @llvm.R600.load.input(i32 9)
-  %6 = call float @llvm.R600.load.input(i32 10)
-  %7 = call float @llvm.R600.load.input(i32 11)
-  %8 = call float @llvm.R600.load.input(i32 12)
-  %9 = call float @llvm.R600.load.input(i32 13)
-  %10 = call float @llvm.R600.load.input(i32 14)
-  %11 = call float @llvm.R600.load.input(i32 15)
-  %12 = load <4 x float> addrspace(8)* null
-  %13 = extractelement <4 x float> %12, i32 0
-  %14 = fsub float -0.000000e+00, %13
-  %15 = fadd float %0, %14
-  %16 = load <4 x float> addrspace(8)* null
-  %17 = extractelement <4 x float> %16, i32 1
-  %18 = fsub float -0.000000e+00, %17
-  %19 = fadd float %1, %18
-  %20 = load <4 x float> addrspace(8)* null
-  %21 = extractelement <4 x float> %20, i32 2
-  %22 = fsub float -0.000000e+00, %21
-  %23 = fadd float %2, %22
-  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %25 = extractelement <4 x float> %24, i32 0
-  %26 = fmul float %25, %0
-  %27 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %28 = extractelement <4 x float> %27, i32 1
-  %29 = fmul float %28, %0
-  %30 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %31 = extractelement <4 x float> %30, i32 2
-  %32 = fmul float %31, %0
-  %33 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %34 = extractelement <4 x float> %33, i32 3
-  %35 = fmul float %34, %0
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %37 = extractelement <4 x float> %36, i32 0
-  %38 = fmul float %37, %1
-  %39 = fadd float %38, %26
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %41 = extractelement <4 x float> %40, i32 1
-  %42 = fmul float %41, %1
-  %43 = fadd float %42, %29
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %45 = extractelement <4 x float> %44, i32 2
-  %46 = fmul float %45, %1
-  %47 = fadd float %46, %32
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %49 = extractelement <4 x float> %48, i32 3
-  %50 = fmul float %49, %1
-  %51 = fadd float %50, %35
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %53 = extractelement <4 x float> %52, i32 0
-  %54 = fmul float %53, %2
-  %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %57 = extractelement <4 x float> %56, i32 1
-  %58 = fmul float %57, %2
-  %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %61 = extractelement <4 x float> %60, i32 2
-  %62 = fmul float %61, %2
-  %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %65 = extractelement <4 x float> %64, i32 3
-  %66 = fmul float %65, %2
-  %67 = fadd float %66, %51
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %69 = extractelement <4 x float> %68, i32 0
-  %70 = fmul float %69, %3
-  %71 = fadd float %70, %55
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %73 = extractelement <4 x float> %72, i32 1
-  %74 = fmul float %73, %3
-  %75 = fadd float %74, %59
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %77 = extractelement <4 x float> %76, i32 2
-  %78 = fmul float %77, %3
-  %79 = fadd float %78, %63
-  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %81 = extractelement <4 x float> %80, i32 3
-  %82 = fmul float %81, %3
-  %83 = fadd float %82, %67
-  %84 = insertelement <4 x float> undef, float %15, i32 0
-  %85 = insertelement <4 x float> %84, float %19, i32 1
-  %86 = insertelement <4 x float> %85, float %23, i32 2
-  %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 3
-  %88 = insertelement <4 x float> undef, float %15, i32 0
-  %89 = insertelement <4 x float> %88, float %19, i32 1
-  %90 = insertelement <4 x float> %89, float %23, i32 2
-  %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3
-  %92 = call float @llvm.AMDGPU.dp4(<4 x float> %87, <4 x float> %91)
-  %93 = call float @fabs(float %92)
-  %94 = call float @llvm.AMDGPU.rsq(float %93)
-  %95 = fmul float %15, %94
-  %96 = fmul float %19, %94
-  %97 = fmul float %23, %94
-  %98 = insertelement <4 x float> undef, float %4, i32 0
-  %99 = insertelement <4 x float> %98, float %5, i32 1
-  %100 = insertelement <4 x float> %99, float %6, i32 2
-  %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3
-  %102 = insertelement <4 x float> undef, float %4, i32 0
-  %103 = insertelement <4 x float> %102, float %5, i32 1
-  %104 = insertelement <4 x float> %103, float %6, i32 2
-  %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
-  %106 = call float @llvm.AMDGPU.dp4(<4 x float> %101, <4 x float> %105)
-  %107 = call float @fabs(float %106)
-  %108 = call float @llvm.AMDGPU.rsq(float %107)
-  %109 = fmul float %4, %108
-  %110 = fmul float %5, %108
-  %111 = fmul float %6, %108
-  %112 = insertelement <4 x float> undef, float %95, i32 0
-  %113 = insertelement <4 x float> %112, float %96, i32 1
-  %114 = insertelement <4 x float> %113, float %97, i32 2
-  %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 3
-  %116 = insertelement <4 x float> undef, float %109, i32 0
-  %117 = insertelement <4 x float> %116, float %110, i32 1
-  %118 = insertelement <4 x float> %117, float %111, i32 2
-  %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3
-  %120 = call float @llvm.AMDGPU.dp4(<4 x float> %115, <4 x float> %119)
-  %121 = fsub float -0.000000e+00, %120
-  %122 = fcmp uge float 0.000000e+00, %121
-  %123 = select i1 %122, float 0.000000e+00, float %121
-  %124 = insertelement <4 x float> undef, float %8, i32 0
-  %125 = insertelement <4 x float> %124, float %9, i32 1
-  %126 = insertelement <4 x float> %125, float 5.000000e-01, i32 2
-  %127 = insertelement <4 x float> %126, float 1.000000e+00, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %127, i32 60, i32 1)
-  %128 = insertelement <4 x float> undef, float %71, i32 0
-  %129 = insertelement <4 x float> %128, float %75, i32 1
-  %130 = insertelement <4 x float> %129, float %79, i32 2
-  %131 = insertelement <4 x float> %130, float %83, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %131, i32 0, i32 2)
-  %132 = insertelement <4 x float> undef, float %123, i32 0
-  %133 = insertelement <4 x float> %132, float %96, i32 1
-  %134 = insertelement <4 x float> %133, float %97, i32 2
-  %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3
-  call void @llvm.R600.store.swizzle(<4 x float> %135, i32 1, i32 2)
+define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) {
+entry:
+  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
   ret void
 }
 
-; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
+; CHECK: @inline_literal_dot4
+; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
+; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
+; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
+; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0
+define void @inline_literal_dot4(float addrspace(1)* %out) {
+entry:
+  %0 = call float @llvm.AMDGPU.dp4(<4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
 
-; Function Attrs: readnone
 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
 
-; Function Attrs: readonly
-declare float @fabs(float) #2
-
-; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
-
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { "ShaderType"="1" }
 attributes #1 = { readnone }
-attributes #2 = { readonly }
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
new file mode 100644
index 000000000000..8d3c9ca22300
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: GROUP_BARRIER
+
+define void @test(i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tidig.x()
+  %1 = getelementptr i32 addrspace(1)* %out, i32 %0
+  store i32 %0, i32 addrspace(1)* %1
+  call void @llvm.AMDGPU.barrier.local()
+  %2 = call i32 @llvm.r600.read.local.size.x()
+  %3 = sub i32 %2, 1
+  %4 = sub i32 %3, %0
+  %5 = getelementptr i32 addrspace(1)* %out, i32 %4
+  %6 = load i32 addrspace(1)* %5
+  store i32 %6, i32 addrspace(1)* %1
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local()
+declare i32 @llvm.r600.read.local.size.x() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
new file mode 100644
index 000000000000..110bbfde68b9
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -0,0 +1,59 @@
+
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @cube
+; CHECK: CUBE T{{[0-9]}}.X
+; CHECK: CUBE T{{[0-9]}}.Y
+; CHECK: CUBE T{{[0-9]}}.Z
+; CHECK: CUBE * T{{[0-9]}}.W
+define void @cube() #0 {
+main_body:
+  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %1 = extractelement <4 x float> %0, i32 3
+  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = fdiv float %3, %1
+  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %6 = extractelement <4 x float> %5, i32 1
+  %7 = fdiv float %6, %1
+  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %9 = extractelement <4 x float> %8, i32 2
+  %10 = fdiv float %9, %1
+  %11 = insertelement <4 x float> undef, float %4, i32 0
+  %12 = insertelement <4 x float> %11, float %7, i32 1
+  %13 = insertelement <4 x float> %12, float %10, i32 2
+  %14 = insertelement <4 x float> %13, float 1.000000e+00, i32 3
+  %15 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %14)
+  %16 = extractelement <4 x float> %15, i32 0
+  %17 = extractelement <4 x float> %15, i32 1
+  %18 = extractelement <4 x float> %15, i32 2
+  %19 = extractelement <4 x float> %15, i32 3
+  %20 = call float @fabs(float %18)
+  %21 = fdiv float 1.000000e+00, %20
+  %22 = fmul float %16, %21
+  %23 = fadd float %22, 1.500000e+00
+  %24 = fmul float %17, %21
+  %25 = fadd float %24, 1.500000e+00
+  %26 = insertelement <4 x float> undef, float %25, i32 0
+  %27 = insertelement <4 x float> %26, float %23, i32 1
+  %28 = insertelement <4 x float> %27, float %19, i32 2
+  %29 = insertelement <4 x float> %28, float %25, i32 3
+  %30 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %29, i32 16, i32 0, i32 4)
+  call void @llvm.R600.store.swizzle(<4 x float> %30, i32 0, i32 0)
+  ret void
+}
+
+; Function Attrs: readnone
+declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
+
+; Function Attrs: readnone
+declare float @fabs(float) #1
+
+; Function Attrs: readnone
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
new file mode 100644
index 000000000000..1336f4eeeedd
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK: V_MAX_I32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+  %0 = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
+  %1 = bitcast i32 %0 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+  ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.imax(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
new file mode 100644
index 000000000000..3435ea471e47
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK: V_MIN_I32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+  %0 = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
+  %1 = bitcast i32 %0 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+  ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.imin(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
index cc0732b3fffd..83b56a5029d3 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
@@ -2,16 +2,16 @@
 
 ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = extractelement <4 x float> %reg0, i32 1
    %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
-   call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+   %vec = insertelement <4 x float> undef, float %r2, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
 declare float @llvm.AMDGPU.mul(float ,float ) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
index 74331fa26934..aac014bde456 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -1,21 +1,21 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5
-;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6
-;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7
-;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10
-;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11
-;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12
-;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15
-;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZZ}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZZ}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZZ}} RID:0 SID:0 CT:UUNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYYW}} RID:0 SID:0 CT:NNUN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYYZ}} RID:0 SID:0 CT:NNUN
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
 
 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
    %addr = load <4 x float> addrspace(1)* %in
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
index ff22a6919677..e6bb2c4e9455 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -1,16 +1,16 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @amdgpu_trunc
+; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI-CHECK: @amdgpu_trunc
+; SI-CHECK: V_TRUNC_F32
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.AMDGPU.trunc( float %r0)
-   call void @llvm.AMDGPU.store.output(float %r1, i32 0)
-   ret void
+define void @amdgpu_trunc(float addrspace(1)* %out, float %x) {
+entry:
+  %0 = call float @llvm.AMDGPU.trunc(float %x)
+  store float %0, float addrspace(1)* %out
+  ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
 declare float @llvm.AMDGPU.trunc(float ) readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
new file mode 100644
index 000000000000..4cfa133208e3
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK: V_MAX_U32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+  %0 = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
+  %1 = bitcast i32 %0 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+  ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.umax(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
new file mode 100644
index 000000000000..14af0519bc90
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK: V_MIN_U32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+  %0 = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
+  %1 = bitcast i32 %0 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+  ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.umin(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
index e45722c3fa67..0438eccc8862 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
 ;CHECK: S_MOV_B32
 ;CHECK-NEXT: V_INTERP_MOV_F32
diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll
new file mode 100644
index 000000000000..59e00f01c96b
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -0,0 +1,131 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK-DAG: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 2, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 1, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 4, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, -1
+
+define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
+   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
+   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
+   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
+   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
+   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
+   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
+   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
+   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
+   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
+   %res1 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v1,
+      <32 x i8> undef, i32 1)
+   %res2 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v2,
+      <32 x i8> undef, i32 2)
+   %res3 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v3,
+      <32 x i8> undef, i32 3)
+   %res4 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v4,
+      <32 x i8> undef, i32 4)
+   %res5 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v5,
+      <32 x i8> undef, i32 5)
+   %res6 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v6,
+      <32 x i8> undef, i32 6)
+   %res10 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v10,
+      <32 x i8> undef, i32 10)
+   %res11 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v11,
+      <32 x i8> undef, i32 11)
+   %res15 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v15,
+      <32 x i8> undef, i32 15)
+   %res16 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v16,
+      <32 x i8> undef, i32 16)
+   %e1 = extractelement <4 x i32> %res1, i32 0
+   %e2 = extractelement <4 x i32> %res2, i32 1
+   %e3 = extractelement <4 x i32> %res3, i32 2
+   %e4 = extractelement <4 x i32> %res4, i32 3
+   %t0 = extractelement <4 x i32> %res5, i32 0
+   %t1 = extractelement <4 x i32> %res5, i32 1
+   %e5 = add i32 %t0, %t1
+   %t2 = extractelement <4 x i32> %res6, i32 0
+   %t3 = extractelement <4 x i32> %res6, i32 2
+   %e6 = add i32 %t2, %t3
+   %t10 = extractelement <4 x i32> %res10, i32 2
+   %t11 = extractelement <4 x i32> %res10, i32 3
+   %e10 = add i32 %t10, %t11
+   %t12 = extractelement <4 x i32> %res11, i32 0
+   %t13 = extractelement <4 x i32> %res11, i32 1
+   %t14 = extractelement <4 x i32> %res11, i32 2
+   %t15 = add i32 %t12, %t13
+   %e11 = add i32 %t14, %t15
+   %t28 = extractelement <4 x i32> %res15, i32 0
+   %t29 = extractelement <4 x i32> %res15, i32 1
+   %t30 = extractelement <4 x i32> %res15, i32 2
+   %t31 = extractelement <4 x i32> %res15, i32 3
+   %t32 = add i32 %t28, %t29
+   %t33 = add i32 %t30, %t31
+   %e15 = add i32 %t32, %t33
+   %e16 = extractelement <4 x i32> %res16, i32 3
+   %s1 = add i32 %e1, %e2
+   %s2 = add i32 %s1, %e3
+   %s3 = add i32 %s2, %e4
+   %s4 = add i32 %s3, %e5
+   %s5 = add i32 %s4, %e6
+   %s9 = add i32 %s5, %e10
+   %s10 = add i32 %s9, %e11
+   %s14 = add i32 %s10, %e15
+   %s15 = add i32 %s14, %e16
+   %s16 = bitcast i32 %s15 to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
+   ret void
+}
+
+; Test that coordinates are stored in vgprs and not sgprs
+; CHECK: vgpr_coords
+; CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
+define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
+  %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !2
+  %22 = getelementptr float addrspace(2)* %21, i32 0
+  %23 = load float addrspace(2)* %22, !tbaa !2, !invariant.load !1
+  %24 = getelementptr float addrspace(2)* %21, i32 1
+  %25 = load float addrspace(2)* %24, !tbaa !2, !invariant.load !1
+  %26 = getelementptr float addrspace(2)* %21, i32 4
+  %27 = load float addrspace(2)* %26, !tbaa !2, !invariant.load !1
+  %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
+  %29 = load <32 x i8> addrspace(2)* %28, !tbaa !2
+  %30 = bitcast float %27 to i32
+  %31 = bitcast float %23 to i32
+  %32 = bitcast float %25 to i32
+  %33 = insertelement <4 x i32> undef, i32 %31, i32 0
+  %34 = insertelement <4 x i32> %33, i32 %32, i32 1
+  %35 = insertelement <4 x i32> %34, i32 %30, i32 2
+  %36 = insertelement <4 x i32> %35, i32 undef, i32 3
+  %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2)
+  %38 = extractelement <4 x i32> %37, i32 0
+  %39 = extractelement <4 x i32> %37, i32 1
+  %40 = extractelement <4 x i32> %37, i32 2
+  %41 = extractelement <4 x i32> %37, i32 3
+  %42 = bitcast i32 %38 to float
+  %43 = bitcast i32 %39 to float
+  %44 = bitcast i32 %40 to float
+  %45 = bitcast i32 %41 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45)
+  ret void
+}
+
+declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <32 x i8>, i32) readnone
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
+
+!0 = metadata !{metadata !"const", null}
+!1 = metadata !{}
+!2 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.resinfo.ll b/test/CodeGen/R600/llvm.SI.resinfo.ll
new file mode 100644
index 000000000000..af3afc1e1d92
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.resinfo.ll
@@ -0,0 +1,110 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 2, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 1, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 4, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
+;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, -1
+
+define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
+		  i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
+   %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <32 x i8> undef, i32 1)
+   %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <32 x i8> undef, i32 2)
+   %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <32 x i8> undef, i32 3)
+   %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <32 x i8> undef, i32 4)
+   %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <32 x i8> undef, i32 5)
+   %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <32 x i8> undef, i32 6)
+   %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <32 x i8> undef, i32 7)
+   %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <32 x i8> undef, i32 8)
+   %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <32 x i8> undef, i32 9)
+   %res10 = call <4 x i32> @llvm.SI.resinfo(i32 %a10, <32 x i8> undef, i32 10)
+   %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <32 x i8> undef, i32 11)
+   %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <32 x i8> undef, i32 12)
+   %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <32 x i8> undef, i32 13)
+   %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <32 x i8> undef, i32 14)
+   %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <32 x i8> undef, i32 15)
+   %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <32 x i8> undef, i32 16)
+   %e1 = extractelement <4 x i32> %res1, i32 0
+   %e2 = extractelement <4 x i32> %res2, i32 1
+   %e3 = extractelement <4 x i32> %res3, i32 2
+   %e4 = extractelement <4 x i32> %res4, i32 3
+   %t0 = extractelement <4 x i32> %res5, i32 0
+   %t1 = extractelement <4 x i32> %res5, i32 1
+   %e5 = add i32 %t0, %t1
+   %t2 = extractelement <4 x i32> %res6, i32 0
+   %t3 = extractelement <4 x i32> %res6, i32 2
+   %e6 = add i32 %t2, %t3
+   %t4 = extractelement <4 x i32> %res7, i32 0
+   %t5 = extractelement <4 x i32> %res7, i32 3
+   %e7 = add i32 %t4, %t5
+   %t6 = extractelement <4 x i32> %res8, i32 1
+   %t7 = extractelement <4 x i32> %res8, i32 2
+   %e8 = add i32 %t6, %t7
+   %t8 = extractelement <4 x i32> %res9, i32 1
+   %t9 = extractelement <4 x i32> %res9, i32 3
+   %e9 = add i32 %t8, %t9
+   %t10 = extractelement <4 x i32> %res10, i32 2
+   %t11 = extractelement <4 x i32> %res10, i32 3
+   %e10 = add i32 %t10, %t11
+   %t12 = extractelement <4 x i32> %res11, i32 0
+   %t13 = extractelement <4 x i32> %res11, i32 1
+   %t14 = extractelement <4 x i32> %res11, i32 2
+   %t15 = add i32 %t12, %t13
+   %e11 = add i32 %t14, %t15
+   %t16 = extractelement <4 x i32> %res12, i32 0
+   %t17 = extractelement <4 x i32> %res12, i32 1
+   %t18 = extractelement <4 x i32> %res12, i32 3
+   %t19 = add i32 %t16, %t17
+   %e12 = add i32 %t18, %t19
+   %t20 = extractelement <4 x i32> %res13, i32 0
+   %t21 = extractelement <4 x i32> %res13, i32 2
+   %t22 = extractelement <4 x i32> %res13, i32 3
+   %t23 = add i32 %t20, %t21
+   %e13 = add i32 %t22, %t23
+   %t24 = extractelement <4 x i32> %res14, i32 1
+   %t25 = extractelement <4 x i32> %res14, i32 2
+   %t26 = extractelement <4 x i32> %res14, i32 3
+   %t27 = add i32 %t24, %t25
+   %e14 = add i32 %t26, %t27
+   %t28 = extractelement <4 x i32> %res15, i32 0
+   %t29 = extractelement <4 x i32> %res15, i32 1
+   %t30 = extractelement <4 x i32> %res15, i32 2
+   %t31 = extractelement <4 x i32> %res15, i32 3
+   %t32 = add i32 %t28, %t29
+   %t33 = add i32 %t30, %t31
+   %e15 = add i32 %t32, %t33
+   %e16 = extractelement <4 x i32> %res16, i32 3
+   %s1 = add i32 %e1, %e2
+   %s2 = add i32 %s1, %e3
+   %s3 = add i32 %s2, %e4
+   %s4 = add i32 %s3, %e5
+   %s5 = add i32 %s4, %e6
+   %s6 = add i32 %s5, %e7
+   %s7 = add i32 %s6, %e8
+   %s8 = add i32 %s7, %e9
+   %s9 = add i32 %s8, %e10
+   %s10 = add i32 %s9, %e11
+   %s11 = add i32 %s10, %e12
+   %s12 = add i32 %s11, %e13
+   %s13 = add i32 %s12, %e14
+   %s14 = add i32 %s13, %e15
+   %s15 = add i32 %s14, %e16
+   %s16 = bitcast i32 %s15 to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
+   ret void
+}
+
+declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/R600/llvm.SI.sample-masked.ll
new file mode 100644
index 000000000000..e5e4ec4f0674
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll
@@ -0,0 +1,93 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+; CHECK-LABEL: @v1
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 13
+define void @v1(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 2
+  %4 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: @v2
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 11
+define void @v2(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 1
+  %4 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: @v3
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
+define void @v3(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 1
+  %3 = extractelement <4 x float> %1, i32 2
+  %4 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: @v4
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 7
+define void @v4(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 1
+  %4 = extractelement <4 x float> %1, i32 2
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
+  ret void
+}
+
+; CHECK-LABEL: @v5
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
+define void @v5(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 1
+  %3 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
+  ret void
+}
+
+; CHECK-LABEL: @v6
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 6
+define void @v6(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 1
+  %3 = extractelement <4 x float> %1, i32 2
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
+  ret void
+}
+
+; CHECK-LABEL: @v7
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 9
+define void @v7(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
+  ret void
+}
+
+declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index de06354a5646..d41737c65927 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 2
+;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 1
+;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 4
+;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
@@ -35,37 +35,37 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
    %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
    %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
-      <8 x i32> undef, <4 x i32> undef, i32 1)
+      <32 x i8> undef, <16 x i8> undef, i32 1)
    %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
-      <8 x i32> undef, <4 x i32> undef, i32 2)
+      <32 x i8> undef, <16 x i8> undef, i32 2)
    %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
-      <8 x i32> undef, <4 x i32> undef, i32 3)
+      <32 x i8> undef, <16 x i8> undef, i32 3)
    %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
-      <8 x i32> undef, <4 x i32> undef, i32 4)
+      <32 x i8> undef, <16 x i8> undef, i32 4)
    %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
-      <8 x i32> undef, <4 x i32> undef, i32 5)
+      <32 x i8> undef, <16 x i8> undef, i32 5)
    %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
-      <8 x i32> undef, <4 x i32> undef, i32 6)
+      <32 x i8> undef, <16 x i8> undef, i32 6)
    %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
-      <8 x i32> undef, <4 x i32> undef, i32 7)
+      <32 x i8> undef, <16 x i8> undef, i32 7)
    %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
-      <8 x i32> undef, <4 x i32> undef, i32 8)
+      <32 x i8> undef, <16 x i8> undef, i32 8)
    %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
-      <8 x i32> undef, <4 x i32> undef, i32 9)
+      <32 x i8> undef, <16 x i8> undef, i32 9)
    %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
-      <8 x i32> undef, <4 x i32> undef, i32 10)
+      <32 x i8> undef, <16 x i8> undef, i32 10)
    %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
-      <8 x i32> undef, <4 x i32> undef, i32 11)
+      <32 x i8> undef, <16 x i8> undef, i32 11)
    %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
-      <8 x i32> undef, <4 x i32> undef, i32 12)
+      <32 x i8> undef, <16 x i8> undef, i32 12)
    %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
-      <8 x i32> undef, <4 x i32> undef, i32 13)
+      <32 x i8> undef, <16 x i8> undef, i32 13)
    %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
-      <8 x i32> undef, <4 x i32> undef, i32 14)
+      <32 x i8> undef, <16 x i8> undef, i32 14)
    %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
-      <8 x i32> undef, <4 x i32> undef, i32 15)
+      <32 x i8> undef, <16 x i8> undef, i32 15)
    %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
-      <8 x i32> undef, <4 x i32> undef, i32 16)
+      <32 x i8> undef, <16 x i8> undef, i32 16)
    %e1 = extractelement <4 x float> %res1, i32 0
    %e2 = extractelement <4 x float> %res2, i32 1
    %e3 = extractelement <4 x float> %res3, i32 2
@@ -135,6 +135,23 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    ret void
 }
 
-declare <4 x float> @llvm.SI.sample.(<4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+; CHECK: @v1
+; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
+define void @v1(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 1
+  %4 = extractelement <4 x float> %1, i32 2
+  %5 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
+  ret void
+}
+
+
+declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
+
+declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
 
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll
new file mode 100644
index 000000000000..21ac725ae039
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sampled.ll
@@ -0,0 +1,140 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 2
+;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 1
+;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 4
+;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
+
+define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
+   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
+   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
+   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
+   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
+   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
+   %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
+   %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
+   %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
+   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
+   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
+   %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
+   %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
+   %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
+   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
+   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
+   %res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1,
+      <32 x i8> undef, <16 x i8> undef, i32 1)
+   %res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2,
+      <32 x i8> undef, <16 x i8> undef, i32 2)
+   %res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3,
+      <32 x i8> undef, <16 x i8> undef, i32 3)
+   %res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4,
+      <32 x i8> undef, <16 x i8> undef, i32 4)
+   %res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5,
+      <32 x i8> undef, <16 x i8> undef, i32 5)
+   %res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6,
+      <32 x i8> undef, <16 x i8> undef, i32 6)
+   %res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7,
+      <32 x i8> undef, <16 x i8> undef, i32 7)
+   %res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8,
+      <32 x i8> undef, <16 x i8> undef, i32 8)
+   %res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9,
+      <32 x i8> undef, <16 x i8> undef, i32 9)
+   %res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10,
+      <32 x i8> undef, <16 x i8> undef, i32 10)
+   %res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11,
+      <32 x i8> undef, <16 x i8> undef, i32 11)
+   %res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12,
+      <32 x i8> undef, <16 x i8> undef, i32 12)
+   %res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13,
+      <32 x i8> undef, <16 x i8> undef, i32 13)
+   %res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14,
+      <32 x i8> undef, <16 x i8> undef, i32 14)
+   %res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15,
+      <32 x i8> undef, <16 x i8> undef, i32 15)
+   %res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16,
+      <32 x i8> undef, <16 x i8> undef, i32 16)
+   %e1 = extractelement <4 x float> %res1, i32 0
+   %e2 = extractelement <4 x float> %res2, i32 1
+   %e3 = extractelement <4 x float> %res3, i32 2
+   %e4 = extractelement <4 x float> %res4, i32 3
+   %t0 = extractelement <4 x float> %res5, i32 0
+   %t1 = extractelement <4 x float> %res5, i32 1
+   %e5 = fadd float %t0, %t1
+   %t2 = extractelement <4 x float> %res6, i32 0
+   %t3 = extractelement <4 x float> %res6, i32 2
+   %e6 = fadd float %t2, %t3
+   %t4 = extractelement <4 x float> %res7, i32 0
+   %t5 = extractelement <4 x float> %res7, i32 3
+   %e7 = fadd float %t4, %t5
+   %t6 = extractelement <4 x float> %res8, i32 1
+   %t7 = extractelement <4 x float> %res8, i32 2
+   %e8 = fadd float %t6, %t7
+   %t8 = extractelement <4 x float> %res9, i32 1
+   %t9 = extractelement <4 x float> %res9, i32 3
+   %e9 = fadd float %t8, %t9
+   %t10 = extractelement <4 x float> %res10, i32 2
+   %t11 = extractelement <4 x float> %res10, i32 3
+   %e10 = fadd float %t10, %t11
+   %t12 = extractelement <4 x float> %res11, i32 0
+   %t13 = extractelement <4 x float> %res11, i32 1
+   %t14 = extractelement <4 x float> %res11, i32 2
+   %t15 = fadd float %t12, %t13
+   %e11 = fadd float %t14, %t15
+   %t16 = extractelement <4 x float> %res12, i32 0
+   %t17 = extractelement <4 x float> %res12, i32 1
+   %t18 = extractelement <4 x float> %res12, i32 3
+   %t19 = fadd float %t16, %t17
+   %e12 = fadd float %t18, %t19
+   %t20 = extractelement <4 x float> %res13, i32 0
+   %t21 = extractelement <4 x float> %res13, i32 2
+   %t22 = extractelement <4 x float> %res13, i32 3
+   %t23 = fadd float %t20, %t21
+   %e13 = fadd float %t22, %t23
+   %t24 = extractelement <4 x float> %res14, i32 1
+   %t25 = extractelement <4 x float> %res14, i32 2
+   %t26 = extractelement <4 x float> %res14, i32 3
+   %t27 = fadd float %t24, %t25
+   %e14 = fadd float %t26, %t27
+   %t28 = extractelement <4 x float> %res15, i32 0
+   %t29 = extractelement <4 x float> %res15, i32 1
+   %t30 = extractelement <4 x float> %res15, i32 2
+   %t31 = extractelement <4 x float> %res15, i32 3
+   %t32 = fadd float %t28, %t29
+   %t33 = fadd float %t30, %t31
+   %e15 = fadd float %t32, %t33
+   %e16 = extractelement <4 x float> %res16, i32 3
+   %s1 = fadd float %e1, %e2
+   %s2 = fadd float %s1, %e3
+   %s3 = fadd float %s2, %e4
+   %s4 = fadd float %s3, %e5
+   %s5 = fadd float %s4, %e6
+   %s6 = fadd float %s5, %e7
+   %s7 = fadd float %s6, %e8
+   %s8 = fadd float %s7, %e9
+   %s9 = fadd float %s8, %e10
+   %s10 = fadd float %s9, %e11
+   %s11 = fadd float %s10, %e12
+   %s12 = fadd float %s11, %e13
+   %s13 = fadd float %s12, %e14
+   %s14 = fadd float %s13, %e15
+   %s15 = fadd float %s14, %e16
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
+   ret void
+}
+
+declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
new file mode 100644
index 000000000000..fa7c3cabadc5
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
@@ -0,0 +1,44 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK-LABEL: @test1
+;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 32, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+define void @test1(i32 %a1, i32 %vaddr) {
+    %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
+    call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+        i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
+        i32 1, i32 0)
+    ret void
+}
+
+;CHECK-LABEL: @test2
+;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 24, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+define void @test2(i32 %a1, i32 %vaddr) {
+    %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
+    call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+        i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
+        i32 1, i32 0)
+    ret void
+}
+
+;CHECK-LABEL: @test3
+;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 16, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+define void @test3(i32 %a1, i32 %vaddr) {
+    %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
+    call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
+        i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
+        i32 1, i32 0)
+    ret void
+}
+
+;CHECK-LABEL: @test4
+;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+define void @test4(i32 %vdata, i32 %vaddr) {
+    call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
+        i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
+        i32 1, i32 0)
+    ret void
+}
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll
new file mode 100644
index 000000000000..fe17304732ad
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.tid.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+;CHECK: V_MBCNT_LO_U32_B32_e64
+;CHECK: V_MBCNT_HI_U32_B32_e32
+
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+  %4 = call i32 @llvm.SI.tid()
+  %5 = bitcast i32 %4 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
+  ret void
+}
+
+declare i32 @llvm.SI.tid() readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index 9b2816707042..aaf2305dd0ba 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,16 +1,19 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULADD_IEEE *
+;CHECK: FRACT *
+;CHECK: ADD *
+;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
    %r1 = call float @llvm.cos.f32(float %r0)
-   call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+   %vec = insertelement <4 x float> undef, float %r1, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
 declare float @llvm.cos.f32(float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.floor.ll b/test/CodeGen/R600/llvm.floor.ll
new file mode 100644
index 000000000000..f7071cd9b879
--- /dev/null
+++ b/test/CodeGen/R600/llvm.floor.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @f32
+; R600-CHECK: FLOOR
+; SI-CHECK-LABEL: @f32
+; SI-CHECK: V_FLOOR_F32_e32
+define void @f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.floor.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @v2f32
+; R600-CHECK: FLOOR
+; R600-CHECK: FLOOR
+; SI-CHECK-LABEL: @v2f32
+; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: V_FLOOR_F32_e32
+define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @v4f32
+; R600-CHECK: FLOOR
+; R600-CHECK: FLOOR
+; R600-CHECK: FLOOR
+; R600-CHECK: FLOOR
+; SI-CHECK-LABEL: @v4f32
+; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: V_FLOOR_F32_e32
+define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare float @llvm.floor.f32(float) #0
+
+; Function Attrs: nounwind readonly
+declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0
+
+attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
index 91b774282906..b587d2b2aea1 100644
--- a/test/CodeGen/R600/llvm.pow.ll
+++ b/test/CodeGen/R600/llvm.pow.ll
@@ -1,19 +1,19 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK-NEXT: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
+;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
-   %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = extractelement <4 x float> %reg0, i32 1
    %r2 = call float @llvm.pow.f32( float %r0, float %r1)
-   call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+   %vec = insertelement <4 x float> undef, float %r2, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
 declare float @llvm.pow.f32(float ,float ) readonly
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll
new file mode 100644
index 000000000000..c174b335f0e8
--- /dev/null
+++ b/test/CodeGen/R600/llvm.rint.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @f32
+; R600-CHECK: RNDNE
+; SI-CHECK-LABEL: @f32
+; SI-CHECK: V_RNDNE_F32_e32
+define void @f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.rint.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @v2f32
+; R600-CHECK: RNDNE
+; R600-CHECK: RNDNE
+; SI-CHECK-LABEL: @v2f32
+; SI-CHECK: V_RNDNE_F32_e32
+; SI-CHECK: V_RNDNE_F32_e32
+define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @v4f32
+; R600-CHECK: RNDNE
+; R600-CHECK: RNDNE
+; R600-CHECK: RNDNE
+; R600-CHECK: RNDNE
+; SI-CHECK-LABEL: @v4f32
+; SI-CHECK: V_RNDNE_F32_e32
+; SI-CHECK: V_RNDNE_F32_e32
+; SI-CHECK: V_RNDNE_F32_e32
+; SI-CHECK: V_RNDNE_F32_e32
+define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare float @llvm.rint.f32(float) #0
+
+; Function Attrs: nounwind readonly
+declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
+
+attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll
new file mode 100644
index 000000000000..e06d45d4a373
--- /dev/null
+++ b/test/CodeGen/R600/llvm.round.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+
+; FUNC-LABEL: @f32
+; R600: FRACT
+; R600-DAG: ADD
+; R600-DAG: CEIL
+; R600-DAG: FLOOR
+; R600: CNDGE
+define void @f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.round.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; The vector tests are really difficult to verify, since it can be hard to
+; predict how the scheduler will order the instructions.  We already have
+; a test for the scalar case, so the vector tests just check that the
+; compiler doesn't crash.
+
+; FUNC-LABEL: @v2f32
+; R600: CF_END
+define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @v4f32
+; R600: CF_END
+define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.round.f32(float)
+declare <2 x float> @llvm.round.v2f32(<2 x float>)
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 803dc2d6debc..9eb998315fef 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,16 +1,19 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULADD_IEEE *
+;CHECK: FRACT *
+;CHECK: ADD *
+;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
    %r1 = call float @llvm.sin.f32( float %r0)
-   call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+   %vec = insertelement <4 x float> undef, float %r1, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
 declare float @llvm.sin.f32(float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll
new file mode 100644
index 000000000000..0d0d18618990
--- /dev/null
+++ b/test/CodeGen/R600/llvm.sqrt.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @sqrt_f32
+; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
+; R600-CHECK: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS
+; SI-CHECK-LABEL: @sqrt_f32
+; SI-CHECK: V_SQRT_F32_e32
+define void @sqrt_f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.sqrt.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @sqrt_v2f32
+; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W
+; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS
+; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X
+; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS
+; SI-CHECK-LABEL: @sqrt_v2f32
+; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: V_SQRT_F32_e32
+define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @sqrt_v4f32
+; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y
+; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS
+; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z
+; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Z, PS
+; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].W
+; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS
+; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X
+; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS
+; SI-CHECK-LABEL: @sqrt_v4f32
+; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK: V_SQRT_F32_e32
+define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.sqrt.f32(float %in)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll
new file mode 100644
index 000000000000..ca86d0e36907
--- /dev/null
+++ b/test/CodeGen/R600/load-input-fold.ll
@@ -0,0 +1,118 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman
+;REQUIRES: asserts
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = extractelement <4 x float> %reg2, i32 0
+  %5 = extractelement <4 x float> %reg2, i32 1
+  %6 = extractelement <4 x float> %reg2, i32 2
+  %7 = extractelement <4 x float> %reg2, i32 3
+  %8 = extractelement <4 x float> %reg3, i32 0
+  %9 = extractelement <4 x float> %reg3, i32 1
+  %10 = extractelement <4 x float> %reg3, i32 2
+  %11 = extractelement <4 x float> %reg3, i32 3
+  %12 = load <4 x float> addrspace(8)* null
+  %13 = extractelement <4 x float> %12, i32 0
+  %14 = fmul float %0, %13
+  %15 = load <4 x float> addrspace(8)* null
+  %16 = extractelement <4 x float> %15, i32 1
+  %17 = fmul float %0, %16
+  %18 = load <4 x float> addrspace(8)* null
+  %19 = extractelement <4 x float> %18, i32 2
+  %20 = fmul float %0, %19
+  %21 = load <4 x float> addrspace(8)* null
+  %22 = extractelement <4 x float> %21, i32 3
+  %23 = fmul float %0, %22
+  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %25 = extractelement <4 x float> %24, i32 0
+  %26 = fmul float %1, %25
+  %27 = fadd float %26, %14
+  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %29 = extractelement <4 x float> %28, i32 1
+  %30 = fmul float %1, %29
+  %31 = fadd float %30, %17
+  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %33 = extractelement <4 x float> %32, i32 2
+  %34 = fmul float %1, %33
+  %35 = fadd float %34, %20
+  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %37 = extractelement <4 x float> %36, i32 3
+  %38 = fmul float %1, %37
+  %39 = fadd float %38, %23
+  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %41 = extractelement <4 x float> %40, i32 0
+  %42 = fmul float %2, %41
+  %43 = fadd float %42, %27
+  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %45 = extractelement <4 x float> %44, i32 1
+  %46 = fmul float %2, %45
+  %47 = fadd float %46, %31
+  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %49 = extractelement <4 x float> %48, i32 2
+  %50 = fmul float %2, %49
+  %51 = fadd float %50, %35
+  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %53 = extractelement <4 x float> %52, i32 3
+  %54 = fmul float %2, %53
+  %55 = fadd float %54, %39
+  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %57 = extractelement <4 x float> %56, i32 0
+  %58 = fmul float %3, %57
+  %59 = fadd float %58, %43
+  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %61 = extractelement <4 x float> %60, i32 1
+  %62 = fmul float %3, %61
+  %63 = fadd float %62, %47
+  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %65 = extractelement <4 x float> %64, i32 2
+  %66 = fmul float %3, %65
+  %67 = fadd float %66, %51
+  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %69 = extractelement <4 x float> %68, i32 3
+  %70 = fmul float %3, %69
+  %71 = fadd float %70, %55
+  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %73 = extractelement <4 x float> %72, i32 0
+  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %75 = extractelement <4 x float> %74, i32 1
+  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %77 = extractelement <4 x float> %76, i32 2
+  %78 = insertelement <4 x float> undef, float %4, i32 0
+  %79 = insertelement <4 x float> %78, float %5, i32 1
+  %80 = insertelement <4 x float> %79, float %6, i32 2
+  %81 = insertelement <4 x float> %80, float 0.000000e+00, i32 3
+  %82 = insertelement <4 x float> undef, float %73, i32 0
+  %83 = insertelement <4 x float> %82, float %75, i32 1
+  %84 = insertelement <4 x float> %83, float %77, i32 2
+  %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3
+  %86 = call float @llvm.AMDGPU.dp4(<4 x float> %81, <4 x float> %85)
+  %87 = insertelement <4 x float> undef, float %86, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %87, i32 2, i32 2)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+; Function Attrs: readonly
+declare float @fabs(float) #2
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.rsq(float) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.clamp.(float, float, float) #1
+
+; Function Attrs: nounwind readonly
+declare float @llvm.pow.f32(float, float) #3
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
+attributes #2 = { readonly }
+attributes #3 = { nounwind readonly }
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index b03245ae87b3..e4492d7d6e7b 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -1,8 +1,17 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK  %s
+
+;===------------------------------------------------------------------------===;
+; GLOBAL ADDRESS SPACE
+;===------------------------------------------------------------------------===;
 
 ; Load an i8 value from the global address space.
-; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+; R600-CHECK-LABEL: @load_i8
+; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
 
+; SI-CHECK-LABEL: @load_i8
+; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %1 = load i8 addrspace(1)* %in
   %2 = zext i8 %1 to i32
@@ -10,11 +19,665 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
   ret void
 }
 
+; R600-CHECK-LABEL: @load_i8_sext
+; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
+; R600-CHECK: 24
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600-CHECK: 24
+; SI-CHECK-LABEL: @load_i8_sext
+; SI-CHECK: BUFFER_LOAD_SBYTE
+define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+entry:
+  %0 = load i8 addrspace(1)* %in
+  %1 = sext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i8
+; R600-CHECK: VTX_READ_8
+; R600-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @load_v2i8
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
+entry:
+  %0 = load <2 x i8> addrspace(1)* %in
+  %1 = zext <2 x i8> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i8_sext
+; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
+; R600-CHECK-DAG: 24
+; SI-CHECK-LABEL: @load_v2i8_sext
+; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: BUFFER_LOAD_SBYTE
+define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
+entry:
+  %0 = load <2 x i8> addrspace(1)* %in
+  %1 = sext <2 x i8> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i8
+; R600-CHECK: VTX_READ_8
+; R600-CHECK: VTX_READ_8
+; R600-CHECK: VTX_READ_8
+; R600-CHECK: VTX_READ_8
+; SI-CHECK-LABEL: @load_v4i8
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: BUFFER_LOAD_UBYTE
+define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
+entry:
+  %0 = load <4 x i8> addrspace(1)* %in
+  %1 = zext <4 x i8> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i8_sext
+; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
+; R600-CHECK-DAG: 24
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
+; R600-CHECK-DAG: 24
+; SI-CHECK-LABEL: @load_v4i8_sext
+; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: BUFFER_LOAD_SBYTE
+define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
+entry:
+  %0 = load <4 x i8> addrspace(1)* %in
+  %1 = sext <4 x i8> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Load an i16 value from the global address space.
+; R600-CHECK-LABEL: @load_i16
+; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK-LABEL: @load_i16
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+entry:
+  %0 = load i16	 addrspace(1)* %in
+  %1 = zext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_i16_sext
+; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
+; R600-CHECK: 16
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600-CHECK: 16
+; SI-CHECK-LABEL: @load_i16_sext
+; SI-CHECK: BUFFER_LOAD_SSHORT
+define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+entry:
+  %0 = load i16 addrspace(1)* %in
+  %1 = sext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i16
+; R600-CHECK: VTX_READ_16
+; R600-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @load_v2i16
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+entry:
+  %0 = load <2 x i16> addrspace(1)* %in
+  %1 = zext <2 x i16> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i16_sext
+; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
+; R600-CHECK-DAG: 16
+; SI-CHECK-LABEL: @load_v2i16_sext
+; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: BUFFER_LOAD_SSHORT
+define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+entry:
+  %0 = load <2 x i16> addrspace(1)* %in
+  %1 = sext <2 x i16> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i16
+; R600-CHECK: VTX_READ_16
+; R600-CHECK: VTX_READ_16
+; R600-CHECK: VTX_READ_16
+; R600-CHECK: VTX_READ_16
+; SI-CHECK-LABEL: @load_v4i16
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+entry:
+  %0 = load <4 x i16> addrspace(1)* %in
+  %1 = zext <4 x i16> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i16_sext
+; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
+; R600-CHECK-DAG: 16
+; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
+; R600-CHECK-DAG: 16
+; SI-CHECK-LABEL: @load_v4i16_sext
+; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: BUFFER_LOAD_SSHORT
+define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+entry:
+  %0 = load <4 x i16> addrspace(1)* %in
+  %1 = sext <4 x i16> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; load an i32 value from the global address space.
+; R600-CHECK-LABEL: @load_i32
+; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+
+; SI-CHECK-LABEL: @load_i32
+; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %0 = load i32 addrspace(1)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; load a f32 value from the global address space.
+; R600-CHECK-LABEL: @load_f32
+; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+
+; SI-CHECK-LABEL: @load_f32
+; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+  %0 = load float addrspace(1)* %in
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; load a v2f32 value from the global address space
+; R600-CHECK-LABEL: @load_v2f32
+; R600-CHECK: VTX_READ_64
+
+; SI-CHECK-LABEL: @load_v2f32
+; SI-CHECK: BUFFER_LOAD_DWORDX2
+define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
+entry:
+  %0 = load <2 x float> addrspace(1)* %in
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_i64
+; R600-CHECK: MEM_RAT
+; R600-CHECK: MEM_RAT
+
+; SI-CHECK-LABEL: @load_i64
+; SI-CHECK: BUFFER_LOAD_DWORDX2
+define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+entry:
+  %0 = load i64 addrspace(1)* %in
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_i64_sext
+; R600-CHECK: MEM_RAT
+; R600-CHECK: MEM_RAT
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
+; R600-CHECK: 31
+; SI-CHECK-LABEL: @load_i64_sext
+; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]]
+
+define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %0 = load i32 addrspace(1)* %in
+  %1 = sext i32 %0 to i64
+  store i64 %1, i64 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_i64_zext
+; R600-CHECK: MEM_RAT
+; R600-CHECK: MEM_RAT
+define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %0 = load i32 addrspace(1)* %in
+  %1 = zext i32 %0 to i64
+  store i64 %1, i64 addrspace(1)* %out
+  ret void
+}
+
+;===------------------------------------------------------------------------===;
+; CONSTANT ADDRESS SPACE
+;===------------------------------------------------------------------------===;
+
+; Load a sign-extended i8 value
+; R600-CHECK-LABEL: @load_const_i8_sext
+; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
+; R600-CHECK: 24
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600-CHECK: 24
+; SI-CHECK-LABEL: @load_const_i8_sext
+; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
+define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+entry:
+  %0 = load i8 addrspace(2)* %in
+  %1 = sext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an aligned i8 value
+; R600-CHECK-LABEL: @load_const_i8_aligned
+; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK-LABEL: @load_const_i8_aligned
+; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+entry:
+  %0 = load i8 addrspace(2)* %in
+  %1 = zext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an un-aligned i8 value
+; R600-CHECK-LABEL: @load_const_i8_unaligned
+; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK-LABEL: @load_const_i8_unaligned
+; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+entry:
+  %0 = getelementptr i8 addrspace(2)* %in, i32 1
+  %1 = load i8 addrspace(2)* %0
+  %2 = zext i8 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load a sign-extended i16 value
+; R600-CHECK-LABEL: @load_const_i16_sext
+; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
+; R600-CHECK: 16
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600-CHECK: 16
+; SI-CHECK-LABEL: @load_const_i16_sext
+; SI-CHECK: BUFFER_LOAD_SSHORT
+define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %0 = load i16 addrspace(2)* %in
+  %1 = sext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an aligned i16 value
+; R600-CHECK-LABEL: @load_const_i16_aligned
+; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK-LABEL: @load_const_i16_aligned
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %0 = load i16 addrspace(2)* %in
+  %1 = zext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an un-aligned i16 value
+; R600-CHECK-LABEL: @load_const_i16_unaligned
+; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK-LABEL: @load_const_i16_unaligned
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %0 = getelementptr i16 addrspace(2)* %in, i32 1
+  %1 = load i16 addrspace(2)* %0
+  %2 = zext i16 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an i32 value from the constant address space.
+; R600-CHECK-LABEL: @load_const_addrspace_i32
+; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+
+; SI-CHECK-LABEL: @load_const_addrspace_i32
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+entry:
+  %0 = load i32 addrspace(2)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
 ; Load a f32 value from the constant address space.
-; CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
+; R600-CHECK-LABEL: @load_const_addrspace_f32
+; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
 
+; SI-CHECK-LABEL: @load_const_addrspace_f32
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
   %1 = load float addrspace(2)* %in
   store float %1, float addrspace(1)* %out
   ret void
 }
+
+;===------------------------------------------------------------------------===;
+; LOCAL ADDRESS SPACE
+;===------------------------------------------------------------------------===;
+
+; Load an i8 value from the local address space.
+; R600-CHECK-LABEL: @load_i8_local
+; R600-CHECK: LDS_UBYTE_READ_RET
+; SI-CHECK-LABEL: @load_i8_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_U8
+define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
+  %1 = load i8 addrspace(3)* %in
+  %2 = zext i8 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_i8_sext_local
+; R600-CHECK: LDS_UBYTE_READ_RET
+; R600-CHECK: ASHR
+; SI-CHECK-LABEL: @load_i8_sext_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_I8
+define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
+entry:
+  %0 = load i8 addrspace(3)* %in
+  %1 = sext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i8_local
+; R600-CHECK: LDS_UBYTE_READ_RET
+; R600-CHECK: LDS_UBYTE_READ_RET
+; SI-CHECK-LABEL: @load_v2i8_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_U8
+; SI-CHECK: DS_READ_U8
+define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
+entry:
+  %0 = load <2 x i8> addrspace(3)* %in
+  %1 = zext <2 x i8> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i8_sext_local
+; R600-CHECK-DAG: LDS_UBYTE_READ_RET
+; R600-CHECK-DAG: LDS_UBYTE_READ_RET
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; SI-CHECK-LABEL: @load_v2i8_sext_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_I8
+; SI-CHECK: DS_READ_I8
+define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
+entry:
+  %0 = load <2 x i8> addrspace(3)* %in
+  %1 = sext <2 x i8> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i8_local
+; R600-CHECK: LDS_UBYTE_READ_RET
+; R600-CHECK: LDS_UBYTE_READ_RET
+; R600-CHECK: LDS_UBYTE_READ_RET
+; R600-CHECK: LDS_UBYTE_READ_RET
+; SI-CHECK-LABEL: @load_v4i8_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_U8
+; SI-CHECK: DS_READ_U8
+; SI-CHECK: DS_READ_U8
+; SI-CHECK: DS_READ_U8
+define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
+entry:
+  %0 = load <4 x i8> addrspace(3)* %in
+  %1 = zext <4 x i8> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i8_sext_local
+; R600-CHECK-DAG: LDS_UBYTE_READ_RET
+; R600-CHECK-DAG: LDS_UBYTE_READ_RET
+; R600-CHECK-DAG: LDS_UBYTE_READ_RET
+; R600-CHECK-DAG: LDS_UBYTE_READ_RET
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; SI-CHECK-LABEL: @load_v4i8_sext_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_I8
+; SI-CHECK: DS_READ_I8
+; SI-CHECK: DS_READ_I8
+; SI-CHECK: DS_READ_I8
+define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
+entry:
+  %0 = load <4 x i8> addrspace(3)* %in
+  %1 = sext <4 x i8> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Load an i16 value from the local address space.
+; R600-CHECK-LABEL: @load_i16_local
+; R600-CHECK: LDS_USHORT_READ_RET
+; SI-CHECK-LABEL: @load_i16_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_U16
+define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
+entry:
+  %0 = load i16	 addrspace(3)* %in
+  %1 = zext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_i16_sext_local
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: ASHR
+; SI-CHECK-LABEL: @load_i16_sext_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_I16
+define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
+entry:
+  %0 = load i16 addrspace(3)* %in
+  %1 = sext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i16_local
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; SI-CHECK-LABEL: @load_v2i16_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
+entry:
+  %0 = load <2 x i16> addrspace(3)* %in
+  %1 = zext <2 x i16> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v2i16_sext_local
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; SI-CHECK-LABEL: @load_v2i16_sext_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
+entry:
+  %0 = load <2 x i16> addrspace(3)* %in
+  %1 = sext <2 x i16> %0 to <2 x i32>
+  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i16_local
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; SI-CHECK-LABEL: @load_v4i16_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
+entry:
+  %0 = load <4 x i16> addrspace(3)* %in
+  %1 = zext <4 x i16> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK-LABEL: @load_v4i16_sext_local
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; SI-CHECK-LABEL: @load_v4i16_sext_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
+entry:
+  %0 = load <4 x i16> addrspace(3)* %in
+  %1 = sext <4 x i16> %0 to <4 x i32>
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; load an i32 value from the local address space.
+; R600-CHECK-LABEL: @load_i32_local
+; R600-CHECK: LDS_READ_RET
+; SI-CHECK-LABEL: @load_i32_local
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_READ_B32
+define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %0 = load i32 addrspace(3)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; load a f32 value from the local address space.
+; R600-CHECK-LABEL: @load_f32_local
+; R600-CHECK: LDS_READ_RET
+; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: DS_READ_B32
+define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
+entry:
+  %0 = load float addrspace(3)* %in
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; load a v2f32 value from the local address space
+; R600-CHECK-LABEL: @load_v2f32_local
+; R600-CHECK: LDS_READ_RET
+; R600-CHECK: LDS_READ_RET
+; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: DS_READ_B32
+; SI-CHECK: DS_READ_B32
+define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
+entry:
+  %0 = load <2 x float> addrspace(3)* %in
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll
new file mode 100644
index 000000000000..81a6310bbcc9
--- /dev/null
+++ b/test/CodeGen/R600/load.vec.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK  %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK  %s
+
+; load a v2i32 value from the global address space.
+; EG-CHECK: @load_v2i32
+; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
+; SI-CHECK: @load_v2i32
+; SI-CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %a = load <2 x i32> addrspace(1) * %in
+  store <2 x i32> %a, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; load a v4i32 value from the global address space.
+; EG-CHECK: @load_v4i32
+; EG-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
+; SI-CHECK: @load_v4i32
+; SI-CHECK: BUFFER_LOAD_DWORDX4 v[{{[0-9]+:[0-9]+}}]
+define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %a = load <4 x i32> addrspace(1) * %in
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll
new file mode 100644
index 000000000000..e351e4135a7d
--- /dev/null
+++ b/test/CodeGen/R600/load64.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+
+; load a f64 value from the global address space.
+; CHECK: @load_f64
+; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+entry:
+  %0 = load double addrspace(1)* %in
+  store double %0, double addrspace(1)* %out
+  ret void
+}
+
+; Load a f64 value from the constant address space.
+; CHECK: @load_const_addrspace_f64
+; CHECK: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}]
+define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
+  %1 = load double addrspace(2)* %in
+  store double %1, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
new file mode 100644
index 000000000000..e2d840645d01
--- /dev/null
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+
+@local_memory_two_objects.local_mem0 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4
+@local_memory_two_objects.local_mem1 = internal addrspace(3) unnamed_addr global [4 x i32] zeroinitializer, align 4
+
+; EG-CHECK: @local_memory_two_objects
+
+; Check that the LDS size is emitted correctly
+; EG-CHECK: .long 166120
+; EG-CHECK-NEXT: .long 8
+; SI-CHECK: .long 47180
+; SI-CHECK-NEXT: .long 32768
+
+; We would like to check that the lds writes are using different
+; addresses, but due to variations in the scheduler, we can't do
+; this consistently on evergreen GPUs.
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; SI-CHECK: DS_WRITE_B32 0, {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
+; SI-CHECK-NOT: DS_WRITE_B32 0, {{v[0-9]*}}, v[[ADDRW]]
+
+; GROUP_BARRIER must be the last instruction in a clause
+; EG-CHECK: GROUP_BARRIER
+; EG-CHECK-NEXT: ALU clause
+
+; Make sure the lds reads are using different addresses.
+; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
+; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, 0, [[ADDRR:v[0-9]+]]
+; SI-CHECK-NOT: DS_READ_B32 {{v[0-9]+}}, 0, [[ADDRR]]
+
+define void @local_memory_two_objects(i32 addrspace(1)* %out) {
+entry:
+  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %arrayidx = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
+  store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
+  %mul = shl nsw i32 %x.i, 1
+  %arrayidx1 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
+  store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
+  %sub = sub nsw i32 3, %x.i
+  call void @llvm.AMDGPU.barrier.local()
+  %arrayidx2 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
+  %0 = load i32 addrspace(3)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32 addrspace(1)* %out, i32 %x.i
+  store i32 %0, i32 addrspace(1)* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
+  %1 = load i32 addrspace(3)* %arrayidx4, align 4
+  %add = add nsw i32 %x.i, 4
+  %arrayidx5 = getelementptr inbounds i32 addrspace(1)* %out, i32 %add
+  store i32 %1, i32 addrspace(1)* %arrayidx5, align 4
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local()
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
new file mode 100644
index 000000000000..2168a3d0bd27
--- /dev/null
+++ b/test/CodeGen/R600/local-memory.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s
+
+@local_memory.local_mem = internal addrspace(3) unnamed_addr global [128 x i32] zeroinitializer, align 4
+
+; EG-CHECK-LABEL: @local_memory
+; SI-CHECK-LABEL: @local_memory
+; CI-CHECK-LABEL: @local_memory
+
+; Check that the LDS size is emitted correctly
+; EG-CHECK: .long 166120
+; EG-CHECK-NEXT: .long 128
+; SI-CHECK: .long 47180
+; SI-CHECK-NEXT: .long 65536
+; CI-CHECK: .long 47180
+; CI-CHECK-NEXT: .long 32768
+
+; EG-CHECK: LDS_WRITE
+; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: DS_WRITE_B32 0
+
+; GROUP_BARRIER must be the last instruction in a clause
+; EG-CHECK: GROUP_BARRIER
+; EG-CHECK-NEXT: ALU clause
+; SI-CHECK: S_BARRIER
+
+; EG-CHECK: LDS_READ_RET
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, 0
+
+define void @local_memory(i32 addrspace(1)* %out) {
+entry:
+  %y.i = call i32 @llvm.r600.read.tidig.x() #0
+  %arrayidx = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
+  store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
+  %add = add nsw i32 %y.i, 1
+  %cmp = icmp eq i32 %add, 16
+  %.add = select i1 %cmp, i32 0, i32 %add
+  call void @llvm.AMDGPU.barrier.local()
+  %arrayidx1 = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
+  %0 = load i32 addrspace(3)* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i32 %y.i
+  store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local()
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index 8a5458b89809..b46d8e9dfb04 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -1,13 +1,9 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: TEX
 ;CHECK: ALU_PUSH
-;CHECK: JUMP @4
-;CHECK: ELSE @16
-;CHECK: TEX
-;CHECK: LOOP_START_DX10 @15
-;CHECK: LOOP_BREAK @14
-;CHECK: POP @16
+;CHECK: LOOP_START_DX10 @11
+;CHECK: LOOP_BREAK @10
+;CHECK: POP @10
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
 target triple = "r600--"
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
index fb698da62719..216283910009 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/R600/lshl.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
+;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1
 
 define void @test(i32 %p) {
    %i = mul i32 %p, 2
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
index e0ed3ac07866..886d1c4854df 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/R600/lshr.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1
 
 define void @test(i32 %p) {
    %i = udiv i32 %p, 2
diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll
new file mode 100644
index 000000000000..df063ece35ae
--- /dev/null
+++ b/test/CodeGen/R600/mad_int24.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK: @i32_mad24
+; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
+; EG-CHECK: MULLO_INT
+; CM-CHECK: MULADD_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
+; SI-CHECK: V_MAD_I32_I24
+define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = shl i32 %a, 8
+  %a_24 = ashr i32 %0, 8
+  %1 = shl i32 %b, 8
+  %b_24 = ashr i32 %1, 8
+  %2 = mul i32 %a_24, %b_24
+  %3 = add i32 %2, %c
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll
new file mode 100644
index 000000000000..66a070ed9d4a
--- /dev/null
+++ b/test/CodeGen/R600/mad_uint24.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK-LABEL: @u32_mad24
+; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
+; SI-CHECK-LABEL: @u32_mad24
+; SI-CHECK: V_MAD_U32_U24
+
+define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = shl i32 %a, 8
+  %a_24 = lshr i32 %0, 8
+  %1 = shl i32 %b, 8
+  %b_24 = lshr i32 %1, 8
+  %2 = mul i32 %a_24, %b_24
+  %3 = add i32 %2, %c
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_mad24
+; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
+; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
+; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
+; The order of A and B does not matter.
+; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]]
+; The result must be sign-extended
+; EG-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], PV.[[MAD_CHAN]], literal.x
+; EG-CHECK: 16
+; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
+; EG-CHECK: 16
+; SI-CHECK-LABEL: @i16_mad24
+; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MAD]]
+; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]]
+
+define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
+entry:
+  %0 = mul i16 %a, %b
+  %1 = add i16 %0, %c
+  %2 = sext i16 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @i8_mad24
+; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
+; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
+; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
+; The order of A and B does not matter.
+; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]]
+; The result must be sign-extended
+; EG-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], PV.[[MAD_CHAN]], literal.x
+; EG-CHECK: 24
+; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
+; EG-CHECK: 24
+; SI-CHECK-LABEL: @i8_mad24
+; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]]
+; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]]
+
+define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
+entry:
+  %0 = mul i8 %a, %b
+  %1 = add i8 %0, %c
+  %2 = sext i8 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll
new file mode 100644
index 000000000000..65a6d2b5fc95
--- /dev/null
+++ b/test/CodeGen/R600/max-literals.ll
@@ -0,0 +1,67 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @main
+; CHECK: ADD *
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = extractelement <4 x float> %reg2, i32 0
+  %5 = fadd float %0, 2.0
+  %6 = fadd float %1, 3.0
+  %7 = fadd float %2, 4.0
+  %8 = fadd float %3, 5.0
+  %9 = bitcast float %4 to i32
+  %10 = mul i32 %9, 6
+  %11 = bitcast i32 %10 to float
+  %12 = insertelement <4 x float> undef, float %5, i32 0
+  %13 = insertelement <4 x float> %12, float %6, i32 1
+  %14 = insertelement <4 x float> %13, float %7, i32 2
+  %15 = insertelement <4 x float> %14, float %8, i32 3
+  %16 = insertelement <4 x float> %15, float %11, i32 3
+
+  %17 = call float @llvm.AMDGPU.dp4(<4 x float> %15,<4 x float> %16)
+  %18 = insertelement <4 x float> undef, float %17, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
+  ret void
+}
+
+; CHECK: @main2
+; CHECK-NOT: ADD *
+
+define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = extractelement <4 x float> %reg2, i32 0
+  %5 = fadd float %0, 2.0
+  %6 = fadd float %1, 3.0
+  %7 = fadd float %2, 4.0
+  %8 = fadd float %3, 2.0
+  %9 = bitcast float %4 to i32
+  %10 = mul i32 %9, 6
+  %11 = bitcast i32 %10 to float
+  %12 = insertelement <4 x float> undef, float %5, i32 0
+  %13 = insertelement <4 x float> %12, float %6, i32 1
+  %14 = insertelement <4 x float> %13, float %7, i32 2
+  %15 = insertelement <4 x float> %14, float %8, i32 3
+  %16 = insertelement <4 x float> %15, float %11, i32 3
+
+  %17 = call float @llvm.AMDGPU.dp4(<4 x float> %15,<4 x float> %16)
+  %18 = insertelement <4 x float> undef, float %17, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index 7278e9039840..8c27e28df164 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -1,12 +1,38 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
 ; mul24 and mad24 are affected
-;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+;EG-CHECK: @test2
+;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test2
+;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = mul <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @test4
+;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test4
+;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll
new file mode 100644
index 000000000000..66a1a9e5bd99
--- /dev/null
+++ b/test/CodeGen/R600/mul_int24.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK: @i32_mul24
+; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
+; EG-CHECK: MULLO_INT
+; CM-CHECK: MUL_INT24 {{[ *]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI-CHECK: V_MUL_I32_I24
+define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = shl i32 %a, 8
+  %a_24 = ashr i32 %0, 8
+  %1 = shl i32 %b, 8
+  %b_24 = ashr i32 %1, 8
+  %2 = mul i32 %a_24, %b_24
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
new file mode 100644
index 000000000000..6e6d5496789f
--- /dev/null
+++ b/test/CodeGen/R600/mul_uint24.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; EG-CHECK-LABEL: @u32_mul24
+; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI-CHECK-LABEL: @u32_mul24
+; SI-CHECK: V_MUL_U32_U24
+
+define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = shl i32 %a, 8
+  %a_24 = lshr i32 %0, 8
+  %1 = shl i32 %b, 8
+  %b_24 = lshr i32 %1, 8
+  %2 = mul i32 %a_24, %b_24
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @i16_mul24
+; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
+; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
+; The order of A and B does not matter.
+; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]]
+; The result must be sign-extended
+; EG-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], PV.[[MUL_CHAN]], literal.x
+; EG-CHECK: 16
+; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
+; EG-CHECK: 16
+; SI-CHECK-LABEL: @i16_mul24
+; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MUL]]
+; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]]
+
+define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+entry:
+  %0 = mul i16 %a, %b
+  %1 = sext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @i8_mul24
+; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
+; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
+; The order of A and B does not matter.
+; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]]
+; The result must be sign-extended
+; EG-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], PV.[[MUL_CHAN]], literal.x
+; EG-CHECK: 24
+; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
+; EG-CHECK: 24
+; SI-CHECK-LABEL: @i8_mul24
+; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]]
+; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]]
+
+define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
+entry:
+  %0 = mul i8 %a, %b
+  %1 = sext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
index bc17a597873e..d5fc01412123 100644
--- a/test/CodeGen/R600/mulhu.ll
+++ b/test/CodeGen/R600/mulhu.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
-;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
-;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, -1431655765
+;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
+;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0
 
 define void @test(i32 %p) {
    %i = udiv i32 %p, 3
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index b0dbb021e822..35d23b3d27ad 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -1,13 +1,53 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; CHECK: @or_v4i32
-; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-CHECK-LABEL: @or_v2i32
+; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+;SI-CHECK-LABEL: @or_v2i32
+;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = or <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @or_v4i32
+; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK-LABEL: @or_v4i32
+;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %a = load <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32> addrspace(1) * %b_ptr
   %result = or <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+; EG-CHECK-LABEL: @or_i64
+; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
+; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
+; SI-CHECK-LABEL: @or_i64
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+define void @or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+	%0 = or i64 %a, %b
+	store i64 %0, i64 addrspace(1)* %out
+	ret void
+}
diff --git a/test/CodeGen/R600/packetizer.ll b/test/CodeGen/R600/packetizer.ll
new file mode 100644
index 000000000000..0a405c57ea93
--- /dev/null
+++ b/test/CodeGen/R600/packetizer.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+
+; CHECK: @test
+; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X
+; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y
+; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z
+; CHECK: BIT_ALIGN_INT * T{{[0-9]}}.W
+
+define void @test(i32 addrspace(1)* %out, i32 %x_arg, i32 %y_arg, i32 %z_arg, i32 %w_arg, i32 %e) {
+entry:
+  %shl = sub i32 32, %e
+  %x = add i32 %x_arg, 1
+  %x.0 = shl i32 %x, %shl
+  %x.1 = lshr i32 %x, %e
+  %x.2 = or i32 %x.0, %x.1
+  %y = add i32 %y_arg, 1
+  %y.0 = shl i32 %y, %shl
+  %y.1 = lshr i32 %y, %e
+  %y.2 = or i32 %y.0, %y.1
+  %z = add i32 %z_arg, 1
+  %z.0 = shl i32 %z, %shl
+  %z.1 = lshr i32 %z, %e
+  %z.2 = or i32 %z.0, %z.1
+  %w = add i32 %w_arg, 1
+  %w.0 = shl i32 %w, %shl
+  %w.1 = lshr i32 %w, %e
+  %w.2 = or i32 %w.0, %w.1
+  %xy = or i32 %x.2, %y.2
+  %zw = or i32 %z.2, %w.2
+  %xyzw = or i32 %xy, %zw
+  store i32 %xyzw, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll
new file mode 100644
index 000000000000..4afaf684bfce
--- /dev/null
+++ b/test/CodeGen/R600/parallelandifcollapse.ll
@@ -0,0 +1,54 @@
+; Function Attrs: nounwind
+; RUN: llc < %s -march=r600 -mcpu=redwood  | FileCheck %s
+;
+; CFG flattening should use parallel-and mode to generate branch conditions and
+; then merge if-regions with the same bodies.
+;
+; CHECK: AND_INT
+; CHECK-NEXT: AND_INT
+; CHECK-NEXT: OR_INT
+define void @_Z9chk1D_512v() #0 {
+entry:
+  %a0 = alloca i32, align 4
+  %b0 = alloca i32, align 4
+  %c0 = alloca i32, align 4
+  %d0 = alloca i32, align 4
+  %a1 = alloca i32, align 4
+  %b1 = alloca i32, align 4
+  %c1 = alloca i32, align 4
+  %d1 = alloca i32, align 4
+  %data = alloca i32, align 4
+  %0 = load i32* %a0, align 4
+  %1 = load i32* %b0, align 4
+  %cmp = icmp ne i32 %0, %1
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %2 = load i32* %c0, align 4
+  %3 = load i32* %d0, align 4
+  %cmp1 = icmp ne i32 %2, %3
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true
+  store i32 1, i32* %data, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %land.lhs.true, %entry
+  %4 = load i32* %a1, align 4
+  %5 = load i32* %b1, align 4
+  %cmp2 = icmp ne i32 %4, %5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end6
+
+land.lhs.true3:                                   ; preds = %if.end
+  %6 = load i32* %c1, align 4
+  %7 = load i32* %d1, align 4
+  %cmp4 = icmp ne i32 %6, %7
+  br i1 %cmp4, label %if.then5, label %if.end6
+
+if.then5:                                         ; preds = %land.lhs.true3
+  store i32 1, i32* %data, align 4
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.then5, %land.lhs.true3, %if.end
+  ret void
+}
diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll
new file mode 100644
index 000000000000..b0db7cdd0671
--- /dev/null
+++ b/test/CodeGen/R600/parallelorifcollapse.ll
@@ -0,0 +1,61 @@
+; Function Attrs: nounwind
+; RUN: llc < %s -march=r600 -mcpu=redwood  | FileCheck %s
+;
+; CFG flattening should use parallel-or to generate branch conditions and
+; then merge if-regions with the same bodies.
+;
+; CHECK: OR_INT
+; CHECK-NEXT: OR_INT
+; CHECK-NEXT: OR_INT
+define void @_Z9chk1D_512v() #0 {
+entry:
+  %a0 = alloca i32, align 4
+  %b0 = alloca i32, align 4
+  %c0 = alloca i32, align 4
+  %d0 = alloca i32, align 4
+  %a1 = alloca i32, align 4
+  %b1 = alloca i32, align 4
+  %c1 = alloca i32, align 4
+  %d1 = alloca i32, align 4
+  %data = alloca i32, align 4
+  %0 = load i32* %a0, align 4
+  %1 = load i32* %b0, align 4
+  %cmp = icmp ne i32 %0, %1
+  br i1 %cmp, label %land.lhs.true, label %if.else
+
+land.lhs.true:                                    ; preds = %entry
+  %2 = load i32* %c0, align 4
+  %3 = load i32* %d0, align 4
+  %cmp1 = icmp ne i32 %2, %3
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %land.lhs.true
+  br label %if.end
+
+if.else:                                          ; preds = %land.lhs.true, %entry
+  store i32 1, i32* %data, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %4 = load i32* %a1, align 4
+  %5 = load i32* %b1, align 4
+  %cmp2 = icmp ne i32 %4, %5
+  br i1 %cmp2, label %land.lhs.true3, label %if.else6
+
+land.lhs.true3:                                   ; preds = %if.end
+  %6 = load i32* %c1, align 4
+  %7 = load i32* %d1, align 4
+  %cmp4 = icmp ne i32 %6, %7
+  br i1 %cmp4, label %if.then5, label %if.else6
+
+if.then5:                                         ; preds = %land.lhs.true3
+  br label %if.end7
+
+if.else6:                                         ; preds = %land.lhs.true3, %if.end
+  store i32 1, i32* %data, align 4
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.else6, %if.then5
+  ret void
+}
+
diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/R600/predicate-dp4.ll
new file mode 100644
index 000000000000..e48d6a7aa9a8
--- /dev/null
+++ b/test/CodeGen/R600/predicate-dp4.ll
@@ -0,0 +1,27 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman
+
+; CHECK-LABEL: @main
+; CHECK: PRED_SETE_INT * Pred,
+; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
+define void @main(<4 x float> inreg) #0 {
+main_body:
+  %1 = extractelement <4 x float> %0, i32 0
+  %2 = bitcast float %1 to i32
+  %3 = icmp eq i32 %2, 0
+  br i1 %3, label %IF, label %ENDIF
+
+IF:                                             ; preds = %main_body
+  %4 = call float @llvm.AMDGPU.dp4(<4 x float> %0, <4 x float> %0)
+  br label %ENDIF
+
+ENDIF:                                            ; preds = %IF, %main_body
+  %5 = phi float [%4, %IF], [0.000000e+00, %main_body]
+  %6 = insertelement <4 x float> undef, float %5, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index 0d3eeef26307..902508ff9e05 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mattr=disable-irstructurizer -mcpu=redwood | FileCheck %s
 
 ; These tests make sure the compiler is optimizing branches using predicates
 ; when it is legal to do so.
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
new file mode 100644
index 000000000000..48a013c8e549
--- /dev/null
+++ b/test/CodeGen/R600/private-memory.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+
+; This test checks that uses and defs of the AR register happen in the same
+; instruction clause.
+
+; R600-CHECK-LABEL: @mova_same_clause
+; R600-CHECK: MOVA_INT
+; R600-CHECK-NOT: ALU clause
+; R600-CHECK: 0 + AR.x
+; R600-CHECK: MOVA_INT
+; R600-CHECK-NOT: ALU clause
+; R600-CHECK: 0 + AR.x
+
+; SI-CHECK-LABEL: @mova_same_clause
+; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_MOVRELD
+; SI-CHECK: S_CBRANCH
+; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_MOVRELD
+; SI-CHECK: S_CBRANCH
+define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %0 = load i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
+  store i32 4, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
+  %1 = load i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
+  store i32 5, i32* %arrayidx3, align 4
+  %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
+  %2 = load i32* %arrayidx10, align 4
+  store i32 %2, i32 addrspace(1)* %out, align 4
+  %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
+  %3 = load i32* %arrayidx12
+  %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
+  store i32 %3, i32 addrspace(1)* %arrayidx13
+  ret void
+}
+
+; This test checks that the stack offset is calculated correctly for structs.
+; All register loads/stores should be optimized away, so there shouldn't be
+; any MOVA instructions.
+;
+; XXX: This generated code has unnecessary MOVs, we should be able to optimize
+; this.
+
+; R600-CHECK-LABEL: @multiple_structs
+; R600-CHECK-NOT: MOVA_INT
+; SI-CHECK-LABEL: @multiple_structs
+; SI-CHECK-NOT: V_MOVREL
+%struct.point = type { i32, i32 }
+
+define void @multiple_structs(i32 addrspace(1)* %out) {
+entry:
+  %a = alloca %struct.point
+  %b = alloca %struct.point
+  %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
+  %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
+  %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
+  %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
+  store i32 0, i32* %a.x.ptr
+  store i32 1, i32* %a.y.ptr
+  store i32 2, i32* %b.x.ptr
+  store i32 3, i32* %b.y.ptr
+  %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
+  %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
+  %a.indirect = load i32* %a.indirect.ptr
+  %b.indirect = load i32* %b.indirect.ptr
+  %0 = add i32 %a.indirect, %b.indirect
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; Test direct access of a private array inside a loop.  The private array
+; loads and stores should be lowered to copies, so there shouldn't be any
+; MOVA instructions.
+
+; R600-CHECK-LABEL: @direct_loop
+; R600-CHECK-NOT: MOVA_INT
+; SI-CHECK-LABEL: @direct_loop
+; SI-CHECK-NOT: V_MOVREL
+
+define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %prv_array_const = alloca [2 x i32]
+  %prv_array = alloca [2 x i32]
+  %a = load i32 addrspace(1)* %in
+  %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %b = load i32 addrspace(1)* %b_src_ptr
+  %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+  store i32 %a, i32* %a_dst_ptr
+  %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
+  store i32 %b, i32* %b_dst_ptr
+  br label %for.body
+
+for.body:
+  %inc = phi i32 [0, %entry], [%count, %for.body]
+  %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+  %x = load i32* %x_ptr
+  %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
+  %y = load i32* %y_ptr
+  %xy = add i32 %x, %y
+  store i32 %xy, i32* %y_ptr
+  %count = add i32 %inc, 1
+  %done = icmp eq i32 %count, 4095
+  br i1 %done, label %for.end, label %for.body
+
+for.end:
+  %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
+  %value = load i32* %value_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll
new file mode 100644
index 000000000000..e5615b99728e
--- /dev/null
+++ b/test/CodeGen/R600/pv-packing.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+
+;CHECK: DOT4  T{{[0-9]\.X}}
+;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg2, i32 0
+  %4 = extractelement <4 x float> %reg2, i32 1
+  %5 = extractelement <4 x float> %reg2, i32 2
+  %6 = extractelement <4 x float> %reg3, i32 0
+  %7 = extractelement <4 x float> %reg3, i32 1
+  %8 = extractelement <4 x float> %reg3, i32 2
+  %9 = load <4 x float> addrspace(8)* null
+  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
+  %12 = fmul float %0, %3
+  %13 = fadd float %12, %6
+  %14 = fmul float %1, %4
+  %15 = fadd float %14, %7
+  %16 = fmul float %2, %5
+  %17 = fadd float %16, %8
+  %18 = fmul float %11, %11
+  %19 = fadd float %18, %0
+  %20 = insertelement <4 x float> undef, float %13, i32 0
+  %21 = insertelement <4 x float> %20, float %15, i32 1
+  %22 = insertelement <4 x float> %21, float %17, i32 2
+  %23 = insertelement <4 x float> %22, float %19, i32 3
+  %24 = call float @llvm.AMDGPU.dp4(<4 x float> %23, <4 x float> %10)
+  %25 = insertelement <4 x float> undef, float %24, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %25, i32 0, i32 2)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 37c3d9d7d6d1..5a930b292682 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,38 +1,38 @@
 ; RUN: llc < %s -march=r600 | FileCheck %s
 
 ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK-NEXT: CNDGE T{{[0-9].[XYZW]}}, PV.x
+;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
 
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
 main_body:
-  %0 = call float @llvm.R600.load.input(i32 4)
-  %1 = call float @llvm.R600.load.input(i32 5)
-  %2 = call float @llvm.R600.load.input(i32 6)
-  %3 = call float @llvm.R600.load.input(i32 7)
-  %4 = call float @llvm.R600.load.input(i32 8)
-  %5 = call float @llvm.R600.load.input(i32 9)
-  %6 = call float @llvm.R600.load.input(i32 10)
-  %7 = call float @llvm.R600.load.input(i32 11)
-  %8 = call float @llvm.R600.load.input(i32 12)
-  %9 = call float @llvm.R600.load.input(i32 13)
-  %10 = call float @llvm.R600.load.input(i32 14)
-  %11 = call float @llvm.R600.load.input(i32 15)
-  %12 = call float @llvm.R600.load.input(i32 16)
-  %13 = call float @llvm.R600.load.input(i32 17)
-  %14 = call float @llvm.R600.load.input(i32 18)
-  %15 = call float @llvm.R600.load.input(i32 19)
-  %16 = call float @llvm.R600.load.input(i32 20)
-  %17 = call float @llvm.R600.load.input(i32 21)
-  %18 = call float @llvm.R600.load.input(i32 22)
-  %19 = call float @llvm.R600.load.input(i32 23)
-  %20 = call float @llvm.R600.load.input(i32 24)
-  %21 = call float @llvm.R600.load.input(i32 25)
-  %22 = call float @llvm.R600.load.input(i32 26)
-  %23 = call float @llvm.R600.load.input(i32 27)
-  %24 = call float @llvm.R600.load.input(i32 28)
-  %25 = call float @llvm.R600.load.input(i32 29)
-  %26 = call float @llvm.R600.load.input(i32 30)
-  %27 = call float @llvm.R600.load.input(i32 31)
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = extractelement <4 x float> %reg2, i32 0
+  %5 = extractelement <4 x float> %reg2, i32 1
+  %6 = extractelement <4 x float> %reg2, i32 2
+  %7 = extractelement <4 x float> %reg2, i32 3
+  %8 = extractelement <4 x float> %reg3, i32 0
+  %9 = extractelement <4 x float> %reg3, i32 1
+  %10 = extractelement <4 x float> %reg3, i32 2
+  %11 = extractelement <4 x float> %reg3, i32 3
+  %12 = extractelement <4 x float> %reg4, i32 0
+  %13 = extractelement <4 x float> %reg4, i32 1
+  %14 = extractelement <4 x float> %reg4, i32 2
+  %15 = extractelement <4 x float> %reg4, i32 3
+  %16 = extractelement <4 x float> %reg5, i32 0
+  %17 = extractelement <4 x float> %reg5, i32 1
+  %18 = extractelement <4 x float> %reg5, i32 2
+  %19 = extractelement <4 x float> %reg5, i32 3
+  %20 = extractelement <4 x float> %reg6, i32 0
+  %21 = extractelement <4 x float> %reg6, i32 1
+  %22 = extractelement <4 x float> %reg6, i32 2
+  %23 = extractelement <4 x float> %reg6, i32 3
+  %24 = extractelement <4 x float> %reg7, i32 0
+  %25 = extractelement <4 x float> %reg7, i32 1
+  %26 = extractelement <4 x float> %reg7, i32 2
+  %27 = extractelement <4 x float> %reg7, i32 3
   %28 = load <4 x float> addrspace(8)* null
   %29 = extractelement <4 x float> %28, i32 0
   %30 = fmul float %0, %29
@@ -219,9 +219,6 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-; Function Attrs: readnone
 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
 
 ; Function Attrs: readonly
diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll
index c8040a1b4cd5..b760c882f4e3 100644
--- a/test/CodeGen/R600/r600-encoding.ll
+++ b/test/CodeGen/R600/r600-encoding.ll
@@ -5,20 +5,21 @@
 ; the VLIW4/5 GPUs.
 
 ; EG-CHECK: @test
-; EG-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
+; EG-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
 
 ; R600-CHECK: @test
-; R600-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
+; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
 
-define void @test() {
+define void @test(<4 x float> inreg %reg0) #0 {
 entry:
-  %0 = call float @llvm.R600.load.input(i32 0)
-  %1 = call float @llvm.R600.load.input(i32 1)
-  %2 = fmul float %0, %1
-  call void @llvm.AMDGPU.store.output(float %2, i32 0)
+  %r0 = extractelement <4 x float> %reg0, i32 0
+  %r1 = extractelement <4 x float> %reg0, i32 1
+  %r2 = fmul float %r0, %r1
+  %vec = insertelement <4 x float> undef, float %r2, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
   ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll
new file mode 100644
index 000000000000..73bc0635ab21
--- /dev/null
+++ b/test/CodeGen/R600/r600-export-fix.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s
+
+;CHECK:	EXPORT T{{[0-9]}}.XYZW
+;CHECK:	EXPORT T{{[0-9]}}.0000
+;CHECK: EXPORT T{{[0-9]}}.0000
+;CHECK: EXPORT T{{[0-9]}}.0XZW
+;CHECK: EXPORT T{{[0-9]}}.XYZW
+;CHECK: EXPORT T{{[0-9]}}.YX00
+;CHECK: EXPORT T{{[0-9]}}.0000
+;CHECK: EXPORT T{{[0-9]}}.0000
+
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {  ; combines %reg1 with elements 4-7 of the addrspace(8) array, then issues eight store.swizzle calls
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0  ; %0..%3 = the four lanes of %reg1 (%reg0 is never read)
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)  ; element 4 of the array based at null in addrspace(8); reloaded before every lane extract
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = fmul float %5, %0  ; %6, %9, %12, %15 = element4.{0,1,2,3} * %0
+  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %8 = extractelement <4 x float> %7, i32 1
+  %9 = fmul float %8, %0
+  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %11 = extractelement <4 x float> %10, i32 2
+  %12 = fmul float %11, %0
+  %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %14 = extractelement <4 x float> %13, i32 3
+  %15 = fmul float %14, %0
+  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)  ; element 5: multiplied by %1 and accumulated
+  %17 = extractelement <4 x float> %16, i32 0
+  %18 = fmul float %17, %1
+  %19 = fadd float %18, %6
+  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %21 = extractelement <4 x float> %20, i32 1
+  %22 = fmul float %21, %1
+  %23 = fadd float %22, %9
+  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %25 = extractelement <4 x float> %24, i32 2
+  %26 = fmul float %25, %1
+  %27 = fadd float %26, %12
+  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %29 = extractelement <4 x float> %28, i32 3
+  %30 = fmul float %29, %1
+  %31 = fadd float %30, %15
+  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)  ; element 6: multiplied by %2 and accumulated
+  %33 = extractelement <4 x float> %32, i32 0
+  %34 = fmul float %33, %2
+  %35 = fadd float %34, %19
+  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %37 = extractelement <4 x float> %36, i32 1
+  %38 = fmul float %37, %2
+  %39 = fadd float %38, %23
+  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %41 = extractelement <4 x float> %40, i32 2
+  %42 = fmul float %41, %2
+  %43 = fadd float %42, %27
+  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %45 = extractelement <4 x float> %44, i32 3
+  %46 = fmul float %45, %2
+  %47 = fadd float %46, %31
+  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)  ; element 7: multiplied by %3 and accumulated
+  %49 = extractelement <4 x float> %48, i32 0
+  %50 = fmul float %49, %3
+  %51 = fadd float %50, %35  ; %51, %55, %59, %63 = the four finished sums
+  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %53 = extractelement <4 x float> %52, i32 1
+  %54 = fmul float %53, %3
+  %55 = fadd float %54, %39
+  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %57 = extractelement <4 x float> %56, i32 2
+  %58 = fmul float %57, %3
+  %59 = fadd float %58, %43
+  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %61 = extractelement <4 x float> %60, i32 3
+  %62 = fmul float %61, %3
+  %63 = fadd float %62, %47
+  %64 = load <4 x float> addrspace(8)* null  ; element 0 (the null base itself): lanes 0-2 become %65, %67, %69
+  %65 = extractelement <4 x float> %64, i32 0
+  %66 = load <4 x float> addrspace(8)* null
+  %67 = extractelement <4 x float> %66, i32 1
+  %68 = load <4 x float> addrspace(8)* null
+  %69 = extractelement <4 x float> %68, i32 2
+  %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)  ; element 2: lanes 0-2 become %71, %73, %75
+  %71 = extractelement <4 x float> %70, i32 0
+  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %73 = extractelement <4 x float> %72, i32 1
+  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %75 = extractelement <4 x float> %74, i32 2
+  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)  ; element 3: lanes 0-2 become %77, %79, %81
+  %77 = extractelement <4 x float> %76, i32 0
+  %78 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %79 = extractelement <4 x float> %78, i32 1
+  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %81 = extractelement <4 x float> %80, i32 2
+  %82 = insertelement <4 x float> undef, float %51, i32 0  ; store 1 of 8: the four accumulated sums
+  %83 = insertelement <4 x float> %82, float %55, i32 1
+  %84 = insertelement <4 x float> %83, float %59, i32 2
+  %85 = insertelement <4 x float> %84, float %63, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %85, i32 60, i32 1)  ; the only call whose last operand is 1; the remaining seven pass 2
+  %86 = insertelement <4 x float> undef, float 0.000000e+00, i32 0  ; store 2 of 8: all-zero vector
+  %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1
+  %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2
+  %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %89, i32 0, i32 2)
+  %90 = insertelement <4 x float> undef, float 0.000000e+00, i32 0  ; store 3 of 8: all-zero vector
+  %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 1
+  %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2
+  %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %93, i32 1, i32 2)
+  %94 = insertelement <4 x float> undef, float 0.000000e+00, i32 0  ; store 4 of 8: zero followed by element 0 lanes 0-2
+  %95 = insertelement <4 x float> %94, float %65, i32 1
+  %96 = insertelement <4 x float> %95, float %67, i32 2
+  %97 = insertelement <4 x float> %96, float %69, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %97, i32 2, i32 2)
+  %98 = insertelement <4 x float> undef, float %77, i32 0  ; store 5 of 8: element 3 lanes 0-2 followed by element 2 lane 0
+  %99 = insertelement <4 x float> %98, float %79, i32 1
+  %100 = insertelement <4 x float> %99, float %81, i32 2
+  %101 = insertelement <4 x float> %100, float %71, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %101, i32 3, i32 2)
+  %102 = insertelement <4 x float> undef, float %73, i32 0  ; store 6 of 8: element 2 lanes 1-2 followed by two zeros
+  %103 = insertelement <4 x float> %102, float %75, i32 1
+  %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2
+  %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %105, i32 4, i32 2)
+  %106 = insertelement <4 x float> undef, float 0.000000e+00, i32 0  ; store 7 of 8: all-zero vector
+  %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1
+  %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2
+  %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %109, i32 5, i32 2)
+  %110 = insertelement <4 x float> undef, float 0.000000e+00, i32 0  ; store 8 of 8: all-zero vector
+  %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1
+  %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2
+  %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %113, i32 6, i32 2)
+  ret void
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
new file mode 100644
index 000000000000..6dee3ef89cf5
--- /dev/null
+++ b/test/CodeGen/R600/r600cfg.ll
@@ -0,0 +1,120 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood
+;REQUIRES: asserts
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {  ; outer loop with an early-return exit, an if/else diamond and an inner counted loop; all integer work is done on float bit patterns
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1  ; %1..%3 are extracted but never used below
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = bitcast float %0 to i32
+  %5 = icmp eq i32 %4, 0
+  %6 = sext i1 %5 to i32  ; i1 -> i32 -> float -> i32 -> i1 round trip; the same idiom is repeated for every condition in this function
+  %7 = bitcast i32 %6 to float
+  %8 = bitcast float %7 to i32
+  %9 = icmp ne i32 %8, 0
+  %. = select i1 %9, float 0x36A0000000000000, float %0  ; 0x36A0000000000000 is 2^-149, the float whose bit pattern is integer 1; chosen when %0's bits are zero
+  br label %LOOP
+
+LOOP:                                             ; preds = %LOOP47, %main_body
+  %temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
+  %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
+  %temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
+  %10 = bitcast float %temp4.1 to i32
+  %11 = icmp eq i32 %10, 1  ; outer loop exits through IF41 once the bits of %temp4.1 equal 1
+  %12 = sext i1 %11 to i32
+  %13 = bitcast i32 %12 to float
+  %14 = bitcast float %13 to i32
+  %15 = icmp ne i32 %14, 0
+  br i1 %15, label %IF41, label %ENDIF40
+
+IF41:                                             ; preds = %LOOP
+  %16 = insertelement <4 x float> undef, float %0, i32 0  ; the vector (%0, %temp8.0, %temp12.0, 0.0) is rebuilt before each of the three stream-output calls
+  %17 = insertelement <4 x float> %16, float %temp8.0, i32 1
+  %18 = insertelement <4 x float> %17, float %temp12.0, i32 2
+  %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)  ; last operand is 1, 2, 4 across the three calls
+  %20 = insertelement <4 x float> undef, float %0, i32 0
+  %21 = insertelement <4 x float> %20, float %temp8.0, i32 1
+  %22 = insertelement <4 x float> %21, float %temp12.0, i32 2
+  %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
+  %24 = insertelement <4 x float> undef, float %0, i32 0
+  %25 = insertelement <4 x float> %24, float %temp8.0, i32 1
+  %26 = insertelement <4 x float> %25, float %temp12.0, i32 2
+  %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
+  %28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0  ; all-zero vector for the first store.swizzle
+  %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
+  %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
+  %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
+  %32 = insertelement <4 x float> undef, float %0, i32 0  ; same (%0, %temp8.0, %temp12.0, 0.0) vector for the second store.swizzle
+  %33 = insertelement <4 x float> %32, float %temp8.0, i32 1
+  %34 = insertelement <4 x float> %33, float %temp12.0, i32 2
+  %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
+  ret void
+
+ENDIF40:                                          ; preds = %LOOP
+  %36 = bitcast float %temp8.0 to i32
+  %37 = add i32 %36, 1  ; integer increment of the %temp8.0 bits (iteration counter of the outer loop)
+  %38 = bitcast i32 %37 to float
+  %39 = bitcast float %temp4.1 to i32
+  %40 = urem i32 %39, 2  ; parity of the %temp4.1 bits selects IF44 (even) or ELSE45 (odd)
+  %41 = bitcast i32 %40 to float
+  %42 = bitcast float %41 to i32
+  %43 = icmp eq i32 %42, 0
+  %44 = sext i1 %43 to i32
+  %45 = bitcast i32 %44 to float
+  %46 = bitcast float %45 to i32
+  %47 = icmp ne i32 %46, 0
+  %48 = bitcast float %temp4.1 to i32
+  br i1 %47, label %IF44, label %ELSE45
+
+IF44:                                             ; preds = %ENDIF40
+  %49 = udiv i32 %48, 2  ; even case: halve
+  br label %ENDIF43
+
+ELSE45:                                           ; preds = %ENDIF40
+  %50 = mul i32 3, %48  ; odd case: 3 * value + 1
+  %51 = add i32 %50, 1
+  br label %ENDIF43
+
+ENDIF43:                                          ; preds = %ELSE45, %IF44
+  %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
+  %52 = bitcast i32 %.sink to float  ; next value of %temp4.1
+  %53 = load <4 x float> addrspace(8)* null
+  %54 = extractelement <4 x float> %53, i32 0
+  %55 = bitcast float %54 to i32  ; trip-count bound of the inner loop, read from lane 0 of the vector at addrspace(8) null
+  br label %LOOP47
+
+LOOP47:                                           ; preds = %ENDIF48, %ENDIF43
+  %temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
+  %temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
+  %56 = bitcast float %temp28.0 to i32
+  %57 = icmp uge i32 %56, %55  ; inner loop finishes (back to LOOP) once the counter reaches the bound
+  %58 = sext i1 %57 to i32
+  %59 = bitcast i32 %58 to float
+  %60 = bitcast float %59 to i32
+  %61 = icmp ne i32 %60, 0
+  br i1 %61, label %LOOP, label %ENDIF48
+
+ENDIF48:                                          ; preds = %LOOP47
+  %62 = bitcast float %temp12.1 to i32
+  %63 = mul i32 %62, 2  ; %temp12.1 bits become (bits * 2) urem 2147483647
+  %64 = bitcast i32 %63 to float
+  %65 = bitcast float %64 to i32
+  %66 = urem i32 %65, 2147483647
+  %67 = bitcast i32 %66 to float
+  %68 = bitcast float %temp28.0 to i32
+  %69 = add i32 %68, 1  ; inner counter += 1
+  %70 = bitcast i32 %69 to float
+  br label %LOOP47
+}
+
+declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll
index 27839296703f..b4ac47afced7 100644
--- a/test/CodeGen/R600/reciprocal.ll
+++ b/test/CodeGen/R600/reciprocal.ll
@@ -2,15 +2,14 @@
 
 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
-   %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0  {
+   %r0 = extractelement <4 x float> %reg0, i32 0
    %r1 = fdiv float 1.0, %r0
-   call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+   %vec = insertelement <4 x float> undef, float %r1, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
 
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-declare void @llvm.AMDGPU.store.output(float, i32)
-
-declare float @llvm.AMDGPU.rcp(float ) readnone
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll
new file mode 100644
index 000000000000..edf7aeebea0f
--- /dev/null
+++ b/test/CodeGen/R600/rotr.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+
+; R600-CHECK-LABEL: @rotr:
+; R600-CHECK: BIT_ALIGN_INT
+
+; SI-CHECK-LABEL: @rotr:
+; SI-CHECK: V_ALIGNBIT_B32
+define void @rotr(i32 addrspace(1)* %in, i32 %x, i32 %y) {  ; stores (%x << (32 - %y)) | (%x >> %y) to %in
+entry:
+  %0 = sub i32 32, %y  ; complementary shift amount
+  %1 = shl i32 %x, %0  ; low %y bits of %x moved to the top
+  %2 = lshr i32 %x, %y  ; remaining bits moved down by %y
+  %3 = or i32 %1, %2  ; both halves combined: %x rotated right by %y
+  store i32 %3, i32 addrspace(1)* %in  ; result written through the addrspace(1) pointer argument
+  ret void
+}
+
+; R600-CHECK-LABEL: @rotl:
+; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
+; R600-CHECK-NEXT: 32
+; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
+
+
+; SI-CHECK-LABEL: @rotl:
+; SI-CHECK: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
+; SI-CHECK: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
+; SI-CHECK: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) {  ; stores (%x << %y) | (%x >> (32 - %y)) to %in
+entry:
+  %0 = shl i32 %x, %y  ; bits of %x moved up by %y
+  %1 = sub i32 32, %y  ; complementary shift amount; the directives above expect it as an explicit subtract from 32
+  %2 = lshr i32 %x, %1  ; top %y bits of %x moved to the bottom
+  %3 = or i32 %0, %2  ; both halves combined: %x rotated left by %y
+  store i32 %3, i32 addrspace(1)* %in  ; result written through the addrspace(1) pointer argument
+  ret void
+}
diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll
new file mode 100644
index 000000000000..c3fd923e4593
--- /dev/null
+++ b/test/CodeGen/R600/rv7x0_count3.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=r600 -show-mc-encoding  -mcpu=rv710 | FileCheck %s
+
+; CHECK: TEX 9 @6 ;  encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
+
+define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {  ; issues ten llvm.AMDGPU.tex calls on one coordinate vector, sums the results and stores the total
+   %1 = extractelement <4 x float> %reg1, i32 0  ; unnamed entry block takes %0, so values start at %1
+   %2 = extractelement <4 x float> %reg1, i32 1
+   %3 = extractelement <4 x float> %reg1, i32 2
+   %4 = extractelement <4 x float> %reg1, i32 3
+   %5 = insertelement <4 x float> undef, float %1, i32 0  ; %5..%8 rebuild %reg1 lane by lane
+   %6 = insertelement <4 x float> %5, float %2, i32 1
+   %7 = insertelement <4 x float> %6, float %3, i32 2
+   %8 = insertelement <4 x float> %7, float %4, i32 3
+   %9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)  ; ten fetches; only the second operand varies (0 through 9)
+   %10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 1, i32 0, i32 1)
+   %11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 2, i32 0, i32 1)
+   %12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 3, i32 0, i32 1)
+   %13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 4, i32 0, i32 1)
+   %14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 5, i32 0, i32 1)
+   %15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 6, i32 0, i32 1)
+   %16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 7, i32 0, i32 1)
+   %17 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 8, i32 0, i32 1)
+   %18 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 9, i32 0, i32 1)
+   %19 = fadd <4 x float> %9, %10  ; left-to-right vector sum of all ten results keeps every fetch live
+   %20 = fadd <4 x float> %19, %11
+   %21 = fadd <4 x float> %20, %12
+   %22 = fadd <4 x float> %21, %13
+   %23 = fadd <4 x float> %22, %14
+   %24 = fadd <4 x float> %23, %15
+   %25 = fadd <4 x float> %24, %16
+   %26 = fadd <4 x float> %25, %17
+   %27 = fadd <4 x float> %26, %18
+   call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 2)  ; the summed vector is the only value stored
+   ret void
+}
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
index ba9620c40a49..11e8f5176f44 100644
--- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
+++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
@@ -1,12 +1,12 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
 ;REQUIRES: asserts
 
-define void @main() {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
 main_body:
-  %0 = call float @llvm.R600.interp.input(i32 0, i32 0)
-  %1 = call float @llvm.R600.interp.input(i32 1, i32 0)
-  %2 = call float @llvm.R600.interp.input(i32 2, i32 0)
-  %3 = call float @llvm.R600.interp.input(i32 3, i32 0)
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
   %4 = fcmp ult float %1, 0.000000e+00
   %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
   %6 = fsub float -0.000000e+00, %5
@@ -74,10 +74,9 @@ ELSE17:                                           ; preds = %ELSE
   br label %ENDIF
 }
 
-declare float @llvm.R600.interp.input(i32, i32) #0
-
 declare float @llvm.AMDIL.clamp.(float, float, float) #0
 
 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
 attributes #0 = { readnone }
+attributes #1 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
index 5e875c49ab51..b917ec6413e9 100644
--- a/test/CodeGen/R600/schedule-fs-loop-nested.ll
+++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
 ;REQUIRES: asserts
 
 define void @main() {
diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
index d142cacd4335..d6c194b19b27 100644
--- a/test/CodeGen/R600/schedule-fs-loop.ll
+++ b/test/CodeGen/R600/schedule-fs-loop.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
 ;REQUIRES: asserts
 
 define void @main() {
diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
index 6afd6772926b..38aad1850f81 100644
--- a/test/CodeGen/R600/schedule-if-2.ll
+++ b/test/CodeGen/R600/schedule-if-2.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
 ;REQUIRES: asserts
 
 define void @main() {
diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
index 347d92fd6a0e..f960c9323940 100644
--- a/test/CodeGen/R600/schedule-if.ll
+++ b/test/CodeGen/R600/schedule-if.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
 ;REQUIRES: asserts
 
 define void @main() {
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
index 44b7c2f68002..33b20d36737b 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -1,12 +1,12 @@
 ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
 ;REQUIRES: asserts
 
-define void @main() {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
 main_body:
-  %0 = call float @llvm.R600.load.input(i32 4)
-  %1 = call float @llvm.R600.load.input(i32 5)
-  %2 = call float @llvm.R600.load.input(i32 6)
-  %3 = call float @llvm.R600.load.input(i32 7)
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
   %4 = fcmp ult float %0, 0.000000e+00
   %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
   %6 = fsub float -0.000000e+00, %5
@@ -127,8 +127,6 @@ ENDIF19:                                          ; preds = %ENDIF16
   br label %LOOP
 }
 
-declare float @llvm.R600.load.input(i32) #0
-
 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
 
-attributes #0 = { readnone }
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/select.ll b/test/CodeGen/R600/select.ll
new file mode 100644
index 000000000000..f9401424ac12
--- /dev/null
+++ b/test/CodeGen/R600/select.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Normally icmp + select is optimized to select_cc; when this happens the
+; DAGLegalizer never sees the select and doesn't have a chance to legalize it.
+;
+; In order to avoid the select_cc optimization, this test case calculates the
+; condition for the select in a separate basic block.
+
+; CHECK-LABEL: @select
+; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
+; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
+; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
+; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
+; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
+; CHECK-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
+define void @select (i32 addrspace(1)* %i32out, float addrspace(1)* %f32out,  ; one output pointer per selected type: i32, float,
+                     <2 x i32> addrspace(1)* %v2i32out, <2 x float> addrspace(1)* %v2f32out,  ; <2 x i32>, <2 x float>,
+                     <4 x i32> addrspace(1)* %v4i32out, <4 x float> addrspace(1)* %v4f32out,  ; <4 x i32>, <4 x float>
+                     i32 %cond) {  ; %cond is compared against the loop counter %i
+entry:
+  br label %for
+body:
+  %inc = add i32 %i, 1  ; next value of the counter
+  %br_cmp.i = icmp eq i1 %br_cmp, 0  ; logical negation of the current select condition
+  br label %for
+for:
+  %i = phi i32 [ %inc, %body], [ 0, %entry ]
+  %br_cmp = phi i1 [ %br_cmp.i, %body ], [ 0, %entry ]  ; the select condition arrives through a phi, not from an icmp in this block
+  %0 = icmp eq i32 %cond, %i  ; loop continues through %body while %cond equals %i
+  %1 = select i1 %br_cmp, i32 2, i32 3  ; six selects on the same i1, one per type
+  %2 = select i1 %br_cmp, float 2.0 , float 5.0
+  %3 = select i1 %br_cmp, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 5>
+  %4 = select i1 %br_cmp, <2 x float> <float 2.0, float 3.0>, <2 x float> <float 4.0, float 5.0>
+  %5 = select i1 %br_cmp, <4 x i32> <i32 2 , i32 3, i32 4, i32 5>, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+  %6 = select i1 %br_cmp, <4 x float> <float 2.0, float 3.0, float 4.0, float 5.0>, <4 x float> <float 6.0, float 7.0, float 8.0, float 9.0>
+  br i1 %0, label %body, label %done
+
+done:
+  store i32 %1, i32 addrspace(1)* %i32out  ; each select result is stored to its matching output pointer
+  store float %2, float addrspace(1)* %f32out
+  store <2 x i32> %3, <2 x i32> addrspace(1)* %v2i32out
+  store <2 x float> %4, <2 x float> addrspace(1)* %v2f32out
+  store <4 x i32> %5, <4 x i32> addrspace(1)* %v4i32out
+  store <4 x float> %6, <4 x float> addrspace(1)* %v4f32out
+  ret void
+}
diff --git a/test/CodeGen/R600/selectcc-cnd.ll b/test/CodeGen/R600/selectcc-cnd.ll
index d7287b487896..0bfca6937488 100644
--- a/test/CodeGen/R600/selectcc-cnd.ll
+++ b/test/CodeGen/R600/selectcc-cnd.ll
@@ -1,8 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK-NOT: SETE
-;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
-;CHECK-NEXT: {{[-0-9]+\(2.0}}
+;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
+;CHECK: 1073741824
 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
   %1 = load float addrspace(1)* %in
   %2 = fcmp oeq float %1, 0.0
diff --git a/test/CodeGen/R600/selectcc-cnde-int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll
index 768dc7dbf418..d568888f7cb2 100644
--- a/test/CodeGen/R600/selectcc-cnde-int.ll
+++ b/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -1,7 +1,7 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK-NOT: SETE_INT
-;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
+;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
 ;CHECK-NEXT: 2
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %1 = load i32 addrspace(1)* %in
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index 02d935390423..834c03069522 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -6,7 +6,7 @@
 
 define void @test_a(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 0.000000e+00
+  %0 = fcmp olt float %in, 0.000000e+00
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -31,9 +31,10 @@ ENDIF:
 ; CHECK: @test_b
 ; CHECK: SET{{[GTEQN]+}}_DX10
 ; CHECK-NEXT: PRED_
+; CHECK-NEXT: ALU clause starting
 define void @test_b(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 0.0
+  %0 = fcmp olt float %in, 0.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
index eb6e9d2f2ba5..5c7d4998d07c 100644
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -5,7 +5,8 @@
 ; SET*DX10 instructions.
 
 ; CHECK: @fcmp_une_select_fptosi
-; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
@@ -18,7 +19,8 @@ entry:
 }
 
 ; CHECK: @fcmp_une_select_i32
-; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
@@ -28,12 +30,13 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ueq_select_fptosi
-; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: @fcmp_oeq_select_fptosi
+; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ueq float %in, 5.0
+  %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -41,23 +44,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ueq_select_i32
-; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: @fcmp_oeq_select_i32
+; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ueq float %in, 5.0
+  %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_ugt_select_fptosi
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: @fcmp_ogt_select_fptosi
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ugt float %in, 5.0
+  %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -65,23 +70,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ugt_select_i32
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: @fcmp_ogt_select_i32
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ugt float %in, 5.0
+  %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_uge_select_fptosi
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: @fcmp_oge_select_fptosi
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp uge float %in, 5.0
+  %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -89,23 +96,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_uge_select_i32
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK: @fcmp_oge_select_i32
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp uge float %in, 5.0
+  %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_ule_select_fptosi
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK: @fcmp_ole_select_fptosi
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ule float %in, 5.0
+  %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -113,23 +122,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ule_select_i32
-; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK: @fcmp_ole_select_i32
+; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ule float %in, 5.0
+  %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_ult_select_fptosi
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK: @fcmp_olt_select_fptosi
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 5.0
+  %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -137,12 +148,13 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ult_select_i32
-; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK: @fcmp_olt_select_i32
+; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 5.0
+  %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index 0752f2e63dbf..8d34c4ad4fe5 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -1,7 +1,24 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: @setcc_v2i32
+; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
+; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y
+
+define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) {
+  %result = icmp eq <2 x i32> %a, %b
+  %sext = sext <2 x i1> %result to <2 x i32>
+  store <2 x i32> %sext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @setcc_v4i32
+; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
@@ -10,3 +27,307 @@ define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+;;;==========================================================================;;;
+;; Float comparisons
+;;;==========================================================================;;;
+
+; FUNC-LABEL: @f32_oeq
+; R600: SETE_DX10
+; SI: V_CMP_EQ_F32
+define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp oeq float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ogt
+; R600: SETGT_DX10
+; SI: V_CMP_GT_F32
+define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ogt float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_oge
+; R600: SETGE_DX10
+; SI: V_CMP_GE_F32
+define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp oge float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_olt
+; R600: SETGT_DX10
+; SI: V_CMP_LT_F32
+define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp olt float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ole
+; R600: SETGE_DX10
+; SI: V_CMP_LE_F32
+define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ole float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_one
+; R600-DAG: SETE_DX10
+; R600-DAG: SETE_DX10
+; R600-DAG: AND_INT
+; R600-DAG: SETNE_DX10
+; R600-DAG: AND_INT
+; R600-DAG: SETNE_INT
+; SI: V_CMP_O_F32
+; SI: V_CMP_NEQ_F32
+; SI: S_AND_B64
+define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp one float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ord
+; R600-DAG: SETE_DX10
+; R600-DAG: SETE_DX10
+; R600-DAG: AND_INT
+; R600-DAG: SETNE_INT
+; SI: V_CMP_O_F32
+define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ord float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ueq
+; R600-DAG: SETNE_DX10
+; R600-DAG: SETNE_DX10
+; R600-DAG: OR_INT
+; R600-DAG: SETE_DX10
+; R600-DAG: OR_INT
+; R600-DAG: SETNE_INT
+; SI: V_CMP_U_F32
+; SI: V_CMP_EQ_F32
+; SI: S_OR_B64
+define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ueq float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ugt
+; R600: SETGE
+; R600: SETE_DX10
+; SI: V_CMP_U_F32
+; SI: V_CMP_GT_F32
+; SI: S_OR_B64
+define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ugt float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_uge
+; R600: SETGT
+; R600: SETE_DX10
+; SI: V_CMP_U_F32
+; SI: V_CMP_GE_F32
+; SI: S_OR_B64
+define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp uge float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ult
+; R600: SETGE
+; R600: SETE_DX10
+; SI: V_CMP_U_F32
+; SI: V_CMP_LT_F32
+; SI: S_OR_B64
+define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ult float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_ule
+; R600: SETGT
+; R600: SETE_DX10
+; SI: V_CMP_U_F32
+; SI: V_CMP_LE_F32
+; SI: S_OR_B64
+define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp ule float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_une
+; R600: SETNE_DX10
+; SI: V_CMP_NEQ_F32
+define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp une float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f32_uno
+; R600: SETNE_DX10
+; R600: SETNE_DX10
+; R600: OR_INT
+; R600: SETNE_INT
+; SI: V_CMP_U_F32
+define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = fcmp uno float %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+;;;==========================================================================;;;
+;; 32-bit integer comparisons
+;;;==========================================================================;;;
+
+; FUNC-LABEL: @i32_eq
+; R600: SETE_INT
+; SI: V_CMP_EQ_I32
+define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp eq i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_ne
+; R600: SETNE_INT
+; SI: V_CMP_NE_I32
+define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp ne i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_ugt
+; R600: SETGT_UINT
+; SI: V_CMP_GT_U32
+define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp ugt i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_uge
+; R600: SETGE_UINT
+; SI: V_CMP_GE_U32
+define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp uge i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_ult
+; R600: SETGT_UINT
+; SI: V_CMP_LT_U32
+define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp ult i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_ule
+; R600: SETGE_UINT
+; SI: V_CMP_LE_U32
+define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp ule i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_sgt
+; R600: SETGT_INT
+; SI: V_CMP_GT_I32
+define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp sgt i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_sge
+; R600: SETGE_INT
+; SI: V_CMP_GE_I32
+define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp sge i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_slt
+; R600: SETGT_INT
+; SI: V_CMP_LT_I32
+define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp slt i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i32_sle
+; R600: SETGE_INT
+; SI: V_CMP_LE_I32
+define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+  %0 = icmp sle i32 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll
new file mode 100644
index 000000000000..9202fc01f555
--- /dev/null
+++ b/test/CodeGen/R600/setcc64.ll
@@ -0,0 +1,263 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; XXX: Merge this into setcc.ll once R600 supports 64-bit operations
+
+;;;==========================================================================;;;
+;; Double comparisons
+;;;==========================================================================;;;
+
+; FUNC-LABEL: @f64_oeq
+; SI: V_CMP_EQ_F64
+define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp oeq double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ogt
+; SI: V_CMP_GT_F64
+define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ogt double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_oge
+; SI: V_CMP_GE_F64
+define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp oge double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_olt
+; SI: V_CMP_LT_F64
+define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp olt double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ole
+; SI: V_CMP_LE_F64
+define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ole double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_one
+; SI: V_CMP_O_F64
+; SI: V_CMP_NEQ_F64
+; SI: S_AND_B64
+define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp one double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ord
+; SI: V_CMP_O_F64
+define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ord double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ueq
+; SI: V_CMP_U_F64
+; SI: V_CMP_EQ_F64
+; SI: S_OR_B64
+define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ueq double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ugt
+; SI: V_CMP_U_F64
+; SI: V_CMP_GT_F64
+; SI: S_OR_B64
+define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ugt double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_uge
+; SI: V_CMP_U_F64
+; SI: V_CMP_GE_F64
+; SI: S_OR_B64
+define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp uge double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ult
+; SI: V_CMP_U_F64
+; SI: V_CMP_LT_F64
+; SI: S_OR_B64
+define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ult double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_ule
+; SI: V_CMP_U_F64
+; SI: V_CMP_LE_F64
+; SI: S_OR_B64
+define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp ule double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_une
+; SI: V_CMP_NEQ_F64
+define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp une double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @f64_uno
+; SI: V_CMP_U_F64
+define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) {
+entry:
+  %0 = fcmp uno double %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+;;;==========================================================================;;;
+;; 64-bit integer comparisons
+;;;==========================================================================;;;
+
+; FUNC-LABEL: @i64_eq
+; SI: V_CMP_EQ_I64
+define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp eq i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_ne
+; SI: V_CMP_NE_I64
+define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp ne i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_ugt
+; SI: V_CMP_GT_U64
+define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp ugt i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_uge
+; SI: V_CMP_GE_U64
+define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp uge i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_ult
+; SI: V_CMP_LT_U64
+define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp ult i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_ule
+; SI: V_CMP_LE_U64
+define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp ule i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_sgt
+; SI: V_CMP_GT_I64
+define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp sgt i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_sge
+; SI: V_CMP_GE_I64
+define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp sge i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_slt
+; SI: V_CMP_LT_I64
+define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp slt i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @i64_sle
+; SI: V_CMP_LE_I64
+define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+  %0 = icmp sle i64 %a, %b
+  %1 = sext i1 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index 4622203ffdbc..8633a4b804af 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
+;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
 
 define void @main(float %p) {
 main_body:
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index 0bf5801b1c33..c77a37e19041 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
+;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
 
 define void @main(float %p) {
 main_body:
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
new file mode 100644
index 000000000000..d74161bf6dc1
--- /dev/null
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; Copy VGPR -> SGPR used twice as an instruction operand, which is then
+; used in a REG_SEQUENCE that also needs to be handled.
+
+; SI-LABEL: @test_dup_operands:
+; SI: V_ADD_I32_e32
+define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
+  %a = load <2 x i32> addrspace(1)* %in
+  %lo = extractelement <2 x i32> %a, i32 0
+  %hi = extractelement <2 x i32> %a, i32 1
+  %add = add i32 %lo, %lo
+  %vec0 = insertelement <2 x i32> undef, i32 %add, i32 0
+  %vec1 = insertelement <2 x i32> %vec0, i32 %hi, i32 1
+  store <2 x i32> %vec1, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
new file mode 100644
index 000000000000..5472c1bb1ca9
--- /dev/null
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -0,0 +1,327 @@
+; RUN: llc < %s -march=r600 -mcpu=SI  | FileCheck %s
+
+; This test checks that no VGPR to SGPR copies are created by the register
+; allocator.
+; CHECK-LABEL: @phi1
+; CHECK: S_BUFFER_LOAD_DWORD [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0
+; CHECK: V_MOV_B32_e32 v{{[0-9]}}, [[DST]]
+
+define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
+  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
+  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
+  %25 = fptosi float %23 to i32
+  %26 = icmp ne i32 %25, 0
+  br i1 %26, label %ENDIF, label %ELSE
+
+ELSE:                                             ; preds = %main_body
+  %27 = fsub float -0.000000e+00, %22
+  br label %ENDIF
+
+ENDIF:                                            ; preds = %main_body, %ELSE
+  %temp.0 = phi float [ %27, %ELSE ], [ %22, %main_body ]
+  %28 = fadd float %temp.0, %24
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %28, float %28, float 0.000000e+00, float 1.000000e+00)
+  ret void
+}
+
+; Make sure this program doesn't crash
+; CHECK-LABEL: @phi2
+define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
+  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
+  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
+  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
+  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
+  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
+  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
+  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
+  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
+  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
+  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
+  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
+  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
+  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
+  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
+  %37 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
+  %38 = load <32 x i8> addrspace(2)* %37, !tbaa !1
+  %39 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
+  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !1
+  %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
+  %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
+  %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
+  %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
+  %45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
+  %46 = bitcast float %41 to i32
+  %47 = bitcast float %42 to i32
+  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
+  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
+  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %38, <16 x i8> %40, i32 2)
+  %51 = extractelement <4 x float> %50, i32 2
+  %52 = call float @fabs(float %51)
+  %53 = fmul float %43, %43
+  %54 = fmul float %44, %44
+  %55 = fadd float %54, %53
+  %56 = fmul float %45, %45
+  %57 = fadd float %55, %56
+  %58 = call float @llvm.AMDGPU.rsq(float %57)
+  %59 = fmul float %43, %58
+  %60 = fmul float %44, %58
+  %61 = fmul float %45, %58
+  %62 = fmul float %59, %23
+  %63 = fmul float %60, %24
+  %64 = fadd float %63, %62
+  %65 = fmul float %61, %25
+  %66 = fadd float %64, %65
+  %67 = fsub float -0.000000e+00, %26
+  %68 = fmul float %66, %52
+  %69 = fadd float %68, %67
+  %70 = fmul float %27, %69
+  %71 = fmul float %28, %69
+  %72 = call float @fabs(float %70)
+  %73 = fcmp olt float 0x3EE4F8B580000000, %72
+  %74 = sext i1 %73 to i32
+  %75 = bitcast i32 %74 to float
+  %76 = bitcast float %75 to i32
+  %77 = icmp ne i32 %76, 0
+  br i1 %77, label %IF, label %ENDIF
+
+IF:                                               ; preds = %main_body
+  %78 = fsub float -0.000000e+00, %70
+  %79 = call float @llvm.AMDIL.exp.(float %78)
+  %80 = fsub float -0.000000e+00, %79
+  %81 = fadd float 1.000000e+00, %80
+  %82 = fdiv float 1.000000e+00, %70
+  %83 = fmul float %81, %82
+  %84 = fmul float %32, %83
+  br label %ENDIF
+
+ENDIF:                                            ; preds = %main_body, %IF
+  %temp4.0 = phi float [ %84, %IF ], [ %32, %main_body ]
+  %85 = call float @fabs(float %71)
+  %86 = fcmp olt float 0x3EE4F8B580000000, %85
+  %87 = sext i1 %86 to i32
+  %88 = bitcast i32 %87 to float
+  %89 = bitcast float %88 to i32
+  %90 = icmp ne i32 %89, 0
+  br i1 %90, label %IF25, label %ENDIF24
+
+IF25:                                             ; preds = %ENDIF
+  %91 = fsub float -0.000000e+00, %71
+  %92 = call float @llvm.AMDIL.exp.(float %91)
+  %93 = fsub float -0.000000e+00, %92
+  %94 = fadd float 1.000000e+00, %93
+  %95 = fdiv float 1.000000e+00, %71
+  %96 = fmul float %94, %95
+  %97 = fmul float %36, %96
+  br label %ENDIF24
+
+ENDIF24:                                          ; preds = %ENDIF, %IF25
+  %temp8.0 = phi float [ %97, %IF25 ], [ %36, %ENDIF ]
+  %98 = fmul float %29, %temp4.0
+  %99 = fmul float %30, %temp4.0
+  %100 = fmul float %31, %temp4.0
+  %101 = fmul float %33, %temp8.0
+  %102 = fadd float %101, %98
+  %103 = fmul float %34, %temp8.0
+  %104 = fadd float %103, %99
+  %105 = fmul float %35, %temp8.0
+  %106 = fadd float %105, %100
+  %107 = call float @llvm.pow.f32(float %52, float %22)
+  %108 = fsub float -0.000000e+00, %102
+  %109 = fmul float %108, %107
+  %110 = fsub float -0.000000e+00, %104
+  %111 = fmul float %110, %107
+  %112 = fsub float -0.000000e+00, %106
+  %113 = fmul float %112, %107
+  %114 = call i32 @llvm.SI.packf16(float %109, float %111)
+  %115 = bitcast i32 %114 to float
+  %116 = call i32 @llvm.SI.packf16(float %113, float 1.000000e+00)
+  %117 = bitcast i32 %116 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117)
+  ret void
+}
+
+; We just want to make sure the program doesn't crash
+; CHECK-LABEL: @loop
+
+define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
+  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
+  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
+  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12)
+  %26 = fptosi float %25 to i32
+  %27 = bitcast i32 %26 to float
+  %28 = bitcast float %27 to i32
+  br label %LOOP
+
+LOOP:                                             ; preds = %ENDIF, %main_body
+  %temp4.0 = phi float [ %22, %main_body ], [ %temp5.0, %ENDIF ]
+  %temp5.0 = phi float [ %23, %main_body ], [ %temp6.0, %ENDIF ]
+  %temp6.0 = phi float [ %24, %main_body ], [ %temp4.0, %ENDIF ]
+  %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %37, %ENDIF ]
+  %29 = bitcast float %temp8.0 to i32
+  %30 = icmp sge i32 %29, %28
+  %31 = sext i1 %30 to i32
+  %32 = bitcast i32 %31 to float
+  %33 = bitcast float %32 to i32
+  %34 = icmp ne i32 %33, 0
+  br i1 %34, label %IF, label %ENDIF
+
+IF:                                               ; preds = %LOOP
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00)
+  ret void
+
+ENDIF:                                            ; preds = %LOOP
+  %35 = bitcast float %temp8.0 to i32
+  %36 = add i32 %35, 1
+  %37 = bitcast i32 %36 to float
+  br label %LOOP
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+
+; Function Attrs: readonly
+declare float @fabs(float) #2
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { readonly }
+attributes #3 = { readnone }
+attributes #4 = { nounwind readonly }
+
+!0 = metadata !{metadata !"const", null}
+!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.rsq(float) #3
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.exp.(float) #3
+
+; Function Attrs: nounwind readonly
+declare float @llvm.pow.f32(float, float) #4
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+; This checks for a bug in the FixSGPRCopies pass where VReg96
+; registers were being identified as an SGPR regclass, which was causing
+; an assertion failure.
+
+; CHECK-LABEL: @sample_v3
+; CHECK: IMAGE_SAMPLE
+; CHECK: IMAGE_SAMPLE
+; CHECK: EXP
+; CHECK: S_ENDPGM
+define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+
+entry:
+  %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2
+  %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16)
+  %24 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2
+  %26 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2
+  %28 = fcmp oeq float %23, 0.0
+  br i1 %28, label %if, label %else
+
+if:
+  %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 0, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2)
+  %val.if.0 = extractelement <4 x float> %val.if, i32 0
+  %val.if.1 = extractelement <4 x float> %val.if, i32 1
+  %val.if.2 = extractelement <4 x float> %val.if, i32 2
+  br label %endif
+
+else:
+  %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %25, <16 x i8> %27, i32 2)
+  %val.else.0 = extractelement <4 x float> %val.else, i32 0
+  %val.else.1 = extractelement <4 x float> %val.else, i32 1
+  %val.else.2 = extractelement <4 x float> %val.else, i32 2
+  br label %endif
+
+endif:
+  %val.0 = phi float [%val.if.0, %if], [%val.else.0, %else]
+  %val.1 = phi float [%val.if.1, %if], [%val.else.1, %else]
+  %val.2 = phi float [%val.if.2, %if], [%val.else.2, %else]
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.0)
+  ret void
+}
+
+!2 = metadata !{metadata !"const", null, i32 1}
+
+; CHECK-LABEL: @copy1
+; CHECK: BUFFER_LOAD_DWORD
+; CHECK: V_ADD
+; CHECK: S_ENDPGM
+define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
+entry:
+  %0 = load float addrspace(1)* %in0
+  %1 = fcmp oeq float %0, 0.0
+  br i1 %1, label %if0, label %endif
+
+if0:
+  %2 = bitcast float %0 to i32
+  %3 = fcmp olt float %0, 0.0
+  br i1 %3, label %if1, label %endif
+
+if1:
+  %4 = add i32 %2, 1
+  br label %endif
+
+endif:
+  %5 = phi i32 [ 0, %entry ], [ %2, %if0 ], [ %4, %if1 ]
+  %6 = bitcast i32 %5 to float
+  store float %6, float addrspace(1)* %out
+  ret void
+}
+
+; This test is just checking that we don't crash / assertion fail.
+; CHECK-LABEL: @copy2
+; CHECK: S_ENDPGM
+
+define void @copy2([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+entry:
+  br label %LOOP68
+
+LOOP68:
+  %temp4.7 = phi float [ 0.000000e+00, %entry ], [ %v, %ENDIF69 ]
+  %t = phi i32 [ 20, %entry ], [ %x, %ENDIF69 ]
+  %g = icmp eq i32 0, %t
+  %l = bitcast float %temp4.7 to i32
+  br i1 %g, label %IF70, label %ENDIF69
+
+IF70:
+  %q = icmp ne i32 %l, 13
+  %temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
+  ret void
+
+ENDIF69:
+  %u = add i32 %l, %t
+  %v = bitcast i32 %u to float
+  %x = add i32 %t, -1
+  br label %LOOP68
+}
+
+attributes #0 = { "ShaderType"="0" }
+
diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll
new file mode 100644
index 000000000000..0484fc9a8563
--- /dev/null
+++ b/test/CodeGen/R600/shared-op-cycle.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Three W components are squared, offset by 2.0, and packed into one vector
+; that is used as both operands of a single dp4. The checks require a
+; "MULADD_IEEE *" and forbid a second one after it.
+
+; CHECK: @main
+; CHECK: MULADD_IEEE *
+; CHECK-NOT: MULADD_IEEE *
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+   %w0 = extractelement <4 x float> %reg0, i32 3
+   %w1 = extractelement <4 x float> %reg1, i32 3
+   %w2 = extractelement <4 x float> %reg2, i32 3
+   %sq0 = fmul float %w0, %w0
+   %r0 = fadd float %sq0, 2.0
+   %sq1 = fmul float %w1, %w1
+   %r1 = fadd float %sq1, 2.0
+   %sq2 = fmul float %w2, %w2
+   %r2 = fadd float %sq2, 2.0
+   %v0 = insertelement <4 x float> undef, float %r0, i32 0
+   %v1 = insertelement <4 x float> %v0, float %r1, i32 1
+   %v2 = insertelement <4 x float> %v1, float %r2, i32 2
+   %res = call float @llvm.AMDGPU.dp4(<4 x float> %v2, <4 x float> %v2)
+   %vecres = insertelement <4 x float> undef, float %res, i32 0
+   call void @llvm.R600.store.swizzle(<4 x float> %vecres, i32 0, i32 2)
+   ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
index 43cc1e26fc01..4a6aab4a104a 100644
--- a/test/CodeGen/R600/shl.ll
+++ b/test/CodeGen/R600/shl.ll
@@ -1,13 +1,48 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; CHECK: @shl_v4i32
-; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; Each function shifts one vector loaded from %in by the vector that follows
+; it in memory. The checks expect one LSHL (EG) / V_LSHL_B32_e32 (SI) per
+; vector element; the -LABEL lines keep each function's matches inside that
+; function's own output.
+
+;EG-CHECK-LABEL: @shl_v2i32
+;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+;SI-CHECK-LABEL: @shl_v2i32
+;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = shl <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @shl_v4i32
+;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK-LABEL: @shl_v4i32
+;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %a = load <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32> addrspace(1) * %b_ptr
   %result = shl <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+; XXX: Add SI test for i64 shl once i64 stores and i64 function arguments are
+; supported.
diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll
deleted file mode 100644
index b69e327bf6df..000000000000
--- a/test/CodeGen/R600/short-args.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK: @i8_arg
-; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
-entry:
-  %0 = zext i8 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; CHECK: @i8_zext_arg
-; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
-entry:
-  %0 = zext i8 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; CHECK: @i16_arg
-; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
-entry:
-  %0 = zext i16 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
-
-; CHECK: @i16_zext_arg
-; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
-entry:
-  %0 = zext i16 %in to i32
-  store i32 %0, i32 addrspace(1)* %out, align 4
-  ret void
-}
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll
new file mode 100644
index 000000000000..9886fe9169bb
--- /dev/null
+++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s
+
+
+define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
+; CHECK-LABEL: @test:
+
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.bb
+    i32 60, label %sw.bb
+  ]
+
+sw.bb:
+  unreachable
+
+sw.default:
+  unreachable
+
+sw.epilog:
+  ret void
+}
+
diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll
new file mode 100644
index 000000000000..8d7a79cdbda0
--- /dev/null
+++ b/test/CodeGen/R600/si-lod-bias.ll
@@ -0,0 +1,51 @@
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; This shader has the potential to generate illegal VGPR to SGPR copies if
+; the wrong register class is used for the REG_SEQUENCE instructions.
+
+; CHECK: @main
+; CHECK: IMAGE_SAMPLE_B v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
+
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
+  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
+  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !1
+  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
+  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !1
+  %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
+  %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
+  %29 = bitcast float %22 to i32
+  %30 = bitcast float %27 to i32
+  %31 = bitcast float %28 to i32
+  %32 = insertelement <4 x i32> undef, i32 %29, i32 0
+  %33 = insertelement <4 x i32> %32, i32 %30, i32 1
+  %34 = insertelement <4 x i32> %33, i32 %31, i32 2
+  %35 = insertelement <4 x i32> %34, i32 undef, i32 3
+  %36 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %35, <32 x i8> %24, <16 x i8> %26, i32 2)
+  %37 = extractelement <4 x float> %36, i32 0
+  %38 = extractelement <4 x float> %36, i32 1
+  %39 = extractelement <4 x float> %36, i32 2
+  %40 = extractelement <4 x float> %36, i32 3
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %37, float %38, float %39, float %40)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
+
+!0 = metadata !{metadata !"const", null}
+!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
new file mode 100644
index 000000000000..05c5e31f3fad
--- /dev/null
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -0,0 +1,692 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+
+; XXX: Enable when spilling is supported
+; XFAIL: *
+
+; These tests check that the compiler won't crash when it needs to spill
+; SGPRs.
+
+; CHECK-LABEL: @main
+; Writing to M0 from an SMRD instruction will hang the GPU.
+; CHECK-NOT: S_BUFFER_LOAD_DWORD m0
+; CHECK: S_ENDPGM
+@ddxy_lds = external addrspace(3) global [64 x i32]
+
+define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+  %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
+  %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
+  %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
+  %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112)
+  %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116)
+  %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120)
+  %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128)
+  %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132)
+  %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140)
+  %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144)
+  %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160)
+  %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176)
+  %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180)
+  %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184)
+  %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192)
+  %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196)
+  %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200)
+  %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208)
+  %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212)
+  %42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216)
+  %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 224)
+  %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240)
+  %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244)
+  %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248)
+  %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256)
+  %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272)
+  %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276)
+  %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280)
+  %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288)
+  %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292)
+  %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 296)
+  %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 304)
+  %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 308)
+  %56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 312)
+  %57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368)
+  %58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372)
+  %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
+  %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
+  %61 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
+  %63 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
+  %65 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
+  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
+  %67 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
+  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
+  %69 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
+  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
+  %71 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
+  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
+  %73 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
+  %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
+  %75 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
+  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
+  %77 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
+  %78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
+  %79 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
+  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
+  %81 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
+  %82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
+  %83 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
+  %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
+  %85 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
+  %86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
+  %87 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
+  %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
+  %89 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
+  %90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
+  %91 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
+  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
+  %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
+  %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
+  %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
+  %96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6)
+  %97 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %4, <2 x i32> %6)
+  %98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6)
+  %99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6)
+  %100 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6)
+  %101 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6)
+  %102 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6)
+  %103 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6)
+  %104 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6)
+  %105 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6)
+  %106 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6)
+  %107 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6)
+  %108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
+  %109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
+  %110 = call i32 @llvm.SI.tid()
+  %111 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
+  %112 = bitcast float %93 to i32
+  store i32 %112, i32 addrspace(3)* %111
+  %113 = bitcast float %94 to i32
+  store i32 %113, i32 addrspace(3)* %111
+  %114 = call i32 @llvm.SI.tid()
+  %115 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
+  %116 = and i32 %114, -4
+  %117 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
+  %118 = add i32 %116, 1
+  %119 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
+  %120 = bitcast float %93 to i32
+  store i32 %120, i32 addrspace(3)* %115
+  %121 = load i32 addrspace(3)* %117
+  %122 = bitcast i32 %121 to float
+  %123 = load i32 addrspace(3)* %119
+  %124 = bitcast i32 %123 to float
+  %125 = fsub float %124, %122
+  %126 = bitcast float %94 to i32
+  store i32 %126, i32 addrspace(3)* %115
+  %127 = load i32 addrspace(3)* %117
+  %128 = bitcast i32 %127 to float
+  %129 = load i32 addrspace(3)* %119
+  %130 = bitcast i32 %129 to float
+  %131 = fsub float %130, %128
+  %132 = insertelement <4 x float> undef, float %125, i32 0
+  %133 = insertelement <4 x float> %132, float %131, i32 1
+  %134 = insertelement <4 x float> %133, float %131, i32 2
+  %135 = insertelement <4 x float> %134, float %131, i32 3
+  %136 = extractelement <4 x float> %135, i32 0
+  %137 = extractelement <4 x float> %135, i32 1
+  %138 = fmul float %60, %93
+  %139 = fmul float %60, %94
+  %140 = fmul float %60, %94
+  %141 = fmul float %60, %94
+  %142 = call i32 @llvm.SI.tid()
+  %143 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
+  %144 = bitcast float %138 to i32
+  store i32 %144, i32 addrspace(3)* %143
+  %145 = bitcast float %139 to i32
+  store i32 %145, i32 addrspace(3)* %143
+  %146 = bitcast float %140 to i32
+  store i32 %146, i32 addrspace(3)* %143
+  %147 = bitcast float %141 to i32
+  store i32 %147, i32 addrspace(3)* %143
+  %148 = call i32 @llvm.SI.tid()
+  %149 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
+  %150 = and i32 %148, -4
+  %151 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
+  %152 = add i32 %150, 2
+  %153 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
+  %154 = bitcast float %138 to i32
+  store i32 %154, i32 addrspace(3)* %149
+  %155 = load i32 addrspace(3)* %151
+  %156 = bitcast i32 %155 to float
+  %157 = load i32 addrspace(3)* %153
+  %158 = bitcast i32 %157 to float
+  %159 = fsub float %158, %156
+  %160 = bitcast float %139 to i32
+  store i32 %160, i32 addrspace(3)* %149
+  %161 = load i32 addrspace(3)* %151
+  %162 = bitcast i32 %161 to float
+  %163 = load i32 addrspace(3)* %153
+  %164 = bitcast i32 %163 to float
+  %165 = fsub float %164, %162
+  %166 = bitcast float %140 to i32
+  store i32 %166, i32 addrspace(3)* %149
+  %167 = load i32 addrspace(3)* %151
+  %168 = bitcast i32 %167 to float
+  %169 = load i32 addrspace(3)* %153
+  %170 = bitcast i32 %169 to float
+  %171 = fsub float %170, %168
+  %172 = bitcast float %141 to i32
+  store i32 %172, i32 addrspace(3)* %149
+  %173 = load i32 addrspace(3)* %151
+  %174 = bitcast i32 %173 to float
+  %175 = load i32 addrspace(3)* %153
+  %176 = bitcast i32 %175 to float
+  %177 = fsub float %176, %174
+  %178 = insertelement <4 x float> undef, float %159, i32 0
+  %179 = insertelement <4 x float> %178, float %165, i32 1
+  %180 = insertelement <4 x float> %179, float %171, i32 2
+  %181 = insertelement <4 x float> %180, float %177, i32 3
+  %182 = extractelement <4 x float> %181, i32 0
+  %183 = extractelement <4 x float> %181, i32 1
+  %184 = fdiv float 1.000000e+00, %97
+  %185 = fmul float %33, %184
+  %186 = fcmp uge float 1.000000e+00, %185
+  %187 = select i1 %186, float %185, float 1.000000e+00
+  %188 = fmul float %187, %30
+  %189 = call float @ceil(float %188)
+  %190 = fcmp uge float 3.000000e+00, %189
+  %191 = select i1 %190, float 3.000000e+00, float %189
+  %192 = fdiv float 1.000000e+00, %191
+  %193 = fdiv float 1.000000e+00, %30
+  %194 = fmul float %191, %193
+  %195 = fmul float %31, %194
+  %196 = fmul float %95, %95
+  %197 = fmul float %96, %96
+  %198 = fadd float %197, %196
+  %199 = fmul float %97, %97
+  %200 = fadd float %198, %199
+  %201 = call float @llvm.AMDGPU.rsq(float %200)
+  %202 = fmul float %95, %201
+  %203 = fmul float %96, %201
+  %204 = fmul float %202, %29
+  %205 = fmul float %203, %29
+  %206 = fmul float %204, -1.000000e+00
+  %207 = fmul float %205, 1.000000e+00
+  %208 = fmul float %206, %32
+  %209 = fmul float %207, %32
+  %210 = fsub float -0.000000e+00, %208
+  %211 = fadd float %93, %210
+  %212 = fsub float -0.000000e+00, %209
+  %213 = fadd float %94, %212
+  %214 = fmul float %206, %192
+  %215 = fmul float %207, %192
+  %216 = fmul float -1.000000e+00, %192
+  %217 = bitcast float %136 to i32
+  %218 = bitcast float %182 to i32
+  %219 = bitcast float %137 to i32
+  %220 = bitcast float %183 to i32
+  %221 = insertelement <8 x i32> undef, i32 %217, i32 0
+  %222 = insertelement <8 x i32> %221, i32 %218, i32 1
+  %223 = insertelement <8 x i32> %222, i32 %219, i32 2
+  %224 = insertelement <8 x i32> %223, i32 %220, i32 3
+  br label %LOOP
+
+LOOP:                                             ; preds = %ENDIF, %main_body
+  %temp24.0 = phi float [ 1.000000e+00, %main_body ], [ %258, %ENDIF ]
+  %temp28.0 = phi float [ %211, %main_body ], [ %253, %ENDIF ]
+  %temp29.0 = phi float [ %213, %main_body ], [ %255, %ENDIF ]
+  %temp30.0 = phi float [ 1.000000e+00, %main_body ], [ %257, %ENDIF ]
+  %225 = fcmp oge float %temp24.0, %191
+  %226 = sext i1 %225 to i32
+  %227 = bitcast i32 %226 to float
+  %228 = bitcast float %227 to i32
+  %229 = icmp ne i32 %228, 0
+  br i1 %229, label %IF, label %ENDIF
+
+IF:                                               ; preds = %LOOP
+  %230 = bitcast float %136 to i32
+  %231 = bitcast float %182 to i32
+  %232 = bitcast float %137 to i32
+  %233 = bitcast float %183 to i32
+  %234 = insertelement <8 x i32> undef, i32 %230, i32 0
+  %235 = insertelement <8 x i32> %234, i32 %231, i32 1
+  %236 = insertelement <8 x i32> %235, i32 %232, i32 2
+  %237 = insertelement <8 x i32> %236, i32 %233, i32 3
+  br label %LOOP65
+
+ENDIF:                                            ; preds = %LOOP
+  %238 = bitcast float %temp28.0 to i32
+  %239 = bitcast float %temp29.0 to i32
+  %240 = insertelement <8 x i32> %224, i32 %238, i32 4
+  %241 = insertelement <8 x i32> %240, i32 %239, i32 5
+  %242 = insertelement <8 x i32> %241, i32 undef, i32 6
+  %243 = insertelement <8 x i32> %242, i32 undef, i32 7
+  %244 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %243, <32 x i8> %62, <16 x i8> %64, i32 2)
+  %245 = extractelement <4 x float> %244, i32 3
+  %246 = fcmp oge float %temp30.0, %245
+  %247 = sext i1 %246 to i32
+  %248 = bitcast i32 %247 to float
+  %249 = bitcast float %248 to i32
+  %250 = and i32 %249, 1065353216
+  %251 = bitcast i32 %250 to float
+  %252 = fmul float %214, %251
+  %253 = fadd float %252, %temp28.0
+  %254 = fmul float %215, %251
+  %255 = fadd float %254, %temp29.0
+  %256 = fmul float %216, %251
+  %257 = fadd float %256, %temp30.0
+  %258 = fadd float %temp24.0, 1.000000e+00
+  br label %LOOP
+
+LOOP65:                                           ; preds = %ENDIF66, %IF
+  %temp24.1 = phi float [ 0.000000e+00, %IF ], [ %610, %ENDIF66 ]
+  %temp28.1 = phi float [ %temp28.0, %IF ], [ %605, %ENDIF66 ]
+  %temp29.1 = phi float [ %temp29.0, %IF ], [ %607, %ENDIF66 ]
+  %temp30.1 = phi float [ %temp30.0, %IF ], [ %609, %ENDIF66 ]
+  %temp32.0 = phi float [ 1.000000e+00, %IF ], [ %611, %ENDIF66 ]
+  %259 = fcmp oge float %temp24.1, %195
+  %260 = sext i1 %259 to i32
+  %261 = bitcast i32 %260 to float
+  %262 = bitcast float %261 to i32
+  %263 = icmp ne i32 %262, 0
+  br i1 %263, label %IF67, label %ENDIF66
+
+IF67:                                             ; preds = %LOOP65
+  %264 = bitcast float %136 to i32
+  %265 = bitcast float %182 to i32
+  %266 = bitcast float %137 to i32
+  %267 = bitcast float %183 to i32
+  %268 = bitcast float %temp28.1 to i32
+  %269 = bitcast float %temp29.1 to i32
+  %270 = insertelement <8 x i32> undef, i32 %264, i32 0
+  %271 = insertelement <8 x i32> %270, i32 %265, i32 1
+  %272 = insertelement <8 x i32> %271, i32 %266, i32 2
+  %273 = insertelement <8 x i32> %272, i32 %267, i32 3
+  %274 = insertelement <8 x i32> %273, i32 %268, i32 4
+  %275 = insertelement <8 x i32> %274, i32 %269, i32 5
+  %276 = insertelement <8 x i32> %275, i32 undef, i32 6
+  %277 = insertelement <8 x i32> %276, i32 undef, i32 7
+  %278 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %277, <32 x i8> %66, <16 x i8> %68, i32 2)
+  %279 = extractelement <4 x float> %278, i32 0
+  %280 = extractelement <4 x float> %278, i32 1
+  %281 = extractelement <4 x float> %278, i32 2
+  %282 = extractelement <4 x float> %278, i32 3
+  %283 = fmul float %282, %47
+  %284 = bitcast float %136 to i32
+  %285 = bitcast float %182 to i32
+  %286 = bitcast float %137 to i32
+  %287 = bitcast float %183 to i32
+  %288 = bitcast float %temp28.1 to i32
+  %289 = bitcast float %temp29.1 to i32
+  %290 = insertelement <8 x i32> undef, i32 %284, i32 0
+  %291 = insertelement <8 x i32> %290, i32 %285, i32 1
+  %292 = insertelement <8 x i32> %291, i32 %286, i32 2
+  %293 = insertelement <8 x i32> %292, i32 %287, i32 3
+  %294 = insertelement <8 x i32> %293, i32 %288, i32 4
+  %295 = insertelement <8 x i32> %294, i32 %289, i32 5
+  %296 = insertelement <8 x i32> %295, i32 undef, i32 6
+  %297 = insertelement <8 x i32> %296, i32 undef, i32 7
+  %298 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %297, <32 x i8> %82, <16 x i8> %84, i32 2)
+  %299 = extractelement <4 x float> %298, i32 0
+  %300 = extractelement <4 x float> %298, i32 1
+  %301 = extractelement <4 x float> %298, i32 2
+  %302 = bitcast float %136 to i32
+  %303 = bitcast float %182 to i32
+  %304 = bitcast float %137 to i32
+  %305 = bitcast float %183 to i32
+  %306 = bitcast float %temp28.1 to i32
+  %307 = bitcast float %temp29.1 to i32
+  %308 = insertelement <8 x i32> undef, i32 %302, i32 0
+  %309 = insertelement <8 x i32> %308, i32 %303, i32 1
+  %310 = insertelement <8 x i32> %309, i32 %304, i32 2
+  %311 = insertelement <8 x i32> %310, i32 %305, i32 3
+  %312 = insertelement <8 x i32> %311, i32 %306, i32 4
+  %313 = insertelement <8 x i32> %312, i32 %307, i32 5
+  %314 = insertelement <8 x i32> %313, i32 undef, i32 6
+  %315 = insertelement <8 x i32> %314, i32 undef, i32 7
+  %316 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %315, <32 x i8> %78, <16 x i8> %80, i32 2)
+  %317 = extractelement <4 x float> %316, i32 0
+  %318 = extractelement <4 x float> %316, i32 1
+  %319 = extractelement <4 x float> %316, i32 2
+  %320 = fmul float %317, %23
+  %321 = fmul float %318, %24
+  %322 = fmul float %319, %25
+  %323 = fmul float %299, %26
+  %324 = fadd float %323, %320
+  %325 = fmul float %300, %27
+  %326 = fadd float %325, %321
+  %327 = fmul float %301, %28
+  %328 = fadd float %327, %322
+  %329 = fadd float %279, %324
+  %330 = fadd float %280, %326
+  %331 = fadd float %281, %328
+  %332 = bitcast float %136 to i32
+  %333 = bitcast float %182 to i32
+  %334 = bitcast float %137 to i32
+  %335 = bitcast float %183 to i32
+  %336 = bitcast float %temp28.1 to i32
+  %337 = bitcast float %temp29.1 to i32
+  %338 = insertelement <8 x i32> undef, i32 %332, i32 0
+  %339 = insertelement <8 x i32> %338, i32 %333, i32 1
+  %340 = insertelement <8 x i32> %339, i32 %334, i32 2
+  %341 = insertelement <8 x i32> %340, i32 %335, i32 3
+  %342 = insertelement <8 x i32> %341, i32 %336, i32 4
+  %343 = insertelement <8 x i32> %342, i32 %337, i32 5
+  %344 = insertelement <8 x i32> %343, i32 undef, i32 6
+  %345 = insertelement <8 x i32> %344, i32 undef, i32 7
+  %346 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %345, <32 x i8> %62, <16 x i8> %64, i32 2)
+  %347 = extractelement <4 x float> %346, i32 0
+  %348 = extractelement <4 x float> %346, i32 1
+  %349 = extractelement <4 x float> %346, i32 2
+  %350 = fadd float %347, -5.000000e-01
+  %351 = fadd float %348, -5.000000e-01
+  %352 = fadd float %349, -5.000000e-01
+  %353 = fmul float %350, %350
+  %354 = fmul float %351, %351
+  %355 = fadd float %354, %353
+  %356 = fmul float %352, %352
+  %357 = fadd float %355, %356
+  %358 = call float @llvm.AMDGPU.rsq(float %357)
+  %359 = fmul float %350, %358
+  %360 = fmul float %351, %358
+  %361 = fmul float %352, %358
+  %362 = bitcast float %136 to i32
+  %363 = bitcast float %182 to i32
+  %364 = bitcast float %137 to i32
+  %365 = bitcast float %183 to i32
+  %366 = bitcast float %temp28.1 to i32
+  %367 = bitcast float %temp29.1 to i32
+  %368 = insertelement <8 x i32> undef, i32 %362, i32 0
+  %369 = insertelement <8 x i32> %368, i32 %363, i32 1
+  %370 = insertelement <8 x i32> %369, i32 %364, i32 2
+  %371 = insertelement <8 x i32> %370, i32 %365, i32 3
+  %372 = insertelement <8 x i32> %371, i32 %366, i32 4
+  %373 = insertelement <8 x i32> %372, i32 %367, i32 5
+  %374 = insertelement <8 x i32> %373, i32 undef, i32 6
+  %375 = insertelement <8 x i32> %374, i32 undef, i32 7
+  %376 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %375, <32 x i8> %70, <16 x i8> %72, i32 2)
+  %377 = extractelement <4 x float> %376, i32 0
+  %378 = extractelement <4 x float> %376, i32 1
+  %379 = extractelement <4 x float> %376, i32 2
+  %380 = extractelement <4 x float> %376, i32 3
+  %381 = fsub float -0.000000e+00, %95
+  %382 = fsub float -0.000000e+00, %96
+  %383 = fsub float -0.000000e+00, %97
+  %384 = fmul float %359, %381
+  %385 = fmul float %360, %382
+  %386 = fadd float %385, %384
+  %387 = fmul float %361, %383
+  %388 = fadd float %386, %387
+  %389 = fmul float %388, %359
+  %390 = fmul float %388, %360
+  %391 = fmul float %388, %361
+  %392 = fmul float 2.000000e+00, %389
+  %393 = fmul float 2.000000e+00, %390
+  %394 = fmul float 2.000000e+00, %391
+  %395 = fsub float -0.000000e+00, %392
+  %396 = fadd float %381, %395
+  %397 = fsub float -0.000000e+00, %393
+  %398 = fadd float %382, %397
+  %399 = fsub float -0.000000e+00, %394
+  %400 = fadd float %383, %399
+  %401 = fmul float %396, %98
+  %402 = fmul float %396, %99
+  %403 = fmul float %396, %100
+  %404 = fmul float %398, %101
+  %405 = fadd float %404, %401
+  %406 = fmul float %398, %102
+  %407 = fadd float %406, %402
+  %408 = fmul float %398, %103
+  %409 = fadd float %408, %403
+  %410 = fmul float %400, %104
+  %411 = fadd float %410, %405
+  %412 = fmul float %400, %105
+  %413 = fadd float %412, %407
+  %414 = fmul float %400, %106
+  %415 = fadd float %414, %409
+  %416 = bitcast float %136 to i32
+  %417 = bitcast float %182 to i32
+  %418 = bitcast float %137 to i32
+  %419 = bitcast float %183 to i32
+  %420 = bitcast float %temp28.1 to i32
+  %421 = bitcast float %temp29.1 to i32
+  %422 = insertelement <8 x i32> undef, i32 %416, i32 0
+  %423 = insertelement <8 x i32> %422, i32 %417, i32 1
+  %424 = insertelement <8 x i32> %423, i32 %418, i32 2
+  %425 = insertelement <8 x i32> %424, i32 %419, i32 3
+  %426 = insertelement <8 x i32> %425, i32 %420, i32 4
+  %427 = insertelement <8 x i32> %426, i32 %421, i32 5
+  %428 = insertelement <8 x i32> %427, i32 undef, i32 6
+  %429 = insertelement <8 x i32> %428, i32 undef, i32 7
+  %430 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %429, <32 x i8> %86, <16 x i8> %88, i32 2)
+  %431 = extractelement <4 x float> %430, i32 0
+  %432 = extractelement <4 x float> %430, i32 1
+  %433 = extractelement <4 x float> %430, i32 2
+  %434 = fmul float %48, %411
+  %435 = fmul float %49, %411
+  %436 = fmul float %50, %411
+  %437 = fmul float %51, %413
+  %438 = fadd float %437, %434
+  %439 = fmul float %52, %413
+  %440 = fadd float %439, %435
+  %441 = fmul float %53, %413
+  %442 = fadd float %441, %436
+  %443 = fmul float %54, %415
+  %444 = fadd float %443, %438
+  %445 = fmul float %55, %415
+  %446 = fadd float %445, %440
+  %447 = fmul float %56, %415
+  %448 = fadd float %447, %442
+  %449 = insertelement <4 x float> undef, float %444, i32 0
+  %450 = insertelement <4 x float> %449, float %446, i32 1
+  %451 = insertelement <4 x float> %450, float %448, i32 2
+  %452 = insertelement <4 x float> %451, float %195, i32 3
+  %453 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %452)
+  %454 = extractelement <4 x float> %453, i32 0
+  %455 = extractelement <4 x float> %453, i32 1
+  %456 = extractelement <4 x float> %453, i32 2
+  %457 = extractelement <4 x float> %453, i32 3
+  %458 = call float @fabs(float %456)
+  %459 = fdiv float 1.000000e+00, %458
+  %460 = fmul float %454, %459
+  %461 = fadd float %460, 1.500000e+00
+  %462 = fmul float %455, %459
+  %463 = fadd float %462, 1.500000e+00
+  %464 = bitcast float %463 to i32
+  %465 = bitcast float %461 to i32
+  %466 = bitcast float %457 to i32
+  %467 = insertelement <4 x i32> undef, i32 %464, i32 0
+  %468 = insertelement <4 x i32> %467, i32 %465, i32 1
+  %469 = insertelement <4 x i32> %468, i32 %466, i32 2
+  %470 = insertelement <4 x i32> %469, i32 undef, i32 3
+  %471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %470, <32 x i8> %90, <16 x i8> %92, i32 4)
+  %472 = extractelement <4 x float> %471, i32 0
+  %473 = extractelement <4 x float> %471, i32 1
+  %474 = extractelement <4 x float> %471, i32 2
+  %475 = fmul float %431, %472
+  %476 = fadd float %475, %329
+  %477 = fmul float %432, %473
+  %478 = fadd float %477, %330
+  %479 = fmul float %433, %474
+  %480 = fadd float %479, %331
+  %481 = fmul float %107, %107
+  %482 = fmul float %108, %108
+  %483 = fadd float %482, %481
+  %484 = fmul float %109, %109
+  %485 = fadd float %483, %484
+  %486 = call float @llvm.AMDGPU.rsq(float %485)
+  %487 = fmul float %107, %486
+  %488 = fmul float %108, %486
+  %489 = fmul float %109, %486
+  %490 = fmul float %377, %40
+  %491 = fmul float %378, %41
+  %492 = fmul float %379, %42
+  %493 = fmul float %359, %487
+  %494 = fmul float %360, %488
+  %495 = fadd float %494, %493
+  %496 = fmul float %361, %489
+  %497 = fadd float %495, %496
+  %498 = fmul float %497, %359
+  %499 = fmul float %497, %360
+  %500 = fmul float %497, %361
+  %501 = fmul float 2.000000e+00, %498
+  %502 = fmul float 2.000000e+00, %499
+  %503 = fmul float 2.000000e+00, %500
+  %504 = fsub float -0.000000e+00, %501
+  %505 = fadd float %487, %504
+  %506 = fsub float -0.000000e+00, %502
+  %507 = fadd float %488, %506
+  %508 = fsub float -0.000000e+00, %503
+  %509 = fadd float %489, %508
+  %510 = fmul float %95, %95
+  %511 = fmul float %96, %96
+  %512 = fadd float %511, %510
+  %513 = fmul float %97, %97
+  %514 = fadd float %512, %513
+  %515 = call float @llvm.AMDGPU.rsq(float %514)
+  %516 = fmul float %95, %515
+  %517 = fmul float %96, %515
+  %518 = fmul float %97, %515
+  %519 = fmul float %505, %516
+  %520 = fmul float %507, %517
+  %521 = fadd float %520, %519
+  %522 = fmul float %509, %518
+  %523 = fadd float %521, %522
+  %524 = fsub float -0.000000e+00, %523
+  %525 = fcmp uge float %524, 0.000000e+00
+  %526 = select i1 %525, float %524, float 0.000000e+00
+  %527 = fmul float %43, %380
+  %528 = fadd float %527, 1.000000e+00
+  %529 = call float @llvm.pow.f32(float %526, float %528)
+  %530 = fmul float %476, %37
+  %531 = fmul float %478, %38
+  %532 = fmul float %480, %39
+  %533 = fmul float %359, %487
+  %534 = fmul float %360, %488
+  %535 = fadd float %534, %533
+  %536 = fmul float %361, %489
+  %537 = fadd float %535, %536
+  %538 = fcmp uge float %537, 0.000000e+00
+  %539 = select i1 %538, float %537, float 0.000000e+00
+  %540 = fmul float %530, %539
+  %541 = fmul float %531, %539
+  %542 = fmul float %532, %539
+  %543 = fmul float %490, %529
+  %544 = fadd float %543, %540
+  %545 = fmul float %491, %529
+  %546 = fadd float %545, %541
+  %547 = fmul float %492, %529
+  %548 = fadd float %547, %542
+  %549 = fmul float %476, %34
+  %550 = fmul float %478, %35
+  %551 = fmul float %480, %36
+  %552 = fmul float %544, %57
+  %553 = fadd float %552, %549
+  %554 = fmul float %546, %58
+  %555 = fadd float %554, %550
+  %556 = fmul float %548, %59
+  %557 = fadd float %556, %551
+  %558 = bitcast float %136 to i32
+  %559 = bitcast float %182 to i32
+  %560 = bitcast float %137 to i32
+  %561 = bitcast float %183 to i32
+  %562 = bitcast float %temp28.1 to i32
+  %563 = bitcast float %temp29.1 to i32
+  %564 = insertelement <8 x i32> undef, i32 %558, i32 0
+  %565 = insertelement <8 x i32> %564, i32 %559, i32 1
+  %566 = insertelement <8 x i32> %565, i32 %560, i32 2
+  %567 = insertelement <8 x i32> %566, i32 %561, i32 3
+  %568 = insertelement <8 x i32> %567, i32 %562, i32 4
+  %569 = insertelement <8 x i32> %568, i32 %563, i32 5
+  %570 = insertelement <8 x i32> %569, i32 undef, i32 6
+  %571 = insertelement <8 x i32> %570, i32 undef, i32 7
+  %572 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %571, <32 x i8> %74, <16 x i8> %76, i32 2)
+  %573 = extractelement <4 x float> %572, i32 0
+  %574 = extractelement <4 x float> %572, i32 1
+  %575 = extractelement <4 x float> %572, i32 2
+  %576 = fmul float %573, %44
+  %577 = fadd float %576, %553
+  %578 = fmul float %574, %45
+  %579 = fadd float %578, %555
+  %580 = fmul float %575, %46
+  %581 = fadd float %580, %557
+  %582 = call i32 @llvm.SI.packf16(float %577, float %579)
+  %583 = bitcast i32 %582 to float
+  %584 = call i32 @llvm.SI.packf16(float %581, float %283)
+  %585 = bitcast i32 %584 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %583, float %585, float %583, float %585)
+  ret void
+
+ENDIF66:                                          ; preds = %LOOP65
+  %586 = bitcast float %temp28.1 to i32
+  %587 = bitcast float %temp29.1 to i32
+  %588 = insertelement <8 x i32> %237, i32 %586, i32 4
+  %589 = insertelement <8 x i32> %588, i32 %587, i32 5
+  %590 = insertelement <8 x i32> %589, i32 undef, i32 6
+  %591 = insertelement <8 x i32> %590, i32 undef, i32 7
+  %592 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %591, <32 x i8> %62, <16 x i8> %64, i32 2)
+  %593 = extractelement <4 x float> %592, i32 3
+  %594 = fcmp oge float %temp30.1, %593
+  %595 = sext i1 %594 to i32
+  %596 = bitcast i32 %595 to float
+  %597 = bitcast float %596 to i32
+  %598 = and i32 %597, 1065353216
+  %599 = bitcast i32 %598 to float
+  %600 = fmul float 5.000000e-01, %temp32.0
+  %601 = fsub float -0.000000e+00, %600
+  %602 = fmul float %599, %temp32.0
+  %603 = fadd float %602, %601
+  %604 = fmul float %214, %603
+  %605 = fadd float %604, %temp28.1
+  %606 = fmul float %215, %603
+  %607 = fadd float %606, %temp29.1
+  %608 = fmul float %216, %603
+  %609 = fadd float %608, %temp30.1
+  %610 = fadd float %temp24.1, 1.000000e+00
+  %611 = fmul float %temp32.0, 5.000000e-01
+  br label %LOOP65
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
+
+; Function Attrs: readnone
+declare i32 @llvm.SI.tid() #2
+
+; Function Attrs: readonly
+declare float @ceil(float) #3
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.rsq(float) #2
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
+
+; Function Attrs: readnone
+declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
+
+; Function Attrs: readnone
+declare float @fabs(float) #2
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
+
+; Function Attrs: nounwind readonly
+declare float @llvm.pow.f32(float, float) #4
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { readnone }
+attributes #3 = { readonly }
+attributes #4 = { nounwind readonly }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll
new file mode 100644
index 000000000000..093234f71958
--- /dev/null
+++ b/test/CodeGen/R600/si-vector-hang.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+
+; CHECK: @test_8_min_char
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; CHECK: BUFFER_STORE_BYTE
+; ModuleID = 'radeon'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+target triple = "r600--"
+
+; Function Attrs: nounwind
+define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
+entry:
+  %0 = load i8 addrspace(1)* %in0, align 1
+  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+  %arrayidx2.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 1
+  %2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1
+  %3 = insertelement <8 x i8> %1, i8 %2, i32 1
+  %arrayidx6.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 2
+  %4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1
+  %5 = insertelement <8 x i8> %3, i8 %4, i32 2
+  %arrayidx10.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 3
+  %6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1
+  %7 = insertelement <8 x i8> %5, i8 %6, i32 3
+  %arrayidx.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 4
+  %8 = load i8 addrspace(1)* %arrayidx.i.i, align 1
+  %9 = insertelement <8 x i8> undef, i8 %8, i32 0
+  %arrayidx2.i9.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 5
+  %10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1
+  %11 = insertelement <8 x i8> %9, i8 %10, i32 1
+  %arrayidx6.i11.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 6
+  %12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1
+  %13 = insertelement <8 x i8> %11, i8 %12, i32 2
+  %arrayidx10.i13.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 7
+  %14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1
+  %15 = insertelement <8 x i8> %13, i8 %14, i32 3
+  %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  %16 = load i8 addrspace(1)* %in1, align 1
+  %17 = insertelement <8 x i8> undef, i8 %16, i32 0
+  %arrayidx2.i.i4 = getelementptr inbounds i8 addrspace(1)* %in1, i64 1
+  %18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1
+  %19 = insertelement <8 x i8> %17, i8 %18, i32 1
+  %arrayidx6.i.i5 = getelementptr inbounds i8 addrspace(1)* %in1, i64 2
+  %20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1
+  %21 = insertelement <8 x i8> %19, i8 %20, i32 2
+  %arrayidx10.i.i6 = getelementptr inbounds i8 addrspace(1)* %in1, i64 3
+  %22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1
+  %23 = insertelement <8 x i8> %21, i8 %22, i32 3
+  %arrayidx.i.i7 = getelementptr inbounds i8 addrspace(1)* %in1, i64 4
+  %24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1
+  %25 = insertelement <8 x i8> undef, i8 %24, i32 0
+  %arrayidx2.i9.i8 = getelementptr inbounds i8 addrspace(1)* %in1, i64 5
+  %26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1
+  %27 = insertelement <8 x i8> %25, i8 %26, i32 1
+  %arrayidx6.i11.i9 = getelementptr inbounds i8 addrspace(1)* %in1, i64 6
+  %28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1
+  %29 = insertelement <8 x i8> %27, i8 %28, i32 2
+  %arrayidx10.i13.i10 = getelementptr inbounds i8 addrspace(1)* %in1, i64 7
+  %30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1
+  %31 = insertelement <8 x i8> %29, i8 %30, i32 3
+  %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
+  %cond.i = select <8 x i1> %cmp.i, <8 x i8> %vecinit5.i, <8 x i8> %vecinit5.i11
+  %32 = extractelement <8 x i8> %cond.i, i32 0
+  store i8 %32, i8 addrspace(1)* %out, align 1
+  %33 = extractelement <8 x i8> %cond.i, i32 1
+  %arrayidx2.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 1
+  store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1
+  %34 = extractelement <8 x i8> %cond.i, i32 2
+  %arrayidx.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 2
+  store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1
+  %35 = extractelement <8 x i8> %cond.i, i32 3
+  %arrayidx2.i6.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 3
+  store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1
+  %arrayidx.i.i3 = getelementptr inbounds i8 addrspace(1)* %out, i64 4
+  %36 = extractelement <8 x i8> %cond.i, i32 4
+  store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1
+  %37 = extractelement <8 x i8> %cond.i, i32 5
+  %arrayidx2.i.i6.i = getelementptr inbounds i8 addrspace(1)* %out, i64 5
+  store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1
+  %38 = extractelement <8 x i8> %cond.i, i32 6
+  %arrayidx.i.i7.i = getelementptr inbounds i8 addrspace(1)* %out, i64 6
+  store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1
+  %39 = extractelement <8 x i8> %cond.i, i32 7
+  %arrayidx2.i6.i8.i = getelementptr inbounds i8 addrspace(1)* %out, i64 7
+  store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
+
+!0 = metadata !{null}
+!1 = metadata !{null}
+!2 = metadata !{null}
+!3 = metadata !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
+!4 = metadata !{null}
+!5 = metadata !{null}
+!6 = metadata !{null}
+!7 = metadata !{null}
+!8 = metadata !{null}
diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll
new file mode 100644
index 000000000000..1212cee9446e
--- /dev/null
+++ b/test/CodeGen/R600/sign_extend.ll
@@ -0,0 +1,12 @@
+
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK: V_ASHR
+define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c)  {
+entry:
+  %0 = mul i32 %a, %b
+  %1 = add i32 %0, %c
+  %2 = sext i32 %1 to i64
+  store i64 %2, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index 91a8eb7f57b4..9241799091c0 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -1,11 +1,28 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @sint_to_fp_v4i32
-; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @sint_to_fp_v2i32
+; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+; SI-CHECK: @sint_to_fp_v2i32
+; SI-CHECK: V_CVT_F32_I32_e32
+; SI-CHECK: V_CVT_F32_I32_e32
+define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
+  %result = sitofp <2 x i32> %in to <2 x float>
+  store <2 x float> %result, <2 x float> addrspace(1)* %out
+  ret void
+}
 
+; R600-CHECK: @sint_to_fp_v4i32
+; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK: @sint_to_fp_v4i32
+; SI-CHECK: V_CVT_F32_I32_e32
+; SI-CHECK: V_CVT_F32_I32_e32
+; SI-CHECK: V_CVT_F32_I32_e32
+; SI-CHECK: V_CVT_F32_I32_e32
 define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %value = load <4 x i32> addrspace(1) * %in
   %result = sitofp <4 x i32> %value to <4 x float>
diff --git a/test/CodeGen/R600/sint_to_fp64.ll b/test/CodeGen/R600/sint_to_fp64.ll
new file mode 100644
index 000000000000..5abc9d15965d
--- /dev/null
+++ b/test/CodeGen/R600/sint_to_fp64.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+
+; CHECK: @sint_to_fp64
+; CHECK: V_CVT_F64_I32_e32
+define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) {
+  %result = sitofp i32 %in to double
+  store double %result, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index 972542d346f4..fe9df104ae11 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -1,13 +1,54 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; CHECK: @ashr_v4i32
-; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK-LABEL: @ashr_v2i32
+;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+;SI-CHECK-LABEL: @ashr_v2i32
+;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = ashr <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @ashr_v4i32
+;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK-LABEL: @ashr_v4i32
+;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %a = load <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32> addrspace(1) * %b_ptr
   %result = ashr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+;EG-CHECK-LABEL: @ashr_i64
+;EG-CHECK: ASHR
+
+;SI-CHECK-LABEL: @ashr_i64
+;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
+define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = sext i32 %in to i64
+  %1 = ashr i64 %0, 8
+  store i64 %1, i64 addrspace(1)* %out
+  ret void
+}
+
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
index 5f63600b75f0..76373552fb16 100644
--- a/test/CodeGen/R600/srl.ll
+++ b/test/CodeGen/R600/srl.ll
@@ -1,12 +1,40 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; CHECK: @lshr_v4i32
-; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: @lshr_v2i32
+;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+;SI-CHECK: @lshr_v2i32
+;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = lshr <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+
+;EG-CHECK: @lshr_v4i32
+;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @lshr_v4i32
+;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %a = load <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32> addrspace(1) * %b_ptr
   %result = lshr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
new file mode 100644
index 000000000000..01210ce1f944
--- /dev/null
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -0,0 +1,8 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI < %s
+
+define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
+  %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+  store <4 x i32*> %p, <4 x i32*>* %out
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index 4d673f3ea326..5e51d5691747 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -1,13 +1,282 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+
+;===------------------------------------------------------------------------===;
+; Global Address Space
+;===------------------------------------------------------------------------===;
+
+; i8 store
+; EG-CHECK-LABEL: @store_i8
+; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
+; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
+; IG 0: Get the byte index and truncate the value
+; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
+; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
+; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43)
+; IG 1: Truncate the calculated shift amount for the mask
+; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
+; EG-CHECK-NEXT: 3
+; IG 2: Shift the value and the mask
+; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
+; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
+; EG-CHECK-NEXT: 255
+; IG 3: Initialize the Y and Z channels to zero
+;       XXX: An optimal scheduler should merge this into one of the previous IGs.
+; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
+; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
+
+; SI-CHECK-LABEL: @store_i8
+; SI-CHECK: BUFFER_STORE_BYTE
+
+define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
+entry:
+  store i8 %in, i8 addrspace(1)* %out
+  ret void
+}
+
+; i16 store
+; EG-CHECK-LABEL: @store_i16
+; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
+; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
+; IG 0: Get the byte index and truncate the value
+; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
+; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
+; IG 1: Truncate the calculated shift amount for the mask
+; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
+; EG-CHECK: 3
+; IG 2: Shift the value and the mask
+; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
+; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
+; EG-CHECK-NEXT: 65535
+; IG 3: Initialize the Y and Z channels to zero
+;       XXX: An optimal scheduler should merge this into one of the previous IGs.
+; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
+; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
+
+; SI-CHECK-LABEL: @store_i16
+; SI-CHECK: BUFFER_STORE_SHORT
+define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
+entry:
+  store i16 %in, i16 addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_v2i8
+; EG-CHECK: MEM_RAT MSKOR
+; EG-CHECK-NOT: MEM_RAT MSKOR
+; SI-CHECK-LABEL: @store_v2i8
+; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: BUFFER_STORE_BYTE
+define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
+entry:
+  %0 = trunc <2 x i32> %in to <2 x i8>
+  store <2 x i8> %0, <2 x i8> addrspace(1)* %out
+  ret void
+}
+
+
+; EG-CHECK-LABEL: @store_v2i16
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
+; CM-CHECK-LABEL: @store_v2i16
+; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
+; SI-CHECK-LABEL: @store_v2i16
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: BUFFER_STORE_SHORT
+define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
+entry:
+  %0 = trunc <2 x i32> %in to <2 x i16>
+  store <2 x i16> %0, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_v4i8
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
+; CM-CHECK-LABEL: @store_v4i8
+; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
+; SI-CHECK-LABEL: @store_v4i8
+; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: BUFFER_STORE_BYTE
+define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
+entry:
+  %0 = trunc <4 x i32> %in to <4 x i8>
+  store <4 x i8> %0, <4 x i8> addrspace(1)* %out
+  ret void
+}
 
 ; floating-point store
-; EG-CHECK: @store_f32
-; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; SI-CHECK: @store_f32
+; EG-CHECK-LABEL: @store_f32
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
+; CM-CHECK-LABEL: @store_f32
+; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK-LABEL: @store_f32
 ; SI-CHECK: BUFFER_STORE_DWORD
 
 define void @store_f32(float addrspace(1)* %out, float %in) {
   store float %in, float addrspace(1)* %out
   ret void
 }
+
+; EG-CHECK-LABEL: @store_v4i16
+; EG-CHECK: MEM_RAT MSKOR
+; EG-CHECK: MEM_RAT MSKOR
+; EG-CHECK: MEM_RAT MSKOR
+; EG-CHECK: MEM_RAT MSKOR
+; EG-CHECK-NOT: MEM_RAT MSKOR
+; SI-CHECK-LABEL: @store_v4i16
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK-NOT: BUFFER_STORE_BYTE
+define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
+entry:
+  %0 = trunc <4 x i32> %in to <4 x i16>
+  store <4 x i16> %0, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; vec2 floating-point stores
+; EG-CHECK-LABEL: @store_v2f32
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
+; CM-CHECK-LABEL: @store_v2f32
+; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
+; SI-CHECK-LABEL: @store_v2f32
+; SI-CHECK: BUFFER_STORE_DWORDX2
+
+define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
+  %1 = insertelement <2 x float> %0, float %b, i32 1
+  store <2 x float> %1, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_v4i32
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
+; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
+; CM-CHECK-LABEL: @store_v4i32
+; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
+; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
+; SI-CHECK-LABEL: @store_v4i32
+; SI-CHECK: BUFFER_STORE_DWORDX4
+define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+;===------------------------------------------------------------------------===;
+; Local Address Space
+;===------------------------------------------------------------------------===;
+
+; EG-CHECK-LABEL: @store_local_i8
+; EG-CHECK: LDS_BYTE_WRITE
+; SI-CHECK-LABEL: @store_local_i8
+; SI-CHECK: DS_WRITE_B8
+define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
+  store i8 %in, i8 addrspace(3)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_local_i16
+; EG-CHECK: LDS_SHORT_WRITE
+; SI-CHECK-LABEL: @store_local_i16
+; SI-CHECK: DS_WRITE_B16
+define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
+  store i16 %in, i16 addrspace(3)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_local_v2i16
+; EG-CHECK: LDS_WRITE
+; CM-CHECK-LABEL: @store_local_v2i16
+; CM-CHECK: LDS_WRITE
+; SI-CHECK-LABEL: @store_local_v2i16
+; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: DS_WRITE_B16
+define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
+entry:
+  store <2 x i16> %in, <2 x i16> addrspace(3)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_local_v4i8
+; EG-CHECK: LDS_WRITE
+; CM-CHECK-LABEL: @store_local_v4i8
+; CM-CHECK: LDS_WRITE
+; SI-CHECK-LABEL: @store_local_v4i8
+; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: DS_WRITE_B8
+define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
+entry:
+  store <4 x i8> %in, <4 x i8> addrspace(3)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_local_v2i32
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; CM-CHECK-LABEL: @store_local_v2i32
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; SI-CHECK-LABEL: @store_local_v2i32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
+entry:
+  store <2 x i32> %in, <2 x i32> addrspace(3)* %out
+  ret void
+}
+
+; EG-CHECK-LABEL: @store_local_v4i32
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; CM-CHECK-LABEL: @store_local_v4i32
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; SI-CHECK-LABEL: @store_local_v4i32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(3)* %out
+  ret void
+}
+
+; The stores in this function are combined by the optimizer to create a
+; 64-bit store with 32-bit alignment.  This is legal for SI and the legalizer
+; should not try to split the 64-bit store back into 2 32-bit stores.
+;
+; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
+; be two 32-bit stores.
+
+; EG-CHECK-LABEL: @vecload2
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
+; CM-CHECK-LABEL: @vecload2
+; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
+; SI-CHECK-LABEL: @vecload2
+; SI-CHECK: BUFFER_STORE_DWORDX2
+define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
+entry:
+  %0 = load i32 addrspace(2)* %mem, align 4
+  %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1
+  %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
+  store i32 %0, i32 addrspace(1)* %out, align 4
+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
index 5ffb7f1809f8..00589a0c6c86 100644
--- a/test/CodeGen/R600/store.r600.ll
+++ b/test/CodeGen/R600/store.r600.ll
@@ -4,7 +4,7 @@
 
 ; v4i32 store
 ; EG-CHECK: @store_v4i32
-; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 
 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %1 = load <4 x i32> addrspace(1) * %in
@@ -14,7 +14,7 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
 
 ; v4f32 store
 ; EG-CHECK: @store_v4f32
-; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %1 = load <4 x float> addrspace(1) * %in
   store <4 x float> %1, <4 x float> addrspace(1)* %out
diff --git a/test/CodeGen/R600/structurize.ll b/test/CodeGen/R600/structurize.ll
new file mode 100644
index 000000000000..c2acd938ad05
--- /dev/null
+++ b/test/CodeGen/R600/structurize.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood -mattr=disable-irstructurizer | FileCheck %s
+; Test case for a crash in the AMDILCFGStructurizer from a CFG like this:
+;
+;                            entry
+;                           /     \
+;               diamond_head       branch_from
+;                 /      \           |
+;    diamond_false        diamond_true
+;                 \      /
+;                   done
+;
+; When the diamond_true branch had more than 100 instructions.
+;
+;
+
+; CHECK-LABEL: @branch_into_diamond
+; === entry block:
+; CHECK: ALU_PUSH_BEFORE
+; === Branch instruction (IF):
+; CHECK: JUMP
+  ; === branch_from block
+  ; CHECK: ALU
+  ; === Duplicated diamond_true block (There can be more than one ALU clause):
+  ; === XXX: We should be able to optimize this so the basic block is not
+  ; === duplicated.  See comments in
+  ; === AMDGPUCFGStructurizer::improveSimpleJumpintoIf()
+  ; CHECK: ALU
+; === Branch instruction (ELSE):
+; CHECK: ELSE
+  ; === diamond_head block:
+  ; CHECK: ALU_PUSH_BEFORE
+  ; === Branch instruction (IF):
+  ; CHECK: JUMP
+    ; === diamond_true block (There can be more than one ALU clause):
+    ; ALU
+  ; === Branch instruction (ELSE):
+  ; CHECK: ELSE
+    ; === diamond_false block plus implicit ENDIF
+    ; CHECK: ALU_POP_AFTER
+; === Branch instruction (ENDIF):
+; CHECK: POP
+; === done block:
+; CHECK: ALU
+; CHECK: MEM_RAT_CACHELESS
+; CHECK: CF_END
+
+
+define void @branch_into_diamond(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = icmp ne i32 %a, 0
+  br i1 %0, label %diamond_head, label %branch_from  ; %a == 0 takes the side entry
+
+diamond_head:
+  %1 = icmp ne i32 %a, 1
+  br i1 %1, label %diamond_true, label %diamond_false
+
+branch_from:
+  %2 = add i32 %a, 1
+  br label %diamond_true  ; jumps into one arm of the diamond, bypassing diamond_head
+
+diamond_false:
+  %3 = add i32 %a, 2
+  br label %done
+
+diamond_true:
+  %4 = phi i32 [%2, %branch_from], [%a, %diamond_head]  ; reached from both predecessors
+  ; This block needs to be > 100 ISA instructions to hit the bug,
+  ; so we'll use udiv instructions.
+  %div0 = udiv i32 %a, %b
+  %div1 = udiv i32 %div0, %4
+  %div2 = udiv i32 %div1, 11
+  %div3 = udiv i32 %div2, %a
+  %div4 = udiv i32 %div3, %b
+  %div5 = udiv i32 %div4, %c
+  %div6 = udiv i32 %div5, %div0
+  %div7 = udiv i32 %div6, %div1
+  br label %done
+
+done:
+  %5 = phi i32 [%3, %diamond_false], [%div7, %diamond_true]  ; join of both diamond arms
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/structurize1.ll b/test/CodeGen/R600/structurize1.ll
new file mode 100644
index 000000000000..8c10301a1686
--- /dev/null
+++ b/test/CodeGen/R600/structurize1.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -march=r600 -mattr=disable-ifcvt -mcpu=redwood | FileCheck %s
+
+; This tests for a bug where the AMDILCFGStructurizer was crashing on loops
+; like this:
+;
+; for (i = 0; i < x; i++) {
+;   if (cond0) {
+;     if (cond1) {
+;
+;     } else {
+;
+;     }
+;     if (cond2) {
+;
+;     }
+;   }
+; }
+
+; CHECK-LABEL: @if_inside_loop
+; CHECK: LOOP_START_DX10
+; CHECK: END_LOOP
+define void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  br label %for.body
+
+for.body:
+  %0 = phi i32 [0, %entry], [%inc, %for.inc]  ; loop counter, bumped once per iteration
+  %val = phi i32 [0, %entry], [%val.for.inc, %for.inc]  ; value carried around the loop
+  %inc = add i32 %0, 1
+  %1 = icmp ult i32 10, %a
+  br i1 %1, label %for.inc, label %if.then  ; outer if: body is skipped when 10 <u %a
+
+if.then:
+  %2 = icmp ne i32 0, %b
+  br i1 %2, label %if.then.true, label %if.then.false  ; first inner if/else
+
+if.then.true:
+  %3 = add i32 %a, %val
+  br label %if
+
+if.then.false:
+  %4 = mul i32 %a, %val
+  br label %if
+
+if:
+  %val.if = phi i32 [%3, %if.then.true], [%4, %if.then.false]  ; join of the if/else arms
+  %5 = icmp ne i32 0, %c
+  br i1 %5, label %if.true, label %for.inc  ; second inner if (no else arm)
+
+if.true:
+  %6 = add i32 %a, %val.if
+  br label %for.inc
+
+for.inc:
+  %val.for.inc = phi i32 [%val, %for.body], [%val.if, %if], [%6, %if.true]
+  %7 = icmp ne i32 0, %d
+  br i1 %7, label %for.body, label %exit  ; back edge is taken while %d != 0
+
+exit:
+  store i32 %val.for.inc, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index 12bfba39753e..5fdd2b820c1a 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -1,11 +1,36 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: @test2
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+;SI-CHECK: @test2
+;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = sub <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @test4
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test4
+;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
new file mode 100644
index 000000000000..16c3f191935c
--- /dev/null
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -0,0 +1,129 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+
+;EG-CHECK: @main
+;EG-CHECK: EXPORT T{{[0-9]+}}.XYXX
+;EG-CHECK: EXPORT T{{[0-9]+}}.ZXXX
+;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
+;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW
+
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = extractelement <4 x float> %reg1, i32 2
+  %3 = extractelement <4 x float> %reg1, i32 3
+  %4 = load <4 x float> addrspace(8)* null
+  %5 = extractelement <4 x float> %4, i32 1
+  %6 = load <4 x float> addrspace(8)* null
+  %7 = extractelement <4 x float> %6, i32 2
+  %8 = load <4 x float> addrspace(8)* null
+  %9 = extractelement <4 x float> %8, i32 0
+  %10 = fmul float 0.000000e+00, %9
+  %11 = load <4 x float> addrspace(8)* null
+  %12 = extractelement <4 x float> %11, i32 0
+  %13 = fmul float %5, %12
+  %14 = load <4 x float> addrspace(8)* null
+  %15 = extractelement <4 x float> %14, i32 0
+  %16 = fmul float 0.000000e+00, %15
+  %17 = load <4 x float> addrspace(8)* null
+  %18 = extractelement <4 x float> %17, i32 0
+  %19 = fmul float 0.000000e+00, %18
+  %20 = load <4 x float> addrspace(8)* null
+  %21 = extractelement <4 x float> %20, i32 0
+  %22 = fmul float %7, %21
+  %23 = load <4 x float> addrspace(8)* null
+  %24 = extractelement <4 x float> %23, i32 0
+  %25 = fmul float 0.000000e+00, %24
+  %26 = load <4 x float> addrspace(8)* null
+  %27 = extractelement <4 x float> %26, i32 0
+  %28 = fmul float 0.000000e+00, %27
+  %29 = load <4 x float> addrspace(8)* null
+  %30 = extractelement <4 x float> %29, i32 0
+  %31 = fmul float 0.000000e+00, %30
+  %32 = load <4 x float> addrspace(8)* null
+  %33 = extractelement <4 x float> %32, i32 0
+  %34 = fmul float 0.000000e+00, %33
+  %35 = load <4 x float> addrspace(8)* null
+  %36 = extractelement <4 x float> %35, i32 0
+  %37 = fmul float 0.000000e+00, %36
+  %38 = load <4 x float> addrspace(8)* null
+  %39 = extractelement <4 x float> %38, i32 0
+  %40 = fmul float 1.000000e+00, %39
+  %41 = load <4 x float> addrspace(8)* null
+  %42 = extractelement <4 x float> %41, i32 0
+  %43 = fmul float 0.000000e+00, %42
+  %44 = load <4 x float> addrspace(8)* null
+  %45 = extractelement <4 x float> %44, i32 0
+  %46 = fmul float 0.000000e+00, %45
+  %47 = load <4 x float> addrspace(8)* null
+  %48 = extractelement <4 x float> %47, i32 0
+  %49 = fmul float 0.000000e+00, %48
+  %50 = load <4 x float> addrspace(8)* null
+  %51 = extractelement <4 x float> %50, i32 0
+  %52 = fmul float 0.000000e+00, %51
+  %53 = load <4 x float> addrspace(8)* null
+  %54 = extractelement <4 x float> %53, i32 0
+  %55 = fmul float 1.000000e+00, %54
+  %56 = insertelement <4 x float> undef, float %0, i32 0
+  %57 = insertelement <4 x float> %56, float %1, i32 1
+  %58 = insertelement <4 x float> %57, float %2, i32 2
+  %59 = insertelement <4 x float> %58, float %3, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %59, i32 60, i32 1)
+  %60 = insertelement <4 x float> undef, float %10, i32 0
+  %61 = insertelement <4 x float> %60, float %13, i32 1
+  %62 = insertelement <4 x float> %61, float %16, i32 2
+  %63 = insertelement <4 x float> %62, float %19, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %63, i32 0, i32 2)
+  %64 = insertelement <4 x float> undef, float %22, i32 0
+  %65 = insertelement <4 x float> %64, float %25, i32 1
+  %66 = insertelement <4 x float> %65, float %28, i32 2
+  %67 = insertelement <4 x float> %66, float %31, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %67, i32 1, i32 2)
+  %68 = insertelement <4 x float> undef, float %34, i32 0
+  %69 = insertelement <4 x float> %68, float %37, i32 1
+  %70 = insertelement <4 x float> %69, float %40, i32 2
+  %71 = insertelement <4 x float> %70, float %43, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %71, i32 2, i32 2)
+  %72 = insertelement <4 x float> undef, float %46, i32 0
+  %73 = insertelement <4 x float> %72, float %49, i32 1
+  %74 = insertelement <4 x float> %73, float %52, i32 2
+  %75 = insertelement <4 x float> %74, float %55, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %75, i32 3, i32 2)
+  ret void
+}
+
+; EG-CHECK: @main2
+; EG-CHECK: T{{[0-9]+}}.XY__
+; EG-CHECK: T{{[0-9]+}}.YXZ0
+
+define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+main_body:
+  %0 = extractelement <4 x float> %reg1, i32 0
+  %1 = extractelement <4 x float> %reg1, i32 1
+  %2 = fadd float %0, 2.5
+  %3 = fmul float %1, 3.5
+  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = call float @llvm.cos.f32(float %5)
+  %7 = load <4 x float> addrspace(8)* null
+  %8 = extractelement <4 x float> %7, i32 0
+  %9 = load <4 x float> addrspace(8)* null
+  %10 = extractelement <4 x float> %9, i32 1
+  %11 = insertelement <4 x float> undef, float %2, i32 0
+  %12 = insertelement <4 x float> %11, float %3, i32 1
+  call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
+  %13 = insertelement <4 x float> undef, float %6, i32 0
+  %14 = insertelement <4 x float> %13, float %8, i32 1
+  %15 = insertelement <4 x float> %14, float %10, i32 2
+  %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare float @llvm.cos.f32(float) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { nounwind readonly }
diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll
new file mode 100644
index 000000000000..cbb9c50974a4
--- /dev/null
+++ b/test/CodeGen/R600/tex-clause-antidep.ll
@@ -0,0 +1,25 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX
+;CHECK-NEXT: ALU
+
+define void @test(<4 x float> inreg %reg0) #0 {
+  %1 = extractelement <4 x float> %reg0, i32 0
+  %2 = extractelement <4 x float> %reg0, i32 1
+  %3 = extractelement <4 x float> %reg0, i32 2
+  %4 = extractelement <4 x float> %reg0, i32 3
+  %5 = insertelement <4 x float> undef, float %1, i32 0
+  %6 = insertelement <4 x float> %5, float %2, i32 1
+  %7 = insertelement <4 x float> %6, float %3, i32 2
+  %8 = insertelement <4 x float> %7, float %4, i32 3
+  %9 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %10 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %11 = fadd <4 x float> %9, %10
+  call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll
new file mode 100644
index 000000000000..789538af5821
--- /dev/null
+++ b/test/CodeGen/R600/texture-input-merge.ll
@@ -0,0 +1,31 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK-NOT: MOV
+
+define void @test(<4 x float> inreg %reg0) #0 {
+  %1 = extractelement <4 x float> %reg0, i32 0
+  %2 = extractelement <4 x float> %reg0, i32 1
+  %3 = extractelement <4 x float> %reg0, i32 2
+  %4 = extractelement <4 x float> %reg0, i32 3
+  %5 = fmul float %1, 3.0
+  %6 = fmul float %2, 3.0
+  %7 = fmul float %3, 3.0
+  %8 = fmul float %4, 3.0
+  %9 = insertelement <4 x float> undef, float %5, i32 0
+  %10 = insertelement <4 x float> %9, float %6, i32 1
+  %11 = insertelement <4 x float> undef, float %7, i32 0
+  %12 = insertelement <4 x float> %11, float %5, i32 1
+  %13 = insertelement <4 x float> undef, float %8, i32 0
+  %14 = call <4 x float> @llvm.R600.tex(<4 x float> %10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %15 = call <4 x float> @llvm.R600.tex(<4 x float> %12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %16 = call <4 x float> @llvm.R600.tex(<4 x float> %13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %17 = fadd <4 x float> %14, %15
+  %18 = fadd <4 x float> %17, %16
+  call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+\ No newline at end of file
diff --git a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
new file mode 100644
index 000000000000..ec959c21798a
--- /dev/null
+++ b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This tests for a bug in the SelectionDAG where custom lowered truncated
+; vector stores at the end of a basic block were not being added to the
+; LegalizedNodes list, which triggered an assertion failure.
+
+; CHECK-LABEL: @test
+; CHECK: MEM_RAT_CACHELESS STORE_RAW
+define void @test(<4 x i8> addrspace(1)* %out, i32 %cond, <4 x i8> %in) {
+entry:
+  %0 = icmp eq i32 %cond, 0
+  br i1 %0, label %if, label %done
+
+if:
+  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
+  br label %done
+
+done:
+  ret void
+}
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
new file mode 100644
index 000000000000..0bd320ad9ceb
--- /dev/null
+++ b/test/CodeGen/R600/trunc.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+
+define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
+; SI-LABEL: @trunc_i64_to_i32_store
+; SI: S_LOAD_DWORD s0, s[0:1], 11
+; SI: V_MOV_B32_e32 v0, s0
+; SI: BUFFER_STORE_DWORD v0
+
+; EG-LABEL: @trunc_i64_to_i32_store
+; EG: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG: LSHR
+; EG-NEXT: 2(
+  %result = trunc i64 %in to i32  ; keep only the low 32 bits of %in
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @trunc_shl_i64:
+; SI: S_LOAD_DWORDX2
+; SI: S_LOAD_DWORDX2 [[SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: S_LSHL_B64 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, [[SREG]], 2
+; SI: MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
+define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
+  %b = shl i64 %a, 2
+  %result = trunc i64 %b to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
index b81e3667ce89..53713217a1c0 100644
--- a/test/CodeGen/R600/udiv.ll
+++ b/test/CodeGen/R600/udiv.ll
@@ -1,11 +1,43 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+
+;EG-CHECK-LABEL: @test
+;EG-CHECK-NOT: SETGE_INT
+;EG-CHECK: CF_END
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1) * %in
+  %b = load i32 addrspace(1) * %b_ptr
+  %result = udiv i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
 
 ;The code generated by udiv is long and complex and may frequently change.
 ;The goal of this test is to make sure the ISel doesn't fail when it gets
 ;a v4i32 udiv
-;CHECK: CF_END
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+;EG-CHECK-LABEL: @test2
+;EG-CHECK: CF_END
+;SI-CHECK-LABEL: @test2
+;SI-CHECK: S_ENDPGM
+
+define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = udiv <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK-LABEL: @test4
+;EG-CHECK: CF_END
+;SI-CHECK-LABEL: @test4
+;SI-CHECK: S_ENDPGM
+
+define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index 9054fc4c2cc8..a5ac3555afde 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -1,14 +1,46 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; CHECK: @uint_to_fp_v4i32
-; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK-LABEL: @uint_to_fp_v2i32
+; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+; SI-CHECK-LABEL: @uint_to_fp_v2i32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_CVT_F32_U32_e32
+define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
+  %result = uitofp <2 x i32> %in to <2 x float>
+  store <2 x float> %result, <2 x float> addrspace(1)* %out
+  ret void
+}
 
+; R600-CHECK-LABEL: @uint_to_fp_v4i32
+; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK-LABEL: @uint_to_fp_v4i32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_CVT_F32_U32_e32
 define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %value = load <4 x i32> addrspace(1) * %in
   %result = uitofp <4 x i32> %value to <4 x float>
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }
+
+; R600-CHECK-LABEL: @uint_to_fp_i64_f32
+; R600-CHECK: UINT_TO_FLT
+; R600-CHECK: UINT_TO_FLT
+; R600-CHECK: MULADD_IEEE
+; SI-CHECK-LABEL: @uint_to_fp_i64_f32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_MAD_F32
+define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
+entry:
+  %0 = uitofp i64 %in to float
+  store float %0, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
new file mode 100644
index 000000000000..2824ff8a88c5
--- /dev/null
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @unaligned_load_store_i32:
+; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
+define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+  %v = load i32 addrspace(3)* %p, align 1
+  store i32 %v, i32 addrspace(3)* %r, align 1
+  ret void
+}
+
+; SI-LABEL: @unaligned_load_store_v4i32:
+; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
+define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
+  %v = load <4 x i32> addrspace(3)* %p, align 1
+  store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
+  ret void
+}
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
index b311f4cfa7f9..f986a0251dce 100644
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -2,8 +2,9 @@
 
 ; These tests are for condition codes that are not supported by the hardware
 
-; CHECK: @slt
-; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @slt
+; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5(7.006492e-45)
 define void @slt(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -13,8 +14,9 @@ entry:
   ret void
 }
 
-; CHECK: @ult_i32
-; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @ult_i32
+; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5(7.006492e-45)
 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -24,9 +26,11 @@ entry:
   ret void
 }
 
-; CHECK: @ult_float
-; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @ult_float
+; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
 ; CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
+; CHECK-NEXT: LSHR *
 define void @ult_float(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ult float %in, 5.0
@@ -35,9 +39,22 @@ entry:
   ret void
 }
 
-; CHECK: @olt
-; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
-;CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-LABEL: @ult_float_native
+; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
+define void @ult_float_native(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = fcmp ult float %in, 5.0
+  %1 = select i1 %0, float 0.0, float 1.0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @olt
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @olt(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
@@ -46,8 +63,9 @@ entry:
   ret void
 }
 
-; CHECK: @sle
-; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @sle
+; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 6(8.407791e-45)
 define void @sle(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -57,8 +75,9 @@ entry:
   ret void
 }
 
-; CHECK: @ule_i32
-; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @ule_i32
+; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 6(8.407791e-45)
 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -68,9 +87,11 @@ entry:
   ret void
 }
 
-; CHECK: @ule_float
-; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @ule_float
+; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
 ; CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
+; CHECK-NEXT: LSHR *
 define void @ule_float(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ule float %in, 5.0
@@ -79,8 +100,21 @@ entry:
   ret void
 }
 
-; CHECK: @ole
-; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-LABEL: @ule_float_native
+; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
+define void @ule_float_native(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = fcmp ule float %in, 5.0
+  %1 = select i1 %0, float 0.0, float 1.0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @ole
+; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR *
 ; CHECK-NEXT:1084227584(5.000000e+00)
 define void @ole(float addrspace(1)* %out, float %in) {
 entry:
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
index dad02dd76f0a..e808e3d2cf19 100644
--- a/test/CodeGen/R600/urecip.ll
+++ b/test/CodeGen/R600/urecip.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
 
 ;CHECK: V_RCP_IFLAG_F32_e32
 
diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
index a2cc0bd2e84e..8045145bd10d 100644
--- a/test/CodeGen/R600/urem.ll
+++ b/test/CodeGen/R600/urem.ll
@@ -1,11 +1,30 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
 ;The code generated by urem is long and complex and may frequently change.
 ;The goal of this test is to make sure the ISel doesn't fail when it gets
-;a v4i32 urem
-;CHECK: CF_END
+;a v2i32/v4i32 urem
 
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+;EG-CHECK: @test2
+;EG-CHECK: CF_END
+;SI-CHECK: @test2
+;SI-CHECK: S_ENDPGM
+
+define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
+  %a = load <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %result = urem <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @test4
+;EG-CHECK: CF_END
+;SI-CHECK: @test4
+;SI-CHECK: S_ENDPGM
+
+define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll
new file mode 100644
index 000000000000..7ea7a5c079cf
--- /dev/null
+++ b/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
+
+; NI-CHECK: @vtx_fetch32
+; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
+; CM-CHECK: @vtx_fetch32
+; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
+
+define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %0 = load i32 addrspace(1)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; NI-CHECK: @vtx_fetch128
+; NI-CHECK: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
+; XXX: Add a case for Cayman when v4i32 stores are supported.
+
+define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %0 = load <4 x i32> addrspace(1)* %in
+  store <4 x i32> %0, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index 6e459df847e7..dca7b067b26e 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -1,10 +1,53 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; CHECK: @test_select_v4i32
-; CHECK: CNDE_INT T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: @test_select_v2i32
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test_select_v2i32
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+
+define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
+entry:
+  %0 = load <2 x i32> addrspace(1)* %in0
+  %1 = load <2 x i32> addrspace(1)* %in1
+  %cmp = icmp ne <2 x i32> %0, %1
+  %result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @test_select_v2f32
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test_select_v2f32
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+
+define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
+entry:
+  %0 = load <2 x float> addrspace(1)* %in0
+  %1 = load <2 x float> addrspace(1)* %in1
+  %cmp = fcmp une <2 x float> %0, %1
+  %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
+  store <2 x float> %result, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @test_select_v4i32
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @test_select_v4i32
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
 
 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
 entry:
@@ -15,3 +58,19 @@ entry:
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+;EG-CHECK: @test_select_v4f32
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
+entry:
+  %0 = load <4 x float> addrspace(1)* %in0
+  %1 = load <4 x float> addrspace(1)* %in1
+  %cmp = fcmp une <4 x float> %0, %1
+  %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/vselect64.ll b/test/CodeGen/R600/vselect64.ll
new file mode 100644
index 000000000000..604695b4fa6b
--- /dev/null
+++ b/test/CodeGen/R600/vselect64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck  %s
+; XXX: Merge this test into vselect.ll once SI supports 64-bit select.
+
+; CHECK-LABEL: @test_select_v4i64
+; Make sure the vectors aren't being stored on the stack.  We know they are
+; being stored on the stack if the shader uses at least 10 registers.
+; CHECK-NOT: {{\**}} MOV T{{[0-9][0-9]}}.X
+define void @test_select_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> %c) {
+entry:
+       %cmp = icmp ne  <4 x i32> %c, <i32 0, i32 0, i32 0, i32 0>
+       %result = select <4 x i1> %cmp, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> <i64 4, i64 5, i64 6, i64 7>
+       store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+       ret void
+}
+
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/R600/vtx-schedule.ll
new file mode 100644
index 000000000000..97d37ed84ce2
--- /dev/null
+++ b/test/CodeGen/R600/vtx-schedule.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test is for a scheduler bug where VTX_READ instructions that used
+; the result of another VTX_READ instruction were being grouped in the
+; same fetch clause.
+
+; CHECK: @test
+; CHECK: Fetch clause
+; CHECK: VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
+; CHECK: Fetch clause
+; CHECK: VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
+define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
+entry:
+  %0 = load i32 addrspace(1)* addrspace(1)* %in0
+  %1 = load i32 addrspace(1)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
new file mode 100644
index 000000000000..2cf88fe9f735
--- /dev/null
+++ b/test/CodeGen/R600/wait.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=r600 -mcpu=SI --verify-machineinstrs | FileCheck %s
+
+;CHECK-LABEL: @main
+;CHECK: S_WAITCNT lgkmcnt(0)
+;CHECK: S_WAITCNT vmcnt(0)
+;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
+
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
+main_body:
+  %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
+  %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
+  %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6)
+  %13 = extractelement <4 x float> %12, i32 0
+  %14 = extractelement <4 x float> %12, i32 1
+  %15 = extractelement <4 x float> %12, i32 2
+  %16 = extractelement <4 x float> %12, i32 3
+  %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
+  %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
+  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6)
+  %20 = extractelement <4 x float> %19, i32 0
+  %21 = extractelement <4 x float> %19, i32 1
+  %22 = extractelement <4 x float> %19, i32 2
+  %23 = extractelement <4 x float> %19, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { nounwind readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
new file mode 100644
index 000000000000..9618d7fb1970
--- /dev/null
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -0,0 +1,211 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+
+; R600-CHECK: @ngroups_x
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[0].X
+; SI-CHECK: @ngroups_x
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @ngroups_x (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.ngroups.x() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @ngroups_y
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[0].Y
+; SI-CHECK: @ngroups_y
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 1
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @ngroups_y (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.ngroups.y() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @ngroups_z
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[0].Z
+; SI-CHECK: @ngroups_z
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 2
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @ngroups_z (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.ngroups.z() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @global_size_x
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[0].W
+; SI-CHECK: @global_size_x
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 3
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @global_size_x (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.global.size.x() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @global_size_y
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[1].X
+; SI-CHECK: @global_size_y
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 4
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @global_size_y (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.global.size.y() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @global_size_z
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[1].Y
+; SI-CHECK: @global_size_z
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 5
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @global_size_z (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.global.size.z() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @local_size_x
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[1].Z
+; SI-CHECK: @local_size_x
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 6
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @local_size_x (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.local.size.x() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @local_size_y
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[1].W
+; SI-CHECK: @local_size_y
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 7
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @local_size_y (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.local.size.y() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; R600-CHECK: @local_size_z
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; R600-CHECK: MOV [[VAL]], KC0[2].X
+; SI-CHECK: @local_size_z
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 8
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @local_size_z (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.local.size.z() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; The tgid values are stored in SGPRs offset by the number of user SGPRs.
+; Currently we always use exactly 2 user SGPRs for the pointer to the
+; kernel arguments, but this may change in the future.
+
+; SI-CHECK: @tgid_x
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s2
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @tgid_x (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tgid.x() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK: @tgid_y
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s3
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @tgid_y (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tgid.y() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK: @tgid_z
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
+; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+define void @tgid_z (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tgid.z() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK: @tidig_x
+; SI-CHECK: BUFFER_STORE_DWORD v0
+define void @tidig_x (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tidig.x() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK: @tidig_y
+; SI-CHECK: BUFFER_STORE_DWORD v1
+define void @tidig_y (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tidig.y() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK: @tidig_z
+; SI-CHECK: BUFFER_STORE_DWORD v2
+define void @tidig_z (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.read.tidig.z() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.r600.read.ngroups.x() #0
+declare i32 @llvm.r600.read.ngroups.y() #0
+declare i32 @llvm.r600.read.ngroups.z() #0
+
+declare i32 @llvm.r600.read.global.size.x() #0
+declare i32 @llvm.r600.read.global.size.y() #0
+declare i32 @llvm.r600.read.global.size.z() #0
+
+declare i32 @llvm.r600.read.local.size.x() #0
+declare i32 @llvm.r600.read.local.size.y() #0
+declare i32 @llvm.r600.read.local.size.z() #0
+
+declare i32 @llvm.r600.read.tgid.x() #0
+declare i32 @llvm.r600.read.tgid.y() #0
+declare i32 @llvm.r600.read.tgid.z() #0
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.r600.read.tidig.y() #0
+declare i32 @llvm.r600.read.tidig.z() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
new file mode 100644
index 000000000000..b1cbe3ffbc41
--- /dev/null
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; We want every MULLO_INT instruction to be last in its instruction group.
+;CHECK: @fill3d
+;CHECK-NOT: MULLO_INT T{{[0-9]+}}
+
+; ModuleID = 'radeon'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+target triple = "r600--"
+
+; Function Attrs: nounwind
+define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+  %x.i = tail call i32 @llvm.r600.read.global.size.x() #1
+  %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1
+  %mul = mul i32 %y.i18, %x.i
+  %z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1
+  %mul3 = mul i32 %mul, %z.i17
+  %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
+  %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
+  %mul26.i = mul i32 %x.i12.i, %x.i.i
+  %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.i16 = add i32 %x.i4.i, %mul26.i
+  %mul7 = mul i32 %add.i16, %y.i18
+  %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
+  %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
+  %mul30.i = mul i32 %y.i14.i, %y.i.i
+  %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
+  %add.i14 = add i32 %mul30.i, %mul7
+  %mul819 = add i32 %add.i14, %y.i6.i
+  %add = mul i32 %mul819, %z.i17
+  %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
+  %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
+  %mul33.i = mul i32 %z.i16.i, %z.i.i
+  %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
+  %add.i = add i32 %z.i8.i, %mul33.i
+  %add13 = add i32 %add.i, %add
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13
+  store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.global.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.global.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.global.size.z() #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!opencl.kernels = !{!0, !1, !2}
+
+!0 = metadata !{null}
+!1 = metadata !{null}
+!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index cf612e0a1fbe..c12b0c1ce2c9 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -1,13 +1,56 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; CHECK: @xor_v4i32
-; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: @xor_v2i32
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+;SI-CHECK: @xor_v2i32
+;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+
+define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
+  %a = load <2 x i32> addrspace(1) * %in0
+  %b = load <2 x i32> addrspace(1) * %in1
+  %result = xor <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+;EG-CHECK: @xor_v4i32
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+;SI-CHECK: @xor_v4i32
+;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
+  %a = load <4 x i32> addrspace(1) * %in0
+  %b = load <4 x i32> addrspace(1) * %in1
   %result = xor <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+;EG-CHECK: @xor_i1
+;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
+
+;SI-CHECK: @xor_i1
+;SI-CHECK: S_XOR_B64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+
+define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+  %a = load float addrspace(1) * %in0
+  %b = load float addrspace(1) * %in1
+  %acmp = fcmp oge float %a, 0.000000e+00
+  %bcmp = fcmp oge float %b, 0.000000e+00
+  %xor = xor i1 %acmp, %bcmp
+  %result = select i1 %xor, float %a, float %b
+  store float %result, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
new file mode 100644
index 000000000000..481b3b328259
--- /dev/null
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK: @test
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
+; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
+
+; SI-CHECK: @test
+; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]]], 0
+; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[ZERO]]{{\]}}
+define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = mul i32 %a, %b
+  %1 = add i32 %0, %c
+  %2 = zext i32 %1 to i64
+  store i64 %2, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/SI/sanity.ll b/test/CodeGen/SI/sanity.ll
deleted file mode 100644
index 62cdcf5eca28..000000000000
--- a/test/CodeGen/SI/sanity.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
-
-; CHECK: S_ENDPGM
-
-define void @main() {
-main_body:
-  call void @llvm.AMDGPU.shader.type(i32 1)
-  %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
-  %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0
-  %2 = load <4 x i32> addrspace(2)* %1
-  %3 = call i32 @llvm.SI.vs.load.buffer.index()
-  %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3)
-  %5 = extractelement <4 x float> %4, i32 0
-  %6 = extractelement <4 x float> %4, i32 1
-  %7 = extractelement <4 x float> %4, i32 2
-  %8 = extractelement <4 x float> %4, i32 3
-  %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
-  %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1
-  %11 = load <4 x i32> addrspace(2)* %10
-  %12 = call i32 @llvm.SI.vs.load.buffer.index()
-  %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12)
-  %14 = extractelement <4 x float> %13, i32 0
-  %15 = extractelement <4 x float> %13, i32 1
-  %16 = extractelement <4 x float> %13, i32 2
-  %17 = extractelement <4 x float> %13, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17)
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8)
-  ret void
-}
-
-declare void @llvm.AMDGPU.shader.type(i32)
-
-declare i32 @llvm.SI.vs.load.buffer.index() readnone
-
-declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32)
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/SPARC/2011-01-11-CC.ll b/test/CodeGen/SPARC/2011-01-11-CC.ll
index f676fd836947..50f3a65ff9a9 100755
--- a/test/CodeGen/SPARC/2011-01-11-CC.ll
+++ b/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=sparc <%s | FileCheck %s -check-prefix=V8
 ; RUN: llc -march=sparc -mattr=v9 <%s | FileCheck %s -check-prefix=V9
+; RUN: llc -mtriple=sparc64-unknown-linux <%s | FileCheck %s -check-prefix=SPARC64
 
 
 define i32 @test_addx(i64 %a, i64 %b, i64 %c) nounwind readnone noinline {
@@ -21,10 +22,10 @@ entry:
 define i32 @test_select_int_icc(i32 %a, i32 %b, i32 %c) nounwind readnone noinline {
 entry:
 ; V8: test_select_int_icc
-; V8: subcc
+; V8: cmp
 ; V8: {{be|bne}}
 ; V9: test_select_int_icc
-; V9: subcc
+; V9: cmp
 ; V9-NOT: {{be|bne}}
 ; V9: mov{{e|ne}} %icc
   %0 = icmp eq i32 %a, 0
@@ -36,10 +37,10 @@ entry:
 define float @test_select_fp_icc(i32 %a, float %f1, float %f2) nounwind readnone noinline {
 entry:
 ; V8: test_select_fp_icc
-; V8: subcc
+; V8: cmp
 ; V8: {{be|bne}}
 ; V9: test_select_fp_icc
-; V9: subcc
+; V9: cmp
 ; V9-NOT: {{be|bne}}
 ; V9: fmovs{{e|ne}} %icc
   %0 = icmp eq i32 %a, 0
@@ -50,10 +51,10 @@ entry:
 define double @test_select_dfp_icc(i32 %a, double %f1, double %f2) nounwind readnone noinline {
 entry:
 ; V8: test_select_dfp_icc
-; V8: subcc
+; V8: cmp
 ; V8: {{be|bne}}
 ; V9: test_select_dfp_icc
-; V9: subcc
+; V9: cmp
 ; V9-NOT: {{be|bne}}
 ; V9: fmovd{{e|ne}} %icc
   %0 = icmp eq i32 %a, 0
@@ -63,11 +64,13 @@ entry:
 
 define i32 @test_select_int_fcc(float %f, i32 %a, i32 %b) nounwind readnone noinline {
 entry:
-;V8: test_select_int_fcc
+;V8-LABEL: test_select_int_fcc:
 ;V8: fcmps
+;V8-NEXT: nop
 ;V8: {{fbe|fbne}}
-;V9: test_select_int_fcc
+;V9-LABEL: test_select_int_fcc:
 ;V9: fcmps
+;V9-NOT: nop
 ;V9-NOT: {{fbe|fbne}}
 ;V9: mov{{e|ne}} %fcc0
   %0 = fcmp une float %f, 0.000000e+00
@@ -78,10 +81,10 @@ entry:
 
 define float @test_select_fp_fcc(float %f, float %f1, float %f2) nounwind readnone noinline {
 entry:
-;V8: test_select_fp_fcc
+;V8-LABEL: test_select_fp_fcc:
 ;V8: fcmps
 ;V8: {{fbe|fbne}}
-;V9: test_select_fp_fcc
+;V9-LABEL: test_select_fp_fcc:
 ;V9: fcmps
 ;V9-NOT: {{fbe|fbne}}
 ;V9: fmovs{{e|ne}} %fcc0
@@ -92,14 +95,97 @@ entry:
 
 define double @test_select_dfp_fcc(double %f, double %f1, double %f2) nounwind readnone noinline {
 entry:
-;V8: test_select_dfp_fcc
+;V8-LABEL: test_select_dfp_fcc:
 ;V8: fcmpd
+;V8-NEXT: nop
 ;V8: {{fbne|fbe}}
-;V9: test_select_dfp_fcc
+;V9-LABEL: test_select_dfp_fcc:
 ;V9: fcmpd
+;V9-NOT: nop
 ;V9-NOT: {{fbne|fbe}}
 ;V9: fmovd{{e|ne}} %fcc0
   %0 = fcmp une double %f, 0.000000e+00
   %1 = select i1 %0, double %f1, double %f2
   ret double %1
 }
+
+define i32 @test_float_cc(double %a, double %b, i32 %c, i32 %d) {
+entry:
+; V8-LABEL: test_float_cc
+; V8:       fcmpd
+; V8:       {{fbl|fbuge}} .LBB
+; V8:       fcmpd
+; V8:       {{fbule|fbg}} .LBB
+
+; V9-LABEL: test_float_cc
+; V9:       fcmpd
+; V9:       {{fbl|fbuge}} .LBB
+; V9:       fcmpd
+; V9:       {{fbule|fbg}} .LBB
+
+   %0 = fcmp uge double %a, 0.000000e+00
+   br i1 %0, label %loop, label %loop.2
+
+loop:
+   %1 = icmp eq i32 %c, 10
+   br i1 %1, label %loop, label %exit.0
+
+loop.2:
+   %2 = fcmp ogt double %b, 0.000000e+00
+   br i1 %2, label %exit.1, label %loop
+
+exit.0:
+   ret i32 0
+
+exit.1:
+   ret i32 1
+}
+
+; V8-LABEL: test_adde_sube
+; V8:       addcc
+; V8:       addxcc
+; V8:       addxcc
+; V8:       addxcc
+; V8:       subcc
+; V8:       subxcc
+; V8:       subxcc
+; V8:       subxcc
+
+
+; V9-LABEL: test_adde_sube
+; V9:       addcc
+; V9:       addxcc
+; V9:       addxcc
+; V9:       addxcc
+; V9:       subcc
+; V9:       subxcc
+; V9:       subxcc
+; V9:       subxcc
+
+; SPARC64-LABEL: test_adde_sube
+; SPARC64:       addcc
+; SPARC64:       addxcc
+; SPARC64:       addxcc
+; SPARC64:       addxcc
+; SPARC64:       subcc
+; SPARC64:       subxcc
+; SPARC64:       subxcc
+; SPARC64:       subxcc
+
+
+define void @test_adde_sube(i8* %a, i8* %b, i8* %sum, i8* %diff) {
+entry:
+   %0 = bitcast i8* %a to i128*
+   %1 = bitcast i8* %b to i128*
+   %2 = load i128* %0
+   %3 = load i128* %1
+   %4 = add i128 %2, %3
+   %5 = bitcast i8* %sum to i128*
+   store i128 %4, i128* %5
+   tail call void asm sideeffect "", "=*m,*m"(i128 *%0, i128* %5) nounwind
+   %6 = load i128* %0
+   %7 = sub i128 %2, %6
+   %8 = bitcast i8* %diff to i128*
+   store i128 %7, i128* %8
+   ret void
+}
diff --git a/test/CodeGen/SPARC/2011-01-11-Call.ll b/test/CodeGen/SPARC/2011-01-11-Call.ll
index 7350e9232428..a0f478e119a3 100644
--- a/test/CodeGen/SPARC/2011-01-11-Call.ll
+++ b/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -1,4 +1,24 @@
 ; RUN: llc -march=sparc -O0 <%s
+; RUN: llc -march=sparc   <%s | FileCheck %s --check-prefix=V8
+; RUN: llc -march=sparcv9 <%s | FileCheck %s --check-prefix=V9
+
+; V8-LABEL: test
+; V8:       save %sp
+; V8:       call foo
+; V8-NEXT:  nop
+; V8:       call bar
+; V8-NEXT:  nop
+; V8:       jmp %i7+8
+; V8-NEXT:  restore
+
+; V9-LABEL: test
+; V9:       save %sp
+; V9:       call foo
+; V9-NEXT:  nop
+; V9:       call bar
+; V9-NEXT:  nop
+; V9:       jmp %i7+8
+; V9-NEXT:  restore
 
 define void @test() nounwind {
 entry:
@@ -11,3 +31,23 @@ declare i32 @foo(...)
 
 declare void @bar(...)
 
+
+; V8-LABEL: test_tail_call_with_return
+; V8:       save %sp
+; V8:       call foo
+; V8-NEXT:  nop
+; V8:       jmp %i7+8
+; V8-NEXT:  restore %g0, %o0, %o0
+
+; V9-LABEL: test_tail_call_with_return
+; V9:       save %sp
+; V9:       call foo
+; V9-NEXT:  nop
+; V9:       jmp %i7+8
+; V9-NEXT:  restore %g0, %o0, %o0
+
+define i32 @test_tail_call_with_return() nounwind {
+entry:
+ %0 = tail call i32 (...)* @foo() nounwind
+ ret i32 %0
+}
diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
index 9e6583ca2ce1..7cc7868e44f9 100644
--- a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
+++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
@@ -3,26 +3,31 @@
 ;RUN: llc -march=sparc -regalloc=basic < %s | FileCheck %s -check-prefix=V8
 ;RUN: llc -march=sparc -regalloc=basic -mattr=v9 < %s | FileCheck %s -check-prefix=V9
 
+
 define i8* @frameaddr() nounwind readnone {
 entry:
-;V8: frameaddr
-;V8: or %g0, %fp, {{.+}}
+;V8-LABEL: frameaddr:
+;V8: save %sp, -96, %sp
+;V8: jmp %i7+8
+;V8: restore %g0, %fp, %o0
 
-;V9: frameaddr
-;V9: or %g0, %fp, {{.+}}
+;V9-LABEL: frameaddr:
+;V9: save %sp, -96, %sp
+;V9: jmp %i7+8
+;V9: restore %g0, %fp, %o0
   %0 = tail call i8* @llvm.frameaddress(i32 0)
   ret i8* %0
 }
 
 define i8* @frameaddr2() nounwind readnone {
 entry:
-;V8: frameaddr2
+;V8-LABEL: frameaddr2:
 ;V8: ta 3
 ;V8: ld [%fp+56], {{.+}}
 ;V8: ld [{{.+}}+56], {{.+}}
 ;V8: ld [{{.+}}+56], {{.+}}
 
-;V9: frameaddr2
+;V9-LABEL: frameaddr2:
 ;V9: flushw
 ;V9: ld [%fp+56], {{.+}}
 ;V9: ld [{{.+}}+56], {{.+}}
@@ -37,28 +42,42 @@ declare i8* @llvm.frameaddress(i32) nounwind readnone
 
 define i8* @retaddr() nounwind readnone {
 entry:
-;V8: retaddr
-;V8: or %g0, %i7, {{.+}}
+;V8-LABEL: retaddr:
+;V8: or %g0, %o7, {{.+}}
+
+;V9-LABEL: retaddr:
+;V9: or %g0, %o7, {{.+}}
 
-;V9: retaddr
-;V9: or %g0, %i7, {{.+}}
   %0 = tail call i8* @llvm.returnaddress(i32 0)
   ret i8* %0
 }
 
 define i8* @retaddr2() nounwind readnone {
 entry:
-;V8: retaddr2
+;V8-LABEL: retaddr2:
 ;V8: ta 3
 ;V8: ld [%fp+56], {{.+}}
 ;V8: ld [{{.+}}+56], {{.+}}
 ;V8: ld [{{.+}}+60], {{.+}}
 
-;V9: retaddr2
+;V9-LABEL: retaddr2:
 ;V9: flushw
 ;V9: ld [%fp+56], {{.+}}
 ;V9: ld [{{.+}}+56], {{.+}}
 ;V9: ld [{{.+}}+60], {{.+}}
+
+;V8LEAF-LABEL: retaddr2:
+;V8LEAF: ta 3
+;V8LEAF: ld [%fp+56], %[[R:[goli][0-7]]]
+;V8LEAF: ld [%[[R]]+56], %[[R1:[goli][0-7]]]
+;V8LEAF: ld [%[[R1]]+60], {{.+}}
+
+;V9LEAF-LABEL: retaddr2:
+;V9LEAF: flushw
+;V9LEAF: ld [%fp+56], %[[R:[goli][0-7]]]
+;V9LEAF: ld [%[[R]]+56], %[[R1:[goli][0-7]]]
+;V9LEAF: ld [%[[R1]]+60], {{.+}}
+
   %0 = tail call i8* @llvm.returnaddress(i32 3)
   ret i8* %0
 }
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 71fdb4e0d60f..c71e7c00b916 100644
--- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -40,7 +40,7 @@ bb:                                               ; preds = %entry, %bb
   %a_addr.0 = add i32 %.pn, %a_addr.18
   %3 = add nsw i32 %1, 1
   %exitcond = icmp eq i32 %3, %b
-;CHECK:      subcc
+;CHECK:      cmp
 ;CHECK:      bne
 ;CHECK-NOT:  nop
   br i1 %exitcond, label %bb5, label %bb
@@ -48,18 +48,18 @@ bb:                                               ; preds = %entry, %bb
 bb5:                                              ; preds = %bb, %entry
   %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ]
 ;CHECK:      jmp
-;CHECK-NEXT: restore
+;CHECK-NOT: restore
   ret i32 %a_addr.1.lcssa
 }
 
 define i32 @test_inlineasm(i32 %a) nounwind {
 entry:
-;CHECK:      test_inlineasm
+;CHECK-LABEL:      test_inlineasm:
 ;CHECK:      sethi
 ;CHECK:      !NO_APP
-;CHECK-NEXT: subcc
+;CHECK-NEXT: cmp
 ;CHECK-NEXT: bg
-;CHECK-NEXT: nop
+;CHECK-NEXT: or
   tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
   %0 = icmp slt i32 %a, 0
   br i1 %0, label %bb, label %bb1
@@ -80,11 +80,104 @@ declare i32 @bar(i32)
 
 define i32 @test_implicit_def() nounwind {
 entry:
-;UNOPT:       test_implicit_def
+;UNOPT-LABEL:       test_implicit_def:
 ;UNOPT:       call func
 ;UNOPT-NEXT:  nop
   %0 = tail call i32 @func(i32* undef) nounwind
   ret i32 0
 }
 
+define i32 @prevent_o7_in_call_delay_slot(i32 %i0) {
+entry:
+;CHECK-LABEL:       prevent_o7_in_call_delay_slot:
+;CHECK:       add %i0, 2, %o5
+;CHECK:       add %i0, 3, %o7
+;CHECK:       add %o5, %o7, %o0
+;CHECK:       call bar
+;CHECK-NEXT:  nop
+  %0 = add nsw i32 %i0, 2
+  %1 = add nsw i32 %i0, 3
+  tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o6},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1)
+  %2 = add nsw i32 %0, %1
+  %3 = tail call i32 @bar(i32 %2)
+  ret i32 %3
+}
+
+
 declare i32 @func(i32*)
+
+
+define i32 @restore_add(i32 %a, i32 %b) {
+entry:
+;CHECK-LABEL:  restore_add:
+;CHECK:  jmp %i7+8
+;CHECK:  restore %o0, %i1, %o0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  %1 = add nsw i32 %0, %b
+  ret i32 %1
+}
+
+define i32 @restore_add_imm(i32 %a) {
+entry:
+;CHECK-LABEL:  restore_add_imm:
+;CHECK:  jmp %i7+8
+;CHECK:  restore %o0, 20, %o0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  %1 = add nsw i32 %0, 20
+  ret i32 %1
+}
+
+define i32 @restore_or(i32 %a) {
+entry:
+;CHECK-LABEL:  restore_or:
+;CHECK:  jmp %i7+8
+;CHECK:  restore %g0, %o0, %o0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  ret i32 %0
+}
+
+define i32 @restore_or_imm(i32 %a) {
+entry:
+;CHECK-LABEL:  restore_or_imm:
+;CHECK:  or %o0, 20, %i0
+;CHECK:  jmp %i7+8
+;CHECK:  restore %g0, %g0, %g0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  %1 = or i32 %0, 20
+  ret i32 %1
+}
+
+
+define i32 @restore_sethi(i32 %a) {
+entry:
+;CHECK-LABEL: restore_sethi:
+;CHECK-NOT: sethi  3
+;CHECK: restore %g0, 3072, %o0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  %1 = icmp ne i32 %0, 0
+  %2 = select i1 %1, i32 3072, i32 0
+  ret i32 %2
+}
+
+define i32 @restore_sethi_3bit(i32 %a) {
+entry:
+;CHECK-LABEL: restore_sethi_3bit:
+;CHECK: sethi  6
+;CHECK-NOT: restore %g0, 6144, %o0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  %1 = icmp ne i32 %0, 0
+  %2 = select i1 %1, i32 6144, i32 0
+  ret i32 %2
+}
+
+define i32 @restore_sethi_large(i32 %a) {
+entry:
+;CHECK-LABEL: restore_sethi_large:
+;CHECK: sethi  4000, %i0
+;CHECK: restore %g0, %g0, %g0
+  %0 = tail call i32 @bar(i32 %a) nounwind
+  %1 = icmp ne i32 %0, 0
+  %2 = select i1 %1, i32 4096000, i32 0
+  ret i32 %2
+}
+
diff --git a/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll b/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
index 85c16e4684ed..408b13d70a38 100644
--- a/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
+++ b/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll
@@ -6,7 +6,7 @@
 
 define i32 @test() nounwind {
 entry:
-;CHECK:     test
+;CHECK-LABEL:     test:
 ;CHECK:     st
 ;CHECK:     st
 ;CHECK:     st
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
index 5393392951e0..fc44bc495f46 100644
--- a/test/CodeGen/SPARC/2011-01-22-SRet.ll
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -4,9 +4,9 @@
 
 define weak void @make_foo(%struct.foo_t* noalias sret %agg.result, i32 %a, i32 %b, i32 %c) nounwind {
 entry:
-;CHECK: make_foo
-;CHECK: ld [%fp+64], {{.+}}
-;CHECK: jmp %i7+12
+;CHECK-LABEL: make_foo:
+;CHECK: ld [%sp+64], {{.+}}
+;CHECK: jmp %o7+12
   %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0
   store i32 %a, i32* %0, align 4
   %1 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 1
@@ -18,9 +18,9 @@ entry:
 
 define i32 @test() nounwind {
 entry:
-;CHECK: test
+;CHECK-LABEL: test:
 ;CHECK: st {{.+}}, [%sp+64]
-;CHECK: make_foo
+;CHECK: call make_foo
 ;CHECK: unimp 12
   %f = alloca %struct.foo_t, align 8
   call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind
diff --git a/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
new file mode 100644
index 000000000000..81f586fe8a7c
--- /dev/null
+++ b/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=sparc   < %s | FileCheck %s --check-prefix=V8
+; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefix=SPARC64
+
+; V8-LABEL: variable_alloca_with_adj_call_stack
+; V8:       save %sp, -96, %sp
+; V8:       add {{.+}}, 96, %o0
+; V8:       add %sp, -16, %sp
+; V8:       call foo
+; V8:       add %sp, 16, %sp
+
+; SPARC64-LABEL: variable_alloca_with_adj_call_stack
+; SPARC64:       save %sp, -128, %sp
+; SPARC64:       add {{.+}}, 2175, %o0
+; SPARC64:       add %sp, -80, %sp
+; SPARC64:       call foo
+; SPARC64:       add %sp, 80, %sp
+
+define void @variable_alloca_with_adj_call_stack(i32 %num) {
+entry:
+  %0 = alloca i8, i32 %num, align 8
+  call void @foo(i8* %0, i8* %0, i8* %0, i8* %0, i8* %0, i8* %0, i8* %0, i8* %0, i8* %0, i8* %0)
+  ret void
+}
+
+
+declare void @foo(i8* , i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*);
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index ec9713572141..8b752a1a2c3c 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s
 
 ; CHECK: intarg
 ; The save/restore frame is not strictly necessary here, but we would need to
@@ -376,3 +376,38 @@ define signext i32 @ret_nosext(i32 signext %a0) {
 define signext i32 @ret_nozext(i32 signext %a0) {
   ret i32 %a0
 }
+
+; CHECK-LABEL: test_register_directive
+; CHECK:       .register %g2, #scratch
+; CHECK:       .register %g3, #scratch
+; CHECK:       add %i0, 2, %g2
+; CHECK:       add %i0, 3, %g3
+define i32 @test_register_directive(i32 %i0) {
+entry:
+  %0 = add nsw i32 %i0, 2
+  %1 = add nsw i32 %i0, 3
+  tail call void asm sideeffect "", "r,r,~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7},~{g1},~{g4},~{g5},~{g6},~{g7}"(i32 %0, i32 %1)
+  %2 = add nsw i32 %0, %1
+  ret i32 %2
+}
+
+; CHECK-LABEL: test_large_stack
+
+; CHECK:       sethi 16, %g1
+; CHECK:       xor %g1, -176, %g1
+; CHECK:       save %sp, %g1, %sp
+
+; CHECK:       sethi 14, %g1
+; CHECK:       xor %g1, -1, %g1
+; CHECK:       add %g1, %fp, %g1
+; CHECK:       call use_buf
+
+define i32 @test_large_stack() {
+entry:
+  %buffer1 = alloca [16384 x i8], align 8
+  %buffer1.sub = getelementptr inbounds [16384 x i8]* %buffer1, i32 0, i32 0
+  %0 = call i32 @use_buf(i32 16384, i8* %buffer1.sub)
+  ret i32 %0
+}
+
+declare i32 @use_buf(i32, i8*)
diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll
index 2bbf7deb1684..f5ed047592e9 100644
--- a/test/CodeGen/SPARC/64bit.ll
+++ b/test/CodeGen/SPARC/64bit.ll
@@ -1,13 +1,22 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s
+; RUN: llc < %s -march=sparcv9  | FileCheck %s -check-prefix=OPT
 
-; CHECK: ret2:
+; CHECK-LABEL: ret2:
 ; CHECK: or %g0, %i1, %i0
+
+; OPT-LABEL: ret2:
+; OPT: jmp %o7+8
+; OPT: or %g0, %o1, %o0
 define i64 @ret2(i64 %a, i64 %b) {
   ret i64 %b
 }
 
 ; CHECK: shl_imm
 ; CHECK: sllx %i0, 7, %i0
+
+; OPT-LABEL: shl_imm:
+; OPT: jmp %o7+8
+; OPT: sllx %o0, 7, %o0
 define i64 @shl_imm(i64 %a) {
   %x = shl i64 %a, 7
   ret i64 %x
@@ -15,6 +24,10 @@ define i64 @shl_imm(i64 %a) {
 
 ; CHECK: sra_reg
 ; CHECK: srax %i0, %i1, %i0
+
+; OPT-LABEL: sra_reg:
+; OPT: jmp %o7+8
+; OPT: srax %o0, %o1, %o0
 define i64 @sra_reg(i64 %a, i64 %b) {
   %x = ashr i64 %a, %b
   ret i64 %x
@@ -26,13 +39,21 @@ define i64 @sra_reg(i64 %a, i64 %b) {
 ;     restore %g0, %g0, %o0
 ;
 ; CHECK: ret_imm0
-; CHECK: or %g0, %g0, %i0
+; CHECK: or %g0, 0, %i0
+
+; OPT: ret_imm0
+; OPT: jmp %o7+8
+; OPT: or %g0, 0, %o0
 define i64 @ret_imm0() {
   ret i64 0
 }
 
 ; CHECK: ret_simm13
 ; CHECK: or %g0, -4096, %i0
+
+; OPT:   ret_simm13
+; OPT:   jmp %o7+8
+; OPT:   or %g0, -4096, %o0
 define i64 @ret_simm13() {
   ret i64 -4096
 }
@@ -41,13 +62,23 @@ define i64 @ret_simm13() {
 ; CHECK: sethi 4, %i0
 ; CHECK-NOT: or
 ; CHECK: restore
+
+; OPT:  ret_sethi
+; OPT:  jmp %o7+8
+; OPT:  sethi 4, %o0
 define i64 @ret_sethi() {
   ret i64 4096
 }
 
-; CHECK: ret_sethi
+; CHECK: ret_sethi_or
 ; CHECK: sethi 4, [[R:%[goli][0-7]]]
 ; CHECK: or [[R]], 1, %i0
+
+; OPT: ret_sethi_or
+; OPT: sethi 4, [[R:%[go][0-7]]]
+; OPT: jmp %o7+8
+; OPT: or [[R]], 1, %o0
+
 define i64 @ret_sethi_or() {
   ret i64 4097
 }
@@ -55,6 +86,12 @@ define i64 @ret_sethi_or() {
 ; CHECK: ret_nimm33
 ; CHECK: sethi 4, [[R:%[goli][0-7]]]
 ; CHECK: xor [[R]], -4, %i0
+
+; OPT: ret_nimm33
+; OPT: sethi 4, [[R:%[go][0-7]]]
+; OPT: jmp %o7+8
+; OPT: xor [[R]], -4, %o0
+
 define i64 @ret_nimm33() {
   ret i64 -4100
 }
@@ -124,6 +161,14 @@ define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) {
   ret i64 %x3
 }
 
+; CHECK: load_bool
+; CHECK: ldub [%i0], %i0
+define i64 @load_bool(i1* %p) {
+  %a = load i1* %p
+  %b = zext i1 %a to i64
+  ret i64 %b
+}
+
 ; CHECK: stores
 ; CHECK: ldx [%i0+8], [[R:%[goli][0-7]]]
 ; CHECK: stx [[R]], [%i0+16]
@@ -181,3 +226,85 @@ define i64 @unsigned_divide(i64 %a, i64 %b) {
   %r = udiv i64 %a, %b
   ret i64 %r
 }
+
+define void @access_fi() {
+entry:
+  %b = alloca [32 x i8], align 1
+  %arraydecay = getelementptr inbounds [32 x i8]* %b, i64 0, i64 0
+  call void @g(i8* %arraydecay) #2
+  ret void
+}
+
+declare void @g(i8*)
+
+; CHECK: expand_setcc
+; CHECK: cmp %i0, 1
+; CHECK: movl %xcc, 1,
+define i32 @expand_setcc(i64 %a) {
+  %cond = icmp sle i64 %a, 0
+  %cast2 = zext i1 %cond to i32
+  %RV = sub i32 1, %cast2
+  ret i32 %RV
+}
+
+; CHECK: spill_i64
+; CHECK: stx
+; CHECK: ldx
+define i64 @spill_i64(i64 %x) {
+  call void asm sideeffect "", "~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7}"()
+  ret i64 %x
+}
+
+; CHECK: bitcast_i64_f64
+; CHECK: std
+; CHECK: ldx
+define i64 @bitcast_i64_f64(double %x) {
+  %y = bitcast double %x to i64
+  ret i64 %y
+}
+
+; CHECK: bitcast_f64_i64
+; CHECK: stx
+; CHECK: ldd
+define double @bitcast_f64_i64(i64 %x) {
+  %y = bitcast i64 %x to double
+  ret double %y
+}
+
+; CHECK-LABEL: store_zero:
+; CHECK: stx %g0, [%i0]
+; CHECK: stx %g0, [%i1+8]
+
+; OPT-LABEL:  store_zero:
+; OPT:  stx %g0, [%o0]
+; OPT:  stx %g0, [%o1+8]
+define i64 @store_zero(i64* nocapture %a, i64* nocapture %b) {
+entry:
+  store i64 0, i64* %a, align 8
+  %0 = getelementptr inbounds i64* %b, i32 1
+  store i64 0, i64* %0, align 8
+  ret i64 0
+}
+
+; CHECK-LABEL: bit_ops
+; CHECK:       popc
+
+; OPT-LABEL: bit_ops
+; OPT:       popc
+
+define i64 @bit_ops(i64 %arg) {
+entry:
+  %0 = tail call i64 @llvm.ctpop.i64(i64 %arg)
+  %1 = tail call i64 @llvm.ctlz.i64(i64 %arg, i1 true)
+  %2 = tail call i64 @llvm.cttz.i64(i64 %arg, i1 true)
+  %3 = tail call i64 @llvm.bswap.i64(i64 %arg)
+  %4 = add i64 %0, %1
+  %5 = add i64 %2, %3
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
+
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll
index 6e66a262a4f2..7451b04eadfe 100644
--- a/test/CodeGen/SPARC/64cond.ll
+++ b/test/CodeGen/SPARC/64cond.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
-; Testing 64-bit conditionals.
+; RUN: llc < %s -mtriple=sparc64-pc-openbsd -disable-sparc-leaf-proc | FileCheck %s
+; Testing 64-bit conditionals. The sparc64 triple is an alias for sparcv9.
 
 ; CHECK: cmpri
-; CHECK: subcc %i1, 1
-; CHECK: bpe %xcc,
+; CHECK: cmp %i1, 1
+; CHECK: be %xcc,
 define void @cmpri(i64* %p, i64 %x) {
 entry:
   %tobool = icmp eq i64 %x, 1
@@ -18,8 +18,8 @@ if.end:
 }
 
 ; CHECK: cmprr
-; CHECK: subcc %i1, %i2
-; CHECK: bpgu %xcc,
+; CHECK: cmp %i1, %i2
+; CHECK: bgu %xcc,
 define void @cmprr(i64* %p, i64 %x, i64 %y) {
 entry:
   %tobool = icmp ugt i64 %x, %y
@@ -34,9 +34,9 @@ if.end:
 }
 
 ; CHECK: selecti32_xcc
-; CHECK: subcc %i0, %i1
+; CHECK: cmp %i0, %i1
 ; CHECK: movg %xcc, %i2, %i3
-; CHECK: or %g0, %i3, %i0
+; CHECK: restore %g0, %i3, %o0
 define i32 @selecti32_xcc(i64 %x, i64 %y, i32 %a, i32 %b) {
 entry:
   %tobool = icmp sgt i64 %x, %y
@@ -45,12 +45,81 @@ entry:
 }
 
 ; CHECK: selecti64_xcc
-; CHECK: subcc %i0, %i1
+; CHECK: cmp %i0, %i1
 ; CHECK: movg %xcc, %i2, %i3
-; CHECK: or %g0, %i3, %i0
+; CHECK: restore %g0, %i3, %o0
 define i64 @selecti64_xcc(i64 %x, i64 %y, i64 %a, i64 %b) {
 entry:
   %tobool = icmp sgt i64 %x, %y
   %rv = select i1 %tobool, i64 %a, i64 %b
   ret i64 %rv
 }
+
+; CHECK: selecti64_icc
+; CHECK: cmp %i0, %i1
+; CHECK: movg %icc, %i2, %i3
+; CHECK: restore %g0, %i3, %o0
+define i64 @selecti64_icc(i32 %x, i32 %y, i64 %a, i64 %b) {
+entry:
+  %tobool = icmp sgt i32 %x, %y
+  %rv = select i1 %tobool, i64 %a, i64 %b
+  ret i64 %rv
+}
+
+; CHECK: selecti64_fcc
+; CHECK: fcmps %f1, %f3
+; CHECK: movul %fcc0, %i2, %i3
+; CHECK: restore %g0, %i3, %o0
+define i64 @selecti64_fcc(float %x, float %y, i64 %a, i64 %b) {
+entry:
+  %tobool = fcmp ult float %x, %y
+  %rv = select i1 %tobool, i64 %a, i64 %b
+  ret i64 %rv
+}
+
+; CHECK: selectf32_xcc
+; CHECK: cmp %i0, %i1
+; CHECK: fmovsg %xcc, %f5, %f7
+; CHECK: fmovs %f7, %f1
+define float @selectf32_xcc(i64 %x, i64 %y, float %a, float %b) {
+entry:
+  %tobool = icmp sgt i64 %x, %y
+  %rv = select i1 %tobool, float %a, float %b
+  ret float %rv
+}
+
+; CHECK: selectf64_xcc
+; CHECK: cmp %i0, %i1
+; CHECK: fmovdg %xcc, %f4, %f6
+; CHECK: fmovd %f6, %f0
+define double @selectf64_xcc(i64 %x, i64 %y, double %a, double %b) {
+entry:
+  %tobool = icmp sgt i64 %x, %y
+  %rv = select i1 %tobool, double %a, double %b
+  ret double %rv
+}
+
+; The MOVXCC instruction can't use %g0 for its tied operand.
+; CHECK: select_consti64_xcc
+; CHECK: cmp
+; CHECK: movg %xcc, 123, %i{{[0-2]}}
+define i64 @select_consti64_xcc(i64 %x, i64 %y) {
+entry:
+  %tobool = icmp sgt i64 %x, %y
+  %rv = select i1 %tobool, i64 123, i64 0
+  ret i64 %rv
+}
+
+; CHECK-LABEL: setcc_resultty
+; CHECK:       cmp
+; CHECK:       movne %xcc, 1, [[R:%[gilo][0-7]]]
+; CHECK:       or [[R]], %i1, %i0
+
+define i1 @setcc_resultty(i64 %a, i1 %b) {
+  %a0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 32)
+  %a1 = extractvalue { i64, i1 } %a0, 1
+  %a4 = or i1 %a1, %b
+  ret i1 %a4
+}
+
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 4352e6246301..ba858253287a 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -3,8 +3,8 @@
 define i32 @test0(i32 %X) {
 	%tmp.1 = add i32 %X, 1
 	ret i32 %tmp.1
-; CHECK: test0:
-; CHECK: add %i0, 1, %i0
+; CHECK-LABEL: test0:
+; CHECK: add %o0, 1, %o0
 }
 
 
@@ -13,14 +13,26 @@ define i32 @test1(i32 %X, i32 %Y) {
         %A = xor i32 %X, %Y
         %B = xor i32 %A, -1
         ret i32 %B
-; CHECK: test1:
-; CHECK: xnor %i0, %i1, %i0
+; CHECK-LABEL: test1:
+; CHECK: xnor %o0, %o1, %o0
 }
 
 define i32 @test2(i32 %X, i32 %Y) {
         %A = xor i32 %X, -1
         %B = xor i32 %A, %Y
         ret i32 %B
-; CHECK: test2:
-; CHECK: xnor %i0, %i1, %i0
+; CHECK-LABEL: test2:
+; CHECK: xnor %o0, %o1, %o0
 }
+
+; CHECK-LABEL: store_zero:
+; CHECK: st   %g0, [%o0]
+; CHECK: st   %g0, [%o1+4]
+define i32 @store_zero(i32* %a, i32* %b) {
+entry:
+  store i32 0, i32* %a, align 4
+  %0 = getelementptr inbounds i32* %b, i32 1
+  store i32 0, i32* %0, align 4
+  ret i32 0
+}
+
diff --git a/test/CodeGen/SPARC/blockaddr.ll b/test/CodeGen/SPARC/blockaddr.ll
new file mode 100644
index 000000000000..c3d527013c1b
--- /dev/null
+++ b/test/CodeGen/SPARC/blockaddr.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=sparc   -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=medium | FileCheck --check-prefix=abs44 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=large  | FileCheck --check-prefix=abs64 %s
+; RUN: llc < %s -march=sparc   -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v8pic32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v9pic32 %s
+
+;
+; copied from test/CodeGen/Mips/blockaddr.ll and modified for SPARC
+;
+@reg = common global i8* null, align 4
+
+define i8* @dummy(i8* %x) nounwind readnone noinline {
+entry:
+  ret i8* %x
+}
+
+; abs32-LABEL: func_block_addr:
+; abs32: sethi %hi([[BLK:.+]]), [[R:%[gilo][0-7]]]
+; abs32: call dummy
+; abs32: add  [[R]], %lo([[BLK]]), %o0
+; abs32: jmp %o0
+
+; abs44-LABEL: func_block_addr:
+; abs44: sethi %h44([[BLK:.+]]), [[R:%[gilo][0-7]]]
+; abs44: add [[R]], %m44([[BLK]]), [[R1:%[gilo][0-7]]]
+; abs44: sllx [[R1]], 12, [[R2:%[gilo][0-7]]]
+; abs44: call dummy
+; abs44: add [[R2]], %l44([[BLK]]), %o0
+; abs44: jmp %o0
+
+; abs64-LABEL: func_block_addr:
+; abs64: sethi %hi([[BLK:.+]]), [[R:%[gilo][0-7]]]
+; abs64: add [[R]], %lo([[BLK]]), [[R1:%[gilo][0-7]]]
+; abs64: sethi %hh([[BLK]]), [[R2:%[gilo][0-7]]]
+; abs64: add [[R2]], %hm([[BLK]]), [[R3:%[gilo][0-7]]]
+; abs64: sllx [[R3]], 32,  [[R4:%[gilo][0-7]]]
+; abs64: call dummy
+; abs64: add [[R4]], [[R1]], %o0
+; abs64: jmp %o0
+
+
+; v8pic32: func_block_addr
+; v8pic32: sethi %hi(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[R:%[gilo][0-7]]]
+; v8pic32: or [[R]], %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[R1:%[gilo][0-7]]]
+; v8pic32: add [[R1]], %o7, %[[R2:[gilo][0-7]]]
+; v8pic32: sethi %hi([[BLK:.+]]), [[R3:%[gilo][0-7]]]
+; v8pic32: add  [[R3]], %lo([[BLK]]), %[[R4:[gilo][0-7]]]
+; v8pic32: call dummy
+; v8pic32: ld [%[[R2]]+%[[R4]]], %o0
+; v8pic32: jmp %o0
+
+
+; v9pic32: func_block_addr
+; v9pic32: sethi %hi(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[R:%[gilo][0-7]]]
+; v9pic32: or [[R]], %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[R1:%[gilo][0-7]]]
+; v9pic32: add [[R1]], %o7, %[[R2:[gilo][0-7]]]
+; v9pic32: sethi %hi([[BLK:.+]]), [[R3:%[gilo][0-7]]]
+; v9pic32: add  [[R3]], %lo([[BLK]]), %[[R4:[gilo][0-7]]]
+; v9pic32: call dummy
+; v9pic32: ldx [%[[R2]]+%[[R4]]], %o0
+; v9pic32: jmp %o0
+
+
+define void @func_block_addr() nounwind {
+entry:
+  %call = tail call i8* @dummy(i8* blockaddress(@func_block_addr, %baz))
+  indirectbr i8* %call, [label %baz, label %foo]
+
+foo:                                              ; preds = %foo, %entry
+  store i8* blockaddress(@func_block_addr, %foo), i8** @reg, align 4
+  br label %foo
+
+baz:                                              ; preds = %entry
+  store i8* null, i8** @reg, align 4
+  ret void
+}
diff --git a/test/CodeGen/SPARC/constpool.ll b/test/CodeGen/SPARC/constpool.ll
index d93a53b3ac04..b861676ce3e1 100644
--- a/test/CodeGen/SPARC/constpool.ll
+++ b/test/CodeGen/SPARC/constpool.ll
@@ -12,15 +12,17 @@ entry:
 
 ; abs32: floatCP
 ; abs32: sethi %hi(.LCPI0_0), %[[R:[gilo][0-7]]]
+; abs32: jmp %o7+8
 ; abs32: ld [%[[R]]+%lo(.LCPI0_0)], %f
-; abs32: jmp %i7+8
+
 
 ; abs44: floatCP
 ; abs44: sethi %h44(.LCPI0_0), %[[R1:[gilo][0-7]]]
 ; abs44: add %[[R1]], %m44(.LCPI0_0), %[[R2:[gilo][0-7]]]
 ; abs44: sllx %[[R2]], 12, %[[R3:[gilo][0-7]]]
+; abs44: jmp %o7+8
 ; abs44: ld [%[[R3]]+%l44(.LCPI0_0)], %f1
-; abs44: jmp %i7+8
+
 
 ; abs64: floatCP
 ; abs64: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
@@ -28,8 +30,9 @@ entry:
 ; abs64: sethi %hh(.LCPI0_0), %[[R3:[gilo][0-7]]]
 ; abs64: add %[[R3]], %hm(.LCPI0_0), %[[R4:[gilo][0-7]]]
 ; abs64: sllx %[[R4]], 32, %[[R5:[gilo][0-7]]]
+; abs64: jmp %o7+8
 ; abs64: ld [%[[R5]]+%[[R2]]], %f1
-; abs64: jmp %i7+8
+
 
 ; v8pic32: floatCP
 ; v8pic32: _GLOBAL_OFFSET_TABLE_
@@ -38,6 +41,9 @@ entry:
 ; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
 ; v8pic32: ld [%[[Gaddr]]], %f0
 ; v8pic32: jmp %i7+8
+; v8pic32: restore
+
+
 
 ; v9pic32: floatCP
 ; v9pic32: _GLOBAL_OFFSET_TABLE_
@@ -46,3 +52,6 @@ entry:
 ; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
 ; v9pic32: ld [%[[Gaddr]]], %f1
 ; v9pic32: jmp %i7+8
+; v9pic32: restore
+
+
diff --git a/test/CodeGen/SPARC/exception.ll b/test/CodeGen/SPARC/exception.ll
new file mode 100644
index 000000000000..cb5b6e5c1168
--- /dev/null
+++ b/test/CodeGen/SPARC/exception.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -march=sparc | FileCheck %s
+
+
+%struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo }
+%struct.__type_info_pseudo = type { i8*, i8* }
+
+@_ZTIi = external constant %struct.__fundamental_type_info_pseudo
+@_ZTIf = external constant %struct.__fundamental_type_info_pseudo
+@.cst = linker_private unnamed_addr constant [12 x i8] c"catched int\00", align 64
+@.cst1 = linker_private unnamed_addr constant [14 x i8] c"catched float\00", align 64
+
+; CHECK-LABEL: main:
+; CHECK:       .cfi_startproc
+; CHECK:       .cfi_def_cfa_register 30
+; CHECK:       .cfi_window_save
+; CHECK:       .cfi_register 15, 31
+
+; CHECK:        call __cxa_throw
+; CHECK:        call __cxa_throw
+
+; CHECK:        call __cxa_begin_catch
+; CHECK:        call __cxa_end_catch
+
+; CHECK:        call __cxa_begin_catch
+; CHECK:        call __cxa_end_catch
+
+; CHECK:        .cfi_endproc
+
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) unnamed_addr #0 {
+entry:
+  %0 = icmp eq i32 %argc, 2
+  %1 = tail call i8* @__cxa_allocate_exception(i32 4) #1
+  br i1 %0, label %"3", label %"4"
+
+"3":                                              ; preds = %entry
+  %2 = bitcast i8* %1 to i32*
+  store i32 0, i32* %2, align 4
+  invoke void @__cxa_throw(i8* %1, i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*), void (i8*)* null) #2
+          to label %3 unwind label %"8"
+
+; <label>:3                                       ; preds = %"3"
+  unreachable
+
+"4":                                              ; preds = %entry
+  %4 = bitcast i8* %1 to float*
+  store float 1.000000e+00, float* %4, align 4
+
+
+  invoke void @__cxa_throw(i8* %1, i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIf to i8*), void (i8*)* null) #2
+          to label %5 unwind label %"8"
+
+; <label>:5                                       ; preds = %"4"
+  unreachable
+
+"5":                                              ; preds = %"13", %"11"
+  %6 = phi i32 [ 2, %"13" ], [ 0, %"11" ]
+  ret i32 %6
+
+"8":                                              ; preds = %"4", %"3"
+  %exc = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0
+          catch %struct.__fundamental_type_info_pseudo* @_ZTIi
+          catch %struct.__fundamental_type_info_pseudo* @_ZTIf
+  %exc_ptr12 = extractvalue { i8*, i32 } %exc, 0
+  %filter13 = extractvalue { i8*, i32 } %exc, 1
+  %typeid = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*))
+  %7 = icmp eq i32 %filter13, %typeid
+  br i1 %7, label %"11", label %8
+
+; <label>:8                                       ; preds = %"8"
+  %typeid8 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIf to i8*))
+  %9 = icmp eq i32 %filter13, %typeid8
+  br i1 %9, label %"13", label %"9"
+
+"9":                                              ; preds = %8
+  resume { i8*, i32 } %exc
+
+"11":                                             ; preds = %"8"
+  %10 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr12) #1
+  %11 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.cst, i32 0, i32 0))
+  tail call void @__cxa_end_catch() #1
+  br label %"5"
+
+"13":                                             ; preds = %8
+  %12 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr12) #1
+  %13 = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @.cst1, i32 0, i32 0))
+  tail call void @__cxa_end_catch() #1
+  br label %"5"
+}
+
+; Function Attrs: nounwind
+declare i8* @__cxa_allocate_exception(i32) #1
+
+; Function Attrs: noreturn
+declare void @__cxa_throw(i8*, i8*, void (i8*)*) #2
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+; Function Attrs: nounwind
+declare i8* @__cxa_begin_catch(i8*) #1
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #1
+
+declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
+
+attributes #0 = { "no-frame-pointer-elim-non-leaf"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { noreturn }
+attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/SPARC/float.ll b/test/CodeGen/SPARC/float.ll
new file mode 100644
index 000000000000..66367042cad9
--- /dev/null
+++ b/test/CodeGen/SPARC/float.ll
@@ -0,0 +1,249 @@
+; RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8
+; RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=V8-UNOPT
+; RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
+; RUN: llc -mtriple=sparc64-unknown-linux < %s | FileCheck %s -check-prefix=SPARC64
+
+; V8-LABEL:     test_neg:
+; V8:     call get_double
+; V8:     fnegs %f0, %f0
+
+; V8-UNOPT-LABEL:     test_neg:
+; V8-UNOPT:     fnegs
+; V8-UNOPT:     ! implicit-def
+; V8-UNOPT:     fmovs {{.+}}, %f0
+; V8-UNOPT:     fmovs {{.+}}, %f1
+
+; V9-LABEL:     test_neg:
+; V9:     fnegd %f0, %f0
+
+; SPARC64-LABEL: test_neg:
+; SPARC64:       fnegd %f0, %f0
+
+define double @test_neg() {
+entry:
+  %0 = tail call double @get_double()
+  %1 = fsub double -0.000000e+00, %0
+  ret double %1
+}
+
+; V8-LABEL:     test_abs:
+; V8:     fabss %f0, %f0
+
+; V8-UNOPT-LABEL:     test_abs:
+; V8-UNOPT:     fabss
+; V8-UNOPT:     ! implicit-def
+; V8-UNOPT:     fmovs {{.+}}, %f0
+; V8-UNOPT:     fmovs {{.+}}, %f1
+
+; V9-LABEL:     test_abs:
+; V9:     fabsd %f0, %f0
+
+
+; SPARC64-LABEL:     test_abs:
+; SPARC64:     fabsd %f0, %f0
+
+define double @test_abs() {
+entry:
+  %0 = tail call double @get_double()
+  %1 = tail call double @llvm.fabs.f64(double %0)
+  ret double %1
+}
+
+declare double @get_double()
+declare double @llvm.fabs.f64(double) nounwind readonly
+
+; V8-LABEL:    test_v9_floatreg:
+; V8:          fsubd {{.+}}, {{.+}}, {{.+}}
+; V8:          faddd {{.+}}, {{.+}}, [[R:%f(((1|2)?(0|2|4|6|8))|30)]]
+; V8:          std [[R]], [%{{.+}}]
+; V8:          ldd [%{{.+}}], %f0
+
+; V9-LABEL:    test_v9_floatreg:
+; V9:          fsubd {{.+}}, {{.+}}, {{.+}}
+; V9:          faddd {{.+}}, {{.+}}, [[R:%f((3(2|4|6|8))|((4|5)(0|2|4|6|8))|(60|62))]]
+; V9:          fmovd [[R]], %f0
+
+; SPARC64-LABEL:    test_v9_floatreg:
+; SPARC64:          fsubd {{.+}}, {{.+}}, {{.+}}
+; SPARC64:          faddd {{.+}}, {{.+}}, [[R:%f((3(2|4|6|8))|((4|5)(0|2|4|6|8))|(60|62))]]
+; SPARC64:          fmovd [[R]], %f0
+
+define double @test_v9_floatreg() {
+entry:
+  %0 = tail call double @get_double()
+  %1 = tail call double @get_double()
+  %2 = fsub double %0, %1
+  tail call void asm sideeffect "", "~{f0},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+  %3 = fadd double %2, %2
+  ret double %3
+}
+
+; V8-LABEL:    test_xtos_stox
+; V8:          call __floatdisf
+; V8:          call __fixsfdi
+
+; V9-LABEL:    test_xtos_stox
+; V9:          call __floatdisf
+; V9:          call __fixsfdi
+
+; SPARC64-LABEL:    test_xtos_stox
+; SPARC64:          fxtos
+; SPARC64:          fstox
+
+define void @test_xtos_stox(i64 %a, i64* %ptr0, float* %ptr1) {
+entry:
+  %0 = sitofp i64 %a to float
+  store float %0, float* %ptr1, align 8
+  %1 = fptosi float %0 to i64
+  store i64 %1, i64* %ptr0, align 8
+  ret void
+}
+
+; V8-LABEL:    test_itos_stoi
+; V8:          fitos
+; V8:          fstoi
+
+; V9-LABEL:    test_itos_stoi
+; V9:          fitos
+; V9:          fstoi
+
+; SPARC64-LABEL:    test_itos_stoi
+; SPARC64:          fitos
+; SPARC64:          fstoi
+
+define void @test_itos_stoi(i32 %a, i32* %ptr0, float* %ptr1) {
+entry:
+  %0 = sitofp i32 %a to float
+  store float %0, float* %ptr1, align 8
+  %1 = fptosi float %0 to i32
+  store i32 %1, i32* %ptr0, align 8
+  ret void
+}
+
+
+; V8-LABEL:    test_xtod_dtox
+; V8:          call __floatdidf
+; V8:          call __fixdfdi
+
+; V9-LABEL:    test_xtod_dtox
+; V9:          call __floatdidf
+; V9:          call __fixdfdi
+
+; SPARC64-LABEL:    test_xtod_dtox
+; SPARC64:          fxtod
+; SPARC64:          fdtox
+
+define void @test_xtod_dtox(i64 %a, i64* %ptr0, double* %ptr1) {
+entry:
+  %0 = sitofp i64 %a to double
+  store double %0, double* %ptr1, align 8
+  %1 = fptosi double %0 to i64
+  store i64 %1, i64* %ptr0, align 8
+  ret void
+}
+
+; V8-LABEL:    test_itod_dtoi
+; V8:          fitod
+; V8:          fdtoi
+
+; V9-LABEL:    test_itod_dtoi
+; V9:          fitod
+; V9:          fdtoi
+
+; SPARC64-LABEL:    test_itod_dtoi
+; SPARC64:          fitod
+; SPARC64:          fdtoi
+
+define void @test_itod_dtoi(i32 %a, i32* %ptr0, double* %ptr1) {
+entry:
+  %0 = sitofp i32 %a to double
+  store double %0, double* %ptr1, align 8
+  %1 = fptosi double %0 to i32
+  store i32 %1, i32* %ptr0, align 8
+  ret void
+}
+
+; V8-LABEL:    test_uxtos_stoux
+; V8:          call __floatundisf
+; V8:          call __fixunssfdi
+
+; V9-LABEL:    test_uxtos_stoux
+; V9:          call __floatundisf
+; V9:          call __fixunssfdi
+
+; SPARC64-LABEL:   test_uxtos_stoux
+; SPARC64-NOT:     call __floatundisf
+; SPARC64-NOT:     call __fixunssfdi
+
+define void @test_uxtos_stoux(i64 %a, i64* %ptr0, float* %ptr1) {
+entry:
+  %0 = uitofp i64 %a to float
+  store float %0, float* %ptr1, align 8
+  %1 = fptoui float %0 to i64
+  store i64 %1, i64* %ptr0, align 8
+  ret void
+}
+
+; V8-LABEL:    test_utos_stou
+; V8:          fdtos
+; V8:          fstoi
+
+; V9-LABEL:    test_utos_stou
+; V9:          fdtos
+; V9:          fstoi
+
+; SPARC64-LABEL:    test_utos_stou
+; SPARC64:     fdtos
+; SPARC64:     fstoi
+
+define void @test_utos_stou(i32 %a, i32* %ptr0, float* %ptr1) {
+entry:
+  %0 = uitofp i32 %a to float
+  store float %0, float* %ptr1, align 8
+  %1 = fptoui float %0 to i32
+  store i32 %1, i32* %ptr0, align 8
+  ret void
+}
+
+
+; V8-LABEL:    test_uxtod_dtoux
+; V8:          call __floatundidf
+; V8:          call __fixunsdfdi
+
+; V9-LABEL:    test_uxtod_dtoux
+; V9:          call __floatundidf
+; V9:          call __fixunsdfdi
+
+; SPARC64-LABEL:    test_uxtod_dtoux
+; SPARC64-NOT:          call __floatundidf
+; SPARC64-NOT:          call __fixunsdfdi
+
+define void @test_uxtod_dtoux(i64 %a, i64* %ptr0, double* %ptr1) {
+entry:
+  %0 = uitofp i64 %a to double
+  store double %0, double* %ptr1, align 8
+  %1 = fptoui double %0 to i64
+  store i64 %1, i64* %ptr0, align 8
+  ret void
+}
+
+; V8-LABEL:    test_utod_dtou
+; V8-NOT:      fitod
+; V8:          fdtoi
+
+; V9-LABEL:    test_utod_dtou
+; V9-NOT:      fitod
+; V9:          fdtoi
+
+; SPARC64-LABEL:    test_utod_dtou
+; SPARC64-NOT:      fitod
+; SPARC64:          fdtoi
+
+define void @test_utod_dtou(i32 %a, double %b, i32* %ptr0, double* %ptr1) {
+entry:
+  %0 = uitofp i32 %a to double
+  store double %0, double* %ptr1, align 8
+  %1 = fptoui double %b to i32
+  store i32 %1, i32* %ptr0, align 8
+  ret void
+}
diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll
new file mode 100644
index 000000000000..c761361e773e
--- /dev/null
+++ b/test/CodeGen/SPARC/fp128.ll
@@ -0,0 +1,234 @@
+; RUN: llc < %s -march=sparc -mattr=hard-quad-float | FileCheck %s --check-prefix=HARD
+; RUN: llc < %s -march=sparc -mattr=-hard-quad-float | FileCheck %s --check-prefix=SOFT
+
+
+; HARD-LABEL: f128_ops
+; HARD:       ldd
+; HARD:       ldd
+; HARD:       ldd
+; HARD:       ldd
+; HARD:       faddq [[R0:.+]],  [[R1:.+]],  [[R2:.+]]
+; HARD:       fsubq [[R2]], [[R3:.+]], [[R4:.+]]
+; HARD:       fmulq [[R4]], [[R5:.+]], [[R6:.+]]
+; HARD:       fdivq [[R6]], [[R2]]
+; HARD:       std
+; HARD:       std
+
+; SOFT-LABEL: f128_ops
+; SOFT:       ldd
+; SOFT:       ldd
+; SOFT:       ldd
+; SOFT:       ldd
+; SOFT:       call _Q_add
+; SOFT:       call _Q_sub
+; SOFT:       call _Q_mul
+; SOFT:       call _Q_div
+; SOFT:       std
+; SOFT:       std
+
+define void @f128_ops(fp128* noalias sret %scalar.result, fp128* byval %a, fp128* byval %b, fp128* byval %c, fp128* byval %d) {
+entry:
+  %0 = load fp128* %a, align 8
+  %1 = load fp128* %b, align 8
+  %2 = load fp128* %c, align 8
+  %3 = load fp128* %d, align 8
+  %4 = fadd fp128 %0, %1
+  %5 = fsub fp128 %4, %2
+  %6 = fmul fp128 %5, %3
+  %7 = fdiv fp128 %6, %4
+  store fp128 %7, fp128* %scalar.result, align 8
+  ret void
+}
+
+; HARD-LABEL: f128_spill
+; HARD:       std %f{{.+}}, [%[[S0:.+]]]
+; HARD:       std %f{{.+}}, [%[[S1:.+]]]
+; HARD-DAG:   ldd [%[[S0]]], %f{{.+}}
+; HARD-DAG:   ldd [%[[S1]]], %f{{.+}}
+; HARD:       jmp
+
+; SOFT-LABEL: f128_spill
+; SOFT:       std %f{{.+}}, [%[[S0:.+]]]
+; SOFT:       std %f{{.+}}, [%[[S1:.+]]]
+; SOFT-DAG:   ldd [%[[S0]]], %f{{.+}}
+; SOFT-DAG:   ldd [%[[S1]]], %f{{.+}}
+; SOFT:       jmp
+; Keeps an fp128 value live across an empty asm that clobbers f0-f31; both runs expect it to be stored to and reloaded from two memory slots.
+define void @f128_spill(fp128* noalias sret %scalar.result, fp128* byval %a) {
+entry:
+  %0 = load fp128* %a, align 8
+  call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+  store fp128 %0, fp128* %scalar.result, align 8
+  ret void
+}
+
+; HARD-LABEL: f128_compare
+; HARD:       fcmpq
+; HARD-NEXT:  nop
+
+; SOFT-LABEL: f128_compare
+; SOFT:       _Q_cmp
+
+define i32 @f128_compare(fp128* byval %f0, fp128* byval %f1, i32 %a, i32 %b) {
+entry:
+   %0 = load fp128* %f0, align 8
+   %1 = load fp128* %f1, align 8
+   %cond = fcmp ult fp128 %0, %1
+   %ret = select i1 %cond, i32 %a, i32 %b
+   ret i32 %ret
+}
+
+; HARD-LABEL: f128_compare2
+; HARD:       fcmpq
+; HARD:       fb{{ule|g}}
+
+; SOFT-LABEL: f128_compare2
+; SOFT:       _Q_cmp
+; SOFT:       cmp
+
+define i32 @f128_compare2() {
+entry:
+  %0 = fcmp ogt fp128 undef, 0xL00000000000000000000000000000000
+  br i1 %0, label %"5", label %"7"
+
+"5":                                              ; preds = %entry
+  ret i32 0
+
+"7":                                              ; preds = %entry
+  ret i32 1
+}
+
+
+; HARD-LABEL: f128_abs
+; HARD:       fabss
+
+; SOFT-LABEL: f128_abs
+; SOFT:       fabss
+
+define void @f128_abs(fp128* noalias sret %scalar.result, fp128* byval %a) {
+entry:
+  %0 = load fp128* %a, align 8
+  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
+  store fp128 %1, fp128* %scalar.result, align 8
+  ret void
+}
+
+declare fp128 @llvm.fabs.f128(fp128) nounwind readonly
+
+; HARD-LABEL: int_to_f128
+; HARD:       fitoq
+
+; SOFT-LABEL: int_to_f128
+; SOFT:       _Q_itoq
+
+define void @int_to_f128(fp128* noalias sret %scalar.result, i32 %i) {
+entry:
+  %0 = sitofp i32 %i to fp128
+  store fp128 %0, fp128* %scalar.result, align 8
+  ret void
+}
+
+; HARD-LABEL: fp128_unaligned
+; HARD:       ldub
+; HARD:       faddq
+; HARD:       stb
+; HARD:       jmp
+
+; SOFT-LABEL: fp128_unaligned
+; SOFT:       ldub
+; SOFT:       call _Q_add
+; SOFT:       stb
+; SOFT:       jmp
+
+define void @fp128_unaligned(fp128* %a, fp128* %b, fp128* %c) {
+entry:
+  %0 = load fp128* %a, align 1
+  %1 = load fp128* %b, align 1
+  %2 = fadd fp128 %0, %1
+  store fp128 %2, fp128* %c, align 1
+  ret void
+}
+
+; HARD-LABEL: uint_to_f128
+; HARD:       fdtoq
+
+; SOFT-LABEL: uint_to_f128
+; SOFT:       _Q_utoq
+
+define void @uint_to_f128(fp128* noalias sret %scalar.result, i32 %i) {
+entry:
+  %0 = uitofp i32 %i to fp128
+  store fp128 %0, fp128* %scalar.result, align 8
+  ret void
+}
+
+; HARD-LABEL: f128_to_i32
+; HARD:       fqtoi
+; HARD:       fqtoi
+
+; SOFT-LABEL: f128_to_i32
+; SOFT:       call _Q_qtou
+; SOFT:       call _Q_qtoi
+
+
+define i32 @f128_to_i32(fp128* %a, fp128* %b) {
+entry:
+  %0 = load fp128* %a, align 8
+  %1 = load fp128* %b, align 8
+  %2 = fptoui fp128 %0 to i32
+  %3 = fptosi fp128 %1 to i32
+  %4 = add i32 %2, %3
+  ret i32 %4
+}
+
+; HARD-LABEL:    test_itoq_qtoi
+; HARD:          call _Q_lltoq
+; HARD:          call _Q_qtoll
+; HARD:          fitoq
+; HARD:          fqtoi
+
+; SOFT-LABEL:    test_itoq_qtoi
+; SOFT:          call _Q_lltoq
+; SOFT:          call _Q_qtoll
+; SOFT:          call _Q_itoq
+; SOFT:          call _Q_qtoi
+
+define void @test_itoq_qtoi(i64 %a, i32 %b, i64* %ptr0, fp128* %ptr1) {
+entry:
+  %0 = sitofp i64 %a to fp128
+  store  fp128 %0, fp128* %ptr1, align 8
+  %1 = fptosi fp128 %0 to i64
+  store  i64 %1, i64* %ptr0, align 8
+  %2 = sitofp i32 %b to fp128
+  store  fp128 %2, fp128* %ptr1, align 8
+  %3 = fptosi fp128 %2 to i32
+  %4 = bitcast i64* %ptr0 to i32*
+  store  i32 %3, i32* %4, align 8
+  ret void
+}
+
+; HARD-LABEL:    test_utoq_qtou
+; HARD-DAG:      call _Q_ulltoq
+; HARD-DAG:      call _Q_qtoull
+; HARD-DAG:      fdtoq
+; HARD-DAG:      fqtoi
+
+; SOFT-LABEL:    test_utoq_qtou
+; SOFT-DAG:      call _Q_ulltoq
+; SOFT-DAG:      call _Q_qtoull
+; SOFT-DAG:      call _Q_utoq
+; SOFT-DAG:      call _Q_qtou
+
+define void @test_utoq_qtou(i64 %a, i32 %b, i64* %ptr0, fp128* %ptr1) {
+entry:
+  %0 = uitofp i64 %a to fp128
+  store  fp128 %0, fp128* %ptr1, align 8
+  %1 = fptoui fp128 %0 to i64
+  store  i64 %1, i64* %ptr0, align 8
+  %2 = uitofp i32 %b to fp128
+  store  fp128 %2, fp128* %ptr1, align 8
+  %3 = fptoui fp128 %2 to i32
+  %4 = bitcast i64* %ptr0 to i32*
+  store  i32 %3, i32* %4, align 8
+  ret void
+}
diff --git a/test/CodeGen/SPARC/globals.ll b/test/CodeGen/SPARC/globals.ll
index 8d8de58f7ccf..7e3effe3f4ce 100644
--- a/test/CodeGen/SPARC/globals.ll
+++ b/test/CodeGen/SPARC/globals.ll
@@ -14,15 +14,17 @@ define zeroext i8 @loadG() {
 
 ; abs32: loadG
 ; abs32: sethi %hi(G), %[[R:[gilo][0-7]]]
-; abs32: ldub [%[[R]]+%lo(G)], %i0
-; abs32: jmp %i7+8
+; abs32: jmp %o7+8
+; abs32: ldub [%[[R]]+%lo(G)], %o0
+
 
 ; abs44: loadG
 ; abs44: sethi %h44(G), %[[R1:[gilo][0-7]]]
 ; abs44: add %[[R1]], %m44(G), %[[R2:[gilo][0-7]]]
 ; abs44: sllx %[[R2]], 12, %[[R3:[gilo][0-7]]]
-; abs44: ldub [%[[R3]]+%l44(G)], %i0
-; abs44: jmp %i7+8
+; abs44: jmp %o7+8
+; abs44: ldub [%[[R3]]+%l44(G)], %o0
+
 
 ; abs64: loadG
 ; abs64: sethi %hi(G), %[[R1:[gilo][0-7]]]
@@ -30,8 +32,9 @@ define zeroext i8 @loadG() {
 ; abs64: sethi %hh(G), %[[R3:[gilo][0-7]]]
 ; abs64: add %[[R3]], %hm(G), %[[R4:[gilo][0-7]]]
 ; abs64: sllx %[[R4]], 32, %[[R5:[gilo][0-7]]]
-; abs64: ldub [%[[R5]]+%[[R2]]], %i0
-; abs64: jmp %i7+8
+; abs64: jmp %o7+8
+; abs64: ldub [%[[R5]]+%[[R2]]], %o0
+
 
 ; v8pic32: loadG
 ; v8pic32: _GLOBAL_OFFSET_TABLE_
@@ -40,6 +43,8 @@ define zeroext i8 @loadG() {
 ; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
 ; v8pic32: ldub [%[[Gaddr]]], %i0
 ; v8pic32: jmp %i7+8
+; v8pic32: restore
+
 
 ; v9pic32: loadG
 ; v9pic32: _GLOBAL_OFFSET_TABLE_
@@ -48,3 +53,5 @@ define zeroext i8 @loadG() {
 ; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
 ; v9pic32: ldub [%[[Gaddr]]], %i0
 ; v9pic32: jmp %i7+8
+; v9pic32: restore
+
diff --git a/test/CodeGen/SPARC/leafproc.ll b/test/CodeGen/SPARC/leafproc.ll
new file mode 100644
index 000000000000..0a7ae083d208
--- /dev/null
+++ b/test/CodeGen/SPARC/leafproc.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=sparc -disable-sparc-leaf-proc=0 < %s | FileCheck %s
+
+; CHECK-LABEL:      func_nobody:
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: nop
+define void @func_nobody() {
+entry:
+  ret void
+}
+
+
+; CHECK-LABEL:      return_int_const:
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: or %g0, 1729, %o0
+define i32 @return_int_const() {
+entry:
+  ret i32 1729
+}
+
+; CHECK-LABEL:      return_double_const:
+; CHECK:      sethi
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: ldd {{.*}}, %f0
+
+define double @return_double_const() {
+entry:
+  ret double 0.000000e+00
+}
+
+; CHECK-LABEL:      leaf_proc_with_args:
+; CHECK:      add {{%o[0-1]}}, {{%o[0-1]}}, [[R:%[go][0-7]]]
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: add [[R]], %o2, %o0
+
+define i32 @leaf_proc_with_args(i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = add nsw i32 %b, %a
+  %1 = add nsw i32 %0, %c
+  ret i32 %1
+}
+
+; CHECK-LABEL:     leaf_proc_with_args_in_stack:
+; CHECK-DAG: ld [%sp+92], {{%[go][0-7]}}
+; CHECK-DAG: ld [%sp+96], {{%[go][0-7]}}
+; CHECK:     jmp %o7+8
+; CHECK-NEXT: add {{.*}}, %o0
+define i32 @leaf_proc_with_args_in_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %0 = add nsw i32 %b, %a
+  %1 = add nsw i32 %0, %c
+  %2 = add nsw i32 %1, %d
+  %3 = add nsw i32 %2, %e
+  %4 = add nsw i32 %3, %f
+  %5 = add nsw i32 %4, %g
+  %6 = add nsw i32 %5, %h
+  ret i32 %6
+}
+
+; CHECK-LABEL:      leaf_proc_with_local_array:
+; CHECK:      add %sp, -104, %sp
+; CHECK:      or %g0, 1, [[R1:%[go][0-7]]]
+; CHECK:      st [[R1]], [%sp+96]
+; CHECK:      or %g0, 2, [[R2:%[go][0-7]]]
+; CHECK:      st [[R2]], [%sp+100]
+; CHECK:      ld {{.+}}, %o0
+; CHECK:      jmp %o7+8
+; CHECK-NEXT: add %sp, 104, %sp
+
+define i32 @leaf_proc_with_local_array(i32 %a, i32 %b, i32 %c) {
+entry:
+  %array = alloca [2 x i32], align 4
+  %0 = sub nsw i32 %b, %c
+  %1 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 0
+  store i32 1, i32* %1, align 4
+  %2 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 1
+  store i32 2, i32* %2, align 4
+  %3 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 %a
+  %4 = load i32* %3, align 4
+  ret i32 %4
+}
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
index 6f30a8797967..4d344fa91a9e 100644
--- a/test/CodeGen/SPARC/lit.local.cfg
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Sparc' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/SPARC/rem.ll b/test/CodeGen/SPARC/rem.ll
new file mode 100644
index 000000000000..abef1fc112b4
--- /dev/null
+++ b/test/CodeGen/SPARC/rem.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+
+; CHECK-LABEL: test1:
+; CHECK:        sdivx %o0, %o1, %o2
+; CHECK-NEXT:   mulx %o2, %o1, %o1
+; CHECK-NEXT:   jmp %o7+8
+; CHECK-NEXT:   sub %o0, %o1, %o0
+
+define i64 @test1(i64 %X, i64 %Y) {
+        %tmp1 = srem i64 %X, %Y
+        ret i64 %tmp1
+}
+
+; CHECK-LABEL: test2:
+; CHECK:        udivx %o0, %o1, %o2
+; CHECK-NEXT:   mulx %o2, %o1, %o1
+; CHECK-NEXT:   jmp %o7+8
+; CHECK-NEXT:   sub %o0, %o1, %o0
+
+define i64 @test2(i64 %X, i64 %Y) {
+        %tmp1 = urem i64 %X, %Y
+        ret i64 %tmp1
+}
+
+; PR18150: the checks require udivx and the final sub to consume the mulx result.
+; CHECK-LABEL: test3:
+; CHECK:       sethi 2545, [[R0:%[gilo][0-7]]]
+; CHECK:       or    [[R0]], 379, [[R1:%[gilo][0-7]]]
+; CHECK:       mulx  %o0, [[R1]], [[R2:%[gilo][0-7]]]
+; CHECK:       udivx [[R2]], 1021, [[R3:%[gilo][0-7]]]
+; CHECK:       mulx  [[R3]], 1021, [[R4:%[gilo][0-7]]]
+; CHECK:       sub   [[R2]], [[R4]], %o0
+
+define i64 @test3(i64 %b) {
+entry:
+  %mul = mul i64 %b, 2606459      ; 2606459 == (2545 << 10) | 379, i.e. the sethi/or pair checked above
+  %rem = urem i64 %mul, 1021
+  ret i64 %rem
+}
diff --git a/test/CodeGen/SPARC/setjmp.ll b/test/CodeGen/SPARC/setjmp.ll
new file mode 100644
index 000000000000..39984fb14bcb
--- /dev/null
+++ b/test/CodeGen/SPARC/setjmp.ll
@@ -0,0 +1,72 @@
+;RUN: llc -march=sparc   < %s | FileCheck %s
+;RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefix=V9
+
+
+%0 = type { [32 x i32] }
+%struct.jmpbuf_env = type { i32, i32, [1 x %struct.__jmp_buf_tag], i32 }
+%struct.__jmp_buf_tag = type { [3 x i32], i32, %0 }
+
+@jenv = common unnamed_addr global %struct.jmpbuf_env* null
+@.cst = linker_private unnamed_addr constant [30 x i8] c"in bar with jmp_buf's id: %d\0A\00", align 64
+
+; CHECK-LABEL: foo
+; CHECK-DAG:   st {{.+}}, [%i0]
+; CHECK-DAG:   st {{.+}}, [%i0+4]
+; CHECK:       call _setjmp
+; CHECK:       ld [%fp+{{.+}}], %[[R:[gilo][0-7]]]
+; CHECK:       st %o0, [%[[R]]+{{.+}}]
+
+; V9-LABEL:   foo
+; V9-DAG:     st {{.+}}, [%i0]
+; V9-DAG:     st {{.+}}, [%i0+4]
+; V9:         call _setjmp
+; V9:         ldx [%fp+{{.+}}], %[[R:[gilo][0-7]]]
+; V9:         st %o0, [%[[R]]+{{.+}}]
+
+; Function Attrs: nounwind
+define i32 @foo(%struct.jmpbuf_env* byval %inbuf) #0 {
+entry:
+  %0 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 0
+  store i32 0, i32* %0, align 4, !tbaa !4
+  %1 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 1
+  store i32 1, i32* %1, align 4, !tbaa !4
+  %2 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 2, i32 0
+  %3 = call i32 @_setjmp(%struct.__jmp_buf_tag* %2) #2
+  %4 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 3
+  store i32 %3, i32* %4, align 4, !tbaa !4
+  store %struct.jmpbuf_env* %inbuf, %struct.jmpbuf_env** @jenv, align 4, !tbaa !3
+  %5 = load i32* %1, align 4, !tbaa !4
+  %6 = icmp eq i32 %5, 1
+  %7 = icmp eq i32 %3, 0
+  %or.cond = and i1 %6, %7
+  br i1 %or.cond, label %"4.i", label %bar.exit
+
+"4.i":                                            ; preds = %entry
+  call void @longjmp(%struct.__jmp_buf_tag* %2, i32 0) #1
+  unreachable
+
+bar.exit:                                         ; preds = %entry
+  %8 = load i32* %0, align 4, !tbaa !4
+  %9 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0
+  ret i32 0
+}
+
+; Function Attrs: nounwind returns_twice
+declare i32 @_setjmp(%struct.__jmp_buf_tag*) #2
+
+; Function Attrs: noreturn nounwind
+declare void @longjmp(%struct.__jmp_buf_tag*, i32) #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture, ...) #0
+
+
+attributes #0 = { nounwind }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind returns_twice }
+
+!0 = metadata !{metadata !"alias set 6: struct.jmpbuf_env*", metadata !1}
+!1 = metadata !{metadata !1}
+!2 = metadata !{metadata !"alias set 3: int", metadata !1}
+!3 = metadata !{metadata !0, metadata !0, i64 0}
+!4 = metadata !{metadata !2, metadata !2, i64 0}
diff --git a/test/CodeGen/SPARC/tls.ll b/test/CodeGen/SPARC/tls.ll
new file mode 100644
index 000000000000..660ddff0fae9
--- /dev/null
+++ b/test/CodeGen/SPARC/tls.ll
@@ -0,0 +1,73 @@
+; RUN: llc <%s -march=sparc   -relocation-model=static | FileCheck %s --check-prefix=v8abs
+; RUN: llc <%s -march=sparcv9 -relocation-model=static | FileCheck %s --check-prefix=v9abs
+; RUN: llc <%s -march=sparc   -relocation-model=pic    | FileCheck %s --check-prefix=pic
+; RUN: llc <%s -march=sparcv9 -relocation-model=pic    | FileCheck %s --check-prefix=pic
+
+
+@local_symbol = internal thread_local global i32 0
+@extern_symbol = external thread_local global i32
+
+; v8abs-LABEL:  test_tls_local
+; v8abs:        sethi  %tle_hix22(local_symbol), [[R0:%[goli][0-7]]]
+; v8abs:        xor    [[R0]], %tle_lox10(local_symbol), [[R1:%[goli][0-7]]]
+; v8abs:        ld     [%g7+[[R1]]]
+
+; v9abs-LABEL:  test_tls_local
+; v9abs:        sethi  %tle_hix22(local_symbol), [[R0:%[goli][0-7]]]
+; v9abs:        xor    [[R0]], %tle_lox10(local_symbol), [[R1:%[goli][0-7]]]
+; v9abs:        ld     [%g7+[[R1]]]
+
+; pic-LABEL:  test_tls_local
+; pic:        or     {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]]
+; pic:        add    [[PC]], %o7, [[GOTBASE:%[goli][0-7]]]
+; pic-DAG:    sethi  %tldm_hi22(local_symbol), [[R0:%[goli][0-7]]]
+; pic-DAG:    add    [[R0]], %tldm_lo10(local_symbol), [[R1:%[goli][0-7]]]
+; pic-DAG:    add    [[GOTBASE]], [[R1]], %o0, %tldm_add(local_symbol)
+; pic-DAG:    call   __tls_get_addr, %tldm_call(local_symbol)
+; pic-DAG:    sethi  %tldo_hix22(local_symbol), [[R2:%[goli][0-7]]]
+; pic-DAG:    xor    [[R2]], %tldo_lox10(local_symbol), [[R3:%[goli][0-7]]]
+; pic:        add    %o0, [[R3]], {{.+}}, %tldo_add(local_symbol)
+
+define i32 @test_tls_local() {
+entry:
+  %0 = load i32* @local_symbol, align 4
+  %1 = add i32 %0, 1
+  store i32 %1, i32* @local_symbol, align 4
+  ret i32 %1
+}
+
+
+; v8abs-LABEL:  test_tls_extern
+; v8abs:        or     {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]]
+; v8abs:        add    [[PC]], %o7, %[[GOTBASE:[goli][0-7]]]
+; v8abs:        sethi  %tie_hi22(extern_symbol), [[R1:%[goli][0-7]]]
+; v8abs:        add    [[R1]], %tie_lo10(extern_symbol), %[[R2:[goli][0-7]]]
+; v8abs:        ld     [%[[GOTBASE]]+%[[R2]]], [[R3:%[goli][0-7]]], %tie_ld(extern_symbol)
+; v8abs:        add    %g7, [[R3]], %[[R4:[goli][0-7]]], %tie_add(extern_symbol)
+; v8abs:        ld     [%[[R4]]]
+
+; v9abs-LABEL:  test_tls_extern
+; v9abs:        or     {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]]
+; v9abs:        add    [[PC]], %o7, %[[GOTBASE:[goli][0-7]]]
+; v9abs:        sethi  %tie_hi22(extern_symbol), [[R1:%[goli][0-7]]]
+; v9abs:        add    [[R1]], %tie_lo10(extern_symbol), %[[R2:[goli][0-7]]]
+; v9abs:        ldx    [%[[GOTBASE]]+%[[R2]]], [[R3:%[goli][0-7]]], %tie_ldx(extern_symbol)
+; v9abs:        add    %g7, [[R3]], %[[R4:[goli][0-7]]], %tie_add(extern_symbol)
+; v9abs:        ld     [%[[R4]]]
+
+; pic-LABEL:  test_tls_extern
+; pic:        or     {{%[goli][0-7]}}, %lo(_GLOBAL_OFFSET_TABLE_+{{.+}}), [[PC:%[goli][0-7]]]
+; pic:        add    [[PC]], %o7, [[GOTBASE:%[goli][0-7]]]
+; pic:        sethi  %tgd_hi22(extern_symbol), [[R0:%[goli][0-7]]]
+; pic:        add    [[R0]], %tgd_lo10(extern_symbol), [[R1:%[goli][0-7]]]
+; pic:        add    [[GOTBASE]], [[R1]], %o0, %tgd_add(extern_symbol)
+; pic:        call   __tls_get_addr, %tgd_call(extern_symbol)
+; pic-NEXT:   nop
+
+define i32 @test_tls_extern() {
+entry:
+  %0 = load i32* @extern_symbol, align 4
+  %1 = add i32 %0, 1
+  store i32 %1, i32* @extern_symbol, align 4
+  ret i32 %1
+}
diff --git a/test/CodeGen/SPARC/varargs.ll b/test/CodeGen/SPARC/varargs.ll
index b13f90e6ca71..76e16cd44f6a 100644
--- a/test/CodeGen/SPARC/varargs.ll
+++ b/test/CodeGen/SPARC/varargs.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -disable-block-placement | FileCheck %s
+; RUN: llc < %s -disable-block-placement -disable-sparc-leaf-proc=0 | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32:64-S128"
 target triple = "sparcv9-sun-solaris"
 
diff --git a/test/CodeGen/SystemZ/Large/branch-range-01.py b/test/CodeGen/SystemZ/Large/branch-range-01.py
new file mode 100644
index 000000000000..552c9ca0ea85
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-01.py
@@ -0,0 +1,105 @@
+# Test normal conditional branches in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffd8 bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 8 bytes if it uses a short branch
+# and 10 if it uses a long one.  The ones before "main:" have to take the branch
+# length into account -- which is 4 bytes for short branches -- so the final
+# (0x28 - 4) / 8 == 4 blocks can use short branches.  The ones after "main:"
+# do not, so the first 0x28 / 8 == 5 can use short branches.  However,
+# the conservative algorithm we use makes one branch unnecessarily long
+# on each side.
+#
+# CHECK: c %r4, 0(%r3)
+# CHECK: jge [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 4(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 8(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 12(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 16(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 20(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 24(%r3)
+# CHECK: j{{g?}}e [[LABEL]]
+# CHECK: c %r4, 28(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 32(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 36(%r3)
+# CHECK: je [[LABEL]]
+# ...main goes here...
+# CHECK: c %r4, 100(%r3)
+# CHECK: je [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 104(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 108(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 112(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 116(%r3)
+# CHECK: j{{g?}}e [[LABEL]]
+# CHECK: c %r4, 120(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 124(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 128(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 132(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 136(%r3)
+# CHECK: jge [[LABEL]]
+
+branch_blocks = 10  # number of compare-and-branch blocks on each side of "main"
+main_size = 0xffd8  # bytes of stores emitted into "main"
+
+print('define void @f1(i8 *%base, i32 *%stop, i32 %limit) {')
+print('entry:')
+print('  br label %before0')
+print('')  # not print(): under Python 2 that would emit "()"
+
+for i in range(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print('before%d:' % i)
+    print('  %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i))
+    print('  %%bcur%d = load volatile i32 *%%bstop%d' % (i, i))
+    print('  %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i))
+    print('  br i1 %%btest%d, label %%after0, label %%%s' % (i, next))
+    print('')
+
+print('%s:' % next)  # next == 'main' once the loop above has finished
+a, b = 1, 1
+for i in range(0, main_size, 6):  # one store per 6 bytes of main_size
+    a, b = b, a + b  # Fibonacci-style sequence varies the offsets and values
+    offset = 4096 + b % 500000
+    value = a % 256
+    print('  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset))
+    print('  store volatile i8 %d, i8 *%%ptr%d' % (value, i))
+
+for i in range(branch_blocks):
+    print('  %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25))
+    print('  %%acur%d = load volatile i32 *%%astop%d' % (i, i))
+    print('  %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i))
+    print('  br i1 %%atest%d, label %%main, label %%after%d' % (i, i))
+    print('')
+    print('after%d:' % i)
+
+print('  ret void')
+print('}')
diff --git a/test/CodeGen/SystemZ/Large/branch-range-02.py b/test/CodeGen/SystemZ/Large/branch-range-02.py
new file mode 100644
index 000000000000..0b21ced99a1f
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-02.py
@@ -0,0 +1,82 @@
+# Test normal conditional branches in cases where block alignments cause
+# some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu -align-all-blocks=8 | FileCheck %s
+
+# Construct:
+#
+# b0:
+#   conditional branch to end
+#   ...
+# b<N-1>:
+#   conditional branch to end
+# b<N>:
+#   conditional branch to b0
+#   ...
+# b<2*N-1>:
+#   conditional branch to b0
+# end:
+#
+# with 2 * N == 256 + 4 blocks in total.  The -align-all-blocks=8 option
+# ensures that all blocks are 256 bytes in size.  The first 4 blocks and
+# the last 4 blocks are then out of range.
+#
+# CHECK: c %r4, 0(%r3)
+# CHECK: jge [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 4(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 8(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 12(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 16(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 20(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 24(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 28(%r3)
+# CHECK: je [[LABEL]]
+# ...lots of other blocks...
+# CHECK: c %r4, 1004(%r3)
+# CHECK: je [[LABEL:\.L[^ ]*]]
+# CHECK: c %r4, 1008(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1012(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1016(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1020(%r3)
+# CHECK: je [[LABEL]]
+# CHECK: c %r4, 1024(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 1028(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 1032(%r3)
+# CHECK: jge [[LABEL]]
+# CHECK: c %r4, 1036(%r3)
+# CHECK: jge [[LABEL]]
+
+blocks = 256 + 4  # total number of b<i> blocks emitted
+
+print('define void @f1(i8 *%base, i32 *%stop, i32 %limit) {')
+print('entry:')
+print('  br label %b0')
+print('')  # not print(): under Python 2 that would emit "()"
+
+a, b = 1, 1
+for i in range(blocks):
+    a, b = b, a + b  # Fibonacci-style sequence varies the stored values
+    value = a % 256
+    next = 'b%d' % (i + 1) if i + 1 < blocks else 'end'
+    other = 'end' if 2 * i < blocks else 'b0'  # first half targets end, second half b0
+    print('b%d:' % i)
+    print('  store volatile i8 %d, i8 *%%base' % value)
+    print('  %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i))
+    print('  %%acur%d = load volatile i32 *%%astop%d' % (i, i))
+    print('  %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i))
+    print('  br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next))
+
+print('')
+print('%s:' % next)  # next == 'end' once the loop above has finished
+print('  ret void')
+print('}')
diff --git a/test/CodeGen/SystemZ/Large/branch-range-03.py b/test/CodeGen/SystemZ/Large/branch-range-03.py
new file mode 100644
index 000000000000..75cdf247c6f3
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-03.py
@@ -0,0 +1,107 @@
+# Test 32-bit COMPARE AND BRANCH in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffcc bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 12 bytes if it uses a short
+# branch and 14 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x34 - 6) / 12 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks
+# can use short branches.
+#
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL:\.L[^ ]*]]
+# CHECK: lb [[REG:%r[0-5]]], 1(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 2(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 3(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 4(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 5(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 6(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 7(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL]]
+# ...main goes here...
+# CHECK: lb [[REG:%r[0-5]]], 25(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL:\.L[^ ]*]]
+# CHECK: lb [[REG:%r[0-5]]], 26(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 27(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 28(%r3)
+# CHECK: crje %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 29(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 30(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 31(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 32(%r3)
+# CHECK: cr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+
+branch_blocks = 8  # number of compare-and-branch blocks on each side of "main"
+main_size = 0xffcc  # bytes of stores emitted into "main"
+
+print('define void @f1(i8 *%base, i8 *%stop, i32 %limit) {')
+print('entry:')
+print('  br label %before0')
+print('')  # not print(): under Python 2 that would emit "()"
+
+for i in range(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print('before%d:' % i)
+    print('  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i))
+    print('  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i))
+    print('  %%bext%d = sext i8 %%bcur%d to i32' % (i, i))
+    print('  %%btest%d = icmp eq i32 %%limit, %%bext%d' % (i, i))
+    print('  br i1 %%btest%d, label %%after0, label %%%s' % (i, next))
+    print('')
+
+print('%s:' % next)  # next == 'main' once the loop above has finished
+a, b = 1, 1
+for i in range(0, main_size, 6):  # one store per 6 bytes of main_size
+    a, b = b, a + b  # Fibonacci-style sequence varies the offsets and values
+    offset = 4096 + b % 500000
+    value = a % 256
+    print('  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset))
+    print('  store volatile i8 %d, i8 *%%ptr%d' % (value, i))
+
+for i in range(branch_blocks):
+    print('  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25))
+    print('  %%acur%d = load volatile i8 *%%astop%d' % (i, i))
+    print('  %%aext%d = sext i8 %%acur%d to i32' % (i, i))
+    print('  %%atest%d = icmp eq i32 %%limit, %%aext%d' % (i, i))
+    print('  br i1 %%atest%d, label %%main, label %%after%d' % (i, i))
+    print('')
+    print('after%d:' % i)
+
+print('  ret void')
+print('}')
diff --git a/test/CodeGen/SystemZ/Large/branch-range-04.py b/test/CodeGen/SystemZ/Large/branch-range-04.py
new file mode 100644
index 000000000000..3ae3ae9c37f7
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-04.py
@@ -0,0 +1,111 @@
+# Test 64-bit COMPARE AND BRANCH in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffcc bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 12 bytes if it uses a short
+# branch and 16 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x34 - 6) / 12 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks
+# can use short branches.  The conservative algorithm we use makes
+# one of the forward branches unnecessarily long, as noted in the
+# check output below.
+#
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL:\.L[^ ]*]]
+# CHECK: lgb [[REG:%r[0-5]]], 1(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 2(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 3(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 4(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# ...as mentioned above, the next one could be a CGRJE instead...
+# CHECK: lgb [[REG:%r[0-5]]], 5(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 6(%r3)
+# CHECK: cgrje %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 7(%r3)
+# CHECK: cgrje %r4, [[REG]], [[LABEL]]
+# ...main goes here...
+# CHECK: lgb [[REG:%r[0-5]]], 25(%r3)
+# CHECK: cgrje %r4, [[REG]], [[LABEL:\.L[^ ]*]]
+# CHECK: lgb [[REG:%r[0-5]]], 26(%r3)
+# CHECK: cgrje %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 27(%r3)
+# CHECK: cgrje %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 28(%r3)
+# CHECK: cgrje %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 29(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 30(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 31(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 32(%r3)
+# CHECK: cgr %r4, [[REG]]
+# CHECK: jge [[LABEL]]
+
+branch_blocks = 8  # number of compare-and-branch blocks on each side of "main"
+main_size = 0xffcc  # bytes of stores emitted into "main"
+
+print('define void @f1(i8 *%base, i8 *%stop, i64 %limit) {')
+print('entry:')
+print('  br label %before0')
+print('')  # not print(): under Python 2 that would emit "()"
+
+for i in range(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print('before%d:' % i)
+    print('  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i))
+    print('  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i))
+    print('  %%bext%d = sext i8 %%bcur%d to i64' % (i, i))
+    print('  %%btest%d = icmp eq i64 %%limit, %%bext%d' % (i, i))
+    print('  br i1 %%btest%d, label %%after0, label %%%s' % (i, next))
+    print('')
+
+print('%s:' % next)  # next == 'main' once the loop above has finished
+a, b = 1, 1
+for i in range(0, main_size, 6):  # one store per 6 bytes of main_size
+    a, b = b, a + b  # Fibonacci-style sequence varies the offsets and values
+    offset = 4096 + b % 500000
+    value = a % 256
+    print('  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset))
+    print('  store volatile i8 %d, i8 *%%ptr%d' % (value, i))
+
+for i in range(branch_blocks):
+    print('  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25))
+    print('  %%acur%d = load volatile i8 *%%astop%d' % (i, i))
+    print('  %%aext%d = sext i8 %%acur%d to i64' % (i, i))
+    print('  %%atest%d = icmp eq i64 %%limit, %%aext%d' % (i, i))
+    print('  br i1 %%atest%d, label %%main, label %%after%d' % (i, i))
+    print('')
+    print('after%d:' % i)
+
+print('  ret void')
+print('}')
diff --git a/test/CodeGen/SystemZ/Large/branch-range-05.py b/test/CodeGen/SystemZ/Large/branch-range-05.py
new file mode 100644
index 000000000000..6928b8fc21d6
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-05.py
@@ -0,0 +1,109 @@
+# Test 32-bit COMPARE IMMEDIATE AND BRANCH in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffcc bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 12 bytes if it uses a short
+# branch and 16 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x34 - 6) / 12 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks
+# can use short branches.  The conservative algorithm we use makes
+# one of the forward branches unnecessarily long, as noted in the
+# check output below.
+#
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 50
+# CHECK: jgl [[LABEL:\.L[^ ]*]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 51
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 52
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 53
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 54
+# CHECK: jgl [[LABEL]]
+# ...as mentioned above, the next one could be a CIJL instead...
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 55
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cijl [[REG]], 56, [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cijl [[REG]], 57, [[LABEL]]
+# ...main goes here...
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cijl [[REG]], 100, [[LABEL:\.L[^ ]*]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cijl [[REG]], 101, [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cijl [[REG]], 102, [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cijl [[REG]], 103, [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 104
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 105
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 106
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: chi [[REG]], 107
+# CHECK: jgl [[LABEL]]
+
+branch_blocks = 8
+main_size = 0xffcc
+
+print 'define void @f1(i8 *%base, i8 *%stop) {'
+print 'entry:'
+print '  br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print 'before%d:' % i
+    print '  %%bcur%d = load volatile i8 *%%stop' % i
+    print '  %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
+    print '  %%btest%d = icmp slt i32 %%bext%d, %d' % (i, i, i + 50)
+    print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+    print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%acur%d = load volatile i8 *%%stop' % i
+    print '  %%aext%d = sext i8 %%acur%d to i32' % (i, i)
+    print '  %%atest%d = icmp slt i32 %%aext%d, %d' % (i, i, i + 100)
+    print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-06.py b/test/CodeGen/SystemZ/Large/branch-range-06.py
new file mode 100644
index 000000000000..aabc72fa6ec8
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-06.py
@@ -0,0 +1,109 @@
+# Test 64-bit COMPARE IMMEDIATE AND BRANCH in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffcc bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 12 bytes if it uses a short
+# branch and 16 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x34 - 6) / 12 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks
+# can use short branches.  The conservative algorithm we use makes
+# one of the forward branches unnecessarily long, as noted in the
+# check output below.
+#
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 50
+# CHECK: jgl [[LABEL:\.L[^ ]*]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 51
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 52
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 53
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 54
+# CHECK: jgl [[LABEL]]
+# ...as mentioned above, the next one could be a CGIJL instead...
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 55
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgijl [[REG]], 56, [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgijl [[REG]], 57, [[LABEL]]
+# ...main goes here...
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgijl [[REG]], 100, [[LABEL:\.L[^ ]*]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgijl [[REG]], 101, [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgijl [[REG]], 102, [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cgijl [[REG]], 103, [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 104
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 105
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 106
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: cghi [[REG]], 107
+# CHECK: jgl [[LABEL]]
+
+branch_blocks = 8
+main_size = 0xffcc
+
+print 'define void @f1(i8 *%base, i8 *%stop) {'
+print 'entry:'
+print '  br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print 'before%d:' % i
+    print '  %%bcur%d = load volatile i8 *%%stop' % i
+    print '  %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
+    print '  %%btest%d = icmp slt i64 %%bext%d, %d' % (i, i, i + 50)
+    print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+    print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%acur%d = load volatile i8 *%%stop' % i
+    print '  %%aext%d = sext i8 %%acur%d to i64' % (i, i)
+    print '  %%atest%d = icmp slt i64 %%aext%d, %d' % (i, i, i + 100)
+    print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-07.py b/test/CodeGen/SystemZ/Large/branch-range-07.py
new file mode 100644
index 000000000000..90c442092e82
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-07.py
@@ -0,0 +1,68 @@
+# Test 32-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
+# of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# loopN:
+#   load of countN
+#   ...
+# loop0:
+#   0xffd8 bytes, from MVIY instructions
+#   decrement of count0 and conditional branch to loop0
+# after0:
+#   ...
+#   decrement of countN
+#   conditional branch to loopN
+# afterN:
+#
+# Each load occupies 4 bytes.  Each decrement and branch occupies 4
+# bytes if BRCT can be used, otherwise it occupies 10 bytes (AHI + BRCL).
+# This means that the sixth loop (loop5) contains 5 * 4 + 0xffd8 + 5 * 4 ==
+# 0x10000 bytes and is therefore (just) in range.  loop6 is out of range.
+#
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: ahi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: ahi {{%r[0-9]+}}, -1
+# CHECK: jglh
+
+branch_blocks = 8
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i32 *%counts) {'
+print 'entry:'
+
+for i in xrange(branch_blocks - 1, -1, -1):
+    print '  %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i)
+    print '  %%initcount%d = load i32 *%%countptr%d' % (i, i)
+    print '  br label %%loop%d' % i
+    
+    print 'loop%d:' % i
+    block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
+    block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
+    print ('  %%count%d = phi i32 [ %%initcount%d, %%%s ],'
+           ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
+
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%nextcount%d = add i32 %%count%d, -1' % (i, i)
+    print '  %%test%d = icmp ne i32 %%nextcount%d, 0' % (i, i)
+    print '  br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-08.py b/test/CodeGen/SystemZ/Large/branch-range-08.py
new file mode 100644
index 000000000000..ac1b1370a3e3
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-08.py
@@ -0,0 +1,69 @@
+# Test 64-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
+# of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# loopN:
+#   load of countN
+#   ...
+# loop0:
+#   0xffd8 bytes, from MVIY instructions
+#   decrement of count0 and conditional branch to loop0
+# after0:
+#   ...
+#   decrement of countN
+#   conditional branch to loopN
+# afterN:
+#
+# Each load occupies 6 bytes.  Each decrement and branch occupies 4
+# bytes if BRCTG can be used, otherwise it occupies 10 bytes (AGHI + BRCL).
+# This means that the fifth loop (loop4) contains 4 * 6 + 0xffd8 + 4 * 4 ==
+# 0x10000 bytes and is therefore (just) in range.  loop5 is out of range.
+#
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+
+branch_blocks = 8
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i64 *%counts) {'
+print 'entry:'
+
+for i in xrange(branch_blocks - 1, -1, -1):
+    print '  %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i)
+    print '  %%initcount%d = load i64 *%%countptr%d' % (i, i)
+    print '  br label %%loop%d' % i
+    
+    print 'loop%d:' % i
+    block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
+    block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
+    print ('  %%count%d = phi i64 [ %%initcount%d, %%%s ],'
+           ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
+
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%nextcount%d = add i64 %%count%d, -1' % (i, i)
+    print '  %%test%d = icmp ne i64 %%nextcount%d, 0' % (i, i)
+    print '  br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-09.py b/test/CodeGen/SystemZ/Large/branch-range-09.py
new file mode 100644
index 000000000000..b3fd81324dab
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-09.py
@@ -0,0 +1,107 @@
+# Test 32-bit COMPARE LOGICAL AND BRANCH in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffcc bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 12 bytes if it uses a short
+# branch and 14 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x34 - 6) / 12 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks
+# can use short branches.
+#
+# CHECK: lb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL:\.L[^ ]*]]
+# CHECK: lb [[REG:%r[0-5]]], 1(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 2(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 3(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 4(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 5(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 6(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 7(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL]]
+# ...main goes here...
+# CHECK: lb [[REG:%r[0-5]]], 25(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL:\.L[^ ]*]]
+# CHECK: lb [[REG:%r[0-5]]], 26(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 27(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 28(%r3)
+# CHECK: clrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 29(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 30(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 31(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lb [[REG:%r[0-5]]], 32(%r3)
+# CHECK: clr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+
+branch_blocks = 8
+main_size = 0xffcc
+
+print 'define void @f1(i8 *%base, i8 *%stop, i32 %limit) {'
+print 'entry:'
+print '  br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print 'before%d:' % i
+    print '  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
+    print '  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+    print '  %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
+    print '  %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i)
+    print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+    print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
+    print '  %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+    print '  %%aext%d = sext i8 %%acur%d to i32' % (i, i)
+    print '  %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i)
+    print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-10.py b/test/CodeGen/SystemZ/Large/branch-range-10.py
new file mode 100644
index 000000000000..3aeea3ebccdf
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-10.py
@@ -0,0 +1,111 @@
+# Test 64-bit COMPARE LOGICAL AND BRANCH in cases where the sheer number of
+# instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffcc bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 12 bytes if it uses a short
+# branch and 16 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x34 - 6) / 12 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x34 / 12 == 4 blocks
+# can use short branches.  The conservative algorithm we use makes
+# one of the forward branches unnecessarily long, as noted in the
+# check output below.
+#
+# CHECK: lgb [[REG:%r[0-5]]], 0(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL:\.L[^ ]*]]
+# CHECK: lgb [[REG:%r[0-5]]], 1(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 2(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 3(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 4(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# ...as mentioned above, the next one could be a CLGRJL instead...
+# CHECK: lgb [[REG:%r[0-5]]], 5(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 6(%r3)
+# CHECK: clgrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 7(%r3)
+# CHECK: clgrjl %r4, [[REG]], [[LABEL]]
+# ...main goes here...
+# CHECK: lgb [[REG:%r[0-5]]], 25(%r3)
+# CHECK: clgrjl %r4, [[REG]], [[LABEL:\.L[^ ]*]]
+# CHECK: lgb [[REG:%r[0-5]]], 26(%r3)
+# CHECK: clgrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 27(%r3)
+# CHECK: clgrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 28(%r3)
+# CHECK: clgrjl %r4, [[REG]], [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 29(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 30(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 31(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+# CHECK: lgb [[REG:%r[0-5]]], 32(%r3)
+# CHECK: clgr %r4, [[REG]]
+# CHECK: jgl [[LABEL]]
+
+branch_blocks = 8
+main_size = 0xffcc
+
+print 'define void @f1(i8 *%base, i8 *%stop, i64 %limit) {'
+print 'entry:'
+print '  br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print 'before%d:' % i
+    print '  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
+    print '  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+    print '  %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
+    print '  %%btest%d = icmp ult i64 %%limit, %%bext%d' % (i, i)
+    print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+    print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
+    print '  %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+    print '  %%aext%d = sext i8 %%acur%d to i64' % (i, i)
+    print '  %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i)
+    print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-11.py b/test/CodeGen/SystemZ/Large/branch-range-11.py
new file mode 100644
index 000000000000..034902c4a342
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-11.py
@@ -0,0 +1,127 @@
+# Test 32-bit COMPARE LOGICAL IMMEDIATE AND BRANCH in cases where the sheer
+# number of instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffc6 bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 14 bytes if it uses a short
+# branch and 20 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x3a - 6) / 14 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x3a / 14 == 4 blocks
+# can use short branches.  The conservative algorithm we use makes
+# one of the forward branches unnecessarily long, as noted in the
+# check output below.
+#
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 50
+# CHECK: jgl [[LABEL:\.L[^ ]*]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 51
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 52
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 53
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 54
+# CHECK: jgl [[LABEL]]
+# ...as mentioned above, the next one could be a CLIJL instead...
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 55
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clijl [[REG]], 56, [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clijl [[REG]], 57, [[LABEL]]
+# ...main goes here...
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clijl [[REG]], 100, [[LABEL:\.L[^ ]*]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clijl [[REG]], 101, [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clijl [[REG]], 102, [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clijl [[REG]], 103, [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 104
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 105
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 106
+# CHECK: jgl [[LABEL]]
+# CHECK: l [[REG:%r[0-5]]], 0(%r3)
+# CHECK: s [[REG]], 0(%r4)
+# CHECK: clfi [[REG]], 107
+# CHECK: jgl [[LABEL]]
+
+branch_blocks = 8
+main_size = 0xffc6
+
+print 'define void @f1(i8 *%base, i32 *%stopa, i32 *%stopb) {'
+print 'entry:'
+print '  br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print 'before%d:' % i
+    print '  %%bcur%da = load volatile i32 *%%stopa' % i
+    print '  %%bcur%db = load volatile i32 *%%stopb' % i
+    print '  %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i)
+    print '  %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50)
+    print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+    print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%acur%da = load volatile i32 *%%stopa' % i
+    print '  %%acur%db = load volatile i32 *%%stopb' % i
+    print '  %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i)
+    print '  %%atest%d = icmp ult i32 %%asub%d, %d' % (i, i, i + 100)
+    print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-12.py b/test/CodeGen/SystemZ/Large/branch-range-12.py
new file mode 100644
index 000000000000..007d477e2140
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-12.py
@@ -0,0 +1,127 @@
+# Test 64-bit COMPARE LOGICAL IMMEDIATE AND BRANCH in cases where the sheer
+# number of instructions causes some branches to be out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# before0:
+#   conditional branch to after0
+#   ...
+# beforeN:
+#   conditional branch to after0
+# main:
+#   0xffb4 bytes, from MVIY instructions
+#   conditional branch to main
+# after0:
+#   ...
+#   conditional branch to main
+# afterN:
+#
+# Each conditional branch sequence occupies 18 bytes if it uses a short
+# branch and 24 if it uses a long one.  The ones before "main:" have to
+# take the branch length into account, which is 6 for short branches,
+# so the final (0x4c - 6) / 18 == 3 blocks can use short branches.
+# The ones after "main:" do not, so the first 0x4c / 18 == 4 blocks
+# can use short branches.  The conservative algorithm we use makes
+# one of the forward branches unnecessarily long, as noted in the
+# check output below.
+#
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 50
+# CHECK: jgl [[LABEL:\.L[^ ]*]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 51
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 52
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 53
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 54
+# CHECK: jgl [[LABEL]]
+# ...as mentioned above, the next one could be a CLGIJL instead...
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 55
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgijl [[REG]], 56, [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgijl [[REG]], 57, [[LABEL]]
+# ...main goes here...
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgijl [[REG]], 100, [[LABEL:\.L[^ ]*]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgijl [[REG]], 101, [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgijl [[REG]], 102, [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgijl [[REG]], 103, [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 104
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 105
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 106
+# CHECK: jgl [[LABEL]]
+# CHECK: lg [[REG:%r[0-5]]], 0(%r3)
+# CHECK: sg [[REG]], 0(%r4)
+# CHECK: clgfi [[REG]], 107
+# CHECK: jgl [[LABEL]]
+
+branch_blocks = 8
+main_size = 0xffb4
+
+print 'define void @f1(i8 *%base, i64 *%stopa, i64 *%stopb) {'
+print 'entry:'
+print '  br label %before0'
+print ''
+
+for i in xrange(branch_blocks):
+    next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
+    print 'before%d:' % i
+    print '  %%bcur%da = load volatile i64 *%%stopa' % i
+    print '  %%bcur%db = load volatile i64 *%%stopb' % i
+    print '  %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i)
+    print '  %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50)
+    print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
+    print ''
+
+print '%s:' % next
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%acur%da = load volatile i64 *%%stopa' % i
+    print '  %%acur%db = load volatile i64 *%%stopb' % i
+    print '  %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i)
+    print '  %%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100)
+    print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/lit.local.cfg b/test/CodeGen/SystemZ/Large/lit.local.cfg
new file mode 100644
index 000000000000..9a02f849c347
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/lit.local.cfg
@@ -0,0 +1,10 @@
+config.suffixes = ['.py']
+
+# These tests take on the order of seconds to run, so skip them unless
+# running natively.
+if config.root.host_arch not in ['SystemZ']:
+    config.unsupported = True
+
+targets = set(config.root.targets_to_build.split())
+if not 'SystemZ' in targets:
+    config.unsupported = True
diff --git a/test/CodeGen/SystemZ/Large/spill-01.py b/test/CodeGen/SystemZ/Large/spill-01.py
new file mode 100644
index 000000000000..3c1d0b611bb4
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/spill-01.py
@@ -0,0 +1,40 @@
+# Test cases where MVC is used for spill slots that end up being out of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# There are 8 usable call-saved GPRs, two of which are needed for the base
+# registers.  The first 160 bytes of the frame are needed for the ABI
+# call frame, and a further 8 bytes are needed for the emergency spill slot.
+# That means we will have at least one out-of-range slot if:
+#
+#    count == (4096 - 168) / 8 + 6 + 1 == 498
+#
+# Add in some extra room and check both %r15+4096 (the first out-of-range slot)
+# and %r15+4104.
+#
+# CHECK: f1:
+# CHECK: lay [[REG:%r[0-5]]], 4096(%r15)
+# CHECK: mvc 0(8,[[REG]]), {{[0-9]+}}({{%r[0-9]+}})
+# CHECK: brasl %r14, foo@PLT
+# CHECK: lay [[REG:%r[0-5]]], 4096(%r15)
+# CHECK: mvc {{[0-9]+}}(8,{{%r[0-9]+}}), 8([[REG]])
+# CHECK: br %r14
+count = 500
+
+print 'declare void @foo()'
+print ''
+print 'define void @f1(i64 *%base0, i64 *%base1) {'
+
+for i in range(count):
+    print '  %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
+    print '  %%val%d = load i64 *%%ptr%d' % (i, i)
+    print ''
+
+print '  call void @foo()'
+print ''
+
+for i in range(count):
+    print '  store i64 %%val%d, i64 *%%ptr%d' % (i, i)
+
+print ''
+print '  ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/spill-02.py b/test/CodeGen/SystemZ/Large/spill-02.py
new file mode 100644
index 000000000000..0aa43d18054b
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/spill-02.py
@@ -0,0 +1,73 @@
+# Test cases where we spill from one frame index to another, both of which
+# are out of range of MVC, and both of which need emergency spill slots.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# CHECK: f1:
+# CHECK: %fallthru
+# CHECK-DAG: stg [[REG1:%r[0-9]+]], 8168(%r15)
+# CHECK-DAG: stg [[REG2:%r[0-9]+]], 8176(%r15)
+# CHECK-DAG: lay [[REG3:%r[0-9]+]], 8192(%r15)
+# CHECK-DAG: lay [[REG4:%r[0-9]+]], 4096(%r15)
+# CHECK: mvc 0(8,[[REG3]]), 4088([[REG4]])
+# CHECK-DAG: lg [[REG1]], 8168(%r15)
+# CHECK-DAG: lg [[REG2]], 8176(%r15)
+# CHECK: %skip
+# CHECK: br %r14
+
+# Arrange for %foo's spill slot to be at 8184(%r15) and the alloca area to be at
+# 8192(%r15).  The two emergency spill slots live below that, so this requires
+# the first 8168 bytes to be used for the call.  160 of these bytes are
+# allocated for the ABI frame.  There are also 5 argument registers, one of
+# which is used as a base pointer.  Floor division keeps args an integer.
+args = (8168 - 160) // 8 + (5 - 1)
+
+print('declare i64 *@foo(i64 *%s)' % (', i64' * args))
+print('declare void @bar(i64 *)')
+print('')
+print('define i64 @f1(i64 %foo) {')
+print('entry:')
+
+# Make the allocation big, so that it goes at the top of the frame.
+print('  %array = alloca [1000 x i64]')
+print('  %area = getelementptr [1000 x i64] *%array, i64 0, i64 0')
+print('  %%base = call i64 *@foo(i64 *%%area%s)' % (', i64 0' * args))
+print('')
+
+# Make sure all GPRs are used.  One is needed for the stack pointer and
+# another for %base, so we need 14 live values.
+count = 14
+for i in range(count):
+    print('  %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i // 2))
+    print('  %%val%d = load volatile i64 *%%ptr%d' % (i, i))
+    print('')
+
+# Encourage the register allocator to give preference to these %vals
+# by using them several times.
+for j in range(4):
+    for i in range(count):
+        print('  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i))
+    print('')
+
+# Copy the incoming argument, which we expect to be spilled, to the frame
+# index for the alloca area.  Also throw in a volatile store, so that this
+# block cannot be reordered with the surrounding code.
+print('  %cond = icmp eq i64 %val0, %val1')
+print('  br i1 %cond, label %skip, label %fallthru')
+print('')
+print('fallthru:')
+print('  store i64 %foo, i64 *%area')
+print('  store volatile i64 %val0, i64 *%ptr0')
+print('  br label %skip')
+print('')
+print('skip:')
+
+# Use each %val a few more times to emphasise the point, and to make sure
+# that they are live across the store of %foo.
+for j in range(4):
+    for i in range(count):
+        print('  store volatile i64 %%val%d, i64 *%%ptr%d' % (i, i))
+    print('')
+
+print('  call void @bar(i64 *%area)')
+print('  ret i64 0')
+print('}')
diff --git a/test/CodeGen/SystemZ/addr-01.ll b/test/CodeGen/SystemZ/addr-01.ll
index c125ffa71a71..d0960cdb1047 100644
--- a/test/CodeGen/SystemZ/addr-01.ll
+++ b/test/CodeGen/SystemZ/addr-01.ll
@@ -5,7 +5,7 @@
 
 ; A simple index address.
 define void @f1(i64 %addr, i64 %index) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lb %r0, 0(%r3,%r2)
 ; CHECK: br %r14
   %add = add i64 %addr, %index
@@ -16,7 +16,7 @@ define void @f1(i64 %addr, i64 %index) {
 
 ; An address with an index and a displacement (order 1).
 define void @f2(i64 %addr, i64 %index) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lb %r0, 100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %addr, %index
@@ -28,7 +28,7 @@ define void @f2(i64 %addr, i64 %index) {
 
 ; An address with an index and a displacement (order 2).
 define void @f3(i64 %addr, i64 %index) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lb %r0, 100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %addr, 100
@@ -40,7 +40,7 @@ define void @f3(i64 %addr, i64 %index) {
 
 ; An address with an index and a subtracted displacement (order 1).
 define void @f4(i64 %addr, i64 %index) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lb %r0, -100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %addr, %index
@@ -52,7 +52,7 @@ define void @f4(i64 %addr, i64 %index) {
 
 ; An address with an index and a subtracted displacement (order 2).
 define void @f5(i64 %addr, i64 %index) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lb %r0, -100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = sub i64 %addr, 100
@@ -64,7 +64,7 @@ define void @f5(i64 %addr, i64 %index) {
 
 ; An address with an index and a displacement added using OR.
 define void @f6(i64 %addr, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nill %r2, 65528
 ; CHECK: lb %r0, 6(%r3,%r2)
 ; CHECK: br %r14
@@ -78,7 +78,7 @@ define void @f6(i64 %addr, i64 %index) {
 
 ; Like f6, but without the masking.  This OR doesn't count as a displacement.
 define void @f7(i64 %addr, i64 %index) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oill %r2, 6
 ; CHECK: lb %r0, 0(%r3,%r2)
 ; CHECK: br %r14
@@ -92,7 +92,7 @@ define void @f7(i64 %addr, i64 %index) {
 ; Like f6, but with the OR applied after the index.  We don't know anything
 ; about the alignment of %add here.
 define void @f8(i64 %addr, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: nill %r2, 65528
 ; CHECK: agr %r2, %r3
 ; CHECK: oill %r2, 6
diff --git a/test/CodeGen/SystemZ/addr-02.ll b/test/CodeGen/SystemZ/addr-02.ll
index 6772c1d41800..56c48794b072 100644
--- a/test/CodeGen/SystemZ/addr-02.ll
+++ b/test/CodeGen/SystemZ/addr-02.ll
@@ -6,7 +6,7 @@
 
 ; A simple index address.
 define void @f1(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lb %r0, 0(%r3,%r2)
 ; CHECK: br %r14
   %add = add i64 %addr, %index
@@ -18,7 +18,7 @@ define void @f1(i64 %addr, i64 %index, i8 **%dst) {
 
 ; An address with an index and a displacement (order 1).
 define void @f2(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lb %r0, 100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %addr, %index
@@ -31,7 +31,7 @@ define void @f2(i64 %addr, i64 %index, i8 **%dst) {
 
 ; An address with an index and a displacement (order 2).
 define void @f3(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lb %r0, 100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %addr, 100
@@ -44,7 +44,7 @@ define void @f3(i64 %addr, i64 %index, i8 **%dst) {
 
 ; An address with an index and a subtracted displacement (order 1).
 define void @f4(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lb %r0, -100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %addr, %index
@@ -57,7 +57,7 @@ define void @f4(i64 %addr, i64 %index, i8 **%dst) {
 
 ; An address with an index and a subtracted displacement (order 2).
 define void @f5(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lb %r0, -100(%r3,%r2)
 ; CHECK: br %r14
   %add1 = sub i64 %addr, 100
@@ -70,7 +70,7 @@ define void @f5(i64 %addr, i64 %index, i8 **%dst) {
 
 ; An address with an index and a displacement added using OR.
 define void @f6(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nill %r2, 65528
 ; CHECK: lb %r0, 6(%r3,%r2)
 ; CHECK: br %r14
@@ -85,7 +85,7 @@ define void @f6(i64 %addr, i64 %index, i8 **%dst) {
 
 ; Like f6, but without the masking.  This OR doesn't count as a displacement.
 define void @f7(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oill %r2, 6
 ; CHECK: lb %r0, 0(%r3,%r2)
 ; CHECK: br %r14
@@ -100,7 +100,7 @@ define void @f7(i64 %addr, i64 %index, i8 **%dst) {
 ; Like f6, but with the OR applied after the index.  We don't know anything
 ; about the alignment of %add here.
 define void @f8(i64 %addr, i64 %index, i8 **%dst) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: nill %r2, 65528
 ; CHECK: agr %r2, %r3
 ; CHECK: oill %r2, 6
diff --git a/test/CodeGen/SystemZ/addr-03.ll b/test/CodeGen/SystemZ/addr-03.ll
index dbdb9f15b4f1..1146926a4c2e 100644
--- a/test/CodeGen/SystemZ/addr-03.ll
+++ b/test/CodeGen/SystemZ/addr-03.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lb %r0, 0
 ; CHECK: br %r14
   %ptr = inttoptr i64 0 to i8 *
@@ -12,7 +12,7 @@ define void @f1() {
 }
 
 define void @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lb %r0, -524288
 ; CHECK: br %r14
   %ptr = inttoptr i64 -524288 to i8 *
@@ -21,7 +21,7 @@ define void @f2() {
 }
 
 define void @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: lb %r0, -524289
 ; CHECK: br %r14
   %ptr = inttoptr i64 -524289 to i8 *
@@ -30,7 +30,7 @@ define void @f3() {
 }
 
 define void @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lb %r0, 524287
 ; CHECK: br %r14
   %ptr = inttoptr i64 524287 to i8 *
@@ -39,7 +39,7 @@ define void @f4() {
 }
 
 define void @f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: lb %r0, 524288
 ; CHECK: br %r14
   %ptr = inttoptr i64 524288 to i8 *
diff --git a/test/CodeGen/SystemZ/alias-01.ll b/test/CodeGen/SystemZ/alias-01.ll
new file mode 100644
index 000000000000..8839aade7a0e
--- /dev/null
+++ b/test/CodeGen/SystemZ/alias-01.ll
@@ -0,0 +1,19 @@
+; Test that the TBAA-tagged vector load and store below are compiled without spills.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check that there are no spills, i.e. no use of %r15 between the label and the return.
+define void @f1(<16 x i32> *%src1, <16 x float> *%dest) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r15
+; CHECK: br %r14
+  %val = load <16 x i32> *%src1, !tbaa !1
+  %add = add <16 x i32> %val, %val
+  %res = bitcast <16 x i32> %add to <16 x float>
+  store <16 x float> %res, <16 x float> *%dest, !tbaa !2
+  ret void
+}
+
+!0 = metadata !{ metadata !"root" }
+!1 = metadata !{ metadata !"set1", metadata !0 }
+!2 = metadata !{ metadata !"set2", metadata !0 }
diff --git a/test/CodeGen/SystemZ/alloca-01.ll b/test/CodeGen/SystemZ/alloca-01.ll
index 1852c9135059..2ddefd70cc9d 100644
--- a/test/CodeGen/SystemZ/alloca-01.ll
+++ b/test/CodeGen/SystemZ/alloca-01.ll
@@ -1,8 +1,7 @@
 ; Test variable-sized allocas and addresses based on them in cases where
 ; stack arguments are needed.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK1
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK2
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-A
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-B
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-C
@@ -15,17 +14,6 @@ declare i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 %f, i64 %g)
 ; There are two stack arguments, so an offset of 160 + 2 * 8 == 176
 ; is added to the copy of %r15.
 define i64 @f1(i64 %length, i64 %index) {
-; The full allocation sequence is:
-;
-;    la %r0, 7(%r2)      1
-;    nill %r0, 0xfff8    1
-;    lgr %r1, %r15         2
-;    sgr %r1, %r0        1 2
-;    lgr %r15, %r1         2
-;
-; The third instruction does not depend on the first two, so check for
-; two fully-ordered sequences.
-;
 ; FIXME: a better sequence would be:
 ;
 ;    lgr %r1, %r15
@@ -33,38 +21,34 @@ define i64 @f1(i64 %length, i64 %index) {
 ;    nill %r1, 0xfff8
 ;    lgr %r15, %r1
 ;
-; CHECK1: f1:
-; CHECK1: la %r0, 7(%r2)
-; CHECK1: nill %r0, 65528
-; CHECK1: sgr %r1, %r0
-; CHECK1: lgr %r15, %r1
-;
-; CHECK2: f1:
-; CHECK2: lgr %r1, %r15
-; CHECK2: sgr %r1, %r0
-; CHECK2: lgr %r15, %r1
+; CHECK-LABEL: f1:
+; CHECK-DAG: la [[REG1:%r[0-5]]], 7(%r2)
+; CHECK-DAG: nill [[REG1]], 65528
+; CHECK-DAG: lgr [[REG2:%r[0-5]]], %r15
+; CHECK: sgr [[REG2]], [[REG1]]
+; CHECK: lgr %r15, [[REG2]]
 ;
-; CHECK-A: f1:
+; CHECK-A-LABEL: f1:
 ; CHECK-A: lgr %r15, %r1
 ; CHECK-A: la %r2, 176(%r1)
 ;
-; CHECK-B: f1:
+; CHECK-B-LABEL: f1:
 ; CHECK-B: lgr %r15, %r1
 ; CHECK-B: la %r3, 177(%r1)
 ;
-; CHECK-C: f1:
+; CHECK-C-LABEL: f1:
 ; CHECK-C: lgr %r15, %r1
 ; CHECK-C: la %r4, 4095({{%r3,%r1|%r1,%r3}})
 ;
-; CHECK-D: f1:
+; CHECK-D-LABEL: f1:
 ; CHECK-D: lgr %r15, %r1
 ; CHECK-D: lay %r5, 4096({{%r3,%r1|%r1,%r3}})
 ;
-; CHECK-E: f1:
+; CHECK-E-LABEL: f1:
 ; CHECK-E: lgr %r15, %r1
 ; CHECK-E: lay %r6, 4271({{%r3,%r1|%r1,%r3}})
 ;
-; CHECK-FP: f1:
+; CHECK-FP-LABEL: f1:
 ; CHECK-FP: lgr %r11, %r15
 ; CHECK-FP: lmg %r6, %r15, 224(%r11)
   %a = alloca i8, i64 %length
diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll
index fbb095f4d12d..b5787b102358 100644
--- a/test/CodeGen/SystemZ/alloca-02.ll
+++ b/test/CodeGen/SystemZ/alloca-02.ll
@@ -9,40 +9,43 @@
 declare i64 @bar(i8 *%a)
 
 define i64 @f1(i64 %length, i64 %index) {
-; CHECK-A: f1:
+; CHECK-A-LABEL: f1:
 ; CHECK-A: lgr %r15, [[ADDR:%r[1-5]]]
 ; CHECK-A: la %r2, 160([[ADDR]])
 ; CHECK-A: mvi 0(%r2), 0
 ;
-; CHECK-B: f1:
+; CHECK-B-LABEL: f1:
 ; CHECK-B: lgr %r15, [[ADDR:%r[1-5]]]
 ; CHECK-B: la %r2, 160([[ADDR]])
 ; CHECK-B: mvi 4095(%r2), 1
 ;
-; CHECK-C: f1:
+; CHECK-C-LABEL: f1:
 ; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-C: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
-; CHECK-C: mvi 0([[TMP]]), 2
+; CHECK-C-DAG: la %r2, 160([[ADDR]])
+; CHECK-C-DAG: lhi [[TMP:%r[0-5]]], 2
+; CHECK-C: stc [[TMP]], 0({{%r3,%r2|%r2,%r3}})
 ;
-; CHECK-D: f1:
+; CHECK-D-LABEL: f1:
 ; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-D: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
-; CHECK-D: mvi 4095([[TMP]]), 3
+; CHECK-D-DAG: la %r2, 160([[ADDR]])
+; CHECK-D-DAG: lhi [[TMP:%r[0-5]]], 3
+; CHECK-D: stc [[TMP]], 4095({{%r3,%r2|%r2,%r3}})
 ;
-; CHECK-E: f1:
+; CHECK-E-LABEL: f1:
 ; CHECK-E: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
-; CHECK-E: mviy 4096([[TMP]]), 4
+; CHECK-E-DAG: la %r2, 160([[ADDR]])
+; CHECK-E-DAG: lhi [[TMP:%r[0-5]]], 4
+; CHECK-E: stcy [[TMP]], 4096({{%r3,%r2|%r2,%r3}})
   %a = alloca i8, i64 %length
-  store i8 0, i8 *%a
+  store volatile i8 0, i8 *%a
   %b = getelementptr i8 *%a, i64 4095
-  store i8 1, i8 *%b
+  store volatile i8 1, i8 *%b
   %c = getelementptr i8 *%a, i64 %index
-  store i8 2, i8 *%c
+  store volatile i8 2, i8 *%c
   %d = getelementptr i8 *%c, i64 4095
-  store i8 3, i8 *%d
+  store volatile i8 3, i8 *%d
   %e = getelementptr i8 *%d, i64 1
-  store i8 4, i8 *%e
+  store volatile i8 4, i8 *%e
   %count = call i64 @bar(i8 *%a)
   %res = add i64 %count, 1
   ret i64 %res
diff --git a/test/CodeGen/SystemZ/and-01.ll b/test/CodeGen/SystemZ/and-01.ll
index 8dd106b7c015..3b230ba1081f 100644
--- a/test/CodeGen/SystemZ/and-01.ll
+++ b/test/CodeGen/SystemZ/and-01.ll
@@ -1,10 +1,13 @@
 ; Test 32-bit ANDs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo()
 
 ; Check NR.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: nr %r2, %r3
 ; CHECK: br %r14
   %and = and i32 %a, %b
@@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check the low end of the N range.
 define i32 @f2(i32 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: n %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned N range.
 define i32 @f3(i32 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: n %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) {
 
 ; Check the next word up, which should use NY instead of N.
 define i32 @f4(i32 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ny %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned NY range.
 define i32 @f5(i32 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ny %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: n %r2, 0(%r3)
 ; CHECK: br %r14
@@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned NY range.
 define i32 @f7(i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ny %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) {
 
 ; Check the low end of the NY range.
 define i32 @f8(i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ny %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: n %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) {
 
 ; Check that N allows an index.
 define i32 @f10(i32 %a, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: n %r2, 4092({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
 
 ; Check that NY allows an index.
 define i32 @f11(i32 %a, i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: ny %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
   %and = and i32 %a, %b
   ret i32 %and
 }
+
+; Check that ANDs of spilled values can use N rather than NR; %val0-%val9 are all live across the call.
+define i32 @f12(i32 *%ptr0) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: n %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %and0 = and i32 %ret, %val0
+  %and1 = and i32 %and0, %val1
+  %and2 = and i32 %and1, %val2
+  %and3 = and i32 %and2, %val3
+  %and4 = and i32 %and3, %val4
+  %and5 = and i32 %and4, %val5
+  %and6 = and i32 %and5, %val6
+  %and7 = and i32 %and6, %val7
+  %and8 = and i32 %and7, %val8
+  %and9 = and i32 %and8, %val9
+
+  ret i32 %and9
+}
diff --git a/test/CodeGen/SystemZ/and-02.ll b/test/CodeGen/SystemZ/and-02.ll
index a0fff81492ad..a7f08b7bb790 100644
--- a/test/CodeGen/SystemZ/and-02.ll
+++ b/test/CodeGen/SystemZ/and-02.ll
@@ -1,93 +1,226 @@
 ; Test 32-bit ANDs in which the second operand is constant.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
 
-; Check the lowest useful NILF value.
+; ANDs with 1 can use NILF.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: nilf %r2, 1
 ; CHECK: br %r14
   %and = and i32 %a, 1
   ret i32 %and
 }
 
+; ...but RISBLG is available as a three-address form.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: risblg %r2, %r3, 31, 159, 0
+; CHECK: br %r14
+  %and = and i32 %b, 1
+  ret i32 %and
+}
+
+; ...same for 4.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: risblg %r2, %r3, 29, 157, 0
+; CHECK: br %r14
+  %and = and i32 %b, 4
+  ret i32 %and
+}
+
+; ANDs with 5 must use NILF.
+define i32 @f4(i32 %a) {
+; CHECK-LABEL: f4:
+; CHECK: nilf %r2, 5
+; CHECK: br %r14
+  %and = and i32 %a, 5
+  ret i32 %and
+}
+
+; ...a single RISBLG isn't enough.
+define i32 @f5(i32 %a, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: risb
+; CHECK: br %r14
+  %and = and i32 %b, 5
+  ret i32 %and
+}
+
 ; Check the highest 16-bit constant that must be handled by NILF.
-define i32 @f2(i32 %a) {
-; CHECK: f2:
+define i32 @f6(i32 %a) {
+; CHECK-LABEL: f6:
+; CHECK: nilf %r2, 65533
+; CHECK: br %r14
+  %and = and i32 %a, 65533
+  ret i32 %and
+}
+
+; ...a single RISBLG isn't enough.
+define i32 @f7(i32 %a, i32 %b) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: risb
+; CHECK: br %r14
+  %and = and i32 %b, 65533
+  ret i32 %and
+}
+
+; Check the next highest value, which can use NILF.
+define i32 @f8(i32 %a) {
+; CHECK-LABEL: f8:
 ; CHECK: nilf %r2, 65534
 ; CHECK: br %r14
   %and = and i32 %a, 65534
   ret i32 %and
 }
 
+; ...although the three-address case should use RISBLG.
+define i32 @f9(i32 %a, i32 %b) {
+; CHECK-LABEL: f9:
+; CHECK: risblg %r2, %r3, 16, 158, 0
+; CHECK: br %r14
+  %and = and i32 %b, 65534
+  ret i32 %and
+}
+
 ; ANDs of 0xffff are zero extensions from i16.
-define i32 @f3(i32 %a) {
-; CHECK: f3:
-; CHECK: llhr %r2, %r2
+define i32 @f10(i32 %a, i32 %b) {
+; CHECK-LABEL: f10:
+; CHECK: llhr %r2, %r3
 ; CHECK: br %r14
-  %and = and i32 %a, 65535
+  %and = and i32 %b, 65535
   ret i32 %and
 }
 
 ; Check the next value up, which must again use NILF.
-define i32 @f4(i32 %a) {
-; CHECK: f4:
+define i32 @f11(i32 %a) {
+; CHECK-LABEL: f11:
 ; CHECK: nilf %r2, 65536
 ; CHECK: br %r14
   %and = and i32 %a, 65536
   ret i32 %and
 }
 
-; Check the lowest useful NILH value.  (LLHR is used instead of NILH of 0.)
-define i32 @f5(i32 %a) {
-; CHECK: f5:
+; ...but the three-address case can use RISBLG.
+define i32 @f12(i32 %a, i32 %b) {
+; CHECK-LABEL: f12:
+; CHECK: risblg %r2, %r3, 15, 143, 0
+; CHECK: br %r14
+  %and = and i32 %b, 65536
+  ret i32 %and
+}
+
+; Check the lowest useful NILH value.
+define i32 @f13(i32 %a) {
+; CHECK-LABEL: f13:
 ; CHECK: nilh %r2, 1
 ; CHECK: br %r14
   %and = and i32 %a, 131071
   ret i32 %and
 }
 
+; ...but RISBLG is OK in the three-address case.
+define i32 @f14(i32 %a, i32 %b) {
+; CHECK-LABEL: f14:
+; CHECK: risblg %r2, %r3, 15, 159, 0
+; CHECK: br %r14
+  %and = and i32 %b, 131071
+  ret i32 %and
+}
+
 ; Check the highest useful NILF value.
-define i32 @f6(i32 %a) {
-; CHECK: f6:
+define i32 @f15(i32 %a) {
+; CHECK-LABEL: f15:
 ; CHECK: nilf %r2, 4294901758
 ; CHECK: br %r14
   %and = and i32 %a, -65538
   ret i32 %and
 }
 
-; Check the highest useful NILH value, which is one up from the above.
-define i32 @f7(i32 %a) {
-; CHECK: f7:
+; Check the next value up, which is the highest useful NILH value.
+define i32 @f16(i32 %a) {
+; CHECK-LABEL: f16:
 ; CHECK: nilh %r2, 65534
 ; CHECK: br %r14
   %and = and i32 %a, -65537
   ret i32 %and
 }
 
-; Check the low end of the NILL range, which is one up again.
-define i32 @f8(i32 %a) {
-; CHECK: f8:
+; Check the next value up, which is the first useful NILL value.
+define i32 @f17(i32 %a) {
+; CHECK-LABEL: f17:
 ; CHECK: nill %r2, 0
 ; CHECK: br %r14
   %and = and i32 %a, -65536
   ret i32 %and
 }
 
-; Check the next value up.
-define i32 @f9(i32 %a) {
-; CHECK: f9:
+; ...although the three-address case should use RISBLG.
+define i32 @f18(i32 %a, i32 %b) {
+; CHECK-LABEL: f18:
+; CHECK: risblg %r2, %r3, 0, 143, 0
+; CHECK: br %r14
+  %and = and i32 %b, -65536
+  ret i32 %and
+}
+
+; Check the next value up again, which can still use NILL.
+define i32 @f19(i32 %a) {
+; CHECK-LABEL: f19:
 ; CHECK: nill %r2, 1
 ; CHECK: br %r14
   %and = and i32 %a, -65535
   ret i32 %and
 }
 
-; Check the highest useful NILL value.
-define i32 @f10(i32 %a) {
-; CHECK: f10:
+; Check the next value up again, which cannot use RISBLG.
+define i32 @f20(i32 %a, i32 %b) {
+; CHECK-LABEL: f20:
+; CHECK-NOT: risb
+; CHECK: br %r14
+  %and = and i32 %b, -65534
+  ret i32 %and
+}
+
+; Check the last useful mask, which can use NILL.
+define i32 @f21(i32 %a) {
+; CHECK-LABEL: f21:
 ; CHECK: nill %r2, 65534
 ; CHECK: br %r14
   %and = and i32 %a, -2
   ret i32 %and
 }
+
+; ...or RISBLG for the three-address case.
+define i32 @f22(i32 %a, i32 %b) {
+; CHECK-LABEL: f22:
+; CHECK: risblg %r2, %r3, 0, 158, 0
+; CHECK: br %r14
+  %and = and i32 %b, -2
+  ret i32 %and
+}
+
+; Test that RISBLG can be used when inserting a non-wraparound mask
+; into another register.
+define i64 @f23(i64 %a, i32 %b) {
+; CHECK-LABEL: f23:
+; CHECK: risblg %r2, %r3, 30, 158, 0
+; CHECK: br %r14
+  %and1 = and i64 %a, -4294967296
+  %and2 = and i32 %b, 2
+  %ext = zext i32 %and2 to i64
+  %or = or i64 %and1, %ext
+  ret i64 %or
+}
+
+; ...and when inserting a wrap-around mask.
+define i64 @f24(i64 %a, i32 %b) {
+; CHECK-LABEL: f24:
+; CHECK: risblg %r2, %r3, 30, 156
+; CHECK: br %r14
+  %and1 = and i64 %a, -4294967296
+  %and2 = and i32 %b, -5
+  %ext = zext i32 %and2 to i64
+  %or = or i64 %and1, %ext
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/and-03.ll b/test/CodeGen/SystemZ/and-03.ll
index 3fe8d3cf3bf8..a0560d46e4ea 100644
--- a/test/CodeGen/SystemZ/and-03.ll
+++ b/test/CodeGen/SystemZ/and-03.ll
@@ -1,10 +1,13 @@
 ; Test 64-bit ANDs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i64 @foo()
 
 ; Check NGR.
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ngr %r2, %r3
 ; CHECK: br %r14
   %and = and i64 %a, %b
@@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) {
 
 ; Check NG with no displacement.
 define i64 @f2(i64 %a, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ng %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i64 *%src
@@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned NG range.
 define i64 @f3(i64 %a, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ng %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ng %r2, 0(%r3)
 ; CHECK: br %r14
@@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned NG range.
 define i64 @f5(i64 %a, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ng %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) {
 
 ; Check the low end of the NG range.
 define i64 @f6(i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ng %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: ng %r2, 0(%r3)
 ; CHECK: br %r14
@@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) {
 
 ; Check that NG allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ng %r2, 524280({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %and = and i64 %a, %b
   ret i64 %and
 }
+
+; Check that ANDs of spilled values can use NG rather than NGR; %val0-%val9 are all live across the call.
+define i64 @f9(i64 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: ng %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %and0 = and i64 %ret, %val0
+  %and1 = and i64 %and0, %val1
+  %and2 = and i64 %and1, %val2
+  %and3 = and i64 %and2, %val3
+  %and4 = and i64 %and3, %val4
+  %and5 = and i64 %and4, %val5
+  %and6 = and i64 %and5, %val6
+  %and7 = and i64 %and6, %val7
+  %and8 = and i64 %and7, %val8
+  %and9 = and i64 %and8, %val9
+
+  ret i64 %and9
+}
diff --git a/test/CodeGen/SystemZ/and-04.ll b/test/CodeGen/SystemZ/and-04.ll
index 62def60026e1..efb21f36425c 100644
--- a/test/CodeGen/SystemZ/and-04.ll
+++ b/test/CodeGen/SystemZ/and-04.ll
@@ -2,13 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; There is no 64-bit AND instruction for a mask of 1.
-; FIXME: we ought to be able to require "ngr %r2, %r0", but at the moment,
-; two-address optimisations force "ngr %r0, %r2; lgr %r2, %r0" instead.
+; Use RISBG for a single bit.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
-; CHECK: lghi %r0, 1
-; CHECK: ngr
+; CHECK-LABEL: f1:
+; CHECK: risbg %r2, %r2, 63, 191, 0
 ; CHECK: br %r14
   %and = and i64 %a, 1
   ret i64 %and
@@ -16,165 +13,171 @@ define i64 @f1(i64 %a) {
 
 ; Likewise 0xfffe.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
-; CHECK: llill %r0, 65534
-; CHECK: ngr
+; CHECK-LABEL: f2:
+; CHECK: risbg %r2, %r2, 48, 190, 0
 ; CHECK: br %r14
   %and = and i64 %a, 65534
   ret i64 %and
 }
 
 ; ...but 0xffff is a 16-bit zero extension.
-define i64 @f3(i64 %a) {
-; CHECK: f3:
-; CHECK: llghr %r2, %r2
+define i64 @f3(i64 %a, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: llghr %r2, %r3
 ; CHECK: br %r14
-  %and = and i64 %a, 65535
+  %and = and i64 %b, 65535
   ret i64 %and
 }
 
-; Check the next value up, which again has no dedicated instruction.
+; Check the next value up, which can again use RISBG.
 define i64 @f4(i64 %a) {
-; CHECK: f4:
-; CHECK: llilh %r0, 1
-; CHECK: ngr
+; CHECK-LABEL: f4:
+; CHECK: risbg %r2, %r2, 47, 175, 0
 ; CHECK: br %r14
   %and = and i64 %a, 65536
   ret i64 %and
 }
 
-; Check 0xfffffffe.
+; Check 0xfffffffe, which can also use RISBG.
 define i64 @f5(i64 %a) {
-; CHECK: f5:
-; CHECK: lilf %r0, 4294967294
-; CHECK: ngr
+; CHECK-LABEL: f5:
+; CHECK: risbg %r2, %r2, 32, 190, 0
 ; CHECK: br %r14
   %and = and i64 %a, 4294967294
   ret i64 %and
 }
 
 ; Check the next value up, which is a 32-bit zero extension.
-define i64 @f6(i64 %a) {
-; CHECK: f6:
-; CHECK: llgfr %r2, %r2
+define i64 @f6(i64 %a, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: llgfr %r2, %r3
 ; CHECK: br %r14
-  %and = and i64 %a, 4294967295
+  %and = and i64 %b, 4294967295
   ret i64 %and
 }
 
 ; Check the lowest useful NIHF value (0x00000001_ffffffff).
 define i64 @f7(i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: nihf %r2, 1
 ; CHECK: br %r14
   %and = and i64 %a, 8589934591
   ret i64 %and
 }
 
-; Check the low end of the NIHH range (0x0000ffff_ffffffff).
-define i64 @f8(i64 %a) {
-; CHECK: f8:
-; CHECK: nihh %r2, 0
+; ...but RISBG can be used if a three-address form is useful.
+define i64 @f8(i64 %a, i64 %b) {
+; CHECK-LABEL: f8:
+; CHECK: risbg %r2, %r3, 31, 191, 0
 ; CHECK: br %r14
-  %and = and i64 %a, 281474976710655
+  %and = and i64 %b, 8589934591
   ret i64 %and
 }
 
-; Check the highest useful NIHH value (0xfffeffff_ffffffff).
+; Check the lowest NIHH value outside the RISBG range (0x0002ffff_ffffffff).
 define i64 @f9(i64 %a) {
-; CHECK: f9:
-; CHECK: nihh %r2, 65534
+; CHECK-LABEL: f9:
+; CHECK: nihh %r2, 2
 ; CHECK: br %r14
-  %and = and i64 %a, -281474976710657
+  %and = and i64 %a, 844424930131967
   ret i64 %and
 }
 
-; Check the highest useful NIHF value (0xfffefffe_ffffffff).
+; Check the highest NIHH value outside the RISBG range (0xfffaffff_ffffffff).
 define i64 @f10(i64 %a) {
-; CHECK: f10:
-; CHECK: nihf %r2, 4294901758
+; CHECK-LABEL: f10:
+; CHECK: nihh %r2, 65530
 ; CHECK: br %r14
-  %and = and i64 %a, -281479271677953
+  %and = and i64 %a, -1407374883553281
   ret i64 %and
 }
 
-; Check the low end of the NIHL range (0xffff0000_ffffffff).
+; Check the highest useful NIHF value (0xfffefffe_ffffffff).
 define i64 @f11(i64 %a) {
-; CHECK: f11:
-; CHECK: nihl %r2, 0
+; CHECK-LABEL: f11:
+; CHECK: nihf %r2, 4294901758
 ; CHECK: br %r14
-  %and = and i64 %a, -281470681743361
+  %and = and i64 %a, -281479271677953
   ret i64 %and
 }
 
-; Check the highest useful NIHL value (0xfffffffe_ffffffff).
+; Check the lowest NIHL value outside the RISBG range (0xffff0002_ffffffff).
 define i64 @f12(i64 %a) {
-; CHECK: f12:
-; CHECK: nihl %r2, 65534
+; CHECK-LABEL: f12:
+; CHECK: nihl %r2, 2
 ; CHECK: br %r14
-  %and = and i64 %a, -4294967297
+  %and = and i64 %a, -281462091808769
   ret i64 %and
 }
 
-; Check the low end of the NILF range (0xffffffff_00000000).
+; Check the highest NIHL value outside the RISBG range (0xfffffffa_ffffffff).
 define i64 @f13(i64 %a) {
-; CHECK: f13:
-; CHECK: nilf %r2, 0
+; CHECK-LABEL: f13:
+; CHECK: nihl %r2, 65530
 ; CHECK: br %r14
-  %and = and i64 %a, -4294967296
+  %and = and i64 %a, -21474836481
   ret i64 %and
 }
 
-; Check the low end of the NILH range (0xffffffff_0000ffff).
+; Check the lowest NILF value outside the RISBG range (0xffffffff_00000002).
 define i64 @f14(i64 %a) {
-; CHECK: f14:
-; CHECK: nilh %r2, 0
+; CHECK-LABEL: f14:
+; CHECK: nilf %r2, 2
 ; CHECK: br %r14
-  %and = and i64 %a, -4294901761
+  %and = and i64 %a, -4294967294
   ret i64 %and
 }
 
-; Check the next value up, which must use NILF.
+; Check the lowest NILH value outside the RISBG range (0xffffffff_0002ffff).
 define i64 @f15(i64 %a) {
-; CHECK: f15:
-; CHECK: nilf %r2, 65536
+; CHECK-LABEL: f15:
+; CHECK: nilh %r2, 2
 ; CHECK: br %r14
-  %and = and i64 %a, -4294901760
+  %and = and i64 %a, -4294770689
   ret i64 %and
 }
 
-; Check the maximum useful NILF value (0xffffffff_fffefffe).
+; Check the next value up, which must use NILF.
 define i64 @f16(i64 %a) {
-; CHECK: f16:
-; CHECK: nilf %r2, 4294901758
+; CHECK-LABEL: f16:
+; CHECK: nilf %r2, 196608
 ; CHECK: br %r14
-  %and = and i64 %a, -65538
+  %and = and i64 %a, -4294770688
   ret i64 %and
 }
 
-; Check the highest useful NILH value, which is one greater than the above.
+; Check the highest NILH value outside the RISBG range (0xffffffff_fffaffff).
 define i64 @f17(i64 %a) {
-; CHECK: f17:
-; CHECK: nilh %r2, 65534
+; CHECK-LABEL: f17:
+; CHECK: nilh %r2, 65530
 ; CHECK: br %r14
-  %and = and i64 %a, -65537
+  %and = and i64 %a, -327681
   ret i64 %and
 }
 
-; Check the low end of the NILL range, which is one greater again.
+; Check the maximum useful NILF value (0xffffffff_fffefffe).
 define i64 @f18(i64 %a) {
-; CHECK: f18:
-; CHECK: nill %r2, 0
+; CHECK-LABEL: f18:
+; CHECK: nilf %r2, 4294901758
 ; CHECK: br %r14
-  %and = and i64 %a, -65536
+  %and = and i64 %a, -65538
   ret i64 %and
 }
 
-; Check the highest useful NILL value.
+; Check the lowest NILL value outside the RISBG range (0xffffffff_ffff0002).
 define i64 @f19(i64 %a) {
-; CHECK: f19:
-; CHECK: nill %r2, 65534
+; CHECK-LABEL: f19:
+; CHECK: nill %r2, 2
+; CHECK: br %r14
+  %and = and i64 %a, -65534
+  ret i64 %and
+}
+
+; Check the highest NILL value outside the RISBG range (0xffffffff_fffffffa).
+define i64 @f20(i64 %a) {
+; CHECK-LABEL: f20:
+; CHECK: nill %r2, 65530
 ; CHECK: br %r14
-  %and = and i64 %a, -2
+  %and = and i64 %a, -6
   ret i64 %and
 }
diff --git a/test/CodeGen/SystemZ/and-05.ll b/test/CodeGen/SystemZ/and-05.ll
index 457391165d5e..dafd9d5c51b0 100644
--- a/test/CodeGen/SystemZ/and-05.ll
+++ b/test/CodeGen/SystemZ/and-05.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful constant, expressed as a signed integer.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ni 0(%r2), 1
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -15,7 +15,7 @@ define void @f1(i8 *%ptr) {
 
 ; Check the highest useful constant, expressed as a signed integer.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -26,7 +26,7 @@ define void @f2(i8 *%ptr) {
 
 ; Check the lowest useful constant, expressed as an unsigned integer.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ni 0(%r2), 1
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -37,7 +37,7 @@ define void @f3(i8 *%ptr) {
 
 ; Check the highest useful constant, expressed as a unsigned integer.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -48,7 +48,7 @@ define void @f4(i8 *%ptr) {
 
 ; Check the high end of the NI range.
 define void @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ni 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -60,7 +60,7 @@ define void @f5(i8 *%src) {
 
 ; Check the next byte up, which should use NIY instead of NI.
 define void @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: niy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -72,7 +72,7 @@ define void @f6(i8 *%src) {
 
 ; Check the high end of the NIY range.
 define void @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: niy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -85,7 +85,7 @@ define void @f7(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, 524288
 ; CHECK: ni 0(%r2), 127
 ; CHECK: br %r14
@@ -98,7 +98,7 @@ define void @f8(i8 *%src) {
 
 ; Check the high end of the negative NIY range.
 define void @f9(i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: niy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -110,7 +110,7 @@ define void @f9(i8 *%src) {
 
 ; Check the low end of the NIY range.
 define void @f10(i8 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: niy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -123,7 +123,7 @@ define void @f10(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f11(i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r2, -524289
 ; CHECK: ni 0(%r2), 127
 ; CHECK: br %r14
@@ -136,7 +136,7 @@ define void @f11(i8 *%src) {
 
 ; Check that NI does not allow an index
 define void @f12(i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: agr %r2, %r3
 ; CHECK: ni 4095(%r2), 127
 ; CHECK: br %r14
@@ -151,7 +151,7 @@ define void @f12(i64 %src, i64 %index) {
 
 ; Check that NIY does not allow an index
 define void @f13(i64 %src, i64 %index) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: agr %r2, %r3
 ; CHECK: niy 4096(%r2), 127
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/and-06.ll b/test/CodeGen/SystemZ/and-06.ll
index bbb5e7b7b9dd..f796618dd4f4 100644
--- a/test/CodeGen/SystemZ/and-06.ll
+++ b/test/CodeGen/SystemZ/and-06.ll
@@ -5,7 +5,7 @@
 
 ; Zero extension to 32 bits, negative constant.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -18,7 +18,7 @@ define void @f1(i8 *%ptr) {
 
 ; Zero extension to 64 bits, negative constant.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -31,7 +31,7 @@ define void @f2(i8 *%ptr) {
 
 ; Zero extension to 32 bits, positive constant.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define void @f3(i8 *%ptr) {
 
 ; Zero extension to 64 bits, positive constant.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -57,7 +57,7 @@ define void @f4(i8 *%ptr) {
 
 ; Sign extension to 32 bits, negative constant.
 define void @f5(i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -70,7 +70,7 @@ define void @f5(i8 *%ptr) {
 
 ; Sign extension to 64 bits, negative constant.
 define void @f6(i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -83,7 +83,7 @@ define void @f6(i8 *%ptr) {
 
 ; Sign extension to 32 bits, positive constant.
 define void @f7(i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -96,7 +96,7 @@ define void @f7(i8 *%ptr) {
 
 ; Sign extension to 64 bits, positive constant.
 define void @f8(i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/and-07.ll b/test/CodeGen/SystemZ/and-07.ll
new file mode 100644
index 000000000000..ad4c4af59fd7
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-07.ll
@@ -0,0 +1,39 @@
+; Test the three-operand forms of AND.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check NRK.
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f1:
+; CHECK: nrk %r2, %r3, %r4
+; CHECK: br %r14
+  %and = and i32 %b, %c
+  ret i32 %and
+}
+
+; Check that we can still use NR in obvious cases.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: nr %r2, %r3
+; CHECK: br %r14
+  %and = and i32 %a, %b
+  ret i32 %and
+}
+
+; Check NGRK.
+define i64 @f3(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: f3:
+; CHECK: ngrk %r2, %r3, %r4
+; CHECK: br %r14
+  %and = and i64 %b, %c
+  ret i64 %and
+}
+
+; Check that we can still use NGR in obvious cases.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: ngr %r2, %r3
+; CHECK: br %r14
+  %and = and i64 %a, %b
+  ret i64 %and
+}
diff --git a/test/CodeGen/SystemZ/and-08.ll b/test/CodeGen/SystemZ/and-08.ll
new file mode 100644
index 000000000000..7ded115aedff
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-08.ll
@@ -0,0 +1,378 @@
+; Test memory-to-memory ANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g1src = global i8 1
+@g1dst = global i8 1
+@g2src = global i16 2
+@g2dst = global i16 2
+
+; Test the simple i8 case.
+define void @f1(i8 *%ptr1) {
+; CHECK-LABEL: f1:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %old = load i8 *%ptr2
+  %and = and i8 %val, %old
+  store i8 %and, i8 *%ptr2
+  ret void
+}
+
+; ...and again in reverse.
+define void @f2(i8 *%ptr1) {
+; CHECK-LABEL: f2:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %old = load i8 *%ptr2
+  %and = and i8 %old, %val
+  store i8 %and, i8 *%ptr2
+  ret void
+}
+
+; Test i8 cases where one value is zero-extended to 32 bits and the other
+; sign-extended.
+define void @f3(i8 *%ptr1) {
+; CHECK-LABEL: f3:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %extval = zext i8 %val to i32
+  %old = load i8 *%ptr2
+  %extold = sext i8 %old to i32
+  %and = and i32 %extval, %extold
+  %trunc = trunc i32 %and to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; ...and again with the extension types reversed.
+define void @f4(i8 *%ptr1) {
+; CHECK-LABEL: f4:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %extval = sext i8 %val to i32
+  %old = load i8 *%ptr2
+  %extold = zext i8 %old to i32
+  %and = and i32 %extval, %extold
+  %trunc = trunc i32 %and to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; ...and again with two sign extensions.
+define void @f5(i8 *%ptr1) {
+; CHECK-LABEL: f5:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %extval = sext i8 %val to i32
+  %old = load i8 *%ptr2
+  %extold = sext i8 %old to i32
+  %and = and i32 %extval, %extold
+  %trunc = trunc i32 %and to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; ...and again with two zero extensions.
+define void @f6(i8 *%ptr1) {
+; CHECK-LABEL: f6:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %extval = zext i8 %val to i32
+  %old = load i8 *%ptr2
+  %extold = zext i8 %old to i32
+  %and = and i32 %extval, %extold
+  %trunc = trunc i32 %and to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; Test i8 cases where the value is extended to 64 bits (just one case
+; this time).
+define void @f7(i8 *%ptr1) {
+; CHECK-LABEL: f7:
+; CHECK: nc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %extval = sext i8 %val to i64
+  %old = load i8 *%ptr2
+  %extold = zext i8 %old to i64
+  %and = and i64 %extval, %extold
+  %trunc = trunc i64 %and to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; Test the simple i16 case.
+define void @f8(i16 *%ptr1) {
+; CHECK-LABEL: f8:
+; CHECK: nc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %old = load i16 *%ptr2
+  %and = and i16 %val, %old
+  store i16 %and, i16 *%ptr2
+  ret void
+}
+
+; Test i16 cases where the value is extended to 32 bits.
+define void @f9(i16 *%ptr1) {
+; CHECK-LABEL: f9:
+; CHECK: nc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %extval = zext i16 %val to i32
+  %old = load i16 *%ptr2
+  %extold = sext i16 %old to i32
+  %and = and i32 %extval, %extold
+  %trunc = trunc i32 %and to i16
+  store i16 %trunc, i16 *%ptr2
+  ret void
+}
+
+; Test i16 cases where the value is extended to 64 bits.
+define void @f10(i16 *%ptr1) {
+; CHECK-LABEL: f10:
+; CHECK: nc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %extval = sext i16 %val to i64
+  %old = load i16 *%ptr2
+  %extold = zext i16 %old to i64
+  %and = and i64 %extval, %extold
+  %trunc = trunc i64 %and to i16
+  store i16 %trunc, i16 *%ptr2
+  ret void
+}
+
+; Test the simple i32 case.
+define void @f11(i32 *%ptr1) {
+; CHECK-LABEL: f11:
+; CHECK: nc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  %old = load i32 *%ptr2
+  %and = and i32 %old, %val
+  store i32 %and, i32 *%ptr2
+  ret void
+}
+
+; Test i32 cases where the value is extended to 64 bits.
+define void @f12(i32 *%ptr1) {
+; CHECK-LABEL: f12:
+; CHECK: nc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  %extval = sext i32 %val to i64
+  %old = load i32 *%ptr2
+  %extold = zext i32 %old to i64
+  %and = and i64 %extval, %extold
+  %trunc = trunc i64 %and to i32
+  store i32 %trunc, i32 *%ptr2
+  ret void
+}
+
+; Test the i64 case.
+define void @f13(i64 *%ptr1) {
+; CHECK-LABEL: f13:
+; CHECK: nc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2
+  ret void
+}
+
+; Make sure that we don't use NC if the first load is volatile.
+define void @f14(i64 *%ptr1) {
+; CHECK-LABEL: f14:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load volatile i64 *%ptr1
+  %old = load i64 *%ptr2
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2
+  ret void
+}
+
+; ...likewise the second.
+define void @f15(i64 *%ptr1) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  %old = load volatile i64 *%ptr2
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2
+  ret void
+}
+
+; ...likewise the store.
+define void @f16(i64 *%ptr1) {
+; CHECK-LABEL: f16:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2
+  %and = and i64 %old, %val
+  store volatile i64 %and, i64 *%ptr2
+  ret void
+}
+
+; Test that NC is not used for aligned loads and stores if there is
+; no way of telling whether they alias.  We don't want to use NC in
+; cases where the addresses could be equal.
+define void @f17(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f17:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2
+  ret void
+}
+
+; ...and if one of the loads isn't aligned, we can't be sure either.
+define void @f18(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f18:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2
+  %old = load i64 *%ptr2
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2
+  ret void
+}
+
+; Repeat the previous test with the operands in the opposite order.
+define void @f19(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f19:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2
+  %old = load i64 *%ptr2
+  %and = and i64 %val, %old
+  store i64 %and, i64 *%ptr2
+  ret void
+}
+
+; ...and again with the other operand being unaligned.
+define void @f20(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f20:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2, align 2
+  %and = and i64 %val, %old
+  store i64 %and, i64 *%ptr2, align 2
+  ret void
+}
+
+; Test a case where there is definite overlap.
+define void @f21(i64 %base) {
+; CHECK-LABEL: f21:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %add = add i64 %base, 1
+  %ptr1 = inttoptr i64 %base to i64 *
+  %ptr2 = inttoptr i64 %add to i64 *
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2, align 1
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2, align 1
+  ret void
+}
+
+; Test that we can use NC for global addresses for i8.
+define void @f22(i8 *%ptr) {
+; CHECK-LABEL: f22:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
+; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
+; CHECK: br %r14
+  %val = load i8 *@g1src
+  %old = load i8 *@g1dst
+  %and = and i8 %val, %old
+  store i8 %and, i8 *@g1dst
+  ret void
+}
+
+; Test that we use NC even where LHRL and STHRL are available.
+define void @f23(i16 *%ptr) {
+; CHECK-LABEL: f23:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g2src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
+; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
+; CHECK: br %r14
+  %val = load i16 *@g2src
+  %old = load i16 *@g2dst
+  %and = and i16 %val, %old
+  store i16 %and, i16 *@g2dst
+  ret void
+}
+
+; Test a case where offset disambiguation is enough.
+define void @f24(i64 *%ptr1) {
+; CHECK-LABEL: f24:
+; CHECK: nc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1, align 1
+  %old = load i64 *%ptr2, align 1
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2, align 1
+  ret void
+}
+
+; Test a case where TBAA tells us there is no alias.
+define void @f25(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f25:
+; CHECK: nc 0(8,%r3), 0(%r2)
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2, !tbaa !3
+  %old = load i64 *%ptr2, align 2, !tbaa !4
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2, align 2, !tbaa !4
+  ret void
+}
+
+; Test a case where TBAA information is present but doesn't help.
+define void @f26(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f26:
+; CHECK-NOT: nc
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2, !tbaa !3
+  %old = load i64 *%ptr2, align 2, !tbaa !3
+  %and = and i64 %old, %val
+  store i64 %and, i64 *%ptr2, align 2, !tbaa !3
+  ret void
+}
+
+!0 = metadata !{ metadata !"root" }
+!1 = metadata !{ metadata !"set1", metadata !0 }
+!2 = metadata !{ metadata !"set2", metadata !0 }
+!3 = metadata !{ metadata !1, metadata !1, i64 0}
+!4 = metadata !{ metadata !2, metadata !2, i64 0}
diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll
index a6b80c54db51..3105503eda53 100644
--- a/test/CodeGen/SystemZ/args-01.ll
+++ b/test/CodeGen/SystemZ/args-01.ll
@@ -17,43 +17,42 @@ declare void @bar(i8, i16, i32, i64, float, double, fp128, i64,
 ; normally use %f0/%f2 as the first available 128-bit pair.  This choice
 ; is hard-coded in the FP128 tests.
 ;
-; The order of the CHECK-INT loads doesn't matter.  The same goes for the
-; CHECK_FP128-* stores and the CHECK-STACK stores.  It would be OK to reorder
+; The order of the CHECK-STACK stores doesn't matter.  It would be OK to reorder
 ; them in response to future code changes.
 define void @foo() {
-; CHECK-INT: foo:
-; CHECK-INT: lhi %r2, 1
-; CHECK-INT: lhi %r3, 2
-; CHECK-INT: lhi %r4, 3
-; CHECK-INT: lghi %r5, 4
-; CHECK-INT: la %r6, {{224|240}}(%r15)
+; CHECK-INT-LABEL: foo:
+; CHECK-INT-DAG: lhi %r2, 1
+; CHECK-INT-DAG: lhi %r3, 2
+; CHECK-INT-DAG: lhi %r4, 3
+; CHECK-INT-DAG: lghi %r5, 4
+; CHECK-INT-DAG: la %r6, {{224|240}}(%r15)
 ; CHECK-INT: brasl %r14, bar@PLT
 ;
-; CHECK-FLOAT: foo:
+; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
 ; CHECK-FLOAT: lcebr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
-; CHECK-DOUBLE: foo:
+; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
 ; CHECK-DOUBLE: lcdbr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
-; CHECK-FP128-1: foo:
+; CHECK-FP128-1-LABEL: foo:
 ; CHECK-FP128-1: aghi %r15, -256
 ; CHECK-FP128-1: lzxr %f0
-; CHECK-FP128-1: std %f0, 224(%r15)
-; CHECK-FP128-1: std %f2, 232(%r15)
+; CHECK-FP128-1-DAG: std %f0, 224(%r15)
+; CHECK-FP128-1-DAG: std %f2, 232(%r15)
 ; CHECK-FP128-1: brasl %r14, bar@PLT
 ;
-; CHECK-FP128-2: foo:
+; CHECK-FP128-2-LABEL: foo:
 ; CHECK-FP128-2: aghi %r15, -256
 ; CHECK-FP128-2: lzxr %f0
-; CHECK-FP128-2: std %f0, 240(%r15)
-; CHECK-FP128-2: std %f2, 248(%r15)
+; CHECK-FP128-2-DAG: std %f0, 240(%r15)
+; CHECK-FP128-2-DAG: std %f2, 248(%r15)
 ; CHECK-FP128-2: brasl %r14, bar@PLT
 ;
-; CHECK-STACK: foo:
+; CHECK-STACK-LABEL: foo:
 ; CHECK-STACK: aghi %r15, -256
 ; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
 ; CHECK-STACK: stg [[REGISTER]], 216(%r15)
diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll
index 9ea111c2e021..8686df88e679 100644
--- a/test/CodeGen/SystemZ/args-02.ll
+++ b/test/CodeGen/SystemZ/args-02.ll
@@ -18,43 +18,42 @@ declare void @bar(i8 signext, i16 signext, i32 signext, i64, float, double,
 ; normally use %f0/%f2 as the first available 128-bit pair.  This choice
 ; is hard-coded in the FP128 tests.
 ;
-; The order of the CHECK-INT loads doesn't matter.  The same goes for the
-; CHECK_FP128-* stores and the CHECK-STACK stores.  It would be OK to reorder
+; The order of the CHECK-STACK stores doesn't matter.  It would be OK to reorder
 ; them in response to future code changes.
 define void @foo() {
-; CHECK-INT: foo:
-; CHECK-INT: lghi %r2, -1
-; CHECK-INT: lghi %r3, -2
-; CHECK-INT: lghi %r4, -3
-; CHECK-INT: lghi %r5, -4
-; CHECK-INT: la %r6, {{224|240}}(%r15)
+; CHECK-INT-LABEL: foo:
+; CHECK-INT-DAG: lghi %r2, -1
+; CHECK-INT-DAG: lghi %r3, -2
+; CHECK-INT-DAG: lghi %r4, -3
+; CHECK-INT-DAG: lghi %r5, -4
+; CHECK-INT-DAG: la %r6, {{224|240}}(%r15)
 ; CHECK-INT: brasl %r14, bar@PLT
 ;
-; CHECK-FLOAT: foo:
+; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
 ; CHECK-FLOAT: lcebr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
-; CHECK-DOUBLE: foo:
+; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
 ; CHECK-DOUBLE: lcdbr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
-; CHECK-FP128-1: foo:
+; CHECK-FP128-1-LABEL: foo:
 ; CHECK-FP128-1: aghi %r15, -256
 ; CHECK-FP128-1: lzxr %f0
-; CHECK-FP128-1: std %f0, 224(%r15)
-; CHECK-FP128-1: std %f2, 232(%r15)
+; CHECK-FP128-1-DAG: std %f0, 224(%r15)
+; CHECK-FP128-1-DAG: std %f2, 232(%r15)
 ; CHECK-FP128-1: brasl %r14, bar@PLT
 ;
-; CHECK-FP128-2: foo:
+; CHECK-FP128-2-LABEL: foo:
 ; CHECK-FP128-2: aghi %r15, -256
 ; CHECK-FP128-2: lzxr %f0
-; CHECK-FP128-2: std %f0, 240(%r15)
-; CHECK-FP128-2: std %f2, 248(%r15)
+; CHECK-FP128-2-DAG: std %f0, 240(%r15)
+; CHECK-FP128-2-DAG: std %f2, 248(%r15)
 ; CHECK-FP128-2: brasl %r14, bar@PLT
 ;
-; CHECK-STACK: foo:
+; CHECK-STACK-LABEL: foo:
 ; CHECK-STACK: aghi %r15, -256
 ; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
 ; CHECK-STACK: stg [[REGISTER]], 216(%r15)
diff --git a/test/CodeGen/SystemZ/args-03.ll b/test/CodeGen/SystemZ/args-03.ll
index f954d584fcf4..d7d3ea105df7 100644
--- a/test/CodeGen/SystemZ/args-03.ll
+++ b/test/CodeGen/SystemZ/args-03.ll
@@ -18,43 +18,42 @@ declare void @bar(i8 zeroext, i16 zeroext, i32 zeroext, i64, float, double,
 ; normally use %f0/%f2 as the first available 128-bit pair.  This choice
 ; is hard-coded in the FP128 tests.
 ;
-; The order of the CHECK-INT loads doesn't matter.  The same goes for the
-; CHECK_FP128-* stores and the CHECK-STACK stores.  It would be OK to reorder
+; The order of the CHECK-STACK stores doesn't matter.  It would be OK to reorder
 ; them in response to future code changes.
 define void @foo() {
-; CHECK-INT: foo:
-; CHECK-INT: lghi %r2, 255
-; CHECK-INT: llill %r3, 65534
-; CHECK-INT: llilf %r4, 4294967293
-; CHECK-INT: lghi %r5, -4
-; CHECK-INT: la %r6, {{224|240}}(%r15)
+; CHECK-INT-LABEL: foo:
+; CHECK-INT-DAG: lghi %r2, 255
+; CHECK-INT-DAG: llill %r3, 65534
+; CHECK-INT-DAG: llilf %r4, 4294967293
+; CHECK-INT-DAG: lghi %r5, -4
+; CHECK-INT-DAG: la %r6, {{224|240}}(%r15)
 ; CHECK-INT: brasl %r14, bar@PLT
 ;
-; CHECK-FLOAT: foo:
+; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
 ; CHECK-FLOAT: lcebr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
-; CHECK-DOUBLE: foo:
+; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
 ; CHECK-DOUBLE: lcdbr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
-; CHECK-FP128-1: foo:
+; CHECK-FP128-1-LABEL: foo:
 ; CHECK-FP128-1: aghi %r15, -256
 ; CHECK-FP128-1: lzxr %f0
-; CHECK-FP128-1: std %f0, 224(%r15)
-; CHECK-FP128-1: std %f2, 232(%r15)
+; CHECK-FP128-1-DAG: std %f0, 224(%r15)
+; CHECK-FP128-1-DAG: std %f2, 232(%r15)
 ; CHECK-FP128-1: brasl %r14, bar@PLT
 ;
-; CHECK-FP128-2: foo:
+; CHECK-FP128-2-LABEL: foo:
 ; CHECK-FP128-2: aghi %r15, -256
 ; CHECK-FP128-2: lzxr %f0
-; CHECK-FP128-2: std %f0, 240(%r15)
-; CHECK-FP128-2: std %f2, 248(%r15)
+; CHECK-FP128-2-DAG: std %f0, 240(%r15)
+; CHECK-FP128-2-DAG: std %f2, 248(%r15)
 ; CHECK-FP128-2: brasl %r14, bar@PLT
 ;
-; CHECK-STACK: foo:
+; CHECK-STACK-LABEL: foo:
 ; CHECK-STACK: aghi %r15, -256
 ; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
 ; CHECK-STACK: stg [[REGISTER]], 216(%r15)
diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll
index 8340494ff4dc..1178bb4dafdf 100644
--- a/test/CodeGen/SystemZ/args-04.ll
+++ b/test/CodeGen/SystemZ/args-04.ll
@@ -5,7 +5,7 @@
 
 ; Do some arithmetic so that we can see the register being used.
 define i8 @f1(i8 %r2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ahi %r2, 1
 ; CHECK: br %r14
   %y = add i8 %r2, 1
@@ -13,21 +13,21 @@ define i8 @f1(i8 %r2) {
 }
 
 define i16 @f2(i8 %r2, i16 %r3) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: {{lr|lgr}} %r2, %r3
 ; CHECK: br %r14
   ret i16 %r3
 }
 
 define i32 @f3(i8 %r2, i16 %r3, i32 %r4) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: {{lr|lgr}} %r2, %r4
 ; CHECK: br %r14
   ret i32 %r4
 }
 
 define i64 @f4(i8 %r2, i16 %r3, i32 %r4, i64 %r5) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: {{lr|lgr}} %r2, %r5
 ; CHECK: br %r14
   ret i64 %r5
@@ -35,7 +35,7 @@ define i64 @f4(i8 %r2, i16 %r3, i32 %r4, i64 %r5) {
 
 ; Do some arithmetic so that we can see the register being used.
 define float @f5(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aebr %f0, %f0
 ; CHECK: br %r14
   %y = fadd float %f0, %f0
@@ -43,7 +43,7 @@ define float @f5(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0) {
 }
 
 define double @f6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   ret double %f2
@@ -54,7 +54,7 @@ define double @f6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2) {
 ; be copied.
 define void @f7(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                 fp128 %r6) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ld %f0, 0(%r6)
 ; CHECK: ld %f2, 8(%r6)
 ; CHECK: axbr %f0, %f0
@@ -68,7 +68,7 @@ define void @f7(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
 
 define i64 @f8(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                fp128 %r6, i64 %s1) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lg %r2, 160(%r15)
 ; CHECK: br %r14
   ret i64 %s1
@@ -76,7 +76,7 @@ define i64 @f8(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
 
 define float @f9(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                  fp128 %r6, i64 %s1, float %f4) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ler %f0, %f4
 ; CHECK: br %r14
   ret float %f4
@@ -84,7 +84,7 @@ define float @f9(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
 
 define double @f10(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                    fp128 %r6, i64 %s1, float %f4, double %f6) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ldr %f0, %f6
 ; CHECK: br %r14
   ret double %f6
@@ -92,7 +92,7 @@ define double @f10(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
 
 define i64 @f11(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                 fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: lg %r2, 168(%r15)
 ; CHECK: br %r14
   ret i64 %s2
@@ -102,7 +102,7 @@ define i64 @f11(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
 define float @f12(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                   fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2,
                   float %s3) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: le %f0, 180(%r15)
 ; CHECK: br %r14
   ret float %s3
@@ -112,7 +112,7 @@ define float @f12(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
 define void @f13(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
                  fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2,
                  float %s3, fp128 %s4) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: lg [[REGISTER:%r[1-5]+]], 184(%r15)
 ; CHECK: ld %f0, 0([[REGISTER]])
 ; CHECK: ld %f2, 8([[REGISTER]])
diff --git a/test/CodeGen/SystemZ/args-05.ll b/test/CodeGen/SystemZ/args-05.ll
index 9fa193a68e57..8a6ef4c54ffe 100644
--- a/test/CodeGen/SystemZ/args-05.ll
+++ b/test/CodeGen/SystemZ/args-05.ll
@@ -4,7 +4,7 @@
 
 ; Zero extension of something that is already zero-extended.
 define void @f1(i32 zeroext %r2, i64 *%r3) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: %r2
 ; CHECK: stg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -15,7 +15,7 @@ define void @f1(i32 zeroext %r2, i64 *%r3) {
 
 ; Sign extension of something that is already sign-extended.
 define void @f2(i32 signext %r2, i64 *%r3) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: %r2
 ; CHECK: stg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -26,7 +26,7 @@ define void @f2(i32 signext %r2, i64 *%r3) {
 
 ; Sign extension of something that is already zero-extended.
 define void @f3(i32 zeroext %r2, i64 *%r3) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lgfr [[REGISTER:%r[0-5]+]], %r2
 ; CHECK: stg [[REGISTER]], 0(%r3)
 ; CHECK: br %r14
@@ -37,7 +37,7 @@ define void @f3(i32 zeroext %r2, i64 *%r3) {
 
 ; Zero extension of something that is already sign-extended.
 define void @f4(i32 signext %r2, i64 *%r3) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llgfr [[REGISTER:%r[0-5]+]], %r2
 ; CHECK: stg [[REGISTER]], 0(%r3)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/args-06.ll b/test/CodeGen/SystemZ/args-06.ll
index b2f8bee2c6b5..644fcec982ef 100644
--- a/test/CodeGen/SystemZ/args-06.ll
+++ b/test/CodeGen/SystemZ/args-06.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ar %r2, %r3
 ; CHECK: ar %r2, %r4
 ; CHECK: ar %r2, %r5
@@ -22,13 +22,13 @@ define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) {
 }
 
 define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ar %r2, %r3
 ; CHECK: ar %r2, %r4
 ; CHECK: ar %r2, %r5
 ; CHECK: ar %r2, %r6
-; CHECK: lh {{%r[0-5]}}, 166(%r15)
-; CHECK: lh {{%r[0-5]}}, 174(%r15)
+; CHECK: ah %r2, 166(%r15)
+; CHECK: ah %r2, 174(%r15)
 ; CHECK: br %r14
   %addb = add i16 %a, %b
   %addc = add i16 %addb, %c
@@ -40,7 +40,7 @@ define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) {
 }
 
 define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ar %r2, %r3
 ; CHECK: ar %r2, %r4
 ; CHECK: ar %r2, %r5
@@ -58,7 +58,7 @@ define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
 }
 
 define i64 @f4(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agr %r2, %r3
 ; CHECK: agr %r2, %r4
 ; CHECK: agr %r2, %r5
diff --git a/test/CodeGen/SystemZ/asm-01.ll b/test/CodeGen/SystemZ/asm-01.ll
index 016d04c614cb..801378c5fcbd 100644
--- a/test/CodeGen/SystemZ/asm-01.ll
+++ b/test/CodeGen/SystemZ/asm-01.ll
@@ -5,7 +5,7 @@
 
 ; Check the lowest range.
 define void @f1(i64 %base) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
   %addr = inttoptr i64 %base to i64 *
@@ -15,7 +15,7 @@ define void @f1(i64 %base) {
 
 ; Check the next lowest byte.
 define void @f2(i64 %base) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r2, -1
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
@@ -27,7 +27,7 @@ define void @f2(i64 %base) {
 
 ; Check the highest range.
 define void @f3(i64 %base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blah 4095(%r2)
 ; CHECK: br %r14
   %add = add i64 %base, 4095
@@ -38,7 +38,7 @@ define void @f3(i64 %base) {
 
 ; Check the next highest byte.
 define void @f4(i64 %base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
@@ -50,7 +50,7 @@ define void @f4(i64 %base) {
 
 ; Check that indices aren't allowed
 define void @f5(i64 %base, i64 %index) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agr %r2, %r3
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-02.ll b/test/CodeGen/SystemZ/asm-02.ll
index 12d8bec161ce..ad1e35bb3621 100644
--- a/test/CodeGen/SystemZ/asm-02.ll
+++ b/test/CodeGen/SystemZ/asm-02.ll
@@ -5,7 +5,7 @@
 
 ; Check the lowest range.
 define void @f1(i64 %base) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
   %addr = inttoptr i64 %base to i64 *
@@ -15,7 +15,7 @@ define void @f1(i64 %base) {
 
 ; Check the next lowest byte.
 define void @f2(i64 %base) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r2, -1
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
@@ -27,7 +27,7 @@ define void @f2(i64 %base) {
 
 ; Check the highest range.
 define void @f3(i64 %base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blah 4095(%r2)
 ; CHECK: br %r14
   %add = add i64 %base, 4095
@@ -38,7 +38,7 @@ define void @f3(i64 %base) {
 
 ; Check the next highest byte.
 define void @f4(i64 %base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-03.ll b/test/CodeGen/SystemZ/asm-03.ll
index a6f3f2a5cb60..fa3e1a7d01d8 100644
--- a/test/CodeGen/SystemZ/asm-03.ll
+++ b/test/CodeGen/SystemZ/asm-03.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(i64 %base) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
   %addr = inttoptr i64 %base to i64 *
diff --git a/test/CodeGen/SystemZ/asm-04.ll b/test/CodeGen/SystemZ/asm-04.ll
index 0560949eb069..af7ea9fdef94 100644
--- a/test/CodeGen/SystemZ/asm-04.ll
+++ b/test/CodeGen/SystemZ/asm-04.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(i64 %base) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
   %addr = inttoptr i64 %base to i64 *
diff --git a/test/CodeGen/SystemZ/asm-05.ll b/test/CodeGen/SystemZ/asm-05.ll
index dae90b09eafe..e18cb757b142 100644
--- a/test/CodeGen/SystemZ/asm-05.ll
+++ b/test/CodeGen/SystemZ/asm-05.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(i64 %base) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: blah 0(%r2)
 ; CHECK: br %r14
   %addr = inttoptr i64 %base to i64 *
diff --git a/test/CodeGen/SystemZ/asm-06.ll b/test/CodeGen/SystemZ/asm-06.ll
index c0e24a366486..f9848a2df6fc 100644
--- a/test/CodeGen/SystemZ/asm-06.ll
+++ b/test/CodeGen/SystemZ/asm-06.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi %r1, 1
 ; CHECK: blah %r2 %r1
 ; CHECK: br %r14
@@ -12,7 +12,7 @@ define i64 @f1() {
 }
 
 define i64 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi %r1, 2
 ; CHECK: blah %r2 %r1
 ; CHECK: br %r14
@@ -21,7 +21,7 @@ define i64 @f2() {
 }
 
 define i64 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lhi %r1, 3
 ; CHECK: blah %r2 %r1
 ; CHECK: br %r14
@@ -30,7 +30,7 @@ define i64 @f3() {
 }
 
 define i64 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lghi %r1, 4
 ; CHECK: blah %r2 %r1
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-07.ll b/test/CodeGen/SystemZ/asm-07.ll
index e07286d9a4d6..bf63150cd818 100644
--- a/test/CodeGen/SystemZ/asm-07.ll
+++ b/test/CodeGen/SystemZ/asm-07.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi %r0, 1
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
@@ -12,7 +12,7 @@ define i64 @f1() {
 }
 
 define i64 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi %r0, 2
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
@@ -21,7 +21,7 @@ define i64 @f2() {
 }
 
 define i64 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lhi %r0, 3
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
@@ -30,7 +30,7 @@ define i64 @f3() {
 }
 
 define i64 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lghi %r0, 4
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-08.ll b/test/CodeGen/SystemZ/asm-08.ll
index 15abc4d0d2ed..166233752db2 100644
--- a/test/CodeGen/SystemZ/asm-08.ll
+++ b/test/CodeGen/SystemZ/asm-08.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi %r0, 1
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
@@ -12,7 +12,7 @@ define i64 @f1() {
 }
 
 define i64 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi %r0, 2
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
@@ -21,7 +21,7 @@ define i64 @f2() {
 }
 
 define i64 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lhi %r0, 3
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
@@ -30,7 +30,7 @@ define i64 @f3() {
 }
 
 define i64 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lghi %r0, 4
 ; CHECK: blah %r2 %r0
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-09.ll b/test/CodeGen/SystemZ/asm-09.ll
index 1541170924b7..5cd7efb94009 100644
--- a/test/CodeGen/SystemZ/asm-09.ll
+++ b/test/CodeGen/SystemZ/asm-09.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(i32 *%dst) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi %r0, 100
 ; CHECK: blah %r0
 ; CHECK: st %r0, 0(%r2)
@@ -14,7 +14,7 @@ define void @f1(i32 *%dst) {
 }
 
 define void @f2(i32 *%dst) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi %r0, 101
 ; CHECK: blah %r0
 ; CHECK: st %r0, 0(%r2)
@@ -25,7 +25,7 @@ define void @f2(i32 *%dst) {
 }
 
 define void @f3(i32 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lhi %r0, 102
 ; CHECK: blah %r0
 ; CHECK: st %r0, 0(%r2)
@@ -37,7 +37,7 @@ define void @f3(i32 *%dst) {
 
 ; FIXME: this uses "lhi %r0, 103", but should use "lghi %r0, 103".
 define void @f4(i32 *%dst) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: blah %r0
 ; CHECK: st %r0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +47,7 @@ define void @f4(i32 *%dst) {
 }
 
 define i64 @f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lghi %r2, 104
 ; CHECK: blah %r2
 ; CHECK: br %r14
@@ -56,7 +56,7 @@ define i64 @f5() {
 }
 
 define i64 @f6() {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lghi %r2, 105
 ; CHECK: blah %r2
 ; CHECK: br %r14
@@ -65,7 +65,7 @@ define i64 @f6() {
 }
 
 define i64 @f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lghi %r2, 106
 ; CHECK: blah %r2
 ; CHECK: br %r14
@@ -74,7 +74,7 @@ define i64 @f7() {
 }
 
 define i64 @f8() {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lghi %r2, 107
 ; CHECK: blah %r2
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-10.ll b/test/CodeGen/SystemZ/asm-10.ll
index 676c2028b056..0eccc1972187 100644
--- a/test/CodeGen/SystemZ/asm-10.ll
+++ b/test/CodeGen/SystemZ/asm-10.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define float @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lzer %f1
 ; CHECK: blah %f0 %f1
 ; CHECK: br %r14
@@ -12,7 +12,7 @@ define float @f1() {
 }
 
 define double @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lzdr %f1
 ; CHECK: blah %f0 %f1
 ; CHECK: br %r14
@@ -21,7 +21,7 @@ define double @f2() {
 }
 
 define double @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lzxr %f1
 ; CHECK: blah %f0 %f1
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-11.ll b/test/CodeGen/SystemZ/asm-11.ll
index 9bd8d7c33f01..8aeb784134a3 100644
--- a/test/CodeGen/SystemZ/asm-11.ll
+++ b/test/CodeGen/SystemZ/asm-11.ll
@@ -4,7 +4,7 @@
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi [[REG:%r[0-5]]], -1
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
@@ -14,7 +14,7 @@ define i32 @f1() {
 
 ; Test the first valid value.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: blah %r2 0
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 0)
@@ -23,7 +23,7 @@ define i32 @f2() {
 
 ; Test the last valid value.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blah %r2 255
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 255)
@@ -32,7 +32,7 @@ define i32 @f3() {
 
 ; Test 1 above the last valid value.
 define i32 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lhi [[REG:%r[0-5]]], 256
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-12.ll b/test/CodeGen/SystemZ/asm-12.ll
index dd920f11fdec..feecbacf09e8 100644
--- a/test/CodeGen/SystemZ/asm-12.ll
+++ b/test/CodeGen/SystemZ/asm-12.ll
@@ -4,7 +4,7 @@
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi [[REG:%r[0-5]]], -1
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
@@ -14,7 +14,7 @@ define i32 @f1() {
 
 ; Test the first valid value.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: blah %r2 0
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 0)
@@ -23,7 +23,7 @@ define i32 @f2() {
 
 ; Test the last valid value.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blah %r2 4095
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 4095)
@@ -32,7 +32,7 @@ define i32 @f3() {
 
 ; Test 1 above the last valid value.
 define i32 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lhi [[REG:%r[0-5]]], 4096
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-13.ll b/test/CodeGen/SystemZ/asm-13.ll
index af3fdb361533..b88170079ecc 100644
--- a/test/CodeGen/SystemZ/asm-13.ll
+++ b/test/CodeGen/SystemZ/asm-13.ll
@@ -4,7 +4,7 @@
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: iilf [[REG:%r[0-5]]], 4294934527
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
@@ -14,7 +14,7 @@ define i32 @f1() {
 
 ; Test the first valid value.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: blah %r2 -32768
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 -32768)
@@ -23,7 +23,7 @@ define i32 @f2() {
 
 ; Test the last valid value.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blah %r2 32767
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 32767)
@@ -32,7 +32,7 @@ define i32 @f3() {
 
 ; Test 1 above the last valid value.
 define i32 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llill [[REG:%r[0-5]]], 32768
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-14.ll b/test/CodeGen/SystemZ/asm-14.ll
index b6b28d6b32fc..bcd8b1ebc3df 100644
--- a/test/CodeGen/SystemZ/asm-14.ll
+++ b/test/CodeGen/SystemZ/asm-14.ll
@@ -4,7 +4,7 @@
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: iilf [[REG:%r[0-5]]], 4294443007
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
@@ -14,7 +14,7 @@ define i32 @f1() {
 
 ; Test the first valid value.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: blah %r2 -524288
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 -524288)
@@ -23,7 +23,7 @@ define i32 @f2() {
 
 ; Test the last valid value.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blah %r2 524287
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 524287)
@@ -32,7 +32,7 @@ define i32 @f3() {
 
 ; Test 1 above the last valid value.
 define i32 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llilh [[REG:%r[0-5]]], 8
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-15.ll b/test/CodeGen/SystemZ/asm-15.ll
index 4d0e2b4c3be3..886ee0e897dc 100644
--- a/test/CodeGen/SystemZ/asm-15.ll
+++ b/test/CodeGen/SystemZ/asm-15.ll
@@ -4,7 +4,7 @@
 
 ; Test 1 below the valid value.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: iilf [[REG:%r[0-5]]], 2147483646
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
@@ -14,7 +14,7 @@ define i32 @f1() {
 
 ; Test the first valid value.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: blah %r2 2147483647
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647)
@@ -23,7 +23,7 @@ define i32 @f2() {
 
 ; Test 1 above the valid value.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llilh [[REG:%r[0-5]]], 32768
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-16.ll b/test/CodeGen/SystemZ/asm-16.ll
index 4d0e2b4c3be3..886ee0e897dc 100644
--- a/test/CodeGen/SystemZ/asm-16.ll
+++ b/test/CodeGen/SystemZ/asm-16.ll
@@ -4,7 +4,7 @@
 
 ; Test 1 below the valid value.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: iilf [[REG:%r[0-5]]], 2147483646
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
@@ -14,7 +14,7 @@ define i32 @f1() {
 
 ; Test the first valid value.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: blah %r2 2147483647
 ; CHECK: br %r14
   %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647)
@@ -23,7 +23,7 @@ define i32 @f2() {
 
 ; Test 1 above the valid value.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llilh [[REG:%r[0-5]]], 32768
 ; CHECK: blah %r2 [[REG]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/asm-17.ll b/test/CodeGen/SystemZ/asm-17.ll
new file mode 100644
index 000000000000..7bc9da32ea95
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-17.ll
@@ -0,0 +1,105 @@
+; Test explicit register names.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test i32 GPRs.
+define i32 @f1() {
+; CHECK-LABEL: f1:
+; CHECK: lhi %r4, 1
+; CHECK: blah %r4
+; CHECK: lr %r2, %r4
+; CHECK: br %r14
+  %ret = call i32 asm "blah $0", "={r4},0" (i32 1)
+  ret i32 %ret
+}
+
+; Test i64 GPRs.
+define i64 @f2() {
+; CHECK-LABEL: f2:
+; CHECK: lghi %r4, 1
+; CHECK: blah %r4
+; CHECK: lgr %r2, %r4
+; CHECK: br %r14
+  %ret = call i64 asm "blah $0", "={r4},0" (i64 1)
+  ret i64 %ret
+}
+
+; Test f32 FPRs.
+define float @f3() {
+; CHECK-LABEL: f3:
+; CHECK: lzer %f4
+; CHECK: blah %f4
+; CHECK: ler %f0, %f4
+; CHECK: br %r14
+  %ret = call float asm "blah $0", "={f4},0" (float 0.0)
+  ret float %ret
+}
+
+; Test f64 FPRs.
+define double @f4() {
+; CHECK-LABEL: f4:
+; CHECK: lzdr %f4
+; CHECK: blah %f4
+; CHECK: ldr %f0, %f4
+; CHECK: br %r14
+  %ret = call double asm "blah $0", "={f4},0" (double 0.0)
+  ret double %ret
+}
+
+; Test f128 FPRs.
+define void @f5(fp128 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: lzxr %f4
+; CHECK: blah %f4
+; CHECK-DAG: std %f4, 0(%r2)
+; CHECK-DAG: std %f6, 8(%r2)
+; CHECK: br %r14
+  %ret = call fp128 asm "blah $0", "={f4},0" (fp128 0xL00000000000000000000000000000000)
+  store fp128 %ret, fp128 *%dest
+  ret void
+}
+
+; Test clobbers of GPRs and CC.
+define i32 @f6(i32 %in) {
+; CHECK-LABEL: f6:
+; CHECK: lr [[REG:%r[01345]]], %r2
+; CHECK: blah
+; CHECK: lr %r2, [[REG]]
+; CHECK: br %r14
+  call void asm sideeffect "blah", "~{r2},~{cc}"()
+  ret i32 %in
+}
+
+; Test clobbers of FPRs and CC.
+define float @f7(float %in) {
+; CHECK-LABEL: f7:
+; CHECK: ler [[REG:%f[1-7]]], %f0
+; CHECK: blah
+; CHECK: ler %f0, [[REG]]
+; CHECK: br %r14
+  call void asm sideeffect "blah", "~{f0},~{cc}"()
+  ret float %in
+}
+
+; Test that both registers in a GR128 pair get hoisted.
+define void @f8(i32 %count) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: lhi %r1, 1
+; CHECK: %loop
+; CHECK-NOT: %r
+; CHECK: blah %r0, %r1
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %this = phi i32 [ %count, %entry ], [ %next, %loop ]
+  call void asm sideeffect "blah $0, $1", "{r0},{r1}" (i32 0, i32 1)
+  %next = sub i32 %this, 1
+  %cmp = icmp ne i32 %next, 0
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll
new file mode 100644
index 000000000000..d60654b7863d
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-18.ll
@@ -0,0 +1,745 @@
+; Test high-word operations, using "h" constraints to force a high
+; register and "r" constraints to force a low register.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Test loads and stores involving mixtures of high and low registers.
+define void @f1(i32 *%ptr1, i32 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lfh [[REG1:%r[0-5]]], 0(%r2)
+; CHECK-DAG: l [[REG2:%r[0-5]]], 0(%r3)
+; CHECK-DAG: lfh [[REG3:%r[0-5]]], 4096(%r2)
+; CHECK-DAG: ly [[REG4:%r[0-5]]], 524284(%r3)
+; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]]
+; CHECK-DAG: stfh [[REG1]], 0(%r2)
+; CHECK-DAG: st [[REG2]], 0(%r3)
+; CHECK-DAG: stfh [[REG3]], 4096(%r2)
+; CHECK-DAG: sty [[REG4]], 524284(%r3)
+; CHECK: br %r14
+  %ptr3 = getelementptr i32 *%ptr1, i64 1024
+  %ptr4 = getelementptr i32 *%ptr2, i64 131071
+  %old1 = load i32 *%ptr1
+  %old2 = load i32 *%ptr2
+  %old3 = load i32 *%ptr3
+  %old4 = load i32 *%ptr4
+  %res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3",
+              "=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4)
+  %new1 = extractvalue { i32, i32, i32, i32 } %res, 0
+  %new2 = extractvalue { i32, i32, i32, i32 } %res, 1
+  %new3 = extractvalue { i32, i32, i32, i32 } %res, 2
+  %new4 = extractvalue { i32, i32, i32, i32 } %res, 3
+  store i32 %new1, i32 *%ptr1
+  store i32 %new2, i32 *%ptr2
+  store i32 %new3, i32 *%ptr3
+  store i32 %new4, i32 *%ptr4
+  ret void
+}
+
+; Test moves involving mixtures of high and low registers.
+define i32 @f2(i32 %old) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 0, 159, 32
+; CHECK-DAG: lr %r3, %r2
+; CHECK: stepa [[REG1]], %r2, %r3
+; CHECK: risbhg {{%r[0-5]}}, [[REG1]], 0, 159, 0
+; CHECK: stepb [[REG2:%r[0-5]]]
+; CHECK: risblg %r2, [[REG2]], 0, 159, 32
+; CHECK: br %r14
+  %tmp = call i32 asm "stepa $1, $2, $3",
+                      "=h,0,{r2},{r3}"(i32 %old, i32 %old, i32 %old)
+  %new = call i32 asm "stepb $1, $2", "=&h,0,h"(i32 %tmp, i32 %tmp)
+  ret i32 %new
+}
+
+; Test sign-extending 8-bit loads into mixtures of high and low registers.
+define void @f3(i8 *%ptr1, i8 *%ptr2) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: lbh [[REG1:%r[0-5]]], 0(%r2)
+; CHECK-DAG: lb [[REG2:%r[0-5]]], 0(%r3)
+; CHECK-DAG: lbh [[REG3:%r[0-5]]], 4096(%r2)
+; CHECK-DAG: lb [[REG4:%r[0-5]]], 524287(%r3)
+; CHECK: blah [[REG1]], [[REG2]]
+; CHECK: br %r14
+  %ptr3 = getelementptr i8 *%ptr1, i64 4096
+  %ptr4 = getelementptr i8 *%ptr2, i64 524287
+  %val1 = load i8 *%ptr1
+  %val2 = load i8 *%ptr2
+  %val3 = load i8 *%ptr3
+  %val4 = load i8 *%ptr4
+  %ext1 = sext i8 %val1 to i32
+  %ext2 = sext i8 %val2 to i32
+  %ext3 = sext i8 %val3 to i32
+  %ext4 = sext i8 %val4 to i32
+  call void asm sideeffect "blah $0, $1, $2, $3",
+                           "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4)
+  ret void
+}
+
+; Test sign-extending 16-bit loads into mixtures of high and low registers.
+define void @f4(i16 *%ptr1, i16 *%ptr2) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: lhh [[REG1:%r[0-5]]], 0(%r2)
+; CHECK-DAG: lh [[REG2:%r[0-5]]], 0(%r3)
+; CHECK-DAG: lhh [[REG3:%r[0-5]]], 4096(%r2)
+; CHECK-DAG: lhy [[REG4:%r[0-5]]], 524286(%r3)
+; CHECK: blah [[REG1]], [[REG2]]
+; CHECK: br %r14
+  %ptr3 = getelementptr i16 *%ptr1, i64 2048
+  %ptr4 = getelementptr i16 *%ptr2, i64 262143
+  %val1 = load i16 *%ptr1
+  %val2 = load i16 *%ptr2
+  %val3 = load i16 *%ptr3
+  %val4 = load i16 *%ptr4
+  %ext1 = sext i16 %val1 to i32
+  %ext2 = sext i16 %val2 to i32
+  %ext3 = sext i16 %val3 to i32
+  %ext4 = sext i16 %val4 to i32
+  call void asm sideeffect "blah $0, $1, $2, $3",
+                           "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4)
+  ret void
+}
+
+; Test zero-extending 8-bit loads into mixtures of high and low registers.
+define void @f5(i8 *%ptr1, i8 *%ptr2) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: llch [[REG1:%r[0-5]]], 0(%r2)
+; CHECK-DAG: llc [[REG2:%r[0-5]]], 0(%r3)
+; CHECK-DAG: llch [[REG3:%r[0-5]]], 4096(%r2)
+; CHECK-DAG: llc [[REG4:%r[0-5]]], 524287(%r3)
+; CHECK: blah [[REG1]], [[REG2]]
+; CHECK: br %r14
+  %ptr3 = getelementptr i8 *%ptr1, i64 4096
+  %ptr4 = getelementptr i8 *%ptr2, i64 524287
+  %val1 = load i8 *%ptr1
+  %val2 = load i8 *%ptr2
+  %val3 = load i8 *%ptr3
+  %val4 = load i8 *%ptr4
+  %ext1 = zext i8 %val1 to i32
+  %ext2 = zext i8 %val2 to i32
+  %ext3 = zext i8 %val3 to i32
+  %ext4 = zext i8 %val4 to i32
+  call void asm sideeffect "blah $0, $1, $2, $3",
+                           "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4)
+  ret void
+}
+
+; Test zero-extending 16-bit loads into mixtures of high and low registers.
+define void @f6(i16 *%ptr1, i16 *%ptr2) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: llhh [[REG1:%r[0-5]]], 0(%r2)
+; CHECK-DAG: llh [[REG2:%r[0-5]]], 0(%r3)
+; CHECK-DAG: llhh [[REG3:%r[0-5]]], 4096(%r2)
+; CHECK-DAG: llh [[REG4:%r[0-5]]], 524286(%r3)
+; CHECK: blah [[REG1]], [[REG2]]
+; CHECK: br %r14
+  %ptr3 = getelementptr i16 *%ptr1, i64 2048
+  %ptr4 = getelementptr i16 *%ptr2, i64 262143
+  %val1 = load i16 *%ptr1
+  %val2 = load i16 *%ptr2
+  %val3 = load i16 *%ptr3
+  %val4 = load i16 *%ptr4
+  %ext1 = zext i16 %val1 to i32
+  %ext2 = zext i16 %val2 to i32
+  %ext3 = zext i16 %val3 to i32
+  %ext4 = zext i16 %val4 to i32
+  call void asm sideeffect "blah $0, $1, $2, $3",
+                           "h,r,h,r"(i32 %ext1, i32 %ext2, i32 %ext3, i32 %ext4)
+  ret void
+}
+
+; Test truncating stores of high and low registers into 8-bit memory.
+define void @f7(i8 *%ptr1, i8 *%ptr2) {
+; CHECK-LABEL: f7:
+; CHECK: blah [[REG1:%r[0-5]]], [[REG2:%r[0-5]]]
+; CHECK-DAG: stch [[REG1]], 0(%r2)
+; CHECK-DAG: stc [[REG2]], 0(%r3)
+; CHECK-DAG: stch [[REG1]], 4096(%r2)
+; CHECK-DAG: stcy [[REG2]], 524287(%r3)
+; CHECK: br %r14
+  %res = call { i32, i32 } asm "blah $0, $1", "=h,=r"()
+  %res1 = extractvalue { i32, i32 } %res, 0
+  %res2 = extractvalue { i32, i32 } %res, 1
+  %trunc1 = trunc i32 %res1 to i8
+  %trunc2 = trunc i32 %res2 to i8
+  %ptr3 = getelementptr i8 *%ptr1, i64 4096
+  %ptr4 = getelementptr i8 *%ptr2, i64 524287
+  store i8 %trunc1, i8 *%ptr1
+  store i8 %trunc2, i8 *%ptr2
+  store i8 %trunc1, i8 *%ptr3
+  store i8 %trunc2, i8 *%ptr4
+  ret void
+}
+
+; Test truncating stores of high and low registers into 16-bit memory.
+define void @f8(i16 *%ptr1, i16 *%ptr2) {
+; CHECK-LABEL: f8:
+; CHECK: blah [[REG1:%r[0-5]]], [[REG2:%r[0-5]]]
+; CHECK-DAG: sthh [[REG1]], 0(%r2)
+; CHECK-DAG: sth [[REG2]], 0(%r3)
+; CHECK-DAG: sthh [[REG1]], 4096(%r2)
+; CHECK-DAG: sthy [[REG2]], 524286(%r3)
+; CHECK: br %r14
+  %res = call { i32, i32 } asm "blah $0, $1", "=h,=r"()
+  %res1 = extractvalue { i32, i32 } %res, 0
+  %res2 = extractvalue { i32, i32 } %res, 1
+  %trunc1 = trunc i32 %res1 to i16
+  %trunc2 = trunc i32 %res2 to i16
+  %ptr3 = getelementptr i16 *%ptr1, i64 2048
+  %ptr4 = getelementptr i16 *%ptr2, i64 262143
+  store i16 %trunc1, i16 *%ptr1
+  store i16 %trunc2, i16 *%ptr2
+  store i16 %trunc1, i16 *%ptr3
+  store i16 %trunc2, i16 *%ptr4
+  ret void
+}
+
+; Test zero extensions from 8 bits between mixtures of high and low registers.
+define i32 @f9(i8 %val1, i8 %val2) {
+; CHECK-LABEL: f9:
+; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 24, 159, 32
+; CHECK-DAG: llcr [[REG2:%r[0-5]]], %r3
+; CHECK: stepa [[REG1]], [[REG2]]
+; CHECK: risbhg [[REG3:%r[0-5]]], [[REG1]], 24, 159, 0
+; CHECK: stepb [[REG3]]
+; CHECK: risblg %r2, [[REG3]], 24, 159, 32
+; CHECK: br %r14
+  %ext1 = zext i8 %val1 to i32
+  %ext2 = zext i8 %val2 to i32
+  %val3 = call i8 asm sideeffect "stepa $0, $1", "=h,0,r"(i32 %ext1, i32 %ext2)
+  %ext3 = zext i8 %val3 to i32
+  %val4 = call i8 asm sideeffect "stepb $0", "=h,0"(i32 %ext3)
+  %ext4 = zext i8 %val4 to i32
+  ret i32 %ext4
+}
+
+; Test zero extensions from 16 bits between mixtures of high and low registers.
+define i32 @f10(i16 %val1, i16 %val2) {
+; CHECK-LABEL: f10:
+; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 16, 159, 32
+; CHECK-DAG: llhr [[REG2:%r[0-5]]], %r3
+; CHECK: stepa [[REG1]], [[REG2]]
+; CHECK: risbhg [[REG3:%r[0-5]]], [[REG1]], 16, 159, 0
+; CHECK: stepb [[REG3]]
+; CHECK: risblg %r2, [[REG3]], 16, 159, 32
+; CHECK: br %r14
+  %ext1 = zext i16 %val1 to i32
+  %ext2 = zext i16 %val2 to i32
+  %val3 = call i16 asm sideeffect "stepa $0, $1", "=h,0,r"(i32 %ext1, i32 %ext2)
+  %ext3 = zext i16 %val3 to i32
+  %val4 = call i16 asm sideeffect "stepb $0", "=h,0"(i32 %ext3)
+  %ext4 = zext i16 %val4 to i32
+  ret i32 %ext4
+}
+
+; Test loads of 16-bit constants into mixtures of high and low registers.
+define void @f11() {
+; CHECK-LABEL: f11:
+; CHECK-DAG: iihf [[REG1:%r[0-5]]], 4294934529
+; CHECK-DAG: lhi [[REG2:%r[0-5]]], -32768
+; CHECK-DAG: llihl [[REG3:%r[0-5]]], 32766
+; CHECK-DAG: lhi [[REG4:%r[0-5]]], 32767
+; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]]
+; CHECK: br %r14
+  call void asm sideeffect "blah $0, $1, $2, $3",
+                           "h,r,h,r"(i32 -32767, i32 -32768,
+                                     i32 32766, i32 32767)
+  ret void
+}
+
+; Test loads of unsigned constants into mixtures of high and low registers.
+; For stepc, we expect the h and r operands to be paired by the register
+; allocator.  It doesn't really matter which comes first: LLILL/IIHF would
+; be just as good.
+define void @f12() {
+; CHECK-LABEL: f12:
+; CHECK-DAG: llihl [[REG1:%r[0-5]]], 32768
+; CHECK-DAG: llihl [[REG2:%r[0-5]]], 65535
+; CHECK-DAG: llihh [[REG3:%r[0-5]]], 1
+; CHECK-DAG: llihh [[REG4:%r[0-5]]], 65535
+; CHECK: stepa [[REG1]], [[REG2]], [[REG3]], [[REG4]]
+; CHECK-DAG: llill [[REG1:%r[0-5]]], 32769
+; CHECK-DAG: llill [[REG2:%r[0-5]]], 65534
+; CHECK-DAG: llilh [[REG3:%r[0-5]]], 2
+; CHECK-DAG: llilh [[REG4:%r[0-5]]], 65534
+; CHECK: stepb [[REG1]], [[REG2]], [[REG3]], [[REG4]]
+; CHECK-DAG: llihl [[REG1:%r[0-5]]], 32770
+; CHECK-DAG: iilf [[REG1]], 65533
+; CHECK-DAG: llihh [[REG2:%r[0-5]]], 4
+; CHECK-DAG: iilf [[REG2]], 524288
+; CHECK: stepc [[REG1]], [[REG1]], [[REG2]], [[REG2]]
+; CHECK-DAG: iihf [[REG1:%r[0-5]]], 3294967296
+; CHECK-DAG: iilf [[REG2:%r[0-5]]], 4294567296
+; CHECK-DAG: iihf [[REG3:%r[0-5]]], 1000000000
+; CHECK-DAG: iilf [[REG4:%r[0-5]]], 400000
+; CHECK: stepd [[REG1]], [[REG2]], [[REG3]], [[REG4]]
+; CHECK: br %r14
+  call void asm sideeffect "stepa $0, $1, $2, $3",
+                           "h,h,h,h"(i32 32768, i32 65535,
+                                     i32 65536, i32 -65536)
+  call void asm sideeffect "stepb $0, $1, $2, $3",
+                           "r,r,r,r"(i32 32769, i32 65534,
+                                     i32 131072, i32 -131072)
+  call void asm sideeffect "stepc $0, $1, $2, $3",
+                           "h,r,h,r"(i32 32770, i32 65533,
+                                     i32 262144, i32 524288)
+  call void asm sideeffect "stepd $0, $1, $2, $3",
+                           "h,r,h,r"(i32 -1000000000, i32 -400000,
+                                     i32 1000000000, i32 400000)
+  ret void
+}
+
+; Test selects involving high registers: a select between 0 and 2102030405 on (%x == 0) feeds an "h"-constrained asm operand.
+define void @f13(i32 %x, i32 %y) {
+; CHECK-LABEL: f13:
+; CHECK: llihl [[REG:%r[0-5]]], 0
+; CHECK: cije %r2, 0
+; CHECK: iihf [[REG]], 2102030405
+; CHECK: blah [[REG]]
+; CHECK: br %r14
+  %cmp = icmp eq i32 %x, 0
+  %val = select i1 %cmp, i32 0, i32 2102030405
+  call void asm sideeffect "blah $0", "h"(i32 %val)
+  ret void
+}
+
+; Test selects involving low registers: a select between 0 and 2102030405 on (%x == 0) feeds an "r"-constrained asm operand.
+define void @f14(i32 %x, i32 %y) {
+; CHECK-LABEL: f14:
+; CHECK: lhi [[REG:%r[0-5]]], 0
+; CHECK: cije %r2, 0
+; CHECK: iilf [[REG]], 2102030405
+; CHECK: blah [[REG]]
+; CHECK: br %r14
+  %cmp = icmp eq i32 %x, 0
+  %val = select i1 %cmp, i32 0, i32 2102030405
+  call void asm sideeffect "blah $0", "r"(i32 %val)
+  ret void
+}
+
+; Test immediate insertion involving high registers: replace the upper 16 bits with 0x1234 (4660), then the lower 16 bits with 0x8765 (34661), of "h" values.
+define void @f15() {
+; CHECK-LABEL: f15:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: iihh [[REG]], 4660
+; CHECK: stepb [[REG]]
+; CHECK: iihl [[REG]], 34661
+; CHECK: stepc [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %and1 = and i32 %res1, 65535
+  %or1 = or i32 %and1, 305397760
+  %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %or1)
+  %and2 = and i32 %res2, -65536
+  %or2 = or i32 %and2, 34661
+  call void asm sideeffect "stepc $0", "h"(i32 %or2)
+  ret void
+}
+
+; Test immediate insertion involving low registers: replace the upper 16 bits with 0x1234 (4660), then the lower 16 bits with 0x8765 (34661), of "r" values.
+define void @f16() {
+; CHECK-LABEL: f16:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: iilh [[REG]], 4660
+; CHECK: stepb [[REG]]
+; CHECK: iill [[REG]], 34661
+; CHECK: stepc [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %and1 = and i32 %res1, 65535
+  %or1 = or i32 %and1, 305397760
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %or1)
+  %and2 = and i32 %res2, -65536
+  %or2 = or i32 %and2, 34661
+  call void asm sideeffect "stepc $0", "r"(i32 %or2)
+  ret void
+}
+
+; Test immediate OR involving high registers: OR in an upper-halfword constant (0x12340000), a lower-halfword constant (34661) and one spanning both halves (12345678).
+define void @f17() {
+; CHECK-LABEL: f17:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: oihh [[REG]], 4660
+; CHECK: stepb [[REG]]
+; CHECK: oihl [[REG]], 34661
+; CHECK: stepc [[REG]]
+; CHECK: oihf [[REG]], 12345678
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %or1 = or i32 %res1, 305397760
+  %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %or1)
+  %or2 = or i32 %res2, 34661
+  %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %or2)
+  %or3 = or i32 %res3, 12345678
+  call void asm sideeffect "stepd $0", "h"(i32 %or3)
+  ret void
+}
+
+; Test immediate OR involving low registers: OR in an upper-halfword constant (0x12340000), a lower-halfword constant (34661) and one spanning both halves (12345678).
+define void @f18() {
+; CHECK-LABEL: f18:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: oilh [[REG]], 4660
+; CHECK: stepb [[REG]]
+; CHECK: oill [[REG]], 34661
+; CHECK: stepc [[REG]]
+; CHECK: oilf [[REG]], 12345678
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %or1 = or i32 %res1, 305397760
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %or1)
+  %or2 = or i32 %res2, 34661
+  %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %or2)
+  %or3 = or i32 %res3, 12345678
+  call void asm sideeffect "stepd $0", "r"(i32 %or3)
+  ret void
+}
+
+; Test immediate XOR involving high registers; the CHECK lines expect XIHF for all three constants, whichever halfwords they touch.
+define void @f19() {
+; CHECK-LABEL: f19:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: xihf [[REG]], 305397760
+; CHECK: stepb [[REG]]
+; CHECK: xihf [[REG]], 34661
+; CHECK: stepc [[REG]]
+; CHECK: xihf [[REG]], 12345678
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %xor1 = xor i32 %res1, 305397760
+  %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %xor1)
+  %xor2 = xor i32 %res2, 34661
+  %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %xor2)
+  %xor3 = xor i32 %res3, 12345678
+  call void asm sideeffect "stepd $0", "h"(i32 %xor3)
+  ret void
+}
+
+; Test immediate XOR involving low registers; the CHECK lines expect XILF for all three constants, whichever halfwords they touch.
+define void @f20() {
+; CHECK-LABEL: f20:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: xilf [[REG]], 305397760
+; CHECK: stepb [[REG]]
+; CHECK: xilf [[REG]], 34661
+; CHECK: stepc [[REG]]
+; CHECK: xilf [[REG]], 12345678
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %xor1 = xor i32 %res1, 305397760
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %xor1)
+  %xor2 = xor i32 %res2, 34661
+  %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %xor2)
+  %xor3 = xor i32 %res3, 12345678
+  call void asm sideeffect "stepd $0", "r"(i32 %xor3)
+  ret void
+}
+
+; Test two-operand immediate AND involving high registers, using masks 0x1000ffff (268500991), 0xffffe0c0 (-8000) and 12345678.
+define void @f21() {
+; CHECK-LABEL: f21:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: nihh [[REG]], 4096
+; CHECK: stepb [[REG]]
+; CHECK: nihl [[REG]], 57536
+; CHECK: stepc [[REG]]
+; CHECK: nihf [[REG]], 12345678
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %and1 = and i32 %res1, 268500991
+  %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %and1)
+  %and2 = and i32 %res2, -8000
+  %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %and2)
+  %and3 = and i32 %res3, 12345678
+  call void asm sideeffect "stepd $0", "h"(i32 %and3)
+  ret void
+}
+
+; Test two-operand immediate AND involving low registers, using masks 0x1000ffff (268500991), 0xffffe0c0 (-8000) and 12345678.
+define void @f22() {
+; CHECK-LABEL: f22:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: nilh [[REG]], 4096
+; CHECK: stepb [[REG]]
+; CHECK: nill [[REG]], 57536
+; CHECK: stepc [[REG]]
+; CHECK: nilf [[REG]], 12345678
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %and1 = and i32 %res1, 268500991
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %and1)
+  %and2 = and i32 %res2, -8000
+  %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %and2)
+  %and3 = and i32 %res3, 12345678
+  call void asm sideeffect "stepd $0", "r"(i32 %and3)
+  ret void
+}
+
+; Test three-operand immediate AND involving mixtures of low and high registers: masks of %old go to "r" and "h" operands, then masks of the "=h" result go to "h" and "r" operands.
+define i32 @f23(i32 %old) {
+; CHECK-LABEL: f23:
+; CHECK-DAG: risblg [[REG1:%r[0-5]]], %r2, 28, 158, 0
+; CHECK-DAG: risbhg [[REG2:%r[0-5]]], %r2, 24, 158, 32
+; CHECK: stepa %r2, [[REG1]], [[REG2]]
+; CHECK-DAG: risbhg [[REG3:%r[0-5]]], [[REG2]], 25, 159, 0
+; CHECK-DAG: risblg %r2, [[REG2]], 24, 152, 32
+; CHECK: stepb [[REG2]], [[REG3]], %r2
+; CHECK: br %r14
+  %and1 = and i32 %old, 14
+  %and2 = and i32 %old, 254
+  %res1 = call i32 asm "stepa $1, $2, $3",
+                       "=h,r,r,0"(i32 %old, i32 %and1, i32 %and2)
+  %and3 = and i32 %res1, 127
+  %and4 = and i32 %res1, 128
+  %res2 = call i32 asm "stepb $1, $2, $3",
+                       "=r,h,h,0"(i32 %res1, i32 %and3, i32 %and4)
+  ret i32 %res2
+}
+
+; Test RISB[LH]G insertions involving mixtures of high and low registers: each operand is a shift followed by a mask, so the expected rotate amounts are nonzero.
+define i32 @f24(i32 %old) {
+; CHECK-LABEL: f24:
+; CHECK-DAG: risblg [[REG1:%r[0-5]]], %r2, 28, 158, 1
+; CHECK-DAG: risbhg [[REG2:%r[0-5]]], %r2, 24, 158, 29
+; CHECK: stepa %r2, [[REG1]], [[REG2]]
+; CHECK-DAG: risbhg [[REG3:%r[0-5]]], [[REG2]], 25, 159, 62
+; CHECK-DAG: risblg %r2, [[REG2]], 24, 152, 37
+; CHECK: stepb [[REG2]], [[REG3]], %r2
+; CHECK: br %r14
+  %shift1 = shl i32 %old, 1
+  %and1 = and i32 %shift1, 14
+  %shift2 = lshr i32 %old, 3
+  %and2 = and i32 %shift2, 254
+  %res1 = call i32 asm "stepa $1, $2, $3",
+                       "=h,r,r,0"(i32 %old, i32 %and1, i32 %and2)
+  %shift3 = lshr i32 %res1, 2
+  %and3 = and i32 %shift3, 127
+  %shift4 = shl i32 %res1, 5
+  %and4 = and i32 %shift4, 128
+  %res2 = call i32 asm "stepb $1, $2, $3",
+                       "=r,h,h,0"(i32 %res1, i32 %and3, i32 %and4)
+  ret i32 %res2
+}
+
+; Test TMxx involving mixtures of high and low registers: bits 0 and 16 are tested first on the argument %old and then on the "=h" result of stepa.
+define i32 @f25(i32 %old) {
+; CHECK-LABEL: f25:
+; CHECK-DAG: tmll %r2, 1
+; CHECK-DAG: tmlh %r2, 1
+; CHECK: stepa [[REG1:%r[0-5]]],
+; CHECK-DAG: tmhl [[REG1]], 1
+; CHECK-DAG: tmhh [[REG1]], 1
+; CHECK: stepb %r2,
+; CHECK: br %r14
+  %and1 = and i32 %old, 1
+  %and2 = and i32 %old, 65536
+  %cmp1 = icmp eq i32 %and1, 0
+  %cmp2 = icmp eq i32 %and2, 0
+  %sel1 = select i1 %cmp1, i32 100, i32 200
+  %sel2 = select i1 %cmp2, i32 100, i32 200
+  %res1 = call i32 asm "stepa $0, $1, $2",
+                       "=h,r,r"(i32 %sel1, i32 %sel2)
+  %and3 = and i32 %res1, 1
+  %and4 = and i32 %res1, 65536
+  %cmp3 = icmp eq i32 %and3, 0
+  %cmp4 = icmp eq i32 %and4, 0
+  %sel3 = select i1 %cmp3, i32 100, i32 200
+  %sel4 = select i1 %cmp4, i32 100, i32 200
+  %res2 = call i32 asm "stepb $0, $1, $2",
+                       "=r,h,h"(i32 %sel3, i32 %sel4)
+  ret i32 %res2
+}
+
+; Test two-operand halfword immediate addition involving high registers, adding -32768, 1 and 32767 (the signed 16-bit extremes and a small value).
+define void @f26() {
+; CHECK-LABEL: f26:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: aih [[REG]], -32768
+; CHECK: stepb [[REG]]
+; CHECK: aih [[REG]], 1
+; CHECK: stepc [[REG]]
+; CHECK: aih [[REG]], 32767
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %add1 = add i32 %res1, -32768
+  %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %add1)
+  %add2 = add i32 %res2, 1
+  %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %add2)
+  %add3 = add i32 %res3, 32767
+  call void asm sideeffect "stepd $0", "h"(i32 %add3)
+  ret void
+}
+
+; Test two-operand halfword immediate addition involving low registers, adding -32768, 1 and 32767 (the signed 16-bit extremes and a small value).
+define void @f27() {
+; CHECK-LABEL: f27:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: ahi [[REG]], -32768
+; CHECK: stepb [[REG]]
+; CHECK: ahi [[REG]], 1
+; CHECK: stepc [[REG]]
+; CHECK: ahi [[REG]], 32767
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %add1 = add i32 %res1, -32768
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %add1)
+  %add2 = add i32 %res2, 1
+  %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %add2)
+  %add3 = add i32 %res3, 32767
+  call void asm sideeffect "stepd $0", "r"(i32 %add3)
+  ret void
+}
+
+; Test three-operand halfword immediate addition involving mixtures of low
+; and high registers.  RISBHG/AIH would be OK too, instead of AHIK/RISBHG.  The four sums go low->low, low->high, high->high and high->low in turn.
+define i32 @f28(i32 %old) {
+; CHECK-LABEL: f28:
+; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14
+; CHECK: stepa %r2, [[REG1]]
+; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254
+; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32
+; CHECK: stepb [[REG1]], [[REG2]]
+; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0
+; CHECK: aih [[REG3]], 127
+; CHECK: stepc [[REG2]], [[REG3]]
+; CHECK: risblg %r2, [[REG3]], 0, 159, 32
+; CHECK: ahi %r2, 128
+; CHECK: stepd [[REG3]], %r2
+; CHECK: br %r14
+  %add1 = add i32 %old, 14
+  %res1 = call i32 asm "stepa $1, $2",
+                       "=r,r,0"(i32 %old, i32 %add1)
+  %add2 = add i32 %res1, 254
+  %res2 = call i32 asm "stepb $1, $2",
+                       "=h,r,0"(i32 %res1, i32 %add2)
+  %add3 = add i32 %res2, 127
+  %res3 = call i32 asm "stepc $1, $2",
+                       "=h,h,0"(i32 %res2, i32 %add3)
+  %add4 = add i32 %res3, 128
+  %res4 = call i32 asm "stepd $1, $2",
+                       "=r,h,0"(i32 %res3, i32 %add4)
+  ret i32 %res4
+}
+
+; Test large immediate addition involving high registers: -32769 and 32768 lie just outside the signed 16-bit range, and 1000000000 is well beyond it.
+define void @f29() {
+; CHECK-LABEL: f29:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: aih [[REG]], -32769
+; CHECK: stepb [[REG]]
+; CHECK: aih [[REG]], 32768
+; CHECK: stepc [[REG]]
+; CHECK: aih [[REG]], 1000000000
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %add1 = add i32 %res1, -32769
+  %res2 = call i32 asm "stepb $0, $1", "=h,h"(i32 %add1)
+  %add2 = add i32 %res2, 32768
+  %res3 = call i32 asm "stepc $0, $1", "=h,h"(i32 %add2)
+  %add3 = add i32 %res3, 1000000000
+  call void asm sideeffect "stepd $0", "h"(i32 %add3)
+  ret void
+}
+
+; Test large immediate addition involving low registers: -32769 and 32768 lie just outside the signed 16-bit range, and 1000000000 is well beyond it.
+define void @f30() {
+; CHECK-LABEL: f30:
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: afi [[REG]], -32769
+; CHECK: stepb [[REG]]
+; CHECK: afi [[REG]], 32768
+; CHECK: stepc [[REG]]
+; CHECK: afi [[REG]], 1000000000
+; CHECK: stepd [[REG]]
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %add1 = add i32 %res1, -32769
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %add1)
+  %add2 = add i32 %res2, 32768
+  %res3 = call i32 asm "stepc $0, $1", "=r,r"(i32 %add2)
+  %add3 = add i32 %res3, 1000000000
+  call void asm sideeffect "stepd $0", "r"(i32 %add3)
+  ret void
+}
+
+; Test large immediate comparison involving high registers: a signed (sle) then an unsigned (ule) compare of "=h" results against 1000000000.
+define i32 @f31() {
+; CHECK-LABEL: f31:
+; CHECK: stepa [[REG1:%r[0-5]]]
+; CHECK: cih [[REG1]], 1000000000
+; CHECK: stepb [[REG2:%r[0-5]]]
+; CHECK: clih [[REG2]], 1000000000
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %cmp1 = icmp sle i32 %res1, 1000000000
+  %sel1 = select i1 %cmp1, i32 0, i32 1
+  %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1)
+  %cmp2 = icmp ule i32 %res2, 1000000000
+  %sel2 = select i1 %cmp2, i32 0, i32 1
+  ret i32 %sel2
+}
+
+; Test large immediate comparison involving low registers: a signed (sle) then an unsigned (ule) compare of "=r" results against 1000000000.
+define i32 @f32() {
+; CHECK-LABEL: f32:
+; CHECK: stepa [[REG1:%r[0-5]]]
+; CHECK: cfi [[REG1]], 1000000000
+; CHECK: stepb [[REG2:%r[0-5]]]
+; CHECK: clfi [[REG2]], 1000000000
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %cmp1 = icmp sle i32 %res1, 1000000000
+  %sel1 = select i1 %cmp1, i32 0, i32 1
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1)
+  %cmp2 = icmp ule i32 %res2, 1000000000
+  %sel2 = select i1 %cmp2, i32 0, i32 1
+  ret i32 %sel2
+}
+
+; Test memory comparison involving high registers: "=h" results are compared signed (sle) with a load from %ptr1 and unsigned (ule) with a load from %ptr2.
+define void @f33(i32 *%ptr1, i32 *%ptr2) {
+; CHECK-LABEL: f33:
+; CHECK: stepa [[REG1:%r[0-5]]]
+; CHECK: chf [[REG1]], 0(%r2)
+; CHECK: stepb [[REG2:%r[0-5]]]
+; CHECK: clhf [[REG2]], 0(%r3)
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=h"()
+  %load1 = load i32 *%ptr1
+  %cmp1 = icmp sle i32 %res1, %load1
+  %sel1 = select i1 %cmp1, i32 0, i32 1
+  %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1)
+  %load2 = load i32 *%ptr2
+  %cmp2 = icmp ule i32 %res2, %load2
+  %sel2 = select i1 %cmp2, i32 0, i32 1
+  store i32 %sel2, i32 *%ptr1
+  ret void
+}
+
+; Test memory comparison involving low registers: "=r" results are compared signed (sle) with a load from %ptr1 and unsigned (ule) with a load from %ptr2.
+define void @f34(i32 *%ptr1, i32 *%ptr2) {
+; CHECK-LABEL: f34:
+; CHECK: stepa [[REG1:%r[0-5]]]
+; CHECK: c [[REG1]], 0(%r2)
+; CHECK: stepb [[REG2:%r[0-5]]]
+; CHECK: cl [[REG2]], 0(%r3)
+; CHECK: br %r14
+  %res1 = call i32 asm "stepa $0", "=r"()
+  %load1 = load i32 *%ptr1
+  %cmp1 = icmp sle i32 %res1, %load1
+  %sel1 = select i1 %cmp1, i32 0, i32 1
+  %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1)
+  %load2 = load i32 *%ptr2
+  %cmp2 = icmp ule i32 %res2, %load2
+  %sel2 = select i1 %cmp2, i32 0, i32 1
+  store i32 %sel2, i32 *%ptr1
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/atomic-load-01.ll b/test/CodeGen/SystemZ/atomic-load-01.ll
index 3e86bcf78ae6..a5bc8833e78a 100644
--- a/test/CodeGen/SystemZ/atomic-load-01.ll
+++ b/test/CodeGen/SystemZ/atomic-load-01.ll
@@ -5,7 +5,7 @@
 ; This is just a placeholder to make sure that loads are handled.
 ; The CS-based sequence is probably far too conservative.
 define i8 @f1(i8 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cs
 ; CHECK: br %r14
   %val = load atomic i8 *%src seq_cst, align 1
diff --git a/test/CodeGen/SystemZ/atomic-load-02.ll b/test/CodeGen/SystemZ/atomic-load-02.ll
index d6168cedb8a8..2c9bbdb488a1 100644
--- a/test/CodeGen/SystemZ/atomic-load-02.ll
+++ b/test/CodeGen/SystemZ/atomic-load-02.ll
@@ -5,7 +5,7 @@
 ; This is just a placeholder to make sure that loads are handled.
 ; The CS-based sequence is probably far too conservative.
 define i16 @f1(i16 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cs
 ; CHECK: br %r14
   %val = load atomic i16 *%src seq_cst, align 2
diff --git a/test/CodeGen/SystemZ/atomic-load-03.ll b/test/CodeGen/SystemZ/atomic-load-03.ll
index fcf0cf3d5a90..1fb41f5e39aa 100644
--- a/test/CodeGen/SystemZ/atomic-load-03.ll
+++ b/test/CodeGen/SystemZ/atomic-load-03.ll
@@ -5,7 +5,7 @@
 ; This is just a placeholder to make sure that loads are handled.
 ; Using CS is probably too conservative.
 define i32 @f1(i32 %dummy, i32 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi %r2, 0
 ; CHECK: cs %r2, %r2, 0(%r3)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/atomic-load-04.ll b/test/CodeGen/SystemZ/atomic-load-04.ll
index 9593d35fef0c..92cac406e200 100644
--- a/test/CodeGen/SystemZ/atomic-load-04.ll
+++ b/test/CodeGen/SystemZ/atomic-load-04.ll
@@ -5,7 +5,7 @@
 ; This is just a placeholder to make sure that loads are handled.
 ; Using CSG is probably too conservative.
 define i64 @f1(i64 %dummy, i64 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lghi %r2, 0
 ; CHECK: csg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/atomic-store-01.ll b/test/CodeGen/SystemZ/atomic-store-01.ll
index b316e5cd6309..53ed24f623cf 100644
--- a/test/CodeGen/SystemZ/atomic-store-01.ll
+++ b/test/CodeGen/SystemZ/atomic-store-01.ll
@@ -5,7 +5,7 @@
 ; This is just a placeholder to make sure that stores are handled.
 ; The CS-based sequence is probably far too conservative.
 define void @f1(i8 %val, i8 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cs
 ; CHECK: br %r14
   store atomic i8 %val, i8 *%src seq_cst, align 1
diff --git a/test/CodeGen/SystemZ/atomic-store-02.ll b/test/CodeGen/SystemZ/atomic-store-02.ll
index c76171431883..42d6695b51d9 100644
--- a/test/CodeGen/SystemZ/atomic-store-02.ll
+++ b/test/CodeGen/SystemZ/atomic-store-02.ll
@@ -5,7 +5,7 @@
 ; This is just a placeholder to make sure that stores are handled.
 ; The CS-based sequence is probably far too conservative.
 define void @f1(i16 %val, i16 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cs
 ; CHECK: br %r14
   store atomic i16 %val, i16 *%src seq_cst, align 2
diff --git a/test/CodeGen/SystemZ/atomic-store-03.ll b/test/CodeGen/SystemZ/atomic-store-03.ll
index 6e2996313db6..846c86fd3662 100644
--- a/test/CodeGen/SystemZ/atomic-store-03.ll
+++ b/test/CodeGen/SystemZ/atomic-store-03.ll
@@ -5,11 +5,11 @@
 ; This is just a placeholder to make sure that stores are handled.
 ; Using CS is probably too conservative.
 define void @f1(i32 %val, i32 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r0, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: cs %r0, %r2, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   store atomic i32 %val, i32 *%src seq_cst, align 4
   ret void
diff --git a/test/CodeGen/SystemZ/atomic-store-04.ll b/test/CodeGen/SystemZ/atomic-store-04.ll
index 7a611c8cf081..24615b115658 100644
--- a/test/CodeGen/SystemZ/atomic-store-04.ll
+++ b/test/CodeGen/SystemZ/atomic-store-04.ll
@@ -5,11 +5,11 @@
 ; This is just a placeholder to make sure that stores are handled.
 ; Using CS is probably too conservative.
 define void @f1(i64 %val, i64 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r0, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: csg %r0, %r2, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   store atomic i64 %val, i64 *%src seq_cst, align 8
   ret void
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-01.ll b/test/CodeGen/SystemZ/atomicrmw-add-01.ll
index 2a84857f836d..25f71f31ef1b 100644
--- a/test/CodeGen/SystemZ/atomicrmw-add-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-add-01.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: ar [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check the minimum signed value.  We add 0x80000000 to the rotated word.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) {
 ; CHECK: afi [[ROT]], -2147483648
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i8 *%src, i8 -128 seq_cst
   ret i8 %res
@@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) {
 
 ; Check addition of -1.  We add 0xff000000 to the rotated word.
 define i8 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: afi [[ROT]], -16777216
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i8 *%src, i8 -1 seq_cst
   ret i8 %res
@@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) {
 
 ; Check addition of 1.  We add 0x01000000 to the rotated word.
 define i8 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi [[ROT]], 16777216
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) {
 
 ; Check the maximum signed value.  We add 0x7f000000 to the rotated word.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi [[ROT]], 2130706432
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i8 *%src, i8 127 seq_cst
   ret i8 %res
@@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) {
 ; Check addition of a large unsigned value.  We add 0xfe000000 to the
 ; rotated word, expressed as a negative AFI operand.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: afi [[ROT]], -33554432
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i8 *%src, i8 254 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-02.ll b/test/CodeGen/SystemZ/atomicrmw-add-02.ll
index 3dd482dd323d..cd4e4784c372 100644
--- a/test/CodeGen/SystemZ/atomicrmw-add-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-add-02.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: ar [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check the minimum signed value.  We add 0x80000000 to the rotated word.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) {
 ; CHECK: afi [[ROT]], -2147483648
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i16 *%src, i16 -32768 seq_cst
   ret i16 %res
@@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) {
 
 ; Check addition of -1.  We add 0xffff0000 to the rotated word.
 define i16 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: afi [[ROT]], -65536
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i16 *%src, i16 -1 seq_cst
   ret i16 %res
@@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) {
 
 ; Check addition of 1.  We add 0x00010000 to the rotated word.
 define i16 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi [[ROT]], 65536
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) {
 
 ; Check the maximum signed value.  We add 0x7fff0000 to the rotated word.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi [[ROT]], 2147418112
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i16 *%src, i16 32767 seq_cst
   ret i16 %res
@@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) {
 ; Check addition of a large unsigned value.  We add 0xfffe0000 to the
 ; rotated word, expressed as a negative AFI operand.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: afi [[ROT]], -131072
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw add i16 *%src, i16 65534 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-03.ll b/test/CodeGen/SystemZ/atomicrmw-add-03.ll
index 01eb8e0d7464..a81af72d1ed9 100644
--- a/test/CodeGen/SystemZ/atomicrmw-add-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-add-03.ll
@@ -1,16 +1,16 @@
 ; Test 32-bit atomic additions.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check addition of a variable.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: ar %r0, %r4
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check addition of 1, which can use AHI.
 define i32 @f2(i32 %dummy, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: ahi %r0, 1
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 1 seq_cst
   ret i32 %res
@@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) {
 
 ; Check the high end of the AHI range.
 define i32 @f3(i32 %dummy, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ahi %r0, 32767
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 32767 seq_cst
@@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) {
 
 ; Check the next value up, which must use AFI.
 define i32 @f4(i32 %dummy, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi %r0, 32768
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 32768 seq_cst
@@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) {
 
 ; Check the high end of the AFI range.
 define i32 @f5(i32 %dummy, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi %r0, 2147483647
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 2147483647 seq_cst
@@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) {
 
 ; Check the next value up, which gets treated as a negative operand.
 define i32 @f6(i32 %dummy, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: afi %r0, -2147483648
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 2147483648 seq_cst
@@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) {
 
 ; Check addition of -1, which can use AHI.
 define i32 @f7(i32 %dummy, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ahi %r0, -1
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 -1 seq_cst
@@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) {
 
 ; Check the low end of the AHI range.
 define i32 @f8(i32 %dummy, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ahi %r0, -32768
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 -32768 seq_cst
@@ -86,7 +86,7 @@ define i32 @f8(i32 %dummy, i32 *%src) {
 
 ; Check the next value down, which must use AFI instead.
 define i32 @f9(i32 %dummy, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: afi %r0, -32769
 ; CHECK: br %r14
   %res = atomicrmw add i32 *%src, i32 -32769 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-04.ll b/test/CodeGen/SystemZ/atomicrmw-add-04.ll
index 6b1d20bd080e..e7905491f2e0 100644
--- a/test/CodeGen/SystemZ/atomicrmw-add-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-add-04.ll
@@ -1,16 +1,16 @@
 ; Test 64-bit atomic additions.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check addition of a variable.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: agr %r0, %r4
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check addition of 1, which can use AGHI.
 define i64 @f2(i64 %dummy, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: aghi %r0, 1
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 1 seq_cst
   ret i64 %res
@@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the AGHI range.
 define i64 @f3(i64 %dummy, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r0, 32767
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 32767 seq_cst
@@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use AGFI.
 define i64 @f4(i64 %dummy, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r0, 32768
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 32768 seq_cst
@@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the AGFI range.
 define i64 @f5(i64 %dummy, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r0, 2147483647
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 2147483647 seq_cst
@@ -59,7 +59,7 @@ define i64 @f5(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use a register addition.
 define i64 @f6(i64 %dummy, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agr
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 2147483648 seq_cst
@@ -68,7 +68,7 @@ define i64 @f6(i64 %dummy, i64 *%src) {
 
 ; Check addition of -1, which can use AGHI.
 define i64 @f7(i64 %dummy, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: aghi %r0, -1
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 -1 seq_cst
@@ -77,7 +77,7 @@ define i64 @f7(i64 %dummy, i64 *%src) {
 
 ; Check the low end of the AGHI range.
 define i64 @f8(i64 %dummy, i64 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: aghi %r0, -32768
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 -32768 seq_cst
@@ -86,7 +86,7 @@ define i64 @f8(i64 %dummy, i64 *%src) {
 
 ; Check the next value down, which must use AGFI instead.
 define i64 @f9(i64 %dummy, i64 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r0, -32769
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 -32769 seq_cst
@@ -95,7 +95,7 @@ define i64 @f9(i64 %dummy, i64 *%src) {
 
 ; Check the low end of the AGFI range.
 define i64 @f10(i64 %dummy, i64 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r0, -2147483648
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 -2147483648 seq_cst
@@ -104,7 +104,7 @@ define i64 @f10(i64 %dummy, i64 *%src) {
 
 ; Check the next value down, which must use a register addition.
 define i64 @f11(i64 %dummy, i64 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agr
 ; CHECK: br %r14
   %res = atomicrmw add i64 *%src, i64 -2147483649 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-01.ll b/test/CodeGen/SystemZ/atomicrmw-and-01.ll
index ebbce8e7872b..6d2f541c3a35 100644
--- a/test/CodeGen/SystemZ/atomicrmw-and-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-and-01.ll
@@ -13,7 +13,7 @@
 ;   before being used, and that the low bits are set to 1.  This sequence is
 ;   independent of the other loop prologue instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: nr [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: oilf %r3, 16777215
 ; CHECK-SHIFT2: rll
@@ -48,7 +48,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check the minimum signed value.  We AND the rotated word with 0x80ffffff.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -57,11 +57,11 @@ define i8 @f2(i8 *%src) {
 ; CHECK: nilh [[ROT]], 33023
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -69,7 +69,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i8 *%src, i8 -128 seq_cst
   ret i8 %res
@@ -77,13 +77,13 @@ define i8 @f2(i8 *%src) {
 
 ; Check ANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfeffffff.
 define i8 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nilh [[ROT]], 65279
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i8 *%src, i8 -2 seq_cst
   ret i8 %res
@@ -91,13 +91,13 @@ define i8 @f3(i8 *%src) {
 
 ; Check ANDs of 1.  We AND the rotated word with 0x01ffffff.
 define i8 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nilh [[ROT]], 511
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -105,13 +105,13 @@ define i8 @f4(i8 *%src) {
 
 ; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: nilh [[ROT]], 32767
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i8 *%src, i8 127 seq_cst
   ret i8 %res
@@ -120,13 +120,13 @@ define i8 @f5(i8 *%src) {
 ; Check ANDs of a large unsigned value.  We AND the rotated word with
 ; 0xfdffffff.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nilh [[ROT]], 65023
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i8 *%src, i8 253 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-02.ll b/test/CodeGen/SystemZ/atomicrmw-and-02.ll
index b63ca4ab4407..572b22484b28 100644
--- a/test/CodeGen/SystemZ/atomicrmw-and-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-and-02.ll
@@ -13,7 +13,7 @@
 ;   before being used, and that the low bits are set to 1.  This sequence is
 ;   independent of the other loop prologue instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: nr [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: oill %r3, 65535
 ; CHECK-SHIFT2: rll
@@ -48,7 +48,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check the minimum signed value.  We AND the rotated word with 0x8000ffff.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -57,11 +57,11 @@ define i16 @f2(i16 *%src) {
 ; CHECK: nilh [[ROT]], 32768
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -69,7 +69,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i16 *%src, i16 -32768 seq_cst
   ret i16 %res
@@ -77,13 +77,13 @@ define i16 @f2(i16 *%src) {
 
 ; Check ANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfffeffff.
 define i16 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nilh [[ROT]], 65534
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i16 *%src, i16 -2 seq_cst
   ret i16 %res
@@ -91,13 +91,13 @@ define i16 @f3(i16 *%src) {
 
 ; Check ANDs of 1.  We AND the rotated word with 0x0001ffff.
 define i16 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nilh [[ROT]], 1
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -105,13 +105,13 @@ define i16 @f4(i16 *%src) {
 
 ; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: nilh [[ROT]], 32767
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i16 *%src, i16 32767 seq_cst
   ret i16 %res
@@ -120,13 +120,13 @@ define i16 @f5(i16 *%src) {
 ; Check ANDs of a large unsigned value.  We AND the rotated word with
 ; 0xfffdffff.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nilh [[ROT]], 65533
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw and i16 *%src, i16 65533 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-03.ll b/test/CodeGen/SystemZ/atomicrmw-and-03.ll
index ec69edcf1a47..8d813a140249 100644
--- a/test/CodeGen/SystemZ/atomicrmw-and-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-and-03.ll
@@ -1,16 +1,16 @@
 ; Test 32-bit atomic ANDs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check ANDs of a variable.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: nr %r0, %r4
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check ANDs of 1.
 define i32 @f2(i32 %dummy, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: nilf %r0, 1
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 1 seq_cst
   ret i32 %res
@@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) {
 
 ; Check ANDs of the low end of the NILH range.
 define i32 @f3(i32 %dummy, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nilh %r0, 0
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 65535 seq_cst
@@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) {
 
 ; Check the next value up, which must use NILF.
 define i32 @f4(i32 %dummy, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nilf %r0, 65536
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 65536 seq_cst
@@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) {
 
 ; Check the largest useful NILL value.
 define i32 @f5(i32 %dummy, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: nill %r0, 65534
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 -2 seq_cst
@@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) {
 
 ; Check the low end of the NILL range.
 define i32 @f6(i32 %dummy, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nill %r0, 0
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 -65536 seq_cst
@@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) {
 
 ; Check the largest useful NILH value, which is one less than the above.
 define i32 @f7(i32 %dummy, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: nilh %r0, 65534
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 -65537 seq_cst
@@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) {
 
 ; Check the highest useful NILF value, which is one less than the above.
 define i32 @f8(i32 %dummy, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: nilf %r0, 4294901758
 ; CHECK: br %r14
   %res = atomicrmw and i32 *%src, i32 -65538 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-04.ll b/test/CodeGen/SystemZ/atomicrmw-and-04.ll
index 71f29baa0e6f..89899a6a03af 100644
--- a/test/CodeGen/SystemZ/atomicrmw-and-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-and-04.ll
@@ -1,156 +1,170 @@
 ; Test 64-bit atomic ANDs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check ANDs of a variable.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: ngr %r0, %r4
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw and i64 *%src, i64 %b seq_cst
   ret i64 %res
 }
 
-; Check ANDs of 1, which must be done using a register.
+; Check ANDs of 1, which are done using a register.  (We could use RISBG
+; instead, but that isn't implemented yet.)
 define i64 @f2(i64 %dummy, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ngr
 ; CHECK: br %r14
   %res = atomicrmw and i64 *%src, i64 1 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NIHF range.
+; Check the equivalent of NIHF with 1, which can use RISBG instead.
 define i64 @f3(i64 %dummy, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: lgr %r0, %r2
-; CHECK: nihf %r0, 0
+; CHECK: risbg %r0, %r2, 31, 191, 0
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 4294967295 seq_cst
+  %res = atomicrmw and i64 *%src, i64 8589934591 seq_cst
   ret i64 %res
 }
 
-; Check the next value up, which must use a register.
+; Check the lowest NIHF value outside the range of RISBG.
 define i64 @f4(i64 %dummy, i64 *%src) {
-; CHECK: f4:
-; CHECK: ngr
+; CHECK-LABEL: f4:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: nihf %r0, 2
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 4294967296 seq_cst
+  %res = atomicrmw and i64 *%src, i64 12884901887 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NIHH range.
+; Check the next value up, which must use a register.
 define i64 @f5(i64 %dummy, i64 *%src) {
-; CHECK: f5:
-; CHECK: nihh %r0, 0
+; CHECK-LABEL: f5:
+; CHECK: ngr
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 281474976710655 seq_cst
+  %res = atomicrmw and i64 *%src, i64 12884901888 seq_cst
   ret i64 %res
 }
 
-; Check the next value up, which must use a register.
+; Check the lowest NIHH value outside the range of RISBG.
 define i64 @f6(i64 %dummy, i64 *%src) {
-; CHECK: f6:
-; CHECK: ngr
+; CHECK-LABEL: f6:
+; CHECK: nihh {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 281474976710656 seq_cst
+  %res = atomicrmw and i64 *%src, i64 844424930131967 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NILL value.
+; Check 1 << 48 (0x1000000000000), which is ANDed using a register.
 define i64 @f7(i64 %dummy, i64 *%src) {
-; CHECK: f7:
-; CHECK: nill %r0, 65534
+; CHECK-LABEL: f7:
+; CHECK: ngr
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -2 seq_cst
+  %res = atomicrmw and i64 *%src, i64 281474976710656 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NILL range.
+; Check the highest NILL value outside the range of RISBG.
 define i64 @f8(i64 %dummy, i64 *%src) {
-; CHECK: f8:
-; CHECK: nill %r0, 0
+; CHECK-LABEL: f8:
+; CHECK: nill {{%r[0-5]}}, 65530
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -65536 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -6 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NILH value, which is one less than the above.
+; Check the lowest NILL value outside the range of RISBG.
 define i64 @f9(i64 %dummy, i64 *%src) {
-; CHECK: f9:
-; CHECK: nilh %r0, 65534
+; CHECK-LABEL: f9:
+; CHECK: nill {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -65537 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -65534 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NILF value, which is one less than the above.
+; Check the highest useful NILF value.
 define i64 @f10(i64 %dummy, i64 *%src) {
-; CHECK: f10:
-; CHECK: nilf %r0, 4294901758
+; CHECK-LABEL: f10:
+; CHECK: nilf {{%r[0-5]}}, 4294901758
 ; CHECK: br %r14
   %res = atomicrmw and i64 *%src, i64 -65538 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NILH range.
+; Check the highest NILH value outside the range of RISBG.
 define i64 @f11(i64 %dummy, i64 *%src) {
-; CHECK: f11:
-; CHECK: nilh %r0, 0
+; CHECK-LABEL: f11:
+; CHECK: nilh {{%r[0-5]}}, 65530
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -4294901761 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -327681 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NILF range.
+; Check the lowest NILH value outside the range of RISBG.
 define i64 @f12(i64 %dummy, i64 *%src) {
-; CHECK: f12:
-; CHECK: nilf %r0, 0
+; CHECK-LABEL: f12:
+; CHECK: nilh {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -4294967296 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -4294770689 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NIHL value, which is one less than the above.
+; Check the lowest NILF value outside the range of RISBG.
 define i64 @f13(i64 %dummy, i64 *%src) {
-; CHECK: f13:
-; CHECK: nihl %r0, 65534
+; CHECK-LABEL: f13:
+; CHECK: nilf {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -4294967297 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -4294967294 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NIHL range.
+; Check the highest NIHL value outside the range of RISBG.
 define i64 @f14(i64 %dummy, i64 *%src) {
-; CHECK: f14:
-; CHECK: nihl %r0, 0
+; CHECK-LABEL: f14:
+; CHECK: nihl {{%r[0-5]}}, 65530
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -281470681743361 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -21474836481 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NIHH value, which is 1<<32 less than the above.
+; Check the lowest NIHL value outside the range of RISBG.
 define i64 @f15(i64 %dummy, i64 *%src) {
-; CHECK: f15:
-; CHECK: nihh %r0, 65534
+; CHECK-LABEL: f15:
+; CHECK: nihl {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw and i64 *%src, i64 -281474976710657 seq_cst
+  %res = atomicrmw and i64 *%src, i64 -281462091808769 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NIHF value, which is 1<<32 less than the above.
+; Check the highest NIHH value outside the range of RISBG.
 define i64 @f16(i64 %dummy, i64 *%src) {
-; CHECK: f16:
-; CHECK: nihf %r0, 4294901758
+; CHECK-LABEL: f16:
+; CHECK: nihh {{%r[0-5]}}, 65530
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -1407374883553281 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHF value.
+define i64 @f17(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f17:
+; CHECK: nihf {{%r[0-5]}}, 4294901758
 ; CHECK: br %r14
   %res = atomicrmw and i64 *%src, i64 -281479271677953 seq_cst
   ret i64 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
index c6ec77e91b3d..2b750c46e261 100644
--- a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
@@ -13,23 +13,22 @@
 ;   before being used, and that the low bits are set to 1.  This sequence is
 ;   independent of the other loop prologue instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: cr [[ROT]], %r3
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: crjle [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 39, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -37,10 +36,10 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: crjle {{%r[0-9]+}}, %r3
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -50,23 +49,22 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check signed maximum.
 define i8 @f2(i8 *%src, i8 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: cr [[ROT]], %r3
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: crjhe [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 39, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -74,10 +72,10 @@ define i8 @f2(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: crjhe {{%r[0-9]+}}, %r3
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -87,23 +85,22 @@ define i8 @f2(i8 *%src, i8 %b) {
 
 ; Check unsigned minimum.
 define i8 @f3(i8 *%src, i8 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: clr [[ROT]], %r3
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: clrjle [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 39, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -111,10 +108,10 @@ define i8 @f3(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: clrjle {{%r[0-9]+}}, %r3,
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -124,23 +121,22 @@ define i8 @f3(i8 *%src, i8 %b) {
 
 ; Check unsigned maximum.
 define i8 @f4(i8 *%src, i8 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: clr [[ROT]], %r3
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: clrjhe [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 39, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -148,10 +144,10 @@ define i8 @f4(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: clrjhe {{%r[0-9]+}}, %r3,
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -162,15 +158,15 @@ define i8 @f4(i8 *%src, i8 %b) {
 ; Check the lowest useful signed minimum value.  We need to load 0x81000000
 ; into the source register.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 33024
-; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: crjle [[ROT:%r[0-9]+]], [[SRC2]]
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw min i8 *%src, i8 -127 seq_cst
   ret i8 %res
@@ -179,15 +175,15 @@ define i8 @f5(i8 *%src) {
 ; Check the highest useful signed maximum value.  We need to load 0x7e000000
 ; into the source register.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 32256
-; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: crjhe [[ROT:%r[0-9]+]], [[SRC2]]
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw max i8 *%src, i8 126 seq_cst
   ret i8 %res
@@ -196,15 +192,15 @@ define i8 @f6(i8 *%src) {
 ; Check the lowest useful unsigned minimum value.  We need to load 0x01000000
 ; into the source register.
 define i8 @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 256
-; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: clrjle [[ROT:%r[0-9]+]], [[SRC2]],
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f7:
+; CHECK-SHIFT1-LABEL: f7:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f7:
+; CHECK-SHIFT2-LABEL: f7:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw umin i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -213,15 +209,15 @@ define i8 @f7(i8 *%src) {
 ; Check the highest useful unsigned maximum value.  We need to load 0xfe000000
 ; into the source register.
 define i8 @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 65024
-; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: clrjhe [[ROT:%r[0-9]+]], [[SRC2]],
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f8:
+; CHECK-SHIFT1-LABEL: f8:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f8:
+; CHECK-SHIFT2-LABEL: f8:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw umax i8 *%src, i8 254 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
index 9612e99b7387..98ffedf28c69 100644
--- a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
@@ -13,23 +13,22 @@
 ;   before being used, and that the low bits are set to 1.  This sequence is
 ;   independent of the other loop prologue instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: cr [[ROT]], %r3
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: crjle [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 47, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -37,10 +36,10 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: crjle {{%r[0-9]+}}, %r3
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -50,23 +49,22 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check signed maximum.
 define i16 @f2(i16 *%src, i16 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: cr [[ROT]], %r3
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: crjhe [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 47, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -74,10 +72,10 @@ define i16 @f2(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: crjhe {{%r[0-9]+}}, %r3
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -87,23 +85,22 @@ define i16 @f2(i16 *%src, i16 %b) {
 
 ; Check unsigned minimum.
 define i16 @f3(i16 *%src, i16 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: clr [[ROT]], %r3
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: clrjle [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 47, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -111,10 +108,10 @@ define i16 @f3(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: clrjle {{%r[0-9]+}}, %r3,
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -124,23 +121,22 @@ define i16 @f3(i16 *%src, i16 %b) {
 
 ; Check unsigned maximum.
 define i16 @f4(i16 *%src, i16 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
 ; CHECK: [[LOOP:\.[^:]*]]:
 ; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
-; CHECK: clr [[ROT]], %r3
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: clrjhe [[ROT]], %r3, [[KEEP:\..*]]
 ; CHECK: risbg [[ROT]], %r3, 32, 47, 0
 ; CHECK: [[KEEP]]:
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -148,10 +144,10 @@ define i16 @f4(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
-; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: clrjhe {{%r[0-9]+}}, %r3,
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: br %r14
@@ -162,15 +158,15 @@ define i16 @f4(i16 *%src, i16 %b) {
 ; Check the lowest useful signed minimum value.  We need to load 0x80010000
 ; into the source register.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 32769
-; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: crjle [[ROT:%r[0-9]+]], [[SRC2]]
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw min i16 *%src, i16 -32767 seq_cst
   ret i16 %res
@@ -179,15 +175,15 @@ define i16 @f5(i16 *%src) {
 ; Check the highest useful signed maximum value.  We need to load 0x7ffe0000
 ; into the source register.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 32766
-; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: crjhe [[ROT:%r[0-9]+]], [[SRC2]]
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw max i16 *%src, i16 32766 seq_cst
   ret i16 %res
@@ -196,15 +192,15 @@ define i16 @f6(i16 *%src) {
 ; Check the lowest useful unsigned maximum value.  We need to load 0x00010000
 ; into the source register.
 define i16 @f7(i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 1
-; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: clrjle [[ROT:%r[0-9]+]], [[SRC2]],
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f7:
+; CHECK-SHIFT1-LABEL: f7:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f7:
+; CHECK-SHIFT2-LABEL: f7:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw umin i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -213,15 +209,15 @@ define i16 @f7(i16 *%src) {
 ; Check the highest useful unsigned maximum value.  We need to load 0xfffe0000
 ; into the source register.
 define i16 @f8(i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llilh [[SRC2:%r[0-9]+]], 65534
-; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: clrjhe [[ROT:%r[0-9]+]], [[SRC2]],
 ; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f8:
+; CHECK-SHIFT1-LABEL: f8:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f8:
+; CHECK-SHIFT2-LABEL: f8:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw umax i16 *%src, i16 65534 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
index b5809bdc1693..f2152c6f28bc 100644
--- a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
@@ -1,18 +1,18 @@
-; Test 32-bit atomic minimum and maximum.
+; Test 32-bit atomic minimum and maximum.  Here we match the z10 versions,
+; which can't use LOCR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check signed minium.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: cr %r2, %r4
 ; CHECK: lr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: crjle %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lr [[NEW]], %r4
 ; CHECK: cs %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw min i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -20,15 +20,14 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check signed maximum.
 define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: cr %r2, %r4
 ; CHECK: lr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: crjhe %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lr [[NEW]], %r4
 ; CHECK: cs %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw max i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -36,15 +35,14 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check unsigned minimum.
 define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: clr %r2, %r4
 ; CHECK: lr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: clrjle %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lr [[NEW]], %r4
 ; CHECK: cs %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw umin i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -52,15 +50,14 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check unsigned maximum.
 define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: clr %r2, %r4
 ; CHECK: lr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: clrjhe %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lr [[NEW]], %r4
 ; CHECK: cs %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw umax i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -68,7 +65,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the high end of the aligned CS range.
 define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: l %r2, 4092(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3)
 ; CHECK: br %r14
@@ -79,7 +76,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the next word up, which requires CSY.
 define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ly %r2, 4096(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3)
 ; CHECK: br %r14
@@ -90,7 +87,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the high end of the aligned CSY range.
 define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ly %r2, 524284(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3)
 ; CHECK: br %r14
@@ -101,7 +98,7 @@ define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the next word up, which needs separate address logic.
 define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r3, 524288
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
@@ -113,7 +110,7 @@ define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the high end of the negative aligned CSY range.
 define i32 @f9(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ly %r2, -4(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3)
 ; CHECK: br %r14
@@ -124,7 +121,7 @@ define i32 @f9(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the low end of the CSY range.
 define i32 @f10(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ly %r2, -524288(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3)
 ; CHECK: br %r14
@@ -135,7 +132,7 @@ define i32 @f10(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the next word down, which needs separate address logic.
 define i32 @f11(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r3, -524292
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
@@ -147,7 +144,7 @@ define i32 @f11(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check that indexed addresses are not allowed.
 define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: agr %r3, %r4
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
@@ -158,18 +155,17 @@ define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) {
   ret i32 %res
 }
 
-; Check that constants are forced into a register.
+; Check that constants are handled.
 define i32 @f13(i32 %dummy, i32 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: lhi [[LIMIT:%r[0-9]+]], 42
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: cr %r2, [[LIMIT]]
 ; CHECK: lr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}le [[KEEP:\..*]]
-; CHECK: lr [[NEW]], [[LIMIT]]
+; CHECK: crjle %r2, [[LIMIT]], [[KEEP:\..*]]
+; CHECK: lhi [[NEW]], 42
 ; CHECK: cs %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw min i32 *%ptr, i32 42 seq_cst
   ret i32 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
index 68978547d3e9..037eb1aa9367 100644
--- a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
@@ -1,18 +1,18 @@
-; Test 64-bit atomic minimum and maximum.
+; Test 64-bit atomic minimum and maximum.  Here we match the z10 versions,
+; which can't use LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check signed minium.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: cgr %r2, %r4
 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: cgrjle %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lgr [[NEW]], %r4
 ; CHECK: csg %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw min i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -20,15 +20,14 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check signed maximum.
 define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: cgr %r2, %r4
 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: cgrjhe %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lgr [[NEW]], %r4
 ; CHECK: csg %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw max i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -36,15 +35,14 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check unsigned minimum.
 define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: clgr %r2, %r4
 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: clgrjle %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lgr [[NEW]], %r4
 ; CHECK: csg %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw umin i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -52,15 +50,14 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check unsigned maximum.
 define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: clgr %r2, %r4
 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: clgrjhe %r2, %r4, [[KEEP:\..*]]
 ; CHECK: lgr [[NEW]], %r4
 ; CHECK: csg %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw umax i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -68,7 +65,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the high end of the aligned CSG range.
 define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lg %r2, 524280(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3)
 ; CHECK: br %r14
@@ -79,7 +76,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the next doubleword up, which requires separate address logic.
 define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
@@ -91,7 +88,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the low end of the CSG range.
 define i64 @f7(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lg %r2, -524288(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3)
 ; CHECK: br %r14
@@ -102,7 +99,7 @@ define i64 @f7(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the next doubleword down, which requires separate address logic.
 define i64 @f8(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r3, -524296
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
@@ -114,7 +111,7 @@ define i64 @f8(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check that indexed addresses are not allowed.
 define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agr %r3, %r4
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
@@ -125,18 +122,17 @@ define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) {
   ret i64 %res
 }
 
-; Check that constants are forced into a register.
+; Check that constants are handled.
 define i64 @f10(i64 %dummy, i64 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: lghi [[LIMIT:%r[0-9]+]], 42
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LOOP:\.[^:]*]]:
-; CHECK: cgr %r2, [[LIMIT]]
 ; CHECK: lgr [[NEW:%r[0-9]+]], %r2
-; CHECK: j{{g?}}le [[KEEP:\..*]]
-; CHECK: lgr [[NEW]], [[LIMIT]]
+; CHECK: cgrjle %r2, [[LIMIT]], [[KEEP:\..*]]
+; CHECK: lghi [[NEW]], 42
 ; CHECK: csg %r2, [[NEW]], 0(%r3)
-; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: jl [[LOOP]]
 ; CHECK: br %r14
   %res = atomicrmw min i64 *%ptr, i64 42 seq_cst
   ret i64 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-01.ll b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll
index 1ede3b465be2..db5bb8ff9e79 100644
--- a/test/CodeGen/SystemZ/atomicrmw-nand-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll
@@ -13,7 +13,7 @@
 ;   before being used, and that the low bits are set to 1.  This sequence is
 ;   independent of the other loop prologue instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -23,11 +23,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -35,7 +35,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: oilf %r3, 16777215
 ; CHECK-SHIFT2: rll
@@ -49,7 +49,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check the minimum signed value.  We AND the rotated word with 0x80ffffff.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -59,11 +59,11 @@ define i8 @f2(i8 *%src) {
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -71,7 +71,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i8 *%src, i8 -128 seq_cst
   ret i8 %res
@@ -79,14 +79,14 @@ define i8 @f2(i8 *%src) {
 
 ; Check NANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfeffffff.
 define i8 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nilh [[ROT]], 65279
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i8 *%src, i8 -2 seq_cst
   ret i8 %res
@@ -94,14 +94,14 @@ define i8 @f3(i8 *%src) {
 
 ; Check NANDs of 1.  We AND the rotated word with 0x01ffffff.
 define i8 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nilh [[ROT]], 511
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -109,14 +109,14 @@ define i8 @f4(i8 *%src) {
 
 ; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: nilh [[ROT]], 32767
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i8 *%src, i8 127 seq_cst
   ret i8 %res
@@ -125,14 +125,14 @@ define i8 @f5(i8 *%src) {
 ; Check NANDs of a large unsigned value.  We AND the rotated word with
 ; 0xfdffffff.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nilh [[ROT]], 65023
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i8 *%src, i8 253 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-02.ll b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll
index d5cf864a3f79..6141543e0db2 100644
--- a/test/CodeGen/SystemZ/atomicrmw-nand-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll
@@ -13,7 +13,7 @@
 ;   before being used, and that the low bits are set to 1.  This sequence is
 ;   independent of the other loop prologue instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -23,11 +23,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -35,7 +35,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: oill %r3, 65535
 ; CHECK-SHIFT2: rll
@@ -49,7 +49,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check the minimum signed value.  We AND the rotated word with 0x8000ffff.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -59,11 +59,11 @@ define i16 @f2(i16 *%src) {
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -71,7 +71,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i16 *%src, i16 -32768 seq_cst
   ret i16 %res
@@ -79,14 +79,14 @@ define i16 @f2(i16 *%src) {
 
 ; Check NANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfffeffff.
 define i16 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nilh [[ROT]], 65534
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i16 *%src, i16 -2 seq_cst
   ret i16 %res
@@ -94,14 +94,14 @@ define i16 @f3(i16 *%src) {
 
 ; Check ANDs of 1.  We AND the rotated word with 0x0001ffff.
 define i16 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nilh [[ROT]], 1
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -109,14 +109,14 @@ define i16 @f4(i16 *%src) {
 
 ; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: nilh [[ROT]], 32767
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i16 *%src, i16 32767 seq_cst
   ret i16 %res
@@ -125,14 +125,14 @@ define i16 @f5(i16 *%src) {
 ; Check NANDs of a large unsigned value.  We AND the rotated word with
 ; 0xfffdffff.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nilh [[ROT]], 65533
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw nand i16 *%src, i16 65533 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-03.ll b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll
index cc2a0866b391..c7a6691083d3 100644
--- a/test/CodeGen/SystemZ/atomicrmw-nand-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll
@@ -1,17 +1,17 @@
 ; Test 32-bit atomic NANDs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check NANDs of a variable.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: nr %r0, %r4
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw nand i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -19,14 +19,14 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check NANDs of 1.
 define i32 @f2(i32 %dummy, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: nilf %r0, 1
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw nand i32 *%src, i32 1 seq_cst
   ret i32 %res
@@ -34,7 +34,7 @@ define i32 @f2(i32 %dummy, i32 *%src) {
 
 ; Check NANDs of the low end of the NILH range.
 define i32 @f3(i32 %dummy, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nilh %r0, 0
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
@@ -44,7 +44,7 @@ define i32 @f3(i32 %dummy, i32 *%src) {
 
 ; Check the next value up, which must use NILF.
 define i32 @f4(i32 %dummy, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nilf %r0, 65536
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
@@ -54,7 +54,7 @@ define i32 @f4(i32 %dummy, i32 *%src) {
 
 ; Check the largest useful NILL value.
 define i32 @f5(i32 %dummy, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: nill %r0, 65534
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
@@ -64,7 +64,7 @@ define i32 @f5(i32 %dummy, i32 *%src) {
 
 ; Check the low end of the NILL range.
 define i32 @f6(i32 %dummy, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: nill %r0, 0
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
@@ -74,7 +74,7 @@ define i32 @f6(i32 %dummy, i32 *%src) {
 
 ; Check the largest useful NILH value, which is one less than the above.
 define i32 @f7(i32 %dummy, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: nilh %r0, 65534
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
@@ -84,7 +84,7 @@ define i32 @f7(i32 %dummy, i32 *%src) {
 
 ; Check the highest useful NILF value, which is one less than the above.
 define i32 @f8(i32 %dummy, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: nilf %r0, 4294901758
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-04.ll b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll
index 0c857d97fe83..91fe639cd726 100644
--- a/test/CodeGen/SystemZ/atomicrmw-nand-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll
@@ -1,10 +1,10 @@
 ; Test 64-bit atomic NANDs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check NANDs of a variable.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lgr %r0, %r2
@@ -12,171 +12,165 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 ; CHECK: lcgr %r0, %r0
 ; CHECK: aghi %r0, -1
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw nand i64 *%src, i64 %b seq_cst
   ret i64 %res
 }
 
-; Check NANDs of 1, which must be done using a register.
+; Check NANDs of 1, which are done using a register.  (We could use RISBG
+; instead, but that isn't implemented yet.)
 define i64 @f2(i64 %dummy, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ngr
 ; CHECK: br %r14
   %res = atomicrmw nand i64 *%src, i64 1 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NIHF range.
+; Check the equivalent of NIHF with 1, which can use RISBG instead.
 define i64 @f3(i64 %dummy, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: lgr %r0, %r2
-; CHECK: nihf %r0, 0
+; CHECK: risbg %r0, %r2, 31, 191, 0
 ; CHECK: lcgr %r0, %r0
 ; CHECK: aghi %r0, -1
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 4294967295 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 8589934591 seq_cst
   ret i64 %res
 }
 
-; Check the next value up, which must use a register.
+; Check the lowest NIHF value outside the range of RISBG.
 define i64 @f4(i64 %dummy, i64 *%src) {
-; CHECK: f4:
-; CHECK: ngr
+; CHECK-LABEL: f4:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: nihf %r0, 2
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 4294967296 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 12884901887 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NIHH range.
+; Check the next value up, which must use a register.
 define i64 @f5(i64 %dummy, i64 *%src) {
-; CHECK: f5:
-; CHECK: nihh %r0, 0
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f5:
+; CHECK: ngr
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 281474976710655 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 12884901888 seq_cst
   ret i64 %res
 }
 
-; Check the next value up, which must use a register.
+; Check the lowest NIHH value outside the range of RISBG.
 define i64 @f6(i64 %dummy, i64 *%src) {
-; CHECK: f6:
-; CHECK: ngr
+; CHECK-LABEL: f6:
+; CHECK: nihh {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 281474976710656 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 844424930131967 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NILL value.
+; Check 0x0001000000000000 (1 << 48), which must use a register.
 define i64 @f7(i64 %dummy, i64 *%src) {
-; CHECK: f7:
-; CHECK: nill %r0, 65534
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f7:
+; CHECK: ngr
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -2 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 281474976710656 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NILL range.
+; Check the highest NILL value outside the range of RISBG.
 define i64 @f8(i64 %dummy, i64 *%src) {
-; CHECK: f8:
-; CHECK: nill %r0, 0
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f8:
+; CHECK: nill {{%r[0-5]}}, 65530
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -65536 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -6 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NILH value, which is one less than the above.
+; Check the lowest NILL value outside the range of RISBG.
 define i64 @f9(i64 %dummy, i64 *%src) {
-; CHECK: f9:
-; CHECK: nilh %r0, 65534
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f9:
+; CHECK: nill {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -65537 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -65534 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NILF value, which is one less than the above.
+; Check the highest useful NILF value.
 define i64 @f10(i64 %dummy, i64 *%src) {
-; CHECK: f10:
-; CHECK: nilf %r0, 4294901758
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f10:
+; CHECK: nilf {{%r[0-5]}}, 4294901758
 ; CHECK: br %r14
   %res = atomicrmw nand i64 *%src, i64 -65538 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NILH range.
+; Check the highest NILH value outside the range of RISBG.
 define i64 @f11(i64 %dummy, i64 *%src) {
-; CHECK: f11:
-; CHECK: nilh %r0, 0
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f11:
+; CHECK: nilh {{%r[0-5]}}, 65530
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -4294901761 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -327681 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NILF range.
+; Check the lowest NILH value outside the range of RISBG.
 define i64 @f12(i64 %dummy, i64 *%src) {
-; CHECK: f12:
-; CHECK: nilf %r0, 0
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f12:
+; CHECK: nilh {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -4294967296 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -4294770689 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NIHL value, which is one less than the above.
+; Check the lowest NILF value outside the range of RISBG.
 define i64 @f13(i64 %dummy, i64 *%src) {
-; CHECK: f13:
-; CHECK: nihl %r0, 65534
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f13:
+; CHECK: nilf {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -4294967297 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -4294967294 seq_cst
   ret i64 %res
 }
 
-; Check the low end of the NIHL range.
+; Check the highest NIHL value outside the range of RISBG.
 define i64 @f14(i64 %dummy, i64 *%src) {
-; CHECK: f14:
-; CHECK: nihl %r0, 0
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f14:
+; CHECK: nihl {{%r[0-5]}}, 65530
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -281470681743361 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -21474836481 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NIHH value, which is 1<<32 less than the above.
+; Check the lowest NIHL value outside the range of RISBG.
 define i64 @f15(i64 %dummy, i64 *%src) {
-; CHECK: f15:
-; CHECK: nihh %r0, 65534
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f15:
+; CHECK: nihl {{%r[0-5]}}, 2
 ; CHECK: br %r14
-  %res = atomicrmw nand i64 *%src, i64 -281474976710657 seq_cst
+  %res = atomicrmw nand i64 *%src, i64 -281462091808769 seq_cst
   ret i64 %res
 }
 
-; Check the highest useful NIHF value, which is 1<<32 less than the above.
+; Check the highest NIHH value outside the range of RISBG.
 define i64 @f16(i64 %dummy, i64 *%src) {
-; CHECK: f16:
-; CHECK: nihf %r0, 4294901758
-; CHECK: lcgr %r0, %r0
-; CHECK: aghi %r0, -1
+; CHECK-LABEL: f16:
+; CHECK: nihh {{%r[0-5]}}, 65530
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -1407374883553281 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHF value.
+define i64 @f17(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f17:
+; CHECK: nihf {{%r[0-5]}}, 4294901758
 ; CHECK: br %r14
   %res = atomicrmw nand i64 *%src, i64 -281479271677953 seq_cst
   ret i64 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-01.ll b/test/CodeGen/SystemZ/atomicrmw-or-01.ll
index 31303b769237..caba621addc0 100644
--- a/test/CodeGen/SystemZ/atomicrmw-or-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-or-01.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: or [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check the minimum signed value.  We OR the rotated word with 0x80000000.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) {
 ; CHECK: oilh [[ROT]], 32768
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i8 *%src, i8 -128 seq_cst
   ret i8 %res
@@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) {
 
 ; Check ORs of -2 (-1 isn't useful).  We OR the rotated word with 0xfe000000.
 define i8 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oilh [[ROT]], 65024
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i8 *%src, i8 -2 seq_cst
   ret i8 %res
@@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) {
 
 ; Check ORs of 1.  We OR the rotated word with 0x01000000.
 define i8 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oilh [[ROT]], 256
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) {
 
 ; Check the maximum signed value.  We OR the rotated word with 0x7f000000.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oilh [[ROT]], 32512
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i8 *%src, i8 127 seq_cst
   ret i8 %res
@@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) {
 ; Check ORs of a large unsigned value.  We OR the rotated word with
 ; 0xfd000000.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oilh [[ROT]], 64768
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i8 *%src, i8 253 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-02.ll b/test/CodeGen/SystemZ/atomicrmw-or-02.ll
index 9880d0b9859f..877c642a35ae 100644
--- a/test/CodeGen/SystemZ/atomicrmw-or-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-or-02.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: or [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check the minimum signed value.  We OR the rotated word with 0x80000000.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) {
 ; CHECK: oilh [[ROT]], 32768
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i16 *%src, i16 -32768 seq_cst
   ret i16 %res
@@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) {
 
 ; Check ORs of -2 (-1 isn't useful).  We OR the rotated word with 0xfffe0000.
 define i16 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oilh [[ROT]], 65534
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i16 *%src, i16 -2 seq_cst
   ret i16 %res
@@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) {
 
 ; Check ORs of 1.  We OR the rotated word with 0x00010000.
 define i16 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oilh [[ROT]], 1
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) {
 
 ; Check the maximum signed value.  We OR the rotated word with 0x7fff0000.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oilh [[ROT]], 32767
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i16 *%src, i16 32767 seq_cst
   ret i16 %res
@@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) {
 ; Check ORs of a large unsigned value.  We OR the rotated word with
 ; 0xfffd0000.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oilh [[ROT]], 65533
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw or i16 *%src, i16 65533 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-03.ll b/test/CodeGen/SystemZ/atomicrmw-or-03.ll
index 33fd21b04c6c..9a0aa86feb5d 100644
--- a/test/CodeGen/SystemZ/atomicrmw-or-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-or-03.ll
@@ -1,16 +1,16 @@
 ; Test 32-bit atomic ORs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check ORs of a variable.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: or %r0, %r4
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the lowest useful OILL value.
 define i32 @f2(i32 %dummy, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: oill %r0, 1
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 1 seq_cst
   ret i32 %res
@@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) {
 
 ; Check the high end of the OILL range.
 define i32 @f3(i32 %dummy, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oill %r0, 65535
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 65535 seq_cst
@@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) {
 
 ; Check the lowest useful OILH value, which is the next value up.
 define i32 @f4(i32 %dummy, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oilh %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 65536 seq_cst
@@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) {
 
 ; Check the lowest useful OILF value, which is the next value up.
 define i32 @f5(i32 %dummy, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oilf %r0, 65537
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 65537 seq_cst
@@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) {
 
 ; Check the high end of the OILH range.
 define i32 @f6(i32 %dummy, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oilh %r0, 65535
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 -65536 seq_cst
@@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) {
 
 ; Check the next value up, which must use OILF.
 define i32 @f7(i32 %dummy, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oilf %r0, 4294901761
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 -65535 seq_cst
@@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) {
 
 ; Check the largest useful OILF value.
 define i32 @f8(i32 %dummy, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: oilf %r0, 4294967294
 ; CHECK: br %r14
   %res = atomicrmw or i32 *%src, i32 -2 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-04.ll b/test/CodeGen/SystemZ/atomicrmw-or-04.ll
index a74f6f9dd501..dbc0f11cc74c 100644
--- a/test/CodeGen/SystemZ/atomicrmw-or-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-or-04.ll
@@ -1,16 +1,16 @@
 ; Test 64-bit atomic ORs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check ORs of a variable.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: ogr %r0, %r4
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the lowest useful OILL value.
 define i64 @f2(i64 %dummy, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: oill %r0, 1
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 1 seq_cst
   ret i64 %res
@@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the OILL range.
 define i64 @f3(i64 %dummy, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oill %r0, 65535
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 65535 seq_cst
@@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) {
 
 ; Check the lowest useful OILH value, which is the next value up.
 define i64 @f4(i64 %dummy, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oilh %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 65536 seq_cst
@@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src) {
 
 ; Check the lowest useful OILF value, which is the next value up again.
 define i64 @f5(i64 %dummy, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oilf %r0, 65537
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 65537 seq_cst
@@ -59,7 +59,7 @@ define i64 @f5(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the OILH range.
 define i64 @f6(i64 %dummy, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oilh %r0, 65535
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 4294901760 seq_cst
@@ -68,7 +68,7 @@ define i64 @f6(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use OILF.
 define i64 @f7(i64 %dummy, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oilf %r0, 4294901761
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 4294901761 seq_cst
@@ -77,7 +77,7 @@ define i64 @f7(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the OILF range.
 define i64 @f8(i64 %dummy, i64 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: oilf %r0, 4294967295
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 4294967295 seq_cst
@@ -86,7 +86,7 @@ define i64 @f8(i64 %dummy, i64 *%src) {
 
 ; Check the lowest useful OIHL value, which is one greater than above.
 define i64 @f9(i64 %dummy, i64 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: oihl %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 4294967296 seq_cst
@@ -96,7 +96,7 @@ define i64 @f9(i64 %dummy, i64 *%src) {
 ; Check the next value up, which must use a register.  (We could use
 ; combinations of OIH* and OIL* instead, but that isn't implemented.)
 define i64 @f10(i64 %dummy, i64 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ogr
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 4294967297 seq_cst
@@ -105,7 +105,7 @@ define i64 @f10(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the OIHL range.
 define i64 @f11(i64 %dummy, i64 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: oihl %r0, 65535
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 281470681743360 seq_cst
@@ -114,7 +114,7 @@ define i64 @f11(i64 %dummy, i64 *%src) {
 
 ; Check the lowest useful OIHH value, which is 1<<32 greater than above.
 define i64 @f12(i64 %dummy, i64 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: oihh %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 281474976710656 seq_cst
@@ -123,7 +123,7 @@ define i64 @f12(i64 %dummy, i64 *%src) {
 
 ; Check the lowest useful OIHF value, which is 1<<32 greater again.
 define i64 @f13(i64 %dummy, i64 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: oihf %r0, 65537
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 281479271677952 seq_cst
@@ -132,7 +132,7 @@ define i64 @f13(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the OIHH range.
 define i64 @f14(i64 %dummy, i64 *%src) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: oihh %r0, 65535
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 18446462598732840960 seq_cst
@@ -141,7 +141,7 @@ define i64 @f14(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use a register.
 define i64 @f15(i64 %dummy, i64 *%src) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: ogr
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 18446462598732840961 seq_cst
@@ -150,7 +150,7 @@ define i64 @f15(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the OIHF range.
 define i64 @f16(i64 %dummy, i64 *%src) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: oihf %r0, 4294967295
 ; CHECK: br %r14
   %res = atomicrmw or i64 *%src, i64 -4294967296 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-01.ll b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll
index d073dc5ec29f..2c08ebd9f5fc 100644
--- a/test/CodeGen/SystemZ/atomicrmw-sub-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: sr [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check the minimum signed value.  We add 0x80000000 to the rotated word.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) {
 ; CHECK: afi [[ROT]], -2147483648
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i8 *%src, i8 -128 seq_cst
   ret i8 %res
@@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) {
 
 ; Check subtraction of -1.  We add 0x01000000 to the rotated word.
 define i8 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: afi [[ROT]], 16777216
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i8 *%src, i8 -1 seq_cst
   ret i8 %res
@@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) {
 
 ; Check subtraction of 1.  We add 0xff000000 to the rotated word.
 define i8 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi [[ROT]], -16777216
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) {
 
 ; Check the maximum signed value.  We add 0x81000000 to the rotated word.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi [[ROT]], -2130706432
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i8 *%src, i8 127 seq_cst
   ret i8 %res
@@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) {
 ; Check subtraction of a large unsigned value.  We add 0x02000000 to the
 ; rotated word.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: afi [[ROT]], 33554432
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i8 *%src, i8 254 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-02.ll b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll
index 449d92ff3b30..f82ebd9aaaae 100644
--- a/test/CodeGen/SystemZ/atomicrmw-sub-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: sr [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check the minimum signed value.  We add 0x80000000 to the rotated word.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) {
 ; CHECK: afi [[ROT]], -2147483648
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i16 *%src, i16 -32768 seq_cst
   ret i16 %res
@@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) {
 
 ; Check subtraction of -1.  We add 0x00010000 to the rotated word.
 define i16 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: afi [[ROT]], 65536
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i16 *%src, i16 -1 seq_cst
   ret i16 %res
@@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) {
 
 ; Check subtraction of 1.  We add 0xffff0000 to the rotated word.
 define i16 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi [[ROT]], -65536
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) {
 
 ; Check the maximum signed value.  We add 0x80010000 to the rotated word.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi [[ROT]], -2147418112
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i16 *%src, i16 32767 seq_cst
   ret i16 %res
@@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) {
 ; Check subtraction of a large unsigned value.  We add 0x00020000 to the
 ; rotated word.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: afi [[ROT]], 131072
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw sub i16 *%src, i16 65534 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-03.ll b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll
index da07fb57ef72..a3031c6806b5 100644
--- a/test/CodeGen/SystemZ/atomicrmw-sub-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll
@@ -1,16 +1,16 @@
 ; Test 32-bit atomic subtractions.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check subtraction of a variable.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: sr %r0, %r4
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check subtraction of 1, which can use AHI.
 define i32 @f2(i32 %dummy, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: ahi %r0, -1
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 1 seq_cst
   ret i32 %res
@@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) {
 
 ; Check the low end of the AHI range.
 define i32 @f3(i32 %dummy, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ahi %r0, -32768
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 32768 seq_cst
@@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) {
 
 ; Check the next value down, which must use AFI.
 define i32 @f4(i32 %dummy, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi %r0, -32769
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 32769 seq_cst
@@ -50,7 +50,7 @@ define i32 @f4(i32 %dummy, i32 *%src) {
 
 ; Check the low end of the AFI range.
 define i32 @f5(i32 %dummy, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi %r0, -2147483648
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 2147483648 seq_cst
@@ -59,7 +59,7 @@ define i32 @f5(i32 %dummy, i32 *%src) {
 
 ; Check the next value up, which gets treated as a positive operand.
 define i32 @f6(i32 %dummy, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: afi %r0, 2147483647
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 2147483649 seq_cst
@@ -68,7 +68,7 @@ define i32 @f6(i32 %dummy, i32 *%src) {
 
 ; Check subtraction of -1, which can use AHI.
 define i32 @f7(i32 %dummy, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ahi %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 -1 seq_cst
@@ -77,7 +77,7 @@ define i32 @f7(i32 %dummy, i32 *%src) {
 
 ; Check the high end of the AHI range.
 define i32 @f8(i32 %dummy, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ahi %r0, 32767
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 -32767 seq_cst
@@ -86,7 +86,7 @@ define i32 @f8(i32 %dummy, i32 *%src) {
 
 ; Check the next value down, which must use AFI instead.
 define i32 @f9(i32 %dummy, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: afi %r0, 32768
 ; CHECK: br %r14
   %res = atomicrmw sub i32 *%src, i32 -32768 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-04.ll b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll
index 26f75afe85f4..911648b6137e 100644
--- a/test/CodeGen/SystemZ/atomicrmw-sub-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll
@@ -1,16 +1,16 @@
 ; Test 64-bit atomic subtractions.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check subtraction of a variable.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: sgr %r0, %r4
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check subtraction of 1, which can use AGHI.
 define i64 @f2(i64 %dummy, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: aghi %r0, -1
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 1 seq_cst
   ret i64 %res
@@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) {
 
 ; Check the low end of the AGHI range.
 define i64 @f3(i64 %dummy, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r0, -32768
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 32768 seq_cst
@@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use AGFI.
 define i64 @f4(i64 %dummy, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r0, -32769
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 32769 seq_cst
@@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src) {
 
 ; Check the low end of the AGFI range.
 define i64 @f5(i64 %dummy, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r0, -2147483648
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 2147483648 seq_cst
@@ -59,7 +59,7 @@ define i64 @f5(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use a register operation.
 define i64 @f6(i64 %dummy, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sgr
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 2147483649 seq_cst
@@ -68,7 +68,7 @@ define i64 @f6(i64 %dummy, i64 *%src) {
 
 ; Check subtraction of -1, which can use AGHI.
 define i64 @f7(i64 %dummy, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: aghi %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 -1 seq_cst
@@ -77,7 +77,7 @@ define i64 @f7(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the AGHI range.
 define i64 @f8(i64 %dummy, i64 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: aghi %r0, 32767
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 -32767 seq_cst
@@ -86,7 +86,7 @@ define i64 @f8(i64 %dummy, i64 *%src) {
 
 ; Check the next value down, which must use AGFI instead.
 define i64 @f9(i64 %dummy, i64 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r0, 32768
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 -32768 seq_cst
@@ -95,7 +95,7 @@ define i64 @f9(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the AGFI range.
 define i64 @f10(i64 %dummy, i64 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r0, 2147483647
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 -2147483647 seq_cst
@@ -104,7 +104,7 @@ define i64 @f10(i64 %dummy, i64 *%src) {
 
 ; Check the next value down, which must use a register operation.
 define i64 @f11(i64 %dummy, i64 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: sgr
 ; CHECK: br %r14
   %res = atomicrmw sub i64 *%src, i64 -2147483648 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll
index e33597b7297d..52575c634971 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll
@@ -11,7 +11,7 @@
 ;   being used in the RISBG (in contrast to things like atomic addition,
 ;   which shift %r3 left so that %b is at the high end of the word).
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -20,11 +20,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: risbg [[ROT]], %r3, 32, 39, 24
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT: f1:
+; CHECK-SHIFT-LABEL: f1:
 ; CHECK-SHIFT-NOT: %r3
 ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT-NOT: %r3
@@ -43,12 +43,12 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; Check exchange with a constant.  We should force the constant into
 ; a register and use the sequence above.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi [[VALUE:%r[0-9]+]], 88
 ; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 39, 24
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT: f2:
+; CHECK-SHIFT-LABEL: f2:
 ; CHECK-SHIFT: br %r14
   %res = atomicrmw xchg i8 *%src, i8 88 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll
index 31f802625a32..04be623ada89 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll
@@ -11,7 +11,7 @@
 ;   being used in the RISBG (in contrast to things like atomic addition,
 ;   which shift %r3 left so that %b is at the high end of the word).
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -20,11 +20,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: risbg [[ROT]], %r3, 32, 47, 16
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT: f1:
+; CHECK-SHIFT-LABEL: f1:
 ; CHECK-SHIFT-NOT: %r3
 ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT-NOT: %r3
@@ -43,12 +43,12 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; Check exchange with a constant.  We should force the constant into
 ; a register and use the sequence above.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi [[VALUE:%r[0-9]+]], -25536
 ; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 47, 16
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT: f2:
+; CHECK-SHIFT-LABEL: f2:
 ; CHECK-SHIFT: br %r14
   %res = atomicrmw xchg i16 *%src, i16 40000 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
index 37581ab9d602..a602a02a189e 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
@@ -4,11 +4,11 @@
 
 ; Check register exchange.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: cs %r2, %r4, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xchg i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -16,7 +16,7 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the high end of the aligned CS range.
 define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 4092(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3)
 ; CHECK: br %r14
@@ -27,7 +27,7 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the next word up, which requires CSY.
 define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ly %r2, 4096(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3)
 ; CHECK: br %r14
@@ -38,7 +38,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the high end of the aligned CSY range.
 define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ly %r2, 524284(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3)
 ; CHECK: br %r14
@@ -49,7 +49,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the next word up, which needs separate address logic.
 define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r3, 524288
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
@@ -61,7 +61,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the high end of the negative aligned CSY range.
 define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ly %r2, -4(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3)
 ; CHECK: br %r14
@@ -72,7 +72,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the low end of the CSY range.
 define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ly %r2, -524288(%r3)
 ; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3)
 ; CHECK: br %r14
@@ -83,7 +83,7 @@ define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the next word down, which needs separate address logic.
 define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r3, -524292
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
@@ -95,7 +95,7 @@ define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check that indexed addresses are not allowed.
 define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agr %r3, %r4
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
@@ -109,12 +109,12 @@ define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) {
 ; Check exchange of a constant.  We should force it into a register and
 ; use the sequence above.
 define i32 @f10(i32 %dummy, i32 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llill [[VALUE:%r[0-9+]]], 40000
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: cs %r2, [[VALUE]], 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xchg i32 *%src, i32 40000 seq_cst
   ret i32 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
index a68295ea8b04..80c0eeb7121b 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
@@ -4,11 +4,11 @@
 
 ; Check register exchange.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: csg %r2, %r4, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xchg i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -16,7 +16,7 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the high end of the aligned CSG range.
 define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 524280(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3)
 ; CHECK: br %r14
@@ -27,7 +27,7 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the next doubleword up, which requires separate address logic.
 define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r3, 524288
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
@@ -39,7 +39,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the low end of the CSG range.
 define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lg %r2, -524288(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3)
 ; CHECK: br %r14
@@ -50,7 +50,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the next doubleword down, which requires separate address logic.
 define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r3, -524296
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
@@ -62,7 +62,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check that indexed addresses are not allowed.
 define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agr %r3, %r4
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
@@ -76,12 +76,12 @@ define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) {
 ; Check exchange of a constant.  We should force it into a register and
 ; use the sequence above.
 define i64 @f7(i64 %dummy, i64 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llilf [[VALUE:%r[0-9+]]], 3000000000
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^:]*]]:
 ; CHECK: csg %r2, [[VALUE]], 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xchg i64 *%ptr, i64 3000000000 seq_cst
   ret i64 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-01.ll b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll
index 13cdf02f486c..e8fef2d31d2c 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xor-01.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i8 @f1(i8 *%src, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK: xr [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 24
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i8 @f1(i8 *%src, i8 %b) {
 
 ; Check the minimum signed value.  We XOR the rotated word with 0x80000000.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i8 @f2(i8 *%src) {
 ; CHECK: xilf [[ROT]], 2147483648
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i8 @f2(i8 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i8 *%src, i8 -128 seq_cst
   ret i8 %res
@@ -76,13 +76,13 @@ define i8 @f2(i8 *%src) {
 
 ; Check XORs of -1.  We XOR the rotated word with 0xff000000.
 define i8 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xilf [[ROT]], 4278190080
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i8 *%src, i8 -1 seq_cst
   ret i8 %res
@@ -90,13 +90,13 @@ define i8 @f3(i8 *%src) {
 
 ; Check XORs of 1.  We XOR the rotated word with 0x01000000.
 define i8 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xilf [[ROT]], 16777216
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i8 *%src, i8 1 seq_cst
   ret i8 %res
@@ -104,13 +104,13 @@ define i8 @f4(i8 *%src) {
 
 ; Check the maximum signed value.  We XOR the rotated word with 0x7f000000.
 define i8 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xilf [[ROT]], 2130706432
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i8 *%src, i8 127 seq_cst
   ret i8 %res
@@ -119,13 +119,13 @@ define i8 @f5(i8 *%src) {
 ; Check XORs of a large unsigned value.  We XOR the rotated word with
 ; 0xfd000000.
 define i8 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xilf [[ROT]], 4244635648
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i8 *%src, i8 253 seq_cst
   ret i8 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-02.ll b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll
index 4faa64f8e837..9405c2ec0c08 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xor-02.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll
@@ -13,7 +13,7 @@
 ;   before being used.  This shift is independent of the other loop prologue
 ;   instructions.
 define i16 @f1(i16 *%src, i16 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -22,11 +22,11 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK: xr [[ROT]], %r3
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1-LABEL: f1:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -34,7 +34,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2-LABEL: f1:
 ; CHECK-SHIFT2: sll %r3, 16
 ; CHECK-SHIFT2: rll
 ; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3
@@ -47,7 +47,7 @@ define i16 @f1(i16 *%src, i16 %b) {
 
 ; Check the minimum signed value.  We XOR the rotated word with 0x80000000.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK: nill %r2, 65532
 ; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
@@ -56,11 +56,11 @@ define i16 @f2(i16 *%src) {
 ; CHECK: xilf [[ROT]], 2147483648
 ; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
 ; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1-LABEL: f2:
 ; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
 ; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT1: rll
@@ -68,7 +68,7 @@ define i16 @f2(i16 *%src) {
 ; CHECK-SHIFT1: rll
 ; CHECK-SHIFT1: br %r14
 ;
-; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2-LABEL: f2:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i16 *%src, i16 -32768 seq_cst
   ret i16 %res
@@ -76,13 +76,13 @@ define i16 @f2(i16 *%src) {
 
 ; Check XORs of -1.  We XOR the rotated word with 0xffff0000.
 define i16 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xilf [[ROT]], 4294901760
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1-LABEL: f3:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2-LABEL: f3:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i16 *%src, i16 -1 seq_cst
   ret i16 %res
@@ -90,13 +90,13 @@ define i16 @f3(i16 *%src) {
 
 ; Check XORs of 1.  We XOR the rotated word with 0x00010000.
 define i16 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xilf [[ROT]], 65536
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1-LABEL: f4:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2-LABEL: f4:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i16 *%src, i16 1 seq_cst
   ret i16 %res
@@ -104,13 +104,13 @@ define i16 @f4(i16 *%src) {
 
 ; Check the maximum signed value.  We XOR the rotated word with 0x7fff0000.
 define i16 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xilf [[ROT]], 2147418112
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1-LABEL: f5:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2-LABEL: f5:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i16 *%src, i16 32767 seq_cst
   ret i16 %res
@@ -119,13 +119,13 @@ define i16 @f5(i16 *%src) {
 ; Check XORs of a large unsigned value.  We XOR the rotated word with
 ; 0xfffd0000.
 define i16 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xilf [[ROT]], 4294770688
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1-LABEL: f6:
 ; CHECK-SHIFT1: br %r14
-; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2-LABEL: f6:
 ; CHECK-SHIFT2: br %r14
   %res = atomicrmw xor i16 *%src, i16 65533 seq_cst
   ret i16 %res
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-03.ll b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll
index 23884f888e13..d719d0bd7140 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xor-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll
@@ -1,16 +1,16 @@
 ; Test 32-bit atomic XORs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check XORs of a variable.
 define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: xr %r0, %r4
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xor i32 *%src, i32 %b seq_cst
   ret i32 %res
@@ -18,13 +18,13 @@ define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Check the lowest useful constant.
 define i32 @f2(i32 %dummy, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lr %r0, %r2
 ; CHECK: xilf %r0, 1
 ; CHECK: cs %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xor i32 *%src, i32 1 seq_cst
   ret i32 %res
@@ -32,7 +32,7 @@ define i32 @f2(i32 %dummy, i32 *%src) {
 
 ; Check an arbitrary constant.
 define i32 @f3(i32 %dummy, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xilf %r0, 3000000000
 ; CHECK: br %r14
   %res = atomicrmw xor i32 *%src, i32 3000000000 seq_cst
@@ -41,7 +41,7 @@ define i32 @f3(i32 %dummy, i32 *%src) {
 
 ; Check bitwise negation.
 define i32 @f4(i32 %dummy, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
   %res = atomicrmw xor i32 *%src, i32 -1 seq_cst
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-04.ll b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll
index 21130fb47776..c17a879f37cd 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xor-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll
@@ -1,16 +1,16 @@
 ; Test 64-bit atomic XORs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check XORs of a variable.
 define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: xgr %r0, %r4
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 %b seq_cst
   ret i64 %res
@@ -18,13 +18,13 @@ define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
 
 ; Check the lowest useful XILF value.
 define i64 @f2(i64 %dummy, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 0(%r3)
 ; CHECK: [[LABEL:\.[^ ]*]]:
 ; CHECK: lgr %r0, %r2
 ; CHECK: xilf %r0, 1
 ; CHECK: csg %r2, %r0, 0(%r3)
-; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: jl [[LABEL]]
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 1 seq_cst
   ret i64 %res
@@ -32,7 +32,7 @@ define i64 @f2(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the XILF range.
 define i64 @f3(i64 %dummy, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xilf %r0, 4294967295
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 4294967295 seq_cst
@@ -41,7 +41,7 @@ define i64 @f3(i64 %dummy, i64 *%src) {
 
 ; Check the lowest useful XIHF value, which is one greater than above.
 define i64 @f4(i64 %dummy, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xihf %r0, 1
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 4294967296 seq_cst
@@ -51,7 +51,7 @@ define i64 @f4(i64 %dummy, i64 *%src) {
 ; Check the next value up, which must use a register.  (We could use
 ; combinations of XIH* and XIL* instead, but that isn't implemented.)
 define i64 @f5(i64 %dummy, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xgr
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 4294967297 seq_cst
@@ -60,7 +60,7 @@ define i64 @f5(i64 %dummy, i64 *%src) {
 
 ; Check the high end of the XIHF range.
 define i64 @f6(i64 %dummy, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xihf %r0, 4294967295
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 -4294967296 seq_cst
@@ -69,7 +69,7 @@ define i64 @f6(i64 %dummy, i64 *%src) {
 
 ; Check the next value up, which must use a register.
 define i64 @f7(i64 %dummy, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: xgr
 ; CHECK: br %r14
   %res = atomicrmw xor i64 *%src, i64 -4294967295 seq_cst
diff --git a/test/CodeGen/SystemZ/branch-01.ll b/test/CodeGen/SystemZ/branch-01.ll
index 8ff91ac38e80..12ed2d32a801 100644
--- a/test/CodeGen/SystemZ/branch-01.ll
+++ b/test/CodeGen/SystemZ/branch-01.ll
@@ -3,10 +3,10 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(i8 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: mvi 0(%r2), 1
-; CHECK: j{{g?}} .L[[LABEL]]
+; CHECK: j .L[[LABEL]]
   br label %loop
 loop:
   store volatile i8 1, i8 *%dest
diff --git a/test/CodeGen/SystemZ/branch-02.ll b/test/CodeGen/SystemZ/branch-02.ll
index cde9b568b38c..38b5d27049d8 100644
--- a/test/CodeGen/SystemZ/branch-02.ll
+++ b/test/CodeGen/SystemZ/branch-02.ll
@@ -1,14 +1,15 @@
 ; Test all condition-code masks that are relevant for signed integer
-; comparisons.
+; comparisons, in cases where a separate branch is better than COMPARE
+; AND BRANCH.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(i32 *%src, i32 %target) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: c %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}e .L[[LABEL]]
+; CHECK-NEXT: je .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -19,11 +20,11 @@ exit:
 }
 
 define void @f2(i32 *%src, i32 %target) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: c %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}lh .L[[LABEL]]
+; CHECK-NEXT: jlh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -34,11 +35,11 @@ exit:
 }
 
 define void @f3(i32 *%src, i32 %target) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: c %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}le .L[[LABEL]]
+; CHECK-NEXT: jle .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -49,11 +50,11 @@ exit:
 }
 
 define void @f4(i32 *%src, i32 %target) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: c %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}l .L[[LABEL]]
+; CHECK-NEXT: jl .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -64,11 +65,11 @@ exit:
 }
 
 define void @f5(i32 *%src, i32 %target) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: c %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}h .L[[LABEL]]
+; CHECK-NEXT: jh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -79,11 +80,11 @@ exit:
 }
 
 define void @f6(i32 *%src, i32 %target) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: c %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}he .L[[LABEL]]
+; CHECK-NEXT: jhe .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
diff --git a/test/CodeGen/SystemZ/branch-03.ll b/test/CodeGen/SystemZ/branch-03.ll
index 1e447d034a39..ef31a9c696ea 100644
--- a/test/CodeGen/SystemZ/branch-03.ll
+++ b/test/CodeGen/SystemZ/branch-03.ll
@@ -3,11 +3,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 define void @f1(i32 *%src, i32 %target) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: cl %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}le .L[[LABEL]]
+; CHECK-NEXT: jle .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -18,11 +18,11 @@ exit:
 }
 
 define void @f2(i32 *%src, i32 %target) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: cl %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}l .L[[LABEL]]
+; CHECK-NEXT: jl .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -33,11 +33,11 @@ exit:
 }
 
 define void @f3(i32 *%src, i32 %target) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: cl %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}h .L[[LABEL]]
+; CHECK-NEXT: jh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
@@ -48,11 +48,11 @@ exit:
 }
 
 define void @f4(i32 *%src, i32 %target) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: cl %r3, 0(%r2)
-; CHECK-NEXT: j{{g?}}he .L[[LABEL]]
+; CHECK-NEXT: jhe .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile i32 *%src
diff --git a/test/CodeGen/SystemZ/branch-04.ll b/test/CodeGen/SystemZ/branch-04.ll
index 3d4175041db2..fafb234616f1 100644
--- a/test/CodeGen/SystemZ/branch-04.ll
+++ b/test/CodeGen/SystemZ/branch-04.ll
@@ -4,11 +4,11 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define void @f1(float *%src, float %target) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}e .L[[LABEL]]
+; CHECK-NEXT: je .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -19,11 +19,11 @@ exit:
 }
 
 define void @f2(float *%src, float %target) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}lh .L[[LABEL]]
+; CHECK-NEXT: jlh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -34,11 +34,11 @@ exit:
 }
 
 define void @f3(float *%src, float %target) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}le .L[[LABEL]]
+; CHECK-NEXT: jle .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -49,11 +49,11 @@ exit:
 }
 
 define void @f4(float *%src, float %target) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}l .L[[LABEL]]
+; CHECK-NEXT: jl .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -64,11 +64,11 @@ exit:
 }
 
 define void @f5(float *%src, float %target) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}h .L[[LABEL]]
+; CHECK-NEXT: jh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -79,11 +79,11 @@ exit:
 }
 
 define void @f6(float *%src, float %target) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}he .L[[LABEL]]
+; CHECK-NEXT: jhe .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -94,11 +94,11 @@ exit:
 }
 
 define void @f7(float *%src, float %target) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}nlh .L[[LABEL]]
+; CHECK-NEXT: jnlh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -109,11 +109,11 @@ exit:
 }
 
 define void @f8(float *%src, float %target) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}ne .L[[LABEL]]
+; CHECK-NEXT: jne .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -124,11 +124,11 @@ exit:
 }
 
 define void @f9(float *%src, float %target) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}nh .L[[LABEL]]
+; CHECK-NEXT: jnh .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -139,11 +139,11 @@ exit:
 }
 
 define void @f10(float *%src, float %target) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}nhe .L[[LABEL]]
+; CHECK-NEXT: jnhe .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -154,11 +154,11 @@ exit:
 }
 
 define void @f11(float *%src, float %target) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}nle .L[[LABEL]]
+; CHECK-NEXT: jnle .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -169,11 +169,11 @@ exit:
 }
 
 define void @f12(float *%src, float %target) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}nl .L[[LABEL]]
+; CHECK-NEXT: jnl .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -186,11 +186,11 @@ exit:
 ; "jno" == "jump if no overflow", which corresponds to "jump if ordered"
 ; rather than "jump if not ordered" after a floating-point comparison.
 define void @f13(float *%src, float %target) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}no .L[[LABEL]]
+; CHECK-NEXT: jno .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
@@ -203,11 +203,11 @@ exit:
 ; "jo" == "jump if overflow", which corresponds to "jump if not ordered"
 ; rather than "jump if ordered" after a floating-point comparison.
 define void @f14(float *%src, float %target) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: .cfi_startproc
 ; CHECK: .L[[LABEL:.*]]:
 ; CHECK: ceb %f0, 0(%r2)
-; CHECK-NEXT: j{{g?}}o .L[[LABEL]]
+; CHECK-NEXT: jo .L[[LABEL]]
   br label %loop
 loop:
   %val = load volatile float *%src
diff --git a/test/CodeGen/SystemZ/branch-05.ll b/test/CodeGen/SystemZ/branch-05.ll
index d149e0b7013b..b2157b5ac778 100644
--- a/test/CodeGen/SystemZ/branch-05.ll
+++ b/test/CodeGen/SystemZ/branch-05.ll
@@ -3,10 +3,9 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 define i32 @f1(i32 %x, i32 %y, i32 %op) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ahi %r4, -1
-; CHECK: clfi %r4, 5
-; CHECK-NEXT: j{{g?}}g
+; CHECK: clijh %r4, 5,
 ; CHECK: llgfr [[OP64:%r[0-5]]], %r4
 ; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
 ; CHECK: larl [[BASE:%r[1-5]]]
diff --git a/test/CodeGen/SystemZ/branch-06.ll b/test/CodeGen/SystemZ/branch-06.ll
new file mode 100644
index 000000000000..2fa23b744afb
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-06.ll
@@ -0,0 +1,190 @@
+; Test all condition-code masks that are relevant for CRJ.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @foo()
+@g1 = global i16 0
+
+define void @f1(i32 %target) {
+; CHECK-LABEL: f1:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: crje %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp eq i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f2(i32 %target) {
+; CHECK-LABEL: f2:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: crjlh %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp ne i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f3(i32 %target) {
+; CHECK-LABEL: f3:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: crjle %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp sle i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f4(i32 %target) {
+; CHECK-LABEL: f4:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: crjl %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp slt i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f5(i32 %target) {
+; CHECK-LABEL: f5:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: crjh %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp sgt i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f6(i32 %target) {
+; CHECK-LABEL: f6:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: crjhe %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp sge i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Check that CRJ is used for checking equality with a zero-extending
+; character load.
+define void @f7(i8 *%targetptr) {
+; CHECK-LABEL: f7:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: llc [[REG:%r[0-5]]],
+; CHECK: crje %r2, [[REG]], .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %byte = load i8 *%targetptr
+  %target = zext i8 %byte to i32
+  %cond = icmp eq i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; ...and zero-extending i16 loads.
+define void @f8(i16 *%targetptr) {
+; CHECK-LABEL: f8:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: llh [[REG:%r[0-5]]],
+; CHECK: crje %r2, [[REG]], .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %half = load i16 *%targetptr
+  %target = zext i16 %half to i32
+  %cond = icmp eq i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; ...unless the address is a global.
+define void @f9(i16 *%targetptr) {
+; CHECK-LABEL: f9:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clhrl %r2, g1
+; CHECK: je .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %half = load i16 *@g1
+  %target = zext i16 %half to i32
+  %cond = icmp eq i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Check that CRJ is used for checking order between two zero-extending
+; byte loads, even if the original comparison was unsigned.
+define void @f10(i8 *%targetptr1) {
+; CHECK-LABEL: f10:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK-DAG: llc [[REG1:%r[0-5]]], 0(
+; CHECK-DAG: llc [[REG2:%r[0-5]]], 1(
+; CHECK: crjl [[REG1]], [[REG2]], .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %targetptr2 = getelementptr i8 *%targetptr1, i64 1
+  %byte1 = load i8 *%targetptr1
+  %byte2 = load i8 *%targetptr2
+  %ext1 = zext i8 %byte1 to i32
+  %ext2 = zext i8 %byte2 to i32
+  %cond = icmp ult i32 %ext1, %ext2
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; ...likewise halfword loads.
+define void @f11(i16 *%targetptr1) {
+; CHECK-LABEL: f11:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK-DAG: llh [[REG1:%r[0-5]]], 0(
+; CHECK-DAG: llh [[REG2:%r[0-5]]], 2(
+; CHECK: crjl [[REG1]], [[REG2]], .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %targetptr2 = getelementptr i16 *%targetptr1, i64 1
+  %half1 = load i16 *%targetptr1
+  %half2 = load i16 *%targetptr2
+  %ext1 = zext i16 %half1 to i32
+  %ext2 = zext i16 %half2 to i32
+  %cond = icmp ult i32 %ext1, %ext2
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/branch-07.ll b/test/CodeGen/SystemZ/branch-07.ll
new file mode 100644
index 000000000000..bac607133a89
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-07.ll
@@ -0,0 +1,157 @@
+; Test all condition-code masks that are relevant for CGRJ.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @foo()
+
+; Test EQ.
+define void @f1(i64 %target) {
+; CHECK-LABEL: f1:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cgrje %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp eq i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Test NE.
+define void @f2(i64 %target) {
+; CHECK-LABEL: f2:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cgrjlh %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp ne i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Test SLE.
+define void @f3(i64 %target) {
+; CHECK-LABEL: f3:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cgrjle %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp sle i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Test SLT.
+define void @f4(i64 %target) {
+; CHECK-LABEL: f4:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cgrjl %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp slt i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Test SGT.
+define void @f5(i64 %target) {
+; CHECK-LABEL: f5:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cgrjh %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp sgt i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Test SGE.
+define void @f6(i64 %target) {
+; CHECK-LABEL: f6:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cgrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp sge i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Test a vector of 0/-1 results for i32 EQ.
+define i64 @f7(i64 %a, i64 %b) {
+; CHECK-LABEL: f7:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: afi [[REG]], -268435456
+; CHECK: sra [[REG]], 31
+; CHECK: br %r14
+  %avec = bitcast i64 %a to <2 x i32>
+  %bvec = bitcast i64 %b to <2 x i32>
+  %cmp = icmp eq <2 x i32> %avec, %bvec
+  %ext = sext <2 x i1> %cmp to <2 x i32>
+  %ret = bitcast <2 x i32> %ext to i64
+  ret i64 %ret
+}
+
+; Test a vector of 0/-1 results for i32 NE.
+define i64 @f8(i64 %a, i64 %b) {
+; CHECK-LABEL: f8:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: afi [[REG]], 1879048192
+; CHECK: sra [[REG]], 31
+; CHECK: br %r14
+  %avec = bitcast i64 %a to <2 x i32>
+  %bvec = bitcast i64 %b to <2 x i32>
+  %cmp = icmp ne <2 x i32> %avec, %bvec
+  %ext = sext <2 x i1> %cmp to <2 x i32>
+  %ret = bitcast <2 x i32> %ext to i64
+  ret i64 %ret
+}
+
+; Test a vector of 0/-1 results for i64 EQ.
+define void @f9(i64 %a, i64 %b, <2 x i64> *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: afi [[REG]], -268435456
+; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32
+; CHECK: srag {{%r[0-5]}}, [[REG2]], 63
+; CHECK: br %r14
+  %avec = bitcast i64 %a to <2 x i32>
+  %bvec = bitcast i64 %b to <2 x i32>
+  %cmp = icmp eq <2 x i32> %avec, %bvec
+  %ext = sext <2 x i1> %cmp to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> *%dest
+  ret void
+}
+
+; Test a vector of 0/-1 results for i64 NE.
+define void @f10(i64 %a, i64 %b, <2 x i64> *%dest) {
+; CHECK-LABEL: f10:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: afi [[REG]], 1879048192
+; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32
+; CHECK: srag {{%r[0-5]}}, [[REG2]], 63
+; CHECK: br %r14
+  %avec = bitcast i64 %a to <2 x i32>
+  %bvec = bitcast i64 %b to <2 x i32>
+  %cmp = icmp ne <2 x i32> %avec, %bvec
+  %ext = sext <2 x i1> %cmp to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> *%dest
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/branch-08.ll b/test/CodeGen/SystemZ/branch-08.ll
new file mode 100644
index 000000000000..6741d29aec03
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-08.ll
@@ -0,0 +1,46 @@
+; Test SystemZInstrInfo::AnalyzeBranch and SystemZInstrInfo::InsertBranch.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo() noreturn
+
+; Check a case where a separate branch is needed and where the original
+; order should be reversed.
+define i32 @f1(i32 %a, i32 *%bptr) {
+; CHECK-LABEL: f1:
+; CHECK: cl %r2, 0(%r3)
+; CHECK: jl .L[[LABEL:.*]]
+; CHECK: br %r14
+; CHECK: .L[[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+entry:
+  %b = load i32 *%bptr
+  %cmp = icmp ult i32 %a, %b
+  br i1 %cmp, label %callit, label %return
+
+callit:
+  call void @foo()
+  unreachable
+
+return:
+  ret i32 1
+}
+
+; Same again with a fused compare and branch.
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: cije %r2, 0, .L[[LABEL:.*]]
+; CHECK: br %r14
+; CHECK: .L[[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+entry:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %callit, label %return
+
+callit:
+  call void @foo()
+  unreachable
+
+return:
+  ret i32 1
+}
diff --git a/test/CodeGen/SystemZ/branch-09.ll b/test/CodeGen/SystemZ/branch-09.ll
new file mode 100644
index 000000000000..5591f5bede6a
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-09.ll
@@ -0,0 +1,62 @@
+; Test all condition-code masks that are relevant for CLRJ.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @foo()
+@g1 = global i16 0
+
+define void @f1(i32 %target) {
+; CHECK-LABEL: f1:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clrjle %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp ule i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f2(i32 %target) {
+; CHECK-LABEL: f2:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clrjl %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp ult i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f3(i32 %target) {
+; CHECK-LABEL: f3:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clrjh %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp ugt i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f4(i32 %target) {
+; CHECK-LABEL: f4:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i32 @foo()
+  %cond = icmp uge i32 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/branch-10.ll b/test/CodeGen/SystemZ/branch-10.ll
new file mode 100644
index 000000000000..ec6e759e8e74
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-10.ll
@@ -0,0 +1,62 @@
+; Test all condition-code masks that are relevant for CLGRJ.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @foo()
+@g1 = global i16 0
+
+define void @f1(i64 %target) {
+; CHECK-LABEL: f1:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clgrjle %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp ule i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f2(i64 %target) {
+; CHECK-LABEL: f2:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clgrjl %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp ult i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f3(i64 %target) {
+; CHECK-LABEL: f3:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clgrjh %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp ugt i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f4(i64 %target) {
+; CHECK-LABEL: f4:
+; CHECK: .cfi_def_cfa_offset
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: clgrjhe %r2, {{%r[0-9]+}}, .L[[LABEL]]
+  br label %loop
+loop:
+  %val = call i64 @foo()
+  %cond = icmp uge i64 %val, %target
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/bswap-01.ll b/test/CodeGen/SystemZ/bswap-01.ll
index 952903df50f9..7e6c83af3f81 100644
--- a/test/CodeGen/SystemZ/bswap-01.ll
+++ b/test/CodeGen/SystemZ/bswap-01.ll
@@ -7,18 +7,18 @@ declare i64 @llvm.bswap.i64(i64 %a)
 
 ; Check 32-bit register-to-register byteswaps.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lrvr [[REGISTER:%r[0-5]]], %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
 
 ; Check 64-bit register-to-register byteswaps.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lrvgr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
diff --git a/test/CodeGen/SystemZ/bswap-02.ll b/test/CodeGen/SystemZ/bswap-02.ll
index e9b7eb5f055b..db69ea53dfe1 100644
--- a/test/CodeGen/SystemZ/bswap-02.ll
+++ b/test/CodeGen/SystemZ/bswap-02.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.bswap.i32(i32 %a)
 
 ; Check LRV with no displacement.
 define i32 @f1(i32 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lrv %r2, 0(%r2)
 ; CHECK: br %r14
   %a = load i32 *%src
@@ -16,7 +16,7 @@ define i32 @f1(i32 *%src) {
 
 ; Check the high end of the aligned LRV range.
 define i32 @f2(i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lrv %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -28,7 +28,7 @@ define i32 @f2(i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f3(i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lrv %r2, 0(%r2)
 ; CHECK: br %r14
@@ -40,7 +40,7 @@ define i32 @f3(i32 *%src) {
 
 ; Check the high end of the negative aligned LRV range.
 define i32 @f4(i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lrv %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -51,7 +51,7 @@ define i32 @f4(i32 *%src) {
 
 ; Check the low end of the LRV range.
 define i32 @f5(i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lrv %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -63,7 +63,7 @@ define i32 @f5(i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, -524292
 ; CHECK: lrv %r2, 0(%r2)
 ; CHECK: br %r14
@@ -75,7 +75,7 @@ define i32 @f6(i32 *%src) {
 
 ; Check that LRV allows an index.
 define i32 @f7(i64 %src, i64 %index) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lrv %r2, 524287({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -85,3 +85,92 @@ define i32 @f7(i64 %src, i64 %index) {
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
+
+; Check that volatile accesses do not use LRV, which might access the
+; storage multiple times.
+define i32 @f8(i32 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: l [[REG:%r[0-5]]], 0(%r2)
+; CHECK: lrvr %r2, [[REG]]
+; CHECK: br %r14
+  %a = load volatile i32 *%src
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Test a case where we spill the source of at least one LRVR.  We want
+; to use LRV if possible.
+define void @f9(i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: lrv {{%r[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %swapped0 = call i32 @llvm.bswap.i32(i32 %val0)
+  %swapped1 = call i32 @llvm.bswap.i32(i32 %val1)
+  %swapped2 = call i32 @llvm.bswap.i32(i32 %val2)
+  %swapped3 = call i32 @llvm.bswap.i32(i32 %val3)
+  %swapped4 = call i32 @llvm.bswap.i32(i32 %val4)
+  %swapped5 = call i32 @llvm.bswap.i32(i32 %val5)
+  %swapped6 = call i32 @llvm.bswap.i32(i32 %val6)
+  %swapped7 = call i32 @llvm.bswap.i32(i32 %val7)
+  %swapped8 = call i32 @llvm.bswap.i32(i32 %val8)
+  %swapped9 = call i32 @llvm.bswap.i32(i32 %val9)
+  %swapped10 = call i32 @llvm.bswap.i32(i32 %val10)
+  %swapped11 = call i32 @llvm.bswap.i32(i32 %val11)
+  %swapped12 = call i32 @llvm.bswap.i32(i32 %val12)
+  %swapped13 = call i32 @llvm.bswap.i32(i32 %val13)
+  %swapped14 = call i32 @llvm.bswap.i32(i32 %val14)
+  %swapped15 = call i32 @llvm.bswap.i32(i32 %val15)
+
+  store volatile i32 %val0, i32 *%ptr
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+
+  store volatile i32 %swapped0, i32 *%ptr
+  store volatile i32 %swapped1, i32 *%ptr
+  store volatile i32 %swapped2, i32 *%ptr
+  store volatile i32 %swapped3, i32 *%ptr
+  store volatile i32 %swapped4, i32 *%ptr
+  store volatile i32 %swapped5, i32 *%ptr
+  store volatile i32 %swapped6, i32 *%ptr
+  store volatile i32 %swapped7, i32 *%ptr
+  store volatile i32 %swapped8, i32 *%ptr
+  store volatile i32 %swapped9, i32 *%ptr
+  store volatile i32 %swapped10, i32 *%ptr
+  store volatile i32 %swapped11, i32 *%ptr
+  store volatile i32 %swapped12, i32 *%ptr
+  store volatile i32 %swapped13, i32 *%ptr
+  store volatile i32 %swapped14, i32 *%ptr
+  store volatile i32 %swapped15, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/bswap-03.ll b/test/CodeGen/SystemZ/bswap-03.ll
index 2e6bcdce2651..d9e5ad1b52f6 100644
--- a/test/CodeGen/SystemZ/bswap-03.ll
+++ b/test/CodeGen/SystemZ/bswap-03.ll
@@ -6,7 +6,7 @@ declare i64 @llvm.bswap.i64(i64 %a)
 
 ; Check LRVG with no displacement.
 define i64 @f1(i64 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lrvg %r2, 0(%r2)
 ; CHECK: br %r14
   %a = load i64 *%src
@@ -16,7 +16,7 @@ define i64 @f1(i64 *%src) {
 
 ; Check the high end of the aligned LRVG range.
 define i64 @f2(i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lrvg %r2, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -28,7 +28,7 @@ define i64 @f2(i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f3(i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lrvg %r2, 0(%r2)
 ; CHECK: br %r14
@@ -40,7 +40,7 @@ define i64 @f3(i64 *%src) {
 
 ; Check the high end of the negative aligned LRVG range.
 define i64 @f4(i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lrvg %r2, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -51,7 +51,7 @@ define i64 @f4(i64 *%src) {
 
 ; Check the low end of the LRVG range.
 define i64 @f5(i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lrvg %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -63,7 +63,7 @@ define i64 @f5(i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f6(i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, -524296
 ; CHECK: lrvg %r2, 0(%r2)
 ; CHECK: br %r14
@@ -75,7 +75,7 @@ define i64 @f6(i64 *%src) {
 
 ; Check that LRVG allows an index.
 define i64 @f7(i64 %src, i64 %index) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lrvg %r2, 524287({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -85,3 +85,92 @@ define i64 @f7(i64 %src, i64 %index) {
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
+
+; Check that volatile accesses do not use LRVG, which might access the
+; storage multiple times.
+define i64 @f8(i64 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
+; CHECK: lrvgr %r2, [[REG]]
+; CHECK: br %r14
+  %a = load volatile i64 *%src
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Test a case where we spill the source of at least one LRVGR.  We want
+; to use LRVG if possible.
+define void @f9(i64 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: lrvg {{%r[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i64 *%ptr
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %swapped0 = call i64 @llvm.bswap.i64(i64 %val0)
+  %swapped1 = call i64 @llvm.bswap.i64(i64 %val1)
+  %swapped2 = call i64 @llvm.bswap.i64(i64 %val2)
+  %swapped3 = call i64 @llvm.bswap.i64(i64 %val3)
+  %swapped4 = call i64 @llvm.bswap.i64(i64 %val4)
+  %swapped5 = call i64 @llvm.bswap.i64(i64 %val5)
+  %swapped6 = call i64 @llvm.bswap.i64(i64 %val6)
+  %swapped7 = call i64 @llvm.bswap.i64(i64 %val7)
+  %swapped8 = call i64 @llvm.bswap.i64(i64 %val8)
+  %swapped9 = call i64 @llvm.bswap.i64(i64 %val9)
+  %swapped10 = call i64 @llvm.bswap.i64(i64 %val10)
+  %swapped11 = call i64 @llvm.bswap.i64(i64 %val11)
+  %swapped12 = call i64 @llvm.bswap.i64(i64 %val12)
+  %swapped13 = call i64 @llvm.bswap.i64(i64 %val13)
+  %swapped14 = call i64 @llvm.bswap.i64(i64 %val14)
+  %swapped15 = call i64 @llvm.bswap.i64(i64 %val15)
+
+  store volatile i64 %val0, i64 *%ptr
+  store volatile i64 %val1, i64 *%ptr
+  store volatile i64 %val2, i64 *%ptr
+  store volatile i64 %val3, i64 *%ptr
+  store volatile i64 %val4, i64 *%ptr
+  store volatile i64 %val5, i64 *%ptr
+  store volatile i64 %val6, i64 *%ptr
+  store volatile i64 %val7, i64 *%ptr
+  store volatile i64 %val8, i64 *%ptr
+  store volatile i64 %val9, i64 *%ptr
+  store volatile i64 %val10, i64 *%ptr
+  store volatile i64 %val11, i64 *%ptr
+  store volatile i64 %val12, i64 *%ptr
+  store volatile i64 %val13, i64 *%ptr
+  store volatile i64 %val14, i64 *%ptr
+  store volatile i64 %val15, i64 *%ptr
+
+  store volatile i64 %swapped0, i64 *%ptr
+  store volatile i64 %swapped1, i64 *%ptr
+  store volatile i64 %swapped2, i64 *%ptr
+  store volatile i64 %swapped3, i64 *%ptr
+  store volatile i64 %swapped4, i64 *%ptr
+  store volatile i64 %swapped5, i64 *%ptr
+  store volatile i64 %swapped6, i64 *%ptr
+  store volatile i64 %swapped7, i64 *%ptr
+  store volatile i64 %swapped8, i64 *%ptr
+  store volatile i64 %swapped9, i64 *%ptr
+  store volatile i64 %swapped10, i64 *%ptr
+  store volatile i64 %swapped11, i64 *%ptr
+  store volatile i64 %swapped12, i64 *%ptr
+  store volatile i64 %swapped13, i64 *%ptr
+  store volatile i64 %swapped14, i64 *%ptr
+  store volatile i64 %swapped15, i64 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/bswap-04.ll b/test/CodeGen/SystemZ/bswap-04.ll
index 192327bd256c..29d5a7b07212 100644
--- a/test/CodeGen/SystemZ/bswap-04.ll
+++ b/test/CodeGen/SystemZ/bswap-04.ll
@@ -5,21 +5,21 @@
 declare i32 @llvm.bswap.i32(i32 %a)
 
 ; Check STRV with no displacement.
-define void @f1(i32 *%src, i32 %a) {
-; CHECK: f1:
+define void @f1(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f1:
 ; CHECK: strv %r3, 0(%r2)
 ; CHECK: br %r14
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
-  store i32 %swapped, i32 *%src
+  store i32 %swapped, i32 *%dst
   ret void
 }
 
 ; Check the high end of the aligned STRV range.
-define void @f2(i32 *%src, i32 %a) {
-; CHECK: f2:
+define void @f2(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f2:
 ; CHECK: strv %r3, 524284(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 131071
+  %ptr = getelementptr i32 *%dst, i64 131071
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
@@ -27,34 +27,34 @@ define void @f2(i32 *%src, i32 %a) {
 
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f3(i32 *%src, i32 %a) {
-; CHECK: f3:
+define void @f3(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: strv %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 131072
+  %ptr = getelementptr i32 *%dst, i64 131072
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
 }
 
 ; Check the high end of the negative aligned STRV range.
-define void @f4(i32 *%src, i32 %a) {
-; CHECK: f4:
+define void @f4(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f4:
 ; CHECK: strv %r3, -4(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 -1
+  %ptr = getelementptr i32 *%dst, i64 -1
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
 }
 
 ; Check the low end of the STRV range.
-define void @f5(i32 *%src, i32 %a) {
-; CHECK: f5:
+define void @f5(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f5:
 ; CHECK: strv %r3, -524288(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 -131072
+  %ptr = getelementptr i32 *%dst, i64 -131072
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
@@ -62,12 +62,12 @@ define void @f5(i32 *%src, i32 %a) {
 
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f6(i32 *%src, i32 %a) {
-; CHECK: f6:
+define void @f6(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, -524292
 ; CHECK: strv %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i32 *%src, i64 -131073
+  %ptr = getelementptr i32 *%dst, i64 -131073
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   store i32 %swapped, i32 *%ptr
   ret void
@@ -75,7 +75,7 @@ define void @f6(i32 *%src, i32 %a) {
 
 ; Check that STRV allows an index.
 define void @f7(i64 %src, i64 %index, i32 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: strv %r4, 524287({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -85,3 +85,15 @@ define void @f7(i64 %src, i64 %index, i32 %a) {
   store i32 %swapped, i32 *%ptr
   ret void
 }
+
+; Check that volatile stores do not use STRV, which might access the
+; storage multiple times.
+define void @f8(i32 *%dst, i32 %a) {
+; CHECK-LABEL: f8:
+; CHECK: lrvr [[REG:%r[0-5]]], %r3
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store volatile i32 %swapped, i32 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/bswap-05.ll b/test/CodeGen/SystemZ/bswap-05.ll
index e58cb80c3981..5c8361e26cea 100644
--- a/test/CodeGen/SystemZ/bswap-05.ll
+++ b/test/CodeGen/SystemZ/bswap-05.ll
@@ -5,21 +5,21 @@
 declare i64 @llvm.bswap.i64(i64 %a)
 
 ; Check STRVG with no displacement.
-define void @f1(i64 *%src, i64 %a) {
-; CHECK: f1:
+define void @f1(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f1:
 ; CHECK: strvg %r3, 0(%r2)
 ; CHECK: br %r14
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
-  store i64 %swapped, i64 *%src
+  store i64 %swapped, i64 *%dst
   ret void
 }
 
 ; Check the high end of the aligned STRVG range.
-define void @f2(i64 *%src, i64 %a) {
-; CHECK: f2:
+define void @f2(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f2:
 ; CHECK: strvg %r3, 524280(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 65535
+  %ptr = getelementptr i64 *%dst, i64 65535
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
@@ -27,34 +27,34 @@ define void @f2(i64 *%src, i64 %a) {
 
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f3(i64 *%src, i64 %a) {
-; CHECK: f3:
+define void @f3(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: strvg %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 65536
+  %ptr = getelementptr i64 *%dst, i64 65536
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
 }
 
 ; Check the high end of the negative aligned STRVG range.
-define void @f4(i64 *%src, i64 %a) {
-; CHECK: f4:
+define void @f4(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f4:
 ; CHECK: strvg %r3, -8(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 -1
+  %ptr = getelementptr i64 *%dst, i64 -1
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
 }
 
 ; Check the low end of the STRVG range.
-define void @f5(i64 *%src, i64 %a) {
-; CHECK: f5:
+define void @f5(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f5:
 ; CHECK: strvg %r3, -524288(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 -65536
+  %ptr = getelementptr i64 *%dst, i64 -65536
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
@@ -62,12 +62,12 @@ define void @f5(i64 *%src, i64 %a) {
 
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
-define void @f6(i64 *%src, i64 %a) {
-; CHECK: f6:
+define void @f6(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, -524296
 ; CHECK: strvg %r3, 0(%r2)
 ; CHECK: br %r14
-  %ptr = getelementptr i64 *%src, i64 -65537
+  %ptr = getelementptr i64 *%dst, i64 -65537
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   store i64 %swapped, i64 *%ptr
   ret void
@@ -75,7 +75,7 @@ define void @f6(i64 *%src, i64 %a) {
 
 ; Check that STRVG allows an index.
 define void @f7(i64 %src, i64 %index, i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: strvg %r4, 524287({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -85,3 +85,15 @@ define void @f7(i64 %src, i64 %index, i64 %a) {
   store i64 %swapped, i64 *%ptr
   ret void
 }
+
+; Check that volatile stores do not use STRVG, which might access the
+; storage multiple times.
+define void @f8(i64 *%dst, i64 %a) {
+; CHECK-LABEL: f8:
+; CHECK: lrvgr [[REG:%r[0-5]]], %r3
+; CHECK: stg [[REG]], 0(%r2)
+; CHECK: br %r14
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store volatile i64 %swapped, i64 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/call-01.ll b/test/CodeGen/SystemZ/call-01.ll
index 1b9172bdd819..42b6afdd98d4 100644
--- a/test/CodeGen/SystemZ/call-01.ll
+++ b/test/CodeGen/SystemZ/call-01.ll
@@ -6,7 +6,7 @@ declare i64 @bar()
 
 ; We must allocate 160 bytes for the callee and save and restore %r14.
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK: aghi %r15, -160
 ; CHECK: brasl %r14, bar@PLT
diff --git a/test/CodeGen/SystemZ/call-02.ll b/test/CodeGen/SystemZ/call-02.ll
index 07dd67bab1b6..5f14d12249f5 100644
--- a/test/CodeGen/SystemZ/call-02.ll
+++ b/test/CodeGen/SystemZ/call-02.ll
@@ -4,7 +4,7 @@
 
 ; We must allocate 160 bytes for the callee and save and restore %r14.
 define i64 @f1(i64() *%bar) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK: aghi %r15, -160
 ; CHECK: basr %r14, %r2
diff --git a/test/CodeGen/SystemZ/call-03.ll b/test/CodeGen/SystemZ/call-03.ll
new file mode 100644
index 000000000000..1f314eae58c8
--- /dev/null
+++ b/test/CodeGen/SystemZ/call-03.ll
@@ -0,0 +1,125 @@
+; Test sibling calls.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @ok(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                 float %f4, double %f6)
+declare void @uses_r6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, i64 %r6)
+declare void @uses_indirect(fp128 %r2)
+declare void @uses_stack(float %f0, float %f2, float %f4, float %f6,
+                         float %stack)
+declare i32 @returns_i32()
+declare i64 @returns_i64()
+
+; Check the maximum number of arguments that we can pass and still use
+; a sibling call.
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lzer %f0
+; CHECK-DAG: lzdr %f2
+; CHECK-DAG: lhi %r2, 1
+; CHECK-DAG: lhi %r3, 2
+; CHECK-DAG: lhi %r4, 3
+; CHECK-DAG: lghi %r5, 4
+; CHECK-DAG: {{ler %f4, %f0|lzer %f4}}
+; CHECK-DAG: {{ldr %f6, %f2|lzdr %f6}}
+; CHECK: jg ok@PLT
+  tail call void @ok(i8 1, i16 2, i32 3, i64 4, float 0.0, double 0.0,
+                     float 0.0, double 0.0)
+  ret void
+}
+
+; Check a call that uses %r6 to pass an argument.  At the moment we don't
+; use sibling calls in that case.
+define void @f2() {
+; CHECK-LABEL: f2:
+; CHECK: brasl %r14, uses_r6@PLT
+; CHECK: br %r14
+  tail call void @uses_r6(i8 1, i16 2, i32 3, i64 4, i64 5)
+  ret void
+}
+
+; Check a call that passes indirect arguments.  We can't use sibling
+; calls in that case.
+define void @f3() {
+; CHECK-LABEL: f3:
+; CHECK: brasl %r14, uses_indirect@PLT
+; CHECK: br %r14
+  tail call void @uses_indirect(fp128 0xL00000000000000000000000000000000)
+  ret void
+}
+
+; Check a call that uses direct stack arguments, which again prevents
+; sibling calls.
+define void @f4() {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, uses_stack@PLT
+; CHECK: br %r14
+  tail call void @uses_stack(float 0.0, float 0.0, float 0.0, float 0.0,
+                             float 0.0)
+  ret void
+}
+
+; Check an indirect call.  In this case the only acceptable choice for
+; the target register is %r1.
+define void @f5(void(i32, i32, i32, i32) *%foo) {
+; CHECK-LABEL: f5:
+; CHECK: lgr %r1, %r2
+; CHECK-DAG: lhi %r2, 1
+; CHECK-DAG: lhi %r3, 2
+; CHECK-DAG: lhi %r4, 3
+; CHECK-DAG: lhi %r5, 4
+; CHECK: br %r1
+  tail call void %foo(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check an indirect call that will be forced into a call-saved GPR
+; (which should be %r13, the highest GPR not used for anything else).
+define void @f6(void(i32) *%foo) {
+; CHECK-LABEL: f6:
+; CHECK: stmg %r13, %r15, 104(%r15)
+; CHECK: lgr %r13, %r2
+; CHECK: brasl %r14, returns_i32
+; CHECK: lgr %r1, %r13
+; CHECK: lmg %r13, %r15, 264(%r15)
+; CHECK: br %r1
+  %arg = call i32 @returns_i32()
+  tail call void %foo(i32 %arg)
+  ret void
+}
+
+; Test a function that returns a value.
+define i64 @f7() {
+; CHECK-LABEL: f7:
+; CHECK: jg returns_i64@PLT
+  %res = tail call i64 @returns_i64()
+  ret i64 %res
+}
+
+; Test a function that returns a value truncated from i64 to i32.
+define i32 @f8() {
+; CHECK-LABEL: f8:
+; CHECK: jg returns_i64@PLT
+  %res = tail call i64 @returns_i64()
+  %trunc = trunc i64 %res to i32
+  ret i32 %trunc
+}
+
+; Test a function that returns a value truncated from i64 to i7.
+define i7 @f9() {
+; CHECK-LABEL: f9:
+; CHECK: jg returns_i64@PLT
+  %res = tail call i64 @returns_i64()
+  %trunc = trunc i64 %res to i7
+  ret i7 %trunc
+}
+
+; Test a function that returns a value truncated from i32 to i8.
+define i8 @f10() {
+; CHECK-LABEL: f10:
+; CHECK: jg returns_i32@PLT
+  %res = tail call i32 @returns_i32()
+  %trunc = trunc i32 %res to i8
+  ret i8 %trunc
+}
diff --git a/test/CodeGen/SystemZ/cmpxchg-01.ll b/test/CodeGen/SystemZ/cmpxchg-01.ll
index 477bcb00e9bd..d5ea97786900 100644
--- a/test/CodeGen/SystemZ/cmpxchg-01.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-01.ll
@@ -11,24 +11,23 @@
 ;   being used in the RISBG (in contrast to things like atomic addition,
 ;   which shift %r3 left so that %b is at the high end of the word).
 define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) {
-; CHECK-MAIN: f1:
+; CHECK-MAIN-LABEL: f1:
 ; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3
 ; CHECK-MAIN: nill %r3, 65532
 ; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3)
 ; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
 ; CHECK-MAIN: rll %r2, [[OLD]], 8([[SHIFT]])
 ; CHECK-MAIN: risbg %r4, %r2, 32, 55, 0
-; CHECK-MAIN: cr %r2, %r4
-; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: crjlh %r2, %r4, [[EXIT:\.[^ ]*]]
 ; CHECK-MAIN: risbg %r5, %r2, 32, 55, 0
 ; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -8({{%r[1-9]+}})
 ; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3)
-; CHECK-MAIN: j{{g?}}lh [[LOOP]]
+; CHECK-MAIN: jl [[LOOP]]
 ; CHECK-MAIN: [[EXIT]]:
 ; CHECK-MAIN-NOT: %r2
 ; CHECK-MAIN: br %r14
 ;
-; CHECK-SHIFT: f1:
+; CHECK-SHIFT-LABEL: f1:
 ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3
 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT: rll
@@ -40,13 +39,13 @@ define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) {
 ; Check compare and swap with constants.  We should force the constants into
 ; registers and use the sequence above.
 define i8 @f2(i8 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi [[CMP:%r[0-9]+]], 42
 ; CHECK: risbg [[CMP]], {{%r[0-9]+}}, 32, 55, 0
 ; CHECK: risbg
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT: f2:
+; CHECK-SHIFT-LABEL: f2:
 ; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88
 ; CHECK-SHIFT: risbg
 ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 55, 0
diff --git a/test/CodeGen/SystemZ/cmpxchg-02.ll b/test/CodeGen/SystemZ/cmpxchg-02.ll
index cc3452320b3d..08c79d717c1e 100644
--- a/test/CodeGen/SystemZ/cmpxchg-02.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-02.ll
@@ -11,24 +11,23 @@
 ;   being used in the RISBG (in contrast to things like atomic addition,
 ;   which shift %r3 left so that %b is at the high end of the word).
 define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) {
-; CHECK-MAIN: f1:
+; CHECK-MAIN-LABEL: f1:
 ; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3
 ; CHECK-MAIN: nill %r3, 65532
 ; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3)
 ; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
 ; CHECK-MAIN: rll %r2, [[OLD]], 16([[SHIFT]])
 ; CHECK-MAIN: risbg %r4, %r2, 32, 47, 0
-; CHECK-MAIN: cr %r2, %r4
-; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: crjlh %r2, %r4, [[EXIT:\.[^ ]*]]
 ; CHECK-MAIN: risbg %r5, %r2, 32, 47, 0
 ; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -16({{%r[1-9]+}})
 ; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3)
-; CHECK-MAIN: j{{g?}}lh [[LOOP]]
+; CHECK-MAIN: jl [[LOOP]]
 ; CHECK-MAIN: [[EXIT]]:
 ; CHECK-MAIN-NOT: %r2
 ; CHECK-MAIN: br %r14
 ;
-; CHECK-SHIFT: f1:
+; CHECK-SHIFT-LABEL: f1:
 ; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3
 ; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
 ; CHECK-SHIFT: rll
@@ -40,13 +39,13 @@ define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) {
 ; Check compare and swap with constants.  We should force the constants into
 ; registers and use the sequence above.
 define i16 @f2(i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi [[CMP:%r[0-9]+]], 42
 ; CHECK: risbg [[CMP]], {{%r[0-9]+}}, 32, 47, 0
 ; CHECK: risbg
 ; CHECK: br %r14
 ;
-; CHECK-SHIFT: f2:
+; CHECK-SHIFT-LABEL: f2:
 ; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88
 ; CHECK-SHIFT: risbg
 ; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 47, 0
diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll
index 45e224eda84c..3917979ac24c 100644
--- a/test/CodeGen/SystemZ/cmpxchg-03.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-03.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the CS range.
 define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst
@@ -13,7 +13,7 @@ define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) {
 
 ; Check the high end of the aligned CS range.
 define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cs %r2, %r3, 4092(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -23,7 +23,7 @@ define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) {
 
 ; Check the next word up, which should use CSY instead of CS.
 define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: csy %r2, %r3, 4096(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -33,7 +33,7 @@ define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) {
 
 ; Check the high end of the aligned CSY range.
 define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: csy %r2, %r3, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -44,7 +44,7 @@ define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r4, 524288
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) {
 
 ; Check the high end of the negative aligned CSY range.
 define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: csy %r2, %r3, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -65,7 +65,7 @@ define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) {
 
 ; Check the low end of the CSY range.
 define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: csy %r2, %r3, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +76,7 @@ define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r4, -524292
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) {
 
 ; Check that CS does not allow an index.
 define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agr %r4, %r5
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -99,7 +99,7 @@ define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
 
 ; Check that CSY does not allow an index.
 define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agr %r4, %r5
 ; CHECK: csy %r2, %r3, 4096(%r4)
 ; CHECK: br %r14
@@ -112,7 +112,7 @@ define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
 
 ; Check that a constant %cmp value is loaded into a register first.
 define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: lhi %r2, 1001
 ; CHECK: cs %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -122,7 +122,7 @@ define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) {
 
 ; Check that a constant %swap value is loaded into a register first.
 define i32 @f12(i32 %cmp, i32 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: lhi [[SWAP:%r[0-9]+]], 1002
 ; CHECK: cs %r2, [[SWAP]], 0(%r3)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll
index f8969ee08449..f58868f04f2d 100644
--- a/test/CodeGen/SystemZ/cmpxchg-04.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-04.ll
@@ -4,7 +4,7 @@
 
 ; Check CSG without a displacement.
 define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
   %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst
@@ -13,7 +13,7 @@ define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) {
 
 ; Check the high end of the aligned CSG range.
 define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: csg %r2, %r3, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -24,7 +24,7 @@ define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r4, 524288
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -35,7 +35,7 @@ define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) {
 
 ; Check the high end of the negative aligned CSG range.
 define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: csg %r2, %r3, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -45,7 +45,7 @@ define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) {
 
 ; Check the low end of the CSG range.
 define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: csg %r2, %r3, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -56,7 +56,7 @@ define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r4, -524296
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -67,7 +67,7 @@ define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) {
 
 ; Check that CSG does not allow an index.
 define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agr %r4, %r5
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -79,7 +79,7 @@ define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) {
 
 ; Check that a constant %cmp value is loaded into a register first.
 define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lghi %r2, 1001
 ; CHECK: csg %r2, %r3, 0(%r4)
 ; CHECK: br %r14
@@ -89,7 +89,7 @@ define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) {
 
 ; Check that a constant %swap value is loaded into a register first.
 define i64 @f9(i64 %cmp, i64 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lghi [[SWAP:%r[0-9]+]], 1002
 ; CHECK: csg %r2, [[SWAP]], 0(%r3)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/cond-load-01.ll b/test/CodeGen/SystemZ/cond-load-01.ll
new file mode 100644
index 000000000000..1030226798d1
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-load-01.ll
@@ -0,0 +1,130 @@
+; Test LOC.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo(i32 *)
+
+; Test the simple case.
+define i32 @f1(i32 %easy, i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK: clfi %r4, 42
+; CHECK: loche %r2, 0(%r3)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; ...and again with the operands swapped.
+define i32 @f2(i32 %easy, i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK: clfi %r4, 42
+; CHECK: locl %r2, 0(%r3)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %other, i32 %easy
+  ret i32 %res
+}
+
+; Check the high end of the aligned LOC range.
+define i32 @f3(i32 %easy, i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK: clfi %r4, 42
+; CHECK: loche %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; Check the next word up.  Other sequences besides this one would be OK.
+define i32 @f4(i32 %easy, i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: clfi %r4, 42
+; CHECK: loche %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; Check the low end of the LOC range.
+define i32 @f5(i32 %easy, i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r4, 42
+; CHECK: loche %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; Check the next word down, with the same comments as f4.
+define i32 @f6(i32 %easy, i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524292
+; CHECK: clfi %r4, 42
+; CHECK: loche %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; Try a frame index base.
+define i32 @f7(i32 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: loche %r2, {{[0-9]+}}(%r15)
+; CHECK: br %r14
+  %ptr = alloca i32
+  %easy = call i32 @foo(i32 *%ptr)
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; Try a case when an index is involved.
+define i32 @f8(i32 %easy, i32 %limit, i64 %base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK: clfi %r3, 42
+; CHECK: loche %r2, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  ret i32 %res
+}
+
+; Test that conditionally-executed loads do not use LOC, since it is allowed
+; to trap even when the condition is false.
+define i32 @f9(i32 %easy, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: loc
+; CHECK: br %r14
+entry:
+  %cmp = icmp ule i32 %easy, %limit
+  br i1 %cmp, label %load, label %exit
+
+load:
+  %other = load i32 *%ptr
+  br label %exit
+
+exit:
+  %res = phi i32 [ %easy, %entry ], [ %other, %load ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/cond-load-02.ll b/test/CodeGen/SystemZ/cond-load-02.ll
new file mode 100644
index 000000000000..e97f4728bc0b
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-load-02.ll
@@ -0,0 +1,130 @@
+; Test LOCG.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i64 @foo(i64 *)
+
+; Test the simple case.
+define i64 @f1(i64 %easy, i64 *%ptr, i64 %limit) {
+; CHECK-LABEL: f1:
+; CHECK: clgfi %r4, 42
+; CHECK: locghe %r2, 0(%r3)
+; CHECK: br %r14
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; ...and again with the operands swapped.
+define i64 @f2(i64 %easy, i64 *%ptr, i64 %limit) {
+; CHECK-LABEL: f2:
+; CHECK: clgfi %r4, 42
+; CHECK: locgl %r2, 0(%r3)
+; CHECK: br %r14
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %other, i64 %easy
+  ret i64 %res
+}
+
+; Check the high end of the aligned LOCG range.
+define i64 @f3(i64 %easy, i64 *%base, i64 %limit) {
+; CHECK-LABEL: f3:
+; CHECK: clgfi %r4, 42
+; CHECK: locghe %r2, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65535
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; Check the next doubleword up.  Other sequences besides this one would be OK.
+define i64 @f4(i64 %easy, i64 *%base, i64 %limit) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: clgfi %r4, 42
+; CHECK: locghe %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65536
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; Check the low end of the LOCG range.
+define i64 @f5(i64 %easy, i64 *%base, i64 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clgfi %r4, 42
+; CHECK: locghe %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65536
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; Check the next doubleword down, with the same comments as f4.
+define i64 @f6(i64 %easy, i64 *%base, i64 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524296
+; CHECK: clgfi %r4, 42
+; CHECK: locghe %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65537
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; Try a frame index base.
+define i64 @f7(i64 %alt, i64 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: locghe %r2, {{[0-9]+}}(%r15)
+; CHECK: br %r14
+  %ptr = alloca i64
+  %easy = call i64 @foo(i64 *%ptr)
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; Try a case when an index is involved.
+define i64 @f8(i64 %easy, i64 %limit, i64 %base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK: clgfi %r3, 42
+; CHECK: locghe %r2, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i64 *
+  %cond = icmp ult i64 %limit, 42
+  %other = load i64 *%ptr
+  %res = select i1 %cond, i64 %easy, i64 %other
+  ret i64 %res
+}
+
+; Test that conditionally-executed loads do not use LOCG, since it is allowed
+; to trap even when the condition is false.
+define i64 @f9(i64 %easy, i64 %limit, i64 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: locg
+; CHECK: br %r14
+entry:
+  %cmp = icmp ule i64 %easy, %limit
+  br i1 %cmp, label %load, label %exit
+
+load:
+  %other = load i64 *%ptr
+  br label %exit
+
+exit:
+  %res = phi i64 [ %easy, %entry ], [ %other, %load ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/cond-move-01.ll b/test/CodeGen/SystemZ/cond-move-01.ll
new file mode 100644
index 000000000000..088dee0232ea
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-move-01.ll
@@ -0,0 +1,48 @@
+; Test LOCR and LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Test LOCR.
+define i32 @f1(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK: clfi %r4, 42
+; CHECK: locrhe %r2, %r3
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  ret i32 %res
+}
+
+; Test LOCGR.
+define i64 @f2(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f2:
+; CHECK: clgfi %r4, 42
+; CHECK: locgrhe %r2, %r3
+; CHECK: br %r14
+  %cond = icmp ult i64 %limit, 42
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Test LOCR in a case that could use COMPARE AND BRANCH.  We prefer using
+; LOCR if possible.
+define i32 @f3(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK: chi %r4, 42
+; CHECK: locrlh %r2, %r3
+; CHECK: br %r14
+  %cond = icmp eq i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  ret i32 %res
+}
+
+; ...and again for LOCGR.
+define i64 @f4(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f4:
+; CHECK: cghi %r4, 42
+; CHECK: locgrlh %r2, %r3
+; CHECK: br %r14
+  %cond = icmp eq i64 %limit, 42
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/cond-store-01.ll b/test/CodeGen/SystemZ/cond-store-01.ll
new file mode 100644
index 000000000000..d55ea2133e8f
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-01.ll
@@ -0,0 +1,398 @@
+; Test 8-bit conditional stores that are presented as selects.  The volatile
+; tests require z10, which uses a branch instead of a LOCR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare void @foo(i8 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %alt, i8 %orig
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 32 bits, with the
+; loaded value first.
+define void @f3(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = sext i8 %orig to i32
+  %res = select i1 %cond, i32 %ext, i32 %alt
+  %trunc = trunc i32 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = sext i8 %orig to i32
+  %res = select i1 %cond, i32 %alt, i32 %ext
+  %trunc = trunc i32 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 32 bits, with the
+; loaded value first.
+define void @f5(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = zext i8 %orig to i32
+  %res = select i1 %cond, i32 %ext, i32 %alt
+  %trunc = trunc i32 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = zext i8 %orig to i32
+  %res = select i1 %cond, i32 %alt, i32 %ext
+  %trunc = trunc i32 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f7(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = sext i8 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f8(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = sext i8 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 64 bits, with the
+; loaded value first.
+define void @f9(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = zext i8 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f10(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %ext = zext i8 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the STC range.
+define void @f11(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 4095(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%base, i64 4095
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which should use STCY instead of STC.
+define void @f12(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f12:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r3, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%base, i64 4096
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the STCY range.
+define void @f13(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f13:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r3, 524287(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%base, i64 524287
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f14(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f14:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%base, i64 524288
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check the low end of the STCY range.
+define void @f15(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%base, i64 -524288
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f16(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f16:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524289
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%base, i64 -524289
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check that STCY allows an index.
+define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f17:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r4, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i8 *
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f18(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f18:
+; CHECK: lb {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stc {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load volatile i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; ...likewise stores.  In this case we should have a conditional load into %r3.
+define void @f19(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK-LABEL: f19:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: lb %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store volatile i8 %res, i8 *%ptr
+  ret void
+}
+
+; Check that atomic loads are not matched.  The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CS.
+; CHECK-LABEL: f20:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: jl
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stc {{%r[0-9]+}},
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load atomic i8 *%ptr unordered, align 1
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  ret void
+}
+
+; ...likewise stores.
+define void @f21(i8 *%ptr, i8 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CS.
+; CHECK-LABEL: f21:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: lb %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store atomic i8 %res, i8 *%ptr unordered, align 1
+  ret void
+}
+
+; Try a frame index base.
+define void @f22(i8 %alt, i32 %limit) {
+; CHECK-LABEL: f22:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: stc {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i8
+  call void @foo(i8 *%ptr)
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i8 *%ptr
+  %res = select i1 %cond, i8 %orig, i8 %alt
+  store i8 %res, i8 *%ptr
+  call void @foo(i8 *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-02.ll b/test/CodeGen/SystemZ/cond-store-02.ll
new file mode 100644
index 000000000000..91bc4860b384
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-02.ll
@@ -0,0 +1,398 @@
+; Test 16-bit conditional stores that are presented as selects.  The volatile
+; tests require z10, which uses a branch instead of a LOCR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare void @foo(i16 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %alt, i16 %orig
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 32 bits, with the
+; loaded value first.
+define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = sext i16 %orig to i32
+  %res = select i1 %cond, i32 %ext, i32 %alt
+  %trunc = trunc i32 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = sext i16 %orig to i32
+  %res = select i1 %cond, i32 %alt, i32 %ext
+  %trunc = trunc i32 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 32 bits, with the
+; loaded value first.
+define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = zext i16 %orig to i32
+  %res = select i1 %cond, i32 %ext, i32 %alt
+  %trunc = trunc i32 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = zext i16 %orig to i32
+  %res = select i1 %cond, i32 %alt, i32 %ext
+  %trunc = trunc i32 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = sext i16 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = sext i16 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 64 bits, with the
+; loaded value first.
+define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = zext i16 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %ext = zext i16 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i16
+  store i16 %trunc, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STH range.
+define void @f11(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 4094(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 2047
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword up, which should use STHY instead of STH.
+define void @f12(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f12:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r3, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 2048
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STHY range.
+define void @f13(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f13:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r3, 524286(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 262143
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f14(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f14:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 262144
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check the low end of the STHY range.
+define void @f15(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 -262144
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f16(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f16:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524290
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 -262145
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check that STHY allows an index.
+define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f17:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r4, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f18:
+; CHECK: lh {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: sth {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load volatile i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; ...likewise stores.  In this case we should have a conditional load into %r3.
+define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK-LABEL: f19:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: lh %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store volatile i16 %res, i16 *%ptr
+  ret void
+}
+
+; Check that atomic loads are not matched.  The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CS.
+; CHECK-LABEL: f20:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: jl
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: sth {{%r[0-9]+}},
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load atomic i16 *%ptr unordered, align 2
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  ret void
+}
+
+; ...likewise stores.
+define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CS.
+; CHECK-LABEL: f21:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: lh %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store atomic i16 %res, i16 *%ptr unordered, align 2
+  ret void
+}
+
+; Try a frame index base.
+define void @f22(i16 %alt, i32 %limit) {
+; CHECK-LABEL: f22:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i16
+  call void @foo(i16 *%ptr)
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i16 *%ptr
+  %res = select i1 %cond, i16 %orig, i16 %alt
+  store i16 %res, i16 *%ptr
+  call void @foo(i16 *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-03.ll b/test/CodeGen/SystemZ/cond-store-03.ll
new file mode 100644
index 000000000000..d4fd48d61324
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-03.ll
@@ -0,0 +1,322 @@
+; Test 32-bit conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %alt, i32 %orig
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %ext = sext i32 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %ext = sext i32 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 64 bits, with the
+; loaded value first.
+define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %ext = zext i32 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %ext = zext i32 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned ST range.
+define void @f7(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 4092(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1023
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which should use STY instead of ST.
+define void @f8(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r3, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1024
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STY range.
+define void @f9(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r3, 524284(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f10(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the STY range.
+define void @f11(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f12(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f12:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524292
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check that STY allows an index.
+define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f13:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r4, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f14:
+; CHECK: l {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: st {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load volatile i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; ...likewise stores.  In this case we should have a conditional load into %r3.
+define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f15:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: l %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: st %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store volatile i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check that atomic loads are not matched.  The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CS.
+; CHECK-LABEL: f16:
+; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: st {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load atomic i32 *%ptr unordered, align 4
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; ...likewise stores.
+define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CS.
+; CHECK-LABEL: f17:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: l %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store atomic i32 %res, i32 *%ptr unordered, align 4
+  ret void
+}
+
+; Try a frame index base.
+define void @f18(i32 %alt, i32 %limit) {
+; CHECK-LABEL: f18:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i32
+  call void @foo(i32 *%ptr)
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  call void @foo(i32 *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-04.ll b/test/CodeGen/SystemZ/cond-store-04.ll
new file mode 100644
index 000000000000..fc565c432fff
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-04.ll
@@ -0,0 +1,214 @@
+; Test 64-bit conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare void @foo(i64 *)
+
+; Test with the loaded value first.
+define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %alt, i64 %orig
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STG range.
+define void @f3(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, 524280(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65535
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65536
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the low end of the STG range.
+define void @f5(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65536
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524296
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65537
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check that STG allows an index.
+define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r4, 524287(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f8(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: lg {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stg {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load volatile i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; ...likewise stores.  In this case we should have a conditional load into %r3.
+define void @f9(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: lg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: stg %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store volatile i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check that atomic loads are not matched.  The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CSG.
+; CHECK-LABEL: f10:
+; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stg {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load atomic i64 *%ptr unordered, align 8
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; ...likewise stores.
+define void @f11(i64 *%ptr, i64 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CSG.
+; CHECK-LABEL: f11:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: lg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store atomic i64 %res, i64 *%ptr unordered, align 8
+  ret void
+}
+
+; Try a frame index base.
+define void @f12(i64 %alt, i32 %limit) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: stg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i64
+  call void @foo(i64 *%ptr)
+  %cond = icmp ult i32 %limit, 420
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  call void @foo(i64 *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-05.ll b/test/CodeGen/SystemZ/cond-store-05.ll
new file mode 100644
index 000000000000..f8056f73c928
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-05.ll
@@ -0,0 +1,213 @@
+; Test f32 conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(float *)
+
+; Test with the loaded value first.
+define void @f1(float *%ptr, float %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(float *%ptr, float %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %alt, float %orig
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STE range.
+define void @f3(float *%base, float %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: ste %f0, 4092(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check the next word up, which should use STEY instead of STE.
+define void @f4(float *%base, float %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STEY range.
+define void @f5(float *%base, float %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, 524284(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 131071
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(float *%base, float %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 131072
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check the low end of the STEY range.
+define void @f7(float *%base, float %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -131072
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(float *%base, float %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524292
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -131073
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check that STEY allows an index.
+define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to float *
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f10(float *%ptr, float %alt, i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK: le {{%f[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: ste {{%f[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load volatile float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  ret void
+}
+
+; ...likewise stores.  In this case we should have a conditional load into %f0.
+define void @f11(float *%ptr, float %alt, i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: le %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: ste %f0, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store volatile float %res, float *%ptr
+  ret void
+}
+
+; Try a frame index base.
+define void @f12(float %alt, i32 %limit) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: ste {{%f[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca float
+  call void @foo(float *%ptr)
+  %cond = icmp ult i32 %limit, 420
+  %orig = load float *%ptr
+  %res = select i1 %cond, float %orig, float %alt
+  store float %res, float *%ptr
+  call void @foo(float *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-06.ll b/test/CodeGen/SystemZ/cond-store-06.ll
new file mode 100644
index 000000000000..66681958d474
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-06.ll
@@ -0,0 +1,213 @@
+; Test f64 conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(double *)
+
+; Test with the loaded value first.
+define void @f1(double *%ptr, double %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(double *%ptr, double %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %alt, double %orig
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STD range.
+define void @f3(double *%base, double %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: std %f0, 4088(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which should use STDY instead of STD.
+define void @f4(double *%base, double %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STDY range.
+define void @f5(double *%base, double %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, 524280(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 65535
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(double *%base, double %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 65536
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check the low end of the STDY range.
+define void @f7(double *%base, double %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -65536
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(double *%base, double %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524296
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -65537
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check that STDY allows an index.
+define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, 524287(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to double *
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; Check that volatile loads are not matched (the load stays unconditional).
+define void @f10(double *%ptr, double %alt, i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK: ld {{%f[0-5]}}, 0(%r2)
+; CHECK: {{jl|jhe}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: std {{%f[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load volatile double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  ret void
+}
+
+; ...likewise volatile stores.  In this case we should have a conditional load into %f0.
+define void @f11(double *%ptr, double %alt, i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK: jhe [[LABEL:[^ ]*]]
+; CHECK: ld %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: std %f0, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store volatile double %res, double *%ptr
+  ret void
+}
+
+; Try a frame index base.
+define void @f12(double %alt, i32 %limit) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: std {{%f[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca double
+  call void @foo(double *%ptr)
+  %cond = icmp ult i32 %limit, 420
+  %orig = load double *%ptr
+  %res = select i1 %cond, double %orig, double %alt
+  store double %res, double *%ptr
+  call void @foo(double *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-07.ll b/test/CodeGen/SystemZ/cond-store-07.ll
new file mode 100644
index 000000000000..b1df525566a3
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-07.ll
@@ -0,0 +1,186 @@
+; Test STOCs that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK: clfi %r4, 42
+; CHECK: stocl %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %alt, i32 %orig
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %ext = sext i32 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK: clfi %r4, 42
+; CHECK: stocl %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %ext = sext i32 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 64 bits, with the
+; loaded value first.
+define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %ext = zext i32 %orig to i64
+  %res = select i1 %cond, i64 %ext, i64 %alt
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: clfi %r4, 42
+; CHECK: stocl %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %ext = zext i32 %orig to i64
+  %res = select i1 %cond, i64 %alt, i64 %ext
+  %trunc = trunc i64 %res to i32
+  store i32 %trunc, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STOC range.
+define void @f7(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word up.  Other sequences besides this one would be OK.
+define void @f8(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: agfi %r2, 524288
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the STOC range.
+define void @f9(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word down, with the same comments as f8.
+define void @f10(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK: agfi %r2, -524292
+; CHECK: clfi %r4, 42
+; CHECK: stoche %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Try a frame index base.
+define void @f11(i32 %alt, i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stoche {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i32
+  call void @foo(i32 *%ptr)
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  call void @foo(i32 *%ptr)
+  ret void
+}
+
+; Test that conditionally-executed stores do not use STOC, since STOC
+; is allowed to trap even when the condition is false.
+define void @f12(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f12:
+; CHECK-NOT: stoc
+; CHECK: br %r14
+entry:
+  %cmp = icmp ule i32 %a, %b
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/cond-store-08.ll b/test/CodeGen/SystemZ/cond-store-08.ll
new file mode 100644
index 000000000000..56dc7ee7777c
--- /dev/null
+++ b/test/CodeGen/SystemZ/cond-store-08.ll
@@ -0,0 +1,124 @@
+; Test STOCGs that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare void @foo(i64 *)
+
+; Test with the loaded value first.
+define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK: clfi %r4, 42
+; CHECK: stocghe %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK: clfi %r4, 42
+; CHECK: stocgl %r3, 0(%r2)
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %alt, i64 %orig
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STOCG range.
+define void @f3(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK: clfi %r4, 42
+; CHECK: stocghe %r3, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65535
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword up.  Other sequences besides this one would be OK.
+define void @f4(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r2, 524288
+; CHECK: clfi %r4, 42
+; CHECK: stocghe %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65536
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the low end of the STOCG range.
+define void @f5(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r4, 42
+; CHECK: stocghe %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65536
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, with the same comments as f4.
+define void @f6(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r2, -524296
+; CHECK: clfi %r4, 42
+; CHECK: stocghe %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65537
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  ret void
+}
+
+; Try a frame index base.
+define void @f7(i64 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stocghe {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i64
+  call void @foo(i64 *%ptr)
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i64 *%ptr
+  %res = select i1 %cond, i64 %orig, i64 %alt
+  store i64 %res, i64 *%ptr
+  call void @foo(i64 *%ptr)
+  ret void
+}
+
+; Test that conditionally-executed stores do not use STOCG, since STOCG
+; is allowed to trap even when the condition is false.
+define void @f8(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: stocg
+; CHECK: br %r14
+entry:
+  %cmp = icmp ule i64 %a, %b
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i64 %b, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll
index 81b3fb273d14..0b4067da3d14 100644
--- a/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -5,7 +5,7 @@
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lpebr %f0, %f0
 ; CHECK: br %r14
   %res = call float @llvm.fabs.f32(float %f)
@@ -15,7 +15,7 @@ define float @f1(float %f) {
 ; Test f64.
 declare double @llvm.fabs.f64(double %f)
 define double @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lpdbr %f0, %f0
 ; CHECK: br %r14
   %res = call double @llvm.fabs.f64(double %f)
@@ -27,7 +27,7 @@ define double @f2(double %f) {
 ; processing so that using FPRs is unequivocally better.
 declare fp128 @llvm.fabs.f128(fp128 %f)
 define void @f3(fp128 *%ptr, fp128 *%ptr2) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lpxbr
 ; CHECK: dxbr
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll
index 513d49c7acf5..909c48a06377 100644
--- a/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -5,7 +5,7 @@
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lnebr %f0, %f0
 ; CHECK: br %r14
   %abs = call float @llvm.fabs.f32(float %f)
@@ -16,7 +16,7 @@ define float @f1(float %f) {
 ; Test f64.
 declare double @llvm.fabs.f64(double %f)
 define double @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lndbr %f0, %f0
 ; CHECK: br %r14
   %abs = call double @llvm.fabs.f64(double %f)
@@ -29,7 +29,7 @@ define double @f2(double %f) {
 ; extra processing so that using FPRs is unequivocally better.
 declare fp128 @llvm.fabs.f128(fp128 %f)
 define void @f3(fp128 *%ptr, fp128 *%ptr2) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lnxbr
 ; CHECK: dxbr
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll
index 7ce0777b8870..28a212801a63 100644
--- a/test/CodeGen/SystemZ/fp-add-01.ll
+++ b/test/CodeGen/SystemZ/fp-add-01.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare float @foo()
+
 ; Check register addition.
 define float @f1(float %f1, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: aebr %f0, %f2
 ; CHECK: br %r14
   %res = fadd float %f1, %f2
@@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) {
 
 ; Check the low end of the AEB range.
 define float @f2(float %f1, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aeb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load float *%ptr
@@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) {
 
 ; Check the high end of the aligned AEB range.
 define float @f3(float %f1, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f4(float %f1, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: aeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define float @f5(float %f1, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: aeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) {
 
 ; Check that AEB allows indices.
 define float @f6(float %f1, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: aeb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) {
   %res = fadd float %f1, %f2
   ret float %res
 }
+
+; Check that additions of spilled values can use AEB rather than AEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: aeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+  %ptr10 = getelementptr float *%ptr0, i64 20
+
+  %val0 = load float *%ptr0
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+  %val10 = load float *%ptr10
+
+  %ret = call float @foo()
+
+  %add0 = fadd float %ret, %val0
+  %add1 = fadd float %add0, %val1
+  %add2 = fadd float %add1, %val2
+  %add3 = fadd float %add2, %val3
+  %add4 = fadd float %add3, %val4
+  %add5 = fadd float %add4, %val5
+  %add6 = fadd float %add5, %val6
+  %add7 = fadd float %add6, %val7
+  %add8 = fadd float %add7, %val8
+  %add9 = fadd float %add8, %val9
+  %add10 = fadd float %add9, %val10
+
+  ret float %add10
+}
diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll
index 08eb90efbfaa..067c7474fb43 100644
--- a/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/test/CodeGen/SystemZ/fp-add-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare double @foo()
+
 ; Check register addition.
 define double @f1(double %f1, double %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: adbr %f0, %f2
 ; CHECK: br %r14
   %res = fadd double %f1, %f2
@@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) {
 
 ; Check the low end of the ADB range.
 define double @f2(double %f1, double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: adb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load double *%ptr
@@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) {
 
 ; Check the high end of the aligned ADB range.
 define double @f3(double %f1, double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: adb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 511
@@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double %f1, double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: adb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(double %f1, double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: adb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) {
 
 ; Check that ADB allows indices.
 define double @f6(double %f1, double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: adb %f0, 800(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) {
   %res = fadd double %f1, %f2
   ret double %res
 }
+
+; Check that additions of spilled values can use ADB rather than ADBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: adb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+  %ptr10 = getelementptr double *%ptr0, i64 20
+
+  %val0 = load double *%ptr0
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+  %val10 = load double *%ptr10
+
+  %ret = call double @foo()
+
+  %add0 = fadd double %ret, %val0
+  %add1 = fadd double %add0, %val1
+  %add2 = fadd double %add1, %val2
+  %add3 = fadd double %add2, %val3
+  %add4 = fadd double %add3, %val4
+  %add5 = fadd double %add4, %val5
+  %add6 = fadd double %add5, %val6
+  %add7 = fadd double %add6, %val7
+  %add8 = fadd double %add7, %val8
+  %add9 = fadd double %add8, %val9
+  %add10 = fadd double %add9, %val10
+
+  ret double %add10
+}
diff --git a/test/CodeGen/SystemZ/fp-add-03.ll b/test/CodeGen/SystemZ/fp-add-03.ll
index 13ffb023b6fb..cb4042eee472 100644
--- a/test/CodeGen/SystemZ/fp-add-03.ll
+++ b/test/CodeGen/SystemZ/fp-add-03.ll
@@ -4,7 +4,7 @@
 
 ; There is no memory form of 128-bit addition.
 define void @f1(fp128 *%ptr, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxebr %f0, %f0
 ; CHECK: ld %f1, 0(%r2)
 ; CHECK: ld %f3, 8(%r2)
diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll
index b80a71595e87..d7c0cce9c2a5 100644
--- a/test/CodeGen/SystemZ/fp-cmp-01.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-01.ll
@@ -1,12 +1,15 @@
-; Test 32-bit floating-point comparison.
+; Test 32-bit floating-point comparison.  The tests assume a z10 implementation
+; of select, using conditional branches rather than LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare float @foo()
 
 ; Check comparison with registers.
 define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cebr %f0, %f2
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq float %f1, %f2
@@ -16,9 +19,9 @@ define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) {
 
 ; Check the low end of the CEB range.
 define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ceb %f0, 0(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %f2 = load float *%ptr
@@ -29,9 +32,9 @@ define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
 
 ; Check the high end of the aligned CEB range.
 define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ceb %f0, 4092(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -44,10 +47,10 @@ define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r4, 4096
 ; CHECK: ceb %f0, 0(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1024
@@ -59,10 +62,10 @@ define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r4, -4
 ; CHECK: ceb %f0, 0(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 -1
@@ -74,10 +77,10 @@ define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) {
 
 ; Check that CEB allows indices.
 define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r5, 2
 ; CHECK: ceb %f0, 400(%r1,%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr1 = getelementptr float *%base, i64 %index
@@ -87,3 +90,230 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) {
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
 }
+
+; Check that comparisons of spilled values can use CEB rather than CEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+  %ptr10 = getelementptr float *%ptr0, i64 20
+
+  %val0 = load float *%ptr0
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+  %val10 = load float *%ptr10
+
+  %ret = call float @foo()
+
+  %cmp0 = fcmp olt float %ret, %val0
+  %cmp1 = fcmp olt float %ret, %val1
+  %cmp2 = fcmp olt float %ret, %val2
+  %cmp3 = fcmp olt float %ret, %val3
+  %cmp4 = fcmp olt float %ret, %val4
+  %cmp5 = fcmp olt float %ret, %val5
+  %cmp6 = fcmp olt float %ret, %val6
+  %cmp7 = fcmp olt float %ret, %val7
+  %cmp8 = fcmp olt float %ret, %val8
+  %cmp9 = fcmp olt float %ret, %val9
+  %cmp10 = fcmp olt float %ret, %val10
+
+  %sel0 = select i1 %cmp0, float %ret, float 0.0
+  %sel1 = select i1 %cmp1, float %sel0, float 1.0
+  %sel2 = select i1 %cmp2, float %sel1, float 2.0
+  %sel3 = select i1 %cmp3, float %sel2, float 3.0
+  %sel4 = select i1 %cmp4, float %sel3, float 4.0
+  %sel5 = select i1 %cmp5, float %sel4, float 5.0
+  %sel6 = select i1 %cmp6, float %sel5, float 6.0
+  %sel7 = select i1 %cmp7, float %sel6, float 7.0
+  %sel8 = select i1 %cmp8, float %sel7, float 8.0
+  %sel9 = select i1 %cmp9, float %sel8, float 9.0
+  %sel10 = select i1 %cmp10, float %sel9, float 10.0
+
+  ret float %sel10
+}
+
+; Check comparison with zero.
+define i64 @f8(i64 %a, i64 %b, float %f) {
+; CHECK-LABEL: f8:
+; CHECK: ltebr %f0, %f0
+; CHECK-NEXT: je
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %cond = fcmp oeq float %f, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CEB to be used,
+; first with oeq.
+define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: je {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then one (ordered and not equal).
+define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jlh {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp one float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then olt.
+define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp olt float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then ole.
+define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jhe {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp ole float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then oge.
+define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jle {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp oge float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then ogt.
+define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jl {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp ogt float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then ueq.
+define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jnlh {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp ueq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then une.
+define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jne {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp une float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then ult.
+define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jnle {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp ult float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then ule.
+define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jnl {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp ule float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then uge.
+define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f19:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jnh {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp uge float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; ...then ugt.
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) {
+; CHECK-LABEL: f20:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: jnhe {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load float *%ptr
+  %cond = fcmp ugt float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll
index 8227308ce810..c61f04ed244e 100644
--- a/test/CodeGen/SystemZ/fp-cmp-02.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -1,12 +1,15 @@
-; Test 64-bit floating-point comparison.
+; Test 64-bit floating-point comparison.  The tests assume a z10 implementation
+; of select, using conditional branches rather than LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare double @foo()
 
 ; Check comparison with registers.
 define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cdbr %f0, %f2
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq double %f1, %f2
@@ -16,9 +19,9 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
 
 ; Check the low end of the CDB range.
 define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %f2 = load double *%ptr
@@ -29,9 +32,9 @@ define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
 
 ; Check the high end of the aligned CDB range.
 define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cdb %f0, 4088(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 511
@@ -44,10 +47,10 @@ define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r4, 4096
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 512
@@ -59,10 +62,10 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r4, -8
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 -1
@@ -74,10 +77,10 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
 
 ; Check that CDB allows indices.
 define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r5, 3
 ; CHECK: cdb %f0, 800(%r1,%r4)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr1 = getelementptr double *%base, i64 %index
@@ -87,3 +90,86 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
 }
+
+; Check that comparisons of spilled values can use CDB rather than CDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+  %ptr10 = getelementptr double *%ptr0, i64 20
+
+  %val0 = load double *%ptr0
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+  %val10 = load double *%ptr10
+
+  %ret = call double @foo()
+
+  %cmp0 = fcmp olt double %ret, %val0
+  %cmp1 = fcmp olt double %ret, %val1
+  %cmp2 = fcmp olt double %ret, %val2
+  %cmp3 = fcmp olt double %ret, %val3
+  %cmp4 = fcmp olt double %ret, %val4
+  %cmp5 = fcmp olt double %ret, %val5
+  %cmp6 = fcmp olt double %ret, %val6
+  %cmp7 = fcmp olt double %ret, %val7
+  %cmp8 = fcmp olt double %ret, %val8
+  %cmp9 = fcmp olt double %ret, %val9
+  %cmp10 = fcmp olt double %ret, %val10
+
+  %sel0 = select i1 %cmp0, double %ret, double 0.0
+  %sel1 = select i1 %cmp1, double %sel0, double 1.0
+  %sel2 = select i1 %cmp2, double %sel1, double 2.0
+  %sel3 = select i1 %cmp3, double %sel2, double 3.0
+  %sel4 = select i1 %cmp4, double %sel3, double 4.0
+  %sel5 = select i1 %cmp5, double %sel4, double 5.0
+  %sel6 = select i1 %cmp6, double %sel5, double 6.0
+  %sel7 = select i1 %cmp7, double %sel6, double 7.0
+  %sel8 = select i1 %cmp8, double %sel7, double 8.0
+  %sel9 = select i1 %cmp9, double %sel8, double 9.0
+  %sel10 = select i1 %cmp10, double %sel9, double 10.0
+
+  ret double %sel10
+}
+
+; Check comparison with zero.
+define i64 @f8(i64 %a, i64 %b, double %f) {
+; CHECK-LABEL: f8:
+; CHECK: ltdbr %f0, %f0
+; CHECK-NEXT: je
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %cond = fcmp oeq double %f, 0.0
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CDB to be used.
+define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-NEXT: jl {{\.L.*}}
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f1 = load double *%ptr
+  %cond = fcmp ogt double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-cmp-03.ll b/test/CodeGen/SystemZ/fp-cmp-03.ll
index fd12c93e27a5..e777d00c9687 100644
--- a/test/CodeGen/SystemZ/fp-cmp-03.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-03.ll
@@ -1,15 +1,16 @@
-; Test 128-bit floating-point comparison.
+; Test 128-bit floating-point comparison.  The tests assume a z10 implementation
+; of select, using conditional branches rather than LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; There is no memory form of 128-bit comparison.
 define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxebr %f0, %f0
 ; CHECK: ld %f1, 0(%r4)
 ; CHECK: ld %f3, 8(%r4)
 ; CHECK: cxbr %f1, %f0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %f2x = fpext float %f2 to fp128
@@ -18,3 +19,18 @@ define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) {
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
 }
+
+; Check comparison with zero.
+define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: ld %f0, 0(%r4)
+; CHECK: ld %f2, 8(%r4)
+; CHECK: ltxbr %f0, %f0
+; CHECK-NEXT: je
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f = load fp128 *%ptr
+  %cond = fcmp oeq fp128 %f, 0xL00000000000000000000000000000000
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-cmp-04.ll b/test/CodeGen/SystemZ/fp-cmp-04.ll
new file mode 100644
index 000000000000..8d842164fa4f
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -0,0 +1,348 @@
+; Test that floating-point compares are omitted if CC already has the
+; right value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare float @llvm.fabs.f32(float %f)
+
+; Test addition followed by EQ, which can use the CC result of the addition.
+define float @f1(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f1:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: je .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = fadd float %a, %b
+  %cmp = fcmp oeq float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; ...and again with LT.
+define float @f2(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = fadd float %a, %b
+  %cmp = fcmp olt float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; ...and again with GT.
+define float @f3(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: jh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = fadd float %a, %b
+  %cmp = fcmp ogt float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; ...and again with UEQ.
+define float @f4(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: jnlh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = fadd float %a, %b
+  %cmp = fcmp ueq float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Subtraction also provides a zero-based CC value.
+define float @f5(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: seb %f0, 0(%r2)
+; CHECK-NEXT: jnhe .L{{.*}}
+; CHECK: br %r14
+entry:
+  %cur = load float *%dest
+  %res = fsub float %a, %cur
+  %cmp = fcmp ult float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD POSITIVE.
+define float @f6(float %dummy, float %a, float *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: lpebr %f0, %f2
+; CHECK-NEXT: jh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = call float @llvm.fabs.f32(float %a)
+  %cmp = fcmp ogt float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %res, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD NEGATIVE.
+define float @f7(float %dummy, float %a, float *%dest) {
+; CHECK-LABEL: f7:
+; CHECK: lnebr %f0, %f2
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %abs = call float @llvm.fabs.f32(float %a)
+  %res = fsub float -0.0, %abs
+  %cmp = fcmp olt float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %res, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test the result of LOAD COMPLEMENT.
+define float @f8(float %dummy, float %a, float *%dest) {
+; CHECK-LABEL: f8:
+; CHECK: lcebr %f0, %f2
+; CHECK-NEXT: jle .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = fsub float -0.0, %a
+  %cmp = fcmp ole float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %res, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Multiplication (for example) does not modify CC.
+define float @f9(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: meebr %f0, %f2
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: jlh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = fmul float %a, %b
+  %cmp = fcmp one float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test a combination involving a CC-setting instruction followed by
+; a non-CC-setting instruction.
+define float @f10(float %a, float %b, float %c, float *%dest) {
+; CHECK-LABEL: f10:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: debr %f0, %f4
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: jne .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = fadd float %a, %b
+  %res = fdiv float %add, %c
+  %cmp = fcmp une float %res, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %b, float *%dest
+  br label %exit
+
+exit:
+  ret float %res
+}
+
+; Test a case where CC is set based on a different register from the
+; compare input.
+define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) {
+; CHECK-LABEL: f11:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: sebr %f4, %f0
+; CHECK-NEXT: ste %f4, 0(%r2)
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: je .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = fadd float %a, %b
+  %sub = fsub float %c, %add
+  store float %sub, float *%dest1
+  %cmp = fcmp oeq float %add, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %sub, float *%dest2
+  br label %exit
+
+exit:
+  ret float %add
+}
+
+; Test that LER gets converted to LTEBR where useful.
+define float @f12(float %dummy, float %val, float *%dest) {
+; CHECK-LABEL: f12:
+; CHECK: ltebr %f0, %f2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{f0}"(float %val)
+  %cmp = fcmp olt float %val, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %val, float *%dest
+  br label %exit
+
+exit:
+  ret float %val
+}
+
+; Test that LDR gets converted to LTDBR where useful.
+define double @f13(double %dummy, double %val, double *%dest) {
+; CHECK-LABEL: f13:
+; CHECK: ltdbr %f0, %f2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{f0}"(double %val)
+  %cmp = fcmp olt double %val, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store double %val, double *%dest
+  br label %exit
+
+exit:
+  ret double %val
+}
+
+; Test that LXR gets converted to LTXBR where useful.
+define void @f14(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f14:
+; CHECK: ltxbr
+; CHECK-NEXT: dxbr
+; CHECK-NEXT: std
+; CHECK-NEXT: std
+; CHECK-NEXT: mxbr
+; CHECK-NEXT: std
+; CHECK-NEXT: std
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %val1 = load fp128 *%ptr1
+  %val2 = load fp128 *%ptr2
+  %div = fdiv fp128 %val1, %val2
+  store fp128 %div, fp128 *%ptr1
+  %mul = fmul fp128 %val1, %val2
+  store fp128 %mul, fp128 *%ptr2
+  %cmp = fcmp olt fp128 %val1, 0xL00000000000000000000000000000000
+  br i1 %cmp, label %exit, label %store
+
+store:
+  call void asm sideeffect "blah", ""()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test a case where it is the source rather than destination of LER that
+; we need.
+define float @f15(float %val, float %dummy, float *%dest) {
+; CHECK-LABEL: f15:
+; CHECK: ltebr %f2, %f0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{f2}"(float %val)
+  %cmp = fcmp olt float %val, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store float %val, float *%dest
+  br label %exit
+
+exit:
+  ret float %val
+}
+
+; Test a case where it is the source rather than destination of LDR that
+; we need.
+define double @f16(double %val, double %dummy, double *%dest) {
+; CHECK-LABEL: f16:
+; CHECK: ltdbr %f2, %f0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{f2}"(double %val)
+  %cmp = fcmp olt double %val, 0.0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store double %val, double *%dest
+  br label %exit
+
+exit:
+  ret double %val
+}
diff --git a/test/CodeGen/SystemZ/fp-const-01.ll b/test/CodeGen/SystemZ/fp-const-01.ll
index 65209d661e97..3a4ddf087803 100644
--- a/test/CodeGen/SystemZ/fp-const-01.ll
+++ b/test/CodeGen/SystemZ/fp-const-01.ll
@@ -4,7 +4,7 @@
 
 ; Test f32.
 define float @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lzer %f0
 ; CHECK: br %r14
   ret float 0.0
@@ -12,7 +12,7 @@ define float @f1() {
 
 ; Test f64.
 define double @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lzdr %f0
 ; CHECK: br %r14
   ret double 0.0
@@ -20,7 +20,7 @@ define double @f2() {
 
 ; Test f128.
 define void @f3(fp128 *%x) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lzxr %f0
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll
index 2dedf54e6f7d..96f857895ecf 100644
--- a/test/CodeGen/SystemZ/fp-const-02.ll
+++ b/test/CodeGen/SystemZ/fp-const-02.ll
@@ -4,7 +4,7 @@
 
 ; Test f32.
 define float @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lzer [[REGISTER:%f[0-5]+]]
 ; CHECK: lcebr %f0, [[REGISTER]]
 ; CHECK: br %r14
@@ -13,7 +13,7 @@ define float @f1() {
 
 ; Test f64.
 define double @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lzdr [[REGISTER:%f[0-5]+]]
 ; CHECK: lcdbr %f0, [[REGISTER]]
 ; CHECK: br %r14
@@ -22,7 +22,7 @@ define double @f2() {
 
 ; Test f128.
 define void @f3(fp128 *%x) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lzxr [[REGISTER:%f[0-5]+]]
 ; CHECK: lcxbr %f0, [[REGISTER]]
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-const-03.ll b/test/CodeGen/SystemZ/fp-const-03.ll
index 4c287e4c08a3..b2ae94db0b7e 100644
--- a/test/CodeGen/SystemZ/fp-const-03.ll
+++ b/test/CodeGen/SystemZ/fp-const-03.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define float @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}}
 ; CHECK: le %f0, 0([[REGISTER]])
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-const-04.ll b/test/CodeGen/SystemZ/fp-const-04.ll
index 847c380e3b98..d5526884dc69 100644
--- a/test/CodeGen/SystemZ/fp-const-04.ll
+++ b/test/CodeGen/SystemZ/fp-const-04.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define double @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}}
 ; CHECK: ldeb %f0, 0([[REGISTER]])
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-const-05.ll b/test/CodeGen/SystemZ/fp-const-05.ll
index 48f84ce5bee8..d81e3db91f48 100644
--- a/test/CodeGen/SystemZ/fp-const-05.ll
+++ b/test/CodeGen/SystemZ/fp-const-05.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define void @f1(fp128 *%x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
 ; CHECK: lxeb %f0, 0([[REGISTER]])
 ; CHECK: std %f0, 0(%r2)
diff --git a/test/CodeGen/SystemZ/fp-const-06.ll b/test/CodeGen/SystemZ/fp-const-06.ll
index 1da3d5eafaae..088810ba8e40 100644
--- a/test/CodeGen/SystemZ/fp-const-06.ll
+++ b/test/CodeGen/SystemZ/fp-const-06.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define double @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
 ; CHECK: ld %f0, 0([[REGISTER]])
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-const-07.ll b/test/CodeGen/SystemZ/fp-const-07.ll
index 5a108452a8e0..87e8f68b372f 100644
--- a/test/CodeGen/SystemZ/fp-const-07.ll
+++ b/test/CodeGen/SystemZ/fp-const-07.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define void @f1(fp128 *%x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
 ; CHECK: lxdb %f0, 0([[REGISTER]])
 ; CHECK: std %f0, 0(%r2)
diff --git a/test/CodeGen/SystemZ/fp-const-08.ll b/test/CodeGen/SystemZ/fp-const-08.ll
index 6a8a1ab3f9b7..8845adbebc56 100644
--- a/test/CodeGen/SystemZ/fp-const-08.ll
+++ b/test/CodeGen/SystemZ/fp-const-08.ll
@@ -6,7 +6,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define void @f1(fp128 *%x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
 ; CHECK: ld %f0, 0([[REGISTER]])
 ; CHECK: ld %f2, 8([[REGISTER]])
diff --git a/test/CodeGen/SystemZ/fp-const-09.ll b/test/CodeGen/SystemZ/fp-const-09.ll
index 435dcbacc19d..0c7d726e9d09 100644
--- a/test/CodeGen/SystemZ/fp-const-09.ll
+++ b/test/CodeGen/SystemZ/fp-const-09.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
 
 define void @f1(fp128 *%x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
 ; CHECK: ld %f0, 0([[REGISTER]])
 ; CHECK: ld %f2, 8([[REGISTER]])
diff --git a/test/CodeGen/SystemZ/fp-conv-01.ll b/test/CodeGen/SystemZ/fp-conv-01.ll
index 6c8ef4899776..49ed43bce51c 100644
--- a/test/CodeGen/SystemZ/fp-conv-01.ll
+++ b/test/CodeGen/SystemZ/fp-conv-01.ll
@@ -4,7 +4,7 @@
 
 ; Test f64->f32.
 define float @f1(double %d1, double %d2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ledbr %f0, %f2
 ; CHECK: br %r14
   %res = fptrunc double %d2 to float
@@ -13,7 +13,7 @@ define float @f1(double %d1, double %d2) {
 
 ; Test f128->f32.
 define float @f2(fp128 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lexbr %f0, %f0
 ; CHECK: br %r14
   %val = load fp128 *%ptr
@@ -24,7 +24,7 @@ define float @f2(fp128 *%ptr) {
 ; Make sure that we don't use %f0 as the destination of LEXBR when %f2
 ; is still live.
 define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lexbr %f1, %f1
 ; CHECK: aebr %f1, %f2
 ; CHECK: ste %f1, 0(%r2)
@@ -38,7 +38,7 @@ define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
 
 ; Test f128->f64.
 define double @f4(fp128 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldxbr %f0, %f0
 ; CHECK: br %r14
   %val = load fp128 *%ptr
@@ -48,7 +48,7 @@ define double @f4(fp128 *%ptr) {
 
 ; Like f3, but for f128->f64.
 define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldxbr %f1, %f1
 ; CHECK: adbr %f1, %f2
 ; CHECK: std %f1, 0(%r2)
diff --git a/test/CodeGen/SystemZ/fp-conv-02.ll b/test/CodeGen/SystemZ/fp-conv-02.ll
index f284e1dc2ae4..93fb7c8d4d92 100644
--- a/test/CodeGen/SystemZ/fp-conv-02.ll
+++ b/test/CodeGen/SystemZ/fp-conv-02.ll
@@ -4,7 +4,7 @@
 
 ; Check register extension.
 define double @f1(float %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldebr %f0, %f0
 ; CHECK: br %r14
   %res = fpext float %val to double
@@ -13,7 +13,7 @@ define double @f1(float %val) {
 
 ; Check the low end of the LDEB range.
 define double @f2(float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldeb %f0, 0(%r2)
 ; CHECK: br %r14
   %val = load float *%ptr
@@ -23,7 +23,7 @@ define double @f2(float *%ptr) {
 
 ; Check the high end of the aligned LDEB range.
 define double @f3(float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ldeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -35,7 +35,7 @@ define double @f3(float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: ldeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +47,7 @@ define double @f4(float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: ldeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +59,7 @@ define double @f5(float *%base) {
 
 ; Check that LDEB allows indices.
 define double @f6(float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: ldeb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +69,84 @@ define double @f6(float *%base, i64 %index) {
   %res = fpext float %val to double
   ret double %res
 }
+
+; Test a case where we spill the source of at least one LDEBR.  We want
+; to use LDEB if possible.
+define void @f7(double *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f7:
+; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile float *%ptr2
+  %val1 = load volatile float *%ptr2
+  %val2 = load volatile float *%ptr2
+  %val3 = load volatile float *%ptr2
+  %val4 = load volatile float *%ptr2
+  %val5 = load volatile float *%ptr2
+  %val6 = load volatile float *%ptr2
+  %val7 = load volatile float *%ptr2
+  %val8 = load volatile float *%ptr2
+  %val9 = load volatile float *%ptr2
+  %val10 = load volatile float *%ptr2
+  %val11 = load volatile float *%ptr2
+  %val12 = load volatile float *%ptr2
+  %val13 = load volatile float *%ptr2
+  %val14 = load volatile float *%ptr2
+  %val15 = load volatile float *%ptr2
+  %val16 = load volatile float *%ptr2
+
+  %ext0 = fpext float %val0 to double
+  %ext1 = fpext float %val1 to double
+  %ext2 = fpext float %val2 to double
+  %ext3 = fpext float %val3 to double
+  %ext4 = fpext float %val4 to double
+  %ext5 = fpext float %val5 to double
+  %ext6 = fpext float %val6 to double
+  %ext7 = fpext float %val7 to double
+  %ext8 = fpext float %val8 to double
+  %ext9 = fpext float %val9 to double
+  %ext10 = fpext float %val10 to double
+  %ext11 = fpext float %val11 to double
+  %ext12 = fpext float %val12 to double
+  %ext13 = fpext float %val13 to double
+  %ext14 = fpext float %val14 to double
+  %ext15 = fpext float %val15 to double
+  %ext16 = fpext float %val16 to double
+
+  store volatile float %val0, float *%ptr2
+  store volatile float %val1, float *%ptr2
+  store volatile float %val2, float *%ptr2
+  store volatile float %val3, float *%ptr2
+  store volatile float %val4, float *%ptr2
+  store volatile float %val5, float *%ptr2
+  store volatile float %val6, float *%ptr2
+  store volatile float %val7, float *%ptr2
+  store volatile float %val8, float *%ptr2
+  store volatile float %val9, float *%ptr2
+  store volatile float %val10, float *%ptr2
+  store volatile float %val11, float *%ptr2
+  store volatile float %val12, float *%ptr2
+  store volatile float %val13, float *%ptr2
+  store volatile float %val14, float *%ptr2
+  store volatile float %val15, float *%ptr2
+  store volatile float %val16, float *%ptr2
+
+  store volatile double %ext0, double *%ptr1
+  store volatile double %ext1, double *%ptr1
+  store volatile double %ext2, double *%ptr1
+  store volatile double %ext3, double *%ptr1
+  store volatile double %ext4, double *%ptr1
+  store volatile double %ext5, double *%ptr1
+  store volatile double %ext6, double *%ptr1
+  store volatile double %ext7, double *%ptr1
+  store volatile double %ext8, double *%ptr1
+  store volatile double %ext9, double *%ptr1
+  store volatile double %ext10, double *%ptr1
+  store volatile double %ext11, double *%ptr1
+  store volatile double %ext12, double *%ptr1
+  store volatile double %ext13, double *%ptr1
+  store volatile double %ext14, double *%ptr1
+  store volatile double %ext15, double *%ptr1
+  store volatile double %ext16, double *%ptr1
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-03.ll b/test/CodeGen/SystemZ/fp-conv-03.ll
index 703a141e3e12..d42ce6650aaf 100644
--- a/test/CodeGen/SystemZ/fp-conv-03.ll
+++ b/test/CodeGen/SystemZ/fp-conv-03.ll
@@ -4,7 +4,7 @@
 
 ; Check register extension.
 define void @f1(fp128 *%dst, float %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxebr %f0, %f0
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -16,7 +16,7 @@ define void @f1(fp128 *%dst, float %val) {
 
 ; Check the low end of the LXEB range.
 define void @f2(fp128 *%dst, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lxeb %f0, 0(%r3)
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -29,7 +29,7 @@ define void @f2(fp128 *%dst, float *%ptr) {
 
 ; Check the high end of the aligned LXEB range.
 define void @f3(fp128 *%dst, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lxeb %f0, 4092(%r3)
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -44,7 +44,7 @@ define void @f3(fp128 *%dst, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f4(fp128 *%dst, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r3, 4096
 ; CHECK: lxeb %f0, 0(%r3)
 ; CHECK: std %f0, 0(%r2)
@@ -59,7 +59,7 @@ define void @f4(fp128 *%dst, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define void @f5(fp128 *%dst, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r3, -4
 ; CHECK: lxeb %f0, 0(%r3)
 ; CHECK: std %f0, 0(%r2)
@@ -74,7 +74,7 @@ define void @f5(fp128 *%dst, float *%base) {
 
 ; Check that LXEB allows indices.
 define void @f6(fp128 *%dst, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r4, 2
 ; CHECK: lxeb %f0, 400(%r1,%r3)
 ; CHECK: std %f0, 0(%r2)
@@ -87,3 +87,84 @@ define void @f6(fp128 *%dst, float *%base, i64 %index) {
   store fp128 %res, fp128 *%dst
   ret void
 }
+
+; Test a case where we spill the source of at least one LXEBR.  We want
+; to use LXEB if possible.
+define void @f7(fp128 *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f7:
+; CHECK: lxeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile float *%ptr2
+  %val1 = load volatile float *%ptr2
+  %val2 = load volatile float *%ptr2
+  %val3 = load volatile float *%ptr2
+  %val4 = load volatile float *%ptr2
+  %val5 = load volatile float *%ptr2
+  %val6 = load volatile float *%ptr2
+  %val7 = load volatile float *%ptr2
+  %val8 = load volatile float *%ptr2
+  %val9 = load volatile float *%ptr2
+  %val10 = load volatile float *%ptr2
+  %val11 = load volatile float *%ptr2
+  %val12 = load volatile float *%ptr2
+  %val13 = load volatile float *%ptr2
+  %val14 = load volatile float *%ptr2
+  %val15 = load volatile float *%ptr2
+  %val16 = load volatile float *%ptr2
+
+  %ext0 = fpext float %val0 to fp128
+  %ext1 = fpext float %val1 to fp128
+  %ext2 = fpext float %val2 to fp128
+  %ext3 = fpext float %val3 to fp128
+  %ext4 = fpext float %val4 to fp128
+  %ext5 = fpext float %val5 to fp128
+  %ext6 = fpext float %val6 to fp128
+  %ext7 = fpext float %val7 to fp128
+  %ext8 = fpext float %val8 to fp128
+  %ext9 = fpext float %val9 to fp128
+  %ext10 = fpext float %val10 to fp128
+  %ext11 = fpext float %val11 to fp128
+  %ext12 = fpext float %val12 to fp128
+  %ext13 = fpext float %val13 to fp128
+  %ext14 = fpext float %val14 to fp128
+  %ext15 = fpext float %val15 to fp128
+  %ext16 = fpext float %val16 to fp128
+
+  store volatile float %val0, float *%ptr2
+  store volatile float %val1, float *%ptr2
+  store volatile float %val2, float *%ptr2
+  store volatile float %val3, float *%ptr2
+  store volatile float %val4, float *%ptr2
+  store volatile float %val5, float *%ptr2
+  store volatile float %val6, float *%ptr2
+  store volatile float %val7, float *%ptr2
+  store volatile float %val8, float *%ptr2
+  store volatile float %val9, float *%ptr2
+  store volatile float %val10, float *%ptr2
+  store volatile float %val11, float *%ptr2
+  store volatile float %val12, float *%ptr2
+  store volatile float %val13, float *%ptr2
+  store volatile float %val14, float *%ptr2
+  store volatile float %val15, float *%ptr2
+  store volatile float %val16, float *%ptr2
+
+  store volatile fp128 %ext0, fp128 *%ptr1
+  store volatile fp128 %ext1, fp128 *%ptr1
+  store volatile fp128 %ext2, fp128 *%ptr1
+  store volatile fp128 %ext3, fp128 *%ptr1
+  store volatile fp128 %ext4, fp128 *%ptr1
+  store volatile fp128 %ext5, fp128 *%ptr1
+  store volatile fp128 %ext6, fp128 *%ptr1
+  store volatile fp128 %ext7, fp128 *%ptr1
+  store volatile fp128 %ext8, fp128 *%ptr1
+  store volatile fp128 %ext9, fp128 *%ptr1
+  store volatile fp128 %ext10, fp128 *%ptr1
+  store volatile fp128 %ext11, fp128 *%ptr1
+  store volatile fp128 %ext12, fp128 *%ptr1
+  store volatile fp128 %ext13, fp128 *%ptr1
+  store volatile fp128 %ext14, fp128 *%ptr1
+  store volatile fp128 %ext15, fp128 *%ptr1
+  store volatile fp128 %ext16, fp128 *%ptr1
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-04.ll b/test/CodeGen/SystemZ/fp-conv-04.ll
index b7b516693430..518d6c28d867 100644
--- a/test/CodeGen/SystemZ/fp-conv-04.ll
+++ b/test/CodeGen/SystemZ/fp-conv-04.ll
@@ -4,7 +4,7 @@
 
 ; Check register extension.
 define void @f1(fp128 *%dst, double %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxdbr %f0, %f0
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -16,7 +16,7 @@ define void @f1(fp128 *%dst, double %val) {
 
 ; Check the low end of the LXDB range.
 define void @f2(fp128 *%dst, double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lxdb %f0, 0(%r3)
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -29,7 +29,7 @@ define void @f2(fp128 *%dst, double *%ptr) {
 
 ; Check the high end of the aligned LXDB range.
 define void @f3(fp128 *%dst, double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lxdb %f0, 4088(%r3)
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -44,7 +44,7 @@ define void @f3(fp128 *%dst, double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f4(fp128 *%dst, double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r3, 4096
 ; CHECK: lxdb %f0, 0(%r3)
 ; CHECK: std %f0, 0(%r2)
@@ -59,7 +59,7 @@ define void @f4(fp128 *%dst, double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define void @f5(fp128 *%dst, double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r3, -8
 ; CHECK: lxdb %f0, 0(%r3)
 ; CHECK: std %f0, 0(%r2)
@@ -74,7 +74,7 @@ define void @f5(fp128 *%dst, double *%base) {
 
 ; Check that LXDB allows indices.
 define void @f6(fp128 *%dst, double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r4, 3
 ; CHECK: lxdb %f0, 800(%r1,%r3)
 ; CHECK: std %f0, 0(%r2)
@@ -87,3 +87,84 @@ define void @f6(fp128 *%dst, double *%base, i64 %index) {
   store fp128 %res, fp128 *%dst
   ret void
 }
+
+; Test a case where we spill the source of at least one LXDBR.  We want
+; to use LXDB if possible.
+define void @f7(fp128 *%ptr1, double *%ptr2) {
+; CHECK-LABEL: f7:
+; CHECK: lxdb {{%f[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %val0 = load volatile double *%ptr2
+  %val1 = load volatile double *%ptr2
+  %val2 = load volatile double *%ptr2
+  %val3 = load volatile double *%ptr2
+  %val4 = load volatile double *%ptr2
+  %val5 = load volatile double *%ptr2
+  %val6 = load volatile double *%ptr2
+  %val7 = load volatile double *%ptr2
+  %val8 = load volatile double *%ptr2
+  %val9 = load volatile double *%ptr2
+  %val10 = load volatile double *%ptr2
+  %val11 = load volatile double *%ptr2
+  %val12 = load volatile double *%ptr2
+  %val13 = load volatile double *%ptr2
+  %val14 = load volatile double *%ptr2
+  %val15 = load volatile double *%ptr2
+  %val16 = load volatile double *%ptr2
+
+  %ext0 = fpext double %val0 to fp128
+  %ext1 = fpext double %val1 to fp128
+  %ext2 = fpext double %val2 to fp128
+  %ext3 = fpext double %val3 to fp128
+  %ext4 = fpext double %val4 to fp128
+  %ext5 = fpext double %val5 to fp128
+  %ext6 = fpext double %val6 to fp128
+  %ext7 = fpext double %val7 to fp128
+  %ext8 = fpext double %val8 to fp128
+  %ext9 = fpext double %val9 to fp128
+  %ext10 = fpext double %val10 to fp128
+  %ext11 = fpext double %val11 to fp128
+  %ext12 = fpext double %val12 to fp128
+  %ext13 = fpext double %val13 to fp128
+  %ext14 = fpext double %val14 to fp128
+  %ext15 = fpext double %val15 to fp128
+  %ext16 = fpext double %val16 to fp128
+
+  store volatile double %val0, double *%ptr2
+  store volatile double %val1, double *%ptr2
+  store volatile double %val2, double *%ptr2
+  store volatile double %val3, double *%ptr2
+  store volatile double %val4, double *%ptr2
+  store volatile double %val5, double *%ptr2
+  store volatile double %val6, double *%ptr2
+  store volatile double %val7, double *%ptr2
+  store volatile double %val8, double *%ptr2
+  store volatile double %val9, double *%ptr2
+  store volatile double %val10, double *%ptr2
+  store volatile double %val11, double *%ptr2
+  store volatile double %val12, double *%ptr2
+  store volatile double %val13, double *%ptr2
+  store volatile double %val14, double *%ptr2
+  store volatile double %val15, double *%ptr2
+  store volatile double %val16, double *%ptr2
+
+  store volatile fp128 %ext0, fp128 *%ptr1
+  store volatile fp128 %ext1, fp128 *%ptr1
+  store volatile fp128 %ext2, fp128 *%ptr1
+  store volatile fp128 %ext3, fp128 *%ptr1
+  store volatile fp128 %ext4, fp128 *%ptr1
+  store volatile fp128 %ext5, fp128 *%ptr1
+  store volatile fp128 %ext6, fp128 *%ptr1
+  store volatile fp128 %ext7, fp128 *%ptr1
+  store volatile fp128 %ext8, fp128 *%ptr1
+  store volatile fp128 %ext9, fp128 *%ptr1
+  store volatile fp128 %ext10, fp128 *%ptr1
+  store volatile fp128 %ext11, fp128 *%ptr1
+  store volatile fp128 %ext12, fp128 *%ptr1
+  store volatile fp128 %ext13, fp128 *%ptr1
+  store volatile fp128 %ext14, fp128 *%ptr1
+  store volatile fp128 %ext15, fp128 *%ptr1
+  store volatile fp128 %ext16, fp128 *%ptr1
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-05.ll b/test/CodeGen/SystemZ/fp-conv-05.ll
index 2d887324c3e6..deeffbf30c0e 100644
--- a/test/CodeGen/SystemZ/fp-conv-05.ll
+++ b/test/CodeGen/SystemZ/fp-conv-05.ll
@@ -4,7 +4,7 @@
 
 ; Check i32->f32.
 define float @f1(i32 %i) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cefbr %f0, %r2
 ; CHECK: br %r14
   %conv = sitofp i32 %i to float
@@ -13,7 +13,7 @@ define float @f1(i32 %i) {
 
 ; Check i32->f64.
 define double @f2(i32 %i) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cdfbr %f0, %r2
 ; CHECK: br %r14
   %conv = sitofp i32 %i to double
@@ -22,7 +22,7 @@ define double @f2(i32 %i) {
 
 ; Check i32->f128.
 define void @f3(i32 %i, fp128 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cxfbr %f0, %r2
 ; CHECK: std %f0, 0(%r3)
 ; CHECK: std %f2, 8(%r3)
diff --git a/test/CodeGen/SystemZ/fp-conv-06.ll b/test/CodeGen/SystemZ/fp-conv-06.ll
index 1b39b67d49b8..466c1456a0cb 100644
--- a/test/CodeGen/SystemZ/fp-conv-06.ll
+++ b/test/CodeGen/SystemZ/fp-conv-06.ll
@@ -5,7 +5,7 @@
 ; Check i32->f32.  There is no native instruction, so we must promote
 ; to i64 first.
 define float @f1(i32 %i) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
 ; CHECK: cegbr %f0, [[REGISTER]]
 ; CHECK: br %r14
@@ -15,7 +15,7 @@ define float @f1(i32 %i) {
 
 ; Check i32->f64.
 define double @f2(i32 %i) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
 ; CHECK: cdgbr %f0, [[REGISTER]]
 ; CHECK: br %r14
@@ -25,7 +25,7 @@ define double @f2(i32 %i) {
 
 ; Check i32->f128.
 define void @f3(i32 %i, fp128 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
 ; CHECK: cxgbr %f0, [[REGISTER]]
 ; CHECK: std %f0, 0(%r3)
diff --git a/test/CodeGen/SystemZ/fp-conv-07.ll b/test/CodeGen/SystemZ/fp-conv-07.ll
index 0ebbd37d512d..aba5c4c0195d 100644
--- a/test/CodeGen/SystemZ/fp-conv-07.ll
+++ b/test/CodeGen/SystemZ/fp-conv-07.ll
@@ -4,7 +4,7 @@
 
 ; Test i64->f32.
 define float @f1(i64 %i) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cegbr %f0, %r2
 ; CHECK: br %r14
   %conv = sitofp i64 %i to float
@@ -13,7 +13,7 @@ define float @f1(i64 %i) {
 
 ; Test i64->f64.
 define double @f2(i64 %i) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cdgbr %f0, %r2
 ; CHECK: br %r14
   %conv = sitofp i64 %i to double
@@ -22,7 +22,7 @@ define double @f2(i64 %i) {
 
 ; Test i64->f128.
 define void @f3(i64 %i, fp128 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cxgbr %f0, %r2
 ; CHECK: std %f0, 0(%r3)
 ; CHECK: std %f2, 8(%r3)
diff --git a/test/CodeGen/SystemZ/fp-conv-08.ll b/test/CodeGen/SystemZ/fp-conv-08.ll
index 20c4e30f0796..69b2d13e29f0 100644
--- a/test/CodeGen/SystemZ/fp-conv-08.ll
+++ b/test/CodeGen/SystemZ/fp-conv-08.ll
@@ -5,7 +5,7 @@
 ; Test i64->f32.  There's no native support for unsigned i64-to-fp conversions,
 ; but we should be able to implement them using signed i64-to-fp conversions.
 define float @f1(i64 %i) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cegbr
 ; CHECK: aebr
 ; CHECK: br %r14
@@ -15,9 +15,9 @@ define float @f1(i64 %i) {
 
 ; Test i64->f64.
 define double @f2(i64 %i) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldgr
-; CHECL: adbr
+; CHECK: adbr
 ; CHECK: br %r14
   %conv = uitofp i64 %i to double
   ret double %conv
@@ -25,7 +25,7 @@ define double @f2(i64 %i) {
 
 ; Test i64->f128.
 define void @f3(i64 %i, fp128 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cxgbr
 ; CHECK: axbr
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-conv-09.ll b/test/CodeGen/SystemZ/fp-conv-09.ll
index e3c0352cf84e..6aee73644a16 100644
--- a/test/CodeGen/SystemZ/fp-conv-09.ll
+++ b/test/CodeGen/SystemZ/fp-conv-09.ll
@@ -4,7 +4,7 @@
 
 ; Test f32->i32.
 define i32 @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cfebr %r2, 5, %f0
 ; CHECK: br %r14
   %conv = fptosi float %f to i32
@@ -13,7 +13,7 @@ define i32 @f1(float %f) {
 
 ; Test f64->i32.
 define i32 @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cfdbr %r2, 5, %f0
 ; CHECK: br %r14
   %conv = fptosi double %f to i32
@@ -22,7 +22,7 @@ define i32 @f2(double %f) {
 
 ; Test f128->i32.
 define i32 @f3(fp128 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: cfxbr %r2, 5, %f0
diff --git a/test/CodeGen/SystemZ/fp-conv-10.ll b/test/CodeGen/SystemZ/fp-conv-10.ll
index bb8878bacee8..723d19d2a1de 100644
--- a/test/CodeGen/SystemZ/fp-conv-10.ll
+++ b/test/CodeGen/SystemZ/fp-conv-10.ll
@@ -9,7 +9,7 @@
 
 ; Test f32->i32.
 define i32 @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cebr
 ; CHECK: sebr
 ; CHECK: cfebr
@@ -21,7 +21,7 @@ define i32 @f1(float %f) {
 
 ; Test f64->i32.
 define i32 @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cdbr
 ; CHECK: sdbr
 ; CHECK: cfdbr
@@ -33,7 +33,7 @@ define i32 @f2(double %f) {
 
 ; Test f128->i32.
 define i32 @f3(fp128 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cxbr
 ; CHECK: sxbr
 ; CHECK: cfxbr
diff --git a/test/CodeGen/SystemZ/fp-conv-11.ll b/test/CodeGen/SystemZ/fp-conv-11.ll
index 2a36cb955cb5..46f4cb3a6d89 100644
--- a/test/CodeGen/SystemZ/fp-conv-11.ll
+++ b/test/CodeGen/SystemZ/fp-conv-11.ll
@@ -4,7 +4,7 @@
 
 ; Test f32->i64.
 define i64 @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cgebr %r2, 5, %f0
 ; CHECK: br %r14
   %conv = fptosi float %f to i64
@@ -13,7 +13,7 @@ define i64 @f1(float %f) {
 
 ; Test f64->i64.
 define i64 @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cgdbr %r2, 5, %f0
 ; CHECK: br %r14
   %conv = fptosi double %f to i64
@@ -22,7 +22,7 @@ define i64 @f2(double %f) {
 
 ; Test f128->i64.
 define i64 @f3(fp128 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: cgxbr %r2, 5, %f0
diff --git a/test/CodeGen/SystemZ/fp-conv-12.ll b/test/CodeGen/SystemZ/fp-conv-12.ll
index 4445b14ee8ef..6cc343abdafc 100644
--- a/test/CodeGen/SystemZ/fp-conv-12.ll
+++ b/test/CodeGen/SystemZ/fp-conv-12.ll
@@ -8,7 +8,7 @@
 
 ; Test f32->i64.
 define i64 @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cebr
 ; CHECK: sebr
 ; CHECK: cgebr
@@ -20,7 +20,7 @@ define i64 @f1(float %f) {
 
 ; Test f64->i64.
 define i64 @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cdbr
 ; CHECK: sdbr
 ; CHECK: cgdbr
@@ -32,7 +32,7 @@ define i64 @f2(double %f) {
 
 ; Test f128->i64.
 define i64 @f3(fp128 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cxbr
 ; CHECK: sxbr
 ; CHECK: cgxbr
diff --git a/test/CodeGen/SystemZ/fp-copysign-01.ll b/test/CodeGen/SystemZ/fp-copysign-01.ll
index 458d475bdf3c..50177e5f41bf 100644
--- a/test/CodeGen/SystemZ/fp-copysign-01.ll
+++ b/test/CodeGen/SystemZ/fp-copysign-01.ll
@@ -9,7 +9,7 @@ declare fp128 @copysignl(fp128, fp128) readnone
 
 ; Test f32 copies in which the sign comes from an f32.
 define float @f1(float %a, float %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: %f2
 ; CHECK: cpsdr %f0, %f0, %f2
 ; CHECK: br %r14
@@ -19,7 +19,7 @@ define float @f1(float %a, float %b) {
 
 ; Test f32 copies in which the sign comes from an f64.
 define float @f2(float %a, double %bd) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: %f2
 ; CHECK: cpsdr %f0, %f0, %f2
 ; CHECK: br %r14
@@ -30,7 +30,7 @@ define float @f2(float %a, double %bd) {
 
 ; Test f32 copies in which the sign comes from an f128.
 define float @f3(float %a, fp128 *%bptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2)
 ; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
 ; CHECK: cpsdr %f0, %f0, [[BHIGH]]
@@ -43,7 +43,7 @@ define float @f3(float %a, fp128 *%bptr) {
 
 ; Test f64 copies in which the sign comes from an f32.
 define double @f4(double %a, float %bf) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %f2
 ; CHECK: cpsdr %f0, %f0, %f2
 ; CHECK: br %r14
@@ -54,7 +54,7 @@ define double @f4(double %a, float %bf) {
 
 ; Test f64 copies in which the sign comes from an f64.
 define double @f5(double %a, double %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: %f2
 ; CHECK: cpsdr %f0, %f0, %f2
 ; CHECK: br %r14
@@ -64,7 +64,7 @@ define double @f5(double %a, double %b) {
 
 ; Test f64 copies in which the sign comes from an f128.
 define double @f6(double %a, fp128 *%bptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2)
 ; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
 ; CHECK: cpsdr %f0, %f0, [[BHIGH]]
@@ -79,7 +79,7 @@ define double @f6(double %a, fp128 *%bptr) {
 ; need any register shuffling here; %a should be tied to %c, with CPSDR
 ; just changing the high register.
 define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3)
 ; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3)
 ; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0
@@ -95,7 +95,7 @@ define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) {
 
 ; As above, but the sign comes from an f64.
 define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3)
 ; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3)
 ; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0
@@ -112,7 +112,7 @@ define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) {
 ; As above, but the sign comes from an f128.  Don't require the low part
 ; of %b to be loaded, since it isn't used.
 define void @f9(fp128 *%cptr, fp128 *%aptr, fp128 *%bptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3)
 ; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3)
 ; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r4)
diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll
index 080d45eb2bfb..1b99463327b4 100644
--- a/test/CodeGen/SystemZ/fp-div-01.ll
+++ b/test/CodeGen/SystemZ/fp-div-01.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare float @foo()
+
 ; Check register division.
 define float @f1(float %f1, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: debr %f0, %f2
 ; CHECK: br %r14
   %res = fdiv float %f1, %f2
@@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) {
 
 ; Check the low end of the DEB range.
 define float @f2(float %f1, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: deb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load float *%ptr
@@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) {
 
 ; Check the high end of the aligned DEB range.
 define float @f3(float %f1, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: deb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f4(float %f1, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: deb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define float @f5(float %f1, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: deb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) {
 
 ; Check that DEB allows indices.
 define float @f6(float %f1, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: deb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) {
   %res = fdiv float %f1, %f2
   ret float %res
 }
+
+; Check that divisions of spilled values can use DEB rather than DEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: deb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+  %ptr10 = getelementptr float *%ptr0, i64 20
+
+  %val0 = load float *%ptr0
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+  %val10 = load float *%ptr10
+
+  %ret = call float @foo()
+
+  %div0 = fdiv float %ret, %val0
+  %div1 = fdiv float %div0, %val1
+  %div2 = fdiv float %div1, %val2
+  %div3 = fdiv float %div2, %val3
+  %div4 = fdiv float %div3, %val4
+  %div5 = fdiv float %div4, %val5
+  %div6 = fdiv float %div5, %val6
+  %div7 = fdiv float %div6, %val7
+  %div8 = fdiv float %div7, %val8
+  %div9 = fdiv float %div8, %val9
+  %div10 = fdiv float %div9, %val10
+
+  ret float %div10
+}
diff --git a/test/CodeGen/SystemZ/fp-div-02.ll b/test/CodeGen/SystemZ/fp-div-02.ll
index c5cae15a824b..513664bd9496 100644
--- a/test/CodeGen/SystemZ/fp-div-02.ll
+++ b/test/CodeGen/SystemZ/fp-div-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare double @foo()
+
 ; Check register division.
 define double @f1(double %f1, double %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ddbr %f0, %f2
 ; CHECK: br %r14
   %res = fdiv double %f1, %f2
@@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) {
 
 ; Check the low end of the DDB range.
 define double @f2(double %f1, double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ddb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load double *%ptr
@@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) {
 
 ; Check the high end of the aligned DDB range.
 define double @f3(double %f1, double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ddb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 511
@@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double %f1, double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: ddb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(double %f1, double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: ddb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) {
 
 ; Check that DDB allows indices.
 define double @f6(double %f1, double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: ddb %f0, 800(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) {
   %res = fdiv double %f1, %f2
   ret double %res
 }
+
+; Check that divisions of spilled values can use DDB rather than DDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: ddb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+  %ptr10 = getelementptr double *%ptr0, i64 20
+
+  %val0 = load double *%ptr0
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+  %val10 = load double *%ptr10
+
+  %ret = call double @foo()
+
+  %div0 = fdiv double %ret, %val0
+  %div1 = fdiv double %div0, %val1
+  %div2 = fdiv double %div1, %val2
+  %div3 = fdiv double %div2, %val3
+  %div4 = fdiv double %div3, %val4
+  %div5 = fdiv double %div4, %val5
+  %div6 = fdiv double %div5, %val6
+  %div7 = fdiv double %div6, %val7
+  %div8 = fdiv double %div7, %val8
+  %div9 = fdiv double %div8, %val9
+  %div10 = fdiv double %div9, %val10
+
+  ret double %div10
+}
diff --git a/test/CodeGen/SystemZ/fp-div-03.ll b/test/CodeGen/SystemZ/fp-div-03.ll
index 18f2d7449a80..079b349b4084 100644
--- a/test/CodeGen/SystemZ/fp-div-03.ll
+++ b/test/CodeGen/SystemZ/fp-div-03.ll
@@ -4,7 +4,7 @@
 
 ; There is no memory form of 128-bit division.
 define void @f1(fp128 *%ptr, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxebr %f0, %f0
 ; CHECK: ld %f1, 0(%r2)
 ; CHECK: ld %f3, 8(%r2)
diff --git a/test/CodeGen/SystemZ/fp-move-01.ll b/test/CodeGen/SystemZ/fp-move-01.ll
index 73cd978c5975..d16502f2f7c8 100644
--- a/test/CodeGen/SystemZ/fp-move-01.ll
+++ b/test/CodeGen/SystemZ/fp-move-01.ll
@@ -4,14 +4,14 @@
 
 ; Test f32 moves.
 define float @f1(float %a, float %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ler %f0, %f2
   ret float %b
 }
 
 ; Test f64 moves.
 define double @f2(double %a, double %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldr %f0, %f2
   ret double %b
 }
@@ -19,7 +19,7 @@ define double @f2(double %a, double %b) {
 ; Test f128 moves.  Since f128s are passed by reference, we need to force
 ; a copy by other means.
 define void @f3(fp128 *%x) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lxr
 ; CHECK: axbr
   %val = load volatile fp128 *%x
diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll
index 9d87797c8f92..505ee8d37a4e 100644
--- a/test/CodeGen/SystemZ/fp-move-02.ll
+++ b/test/CodeGen/SystemZ/fp-move-02.ll
@@ -1,11 +1,17 @@
-; Test moves between FPRs and GPRs.
+; Test moves between FPRs and GPRs.  The 32-bit cases test the z10
+; implementation, which has no high-word support.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare i64 @foo()
+declare double @bar()
+@dptr = external global double
+@iptr = external global i64
 
 ; Test 32-bit moves from GPRs to FPRs.  The GPR must be moved into the high
 ; 32 bits of the FPR.
 define float @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 32
 ; CHECK: ldgr %f0, [[REGISTER]]
   %res = bitcast i32 %a to float
@@ -15,8 +21,8 @@ define float @f1(i32 %a) {
 ; Like f1, but create a situation where the shift can be folded with
 ; surrounding code.
 define float @f2(i64 %big) {
-; CHECK: f2:
-; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 31
+; CHECK-LABEL: f2:
+; CHECK: risbg [[REGISTER:%r[0-5]]], %r2, 0, 159, 31
 ; CHECK: ldgr %f0, [[REGISTER]]
   %shift = lshr i64 %big, 1
   %a = trunc i64 %shift to i32
@@ -26,8 +32,8 @@ define float @f2(i64 %big) {
 
 ; Another example of the same thing.
 define float @f3(i64 %big) {
-; CHECK: f3:
-; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 2
+; CHECK-LABEL: f3:
+; CHECK: risbg [[REGISTER:%r[0-5]]], %r2, 0, 159, 2
 ; CHECK: ldgr %f0, [[REGISTER]]
   %shift = ashr i64 %big, 30
   %a = trunc i64 %shift to i32
@@ -37,7 +43,7 @@ define float @f3(i64 %big) {
 
 ; Like f1, but the value to transfer is already in the high 32 bits.
 define float @f4(i64 %big) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r2
 ; CHECK: nilf %r2, 0
 ; CHECK-NOT: %r2
@@ -50,7 +56,7 @@ define float @f4(i64 %big) {
 
 ; Test 64-bit moves from GPRs to FPRs.
 define double @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldgr %f0, %r2
   %res = bitcast i64 %a to double
   ret double %res
@@ -59,11 +65,12 @@ define double @f5(i64 %a) {
 ; Test 128-bit moves from GPRs to FPRs.  i128 isn't a legitimate type,
 ; so this goes through memory.
 define void @f6(fp128 *%a, i128 *%b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lg
 ; CHECK: lg
 ; CHECK: stg
 ; CHECK: stg
+; CHECK: br %r14
   %val = load i128 *%b
   %res = bitcast i128 %val to fp128
   store fp128 %res, fp128 *%a
@@ -73,7 +80,7 @@ define void @f6(fp128 *%a, i128 *%b) {
 ; Test 32-bit moves from FPRs to GPRs.  The high 32 bits of the FPR should
 ; be moved into the low 32 bits of the GPR.
 define i32 @f7(float %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgdr [[REGISTER:%r[0-5]]], %f0
 ; CHECK: srlg %r2, [[REGISTER]], 32
   %res = bitcast float %a to i32
@@ -82,7 +89,7 @@ define i32 @f7(float %a) {
 
 ; Test 64-bit moves from FPRs to GPRs.
 define i64 @f8(double %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lgdr %r2, %f0
   %res = bitcast double %a to i64
   ret i64 %res
@@ -90,7 +97,7 @@ define i64 @f8(double %a) {
 
 ; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6.
 define void @f9(fp128 *%a, i128 *%b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ld
 ; CHECK: ld
 ; CHECK: std
@@ -101,3 +108,286 @@ define void @f9(fp128 *%a, i128 *%b) {
   ret void
 }
 
+; Test cases where the destination of an LGDR needs to be spilled.
+; We shouldn't have any integer stack stores or floating-point loads.
+define void @f10(double %extra) {
+; CHECK-LABEL: f10:
+; CHECK: dptr
+; CHECK-NOT: stg {{.*}}(%r15)
+; CHECK: %loop
+; CHECK-NOT: ld {{.*}}(%r15)
+; CHECK: %exit
+; CHECK: br %r14
+entry:
+  %double0 = load volatile double *@dptr
+  %biased0 = fadd double %double0, %extra
+  %int0 = bitcast double %biased0 to i64
+  %double1 = load volatile double *@dptr
+  %biased1 = fadd double %double1, %extra
+  %int1 = bitcast double %biased1 to i64
+  %double2 = load volatile double *@dptr
+  %biased2 = fadd double %double2, %extra
+  %int2 = bitcast double %biased2 to i64
+  %double3 = load volatile double *@dptr
+  %biased3 = fadd double %double3, %extra
+  %int3 = bitcast double %biased3 to i64
+  %double4 = load volatile double *@dptr
+  %biased4 = fadd double %double4, %extra
+  %int4 = bitcast double %biased4 to i64
+  %double5 = load volatile double *@dptr
+  %biased5 = fadd double %double5, %extra
+  %int5 = bitcast double %biased5 to i64
+  %double6 = load volatile double *@dptr
+  %biased6 = fadd double %double6, %extra
+  %int6 = bitcast double %biased6 to i64
+  %double7 = load volatile double *@dptr
+  %biased7 = fadd double %double7, %extra
+  %int7 = bitcast double %biased7 to i64
+  %double8 = load volatile double *@dptr
+  %biased8 = fadd double %double8, %extra
+  %int8 = bitcast double %biased8 to i64
+  %double9 = load volatile double *@dptr
+  %biased9 = fadd double %double9, %extra
+  %int9 = bitcast double %biased9 to i64
+  br label %loop
+
+loop:
+  %start = call i64 @foo()
+  %or0 = or i64 %start, %int0
+  %or1 = or i64 %or0, %int1
+  %or2 = or i64 %or1, %int2
+  %or3 = or i64 %or2, %int3
+  %or4 = or i64 %or3, %int4
+  %or5 = or i64 %or4, %int5
+  %or6 = or i64 %or5, %int6
+  %or7 = or i64 %or6, %int7
+  %or8 = or i64 %or7, %int8
+  %or9 = or i64 %or8, %int9
+  store i64 %or9, i64 *@iptr
+  %cont = icmp ne i64 %start, 1
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; ...likewise LDGR, with the requirements the other way around.
+define void @f11(i64 %mask) {
+; CHECK-LABEL: f11:
+; CHECK: iptr
+; CHECK-NOT: std {{.*}}(%r15)
+; CHECK: %loop
+; CHECK-NOT: lg {{.*}}(%r15)
+; CHECK: %exit
+; CHECK: br %r14
+entry:
+  %int0 = load volatile i64 *@iptr
+  %masked0 = and i64 %int0, %mask
+  %double0 = bitcast i64 %masked0 to double
+  %int1 = load volatile i64 *@iptr
+  %masked1 = and i64 %int1, %mask
+  %double1 = bitcast i64 %masked1 to double
+  %int2 = load volatile i64 *@iptr
+  %masked2 = and i64 %int2, %mask
+  %double2 = bitcast i64 %masked2 to double
+  %int3 = load volatile i64 *@iptr
+  %masked3 = and i64 %int3, %mask
+  %double3 = bitcast i64 %masked3 to double
+  %int4 = load volatile i64 *@iptr
+  %masked4 = and i64 %int4, %mask
+  %double4 = bitcast i64 %masked4 to double
+  %int5 = load volatile i64 *@iptr
+  %masked5 = and i64 %int5, %mask
+  %double5 = bitcast i64 %masked5 to double
+  %int6 = load volatile i64 *@iptr
+  %masked6 = and i64 %int6, %mask
+  %double6 = bitcast i64 %masked6 to double
+  %int7 = load volatile i64 *@iptr
+  %masked7 = and i64 %int7, %mask
+  %double7 = bitcast i64 %masked7 to double
+  %int8 = load volatile i64 *@iptr
+  %masked8 = and i64 %int8, %mask
+  %double8 = bitcast i64 %masked8 to double
+  %int9 = load volatile i64 *@iptr
+  %masked9 = and i64 %int9, %mask
+  %double9 = bitcast i64 %masked9 to double
+  br label %loop
+
+loop:
+  %start = call double @bar()
+  %add0 = fadd double %start, %double0
+  %add1 = fadd double %add0, %double1
+  %add2 = fadd double %add1, %double2
+  %add3 = fadd double %add2, %double3
+  %add4 = fadd double %add3, %double4
+  %add5 = fadd double %add4, %double5
+  %add6 = fadd double %add5, %double6
+  %add7 = fadd double %add6, %double7
+  %add8 = fadd double %add7, %double8
+  %add9 = fadd double %add8, %double9
+  store double %add9, double *@dptr
+  %cont = fcmp one double %start, 1.0
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Test cases where the source of an LDGR needs to be spilled.
+; We shouldn't have any floating-point stack stores or integer loads.
+define void @f12() {
+; CHECK-LABEL: f12:
+; CHECK: %loop
+; CHECK-NOT: std {{.*}}(%r15)
+; CHECK: %exit
+; CHECK: foo@PLT
+; CHECK-NOT: lg {{.*}}(%r15)
+; CHECK: foo@PLT
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %int0 = phi i64 [ 0, %entry ], [ %add0, %loop ]
+  %int1 = phi i64 [ 0, %entry ], [ %add1, %loop ]
+  %int2 = phi i64 [ 0, %entry ], [ %add2, %loop ]
+  %int3 = phi i64 [ 0, %entry ], [ %add3, %loop ]
+  %int4 = phi i64 [ 0, %entry ], [ %add4, %loop ]
+  %int5 = phi i64 [ 0, %entry ], [ %add5, %loop ]
+  %int6 = phi i64 [ 0, %entry ], [ %add6, %loop ]
+  %int7 = phi i64 [ 0, %entry ], [ %add7, %loop ]
+  %int8 = phi i64 [ 0, %entry ], [ %add8, %loop ]
+  %int9 = phi i64 [ 0, %entry ], [ %add9, %loop ]
+
+  %bias = call i64 @foo()
+  %add0 = add i64 %int0, %bias
+  %add1 = add i64 %int1, %bias
+  %add2 = add i64 %int2, %bias
+  %add3 = add i64 %int3, %bias
+  %add4 = add i64 %int4, %bias
+  %add5 = add i64 %int5, %bias
+  %add6 = add i64 %int6, %bias
+  %add7 = add i64 %int7, %bias
+  %add8 = add i64 %int8, %bias
+  %add9 = add i64 %int9, %bias
+  %cont = icmp ne i64 %bias, 1
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  %unused1 = call i64 @foo()
+  %factor = load volatile double *@dptr
+
+  %conv0 = bitcast i64 %add0 to double
+  %mul0 = fmul double %conv0, %factor
+  store volatile double %mul0, double *@dptr
+  %conv1 = bitcast i64 %add1 to double
+  %mul1 = fmul double %conv1, %factor
+  store volatile double %mul1, double *@dptr
+  %conv2 = bitcast i64 %add2 to double
+  %mul2 = fmul double %conv2, %factor
+  store volatile double %mul2, double *@dptr
+  %conv3 = bitcast i64 %add3 to double
+  %mul3 = fmul double %conv3, %factor
+  store volatile double %mul3, double *@dptr
+  %conv4 = bitcast i64 %add4 to double
+  %mul4 = fmul double %conv4, %factor
+  store volatile double %mul4, double *@dptr
+  %conv5 = bitcast i64 %add5 to double
+  %mul5 = fmul double %conv5, %factor
+  store volatile double %mul5, double *@dptr
+  %conv6 = bitcast i64 %add6 to double
+  %mul6 = fmul double %conv6, %factor
+  store volatile double %mul6, double *@dptr
+  %conv7 = bitcast i64 %add7 to double
+  %mul7 = fmul double %conv7, %factor
+  store volatile double %mul7, double *@dptr
+  %conv8 = bitcast i64 %add8 to double
+  %mul8 = fmul double %conv8, %factor
+  store volatile double %mul8, double *@dptr
+  %conv9 = bitcast i64 %add9 to double
+  %mul9 = fmul double %conv9, %factor
+  store volatile double %mul9, double *@dptr
+
+  %unused2 = call i64 @foo()
+
+  ret void
+}
+
+; ...likewise LGDR, with the requirements the other way around.
+define void @f13() {
+; CHECK-LABEL: f13:
+; CHECK: %loop
+; CHECK-NOT: stg {{.*}}(%r15)
+; CHECK: %exit
+; CHECK: foo@PLT
+; CHECK-NOT: ld {{.*}}(%r15)
+; CHECK: foo@PLT
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %double0 = phi double [ 1.0, %entry ], [ %mul0, %loop ]
+  %double1 = phi double [ 1.0, %entry ], [ %mul1, %loop ]
+  %double2 = phi double [ 1.0, %entry ], [ %mul2, %loop ]
+  %double3 = phi double [ 1.0, %entry ], [ %mul3, %loop ]
+  %double4 = phi double [ 1.0, %entry ], [ %mul4, %loop ]
+  %double5 = phi double [ 1.0, %entry ], [ %mul5, %loop ]
+  %double6 = phi double [ 1.0, %entry ], [ %mul6, %loop ]
+  %double7 = phi double [ 1.0, %entry ], [ %mul7, %loop ]
+  %double8 = phi double [ 1.0, %entry ], [ %mul8, %loop ]
+  %double9 = phi double [ 1.0, %entry ], [ %mul9, %loop ]
+
+  %factor = call double @bar()
+  %mul0 = fmul double %double0, %factor
+  %mul1 = fmul double %double1, %factor
+  %mul2 = fmul double %double2, %factor
+  %mul3 = fmul double %double3, %factor
+  %mul4 = fmul double %double4, %factor
+  %mul5 = fmul double %double5, %factor
+  %mul6 = fmul double %double6, %factor
+  %mul7 = fmul double %double7, %factor
+  %mul8 = fmul double %double8, %factor
+  %mul9 = fmul double %double9, %factor
+  %cont = fcmp one double %factor, 1.0
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  %unused1 = call i64 @foo()
+  %bias = load volatile i64 *@iptr
+
+  %conv0 = bitcast double %mul0 to i64
+  %add0 = add i64 %conv0, %bias
+  store volatile i64 %add0, i64 *@iptr
+  %conv1 = bitcast double %mul1 to i64
+  %add1 = add i64 %conv1, %bias
+  store volatile i64 %add1, i64 *@iptr
+  %conv2 = bitcast double %mul2 to i64
+  %add2 = add i64 %conv2, %bias
+  store volatile i64 %add2, i64 *@iptr
+  %conv3 = bitcast double %mul3 to i64
+  %add3 = add i64 %conv3, %bias
+  store volatile i64 %add3, i64 *@iptr
+  %conv4 = bitcast double %mul4 to i64
+  %add4 = add i64 %conv4, %bias
+  store volatile i64 %add4, i64 *@iptr
+  %conv5 = bitcast double %mul5 to i64
+  %add5 = add i64 %conv5, %bias
+  store volatile i64 %add5, i64 *@iptr
+  %conv6 = bitcast double %mul6 to i64
+  %add6 = add i64 %conv6, %bias
+  store volatile i64 %add6, i64 *@iptr
+  %conv7 = bitcast double %mul7 to i64
+  %add7 = add i64 %conv7, %bias
+  store volatile i64 %add7, i64 *@iptr
+  %conv8 = bitcast double %mul8 to i64
+  %add8 = add i64 %conv8, %bias
+  store volatile i64 %add8, i64 *@iptr
+  %conv9 = bitcast double %mul9 to i64
+  %add9 = add i64 %conv9, %bias
+  store volatile i64 %add9, i64 *@iptr
+
+  %unused2 = call i64 @foo()
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-move-03.ll b/test/CodeGen/SystemZ/fp-move-03.ll
index 37dbdfad7b87..1273358f65ad 100644
--- a/test/CodeGen/SystemZ/fp-move-03.ll
+++ b/test/CodeGen/SystemZ/fp-move-03.ll
@@ -4,7 +4,7 @@
 
 ; Test the low end of the LE range.
 define float @f1(float *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: le %f0, 0(%r2)
 ; CHECK: br %r14
   %val = load float *%src
@@ -13,7 +13,7 @@ define float @f1(float *%src) {
 
 ; Test the high end of the LE range.
 define float @f2(float *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: le %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 1023
@@ -23,7 +23,7 @@ define float @f2(float *%src) {
 
 ; Check the next word up, which should use LEY instead of LE.
 define float @f3(float *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ley %f0, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 1024
@@ -33,7 +33,7 @@ define float @f3(float *%src) {
 
 ; Check the high end of the aligned LEY range.
 define float @f4(float *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ley %f0, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 131071
@@ -44,7 +44,7 @@ define float @f4(float *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f5(float *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: le %f0, 0(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define float @f5(float *%src) {
 
 ; Check the high end of the negative aligned LEY range.
 define float @f6(float *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ley %f0, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 -1
@@ -65,7 +65,7 @@ define float @f6(float *%src) {
 
 ; Check the low end of the LEY range.
 define float @f7(float *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ley %f0, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 -131072
@@ -76,7 +76,7 @@ define float @f7(float *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f8(float *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524292
 ; CHECK: le %f0, 0(%r2)
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define float @f8(float *%src) {
 
 ; Check that LE allows an index.
 define float @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: le %f0, 4092({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -99,7 +99,7 @@ define float @f9(i64 %src, i64 %index) {
 
 ; Check that LEY allows an index.
 define float @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/fp-move-04.ll b/test/CodeGen/SystemZ/fp-move-04.ll
index 72e90d1fffd1..1b0278fdee0f 100644
--- a/test/CodeGen/SystemZ/fp-move-04.ll
+++ b/test/CodeGen/SystemZ/fp-move-04.ll
@@ -4,7 +4,7 @@
 
 ; Test the low end of the LD range.
 define double @f1(double *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: br %r14
   %val = load double *%src
@@ -13,7 +13,7 @@ define double @f1(double *%src) {
 
 ; Test the high end of the LD range.
 define double @f2(double *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ld %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 511
@@ -23,7 +23,7 @@ define double @f2(double *%src) {
 
 ; Check the next doubleword up, which should use LDY instead of LD.
 define double @f3(double *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ldy %f0, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 512
@@ -33,7 +33,7 @@ define double @f3(double *%src) {
 
 ; Check the high end of the aligned LDY range.
 define double @f4(double *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldy %f0, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 65535
@@ -44,7 +44,7 @@ define double @f4(double *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f5(double *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define double @f5(double *%src) {
 
 ; Check the high end of the negative aligned LDY range.
 define double @f6(double *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ldy %f0, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 -1
@@ -65,7 +65,7 @@ define double @f6(double *%src) {
 
 ; Check the low end of the LDY range.
 define double @f7(double *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ldy %f0, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 -65536
@@ -76,7 +76,7 @@ define double @f7(double *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f8(double *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524296
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define double @f8(double *%src) {
 
 ; Check that LD allows an index.
 define double @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ld %f0, 4095({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -99,7 +99,7 @@ define double @f9(i64 %src, i64 %index) {
 
 ; Check that LDY allows an index.
 define double @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ldy %f0, 4096({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll
index 66ad048fbed7..d302a0f9c633 100644
--- a/test/CodeGen/SystemZ/fp-move-05.ll
+++ b/test/CodeGen/SystemZ/fp-move-05.ll
@@ -4,7 +4,7 @@
 
 ; Check loads with no offset.
 define double @f1(i64 %src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: br %r14
@@ -16,7 +16,7 @@ define double @f1(i64 %src) {
 
 ; Check the highest aligned offset that allows LD for both halves.
 define double @f2(i64 %src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ld %f0, 4080(%r2)
 ; CHECK: ld %f2, 4088(%r2)
 ; CHECK: br %r14
@@ -29,7 +29,7 @@ define double @f2(i64 %src) {
 
 ; Check the next doubleword up, which requires a mixture of LD and LDY.
 define double @f3(i64 %src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ld %f0, 4088(%r2)
 ; CHECK: ldy %f2, 4096(%r2)
 ; CHECK: br %r14
@@ -42,7 +42,7 @@ define double @f3(i64 %src) {
 
 ; Check the next doubleword after that, which requires LDY for both halves.
 define double @f4(i64 %src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldy %f0, 4096(%r2)
 ; CHECK: ldy %f2, 4104(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define double @f4(i64 %src) {
 
 ; Check the highest aligned offset that allows LDY for both halves.
 define double @f5(i64 %src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldy %f0, 524272(%r2)
 ; CHECK: ldy %f2, 524280(%r2)
 ; CHECK: br %r14
@@ -69,7 +69,7 @@ define double @f5(i64 %src) {
 ; Check the next doubleword up, which requires separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f6(i64 %src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lay %r1, 524280(%r2)
 ; CHECK: ld %f0, 0(%r1)
 ; CHECK: ld %f2, 8(%r1)
@@ -84,7 +84,7 @@ define double @f6(i64 %src) {
 ; Check the highest aligned negative offset, which needs a combination of
 ; LDY and LD.
 define double @f7(i64 %src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ldy %f0, -8(%r2)
 ; CHECK: ld %f2, 0(%r2)
 ; CHECK: br %r14
@@ -97,7 +97,7 @@ define double @f7(i64 %src) {
 
 ; Check the next doubleword down, which requires LDY for both halves.
 define double @f8(i64 %src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ldy %f0, -16(%r2)
 ; CHECK: ldy %f2, -8(%r2)
 ; CHECK: br %r14
@@ -110,7 +110,7 @@ define double @f8(i64 %src) {
 
 ; Check the lowest offset that allows LDY for both halves.
 define double @f9(i64 %src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ldy %f0, -524288(%r2)
 ; CHECK: ldy %f2, -524280(%r2)
 ; CHECK: br %r14
@@ -124,7 +124,7 @@ define double @f9(i64 %src) {
 ; Check the next doubleword down, which requires separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f10(i64 %src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r2, -524296
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: ld %f2, 8(%r2)
@@ -138,7 +138,7 @@ define double @f10(i64 %src) {
 
 ; Check that indices are allowed.
 define double @f11(i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: ld %f0, 4088({{%r2,%r3|%r3,%r2}})
 ; CHECK: ldy %f2, 4096({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-move-06.ll b/test/CodeGen/SystemZ/fp-move-06.ll
index b660c2ac223d..da67691729e3 100644
--- a/test/CodeGen/SystemZ/fp-move-06.ll
+++ b/test/CodeGen/SystemZ/fp-move-06.ll
@@ -4,7 +4,7 @@
 
 ; Test the low end of the STE range.
 define void @f1(float *%ptr, float %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ste %f0, 0(%r2)
 ; CHECK: br %r14
   store float %val, float *%ptr
@@ -13,7 +13,7 @@ define void @f1(float *%ptr, float %val) {
 
 ; Test the high end of the STE range.
 define void @f2(float *%src, float %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ste %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 1023
@@ -23,7 +23,7 @@ define void @f2(float *%src, float %val) {
 
 ; Check the next word up, which should use STEY instead of STE.
 define void @f3(float *%src, float %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stey %f0, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 1024
@@ -33,7 +33,7 @@ define void @f3(float *%src, float %val) {
 
 ; Check the high end of the aligned STEY range.
 define void @f4(float *%src, float %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stey %f0, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 131071
@@ -44,7 +44,7 @@ define void @f4(float *%src, float %val) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f5(float *%src, float %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: ste %f0, 0(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define void @f5(float *%src, float %val) {
 
 ; Check the high end of the negative aligned STEY range.
 define void @f6(float *%src, float %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: stey %f0, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 -1
@@ -65,7 +65,7 @@ define void @f6(float *%src, float %val) {
 
 ; Check the low end of the STEY range.
 define void @f7(float *%src, float %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: stey %f0, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%src, i64 -131072
@@ -76,7 +76,7 @@ define void @f7(float *%src, float %val) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f8(float *%src, float %val) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524292
 ; CHECK: ste %f0, 0(%r2)
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define void @f8(float *%src, float %val) {
 
 ; Check that STE allows an index.
 define void @f9(i64 %src, i64 %index, float %val) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ste %f0, 4092({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -99,7 +99,7 @@ define void @f9(i64 %src, i64 %index, float %val) {
 
 ; Check that STEY allows an index.
 define void @f10(i64 %src, i64 %index, float %val) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: stey %f0, 4096({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/fp-move-07.ll b/test/CodeGen/SystemZ/fp-move-07.ll
index 0cb0474157d0..a4f1820d1204 100644
--- a/test/CodeGen/SystemZ/fp-move-07.ll
+++ b/test/CodeGen/SystemZ/fp-move-07.ll
@@ -4,7 +4,7 @@
 
 ; Test the low end of the STD range.
 define void @f1(double *%src, double %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: br %r14
   store double %val, double *%src
@@ -13,7 +13,7 @@ define void @f1(double *%src, double %val) {
 
 ; Test the high end of the STD range.
 define void @f2(double *%src, double %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: std %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 511
@@ -23,7 +23,7 @@ define void @f2(double *%src, double %val) {
 
 ; Check the next doubleword up, which should use STDY instead of STD.
 define void @f3(double *%src, double %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stdy %f0, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 512
@@ -33,7 +33,7 @@ define void @f3(double *%src, double %val) {
 
 ; Check the high end of the aligned STDY range.
 define void @f4(double *%src, double %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stdy %f0, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 65535
@@ -44,7 +44,7 @@ define void @f4(double *%src, double %val) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f5(double *%src, double %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define void @f5(double *%src, double %val) {
 
 ; Check the high end of the negative aligned STDY range.
 define void @f6(double *%src, double %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: stdy %f0, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 -1
@@ -65,7 +65,7 @@ define void @f6(double *%src, double %val) {
 
 ; Check the low end of the STDY range.
 define void @f7(double *%src, double %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: stdy %f0, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%src, i64 -65536
@@ -76,7 +76,7 @@ define void @f7(double *%src, double %val) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f8(double *%src, double %val) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524296
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define void @f8(double *%src, double %val) {
 
 ; Check that STD allows an index.
 define void @f9(i64 %src, i64 %index, double %val) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: std %f0, 4095({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -99,7 +99,7 @@ define void @f9(i64 %src, i64 %index, double %val) {
 
 ; Check that STDY allows an index.
 define void @f10(i64 %src, i64 %index, double %val) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: stdy %f0, 4096({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/fp-move-08.ll b/test/CodeGen/SystemZ/fp-move-08.ll
index 448d2ace1762..88038abc0dab 100644
--- a/test/CodeGen/SystemZ/fp-move-08.ll
+++ b/test/CodeGen/SystemZ/fp-move-08.ll
@@ -4,7 +4,7 @@
 
 ; Check stores with no offset.
 define void @f1(i64 %src, double %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
@@ -16,7 +16,7 @@ define void @f1(i64 %src, double %val) {
 
 ; Check the highest aligned offset that allows STD for both halves.
 define void @f2(i64 %src, double %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: std %f0, 4080(%r2)
 ; CHECK: std %f2, 4088(%r2)
 ; CHECK: br %r14
@@ -29,7 +29,7 @@ define void @f2(i64 %src, double %val) {
 
 ; Check the next doubleword up, which requires a mixture of STD and STDY.
 define void @f3(i64 %src, double %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: std %f0, 4088(%r2)
 ; CHECK: stdy %f2, 4096(%r2)
 ; CHECK: br %r14
@@ -42,7 +42,7 @@ define void @f3(i64 %src, double %val) {
 
 ; Check the next doubleword after that, which requires STDY for both halves.
 define void @f4(i64 %src, double %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stdy %f0, 4096(%r2)
 ; CHECK: stdy %f2, 4104(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define void @f4(i64 %src, double %val) {
 
 ; Check the highest aligned offset that allows STDY for both halves.
 define void @f5(i64 %src, double %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: stdy %f0, 524272(%r2)
 ; CHECK: stdy %f2, 524280(%r2)
 ; CHECK: br %r14
@@ -69,7 +69,7 @@ define void @f5(i64 %src, double %val) {
 ; Check the next doubleword up, which requires separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f6(i64 %src, double %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lay %r1, 524280(%r2)
 ; CHECK: std %f0, 0(%r1)
 ; CHECK: std %f2, 8(%r1)
@@ -84,7 +84,7 @@ define void @f6(i64 %src, double %val) {
 ; Check the highest aligned negative offset, which needs a combination of
 ; STDY and STD.
 define void @f7(i64 %src, double %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: stdy %f0, -8(%r2)
 ; CHECK: std %f2, 0(%r2)
 ; CHECK: br %r14
@@ -97,7 +97,7 @@ define void @f7(i64 %src, double %val) {
 
 ; Check the next doubleword down, which requires STDY for both halves.
 define void @f8(i64 %src, double %val) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: stdy %f0, -16(%r2)
 ; CHECK: stdy %f2, -8(%r2)
 ; CHECK: br %r14
@@ -110,7 +110,7 @@ define void @f8(i64 %src, double %val) {
 
 ; Check the lowest offset that allows STDY for both halves.
 define void @f9(i64 %src, double %val) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: stdy %f0, -524288(%r2)
 ; CHECK: stdy %f2, -524280(%r2)
 ; CHECK: br %r14
@@ -124,7 +124,7 @@ define void @f9(i64 %src, double %val) {
 ; Check the next doubleword down, which requires separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f10(i64 %src, double %val) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r2, -524296
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -138,7 +138,7 @@ define void @f10(i64 %src, double %val) {
 
 ; Check that indices are allowed.
 define void @f11(i64 %src, i64 %index, double %val) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: std %f0, 4088({{%r2,%r3|%r3,%r2}})
 ; CHECK: stdy %f2, 4096({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-move-09.ll b/test/CodeGen/SystemZ/fp-move-09.ll
new file mode 100644
index 000000000000..52b2ee2e31ab
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-09.ll
@@ -0,0 +1,62 @@
+; Test moves between FPRs and GPRs for z196 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check that moves from i32s to floats can use high registers.
+define float @f1(i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: llhh [[REG:%r[0-5]]], 0(%r2)
+; CHECK: oihh [[REG]], 16256
+; CHECK: ldgr %f0, [[REG]]
+; CHECK: br %r14
+  %base = load i16 *%ptr
+  %ext = zext i16 %base to i32 ; zero-extension of the 16-bit load; together expected to become the LLHH above
+  %full = or i32 %ext, 1065353216 ; 1065353216 = 0x3f800000 (bit pattern of 1.0f); its top halfword 0x3f80 = 16256 is the OIHH immediate above
+  %res = bitcast i32 %full to float ; GPR-to-FPR move; expected to become the LDGR above
+  ret float %res
+}
+
+; Check that moves from floats to i32s can use high registers.
+; This "store the low byte" technique is used by llvmpipe, for example.
+define void @f2(float %val, i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: lgdr [[REG:%r[0-5]]], %f0
+; CHECK: stch [[REG]], 0(%r2)
+; CHECK: br %r14
+  %res = bitcast float %val to i32 ; FPR-to-GPR move; expected to become the LGDR above
+  %trunc = trunc i32 %res to i8 ; keep only the low byte of the float's bit pattern
+  store i8 %trunc, i8 *%ptr ; byte store; expected to become the STCH above
+  ret void
+}
+
+; Like f2, but with a conditional store.
+define void @f3(float %val, i8 *%ptr, i32 %which) {
+; CHECK-LABEL: f3:
+; CHECK: cijlh %r3, 0,
+; CHECK: lgdr [[REG:%r[0-5]]], %f0
+; CHECK: stch [[REG]], 0(%r2)
+; CHECK: br %r14
+  %int = bitcast float %val to i32
+  %trunc = trunc i32 %int to i8 ; low byte of the float's bit pattern, as in f2
+  %old = load i8 *%ptr ; current memory contents, written back unchanged when %which != 0
+  %cmp = icmp eq i32 %which, 0 ; expected to become the CIJLH on %r3 above
+  %res = select i1 %cmp, i8 %trunc, i8 %old ; new byte if %which == 0, otherwise the old byte
+  store i8 %res, i8 *%ptr ; net effect: *%ptr changes only when %which == 0 (the "conditional store")
+  ret void
+}
+
+; ...and again with 16-bit memory.
+define void @f4(float %val, i16 *%ptr, i32 %which) {
+; CHECK-LABEL: f4:
+; CHECK: cijlh %r3, 0,
+; CHECK: lgdr [[REG:%r[0-5]]], %f0
+; CHECK: sthh [[REG]], 0(%r2)
+; CHECK: br %r14
+  %int = bitcast float %val to i32
+  %trunc = trunc i32 %int to i16 ; low halfword of the float's bit pattern
+  %old = load i16 *%ptr ; current memory contents, written back unchanged when %which != 0
+  %cmp = icmp eq i32 %which, 0 ; expected to become the CIJLH on %r3 above
+  %res = select i1 %cmp, i16 %trunc, i16 %old ; new halfword if %which == 0, otherwise the old one
+  store i16 %res, i16 *%ptr ; halfword store; expected to become the STHH above
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll
index 68c78ee2da6b..7562d6bf071b 100644
--- a/test/CodeGen/SystemZ/fp-mul-01.ll
+++ b/test/CodeGen/SystemZ/fp-mul-01.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare float @foo()
+
 ; Check register multiplication.
 define float @f1(float %f1, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: meebr %f0, %f2
 ; CHECK: br %r14
   %res = fmul float %f1, %f2
@@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) {
 
 ; Check the low end of the MEEB range.
 define float @f2(float %f1, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: meeb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load float *%ptr
@@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) {
 
 ; Check the high end of the aligned MEEB range.
 define float @f3(float %f1, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: meeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f4(float %f1, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: meeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define float @f5(float %f1, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: meeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) {
 
 ; Check that MEEB allows indices.
 define float @f6(float %f1, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: meeb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) {
   %res = fmul float %f1, %f2
   ret float %res
 }
+
+; Check that multiplications of spilled values can use MEEB rather than MEEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: meeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2 ; %ptr1-%ptr10 address every second float after %ptr0
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+  %ptr10 = getelementptr float *%ptr0, i64 20
+
+  %val0 = load float *%ptr0 ; all eleven values are loaded before the call and first used after it
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+  %val10 = load float *%ptr10
+
+  %ret = call float @foo() ; %val0-%val10 are live across this call; presumably too many to keep in registers, hence the spills the header mentions
+
+  %mul0 = fmul float %ret, %val0 ; serial multiply chain; one multiply is expected to read its operand directly from a stack slot (160 or 164 off %r15)
+  %mul1 = fmul float %mul0, %val1
+  %mul2 = fmul float %mul1, %val2
+  %mul3 = fmul float %mul2, %val3
+  %mul4 = fmul float %mul3, %val4
+  %mul5 = fmul float %mul4, %val5
+  %mul6 = fmul float %mul5, %val6
+  %mul7 = fmul float %mul6, %val7
+  %mul8 = fmul float %mul7, %val8
+  %mul9 = fmul float %mul8, %val9
+  %mul10 = fmul float %mul9, %val10
+
+  ret float %mul10
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-02.ll b/test/CodeGen/SystemZ/fp-mul-02.ll
index ec51a4c1d679..cf4448fd7dd1 100644
--- a/test/CodeGen/SystemZ/fp-mul-02.ll
+++ b/test/CodeGen/SystemZ/fp-mul-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare float @foo()
+
 ; Check register multiplication.
 define double @f1(float %f1, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mdebr %f0, %f2
 ; CHECK: br %r14
   %f1x = fpext float %f1 to double
@@ -15,7 +17,7 @@ define double @f1(float %f1, float %f2) {
 
 ; Check the low end of the MDEB range.
 define double @f2(float %f1, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mdeb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load float *%ptr
@@ -27,7 +29,7 @@ define double @f2(float %f1, float *%ptr) {
 
 ; Check the high end of the aligned MDEB range.
 define double @f3(float %f1, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mdeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -41,7 +43,7 @@ define double @f3(float %f1, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(float %f1, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: mdeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -55,7 +57,7 @@ define double @f4(float %f1, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(float %f1, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: mdeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -69,7 +71,7 @@ define double @f5(float %f1, float *%base) {
 
 ; Check that MDEB allows indices.
 define double @f6(float %f1, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: mdeb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -81,3 +83,121 @@ define double @f6(float %f1, float *%base, i64 %index) {
   %res = fmul double %f1x, %f2x
   ret double %res
 }
+
+; Check that multiplications of spilled values can use MDEB rather than MDEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mdeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2 ; %ptr1-%ptr10 address every second float after %ptr0
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+  %ptr10 = getelementptr float *%ptr0, i64 20
+
+  %val0 = load float *%ptr0
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+  %val10 = load float *%ptr10
+
+  %frob0 = fadd float %val0, %val0 ; each %frobN doubles the loaded value, so it is a computed value rather than a plain reload from %ptrN
+  %frob1 = fadd float %val1, %val1
+  %frob2 = fadd float %val2, %val2
+  %frob3 = fadd float %val3, %val3
+  %frob4 = fadd float %val4, %val4
+  %frob5 = fadd float %val5, %val5
+  %frob6 = fadd float %val6, %val6
+  %frob7 = fadd float %val7, %val7
+  %frob8 = fadd float %val8, %val8
+  %frob9 = fadd float %val9, %val9
+  %frob10 = fadd float %val9, %val10 ; NOTE(review): breaks the %valN + %valN pattern (uses %val9, not %val10) - confirm whether intentional
+
+  store float %frob0, float *%ptr0 ; every %frobN is written back to its source pointer before the call
+  store float %frob1, float *%ptr1
+  store float %frob2, float *%ptr2
+  store float %frob3, float *%ptr3
+  store float %frob4, float *%ptr4
+  store float %frob5, float *%ptr5
+  store float %frob6, float *%ptr6
+  store float %frob7, float *%ptr7
+  store float %frob8, float *%ptr8
+  store float %frob9, float *%ptr9
+  store float %frob10, float *%ptr10
+
+  %ret = call float @foo() ; %frob0-%frob9 are live across this call; the header expects some of them to be spilled
+
+  %accext0 = fpext float %ret to double ; each step below: extend both float operands to double, multiply, scale, truncate back
+  %ext0 = fpext float %frob0 to double
+  %mul0 = fmul double %accext0, %ext0 ; double multiply of two extended floats - the shape expected to become MDEB with a stack-slot operand
+  %extra0 = fmul double %mul0, 1.01 ; additional scaling by a per-step constant (1.01, 1.11, ... 1.91)
+  %trunc0 = fptrunc double %extra0 to float
+
+  %accext1 = fpext float %trunc0 to double
+  %ext1 = fpext float %frob1 to double
+  %mul1 = fmul double %accext1, %ext1
+  %extra1 = fmul double %mul1, 1.11
+  %trunc1 = fptrunc double %extra1 to float
+
+  %accext2 = fpext float %trunc1 to double
+  %ext2 = fpext float %frob2 to double
+  %mul2 = fmul double %accext2, %ext2
+  %extra2 = fmul double %mul2, 1.21
+  %trunc2 = fptrunc double %extra2 to float
+
+  %accext3 = fpext float %trunc2 to double
+  %ext3 = fpext float %frob3 to double
+  %mul3 = fmul double %accext3, %ext3
+  %extra3 = fmul double %mul3, 1.31
+  %trunc3 = fptrunc double %extra3 to float
+
+  %accext4 = fpext float %trunc3 to double
+  %ext4 = fpext float %frob4 to double
+  %mul4 = fmul double %accext4, %ext4
+  %extra4 = fmul double %mul4, 1.41
+  %trunc4 = fptrunc double %extra4 to float
+
+  %accext5 = fpext float %trunc4 to double
+  %ext5 = fpext float %frob5 to double
+  %mul5 = fmul double %accext5, %ext5
+  %extra5 = fmul double %mul5, 1.51
+  %trunc5 = fptrunc double %extra5 to float
+
+  %accext6 = fpext float %trunc5 to double
+  %ext6 = fpext float %frob6 to double
+  %mul6 = fmul double %accext6, %ext6
+  %extra6 = fmul double %mul6, 1.61
+  %trunc6 = fptrunc double %extra6 to float
+
+  %accext7 = fpext float %trunc6 to double
+  %ext7 = fpext float %frob7 to double
+  %mul7 = fmul double %accext7, %ext7
+  %extra7 = fmul double %mul7, 1.71
+  %trunc7 = fptrunc double %extra7 to float
+
+  %accext8 = fpext float %trunc7 to double
+  %ext8 = fpext float %frob8 to double
+  %mul8 = fmul double %accext8, %ext8
+  %extra8 = fmul double %mul8, 1.81
+  %trunc8 = fptrunc double %extra8 to float
+
+  %accext9 = fpext float %trunc8 to double
+  %ext9 = fpext float %frob9 to double
+  %mul9 = fmul double %accext9, %ext9
+  %extra9 = fmul double %mul9, 1.91
+  %trunc9 = fptrunc double %extra9 to float
+
+  ret float %trunc9 ; the chain consumes %frob0-%frob9 only; %frob10 is stored above but never multiplied
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-03.ll b/test/CodeGen/SystemZ/fp-mul-03.ll
index 9849247deccb..6d296f07d1f2 100644
--- a/test/CodeGen/SystemZ/fp-mul-03.ll
+++ b/test/CodeGen/SystemZ/fp-mul-03.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare double @foo()
+
 ; Check register multiplication.
 define double @f1(double %f1, double %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mdbr %f0, %f2
 ; CHECK: br %r14
   %res = fmul double %f1, %f2
@@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) {
 
 ; Check the low end of the MDB range.
 define double @f2(double %f1, double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mdb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load double *%ptr
@@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) {
 
 ; Check the high end of the aligned MDB range.
 define double @f3(double %f1, double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mdb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 511
@@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double %f1, double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: mdb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(double %f1, double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: mdb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) {
 
 ; Check that MDB allows indices.
 define double @f6(double %f1, double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: mdb %f0, 800(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) {
   %res = fmul double %f1, %f2
   ret double %res
 }
+
+; Check that multiplications of spilled values can use MDB rather than MDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mdb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2 ; %ptrN = %ptr0 + 2*N doubles
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+  %ptr10 = getelementptr double *%ptr0, i64 20
+
+  %val0 = load double *%ptr0 ; all eleven loads come before the call to @foo
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+  %val10 = load double *%ptr10
+
+  %ret = call double @foo() ; %val0-%val10 are all still needed after this call
+
+  %mul0 = fmul double %ret, %val0 ; chain every loaded value into the product
+  %mul1 = fmul double %mul0, %val1
+  %mul2 = fmul double %mul1, %val2
+  %mul3 = fmul double %mul2, %val3
+  %mul4 = fmul double %mul3, %val4
+  %mul5 = fmul double %mul4, %val5
+  %mul6 = fmul double %mul5, %val6
+  %mul7 = fmul double %mul6, %val7
+  %mul8 = fmul double %mul7, %val8
+  %mul9 = fmul double %mul8, %val9
+  %mul10 = fmul double %mul9, %val10
+
+  ret double %mul10
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-04.ll b/test/CodeGen/SystemZ/fp-mul-04.ll
index 712ead85cbd4..3c4325e6cbbb 100644
--- a/test/CodeGen/SystemZ/fp-mul-04.ll
+++ b/test/CodeGen/SystemZ/fp-mul-04.ll
@@ -2,11 +2,13 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare double @foo()
+
 ; Check register multiplication.  "mxdbr %f0, %f2" is not valid from LLVM's
 ; point of view, because %f2 is the low register of the FP128 %f0.  Pass the
 ; multiplier in %f4 instead.
 define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mxdbr %f0, %f4
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
@@ -20,7 +22,7 @@ define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
 
 ; Check the low end of the MXDB range.
 define void @f2(double %f1, double *%ptr, fp128 *%dst) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mxdb %f0, 0(%r2)
 ; CHECK: std %f0, 0(%r3)
 ; CHECK: std %f2, 8(%r3)
@@ -35,7 +37,7 @@ define void @f2(double %f1, double *%ptr, fp128 *%dst) {
 
 ; Check the high end of the aligned MXDB range.
 define void @f3(double %f1, double *%base, fp128 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mxdb %f0, 4088(%r2)
 ; CHECK: std %f0, 0(%r3)
 ; CHECK: std %f2, 8(%r3)
@@ -52,7 +54,7 @@ define void @f3(double %f1, double *%base, fp128 *%dst) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f4(double %f1, double *%base, fp128 *%dst) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: mxdb %f0, 0(%r2)
 ; CHECK: std %f0, 0(%r3)
@@ -69,7 +71,7 @@ define void @f4(double %f1, double *%base, fp128 *%dst) {
 
 ; Check negative displacements, which also need separate address logic.
 define void @f5(double %f1, double *%base, fp128 *%dst) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: mxdb %f0, 0(%r2)
 ; CHECK: std %f0, 0(%r3)
@@ -86,7 +88,7 @@ define void @f5(double %f1, double *%base, fp128 *%dst) {
 
 ; Check that MXDB allows indices.
 define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: mxdb %f0, 800(%r1,%r2)
 ; CHECK: std %f0, 0(%r4)
@@ -101,3 +103,131 @@ define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
   store fp128 %res, fp128 *%dst
   ret void
 }
+
+; Check that multiplications of spilled values can use MXDB rather than MXDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mxdb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2 ; %ptrN = %ptr0 + 2*N doubles
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+  %ptr10 = getelementptr double *%ptr0, i64 20
+
+  %val0 = load double *%ptr0
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+  %val10 = load double *%ptr10
+
+  %frob0 = fadd double %val0, %val0 ; %frobN = %valN + %valN
+  %frob1 = fadd double %val1, %val1
+  %frob2 = fadd double %val2, %val2
+  %frob3 = fadd double %val3, %val3
+  %frob4 = fadd double %val4, %val4
+  %frob5 = fadd double %val5, %val5
+  %frob6 = fadd double %val6, %val6
+  %frob7 = fadd double %val7, %val7
+  %frob8 = fadd double %val8, %val8
+  %frob9 = fadd double %val9, %val9
+  %frob10 = fadd double %val9, %val10 ; NOTE(review): %val9 + %val10, unlike the lines above; only stored below - confirm intended
+
+  store double %frob0, double *%ptr0 ; write the sums back through the same pointers
+  store double %frob1, double *%ptr1
+  store double %frob2, double *%ptr2
+  store double %frob3, double *%ptr3
+  store double %frob4, double *%ptr4
+  store double %frob5, double *%ptr5
+  store double %frob6, double *%ptr6
+  store double %frob7, double *%ptr7
+  store double %frob8, double *%ptr8
+  store double %frob9, double *%ptr9
+  store double %frob10, double *%ptr10
+
+  %ret = call double @foo() ; %frob0-%frob9 are all used again after this call
+
+  %accext0 = fpext double %ret to fp128 ; each round: widen, multiply by %frobN and a constant, narrow
+  %ext0 = fpext double %frob0 to fp128
+  %mul0 = fmul fp128 %accext0, %ext0
+  %const0 = fpext double 1.01 to fp128
+  %extra0 = fmul fp128 %mul0, %const0
+  %trunc0 = fptrunc fp128 %extra0 to double
+
+  %accext1 = fpext double %trunc0 to fp128
+  %ext1 = fpext double %frob1 to fp128
+  %mul1 = fmul fp128 %accext1, %ext1
+  %const1 = fpext double 1.11 to fp128
+  %extra1 = fmul fp128 %mul1, %const1
+  %trunc1 = fptrunc fp128 %extra1 to double
+
+  %accext2 = fpext double %trunc1 to fp128
+  %ext2 = fpext double %frob2 to fp128
+  %mul2 = fmul fp128 %accext2, %ext2
+  %const2 = fpext double 1.21 to fp128
+  %extra2 = fmul fp128 %mul2, %const2
+  %trunc2 = fptrunc fp128 %extra2 to double
+
+  %accext3 = fpext double %trunc2 to fp128
+  %ext3 = fpext double %frob3 to fp128
+  %mul3 = fmul fp128 %accext3, %ext3
+  %const3 = fpext double 1.31 to fp128
+  %extra3 = fmul fp128 %mul3, %const3
+  %trunc3 = fptrunc fp128 %extra3 to double
+
+  %accext4 = fpext double %trunc3 to fp128
+  %ext4 = fpext double %frob4 to fp128
+  %mul4 = fmul fp128 %accext4, %ext4
+  %const4 = fpext double 1.41 to fp128
+  %extra4 = fmul fp128 %mul4, %const4
+  %trunc4 = fptrunc fp128 %extra4 to double
+
+  %accext5 = fpext double %trunc4 to fp128
+  %ext5 = fpext double %frob5 to fp128
+  %mul5 = fmul fp128 %accext5, %ext5
+  %const5 = fpext double 1.51 to fp128
+  %extra5 = fmul fp128 %mul5, %const5
+  %trunc5 = fptrunc fp128 %extra5 to double
+
+  %accext6 = fpext double %trunc5 to fp128
+  %ext6 = fpext double %frob6 to fp128
+  %mul6 = fmul fp128 %accext6, %ext6
+  %const6 = fpext double 1.61 to fp128
+  %extra6 = fmul fp128 %mul6, %const6
+  %trunc6 = fptrunc fp128 %extra6 to double
+
+  %accext7 = fpext double %trunc6 to fp128
+  %ext7 = fpext double %frob7 to fp128
+  %mul7 = fmul fp128 %accext7, %ext7
+  %const7 = fpext double 1.71 to fp128
+  %extra7 = fmul fp128 %mul7, %const7
+  %trunc7 = fptrunc fp128 %extra7 to double
+
+  %accext8 = fpext double %trunc7 to fp128
+  %ext8 = fpext double %frob8 to fp128
+  %mul8 = fmul fp128 %accext8, %ext8
+  %const8 = fpext double 1.81 to fp128
+  %extra8 = fmul fp128 %mul8, %const8
+  %trunc8 = fptrunc fp128 %extra8 to double
+
+  %accext9 = fpext double %trunc8 to fp128
+  %ext9 = fpext double %frob9 to fp128
+  %mul9 = fmul fp128 %accext9, %ext9
+  %const9 = fpext double 1.91 to fp128
+  %extra9 = fmul fp128 %mul9, %const9
+  %trunc9 = fptrunc fp128 %extra9 to double
+
+  ret double %trunc9
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-05.ll b/test/CodeGen/SystemZ/fp-mul-05.ll
index df5bc4e70755..0be1fe8b41a0 100644
--- a/test/CodeGen/SystemZ/fp-mul-05.ll
+++ b/test/CodeGen/SystemZ/fp-mul-05.ll
@@ -4,7 +4,7 @@
 
 ; There is no memory form of 128-bit multiplication.
 define void @f1(fp128 *%ptr, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxebr %f0, %f0
 ; CHECK: ld %f1, 0(%r2)
 ; CHECK: ld %f3, 8(%r2)
diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll
index 8124c680371d..3f631a68b575 100644
--- a/test/CodeGen/SystemZ/fp-mul-06.ll
+++ b/test/CodeGen/SystemZ/fp-mul-06.ll
@@ -3,7 +3,7 @@
 declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
 
 define float @f1(float %f1, float %f2, float %acc) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: maebr %f4, %f0, %f2
 ; CHECK: ler %f0, %f4
 ; CHECK: br %r14
@@ -12,7 +12,7 @@ define float @f1(float %f1, float %f2, float %acc) {
 }
 
 define float @f2(float %f1, float *%ptr, float %acc) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: maeb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
@@ -22,7 +22,7 @@ define float @f2(float %f1, float *%ptr, float %acc) {
 }
 
 define float @f3(float %f1, float *%base, float %acc) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: maeb %f2, %f0, 4092(%r2)
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
@@ -36,7 +36,7 @@ define float @f4(float %f1, float *%base, float %acc) {
 ; The important thing here is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: maeb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
@@ -51,7 +51,7 @@ define float @f5(float %f1, float *%base, float %acc) {
 ; Here too the important thing is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: maeb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
@@ -63,7 +63,7 @@ define float @f5(float %f1, float *%base, float %acc) {
 }
 
 define float @f6(float %f1, float *%base, i64 %index, float %acc) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: maeb %f2, %f0, 0(%r1,%r2)
 ; CHECK: ler %f0, %f2
@@ -75,7 +75,7 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) {
 }
 
 define float @f7(float %f1, float *%base, i64 %index, float %acc) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
 ; CHECK: ler %f0, %f2
@@ -88,7 +88,7 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) {
 }
 
 define float @f8(float %f1, float *%base, i64 %index, float %acc) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
 ; CHECK: maeb %f2, %f0, 0(%r1)
diff --git a/test/CodeGen/SystemZ/fp-mul-07.ll b/test/CodeGen/SystemZ/fp-mul-07.ll
index b8e44830f331..e4f590447215 100644
--- a/test/CodeGen/SystemZ/fp-mul-07.ll
+++ b/test/CodeGen/SystemZ/fp-mul-07.ll
@@ -3,7 +3,7 @@
 declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
 
 define double @f1(double %f1, double %f2, double %acc) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: madbr %f4, %f0, %f2
 ; CHECK: ldr %f0, %f4
 ; CHECK: br %r14
@@ -12,7 +12,7 @@ define double @f1(double %f1, double %f2, double %acc) {
 }
 
 define double @f2(double %f1, double *%ptr, double %acc) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: madb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
@@ -22,7 +22,7 @@ define double @f2(double %f1, double *%ptr, double %acc) {
 }
 
 define double @f3(double %f1, double *%base, double %acc) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: madb %f2, %f0, 4088(%r2)
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
@@ -36,7 +36,7 @@ define double @f4(double %f1, double *%base, double %acc) {
 ; The important thing here is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: madb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
@@ -51,7 +51,7 @@ define double @f5(double %f1, double *%base, double %acc) {
 ; Here too the important thing is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: madb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
@@ -63,7 +63,7 @@ define double @f5(double %f1, double *%base, double %acc) {
 }
 
 define double @f6(double %f1, double *%base, i64 %index, double %acc) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: madb %f2, %f0, 0(%r1,%r2)
 ; CHECK: ldr %f0, %f2
@@ -75,7 +75,7 @@ define double @f6(double %f1, double *%base, i64 %index, double %acc) {
 }
 
 define double @f7(double %f1, double *%base, i64 %index, double %acc) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
 ; CHECK: ldr %f0, %f2
@@ -88,7 +88,7 @@ define double @f7(double %f1, double *%base, i64 %index, double %acc) {
 }
 
 define double @f8(double %f1, double *%base, i64 %index, double %acc) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
 ; CHECK: madb %f2, %f0, 0(%r1)
diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll
index 5c1474063a16..ab5fcb2cbefd 100644
--- a/test/CodeGen/SystemZ/fp-mul-08.ll
+++ b/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -3,7 +3,7 @@
 declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
 
 define float @f1(float %f1, float %f2, float %acc) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: msebr %f4, %f0, %f2
 ; CHECK: ler %f0, %f4
 ; CHECK: br %r14
@@ -13,7 +13,7 @@ define float @f1(float %f1, float %f2, float %acc) {
 }
 
 define float @f2(float %f1, float *%ptr, float %acc) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mseb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
@@ -24,7 +24,7 @@ define float @f2(float %f1, float *%ptr, float %acc) {
 }
 
 define float @f3(float %f1, float *%base, float %acc) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mseb %f2, %f0, 4092(%r2)
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
@@ -39,7 +39,7 @@ define float @f4(float %f1, float *%base, float %acc) {
 ; The important thing here is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: mseb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
@@ -55,7 +55,7 @@ define float @f5(float %f1, float *%base, float %acc) {
 ; Here too the important thing is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: mseb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
@@ -68,7 +68,7 @@ define float @f5(float %f1, float *%base, float %acc) {
 }
 
 define float @f6(float %f1, float *%base, i64 %index, float %acc) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: mseb %f2, %f0, 0(%r1,%r2)
 ; CHECK: ler %f0, %f2
@@ -81,7 +81,7 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) {
 }
 
 define float @f7(float %f1, float *%base, i64 %index, float %acc) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
 ; CHECK: ler %f0, %f2
@@ -95,7 +95,7 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) {
 }
 
 define float @f8(float %f1, float *%base, i64 %index, float %acc) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
 ; CHECK: mseb %f2, %f0, 0(%r1)
diff --git a/test/CodeGen/SystemZ/fp-mul-09.ll b/test/CodeGen/SystemZ/fp-mul-09.ll
index bcae1e35e6eb..7e740968a8c7 100644
--- a/test/CodeGen/SystemZ/fp-mul-09.ll
+++ b/test/CodeGen/SystemZ/fp-mul-09.ll
@@ -3,7 +3,7 @@
 declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
 
 define double @f1(double %f1, double %f2, double %acc) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: msdbr %f4, %f0, %f2
 ; CHECK: ldr %f0, %f4
 ; CHECK: br %r14
@@ -13,7 +13,7 @@ define double @f1(double %f1, double %f2, double %acc) {
 }
 
 define double @f2(double %f1, double *%ptr, double %acc) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: msdb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
@@ -24,7 +24,7 @@ define double @f2(double %f1, double *%ptr, double %acc) {
 }
 
 define double @f3(double %f1, double *%base, double %acc) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: msdb %f2, %f0, 4088(%r2)
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
@@ -39,7 +39,7 @@ define double @f4(double %f1, double *%base, double %acc) {
 ; The important thing here is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: msdb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
@@ -55,7 +55,7 @@ define double @f5(double %f1, double *%base, double %acc) {
 ; Here too the important thing is that we don't generate an out-of-range
 ; displacement.  Other sequences besides this one would be OK.
 ;
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: msdb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
@@ -68,7 +68,7 @@ define double @f5(double %f1, double *%base, double %acc) {
 }
 
 define double @f6(double %f1, double *%base, i64 %index, double %acc) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: msdb %f2, %f0, 0(%r1,%r2)
 ; CHECK: ldr %f0, %f2
@@ -81,7 +81,7 @@ define double @f6(double %f1, double *%base, i64 %index, double %acc) {
 }
 
 define double @f7(double %f1, double *%base, i64 %index, double %acc) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
 ; CHECK: ldr %f0, %f2
@@ -95,7 +95,7 @@ define double @f7(double %f1, double *%base, i64 %index, double %acc) {
 }
 
 define double @f8(double %f1, double *%base, i64 %index, double %acc) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
 ; CHECK: msdb %f2, %f0, 0(%r1)
diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll
index 09a4a53e41d1..1cc6d816fee3 100644
--- a/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -4,7 +4,7 @@
 
 ; Test f32.
 define float @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lcebr %f0, %f0
 ; CHECK: br %r14
   %res = fsub float -0.0, %f
@@ -13,7 +13,7 @@ define float @f1(float %f) {
 
 ; Test f64.
 define double @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lcdbr %f0, %f0
 ; CHECK: br %r14
   %res = fsub double -0.0, %f
@@ -24,7 +24,7 @@ define double @f2(double %f) {
 ; be better implemented using an XI on the upper byte.  Do some extra
 ; processing so that using FPRs is unequivocally better.
 define void @f3(fp128 *%ptr, fp128 *%ptr2) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lcxbr
 ; CHECK: dxbr
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/fp-round-01.ll b/test/CodeGen/SystemZ/fp-round-01.ll
index 20325c336648..565db5ad4f51 100644
--- a/test/CodeGen/SystemZ/fp-round-01.ll
+++ b/test/CodeGen/SystemZ/fp-round-01.ll
@@ -1,32 +1,31 @@
-; Test rint()-like rounding, with non-integer values triggering an
-; inexact condition.
+; Test rounding functions for z10.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
-; Test f32.
+; Test rint for f32.
 declare float @llvm.rint.f32(float %f)
 define float @f1(float %f) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: fiebr %f0, 0, %f0
 ; CHECK: br %r14
   %res = call float @llvm.rint.f32(float %f)
   ret float %res
 }
 
-; Test f64.
+; Test rint for f64.
 declare double @llvm.rint.f64(double %f)
 define double @f2(double %f) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: fidbr %f0, 0, %f0
 ; CHECK: br %r14
   %res = call double @llvm.rint.f64(double %f)
   ret double %res
 }
 
-; Test f128.
+; Test rint for f128.
 declare fp128 @llvm.rint.f128(fp128 %f)
 define void @f3(fp128 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: fixbr %f0, 0, %f0
 ; CHECK: br %r14
   %src = load fp128 *%ptr
@@ -34,3 +33,118 @@ define void @f3(fp128 *%ptr) {
   store fp128 %res, fp128 *%ptr
   ret void
 }
+
+; Test nearbyint for f32, which should become a call to nearbyintf on z10.
+declare float @llvm.nearbyint.f32(float %f)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, nearbyintf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.nearbyint.f32(float %f)
+  ret float %res
+}
+
+; Test nearbyint for f64, which should become a call to nearbyint on z10.
+declare double @llvm.nearbyint.f64(double %f)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: brasl %r14, nearbyint@PLT
+; CHECK: br %r14
+  %res = call double @llvm.nearbyint.f64(double %f)
+  ret double %res
+}
+
+; Test nearbyint for f128 (hence no f6): omitted for now because we cannot
+; handle indirect arguments.
+
+; Test floor for f32, which should become a call to floorf on z10.
+declare float @llvm.floor.f32(float %f)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, floorf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.floor.f32(float %f)
+  ret float %res
+}
+
+; Test floor for f64, which should become a call to floor on z10.
+declare double @llvm.floor.f64(double %f)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, floor@PLT
+; CHECK: br %r14
+  %res = call double @llvm.floor.f64(double %f)
+  ret double %res
+}
+
+; Test floor for f128 (hence no f9): omitted for now because we cannot
+; handle indirect arguments.
+
+; Test ceil for f32, which should become a call to ceilf on z10.
+declare float @llvm.ceil.f32(float %f)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: brasl %r14, ceilf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.ceil.f32(float %f)
+  ret float %res
+}
+
+; Test ceil for f64, which should become a call to ceil on z10.
+declare double @llvm.ceil.f64(double %f)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: brasl %r14, ceil@PLT
+; CHECK: br %r14
+  %res = call double @llvm.ceil.f64(double %f)
+  ret double %res
+}
+
+; Test ceil for f128 (hence no f12): omitted for now because we cannot
+; handle indirect arguments.
+
+; Test trunc for f32, which should become a call to truncf on z10.
+declare float @llvm.trunc.f32(float %f)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, truncf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.trunc.f32(float %f)
+  ret float %res
+}
+
+; Test trunc for f64, which should become a call to trunc on z10.
+declare double @llvm.trunc.f64(double %f)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: brasl %r14, trunc@PLT
+; CHECK: br %r14
+  %res = call double @llvm.trunc.f64(double %f)
+  ret double %res
+}
+
+; Test trunc for f128 (hence no f15): omitted for now because we cannot
+; handle indirect arguments.
+
+; Test round for f32, which should become a call to roundf on z10.
+declare float @llvm.round.f32(float %f)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, roundf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.round.f32(float %f)
+  ret float %res
+}
+
+; Test round for f64, which should become a call to round on z10.
+declare double @llvm.round.f64(double %f)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: brasl %r14, round@PLT
+; CHECK: br %r14
+  %res = call double @llvm.round.f64(double %f)
+  ret double %res
+}
+
+; Test round for f128 (hence no f18): omitted for now because we cannot
+; handle indirect arguments.
diff --git a/test/CodeGen/SystemZ/fp-round-02.ll b/test/CodeGen/SystemZ/fp-round-02.ll
new file mode 100644
index 000000000000..d79c9c47050a
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-round-02.ll
@@ -0,0 +1,195 @@
+; Test rounding functions for z196 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Test rint for f32, which should use FIEBR.
+declare float @llvm.rint.f32(float %f)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebr %f0, 0, %f0
+; CHECK: br %r14
+  %res = call float @llvm.rint.f32(float %f)
+  ret float %res
+}
+
+; Test rint for f64, which should use FIDBR.
+declare double @llvm.rint.f64(double %f)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK: fidbr %f0, 0, %f0
+; CHECK: br %r14
+  %res = call double @llvm.rint.f64(double %f)
+  ret double %res
+}
+
+; Test rint for f128, which should use FIXBR on a value loaded from memory.
+declare fp128 @llvm.rint.f128(fp128 %f)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: fixbr %f0, 0, %f0
+; CHECK: br %r14
+  %src = load fp128 *%ptr
+  %res = call fp128 @llvm.rint.f128(fp128 %src)
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test nearbyint for f32, which should use FIEBRA (second operand 0) on z196.
+declare float @llvm.nearbyint.f32(float %f)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: fiebra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.nearbyint.f32(float %f)
+  ret float %res
+}
+
+; Test nearbyint for f64, which should use FIDBRA (second operand 0) on z196.
+declare double @llvm.nearbyint.f64(double %f)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: fidbra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.nearbyint.f64(double %f)
+  ret double %res
+}
+
+; Test nearbyint for f128, which should use FIXBRA (second operand 0) on z196.
+declare fp128 @llvm.nearbyint.f128(fp128 %f)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: fixbra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %src = load fp128 *%ptr
+  %res = call fp128 @llvm.nearbyint.f128(fp128 %src)
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test floor for f32, which should use FIEBRA (second operand 7) on z196.
+declare float @llvm.floor.f32(float %f)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: fiebra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.floor.f32(float %f)
+  ret float %res
+}
+
+; Test floor for f64, which should use FIDBRA (second operand 7) on z196.
+declare double @llvm.floor.f64(double %f)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: fidbra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.floor.f64(double %f)
+  ret double %res
+}
+
+; Test floor for f128, which should use FIXBRA (second operand 7) on z196.
+declare fp128 @llvm.floor.f128(fp128 %f)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: fixbra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %src = load fp128 *%ptr
+  %res = call fp128 @llvm.floor.f128(fp128 %src)
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test ceil for f32, which should use FIEBRA (second operand 6) on z196.
+declare float @llvm.ceil.f32(float %f)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: fiebra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.ceil.f32(float %f)
+  ret float %res
+}
+
+; Test ceil for f64, which should use FIDBRA (second operand 6) on z196.
+declare double @llvm.ceil.f64(double %f)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: fidbra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.ceil.f64(double %f)
+  ret double %res
+}
+
+; Test ceil for f128, which should use FIXBRA (second operand 6) on z196.
+declare fp128 @llvm.ceil.f128(fp128 %f)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: fixbra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %src = load fp128 *%ptr
+  %res = call fp128 @llvm.ceil.f128(fp128 %src)
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test trunc for f32, which should use FIEBRA (second operand 5) on z196.
+declare float @llvm.trunc.f32(float %f)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: fiebra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.trunc.f32(float %f)
+  ret float %res
+}
+
+; Test trunc for f64, which should use FIDBRA (second operand 5) on z196.
+declare double @llvm.trunc.f64(double %f)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: fidbra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.trunc.f64(double %f)
+  ret double %res
+}
+
+; Test trunc for f128, which should use FIXBRA (second operand 5) on z196.
+declare fp128 @llvm.trunc.f128(fp128 %f)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: fixbra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %src = load fp128 *%ptr
+  %res = call fp128 @llvm.trunc.f128(fp128 %src)
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test round for f32, which should use FIEBRA (second operand 1) on z196.
+declare float @llvm.round.f32(float %f)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: fiebra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.round.f32(float %f)
+  ret float %res
+}
+
+; Test round for f64, which should use FIDBRA (second operand 1) on z196.
+declare double @llvm.round.f64(double %f)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: fidbra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.round.f64(double %f)
+  ret double %res
+}
+
+; Test round for f128, which should use FIXBRA (second operand 1) on z196.
+declare fp128 @llvm.round.f128(fp128 %f)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: fixbra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %src = load fp128 *%ptr
+  %res = call fp128 @llvm.round.f128(fp128 %src)
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll
index 7ed27f56d0d0..7465af456b83 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-01.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll
@@ -2,11 +2,12 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-declare float @llvm.sqrt.f32(float %f)
+declare float @llvm.sqrt.f32(float)
+declare float @sqrtf(float)
 
 ; Check register square root.
 define float @f1(float %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sqebr %f0, %f0
 ; CHECK: br %r14
   %res = call float @llvm.sqrt.f32(float %val)
@@ -15,7 +16,7 @@ define float @f1(float %val) {
 
 ; Check the low end of the SQEB range.
 define float @f2(float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sqeb %f0, 0(%r2)
 ; CHECK: br %r14
   %val = load float *%ptr
@@ -25,7 +26,7 @@ define float @f2(float *%ptr) {
 
 ; Check the high end of the aligned SQEB range.
 define float @f3(float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sqeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -37,7 +38,7 @@ define float @f3(float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f4(float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: sqeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -49,7 +50,7 @@ define float @f4(float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define float @f5(float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: sqeb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -61,7 +62,7 @@ define float @f5(float *%base) {
 
 ; Check that SQEB allows indices.
 define float @f6(float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: sqeb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -71,3 +72,98 @@ define float @f6(float *%base, i64 %index) {
   %res = call float @llvm.sqrt.f32(float %val)
   ret float %res
 }
+
+; Keep 17 floats live at once so that at least one SQEBR source is spilled.
+; The square root should then take its operand from the stack using SQEB.
+define void @f7(float *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile float *%ptr
+  %val1 = load volatile float *%ptr
+  %val2 = load volatile float *%ptr
+  %val3 = load volatile float *%ptr
+  %val4 = load volatile float *%ptr
+  %val5 = load volatile float *%ptr
+  %val6 = load volatile float *%ptr
+  %val7 = load volatile float *%ptr
+  %val8 = load volatile float *%ptr
+  %val9 = load volatile float *%ptr
+  %val10 = load volatile float *%ptr
+  %val11 = load volatile float *%ptr
+  %val12 = load volatile float *%ptr
+  %val13 = load volatile float *%ptr
+  %val14 = load volatile float *%ptr
+  %val15 = load volatile float *%ptr
+  %val16 = load volatile float *%ptr
+
+  %sqrt0 = call float @llvm.sqrt.f32(float %val0)
+  %sqrt1 = call float @llvm.sqrt.f32(float %val1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %val2)
+  %sqrt3 = call float @llvm.sqrt.f32(float %val3)
+  %sqrt4 = call float @llvm.sqrt.f32(float %val4)
+  %sqrt5 = call float @llvm.sqrt.f32(float %val5)
+  %sqrt6 = call float @llvm.sqrt.f32(float %val6)
+  %sqrt7 = call float @llvm.sqrt.f32(float %val7)
+  %sqrt8 = call float @llvm.sqrt.f32(float %val8)
+  %sqrt9 = call float @llvm.sqrt.f32(float %val9)
+  %sqrt10 = call float @llvm.sqrt.f32(float %val10)
+  %sqrt11 = call float @llvm.sqrt.f32(float %val11)
+  %sqrt12 = call float @llvm.sqrt.f32(float %val12)
+  %sqrt13 = call float @llvm.sqrt.f32(float %val13)
+  %sqrt14 = call float @llvm.sqrt.f32(float %val14)
+  %sqrt15 = call float @llvm.sqrt.f32(float %val15)
+  %sqrt16 = call float @llvm.sqrt.f32(float %val16)
+
+  store volatile float %val0, float *%ptr
+  store volatile float %val1, float *%ptr
+  store volatile float %val2, float *%ptr
+  store volatile float %val3, float *%ptr
+  store volatile float %val4, float *%ptr
+  store volatile float %val5, float *%ptr
+  store volatile float %val6, float *%ptr
+  store volatile float %val7, float *%ptr
+  store volatile float %val8, float *%ptr
+  store volatile float %val9, float *%ptr
+  store volatile float %val10, float *%ptr
+  store volatile float %val11, float *%ptr
+  store volatile float %val12, float *%ptr
+  store volatile float %val13, float *%ptr
+  store volatile float %val14, float *%ptr
+  store volatile float %val15, float *%ptr
+  store volatile float %val16, float *%ptr
+
+  store volatile float %sqrt0, float *%ptr
+  store volatile float %sqrt1, float *%ptr
+  store volatile float %sqrt2, float *%ptr
+  store volatile float %sqrt3, float *%ptr
+  store volatile float %sqrt4, float *%ptr
+  store volatile float %sqrt5, float *%ptr
+  store volatile float %sqrt6, float *%ptr
+  store volatile float %sqrt7, float *%ptr
+  store volatile float %sqrt8, float *%ptr
+  store volatile float %sqrt9, float *%ptr
+  store volatile float %sqrt10, float *%ptr
+  store volatile float %sqrt11, float *%ptr
+  store volatile float %sqrt12, float *%ptr
+  store volatile float %sqrt13, float *%ptr
+  store volatile float %sqrt14, float *%ptr
+  store volatile float %sqrt15, float *%ptr
+  store volatile float %sqrt16, float *%ptr
+
+  ret void
+}
+
+; Check that sqrtf is lowered to SQEBR, calling sqrtf only for NaN results.
+define float @f8(float %dummy, float %val) {
+; CHECK-LABEL: f8:
+; CHECK: sqebr %f0, %f2
+; CHECK: cebr %f0, %f0
+; CHECK: jo [[LABEL:\.L.*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: ler %f0, %f2
+; CHECK: jg sqrtf@PLT
+  %res = tail call float @sqrtf(float %val)
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll
index 22a91ad2f4f7..66ffd19d6c31 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-02.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll
@@ -3,10 +3,11 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 declare double @llvm.sqrt.f64(double %f)
+declare double @sqrt(double)
 
 ; Check register square root.
 define double @f1(double %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sqdbr %f0, %f0
 ; CHECK: br %r14
   %res = call double @llvm.sqrt.f64(double %val)
@@ -15,7 +16,7 @@ define double @f1(double %val) {
 
 ; Check the low end of the SQDB range.
 define double @f2(double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sqdb %f0, 0(%r2)
 ; CHECK: br %r14
   %val = load double *%ptr
@@ -25,7 +26,7 @@ define double @f2(double *%ptr) {
 
 ; Check the high end of the aligned SQDB range.
 define double @f3(double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sqdb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 511
@@ -37,7 +38,7 @@ define double @f3(double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: sqdb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -49,7 +50,7 @@ define double @f4(double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: sqdb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -61,7 +62,7 @@ define double @f5(double *%base) {
 
 ; Check that SQDB allows indices.
 define double @f6(double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: sqdb %f0, 800(%r1,%r2)
 ; CHECK: br %r14
@@ -71,3 +72,98 @@ define double @f6(double *%base, i64 %index) {
   %res = call double @llvm.sqrt.f64(double %val)
   ret double %res
 }
+
+; Keep 17 doubles live at once so that at least one SQDBR source is spilled.
+; The square root should then take its operand from the stack using SQDB.
+define void @f7(double *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %val0 = load volatile double *%ptr
+  %val1 = load volatile double *%ptr
+  %val2 = load volatile double *%ptr
+  %val3 = load volatile double *%ptr
+  %val4 = load volatile double *%ptr
+  %val5 = load volatile double *%ptr
+  %val6 = load volatile double *%ptr
+  %val7 = load volatile double *%ptr
+  %val8 = load volatile double *%ptr
+  %val9 = load volatile double *%ptr
+  %val10 = load volatile double *%ptr
+  %val11 = load volatile double *%ptr
+  %val12 = load volatile double *%ptr
+  %val13 = load volatile double *%ptr
+  %val14 = load volatile double *%ptr
+  %val15 = load volatile double *%ptr
+  %val16 = load volatile double *%ptr
+
+  %sqrt0 = call double @llvm.sqrt.f64(double %val0)
+  %sqrt1 = call double @llvm.sqrt.f64(double %val1)
+  %sqrt2 = call double @llvm.sqrt.f64(double %val2)
+  %sqrt3 = call double @llvm.sqrt.f64(double %val3)
+  %sqrt4 = call double @llvm.sqrt.f64(double %val4)
+  %sqrt5 = call double @llvm.sqrt.f64(double %val5)
+  %sqrt6 = call double @llvm.sqrt.f64(double %val6)
+  %sqrt7 = call double @llvm.sqrt.f64(double %val7)
+  %sqrt8 = call double @llvm.sqrt.f64(double %val8)
+  %sqrt9 = call double @llvm.sqrt.f64(double %val9)
+  %sqrt10 = call double @llvm.sqrt.f64(double %val10)
+  %sqrt11 = call double @llvm.sqrt.f64(double %val11)
+  %sqrt12 = call double @llvm.sqrt.f64(double %val12)
+  %sqrt13 = call double @llvm.sqrt.f64(double %val13)
+  %sqrt14 = call double @llvm.sqrt.f64(double %val14)
+  %sqrt15 = call double @llvm.sqrt.f64(double %val15)
+  %sqrt16 = call double @llvm.sqrt.f64(double %val16)
+
+  store volatile double %val0, double *%ptr
+  store volatile double %val1, double *%ptr
+  store volatile double %val2, double *%ptr
+  store volatile double %val3, double *%ptr
+  store volatile double %val4, double *%ptr
+  store volatile double %val5, double *%ptr
+  store volatile double %val6, double *%ptr
+  store volatile double %val7, double *%ptr
+  store volatile double %val8, double *%ptr
+  store volatile double %val9, double *%ptr
+  store volatile double %val10, double *%ptr
+  store volatile double %val11, double *%ptr
+  store volatile double %val12, double *%ptr
+  store volatile double %val13, double *%ptr
+  store volatile double %val14, double *%ptr
+  store volatile double %val15, double *%ptr
+  store volatile double %val16, double *%ptr
+
+  store volatile double %sqrt0, double *%ptr
+  store volatile double %sqrt1, double *%ptr
+  store volatile double %sqrt2, double *%ptr
+  store volatile double %sqrt3, double *%ptr
+  store volatile double %sqrt4, double *%ptr
+  store volatile double %sqrt5, double *%ptr
+  store volatile double %sqrt6, double *%ptr
+  store volatile double %sqrt7, double *%ptr
+  store volatile double %sqrt8, double *%ptr
+  store volatile double %sqrt9, double *%ptr
+  store volatile double %sqrt10, double *%ptr
+  store volatile double %sqrt11, double *%ptr
+  store volatile double %sqrt12, double *%ptr
+  store volatile double %sqrt13, double *%ptr
+  store volatile double %sqrt14, double *%ptr
+  store volatile double %sqrt15, double *%ptr
+  store volatile double %sqrt16, double *%ptr
+
+  ret void
+}
+
+; Check that sqrt is lowered to SQDBR, calling sqrt only for NaN results.
+define double @f8(double %dummy, double %val) {
+; CHECK-LABEL: f8:
+; CHECK: sqdbr %f0, %f2
+; CHECK: cdbr %f0, %f0
+; CHECK: jo [[LABEL:\.L.*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: ldr %f0, %f2
+; CHECK: jg sqrt@PLT
+  %res = tail call double @sqrt(double %val)
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-03.ll b/test/CodeGen/SystemZ/fp-sqrt-03.ll
index 1b49af41254f..71426440aca3 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-03.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-03.ll
@@ -6,7 +6,7 @@ declare fp128 @llvm.sqrt.f128(fp128 %f)
 
 ; There's no memory form of SQXBR.
 define void @f1(fp128 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: sqxbr %f0, %f0
diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll
index b03f04bd017e..76f46f626705 100644
--- a/test/CodeGen/SystemZ/fp-sub-01.ll
+++ b/test/CodeGen/SystemZ/fp-sub-01.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare float @foo()
+
 ; Check register subtraction.
 define float @f1(float %f1, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sebr %f0, %f2
 ; CHECK: br %r14
   %res = fsub float %f1, %f2
@@ -13,7 +15,7 @@ define float @f1(float %f1, float %f2) {
 
 ; Check the low end of the SEB range.
 define float @f2(float %f1, float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: seb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load float *%ptr
@@ -23,7 +25,7 @@ define float @f2(float %f1, float *%ptr) {
 
 ; Check the high end of the aligned SEB range.
 define float @f3(float %f1, float *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: seb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float *%base, i64 1023
@@ -35,7 +37,7 @@ define float @f3(float %f1, float *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define float @f4(float %f1, float *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: seb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define float @f4(float %f1, float *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define float @f5(float %f1, float *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -4
 ; CHECK: seb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define float @f5(float %f1, float *%base) {
 
 ; Check that SEB allows indices.
 define float @f6(float %f1, float *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 2
 ; CHECK: seb %f0, 400(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define float @f6(float %f1, float *%base, i64 %index) {
   %res = fsub float %f1, %f2
   ret float %res
 }
+
+; Check that values spilled across the call to foo are subtracted using SEB.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: seb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+  %ptr10 = getelementptr float *%ptr0, i64 20
+
+  %val0 = load float *%ptr0
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+  %val10 = load float *%ptr10
+
+  %ret = call float @foo()
+
+  %sub0 = fsub float %ret, %val0
+  %sub1 = fsub float %sub0, %val1
+  %sub2 = fsub float %sub1, %val2
+  %sub3 = fsub float %sub2, %val3
+  %sub4 = fsub float %sub3, %val4
+  %sub5 = fsub float %sub4, %val5
+  %sub6 = fsub float %sub5, %val6
+  %sub7 = fsub float %sub6, %val7
+  %sub8 = fsub float %sub7, %val8
+  %sub9 = fsub float %sub8, %val9
+  %sub10 = fsub float %sub9, %val10
+
+  ret float %sub10
+}
diff --git a/test/CodeGen/SystemZ/fp-sub-02.ll b/test/CodeGen/SystemZ/fp-sub-02.ll
index bf9848c2fd51..99cafed8d08b 100644
--- a/test/CodeGen/SystemZ/fp-sub-02.ll
+++ b/test/CodeGen/SystemZ/fp-sub-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare double @foo()
+
 ; Check register subtraction.
 define double @f1(double %f1, double %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sdbr %f0, %f2
 ; CHECK: br %r14
   %res = fsub double %f1, %f2
@@ -13,7 +15,7 @@ define double @f1(double %f1, double %f2) {
 
 ; Check the low end of the SDB range.
 define double @f2(double %f1, double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sdb %f0, 0(%r2)
 ; CHECK: br %r14
   %f2 = load double *%ptr
@@ -23,7 +25,7 @@ define double @f2(double %f1, double *%ptr) {
 
 ; Check the high end of the aligned SDB range.
 define double @f3(double %f1, double *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sdb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double *%base, i64 511
@@ -35,7 +37,7 @@ define double @f3(double %f1, double *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double %f1, double *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: sdb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define double @f4(double %f1, double *%base) {
 
 ; Check negative displacements, which also need separate address logic.
 define double @f5(double %f1, double *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -8
 ; CHECK: sdb %f0, 0(%r2)
 ; CHECK: br %r14
@@ -59,7 +61,7 @@ define double @f5(double %f1, double *%base) {
 
 ; Check that SDB allows indices.
 define double @f6(double %f1, double *%base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r3, 3
 ; CHECK: sdb %f0, 800(%r1,%r2)
 ; CHECK: br %r14
@@ -69,3 +71,49 @@ define double @f6(double %f1, double *%base, i64 %index) {
   %res = fsub double %f1, %f2
   ret double %res
 }
+
+; Check that values spilled across the call to foo are subtracted using SDB.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: sdb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+  %ptr10 = getelementptr double *%ptr0, i64 20
+
+  %val0 = load double *%ptr0
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+  %val10 = load double *%ptr10
+
+  %ret = call double @foo()
+
+  %sub0 = fsub double %ret, %val0
+  %sub1 = fsub double %sub0, %val1
+  %sub2 = fsub double %sub1, %val2
+  %sub3 = fsub double %sub2, %val3
+  %sub4 = fsub double %sub3, %val4
+  %sub5 = fsub double %sub4, %val5
+  %sub6 = fsub double %sub5, %val6
+  %sub7 = fsub double %sub6, %val7
+  %sub8 = fsub double %sub7, %val8
+  %sub9 = fsub double %sub8, %val9
+  %sub10 = fsub double %sub9, %val10
+
+  ret double %sub10
+}
diff --git a/test/CodeGen/SystemZ/fp-sub-03.ll b/test/CodeGen/SystemZ/fp-sub-03.ll
index 82bb94dd28b3..a1404c4ff0e7 100644
--- a/test/CodeGen/SystemZ/fp-sub-03.ll
+++ b/test/CodeGen/SystemZ/fp-sub-03.ll
@@ -4,7 +4,7 @@
 
 ; There is no memory form of 128-bit subtraction.
 define void @f1(fp128 *%ptr, float %f2) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lxebr %f0, %f0
 ; CHECK: ld %f1, 0(%r2)
 ; CHECK: ld %f3, 8(%r2)
diff --git a/test/CodeGen/SystemZ/frame-01.ll b/test/CodeGen/SystemZ/frame-01.ll
index 0d343128c4cd..f61836ca8552 100644
--- a/test/CodeGen/SystemZ/frame-01.ll
+++ b/test/CodeGen/SystemZ/frame-01.ll
@@ -3,9 +3,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare void @foo(i32 *)
+
 ; The CFA offset is 160 (the caller-allocated part of the frame) + 168.
 define void @f1(i64 %x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: aghi %r15, -168
 ; CHECK: .cfi_def_cfa_offset 328
 ; CHECK: stg %r2, 160(%r15)
@@ -18,18 +20,18 @@ define void @f1(i64 %x) {
 
 ; Check frames of size 32760, which is the largest size that can be both
 ; allocated and freed using AGHI.  This size is big enough to require
-; an emergency spill slot at 160(%r15), for instructions with unsigned
+; two emergency spill slots at 160(%r15), for instructions with unsigned
 ; 12-bit offsets that end up being out of range.  Fill the remaining
-; 32760 - 168 bytes by allocating (32760 - 168) / 8 = 4074 doublewords.
+; 32760 - 176 bytes by allocating (32760 - 176) / 8 = 4073 doublewords.
 define void @f2(i64 %x) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r15, -32760
 ; CHECK: .cfi_def_cfa_offset 32920
-; CHECK: stg %r2, 168(%r15)
+; CHECK: stg %r2, 176(%r15)
 ; CHECK: aghi %r15, 32760
 ; CHECK: br %r14
-  %y = alloca [4074 x i64], align 8
-  %ptr = getelementptr inbounds [4074 x i64]* %y, i64 0, i64 0
+  %y = alloca [4073 x i64], align 8
+  %ptr = getelementptr inbounds [4073 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
@@ -37,14 +39,14 @@ define void @f2(i64 %x) {
 ; Allocate one more doubleword.  This is the one frame size that we can
 ; allocate using AGHI but must free using AGFI.
 define void @f3(i64 %x) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r15, -32768
 ; CHECK: .cfi_def_cfa_offset 32928
-; CHECK: stg %r2, 168(%r15)
+; CHECK: stg %r2, 176(%r15)
 ; CHECK: agfi %r15, 32768
 ; CHECK: br %r14
-  %y = alloca [4075 x i64], align 8
-  %ptr = getelementptr inbounds [4075 x i64]* %y, i64 0, i64 0
+  %y = alloca [4074 x i64], align 8
+  %ptr = getelementptr inbounds [4074 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
@@ -52,14 +54,14 @@ define void @f3(i64 %x) {
 ; Allocate another doubleword on top of that.  The allocation and free
 ; must both use AGFI.
 define void @f4(i64 %x) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r15, -32776
 ; CHECK: .cfi_def_cfa_offset 32936
-; CHECK: stg %r2, 168(%r15)
+; CHECK: stg %r2, 176(%r15)
 ; CHECK: agfi %r15, 32776
 ; CHECK: br %r14
-  %y = alloca [4076 x i64], align 8
-  %ptr = getelementptr inbounds [4076 x i64]* %y, i64 0, i64 0
+  %y = alloca [4075 x i64], align 8
+  %ptr = getelementptr inbounds [4075 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
@@ -67,13 +69,13 @@ define void @f4(i64 %x) {
 ; The largest size that can be both allocated and freed using AGFI.
 ; At this point the frame is too big to represent properly in the CFI.
 define void @f5(i64 %x) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r15, -2147483640
-; CHECK: stg %r2, 168(%r15)
+; CHECK: stg %r2, 176(%r15)
 ; CHECK: agfi %r15, 2147483640
 ; CHECK: br %r14
-  %y = alloca [268435434 x i64], align 8
-  %ptr = getelementptr inbounds [268435434 x i64]* %y, i64 0, i64 0
+  %y = alloca [268435433 x i64], align 8
+  %ptr = getelementptr inbounds [268435433 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
@@ -81,14 +83,14 @@ define void @f5(i64 %x) {
 ; The only frame size that can be allocated using a single AGFI but which
 ; must be freed using two instructions.
 define void @f6(i64 %x) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r15, -2147483648
-; CHECK: stg %r2, 168(%r15)
+; CHECK: stg %r2, 176(%r15)
 ; CHECK: agfi %r15, 2147483640
 ; CHECK: aghi %r15, 8
 ; CHECK: br %r14
-  %y = alloca [268435435 x i64], align 8
-  %ptr = getelementptr inbounds [268435435 x i64]* %y, i64 0, i64 0
+  %y = alloca [268435434 x i64], align 8
+  %ptr = getelementptr inbounds [268435434 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
@@ -96,15 +98,29 @@ define void @f6(i64 %x) {
 ; The smallest frame size that needs two instructions to both allocate
 ; and free the frame.
 define void @f7(i64 %x) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r15, -2147483648
 ; CHECK: aghi %r15, -8
-; CHECK: stg %r2, 168(%r15)
+; CHECK: stg %r2, 176(%r15)
 ; CHECK: agfi %r15, 2147483640
 ; CHECK: aghi %r15, 16
 ; CHECK: br %r14
-  %y = alloca [268435436 x i64], align 8
-  %ptr = getelementptr inbounds [268435436 x i64]* %y, i64 0, i64 0
+  %y = alloca [268435435 x i64], align 8
+  %ptr = getelementptr inbounds [268435435 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
+
+; Make sure that LA can be rematerialized: it is reissued before each call.
+define void @f8() {
+; CHECK-LABEL: f8:
+; CHECK: la %r2, 164(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: la %r2, 164(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i32
+  call void @foo(i32 *%ptr)
+  call void @foo(i32 *%ptr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-02.ll b/test/CodeGen/SystemZ/frame-02.ll
index 589703ec0e74..9a7f8eac9eba 100644
--- a/test/CodeGen/SystemZ/frame-02.ll
+++ b/test/CodeGen/SystemZ/frame-02.ll
@@ -7,7 +7,7 @@
 ; should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
 ; (the caller-allocated part of the frame) + 224.
 define void @f1(float *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: aghi %r15, -224
 ; CHECK: .cfi_def_cfa_offset 384
 ; CHECK: std %f8, 216(%r15)
@@ -91,7 +91,7 @@ define void @f1(float *%ptr) {
 ; Like f1, but requires one fewer FPR.  We allocate in numerical order,
 ; so %f15 is the one that gets dropped.
 define void @f2(float *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r15, -216
 ; CHECK: .cfi_def_cfa_offset 376
 ; CHECK: std %f8, 208(%r15)
@@ -169,7 +169,7 @@ define void @f2(float *%ptr) {
 
 ; Like f1, but should require only one call-saved FPR.
 define void @f3(float *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r15, -168
 ; CHECK: .cfi_def_cfa_offset 328
 ; CHECK: std %f8, 160(%r15)
@@ -218,7 +218,7 @@ define void @f3(float *%ptr) {
 ; This function should use all call-clobbered FPRs but no call-saved ones.
 ; It shouldn't need to create a frame.
 define void @f4(float *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r15
 ; CHECK-NOT: %f8
 ; CHECK-NOT: %f9
diff --git a/test/CodeGen/SystemZ/frame-03.ll b/test/CodeGen/SystemZ/frame-03.ll
index 3c4a49977a12..db146c7c985d 100644
--- a/test/CodeGen/SystemZ/frame-03.ll
+++ b/test/CodeGen/SystemZ/frame-03.ll
@@ -9,7 +9,7 @@
 ; should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
 ; (the caller-allocated part of the frame) + 224.
 define void @f1(double *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: aghi %r15, -224
 ; CHECK: .cfi_def_cfa_offset 384
 ; CHECK: std %f8, 216(%r15)
@@ -93,7 +93,7 @@ define void @f1(double *%ptr) {
 ; Like f1, but requires one fewer FPR.  We allocate in numerical order,
 ; so %f15 is the one that gets dropped.
 define void @f2(double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r15, -216
 ; CHECK: .cfi_def_cfa_offset 376
 ; CHECK: std %f8, 208(%r15)
@@ -171,7 +171,7 @@ define void @f2(double *%ptr) {
 
 ; Like f1, but should require only one call-saved FPR.
 define void @f3(double *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r15, -168
 ; CHECK: .cfi_def_cfa_offset 328
 ; CHECK: std %f8, 160(%r15)
@@ -220,7 +220,7 @@ define void @f3(double *%ptr) {
 ; This function should use all call-clobbered FPRs but no call-saved ones.
 ; It shouldn't need to create a frame.
 define void @f4(double *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r15
 ; CHECK-NOT: %f8
 ; CHECK-NOT: %f9
diff --git a/test/CodeGen/SystemZ/frame-04.ll b/test/CodeGen/SystemZ/frame-04.ll
index 360f85cde322..93c59a3bc15f 100644
--- a/test/CodeGen/SystemZ/frame-04.ll
+++ b/test/CodeGen/SystemZ/frame-04.ll
@@ -8,7 +8,7 @@
 ; should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
 ; (the caller-allocated part of the frame) + 224.
 define void @f1(fp128 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: aghi %r15, -224
 ; CHECK: .cfi_def_cfa_offset 384
 ; CHECK: std %f8, 216(%r15)
@@ -68,7 +68,7 @@ define void @f1(fp128 *%ptr) {
 ; Like f1, but requires one fewer FPR pair.  We allocate in numerical order,
 ; so %f13+%f15 is the pair that gets dropped.
 define void @f2(fp128 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r15, -208
 ; CHECK: .cfi_def_cfa_offset 368
 ; CHECK: std %f8, 200(%r15)
@@ -121,7 +121,7 @@ define void @f2(fp128 *%ptr) {
 ; Like f1, but requires only one call-saved FPR pair.  We allocate in
 ; numerical order so the pair should be %f8+%f10.
 define void @f3(fp128 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r15, -176
 ; CHECK: .cfi_def_cfa_offset 336
 ; CHECK: std %f8, 168(%r15)
@@ -160,7 +160,7 @@ define void @f3(fp128 *%ptr) {
 ; This function should use all call-clobbered FPRs but no call-saved ones.
 ; It shouldn't need to create a frame.
 define void @f4(fp128 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r15
 ; CHECK-NOT: %f8
 ; CHECK-NOT: %f9
diff --git a/test/CodeGen/SystemZ/frame-05.ll b/test/CodeGen/SystemZ/frame-05.ll
index 3a159fcd5941..f95284deeb79 100644
--- a/test/CodeGen/SystemZ/frame-05.ll
+++ b/test/CodeGen/SystemZ/frame-05.ll
@@ -14,7 +14,7 @@
 ; Use a different address for the final store, so that we can check that
 ; %r15 isn't referenced again until after that.
 define void @f1(i32 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK-NOT: %r15
 ; CHECK: .cfi_offset %r6, -112
@@ -82,7 +82,7 @@ define void @f1(i32 *%ptr) {
 ; from %r14 down, so that the STMG/LMG sequences aren't any longer than
 ; they need to be.
 define void @f2(i32 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stmg %r7, %r15, 56(%r15)
 ; CHECK-NOT: %r15
 ; CHECK: .cfi_offset %r7, -104
@@ -145,7 +145,7 @@ define void @f2(i32 *%ptr) {
 
 ; Like f1, but only needs one call-saved GPR, which ought to be %r14.
 define void @f3(i32 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK-NOT: %r15
 ; CHECK: .cfi_offset %r14, -48
@@ -188,7 +188,7 @@ define void @f3(i32 *%ptr) {
 ; This function should use all call-clobbered GPRs but no call-saved ones.
 ; It shouldn't need to touch the stack at all.
 define void @f4(i32 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r15
 ; CHECK-NOT: %r6
 ; CHECK-NOT: %r7
diff --git a/test/CodeGen/SystemZ/frame-06.ll b/test/CodeGen/SystemZ/frame-06.ll
index 4c361f1e9fc9..ad22f10903ad 100644
--- a/test/CodeGen/SystemZ/frame-06.ll
+++ b/test/CodeGen/SystemZ/frame-06.ll
@@ -11,7 +11,7 @@
 ; Use a different address for the final store, so that we can check that
 ; %r15 isn't referenced again until after that.
 define void @f1(i64 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK-NOT: %r15
 ; CHECK: .cfi_offset %r6, -112
@@ -79,7 +79,7 @@ define void @f1(i64 *%ptr) {
 ; from %r14 down, so that the STMG/LMG sequences aren't any longer than
 ; they need to be.
 define void @f2(i64 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stmg %r7, %r15, 56(%r15)
 ; CHECK-NOT: %r15
 ; CHECK: .cfi_offset %r7, -104
@@ -142,7 +142,7 @@ define void @f2(i64 *%ptr) {
 
 ; Like f1, but only needs one call-saved GPR, which ought to be %r14.
 define void @f3(i64 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK-NOT: %r15
 ; CHECK: .cfi_offset %r14, -48
@@ -185,7 +185,7 @@ define void @f3(i64 *%ptr) {
 ; This function should use all call-clobbered GPRs but no call-saved ones.
 ; It shouldn't need to touch the stack at all.
 define void @f4(i64 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r15
 ; CHECK-NOT: %r6
 ; CHECK-NOT: %r7
diff --git a/test/CodeGen/SystemZ/frame-07.ll b/test/CodeGen/SystemZ/frame-07.ll
index cfe9f868c07b..eab313744b94 100644
--- a/test/CodeGen/SystemZ/frame-07.ll
+++ b/test/CodeGen/SystemZ/frame-07.ll
@@ -5,11 +5,11 @@
 
 ; Test a frame size that requires some FPRs to be saved and loaded using
 ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
-; The frame is big enough to require an emergency spill slot at 160(%r15),
+; The frame is big enough to require two emergency spill slots at 160(%r15),
 ; as well as the 8 FPR save slots.  Get a frame of size 4128 by allocating
-; (4128 - 168 - 8 * 8) / 8 = 487 extra doublewords.
+; (4128 - 176 - 8 * 8) / 8 = 486 extra doublewords.
 define void @f1(double *%ptr, i64 %x) {
-; CHECK-NOFP: f1:
+; CHECK-NOFP-LABEL: f1:
 ; CHECK-NOFP: aghi %r15, -4128
 ; CHECK-NOFP: .cfi_def_cfa_offset 4288
 ; CHECK-NOFP: stdy %f8, 4120(%r15)
@@ -40,7 +40,7 @@ define void @f1(double *%ptr, i64 %x) {
 ; CHECK-NOFP: aghi %r15, 4128
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f1:
+; CHECK-FP-LABEL: f1:
 ; CHECK-FP: stmg %r11, %r15, 88(%r15)
 ; CHECK-FP: aghi %r15, -4128
 ; CHECK-FP: .cfi_def_cfa_offset 4288
@@ -65,8 +65,8 @@ define void @f1(double *%ptr, i64 %x) {
 ; CHECK-FP: ld %f15, 4064(%r11)
 ; CHECK-FP: lmg %r11, %r15, 4216(%r11)
 ; CHECK-FP: br %r14
-  %y = alloca [487 x i64], align 8
-  %elem = getelementptr inbounds [487 x i64]* %y, i64 0, i64 0
+  %y = alloca [486 x i64], align 8
+  %elem = getelementptr inbounds [486 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %elem
   %l0 = load volatile double *%ptr
   %l1 = load volatile double *%ptr
@@ -127,9 +127,9 @@ define void @f1(double *%ptr, i64 %x) {
 ; good optimisation but is really a different test.
 ;
 ; As above, get a frame of size 524320 by allocating
-; (524320 - 168 - 8 * 8) / 8 = 65511 extra doublewords.
+; (524320 - 176 - 8 * 8) / 8 = 65510 extra doublewords.
 define void @f2(double *%ptr, i64 %x) {
-; CHECK-NOFP: f2:
+; CHECK-NOFP-LABEL: f2:
 ; CHECK-NOFP: agfi %r15, -524320
 ; CHECK-NOFP: .cfi_def_cfa_offset 524480
 ; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8
@@ -161,7 +161,7 @@ define void @f2(double *%ptr, i64 %x) {
 ; CHECK-NOFP: agfi %r15, 524320
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f2:
+; CHECK-FP-LABEL: f2:
 ; CHECK-FP: stmg %r11, %r15, 88(%r15)
 ; CHECK-FP: agfi %r15, -524320
 ; CHECK-FP: .cfi_def_cfa_offset 524480
@@ -194,8 +194,8 @@ define void @f2(double *%ptr, i64 %x) {
 ; CHECK-FP: aghi %r11, 128
 ; CHECK-FP: lmg %r11, %r15, 524280(%r11)
 ; CHECK-FP: br %r14
-  %y = alloca [65511 x i64], align 8
-  %elem = getelementptr inbounds [65511 x i64]* %y, i64 0, i64 0
+  %y = alloca [65510 x i64], align 8
+  %elem = getelementptr inbounds [65510 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %elem
   %l0 = load volatile double *%ptr
   %l1 = load volatile double *%ptr
diff --git a/test/CodeGen/SystemZ/frame-08.ll b/test/CodeGen/SystemZ/frame-08.ll
index 6cf6378268f4..da2a6142fb47 100644
--- a/test/CodeGen/SystemZ/frame-08.ll
+++ b/test/CodeGen/SystemZ/frame-08.ll
@@ -3,11 +3,11 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 ; This is the largest frame size that can use a plain LMG for %r6 and above.
-; It is big enough to require an emergency spill slot at 160(%r15),
-; so get a frame of size 524232 by allocating (524232 - 168) / 8 = 65508
+; It is big enough to require two emergency spill slots at 160(%r15),
+; so get a frame of size 524232 by allocating (524232 - 176) / 8 = 65507
 ; extra doublewords.
 define void @f1(i32 *%ptr, i64 %x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK: .cfi_offset %r6, -112
 ; CHECK: .cfi_offset %r7, -104
@@ -64,18 +64,18 @@ define void @f1(i32 *%ptr, i64 %x) {
   store volatile i32 %add12, i32 *%ptr
   store volatile i32 %add13, i32 *%ptr
   store volatile i32 %add14, i32 *%ptr
-  %y = alloca [65508 x i64], align 8
-  %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0
+  %y = alloca [65507 x i64], align 8
+  %entry = getelementptr inbounds [65507 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %entry
   ret void
 }
 
 ; This is the largest frame size that can use a plain LMG for %r14 and above
-; It is big enough to require an emergency spill slot at 160(%r15),
-; so get a frame of size 524168 by allocating (524168 - 168) / 8 = 65500
+; It is big enough to require two emergency spill slots at 160(%r15),
+; so get a frame of size 524168 by allocating (524168 - 176) / 8 = 65499
 ; extra doublewords.
 define void @f2(i32 *%ptr, i64 %x) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK: .cfi_offset %r14, -48
 ; CHECK: .cfi_offset %r15, -40
@@ -100,8 +100,8 @@ define void @f2(i32 *%ptr, i64 %x) {
   store volatile i32 %add4, i32 *%ptr
   store volatile i32 %add5, i32 *%ptr
   store volatile i32 %add14, i32 *%ptr
-  %y = alloca [65500 x i64], align 8
-  %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0
+  %y = alloca [65499 x i64], align 8
+  %entry = getelementptr inbounds [65499 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %entry
   ret void
 }
@@ -110,7 +110,7 @@ define void @f2(i32 *%ptr, i64 %x) {
 ; frame size that needs two instructions to perform the final LMG for
 ; %r6 and above.
 define void @f3(i32 *%ptr, i64 %x) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK: .cfi_offset %r6, -112
 ; CHECK: .cfi_offset %r7, -104
@@ -167,8 +167,8 @@ define void @f3(i32 *%ptr, i64 %x) {
   store volatile i32 %add12, i32 *%ptr
   store volatile i32 %add13, i32 *%ptr
   store volatile i32 %add14, i32 *%ptr
-  %y = alloca [65509 x i64], align 8
-  %entry = getelementptr inbounds [65509 x i64]* %y, i64 0, i64 0
+  %y = alloca [65508 x i64], align 8
+  %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %entry
   ret void
 }
@@ -177,7 +177,7 @@ define void @f3(i32 *%ptr, i64 %x) {
 ; frame size that needs two instructions to perform the final LMG for
 ; %r14 and %r15.
 define void @f4(i32 *%ptr, i64 %x) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK: .cfi_offset %r14, -48
 ; CHECK: .cfi_offset %r15, -40
@@ -202,8 +202,8 @@ define void @f4(i32 *%ptr, i64 %x) {
   store volatile i32 %add4, i32 *%ptr
   store volatile i32 %add5, i32 *%ptr
   store volatile i32 %add14, i32 *%ptr
-  %y = alloca [65501 x i64], align 8
-  %entry = getelementptr inbounds [65501 x i64]* %y, i64 0, i64 0
+  %y = alloca [65500 x i64], align 8
+  %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %entry
   ret void
 }
@@ -211,7 +211,7 @@ define void @f4(i32 *%ptr, i64 %x) {
 ; This is the largest frame size for which the prepatory increment for
 ; "lmg %r14, %r15, ..." can be done using AGHI.
 define void @f5(i32 *%ptr, i64 %x) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK: .cfi_offset %r14, -48
 ; CHECK: .cfi_offset %r15, -40
@@ -236,8 +236,8 @@ define void @f5(i32 *%ptr, i64 %x) {
   store volatile i32 %add4, i32 *%ptr
   store volatile i32 %add5, i32 *%ptr
   store volatile i32 %add14, i32 *%ptr
-  %y = alloca [69595 x i64], align 8
-  %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0
+  %y = alloca [69594 x i64], align 8
+  %entry = getelementptr inbounds [69594 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %entry
   ret void
 }
@@ -245,7 +245,7 @@ define void @f5(i32 *%ptr, i64 %x) {
 ; This is the smallest frame size for which the prepatory increment for
 ; "lmg %r14, %r15, ..." needs to be done using AGFI.
 define void @f6(i32 *%ptr, i64 %x) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: stmg %r14, %r15, 112(%r15)
 ; CHECK: .cfi_offset %r14, -48
 ; CHECK: .cfi_offset %r15, -40
@@ -270,8 +270,8 @@ define void @f6(i32 *%ptr, i64 %x) {
   store volatile i32 %add4, i32 *%ptr
   store volatile i32 %add5, i32 *%ptr
   store volatile i32 %add14, i32 *%ptr
-  %y = alloca [69596 x i64], align 8
-  %entry = getelementptr inbounds [69596 x i64]* %y, i64 0, i64 0
+  %y = alloca [69595 x i64], align 8
+  %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %entry
   ret void
 }
diff --git a/test/CodeGen/SystemZ/frame-09.ll b/test/CodeGen/SystemZ/frame-09.ll
index eac633623c5f..8a4f99c343a0 100644
--- a/test/CodeGen/SystemZ/frame-09.ll
+++ b/test/CodeGen/SystemZ/frame-09.ll
@@ -6,7 +6,7 @@
 ; We don't need to allocate any more than the caller-provided 160-byte
 ; area though.
 define i32 @f1(i32 %x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r11, %r15, 88(%r15)
 ; CHECK: .cfi_offset %r11, -72
 ; CHECK: .cfi_offset %r15, -40
@@ -22,7 +22,7 @@ define i32 @f1(i32 %x) {
 ; Make sure that frame accesses after the initial allocation are relative
 ; to %r11 rather than %r15.
 define void @f2(i64 %x) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stmg %r11, %r15, 88(%r15)
 ; CHECK: .cfi_offset %r11, -72
 ; CHECK: .cfi_offset %r15, -40
@@ -41,7 +41,7 @@ define void @f2(i64 %x) {
 ; This function should require all GPRs but no other spill slots.
 ; It shouldn't need to allocate its own frame.
 define void @f3(i32 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK-NOT: %r15
 ; CHECK-NOT: %r11
@@ -107,11 +107,11 @@ define void @f3(i32 *%ptr) {
   ret void
 }
 
-; The largest frame for which the LMG is in range.  This frame has an
-; emergency spill slot at 160(%r11), so create a frame of size 524192
-; by allocating (524192 - 168) / 8 = 65503 doublewords.
+; The largest frame for which the LMG is in range.  This frame has two
+; emergency spill slots at 160(%r11), so create a frame of size 524192
+; by allocating (524192 - 176) / 8 = 65502 doublewords.
 define void @f4(i64 %x) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stmg %r11, %r15, 88(%r15)
 ; CHECK: .cfi_offset %r11, -72
 ; CHECK: .cfi_offset %r15, -40
@@ -119,19 +119,19 @@ define void @f4(i64 %x) {
 ; CHECK: .cfi_def_cfa_offset 524352
 ; CHECK: lgr %r11, %r15
 ; CHECK: .cfi_def_cfa_register %r11
-; CHECK: stg %r2, 168(%r11)
+; CHECK: stg %r2, 176(%r11)
 ; CHECK-NOT: ag
 ; CHECK: lmg %r11, %r15, 524280(%r11)
 ; CHECK: br %r14
-  %y = alloca [65503 x i64], align 8
-  %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0
+  %y = alloca [65502 x i64], align 8
+  %ptr = getelementptr inbounds [65502 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
 
 ; The next frame size larger than f4.
 define void @f5(i64 %x) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: stmg %r11, %r15, 88(%r15)
 ; CHECK: .cfi_offset %r11, -72
 ; CHECK: .cfi_offset %r15, -40
@@ -139,12 +139,12 @@ define void @f5(i64 %x) {
 ; CHECK: .cfi_def_cfa_offset 524360
 ; CHECK: lgr %r11, %r15
 ; CHECK: .cfi_def_cfa_register %r11
-; CHECK: stg %r2, 168(%r11)
+; CHECK: stg %r2, 176(%r11)
 ; CHECK: aghi %r11, 8
 ; CHECK: lmg %r11, %r15, 524280(%r11)
 ; CHECK: br %r14
-  %y = alloca [65504 x i64], align 8
-  %ptr = getelementptr inbounds [65504 x i64]* %y, i64 0, i64 0
+  %y = alloca [65503 x i64], align 8
+  %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %ptr
   ret void
 }
diff --git a/test/CodeGen/SystemZ/frame-10.ll b/test/CodeGen/SystemZ/frame-10.ll
index 399a4125933d..b96973a9cb9d 100644
--- a/test/CodeGen/SystemZ/frame-10.ll
+++ b/test/CodeGen/SystemZ/frame-10.ll
@@ -5,7 +5,7 @@
 declare i8 *@llvm.stacksave()
 
 define void @f1(i8 **%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stg %r15, 0(%r2)
 ; CHECK: br %r14
   %addr = call i8 *@llvm.stacksave()
diff --git a/test/CodeGen/SystemZ/frame-11.ll b/test/CodeGen/SystemZ/frame-11.ll
index 84222056e6d0..5145b4d1c862 100644
--- a/test/CodeGen/SystemZ/frame-11.ll
+++ b/test/CodeGen/SystemZ/frame-11.ll
@@ -7,7 +7,7 @@ declare void @llvm.stackrestore(i8 *)
 ; we should use a frame pointer and tear down the frame based on %r11
 ; rather than %r15.
 define void @f1(i8 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r11, %r15, 88(%r15)
 ; CHECK: lgr %r11, %r15
 ; CHECK: lgr %r15, %r2
diff --git a/test/CodeGen/SystemZ/frame-13.ll b/test/CodeGen/SystemZ/frame-13.ll
index fa6b845ea6f2..393850fbf617 100644
--- a/test/CodeGen/SystemZ/frame-13.ll
+++ b/test/CodeGen/SystemZ/frame-13.ll
@@ -1,8 +1,11 @@
 ; Test the handling of base + 12-bit displacement addresses for large frames,
-; in cases where no 20-bit form exists.
+; in cases where no 20-bit form exists.  The tests here assume z10 register
+; pressure, without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
+; RUN:   FileCheck -check-prefix=CHECK-FP %s
 
 ; This file tests what happens when a displacement is converted from
 ; being relative to the start of a frame object to being relative to
@@ -17,22 +20,22 @@
 ; First check the highest in-range offset after conversion, which is 4092
 ; for word-addressing instructions like MVHI.
 ;
-; The last in-range doubleword offset is 4088.  Since the frame has an
-; emergency spill slot at 160(%r15), the amount that we need to allocate
-; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980
+; The last in-range doubleword offset is 4088.  Since the frame has two
+; emergency spill slots at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978
 ; words.
 define void @f1() {
-; CHECK-NOFP: f1:
+; CHECK-NOFP-LABEL: f1:
 ; CHECK-NOFP: mvhi 4092(%r15), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f1:
+; CHECK-FP-LABEL: f1:
 ; CHECK-FP: mvhi 4092(%r11), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x i32], align 8
-  %region2 = alloca [980 x i32], align 8
-  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 1
-  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 1
+  %region1 = alloca [978 x i32], align 8
+  %region2 = alloca [978 x i32], align 8
+  %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 1
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
@@ -40,19 +43,19 @@ define void @f1() {
 
 ; Test the first out-of-range offset.  We cannot use an index register here.
 define void @f2() {
-; CHECK-NOFP: f2:
+; CHECK-NOFP-LABEL: f2:
 ; CHECK-NOFP: lay %r1, 4096(%r15)
 ; CHECK-NOFP: mvhi 0(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f2:
+; CHECK-FP-LABEL: f2:
 ; CHECK-FP: lay %r1, 4096(%r11)
 ; CHECK-FP: mvhi 0(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x i32], align 8
-  %region2 = alloca [980 x i32], align 8
-  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
+  %region1 = alloca [978 x i32], align 8
+  %region2 = alloca [978 x i32], align 8
+  %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
@@ -60,19 +63,19 @@ define void @f2() {
 
 ; Test the next offset after that.
 define void @f3() {
-; CHECK-NOFP: f3:
+; CHECK-NOFP-LABEL: f3:
 ; CHECK-NOFP: lay %r1, 4096(%r15)
 ; CHECK-NOFP: mvhi 4(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f3:
+; CHECK-FP-LABEL: f3:
 ; CHECK-FP: lay %r1, 4096(%r11)
 ; CHECK-FP: mvhi 4(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x i32], align 8
-  %region2 = alloca [980 x i32], align 8
-  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 3
-  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 3
+  %region1 = alloca [978 x i32], align 8
+  %region2 = alloca [978 x i32], align 8
+  %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 3
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
@@ -80,19 +83,19 @@ define void @f3() {
 
 ; Add 4096 bytes (1024 words) to the size of each object and repeat.
 define void @f4() {
-; CHECK-NOFP: f4:
+; CHECK-NOFP-LABEL: f4:
 ; CHECK-NOFP: lay %r1, 4096(%r15)
 ; CHECK-NOFP: mvhi 4092(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f4:
+; CHECK-FP-LABEL: f4:
 ; CHECK-FP: lay %r1, 4096(%r11)
 ; CHECK-FP: mvhi 4092(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [2004 x i32], align 8
-  %region2 = alloca [2004 x i32], align 8
-  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1
-  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1
+  %region1 = alloca [2002 x i32], align 8
+  %region2 = alloca [2002 x i32], align 8
+  %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 1
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
@@ -100,19 +103,19 @@ define void @f4() {
 
 ; ...as above.
 define void @f5() {
-; CHECK-NOFP: f5:
+; CHECK-NOFP-LABEL: f5:
 ; CHECK-NOFP: lay %r1, 8192(%r15)
 ; CHECK-NOFP: mvhi 0(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f5:
+; CHECK-FP-LABEL: f5:
 ; CHECK-FP: lay %r1, 8192(%r11)
 ; CHECK-FP: mvhi 0(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [2004 x i32], align 8
-  %region2 = alloca [2004 x i32], align 8
-  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 2
+  %region1 = alloca [2002 x i32], align 8
+  %region2 = alloca [2002 x i32], align 8
+  %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 2
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
@@ -120,41 +123,41 @@ define void @f5() {
 
 ; ...as above.
 define void @f6() {
-; CHECK-NOFP: f6:
+; CHECK-NOFP-LABEL: f6:
 ; CHECK-NOFP: lay %r1, 8192(%r15)
 ; CHECK-NOFP: mvhi 4(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f6:
+; CHECK-FP-LABEL: f6:
 ; CHECK-FP: lay %r1, 8192(%r11)
 ; CHECK-FP: mvhi 4(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [2004 x i32], align 8
-  %region2 = alloca [2004 x i32], align 8
-  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 3
-  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 3
+  %region1 = alloca [2002 x i32], align 8
+  %region2 = alloca [2002 x i32], align 8
+  %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 3
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
 }
 
 ; Now try an offset of 4092 from the start of the object, with the object
-; being at offset 8192.  This time we need objects of (8192 - 168) / 4 = 2006
+; being at offset 8192.  This time we need objects of (8192 - 176) / 4 = 2004
 ; words.
 define void @f7() {
-; CHECK-NOFP: f7:
+; CHECK-NOFP-LABEL: f7:
 ; CHECK-NOFP: lay %r1, 8192(%r15)
 ; CHECK-NOFP: mvhi 4092(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f7:
+; CHECK-FP-LABEL: f7:
 ; CHECK-FP: lay %r1, 8192(%r11)
 ; CHECK-FP: mvhi 4092(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [2006 x i32], align 8
-  %region2 = alloca [2006 x i32], align 8
-  %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023
-  %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023
+  %region1 = alloca [2004 x i32], align 8
+  %region2 = alloca [2004 x i32], align 8
+  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1023
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
@@ -163,72 +166,71 @@ define void @f7() {
 ; Keep the object-relative offset the same but bump the size of the
 ; objects by one doubleword.
 define void @f8() {
-; CHECK-NOFP: f8:
+; CHECK-NOFP-LABEL: f8:
 ; CHECK-NOFP: lay %r1, 12288(%r15)
 ; CHECK-NOFP: mvhi 4(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f8:
+; CHECK-FP-LABEL: f8:
 ; CHECK-FP: lay %r1, 12288(%r11)
 ; CHECK-FP: mvhi 4(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [2008 x i32], align 8
-  %region2 = alloca [2008 x i32], align 8
-  %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1023
-  %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1023
+  %region1 = alloca [2006 x i32], align 8
+  %region2 = alloca [2006 x i32], align 8
+  %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
 }
 
 ; Check a case where the original displacement is out of range.  The backend
-; should force an LAY from the outset.  We don't yet do any kind of anchor
-; optimization, so there should be no offset on the MVHI itself.
+; should force STY to be used instead of MVHI.
 define void @f9() {
-; CHECK-NOFP: f9:
-; CHECK-NOFP: lay %r1, 12296(%r15)
-; CHECK-NOFP: mvhi 0(%r1), 42
+; CHECK-NOFP-LABEL: f9:
+; CHECK-NOFP: lhi [[TMP:%r[0-5]]], 42
+; CHECK-NOFP: sty [[TMP]], 12296(%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f9:
-; CHECK-FP: lay %r1, 12296(%r11)
-; CHECK-FP: mvhi 0(%r1), 42
+; CHECK-FP-LABEL: f9:
+; CHECK-FP: lhi [[TMP:%r[0-5]]], 42
+; CHECK-FP: sty [[TMP]], 12296(%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2008 x i32], align 8
-  %region2 = alloca [2008 x i32], align 8
-  %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1024
-  %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1024
+  %region1 = alloca [2006 x i32], align 8
+  %region2 = alloca [2006 x i32], align 8
+  %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1024
+  %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1024
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   ret void
 }
 
-; Repeat f2 in a case that needs the emergency spill slot (because all
+; Repeat f2 in a case that needs the emergency spill slots (because all
 ; call-clobbered registers are live and no call-saved ones have been
 ; allocated).
 define void @f10(i32 *%vptr) {
-; CHECK-NOFP: f10:
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP-LABEL: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
 ; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f10:
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP-LABEL: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: lay [[REGISTER]], 4096(%r11)
 ; CHECK-FP: mvhi 0([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
   %i1 = load volatile i32 *%vptr
   %i3 = load volatile i32 *%vptr
   %i4 = load volatile i32 *%vptr
   %i5 = load volatile i32 *%vptr
-  %region1 = alloca [980 x i32], align 8
-  %region2 = alloca [980 x i32], align 8
-  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
+  %region1 = alloca [978 x i32], align 8
+  %region2 = alloca [978 x i32], align 8
+  %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   store volatile i32 %i0, i32 *%vptr
@@ -239,26 +241,26 @@ define void @f10(i32 *%vptr) {
   ret void
 }
 
-; And again with maximum register pressure.  The only spill slot that the
-; NOFP case needs is the emergency one, so the offsets are the same as for f2.
+; And again with maximum register pressure.  The only spill slots that the
+; NOFP case needs are the emergency ones, so the offsets are the same as for f2.
 ; However, the FP case uses %r11 as the frame pointer and must therefore
 ; spill a second register.  This leads to an extra displacement of 8.
 define void @f11(i32 *%vptr) {
-; CHECK-NOFP: f11:
+; CHECK-NOFP-LABEL: f11:
 ; CHECK-NOFP: stmg %r6, %r15,
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
 ; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f11:
+; CHECK-FP-LABEL: f11:
 ; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: lay [[REGISTER]], 4096(%r11)
 ; CHECK-FP: mvhi 8([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: lmg %r6, %r15,
 ; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
@@ -275,10 +277,10 @@ define void @f11(i32 *%vptr) {
   %i12 = load volatile i32 *%vptr
   %i13 = load volatile i32 *%vptr
   %i14 = load volatile i32 *%vptr
-  %region1 = alloca [980 x i32], align 8
-  %region2 = alloca [980 x i32], align 8
-  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
+  %region1 = alloca [978 x i32], align 8
+  %region2 = alloca [978 x i32], align 8
+  %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2
   store volatile i32 42, i32 *%ptr1
   store volatile i32 42, i32 *%ptr2
   store volatile i32 %i0, i32 *%vptr
diff --git a/test/CodeGen/SystemZ/frame-14.ll b/test/CodeGen/SystemZ/frame-14.ll
index d8ff0a54a761..3b48179c40b6 100644
--- a/test/CodeGen/SystemZ/frame-14.ll
+++ b/test/CodeGen/SystemZ/frame-14.ll
@@ -1,9 +1,13 @@
 ; Test the handling of base + displacement addresses for large frames,
 ; in cases where both 12-bit and 20-bit displacements are allowed.
+; The tests here assume z10 register pressure, without the high words
+; being available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
+; RUN:   FileCheck -check-prefix=CHECK-FP %s
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
-
 ; This file tests what happens when a displacement is converted from
 ; being relative to the start of a frame object to being relative to
 ; the frame itself.  In some cases the test is only possible if two
@@ -16,21 +20,21 @@
 
 ; First check the highest offset that is in range of the 12-bit form.
 ;
-; The last in-range doubleword offset is 4088.  Since the frame has an
-; emergency spill slot at 160(%r15), the amount that we need to allocate
-; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes.
+; The last in-range doubleword offset is 4088.  Since the frame has two
+; emergency spill slots at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is 4088 - 176 = 3912 bytes.
 define void @f1() {
-; CHECK-NOFP: f1:
+; CHECK-NOFP-LABEL: f1:
 ; CHECK-NOFP: mvi 4095(%r15), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f1:
+; CHECK-FP-LABEL: f1:
 ; CHECK-FP: mvi 4095(%r11), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [3920 x i8], align 8
-  %region2 = alloca [3920 x i8], align 8
-  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7
-  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7
+  %region1 = alloca [3912 x i8], align 8
+  %region2 = alloca [3912 x i8], align 8
+  %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -38,17 +42,17 @@ define void @f1() {
 
 ; Test the first offset that is out-of-range of the 12-bit form.
 define void @f2() {
-; CHECK-NOFP: f2:
+; CHECK-NOFP-LABEL: f2:
 ; CHECK-NOFP: mviy 4096(%r15), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f2:
+; CHECK-FP-LABEL: f2:
 ; CHECK-FP: mviy 4096(%r11), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [3920 x i8], align 8
-  %region2 = alloca [3920 x i8], align 8
-  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [3912 x i8], align 8
+  %region2 = alloca [3912 x i8], align 8
+  %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -57,19 +61,19 @@ define void @f2() {
 ; Test the last offset that is in range of the 20-bit form.
 ;
 ; The last in-range doubleword offset is 524280, so by the same reasoning
-; as above, we need to allocate objects of 524280 - 168 = 524122 bytes.
+; as above, we need to allocate objects of 524280 - 176 = 524104 bytes.
 define void @f3() {
-; CHECK-NOFP: f3:
+; CHECK-NOFP-LABEL: f3:
 ; CHECK-NOFP: mviy 524287(%r15), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f3:
+; CHECK-FP-LABEL: f3:
 ; CHECK-FP: mviy 524287(%r11), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -79,21 +83,21 @@ define void @f3() {
 ; and the offset is also out of LAY's range, so expect a constant load
 ; followed by an addition.
 define void @f4() {
-; CHECK-NOFP: f4:
+; CHECK-NOFP-LABEL: f4:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: agr %r1, %r15
 ; CHECK-NOFP: mvi 0(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f4:
+; CHECK-FP-LABEL: f4:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: agr %r1, %r11
 ; CHECK-FP: mvi 0(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -102,21 +106,21 @@ define void @f4() {
 ; Add 4095 to the previous offset, to test the other end of the MVI range.
 ; The instruction will actually be STCY before frame lowering.
 define void @f5() {
-; CHECK-NOFP: f5:
+; CHECK-NOFP-LABEL: f5:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: agr %r1, %r15
 ; CHECK-NOFP: mvi 4095(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f5:
+; CHECK-FP-LABEL: f5:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: agr %r1, %r11
 ; CHECK-FP: mvi 4095(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -124,21 +128,21 @@ define void @f5() {
 
 ; Test the next offset after that, which uses MVIY instead of MVI.
 define void @f6() {
-; CHECK-NOFP: f6:
+; CHECK-NOFP-LABEL: f6:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: agr %r1, %r15
 ; CHECK-NOFP: mviy 4096(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f6:
+; CHECK-FP-LABEL: f6:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: agr %r1, %r11
 ; CHECK-FP: mviy 4096(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -149,21 +153,21 @@ define void @f6() {
 ; anchors 0x10000 bytes apart, so that the high part can be loaded using
 ; LLILH while still using MVI in more cases than 0x40000 anchors would.
 define void @f7() {
-; CHECK-NOFP: f7:
+; CHECK-NOFP-LABEL: f7:
 ; CHECK-NOFP: llilh %r1, 23
 ; CHECK-NOFP: agr %r1, %r15
 ; CHECK-NOFP: mviy 65535(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f7:
+; CHECK-FP-LABEL: f7:
 ; CHECK-FP: llilh %r1, 23
 ; CHECK-FP: agr %r1, %r11
 ; CHECK-FP: mviy 65535(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [1048408 x i8], align 8
-  %region2 = alloca [1048408 x i8], align 8
-  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
-  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
+  %region1 = alloca [1048400 x i8], align 8
+  %region2 = alloca [1048400 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -172,21 +176,21 @@ define void @f7() {
 ; Keep the object-relative offset the same but bump the size of the
 ; objects by one doubleword.
 define void @f8() {
-; CHECK-NOFP: f8:
+; CHECK-NOFP-LABEL: f8:
 ; CHECK-NOFP: llilh %r1, 24
 ; CHECK-NOFP: agr %r1, %r15
 ; CHECK-NOFP: mvi 7(%r1), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f8:
+; CHECK-FP-LABEL: f8:
 ; CHECK-FP: llilh %r1, 24
 ; CHECK-FP: agr %r1, %r11
 ; CHECK-FP: mvi 7(%r1), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [1048416 x i8], align 8
-  %region2 = alloca [1048416 x i8], align 8
-  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287
-  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287
+  %region1 = alloca [1048408 x i8], align 8
+  %region2 = alloca [1048408 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
@@ -200,56 +204,56 @@ define void @f8() {
 ; The LA then gets lowered into the LLILH/LA form.  The exact sequence
 ; isn't that important though.
 define void @f9() {
-; CHECK-NOFP: f9:
+; CHECK-NOFP-LABEL: f9:
 ; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
 ; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
 ; CHECK-NOFP: agfi [[R2]], 524288
 ; CHECK-NOFP: mvi 0([[R2]]), 42
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f9:
+; CHECK-FP-LABEL: f9:
 ; CHECK-FP: llilh [[R1:%r[1-5]]], 16
 ; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
 ; CHECK-FP: agfi [[R2]], 524288
 ; CHECK-FP: mvi 0([[R2]]), 42
 ; CHECK-FP: br %r14
-  %region1 = alloca [1048416 x i8], align 8
-  %region2 = alloca [1048416 x i8], align 8
-  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288
-  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288
+  %region1 = alloca [1048408 x i8], align 8
+  %region2 = alloca [1048408 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288
+  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   ret void
 }
 
-; Repeat f4 in a case that needs the emergency spill slot (because all
+; Repeat f4 in a case that needs the emergency spill slots (because all
 ; call-clobbered registers are live and no call-saved ones have been
 ; allocated).
 define void @f10(i32 *%vptr) {
-; CHECK-NOFP: f10:
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP-LABEL: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: llilh [[REGISTER]], 8
 ; CHECK-NOFP: agr [[REGISTER]], %r15
 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f10:
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP-LABEL: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: llilh [[REGISTER]], 8
 ; CHECK-FP: agr [[REGISTER]], %r11
 ; CHECK-FP: mvi 0([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
   %i1 = load volatile i32 *%vptr
   %i3 = load volatile i32 *%vptr
   %i4 = load volatile i32 *%vptr
   %i5 = load volatile i32 *%vptr
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   store volatile i32 %i0, i32 *%vptr
@@ -260,28 +264,28 @@ define void @f10(i32 *%vptr) {
   ret void
 }
 
-; And again with maximum register pressure.  The only spill slot that the
-; NOFP case needs is the emergency one, so the offsets are the same as for f4.
+; And again with maximum register pressure.  The only spill slots that the
+; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
 ; However, the FP case uses %r11 as the frame pointer and must therefore
 ; spill a second register.  This leads to an extra displacement of 8.
 define void @f11(i32 *%vptr) {
-; CHECK-NOFP: f11:
+; CHECK-NOFP-LABEL: f11:
 ; CHECK-NOFP: stmg %r6, %r15,
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: llilh [[REGISTER]], 8
 ; CHECK-NOFP: agr [[REGISTER]], %r15
 ; CHECK-NOFP: mvi 0([[REGISTER]]), 42
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f11:
+; CHECK-FP-LABEL: f11:
 ; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: llilh [[REGISTER]], 8
 ; CHECK-FP: agr [[REGISTER]], %r11
 ; CHECK-FP: mvi 8([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: lmg %r6, %r15,
 ; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
@@ -298,10 +302,10 @@ define void @f11(i32 *%vptr) {
   %i12 = load volatile i32 *%vptr
   %i13 = load volatile i32 *%vptr
   %i14 = load volatile i32 *%vptr
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
   store volatile i8 42, i8 *%ptr1
   store volatile i8 42, i8 *%ptr2
   store volatile i32 %i0, i32 *%vptr
diff --git a/test/CodeGen/SystemZ/frame-15.ll b/test/CodeGen/SystemZ/frame-15.ll
index bc87e174d0b6..b3c95e73c1af 100644
--- a/test/CodeGen/SystemZ/frame-15.ll
+++ b/test/CodeGen/SystemZ/frame-15.ll
@@ -1,8 +1,11 @@
 ; Test the handling of base + index + 12-bit displacement addresses for
-; large frames, in cases where no 20-bit form exists.
+; large frames, in cases where no 20-bit form exists.  The tests here
+; assume z10 register pressure, without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
+; RUN:   FileCheck -check-prefix=CHECK-FP %s
 
 declare void @foo(float *%ptr1, float *%ptr2)
 
@@ -19,25 +22,25 @@ declare void @foo(float *%ptr1, float *%ptr2)
 ; First check the highest in-range offset after conversion, which is 4092
 ; for word-addressing instructions like LDEB.
 ;
-; The last in-range doubleword offset is 4088.  Since the frame has an
-; emergency spill slot at 160(%r15), the amount that we need to allocate
-; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980
+; The last in-range doubleword offset is 4088.  Since the frame has two
+; emergency spill slots at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is (4088 - 176) / 4 = 978
 ; words.
 define void @f1(double *%dst) {
-; CHECK-NOFP: f1:
+; CHECK-NOFP-LABEL: f1:
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f1:
+; CHECK-FP-LABEL: f1:
 ; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x float], align 8
-  %region2 = alloca [980 x float], align 8
-  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [978 x float], align 8
+  %region2 = alloca [978 x float], align 8
+  %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 1
-  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 1
+  %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 1
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -49,22 +52,22 @@ define void @f1(double *%dst) {
 
 ; Test the first out-of-range offset.
 define void @f2(double *%dst) {
-; CHECK-NOFP: f2:
+; CHECK-NOFP-LABEL: f2:
 ; CHECK-NOFP: lghi %r1, 4096
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f2:
+; CHECK-FP-LABEL: f2:
 ; CHECK-FP: lghi %r1, 4096
 ; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x float], align 8
-  %region2 = alloca [980 x float], align 8
-  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [978 x float], align 8
+  %region2 = alloca [978 x float], align 8
+  %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2
+  %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -76,22 +79,22 @@ define void @f2(double *%dst) {
 
 ; Test the next offset after that.
 define void @f3(double *%dst) {
-; CHECK-NOFP: f3:
+; CHECK-NOFP-LABEL: f3:
 ; CHECK-NOFP: lghi %r1, 4096
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f3:
+; CHECK-FP-LABEL: f3:
 ; CHECK-FP: lghi %r1, 4096
 ; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x float], align 8
-  %region2 = alloca [980 x float], align 8
-  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [978 x float], align 8
+  %region2 = alloca [978 x float], align 8
+  %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 3
-  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 3
+  %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 3
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -103,22 +106,22 @@ define void @f3(double *%dst) {
 
 ; Add 4096 bytes (1024 words) to the size of each object and repeat.
 define void @f4(double *%dst) {
-; CHECK-NOFP: f4:
+; CHECK-NOFP-LABEL: f4:
 ; CHECK-NOFP: lghi %r1, 4096
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f4:
+; CHECK-FP-LABEL: f4:
 ; CHECK-FP: lghi %r1, 4096
 ; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2004 x float], align 8
-  %region2 = alloca [2004 x float], align 8
-  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [2002 x float], align 8
+  %region2 = alloca [2002 x float], align 8
+  %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1
-  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1
+  %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 1
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -130,22 +133,22 @@ define void @f4(double *%dst) {
 
 ; ...as above.
 define void @f5(double *%dst) {
-; CHECK-NOFP: f5:
+; CHECK-NOFP-LABEL: f5:
 ; CHECK-NOFP: lghi %r1, 8192
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f5:
+; CHECK-FP-LABEL: f5:
 ; CHECK-FP: lghi %r1, 8192
 ; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2004 x float], align 8
-  %region2 = alloca [2004 x float], align 8
-  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [2002 x float], align 8
+  %region2 = alloca [2002 x float], align 8
+  %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 2
+  %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 2
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -157,22 +160,22 @@ define void @f5(double *%dst) {
 
 ; ...as above.
 define void @f6(double *%dst) {
-; CHECK-NOFP: f6:
+; CHECK-NOFP-LABEL: f6:
 ; CHECK-NOFP: lghi %r1, 8192
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f6:
+; CHECK-FP-LABEL: f6:
 ; CHECK-FP: lghi %r1, 8192
 ; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2004 x float], align 8
-  %region2 = alloca [2004 x float], align 8
-  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [2002 x float], align 8
+  %region2 = alloca [2002 x float], align 8
+  %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 3
-  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 3
+  %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 3
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -183,25 +186,25 @@ define void @f6(double *%dst) {
 }
 
 ; Now try an offset of 4092 from the start of the object, with the object
-; being at offset 8192.  This time we need objects of (8192 - 168) / 4 = 2006
+; being at offset 8192.  This time we need objects of (8192 - 176) / 4 = 2004
 ; words.
 define void @f7(double *%dst) {
-; CHECK-NOFP: f7:
+; CHECK-NOFP-LABEL: f7:
 ; CHECK-NOFP: lghi %r1, 8192
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f7:
+; CHECK-FP-LABEL: f7:
 ; CHECK-FP: lghi %r1, 8192
 ; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2006 x float], align 8
-  %region2 = alloca [2006 x float], align 8
-  %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [2004 x float], align 8
+  %region2 = alloca [2004 x float], align 8
+  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023
-  %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023
+  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1023
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -214,22 +217,22 @@ define void @f7(double *%dst) {
 ; Keep the object-relative offset the same but bump the size of the
 ; objects by one doubleword.
 define void @f8(double *%dst) {
-; CHECK-NOFP: f8:
+; CHECK-NOFP-LABEL: f8:
 ; CHECK-NOFP: lghi %r1, 12288
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f8:
+; CHECK-FP-LABEL: f8:
 ; CHECK-FP: lghi %r1, 12288
 ; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2008 x float], align 8
-  %region2 = alloca [2008 x float], align 8
-  %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [2006 x float], align 8
+  %region2 = alloca [2006 x float], align 8
+  %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1023
-  %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1023
+  %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -243,22 +246,22 @@ define void @f8(double *%dst) {
 ; should force an LAY from the outset.  We don't yet do any kind of anchor
 ; optimization, so there should be no offset on the LDEB itself.
 define void @f9(double *%dst) {
-; CHECK-NOFP: f9:
+; CHECK-NOFP-LABEL: f9:
 ; CHECK-NOFP: lay %r1, 12296(%r15)
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f9:
+; CHECK-FP-LABEL: f9:
 ; CHECK-FP: lay %r1, 12296(%r11)
 ; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1)
 ; CHECK-FP: br %r14
-  %region1 = alloca [2008 x float], align 8
-  %region2 = alloca [2008 x float], align 8
-  %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [2006 x float], align 8
+  %region2 = alloca [2006 x float], align 8
+  %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1024
-  %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1024
+  %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1024
+  %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1024
   %float1 = load float *%ptr1
   %float2 = load float *%ptr2
   %double1 = fpext float %float1 to double
@@ -268,31 +271,31 @@ define void @f9(double *%dst) {
   ret void
 }
 
-; Repeat f2 in a case that needs the emergency spill slot, because all
+; Repeat f2 in a case that needs the emergency spill slots, because all
 ; call-clobbered and allocated call-saved registers are live.  Note that
 ; %vptr and %dst are copied to call-saved registers, freeing up %r2 and
 ; %r3 during the main test.
 define void @f10(i32 *%vptr, double *%dst) {
-; CHECK-NOFP: f10:
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP-LABEL: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: lghi [[REGISTER]], 4096
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r15)
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f10:
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP-LABEL: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: lghi [[REGISTER]], 4096
 ; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r11)
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x float], align 8
-  %region2 = alloca [980 x float], align 8
-  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [978 x float], align 8
+  %region2 = alloca [978 x float], align 8
+  %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2
-  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2
+  %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2
   %i0 = load volatile i32 *%vptr
   %i1 = load volatile i32 *%vptr
   %i2 = load volatile i32 *%vptr
@@ -318,24 +321,24 @@ define void @f10(i32 *%vptr, double *%dst) {
 
 ; Repeat f2 in a case where the index register is already occupied.
 define void @f11(double *%dst, i64 %index) {
-; CHECK-NOFP: f11:
+; CHECK-NOFP-LABEL: f11:
 ; CHECK-NOFP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3
 ; CHECK-NOFP: lay %r1, 4096(%r15)
 ; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f11:
+; CHECK-FP-LABEL: f11:
 ; CHECK-FP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3
 ; CHECK-FP: lay %r1, 4096(%r11)
 ; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1)
 ; CHECK-FP: br %r14
-  %region1 = alloca [980 x float], align 8
-  %region2 = alloca [980 x float], align 8
-  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
-  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  %region1 = alloca [978 x float], align 8
+  %region2 = alloca [978 x float], align 8
+  %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
   call void @foo(float *%start1, float *%start2)
-  %elem1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2
-  %elem2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2
+  %elem1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2
+  %elem2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2
   %base1 = ptrtoint float *%elem1 to i64
   %base2 = ptrtoint float *%elem2 to i64
   %addr1 = add i64 %base1, %index
diff --git a/test/CodeGen/SystemZ/frame-16.ll b/test/CodeGen/SystemZ/frame-16.ll
index cc5529f920ca..f7e2dfa35149 100644
--- a/test/CodeGen/SystemZ/frame-16.ll
+++ b/test/CodeGen/SystemZ/frame-16.ll
@@ -1,8 +1,12 @@
 ; Test the handling of base + index + displacement addresses for large frames,
 ; in cases where both 12-bit and 20-bit displacements are allowed.
+; The tests here assume z10 register pressure, without the high words
+; being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN:   FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | \
+; RUN:   FileCheck -check-prefix=CHECK-FP %s
 
 ; This file tests what happens when a displacement is converted from
 ; being relative to the start of a frame object to being relative to
@@ -16,21 +20,21 @@
 
 ; First check the highest offset that is in range of the 12-bit form.
 ;
-; The last in-range doubleword offset is 4088.  Since the frame has an
-; emergency spill slot at 160(%r15), the amount that we need to allocate
-; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes.
+; The last in-range doubleword offset is 4088.  Since the frame has two
+; emergency spill slots at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is 4088 - 176 = 3912 bytes.
 define void @f1(i8 %byte) {
-; CHECK-NOFP: f1:
+; CHECK-NOFP-LABEL: f1:
 ; CHECK-NOFP: stc %r2, 4095(%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f1:
+; CHECK-FP-LABEL: f1:
 ; CHECK-FP: stc %r2, 4095(%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [3920 x i8], align 8
-  %region2 = alloca [3920 x i8], align 8
-  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7
-  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7
+  %region1 = alloca [3912 x i8], align 8
+  %region2 = alloca [3912 x i8], align 8
+  %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -38,17 +42,17 @@ define void @f1(i8 %byte) {
 
 ; Test the first offset that is out-of-range of the 12-bit form.
 define void @f2(i8 %byte) {
-; CHECK-NOFP: f2:
+; CHECK-NOFP-LABEL: f2:
 ; CHECK-NOFP: stcy %r2, 4096(%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f2:
+; CHECK-FP-LABEL: f2:
 ; CHECK-FP: stcy %r2, 4096(%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [3920 x i8], align 8
-  %region2 = alloca [3920 x i8], align 8
-  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [3912 x i8], align 8
+  %region2 = alloca [3912 x i8], align 8
+  %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -57,19 +61,19 @@ define void @f2(i8 %byte) {
 ; Test the last offset that is in range of the 20-bit form.
 ;
 ; The last in-range doubleword offset is 524280, so by the same reasoning
-; as above, we need to allocate objects of 524280 - 168 = 524122 bytes.
+; as above, we need to allocate objects of 524280 - 176 = 524104 bytes.
 define void @f3(i8 %byte) {
-; CHECK-NOFP: f3:
+; CHECK-NOFP-LABEL: f3:
 ; CHECK-NOFP: stcy %r2, 524287(%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f3:
+; CHECK-FP-LABEL: f3:
 ; CHECK-FP: stcy %r2, 524287(%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -79,19 +83,19 @@ define void @f3(i8 %byte) {
 ; and the offset is also out of LAY's range, so expect a constant load
 ; followed by an addition.
 define void @f4(i8 %byte) {
-; CHECK-NOFP: f4:
+; CHECK-NOFP-LABEL: f4:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: stc %r2, 0(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f4:
+; CHECK-FP-LABEL: f4:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: stc %r2, 0(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -100,19 +104,19 @@ define void @f4(i8 %byte) {
 ; Add 4095 to the previous offset, to test the other end of the STC range.
 ; The instruction will actually be STCY before frame lowering.
 define void @f5(i8 %byte) {
-; CHECK-NOFP: f5:
+; CHECK-NOFP-LABEL: f5:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: stc %r2, 4095(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f5:
+; CHECK-FP-LABEL: f5:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: stc %r2, 4095(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -120,19 +124,19 @@ define void @f5(i8 %byte) {
 
 ; Test the next offset after that, which uses STCY instead of STC.
 define void @f6(i8 %byte) {
-; CHECK-NOFP: f6:
+; CHECK-NOFP-LABEL: f6:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: stcy %r2, 4096(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f6:
+; CHECK-FP-LABEL: f6:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: stcy %r2, 4096(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -143,19 +147,19 @@ define void @f6(i8 %byte) {
 ; anchors 0x10000 bytes apart, so that the high part can be loaded using
 ; LLILH while still using STC in more cases than 0x40000 anchors would.
 define void @f7(i8 %byte) {
-; CHECK-NOFP: f7:
+; CHECK-NOFP-LABEL: f7:
 ; CHECK-NOFP: llilh %r1, 23
 ; CHECK-NOFP: stcy %r2, 65535(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f7:
+; CHECK-FP-LABEL: f7:
 ; CHECK-FP: llilh %r1, 23
 ; CHECK-FP: stcy %r2, 65535(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [1048408 x i8], align 8
-  %region2 = alloca [1048408 x i8], align 8
-  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
-  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
+  %region1 = alloca [1048400 x i8], align 8
+  %region2 = alloca [1048400 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -164,19 +168,19 @@ define void @f7(i8 %byte) {
 ; Keep the object-relative offset the same but bump the size of the
 ; objects by one doubleword.
 define void @f8(i8 %byte) {
-; CHECK-NOFP: f8:
+; CHECK-NOFP-LABEL: f8:
 ; CHECK-NOFP: llilh %r1, 24
 ; CHECK-NOFP: stc %r2, 7(%r1,%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f8:
+; CHECK-FP-LABEL: f8:
 ; CHECK-FP: llilh %r1, 24
 ; CHECK-FP: stc %r2, 7(%r1,%r11)
 ; CHECK-FP: br %r14
-  %region1 = alloca [1048416 x i8], align 8
-  %region2 = alloca [1048416 x i8], align 8
-  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287
-  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287
+  %region1 = alloca [1048408 x i8], align 8
+  %region2 = alloca [1048408 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
@@ -190,53 +194,53 @@ define void @f8(i8 %byte) {
 ; The LA then gets lowered into the LLILH/LA form.  The exact sequence
 ; isn't that important though.
 define void @f9(i8 %byte) {
-; CHECK-NOFP: f9:
+; CHECK-NOFP-LABEL: f9:
 ; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
 ; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
 ; CHECK-NOFP: agfi [[R2]], 524288
 ; CHECK-NOFP: stc %r2, 0([[R2]])
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f9:
+; CHECK-FP-LABEL: f9:
 ; CHECK-FP: llilh [[R1:%r[1-5]]], 16
 ; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
 ; CHECK-FP: agfi [[R2]], 524288
 ; CHECK-FP: stc %r2, 0([[R2]])
 ; CHECK-FP: br %r14
-  %region1 = alloca [1048416 x i8], align 8
-  %region2 = alloca [1048416 x i8], align 8
-  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288
-  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288
+  %region1 = alloca [1048408 x i8], align 8
+  %region2 = alloca [1048408 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288
+  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
 }
 
-; Repeat f4 in a case that needs the emergency spill slot (because all
+; Repeat f4 in a case that needs the emergency spill slots (because all
 ; call-clobbered registers are live and no call-saved ones have been
 ; allocated).
 define void @f10(i32 *%vptr, i8 %byte) {
-; CHECK-NOFP: f10:
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP-LABEL: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: llilh [[REGISTER]], 8
 ; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15)
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f10:
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP-LABEL: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: llilh [[REGISTER]], 8
 ; CHECK-FP: stc %r3, 0([[REGISTER]],%r11)
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
   %i1 = load volatile i32 *%vptr
   %i4 = load volatile i32 *%vptr
   %i5 = load volatile i32 *%vptr
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   store volatile i32 %i0, i32 *%vptr
@@ -246,26 +250,26 @@ define void @f10(i32 *%vptr, i8 %byte) {
   ret void
 }
 
-; And again with maximum register pressure.  The only spill slot that the
-; NOFP case needs is the emergency one, so the offsets are the same as for f4.
+; And again with maximum register pressure.  The only spill slots that the
+; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
 ; However, the FP case uses %r11 as the frame pointer and must therefore
 ; spill a second register.  This leads to an extra displacement of 8.
 define void @f11(i32 *%vptr, i8 %byte) {
-; CHECK-NOFP: f11:
+; CHECK-NOFP-LABEL: f11:
 ; CHECK-NOFP: stmg %r6, %r15,
-; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r15)
 ; CHECK-NOFP: llilh [[REGISTER]], 8
 ; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15)
-; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f11:
+; CHECK-FP-LABEL: f11:
 ; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
 ; CHECK-FP: llilh [[REGISTER]], 8
 ; CHECK-FP: stc %r3, 8([[REGISTER]],%r11)
-; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: lmg %r6, %r15,
 ; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
@@ -281,10 +285,10 @@ define void @f11(i32 *%vptr, i8 %byte) {
   %i12 = load volatile i32 *%vptr
   %i13 = load volatile i32 *%vptr
   %i14 = load volatile i32 *%vptr
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   store volatile i32 %i0, i32 *%vptr
@@ -305,22 +309,22 @@ define void @f11(i32 *%vptr, i8 %byte) {
 
 ; Repeat f4 in a case where the index register is already occupied.
 define void @f12(i8 %byte, i64 %index) {
-; CHECK-NOFP: f12:
+; CHECK-NOFP-LABEL: f12:
 ; CHECK-NOFP: llilh %r1, 8
 ; CHECK-NOFP: agr %r1, %r15
 ; CHECK-NOFP: stc %r2, 0(%r3,%r1)
 ; CHECK-NOFP: br %r14
 ;
-; CHECK-FP: f12:
+; CHECK-FP-LABEL: f12:
 ; CHECK-FP: llilh %r1, 8
 ; CHECK-FP: agr %r1, %r11
 ; CHECK-FP: stc %r2, 0(%r3,%r1)
 ; CHECK-FP: br %r14
-  %region1 = alloca [524112 x i8], align 8
-  %region2 = alloca [524112 x i8], align 8
+  %region1 = alloca [524104 x i8], align 8
+  %region2 = alloca [524104 x i8], align 8
   %index1 = add i64 %index, 8
-  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 %index1
-  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 %index1
+  %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 %index1
+  %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 %index1
   store volatile i8 %byte, i8 *%ptr1
   store volatile i8 %byte, i8 *%ptr2
   ret void
diff --git a/test/CodeGen/SystemZ/frame-17.ll b/test/CodeGen/SystemZ/frame-17.ll
index 613d9f879558..97cf83dfd78e 100644
--- a/test/CodeGen/SystemZ/frame-17.ll
+++ b/test/CodeGen/SystemZ/frame-17.ll
@@ -6,7 +6,7 @@
 ; 4-byte spill slot, rounded to 8 bytes.  The frame size should be exactly
 ; 160 + 8 * 8 = 232.
 define void @f1(float *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: aghi %r15, -232
 ; CHECK: std %f8, 224(%r15)
 ; CHECK: std %f9, 216(%r15)
@@ -70,7 +70,7 @@ define void @f1(float *%ptr) {
 
 ; Same for doubles, except that the full spill slot is used.
 define void @f2(double *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r15, -232
 ; CHECK: std %f8, 224(%r15)
 ; CHECK: std %f9, 216(%r15)
@@ -131,7 +131,7 @@ define void @f2(double *%ptr) {
 
 ; The long double case needs a 16-byte spill slot.
 define void @f3(fp128 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: aghi %r15, -240
 ; CHECK: std %f8, 232(%r15)
 ; CHECK: std %f9, 224(%r15)
diff --git a/test/CodeGen/SystemZ/frame-18.ll b/test/CodeGen/SystemZ/frame-18.ll
index a9977ed04b42..21dfc1238a13 100644
--- a/test/CodeGen/SystemZ/frame-18.ll
+++ b/test/CodeGen/SystemZ/frame-18.ll
@@ -1,11 +1,12 @@
-; Test spilling of GPRs.
+; Test spilling of GPRs.  The tests here assume z10 register pressure,
+; without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; We need to allocate a 4-byte spill slot, rounded to 8 bytes.  The frame
 ; size should be exactly 160 + 8 = 168.
 define void @f1(i32 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK: aghi %r15, -168
 ; CHECK-NOT: 160(%r15)
@@ -50,7 +51,7 @@ define void @f1(i32 *%ptr) {
 
 ; Same for i64, except that the full spill slot is used.
 define void @f2(i64 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stmg %r6, %r15, 48(%r15)
 ; CHECK: aghi %r15, -168
 ; CHECK: stg [[REGISTER:%r[0-9]+]], 160(%r15)
diff --git a/test/CodeGen/SystemZ/insert-01.ll b/test/CodeGen/SystemZ/insert-01.ll
index 98ddf56959bf..0b54e85dc4ed 100644
--- a/test/CodeGen/SystemZ/insert-01.ll
+++ b/test/CodeGen/SystemZ/insert-01.ll
@@ -5,7 +5,7 @@
 ; Check a plain insertion with (or (and ... -0xff) (zext (load ....))).
 ; The whole sequence can be performed by IC.
 define i32 @f1(i32 %orig, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -18,7 +18,7 @@ define i32 @f1(i32 %orig, i8 *%ptr) {
 
 ; Like f1, but with the operands reversed.
 define i32 @f2(i32 %orig, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -32,7 +32,7 @@ define i32 @f2(i32 %orig, i8 *%ptr) {
 ; Check a case where more bits than lower 8 are masked out of the
 ; register value.  We can use IC but must keep the original mask.
 define i32 @f3(i32 %orig, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -45,7 +45,7 @@ define i32 @f3(i32 %orig, i8 *%ptr) {
 
 ; Like f3, but with the operands reversed.
 define i32 @f4(i32 %orig, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -58,7 +58,7 @@ define i32 @f4(i32 %orig, i8 *%ptr) {
 
 ; Check a case where the low 8 bits are cleared by a shift left.
 define i32 @f5(i32 %orig, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sll %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -71,7 +71,7 @@ define i32 @f5(i32 %orig, i8 *%ptr) {
 
 ; Like f5, but with the operands reversed.
 define i32 @f6(i32 %orig, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sll %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -84,7 +84,7 @@ define i32 @f6(i32 %orig, i8 *%ptr) {
 
 ; Check insertions into a constant.
 define i32 @f7(i32 %orig, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lhi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -96,7 +96,7 @@ define i32 @f7(i32 %orig, i8 *%ptr) {
 
 ; Like f7, but with the operands reversed.
 define i32 @f8(i32 %orig, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lhi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -108,7 +108,7 @@ define i32 @f8(i32 %orig, i8 *%ptr) {
 
 ; Check the high end of the IC range.
 define i32 @f9(i32 %orig, i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ic %r2, 4095(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -121,7 +121,7 @@ define i32 @f9(i32 %orig, i8 *%src) {
 
 ; Check the next byte up, which should use ICY instead of IC.
 define i32 @f10(i32 %orig, i8 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: icy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -134,7 +134,7 @@ define i32 @f10(i32 %orig, i8 *%src) {
 
 ; Check the high end of the ICY range.
 define i32 @f11(i32 %orig, i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: icy %r2, 524287(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -148,7 +148,7 @@ define i32 @f11(i32 %orig, i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f12(i32 %orig, i8 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -162,7 +162,7 @@ define i32 @f12(i32 %orig, i8 *%src) {
 
 ; Check the high end of the negative ICY range.
 define i32 @f13(i32 %orig, i8 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: icy %r2, -1(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -175,7 +175,7 @@ define i32 @f13(i32 %orig, i8 *%src) {
 
 ; Check the low end of the ICY range.
 define i32 @f14(i32 %orig, i8 *%src) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: icy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -189,7 +189,7 @@ define i32 @f14(i32 %orig, i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f15(i32 %orig, i8 *%src) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: agfi %r3, -524289
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -203,7 +203,7 @@ define i32 @f15(i32 %orig, i8 *%src) {
 
 ; Check that IC allows an index.
 define i32 @f16(i32 %orig, i8 *%src, i64 %index) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %ptr1 = getelementptr i8 *%src, i64 %index
@@ -217,7 +217,7 @@ define i32 @f16(i32 %orig, i8 *%src, i64 %index) {
 
 ; Check that ICY allows an index.
 define i32 @f17(i32 %orig, i8 *%src, i64 %index) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %ptr1 = getelementptr i8 *%src, i64 %index
diff --git a/test/CodeGen/SystemZ/insert-02.ll b/test/CodeGen/SystemZ/insert-02.ll
index 471889dede6a..7a85b0bee4d8 100644
--- a/test/CodeGen/SystemZ/insert-02.ll
+++ b/test/CodeGen/SystemZ/insert-02.ll
@@ -5,7 +5,7 @@
 ; Check a plain insertion with (or (and ... -0xff) (zext (load ....))).
 ; The whole sequence can be performed by IC.
 define i64 @f1(i64 %orig, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -18,7 +18,7 @@ define i64 @f1(i64 %orig, i8 *%ptr) {
 
 ; Like f1, but with the operands reversed.
 define i64 @f2(i64 %orig, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -32,7 +32,7 @@ define i64 @f2(i64 %orig, i8 *%ptr) {
 ; Check a case where more bits than lower 8 are masked out of the
 ; register value.  We can use IC but must keep the original mask.
 define i64 @f3(i64 %orig, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -45,7 +45,7 @@ define i64 @f3(i64 %orig, i8 *%ptr) {
 
 ; Like f3, but with the operands reversed.
 define i64 @f4(i64 %orig, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -58,7 +58,7 @@ define i64 @f4(i64 %orig, i8 *%ptr) {
 
 ; Check a case where the low 8 bits are cleared by a shift left.
 define i64 @f5(i64 %orig, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sllg %r2, %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -71,7 +71,7 @@ define i64 @f5(i64 %orig, i8 *%ptr) {
 
 ; Like f5, but with the operands reversed.
 define i64 @f6(i64 %orig, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r2, %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -84,7 +84,7 @@ define i64 @f6(i64 %orig, i8 *%ptr) {
 
 ; Check insertions into a constant.
 define i64 @f7(i64 %orig, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lghi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -96,7 +96,7 @@ define i64 @f7(i64 %orig, i8 *%ptr) {
 
 ; Like f7, but with the operands reversed.
 define i64 @f8(i64 %orig, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lghi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -108,7 +108,7 @@ define i64 @f8(i64 %orig, i8 *%ptr) {
 
 ; Check the high end of the IC range.
 define i64 @f9(i64 %orig, i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ic %r2, 4095(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -121,7 +121,7 @@ define i64 @f9(i64 %orig, i8 *%src) {
 
 ; Check the next byte up, which should use ICY instead of IC.
 define i64 @f10(i64 %orig, i8 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: icy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -134,7 +134,7 @@ define i64 @f10(i64 %orig, i8 *%src) {
 
 ; Check the high end of the ICY range.
 define i64 @f11(i64 %orig, i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: icy %r2, 524287(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -148,7 +148,7 @@ define i64 @f11(i64 %orig, i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f12(i64 %orig, i8 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -162,7 +162,7 @@ define i64 @f12(i64 %orig, i8 *%src) {
 
 ; Check the high end of the negative ICY range.
 define i64 @f13(i64 %orig, i8 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: icy %r2, -1(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -175,7 +175,7 @@ define i64 @f13(i64 %orig, i8 *%src) {
 
 ; Check the low end of the ICY range.
 define i64 @f14(i64 %orig, i8 *%src) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: icy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -189,7 +189,7 @@ define i64 @f14(i64 %orig, i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f15(i64 %orig, i8 *%src) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: agfi %r3, -524289
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
@@ -203,7 +203,7 @@ define i64 @f15(i64 %orig, i8 *%src) {
 
 ; Check that IC allows an index.
 define i64 @f16(i64 %orig, i8 *%src, i64 %index) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %ptr1 = getelementptr i8 *%src, i64 %index
@@ -217,7 +217,7 @@ define i64 @f16(i64 %orig, i8 *%src, i64 %index) {
 
 ; Check that ICY allows an index.
 define i64 @f17(i64 %orig, i8 *%src, i64 %index) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %ptr1 = getelementptr i8 *%src, i64 %index
diff --git a/test/CodeGen/SystemZ/insert-03.ll b/test/CodeGen/SystemZ/insert-03.ll
index 261eabd1be7d..c3c1ae316c9f 100644
--- a/test/CodeGen/SystemZ/insert-03.ll
+++ b/test/CodeGen/SystemZ/insert-03.ll
@@ -5,7 +5,7 @@
 ; Check the lowest useful IILL value.  (We use NILL rather than IILL
 ; to clear 16 bits.)
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: ni
 ; CHECK: iill %r2, 1
 ; CHECK: br %r14
@@ -16,7 +16,7 @@ define i32 @f1(i32 %a) {
 
 ; Check a middle value.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: ni
 ; CHECK: iill %r2, 32769
 ; CHECK: br %r14
@@ -28,7 +28,7 @@ define i32 @f2(i32 %a) {
 ; Check the highest useful IILL value.  (We use OILL rather than IILL
 ; to set 16 bits.)
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: ni
 ; CHECK: iill %r2, 65534
 ; CHECK: br %r14
@@ -39,7 +39,7 @@ define i32 @f3(i32 %a) {
 
 ; Check the lowest useful IILH value.
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: ni
 ; CHECK: iilh %r2, 1
 ; CHECK: br %r14
@@ -50,7 +50,7 @@ define i32 @f4(i32 %a) {
 
 ; Check a middle value.
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: ni
 ; CHECK: iilh %r2, 32767
 ; CHECK: br %r14
@@ -61,7 +61,7 @@ define i32 @f5(i32 %a) {
 
 ; Check the highest useful IILH value.
 define i32 @f6(i32 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: ni
 ; CHECK: iilh %r2, 65534
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/insert-04.ll b/test/CodeGen/SystemZ/insert-04.ll
index 07f88b9859eb..5ce99dfcb7ba 100644
--- a/test/CodeGen/SystemZ/insert-04.ll
+++ b/test/CodeGen/SystemZ/insert-04.ll
@@ -5,7 +5,7 @@
 ; Check the lowest useful IILL value.  (We use NILL rather than IILL
 ; to clear 16 bits.)
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: ni
 ; CHECK: iill %r2, 1
 ; CHECK: br %r14
@@ -16,7 +16,7 @@ define i64 @f1(i64 %a) {
 
 ; Check a middle value.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: ni
 ; CHECK: iill %r2, 32769
 ; CHECK: br %r14
@@ -28,7 +28,7 @@ define i64 @f2(i64 %a) {
 ; Check the highest useful IILL value.  (We use OILL rather than IILL
 ; to set 16 bits.)
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: ni
 ; CHECK: iill %r2, 65534
 ; CHECK: br %r14
@@ -39,7 +39,7 @@ define i64 @f3(i64 %a) {
 
 ; Check the lowest useful IILH value.
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: ni
 ; CHECK: iilh %r2, 1
 ; CHECK: br %r14
@@ -50,7 +50,7 @@ define i64 @f4(i64 %a) {
 
 ; Check a middle value.
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: ni
 ; CHECK: iilh %r2, 32767
 ; CHECK: br %r14
@@ -61,7 +61,7 @@ define i64 @f5(i64 %a) {
 
 ; Check the highest useful IILH value.
 define i64 @f6(i64 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: ni
 ; CHECK: iilh %r2, 65534
 ; CHECK: br %r14
@@ -72,7 +72,7 @@ define i64 @f6(i64 %a) {
 
 ; Check the lowest useful IIHL value.
 define i64 @f7(i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: ni
 ; CHECK: iihl %r2, 1
 ; CHECK: br %r14
@@ -83,7 +83,7 @@ define i64 @f7(i64 %a) {
 
 ; Check a middle value.
 define i64 @f8(i64 %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK-NOT: ni
 ; CHECK: iihl %r2, 32767
 ; CHECK: br %r14
@@ -94,7 +94,7 @@ define i64 @f8(i64 %a) {
 
 ; Check the highest useful IIHL value.
 define i64 @f9(i64 %a) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK-NOT: ni
 ; CHECK: iihl %r2, 65534
 ; CHECK: br %r14
@@ -105,7 +105,7 @@ define i64 @f9(i64 %a) {
 
 ; Check the lowest useful IIHH value.
 define i64 @f10(i64 %a) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: ni
 ; CHECK: iihh %r2, 1
 ; CHECK: br %r14
@@ -116,7 +116,7 @@ define i64 @f10(i64 %a) {
 
 ; Check a middle value.
 define i64 @f11(i64 %a) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK-NOT: ni
 ; CHECK: iihh %r2, 32767
 ; CHECK: br %r14
@@ -127,7 +127,7 @@ define i64 @f11(i64 %a) {
 
 ; Check the highest useful IIHH value.
 define i64 @f12(i64 %a) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK-NOT: ni
 ; CHECK: iihh %r2, 65534
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/insert-05.ll b/test/CodeGen/SystemZ/insert-05.ll
index da51676b99cf..b76859a568f3 100644
--- a/test/CodeGen/SystemZ/insert-05.ll
+++ b/test/CodeGen/SystemZ/insert-05.ll
@@ -4,7 +4,7 @@
 
 ; Prefer LHI over IILF for signed 16-bit constants.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: ni
 ; CHECK: lhi %r2, 1
 ; CHECK: br %r14
@@ -15,7 +15,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the LHI range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: ni
 ; CHECK: lhi %r2, 32767
 ; CHECK: br %r14
@@ -26,7 +26,7 @@ define i64 @f2(i64 %a) {
 
 ; Check the next value up, which should use IILF instead.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: ni
 ; CHECK: iilf %r2, 32768
 ; CHECK: br %r14
@@ -37,7 +37,7 @@ define i64 @f3(i64 %a) {
 
 ; Check a value in which the lower 16 bits are clear.
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: ni
 ; CHECK: iilf %r2, 65536
 ; CHECK: br %r14
@@ -48,7 +48,7 @@ define i64 @f4(i64 %a) {
 
 ; Check the highest useful IILF value (-0x8001).
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: ni
 ; CHECK: iilf %r2, 4294934527
 ; CHECK: br %r14
@@ -59,7 +59,7 @@ define i64 @f5(i64 %a) {
 
 ; Check the next value up, which should use LHI instead.
 define i64 @f6(i64 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: ni
 ; CHECK: lhi %r2, -32768
 ; CHECK: br %r14
@@ -71,7 +71,7 @@ define i64 @f6(i64 %a) {
 ; Check the highest useful LHI value.  (We use OILF for -1 instead, although
 ; LHI might be better there too.)
 define i64 @f7(i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: ni
 ; CHECK: lhi %r2, -2
 ; CHECK: br %r14
@@ -83,7 +83,7 @@ define i64 @f7(i64 %a) {
 ; Check that SRLG is still used if some of the high bits are known to be 0
 ; (and so might be removed from the mask).
 define i64 @f8(i64 %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: srlg %r2, %r2, 1
 ; CHECK-NEXT: iilf %r2, 32768
 ; CHECK: br %r14
@@ -95,7 +95,7 @@ define i64 @f8(i64 %a) {
 
 ; Repeat f8 with addition, which is known to be equivalent to OR in this case.
 define i64 @f9(i64 %a) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: srlg %r2, %r2, 1
 ; CHECK-NEXT: iilf %r2, 32768
 ; CHECK: br %r14
@@ -107,7 +107,7 @@ define i64 @f9(i64 %a) {
 
 ; Repeat f8 with already-zero bits removed from the mask.
 define i64 @f10(i64 %a) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: srlg %r2, %r2, 1
 ; CHECK-NEXT: iilf %r2, 32768
 ; CHECK: br %r14
@@ -119,7 +119,7 @@ define i64 @f10(i64 %a) {
 
 ; Repeat f10 with addition, which is known to be equivalent to OR in this case.
 define i64 @f11(i64 %a) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: srlg %r2, %r2, 1
 ; CHECK-NEXT: iilf %r2, 32768
 ; CHECK: br %r14
@@ -131,7 +131,7 @@ define i64 @f11(i64 %a) {
 
 ; Check the lowest useful IIHF value.
 define i64 @f12(i64 %a) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK-NOT: ni
 ; CHECK: iihf %r2, 1
 ; CHECK: br %r14
@@ -142,7 +142,7 @@ define i64 @f12(i64 %a) {
 
 ; Check a value in which the lower 16 bits are clear.
 define i64 @f13(i64 %a) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK-NOT: ni
 ; CHECK: iihf %r2, 2147483648
 ; CHECK: br %r14
@@ -153,7 +153,7 @@ define i64 @f13(i64 %a) {
 
 ; Check the highest useful IIHF value (0xfffffffe).
 define i64 @f14(i64 %a) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK-NOT: ni
 ; CHECK: iihf %r2, 4294967294
 ; CHECK: br %r14
@@ -165,7 +165,7 @@ define i64 @f14(i64 %a) {
 ; Check a case in which some of the low 32 bits are known to be clear,
 ; and so could be removed from the AND mask.
 define i64 @f15(i64 %a) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: sllg %r2, %r2, 1
 ; CHECK-NEXT: iihf %r2, 1
 ; CHECK: br %r14
@@ -177,7 +177,7 @@ define i64 @f15(i64 %a) {
 
 ; Repeat f15 with the zero bits explicitly removed from the mask.
 define i64 @f16(i64 %a) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: sllg %r2, %r2, 1
 ; CHECK-NEXT: iihf %r2, 1
 ; CHECK: br %r14
@@ -189,7 +189,7 @@ define i64 @f16(i64 %a) {
 
 ; Check concatenation of two i32s.
 define i64 @f17(i32 %a) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: msr %r2, %r2
 ; CHECK-NEXT: iihf %r2, 1
 ; CHECK: br %r14
@@ -201,7 +201,7 @@ define i64 @f17(i32 %a) {
 
 ; Repeat f17 with the operands reversed.
 define i64 @f18(i32 %a) {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: msr %r2, %r2
 ; CHECK-NEXT: iihf %r2, 1
 ; CHECK: br %r14
@@ -213,7 +213,7 @@ define i64 @f18(i32 %a) {
 
 ; The truncation here isn't free; we need an explicit zero extension.
 define i64 @f19(i32 %a) {
-; CHECK: f19:
+; CHECK-LABEL: f19:
 ; CHECK: llgcr %r2, %r2
 ; CHECK: oihl %r2, 1
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/insert-06.ll b/test/CodeGen/SystemZ/insert-06.ll
index 4a13ef47c888..edcd0c5dccd2 100644
--- a/test/CodeGen/SystemZ/insert-06.ll
+++ b/test/CodeGen/SystemZ/insert-06.ll
@@ -4,7 +4,7 @@
 
 ; Insertion of an i32 can be done using LR.
 define i64 @f1(i64 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -16,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) {
 
 ; ... and again with the operands reversed.
 define i64 @f2(i64 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -28,7 +28,7 @@ define i64 @f2(i64 %a, i32 %b) {
 
 ; Like f1, but with "in register" zero extension.
 define i64 @f3(i64 %a, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -40,7 +40,7 @@ define i64 @f3(i64 %a, i64 %b) {
 
 ; ... and again with the operands reversed.
 define i64 @f4(i64 %a, i64 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -52,7 +52,7 @@ define i64 @f4(i64 %a, i64 %b) {
 
 ; Unary operations can be done directly into the low half.
 define i64 @f5(i64 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lcr %r2, %r3
 ; CHECK: br %r14
@@ -65,7 +65,7 @@ define i64 @f5(i64 %a, i32 %b) {
 
 ; ...likewise three-operand binary operations like RLL.
 define i64 @f6(i64 %a, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: rll %r2, %r3, 1
 ; CHECK: br %r14
@@ -81,7 +81,7 @@ define i64 @f6(i64 %a, i32 %b) {
 ; Loads can be done directly into the low half.  The range of L is checked
 ; in the move tests.
 define i64 @f7(i64 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: br %r14
@@ -94,7 +94,7 @@ define i64 @f7(i64 %a, i32 *%src) {
 
 ; ...likewise extending loads.
 define i64 @f8(i64 %a, i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lb %r2, 0(%r3)
 ; CHECK: br %r14
@@ -110,7 +110,7 @@ define i64 @f8(i64 %a, i8 *%src) {
 ; that the upper half of one OR operand and the lower half of the other are
 ; both clear.
 define i64 @f9(i64 %a, i32 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: sllg %r2, %r2, 32
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -122,7 +122,7 @@ define i64 @f9(i64 %a, i32 %b) {
 
 ; ...and again with the operands reversed.
 define i64 @f10(i64 %a, i32 %b) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: sllg %r2, %r2, 32
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -134,7 +134,7 @@ define i64 @f10(i64 %a, i32 %b) {
 
 ; Like f9, but with "in register" zero extension.
 define i64 @f11(i64 %a, i64 %b) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
   %shift = shl i64 %a, 32
@@ -145,7 +145,7 @@ define i64 @f11(i64 %a, i64 %b) {
 
 ; ...and again with the operands reversed.
 define i64 @f12(i64 %a, i64 %b) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
   %shift = shl i64 %a, 32
@@ -156,7 +156,7 @@ define i64 @f12(i64 %a, i64 %b) {
 
 ; Like f9, but for larger shifts than 32.
 define i64 @f13(i64 %a, i32 %b) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: sllg %r2, %r2, 60
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
@@ -165,3 +165,16 @@ define i64 @f13(i64 %a, i32 %b) {
   %or = or i64 %shift, %low
   ret i64 %or
 }
+
+; We previously wrongly removed the upper AND (%and1, mask 1 << 57) as dead.
+define i64 @f14(i64 %a, i64 %b) {
+; CHECK-LABEL: f14:
+; CHECK: risbg {{%r[0-5]}}, %r2, 6, 134, 0
+; CHECK: br %r14
+  %and1 = and i64 %a, 144115188075855872
+  %and2 = and i64 %b, 15
+  %or = or i64 %and1, %and2
+  %res = icmp eq i64 %or, 0
+  %ext = sext i1 %res to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-abs-01.ll b/test/CodeGen/SystemZ/int-abs-01.ll
new file mode 100644
index 000000000000..40fb61192c6e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-abs-01.ll
@@ -0,0 +1,83 @@
+; Test integer absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test i32->i32 absolute as select(val < 0, -val, val) using slt; expect LPR.
+define i32 @f1(i32 %val) {
+; CHECK-LABEL: f1:
+; CHECK: lpr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp slt i32 %val, 0
+  %neg = sub i32 0, %val
+  %res = select i1 %cmp, i32 %neg, i32 %val
+  ret i32 %res
+}
+
+; Test i32->i32 absolute as select(val <= 0, -val, val) using sle; expect LPR.
+define i32 @f2(i32 %val) {
+; CHECK-LABEL: f2:
+; CHECK: lpr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp sle i32 %val, 0
+  %neg = sub i32 0, %val
+  %res = select i1 %cmp, i32 %neg, i32 %val
+  ret i32 %res
+}
+
+; Test i32->i32 absolute as select(val > 0, val, -val) using sgt; expect LPR.
+define i32 @f3(i32 %val) {
+; CHECK-LABEL: f3:
+; CHECK: lpr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp sgt i32 %val, 0
+  %neg = sub i32 0, %val
+  %res = select i1 %cmp, i32 %val, i32 %neg
+  ret i32 %res
+}
+
+; Test i32->i32 absolute as select(val >= 0, val, -val) using sge; expect LPR.
+define i32 @f4(i32 %val) {
+; CHECK-LABEL: f4:
+; CHECK: lpr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp sge i32 %val, 0
+  %neg = sub i32 0, %val
+  %res = select i1 %cmp, i32 %val, i32 %neg
+  ret i32 %res
+}
+
+; Test i32->i64 absolute: sext to i64, then select on slt 0; expect LPGFR.
+define i64 @f5(i32 %val) {
+; CHECK-LABEL: f5:
+; CHECK: lpgfr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i32 %val to i64
+  %cmp = icmp slt i64 %ext, 0
+  %neg = sub i64 0, %ext
+  %res = select i1 %cmp, i64 %neg, i64 %ext
+  ret i64 %res
+}
+
+; Like f5, but with an "in-register" sign extension (trunc to i32, then sext).
+define i64 @f6(i64 %val) {
+; CHECK-LABEL: f6:
+; CHECK: lpgfr %r2, %r2
+; CHECK: br %r14
+  %trunc = trunc i64 %val to i32
+  %ext = sext i32 %trunc to i64
+  %cmp = icmp slt i64 %ext, 0
+  %neg = sub i64 0, %ext
+  %res = select i1 %cmp, i64 %neg, i64 %ext
+  ret i64 %res
+}
+
+; Test i64->i64 absolute as select(val < 0, -val, val) using slt; expect LPGR.
+define i64 @f7(i64 %val) {
+; CHECK-LABEL: f7:
+; CHECK: lpgr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp slt i64 %val, 0
+  %neg = sub i64 0, %val
+  %res = select i1 %cmp, i64 %neg, i64 %val
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-add-01.ll b/test/CodeGen/SystemZ/int-add-01.ll
index d12ac229774e..4114686e41e8 100644
--- a/test/CodeGen/SystemZ/int-add-01.ll
+++ b/test/CodeGen/SystemZ/int-add-01.ll
@@ -5,7 +5,7 @@
 
 ; Check the low end of the AH range.
 define i32 @f1(i32 %lhs, i16 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ah %r2, 0(%r3)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -16,7 +16,7 @@ define i32 @f1(i32 %lhs, i16 *%src) {
 
 ; Check the high end of the aligned AH range.
 define i32 @f2(i32 %lhs, i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ah %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2047
@@ -28,7 +28,7 @@ define i32 @f2(i32 %lhs, i16 *%src) {
 
 ; Check the next halfword up, which should use AHY instead of AH.
 define i32 @f3(i32 %lhs, i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ahy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2048
@@ -40,7 +40,7 @@ define i32 @f3(i32 %lhs, i16 *%src) {
 
 ; Check the high end of the aligned AHY range.
 define i32 @f4(i32 %lhs, i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ahy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -53,7 +53,7 @@ define i32 @f4(i32 %lhs, i16 *%src) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f5(i32 %lhs, i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ah %r2, 0(%r3)
 ; CHECK: br %r14
@@ -66,7 +66,7 @@ define i32 @f5(i32 %lhs, i16 *%src) {
 
 ; Check the high end of the negative aligned AHY range.
 define i32 @f6(i32 %lhs, i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ahy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -78,7 +78,7 @@ define i32 @f6(i32 %lhs, i16 *%src) {
 
 ; Check the low end of the AHY range.
 define i32 @f7(i32 %lhs, i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ahy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -91,7 +91,7 @@ define i32 @f7(i32 %lhs, i16 *%src) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f8(i32 %lhs, i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r3, -524290
 ; CHECK: ah %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +104,7 @@ define i32 @f8(i32 %lhs, i16 *%src) {
 
 ; Check that AH allows an index.
 define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ah %r2, 4094({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -118,7 +118,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
 
 ; Check that AHY allows an index.
 define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ahy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/int-add-02.ll b/test/CodeGen/SystemZ/int-add-02.ll
index 568ad1c4471d..4386b5a4d496 100644
--- a/test/CodeGen/SystemZ/int-add-02.ll
+++ b/test/CodeGen/SystemZ/int-add-02.ll
@@ -1,10 +1,13 @@
 ; Test 32-bit addition in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo()
 
 ; Check AR.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ar %r2, %r3
 ; CHECK: br %r14
   %add = add i32 %a, %b
@@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check the low end of the A range.
 define i32 @f2(i32 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: a %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned A range.
 define i32 @f3(i32 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: a %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) {
 
 ; Check the next word up, which should use AY instead of A.
 define i32 @f4(i32 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ay %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned AY range.
 define i32 @f5(i32 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ay %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: a %r2, 0(%r3)
 ; CHECK: br %r14
@@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned AY range.
 define i32 @f7(i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ay %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) {
 
 ; Check the low end of the AY range.
 define i32 @f8(i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ay %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: a %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) {
 
 ; Check that A allows an index.
 define i32 @f10(i32 %a, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: a %r2, 4092({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
 
 ; Check that AY allows an index.
 define i32 @f11(i32 %a, i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: ay %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
   %add = add i32 %a, %b
   ret i32 %add
 }
+
+; Ten i32 loads stay live across a call; check spills are added via A, not AR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: a %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %add0 = add i32 %ret, %val0
+  %add1 = add i32 %add0, %val1
+  %add2 = add i32 %add1, %val2
+  %add3 = add i32 %add2, %val3
+  %add4 = add i32 %add3, %val4
+  %add5 = add i32 %add4, %val5
+  %add6 = add i32 %add5, %val6
+  %add7 = add i32 %add6, %val7
+  %add8 = add i32 %add7, %val8
+  %add9 = add i32 %add8, %val9
+
+  ret i32 %add9
+}
diff --git a/test/CodeGen/SystemZ/int-add-03.ll b/test/CodeGen/SystemZ/int-add-03.ll
index 46103575b7b2..56000a80cd9b 100644
--- a/test/CodeGen/SystemZ/int-add-03.ll
+++ b/test/CodeGen/SystemZ/int-add-03.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check AGFR.
 define i64 @f1(i64 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: agfr %r2, %r3
 ; CHECK: br %r14
   %bext = sext i32 %b to i64
@@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) {
 
 ; Check AGF with no displacement.
 define i64 @f2(i64 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: agf %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) {
 
 ; Check the high end of the aligned AGF range.
 define i64 @f3(i64 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: agf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned AGF range.
 define i64 @f5(i64 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) {
 
 ; Check the low end of the AGF range.
 define i64 @f6(i64 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524292
 ; CHECK: agf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) {
 
 ; Check that AGF allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agf %r2, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %add = add i64 %a, %bext
   ret i64 %add
 }
+
+; Ten i32 values stay live across a call; spilled ones should use AGF, not AGFR.
+define i64 @f9(i32 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: agf %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %ext0 = sext i32 %frob0 to i64
+  %ext1 = sext i32 %frob1 to i64
+  %ext2 = sext i32 %frob2 to i64
+  %ext3 = sext i32 %frob3 to i64
+  %ext4 = sext i32 %frob4 to i64
+  %ext5 = sext i32 %frob5 to i64
+  %ext6 = sext i32 %frob6 to i64
+  %ext7 = sext i32 %frob7 to i64
+  %ext8 = sext i32 %frob8 to i64
+  %ext9 = sext i32 %frob9 to i64
+
+  %add0 = add i64 %ret, %ext0
+  %add1 = add i64 %add0, %ext1
+  %add2 = add i64 %add1, %ext2
+  %add3 = add i64 %add2, %ext3
+  %add4 = add i64 %add3, %ext4
+  %add5 = add i64 %add4, %ext5
+  %add6 = add i64 %add5, %ext6
+  %add7 = add i64 %add6, %ext7
+  %add8 = add i64 %add7, %ext8
+  %add9 = add i64 %add8, %ext9
+
+  ret i64 %add9
+}
diff --git a/test/CodeGen/SystemZ/int-add-04.ll b/test/CodeGen/SystemZ/int-add-04.ll
index 1c2dc76781ce..675e36babfa7 100644
--- a/test/CodeGen/SystemZ/int-add-04.ll
+++ b/test/CodeGen/SystemZ/int-add-04.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check ALGFR.
 define i64 @f1(i64 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: algfr %r2, %r3
 ; CHECK: br %r14
   %bext = zext i32 %b to i64
@@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) {
 
 ; Check ALGF with no displacement.
 define i64 @f2(i64 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: algf %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) {
 
 ; Check the high end of the aligned ALGF range.
 define i64 @f3(i64 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: algf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: algf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned ALGF range.
 define i64 @f5(i64 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: algf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) {
 
 ; Check the low end of the ALGF range.
 define i64 @f6(i64 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: algf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524292
 ; CHECK: algf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) {
 
 ; Check that ALGF allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: algf %r2, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %add = add i64 %a, %bext
   ret i64 %add
 }
+
+; Ten i32 values stay live across a call; spilled ones should use ALGF, not ALGFR.
+define i64 @f9(i32 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: algf %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %ext0 = zext i32 %frob0 to i64
+  %ext1 = zext i32 %frob1 to i64
+  %ext2 = zext i32 %frob2 to i64
+  %ext3 = zext i32 %frob3 to i64
+  %ext4 = zext i32 %frob4 to i64
+  %ext5 = zext i32 %frob5 to i64
+  %ext6 = zext i32 %frob6 to i64
+  %ext7 = zext i32 %frob7 to i64
+  %ext8 = zext i32 %frob8 to i64
+  %ext9 = zext i32 %frob9 to i64
+
+  %add0 = add i64 %ret, %ext0
+  %add1 = add i64 %add0, %ext1
+  %add2 = add i64 %add1, %ext2
+  %add3 = add i64 %add2, %ext3
+  %add4 = add i64 %add3, %ext4
+  %add5 = add i64 %add4, %ext5
+  %add6 = add i64 %add5, %ext6
+  %add7 = add i64 %add6, %ext7
+  %add8 = add i64 %add7, %ext8
+  %add9 = add i64 %add8, %ext9
+
+  ret i64 %add9
+}
diff --git a/test/CodeGen/SystemZ/int-add-05.ll b/test/CodeGen/SystemZ/int-add-05.ll
index ae32cc4ad01a..a05fdd9059c1 100644
--- a/test/CodeGen/SystemZ/int-add-05.ll
+++ b/test/CodeGen/SystemZ/int-add-05.ll
@@ -1,10 +1,13 @@
 ; Test 64-bit addition in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i64 @foo()
 
 ; Check AGR.
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: agr %r2, %r3
 ; CHECK: br %r14
   %add = add i64 %a, %b
@@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) {
 
 ; Check AG with no displacement.
 define i64 @f2(i64 %a, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ag %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i64 *%src
@@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned AG range.
 define i64 @f3(i64 %a, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ag %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ag %r2, 0(%r3)
 ; CHECK: br %r14
@@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned AG range.
 define i64 @f5(i64 %a, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ag %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) {
 
 ; Check the low end of the AG range.
 define i64 @f6(i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ag %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: ag %r2, 0(%r3)
 ; CHECK: br %r14
@@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) {
 
 ; Check that AG allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: ag %r2, 524280({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %add = add i64 %a, %b
   ret i64 %add
 }
+
+; Ten i64 loads stay live across a call; check spills are added via AG, not AGR.
+define i64 @f9(i64 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: ag %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %add0 = add i64 %ret, %val0
+  %add1 = add i64 %add0, %val1
+  %add2 = add i64 %add1, %val2
+  %add3 = add i64 %add2, %val3
+  %add4 = add i64 %add3, %val4
+  %add5 = add i64 %add4, %val5
+  %add6 = add i64 %add5, %val6
+  %add7 = add i64 %add6, %val7
+  %add8 = add i64 %add7, %val8
+  %add9 = add i64 %add8, %val9
+
+  ret i64 %add9
+}
diff --git a/test/CodeGen/SystemZ/int-add-06.ll b/test/CodeGen/SystemZ/int-add-06.ll
index 3a9c698dd241..142c7559802e 100644
--- a/test/CodeGen/SystemZ/int-add-06.ll
+++ b/test/CodeGen/SystemZ/int-add-06.ll
@@ -4,7 +4,7 @@
 
 ; Check additions of 1.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ahi %r2, 1
 ; CHECK: br %r14
   %add = add i32 %a, 1
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the AHI range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ahi %r2, 32767
 ; CHECK: br %r14
   %add = add i32 %a, 32767
@@ -22,7 +22,7 @@ define i32 @f2(i32 %a) {
 
 ; Check the next value up, which must use AFI instead.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: afi %r2, 32768
 ; CHECK: br %r14
   %add = add i32 %a, 32768
@@ -31,7 +31,7 @@ define i32 @f3(i32 %a) {
 
 ; Check the high end of the signed 32-bit range.
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: afi %r2, 2147483647
 ; CHECK: br %r14
   %add = add i32 %a, 2147483647
@@ -40,7 +40,7 @@ define i32 @f4(i32 %a) {
 
 ; Check the next value up, which is treated as a negative value.
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: afi %r2, -2147483648
 ; CHECK: br %r14
   %add = add i32 %a, 2147483648
@@ -49,7 +49,7 @@ define i32 @f5(i32 %a) {
 
 ; Check the high end of the negative AHI range.
 define i32 @f6(i32 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ahi %r2, -1
 ; CHECK: br %r14
   %add = add i32 %a, -1
@@ -58,7 +58,7 @@ define i32 @f6(i32 %a) {
 
 ; Check the low end of the AHI range.
 define i32 @f7(i32 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ahi %r2, -32768
 ; CHECK: br %r14
   %add = add i32 %a, -32768
@@ -67,7 +67,7 @@ define i32 @f7(i32 %a) {
 
 ; Check the next value down, which must use AFI instead.
 define i32 @f8(i32 %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: afi %r2, -32769
 ; CHECK: br %r14
   %add = add i32 %a, -32769
@@ -76,7 +76,7 @@ define i32 @f8(i32 %a) {
 
 ; Check the low end of the signed 32-bit range.
 define i32 @f9(i32 %a) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: afi %r2, -2147483648
 ; CHECK: br %r14
   %add = add i32 %a, -2147483648
@@ -85,7 +85,7 @@ define i32 @f9(i32 %a) {
 
 ; Check the next value down, which is treated as a positive value.
 define i32 @f10(i32 %a) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: afi %r2, 2147483647
 ; CHECK: br %r14
   %add = add i32 %a, -2147483649
diff --git a/test/CodeGen/SystemZ/int-add-07.ll b/test/CodeGen/SystemZ/int-add-07.ll
index a065bb2ee137..e9e0212e4df8 100644
--- a/test/CodeGen/SystemZ/int-add-07.ll
+++ b/test/CodeGen/SystemZ/int-add-07.ll
@@ -4,7 +4,7 @@
 
 ; Check additions of 1.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: {{aghi %r2, 1|la %r[0-5], 1\(%r2\)}}
 ; CHECK: br %r14
   %add = add i64 %a, 1
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the AGHI range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: aghi %r2, 32767
 ; CHECK: br %r14
   %add = add i64 %a, 32767
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 
 ; Check the next value up, which must use AGFI instead.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: {{agfi %r2, 32768|lay %r[0-5], 32768\(%r2\)}}
 ; CHECK: br %r14
   %add = add i64 %a, 32768
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 
 ; Check the high end of the AGFI range.
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r2, 2147483647
 ; CHECK: br %r14
   %add = add i64 %a, 2147483647
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a) {
 
 ; Check the next value up, which must use ALGFI instead.
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: algfi %r2, 2147483648
 ; CHECK: br %r14
   %add = add i64 %a, 2147483648
@@ -49,7 +49,7 @@ define i64 @f5(i64 %a) {
 
 ; Check the high end of the ALGFI range.
 define i64 @f6(i64 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: algfi %r2, 4294967295
 ; CHECK: br %r14
   %add = add i64 %a, 4294967295
@@ -58,7 +58,7 @@ define i64 @f6(i64 %a) {
 
 ; Check the next value up, which must be loaded into a register first.
 define i64 @f7(i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llihl %r0, 1
 ; CHECK: agr
 ; CHECK: br %r14
@@ -68,7 +68,7 @@ define i64 @f7(i64 %a) {
 
 ; Check the high end of the negative AGHI range.
 define i64 @f8(i64 %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: aghi %r2, -1
 ; CHECK: br %r14
   %add = add i64 %a, -1
@@ -77,7 +77,7 @@ define i64 @f8(i64 %a) {
 
 ; Check the low end of the AGHI range.
 define i64 @f9(i64 %a) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: aghi %r2, -32768
 ; CHECK: br %r14
   %add = add i64 %a, -32768
@@ -86,7 +86,7 @@ define i64 @f9(i64 %a) {
 
 ; Check the next value down, which must use AGFI instead.
 define i64 @f10(i64 %a) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: {{agfi %r2, -32769|lay %r[0-5]+, -32769\(%r2\)}}
 ; CHECK: br %r14
   %add = add i64 %a, -32769
@@ -95,7 +95,7 @@ define i64 @f10(i64 %a) {
 
 ; Check the low end of the AGFI range.
 define i64 @f11(i64 %a) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r2, -2147483648
 ; CHECK: br %r14
   %add = add i64 %a, -2147483648
@@ -104,7 +104,7 @@ define i64 @f11(i64 %a) {
 
 ; Check the next value down, which must use SLGFI instead.
 define i64 @f12(i64 %a) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: slgfi %r2, 2147483649
 ; CHECK: br %r14
   %add = add i64 %a, -2147483649
@@ -113,7 +113,7 @@ define i64 @f12(i64 %a) {
 
 ; Check the low end of the SLGFI range.
 define i64 @f13(i64 %a) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: slgfi %r2, 4294967295
 ; CHECK: br %r14
   %add = add i64 %a, -4294967295
@@ -122,7 +122,7 @@ define i64 @f13(i64 %a) {
 
 ; Check the next value down, which must use register addition instead.
 define i64 @f14(i64 %a) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: llihf %r0, 4294967295
 ; CHECK: agr
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/int-add-08.ll b/test/CodeGen/SystemZ/int-add-08.ll
index b1f820fe3d84..bcef914ed872 100644
--- a/test/CodeGen/SystemZ/int-add-08.ll
+++ b/test/CodeGen/SystemZ/int-add-08.ll
@@ -1,10 +1,13 @@
 ; Test 128-bit addition in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i128 *@foo()
 
 ; Test register addition.
 define void @f1(i128 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: algr
 ; CHECK: alcgr
 ; CHECK: br %r14
@@ -17,7 +20,7 @@ define void @f1(i128 *%ptr) {
 ; Test memory addition with no offset.  Making the load of %a volatile
 ; should force the memory operand to be %b.
 define void @f2(i128 *%aptr, i64 %addr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: alg {{%r[0-5]}}, 8(%r3)
 ; CHECK: alcg {{%r[0-5]}}, 0(%r3)
 ; CHECK: br %r14
@@ -31,7 +34,7 @@ define void @f2(i128 *%aptr, i64 %addr) {
 
 ; Test the highest aligned offset that is in range of both ALG and ALCG.
 define void @f3(i128 *%aptr, i64 %base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: alg {{%r[0-5]}}, 524280(%r3)
 ; CHECK: alcg {{%r[0-5]}}, 524272(%r3)
 ; CHECK: br %r14
@@ -46,7 +49,7 @@ define void @f3(i128 *%aptr, i64 %base) {
 
 ; Test the next doubleword up, which requires separate address logic for ALG.
 define void @f4(i128 *%aptr, i64 %base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgr [[BASE:%r[1-5]]], %r3
 ; CHECK: agfi [[BASE]], 524288
 ; CHECK: alg {{%r[0-5]}}, 0([[BASE]])
@@ -65,7 +68,7 @@ define void @f4(i128 *%aptr, i64 %base) {
 ; both instructions.  It would be better to create an anchor at 524288
 ; that both instructions can use, but that isn't implemented yet.
 define void @f5(i128 *%aptr, i64 %base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: alg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
@@ -80,7 +83,7 @@ define void @f5(i128 *%aptr, i64 %base) {
 
 ; Test the lowest displacement that is in range of both ALG and ALCG.
 define void @f6(i128 *%aptr, i64 %base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: alg {{%r[0-5]}}, -524280(%r3)
 ; CHECK: alcg {{%r[0-5]}}, -524288(%r3)
 ; CHECK: br %r14
@@ -95,7 +98,7 @@ define void @f6(i128 *%aptr, i64 %base) {
 
 ; Test the next doubleword down, which is out of range of the ALCG.
 define void @f7(i128 *%aptr, i64 %base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: alg {{%r[0-5]}}, -524288(%r3)
 ; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
@@ -108,3 +111,34 @@ define void @f7(i128 *%aptr, i64 %base) {
   ret void
 }
 
+; Check that additions of spilled values can use ALG and ALCG rather than
+; ALGR and ALCGR.
+define void @f8(i128 *%ptr0) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: alg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: alcg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i128 *%ptr0, i128 2
+  %ptr2 = getelementptr i128 *%ptr0, i128 4
+  %ptr3 = getelementptr i128 *%ptr0, i128 6
+  %ptr4 = getelementptr i128 *%ptr0, i128 8
+
+  %val0 = load i128 *%ptr0
+  %val1 = load i128 *%ptr1
+  %val2 = load i128 *%ptr2
+  %val3 = load i128 *%ptr3
+  %val4 = load i128 *%ptr4
+
+  %retptr = call i128 *@foo()
+
+  %ret = load i128 *%retptr
+  %add0 = add i128 %ret, %val0
+  %add1 = add i128 %add0, %val1
+  %add2 = add i128 %add1, %val2
+  %add3 = add i128 %add2, %val3
+  %add4 = add i128 %add3, %val4
+  store i128 %add4, i128 *%retptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-add-09.ll b/test/CodeGen/SystemZ/int-add-09.ll
index bfe63389f189..fd151a7f979a 100644
--- a/test/CodeGen/SystemZ/int-add-09.ll
+++ b/test/CodeGen/SystemZ/int-add-09.ll
@@ -1,13 +1,13 @@
 ; Test 128-bit addition in which the second operand is constant.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check additions of 1.  The XOR ensures that we don't instead load the
 ; constant into a register and use memory addition.
 define void @f1(i128 *%aptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: algfi {{%r[0-5]}}, 1
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 128
@@ -18,9 +18,9 @@ define void @f1(i128 *%aptr) {
 
 ; Check the high end of the ALGFI range.
 define void @f2(i128 *%aptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: algfi {{%r[0-5]}}, 4294967295
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 128
@@ -31,9 +31,9 @@ define void @f2(i128 *%aptr) {
 
 ; Check the next value up, which must use register addition.
 define void @f3(i128 *%aptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: algr
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 128
@@ -44,9 +44,9 @@ define void @f3(i128 *%aptr) {
 
 ; Check addition of -1, which must also use register addition.
 define void @f4(i128 *%aptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: algr
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 128
diff --git a/test/CodeGen/SystemZ/int-add-10.ll b/test/CodeGen/SystemZ/int-add-10.ll
index 17cfdbe33771..01d0a661ed29 100644
--- a/test/CodeGen/SystemZ/int-add-10.ll
+++ b/test/CodeGen/SystemZ/int-add-10.ll
@@ -5,9 +5,9 @@
 ; Check register additions.  The XOR ensures that we don't instead zero-extend
 ; %b into a register and use memory addition.
 define void @f1(i128 *%aptr, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: algfr {{%r[0-5]}}, %r3
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -19,9 +19,9 @@ define void @f1(i128 *%aptr, i32 %b) {
 
 ; Like f1, but using an "in-register" extension.
 define void @f2(i128 *%aptr, i64 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: algfr {{%r[0-5]}}, %r3
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -35,9 +35,9 @@ define void @f2(i128 *%aptr, i64 %b) {
 ; Test register addition in cases where the second operand is zero extended
 ; from i64 rather than i32, but is later masked to i32 range.
 define void @f3(i128 *%aptr, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: algfr {{%r[0-5]}}, %r3
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -50,9 +50,9 @@ define void @f3(i128 *%aptr, i64 %b) {
 
 ; Test ALGF with no offset.
 define void @f4(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: algf {{%r[0-5]}}, 0(%r3)
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -65,9 +65,9 @@ define void @f4(i128 *%aptr, i32 *%bsrc) {
 
 ; Check the high end of the ALGF range.
 define void @f5(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: algf {{%r[0-5]}}, 524284(%r3)
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -82,10 +82,10 @@ define void @f5(i128 *%aptr, i32 *%bsrc) {
 ; Check the next word up, which must use separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f6(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: algf {{%r[0-5]}}, 0(%r3)
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -99,9 +99,9 @@ define void @f6(i128 *%aptr, i32 *%bsrc) {
 
 ; Check the high end of the negative aligned ALGF range.
 define void @f7(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: algf {{%r[0-5]}}, -4(%r3)
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -115,9 +115,9 @@ define void @f7(i128 *%aptr, i32 *%bsrc) {
 
 ; Check the low end of the ALGF range.
 define void @f8(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: algf {{%r[0-5]}}, -524288(%r3)
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -132,10 +132,10 @@ define void @f8(i128 *%aptr, i32 *%bsrc) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f9(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: algf {{%r[0-5]}}, 0(%r3)
-; CHECK: alcgr
+; CHECK: alcg
 ; CHECK: br %r14
   %a = load i128 *%aptr
   %xor = xor i128 %a, 127
@@ -149,7 +149,7 @@ define void @f9(i128 *%aptr, i32 *%bsrc) {
 
 ; Check that ALGF allows an index.
 define void @f10(i128 *%aptr, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: algf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %a = load i128 *%aptr
diff --git a/test/CodeGen/SystemZ/int-add-11.ll b/test/CodeGen/SystemZ/int-add-11.ll
index 47a776ecf6ec..679c206094f3 100644
--- a/test/CodeGen/SystemZ/int-add-11.ll
+++ b/test/CodeGen/SystemZ/int-add-11.ll
@@ -1,10 +1,11 @@
-; Test 32-bit additions of constants to memory.
+; Test 32-bit additions of constants to memory.  The tests here
+; assume z10 register pressure, without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Check additions of 1.
 define void @f1(i32 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: asi 0(%r2), 1
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -15,7 +16,7 @@ define void @f1(i32 *%ptr) {
 
 ; Check the high end of the constant range.
 define void @f2(i32 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: asi 0(%r2), 127
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -27,7 +28,7 @@ define void @f2(i32 *%ptr) {
 ; Check the next constant up, which must use an addition and a store.
 ; Both L/AHI and LHI/A would be OK.
 define void @f3(i32 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: asi
 ; CHECK: st %r0, 0(%r2)
 ; CHECK: br %r14
@@ -39,7 +40,7 @@ define void @f3(i32 *%ptr) {
 
 ; Check the low end of the constant range.
 define void @f4(i32 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: asi 0(%r2), -128
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -50,7 +51,7 @@ define void @f4(i32 *%ptr) {
 
 ; Check the next value down, with the same comment as f3.
 define void @f5(i32 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: asi
 ; CHECK: st %r0, 0(%r2)
 ; CHECK: br %r14
@@ -62,7 +63,7 @@ define void @f5(i32 *%ptr) {
 
 ; Check the high end of the aligned ASI range.
 define void @f6(i32 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: asi 524284(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131071
@@ -75,7 +76,7 @@ define void @f6(i32 *%base) {
 ; Check the next word up, which must use separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f7(i32 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r2, 524288
 ; CHECK: asi 0(%r2), 1
 ; CHECK: br %r14
@@ -88,7 +89,7 @@ define void @f7(i32 *%base) {
 
 ; Check the low end of the ASI range.
 define void @f8(i32 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: asi -524288(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131072
@@ -101,7 +102,7 @@ define void @f8(i32 *%base) {
 ; Check the next word down, which must use separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f9(i32 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524292
 ; CHECK: asi 0(%r2), 1
 ; CHECK: br %r14
@@ -114,7 +115,7 @@ define void @f9(i32 *%base) {
 
 ; Check that ASI does not allow indices.
 define void @f10(i64 %base, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agr %r2, %r3
 ; CHECK: asi 4(%r2), 1
 ; CHECK: br %r14
@@ -126,3 +127,169 @@ define void @f10(i64 %base, i64 %index) {
   store i32 %add, i32 *%ptr
   ret void
 }
+
+; Check that adding 127 to a spilled value can use ASI.
+define void @f11(i32 *%ptr, i32 %sel) {
+; CHECK-LABEL: f11:
+; CHECK: asi {{[0-9]+}}(%r15), 127
+; CHECK: br %r14
+entry:
+  %val0 = load volatile i32 *%ptr
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %test = icmp ne i32 %sel, 0
+  br i1 %test, label %add, label %store
+
+add:
+  %add0 = add i32 %val0, 127
+  %add1 = add i32 %val1, 127
+  %add2 = add i32 %val2, 127
+  %add3 = add i32 %val3, 127
+  %add4 = add i32 %val4, 127
+  %add5 = add i32 %val5, 127
+  %add6 = add i32 %val6, 127
+  %add7 = add i32 %val7, 127
+  %add8 = add i32 %val8, 127
+  %add9 = add i32 %val9, 127
+  %add10 = add i32 %val10, 127
+  %add11 = add i32 %val11, 127
+  %add12 = add i32 %val12, 127
+  %add13 = add i32 %val13, 127
+  %add14 = add i32 %val14, 127
+  %add15 = add i32 %val15, 127
+  br label %store
+
+store:
+  %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ]
+  %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ]
+  %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ]
+  %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ]
+  %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ]
+  %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ]
+  %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ]
+  %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ]
+  %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ]
+  %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ]
+  %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ]
+  %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ]
+  %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ]
+  %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ]
+  %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ]
+  %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ]
+
+  store volatile i32 %new0, i32 *%ptr
+  store volatile i32 %new1, i32 *%ptr
+  store volatile i32 %new2, i32 *%ptr
+  store volatile i32 %new3, i32 *%ptr
+  store volatile i32 %new4, i32 *%ptr
+  store volatile i32 %new5, i32 *%ptr
+  store volatile i32 %new6, i32 *%ptr
+  store volatile i32 %new7, i32 *%ptr
+  store volatile i32 %new8, i32 *%ptr
+  store volatile i32 %new9, i32 *%ptr
+  store volatile i32 %new10, i32 *%ptr
+  store volatile i32 %new11, i32 *%ptr
+  store volatile i32 %new12, i32 *%ptr
+  store volatile i32 %new13, i32 *%ptr
+  store volatile i32 %new14, i32 *%ptr
+  store volatile i32 %new15, i32 *%ptr
+
+  ret void
+}
+
+; Check that adding -128 to a spilled value can use ASI.
+define void @f12(i32 *%ptr, i32 %sel) {
+; CHECK-LABEL: f12:
+; CHECK: asi {{[0-9]+}}(%r15), -128
+; CHECK: br %r14
+entry:
+  %val0 = load volatile i32 *%ptr
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %test = icmp ne i32 %sel, 0
+  br i1 %test, label %add, label %store
+
+add:
+  %add0 = add i32 %val0, -128
+  %add1 = add i32 %val1, -128
+  %add2 = add i32 %val2, -128
+  %add3 = add i32 %val3, -128
+  %add4 = add i32 %val4, -128
+  %add5 = add i32 %val5, -128
+  %add6 = add i32 %val6, -128
+  %add7 = add i32 %val7, -128
+  %add8 = add i32 %val8, -128
+  %add9 = add i32 %val9, -128
+  %add10 = add i32 %val10, -128
+  %add11 = add i32 %val11, -128
+  %add12 = add i32 %val12, -128
+  %add13 = add i32 %val13, -128
+  %add14 = add i32 %val14, -128
+  %add15 = add i32 %val15, -128
+  br label %store
+
+store:
+  %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ]
+  %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ]
+  %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ]
+  %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ]
+  %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ]
+  %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ]
+  %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ]
+  %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ]
+  %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ]
+  %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ]
+  %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ]
+  %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ]
+  %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ]
+  %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ]
+  %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ]
+  %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ]
+
+  store volatile i32 %new0, i32 *%ptr
+  store volatile i32 %new1, i32 *%ptr
+  store volatile i32 %new2, i32 *%ptr
+  store volatile i32 %new3, i32 *%ptr
+  store volatile i32 %new4, i32 *%ptr
+  store volatile i32 %new5, i32 *%ptr
+  store volatile i32 %new6, i32 *%ptr
+  store volatile i32 %new7, i32 *%ptr
+  store volatile i32 %new8, i32 *%ptr
+  store volatile i32 %new9, i32 *%ptr
+  store volatile i32 %new10, i32 *%ptr
+  store volatile i32 %new11, i32 *%ptr
+  store volatile i32 %new12, i32 *%ptr
+  store volatile i32 %new13, i32 *%ptr
+  store volatile i32 %new14, i32 *%ptr
+  store volatile i32 %new15, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-add-12.ll b/test/CodeGen/SystemZ/int-add-12.ll
index ae1c1f735fa7..741cce19d72c 100644
--- a/test/CodeGen/SystemZ/int-add-12.ll
+++ b/test/CodeGen/SystemZ/int-add-12.ll
@@ -4,7 +4,7 @@
 
 ; Check additions of 1.
 define void @f1(i64 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: agsi 0(%r2), 1
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -15,7 +15,7 @@ define void @f1(i64 *%ptr) {
 
 ; Check the high end of the constant range.
 define void @f2(i64 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: agsi 0(%r2), 127
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -27,7 +27,7 @@ define void @f2(i64 *%ptr) {
 ; Check the next constant up, which must use an addition and a store.
 ; Both LG/AGHI and LGHI/AG would be OK.
 define void @f3(i64 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: agsi
 ; CHECK: stg %r0, 0(%r2)
 ; CHECK: br %r14
@@ -39,7 +39,7 @@ define void @f3(i64 *%ptr) {
 
 ; Check the low end of the constant range.
 define void @f4(i64 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agsi 0(%r2), -128
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -50,7 +50,7 @@ define void @f4(i64 *%ptr) {
 
 ; Check the next value down, with the same comment as f3.
 define void @f5(i64 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: agsi
 ; CHECK: stg %r0, 0(%r2)
 ; CHECK: br %r14
@@ -62,7 +62,7 @@ define void @f5(i64 *%ptr) {
 
 ; Check the high end of the aligned AGSI range.
 define void @f6(i64 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agsi 524280(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 65535
@@ -75,7 +75,7 @@ define void @f6(i64 *%base) {
 ; Check the next doubleword up, which must use separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f7(i64 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r2, 524288
 ; CHECK: agsi 0(%r2), 1
 ; CHECK: br %r14
@@ -88,7 +88,7 @@ define void @f7(i64 *%base) {
 
 ; Check the low end of the AGSI range.
 define void @f8(i64 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agsi -524288(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -65536
@@ -101,7 +101,7 @@ define void @f8(i64 *%base) {
 ; Check the next doubleword down, which must use separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f9(i64 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524296
 ; CHECK: agsi 0(%r2), 1
 ; CHECK: br %r14
@@ -114,7 +114,7 @@ define void @f9(i64 *%base) {
 
 ; Check that AGSI does not allow indices.
 define void @f10(i64 %base, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agr %r2, %r3
 ; CHECK: agsi 8(%r2), 1
 ; CHECK: br %r14
@@ -126,3 +126,169 @@ define void @f10(i64 %base, i64 %index) {
   store i64 %add, i64 *%ptr
   ret void
 }
+
+; Check that adding 127 to a spilled value can use AGSI.
+define void @f11(i64 *%ptr, i32 %sel) {
+; CHECK-LABEL: f11:
+; CHECK: agsi {{[0-9]+}}(%r15), 127
+; CHECK: br %r14
+entry:
+  %val0 = load volatile i64 *%ptr
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %test = icmp ne i32 %sel, 0
+  br i1 %test, label %add, label %store
+
+add:
+  %add0 = add i64 %val0, 127
+  %add1 = add i64 %val1, 127
+  %add2 = add i64 %val2, 127
+  %add3 = add i64 %val3, 127
+  %add4 = add i64 %val4, 127
+  %add5 = add i64 %val5, 127
+  %add6 = add i64 %val6, 127
+  %add7 = add i64 %val7, 127
+  %add8 = add i64 %val8, 127
+  %add9 = add i64 %val9, 127
+  %add10 = add i64 %val10, 127
+  %add11 = add i64 %val11, 127
+  %add12 = add i64 %val12, 127
+  %add13 = add i64 %val13, 127
+  %add14 = add i64 %val14, 127
+  %add15 = add i64 %val15, 127
+  br label %store
+
+store:
+  %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ]
+  %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ]
+  %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ]
+  %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ]
+  %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ]
+  %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ]
+  %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ]
+  %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ]
+  %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ]
+  %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ]
+  %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ]
+  %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ]
+  %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ]
+  %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ]
+  %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ]
+  %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ]
+
+  store volatile i64 %new0, i64 *%ptr
+  store volatile i64 %new1, i64 *%ptr
+  store volatile i64 %new2, i64 *%ptr
+  store volatile i64 %new3, i64 *%ptr
+  store volatile i64 %new4, i64 *%ptr
+  store volatile i64 %new5, i64 *%ptr
+  store volatile i64 %new6, i64 *%ptr
+  store volatile i64 %new7, i64 *%ptr
+  store volatile i64 %new8, i64 *%ptr
+  store volatile i64 %new9, i64 *%ptr
+  store volatile i64 %new10, i64 *%ptr
+  store volatile i64 %new11, i64 *%ptr
+  store volatile i64 %new12, i64 *%ptr
+  store volatile i64 %new13, i64 *%ptr
+  store volatile i64 %new14, i64 *%ptr
+  store volatile i64 %new15, i64 *%ptr
+
+  ret void
+}
+
+; Check that adding -128 to a spilled value can use AGSI.
+define void @f12(i64 *%ptr, i32 %sel) {
+; CHECK-LABEL: f12:
+; CHECK: agsi {{[0-9]+}}(%r15), -128
+; CHECK: br %r14
+entry:
+  %val0 = load volatile i64 *%ptr
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %test = icmp ne i32 %sel, 0
+  br i1 %test, label %add, label %store
+
+add:
+  %add0 = add i64 %val0, -128
+  %add1 = add i64 %val1, -128
+  %add2 = add i64 %val2, -128
+  %add3 = add i64 %val3, -128
+  %add4 = add i64 %val4, -128
+  %add5 = add i64 %val5, -128
+  %add6 = add i64 %val6, -128
+  %add7 = add i64 %val7, -128
+  %add8 = add i64 %val8, -128
+  %add9 = add i64 %val9, -128
+  %add10 = add i64 %val10, -128
+  %add11 = add i64 %val11, -128
+  %add12 = add i64 %val12, -128
+  %add13 = add i64 %val13, -128
+  %add14 = add i64 %val14, -128
+  %add15 = add i64 %val15, -128
+  br label %store
+
+store:
+  %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ]
+  %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ]
+  %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ]
+  %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ]
+  %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ]
+  %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ]
+  %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ]
+  %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ]
+  %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ]
+  %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ]
+  %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ]
+  %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ]
+  %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ]
+  %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ]
+  %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ]
+  %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ]
+
+  store volatile i64 %new0, i64 *%ptr
+  store volatile i64 %new1, i64 *%ptr
+  store volatile i64 %new2, i64 *%ptr
+  store volatile i64 %new3, i64 *%ptr
+  store volatile i64 %new4, i64 *%ptr
+  store volatile i64 %new5, i64 *%ptr
+  store volatile i64 %new6, i64 *%ptr
+  store volatile i64 %new7, i64 *%ptr
+  store volatile i64 %new8, i64 *%ptr
+  store volatile i64 %new9, i64 *%ptr
+  store volatile i64 %new10, i64 *%ptr
+  store volatile i64 %new11, i64 *%ptr
+  store volatile i64 %new12, i64 *%ptr
+  store volatile i64 %new13, i64 *%ptr
+  store volatile i64 %new14, i64 *%ptr
+  store volatile i64 %new15, i64 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-add-13.ll b/test/CodeGen/SystemZ/int-add-13.ll
new file mode 100644
index 000000000000..7dfabbcc59e0
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-13.ll
@@ -0,0 +1,39 @@
+; Test the three-operand forms of addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check ARK, where the result register differs from both source registers.
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f1:
+; CHECK: ark %r2, %r3, %r4
+; CHECK: br %r14
+  %add = add i32 %b, %c
+  ret i32 %add
+}
+
+; Check that AR is still used when the result replaces the first operand.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: ar %r2, %r3
+; CHECK: br %r14
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check AGRK, where the result register differs from both source registers.
+define i64 @f3(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: f3:
+; CHECK: agrk %r2, %r3, %r4
+; CHECK: br %r14
+  %add = add i64 %b, %c
+  ret i64 %add
+}
+
+; Check that AGR is still used when the result replaces the first operand.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agr %r2, %r3
+; CHECK: br %r14
+  %add = add i64 %a, %b
+  ret i64 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-14.ll b/test/CodeGen/SystemZ/int-add-14.ll
new file mode 100644
index 000000000000..07323789c3b0
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-14.ll
@@ -0,0 +1,67 @@
+; Test 32-bit addition in which the second operand is constant and in which
+; three-operand forms are available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check additions of 1.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: ahik %r2, %r3, 1
+; CHECK: br %r14
+  %add = add i32 %b, 1
+  ret i32 %add
+}
+
+; Check the high end of the AHIK range.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: ahik %r2, %r3, 32767
+; CHECK: br %r14
+  %add = add i32 %b, 32767
+  ret i32 %add
+}
+
+; Check the next value up, which must use AFI instead.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: afi {{%r[0-5]}}, 32768
+; CHECK: br %r14
+  %add = add i32 %b, 32768
+  ret i32 %add
+}
+
+; Check the high end of the negative AHIK range.
+define i32 @f4(i32 %a, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: ahik %r2, %r3, -1
+; CHECK: br %r14
+  %add = add i32 %b, -1
+  ret i32 %add
+}
+
+; Check the low end of the AHIK range.
+define i32 @f5(i32 %a, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: ahik %r2, %r3, -32768
+; CHECK: br %r14
+  %add = add i32 %b, -32768
+  ret i32 %add
+}
+
+; Check the next value down, which must use AFI instead.
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: afi {{%r[0-5]}}, -32769
+; CHECK: br %r14
+  %add = add i32 %b, -32769
+  ret i32 %add
+}
+
+; Check that AHI is still used when the result replaces the register operand.
+define i32 @f7(i32 %a) {
+; CHECK-LABEL: f7:
+; CHECK: ahi %r2, 1
+; CHECK: br %r14
+  %add = add i32 %a, 1
+  ret i32 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-15.ll b/test/CodeGen/SystemZ/int-add-15.ll
new file mode 100644
index 000000000000..041ec19142dc
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-15.ll
@@ -0,0 +1,67 @@
+; Test 64-bit addition in which the second operand is constant and in which
+; three-operand forms are available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check additions of 1, which can use either AGHIK or LA.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: {{aghik %r2, %r3, 1|la %r2, 1\(%r3\)}}
+; CHECK: br %r14
+  %add = add i64 %b, 1
+  ret i64 %add
+}
+
+; Check the high end of the AGHIK range.
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: aghik %r2, %r3, 32767
+; CHECK: br %r14
+  %add = add i64 %b, 32767
+  ret i64 %add
+}
+
+; Check the next value up, which must use AGFI or LAY instead.
+define i64 @f3(i64 %a, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: {{agfi %r[0-5], 32768|lay %r2, 32768\(%r3\)}}
+; CHECK: br %r14
+  %add = add i64 %b, 32768
+  ret i64 %add
+}
+
+; Check the high end of the negative AGHIK range.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: aghik %r2, %r3, -1
+; CHECK: br %r14
+  %add = add i64 %b, -1
+  ret i64 %add
+}
+
+; Check the low end of the AGHIK range.
+define i64 @f5(i64 %a, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: aghik %r2, %r3, -32768
+; CHECK: br %r14
+  %add = add i64 %b, -32768
+  ret i64 %add
+}
+
+; Check the next value down, which must use AGFI or LAY instead.
+define i64 @f6(i64 %a, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: {{agfi %r[0-5], -32769|lay %r2, -32769\(%r3\)}}
+; CHECK: br %r14
+  %add = add i64 %b, -32769
+  ret i64 %add
+}
+
+; Check that AGHI is still used when the result replaces the register operand.
+define i64 @f7(i64 %a) {
+; CHECK-LABEL: f7:
+; CHECK: aghi %r2, 32000
+; CHECK: br %r14
+  %add = add i64 %a, 32000
+  ret i64 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-16.ll b/test/CodeGen/SystemZ/int-add-16.ll
new file mode 100644
index 000000000000..36cc13e5fc6e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-16.ll
@@ -0,0 +1,93 @@
+; Test 128-bit addition when the distinct-operands facility is available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Test the case where both operands are in registers.
+define i64 @f1(i64 %a, i64 %b, i64 %c, i64 %d, i64 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: algrk %r2, %r4, %r5
+; CHECK: alcgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %b, i32 0
+  %x2 = insertelement <2 x i64> %x1, i64 %c, i32 1
+  %x = bitcast <2 x i64> %x2 to i128
+  %y2 = insertelement <2 x i64> %x1, i64 %d, i32 1
+  %y = bitcast <2 x i64> %y2 to i128
+  %add = add i128 %x, %y
+  %addv = bitcast i128 %add to <2 x i64>
+  %high = extractelement <2 x i64> %addv, i32 0
+  store i64 %high, i64 *%ptr
+  %low = extractelement <2 x i64> %addv, i32 1
+  ret i64 %low
+}
+
+; Test addition of 1.
+define void @f2(i64 %a, i64 %b, i128 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: alghsik {{%r[0-5]}}, %r3, 1
+; CHECK: alcgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %a, i32 0
+  %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1
+  %x = bitcast <2 x i64> %x2 to i128
+  %add = add i128 %x, 1
+  store i128 %add, i128 *%ptr
+  ret void
+}
+
+; Test the upper end of the ALGHSIK range.
+define void @f3(i64 %a, i64 %b, i128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: alghsik {{%r[0-5]}}, %r3, 32767
+; CHECK: alcgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %a, i32 0
+  %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1
+  %x = bitcast <2 x i64> %x2 to i128
+  %add = add i128 %x, 32767
+  store i128 %add, i128 *%ptr
+  ret void
+}
+
+; Test the next value up, which should use ALGFI instead.
+define void @f4(i64 %a, i64 %b, i128 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: algfi %r3, 32768
+; CHECK: alcgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %a, i32 0
+  %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1
+  %x = bitcast <2 x i64> %x2 to i128
+  %add = add i128 %x, 32768
+  store i128 %add, i128 *%ptr
+  ret void
+}
+
+; Test the lower end of the ALGHSIK range.
+define void @f5(i64 %a, i64 %b, i128 *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: alghsik {{%r[0-5]}}, %r3, -32768
+; CHECK: alcgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %a, i32 0
+  %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1
+  %x = bitcast <2 x i64> %x2 to i128
+  %add = add i128 %x, -32768
+  store i128 %add, i128 *%ptr
+  ret void
+}
+
+; Test the next value down, which cannot use either ALGHSIK or ALGFI.
+define void @f6(i64 %a, i64 %b, i128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: alghsik
+; CHECK-NOT: algfi
+; CHECK: alcgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %a, i32 0
+  %x2 = insertelement <2 x i64> %x1, i64 %b, i32 1
+  %x = bitcast <2 x i64> %x2 to i128
+  %add = add i128 %x, -32769
+  store i128 %add, i128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-01.ll b/test/CodeGen/SystemZ/int-cmp-01.ll
index aa432f0b04fb..6653b6f706f7 100644
--- a/test/CodeGen/SystemZ/int-cmp-01.ll
+++ b/test/CodeGen/SystemZ/int-cmp-01.ll
@@ -5,7 +5,7 @@
 
 ; Check the low end of the CH range.
 define void @f1(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ch %r2, 0(%r3)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -18,7 +18,7 @@ define void @f1(i32 %lhs, i16 *%src, i32 *%dst) {
 
 ; Check the high end of the aligned CH range.
 define void @f2(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ch %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2047
@@ -32,7 +32,7 @@ define void @f2(i32 %lhs, i16 *%src, i32 *%dst) {
 
 ; Check the next halfword up, which should use CHY instead of CH.
 define void @f3(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: chy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2048
@@ -46,7 +46,7 @@ define void @f3(i32 %lhs, i16 *%src, i32 *%dst) {
 
 ; Check the high end of the aligned CHY range.
 define void @f4(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: chy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -61,7 +61,7 @@ define void @f4(i32 %lhs, i16 *%src, i32 *%dst) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f5(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ch %r2, 0(%r3)
 ; CHECK: br %r14
@@ -76,7 +76,7 @@ define void @f5(i32 %lhs, i16 *%src, i32 *%dst) {
 
 ; Check the high end of the negative aligned CHY range.
 define void @f6(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: chy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -90,7 +90,7 @@ define void @f6(i32 %lhs, i16 *%src, i32 *%dst) {
 
 ; Check the low end of the CHY range.
 define void @f7(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: chy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -105,7 +105,7 @@ define void @f7(i32 %lhs, i16 *%src, i32 *%dst) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f8(i32 %lhs, i16 *%src, i32 *%dst) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r3, -524290
 ; CHECK: ch %r2, 0(%r3)
 ; CHECK: br %r14
@@ -120,7 +120,7 @@ define void @f8(i32 %lhs, i16 *%src, i32 *%dst) {
 
 ; Check that CH allows an index.
 define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ch %r2, 4094({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -136,7 +136,7 @@ define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
 
 ; Check that CHY allows an index.
 define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: chy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -149,3 +149,17 @@ define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
   store i32 %res, i32 *%dst
   ret void
 }
+
+; Check the comparison can be reversed (memory operand on the left) if that allows CH to be used.
+define double @f11(double %a, double %b, i32 %rhs, i16 *%src) {
+; CHECK-LABEL: f11:
+; CHECK: ch %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %half = load i16 *%src  ; 16-bit value read from memory
+  %lhs = sext i16 %half to i32  ; sign-extended load becomes the left-hand operand
+  %cond = icmp slt i32 %lhs, %rhs  ; memory < register; expected output compares register-first and uses JH
+  %res = select i1 %cond, double %a, double %b  ; %a when the condition holds, otherwise %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-02.ll b/test/CodeGen/SystemZ/int-cmp-02.ll
index c158fb4af77f..4a8a1a9ade33 100644
--- a/test/CodeGen/SystemZ/int-cmp-02.ll
+++ b/test/CodeGen/SystemZ/int-cmp-02.ll
@@ -2,11 +2,12 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i32 @foo()
+
 ; Check register comparison.
 define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
-; CHECK: f1:
-; CHECK: cr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f1:
+; CHECK: crjl %r2, %r3
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, %i2
@@ -16,9 +17,9 @@ define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
 
 ; Check the low end of the C range.
 define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: c %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = load i32 *%ptr
@@ -29,9 +30,9 @@ define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
 
 ; Check the high end of the aligned C range.
 define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: c %r2, 4092(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1023
@@ -43,9 +44,9 @@ define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the next word up, which should use CY instead of C.
 define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cy %r2, 4096(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1024
@@ -57,9 +58,9 @@ define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the high end of the aligned CY range.
 define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cy %r2, 524284(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131071
@@ -72,10 +73,10 @@ define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: c %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131072
@@ -87,9 +88,9 @@ define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the high end of the negative aligned CY range.
 define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: cy %r2, -4(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -1
@@ -101,9 +102,9 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the low end of the CY range.
 define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cy %r2, -524288(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131072
@@ -116,10 +117,10 @@ define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: c %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131073
@@ -131,9 +132,9 @@ define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check that C allows an index.
 define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: c %r2, 4092({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -147,9 +148,9 @@ define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
 
 ; Check that CY allows an index.
 define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: cy %r2, 4096({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -160,3 +161,36 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; The first branch here got recreated by InsertBranch while splitting the
+; critical edge %entry->%while.body, which lost the kills information for CC.
+define void @f12(i32 %a, i32 %b) {
+; CHECK-LABEL: f12:
+; CHECK: cije %r2, 0
+; CHECK: crjlh %r2,
+; CHECK: br %r14
+entry:
+  %cmp11 = icmp eq i32 %a, 0
+  br i1 %cmp11, label %while.end, label %while.body  ; skip the loop entirely when %a is zero
+
+while.body:
+  %c = call i32 @foo()  ; fresh value on every iteration
+  %cmp12 = icmp eq i32 %c, %b
+  br i1 %cmp12, label %while.end, label %while.body  ; keep looping until @foo returns %b
+
+while.end:
+  ret void
+}
+
+; Check the comparison can be reversed (memory operand on the left) if that allows C to be used.
+define double @f13(double %a, double %b, i32 %i2, i32 *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: c %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i1 = load i32 *%ptr  ; the loaded value is the left-hand operand
+  %cond = icmp slt i32 %i1, %i2  ; signed memory < register; expected output compares register-first and uses JH
+  %res = select i1 %cond, double %a, double %b  ; %a when the condition holds, otherwise %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-03.ll b/test/CodeGen/SystemZ/int-cmp-03.ll
index 4203bee6ac44..aa654e086dc6 100644
--- a/test/CodeGen/SystemZ/int-cmp-03.ll
+++ b/test/CodeGen/SystemZ/int-cmp-03.ll
@@ -4,9 +4,8 @@
 
 ; Check register comparison.
 define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
-; CHECK: f1:
-; CHECK: clr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f1:
+; CHECK: clrjl %r2, %r3
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ult i32 %i1, %i2
@@ -16,9 +15,9 @@ define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
 
 ; Check the low end of the CL range.
 define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cl %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = load i32 *%ptr
@@ -29,9 +28,9 @@ define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
 
 ; Check the high end of the aligned CL range.
 define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cl %r2, 4092(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1023
@@ -43,9 +42,9 @@ define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the next word up, which should use CLY instead of CL.
 define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cly %r2, 4096(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1024
@@ -57,9 +56,9 @@ define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the high end of the aligned CLY range.
 define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cly %r2, 524284(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131071
@@ -72,10 +71,10 @@ define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: cl %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131072
@@ -87,9 +86,9 @@ define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the high end of the negative aligned CLY range.
 define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: cly %r2, -4(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -1
@@ -101,9 +100,9 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the low end of the CLY range.
 define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cly %r2, -524288(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131072
@@ -116,10 +115,10 @@ define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: cl %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131073
@@ -131,9 +130,9 @@ define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check that CL allows an index.
 define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: cl %r2, 4092({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -147,9 +146,9 @@ define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
 
 ; Check that CLY allows an index.
 define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: cly %r2, 4096({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -160,3 +159,16 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; Check the comparison can be reversed (memory operand on the left) if that allows CL to be used.
+define double @f12(double %a, double %b, i32 %i2, i32 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: cl %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i1 = load i32 *%ptr  ; the loaded value is the left-hand operand
+  %cond = icmp ult i32 %i1, %i2  ; unsigned memory < register; expected output compares register-first and uses JH
+  %res = select i1 %cond, double %a, double %b  ; %a when the condition holds, otherwise %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-04.ll b/test/CodeGen/SystemZ/int-cmp-04.ll
index d0625fbddbae..a6606f392923 100644
--- a/test/CodeGen/SystemZ/int-cmp-04.ll
+++ b/test/CodeGen/SystemZ/int-cmp-04.ll
@@ -5,7 +5,7 @@
 
 ; Check CGH with no displacement.
 define void @f1(i64 %lhs, i16 *%src, i64 *%dst) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cgh %r2, 0(%r3)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -18,7 +18,7 @@ define void @f1(i64 %lhs, i16 *%src, i64 *%dst) {
 
 ; Check the high end of the aligned CGH range.
 define void @f2(i64 %lhs, i16 *%src, i64 *%dst) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cgh %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -33,7 +33,7 @@ define void @f2(i64 %lhs, i16 *%src, i64 *%dst) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f3(i64 %lhs, i16 *%src, i64 *%dst) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r3, 524288
 ; CHECK: cgh %r2, 0(%r3)
 ; CHECK: br %r14
@@ -48,7 +48,7 @@ define void @f3(i64 %lhs, i16 *%src, i64 *%dst) {
 
 ; Check the high end of the negative aligned CGH range.
 define void @f4(i64 %lhs, i16 *%src, i64 *%dst) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cgh %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -62,7 +62,7 @@ define void @f4(i64 %lhs, i16 *%src, i64 *%dst) {
 
 ; Check the low end of the CGH range.
 define void @f5(i64 %lhs, i16 *%src, i64 *%dst) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cgh %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -77,7 +77,7 @@ define void @f5(i64 %lhs, i16 *%src, i64 *%dst) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f6(i64 %lhs, i16 *%src, i64 *%dst) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, -524290
 ; CHECK: cgh %r2, 0(%r3)
 ; CHECK: br %r14
@@ -92,7 +92,7 @@ define void @f6(i64 %lhs, i16 *%src, i64 *%dst) {
 
 ; Check that CGH allows an index.
 define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: cgh %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -105,3 +105,17 @@ define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) {
   store i64 %res, i64 *%dst
   ret void
 }
+
+; Check the comparison can be reversed (memory operand on the left) if that allows CGH to be used.
+define double @f8(double %a, double %b, i64 %rhs, i16 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: cgh %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %half = load i16 *%src  ; 16-bit value read from memory
+  %lhs = sext i16 %half to i64  ; sign-extended load becomes the left-hand operand
+  %cond = icmp slt i64 %lhs, %rhs  ; memory < register; expected output compares register-first and uses JH
+  %res = select i1 %cond, double %a, double %b  ; %a when the condition holds, otherwise %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-05.ll b/test/CodeGen/SystemZ/int-cmp-05.ll
index 2ab64d5319a8..f15b76bb87fe 100644
--- a/test/CodeGen/SystemZ/int-cmp-05.ll
+++ b/test/CodeGen/SystemZ/int-cmp-05.ll
@@ -2,11 +2,13 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check signed register comparison.
 define double @f1(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = sext i32 %unext to i64
@@ -17,7 +19,7 @@ define double @f1(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; Check unsigned register comparison, which can't use CGFR.
 define double @f2(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: cgfr
 ; CHECK: br %r14
   %i2 = sext i32 %unext to i64
@@ -28,9 +30,9 @@ define double @f2(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; Check register equality.
 define double @f3(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = sext i32 %unext to i64
@@ -41,9 +43,9 @@ define double @f3(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; Check register inequality.
 define double @f4(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = sext i32 %unext to i64
@@ -52,11 +54,11 @@ define double @f4(double %a, double %b, i64 %i1, i32 %unext) {
   ret double %res
 }
 
-; Check signed comparisonn with memory.
+; Check signed comparison with memory.
 define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -68,7 +70,7 @@ define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check unsigned comparison with memory.
 define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: cgf
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -80,9 +82,9 @@ define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check memory equality.
 define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: cgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -94,9 +96,9 @@ define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check memory inequality.
 define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -108,9 +110,9 @@ define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check the high end of the aligned CGF range.
 define double @f9(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cgf %r2, 524284(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131071
@@ -124,10 +126,10 @@ define double @f9(double %a, double %b, i64 %i1, i32 *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f10(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r3, 524288
 ; CHECK: cgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131072
@@ -140,9 +142,9 @@ define double @f10(double %a, double %b, i64 %i1, i32 *%base) {
 
 ; Check the high end of the negative aligned CGF range.
 define double @f11(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: cgf %r2, -4(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -1
@@ -155,9 +157,9 @@ define double @f11(double %a, double %b, i64 %i1, i32 *%base) {
 
 ; Check the low end of the CGF range.
 define double @f12(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: cgf %r2, -524288(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131072
@@ -171,10 +173,10 @@ define double @f12(double %a, double %b, i64 %i1, i32 *%base) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: agfi %r3, -524292
 ; CHECK: cgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131073
@@ -187,9 +189,9 @@ define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
 
 ; Check that CGF allows an index.
 define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: cgf %r2, 524284({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -201,3 +203,104 @@ define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; Check that comparisons of spilled values can use CGF rather than CGFR.
+define i64 @f15(i32 *%ptr0) {
+; CHECK-LABEL: f15:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: cgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2  ; ten pointers in total, each two i32 elements apart
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0  ; load all ten values
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100  ; ten derived values, all used again after the call below
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0  ; write the derived values back before the call
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()  ; the expected CGF with a %r15-based address must come after this call
+
+  %ext0 = sext i32 %frob0 to i64  ; sign-extend each derived value to i64 after the call
+  %ext1 = sext i32 %frob1 to i64
+  %ext2 = sext i32 %frob2 to i64
+  %ext3 = sext i32 %frob3 to i64
+  %ext4 = sext i32 %frob4 to i64
+  %ext5 = sext i32 %frob5 to i64
+  %ext6 = sext i32 %frob6 to i64
+  %ext7 = sext i32 %frob7 to i64
+  %ext8 = sext i32 %frob8 to i64
+  %ext9 = sext i32 %frob9 to i64
+
+  %cmp0 = icmp slt i64 %ret, %ext0  ; signed compare of the call result against each extended value
+  %cmp1 = icmp slt i64 %ret, %ext1
+  %cmp2 = icmp slt i64 %ret, %ext2
+  %cmp3 = icmp slt i64 %ret, %ext3
+  %cmp4 = icmp slt i64 %ret, %ext4
+  %cmp5 = icmp slt i64 %ret, %ext5
+  %cmp6 = icmp slt i64 %ret, %ext6
+  %cmp7 = icmp slt i64 %ret, %ext7
+  %cmp8 = icmp slt i64 %ret, %ext8
+  %cmp9 = icmp slt i64 %ret, %ext9
+
+  %sel0 = select i1 %cmp0, i64 %ret, i64 0  ; chain the selects so every comparison feeds the result
+  %sel1 = select i1 %cmp1, i64 %sel0, i64 1
+  %sel2 = select i1 %cmp2, i64 %sel1, i64 2
+  %sel3 = select i1 %cmp3, i64 %sel2, i64 3
+  %sel4 = select i1 %cmp4, i64 %sel3, i64 4
+  %sel5 = select i1 %cmp5, i64 %sel4, i64 5
+  %sel6 = select i1 %cmp6, i64 %sel5, i64 6
+  %sel7 = select i1 %cmp7, i64 %sel6, i64 7
+  %sel8 = select i1 %cmp8, i64 %sel7, i64 8
+  %sel9 = select i1 %cmp9, i64 %sel8, i64 9
+
+  ret i64 %sel9
+}
+
+; Check the comparison can be reversed (memory operand on the left) if that allows CGF to be used.
+define double @f16(double %a, double %b, i64 %i2, i32 *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: cgf %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr  ; 32-bit value read from memory
+  %i1 = sext i32 %unext to i64  ; sign-extended load becomes the left-hand operand
+  %cond = icmp slt i64 %i1, %i2  ; memory < register; expected output compares register-first and uses JH
+  %res = select i1 %cond, double %a, double %b  ; %a when the condition holds, otherwise %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-06.ll b/test/CodeGen/SystemZ/int-cmp-06.ll
index 26f6dbfe6f22..8ab62e89ec39 100644
--- a/test/CodeGen/SystemZ/int-cmp-06.ll
+++ b/test/CodeGen/SystemZ/int-cmp-06.ll
@@ -2,11 +2,13 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check unsigned register comparison.
 define double @f1(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = zext i32 %unext to i64
@@ -17,9 +19,9 @@ define double @f1(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; ...and again with a different representation.
 define double @f2(double %a, double %b, i64 %i1, i64 %unext) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = and i64 %unext, 4294967295
@@ -30,7 +32,7 @@ define double @f2(double %a, double %b, i64 %i1, i64 %unext) {
 
 ; Check signed register comparison, which can't use CLGFR.
 define double @f3(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clgfr
 ; CHECK: br %r14
   %i2 = zext i32 %unext to i64
@@ -41,7 +43,7 @@ define double @f3(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; ...and again with a different representation
 define double @f4(double %a, double %b, i64 %i1, i64 %unext) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: clgfr
 ; CHECK: br %r14
   %i2 = and i64 %unext, 4294967295
@@ -52,9 +54,9 @@ define double @f4(double %a, double %b, i64 %i1, i64 %unext) {
 
 ; Check register equality.
 define double @f5(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = zext i32 %unext to i64
@@ -65,9 +67,9 @@ define double @f5(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; ...and again with a different representation
 define double @f6(double %a, double %b, i64 %i1, i64 %unext) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: clgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = and i64 %unext, 4294967295
@@ -78,9 +80,9 @@ define double @f6(double %a, double %b, i64 %i1, i64 %unext) {
 
 ; Check register inequality.
 define double @f7(double %a, double %b, i64 %i1, i32 %unext) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: clgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = zext i32 %unext to i64
@@ -91,9 +93,9 @@ define double @f7(double %a, double %b, i64 %i1, i32 %unext) {
 
 ; ...and again with a different representation
 define double @f8(double %a, double %b, i64 %i1, i64 %unext) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clgfr %r2, %r3
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = and i64 %unext, 4294967295
@@ -102,11 +104,11 @@ define double @f8(double %a, double %b, i64 %i1, i64 %unext) {
   ret double %res
 }
 
-; Check unsigned comparisonn with memory.
+; Check unsigned comparison with memory.
 define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: clgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -118,7 +120,7 @@ define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check signed comparison with memory.
 define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: clgf
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -130,9 +132,9 @@ define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check memory equality.
 define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: clgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -144,9 +146,9 @@ define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check memory inequality.
 define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: clgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %unext = load i32 *%ptr
@@ -158,9 +160,9 @@ define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) {
 
 ; Check the high end of the aligned CLGF range.
 define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: clgf %r2, 524284(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131071
@@ -174,10 +176,10 @@ define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f14(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: agfi %r3, 524288
 ; CHECK: clgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 131072
@@ -190,9 +192,9 @@ define double @f14(double %a, double %b, i64 %i1, i32 *%base) {
 
 ; Check the high end of the negative aligned CLGF range.
 define double @f15(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: clgf %r2, -4(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -1
@@ -205,9 +207,9 @@ define double @f15(double %a, double %b, i64 %i1, i32 *%base) {
 
 ; Check the low end of the CLGF range.
 define double @f16(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: clgf %r2, -524288(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131072
@@ -221,10 +223,10 @@ define double @f16(double %a, double %b, i64 %i1, i32 *%base) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f17(double %a, double %b, i64 %i1, i32 *%base) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: agfi %r3, -524292
 ; CHECK: clgf %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -131073
@@ -237,9 +239,9 @@ define double @f17(double %a, double %b, i64 %i1, i32 *%base) {
 
 ; Check that CLGF allows an index.
 define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: clgf %r2, 524284({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -251,3 +253,104 @@ define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; Check that comparisons of spilled values can use CLGF rather than CLGFR.
+define i64 @f19(i32 *%ptr0) {
+; CHECK-LABEL: f19:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: clgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %ext0 = zext i32 %frob0 to i64
+  %ext1 = zext i32 %frob1 to i64
+  %ext2 = zext i32 %frob2 to i64
+  %ext3 = zext i32 %frob3 to i64
+  %ext4 = zext i32 %frob4 to i64
+  %ext5 = zext i32 %frob5 to i64
+  %ext6 = zext i32 %frob6 to i64
+  %ext7 = zext i32 %frob7 to i64
+  %ext8 = zext i32 %frob8 to i64
+  %ext9 = zext i32 %frob9 to i64
+
+  %cmp0 = icmp ult i64 %ret, %ext0
+  %cmp1 = icmp ult i64 %ret, %ext1
+  %cmp2 = icmp ult i64 %ret, %ext2
+  %cmp3 = icmp ult i64 %ret, %ext3
+  %cmp4 = icmp ult i64 %ret, %ext4
+  %cmp5 = icmp ult i64 %ret, %ext5
+  %cmp6 = icmp ult i64 %ret, %ext6
+  %cmp7 = icmp ult i64 %ret, %ext7
+  %cmp8 = icmp ult i64 %ret, %ext8
+  %cmp9 = icmp ult i64 %ret, %ext9
+
+  %sel0 = select i1 %cmp0, i64 %ret, i64 0
+  %sel1 = select i1 %cmp1, i64 %sel0, i64 1
+  %sel2 = select i1 %cmp2, i64 %sel1, i64 2
+  %sel3 = select i1 %cmp3, i64 %sel2, i64 3
+  %sel4 = select i1 %cmp4, i64 %sel3, i64 4
+  %sel5 = select i1 %cmp5, i64 %sel4, i64 5
+  %sel6 = select i1 %cmp6, i64 %sel5, i64 6
+  %sel7 = select i1 %cmp7, i64 %sel6, i64 7
+  %sel8 = select i1 %cmp8, i64 %sel7, i64 8
+  %sel9 = select i1 %cmp9, i64 %sel8, i64 9
+
+  ret i64 %sel9
+}
+
+; Check that a comparison with the load on the left is reversed (JL becomes JH) so that CLGF can be used.
+define double @f20(double %a, double %b, i64 %i2, i32 *%ptr) {
+; CHECK-LABEL: f20:
+; CHECK: clgf %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i1 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-07.ll b/test/CodeGen/SystemZ/int-cmp-07.ll
index 1a6f6226dd9f..530d1787a770 100644
--- a/test/CodeGen/SystemZ/int-cmp-07.ll
+++ b/test/CodeGen/SystemZ/int-cmp-07.ll
@@ -4,9 +4,8 @@
 
 ; Check CGR.
 define double @f1(double %a, double %b, i64 %i1, i64 %i2) {
-; CHECK: f1:
-; CHECK: cgr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f1:
+; CHECK: cgrjl %r2, %r3
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, %i2
@@ -16,9 +15,9 @@ define double @f1(double %a, double %b, i64 %i1, i64 %i2) {
 
 ; Check CG with no displacement.
 define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cg %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = load i64 *%ptr
@@ -29,9 +28,9 @@ define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
 
 ; Check the high end of the aligned CG range.
 define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cg %r2, 524280(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 65535
@@ -44,10 +43,10 @@ define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: cg %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 65536
@@ -59,9 +58,9 @@ define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check the high end of the negative aligned CG range.
 define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cg %r2, -8(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -1
@@ -73,9 +72,9 @@ define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check the low end of the CG range.
 define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cg %r2, -524288(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -65536
@@ -88,10 +87,10 @@ define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: cg %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -65537
@@ -103,9 +102,9 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check that CG allows an index.
 define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cg %r2, 524280({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -116,3 +115,16 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; Check that a comparison with the load on the left is reversed (JL becomes JH) so that CG can be used.
+define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: cg %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i1 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-08.ll b/test/CodeGen/SystemZ/int-cmp-08.ll
index 6e9a13e9cede..ebf158a1144b 100644
--- a/test/CodeGen/SystemZ/int-cmp-08.ll
+++ b/test/CodeGen/SystemZ/int-cmp-08.ll
@@ -4,9 +4,8 @@
 
 ; Check CLGR.
 define double @f1(double %a, double %b, i64 %i1, i64 %i2) {
-; CHECK: f1:
-; CHECK: clgr %r2, %r3
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f1:
+; CHECK: clgrjl %r2, %r3
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ult i64 %i1, %i2
@@ -16,9 +15,9 @@ define double @f1(double %a, double %b, i64 %i1, i64 %i2) {
 
 ; Check CLG with no displacement.
 define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clg %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %i2 = load i64 *%ptr
@@ -29,9 +28,9 @@ define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
 
 ; Check the high end of the aligned CLG range.
 define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clg %r2, 524280(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 65535
@@ -44,10 +43,10 @@ define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: clg %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 65536
@@ -59,9 +58,9 @@ define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check the high end of the negative aligned CLG range.
 define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clg %r2, -8(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -1
@@ -73,9 +72,9 @@ define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check the low end of the CLG range.
 define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: clg %r2, -524288(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -65536
@@ -88,10 +87,10 @@ define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: clg %r2, 0(%r3)
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -65537
@@ -103,9 +102,9 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check that CLG allows an index.
 define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clg %r2, 524280({{%r4,%r3|%r3,%r4}})
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add1 = add i64 %base, %index
@@ -116,3 +115,16 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; Check that a comparison with the load on the left is reversed (JL becomes JH) so that CLG can be used.
+define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: clg %r2, 0(%r3)
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i1 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-09.ll b/test/CodeGen/SystemZ/int-cmp-09.ll
index bb7213c6a436..0eb8c6688c0c 100644
--- a/test/CodeGen/SystemZ/int-cmp-09.ll
+++ b/test/CodeGen/SystemZ/int-cmp-09.ll
@@ -4,9 +4,8 @@
 
 ; Check comparisons with 0.
 define double @f1(double %a, double %b, i32 %i1) {
-; CHECK: f1:
-; CHECK: chi %r2, 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f1:
+; CHECK: cijl %r2, 0
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, 0
@@ -14,23 +13,45 @@ define double @f1(double %a, double %b, i32 %i1) {
   ret double %res
 }
 
-; Check comparisons with 1.
+; Check comparisons with 2 (a "< 1" test would be turned into "<= 0"; see f16).
 define double @f2(double %a, double %b, i32 %i1) {
-; CHECK: f2:
-; CHECK: chi %r2, 1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f2:
+; CHECK: cijl %r2, 2
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %cond = icmp slt i32 %i1, 1
+  %cond = icmp slt i32 %i1, 2
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
 
-; Check the high end of the CHI range.
+; Check the high end of the CIJ range.
 define double @f3(double %a, double %b, i32 %i1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
+; CHECK: cijl %r2, 127
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CHI instead.
+define double @f4(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f4:
+; CHECK: chi %r2, 128
+; CHECK-NEXT: jl
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CHI range.
+define double @f5(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f5:
 ; CHECK: chi %r2, 32767
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, 32767
@@ -39,10 +60,10 @@ define double @f3(double %a, double %b, i32 %i1) {
 }
 
 ; Check the next value up, which must use CFI.
-define double @f4(double %a, double %b, i32 %i1) {
-; CHECK: f4:
+define double @f6(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f6:
 ; CHECK: cfi %r2, 32768
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, 32768
@@ -51,10 +72,10 @@ define double @f4(double %a, double %b, i32 %i1) {
 }
 
 ; Check the high end of the signed 32-bit range.
-define double @f5(double %a, double %b, i32 %i1) {
-; CHECK: f5:
+define double @f7(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f7:
 ; CHECK: cfi %r2, 2147483647
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i32 %i1, 2147483647
@@ -63,10 +84,10 @@ define double @f5(double %a, double %b, i32 %i1) {
 }
 
 ; Check the next value up, which should be treated as a negative value.
-define double @f6(double %a, double %b, i32 %i1) {
-; CHECK: f6:
+define double @f8(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f8:
 ; CHECK: cfi %r2, -2147483648
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i32 %i1, 2147483648
@@ -74,11 +95,10 @@ define double @f6(double %a, double %b, i32 %i1) {
   ret double %res
 }
 
-; Check the high end of the negative CHI range.
-define double @f7(double %a, double %b, i32 %i1) {
-; CHECK: f7:
-; CHECK: chi %r2, -1
-; CHECK-NEXT: j{{g?}}l
+; Check the high end of the negative CIJ range.
+define double @f9(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f9:
+; CHECK: cijl %r2, -1
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, -1
@@ -86,11 +106,34 @@ define double @f7(double %a, double %b, i32 %i1) {
   ret double %res
 }
 
+; Check the low end of the CIJ range.
+define double @f10(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f10:
+; CHECK: cijl %r2, -128
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CHI instead.
+define double @f11(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f11:
+; CHECK: chi %r2, -129
+; CHECK-NEXT: jl
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
 ; Check the low end of the CHI range.
-define double @f8(double %a, double %b, i32 %i1) {
-; CHECK: f8:
+define double @f12(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f12:
 ; CHECK: chi %r2, -32768
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, -32768
@@ -99,10 +142,10 @@ define double @f8(double %a, double %b, i32 %i1) {
 }
 
 ; Check the next value down, which must use CFI instead.
-define double @f9(double %a, double %b, i32 %i1) {
-; CHECK: f9:
+define double @f13(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f13:
 ; CHECK: cfi %r2, -32769
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i32 %i1, -32769
@@ -111,10 +154,10 @@ define double @f9(double %a, double %b, i32 %i1) {
 }
 
 ; Check the low end of the signed 32-bit range.
-define double @f10(double %a, double %b, i32 %i1) {
-; CHECK: f10:
+define double @f14(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f14:
 ; CHECK: cfi %r2, -2147483648
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i32 %i1, -2147483648
@@ -123,13 +166,57 @@ define double @f10(double %a, double %b, i32 %i1) {
 }
 
 ; Check the next value down, which should be treated as a positive value.
-define double @f11(double %a, double %b, i32 %i1) {
-; CHECK: f11:
+define double @f15(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f15:
 ; CHECK: cfi %r2, 2147483647
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i32 %i1, -2147483649
   %res = select i1 %cond, double %a, double %b
   ret double %res
 }
+
+; Check that < 1 becomes <= 0.
+define double @f16(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f16:
+; CHECK: cijle %r2, 0
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that >= 1 becomes > 0.
+define double @f17(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f17:
+; CHECK: cijh %r2, 0
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp sge i32 %i1, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that > -1 becomes >= 0.
+define double @f18(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f18:
+; CHECK: cijhe %r2, 0
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp sgt i32 %i1, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that <= -1 becomes < 0.
+define double @f19(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f19:
+; CHECK: cijl %r2, 0
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp sle i32 %i1, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-10.ll b/test/CodeGen/SystemZ/int-cmp-10.ll
index f2d3ccd64af6..4d4c4bbd20d1 100644
--- a/test/CodeGen/SystemZ/int-cmp-10.ll
+++ b/test/CodeGen/SystemZ/int-cmp-10.ll
@@ -2,12 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; Check a value near the low end of the range.  We use CFI for comparisons
-; with zero, or things that are equivalent to them.
+; Check a value near the low end of the range.  We use signed forms for
+; comparisons with zero, or things that are equivalent to them.
 define double @f1(double %a, double %b, i32 %i1) {
-; CHECK: f1:
-; CHECK: clfi %r2, 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-LABEL: f1:
+; CHECK: clijh %r2, 1
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ugt i32 %i1, 1
@@ -15,11 +14,34 @@ define double @f1(double %a, double %b, i32 %i1) {
   ret double %res
 }
 
-; Check a value near the high end of the range.
+; Check the top of the CLIJ range.
 define double @f2(double %a, double %b, i32 %i1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
+; CHECK: clijl %r2, 255
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i32 %i1, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which is outside the CLIJ range and so needs a separate CLFI and branch.
+define double @f3(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f3:
+; CHECK: clfi %r2, 256
+; CHECK-NEXT: jl
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i32 %i1, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check a value near the high end of the range.
+define double @f4(double %a, double %b, i32 %i1) {
+; CHECK-LABEL: f4:
 ; CHECK: clfi %r2, 4294967280
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ult i32 %i1, 4294967280
diff --git a/test/CodeGen/SystemZ/int-cmp-11.ll b/test/CodeGen/SystemZ/int-cmp-11.ll
index 1bfb0c61cb90..c74135a5d393 100644
--- a/test/CodeGen/SystemZ/int-cmp-11.ll
+++ b/test/CodeGen/SystemZ/int-cmp-11.ll
@@ -4,9 +4,8 @@
 
 ; Check comparisons with 0.
 define double @f1(double %a, double %b, i64 %i1) {
-; CHECK: f1:
-; CHECK: cghi %r2, 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f1:
+; CHECK: cgijl %r2, 0
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, 0
@@ -16,9 +15,8 @@ define double @f1(double %a, double %b, i64 %i1) {
 
 ; Check comparisons with 1.
 define double @f2(double %a, double %b, i64 %i1) {
-; CHECK: f2:
-; CHECK: cghi %r2, 1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f2:
+; CHECK: cgijle %r2, 0
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, 1
@@ -26,11 +24,34 @@ define double @f2(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the CGHI range.
+; Check the high end of the CGIJ range.
 define double @f3(double %a, double %b, i64 %i1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
+; CHECK: cgijl %r2, 127
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CGHI instead.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f4:
+; CHECK: cghi %r2, 128
+; CHECK-NEXT: jl
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHI range.
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f5:
 ; CHECK: cghi %r2, 32767
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, 32767
@@ -39,10 +60,10 @@ define double @f3(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use CGFI.
-define double @f4(double %a, double %b, i64 %i1) {
-; CHECK: f4:
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f6:
 ; CHECK: cgfi %r2, 32768
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, 32768
@@ -51,10 +72,10 @@ define double @f4(double %a, double %b, i64 %i1) {
 }
 
 ; Check the high end of the CGFI range.
-define double @f5(double %a, double %b, i64 %i1) {
-; CHECK: f5:
+define double @f7(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f7:
 ; CHECK: cgfi %r2, 2147483647
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, 2147483647
@@ -63,10 +84,9 @@ define double @f5(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use register comparison.
-define double @f6(double %a, double %b, i64 %i1) {
-; CHECK: f6:
-; CHECK: cgr
-; CHECK-NEXT: j{{g?}}l
+define double @f8(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f8:
+; CHECK: cgrjl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, 2147483648
@@ -74,11 +94,10 @@ define double @f6(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the negative CGHI range.
-define double @f7(double %a, double %b, i64 %i1) {
-; CHECK: f7:
-; CHECK: cghi %r2, -1
-; CHECK-NEXT: j{{g?}}l
+; Check the high end of the negative CGIJ range.
+define double @f9(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f9:
+; CHECK: cgijl %r2, -1
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, -1
@@ -86,11 +105,34 @@ define double @f7(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
+; Check the low end of the CGIJ range.
+define double @f10(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f10:
+; CHECK: cgijl %r2, -128
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CGHI instead.
+define double @f11(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f11:
+; CHECK: cghi %r2, -129
+; CHECK-NEXT: jl
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
 ; Check the low end of the CGHI range.
-define double @f8(double %a, double %b, i64 %i1) {
-; CHECK: f8:
+define double @f12(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f12:
 ; CHECK: cghi %r2, -32768
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, -32768
@@ -99,10 +141,10 @@ define double @f8(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value down, which must use CGFI instead.
-define double @f9(double %a, double %b, i64 %i1) {
-; CHECK: f9:
+define double @f13(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f13:
 ; CHECK: cgfi %r2, -32769
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, -32769
@@ -111,10 +153,10 @@ define double @f9(double %a, double %b, i64 %i1) {
 }
 
 ; Check the low end of the CGFI range.
-define double @f10(double %a, double %b, i64 %i1) {
-; CHECK: f10:
+define double @f14(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f14:
 ; CHECK: cgfi %r2, -2147483648
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, -2147483648
@@ -123,10 +165,9 @@ define double @f10(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value down, which must use register comparison.
-define double @f11(double %a, double %b, i64 %i1) {
-; CHECK: f11:
-; CHECK: cgr
-; CHECK-NEXT: j{{g?}}l
+define double @f15(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f15:
+; CHECK: cgrjl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp slt i64 %i1, -2147483649
diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll
index 0288730c3a80..077b22423e06 100644
--- a/test/CodeGen/SystemZ/int-cmp-12.ll
+++ b/test/CodeGen/SystemZ/int-cmp-12.ll
@@ -2,12 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; Check a value near the low end of the range.  We use CGFI for comparisons
-; with zero, or things that are equivalent to them.
+; Check a value near the low end of the range.  We use signed forms for
+; comparisons with zero, or things that are equivalent to them.
 define double @f1(double %a, double %b, i64 %i1) {
-; CHECK: f1:
-; CHECK: clgfi %r2, 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-LABEL: f1:
+; CHECK: clgijh %r2, 1
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ugt i64 %i1, 1
@@ -15,11 +14,34 @@ define double @f1(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the CLGFI range.
+; Check the top of the CLGIJ range.
 define double @f2(double %a, double %b, i64 %i1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
+; CHECK: clgijl %r2, 255
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i64 %i1, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which is outside the CLGIJ range and so needs a separate CLGFI and branch.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f3:
+; CHECK: clgfi %r2, 256
+; CHECK-NEXT: jl
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i64 %i1, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLGFI range.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f4:
 ; CHECK: clgfi %r2, 4294967295
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ult i64 %i1, 4294967295
@@ -28,10 +50,9 @@ define double @f2(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use a register comparison.
-define double @f3(double %a, double %b, i64 %i1) {
-; CHECK: f3:
-; CHECK: clgr %r2,
-; CHECK-NEXT: j{{g?}}l
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f5:
+; CHECK: clgrjl %r2,
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ult i64 %i1, 4294967296
diff --git a/test/CodeGen/SystemZ/int-cmp-13.ll b/test/CodeGen/SystemZ/int-cmp-13.ll
index c180831debb0..53af0c868a25 100644
--- a/test/CodeGen/SystemZ/int-cmp-13.ll
+++ b/test/CodeGen/SystemZ/int-cmp-13.ll
@@ -4,9 +4,8 @@
 
 ; Check comparisons with 0.
 define double @f1(double %a, double %b, i64 %i1) {
-; CHECK: f1:
-; CHECK: cghi %r2, 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-LABEL: f1:
+; CHECK: cgije %r2, 0
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 0
@@ -14,11 +13,34 @@ define double @f1(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the CGHI range.
+; Check the high end of the CGIJ range.
 define double @f2(double %a, double %b, i64 %i1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
+; CHECK: cgije %r2, 127
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CGHI instead.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f3:
+; CHECK: cghi %r2, 128
+; CHECK-NEXT: je
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHI range.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f4:
 ; CHECK: cghi %r2, 32767
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 32767
@@ -27,10 +49,10 @@ define double @f2(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use CGFI.
-define double @f3(double %a, double %b, i64 %i1) {
-; CHECK: f3:
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f5:
 ; CHECK: cgfi %r2, 32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 32768
@@ -39,10 +61,10 @@ define double @f3(double %a, double %b, i64 %i1) {
 }
 
 ; Check the high end of the CGFI range.
-define double @f4(double %a, double %b, i64 %i1) {
-; CHECK: f4:
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f6:
 ; CHECK: cgfi %r2, 2147483647
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 2147483647
@@ -51,10 +73,10 @@ define double @f4(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which should use CLGFI instead.
-define double @f5(double %a, double %b, i64 %i1) {
-; CHECK: f5:
+define double @f7(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f7:
 ; CHECK: clgfi %r2, 2147483648
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 2147483648
@@ -63,10 +85,10 @@ define double @f5(double %a, double %b, i64 %i1) {
 }
 
 ; Check the high end of the CLGFI range.
-define double @f6(double %a, double %b, i64 %i1) {
-; CHECK: f6:
+define double @f8(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f8:
 ; CHECK: clgfi %r2, 4294967295
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 4294967295
@@ -75,10 +97,9 @@ define double @f6(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use a register comparison.
-define double @f7(double %a, double %b, i64 %i1) {
-; CHECK: f7:
-; CHECK: cgr %r2,
-; CHECK-NEXT: j{{g?}}e
+define double @f9(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f9:
+; CHECK: cgrje %r2,
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, 4294967296
@@ -86,11 +107,10 @@ define double @f7(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the negative CGHI range.
-define double @f8(double %a, double %b, i64 %i1) {
-; CHECK: f8:
-; CHECK: cghi %r2, -1
-; CHECK-NEXT: j{{g?}}e
+; Check the high end of the negative CGIJ range.
+define double @f10(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f10:
+; CHECK: cgije %r2, -1
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, -1
@@ -98,11 +118,34 @@ define double @f8(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
+; Check the low end of the CGIJ range.
+define double @f11(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f11:
+; CHECK: cgije %r2, -128
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CGHI instead.
+define double @f12(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f12:
+; CHECK: cghi %r2, -129
+; CHECK-NEXT: je
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
 ; Check the low end of the CGHI range.
-define double @f9(double %a, double %b, i64 %i1) {
-; CHECK: f9:
+define double @f13(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f13:
 ; CHECK: cghi %r2, -32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, -32768
@@ -111,10 +154,10 @@ define double @f9(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value down, which must use CGFI instead.
-define double @f10(double %a, double %b, i64 %i1) {
-; CHECK: f10:
+define double @f14(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f14:
 ; CHECK: cgfi %r2, -32769
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, -32769
@@ -123,10 +166,10 @@ define double @f10(double %a, double %b, i64 %i1) {
 }
 
 ; Check the low end of the CGFI range.
-define double @f11(double %a, double %b, i64 %i1) {
-; CHECK: f11:
+define double @f15(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f15:
 ; CHECK: cgfi %r2, -2147483648
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, -2147483648
@@ -135,10 +178,9 @@ define double @f11(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value down, which must use register comparison.
-define double @f12(double %a, double %b, i64 %i1) {
-; CHECK: f12:
-; CHECK: cgr
-; CHECK-NEXT: j{{g?}}e
+define double @f16(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f16:
+; CHECK: cgrje
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp eq i64 %i1, -2147483649
diff --git a/test/CodeGen/SystemZ/int-cmp-14.ll b/test/CodeGen/SystemZ/int-cmp-14.ll
index 6a7e0e6d552a..4dbd0ece3af6 100644
--- a/test/CodeGen/SystemZ/int-cmp-14.ll
+++ b/test/CodeGen/SystemZ/int-cmp-14.ll
@@ -4,9 +4,8 @@
 
 ; Check comparisons with 0.
 define double @f1(double %a, double %b, i64 %i1) {
-; CHECK: f1:
-; CHECK: cghi %r2, 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-LABEL: f1:
+; CHECK: cgijlh %r2, 0
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 0
@@ -14,11 +13,34 @@ define double @f1(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the CGHI range.
+; Check the high end of the CGIJ range.
 define double @f2(double %a, double %b, i64 %i1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
+; CHECK: cgijlh %r2, 127
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CGHI instead.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f3:
+; CHECK: cghi %r2, 128
+; CHECK-NEXT: jlh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHI range.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f4:
 ; CHECK: cghi %r2, 32767
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 32767
@@ -27,10 +49,10 @@ define double @f2(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use CGFI.
-define double @f3(double %a, double %b, i64 %i1) {
-; CHECK: f3:
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f5:
 ; CHECK: cgfi %r2, 32768
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 32768
@@ -39,10 +61,10 @@ define double @f3(double %a, double %b, i64 %i1) {
 }
 
 ; Check the high end of the CGFI range.
-define double @f4(double %a, double %b, i64 %i1) {
-; CHECK: f4:
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f6:
 ; CHECK: cgfi %r2, 2147483647
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 2147483647
@@ -51,10 +73,10 @@ define double @f4(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which should use CLGFI instead.
-define double @f5(double %a, double %b, i64 %i1) {
-; CHECK: f5:
+define double @f7(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f7:
 ; CHECK: clgfi %r2, 2147483648
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 2147483648
@@ -63,10 +85,10 @@ define double @f5(double %a, double %b, i64 %i1) {
 }
 
 ; Check the high end of the CLGFI range.
-define double @f6(double %a, double %b, i64 %i1) {
-; CHECK: f6:
+define double @f8(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f8:
 ; CHECK: clgfi %r2, 4294967295
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 4294967295
@@ -75,10 +97,9 @@ define double @f6(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value up, which must use a register comparison.
-define double @f7(double %a, double %b, i64 %i1) {
-; CHECK: f7:
-; CHECK: cgr %r2,
-; CHECK-NEXT: j{{g?}}lh
+define double @f9(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f9:
+; CHECK: cgrjlh %r2,
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, 4294967296
@@ -86,11 +107,10 @@ define double @f7(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
-; Check the high end of the negative CGHI range.
-define double @f8(double %a, double %b, i64 %i1) {
-; CHECK: f8:
-; CHECK: cghi %r2, -1
-; CHECK-NEXT: j{{g?}}lh
+; Check the high end of the negative CGIJ range.
+define double @f10(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f10:
+; CHECK: cgijlh %r2, -1
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, -1
@@ -98,11 +118,34 @@ define double @f8(double %a, double %b, i64 %i1) {
   ret double %res
 }
 
+; Check the low end of the CGIJ range.
+define double @f11(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f11:
+; CHECK: cgijlh %r2, -128
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CGHI instead.
+define double @f12(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f12:
+; CHECK: cghi %r2, -129
+; CHECK-NEXT: jlh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
 ; Check the low end of the CGHI range.
-define double @f9(double %a, double %b, i64 %i1) {
-; CHECK: f9:
+define double @f13(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f13:
 ; CHECK: cghi %r2, -32768
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, -32768
@@ -111,10 +154,10 @@ define double @f9(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value down, which must use CGFI instead.
-define double @f10(double %a, double %b, i64 %i1) {
-; CHECK: f10:
+define double @f14(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f14:
 ; CHECK: cgfi %r2, -32769
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, -32769
@@ -123,10 +166,10 @@ define double @f10(double %a, double %b, i64 %i1) {
 }
 
 ; Check the low end of the CGFI range.
-define double @f11(double %a, double %b, i64 %i1) {
-; CHECK: f11:
+define double @f15(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f15:
 ; CHECK: cgfi %r2, -2147483648
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, -2147483648
@@ -135,10 +178,9 @@ define double @f11(double %a, double %b, i64 %i1) {
 }
 
 ; Check the next value down, which must use register comparison.
-define double @f12(double %a, double %b, i64 %i1) {
-; CHECK: f12:
-; CHECK: cgr
-; CHECK-NEXT: j{{g?}}lh
+define double @f16(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f16:
+; CHECK: cgrjlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %cond = icmp ne i64 %i1, -2147483649
diff --git a/test/CodeGen/SystemZ/int-cmp-15.ll b/test/CodeGen/SystemZ/int-cmp-15.ll
index 6bb7e2b3ac3d..48a068e49e8f 100644
--- a/test/CodeGen/SystemZ/int-cmp-15.ll
+++ b/test/CodeGen/SystemZ/int-cmp-15.ll
@@ -4,9 +4,9 @@
 
 ; Check ordered comparisons near the low end of the unsigned 8-bit range.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp ugt i8 %val, 1
@@ -16,9 +16,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 
 ; Check ordered comparisons near the high end of the unsigned 8-bit range.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp ult i8 %val, 254
@@ -28,9 +28,9 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 
 ; Check tests for negative bytes.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp slt i8 %val, 0
@@ -40,9 +40,9 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 
 ; ...and an alternative form.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp sle i8 %val, -1
@@ -52,9 +52,9 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 
 ; Check tests for non-negative bytes.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp sge i8 %val, 0
@@ -64,9 +64,9 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 
 ; ...and an alternative form.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp sgt i8 %val, -1
@@ -76,9 +76,9 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 
 ; Check equality comparisons at the low end of the signed 8-bit range.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp eq i8 %val, -128
@@ -88,9 +88,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 
 ; Check equality comparisons at the low end of the unsigned 8-bit range.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp eq i8 %val, 0
@@ -100,9 +100,9 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 
 ; Check equality comparisons at the high end of the signed 8-bit range.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp eq i8 %val, 127
@@ -112,9 +112,9 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 
 ; Check equality comparisons at the high end of the unsigned 8-bit range.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %cond = icmp eq i8 %val, 255
@@ -124,7 +124,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the CLI range.
 define double @f11(double %a, double %b, i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: cli 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -136,7 +136,7 @@ define double @f11(double %a, double %b, i8 *%src) {
 
 ; Check the next byte up, which should use CLIY instead of CLI.
 define double @f12(double %a, double %b, i8 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: cliy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -148,7 +148,7 @@ define double @f12(double %a, double %b, i8 *%src) {
 
 ; Check the high end of the CLIY range.
 define double @f13(double %a, double %b, i8 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: cliy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -161,7 +161,7 @@ define double @f13(double %a, double %b, i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f14(double %a, double %b, i8 *%src) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: agfi %r2, 524288
 ; CHECK: cli 0(%r2), 127
 ; CHECK: br %r14
@@ -174,7 +174,7 @@ define double @f14(double %a, double %b, i8 *%src) {
 
 ; Check the high end of the negative CLIY range.
 define double @f15(double %a, double %b, i8 *%src) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: cliy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -186,7 +186,7 @@ define double @f15(double %a, double %b, i8 *%src) {
 
 ; Check the low end of the CLIY range.
 define double @f16(double %a, double %b, i8 *%src) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: cliy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -199,7 +199,7 @@ define double @f16(double %a, double %b, i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define double @f17(double %a, double %b, i8 *%src) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: agfi %r2, -524289
 ; CHECK: cli 0(%r2), 127
 ; CHECK: br %r14
@@ -212,7 +212,7 @@ define double @f17(double %a, double %b, i8 *%src) {
 
 ; Check that CLI does not allow an index
 define double @f18(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: agr %r2, %r3
 ; CHECK: cli 4095(%r2), 127
 ; CHECK: br %r14
@@ -227,7 +227,7 @@ define double @f18(double %a, double %b, i64 %base, i64 %index) {
 
 ; Check that CLIY does not allow an index
 define double @f19(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f19:
+; CHECK-LABEL: f19:
 ; CHECK: agr %r2, %r3
 ; CHECK: cliy 4096(%r2), 127
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/int-cmp-16.ll b/test/CodeGen/SystemZ/int-cmp-16.ll
index 8af854efaabf..be206d9c9470 100644
--- a/test/CodeGen/SystemZ/int-cmp-16.ll
+++ b/test/CodeGen/SystemZ/int-cmp-16.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 8-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the 8-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the signed 8-bit range, using sign extension.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 
 ; Check the low end of the signed 8-bit range, using sign extension.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-17.ll b/test/CodeGen/SystemZ/int-cmp-17.ll
index d4d5e98b8358..3df4ecc66803 100644
--- a/test/CodeGen/SystemZ/int-cmp-17.ll
+++ b/test/CodeGen/SystemZ/int-cmp-17.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 8-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the 8-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the signed 8-bit range, using sign extension.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 
 ; Check the low end of the signed 8-bit range, using sign extension.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-18.ll b/test/CodeGen/SystemZ/int-cmp-18.ll
index 9822dc212248..d03d6ac9a2c7 100644
--- a/test/CodeGen/SystemZ/int-cmp-18.ll
+++ b/test/CodeGen/SystemZ/int-cmp-18.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 8-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the 8-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the signed 8-bit range, using sign extension.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 
 ; Check the low end of the signed 8-bit range, using sign extension.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-19.ll b/test/CodeGen/SystemZ/int-cmp-19.ll
index 7d29dbcedcd6..b5f0856b4002 100644
--- a/test/CodeGen/SystemZ/int-cmp-19.ll
+++ b/test/CodeGen/SystemZ/int-cmp-19.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 8-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the 8-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cli 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 
 ; Check the high end of the signed 8-bit range, using sign extension.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 255
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 
 ; Check the low end of the signed 8-bit range, using sign extension.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-20.ll b/test/CodeGen/SystemZ/int-cmp-20.ll
index 8fffbc86a737..98c41cd3a240 100644
--- a/test/CodeGen/SystemZ/int-cmp-20.ll
+++ b/test/CodeGen/SystemZ/int-cmp-20.ll
@@ -6,9 +6,9 @@
 ; Check unsigned comparison near the low end of the CLI range, using zero
 ; extension.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison near the low end of the CLI range, using sign
 ; extension.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison near the high end of the CLI range, using zero
 ; extension.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison near the high end of the CLI range, using sign
 ; extension.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -62,8 +62,8 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison above the high end of the CLI range, using zero
 ; extension.  The condition is always true.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
-; CHECK-NOT: cli
+; CHECK-LABEL: f5:
+; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -78,8 +78,8 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 ; unlikely to occur in practice, we don't bother optimizing the second case,
 ; and simply ignore CLI for this range.  First check the low end of the range.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
-; CHECK-NOT: cli
+; CHECK-LABEL: f6:
+; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -90,8 +90,8 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 
 ; ...and then the high end.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
-; CHECK-NOT: cli
+; CHECK-LABEL: f7:
+; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -103,9 +103,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the low end of the CLI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -117,8 +117,8 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the low end of the CLI range, using sign
 ; extension.  This cannot use CLI.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
-; CHECK-NOT: cli
+; CHECK-LABEL: f9:
+; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -130,9 +130,9 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the high end of the CLI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -144,8 +144,8 @@ define double @f10(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the high end of the CLI range, using sign
 ; extension.  This cannot use CLI.
 define double @f11(double %a, double %b, i8 *%ptr) {
-; CHECK: f11:
-; CHECK-NOT: cli
+; CHECK-LABEL: f11:
+; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -157,8 +157,8 @@ define double @f11(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison above the high end of the CLI range, using zero
 ; extension.  The condition is always true.
 define double @f12(double %a, double %b, i8 *%ptr) {
-; CHECK: f12:
-; CHECK-NOT: cli
+; CHECK-LABEL: f12:
+; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i32
@@ -169,9 +169,9 @@ define double @f12(double %a, double %b, i8 *%ptr) {
 
 ; Check tests for nonnegative values.
 define double @f13(double %a, double %b, i8 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -182,9 +182,9 @@ define double @f13(double %a, double %b, i8 *%ptr) {
 
 ; ...and another form
 define double @f14(double %a, double %b, i8 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -195,9 +195,9 @@ define double @f14(double %a, double %b, i8 *%ptr) {
 
 ; Check tests for negative values.
 define double @f15(double %a, double %b, i8 *%ptr) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
@@ -208,9 +208,9 @@ define double @f15(double %a, double %b, i8 *%ptr) {
 
 ; ...and another form
 define double @f16(double %a, double %b, i8 *%ptr) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i32
diff --git a/test/CodeGen/SystemZ/int-cmp-21.ll b/test/CodeGen/SystemZ/int-cmp-21.ll
index 43447b8fda07..ca9225dead92 100644
--- a/test/CodeGen/SystemZ/int-cmp-21.ll
+++ b/test/CodeGen/SystemZ/int-cmp-21.ll
@@ -6,9 +6,9 @@
 ; Check unsigned comparison near the low end of the CLI range, using zero
 ; extension.
 define double @f1(double %a, double %b, i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison near the low end of the CLI range, using sign
 ; extension.
 define double @f2(double %a, double %b, i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison near the high end of the CLI range, using zero
 ; extension.
 define double @f3(double %a, double %b, i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison near the high end of the CLI range, using sign
 ; extension.
 define double @f4(double %a, double %b, i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -62,7 +62,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
 ; Check unsigned comparison above the high end of the CLI range, using zero
 ; extension.  The condition is always true.
 define double @f5(double %a, double %b, i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -78,7 +78,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
 ; unlikely to occur in practice, we don't bother optimizing the second case,
 ; and simply ignore CLI for this range.  First check the low end of the range.
 define double @f6(double %a, double %b, i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -90,7 +90,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
 
 ; ...and then the high end.
 define double @f7(double %a, double %b, i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -103,9 +103,9 @@ define double @f7(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the low end of the CLI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f8(double %a, double %b, i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cli 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -117,7 +117,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the low end of the CLI range, using sign
 ; extension.  This cannot use CLI.
 define double @f9(double %a, double %b, i8 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -130,9 +130,9 @@ define double @f9(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the high end of the CLI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f10(double %a, double %b, i8 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: cli 0(%r2), 254
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = zext i8 %val to i64
@@ -144,7 +144,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison near the high end of the CLI range, using sign
 ; extension.  This cannot use CLI.
 define double @f11(double %a, double %b, i8 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -157,7 +157,7 @@ define double @f11(double %a, double %b, i8 *%ptr) {
 ; Check signed comparison above the high end of the CLI range, using zero
 ; extension.  The condition is always true.
 define double @f12(double %a, double %b, i8 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -169,9 +169,9 @@ define double @f12(double %a, double %b, i8 *%ptr) {
 
 ; Check tests for nonnegative values.
 define double @f13(double %a, double %b, i8 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -182,9 +182,9 @@ define double @f13(double %a, double %b, i8 *%ptr) {
 
 ; ...and another form
 define double @f14(double %a, double %b, i8 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: cli 0(%r2), 128
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -195,9 +195,9 @@ define double @f14(double %a, double %b, i8 *%ptr) {
 
 ; Check tests for negative values.
 define double @f15(double %a, double %b, i8 *%ptr) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
@@ -208,9 +208,9 @@ define double @f15(double %a, double %b, i8 *%ptr) {
 
 ; ...and another form
 define double @f16(double %a, double %b, i8 *%ptr) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: cli 0(%r2), 127
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i8 *%ptr
   %ext = sext i8 %val to i64
diff --git a/test/CodeGen/SystemZ/int-cmp-22.ll b/test/CodeGen/SystemZ/int-cmp-22.ll
index 513d4be2f423..43daec95b7d8 100644
--- a/test/CodeGen/SystemZ/int-cmp-22.ll
+++ b/test/CodeGen/SystemZ/int-cmp-22.ll
@@ -4,9 +4,9 @@
 
 ; Check comparisons with 0.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: chhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with 1.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
-; CHECK: chhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f2:
+; CHECK: chhsi 0(%r2), 0
+; CHECK-NEXT: jle
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check a value near the high end of the signed 16-bit range.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: chhsi 0(%r2), 32766
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with -1.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: chhsi 0(%r2), -1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 
 ; Check a value near the low end of the 16-bit signed range.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: chhsi 0(%r2), -32766
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the CHHSI range.
 define double @f6(double %a, double %b, i16 %i1, i16 *%base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: chhsi 4094(%r3), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%base, i64 2047
@@ -83,10 +83,10 @@ define double @f6(double %a, double %b, i16 %i1, i16 *%base) {
 
 ; Check the next halfword up, which needs separate address logic,
 define double @f7(double %a, double %b, i16 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: aghi %r2, 4096
 ; CHECK: chhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%base, i64 2048
@@ -98,10 +98,10 @@ define double @f7(double %a, double %b, i16 *%base) {
 
 ; Check negative offsets, which also need separate address logic.
 define double @f8(double %a, double %b, i16 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: aghi %r2, -2
 ; CHECK: chhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%base, i64 -1
@@ -113,10 +113,10 @@ define double @f8(double %a, double %b, i16 *%base) {
 
 ; Check that CHHSI does not allow indices.
 define double @f9(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agr {{%r2, %r3|%r3, %r2}}
 ; CHECK: chhsi 0({{%r[23]}}), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add = add i64 %base, %index
diff --git a/test/CodeGen/SystemZ/int-cmp-23.ll b/test/CodeGen/SystemZ/int-cmp-23.ll
index 40e13310d55c..99fe74b1c787 100644
--- a/test/CodeGen/SystemZ/int-cmp-23.ll
+++ b/test/CodeGen/SystemZ/int-cmp-23.ll
@@ -4,9 +4,9 @@
 
 ; Check a value near the low end of the unsigned 16-bit range.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check a value near the high end of the unsigned 16-bit range.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the CLHHSI range.
 define double @f3(double %a, double %b, i16 %i1, i16 *%base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clhhsi 4094(%r3), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%base, i64 2047
@@ -44,10 +44,10 @@ define double @f3(double %a, double %b, i16 %i1, i16 *%base) {
 
 ; Check the next halfword up, which needs separate address logic,
 define double @f4(double %a, double %b, i16 *%base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: aghi %r2, 4096
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%base, i64 2048
@@ -59,10 +59,10 @@ define double @f4(double %a, double %b, i16 *%base) {
 
 ; Check negative offsets, which also need separate address logic.
 define double @f5(double %a, double %b, i16 *%base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: aghi %r2, -2
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%base, i64 -1
@@ -74,10 +74,10 @@ define double @f5(double %a, double %b, i16 *%base) {
 
 ; Check that CLHHSI does not allow indices.
 define double @f6(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agr {{%r2, %r3|%r3, %r2}}
 ; CHECK: clhhsi 0({{%r[23]}}), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add = add i64 %base, %index
diff --git a/test/CodeGen/SystemZ/int-cmp-24.ll b/test/CodeGen/SystemZ/int-cmp-24.ll
index 46186cd74b53..1a8e587b0341 100644
--- a/test/CodeGen/SystemZ/int-cmp-24.ll
+++ b/test/CodeGen/SystemZ/int-cmp-24.ll
@@ -4,9 +4,9 @@
 
 ; Check the low end of the unsigned 16-bit range.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the unsigned 16-bit range.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the low end of the signed 16-bit range.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clhhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the signed 16-bit range.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clhhsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-25.ll b/test/CodeGen/SystemZ/int-cmp-25.ll
index a3a223fa3448..50803df1ba91 100644
--- a/test/CodeGen/SystemZ/int-cmp-25.ll
+++ b/test/CodeGen/SystemZ/int-cmp-25.ll
@@ -4,9 +4,9 @@
 
 ; Check the low end of the unsigned 16-bit range.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the unsigned 16-bit range.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the low end of the signed 16-bit range.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clhhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the signed 16-bit range.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clhhsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-26.ll b/test/CodeGen/SystemZ/int-cmp-26.ll
index 31330b2a6397..60778654b275 100644
--- a/test/CodeGen/SystemZ/int-cmp-26.ll
+++ b/test/CodeGen/SystemZ/int-cmp-26.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 16-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the 16-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the signed 16-bit range, using sign extension.
 define double @f6(double %a, double %b, i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: clhhsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i16 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) {
 
 ; Check the low end of the signed 16-bit range, using sign extension.
 define double @f9(double %a, double %b, i16 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: clhhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i16 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-27.ll b/test/CodeGen/SystemZ/int-cmp-27.ll
index 7cbea3d92526..3102f5c5faa4 100644
--- a/test/CodeGen/SystemZ/int-cmp-27.ll
+++ b/test/CodeGen/SystemZ/int-cmp-27.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 16-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the 16-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the signed 16-bit range, using sign extension.
 define double @f6(double %a, double %b, i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: clhhsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i16 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) {
 
 ; Check the low end of the signed 16-bit range, using sign extension.
 define double @f9(double %a, double %b, i16 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: clhhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i16 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-28.ll b/test/CodeGen/SystemZ/int-cmp-28.ll
index 629eb4f06013..c3b905974ebc 100644
--- a/test/CodeGen/SystemZ/int-cmp-28.ll
+++ b/test/CodeGen/SystemZ/int-cmp-28.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 16-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the 16-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the signed 16-bit range, using sign extension.
 define double @f6(double %a, double %b, i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: clhhsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i16 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) {
 
 ; Check the low end of the signed 16-bit range, using sign extension.
 define double @f9(double %a, double %b, i16 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: clhhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i16 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-29.ll b/test/CodeGen/SystemZ/int-cmp-29.ll
index de41dd782d21..1b40d8cfb2ae 100644
--- a/test/CodeGen/SystemZ/int-cmp-29.ll
+++ b/test/CodeGen/SystemZ/int-cmp-29.ll
@@ -5,9 +5,9 @@
 
 ; Check the low end of the 16-bit unsigned range, with zero extension.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the 16-bit unsigned range, with zero extension.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 
 ; Check the next value up, with zero extension.  The condition is always false.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -44,7 +44,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 ; Check comparisons with -1, with zero extension.
 ; This condition is also always false.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with 0, using sign extension.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clhhsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -69,9 +69,9 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 
 ; Check the high end of the signed 16-bit range, using sign extension.
 define double @f6(double %a, double %b, i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: clhhsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -83,7 +83,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
 ; Check the next value up, using sign extension.
 ; The condition is always false.
 define double @f7(double %a, double %b, i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -95,9 +95,9 @@ define double @f7(double %a, double %b, i16 *%ptr) {
 
 ; Check comparisons with -1, using sign extension.
 define double @f8(double %a, double %b, i16 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clhhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -108,9 +108,9 @@ define double @f8(double %a, double %b, i16 *%ptr) {
 
 ; Check the low end of the signed 16-bit range, using sign extension.
 define double @f9(double %a, double %b, i16 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: clhhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
 ; Check the next value down, using sign extension.
 ; The condition is always false.
 define double @f10(double %a, double %b, i16 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-30.ll b/test/CodeGen/SystemZ/int-cmp-30.ll
index 713ad8ef841d..6c9498cb3320 100644
--- a/test/CodeGen/SystemZ/int-cmp-30.ll
+++ b/test/CodeGen/SystemZ/int-cmp-30.ll
@@ -6,9 +6,9 @@
 ; Check unsigned comparison near the low end of the CLHHSI range, using zero
 ; extension.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison near the low end of the CLHHSI range, using sign
 ; extension.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison near the high end of the CLHHSI range, using zero
 ; extension.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison near the high end of the CLHHSI range, using sign
 ; extension.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -62,7 +62,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison above the high end of the CLHHSI range, using zero
 ; extension.  The condition is always true.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -79,7 +79,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 ; and simply ignore CLHHSI for this range.  First check the low end of the
 ; range.
 define double @f6(double %a, double %b, i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -91,7 +91,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
 
 ; ...and then the high end.
 define double @f7(double %a, double %b, i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -104,9 +104,9 @@ define double @f7(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the low end of the CLHHSI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f8(double %a, double %b, i16 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -118,9 +118,9 @@ define double @f8(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the low end of the CLHHSI range, using sign
 ; extension.  This should use CHHSI instead.
 define double @f9(double %a, double %b, i16 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: chhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -132,9 +132,9 @@ define double @f9(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the high end of the CLHHSI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f10(double %a, double %b, i16 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i32
@@ -146,9 +146,9 @@ define double @f10(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the high end of the CLHHSI range, using sign
 ; extension.  This should use CHHSI instead.
 define double @f11(double %a, double %b, i16 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: chhsi 0(%r2), -2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -160,7 +160,7 @@ define double @f11(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison above the high end of the CLHHSI range, using zero
 ; extension.  The condition is always true.
 define double @f12(double %a, double %b, i16 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -173,9 +173,9 @@ define double @f12(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the high end of the CHHSI range, using sign
 ; extension.
 define double @f13(double %a, double %b, i16 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: chhsi 0(%r2), 32766
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -187,7 +187,7 @@ define double @f13(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison above the high end of the CHHSI range, using sign
 ; extension.  This condition is always true.
 define double @f14(double %a, double %b, i16 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -200,9 +200,9 @@ define double @f14(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the low end of the CHHSI range, using sign
 ; extension.
 define double @f15(double %a, double %b, i16 *%ptr) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: chhsi 0(%r2), -32767
-; CHECK-NEXT: j{{g?}}g
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i32
@@ -214,7 +214,7 @@ define double @f15(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison below the low end of the CHHSI range, using sign
 ; extension.  This condition is always true.
 define double @f16(double %a, double %b, i16 *%ptr) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-31.ll b/test/CodeGen/SystemZ/int-cmp-31.ll
index cabe9b83a135..21539f20470e 100644
--- a/test/CodeGen/SystemZ/int-cmp-31.ll
+++ b/test/CodeGen/SystemZ/int-cmp-31.ll
@@ -6,9 +6,9 @@
 ; Check unsigned comparison near the low end of the CLHHSI range, using zero
 ; extension.
 define double @f1(double %a, double %b, i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -20,9 +20,9 @@ define double @f1(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison near the low end of the CLHHSI range, using sign
 ; extension.
 define double @f2(double %a, double %b, i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -34,9 +34,9 @@ define double @f2(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison near the high end of the CLHHSI range, using zero
 ; extension.
 define double @f3(double %a, double %b, i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -48,9 +48,9 @@ define double @f3(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison near the high end of the CLHHSI range, using sign
 ; extension.
 define double @f4(double %a, double %b, i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -62,7 +62,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
 ; Check unsigned comparison above the high end of the CLHHSI range, using zero
 ; extension.  The condition is always true.
 define double @f5(double %a, double %b, i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -79,7 +79,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
 ; and simply ignore CLHHSI for this range.  First check the low end of the
 ; range.
 define double @f6(double %a, double %b, i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -91,7 +91,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
 
 ; ...and then the high end.
 define double @f7(double %a, double %b, i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -104,9 +104,9 @@ define double @f7(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the low end of the CLHHSI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f8(double %a, double %b, i16 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: clhhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -118,9 +118,9 @@ define double @f8(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the low end of the CLHHSI range, using sign
 ; extension.  This should use CHHSI instead.
 define double @f9(double %a, double %b, i16 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: chhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -132,9 +132,9 @@ define double @f9(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the high end of the CLHHSI range, using zero
 ; extension.  This is equivalent to unsigned comparison.
 define double @f10(double %a, double %b, i16 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: clhhsi 0(%r2), 65534
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = zext i16 %val to i64
@@ -146,9 +146,9 @@ define double @f10(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the high end of the CLHHSI range, using sign
 ; extension.  This should use CHHSI instead.
 define double @f11(double %a, double %b, i16 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: chhsi 0(%r2), -2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -160,7 +160,7 @@ define double @f11(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison above the high end of the CLHHSI range, using zero
 ; extension.  The condition is always true.
 define double @f12(double %a, double %b, i16 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -173,9 +173,9 @@ define double @f12(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the high end of the CHHSI range, using sign
 ; extension.
 define double @f13(double %a, double %b, i16 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: chhsi 0(%r2), 32766
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -187,7 +187,7 @@ define double @f13(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison above the high end of the CHHSI range, using sign
 ; extension.  This condition is always true.
 define double @f14(double %a, double %b, i16 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
@@ -200,9 +200,9 @@ define double @f14(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison near the low end of the CHHSI range, using sign
 ; extension.
 define double @f15(double %a, double %b, i16 *%ptr) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: chhsi 0(%r2), -32767
-; CHECK-NEXT: j{{g?}}g
+; CHECK-NEXT: jh
 ; CHECK: br %r14
   %val = load i16 *%ptr
   %ext = sext i16 %val to i64
@@ -214,7 +214,7 @@ define double @f15(double %a, double %b, i16 *%ptr) {
 ; Check signed comparison below the low end of the CHHSI range, using sign
 ; extension.  This condition is always true.
 define double @f16(double %a, double %b, i16 *%ptr) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
   %val = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/int-cmp-32.ll b/test/CodeGen/SystemZ/int-cmp-32.ll
index 4bdeebb35c99..6596f9f3ad84 100644
--- a/test/CodeGen/SystemZ/int-cmp-32.ll
+++ b/test/CodeGen/SystemZ/int-cmp-32.ll
@@ -4,9 +4,9 @@
 
 ; Check ordered comparisons with 0.
 define double @f1(double %a, double %b, i32 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: chsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i32 *%ptr) {
 
 ; Check ordered comparisons with 1.
 define double @f2(double %a, double %b, i32 *%ptr) {
-; CHECK: f2:
-; CHECK: chsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f2:
+; CHECK: chsi 0(%r2), 0
+; CHECK-NEXT: jle
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i32 *%ptr) {
 
 ; Check ordered comparisons with the high end of the signed 16-bit range.
 define double @f3(double %a, double %b, i32 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: chsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -43,7 +43,7 @@ define double @f3(double %a, double %b, i32 *%ptr) {
 
 ; Check the next value up, which can't use CHSI.
 define double @f4(double %a, double %b, i32 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -54,9 +54,9 @@ define double @f4(double %a, double %b, i32 *%ptr) {
 
 ; Check ordered comparisons with -1.
 define double @f5(double %a, double %b, i32 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: chsi 0(%r2), -1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -67,9 +67,9 @@ define double @f5(double %a, double %b, i32 *%ptr) {
 
 ; Check ordered comparisons with the low end of the 16-bit signed range.
 define double @f6(double %a, double %b, i32 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: chsi 0(%r2), -32768
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -80,7 +80,7 @@ define double @f6(double %a, double %b, i32 *%ptr) {
 
 ; Check the next value down, which can't use CHSI.
 define double @f7(double %a, double %b, i32 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -91,9 +91,9 @@ define double @f7(double %a, double %b, i32 *%ptr) {
 
 ; Check equality comparisons with 0.
 define double @f8(double %a, double %b, i32 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: chsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -104,9 +104,9 @@ define double @f8(double %a, double %b, i32 *%ptr) {
 
 ; Check equality comparisons with 1.
 define double @f9(double %a, double %b, i32 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: chsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -117,9 +117,9 @@ define double @f9(double %a, double %b, i32 *%ptr) {
 
 ; Check equality comparisons with the high end of the signed 16-bit range.
 define double @f10(double %a, double %b, i32 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: chsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -130,7 +130,7 @@ define double @f10(double %a, double %b, i32 *%ptr) {
 
 ; Check the next value up, which can't use CHSI.
 define double @f11(double %a, double %b, i32 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -141,9 +141,9 @@ define double @f11(double %a, double %b, i32 *%ptr) {
 
 ; Check equality comparisons with -1.
 define double @f12(double %a, double %b, i32 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: chsi 0(%r2), -1
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -154,9 +154,9 @@ define double @f12(double %a, double %b, i32 *%ptr) {
 
 ; Check equality comparisons with the low end of the 16-bit signed range.
 define double @f13(double %a, double %b, i32 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: chsi 0(%r2), -32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -167,7 +167,7 @@ define double @f13(double %a, double %b, i32 *%ptr) {
 
 ; Check the next value down, which should be treated as a positive value.
 define double @f14(double %a, double %b, i32 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -178,9 +178,9 @@ define double @f14(double %a, double %b, i32 *%ptr) {
 
 ; Check the high end of the CHSI range.
 define double @f15(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: chsi 4092(%r3), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1023
@@ -192,10 +192,10 @@ define double @f15(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the next word up, which needs separate address logic,
 define double @f16(double %a, double %b, i32 *%base) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: aghi %r2, 4096
 ; CHECK: chsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1024
@@ -207,10 +207,10 @@ define double @f16(double %a, double %b, i32 *%base) {
 
 ; Check negative offsets, which also need separate address logic.
 define double @f17(double %a, double %b, i32 *%base) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: aghi %r2, -4
 ; CHECK: chsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -1
@@ -222,10 +222,10 @@ define double @f17(double %a, double %b, i32 *%base) {
 
 ; Check that CHSI does not allow indices.
 define double @f18(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: agr {{%r2, %r3|%r3, %r2}}
 ; CHECK: chsi 0({{%r[23]}}), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add = add i64 %base, %index
diff --git a/test/CodeGen/SystemZ/int-cmp-33.ll b/test/CodeGen/SystemZ/int-cmp-33.ll
index 0144806d4465..e5a653b3815d 100644
--- a/test/CodeGen/SystemZ/int-cmp-33.ll
+++ b/test/CodeGen/SystemZ/int-cmp-33.ll
@@ -5,9 +5,9 @@
 ; Check ordered comparisons with a constant near the low end of the unsigned
 ; 16-bit range.
 define double @f1(double %a, double %b, i32 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clfhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i32 *%ptr) {
 
 ; Check ordered comparisons with the high end of the unsigned 16-bit range.
 define double @f2(double %a, double %b, i32 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clfhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i32 *%ptr) {
 
 ; Check the next value up, which can't use CLFHSI.
 define double @f3(double %a, double %b, i32 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clfhsi
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i32 *%ptr) {
 ; Check equality comparisons with 32768, the lowest value for which
 ; we prefer CLFHSI to CHSI.
 define double @f4(double %a, double %b, i32 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clfhsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i32 *%ptr) {
 
 ; Check equality comparisons with the high end of the unsigned 16-bit range.
 define double @f5(double %a, double %b, i32 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clfhsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -69,7 +69,7 @@ define double @f5(double %a, double %b, i32 *%ptr) {
 
 ; Check the next value up, which can't use CLFHSI.
 define double @f6(double %a, double %b, i32 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: clfhsi
 ; CHECK: br %r14
   %val = load i32 *%ptr
@@ -80,9 +80,9 @@ define double @f6(double %a, double %b, i32 *%ptr) {
 
 ; Check the high end of the CLFHSI range.
 define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: clfhsi 4092(%r3), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1023
@@ -94,10 +94,10 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
 
 ; Check the next word up, which needs separate address logic,
 define double @f8(double %a, double %b, i32 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: aghi %r2, 4096
 ; CHECK: clfhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 1024
@@ -109,10 +109,10 @@ define double @f8(double %a, double %b, i32 *%base) {
 
 ; Check negative offsets, which also need separate address logic.
 define double @f9(double %a, double %b, i32 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: aghi %r2, -4
 ; CHECK: clfhsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%base, i64 -1
@@ -124,10 +124,10 @@ define double @f9(double %a, double %b, i32 *%base) {
 
 ; Check that CLFHSI does not allow indices.
 define double @f10(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agr {{%r2, %r3|%r3, %r2}}
 ; CHECK: clfhsi 0({{%r[23]}}), 1
-; CHECK-NEXT: j{{g?}}h
+; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add = add i64 %base, %index
diff --git a/test/CodeGen/SystemZ/int-cmp-34.ll b/test/CodeGen/SystemZ/int-cmp-34.ll
index b10bd4e08031..8a0219775a4e 100644
--- a/test/CodeGen/SystemZ/int-cmp-34.ll
+++ b/test/CodeGen/SystemZ/int-cmp-34.ll
@@ -4,9 +4,9 @@
 
 ; Check ordered comparisons with 0.
 define double @f1(double %a, double %b, i64 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cghsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -17,9 +17,9 @@ define double @f1(double %a, double %b, i64 *%ptr) {
 
 ; Check ordered comparisons with 1.
 define double @f2(double %a, double %b, i64 *%ptr) {
-; CHECK: f2:
-; CHECK: cghsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-LABEL: f2:
+; CHECK: cghsi 0(%r2), 0
+; CHECK-NEXT: jle
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -30,9 +30,9 @@ define double @f2(double %a, double %b, i64 *%ptr) {
 
 ; Check ordered comparisons with the high end of the signed 16-bit range.
 define double @f3(double %a, double %b, i64 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cghsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -43,7 +43,7 @@ define double @f3(double %a, double %b, i64 *%ptr) {
 
 ; Check the next value up, which can't use CGHSI.
 define double @f4(double %a, double %b, i64 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -54,9 +54,9 @@ define double @f4(double %a, double %b, i64 *%ptr) {
 
 ; Check ordered comparisons with -1.
 define double @f5(double %a, double %b, i64 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cghsi 0(%r2), -1
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -67,9 +67,9 @@ define double @f5(double %a, double %b, i64 *%ptr) {
 
 ; Check ordered comparisons with the low end of the 16-bit signed range.
 define double @f6(double %a, double %b, i64 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cghsi 0(%r2), -32768
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -80,7 +80,7 @@ define double @f6(double %a, double %b, i64 *%ptr) {
 
 ; Check the next value down, which should be treated as a positive value.
 define double @f7(double %a, double %b, i64 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -91,9 +91,9 @@ define double @f7(double %a, double %b, i64 *%ptr) {
 
 ; Check equality comparisons with 0.
 define double @f8(double %a, double %b, i64 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cghsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -104,9 +104,9 @@ define double @f8(double %a, double %b, i64 *%ptr) {
 
 ; Check equality comparisons with 1.
 define double @f9(double %a, double %b, i64 *%ptr) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cghsi 0(%r2), 1
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -117,9 +117,9 @@ define double @f9(double %a, double %b, i64 *%ptr) {
 
 ; Check equality comparisons with the high end of the signed 16-bit range.
 define double @f10(double %a, double %b, i64 *%ptr) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: cghsi 0(%r2), 32767
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -130,7 +130,7 @@ define double @f10(double %a, double %b, i64 *%ptr) {
 
 ; Check the next value up, which can't use CGHSI.
 define double @f11(double %a, double %b, i64 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -141,9 +141,9 @@ define double @f11(double %a, double %b, i64 *%ptr) {
 
 ; Check equality comparisons with -1.
 define double @f12(double %a, double %b, i64 *%ptr) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: cghsi 0(%r2), -1
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -154,9 +154,9 @@ define double @f12(double %a, double %b, i64 *%ptr) {
 
 ; Check equality comparisons with the low end of the 16-bit signed range.
 define double @f13(double %a, double %b, i64 *%ptr) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: cghsi 0(%r2), -32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -167,7 +167,7 @@ define double @f13(double %a, double %b, i64 *%ptr) {
 
 ; Check the next value down, which should be treated as a positive value.
 define double @f14(double %a, double %b, i64 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -178,9 +178,9 @@ define double @f14(double %a, double %b, i64 *%ptr) {
 
 ; Check the high end of the CGHSI range.
 define double @f15(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: cghsi 4088(%r3), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 511
@@ -192,10 +192,10 @@ define double @f15(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check the next doubleword up, which needs separate address logic,
 define double @f16(double %a, double %b, i64 *%base) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: aghi %r2, 4096
 ; CHECK: cghsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 512
@@ -207,10 +207,10 @@ define double @f16(double %a, double %b, i64 *%base) {
 
 ; Check negative offsets, which also need separate address logic.
 define double @f17(double %a, double %b, i64 *%base) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: aghi %r2, -8
 ; CHECK: cghsi 0(%r2), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -1
@@ -222,10 +222,10 @@ define double @f17(double %a, double %b, i64 *%base) {
 
 ; Check that CGHSI does not allow indices.
 define double @f18(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: agr {{%r2, %r3|%r3, %r2}}
 ; CHECK: cghsi 0({{%r[23]}}), 0
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add = add i64 %base, %index
diff --git a/test/CodeGen/SystemZ/int-cmp-35.ll b/test/CodeGen/SystemZ/int-cmp-35.ll
index 9934906ba8d4..539248a86a7b 100644
--- a/test/CodeGen/SystemZ/int-cmp-35.ll
+++ b/test/CodeGen/SystemZ/int-cmp-35.ll
@@ -5,9 +5,9 @@
 ; Check ordered comparisons with a constant near the low end of the unsigned
 ; 16-bit range.
 define double @f1(double %a, double %b, i64 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clghsi 0(%r2), 2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -18,9 +18,9 @@ define double @f1(double %a, double %b, i64 *%ptr) {
 
 ; Check ordered comparisons with the high end of the unsigned 16-bit range.
 define double @f2(double %a, double %b, i64 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clghsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -31,7 +31,7 @@ define double @f2(double %a, double %b, i64 *%ptr) {
 
 ; Check the next value up, which can't use CLGHSI.
 define double @f3(double %a, double %b, i64 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: clghsi
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -43,9 +43,9 @@ define double @f3(double %a, double %b, i64 *%ptr) {
 ; Check equality comparisons with 32768, the lowest value for which
 ; we prefer CLGHSI to CGHSI.
 define double @f4(double %a, double %b, i64 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clghsi 0(%r2), 32768
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -56,9 +56,9 @@ define double @f4(double %a, double %b, i64 *%ptr) {
 
 ; Check equality comparisons with the high end of the unsigned 16-bit range.
 define double @f5(double %a, double %b, i64 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: clghsi 0(%r2), 65535
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -69,7 +69,7 @@ define double @f5(double %a, double %b, i64 *%ptr) {
 
 ; Check the next value up, which can't use CLGHSI.
 define double @f6(double %a, double %b, i64 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: clghsi
 ; CHECK: br %r14
   %val = load i64 *%ptr
@@ -80,9 +80,9 @@ define double @f6(double %a, double %b, i64 *%ptr) {
 
 ; Check the high end of the CLGHSI range.
 define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: clghsi 4088(%r3), 2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 511
@@ -94,10 +94,10 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
 
 ; Check the next doubleword up, which needs separate address logic,
 define double @f8(double %a, double %b, i64 *%base) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: aghi %r2, 4096
 ; CHECK: clghsi 0(%r2), 2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 512
@@ -109,10 +109,10 @@ define double @f8(double %a, double %b, i64 *%base) {
 
 ; Check negative offsets, which also need separate address logic.
 define double @f9(double %a, double %b, i64 *%base) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: aghi %r2, -8
 ; CHECK: clghsi 0(%r2), 2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%base, i64 -1
@@ -124,10 +124,10 @@ define double @f9(double %a, double %b, i64 *%base) {
 
 ; Check that CLGHSI does not allow indices.
 define double @f10(double %a, double %b, i64 %base, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agr {{%r2, %r3|%r3, %r2}}
 ; CHECK: clghsi 0({{%r[23]}}), 2
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %add = add i64 %base, %index
diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll
index 0813594325e4..fa2d4bf6c617 100644
--- a/test/CodeGen/SystemZ/int-cmp-36.ll
+++ b/test/CodeGen/SystemZ/int-cmp-36.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i16 1
+@h = global i16 1, align 1, section "foo"
 
 ; Check signed comparison.
 define i32 @f1(i32 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: chrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -26,7 +27,7 @@ exit:
 
 ; Check unsigned comparison, which cannot use CHRL.
 define i32 @f2(i32 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: chrl
 ; CHECK: br %r14
 entry:
@@ -44,9 +45,9 @@ exit:
 
 ; Check equality.
 define i32 @f3(i32 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: chrl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -63,9 +64,9 @@ exit:
 
 ; Check inequality.
 define i32 @f4(i32 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: chrl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -79,3 +80,42 @@ exit:
   %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
   ret i32 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i32 @f5(i32 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: ch %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %val = load i16 *@h, align 1
+  %src2 = sext i16 %val to i32
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check the comparison can be reversed if that allows CHRL to be used.
+define i32 @f6(i32 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: chrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src1 = sext i16 %val to i32
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll
index aebd1f610d27..8095ed173012 100644
--- a/test/CodeGen/SystemZ/int-cmp-37.ll
+++ b/test/CodeGen/SystemZ/int-cmp-37.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i16 1
+@h = global i16 1, align 1, section "foo"
 
 ; Check unsigned comparison.
 define i32 @f1(i32 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clhrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -26,7 +27,7 @@ exit:
 
 ; Check signed comparison.
 define i32 @f2(i32 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: clhrl
 ; CHECK: br %r14
 entry:
@@ -44,9 +45,9 @@ exit:
 
 ; Check equality.
 define i32 @f3(i32 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clhrl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -63,9 +64,9 @@ exit:
 
 ; Check inequality.
 define i32 @f4(i32 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clhrl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -79,3 +80,42 @@ exit:
   %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
   ret i32 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i32 @f5(i32 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: clrjl %r2, [[VAL]],
+; CHECK: br %r14
+entry:
+  %val = load i16 *@h, align 1
+  %src2 = zext i16 %val to i32
+  %cond = icmp ult i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check the comparison can be reversed if that allows CLHRL to be used.
+define i32 @f6(i32 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: clhrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src1 = zext i16 %val to i32
+  %cond = icmp ult i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll
index 347073027554..901758378729 100644
--- a/test/CodeGen/SystemZ/int-cmp-38.ll
+++ b/test/CodeGen/SystemZ/int-cmp-38.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i32 1
+@h = global i32 1, align 2, section "foo"
 
 ; Check signed comparisons.
 define i32 @f1(i32 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: crl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %src2 = load i32 *@g
@@ -25,9 +26,9 @@ exit:
 
 ; Check unsigned comparisons.
 define i32 @f2(i32 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %src2 = load i32 *@g
@@ -43,9 +44,9 @@ exit:
 
 ; Check equality, which can use CRL or CLRL.
 define i32 @f3(i32 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: c{{l?}}rl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %src2 = load i32 *@g
@@ -61,9 +62,9 @@ exit:
 
 ; ...likewise inequality.
 define i32 @f4(i32 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: c{{l?}}rl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %src2 = load i32 *@g
@@ -76,3 +77,59 @@ exit:
   %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
   ret i32 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i32 @f5(i32 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: c %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %src2 = load i32 *@h, align 2
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Repeat f2 with an unaligned address.
+define i32 @f6(i32 %src1) {
+; CHECK-LABEL: f6:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: cl %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %src2 = load i32 *@h, align 2
+  %cond = icmp ult i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check the comparison can be reversed if that allows CRL to be used.
+define i32 @f7(i32 %src2) {
+; CHECK-LABEL: f7:
+; CHECK: crl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %src1 = load i32 *@g
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i32 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll
index 1129dce84a44..fc9547d4ceb4 100644
--- a/test/CodeGen/SystemZ/int-cmp-39.ll
+++ b/test/CodeGen/SystemZ/int-cmp-39.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i16 1
+@h = global i16 1, align 1, section "foo"
 
 ; Check signed comparison.
 define i64 @f1(i64 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cghrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -26,7 +27,7 @@ exit:
 
 ; Check unsigned comparison, which cannot use CHRL.
 define i64 @f2(i64 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: cghrl
 ; CHECK: br %r14
 entry:
@@ -44,9 +45,9 @@ exit:
 
 ; Check equality.
 define i64 @f3(i64 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cghrl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -63,9 +64,9 @@ exit:
 
 ; Check inequality.
 define i64 @f4(i64 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cghrl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -79,3 +80,42 @@ exit:
   %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
   ret i64 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: cgh %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %val = load i16 *@h, align 1
+  %src2 = sext i16 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CGHRL to be used.
+define i64 @f6(i64 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: cghrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src1 = sext i16 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll
index 8d9fd9aa140a..9c532f1cbc6b 100644
--- a/test/CodeGen/SystemZ/int-cmp-40.ll
+++ b/test/CodeGen/SystemZ/int-cmp-40.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i16 1
+@h = global i16 1, align 1, section "foo"
 
 ; Check unsigned comparison.
 define i64 @f1(i64 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clghrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -26,7 +27,7 @@ exit:
 
 ; Check signed comparison.
 define i64 @f2(i64 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: clghrl
 ; CHECK: br %r14
 entry:
@@ -44,9 +45,9 @@ exit:
 
 ; Check equality.
 define i64 @f3(i64 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clghrl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -63,9 +64,9 @@ exit:
 
 ; Check inequality.
 define i64 @f4(i64 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clghrl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %val = load i16 *@g
@@ -79,3 +80,42 @@ exit:
   %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
   ret i64 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: clgrjl %r2, [[VAL]],
+; CHECK: br %r14
+entry:
+  %val = load i16 *@h, align 1
+  %src2 = zext i16 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CLGHRL to be used.
+define i64 @f6(i64 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: clghrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src1 = zext i16 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll
index 0808bffe6d3e..77f6e7d76f1c 100644
--- a/test/CodeGen/SystemZ/int-cmp-41.ll
+++ b/test/CodeGen/SystemZ/int-cmp-41.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i32 1
+@h = global i32 1, align 2, section "foo"
 
 ; Check signed comparison.
 define i64 @f1(i64 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cgfrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %val = load i32 *@g
@@ -26,7 +27,7 @@ exit:
 
 ; Check unsigned comparison, which cannot use CHRL.
 define i64 @f2(i64 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: cgfrl
 ; CHECK: br %r14
 entry:
@@ -44,9 +45,9 @@ exit:
 
 ; Check equality.
 define i64 @f3(i64 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cgfrl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %val = load i32 *@g
@@ -63,9 +64,9 @@ exit:
 
 ; Check inequality.
 define i64 @f4(i64 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cgfrl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %val = load i32 *@g
@@ -79,3 +80,42 @@ exit:
   %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
   ret i64 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: cgf %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %val = load i32 *@h, align 2
+  %src2 = sext i32 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CGFRL to be used.
+define i64 @f6(i64 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: cgfrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src1 = sext i32 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll
index 5c67581dc29a..94ef0082c441 100644
--- a/test/CodeGen/SystemZ/int-cmp-42.ll
+++ b/test/CodeGen/SystemZ/int-cmp-42.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i32 1
+@h = global i32 1, align 2, section "foo"
 
 ; Check unsigned comparison.
 define i64 @f1(i64 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clgfrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %val = load i32 *@g
@@ -26,7 +27,7 @@ exit:
 
 ; Check signed comparison.
 define i64 @f2(i64 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: clgfrl
 ; CHECK: br %r14
 entry:
@@ -44,9 +45,9 @@ exit:
 
 ; Check equality.
 define i64 @f3(i64 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: clgfrl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %val = load i32 *@g
@@ -63,9 +64,9 @@ exit:
 
 ; Check inequality.
 define i64 @f4(i64 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: clgfrl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %val = load i32 *@g
@@ -79,3 +80,42 @@ exit:
   %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
   ret i64 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: clgf %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %val = load i32 *@h, align 2
+  %src2 = zext i32 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CLGFRL to be used.
+define i64 @f6(i64 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: clgfrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src1 = zext i32 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll
index f387293b2b1b..1a625886dec2 100644
--- a/test/CodeGen/SystemZ/int-cmp-43.ll
+++ b/test/CodeGen/SystemZ/int-cmp-43.ll
@@ -4,12 +4,13 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 @g = global i64 1
+@h = global i64 1, align 4, section "foo"
 
 ; Check signed comparisons.
 define i64 @f1(i64 %src1) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cgrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %src2 = load i64 *@g
@@ -25,9 +26,9 @@ exit:
 
 ; Check unsigned comparisons.
 define i64 @f2(i64 %src1) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: clgrl %r2, g
-; CHECK-NEXT: j{{g?}}l
+; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
   %src2 = load i64 *@g
@@ -43,9 +44,9 @@ exit:
 
 ; Check equality, which can use CRL or CLRL.
 define i64 @f3(i64 %src1) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: c{{l?}}grl %r2, g
-; CHECK-NEXT: j{{g?}}e
+; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
   %src2 = load i64 *@g
@@ -61,9 +62,9 @@ exit:
 
 ; ...likewise inequality.
 define i64 @f4(i64 %src1) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: c{{l?}}grl %r2, g
-; CHECK-NEXT: j{{g?}}lh
+; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
   %src2 = load i64 *@g
@@ -76,3 +77,40 @@ exit:
   %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
   ret i64 %res
 }
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK-LABEL: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: cg %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+  %src2 = load i64 *@h, align 4
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CGRL to be used.
+define i64 @f6(i64 %src2) {
+; CHECK-LABEL: f6:
+; CHECK: cgrl %r2, g
+; CHECK-NEXT: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %src1 = load i64 *@g
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src2, %src2
+  br label %exit
+exit:
+  %res = phi i64 [ %src2, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll
new file mode 100644
index 000000000000..ae0133f10860
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-44.ll
@@ -0,0 +1,799 @@
+; Test that compares are omitted if CC already has the right value
+; (z10 version).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare void @foo()
+
+; Addition provides enough for equality comparisons with zero.  First test
+; the EQ case.
+define i32 @f1(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f1:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: je .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and again with NE.
+define i32 @f2(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: jne .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; SLT requires a comparison.
+define i32 @f3(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...SLE too.
+define i32 @f4(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: cijle %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  %cmp = icmp sle i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...SGT too.
+define i32 @f5(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: cijh %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  %cmp = icmp sgt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...SGE too.
+define i32 @f6(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: cijhe %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  %cmp = icmp sge i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Subtraction also provides enough for equality comparisons with zero.
+define i32 @f7(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK: s %r2, 0(%r4)
+; CHECK-NEXT: jne .L{{.*}}
+; CHECK: br %r14
+entry:
+  %cur = load i32 *%dest
+  %res = sub i32 %a, %cur
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...but not for ordered comparisons.
+define i32 @f8(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK: s %r2, 0(%r4)
+; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %cur = load i32 *%dest
+  %res = sub i32 %a, %cur
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Logic register-register instructions also provide enough for equality
+; comparisons with zero.
+define i32 @f9(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: nr %r2, %r3
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = and i32 %a, %b
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...but not for ordered comparisons.
+define i32 @f10(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f10:
+; CHECK: nr %r2, %r3
+; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = and i32 %a, %b
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Logic register-immediate instructions also provide enough for equality
+; comparisons with zero if the immediate covers the whole register.
+define i32 @f11(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f11:
+; CHECK: nilf %r2, 100000001
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = and i32 %a, 100000001
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Partial logic register-immediate instructions do not provide simple
+; zero results.
+define i32 @f12(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f12:
+; CHECK: nill %r2, 65436
+; CHECK-NEXT: cijlh %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = and i32 %a, -100
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; SRA provides the same CC result as a comparison with zero.
+define i32 @f13(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f13:
+; CHECK: sra %r2, 0(%r3)
+; CHECK-NEXT: je .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = ashr i32 %a, %b
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and again with NE.
+define i32 @f14(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f14:
+; CHECK: sra %r2, 0(%r3)
+; CHECK-NEXT: jlh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = ashr i32 %a, %b
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and SLT.
+define i32 @f15(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f15:
+; CHECK: sra %r2, 0(%r3)
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = ashr i32 %a, %b
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and SLE.
+define i32 @f16(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f16:
+; CHECK: sra %r2, 0(%r3)
+; CHECK-NEXT: jle .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = ashr i32 %a, %b
+  %cmp = icmp sle i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and SGT.
+define i32 @f17(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f17:
+; CHECK: sra %r2, 0(%r3)
+; CHECK-NEXT: jh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = ashr i32 %a, %b
+  %cmp = icmp sgt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and SGE.
+define i32 @f18(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f18:
+; CHECK: sra %r2, 0(%r3)
+; CHECK-NEXT: jhe .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = ashr i32 %a, %b
+  %cmp = icmp sge i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; RISBG provides the same result as a comparison against zero.
+; Test the EQ case.
+define i64 @f19(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f19:
+; CHECK: risbg %r2, %r3, 0, 190, 0
+; CHECK-NEXT: je .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = and i64 %b, -2
+  %cmp = icmp eq i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %b, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %res
+}
+
+; ...and the SLT case.
+define i64 @f20(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f20:
+; CHECK: risbg %r2, %r3, 0, 190, 0
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = and i64 %b, -2
+  %cmp = icmp slt i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %b, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %res
+}
+
+; Test a case where the register we're testing is set by a non-CC-clobbering
+; instruction.
+define i32 @f21(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f21:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: cije %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i32 %a, 1000000
+  %res = call i32 asm "blah $0", "=r,0" (i32 %add)
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; ...and again with a CC-clobbering instruction.
+define i32 @f22(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f22:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: cije %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i32 %a, 1000000
+  %res = call i32 asm "blah $0", "=r,0,~{cc}" (i32 %add)
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Check that stores do not interfere.
+define i32 @f23(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) {
+; CHECK-LABEL: f23:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: st %r2, 0(%r4)
+; CHECK-NEXT: jne .L{{.*}}
+; CHECK: br %r14
+entry:
+  %res = add i32 %a, 1000000
+  store i32 %res, i32 *%dest1
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %b, i32 *%dest2
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Check that calls do interfere.
+define void @f24(i32 *%ptr) {
+; CHECK-LABEL: f24:
+; CHECK: afi [[REG:%r[0-9]+]], 1000000
+; CHECK-NEXT: brasl %r14, foo@PLT
+; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %val = load i32 *%ptr
+  %xor = xor i32 %val, 1
+  %add = add i32 %xor, 1000000
+  call void @foo()
+  %cmp = icmp ne i32 %add, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %add, i32 *%ptr
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that inline asms don't interfere if they don't clobber CC.
+define void @f25(i32 %a, i32 *%ptr) {
+; CHECK-LABEL: f25:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jne .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i32 %a, 1000000
+  call void asm sideeffect "blah", "r"(i32 %add)
+  %cmp = icmp ne i32 %add, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %add, i32 *%ptr
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...but do interfere if they do clobber CC.
+define void @f26(i32 %a, i32 *%ptr) {
+; CHECK-LABEL: f26:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: cijlh %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i32 %a, 1000000
+  call void asm sideeffect "blah", "r,~{cc}"(i32 %add)
+  %cmp = icmp ne i32 %add, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %add, i32 *%ptr
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test a case where CC is set based on a different register from the
+; compare input.
+define i32 @f27(i32 %a, i32 %b, i32 *%dest1, i32 *%dest2) {
+; CHECK-LABEL: f27:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: sr %r3, %r2
+; CHECK-NEXT: st %r3, 0(%r4)
+; CHECK-NEXT: cije %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i32 %a, 1000000
+  %sub = sub i32 %b, %add
+  store i32 %sub, i32 *%dest1
+  %cmp = icmp eq i32 %add, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %sub, i32 *%dest2
+  br label %exit
+
+exit:
+  ret i32 %add
+}
+
+; Make sure that we don't confuse a base register for a destination.
+define void @f28(i64 %a, i64 *%dest) {
+; CHECK-LABEL: f28:
+; CHECK: xi 0(%r2), 15
+; CHECK: cgije %r2, 0, .L{{.*}}
+; CHECK: br %r14
+entry:
+  %ptr = inttoptr i64 %a to i8 *
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 15
+  store i8 %xor, i8 *%ptr
+  %cmp = icmp eq i64 %a, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %a, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test that L gets converted to LT where useful.
+define i32 @f29(i64 %base, i64 %index, i32 *%dest) {
+; CHECK-LABEL: f29:
+; CHECK: lt %r2, 0({{%r2,%r3|%r3,%r2}})
+; CHECK-NEXT: jle .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %res = load i32 *%ptr
+  %cmp = icmp sle i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %res, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Test that LY gets converted to LT where useful.
+define i32 @f30(i64 %base, i64 %index, i32 *%dest) {
+; CHECK-LABEL: f30:
+; CHECK: lt %r2, 100000({{%r2,%r3|%r3,%r2}})
+; CHECK-NEXT: jle .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 100000
+  %ptr = inttoptr i64 %add2 to i32 *
+  %res = load i32 *%ptr
+  %cmp = icmp sle i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %res, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; Test that LG gets converted to LTG where useful.
+define i64 @f31(i64 %base, i64 %index, i64 *%dest) {
+; CHECK-LABEL: f31:
+; CHECK: ltg %r2, 0({{%r2,%r3|%r3,%r2}})
+; CHECK-NEXT: jhe .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i64 *
+  %res = load i64 *%ptr
+  %cmp = icmp sge i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %res, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %res
+}
+
+; Test that LGF gets converted to LTGF where useful.
+define i64 @f32(i64 %base, i64 %index, i64 *%dest) {
+; CHECK-LABEL: f32:
+; CHECK: ltgf %r2, 0({{%r2,%r3|%r3,%r2}})
+; CHECK-NEXT: jh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %val = load i32 *%ptr
+  %res = sext i32 %val to i64
+  %cmp = icmp sgt i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %res, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %res
+}
+
+; Test that LR gets converted to LTR where useful.
+define i32 @f33(i32 %dummy, i32 %val, i32 *%dest) {
+; CHECK-LABEL: f33:
+; CHECK: ltr %r2, %r3
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{r2}"(i32 %val)
+  %cmp = icmp slt i32 %val, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %val, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %val
+}
+
+; Test that LGR gets converted to LTGR where useful.
+define i64 @f34(i64 %dummy, i64 %val, i64 *%dest) {
+; CHECK-LABEL: f34:
+; CHECK: ltgr %r2, %r3
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jh .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{r2}"(i64 %val)
+  %cmp = icmp sgt i64 %val, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %val, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %val
+}
+
+; Test that LGFR gets converted to LTGFR where useful.
+define i64 @f35(i64 %dummy, i32 %val, i64 *%dest) {
+; CHECK-LABEL: f35:
+; CHECK: ltgfr %r2, %r3
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jh .L{{.*}}
+; CHECK: br %r14
+entry:
+  %ext = sext i32 %val to i64
+  call void asm sideeffect "blah $0", "{r2}"(i64 %ext)
+  %cmp = icmp sgt i64 %ext, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %ext, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %ext
+}
+
+; Test a case where it is the source rather than destination of LR that
+; we need.
+define i32 @f36(i32 %val, i32 %dummy, i32 *%dest) {
+; CHECK-LABEL: f36:
+; CHECK: ltr %r3, %r2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r3
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{r3}"(i32 %val)
+  %cmp = icmp slt i32 %val, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %val, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %val
+}
+
+; Test a case where it is the source rather than destination of LGR that
+; we need.
+define i64 @f37(i64 %val, i64 %dummy, i64 *%dest) {
+; CHECK-LABEL: f37:
+; CHECK: ltgr %r3, %r2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r3
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  call void asm sideeffect "blah $0", "{r3}"(i64 %val)
+  %cmp = icmp slt i64 %val, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 %val, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %val
+}
+
+; Test a case where it is the source rather than destination of LGFR that
+; we need.
+define i32 @f38(i32 %val, i64 %dummy, i32 *%dest) {
+; CHECK-LABEL: f38:
+; CHECK: ltgfr %r3, %r2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %r3
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jl .L{{.*}}
+; CHECK: br %r14
+entry:
+  %ext = sext i32 %val to i64
+  call void asm sideeffect "blah $0", "{r3}"(i64 %ext)
+  %cmp = icmp slt i32 %val, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 %val, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-45.ll b/test/CodeGen/SystemZ/int-cmp-45.ll
new file mode 100644
index 000000000000..753a528e46c9
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-45.ll
@@ -0,0 +1,115 @@
+; Test that compares are omitted if CC already has the right value
+; (z196 version).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Addition provides enough for equality comparisons with zero.  First test
+; the EQ case with LOC.
+define i32 @f1(i32 %a, i32 %b, i32 *%cptr) {
+; CHECK-LABEL: f1:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: loce %r3, 0(%r4)
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp eq i32 %add, 0
+  %c = load i32 *%cptr
+  %arg = select i1 %cmp, i32 %c, i32 %b
+  call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
+  ret i32 %add
+}
+
+; ...and again with STOC.
+define i32 @f2(i32 %a, i32 %b, i32 *%cptr) {
+; CHECK-LABEL: f2:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: stoce %r3, 0(%r4)
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp eq i32 %add, 0
+  %c = load i32 *%cptr
+  %newval = select i1 %cmp, i32 %b, i32 %c
+  store i32 %newval, i32 *%cptr
+  ret i32 %add
+}
+
+; Reverse the select order and test with LOCR.
+define i32 @f3(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f3:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: locrne %r3, %r4
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp eq i32 %add, 0
+  %arg = select i1 %cmp, i32 %b, i32 %c
+  call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
+  ret i32 %add
+}
+
+; ...and again with LOC.
+define i32 @f4(i32 %a, i32 %b, i32 *%cptr) {
+; CHECK-LABEL: f4:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: locne %r3, 0(%r4)
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp eq i32 %add, 0
+  %c = load i32 *%cptr
+  %arg = select i1 %cmp, i32 %b, i32 %c
+  call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
+  ret i32 %add
+}
+
+; ...and again with STOC.
+define i32 @f5(i32 %a, i32 %b, i32 *%cptr) {
+; CHECK-LABEL: f5:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: stocne %r3, 0(%r4)
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp eq i32 %add, 0
+  %c = load i32 *%cptr
+  %newval = select i1 %cmp, i32 %c, i32 %b
+  store i32 %newval, i32 *%cptr
+  ret i32 %add
+}
+
+; Change the EQ in f3 to NE.
+define i32 @f6(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f6:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: locre %r3, %r4
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp ne i32 %add, 0
+  %arg = select i1 %cmp, i32 %b, i32 %c
+  call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
+  ret i32 %add
+}
+
+; ...and again with LOC.
+define i32 @f7(i32 %a, i32 %b, i32 *%cptr) {
+; CHECK-LABEL: f7:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: loce %r3, 0(%r4)
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp ne i32 %add, 0
+  %c = load i32 *%cptr
+  %arg = select i1 %cmp, i32 %b, i32 %c
+  call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
+  ret i32 %add
+}
+
+; ...and again with STOC.
+define i32 @f8(i32 %a, i32 %b, i32 *%cptr) {
+; CHECK-LABEL: f8:
+; CHECK: afi %r2, 1000000
+; CHECK-NEXT: stoce %r3, 0(%r4)
+; CHECK: br %r14
+  %add = add i32 %a, 1000000
+  %cmp = icmp ne i32 %add, 0
+  %c = load i32 *%cptr
+  %newval = select i1 %cmp, i32 %c, i32 %b
+  store i32 %newval, i32 *%cptr
+  ret i32 %add
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-46.ll b/test/CodeGen/SystemZ/int-cmp-46.ll
new file mode 100644
index 000000000000..f311942b9f86
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-46.ll
@@ -0,0 +1,491 @@
+; Test the use of TEST UNDER MASK for 32-bit operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+@g = global i32 0
+
+; Check the lowest useful TMLL value.
+define void @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: tmll %r2, 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 1
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the high end of the TMLL range.
+define void @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: tmll %r2, 65535
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 65535
+  %cmp = icmp ne i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the lowest useful TMLH value, which is the next value up.
+define void @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: tmlh %r2, 1
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 65536
+  %cmp = icmp ne i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a mask that spans both low halfwords (0xfffeffff), which cannot use TM.
+define void @f4(i32 %a) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: {{tm[lh].}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 4294901759
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the high end of the TMLH range.
+define void @f5(i32 %a) {
+; CHECK-LABEL: f5:
+; CHECK: tmlh %r2, 65535
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 4294901760
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for LT comparisons that are equivalent to
+; an equality comparison with zero.
+define void @f6(i32 %a) {
+; CHECK-LABEL: f6:
+; CHECK: tmll %r2, 240
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 240
+  %cmp = icmp slt i32 %and, 16
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...same again with LE.
+define void @f7(i32 %a) {
+; CHECK-LABEL: f7:
+; CHECK: tmll %r2, 240
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 240
+  %cmp = icmp sle i32 %and, 15
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for GE comparisons that are equivalent to
+; an inequality comparison with zero.
+define void @f8(i32 %a) {
+; CHECK-LABEL: f8:
+; CHECK: tmll %r2, 240
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 240
+  %cmp = icmp uge i32 %and, 16
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...same again with GT.
+define void @f9(i32 %a) {
+; CHECK-LABEL: f9:
+; CHECK: tmll %r2, 240
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 240
+  %cmp = icmp ugt i32 %and, 15
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for LT comparisons that effectively
+; test whether the top bit is clear.
+define void @f10(i32 %a) {
+; CHECK-LABEL: f10:
+; CHECK: tmll %r2, 35
+; CHECK: jle {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 35
+  %cmp = icmp ult i32 %and, 8
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...same again with LE.
+define void @f11(i32 %a) {
+; CHECK-LABEL: f11:
+; CHECK: tmll %r2, 35
+; CHECK: jle {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 35
+  %cmp = icmp ule i32 %and, 31
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for GE comparisons that effectively test
+; whether the top bit is set.
+define void @f12(i32 %a) {
+; CHECK-LABEL: f12:
+; CHECK: tmll %r2, 140
+; CHECK: jnle {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 140
+  %cmp = icmp uge i32 %and, 128
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...same again for GT.
+define void @f13(i32 %a) {
+; CHECK-LABEL: f13:
+; CHECK: tmll %r2, 140
+; CHECK: jnle {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 140
+  %cmp = icmp ugt i32 %and, 126
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for equality comparisons with the mask.
+define void @f14(i32 %a) {
+; CHECK-LABEL: f14:
+; CHECK: tmll %r2, 101
+; CHECK: jo {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 101
+  %cmp = icmp eq i32 %and, 101
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for inequality comparisons with the mask.
+define void @f15(i32 %a) {
+; CHECK-LABEL: f15:
+; CHECK: tmll %r2, 65519
+; CHECK: jno {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 65519
+  %cmp = icmp ne i32 %and, 65519
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for LT comparisons that are equivalent
+; to inequality comparisons with the mask.
+define void @f16(i32 %a) {
+; CHECK-LABEL: f16:
+; CHECK: tmll %r2, 130
+; CHECK: jno {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 130
+  %cmp = icmp ult i32 %and, 129
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...same again with LE.
+define void @f17(i32 %a) {
+; CHECK-LABEL: f17:
+; CHECK: tmll %r2, 130
+; CHECK: jno {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 130
+  %cmp = icmp ule i32 %and, 128
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for GE comparisons that are equivalent
+; to equality comparisons with the mask.
+define void @f18(i32 %a) {
+; CHECK-LABEL: f18:
+; CHECK: tmll %r2, 194
+; CHECK: jo {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 194
+  %cmp = icmp uge i32 %and, 193
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ...same again for GT.
+define void @f19(i32 %a) {
+; CHECK-LABEL: f19:
+; CHECK: tmll %r2, 194
+; CHECK: jo {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 194
+  %cmp = icmp ugt i32 %and, 192
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for equality comparisons for the low bit
+; when the mask has two bits.
+define void @f20(i32 %a) {
+; CHECK-LABEL: f20:
+; CHECK: tmll %r2, 20
+; CHECK: jl {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 20
+  %cmp = icmp eq i32 %and, 4
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for inequality comparisons for the low bit
+; when the mask has two bits.
+define void @f21(i32 %a) {
+; CHECK-LABEL: f21:
+; CHECK: tmll %r2, 20
+; CHECK: jnl {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 20
+  %cmp = icmp ne i32 %and, 4
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for equality comparisons for the high bit
+; when the mask has two bits.
+define void @f22(i32 %a) {
+; CHECK-LABEL: f22:
+; CHECK: tmll %r2, 20
+; CHECK: jh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 20
+  %cmp = icmp eq i32 %and, 16
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can use TMLL for inequality comparisons for the high bit
+; when the mask has two bits.
+define void @f23(i32 %a) {
+; CHECK-LABEL: f23:
+; CHECK: tmll %r2, 20
+; CHECK: jnh {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i32 %a, 20
+  %cmp = icmp ne i32 %and, 16
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can fold an SHL into a TMxx mask.
+define void @f24(i32 %a) {
+; CHECK-LABEL: f24:
+; CHECK: tmll %r2, 255
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %shl = shl i32 %a, 12
+  %and = and i32 %shl, 1044480
+  %cmp = icmp ne i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can fold an SHR into a TMxx mask.
+define void @f25(i32 %a) {
+; CHECK-LABEL: f25:
+; CHECK: tmlh %r2, 512
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %shr = lshr i32 %a, 25
+  %and = and i32 %shr, 1
+  %cmp = icmp ne i32 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-47.ll b/test/CodeGen/SystemZ/int-cmp-47.ll
new file mode 100644
index 000000000000..9ebcbfe525ba
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-47.ll
@@ -0,0 +1,234 @@
+; Test the use of TEST UNDER MASK for 64-bit operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+@g = global i32 0
+
+; Check the lowest useful TMLL value.
+define void @f1(i64 %a) {
+; CHECK-LABEL: f1:
+; CHECK: tmll %r2, 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 1
+  %cmp = icmp eq i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the high end of the TMLL range.
+define void @f2(i64 %a) {
+; CHECK-LABEL: f2:
+; CHECK: tmll %r2, 65535
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 65535
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the lowest useful TMLH value, which is the next value up.
+define void @f3(i64 %a) {
+; CHECK-LABEL: f3:
+; CHECK: tmlh %r2, 1
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 65536
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a mask that spans both low halfwords (0xfffeffff), which cannot use TM.
+define void @f4(i64 %a) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: {{tm[lh].}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 4294901759
+  %cmp = icmp eq i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the high end of the TMLH range.
+define void @f5(i64 %a) {
+; CHECK-LABEL: f5:
+; CHECK: tmlh %r2, 65535
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 4294901760
+  %cmp = icmp eq i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the lowest useful TMHL value.
+define void @f6(i64 %a) {
+; CHECK-LABEL: f6:
+; CHECK: tmhl %r2, 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 4294967296
+  %cmp = icmp eq i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the next value up again, which cannot use TM.
+define void @f7(i64 %a) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: {{tm[lh].}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 4294967297
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the high end of the TMHL range.
+define void @f8(i64 %a) {
+; CHECK-LABEL: f8:
+; CHECK: tmhl %r2, 65535
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 281470681743360
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the lowest useful TMHH value.
+define void @f9(i64 %a) {
+; CHECK-LABEL: f9:
+; CHECK: tmhh %r2, 1
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 281474976710656
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the high end of the TMHH range.
+define void @f10(i64 %a) {
+; CHECK-LABEL: f10:
+; CHECK: tmhh %r2, 65535
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 18446462598732840960
+  %cmp = icmp eq i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can fold an SHL into a TMxx mask.
+define void @f11(i64 %a) {
+; CHECK-LABEL: f11:
+; CHECK: tmhl %r2, 32768
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %shl = shl i64 %a, 1
+  %and = and i64 %shl, 281474976710656
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can fold an SHR into a TMxx mask.
+define void @f12(i64 %a) {
+; CHECK-LABEL: f12:
+; CHECK: tmhh %r2, 256
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %shr = lshr i64 %a, 56
+  %and = and i64 %shr, 1
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-48.ll b/test/CodeGen/SystemZ/int-cmp-48.ll
new file mode 100644
index 000000000000..d7c6370a2323
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-48.ll
@@ -0,0 +1,245 @@
+; Test the use of TM and TMY.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+@g = global i32 0
+
+; Check a simple branching use of TM.
+define void @f1(i8 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: tm 0(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %byte = load i8 *%src
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+
+; Check that we do not fold across an aliasing store.
+define void @f2(i8 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
+; CHECK: mvi 0(%r2), 0
+; CHECK: tmll [[REG]], 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %byte = load i8 *%src
+  store i8 0, i8 *%src
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a simple select-based use of TM.
+define double @f3(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f3:
+; CHECK: tm 0(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check that TM is still used when the aliasing store comes after the test.
+define double @f4(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f4:
+; CHECK: tm 0(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: mvi 0(%r2), 0
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  store i8 0, i8 *%src
+  ret double %res
+}
+
+; Check an inequality check.
+define double @f5(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f5:
+; CHECK: tm 0(%r2), 1
+; CHECK: jne {{\.L.*}}
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 1
+  %cmp = icmp ne i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check that we can also use TM for equality comparisons with the mask.
+define double @f6(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f6:
+; CHECK: tm 0(%r2), 254
+; CHECK: jo {{\.L.*}}
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 254
+  %cmp = icmp eq i8 %and, 254
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check inequality comparisons with the mask.
+define double @f7(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f7:
+; CHECK: tm 0(%r2), 254
+; CHECK: jno {{\.L.*}}
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 254
+  %cmp = icmp ne i8 %and, 254
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check that we do not use the memory TM instruction when CC is being tested
+; for 2.
+define double @f8(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f8:
+; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
+; CHECK: tmll [[REG]], 3
+; CHECK: jh {{\.L.*}}
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 3
+  %cmp = icmp eq i8 %and, 2
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; ...likewise 1.
+define double @f9(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f9:
+; CHECK: llc [[REG:%r[0-5]]], 0(%r2)
+; CHECK: tmll [[REG]], 3
+; CHECK: jl {{\.L.*}}
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %and = and i8 %byte, 3
+  %cmp = icmp eq i8 %and, 1
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check the high end of the TM range.
+define double @f10(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f10:
+; CHECK: tm 4095(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check the low end of the positive TMY range.
+define double @f11(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f11:
+; CHECK: tmy 4096(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check the high end of the TMY range.
+define double @f12(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f12:
+; CHECK: tmy 524287(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check the next byte up, which needs separate address logic.
+define double @f13(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f13:
+; CHECK: agfi %r2, 524288
+; CHECK: tm 0(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check the low end of the TMY range.
+define double @f14(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f14:
+; CHECK: tmy -524288(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check the next byte down, which needs separate address logic.
+define double @f15(i8 *%src, double %a, double %b) {
+; CHECK-LABEL: f15:
+; CHECK: agfi %r2, -524289
+; CHECK: tm 0(%r2), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
+
+; Check that TM(Y) does not allow an index.
+define double @f16(i8 *%src, i64 %index, double %a, double %b) {
+; CHECK-LABEL: f16:
+; CHECK: tm 0({{%r[1-5]}}), 1
+; CHECK: je {{\.L.*}}
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 %index
+  %byte = load i8 *%ptr
+  %and = and i8 %byte, 1
+  %cmp = icmp eq i8 %and, 0
+  %res = select i1 %cmp, double %b, double %a
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-49.ll b/test/CodeGen/SystemZ/int-cmp-49.ll
new file mode 100644
index 000000000000..83f18a2a18a6
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-49.ll
@@ -0,0 +1,49 @@
+; Test that we don't try to use z196 instructions on z10 for TMHH and TMHL.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0 | FileCheck %s
+
+@g = global i32 0
+
+; Check the lowest useful TMHL value.
+define void @f1(i64 %a) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: risblg
+; CHECK-NOT: risbhg
+; CHECK: tmhl {{%r[0-5]}}, 1
+; CHECK-NOT: risblg
+; CHECK-NOT: risbhg
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 4294967296
+  %cmp = icmp eq i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the lowest useful TMHH value.
+define void @f2(i64 %a) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: risblg
+; CHECK-NOT: risbhg
+; CHECK: tmhh {{%r[0-5]}}, 1
+; CHECK-NOT: risblg
+; CHECK-NOT: risbhg
+; CHECK: br %r14
+entry:
+  %and = and i64 %a, 281474976710656
+  %cmp = icmp ne i64 %and, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 1, i32 *@g
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-const-01.ll b/test/CodeGen/SystemZ/int-const-01.ll
index a580154e6b57..e94c05897fae 100644
--- a/test/CodeGen/SystemZ/int-const-01.ll
+++ b/test/CodeGen/SystemZ/int-const-01.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare void @foo(i32, i32, i32, i32)
+
 ; Check 0.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhi %r2, 0
 ; CHECK: br %r14
   ret i32 0
@@ -12,7 +14,7 @@ define i32 @f1() {
 
 ; Check the high end of the LHI range.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhi %r2, 32767
 ; CHECK: br %r14
   ret i32 32767
@@ -20,7 +22,7 @@ define i32 @f2() {
 
 ; Check the next value up, which must use LLILL instead.
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llill %r2, 32768
 ; CHECK: br %r14
   ret i32 32768
@@ -28,7 +30,7 @@ define i32 @f3() {
 
 ; Check the high end of the LLILL range.
 define i32 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llill %r2, 65535
 ; CHECK: br %r14
   ret i32 65535
@@ -36,7 +38,7 @@ define i32 @f4() {
 
 ; Check the first useful LLILH value, which is the next one up.
 define i32 @f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llilh %r2, 1
 ; CHECK: br %r14
   ret i32 65536
@@ -44,7 +46,7 @@ define i32 @f5() {
 
 ; Check the first useful IILF value, which is the next one up again.
 define i32 @f6() {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: iilf %r2, 65537
 ; CHECK: br %r14
   ret i32 65537
@@ -52,7 +54,7 @@ define i32 @f6() {
 
 ; Check the high end of the LLILH range.
 define i32 @f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llilh %r2, 65535
 ; CHECK: br %r14
   ret i32 -65536
@@ -60,7 +62,7 @@ define i32 @f7() {
 
 ; Check the next value up, which must use IILF.
 define i32 @f8() {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: iilf %r2, 4294901761
 ; CHECK: br %r14
   ret i32 -65535
@@ -68,7 +70,7 @@ define i32 @f8() {
 
 ; Check the highest useful IILF value, 0xffff7fff
 define i32 @f9() {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: iilf %r2, 4294934527
 ; CHECK: br %r14
   ret i32 -32769
@@ -76,7 +78,7 @@ define i32 @f9() {
 
 ; Check the next value up, which should use LHI.
 define i32 @f10() {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: lhi %r2, -32768
 ; CHECK: br %r14
   ret i32 -32768
@@ -84,8 +86,28 @@ define i32 @f10() {
 
 ; Check -1.
 define i32 @f11() {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: lhi %r2, -1
 ; CHECK: br %r14
   ret i32 -1
 }
+
+; Check that constant loads are rematerialized before each call and the return.
+define i32 @f12() {
+; CHECK-LABEL: f12:
+; CHECK-DAG: lhi %r2, 42
+; CHECK-DAG: llill %r3, 32768
+; CHECK-DAG: llilh %r4, 1
+; CHECK-DAG: iilf %r5, 65537
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: lhi %r2, 42
+; CHECK-DAG: llill %r3, 32768
+; CHECK-DAG: llilh %r4, 1
+; CHECK-DAG: iilf %r5, 65537
+; CHECK: brasl %r14, foo@PLT
+; CHECK: lhi %r2, 42
+; CHECK: br %r14
+  call void @foo(i32 42, i32 32768, i32 65536, i32 65537)
+  call void @foo(i32 42, i32 32768, i32 65536, i32 65537)
+  ret i32 42
+}
diff --git a/test/CodeGen/SystemZ/int-const-02.ll b/test/CodeGen/SystemZ/int-const-02.ll
index b345e3f2a2a1..e71abc69b3b6 100644
--- a/test/CodeGen/SystemZ/int-const-02.ll
+++ b/test/CodeGen/SystemZ/int-const-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare void @foo(i64, i64, i64, i64)
+
 ; Check 0.
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lghi %r2, 0
 ; CHECK-NEXT: br %r14
   ret i64 0
@@ -12,7 +14,7 @@ define i64 @f1() {
 
 ; Check the high end of the LGHI range.
 define i64 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lghi %r2, 32767
 ; CHECK-NEXT: br %r14
   ret i64 32767
@@ -20,7 +22,7 @@ define i64 @f2() {
 
 ; Check the next value up, which must use LLILL instead.
 define i64 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llill %r2, 32768
 ; CHECK-NEXT: br %r14
   ret i64 32768
@@ -28,7 +30,7 @@ define i64 @f3() {
 
 ; Check the high end of the LLILL range.
 define i64 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llill %r2, 65535
 ; CHECK-NEXT: br %r14
   ret i64 65535
@@ -36,7 +38,7 @@ define i64 @f4() {
 
 ; Check the first useful LLILH value, which is the next one up.
 define i64 @f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llilh %r2, 1
 ; CHECK-NEXT: br %r14
   ret i64 65536
@@ -44,7 +46,7 @@ define i64 @f5() {
 
 ; Check the first useful LGFI value, which is the next one up again.
 define i64 @f6() {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgfi %r2, 65537
 ; CHECK-NEXT: br %r14
   ret i64 65537
@@ -52,7 +54,7 @@ define i64 @f6() {
 
 ; Check the high end of the LGFI range.
 define i64 @f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgfi %r2, 2147483647
 ; CHECK-NEXT: br %r14
   ret i64 2147483647
@@ -60,7 +62,7 @@ define i64 @f7() {
 
 ; Check the next value up, which should use LLILH instead.
 define i64 @f8() {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llilh %r2, 32768
 ; CHECK-NEXT: br %r14
   ret i64 2147483648
@@ -68,7 +70,7 @@ define i64 @f8() {
 
 ; Check the next value up again, which should use LLILF.
 define i64 @f9() {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: llilf %r2, 2147483649
 ; CHECK-NEXT: br %r14
   ret i64 2147483649
@@ -76,7 +78,7 @@ define i64 @f9() {
 
 ; Check the high end of the LLILH range.
 define i64 @f10() {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llilh %r2, 65535
 ; CHECK-NEXT: br %r14
   ret i64 4294901760
@@ -84,7 +86,7 @@ define i64 @f10() {
 
 ; Check the next value up, which must use LLILF.
 define i64 @f11() {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: llilf %r2, 4294901761
 ; CHECK-NEXT: br %r14
   ret i64 4294901761
@@ -92,7 +94,7 @@ define i64 @f11() {
 
 ; Check the high end of the LLILF range.
 define i64 @f12() {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: llilf %r2, 4294967295
 ; CHECK-NEXT: br %r14
   ret i64 4294967295
@@ -100,7 +102,7 @@ define i64 @f12() {
 
 ; Check the lowest useful LLIHL value, which is the next one up.
 define i64 @f13() {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: br %r14
   ret i64 4294967296
@@ -108,7 +110,7 @@ define i64 @f13() {
 
 ; Check the next value up, which must use a combination of two instructions.
 define i64 @f14() {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: oill %r2, 1
 ; CHECK-NEXT: br %r14
@@ -117,7 +119,7 @@ define i64 @f14() {
 
 ; Check the high end of the OILL range.
 define i64 @f15() {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: oill %r2, 65535
 ; CHECK-NEXT: br %r14
@@ -126,7 +128,7 @@ define i64 @f15() {
 
 ; Check the next value up, which should use OILH instead.
 define i64 @f16() {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: oilh %r2, 1
 ; CHECK-NEXT: br %r14
@@ -135,7 +137,7 @@ define i64 @f16() {
 
 ; Check the next value up again, which should use OILF.
 define i64 @f17() {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: oilf %r2, 65537
 ; CHECK-NEXT: br %r14
@@ -144,7 +146,7 @@ define i64 @f17() {
 
 ; Check the high end of the OILH range.
 define i64 @f18() {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: oilh %r2, 65535
 ; CHECK-NEXT: br %r14
@@ -153,7 +155,7 @@ define i64 @f18() {
 
 ; Check the high end of the OILF range.
 define i64 @f19() {
-; CHECK: f19:
+; CHECK-LABEL: f19:
 ; CHECK: llihl %r2, 1
 ; CHECK-NEXT: oilf %r2, 4294967295
 ; CHECK-NEXT: br %r14
@@ -162,7 +164,7 @@ define i64 @f19() {
 
 ; Check the high end of the LLIHL range.
 define i64 @f20() {
-; CHECK: f20:
+; CHECK-LABEL: f20:
 ; CHECK: llihl %r2, 65535
 ; CHECK-NEXT: br %r14
   ret i64 281470681743360
@@ -170,7 +172,7 @@ define i64 @f20() {
 
 ; Check the lowest useful LLIHH value, which is 1<<32 greater than the above.
 define i64 @f21() {
-; CHECK: f21:
+; CHECK-LABEL: f21:
 ; CHECK: llihh %r2, 1
 ; CHECK-NEXT: br %r14
   ret i64 281474976710656
@@ -178,7 +180,7 @@ define i64 @f21() {
 
 ; Check the lowest useful LLIHF value, which is 1<<32 greater again.
 define i64 @f22() {
-; CHECK: f22:
+; CHECK-LABEL: f22:
 ; CHECK: llihf %r2, 65537
 ; CHECK-NEXT: br %r14
   ret i64 281479271677952
@@ -186,7 +188,7 @@ define i64 @f22() {
 
 ; Check the highest end of the LLIHH range.
 define i64 @f23() {
-; CHECK: f23:
+; CHECK-LABEL: f23:
 ; CHECK: llihh %r2, 65535
 ; CHECK-NEXT: br %r14
   ret i64 -281474976710656
@@ -194,7 +196,7 @@ define i64 @f23() {
 
 ; Check the next value up, which must use OILL too.
 define i64 @f24() {
-; CHECK: f24:
+; CHECK-LABEL: f24:
 ; CHECK: llihh %r2, 65535
 ; CHECK-NEXT: oill %r2, 1
 ; CHECK-NEXT: br %r14
@@ -203,7 +205,7 @@ define i64 @f24() {
 
 ; Check the high end of the LLIHF range.
 define i64 @f25() {
-; CHECK: f25:
+; CHECK-LABEL: f25:
 ; CHECK: llihf %r2, 4294967295
 ; CHECK-NEXT: br %r14
   ret i64 -4294967296
@@ -211,7 +213,7 @@ define i64 @f25() {
 
 ; Check -1.
 define i64 @f26() {
-; CHECK: f26:
+; CHECK-LABEL: f26:
 ; CHECK: lghi %r2, -1
 ; CHECK-NEXT: br %r14
   ret i64 -1
@@ -219,7 +221,7 @@ define i64 @f26() {
 
 ; Check the low end of the LGHI range.
 define i64 @f27() {
-; CHECK: f27:
+; CHECK-LABEL: f27:
 ; CHECK: lghi %r2, -32768
 ; CHECK-NEXT: br %r14
   ret i64 -32768
@@ -227,7 +229,7 @@ define i64 @f27() {
 
 ; Check the next value down, which must use LGFI instead.
 define i64 @f28() {
-; CHECK: f28:
+; CHECK-LABEL: f28:
 ; CHECK: lgfi %r2, -32769
 ; CHECK-NEXT: br %r14
   ret i64 -32769
@@ -235,7 +237,7 @@ define i64 @f28() {
 
 ; Check the low end of the LGFI range.
 define i64 @f29() {
-; CHECK: f29:
+; CHECK-LABEL: f29:
 ; CHECK: lgfi %r2, -2147483648
 ; CHECK-NEXT: br %r14
   ret i64 -2147483648
@@ -243,9 +245,41 @@ define i64 @f29() {
 
 ; Check the next value down, which needs a two-instruction sequence.
 define i64 @f30() {
-; CHECK: f30:
+; CHECK-LABEL: f30:
 ; CHECK: llihf %r2, 4294967295
 ; CHECK-NEXT: oilf %r2, 2147483647
 ; CHECK-NEXT: br %r14
   ret i64 -2147483649
 }
+
+; Check that constant loads are rematerialized before each of the four calls.
+define i64 @f31() {
+; CHECK-LABEL: f31:
+; CHECK-DAG: lghi %r2, 42
+; CHECK-DAG: lgfi %r3, 65537
+; CHECK-DAG: llilf %r4, 2147483649
+; CHECK-DAG: llihf %r5, 65537
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: llill %r2, 32768
+; CHECK-DAG: llilh %r3, 1
+; CHECK-DAG: llihl %r4, 1
+; CHECK-DAG: llihh %r5, 1
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: lghi %r2, 42
+; CHECK-DAG: lgfi %r3, 65537
+; CHECK-DAG: llilf %r4, 2147483649
+; CHECK-DAG: llihf %r5, 65537
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: llill %r2, 32768
+; CHECK-DAG: llilh %r3, 1
+; CHECK-DAG: llihl %r4, 1
+; CHECK-DAG: llihh %r5, 1
+; CHECK: brasl %r14, foo@PLT
+; CHECK: lghi %r2, 42
+; CHECK: br %r14
+  call void @foo(i64 42, i64 65537, i64 2147483649, i64 281479271677952)
+  call void @foo(i64 32768, i64 65536, i64 4294967296, i64 281474976710656)
+  call void @foo(i64 42, i64 65537, i64 2147483649, i64 281479271677952)
+  call void @foo(i64 32768, i64 65536, i64 4294967296, i64 281474976710656)
+  ret i64 42
+}
diff --git a/test/CodeGen/SystemZ/int-const-03.ll b/test/CodeGen/SystemZ/int-const-03.ll
index 807b7e463ced..af1cef2c138a 100644
--- a/test/CodeGen/SystemZ/int-const-03.ll
+++ b/test/CodeGen/SystemZ/int-const-03.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the unsigned range.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvi 0(%r2), 0
 ; CHECK: br %r14
   store i8 0, i8 *%ptr
@@ -13,7 +13,7 @@ define void @f1(i8 *%ptr) {
 
 ; Check the high end of the signed range.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mvi 0(%r2), 127
 ; CHECK: br %r14
   store i8 127, i8 *%ptr
@@ -22,7 +22,7 @@ define void @f2(i8 *%ptr) {
 
 ; Check the next value up.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mvi 0(%r2), 128
 ; CHECK: br %r14
   store i8 -128, i8 *%ptr
@@ -31,7 +31,7 @@ define void @f3(i8 *%ptr) {
 
 ; Check the high end of the unsigned range.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mvi 0(%r2), 255
 ; CHECK: br %r14
   store i8 255, i8 *%ptr
@@ -40,7 +40,7 @@ define void @f4(i8 *%ptr) {
 
 ; Check -1.
 define void @f5(i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvi 0(%r2), 255
 ; CHECK: br %r14
   store i8 -1, i8 *%ptr
@@ -49,7 +49,7 @@ define void @f5(i8 *%ptr) {
 
 ; Check the low end of the signed range.
 define void @f6(i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: mvi 0(%r2), 128
 ; CHECK: br %r14
   store i8 -128, i8 *%ptr
@@ -58,7 +58,7 @@ define void @f6(i8 *%ptr) {
 
 ; Check the next value down.
 define void @f7(i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mvi 0(%r2), 127
 ; CHECK: br %r14
   store i8 -129, i8 *%ptr
@@ -67,7 +67,7 @@ define void @f7(i8 *%ptr) {
 
 ; Check the high end of the MVI range.
 define void @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: mvi 4095(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -77,7 +77,7 @@ define void @f8(i8 *%src) {
 
 ; Check the next byte up, which should use MVIY instead of MVI.
 define void @f9(i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: mviy 4096(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -87,7 +87,7 @@ define void @f9(i8 *%src) {
 
 ; Check the high end of the MVIY range.
 define void @f10(i8 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: mviy 524287(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -98,7 +98,7 @@ define void @f10(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f11(i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r2, 524288
 ; CHECK: mvi 0(%r2), 42
 ; CHECK: br %r14
@@ -109,7 +109,7 @@ define void @f11(i8 *%src) {
 
 ; Check the high end of the negative MVIY range.
 define void @f12(i8 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: mviy -1(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -119,7 +119,7 @@ define void @f12(i8 *%src) {
 
 ; Check the low end of the MVIY range.
 define void @f13(i8 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: mviy -524288(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -130,7 +130,7 @@ define void @f13(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f14(i8 *%src) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: agfi %r2, -524289
 ; CHECK: mvi 0(%r2), 42
 ; CHECK: br %r14
@@ -139,11 +139,11 @@ define void @f14(i8 *%src) {
   ret void
 }
 
-; Check that MVI does not allow an index
+; Check that MVI does not allow an index.  We prefer STC in that case.
 define void @f15(i64 %src, i64 %index) {
-; CHECK: f15:
-; CHECK: agr %r2, %r3
-; CHECK: mvi 4095(%r2), 42
+; CHECK-LABEL: f15:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: stc [[TMP]], 4095({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4095
@@ -152,11 +152,11 @@ define void @f15(i64 %src, i64 %index) {
   ret void
 }
 
-; Check that MVIY does not allow an index
+; Check that MVIY does not allow an index.  We prefer STCY in that case.
 define void @f16(i64 %src, i64 %index) {
-; CHECK: f16:
-; CHECK: agr %r2, %r3
-; CHECK: mviy 4096(%r2), 42
+; CHECK-LABEL: f16:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: stcy [[TMP]], 4096({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
diff --git a/test/CodeGen/SystemZ/int-const-04.ll b/test/CodeGen/SystemZ/int-const-04.ll
index 41c7306c89aa..aced50b5601b 100644
--- a/test/CodeGen/SystemZ/int-const-04.ll
+++ b/test/CodeGen/SystemZ/int-const-04.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the unsigned range.
 define void @f1(i16 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvhhi 0(%r2), 0
 ; CHECK: br %r14
   store i16 0, i16 *%ptr
@@ -13,7 +13,7 @@ define void @f1(i16 *%ptr) {
 
 ; Check the high end of the signed range.
 define void @f2(i16 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mvhhi 0(%r2), 32767
 ; CHECK: br %r14
   store i16 32767, i16 *%ptr
@@ -22,7 +22,7 @@ define void @f2(i16 *%ptr) {
 
 ; Check the next value up.
 define void @f3(i16 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mvhhi 0(%r2), -32768
 ; CHECK: br %r14
   store i16 -32768, i16 *%ptr
@@ -31,7 +31,7 @@ define void @f3(i16 *%ptr) {
 
 ; Check the high end of the unsigned range.
 define void @f4(i16 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mvhhi 0(%r2), -1
 ; CHECK: br %r14
   store i16 65535, i16 *%ptr
@@ -40,7 +40,7 @@ define void @f4(i16 *%ptr) {
 
 ; Check -1.
 define void @f5(i16 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvhhi 0(%r2), -1
 ; CHECK: br %r14
   store i16 -1, i16 *%ptr
@@ -49,7 +49,7 @@ define void @f5(i16 *%ptr) {
 
 ; Check the low end of the signed range.
 define void @f6(i16 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: mvhhi 0(%r2), -32768
 ; CHECK: br %r14
   store i16 -32768, i16 *%ptr
@@ -58,7 +58,7 @@ define void @f6(i16 *%ptr) {
 
 ; Check the next value down.
 define void @f7(i16 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mvhhi 0(%r2), 32767
 ; CHECK: br %r14
   store i16 -32769, i16 *%ptr
@@ -67,7 +67,7 @@ define void @f7(i16 *%ptr) {
 
 ; Check the high end of the MVHHI range.
 define void @f8(i16 *%a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: mvhhi 4094(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%a, i64 2047
@@ -75,34 +75,34 @@ define void @f8(i16 *%a) {
   ret void
 }
 
-; Check the next halfword up, which needs separate address logic.
-; Other sequences besides this one would be OK.
+; Check the next halfword up, which is out of range.  We prefer STHY
+; in that case.
 define void @f9(i16 *%a) {
-; CHECK: f9:
-; CHECK: aghi %r2, 4096
-; CHECK: mvhhi 0(%r2), 42
+; CHECK-LABEL: f9:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sthy [[TMP]], 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%a, i64 2048
   store i16 42, i16 *%ptr
   ret void
 }
 
-; Check negative displacements, which also need separate address logic.
+; Check negative displacements, for which we again prefer STHY.
 define void @f10(i16 *%a) {
-; CHECK: f10:
-; CHECK: aghi %r2, -2
-; CHECK: mvhhi 0(%r2), 42
+; CHECK-LABEL: f10:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sthy [[TMP]], -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%a, i64 -1
   store i16 42, i16 *%ptr
   ret void
 }
 
-; Check that MVHHI does not allow an index
+; Check that MVHHI does not allow an index.
 define void @f11(i64 %src, i64 %index) {
-; CHECK: f11:
-; CHECK: agr %r2, %r3
-; CHECK: mvhhi 0(%r2), 42
+; CHECK-LABEL: f11:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sth [[TMP]], 0({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
   %add = add i64 %src, %index
   %ptr = inttoptr i64 %add to i16 *
diff --git a/test/CodeGen/SystemZ/int-const-05.ll b/test/CodeGen/SystemZ/int-const-05.ll
index b85fd6b68207..98d6851c197d 100644
--- a/test/CodeGen/SystemZ/int-const-05.ll
+++ b/test/CodeGen/SystemZ/int-const-05.ll
@@ -4,7 +4,7 @@
 
 ; Check moves of zero.
 define void @f1(i32 *%a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvhi 0(%r2), 0
 ; CHECK: br %r14
   store i32 0, i32 *%a
@@ -13,7 +13,7 @@ define void @f1(i32 *%a) {
 
 ; Check the high end of the signed 16-bit range.
 define void @f2(i32 *%a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mvhi 0(%r2), 32767
 ; CHECK: br %r14
   store i32 32767, i32 *%a
@@ -22,7 +22,7 @@ define void @f2(i32 *%a) {
 
 ; Check the next value up, which can't use MVHI.
 define void @f3(i32 *%a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: mvhi
 ; CHECK: br %r14
   store i32 32768, i32 *%a
@@ -31,7 +31,7 @@ define void @f3(i32 *%a) {
 
 ; Check moves of -1.
 define void @f4(i32 *%a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mvhi 0(%r2), -1
 ; CHECK: br %r14
   store i32 -1, i32 *%a
@@ -40,7 +40,7 @@ define void @f4(i32 *%a) {
 
 ; Check the low end of the MVHI range.
 define void @f5(i32 *%a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvhi 0(%r2), -32768
 ; CHECK: br %r14
   store i32 -32768, i32 *%a
@@ -49,7 +49,7 @@ define void @f5(i32 *%a) {
 
 ; Check the next value down, which can't use MVHI.
 define void @f6(i32 *%a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: mvhi
 ; CHECK: br %r14
   store i32 -32769, i32 *%a
@@ -58,7 +58,7 @@ define void @f6(i32 *%a) {
 
 ; Check the high end of the MVHI range.
 define void @f7(i32 *%a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mvhi 4092(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%a, i64 1023
@@ -66,34 +66,33 @@ define void @f7(i32 *%a) {
   ret void
 }
 
-; Check the next word up, which needs separate address logic.
-; Other sequences besides this one would be OK.
+; Check the next word up, which is out of range.  We prefer STY in that case.
 define void @f8(i32 *%a) {
-; CHECK: f8:
-; CHECK: aghi %r2, 4096
-; CHECK: mvhi 0(%r2), 42
+; CHECK-LABEL: f8:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sty [[TMP]], 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%a, i64 1024
   store i32 42, i32 *%ptr
   ret void
 }
 
-; Check negative displacements, which also need separate address logic.
+; Check negative displacements, for which we again prefer STY.
 define void @f9(i32 *%a) {
-; CHECK: f9:
-; CHECK: aghi %r2, -4
-; CHECK: mvhi 0(%r2), 42
+; CHECK-LABEL: f9:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sty [[TMP]], -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%a, i64 -1
   store i32 42, i32 *%ptr
   ret void
 }
 
-; Check that MVHI does not allow an index
+; Check that MVHI does not allow an index.
 define void @f10(i64 %src, i64 %index) {
-; CHECK: f10:
-; CHECK: agr %r2, %r3
-; CHECK: mvhi 0(%r2), 42
+; CHECK-LABEL: f10:
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: st [[TMP]], 0({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
   %add = add i64 %src, %index
   %ptr = inttoptr i64 %add to i32 *
diff --git a/test/CodeGen/SystemZ/int-const-06.ll b/test/CodeGen/SystemZ/int-const-06.ll
index 9f14347cf880..cf07c665dde7 100644
--- a/test/CodeGen/SystemZ/int-const-06.ll
+++ b/test/CodeGen/SystemZ/int-const-06.ll
@@ -4,7 +4,7 @@
 
 ; Check moves of zero.
 define void @f1(i64 *%a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvghi 0(%r2), 0
 ; CHECK: br %r14
   store i64 0, i64 *%a
@@ -13,7 +13,7 @@ define void @f1(i64 *%a) {
 
 ; Check the high end of the signed 16-bit range.
 define void @f2(i64 *%a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mvghi 0(%r2), 32767
 ; CHECK: br %r14
   store i64 32767, i64 *%a
@@ -22,7 +22,7 @@ define void @f2(i64 *%a) {
 
 ; Check the next value up, which can't use MVGHI.
 define void @f3(i64 *%a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: mvghi
 ; CHECK: br %r14
   store i64 32768, i64 *%a
@@ -31,7 +31,7 @@ define void @f3(i64 *%a) {
 
 ; Check moves of -1.
 define void @f4(i64 *%a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mvghi 0(%r2), -1
 ; CHECK: br %r14
   store i64 -1, i64 *%a
@@ -40,7 +40,7 @@ define void @f4(i64 *%a) {
 
 ; Check the low end of the MVGHI range.
 define void @f5(i64 *%a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvghi 0(%r2), -32768
 ; CHECK: br %r14
   store i64 -32768, i64 *%a
@@ -49,7 +49,7 @@ define void @f5(i64 *%a) {
 
 ; Check the next value down, which can't use MVGHI.
 define void @f6(i64 *%a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: mvghi
 ; CHECK: br %r14
   store i64 -32769, i64 *%a
@@ -58,7 +58,7 @@ define void @f6(i64 *%a) {
 
 ; Check the high end of the MVGHI range.
 define void @f7(i64 *%a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mvghi 4088(%r2), 42
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%a, i64 511
@@ -66,34 +66,34 @@ define void @f7(i64 *%a) {
   ret void
 }
 
-; Check the next doubleword up, which needs separate address logic.
-; Other sequences besides this one would be OK.
+; Check the next doubleword up, which is out of range.  We prefer STG
+; in that case.
 define void @f8(i64 *%a) {
-; CHECK: f8:
-; CHECK: aghi %r2, 4096
-; CHECK: mvghi 0(%r2), 42
+; CHECK-LABEL: f8:
+; CHECK: lghi [[TMP:%r[0-5]]], 42
+; CHECK: stg [[TMP]], 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%a, i64 512
   store i64 42, i64 *%ptr
   ret void
 }
 
-; Check negative displacements, which also need separate address logic.
+; Check negative displacements, for which we again prefer STG.
 define void @f9(i64 *%a) {
-; CHECK: f9:
-; CHECK: aghi %r2, -8
-; CHECK: mvghi 0(%r2), 42
+; CHECK-LABEL: f9:
+; CHECK: lghi [[TMP:%r[0-5]]], 42
+; CHECK: stg [[TMP]], -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%a, i64 -1
   store i64 42, i64 *%ptr
   ret void
 }
 
-; Check that MVGHI does not allow an index
+; Check that MVGHI does not allow an index.
 define void @f10(i64 %src, i64 %index) {
-; CHECK: f10:
-; CHECK: agr %r2, %r3
-; CHECK: mvghi 0(%r2), 42
+; CHECK-LABEL: f10:
+; CHECK: lghi [[TMP:%r[0-5]]], 42
+; CHECK: stg [[TMP]], 0({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
   %add = add i64 %src, %index
   %ptr = inttoptr i64 %add to i64 *
diff --git a/test/CodeGen/SystemZ/int-conv-01.ll b/test/CodeGen/SystemZ/int-conv-01.ll
index 643ac6ae2510..e5c411cdec1d 100644
--- a/test/CodeGen/SystemZ/int-conv-01.ll
+++ b/test/CodeGen/SystemZ/int-conv-01.ll
@@ -4,9 +4,9 @@
 
 ; Test register extension, starting with an i32.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lbr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i32 %a to i8
   %ext = sext i8 %byte to i32
   ret i32 %ext
@@ -14,9 +14,9 @@ define i32 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i32 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lbr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i64 %a to i8
   %ext = sext i8 %byte to i32
   ret i32 %ext
@@ -24,7 +24,7 @@ define i32 @f2(i64 %a) {
 
 ; Check LB with no displacement.
 define i32 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
   %byte = load i8 *%src
@@ -34,7 +34,7 @@ define i32 @f3(i8 *%src) {
 
 ; Check the high end of the LB range.
 define i32 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lb %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -46,7 +46,7 @@ define i32 @f4(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
@@ -58,7 +58,7 @@ define i32 @f5(i8 *%src) {
 
 ; Check the high end of the negative LB range.
 define i32 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lb %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -69,7 +69,7 @@ define i32 @f6(i8 *%src) {
 
 ; Check the low end of the LB range.
 define i32 @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lb %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -81,7 +81,7 @@ define i32 @f7(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524289
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
@@ -93,7 +93,7 @@ define i32 @f8(i8 *%src) {
 
 ; Check that LB allows an index
 define i32 @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lb %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -103,3 +103,97 @@ define i32 @f9(i64 %src, i64 %index) {
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
+
+; Test a case where we spill the source of at least one LBR.  We want
+; to use LB if possible; the CHECK accepts an LB from 163 or 167(%r15).
+define void @f10(i32 *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: lb {{%r[0-9]+}}, 16{{[37]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %trunc0 = trunc i32 %val0 to i8
+  %trunc1 = trunc i32 %val1 to i8
+  %trunc2 = trunc i32 %val2 to i8
+  %trunc3 = trunc i32 %val3 to i8
+  %trunc4 = trunc i32 %val4 to i8
+  %trunc5 = trunc i32 %val5 to i8
+  %trunc6 = trunc i32 %val6 to i8
+  %trunc7 = trunc i32 %val7 to i8
+  %trunc8 = trunc i32 %val8 to i8
+  %trunc9 = trunc i32 %val9 to i8
+  %trunc10 = trunc i32 %val10 to i8
+  %trunc11 = trunc i32 %val11 to i8
+  %trunc12 = trunc i32 %val12 to i8
+  %trunc13 = trunc i32 %val13 to i8
+  %trunc14 = trunc i32 %val14 to i8
+  %trunc15 = trunc i32 %val15 to i8
+
+  %ext0 = sext i8 %trunc0 to i32
+  %ext1 = sext i8 %trunc1 to i32
+  %ext2 = sext i8 %trunc2 to i32
+  %ext3 = sext i8 %trunc3 to i32
+  %ext4 = sext i8 %trunc4 to i32
+  %ext5 = sext i8 %trunc5 to i32
+  %ext6 = sext i8 %trunc6 to i32
+  %ext7 = sext i8 %trunc7 to i32
+  %ext8 = sext i8 %trunc8 to i32
+  %ext9 = sext i8 %trunc9 to i32
+  %ext10 = sext i8 %trunc10 to i32
+  %ext11 = sext i8 %trunc11 to i32
+  %ext12 = sext i8 %trunc12 to i32
+  %ext13 = sext i8 %trunc13 to i32
+  %ext14 = sext i8 %trunc14 to i32
+  %ext15 = sext i8 %trunc15 to i32
+
+  store volatile i32 %val0, i32 *%ptr
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+
+  store volatile i32 %ext0, i32 *%ptr
+  store volatile i32 %ext1, i32 *%ptr
+  store volatile i32 %ext2, i32 *%ptr
+  store volatile i32 %ext3, i32 *%ptr
+  store volatile i32 %ext4, i32 *%ptr
+  store volatile i32 %ext5, i32 *%ptr
+  store volatile i32 %ext6, i32 *%ptr
+  store volatile i32 %ext7, i32 *%ptr
+  store volatile i32 %ext8, i32 *%ptr
+  store volatile i32 %ext9, i32 *%ptr
+  store volatile i32 %ext10, i32 *%ptr
+  store volatile i32 %ext11, i32 *%ptr
+  store volatile i32 %ext12, i32 *%ptr
+  store volatile i32 %ext13, i32 *%ptr
+  store volatile i32 %ext14, i32 *%ptr
+  store volatile i32 %ext15, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-02.ll b/test/CodeGen/SystemZ/int-conv-02.ll
index 86144d3e6450..dd7760d08cf5 100644
--- a/test/CodeGen/SystemZ/int-conv-02.ll
+++ b/test/CodeGen/SystemZ/int-conv-02.ll
@@ -1,12 +1,13 @@
-; Test zero extensions from a byte to an i32.
+; Test zero extensions from a byte to an i32.  The tests here
+; assume z10 register pressure, without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Test register extension, starting with an i32.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: llcr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i32 %a to i8
   %ext = zext i8 %byte to i32
   ret i32 %ext
@@ -14,9 +15,9 @@ define i32 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i32 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llcr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i64 %a to i8
   %ext = zext i8 %byte to i32
   ret i32 %ext
@@ -24,16 +25,16 @@ define i32 @f2(i64 %a) {
 
 ; Check ANDs that are equivalent to zero extension.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llcr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = and i32 %a, 255
   ret i32 %ext
 }
 
 ; Check LLC with no displacement.
 define i32 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llc %r2, 0(%r2)
 ; CHECK: br %r14
   %byte = load i8 *%src
@@ -43,7 +44,7 @@ define i32 @f4(i8 *%src) {
 
 ; Check the high end of the LLC range.
 define i32 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llc %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -55,7 +56,7 @@ define i32 @f5(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, 524288
 ; CHECK: llc %r2, 0(%r2)
 ; CHECK: br %r14
@@ -67,7 +68,7 @@ define i32 @f6(i8 *%src) {
 
 ; Check the high end of the negative LLC range.
 define i32 @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llc %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -78,7 +79,7 @@ define i32 @f7(i8 *%src) {
 
 ; Check the low end of the LLC range.
 define i32 @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llc %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -90,7 +91,7 @@ define i32 @f8(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524289
 ; CHECK: llc %r2, 0(%r2)
 ; CHECK: br %r14
@@ -102,7 +103,7 @@ define i32 @f9(i8 *%src) {
 
 ; Check that LLC allows an index
 define i32 @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llc %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -112,3 +113,97 @@ define i32 @f10(i64 %src, i64 %index) {
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
+
+; Test a case where we spill the source of at least one LLCR (16 values and their
+; zero-extended low bytes are all stored below).  We want to use LLC if possible.
+define void @f11(i32 *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: llc {{%r[0-9]+}}, 16{{[37]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr  ; 16 volatile loads of *%ptr (volatile presumably keeps them distinct)
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %trunc0 = trunc i32 %val0 to i8  ; keep only the low byte of each value
+  %trunc1 = trunc i32 %val1 to i8
+  %trunc2 = trunc i32 %val2 to i8
+  %trunc3 = trunc i32 %val3 to i8
+  %trunc4 = trunc i32 %val4 to i8
+  %trunc5 = trunc i32 %val5 to i8
+  %trunc6 = trunc i32 %val6 to i8
+  %trunc7 = trunc i32 %val7 to i8
+  %trunc8 = trunc i32 %val8 to i8
+  %trunc9 = trunc i32 %val9 to i8
+  %trunc10 = trunc i32 %val10 to i8
+  %trunc11 = trunc i32 %val11 to i8
+  %trunc12 = trunc i32 %val12 to i8
+  %trunc13 = trunc i32 %val13 to i8
+  %trunc14 = trunc i32 %val14 to i8
+  %trunc15 = trunc i32 %val15 to i8
+
+  %ext0 = zext i8 %trunc0 to i32  ; zero-extend each byte back to i32: the LLCR/LLC under test
+  %ext1 = zext i8 %trunc1 to i32
+  %ext2 = zext i8 %trunc2 to i32
+  %ext3 = zext i8 %trunc3 to i32
+  %ext4 = zext i8 %trunc4 to i32
+  %ext5 = zext i8 %trunc5 to i32
+  %ext6 = zext i8 %trunc6 to i32
+  %ext7 = zext i8 %trunc7 to i32
+  %ext8 = zext i8 %trunc8 to i32
+  %ext9 = zext i8 %trunc9 to i32
+  %ext10 = zext i8 %trunc10 to i32
+  %ext11 = zext i8 %trunc11 to i32
+  %ext12 = zext i8 %trunc12 to i32
+  %ext13 = zext i8 %trunc13 to i32
+  %ext14 = zext i8 %trunc14 to i32
+  %ext15 = zext i8 %trunc15 to i32
+
+  store volatile i32 %val0, i32 *%ptr  ; store the unextended values first (presumably to keep them live alongside the extensions)
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+
+  store volatile i32 %ext0, i32 *%ptr  ; then store every extended value
+  store volatile i32 %ext1, i32 *%ptr
+  store volatile i32 %ext2, i32 *%ptr
+  store volatile i32 %ext3, i32 *%ptr
+  store volatile i32 %ext4, i32 *%ptr
+  store volatile i32 %ext5, i32 *%ptr
+  store volatile i32 %ext6, i32 *%ptr
+  store volatile i32 %ext7, i32 *%ptr
+  store volatile i32 %ext8, i32 *%ptr
+  store volatile i32 %ext9, i32 *%ptr
+  store volatile i32 %ext10, i32 *%ptr
+  store volatile i32 %ext11, i32 *%ptr
+  store volatile i32 %ext12, i32 *%ptr
+  store volatile i32 %ext13, i32 *%ptr
+  store volatile i32 %ext14, i32 *%ptr
+  store volatile i32 %ext15, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-03.ll b/test/CodeGen/SystemZ/int-conv-03.ll
index 73b8dbb43a13..cad9581296a4 100644
--- a/test/CodeGen/SystemZ/int-conv-03.ll
+++ b/test/CodeGen/SystemZ/int-conv-03.ll
@@ -4,9 +4,9 @@
 
 ; Test register extension, starting with an i32.
 define i64 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lgbr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i32 %a to i8
   %ext = sext i8 %byte to i64
   ret i64 %ext
@@ -14,9 +14,9 @@ define i64 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lgbr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i64 %a to i8
   %ext = sext i8 %byte to i64
   ret i64 %ext
@@ -24,7 +24,7 @@ define i64 @f2(i64 %a) {
 
 ; Check LGB with no displacement.
 define i64 @f3(i8 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lgb %r2, 0(%r2)
 ; CHECK: br %r14
   %byte = load i8 *%src
@@ -34,7 +34,7 @@ define i64 @f3(i8 *%src) {
 
 ; Check the high end of the LGB range.
 define i64 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgb %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -46,7 +46,7 @@ define i64 @f4(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lgb %r2, 0(%r2)
 ; CHECK: br %r14
@@ -58,7 +58,7 @@ define i64 @f5(i8 *%src) {
 
 ; Check the high end of the negative LGB range.
 define i64 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgb %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -69,7 +69,7 @@ define i64 @f6(i8 *%src) {
 
 ; Check the low end of the LGB range.
 define i64 @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgb %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -81,7 +81,7 @@ define i64 @f7(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524289
 ; CHECK: lgb %r2, 0(%r2)
 ; CHECK: br %r14
@@ -93,7 +93,7 @@ define i64 @f8(i8 *%src) {
 
 ; Check that LGB allows an index
 define i64 @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lgb %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -103,3 +103,97 @@ define i64 @f9(i64 %src, i64 %index) {
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
+
+; Test a case where we spill the source of at least one LGBR (16 values and their
+; sign-extended low bytes are all stored below).  We want to use LGB if possible.
+define void @f10(i64 *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: lgb {{%r[0-9]+}}, 167(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i64 *%ptr  ; 16 volatile loads of *%ptr (volatile presumably keeps them distinct)
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %trunc0 = trunc i64 %val0 to i8  ; keep only the low byte of each value
+  %trunc1 = trunc i64 %val1 to i8
+  %trunc2 = trunc i64 %val2 to i8
+  %trunc3 = trunc i64 %val3 to i8
+  %trunc4 = trunc i64 %val4 to i8
+  %trunc5 = trunc i64 %val5 to i8
+  %trunc6 = trunc i64 %val6 to i8
+  %trunc7 = trunc i64 %val7 to i8
+  %trunc8 = trunc i64 %val8 to i8
+  %trunc9 = trunc i64 %val9 to i8
+  %trunc10 = trunc i64 %val10 to i8
+  %trunc11 = trunc i64 %val11 to i8
+  %trunc12 = trunc i64 %val12 to i8
+  %trunc13 = trunc i64 %val13 to i8
+  %trunc14 = trunc i64 %val14 to i8
+  %trunc15 = trunc i64 %val15 to i8
+
+  %ext0 = sext i8 %trunc0 to i64  ; sign-extend each byte back to i64: the LGBR/LGB under test
+  %ext1 = sext i8 %trunc1 to i64
+  %ext2 = sext i8 %trunc2 to i64
+  %ext3 = sext i8 %trunc3 to i64
+  %ext4 = sext i8 %trunc4 to i64
+  %ext5 = sext i8 %trunc5 to i64
+  %ext6 = sext i8 %trunc6 to i64
+  %ext7 = sext i8 %trunc7 to i64
+  %ext8 = sext i8 %trunc8 to i64
+  %ext9 = sext i8 %trunc9 to i64
+  %ext10 = sext i8 %trunc10 to i64
+  %ext11 = sext i8 %trunc11 to i64
+  %ext12 = sext i8 %trunc12 to i64
+  %ext13 = sext i8 %trunc13 to i64
+  %ext14 = sext i8 %trunc14 to i64
+  %ext15 = sext i8 %trunc15 to i64
+
+  store volatile i64 %val0, i64 *%ptr  ; store the unextended values first (presumably to keep them live alongside the extensions)
+  store volatile i64 %val1, i64 *%ptr
+  store volatile i64 %val2, i64 *%ptr
+  store volatile i64 %val3, i64 *%ptr
+  store volatile i64 %val4, i64 *%ptr
+  store volatile i64 %val5, i64 *%ptr
+  store volatile i64 %val6, i64 *%ptr
+  store volatile i64 %val7, i64 *%ptr
+  store volatile i64 %val8, i64 *%ptr
+  store volatile i64 %val9, i64 *%ptr
+  store volatile i64 %val10, i64 *%ptr
+  store volatile i64 %val11, i64 *%ptr
+  store volatile i64 %val12, i64 *%ptr
+  store volatile i64 %val13, i64 *%ptr
+  store volatile i64 %val14, i64 *%ptr
+  store volatile i64 %val15, i64 *%ptr
+
+  store volatile i64 %ext0, i64 *%ptr  ; then store every extended value
+  store volatile i64 %ext1, i64 *%ptr
+  store volatile i64 %ext2, i64 *%ptr
+  store volatile i64 %ext3, i64 *%ptr
+  store volatile i64 %ext4, i64 *%ptr
+  store volatile i64 %ext5, i64 *%ptr
+  store volatile i64 %ext6, i64 *%ptr
+  store volatile i64 %ext7, i64 *%ptr
+  store volatile i64 %ext8, i64 *%ptr
+  store volatile i64 %ext9, i64 *%ptr
+  store volatile i64 %ext10, i64 *%ptr
+  store volatile i64 %ext11, i64 *%ptr
+  store volatile i64 %ext12, i64 *%ptr
+  store volatile i64 %ext13, i64 *%ptr
+  store volatile i64 %ext14, i64 *%ptr
+  store volatile i64 %ext15, i64 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-04.ll b/test/CodeGen/SystemZ/int-conv-04.ll
index 4cec5242e880..1c6be7b6e8a4 100644
--- a/test/CodeGen/SystemZ/int-conv-04.ll
+++ b/test/CodeGen/SystemZ/int-conv-04.ll
@@ -4,9 +4,9 @@
 
 ; Test register extension, starting with an i32.
 define i64 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: llgcr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i32 %a to i8
   %ext = zext i8 %byte to i64
   ret i64 %ext
@@ -14,9 +14,9 @@ define i64 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llgcr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %byte = trunc i64 %a to i8
   %ext = zext i8 %byte to i64
   ret i64 %ext
@@ -24,16 +24,16 @@ define i64 @f2(i64 %a) {
 
 ; Check ANDs that are equivalent to zero extension.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llgcr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = and i64 %a, 255
   ret i64 %ext
 }
 
 ; Check LLGC with no displacement.
 define i64 @f4(i8 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llgc %r2, 0(%r2)
 ; CHECK: br %r14
   %byte = load i8 *%src
@@ -43,7 +43,7 @@ define i64 @f4(i8 *%src) {
 
 ; Check the high end of the LLGC range.
 define i64 @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llgc %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -55,7 +55,7 @@ define i64 @f5(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, 524288
 ; CHECK: llgc %r2, 0(%r2)
 ; CHECK: br %r14
@@ -67,7 +67,7 @@ define i64 @f6(i8 *%src) {
 
 ; Check the high end of the negative LLGC range.
 define i64 @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llgc %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -78,7 +78,7 @@ define i64 @f7(i8 *%src) {
 
 ; Check the low end of the LLGC range.
 define i64 @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llgc %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -90,7 +90,7 @@ define i64 @f8(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f9(i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524289
 ; CHECK: llgc %r2, 0(%r2)
 ; CHECK: br %r14
@@ -102,7 +102,7 @@ define i64 @f9(i8 *%src) {
 
 ; Check that LLGC allows an index
 define i64 @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llgc %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -112,3 +112,97 @@ define i64 @f10(i64 %src, i64 %index) {
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
+
+; Test a case where we spill the source of at least one LLGCR (16 values and their
+; zero-extended low bytes are all stored below).  We want to use LLGC if possible.
+define void @f11(i64 *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: llgc {{%r[0-9]+}}, 167(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i64 *%ptr  ; 16 volatile loads of *%ptr (volatile presumably keeps them distinct)
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %trunc0 = trunc i64 %val0 to i8  ; keep only the low byte of each value
+  %trunc1 = trunc i64 %val1 to i8
+  %trunc2 = trunc i64 %val2 to i8
+  %trunc3 = trunc i64 %val3 to i8
+  %trunc4 = trunc i64 %val4 to i8
+  %trunc5 = trunc i64 %val5 to i8
+  %trunc6 = trunc i64 %val6 to i8
+  %trunc7 = trunc i64 %val7 to i8
+  %trunc8 = trunc i64 %val8 to i8
+  %trunc9 = trunc i64 %val9 to i8
+  %trunc10 = trunc i64 %val10 to i8
+  %trunc11 = trunc i64 %val11 to i8
+  %trunc12 = trunc i64 %val12 to i8
+  %trunc13 = trunc i64 %val13 to i8
+  %trunc14 = trunc i64 %val14 to i8
+  %trunc15 = trunc i64 %val15 to i8
+
+  %ext0 = zext i8 %trunc0 to i64  ; zero-extend each byte back to i64: the LLGCR/LLGC under test
+  %ext1 = zext i8 %trunc1 to i64
+  %ext2 = zext i8 %trunc2 to i64
+  %ext3 = zext i8 %trunc3 to i64
+  %ext4 = zext i8 %trunc4 to i64
+  %ext5 = zext i8 %trunc5 to i64
+  %ext6 = zext i8 %trunc6 to i64
+  %ext7 = zext i8 %trunc7 to i64
+  %ext8 = zext i8 %trunc8 to i64
+  %ext9 = zext i8 %trunc9 to i64
+  %ext10 = zext i8 %trunc10 to i64
+  %ext11 = zext i8 %trunc11 to i64
+  %ext12 = zext i8 %trunc12 to i64
+  %ext13 = zext i8 %trunc13 to i64
+  %ext14 = zext i8 %trunc14 to i64
+  %ext15 = zext i8 %trunc15 to i64
+
+  store volatile i64 %val0, i64 *%ptr  ; store the unextended values first (presumably to keep them live alongside the extensions)
+  store volatile i64 %val1, i64 *%ptr
+  store volatile i64 %val2, i64 *%ptr
+  store volatile i64 %val3, i64 *%ptr
+  store volatile i64 %val4, i64 *%ptr
+  store volatile i64 %val5, i64 *%ptr
+  store volatile i64 %val6, i64 *%ptr
+  store volatile i64 %val7, i64 *%ptr
+  store volatile i64 %val8, i64 *%ptr
+  store volatile i64 %val9, i64 *%ptr
+  store volatile i64 %val10, i64 *%ptr
+  store volatile i64 %val11, i64 *%ptr
+  store volatile i64 %val12, i64 *%ptr
+  store volatile i64 %val13, i64 *%ptr
+  store volatile i64 %val14, i64 *%ptr
+  store volatile i64 %val15, i64 *%ptr
+
+  store volatile i64 %ext0, i64 *%ptr  ; then store every extended value
+  store volatile i64 %ext1, i64 *%ptr
+  store volatile i64 %ext2, i64 *%ptr
+  store volatile i64 %ext3, i64 *%ptr
+  store volatile i64 %ext4, i64 *%ptr
+  store volatile i64 %ext5, i64 *%ptr
+  store volatile i64 %ext6, i64 *%ptr
+  store volatile i64 %ext7, i64 *%ptr
+  store volatile i64 %ext8, i64 *%ptr
+  store volatile i64 %ext9, i64 *%ptr
+  store volatile i64 %ext10, i64 *%ptr
+  store volatile i64 %ext11, i64 *%ptr
+  store volatile i64 %ext12, i64 *%ptr
+  store volatile i64 %ext13, i64 *%ptr
+  store volatile i64 %ext14, i64 *%ptr
+  store volatile i64 %ext15, i64 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-05.ll b/test/CodeGen/SystemZ/int-conv-05.ll
index 5358f7d9228a..5eade93ac584 100644
--- a/test/CodeGen/SystemZ/int-conv-05.ll
+++ b/test/CodeGen/SystemZ/int-conv-05.ll
@@ -4,9 +4,9 @@
 
 ; Test register extension, starting with an i32.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i32 %a to i16
   %ext = sext i16 %half to i32
   ret i32 %ext
@@ -14,9 +14,9 @@ define i32 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i32 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lhr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i64 %a to i16
   %ext = sext i16 %half to i32
   ret i32 %ext
@@ -24,7 +24,7 @@ define i32 @f2(i64 %a) {
 
 ; Check the low end of the LH range.
 define i32 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -34,7 +34,7 @@ define i32 @f3(i16 *%src) {
 
 ; Check the high end of the LH range.
 define i32 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lh %r2, 4094(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2047
@@ -45,7 +45,7 @@ define i32 @f4(i16 *%src) {
 
 ; Check the next halfword up, which needs LHY rather than LH.
 define i32 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lhy %r2, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2048
@@ -56,7 +56,7 @@ define i32 @f5(i16 *%src) {
 
 ; Check the high end of the LHY range.
 define i32 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lhy %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -68,7 +68,7 @@ define i32 @f6(i16 *%src) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f7(i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -80,7 +80,7 @@ define i32 @f7(i16 *%src) {
 
 ; Check the high end of the negative LHY range.
 define i32 @f8(i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lhy %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -91,7 +91,7 @@ define i32 @f8(i16 *%src) {
 
 ; Check the low end of the LHY range.
 define i32 @f9(i16 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lhy %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -103,7 +103,7 @@ define i32 @f9(i16 *%src) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f10(i16 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r2, -524290
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -115,7 +115,7 @@ define i32 @f10(i16 *%src) {
 
 ; Check that LH allows an index
 define i32 @f11(i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: lh %r2, 4094(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -128,7 +128,7 @@ define i32 @f11(i64 %src, i64 %index) {
 
 ; Check that LH allows an index
 define i32 @f12(i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: lhy %r2, 4096(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -138,3 +138,97 @@ define i32 @f12(i64 %src, i64 %index) {
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
+
+; Test a case where we spill the source of at least one LHR (16 values and their
+; sign-extended low halfwords are all stored below).  We want to use LH if possible.
+define void @f13(i32 *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: lh {{%r[0-9]+}}, 16{{[26]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr  ; 16 volatile loads of *%ptr (volatile presumably keeps them distinct)
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %trunc0 = trunc i32 %val0 to i16  ; keep only the low halfword of each value
+  %trunc1 = trunc i32 %val1 to i16
+  %trunc2 = trunc i32 %val2 to i16
+  %trunc3 = trunc i32 %val3 to i16
+  %trunc4 = trunc i32 %val4 to i16
+  %trunc5 = trunc i32 %val5 to i16
+  %trunc6 = trunc i32 %val6 to i16
+  %trunc7 = trunc i32 %val7 to i16
+  %trunc8 = trunc i32 %val8 to i16
+  %trunc9 = trunc i32 %val9 to i16
+  %trunc10 = trunc i32 %val10 to i16
+  %trunc11 = trunc i32 %val11 to i16
+  %trunc12 = trunc i32 %val12 to i16
+  %trunc13 = trunc i32 %val13 to i16
+  %trunc14 = trunc i32 %val14 to i16
+  %trunc15 = trunc i32 %val15 to i16
+
+  %ext0 = sext i16 %trunc0 to i32  ; sign-extend each halfword back to i32: the LHR/LH under test
+  %ext1 = sext i16 %trunc1 to i32
+  %ext2 = sext i16 %trunc2 to i32
+  %ext3 = sext i16 %trunc3 to i32
+  %ext4 = sext i16 %trunc4 to i32
+  %ext5 = sext i16 %trunc5 to i32
+  %ext6 = sext i16 %trunc6 to i32
+  %ext7 = sext i16 %trunc7 to i32
+  %ext8 = sext i16 %trunc8 to i32
+  %ext9 = sext i16 %trunc9 to i32
+  %ext10 = sext i16 %trunc10 to i32
+  %ext11 = sext i16 %trunc11 to i32
+  %ext12 = sext i16 %trunc12 to i32
+  %ext13 = sext i16 %trunc13 to i32
+  %ext14 = sext i16 %trunc14 to i32
+  %ext15 = sext i16 %trunc15 to i32
+
+  store volatile i32 %val0, i32 *%ptr  ; store the unextended values first (presumably to keep them live alongside the extensions)
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+
+  store volatile i32 %ext0, i32 *%ptr  ; then store every extended value
+  store volatile i32 %ext1, i32 *%ptr
+  store volatile i32 %ext2, i32 *%ptr
+  store volatile i32 %ext3, i32 *%ptr
+  store volatile i32 %ext4, i32 *%ptr
+  store volatile i32 %ext5, i32 *%ptr
+  store volatile i32 %ext6, i32 *%ptr
+  store volatile i32 %ext7, i32 *%ptr
+  store volatile i32 %ext8, i32 *%ptr
+  store volatile i32 %ext9, i32 *%ptr
+  store volatile i32 %ext10, i32 *%ptr
+  store volatile i32 %ext11, i32 *%ptr
+  store volatile i32 %ext12, i32 *%ptr
+  store volatile i32 %ext13, i32 *%ptr
+  store volatile i32 %ext14, i32 *%ptr
+  store volatile i32 %ext15, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-06.ll b/test/CodeGen/SystemZ/int-conv-06.ll
index 64af612d65f5..33860d12270f 100644
--- a/test/CodeGen/SystemZ/int-conv-06.ll
+++ b/test/CodeGen/SystemZ/int-conv-06.ll
@@ -1,12 +1,13 @@
-; Test zero extensions from a halfword to an i32.
+; Test zero extensions from a halfword to an i32.  The tests here
+; assume z10 register pressure, without the high words being available.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; Test register extension, starting with an i32.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: llhr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i32 %a to i16
   %ext = zext i16 %half to i32
   ret i32 %ext
@@ -14,9 +15,9 @@ define i32 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i32 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llhr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i64 %a to i16
   %ext = zext i16 %half to i32
   ret i32 %ext
@@ -24,16 +25,16 @@ define i32 @f2(i64 %a) {
 
 ; Check ANDs that are equivalent to zero extension.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llhr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = and i32 %a, 65535
   ret i32 %ext
 }
 
 ; Check LLH with no displacement.
 define i32 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llh %r2, 0(%r2)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -43,7 +44,7 @@ define i32 @f4(i16 *%src) {
 
 ; Check the high end of the LLH range.
 define i32 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llh %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -55,7 +56,7 @@ define i32 @f5(i16 *%src) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, 524288
 ; CHECK: llh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -67,7 +68,7 @@ define i32 @f6(i16 *%src) {
 
 ; Check the high end of the negative LLH range.
 define i32 @f7(i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llh %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -78,7 +79,7 @@ define i32 @f7(i16 *%src) {
 
 ; Check the low end of the LLH range.
 define i32 @f8(i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llh %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -90,7 +91,7 @@ define i32 @f8(i16 *%src) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i16 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524290
 ; CHECK: llh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -102,7 +103,7 @@ define i32 @f9(i16 *%src) {
 
 ; Check that LLH allows an index
 define i32 @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llh %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -112,3 +113,97 @@ define i32 @f10(i64 %src, i64 %index) {
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
+
+; Test a case where we spill the source of at least one LLHR (16 values and their
+; zero-extended low halfwords are all stored below).  We want to use LLH if possible.
+define void @f11(i32 *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: llh {{%r[0-9]+}}, 16{{[26]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr  ; 16 volatile loads of *%ptr (volatile presumably keeps them distinct)
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+
+  %trunc0 = trunc i32 %val0 to i16  ; keep only the low halfword of each value
+  %trunc1 = trunc i32 %val1 to i16
+  %trunc2 = trunc i32 %val2 to i16
+  %trunc3 = trunc i32 %val3 to i16
+  %trunc4 = trunc i32 %val4 to i16
+  %trunc5 = trunc i32 %val5 to i16
+  %trunc6 = trunc i32 %val6 to i16
+  %trunc7 = trunc i32 %val7 to i16
+  %trunc8 = trunc i32 %val8 to i16
+  %trunc9 = trunc i32 %val9 to i16
+  %trunc10 = trunc i32 %val10 to i16
+  %trunc11 = trunc i32 %val11 to i16
+  %trunc12 = trunc i32 %val12 to i16
+  %trunc13 = trunc i32 %val13 to i16
+  %trunc14 = trunc i32 %val14 to i16
+  %trunc15 = trunc i32 %val15 to i16
+
+  %ext0 = zext i16 %trunc0 to i32  ; zero-extend each halfword back to i32: the LLHR/LLH under test
+  %ext1 = zext i16 %trunc1 to i32
+  %ext2 = zext i16 %trunc2 to i32
+  %ext3 = zext i16 %trunc3 to i32
+  %ext4 = zext i16 %trunc4 to i32
+  %ext5 = zext i16 %trunc5 to i32
+  %ext6 = zext i16 %trunc6 to i32
+  %ext7 = zext i16 %trunc7 to i32
+  %ext8 = zext i16 %trunc8 to i32
+  %ext9 = zext i16 %trunc9 to i32
+  %ext10 = zext i16 %trunc10 to i32
+  %ext11 = zext i16 %trunc11 to i32
+  %ext12 = zext i16 %trunc12 to i32
+  %ext13 = zext i16 %trunc13 to i32
+  %ext14 = zext i16 %trunc14 to i32
+  %ext15 = zext i16 %trunc15 to i32
+
+  store volatile i32 %val0, i32 *%ptr  ; store the unextended values first (presumably to keep them live alongside the extensions)
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+
+  store volatile i32 %ext0, i32 *%ptr  ; then store every extended value
+  store volatile i32 %ext1, i32 *%ptr
+  store volatile i32 %ext2, i32 *%ptr
+  store volatile i32 %ext3, i32 *%ptr
+  store volatile i32 %ext4, i32 *%ptr
+  store volatile i32 %ext5, i32 *%ptr
+  store volatile i32 %ext6, i32 *%ptr
+  store volatile i32 %ext7, i32 *%ptr
+  store volatile i32 %ext8, i32 *%ptr
+  store volatile i32 %ext9, i32 *%ptr
+  store volatile i32 %ext10, i32 *%ptr
+  store volatile i32 %ext11, i32 *%ptr
+  store volatile i32 %ext12, i32 *%ptr
+  store volatile i32 %ext13, i32 *%ptr
+  store volatile i32 %ext14, i32 *%ptr
+  store volatile i32 %ext15, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-07.ll b/test/CodeGen/SystemZ/int-conv-07.ll
index 041caa244c8e..4b78c773d1ea 100644
--- a/test/CodeGen/SystemZ/int-conv-07.ll
+++ b/test/CodeGen/SystemZ/int-conv-07.ll
@@ -4,9 +4,9 @@
 
 ; Test register extension, starting with an i32.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lghr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i64 %a to i16
   %ext = sext i16 %half to i64
   ret i64 %ext
@@ -14,9 +14,9 @@ define i64 @f1(i64 %a) {
 
 ; ...and again with an i64.
 define i64 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lghr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i32 %a to i16
   %ext = sext i16 %half to i64
   ret i64 %ext
@@ -24,7 +24,7 @@ define i64 @f2(i32 %a) {
 
 ; Check LGH with no displacement.
 define i64 @f3(i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lgh %r2, 0(%r2)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -34,7 +34,7 @@ define i64 @f3(i16 *%src) {
 
 ; Check the high end of the LGH range.
 define i64 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgh %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -46,7 +46,7 @@ define i64 @f4(i16 *%src) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lgh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -58,7 +58,7 @@ define i64 @f5(i16 *%src) {
 
 ; Check the high end of the negative LGH range.
 define i64 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgh %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -69,7 +69,7 @@ define i64 @f6(i16 *%src) {
 
 ; Check the low end of the LGH range.
 define i64 @f7(i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgh %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -81,7 +81,7 @@ define i64 @f7(i16 *%src) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f8(i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524290
 ; CHECK: lgh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -93,7 +93,7 @@ define i64 @f8(i16 *%src) {
 
 ; Check that LGH allows an index.
 define i64 @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lgh %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -103,3 +103,97 @@ define i64 @f9(i64 %src, i64 %index) {
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
+
+; Test a case where we spill the source of at least one LGHR.  We want
+; to use LGH if possible.
+define void @f10(i64 *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: lgh {{%r[0-9]+}}, 166(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i64 *%ptr
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %trunc0 = trunc i64 %val0 to i16
+  %trunc1 = trunc i64 %val1 to i16
+  %trunc2 = trunc i64 %val2 to i16
+  %trunc3 = trunc i64 %val3 to i16
+  %trunc4 = trunc i64 %val4 to i16
+  %trunc5 = trunc i64 %val5 to i16
+  %trunc6 = trunc i64 %val6 to i16
+  %trunc7 = trunc i64 %val7 to i16
+  %trunc8 = trunc i64 %val8 to i16
+  %trunc9 = trunc i64 %val9 to i16
+  %trunc10 = trunc i64 %val10 to i16
+  %trunc11 = trunc i64 %val11 to i16
+  %trunc12 = trunc i64 %val12 to i16
+  %trunc13 = trunc i64 %val13 to i16
+  %trunc14 = trunc i64 %val14 to i16
+  %trunc15 = trunc i64 %val15 to i16
+
+  %ext0 = sext i16 %trunc0 to i64
+  %ext1 = sext i16 %trunc1 to i64
+  %ext2 = sext i16 %trunc2 to i64
+  %ext3 = sext i16 %trunc3 to i64
+  %ext4 = sext i16 %trunc4 to i64
+  %ext5 = sext i16 %trunc5 to i64
+  %ext6 = sext i16 %trunc6 to i64
+  %ext7 = sext i16 %trunc7 to i64
+  %ext8 = sext i16 %trunc8 to i64
+  %ext9 = sext i16 %trunc9 to i64
+  %ext10 = sext i16 %trunc10 to i64
+  %ext11 = sext i16 %trunc11 to i64
+  %ext12 = sext i16 %trunc12 to i64
+  %ext13 = sext i16 %trunc13 to i64
+  %ext14 = sext i16 %trunc14 to i64
+  %ext15 = sext i16 %trunc15 to i64
+
+  store volatile i64 %val0, i64 *%ptr
+  store volatile i64 %val1, i64 *%ptr
+  store volatile i64 %val2, i64 *%ptr
+  store volatile i64 %val3, i64 *%ptr
+  store volatile i64 %val4, i64 *%ptr
+  store volatile i64 %val5, i64 *%ptr
+  store volatile i64 %val6, i64 *%ptr
+  store volatile i64 %val7, i64 *%ptr
+  store volatile i64 %val8, i64 *%ptr
+  store volatile i64 %val9, i64 *%ptr
+  store volatile i64 %val10, i64 *%ptr
+  store volatile i64 %val11, i64 *%ptr
+  store volatile i64 %val12, i64 *%ptr
+  store volatile i64 %val13, i64 *%ptr
+  store volatile i64 %val14, i64 *%ptr
+  store volatile i64 %val15, i64 *%ptr
+
+  store volatile i64 %ext0, i64 *%ptr
+  store volatile i64 %ext1, i64 *%ptr
+  store volatile i64 %ext2, i64 *%ptr
+  store volatile i64 %ext3, i64 *%ptr
+  store volatile i64 %ext4, i64 *%ptr
+  store volatile i64 %ext5, i64 *%ptr
+  store volatile i64 %ext6, i64 *%ptr
+  store volatile i64 %ext7, i64 *%ptr
+  store volatile i64 %ext8, i64 *%ptr
+  store volatile i64 %ext9, i64 *%ptr
+  store volatile i64 %ext10, i64 *%ptr
+  store volatile i64 %ext11, i64 *%ptr
+  store volatile i64 %ext12, i64 *%ptr
+  store volatile i64 %ext13, i64 *%ptr
+  store volatile i64 %ext14, i64 *%ptr
+  store volatile i64 %ext15, i64 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-08.ll b/test/CodeGen/SystemZ/int-conv-08.ll
index 3d7f96675da9..6b6cb672fb9a 100644
--- a/test/CodeGen/SystemZ/int-conv-08.ll
+++ b/test/CodeGen/SystemZ/int-conv-08.ll
@@ -4,9 +4,9 @@
 
 ; Test register extension, starting with an i32.
 define i64 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: llghr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i32 %a to i16
   %ext = zext i16 %half to i64
   ret i64 %ext
@@ -14,9 +14,9 @@ define i64 @f1(i32 %a) {
 
 ; ...and again with an i64.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llghr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %half = trunc i64 %a to i16
   %ext = zext i16 %half to i64
   ret i64 %ext
@@ -24,16 +24,16 @@ define i64 @f2(i64 %a) {
 
 ; Check ANDs that are equivalent to zero extension.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llghr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = and i64 %a, 65535
   ret i64 %ext
 }
 
 ; Check LLGH with no displacement.
 define i64 @f4(i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llgh %r2, 0(%r2)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -43,7 +43,7 @@ define i64 @f4(i16 *%src) {
 
 ; Check the high end of the LLGH range.
 define i64 @f5(i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llgh %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -55,7 +55,7 @@ define i64 @f5(i16 *%src) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f6(i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, 524288
 ; CHECK: llgh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -67,7 +67,7 @@ define i64 @f6(i16 *%src) {
 
 ; Check the high end of the negative LLGH range.
 define i64 @f7(i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llgh %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -78,7 +78,7 @@ define i64 @f7(i16 *%src) {
 
 ; Check the low end of the LLGH range.
 define i64 @f8(i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llgh %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -90,7 +90,7 @@ define i64 @f8(i16 *%src) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f9(i16 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524290
 ; CHECK: llgh %r2, 0(%r2)
 ; CHECK: br %r14
@@ -102,7 +102,7 @@ define i64 @f9(i16 *%src) {
 
 ; Check that LLGH allows an index
 define i64 @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llgh %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -112,3 +112,97 @@ define i64 @f10(i64 %src, i64 %index) {
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
+
+; Test a case where we spill the source of at least one LLGHR.  We want
+; to use LLGH if possible.
+define void @f11(i64 *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: llgh {{%r[0-9]+}}, 166(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i64 *%ptr
+  %val1 = load volatile i64 *%ptr
+  %val2 = load volatile i64 *%ptr
+  %val3 = load volatile i64 *%ptr
+  %val4 = load volatile i64 *%ptr
+  %val5 = load volatile i64 *%ptr
+  %val6 = load volatile i64 *%ptr
+  %val7 = load volatile i64 *%ptr
+  %val8 = load volatile i64 *%ptr
+  %val9 = load volatile i64 *%ptr
+  %val10 = load volatile i64 *%ptr
+  %val11 = load volatile i64 *%ptr
+  %val12 = load volatile i64 *%ptr
+  %val13 = load volatile i64 *%ptr
+  %val14 = load volatile i64 *%ptr
+  %val15 = load volatile i64 *%ptr
+
+  %trunc0 = trunc i64 %val0 to i16
+  %trunc1 = trunc i64 %val1 to i16
+  %trunc2 = trunc i64 %val2 to i16
+  %trunc3 = trunc i64 %val3 to i16
+  %trunc4 = trunc i64 %val4 to i16
+  %trunc5 = trunc i64 %val5 to i16
+  %trunc6 = trunc i64 %val6 to i16
+  %trunc7 = trunc i64 %val7 to i16
+  %trunc8 = trunc i64 %val8 to i16
+  %trunc9 = trunc i64 %val9 to i16
+  %trunc10 = trunc i64 %val10 to i16
+  %trunc11 = trunc i64 %val11 to i16
+  %trunc12 = trunc i64 %val12 to i16
+  %trunc13 = trunc i64 %val13 to i16
+  %trunc14 = trunc i64 %val14 to i16
+  %trunc15 = trunc i64 %val15 to i16
+
+  %ext0 = zext i16 %trunc0 to i64
+  %ext1 = zext i16 %trunc1 to i64
+  %ext2 = zext i16 %trunc2 to i64
+  %ext3 = zext i16 %trunc3 to i64
+  %ext4 = zext i16 %trunc4 to i64
+  %ext5 = zext i16 %trunc5 to i64
+  %ext6 = zext i16 %trunc6 to i64
+  %ext7 = zext i16 %trunc7 to i64
+  %ext8 = zext i16 %trunc8 to i64
+  %ext9 = zext i16 %trunc9 to i64
+  %ext10 = zext i16 %trunc10 to i64
+  %ext11 = zext i16 %trunc11 to i64
+  %ext12 = zext i16 %trunc12 to i64
+  %ext13 = zext i16 %trunc13 to i64
+  %ext14 = zext i16 %trunc14 to i64
+  %ext15 = zext i16 %trunc15 to i64
+
+  store volatile i64 %val0, i64 *%ptr
+  store volatile i64 %val1, i64 *%ptr
+  store volatile i64 %val2, i64 *%ptr
+  store volatile i64 %val3, i64 *%ptr
+  store volatile i64 %val4, i64 *%ptr
+  store volatile i64 %val5, i64 *%ptr
+  store volatile i64 %val6, i64 *%ptr
+  store volatile i64 %val7, i64 *%ptr
+  store volatile i64 %val8, i64 *%ptr
+  store volatile i64 %val9, i64 *%ptr
+  store volatile i64 %val10, i64 *%ptr
+  store volatile i64 %val11, i64 *%ptr
+  store volatile i64 %val12, i64 *%ptr
+  store volatile i64 %val13, i64 *%ptr
+  store volatile i64 %val14, i64 *%ptr
+  store volatile i64 %val15, i64 *%ptr
+
+  store volatile i64 %ext0, i64 *%ptr
+  store volatile i64 %ext1, i64 *%ptr
+  store volatile i64 %ext2, i64 *%ptr
+  store volatile i64 %ext3, i64 *%ptr
+  store volatile i64 %ext4, i64 *%ptr
+  store volatile i64 %ext5, i64 *%ptr
+  store volatile i64 %ext6, i64 *%ptr
+  store volatile i64 %ext7, i64 *%ptr
+  store volatile i64 %ext8, i64 *%ptr
+  store volatile i64 %ext9, i64 *%ptr
+  store volatile i64 %ext10, i64 *%ptr
+  store volatile i64 %ext11, i64 *%ptr
+  store volatile i64 %ext12, i64 *%ptr
+  store volatile i64 %ext13, i64 *%ptr
+  store volatile i64 %ext14, i64 *%ptr
+  store volatile i64 %ext15, i64 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-09.ll b/test/CodeGen/SystemZ/int-conv-09.ll
index 6e93886895d5..b9c508917d4d 100644
--- a/test/CodeGen/SystemZ/int-conv-09.ll
+++ b/test/CodeGen/SystemZ/int-conv-09.ll
@@ -4,18 +4,18 @@
 
 ; Test register extension, starting with an i32.
 define i64 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lgfr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = sext i32 %a to i64
   ret i64 %ext
 }
 
 ; ...and again with an i64.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lgfr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %word = trunc i64 %a to i32
   %ext = sext i32 %word to i64
   ret i64 %ext
@@ -23,7 +23,7 @@ define i64 @f2(i64 %a) {
 
 ; Check LGF with no displacement.
 define i64 @f3(i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lgf %r2, 0(%r2)
 ; CHECK: br %r14
   %word = load i32 *%src
@@ -33,7 +33,7 @@ define i64 @f3(i32 *%src) {
 
 ; Check the high end of the LGF range.
 define i64 @f4(i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgf %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -45,7 +45,7 @@ define i64 @f4(i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f5(i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lgf %r2, 0(%r2)
 ; CHECK: br %r14
@@ -57,7 +57,7 @@ define i64 @f5(i32 *%src) {
 
 ; Check the high end of the negative LGF range.
 define i64 @f6(i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgf %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -68,7 +68,7 @@ define i64 @f6(i32 *%src) {
 
 ; Check the low end of the LGF range.
 define i64 @f7(i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgf %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -80,7 +80,7 @@ define i64 @f7(i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f8(i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524292
 ; CHECK: lgf %r2, 0(%r2)
 ; CHECK: br %r14
@@ -92,7 +92,7 @@ define i64 @f8(i32 *%src) {
 
 ; Check that LGF allows an index.
 define i64 @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lgf %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/int-conv-10.ll b/test/CodeGen/SystemZ/int-conv-10.ll
index 918bc1de8fa5..781c74c7fa23 100644
--- a/test/CodeGen/SystemZ/int-conv-10.ll
+++ b/test/CodeGen/SystemZ/int-conv-10.ll
@@ -4,18 +4,18 @@
 
 ; Test register extension, starting with an i32.
 define i64 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: llgfr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = zext i32 %a to i64
   ret i64 %ext
 }
 
 ; ...and again with an i64.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llgfr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %word = trunc i64 %a to i32
   %ext = zext i32 %word to i64
   ret i64 %ext
@@ -23,16 +23,16 @@ define i64 @f2(i64 %a) {
 
 ; Check ANDs that are equivalent to zero extension.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: llgfr %r2, %r2
-; CHECk: br %r14
+; CHECK: br %r14
   %ext = and i64 %a, 4294967295
   ret i64 %ext
 }
 
 ; Check LLGF with no displacement.
 define i64 @f4(i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llgf %r2, 0(%r2)
 ; CHECK: br %r14
   %word = load i32 *%src
@@ -42,7 +42,7 @@ define i64 @f4(i32 *%src) {
 
 ; Check the high end of the LLGF range.
 define i64 @f5(i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: llgf %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -54,7 +54,7 @@ define i64 @f5(i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f6(i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, 524288
 ; CHECK: llgf %r2, 0(%r2)
 ; CHECK: br %r14
@@ -66,7 +66,7 @@ define i64 @f6(i32 *%src) {
 
 ; Check the high end of the negative LLGF range.
 define i64 @f7(i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: llgf %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -77,7 +77,7 @@ define i64 @f7(i32 *%src) {
 
 ; Check the low end of the LLGF range.
 define i64 @f8(i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: llgf %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -89,7 +89,7 @@ define i64 @f8(i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f9(i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524292
 ; CHECK: llgf %r2, 0(%r2)
 ; CHECK: br %r14
@@ -101,7 +101,7 @@ define i64 @f9(i32 *%src) {
 
 ; Check that LLGF allows an index.
 define i64 @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: llgf %r2, 524287(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/int-conv-11.ll b/test/CodeGen/SystemZ/int-conv-11.ll
new file mode 100644
index 000000000000..30769621bf82
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-11.ll
@@ -0,0 +1,350 @@
+; Test spills of zero extensions when high GR32s are available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Test a case where we spill the source of at least one LLCRMux.  We want
+; to use LLC(H) if possible.
+define void @f1(i32 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: llc{{h?}} {{%r[0-9]+}}, 16{{[37]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+  %val16 = load volatile i32 *%ptr
+  %val17 = load volatile i32 *%ptr
+  %val18 = load volatile i32 *%ptr
+  %val19 = load volatile i32 *%ptr
+  %val20 = load volatile i32 *%ptr
+  %val21 = load volatile i32 *%ptr
+  %val22 = load volatile i32 *%ptr
+  %val23 = load volatile i32 *%ptr
+  %val24 = load volatile i32 *%ptr
+  %val25 = load volatile i32 *%ptr
+  %val26 = load volatile i32 *%ptr
+  %val27 = load volatile i32 *%ptr
+  %val28 = load volatile i32 *%ptr
+  %val29 = load volatile i32 *%ptr
+  %val30 = load volatile i32 *%ptr
+  %val31 = load volatile i32 *%ptr
+
+  %trunc0 = trunc i32 %val0 to i8
+  %trunc1 = trunc i32 %val1 to i8
+  %trunc2 = trunc i32 %val2 to i8
+  %trunc3 = trunc i32 %val3 to i8
+  %trunc4 = trunc i32 %val4 to i8
+  %trunc5 = trunc i32 %val5 to i8
+  %trunc6 = trunc i32 %val6 to i8
+  %trunc7 = trunc i32 %val7 to i8
+  %trunc8 = trunc i32 %val8 to i8
+  %trunc9 = trunc i32 %val9 to i8
+  %trunc10 = trunc i32 %val10 to i8
+  %trunc11 = trunc i32 %val11 to i8
+  %trunc12 = trunc i32 %val12 to i8
+  %trunc13 = trunc i32 %val13 to i8
+  %trunc14 = trunc i32 %val14 to i8
+  %trunc15 = trunc i32 %val15 to i8
+  %trunc16 = trunc i32 %val16 to i8
+  %trunc17 = trunc i32 %val17 to i8
+  %trunc18 = trunc i32 %val18 to i8
+  %trunc19 = trunc i32 %val19 to i8
+  %trunc20 = trunc i32 %val20 to i8
+  %trunc21 = trunc i32 %val21 to i8
+  %trunc22 = trunc i32 %val22 to i8
+  %trunc23 = trunc i32 %val23 to i8
+  %trunc24 = trunc i32 %val24 to i8
+  %trunc25 = trunc i32 %val25 to i8
+  %trunc26 = trunc i32 %val26 to i8
+  %trunc27 = trunc i32 %val27 to i8
+  %trunc28 = trunc i32 %val28 to i8
+  %trunc29 = trunc i32 %val29 to i8
+  %trunc30 = trunc i32 %val30 to i8
+  %trunc31 = trunc i32 %val31 to i8
+
+  %ext0 = zext i8 %trunc0 to i32
+  %ext1 = zext i8 %trunc1 to i32
+  %ext2 = zext i8 %trunc2 to i32
+  %ext3 = zext i8 %trunc3 to i32
+  %ext4 = zext i8 %trunc4 to i32
+  %ext5 = zext i8 %trunc5 to i32
+  %ext6 = zext i8 %trunc6 to i32
+  %ext7 = zext i8 %trunc7 to i32
+  %ext8 = zext i8 %trunc8 to i32
+  %ext9 = zext i8 %trunc9 to i32
+  %ext10 = zext i8 %trunc10 to i32
+  %ext11 = zext i8 %trunc11 to i32
+  %ext12 = zext i8 %trunc12 to i32
+  %ext13 = zext i8 %trunc13 to i32
+  %ext14 = zext i8 %trunc14 to i32
+  %ext15 = zext i8 %trunc15 to i32
+  %ext16 = zext i8 %trunc16 to i32
+  %ext17 = zext i8 %trunc17 to i32
+  %ext18 = zext i8 %trunc18 to i32
+  %ext19 = zext i8 %trunc19 to i32
+  %ext20 = zext i8 %trunc20 to i32
+  %ext21 = zext i8 %trunc21 to i32
+  %ext22 = zext i8 %trunc22 to i32
+  %ext23 = zext i8 %trunc23 to i32
+  %ext24 = zext i8 %trunc24 to i32
+  %ext25 = zext i8 %trunc25 to i32
+  %ext26 = zext i8 %trunc26 to i32
+  %ext27 = zext i8 %trunc27 to i32
+  %ext28 = zext i8 %trunc28 to i32
+  %ext29 = zext i8 %trunc29 to i32
+  %ext30 = zext i8 %trunc30 to i32
+  %ext31 = zext i8 %trunc31 to i32
+
+  store volatile i32 %val0, i32 *%ptr
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+  store volatile i32 %val16, i32 *%ptr
+  store volatile i32 %val17, i32 *%ptr
+  store volatile i32 %val18, i32 *%ptr
+  store volatile i32 %val19, i32 *%ptr
+  store volatile i32 %val20, i32 *%ptr
+  store volatile i32 %val21, i32 *%ptr
+  store volatile i32 %val22, i32 *%ptr
+  store volatile i32 %val23, i32 *%ptr
+  store volatile i32 %val24, i32 *%ptr
+  store volatile i32 %val25, i32 *%ptr
+  store volatile i32 %val26, i32 *%ptr
+  store volatile i32 %val27, i32 *%ptr
+  store volatile i32 %val28, i32 *%ptr
+  store volatile i32 %val29, i32 *%ptr
+  store volatile i32 %val30, i32 *%ptr
+  store volatile i32 %val31, i32 *%ptr
+
+  store volatile i32 %ext0, i32 *%ptr
+  store volatile i32 %ext1, i32 *%ptr
+  store volatile i32 %ext2, i32 *%ptr
+  store volatile i32 %ext3, i32 *%ptr
+  store volatile i32 %ext4, i32 *%ptr
+  store volatile i32 %ext5, i32 *%ptr
+  store volatile i32 %ext6, i32 *%ptr
+  store volatile i32 %ext7, i32 *%ptr
+  store volatile i32 %ext8, i32 *%ptr
+  store volatile i32 %ext9, i32 *%ptr
+  store volatile i32 %ext10, i32 *%ptr
+  store volatile i32 %ext11, i32 *%ptr
+  store volatile i32 %ext12, i32 *%ptr
+  store volatile i32 %ext13, i32 *%ptr
+  store volatile i32 %ext14, i32 *%ptr
+  store volatile i32 %ext15, i32 *%ptr
+  store volatile i32 %ext16, i32 *%ptr
+  store volatile i32 %ext17, i32 *%ptr
+  store volatile i32 %ext18, i32 *%ptr
+  store volatile i32 %ext19, i32 *%ptr
+  store volatile i32 %ext20, i32 *%ptr
+  store volatile i32 %ext21, i32 *%ptr
+  store volatile i32 %ext22, i32 *%ptr
+  store volatile i32 %ext23, i32 *%ptr
+  store volatile i32 %ext24, i32 *%ptr
+  store volatile i32 %ext25, i32 *%ptr
+  store volatile i32 %ext26, i32 *%ptr
+  store volatile i32 %ext27, i32 *%ptr
+  store volatile i32 %ext28, i32 *%ptr
+  store volatile i32 %ext29, i32 *%ptr
+  store volatile i32 %ext30, i32 *%ptr
+  store volatile i32 %ext31, i32 *%ptr
+
+  ret void
+}
+
+; Same again with i16, which should use LLH(H).
+define void @f2(i32 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: llh{{h?}} {{%r[0-9]+}}, 16{{[26]}}(%r15)
+; CHECK: br %r14
+  %val0 = load volatile i32 *%ptr
+  %val1 = load volatile i32 *%ptr
+  %val2 = load volatile i32 *%ptr
+  %val3 = load volatile i32 *%ptr
+  %val4 = load volatile i32 *%ptr
+  %val5 = load volatile i32 *%ptr
+  %val6 = load volatile i32 *%ptr
+  %val7 = load volatile i32 *%ptr
+  %val8 = load volatile i32 *%ptr
+  %val9 = load volatile i32 *%ptr
+  %val10 = load volatile i32 *%ptr
+  %val11 = load volatile i32 *%ptr
+  %val12 = load volatile i32 *%ptr
+  %val13 = load volatile i32 *%ptr
+  %val14 = load volatile i32 *%ptr
+  %val15 = load volatile i32 *%ptr
+  %val16 = load volatile i32 *%ptr
+  %val17 = load volatile i32 *%ptr
+  %val18 = load volatile i32 *%ptr
+  %val19 = load volatile i32 *%ptr
+  %val20 = load volatile i32 *%ptr
+  %val21 = load volatile i32 *%ptr
+  %val22 = load volatile i32 *%ptr
+  %val23 = load volatile i32 *%ptr
+  %val24 = load volatile i32 *%ptr
+  %val25 = load volatile i32 *%ptr
+  %val26 = load volatile i32 *%ptr
+  %val27 = load volatile i32 *%ptr
+  %val28 = load volatile i32 *%ptr
+  %val29 = load volatile i32 *%ptr
+  %val30 = load volatile i32 *%ptr
+  %val31 = load volatile i32 *%ptr
+
+  %trunc0 = trunc i32 %val0 to i16
+  %trunc1 = trunc i32 %val1 to i16
+  %trunc2 = trunc i32 %val2 to i16
+  %trunc3 = trunc i32 %val3 to i16
+  %trunc4 = trunc i32 %val4 to i16
+  %trunc5 = trunc i32 %val5 to i16
+  %trunc6 = trunc i32 %val6 to i16
+  %trunc7 = trunc i32 %val7 to i16
+  %trunc8 = trunc i32 %val8 to i16
+  %trunc9 = trunc i32 %val9 to i16
+  %trunc10 = trunc i32 %val10 to i16
+  %trunc11 = trunc i32 %val11 to i16
+  %trunc12 = trunc i32 %val12 to i16
+  %trunc13 = trunc i32 %val13 to i16
+  %trunc14 = trunc i32 %val14 to i16
+  %trunc15 = trunc i32 %val15 to i16
+  %trunc16 = trunc i32 %val16 to i16
+  %trunc17 = trunc i32 %val17 to i16
+  %trunc18 = trunc i32 %val18 to i16
+  %trunc19 = trunc i32 %val19 to i16
+  %trunc20 = trunc i32 %val20 to i16
+  %trunc21 = trunc i32 %val21 to i16
+  %trunc22 = trunc i32 %val22 to i16
+  %trunc23 = trunc i32 %val23 to i16
+  %trunc24 = trunc i32 %val24 to i16
+  %trunc25 = trunc i32 %val25 to i16
+  %trunc26 = trunc i32 %val26 to i16
+  %trunc27 = trunc i32 %val27 to i16
+  %trunc28 = trunc i32 %val28 to i16
+  %trunc29 = trunc i32 %val29 to i16
+  %trunc30 = trunc i32 %val30 to i16
+  %trunc31 = trunc i32 %val31 to i16
+
+  %ext0 = zext i16 %trunc0 to i32
+  %ext1 = zext i16 %trunc1 to i32
+  %ext2 = zext i16 %trunc2 to i32
+  %ext3 = zext i16 %trunc3 to i32
+  %ext4 = zext i16 %trunc4 to i32
+  %ext5 = zext i16 %trunc5 to i32
+  %ext6 = zext i16 %trunc6 to i32
+  %ext7 = zext i16 %trunc7 to i32
+  %ext8 = zext i16 %trunc8 to i32
+  %ext9 = zext i16 %trunc9 to i32
+  %ext10 = zext i16 %trunc10 to i32
+  %ext11 = zext i16 %trunc11 to i32
+  %ext12 = zext i16 %trunc12 to i32
+  %ext13 = zext i16 %trunc13 to i32
+  %ext14 = zext i16 %trunc14 to i32
+  %ext15 = zext i16 %trunc15 to i32
+  %ext16 = zext i16 %trunc16 to i32
+  %ext17 = zext i16 %trunc17 to i32
+  %ext18 = zext i16 %trunc18 to i32
+  %ext19 = zext i16 %trunc19 to i32
+  %ext20 = zext i16 %trunc20 to i32
+  %ext21 = zext i16 %trunc21 to i32
+  %ext22 = zext i16 %trunc22 to i32
+  %ext23 = zext i16 %trunc23 to i32
+  %ext24 = zext i16 %trunc24 to i32
+  %ext25 = zext i16 %trunc25 to i32
+  %ext26 = zext i16 %trunc26 to i32
+  %ext27 = zext i16 %trunc27 to i32
+  %ext28 = zext i16 %trunc28 to i32
+  %ext29 = zext i16 %trunc29 to i32
+  %ext30 = zext i16 %trunc30 to i32
+  %ext31 = zext i16 %trunc31 to i32
+
+  store volatile i32 %val0, i32 *%ptr
+  store volatile i32 %val1, i32 *%ptr
+  store volatile i32 %val2, i32 *%ptr
+  store volatile i32 %val3, i32 *%ptr
+  store volatile i32 %val4, i32 *%ptr
+  store volatile i32 %val5, i32 *%ptr
+  store volatile i32 %val6, i32 *%ptr
+  store volatile i32 %val7, i32 *%ptr
+  store volatile i32 %val8, i32 *%ptr
+  store volatile i32 %val9, i32 *%ptr
+  store volatile i32 %val10, i32 *%ptr
+  store volatile i32 %val11, i32 *%ptr
+  store volatile i32 %val12, i32 *%ptr
+  store volatile i32 %val13, i32 *%ptr
+  store volatile i32 %val14, i32 *%ptr
+  store volatile i32 %val15, i32 *%ptr
+  store volatile i32 %val16, i32 *%ptr
+  store volatile i32 %val17, i32 *%ptr
+  store volatile i32 %val18, i32 *%ptr
+  store volatile i32 %val19, i32 *%ptr
+  store volatile i32 %val20, i32 *%ptr
+  store volatile i32 %val21, i32 *%ptr
+  store volatile i32 %val22, i32 *%ptr
+  store volatile i32 %val23, i32 *%ptr
+  store volatile i32 %val24, i32 *%ptr
+  store volatile i32 %val25, i32 *%ptr
+  store volatile i32 %val26, i32 *%ptr
+  store volatile i32 %val27, i32 *%ptr
+  store volatile i32 %val28, i32 *%ptr
+  store volatile i32 %val29, i32 *%ptr
+  store volatile i32 %val30, i32 *%ptr
+  store volatile i32 %val31, i32 *%ptr
+
+  store volatile i32 %ext0, i32 *%ptr
+  store volatile i32 %ext1, i32 *%ptr
+  store volatile i32 %ext2, i32 *%ptr
+  store volatile i32 %ext3, i32 *%ptr
+  store volatile i32 %ext4, i32 *%ptr
+  store volatile i32 %ext5, i32 *%ptr
+  store volatile i32 %ext6, i32 *%ptr
+  store volatile i32 %ext7, i32 *%ptr
+  store volatile i32 %ext8, i32 *%ptr
+  store volatile i32 %ext9, i32 *%ptr
+  store volatile i32 %ext10, i32 *%ptr
+  store volatile i32 %ext11, i32 *%ptr
+  store volatile i32 %ext12, i32 *%ptr
+  store volatile i32 %ext13, i32 *%ptr
+  store volatile i32 %ext14, i32 *%ptr
+  store volatile i32 %ext15, i32 *%ptr
+  store volatile i32 %ext16, i32 *%ptr
+  store volatile i32 %ext17, i32 *%ptr
+  store volatile i32 %ext18, i32 *%ptr
+  store volatile i32 %ext19, i32 *%ptr
+  store volatile i32 %ext20, i32 *%ptr
+  store volatile i32 %ext21, i32 *%ptr
+  store volatile i32 %ext22, i32 *%ptr
+  store volatile i32 %ext23, i32 *%ptr
+  store volatile i32 %ext24, i32 *%ptr
+  store volatile i32 %ext25, i32 *%ptr
+  store volatile i32 %ext26, i32 *%ptr
+  store volatile i32 %ext27, i32 *%ptr
+  store volatile i32 %ext28, i32 *%ptr
+  store volatile i32 %ext29, i32 *%ptr
+  store volatile i32 %ext30, i32 *%ptr
+  store volatile i32 %ext31, i32 *%ptr
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-div-01.ll b/test/CodeGen/SystemZ/int-div-01.ll
index 492ece91497e..2c21186e3369 100644
--- a/test/CodeGen/SystemZ/int-div-01.ll
+++ b/test/CodeGen/SystemZ/int-div-01.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i32 @foo()
+
 ; Test register division.  The result is in the second of the two registers.
 define void @f1(i32 *%dest, i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lgfr %r1, %r3
 ; CHECK: dsgfr %r0, %r4
 ; CHECK: st %r1, 0(%r2)
@@ -16,7 +18,7 @@ define void @f1(i32 *%dest, i32 %a, i32 %b) {
 
 ; Test register remainder.  The result is in the first of the two registers.
 define void @f2(i32 *%dest, i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lgfr %r1, %r3
 ; CHECK: dsgfr %r0, %r4
 ; CHECK: st %r0, 0(%r2)
@@ -28,7 +30,7 @@ define void @f2(i32 *%dest, i32 %a, i32 %b) {
 
 ; Test that division and remainder use a single instruction.
 define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: %r2
 ; CHECK: lgfr %r3, %r3
 ; CHECK-NOT: %r2
@@ -45,7 +47,7 @@ define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
 ; Check that the sign extension of the dividend is elided when the argument
 ; is already sign-extended.
 define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgfr %r2, %r4
 ; CHECK-NOT: dsgfr
@@ -59,7 +61,7 @@ define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) {
 
 ; Test that memory dividends are loaded using sign extension (LGF).
 define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: %r2
 ; CHECK: lgf %r3, 0(%r3)
 ; CHECK-NOT: %r2
@@ -76,7 +78,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
 
 ; Test memory division with no displacement.
 define void @f6(i32 *%dest, i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgfr %r1, %r3
 ; CHECK: dsgf %r0, 0(%r4)
 ; CHECK: st %r1, 0(%r2)
@@ -89,7 +91,7 @@ define void @f6(i32 *%dest, i32 %a, i32 *%src) {
 
 ; Test memory remainder with no displacement.
 define void @f7(i32 *%dest, i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgfr %r1, %r3
 ; CHECK: dsgf %r0, 0(%r4)
 ; CHECK: st %r0, 0(%r2)
@@ -102,7 +104,7 @@ define void @f7(i32 *%dest, i32 %a, i32 *%src) {
 
 ; Test both memory division and memory remainder.
 define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK-NOT: %r2
 ; CHECK: lgfr %r3, %r3
 ; CHECK-NOT: %r2
@@ -119,7 +121,7 @@ define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check the high end of the DSGF range.
 define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: dsgf %r2, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -131,7 +133,7 @@ define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r4, 524288
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
@@ -143,7 +145,7 @@ define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned DSGF range.
 define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: dsgf %r2, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -154,7 +156,7 @@ define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check the low end of the DSGF range.
 define i32 @f12(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: dsgf %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -166,7 +168,7 @@ define i32 @f12(i32 %dummy, i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f13(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: agfi %r4, -524292
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
@@ -178,7 +180,7 @@ define i32 @f13(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check that DSGF allows an index.
 define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: dsgf %r2, 524287(%r5,%r4)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -188,3 +190,62 @@ define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) {
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
+
+; Make sure that we still use DSGFR rather than DSGR in cases where
+; a load and division cannot be combined.
+define void @f15(i32 *%dest, i32 *%src) {
+; CHECK-LABEL: f15:
+; CHECK: l [[B:%r[0-9]+]], 0(%r3)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: lgfr %r1, %r2
+; CHECK: dsgfr %r0, [[B]]
+; CHECK: br %r14
+  %b = load i32 *%src
+  %a = call i32 @foo()
+  %div = sdiv i32 %a, %b
+  store i32 %div, i32 *%dest
+  ret void
+}
+
+; Check that divisions of spilled values can use DSGF rather than DSGFR.
+define i32 @f16(i32 *%ptr0) {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: dsgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %div0 = sdiv i32 %ret, %val0
+  %div1 = sdiv i32 %div0, %val1
+  %div2 = sdiv i32 %div1, %val2
+  %div3 = sdiv i32 %div2, %val3
+  %div4 = sdiv i32 %div3, %val4
+  %div5 = sdiv i32 %div4, %val5
+  %div6 = sdiv i32 %div5, %val6
+  %div7 = sdiv i32 %div6, %val7
+  %div8 = sdiv i32 %div7, %val8
+  %div9 = sdiv i32 %div8, %val9
+
+  ret i32 %div9
+}
diff --git a/test/CodeGen/SystemZ/int-div-02.ll b/test/CodeGen/SystemZ/int-div-02.ll
index 7954384d2962..f3287a56c6cd 100644
--- a/test/CodeGen/SystemZ/int-div-02.ll
+++ b/test/CodeGen/SystemZ/int-div-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i32 @foo()
+
 ; Test register division.  The result is in the second of the two registers.
 define void @f1(i32 %dummy, i32 %a, i32 %b, i32 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lhi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -18,7 +20,7 @@ define void @f1(i32 %dummy, i32 %a, i32 %b, i32 *%dest) {
 
 ; Test register remainder.  The result is in the first of the two registers.
 define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%dest) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lhi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -32,7 +34,7 @@ define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%dest) {
 
 ; Test that division and remainder use a single instruction.
 define i32 @f3(i32 %dummy1, i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lhi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -48,7 +50,7 @@ define i32 @f3(i32 %dummy1, i32 %a, i32 %b) {
 
 ; Test memory division with no displacement.
 define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lhi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -63,7 +65,7 @@ define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
 
 ; Test memory remainder with no displacement.
 define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lhi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -78,7 +80,7 @@ define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
 
 ; Test both memory division and memory remainder.
 define i32 @f6(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lhi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -95,7 +97,7 @@ define i32 @f6(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check the high end of the DL range.
 define i32 @f7(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: dl %r2, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -107,7 +109,7 @@ define i32 @f7(i32 %dummy, i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r4, 524288
 ; CHECK: dl %r2, 0(%r4)
 ; CHECK: br %r14
@@ -119,7 +121,7 @@ define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned DL range.
 define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: dl %r2, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -130,7 +132,7 @@ define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check the low end of the DL range.
 define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: dl %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -142,7 +144,7 @@ define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r4, -524292
 ; CHECK: dl %r2, 0(%r4)
 ; CHECK: br %r14
@@ -154,7 +156,7 @@ define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
 
 ; Check that DL allows an index.
 define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: dl %r2, 524287(%r5,%r4)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -164,3 +166,46 @@ define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) {
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
+
+; Check that divisions of spilled values can use DL rather than DLR.
+define i32 @f13(i32 *%ptr0) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: dl {{%r[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %div0 = udiv i32 %ret, %val0
+  %div1 = udiv i32 %div0, %val1
+  %div2 = udiv i32 %div1, %val2
+  %div3 = udiv i32 %div2, %val3
+  %div4 = udiv i32 %div3, %val4
+  %div5 = udiv i32 %div4, %val5
+  %div6 = udiv i32 %div5, %val6
+  %div7 = udiv i32 %div6, %val7
+  %div8 = udiv i32 %div7, %val8
+  %div9 = udiv i32 %div8, %val9
+
+  ret i32 %div9
+}
diff --git a/test/CodeGen/SystemZ/int-div-03.ll b/test/CodeGen/SystemZ/int-div-03.ll
index b950f2b02035..7c0409018f16 100644
--- a/test/CodeGen/SystemZ/int-div-03.ll
+++ b/test/CodeGen/SystemZ/int-div-03.ll
@@ -3,9 +3,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Test register division.  The result is in the second of the two registers.
 define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgfr %r2, %r4
 ; CHECK: stg %r3, 0(%r5)
@@ -18,7 +20,7 @@ define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
 
 ; Test register remainder.  The result is in the first of the two registers.
 define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgfr %r2, %r4
 ; CHECK: stg %r2, 0(%r5)
@@ -31,7 +33,7 @@ define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
 
 ; Test that division and remainder use a single instruction.
 define i64 @f3(i64 %dummy, i64 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgfr %r2, %r4
 ; CHECK: ogr %r2, %r3
@@ -46,7 +48,7 @@ define i64 @f3(i64 %dummy, i64 %a, i32 %b) {
 ; Test register division when the dividend is zero rather than sign extended.
 ; We can't use dsgfr here
 define void @f4(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: dsgfr
 ; CHECK: br %r14
   %bext = zext i32 %b to i64
@@ -57,7 +59,7 @@ define void @f4(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
 
 ; ...likewise remainder.
 define void @f5(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: dsgfr
 ; CHECK: br %r14
   %bext = zext i32 %b to i64
@@ -68,7 +70,7 @@ define void @f5(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
 
 ; Test memory division with no displacement.
 define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: stg %r3, 0(%r5)
@@ -82,7 +84,7 @@ define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
 
 ; Test memory remainder with no displacement.
 define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: stg %r2, 0(%r5)
@@ -96,7 +98,7 @@ define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
 
 ; Test both memory division and memory remainder.
 define i64 @f8(i64 %dummy, i64 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK-NOT: {{dsgf|dsgfr}}
@@ -112,7 +114,7 @@ define i64 @f8(i64 %dummy, i64 %a, i32 *%src) {
 
 ; Check the high end of the DSGF range.
 define i64 @f9(i64 %dummy, i64 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: dsgf %r2, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -125,7 +127,7 @@ define i64 @f9(i64 %dummy, i64 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f10(i64 %dummy, i64 %a, i32 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r4, 524288
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
@@ -138,7 +140,7 @@ define i64 @f10(i64 %dummy, i64 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned DSGF range.
 define i64 @f11(i64 %dummy, i64 %a, i32 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: dsgf %r2, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -150,7 +152,7 @@ define i64 @f11(i64 %dummy, i64 %a, i32 *%src) {
 
 ; Check the low end of the DSGF range.
 define i64 @f12(i64 %dummy, i64 %a, i32 *%src) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: dsgf %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -163,7 +165,7 @@ define i64 @f12(i64 %dummy, i64 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f13(i64 %dummy, i64 %a, i32 *%src) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: agfi %r4, -524292
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
@@ -176,7 +178,7 @@ define i64 @f13(i64 %dummy, i64 %a, i32 *%src) {
 
 ; Check that DSGF allows an index.
 define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: dsgf %r2, 524287(%r5,%r4)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -187,3 +189,20 @@ define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) {
   %rem = srem i64 %a, %bext
   ret i64 %rem
 }
+
+; Make sure that we still use DSGFR rather than DSGR in cases where
+; a load and division cannot be combined.
+define void @f15(i64 *%dest, i32 *%src) {
+; CHECK-LABEL: f15:
+; CHECK: l [[B:%r[0-9]+]], 0(%r3)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: lgr %r1, %r2
+; CHECK: dsgfr %r0, [[B]]
+; CHECK: br %r14
+  %b = load i32 *%src
+  %a = call i64 @foo()
+  %ext = sext i32 %b to i64
+  %div = sdiv i64 %a, %ext
+  store i64 %div, i64 *%dest
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-div-04.ll b/test/CodeGen/SystemZ/int-div-04.ll
index 3f72be9a47da..87f1e105f6a4 100644
--- a/test/CodeGen/SystemZ/int-div-04.ll
+++ b/test/CodeGen/SystemZ/int-div-04.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Testg register division.  The result is in the second of the two registers.
 define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgr %r2, %r4
 ; CHECK: stg %r3, 0(%r5)
@@ -16,7 +18,7 @@ define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
 
 ; Testg register remainder.  The result is in the first of the two registers.
 define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgr %r2, %r4
 ; CHECK: stg %r2, 0(%r5)
@@ -28,7 +30,7 @@ define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
 
 ; Testg that division and remainder use a single instruction.
 define i64 @f3(i64 %dummy1, i64 %a, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsgr %r2, %r4
 ; CHECK-NOT: dsgr
@@ -42,7 +44,7 @@ define i64 @f3(i64 %dummy1, i64 %a, i64 %b) {
 
 ; Testg memory division with no displacement.
 define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: stg %r3, 0(%r5)
@@ -55,7 +57,7 @@ define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
 
 ; Testg memory remainder with no displacement.
 define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: stg %r2, 0(%r5)
@@ -68,7 +70,7 @@ define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
 
 ; Testg both memory division and memory remainder.
 define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK-NOT: {{dsg|dsgr}}
@@ -83,7 +85,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the high end of the DSG range.
 define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: dsg %r2, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -95,7 +97,7 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r4, 524288
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -107,7 +109,7 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned DSG range.
 define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: dsg %r2, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -118,7 +120,7 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the low end of the DSG range.
 define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: dsg %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -130,7 +132,7 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r4, -524296
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -142,7 +144,7 @@ define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check that DSG allows an index.
 define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: dsg %r2, 524287(%r5,%r4)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -152,3 +154,49 @@ define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
+
+; Check that divisions of spilled values can use DSG rather than DSGR.
+define i64 @f13(i64 *%ptr0) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: dsg {{%r[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+  %ptr10 = getelementptr i64 *%ptr0, i64 20
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+  %val10 = load i64 *%ptr10
+
+  %ret = call i64 @foo()
+
+  %div0 = sdiv i64 %ret, %val0
+  %div1 = sdiv i64 %div0, %val1
+  %div2 = sdiv i64 %div1, %val2
+  %div3 = sdiv i64 %div2, %val3
+  %div4 = sdiv i64 %div3, %val4
+  %div5 = sdiv i64 %div4, %val5
+  %div6 = sdiv i64 %div5, %val6
+  %div7 = sdiv i64 %div6, %val7
+  %div8 = sdiv i64 %div7, %val8
+  %div9 = sdiv i64 %div8, %val9
+  %div10 = sdiv i64 %div9, %val10
+
+  ret i64 %div10
+}
diff --git a/test/CodeGen/SystemZ/int-div-05.ll b/test/CodeGen/SystemZ/int-div-05.ll
index 04f622b44e74..817983005a9d 100644
--- a/test/CodeGen/SystemZ/int-div-05.ll
+++ b/test/CodeGen/SystemZ/int-div-05.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Testg register division.  The result is in the second of the two registers.
 define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lghi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -18,7 +20,7 @@ define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
 
 ; Testg register remainder.  The result is in the first of the two registers.
 define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lghi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -32,7 +34,7 @@ define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
 
 ; Testg that division and remainder use a single instruction.
 define i64 @f3(i64 %dummy1, i64 %a, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lghi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -48,7 +50,7 @@ define i64 @f3(i64 %dummy1, i64 %a, i64 %b) {
 
 ; Testg memory division with no displacement.
 define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lghi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -63,7 +65,7 @@ define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
 
 ; Testg memory remainder with no displacement.
 define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lghi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -78,7 +80,7 @@ define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
 
 ; Testg both memory division and memory remainder.
 define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: %r3
 ; CHECK: {{llill|lghi}} %r2, 0
 ; CHECK-NOT: %r3
@@ -95,7 +97,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the high end of the DLG range.
 define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: dlg %r2, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -107,7 +109,7 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r4, 524288
 ; CHECK: dlg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -119,7 +121,7 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned DLG range.
 define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: dlg %r2, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -130,7 +132,7 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the low end of the DLG range.
 define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: dlg %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -142,7 +144,7 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r4, -524296
 ; CHECK: dlg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -154,7 +156,7 @@ define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check that DLG allows an index.
 define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: dlg %r2, 524287(%r5,%r4)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -164,3 +166,49 @@ define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
+
+; Check that divisions of spilled values can use DLG rather than DLGR.
+define i64 @f13(i64 *%ptr0) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: dlg {{%r[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+  %ptr10 = getelementptr i64 *%ptr0, i64 20
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+  %val10 = load i64 *%ptr10
+
+  %ret = call i64 @foo()
+
+  %div0 = udiv i64 %ret, %val0
+  %div1 = udiv i64 %div0, %val1
+  %div2 = udiv i64 %div1, %val2
+  %div3 = udiv i64 %div2, %val3
+  %div4 = udiv i64 %div3, %val4
+  %div5 = udiv i64 %div4, %val5
+  %div6 = udiv i64 %div5, %val6
+  %div7 = udiv i64 %div6, %val7
+  %div8 = udiv i64 %div7, %val8
+  %div9 = udiv i64 %div8, %val9
+  %div10 = udiv i64 %div9, %val10
+
+  ret i64 %div10
+}
diff --git a/test/CodeGen/SystemZ/int-div-06.ll b/test/CodeGen/SystemZ/int-div-06.ll
new file mode 100644
index 000000000000..8576b1b6270a
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-div-06.ll
@@ -0,0 +1,56 @@
+; Test that divisions by constants are implemented as multiplications.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check signed 32-bit division.
+define i32 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: lgfr [[REG:%r[0-5]]], %r2
+; CHECK: msgfi [[REG]], 502748801
+; CHECK-DAG: srlg [[RES1:%r[0-5]]], [[REG]], 63
+; CHECK-DAG: srag %r2, [[REG]], 46
+; CHECK: ar %r2, [[RES1]]
+; CHECK: br %r14
+  %b = sdiv i32 %a, 139968
+  ret i32 %b
+}
+
+; Check unsigned 32-bit division.
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: llgfr [[REG:%r[0-5]]], %r2
+; CHECK: msgfi [[REG]], 502748801
+; CHECK: srlg %r2, [[REG]], 46
+; CHECK: br %r14
+  %b = udiv i32 %a, 139968
+  ret i32 %b
+}
+
+; Check signed 64-bit division.
+define i64 @f3(i64 %dummy, i64 %a) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: llihf [[CONST:%r[0-5]]], 1005497601
+; CHECK-DAG: oilf [[CONST]], 4251762321
+; CHECK-DAG: srag [[REG:%r[0-5]]], %r3, 63
+; CHECK-DAG: ngr [[REG]], [[CONST]]
+; CHECK-DAG: mlgr %r2, [[CONST]]
+; CHECK: sgr %r2, [[REG]]
+; CHECK: srlg [[RES1:%r[0-5]]], %r2, 63
+; CHECK: srag %r2, %r2, 15
+; CHECK: agr %r2, [[RES1]]
+; CHECK: br %r14
+  %b = sdiv i64 %a, 139968
+  ret i64 %b
+}
+
+; Check unsigned 64-bit division.
+define i64 @f4(i64 %dummy, i64 %a) {
+; CHECK-LABEL: f4:
+; CHECK: llihf [[CONST:%r[0-5]]], 1005497601
+; CHECK: oilf [[CONST]], 4251762321
+; CHECK: mlgr %r2, [[CONST]]
+; CHECK: srlg %r2, %r2, 15
+; CHECK: br %r14
+  %b = udiv i64 %a, 139968
+  ret i64 %b
+}
diff --git a/test/CodeGen/SystemZ/int-move-01.ll b/test/CodeGen/SystemZ/int-move-01.ll
index ae890ade3275..038e6887d67c 100644
--- a/test/CodeGen/SystemZ/int-move-01.ll
+++ b/test/CodeGen/SystemZ/int-move-01.ll
@@ -4,7 +4,7 @@
 
 ; Test 8-bit moves, which should get promoted to i32.
 define i8 @f1(i8 %a, i8 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
   ret i8 %b
@@ -12,7 +12,7 @@ define i8 @f1(i8 %a, i8 %b) {
 
 ; Test 16-bit moves, which again should get promoted to i32.
 define i16 @f2(i16 %a, i16 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
   ret i16 %b
@@ -20,7 +20,7 @@ define i16 @f2(i16 %a, i16 %b) {
 
 ; Test 32-bit moves.
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lr %r2, %r3
 ; CHECK: br %r14
   ret i32 %b
@@ -28,7 +28,7 @@ define i32 @f3(i32 %a, i32 %b) {
 
 ; Test 64-bit moves.
 define i64 @f4(i64 %a, i64 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   ret i64 %b
diff --git a/test/CodeGen/SystemZ/int-move-02.ll b/test/CodeGen/SystemZ/int-move-02.ll
index 467e22d89c5a..5fc0843290f9 100644
--- a/test/CodeGen/SystemZ/int-move-02.ll
+++ b/test/CodeGen/SystemZ/int-move-02.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the L range.
 define i32 @f1(i32 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load i32 *%src
@@ -13,7 +13,7 @@ define i32 @f1(i32 *%src) {
 
 ; Check the high end of the aligned L range.
 define i32 @f2(i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: l %r2, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -23,7 +23,7 @@ define i32 @f2(i32 *%src) {
 
 ; Check the next word up, which should use LY instead of L.
 define i32 @f3(i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ly %r2, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -33,7 +33,7 @@ define i32 @f3(i32 *%src) {
 
 ; Check the high end of the aligned LY range.
 define i32 @f4(i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ly %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -44,7 +44,7 @@ define i32 @f4(i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f5(i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r2, 524288
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
@@ -55,7 +55,7 @@ define i32 @f5(i32 *%src) {
 
 ; Check the high end of the negative aligned LY range.
 define i32 @f6(i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ly %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -65,7 +65,7 @@ define i32 @f6(i32 *%src) {
 
 ; Check the low end of the LY range.
 define i32 @f7(i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: ly %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +76,7 @@ define i32 @f7(i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f8(i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, -524292
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define i32 @f8(i32 *%src) {
 
 ; Check that L allows an index.
 define i32 @f9(i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: l %r2, 4095({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -99,7 +99,7 @@ define i32 @f9(i64 %src, i64 %index) {
 
 ; Check that LY allows an index.
 define i32 @f10(i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ly %r2, 4096({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/int-move-03.ll b/test/CodeGen/SystemZ/int-move-03.ll
index 97c70a2740c1..2894512e8eea 100644
--- a/test/CodeGen/SystemZ/int-move-03.ll
+++ b/test/CodeGen/SystemZ/int-move-03.ll
@@ -4,7 +4,7 @@
 
 ; Check LG with no displacement.
 define i64 @f1(i64 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load i64 *%src
@@ -13,7 +13,7 @@ define i64 @f1(i64 *%src) {
 
 ; Check the high end of the aligned LG range.
 define i64 @f2(i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lg %r2, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -24,7 +24,7 @@ define i64 @f2(i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f3(i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
@@ -35,7 +35,7 @@ define i64 @f3(i64 *%src) {
 
 ; Check the high end of the negative aligned LG range.
 define i64 @f4(i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lg %r2, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -45,7 +45,7 @@ define i64 @f4(i64 *%src) {
 
 ; Check the low end of the LG range.
 define i64 @f5(i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lg %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -56,7 +56,7 @@ define i64 @f5(i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f6(i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, -524296
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
@@ -67,7 +67,7 @@ define i64 @f6(i64 *%src) {
 
 ; Check that LG allows an index.
 define i64 @f7(i64 %src, i64 %index) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lg %r2, 524287({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/int-move-04.ll b/test/CodeGen/SystemZ/int-move-04.ll
index 9736657b1efa..d97ed2f54a4b 100644
--- a/test/CodeGen/SystemZ/int-move-04.ll
+++ b/test/CodeGen/SystemZ/int-move-04.ll
@@ -4,7 +4,7 @@
 
 ; Test an i8 store, which should get converted into an i32 truncation.
 define void @f1(i8 *%dst, i8 %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   store i8 %val, i8 *%dst
@@ -13,7 +13,7 @@ define void @f1(i8 *%dst, i8 %val) {
 
 ; Test an i32 truncating store.
 define void @f2(i8 *%dst, i32 %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   %trunc = trunc i32 %val to i8
@@ -23,7 +23,7 @@ define void @f2(i8 *%dst, i32 %val) {
 
 ; Test an i64 truncating store.
 define void @f3(i8 *%dst, i64 %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   %trunc = trunc i64 %val to i8
@@ -33,7 +33,7 @@ define void @f3(i8 *%dst, i64 %val) {
 
 ; Check the high end of the STC range.
 define void @f4(i8 *%dst, i8 %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stc %r3, 4095(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%dst, i64 4095
@@ -43,7 +43,7 @@ define void @f4(i8 *%dst, i8 %val) {
 
 ; Check the next byte up, which should use STCY instead of STC.
 define void @f5(i8 *%dst, i8 %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: stcy %r3, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%dst, i64 4096
@@ -53,7 +53,7 @@ define void @f5(i8 *%dst, i8 %val) {
 
 ; Check the high end of the STCY range.
 define void @f6(i8 *%dst, i8 %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: stcy %r3, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%dst, i64 524287
@@ -64,7 +64,7 @@ define void @f6(i8 *%dst, i8 %val) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f7(i8 *%dst, i8 %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r2, 524288
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
@@ -75,7 +75,7 @@ define void @f7(i8 *%dst, i8 %val) {
 
 ; Check the high end of the negative STCY range.
 define void @f8(i8 *%dst, i8 %val) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: stcy %r3, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%dst, i64 -1
@@ -85,7 +85,7 @@ define void @f8(i8 *%dst, i8 %val) {
 
 ; Check the low end of the STCY range.
 define void @f9(i8 *%dst, i8 %val) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: stcy %r3, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%dst, i64 -524288
@@ -96,7 +96,7 @@ define void @f9(i8 *%dst, i8 %val) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f10(i8 *%dst, i8 %val) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r2, -524289
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
@@ -107,7 +107,7 @@ define void @f10(i8 *%dst, i8 %val) {
 
 ; Check that STC allows an index.
 define void @f11(i64 %dst, i64 %index, i8 %val) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: stc %r4, 4095(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
@@ -119,7 +119,7 @@ define void @f11(i64 %dst, i64 %index, i8 %val) {
 
 ; Check that STCY allows an index.
 define void @f12(i64 %dst, i64 %index, i8 %val) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: stcy %r4, 4096(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
diff --git a/test/CodeGen/SystemZ/int-move-05.ll b/test/CodeGen/SystemZ/int-move-05.ll
index f61477e71830..c21b88aa7baa 100644
--- a/test/CodeGen/SystemZ/int-move-05.ll
+++ b/test/CodeGen/SystemZ/int-move-05.ll
@@ -4,7 +4,7 @@
 
 ; Test an i16 store, which should get converted into an i32 truncation.
 define void @f1(i16 *%dst, i16 %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   store i16 %val, i16 *%dst
@@ -13,7 +13,7 @@ define void @f1(i16 *%dst, i16 %val) {
 
 ; Test an i32 truncating store.
 define void @f2(i16 *%dst, i32 %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   %trunc = trunc i32 %val to i16
@@ -23,7 +23,7 @@ define void @f2(i16 *%dst, i32 %val) {
 
 ; Test an i64 truncating store.
 define void @f3(i16 *%dst, i64 %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   %trunc = trunc i64 %val to i16
@@ -33,7 +33,7 @@ define void @f3(i16 *%dst, i64 %val) {
 
 ; Check the high end of the STH range.
 define void @f4(i16 *%dst, i16 %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sth %r3, 4094(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%dst, i64 2047
@@ -43,7 +43,7 @@ define void @f4(i16 *%dst, i16 %val) {
 
 ; Check the next halfword up, which should use STHY instead of STH.
 define void @f5(i16 *%dst, i16 %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sthy %r3, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%dst, i64 2048
@@ -53,7 +53,7 @@ define void @f5(i16 *%dst, i16 %val) {
 
 ; Check the high end of the aligned STHY range.
 define void @f6(i16 *%dst, i16 %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sthy %r3, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%dst, i64 262143
@@ -64,7 +64,7 @@ define void @f6(i16 *%dst, i16 %val) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f7(i16 *%dst, i16 %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r2, 524288
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
@@ -75,7 +75,7 @@ define void @f7(i16 *%dst, i16 %val) {
 
 ; Check the high end of the negative aligned STHY range.
 define void @f8(i16 *%dst, i16 %val) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sthy %r3, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%dst, i64 -1
@@ -85,7 +85,7 @@ define void @f8(i16 *%dst, i16 %val) {
 
 ; Check the low end of the STHY range.
 define void @f9(i16 *%dst, i16 %val) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: sthy %r3, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%dst, i64 -262144
@@ -96,7 +96,7 @@ define void @f9(i16 *%dst, i16 %val) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f10(i16 *%dst, i16 %val) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: agfi %r2, -524290
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
@@ -107,7 +107,7 @@ define void @f10(i16 *%dst, i16 %val) {
 
 ; Check that STH allows an index.
 define void @f11(i64 %dst, i64 %index, i16 %val) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: sth %r4, 4094({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
@@ -119,7 +119,7 @@ define void @f11(i64 %dst, i64 %index, i16 %val) {
 
 ; Check that STHY allows an index.
 define void @f12(i64 %dst, i64 %index, i16 %val) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: sthy %r4, 4096({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
diff --git a/test/CodeGen/SystemZ/int-move-06.ll b/test/CodeGen/SystemZ/int-move-06.ll
index 5b35a32ff543..b8c6f53e15d8 100644
--- a/test/CodeGen/SystemZ/int-move-06.ll
+++ b/test/CodeGen/SystemZ/int-move-06.ll
@@ -4,7 +4,7 @@
 
 ; Test an i32 store.
 define void @f1(i32 *%dst, i32 %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
   store i32 %val, i32 *%dst
@@ -20,7 +20,7 @@ define void @f2(i32 *%dst, i64 %val) {
 
 ; Check the high end of the aligned ST range.
 define void @f3(i32 *%dst, i32 %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: st %r3, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%dst, i64 1023
@@ -30,7 +30,7 @@ define void @f3(i32 *%dst, i32 %val) {
 
 ; Check the next word up, which should use STY instead of ST.
 define void @f4(i32 *%dst, i32 %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sty %r3, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%dst, i64 1024
@@ -40,7 +40,7 @@ define void @f4(i32 *%dst, i32 %val) {
 
 ; Check the high end of the aligned STY range.
 define void @f5(i32 *%dst, i32 %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sty %r3, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%dst, i64 131071
@@ -51,7 +51,7 @@ define void @f5(i32 *%dst, i32 %val) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f6(i32 *%dst, i32 %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, 524288
 ; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
@@ -62,7 +62,7 @@ define void @f6(i32 *%dst, i32 %val) {
 
 ; Check the high end of the negative aligned STY range.
 define void @f7(i32 *%dst, i32 %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sty %r3, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%dst, i64 -1
@@ -72,7 +72,7 @@ define void @f7(i32 *%dst, i32 %val) {
 
 ; Check the low end of the STY range.
 define void @f8(i32 *%dst, i32 %val) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sty %r3, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%dst, i64 -131072
@@ -83,7 +83,7 @@ define void @f8(i32 *%dst, i32 %val) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f9(i32 *%dst, i32 %val) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r2, -524292
 ; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
@@ -94,7 +94,7 @@ define void @f9(i32 *%dst, i32 %val) {
 
 ; Check that ST allows an index.
 define void @f10(i64 %dst, i64 %index, i32 %val) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: st %r4, 4095(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
@@ -106,7 +106,7 @@ define void @f10(i64 %dst, i64 %index, i32 %val) {
 
 ; Check that STY allows an index.
 define void @f11(i64 %dst, i64 %index, i32 %val) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: sty %r4, 4096(%r3,%r2)
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
diff --git a/test/CodeGen/SystemZ/int-move-07.ll b/test/CodeGen/SystemZ/int-move-07.ll
index ab21ab039534..5cac1e5b1a2e 100644
--- a/test/CodeGen/SystemZ/int-move-07.ll
+++ b/test/CodeGen/SystemZ/int-move-07.ll
@@ -4,7 +4,7 @@
 
 ; Check STG with no displacement.
 define void @f1(i64 *%dst, i64 %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
   store i64 %val, i64 *%dst
@@ -13,7 +13,7 @@ define void @f1(i64 *%dst, i64 %val) {
 
 ; Check the high end of the aligned STG range.
 define void @f2(i64 *%dst, i64 %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: stg %r3, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%dst, i64 65535
@@ -24,7 +24,7 @@ define void @f2(i64 *%dst, i64 %val) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f3(i64 *%dst, i64 %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: agfi %r2, 524288
 ; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
@@ -35,7 +35,7 @@ define void @f3(i64 *%dst, i64 %val) {
 
 ; Check the high end of the negative aligned STG range.
 define void @f4(i64 *%dst, i64 %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: stg %r3, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%dst, i64 -1
@@ -45,7 +45,7 @@ define void @f4(i64 *%dst, i64 %val) {
 
 ; Check the low end of the STG range.
 define void @f5(i64 *%dst, i64 %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: stg %r3, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%dst, i64 -65536
@@ -56,7 +56,7 @@ define void @f5(i64 *%dst, i64 %val) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f6(i64 *%dst, i64 %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r2, -524296
 ; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
@@ -67,7 +67,7 @@ define void @f6(i64 *%dst, i64 %val) {
 
 ; Check that STG allows an index.
 define void @f7(i64 %dst, i64 %index, i64 %val) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: stg %r4, 524287({{%r3,%r2|%r2,%r3}})
 ; CHECK: br %r14
   %add1 = add i64 %dst, %index
diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll
index 5640fec3299f..56fcbc6d802b 100644
--- a/test/CodeGen/SystemZ/int-move-08.ll
+++ b/test/CodeGen/SystemZ/int-move-08.ll
@@ -6,10 +6,16 @@
 @gsrc32 = global i32 1
 @gdst16 = global i16 2
 @gdst32 = global i32 2
+@gsrc16u = global i16 1, align 1, section "foo"
+@gsrc32u = global i32 1, align 2, section "foo"
+@gdst16u = global i16 2, align 1, section "foo"
+@gdst32u = global i32 2, align 2, section "foo"
+@garray8 = global [2 x i8] [i8 100, i8 101]
+@garray16 = global [2 x i16] [i16 102, i16 103]
 
 ; Check sign-extending loads from i16.
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lhrl %r2, gsrc16
 ; CHECK: br %r14
   %val = load i16 *@gsrc16
@@ -19,7 +25,7 @@ define i32 @f1() {
 
 ; Check zero-extending loads from i16.
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llhrl %r2, gsrc16
 ; CHECK: br %r14
   %val = load i16 *@gsrc16
@@ -29,7 +35,7 @@ define i32 @f2() {
 
 ; Check truncating 16-bit stores.
 define void @f3(i32 %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sthrl %r2, gdst16
 ; CHECK: br %r14
   %half = trunc i32 %val to i16
@@ -39,7 +45,7 @@ define void @f3(i32 %val) {
 
 ; Check plain loads and stores.
 define void @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lrl %r0, gsrc32
 ; CHECK: strl %r0, gdst32
 ; CHECK: br %r14
@@ -47,3 +53,82 @@ define void @f4() {
   store i32 %val, i32 *@gdst32
   ret void
 }
+
+; Repeat f1 with an unaligned variable.
+define i32 @f5() {
+; CHECK-LABEL: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
+; CHECK: lh %r2, 0([[REG]])
+; CHECK: br %r14
+  %val = load i16 *@gsrc16u, align 1
+  %ext = sext i16 %val to i32
+  ret i32 %ext
+}
+
+; Repeat f2 with an unaligned variable.
+define i32 @f6() {
+; CHECK-LABEL: f6:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
+; CHECK: llh %r2, 0([[REG]])
+; CHECK: br %r14
+  %val = load i16 *@gsrc16u, align 1
+  %ext = zext i16 %val to i32
+  ret i32 %ext
+}
+
+; Repeat f3 with an unaligned variable.
+define void @f7(i32 %val) {
+; CHECK-LABEL: f7:
+; CHECK: lgrl [[REG:%r[0-5]]], gdst16u
+; CHECK: sth %r2, 0([[REG]])
+; CHECK: br %r14
+  %half = trunc i32 %val to i16
+  store i16 %half, i16 *@gdst16u, align 1
+  ret void
+}
+
+; Repeat f4 with unaligned variables.
+define void @f8() {
+; CHECK-LABEL: f8:
+; CHECK: larl [[REG:%r[0-5]]], gsrc32u
+; CHECK: l [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: larl [[REG:%r[0-5]]], gdst32u
+; CHECK: st [[VAL]], 0([[REG]])
+; CHECK: br %r14
+  %val = load i32 *@gsrc32u, align 2
+  store i32 %val, i32 *@gdst32u, align 2
+  ret void
+}
+
+; Test a case where we want to use one LARL for accesses to two different
+; parts of a variable.
+define void @f9() {
+; CHECK-LABEL: f9:
+; CHECK: larl [[REG:%r[0-5]]], garray8
+; CHECK: llc [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: srl [[VAL]], 1
+; CHECK: stc [[VAL]], 1([[REG]])
+; CHECK: br %r14
+  %ptr1 = getelementptr [2 x i8] *@garray8, i64 0, i64 0
+  %ptr2 = getelementptr [2 x i8] *@garray8, i64 0, i64 1
+  %val = load i8 *%ptr1
+  %shr = lshr i8 %val, 1
+  store i8 %shr, i8 *%ptr2
+  ret void
+}
+
+; Test a case where we want to use separate relative-long addresses for
+; two different parts of a variable.
+define void @f10() {
+; CHECK-LABEL: f10:
+; CHECK: llhrl [[VAL:%r[0-5]]], garray16
+; CHECK: srl [[VAL]], 1
+; CHECK: sthrl [[VAL]], garray16+2
+; CHECK: br %r14
+  %ptr1 = getelementptr [2 x i16] *@garray16, i64 0, i64 0
+  %ptr2 = getelementptr [2 x i16] *@garray16, i64 0, i64 1
+  %val = load i16 *%ptr1
+  %shr = lshr i16 %val, 1
+  store i16 %shr, i16 *%ptr2
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-09.ll b/test/CodeGen/SystemZ/int-move-09.ll
index a7a8c82951f5..b5c9cb13d288 100644
--- a/test/CodeGen/SystemZ/int-move-09.ll
+++ b/test/CodeGen/SystemZ/int-move-09.ll
@@ -8,10 +8,16 @@
 @gdst16 = global i16 2
 @gdst32 = global i32 2
 @gdst64 = global i64 2
+@gsrc16u = global i16 1, align 1, section "foo"
+@gsrc32u = global i32 1, align 2, section "foo"
+@gsrc64u = global i64 1, align 4, section "foo"
+@gdst16u = global i16 2, align 1, section "foo"
+@gdst32u = global i32 2, align 2, section "foo"
+@gdst64u = global i64 2, align 4, section "foo"
 
 ; Check sign-extending loads from i16.
 define i64 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lghrl %r2, gsrc16
 ; CHECK: br %r14
   %val = load i16 *@gsrc16
@@ -21,7 +27,7 @@ define i64 @f1() {
 
 ; Check zero-extending loads from i16.
 define i64 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: llghrl %r2, gsrc16
 ; CHECK: br %r14
   %val = load i16 *@gsrc16
@@ -31,7 +37,7 @@ define i64 @f2() {
 
 ; Check sign-extending loads from i32.
 define i64 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lgfrl %r2, gsrc32
 ; CHECK: br %r14
   %val = load i32 *@gsrc32
@@ -41,7 +47,7 @@ define i64 @f3() {
 
 ; Check zero-extending loads from i32.
 define i64 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: llgfrl %r2, gsrc32
 ; CHECK: br %r14
   %val = load i32 *@gsrc32
@@ -51,7 +57,7 @@ define i64 @f4() {
 
 ; Check truncating 16-bit stores.
 define void @f5(i64 %val) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sthrl %r2, gdst16
 ; CHECK: br %r14
   %half = trunc i64 %val to i16
@@ -61,7 +67,7 @@ define void @f5(i64 %val) {
 
 ; Check truncating 32-bit stores.
 define void @f6(i64 %val) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: strl %r2, gdst32
 ; CHECK: br %r14
   %word = trunc i64 %val to i32
@@ -71,7 +77,7 @@ define void @f6(i64 %val) {
 
 ; Check plain loads and stores.
 define void @f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgrl %r0, gsrc64
 ; CHECK: stgrl %r0, gdst64
 ; CHECK: br %r14
@@ -79,3 +85,82 @@ define void @f7() {
   store i64 %val, i64 *@gdst64
   ret void
 }
+
+; Repeat f1 with an unaligned variable.
+define i64 @f8() {
+; CHECK-LABEL: f8:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
+; CHECK: lgh %r2, 0([[REG]])
+; CHECK: br %r14
+  %val = load i16 *@gsrc16u, align 1
+  %ext = sext i16 %val to i64
+  ret i64 %ext
+}
+
+; Repeat f2 with an unaligned variable.
+define i64 @f9() {
+; CHECK-LABEL: f9:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
+; CHECK: llgh %r2, 0([[REG]])
+; CHECK: br %r14
+  %val = load i16 *@gsrc16u, align 1
+  %ext = zext i16 %val to i64
+  ret i64 %ext
+}
+
+; Repeat f3 with an unaligned variable.
+define i64 @f10() {
+; CHECK-LABEL: f10:
+; CHECK: larl [[REG:%r[0-5]]], gsrc32u
+; CHECK: lgf %r2, 0([[REG]])
+; CHECK: br %r14
+  %val = load i32 *@gsrc32u, align 2
+  %ext = sext i32 %val to i64
+  ret i64 %ext
+}
+
+; Repeat f4 with an unaligned variable.
+define i64 @f11() {
+; CHECK-LABEL: f11:
+; CHECK: larl [[REG:%r[0-5]]], gsrc32u
+; CHECK: llgf %r2, 0([[REG]])
+; CHECK: br %r14
+  %val = load i32 *@gsrc32u, align 2
+  %ext = zext i32 %val to i64
+  ret i64 %ext
+}
+
+; Repeat f5 with an unaligned variable.
+define void @f12(i64 %val) {
+; CHECK-LABEL: f12:
+; CHECK: lgrl [[REG:%r[0-5]]], gdst16u@GOT
+; CHECK: sth %r2, 0([[REG]])
+; CHECK: br %r14
+  %half = trunc i64 %val to i16
+  store i16 %half, i16 *@gdst16u, align 1
+  ret void
+}
+
+; Repeat f6 with an unaligned variable.
+define void @f13(i64 %val) {
+; CHECK-LABEL: f13:
+; CHECK: larl [[REG:%r[0-5]]], gdst32u
+; CHECK: st %r2, 0([[REG]])
+; CHECK: br %r14
+  %word = trunc i64 %val to i32
+  store i32 %word, i32 *@gdst32u, align 2
+  ret void
+}
+
+; Repeat f7 with unaligned variables.
+define void @f14() {
+; CHECK-LABEL: f14:
+; CHECK: larl [[REG:%r[0-5]]], gsrc64u
+; CHECK: lg [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: larl [[REG:%r[0-5]]], gdst64u
+; CHECK: stg [[VAL]], 0([[REG]])
+; CHECK: br %r14
+  %val = load i64 *@gsrc64u, align 4
+  store i64 %val, i64 *@gdst64u, align 4
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-mul-01.ll b/test/CodeGen/SystemZ/int-mul-01.ll
index e1246e2156e3..d5f7155f8c48 100644
--- a/test/CodeGen/SystemZ/int-mul-01.ll
+++ b/test/CodeGen/SystemZ/int-mul-01.ll
@@ -5,7 +5,7 @@
 
 ; Check the low end of the MH range.
 define i32 @f1(i32 %lhs, i16 *%src) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mh %r2, 0(%r3)
 ; CHECK: br %r14
   %half = load i16 *%src
@@ -16,7 +16,7 @@ define i32 @f1(i32 %lhs, i16 *%src) {
 
 ; Check the high end of the aligned MH range.
 define i32 @f2(i32 %lhs, i16 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mh %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2047
@@ -28,7 +28,7 @@ define i32 @f2(i32 %lhs, i16 *%src) {
 
 ; Check the next halfword up, which should use MHY instead of MH.
 define i32 @f3(i32 %lhs, i16 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mhy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 2048
@@ -40,7 +40,7 @@ define i32 @f3(i32 %lhs, i16 *%src) {
 
 ; Check the high end of the aligned MHY range.
 define i32 @f4(i32 %lhs, i16 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mhy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 262143
@@ -53,7 +53,7 @@ define i32 @f4(i32 %lhs, i16 *%src) {
 ; Check the next halfword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f5(i32 %lhs, i16 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: agfi %r3, 524288
 ; CHECK: mh %r2, 0(%r3)
 ; CHECK: br %r14
@@ -66,7 +66,7 @@ define i32 @f5(i32 %lhs, i16 *%src) {
 
 ; Check the high end of the negative aligned MHY range.
 define i32 @f6(i32 %lhs, i16 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: mhy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -1
@@ -78,7 +78,7 @@ define i32 @f6(i32 %lhs, i16 *%src) {
 
 ; Check the low end of the MHY range.
 define i32 @f7(i32 %lhs, i16 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mhy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16 *%src, i64 -262144
@@ -91,7 +91,7 @@ define i32 @f7(i32 %lhs, i16 *%src) {
 ; Check the next halfword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f8(i32 %lhs, i16 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r3, -524290
 ; CHECK: mh %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +104,7 @@ define i32 @f8(i32 %lhs, i16 *%src) {
 
 ; Check that MH allows an index.
 define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: mh %r2, 4094({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -118,7 +118,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
 
 ; Check that MHY allows an index.
 define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: mhy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
diff --git a/test/CodeGen/SystemZ/int-mul-02.ll b/test/CodeGen/SystemZ/int-mul-02.ll
index d39c4dd0961c..d002a7f2f9bd 100644
--- a/test/CodeGen/SystemZ/int-mul-02.ll
+++ b/test/CodeGen/SystemZ/int-mul-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i32 @foo()
+
 ; Check MSR.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: msr %r2, %r3
 ; CHECK: br %r14
   %mul = mul i32 %a, %b
@@ -13,7 +15,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check the low end of the MS range.
 define i32 @f2(i32 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ms %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -23,7 +25,7 @@ define i32 @f2(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned MS range.
 define i32 @f3(i32 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ms %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -34,7 +36,7 @@ define i32 @f3(i32 %a, i32 *%src) {
 
 ; Check the next word up, which should use MSY instead of MS.
 define i32 @f4(i32 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: msy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -45,7 +47,7 @@ define i32 @f4(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned MSY range.
 define i32 @f5(i32 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: msy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -57,7 +59,7 @@ define i32 @f5(i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: ms %r2, 0(%r3)
 ; CHECK: br %r14
@@ -69,7 +71,7 @@ define i32 @f6(i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned MSY range.
 define i32 @f7(i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: msy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -80,7 +82,7 @@ define i32 @f7(i32 %a, i32 *%src) {
 
 ; Check the low end of the MSY range.
 define i32 @f8(i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: msy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -92,7 +94,7 @@ define i32 @f8(i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: ms %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +106,7 @@ define i32 @f9(i32 %a, i32 *%src) {
 
 ; Check that MS allows an index.
 define i32 @f10(i32 %a, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ms %r2, 4092({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -117,7 +119,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
 
 ; Check that MSY allows an index.
 define i32 @f11(i32 %a, i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: msy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -127,3 +129,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
+
+; Check that multiplications of spilled values can use MS rather than MSR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: ms %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %mul0 = mul i32 %ret, %val0
+  %mul1 = mul i32 %mul0, %val1
+  %mul2 = mul i32 %mul1, %val2
+  %mul3 = mul i32 %mul2, %val3
+  %mul4 = mul i32 %mul3, %val4
+  %mul5 = mul i32 %mul4, %val5
+  %mul6 = mul i32 %mul5, %val6
+  %mul7 = mul i32 %mul6, %val7
+  %mul8 = mul i32 %mul7, %val8
+  %mul9 = mul i32 %mul8, %val9
+
+  ret i32 %mul9
+}
diff --git a/test/CodeGen/SystemZ/int-mul-03.ll b/test/CodeGen/SystemZ/int-mul-03.ll
index ab4ef9edd235..df18050d0242 100644
--- a/test/CodeGen/SystemZ/int-mul-03.ll
+++ b/test/CodeGen/SystemZ/int-mul-03.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check MSGFR.
 define i64 @f1(i64 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: msgfr %r2, %r3
 ; CHECK: br %r14
   %bext = sext i32 %b to i64
@@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) {
 
 ; Check MSGF with no displacement.
 define i64 @f2(i64 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: msgf %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) {
 
 ; Check the high end of the aligned MSGF range.
 define i64 @f3(i64 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: msgf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: msgf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned MSGF range.
 define i64 @f5(i64 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: msgf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) {
 
 ; Check the low end of the MSGF range.
 define i64 @f6(i64 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: msgf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524292
 ; CHECK: msgf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) {
 
 ; Check that MSGF allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: msgf %r2, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %mul = mul i64 %a, %bext
   ret i64 %mul
 }
+
+; Check that multiplications of spilled values can use MSGF rather than MSGFR.
+define i64 @f9(i32 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: msgf %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %ext0 = sext i32 %frob0 to i64
+  %ext1 = sext i32 %frob1 to i64
+  %ext2 = sext i32 %frob2 to i64
+  %ext3 = sext i32 %frob3 to i64
+  %ext4 = sext i32 %frob4 to i64
+  %ext5 = sext i32 %frob5 to i64
+  %ext6 = sext i32 %frob6 to i64
+  %ext7 = sext i32 %frob7 to i64
+  %ext8 = sext i32 %frob8 to i64
+  %ext9 = sext i32 %frob9 to i64
+
+  %mul0 = mul i64 %ret, %ext0
+  %mul1 = mul i64 %mul0, %ext1
+  %mul2 = mul i64 %mul1, %ext2
+  %mul3 = mul i64 %mul2, %ext3
+  %mul4 = mul i64 %mul3, %ext4
+  %mul5 = mul i64 %mul4, %ext5
+  %mul6 = mul i64 %mul5, %ext6
+  %mul7 = mul i64 %mul6, %ext7
+  %mul8 = mul i64 %mul7, %ext8
+  %mul9 = mul i64 %mul8, %ext9
+
+  ret i64 %mul9
+}
diff --git a/test/CodeGen/SystemZ/int-mul-04.ll b/test/CodeGen/SystemZ/int-mul-04.ll
index 94c263978341..183a9a748c37 100644
--- a/test/CodeGen/SystemZ/int-mul-04.ll
+++ b/test/CodeGen/SystemZ/int-mul-04.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check MSGR.
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: msgr %r2, %r3
 ; CHECK: br %r14
   %mul = mul i64 %a, %b
@@ -13,7 +15,7 @@ define i64 @f1(i64 %a, i64 %b) {
 
 ; Check MSG with no displacement.
 define i64 @f2(i64 %a, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: msg %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i64 *%src
@@ -23,7 +25,7 @@ define i64 @f2(i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned MSG range.
 define i64 @f3(i64 %a, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: msg %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -35,7 +37,7 @@ define i64 @f3(i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: msg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -47,7 +49,7 @@ define i64 @f4(i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned MSG range.
 define i64 @f5(i64 %a, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: msg %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -58,7 +60,7 @@ define i64 @f5(i64 %a, i64 *%src) {
 
 ; Check the low end of the MSG range.
 define i64 @f6(i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: msg %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -70,7 +72,7 @@ define i64 @f6(i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: msg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -82,7 +84,7 @@ define i64 @f7(i64 %a, i64 *%src) {
 
 ; Check that MSG allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: msg %r2, 524280({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -92,3 +94,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
+
+; Check that multiplications of spilled values can use MSG rather than MSGR. Ten values are loaded before calling @foo and multiplied into its result afterwards; one multiply is expected to read 160(%r15) directly.
+define i64 @f9(i64 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: msg %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %mul0 = mul i64 %ret, %val0
+  %mul1 = mul i64 %mul0, %val1
+  %mul2 = mul i64 %mul1, %val2
+  %mul3 = mul i64 %mul2, %val3
+  %mul4 = mul i64 %mul3, %val4
+  %mul5 = mul i64 %mul4, %val5
+  %mul6 = mul i64 %mul5, %val6
+  %mul7 = mul i64 %mul6, %val7
+  %mul8 = mul i64 %mul7, %val8
+  %mul9 = mul i64 %mul8, %val9
+
+  ret i64 %mul9
+}
diff --git a/test/CodeGen/SystemZ/int-mul-05.ll b/test/CodeGen/SystemZ/int-mul-05.ll
index 5e4031b5d77d..93f140d84504 100644
--- a/test/CodeGen/SystemZ/int-mul-05.ll
+++ b/test/CodeGen/SystemZ/int-mul-05.ll
@@ -4,7 +4,7 @@
 
 ; Check multiplication by 2, which should use shifts.
 define i32 @f1(i32 %a, i32 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sll %r2, 1
 ; CHECK: br %r14
   %mul = mul i32 %a, 2
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a, i32 *%dest) {
 
 ; Check multiplication by 3.
 define i32 @f2(i32 %a, i32 *%dest) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mhi %r2, 3
 ; CHECK: br %r14
   %mul = mul i32 %a, 3
@@ -22,7 +22,7 @@ define i32 @f2(i32 %a, i32 *%dest) {
 
 ; Check the high end of the MHI range.
 define i32 @f3(i32 %a, i32 *%dest) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mhi %r2, 32767
 ; CHECK: br %r14
   %mul = mul i32 %a, 32767
@@ -31,7 +31,7 @@ define i32 @f3(i32 %a, i32 *%dest) {
 
 ; Check the next value up, which should use shifts.
 define i32 @f4(i32 %a, i32 *%dest) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sll %r2, 15
 ; CHECK: br %r14
   %mul = mul i32 %a, 32768
@@ -40,7 +40,7 @@ define i32 @f4(i32 %a, i32 *%dest) {
 
 ; Check the next value up again, which can use MSFI.
 define i32 @f5(i32 %a, i32 *%dest) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: msfi %r2, 32769
 ; CHECK: br %r14
   %mul = mul i32 %a, 32769
@@ -49,7 +49,7 @@ define i32 @f5(i32 %a, i32 *%dest) {
 
 ; Check the high end of the MSFI range.
 define i32 @f6(i32 %a, i32 *%dest) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: msfi %r2, 2147483647
 ; CHECK: br %r14
   %mul = mul i32 %a, 2147483647
@@ -58,7 +58,7 @@ define i32 @f6(i32 %a, i32 *%dest) {
 
 ; Check the next value up, which should use shifts.
 define i32 @f7(i32 %a, i32 *%dest) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sll %r2, 31
 ; CHECK: br %r14
   %mul = mul i32 %a, 2147483648
@@ -67,7 +67,7 @@ define i32 @f7(i32 %a, i32 *%dest) {
 
 ; Check the next value up again, which is treated as a negative value.
 define i32 @f8(i32 %a, i32 *%dest) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: msfi %r2, -2147483647
 ; CHECK: br %r14
   %mul = mul i32 %a, 2147483649
@@ -76,7 +76,7 @@ define i32 @f8(i32 %a, i32 *%dest) {
 
 ; Check multiplication by -1, which is a negation.
 define i32 @f9(i32 %a, i32 *%dest) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lcr %r2, %r2
 ; CHECK: br %r14
   %mul = mul i32 %a, -1
@@ -85,7 +85,7 @@ define i32 @f9(i32 %a, i32 *%dest) {
 
 ; Check multiplication by -2, which should use shifts.
 define i32 @f10(i32 %a, i32 *%dest) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: sll %r2, 1
 ; CHECK: lcr %r2, %r2
 ; CHECK: br %r14
@@ -95,7 +95,7 @@ define i32 @f10(i32 %a, i32 *%dest) {
 
 ; Check multiplication by -3.
 define i32 @f11(i32 %a, i32 *%dest) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: mhi %r2, -3
 ; CHECK: br %r14
   %mul = mul i32 %a, -3
@@ -104,7 +104,7 @@ define i32 @f11(i32 %a, i32 *%dest) {
 
 ; Check the lowest useful MHI value.
 define i32 @f12(i32 %a, i32 *%dest) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: mhi %r2, -32767
 ; CHECK: br %r14
   %mul = mul i32 %a, -32767
@@ -113,7 +113,7 @@ define i32 @f12(i32 %a, i32 *%dest) {
 
 ; Check the next value down, which should use shifts.
 define i32 @f13(i32 %a, i32 *%dest) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: sll %r2, 15
 ; CHECK: lcr %r2, %r2
 ; CHECK: br %r14
@@ -123,7 +123,7 @@ define i32 @f13(i32 %a, i32 *%dest) {
 
 ; Check the next value down again, which can use MSFI.
 define i32 @f14(i32 %a, i32 *%dest) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: msfi %r2, -32769
 ; CHECK: br %r14
   %mul = mul i32 %a, -32769
@@ -132,7 +132,7 @@ define i32 @f14(i32 %a, i32 *%dest) {
 
 ; Check the lowest useful MSFI value.
 define i32 @f15(i32 %a, i32 *%dest) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: msfi %r2, -2147483647
 ; CHECK: br %r14
   %mul = mul i32 %a, -2147483647
@@ -141,7 +141,7 @@ define i32 @f15(i32 %a, i32 *%dest) {
 
 ; Check the next value down, which should use shifts.
 define i32 @f16(i32 %a, i32 *%dest) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: sll %r2, 31
 ; CHECK-NOT: lcr
 ; CHECK: br %r14
@@ -151,7 +151,7 @@ define i32 @f16(i32 %a, i32 *%dest) {
 
 ; Check the next value down again, which is treated as a positive value.
 define i32 @f17(i32 %a, i32 *%dest) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: msfi %r2, 2147483647
 ; CHECK: br %r14
   %mul = mul i32 %a, -2147483649
diff --git a/test/CodeGen/SystemZ/int-mul-06.ll b/test/CodeGen/SystemZ/int-mul-06.ll
index a3546059c023..ae9f9c6e4db5 100644
--- a/test/CodeGen/SystemZ/int-mul-06.ll
+++ b/test/CodeGen/SystemZ/int-mul-06.ll
@@ -4,7 +4,7 @@
 
 ; Check multiplication by 2, which should use shifts.
 define i64 @f1(i64 %a, i64 *%dest) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg %r2, %r2, 1
 ; CHECK: br %r14
   %mul = mul i64 %a, 2
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a, i64 *%dest) {
 
 ; Check multiplication by 3.
 define i64 @f2(i64 %a, i64 *%dest) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mghi %r2, 3
 ; CHECK: br %r14
   %mul = mul i64 %a, 3
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a, i64 *%dest) {
 
 ; Check the high end of the MGHI range.
 define i64 @f3(i64 %a, i64 *%dest) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mghi %r2, 32767
 ; CHECK: br %r14
   %mul = mul i64 %a, 32767
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a, i64 *%dest) {
 
 ; Check the next value up, which should use shifts.
 define i64 @f4(i64 %a, i64 *%dest) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sllg %r2, %r2, 15
 ; CHECK: br %r14
   %mul = mul i64 %a, 32768
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 *%dest) {
 
 ; Check the next value up again, which can use MSGFI.
 define i64 @f5(i64 %a, i64 *%dest) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: msgfi %r2, 32769
 ; CHECK: br %r14
   %mul = mul i64 %a, 32769
@@ -49,7 +49,7 @@ define i64 @f5(i64 %a, i64 *%dest) {
 
 ; Check the high end of the MSGFI range.
 define i64 @f6(i64 %a, i64 *%dest) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: msgfi %r2, 2147483647
 ; CHECK: br %r14
   %mul = mul i64 %a, 2147483647
@@ -58,7 +58,7 @@ define i64 @f6(i64 %a, i64 *%dest) {
 
 ; Check the next value up, which should use shifts.
 define i64 @f7(i64 %a, i64 *%dest) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sllg %r2, %r2, 31
 ; CHECK: br %r14
   %mul = mul i64 %a, 2147483648
@@ -67,7 +67,7 @@ define i64 @f7(i64 %a, i64 *%dest) {
 
 ; Check the next value up again, which cannot use a constant multiplicatoin.
 define i64 @f8(i64 %a, i64 *%dest) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK-NOT: msgfi
 ; CHECK: br %r14
   %mul = mul i64 %a, 2147483649
@@ -76,7 +76,7 @@ define i64 @f8(i64 %a, i64 *%dest) {
 
 ; Check multiplication by -1, which is a negation.
 define i64 @f9(i64 %a, i64 *%dest) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: lcgr {{%r[0-5]}}, %r2
 ; CHECK: br %r14
   %mul = mul i64 %a, -1
@@ -85,7 +85,7 @@ define i64 @f9(i64 %a, i64 *%dest) {
 
 ; Check multiplication by -2, which should use shifts.
 define i64 @f10(i64 %a, i64 *%dest) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 1
 ; CHECK: lcgr %r2, [[SHIFTED]]
 ; CHECK: br %r14
@@ -95,7 +95,7 @@ define i64 @f10(i64 %a, i64 *%dest) {
 
 ; Check multiplication by -3.
 define i64 @f11(i64 %a, i64 *%dest) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: mghi %r2, -3
 ; CHECK: br %r14
   %mul = mul i64 %a, -3
@@ -104,7 +104,7 @@ define i64 @f11(i64 %a, i64 *%dest) {
 
 ; Check the lowest useful MGHI value.
 define i64 @f12(i64 %a, i64 *%dest) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: mghi %r2, -32767
 ; CHECK: br %r14
   %mul = mul i64 %a, -32767
@@ -113,7 +113,7 @@ define i64 @f12(i64 %a, i64 *%dest) {
 
 ; Check the next value down, which should use shifts.
 define i64 @f13(i64 %a, i64 *%dest) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 15
 ; CHECK: lcgr %r2, [[SHIFTED]]
 ; CHECK: br %r14
@@ -123,7 +123,7 @@ define i64 @f13(i64 %a, i64 *%dest) {
 
 ; Check the next value down again, which can use MSGFI.
 define i64 @f14(i64 %a, i64 *%dest) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: msgfi %r2, -32769
 ; CHECK: br %r14
   %mul = mul i64 %a, -32769
@@ -132,7 +132,7 @@ define i64 @f14(i64 %a, i64 *%dest) {
 
 ; Check the lowest useful MSGFI value.
 define i64 @f15(i64 %a, i64 *%dest) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: msgfi %r2, -2147483647
 ; CHECK: br %r14
   %mul = mul i64 %a, -2147483647
@@ -141,7 +141,7 @@ define i64 @f15(i64 %a, i64 *%dest) {
 
 ; Check the next value down, which should use shifts.
 define i64 @f16(i64 %a, i64 *%dest) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 31
 ; CHECK: lcgr %r2, [[SHIFTED]]
 ; CHECK: br %r14
@@ -151,7 +151,7 @@ define i64 @f16(i64 %a, i64 *%dest) {
 
 ; Check the next value down again, which cannot use constant multiplication
 define i64 @f17(i64 %a, i64 *%dest) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK-NOT: msgfi
 ; CHECK: br %r14
   %mul = mul i64 %a, -2147483649
diff --git a/test/CodeGen/SystemZ/int-mul-07.ll b/test/CodeGen/SystemZ/int-mul-07.ll
index 2459cc359930..874f43dd398f 100644
--- a/test/CodeGen/SystemZ/int-mul-07.ll
+++ b/test/CodeGen/SystemZ/int-mul-07.ll
@@ -7,7 +7,7 @@
 
 ; Check zero-extended multiplication in which only the high part is used.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: msgr
 ; CHECK: br %r14
   %ax = zext i32 %a to i64
@@ -20,7 +20,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check sign-extended multiplication in which only the high part is used.
 define i32 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: msgfr
 ; CHECK: br %r14
   %ax = sext i32 %a to i64
@@ -34,7 +34,7 @@ define i32 @f2(i32 %a, i32 %b) {
 ; Check zero-extended multiplication in which the result is split into
 ; high and low halves.
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: msgr
 ; CHECK: br %r14
   %ax = zext i32 %a to i64
@@ -50,7 +50,7 @@ define i32 @f3(i32 %a, i32 %b) {
 ; Check sign-extended multiplication in which the result is split into
 ; high and low halves.
 define i32 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: msgfr
 ; CHECK: br %r14
   %ax = sext i32 %a to i64
diff --git a/test/CodeGen/SystemZ/int-mul-08.ll b/test/CodeGen/SystemZ/int-mul-08.ll
index 09ebe7a7b489..90b26a4f3dde 100644
--- a/test/CodeGen/SystemZ/int-mul-08.ll
+++ b/test/CodeGen/SystemZ/int-mul-08.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check zero-extended multiplication in which only the high part is used.
 define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: mlgr %r2, %r4
 ; CHECK: br %r14
@@ -19,10 +21,14 @@ define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
 ; Check sign-extended multiplication in which only the high part is used.
 ; This needs a rather convoluted sequence.
 define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
-; CHECK: f2:
-; CHECK: mlgr
-; CHECK: agr
-; CHECK: agr
+; CHECK-LABEL: f2:
+; CHECK-DAG: srag [[RES1:%r[0-5]]], %r3, 63
+; CHECK-DAG: srag [[RES2:%r[0-5]]], %r4, 63
+; CHECK-DAG: ngr [[RES1]], %r4
+; CHECK-DAG: ngr [[RES2]], %r3
+; CHECK-DAG: agr [[RES2]], [[RES1]]
+; CHECK-DAG: mlgr %r2, %r4
+; CHECK: sgr %r2, [[RES2]]
 ; CHECK: br %r14
   %ax = sext i64 %a to i128
   %bx = sext i64 %b to i128
@@ -35,7 +41,7 @@ define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
 ; Check zero-extended multiplication in which only part of the high half
 ; is used.
 define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: mlgr %r2, %r4
 ; CHECK: srlg %r2, %r2, 3
@@ -51,7 +57,7 @@ define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
 ; Check zero-extended multiplication in which the result is split into
 ; high and low halves.
 define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: mlgr %r2, %r4
 ; CHECK: ogr %r2, %r3
@@ -68,7 +74,7 @@ define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
 
 ; Check division by a constant, which should use multiplication instead.
 define i64 @f5(i64 %dummy, i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mlgr %r2,
 ; CHECK: srlg %r2, %r2,
 ; CHECK: br %r14
@@ -78,7 +84,7 @@ define i64 @f5(i64 %dummy, i64 %a) {
 
 ; Check MLG with no displacement.
 define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: mlg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -93,7 +99,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned MLG range.
 define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mlg %r2, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -109,7 +115,7 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
 ; Check the next doubleword up, which requires separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r4, 524288
 ; CHECK: mlg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -125,7 +131,7 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned MLG range.
 define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: mlg %r2, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -140,7 +146,7 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
 
 ; Check the low end of the MLG range.
 define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: mlg %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -156,7 +162,7 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f11(i64 *%dest, i64 %a, i64 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r4, -524296
 ; CHECK: mlg %r2, 0(%r4)
 ; CHECK: br %r14
@@ -172,7 +178,7 @@ define i64 @f11(i64 *%dest, i64 %a, i64 *%src) {
 
 ; Check that MLG allows an index.
 define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: mlg %r2, 524287(%r5,%r4)
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -186,3 +192,77 @@ define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) {
   %high = trunc i128 %highx to i64
   ret i64 %high
 }
+
+; Check that multiplications of spilled values can use MLG rather than MLGR. Ten values are loaded before calling @foo; each step then zero-extends a value to i128, multiplies, and keeps the high 64 bits of the product (lshr by 64).
+define i64 @f13(i64 *%ptr0) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mlg {{%r[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %retx = zext i64 %ret to i128
+  %val0x = zext i64 %val0 to i128
+  %mul0d = mul i128 %retx, %val0x
+  %mul0x = lshr i128 %mul0d, 64
+
+  %val1x = zext i64 %val1 to i128
+  %mul1d = mul i128 %mul0x, %val1x
+  %mul1x = lshr i128 %mul1d, 64
+
+  %val2x = zext i64 %val2 to i128
+  %mul2d = mul i128 %mul1x, %val2x
+  %mul2x = lshr i128 %mul2d, 64
+
+  %val3x = zext i64 %val3 to i128
+  %mul3d = mul i128 %mul2x, %val3x
+  %mul3x = lshr i128 %mul3d, 64
+
+  %val4x = zext i64 %val4 to i128
+  %mul4d = mul i128 %mul3x, %val4x
+  %mul4x = lshr i128 %mul4d, 64
+
+  %val5x = zext i64 %val5 to i128
+  %mul5d = mul i128 %mul4x, %val5x
+  %mul5x = lshr i128 %mul5d, 64
+
+  %val6x = zext i64 %val6 to i128
+  %mul6d = mul i128 %mul5x, %val6x
+  %mul6x = lshr i128 %mul6d, 64
+
+  %val7x = zext i64 %val7 to i128
+  %mul7d = mul i128 %mul6x, %val7x
+  %mul7x = lshr i128 %mul7d, 64
+
+  %val8x = zext i64 %val8 to i128
+  %mul8d = mul i128 %mul7x, %val8x
+  %mul8x = lshr i128 %mul8d, 64
+
+  %val9x = zext i64 %val9 to i128
+  %mul9d = mul i128 %mul8x, %val9x
+  %mul9x = lshr i128 %mul9d, 64
+
+  %mul9 = trunc i128 %mul9x to i64
+  ret i64 %mul9
+}
diff --git a/test/CodeGen/SystemZ/int-neg-01.ll b/test/CodeGen/SystemZ/int-neg-01.ll
index 6114f4efbc9a..a342fa792201 100644
--- a/test/CodeGen/SystemZ/int-neg-01.ll
+++ b/test/CodeGen/SystemZ/int-neg-01.ll
@@ -4,7 +4,7 @@
 
 ; Test i32->i32 negation.
 define i32 @f1(i32 %val) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lcr %r2, %r2
 ; CHECK: br %r14
   %neg = sub i32 0, %val
@@ -13,7 +13,7 @@ define i32 @f1(i32 %val) {
 
 ; Test i32->i64 negation.
 define i64 @f2(i32 %val) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lcgfr %r2, %r2
 ; CHECK: br %r14
   %ext = sext i32 %val to i64
@@ -23,7 +23,7 @@ define i64 @f2(i32 %val) {
 
 ; Test i32->i64 negation that uses an "in-register" form of sign extension.
 define i64 @f3(i64 %val) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lcgfr %r2, %r2
 ; CHECK: br %r14
   %trunc = trunc i64 %val to i32
@@ -34,7 +34,7 @@ define i64 @f3(i64 %val) {
 
 ; Test i64 negation.
 define i64 @f4(i64 %val) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lcgr %r2, %r2
 ; CHECK: br %r14
   %neg = sub i64 0, %val
diff --git a/test/CodeGen/SystemZ/int-neg-02.ll b/test/CodeGen/SystemZ/int-neg-02.ll
new file mode 100644
index 000000000000..e26194c162d4
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-neg-02.ll
@@ -0,0 +1,91 @@
+; Test negative integer absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test i32->i32 negative absolute, 0 - abs(val), with abs selected via slt; LNR is expected.
+define i32 @f1(i32 %val) {
+; CHECK-LABEL: f1:
+; CHECK: lnr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp slt i32 %val, 0
+  %neg = sub i32 0, %val
+  %abs = select i1 %cmp, i32 %neg, i32 %val
+  %res = sub i32 0, %abs
+  ret i32 %res
+}
+
+; Test i32->i32 negative absolute, 0 - abs(val), with abs selected via sle; LNR is expected.
+define i32 @f2(i32 %val) {
+; CHECK-LABEL: f2:
+; CHECK: lnr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp sle i32 %val, 0
+  %neg = sub i32 0, %val
+  %abs = select i1 %cmp, i32 %neg, i32 %val
+  %res = sub i32 0, %abs
+  ret i32 %res
+}
+
+; Test i32->i32 negative absolute, 0 - abs(val), with abs selected via sgt (select operands swapped: %val when true, %neg otherwise); LNR is expected.
+define i32 @f3(i32 %val) {
+; CHECK-LABEL: f3:
+; CHECK: lnr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp sgt i32 %val, 0
+  %neg = sub i32 0, %val
+  %abs = select i1 %cmp, i32 %val, i32 %neg
+  %res = sub i32 0, %abs
+  ret i32 %res
+}
+
+; Test i32->i32 negative absolute, 0 - abs(val), with abs selected via sge (select operands swapped: %val when true, %neg otherwise); LNR is expected.
+define i32 @f4(i32 %val) {
+; CHECK-LABEL: f4:
+; CHECK: lnr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp sge i32 %val, 0
+  %neg = sub i32 0, %val
+  %abs = select i1 %cmp, i32 %val, i32 %neg
+  %res = sub i32 0, %abs
+  ret i32 %res
+}
+
+; Test i32->i64 negative absolute: the i32 input is sign-extended to i64 before the abs and negate; LNGFR is expected.
+define i64 @f5(i32 %val) {
+; CHECK-LABEL: f5:
+; CHECK: lngfr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i32 %val to i64
+  %cmp = icmp slt i64 %ext, 0
+  %neg = sub i64 0, %ext
+  %abs = select i1 %cmp, i64 %neg, i64 %ext
+  %res = sub i64 0, %abs
+  ret i64 %res
+}
+
+; Test i32->i64 negative absolute that uses an "in-register" form of
+; sign extension (trunc i64 to i32, then sext back to i64); LNGFR is expected.
+define i64 @f6(i64 %val) {
+; CHECK-LABEL: f6:
+; CHECK: lngfr %r2, %r2
+; CHECK: br %r14
+  %trunc = trunc i64 %val to i32
+  %ext = sext i32 %trunc to i64
+  %cmp = icmp slt i64 %ext, 0
+  %neg = sub i64 0, %ext
+  %abs = select i1 %cmp, i64 %neg, i64 %ext
+  %res = sub i64 0, %abs
+  ret i64 %res
+}
+
+; Test i64 negative absolute, 0 - abs(val), with abs selected via slt; LNGR is expected.
+define i64 @f7(i64 %val) {
+; CHECK-LABEL: f7:
+; CHECK: lngr %r2, %r2
+; CHECK: br %r14
+  %cmp = icmp slt i64 %val, 0
+  %neg = sub i64 0, %val
+  %abs = select i1 %cmp, i64 %neg, i64 %val
+  %res = sub i64 0, %abs
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-sub-01.ll b/test/CodeGen/SystemZ/int-sub-01.ll
index 9a738148f7ef..8d1e56ddcaba 100644
--- a/test/CodeGen/SystemZ/int-sub-01.ll
+++ b/test/CodeGen/SystemZ/int-sub-01.ll
@@ -1,10 +1,13 @@
 ; Test 32-bit subtraction.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo()
 
 ; Check SR.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sr %r2, %r3
 ; CHECK: br %r14
   %sub = sub i32 %a, %b
@@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check the low end of the S range.
 define i32 @f2(i32 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned S range.
 define i32 @f3(i32 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: s %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) {
 
 ; Check the next word up, which should use SY instead of S.
 define i32 @f4(i32 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned SY range.
 define i32 @f5(i32 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: br %r14
@@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned SY range.
 define i32 @f7(i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) {
 
 ; Check the low end of the SY range.
 define i32 @f8(i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) {
 
 ; Check that S allows an index.
 define i32 @f10(i32 %a, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: s %r2, 4092({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
 
 ; Check that SY allows an index.
 define i32 @f11(i32 %a, i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: sy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
+
+; Check that subtractions of spilled values can use S rather than SR. Ten values are loaded before calling @foo and subtracted from its result afterwards; the CHECK pattern accepts a displacement of 160 or 164 from %r15.
+define i32 @f12(i32 *%ptr0) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: s %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %sub0 = sub i32 %ret, %val0
+  %sub1 = sub i32 %sub0, %val1
+  %sub2 = sub i32 %sub1, %val2
+  %sub3 = sub i32 %sub2, %val3
+  %sub4 = sub i32 %sub3, %val4
+  %sub5 = sub i32 %sub4, %val5
+  %sub6 = sub i32 %sub5, %val6
+  %sub7 = sub i32 %sub6, %val7
+  %sub8 = sub i32 %sub7, %val8
+  %sub9 = sub i32 %sub8, %val9
+
+  ret i32 %sub9
+}
diff --git a/test/CodeGen/SystemZ/int-sub-02.ll b/test/CodeGen/SystemZ/int-sub-02.ll
index 5150a960a554..a1c5ec50ee9c 100644
--- a/test/CodeGen/SystemZ/int-sub-02.ll
+++ b/test/CodeGen/SystemZ/int-sub-02.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check SGFR.
 define i64 @f1(i64 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sgfr %r2, %r3
 ; CHECK: br %r14
   %bext = sext i32 %b to i64
@@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) {
 
 ; Check SGF with no displacement.
 define i64 @f2(i64 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sgf %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) {
 
 ; Check the high end of the aligned SGF range.
 define i64 @f3(i64 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sgf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: sgf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned SGF range.
 define i64 @f5(i64 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sgf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) {
 
 ; Check the low end of the SGF range.
 define i64 @f6(i64 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sgf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524292
 ; CHECK: sgf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) {
 
 ; Check that SGF allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sgf %r2, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %sub = sub i64 %a, %bext
   ret i64 %sub
 }
+
+; Check that subtractions of spilled values can use SGF rather than SGFR. Each loaded i32 has 100 added and is stored back before calling @foo; the sums are then sign-extended to i64 and subtracted from the call's result.
+define i64 @f9(i32 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: sgf %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %ext0 = sext i32 %frob0 to i64
+  %ext1 = sext i32 %frob1 to i64
+  %ext2 = sext i32 %frob2 to i64
+  %ext3 = sext i32 %frob3 to i64
+  %ext4 = sext i32 %frob4 to i64
+  %ext5 = sext i32 %frob5 to i64
+  %ext6 = sext i32 %frob6 to i64
+  %ext7 = sext i32 %frob7 to i64
+  %ext8 = sext i32 %frob8 to i64
+  %ext9 = sext i32 %frob9 to i64
+
+  %sub0 = sub i64 %ret, %ext0
+  %sub1 = sub i64 %sub0, %ext1
+  %sub2 = sub i64 %sub1, %ext2
+  %sub3 = sub i64 %sub2, %ext3
+  %sub4 = sub i64 %sub3, %ext4
+  %sub5 = sub i64 %sub4, %ext5
+  %sub6 = sub i64 %sub5, %ext6
+  %sub7 = sub i64 %sub6, %ext7
+  %sub8 = sub i64 %sub7, %ext8
+  %sub9 = sub i64 %sub8, %ext9
+
+  ret i64 %sub9
+}
diff --git a/test/CodeGen/SystemZ/int-sub-03.ll b/test/CodeGen/SystemZ/int-sub-03.ll
index 73571b3591f5..44edd84bda4f 100644
--- a/test/CodeGen/SystemZ/int-sub-03.ll
+++ b/test/CodeGen/SystemZ/int-sub-03.ll
@@ -2,9 +2,11 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i64 @foo()
+
 ; Check SLGFR.
 define i64 @f1(i64 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: slgfr %r2, %r3
 ; CHECK: br %r14
   %bext = zext i32 %b to i64
@@ -14,7 +16,7 @@ define i64 @f1(i64 %a, i32 %b) {
 
 ; Check SLGF with no displacement.
 define i64 @f2(i64 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: slgf %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -25,7 +27,7 @@ define i64 @f2(i64 %a, i32 *%src) {
 
 ; Check the high end of the aligned SLGF range.
 define i64 @f3(i64 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: slgf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -38,7 +40,7 @@ define i64 @f3(i64 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: slgf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -51,7 +53,7 @@ define i64 @f4(i64 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned SLGF range.
 define i64 @f5(i64 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: slgf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -63,7 +65,7 @@ define i64 @f5(i64 %a, i32 *%src) {
 
 ; Check the low end of the SLGF range.
 define i64 @f6(i64 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: slgf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -76,7 +78,7 @@ define i64 @f6(i64 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524292
 ; CHECK: slgf %r2, 0(%r3)
 ; CHECK: br %r14
@@ -89,7 +91,7 @@ define i64 @f7(i64 %a, i32 *%src) {
 
 ; Check that SLGF allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: slgf %r2, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -100,3 +102,79 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %sub = sub i64 %a, %bext
   ret i64 %sub
 }
+
+; Check that subtractions of spilled values can use SLGF rather than SLGFR.
+define i64 @f9(i32 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: slgf %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2  ; %ptr1..%ptr9 address every second i32 after %ptr0
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0  ; load ten independent i32 values
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %frob0 = add i32 %val0, 100  ; %frobN = %valN + 100; each is stored back and reused after the call
+  %frob1 = add i32 %val1, 100
+  %frob2 = add i32 %val2, 100
+  %frob3 = add i32 %val3, 100
+  %frob4 = add i32 %val4, 100
+  %frob5 = add i32 %val5, 100
+  %frob6 = add i32 %val6, 100
+  %frob7 = add i32 %val7, 100
+  %frob8 = add i32 %val8, 100
+  %frob9 = add i32 %val9, 100
+
+  store i32 %frob0, i32 *%ptr0
+  store i32 %frob1, i32 *%ptr1
+  store i32 %frob2, i32 *%ptr2
+  store i32 %frob3, i32 *%ptr3
+  store i32 %frob4, i32 *%ptr4
+  store i32 %frob5, i32 *%ptr5
+  store i32 %frob6, i32 *%ptr6
+  store i32 %frob7, i32 *%ptr7
+  store i32 %frob8, i32 *%ptr8
+  store i32 %frob9, i32 *%ptr9
+
+  %ret = call i64 @foo()  ; all ten %frob values are still needed after this call
+
+  %ext0 = zext i32 %frob0 to i64  ; zero-extend each %frobN to i64 for the 64-bit subtraction
+  %ext1 = zext i32 %frob1 to i64
+  %ext2 = zext i32 %frob2 to i64
+  %ext3 = zext i32 %frob3 to i64
+  %ext4 = zext i32 %frob4 to i64
+  %ext5 = zext i32 %frob5 to i64
+  %ext6 = zext i32 %frob6 to i64
+  %ext7 = zext i32 %frob7 to i64
+  %ext8 = zext i32 %frob8 to i64
+  %ext9 = zext i32 %frob9 to i64
+
+  %sub0 = sub i64 %ret, %ext0  ; chain: %ret - %ext0 - %ext1 - ... - %ext9
+  %sub1 = sub i64 %sub0, %ext1
+  %sub2 = sub i64 %sub1, %ext2
+  %sub3 = sub i64 %sub2, %ext3
+  %sub4 = sub i64 %sub3, %ext4
+  %sub5 = sub i64 %sub4, %ext5
+  %sub6 = sub i64 %sub5, %ext6
+  %sub7 = sub i64 %sub6, %ext7
+  %sub8 = sub i64 %sub7, %ext8
+  %sub9 = sub i64 %sub8, %ext9
+
+  ret i64 %sub9
+}
diff --git a/test/CodeGen/SystemZ/int-sub-04.ll b/test/CodeGen/SystemZ/int-sub-04.ll
index 545d34216809..85104536c5d7 100644
--- a/test/CodeGen/SystemZ/int-sub-04.ll
+++ b/test/CodeGen/SystemZ/int-sub-04.ll
@@ -1,10 +1,13 @@
 ; Test 64-bit subtraction in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i64 @foo()
 
 ; Check SGR.
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sgr %r2, %r3
 ; CHECK: br %r14
   %sub = sub i64 %a, %b
@@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) {
 
 ; Check SG with no displacement.
 define i64 @f2(i64 %a, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i64 *%src
@@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned SG range.
 define i64 @f3(i64 %a, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sg %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned SG range.
 define i64 @f5(i64 %a, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sg %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) {
 
 ; Check the low end of the SG range.
 define i64 @f6(i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sg %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) {
 
 ; Check that SG allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sg %r2, 524280({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
+
+; Check that subtractions of spilled values can use SG rather than SGR.
+define i64 @f9(i64 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: sg %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2  ; %ptr1..%ptr9 address every second i64 after %ptr0
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0  ; ten i64 values loaded before the call and used only after it
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()  ; every %valN is live across this call
+
+  %sub0 = sub i64 %ret, %val0  ; chain: %ret - %val0 - %val1 - ... - %val9
+  %sub1 = sub i64 %sub0, %val1
+  %sub2 = sub i64 %sub1, %val2
+  %sub3 = sub i64 %sub2, %val3
+  %sub4 = sub i64 %sub3, %val4
+  %sub5 = sub i64 %sub4, %val5
+  %sub6 = sub i64 %sub5, %val6
+  %sub7 = sub i64 %sub6, %val7
+  %sub8 = sub i64 %sub7, %val8
+  %sub9 = sub i64 %sub8, %val9
+
+  ret i64 %sub9
+}
diff --git a/test/CodeGen/SystemZ/int-sub-05.ll b/test/CodeGen/SystemZ/int-sub-05.ll
index 1475b244f678..85ea14cd15f3 100644
--- a/test/CodeGen/SystemZ/int-sub-05.ll
+++ b/test/CodeGen/SystemZ/int-sub-05.ll
@@ -1,10 +1,13 @@
-; Test 128-bit addition in which the second operand is variable.
+; Test 128-bit subtraction in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i128 *@foo()
 
 ; Test register addition.
 define void @f1(i128 *%ptr, i64 %high, i64 %low) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: slgr {{%r[0-5]}}, %r4
 ; CHECK: slbgr {{%r[0-5]}}, %r3
 ; CHECK: br %r14
@@ -20,7 +23,7 @@ define void @f1(i128 *%ptr, i64 %high, i64 %low) {
 
 ; Test memory addition with no offset.
 define void @f2(i64 %addr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: slg {{%r[0-5]}}, 8(%r2)
 ; CHECK: slbg {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
@@ -35,7 +38,7 @@ define void @f2(i64 %addr) {
 
 ; Test the highest aligned offset that is in range of both SLG and SLBG.
 define void @f3(i64 %base) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: slg {{%r[0-5]}}, 524280(%r2)
 ; CHECK: slbg {{%r[0-5]}}, 524272(%r2)
 ; CHECK: br %r14
@@ -51,7 +54,7 @@ define void @f3(i64 %base) {
 
 ; Test the next doubleword up, which requires separate address logic for SLG.
 define void @f4(i64 %base) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgr [[BASE:%r[1-5]]], %r2
 ; CHECK: agfi [[BASE]], 524288
 ; CHECK: slg {{%r[0-5]}}, 0([[BASE]])
@@ -71,7 +74,7 @@ define void @f4(i64 %base) {
 ; both instructions.  It would be better to create an anchor at 524288
 ; that both instructions can use, but that isn't implemented yet.
 define void @f5(i64 %base) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: slg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
@@ -87,7 +90,7 @@ define void @f5(i64 %base) {
 
 ; Test the lowest displacement that is in range of both SLG and SLBG.
 define void @f6(i64 %base) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: slg {{%r[0-5]}}, -524280(%r2)
 ; CHECK: slbg {{%r[0-5]}}, -524288(%r2)
 ; CHECK: br %r14
@@ -103,7 +106,7 @@ define void @f6(i64 %base) {
 
 ; Test the next doubleword down, which is out of range of the SLBG.
 define void @f7(i64 %base) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: slg {{%r[0-5]}}, -524288(%r2)
 ; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}})
 ; CHECK: br %r14
@@ -116,3 +119,35 @@ define void @f7(i64 %base) {
   store i128 %sub, i128 *%aptr
   ret void
 }
+
+; Check that subtractions of spilled values can use SLG and SLBG rather than
+; SLGR and SLBGR.
+define void @f8(i128 *%ptr0) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: slg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: slbg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i128 *%ptr0, i128 2  ; %ptr1..%ptr4 address every second i128 after %ptr0
+  %ptr2 = getelementptr i128 *%ptr0, i128 4
+  %ptr3 = getelementptr i128 *%ptr0, i128 6
+  %ptr4 = getelementptr i128 *%ptr0, i128 8
+
+  %val0 = load i128 *%ptr0  ; five i128 values loaded before the call and used only after it
+  %val1 = load i128 *%ptr1
+  %val2 = load i128 *%ptr2
+  %val3 = load i128 *%ptr3
+  %val4 = load i128 *%ptr4
+
+  %retptr = call i128 *@foo()  ; foo returns a pointer; every %valN is live across this call
+
+  %ret = load i128 *%retptr  ; starting value is read through the returned pointer
+  %sub0 = sub i128 %ret, %val0  ; chain: %ret - %val0 - ... - %val4
+  %sub1 = sub i128 %sub0, %val1
+  %sub2 = sub i128 %sub1, %val2
+  %sub3 = sub i128 %sub2, %val3
+  %sub4 = sub i128 %sub3, %val4
+  store i128 %sub4, i128 *%retptr  ; result is written back through the same pointer
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-sub-06.ll b/test/CodeGen/SystemZ/int-sub-06.ll
index 0e04d51e2bc7..395d584b23de 100644
--- a/test/CodeGen/SystemZ/int-sub-06.ll
+++ b/test/CodeGen/SystemZ/int-sub-06.ll
@@ -5,7 +5,7 @@
 ; Check register additions.  The XOR ensures that we don't instead zero-extend
 ; %b into a register and use memory addition.
 define void @f1(i128 *%aptr, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: slgfr {{%r[0-5]}}, %r3
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -19,7 +19,7 @@ define void @f1(i128 *%aptr, i32 %b) {
 
 ; Like f1, but using an "in-register" extension.
 define void @f2(i128 *%aptr, i64 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: slgfr {{%r[0-5]}}, %r3
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -35,7 +35,7 @@ define void @f2(i128 *%aptr, i64 %b) {
 ; Test register addition in cases where the second operand is zero extended
 ; from i64 rather than i32, but is later masked to i32 range.
 define void @f3(i128 *%aptr, i64 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: slgfr {{%r[0-5]}}, %r3
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -50,7 +50,7 @@ define void @f3(i128 *%aptr, i64 %b) {
 
 ; Test SLGF with no offset.
 define void @f4(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: slgf {{%r[0-5]}}, 0(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -65,7 +65,7 @@ define void @f4(i128 *%aptr, i32 *%bsrc) {
 
 ; Check the high end of the SLGF range.
 define void @f5(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: slgf {{%r[0-5]}}, 524284(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -82,7 +82,7 @@ define void @f5(i128 *%aptr, i32 *%bsrc) {
 ; Check the next word up, which must use separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f6(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: slgf {{%r[0-5]}}, 0(%r3)
 ; CHECK: slbgr
@@ -99,7 +99,7 @@ define void @f6(i128 *%aptr, i32 *%bsrc) {
 
 ; Check the high end of the negative aligned SLGF range.
 define void @f7(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: slgf {{%r[0-5]}}, -4(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -115,7 +115,7 @@ define void @f7(i128 *%aptr, i32 *%bsrc) {
 
 ; Check the low end of the SLGF range.
 define void @f8(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: slgf {{%r[0-5]}}, -524288(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
@@ -132,7 +132,7 @@ define void @f8(i128 *%aptr, i32 *%bsrc) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f9(i128 *%aptr, i32 *%bsrc) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: slgf {{%r[0-5]}}, 0(%r3)
 ; CHECK: slbgr
@@ -149,7 +149,7 @@ define void @f9(i128 *%aptr, i32 *%bsrc) {
 
 ; Check that SLGF allows an index.
 define void @f10(i128 *%aptr, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: slgf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %a = load i128 *%aptr
diff --git a/test/CodeGen/SystemZ/int-sub-07.ll b/test/CodeGen/SystemZ/int-sub-07.ll
new file mode 100644
index 000000000000..5c1f42c1cc96
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-07.ll
@@ -0,0 +1,131 @@
+; Test 32-bit subtraction in which the second operand is a sign-extended
+; i16 memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the SH range.
+define i32 @f1(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: sh %r2, 0(%r3)
+; CHECK: br %r14
+  %half = load i16 *%src  ; loaded directly from %src, so the expected displacement is 0
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the aligned SH range.
+define i32 @f2(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: sh %r2, 4094(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2047  ; byte offset 2047 * 2 = 4094
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword up, which should use SHY instead of SH.
+define i32 @f3(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: shy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2048  ; byte offset 2048 * 2 = 4096
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the aligned SHY range.
+define i32 @f4(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: shy %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143  ; byte offset 262143 * 2 = 524286
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: agfi %r3, 524288
+; CHECK: sh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144  ; byte offset 262144 * 2 = 524288, added to the base separately
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the negative aligned SHY range.
+define i32 @f6(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: shy %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1  ; byte offset -1 * 2 = -2
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the low end of the SHY range.
+define i32 @f7(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f7:
+; CHECK: shy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144  ; byte offset -262144 * 2 = -524288
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 %lhs, i16 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: agfi %r3, -524290
+; CHECK: sh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145  ; byte offset -262145 * 2 = -524290, added to the base separately
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check that SH allows an index.
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
+; CHECK-LABEL: f9:
+; CHECK: sh %r2, 4094({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index  ; base + index, expected to become the two address registers
+  %add2 = add i64 %add1, 4094  ; plus the constant displacement checked above
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check that SHY allows an index.
+define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
+; CHECK-LABEL: f10:
+; CHECK: shy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index  ; base + index, expected to become the two address registers
+  %add2 = add i64 %add1, 4096  ; plus the constant displacement checked above
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32  ; sign-extend the halfword to i32
+  %res = sub i32 %lhs, %rhs
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-sub-08.ll b/test/CodeGen/SystemZ/int-sub-08.ll
new file mode 100644
index 000000000000..f0a5e1e063a1
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-08.ll
@@ -0,0 +1,39 @@
+; Test the three-operand forms of subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check SRK.
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f1:
+; CHECK: srk %r2, %r3, %r4
+; CHECK: br %r14
+  %sub = sub i32 %b, %c  ; %a is not used: both operands come from the second and third arguments
+  ret i32 %sub
+}
+
+; Check that we can still use SR in obvious cases.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: sr %r2, %r3
+; CHECK: br %r14
+  %sub = sub i32 %a, %b  ; first operand is %a, matching the two-operand form checked above
+  ret i32 %sub
+}
+
+; Check SGRK.
+define i64 @f3(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: f3:
+; CHECK: sgrk %r2, %r3, %r4
+; CHECK: br %r14
+  %sub = sub i64 %b, %c  ; %a is not used: both operands come from the second and third arguments
+  ret i64 %sub
+}
+
+; Check that we can still use SGR in obvious cases.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: sgr %r2, %r3
+; CHECK: br %r14
+  %sub = sub i64 %a, %b  ; first operand is %a, matching the two-operand form checked above
+  ret i64 %sub
+}
diff --git a/test/CodeGen/SystemZ/int-sub-09.ll b/test/CodeGen/SystemZ/int-sub-09.ll
new file mode 100644
index 000000000000..00a60d3819c9
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-09.ll
@@ -0,0 +1,22 @@
+; Test 128-bit subtraction when the distinct-operands facility is available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Test the case where both operands are in registers.
+define i64 @f1(i64 %a, i64 %b, i64 %c, i64 %d, i64 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: slgrk %r2, %r4, %r5
+; CHECK: slbgr
+; CHECK: br %r14
+  %x1 = insertelement <2 x i64> undef, i64 %b, i32 0  ; element 0 = %b, shared by both operands
+  %x2 = insertelement <2 x i64> %x1, i64 %c, i32 1  ; first operand: element 1 = %c
+  %x = bitcast <2 x i64> %x2 to i128
+  %y2 = insertelement <2 x i64> %x1, i64 %d, i32 1  ; second operand: element 1 = %d
+  %y = bitcast <2 x i64> %y2 to i128
+  %sub = sub i128 %x, %y  ; the 128-bit subtraction under test
+  %subv = bitcast i128 %sub to <2 x i64>  ; split the result back into two i64 halves
+  %high = extractelement <2 x i64> %subv, i32 0
+  store i64 %high, i64 *%ptr  ; element 0 of the result is stored through %ptr
+  %low = extractelement <2 x i64> %subv, i32 1
+  ret i64 %low  ; element 1 of the result is returned
+}
diff --git a/test/CodeGen/SystemZ/la-01.ll b/test/CodeGen/SystemZ/la-01.ll
index b43e3f8662dc..31d204125170 100644
--- a/test/CodeGen/SystemZ/la-01.ll
+++ b/test/CodeGen/SystemZ/la-01.ll
@@ -15,9 +15,11 @@ define void @df() {
   ret void
 }
 
+declare void @foo(i32 *)
+
 ; Test a load of a fully-aligned external variable.
 define i32 *@f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: larl %r2, e4
 ; CHECK-NEXT: br %r14
   ret i32 *@e4
@@ -25,7 +27,7 @@ define i32 *@f1() {
 
 ; Test a load of a fully-aligned local variable.
 define i32 *@f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: larl %r2, d4
 ; CHECK-NEXT: br %r14
   ret i32 *@d4
@@ -33,7 +35,7 @@ define i32 *@f2() {
 
 ; Test a load of a 2-byte-aligned external variable.
 define i32 *@f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: larl %r2, e2
 ; CHECK-NEXT: br %r14
   ret i32 *@e2
@@ -41,7 +43,7 @@ define i32 *@f3() {
 
 ; Test a load of a 2-byte-aligned local variable.
 define i32 *@f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: larl %r2, d2
 ; CHECK-NEXT: br %r14
   ret i32 *@d2
@@ -49,7 +51,7 @@ define i32 *@f4() {
 
 ; Test a load of an unaligned external variable, which must go via the GOT.
 define i32 *@f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lgrl %r2, e1@GOT
 ; CHECK-NEXT: br %r14
   ret i32 *@e1
@@ -57,7 +59,7 @@ define i32 *@f5() {
 
 ; Test a load of an unaligned local variable, which must go via the GOT.
 define i32 *@f6() {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgrl %r2, d1@GOT
 ; CHECK-NEXT: br %r14
   ret i32 *@d1
@@ -65,7 +67,7 @@ define i32 *@f6() {
 
 ; Test a load of an external function.
 define void() *@f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: larl %r2, ef
 ; CHECK-NEXT: br %r14
   ret void() *@ef
@@ -73,8 +75,21 @@ define void() *@f7() {
 
 ; Test a load of a local function.
 define void() *@f8() {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: larl %r2, df
 ; CHECK-NEXT: br %r14
   ret void() *@df
 }
+
+; Test that LARL can be rematerialized.
+define i32 @f9() {
+; CHECK-LABEL: f9:
+; CHECK: larl %r2, d2
+; CHECK: brasl %r14, foo@PLT
+; CHECK: larl %r2, d2
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  call void @foo(i32 *@d2)  ; first call takes the address of @d2
+  call void @foo(i32 *@d2)  ; same address again; the checks expect a second LARL before this call
+  ret i32 0
+}
diff --git a/test/CodeGen/SystemZ/la-02.ll b/test/CodeGen/SystemZ/la-02.ll
index 4c5374a0925b..d7362d67e3bf 100644
--- a/test/CodeGen/SystemZ/la-02.ll
+++ b/test/CodeGen/SystemZ/la-02.ll
@@ -23,7 +23,7 @@ define hidden void @hf() {
 ; Test loads of external variables.  There is no guarantee that the
 ; variable will be in range of LARL.
 define i32 *@f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lgrl %r2, ev@GOT
 ; CHECK: br %r14
   ret i32 *@ev
@@ -31,7 +31,7 @@ define i32 *@f1() {
 
 ; ...likewise locally-defined normal-visibility variables.
 define i32 *@f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lgrl %r2, dv@GOT
 ; CHECK: br %r14
   ret i32 *@dv
@@ -39,7 +39,7 @@ define i32 *@f2() {
 
 ; ...likewise protected variables.
 define i32 *@f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: lgrl %r2, pv@GOT
 ; CHECK: br %r14
   ret i32 *@pv
@@ -47,7 +47,7 @@ define i32 *@f3() {
 
 ; ...likewise hidden variables.
 define i32 *@f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: lgrl %r2, hv@GOT
 ; CHECK: br %r14
   ret i32 *@hv
@@ -56,7 +56,7 @@ define i32 *@f4() {
 ; Check loads of external functions.  This could use LARL, but we don't have
 ; code to detect that yet.
 define void() *@f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lgrl %r2, ef@GOT
 ; CHECK: br %r14
   ret void() *@ef
@@ -64,7 +64,7 @@ define void() *@f5() {
 
 ; ...likewise locally-defined normal-visibility functions.
 define void() *@f6() {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgrl %r2, df@GOT
 ; CHECK: br %r14
   ret void() *@df
@@ -72,7 +72,7 @@ define void() *@f6() {
 
 ; ...likewise protected functions.
 define void() *@f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lgrl %r2, pf@GOT
 ; CHECK: br %r14
   ret void() *@pf
@@ -80,7 +80,7 @@ define void() *@f7() {
 
 ; ...likewise hidden functions.
 define void() *@f8() {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: lgrl %r2, hf@GOT
 ; CHECK: br %r14
   ret void() *@hf
diff --git a/test/CodeGen/SystemZ/la-03.ll b/test/CodeGen/SystemZ/la-03.ll
index 9449b2bfbec0..1ff3fefde6c9 100644
--- a/test/CodeGen/SystemZ/la-03.ll
+++ b/test/CodeGen/SystemZ/la-03.ll
@@ -20,7 +20,7 @@ define hidden void @hf() {
 
 ; Test loads of external variables, which must go via the GOT.
 define i32 *@f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lgrl %r2, ev@GOT
 ; CHECK: br %r14
   ret i32 *@ev
@@ -29,7 +29,7 @@ define i32 *@f1() {
 ; Check loads of locally-defined normal-visibility variables, which might
 ; be overridden.  The load must go via the GOT.
 define i32 *@f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: lgrl %r2, dv@GOT
 ; CHECK: br %r14
   ret i32 *@dv
@@ -38,7 +38,7 @@ define i32 *@f2() {
 ; Check loads of protected variables, which in the small code model
 ; must be in range of LARL.
 define i32 *@f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: larl %r2, pv
 ; CHECK: br %r14
   ret i32 *@pv
@@ -46,7 +46,7 @@ define i32 *@f3() {
 
 ; ...likewise hidden variables.
 define i32 *@f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: larl %r2, hv
 ; CHECK: br %r14
   ret i32 *@hv
@@ -54,7 +54,7 @@ define i32 *@f4() {
 
 ; Like f1, but for functions.
 define void() *@f5() {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: lgrl %r2, ef@GOT
 ; CHECK: br %r14
   ret void() *@ef
@@ -62,7 +62,7 @@ define void() *@f5() {
 
 ; Like f2, but for functions.
 define void() *@f6() {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: lgrl %r2, df@GOT
 ; CHECK: br %r14
   ret void() *@df
@@ -70,7 +70,7 @@ define void() *@f6() {
 
 ; Like f3, but for functions.
 define void() *@f7() {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: larl %r2, pf
 ; CHECK: br %r14
   ret void() *@pf
@@ -78,7 +78,7 @@ define void() *@f7() {
 
 ; Like f4, but for functions.
 define void() *@f8() {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: larl %r2, hf
 ; CHECK: br %r14
   ret void() *@hf
diff --git a/test/CodeGen/SystemZ/la-04.ll b/test/CodeGen/SystemZ/la-04.ll
index 4c3636481e7d..4d47308e04ba 100644
--- a/test/CodeGen/SystemZ/la-04.ll
+++ b/test/CodeGen/SystemZ/la-04.ll
@@ -4,7 +4,7 @@
 
 ; Do some arbitrary work and return the address of the following label.
 define i8 *@f1(i8 *%addr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvi 0(%r2), 1
 ; CHECK: [[LABEL:\.L.*]]:
 ; CHECK: larl %r2, [[LABEL]]
diff --git a/test/CodeGen/SystemZ/lit.local.cfg b/test/CodeGen/SystemZ/lit.local.cfg
index 79528d178f23..b12af09434be 100644
--- a/test/CodeGen/SystemZ/lit.local.cfg
+++ b/test/CodeGen/SystemZ/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'SystemZ' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/SystemZ/loop-01.ll b/test/CodeGen/SystemZ/loop-01.ll
new file mode 100644
index 000000000000..580080173563
--- /dev/null
+++ b/test/CodeGen/SystemZ/loop-01.ll
@@ -0,0 +1,124 @@
+; Test loop tuning.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; Test that strength reduction is applied to addresses with a scale factor,
+; but that indexed addressing can still be used.
+define void @f1(i32 *%dest, i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: sllg
+; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %next, %loop ]  ; element index, starting at 0
+  %ptr = getelementptr i32 *%dest, i64 %index  ; i32 element %index of %dest (the scaled address)
+  store i32 %a, i32 *%ptr
+  %next = add i64 %index, 1
+  %cmp = icmp ne i64 %next, 100  ; loop body runs for %index = 0..99
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Test a loop that should be converted into dbr form and then use BRCT.
+define void @f2(i32 *%src, i32 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[REG:%r[0-5]]], 100
+; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
+; CHECK: brct [[REG]], [[LABEL]]
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %count = phi i32 [ 0, %entry ], [ %next, %loop.next ]  ; counts up from 0 in the IR
+  %next = add i32 %count, 1
+  %val = load volatile i32 *%src
+  %cmp = icmp eq i32 %val, 0  ; skip the store when the loaded value is zero
+  br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+  %add = add i32 %val, 1
+  store volatile i32 %add, i32 *%dest
+  br label %loop.next
+
+loop.next:
+  %cont = icmp ne i32 %next, 100  ; %count is used only for this exit test; 100 iterations in total
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Like f2, but for BRCTG.
+define void @f3(i64 *%src, i64 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: lghi [[REG:%r[0-5]]], 100
+; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
+; CHECK: brctg [[REG]], [[LABEL]]
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %count = phi i64 [ 0, %entry ], [ %next, %loop.next ]  ; 64-bit counter, counts up from 0 in the IR
+  %next = add i64 %count, 1
+  %val = load volatile i64 *%src
+  %cmp = icmp eq i64 %val, 0  ; skip the store when the loaded value is zero
+  br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+  %add = add i64 %val, 1
+  store volatile i64 %add, i64 *%dest
+  br label %loop.next
+
+loop.next:
+  %cont = icmp ne i64 %next, 100  ; %count is used only for this exit test; 100 iterations in total
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Test a loop with a 64-bit decremented counter in which the 32-bit
+; low part of the counter is used after the decrement.  This is an example
+; of a subregister use being the only thing that blocks a conversion to BRCTG.
+define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) {
+; CHECK-LABEL: f4:
+; CHECK: aghi [[REG:%r[0-5]]], -1
+; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
+; CHECK: stg [[REG2]],
+; CHECK: jne {{\..*}}
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %left = phi i64 [ %count, %entry ], [ %next, %loop.next ]  ; remaining iterations, starting at %count
+  store volatile i64 %left, i64 *%dest2
+  %val = load volatile i32 *%src
+  %cmp = icmp eq i32 %val, 0  ; skip the store when the loaded value is zero
+  br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+  %add = add i32 %val, 1
+  store volatile i32 %add, i32 *%dest
+  br label %loop.next
+
+loop.next:
+  %next = add i64 %left, -1  ; the 64-bit decrement
+  %ext = zext i32 %val to i64
+  %shl = shl i64 %ext, 32  ; %val moved into the high 32 bits
+  %and = and i64 %next, 4294967295  ; low 32 bits of the decremented counter (mask 0xffffffff)
+  %or = or i64 %shl, %and  ; high half from %val, low half from the counter
+  store volatile i64 %or, i64 *%dest2
+  %cont = icmp ne i64 %next, 0  ; loop until the counter reaches zero
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/memchr-01.ll b/test/CodeGen/SystemZ/memchr-01.ll
new file mode 100644
index 000000000000..c51690b9848d
--- /dev/null
+++ b/test/CodeGen/SystemZ/memchr-01.ll
@@ -0,0 +1,21 @@
+; Test memchr using SRST, with a weird but usable prototype.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i8 *@memchr(i8 *%src, i16 %char, i32 %len)
+
+; Test a simple forwarded call.
+define i8 *@f1(i8 *%src, i16 %char, i32 %len) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lgr [[REG:%r[1-5]]], %r2
+; CHECK-DAG: algfr %r2, %r4
+; CHECK-DAG: llcr %r0, %r3
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: srst %r2, [[REG]]
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK: jl {{\.L.*}}
+; CHECK: lghi %r2, 0
+; CHECK: br %r14
+  %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len)  ; arguments forwarded unchanged, using the i16/i32 prototype declared above
+  ret i8 *%res
+}
diff --git a/test/CodeGen/SystemZ/memchr-02.ll b/test/CodeGen/SystemZ/memchr-02.ll
new file mode 100644
index 000000000000..982b3964f190
--- /dev/null
+++ b/test/CodeGen/SystemZ/memchr-02.ll
@@ -0,0 +1,57 @@
+; Test memchr using SRST, with the correct prototype.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i8 *@memchr(i8 *%src, i32 %char, i64 %len)
+
+; Test a simple forwarded call.
+define i8 *@f1(i64 %len, i8 *%src, i32 %char) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: agr %r2, %r3
+; CHECK-DAG: llcr %r0, %r4
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: srst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK: jl {{\.L.*}}
+; CHECK: lghi %r2, 0
+; CHECK: br %r14
+  %res = call i8 *@memchr(i8 *%src, i32 %char, i64 %len)
+  ret i8 *%res
+}
+
+; Test a doubled call with no use of %r0 in between.  There should be a
+; single load of %r0.
+define i8 *@f2(i8 *%src, i8 *%charptr, i64 %len) {
+; CHECK-LABEL: f2:
+; CHECK: llc %r0, 0(%r3)
+; CHECK-NOT: %r0
+; CHECK: srst [[RES1:%r[1-5]]], %r2
+; CHECK-NOT: %r0
+; CHECK: srst %r2, [[RES1]]
+; CHECK: br %r14
+  %char = load volatile i8 *%charptr
+  %charext = zext i8 %char to i32
+  %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
+  %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len)
+  ret i8 *%res2
+}
+
+; Test a doubled call with a use of %r0 in between.  %r0 must be loaded
+; for each loop.
+define i8 *@f3(i8 *%src, i8 *%charptr, i64 %len) {
+; CHECK-LABEL: f3:
+; CHECK: llc [[CHAR:%r[1-5]]], 0(%r3)
+; CHECK: lr %r0, [[CHAR]]
+; CHECK: srst [[RES1:%r[1-5]]], %r2
+; CHECK: lhi %r0, 0
+; CHECK: blah %r0
+; CHECK: lr %r0, [[CHAR]]
+; CHECK: srst %r2, [[RES1]]
+; CHECK: br %r14
+  %char = load volatile i8 *%charptr
+  %charext = zext i8 %char to i32
+  %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
+  call void asm sideeffect "blah $0", "{r0}" (i32 0)
+  %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len)
+  ret i8 *%res2
+}
diff --git a/test/CodeGen/SystemZ/memcmp-01.ll b/test/CodeGen/SystemZ/memcmp-01.ll
new file mode 100644
index 000000000000..a01441946937
--- /dev/null
+++ b/test/CodeGen/SystemZ/memcmp-01.ll
@@ -0,0 +1,221 @@
+; Test memcmp using CLC, with i32 results.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
+
+; Zero-length comparisons should be optimized away.
+define i32 @f1(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f1:
+; CHECK: lhi %r2, 0
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
+  ret i32 %res
+}
+
+; Check a case where the result is used as an integer.
+define i32 @f2(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f2:
+; CHECK: clc 0(2,%r2), 0(%r3)
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll %r2, [[REG]], 31
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
+  ret i32 %res
+}
+
+; Check a case where the result is tested for equality.
+define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: clc 0(3,%r2), 0(%r3)
+; CHECK-NEXT: je {{\..*}}
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested for inequality.
+define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: clc 0(4,%r2), 0(%r3)
+; CHECK-NEXT: jlh {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
+  %cmp = icmp ne i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested via slt.
+define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: clc 0(5,%r2), 0(%r3)
+; CHECK-NEXT: jl {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested for sgt.
+define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: clc 0(6,%r2), 0(%r3)
+; CHECK-NEXT: jh {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
+  %cmp = icmp sgt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the upper end of the CLC range.  Here the result is used both as
+; an integer and for branching.
+define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll %r2, [[REG]], 31
+; CHECK: jl {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
+
+; 257 bytes needs two CLCs.
+define i32 @f8(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f8:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK: clc 256(1,%r2), 256(%r3)
+; CHECK: [[LABEL]]:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
+  ret i32 %res
+}
+
+; Test a comparison of 257 bytes in which the CC result can be used directly.
+define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK: clc 256(1,%r2), 256(%r3)
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: jl .L
+; CHECK: br %r14
+entry:
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test the largest size that can use two CLCs.
+define i32 @f10(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f10:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK: clc 256(256,%r2), 256(%r3)
+; CHECK: [[LABEL]]:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512)
+  ret i32 %res
+}
+
+; Test the smallest size that needs 3 CLCs.
+define i32 @f11(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f11:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK: clc 256(256,%r2), 256(%r3)
+; CHECK: jlh [[LABEL]]
+; CHECK: clc 512(1,%r2), 512(%r3)
+; CHECK: [[LABEL]]:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513)
+  ret i32 %res
+}
+
+; Test the largest size that can use 3 CLCs.
+define i32 @f12(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f12:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK: clc 256(256,%r2), 256(%r3)
+; CHECK: jlh [[LABEL]]
+; CHECK: clc 512(256,%r2), 512(%r3)
+; CHECK: [[LABEL]]:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768)
+  ret i32 %res
+}
+
+; The next size up uses a loop instead.  We leave the more complicated
+; loop tests to memcpy-01.ll, which shares the same form.
+define i32 @f13(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f13:
+; CHECK: lghi [[COUNT:%r[0-5]]], 3
+; CHECK: [[LOOP:\.L[^:]*]]:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK-DAG: la %r2, 256(%r2)
+; CHECK-DAG: la %r3, 256(%r3)
+; CHECK: brctg [[COUNT]], [[LOOP]]
+; CHECK: clc 0(1,%r2), 0(%r3)
+; CHECK: [[LABEL]]:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/memcmp-02.ll b/test/CodeGen/SystemZ/memcmp-02.ll
new file mode 100644
index 000000000000..74b090dcdd8e
--- /dev/null
+++ b/test/CodeGen/SystemZ/memcmp-02.ll
@@ -0,0 +1,139 @@
+; Test memcmp using CLC, with i64 results.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
+
+; Zero-length comparisons should be optimized away.
+define i64 @f1(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f1:
+; CHECK: lghi %r2, 0
+; CHECK: br %r14
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 0)
+  ret i64 %res
+}
+
+; Check a case where the result is used as an integer.
+define i64 @f2(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f2:
+; CHECK: clc 0(2,%r2), 0(%r3)
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll [[REG]], [[REG]], 31
+; CHECK: lgfr %r2, [[REG]]
+; CHECK: br %r14
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 2)
+  ret i64 %res
+}
+
+; Check a case where the result is tested for equality.
+define void @f3(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: clc 0(3,%r2), 0(%r3)
+; CHECK-NEXT: je {{\..*}}
+; CHECK: br %r14
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 3)
+  %cmp = icmp eq i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested for inequality.
+define void @f4(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: clc 0(4,%r2), 0(%r3)
+; CHECK-NEXT: jlh {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 4)
+  %cmp = icmp ne i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested via slt.
+define void @f5(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: clc 0(5,%r2), 0(%r3)
+; CHECK-NEXT: jl {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 5)
+  %cmp = icmp slt i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check a case where the result is tested for sgt.
+define void @f6(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: clc 0(6,%r2), 0(%r3)
+; CHECK-NEXT: jh {{\..*}}
+; CHECK: br %r14
+entry:
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 6)
+  %cmp = icmp sgt i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check the upper end of the CLC range.  Here the result is used both as
+; an integer and for branching.
+define i64 @f7(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll [[REG]], [[REG]], 31
+; CHECK: lgfr %r2, [[REG]]
+; CHECK: jl {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 256)
+  %cmp = icmp slt i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %res
+}
+
+; 257 bytes needs two CLCs.
+define i64 @f8(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f8:
+; CHECK: clc 0(256,%r2), 0(%r3)
+; CHECK: jlh [[LABEL:\..*]]
+; CHECK: clc 256(1,%r2), 256(%r3)
+; CHECK: [[LABEL]]:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: br %r14
+  %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 257)
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/memcpy-01.ll b/test/CodeGen/SystemZ/memcpy-01.ll
new file mode 100644
index 000000000000..b53ec5452e25
--- /dev/null
+++ b/test/CodeGen/SystemZ/memcpy-01.ll
@@ -0,0 +1,235 @@
+; Test memcpy using MVC.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind
+declare void @foo(i8 *, i8 *)
+
+; Test a no-op move, i32 version.
+define void @f1(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 0, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test a no-op move, i64 version.
+define void @f2(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 0, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test a 1-byte move, i32 version.
+define void @f3(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 1, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test a 1-byte move, i64 version.
+define void @f4(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test the upper range of a single MVC, i32 version.
+define void @f5(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 256, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test the upper range of a single MVC, i64 version.
+define void @f6(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 256, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test the first case that needs two MVCs.
+define void @f7(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f7:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(1,%r2), 256(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test the last-but-one case that needs two MVCs.
+define void @f8(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(255,%r2), 256(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 511, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test the last case that needs two MVCs.
+define void @f9(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f9:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 512, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test an arbitrary value that uses straight-line code.
+define void @f10(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f10:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: mvc 512(256,%r2), 512(%r3)
+; CHECK: mvc 768(256,%r2), 768(%r3)
+; CHECK: mvc 1024(255,%r2), 1024(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; ...and again in cases where not all parts are in range of MVC.
+define void @f11(i8 *%destbase, i8 *%srcbase) {
+; CHECK-LABEL: f11:
+; CHECK: mvc 4000(256,%r2), 3500(%r3)
+; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
+; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
+; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
+; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
+; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
+; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
+; CHECK: br %r14
+  %dest = getelementptr i8 *%destbase, i64 4000
+  %src = getelementptr i8 *%srcbase, i64 3500
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; ...and again with a destination frame base that goes out of range.
+define void @f12() {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc 4076(256,%r15), 2100(%r15)
+; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
+; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
+; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
+; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
+; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %arr = alloca [6000 x i8]
+  %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900
+  %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924
+  call void @foo(i8 *%dest, i8 *%src)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+                                       i1 false)
+  call void @foo(i8 *%dest, i8 *%src)
+  ret void
+}
+
+; ...and again with a source frame base that goes out of range.
+define void @f13() {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc 200(256,%r15), 3826(%r15)
+; CHECK: mvc 456(256,%r15), 4082(%r15)
+; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
+; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
+; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
+; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %arr = alloca [6000 x i8]
+  %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 24
+  %src = getelementptr [6000 x i8] *%arr, i64 0, i64 3650
+  call void @foo(i8 *%dest, i8 *%src)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+                                       i1 false)
+  call void @foo(i8 *%dest, i8 *%src)
+  ret void
+}
+
+; Test the last case that is done using straight-line code.
+define void @f14(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f14:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: mvc 512(256,%r2), 512(%r3)
+; CHECK: mvc 768(256,%r2), 768(%r3)
+; CHECK: mvc 1024(256,%r2), 1024(%r3)
+; CHECK: mvc 1280(256,%r2), 1280(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1536, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; Test the first case that is done using a loop.
+define void @f15(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f15:
+; CHECK: lghi [[COUNT:%r[0-5]]], 6
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 768(%r2)
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: la %r2, 256(%r2)
+; CHECK: la %r3, 256(%r3)
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
+                                       i1 false)
+  ret void
+}
+
+; ...and again with frame bases, where the base must be loaded into a
+; register before the loop.
+define void @f16() {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
+; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15)
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 2368([[BASE]])
+; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
+; CHECK: la [[BASE]], 256([[BASE]])
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %arr = alloca [3200 x i8]
+  %dest = getelementptr [3200 x i8] *%arr, i64 0, i64 1600
+  %src = getelementptr [3200 x i8] *%arr, i64 0, i64 0
+  call void @foo(i8 *%dest, i8 *%src)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
+                                       i1 false)
+  call void @foo(i8 *%dest, i8 *%src)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/memcpy-02.ll b/test/CodeGen/SystemZ/memcpy-02.ll
new file mode 100644
index 000000000000..2b010911f88e
--- /dev/null
+++ b/test/CodeGen/SystemZ/memcpy-02.ll
@@ -0,0 +1,392 @@
+; Test load/store pairs that act as memcpys.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g1src = global i8 1
+@g1dst = global i8 1
+@g2src = global i16 2
+@g2dst = global i16 2
+@g3 = global i32 3
+@g4 = global i64 4
+@g5src = external global fp128, align 16
+@g5dst = external global fp128, align 16
+
+; Test the simple i8 case.
+define void @f1(i8 *%ptr1) {
+; CHECK-LABEL: f1:
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  store i8 %val, i8 *%ptr2
+  ret void
+}
+
+; Test i8 cases where the value is zero-extended to 32 bits.
+define void @f2(i8 *%ptr1) {
+; CHECK-LABEL: f2:
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %ext = zext i8 %val to i32
+  %trunc = trunc i32 %ext to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; Test i8 cases where the value is zero-extended to 64 bits.
+define void @f3(i8 *%ptr1) {
+; CHECK-LABEL: f3:
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %ext = zext i8 %val to i64
+  %trunc = trunc i64 %ext to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; Test i8 cases where the value is sign-extended to 32 bits.
+define void @f4(i8 *%ptr1) {
+; CHECK-LABEL: f4:
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %ext = sext i8 %val to i32
+  %trunc = trunc i32 %ext to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; Test i8 cases where the value is sign-extended to 64 bits.
+define void @f5(i8 *%ptr1) {
+; CHECK-LABEL: f5:
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %ext = sext i8 %val to i64
+  %trunc = trunc i64 %ext to i8
+  store i8 %trunc, i8 *%ptr2
+  ret void
+}
+
+; Test the simple i16 case.
+define void @f6(i16 *%ptr1) {
+; CHECK-LABEL: f6:
+; CHECK: mvc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  store i16 %val, i16 *%ptr2
+  ret void
+}
+
+; Test i16 cases where the value is zero-extended to 32 bits.
+define void @f7(i16 *%ptr1) {
+; CHECK-LABEL: f7:
+; CHECK: mvc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %ext = zext i16 %val to i32
+  %trunc = trunc i32 %ext to i16
+  store i16 %trunc, i16 *%ptr2
+  ret void
+}
+
+; Test i16 cases where the value is zero-extended to 64 bits.
+define void @f8(i16 *%ptr1) {
+; CHECK-LABEL: f8:
+; CHECK: mvc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %ext = zext i16 %val to i64
+  %trunc = trunc i64 %ext to i16
+  store i16 %trunc, i16 *%ptr2
+  ret void
+}
+
+; Test i16 cases where the value is sign-extended to 32 bits.
+define void @f9(i16 *%ptr1) {
+; CHECK-LABEL: f9:
+; CHECK: mvc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %ext = sext i16 %val to i32
+  %trunc = trunc i32 %ext to i16
+  store i16 %trunc, i16 *%ptr2
+  ret void
+}
+
+; Test i16 cases where the value is sign-extended to 64 bits.
+define void @f10(i16 *%ptr1) {
+; CHECK-LABEL: f10:
+; CHECK: mvc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %ext = sext i16 %val to i64
+  %trunc = trunc i64 %ext to i16
+  store i16 %trunc, i16 *%ptr2
+  ret void
+}
+
+; Test the simple i32 case.
+define void @f11(i32 *%ptr1) {
+; CHECK-LABEL: f11:
+; CHECK: mvc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  store i32 %val, i32 *%ptr2
+  ret void
+}
+
+; Test i32 cases where the value is zero-extended to 64 bits.
+define void @f12(i32 *%ptr1) {
+; CHECK-LABEL: f12:
+; CHECK: mvc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  %ext = zext i32 %val to i64
+  %trunc = trunc i64 %ext to i32
+  store i32 %trunc, i32 *%ptr2
+  ret void
+}
+
+; Test i32 cases where the value is sign-extended to 64 bits.
+define void @f13(i32 *%ptr1) {
+; CHECK-LABEL: f13:
+; CHECK: mvc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  %ext = sext i32 %val to i64
+  %trunc = trunc i64 %ext to i32
+  store i32 %trunc, i32 *%ptr2
+  ret void
+}
+
+; Test the i64 case.
+define void @f14(i64 *%ptr1) {
+; CHECK-LABEL: f14:
+; CHECK: mvc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  store i64 %val, i64 *%ptr2
+  ret void
+}
+
+; Test the f32 case.
+define void @f15(float *%ptr1) {
+; CHECK-LABEL: f15:
+; CHECK: mvc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr float *%ptr1, i64 1
+  %val = load float *%ptr1
+  store float %val, float *%ptr2
+  ret void
+}
+
+; Test the f64 case.
+define void @f16(double *%ptr1) {
+; CHECK-LABEL: f16:
+; CHECK: mvc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr double *%ptr1, i64 1
+  %val = load double *%ptr1
+  store double %val, double *%ptr2
+  ret void
+}
+
+; Test the f128 case.
+define void @f17(fp128 *%ptr1) {
+; CHECK-LABEL: f17:
+; CHECK: mvc 16(16,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr fp128 *%ptr1, i64 1
+  %val = load fp128 *%ptr1
+  store fp128 %val, fp128 *%ptr2
+  ret void
+}
+
+; Make sure that we don't use MVC if the load is volatile.
+define void @f18(i64 *%ptr1) {
+; CHECK-LABEL: f18:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load volatile i64 *%ptr1
+  store i64 %val, i64 *%ptr2
+  ret void
+}
+
+; ...likewise the store.
+define void @f19(i64 *%ptr1) {
+; CHECK-LABEL: f19:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  store volatile i64 %val, i64 *%ptr2
+  ret void
+}
+
+; Test that MVC is not used for aligned loads and stores if there is
+; no way of telling whether they alias.  We don't want to use MVC in
+; cases where the addresses could be equal.
+define void @f20(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f20:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %val = load i64 *%ptr1
+  store i64 %val, i64 *%ptr2
+  ret void
+}
+
+; ...and again for unaligned loads and stores.
+define void @f21(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f21:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2
+  store i64 %val, i64 *%ptr2, align 2
+  ret void
+}
+
+; Test a case where there is definite overlap.
+define void @f22(i64 %base) {
+; CHECK-LABEL: f22:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %add = add i64 %base, 1
+  %ptr1 = inttoptr i64 %base to i64 *
+  %ptr2 = inttoptr i64 %add to i64 *
+  %val = load i64 *%ptr1, align 1
+  store i64 %val, i64 *%ptr2, align 1
+  ret void
+}
+
+; Test that we can use MVC for global addresses for i8.
+define void @f23(i8 *%ptr) {
+; CHECK-LABEL: f23:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g1src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
+; CHECK: mvc 0(1,[[DST]]), 0([[SRC]])
+; CHECK: br %r14
+  %val = load i8 *@g1src
+  store i8 %val, i8 *@g1dst
+  ret void
+}
+
+; Test that we use LHRL and STHRL for i16.
+define void @f24(i16 *%ptr) {
+; CHECK-LABEL: f24:
+; CHECK: lhrl [[REG:%r[0-5]]], g2src
+; CHECK: sthrl [[REG]], g2dst
+; CHECK: br %r14
+  %val = load i16 *@g2src
+  store i16 %val, i16 *@g2dst
+  ret void
+}
+
+; Test that we use LRL for i32.
+define void @f25(i32 *%ptr) {
+; CHECK-LABEL: f25:
+; CHECK: lrl [[REG:%r[0-5]]], g3
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %val = load i32 *@g3
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; ...likewise STRL.
+define void @f26(i32 *%ptr) {
+; CHECK-LABEL: f26:
+; CHECK: l [[REG:%r[0-5]]], 0(%r2)
+; CHECK: strl [[REG]], g3
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  store i32 %val, i32 *@g3
+  ret void
+}
+
+; Test that we use LGRL for i64.
+define void @f27(i64 *%ptr) {
+; CHECK-LABEL: f27:
+; CHECK: lgrl [[REG:%r[0-5]]], g4
+; CHECK: stg [[REG]], 0(%r2)
+; CHECK: br %r14
+  %val = load i64 *@g4
+  store i64 %val, i64 *%ptr
+  ret void
+}
+
+; ...likewise STGRL.
+define void @f28(i64 *%ptr) {
+; CHECK-LABEL: f28:
+; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
+; CHECK: stgrl [[REG]], g4
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  store i64 %val, i64 *@g4
+  ret void
+}
+
+; Test that we can use MVC for global addresses for fp128.
+define void @f29(fp128 *%ptr) {
+; CHECK-LABEL: f29:
+; CHECK-DAG: larl [[SRC:%r[0-5]]], g5src
+; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst
+; CHECK: mvc 0(16,[[DST]]), 0([[SRC]])
+; CHECK: br %r14
+  %val = load fp128 *@g5src, align 16
+  store fp128 %val, fp128 *@g5dst, align 16
+  ret void
+}
+
+; Test a case where offset disambiguation is enough.
+define void @f30(i64 *%ptr1) {
+; CHECK-LABEL: f30:
+; CHECK: mvc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1, align 1
+  store i64 %val, i64 *%ptr2, align 1
+  ret void
+}
+
+; Test f21 in cases where TBAA tells us there is no alias.
+define void @f31(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f31:
+; CHECK: mvc 0(8,%r3), 0(%r2)
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2, !tbaa !1
+  store i64 %val, i64 *%ptr2, align 2, !tbaa !2
+  ret void
+}
+
+; Test f21 in cases where TBAA is present but doesn't help.
+define void @f32(i64 *%ptr1, i64 *%ptr2) {
+; CHECK-LABEL: f32:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %val = load i64 *%ptr1, align 2, !tbaa !1
+  store i64 %val, i64 *%ptr2, align 2, !tbaa !1
+  ret void
+}
+
+!0 = metadata !{ metadata !"root" }
+!1 = metadata !{ metadata !3, metadata !3, i64 0 }
+!2 = metadata !{ metadata !4, metadata !4, i64 0 }
+!3 = metadata !{ metadata !"set1", metadata !0 }
+!4 = metadata !{ metadata !"set2", metadata !0 }
diff --git a/test/CodeGen/SystemZ/memset-01.ll b/test/CodeGen/SystemZ/memset-01.ll
new file mode 100644
index 000000000000..f17901cc73ab
--- /dev/null
+++ b/test/CodeGen/SystemZ/memset-01.ll
@@ -0,0 +1,160 @@
+; Test memset in cases where the set value is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind
+
+; No bytes, i32 version.
+define void @f1(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 0, i32 1, i1 false)
+  ret void
+}
+
+; No bytes, i64 version.
+define void @f2(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 0, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i32 version.
+define void @f3(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f3:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 1, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i64 version.
+define void @f4(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f4:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i32 version.
+define void @f5(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stc %r3, 0(%r2)
+; CHECK-DAG: stc %r3, 1(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 2, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i64 version.
+define void @f6(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stc %r3, 0(%r2)
+; CHECK-DAG: stc %r3, 1(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 2, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i32 version.
+define void @f7(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f7:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(2,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 3, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i64 version.
+define void @f8(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f8:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(2,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 3, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i32 version.
+define void @f9(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f9:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 257, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i64 version.
+define void @f10(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f10:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 257, i32 1, i1 false)
+  ret void
+}
+
+; 258 bytes, i32 version.  We need two MVCs.
+define void @f11(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f11:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 258, i32 1, i1 false)
+  ret void
+}
+
+; 258 bytes, i64 version.
+define void @f12(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f12:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 258, i32 1, i1 false)
+  ret void
+}
+
+; Test the largest case for which straight-line code is used.
+define void @f13(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f13:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(256,%r2), 256(%r2)
+; CHECK: mvc 513(256,%r2), 512(%r2)
+; CHECK: mvc 769(256,%r2), 768(%r2)
+; CHECK: mvc 1025(256,%r2), 1024(%r2)
+; CHECK: mvc 1281(256,%r2), 1280(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1537, i32 1,
+                                  i1 false)
+  ret void
+}
+
+; Test the next size up, which uses a loop.  We leave the other corner
+; cases to memcpy-01.ll.
+define void @f14(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f14:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: lghi [[COUNT:%r[0-5]]], 6
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 769(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: la %r2, 256(%r2)
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1538, i32 1,
+                                  i1 false)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/memset-02.ll b/test/CodeGen/SystemZ/memset-02.ll
new file mode 100644
index 000000000000..b4724c0b5745
--- /dev/null
+++ b/test/CodeGen/SystemZ/memset-02.ll
@@ -0,0 +1,162 @@
+; Test memset in cases where the set value is a constant other than 0 and -1.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind
+
+; No bytes, i32 version.
+define void @f1(i8 *%dest) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 0, i32 1, i1 false)
+  ret void
+}
+
+; No bytes, i64 version.
+define void @f2(i8 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 0, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i32 version.
+define void @f3(i8 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: mvi 0(%r2), 128
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 1, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i64 version.
+define void @f4(i8 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: mvi 0(%r2), 128
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 1, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i32 version.  -32640 is 0x8080 as a signed 16-bit immediate.
+define void @f5(i8 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: mvhhi 0(%r2), -32640
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 2, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i64 version.  -32640 is 0x8080 as a signed 16-bit immediate.
+define void @f6(i8 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: mvhhi 0(%r2), -32640
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 2, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i32 version.  The two stores may be emitted in either order.
+define void @f7(i8 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: mvhhi 0(%r2), -32640
+; CHECK-DAG: mvi 2(%r2), 128
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 3, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i64 version.  The two stores may be emitted in either order.
+define void @f8(i8 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: mvhhi 0(%r2), -32640
+; CHECK-DAG: mvi 2(%r2), 128
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 3, i32 1, i1 false)
+  ret void
+}
+
+; 4 bytes, i32 version.  2155905152 is 0x80808080, the byte 128 repeated.
+define void @f9(i8 *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: iilf [[REG:%r[0-5]]], 2155905152
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 4, i32 1, i1 false)
+  ret void
+}
+
+; 4 bytes, i64 version.  2155905152 is 0x80808080, the byte 128 repeated.
+define void @f10(i8 *%dest) {
+; CHECK-LABEL: f10:
+; CHECK: iilf [[REG:%r[0-5]]], 2155905152
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 4, i32 1, i1 false)
+  ret void
+}
+
+; 5 bytes, i32 version.
+define void @f11(i8 *%dest) {
+; CHECK-LABEL: f11:
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(4,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 5, i32 1, i1 false)
+  ret void
+}
+
+; 5 bytes, i64 version.
+define void @f12(i8 *%dest) {
+; CHECK-LABEL: f12:
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(4,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 5, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i32 version.
+define void @f13(i8 *%dest) {
+; CHECK-LABEL: f13:
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 257, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i64 version.
+define void @f14(i8 *%dest) {
+; CHECK-LABEL: f14:
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 257, i32 1, i1 false)
+  ret void
+}
+
+; 258 bytes, i32 version.  We need two MVCs.
+define void @f15(i8 *%dest) {
+; CHECK-LABEL: f15:
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 258, i32 1, i1 false)
+  ret void
+}
+
+; 258 bytes, i64 version.
+define void @f16(i8 *%dest) {
+; CHECK-LABEL: f16:
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 258, i32 1, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/memset-03.ll b/test/CodeGen/SystemZ/memset-03.ll
new file mode 100644
index 000000000000..a95f89fc7c0a
--- /dev/null
+++ b/test/CodeGen/SystemZ/memset-03.ll
@@ -0,0 +1,382 @@
+; Test memsets that clear all bits.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind
+
+; No bytes, i32 version.
+define void @f1(i8 *%dest) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 0, i32 1, i1 false)
+  ret void
+}
+
+; No bytes, i64 version.
+define void @f2(i8 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 0, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i32 version.
+define void @f3(i8 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: mvi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 1, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i64 version.
+define void @f4(i8 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: mvi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 1, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i32 version.
+define void @f5(i8 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: mvhhi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 2, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i64 version.
+define void @f6(i8 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: mvhhi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 2, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i32 version.
+define void @f7(i8 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: mvhhi 0(%r2), 0
+; CHECK-DAG: mvi 2(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 3, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i64 version.
+define void @f8(i8 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: mvhhi 0(%r2), 0
+; CHECK-DAG: mvi 2(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 3, i32 1, i1 false)
+  ret void
+}
+
+; 4 bytes, i32 version.
+define void @f9(i8 *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 4, i32 1, i1 false)
+  ret void
+}
+
+; 4 bytes, i64 version.
+define void @f10(i8 *%dest) {
+; CHECK-LABEL: f10:
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 4, i32 1, i1 false)
+  ret void
+}
+
+; 5 bytes, i32 version.
+define void @f11(i8 *%dest) {
+; CHECK-LABEL: f11:
+; CHECK-DAG: mvhi 0(%r2), 0
+; CHECK-DAG: mvi 4(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 5, i32 1, i1 false)
+  ret void
+}
+
+; 5 bytes, i64 version.
+define void @f12(i8 *%dest) {
+; CHECK-LABEL: f12:
+; CHECK-DAG: mvhi 0(%r2), 0
+; CHECK-DAG: mvi 4(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 5, i32 1, i1 false)
+  ret void
+}
+
+; 6 bytes, i32 version.
+define void @f13(i8 *%dest) {
+; CHECK-LABEL: f13:
+; CHECK-DAG: mvhi 0(%r2), 0
+; CHECK-DAG: mvhhi 4(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 6, i32 1, i1 false)
+  ret void
+}
+
+; 6 bytes, i64 version.
+define void @f14(i8 *%dest) {
+; CHECK-LABEL: f14:
+; CHECK-DAG: mvhi 0(%r2), 0
+; CHECK-DAG: mvhhi 4(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 6, i32 1, i1 false)
+  ret void
+}
+
+; 7 bytes, i32 version.
+define void @f15(i8 *%dest) {
+; CHECK-LABEL: f15:
+; CHECK: xc 0(7,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 7, i32 1, i1 false)
+  ret void
+}
+
+; 7 bytes, i64 version.
+define void @f16(i8 *%dest) {
+; CHECK-LABEL: f16:
+; CHECK: xc 0(7,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 7, i32 1, i1 false)
+  ret void
+}
+
+; 8 bytes, i32 version.
+define void @f17(i8 *%dest) {
+; CHECK-LABEL: f17:
+; CHECK: mvghi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 8, i32 1, i1 false)
+  ret void
+}
+
+; 8 bytes, i64 version.
+define void @f18(i8 *%dest) {
+; CHECK-LABEL: f18:
+; CHECK: mvghi 0(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 8, i32 1, i1 false)
+  ret void
+}
+
+; 9 bytes, i32 version.
+define void @f19(i8 *%dest) {
+; CHECK-LABEL: f19:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 9, i32 1, i1 false)
+  ret void
+}
+
+; 9 bytes, i64 version.
+define void @f20(i8 *%dest) {
+; CHECK-LABEL: f20:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 9, i32 1, i1 false)
+  ret void
+}
+
+; 10 bytes, i32 version.
+define void @f21(i8 *%dest) {
+; CHECK-LABEL: f21:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvhhi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 10, i32 1, i1 false)
+  ret void
+}
+
+; 10 bytes, i64 version.
+define void @f22(i8 *%dest) {
+; CHECK-LABEL: f22:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvhhi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 10, i32 1, i1 false)
+  ret void
+}
+
+; 11 bytes, i32 version.
+define void @f23(i8 *%dest) {
+; CHECK-LABEL: f23:
+; CHECK: xc 0(11,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 11, i32 1, i1 false)
+  ret void
+}
+
+; 11 bytes, i64 version.
+define void @f24(i8 *%dest) {
+; CHECK-LABEL: f24:
+; CHECK: xc 0(11,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 11, i32 1, i1 false)
+  ret void
+}
+
+; 12 bytes, i32 version.
+define void @f25(i8 *%dest) {
+; CHECK-LABEL: f25:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvhi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 12, i32 1, i1 false)
+  ret void
+}
+
+; 12 bytes, i64 version.  Expect an 8-byte MVGHI and a 4-byte MVHI.
+define void @f26(i8 *%dest) {
+; CHECK-LABEL: f26:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvhi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 12, i32 1, i1 false)
+  ret void
+}
+
+; 13 bytes, i32 version.
+define void @f27(i8 *%dest) {
+; CHECK-LABEL: f27:
+; CHECK: xc 0(13,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 13, i32 1, i1 false)
+  ret void
+}
+
+; 13 bytes, i64 version.
+define void @f28(i8 *%dest) {
+; CHECK-LABEL: f28:
+; CHECK: xc 0(13,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 13, i32 1, i1 false)
+  ret void
+}
+
+; 14 bytes, i32 version.
+define void @f29(i8 *%dest) {
+; CHECK-LABEL: f29:
+; CHECK: xc 0(14,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 14, i32 1, i1 false)
+  ret void
+}
+
+; 14 bytes, i64 version.
+define void @f30(i8 *%dest) {
+; CHECK-LABEL: f30:
+; CHECK: xc 0(14,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 14, i32 1, i1 false)
+  ret void
+}
+
+; 15 bytes, i32 version.
+define void @f31(i8 *%dest) {
+; CHECK-LABEL: f31:
+; CHECK: xc 0(15,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 15, i32 1, i1 false)
+  ret void
+}
+
+; 15 bytes, i64 version.
+define void @f32(i8 *%dest) {
+; CHECK-LABEL: f32:
+; CHECK: xc 0(15,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 15, i32 1, i1 false)
+  ret void
+}
+
+; 16 bytes, i32 version.
+define void @f33(i8 *%dest) {
+; CHECK-LABEL: f33:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvghi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 16, i32 1, i1 false)
+  ret void
+}
+
+; 16 bytes, i64 version.
+define void @f34(i8 *%dest) {
+; CHECK-LABEL: f34:
+; CHECK-DAG: mvghi 0(%r2), 0
+; CHECK-DAG: mvghi 8(%r2), 0
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 16, i32 1, i1 false)
+  ret void
+}
+
+; 17 bytes, i32 version.
+define void @f35(i8 *%dest) {
+; CHECK-LABEL: f35:
+; CHECK: xc 0(17,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 17, i32 1, i1 false)
+  ret void
+}
+
+; 17 bytes, i64 version.
+define void @f36(i8 *%dest) {
+; CHECK-LABEL: f36:
+; CHECK: xc 0(17,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 17, i32 1, i1 false)
+  ret void
+}
+
+; 256 bytes, i32 version.
+define void @f37(i8 *%dest) {
+; CHECK-LABEL: f37:
+; CHECK: xc 0(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 256, i32 1, i1 false)
+  ret void
+}
+
+; 256 bytes, i64 version.
+define void @f38(i8 *%dest) {
+; CHECK-LABEL: f38:
+; CHECK: xc 0(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 256, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i32 version.  We need two XCs.
+define void @f39(i8 *%dest) {
+; CHECK-LABEL: f39:
+; CHECK: xc 0(256,%r2), 0(%r2)
+; CHECK: xc 256(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 257, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i64 version.
+define void @f40(i8 *%dest) {
+; CHECK-LABEL: f40:
+; CHECK: xc 0(256,%r2), 0(%r2)
+; CHECK: xc 256(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 257, i32 1, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/memset-04.ll b/test/CodeGen/SystemZ/memset-04.ll
new file mode 100644
index 000000000000..7906e8d10a1f
--- /dev/null
+++ b/test/CodeGen/SystemZ/memset-04.ll
@@ -0,0 +1,398 @@
+; Test memsets that set all bits.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memset.p0i8.i32(i8 *nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8 *nocapture, i8, i64, i32, i1) nounwind
+
+; No bytes, i32 version.
+define void @f1(i8 *%dest) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 0, i32 1, i1 false)
+  ret void
+}
+
+; No bytes, i64 version.
+define void @f2(i8 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 0, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i32 version.  The i8 -1 shows up as the unsigned byte 255.
+define void @f3(i8 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: mvi 0(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 1, i32 1, i1 false)
+  ret void
+}
+
+; 1 byte, i64 version.  The i8 -1 shows up as the unsigned byte 255.
+define void @f4(i8 *%dest) {
+; CHECK-LABEL: f4:
+; CHECK: mvi 0(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 1, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i32 version.
+define void @f5(i8 *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: mvhhi 0(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 2, i32 1, i1 false)
+  ret void
+}
+
+; 2 bytes, i64 version.
+define void @f6(i8 *%dest) {
+; CHECK-LABEL: f6:
+; CHECK: mvhhi 0(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 2, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i32 version.
+define void @f7(i8 *%dest) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: mvhhi 0(%r2), -1
+; CHECK-DAG: mvi 2(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 3, i32 1, i1 false)
+  ret void
+}
+
+; 3 bytes, i64 version.
+define void @f8(i8 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: mvhhi 0(%r2), -1
+; CHECK-DAG: mvi 2(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 3, i32 1, i1 false)
+  ret void
+}
+
+; 4 bytes, i32 version.
+define void @f9(i8 *%dest) {
+; CHECK-LABEL: f9:
+; CHECK: mvhi 0(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 4, i32 1, i1 false)
+  ret void
+}
+
+; 4 bytes, i64 version.
+define void @f10(i8 *%dest) {
+; CHECK-LABEL: f10:
+; CHECK: mvhi 0(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 4, i32 1, i1 false)
+  ret void
+}
+
+; 5 bytes, i32 version.
+define void @f11(i8 *%dest) {
+; CHECK-LABEL: f11:
+; CHECK-DAG: mvhi 0(%r2), -1
+; CHECK-DAG: mvi 4(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 5, i32 1, i1 false)
+  ret void
+}
+
+; 5 bytes, i64 version.
+define void @f12(i8 *%dest) {
+; CHECK-LABEL: f12:
+; CHECK-DAG: mvhi 0(%r2), -1
+; CHECK-DAG: mvi 4(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 5, i32 1, i1 false)
+  ret void
+}
+
+; 6 bytes, i32 version.
+define void @f13(i8 *%dest) {
+; CHECK-LABEL: f13:
+; CHECK-DAG: mvhi 0(%r2), -1
+; CHECK-DAG: mvhhi 4(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 6, i32 1, i1 false)
+  ret void
+}
+
+; 6 bytes, i64 version.
+define void @f14(i8 *%dest) {
+; CHECK-LABEL: f14:
+; CHECK-DAG: mvhi 0(%r2), -1
+; CHECK-DAG: mvhhi 4(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 6, i32 1, i1 false)
+  ret void
+}
+
+; 7 bytes, i32 version.
+define void @f15(i8 *%dest) {
+; CHECK-LABEL: f15:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(6,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 7, i32 1, i1 false)
+  ret void
+}
+
+; 7 bytes, i64 version.
+define void @f16(i8 *%dest) {
+; CHECK-LABEL: f16:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(6,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 7, i32 1, i1 false)
+  ret void
+}
+
+; 8 bytes, i32 version.
+define void @f17(i8 *%dest) {
+; CHECK-LABEL: f17:
+; CHECK: mvghi 0(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 8, i32 1, i1 false)
+  ret void
+}
+
+; 8 bytes, i64 version.
+define void @f18(i8 *%dest) {
+; CHECK-LABEL: f18:
+; CHECK: mvghi 0(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 8, i32 1, i1 false)
+  ret void
+}
+
+; 9 bytes, i32 version.
+define void @f19(i8 *%dest) {
+; CHECK-LABEL: f19:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvi 8(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 9, i32 1, i1 false)
+  ret void
+}
+
+; 9 bytes, i64 version.
+define void @f20(i8 *%dest) {
+; CHECK-LABEL: f20:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvi 8(%r2), 255
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 9, i32 1, i1 false)
+  ret void
+}
+
+; 10 bytes, i32 version.
+define void @f21(i8 *%dest) {
+; CHECK-LABEL: f21:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvhhi 8(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 10, i32 1, i1 false)
+  ret void
+}
+
+; 10 bytes, i64 version.
+define void @f22(i8 *%dest) {
+; CHECK-LABEL: f22:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvhhi 8(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 10, i32 1, i1 false)
+  ret void
+}
+
+; 11 bytes, i32 version.
+define void @f23(i8 *%dest) {
+; CHECK-LABEL: f23:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(10,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 11, i32 1, i1 false)
+  ret void
+}
+
+; 11 bytes, i64 version.
+define void @f24(i8 *%dest) {
+; CHECK-LABEL: f24:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(10,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 11, i32 1, i1 false)
+  ret void
+}
+
+; 12 bytes, i32 version.
+define void @f25(i8 *%dest) {
+; CHECK-LABEL: f25:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvhi 8(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 12, i32 1, i1 false)
+  ret void
+}
+
+; 12 bytes, i64 version.
+define void @f26(i8 *%dest) {
+; CHECK-LABEL: f26:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvhi 8(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 12, i32 1, i1 false)
+  ret void
+}
+
+; 13 bytes, i32 version.
+define void @f27(i8 *%dest) {
+; CHECK-LABEL: f27:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(12,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 13, i32 1, i1 false)
+  ret void
+}
+
+; 13 bytes, i64 version.
+define void @f28(i8 *%dest) {
+; CHECK-LABEL: f28:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(12,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 13, i32 1, i1 false)
+  ret void
+}
+
+; 14 bytes, i32 version.
+define void @f29(i8 *%dest) {
+; CHECK-LABEL: f29:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(13,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 14, i32 1, i1 false)
+  ret void
+}
+
+; 14 bytes, i64 version.
+define void @f30(i8 *%dest) {
+; CHECK-LABEL: f30:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(13,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 14, i32 1, i1 false)
+  ret void
+}
+
+; 15 bytes, i32 version.
+define void @f31(i8 *%dest) {
+; CHECK-LABEL: f31:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(14,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 15, i32 1, i1 false)
+  ret void
+}
+
+; 15 bytes, i64 version.
+define void @f32(i8 *%dest) {
+; CHECK-LABEL: f32:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(14,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 15, i32 1, i1 false)
+  ret void
+}
+
+; 16 bytes, i32 version.
+define void @f33(i8 *%dest) {
+; CHECK-LABEL: f33:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvghi 8(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 16, i32 1, i1 false)
+  ret void
+}
+
+; 16 bytes, i64 version.
+define void @f34(i8 *%dest) {
+; CHECK-LABEL: f34:
+; CHECK-DAG: mvghi 0(%r2), -1
+; CHECK-DAG: mvghi 8(%r2), -1
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 16, i32 1, i1 false)
+  ret void
+}
+
+; 17 bytes, i32 version.
+define void @f35(i8 *%dest) {
+; CHECK-LABEL: f35:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(16,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 17, i32 1, i1 false)
+  ret void
+}
+
+; 17 bytes, i64 version.
+define void @f36(i8 *%dest) {
+; CHECK-LABEL: f36:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(16,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 17, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i32 version.
+define void @f37(i8 *%dest) {
+; CHECK-LABEL: f37:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 257, i32 1, i1 false)
+  ret void
+}
+
+; 257 bytes, i64 version.
+define void @f38(i8 *%dest) {
+; CHECK-LABEL: f38:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 257, i32 1, i1 false)
+  ret void
+}
+
+; 258 bytes, i32 version.  We need two MVCs.
+define void @f39(i8 *%dest) {
+; CHECK-LABEL: f39:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 258, i32 1, i1 false)
+  ret void
+}
+
+; 258 bytes, i64 version.
+define void @f40(i8 *%dest) {
+; CHECK-LABEL: f40:
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
+; CHECK: br %r14
+  call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 258, i32 1, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/or-01.ll b/test/CodeGen/SystemZ/or-01.ll
index 20c93129efca..23946d320678 100644
--- a/test/CodeGen/SystemZ/or-01.ll
+++ b/test/CodeGen/SystemZ/or-01.ll
@@ -1,10 +1,13 @@
 ; Test 32-bit ORs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo()
 
 ; Check OR.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: or %r2, %r3
 ; CHECK: br %r14
   %or = or i32 %a, %b
@@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check the low end of the O range.
 define i32 @f2(i32 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: o %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned O range.
 define i32 @f3(i32 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: o %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) {
 
 ; Check the next word up, which should use OY instead of O.
 define i32 @f4(i32 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned OY range.
 define i32 @f5(i32 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: o %r2, 0(%r3)
 ; CHECK: br %r14
@@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned OY range.
 define i32 @f7(i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) {
 
 ; Check the low end of the OY range.
 define i32 @f8(i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: oy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: o %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) {
 
 ; Check that O allows an index.
 define i32 @f10(i32 %a, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: o %r2, 4092({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
 
 ; Check that OY allows an index.
 define i32 @f11(i32 %a, i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: oy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
   %or = or i32 %a, %b
   ret i32 %or
 }
+
+; Check that ORs of spilled values can use O rather than OR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: o %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %or0 = or i32 %ret, %val0
+  %or1 = or i32 %or0, %val1
+  %or2 = or i32 %or1, %val2
+  %or3 = or i32 %or2, %val3
+  %or4 = or i32 %or3, %val4
+  %or5 = or i32 %or4, %val5
+  %or6 = or i32 %or5, %val6
+  %or7 = or i32 %or6, %val7
+  %or8 = or i32 %or7, %val8
+  %or9 = or i32 %or8, %val9
+
+  ret i32 %or9
+}
diff --git a/test/CodeGen/SystemZ/or-02.ll b/test/CodeGen/SystemZ/or-02.ll
index 377a3e604c60..267be2089e49 100644
--- a/test/CodeGen/SystemZ/or-02.ll
+++ b/test/CodeGen/SystemZ/or-02.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful OILL value.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: oill %r2, 1
 ; CHECK: br %r14
   %or = or i32 %a, 1
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the OILL range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: oill %r2, 65535
 ; CHECK: br %r14
   %or = or i32 %a, 65535
@@ -22,7 +22,7 @@ define i32 @f2(i32 %a) {
 
 ; Check the lowest useful OILH range, which is the next value up.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oilh %r2, 1
 ; CHECK: br %r14
   %or = or i32 %a, 65536
@@ -31,7 +31,7 @@ define i32 @f3(i32 %a) {
 
 ; Check the lowest useful OILF value, which is the next value up again.
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oilf %r2, 65537
 ; CHECK: br %r14
   %or = or i32 %a, 65537
@@ -40,7 +40,7 @@ define i32 @f4(i32 %a) {
 
 ; Check the high end of the OILH range.
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oilh %r2, 65535
 ; CHECK: br %r14
   %or = or i32 %a, -65536
@@ -49,7 +49,7 @@ define i32 @f5(i32 %a) {
 
 ; Check the next value up, which must use OILF instead.
 define i32 @f6(i32 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oilf %r2, 4294901761
 ; CHECK: br %r14
   %or = or i32 %a, -65535
@@ -58,7 +58,7 @@ define i32 @f6(i32 %a) {
 
 ; Check the highest useful OILF value.
 define i32 @f7(i32 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oilf %r2, 4294967294
 ; CHECK: br %r14
   %or = or i32 %a, -2
diff --git a/test/CodeGen/SystemZ/or-03.ll b/test/CodeGen/SystemZ/or-03.ll
index 16f84f1635a8..5fdbdfd1ed1f 100644
--- a/test/CodeGen/SystemZ/or-03.ll
+++ b/test/CodeGen/SystemZ/or-03.ll
@@ -1,10 +1,13 @@
 ; Test 64-bit ORs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i64 @foo()
 
 ; Check OGR.
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ogr %r2, %r3
 ; CHECK: br %r14
   %or = or i64 %a, %b
@@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) {
 
 ; Check OG with no displacement.
 define i64 @f2(i64 %a, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: og %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i64 *%src
@@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned OG range.
 define i64 @f3(i64 %a, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: og %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: og %r2, 0(%r3)
 ; CHECK: br %r14
@@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned OG range.
 define i64 @f5(i64 %a, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: og %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) {
 
 ; Check the low end of the OG range.
 define i64 @f6(i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: og %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: og %r2, 0(%r3)
 ; CHECK: br %r14
@@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) {
 
 ; Check that OG allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: og %r2, 524280({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %or = or i64 %a, %b
   ret i64 %or
 }
+
+; Check that ORs of spilled values can use OG rather than OGR.
+define i64 @f9(i64 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: og %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %or0 = or i64 %ret, %val0
+  %or1 = or i64 %or0, %val1
+  %or2 = or i64 %or1, %val2
+  %or3 = or i64 %or2, %val3
+  %or4 = or i64 %or3, %val4
+  %or5 = or i64 %or4, %val5
+  %or6 = or i64 %or5, %val6
+  %or7 = or i64 %or6, %val7
+  %or8 = or i64 %or7, %val8
+  %or9 = or i64 %or8, %val9
+
+  ret i64 %or9
+}
diff --git a/test/CodeGen/SystemZ/or-04.ll b/test/CodeGen/SystemZ/or-04.ll
index a8278423981a..87a30d564549 100644
--- a/test/CodeGen/SystemZ/or-04.ll
+++ b/test/CodeGen/SystemZ/or-04.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful OILL value.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: oill %r2, 1
 ; CHECK: br %r14
   %or = or i64 %a, 1
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the OILL range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: oill %r2, 65535
 ; CHECK: br %r14
   %or = or i64 %a, 65535
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 
 ; Check the lowest useful OILH value, which is the next value up.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oilh %r2, 1
 ; CHECK: br %r14
   %or = or i64 %a, 65536
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 
 ; Check the lowest useful OILF value, which is the next value up again.
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oilf %r2, 4294901759
 ; CHECK: br %r14
   %or = or i64 %a, 4294901759
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a) {
 
 ; Check the high end of the OILH range.
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oilh %r2, 65535
 ; CHECK: br %r14
   %or = or i64 %a, 4294901760
@@ -49,7 +49,7 @@ define i64 @f5(i64 %a) {
 
 ; Check the high end of the OILF range.
 define i64 @f6(i64 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oilf %r2, 4294967295
 ; CHECK: br %r14
   %or = or i64 %a, 4294967295
@@ -58,7 +58,7 @@ define i64 @f6(i64 %a) {
 
 ; Check the lowest useful OIHL value, which is the next value up.
 define i64 @f7(i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oihl %r2, 1
 ; CHECK: br %r14
   %or = or i64 %a, 4294967296
@@ -67,7 +67,7 @@ define i64 @f7(i64 %a) {
 
 ; Check the next value up again, which must use two ORs.
 define i64 @f8(i64 %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: oihl %r2, 1
 ; CHECK: oill %r2, 1
 ; CHECK: br %r14
@@ -77,7 +77,7 @@ define i64 @f8(i64 %a) {
 
 ; Check the high end of the OILL range.
 define i64 @f9(i64 %a) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: oihl %r2, 1
 ; CHECK: oill %r2, 65535
 ; CHECK: br %r14
@@ -87,7 +87,7 @@ define i64 @f9(i64 %a) {
 
 ; Check the next value up, which must use OILH
 define i64 @f10(i64 %a) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: oihl %r2, 1
 ; CHECK: oilh %r2, 1
 ; CHECK: br %r14
@@ -97,7 +97,7 @@ define i64 @f10(i64 %a) {
 
 ; Check the next value up again, which must use OILF
 define i64 @f11(i64 %a) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: oihl %r2, 1
 ; CHECK: oilf %r2, 65537
 ; CHECK: br %r14
@@ -107,7 +107,7 @@ define i64 @f11(i64 %a) {
 
 ; Check the high end of the OIHL range.
 define i64 @f12(i64 %a) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: oihl %r2, 65535
 ; CHECK: br %r14
   %or = or i64 %a, 281470681743360
@@ -117,7 +117,7 @@ define i64 @f12(i64 %a) {
 ; Check a combination of the high end of the OIHL range and the high end
 ; of the OILF range.
 define i64 @f13(i64 %a) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: oihl %r2, 65535
 ; CHECK: oilf %r2, 4294967295
 ; CHECK: br %r14
@@ -127,7 +127,7 @@ define i64 @f13(i64 %a) {
 
 ; Check the lowest useful OIHH value.
 define i64 @f14(i64 %a) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: oihh %r2, 1
 ; CHECK: br %r14
   %or = or i64 %a, 281474976710656
@@ -136,7 +136,7 @@ define i64 @f14(i64 %a) {
 
 ; Check the next value up, which needs two ORs.
 define i64 @f15(i64 %a) {
-; CHECK: f15:
+; CHECK-LABEL: f15:
 ; CHECK: oihh %r2, 1
 ; CHECK: oill %r2, 1
 ; CHECK: br %r14
@@ -146,7 +146,7 @@ define i64 @f15(i64 %a) {
 
 ; Check the lowest useful OIHF value.
 define i64 @f16(i64 %a) {
-; CHECK: f16:
+; CHECK-LABEL: f16:
 ; CHECK: oihf %r2, 65537
 ; CHECK: br %r14
   %or = or i64 %a, 281479271677952
@@ -155,7 +155,7 @@ define i64 @f16(i64 %a) {
 
 ; Check the high end of the OIHH range.
 define i64 @f17(i64 %a) {
-; CHECK: f17:
+; CHECK-LABEL: f17:
 ; CHECK: oihh %r2, 65535
 ; CHECK: br %r14
   %or = or i64 %a, 18446462598732840960
@@ -164,7 +164,7 @@ define i64 @f17(i64 %a) {
 
 ; Check the high end of the OIHF range.
 define i64 @f18(i64 %a) {
-; CHECK: f18:
+; CHECK-LABEL: f18:
 ; CHECK: oihf %r2, 4294967295
 ; CHECK: br %r14
   %or = or i64 %a, -4294967296
@@ -173,7 +173,7 @@ define i64 @f18(i64 %a) {
 
 ; Check the highest useful OR value.
 define i64 @f19(i64 %a) {
-; CHECK: f19:
+; CHECK-LABEL: f19:
 ; CHECK: oihf %r2, 4294967295
 ; CHECK: oilf %r2, 4294967294
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/or-05.ll b/test/CodeGen/SystemZ/or-05.ll
index 9b6c10d4b5ce..d90589128674 100644
--- a/test/CodeGen/SystemZ/or-05.ll
+++ b/test/CodeGen/SystemZ/or-05.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful constant, expressed as a signed integer.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: oi 0(%r2), 1
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -15,7 +15,7 @@ define void @f1(i8 *%ptr) {
 
 ; Check the highest useful constant, expressed as a signed integer.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -26,7 +26,7 @@ define void @f2(i8 *%ptr) {
 
 ; Check the lowest useful constant, expressed as an unsigned integer.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oi 0(%r2), 1
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -37,7 +37,7 @@ define void @f3(i8 *%ptr) {
 
 ; Check the highest useful constant, expressed as a unsigned integer.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -48,7 +48,7 @@ define void @f4(i8 *%ptr) {
 
 ; Check the high end of the OI range.
 define void @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oi 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -60,7 +60,7 @@ define void @f5(i8 *%src) {
 
 ; Check the next byte up, which should use OIY instead of OI.
 define void @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oiy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -72,7 +72,7 @@ define void @f6(i8 *%src) {
 
 ; Check the high end of the OIY range.
 define void @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oiy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -85,7 +85,7 @@ define void @f7(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, 524288
 ; CHECK: oi 0(%r2), 127
 ; CHECK: br %r14
@@ -98,7 +98,7 @@ define void @f8(i8 *%src) {
 
 ; Check the high end of the negative OIY range.
 define void @f9(i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: oiy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -110,7 +110,7 @@ define void @f9(i8 *%src) {
 
 ; Check the low end of the OIY range.
 define void @f10(i8 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: oiy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -123,7 +123,7 @@ define void @f10(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f11(i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r2, -524289
 ; CHECK: oi 0(%r2), 127
 ; CHECK: br %r14
@@ -136,7 +136,7 @@ define void @f11(i8 *%src) {
 
 ; Check that OI does not allow an index
 define void @f12(i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: agr %r2, %r3
 ; CHECK: oi 4095(%r2), 127
 ; CHECK: br %r14
@@ -151,7 +151,7 @@ define void @f12(i64 %src, i64 %index) {
 
 ; Check that OIY does not allow an index
 define void @f13(i64 %src, i64 %index) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: agr %r2, %r3
 ; CHECK: oiy 4096(%r2), 127
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/or-06.ll b/test/CodeGen/SystemZ/or-06.ll
index a24a18a191f1..0a865d350942 100644
--- a/test/CodeGen/SystemZ/or-06.ll
+++ b/test/CodeGen/SystemZ/or-06.ll
@@ -5,7 +5,7 @@
 
 ; Zero extension to 32 bits, negative constant.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -18,7 +18,7 @@ define void @f1(i8 *%ptr) {
 
 ; Zero extension to 64 bits, negative constant.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -31,7 +31,7 @@ define void @f2(i8 *%ptr) {
 
 ; Zero extension to 32 bits, positive constant.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define void @f3(i8 *%ptr) {
 
 ; Zero extension to 64 bits, positive constant.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -57,7 +57,7 @@ define void @f4(i8 *%ptr) {
 
 ; Sign extension to 32 bits, negative constant.
 define void @f5(i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -70,7 +70,7 @@ define void @f5(i8 *%ptr) {
 
 ; Sign extension to 64 bits, negative constant.
 define void @f6(i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -83,7 +83,7 @@ define void @f6(i8 *%ptr) {
 
 ; Sign extension to 32 bits, positive constant.
 define void @f7(i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -96,7 +96,7 @@ define void @f7(i8 *%ptr) {
 
 ; Sign extension to 64 bits, positive constant.
 define void @f8(i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/or-07.ll b/test/CodeGen/SystemZ/or-07.ll
new file mode 100644
index 000000000000..9fff88e71657
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-07.ll
@@ -0,0 +1,39 @@
+; Test the three-operand forms of OR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check ORK.
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f1:
+; CHECK: ork %r2, %r3, %r4
+; CHECK: br %r14
+  %or = or i32 %b, %c
+  ret i32 %or
+}
+
+; Check that we can still use OR in obvious cases.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check OGRK.
+define i64 @f3(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: f3:
+; CHECK: ogrk %r2, %r3, %r4
+; CHECK: br %r14
+  %or = or i64 %b, %c
+  ret i64 %or
+}
+
+; Check that we can still use OGR in obvious cases.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %or = or i64 %a, %b
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/or-08.ll b/test/CodeGen/SystemZ/or-08.ll
new file mode 100644
index 000000000000..8f5bf3170bed
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-08.ll
@@ -0,0 +1,57 @@
+; Test memory-to-memory ORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the simple i8 case.
+define void @f1(i8 *%ptr1) {
+; CHECK-LABEL: f1:
+; CHECK: oc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %old = load i8 *%ptr2
+  %or = or i8 %val, %old
+  store i8 %or, i8 *%ptr2
+  ret void
+}
+
+; Test the simple i16 case.
+define void @f2(i16 *%ptr1) {
+; CHECK-LABEL: f2:
+; CHECK: oc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %old = load i16 *%ptr2
+  %or = or i16 %val, %old
+  store i16 %or, i16 *%ptr2
+  ret void
+}
+
+; Test the simple i32 case.
+define void @f3(i32 *%ptr1) {
+; CHECK-LABEL: f3:
+; CHECK: oc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  %old = load i32 *%ptr2
+  %or = or i32 %old, %val
+  store i32 %or, i32 *%ptr2
+  ret void
+}
+
+; Test the i64 case.
+define void @f4(i64 *%ptr1) {
+; CHECK-LABEL: f4:
+; CHECK: oc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2
+  %or = or i64 %old, %val
+  store i64 %or, i64 *%ptr2
+  ret void
+}
+
+; Leave other more complicated tests to and-08.ll.
diff --git a/test/CodeGen/SystemZ/prefetch-01.ll b/test/CodeGen/SystemZ/prefetch-01.ll
new file mode 100644
index 000000000000..bb7fea99ca7c
--- /dev/null
+++ b/test/CodeGen/SystemZ/prefetch-01.ll
@@ -0,0 +1,87 @@
+; Test data prefetching.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+@g = global [4096 x i8] zeroinitializer
+
+; Check that instruction read prefetches are ignored.
+define void @f1(i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0)
+  ret void
+}
+
+; Check that instruction write prefetches are ignored.
+define void @f2(i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0)
+  ret void
+}
+
+; Check data read prefetches.
+define void @f3(i8 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: pfd 1, 0(%r2)
+; CHECK: br %r14
+  call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1)
+  ret void
+}
+
+; Check data write prefetches.
+define void @f4(i8 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: pfd 2, 0(%r2)
+; CHECK: br %r14
+  call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+  ret void
+}
+
+; Check an address at the negative end of the range.
+define void @f5(i8 *%base, i64 %index) {
+; CHECK-LABEL: f5:
+; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+  %add = add i64 %index, -524288
+  %ptr = getelementptr i8 *%base, i64 %add
+  call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+  ret void
+}
+
+; Check an address at the positive end of the range.
+define void @f6(i8 *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+  %add = add i64 %index, 524287
+  %ptr = getelementptr i8 *%base, i64 %add
+  call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+  ret void
+}
+
+; Check that the next address up still compiles.
+define void @f7(i8 *%base, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: 524288
+; CHECK: pfd 2,
+; CHECK: br %r14
+  %add = add i64 %index, 524288
+  %ptr = getelementptr i8 *%base, i64 %add
+  call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+  ret void
+}
+
+; Check pc-relative prefetches.
+define void @f8() {
+; CHECK-LABEL: f8:
+; CHECK: pfdrl 2, g
+; CHECK: br %r14
+  %ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0
+  call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/risbg-01.ll b/test/CodeGen/SystemZ/risbg-01.ll
new file mode 100644
index 000000000000..a4d11fdae5b9
--- /dev/null
+++ b/test/CodeGen/SystemZ/risbg-01.ll
@@ -0,0 +1,472 @@
+; Test sequences that can use RISBG with a zeroed first operand.
+; The tests here assume that RISBLG isn't available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; Test an extraction of bit 0 from a right-shifted value.
+define i32 @f1(i32 %foo) {
+; CHECK-LABEL: f1:
+; CHECK: risbg %r2, %r2, 63, 191, 54
+; CHECK: br %r14
+  %shr = lshr i32 %foo, 10
+  %and = and i32 %shr, 1
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f2(i64 %foo) {
+; CHECK-LABEL: f2:
+; CHECK: risbg %r2, %r2, 63, 191, 54
+; CHECK: br %r14
+  %shr = lshr i64 %foo, 10
+  %and = and i64 %shr, 1
+  ret i64 %and
+}
+
+; Test an extraction of other bits from a right-shifted value.
+define i32 @f3(i32 %foo) {
+; CHECK-LABEL: f3:
+; CHECK: risbg %r2, %r2, 60, 189, 42
+; CHECK: br %r14
+  %shr = lshr i32 %foo, 22
+  %and = and i32 %shr, 12
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f4(i64 %foo) {
+; CHECK-LABEL: f4:
+; CHECK: risbg %r2, %r2, 60, 189, 42
+; CHECK: br %r14
+  %shr = lshr i64 %foo, 22
+  %and = and i64 %shr, 12
+  ret i64 %and
+}
+
+; Test an extraction of most bits from a right-shifted value.
+; The range should be reduced to exclude the zeroed high bits.
+define i32 @f5(i32 %foo) {
+; CHECK-LABEL: f5:
+; CHECK: risbg %r2, %r2, 34, 188, 62
+; CHECK: br %r14
+  %shr = lshr i32 %foo, 2
+  %and = and i32 %shr, -8
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f6(i64 %foo) {
+; CHECK-LABEL: f6:
+; CHECK: risbg %r2, %r2, 2, 188, 62
+; CHECK: br %r14
+  %shr = lshr i64 %foo, 2
+  %and = and i64 %shr, -8
+  ret i64 %and
+}
+
+; Try the next value up (mask ....1111001).  This needs a separate shift
+; and mask.
+define i32 @f7(i32 %foo) {
+; CHECK-LABEL: f7:
+; CHECK: srl %r2, 2
+; CHECK: nill %r2, 65529
+; CHECK: br %r14
+  %shr = lshr i32 %foo, 2
+  %and = and i32 %shr, -7
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f8(i64 %foo) {
+; CHECK-LABEL: f8:
+; CHECK: srlg %r2, %r2, 2
+; CHECK: nill %r2, 65529
+; CHECK: br %r14
+  %shr = lshr i64 %foo, 2
+  %and = and i64 %shr, -7
+  ret i64 %and
+}
+
+; Test an extraction of bits from a left-shifted value.  The range should
+; be reduced to exclude the zeroed low bits.
+define i32 @f9(i32 %foo) {
+; CHECK-LABEL: f9:
+; CHECK: risbg %r2, %r2, 56, 189, 2
+; CHECK: br %r14
+  %shr = shl i32 %foo, 2
+  %and = and i32 %shr, 255
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f10(i64 %foo) {
+; CHECK-LABEL: f10:
+; CHECK: risbg %r2, %r2, 56, 189, 2
+; CHECK: br %r14
+  %shr = shl i64 %foo, 2
+  %and = and i64 %shr, 255
+  ret i64 %and
+}
+
+; Try a wrap-around mask (mask ....111100001111).  This needs a separate shift
+; and mask.
+define i32 @f11(i32 %foo) {
+; CHECK-LABEL: f11:
+; CHECK: sll %r2, 2
+; CHECK: nill %r2, 65295
+; CHECK: br %r14
+  %shr = shl i32 %foo, 2
+  %and = and i32 %shr, -241
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f12(i64 %foo) {
+; CHECK-LABEL: f12:
+; CHECK: sllg %r2, %r2, 2
+; CHECK: nill %r2, 65295
+; CHECK: br %r14
+  %shr = shl i64 %foo, 2
+  %and = and i64 %shr, -241
+  ret i64 %and
+}
+
+; Test an extraction from a rotated value, no mask wraparound.
+; This is equivalent to the lshr case, because the bits from the
+; shl are not used.
+define i32 @f13(i32 %foo) {
+; CHECK-LABEL: f13:
+; CHECK: risbg %r2, %r2, 56, 188, 46
+; CHECK: br %r14
+  %parta = shl i32 %foo, 14
+  %partb = lshr i32 %foo, 18
+  %rotl = or i32 %parta, %partb
+  %and = and i32 %rotl, 248
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f14(i64 %foo) {
+; CHECK-LABEL: f14:
+; CHECK: risbg %r2, %r2, 56, 188, 14
+; CHECK: br %r14
+  %parta = shl i64 %foo, 14
+  %partb = lshr i64 %foo, 50
+  %rotl = or i64 %parta, %partb
+  %and = and i64 %rotl, 248
+  ret i64 %and
+}
+
+; Try a case in which only the bits from the shl are used.
+define i32 @f15(i32 %foo) {
+; CHECK-LABEL: f15:
+; CHECK: risbg %r2, %r2, 47, 177, 14
+; CHECK: br %r14
+  %parta = shl i32 %foo, 14
+  %partb = lshr i32 %foo, 18
+  %rotl = or i32 %parta, %partb
+  %and = and i32 %rotl, 114688
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f16(i64 %foo) {
+; CHECK-LABEL: f16:
+; CHECK: risbg %r2, %r2, 47, 177, 14
+; CHECK: br %r14
+  %parta = shl i64 %foo, 14
+  %partb = lshr i64 %foo, 50
+  %rotl = or i64 %parta, %partb
+  %and = and i64 %rotl, 114688
+  ret i64 %and
+}
+
+; Test a 32-bit rotate in which both parts of the OR are needed.
+; This needs a separate shift and mask.
+define i32 @f17(i32 %foo) {
+; CHECK-LABEL: f17:
+; CHECK: rll %r2, %r2, 4
+; CHECK: nilf %r2, 126
+; CHECK: br %r14
+  %parta = shl i32 %foo, 4
+  %partb = lshr i32 %foo, 28
+  %rotl = or i32 %parta, %partb
+  %and = and i32 %rotl, 126
+  ret i32 %and
+}
+
+; ...and for i64, where RISBG should do the rotate too.
+define i64 @f18(i64 %foo) {
+; CHECK-LABEL: f18:
+; CHECK: risbg %r2, %r2, 57, 190, 4
+; CHECK: br %r14
+  %parta = shl i64 %foo, 4
+  %partb = lshr i64 %foo, 60
+  %rotl = or i64 %parta, %partb
+  %and = and i64 %rotl, 126
+  ret i64 %and
+}
+
+; Test an arithmetic shift right in which some of the sign bits are kept.
+; This needs a separate shift and mask.
+define i32 @f19(i32 %foo) {
+; CHECK-LABEL: f19:
+; CHECK: sra %r2, 28
+; CHECK: nilf %r2, 30
+; CHECK: br %r14
+  %shr = ashr i32 %foo, 28
+  %and = and i32 %shr, 30
+  ret i32 %and
+}
+
+; ...and again with i64.  In this case RISBG is the best way of doing the AND.
+define i64 @f20(i64 %foo) {
+; CHECK-LABEL: f20:
+; CHECK: srag [[REG:%r[0-5]]], %r2, 60
+; CHECK: risbg %r2, [[REG]], 59, 190, 0
+; CHECK: br %r14
+  %shr = ashr i64 %foo, 60
+  %and = and i64 %shr, 30
+  ret i64 %and
+}
+
+; Now try an arithmetic right shift in which the sign bits aren't needed.
+; Introduce a second use of %shr so that the ashr doesn't decompose to
+; an lshr.
+define i32 @f21(i32 %foo, i32 *%dest) {
+; CHECK-LABEL: f21:
+; CHECK: risbg %r2, %r2, 60, 190, 36
+; CHECK: br %r14
+  %shr = ashr i32 %foo, 28
+  store i32 %shr, i32 *%dest
+  %and = and i32 %shr, 14
+  ret i32 %and
+}
+
+; ...and again with i64.
+define i64 @f22(i64 %foo, i64 *%dest) {
+; CHECK-LABEL: f22:
+; CHECK: risbg %r2, %r2, 60, 190, 4
+; CHECK: br %r14
+  %shr = ashr i64 %foo, 60
+  store i64 %shr, i64 *%dest
+  %and = and i64 %shr, 14
+  ret i64 %and
+}
+
+; Check that we use RISBG for shifted values even if the AND is a
+; natural zero extension.
+define i64 @f23(i64 %foo) {
+; CHECK-LABEL: f23:
+; CHECK: risbg %r2, %r2, 56, 191, 62
+; CHECK: br %r14
+  %shr = lshr i64 %foo, 2
+  %and = and i64 %shr, 255
+  ret i64 %and
+}
+
+; Test a case where the AND comes before a rotate.  This needs a separate
+; mask and rotate.
+define i32 @f24(i32 %foo) {
+; CHECK-LABEL: f24:
+; CHECK: nilf %r2, 14
+; CHECK: rll %r2, %r2, 3
+; CHECK: br %r14
+  %and = and i32 %foo, 14
+  %parta = shl i32 %and, 3
+  %partb = lshr i32 %and, 29
+  %rotl = or i32 %parta, %partb
+  ret i32 %rotl
+}
+
+; ...and again with i64, where a single RISBG is enough.
+define i64 @f25(i64 %foo) {
+; CHECK-LABEL: f25:
+; CHECK: risbg %r2, %r2, 57, 187, 3
+; CHECK: br %r14
+  %and = and i64 %foo, 14
+  %parta = shl i64 %and, 3
+  %partb = lshr i64 %and, 61
+  %rotl = or i64 %parta, %partb
+  ret i64 %rotl
+}
+
+; Test a wrap-around case in which the AND comes before a rotate.
+; This again needs a separate mask and rotate.
+define i32 @f26(i32 %foo) {
+; CHECK-LABEL: f26:
+; CHECK: nill %r2, 65487
+; CHECK: rll %r2, %r2, 5
+; CHECK: br %r14
+  %and = and i32 %foo, -49
+  %parta = shl i32 %and, 5
+  %partb = lshr i32 %and, 27
+  %rotl = or i32 %parta, %partb
+  ret i32 %rotl
+}
+
+; ...and again with i64, where a single RISBG is OK.
+define i64 @f27(i64 %foo) {
+; CHECK-LABEL: f27:
+; CHECK: risbg %r2, %r2, 55, 180, 5
+; CHECK: br %r14
+  %and = and i64 %foo, -49
+  %parta = shl i64 %and, 5
+  %partb = lshr i64 %and, 59
+  %rotl = or i64 %parta, %partb
+  ret i64 %rotl
+}
+
+; Test a case where the AND comes before a shift left.
+define i32 @f28(i32 %foo) {
+; CHECK-LABEL: f28:
+; CHECK: risbg %r2, %r2, 32, 173, 17
+; CHECK: br %r14
+  %and = and i32 %foo, 32766
+  %shl = shl i32 %and, 17
+  ret i32 %shl
+}
+
+; ...and again with i64.
+define i64 @f29(i64 %foo) {
+; CHECK-LABEL: f29:
+; CHECK: risbg %r2, %r2, 0, 141, 49
+; CHECK: br %r14
+  %and = and i64 %foo, 32766
+  %shl = shl i64 %and, 49
+  ret i64 %shl
+}
+
+; Test the next shift up from f28, in which the mask should get shortened.
+define i32 @f30(i32 %foo) {
+; CHECK-LABEL: f30:
+; CHECK: risbg %r2, %r2, 32, 172, 18
+; CHECK: br %r14
+  %and = and i32 %foo, 32766
+  %shl = shl i32 %and, 18
+  ret i32 %shl
+}
+
+; ...and again with i64.
+define i64 @f31(i64 %foo) {
+; CHECK-LABEL: f31:
+; CHECK: risbg %r2, %r2, 0, 140, 50
+; CHECK: br %r14
+  %and = and i64 %foo, 32766
+  %shl = shl i64 %and, 50
+  ret i64 %shl
+}
+
+; Test a wrap-around case in which the shift left comes after the AND.
+; We can't use RISBG for the shift in that case.
+define i32 @f32(i32 %foo) {
+; CHECK-LABEL: f32:
+; CHECK: sll %r2
+; CHECK: br %r14
+  %and = and i32 %foo, -7
+  %shl = shl i32 %and, 10
+  ret i32 %shl
+}
+
+; ...and again with i64.
+define i64 @f33(i64 %foo) {
+; CHECK-LABEL: f33:
+; CHECK: sllg %r2
+; CHECK: br %r14
+  %and = and i64 %foo, -7
+  %shl = shl i64 %and, 10
+  ret i64 %shl
+}
+
+; Test a case where the AND comes before a shift right.
+define i32 @f34(i32 %foo) {
+; CHECK-LABEL: f34:
+; CHECK: risbg %r2, %r2, 57, 191, 55
+; CHECK: br %r14
+  %and = and i32 %foo, 65535
+  %shl = lshr i32 %and, 9
+  ret i32 %shl
+}
+
+; ...and again with i64.
+define i64 @f35(i64 %foo) {
+; CHECK-LABEL: f35:
+; CHECK: risbg %r2, %r2, 57, 191, 55
+; CHECK: br %r14
+  %and = and i64 %foo, 65535
+  %shl = lshr i64 %and, 9
+  ret i64 %shl
+}
+
+; Test a wrap-around case where the AND comes before a shift right.
+; We can't use RISBG for the shift in that case.
+define i32 @f36(i32 %foo) {
+; CHECK-LABEL: f36:
+; CHECK: srl %r2
+; CHECK: br %r14
+  %and = and i32 %foo, -25
+  %shl = lshr i32 %and, 1
+  ret i32 %shl
+}
+
+; ...and again with i64.
+define i64 @f37(i64 %foo) {
+; CHECK-LABEL: f37:
+; CHECK: srlg %r2
+; CHECK: br %r14
+  %and = and i64 %foo, -25
+  %shl = lshr i64 %and, 1
+  ret i64 %shl
+}
+
+; Test a combination involving a large ASHR and a shift left.  We can't
+; use RISBG there.
+define i64 @f38(i64 %foo) {
+; CHECK-LABEL: f38:
+; CHECK: srag {{%r[0-5]}}
+; CHECK: sllg {{%r[0-5]}}
+; CHECK: br %r14
+  %ashr = ashr i64 %foo, 32
+  %shl = shl i64 %ashr, 5
+  ret i64 %shl
+}
+
+; Try a similar thing in which no shifted sign bits are kept.
+define i64 @f39(i64 %foo, i64 *%dest) {
+; CHECK-LABEL: f39:
+; CHECK: srag [[REG:%r[01345]]], %r2, 35
+; CHECK: risbg %r2, %r2, 33, 189, 31
+; CHECK: br %r14
+  %ashr = ashr i64 %foo, 35
+  store i64 %ashr, i64 *%dest
+  %shl = shl i64 %ashr, 2
+  %and = and i64 %shl, 2147483647
+  ret i64 %and
+}
+
+; ...and again with the next highest shift value, where one sign bit is kept.
+define i64 @f40(i64 %foo, i64 *%dest) {
+; CHECK-LABEL: f40:
+; CHECK: srag [[REG:%r[01345]]], %r2, 36
+; CHECK: risbg %r2, [[REG]], 33, 189, 2
+; CHECK: br %r14
+  %ashr = ashr i64 %foo, 36
+  store i64 %ashr, i64 *%dest
+  %shl = shl i64 %ashr, 2
+  %and = and i64 %shl, 2147483647
+  ret i64 %and
+}
+
+; In this case the sign extension is converted to a pair of 32-bit shifts,
+; which is then extended to 64 bits.  We previously used the wrong bit size
+; when testing whether the shifted-in bits of the shift right were significant.
+define i64 @f41(i1 %x) {
+; CHECK-LABEL: f41:
+; CHECK: sll %r2, 31
+; CHECK: sra %r2, 31
+; CHECK: llgcr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i1 %x to i8
+  %ext2 = zext i8 %ext to i64
+  ret i64 %ext2
+}
diff --git a/test/CodeGen/SystemZ/risbg-02.ll b/test/CodeGen/SystemZ/risbg-02.ll
new file mode 100644
index 000000000000..5ccfab028b02
--- /dev/null
+++ b/test/CodeGen/SystemZ/risbg-02.ll
@@ -0,0 +1,93 @@
+; Test sequences that can use RISBG with a normal first operand.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test a case with two ANDs.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: risbg %r2, %r3, 60, 62, 0
+; CHECK: br %r14
+  %anda = and i32 %a, -15
+  %andb = and i32 %b, 14
+  %or = or i32 %anda, %andb
+  ret i32 %or
+}
+
+; ...and again with i64.
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: risbg %r2, %r3, 60, 62, 0
+; CHECK: br %r14
+  %anda = and i64 %a, -15
+  %andb = and i64 %b, 14
+  %or = or i64 %anda, %andb
+  ret i64 %or
+}
+
+; Test a case with two ANDs and a shift.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: risbg %r2, %r3, 60, 63, 56
+; CHECK: br %r14
+  %anda = and i32 %a, -16
+  %shr = lshr i32 %b, 8
+  %andb = and i32 %shr, 15
+  %or = or i32 %anda, %andb
+  ret i32 %or
+}
+
+; ...and again with i64.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: risbg %r2, %r3, 60, 63, 56
+; CHECK: br %r14
+  %anda = and i64 %a, -16
+  %shr = lshr i64 %b, 8
+  %andb = and i64 %shr, 15
+  %or = or i64 %anda, %andb
+  ret i64 %or
+}
+
+; Test a case with a single AND and a left shift.
+define i32 @f5(i32 %a, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: risbg %r2, %r3, 32, 53, 10
+; CHECK: br %r14
+  %anda = and i32 %a, 1023
+  %shlb = shl i32 %b, 10
+  %or = or i32 %anda, %shlb
+  ret i32 %or
+}
+
+; ...and again with i64.
+define i64 @f6(i64 %a, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: risbg %r2, %r3, 0, 53, 10
+; CHECK: br %r14
+  %anda = and i64 %a, 1023
+  %shlb = shl i64 %b, 10
+  %or = or i64 %anda, %shlb
+  ret i64 %or
+}
+
+; Test a case with a single AND and a right shift.
+define i32 @f7(i32 %a, i32 %b) {
+; CHECK-LABEL: f7:
+; CHECK: risbg %r2, %r3, 40, 63, 56
+; CHECK: br %r14
+  %anda = and i32 %a, -16777216
+  %shrb = lshr i32 %b, 8
+  %or = or i32 %anda, %shrb
+  ret i32 %or
+}
+
+; ...and again with i64.
+define i64 @f8(i64 %a, i64 %b) {
+; CHECK-LABEL: f8:
+; CHECK: risbg %r2, %r3, 8, 63, 56
+; CHECK: br %r14
+  %anda = and i64 %a, -72057594037927936
+  %shrb = lshr i64 %b, 8
+  %or = or i64 %anda, %shrb
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/rnsbg-01.ll b/test/CodeGen/SystemZ/rnsbg-01.ll
new file mode 100644
index 000000000000..666aeb21e8d8
--- /dev/null
+++ b/test/CodeGen/SystemZ/rnsbg-01.ll
@@ -0,0 +1,257 @@
+; Test sequences that can use RNSBG.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test a simple mask (the OR forces bits 5-6 on), a wrap-around case.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: rnsbg %r2, %r3, 59, 56, 0
+; CHECK: br %r14
+  %orb = or i32 %b, 96
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, using the same OR mask.
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: rnsbg %r2, %r3, 59, 56, 0
+; CHECK: br %r14
+  %orb = or i64 %b, 96
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case where no wraparound is needed: only bits 2-5 of %b matter.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: rnsbg %r2, %r3, 58, 61, 0
+; CHECK: br %r14
+  %orb = or i32 %b, -61
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, using the same OR mask.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: rnsbg %r2, %r3, 58, 61, 0
+; CHECK: br %r14
+  %orb = or i64 %b, -61
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a case with just a left shift by 20.  This can't use RNSBG (SLL + NR).
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: sll {{%r[0-5]}}
+; CHECK: nr {{%r[0-5]}}
+; CHECK: br %r14
+  %shlb = shl i32 %b, 20
+  %and = and i32 %a, %shlb
+  ret i32 %and
+}
+
+; ...and again with i64, where SLLG + NGR are expected instead.
+define i64 @f7(i64 %a, i64 %b) {
+; CHECK-LABEL: f7:
+; CHECK: sllg {{%r[0-5]}}
+; CHECK: ngr {{%r[0-5]}}
+; CHECK: br %r14
+  %shlb = shl i64 %b, 20
+  %and = and i64 %a, %shlb
+  ret i64 %and
+}
+
+; Test a case with just a 32-bit rotate.  This can't use RNSBG (RLL + NR).
+define i32 @f8(i32 %a, i32 %b) {
+; CHECK-LABEL: f8:
+; CHECK: rll {{%r[0-5]}}
+; CHECK: nr {{%r[0-5]}}
+; CHECK: br %r14
+  %shlb = shl i32 %b, 22
+  %shrb = lshr i32 %b, 10
+  %rotlb = or i32 %shlb, %shrb
+  %and = and i32 %a, %rotlb
+  ret i32 %and
+}
+
+; ...and again with a 64-bit rotate left by 44, which can.
+define i64 @f9(i64 %a, i64 %b) {
+; CHECK-LABEL: f9:
+; CHECK: rnsbg %r2, %r3, 0, 63, 44
+; CHECK: br %r14
+  %shlb = shl i64 %b, 44
+  %shrb = lshr i64 %b, 20
+  %rotlb = or i64 %shlb, %shrb
+  %and = and i64 %a, %rotlb
+  ret i64 %and
+}
+
+; Test a left shift by 7 and an OR with 127, so the OR covers every bit
+; vacated by the shift.  We can do the whole thing using RNSBG.
+define i32 @f10(i32 %a, i32 %b) {
+; CHECK-LABEL: f10:
+; CHECK: rnsbg %r2, %r3, 32, 56, 7
+; CHECK: br %r14
+  %shlb = shl i32 %b, 7
+  %orb = or i32 %shlb, 127
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, using the same shift amount and OR mask.
+define i64 @f11(i64 %a, i64 %b) {
+; CHECK-LABEL: f11:
+; CHECK: rnsbg %r2, %r3, 0, 56, 7
+; CHECK: br %r14
+  %shlb = shl i64 %b, 7
+  %orb = or i64 %shlb, 127
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a left shift by 7 and an OR with 63, where the OR leaves bit 6
+; (vacated by the shift) uncovered.  We can't use RNSBG for the shift, but
+; we can for the OR and AND.
+define i32 @f12(i32 %a, i32 %b) {
+; CHECK-LABEL: f12:
+; CHECK: sll %r3, 7
+; CHECK: rnsbg %r2, %r3, 32, 57, 0
+; CHECK: br %r14
+  %shlb = shl i32 %b, 7
+  %orb = or i32 %shlb, 63
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, where the separate shift is expected to be an SLLG.
+define i64 @f13(i64 %a, i64 %b) {
+; CHECK-LABEL: f13:
+; CHECK: sllg [[REG:%r[01345]]], %r3, 7
+; CHECK: rnsbg %r2, [[REG]], 0, 57, 0
+; CHECK: br %r14
+  %shlb = shl i64 %b, 7
+  %orb = or i64 %shlb, 63
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a right shift by 27 and an OR with -16, where the OR covers all the
+; bits vacated by the shift.  The whole thing can be done using RNSBG.
+define i32 @f14(i32 %a, i32 %b) {
+; CHECK-LABEL: f14:
+; CHECK: rnsbg %r2, %r3, 60, 63, 37
+; CHECK: br %r14
+  %shrb = lshr i32 %b, 27
+  %orb = or i32 %shrb, -16
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, using a right shift by 59 and the same OR mask.
+define i64 @f15(i64 %a, i64 %b) {
+; CHECK-LABEL: f15:
+; CHECK: rnsbg %r2, %r3, 60, 63, 5
+; CHECK: br %r14
+  %shrb = lshr i64 %b, 59
+  %orb = or i64 %shrb, -16
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a right shift by 29 and an OR with -16, where the OR leaves bit 3
+; (vacated by the shift) uncovered.  The shift needs to be done separately,
+; but the OR and AND can use RNSBG.
+define i32 @f16(i32 %a, i32 %b) {
+; CHECK-LABEL: f16:
+; CHECK: srl %r3, 29
+; CHECK: rnsbg %r2, %r3, 60, 63, 0
+; CHECK: br %r14
+  %shrb = lshr i32 %b, 29
+  %orb = or i32 %shrb, -16
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, where the separate shift is expected to be an SRLG.
+define i64 @f17(i64 %a, i64 %b) {
+; CHECK-LABEL: f17:
+; CHECK: srlg [[REG:%r[01345]]], %r3, 61
+; CHECK: rnsbg %r2, [[REG]], 60, 63, 0
+; CHECK: br %r14
+  %shrb = lshr i64 %b, 61
+  %orb = or i64 %shrb, -16
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a combination involving an ASHR in which the sign bits matter (they
+; reach the AND).  We can't use RNSBG for the ASHR, but we can for the rest.
+define i32 @f18(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f18:
+; CHECK: sra %r3, 4
+; CHECK: rnsbg %r2, %r3, 32, 62, 1
+; CHECK: br %r14
+  %ashrb = ashr i32 %b, 4
+  store i32 %ashrb, i32 *%dest
+  %shlb = shl i32 %ashrb, 1
+  %orb = or i32 %shlb, 1
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, using an arithmetic shift right by 34.
+define i64 @f19(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f19:
+; CHECK: srag [[REG:%r[0145]]], %r3, 34
+; CHECK: rnsbg %r2, [[REG]], 0, 62, 1
+; CHECK: br %r14
+  %ashrb = ashr i64 %b, 34
+  store i64 %ashrb, i64 *%dest
+  %shlb = shl i64 %ashrb, 1
+  %orb = or i64 %shlb, 1
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test an ASHR whose sign bits don't matter: the OR forces bits 16-31 to one.
+define i32 @f20(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f20:
+; CHECK: rnsbg %r2, %r3, 48, 62, 48
+; CHECK: br %r14
+  %ashrb = ashr i32 %b, 17
+  store i32 %ashrb, i32 *%dest
+  %shlb = shl i32 %ashrb, 1
+  %orb = or i32 %shlb, -65535
+  %and = and i32 %a, %orb
+  ret i32 %and
+}
+
+; ...and again with i64, using an arithmetic shift right by 49.
+define i64 @f21(i64 %a, i64 %b, i64 *%dest) {
+; CHECK-LABEL: f21:
+; CHECK: rnsbg %r2, %r3, 48, 62, 16
+; CHECK: br %r14
+  %ashrb = ashr i64 %b, 49
+  store i64 %ashrb, i64 *%dest
+  %shlb = shl i64 %ashrb, 1
+  %orb = or i64 %shlb, -65535
+  %and = and i64 %a, %orb
+  ret i64 %and
+}
+
+; Test a shift, OR and rotate left by 4, where the OR covers all shifted bits.
+define i64 @f22(i64 %a, i64 %b) {
+; CHECK-LABEL: f22:
+; CHECK: rnsbg %r2, %r3, 60, 54, 9
+; CHECK: br %r14
+  %shlb = shl i64 %b, 5
+  %orb = or i64 %shlb, 31
+  %shlorb = shl i64 %orb, 4
+  %shrorb = lshr i64 %orb, 60
+  %rotlorb = or i64 %shlorb, %shrorb
+  %and = and i64 %a, %rotlorb
+  ret i64 %and
+}
diff --git a/test/CodeGen/SystemZ/rosbg-01.ll b/test/CodeGen/SystemZ/rosbg-01.ll
new file mode 100644
index 000000000000..0abacccba14c
--- /dev/null
+++ b/test/CodeGen/SystemZ/rosbg-01.ll
@@ -0,0 +1,110 @@
+; Test sequences that can use ROSBG.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the simple case: OR a single bit of %b (mask 16) into %a.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: rosbg %r2, %r3, 59, 59, 0
+; CHECK: br %r14
+  %andb = and i32 %b, 16
+  %or = or i32 %a, %andb
+  ret i32 %or
+}
+
+; ...and again with i64, using the same mask.
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: rosbg %r2, %r3, 59, 59, 0
+; CHECK: br %r14
+  %andb = and i64 %b, 16
+  %or = or i64 %a, %andb
+  ret i64 %or
+}
+
+; Test a case where wraparound is needed: all of %b except bits 1-2 is used.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: rosbg %r2, %r3, 63, 60, 0
+; CHECK: br %r14
+  %andb = and i32 %b, -7
+  %or = or i32 %a, %andb
+  ret i32 %or
+}
+
+; ...and again with i64, using the same mask.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: rosbg %r2, %r3, 63, 60, 0
+; CHECK: br %r14
+  %andb = and i64 %b, -7
+  %or = or i64 %a, %andb
+  ret i64 %or
+}
+
+; Test a case with just a left shift by 12.
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: rosbg %r2, %r3, 32, 51, 12
+; CHECK: br %r14
+  %shlb = shl i32 %b, 12
+  %or = or i32 %a, %shlb
+  ret i32 %or
+}
+
+; ...and again with i64, using the same shift amount.
+define i64 @f7(i64 %a, i64 %b) {
+; CHECK-LABEL: f7:
+; CHECK: rosbg %r2, %r3, 0, 51, 12
+; CHECK: br %r14
+  %shlb = shl i64 %b, 12
+  %or = or i64 %a, %shlb
+  ret i64 %or
+}
+
+; Test a case with just a 32-bit rotate.  This can't use ROSBG (RLL + OR).
+define i32 @f8(i32 %a, i32 %b) {
+; CHECK-LABEL: f8:
+; CHECK: rll {{%r[0-5]}}
+; CHECK: or {{%r[0-5]}}
+; CHECK: br %r14
+  %shlb = shl i32 %b, 30
+  %shrb = lshr i32 %b, 2
+  %rotlb = or i32 %shlb, %shrb
+  %or = or i32 %a, %rotlb
+  ret i32 %or
+}
+
+; ...and again with a 64-bit rotate left by 47, which can.
+define i64 @f9(i64 %a, i64 %b) {
+; CHECK-LABEL: f9:
+; CHECK: rosbg %r2, %r3, 0, 63, 47
+; CHECK: br %r14
+  %shlb = shl i64 %b, 47
+  %shrb = lshr i64 %b, 17
+  %rotlb = or i64 %shlb, %shrb
+  %or = or i64 %a, %rotlb
+  ret i64 %or
+}
+
+; Test a left shift by 4 and an AND that keeps bits 4-7 of the result.
+define i32 @f10(i32 %a, i32 %b) {
+; CHECK-LABEL: f10:
+; CHECK: rosbg %r2, %r3, 56, 59, 4
+; CHECK: br %r14
+  %shlb = shl i32 %b, 4
+  %andb = and i32 %shlb, 240
+  %or = or i32 %a, %andb
+  ret i32 %or
+}
+
+; ...and again with i64, using the same shift amount and mask.
+define i64 @f11(i64 %a, i64 %b) {
+; CHECK-LABEL: f11:
+; CHECK: rosbg %r2, %r3, 56, 59, 4
+; CHECK: br %r14
+  %shlb = shl i64 %b, 4
+  %andb = and i64 %shlb, 240
+  %or = or i64 %a, %andb
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/rxsbg-01.ll b/test/CodeGen/SystemZ/rxsbg-01.ll
new file mode 100644
index 000000000000..5491bff2ecdc
--- /dev/null
+++ b/test/CodeGen/SystemZ/rxsbg-01.ll
@@ -0,0 +1,112 @@
+; Test sequences that can use RXSBG.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the simple case: XOR a single bit of %b (mask 16) into %a.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: rxsbg %r2, %r3, 59, 59, 0
+; CHECK: br %r14
+  %andb = and i32 %b, 16
+  %xor = xor i32 %a, %andb
+  ret i32 %xor
+}
+
+; ...and again with i64, using the same mask.
+define i64 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: rxsbg %r2, %r3, 59, 59, 0
+; CHECK: br %r14
+  %andb = and i64 %b, 16
+  %xor = xor i64 %a, %andb
+  ret i64 %xor
+}
+
+; Test a case where wraparound is needed: all of %b except bits 1-2 is used.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: rxsbg %r2, %r3, 63, 60, 0
+; CHECK: br %r14
+  %andb = and i32 %b, -7
+  %xor = xor i32 %a, %andb
+  ret i32 %xor
+}
+
+; ...and again with i64, using the same mask.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: rxsbg %r2, %r3, 63, 60, 0
+; CHECK: br %r14
+  %andb = and i64 %b, -7
+  %xor = xor i64 %a, %andb
+  ret i64 %xor
+}
+
+; Test a case with just a left shift by 12.
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: rxsbg %r2, %r3, 32, 51, 12
+; CHECK: br %r14
+  %shlb = shl i32 %b, 12
+  %xor = xor i32 %a, %shlb
+  ret i32 %xor
+}
+
+; ...and again with i64, using the same shift amount.
+define i64 @f7(i64 %a, i64 %b) {
+; CHECK-LABEL: f7:
+; CHECK: rxsbg %r2, %r3, 0, 51, 12
+; CHECK: br %r14
+  %shlb = shl i64 %b, 12
+  %xor = xor i64 %a, %shlb
+  ret i64 %xor
+}
+
+; Test a case with just a 32-bit rotate (using XOR to combine the two shifts
+; too, to test that this kind of rotate does get recognised by the target-
+; independent code).  This can't use RXSBG: RLL + XR are expected.
+define i32 @f8(i32 %a, i32 %b) {
+; CHECK-LABEL: f8:
+; CHECK: rll {{%r[0-5]}}
+; CHECK: xr {{%r[0-5]}}
+; CHECK: br %r14
+  %shlb = shl i32 %b, 30
+  %shrb = lshr i32 %b, 2
+  %rotlb = xor i32 %shlb, %shrb
+  %xor = xor i32 %a, %rotlb
+  ret i32 %xor
+}
+
+; ...and again with a 64-bit rotate left by 47, which can use RXSBG.
+define i64 @f9(i64 %a, i64 %b) {
+; CHECK-LABEL: f9:
+; CHECK: rxsbg %r2, %r3, 0, 63, 47
+; CHECK: br %r14
+  %shlb = shl i64 %b, 47
+  %shrb = lshr i64 %b, 17
+  %rotlb = xor i64 %shlb, %shrb
+  %xor = xor i64 %a, %rotlb
+  ret i64 %xor
+}
+
+; Test a left shift by 4 and an AND that keeps bits 4-7 of the result.
+define i32 @f10(i32 %a, i32 %b) {
+; CHECK-LABEL: f10:
+; CHECK: rxsbg %r2, %r3, 56, 59, 4
+; CHECK: br %r14
+  %shlb = shl i32 %b, 4
+  %andb = and i32 %shlb, 240
+  %xor = xor i32 %a, %andb
+  ret i32 %xor
+}
+
+; ...and again with i64, using the same shift amount and mask.
+define i64 @f11(i64 %a, i64 %b) {
+; CHECK-LABEL: f11:
+; CHECK: rxsbg %r2, %r3, 56, 59, 4
+; CHECK: br %r14
+  %shlb = shl i64 %b, 4
+  %andb = and i64 %shlb, 240
+  %xor = xor i64 %a, %andb
+  ret i64 %xor
+}
diff --git a/test/CodeGen/SystemZ/setcc-01.ll b/test/CodeGen/SystemZ/setcc-01.ll
new file mode 100644
index 000000000000..4626760fa25b
--- /dev/null
+++ b/test/CodeGen/SystemZ/setcc-01.ll
@@ -0,0 +1,74 @@
+; Test SETCC for every integer condition.  The tests here assume that
+; RISBLG isn't available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; Test CC in { 0 }, with 3 don't care.  This is the icmp eq case.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = icmp eq i32 %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 1 }, with 3 don't care.  This is the icmp slt case.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36
+; CHECK: br %r14
+  %cond = icmp slt i32 %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 1 }, with 3 don't care.  This is the icmp sle case.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -536870912
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = icmp sle i32 %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 2 }, with 3 don't care.  This is the icmp sgt case.
+define i32 @f4(i32 %a, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35
+; CHECK: br %r14
+  %cond = icmp sgt i32 %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 2 }, with 3 don't care.  This is the icmp sge case.
+define i32 @f5(i32 %a, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: xilf [[REG]], 4294967295
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36
+; CHECK: br %r14
+  %cond = icmp sge i32 %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 1, 2 }, with 3 don't care.  This is the icmp ne case.
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = icmp ne i32 %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/setcc-02.ll b/test/CodeGen/SystemZ/setcc-02.ll
new file mode 100644
index 000000000000..6a7be47a92b7
--- /dev/null
+++ b/test/CodeGen/SystemZ/setcc-02.ll
@@ -0,0 +1,174 @@
+; Test SETCC for every floating-point condition.  The tests here assume that
+; RISBLG isn't available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; Test CC in { 0 }.  This is the fcmp oeq case.
+define i32 @f1(float %a, float %b) {
+; CHECK-LABEL: f1:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp oeq float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 1 }.  This is the fcmp olt case.
+define i32 @f2(float %a, float %b) {
+; CHECK-LABEL: f2:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp olt float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 1 }.  This is the fcmp ole case.
+define i32 @f3(float %a, float %b) {
+; CHECK-LABEL: f3:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -536870912
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ole float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 2 }.  This is the fcmp ogt case.
+define i32 @f4(float %a, float %b) {
+; CHECK-LABEL: f4:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, 1342177280
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ogt float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 2 }.  This is the fcmp oge case.
+define i32 @f5(float %a, float %b) {
+; CHECK-LABEL: f5:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: xilf [[REG]], 4294967295
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36
+; CHECK: br %r14
+  %cond = fcmp oge float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 1, 2 }.  This is the fcmp one case.
+define i32 @f6(float %a, float %b) {
+; CHECK-LABEL: f6:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: afi [[REG]], 268435456
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35
+; CHECK: br %r14
+  %cond = fcmp one float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 1, 2 }.  This is the fcmp ord case.
+define i32 @f7(float %a, float %b) {
+; CHECK-LABEL: f7:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -805306368
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ord float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 3 }.  This is the fcmp uno case.
+define i32 @f8(float %a, float %b) {
+; CHECK-LABEL: f8:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 1342177280
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp uno float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 3 }.  This is the fcmp ueq case.
+define i32 @f9(float %a, float %b) {
+; CHECK-LABEL: f9:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: afi [[REG]], -268435456
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35
+; CHECK: br %r14
+  %cond = fcmp ueq float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 1, 3 }.  This is the fcmp ult case.
+define i32 @f10(float %a, float %b) {
+; CHECK-LABEL: f10:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 36
+; CHECK: br %r14
+  %cond = fcmp ult float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 1, 3 }.  This is the fcmp ule case.
+define i32 @f11(float %a, float %b) {
+; CHECK-LABEL: f11:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, -805306368
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ule float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 2, 3 }.  This is the fcmp ugt case.
+define i32 @f12(float %a, float %b) {
+; CHECK-LABEL: f12:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: risbg %r2, [[REG]], 63, 191, 35
+; CHECK: br %r14
+  %cond = fcmp ugt float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 0, 2, 3 }.  This is the fcmp uge case.
+define i32 @f13(float %a, float %b) {
+; CHECK-LABEL: f13:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp uge float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
+
+; Test CC in { 1, 2, 3 }.  This is the fcmp une case.
+define i32 @f14(float %a, float %b) {
+; CHECK-LABEL: f14:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: srl %r2, 31
+; CHECK: br %r14
+  %cond = fcmp une float %a, %b
+  %res = zext i1 %cond to i32
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/shift-01.ll b/test/CodeGen/SystemZ/shift-01.ll
index e5a459aaa828..5dab36b379c4 100644
--- a/test/CodeGen/SystemZ/shift-01.ll
+++ b/test/CodeGen/SystemZ/shift-01.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the SLL range.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sll %r2, 1
 ; CHECK: br %r14
   %shift = shl i32 %a, 1
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the defined SLL range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sll %r2, 31
 ; CHECK: br %r14
   %shift = shl i32 %a, 31
@@ -22,7 +22,7 @@ define i32 @f2(i32 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: sll %r2, 32
 ; CHECK: br %r14
   %shift = shl i32 %a, 32
@@ -31,7 +31,7 @@ define i32 @f3(i32 %a) {
 
 ; Make sure that we don't generate negative shift amounts.
 define i32 @f4(i32 %a, i32 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: sll %r2, -1{{.*}}
 ; CHECK: br %r14
   %sub = sub i32 %amt, 1
@@ -41,7 +41,7 @@ define i32 @f4(i32 %a, i32 %amt) {
 
 ; Check variable shifts.
 define i32 @f5(i32 %a, i32 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sll %r2, 0(%r3)
 ; CHECK: br %r14
   %shift = shl i32 %a, %amt
@@ -50,7 +50,7 @@ define i32 @f5(i32 %a, i32 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i32 @f6(i32 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sll %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -60,7 +60,7 @@ define i32 @f6(i32 %a, i32 %amt) {
 
 ; ...and again with a truncated 64-bit shift amount.
 define i32 @f7(i32 %a, i64 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sll %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -72,7 +72,7 @@ define i32 @f7(i32 %a, i64 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i32 @f8(i32 %a, i32 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sll %r2, 4095(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 4095
@@ -82,7 +82,7 @@ define i32 @f8(i32 %a, i32 %amt) {
 
 ; Check the next value up.  Again, we could mask the amount instead.
 define i32 @f9(i32 %a, i32 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ahi %r3, 4096
 ; CHECK: sll %r2, 0(%r3)
 ; CHECK: br %r14
@@ -93,7 +93,7 @@ define i32 @f9(i32 %a, i32 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i32 @f10(i32 %a, i32 %b, i32 %c) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ar {{%r3, %r4|%r4, %r3}}
 ; CHECK: sll %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -104,7 +104,7 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i32 @f11(i32 %a, i32 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: sll %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-02.ll b/test/CodeGen/SystemZ/shift-02.ll
index 38093a8ff7a0..27e73cd3a1f8 100644
--- a/test/CodeGen/SystemZ/shift-02.ll
+++ b/test/CodeGen/SystemZ/shift-02.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the SRL range.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: srl %r2, 1
 ; CHECK: br %r14
   %shift = lshr i32 %a, 1
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the defined SRL range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: srl %r2, 31
 ; CHECK: br %r14
   %shift = lshr i32 %a, 31
@@ -22,7 +22,7 @@ define i32 @f2(i32 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: srl %r2, 32
 ; CHECK: br %r14
   %shift = lshr i32 %a, 32
@@ -31,7 +31,7 @@ define i32 @f3(i32 %a) {
 
 ; Make sure that we don't generate negative shift amounts.
 define i32 @f4(i32 %a, i32 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: srl %r2, -1{{.*}}
 ; CHECK: br %r14
   %sub = sub i32 %amt, 1
@@ -41,7 +41,7 @@ define i32 @f4(i32 %a, i32 %amt) {
 
 ; Check variable shifts.
 define i32 @f5(i32 %a, i32 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: srl %r2, 0(%r3)
 ; CHECK: br %r14
   %shift = lshr i32 %a, %amt
@@ -50,7 +50,7 @@ define i32 @f5(i32 %a, i32 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i32 @f6(i32 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: srl %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -60,7 +60,7 @@ define i32 @f6(i32 %a, i32 %amt) {
 
 ; ...and again with a truncated 64-bit shift amount.
 define i32 @f7(i32 %a, i64 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: srl %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -72,7 +72,7 @@ define i32 @f7(i32 %a, i64 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i32 @f8(i32 %a, i32 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: srl %r2, 4095(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 4095
@@ -82,7 +82,7 @@ define i32 @f8(i32 %a, i32 %amt) {
 
 ; Check the next value up.  Again, we could mask the amount instead.
 define i32 @f9(i32 %a, i32 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ahi %r3, 4096
 ; CHECK: srl %r2, 0(%r3)
 ; CHECK: br %r14
@@ -93,7 +93,7 @@ define i32 @f9(i32 %a, i32 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i32 @f10(i32 %a, i32 %b, i32 %c) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ar {{%r3, %r4|%r4, %r3}}
 ; CHECK: srl %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -104,7 +104,7 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i32 @f11(i32 %a, i32 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: srl %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-03.ll b/test/CodeGen/SystemZ/shift-03.ll
index ca510f3c429b..c45ae48b4071 100644
--- a/test/CodeGen/SystemZ/shift-03.ll
+++ b/test/CodeGen/SystemZ/shift-03.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the SRA range.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sra %r2, 1
 ; CHECK: br %r14
   %shift = ashr i32 %a, 1
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the defined SRA range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sra %r2, 31
 ; CHECK: br %r14
   %shift = ashr i32 %a, 31
@@ -22,7 +22,7 @@ define i32 @f2(i32 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: sra %r2, 32
 ; CHECK: br %r14
   %shift = ashr i32 %a, 32
@@ -31,7 +31,7 @@ define i32 @f3(i32 %a) {
 
 ; Make sure that we don't generate negative shift amounts.
 define i32 @f4(i32 %a, i32 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NOT: sra %r2, -1{{.*}}
 ; CHECK: br %r14
   %sub = sub i32 %amt, 1
@@ -41,7 +41,7 @@ define i32 @f4(i32 %a, i32 %amt) {
 
 ; Check variable shifts.
 define i32 @f5(i32 %a, i32 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sra %r2, 0(%r3)
 ; CHECK: br %r14
   %shift = ashr i32 %a, %amt
@@ -50,7 +50,7 @@ define i32 @f5(i32 %a, i32 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i32 @f6(i32 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sra %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -60,7 +60,7 @@ define i32 @f6(i32 %a, i32 %amt) {
 
 ; ...and again with a truncated 64-bit shift amount.
 define i32 @f7(i32 %a, i64 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sra %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -72,7 +72,7 @@ define i32 @f7(i32 %a, i64 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i32 @f8(i32 %a, i32 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sra %r2, 4095(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 4095
@@ -82,7 +82,7 @@ define i32 @f8(i32 %a, i32 %amt) {
 
 ; Check the next value up.  Again, we could mask the amount instead.
 define i32 @f9(i32 %a, i32 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: ahi %r3, 4096
 ; CHECK: sra %r2, 0(%r3)
 ; CHECK: br %r14
@@ -93,7 +93,7 @@ define i32 @f9(i32 %a, i32 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i32 @f10(i32 %a, i32 %b, i32 %c) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: ar {{%r3, %r4|%r4, %r3}}
 ; CHECK: sra %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -104,7 +104,7 @@ define i32 @f10(i32 %a, i32 %b, i32 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i32 @f11(i32 %a, i32 *%ptr) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: sra %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-04.ll b/test/CodeGen/SystemZ/shift-04.ll
index 0146a86ee062..04b39d002c5d 100644
--- a/test/CodeGen/SystemZ/shift-04.ll
+++ b/test/CodeGen/SystemZ/shift-04.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the RLL range.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: rll %r2, %r2, 1
 ; CHECK: br %r14
   %parta = shl i32 %a, 1
@@ -15,7 +15,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the defined RLL range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: rll %r2, %r2, 31
 ; CHECK: br %r14
   %parta = shl i32 %a, 31
@@ -26,7 +26,7 @@ define i32 @f2(i32 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: rll
 ; CHECK: br %r14
   %parta = shl i32 %a, 32
@@ -37,7 +37,7 @@ define i32 @f3(i32 %a) {
 
 ; Check variable shifts.
 define i32 @f4(i32 %a, i32 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: rll %r2, %r2, 0(%r3)
 ; CHECK: br %r14
   %amtb = sub i32 32, %amt
@@ -49,7 +49,7 @@ define i32 @f4(i32 %a, i32 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i32 @f5(i32 %a, i32 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: rll %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -62,7 +62,7 @@ define i32 @f5(i32 %a, i32 %amt) {
 
 ; ...and again with a truncated 64-bit shift amount.
 define i32 @f6(i32 %a, i64 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: rll %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -76,7 +76,7 @@ define i32 @f6(i32 %a, i64 %amt) {
 
 ; ...and again with a different truncation representation.
 define i32 @f7(i32 %a, i64 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: rll %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -92,7 +92,7 @@ define i32 @f7(i32 %a, i64 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i32 @f8(i32 %a, i32 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: rll %r2, %r2, 524287(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 524287
@@ -106,7 +106,7 @@ define i32 @f8(i32 %a, i32 %amt) {
 ; Check the next value up, which without masking must use a separate
 ; addition.
 define i32 @f9(i32 %a, i32 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: afi %r3, 524288
 ; CHECK: rll %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -120,7 +120,7 @@ define i32 @f9(i32 %a, i32 %amt) {
 
 ; Check cases where 1 is subtracted from the shift amount.
 define i32 @f10(i32 %a, i32 %amt) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: rll %r2, %r2, -1(%r3)
 ; CHECK: br %r14
   %suba = sub i32 %amt, 1
@@ -134,7 +134,7 @@ define i32 @f10(i32 %a, i32 %amt) {
 ; Check the lowest value that can be subtracted from the shift amount.
 ; Again, we could mask the shift amount instead.
 define i32 @f11(i32 %a, i32 %amt) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: rll %r2, %r2, -524288(%r3)
 ; CHECK: br %r14
   %suba = sub i32 %amt, 524288
@@ -148,7 +148,7 @@ define i32 @f11(i32 %a, i32 %amt) {
 ; Check the next value down, which without masking must use a separate
 ; addition.
 define i32 @f12(i32 %a, i32 %amt) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: afi %r3, -524289
 ; CHECK: rll %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -162,7 +162,7 @@ define i32 @f12(i32 %a, i32 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i32 @f13(i32 %a, i32 %b, i32 %c) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: ar {{%r3, %r4|%r4, %r3}}
 ; CHECK: rll %r2, %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -176,7 +176,7 @@ define i32 @f13(i32 %a, i32 %b, i32 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i32 @f14(i32 %a, i32 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: rll %r2, %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-05.ll b/test/CodeGen/SystemZ/shift-05.ll
index 8c0ca9381bcb..833b2fbae1e5 100644
--- a/test/CodeGen/SystemZ/shift-05.ll
+++ b/test/CodeGen/SystemZ/shift-05.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the SLLG range.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: sllg %r2, %r2, 1
 ; CHECK: br %r14
   %shift = shl i64 %a, 1
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the defined SLLG range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sllg %r2, %r2, 63
 ; CHECK: br %r14
   %shift = shl i64 %a, 63
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: sllg
 ; CHECK: br %r14
   %shift = shl i64 %a, 64
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 
 ; Check variable shifts.
 define i64 @f4(i64 %a, i64 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sllg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
   %shift = shl i64 %a, %amt
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i64 @f5(i64 %a, i64 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sllg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -50,7 +50,7 @@ define i64 @f5(i64 %a, i64 %amt) {
 
 ; ...and again with a sign-extended 32-bit shift amount.
 define i64 @f6(i64 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: sllg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -61,7 +61,7 @@ define i64 @f6(i64 %a, i32 %amt) {
 
 ; ...and now with a zero-extended 32-bit shift amount.
 define i64 @f7(i64 %a, i32 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: sllg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -73,7 +73,7 @@ define i64 @f7(i64 %a, i32 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i64 @f8(i64 %a, i64 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: sllg %r2, %r2, 524287(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 524287
@@ -84,7 +84,7 @@ define i64 @f8(i64 %a, i64 %amt) {
 ; Check the next value up, which without masking must use a separate
 ; addition.
 define i64 @f9(i64 %a, i64 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: a{{g?}}fi %r3, 524288
 ; CHECK: sllg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -95,7 +95,7 @@ define i64 @f9(i64 %a, i64 %amt) {
 
 ; Check cases where 1 is subtracted from the shift amount.
 define i64 @f10(i64 %a, i64 %amt) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: sllg %r2, %r2, -1(%r3)
 ; CHECK: br %r14
   %sub = sub i64 %amt, 1
@@ -106,7 +106,7 @@ define i64 @f10(i64 %a, i64 %amt) {
 ; Check the lowest value that can be subtracted from the shift amount.
 ; Again, we could mask the shift amount instead.
 define i64 @f11(i64 %a, i64 %amt) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: sllg %r2, %r2, -524288(%r3)
 ; CHECK: br %r14
   %sub = sub i64 %amt, 524288
@@ -117,7 +117,7 @@ define i64 @f11(i64 %a, i64 %amt) {
 ; Check the next value down, which without masking must use a separate
 ; addition.
 define i64 @f12(i64 %a, i64 %amt) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: a{{g?}}fi %r3, -524289
 ; CHECK: sllg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -128,7 +128,7 @@ define i64 @f12(i64 %a, i64 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i64 @f13(i64 %a, i64 %b, i64 %c) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
 ; CHECK: sllg %r2, %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -139,7 +139,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i64 @f14(i64 %a, i64 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: sllg %r2, %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-06.ll b/test/CodeGen/SystemZ/shift-06.ll
index 5f600b45a884..74cae1213a3e 100644
--- a/test/CodeGen/SystemZ/shift-06.ll
+++ b/test/CodeGen/SystemZ/shift-06.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the SRLG range.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: srlg %r2, %r2, 1
 ; CHECK: br %r14
   %shift = lshr i64 %a, 1
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the defined SRLG range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: srlg %r2, %r2, 63
 ; CHECK: br %r14
   %shift = lshr i64 %a, 63
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: srlg
 ; CHECK: br %r14
   %shift = lshr i64 %a, 64
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 
 ; Check variable shifts.
 define i64 @f4(i64 %a, i64 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: srlg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
   %shift = lshr i64 %a, %amt
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i64 @f5(i64 %a, i64 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: srlg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -50,7 +50,7 @@ define i64 @f5(i64 %a, i64 %amt) {
 
 ; ...and again with a sign-extended 32-bit shift amount.
 define i64 @f6(i64 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: srlg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -61,7 +61,7 @@ define i64 @f6(i64 %a, i32 %amt) {
 
 ; ...and now with a zero-extended 32-bit shift amount.
 define i64 @f7(i64 %a, i32 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: srlg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -73,7 +73,7 @@ define i64 @f7(i64 %a, i32 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i64 @f8(i64 %a, i64 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: srlg %r2, %r2, 524287(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 524287
@@ -84,7 +84,7 @@ define i64 @f8(i64 %a, i64 %amt) {
 ; Check the next value up, which without masking must use a separate
 ; addition.
 define i64 @f9(i64 %a, i64 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: a{{g?}}fi %r3, 524288
 ; CHECK: srlg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -95,7 +95,7 @@ define i64 @f9(i64 %a, i64 %amt) {
 
 ; Check cases where 1 is subtracted from the shift amount.
 define i64 @f10(i64 %a, i64 %amt) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: srlg %r2, %r2, -1(%r3)
 ; CHECK: br %r14
   %sub = sub i64 %amt, 1
@@ -106,7 +106,7 @@ define i64 @f10(i64 %a, i64 %amt) {
 ; Check the lowest value that can be subtracted from the shift amount.
 ; Again, we could mask the shift amount instead.
 define i64 @f11(i64 %a, i64 %amt) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: srlg %r2, %r2, -524288(%r3)
 ; CHECK: br %r14
   %sub = sub i64 %amt, 524288
@@ -117,7 +117,7 @@ define i64 @f11(i64 %a, i64 %amt) {
 ; Check the next value down, which without masking must use a separate
 ; addition.
 define i64 @f12(i64 %a, i64 %amt) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: a{{g?}}fi %r3, -524289
 ; CHECK: srlg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -128,7 +128,7 @@ define i64 @f12(i64 %a, i64 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i64 @f13(i64 %a, i64 %b, i64 %c) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
 ; CHECK: srlg %r2, %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -139,7 +139,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i64 @f14(i64 %a, i64 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: srlg %r2, %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-07.ll b/test/CodeGen/SystemZ/shift-07.ll
index ef583e8f3f0d..712849df8ad1 100644
--- a/test/CodeGen/SystemZ/shift-07.ll
+++ b/test/CodeGen/SystemZ/shift-07.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the SRAG range.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: srag %r2, %r2, 1
 ; CHECK: br %r14
   %shift = ashr i64 %a, 1
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the defined SRAG range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: srag %r2, %r2, 63
 ; CHECK: br %r14
   %shift = ashr i64 %a, 63
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: srag
 ; CHECK: br %r14
   %shift = ashr i64 %a, 64
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 
 ; Check variable shifts.
 define i64 @f4(i64 %a, i64 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: srag %r2, %r2, 0(%r3)
 ; CHECK: br %r14
   %shift = ashr i64 %a, %amt
@@ -40,7 +40,7 @@ define i64 @f4(i64 %a, i64 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i64 @f5(i64 %a, i64 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: srag %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -50,7 +50,7 @@ define i64 @f5(i64 %a, i64 %amt) {
 
 ; ...and again with a sign-extended 32-bit shift amount.
 define i64 @f6(i64 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: srag %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -61,7 +61,7 @@ define i64 @f6(i64 %a, i32 %amt) {
 
 ; ...and now with a zero-extended 32-bit shift amount.
 define i64 @f7(i64 %a, i32 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: srag %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -73,7 +73,7 @@ define i64 @f7(i64 %a, i32 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i64 @f8(i64 %a, i64 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: srag %r2, %r2, 524287(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 524287
@@ -84,7 +84,7 @@ define i64 @f8(i64 %a, i64 %amt) {
 ; Check the next value up, which without masking must use a separate
 ; addition.
 define i64 @f9(i64 %a, i64 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: a{{g?}}fi %r3, 524288
 ; CHECK: srag %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -95,7 +95,7 @@ define i64 @f9(i64 %a, i64 %amt) {
 
 ; Check cases where 1 is subtracted from the shift amount.
 define i64 @f10(i64 %a, i64 %amt) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: srag %r2, %r2, -1(%r3)
 ; CHECK: br %r14
   %sub = sub i64 %amt, 1
@@ -106,7 +106,7 @@ define i64 @f10(i64 %a, i64 %amt) {
 ; Check the lowest value that can be subtracted from the shift amount.
 ; Again, we could mask the shift amount instead.
 define i64 @f11(i64 %a, i64 %amt) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: srag %r2, %r2, -524288(%r3)
 ; CHECK: br %r14
   %sub = sub i64 %amt, 524288
@@ -117,7 +117,7 @@ define i64 @f11(i64 %a, i64 %amt) {
 ; Check the next value down, which without masking must use a separate
 ; addition.
 define i64 @f12(i64 %a, i64 %amt) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: a{{g?}}fi %r3, -524289
 ; CHECK: srag %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -128,7 +128,7 @@ define i64 @f12(i64 %a, i64 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i64 @f13(i64 %a, i64 %b, i64 %c) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
 ; CHECK: srag %r2, %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -139,7 +139,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i64 @f14(i64 %a, i64 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: srag %r2, %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-08.ll b/test/CodeGen/SystemZ/shift-08.ll
index 0688a0671671..47283b50221c 100644
--- a/test/CodeGen/SystemZ/shift-08.ll
+++ b/test/CodeGen/SystemZ/shift-08.ll
@@ -4,7 +4,7 @@
 
 ; Check the low end of the RLLG range.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: rllg %r2, %r2, 1
 ; CHECK: br %r14
   %parta = shl i64 %a, 1
@@ -15,7 +15,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the defined RLLG range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: rllg %r2, %r2, 63
 ; CHECK: br %r14
   %parta = shl i64 %a, 63
@@ -26,7 +26,7 @@ define i64 @f2(i64 %a) {
 
 ; We don't generate shifts by out-of-range values.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NOT: rllg
 ; CHECK: br %r14
   %parta = shl i64 %a, 64
@@ -37,7 +37,7 @@ define i64 @f3(i64 %a) {
 
 ; Check variable shifts.
 define i64 @f4(i64 %a, i64 %amt) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: rllg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
   %amtb = sub i64 64, %amt
@@ -49,7 +49,7 @@ define i64 @f4(i64 %a, i64 %amt) {
 
 ; Check shift amounts that have a constant term.
 define i64 @f5(i64 %a, i64 %amt) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: rllg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 10
@@ -62,7 +62,7 @@ define i64 @f5(i64 %a, i64 %amt) {
 
 ; ...and again with a sign-extended 32-bit shift amount.
 define i64 @f6(i64 %a, i32 %amt) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: rllg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -77,7 +77,7 @@ define i64 @f6(i64 %a, i32 %amt) {
 
 ; ...and now with a zero-extended 32-bit shift amount.
 define i64 @f7(i64 %a, i32 %amt) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: rllg %r2, %r2, 10(%r3)
 ; CHECK: br %r14
   %add = add i32 %amt, 10
@@ -93,7 +93,7 @@ define i64 @f7(i64 %a, i32 %amt) {
 ; Check shift amounts that have the largest in-range constant term.  We could
 ; mask the amount instead.
 define i64 @f8(i64 %a, i64 %amt) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: rllg %r2, %r2, 524287(%r3)
 ; CHECK: br %r14
   %add = add i64 %amt, 524287
@@ -107,7 +107,7 @@ define i64 @f8(i64 %a, i64 %amt) {
 ; Check the next value up, which without masking must use a separate
 ; addition.
 define i64 @f9(i64 %a, i64 %amt) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: a{{g?}}fi %r3, 524288
 ; CHECK: rllg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -121,7 +121,7 @@ define i64 @f9(i64 %a, i64 %amt) {
 
 ; Check cases where 1 is subtracted from the shift amount.
 define i64 @f10(i64 %a, i64 %amt) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: rllg %r2, %r2, -1(%r3)
 ; CHECK: br %r14
   %suba = sub i64 %amt, 1
@@ -135,7 +135,7 @@ define i64 @f10(i64 %a, i64 %amt) {
 ; Check the lowest value that can be subtracted from the shift amount.
 ; Again, we could mask the shift amount instead.
 define i64 @f11(i64 %a, i64 %amt) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: rllg %r2, %r2, -524288(%r3)
 ; CHECK: br %r14
   %suba = sub i64 %amt, 524288
@@ -149,7 +149,7 @@ define i64 @f11(i64 %a, i64 %amt) {
 ; Check the next value down, which without masking must use a separate
 ; addition.
 define i64 @f12(i64 %a, i64 %amt) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: a{{g?}}fi %r3, -524289
 ; CHECK: rllg %r2, %r2, 0(%r3)
 ; CHECK: br %r14
@@ -163,7 +163,7 @@ define i64 @f12(i64 %a, i64 %amt) {
 
 ; Check that we don't try to generate "indexed" shifts.
 define i64 @f13(i64 %a, i64 %b, i64 %c) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
 ; CHECK: rllg %r2, %r2, 0({{%r[34]}})
 ; CHECK: br %r14
@@ -177,7 +177,7 @@ define i64 @f13(i64 %a, i64 %b, i64 %c) {
 
 ; Check that the shift amount uses an address register.  It cannot be in %r0.
 define i64 @f14(i64 %a, i64 *%ptr) {
-; CHECK: f14:
+; CHECK-LABEL: f14:
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: rllg %r2, %r2, 0(%r1)
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/shift-09.ll b/test/CodeGen/SystemZ/shift-09.ll
new file mode 100644
index 000000000000..c87cf0d9a1ee
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-09.ll
@@ -0,0 +1,63 @@
+; Test three-operand shifts.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check that we use SLLK over SLL where useful (source expected in %r3, result in %r2).
+define i32 @f1(i32 %a, i32 %b, i32 %amt) {
+; CHECK-LABEL: f1:
+; CHECK: sllk %r2, %r3, 15(%r4)
+; CHECK: br %r14
+  %add = add i32 %amt, 15
+  %shift = shl i32 %b, %add
+  ret i32 %shift
+}
+
+; Check that we use SLL over SLLK where possible (source and result both expected in %r2).
+define i32 @f2(i32 %a, i32 %amt) {
+; CHECK-LABEL: f2:
+; CHECK: sll %r2, 15(%r3)
+; CHECK: br %r14
+  %add = add i32 %amt, 15
+  %shift = shl i32 %a, %add
+  ret i32 %shift
+}
+
+; Check that we use SRLK over SRL where useful (source expected in %r3, result in %r2).
+define i32 @f3(i32 %a, i32 %b, i32 %amt) {
+; CHECK-LABEL: f3:
+; CHECK: srlk %r2, %r3, 15(%r4)
+; CHECK: br %r14
+  %add = add i32 %amt, 15
+  %shift = lshr i32 %b, %add
+  ret i32 %shift
+}
+
+; Check that we use SRL over SRLK where possible (source and result both expected in %r2).
+define i32 @f4(i32 %a, i32 %amt) {
+; CHECK-LABEL: f4:
+; CHECK: srl %r2, 15(%r3)
+; CHECK: br %r14
+  %add = add i32 %amt, 15
+  %shift = lshr i32 %a, %add
+  ret i32 %shift
+}
+
+; Check that we use SRAK over SRA where useful (source expected in %r3, result in %r2).
+define i32 @f5(i32 %a, i32 %b, i32 %amt) {
+; CHECK-LABEL: f5:
+; CHECK: srak %r2, %r3, 15(%r4)
+; CHECK: br %r14
+  %add = add i32 %amt, 15
+  %shift = ashr i32 %b, %add
+  ret i32 %shift
+}
+
+; Check that we use SRA over SRAK where possible (source and result both expected in %r2).
+define i32 @f6(i32 %a, i32 %amt) {
+; CHECK-LABEL: f6:
+; CHECK: sra %r2, 15(%r3)
+; CHECK: br %r14
+  %add = add i32 %amt, 15
+  %shift = ashr i32 %a, %add
+  ret i32 %shift
+}
diff --git a/test/CodeGen/SystemZ/shift-10.ll b/test/CodeGen/SystemZ/shift-10.ll
new file mode 100644
index 000000000000..46ed2180dfd4
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-10.ll
@@ -0,0 +1,78 @@
+; Test compound shifts.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test a shift right followed by a sign extension of the low bit (trunc to i1, sext to i64).  This can use two shifts.
+define i64 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 62
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %shr = lshr i32 %a, 1
+  %trunc = trunc i32 %shr to i1
+  %ext = sext i1 %trunc to i64
+  ret i64 %ext
+}
+
+; ...and again with the highest shift count used on the i32 input (31).
+define i64 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 32
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %shr = lshr i32 %a, 31
+  %trunc = trunc i32 %shr to i1
+  %ext = sext i1 %trunc to i64
+  ret i64 %ext
+}
+
+; Test a left shift of a zero-extended right shift in a case where folding
+; is possible (the whole sequence, including the AND, becomes one RISBG).
+define i64 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: risbg %r2, %r2, 27, 181, 9
+; CHECK: br %r14
+  %shr = lshr i32 %a, 1
+  %ext = zext i32 %shr to i64
+  %shl = shl i64 %ext, 10
+  %and = and i64 %shl, 137438952960
+  ret i64 %and
+}
+
+; ...and again with a larger right shift (30), a sign extension and a single-bit AND mask.
+define i64 @f4(i32 %a) {
+; CHECK-LABEL: f4:
+; CHECK: risbg %r2, %r2, 30, 158, 3
+; CHECK: br %r14
+  %shr = lshr i32 %a, 30
+  %ext = sext i32 %shr to i64
+  %shl = shl i64 %ext, 33
+  %and = and i64 %shl, 8589934592
+  ret i64 %and
+}
+
+; Repeat the previous test in a case where all bits outside the
+; bottom 3 matter (the bottom 3 are forced to 1 by the OR with 7).
+define i64 @f5(i32 %a) {
+; CHECK-LABEL: f5:
+; CHECK: risbg %r2, %r2, 29, 158, 3
+; CHECK: lhi %r2, 7
+; CHECK: br %r14
+  %shr = lshr i32 %a, 30
+  %ext = sext i32 %shr to i64
+  %shl = shl i64 %ext, 33
+  %or = or i64 %shl, 7
+  ret i64 %or
+}
+
+; Test that SRA gets replaced with SRL if the sign bit is the only one
+; that matters (the AND keeps only the 2^8 bit, a sign copy after the ashr by 60).
+define i64 @f6(i64 %a) {
+; CHECK-LABEL: f6:
+; CHECK: risbg %r2, %r2, 55, 183, 19
+; CHECK: br %r14
+  %shl = shl i64 %a, 10
+  %shr = ashr i64 %shl, 60
+  %and = and i64 %shr, 256
+  ret i64 %and
+}
diff --git a/test/CodeGen/SystemZ/spill-01.ll b/test/CodeGen/SystemZ/spill-01.ll
new file mode 100644
index 000000000000..ca64a88f2a0d
--- /dev/null
+++ b/test/CodeGen/SystemZ/spill-01.ll
@@ -0,0 +1,548 @@
+; Test spilling using MVC.  The tests here assume z10 register pressure,
+; without the high words being available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+declare void @foo()
+
+@g0 = global i32 0
+@g1 = global i32 1
+@g2 = global i32 2
+@g3 = global i32 3
+@g4 = global i32 4
+@g5 = global i32 5
+@g6 = global i32 6
+@g7 = global i32 7
+@g8 = global i32 8
+@g9 = global i32 9
+
+@h0 = global i64 0
+@h1 = global i64 1
+@h2 = global i64 2
+@h3 = global i64 3
+@h4 = global i64 4
+@h5 = global i64 5
+@h6 = global i64 6
+@h7 = global i64 7
+@h8 = global i64 8
+@h9 = global i64 9
+
+; This function shouldn't spill anything: 7 values are live across the call and no %r15 access is allowed around it.
+define void @f1(i32 *%ptr0) {
+; CHECK-LABEL: f1:
+; CHECK: stmg
+; CHECK: aghi %r15, -160
+; CHECK-NOT: %r15
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: lmg
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i32 2
+  %ptr2 = getelementptr i32 *%ptr0, i32 4
+  %ptr3 = getelementptr i32 *%ptr0, i32 6
+  %ptr4 = getelementptr i32 *%ptr0, i32 8
+  %ptr5 = getelementptr i32 *%ptr0, i32 10
+  %ptr6 = getelementptr i32 *%ptr0, i32 12
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+
+  call void @foo()
+
+  store i32 %val0, i32 *%ptr0
+  store i32 %val1, i32 *%ptr1
+  store i32 %val2, i32 *%ptr2
+  store i32 %val3, i32 *%ptr3
+  store i32 %val4, i32 *%ptr4
+  store i32 %val5, i32 *%ptr5
+  store i32 %val6, i32 *%ptr6
+
+  ret void
+}
+
+; Test a case where at least one i32 load and at least one i32 store
+; need spills (9 values are live across the call; the slot is 160 or 164 off %r15).
+define void @f2(i32 *%ptr0) {
+; CHECK-LABEL: f2:
+; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+
+  call void @foo()
+
+  store i32 %val0, i32 *%ptr0
+  store i32 %val1, i32 *%ptr1
+  store i32 %val2, i32 *%ptr2
+  store i32 %val3, i32 *%ptr3
+  store i32 %val4, i32 *%ptr4
+  store i32 %val5, i32 *%ptr5
+  store i32 %val6, i32 *%ptr6
+  store i32 %val7, i32 *%ptr7
+  store i32 %val8, i32 *%ptr8
+
+  ret void
+}
+
+; Test a case where at least one i64 load and at least one i64 store
+; need spills (9 values are live across the call; the slot is 160(%r15)).
+define void @f3(i64 *%ptr0) {
+; CHECK-LABEL: f3:
+; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+
+  call void @foo()
+
+  store i64 %val0, i64 *%ptr0
+  store i64 %val1, i64 *%ptr1
+  store i64 %val2, i64 *%ptr2
+  store i64 %val3, i64 *%ptr3
+  store i64 %val4, i64 *%ptr4
+  store i64 %val5, i64 *%ptr5
+  store i64 %val6, i64 *%ptr6
+  store i64 %val7, i64 *%ptr7
+  store i64 %val8, i64 *%ptr8
+
+  ret void
+}
+
+
+; Test a case where at least one f32 load and at least one f32 store
+; need spills.  The 8 call-saved FPRs could be used for 8 of the %vals
+; (and are at the time of writing), but it would really be better to use
+; MVC for all 10.
+define void @f4(float *%ptr0) {
+; CHECK-LABEL: f4:
+; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%ptr0, i64 2
+  %ptr2 = getelementptr float *%ptr0, i64 4
+  %ptr3 = getelementptr float *%ptr0, i64 6
+  %ptr4 = getelementptr float *%ptr0, i64 8
+  %ptr5 = getelementptr float *%ptr0, i64 10
+  %ptr6 = getelementptr float *%ptr0, i64 12
+  %ptr7 = getelementptr float *%ptr0, i64 14
+  %ptr8 = getelementptr float *%ptr0, i64 16
+  %ptr9 = getelementptr float *%ptr0, i64 18
+
+  %val0 = load float *%ptr0
+  %val1 = load float *%ptr1
+  %val2 = load float *%ptr2
+  %val3 = load float *%ptr3
+  %val4 = load float *%ptr4
+  %val5 = load float *%ptr5
+  %val6 = load float *%ptr6
+  %val7 = load float *%ptr7
+  %val8 = load float *%ptr8
+  %val9 = load float *%ptr9
+
+  call void @foo()
+
+  store float %val0, float *%ptr0
+  store float %val1, float *%ptr1
+  store float %val2, float *%ptr2
+  store float %val3, float *%ptr3
+  store float %val4, float *%ptr4
+  store float %val5, float *%ptr5
+  store float %val6, float *%ptr6
+  store float %val7, float *%ptr7
+  store float %val8, float *%ptr8
+  store float %val9, float *%ptr9
+
+  ret void
+}
+
+; Similarly for f64: 10 doubles are live across the call and an 8-byte MVC to and from 160(%r15) is expected.
+define void @f5(double *%ptr0) {
+; CHECK-LABEL: f5:
+; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%ptr0, i64 2
+  %ptr2 = getelementptr double *%ptr0, i64 4
+  %ptr3 = getelementptr double *%ptr0, i64 6
+  %ptr4 = getelementptr double *%ptr0, i64 8
+  %ptr5 = getelementptr double *%ptr0, i64 10
+  %ptr6 = getelementptr double *%ptr0, i64 12
+  %ptr7 = getelementptr double *%ptr0, i64 14
+  %ptr8 = getelementptr double *%ptr0, i64 16
+  %ptr9 = getelementptr double *%ptr0, i64 18
+
+  %val0 = load double *%ptr0
+  %val1 = load double *%ptr1
+  %val2 = load double *%ptr2
+  %val3 = load double *%ptr3
+  %val4 = load double *%ptr4
+  %val5 = load double *%ptr5
+  %val6 = load double *%ptr6
+  %val7 = load double *%ptr7
+  %val8 = load double *%ptr8
+  %val9 = load double *%ptr9
+
+  call void @foo()
+
+  store double %val0, double *%ptr0
+  store double %val1, double *%ptr1
+  store double %val2, double *%ptr2
+  store double %val3, double *%ptr3
+  store double %val4, double *%ptr4
+  store double %val5, double *%ptr5
+  store double %val6, double *%ptr6
+  store double %val7, double *%ptr7
+  store double %val8, double *%ptr8
+  store double %val9, double *%ptr9
+
+  ret void
+}
+
+; Repeat f2 with atomic (unordered, 4-byte aligned) accesses.  We shouldn't use MVC here.
+define void @f6(i32 *%ptr0) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+
+  %val0 = load atomic i32 *%ptr0 unordered, align 4
+  %val1 = load atomic i32 *%ptr1 unordered, align 4
+  %val2 = load atomic i32 *%ptr2 unordered, align 4
+  %val3 = load atomic i32 *%ptr3 unordered, align 4
+  %val4 = load atomic i32 *%ptr4 unordered, align 4
+  %val5 = load atomic i32 *%ptr5 unordered, align 4
+  %val6 = load atomic i32 *%ptr6 unordered, align 4
+  %val7 = load atomic i32 *%ptr7 unordered, align 4
+  %val8 = load atomic i32 *%ptr8 unordered, align 4
+
+  call void @foo()
+
+  store atomic i32 %val0, i32 *%ptr0 unordered, align 4
+  store atomic i32 %val1, i32 *%ptr1 unordered, align 4
+  store atomic i32 %val2, i32 *%ptr2 unordered, align 4
+  store atomic i32 %val3, i32 *%ptr3 unordered, align 4
+  store atomic i32 %val4, i32 *%ptr4 unordered, align 4
+  store atomic i32 %val5, i32 *%ptr5 unordered, align 4
+  store atomic i32 %val6, i32 *%ptr6 unordered, align 4
+  store atomic i32 %val7, i32 *%ptr7 unordered, align 4
+  store atomic i32 %val8, i32 *%ptr8 unordered, align 4
+
+  ret void
+}
+
+; ...likewise volatile accesses: no MVC may appear before the return.
+define void @f7(i32 *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+
+  %val0 = load volatile i32 *%ptr0
+  %val1 = load volatile i32 *%ptr1
+  %val2 = load volatile i32 *%ptr2
+  %val3 = load volatile i32 *%ptr3
+  %val4 = load volatile i32 *%ptr4
+  %val5 = load volatile i32 *%ptr5
+  %val6 = load volatile i32 *%ptr6
+  %val7 = load volatile i32 *%ptr7
+  %val8 = load volatile i32 *%ptr8
+
+  call void @foo()
+
+  store volatile i32 %val0, i32 *%ptr0
+  store volatile i32 %val1, i32 *%ptr1
+  store volatile i32 %val2, i32 *%ptr2
+  store volatile i32 %val3, i32 *%ptr3
+  store volatile i32 %val4, i32 *%ptr4
+  store volatile i32 %val5, i32 *%ptr5
+  store volatile i32 %val6, i32 *%ptr6
+  store volatile i32 %val7, i32 *%ptr7
+  store volatile i32 %val8, i32 *%ptr8
+
+  ret void
+}
+
+; Check that LRL and STRL are not converted to MVC when the i32 globals @g0-@g9 are live across the call.
+define void @f8() {
+; CHECK-LABEL: f8:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %val0 = load i32 *@g0
+  %val1 = load i32 *@g1
+  %val2 = load i32 *@g2
+  %val3 = load i32 *@g3
+  %val4 = load i32 *@g4
+  %val5 = load i32 *@g5
+  %val6 = load i32 *@g6
+  %val7 = load i32 *@g7
+  %val8 = load i32 *@g8
+  %val9 = load i32 *@g9
+
+  call void @foo()
+
+  store i32 %val0, i32 *@g0
+  store i32 %val1, i32 *@g1
+  store i32 %val2, i32 *@g2
+  store i32 %val3, i32 *@g3
+  store i32 %val4, i32 *@g4
+  store i32 %val5, i32 *@g5
+  store i32 %val6, i32 *@g6
+  store i32 %val7, i32 *@g7
+  store i32 %val8, i32 *@g8
+  store i32 %val9, i32 *@g9
+
+  ret void
+}
+
+; Likewise LGRL and STGRL, using the i64 globals @h0-@h9.
+define void @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: mvc
+; CHECK: br %r14
+  %val0 = load i64 *@h0
+  %val1 = load i64 *@h1
+  %val2 = load i64 *@h2
+  %val3 = load i64 *@h3
+  %val4 = load i64 *@h4
+  %val5 = load i64 *@h5
+  %val6 = load i64 *@h6
+  %val7 = load i64 *@h7
+  %val8 = load i64 *@h8
+  %val9 = load i64 *@h9
+
+  call void @foo()
+
+  store i64 %val0, i64 *@h0
+  store i64 %val1, i64 *@h1
+  store i64 %val2, i64 *@h2
+  store i64 %val3, i64 *@h3
+  store i64 %val4, i64 *@h4
+  store i64 %val5, i64 *@h5
+  store i64 %val6, i64 *@h6
+  store i64 %val7, i64 *@h7
+  store i64 %val8, i64 *@h8
+  store i64 %val9, i64 *@h9
+
+  ret void
+}
+
+; This showed a problem with the way stack coloring updated instructions.
+; The copy from %val9 to %newval8 (the phi in %skip) can be done using an MVC,
+; which then has two frame index operands.  Stack coloring chose a valid
+; renumbering [FI0, FI1] -> [FI1, FI2], but applied it in the form
+; FI0 -> FI1 -> FI2, so that both operands ended up being the same.
+define void @f10() {
+; CHECK-LABEL: f10:
+; CHECK: lgrl [[REG:%r[0-9]+]], h9
+; CHECK: stg [[REG]], [[VAL9:[0-9]+]](%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc [[NEWVAL8:[0-9]+]](8,%r15), [[VAL9]](%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: lg [[REG:%r[0-9]+]], [[NEWVAL8]](%r15)
+; CHECK: stgrl [[REG]], h8
+; CHECK: br %r14
+entry:
+  %val0 = load volatile i64 *@h0
+  %val1 = load volatile i64 *@h1
+  %val2 = load volatile i64 *@h2
+  %val3 = load volatile i64 *@h3
+  %val4 = load volatile i64 *@h4
+  %val5 = load volatile i64 *@h5
+  %val6 = load volatile i64 *@h6
+  %val7 = load volatile i64 *@h7
+  %val8 = load volatile i64 *@h8
+  %val9 = load volatile i64 *@h9
+
+  call void @foo()
+
+  store volatile i64 %val0, i64 *@h0
+  store volatile i64 %val1, i64 *@h1
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+
+  %check = load volatile i64 *@h0
+  %cond = icmp eq i64 %check, 0
+  br i1 %cond, label %skip, label %fallthru
+
+fallthru:
+  call void @foo()
+
+  store volatile i64 %val0, i64 *@h0
+  store volatile i64 %val1, i64 *@h1
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+  store volatile i64 %val8, i64 *@h8
+  br label %skip
+
+skip:
+  %newval8 = phi i64 [ %val8, %entry ], [ %val9, %fallthru ]
+  call void @foo()
+
+  store volatile i64 %val0, i64 *@h0
+  store volatile i64 %val1, i64 *@h1
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+  store volatile i64 %newval8, i64 *@h8
+  store volatile i64 %val9, i64 *@h9
+
+  ret void
+}
+
+; This used to generate a no-op MVC (same %r15 slot as source and destination).  It is very sensitive to spill heuristics.
+define void @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: mvc [[OFFSET:[0-9]+]](8,%r15), [[OFFSET]](%r15)
+; CHECK: br %r14
+entry:
+  %val0 = load volatile i64 *@h0
+  %val1 = load volatile i64 *@h1
+  %val2 = load volatile i64 *@h2
+  %val3 = load volatile i64 *@h3
+  %val4 = load volatile i64 *@h4
+  %val5 = load volatile i64 *@h5
+  %val6 = load volatile i64 *@h6
+  %val7 = load volatile i64 *@h7
+
+  %altval0 = load volatile i64 *@h0
+  %altval1 = load volatile i64 *@h1
+
+  call void @foo()
+
+  store volatile i64 %val0, i64 *@h0
+  store volatile i64 %val1, i64 *@h1
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+
+  %check = load volatile i64 *@h0
+  %cond = icmp eq i64 %check, 0
+  br i1 %cond, label %a1, label %b1
+
+a1:
+  call void @foo()
+  br label %join1
+
+b1:
+  call void @foo()
+  br label %join1
+
+join1:
+  %newval0 = phi i64 [ %val0, %a1 ], [ %altval0, %b1 ]
+
+  call void @foo()
+
+  store volatile i64 %val1, i64 *@h1
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+  br i1 %cond, label %a2, label %b2
+
+a2:
+  call void @foo()
+  br label %join2
+
+b2:
+  call void @foo()
+  br label %join2
+
+join2:
+  %newval1 = phi i64 [ %val1, %a2 ], [ %altval1, %b2 ]
+
+  call void @foo()
+
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+
+  call void @foo()
+
+  store volatile i64 %newval0, i64 *@h0
+  store volatile i64 %newval1, i64 *@h1
+  store volatile i64 %val2, i64 *@h2
+  store volatile i64 %val3, i64 *@h3
+  store volatile i64 %val4, i64 *@h4
+  store volatile i64 %val5, i64 *@h5
+  store volatile i64 %val6, i64 *@h6
+  store volatile i64 %val7, i64 *@h7
+
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/strcmp-01.ll b/test/CodeGen/SystemZ/strcmp-01.ll
new file mode 100644
index 000000000000..122c160babaf
--- /dev/null
+++ b/test/CodeGen/SystemZ/strcmp-01.ll
@@ -0,0 +1,70 @@
+; Test strcmp using CLST, i32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare signext i32 @strcmp(i8 *%src1, i8 *%src2)
+
+; Check a case where the result is used as an integer.
+define i32 @f1(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f1:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: clst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll %r2, [[REG]], 31
+; CHECK: br %r14
+  %res = call i32 @strcmp(i8 *%src1, i8 *%src2)
+  ret i32 %res
+}
+
+; Check a case where the result is tested for equality.
+define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: clst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: je {{\.L.*}}
+; CHECK: br %r14
+  %res = call i32 @strcmp(i8 *%src1, i8 *%src2)
+  %cmp = icmp eq i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test a case where the result is used both as an integer and for
+; branching.
+define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: clst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll %r2, [[REG]], 31
+; CHECK: jl {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %res = call i32 @strcmp(i8 *%src1, i8 *%src2)
+  %cmp = icmp slt i32 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i32 0, i32 *%dest
+  br label %exit
+
+exit:
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/strcmp-02.ll b/test/CodeGen/SystemZ/strcmp-02.ll
new file mode 100644
index 000000000000..27bd00b47fd3
--- /dev/null
+++ b/test/CodeGen/SystemZ/strcmp-02.ll
@@ -0,0 +1,72 @@
+; Test strcmp using CLST, i64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @strcmp(i8 *%src1, i8 *%src2)
+
+; Check a case where the result is used as an integer.
+define i64 @f1(i8 *%src1, i8 *%src2) {
+; CHECK-LABEL: f1:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: clst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll [[REG]], [[REG]], 31
+; CHECK: lgfr %r2, [[REG]]
+; CHECK: br %r14
+  %res = call i64 @strcmp(i8 *%src1, i8 *%src2)
+  ret i64 %res
+}
+
+; Check a case where the result is tested for equality.
+define void @f2(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: clst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: je {{\.L.*}}
+; CHECK: br %r14
+  %res = call i64 @strcmp(i8 *%src1, i8 *%src2)
+  %cmp = icmp eq i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Test a case where the result is used both as an integer and for
+; branching.
+define i64 @f3(i8 *%src1, i8 *%src2, i64 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: clst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: rll [[REG]], [[REG]], 31
+; CHECK: lgfr %r2, [[REG]]
+; CHECK: jl {{\.L.*}}
+; CHECK: br %r14
+entry:
+  %res = call i64 @strcmp(i8 *%src1, i8 *%src2)
+  %cmp = icmp slt i64 %res, 0
+  br i1 %cmp, label %exit, label %store
+
+store:
+  store i64 0, i64 *%dest
+  br label %exit
+
+exit:
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/strcpy-01.ll b/test/CodeGen/SystemZ/strcpy-01.ll
new file mode 100644
index 000000000000..29bab629ecf8
--- /dev/null
+++ b/test/CodeGen/SystemZ/strcpy-01.ll
@@ -0,0 +1,50 @@
+; Test strcpy using MVST.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i8 *@strcpy(i8 *%dest, i8 *%src)
+declare i8 *@stpcpy(i8 *%dest, i8 *%src)
+
+; Check strcpy.
+define i8 *@f1(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: lgr [[REG:%r[145]]], %r2
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: mvst [[REG]], %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  %res = call i8 *@strcpy(i8 *%dest, i8 *%src)
+  ret i8 *%res
+}
+
+; Check stpcpy.
+define i8 *@f2(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi %r0, 0
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: mvst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NOT: %r2
+; CHECK: br %r14
+  %res = call i8 *@stpcpy(i8 *%dest, i8 *%src)
+  ret i8 *%res
+}
+
+; Check correct operation with other loads and stores.  The load must
+; come before the loop and the store afterwards.
+define i32 @f3(i32 %dummy, i8 *%dest, i8 *%src, i32 *%resptr, i32 *%storeptr) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: l %r2, 0(%r5)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: mvst %r3, %r4
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK: mvhi 0(%r6), 0
+; CHECK: br %r14
+  %res = load i32 *%resptr
+  %unused = call i8 *@strcpy(i8 *%dest, i8 *%src)
+  store i32 0, i32 *%storeptr
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/strlen-01.ll b/test/CodeGen/SystemZ/strlen-01.ll
new file mode 100644
index 000000000000..16161d4d2c82
--- /dev/null
+++ b/test/CodeGen/SystemZ/strlen-01.ll
@@ -0,0 +1,39 @@
+; Test strlen using SRST, i64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @strlen(i8 *%src)
+declare i64 @strnlen(i8 *%src, i64 %len)
+
+; Test strlen with its proper i64 prototype.  It would also be valid for
+; the uses of %r3 and REG after the LGR to be swapped.
+define i64 @f1(i32 %dummy, i8 *%src) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: lghi %r2, 0
+; CHECK-DAG: lgr [[REG:%r[145]]], %r3
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: srst %r2, [[REG]]
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: sgr %r2, %r3
+; CHECK: br %r14
+  %res = call i64 @strlen(i8 *%src)
+  ret i64 %res
+}
+
+; Test strnlen with its proper i64 prototype.
+define i64 @f2(i64 %len, i8 *%src) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: agr %r2, %r3
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: lgr [[REG:%r[145]]], %r3
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: srst %r2, [[REG]]
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: sgr %r2, %r3
+; CHECK: br %r14
+  %res = call i64 @strnlen(i8 *%src, i64 %len)
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/strlen-02.ll b/test/CodeGen/SystemZ/strlen-02.ll
new file mode 100644
index 000000000000..e1abbff4b4e0
--- /dev/null
+++ b/test/CodeGen/SystemZ/strlen-02.ll
@@ -0,0 +1,39 @@
+; Test strlen using SRST, i32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @strlen(i8 *%src)
+declare i32 @strnlen(i8 *%src, i32 %len)
+
+; Test strlen with an i32-based prototype.  It would also be valid for
+; the uses of %r3 and REG after the LGR to be swapped.
+define i32 @f1(i32 %dummy, i8 *%src) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: lghi %r2, 0
+; CHECK-DAG: lgr [[REG:%r[145]]], %r3
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: srst %r2, [[REG]]
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: sgr %r2, %r3
+; CHECK: br %r14
+  %res = call i32 @strlen(i8 *%src)
+  ret i32 %res
+}
+
+; Test strnlen with an i32-based prototype.
+define i32 @f2(i32 zeroext %len, i8 *%src) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: agr %r2, %r3
+; CHECK-DAG: lhi %r0, 0
+; CHECK-DAG: lgr [[REG:%r[145]]], %r3
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK-NEXT: srst %r2, [[REG]]
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK-NEXT: BB#{{[0-9]+}}
+; CHECK-NEXT: sgr %r2, %r3
+; CHECK: br %r14
+  %res = call i32 @strnlen(i8 *%src, i32 %len)
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll
index 49037ad51c69..16bc8f6e500f 100644
--- a/test/CodeGen/SystemZ/tls-01.ll
+++ b/test/CodeGen/SystemZ/tls-01.ll
@@ -11,7 +11,7 @@ define i32 *@foo() {
 ; CHECK-CP: .LCP{{.*}}:
 ; CHECK-CP: .quad x@NTPOFF
 ;
-; CHECK-MAIN: foo:
+; CHECK-MAIN-LABEL: foo:
 ; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
 ; CHECK-MAIN: sllg %r2, [[HIGH]], 32
 ; CHECK-MAIN: ear %r2, %a1
diff --git a/test/CodeGen/SystemZ/unaligned-01.ll b/test/CodeGen/SystemZ/unaligned-01.ll
new file mode 100644
index 000000000000..526a068100ef
--- /dev/null
+++ b/test/CodeGen/SystemZ/unaligned-01.ll
@@ -0,0 +1,62 @@
+; Check that unaligned accesses are allowed in general.  We check the
+; few exceptions (like CRL) in their respective test files.
+;
+; FIXME: -combiner-alias-analysis (the default for SystemZ) stops
+;        f1 from being optimized.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -combiner-alias-analysis=false \
+; RUN:   | FileCheck %s
+
+; Check that these four byte stores become a single word store.
+define void @f1(i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: iilf [[REG:%r[0-5]]], 66051
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %off1 = getelementptr i8 *%ptr, i64 1
+  %off2 = getelementptr i8 *%ptr, i64 2
+  %off3 = getelementptr i8 *%ptr, i64 3
+  store i8 0, i8 *%ptr
+  store i8 1, i8 *%off1
+  store i8 2, i8 *%off2
+  store i8 3, i8 *%off3
+  ret void
+}
+
+; Check that unaligned 2-byte accesses are allowed.
+define i16 @f2(i16 *%src, i16 *%dst) {
+; CHECK-LABEL: f2:
+; CHECK: lh %r2, 0(%r2)
+; CHECK: sth %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i16 *%src, align 1
+  store i16 %val, i16 *%dst, align 1
+  ret i16 %val
+}
+
+; Check that unaligned 4-byte accesses are allowed.
+define i32 @f3(i32 *%src1, i32 *%src2, i32 *%dst) {
+; CHECK-LABEL: f3:
+; CHECK: l %r2, 0(%r2)
+; CHECK: s %r2, 0(%r3)
+; CHECK: st %r2, 0(%r4)
+; CHECK: br %r14
+  %val1 = load i32 *%src1, align 1
+  %val2 = load i32 *%src2, align 2
+  %sub = sub i32 %val1, %val2
+  store i32 %sub, i32 *%dst, align 1
+  ret i32 %sub
+}
+
+; Check that unaligned 8-byte accesses are allowed.
+define i64 @f4(i64 *%src1, i64 *%src2, i64 *%dst) {
+; CHECK-LABEL: f4:
+; CHECK: lg %r2, 0(%r2)
+; CHECK: sg %r2, 0(%r3)
+; CHECK: stg %r2, 0(%r4)
+; CHECK: br %r14
+  %val1 = load i64 *%src1, align 1
+  %val2 = load i64 *%src2, align 2
+  %sub = sub i64 %val1, %val2
+  store i64 %sub, i64 *%dst, align 4
+  ret i64 %sub
+}
diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll
index 30bdbe7901f9..185d6bb0a754 100644
--- a/test/CodeGen/SystemZ/xor-01.ll
+++ b/test/CodeGen/SystemZ/xor-01.ll
@@ -1,10 +1,13 @@
 ; Test 32-bit XORs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo()
 
 ; Check XR.
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: xr %r2, %r3
 ; CHECK: br %r14
   %xor = xor i32 %a, %b
@@ -13,7 +16,7 @@ define i32 @f1(i32 %a, i32 %b) {
 
 ; Check the low end of the X range.
 define i32 @f2(i32 %a, i32 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: x %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i32 *%src
@@ -23,7 +26,7 @@ define i32 @f2(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned X range.
 define i32 @f3(i32 %a, i32 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: x %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1023
@@ -34,7 +37,7 @@ define i32 @f3(i32 %a, i32 *%src) {
 
 ; Check the next word up, which should use XY instead of X.
 define i32 @f4(i32 %a, i32 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 1024
@@ -45,7 +48,7 @@ define i32 @f4(i32 %a, i32 *%src) {
 
 ; Check the high end of the aligned XY range.
 define i32 @f5(i32 %a, i32 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 131071
@@ -57,7 +60,7 @@ define i32 @f5(i32 %a, i32 *%src) {
 ; Check the next word up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f6(i32 %a, i32 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: agfi %r3, 524288
 ; CHECK: x %r2, 0(%r3)
 ; CHECK: br %r14
@@ -69,7 +72,7 @@ define i32 @f6(i32 %a, i32 *%src) {
 
 ; Check the high end of the negative aligned XY range.
 define i32 @f7(i32 %a, i32 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: xy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -1
@@ -80,7 +83,7 @@ define i32 @f7(i32 %a, i32 *%src) {
 
 ; Check the low end of the XY range.
 define i32 @f8(i32 %a, i32 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: xy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32 *%src, i64 -131072
@@ -92,7 +95,7 @@ define i32 @f8(i32 %a, i32 *%src) {
 ; Check the next word down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i32 @f9(i32 %a, i32 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: agfi %r3, -524292
 ; CHECK: x %r2, 0(%r3)
 ; CHECK: br %r14
@@ -104,7 +107,7 @@ define i32 @f9(i32 %a, i32 *%src) {
 
 ; Check that X allows an index.
 define i32 @f10(i32 %a, i64 %src, i64 %index) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: x %r2, 4092({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -117,7 +120,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
 
 ; Check that XY allows an index.
 define i32 @f11(i32 %a, i64 %src, i64 %index) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: xy %r2, 4096({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -127,3 +130,46 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
+
+; Check that XORs of spilled values can use X rather than XR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: x %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %xor0 = xor i32 %ret, %val0
+  %xor1 = xor i32 %xor0, %val1
+  %xor2 = xor i32 %xor1, %val2
+  %xor3 = xor i32 %xor2, %val3
+  %xor4 = xor i32 %xor3, %val4
+  %xor5 = xor i32 %xor4, %val5
+  %xor6 = xor i32 %xor5, %val6
+  %xor7 = xor i32 %xor6, %val7
+  %xor8 = xor i32 %xor7, %val8
+  %xor9 = xor i32 %xor8, %val9
+
+  ret i32 %xor9
+}
diff --git a/test/CodeGen/SystemZ/xor-02.ll b/test/CodeGen/SystemZ/xor-02.ll
index c2b52b9b8e20..7e28e231cfcd 100644
--- a/test/CodeGen/SystemZ/xor-02.ll
+++ b/test/CodeGen/SystemZ/xor-02.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful XILF value.
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: xilf %r2, 1
 ; CHECK: br %r14
   %xor = xor i32 %a, 1
@@ -13,7 +13,7 @@ define i32 @f1(i32 %a) {
 
 ; Check the high end of the signed range.
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: xilf %r2, 2147483647
 ; CHECK: br %r14
   %xor = xor i32 %a, 2147483647
@@ -23,7 +23,7 @@ define i32 @f2(i32 %a) {
 ; Check the low end of the signed range, which should be treated
 ; as a positive value.
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xilf %r2, 2147483648
 ; CHECK: br %r14
   %xor = xor i32 %a, -2147483648
@@ -32,7 +32,7 @@ define i32 @f3(i32 %a) {
 
 ; Check the high end of the XILF range.
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xilf %r2, 4294967295
 ; CHECK: br %r14
   %xor = xor i32 %a, 4294967295
diff --git a/test/CodeGen/SystemZ/xor-03.ll b/test/CodeGen/SystemZ/xor-03.ll
index a4851b33090d..ab7f2584b60d 100644
--- a/test/CodeGen/SystemZ/xor-03.ll
+++ b/test/CodeGen/SystemZ/xor-03.ll
@@ -1,10 +1,13 @@
 ; Test 64-bit XORs in which the second operand is variable.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i64 @foo()
 
 ; Check XGR.
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: xgr %r2, %r3
 ; CHECK: br %r14
   %xor = xor i64 %a, %b
@@ -13,7 +16,7 @@ define i64 @f1(i64 %a, i64 %b) {
 
 ; Check XG with no displacement.
 define i64 @f2(i64 %a, i64 *%src) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: xg %r2, 0(%r3)
 ; CHECK: br %r14
   %b = load i64 *%src
@@ -23,7 +26,7 @@ define i64 @f2(i64 %a, i64 *%src) {
 
 ; Check the high end of the aligned XG range.
 define i64 @f3(i64 %a, i64 *%src) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xg %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 65535
@@ -35,7 +38,7 @@ define i64 @f3(i64 %a, i64 *%src) {
 ; Check the next doubleword up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f4(i64 %a, i64 *%src) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: agfi %r3, 524288
 ; CHECK: xg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -47,7 +50,7 @@ define i64 @f4(i64 %a, i64 *%src) {
 
 ; Check the high end of the negative aligned XG range.
 define i64 @f5(i64 %a, i64 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xg %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -1
@@ -58,7 +61,7 @@ define i64 @f5(i64 %a, i64 *%src) {
 
 ; Check the low end of the XG range.
 define i64 @f6(i64 %a, i64 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xg %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64 *%src, i64 -65536
@@ -70,7 +73,7 @@ define i64 @f6(i64 %a, i64 *%src) {
 ; Check the next doubleword down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define i64 @f7(i64 %a, i64 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: agfi %r3, -524296
 ; CHECK: xg %r2, 0(%r3)
 ; CHECK: br %r14
@@ -82,7 +85,7 @@ define i64 @f7(i64 %a, i64 *%src) {
 
 ; Check that XG allows an index.
 define i64 @f8(i64 %a, i64 %src, i64 %index) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: xg %r2, 524280({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
   %add1 = add i64 %src, %index
@@ -92,3 +95,46 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
+
+; Check that XORs of spilled values can use XG rather than XGR.
+define i64 @f9(i64 *%ptr0) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: xg %r2, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64 *%ptr0, i64 18
+
+  %val0 = load i64 *%ptr0
+  %val1 = load i64 *%ptr1
+  %val2 = load i64 *%ptr2
+  %val3 = load i64 *%ptr3
+  %val4 = load i64 *%ptr4
+  %val5 = load i64 *%ptr5
+  %val6 = load i64 *%ptr6
+  %val7 = load i64 *%ptr7
+  %val8 = load i64 *%ptr8
+  %val9 = load i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %xor0 = xor i64 %ret, %val0
+  %xor1 = xor i64 %xor0, %val1
+  %xor2 = xor i64 %xor1, %val2
+  %xor3 = xor i64 %xor2, %val3
+  %xor4 = xor i64 %xor3, %val4
+  %xor5 = xor i64 %xor4, %val5
+  %xor6 = xor i64 %xor5, %val6
+  %xor7 = xor i64 %xor6, %val7
+  %xor8 = xor i64 %xor7, %val8
+  %xor9 = xor i64 %xor8, %val9
+
+  ret i64 %xor9
+}
diff --git a/test/CodeGen/SystemZ/xor-04.ll b/test/CodeGen/SystemZ/xor-04.ll
index cc141d391a85..44f0a4cc39d0 100644
--- a/test/CodeGen/SystemZ/xor-04.ll
+++ b/test/CodeGen/SystemZ/xor-04.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful XILF value.
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: xilf %r2, 1
 ; CHECK: br %r14
   %xor = xor i64 %a, 1
@@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
 
 ; Check the high end of the XILF range.
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: xilf %r2, 4294967295
 ; CHECK: br %r14
   %xor = xor i64 %a, 4294967295
@@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
 
 ; Check the lowest useful XIHF value, which is one up from the above.
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xihf %r2, 1
 ; CHECK: br %r14
   %xor = xor i64 %a, 4294967296
@@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
 
 ; Check the next value up again, which needs a combination of XIHF and XILF.
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xihf %r2, 1
 ; CHECK: xilf %r2, 4294967295
 ; CHECK: br %r14
@@ -41,7 +41,7 @@ define i64 @f4(i64 %a) {
 
 ; Check the high end of the XIHF range.
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xihf %r2, 4294967295
 ; CHECK: br %r14
   %xor = xor i64 %a, -4294967296
@@ -50,7 +50,7 @@ define i64 @f5(i64 %a) {
 
 ; Check the next value up, which again must use XIHF and XILF.
 define i64 @f6(i64 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xihf %r2, 4294967295
 ; CHECK: xilf %r2, 1
 ; CHECK: br %r14
@@ -60,7 +60,7 @@ define i64 @f6(i64 %a) {
 
 ; Check full bitwise negation
 define i64 @f7(i64 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: xihf %r2, 4294967295
 ; CHECK: xilf %r2, 4294967295
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/xor-05.ll b/test/CodeGen/SystemZ/xor-05.ll
index 9ef0d20ca52b..fbd5660ad058 100644
--- a/test/CodeGen/SystemZ/xor-05.ll
+++ b/test/CodeGen/SystemZ/xor-05.ll
@@ -4,7 +4,7 @@
 
 ; Check the lowest useful constant, expressed as a signed integer.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: xi 0(%r2), 1
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -15,7 +15,7 @@ define void @f1(i8 *%ptr) {
 
 ; Check the highest useful constant, expressed as a signed integer.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -26,7 +26,7 @@ define void @f2(i8 *%ptr) {
 
 ; Check the lowest useful constant, expressed as an unsigned integer.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xi 0(%r2), 1
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -37,7 +37,7 @@ define void @f3(i8 *%ptr) {
 
 ; Check the highest useful constant, expressed as a unsigned integer.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -48,7 +48,7 @@ define void @f4(i8 *%ptr) {
 
 ; Check the high end of the XI range.
 define void @f5(i8 *%src) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xi 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4095
@@ -60,7 +60,7 @@ define void @f5(i8 *%src) {
 
 ; Check the next byte up, which should use XIY instead of XI.
 define void @f6(i8 *%src) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xiy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 4096
@@ -72,7 +72,7 @@ define void @f6(i8 *%src) {
 
 ; Check the high end of the XIY range.
 define void @f7(i8 *%src) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: xiy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 524287
@@ -85,7 +85,7 @@ define void @f7(i8 *%src) {
 ; Check the next byte up, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f8(i8 *%src) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: agfi %r2, 524288
 ; CHECK: xi 0(%r2), 127
 ; CHECK: br %r14
@@ -98,7 +98,7 @@ define void @f8(i8 *%src) {
 
 ; Check the high end of the negative XIY range.
 define void @f9(i8 *%src) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: xiy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -1
@@ -110,7 +110,7 @@ define void @f9(i8 *%src) {
 
 ; Check the low end of the XIY range.
 define void @f10(i8 *%src) {
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: xiy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8 *%src, i64 -524288
@@ -123,7 +123,7 @@ define void @f10(i8 *%src) {
 ; Check the next byte down, which needs separate address logic.
 ; Other sequences besides this one would be OK.
 define void @f11(i8 *%src) {
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: agfi %r2, -524289
 ; CHECK: xi 0(%r2), 127
 ; CHECK: br %r14
@@ -136,7 +136,7 @@ define void @f11(i8 *%src) {
 
 ; Check that XI does not allow an index
 define void @f12(i64 %src, i64 %index) {
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: agr %r2, %r3
 ; CHECK: xi 4095(%r2), 127
 ; CHECK: br %r14
@@ -151,7 +151,7 @@ define void @f12(i64 %src, i64 %index) {
 
 ; Check that XIY does not allow an index
 define void @f13(i64 %src, i64 %index) {
-; CHECK: f13:
+; CHECK-LABEL: f13:
 ; CHECK: agr %r2, %r3
 ; CHECK: xiy 4096(%r2), 127
 ; CHECK: br %r14
diff --git a/test/CodeGen/SystemZ/xor-06.ll b/test/CodeGen/SystemZ/xor-06.ll
index 0ffff47c2b5a..f39c0fec4e40 100644
--- a/test/CodeGen/SystemZ/xor-06.ll
+++ b/test/CodeGen/SystemZ/xor-06.ll
@@ -5,7 +5,7 @@
 
 ; Zero extension to 32 bits, negative constant.
 define void @f1(i8 *%ptr) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -18,7 +18,7 @@ define void @f1(i8 *%ptr) {
 
 ; Zero extension to 64 bits, negative constant.
 define void @f2(i8 *%ptr) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -31,7 +31,7 @@ define void @f2(i8 *%ptr) {
 
 ; Zero extension to 32 bits, positive constant.
 define void @f3(i8 *%ptr) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -44,7 +44,7 @@ define void @f3(i8 *%ptr) {
 
 ; Zero extension to 64 bits, positive constant.
 define void @f4(i8 *%ptr) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -57,7 +57,7 @@ define void @f4(i8 *%ptr) {
 
 ; Sign extension to 32 bits, negative constant.
 define void @f5(i8 *%ptr) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -70,7 +70,7 @@ define void @f5(i8 *%ptr) {
 
 ; Sign extension to 64 bits, negative constant.
 define void @f6(i8 *%ptr) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -83,7 +83,7 @@ define void @f6(i8 *%ptr) {
 
 ; Sign extension to 32 bits, positive constant.
 define void @f7(i8 *%ptr) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
@@ -96,7 +96,7 @@ define void @f7(i8 *%ptr) {
 
 ; Sign extension to 64 bits, positive constant.
 define void @f8(i8 *%ptr) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
   %val = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/xor-07.ll b/test/CodeGen/SystemZ/xor-07.ll
new file mode 100644
index 000000000000..ec2a0385b161
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-07.ll
@@ -0,0 +1,39 @@
+; Test the three-operand forms of XOR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check XRK.
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f1:
+; CHECK: xrk %r2, %r3, %r4
+; CHECK: br %r14
+  %xor = xor i32 %b, %c
+  ret i32 %xor
+}
+
+; Check that we can still use XR in obvious cases.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK-LABEL: f2:
+; CHECK: xr %r2, %r3
+; CHECK: br %r14
+  %xor = xor i32 %a, %b
+  ret i32 %xor
+}
+
+; Check XGRK.
+define i64 @f3(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: f3:
+; CHECK: xgrk %r2, %r3, %r4
+; CHECK: br %r14
+  %xor = xor i64 %b, %c
+  ret i64 %xor
+}
+
+; Check that we can still use XGR in obvious cases.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: xgr %r2, %r3
+; CHECK: br %r14
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
diff --git a/test/CodeGen/SystemZ/xor-08.ll b/test/CodeGen/SystemZ/xor-08.ll
new file mode 100644
index 000000000000..8cba41e742ce
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-08.ll
@@ -0,0 +1,57 @@
+; Test memory-to-memory XORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the simple i8 case.
+define void @f1(i8 *%ptr1) {
+; CHECK-LABEL: f1:
+; CHECK: xc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i8 *%ptr1, i64 1
+  %val = load i8 *%ptr1
+  %old = load i8 *%ptr2
+  %xor = xor i8 %val, %old
+  store i8 %xor, i8 *%ptr2
+  ret void
+}
+
+; Test the simple i16 case.
+define void @f2(i16 *%ptr1) {
+; CHECK-LABEL: f2:
+; CHECK: xc 2(2,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i16 *%ptr1, i64 1
+  %val = load i16 *%ptr1
+  %old = load i16 *%ptr2
+  %xor = xor i16 %val, %old
+  store i16 %xor, i16 *%ptr2
+  ret void
+}
+
+; Test the simple i32 case.
+define void @f3(i32 *%ptr1) {
+; CHECK-LABEL: f3:
+; CHECK: xc 4(4,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i32 *%ptr1, i64 1
+  %val = load i32 *%ptr1
+  %old = load i32 *%ptr2
+  %xor = xor i32 %old, %val
+  store i32 %xor, i32 *%ptr2
+  ret void
+}
+
+; Test the i64 case.
+define void @f4(i64 *%ptr1) {
+; CHECK-LABEL: f4:
+; CHECK: xc 8(8,%r2), 0(%r2)
+; CHECK: br %r14
+  %ptr2 = getelementptr i64 *%ptr1, i64 1
+  %val = load i64 *%ptr1
+  %old = load i64 *%ptr2
+  %xor = xor i64 %old, %val
+  store i64 %xor, i64 *%ptr2
+  ret void
+}
+
+; Leave other more complicated tests to and-08.ll.
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
index 787655779d31..414b76d750b9 100644
--- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -10,7 +10,7 @@
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.asl_file_t*, i64, i64*)* @t to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
 
 define i32 @t(%struct.asl_file_t* %s, i64 %off, i64* %out) nounwind optsize {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: adds {{r[0-7]}}, #8
 entry:
   %val = alloca i64, align 4                      ; <i64*> [#uses=3]
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index d6b649569173..b87bf24993a1 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -46,102 +46,110 @@ declare double @sqrt(double) nounwind readonly
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!104}
 !0 = metadata !{i32 46, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !4, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524299, metadata !4, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 524299, metadata !101, metadata !2, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524299, metadata !101, metadata !3, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 524334, metadata !101, null, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 524329, metadata !101} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 524305, metadata !101, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, metadata !"", i32 0, metadata !102, metadata !102, metadata !103, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !22, metadata !22}
-!8 = metadata !{i32 524307, metadata !4, metadata !"ggVector3", metadata !9, i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{i32 524307, metadata !99, null, metadata !"ggVector3", i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [ggVector3] [line 66, size 192, align 32, offset 0] [def] [from ]
 !9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
+!99 = metadata !{metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
 !10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90}
-!11 = metadata !{i32 524301, metadata !8, metadata !"e", metadata !9, i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
-!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ]
-!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 524301, metadata !99, metadata !8, metadata !"e", i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 524289, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 192, align 32, offset 0] [from double]
+!13 = metadata !{i32 524324, metadata !101, metadata !4, metadata !"double", i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 524321, i64 0, i64 3}        ; [ DW_TAG_subrange_type ]
-!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{null, metadata !19, metadata !20}
-!19 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 524310, metadata !21, metadata !"ggBoolean", metadata !21, i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
+!19 = metadata !{i32 524303, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 524310, metadata !100, null, metadata !"ggBoolean", i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
 !21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ]
-!22 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!23 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!100 = metadata !{metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm"}
+!22 = metadata !{i32 524324, metadata !101, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !25 = metadata !{null, metadata !19}
-!26 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!27 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!26 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!27 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13}
-!29 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", metadata !9, i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!30 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", metadata !9, i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!31 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!29 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!30 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!31 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !32 = metadata !{metadata !13, metadata !33}
-!33 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!35 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", metadata !9, i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!36 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", metadata !9, i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!37 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", metadata !9, i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!38 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!33 = metadata !{i32 524303, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 524326, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!35 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!36 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!38 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !39 = metadata !{metadata !40, metadata !19}
-!40 = metadata !{i32 524304, metadata !4, metadata !"double", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ]
-!41 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", metadata !9, i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!42 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", metadata !9, i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!43 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", metadata !9, i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!44 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!40 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"double", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ]
+!41 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!42 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!43 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!44 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !45 = metadata !{null, metadata !19, metadata !13}
-!46 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", metadata !9, i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!47 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", metadata !9, i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!48 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!49 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!46 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!47 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!48 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!49 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !50 = metadata !{null, metadata !19, metadata !51}
-!51 = metadata !{i32 524304, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ]
-!52 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", metadata !9, i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!53 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", metadata !9, i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!54 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", metadata !9, i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!55 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!51 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ]
+!52 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!53 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!54 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!55 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !56 = metadata !{metadata !51, metadata !33}
-!57 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", metadata !9, i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!58 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!57 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!58 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !59 = metadata !{metadata !8, metadata !33}
-!60 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", metadata !9, i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!61 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!60 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!61 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !62 = metadata !{metadata !13, metadata !33, metadata !22}
-!63 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", metadata !9, i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!64 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!63 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!64 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !65 = metadata !{metadata !40, metadata !19, metadata !22}
-!66 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", metadata !9, i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!67 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!66 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!67 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !68 = metadata !{metadata !69, metadata !19, metadata !51}
-!69 = metadata !{i32 524304, metadata !4, metadata !"ggVector3", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ]
-!70 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", metadata !9, i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!71 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", metadata !9, i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!72 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!69 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"ggVector3", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ]
+!70 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!71 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!72 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !73 = metadata !{metadata !69, metadata !19, metadata !13}
-!74 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", metadata !9, i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!75 = metadata !{i32 524334, i32 0, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", metadata !9, i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!76 = metadata !{i32 524334, i32 0, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", metadata !9, i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!77 = metadata !{i32 524334, i32 0, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", metadata !9, i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!78 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", metadata !9, i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!79 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!74 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!75 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!76 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!77 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!78 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!79 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13}
-!81 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", metadata !9, i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!82 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", metadata !9, i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!83 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", metadata !9, i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!84 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", metadata !9, i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!85 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", metadata !9, i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!86 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!81 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!82 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!83 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!84 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!85 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!86 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !87 = metadata !{metadata !22, metadata !33}
-!88 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", metadata !9, i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!89 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", metadata !9, i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!90 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", metadata !9, i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!88 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!89 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!90 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ]
 !92 = metadata !{i32 48, i32 0, metadata !1, null}
 !93 = metadata !{i32 218, i32 0, metadata !94, metadata !96}
-!94 = metadata !{i32 524299, metadata !4, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
-!95 = metadata !{i32 524299, metadata !4, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!94 = metadata !{i32 524299, metadata !101, metadata !95, i32 217, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!95 = metadata !{i32 524299, metadata !101, metadata !77, i32 217, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !96 = metadata !{i32 51, i32 0, metadata !1, null}
 !97 = metadata !{i32 227, i32 0, metadata !94, metadata !96}
 !98 = metadata !{i32 52, i32 0, metadata !1, null}
+!101 = metadata !{metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
+!102 = metadata !{i32 0}
+!103 = metadata !{metadata !3}
+!104 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
index a4c05d2492a4..b39978b9d44e 100644
--- a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
+++ b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
@@ -3,7 +3,7 @@
 ; rdar://11331541
 
 define i32 @t(i32 %a) nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: asrs [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], #31
 ; CHECK: eors [[REG1]], [[REG2]]
   %tmp0 = ashr i32 %a, 31
diff --git a/test/CodeGen/Thumb/PR17309.ll b/test/CodeGen/Thumb/PR17309.ll
new file mode 100644
index 000000000000..b7b08e941898
--- /dev/null
+++ b/test/CodeGen/Thumb/PR17309.ll
@@ -0,0 +1,57 @@
+; RUN: llc -mtriple thumbv5-none-linux-gnueabi < %s | FileCheck %s
+
+%struct.C = type { [1000 x i8] }
+%struct.S = type { [1000 x i16] }
+%struct.I = type { [1000 x i32] }
+
+;CHECK-LABEL: pass_C:
+;CHECK-NOT: ldrb    r{{[0-9]+}}, [{{.*}}], #1
+;CHECK-NOT: strb    r{{[0-9]+}}, [{{.*}}], #1
+define void @pass_C() #0 {  ; passes a local %struct.C (1000 x i8) by value to @use_C
+entry:
+  %c = alloca %struct.C, align 1  ; byte-aligned stack object
+  %0 = getelementptr inbounds %struct.C* %c, i32 0, i32 0, i32 0  ; i8* to the first byte of %c, used by the lifetime markers
+  call void @llvm.lifetime.start(i64 1000, i8* %0) #1
+  call void @use_C(%struct.C* byval %c) #3  ; byval argument: the 1000-byte struct itself is passed, not just the pointer
+  call void @llvm.lifetime.end(i64 1000, i8* %0) #1
+  ret void
+}
+
+;CHECK-LABEL: pass_S:
+;CHECK-NOT: ldrh    r{{[0-9]+}}, [{{.*}}], #2
+;CHECK-NOT: strh    r{{[0-9]+}}, [{{.*}}], #2
+define void @pass_S() #0 {  ; passes a local %struct.S (1000 x i16) by value to @use_S
+entry:
+  %s = alloca %struct.S, align 2  ; 2-byte-aligned stack object
+  %0 = bitcast %struct.S* %s to i8*  ; i8* view of %s, used by the lifetime markers
+  call void @llvm.lifetime.start(i64 2000, i8* %0) #1
+  call void @use_S(%struct.S* byval %s) #3  ; byval argument: the 2000-byte struct itself is passed, not just the pointer
+  call void @llvm.lifetime.end(i64 2000, i8* %0) #1
+  ret void
+}
+
+;CHECK-LABEL: pass_I:
+;CHECK-NOT: ldr     r{{[0-9]+}}, [{{.*}}], #4
+;CHECK-NOT: str     r{{[0-9]+}}, [{{.*}}], #4
+define void @pass_I() #0 {  ; passes a local %struct.I (1000 x i32) by value to @use_I
+entry:
+  %i = alloca %struct.I, align 4  ; 4-byte-aligned stack object
+  %0 = bitcast %struct.I* %i to i8*  ; i8* view of %i, used by the lifetime markers
+  call void @llvm.lifetime.start(i64 4000, i8* %0) #1
+  call void @use_I(%struct.I* byval %i) #3  ; byval argument: the 4000-byte struct itself is passed, not just the pointer
+  call void @llvm.lifetime.end(i64 4000, i8* %0) #1
+  ret void
+}
+
+declare void @use_C(%struct.C* byval) #2
+declare void @use_S(%struct.S* byval) #2
+declare void @use_I(%struct.I* byval) #2
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind optsize }
diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll
index 50d138fe44dd..1c27fa09884f 100644
--- a/test/CodeGen/Thumb/barrier.ll
+++ b/test/CodeGen/Thumb/barrier.ll
@@ -3,11 +3,11 @@
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m0   | FileCheck %s -check-prefix=V6M
 
 define void @t1() {
-; V6: t1:
+; V6-LABEL: t1:
 ; V6: blx {{_*}}sync_synchronize
 
-; V6M: t1:
-; V6M: dmb ish
+; V6M-LABEL: t1:
+; V6M: dmb sy
   fence seq_cst
   ret void
 }
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index f3f08347ae6e..6c6de55347a4 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -5,7 +5,7 @@
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
 
 define void @t1(%struct.state* %v) {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: push
 ; CHECK: add r7, sp, #12
 ; CHECK: lsls r[[R0:[0-9]+]]
@@ -39,7 +39,7 @@ declare fastcc void @f2(float*, float*, float*, i32)
 @str215 = external global [2 x i8]
 
 define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: push
 ; CHECK: add r7, sp, #12
 ; CHECK: sub sp, #
diff --git a/test/CodeGen/Thumb/ispositive.ll b/test/CodeGen/Thumb/ispositive.ll
index eac3ef28377b..7b2822707745 100644
--- a/test/CodeGen/Thumb/ispositive.ll
+++ b/test/CodeGen/Thumb/ispositive.ll
@@ -2,7 +2,7 @@
 
 define i32 @test1(i32 %X) {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: lsrs r0, r0, #31
         icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
         zext i1 %0 to i32               ; <i32>:1 [#uses=1]
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index 680976e74fba..fb6daa478651 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
 
 define void @test1() {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: sub sp, #256
 ; CHECK: add sp, #256
     %tmp = alloca [ 64 x i32 ] , align 4
@@ -9,8 +9,8 @@ define void @test1() {
 }
 
 define void @test2() {
-; CHECK: test2:
-; CHECK: ldr.n r0, LCPI
+; CHECK-LABEL: test2:
+; CHECK: ldr r0, LCPI
 ; CHECK: add sp, r0
 ; CHECK: subs r4, r7, #4
 ; CHECK: mov sp, r4
@@ -19,10 +19,10 @@ define void @test2() {
 }
 
 define i32 @test3() {
-; CHECK: test3:
-; CHECK: ldr.n r1, LCPI
+; CHECK-LABEL: test3:
+; CHECK: ldr r1, LCPI
 ; CHECK: add sp, r1
-; CHECK: ldr.n r1, LCPI
+; CHECK: ldr r1, LCPI
 ; CHECK: add r1, sp
 ; CHECK: subs r4, r7, #4
 ; CHECK: mov sp, r4
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
index 81782cda4a90..6c586385b1bc 100644
--- a/test/CodeGen/Thumb/ldr_frame.ll
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb | FileCheck %s
 
 define i32 @f1() {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldr r0
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
@@ -10,7 +10,7 @@ define i32 @f1() {
 }
 
 define i32 @f2() {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mov r0
 ; CHECK: ldrb
 	%buf = alloca [32 x i8], align 4
@@ -21,7 +21,7 @@ define i32 @f2() {
 }
 
 define i32 @f3() {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ldr r0
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
@@ -30,7 +30,7 @@ define i32 @f3() {
 }
 
 define i32 @f4() {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mov r0
 ; CHECK: ldrb
 	%buf = alloca [32 x i8], align 4
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
index 4d75f581a1d2..8a3ba96497e7 100644
--- a/test/CodeGen/Thumb/lit.local.cfg
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
index 63f2feb765fe..1e45c7f37bcb 100644
--- a/test/CodeGen/Thumb/pop.ll
+++ b/test/CodeGen/Thumb/pop.ll
@@ -2,7 +2,7 @@
 ; rdar://7268481
 
 define void @t(i8* %a, ...) nounwind {
-; CHECK:      t:
+; CHECK-LABEL:      t:
 ; CHECK:      pop {r3}
 ; CHECK-NEXT: add sp, #12
 ; CHECK-NEXT: bx r3
diff --git a/test/CodeGen/Thumb/push.ll b/test/CodeGen/Thumb/push.ll
index 94ef8e900434..62229c6daded 100644
--- a/test/CodeGen/Thumb/push.ll
+++ b/test/CodeGen/Thumb/push.ll
@@ -2,7 +2,7 @@
 ; rdar://7268481
 
 define void @t() nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: push {r7}
 entry:
   call void asm sideeffect alignstack ".long 0xe7ffdefe", ""() nounwind
diff --git a/test/CodeGen/Thumb/rev.ll b/test/CodeGen/Thumb/rev.ll
index 5e163f8f96bc..dcba00e49663 100644
--- a/test/CodeGen/Thumb/rev.ll
+++ b/test/CodeGen/Thumb/rev.ll
@@ -32,7 +32,7 @@ define i32 @test2(i32 %X) nounwind {
 ; rdar://9147637
 define i32 @test3(i16 zeroext %a) nounwind {
 entry:
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: revsh r0, r0
   %0 = tail call i16 @llvm.bswap.i16(i16 %a)
   %1 = sext i16 %0 to i32
@@ -43,7 +43,7 @@ declare i16 @llvm.bswap.i16(i16) nounwind readnone
 
 define i32 @test4(i16 zeroext %a) nounwind {
 entry:
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: revsh r0, r0
   %conv = zext i16 %a to i32
   %shr9 = lshr i16 %a, 8
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index 3f10b0573be5..fe69a39e350c 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -7,9 +7,9 @@ entry:
     %tmp1.s = select i1 %tmp, i32 2, i32 3
     ret i32 %tmp1.s
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: beq
-; CHECK-EABI: f1:
+; CHECK-EABI-LABEL: f1:
 ; CHECK-EABI: beq
 
 define i32 @f2(i32 %a.s) {
@@ -18,9 +18,9 @@ entry:
     %tmp1.s = select i1 %tmp, i32 2, i32 3
     ret i32 %tmp1.s
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: bgt
-; CHECK-EABI: f2:
+; CHECK-EABI-LABEL: f2:
 ; CHECK-EABI: bgt
 
 define i32 @f3(i32 %a.s, i32 %b.s) {
@@ -29,9 +29,9 @@ entry:
     %tmp1.s = select i1 %tmp, i32 2, i32 3
     ret i32 %tmp1.s
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: blt
-; CHECK-EABI: f3:
+; CHECK-EABI-LABEL: f3:
 ; CHECK-EABI: blt
 
 define i32 @f4(i32 %a.s, i32 %b.s) {
@@ -40,9 +40,9 @@ entry:
     %tmp1.s = select i1 %tmp, i32 2, i32 3
     ret i32 %tmp1.s
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ble
-; CHECK-EABI: f4:
+; CHECK-EABI-LABEL: f4:
 ; CHECK-EABI: ble
 
 define i32 @f5(i32 %a.u, i32 %b.u) {
@@ -51,9 +51,9 @@ entry:
     %tmp1.s = select i1 %tmp, i32 2, i32 3
     ret i32 %tmp1.s
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: bls
-; CHECK-EABI: f5:
+; CHECK-EABI-LABEL: f5:
 ; CHECK-EABI: bls
 
 define i32 @f6(i32 %a.u, i32 %b.u) {
@@ -62,9 +62,9 @@ entry:
     %tmp1.s = select i1 %tmp, i32 2, i32 3
     ret i32 %tmp1.s
 }
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: bhi
-; CHECK-EABI: f6:
+; CHECK-EABI-LABEL: f6:
 ; CHECK-EABI: bhi
 
 define double @f7(double %a, double %b) {
@@ -72,11 +72,11 @@ define double @f7(double %a, double %b) {
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
     ret double %tmp1
 }
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: blt
 ; CHECK: blt
 ; CHECK: __ltdf2
-; CHECK-EABI: f7:
+; CHECK-EABI-LABEL: f7:
 ; CHECK-EABI: __aeabi_dcmplt
 ; CHECK-EABI: bne
 ; CHECK-EABI: bne
diff --git a/test/CodeGen/Thumb/trap.ll b/test/CodeGen/Thumb/trap.ll
index 04cd3eed0fcb..e04059c4b021 100644
--- a/test/CodeGen/Thumb/trap.ll
+++ b/test/CodeGen/Thumb/trap.ll
@@ -3,7 +3,7 @@
 
 define void @t() nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: trap
   call void @llvm.trap()
   unreachable
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4616dcfe3e4e..4abeca930c1f 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -5,7 +5,7 @@
 
 define i32 @t(i32, ...) nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: add r7, sp, #12
 	%1 = load i8** undef, align 4		; <i8*> [#uses=3]
 	%2 = getelementptr i8* %1, i32 4		; <i8*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
index 095aecce9e57..e0144531454a 100644
--- a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
+++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -7,7 +7,7 @@
 @sep = external global [20 x i32]		; <[20 x i32]*> [#uses=1]
 
 define void @main(i32 %argc, i8** %argv) noreturn nounwind {
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK: ldrb
 entry:
 	%nb.i.i.i = alloca [25 x i8], align 1		; <[25 x i8]*> [#uses=0]
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index ff68e665078a..940cfd15e08e 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -4,7 +4,7 @@
 
 define hidden i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
 entry:
-; CHECK: __gcov_execlp:
+; CHECK-LABEL: __gcov_execlp:
 ; CHECK: sub sp, #8
 ; CHECK: push
 ; CHECK: add r7, sp, #4
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index ac3e80a7c113..52066d3f86ad 100644
--- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -5,7 +5,7 @@
 @getNeighbour = external global void (i32, i32, i32, i32, %struct.pix_pos*)*, align 4 ; <void (i32, i32, i32, i32, %struct.pix_pos*)**> [#uses=2]
 
 define void @t() nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK:      it eq
 ; CHECK-NEXT: cmpeq
 entry:
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 18c2e0bfaec5..04d46e60d7db 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -10,7 +10,7 @@
 
 
 define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
-; CHECK: _ZNKSs7compareERKSs:
+; CHECK-LABEL: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
 ; CHECK-NEXT: subeq{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
 ; CHECK-NEXT: pop.w
diff --git a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
index c153092288a1..c662620b19e2 100644
--- a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
+++ b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
 define i32 @test(i32 %n) nounwind {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: mov
 ; CHECK: return
 entry:
@@ -30,7 +30,7 @@ return:                                           ; preds = %bb, %entry
 }
 
 define i32 @test_dead_cycle(i32 %n) nounwind {
-; CHECK: test_dead_cycle:
+; CHECK-LABEL: test_dead_cycle:
 ; CHECK: blx
 ; CHECK-NOT: mov
 ; CHECK: blx
diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
index 2246de35e03c..1b8bdb1c19bb 100644
--- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
+++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -6,12 +6,12 @@
 
 define void @t() nounwind ssp {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
   %size = mul i32 8, 2
-; CHECK:  subs  r0, #16
+; CHECK:  sub.w  r0, sp, #16
 ; CHECK:  mov sp, r0
   %vla_a = alloca i8, i32 %size, align 8
-; CHECK:  subs  r0, #16
+; CHECK:  sub.w  r0, sp, #16
 ; CHECK:  mov sp, r0
   %vla_b = alloca i8, i32 %size, align 8
   unreachable
diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index 244d0bb8f720..810bfb790209 100644
--- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -40,7 +40,7 @@ entry:
 ; CHECK: pop
 ; CHECK: pop
 ; Do not convert into single stream code. BranchProbability Analysis assumes
-; that branches which goes to "ret" intruction have lower probabilities.
+; that branches which goes to "ret" instruction have lower probabilities.
   switch i32 undef, label %bb7 [
     i32 37, label %bb43
     i32 48, label %bb5
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index 47d7a9cca48d..547950fb17f0 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -4,7 +4,7 @@
 
 define internal fastcc i32 @Callee(i32 %i) nounwind {
 entry:
-; CHECK: Callee:
+; CHECK-LABEL: Callee:
 ; CHECK: push
 ; CHECK: mov r4, sp
 ; CHECK: sub.w [[R12:r[0-9]+]], r4, #1000
@@ -33,7 +33,7 @@ bb2:                                              ; preds = %entry
 declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind
 
 define i32 @main() nounwind {
-; CHECK: main:
+; CHECK-LABEL: main:
 bb.nph:
   br label %bb
 
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index 5cb266b11b0c..75f5439b98c2 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -6,7 +6,7 @@
 declare void @bar() nounwind optsize
 
 define void @foo() nounwind optsize {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: push
 ; CHECK: mov r7, sp
 ; CHECK: sub sp, #4
diff --git a/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll b/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll
index 604a352baa11..9878ae862c7a 100644
--- a/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll
+++ b/test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll
@@ -7,7 +7,7 @@
 
 define i32 @t() nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: sub sp, #12
 ; CHECK-NOT: sub
 ; CHECK: add r0, sp, #4
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
index 4acdd9e19ed8..500871519234 100644
--- a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -12,7 +12,7 @@
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
-; CHECK: rdictionary_lookup:
+; CHECK-LABEL: rdictionary_lookup:
 entry:
   br label %tailrecurse
 
diff --git a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
index 203815fadc9c..974fade64f0b 100644
--- a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
+++ b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
@@ -3,7 +3,7 @@
 ; Testing that these don't crash/assert. The loop vectorizer can end up
 ; with odd constructs like this. The code actually generated is incidental.
 define <1 x i64> @test_zext(i32 %a) nounwind {
-; CHECK: test_zext:
+; CHECK-LABEL: test_zext:
   %Cmp = icmp uge i32 %a, 42
   %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
   %Se = zext <1 x i1> %vec to <1 x i64>
@@ -11,7 +11,7 @@ define <1 x i64> @test_zext(i32 %a) nounwind {
 }
 
 define <1 x i64> @test_sext(i32 %a) nounwind {
-; CHECK: test_sext:
+; CHECK-LABEL: test_sext:
   %Cmp = icmp uge i32 %a, 42
   %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
   %Se = sext <1 x i1> %vec to <1 x i64>
diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll
index ce42f4b3773d..8a3c895bbe57 100644
--- a/test/CodeGen/Thumb2/buildvector-crash.ll
+++ b/test/CodeGen/Thumb2/buildvector-crash.ll
@@ -12,6 +12,6 @@ bb8:                                              ; preds = %bb8, %bb.nph372
   %3 = fadd <4 x float> undef, %2
   store <4 x float> %3, <4 x float>* undef, align 4
   br label %bb8
-; CHECK: RotateStarsFP_Vec:
+; CHECK-LABEL: RotateStarsFP_Vec:
 ; CHECK: vld1.64
 }
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
index 85b4370fa599..da1902b7e0f8 100644
--- a/test/CodeGen/Thumb2/carry.ll
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -2,7 +2,7 @@
 
 define i64 @f1(i64 %a, i64 %b) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: subs r0, r0, r2
 ; CHECK: sbcs r1, r3
 	%tmp = sub i64 %a, %b
@@ -11,7 +11,7 @@ entry:
 
 define i64 @f2(i64 %a, i64 %b) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: adds r0, r0, r0
 ; CHECK: adcs r1, r1
 ; CHECK: subs r0, r0, r2
@@ -24,7 +24,7 @@ entry:
 ; rdar://12559385
 define i64 @f3(i32 %vi) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: movw [[REG:r[0-9]+]], #36102
 ; CHECK: sbcs r{{[0-9]+}}, [[REG]]
     %v0 = zext i32 %vi to i64
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index edbf83405be7..a9f948cf717a 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s
 
 define void @fht(float* nocapture %fz, i16 signext %n) nounwind {
-; CHECK: fht:
+; CHECK-LABEL: fht:
 entry:
   br label %bb5
 
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index f89746a30327..003d71797ab1 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -3,7 +3,9 @@
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
 ; RUN:    | FileCheck %s -check-prefix=CHECK-THUMBV7M
 ; RUN: llc < %s -march=thumb -mcpu=swift \
-; RUN:    | FileCheck %s -check-prefix=CHECK-SWIFT-T2
+; RUN:    | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -march=thumb -mcpu=cortex-r5 \
+; RUN:    | FileCheck %s -check-prefix=CHECK-HWDIV
 
 define i32 @f1(i32 %a, i32 %b) {
 entry:
@@ -11,8 +13,8 @@ entry:
 ; CHECK-THUMB: __divsi3
 ; CHECK-THUMBV7M: f1
 ; CHECK-THUMBV7M: sdiv
-; CHECK-SWIFT-T2: f1
-; CHECK-SWIFT-T2: sdiv
+; CHECK-HWDIV: f1
+; CHECK-HWDIV: sdiv
         %tmp1 = sdiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
@@ -23,8 +25,8 @@ entry:
 ; CHECK-THUMB: __udivsi3
 ; CHECK-THUMBV7M: f2
 ; CHECK-THUMBV7M: udiv
-; CHECK-SWIFT-T2: f2
-; CHECK-SWIFT-T2: udiv
+; CHECK-HWDIV: f2
+; CHECK-HWDIV: udiv
         %tmp1 = udiv i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
@@ -35,8 +37,8 @@ entry:
 ; CHECK-THUMB: __modsi3
 ; CHECK-THUMBV7M: f3
 ; CHECK-THUMBV7M: sdiv
-; CHECK-SWIFT-T2: f3
-; CHECK-SWIFT-T2: sdiv
+; CHECK-HWDIV: f3
+; CHECK-HWDIV: sdiv
         %tmp1 = srem i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
@@ -47,8 +49,8 @@ entry:
 ; CHECK-THUMB: __umodsi3
 ; CHECK-THUMBV7M: f4
 ; CHECK-THUMBV7M: udiv
-; CHECK-SWIFT-T2: f4
-; CHECK-SWIFT-T2: udiv
+; CHECK-HWDIV: f4
+; CHECK-HWDIV: udiv
         %tmp1 = urem i32 %a, %b         ; <i32> [#uses=1]
         ret i32 %tmp1
 }
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
index 61c477aa9180..1b4d4625dd05 100644
--- a/test/CodeGen/Thumb2/large-call.ll
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mcpu=cortex-a8 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios0.0.0"
 
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 68b5d1cc94fb..36f3ce2eaa88 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -2,19 +2,19 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX
 
 define void @test1() {
-; DARWIN: test1:
+; DARWIN-LABEL: test1:
 ; DARWIN: sub sp, #256
-; LINUX: test1:
+; LINUX-LABEL: test1:
 ; LINUX: sub sp, #256
     %tmp = alloca [ 64 x i32 ] , align 4
     ret void
 }
 
 define void @test2() {
-; DARWIN: test2:
+; DARWIN-LABEL: test2:
 ; DARWIN: sub.w sp, sp, #4160
 ; DARWIN: sub sp, #8
-; LINUX: test2:
+; LINUX-LABEL: test2:
 ; LINUX: sub.w sp, sp, #4160
 ; LINUX: sub sp, #8
     %tmp = alloca [ 4168 x i8 ] , align 4
@@ -22,11 +22,11 @@ define void @test2() {
 }
 
 define i32 @test3() {
-; DARWIN: test3:
+; DARWIN-LABEL: test3:
 ; DARWIN: push    {r4, r7, lr}
 ; DARWIN: sub.w sp, sp, #805306368
 ; DARWIN: sub sp, #20
-; LINUX: test3:
+; LINUX-LABEL: test3:
 ; LINUX: push.w {r4, r7, r11, lr}
 ; LINUX: sub.w sp, sp, #805306368
 ; LINUX: sub sp, #16
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
index cb77b09ef4ad..8a3ba96497e7 100644
--- a/test/CodeGen/Thumb2/lit.local.cfg
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/Thumb2/longMACt.ll b/test/CodeGen/Thumb2/longMACt.ll
index beefd6044cf4..a457333d978f 100644
--- a/test/CodeGen/Thumb2/longMACt.ll
+++ b/test/CodeGen/Thumb2/longMACt.ll
@@ -2,7 +2,7 @@
 ; Check generated signed and unsigned multiply accumulate long.
 
 define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
-;CHECK: MACLongTest1:
+;CHECK-LABEL: MACLongTest1:
 ;CHECK: umlal
   %conv = zext i32 %a to i64
   %conv1 = zext i32 %b to i64
@@ -12,7 +12,7 @@ define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
 }
 
 define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c)  {
-;CHECK: MACLongTest2:
+;CHECK-LABEL: MACLongTest2:
 ;CHECK: smlal
   %conv = sext i32 %a to i64
   %conv1 = sext i32 %b to i64
@@ -22,7 +22,7 @@ define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c)  {
 }
 
 define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
-;CHECK: MACLongTest3:
+;CHECK-LABEL: MACLongTest3:
 ;CHECK: umlal
   %conv = zext i32 %b to i64
   %conv1 = zext i32 %a to i64
@@ -33,7 +33,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
 }
 
 define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
-;CHECK: MACLongTest4:
+;CHECK-LABEL: MACLongTest4:
 ;CHECK: smlal
   %conv = sext i32 %b to i64
   %conv1 = sext i32 %a to i64
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 9aaa821698c1..7ce6768a2187 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -7,7 +7,7 @@
 @array = external global i32*                     ; <i32**> [#uses=1]
 
 define void @t() nounwind optsize {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: mov{{.*}}, #1000
 entry:
   %.pre = load i32* @G, align 4                   ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 01df37323252..d9da846294c4 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -7,7 +7,7 @@
 
 define void @t1(i32* nocapture %vals, i32 %c) nounwind {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: bxeq lr
 
   %0 = icmp eq i32 %c, 0                          ; <i1> [#uses=1]
@@ -50,7 +50,7 @@ return:                                           ; preds = %bb, %entry
 ; rdar://8001136
 define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: vmov.f32 q{{.*}}, #1.000000e+00
   br i1 undef, label %bb1, label %bb2
 
@@ -82,7 +82,7 @@ declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwin
 ; rdar://8241368
 ; isel should not fold immediate into eor's which would have prevented LICM.
 define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone {
-; CHECK: t3:
+; CHECK-LABEL: t3:
 bb.nph:
 ; CHECK: bb.nph
 ; CHECK: movw {{(r[0-9])|(lr)}}, #32768
diff --git a/test/CodeGen/Thumb2/mul_const.ll b/test/CodeGen/Thumb2/mul_const.ll
index 9a2ec93a5adc..488f4d13a0eb 100644
--- a/test/CodeGen/Thumb2/mul_const.ll
+++ b/test/CodeGen/Thumb2/mul_const.ll
@@ -3,7 +3,7 @@
 
 define i32 @t1(i32 %v) nounwind readnone {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: add.w r0, r0, r0, lsl #3
 	%0 = mul i32 %v, 9
 	ret i32 %0
@@ -11,7 +11,7 @@ entry:
 
 define i32 @t2(i32 %v) nounwind readnone {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: rsb r0, r0, r0, lsl #3
 	%0 = mul i32 %v, 7
 	ret i32 %0
diff --git a/test/CodeGen/Thumb2/pic-load.ll b/test/CodeGen/Thumb2/pic-load.ll
index 35a03e777313..b22fd1dc72e1 100644
--- a/test/CodeGen/Thumb2/pic-load.ll
+++ b/test/CodeGen/Thumb2/pic-load.ll
@@ -7,7 +7,7 @@
 
 define hidden i32 @atexit(void ()* %func) nounwind {
 entry:
-; CHECK: atexit:
+; CHECK-LABEL: atexit:
 ; CHECK: add r0, pc
 	%r = alloca %struct.one_atexit_routine, align 4		; <%struct.one_atexit_routine*> [#uses=3]
 	%0 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0		; <void ()**> [#uses=1]
diff --git a/test/CodeGen/Thumb2/tail-call-r9.ll b/test/CodeGen/Thumb2/tail-call-r9.ll
new file mode 100644
index 000000000000..24c76c98c03a
--- /dev/null
+++ b/test/CodeGen/Thumb2/tail-call-r9.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 | FileCheck %s
+
+@foo = common global void ()* null, align 4
+
+; Make sure in the presence of a tail call, r9 doesn't get used to hold
+; the destination address. It's callee-saved in AAPCS.
+define arm_aapcscc void @test(i32 %a) nounwind {
+; CHECK-LABEL: test:
+; CHECK-NOT: bx r9
+  %tmp = load void ()** @foo, align 4  ; callee address comes from the global @foo
+  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r12}"() nounwind  ; empty asm clobbering r0-r3 and r12, so the address cannot live in any of them across this point
+  tail call arm_aapcscc void %tmp() nounwind  ; indirect tail call through the loaded pointer
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-adc.ll b/test/CodeGen/Thumb2/thumb2-adc.ll
index 702df91c8595..7c34cfdef3f9 100644
--- a/test/CodeGen/Thumb2/thumb2-adc.ll
+++ b/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -2,7 +2,7 @@
 
 ; 734439407618 = 0x000000ab00000002
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: adds r0, #2
     %tmp = add i64 %a, 734439407618
     ret i64 %tmp
@@ -10,7 +10,7 @@ define i64 @f1(i64 %a) {
 
 ; 5066626890203138 = 0x0012001200000002
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: adds r0, #2
     %tmp = add i64 %a, 5066626890203138
     ret i64 %tmp
@@ -18,7 +18,7 @@ define i64 @f2(i64 %a) {
 
 ; 3747052064576897026 = 0x3400340000000002
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: adds r0, #2
     %tmp = add i64 %a, 3747052064576897026
     ret i64 %tmp
@@ -26,7 +26,7 @@ define i64 @f3(i64 %a) {
 
 ; 6221254862626095106 = 0x5656565600000002
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: adds r0, #2
     %tmp = add i64 %a, 6221254862626095106 
     ret i64 %tmp
@@ -34,14 +34,14 @@ define i64 @f4(i64 %a) {
 
 ; 287104476244869122 = 0x03fc000000000002
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: adds r0, #2
     %tmp = add i64 %a, 287104476244869122
     ret i64 %tmp
 }
 
 define i64 @f6(i64 %a, i64 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: adds r0, r0, r2
     %tmp = add i64 %a, %b
     ret i64 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index 66fca132f04a..c23c74a1682e 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s 
 
 define i32 @t2ADDrc_255(i32 %lhs) {
-; CHECK: t2ADDrc_255:
+; CHECK-LABEL: t2ADDrc_255:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} #255
 ; CHECK: bx lr
@@ -11,7 +11,7 @@ define i32 @t2ADDrc_255(i32 %lhs) {
 }
 
 define i32 @t2ADDrc_256(i32 %lhs) {
-; CHECK: t2ADDrc_256:
+; CHECK-LABEL: t2ADDrc_256:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} #256
 ; CHECK: bx lr
@@ -21,7 +21,7 @@ define i32 @t2ADDrc_256(i32 %lhs) {
 }
 
 define i32 @t2ADDrc_257(i32 %lhs) {
-; CHECK: t2ADDrc_257:
+; CHECK-LABEL: t2ADDrc_257:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} #257
 ; CHECK: bx lr
@@ -31,7 +31,7 @@ define i32 @t2ADDrc_257(i32 %lhs) {
 }
 
 define i32 @t2ADDrc_4094(i32 %lhs) {
-; CHECK: t2ADDrc_4094:
+; CHECK-LABEL: t2ADDrc_4094:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} #4094
 ; CHECK: bx lr
@@ -41,7 +41,7 @@ define i32 @t2ADDrc_4094(i32 %lhs) {
 }
 
 define i32 @t2ADDrc_4095(i32 %lhs) {
-; CHECK: t2ADDrc_4095:
+; CHECK-LABEL: t2ADDrc_4095:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} #4095
 ; CHECK: bx lr
@@ -51,7 +51,7 @@ define i32 @t2ADDrc_4095(i32 %lhs) {
 }
 
 define i32 @t2ADDrc_4096(i32 %lhs) {
-; CHECK: t2ADDrc_4096:
+; CHECK-LABEL: t2ADDrc_4096:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} #4096
 ; CHECK: bx lr
@@ -61,7 +61,7 @@ define i32 @t2ADDrc_4096(i32 %lhs) {
 }
 
 define i32 @t2ADDrr(i32 %lhs, i32 %rhs) {
-; CHECK: t2ADDrr:
+; CHECK-LABEL: t2ADDrr:
 ; CHECK-NOT: bx lr
 ; CHECK: add
 ; CHECK: bx lr
@@ -71,7 +71,7 @@ define i32 @t2ADDrr(i32 %lhs, i32 %rhs) {
 }
 
 define i32 @t2ADDrs(i32 %lhs, i32 %rhs) {
-; CHECK: t2ADDrs:
+; CHECK-LABEL: t2ADDrs:
 ; CHECK-NOT: bx lr
 ; CHECK: add{{.*}} lsl #8
 ; CHECK: bx lr
diff --git a/test/CodeGen/Thumb2/thumb2-add2.ll b/test/CodeGen/Thumb2/thumb2-add2.ll
index e496654706ec..3bbc3bf812ad 100644
--- a/test/CodeGen/Thumb2/thumb2-add2.ll
+++ b/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -2,7 +2,7 @@
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: adds r0, #171
     %tmp = add i32 %a, 171
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 
 ; 1179666 = 0x00120012
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: add.w r0, r0, #1179666
     %tmp = add i32 %a, 1179666
     ret i32 %tmp
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a) {
 
 ; 872428544 = 0x34003400
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: add.w r0, r0, #872428544
     %tmp = add i32 %a, 872428544
     ret i32 %tmp
@@ -26,7 +26,7 @@ define i32 @f3(i32 %a) {
 
 ; 1448498774 = 0x56565656
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: add.w r0, r0, #1448498774
     %tmp = add i32 %a, 1448498774
     ret i32 %tmp
@@ -34,7 +34,7 @@ define i32 @f4(i32 %a) {
 
 ; 510 = 0x000001fe
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: add.w r0, r0, #510
     %tmp = add i32 %a, 510
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll
index 58fc33372cf6..6cd818c03e11 100644
--- a/test/CodeGen/Thumb2/thumb2-add3.ll
+++ b/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -5,5 +5,5 @@ define i32 @f1(i32 %a) {
     ret i32 %tmp
 }
 
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	addw	r0, r0, #4095
diff --git a/test/CodeGen/Thumb2/thumb2-add4.ll b/test/CodeGen/Thumb2/thumb2-add4.ll
index b94e84daee1b..8b957114835d 100644
--- a/test/CodeGen/Thumb2/thumb2-add4.ll
+++ b/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -2,7 +2,7 @@
 
 ; 171 = 0x000000ab
 define i64 @f1(i64 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: adds r0, #171
 ; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 171
@@ -11,7 +11,7 @@ define i64 @f1(i64 %a) {
 
 ; 1179666 = 0x00120012
 define i64 @f2(i64 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: adds.w r0, r0, #1179666
 ; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 1179666
@@ -20,7 +20,7 @@ define i64 @f2(i64 %a) {
 
 ; 872428544 = 0x34003400
 define i64 @f3(i64 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: adds.w r0, r0, #872428544
 ; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 872428544
@@ -29,7 +29,7 @@ define i64 @f3(i64 %a) {
 
 ; 1448498774 = 0x56565656
 define i64 @f4(i64 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: adds.w r0, r0, #1448498774
 ; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 1448498774
@@ -38,7 +38,7 @@ define i64 @f4(i64 %a) {
 
 ; 66846720 = 0x03fc0000
 define i64 @f5(i64 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: adds.w r0, r0, #66846720
 ; CHECK: adc r1, r1, #0
     %tmp = add i64 %a, 66846720
diff --git a/test/CodeGen/Thumb2/thumb2-add5.ll b/test/CodeGen/Thumb2/thumb2-add5.ll
index 8b3a4f6d12a8..beaa09e1e69e 100644
--- a/test/CodeGen/Thumb2/thumb2-add5.ll
+++ b/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: add r0, r1
     %tmp = add i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: add.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = add i32 %a, %tmp
@@ -16,7 +16,7 @@ define i32 @f2(i32 %a, i32 %b) {
 }
 
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: add.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = add i32 %a, %tmp
@@ -24,7 +24,7 @@ define i32 @f3(i32 %a, i32 %b) {
 }
 
 define i32 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: add.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = add i32 %a, %tmp
@@ -32,7 +32,7 @@ define i32 @f4(i32 %a, i32 %b) {
 }
 
 define i32 @f5(i32 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: add.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-add6.ll b/test/CodeGen/Thumb2/thumb2-add6.ll
index 0ecaa793909f..0d2f12249956 100644
--- a/test/CodeGen/Thumb2/thumb2-add6.ll
+++ b/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: adds r0, r0, r2
 ; CHECK: adcs r1, r3
     %tmp = add i64 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-and.ll b/test/CodeGen/Thumb2/thumb2-and.ll
index 8e2245a85926..c9578d9d7d21 100644
--- a/test/CodeGen/Thumb2/thumb2-and.ll
+++ b/test/CodeGen/Thumb2/thumb2-and.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ands r0, r1
     %tmp = and i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: and.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = and i32 %a, %tmp
@@ -16,7 +16,7 @@ define i32 @f2(i32 %a, i32 %b) {
 }
 
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: and.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = and i32 %a, %tmp
@@ -24,7 +24,7 @@ define i32 @f3(i32 %a, i32 %b) {
 }
 
 define i32 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: and.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = and i32 %a, %tmp
@@ -32,7 +32,7 @@ define i32 @f4(i32 %a, i32 %b) {
 }
 
 define i32 @f5(i32 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: and.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 7b0432de9bb5..c0501ab8ad37 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -5,7 +5,7 @@ define i32 @f1(i32 %a) {
     %tmp = and i32 %a, 171
     ret i32 %tmp
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	and	r0, r0, #171
 
 ; 1179666 = 0x00120012
@@ -13,7 +13,7 @@ define i32 @f2(i32 %a) {
     %tmp = and i32 %a, 1179666
     ret i32 %tmp
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	and	r0, r0, #1179666
 
 ; 872428544 = 0x34003400
@@ -21,7 +21,7 @@ define i32 @f3(i32 %a) {
     %tmp = and i32 %a, 872428544
     ret i32 %tmp
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	and	r0, r0, #872428544
 
 ; 1448498774 = 0x56565656
@@ -29,7 +29,7 @@ define i32 @f4(i32 %a) {
     %tmp = and i32 %a, 1448498774
     ret i32 %tmp
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: bic r0, r0, #-1448498775
 
 ; 66846720 = 0x03fc0000
@@ -37,5 +37,5 @@ define i32 @f5(i32 %a) {
     %tmp = and i32 %a, 66846720
     ret i32 %tmp
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: 	and	r0, r0, #66846720
diff --git a/test/CodeGen/Thumb2/thumb2-asr.ll b/test/CodeGen/Thumb2/thumb2-asr.ll
index a0a60e68989f..ba782dde1034 100644
--- a/test/CodeGen/Thumb2/thumb2-asr.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: asrs r0, r1
     %tmp = ashr i32 %a, %b
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-asr2.ll b/test/CodeGen/Thumb2/thumb2-asr2.ll
index 9c8634f7097c..3685badcafdf 100644
--- a/test/CodeGen/Thumb2/thumb2-asr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: asrs r0, r0, #17
     %tmp = ashr i32 %a, 17
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
index 4a2d6000bbc7..81f7de9ae39c 100644
--- a/test/CodeGen/Thumb2/thumb2-bcc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -4,7 +4,7 @@
 ; happen and we get actual branches.
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: cbz
   %tmp2 = icmp eq i32 %a, 0
   br i1 %tmp2, label %cond_false, label %cond_true
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
index b486045ab501..327b6d1a503a 100644
--- a/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -2,7 +2,7 @@
 
 ; 4278190095 = 0xff00000f
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: bfc r
     %tmp = and i32 %a, 4278190095
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 
 ; 4286578688 = 0xff800000
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: bfc r
     %tmp = and i32 %a, 4286578688
     ret i32 %tmp
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a) {
 
 ; 4095 = 0x00000fff
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: bfc r
     %tmp = and i32 %a, 4095
     ret i32 %tmp
@@ -26,7 +26,7 @@ define i32 @f3(i32 %a) {
 
 ; 2147483646 = 0x7ffffffe   not implementable w/ BFC
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
     %tmp = and i32 %a, 2147483646
     ret i32 %tmp
 }
diff --git a/test/CodeGen/Thumb2/thumb2-bic.ll b/test/CodeGen/Thumb2/thumb2-bic.ll
index 4e35383997d9..5938fa19a3c4 100644
--- a/test/CodeGen/Thumb2/thumb2-bic.ll
+++ b/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: bics r0, r1
     %tmp = xor i32 %b, 4294967295
     %tmp1 = and i32 %a, %tmp
@@ -9,7 +9,7 @@ define i32 @f1(i32 %a, i32 %b) {
 }
 
 define i32 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: bics r0, r1
     %tmp = xor i32 %b, 4294967295
     %tmp1 = and i32 %tmp, %a
@@ -17,7 +17,7 @@ define i32 @f2(i32 %a, i32 %b) {
 }
 
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: bics r0, r1
     %tmp = xor i32 4294967295, %b
     %tmp1 = and i32 %a, %tmp
@@ -25,7 +25,7 @@ define i32 @f3(i32 %a, i32 %b) {
 }
 
 define i32 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: bics r0, r1
     %tmp = xor i32 4294967295, %b
     %tmp1 = and i32 %tmp, %a
@@ -33,7 +33,7 @@ define i32 @f4(i32 %a, i32 %b) {
 }
 
 define i32 @f5(i32 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: bic.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = xor i32 4294967295, %tmp
@@ -42,7 +42,7 @@ define i32 @f5(i32 %a, i32 %b) {
 }
 
 define i32 @f6(i32 %a, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: bic.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = xor i32 %tmp, 4294967295
@@ -51,7 +51,7 @@ define i32 @f6(i32 %a, i32 %b) {
 }
 
 define i32 @f7(i32 %a, i32 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: bic.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = xor i32 %tmp, 4294967295
@@ -60,7 +60,7 @@ define i32 @f7(i32 %a, i32 %b) {
 }
 
 define i32 @f8(i32 %a, i32 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: bic.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
@@ -75,7 +75,7 @@ define i32 @f9(i32 %a) {
     %tmp = and i32 %a, 4294967108
     ret i32 %tmp
     
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: bic r0, r0, #187
 }
 
@@ -84,7 +84,7 @@ define i32 @f10(i32 %a) {
     %tmp = and i32 %a, 4283826005
     ret i32 %tmp
     
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: bic r0, r0, #11141290
 }
 
@@ -92,7 +92,7 @@ define i32 @f10(i32 %a) {
 define i32 @f11(i32 %a) {
     %tmp = and i32 %a, 872363007
     ret i32 %tmp
-; CHECK: f11:
+; CHECK-LABEL: f11:
 ; CHECK: bic r0, r0, #-872363008
 }
 
@@ -100,6 +100,6 @@ define i32 @f11(i32 %a) {
 define i32 @f12(i32 %a) {
     %tmp = and i32 %a, 4293853183
     ret i32 %tmp
-; CHECK: f12:
+; CHECK-LABEL: f12:
 ; CHECK: bic r0, r0, #1114112
 }
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
index f1c097c1892d..a00b22d85022 100644
--- a/test/CodeGen/Thumb2/thumb2-branch.ll
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -1,72 +1,74 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
-; If-conversion defeats the purpose of this test, which is to check conditional
-; branch generation, so use memory barrier instruction to make sure it doesn't
+; If-conversion defeats the purpose of this test, which is to check
+; conditional branch generation, so use a call to make sure it doesn't
 ; happen and we get actual branches.
 
+declare void @foo()
+
 define i32 @f1(i32 %a, i32 %b, i32* %v) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: bne LBB
         %tmp = icmp eq i32 %a, %b               ; <i1> [#uses=1]
         br i1 %tmp, label %cond_true, label %return
 
 cond_true:              ; preds = %entry
-        fence seq_cst
+        call void @foo()
         store i32 0, i32* %v
         ret i32 0
 
 return:         ; preds = %entry
-        fence seq_cst
+        call void @foo()
         ret i32 1
 }
 
 define i32 @f2(i32 %a, i32 %b, i32* %v) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: bge LBB
         %tmp = icmp slt i32 %a, %b              ; <i1> [#uses=1]
         br i1 %tmp, label %cond_true, label %return
 
 cond_true:              ; preds = %entry
-        fence seq_cst
+        call void @foo()
         store i32 0, i32* %v
         ret i32 0
 
 return:         ; preds = %entry
-        fence seq_cst
+        call void @foo()
         ret i32 1
 }
 
 define i32 @f3(i32 %a, i32 %b, i32* %v) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: bhs LBB
         %tmp = icmp ult i32 %a, %b              ; <i1> [#uses=1]
         br i1 %tmp, label %cond_true, label %return
 
 cond_true:              ; preds = %entry
-        fence seq_cst
+        call void @foo()
         store i32 0, i32* %v
         ret i32 0
 
 return:         ; preds = %entry
-        fence seq_cst
+        call void @foo()
         ret i32 1
 }
 
 define i32 @f4(i32 %a, i32 %b, i32* %v) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: blo LBB
         %tmp = icmp uge i32 %a, %b              ; <i1> [#uses=1]
         br i1 %tmp, label %cond_true, label %return
 
 cond_true:              ; preds = %entry
-        fence seq_cst
+        call void @foo()
         store i32 0, i32* %v
         ret i32 0
 
 return:         ; preds = %entry
-        fence seq_cst
+        call void @foo()
         ret i32 1
 }
diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll
index 2e4da1b289b5..2902949d9768 100644
--- a/test/CodeGen/Thumb2/thumb2-call-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -7,20 +7,20 @@
 declare void @g(i32, i32, i32, i32)
 
 define void @f() {
-; DARWIN: f:
+; DARWIN-LABEL: f:
 ; DARWIN: blx _g
 
-; LINUX: f:
+; LINUX-LABEL: f:
 ; LINUX: bl g
         tail call void @g( i32 1, i32 2, i32 3, i32 4 )
         ret void
 }
 
 define void @h() {
-; DARWIN: h:
+; DARWIN-LABEL: h:
 ; DARWIN: bx r0 @ TAILCALL
 
-; LINUX: h:
+; LINUX-LABEL: h:
 ; LINUX: bx r0 @ TAILCALL
         %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
@@ -28,10 +28,10 @@ define void @h() {
 }
 
 define void @j() {
-; DARWIN: j:
+; DARWIN-LABEL: j:
 ; DARWIN: b.w _f  @ TAILCALL
 
-; LINUX: j:
+; LINUX-LABEL: j:
 ; LINUX: b.w f  @ TAILCALL
         tail call void @f()
         ret void
diff --git a/test/CodeGen/Thumb2/thumb2-call.ll b/test/CodeGen/Thumb2/thumb2-call.ll
index 8513cfb404ce..1d2eaa77c7fe 100644
--- a/test/CodeGen/Thumb2/thumb2-call.ll
+++ b/test/CodeGen/Thumb2/thumb2-call.ll
@@ -6,20 +6,20 @@
 declare void @g(i32, i32, i32, i32)
 
 define void @f() {
-; DARWIN: f:
+; DARWIN-LABEL: f:
 ; DARWIN: blx _g
 
-; LINUX: f:
+; LINUX-LABEL: f:
 ; LINUX: bl g
         call void @g( i32 1, i32 2, i32 3, i32 4 )
         ret void
 }
 
 define void @h() {
-; DARWIN: h:
+; DARWIN-LABEL: h:
 ; DARWIN: blx r0
 
-; LINUX: h:
+; LINUX-LABEL: h:
 ; LINUX: blx r0
         %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = call i32 %tmp( )            ; <i32> [#uses=0]
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index f7e966535d2f..dbdaae29eaef 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7 | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: clz r
     %tmp = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index 67b07e63fc09..8bcaa7e8209e 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -8,7 +8,7 @@ define i1 @f1(i32 %a, i32 %b) {
     %tmp = icmp ne i32 %a, %nb
     ret i1 %tmp
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f2(i32 %a, i32 %b) {
@@ -16,7 +16,7 @@ define i1 @f2(i32 %a, i32 %b) {
     %tmp = icmp ne i32 %nb, %a
     ret i1 %tmp
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f3(i32 %a, i32 %b) {
@@ -24,7 +24,7 @@ define i1 @f3(i32 %a, i32 %b) {
     %tmp = icmp eq i32 %a, %nb
     ret i1 %tmp
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f4(i32 %a, i32 %b) {
@@ -32,7 +32,7 @@ define i1 @f4(i32 %a, i32 %b) {
     %tmp = icmp eq i32 %nb, %a
     ret i1 %tmp
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: 	cmn	{{.*}}, r1
 
 define i1 @f5(i32 %a, i32 %b) {
@@ -41,7 +41,7 @@ define i1 @f5(i32 %a, i32 %b) {
     %tmp1 = icmp eq i32 %nb, %a
     ret i1 %tmp1
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: 	cmn.w	{{.*}}, r1, lsl #5
 
 define i1 @f6(i32 %a, i32 %b) {
@@ -50,7 +50,7 @@ define i1 @f6(i32 %a, i32 %b) {
     %tmp1 = icmp ne i32 %nb, %a
     ret i1 %tmp1
 }
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: 	cmn.w	{{.*}}, r1, lsr #6
 
 define i1 @f7(i32 %a, i32 %b) {
@@ -59,7 +59,7 @@ define i1 @f7(i32 %a, i32 %b) {
     %tmp1 = icmp eq i32 %a, %nb
     ret i1 %tmp1
 }
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: 	cmn.w	{{.*}}, r1, asr #7
 
 define i1 @f8(i32 %a, i32 %b) {
@@ -70,7 +70,7 @@ define i1 @f8(i32 %a, i32 %b) {
     %tmp1 = icmp ne i32 %a, %nb
     ret i1 %tmp1
 }
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: 	cmn.w	{{.*}}, {{.*}}, ror #8
 
 
@@ -81,5 +81,5 @@ define void @f9(i32 %a, i32 %b) nounwind optsize {
 
 !0 = metadata !{i32 81}
 
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: 	cmn.w	r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
index c0e19f63a309..f5db728d46a4 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -2,7 +2,7 @@
 
 ; -0x000000bb = 4294967109
 define i1 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cmn.w {{r.*}}, #187
     %tmp = icmp ne i32 %a, 4294967109
     ret i1 %tmp
@@ -10,7 +10,7 @@ define i1 @f1(i32 %a) {
 
 ; -0x00aa00aa = 4283826006
 define i1 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cmn.w {{r.*}}, #11141290
     %tmp = icmp eq i32 %a, 4283826006
     ret i1 %tmp
@@ -18,7 +18,7 @@ define i1 @f2(i32 %a) {
 
 ; -0xcc00cc00 = 872363008
 define i1 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cmn.w {{r.*}}, #-872363008
     %tmp = icmp ne i32 %a, 872363008
     ret i1 %tmp
@@ -26,7 +26,7 @@ define i1 @f3(i32 %a) {
 
 ; -0x00110000 = 4293853184
 define i1 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cmn.w {{r.*}}, #1114112
     %tmp = icmp eq i32 %a, 4293853184
     ret i1 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 4ce7acc22e0f..87413444ca3b 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -5,7 +5,7 @@
 
 ; 0x000000bb = 187
 define i1 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cmp {{.*}}, #187
     %tmp = icmp ne i32 %a, 187
     ret i1 %tmp
@@ -13,7 +13,7 @@ define i1 @f1(i32 %a) {
 
 ; 0x00aa00aa = 11141290
 define i1 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cmp.w {{.*}}, #11141290
     %tmp = icmp eq i32 %a, 11141290 
     ret i1 %tmp
@@ -21,7 +21,7 @@ define i1 @f2(i32 %a) {
 
 ; 0xcc00cc00 = 3422604288
 define i1 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: cmp.w {{.*}}, #-872363008
     %tmp = icmp ne i32 %a, 3422604288
     ret i1 %tmp
@@ -29,7 +29,7 @@ define i1 @f3(i32 %a) {
 
 ; 0xdddddddd = 3722304989
 define i1 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: cmp.w {{.*}}, #-572662307
     %tmp = icmp ne i32 %a, 3722304989
     ret i1 %tmp
@@ -37,7 +37,7 @@ define i1 @f4(i32 %a) {
 
 ; 0x00110000 = 1114112
 define i1 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: cmp.w {{.*}}, #1114112
     %tmp = icmp eq i32 %a, 1114112
     ret i1 %tmp
@@ -45,7 +45,7 @@ define i1 @f5(i32 %a) {
 
 ; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform.
 ;
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK-NOT: cmp.w {{.*}}, #-2147483648
 ; CHECK: bx lr
 define i32 @f6(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index f6790deb1fc2..5b880f16deb5 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -4,21 +4,21 @@
 ; test as 'mov.w r0, #0'.
 
 define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: cmp {{.*}}, r1
     %tmp = icmp ne i32 %a, %b
     ret i1 %tmp
 }
 
 define i1 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: cmp {{.*}}, r1
     %tmp = icmp eq i32 %a, %b
     ret i1 %tmp
 }
 
 define i1 @f6(i32 %a, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: cmp.w {{.*}}, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = icmp eq i32 %tmp, %a
@@ -26,7 +26,7 @@ define i1 @f6(i32 %a, i32 %b) {
 }
 
 define i1 @f7(i32 %a, i32 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: cmp.w {{.*}}, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = icmp ne i32 %tmp, %a
@@ -34,7 +34,7 @@ define i1 @f7(i32 %a, i32 %b) {
 }
 
 define i1 @f8(i32 %a, i32 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: cmp.w {{.*}}, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = icmp eq i32 %a, %tmp
@@ -42,7 +42,7 @@ define i1 @f8(i32 %a, i32 %b) {
 }
 
 define i1 @f9(i32 %a, i32 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: cmp.w {{.*}}, {{.*}}, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-eor.ll b/test/CodeGen/Thumb2/thumb2-eor.ll
index 116a1a3519aa..b3e323c10d2e 100644
--- a/test/CodeGen/Thumb2/thumb2-eor.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -1,28 +1,28 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: eors r0, r1
     %tmp = xor i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: eors r0, r1
     %tmp = xor i32 %b, %a
     ret i32 %tmp
 }
 
 define i32 @f2b(i32 %a, i32 %b, i32 %c) {
-; CHECK: f2b:
+; CHECK-LABEL: f2b:
 ; CHECK: eor.w r0, r1, r2
     %tmp = xor i32 %b, %c
     ret i32 %tmp
 }
 
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: eor.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = xor i32 %a, %tmp
@@ -30,7 +30,7 @@ define i32 @f3(i32 %a, i32 %b) {
 }
 
 define i32 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: eor.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = xor i32 %tmp, %a
@@ -38,7 +38,7 @@ define i32 @f4(i32 %a, i32 %b) {
 }
 
 define i32 @f5(i32 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: eor.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = xor i32 %a, %tmp
@@ -46,7 +46,7 @@ define i32 @f5(i32 %a, i32 %b) {
 }
 
 define i32 @f6(i32 %a, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: eor.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
index 6b2e9dcf3d1f..5daa13df655d 100644
--- a/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -2,7 +2,7 @@
 
 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: eor {{.*}}#187
     %tmp = xor i32 %a, 187
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 
 ; 0x00aa00aa = 11141290
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: eor {{.*}}#11141290
     %tmp = xor i32 %a, 11141290 
     ret i32 %tmp
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a) {
 
 ; 0xcc00cc00 = 3422604288
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: eor {{.*}}#-872363008
     %tmp = xor i32 %a, 3422604288
     ret i32 %tmp
@@ -26,7 +26,7 @@ define i32 @f3(i32 %a) {
 
 ; 0xdddddddd = 3722304989
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: eor {{.*}}#-572662307
     %tmp = xor i32 %a, 3722304989
     ret i32 %tmp
@@ -34,7 +34,7 @@ define i32 @f4(i32 %a) {
 
 ; 0x00110000 = 1114112
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: eor {{.*}}#1114112
     %tmp = xor i32 %a, 1114112
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
index 5315535db045..d86a897a4a09 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -2,7 +2,7 @@
 ; XFAIL: *
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: it ne
 ; CHECK: cmpne
 	switch i32 %c, label %cond_next [
@@ -23,7 +23,7 @@ cond_next:
 ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
 define i32 @t2(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: ite gt
 ; CHECK: subgt
 ; CHECK: suble
@@ -71,7 +71,7 @@ entry:
 ; Tail call prevents use of ifcvt in this one.  Seems like a win though.
 define void @t3(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK-NOT: it lt
 ; CHECK-NOT: poplt
 ; CHECK: b.w _foo @ TAILCALL
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index af8fcc641247..13a1ca2e26cd 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -1,7 +1,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
-
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: ittt ne
 ; CHECK: cmpne
 ; CHECK: addne
@@ -24,7 +25,7 @@ cond_next:
 define i32 @t2(i32 %a, i32 %b) nounwind {
 entry:
 ; Do not if-convert when branches go to the different loops.
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK-NOT: ite gt
 ; CHECK-NOT: subgt
 ; CHECK-NOT: suble
@@ -71,10 +72,10 @@ entry:
 
 define void @t3(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: itt ge
 ; CHECK: movge r0, r1
-; CHECK: blge  _foo
+; CHECK: blge  {{_?}}foo
 	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
 	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
 
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 5aa9a735f250..403cd48035b4 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,8 +1,10 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-default-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-apple-ios -arm-no-restrict-it | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: it ne
 ; CHECK: cmpne
 ; CHECK: it hi
@@ -28,14 +30,14 @@ declare i32 @bar(...)
 
 define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
 entry:
-; CHECK: CountTree:
-; CHECK: itt eq
-; CHECK: moveq
-; CHECK: popeq
+; CHECK-LABEL: CountTree:
 ; CHECK: bne
 ; CHECK: cmp
 ; CHECK: it eq
 ; CHECK: cmpeq
+; CHECK: itt eq
+; CHECK: moveq
+; CHECK: popeq
 	br label %tailrecurse
 
 tailrecurse:		; preds = %bb, %entry
@@ -65,7 +67,7 @@ declare void @abort()
 
 define fastcc void @t1(%struct.SString* %word, i8 signext  %c) {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: it ne
 ; CHECK: popne {r7, pc}
 	%tmp1 = icmp eq %struct.SString* %word, null		; <i1> [#uses=1]
@@ -81,7 +83,7 @@ cond_false:		; preds = %entry
 
 define fastcc void @t2() nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: cmp r0, #0
 ; CHECK: %growMapping.exit
 	br i1 undef, label %bb.i.i3, label %growMapping.exit
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index bcf10eff729b..a71aa3fb613a 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -1,4 +1,6 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-apple-darwin -arm-no-restrict-it | FileCheck %s
 
 ; There shouldn't be a unconditional branch at end of bb52.
 ; rdar://7184787
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index b2328e780074..8716d80a2c8c 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -3,7 +3,7 @@
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
 define i32 @t1() {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: push {r7, lr}
 ; CHECK: pop {r7, pc}
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
@@ -13,7 +13,7 @@ define i32 @t1() {
 }
 
 define i32 @t2() {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: push {r7, lr}
 ; CHECK: ldm
 ; CHECK: pop {r7, pc}
@@ -25,7 +25,7 @@ define i32 @t2() {
 }
 
 define i32 @t3() {
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: push {r7, lr}
 ; CHECK: pop {r7, pc}
         %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index 88434f1c7d80..7f68f661fa9a 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(i32* %v) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldr r0, [r0]
         %tmp = load i32* %v
         ret i32 %tmp
@@ -10,7 +10,7 @@ entry:
 
 define i32 @f2(i32* %v) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldr.w r0, [r0, #4092]
         %tmp2 = getelementptr i32* %v, i32 1023
         %tmp = load i32* %tmp2
@@ -19,7 +19,7 @@ entry:
 
 define i32 @f3(i32* %v) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r1, #4096
 ; CHECK: ldr r0, [r0, r1]
         %tmp2 = getelementptr i32* %v, i32 1024
@@ -29,7 +29,7 @@ entry:
 
 define i32 @f4(i32 %base) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldr r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i32*
@@ -39,7 +39,7 @@ entry:
 
 define i32 @f5(i32 %base, i32 %offset) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldr r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i32*
@@ -49,7 +49,7 @@ entry:
 
 define i32 @f6(i32 %base, i32 %offset) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ldr.w r0, [r0, r1, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
@@ -60,7 +60,7 @@ entry:
 
 define i32 @f7(i32 %base, i32 %offset) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lsrs r1, r1, #2
 ; CHECK: ldr r0, [r0, r1]
 
diff --git a/test/CodeGen/Thumb2/thumb2-ldrb.ll b/test/CodeGen/Thumb2/thumb2-ldrb.ll
index bf1009743afc..c135effd796b 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -2,7 +2,7 @@
 
 define i8 @f1(i8* %v) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldrb r0, [r0]
         %tmp = load i8* %v
         ret i8 %tmp
@@ -10,7 +10,7 @@ entry:
 
 define i8 @f2(i8* %v) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldrb r0, [r0, #-1]
         %tmp2 = getelementptr i8* %v, i8 1023
         %tmp = load i8* %tmp2
@@ -19,7 +19,7 @@ entry:
 
 define i8 @f3(i32 %base) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r1, #4096
 ; CHECK: ldrb r0, [r0, r1]
         %tmp1 = add i32 %base, 4096
@@ -30,7 +30,7 @@ entry:
 
 define i8 @f4(i32 %base) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldrb r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i8*
@@ -40,7 +40,7 @@ entry:
 
 define i8 @f5(i32 %base, i32 %offset) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldrb r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i8*
@@ -50,7 +50,7 @@ entry:
 
 define i8 @f6(i32 %base, i32 %offset) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ldrb.w r0, [r0, r1, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
@@ -61,7 +61,7 @@ entry:
 
 define i8 @f7(i32 %base, i32 %offset) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lsrs r1, r1, #2
 ; CHECK: ldrb r0, [r0, r1]
         %tmp1 = lshr i32 %offset, 2
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index fee97bf68913..99f6aba65cf0 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -2,7 +2,7 @@
 
 define i16 @f1(i16* %v) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldrh r0, [r0]
         %tmp = load i16* %v
         ret i16 %tmp
@@ -10,7 +10,7 @@ entry:
 
 define i16 @f2(i16* %v) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldrh.w r0, [r0, #2046]
         %tmp2 = getelementptr i16* %v, i16 1023
         %tmp = load i16* %tmp2
@@ -19,7 +19,7 @@ entry:
 
 define i16 @f3(i16* %v) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r1, #4096
 ; CHECK: ldrh r0, [r0, r1]
         %tmp2 = getelementptr i16* %v, i16 2048
@@ -29,7 +29,7 @@ entry:
 
 define i16 @f4(i32 %base) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldrh r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i16*
@@ -39,7 +39,7 @@ entry:
 
 define i16 @f5(i32 %base, i32 %offset) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldrh r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i16*
@@ -49,7 +49,7 @@ entry:
 
 define i16 @f6(i32 %base, i32 %offset) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: ldrh.w r0, [r0, r1, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
@@ -60,7 +60,7 @@ entry:
 
 define i16 @f7(i32 %base, i32 %offset) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lsrs r1, r1, #2
 ; CHECK: ldrh r0, [r0, r1]
         %tmp1 = lshr i32 %offset, 2
diff --git a/test/CodeGen/Thumb2/thumb2-lsl.ll b/test/CodeGen/Thumb2/thumb2-lsl.ll
index 6b0818a34b9b..1b4853853a4e 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lsls r0, r0, #5
     %tmp = shl i32 %a, 5
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-lsl2.ll b/test/CodeGen/Thumb2/thumb2-lsl2.ll
index f283eef89a37..bc0978e68241 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lsls r0, r1
     %tmp = shl i32 %a, %b
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-lsr.ll b/test/CodeGen/Thumb2/thumb2-lsr.ll
index 7cbee54f381f..a3b207c1f90b 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lsrs r0, r0, #13
     %tmp = lshr i32 %a, 13
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-lsr2.ll b/test/CodeGen/Thumb2/thumb2-lsr2.ll
index 87800f9d73fb..ae55735fabbc 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: lsrs r0, r1
     %tmp = lshr i32 %a, %b
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
index 594d9742b0f9..709fa13dd3a1 100644
--- a/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -6,9 +6,9 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) {
     %tmp2 = add i32 %c, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	mla	r0, r0, r1, r2
-; NO_MULOPS: f1:
+; NO_MULOPS-LABEL: f1:
 ; NO_MULOPS: muls r0, r1, r0
 ; NO_MULOPS-NEXT: add r0, r2
 
@@ -17,8 +17,8 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     %tmp2 = add i32 %tmp1, %c
     ret i32 %tmp2
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	mla	r0, r0, r1, r2
-; NO_MULOPS: f2:
+; NO_MULOPS-LABEL: f2:
 ; NO_MULOPS: muls r0, r1, r0
 ; NO_MULOPS-NEXT: add r0, r2
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 58f9add0fc60..86e147b24018 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -5,7 +5,7 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) {
     %tmp2 = sub i32 %c, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	mls	r0, r0, r1, r2
 
 ; sub doesn't commute, so no mls for this one
@@ -14,6 +14,6 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     %tmp2 = sub i32 %tmp1, %c
     ret i32 %tmp2
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	muls	r0, r1, r0
 
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index adb6dde2c788..148bafec4014 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -4,14 +4,14 @@
 
 ; var 2.1 - 0x00ab00ab
 define i32 @t2_const_var2_1_ok_1(i32 %lhs) {
-;CHECK: t2_const_var2_1_ok_1:
+;CHECK-LABEL: t2_const_var2_1_ok_1:
 ;CHECK: add.w   r0, r0, #11206827
     %ret = add i32 %lhs, 11206827 ; 0x00ab00ab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_1_ok_2(i32 %lhs) {
-;CHECK: t2_const_var2_1_ok_2:
+;CHECK-LABEL: t2_const_var2_1_ok_2:
 ;CHECK: add.w   r0, r0, #11206656
 ;CHECK: adds    r0, #187
     %ret = add i32 %lhs, 11206843 ; 0x00ab00bb
@@ -19,7 +19,7 @@ define i32 @t2_const_var2_1_ok_2(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_1_ok_3(i32 %lhs) {
-;CHECK: t2_const_var2_1_ok_3:
+;CHECK-LABEL: t2_const_var2_1_ok_3:
 ;CHECK: add.w   r0, r0, #11206827
 ;CHECK: add.w   r0, r0, #16777216
     %ret = add i32 %lhs, 27984043 ; 0x01ab00ab
@@ -27,7 +27,7 @@ define i32 @t2_const_var2_1_ok_3(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_1_ok_4(i32 %lhs) {
-;CHECK: t2_const_var2_1_ok_4:
+;CHECK-LABEL: t2_const_var2_1_ok_4:
 ;CHECK: add.w   r0, r0, #16777472
 ;CHECK: add.w   r0, r0, #11206827
     %ret = add i32 %lhs, 27984299 ; 0x01ab01ab
@@ -35,7 +35,7 @@ define i32 @t2_const_var2_1_ok_4(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
-;CHECK: t2_const_var2_1_fail_1:
+;CHECK-LABEL: t2_const_var2_1_fail_1:
 ;CHECK: movw    r1, #43777
 ;CHECK: movt    r1, #427
 ;CHECK: add     r0, r1
@@ -45,14 +45,14 @@ define i32 @t2_const_var2_1_fail_1(i32 %lhs) {
 
 ; var 2.2 - 0xab00ab00
 define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
-;CHECK: t2_const_var2_2_ok_1:
+;CHECK-LABEL: t2_const_var2_2_ok_1:
 ;CHECK: add.w   r0, r0, #-1426019584
     %ret = add i32 %lhs, 2868947712 ; 0xab00ab00
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
-;CHECK: t2_const_var2_2_ok_2:
+;CHECK-LABEL: t2_const_var2_2_ok_2:
 ;CHECK: add.w   r0, r0, #2868903936
 ;CHECK: add.w   r0, r0, #47616
     %ret = add i32 %lhs, 2868951552 ; 0xab00ba00
@@ -60,7 +60,7 @@ define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
-;CHECK: t2_const_var2_2_ok_3:
+;CHECK-LABEL: t2_const_var2_2_ok_3:
 ;CHECK: add.w   r0, r0, #2868947712
 ;CHECK: adds    r0, #16
     %ret = add i32 %lhs, 2868947728 ; 0xab00ab10
@@ -68,7 +68,7 @@ define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_2_ok_4(i32 %lhs) {
-;CHECK: t2_const_var2_2_ok_4:
+;CHECK-LABEL: t2_const_var2_2_ok_4:
 ;CHECK: add.w   r0, r0, #2868947712
 ;CHECK: add.w   r0, r0, #1048592
     %ret = add i32 %lhs, 2869996304 ; 0xab10ab10
@@ -76,7 +76,7 @@ define i32 @t2_const_var2_2_ok_4(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
-;CHECK: t2_const_var2_2_fail_1:
+;CHECK-LABEL: t2_const_var2_2_fail_1:
 ;CHECK: movw    r1, #43792
 ;CHECK: movt    r1, #4267
 ;CHECK: add     r0, r1
@@ -86,14 +86,14 @@ define i32 @t2_const_var2_2_fail_1(i32 %lhs) {
 
 ; var 2.3 - 0xabababab
 define i32 @t2_const_var2_3_ok_1(i32 %lhs) {
-;CHECK: t2_const_var2_3_ok_1:
+;CHECK-LABEL: t2_const_var2_3_ok_1:
 ;CHECK: add.w   r0, r0, #-1414812757
     %ret = add i32 %lhs, 2880154539 ; 0xabababab
     ret i32 %ret
 }
 
 define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
-;CHECK: t2_const_var2_3_fail_1:
+;CHECK-LABEL: t2_const_var2_3_fail_1:
 ;CHECK: movw    r1, #43962
 ;CHECK: movt    r1, #43947
 ;CHECK: add     r0, r1
@@ -102,7 +102,7 @@ define i32 @t2_const_var2_3_fail_1(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
-;CHECK: t2_const_var2_3_fail_2:
+;CHECK-LABEL: t2_const_var2_3_fail_2:
 ;CHECK: movw    r1, #47787
 ;CHECK: movt    r1, #43947
 ;CHECK: add     r0, r1
@@ -111,7 +111,7 @@ define i32 @t2_const_var2_3_fail_2(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
-;CHECK: t2_const_var2_3_fail_3:
+;CHECK-LABEL: t2_const_var2_3_fail_3:
 ;CHECK: movw    r1, #43947
 ;CHECK: movt    r1, #43962
 ;CHECK: add     r0, r1
@@ -120,7 +120,7 @@ define i32 @t2_const_var2_3_fail_3(i32 %lhs) {
 }
 
 define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
-;CHECK: t2_const_var2_3_fail_4:
+;CHECK-LABEL: t2_const_var2_3_fail_4:
 ;CHECK: movw    r1, #43947
 ;CHECK: movt    r1, #47787
 ;CHECK: add     r0, r1
@@ -130,21 +130,21 @@ define i32 @t2_const_var2_3_fail_4(i32 %lhs) {
 
 ; var 3 - 0x0F000000
 define i32 @t2_const_var3_1_ok_1(i32 %lhs) {
-;CHECK: t2_const_var3_1_ok_1:
+;CHECK-LABEL: t2_const_var3_1_ok_1:
 ;CHECK: add.w   r0, r0, #251658240
     %ret = add i32 %lhs, 251658240 ; 0x0F000000
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_2_ok_1(i32 %lhs) {
-;CHECK: t2_const_var3_2_ok_1:
+;CHECK-LABEL: t2_const_var3_2_ok_1:
 ;CHECK: add.w   r0, r0, #3948544
     %ret = add i32 %lhs, 3948544 ; 0b00000000001111000100000000000000
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_2_ok_2(i32 %lhs) {
-;CHECK: t2_const_var3_2_ok_2:
+;CHECK-LABEL: t2_const_var3_2_ok_2:
 ;CHECK: add.w   r0, r0, #2097152
 ;CHECK: add.w   r0, r0, #1843200
     %ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000
@@ -152,21 +152,21 @@ define i32 @t2_const_var3_2_ok_2(i32 %lhs) {
 }
 
 define i32 @t2_const_var3_3_ok_1(i32 %lhs) {
-;CHECK: t2_const_var3_3_ok_1:
+;CHECK-LABEL: t2_const_var3_3_ok_1:
 ;CHECK: add.w   r0, r0, #258
     %ret = add i32 %lhs, 258 ; 0b00000000000000000000000100000010
     ret i32 %ret
 }
 
 define i32 @t2_const_var3_4_ok_1(i32 %lhs) {
-;CHECK: t2_const_var3_4_ok_1:
+;CHECK-LABEL: t2_const_var3_4_ok_1:
 ;CHECK: add.w   r0, r0, #-268435456
     %ret = add i32 %lhs, 4026531840 ; 0xF0000000
     ret i32 %ret
 }
 
 define i32 @t2MOVTi16_ok_1(i32 %a) {
-; CHECK: t2MOVTi16_ok_1:
+; CHECK-LABEL: t2MOVTi16_ok_1:
 ; CHECK: movt r0, #1234
     %1 = and i32 %a, 65535
     %2 = shl i32 1234, 16
@@ -176,7 +176,7 @@ define i32 @t2MOVTi16_ok_1(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_1(i32 %a) {
-; CHECK: t2MOVTi16_test_1:
+; CHECK-LABEL: t2MOVTi16_test_1:
 ; CHECK: movt r0, #1234
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
@@ -189,7 +189,7 @@ define i32 @t2MOVTi16_test_1(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_2(i32 %a) {
-; CHECK: t2MOVTi16_test_2:
+; CHECK-LABEL: t2MOVTi16_test_2:
 ; CHECK: movt r0, #1234
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
@@ -203,7 +203,7 @@ define i32 @t2MOVTi16_test_2(i32 %a) {
 }
 
 define i32 @t2MOVTi16_test_3(i32 %a) {
-; CHECK: t2MOVTi16_test_3:
+; CHECK-LABEL: t2MOVTi16_test_3:
 ; CHECK: movt r0, #1234
     %1 = shl i32  255,   8
     %2 = shl i32 1234,   8
@@ -220,7 +220,7 @@ define i32 @t2MOVTi16_test_3(i32 %a) {
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: movs r0, #171
     %tmp = add i32 0, 171
     ret i32 %tmp
@@ -228,7 +228,7 @@ define i32 @f1(i32 %a) {
 
 ; 1179666 = 0x00120012
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mov.w r0, #1179666
     %tmp = add i32 0, 1179666
     ret i32 %tmp
@@ -236,7 +236,7 @@ define i32 @f2(i32 %a) {
 
 ; 872428544 = 0x34003400
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r0, #872428544
     %tmp = add i32 0, 872428544
     ret i32 %tmp
@@ -244,7 +244,7 @@ define i32 @f3(i32 %a) {
 
 ; 1448498774 = 0x56565656
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: mov.w r0, #1448498774
     %tmp = add i32 0, 1448498774
     ret i32 %tmp
@@ -252,14 +252,14 @@ define i32 @f4(i32 %a) {
 
 ; 66846720 = 0x03fc0000
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mov.w r0, #66846720
     %tmp = add i32 0, 66846720
     ret i32 %tmp
 }
 
 define i32 @f6(i32 %a) {
-;CHECK: f6
+;CHECK-LABEL: f6:
 ;CHECK: movw    r0, #65535
     %tmp = add i32 0, 65535
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index a8134e630821..a989989b43f7 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: muls r0, r1, r0
     %tmp = mul i32 %a, %b
     ret i32 %tmp
@@ -12,7 +12,7 @@ define i32 @f1(i32 %a, i32 %b, i32 %c) {
 
 define %struct.CMPoint* @t1(i32 %i, i32 %j, i32 %n, %struct.CMPoint* %thePoints) nounwind readnone ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: mla     r0, r2, r0, r1
 ; CHECK: add.w   r0, r0, r0, lsl #3
 ; CHECK: add.w   r0, r3, r0, lsl #2
diff --git a/test/CodeGen/Thumb2/thumb2-mvn.ll b/test/CodeGen/Thumb2/thumb2-mvn.ll
index a8c8f831c75a..a5592f6b9276 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -2,7 +2,7 @@
 
 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvn r0, #187
     %tmp = xor i32 4294967295, 187
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 
 ; 0x00aa00aa = 11141290
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mvn r0, #11141290
     %tmp = xor i32 4294967295, 11141290 
     ret i32 %tmp
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a) {
 
 ; 0xcc00cc00 = 3422604288
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mvn r0, #-872363008
     %tmp = xor i32 4294967295, 3422604288
     ret i32 %tmp
@@ -26,7 +26,7 @@ define i32 @f3(i32 %a) {
 
 ; 0x00110000 = 1114112
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvn r0, #1114112
     %tmp = xor i32 4294967295, 1114112
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-mvn2.ll b/test/CodeGen/Thumb2/thumb2-mvn2.ll
index 375d0aad5021..bce54a352e80 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -1,21 +1,21 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: mvns r0, r0
     %tmp = xor i32 4294967295, %a
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: mvns r0, r0
     %tmp = xor i32 %a, 4294967295
     ret i32 %tmp
 }
 
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: mvn.w r0, r0, lsl #5
     %tmp = shl i32 %a, 5
     %tmp1 = xor i32 %tmp, 4294967295
@@ -23,7 +23,7 @@ define i32 @f5(i32 %a) {
 }
 
 define i32 @f6(i32 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: mvn.w r0, r0, lsr #6
     %tmp = lshr i32 %a, 6
     %tmp1 = xor i32 %tmp, 4294967295
@@ -31,7 +31,7 @@ define i32 @f6(i32 %a) {
 }
 
 define i32 @f7(i32 %a) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: mvn.w r0, r0, asr #7
     %tmp = ashr i32 %a, 7
     %tmp1 = xor i32 %tmp, 4294967295
@@ -39,7 +39,7 @@ define i32 @f7(i32 %a) {
 }
 
 define i32 @f8(i32 %a) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: mvn.w r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-neg.ll b/test/CodeGen/Thumb2/thumb2-neg.ll
index 6bf11ec90621..40e809862140 100644
--- a/test/CodeGen/Thumb2/thumb2-neg.ll
+++ b/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: rsbs r0, r0, #0
     %tmp = sub i32 0, %a
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll
index 97a3fd75f068..5bbe653cd12e 100644
--- a/test/CodeGen/Thumb2/thumb2-orn.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -6,7 +6,7 @@ define i32 @f1(i32 %a, i32 %b) {
     %tmp1 = or i32 %a, %tmp
     ret i32 %tmp1
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	orn	r0, r0, r1
 
 define i32 @f2(i32 %a, i32 %b) {
@@ -14,7 +14,7 @@ define i32 @f2(i32 %a, i32 %b) {
     %tmp1 = or i32 %tmp, %a
     ret i32 %tmp1
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	orn	r0, r0, r1
 
 define i32 @f3(i32 %a, i32 %b) {
@@ -22,7 +22,7 @@ define i32 @f3(i32 %a, i32 %b) {
     %tmp1 = or i32 %a, %tmp
     ret i32 %tmp1
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	orn	r0, r0, r1
 
 define i32 @f4(i32 %a, i32 %b) {
@@ -30,7 +30,7 @@ define i32 @f4(i32 %a, i32 %b) {
     %tmp1 = or i32 %tmp, %a
     ret i32 %tmp1
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: 	orn	r0, r0, r1
 
 define i32 @f5(i32 %a, i32 %b) {
@@ -39,7 +39,7 @@ define i32 @f5(i32 %a, i32 %b) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: 	orn	r0, r0, r1, lsl #5
 
 define i32 @f6(i32 %a, i32 %b) {
@@ -48,7 +48,7 @@ define i32 @f6(i32 %a, i32 %b) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: 	orn	r0, r0, r1, lsr #6
 
 define i32 @f7(i32 %a, i32 %b) {
@@ -57,7 +57,7 @@ define i32 @f7(i32 %a, i32 %b) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: 	orn	r0, r0, r1, asr #7
 
 define i32 @f8(i32 %a, i32 %b) {
@@ -68,5 +68,5 @@ define i32 @f8(i32 %a, i32 %b) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: 	orn	r0, r0, r0, ror #8
diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll
index 34ab3a56663c..eff3ae38a056 100644
--- a/test/CodeGen/Thumb2/thumb2-orn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -7,7 +7,7 @@ define i32 @f1(i32 %a) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	orn	r0, r0, #187
 
 ; 0x00aa00aa = 11141290
@@ -16,7 +16,7 @@ define i32 @f2(i32 %a) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	orn	r0, r0, #11141290
 
 ; 0xcc00cc00 = 3422604288
@@ -25,7 +25,7 @@ define i32 @f3(i32 %a) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	orn	r0, r0, #-872363008
 
 ; 0x00110000 = 1114112
@@ -34,5 +34,5 @@ define i32 @f5(i32 %a) {
     %tmp2 = or i32 %a, %tmp1
     ret i32 %tmp2
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: 	orn	r0, r0, #1114112
diff --git a/test/CodeGen/Thumb2/thumb2-orr.ll b/test/CodeGen/Thumb2/thumb2-orr.ll
index 89ab7b1edf70..13ed8620059b 100644
--- a/test/CodeGen/Thumb2/thumb2-orr.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: orrs r0, r1
     %tmp2 = or i32 %a, %b
     ret i32 %tmp2
 }
 
 define i32 @f5(i32 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: orr.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp2 = or i32 %a, %tmp
@@ -16,7 +16,7 @@ define i32 @f5(i32 %a, i32 %b) {
 }
 
 define i32 @f6(i32 %a, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: orr.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp2 = or i32 %a, %tmp
@@ -24,7 +24,7 @@ define i32 @f6(i32 %a, i32 %b) {
 }
 
 define i32 @f7(i32 %a, i32 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: orr.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp2 = or i32 %a, %tmp
@@ -32,7 +32,7 @@ define i32 @f7(i32 %a, i32 %b) {
 }
 
 define i32 @f8(i32 %a, i32 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: orr.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll
index 8f7a3c2a61a9..837bb1cb07c1 100644
--- a/test/CodeGen/Thumb2/thumb2-orr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -6,7 +6,7 @@ define i32 @f1(i32 %a) {
     %tmp2 = or i32 %a, 187
     ret i32 %tmp2
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	orr	r0, r0, #187
 
 ; 0x00aa00aa = 11141290
@@ -14,7 +14,7 @@ define i32 @f2(i32 %a) {
     %tmp2 = or i32 %a, 11141290 
     ret i32 %tmp2
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	orr	r0, r0, #11141290
 
 ; 0xcc00cc00 = 3422604288
@@ -22,7 +22,7 @@ define i32 @f3(i32 %a) {
     %tmp2 = or i32 %a, 3422604288
     ret i32 %tmp2
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	orr	r0, r0, #-872363008
 
 ; 0x44444444 = 1145324612
@@ -30,7 +30,7 @@ define i32 @f4(i32 %a) {
     %tmp2 = or i32 %a, 1145324612
     ret i32 %tmp2
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: 	orr	r0, r0, #1145324612
 
 ; 0x00110000 = 1114112
@@ -38,5 +38,5 @@ define i32 @f5(i32 %a) {
     %tmp2 = or i32 %a, 1114112
     ret i32 %tmp2
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: 	orr	r0, r0, #1114112
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
index 2e8bb1d60934..1052dd2a072e 100644
--- a/test/CodeGen/Thumb2/thumb2-pack.ll
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -88,10 +88,33 @@ define i32 @test7(i32 %X, i32 %Y) {
 }
 
 ; CHECK: test8
-; CHECK: pkhtb   r0, r0, r1, asr #22
+; CHECK-NOT: pkhtb   r0, r0, r1, asr #22
+;   pkhtb does an arithmetic shift, not a logical shift. Make sure we don't
+;   use it for problematic cases where it would matter whether sign bits
+;   are shifted in.
 define i32 @test8(i32 %X, i32 %Y) {
 	%tmp1 = and i32 %X, -65536
 	%tmp3 = lshr i32 %Y, 22
 	%tmp57 = or i32 %tmp3, %tmp1
 	ret i32 %tmp57
 }
+
+; CHECK-LABEL: test9:
+; CHECK: pkhtb r0, r0, r1, asr #16
+define i32 @test9(i32 %src1, i32 %src2) {
+entry:
+    %tmp = and i32 %src1, -65536
+    %tmp2 = lshr i32 %src2, 16
+    %tmp3 = or i32 %tmp, %tmp2
+    ret i32 %tmp3
+}
+
+; CHECK: test10
+; CHECK: pkhtb   r0, r0, r1, asr #22
+define i32 @test10(i32 %X, i32 %Y) {
+	%tmp1 = and i32 %X, -65536
+	%tmp3 = ashr i32 %Y, 22
+	%tmp57 = or i32 %tmp3, %tmp1
+	ret i32 %tmp57
+}
+
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index b469bbd3f7b5..67cd62362fe9 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7,+t2xtpk | FileCheck %s
 
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: rev r0, r0
     %tmp = tail call i32 @llvm.bswap.i32(i32 %a)
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 
 define i32 @f2(i32 %X) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: revsh r0, r0
         %tmp1 = lshr i32 %X, 8
         %tmp1.upgrd.1 = trunc i32 %tmp1 to i16
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 5ad92cd7290d..2a218eae9752 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 ; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=THUMB1
 
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	ror.w	r0, r0, #22
 define i32 @f1(i32 %a) {
     %l8 = shl i32 %a, 10
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
     ret i32 %tmp
 }
 
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NOT: and
 ; CHECK: ror
 ; THUMB1: f2
diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll
index 15185be94621..150a25f51b54 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -5,7 +5,7 @@ define i32 @f1(i32 %a, i32 %b) {
     %tmp1 = sub i32 %tmp, %a
     ret i32 %tmp1
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	rsb	r0, r0, r1, lsl #5
 
 define i32 @f2(i32 %a, i32 %b) {
@@ -13,7 +13,7 @@ define i32 @f2(i32 %a, i32 %b) {
     %tmp1 = sub i32 %tmp, %a
     ret i32 %tmp1
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	rsb	r0, r0, r1, lsr #6
 
 define i32 @f3(i32 %a, i32 %b) {
@@ -21,7 +21,7 @@ define i32 @f3(i32 %a, i32 %b) {
     %tmp1 = sub i32 %tmp, %a
     ret i32 %tmp1
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	rsb	r0, r0, r1, asr #7
 
 define i32 @f4(i32 %a, i32 %b) {
@@ -31,5 +31,5 @@ define i32 @f4(i32 %a, i32 %b) {
     %tmp1 = sub i32 %tmp, %a
     ret i32 %tmp1
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: 	rsb	r0, r0, r0, ror #8
diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll
index 61fb619c40e7..15aa8af3b83b 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb2.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -5,7 +5,7 @@ define i32 @f1(i32 %a) {
     %tmp = sub i32 171, %a
     ret i32 %tmp
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	rsb.w	r0, r0, #171
 
 ; 1179666 = 0x00120012
@@ -13,7 +13,7 @@ define i32 @f2(i32 %a) {
     %tmp = sub i32 1179666, %a
     ret i32 %tmp
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	rsb.w	r0, r0, #1179666
 
 ; 872428544 = 0x34003400
@@ -21,7 +21,7 @@ define i32 @f3(i32 %a) {
     %tmp = sub i32 872428544, %a
     ret i32 %tmp
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	rsb.w	r0, r0, #872428544
 
 ; 1448498774 = 0x56565656
@@ -29,7 +29,7 @@ define i32 @f4(i32 %a) {
     %tmp = sub i32 1448498774, %a
     ret i32 %tmp
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: 	rsb.w	r0, r0, #1448498774
 
 ; 66846720 = 0x03fc0000
@@ -37,5 +37,5 @@ define i32 @f5(i32 %a) {
     %tmp = sub i32 66846720, %a
     ret i32 %tmp
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: 	rsb.w	r0, r0, #66846720
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
index 492e5f0eacc0..0c37984ba3e7 100644
--- a/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -54,7 +54,7 @@ define i64 @f6(i64 %a) {
 
 ; Example from numerics code that manually computes wider-than-64 values.
 ;
-; CHECK: livecarry:
+; CHECK-LABEL: livecarry:
 ; CHECK: adds
 ; CHECK: adc
 define i64 @livecarry(i64 %carry, i32 %digit) nounwind {
diff --git a/test/CodeGen/Thumb2/thumb2-select.ll b/test/CodeGen/Thumb2/thumb2-select.ll
index 2dcf8aaa24c5..5f5fa1992516 100644
--- a/test/CodeGen/Thumb2/thumb2-select.ll
+++ b/test/CodeGen/Thumb2/thumb2-select.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -show-mc-encoding | FileCheck %s
 
 define i32 @f1(i32 %a.s) {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: it eq
 ; CHECK: moveq
 
@@ -13,7 +13,7 @@ entry:
 
 define i32 @f2(i32 %a.s) {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: it gt
 ; CHECK: movgt
     %tmp = icmp sgt i32 %a.s, 4
@@ -23,7 +23,7 @@ entry:
 
 define i32 @f3(i32 %a.s, i32 %b.s) {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: it lt
 ; CHECK: movlt
     %tmp = icmp slt i32 %a.s, %b.s
@@ -33,7 +33,7 @@ entry:
 
 define i32 @f4(i32 %a.s, i32 %b.s) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: it le
 ; CHECK: movle
 
@@ -44,7 +44,7 @@ entry:
 
 define i32 @f5(i32 %a.u, i32 %b.u) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: it ls
 ; CHECK: movls
     %tmp = icmp ule i32 %a.u, %b.u
@@ -54,7 +54,7 @@ entry:
 
 define i32 @f6(i32 %a.u, i32 %b.u) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: it hi
 ; CHECK: movhi
     %tmp = icmp ugt i32 %a.u, %b.u
@@ -64,9 +64,9 @@ entry:
 
 define i32 @f7(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: it hi
-; CHECK: lsrhi.w
+; CHECK: lsrhi {{r[0-9]+}}
     %tmp1 = icmp ugt i32 %a, %b
     %tmp2 = udiv i32 %c, 3
     %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
@@ -75,9 +75,9 @@ entry:
 
 define i32 @f8(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: it lo
-; CHECK: lsllo.w
+; CHECK: lsllo {{r[0-9]+}}
     %tmp1 = icmp ult i32 %a, %b
     %tmp2 = mul i32 %c, 4
     %tmp3 = select i1 %tmp1, i32 %tmp2, i32 3
@@ -86,7 +86,7 @@ entry:
 
 define i32 @f9(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: it ge
 ; CHECK: rorge.w
     %tmp1 = icmp sge i32 %a, %b
@@ -96,3 +96,20 @@ entry:
     %tmp5 = select i1 %tmp1, i32 %tmp4, i32 3
     ret i32 %tmp5
 }
+
+define i32 @f10(i32 %a, i32 %b) {
+; CHECK-LABEL: f10:
+; CHECK: movwne {{r[0-9]+}}, #1234    @ encoding: [0x40,0xf2,0xd2,0x4{{[0-9a-f]+}}]
+    %tst = icmp ne i32 %a, %b
+    %val = select i1 %tst, i32 1234, i32 12345
+    ret i32 %val
+}
+
+; Make sure we pick the Thumb encoding for movw/movt
+define i32 @f11(i32 %a, i32 %b) {
+; CHECK-LABEL: f11:
+; CHECK: movwne {{r[0-9]+}}, #50033         @ encoding: [0x4c,0xf2,0x71,0x3{{[0-9a-f]+}}]
+    %tst = icmp ne i32 %a, %b
+    %val = select i1 %tst, i32 123454321, i32 543212345
+    ret i32 %val
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 5bff268e2c3e..52c106344910 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -10,7 +10,7 @@
 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
-; CHECK: aaa:
+; CHECK-LABEL: aaa:
 ; CHECK: bic r4, r4, #15
 ; CHECK: vst1.64 {{.*}}[{{.*}}:128]
 ; CHECK: vld1.64 {{.*}}[{{.*}}:128]
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index 11bb936d1e64..fb5fa168e8b8 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32* %v) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: str r0, [r1]
         store i32 %a, i32* %v
         ret i32 %a
 }
 
 define i32 @f2(i32 %a, i32* %v) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: str.w r0, [r1, #4092]
         %tmp2 = getelementptr i32* %v, i32 1023
         store i32 %a, i32* %tmp2
@@ -16,7 +16,7 @@ define i32 @f2(i32 %a, i32* %v) {
 }
 
 define i32 @f2a(i32 %a, i32* %v) {
-; CHECK: f2a:
+; CHECK-LABEL: f2a:
 ; CHECK: str r0, [r1, #-128]
         %tmp2 = getelementptr i32* %v, i32 -32
         store i32 %a, i32* %tmp2
@@ -24,7 +24,7 @@ define i32 @f2a(i32 %a, i32* %v) {
 }
 
 define i32 @f3(i32 %a, i32* %v) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r2, #4096
 ; CHECK: str r0, [r1, r2]
         %tmp2 = getelementptr i32* %v, i32 1024
@@ -34,7 +34,7 @@ define i32 @f3(i32 %a, i32* %v) {
 
 define i32 @f4(i32 %a, i32 %base) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: str r0, [r1, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i32*
@@ -44,7 +44,7 @@ entry:
 
 define i32 @f5(i32 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: str r0, [r1, r2]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i32*
@@ -54,7 +54,7 @@ entry:
 
 define i32 @f6(i32 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: str.w r0, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
@@ -65,7 +65,7 @@ entry:
 
 define i32 @f7(i32 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lsrs r2, r2, #2
 ; CHECK: str r0, [r1, r2]
         %tmp1 = lshr i32 %offset, 2
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index bbfb447ca3ef..2133d2807006 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i16 @test1(i32* %X, i16* %A) {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: strh {{.*}}[{{.*}}], #-4
         %Y = load i32* %X               ; <i32> [#uses=1]
         %tmp1 = trunc i32 %Y to i16             ; <i16> [#uses=1]
@@ -12,7 +12,7 @@ define i16 @test1(i32* %X, i16* %A) {
 }
 
 define i32 @test2(i32* %X, i32* %A) {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: str {{.*}}[{{.*}}],
         %Y = load i32* %X               ; <i32> [#uses=1]
         store i32 %Y, i32* %A
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index 7978e7fa918e..cc39b7d585c5 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i8 @f1(i8 %a, i8* %v) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: strb r0, [r1]
         store i8 %a, i8* %v
         ret i8 %a
 }
 
 define i8 @f2(i8 %a, i8* %v) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: strb.w r0, [r1, #4092]
         %tmp2 = getelementptr i8* %v, i32 4092
         store i8 %a, i8* %tmp2
@@ -16,7 +16,7 @@ define i8 @f2(i8 %a, i8* %v) {
 }
 
 define i8 @f2a(i8 %a, i8* %v) {
-; CHECK: f2a:
+; CHECK-LABEL: f2a:
 ; CHECK: strb r0, [r1, #-128]
         %tmp2 = getelementptr i8* %v, i32 -128
         store i8 %a, i8* %tmp2
@@ -24,7 +24,7 @@ define i8 @f2a(i8 %a, i8* %v) {
 }
 
 define i8 @f3(i8 %a, i8* %v) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r2, #4096
 ; CHECK: strb r0, [r1, r2]
         %tmp2 = getelementptr i8* %v, i32 4096
@@ -34,7 +34,7 @@ define i8 @f3(i8 %a, i8* %v) {
 
 define i8 @f4(i8 %a, i32 %base) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: strb r0, [r1, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i8*
@@ -44,7 +44,7 @@ entry:
 
 define i8 @f5(i8 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: strb r0, [r1, r2]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i8*
@@ -54,7 +54,7 @@ entry:
 
 define i8 @f6(i8 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: strb.w r0, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
@@ -65,7 +65,7 @@ entry:
 
 define i8 @f7(i8 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lsrs r2, r2, #2
 ; CHECK: strb r0, [r1, r2]
         %tmp1 = lshr i32 %offset, 2
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index 97110a726f47..d68693830518 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i16 @f1(i16 %a, i16* %v) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: strh r0, [r1]
         store i16 %a, i16* %v
         ret i16 %a
 }
 
 define i16 @f2(i16 %a, i16* %v) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: strh.w r0, [r1, #4092]
         %tmp2 = getelementptr i16* %v, i32 2046
         store i16 %a, i16* %tmp2
@@ -16,7 +16,7 @@ define i16 @f2(i16 %a, i16* %v) {
 }
 
 define i16 @f2a(i16 %a, i16* %v) {
-; CHECK: f2a:
+; CHECK-LABEL: f2a:
 ; CHECK: strh r0, [r1, #-128]
         %tmp2 = getelementptr i16* %v, i32 -64
         store i16 %a, i16* %tmp2
@@ -24,7 +24,7 @@ define i16 @f2a(i16 %a, i16* %v) {
 }
 
 define i16 @f3(i16 %a, i16* %v) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: mov.w r2, #4096
 ; CHECK: strh r0, [r1, r2]
         %tmp2 = getelementptr i16* %v, i32 2048
@@ -34,7 +34,7 @@ define i16 @f3(i16 %a, i16* %v) {
 
 define i16 @f4(i16 %a, i32 %base) {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: strh r0, [r1, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i16*
@@ -44,7 +44,7 @@ entry:
 
 define i16 @f5(i16 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: strh r0, [r1, r2]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i16*
@@ -54,7 +54,7 @@ entry:
 
 define i16 @f6(i16 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: strh.w r0, [r1, r2, lsl #2]
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
@@ -65,7 +65,7 @@ entry:
 
 define i16 @f7(i16 %a, i32 %base, i32 %offset) {
 entry:
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: lsrs r2, r2, #2
 ; CHECK: strh r0, [r1, r2]
         %tmp1 = lshr i32 %offset, 2
diff --git a/test/CodeGen/Thumb2/thumb2-sub.ll b/test/CodeGen/Thumb2/thumb2-sub.ll
index 95335a2ee2cc..f83dfe2e00a4 100644
--- a/test/CodeGen/Thumb2/thumb2-sub.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -2,7 +2,7 @@
 
 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: subs r0, #171
     %tmp = sub i32 %a, 171
     ret i32 %tmp
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a) {
 
 ; 1179666 = 0x00120012
 define i32 @f2(i32 %a) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sub.w r0, r0, #1179666
     %tmp = sub i32 %a, 1179666
     ret i32 %tmp
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a) {
 
 ; 872428544 = 0x34003400
 define i32 @f3(i32 %a) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sub.w r0, r0, #872428544
     %tmp = sub i32 %a, 872428544
     ret i32 %tmp
@@ -26,7 +26,7 @@ define i32 @f3(i32 %a) {
 
 ; 1448498774 = 0x56565656
 define i32 @f4(i32 %a) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sub.w r0, r0, #1448498774
     %tmp = sub i32 %a, 1448498774
     ret i32 %tmp
@@ -34,7 +34,7 @@ define i32 @f4(i32 %a) {
 
 ; 510 = 0x000001fe
 define i32 @f5(i32 %a) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sub.w r0, r0, #510
     %tmp = sub i32 %a, 510
     ret i32 %tmp
@@ -42,7 +42,7 @@ define i32 @f5(i32 %a) {
 
 ; Don't change this to an add.
 define i32 @f6(i32 %a) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: subs r0, #1
     %tmp = sub i32 %a, 1
     ret i32 %tmp
diff --git a/test/CodeGen/Thumb2/thumb2-sub2.ll b/test/CodeGen/Thumb2/thumb2-sub2.ll
index bb99cbd67fcf..47eb1e1a36cf 100644
--- a/test/CodeGen/Thumb2/thumb2-sub2.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -4,5 +4,5 @@ define i32 @f1(i32 %a) {
     %tmp = sub i32 %a, 4095
     ret i32 %tmp
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: 	subw	r0, r0, #4095
diff --git a/test/CodeGen/Thumb2/thumb2-sub4.ll b/test/CodeGen/Thumb2/thumb2-sub4.ll
index a040d170f935..ff1441ac64dd 100644
--- a/test/CodeGen/Thumb2/thumb2-sub4.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
 
 define i32 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: subs r0, r0, r1
     %tmp = sub i32 %a, %b
     ret i32 %tmp
 }
 
 define i32 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: sub.w r0, r0, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = sub i32 %a, %tmp
@@ -16,7 +16,7 @@ define i32 @f2(i32 %a, i32 %b) {
 }
 
 define i32 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: sub.w r0, r0, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = sub i32 %a, %tmp
@@ -24,7 +24,7 @@ define i32 @f3(i32 %a, i32 %b) {
 }
 
 define i32 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: sub.w r0, r0, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = sub i32 %a, %tmp
@@ -32,7 +32,7 @@ define i32 @f4(i32 %a, i32 %b) {
 }
 
 define i32 @f5(i32 %a, i32 %b) {
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: sub.w r0, r0, r0, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll
index 6edd789beec5..5941dd6ec89f 100644
--- a/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: subs.w r0, r0, r2
 ; To test dead_carry, +32bit prevents sbc conveting to 16-bit sbcs
 ; CHECK: sbc.w  r1, r1, r3
diff --git a/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll b/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
index ab888e694cfc..792ebef5f9bc 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
@@ -1,28 +1,28 @@
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s
 
 define i32 @test1(i16 zeroext %z) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: sxth
   %r = sext i16 %z to i32
   ret i32 %r
 }
 
 define i32 @test2(i8 zeroext %z) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: sxtb
   %r = sext i8 %z to i32
   ret i32 %r
 }
 
 define i32 @test3(i16 signext %z) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: uxth
   %r = zext i16 %z to i32
   ret i32 %r
 }
 
 define i32 @test4(i8 signext %z) nounwind {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: uxtb
   %r = zext i8 %z to i32
   ret i32 %r
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
index a9d71d6bda15..d57638bbb4f6 100644
--- a/test/CodeGen/Thumb2/thumb2-tbb.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -3,7 +3,7 @@
 
 define void @bar(i32 %n.u) {
 entry:
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: tbb
 ; CHECK: .data_region jt8
 ; CHECK: .end_data_region
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
index cd9c8e1015b2..bf1c7c613ab5 100644
--- a/test/CodeGen/Thumb2/thumb2-tbh.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -15,7 +15,7 @@ declare void @Z_fatal(i8*) noreturn nounwind
 declare noalias i8* @calloc(i32, i32) nounwind
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK: tbb
 entry:
 	br label %bb42.i
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index d453f469abc7..5acda35b4948 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -9,7 +9,7 @@ define i1 @f2(i32 %a) {
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	teq.w	{{.*}}, #187
 
 ; 0x00aa00aa = 11141290
@@ -18,7 +18,7 @@ define i1 @f3(i32 %a) {
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	teq.w	{{.*}}, #11141290
 
 ; 0xcc00cc00 = 3422604288
@@ -27,7 +27,7 @@ define i1 @f6(i32 %a) {
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: 	teq.w	{{.*}}, #-872363008
 
 ; 0xdddddddd = 3722304989
@@ -36,7 +36,7 @@ define i1 @f7(i32 %a) {
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
 }
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: 	teq.w	{{.*}}, #-572662307
 
 ; 0xdddddddd = 3722304989
@@ -52,6 +52,6 @@ define i1 @f10(i32 %a) {
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: 	teq.w	{{.*}}, #1114112
 
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 67fe82ee5202..31eafea614de 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -9,7 +9,7 @@ define i1 @f2(i32 %a) {
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: 	tst.w	{{.*}}, #187
 
 ; 0x00aa00aa = 11141290
@@ -18,7 +18,7 @@ define i1 @f3(i32 %a) {
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: 	tst.w	{{.*}}, #11141290
 
 ; 0xcc00cc00 = 3422604288
@@ -27,7 +27,7 @@ define i1 @f6(i32 %a) {
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: 	tst.w	{{.*}}, #-872363008
 
 ; 0xdddddddd = 3722304989
@@ -36,7 +36,7 @@ define i1 @f7(i32 %a) {
     %tmp1 = icmp eq i32 %tmp, 0
     ret i1 %tmp1
 }
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: 	tst.w	{{.*}}, #-572662307
 
 ; 0x00110000 = 1114112
@@ -45,5 +45,5 @@ define i1 @f10(i32 %a) {
     %tmp1 = icmp eq i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f10:
+; CHECK-LABEL: f10:
 ; CHECK: 	tst.w	{{.*}}, #1114112
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index e3fe792005f8..f71e91d1e9de 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -4,7 +4,7 @@
 ; tst as 'mov.w r0, #0'.
 
 define i1 @f2(i32 %a, i32 %b) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: tst {{.*}}, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp eq i32 %tmp, 0
@@ -12,7 +12,7 @@ define i1 @f2(i32 %a, i32 %b) {
 }
 
 define i1 @f4(i32 %a, i32 %b) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: tst {{.*}}, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp eq i32 0, %tmp
@@ -20,7 +20,7 @@ define i1 @f4(i32 %a, i32 %b) {
 }
 
 define i1 @f6(i32 %a, i32 %b) {
-; CHECK: f6:
+; CHECK-LABEL: f6:
 ; CHECK: tst.w {{.*}}, r1, lsl #5
     %tmp = shl i32 %b, 5
     %tmp1 = and i32 %a, %tmp
@@ -29,7 +29,7 @@ define i1 @f6(i32 %a, i32 %b) {
 }
 
 define i1 @f7(i32 %a, i32 %b) {
-; CHECK: f7:
+; CHECK-LABEL: f7:
 ; CHECK: tst.w {{.*}}, r1, lsr #6
     %tmp = lshr i32 %b, 6
     %tmp1 = and i32 %a, %tmp
@@ -38,7 +38,7 @@ define i1 @f7(i32 %a, i32 %b) {
 }
 
 define i1 @f8(i32 %a, i32 %b) {
-; CHECK: f8:
+; CHECK-LABEL: f8:
 ; CHECK: tst.w {{.*}}, r1, asr #7
     %tmp = ashr i32 %b, 7
     %tmp1 = and i32 %a, %tmp
@@ -47,7 +47,7 @@ define i1 @f8(i32 %a, i32 %b) {
 }
 
 define i1 @f9(i32 %a, i32 %b) {
-; CHECK: f9:
+; CHECK-LABEL: f9:
 ; CHECK: tst.w {{.*}}, {{.*}}, ror #8
     %l8 = shl i32 %a, 24
     %r8 = lshr i32 %a, 8
diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll
index b8a0657c9069..6cb019ff00ec 100644
--- a/test/CodeGen/Thumb2/tls2.ll
+++ b/test/CodeGen/Thumb2/tls2.ll
@@ -5,12 +5,12 @@
 
 define i32 @f() {
 entry:
-; CHECK-NOT-PIC: f:
+; CHECK-NOT-PIC-LABEL: f:
 ; CHECK-NOT-PIC: add r0, pc
 ; CHECK-NOT-PIC: ldr r1, [r0]
 ; CHECK-NOT-PIC: i(gottpoff)
 
-; CHECK-PIC: f:
+; CHECK-PIC-LABEL: f:
 ; CHECK-PIC: bl __tls_get_addr(PLT)
 	%tmp1 = load i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
@@ -18,12 +18,12 @@ entry:
 
 define i32* @g() {
 entry:
-; CHECK-NOT-PIC: g:
+; CHECK-NOT-PIC-LABEL: g:
 ; CHECK-NOT-PIC: add r0, pc
 ; CHECK-NOT-PIC: ldr r1, [r0]
 ; CHECK-NOT-PIC: i(gottpoff)
 
-; CHECK-PIC: g:
+; CHECK-PIC-LABEL: g:
 ; CHECK-PIC: bl __tls_get_addr(PLT)
 	ret i32* @i
 }
diff --git a/test/CodeGen/Thumb2/v8_IT_1.ll b/test/CodeGen/Thumb2/v8_IT_1.ll
new file mode 100644
index 000000000000..30dbb4802b6d
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_1.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=thumbv8 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -mattr=+neon -arm-restrict-it | FileCheck %s
+
+;CHECK-LABEL: select_s_v_v:
+;CHECK-NOT: it
+;CHECK: bx
+define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) {
+entry:
+  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
+  %and = and i32 %avail, 1
+  %tobool = icmp eq i32 %and, 0
+  %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer
+  ret <16 x i8> %vld1.
+}
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 )
+
diff --git a/test/CodeGen/Thumb2/v8_IT_2.ll b/test/CodeGen/Thumb2/v8_IT_2.ll
new file mode 100644
index 000000000000..170b4135b536
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_2.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
+
+	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+entry:
+; CHECK-LABEL: CountTree:
+; CHECK: bne
+; CHECK: cmp
+; CHECK: it eq
+; CHECK: cmpeq
+; CHECK: bne
+; CHECK: mov
+; CHECK: pop
+	br label %tailrecurse
+
+tailrecurse:		; preds = %bb, %entry
+	%tmp6 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp9 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
+	%tmp12 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp14 = icmp eq %struct.quad_struct* null, null		; <i1> [#uses=1]
+	%tmp17 = icmp eq %struct.quad_struct* %tmp6, null		; <i1> [#uses=1]
+	%tmp23 = icmp eq %struct.quad_struct* %tmp9, null		; <i1> [#uses=1]
+	%tmp29 = icmp eq %struct.quad_struct* %tmp12, null		; <i1> [#uses=1]
+	%bothcond = and i1 %tmp17, %tmp14		; <i1> [#uses=1]
+	%bothcond1 = and i1 %bothcond, %tmp23		; <i1> [#uses=1]
+	%bothcond2 = and i1 %bothcond1, %tmp29		; <i1> [#uses=1]
+	br i1 %bothcond2, label %return, label %bb
+
+bb:		; preds = %tailrecurse
+	%tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 )		; <i32> [#uses=0]
+	br label %tailrecurse
+
+return:		; preds = %tailrecurse
+	ret i32 0
+}
+
diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll
new file mode 100644
index 000000000000..4dca24629b01
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_3.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
+; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
+
+%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
+%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
+
+@FuncPtr = external hidden unnamed_addr global %struct.FF*
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
+@G = external unnamed_addr global i32
+@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
+@.str3 = external hidden unnamed_addr constant [58 x i8], align 4
+
+define i32 @test() nounwind optsize ssp {
+entry:
+; CHECK-LABEL: test:
+; CHECK: push
+; CHECK-NOT: push
+  %block_size = alloca i32, align 4
+  %block_count = alloca i32, align 4
+  %index_cache = alloca i32, align 4
+  store i32 0, i32* %index_cache, align 4
+  %tmp = load i32* @G, align 4
+  %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
+  switch i32 %tmp1, label %bb8 [
+    i32 0, label %bb
+    i32 536870913, label %bb4
+    i32 536870914, label %bb6
+  ]
+
+bb:
+  %tmp2 = load i32* @G, align 4
+  %tmp4 = icmp eq i32 %tmp2, 0
+  br i1 %tmp4, label %bb1, label %bb8
+
+bb1:
+; CHECK: %bb6
+; CHECK: it	eq
+; CHECK-NEXT: ldreq
+; CHECK-NEXT: it	eq
+; CHECK-NEXT: cmpeq
+; CHECK: %bb1
+  %tmp5 = load i32* %block_size, align 4
+  %tmp6 = load i32* %block_count, align 4
+  %tmp7 = call %struct.FF* @Get() nounwind
+  store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
+  %tmp10 = zext i32 %tmp6 to i64
+  %tmp11 = zext i32 %tmp5 to i64
+  %tmp12 = mul nsw i64 %tmp10, %tmp11
+  %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+  br label %bb8
+
+bb4:
+; CHECK-PIC: cmp
+; CHECK-PIC: cmp
+; CHECK-PIC-NEXT: bne
+; CHECK-PIC-NEXT: %bb4
+; CHECK-PIC-NEXT: movs
+; CHECK-PIC-NEXT: add
+; CHECK-PIC-NEXT: pop
+  ret i32 0
+
+bb6:
+  ret i32 1
+
+bb8:
+  ret i32 -1
+}
+
+declare i32 @printf(i8*, ...)
+
+declare %struct.FF* @Get()
+
+declare i32 @foo(i8*, i64, i32)
+
+declare i32 @bar(i32, i32, i32)
diff --git a/test/CodeGen/Thumb2/v8_IT_4.ll b/test/CodeGen/Thumb2/v8_IT_4.ll
new file mode 100644
index 000000000000..5a80d8cd7b4e
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_4.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-eabi -float-abi=hard -arm-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-eabi -float-abi=hard -regalloc=basic -arm-restrict-it | FileCheck %s
+
+%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
+%"struct.__gnu_cxx::new_allocator<char>" = type <{ i8 }>
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 }
+
+
+define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
+; CHECK-LABEL: _ZNKSs7compareERKSs:
+; CHECK:      cbnz	r0,
+; CHECK-NEXT: %bb
+; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
+; CHECK-NEXT: %bb1
+; CHECK-NEXT: pop.w
+entry:
+  %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
+  %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
+  %2 = icmp ult i32 %1, %0                        ; <i1> [#uses=1]
+  %3 = select i1 %2, i32 %1, i32 %0               ; <i32> [#uses=1]
+  %4 = tail call arm_aapcs_vfpcc  i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i8*> [#uses=1]
+  %5 = tail call arm_aapcs_vfpcc  i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i8*> [#uses=1]
+  %6 = tail call arm_aapcs_vfpcc  i32 @memcmp(i8* %4, i8* %5, i32 %3) nounwind readonly ; <i32> [#uses=2]
+  %7 = icmp eq i32 %6, 0                          ; <i1> [#uses=1]
+  br i1 %7, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %8 = sub i32 %0, %1                             ; <i32> [#uses=1]
+  ret i32 %8
+
+bb1:                                              ; preds = %entry
+  ret i32 %6
+}
+
+declare arm_aapcs_vfpcc i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly
+
+declare arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
+
+declare arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
+
+declare arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll
new file mode 100644
index 000000000000..30250c8d02f0
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_5.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it | FileCheck %s
+; CHECK: it	ne
+; CHECK-NEXT: cmpne
+; CHECK-NEXT: beq
+; CHECK: cmp
+; CHECK-NEXT: beq
+; CHECK-NEXT: %if.else163
+; CHECK-NEXT: mov.w
+; CHECK-NEXT: b
+; CHECK-NEXT: %if.else145
+; CHECK-NEXT: mov.w
+
+%struct.hc = type { i32, i32, i32, i32 }
+
+define i32 @t(i32 %type) optsize {
+entry:
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  unreachable
+
+if.else:
+  br i1 undef, label %if.then15, label %if.else18
+
+if.then15:
+  unreachable
+
+if.else18:
+  switch i32 %type, label %if.else173 [
+    i32 3, label %if.then115
+    i32 1, label %if.then102
+  ]
+
+if.then102:
+  br i1 undef, label %cond.true10.i, label %t.exit
+
+cond.true10.i:
+  br label %t.exit
+
+t.exit:
+  unreachable
+
+if.then115:
+  br i1 undef, label %if.else163, label %if.else145
+
+if.else145:
+  %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize
+  br label %while.body172
+
+if.else163:
+  %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize
+  br label %while.body172
+
+while.body172:
+  br label %while.body172
+
+if.else173:
+  ret i32 -1
+}
+
+declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize
+
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 0afddd8f876f..69266dc4e44b 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,7 +1,10 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
-; RUN:   grep asm-printer | grep 14
+; RUN:   grep asm-printer | grep 16
 ;
+; It's possible to schedule this in 14 instructions by avoiding
+; callee-save registers, but the scheduler isn't currently that
+; conservative with registers.
 @size20 = external global i32		; <i32*> [#uses=1]
 @in5 = external global i8*		; <i8**> [#uses=1]
 
@@ -21,4 +24,3 @@ define i32 @compare(i8* %a, i8* %b) nounwind {
 }
 
 declare i32 @memcmp(i8*, i8*, i32)
-
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 6912351d7b7e..3d0946698500 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \
 ; RUN:     grep "asm-printer" | grep 35
 
 target datalayout = "e-p:32:32"
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index a58c9b102d13..d7af1c3fdc71 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -4,7 +4,7 @@ target triple = "i686-pc-linux-gnu"
 @str = internal constant [9 x i8] c"%f+%f*i\0A\00"              ; <[9 x i8]*> [#uses=1]
 
 define i32 @main() {
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK-NOT: ret
 ; CHECK: subl $4, %{{.*}}
 ; CHECK: ret
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 24aa5b98d0bb..4ec703921e29 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -13,10 +13,10 @@ define float @foo(float %x) nounwind {
 
 ; CHECK: mulss
 ; CHECK: mulss
-; CHECK: addss
 ; CHECK: mulss
-; CHECK: addss
 ; CHECK: mulss
 ; CHECK: addss
+; CHECK: addss
+; CHECK: addss
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index b0eb1c5441bf..cea4d9d272fc 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -2,7 +2,7 @@
 
 ;; This example can't fold the or into an LEA.
 define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK: orl $1, %{{.*}}
 ; CHECK: ret
@@ -18,7 +18,7 @@ define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
 
 ;; This can!
 define i32 @test2(i32 %a, i32 %b) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK-NOT: ret
 ; CHECK: leal 3(,%{{.*}},8)
 ; CHECK: ret
diff --git a/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll b/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
index b48ce845f902..cbc1bc47fb15 100644
--- a/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
+++ b/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @test(i1 %X) {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK: movl $1, %eax
 ; CHECK: ret
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
index e2cd750e2cac..3e1786bef793 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
 define void @test() {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK: psrlw $8, %xmm0
 ; CHECK: ret
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
deleted file mode 100644
index c3d7e8a05472..000000000000
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep "addl .12, %esp"
-; PR1398
-
-	%struct.S = type { i32, i32 }
-
-declare void @invokee(%struct.S* sret )
-
-define void @invoker(%struct.S* %name.0.0) {
-entry:
-	invoke void @invokee( %struct.S* sret %name.0.0   )
-			to label %return unwind label %return
-
-return:		; preds = %entry, %entry
-        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
-                 cleanup
-	ret void
-}
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
index c5d2a46f92c2..638d399056a2 100644
--- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -1,4 +1,13 @@
-; RUN: llc < %s -march=x86 | grep weak | count 2
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s
+
 @__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
 
-declare extern_weak i32 @pthread_once(i32*, void ()*)
+define weak i32 @pthread_once(i32*, void ()*) {
+  ret i32 0
+}
+
+; CHECK: .weak   pthread_once
+; CHECK: pthread_once:
+
+; CHECK: .weak   __gthrw_pthread_once
+; CHECK: __gthrw_pthread_once = pthread_once
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index f7ffb9337ef8..88057c86fd9d 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -7,7 +7,7 @@ entry:
 	%tmp2 = call x86_fp80 @llvm.sqrt.f80( x86_fp80 %x )
 	ret x86_fp80 %tmp2
         
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: fldt 4(%esp)
 ; CHECK-NEXT: fsqrt
 ; CHECK-NEXT: ret
@@ -19,7 +19,7 @@ define x86_fp80 @bar(x86_fp80 %x) nounwind {
 entry:
 	%tmp2 = call x86_fp80 @llvm.powi.f80( x86_fp80 %x, i32 3 )
 	ret x86_fp80 %tmp2
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: fldt 4(%esp)
 ; CHECK-NEXT: fld	%st(0)
 ; CHECK-NEXT: fmul	%st(1)
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index 8091bd1bc1ca..d3a47aefb7d0 100644
--- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -7,8 +7,10 @@ entry:
 cond_next127:		; preds = %cond_next391, %entry
 	%v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ]		; <i32> [#uses=1]
 	%tmp149 = mul i32 0, %v.1		; <i32> [#uses=0]
-	%tmp254 = and i32 0, 15		; <i32> [#uses=1]
-	%tmp256 = and i32 0, 15		; <i32> [#uses=2]
+	%tmpss = load i32* %ss, align 4		; <i32> [#uses=1]
+	%tmpbp = load i32* %bp, align 4		; <i32> [#uses=2]
+	%tmp254 = and i32 %tmpss, 15		; <i32> [#uses=1]
+	%tmp256 = and i32 %tmpbp, 15		; <i32> [#uses=2]
 	br label %cond_next391
 
 cond_next391:		; preds = %cond_next127
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index 7a3d72dd4b07..1ec9c70d570c 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep sarl | not grep esp
+; RUN: llc < %s -march=x86 -mcpu=corei7 | grep sarl | not grep esp
 
 define signext   i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext  %acBaseRes, i16 signext  %acMaskRes, i16 signext  %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) {
 entry:
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 39af9319c8d1..9b9b781cfa2e 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -19,7 +19,7 @@ bb917:		; preds = %entry
 	ret i32 0
 
 bb951:		; preds = %bb986, %entry
-	%tmp955 = sdiv i32 0, 2		; <i32> [#uses=3]
+	%tmp955 = sdiv i32 %offset, 2		; <i32> [#uses=3]
 	%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0		; <i32*> [#uses=1]
 	br i1 %cond, label %bb986, label %bb967
 
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
deleted file mode 100644
index b772d77f6405..000000000000
--- a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep andpd | not grep esp
-
-declare double @llvm.sqrt.f64(double) nounwind readnone 
-
-declare fastcc void @ApplyGivens(double**, double, double, i32, i32, i32, i32) nounwind 
-
-declare double @fabs(double)
-
-define void @main_bb114_2E_outer_2E_i_bb3_2E_i27(double** %tmp12.sub.i.i, [51 x double*]* %tmp12.i.i.i, i32 %i.0.reg2mem.0.ph.i, i32 %tmp11688.i, i32 %tmp19.i, i32 %tmp24.i, [51 x double*]* %tmp12.i.i) {
-newFuncRoot:
-	br label %bb3.i27
-
-bb111.i77.bb121.i_crit_edge.exitStub:		; preds = %bb111.i77
-	ret void
-
-bb3.i27:		; preds = %bb111.i77.bb3.i27_crit_edge, %newFuncRoot
-	%indvar94.i = phi i32 [ 0, %newFuncRoot ], [ %tmp113.i76, %bb111.i77.bb3.i27_crit_edge ]		; <i32> [#uses=6]
-	%tmp6.i20 = getelementptr [51 x double*]* %tmp12.i.i, i32 0, i32 %indvar94.i		; <double**> [#uses=1]
-	%tmp7.i21 = load double** %tmp6.i20, align 4		; <double*> [#uses=2]
-	%tmp10.i = add i32 %indvar94.i, %i.0.reg2mem.0.ph.i		; <i32> [#uses=5]
-	%tmp11.i22 = getelementptr double* %tmp7.i21, i32 %tmp10.i		; <double*> [#uses=1]
-	%tmp12.i23 = load double* %tmp11.i22, align 8		; <double> [#uses=4]
-	%tmp20.i24 = add i32 %tmp19.i, %indvar94.i		; <i32> [#uses=3]
-	%tmp21.i = getelementptr double* %tmp7.i21, i32 %tmp20.i24		; <double*> [#uses=1]
-	%tmp22.i25 = load double* %tmp21.i, align 8		; <double> [#uses=3]
-	%tmp1.i.i26 = fcmp oeq double %tmp12.i23, 0.000000e+00		; <i1> [#uses=1]
-	br i1 %tmp1.i.i26, label %bb3.i27.Givens.exit.i49_crit_edge, label %bb5.i.i31
-
-bb5.i.i31:		; preds = %bb3.i27
-	%tmp7.i.i28 = call double @fabs( double %tmp12.i23 ) nounwind 		; <double> [#uses=1]
-	%tmp9.i.i29 = call double @fabs( double %tmp22.i25 ) nounwind 		; <double> [#uses=1]
-	%tmp10.i.i30 = fcmp ogt double %tmp7.i.i28, %tmp9.i.i29		; <i1> [#uses=1]
-	br i1 %tmp10.i.i30, label %bb13.i.i37, label %bb30.i.i43
-
-bb13.i.i37:		; preds = %bb5.i.i31
-	%tmp15.i.i32 = fsub double -0.000000e+00, %tmp22.i25		; <double> [#uses=1]
-	%tmp17.i.i33 = fdiv double %tmp15.i.i32, %tmp12.i23		; <double> [#uses=3]
-	%tmp20.i4.i = fmul double %tmp17.i.i33, %tmp17.i.i33		; <double> [#uses=1]
-	%tmp21.i.i34 = fadd double %tmp20.i4.i, 1.000000e+00		; <double> [#uses=1]
-	%tmp22.i.i35 = call double @llvm.sqrt.f64( double %tmp21.i.i34 ) nounwind 		; <double> [#uses=1]
-	%tmp23.i5.i = fdiv double 1.000000e+00, %tmp22.i.i35		; <double> [#uses=2]
-	%tmp28.i.i36 = fmul double %tmp23.i5.i, %tmp17.i.i33		; <double> [#uses=1]
-	br label %Givens.exit.i49
-
-bb30.i.i43:		; preds = %bb5.i.i31
-	%tmp32.i.i38 = fsub double -0.000000e+00, %tmp12.i23		; <double> [#uses=1]
-	%tmp34.i.i39 = fdiv double %tmp32.i.i38, %tmp22.i25		; <double> [#uses=3]
-	%tmp37.i6.i = fmul double %tmp34.i.i39, %tmp34.i.i39		; <double> [#uses=1]
-	%tmp38.i.i40 = fadd double %tmp37.i6.i, 1.000000e+00		; <double> [#uses=1]
-	%tmp39.i7.i = call double @llvm.sqrt.f64( double %tmp38.i.i40 ) nounwind 		; <double> [#uses=1]
-	%tmp40.i.i41 = fdiv double 1.000000e+00, %tmp39.i7.i		; <double> [#uses=2]
-	%tmp45.i.i42 = fmul double %tmp40.i.i41, %tmp34.i.i39		; <double> [#uses=1]
-	br label %Givens.exit.i49
-
-Givens.exit.i49:		; preds = %bb3.i27.Givens.exit.i49_crit_edge, %bb30.i.i43, %bb13.i.i37
-	%s.0.i44 = phi double [ %tmp45.i.i42, %bb30.i.i43 ], [ %tmp23.i5.i, %bb13.i.i37 ], [ 0.000000e+00, %bb3.i27.Givens.exit.i49_crit_edge ]		; <double> [#uses=2]
-	%c.0.i45 = phi double [ %tmp40.i.i41, %bb30.i.i43 ], [ %tmp28.i.i36, %bb13.i.i37 ], [ 1.000000e+00, %bb3.i27.Givens.exit.i49_crit_edge ]		; <double> [#uses=2]
-	%tmp26.i46 = add i32 %tmp24.i, %indvar94.i		; <i32> [#uses=2]
-	%tmp27.i47 = icmp slt i32 %tmp26.i46, 51		; <i1> [#uses=1]
-	%min.i48 = select i1 %tmp27.i47, i32 %tmp26.i46, i32 50		; <i32> [#uses=1]
-	call fastcc void @ApplyGivens( double** %tmp12.sub.i.i, double %s.0.i44, double %c.0.i45, i32 %tmp20.i24, i32 %tmp10.i, i32 %indvar94.i, i32 %min.i48 ) nounwind 
-	br label %codeRepl
-
-codeRepl:		; preds = %Givens.exit.i49
-	call void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb_2E_i48_2E_i( i32 %tmp10.i, i32 %tmp20.i24, double %s.0.i44, double %c.0.i45, [51 x double*]* %tmp12.i.i.i )
-	br label %ApplyRGivens.exit49.i
-
-ApplyRGivens.exit49.i:		; preds = %codeRepl
-	%tmp10986.i = icmp sgt i32 %tmp11688.i, %tmp10.i		; <i1> [#uses=1]
-	br i1 %tmp10986.i, label %ApplyRGivens.exit49.i.bb52.i57_crit_edge, label %ApplyRGivens.exit49.i.bb111.i77_crit_edge
-
-codeRepl1:		; preds = %ApplyRGivens.exit49.i.bb52.i57_crit_edge
-	call void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb52_2E_i57( i32 %tmp10.i, double** %tmp12.sub.i.i, [51 x double*]* %tmp12.i.i.i, i32 %i.0.reg2mem.0.ph.i, i32 %tmp11688.i, i32 %tmp19.i, i32 %tmp24.i, [51 x double*]* %tmp12.i.i )
-	br label %bb105.i.bb111.i77_crit_edge
-
-bb111.i77:		; preds = %bb105.i.bb111.i77_crit_edge, %ApplyRGivens.exit49.i.bb111.i77_crit_edge
-	%tmp113.i76 = add i32 %indvar94.i, 1		; <i32> [#uses=2]
-	%tmp118.i = icmp sgt i32 %tmp11688.i, %tmp113.i76		; <i1> [#uses=1]
-	br i1 %tmp118.i, label %bb111.i77.bb3.i27_crit_edge, label %bb111.i77.bb121.i_crit_edge.exitStub
-
-bb3.i27.Givens.exit.i49_crit_edge:		; preds = %bb3.i27
-	br label %Givens.exit.i49
-
-ApplyRGivens.exit49.i.bb52.i57_crit_edge:		; preds = %ApplyRGivens.exit49.i
-	br label %codeRepl1
-
-ApplyRGivens.exit49.i.bb111.i77_crit_edge:		; preds = %ApplyRGivens.exit49.i
-	br label %bb111.i77
-
-bb105.i.bb111.i77_crit_edge:		; preds = %codeRepl1
-	br label %bb111.i77
-
-bb111.i77.bb3.i27_crit_edge:		; preds = %bb111.i77
-	br label %bb3.i27
-}
-
-declare void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb_2E_i48_2E_i(i32, i32, double, double, [51 x double*]*)
-
-declare void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb52_2E_i57(i32, double**, [51 x double*]*, i32, i32, i32, i32, [51 x double*]*)
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index 8946415108f4..18b3714f851f 100644
--- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -45,4 +45,6 @@ bb383:		; preds = %bb374.us, %bb311.split
 	ret i64 0
 }
 
-declare i64 @__wcstoll_l(i32*, i32**, i32, %struct.__locale_struct*) nounwind 
+define i64 @__wcstoll_l(i32*, i32**, i32, %struct.__locale_struct*) nounwind {		; stub body replacing the former bare declaration
+  ret i64 0		; always returns 0; the four unnamed arguments are never read
+}
diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
index 4eaca17c8861..86bce8e977ac 100644
--- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mattr=+sse41
+; RUN: llc < %s -mattr=+sse4.1
 ; rdar://5886601
 ; gcc testsuite:  gcc.target/i386/sse4_1-pblendw.c
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
index 6e9a6298436d..d4805b4bb63e 100644
--- a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
+++ b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin8"
 
 define void @test() nounwind  {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK: 1 $2 3
 ; CHECK: ret
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index fc38135032c2..da56ce7ab583 100644
--- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -8,7 +8,7 @@ entry:
         ret void
 }
 
-; CHECK: a:
+; CHECK-LABEL: a:
 ; CHECK: movups
 ; CHECK: movups
 ; CHECK-NOT: movups
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index 368af6d8abde..cd86ee188949 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movups | count 33
-; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movups | count 33
+; RUN: llc < %s -mtriple=i686-pc-linux -mattr=sse2 | FileCheck %s
 ; PR2539
 ; PR8969 - make 32-bit linux have a 16-byte aligned stack
 ; Verify that movups is still generated with an aligned stack for the globals
@@ -40,7 +39,42 @@ external global <4 x float>, align 1		; <<4 x float>*>:31 [#uses=1]
 
 declare void @abort()
 
-define void @""() {
+define void @test1() {
+; CHECK-LABEL: test1:
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK-NOT: movups
 	load <4 x float>* @0, align 1		; <<4 x float>>:1 [#uses=2]
 	load <4 x float>* @1, align 1		; <<4 x float>>:2 [#uses=3]
 	load <4 x float>* @2, align 1		; <<4 x float>>:3 [#uses=4]
@@ -637,3 +671,636 @@ define void @""() {
 	store <4 x float> %593, <4 x float>* @0, align 1
 	ret void
 }
+
+define void @test2() "no-realign-stack" {
+; CHECK-LABEL: test2:
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK: movups
+; CHECK-NOT: movups
+	load <4 x float>* @0, align 1
+	load <4 x float>* @1, align 1
+	load <4 x float>* @2, align 1
+	load <4 x float>* @3, align 1
+	load <4 x float>* @4, align 1
+	load <4 x float>* @5, align 1
+	load <4 x float>* @6, align 1
+	load <4 x float>* @7, align 1
+	load <4 x float>* @8, align 1
+	load <4 x float>* @9, align 1
+	load <4 x float>* @10, align 1
+	load <4 x float>* @11, align 1
+	load <4 x float>* @12, align 1
+	load <4 x float>* @13, align 1
+	load <4 x float>* @14, align 1
+	load <4 x float>* @15, align 1
+	load <4 x float>* @16, align 1
+	load <4 x float>* @17, align 1
+	load <4 x float>* @18, align 1
+	load <4 x float>* @19, align 1
+	load <4 x float>* @20, align 1
+	load <4 x float>* @21, align 1
+	load <4 x float>* @22, align 1
+	load <4 x float>* @23, align 1
+	load <4 x float>* @24, align 1
+	load <4 x float>* @25, align 1
+	load <4 x float>* @26, align 1
+	load <4 x float>* @27, align 1
+	load <4 x float>* @28, align 1
+	load <4 x float>* @29, align 1
+	load <4 x float>* @30, align 1
+	load <4 x float>* @31, align 1
+	fmul <4 x float> %1, %1
+	fmul <4 x float> %33, %2
+	fmul <4 x float> %34, %3
+	fmul <4 x float> %35, %4
+	fmul <4 x float> %36, %5
+	fmul <4 x float> %37, %6
+	fmul <4 x float> %38, %7
+	fmul <4 x float> %39, %8
+	fmul <4 x float> %40, %9
+	fmul <4 x float> %41, %10
+	fmul <4 x float> %42, %11		; <<4 x float>>:43 [#uses=1]
+	fmul <4 x float> %43, %12		; <<4 x float>>:44 [#uses=1]
+	fmul <4 x float> %44, %13		; <<4 x float>>:45 [#uses=1]
+	fmul <4 x float> %45, %14		; <<4 x float>>:46 [#uses=1]
+	fmul <4 x float> %46, %15		; <<4 x float>>:47 [#uses=1]
+	fmul <4 x float> %47, %16		; <<4 x float>>:48 [#uses=1]
+	fmul <4 x float> %48, %17		; <<4 x float>>:49 [#uses=1]
+	fmul <4 x float> %49, %18		; <<4 x float>>:50 [#uses=1]
+	fmul <4 x float> %50, %19		; <<4 x float>>:51 [#uses=1]
+	fmul <4 x float> %51, %20		; <<4 x float>>:52 [#uses=1]
+	fmul <4 x float> %52, %21		; <<4 x float>>:53 [#uses=1]
+	fmul <4 x float> %53, %22		; <<4 x float>>:54 [#uses=1]
+	fmul <4 x float> %54, %23		; <<4 x float>>:55 [#uses=1]
+	fmul <4 x float> %55, %24		; <<4 x float>>:56 [#uses=1]
+	fmul <4 x float> %56, %25		; <<4 x float>>:57 [#uses=1]
+	fmul <4 x float> %57, %26		; <<4 x float>>:58 [#uses=1]
+	fmul <4 x float> %58, %27		; <<4 x float>>:59 [#uses=1]
+	fmul <4 x float> %59, %28		; <<4 x float>>:60 [#uses=1]
+	fmul <4 x float> %60, %29		; <<4 x float>>:61 [#uses=1]
+	fmul <4 x float> %61, %30		; <<4 x float>>:62 [#uses=1]
+	fmul <4 x float> %62, %31		; <<4 x float>>:63 [#uses=1]
+	fmul <4 x float> %63, %32		; <<4 x float>>:64 [#uses=3]
+	fmul <4 x float> %2, %2		; <<4 x float>>:65 [#uses=1]
+	fmul <4 x float> %65, %3		; <<4 x float>>:66 [#uses=1]
+	fmul <4 x float> %66, %4		; <<4 x float>>:67 [#uses=1]
+	fmul <4 x float> %67, %5		; <<4 x float>>:68 [#uses=1]
+	fmul <4 x float> %68, %6		; <<4 x float>>:69 [#uses=1]
+	fmul <4 x float> %69, %7		; <<4 x float>>:70 [#uses=1]
+	fmul <4 x float> %70, %8		; <<4 x float>>:71 [#uses=1]
+	fmul <4 x float> %71, %9		; <<4 x float>>:72 [#uses=1]
+	fmul <4 x float> %72, %10		; <<4 x float>>:73 [#uses=1]
+	fmul <4 x float> %73, %11		; <<4 x float>>:74 [#uses=1]
+	fmul <4 x float> %74, %12		; <<4 x float>>:75 [#uses=1]
+	fmul <4 x float> %75, %13		; <<4 x float>>:76 [#uses=1]
+	fmul <4 x float> %76, %14		; <<4 x float>>:77 [#uses=1]
+	fmul <4 x float> %77, %15		; <<4 x float>>:78 [#uses=1]
+	fmul <4 x float> %78, %16		; <<4 x float>>:79 [#uses=1]
+	fmul <4 x float> %79, %17		; <<4 x float>>:80 [#uses=1]
+	fmul <4 x float> %80, %18		; <<4 x float>>:81 [#uses=1]
+	fmul <4 x float> %81, %19		; <<4 x float>>:82 [#uses=1]
+	fmul <4 x float> %82, %20		; <<4 x float>>:83 [#uses=1]
+	fmul <4 x float> %83, %21		; <<4 x float>>:84 [#uses=1]
+	fmul <4 x float> %84, %22		; <<4 x float>>:85 [#uses=1]
+	fmul <4 x float> %85, %23		; <<4 x float>>:86 [#uses=1]
+	fmul <4 x float> %86, %24		; <<4 x float>>:87 [#uses=1]
+	fmul <4 x float> %87, %25		; <<4 x float>>:88 [#uses=1]
+	fmul <4 x float> %88, %26		; <<4 x float>>:89 [#uses=1]
+	fmul <4 x float> %89, %27		; <<4 x float>>:90 [#uses=1]
+	fmul <4 x float> %90, %28		; <<4 x float>>:91 [#uses=1]
+	fmul <4 x float> %91, %29		; <<4 x float>>:92 [#uses=1]
+	fmul <4 x float> %92, %30		; <<4 x float>>:93 [#uses=1]
+	fmul <4 x float> %93, %31		; <<4 x float>>:94 [#uses=1]
+	fmul <4 x float> %94, %32		; <<4 x float>>:95 [#uses=1]
+	fmul <4 x float> %3, %3		; <<4 x float>>:96 [#uses=1]
+	fmul <4 x float> %96, %4		; <<4 x float>>:97 [#uses=1]
+	fmul <4 x float> %97, %5		; <<4 x float>>:98 [#uses=1]
+	fmul <4 x float> %98, %6		; <<4 x float>>:99 [#uses=1]
+	fmul <4 x float> %99, %7		; <<4 x float>>:100 [#uses=1]
+	fmul <4 x float> %100, %8		; <<4 x float>>:101 [#uses=1]
+	fmul <4 x float> %101, %9		; <<4 x float>>:102 [#uses=1]
+	fmul <4 x float> %102, %10		; <<4 x float>>:103 [#uses=1]
+	fmul <4 x float> %103, %11		; <<4 x float>>:104 [#uses=1]
+	fmul <4 x float> %104, %12		; <<4 x float>>:105 [#uses=1]
+	fmul <4 x float> %105, %13		; <<4 x float>>:106 [#uses=1]
+	fmul <4 x float> %106, %14		; <<4 x float>>:107 [#uses=1]
+	fmul <4 x float> %107, %15		; <<4 x float>>:108 [#uses=1]
+	fmul <4 x float> %108, %16		; <<4 x float>>:109 [#uses=1]
+	fmul <4 x float> %109, %17		; <<4 x float>>:110 [#uses=1]
+	fmul <4 x float> %110, %18		; <<4 x float>>:111 [#uses=1]
+	fmul <4 x float> %111, %19		; <<4 x float>>:112 [#uses=1]
+	fmul <4 x float> %112, %20		; <<4 x float>>:113 [#uses=1]
+	fmul <4 x float> %113, %21		; <<4 x float>>:114 [#uses=1]
+	fmul <4 x float> %114, %22		; <<4 x float>>:115 [#uses=1]
+	fmul <4 x float> %115, %23		; <<4 x float>>:116 [#uses=1]
+	fmul <4 x float> %116, %24		; <<4 x float>>:117 [#uses=1]
+	fmul <4 x float> %117, %25		; <<4 x float>>:118 [#uses=1]
+	fmul <4 x float> %118, %26		; <<4 x float>>:119 [#uses=1]
+	fmul <4 x float> %119, %27		; <<4 x float>>:120 [#uses=1]
+	fmul <4 x float> %120, %28		; <<4 x float>>:121 [#uses=1]
+	fmul <4 x float> %121, %29		; <<4 x float>>:122 [#uses=1]
+	fmul <4 x float> %122, %30		; <<4 x float>>:123 [#uses=1]
+	fmul <4 x float> %123, %31		; <<4 x float>>:124 [#uses=1]
+	fmul <4 x float> %124, %32		; <<4 x float>>:125 [#uses=1]
+	fmul <4 x float> %4, %4		; <<4 x float>>:126 [#uses=1]
+	fmul <4 x float> %126, %5		; <<4 x float>>:127 [#uses=1]
+	fmul <4 x float> %127, %6		; <<4 x float>>:128 [#uses=1]
+	fmul <4 x float> %128, %7		; <<4 x float>>:129 [#uses=1]
+	fmul <4 x float> %129, %8		; <<4 x float>>:130 [#uses=1]
+	fmul <4 x float> %130, %9		; <<4 x float>>:131 [#uses=1]
+	fmul <4 x float> %131, %10		; <<4 x float>>:132 [#uses=1]
+	fmul <4 x float> %132, %11		; <<4 x float>>:133 [#uses=1]
+	fmul <4 x float> %133, %12		; <<4 x float>>:134 [#uses=1]
+	fmul <4 x float> %134, %13		; <<4 x float>>:135 [#uses=1]
+	fmul <4 x float> %135, %14		; <<4 x float>>:136 [#uses=1]
+	fmul <4 x float> %136, %15		; <<4 x float>>:137 [#uses=1]
+	fmul <4 x float> %137, %16		; <<4 x float>>:138 [#uses=1]
+	fmul <4 x float> %138, %17		; <<4 x float>>:139 [#uses=1]
+	fmul <4 x float> %139, %18		; <<4 x float>>:140 [#uses=1]
+	fmul <4 x float> %140, %19		; <<4 x float>>:141 [#uses=1]
+	fmul <4 x float> %141, %20		; <<4 x float>>:142 [#uses=1]
+	fmul <4 x float> %142, %21		; <<4 x float>>:143 [#uses=1]
+	fmul <4 x float> %143, %22		; <<4 x float>>:144 [#uses=1]
+	fmul <4 x float> %144, %23		; <<4 x float>>:145 [#uses=1]
+	fmul <4 x float> %145, %24		; <<4 x float>>:146 [#uses=1]
+	fmul <4 x float> %146, %25		; <<4 x float>>:147 [#uses=1]
+	fmul <4 x float> %147, %26		; <<4 x float>>:148 [#uses=1]
+	fmul <4 x float> %148, %27		; <<4 x float>>:149 [#uses=1]
+	fmul <4 x float> %149, %28		; <<4 x float>>:150 [#uses=1]
+	fmul <4 x float> %150, %29		; <<4 x float>>:151 [#uses=1]
+	fmul <4 x float> %151, %30		; <<4 x float>>:152 [#uses=1]
+	fmul <4 x float> %152, %31		; <<4 x float>>:153 [#uses=1]
+	fmul <4 x float> %153, %32		; <<4 x float>>:154 [#uses=1]
+	fmul <4 x float> %5, %5		; <<4 x float>>:155 [#uses=1]
+	fmul <4 x float> %155, %6		; <<4 x float>>:156 [#uses=1]
+	fmul <4 x float> %156, %7		; <<4 x float>>:157 [#uses=1]
+	fmul <4 x float> %157, %8		; <<4 x float>>:158 [#uses=1]
+	fmul <4 x float> %158, %9		; <<4 x float>>:159 [#uses=1]
+	fmul <4 x float> %159, %10		; <<4 x float>>:160 [#uses=1]
+	fmul <4 x float> %160, %11		; <<4 x float>>:161 [#uses=1]
+	fmul <4 x float> %161, %12		; <<4 x float>>:162 [#uses=1]
+	fmul <4 x float> %162, %13		; <<4 x float>>:163 [#uses=1]
+	fmul <4 x float> %163, %14		; <<4 x float>>:164 [#uses=1]
+	fmul <4 x float> %164, %15		; <<4 x float>>:165 [#uses=1]
+	fmul <4 x float> %165, %16		; <<4 x float>>:166 [#uses=1]
+	fmul <4 x float> %166, %17		; <<4 x float>>:167 [#uses=1]
+	fmul <4 x float> %167, %18		; <<4 x float>>:168 [#uses=1]
+	fmul <4 x float> %168, %19		; <<4 x float>>:169 [#uses=1]
+	fmul <4 x float> %169, %20		; <<4 x float>>:170 [#uses=1]
+	fmul <4 x float> %170, %21		; <<4 x float>>:171 [#uses=1]
+	fmul <4 x float> %171, %22		; <<4 x float>>:172 [#uses=1]
+	fmul <4 x float> %172, %23		; <<4 x float>>:173 [#uses=1]
+	fmul <4 x float> %173, %24		; <<4 x float>>:174 [#uses=1]
+	fmul <4 x float> %174, %25		; <<4 x float>>:175 [#uses=1]
+	fmul <4 x float> %175, %26		; <<4 x float>>:176 [#uses=1]
+	fmul <4 x float> %176, %27		; <<4 x float>>:177 [#uses=1]
+	fmul <4 x float> %177, %28		; <<4 x float>>:178 [#uses=1]
+	fmul <4 x float> %178, %29		; <<4 x float>>:179 [#uses=1]
+	fmul <4 x float> %179, %30		; <<4 x float>>:180 [#uses=1]
+	fmul <4 x float> %180, %31		; <<4 x float>>:181 [#uses=1]
+	fmul <4 x float> %181, %32		; <<4 x float>>:182 [#uses=1]
+	fmul <4 x float> %6, %6		; <<4 x float>>:183 [#uses=1]
+	fmul <4 x float> %183, %7		; <<4 x float>>:184 [#uses=1]
+	fmul <4 x float> %184, %8		; <<4 x float>>:185 [#uses=1]
+	fmul <4 x float> %185, %9		; <<4 x float>>:186 [#uses=1]
+	fmul <4 x float> %186, %10		; <<4 x float>>:187 [#uses=1]
+	fmul <4 x float> %187, %11		; <<4 x float>>:188 [#uses=1]
+	fmul <4 x float> %188, %12		; <<4 x float>>:189 [#uses=1]
+	fmul <4 x float> %189, %13		; <<4 x float>>:190 [#uses=1]
+	fmul <4 x float> %190, %14		; <<4 x float>>:191 [#uses=1]
+	fmul <4 x float> %191, %15		; <<4 x float>>:192 [#uses=1]
+	fmul <4 x float> %192, %16		; <<4 x float>>:193 [#uses=1]
+	fmul <4 x float> %193, %17		; <<4 x float>>:194 [#uses=1]
+	fmul <4 x float> %194, %18		; <<4 x float>>:195 [#uses=1]
+	fmul <4 x float> %195, %19		; <<4 x float>>:196 [#uses=1]
+	fmul <4 x float> %196, %20		; <<4 x float>>:197 [#uses=1]
+	fmul <4 x float> %197, %21		; <<4 x float>>:198 [#uses=1]
+	fmul <4 x float> %198, %22		; <<4 x float>>:199 [#uses=1]
+	fmul <4 x float> %199, %23		; <<4 x float>>:200 [#uses=1]
+	fmul <4 x float> %200, %24		; <<4 x float>>:201 [#uses=1]
+	fmul <4 x float> %201, %25		; <<4 x float>>:202 [#uses=1]
+	fmul <4 x float> %202, %26		; <<4 x float>>:203 [#uses=1]
+	fmul <4 x float> %203, %27		; <<4 x float>>:204 [#uses=1]
+	fmul <4 x float> %204, %28		; <<4 x float>>:205 [#uses=1]
+	fmul <4 x float> %205, %29		; <<4 x float>>:206 [#uses=1]
+	fmul <4 x float> %206, %30		; <<4 x float>>:207 [#uses=1]
+	fmul <4 x float> %207, %31		; <<4 x float>>:208 [#uses=1]
+	fmul <4 x float> %208, %32		; <<4 x float>>:209 [#uses=1]
+	fmul <4 x float> %7, %7		; <<4 x float>>:210 [#uses=1]
+	fmul <4 x float> %210, %8		; <<4 x float>>:211 [#uses=1]
+	fmul <4 x float> %211, %9		; <<4 x float>>:212 [#uses=1]
+	fmul <4 x float> %212, %10		; <<4 x float>>:213 [#uses=1]
+	fmul <4 x float> %213, %11		; <<4 x float>>:214 [#uses=1]
+	fmul <4 x float> %214, %12		; <<4 x float>>:215 [#uses=1]
+	fmul <4 x float> %215, %13		; <<4 x float>>:216 [#uses=1]
+	fmul <4 x float> %216, %14		; <<4 x float>>:217 [#uses=1]
+	fmul <4 x float> %217, %15		; <<4 x float>>:218 [#uses=1]
+	fmul <4 x float> %218, %16		; <<4 x float>>:219 [#uses=1]
+	fmul <4 x float> %219, %17		; <<4 x float>>:220 [#uses=1]
+	fmul <4 x float> %220, %18		; <<4 x float>>:221 [#uses=1]
+	fmul <4 x float> %221, %19		; <<4 x float>>:222 [#uses=1]
+	fmul <4 x float> %222, %20		; <<4 x float>>:223 [#uses=1]
+	fmul <4 x float> %223, %21		; <<4 x float>>:224 [#uses=1]
+	fmul <4 x float> %224, %22		; <<4 x float>>:225 [#uses=1]
+	fmul <4 x float> %225, %23		; <<4 x float>>:226 [#uses=1]
+	fmul <4 x float> %226, %24		; <<4 x float>>:227 [#uses=1]
+	fmul <4 x float> %227, %25		; <<4 x float>>:228 [#uses=1]
+	fmul <4 x float> %228, %26		; <<4 x float>>:229 [#uses=1]
+	fmul <4 x float> %229, %27		; <<4 x float>>:230 [#uses=1]
+	fmul <4 x float> %230, %28		; <<4 x float>>:231 [#uses=1]
+	fmul <4 x float> %231, %29		; <<4 x float>>:232 [#uses=1]
+	fmul <4 x float> %232, %30		; <<4 x float>>:233 [#uses=1]
+	fmul <4 x float> %233, %31		; <<4 x float>>:234 [#uses=1]
+	fmul <4 x float> %234, %32		; <<4 x float>>:235 [#uses=1]
+	fmul <4 x float> %8, %8		; <<4 x float>>:236 [#uses=1]
+	fmul <4 x float> %236, %9		; <<4 x float>>:237 [#uses=1]
+	fmul <4 x float> %237, %10		; <<4 x float>>:238 [#uses=1]
+	fmul <4 x float> %238, %11		; <<4 x float>>:239 [#uses=1]
+	fmul <4 x float> %239, %12		; <<4 x float>>:240 [#uses=1]
+	fmul <4 x float> %240, %13		; <<4 x float>>:241 [#uses=1]
+	fmul <4 x float> %241, %14		; <<4 x float>>:242 [#uses=1]
+	fmul <4 x float> %242, %15		; <<4 x float>>:243 [#uses=1]
+	fmul <4 x float> %243, %16		; <<4 x float>>:244 [#uses=1]
+	fmul <4 x float> %244, %17		; <<4 x float>>:245 [#uses=1]
+	fmul <4 x float> %245, %18		; <<4 x float>>:246 [#uses=1]
+	fmul <4 x float> %246, %19		; <<4 x float>>:247 [#uses=1]
+	fmul <4 x float> %247, %20		; <<4 x float>>:248 [#uses=1]
+	fmul <4 x float> %248, %21		; <<4 x float>>:249 [#uses=1]
+	fmul <4 x float> %249, %22		; <<4 x float>>:250 [#uses=1]
+	fmul <4 x float> %250, %23		; <<4 x float>>:251 [#uses=1]
+	fmul <4 x float> %251, %24		; <<4 x float>>:252 [#uses=1]
+	fmul <4 x float> %252, %25		; <<4 x float>>:253 [#uses=1]
+	fmul <4 x float> %253, %26		; <<4 x float>>:254 [#uses=1]
+	fmul <4 x float> %254, %27		; <<4 x float>>:255 [#uses=1]
+	fmul <4 x float> %255, %28		; <<4 x float>>:256 [#uses=1]
+	fmul <4 x float> %256, %29		; <<4 x float>>:257 [#uses=1]
+	fmul <4 x float> %257, %30		; <<4 x float>>:258 [#uses=1]
+	fmul <4 x float> %258, %31		; <<4 x float>>:259 [#uses=1]
+	fmul <4 x float> %259, %32		; <<4 x float>>:260 [#uses=1]
+	fmul <4 x float> %9, %9		; <<4 x float>>:261 [#uses=1]
+	fmul <4 x float> %261, %10		; <<4 x float>>:262 [#uses=1]
+	fmul <4 x float> %262, %11		; <<4 x float>>:263 [#uses=1]
+	fmul <4 x float> %263, %12		; <<4 x float>>:264 [#uses=1]
+	fmul <4 x float> %264, %13		; <<4 x float>>:265 [#uses=1]
+	fmul <4 x float> %265, %14		; <<4 x float>>:266 [#uses=1]
+	fmul <4 x float> %266, %15		; <<4 x float>>:267 [#uses=1]
+	fmul <4 x float> %267, %16		; <<4 x float>>:268 [#uses=1]
+	fmul <4 x float> %268, %17		; <<4 x float>>:269 [#uses=1]
+	fmul <4 x float> %269, %18		; <<4 x float>>:270 [#uses=1]
+	fmul <4 x float> %270, %19		; <<4 x float>>:271 [#uses=1]
+	fmul <4 x float> %271, %20		; <<4 x float>>:272 [#uses=1]
+	fmul <4 x float> %272, %21		; <<4 x float>>:273 [#uses=1]
+	fmul <4 x float> %273, %22		; <<4 x float>>:274 [#uses=1]
+	fmul <4 x float> %274, %23		; <<4 x float>>:275 [#uses=1]
+	fmul <4 x float> %275, %24		; <<4 x float>>:276 [#uses=1]
+	fmul <4 x float> %276, %25		; <<4 x float>>:277 [#uses=1]
+	fmul <4 x float> %277, %26		; <<4 x float>>:278 [#uses=1]
+	fmul <4 x float> %278, %27		; <<4 x float>>:279 [#uses=1]
+	fmul <4 x float> %279, %28		; <<4 x float>>:280 [#uses=1]
+	fmul <4 x float> %280, %29		; <<4 x float>>:281 [#uses=1]
+	fmul <4 x float> %281, %30		; <<4 x float>>:282 [#uses=1]
+	fmul <4 x float> %282, %31		; <<4 x float>>:283 [#uses=1]
+	fmul <4 x float> %283, %32		; <<4 x float>>:284 [#uses=1]
+	fmul <4 x float> %10, %10		; <<4 x float>>:285 [#uses=1]
+	fmul <4 x float> %285, %11		; <<4 x float>>:286 [#uses=1]
+	fmul <4 x float> %286, %12		; <<4 x float>>:287 [#uses=1]
+	fmul <4 x float> %287, %13		; <<4 x float>>:288 [#uses=1]
+	fmul <4 x float> %288, %14		; <<4 x float>>:289 [#uses=1]
+	fmul <4 x float> %289, %15		; <<4 x float>>:290 [#uses=1]
+	fmul <4 x float> %290, %16		; <<4 x float>>:291 [#uses=1]
+	fmul <4 x float> %291, %17		; <<4 x float>>:292 [#uses=1]
+	fmul <4 x float> %292, %18		; <<4 x float>>:293 [#uses=1]
+	fmul <4 x float> %293, %19		; <<4 x float>>:294 [#uses=1]
+	fmul <4 x float> %294, %20		; <<4 x float>>:295 [#uses=1]
+	fmul <4 x float> %295, %21		; <<4 x float>>:296 [#uses=1]
+	fmul <4 x float> %296, %22		; <<4 x float>>:297 [#uses=1]
+	fmul <4 x float> %297, %23		; <<4 x float>>:298 [#uses=1]
+	fmul <4 x float> %298, %24		; <<4 x float>>:299 [#uses=1]
+	fmul <4 x float> %299, %25		; <<4 x float>>:300 [#uses=1]
+	fmul <4 x float> %300, %26		; <<4 x float>>:301 [#uses=1]
+	fmul <4 x float> %301, %27		; <<4 x float>>:302 [#uses=1]
+	fmul <4 x float> %302, %28		; <<4 x float>>:303 [#uses=1]
+	fmul <4 x float> %303, %29		; <<4 x float>>:304 [#uses=1]
+	fmul <4 x float> %304, %30		; <<4 x float>>:305 [#uses=1]
+	fmul <4 x float> %305, %31		; <<4 x float>>:306 [#uses=1]
+	fmul <4 x float> %306, %32		; <<4 x float>>:307 [#uses=1]
+	fmul <4 x float> %11, %11		; <<4 x float>>:308 [#uses=1]
+	fmul <4 x float> %308, %12		; <<4 x float>>:309 [#uses=1]
+	fmul <4 x float> %309, %13		; <<4 x float>>:310 [#uses=1]
+	fmul <4 x float> %310, %14		; <<4 x float>>:311 [#uses=1]
+	fmul <4 x float> %311, %15		; <<4 x float>>:312 [#uses=1]
+	fmul <4 x float> %312, %16		; <<4 x float>>:313 [#uses=1]
+	fmul <4 x float> %313, %17		; <<4 x float>>:314 [#uses=1]
+	fmul <4 x float> %314, %18		; <<4 x float>>:315 [#uses=1]
+	fmul <4 x float> %315, %19		; <<4 x float>>:316 [#uses=1]
+	fmul <4 x float> %316, %20		; <<4 x float>>:317 [#uses=1]
+	fmul <4 x float> %317, %21		; <<4 x float>>:318 [#uses=1]
+	fmul <4 x float> %318, %22		; <<4 x float>>:319 [#uses=1]
+	fmul <4 x float> %319, %23		; <<4 x float>>:320 [#uses=1]
+	fmul <4 x float> %320, %24		; <<4 x float>>:321 [#uses=1]
+	fmul <4 x float> %321, %25		; <<4 x float>>:322 [#uses=1]
+	fmul <4 x float> %322, %26		; <<4 x float>>:323 [#uses=1]
+	fmul <4 x float> %323, %27		; <<4 x float>>:324 [#uses=1]
+	fmul <4 x float> %324, %28		; <<4 x float>>:325 [#uses=1]
+	fmul <4 x float> %325, %29		; <<4 x float>>:326 [#uses=1]
+	fmul <4 x float> %326, %30		; <<4 x float>>:327 [#uses=1]
+	fmul <4 x float> %327, %31		; <<4 x float>>:328 [#uses=1]
+	fmul <4 x float> %328, %32		; <<4 x float>>:329 [#uses=1]
+	fmul <4 x float> %12, %12		; <<4 x float>>:330 [#uses=1]
+	fmul <4 x float> %330, %13		; <<4 x float>>:331 [#uses=1]
+	fmul <4 x float> %331, %14		; <<4 x float>>:332 [#uses=1]
+	fmul <4 x float> %332, %15		; <<4 x float>>:333 [#uses=1]
+	fmul <4 x float> %333, %16		; <<4 x float>>:334 [#uses=1]
+	fmul <4 x float> %334, %17		; <<4 x float>>:335 [#uses=1]
+	fmul <4 x float> %335, %18		; <<4 x float>>:336 [#uses=1]
+	fmul <4 x float> %336, %19		; <<4 x float>>:337 [#uses=1]
+	fmul <4 x float> %337, %20		; <<4 x float>>:338 [#uses=1]
+	fmul <4 x float> %338, %21		; <<4 x float>>:339 [#uses=1]
+	fmul <4 x float> %339, %22		; <<4 x float>>:340 [#uses=1]
+	fmul <4 x float> %340, %23		; <<4 x float>>:341 [#uses=1]
+	fmul <4 x float> %341, %24		; <<4 x float>>:342 [#uses=1]
+	fmul <4 x float> %342, %25		; <<4 x float>>:343 [#uses=1]
+	fmul <4 x float> %343, %26		; <<4 x float>>:344 [#uses=1]
+	fmul <4 x float> %344, %27		; <<4 x float>>:345 [#uses=1]
+	fmul <4 x float> %345, %28		; <<4 x float>>:346 [#uses=1]
+	fmul <4 x float> %346, %29		; <<4 x float>>:347 [#uses=1]
+	fmul <4 x float> %347, %30		; <<4 x float>>:348 [#uses=1]
+	fmul <4 x float> %348, %31		; <<4 x float>>:349 [#uses=1]
+	fmul <4 x float> %349, %32		; <<4 x float>>:350 [#uses=1]
+	fmul <4 x float> %13, %13		; <<4 x float>>:351 [#uses=1]
+	fmul <4 x float> %351, %14		; <<4 x float>>:352 [#uses=1]
+	fmul <4 x float> %352, %15		; <<4 x float>>:353 [#uses=1]
+	fmul <4 x float> %353, %16		; <<4 x float>>:354 [#uses=1]
+	fmul <4 x float> %354, %17		; <<4 x float>>:355 [#uses=1]
+	fmul <4 x float> %355, %18		; <<4 x float>>:356 [#uses=1]
+	fmul <4 x float> %356, %19		; <<4 x float>>:357 [#uses=1]
+	fmul <4 x float> %357, %20		; <<4 x float>>:358 [#uses=1]
+	fmul <4 x float> %358, %21		; <<4 x float>>:359 [#uses=1]
+	fmul <4 x float> %359, %22		; <<4 x float>>:360 [#uses=1]
+	fmul <4 x float> %360, %23		; <<4 x float>>:361 [#uses=1]
+	fmul <4 x float> %361, %24		; <<4 x float>>:362 [#uses=1]
+	fmul <4 x float> %362, %25		; <<4 x float>>:363 [#uses=1]
+	fmul <4 x float> %363, %26		; <<4 x float>>:364 [#uses=1]
+	fmul <4 x float> %364, %27		; <<4 x float>>:365 [#uses=1]
+	fmul <4 x float> %365, %28		; <<4 x float>>:366 [#uses=1]
+	fmul <4 x float> %366, %29		; <<4 x float>>:367 [#uses=1]
+	fmul <4 x float> %367, %30		; <<4 x float>>:368 [#uses=1]
+	fmul <4 x float> %368, %31		; <<4 x float>>:369 [#uses=1]
+	fmul <4 x float> %369, %32		; <<4 x float>>:370 [#uses=1]
+	fmul <4 x float> %14, %14		; <<4 x float>>:371 [#uses=1]
+	fmul <4 x float> %371, %15		; <<4 x float>>:372 [#uses=1]
+	fmul <4 x float> %372, %16		; <<4 x float>>:373 [#uses=1]
+	fmul <4 x float> %373, %17		; <<4 x float>>:374 [#uses=1]
+	fmul <4 x float> %374, %18		; <<4 x float>>:375 [#uses=1]
+	fmul <4 x float> %375, %19		; <<4 x float>>:376 [#uses=1]
+	fmul <4 x float> %376, %20		; <<4 x float>>:377 [#uses=1]
+	fmul <4 x float> %377, %21		; <<4 x float>>:378 [#uses=1]
+	fmul <4 x float> %378, %22		; <<4 x float>>:379 [#uses=1]
+	fmul <4 x float> %379, %23		; <<4 x float>>:380 [#uses=1]
+	fmul <4 x float> %380, %24		; <<4 x float>>:381 [#uses=1]
+	fmul <4 x float> %381, %25		; <<4 x float>>:382 [#uses=1]
+	fmul <4 x float> %382, %26		; <<4 x float>>:383 [#uses=1]
+	fmul <4 x float> %383, %27		; <<4 x float>>:384 [#uses=1]
+	fmul <4 x float> %384, %28		; <<4 x float>>:385 [#uses=1]
+	fmul <4 x float> %385, %29		; <<4 x float>>:386 [#uses=1]
+	fmul <4 x float> %386, %30		; <<4 x float>>:387 [#uses=1]
+	fmul <4 x float> %387, %31		; <<4 x float>>:388 [#uses=1]
+	fmul <4 x float> %388, %32		; <<4 x float>>:389 [#uses=1]
+	fmul <4 x float> %15, %15		; <<4 x float>>:390 [#uses=1]
+	fmul <4 x float> %390, %16		; <<4 x float>>:391 [#uses=1]
+	fmul <4 x float> %391, %17		; <<4 x float>>:392 [#uses=1]
+	fmul <4 x float> %392, %18		; <<4 x float>>:393 [#uses=1]
+	fmul <4 x float> %393, %19		; <<4 x float>>:394 [#uses=1]
+	fmul <4 x float> %394, %20		; <<4 x float>>:395 [#uses=1]
+	fmul <4 x float> %395, %21		; <<4 x float>>:396 [#uses=1]
+	fmul <4 x float> %396, %22		; <<4 x float>>:397 [#uses=1]
+	fmul <4 x float> %397, %23		; <<4 x float>>:398 [#uses=1]
+	fmul <4 x float> %398, %24		; <<4 x float>>:399 [#uses=1]
+	fmul <4 x float> %399, %25		; <<4 x float>>:400 [#uses=1]
+	fmul <4 x float> %400, %26		; <<4 x float>>:401 [#uses=1]
+	fmul <4 x float> %401, %27		; <<4 x float>>:402 [#uses=1]
+	fmul <4 x float> %402, %28		; <<4 x float>>:403 [#uses=1]
+	fmul <4 x float> %403, %29		; <<4 x float>>:404 [#uses=1]
+	fmul <4 x float> %404, %30		; <<4 x float>>:405 [#uses=1]
+	fmul <4 x float> %405, %31		; <<4 x float>>:406 [#uses=1]
+	fmul <4 x float> %406, %32		; <<4 x float>>:407 [#uses=1]
+	fmul <4 x float> %16, %16		; <<4 x float>>:408 [#uses=1]
+	fmul <4 x float> %408, %17		; <<4 x float>>:409 [#uses=1]
+	fmul <4 x float> %409, %18		; <<4 x float>>:410 [#uses=1]
+	fmul <4 x float> %410, %19		; <<4 x float>>:411 [#uses=1]
+	fmul <4 x float> %411, %20		; <<4 x float>>:412 [#uses=1]
+	fmul <4 x float> %412, %21		; <<4 x float>>:413 [#uses=1]
+	fmul <4 x float> %413, %22		; <<4 x float>>:414 [#uses=1]
+	fmul <4 x float> %414, %23		; <<4 x float>>:415 [#uses=1]
+	fmul <4 x float> %415, %24		; <<4 x float>>:416 [#uses=1]
+	fmul <4 x float> %416, %25		; <<4 x float>>:417 [#uses=1]
+	fmul <4 x float> %417, %26		; <<4 x float>>:418 [#uses=1]
+	fmul <4 x float> %418, %27		; <<4 x float>>:419 [#uses=1]
+	fmul <4 x float> %419, %28		; <<4 x float>>:420 [#uses=1]
+	fmul <4 x float> %420, %29		; <<4 x float>>:421 [#uses=1]
+	fmul <4 x float> %421, %30		; <<4 x float>>:422 [#uses=1]
+	fmul <4 x float> %422, %31		; <<4 x float>>:423 [#uses=1]
+	fmul <4 x float> %423, %32		; <<4 x float>>:424 [#uses=1]
+	fmul <4 x float> %17, %17		; <<4 x float>>:425 [#uses=1]
+	fmul <4 x float> %425, %18		; <<4 x float>>:426 [#uses=1]
+	fmul <4 x float> %426, %19		; <<4 x float>>:427 [#uses=1]
+	fmul <4 x float> %427, %20		; <<4 x float>>:428 [#uses=1]
+	fmul <4 x float> %428, %21		; <<4 x float>>:429 [#uses=1]
+	fmul <4 x float> %429, %22		; <<4 x float>>:430 [#uses=1]
+	fmul <4 x float> %430, %23		; <<4 x float>>:431 [#uses=1]
+	fmul <4 x float> %431, %24		; <<4 x float>>:432 [#uses=1]
+	fmul <4 x float> %432, %25		; <<4 x float>>:433 [#uses=1]
+	fmul <4 x float> %433, %26		; <<4 x float>>:434 [#uses=1]
+	fmul <4 x float> %434, %27		; <<4 x float>>:435 [#uses=1]
+	fmul <4 x float> %435, %28		; <<4 x float>>:436 [#uses=1]
+	fmul <4 x float> %436, %29		; <<4 x float>>:437 [#uses=1]
+	fmul <4 x float> %437, %30		; <<4 x float>>:438 [#uses=1]
+	fmul <4 x float> %438, %31		; <<4 x float>>:439 [#uses=1]
+	fmul <4 x float> %439, %32		; <<4 x float>>:440 [#uses=1]
+	fmul <4 x float> %18, %18		; <<4 x float>>:441 [#uses=1]
+	fmul <4 x float> %441, %19		; <<4 x float>>:442 [#uses=1]
+	fmul <4 x float> %442, %20		; <<4 x float>>:443 [#uses=1]
+	fmul <4 x float> %443, %21		; <<4 x float>>:444 [#uses=1]
+	fmul <4 x float> %444, %22		; <<4 x float>>:445 [#uses=1]
+	fmul <4 x float> %445, %23		; <<4 x float>>:446 [#uses=1]
+	fmul <4 x float> %446, %24		; <<4 x float>>:447 [#uses=1]
+	fmul <4 x float> %447, %25		; <<4 x float>>:448 [#uses=1]
+	fmul <4 x float> %448, %26		; <<4 x float>>:449 [#uses=1]
+	fmul <4 x float> %449, %27		; <<4 x float>>:450 [#uses=1]
+	fmul <4 x float> %450, %28		; <<4 x float>>:451 [#uses=1]
+	fmul <4 x float> %451, %29		; <<4 x float>>:452 [#uses=1]
+	fmul <4 x float> %452, %30		; <<4 x float>>:453 [#uses=1]
+	fmul <4 x float> %453, %31		; <<4 x float>>:454 [#uses=1]
+	fmul <4 x float> %454, %32		; <<4 x float>>:455 [#uses=1]
+	fmul <4 x float> %19, %19		; <<4 x float>>:456 [#uses=1]
+	fmul <4 x float> %456, %20		; <<4 x float>>:457 [#uses=1]
+	fmul <4 x float> %457, %21		; <<4 x float>>:458 [#uses=1]
+	fmul <4 x float> %458, %22		; <<4 x float>>:459 [#uses=1]
+	fmul <4 x float> %459, %23		; <<4 x float>>:460 [#uses=1]
+	fmul <4 x float> %460, %24		; <<4 x float>>:461 [#uses=1]
+	fmul <4 x float> %461, %25		; <<4 x float>>:462 [#uses=1]
+	fmul <4 x float> %462, %26		; <<4 x float>>:463 [#uses=1]
+	fmul <4 x float> %463, %27		; <<4 x float>>:464 [#uses=1]
+	fmul <4 x float> %464, %28		; <<4 x float>>:465 [#uses=1]
+	fmul <4 x float> %465, %29		; <<4 x float>>:466 [#uses=1]
+	fmul <4 x float> %466, %30		; <<4 x float>>:467 [#uses=1]
+	fmul <4 x float> %467, %31		; <<4 x float>>:468 [#uses=1]
+	fmul <4 x float> %468, %32		; <<4 x float>>:469 [#uses=1]
+	fmul <4 x float> %20, %20		; <<4 x float>>:470 [#uses=1]
+	fmul <4 x float> %470, %21		; <<4 x float>>:471 [#uses=1]
+	fmul <4 x float> %471, %22		; <<4 x float>>:472 [#uses=1]
+	fmul <4 x float> %472, %23		; <<4 x float>>:473 [#uses=1]
+	fmul <4 x float> %473, %24		; <<4 x float>>:474 [#uses=1]
+	fmul <4 x float> %474, %25		; <<4 x float>>:475 [#uses=1]
+	fmul <4 x float> %475, %26		; <<4 x float>>:476 [#uses=1]
+	fmul <4 x float> %476, %27		; <<4 x float>>:477 [#uses=1]
+	fmul <4 x float> %477, %28		; <<4 x float>>:478 [#uses=1]
+	fmul <4 x float> %478, %29		; <<4 x float>>:479 [#uses=1]
+	fmul <4 x float> %479, %30		; <<4 x float>>:480 [#uses=1]
+	fmul <4 x float> %480, %31		; <<4 x float>>:481 [#uses=1]
+	fmul <4 x float> %481, %32		; <<4 x float>>:482 [#uses=1]
+	fmul <4 x float> %21, %21		; <<4 x float>>:483 [#uses=1]
+	fmul <4 x float> %483, %22		; <<4 x float>>:484 [#uses=1]
+	fmul <4 x float> %484, %23		; <<4 x float>>:485 [#uses=1]
+	fmul <4 x float> %485, %24		; <<4 x float>>:486 [#uses=1]
+	fmul <4 x float> %486, %25		; <<4 x float>>:487 [#uses=1]
+	fmul <4 x float> %487, %26		; <<4 x float>>:488 [#uses=1]
+	fmul <4 x float> %488, %27		; <<4 x float>>:489 [#uses=1]
+	fmul <4 x float> %489, %28		; <<4 x float>>:490 [#uses=1]
+	fmul <4 x float> %490, %29		; <<4 x float>>:491 [#uses=1]
+	fmul <4 x float> %491, %30		; <<4 x float>>:492 [#uses=1]
+	fmul <4 x float> %492, %31		; <<4 x float>>:493 [#uses=1]
+	fmul <4 x float> %493, %32		; <<4 x float>>:494 [#uses=1]
+	fmul <4 x float> %22, %22		; <<4 x float>>:495 [#uses=1]
+	fmul <4 x float> %495, %23		; <<4 x float>>:496 [#uses=1]
+	fmul <4 x float> %496, %24		; <<4 x float>>:497 [#uses=1]
+	fmul <4 x float> %497, %25		; <<4 x float>>:498 [#uses=1]
+	fmul <4 x float> %498, %26		; <<4 x float>>:499 [#uses=1]
+	fmul <4 x float> %499, %27		; <<4 x float>>:500 [#uses=1]
+	fmul <4 x float> %500, %28		; <<4 x float>>:501 [#uses=1]
+	fmul <4 x float> %501, %29		; <<4 x float>>:502 [#uses=1]
+	fmul <4 x float> %502, %30		; <<4 x float>>:503 [#uses=1]
+	fmul <4 x float> %503, %31		; <<4 x float>>:504 [#uses=1]
+	fmul <4 x float> %504, %32		; <<4 x float>>:505 [#uses=1]
+	fmul <4 x float> %23, %23		; <<4 x float>>:506 [#uses=1]
+	fmul <4 x float> %506, %24		; <<4 x float>>:507 [#uses=1]
+	fmul <4 x float> %507, %25		; <<4 x float>>:508 [#uses=1]
+	fmul <4 x float> %508, %26		; <<4 x float>>:509 [#uses=1]
+	fmul <4 x float> %509, %27		; <<4 x float>>:510 [#uses=1]
+	fmul <4 x float> %510, %28		; <<4 x float>>:511 [#uses=1]
+	fmul <4 x float> %511, %29		; <<4 x float>>:512 [#uses=1]
+	fmul <4 x float> %512, %30		; <<4 x float>>:513 [#uses=1]
+	fmul <4 x float> %513, %31		; <<4 x float>>:514 [#uses=1]
+	fmul <4 x float> %514, %32		; <<4 x float>>:515 [#uses=1]
+	fmul <4 x float> %24, %24		; <<4 x float>>:516 [#uses=1]
+	fmul <4 x float> %516, %25		; <<4 x float>>:517 [#uses=1]
+	fmul <4 x float> %517, %26		; <<4 x float>>:518 [#uses=1]
+	fmul <4 x float> %518, %27		; <<4 x float>>:519 [#uses=1]
+	fmul <4 x float> %519, %28		; <<4 x float>>:520 [#uses=1]
+	fmul <4 x float> %520, %29		; <<4 x float>>:521 [#uses=1]
+	fmul <4 x float> %521, %30		; <<4 x float>>:522 [#uses=1]
+	fmul <4 x float> %522, %31		; <<4 x float>>:523 [#uses=1]
+	fmul <4 x float> %523, %32		; <<4 x float>>:524 [#uses=1]
+	fmul <4 x float> %25, %25		; <<4 x float>>:525 [#uses=1]
+	fmul <4 x float> %525, %26		; <<4 x float>>:526 [#uses=1]
+	fmul <4 x float> %526, %27		; <<4 x float>>:527 [#uses=1]
+	fmul <4 x float> %527, %28		; <<4 x float>>:528 [#uses=1]
+	fmul <4 x float> %528, %29		; <<4 x float>>:529 [#uses=1]
+	fmul <4 x float> %529, %30		; <<4 x float>>:530 [#uses=1]
+	fmul <4 x float> %530, %31		; <<4 x float>>:531 [#uses=1]
+	fmul <4 x float> %531, %32		; <<4 x float>>:532 [#uses=1]
+	fmul <4 x float> %26, %26		; <<4 x float>>:533 [#uses=1]
+	fmul <4 x float> %533, %27		; <<4 x float>>:534 [#uses=1]
+	fmul <4 x float> %534, %28		; <<4 x float>>:535 [#uses=1]
+	fmul <4 x float> %535, %29		; <<4 x float>>:536 [#uses=1]
+	fmul <4 x float> %536, %30		; <<4 x float>>:537 [#uses=1]
+	fmul <4 x float> %537, %31		; <<4 x float>>:538 [#uses=1]
+	fmul <4 x float> %538, %32		; <<4 x float>>:539 [#uses=1]
+	fmul <4 x float> %27, %27		; <<4 x float>>:540 [#uses=1]
+	fmul <4 x float> %540, %28		; <<4 x float>>:541 [#uses=1]
+	fmul <4 x float> %541, %29		; <<4 x float>>:542 [#uses=1]
+	fmul <4 x float> %542, %30		; <<4 x float>>:543 [#uses=1]
+	fmul <4 x float> %543, %31		; <<4 x float>>:544 [#uses=1]
+	fmul <4 x float> %544, %32		; <<4 x float>>:545 [#uses=1]
+	fmul <4 x float> %28, %28		; <<4 x float>>:546 [#uses=1]
+	fmul <4 x float> %546, %29		; <<4 x float>>:547 [#uses=1]
+	fmul <4 x float> %547, %30		; <<4 x float>>:548 [#uses=1]
+	fmul <4 x float> %548, %31		; <<4 x float>>:549 [#uses=1]
+	fmul <4 x float> %549, %32		; <<4 x float>>:550 [#uses=1]
+	fmul <4 x float> %29, %29		; <<4 x float>>:551 [#uses=1]
+	fmul <4 x float> %551, %30		; <<4 x float>>:552 [#uses=1]
+	fmul <4 x float> %552, %31		; <<4 x float>>:553 [#uses=1]
+	fmul <4 x float> %553, %32		; <<4 x float>>:554 [#uses=1]
+	fmul <4 x float> %30, %30		; <<4 x float>>:555 [#uses=1]
+	fmul <4 x float> %555, %31		; <<4 x float>>:556 [#uses=1]
+	fmul <4 x float> %556, %32		; <<4 x float>>:557 [#uses=1]
+	fmul <4 x float> %31, %31		; <<4 x float>>:558 [#uses=1]
+	fmul <4 x float> %558, %32		; <<4 x float>>:559 [#uses=1]
+	fmul <4 x float> %32, %32		; <<4 x float>>:560 [#uses=1]
+	fadd <4 x float> %64, %64		; <<4 x float>>:561 [#uses=1]
+	fadd <4 x float> %561, %64		; <<4 x float>>:562 [#uses=1]
+	fadd <4 x float> %562, %95		; <<4 x float>>:563 [#uses=1]
+	fadd <4 x float> %563, %125		; <<4 x float>>:564 [#uses=1]
+	fadd <4 x float> %564, %154		; <<4 x float>>:565 [#uses=1]
+	fadd <4 x float> %565, %182		; <<4 x float>>:566 [#uses=1]
+	fadd <4 x float> %566, %209		; <<4 x float>>:567 [#uses=1]
+	fadd <4 x float> %567, %235		; <<4 x float>>:568 [#uses=1]
+	fadd <4 x float> %568, %260		; <<4 x float>>:569 [#uses=1]
+	fadd <4 x float> %569, %284		; <<4 x float>>:570 [#uses=1]
+	fadd <4 x float> %570, %307		; <<4 x float>>:571 [#uses=1]
+	fadd <4 x float> %571, %329		; <<4 x float>>:572 [#uses=1]
+	fadd <4 x float> %572, %350		; <<4 x float>>:573 [#uses=1]
+	fadd <4 x float> %573, %370		; <<4 x float>>:574 [#uses=1]
+	fadd <4 x float> %574, %389		; <<4 x float>>:575 [#uses=1]
+	fadd <4 x float> %575, %407		; <<4 x float>>:576 [#uses=1]
+	fadd <4 x float> %576, %424		; <<4 x float>>:577 [#uses=1]
+	fadd <4 x float> %577, %440		; <<4 x float>>:578 [#uses=1]
+	fadd <4 x float> %578, %455		; <<4 x float>>:579 [#uses=1]
+	fadd <4 x float> %579, %469		; <<4 x float>>:580 [#uses=1]
+	fadd <4 x float> %580, %482		; <<4 x float>>:581 [#uses=1]
+	fadd <4 x float> %581, %494		; <<4 x float>>:582 [#uses=1]
+	fadd <4 x float> %582, %505		; <<4 x float>>:583 [#uses=1]
+	fadd <4 x float> %583, %515		; <<4 x float>>:584 [#uses=1]
+	fadd <4 x float> %584, %524		; <<4 x float>>:585 [#uses=1]
+	fadd <4 x float> %585, %532		; <<4 x float>>:586 [#uses=1]
+	fadd <4 x float> %586, %539		; <<4 x float>>:587 [#uses=1]
+	fadd <4 x float> %587, %545		; <<4 x float>>:588 [#uses=1]
+	fadd <4 x float> %588, %550		; <<4 x float>>:589 [#uses=1]
+	fadd <4 x float> %589, %554		; <<4 x float>>:590 [#uses=1]
+	fadd <4 x float> %590, %557		; <<4 x float>>:591 [#uses=1]
+	fadd <4 x float> %591, %559		; <<4 x float>>:592 [#uses=1]
+	fadd <4 x float> %592, %560		; <<4 x float>>:593 [#uses=1]
+	store <4 x float> %593, <4 x float>* @0, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
index 360ec73bc436..9324d5dfa3bb 100644
--- a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -4,7 +4,7 @@
 
 define i32 @main() nounwind {
 entry:
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK: lock
 ; CHECK: decq
 	atomicrmw sub i64* @var, i64 1 monotonic
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
index c63c890add50..7cf9cb007708 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -1,5 +1,5 @@
 ; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; RUN: llc -mcpu=corei7 < %s -verify-machineinstrs | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i386-pc-linux"
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 534f99033372..59d1c7f77abf 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86
+; RUN: llc -pre-RA-sched=source < %s -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix=SOURCE-SCHED
 ; PR2748
 
 @g_73 = external global i32		; <i32*> [#uses=1]
@@ -6,6 +7,17 @@
 
 define i32 @func_44(i16 signext %p_46) nounwind {
 entry:
+; SOURCE-SCHED: subl
+; SOURCE-SCHED: movl
+; SOURCE-SCHED: sarl
+; SOURCE-SCHED: cmpl
+; SOURCE-SCHED: setg
+; SOURCE-SCHED: movzbl
+; SOURCE-SCHED: movb
+; SOURCE-SCHED: xorl
+; SOURCE-SCHED: subl
+; SOURCE-SCHED: testb
+; SOURCE-SCHED: jne
 	%0 = load i32* @g_5, align 4		; <i32> [#uses=1]
 	%1 = ashr i32 %0, 1		; <i32> [#uses=1]
 	%2 = icmp sgt i32 %1, 1		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 890fd0f067cf..296f0ca135b8 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -67,19 +67,21 @@ declare i64 @strlen(i8*) nounwind readonly
 declare void @llvm.stackrestore(i8*) nounwind
 
 !0 = metadata !{i32 459009, metadata !1, metadata !"s1", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"vla.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 458769, metadata !17, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !18, metadata !18, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 458773, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5, metadata !6}
-!5 = metadata !{i32 458788, metadata !2, metadata !"char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!5 = metadata !{i32 458788, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458767, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 2, i32 0, metadata !1, null}
 !8 = metadata !{i32 459008, metadata !1, metadata !"str.0", metadata !2, i32 3, metadata !9} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ]
+!9 = metadata !{i32 458767, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 458753, null, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
 !11 = metadata !{metadata !12}
 !12 = metadata !{i32 458785, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
 !13 = metadata !{i32 3, i32 0, metadata !14, null}
-!14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 458763, metadata !17, metadata !1, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 4, i32 0, metadata !14, null}
 !16 = metadata !{i32 5, i32 0, metadata !14, null}
+!17 = metadata !{metadata !"vla.c", metadata !"/tmp/"}
+!18 = metadata !{i32 0}
diff --git a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
index d64c96658014..75496518afa6 100644
--- a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
+++ b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin9.6"
 
 define void @f() nounwind {
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK-NOT: ret
 ; CHECK: foo $-81920
 ; CHECK-NOT: ret
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 68a9fafb6de8..764c2cdd6d99 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm"
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
 
@@ -15,11 +15,11 @@ entry:
 
 bb4:		; preds = %bb.i, %bb26, %bb4, %entry
 ; CHECK: %bb4
-; CHECK: xorb
+; CHECK: xorl
 ; CHECK: callq
-; CHECK: movq
 ; CHECK: xorl
-; CHECK: xorb
+; CHECK: xorl
+; CHECK: movq
 
 	%0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind		; <i32> [#uses=0]
 	%ins = or i64 %p, 2097152		; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 351a1722a231..bbc1d341d4e0 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -relocation-model=static -o /dev/null -stats -info-output-file - > %t
 ; RUN: not grep spill %t
 ; RUN: not grep "%rsp" %t
 ; RUN: not grep "%rbp" %t
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 5cb05e8a796f..e1930e012dd8 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
+; RUN:     -mcpu=generic -disable-fp-elim -mattr=-sse4.1,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
 ; RUN:   FileCheck %s
 ; rdar://6808032
 
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 3cd54169745d..7c87598d0d9c 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define i64 @foo(i64 %b) nounwind readnone {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: shlq $56, %rdi
 ; CHECK: sarq $48, %rdi
 ; CHECK: leaq 1(%rdi), %rax
diff --git a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
index 80b883582ce5..0268d817c70d 100644
--- a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
 
 define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
-; CHECK: dot:
+; CHECK-LABEL: dot:
 ; CHECK: decl %
 ; CHECK-NEXT: jne
 entry:
diff --git a/test/CodeGen/X86/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll
index e41038d3fdcf..a936edc120d8 100644
--- a/test/CodeGen/X86/2009-10-16-Scope.ll
+++ b/test/CodeGen/X86/2009-10-16-Scope.ll
@@ -23,10 +23,12 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare i32 @foo(i32) ssp
 
 !0 = metadata !{i32 5, i32 2, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2, i32 1, i32 1}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", metadata !3, i32 4, null, i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"genmodes.i", metadata !"/Users/yash/Downloads", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!1 = metadata !{i32 458763, null, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, metadata !8, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, null, metadata !9, null, null, null, metadata !""}; [DW_TAG_compile_unit ]
 !4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ]
-!5 = metadata !{i32 458763, metadata !1, i32 1, i32 1}; [DW_TAG_lexical_block ]
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+!5 = metadata !{i32 458763, null, metadata !1, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
+!6 = metadata !{i32 458788, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
 !7 = metadata !{i32 6, i32 1, metadata !2, null}
+!8 = metadata !{metadata !"genmodes.i", metadata !"/Users/yash/Downloads"}
+!9 = metadata !{i32 0}
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
index 2ac688fd80af..fedb2a51f357 100644
--- a/test/CodeGen/X86/2009-11-16-MachineLICM.ll
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -5,7 +5,7 @@
 
 define void @foo(i32 %n, float* nocapture %x) nounwind ssp {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
   %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
   br i1 %0, label %bb.nph, label %return
 
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index c2d9d84d4c5a..08a99e3f6618 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -5,7 +5,7 @@
 
 define void @t(i32 %count) ssp nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movups L_str+12(%rip), %xmm0
 ; CHECK: movups L_str(%rip), %xmm1
   %tmp0 = alloca [60 x i8], align 1
diff --git a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
index 07003234a993..b1664470551b 100644
--- a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -3,7 +3,7 @@
 
 define void @t() nounwind ssp {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movl %ecx, %eax
 ; CHECK: %eax = foo (%eax, %ecx)
   %b = alloca i32                                 ; <i32*> [#uses=2]
@@ -21,7 +21,7 @@ return:                                           ; preds = %entry
 
 define void @t2() nounwind ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: movl
 ; CHECK: [[D2:%e.x]] = foo
 ; CHECK: ([[D2]],
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
index 823e0ca465ef..65b70a7d4936 100644
--- a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @_dm_offset_addr_mask = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
 
 define void @leaf() nounwind {
-; CHECK: leaf:
+; CHECK-LABEL: leaf:
 ; CHECK-NOT: -8(%rsp)
 ; CHECK: leaq link_ptr@TLSGD
 ; CHECK: callq __tls_get_addr@PLT
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
index 3d058bc28965..f9bf3109ea10 100644
--- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -6,7 +6,7 @@
 
 define void @t(i64* nocapture %p) nounwind ssp {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movl ([[REG:%[a-z]+]]), %eax
 ; CHECK: movl 4([[REG]]), %edx
 ; CHECK: LBB0_1:
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index 7dba332b1bec..f99e68242811 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -29,24 +29,27 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!21}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !19, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !7}
 !6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
 !8 = metadata !{metadata !9, metadata !14}
 !9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
 !11 = metadata !{metadata !12, metadata !13}
 !12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
 !14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
 !15 = metadata !{i32 11, i32 0, metadata !1, null}
 !16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 786443, metadata !19, metadata !1, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{metadata !1}
 !19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
+!20 = metadata !{i32 0}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
index 2113263c0ac3..4d4e8c197d87 100644
--- a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -16,18 +16,20 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, i32 0, i32 1, metadata !"sm.c", metadata !"/Volumes/MacOS9/tests/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !0, metadata !"", metadata !0, i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !0, metadata !"C", metadata !0, i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_structure_type ]
+!0 = metadata !{i32 458769, metadata !15, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !16, metadata !16, null, null, null, i32 0} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 458790, metadata !15, metadata !0, metadata !"", i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
+!2 = metadata !{i32 458771, metadata !15, metadata !0, metadata !"C", i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 192, align 64, offset 0] [def] [from ]
 !3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !2, metadata !"x", metadata !0, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !0, metadata !"double", metadata !0, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !2, metadata !"y", metadata !0, i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !2, metadata !"z", metadata !0, i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
+!4 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"x", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!5 = metadata !{i32 458788, metadata !15, metadata !0, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"y", i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!7 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"z", i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
 !8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, metadata !10}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", metadata !0, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !0, metadata !"", metadata !0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 458763, null, metadata !10, i32 0, i32 0, i32 0}        ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 458773, metadata !15, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !0, metadata !"int", metadata !0, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 458788, metadata !15, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!15 = metadata !{metadata !"sm.c", metadata !""}
+!16 = metadata !{i32 0}
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index d4a74c9e7e7a..060c535dd778 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -1,9 +1,9 @@
-; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt -enable-misched=false < %s | FileCheck %s
 ; Check that lowered argumens do not overwrite the return address before it is moved.
 ; Bug 6225
 ;
 ; If a call is a fastcc tail call and tail call optimization is enabled, the
-; caller frame is replaced by the callee frame. This can require that arguments are 
+; caller frame is replaced by the callee frame. This can require that arguments are
 ; placed on the former return address stack slot. Special care needs to be taken
 ; taken that the return address is moved / or stored in a register before
 ; lowering of arguments potentially overwrites the value.
@@ -51,5 +51,3 @@ false:
   tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind
   ret void
 }
-
-
diff --git a/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll
index 6a58e9e55182..a8c87fa2074a 100644
--- a/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll
+++ b/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll
@@ -2,7 +2,7 @@
 
 define i32* @t() nounwind optsize ssp {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: testl %eax, %eax
 ; CHECK: js
   %cmp = icmp slt i32 undef, 0                    ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
index 9a5958e62a0e..5e86ecf42b11 100644
--- a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
+++ b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
@@ -11,7 +11,7 @@
 
 define void @t(%struct.F* %this) nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: addq $12, %rsi
   %BitValueArray = alloca [32 x i32], align 4
   %tmp2 = getelementptr inbounds %struct.F* %this, i64 0, i32 0
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 7650a5c3be88..7faee993a7d1 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -199,12 +199,13 @@ declare float @copysignf(float, float) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!48}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !45, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !45, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !47, metadata !47, metadata !44, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
 !6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
 !7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
@@ -215,7 +216,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !12 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !13 = metadata !{i32 786689, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !14 = metadata !{i32 786688, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !2, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 786443, metadata !45, metadata !1, i32 1922, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 786688, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !17 = metadata !{i32 786688, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !18 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
@@ -247,3 +248,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !44 = metadata !{metadata !1}
 !45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
 !46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
+!47 = metadata !{i32 0}
+!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 6510ff17f7bb..c5736eb9b449 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -22,32 +22,33 @@ declare void @foo(i32) nounwind optsize noinline ssp
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!38}
 
-!0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
+!0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null, null} ; [ DW_TAG_variable ]
 !1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31,  metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !37, metadata !37, metadata !32, metadata !31,  metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786468, metadata !36, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
-!5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null, metadata !3}
 !8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786451, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786451, metadata !36, metadata !1, metadata !"a", i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 2, size 128, align 64, offset 0] [def] [from ]
 !15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
-!17 = metadata !{i32 786445, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786445, metadata !36, metadata !14, metadata !"c", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
+!17 = metadata !{i32 786445, metadata !36, metadata !14, metadata !"d", i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
 !18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ]
-!20 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!19 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !21 = metadata !{metadata !3, metadata !3, metadata !22}
-!22 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ]
-!24 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!22 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ]
+!24 = metadata !{i32 786468, metadata !36, metadata !1, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !25 = metadata !{i32 786689, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22, i32 0, null} ; [ DW_TAG_arg_variable ]
 !26 = metadata !{i32 786688, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14, i32 0, null} ; [ DW_TAG_auto_variable ]
 !27 = metadata !{i32 786443, metadata !36, metadata !19, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
@@ -60,6 +61,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !34 = metadata !{metadata !8}
 !35 = metadata !{metadata !18, metadata !25, metadata !26}
 !36 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
+!37 = metadata !{i32 0}
 
 ; The variable bar:myvar changes registers after the first movq.
 ; It is cobbered by popq %rbx
@@ -85,3 +87,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   83
 ; CHECK-NEXT: Ltmp{{.*}}:
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index ee00dbacbf60..1114c8dc87bb 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -1,5 +1,5 @@
-; RUN: llc  -mtriple=x86_64-apple-darwin < %s | FileCheck %s
-; RUN: llc  -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s
 ; Test to check separate label for inlined function argument.
 
 define i32 @foo(i32 %y) nounwind optsize ssp {
@@ -23,27 +23,30 @@ entry:
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!20}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !6}
-!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ]
 !9 = metadata !{i32 3, i32 0, metadata !10, null}
-!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !18, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 1}
 !12 = metadata !{i32 3, i32 0, metadata !10, metadata !13}
 !13 = metadata !{i32 7, i32 0, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !2, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !18, metadata !8, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{metadata !0}
 !16 = metadata !{metadata !7}
 !17 = metadata !{metadata !1, metadata !8}
 !18 = metadata !{metadata !"f.c", metadata !"/tmp"}
+!19 = metadata !{i32 0}
 
 ;CHECK: DEBUG_VALUE: bar:x <- E
 ;CHECK: Ltmp
-;CHECK:	DEBUG_VALUE: foo:y <- 1+0
+;CHECK:	DEBUG_VALUE: foo:y <- 1{{$}}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index b764b0b34597..b45ac226a650 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-darwin10.2"
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.foo*, i32)* @_ZN3foo3bazEi to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
 
 define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 {
-;CHECK: DEBUG_VALUE: baz:this <- RDI+0
+;CHECK: DEBUG_VALUE: baz:this <- RDI{{$}}
 entry:
   tail call void @llvm.dbg.value(metadata !{%struct.foo* %this}, i64 0, metadata !15)
   tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !16)
@@ -19,37 +19,42 @@ entry:
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!34}
 !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786451, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ]
+!1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786451, metadata !31, metadata !3, metadata !"foo", i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ]
 !3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786449, metadata !31, i32 4, metadata !"4.2.1 LLVM build", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !33, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !5 = metadata !{metadata !6, metadata !1, metadata !8}
-!6 = metadata !{i32 786445, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
-!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{i32 786445, metadata !31, metadata !2, metadata !"y", i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
+!7 = metadata !{i32 786468, metadata !31, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !10 = metadata !{metadata !7, metadata !11, metadata !7}
-!11 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 786470, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ]
 !14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 786689, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
 !16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 786478, metadata !3, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ]
-!19 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!18 = metadata !{i32 786478, metadata !31, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ]
+!19 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !20 = metadata !{metadata !7, metadata !7, metadata !21}
-!21 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786468, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786468, metadata !31, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !24 = metadata !{i32 786689, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
 !25 = metadata !{i32 786688, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2, i32 0, null} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 786443, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 786443, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786443, metadata !31, metadata !27, i32 19, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !31, metadata !18, i32 19, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !28 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
 !29 = metadata !{i32 16, i32 0, metadata !30, null}
-!30 = metadata !{i32 786443, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 786443, metadata !31, metadata !8, i32 15, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"}
+!32 = metadata !{i32 0}
+!33 = metadata !{metadata !1, metadata !8, metadata !18}
+!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
index 208e93e098e6..b49aec3af87a 100644
--- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll
+++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -3,21 +3,23 @@
 @.str = private constant [4 x i8] c"one\00", align 1 ; <[4 x i8]*> [#uses=1]
 @.str1 = private constant [4 x i8] c"two\00", align 1 ; <[5 x i8]*> [#uses=1]
 @C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0)]
-!38 = metadata !{i32 524329, metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch", metadata !39} ; [ DW_TAG_file_type ]
-!39 = metadata !{i32 524305, i32 0, i32 1, metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!46 = metadata !{i32 524303, metadata !38, metadata !"", metadata !38, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ]!97 = metadata !{i32 524334, i32 0, metadata !38, metadata !"main", metadata !"main", metadata !"main", metadata !38, i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]!101 = metadata !{[2 x i8*]* @C.9.2167}
-!47 = metadata !{i32 524324, metadata !38, metadata !"char", metadata !38, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!97 = metadata !{i32 524334, i32 0, metadata !38, metadata !"main", metadata !"main", metadata !"main", metadata !38, i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!98 = metadata !{i32 524309, metadata !38, metadata !"", metadata !38, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!38 = metadata !{i32 524329, metadata !109} ; [ DW_TAG_file_type ]
+!39 = metadata !{i32 524305, metadata !109, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !108, metadata !108, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!46 = metadata !{i32 524303, metadata !109, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ]
+!47 = metadata !{i32 524324, metadata !109, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!97 = metadata !{i32 524334, i32 0, metadata !39, metadata !"main", metadata !"main", metadata !"main", i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!98 = metadata !{i32 524309, metadata !109, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !99 = metadata !{metadata !100}
-!100 = metadata !{i32 524324, metadata !38, metadata !"int", metadata !38, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!100 = metadata !{i32 524324, metadata !109, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !101 = metadata !{[2 x i8*]* @C.9.2167}
-!102 = metadata !{i32 524544, metadata !103, metadata !"find_strings", metadata !38, i32 75, metadata !104} ; [ DW_TAG_auto_variable ]
-!103 = metadata !{i32 524299, metadata !97, i32 73, i32 0} ; [ DW_TAG_lexical_block ]
-!104 = metadata !{i32 524289, metadata !38, metadata !"", metadata !38, i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ]
+!102 = metadata !{i32 524544, metadata !103, metadata !"find_strings", metadata !38, i32 75, metadata !104, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!103 = metadata !{i32 524299, null, metadata !97, i32 73, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!104 = metadata !{i32 524289, metadata !109, null, metadata !"", i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 85312, align 64, offset 0] [from ]
 !105 = metadata !{metadata !106}
 !106 = metadata !{i32 524321, i64 0, i64 1333}    ; [ DW_TAG_subrange_type ]
 !107 = metadata !{i32 73, i32 0, metadata !103, null}
+!108 = metadata !{i32 0}
+!109 = metadata !{metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch"}
 
 define i32 @main() nounwind ssp {
 bb.nph:
diff --git a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
index 96016cfe1c73..47e511f92098 100644
--- a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
+++ b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
@@ -9,6 +9,6 @@ entry:
   ret i32 %3
 }
 
-; CHECK: extend2bit_v2:
+; CHECK-LABEL: extend2bit_v2:
 ; CHECK: xorl	%eax, %eax
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index 91711bb758c3..91fec3beefcb 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -75,51 +75,55 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!49}
 !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
-!2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786451, metadata !47, metadata !2, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
+!2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, metadata !47, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !48, metadata !48, metadata !46, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
-!5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
-!8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786445, metadata !47, metadata !1, metadata !"Data", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786445, metadata !47, metadata !1, metadata !"Kind", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
+!8 = metadata !{i32 786468, metadata !47, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !11 = metadata !{null, metadata !12, metadata !13}
-!12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !47, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ]
-!18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ]
-!21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!20 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"main", metadata !"main", metadata !"main", i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !22 = metadata !{metadata !13}
 !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 16, i32 0, metadata !17, null}
 !25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!26 = metadata !{i32 786448, metadata !47, metadata !2, metadata !"SVal", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
 !27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !47, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !29 = metadata !{i32 18, i32 0, metadata !28, null}
 !30 = metadata !{i32 20, i32 0, metadata !28, null}
 !31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, null} ; [ DW_TAG_arg_variable ]
-!32 = metadata !{i32 786470, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
-!33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786470, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
+!33 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
 !34 = metadata !{i32 11, i32 0, metadata !16, null}
 !35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !47, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 786443, metadata !47, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, null} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
+!39 = metadata !{i32 786443, metadata !47, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !47, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
 !41 = metadata !{i32 24, i32 0, metadata !39, null}
 !42 = metadata !{i32 25, i32 0, metadata !39, null}
 !43 = metadata !{i32 26, i32 0, metadata !39, null}
 !44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
 !45 = metadata !{i32 27, i32 0, metadata !39, null}
+!47 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
+!48 = metadata !{i32 0}
+!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index de0d216e266f..9aa41c32c366 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -13,20 +13,23 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, metadata !13, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, metadata !16, metadata !16, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786468, metadata !14, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !15, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !7 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
 !8 = metadata !{i32 53, i32 13, metadata !9, null}
-!9 = metadata !{i32 786443, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786443, metadata !14, metadata !0, i32 53, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
 !10 = metadata !{i32 4, i32 13, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 786443, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !15, metadata !12, i32 4, i32 13, i32 2} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786443, metadata !15, metadata !6, i32 4, i32 11, i32 1} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{metadata !0, metadata !6}
 !14 = metadata !{metadata !"", metadata !"/private/tmp"}
 !15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"}
+!16 = metadata !{i32 0}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index 1b339777f571..39d89e3d8276 100644
--- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -19,8 +19,8 @@ entry:
 }
 
 ; CHECK: movq	___stack_chk_guard@GOTPCREL(%rip)
-; CHECK: movb   38(%rsp), [[R0:%.+]]
-; CHECK: movb   8(%rsp), [[R1:%.+]]
-; CHECK: movb   [[R1]], 8(%rsp)
-; CHECK: movb   [[R0]], 38(%rsp)
+; CHECK: movb   (%rsp), [[R1:%.+]]
+; CHECK: movb   30(%rsp), [[R0:%.+]]
+; CHECK: movb   [[R1]], (%rsp)
+; CHECK: movb   [[R0]], 30(%rsp)
 ; CHECK: callq	___stack_chk_fail
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 8719f738b7cc..21ac7c9079e8 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -16,22 +16,25 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!19}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !17, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, metadata !18, metadata !18, metadata !15, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !17, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, metadata !17, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786689, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786447, metadata !17, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !17, metadata !1, metadata !"bar", i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 2, size 64, align 32, offset 0] [def] [from ]
 !9 = metadata !{metadata !10, metadata !11}
-!10 = metadata !{i32 786445, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!11 = metadata !{i32 786445, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786445, metadata !17,  metadata !1, metadata !"x", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 786445, metadata !17, metadata !1, metadata !"y", i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
 !12 = metadata !{i32 3, i32 47, metadata !0, null}
 !13 = metadata !{i32 4, i32 2, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !17, metadata !0, i32 3, i32 50, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{metadata !0}
 !16 = metadata !{metadata !6}
 !17 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
+!18 = metadata !{i32 0}
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
index 4d8d974f703e..5a407d3f9972 100644
--- a/test/CodeGen/X86/2010-12-02-MC-Set.ll
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -7,17 +7,21 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !7 = metadata !{metadata !0}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, null, null, metadata !7, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !9, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, metadata !8, metadata !8, metadata !7, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !9, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 5, i32 1, metadata !6, null}
-!6 = metadata !{i32 786443, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 786443, metadata !9, metadata !0, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 0}
+!9 = metadata !{metadata !"e.c", metadata !"/private/tmp"}
 
 ; CHECK: .subsections_via_symbols
 ; CHECK-NEXT: __debug_line
 ; CHECK-NEXT: Lline_table_start0
 ; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 14fb3e493231..d5340300df54 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -70,30 +70,31 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 declare i32 @puts(i8* nocapture) nounwind
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [gcd]
 !1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !28, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] [line 25] [def] [scope 0] [main]
+!7 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !12 = metadata !{i32 786688, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786443, metadata !31, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
 !14 = metadata !{i32 786688, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !1, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 786443, metadata !31, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !17 = metadata !{i32 786688, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !18 = metadata !{i32 5, i32 41, metadata !0, null}
 !19 = metadata !{i32 5, i32 49, metadata !0, null}
 !20 = metadata !{i32 7, i32 5, metadata !13, null}
 !21 = metadata !{i32 8, i32 9, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !1, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !31, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 9, i32 9, metadata !22, null}
 !24 = metadata !{i32 26, i32 38, metadata !15, null}
 !25 = metadata !{i32 27, i32 38, metadata !15, null}
@@ -103,3 +104,5 @@ declare i32 @puts(i8* nocapture) nounwind
 !29 = metadata !{metadata !10, metadata !11, metadata !12}
 !30 = metadata !{metadata !14, metadata !17}
 !31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"}
+!32 = metadata !{i32 0}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2011-04-19-sclr-bb.ll b/test/CodeGen/X86/2011-04-19-sclr-bb.ll
index 771e4b3a0815..b77cc40acf3c 100644
--- a/test/CodeGen/X86/2011-04-19-sclr-bb.ll
+++ b/test/CodeGen/X86/2011-04-19-sclr-bb.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
 
 ; Make sure that values of illegal types are not scalarized between basic blocks.
-;CHECK: test
+;CHECK-LABEL: test:
 ;CHECK-NOT: pinsrw
 ;CHECK-NOT: pextrb
 ;CHECK: ret
diff --git a/test/CodeGen/X86/2011-05-09-loaduse.ll b/test/CodeGen/X86/2011-05-09-loaduse.ll
index 8673d7433fac..adcea5cf6159 100644
--- a/test/CodeGen/X86/2011-05-09-loaduse.ll
+++ b/test/CodeGen/X86/2011-05-09-loaduse.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
 
-;CHECK: test
+;CHECK-LABEL: test:
 ;CHECK-not: pshufd
 ;CHECK: ret
 define float @test(<4 x float>* %A) nounwind {
diff --git a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
index 0f18f0969bad..91cd208f0167 100644
--- a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
+++ b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.6.0"
 
 @aux_temp = external global %struct.dfa, align 8
 
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
 declare void @__memset_chk() nounwind
 
@@ -21,12 +21,12 @@ if.end.i:                                         ; preds = %entry
   br i1 undef, label %land.end.thread.i, label %land.end.i
 
 land.end.thread.i:                                ; preds = %if.end.i
-  %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind
+  %0 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false) nounwind
   %cmp1710.i = icmp eq i64 %0, -1
   br i1 %cmp1710.i, label %cond.false156.i, label %cond.true138.i
 
 land.end.i:                                       ; preds = %if.end.i
-  %1 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) nounwind
+  %1 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false) nounwind
   %cmp17.i = icmp eq i64 %1, -1
   br i1 %cmp17.i, label %cond.false156.i, label %cond.true138.i
 
@@ -41,13 +41,8 @@ cond.false156.i:                                  ; preds = %for.end.i, %land.en
 
 cond.end166.i:                                    ; preds = %cond.false156.i, %cond.true138.i
   %idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ]
-  %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8, !tbaa !0
+  %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8
   %att.i = getelementptr inbounds %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0
-  store i32 0, i32* %att.i, align 4, !tbaa !3
+  store i32 0, i32* %att.i, align 4
   ret void
 }
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll
index ce63c74fbdfd..5275b6889bff 100644
--- a/test/CodeGen/X86/2011-06-03-x87chain.ll
+++ b/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -29,3 +29,21 @@ entry:
   store float %conv, float* %f, align 4
   ret float %conv
 }
+
+define void @PR17495() {
+entry:
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %x.1.copyload = load i24* undef, align 1
+  %conv = sitofp i24 %x.1.copyload to float
+  %div = fmul float %conv, 0x3E80000000000000
+  store float %div, float* undef, align 4
+  br i1 false, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+
+; CHECK-LABEL: @PR17495
+; CHECK-NOT: fildll
+}
diff --git a/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll b/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
index 7632034e13b5..aea53b3b9855 100644
--- a/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
+++ b/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
@@ -16,5 +16,5 @@ entry:
   %tmp10 = sext i8 %tmp9 to i32
   ret i32 %tmp10
 }
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: movsbl	-2147483647
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 6d91109daafb..4e84e84c1aa9 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -14,22 +14,47 @@
 ; Prior to fixing PR10920 401.bzip miscompile, the coalescer would
 ; consider vreg1 and vreg27 to be copies of the same value. It would
 ; then remove one of the critical edge copes, which cannot safely be removed.
-;
+
+; There are two obvious ways the register-allocator could go here, either
+; reusing the pre-addition register later, or the post-addition one. Currently,
+; it does the latter, so we check:
+
 ; CHECK: # %while.body85.i
 ; CHECK-NOT: # %
 ; CHECK-NOT: add
 ; CHECK: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
 ; CHECK: addl %{{.*}}, %[[POSTR]]
 ; CHECK: # %while.end.i
-; CHECK: movl %[[POSTR]], %[[USER:e[abcdxi]+]]
+; CHECK-NOT: movl %[[POSTR]]
 ; CHECK: # %land.lhs.true.i
-; CHECK: movl %[[POSTR]], %[[USER]]
+; CHECK-NOT: movl %[[POSTR]]
 ; CHECK: # %land.lhs.true103.i
-; CHECK: movl %[[POSTR]], %[[USER]]
+; CHECK-NOT: movl %[[POSTR]]
 ; CHECK: # %if.then108.i
-; [[PRER] live out, so nothing on this path should define it.
-; CHECK-NOT: , %[[PRER]]
+; CHECK: movl %[[PRER]], %[[POSTR]]
 ; CHECK: # %if.end117.i
+;   and use it for fprintf:
+; CHECK: movl %[[POSTR]], 12(%esp)
+
+
+; If it ever reverts to reusing the pre-addition register then we should
+; *probably* check this instead (it certainly worked last time):
+
+; CHECKALT: # %while.body85.i
+; CHECKALT-NOT: # %
+; CHECKALT-NOT: add
+; CHECKALT: movl %[[POSTR:e[abcdxi]+]], %[[PRER:e[abcdxi]+]]
+; CHECKALT: addl %{{.*}}, %[[POSTR]]
+; CHECKALT: # %while.end.i
+; CHECKALT: movl %[[POSTR]], %[[USER:e[abcdxi]+]]
+; CHECKALT: # %land.lhs.true.i
+; CHECKALT: movl %[[POSTR]], %[[USER]]
+; CHECKALT: # %land.lhs.true103.i
+; CHECKALT: movl %[[POSTR]], %[[USER]]
+; CHECKALT: # %if.then108.i
+; [[PRER]] is live out, so nothing on this path should define it.
+; CHECKALT-NOT: , %[[PRER]]
+; CHECKALT: # %if.end117.i
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 
diff --git a/test/CodeGen/X86/2011-09-18-sse2cmp.ll b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
index a6f428fdacc3..89de648ca966 100644
--- a/test/CodeGen/X86/2011-09-18-sse2cmp.ll
+++ b/test/CodeGen/X86/2011-09-18-sse2cmp.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
+;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
 
 ;CHECK: @max
 ;CHECK: cmplepd
diff --git a/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
index 4daf6781495a..a67c3f338862 100644
--- a/test/CodeGen/X86/2011-09-21-setcc-bug.ll
+++ b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1
 
 ; Make sure we are not crashing on this code.
 
diff --git a/test/CodeGen/X86/2011-10-11-srl.ll b/test/CodeGen/X86/2011-10-11-srl.ll
index 6c6d340fd1a4..434f88c14b6a 100644
--- a/test/CodeGen/X86/2011-10-11-srl.ll
+++ b/test/CodeGen/X86/2011-10-11-srl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41 
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse4.1
 
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/X86/2011-10-12-MachineCSE.ll b/test/CodeGen/X86/2011-10-12-MachineCSE.ll
index cd15f84605a8..72e672ac4f1f 100644
--- a/test/CodeGen/X86/2011-10-12-MachineCSE.ll
+++ b/test/CodeGen/X86/2011-10-12-MachineCSE.ll
@@ -20,11 +20,11 @@ entry:
   %2 = lshr i32 %1, 16
   %bf.clear = and i32 %2, 255
   %idxprom = sext i32 %bf.clear to i64
-  %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8, !tbaa !0
+  %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8
   %handlers = getelementptr inbounds %struct.optab* %3, i32 0, i32 1
   %arrayidx = getelementptr inbounds [59 x %struct.anon.3]* %handlers, i32 0, i64 %idxprom
   %insn_code = getelementptr inbounds %struct.anon.3* %arrayidx, i32 0, i32 0
-  %4 = load i32* %insn_code, align 4, !tbaa !3
+  %4 = load i32* %insn_code, align 4
   %cmp = icmp eq i32 %4, 1317
   br i1 %cmp, label %if.then, label %lor.lhs.false
 
@@ -32,14 +32,14 @@ lor.lhs.false:                                    ; preds = %entry
   %idxprom1 = sext i32 %4 to i64
   %arrayidx2 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom1
   %operand = getelementptr inbounds %struct.insn_data* %arrayidx2, i32 0, i32 3
-  %5 = load %struct.insn_operand_data** %operand, align 8, !tbaa !0
+  %5 = load %struct.insn_operand_data** %operand, align 8
   %arrayidx3 = getelementptr inbounds %struct.insn_operand_data* %5, i64 0
   %predicate = getelementptr inbounds %struct.insn_operand_data* %arrayidx3, i32 0, i32 0
-  %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8, !tbaa !0
+  %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8
   %idxprom4 = sext i32 %4 to i64
   %arrayidx5 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom4
   %operand6 = getelementptr inbounds %struct.insn_data* %arrayidx5, i32 0, i32 3
-  %7 = load %struct.insn_operand_data** %operand6, align 8, !tbaa !0
+  %7 = load %struct.insn_operand_data** %operand6, align 8
   %arrayidx7 = getelementptr inbounds %struct.insn_operand_data* %7, i64 0
   %8 = bitcast %struct.insn_operand_data* %arrayidx7 to i8*
   %bf.field.offs = getelementptr i8* %8, i32 16
@@ -54,14 +54,14 @@ lor.lhs.false9:                                   ; preds = %lor.lhs.false
   %idxprom10 = sext i32 %4 to i64
   %arrayidx11 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom10
   %operand12 = getelementptr inbounds %struct.insn_data* %arrayidx11, i32 0, i32 3
-  %11 = load %struct.insn_operand_data** %operand12, align 8, !tbaa !0
+  %11 = load %struct.insn_operand_data** %operand12, align 8
   %arrayidx13 = getelementptr inbounds %struct.insn_operand_data* %11, i64 1
   %predicate14 = getelementptr inbounds %struct.insn_operand_data* %arrayidx13, i32 0, i32 0
-  %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8, !tbaa !0
+  %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8
   %idxprom15 = sext i32 %4 to i64
   %arrayidx16 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom15
   %operand17 = getelementptr inbounds %struct.insn_data* %arrayidx16, i32 0, i32 3
-  %13 = load %struct.insn_operand_data** %operand17, align 8, !tbaa !0
+  %13 = load %struct.insn_operand_data** %operand17, align 8
   %arrayidx18 = getelementptr inbounds %struct.insn_operand_data* %13, i64 1
   %14 = bitcast %struct.insn_operand_data* %arrayidx18 to i8*
   %bf.field.offs19 = getelementptr i8* %14, i32 16
@@ -76,14 +76,14 @@ lor.lhs.false23:                                  ; preds = %lor.lhs.false9
   %idxprom24 = sext i32 %4 to i64
   %arrayidx25 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom24
   %operand26 = getelementptr inbounds %struct.insn_data* %arrayidx25, i32 0, i32 3
-  %17 = load %struct.insn_operand_data** %operand26, align 8, !tbaa !0
+  %17 = load %struct.insn_operand_data** %operand26, align 8
   %arrayidx27 = getelementptr inbounds %struct.insn_operand_data* %17, i64 2
   %predicate28 = getelementptr inbounds %struct.insn_operand_data* %arrayidx27, i32 0, i32 0
-  %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8, !tbaa !0
+  %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8
   %idxprom29 = sext i32 %4 to i64
   %arrayidx30 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom29
   %operand31 = getelementptr inbounds %struct.insn_data* %arrayidx30, i32 0, i32 3
-  %19 = load %struct.insn_operand_data** %operand31, align 8, !tbaa !0
+  %19 = load %struct.insn_operand_data** %operand31, align 8
   %arrayidx32 = getelementptr inbounds %struct.insn_operand_data* %19, i64 2
   %20 = bitcast %struct.insn_operand_data* %arrayidx32 to i8*
   %bf.field.offs33 = getelementptr i8* %20, i32 16
@@ -101,7 +101,7 @@ if.end:                                           ; preds = %lor.lhs.false23
   %idxprom37 = sext i32 %4 to i64
   %arrayidx38 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37
   %genfun = getelementptr inbounds %struct.insn_data* %arrayidx38, i32 0, i32 2
-  %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8, !tbaa !0
+  %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8
   %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...)* %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c)
   br label %return
 
@@ -109,8 +109,3 @@ return:                                           ; preds = %if.end, %if.then
   %24 = phi %struct.rtx_def* [ %call39, %if.end ], [ null, %if.then ]
   ret %struct.rtx_def* %24
 }
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"_ZTS9insn_code", metadata !1}
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
index 8c09d97f08d2..e7d1e194d9cd 100644
--- a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
+++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -20,7 +20,7 @@ entry:
   %2 = load <4 x float>* %p3, align 16
   %3 = load <4 x float>* %p4, align 16
   %4 = load <4 x float>* %p5, align 16
-; CHECK:      movaps {{%xmm[0-7]}}, (%esp)
+; CHECK:      movups {{%xmm[0-7]}}, (%esp)
 ; CHECK-NEXT: calll _dovectortest 
   call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
   ret void
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index da734d4b6454..07a6910c65e0 100644
--- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: main
 define i32 @main() nounwind uwtable {
 entry:
-; CHECK: pmovsxbq  j(%rip), %
 ; CHECK: pmovsxbq  i(%rip), %
+; CHECK: pmovsxbq  j(%rip), %
   %0 = load <2 x i8>* @i, align 8
   %1 = load <2 x i8>* @j, align 8
   %div = sdiv <2 x i8> %1, %0
@@ -25,4 +25,3 @@ entry:
   ret i32 0
 ; CHECK: ret
 }
-
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
index 6e83f6713ae4..6dea92b63071 100644
--- a/test/CodeGen/X86/2011-10-27-tstore.ll
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -2,7 +2,7 @@
 
 target triple = "x86_64-unknown-linux-gnu"
 
-;CHECK: ltstore
+;CHECK-LABEL: ltstore:
 ;CHECK: movq
 ;CHECK: movq
 ;CHECK: ret
diff --git a/test/CodeGen/X86/2011-10-30-padd.ll b/test/CodeGen/X86/2011-10-30-padd.ll
index 180ca15a0ee2..1b8c12bc8e38 100644
--- a/test/CodeGen/X86/2011-10-30-padd.ll
+++ b/test/CodeGen/X86/2011-10-30-padd.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
 
-;CHECK: addXX_test
+;CHECK-LABEL: addXX_test:
 ;CHECK: padd
 ;CHECK: ret
 
@@ -10,7 +10,7 @@ define <16 x i8> @addXX_test(<16 x i8> %a) {
       ret <16 x i8> %b
 }
 
-;CHECK: instcombine_test
+;CHECK-LABEL: instcombine_test:
 ;CHECK: padd
 ;CHECK: ret
 define <16 x i8> @instcombine_test(<16 x i8> %a) {
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
index fcaabddd2cc5..df9823aa3825 100644
--- a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -2,7 +2,7 @@
 ; PR11494
 
 define void @test(<4 x i32>* nocapture %p) nounwind {
-  ; CHECK: test:
+  ; CHECK-LABEL: test:
   ; CHECK: vpxor %xmm0, %xmm0, %xmm0
   ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
   ; CHECK-NEXT: vmovdqu	%xmm0, (%rdi)
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
index dc3a08bb4daf..0183e107460e 100644
--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
-; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; RUN: llc -march=x86-64 -mattr=+sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
 ; Test case for r146671
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7"
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
index 39c213f00ab8..14643e4ba8b8 100644
--- a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=-sse4.2,+sse4.1 < %s | FileCheck %s
 ; Make sure we don't load from the location pointed to by %p
 ; twice: it has non-obvious performance implications, and
 ; the relevant transformation doesn't know how to update
 ; the chains correctly.
 ; PR10747
 
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: pextrd $2, %xmm
 define <4 x i32> @test(<4 x i32>* %p) {
   %v = load <4 x i32>* %p
diff --git a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
index 75efcf5ac47b..78cdfcf0e1f0 100644
--- a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
+++ b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; 0x1 means that we only look at the first bit.
 
 ;CHECK: 0x1
-;CHECK: ui_to_fp_conv
+;CHECK-LABEL: ui_to_fp_conv:
 ;CHECK: ret
 define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
 allocas:
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
index 7e914984fe44..69d4b93bb78d 100644
--- a/test/CodeGen/X86/2012-01-11-split-cv.ll
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
 
-;CHECK: add18i16
+;CHECK-LABEL: add18i16:
 define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
 ;CHECK: vmovaps
   %b = load <18 x i16>* %bp, align 16
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
index a883d7938b55..cd8a16f5732a 100644
--- a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -15,7 +15,7 @@ entry:
 
 ; CHECK: lock
 ; CHECK-NEXT: orl {{.*}}, (%esp)
-; CHECK-NEXT: cmpl $0
+; CHECK-NEXT: testl [[REG:%e[a-z]+]], [[REG]]
 
 if.then:                                          ; preds = %entry
   tail call void bitcast (void (...)* @foo to void ()*)() nounwind
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
index 3ce7db6e4138..9eb59e41ef7d 100644
--- a/test/CodeGen/X86/2012-01-18-vbitcast.ll
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
 
-;CHECK: vcast
+;CHECK-LABEL: vcast:
 define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
 ;CHECK: pmovzxdq
 ;CHECK: pmovzxdq
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
deleted file mode 100644
index 477b4deba820..000000000000
--- a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=core2 -mattr=+sse | FileCheck %s
-; PR11940: Do not optimize away movb %al, %ch
-
-%struct.APInt = type { i64* }
-
-declare noalias i8* @calloc(i32, i32) nounwind
-
-define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
-entry:
-; CHECK: bug:
-  %call = tail call i8* @calloc(i32 1, i32 32)
-  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
-  %0 = bitcast i8* %call.i to i64*
-  %rem.i = and i32 %rotateAmt, 63
-  %div.i = lshr i32 %rotateAmt, 6
-  %cmp.i = icmp eq i32 %rem.i, 0
-  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
-
-for.cond.preheader.i:                             ; preds = %entry
-  %sub.i = sub i32 4, %div.i
-  %cmp23.i = icmp eq i32 %div.i, 4
-  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
-
-for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
-  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
-  %.pre5.i = load i64** %pVal.i, align 4
-  br label %for.body.i
-
-for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
-  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
-  %add.i = add i32 %i.04.i, %div.i
-  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
-  %1 = load i64* %arrayidx.i, align 4
-  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
-  store i64 %1, i64* %arrayidx3.i, align 4
-  %inc.i = add i32 %i.04.i, 1
-  %cmp2.i = icmp ult i32 %inc.i, %sub.i
-  br i1 %cmp2.i, label %for.body.i, label %if.end.i
-
-if.end.i:                                         ; preds = %for.body.i, %entry
-  %cmp81.i = icmp eq i32 %div.i, 3
-  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
-
-for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
-  %sub58.i = sub i32 3, %div.i
-  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
-  %sh_prom.i = zext i32 %rem.i to i64
-  %sub17.i = sub i32 64, %rem.i
-  %sh_prom18.i = zext i32 %sub17.i to i64
-  %.pre.i = load i64** %pVal11.i, align 4
-  br label %for.body9.i
-
-for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
-; CHECK: %for.body9.i
-; CHECK: movb
-; CHECK: shrdl
-  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
-  %add10.i = add i32 %i6.02.i, %div.i
-  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
-  %2 = load i64* %arrayidx12.i, align 4
-  %shr.i = lshr i64 %2, %sh_prom.i
-  %add14.i = add i32 %add10.i, 1
-  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
-  %3 = load i64* %arrayidx16.i, align 4
-  %shl.i = shl i64 %3, %sh_prom18.i
-  %or.i = or i64 %shl.i, %shr.i
-  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
-  store i64 %or.i, i64* %arrayidx19.i, align 4
-  %inc21.i = add i32 %i6.02.i, 1
-  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
-  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
-
-_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
-  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
-  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
-  store i64* %0, i64** %4, align 4
-  ret void
-}
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 04659522d360..16706ae957f2 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -4,9 +4,9 @@
 
 ; It's hard to test for the ISEL condition because CodeGen optimizes
 ; away the bugpointed code. Just ensure the basics are still there.
-;CHECK: func:
-;CHECK: vxorps
-;CHECK: vinsertf128
+;CHECK-LABEL: func:
+;CHECK: vpxor
+;CHECK: vinserti128
 ;CHECK: vpshufd
 ;CHECK: vpshufd
 ;CHECK: vmulps
diff --git a/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll b/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
index 171c3f18dc8b..881fa37f99cb 100644
--- a/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
+++ b/test/CodeGen/X86/2012-05-17-TwoAddressBug.ll
@@ -6,7 +6,7 @@
 ; rdar://11472010
 define i32 @t(i32 %mask) nounwind readnone ssp {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK-NOT: mov
   %sub = add i32 %mask, -65535
   %shr = lshr i32 %sub, 23
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
index 4abdded38d8c..723302723b6b 100644
--- a/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -13,7 +13,7 @@ entry:
 }
 
 ; Make sure that we store a 64bit value, even on 32bit systems.
-;CHECK: store_64
+;CHECK-LABEL: store_64:
 define void @store_64(<2 x i32>* %ptr) {
 BB:
   store <2 x i32> zeroinitializer, <2 x i32>* %ptr
@@ -22,7 +22,7 @@ BB:
 ;CHECK: ret
 }
 
-;CHECK: load_64
+;CHECK-LABEL: load_64:
 define <2 x i32> @load_64(<2 x i32>* %ptr) {
 BB:
   %t = load <2 x i32>* %ptr
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 2c7dfc8dfd45..1c39c747cdc8 100644
--- a/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -2,7 +2,7 @@
 
 declare x86_fastcallcc i64 @barrier()
 
-;CHECK: bcast_fold
+;CHECK-LABEL: bcast_fold:
 ;CHECK: vmov{{[au]}}ps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
 ;CHECK: barrier
 ;CHECK: vbroadcastss [[SPILLED]], %ymm0
diff --git a/test/CodeGen/X86/2012-08-07-CmpISelBug.ll b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
index 000b853ab8f6..eba970e711ae 100644
--- a/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
+++ b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll
@@ -6,7 +6,7 @@
 
 define void @foo(i8 %arg4, i32 %arg5, i32* %arg14) nounwind {
 bb:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: testl
 ; CHECK: testb
   %tmp48 = zext i8 %arg4 to i32
diff --git a/test/CodeGen/X86/2012-08-16-setcc.ll b/test/CodeGen/X86/2012-08-16-setcc.ll
index ed511567c32b..c03b923cadba 100644
--- a/test/CodeGen/X86/2012-08-16-setcc.ll
+++ b/test/CodeGen/X86/2012-08-16-setcc.ll
@@ -2,7 +2,7 @@
 
 ; rdar://12081007
 
-; CHECK: and_1:
+; CHECK-LABEL: and_1:
 ; CHECK: andb
 ; CHECK-NEXT: cmovnel
 ; CHECK: ret
@@ -13,7 +13,7 @@ define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
   ret i32 %3
 }
 
-; CHECK: and_2:
+; CHECK-LABEL: and_2:
 ; CHECK: andb
 ; CHECK-NEXT: setne
 ; CHECK: ret
@@ -23,7 +23,7 @@ define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) {
   ret i1 %2
 }
 
-; CHECK: xor_1:
+; CHECK-LABEL: xor_1:
 ; CHECK: xorb
 ; CHECK-NEXT: cmovnel
 ; CHECK: ret
@@ -34,7 +34,7 @@ define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
   ret i32 %3
 }
 
-; CHECK: xor_2:
+; CHECK-LABEL: xor_2:
 ; CHECK: xorb
 ; CHECK-NEXT: setne
 ; CHECK: ret
diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
index a65e6881540d..971e56d20ea2 100644
--- a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -25,8 +25,7 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret void
 
-; CHECK: fn1:
+; CHECK-LABEL: fn1:
 ; CHECK: shrq $32, [[REG:%.*]]
-; CHECK: testq [[REG]], [[REG]]
 ; CHECK: je
 }
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
index ff6be369dc57..a5f64c5eaf55 100644
--- a/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
 target triple = "i686-pc-win32"
 
-;CHECK: bad_cast
+;CHECK-LABEL: bad_cast:
 define void @bad_cast() {
 entry:
   %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
@@ -14,7 +14,7 @@ entry:
 }
 
 
-;CHECK: bad_insert
+;CHECK-LABEL: bad_insert:
 define void @bad_insert(i32 %t) {
 entry:
 ;CHECK: vpinsrd
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index 9164eb9c6912..d41b43228b6c 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -6,7 +6,7 @@
 ;
 ; CHECK: %entry
 ; CHECK: DEBUG_VALUE: hg
-; CHECK: je
+; CHECK: j
 
 %struct.node.0.27 = type { i16, double, [3 x double], i32, i32 }
 %struct.hgstruct.2.29 = type { %struct.bnode.1.28*, [3 x double], double, [3 x double] }
@@ -36,13 +36,15 @@ return:                                           ; preds = %for.cond.preheader,
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{null}
 !4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725]
 !5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
 !6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
+!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [def] [from ]
 !11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index a0fbbb2ff9ef..7befa6b4757d 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -63,8 +63,9 @@ if.else4114:                                      ; preds = %if.then4073
 declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{}
@@ -79,7 +80,7 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
 !12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
 !13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
 !14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
 !16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 786465, i64 0, i64 20}       ; [ DW_TAG_subrange_type ] [0, 19]
@@ -129,8 +130,9 @@ declare void @_Znwm()
 
 !llvm.dbg.cu = !{!30}
 
-!30 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
+!30 = metadata !{i32 786449, metadata !34, i32 4, metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
 !31 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !32, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29]
 !32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
 !33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ]
 !34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"}
+!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index df93c5647d95..5aec3d92c70f 100644
--- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -34,11 +34,13 @@ invoke.cont44:                                    ; preds = %if.end
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet", metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, i1 true, metadata !"", i32 0, metadata !1, null, null, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{null, null}
+!0 = metadata !{i32 786449, metadata !6, i32 4, metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, metadata !"", i32 0, metadata !2, metadata !7, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
+!2 = metadata !{null}
 !3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214]
-!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ]
+!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [def] [from ]
 !5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
 !6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"}
+!7 = metadata !{i32 0}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
index f0c7781fafe9..0ff9d3951d5b 100644
--- a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
+++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -23,6 +23,6 @@ entry:
 
 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
diff --git a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
new file mode 100644
index 000000000000..3455b68fb0e6
--- /dev/null
+++ b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
@@ -0,0 +1,132 @@
+; RUN: llc -mtriple x86_64-apple-darwin -O0 < %s -o - | FileCheck %s
+;
+; During X86 fastisel, the address of an indirect call was resolved
+; through bitcast, ptrtoint, and inttoptr instructions. This is valid
+; only if the related instructions are in the same basic block, otherwise
+; we may reference variables that were not live across basic blocks
+; resulting in undefined virtual registers.
+;
+; In this example, this is illustrated by the spill/reload of the
+; LOADED_PTR_SLOT.
+;
+; Before this patch, the compiler was accessing two different spill
+; slots.
+; <rdar://problem/15192473>
+
+; CHECK-LABEL: @test_bitcast
+; Load the value of the function pointer: %loaded_ptr
+; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
+; Spill %arg2.
+; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
+; Spill %loaded_ptr.
+; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]]
+; Perform the indirect call.
+; Load the first argument
+; CHECK: movq [[ARG2_SLOT]], %rdi
+; Load the second argument
+; CHECK: movq [[ARG2_SLOT]], %rsi
+; Load the third argument
+; CHECK: movq [[ARG2_SLOT]], %rdx
+; Load the function pointer.
+; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
+; Call.
+; CHECK: callq *[[FCT_PTR]]
+; CHECK: ret
+define i64 @test_bitcast(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
+entry:
+  %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+  %raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8*
+  switch i1 %bool, label %default [
+    i1 true, label %label_true
+    i1 false, label %label_end
+  ]
+default:
+  unreachable
+
+label_true:
+  br label %label_end
+
+label_end:
+  %fct_ptr = bitcast i8* %raw to i64 (i64, i64, i64)*
+  %res = call i64 %fct_ptr(i64 %arg2, i64 %arg2, i64 %arg2)
+  ret i64 %res
+}
+
+; CHECK-LABEL: @test_inttoptr
+; Load the value of the function pointer: %loaded_ptr
+; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
+; Spill %arg2.
+; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
+; Spill %loaded_ptr.
+; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]]
+; Perform the indirect call.
+; Load the first argument
+; CHECK: movq [[ARG2_SLOT]], %rdi
+; Load the second argument
+; CHECK: movq [[ARG2_SLOT]], %rsi
+; Load the third argument
+; CHECK: movq [[ARG2_SLOT]], %rdx
+; Load the function pointer.
+; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
+; Call.
+; CHECK: callq *[[FCT_PTR]]
+; CHECK: ret
+define i64 @test_inttoptr(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
+entry:
+  %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+  %raw = ptrtoint i64 (i64, i64, i64)* %loaded_ptr to i64
+  switch i1 %bool, label %default [
+    i1 true, label %label_true
+    i1 false, label %label_end
+  ]
+default:
+  unreachable
+
+label_true:
+  br label %label_end
+
+label_end:
+  %fct_ptr = inttoptr i64 %raw to i64 (i64, i64, i64)*
+  %res = call i64 %fct_ptr(i64 %arg2, i64 %arg2, i64 %arg2)
+  ret i64 %res
+}
+
+; CHECK-LABEL: @test_ptrtoint
+; Load the value of the function pointer: %loaded_ptr
+; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]]
+; Spill %arg2.
+; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]]
+; Spill %loaded_ptr.
+; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]]
+; Perform the indirect call.
+; Load the first argument
+; CHECK: movq [[ARG2_SLOT]], %rdi
+; Load the second argument
+; CHECK: movq [[ARG2_SLOT]], %rsi
+; Load the third argument
+; CHECK: movq [[ARG2_SLOT]], %rdx
+; Load the function pointer.
+; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]]
+; Call.
+; CHECK: callq *[[FCT_PTR]]
+; CHECK: ret
+define i64 @test_ptrtoint(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
+entry:
+  %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+  %raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8*
+  switch i1 %bool, label %default [
+    i1 true, label %label_true
+    i1 false, label %label_end
+  ]
+default:
+  unreachable
+
+label_true:
+  br label %label_end
+
+label_end:
+  %fct_int = ptrtoint i8* %raw to i64
+  %fct_ptr = inttoptr i64 %fct_int to i64 (i64, i64, i64)*
+  %res = call i64 %fct_ptr(i64 %arg2, i64 %arg2, i64 %arg2)
+  ret i64 %res
+}
diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll
index c51247ab925a..fafdfdb74811 100644
--- a/test/CodeGen/X86/3addr-16bit.ll
+++ b/test/CodeGen/X86/3addr-16bit.ll
@@ -5,12 +5,12 @@
 
 define zeroext i16 @t1(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
 entry:
-; 32BIT:     t1:
+; 32BIT-LABEL:     t1:
 ; 32BIT:     movw 20(%esp), %ax
 ; 32BIT-NOT: movw %ax, %cx
 ; 32BIT:     leal 1(%eax), %ecx
 
-; 64BIT:     t1:
+; 64BIT-LABEL:     t1:
 ; 64BIT-NOT: movw %si, %ax
 ; 64BIT:     leal 1(%rsi), %eax
   %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
@@ -27,14 +27,15 @@ bb1:                                              ; preds = %entry
 
 define zeroext i16 @t2(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
 entry:
-; 32BIT:     t2:
+; 32BIT-LABEL:     t2:
 ; 32BIT:     movw 20(%esp), %ax
 ; 32BIT-NOT: movw %ax, %cx
 ; 32BIT:     leal -1(%eax), %ecx
 
-; 64BIT:     t2:
+; 64BIT-LABEL:     t2:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT:     leal -1(%rsi), %eax
+; 64BIT:     decl %eax
+; 64BIT:     movzwl %ax
   %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
   %1 = add i16 %k, -1                             ; <i16> [#uses=3]
   br i1 %0, label %bb, label %bb1
@@ -51,14 +52,14 @@ declare void @foo(i16 zeroext)
 
 define zeroext i16 @t3(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
 entry:
-; 32BIT:     t3:
+; 32BIT-LABEL:     t3:
 ; 32BIT:     movw 20(%esp), %ax
 ; 32BIT-NOT: movw %ax, %cx
 ; 32BIT:     leal 2(%eax), %ecx
 
-; 64BIT:     t3:
+; 64BIT-LABEL:     t3:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT:     leal 2(%rsi), %eax
+; 64BIT:     addl $2, %eax
   %0 = add i16 %k, 2                              ; <i16> [#uses=3]
   %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
   br i1 %1, label %bb, label %bb1
@@ -73,15 +74,15 @@ bb1:                                              ; preds = %entry
 
 define zeroext i16 @t4(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
 entry:
-; 32BIT:     t4:
+; 32BIT-LABEL:     t4:
 ; 32BIT:     movw 16(%esp), %ax
 ; 32BIT:     movw 20(%esp), %cx
 ; 32BIT-NOT: movw %cx, %dx
 ; 32BIT:     leal (%ecx,%eax), %edx
 
-; 64BIT:     t4:
+; 64BIT-LABEL:     t4:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT:     leal (%rsi,%rdi), %eax
+; 64BIT:     addl %edi, %eax
   %0 = add i16 %k, %c                             ; <i16> [#uses=3]
   %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
   br i1 %1, label %bb, label %bb1
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
index 912bdc215474..76fabbf0f6ae 100644
--- a/test/CodeGen/X86/3addr-or.ll
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -3,7 +3,7 @@
 
 define i32 @test1(i32 %x) nounwind readnone ssp {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: leal 3(%rdi), %eax
   %0 = shl i32 %x, 5                              ; <i32> [#uses=1]
   %1 = or i32 %0, 3                               ; <i32> [#uses=1]
@@ -11,7 +11,7 @@ entry:
 }
 
 define i64 @test2(i8 %A, i8 %B) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: shrq $4
 ; CHECK-NOT: movq
 ; CHECK-NOT: orq
@@ -31,7 +31,7 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
 define void @test3(i32 %x, i32* %P) nounwind readnone ssp {
 entry:
 ; No reason to emit an add here, should be an or.
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: orl $3, %edi
   %0 = shl i32 %x, 5
   %1 = or i32 %0, 3
@@ -45,7 +45,7 @@ entry:
   %and2 = and i32 %b, 16
   %or = or i32 %and2, %and
   ret i32 %or
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: leal	(%rsi,%rdi), %eax
 }
 
@@ -56,6 +56,6 @@ entry:
   %or = or i32 %and2, %and
   store i32 %or, i32* %P, align 4
   ret void
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: orl
 }
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/CodeGen/X86/GC/lit.local.cfg
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/X86/GC/ocaml-gc-assert.ll b/test/CodeGen/X86/GC/ocaml-gc-assert.ll
new file mode 100644
index 000000000000..b32ceca63441
--- /dev/null
+++ b/test/CodeGen/X86/GC/ocaml-gc-assert.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; PR3168
+
+; CHECK-LABEL: append
+
+define i32* @append() gc "ocaml" {
+entry:
+  switch i32 0, label %L2 [i32 0, label %L1]
+L1:
+  %var8 = alloca i8*
+  call void @llvm.gcroot(i8** %var8,i8* null)
+  br label %L3
+L2:
+  call ccc void @oread_runtime_casenotcovered()
+  unreachable
+L3:
+  ret i32* null
+}
+
+declare ccc void @oread_runtime_casenotcovered()
+declare void @llvm.gcroot(i8**,i8*)
diff --git a/test/CodeGen/X86/GC/ocaml-gc.ll b/test/CodeGen/X86/GC/ocaml-gc.ll
index 44241a90d0e7..6d5f8aebe139 100644
--- a/test/CodeGen/X86/GC/ocaml-gc.ll
+++ b/test/CodeGen/X86/GC/ocaml-gc.ll
@@ -2,23 +2,23 @@
 
 define i32 @main(i32 %x) nounwind gc "ocaml" {
 ; CHECK:        .text
-; CHECK-NEXT:   .globl  caml_3C_stdin_3E___code_begin
-; CHECK-NEXT: caml_3C_stdin_3E___code_begin:
+; CHECK-NEXT:   .globl "caml<stdin>__code_begin"
+; CHECK-NEXT: "caml<stdin>__code_begin":
 ; CHECK-NEXT:   .data
-; CHECK-NEXT:   .globl  caml_3C_stdin_3E___data_begin
-; CHECK-NEXT: caml_3C_stdin_3E___data_begin:
+; CHECK-NEXT:   .globl  "caml<stdin>__data_begin"
+; CHECK-NEXT: "caml<stdin>__data_begin":
 
   %puts = tail call i32 @foo(i32 %x)
   ret i32 0
 
-; CHECK:        .globl  caml_3C_stdin_3E___code_end
-; CHECK-NEXT: caml_3C_stdin_3E___code_end:
+; CHECK:        .globl "caml<stdin>__code_end"
+; CHECK-NEXT: "caml<stdin>__code_end":
 ; CHECK-NEXT:   .data
-; CHECK-NEXT:   .globl  caml_3C_stdin_3E___data_end
-; CHECK-NEXT: caml_3C_stdin_3E___data_end:
+; CHECK-NEXT:   .globl "caml<stdin>__data_end"
+; CHECK-NEXT: "caml<stdin>__data_end":
 ; CHECK-NEXT:   .quad   0
-; CHECK-NEXT:   .globl  caml_3C_stdin_3E___frametable
-; CHECK-NEXT: caml_3C_stdin_3E___frametable:
+; CHECK-NEXT:   .globl "caml<stdin>__frametable"
+; CHECK-NEXT: "caml<stdin>__frametable":
 ; CHECK-NEXT:   .short  1
 ; CHECK-NEXT:   .align  8
 ; CHECK-NEXT:                # live roots for main
diff --git a/test/CodeGen/X86/MachineSink-CritEdge.ll b/test/CodeGen/X86/MachineSink-CritEdge.ll
index 74a1049772a0..bc058354a21f 100644
--- a/test/CodeGen/X86/MachineSink-CritEdge.ll
+++ b/test/CodeGen/X86/MachineSink-CritEdge.ll
@@ -30,10 +30,10 @@ land.lhs.true:
   %cmp4 = icmp eq i32 %call3, 10
   br i1 %cmp4, label %do.body.preheader, label %if.then
 
-; %shl.i should be sinked all the way down to do.body.preheader, but not into the loop.
+; %add16.i should be sunk all the way down to do.body.preheader, but not into the loop.
 ; CHECK: do.body.preheader
 ; CHECK-NOT: do.body
-; CHECK: shll	$12
+; CHECK: leal ([[SRC:%r[a-z0-9]+]],[[SRC]],8)
 
 do.body.preheader:
   %xor29.i = xor i32 %shr27.i, %add25.i
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 13a6444a496b..584e644ed51f 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -26,17 +26,18 @@ bb2:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 786468, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786468, null, metadata !0, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
 !11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 2, i32 13, metadata !1, null}
@@ -48,3 +49,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !18 = metadata !{metadata !1}
 !19 = metadata !{metadata !6, metadata !7, metadata !10}
 !20 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
+!21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index bb227a0185df..0ef3aa5b6f07 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -147,7 +147,7 @@ define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) n
 }
 
 
-;CHECK: merge_loads_i16
+;CHECK-LABEL: merge_loads_i16:
 ; load:
 ;CHECK: movw
 ; store:
@@ -181,7 +181,7 @@ define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struc
 }
 
 ; The loads and the stores are interleved. Can't merge them.
-;CHECK: no_merge_loads
+;CHECK-LABEL: no_merge_loads:
 ;CHECK: movb
 ;CHECK: movb
 ;CHECK: movb
@@ -215,7 +215,7 @@ a4:                                       ; preds = %4, %.lr.ph
 }
 
 
-;CHECK: merge_loads_integer
+;CHECK-LABEL: merge_loads_integer:
 ; load:
 ;CHECK: movq
 ; store:
@@ -249,7 +249,7 @@ define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %s
 }
 
 
-;CHECK: merge_loads_vector
+;CHECK-LABEL: merge_loads_vector:
 ; load:
 ;CHECK: movups
 ; store:
@@ -290,7 +290,7 @@ block4:                                       ; preds = %4, %.lr.ph
   ret void
 }
 
-;CHECK: merge_loads_no_align
+;CHECK-LABEL: merge_loads_no_align:
 ; load:
 ;CHECK: movl
 ;CHECK: movl
diff --git a/test/CodeGen/X86/StackColoring-dbg.ll b/test/CodeGen/X86/StackColoring-dbg.ll
index 5982544f7a8c..51d0d1775c67 100644
--- a/test/CodeGen/X86/StackColoring-dbg.ll
+++ b/test/CodeGen/X86/StackColoring-dbg.ll
@@ -25,6 +25,11 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
-!16 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23}
+!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !2, metadata !2, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"t.c", metadata !""}
+!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6}
 !2 = metadata !{i32 0}
-!22 = metadata !{i32 786688, metadata !2, metadata !"x", metadata !2, i32 16, metadata !16, i32 0, i32 0}
+!22 = metadata !{i32 786688, null, metadata !"x", metadata !2, i32 16, metadata !16, i32 0, i32 0}
+!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll
index f8ae74f292d2..a8e3537fabe3 100644
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -4,8 +4,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;YESCOLOR: subq  $136, %rsp
-;NOCOLOR: subq  $264, %rsp
+;YESCOLOR: subq  $144, %rsp
+;NOCOLOR: subq  $272, %rsp
 
 define i32 @myCall_w2(i32 %in) {
 entry:
@@ -82,8 +82,8 @@ bb2:
 bb3:
   ret i32 0
 }
-;YESCOLOR: subq  $208, %rsp
-;NOCOLOR: subq  $400, %rsp
+;YESCOLOR: subq  $200, %rsp
+;NOCOLOR: subq  $408, %rsp
 
 
 
@@ -297,8 +297,8 @@ bb3:
 }
 
 
-;YESCOLOR: multi_region_bb
-;NOCOLOR: multi_region_bb
+;YESCOLOR-LABEL: multi_region_bb:
+;NOCOLOR-LABEL: multi_region_bb:
 define void @multi_region_bb() nounwind ssp {
 entry:
   %A.i1 = alloca [100 x i32], align 4
@@ -350,10 +350,32 @@ bb3:
   ret i32 0
 }
 
+
+; Regression test for PR15707.  %buf1 and %buf2 should not be merged
+; in this test case.
+;YESCOLOR-LABEL: myCall_pr15707:
+;YESCOLOR: subq $200008, %rsp
+;NOCOLOR-LABEL: myCall_pr15707:
+;NOCOLOR: subq $200008, %rsp
+define void @myCall_pr15707() {
+  %buf1 = alloca i8, i32 100000, align 16
+  %buf2 = alloca i8, i32 100000, align 16
+
+  call void @llvm.lifetime.start(i64 -1, i8* %buf1)
+  call void @llvm.lifetime.end(i64 -1, i8* %buf1)
+
+  call void @llvm.lifetime.start(i64 -1, i8* %buf1)
+  call void @llvm.lifetime.start(i64 -1, i8* %buf2)
+  %result1 = call i32 @foo(i32 0, i8* %buf1)
+  %result2 = call i32 @foo(i32 0, i8* %buf2)
+  ret void
+}
+
+
 ; Check that we don't assert and crash even when there are allocas
 ; outside the declared lifetime regions.
-;YESCOLOR: bad_range
-;NOCOLOR:  bad_range
+;YESCOLOR-LABEL: bad_range:
+;NOCOLOR-LABEL:  bad_range:
 define void @bad_range() nounwind ssp {
 entry:
   %A.i1 = alloca [100 x i32], align 4
@@ -378,8 +400,8 @@ block2:
 
 ; Check that we don't assert and crash even when there are usages
 ; of allocas which do not read or write outside the declared lifetime regions.
-;YESCOLOR: shady_range
-;NOCOLOR:  shady_range
+;YESCOLOR-LABEL: shady_range:
+;NOCOLOR-LABEL:  shady_range:
 
 %struct.Klass = type { i32, i32 }
 
@@ -407,4 +429,3 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
 declare i32 @foo(i32, i8*)
-
diff --git a/test/CodeGen/X86/WidenArith.ll b/test/CodeGen/X86/WidenArith.ll
index 0383bd665b0f..f87b3821dde8 100644
--- a/test/CodeGen/X86/WidenArith.ll
+++ b/test/CodeGen/X86/WidenArith.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
 
-;CHECK: test
+;CHECK-LABEL: test:
 ;CHECK: vaddps
 ;CHECK: vmulps
 ;CHECK: vsubps
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 658ccaa71dc0..633e70f0285a 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
@@ -37,22 +37,22 @@ entry:
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
 	ret void
 
-; LINUX-64-STATIC: foo00:
+; LINUX-64-STATIC-LABEL: foo00:
 ; LINUX-64-STATIC: movl	src(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl	[[EAX]], dst
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo00:
+; LINUX-32-STATIC-LABEL: foo00:
 ; LINUX-32-STATIC: 	movl	src, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dst
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo00:
+; LINUX-32-PIC-LABEL: foo00:
 ; LINUX-32-PIC: 	movl	src, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dst
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo00:
+; LINUX-64-PIC-LABEL: foo00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r..]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e..]]
 ; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r..]]
@@ -109,22 +109,22 @@ entry:
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
 	ret void
 
-; LINUX-64-STATIC: fxo00:
+; LINUX-64-STATIC-LABEL: fxo00:
 ; LINUX-64-STATIC: movl	xsrc(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl	[[EAX]], xdst
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: fxo00:
+; LINUX-32-STATIC-LABEL: fxo00:
 ; LINUX-32-STATIC: 	movl	xsrc, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], xdst
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: fxo00:
+; LINUX-32-PIC-LABEL: fxo00:
 ; LINUX-32-PIC: 	movl	xsrc, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], xdst
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: fxo00:
+; LINUX-64-PIC-LABEL: fxo00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -179,19 +179,19 @@ define void @foo01() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: foo01:
+; LINUX-64-STATIC-LABEL: foo01:
 ; LINUX-64-STATIC: movq	$dst, ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo01:
+; LINUX-32-STATIC-LABEL: foo01:
 ; LINUX-32-STATIC: 	movl	$dst, ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo01:
+; LINUX-32-PIC-LABEL: foo01:
 ; LINUX-32-PIC: 	movl	$dst, ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo01:
+; LINUX-64-PIC-LABEL: foo01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
@@ -239,19 +239,19 @@ define void @fxo01() nounwind {
 entry:
 	store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: fxo01:
+; LINUX-64-STATIC-LABEL: fxo01:
 ; LINUX-64-STATIC: movq	$xdst, ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: fxo01:
+; LINUX-32-STATIC-LABEL: fxo01:
 ; LINUX-32-STATIC: 	movl	$xdst, ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: fxo01:
+; LINUX-32-PIC-LABEL: fxo01:
 ; LINUX-32-PIC: 	movl	$xdst, ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: fxo01:
+; LINUX-64-PIC-LABEL: fxo01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
@@ -301,25 +301,25 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 	ret void
-; LINUX-64-STATIC: foo02:
+; LINUX-64-STATIC-LABEL: foo02:
 ; LINUX-64-STATIC: movl    src(%rip), %
 ; LINUX-64-STATIC: movq    ptr(%rip), %
 ; LINUX-64-STATIC: movl
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo02:
+; LINUX-32-STATIC-LABEL: foo02:
 ; LINUX-32-STATIC: 	movl	src, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo02:
+; LINUX-32-PIC-LABEL: foo02:
 ; LINUX-32-PIC: 	movl	src, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo02:
+; LINUX-64-PIC-LABEL: foo02:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -382,26 +382,26 @@ entry:
 	%0 = load i32** @ptr, align 8
 	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
-; LINUX-64-STATIC: fxo02:
+; LINUX-64-STATIC-LABEL: fxo02:
 ; LINUX-64-STATIC: movl    xsrc(%rip), %
 ; LINUX-64-STATIC: movq    ptr(%rip), %
 ; LINUX-64-STATIC: movl
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: fxo02:
+; LINUX-32-STATIC-LABEL: fxo02:
 ; LINUX-32-STATIC: 	movl	xsrc, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
-; LINUX-32-PIC: fxo02:
+; LINUX-32-PIC-LABEL: fxo02:
 ; LINUX-32-PIC: 	movl	xsrc, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: fxo02:
+; LINUX-64-PIC-LABEL: fxo02:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -464,22 +464,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
 	ret void
-; LINUX-64-STATIC: foo03:
+; LINUX-64-STATIC-LABEL: foo03:
 ; LINUX-64-STATIC: movl    dsrc(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ddst
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo03:
+; LINUX-32-STATIC-LABEL: foo03:
 ; LINUX-32-STATIC: 	movl	dsrc, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ddst
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo03:
+; LINUX-32-PIC-LABEL: foo03:
 ; LINUX-32-PIC: 	movl	dsrc, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ddst
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo03:
+; LINUX-64-PIC-LABEL: foo03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -524,19 +524,19 @@ define void @foo04() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
 	ret void
-; LINUX-64-STATIC: foo04:
+; LINUX-64-STATIC-LABEL: foo04:
 ; LINUX-64-STATIC: movq    $ddst, dptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo04:
+; LINUX-32-STATIC-LABEL: foo04:
 ; LINUX-32-STATIC: 	movl	$ddst, dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo04:
+; LINUX-32-PIC-LABEL: foo04:
 ; LINUX-32-PIC: 	movl	$ddst, dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo04:
+; LINUX-64-PIC-LABEL: foo04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], ([[RCX]])
@@ -580,25 +580,25 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
 	store i32 %1, i32* %0, align 4
 	ret void
-; LINUX-64-STATIC: foo05:
+; LINUX-64-STATIC-LABEL: foo05:
 ; LINUX-64-STATIC: movl    dsrc(%rip), %
 ; LINUX-64-STATIC: movq    dptr(%rip), %
 ; LINUX-64-STATIC: movl
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo05:
+; LINUX-32-STATIC-LABEL: foo05:
 ; LINUX-32-STATIC: 	movl	dsrc, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo05:
+; LINUX-32-PIC-LABEL: foo05:
 ; LINUX-32-PIC: 	movl	dsrc, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo05:
+; LINUX-64-PIC-LABEL: foo05:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -651,22 +651,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
 	ret void
-; LINUX-64-STATIC: foo06:
+; LINUX-64-STATIC-LABEL: foo06:
 ; LINUX-64-STATIC: movl    lsrc(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ldst(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo06:
+; LINUX-32-STATIC-LABEL: foo06:
 ; LINUX-32-STATIC: 	movl	lsrc, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ldst
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo06:
+; LINUX-32-PIC-LABEL: foo06:
 ; LINUX-32-PIC: 	movl	lsrc, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ldst
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo06:
+; LINUX-64-PIC-LABEL: foo06:
 ; LINUX-64-PIC: 	movl	lsrc(%rip), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ldst(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
@@ -709,19 +709,19 @@ define void @foo07() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
 	ret void
-; LINUX-64-STATIC: foo07:
+; LINUX-64-STATIC-LABEL: foo07:
 ; LINUX-64-STATIC: movq    $ldst, lptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo07:
+; LINUX-32-STATIC-LABEL: foo07:
 ; LINUX-32-STATIC: 	movl	$ldst, lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo07:
+; LINUX-32-PIC-LABEL: foo07:
 ; LINUX-32-PIC: 	movl	$ldst, lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo07:
+; LINUX-64-PIC-LABEL: foo07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
@@ -764,25 +764,25 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 	ret void
-; LINUX-64-STATIC: foo08:
+; LINUX-64-STATIC-LABEL: foo08:
 ; LINUX-64-STATIC: movl    lsrc(%rip), %
 ; LINUX-64-STATIC: movq    lptr(%rip), %
 ; LINUX-64-STATIC: movl
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: foo08:
+; LINUX-32-STATIC-LABEL: foo08:
 ; LINUX-32-STATIC: 	movl	lsrc, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: foo08:
+; LINUX-32-PIC-LABEL: foo08:
 ; LINUX-32-PIC: 	movl	lsrc, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: foo08:
+; LINUX-64-PIC-LABEL: foo08:
 ; LINUX-64-PIC: 	movl	lsrc(%rip), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ([[RCX]])
@@ -833,22 +833,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
 	ret void
-; LINUX-64-STATIC: qux00:
+; LINUX-64-STATIC-LABEL: qux00:
 ; LINUX-64-STATIC: movl    src+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], dst+64(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux00:
+; LINUX-32-STATIC-LABEL: qux00:
 ; LINUX-32-STATIC: 	movl	src+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qux00:
+; LINUX-32-PIC-LABEL: qux00:
 ; LINUX-32-PIC: 	movl	src+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux00:
+; LINUX-64-PIC-LABEL: qux00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -904,22 +904,22 @@ entry:
 	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
 	ret void
-; LINUX-64-STATIC: qxx00:
+; LINUX-64-STATIC-LABEL: qxx00:
 ; LINUX-64-STATIC: movl    xsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], xdst+64(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qxx00:
+; LINUX-32-STATIC-LABEL: qxx00:
 ; LINUX-32-STATIC: 	movl	xsrc+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], xdst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qxx00:
+; LINUX-32-PIC-LABEL: qxx00:
 ; LINUX-32-PIC: 	movl	xsrc+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], xdst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qxx00:
+; LINUX-64-PIC-LABEL: qxx00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -974,19 +974,19 @@ define void @qux01() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: qux01:
+; LINUX-64-STATIC-LABEL: qux01:
 ; LINUX-64-STATIC: movq    $dst+64, ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux01:
+; LINUX-32-STATIC-LABEL: qux01:
 ; LINUX-32-STATIC: 	movl	$dst+64, ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qux01:
+; LINUX-32-PIC-LABEL: qux01:
 ; LINUX-32-PIC: 	movl	$dst+64, ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux01:
+; LINUX-64-PIC-LABEL: qux01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1040,19 +1040,19 @@ define void @qxx01() nounwind {
 entry:
 	store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: qxx01:
+; LINUX-64-STATIC-LABEL: qxx01:
 ; LINUX-64-STATIC: movq    $xdst+64, ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qxx01:
+; LINUX-32-STATIC-LABEL: qxx01:
 ; LINUX-32-STATIC: 	movl	$xdst+64, ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qxx01:
+; LINUX-32-PIC-LABEL: qxx01:
 ; LINUX-32-PIC: 	movl	$xdst+64, ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qxx01:
+; LINUX-64-PIC-LABEL: qxx01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1108,26 +1108,26 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
-; LINUX-64-STATIC: qux02:
+; LINUX-64-STATIC-LABEL: qux02:
 ; LINUX-64-STATIC: movl    src+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux02:
+; LINUX-32-STATIC-LABEL: qux02:
 ; LINUX-32-STATIC: 	movl	src+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
-; LINUX-32-PIC: qux02:
+; LINUX-32-PIC-LABEL: qux02:
 ; LINUX-32-PIC: 	movl	src+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux02:
+; LINUX-64-PIC-LABEL: qux02:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1191,26 +1191,26 @@ entry:
 	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
-; LINUX-64-STATIC: qxx02:
+; LINUX-64-STATIC-LABEL: qxx02:
 ; LINUX-64-STATIC: movl    xsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qxx02:
+; LINUX-32-STATIC-LABEL: qxx02:
 ; LINUX-32-STATIC: 	movl	xsrc+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
-; LINUX-32-PIC: qxx02:
+; LINUX-32-PIC-LABEL: qxx02:
 ; LINUX-32-PIC: 	movl	xsrc+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qxx02:
+; LINUX-64-PIC-LABEL: qxx02:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1273,22 +1273,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
 	ret void
-; LINUX-64-STATIC: qux03:
+; LINUX-64-STATIC-LABEL: qux03:
 ; LINUX-64-STATIC: movl    dsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ddst+64(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux03:
+; LINUX-32-STATIC-LABEL: qux03:
 ; LINUX-32-STATIC: 	movl	dsrc+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ddst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qux03:
+; LINUX-32-PIC-LABEL: qux03:
 ; LINUX-32-PIC: 	movl	dsrc+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ddst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux03:
+; LINUX-64-PIC-LABEL: qux03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1333,19 +1333,19 @@ define void @qux04() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
 	ret void
-; LINUX-64-STATIC: qux04:
+; LINUX-64-STATIC-LABEL: qux04:
 ; LINUX-64-STATIC: movq    $ddst+64, dptr(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux04:
+; LINUX-32-STATIC-LABEL: qux04:
 ; LINUX-32-STATIC: 	movl	$ddst+64, dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qux04:
+; LINUX-32-PIC-LABEL: qux04:
 ; LINUX-32-PIC: 	movl	$ddst+64, dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux04:
+; LINUX-64-PIC-LABEL: qux04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	addq	$64, [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1390,26 +1390,26 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
-; LINUX-64-STATIC: qux05:
+; LINUX-64-STATIC-LABEL: qux05:
 ; LINUX-64-STATIC: movl    dsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux05:
+; LINUX-32-STATIC-LABEL: qux05:
 ; LINUX-32-STATIC: 	movl	dsrc+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
-; LINUX-32-PIC: qux05:
+; LINUX-32-PIC-LABEL: qux05:
 ; LINUX-32-PIC: 	movl	dsrc+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux05:
+; LINUX-64-PIC-LABEL: qux05:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1462,22 +1462,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
 	ret void
-; LINUX-64-STATIC: qux06:
+; LINUX-64-STATIC-LABEL: qux06:
 ; LINUX-64-STATIC: movl    lsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ldst+64
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux06:
+; LINUX-32-STATIC-LABEL: qux06:
 ; LINUX-32-STATIC: 	movl	lsrc+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ldst+64
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qux06:
+; LINUX-32-PIC-LABEL: qux06:
 ; LINUX-32-PIC: 	movl	lsrc+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ldst+64
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux06:
+; LINUX-64-PIC-LABEL: qux06:
 ; LINUX-64-PIC: 	movl	lsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ldst+64(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
@@ -1520,19 +1520,19 @@ define void @qux07() nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
 	ret void
-; LINUX-64-STATIC: qux07:
+; LINUX-64-STATIC-LABEL: qux07:
 ; LINUX-64-STATIC: movq    $ldst+64, lptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux07:
+; LINUX-32-STATIC-LABEL: qux07:
 ; LINUX-32-STATIC: 	movl	$ldst+64, lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: qux07:
+; LINUX-32-PIC-LABEL: qux07:
 ; LINUX-32-PIC: 	movl	$ldst+64, lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux07:
+; LINUX-64-PIC-LABEL: qux07:
 ; LINUX-64-PIC: 	leaq	ldst+64(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
@@ -1575,26 +1575,26 @@ entry:
 	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
 	%2 = getelementptr i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
-; LINUX-64-STATIC: qux08:
+; LINUX-64-STATIC-LABEL: qux08:
 ; LINUX-64-STATIC: movl    lsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: qux08:
+; LINUX-32-STATIC-LABEL: qux08:
 ; LINUX-32-STATIC: 	movl	lsrc+64, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 	ret void
 
-; LINUX-32-PIC: qux08:
+; LINUX-32-PIC-LABEL: qux08:
 ; LINUX-32-PIC: 	movl	lsrc+64, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 64([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: qux08:
+; LINUX-64-PIC-LABEL: qux08:
 ; LINUX-64-PIC: 	movl	lsrc+64(%rip), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 64([[RCX]])
@@ -1647,24 +1647,24 @@ entry:
 	%2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: ind00:
+; LINUX-64-STATIC-LABEL: ind00:
 ; LINUX-64-STATIC: movl    src(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], dst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind00:
+; LINUX-32-STATIC-LABEL: ind00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], dst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind00:
+; LINUX-32-PIC-LABEL: ind00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], dst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind00:
+; LINUX-64-PIC-LABEL: ind00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1725,24 +1725,24 @@ entry:
 	%2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: ixd00:
+; LINUX-64-STATIC-LABEL: ixd00:
 ; LINUX-64-STATIC: movl    xsrc(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], xdst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ixd00:
+; LINUX-32-STATIC-LABEL: ixd00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], xdst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ixd00:
+; LINUX-32-PIC-LABEL: ixd00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], xdst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ixd00:
+; LINUX-64-PIC-LABEL: ixd00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -1801,24 +1801,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
 	store i32* %0, i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: ind01:
+; LINUX-64-STATIC-LABEL: ind01:
 ; LINUX-64-STATIC: leaq    dst(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind01:
+; LINUX-32-STATIC-LABEL: ind01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind01:
+; LINUX-32-PIC-LABEL: ind01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind01:
+; LINUX-64-PIC-LABEL: ind01:
 ; LINUX-64-PIC: 	shlq	$2, %rdi
 ; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rdi
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
@@ -1877,24 +1877,24 @@ entry:
 	%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
 	store i32* %0, i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: ixd01:
+; LINUX-64-STATIC-LABEL: ixd01:
 ; LINUX-64-STATIC: leaq    xdst(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ixd01:
+; LINUX-32-STATIC-LABEL: ixd01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	xdst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ixd01:
+; LINUX-32-PIC-LABEL: ixd01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	xdst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ixd01:
+; LINUX-64-PIC-LABEL: ixd01:
 ; LINUX-64-PIC: 	shlq	$2, %rdi
 ; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rdi
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
@@ -1956,27 +1956,27 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: ind02:
+; LINUX-64-STATIC-LABEL: ind02:
 ; LINUX-64-STATIC: movl    src(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind02:
+; LINUX-32-STATIC-LABEL: ind02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind02:
+; LINUX-32-PIC-LABEL: ind02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	src(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind02:
+; LINUX-64-PIC-LABEL: ind02:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2045,27 +2045,27 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: ixd02:
+; LINUX-64-STATIC-LABEL: ixd02:
 ; LINUX-64-STATIC: movl    xsrc(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ixd02:
+; LINUX-32-STATIC-LABEL: ixd02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ixd02:
+; LINUX-32-PIC-LABEL: ixd02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	xsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ixd02:
+; LINUX-64-PIC-LABEL: ixd02:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2133,24 +2133,24 @@ entry:
 	%2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: ind03:
+; LINUX-64-STATIC-LABEL: ind03:
 ; LINUX-64-STATIC: movl    dsrc(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ddst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind03:
+; LINUX-32-STATIC-LABEL: ind03:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ddst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind03:
+; LINUX-32-PIC-LABEL: ind03:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ddst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind03:
+; LINUX-64-PIC-LABEL: ind03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2205,24 +2205,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
 	store i32* %0, i32** @dptr, align 8
 	ret void
-; LINUX-64-STATIC: ind04:
+; LINUX-64-STATIC-LABEL: ind04:
 ; LINUX-64-STATIC: leaq    ddst(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], dptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind04:
+; LINUX-32-STATIC-LABEL: ind04:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ddst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind04:
+; LINUX-32-PIC-LABEL: ind04:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ddst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind04:
+; LINUX-64-PIC-LABEL: ind04:
 ; LINUX-64-PIC: 	shlq	$2, %rdi
 ; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rdi
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
@@ -2277,27 +2277,27 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: ind05:
+; LINUX-64-STATIC-LABEL: ind05:
 ; LINUX-64-STATIC: movl    dsrc(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind05:
+; LINUX-32-STATIC-LABEL: ind05:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind05:
+; LINUX-32-PIC-LABEL: ind05:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind05:
+; LINUX-64-PIC-LABEL: ind05:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2358,24 +2358,24 @@ entry:
 	%2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: ind06:
+; LINUX-64-STATIC-LABEL: ind06:
 ; LINUX-64-STATIC: movl    lsrc(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ldst(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind06:
+; LINUX-32-STATIC-LABEL: ind06:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ldst(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind06:
+; LINUX-32-PIC-LABEL: ind06:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ldst(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind06:
+; LINUX-64-PIC-LABEL: ind06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), [[RCX:%r.x]]
@@ -2430,24 +2430,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
 	store i32* %0, i32** @lptr, align 8
 	ret void
-; LINUX-64-STATIC: ind07:
+; LINUX-64-STATIC-LABEL: ind07:
 ; LINUX-64-STATIC: leaq    ldst(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], lptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind07:
+; LINUX-32-STATIC-LABEL: ind07:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ldst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind07:
+; LINUX-32-PIC-LABEL: ind07:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ldst(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind07:
+; LINUX-64-PIC-LABEL: ind07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
@@ -2501,27 +2501,27 @@ entry:
 	%3 = getelementptr i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: ind08:
+; LINUX-64-STATIC-LABEL: ind08:
 ; LINUX-64-STATIC: movl    lsrc(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ind08:
+; LINUX-32-STATIC-LABEL: ind08:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ind08:
+; LINUX-32-PIC-LABEL: ind08:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lsrc(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ind08:
+; LINUX-64-PIC-LABEL: ind08:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
@@ -2582,24 +2582,24 @@ entry:
 	%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: off00:
+; LINUX-64-STATIC-LABEL: off00:
 ; LINUX-64-STATIC: movl    src+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], dst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off00:
+; LINUX-32-STATIC-LABEL: off00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], dst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off00:
+; LINUX-32-PIC-LABEL: off00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], dst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off00:
+; LINUX-64-PIC-LABEL: off00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2661,24 +2661,24 @@ entry:
 	%3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: oxf00:
+; LINUX-64-STATIC-LABEL: oxf00:
 ; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], xdst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: oxf00:
+; LINUX-32-STATIC-LABEL: oxf00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], xdst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: oxf00:
+; LINUX-32-PIC-LABEL: oxf00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], xdst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: oxf00:
+; LINUX-64-PIC-LABEL: oxf00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2738,24 +2738,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
 	store i32* %0, i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: off01:
+; LINUX-64-STATIC-LABEL: off01:
 ; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off01:
+; LINUX-32-STATIC-LABEL: off01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off01:
+; LINUX-32-PIC-LABEL: off01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off01:
+; LINUX-64-PIC-LABEL: off01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2815,24 +2815,24 @@ entry:
 	%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum
 	store i32* %0, i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: oxf01:
+; LINUX-64-STATIC-LABEL: oxf01:
 ; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], ptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: oxf01:
+; LINUX-32-STATIC-LABEL: oxf01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: oxf01:
+; LINUX-32-PIC-LABEL: oxf01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	xdst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: oxf01:
+; LINUX-64-PIC-LABEL: oxf01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2895,27 +2895,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: off02:
+; LINUX-64-STATIC-LABEL: off02:
 ; LINUX-64-STATIC: movl    src+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off02:
+; LINUX-32-STATIC-LABEL: off02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off02:
+; LINUX-32-PIC-LABEL: off02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	src+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off02:
+; LINUX-64-PIC-LABEL: off02:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -2985,27 +2985,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: oxf02:
+; LINUX-64-STATIC-LABEL: oxf02:
 ; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: oxf02:
+; LINUX-32-STATIC-LABEL: oxf02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: oxf02:
+; LINUX-32-PIC-LABEL: oxf02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: oxf02:
+; LINUX-64-PIC-LABEL: oxf02:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3074,24 +3074,24 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: off03:
+; LINUX-64-STATIC-LABEL: off03:
 ; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ddst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off03:
+; LINUX-32-STATIC-LABEL: off03:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ddst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off03:
+; LINUX-32-PIC-LABEL: off03:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ddst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off03:
+; LINUX-64-PIC-LABEL: off03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3147,24 +3147,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
 	store i32* %0, i32** @dptr, align 8
 	ret void
-; LINUX-64-STATIC: off04:
+; LINUX-64-STATIC-LABEL: off04:
 ; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], dptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off04:
+; LINUX-32-STATIC-LABEL: off04:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off04:
+; LINUX-32-PIC-LABEL: off04:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ddst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off04:
+; LINUX-64-PIC-LABEL: off04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3220,27 +3220,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: off05:
+; LINUX-64-STATIC-LABEL: off05:
 ; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off05:
+; LINUX-32-STATIC-LABEL: off05:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off05:
+; LINUX-32-PIC-LABEL: off05:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off05:
+; LINUX-64-PIC-LABEL: off05:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3302,24 +3302,24 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: off06:
+; LINUX-64-STATIC-LABEL: off06:
 ; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ldst+64(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off06:
+; LINUX-32-STATIC-LABEL: off06:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ldst+64(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off06:
+; LINUX-32-PIC-LABEL: off06:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ldst+64(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off06:
+; LINUX-64-PIC-LABEL: off06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), [[RCX:%r.x]]
@@ -3375,24 +3375,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
 	store i32* %0, i32** @lptr, align 8
 	ret void
-; LINUX-64-STATIC: off07:
+; LINUX-64-STATIC-LABEL: off07:
 ; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], lptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off07:
+; LINUX-32-STATIC-LABEL: off07:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off07:
+; LINUX-32-PIC-LABEL: off07:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ldst+64(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off07:
+; LINUX-64-PIC-LABEL: off07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
@@ -3447,27 +3447,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: off08:
+; LINUX-64-STATIC-LABEL: off08:
 ; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 64([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: off08:
+; LINUX-32-STATIC-LABEL: off08:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: off08:
+; LINUX-32-PIC-LABEL: off08:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 64([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: off08:
+; LINUX-64-PIC-LABEL: off08:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	64([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
@@ -3525,22 +3525,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
 	ret void
-; LINUX-64-STATIC: moo00:
+; LINUX-64-STATIC-LABEL: moo00:
 ; LINUX-64-STATIC: movl    src+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], dst+262144(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo00:
+; LINUX-32-STATIC-LABEL: moo00:
 ; LINUX-32-STATIC: 	movl	src+262144, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dst+262144
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo00:
+; LINUX-32-PIC-LABEL: moo00:
 ; LINUX-32-PIC: 	movl	src+262144, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dst+262144
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo00:
+; LINUX-64-PIC-LABEL: moo00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3595,19 +3595,19 @@ define void @moo01(i64 %i) nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: moo01:
+; LINUX-64-STATIC-LABEL: moo01:
 ; LINUX-64-STATIC: movq    $dst+262144, ptr(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo01:
+; LINUX-32-STATIC-LABEL: moo01:
 ; LINUX-32-STATIC: 	movl	$dst+262144, ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo01:
+; LINUX-32-PIC-LABEL: moo01:
 ; LINUX-32-PIC: 	movl	$dst+262144, ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo01:
+; LINUX-64-PIC-LABEL: moo01:
 ; LINUX-64-PIC: 	movl	$262144, [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3664,25 +3664,25 @@ entry:
 	%2 = getelementptr i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: moo02:
+; LINUX-64-STATIC-LABEL: moo02:
 ; LINUX-64-STATIC: movl    src+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo02:
+; LINUX-32-STATIC-LABEL: moo02:
 ; LINUX-32-STATIC: 	movl	src+262144, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo02:
+; LINUX-32-PIC-LABEL: moo02:
 ; LINUX-32-PIC: 	movl	src+262144, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo02:
+; LINUX-64-PIC-LABEL: moo02:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3745,22 +3745,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
 	ret void
-; LINUX-64-STATIC: moo03:
+; LINUX-64-STATIC-LABEL: moo03:
 ; LINUX-64-STATIC: movl    dsrc+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ddst+262144(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo03:
+; LINUX-32-STATIC-LABEL: moo03:
 ; LINUX-32-STATIC: 	movl	dsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ddst+262144
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo03:
+; LINUX-32-PIC-LABEL: moo03:
 ; LINUX-32-PIC: 	movl	dsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ddst+262144
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo03:
+; LINUX-64-PIC-LABEL: moo03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3805,19 +3805,19 @@ define void @moo04(i64 %i) nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
 	ret void
-; LINUX-64-STATIC: moo04:
+; LINUX-64-STATIC-LABEL: moo04:
 ; LINUX-64-STATIC: movq    $ddst+262144, dptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo04:
+; LINUX-32-STATIC-LABEL: moo04:
 ; LINUX-32-STATIC: 	movl	$ddst+262144, dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo04:
+; LINUX-32-PIC-LABEL: moo04:
 ; LINUX-32-PIC: 	movl	$ddst+262144, dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo04:
+; LINUX-64-PIC-LABEL: moo04:
 ; LINUX-64-PIC: 	movl	$262144, [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3863,25 +3863,25 @@ entry:
 	%2 = getelementptr i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: moo05:
+; LINUX-64-STATIC-LABEL: moo05:
 ; LINUX-64-STATIC: movl    dsrc+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo05:
+; LINUX-32-STATIC-LABEL: moo05:
 ; LINUX-32-STATIC: 	movl	dsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo05:
+; LINUX-32-PIC-LABEL: moo05:
 ; LINUX-32-PIC: 	movl	dsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo05:
+; LINUX-64-PIC-LABEL: moo05:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]]), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -3934,22 +3934,22 @@ entry:
 	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
 	ret void
-; LINUX-64-STATIC: moo06:
+; LINUX-64-STATIC-LABEL: moo06:
 ; LINUX-64-STATIC: movl    lsrc+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ldst+262144(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo06:
+; LINUX-32-STATIC-LABEL: moo06:
 ; LINUX-32-STATIC: 	movl	lsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ldst+262144
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo06:
+; LINUX-32-PIC-LABEL: moo06:
 ; LINUX-32-PIC: 	movl	lsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ldst+262144
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo06:
+; LINUX-64-PIC-LABEL: moo06:
 ; LINUX-64-PIC: 	movl	lsrc+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movl	[[EAX]], ldst+262144(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
@@ -3992,19 +3992,19 @@ define void @moo07(i64 %i) nounwind {
 entry:
 	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
 	ret void
-; LINUX-64-STATIC: moo07:
+; LINUX-64-STATIC-LABEL: moo07:
 ; LINUX-64-STATIC: movq    $ldst+262144, lptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo07:
+; LINUX-32-STATIC-LABEL: moo07:
 ; LINUX-32-STATIC: 	movl	$ldst+262144, lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo07:
+; LINUX-32-PIC-LABEL: moo07:
 ; LINUX-32-PIC: 	movl	$ldst+262144, lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo07:
+; LINUX-64-PIC-LABEL: moo07:
 ; LINUX-64-PIC: 	leaq	ldst+262144(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
 ; LINUX-64-PIC-NEXT: 	ret
@@ -4048,25 +4048,25 @@ entry:
 	%2 = getelementptr i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
-; LINUX-64-STATIC: moo08:
+; LINUX-64-STATIC-LABEL: moo08:
 ; LINUX-64-STATIC: movl    lsrc+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]])
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: moo08:
+; LINUX-32-STATIC-LABEL: moo08:
 ; LINUX-32-STATIC: 	movl	lsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: moo08:
+; LINUX-32-PIC-LABEL: moo08:
 ; LINUX-32-PIC: 	movl	lsrc+262144, [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], 262144([[ECX]])
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: moo08:
+; LINUX-64-PIC-LABEL: moo08:
 ; LINUX-64-PIC: 	movl	lsrc+262144(%rip), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	[[EAX]], 262144([[RCX]])
@@ -4120,24 +4120,24 @@ entry:
 	%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: big00:
+; LINUX-64-STATIC-LABEL: big00:
 ; LINUX-64-STATIC: movl    src+262144(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], dst+262144(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big00:
+; LINUX-32-STATIC-LABEL: big00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], dst+262144(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big00:
+; LINUX-32-PIC-LABEL: big00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], dst+262144(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big00:
+; LINUX-64-PIC-LABEL: big00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -4197,24 +4197,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
 	store i32* %0, i32** @ptr, align 8
 	ret void
-; LINUX-64-STATIC: big01:
+; LINUX-64-STATIC-LABEL: big01:
 ; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], ptr(%rip)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big01:
+; LINUX-32-STATIC-LABEL: big01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big01:
+; LINUX-32-PIC-LABEL: big01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dst+262144(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], ptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big01:
+; LINUX-64-PIC-LABEL: big01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -4277,27 +4277,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: big02:
+; LINUX-64-STATIC-LABEL: big02:
 ; LINUX-64-STATIC: movl    src+262144(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big02:
+; LINUX-32-STATIC-LABEL: big02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big02:
+; LINUX-32-PIC-LABEL: big02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	src+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big02:
+; LINUX-64-PIC-LABEL: big02:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -4366,24 +4366,24 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: big03:
+; LINUX-64-STATIC-LABEL: big03:
 ; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ddst+262144(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big03:
+; LINUX-32-STATIC-LABEL: big03:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ddst+262144(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big03:
+; LINUX-32-PIC-LABEL: big03:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ddst+262144(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big03:
+; LINUX-64-PIC-LABEL: big03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -4439,24 +4439,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
 	store i32* %0, i32** @dptr, align 8
 	ret void
-; LINUX-64-STATIC: big04:
+; LINUX-64-STATIC-LABEL: big04:
 ; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], dptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big04:
+; LINUX-32-STATIC-LABEL: big04:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big04:
+; LINUX-32-PIC-LABEL: big04:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], dptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big04:
+; LINUX-64-PIC-LABEL: big04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -4512,27 +4512,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: big05:
+; LINUX-64-STATIC-LABEL: big05:
 ; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big05:
+; LINUX-32-STATIC-LABEL: big05:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big05:
+; LINUX-32-PIC-LABEL: big05:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big05:
+; LINUX-64-PIC-LABEL: big05:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
@@ -4594,24 +4594,24 @@ entry:
 	%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
-; LINUX-64-STATIC: big06:
+; LINUX-64-STATIC-LABEL: big06:
 ; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], ldst+262144(,%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big06:
+; LINUX-32-STATIC-LABEL: big06:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], ldst+262144(,[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big06:
+; LINUX-32-PIC-LABEL: big06:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], ldst+262144(,[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big06:
+; LINUX-64-PIC-LABEL: big06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), [[RCX:%r.x]]
@@ -4667,24 +4667,24 @@ entry:
 	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
 	store i32* %0, i32** @lptr, align 8
 	ret void
-; LINUX-64-STATIC: big07:
+; LINUX-64-STATIC-LABEL: big07:
 ; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-STATIC: movq    [[RAX]], lptr
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big07:
+; LINUX-32-STATIC-LABEL: big07:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big07:
+; LINUX-32-PIC-LABEL: big07:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[EAX]], lptr
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big07:
+; LINUX-64-PIC-LABEL: big07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	[[RAX]], lptr(%rip)
@@ -4739,27 +4739,27 @@ entry:
 	%4 = getelementptr i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
-; LINUX-64-STATIC: big08:
+; LINUX-64-STATIC-LABEL: big08:
 ; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RCX:%r.x]]
 ; LINUX-64-STATIC: movl    [[EAX]], 262144([[RCX]],%rdi,4)
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: big08:
+; LINUX-32-STATIC-LABEL: big08:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: big08:
+; LINUX-32-PIC-LABEL: big08:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,[[EAX]],4), [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[EDX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	[[ECX]], 262144([[EDX]],[[EAX]],4)
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: big08:
+; LINUX-64-PIC-LABEL: big08:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movl	262144([[RAX]],%rdi,4), [[EAX:%e.x]]
 ; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), [[RCX:%r.x]]
@@ -4815,19 +4815,19 @@ entry:
 define i8* @bar00() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @src to i8*)
-; LINUX-64-STATIC: bar00:
+; LINUX-64-STATIC-LABEL: bar00:
 ; LINUX-64-STATIC: movl    $src, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar00:
+; LINUX-32-STATIC-LABEL: bar00:
 ; LINUX-32-STATIC: 	movl	$src, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar00:
+; LINUX-32-PIC-LABEL: bar00:
 ; LINUX-32-PIC: 	movl	$src, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar00:
+; LINUX-64-PIC-LABEL: bar00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -4862,19 +4862,19 @@ entry:
 define i8* @bxr00() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xsrc to i8*)
-; LINUX-64-STATIC: bxr00:
+; LINUX-64-STATIC-LABEL: bxr00:
 ; LINUX-64-STATIC: movl    $xsrc, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bxr00:
+; LINUX-32-STATIC-LABEL: bxr00:
 ; LINUX-32-STATIC: 	movl	$xsrc, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bxr00:
+; LINUX-32-PIC-LABEL: bxr00:
 ; LINUX-32-PIC: 	movl	$xsrc, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bxr00:
+; LINUX-64-PIC-LABEL: bxr00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -4909,19 +4909,19 @@ entry:
 define i8* @bar01() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dst to i8*)
-; LINUX-64-STATIC: bar01:
+; LINUX-64-STATIC-LABEL: bar01:
 ; LINUX-64-STATIC: movl    $dst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar01:
+; LINUX-32-STATIC-LABEL: bar01:
 ; LINUX-32-STATIC: 	movl	$dst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar01:
+; LINUX-32-PIC-LABEL: bar01:
 ; LINUX-32-PIC: 	movl	$dst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar01:
+; LINUX-64-PIC-LABEL: bar01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -4956,19 +4956,19 @@ entry:
 define i8* @bxr01() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xdst to i8*)
-; LINUX-64-STATIC: bxr01:
+; LINUX-64-STATIC-LABEL: bxr01:
 ; LINUX-64-STATIC: movl    $xdst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bxr01:
+; LINUX-32-STATIC-LABEL: bxr01:
 ; LINUX-32-STATIC: 	movl	$xdst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bxr01:
+; LINUX-32-PIC-LABEL: bxr01:
 ; LINUX-32-PIC: 	movl	$xdst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bxr01:
+; LINUX-64-PIC-LABEL: bxr01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5003,19 +5003,19 @@ entry:
 define i8* @bar02() nounwind {
 entry:
 	ret i8* bitcast (i32** @ptr to i8*)
-; LINUX-64-STATIC: bar02:
+; LINUX-64-STATIC-LABEL: bar02:
 ; LINUX-64-STATIC: movl    $ptr, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar02:
+; LINUX-32-STATIC-LABEL: bar02:
 ; LINUX-32-STATIC: 	movl	$ptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar02:
+; LINUX-32-PIC-LABEL: bar02:
 ; LINUX-32-PIC: 	movl	$ptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar02:
+; LINUX-64-PIC-LABEL: bar02:
 ; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5050,19 +5050,19 @@ entry:
 define i8* @bar03() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
-; LINUX-64-STATIC: bar03:
+; LINUX-64-STATIC-LABEL: bar03:
 ; LINUX-64-STATIC: movl    $dsrc, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar03:
+; LINUX-32-STATIC-LABEL: bar03:
 ; LINUX-32-STATIC: 	movl	$dsrc, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar03:
+; LINUX-32-PIC-LABEL: bar03:
 ; LINUX-32-PIC: 	movl	$dsrc, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar03:
+; LINUX-64-PIC-LABEL: bar03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5097,19 +5097,19 @@ entry:
 define i8* @bar04() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ddst to i8*)
-; LINUX-64-STATIC: bar04:
+; LINUX-64-STATIC-LABEL: bar04:
 ; LINUX-64-STATIC: movl    $ddst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar04:
+; LINUX-32-STATIC-LABEL: bar04:
 ; LINUX-32-STATIC: 	movl	$ddst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar04:
+; LINUX-32-PIC-LABEL: bar04:
 ; LINUX-32-PIC: 	movl	$ddst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar04:
+; LINUX-64-PIC-LABEL: bar04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5144,19 +5144,19 @@ entry:
 define i8* @bar05() nounwind {
 entry:
 	ret i8* bitcast (i32** @dptr to i8*)
-; LINUX-64-STATIC: bar05:
+; LINUX-64-STATIC-LABEL: bar05:
 ; LINUX-64-STATIC: movl    $dptr, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar05:
+; LINUX-32-STATIC-LABEL: bar05:
 ; LINUX-32-STATIC: 	movl	$dptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar05:
+; LINUX-32-PIC-LABEL: bar05:
 ; LINUX-32-PIC: 	movl	$dptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar05:
+; LINUX-64-PIC-LABEL: bar05:
 ; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5191,19 +5191,19 @@ entry:
 define i8* @bar06() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
-; LINUX-64-STATIC: bar06:
+; LINUX-64-STATIC-LABEL: bar06:
 ; LINUX-64-STATIC: movl    $lsrc, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar06:
+; LINUX-32-STATIC-LABEL: bar06:
 ; LINUX-32-STATIC: 	movl	$lsrc, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar06:
+; LINUX-32-PIC-LABEL: bar06:
 ; LINUX-32-PIC: 	movl	$lsrc, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar06:
+; LINUX-64-PIC-LABEL: bar06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5238,19 +5238,19 @@ entry:
 define i8* @bar07() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ldst to i8*)
-; LINUX-64-STATIC: bar07:
+; LINUX-64-STATIC-LABEL: bar07:
 ; LINUX-64-STATIC: movl    $ldst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar07:
+; LINUX-32-STATIC-LABEL: bar07:
 ; LINUX-32-STATIC: 	movl	$ldst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar07:
+; LINUX-32-PIC-LABEL: bar07:
 ; LINUX-32-PIC: 	movl	$ldst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar07:
+; LINUX-64-PIC-LABEL: bar07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5285,19 +5285,19 @@ entry:
 define i8* @bar08() nounwind {
 entry:
 	ret i8* bitcast (i32** @lptr to i8*)
-; LINUX-64-STATIC: bar08:
+; LINUX-64-STATIC-LABEL: bar08:
 ; LINUX-64-STATIC: movl    $lptr, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bar08:
+; LINUX-32-STATIC-LABEL: bar08:
 ; LINUX-32-STATIC: 	movl	$lptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bar08:
+; LINUX-32-PIC-LABEL: bar08:
 ; LINUX-32-PIC: 	movl	$lptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bar08:
+; LINUX-64-PIC-LABEL: bar08:
 ; LINUX-64-PIC: 	leaq	lptr(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5332,19 +5332,19 @@ entry:
 define i8* @har00() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @src to i8*)
-; LINUX-64-STATIC: har00:
+; LINUX-64-STATIC-LABEL: har00:
 ; LINUX-64-STATIC: movl    $src, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har00:
+; LINUX-32-STATIC-LABEL: har00:
 ; LINUX-32-STATIC: 	movl	$src, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har00:
+; LINUX-32-PIC-LABEL: har00:
 ; LINUX-32-PIC: 	movl	$src, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har00:
+; LINUX-64-PIC-LABEL: har00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5379,19 +5379,19 @@ entry:
 define i8* @hxr00() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xsrc to i8*)
-; LINUX-64-STATIC: hxr00:
+; LINUX-64-STATIC-LABEL: hxr00:
 ; LINUX-64-STATIC: movl    $xsrc, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: hxr00:
+; LINUX-32-STATIC-LABEL: hxr00:
 ; LINUX-32-STATIC: 	movl	$xsrc, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: hxr00:
+; LINUX-32-PIC-LABEL: hxr00:
 ; LINUX-32-PIC: 	movl	$xsrc, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: hxr00:
+; LINUX-64-PIC-LABEL: hxr00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5426,19 +5426,19 @@ entry:
 define i8* @har01() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dst to i8*)
-; LINUX-64-STATIC: har01:
+; LINUX-64-STATIC-LABEL: har01:
 ; LINUX-64-STATIC: movl    $dst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har01:
+; LINUX-32-STATIC-LABEL: har01:
 ; LINUX-32-STATIC: 	movl	$dst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har01:
+; LINUX-32-PIC-LABEL: har01:
 ; LINUX-32-PIC: 	movl	$dst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har01:
+; LINUX-64-PIC-LABEL: har01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5473,19 +5473,19 @@ entry:
 define i8* @hxr01() nounwind {
 entry:
 	ret i8* bitcast ([32 x i32]* @xdst to i8*)
-; LINUX-64-STATIC: hxr01:
+; LINUX-64-STATIC-LABEL: hxr01:
 ; LINUX-64-STATIC: movl    $xdst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: hxr01:
+; LINUX-32-STATIC-LABEL: hxr01:
 ; LINUX-32-STATIC: 	movl	$xdst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: hxr01:
+; LINUX-32-PIC-LABEL: hxr01:
 ; LINUX-32-PIC: 	movl	$xdst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: hxr01:
+; LINUX-64-PIC-LABEL: hxr01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5522,19 +5522,19 @@ entry:
 	%0 = load i32** @ptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
-; LINUX-64-STATIC: har02:
+; LINUX-64-STATIC-LABEL: har02:
 ; LINUX-64-STATIC: movq    ptr(%rip), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har02:
+; LINUX-32-STATIC-LABEL: har02:
 ; LINUX-32-STATIC: 	movl	ptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har02:
+; LINUX-32-PIC-LABEL: har02:
 ; LINUX-32-PIC: 	movl	ptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har02:
+; LINUX-64-PIC-LABEL: har02:
 ; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -5575,19 +5575,19 @@ entry:
 define i8* @har03() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
-; LINUX-64-STATIC: har03:
+; LINUX-64-STATIC-LABEL: har03:
 ; LINUX-64-STATIC: movl    $dsrc, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har03:
+; LINUX-32-STATIC-LABEL: har03:
 ; LINUX-32-STATIC: 	movl	$dsrc, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har03:
+; LINUX-32-PIC-LABEL: har03:
 ; LINUX-32-PIC: 	movl	$dsrc, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har03:
+; LINUX-64-PIC-LABEL: har03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5622,19 +5622,19 @@ entry:
 define i8* @har04() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ddst to i8*)
-; LINUX-64-STATIC: har04:
+; LINUX-64-STATIC-LABEL: har04:
 ; LINUX-64-STATIC: movl    $ddst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har04:
+; LINUX-32-STATIC-LABEL: har04:
 ; LINUX-32-STATIC: 	movl	$ddst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har04:
+; LINUX-32-PIC-LABEL: har04:
 ; LINUX-32-PIC: 	movl	$ddst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har04:
+; LINUX-64-PIC-LABEL: har04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5671,19 +5671,19 @@ entry:
 	%0 = load i32** @dptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
-; LINUX-64-STATIC: har05:
+; LINUX-64-STATIC-LABEL: har05:
 ; LINUX-64-STATIC: movq    dptr(%rip), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har05:
+; LINUX-32-STATIC-LABEL: har05:
 ; LINUX-32-STATIC: 	movl	dptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har05:
+; LINUX-32-PIC-LABEL: har05:
 ; LINUX-32-PIC: 	movl	dptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har05:
+; LINUX-64-PIC-LABEL: har05:
 ; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -5719,19 +5719,19 @@ entry:
 define i8* @har06() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
-; LINUX-64-STATIC: har06:
+; LINUX-64-STATIC-LABEL: har06:
 ; LINUX-64-STATIC: movl    $lsrc, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har06:
+; LINUX-32-STATIC-LABEL: har06:
 ; LINUX-32-STATIC: 	movl	$lsrc, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har06:
+; LINUX-32-PIC-LABEL: har06:
 ; LINUX-32-PIC: 	movl	$lsrc, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har06:
+; LINUX-64-PIC-LABEL: har06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5766,19 +5766,19 @@ entry:
 define i8* @har07() nounwind {
 entry:
 	ret i8* bitcast ([131072 x i32]* @ldst to i8*)
-; LINUX-64-STATIC: har07:
+; LINUX-64-STATIC-LABEL: har07:
 ; LINUX-64-STATIC: movl    $ldst, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har07:
+; LINUX-32-STATIC-LABEL: har07:
 ; LINUX-32-STATIC: 	movl	$ldst, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har07:
+; LINUX-32-PIC-LABEL: har07:
 ; LINUX-32-PIC: 	movl	$ldst, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har07:
+; LINUX-64-PIC-LABEL: har07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5815,19 +5815,19 @@ entry:
 	%0 = load i32** @lptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
-; LINUX-64-STATIC: har08:
+; LINUX-64-STATIC-LABEL: har08:
 ; LINUX-64-STATIC: movq    lptr(%rip), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: har08:
+; LINUX-32-STATIC-LABEL: har08:
 ; LINUX-32-STATIC: 	movl	lptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: har08:
+; LINUX-32-PIC-LABEL: har08:
 ; LINUX-32-PIC: 	movl	lptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: har08:
+; LINUX-64-PIC-LABEL: har08:
 ; LINUX-64-PIC: 	movq	lptr(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -5862,19 +5862,19 @@ entry:
 define i8* @bat00() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bat00:
+; LINUX-64-STATIC-LABEL: bat00:
 ; LINUX-64-STATIC: movl    $src+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat00:
+; LINUX-32-STATIC-LABEL: bat00:
 ; LINUX-32-STATIC: 	movl	$src+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat00:
+; LINUX-32-PIC-LABEL: bat00:
 ; LINUX-32-PIC: 	movl	$src+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat00:
+; LINUX-64-PIC-LABEL: bat00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -5915,19 +5915,19 @@ entry:
 define i8* @bxt00() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bxt00:
+; LINUX-64-STATIC-LABEL: bxt00:
 ; LINUX-64-STATIC: movl    $xsrc+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bxt00:
+; LINUX-32-STATIC-LABEL: bxt00:
 ; LINUX-32-STATIC: 	movl	$xsrc+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bxt00:
+; LINUX-32-PIC-LABEL: bxt00:
 ; LINUX-32-PIC: 	movl	$xsrc+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bxt00:
+; LINUX-64-PIC-LABEL: bxt00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -5968,19 +5968,19 @@ entry:
 define i8* @bat01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bat01:
+; LINUX-64-STATIC-LABEL: bat01:
 ; LINUX-64-STATIC: movl    $dst+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat01:
+; LINUX-32-STATIC-LABEL: bat01:
 ; LINUX-32-STATIC: 	movl	$dst+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat01:
+; LINUX-32-PIC-LABEL: bat01:
 ; LINUX-32-PIC: 	movl	$dst+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat01:
+; LINUX-64-PIC-LABEL: bat01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6021,19 +6021,19 @@ entry:
 define i8* @bxt01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bxt01:
+; LINUX-64-STATIC-LABEL: bxt01:
 ; LINUX-64-STATIC: movl    $xdst+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bxt01:
+; LINUX-32-STATIC-LABEL: bxt01:
 ; LINUX-32-STATIC: 	movl	$xdst+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bxt01:
+; LINUX-32-PIC-LABEL: bxt01:
 ; LINUX-32-PIC: 	movl	$xdst+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bxt01:
+; LINUX-64-PIC-LABEL: bxt01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6077,22 +6077,22 @@ entry:
 	%1 = getelementptr i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: bat02:
+; LINUX-64-STATIC-LABEL: bat02:
 ; LINUX-64-STATIC: movq    ptr(%rip), %rax
 ; LINUX-64-STATIC: addq    $64, %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat02:
+; LINUX-32-STATIC-LABEL: bat02:
 ; LINUX-32-STATIC: 	movl	ptr, %eax
 ; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat02:
+; LINUX-32-PIC-LABEL: bat02:
 ; LINUX-32-PIC: 	movl	ptr, %eax
 ; LINUX-32-PIC-NEXT: 	addl	$64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat02:
+; LINUX-64-PIC-LABEL: bat02:
 ; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
@@ -6140,19 +6140,19 @@ entry:
 define i8* @bat03() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bat03:
+; LINUX-64-STATIC-LABEL: bat03:
 ; LINUX-64-STATIC: movl    $dsrc+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat03:
+; LINUX-32-STATIC-LABEL: bat03:
 ; LINUX-32-STATIC: 	movl	$dsrc+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat03:
+; LINUX-32-PIC-LABEL: bat03:
 ; LINUX-32-PIC: 	movl	$dsrc+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat03:
+; LINUX-64-PIC-LABEL: bat03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6188,19 +6188,19 @@ entry:
 define i8* @bat04() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bat04:
+; LINUX-64-STATIC-LABEL: bat04:
 ; LINUX-64-STATIC: movl    $ddst+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat04:
+; LINUX-32-STATIC-LABEL: bat04:
 ; LINUX-32-STATIC: 	movl	$ddst+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat04:
+; LINUX-32-PIC-LABEL: bat04:
 ; LINUX-32-PIC: 	movl	$ddst+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat04:
+; LINUX-64-PIC-LABEL: bat04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6239,22 +6239,22 @@ entry:
 	%1 = getelementptr i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: bat05:
+; LINUX-64-STATIC-LABEL: bat05:
 ; LINUX-64-STATIC: movq    dptr(%rip), %rax
 ; LINUX-64-STATIC: addq    $64, %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat05:
+; LINUX-32-STATIC-LABEL: bat05:
 ; LINUX-32-STATIC: 	movl	dptr, %eax
 ; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat05:
+; LINUX-32-PIC-LABEL: bat05:
 ; LINUX-32-PIC: 	movl	dptr, %eax
 ; LINUX-32-PIC-NEXT: 	addl	$64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat05:
+; LINUX-64-PIC-LABEL: bat05:
 ; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
@@ -6297,19 +6297,19 @@ entry:
 define i8* @bat06() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bat06:
+; LINUX-64-STATIC-LABEL: bat06:
 ; LINUX-64-STATIC: movl    $lsrc+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat06:
+; LINUX-32-STATIC-LABEL: bat06:
 ; LINUX-32-STATIC: 	movl	$lsrc+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat06:
+; LINUX-32-PIC-LABEL: bat06:
 ; LINUX-32-PIC: 	movl	$lsrc+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat06:
+; LINUX-64-PIC-LABEL: bat06:
 ; LINUX-64-PIC: 	leaq	lsrc+64(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6344,19 +6344,19 @@ entry:
 define i8* @bat07() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*)
-; LINUX-64-STATIC: bat07:
+; LINUX-64-STATIC-LABEL: bat07:
 ; LINUX-64-STATIC: movl    $ldst+64, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat07:
+; LINUX-32-STATIC-LABEL: bat07:
 ; LINUX-32-STATIC: 	movl	$ldst+64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat07:
+; LINUX-32-PIC-LABEL: bat07:
 ; LINUX-32-PIC: 	movl	$ldst+64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat07:
+; LINUX-64-PIC-LABEL: bat07:
 ; LINUX-64-PIC: 	leaq	ldst+64(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6394,22 +6394,22 @@ entry:
 	%1 = getelementptr i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: bat08:
+; LINUX-64-STATIC-LABEL: bat08:
 ; LINUX-64-STATIC: movq    lptr(%rip), %rax
 ; LINUX-64-STATIC: addq    $64, %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bat08:
+; LINUX-32-STATIC-LABEL: bat08:
 ; LINUX-32-STATIC: 	movl	lptr, %eax
 ; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bat08:
+; LINUX-32-PIC-LABEL: bat08:
 ; LINUX-32-PIC: 	movl	lptr, %eax
 ; LINUX-32-PIC-NEXT: 	addl	$64, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bat08:
+; LINUX-64-PIC-LABEL: bat08:
 ; LINUX-64-PIC: 	movq	lptr(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	addq	$64, %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6451,19 +6451,19 @@ entry:
 define i8* @bam00() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bam00:
+; LINUX-64-STATIC-LABEL: bam00:
 ; LINUX-64-STATIC: movl    $src+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam00:
+; LINUX-32-STATIC-LABEL: bam00:
 ; LINUX-32-STATIC: 	movl	$src+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam00:
+; LINUX-32-PIC-LABEL: bam00:
 ; LINUX-32-PIC: 	movl	$src+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam00:
+; LINUX-64-PIC-LABEL: bam00:
 ; LINUX-64-PIC: 	movl	$262144, %eax
 ; LINUX-64-PIC-NEXT: 	addq	src@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6504,19 +6504,19 @@ entry:
 define i8* @bam01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bam01:
+; LINUX-64-STATIC-LABEL: bam01:
 ; LINUX-64-STATIC: movl    $dst+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam01:
+; LINUX-32-STATIC-LABEL: bam01:
 ; LINUX-32-STATIC: 	movl	$dst+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam01:
+; LINUX-32-PIC-LABEL: bam01:
 ; LINUX-32-PIC: 	movl	$dst+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam01:
+; LINUX-64-PIC-LABEL: bam01:
 ; LINUX-64-PIC: 	movl	$262144, %eax
 ; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6557,19 +6557,19 @@ entry:
 define i8* @bxm01() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bxm01:
+; LINUX-64-STATIC-LABEL: bxm01:
 ; LINUX-64-STATIC: movl    $xdst+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bxm01:
+; LINUX-32-STATIC-LABEL: bxm01:
 ; LINUX-32-STATIC: 	movl	$xdst+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bxm01:
+; LINUX-32-PIC-LABEL: bxm01:
 ; LINUX-32-PIC: 	movl	$xdst+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bxm01:
+; LINUX-64-PIC-LABEL: bxm01:
 ; LINUX-64-PIC: 	movl	$262144, %eax
 ; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6613,24 +6613,24 @@ entry:
 	%1 = getelementptr i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: bam02:
+; LINUX-64-STATIC-LABEL: bam02:
 ; LINUX-64-STATIC: movl    $262144, %eax
 ; LINUX-64-STATIC: addq    ptr(%rip), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam02:
+; LINUX-32-STATIC-LABEL: bam02:
 ; LINUX-32-STATIC: 	movl	$262144, %eax
 ; LINUX-32-STATIC-NEXT: 	addl	ptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam02:
+; LINUX-32-PIC-LABEL: bam02:
 ; LINUX-32-PIC: 	movl	$262144, %eax
 ; LINUX-32-PIC-NEXT: 	addl	ptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam02:
-; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
-; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
+; LINUX-64-PIC-LABEL: bam02:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	addq	([[RCX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6655,20 +6655,20 @@ entry:
 ; DARWIN-32-PIC-NEXT: 	ret
 
 ; DARWIN-64-STATIC: _bam02:
-; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
-; DARWIN-64-STATIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; DARWIN-64-STATIC-NEXT: 	addq	([[RCX]]), %rax
 ; DARWIN-64-STATIC-NEXT: 	ret
 
 ; DARWIN-64-DYNAMIC: _bam02:
-; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
-; DARWIN-64-DYNAMIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; DARWIN-64-DYNAMIC-NEXT: 	addq	([[RCX]]), %rax
 ; DARWIN-64-DYNAMIC-NEXT: 	ret
 
 ; DARWIN-64-PIC: _bam02:
-; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
-; DARWIN-64-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; DARWIN-64-PIC-NEXT: 	addq	([[RCX]]), %rax
 ; DARWIN-64-PIC-NEXT: 	ret
 }
@@ -6676,19 +6676,19 @@ entry:
 define i8* @bam03() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bam03:
+; LINUX-64-STATIC-LABEL: bam03:
 ; LINUX-64-STATIC: movl    $dsrc+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam03:
+; LINUX-32-STATIC-LABEL: bam03:
 ; LINUX-32-STATIC: 	movl	$dsrc+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam03:
+; LINUX-32-PIC-LABEL: bam03:
 ; LINUX-32-PIC: 	movl	$dsrc+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam03:
+; LINUX-64-PIC-LABEL: bam03:
 ; LINUX-64-PIC: 	movl	$262144, %eax
 ; LINUX-64-PIC-NEXT: 	addq	dsrc@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6724,19 +6724,19 @@ entry:
 define i8* @bam04() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bam04:
+; LINUX-64-STATIC-LABEL: bam04:
 ; LINUX-64-STATIC: movl    $ddst+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam04:
+; LINUX-32-STATIC-LABEL: bam04:
 ; LINUX-32-STATIC: 	movl	$ddst+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam04:
+; LINUX-32-PIC-LABEL: bam04:
 ; LINUX-32-PIC: 	movl	$ddst+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam04:
+; LINUX-64-PIC-LABEL: bam04:
 ; LINUX-64-PIC: 	movl	$262144, %eax
 ; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6775,24 +6775,24 @@ entry:
 	%1 = getelementptr i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: bam05:
+; LINUX-64-STATIC-LABEL: bam05:
 ; LINUX-64-STATIC: movl    $262144, %eax
 ; LINUX-64-STATIC: addq    dptr(%rip), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam05:
+; LINUX-32-STATIC-LABEL: bam05:
 ; LINUX-32-STATIC: 	movl	$262144, %eax
 ; LINUX-32-STATIC-NEXT: 	addl	dptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam05:
+; LINUX-32-PIC-LABEL: bam05:
 ; LINUX-32-PIC: 	movl	$262144, %eax
 ; LINUX-32-PIC-NEXT: 	addl	dptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam05:
-; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
-; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
+; LINUX-64-PIC-LABEL: bam05:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), [[RCX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	addq	([[RCX]]), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6833,19 +6833,19 @@ entry:
 define i8* @bam06() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bam06:
+; LINUX-64-STATIC-LABEL: bam06:
 ; LINUX-64-STATIC: movl    $lsrc+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam06:
+; LINUX-32-STATIC-LABEL: bam06:
 ; LINUX-32-STATIC: 	movl	$lsrc+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam06:
+; LINUX-32-PIC-LABEL: bam06:
 ; LINUX-32-PIC: 	movl	$lsrc+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam06:
+; LINUX-64-PIC-LABEL: bam06:
 ; LINUX-64-PIC: 	leaq	lsrc+262144(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6880,19 +6880,19 @@ entry:
 define i8* @bam07() nounwind {
 entry:
 	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*)
-; LINUX-64-STATIC: bam07:
+; LINUX-64-STATIC-LABEL: bam07:
 ; LINUX-64-STATIC: movl    $ldst+262144, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam07:
+; LINUX-32-STATIC-LABEL: bam07:
 ; LINUX-32-STATIC: 	movl	$ldst+262144, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam07:
+; LINUX-32-PIC-LABEL: bam07:
 ; LINUX-32-PIC: 	movl	$ldst+262144, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam07:
+; LINUX-64-PIC-LABEL: bam07:
 ; LINUX-64-PIC: 	leaq	ldst+262144(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -6930,22 +6930,22 @@ entry:
 	%1 = getelementptr i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: bam08:
+; LINUX-64-STATIC-LABEL: bam08:
 ; LINUX-64-STATIC: movl    $262144, %eax
 ; LINUX-64-STATIC: addq    lptr(%rip), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: bam08:
+; LINUX-32-STATIC-LABEL: bam08:
 ; LINUX-32-STATIC: 	movl	$262144, %eax
 ; LINUX-32-STATIC-NEXT: 	addl	lptr, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: bam08:
+; LINUX-32-PIC-LABEL: bam08:
 ; LINUX-32-PIC: 	movl	$262144, %eax
 ; LINUX-32-PIC-NEXT: 	addl	lptr, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: bam08:
+; LINUX-64-PIC-LABEL: bam08:
 ; LINUX-64-PIC: 	movl	$262144, %eax
 ; LINUX-64-PIC-NEXT: 	addq	lptr(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -6990,21 +6990,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cat00:
+; LINUX-64-STATIC-LABEL: cat00:
 ; LINUX-64-STATIC: leaq    src+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat00:
+; LINUX-32-STATIC-LABEL: cat00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	src+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat00:
+; LINUX-32-PIC-LABEL: cat00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	src+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat00:
+; LINUX-64-PIC-LABEL: cat00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7051,21 +7051,21 @@ entry:
 	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cxt00:
+; LINUX-64-STATIC-LABEL: cxt00:
 ; LINUX-64-STATIC: leaq    xsrc+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cxt00:
+; LINUX-32-STATIC-LABEL: cxt00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	xsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cxt00:
+; LINUX-32-PIC-LABEL: cxt00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	xsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cxt00:
+; LINUX-64-PIC-LABEL: cxt00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7112,21 +7112,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cat01:
+; LINUX-64-STATIC-LABEL: cat01:
 ; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat01:
+; LINUX-32-STATIC-LABEL: cat01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat01:
+; LINUX-32-PIC-LABEL: cat01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat01:
+; LINUX-64-PIC-LABEL: cat01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7173,21 +7173,21 @@ entry:
 	%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cxt01:
+; LINUX-64-STATIC-LABEL: cxt01:
 ; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cxt01:
+; LINUX-32-STATIC-LABEL: cxt01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cxt01:
+; LINUX-32-PIC-LABEL: cxt01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	xdst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cxt01:
+; LINUX-64-PIC-LABEL: cxt01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7235,24 +7235,24 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
-; LINUX-64-STATIC: cat02:
+; LINUX-64-STATIC-LABEL: cat02:
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-STATIC: leaq    64([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat02:
+; LINUX-32-STATIC-LABEL: cat02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat02:
+; LINUX-32-PIC-LABEL: cat02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat02:
+; LINUX-64-PIC-LABEL: cat02:
 ; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
@@ -7306,21 +7306,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cat03:
+; LINUX-64-STATIC-LABEL: cat03:
 ; LINUX-64-STATIC: leaq    dsrc+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat03:
+; LINUX-32-STATIC-LABEL: cat03:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat03:
+; LINUX-32-PIC-LABEL: cat03:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat03:
+; LINUX-64-PIC-LABEL: cat03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7365,21 +7365,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cat04:
+; LINUX-64-STATIC-LABEL: cat04:
 ; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat04:
+; LINUX-32-STATIC-LABEL: cat04:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat04:
+; LINUX-32-PIC-LABEL: cat04:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ddst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat04:
+; LINUX-64-PIC-LABEL: cat04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7425,24 +7425,24 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
-; LINUX-64-STATIC: cat05:
+; LINUX-64-STATIC-LABEL: cat05:
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-STATIC: leaq    64([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat05:
+; LINUX-32-STATIC-LABEL: cat05:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat05:
+; LINUX-32-PIC-LABEL: cat05:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat05:
+; LINUX-64-PIC-LABEL: cat05:
 ; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
@@ -7491,21 +7491,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cat06:
+; LINUX-64-STATIC-LABEL: cat06:
 ; LINUX-64-STATIC: leaq    lsrc+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat06:
+; LINUX-32-STATIC-LABEL: cat06:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	lsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat06:
+; LINUX-32-PIC-LABEL: cat06:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	lsrc+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat06:
+; LINUX-64-PIC-LABEL: cat06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7550,21 +7550,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cat07:
+; LINUX-64-STATIC-LABEL: cat07:
 ; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat07:
+; LINUX-32-STATIC-LABEL: cat07:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat07:
+; LINUX-32-PIC-LABEL: cat07:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ldst+64(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat07:
+; LINUX-64-PIC-LABEL: cat07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7610,24 +7610,24 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
-; LINUX-64-STATIC: cat08:
+; LINUX-64-STATIC-LABEL: cat08:
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-STATIC: leaq    64([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cat08:
+; LINUX-32-STATIC-LABEL: cat08:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cat08:
+; LINUX-32-PIC-LABEL: cat08:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	64([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cat08:
+; LINUX-64-PIC-LABEL: cat08:
 ; LINUX-64-PIC: 	movq	lptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	64([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7675,21 +7675,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cam00:
+; LINUX-64-STATIC-LABEL: cam00:
 ; LINUX-64-STATIC: leaq    src+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam00:
+; LINUX-32-STATIC-LABEL: cam00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	src+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam00:
+; LINUX-32-PIC-LABEL: cam00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	src+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam00:
+; LINUX-64-PIC-LABEL: cam00:
 ; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7736,21 +7736,21 @@ entry:
 	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cxm00:
+; LINUX-64-STATIC-LABEL: cxm00:
 ; LINUX-64-STATIC: leaq    xsrc+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cxm00:
+; LINUX-32-STATIC-LABEL: cxm00:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	xsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cxm00:
+; LINUX-32-PIC-LABEL: cxm00:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	xsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cxm00:
+; LINUX-64-PIC-LABEL: cxm00:
 ; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7797,21 +7797,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cam01:
+; LINUX-64-STATIC-LABEL: cam01:
 ; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam01:
+; LINUX-32-STATIC-LABEL: cam01:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam01:
+; LINUX-32-PIC-LABEL: cam01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam01:
+; LINUX-64-PIC-LABEL: cam01:
 ; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7858,21 +7858,21 @@ entry:
 	%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cxm01:
+; LINUX-64-STATIC-LABEL: cxm01:
 ; LINUX-64-STATIC: leaq    xdst+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cxm01:
+; LINUX-32-STATIC-LABEL: cxm01:
 ; LINUX-32-STATIC: 	movl	4(%esp), %eax
 ; LINUX-32-STATIC-NEXT: 	leal	xdst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cxm01:
+; LINUX-32-PIC-LABEL: cxm01:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	xdst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cxm01:
+; LINUX-64-PIC-LABEL: cxm01:
 ; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -7920,24 +7920,24 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
-; LINUX-64-STATIC: cam02:
+; LINUX-64-STATIC-LABEL: cam02:
 ; LINUX-64-STATIC: movq    ptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-STATIC: leaq    262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam02:
+; LINUX-32-STATIC-LABEL: cam02:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam02:
+; LINUX-32-PIC-LABEL: cam02:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	ptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam02:
+; LINUX-64-PIC-LABEL: cam02:
 ; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
@@ -7991,21 +7991,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cam03:
+; LINUX-64-STATIC-LABEL: cam03:
 ; LINUX-64-STATIC: leaq    dsrc+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam03:
+; LINUX-32-STATIC-LABEL: cam03:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	dsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam03:
+; LINUX-32-PIC-LABEL: cam03:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	dsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam03:
+; LINUX-64-PIC-LABEL: cam03:
 ; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -8050,21 +8050,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cam04:
+; LINUX-64-STATIC-LABEL: cam04:
 ; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam04:
+; LINUX-32-STATIC-LABEL: cam04:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam04:
+; LINUX-32-PIC-LABEL: cam04:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam04:
+; LINUX-64-PIC-LABEL: cam04:
 ; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -8110,24 +8110,24 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
-; LINUX-64-STATIC: cam05:
+; LINUX-64-STATIC-LABEL: cam05:
 ; LINUX-64-STATIC: movq    dptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-STATIC: leaq    262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam05:
+; LINUX-32-STATIC-LABEL: cam05:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam05:
+; LINUX-32-PIC-LABEL: cam05:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	dptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam05:
+; LINUX-64-PIC-LABEL: cam05:
 ; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	([[RAX]]), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
@@ -8176,21 +8176,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cam06:
+; LINUX-64-STATIC-LABEL: cam06:
 ; LINUX-64-STATIC: leaq    lsrc+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam06:
+; LINUX-32-STATIC-LABEL: cam06:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	lsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam06:
+; LINUX-32-PIC-LABEL: cam06:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	lsrc+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam06:
+; LINUX-64-PIC-LABEL: cam06:
 ; LINUX-64-PIC: 	leaq	lsrc(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -8235,21 +8235,21 @@ entry:
 	%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
-; LINUX-64-STATIC: cam07:
+; LINUX-64-STATIC-LABEL: cam07:
 ; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam07:
+; LINUX-32-STATIC-LABEL: cam07:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam07:
+; LINUX-32-PIC-LABEL: cam07:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam07:
+; LINUX-64-PIC-LABEL: cam07:
 ; LINUX-64-PIC: 	leaq	ldst(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -8295,24 +8295,24 @@ entry:
 	%2 = getelementptr i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
 	ret i8* %3
-; LINUX-64-STATIC: cam08:
+; LINUX-64-STATIC-LABEL: cam08:
 ; LINUX-64-STATIC: movq    lptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-STATIC: leaq    262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: cam08:
+; LINUX-32-STATIC-LABEL: cam08:
 ; LINUX-32-STATIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-STATIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: cam08:
+; LINUX-32-PIC-LABEL: cam08:
 ; LINUX-32-PIC: 	movl	4(%esp), [[EAX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	movl	lptr, [[ECX:%e.x]]
 ; LINUX-32-PIC-NEXT: 	leal	262144([[ECX]],[[EAX]],4), %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: cam08:
+; LINUX-64-PIC-LABEL: cam08:
 ; LINUX-64-PIC: 	movq	lptr(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	leaq	262144([[RAX]],%rdi,4), %rax
 ; LINUX-64-PIC-NEXT: 	ret
@@ -8364,7 +8364,7 @@ entry:
 	call void @x() nounwind
 	call void @x() nounwind
 	ret void
-; LINUX-64-STATIC: lcallee:
+; LINUX-64-STATIC-LABEL: lcallee:
 ; LINUX-64-STATIC: callq   x
 ; LINUX-64-STATIC: callq   x
 ; LINUX-64-STATIC: callq   x
@@ -8374,7 +8374,7 @@ entry:
 ; LINUX-64-STATIC: callq   x
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: lcallee:
+; LINUX-32-STATIC-LABEL: lcallee:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	x
 ; LINUX-32-STATIC-NEXT: 	calll	x
@@ -8386,7 +8386,7 @@ entry:
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: lcallee:
+; LINUX-32-PIC-LABEL: lcallee:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	x
 ; LINUX-32-PIC-NEXT: 	calll	x
@@ -8399,7 +8399,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: lcallee:
+; LINUX-64-PIC-LABEL: lcallee:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
 ; LINUX-64-PIC-NEXT: 	callq	x@PLT
@@ -8496,7 +8496,7 @@ entry:
 	call void @y() nounwind
 	call void @y() nounwind
 	ret void
-; LINUX-64-STATIC: dcallee:
+; LINUX-64-STATIC-LABEL: dcallee:
 ; LINUX-64-STATIC: callq   y
 ; LINUX-64-STATIC: callq   y
 ; LINUX-64-STATIC: callq   y
@@ -8506,7 +8506,7 @@ entry:
 ; LINUX-64-STATIC: callq   y
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: dcallee:
+; LINUX-32-STATIC-LABEL: dcallee:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	y
 ; LINUX-32-STATIC-NEXT: 	calll	y
@@ -8518,7 +8518,7 @@ entry:
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: dcallee:
+; LINUX-32-PIC-LABEL: dcallee:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	y
 ; LINUX-32-PIC-NEXT: 	calll	y
@@ -8531,7 +8531,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: dcallee:
+; LINUX-64-PIC-LABEL: dcallee:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
 ; LINUX-64-PIC-NEXT: 	callq	y@PLT
@@ -8621,19 +8621,19 @@ declare void @y()
 define void ()* @address() nounwind {
 entry:
 	ret void ()* @callee
-; LINUX-64-STATIC: address:
+; LINUX-64-STATIC-LABEL: address:
 ; LINUX-64-STATIC: movl    $callee, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: address:
+; LINUX-32-STATIC-LABEL: address:
 ; LINUX-32-STATIC: 	movl	$callee, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: address:
+; LINUX-32-PIC-LABEL: address:
 ; LINUX-32-PIC: 	movl	$callee, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: address:
+; LINUX-64-PIC-LABEL: address:
 ; LINUX-64-PIC: 	movq	callee@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -8670,19 +8670,19 @@ declare void @callee()
 define void ()* @laddress() nounwind {
 entry:
 	ret void ()* @lcallee
-; LINUX-64-STATIC: laddress:
+; LINUX-64-STATIC-LABEL: laddress:
 ; LINUX-64-STATIC: movl    $lcallee, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: laddress:
+; LINUX-32-STATIC-LABEL: laddress:
 ; LINUX-32-STATIC: 	movl	$lcallee, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: laddress:
+; LINUX-32-PIC-LABEL: laddress:
 ; LINUX-32-PIC: 	movl	$lcallee, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: laddress:
+; LINUX-64-PIC-LABEL: laddress:
 ; LINUX-64-PIC: 	movq	lcallee@GOTPCREL(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -8717,19 +8717,19 @@ entry:
 define void ()* @daddress() nounwind {
 entry:
 	ret void ()* @dcallee
-; LINUX-64-STATIC: daddress:
+; LINUX-64-STATIC-LABEL: daddress:
 ; LINUX-64-STATIC: movl    $dcallee, %eax
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: daddress:
+; LINUX-32-STATIC-LABEL: daddress:
 ; LINUX-32-STATIC: 	movl	$dcallee, %eax
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: daddress:
+; LINUX-32-PIC-LABEL: daddress:
 ; LINUX-32-PIC: 	movl	$dcallee, %eax
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: daddress:
+; LINUX-64-PIC-LABEL: daddress:
 ; LINUX-64-PIC: 	leaq	dcallee(%rip), %rax
 ; LINUX-64-PIC-NEXT: 	ret
 
@@ -8766,19 +8766,19 @@ entry:
 	call void @callee() nounwind
 	call void @callee() nounwind
 	ret void
-; LINUX-64-STATIC: caller:
+; LINUX-64-STATIC-LABEL: caller:
 ; LINUX-64-STATIC: callq   callee
 ; LINUX-64-STATIC: callq   callee
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: caller:
+; LINUX-32-STATIC-LABEL: caller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	callee
 ; LINUX-32-STATIC-NEXT: 	calll	callee
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: caller:
+; LINUX-32-PIC-LABEL: caller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	callee
 ; LINUX-32-PIC-NEXT: 	calll	callee
@@ -8786,7 +8786,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: caller:
+; LINUX-64-PIC-LABEL: caller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	callee@PLT
 ; LINUX-64-PIC-NEXT: 	callq	callee@PLT
@@ -8841,19 +8841,19 @@ entry:
 	call void @dcallee() nounwind
 	call void @dcallee() nounwind
 	ret void
-; LINUX-64-STATIC: dcaller:
+; LINUX-64-STATIC-LABEL: dcaller:
 ; LINUX-64-STATIC: callq   dcallee
 ; LINUX-64-STATIC: callq   dcallee
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: dcaller:
+; LINUX-32-STATIC-LABEL: dcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	dcallee
 ; LINUX-32-STATIC-NEXT: 	calll	dcallee
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: dcaller:
+; LINUX-32-PIC-LABEL: dcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	dcallee
 ; LINUX-32-PIC-NEXT: 	calll	dcallee
@@ -8861,7 +8861,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: dcaller:
+; LINUX-64-PIC-LABEL: dcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	dcallee
 ; LINUX-64-PIC-NEXT: 	callq	dcallee
@@ -8916,19 +8916,19 @@ entry:
 	call void @lcallee() nounwind
 	call void @lcallee() nounwind
 	ret void
-; LINUX-64-STATIC: lcaller:
+; LINUX-64-STATIC-LABEL: lcaller:
 ; LINUX-64-STATIC: callq   lcallee
 ; LINUX-64-STATIC: callq   lcallee
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: lcaller:
+; LINUX-32-STATIC-LABEL: lcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	lcallee
 ; LINUX-32-STATIC-NEXT: 	calll	lcallee
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: lcaller:
+; LINUX-32-PIC-LABEL: lcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	lcallee
 ; LINUX-32-PIC-NEXT: 	calll	lcallee
@@ -8936,7 +8936,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: lcaller:
+; LINUX-64-PIC-LABEL: lcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
 ; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
@@ -8990,24 +8990,24 @@ define void @tailcaller() nounwind {
 entry:
 	call void @callee() nounwind
 	ret void
-; LINUX-64-STATIC: tailcaller:
+; LINUX-64-STATIC-LABEL: tailcaller:
 ; LINUX-64-STATIC: callq   callee
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: tailcaller:
+; LINUX-32-STATIC-LABEL: tailcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	callee
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: tailcaller:
+; LINUX-32-PIC-LABEL: tailcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	callee
 ; LINUX-32-PIC-NEXT: 	addl
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: tailcaller:
+; LINUX-64-PIC-LABEL: tailcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	callee@PLT
 ; LINUX-64-PIC-NEXT: 	popq
@@ -9054,24 +9054,24 @@ define void @dtailcaller() nounwind {
 entry:
 	call void @dcallee() nounwind
 	ret void
-; LINUX-64-STATIC: dtailcaller:
+; LINUX-64-STATIC-LABEL: dtailcaller:
 ; LINUX-64-STATIC: callq   dcallee
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: dtailcaller:
+; LINUX-32-STATIC-LABEL: dtailcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	dcallee
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: dtailcaller:
+; LINUX-32-PIC-LABEL: dtailcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	dcallee
 ; LINUX-32-PIC-NEXT: 	addl
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: dtailcaller:
+; LINUX-64-PIC-LABEL: dtailcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	dcallee
 ; LINUX-64-PIC-NEXT: 	popq
@@ -9118,24 +9118,24 @@ define void @ltailcaller() nounwind {
 entry:
 	call void @lcallee() nounwind
 	ret void
-; LINUX-64-STATIC: ltailcaller:
+; LINUX-64-STATIC-LABEL: ltailcaller:
 ; LINUX-64-STATIC: callq   lcallee
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ltailcaller:
+; LINUX-32-STATIC-LABEL: ltailcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	lcallee
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ltailcaller:
+; LINUX-32-PIC-LABEL: ltailcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	lcallee
 ; LINUX-32-PIC-NEXT: 	addl
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ltailcaller:
+; LINUX-64-PIC-LABEL: ltailcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
 ; LINUX-64-PIC-NEXT: 	popq
@@ -9185,19 +9185,19 @@ entry:
 	%1 = load void ()** @ifunc, align 8
 	call void %1() nounwind
 	ret void
-; LINUX-64-STATIC: icaller:
+; LINUX-64-STATIC-LABEL: icaller:
 ; LINUX-64-STATIC: callq   *ifunc
 ; LINUX-64-STATIC: callq   *ifunc
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: icaller:
+; LINUX-32-STATIC-LABEL: icaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	*ifunc
 ; LINUX-32-STATIC-NEXT: 	calll	*ifunc
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: icaller:
+; LINUX-32-PIC-LABEL: icaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	*ifunc
 ; LINUX-32-PIC-NEXT: 	calll	*ifunc
@@ -9205,7 +9205,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: icaller:
+; LINUX-64-PIC-LABEL: icaller:
 ; LINUX-64-PIC: 	pushq	[[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
@@ -9275,19 +9275,19 @@ entry:
 	%1 = load void ()** @difunc, align 8
 	call void %1() nounwind
 	ret void
-; LINUX-64-STATIC: dicaller:
+; LINUX-64-STATIC-LABEL: dicaller:
 ; LINUX-64-STATIC: callq   *difunc
 ; LINUX-64-STATIC: callq   *difunc
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: dicaller:
+; LINUX-32-STATIC-LABEL: dicaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	*difunc
 ; LINUX-32-STATIC-NEXT: 	calll	*difunc
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: dicaller:
+; LINUX-32-PIC-LABEL: dicaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	*difunc
 ; LINUX-32-PIC-NEXT: 	calll	*difunc
@@ -9295,7 +9295,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: dicaller:
+; LINUX-64-PIC-LABEL: dicaller:
 ; LINUX-64-PIC: 	pushq	[[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), [[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
@@ -9358,19 +9358,19 @@ entry:
 	%1 = load void ()** @lifunc, align 8
 	call void %1() nounwind
 	ret void
-; LINUX-64-STATIC: licaller:
+; LINUX-64-STATIC-LABEL: licaller:
 ; LINUX-64-STATIC: callq   *lifunc
 ; LINUX-64-STATIC: callq   *lifunc
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: licaller:
+; LINUX-32-STATIC-LABEL: licaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	*lifunc
 ; LINUX-32-STATIC-NEXT: 	calll	*lifunc
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: licaller:
+; LINUX-32-PIC-LABEL: licaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	*lifunc
 ; LINUX-32-PIC-NEXT: 	calll	*lifunc
@@ -9378,7 +9378,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: licaller:
+; LINUX-64-PIC-LABEL: licaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
 ; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
@@ -9440,19 +9440,19 @@ entry:
 	%1 = load void ()** @ifunc, align 8
 	call void %1() nounwind
 	ret void
-; LINUX-64-STATIC: itailcaller:
+; LINUX-64-STATIC-LABEL: itailcaller:
 ; LINUX-64-STATIC: callq   *ifunc
 ; LINUX-64-STATIC: callq   *ifunc
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: itailcaller:
+; LINUX-32-STATIC-LABEL: itailcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	*ifunc
 ; LINUX-32-STATIC-NEXT: 	calll	*ifunc
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: itailcaller:
+; LINUX-32-PIC-LABEL: itailcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	*ifunc
 ; LINUX-32-PIC-NEXT: 	calll	*ifunc
@@ -9460,7 +9460,7 @@ entry:
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: itailcaller:
+; LINUX-64-PIC-LABEL: itailcaller:
 ; LINUX-64-PIC: 	pushq	[[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), [[RBX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	callq	*([[RBX]])
@@ -9528,24 +9528,24 @@ entry:
 	%0 = load void ()** @difunc, align 8
 	call void %0() nounwind
 	ret void
-; LINUX-64-STATIC: ditailcaller:
+; LINUX-64-STATIC-LABEL: ditailcaller:
 ; LINUX-64-STATIC: callq   *difunc
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: ditailcaller:
+; LINUX-32-STATIC-LABEL: ditailcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	*difunc
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: ditailcaller:
+; LINUX-32-PIC-LABEL: ditailcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	*difunc
 ; LINUX-32-PIC-NEXT: 	addl
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: ditailcaller:
+; LINUX-64-PIC-LABEL: ditailcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), [[RAX:%r.x]]
 ; LINUX-64-PIC-NEXT: 	callq	*([[RAX]])
@@ -9596,24 +9596,24 @@ entry:
 	%0 = load void ()** @lifunc, align 8
 	call void %0() nounwind
 	ret void
-; LINUX-64-STATIC: litailcaller:
+; LINUX-64-STATIC-LABEL: litailcaller:
 ; LINUX-64-STATIC: callq   *lifunc
 ; LINUX-64-STATIC: ret
 
-; LINUX-32-STATIC: litailcaller:
+; LINUX-32-STATIC-LABEL: litailcaller:
 ; LINUX-32-STATIC: 	subl
 ; LINUX-32-STATIC-NEXT: 	calll	*lifunc
 ; LINUX-32-STATIC-NEXT: 	addl
 ; LINUX-32-STATIC-NEXT: 	ret
 
-; LINUX-32-PIC: litailcaller:
+; LINUX-32-PIC-LABEL: litailcaller:
 ; LINUX-32-PIC: 	subl
 ; LINUX-32-PIC-NEXT: 	calll	*lifunc
 ; LINUX-32-PIC-NEXT: 	addl
 
 ; LINUX-32-PIC-NEXT: 	ret
 
-; LINUX-64-PIC: litailcaller:
+; LINUX-64-PIC-LABEL: litailcaller:
 ; LINUX-64-PIC: 	pushq
 ; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
 ; LINUX-64-PIC-NEXT: 	popq
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
index 4e30f2b05a89..1513fcba774b 100644
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -3,7 +3,7 @@
 
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: cmpl %ecx, %eax
 ; CHECK-NOT: addl
 ; CHECK: adcl $0, %eax
@@ -15,7 +15,7 @@ entry:
 }
 
 ; Instcombine transforms test1 into test2:
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: adcl $0
@@ -37,7 +37,7 @@ entry:
   %dec = sext i1 %cmp to i32
   %dec.res = add nsw i32 %dec, %res
   ret i32 %dec.res
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: cmpl
 ; CHECK: sbbl
 ; CHECK: ret
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 5fe08ed305f1..62a62a460bd7 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind {
   %b = add i32 %a, 128
   ret i32 %b
 ; X32: subl	$-128, %eax
-; X64: subl $-128, 
+; X64: subl $-128,
 }
 define i64 @test2(i64 inreg %a) nounwind {
   %b = add i64 %a, 2147483648
@@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind {
 define i64 @test3(i64 inreg %a) nounwind {
   %b = add i64 %a, 128
   ret i64 %b
-  
+
 ; X32: addl $128, %eax
 ; X64: subq	$-128,
 }
@@ -38,12 +38,12 @@ normal:
 
 overflow:
   ret i1 false
-  
-; X32: test4:
+
+; X32-LABEL: test4:
 ; X32: addl
 ; X32-NEXT: jo
 
-; X64:        test4:
+; X64-LABEL:        test4:
 ; X64:          addl	%e[[A1:si|dx]], %e[[A0:di|cx]]
 ; X64-NEXT:	jo
 }
@@ -62,11 +62,11 @@ normal:
 carry:
   ret i1 false
 
-; X32: test5:
+; X32-LABEL: test5:
 ; X32: addl
 ; X32-NEXT: jb
 
-; X64:        test5:
+; X64-LABEL:        test5:
 ; X64:          addl	%e[[A1]], %e[[A0]]
 ; X64-NEXT:	jb
 }
@@ -81,13 +81,13 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
         %tmp5 = add i64 %tmp3, %A               ; <i64> [#uses=1]
         ret i64 %tmp5
 
-; X32: test6:
-; X32:	    movl 12(%esp), %edx
+; X32-LABEL: test6:
+; X32:	    movl 4(%esp), %eax
+; X32-NEXT: movl 12(%esp), %edx
 ; X32-NEXT: addl 8(%esp), %edx
-; X32-NEXT: movl 4(%esp), %eax
 ; X32-NEXT: ret
-        
-; X64: test6:
+
+; X64-LABEL: test6:
 ; X64:	shlq	$32, %r[[A1]]
 ; X64:	leaq	(%r[[A1]],%r[[A0]]), %rax
 ; X64:	ret
@@ -98,7 +98,7 @@ define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind {
    ret {i32, i1} %t
 }
 
-; X64: test7:
+; X64-LABEL: test7:
 ; X64: addl %e[[A1]], %e
 ; X64-NEXT: setb %dl
 ; X64: ret
@@ -117,7 +117,7 @@ entry:
     ret {i64, i1} %final1
 }
 
-; X64: test8:
+; X64-LABEL: test8:
 ; X64: addq
 ; X64-NEXT: setb
 ; X64: ret
@@ -127,7 +127,7 @@ define i32 @test9(i32 %x, i32 %y) nounwind readnone {
   %sub = sext i1 %cmp to i32
   %cond = add i32 %sub, %y
   ret i32 %cond
-; X64: test9:
+; X64-LABEL: test9:
 ; X64: cmpl $10
 ; X64: sete
 ; X64: subl
@@ -140,11 +140,11 @@ entry:
   %obit = extractvalue {i32, i1} %t, 1
   ret i1 %obit
 
-; X32: test10:
+; X32-LABEL: test10:
 ; X32: incl
 ; X32-NEXT: seto
 
-; X64: test10:
+; X64-LABEL: test10:
 ; X64: incl
 ; X64-NEXT: seto
 }
diff --git a/test/CodeGen/X86/aes_intrinsics.ll b/test/CodeGen/X86/aes_intrinsics.ll
new file mode 100644
index 000000000000..fc1a2cc61289
--- /dev/null
+++ b/test/CodeGen/X86/aes_intrinsics.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+aes,-avx | FileCheck %s
+
+define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: aesdec
+  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: aesdeclast
+  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: aesenc
+  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: aesenclast
+  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
+  ; CHECK: aesimc
+  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
+  ; CHECK: aeskeygenassist
+  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/alias-error.ll b/test/CodeGen/X86/alias-error.ll
new file mode 100644
index 000000000000..8f01dcff2bf9
--- /dev/null
+++ b/test/CodeGen/X86/alias-error.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -mtriple=i686-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s
+
+@a = external global i32
+@b = alias i32* @a
+; CHECK: b: Target doesn't support aliases to declarations
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index f92027998a40..d0a262d390da 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -1,26 +1,38 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t
-; RUN: grep globl %t | count 6
-; RUN: grep weak %t  | count 1
-; RUN: grep hidden %t | count 1
-; RUN: grep protected %t | count 1
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false | FileCheck %s
 
-@bar = external global i32
+@bar = global i32 42
+
+; CHECK-DAG: .globl	foo1
 @foo1 = alias i32* @bar
+
+; CHECK-DAG: .globl	foo2
 @foo2 = alias i32* @bar
 
 %FunTy = type i32()
 
-declare i32 @foo_f()
+define i32 @foo_f() {
+  ret i32 0
+}
+; CHECK-DAG: .weak	bar_f
 @bar_f = alias weak %FunTy* @foo_f
 
+@bar_l = alias linkonce_odr i32* @bar
+; CHECK-DAG: .weak	bar_l
+
 @bar_i = alias internal i32* @bar
 
+; CHECK-DAG: .globl	A
 @A = alias bitcast (i32* @bar to i64*)
 
+; CHECK-DAG: .globl	bar_h
+; CHECK-DAG: .hidden	bar_h
 @bar_h = hidden alias i32* @bar
 
+; CHECK-DAG: .globl	bar_p
+; CHECK-DAG: .protected	bar_p
 @bar_p = protected alias i32* @bar
 
+; CHECK-DAG: .globl	test
 define i32 @test() {
 entry:
    %tmp = load i32* @foo1
diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll
index a45284e10cf4..2b5a205086e9 100644
--- a/test/CodeGen/X86/alloca-align-rounding-32.ll
+++ b/test/CodeGen/X86/alloca-align-rounding-32.ll
@@ -16,5 +16,5 @@ define void @foo2(i32 %h) {
   ret void
 ; CHECK: foo2
 ; CHECK: andl $-32, %esp
-; CHECK: andl $-32, %eax
+; CHECK: andl $-32, %e{{..}}
 }
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 3d76fb0aa25b..74b9470db752 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
index 38db88af12c2..70c24615a7e2 100644
--- a/test/CodeGen/X86/and-su.ll
+++ b/test/CodeGen/X86/and-su.ll
@@ -3,7 +3,7 @@
 ; Don't duplicate the load.
 
 define fastcc i32 @foo(i32* %p) nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: andl $10, %eax
 ; CHECK: je
 	%t0 = load i32* %p
@@ -18,7 +18,7 @@ bb76:
 
 define fastcc double @bar(i32 %hash, double %x, double %y) nounwind {
 entry:
-; CHECK: bar:
+; CHECK-LABEL: bar:
   %0 = and i32 %hash, 15
   %1 = icmp ult i32 %0, 8
   br i1 %1, label %bb11, label %bb10
diff --git a/test/CodeGen/X86/anyregcc-crash.ll b/test/CodeGen/X86/anyregcc-crash.ll
new file mode 100644
index 000000000000..cf6f6edb31a8
--- /dev/null
+++ b/test/CodeGen/X86/anyregcc-crash.ll
@@ -0,0 +1,17 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin 2>&1 | FileCheck %s
+;
+; Check that misuse of anyregcc results in a compile time error.
+
+; CHECK: LLVM ERROR: ran out of registers during register allocation
+define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
+                        i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12,
+                        i64 %v13, i64 %v14, i64 %v15, i64 %v16) {
+entry:
+  %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
+                i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
+                i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12,
+                i64 %v13, i64 %v14, i64 %v15, i64 %v16)
+  ret i64 %result
+}
+
+declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll
new file mode 100644
index 000000000000..8109f879f217
--- /dev/null
+++ b/test/CodeGen/X86/anyregcc.ll
@@ -0,0 +1,348 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Stackmap Header: no constants - 8 callsites
+; CHECK-LABEL: .section	__LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .long   0
+; Num Constants
+; CHECK-NEXT:   .long   0
+; Num Callsites
+; CHECK-NEXT:   .long   8
+
+; test
+; CHECK-NEXT:   .long   0
+; CHECK-LABEL:  .long   L{{.*}}-_test
+; CHECK-NEXT:   .short  0
+; 3 locations
+; CHECK-NEXT:   .short  3
+; Loc 0: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Constant 3
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long 3
+define i64 @test() nounwind ssp uwtable {
+entry:
+  call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
+  ret i64 0
+}
+
+; property access 1 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-NEXT:   .long   1
+; CHECK-LABEL:  .long   L{{.*}}-_property_access1
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 12297829382473034410 to i8*
+  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 15, i8* %f, i32 1, i8* %obj)
+  ret i64 %ret
+}
+
+; property access 2 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-NEXT:   .long   2
+; CHECK-LABEL:  .long   L{{.*}}-_property_access2
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access2() nounwind ssp uwtable {
+entry:
+  %obj = alloca i64, align 8
+  %f = inttoptr i64 12297829382473034410 to i8*
+  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %f, i32 1, i64* %obj)
+  ret i64 %ret
+}
+
+; property access 3 - %obj is a frame index
+; CHECK-NEXT:   .long   3
+; CHECK-LABEL:  .long   L{{.*}}-_property_access3
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register <-- this will be folded once folding for FI is implemented
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access3() nounwind ssp uwtable {
+entry:
+  %obj = alloca i64, align 8
+  %f = inttoptr i64 12297829382473034410 to i8*
+  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 15, i8* %f, i32 0, i64* %obj)
+  ret i64 %ret
+}
+
+; anyreg_test1
+; CHECK-NEXT:   .long   4
+; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test1
+; CHECK-NEXT:   .short  0
+; 14 locations
+; CHECK-NEXT:   .short  14
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 3: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 4: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 5: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 6: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 7: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 8: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 9: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 10: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 11: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 12: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 13: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 12297829382473034410 to i8*
+  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  ret i64 %ret
+}
+
+; anyreg_test2
+; CHECK-NEXT:   .long   5
+; CHECK-LABEL:  .long   L{{.*}}-_anyreg_test2
+; CHECK-NEXT:   .short  0
+; 14 locations
+; CHECK-NEXT:   .short  14
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 3: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 4: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 5: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 6: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 7: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 8: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 9: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 10: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 11: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 12: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 13: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 12297829382473034410 to i8*
+  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  ret i64 %ret
+}
+
+; Test spilling the return value of an anyregcc call.
+;
+; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!"
+;
+; CHECK-LABEL: .long 12
+; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 3
+; Loc 0: Register (some register that will be spilled to the stack)
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Register RDI
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 5
+; CHECK-NEXT: .long  0
+; Loc 2: Register RSI
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 4
+; CHECK-NEXT: .long  0
+define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+  %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+  tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  ret i64 %result
+}
+
+; Test spilling the arguments of an anyregcc call.
+;
+; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
+;
+; CHECK-LABEL: .long 13
+; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 5
+; Loc 0: Return a register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Arg0 in a Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 2: Arg1 in a Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 3: Arg2 spilled to RSP +
+; CHECK-NEXT: .byte  3
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 7
+; CHECK-NEXT: .long  {{[0-9]+}}
+; Loc 4: Arg3 spilled to RSP +
+; CHECK-NEXT: .byte  3
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 7
+; CHECK-NEXT: .long  {{[0-9]+}}
+define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+  tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  ret i64 %result
+}
+
+declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index aaedf18481b5..4ba1e21b8a15 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -2,11 +2,11 @@
 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: leaq    (%rdi), %rax
 ; CHECK-NEXT: movl    %esi, %ecx
 ; CHECK-NEXT: monitor
-; WIN64: foo:
+; WIN64-LABEL: foo:
 ; WIN64:      leaq    (%rcx), %rax
 ; WIN64-NEXT: movl    %edx, %ecx
 ; WIN64-NEXT: movl    %r8d, %edx
@@ -19,11 +19,11 @@ entry:
 
 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
 
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: movl    %edi, %ecx
 ; CHECK-NEXT: movl    %esi, %eax
 ; CHECK-NEXT: mwait
-; WIN64: bar:
+; WIN64-LABEL: bar:
 ; WIN64:      movl    %edx, %eax
 ; WIN64-NEXT: mwait
 define void @bar(i32 %E, i32 %H) nounwind {
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
index 6c569d624e06..ebf585a39a28 100644
--- a/test/CodeGen/X86/asm-global-imm.ll
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -7,7 +7,7 @@ target triple = "i686-apple-darwin9.0.0d2"
 @str = external global [12 x i8]		; <[12 x i8]*> [#uses=1]
 
 define void @foo() {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: ret
 ; CHECK: test1 $_GV
 ; CHECK-NOT: ret
diff --git a/test/CodeGen/X86/asm-modifier-P.ll b/test/CodeGen/X86/asm-modifier-P.ll
index 6139da8c3685..0aa55556d8f3 100644
--- a/test/CodeGen/X86/asm-modifier-P.ll
+++ b/test/CodeGen/X86/asm-modifier-P.ll
@@ -21,20 +21,20 @@ define void @test1() nounwind {
 entry:
 ; P suffix removes (rip) in -static 64-bit mode.
 
-; CHECK-PIC-64: test1:
+; CHECK-PIC-64-LABEL: test1:
 ; CHECK-PIC-64: movq	G@GOTPCREL(%rip), %rax
 ; CHECK-PIC-64: frob (%rax) x
 ; CHECK-PIC-64: frob (%rax) x
 
-; CHECK-STATIC-64: test1:
+; CHECK-STATIC-64-LABEL: test1:
 ; CHECK-STATIC-64: frob G(%rip) x
 ; CHECK-STATIC-64: frob G x
 
-; CHECK-PIC-32: test1:
+; CHECK-PIC-32-LABEL: test1:
 ; CHECK-PIC-32: frob G x
 ; CHECK-PIC-32: frob G x
 
-; CHECK-STATIC-32: test1:
+; CHECK-STATIC-32-LABEL: test1:
 ; CHECK-STATIC-32: frob G x
 ; CHECK-STATIC-32: frob G x
 
@@ -45,25 +45,25 @@ entry:
 
 define void @test3() nounwind {
 entry:
-; CHECK-STATIC-64: test3:
+; CHECK-STATIC-64-LABEL: test3:
 ; CHECK-STATIC-64: call bar
 ; CHECK-STATIC-64: call test3
 ; CHECK-STATIC-64: call $bar
 ; CHECK-STATIC-64: call $test3
 
-; CHECK-STATIC-32: test3:
+; CHECK-STATIC-32-LABEL: test3:
 ; CHECK-STATIC-32: call bar
 ; CHECK-STATIC-32: call test3
 ; CHECK-STATIC-32: call $bar
 ; CHECK-STATIC-32: call $test3
 
-; CHECK-PIC-64: test3:
+; CHECK-PIC-64-LABEL: test3:
 ; CHECK-PIC-64: call bar@PLT
 ; CHECK-PIC-64: call test3@PLT
 ; CHECK-PIC-64: call $bar
 ; CHECK-PIC-64: call $test3
 
-; CHECK-PIC-32: test3:
+; CHECK-PIC-32-LABEL: test3:
 ; CHECK-PIC-32: call bar@PLT
 ; CHECK-PIC-32: call test3@PLT
 ; CHECK-PIC-32: call $bar
diff --git a/test/CodeGen/X86/asm-modifier.ll b/test/CodeGen/X86/asm-modifier.ll
index 44f972ec7198..47b185a15766 100644
--- a/test/CodeGen/X86/asm-modifier.ll
+++ b/test/CodeGen/X86/asm-modifier.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin9.6"
 
 define i32 @test1() nounwind {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: movw	%gs:6, %ax
   %asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1]
   %0 = zext i16 %asmtmp.i to i32                  ; <i32> [#uses=1]
@@ -14,7 +14,7 @@ entry:
 
 define zeroext i16 @test2(i32 %address) nounwind {
 entry:
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movw	%gs:(%eax), %ax
   %asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1]
   ret i16 %asmtmp
@@ -25,7 +25,7 @@ entry:
 
 define void @test3() nounwind {
 entry:
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: movl _n, %eax
   call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @n) nounwind
   ret void
@@ -33,7 +33,7 @@ entry:
 
 define void @test4() nounwind {
 entry:
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movl	L_y$non_lazy_ptr, %ecx
 ; CHECK: movl (%ecx), %eax
   call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @y) nounwind
diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
index a3bbea3c996b..d1b52a4ec3bb 100644
--- a/test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -3,9 +3,10 @@
 ; Additional tests for 64-bit divide bypass
 
 define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
-; CHECK: Test_get_quotient:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-LABEL: Test_get_quotient:
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: ret
@@ -16,9 +17,10 @@ define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
 }
 
 define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
-; CHECK: Test_get_remainder:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-LABEL: Test_get_remainder:
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: ret
@@ -29,9 +31,10 @@ define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
 }
 
 define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
-; CHECK: Test_get_quotient_and_remainder:
-; CHECK: orq %rsi, %rcx
-; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-LABEL: Test_get_quotient_and_remainder:
+; CHECK: movq %rdi, %rax
+; CHECK: orq %rsi, %rax
+; CHECK-NEXT: testq $-65536, %rax
 ; CHECK-NEXT: je
 ; CHECK: idivq
 ; CHECK: divw
diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll
index 4612940445cb..79001e5de192 100644
--- a/test/CodeGen/X86/atom-bypass-slow-division.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
 
 define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
-; CHECK: Test_get_quotient:
+; CHECK-LABEL: Test_get_quotient:
 ; CHECK: orl %ecx, %edx
 ; CHECK-NEXT: testl $-256, %edx
 ; CHECK-NEXT: je
@@ -14,7 +14,7 @@ define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
 }
 
 define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind {
-; CHECK: Test_get_remainder:
+; CHECK-LABEL: Test_get_remainder:
 ; CHECK: orl %ecx, %edx
 ; CHECK-NEXT: testl $-256, %edx
 ; CHECK-NEXT: je
@@ -27,7 +27,7 @@ define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind {
 }
 
 define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
-; CHECK: Test_get_quotient_and_remainder:
+; CHECK-LABEL: Test_get_quotient_and_remainder:
 ; CHECK: orl %ecx, %edx
 ; CHECK-NEXT: testl $-256, %edx
 ; CHECK-NEXT: je
@@ -44,7 +44,7 @@ define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
 }
 
 define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
-; CHECK: Test_use_div_and_idiv:
+; CHECK-LABEL: Test_use_div_and_idiv:
 ; CHECK: idivl
 ; CHECK: divb
 ; CHECK: divl
@@ -58,14 +58,14 @@ define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
 }
 
 define i32 @Test_use_div_imm_imm() nounwind {
-; CHECK: Test_use_div_imm_imm:
+; CHECK-LABEL: Test_use_div_imm_imm:
 ; CHECK: movl $64
   %resultdiv = sdiv i32 256, 4
   ret i32 %resultdiv
 }
 
 define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
-; CHECK: Test_use_div_reg_imm:
+; CHECK-LABEL: Test_use_div_reg_imm:
 ; CHECK-NOT: test
 ; CHECK-NOT: idiv
 ; CHECK-NOT: divb
@@ -74,7 +74,7 @@ define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
 }
 
 define i32 @Test_use_rem_reg_imm(i32 %a) nounwind {
-; CHECK: Test_use_rem_reg_imm:
+; CHECK-LABEL: Test_use_rem_reg_imm:
 ; CHECK-NOT: test
 ; CHECK-NOT: idiv
 ; CHECK-NOT: divb
@@ -83,7 +83,7 @@ define i32 @Test_use_rem_reg_imm(i32 %a) nounwind {
 }
 
 define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind {
-; CHECK: Test_use_divrem_reg_imm:
+; CHECK-LABEL: Test_use_divrem_reg_imm:
 ; CHECK-NOT: test
 ; CHECK-NOT: idiv
 ; CHECK-NOT: divb
@@ -94,7 +94,7 @@ define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind {
 }
 
 define i32 @Test_use_div_imm_reg(i32 %a) nounwind {
-; CHECK: Test_use_div_imm_reg:
+; CHECK-LABEL: Test_use_div_imm_reg:
 ; CHECK: test
 ; CHECK: idiv
 ; CHECK: divb
@@ -103,7 +103,7 @@ define i32 @Test_use_div_imm_reg(i32 %a) nounwind {
 }
 
 define i32 @Test_use_rem_imm_reg(i32 %a) nounwind {
-; CHECK: Test_use_rem_imm_reg:
+; CHECK-LABEL: Test_use_rem_imm_reg:
 ; CHECK: test
 ; CHECK: idiv
 ; CHECK: divb
diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll
index 632781130d06..48f2d4c11346 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -2,13 +2,15 @@
 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
 ; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux  | FileCheck -check-prefix=ATOM64 %s
 ; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
+; RUN: llc < %s -mcpu=slm -mtriple=i686-linux  | FileCheck -check-prefix=SLM32 %s
+; RUN: llc < %s -mcpu=slm -mtriple=x86_64-linux  | FileCheck -check-prefix=SLM64 %s
 
 
 ; fn_ptr.ll
 %class.A = type { i32 (...)** }
 
 define i32 @test1() #0 {
-  ;ATOM: test1
+  ;ATOM-LABEL: test1:
 entry:
   %call = tail call %class.A* @_Z3facv()
   %0 = bitcast %class.A* %call to void (%class.A*)***
@@ -20,6 +22,10 @@ entry:
   ;ATOM64: movq (%rcx), %rcx
   ;ATOM64: callq *%rcx
   ;ATOM-NOT64: callq *(%rcx)
+  ;SLM32: movl (%ecx), %ecx
+  ;SLM32: calll *%ecx
+  ;SLM64: movq (%rcx), %rcx
+  ;SLM64: callq *%rcx
   tail call void %1(%class.A* %call)
   ret i32 0
 }
@@ -30,7 +36,7 @@ declare %class.A* @_Z3facv() #1
 @p = external global void (i32)**
 
 define i32 @test2() #0 {
-  ;ATOM: test2
+  ;ATOM-LABEL: test2:
 entry:
   %0 = load void (i32)*** @p, align 8
   %1 = load void (i32)** %0, align 8
@@ -40,6 +46,10 @@ entry:
   ;ATOM64: movq (%rax), %rax
   ;ATOM64: callq *%rax
   ;ATOM-NOT64: callq *(%rax)
+  ;SLM32: movl (%eax), %eax
+  ;SLM32: calll *%eax
+  ;SLM64: movq (%rax), %rax
+  ;SLM64: callq *%rax
   tail call void %1(i32 2)
   ret i32 0
 }
diff --git a/test/CodeGen/X86/atom-lea-addw-bug.ll b/test/CodeGen/X86/atom-lea-addw-bug.ll
new file mode 100644
index 000000000000..5cda2df432fc
--- /dev/null
+++ b/test/CodeGen/X86/atom-lea-addw-bug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mcpu=atom | FileCheck %s
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target triple = "x86_64-apple-darwin12.5.0"
+
+define i32 @DoLayout() {
+entry:
+  %tmp1 = load i16* undef, align 2
+  %tmp17 = load i16* null, align 2
+  %tmp19 = load i16* undef, align 2
+  %shl = shl i16 %tmp19, 1
+  %add55 = add i16 %tmp17, %tmp1
+  %add57 = add i16 %add55, %shl
+  %conv60 = zext i16 %add57 to i32
+  %add61 = add nsw i32 %conv60, 0
+  %conv63 = and i32 %add61, 65535
+  ret i32 %conv63
+; CHECK: addw
+}
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
index 19482e13d8c8..1df1974dc494 100644
--- a/test/CodeGen/X86/atom-lea-sp.ll
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -5,13 +5,13 @@ declare void @use_arr(i8*)
 declare void @many_params(i32, i32, i32, i32, i32, i32)
 
 define void @test1() nounwind {
-; ATOM: test1:
+; ATOM-LABEL: test1:
 ; ATOM: leal -1052(%esp), %esp
 ; ATOM-NOT: sub
 ; ATOM: call
 ; ATOM: leal 1052(%esp), %esp
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: subl
 ; CHECK: call
 ; CHECK-NOT: lea
@@ -22,23 +22,23 @@ define void @test1() nounwind {
 }
 
 define void @test2() nounwind {
-; ATOM: test2:
+; ATOM-LABEL: test2:
 ; ATOM: leal -28(%esp), %esp
 ; ATOM: call
 ; ATOM: leal 28(%esp), %esp
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK-NOT: lea
   call void @many_params(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
   ret void
 }
 
 define void @test3() nounwind {
-; ATOM: test3:
+; ATOM-LABEL: test3:
 ; ATOM: leal -8(%esp), %esp
 ; ATOM: leal 8(%esp), %esp
 
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK-NOT: lea
   %x = alloca i32, align 4
   %y = alloca i32, align 4
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
index 0d97e8535824..fd18472bff8a 100644
--- a/test/CodeGen/X86/atom-sched.ll
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -1,4 +1,5 @@
 ; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
+; RUN: llc <%s -O2 -mcpu=slm -march=x86 -relocation-model=static | FileCheck -check-prefix=slm %s
 ; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
 ;
 
@@ -13,6 +14,9 @@ define void @func() nounwind uwtable {
 ; atom: imull
 ; atom-NOT: movl
 ; atom: imull
+; slm: imull
+; slm-NOT: movl
+; slm: imull
 ; CHECK: imull
 ; CHECK: movl
 ; CHECK: imull
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
index 05e630be153c..aa057577a042 100644
--- a/test/CodeGen/X86/atomic-dagsched.ll
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -34,8 +34,8 @@ dim_0_vector_pre_head.i:                          ; preds = %loop
 vector_kernel_entry.i:                            ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i
   %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ]
   %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
-  %8 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
-  %asr.iv911 = bitcast i8* %asr.iv9 to <8 x i32> addrspace(1)*
+  %8 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
+  %asr.iv911 = addrspacecast i8* %asr.iv9 to <8 x i32> addrspace(1)*
   %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4
   %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
   %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
@@ -73,8 +73,8 @@ dim_0_pre_head.i:                                 ; preds = %scalarIf.i
 
 scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i
   %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
-  %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)*
-  %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+  %23 = addrspacecast i8* %asr.iv6 to i32 addrspace(1)*
+  %24 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
   %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
   %25 = load i32 addrspace(1)* %scevgep16, align 4
   %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll
index 62f784f69608..1cfbc49ab1c9 100644
--- a/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -97,7 +97,7 @@ define void @atomic_maxmin_i6432() {
 @id = internal global i64 0, align 8
 
 define void @tf_bug(i8* %ptr) nounwind {
-; PIC: tf_bug:
+; PIC-LABEL: tf_bug:
 ; PIC: movl _id-L1$pb(
 ; PIC: movl (_id-L1$pb)+4(
   %tmp1 = atomicrmw add i64* @id, i64 1 seq_cst
diff --git a/test/CodeGen/X86/atomic-or.ll b/test/CodeGen/X86/atomic-or.ll
index d759beb2caa8..1687e07d57e0 100644
--- a/test/CodeGen/X86/atomic-or.ll
+++ b/test/CodeGen/X86/atomic-or.ll
@@ -7,7 +7,7 @@ entry:
   %p.addr = alloca i64*, align 8
   store i64* %p, i64** %p.addr, align 8
   %tmp = load i64** %p.addr, align 8
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: movl    $2147483648, %eax
 ; CHECK: lock
 ; CHECK-NEXT: orq %r{{.*}}, (%r{{.*}})
@@ -20,7 +20,7 @@ entry:
   %p.addr = alloca i64*, align 8
   store i64* %p, i64** %p.addr, align 8
   %tmp = load i64** %p.addr, align 8
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: lock
 ; CHECK-NEXT: orq $2147483644, (%r{{.*}})
   %0 = atomicrmw or i64* %tmp, i64 2147483644 seq_cst
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
index 6b3a6b224dba..bdd25e6a2a56 100644
--- a/test/CodeGen/X86/atomic_add.ll
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -4,7 +4,7 @@
 
 define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: sub1:
+; CHECK-LABEL: sub1:
 ; CHECK: subl
   %0 = atomicrmw sub i32* %p, i32 %v monotonic
   ret void
@@ -12,7 +12,7 @@ entry:
 
 define void @inc4(i64* nocapture %p) nounwind ssp {
 entry:
-; CHECK: inc4:
+; CHECK-LABEL: inc4:
 ; CHECK: incq
   %0 = atomicrmw add i64* %p, i64 1 monotonic
   ret void
@@ -20,7 +20,7 @@ entry:
 
 define void @add8(i64* nocapture %p) nounwind ssp {
 entry:
-; CHECK: add8:
+; CHECK-LABEL: add8:
 ; CHECK: addq $2
   %0 = atomicrmw add i64* %p, i64 2 monotonic
   ret void
@@ -28,7 +28,7 @@ entry:
 
 define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: add4:
+; CHECK-LABEL: add4:
 ; CHECK: addq
   %0 = sext i32 %v to i64		; <i64> [#uses=1]
   %1 = atomicrmw add i64* %p, i64 %0 monotonic
@@ -37,7 +37,7 @@ entry:
 
 define void @inc3(i8* nocapture %p) nounwind ssp {
 entry:
-; CHECK: inc3:
+; CHECK-LABEL: inc3:
 ; CHECK: incb
   %0 = atomicrmw add i8* %p, i8 1 monotonic
   ret void
@@ -45,7 +45,7 @@ entry:
 
 define void @add7(i8* nocapture %p) nounwind ssp {
 entry:
-; CHECK: add7:
+; CHECK-LABEL: add7:
 ; CHECK: addb $2
   %0 = atomicrmw add i8* %p, i8 2 monotonic
   ret void
@@ -53,7 +53,7 @@ entry:
 
 define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: add3:
+; CHECK-LABEL: add3:
 ; CHECK: addb
   %0 = trunc i32 %v to i8		; <i8> [#uses=1]
   %1 = atomicrmw add i8* %p, i8 %0 monotonic
@@ -62,7 +62,7 @@ entry:
 
 define void @inc2(i16* nocapture %p) nounwind ssp {
 entry:
-; CHECK: inc2:
+; CHECK-LABEL: inc2:
 ; CHECK: incw
   %0 = atomicrmw add i16* %p, i16 1 monotonic
   ret void
@@ -70,7 +70,7 @@ entry:
 
 define void @add6(i16* nocapture %p) nounwind ssp {
 entry:
-; CHECK: add6:
+; CHECK-LABEL: add6:
 ; CHECK: addw $2
   %0 = atomicrmw add i16* %p, i16 2 monotonic
   ret void
@@ -78,7 +78,7 @@ entry:
 
 define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: add2:
+; CHECK-LABEL: add2:
 ; CHECK: addw
 	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
   %1 = atomicrmw add i16* %p, i16 %0 monotonic
@@ -87,7 +87,7 @@ entry:
 
 define void @inc1(i32* nocapture %p) nounwind ssp {
 entry:
-; CHECK: inc1:
+; CHECK-LABEL: inc1:
 ; CHECK: incl
   %0 = atomicrmw add i32* %p, i32 1 monotonic
   ret void
@@ -95,7 +95,7 @@ entry:
 
 define void @add5(i32* nocapture %p) nounwind ssp {
 entry:
-; CHECK: add5:
+; CHECK-LABEL: add5:
 ; CHECK: addl $2
   %0 = atomicrmw add i32* %p, i32 2 monotonic
   ret void
@@ -103,7 +103,7 @@ entry:
 
 define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: add1:
+; CHECK-LABEL: add1:
 ; CHECK: addl
   %0 = atomicrmw add i32* %p, i32 %v monotonic
   ret void
@@ -111,7 +111,7 @@ entry:
 
 define void @dec4(i64* nocapture %p) nounwind ssp {
 entry:
-; CHECK: dec4:
+; CHECK-LABEL: dec4:
 ; CHECK: decq
   %0 = atomicrmw sub i64* %p, i64 1 monotonic
   ret void
@@ -119,7 +119,7 @@ entry:
 
 define void @sub8(i64* nocapture %p) nounwind ssp {
 entry:
-; CHECK: sub8:
+; CHECK-LABEL: sub8:
 ; CHECK: subq $2
   %0 = atomicrmw sub i64* %p, i64 2 monotonic
   ret void
@@ -127,7 +127,7 @@ entry:
 
 define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: sub4:
+; CHECK-LABEL: sub4:
 ; CHECK: subq
 	%0 = sext i32 %v to i64		; <i64> [#uses=1]
   %1 = atomicrmw sub i64* %p, i64 %0 monotonic
@@ -136,7 +136,7 @@ entry:
 
 define void @dec3(i8* nocapture %p) nounwind ssp {
 entry:
-; CHECK: dec3:
+; CHECK-LABEL: dec3:
 ; CHECK: decb
   %0 = atomicrmw sub i8* %p, i8 1 monotonic
   ret void
@@ -144,7 +144,7 @@ entry:
 
 define void @sub7(i8* nocapture %p) nounwind ssp {
 entry:
-; CHECK: sub7:
+; CHECK-LABEL: sub7:
 ; CHECK: subb $2
   %0 = atomicrmw sub i8* %p, i8 2 monotonic
   ret void
@@ -152,7 +152,7 @@ entry:
 
 define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: sub3:
+; CHECK-LABEL: sub3:
 ; CHECK: subb
 	%0 = trunc i32 %v to i8		; <i8> [#uses=1]
   %1 = atomicrmw sub i8* %p, i8 %0 monotonic
@@ -161,7 +161,7 @@ entry:
 
 define void @dec2(i16* nocapture %p) nounwind ssp {
 entry:
-; CHECK: dec2:
+; CHECK-LABEL: dec2:
 ; CHECK: decw
   %0 = atomicrmw sub i16* %p, i16 1 monotonic
   ret void
@@ -169,7 +169,7 @@ entry:
 
 define void @sub6(i16* nocapture %p) nounwind ssp {
 entry:
-; CHECK: sub6:
+; CHECK-LABEL: sub6:
 ; CHECK: subw $2
   %0 = atomicrmw sub i16* %p, i16 2 monotonic
   ret void
@@ -177,7 +177,7 @@ entry:
 
 define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
 entry:
-; CHECK: sub2:
+; CHECK-LABEL: sub2:
 ; CHECK-NOT: negl
 ; CHECK: subw
 	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
@@ -187,7 +187,7 @@ entry:
 
 define void @dec1(i32* nocapture %p) nounwind ssp {
 entry:
-; CHECK: dec1:
+; CHECK-LABEL: dec1:
 ; CHECK: decl
   %0 = atomicrmw sub i32* %p, i32 1 monotonic
   ret void
@@ -195,7 +195,7 @@ entry:
 
 define void @sub5(i32* nocapture %p) nounwind ssp {
 entry:
-; CHECK: sub5:
+; CHECK-LABEL: sub5:
 ; CHECK: subl $2
   %0 = atomicrmw sub i32* %p, i32 2 monotonic
   ret void
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index 4aa337033df6..a9da1ec067ca 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -240,15 +240,15 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
 ; CHECK-NEXT: vpaddq %xmm
-; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsrlq $32, %xmm
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
+; CHECK-NEXT: vpaddq %xmm
+; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsrlq $32, %xmm
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
 ; CHECK-NEXT: vpaddq %xmm
-; CHECK-NEXT: vpaddq %xmm
 ; CHECK-NEXT: vpsrlq $32, %xmm
 ; CHECK-NEXT: vpmuludq %xmm
 ; CHECK-NEXT: vpsllq $32, %xmm
@@ -269,4 +269,3 @@ define <4 x float> @int_sqrt_ss() {
  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
  ret <4 x float> %x2
 }
-
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 64c4627c47c3..1fd9085838df 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -122,10 +122,10 @@ define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
   ret <16 x i16> %res
 }
 
-;;; Check that VMOVPQIto64rr generates the assembly string "vmovd".  Previously
+;;; Check that VMOVPQIto64rr generates the assembly string "vmovq".  Previously
 ;;; an incorrect mnemonic of "movd" was printed for this instruction.
 ; CHECK: VMOVPQIto64rr
-; CHECK: vmovd
+; CHECK: vmovq
 define i64 @VMOVPQIto64rr(<2 x i64> %a) {
 entry:
   %vecext.i = extractelement <2 x i64> %a, i32 0
diff --git a/test/CodeGen/X86/avx-bitcast.ll b/test/CodeGen/X86/avx-bitcast.ll
index ecc71be7c0dc..c9d828c1f6e2 100644
--- a/test/CodeGen/X86/avx-bitcast.ll
+++ b/test/CodeGen/X86/avx-bitcast.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
 ; CHECK: vmovsd (%
-; CHECK-NEXT: vmovd %xmm
+; CHECK-NEXT: vmovq %xmm
 define i64 @bitcasti64tof64() {
   %a = load double* undef
   %b = bitcast double %a to i64
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 188efe26d92a..a98e0761ce31 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -2,7 +2,7 @@
 
 ; AVX128 tests:
 
-;CHECK: vsel_float
+;CHECK-LABEL: vsel_float:
 ;CHECK: vblendvps
 ;CHECK: ret
 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -11,7 +11,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 }
 
 
-;CHECK: vsel_i32
+;CHECK-LABEL: vsel_i32:
 ;CHECK: vblendvps
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
@@ -20,7 +20,7 @@ define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
 }
 
 
-;CHECK: vsel_double
+;CHECK-LABEL: vsel_double:
 ;CHECK: vblendvpd
 ;CHECK: ret
 define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
@@ -29,7 +29,7 @@ define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
 }
 
 
-;CHECK: vsel_i64
+;CHECK-LABEL: vsel_i64:
 ;CHECK: vblendvpd
 ;CHECK: ret
 define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
@@ -38,7 +38,7 @@ define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
 }
 
 
-;CHECK: vsel_i8
+;CHECK-LABEL: vsel_i8:
 ;CHECK: vpblendvb
 ;CHECK: ret
 define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
@@ -50,7 +50,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
 ; AVX256 tests:
 
 
-;CHECK: vsel_float
+;CHECK-LABEL: vsel_float8:
 ;CHECK: vblendvps
 ;CHECK: ret
 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
@@ -58,7 +58,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
   ret <8 x float> %vsel
 }
 
-;CHECK: vsel_i32
+;CHECK-LABEL: vsel_i328:
 ;CHECK: vblendvps
 ;CHECK: ret
 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
@@ -66,7 +66,7 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
   ret <8 x i32> %vsel
 }
 
-;CHECK: vsel_double
+;CHECK-LABEL: vsel_double8:
 ;CHECK: vblendvpd
 ;CHECK: ret
 define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
@@ -74,7 +74,7 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
   ret <8 x double> %vsel
 }
 
-;CHECK: vsel_i64
+;CHECK-LABEL: vsel_i648:
 ;CHECK: vblendvpd
 ;CHECK: ret
 define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
@@ -83,8 +83,8 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
 }
 
 ;; TEST blend + compares
-; CHECK: A
-define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
+; CHECK: testa
+define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
   ; CHECK: vcmplepd
   ; CHECK: vblendvpd
   %max_is_x = fcmp oge <2 x double> %x, %y
@@ -92,8 +92,8 @@ define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %max
 }
 
-; CHECK: B
-define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
+; CHECK: testb
+define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
   ; CHECK: vcmpnlepd
   ; CHECK: vblendvpd
   %min_is_x = fcmp ult <2 x double> %x, %y
diff --git a/test/CodeGen/X86/avx-brcond.ll b/test/CodeGen/X86/avx-brcond.ll
index d52ae52e0b98..4313a1594fb2 100644
--- a/test/CodeGen/X86/avx-brcond.ll
+++ b/test/CodeGen/X86/avx-brcond.ll
@@ -5,7 +5,7 @@ declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
 
 define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: vptest
 ; CHECK-NEXT:	jne
 ; CHECK: ret
@@ -29,7 +29,7 @@ return:
 
 define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: vptest
 ; CHECK-NEXT:	jne
 ; CHECK: ret
@@ -53,7 +53,7 @@ return:
 
 define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: vptest
 ; CHECK-NEXT:	jae
 ; CHECK: ret
@@ -77,7 +77,7 @@ return:
 
 define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: vptest
 ; CHECK-NEXT:	jae
 ; CHECK: ret
@@ -101,7 +101,7 @@ return:
 
 define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: vptest
 ; CHECK-NEXT:	jne
 ; CHECK: ret
@@ -125,7 +125,7 @@ return:
 
 define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: vptest
 ; CHECK-NEXT:	je
 ; CHECK: ret
diff --git a/test/CodeGen/X86/avx-fp2int.ll b/test/CodeGen/X86/avx-fp2int.ll
index a3aadde2bdd1..8beaac6a780b 100755
--- a/test/CodeGen/X86/avx-fp2int.ll
+++ b/test/CodeGen/X86/avx-fp2int.ll
@@ -2,10 +2,10 @@
 
 ;; Check that FP_TO_SINT and FP_TO_UINT generate convert with truncate
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: vcvttpd2dqy
 ; CHECK: ret
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: vcvttpd2dqy
 ; CHECK: ret
 
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 055072098a25..7337815a39ac 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32)
 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
-  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
   %2 = load <16 x float>* %y, align 16
   %3 = fadd <16 x float> %2, %1
   ret <16 x float> %3
@@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ; preserved ymm6-ymm15
 ; WIN64: testf16_regs
 ; WIN64: call
-; WIN64: vaddps  {{%ymm[6-7]}}, %ymm0, %ymm0
-; WIN64: vaddps  {{%ymm[6-7]}}, %ymm1, %ymm1
+; WIN64: vaddps  {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
+; WIN64: vaddps  {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: ret
 
 ; preserved ymm8-ymm15
 ; X64: testf16_regs
 ; X64: call
-; X64: vaddps  {{%ymm[8-9]}}, %ymm0, %ymm0
-; X64: vaddps  {{%ymm[8-9]}}, %ymm1, %ymm1
+; X64: vaddps  {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
+; X64: vaddps  {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; X64: ret
 
 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
-  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
+  %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
   %2 = load <16 x float>* %y, align 16
   %3 = fadd <16 x float> %1, %b
   %4 = fadd <16 x float> %2, %3
@@ -166,4 +166,3 @@ entry:
   %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   ret <8 x float> %8
 }
-
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index eff92510348a..c94962b74ed1 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+avx -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
 
-; UNSAFE: maxpd:
+; UNSAFE-LABEL: maxpd:
 ; UNSAFE: vmaxpd {{.+}}, %xmm
 define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) {
   %max_is_x = fcmp oge <2 x double> %x, %y
@@ -8,7 +8,7 @@ define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %max
 }
 
-; UNSAFE: minpd:
+; UNSAFE-LABEL: minpd:
 ; UNSAFE: vminpd {{.+}}, %xmm
 define <2 x double> @minpd(<2 x double> %x, <2 x double> %y) {
   %min_is_x = fcmp ole <2 x double> %x, %y
@@ -16,7 +16,7 @@ define <2 x double> @minpd(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %min
 }
 
-; UNSAFE: maxps:
+; UNSAFE-LABEL: maxps:
 ; UNSAFE: vmaxps {{.+}}, %xmm
 define <4 x float> @maxps(<4 x float> %x, <4 x float> %y) {
   %max_is_x = fcmp oge <4 x float> %x, %y
@@ -24,7 +24,7 @@ define <4 x float> @maxps(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %max
 }
 
-; UNSAFE: minps:
+; UNSAFE-LABEL: minps:
 ; UNSAFE: vminps {{.+}}, %xmm
 define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
   %min_is_x = fcmp ole <4 x float> %x, %y
@@ -32,7 +32,7 @@ define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %min
 }
 
-; UNSAFE: vmaxpd:
+; UNSAFE-LABEL: vmaxpd:
 ; UNSAFE: vmaxpd {{.+}}, %ymm
 define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
   %max_is_x = fcmp oge <4 x double> %x, %y
@@ -40,7 +40,7 @@ define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
   ret <4 x double> %max
 }
 
-; UNSAFE: vminpd:
+; UNSAFE-LABEL: vminpd:
 ; UNSAFE: vminpd {{.+}}, %ymm
 define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
   %min_is_x = fcmp ole <4 x double> %x, %y
@@ -48,7 +48,7 @@ define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
   ret <4 x double> %min
 }
 
-; UNSAFE: vmaxps:
+; UNSAFE-LABEL: vmaxps:
 ; UNSAFE: vmaxps {{.+}}, %ymm
 define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
   %max_is_x = fcmp oge <8 x float> %x, %y
@@ -56,7 +56,7 @@ define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %max
 }
 
-; UNSAFE: vminps:
+; UNSAFE-LABEL: vminps:
 ; UNSAFE: vminps {{.+}}, %ymm
 define <8 x float> @vminps(<8 x float> %x, <8 x float> %y) {
   %min_is_x = fcmp ole <8 x float> %x, %y
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index b9c700051005..fb2287f52892 100755
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -154,6 +154,17 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
   ret <4 x i64> %extmask
 }
 
+; AVX-LABEL: sext_16i8_to_16i16
+; AVX: vpmovsxbw
+; AVX: vmovhlps
+; AVX: vpmovsxbw
+; AVX: ret
+define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
+ %X = load <16 x i8>* %ptr
+ %Y = sext <16 x i8> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
 ; AVX: sext_4i8_to_4i64
 ; AVX: vpslld  $24
 ; AVX: vpsrad  $24
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 01eb7361e293..d79dfcc076b0 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -103,9 +103,10 @@ define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
 
 ;;; Support variable shifts
 ; CHECK: _vshift08
-; CHECK: vextractf128 $1
 ; CHECK: vpslld $23
+; CHECK: vextractf128 $1
 ; CHECK: vpslld $23
+; CHECK: ret
 define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
   %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
   ret <8 x i32> %bitop
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
index e203c4ed0298..78b4888cfa16 100755
--- a/test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -3,6 +3,6 @@
 define <4 x i64> @test1(<4 x i64> %a) nounwind {
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ret <4 x i64>%b
- ; CHECK: test1:
+ ; CHECK-LABEL: test1:
  ; CHECK-NOT: vinsertf128
  }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 73faa1fe0d40..0956361c7e30 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -4,14 +4,14 @@
 define <4 x float> @test1(<4 x float> %a) nounwind {
   %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
   ret <4 x float> %b
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: vshufps
 ; CHECK: vpshufd
 }
 
 ; rdar://10538417
 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: vinsertf128
   %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
   %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
@@ -22,7 +22,7 @@ define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
 define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
   ret <4 x i64> %c
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: vperm2f128
 ; CHECK: ret
 }
@@ -30,7 +30,7 @@ define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x float> @test4(float %a) nounwind {
   %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
   ret <8 x float> %b
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: vinsertf128
 }
 
@@ -81,7 +81,7 @@ entry:
 define i32 @test9(<4 x i32> %a) nounwind {
 ; CHECK: test9
 ; CHECK: vpextrd
-  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4>
   %r = extractelement <8 x i32> %b, i32 2
 ; CHECK: ret
   ret i32 %r
@@ -220,7 +220,7 @@ define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline {
   ret <16 x i16> %t
 }
 
-;CHECK: test17
+;CHECK-LABEL: test17:
 ;CHECK-NOT: vinsertf128
 ;CHECK: ret
 define   <8 x float> @test17(<4 x float> %y) {
@@ -251,8 +251,8 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
 ; CHECK: swap8doubles
 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
 ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
-; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
-; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
+; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
 ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
 ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 5c01c2cc5b50..5d0781531f4d 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -20,7 +20,7 @@ entry:
   ret <16 x i16> %shuffle
 }
 
-; CHECK: vmovd
+; CHECK: vmovq
 ; CHECK-NEXT: vmovlhps %xmm
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
index d0077366444d..58d0a356909b 100755
--- a/test/CodeGen/X86/avx-trunc.ll
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -12,4 +12,9 @@ define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
   %B = trunc <8 x i32> %A to <8 x i16>
   ret <8 x i16>%B
 }
-
+define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
+; CHECK-LABEL: trunc_16_8
+; CHECK: pshufb
+  %B = trunc <16 x i16> %A to <16 x i8>
+  ret <16 x i8> %B
+}
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
index b0932bdfced9..f73174dd2bc6 100644
--- a/test/CodeGen/X86/avx-varargs-x86_64.ll
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -5,7 +5,7 @@
 @x = common global <8 x float> zeroinitializer, align 32
 declare i32 @f(i32, ...)
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: vmovaps	%ymm0, (%rsp)
 define void @test1() nounwind uwtable ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index ad8365bb59c0..fa49f949b689 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -114,7 +114,7 @@ define void @t9(i64* %p) {
  store i64 0, i64* %s
  ret void
 
-; CHECK: t9:
+; CHECK-LABEL: t9:
 ; CHECK: vxorps	%xmm
 ; CHECK-NOT: vextractf
 ; CHECK: vmovups
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 7f2f9d821dd5..b7f8d72e58c9 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -46,7 +46,7 @@ entry:
 }
 
 ; CHECK: palignr $8
-; CHECK: psrldq $8
+; CHECK: palignr $8
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
index 582537ea906f..75117463bc39 100755
--- a/test/CodeGen/X86/avx-zext.ll
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
 define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
-;CHECK: zext_8i16_to_8i32
+;CHECK-LABEL: zext_8i16_to_8i32:
 ;CHECK: vpunpckhwd
 ;CHECK: ret
 
@@ -10,7 +10,7 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
 }
 
 define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
-;CHECK: zext_4i32_to_4i64
+;CHECK-LABEL: zext_4i32_to_4i64:
 ;CHECK: vpunpckhdq
 ;CHECK: ret
 
@@ -19,7 +19,7 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
 }
 
 define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
-;CHECK: zext_8i8_to_8i32
+;CHECK-LABEL: zext_8i8_to_8i32:
 ;CHECK: vpunpckhwd
 ;CHECK: vpmovzxwd
 ;CHECK: vinsertf128
@@ -27,3 +27,15 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
   %t = zext <8 x i8> %z to <8 x i32>
   ret <8 x i32> %t
 }
+
+; PR17654
+define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
+; CHECK-LABEL: zext_16i8_to_16i16:
+; CHECK: vpxor
+; CHECK: vpunpckhbw
+; CHECK: vpunpcklbw
+; CHECK: vinsertf128
+; CHECK: ret
+  %t = zext <16 x i8> %z to <16 x i16>
+  ret <16 x i16> %t
+}
diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
index 09f953835820..72bdd9d04729 100644
--- a/test/CodeGen/X86/avx2-arith.ll
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -1,65 +1,66 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
 
 ; CHECK: vpaddq %ymm
-define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
   %x = add <4 x i64> %i, %j
   ret <4 x i64> %x
 }
 
 ; CHECK: vpaddd %ymm
-define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
   %x = add <8 x i32> %i, %j
   ret <8 x i32> %x
 }
 
 ; CHECK: vpaddw %ymm
-define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
   %x = add <16 x i16> %i, %j
   ret <16 x i16> %x
 }
 
 ; CHECK: vpaddb %ymm
-define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
   %x = add <32 x i8> %i, %j
   ret <32 x i8> %x
 }
 
 ; CHECK: vpsubq %ymm
-define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
   %x = sub <4 x i64> %i, %j
   ret <4 x i64> %x
 }
 
 ; CHECK: vpsubd %ymm
-define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
   %x = sub <8 x i32> %i, %j
   ret <8 x i32> %x
 }
 
 ; CHECK: vpsubw %ymm
-define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
   %x = sub <16 x i16> %i, %j
   ret <16 x i16> %x
 }
 
 ; CHECK: vpsubb %ymm
-define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
   %x = sub <32 x i8> %i, %j
   ret <32 x i8> %x
 }
 
 ; CHECK: vpmulld %ymm
-define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
   %x = mul <8 x i32> %i, %j
   ret <8 x i32> %x
 }
 
 ; CHECK: vpmullw %ymm
-define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
   %x = mul <16 x i16> %i, %j
   ret <16 x i16> %x
 }
 
+; CHECK: mul-v4i64
 ; CHECK: vpmuludq %ymm
 ; CHECK-NEXT: vpsrlq $32, %ymm
 ; CHECK-NEXT: vpmuludq %ymm
@@ -74,3 +75,94 @@ define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
   ret <4 x i64> %x
 }
 
+; CHECK: mul_const1
+; CHECK: vpaddd
+; CHECK: ret
+define <8 x i32> @mul_const1(<8 x i32> %x) {
+  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i32> %y
+}
+
+; CHECK: mul_const2
+; CHECK: vpsllq  $2
+; CHECK: ret
+define <4 x i64> @mul_const2(<4 x i64> %x) {
+  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
+  ret <4 x i64> %y
+}
+
+; CHECK: mul_const3
+; CHECK: vpsllw  $3
+; CHECK: ret
+define <16 x i16> @mul_const3(<16 x i16> %x) {
+  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <16 x i16> %y
+}
+
+; CHECK: mul_const4
+; CHECK: vpxor
+; CHECK: vpsubq
+; CHECK: ret
+define <4 x i64> @mul_const4(<4 x i64> %x) {
+  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
+  ret <4 x i64> %y
+}
+
+; CHECK: mul_const5
+; CHECK: vxorps
+; CHECK-NEXT: ret
+define <8 x i32> @mul_const5(<8 x i32> %x) {
+  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %y
+}
+
+; CHECK: mul_const6
+; CHECK: vpmulld
+; CHECK: ret
+define <8 x i32> @mul_const6(<8 x i32> %x) {
+  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
+  ret <8 x i32> %y
+}
+
+; CHECK: mul_const7
+; CHECK: vpaddq
+; CHECK: vpaddq
+; CHECK: ret
+define <8 x i64> @mul_const7(<8 x i64> %x) {
+  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+  ret <8 x i64> %y
+}
+
+; CHECK: mul_const8
+; CHECK: vpsllw  $3
+; CHECK: ret
+define <8 x i16> @mul_const8(<8 x i16> %x) {
+  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  ret <8 x i16> %y
+}
+
+; CHECK: mul_const9
+; CHECK: vpmulld
+; CHECK: ret
+define <8 x i32> @mul_const9(<8 x i32> %x) {
+  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %y
+}
+
+; CHECK: mul_const10
+; CHECK: vpmulld
+; CHECK: ret
+define <4 x i32> @mul_const10(<4 x i32> %x) {
+  ; %x * 0x01010101
+  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+  ret <4 x i32> %m
+}
+
+; CHECK: mul_const11
+; CHECK: vpmulld
+; CHECK: ret
+define <4 x i32> @mul_const11(<4 x i32> %x) {
+  ; %x * 0x80808080
+  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
+  ret <4 x i32> %m
+}
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 3ce08dcc7370..f49718e4c8b6 100755
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -63,6 +63,34 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
   ret <8 x i32>%B
 }
 
+; CHECK-LABEL: zext_16i8_16i16:
+; CHECK: vpmovzxbw
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
+  %t = zext <16 x i8> %z to <16 x i16>
+  ret <16 x i16> %t
+}
+
+; CHECK-LABEL: sext_16i8_16i16:
+; CHECK: vpmovsxbw
+; CHECK-NOT: vinsert
+; CHECK: ret
+define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
+  %t = sext <16 x i8> %z to <16 x i16>
+  ret <16 x i16> %t
+}
+
+; CHECK-LABEL: trunc_16i16_16i8:
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpor
+; CHECK: ret
+define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
+  %t = trunc <16 x i16> %z to <16 x i8>
+  ret <16 x i8> %t
+}
+
 ; CHECK: load_sext_test1
 ; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret 
diff --git a/test/CodeGen/X86/avx2-gather.ll b/test/CodeGen/X86/avx2-gather.ll
new file mode 100644
index 000000000000..ee50c457fe8c
--- /dev/null
+++ b/test/CodeGen/X86/avx2-gather.ll
@@ -0,0 +1,18 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+                      <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1,
+                     <4 x i32> %idx, <4 x float> %mask) {
+  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
+                            i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_avx2_gather_d_ps
+; CHECK: vgatherdps
+; CHECK-NOT: [[DST]]
+; CHECK: [[DST:%xmm[0-9]+]]{{$}}
+; CHECK: ret
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
index a5bb1a8f8e44..3d4fcec6078e 100644
--- a/test/CodeGen/X86/avx2-logic.ll
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -55,7 +55,7 @@ define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
 
 define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
 entry:
-; CHECK: signd:
+; CHECK-LABEL: signd:
 ; CHECK: psignd
 ; CHECK-NOT: sub
 ; CHECK: ret
@@ -70,7 +70,7 @@ entry:
 
 define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind {
 entry:
-; CHECK: blendvb:
+; CHECK-LABEL: blendvb:
 ; CHECK: pblendvb
 ; CHECK: ret
   %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
index 53b9da32ae8e..83573dc7b260 100644
--- a/test/CodeGen/X86/avx2-palignr.ll
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -1,57 +1,57 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
 
 define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: vpalignr $4
   %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
   ret <8 x i32> %C
 }
 
 define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: vpalignr $4
   %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
   ret <8 x i32> %C
 }
 
 define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: vpalignr $4
   %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
   ret <8 x i32> %C
 }
 ;
 define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: vpalignr $8
   %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
   ret <8 x i32> %C
 }
 
 define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: vpalignr $6
   %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
   ret <16 x i16> %C
 }
 
 define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: vpalignr $6
   %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
   ret <16 x i16> %C
 }
 
 define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: vpalignr $6
   %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i16> %C
 }
 
 define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
-; CHECK: test8:
-; CHECK: palignr $5
+; CHECK-LABEL: test8:
+; CHECK: vpalignr $5
   %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
   ret <32 x i8> %C
 }
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
index 4eac71d08b4f..3f9c95cfd070 100644
--- a/test/CodeGen/X86/avx2-phaddsub.ll
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s
 
-; CHECK: phaddw1:
+; CHECK-LABEL: phaddw1:
 ; CHECK: vphaddw
 define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
   %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
@@ -9,7 +9,7 @@ define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
   ret <16 x i16> %r
 }
 
-; CHECK: phaddw2:
+; CHECK-LABEL: phaddw2:
 ; CHECK: vphaddw
 define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
   %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
@@ -18,7 +18,7 @@ define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
   ret <16 x i16> %r
 }
 
-; CHECK: phaddd1:
+; CHECK-LABEL: phaddd1:
 ; CHECK: vphaddd
 define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
   %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -27,7 +27,7 @@ define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
   ret <8 x i32> %r
 }
 
-; CHECK: phaddd2:
+; CHECK-LABEL: phaddd2:
 ; CHECK: vphaddd
 define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
   %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
@@ -36,7 +36,7 @@ define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
   ret <8 x i32> %r
 }
 
-; CHECK: phaddd3:
+; CHECK-LABEL: phaddd3:
 ; CHECK: vphaddd
 define <8 x i32> @phaddd3(<8 x i32> %x) {
   %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
@@ -45,7 +45,7 @@ define <8 x i32> @phaddd3(<8 x i32> %x) {
   ret <8 x i32> %r
 }
 
-; CHECK: phsubw1:
+; CHECK-LABEL: phsubw1:
 ; CHECK: vphsubw
 define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
   %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
@@ -54,7 +54,7 @@ define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
   ret <16 x i16> %r
 }
 
-; CHECK: phsubd1:
+; CHECK-LABEL: phsubd1:
 ; CHECK: vphsubd
 define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
   %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -63,7 +63,7 @@ define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
   ret <8 x i32> %r
 }
 
-; CHECK: phsubd2:
+; CHECK-LABEL: phsubd2:
 ; CHECK: vphsubd
 define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
   %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index 1f192a0d323c..7fdbaaa39cbe 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -212,7 +212,7 @@ define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
 define <32 x i8> @shl9(<32 x i8> %A) nounwind {
   %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %B
-; CHECK: shl9:
+; CHECK-LABEL: shl9:
 ; CHECK: vpsllw $3
 ; CHECK: vpand
 ; CHECK: ret
@@ -221,7 +221,7 @@ define <32 x i8> @shl9(<32 x i8> %A) nounwind {
 define <32 x i8> @shr9(<32 x i8> %A) nounwind {
   %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %B
-; CHECK: shr9:
+; CHECK-LABEL: shr9:
 ; CHECK: vpsrlw $3
 ; CHECK: vpand
 ; CHECK: ret
@@ -230,7 +230,7 @@ define <32 x i8> @shr9(<32 x i8> %A) nounwind {
 define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
   %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %B
-; CHECK: sra_v32i8_7:
+; CHECK-LABEL: sra_v32i8_7:
 ; CHECK: vpxor
 ; CHECK: vpcmpgtb
 ; CHECK: ret
@@ -239,7 +239,7 @@ define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
 define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
   %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %B
-; CHECK: sra_v32i8:
+; CHECK-LABEL: sra_v32i8:
 ; CHECK: vpsrlw $3
 ; CHECK: vpand
 ; CHECK: vpxor
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
index cf319cb7fe1d..0e6dd297f8df 100644
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -54,10 +54,10 @@ define <8 x float> @blend_test3(<8 x float> %a, <8 x float> %b) nounwind alwaysi
 
 ; CHECK: blend_test4
 ; CHECK: vblendpd
-; CHECK: ret
-define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-  ret <4 x i64> %t
+; CHECK: ret
+define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+  ret <4 x i64> %t
 }
 
 ; CHECK: vpshufhw $27, %ymm
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index b804233663d4..5610416d39a3 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -259,7 +259,7 @@ define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
   ret <4 x double> %wide
 }
 
-;CHECK: _inreg8xfloat
+;CHECK-LABEL: _inreg8xfloat:
 ;CHECK: vbroadcastss
 ;CHECK: ret
 define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
@@ -267,7 +267,7 @@ define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
   ret <8 x float> %b
 }
 
-;CHECK: _inreg4xfloat
+;CHECK-LABEL: _inreg4xfloat:
 ;CHECK: vbroadcastss
 ;CHECK: ret
 define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
@@ -275,7 +275,7 @@ define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
   ret <4 x float> %b
 }
 
-;CHECK: _inreg16xi16
+;CHECK-LABEL: _inreg16xi16:
 ;CHECK: vpbroadcastw
 ;CHECK: ret
 define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
@@ -283,7 +283,7 @@ define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
   ret <16 x i16> %b
 }
 
-;CHECK: _inreg8xi16
+;CHECK-LABEL: _inreg8xi16:
 ;CHECK: vpbroadcastw
 ;CHECK: ret
 define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
@@ -292,7 +292,7 @@ define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
 }
 
 
-;CHECK: _inreg4xi64
+;CHECK-LABEL: _inreg4xi64:
 ;CHECK: vpbroadcastq
 ;CHECK: ret
 define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
@@ -300,7 +300,7 @@ define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
   ret <4 x i64> %b
 }
 
-;CHECK: _inreg2xi64
+;CHECK-LABEL: _inreg2xi64:
 ;CHECK: vpbroadcastq
 ;CHECK: ret
 define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
@@ -308,7 +308,7 @@ define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
   ret <2 x i64> %b
 }
 
-;CHECK: _inreg4xdouble
+;CHECK-LABEL: _inreg4xdouble:
 ;CHECK: vbroadcastsd
 ;CHECK: ret
 define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
@@ -316,7 +316,7 @@ define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
   ret <4 x double> %b
 }
 
-;CHECK: _inreg2xdouble
+;CHECK-LABEL: _inreg2xdouble:
 ;CHECK: vpbroadcastq
 ;CHECK: ret
 define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
@@ -324,7 +324,7 @@ define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
   ret <2 x double> %b
 }
 
-;CHECK: _inreg8xi32
+;CHECK-LABEL: _inreg8xi32:
 ;CHECK: vpbroadcastd
 ;CHECK: ret
 define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
@@ -332,7 +332,7 @@ define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
   ret <8 x i32> %b
 }
 
-;CHECK: _inreg4xi32
+;CHECK-LABEL: _inreg4xi32:
 ;CHECK: vpbroadcastd
 ;CHECK: ret
 define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
@@ -340,7 +340,7 @@ define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
   ret <4 x i32> %b
 }
 
-;CHECK: _inreg32xi8
+;CHECK-LABEL: _inreg32xi8:
 ;CHECK: vpbroadcastb
 ;CHECK: ret
 define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
@@ -348,7 +348,7 @@ define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
   ret <32 x i8> %b
 }
 
-;CHECK: _inreg16xi8
+;CHECK-LABEL: _inreg16xi8:
 ;CHECK: vpbroadcastb
 ;CHECK: ret
 define   <16 x i8> @_inreg16xi8(<16 x i8> %a) {
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
new file mode 100644
index 000000000000..5592e6c8a5f7
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -0,0 +1,247 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; AVX2 Logical Shift Left
+
+define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
+entry:
+  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_1:
+; CHECK: vpsllw  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
+entry:
+  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_2:
+; CHECK: vpaddw  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
+entry:
+  %shl = shl <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
+entry:
+  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_1:
+; CHECK: vpslld  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
+entry:
+  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_2:
+; CHECK: vpaddd  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
+entry:
+  %shl = shl <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
+entry:
+  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_1:
+; CHECK: vpsllq  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
+entry:
+  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_2:
+; CHECK: vpaddq  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
+entry:
+  %shl = shl <4 x i64> %InVec, <i64 64, i64 64, i64 64, i64 64>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+; AVX2 Arithmetic Shift Right
+
+define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
+entry:
+  %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sraw_1:
+; CHECK: vpsraw  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
+entry:
+  %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sraw_2:
+; CHECK: vpsraw  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
+entry:
+  %shl = ashr <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_sraw_3:
+; CHECK: vpsraw  $15, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
+entry:
+  %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_srad_1:
+; CHECK: vpsrad  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
+entry:
+  %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_srad_2:
+; CHECK: vpsrad  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
+entry:
+  %shl = ashr <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_srad_3:
+; CHECK: vpsrad  $31, %ymm0, %ymm0
+; CHECK: ret
+
+; AVX2 Logical Shift Right
+
+define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
+entry:
+  %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_srlw_1:
+; CHECK: vpsrlw  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
+entry:
+  %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_srlw_2:
+; CHECK: vpsrlw  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
+entry:
+  %shl = lshr <16 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <16 x i16> %shl
+}
+
+; CHECK-LABEL: test_srlw_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
+entry:
+  %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_srld_1:
+; CHECK: vpsrld  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
+entry:
+  %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_srld_2:
+; CHECK: vpsrld  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
+entry:
+  %shl = lshr <8 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  ret <8 x i32> %shl
+}
+
+; CHECK-LABEL: test_srld_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
+entry:
+  %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_srlq_1:
+; CHECK: vpsrlq  $0, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
+entry:
+  %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_srlq_2:
+; CHECK: vpsrlq  $1, %ymm0, %ymm0
+; CHECK: ret
+
+define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
+entry:
+  %shl = lshr <4 x i64> %InVec, <i64 64, i64 64, i64 64, i64 64>
+  ret <4 x i64> %shl
+}
+
+; CHECK-LABEL: test_srlq_3:
+; CHECK: vxorps  %ymm0, %ymm0, %ymm0
+; CHECK: ret
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
new file mode 100644
index 000000000000..e27600ecd734
--- /dev/null
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -0,0 +1,271 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: addpd512
+; CHECK: vaddpd
+; CHECK: ret
+define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
+entry:
+  %add.i = fadd <8 x double> %x, %y
+  ret <8 x double> %add.i
+}
+
+; CHECK-LABEL: addpd512fold
+; CHECK: vaddpd LCP{{.*}}(%rip)
+; CHECK: ret
+define <8 x double> @addpd512fold(<8 x double> %y) {
+entry:
+  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
+  ret <8 x double> %add.i
+}
+
+; CHECK-LABEL: addps512
+; CHECK: vaddps
+; CHECK: ret
+define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
+entry:
+  %add.i = fadd <16 x float> %x, %y
+  ret <16 x float> %add.i
+}
+
+; CHECK-LABEL: addps512fold
+; CHECK: vaddps LCP{{.*}}(%rip)
+; CHECK: ret
+define <16 x float> @addps512fold(<16 x float> %y) {
+entry:
+  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000,  float 0x4002666660000000, float 0x3FF3333340000000>
+  ret <16 x float> %add.i
+}
+
+; CHECK-LABEL: subpd512
+; CHECK: vsubpd
+; CHECK: ret
+define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
+entry:
+  %sub.i = fsub <8 x double> %x, %y
+  ret <8 x double> %sub.i
+}
+
+; CHECK-LABEL: @subpd512fold
+; CHECK: vsubpd (%
+; CHECK: ret
+define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
+entry:
+  %tmp2 = load <8 x double>* %x, align 8
+  %sub.i = fsub <8 x double> %y, %tmp2
+  ret <8 x double> %sub.i
+}
+
+; CHECK-LABEL: @subps512
+; CHECK: vsubps
+; CHECK: ret
+define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
+entry:
+  %sub.i = fsub <16 x float> %x, %y
+  ret <16 x float> %sub.i
+}
+
+; CHECK-LABEL: subps512fold
+; CHECK: vsubps (%
+; CHECK: ret
+define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
+entry:
+  %tmp2 = load <16 x float>* %x, align 4
+  %sub.i = fsub <16 x float> %y, %tmp2
+  ret <16 x float> %sub.i
+}
+
+; CHECK-LABEL: imulq512
+; CHECK: vpmuludq
+; CHECK: vpmuludq
+; CHECK: ret
+define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
+  %z = mul <8 x i64>%x, %y
+  ret <8 x i64>%z
+}
+
+; CHECK-LABEL: mulpd512
+; CHECK: vmulpd
+; CHECK: ret
+define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
+entry:
+  %mul.i = fmul <8 x double> %x, %y
+  ret <8 x double> %mul.i
+}
+
+; CHECK-LABEL: mulpd512fold
+; CHECK: vmulpd LCP{{.*}}(%rip)
+; CHECK: ret
+define <8 x double> @mulpd512fold(<8 x double> %y) {
+entry:
+  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+  ret <8 x double> %mul.i
+}
+
+; CHECK-LABEL: mulps512
+; CHECK: vmulps
+; CHECK: ret
+define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
+entry:
+  %mul.i = fmul <16 x float> %x, %y
+  ret <16 x float> %mul.i
+}
+
+; CHECK-LABEL: mulps512fold
+; CHECK: vmulps LCP{{.*}}(%rip)
+; CHECK: ret
+define <16 x float> @mulps512fold(<16 x float> %y) {
+entry:
+  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+  ret <16 x float> %mul.i
+}
+
+; CHECK-LABEL: divpd512
+; CHECK: vdivpd
+; CHECK: ret
+define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
+entry:
+  %div.i = fdiv <8 x double> %x, %y
+  ret <8 x double> %div.i
+}
+
+; CHECK-LABEL: divpd512fold
+; CHECK: vdivpd LCP{{.*}}(%rip)
+; CHECK: ret
+define <8 x double> @divpd512fold(<8 x double> %y) {
+entry:
+  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+  ret <8 x double> %div.i
+}
+
+; CHECK-LABEL: divps512
+; CHECK: vdivps
+; CHECK: ret
+define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
+entry:
+  %div.i = fdiv <16 x float> %x, %y
+  ret <16 x float> %div.i
+}
+
+; CHECK-LABEL: divps512fold
+; CHECK: vdivps LCP{{.*}}(%rip)
+; CHECK: ret
+define <16 x float> @divps512fold(<16 x float> %y) {
+entry:
+  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
+  ret <16 x float> %div.i
+}
+
+; CHECK-LABEL: vpaddq_test
+; CHECK: vpaddq %zmm
+; CHECK: ret
+define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+  %x = add <8 x i64> %i, %j
+  ret <8 x i64> %x
+}
+
+; CHECK-LABEL: vpaddd_test
+; CHECK: vpaddd %zmm
+; CHECK: ret
+define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+  %x = add <16 x i32> %i, %j
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: vpsubq_test
+; CHECK: vpsubq %zmm
+; CHECK: ret
+define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+  %x = sub <8 x i64> %i, %j
+  ret <8 x i64> %x
+}
+
+; CHECK-LABEL: vpsubd_test
+; CHECK: vpsubd
+; CHECK: ret
+define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+  %x = sub <16 x i32> %i, %j
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: vpmulld_test
+; CHECK: vpmulld %zmm
+; CHECK: ret
+define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
+  %x = mul <16 x i32> %i, %j
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: sqrtA
+; CHECK: vsqrtssz
+; CHECK: ret
+declare float @sqrtf(float) readnone
+define float @sqrtA(float %a) nounwind uwtable readnone ssp {
+entry:
+  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
+  ret float %conv1
+}
+
+; CHECK-LABEL: sqrtB
+; CHECK: vsqrtsdz
+; CHECK: ret
+declare double @sqrt(double) readnone
+define double @sqrtB(double %a) nounwind uwtable readnone ssp {
+entry:
+  %call = tail call double @sqrt(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: sqrtC
+; CHECK: vsqrtssz
+; CHECK: ret
+declare float @llvm.sqrt.f32(float)
+define float @sqrtC(float %a) nounwind {
+  %b = call float @llvm.sqrt.f32(float %a)
+  ret float %b
+}
+
+; CHECK-LABEL: fadd_broadcast
+; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK: ret
+define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
+  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  ret <16 x float> %b
+}
+
+; CHECK-LABEL: addq_broadcast
+; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK: ret
+define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
+  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+  ret <8 x i64> %b
+}
+
+; CHECK-LABEL: orq_broadcast
+; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK: ret
+define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
+  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+  ret <8 x i64> %b
+}
+
+; CHECK-LABEL: andd512fold
+; CHECK: vpandd (%
+; CHECK: ret
+define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
+entry:
+  %a = load <16 x i32>* %x, align 4
+  %b = and <16 x i32> %y, %a
+  ret <16 x i32> %b
+}
+
+; CHECK-LABEL: andqbrst
+; CHECK: vpandq  (%rdi){1to8}, %zmm
+; CHECK: ret
+define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
+entry:
+  %a = load i64* %ap, align 8
+  %b = insertelement <8 x i64> undef, i64 %a, i32 0
+  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+  %d = and <8 x i64> %p1, %c
+  ret <8 x i64>%d
+}
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
new file mode 100644
index 000000000000..bc4560b3f3fc
--- /dev/null
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vpxord
+; CHECK: ret
+define <16 x i32> @test1(i32* %x) {
+   %y = load i32* %x, align 4
+   %res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
+   ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test2
+; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
+; CHECK: ret
+define <16 x i32> @test2(<16 x i32> %x) { ; add of an all -1 splat constant and %x
+   %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
+   ret <16 x i32>%res
+}
diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll
new file mode 100644
index 000000000000..ba52745e6c19
--- /dev/null
+++ b/test/CodeGen/X86/avx512-cmp.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK: vucomisdz
+define double @test1(double %a, double %b) nounwind {
+  %tobool = fcmp une double %a, %b
+  br i1 %tobool, label %l1, label %l2
+
+l1:
+  %c = fsub double %a, %b
+  ret double %c
+l2:
+  %c1 = fadd double %a, %b
+  ret double %c1
+}
+
+; CHECK: vucomissz
+define float @test2(float %a, float %b) nounwind {
+  %tobool = fcmp olt float %a, %b
+  br i1 %tobool, label %l1, label %l2
+
+l1:
+  %c = fsub float %a, %b
+  ret float %c
+l2:
+  %c1 = fadd float %a, %b
+  ret float %c1
+}
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
new file mode 100644
index 000000000000..ed68ff7bcbdb
--- /dev/null
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: sitof32
+; CHECK: vcvtdq2ps %zmm
+; CHECK: ret
+define <16 x float> @sitof32(<16 x i32> %a) nounwind {
+  %b = sitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+; CHECK-LABEL: fptosi00
+; CHECK: vcvttps2dq %zmm
+; CHECK: ret
+define <16 x i32> @fptosi00(<16 x float> %a) nounwind {
+  %b = fptosi <16 x float> %a to <16 x i32>
+  ret <16 x i32> %b
+}
+
+; CHECK-LABEL: fptoui00
+; CHECK: vcvttps2udq
+; CHECK: ret
+define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
+  %b = fptoui <16 x float> %a to <16 x i32>
+  ret <16 x i32> %b
+}
+
+; CHECK-LABEL: fptoui01
+; CHECK: vcvttpd2udq
+; CHECK: ret
+define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
+  %b = fptoui <8 x double> %a to <8 x i32>
+  ret <8 x i32> %b
+}
+
+; CHECK-LABEL: sitof64
+; CHECK: vcvtdq2pd %ymm
+; CHECK: ret
+define <8 x double> @sitof64(<8 x i32> %a) {
+  %b = sitofp <8 x i32> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+; CHECK-LABEL: fptosi01
+; CHECK: vcvttpd2dq %zmm
+; CHECK: ret
+define <8 x i32> @fptosi01(<8 x double> %a) {
+  %b = fptosi <8 x double> %a to <8 x i32>
+  ret <8 x i32> %b
+}
+
+; CHECK-LABEL: fptrunc00
+; CHECK: vcvtpd2ps %zmm
+; CHECK-NEXT: vcvtpd2ps %zmm
+; CHECK-NEXT: vinsertf64x4    $1
+; CHECK: ret
+define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
+  %a = fptrunc <16 x double> %b to <16 x float>
+  ret <16 x float> %a
+}
+
+; CHECK-LABEL: fpext00
+; CHECK: vcvtps2pd %ymm0, %zmm0
+; CHECK: ret
+define <8 x double> @fpext00(<8 x float> %b) nounwind {
+  %a = fpext <8 x float> %b to <8 x double>
+  ret <8 x double> %a
+}
+
+; CHECK-LABEL: funcA
+; CHECK: vcvtsi2sdqz (%
+; CHECK: ret
+define double @funcA(i64* nocapture %e) {
+entry:
+  %tmp1 = load i64* %e, align 8
+  %conv = sitofp i64 %tmp1 to double
+  ret double %conv
+}
+
+; CHECK-LABEL: funcB
+; CHECK: vcvtsi2sdlz (%
+; CHECK: ret
+define double @funcB(i32* %e) {
+entry:
+  %tmp1 = load i32* %e, align 4
+  %conv = sitofp i32 %tmp1 to double
+  ret double %conv
+}
+
+; CHECK-LABEL: funcC
+; CHECK: vcvtsi2sslz (%
+; CHECK: ret
+define float @funcC(i32* %e) {
+entry:
+  %tmp1 = load i32* %e, align 4
+  %conv = sitofp i32 %tmp1 to float
+  ret float %conv
+}
+
+; CHECK-LABEL: i64tof32
+; CHECK: vcvtsi2ssqz  (%
+; CHECK: ret
+define float @i64tof32(i64* %e) {
+entry:
+  %tmp1 = load i64* %e, align 8
+  %conv = sitofp i64 %tmp1 to float
+  ret float %conv
+}
+
+; CHECK-LABEL: fpext
+; CHECK: vcvtss2sdz
+; CHECK: ret
+define void @fpext() {
+entry:
+  %f = alloca float, align 4
+  %d = alloca double, align 8
+  %tmp = load float* %f, align 4
+  %conv = fpext float %tmp to double
+  store double %conv, double* %d, align 8
+  ret void
+}
+
+; CHECK-LABEL: fpround_scalar
+; CHECK: vmovsdz
+; CHECK: vcvtsd2ssz
+; CHECK: vmovssz
+; CHECK: ret
+define void @fpround_scalar() nounwind uwtable {
+entry:
+  %f = alloca float, align 4
+  %d = alloca double, align 8
+  %tmp = load double* %d, align 8
+  %conv = fptrunc double %tmp to float
+  store float %conv, float* %f, align 4
+  ret void
+}
+
+; CHECK-LABEL: long_to_double
+; CHECK: vmovqz
+; CHECK: ret
+define double @long_to_double(i64 %x) {
+   %res = bitcast i64 %x to double
+   ret double %res
+}
+
+; CHECK-LABEL: double_to_long
+; CHECK: vmovqz
+; CHECK: ret
+define i64 @double_to_long(double %x) {
+   %res = bitcast double %x to i64
+   ret i64 %res
+}
+
+; CHECK-LABEL: int_to_float
+; CHECK: vmovdz
+; CHECK: ret
+define float @int_to_float(i32 %x) {
+   %res = bitcast i32 %x to float
+   ret float %res
+}
+
+; CHECK-LABEL: float_to_int
+; CHECK: vmovdz
+; CHECK: ret
+define i32 @float_to_int(float %x) {
+   %res = bitcast float %x to i32
+   ret i32 %res
+}
+
+; CHECK-LABEL: uitof64
+; CHECK: vcvtudq2pd
+; CHECK: vextracti64x4
+; CHECK: vcvtudq2pd
+; CHECK: ret
+define <16 x double> @uitof64(<16 x i32> %a) nounwind {
+  %b = uitofp <16 x i32> %a to <16 x double>
+  ret <16 x double> %b
+}
+
+; CHECK-LABEL: uitof32
+; CHECK: vcvtudq2ps
+; CHECK: ret
+define <16 x float> @uitof32(<16 x i32> %a) nounwind {
+  %b = uitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+; CHECK-LABEL: @fptosi02
+; CHECK: vcvttss2si
+; CHECK: ret
+define i32 @fptosi02(float %a) nounwind { ; scalar float -> signed i32 conversion
+  %b = fptosi float %a to i32
+  ret i32 %b
+}
+
+; CHECK-LABEL: @fptoui02
+; CHECK: vcvttss2usi
+; CHECK: ret
+define i32 @fptoui02(float %a) nounwind { ; scalar float -> unsigned i32 conversion
+  %b = fptoui float %a to i32
+  ret i32 %b
+}
+
+; CHECK-LABEL: @uitofp02
+; CHECK: vcvtusi2ss
+; CHECK: ret
+define float @uitofp02(i32 %a) nounwind { ; unsigned i32 -> scalar float conversion
+  %b = uitofp i32 %a to float
+  ret float %b
+}
+
+; CHECK-LABEL: @uitofp03
+; CHECK: vcvtusi2sd
+; CHECK: ret
+define double @uitofp03(i32 %a) nounwind { ; unsigned i32 -> scalar double conversion
+  %b = uitofp i32 %a to double
+  ret double %b
+}
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll
new file mode 100644
index 000000000000..ce3d7590f396
--- /dev/null
+++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK-LABEL: test_x86_vfmadd_ps_z
+  ; CHECK: vfmadd213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK-LABEL: test_x86_vfmadd_pd_z
+  ; CHECK: vfmadd213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
+define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK-LABEL: test_x86_vfmsubps_z
+  ; CHECK: vfmsub213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK-LABEL: test_x86_vfmsubpd_z
+  ; CHECK: vfmsub213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
+define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK-LABEL: test_x86_vfnmadd_ps_z
+  ; CHECK: vfnmadd213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK-LABEL: test_x86_vfnmadd_pd_z
+  ; CHECK: vfnmadd213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
+define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK-LABEL: test_x86_vfnmsubps_z
+  ; CHECK: vfnmsub213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK-LABEL: test_x86_vfnmsubpd_z
+  ; CHECK: vfnmsub213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
+define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK-LABEL: test_x86_vfmaddsubps_z
+  ; CHECK: vfmaddsub213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK-LABEL: test_x86_vfmaddsubpd_z
+  ; CHECK: vfmaddsub213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
+define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK-LABEL: test_x86_vfmsubaddps_z
+  ; CHECK: vfmsubadd213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK-LABEL: test_x86_vfmsubaddpd_z
+  ; CHECK: vfmsubadd213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/avx512-fma.ll b/test/CodeGen/X86/avx512-fma.ll
new file mode 100644
index 000000000000..d6926e2571ab
--- /dev/null
+++ b/test/CodeGen/X86/avx512-fma.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -fp-contract=fast | FileCheck %s
+
+; CHECK-LABEL: test_x86_fmadd_ps_z
+; CHECK: vfmadd213ps     %zmm2, %zmm1, %zmm0
+; CHECK: ret
+define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fadd <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fmsub_ps_z
+; CHECK: vfmsub213ps     %zmm2, %zmm1, %zmm0
+; CHECK: ret
+define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fnmadd_ps_z
+; CHECK: vfnmadd213ps     %zmm2, %zmm1, %zmm0
+; CHECK: ret
+define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %a2, %x
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fnmsub_ps_z
+; CHECK: vfnmsub213ps     %zmm2, %zmm1, %zmm0
+; CHECK: ret
+define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %prod = fmul <16 x float> %a0, %a1
+  %neg = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+                            float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %prod ; -0.0 - (a0*a1)
+  %res = fsub <16 x float> %neg, %a2 ; -(a0*a1) - a2
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fmadd_pd_z
+; CHECK: vfmadd213pd     %zmm2, %zmm1, %zmm0
+; CHECK: ret
+define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fadd <8 x double> %x, %a2
+  ret <8 x double> %res
+}
+
+; CHECK-LABEL: test_x86_fmsub_pd_z
+; CHECK: vfmsub213pd     %zmm2, %zmm1, %zmm0
+; CHECK: ret
+define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fsub <8 x double> %x, %a2
+  ret <8 x double> %res
+}
+
+define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) {
+  %x = fmul double %a0, %a1
+  %res = fsub double %x, %a2
+  ret double %res
+}
+
+;CHECK-LABEL: test132_br
+;CHECK: vfmadd132ps  LCP{{.*}}(%rip){1to16}
+;CHECK: ret
+define <16 x float> @test132_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  %b2 = fadd <16 x float> %b1, %a2
+  ret <16 x float> %b2
+}
+
+;CHECK-LABEL: test213_br
+;CHECK: vfmadd213ps  LCP{{.*}}(%rip){1to16}
+;CHECK: ret
+define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+  %b1 = fmul <16 x float> %a1, %a2
+  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  ret <16 x float> %b2
+}
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
new file mode 100644
index 000000000000..0321e950ef81
--- /dev/null
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+declare <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float>, i16, <16 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dps.mask.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
+declare <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double>, i8, <8 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dpd.mask.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
+
+declare <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float>, i8, <8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
+declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
+
+;CHECK-LABEL: gather_mask_dps
+;CHECK: kmovw
+;CHECK: vgatherdps
+;CHECK: vpadd
+;CHECK: vscatterdps
+;CHECK: ret
+define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4)
+  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.x86.avx512.scatter.dps.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_mask_dpd
+;CHECK: kmovw
+;CHECK: vgatherdpd
+;CHECK: vpadd
+;CHECK: vscatterdpd
+;CHECK: ret
+define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.x86.avx512.scatter.dpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_mask_qps
+;CHECK: kmovw
+;CHECK: vgatherqps
+;CHECK: vpadd
+;CHECK: vscatterqps
+;CHECK: ret
+define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qps.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_mask_qpd
+;CHECK: kmovw
+;CHECK: vgatherqpd
+;CHECK: vpadd
+;CHECK: vscatterqpd
+;CHECK: ret
+define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
+  ret void
+}
+;;
+;; Integer Gather/Scatter
+;;
+declare <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32>, i16, <16 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dpi.mask.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
+declare <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64>, i8, <8 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dpq.mask.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
+
+declare <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32>, i8, <8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
+declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
+
+;CHECK-LABEL: gather_mask_dd
+;CHECK: kmovw
+;CHECK: vpgatherdd
+;CHECK: vpadd
+;CHECK: vpscatterdd
+;CHECK: ret
+define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4)
+  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.x86.avx512.scatter.dpi.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_mask_qd
+;CHECK: kmovw
+;CHECK: vpgatherqd
+;CHECK: vpadd
+;CHECK: vpscatterqd
+;CHECK: ret
+define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qpi.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_mask_qq
+;CHECK: kmovw
+;CHECK: vpgatherqq
+;CHECK: vpadd
+;CHECK: vpscatterqq
+;CHECK: ret
+define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_mask_dq
+;CHECK: kmovw
+;CHECK: vpgatherdq
+;CHECK: vpadd
+;CHECK: vpscatterdq
+;CHECK: ret
+define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.x86.avx512.scatter.dpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
+  ret void
+}
+
+;; FP Intrinsics without masks
+
+declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dps.512 (i8*, <16 x i32>, <16 x float>, i32)
+declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32)
+declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32)
+
+;CHECK-LABEL: gather_dps
+;CHECK: kxnorw
+;CHECK: vgatherdps
+;CHECK: vscatterdps
+;CHECK: ret
+define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf)  {
+  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>%ind, i8* %base, i32 4)
+  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, <16 x i32>%ind2, <16 x float> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_qps
+;CHECK: kxnorw
+;CHECK: vgatherqps
+;CHECK: vscatterqps
+;CHECK: ret
+define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf)  {
+  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, <8 x i64>%ind2, <8 x float> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_qpd
+;CHECK: kxnorw
+;CHECK: vgatherqpd
+;CHECK: vpadd
+;CHECK: vscatterqpd
+;CHECK: ret
+define void @gather_qpd(<8 x i64> %ind, i8* %base, i8* %stbuf)  {
+  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, <8 x i64>%ind2, <8 x double> %x, i32 4)
+  ret void
+}
+
+;; Integer Intrinsics without masks
+
+declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, <16 x i32>, <16 x i32>, i32)
+declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i32>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, <8 x i32>, <8 x i64>, i32)
+
+declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32)
+declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32)
+declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x i64>, i32)
+
+;CHECK-LABEL: gather_dpi
+;CHECK: kxnorw
+;CHECK: vpgatherdd
+;CHECK: vpscatterdd
+;CHECK: ret
+define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf)  {
+  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>%ind, i8* %base, i32 4)
+  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, <16 x i32>%ind2, <16 x i32> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_qpq
+;CHECK: vpxord  %zmm
+;CHECK: kxnorw
+;CHECK: vpgatherqq
+;CHECK: vpadd
+;CHECK: vpscatterqq
+;CHECK: ret
+define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf)  {
+  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i64> %x, i32 4)
+  ret void
+}
+
+;CHECK-LABEL: gather_qpi
+;CHECK: vpxor %ymm
+;CHECK: kxnorw
+;CHECK: vpgatherqd
+;CHECK: vpadd
+;CHECK: vpscatterqd
+;CHECK: ret
+define void @gather_qpi(<8 x i64> %ind, i8* %base, i8* %stbuf)  {
+  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>%ind, i8* %base, i32 4)
+  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+  call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i32> %x, i32 4)
+  ret void
+}
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
new file mode 100644
index 000000000000..3f067401ed3f
--- /dev/null
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+;CHECK-LABEL: test1:
+;CHECK: vinsertps
+;CHECK: vinsertf32x4
+;CHECK: ret
+define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
+  %rrr = load float* %br
+  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
+  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
+  ret <16 x float> %rrr3
+}
+
+;CHECK-LABEL: test2:
+;CHECK: vinsertf32x4
+;CHECK: vextractf32x4
+;CHECK: vinsertf32x4
+;CHECK: ret
+define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
+  %rrr = load double* %br
+  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
+  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
+  ret <8 x double> %rrr3
+}
+
+;CHECK-LABEL: test3:
+;CHECK: vextractf32x4
+;CHECK: vinsertf32x4
+;CHECK: ret
+define <16 x float> @test3(<16 x float> %x) nounwind {
+  %eee = extractelement <16 x float> %x, i32 4
+  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
+  ret <16 x float> %rrr2
+}
+
+;CHECK-LABEL: test4:
+;CHECK: vextracti32x4
+;CHECK: vinserti32x4
+;CHECK: ret
+define <8 x i64> @test4(<8 x i64> %x) nounwind {
+  %eee = extractelement <8 x i64> %x, i32 4
+  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
+  ret <8 x i64> %rrr2
+}
+
+;CHECK-LABEL: test5:
+;CHECK: vextractpsz
+;CHECK: ret
+define i32 @test5(<4 x float> %x) nounwind {
+  %ef = extractelement <4 x float> %x, i32 3
+  %ei = bitcast float %ef to i32
+  ret i32 %ei
+}
+
+;CHECK-LABEL: test6:
+;CHECK: vextractpsz {{.*}}, (%rdi)
+;CHECK: ret
+define void @test6(<4 x float> %x, float* %out) nounwind {
+  %ef = extractelement <4 x float> %x, i32 3
+  store float %ef, float* %out, align 4
+  ret void
+}
+
+;CHECK-LABEL: test7
+;CHECK: vmovdz
+;CHECK: vpermps %zmm
+;CHECK: ret
+define float @test7(<16 x float> %x, i32 %ind) nounwind {
+  %e = extractelement <16 x float> %x, i32 %ind
+  ret float %e
+}
+
+;CHECK-LABEL: test8
+;CHECK: vmovqz
+;CHECK: vpermpd %zmm
+;CHECK: ret
+define double @test8(<8 x double> %x, i32 %ind) nounwind {
+  %e = extractelement <8 x double> %x, i32 %ind
+  ret double %e
+}
+
+;CHECK-LABEL: test9
+;CHECK: vmovd
+;CHECK: vpermps %ymm
+;CHECK: ret
+define float @test9(<8 x float> %x, i32 %ind) nounwind {
+  %e = extractelement <8 x float> %x, i32 %ind
+  ret float %e
+}
+
+;CHECK-LABEL: test10
+;CHECK: vmovdz
+;CHECK: vpermd %zmm
+;CHECK: vmovdz  %xmm0, %eax
+;CHECK: ret
+define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ; extract with a run-time (non-constant) index
+  %e = extractelement <16 x i32> %x, i32 %ind
+  ret i32 %e
+}
+
+;CHECK-LABEL: test11
+;CHECK: movl    $260
+;CHECK: bextrl
+;CHECK: movl    $268
+;CHECK: bextrl
+;CHECK: ret
+define <16 x i32> @test11(<16 x i32> %a, <16 x i32> %b) {
+  %cmp_res = icmp ult <16 x i32> %a, %b
+  %ia = extractelement <16 x i1> %cmp_res, i32 4  ; mask bit 4; 260 = 0x104 (presumably the bextr control word)
+  %ib = extractelement <16 x i1> %cmp_res, i32 12 ; mask bit 12; 268 = 0x10C (presumably the bextr control word)
+
+  br i1 %ia, label %A, label %B
+
+A:
+  ret <16 x i32> %b
+B:
+  %c = add <16 x i32> %b, %a
+  br i1 %ib, label %C, label %D
+C:
+  %c1 = sub <16 x i32> %c, %a
+  ret <16 x i32> %c1
+D:
+  %c2 = mul <16 x i32> %c, %a
+  ret <16 x i32> %c2
+}
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
new file mode 100644
index 000000000000..5bdabf234990
--- /dev/null
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -0,0 +1,374 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone
+; CHECK-LABEL: test_kortestz
+; CHECK: kortestw
+; CHECK: sete
+define i32 @test_kortestz(i16 %a0, i16 %a1) { ; passes both i16 masks straight to the kortestz intrinsic
+  %res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone
+; CHECK-LABEL: test_kortestc
+; CHECK: kortestw
+; CHECK: sbbl
+define i32 @test_kortestc(i16 %a0, i16 %a1) { ; passes both i16 masks straight to the kortestc intrinsic
+  %res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1)
+  ret i32 %res
+}
+
+define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
+  ; CHECK: vrcp14ps
+  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone
+
+define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
+  ; CHECK: vrcp14pd
+  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone
+
+define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
+  ; CHECK: vrcp28ps
+  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone
+
+define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
+  ; CHECK: vrcp28pd
+  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone
+
+define <8 x double> @test_rndscale_pd_512(<8 x double> %a0) {
+  ; CHECK: vrndscale
+  %res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone
+
+
+define <16 x float> @test_rndscale_ps_512(<16 x float> %a0) {
+  ; CHECK: vrndscale
+  %res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone
+
+
+define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
+  ; CHECK: vrsqrt14ps
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone
+
+define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) {
+  ; CHECK: vrsqrt28ps
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone
+
+define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
+  ; CHECK: vrsqrt14ss
+  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
+  ; CHECK: vrsqrt28ss
+  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
+  ; CHECK: vrcp14ss
+  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
+  ; CHECK: vrcp28ss
+  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone
+
+define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
+  ; CHECK: vsqrtpd
+  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone
+
+define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
+  ; CHECK: vsqrtps
+  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone
+
+define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: vsqrtssz
+  %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: vsqrtsdz
+  %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
+  ; CHECK: vcvtsd2siz
+  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+
+define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
+  ; CHECK: vcvtsi2sdqz
+  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
+
+define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
+  ; CHECK: vcvtusi2sdqz
+  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
+
+define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
+  ; CHECK: vcvttsd2siz
+  %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+
+define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
+  ; CHECK: vcvtss2siz
+  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
+  ; CHECK: vcvtsi2ssqz
+  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
+
+
+define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
+  ; CHECK: vcvttss2siz
+  %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
+  ; CHECK: vcvtsd2usiz
+  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
+  ret i64 %res
+}
+declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
+
+define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
+  ; CHECK: vcvtph2ps
+  %res = call <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16> %a0)
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16>) nounwind readonly
+
+
+define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
+  ; CHECK: vcvtps2ph
+  %res = call <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float> %a0, i32 0)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float>, i32) nounwind readonly
+
+define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
+
+define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
+  ; CHECK: vbroadcastsd
+  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
+
+define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
+
+define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
+  ; CHECK: vbroadcastsd
+  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
+
+define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32>  %a0) {
+  ; CHECK: vpbroadcastd
+  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
+
+define <16 x i32> @test_x86_pbroadcastd_i32_512(i32  %a0) {
+  ; CHECK: vpbroadcastd
+  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
+
+define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
+  ; CHECK: vpbroadcastq
+  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
+
+define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
+  ; CHECK: vpbroadcastq
+  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
+
+define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpmaxud 
+  %res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpmaxuq
+  %res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpmaxsd
+  %res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpmaxsq
+  %res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpminud
+  %res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpminuq
+  %res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK: vpminsd
+  %res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK: vpminsq
+  %res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_conflict_d(<16 x i32> %a) {
+  ; CHECK: vpconflictd
+  %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly
+
+define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
+  ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z}
+  %vmask = bitcast i16 %mask to <16 x i1>
+  %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a)
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly
+
+define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+  ; CHECK: vpconflictq {{.*}} {%k1}
+  %vmask = bitcast i8 %mask to <8 x i1>
+  %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a)
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly
+
+define <16 x float> @test_x86_mskblend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
+  ; CHECK: vblendmps
+  %m0 = bitcast i16 %a0 to <16 x i1>
+  %res = call <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1]
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly
+
+define <8 x double> @test_x86_mskblend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
+  ; CHECK: vblendmpd
+  %m0 = bitcast i8 %a0 to <8 x i1>
+  %res = call <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %m0, <8 x double> %a1, <8 x double> %a2) ; <<8 x double>> [#uses=1]
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %a0, <8 x double> %a1, <8 x double> %a2) nounwind readonly
+
+define <16 x i32> @test_x86_mskblend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
+  ; CHECK: vpblendmd
+  %m0 = bitcast i16 %a0 to <16 x i1>
+  %res = call <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %m0, <16 x i32> %a1, <16 x i32> %a2) ; <<16 x i32>> [#uses=1]
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %a0, <16 x i32> %a1, <16 x i32> %a2) nounwind readonly
+
+define <8 x i64> @test_x86_mskblend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
+  ; CHECK: vpblendmq
+  %m0 = bitcast i8 %a0 to <8 x i1>
+  %res = call <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %m0, <8 x i64> %a1, <8 x i64> %a2) ; <<8 x i64>> [#uses=1]
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
new file mode 100644
index 000000000000..ef5cb56d7284
--- /dev/null
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s
+
+define i16 @mask16(i16 %x) { ; bitwise NOT of a 16-bit value, performed on its <16 x i1> view
+  %m0 = bitcast i16 %x to <16 x i1>
+  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> ; xor with all-ones inverts every lane
+  %ret = bitcast <16 x i1> %m1 to i16
+  ret i16 %ret
+; CHECK-LABEL: mask16
+; CHECK: knotw
+; CHECK: ret
+}
+
+define i8 @mask8(i8 %x) { ; bitwise NOT of an 8-bit value, performed on its <8 x i1> view
+  %m0 = bitcast i8 %x to <8 x i1>
+  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> ; xor with all-ones inverts every lane
+  %ret = bitcast <8 x i1> %m1 to i8
+  ret i8 %ret
+; CHECK-LABEL: mask8
+; CHECK: knotw
+; CHECK: ret
+}
+
+define i16 @mand16(i16 %x, i16 %y) {
+  %ma = bitcast i16 %x to <16 x i1>
+  %mb = bitcast i16 %y to <16 x i1>
+  %mc = and <16 x i1> %ma, %mb
+  %md = xor <16 x i1> %ma, %mb
+  %me = or <16 x i1> %mc, %md
+  %ret = bitcast <16 x i1> %me to i16
+; CHECK: kandw
+; CHECK: kxorw
+; CHECK: korw
+  ret i16 %ret
+}
+
+; CHECK-LABEL: unpckbw_test
+; CHECK: kunpckbw
+; CHECK:ret
+declare <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1>, <8 x i1>) nounwind readnone
+
+define i16 @unpckbw_test(i8 %x, i8 %y) { ; combines two 8-bit masks into one 16-bit mask via the kunpck intrinsic
+  %m0 = bitcast i8 %x to <8 x i1>
+  %m1 = bitcast i8 %y to <8 x i1>
+  %k = tail call <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1> %m0, <8 x i1> %m1)
+  %r = bitcast <16 x i1> %k to i16
+  ret i16 %r
+}
+
+; CHECK-LABEL: shuf_test1
+; CHECK: kshiftrw        $8
+; CHECK:ret
+define i8 @shuf_test1(i16 %v) nounwind { ; returns lanes 8..15 of a 16-lane mask as an i8
+   %v1 = bitcast i16 %v to <16 x i1>
+   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+   %mask1 = bitcast <8 x i1> %mask to i8
+   ret i8 %mask1
+}
diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll
new file mode 100644
index 000000000000..91242b1cc125
--- /dev/null
+++ b/test/CodeGen/X86/avx512-mov.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: @test1
+; CHECK: vmovdz  %xmm0, %eax
+; CHECK: ret
+define i32 @test1(float %x) {
+   %res = bitcast float %x to i32
+   ret i32 %res
+}
+
+; CHECK-LABEL: @test2
+; CHECK: vmovdz  %edi
+; CHECK: ret
+define <4 x i32> @test2(i32 %x) {
+   %res = insertelement <4 x i32>undef, i32 %x, i32 0
+   ret <4 x i32>%res
+}
+
+; CHECK-LABEL: @test3
+; CHECK: vmovqz  %rdi
+; CHECK: ret
+define <2 x i64> @test3(i64 %x) {
+   %res = insertelement <2 x i64>undef, i64 %x, i32 0
+   ret <2 x i64>%res
+}
+
+; CHECK-LABEL: @test4
+; CHECK: vmovdz  (%rdi)
+; CHECK: ret
+define <4 x i32> @test4(i32* %x) {
+   %y = load i32* %x
+   %res = insertelement <4 x i32>undef, i32 %y, i32 0
+   ret <4 x i32>%res
+}
+
+; CHECK-LABEL: @test5
+; CHECK: vmovssz  %xmm0, (%rdi)
+; CHECK: ret
+define void @test5(float %x, float* %y) {
+   store float %x, float* %y, align 4
+   ret void
+}
+
+; CHECK-LABEL: @test6
+; CHECK: vmovsdz  %xmm0, (%rdi)
+; CHECK: ret
+define void @test6(double %x, double* %y) {
+   store double %x, double* %y, align 8
+   ret void
+}
+
+; CHECK-LABEL: @test7
+; CHECK: vmovssz  (%rdi), %xmm0
+; CHECK: ret
+define float @test7(i32* %x) {
+   %y = load i32* %x
+   %res = bitcast i32 %y to float
+   ret float %res
+}
+
+; CHECK-LABEL: @test8
+; CHECK: vmovdz %xmm0, %eax
+; CHECK: ret
+define i32 @test8(<4 x i32> %x) {
+   %res = extractelement <4 x i32> %x, i32 0
+   ret i32 %res
+}
+
+; CHECK-LABEL: @test9
+; CHECK: vmovqz %xmm0, %rax
+; CHECK: ret
+define i64 @test9(<2 x i64> %x) {
+   %res = extractelement <2 x i64> %x, i32 0
+   ret i64 %res
+}
+
+; CHECK-LABEL: @test10
+; CHECK: vmovdz  (%rdi)
+; CHECK: ret
+define <4 x i32> @test10(i32* %x) {
+   %y = load i32* %x, align 4
+   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
+   ret <4 x i32>%res
+}
+
+; CHECK-LABEL: @test11
+; CHECK: vmovssz  (%rdi)
+; CHECK: ret
+define <4 x float> @test11(float* %x) {
+   %y = load float* %x, align 4
+   %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
+   ret <4 x float>%res
+}
+
+; CHECK-LABEL: @test12
+; CHECK: vmovsdz  (%rdi)
+; CHECK: ret
+define <2 x double> @test12(double* %x) {
+   %y = load double* %x, align 8
+   %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
+   ret <2 x double>%res
+}
+
+; CHECK-LABEL: @test13
+; CHECK: vmovqz  %rdi
+; CHECK: ret
+define <2 x i64> @test13(i64 %x) {
+   %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
+   ret <2 x i64>%res
+}
+
+; CHECK-LABEL: @test14
+; CHECK: vmovdz  %edi
+; CHECK: ret
+define <4 x i32> @test14(i32 %x) {
+   %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
+   ret <4 x i32>%res
+}
+
+; CHECK-LABEL: @test15
+; CHECK: vmovdz  (%rdi)
+; CHECK: ret
+define <4 x i32> @test15(i32* %x) {
+   %y = load i32* %x, align 4
+   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
+   ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test16
+; CHECK: vmovdqu32
+; CHECK: ret
+define <16 x i32> @test16(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %res = load <16 x i32>* %vaddr, align 1
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test17
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test17(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %res = load <16 x i32>* %vaddr, align 64
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test18
+; CHECK: vmovdqa64
+; CHECK: ret
+define void @test18(i8 * %addr, <8 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
+  ret void
+}
+
diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll
new file mode 100644
index 000000000000..d2d6681fb422
--- /dev/null
+++ b/test/CodeGen/X86/avx512-select.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl  | FileCheck %s
+
+; CHECK-LABEL: select00
+; CHECK: vmovaps
+; CHECK-NEXT: LBB
+define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
+  %cmpres = icmp eq i32 %a, 255
+  %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
+  %res = xor <16 x i32> %b, %selres
+  ret <16 x i32> %res
+}
+
+; CHECK-LABEL: select01
+; CHECK: vmovaps
+; CHECK-NEXT: LBB
+define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
+  %cmpres = icmp eq i32 %a, 255
+  %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
+  %res = xor <8 x i64> %b, %selres
+  ret <8 x i64> %res
+}
+
diff --git a/test/CodeGen/X86/avx512-shift.ll b/test/CodeGen/X86/avx512-shift.ll
new file mode 100644
index 000000000000..8cdcf8ad062f
--- /dev/null
+++ b/test/CodeGen/X86/avx512-shift.ll
@@ -0,0 +1,108 @@
+;RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+;CHECK-LABEL: shift_16_i32
+;CHECK: vpsrld
+;CHECK: vpslld
+;CHECK: vpsrad
+;CHECK: ret
+define <16 x i32> @shift_16_i32(<16 x i32> %a) { ; lshr, shl, ashr by splat constants, in that order
+   %b = lshr <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+   %c = shl <16 x i32> %b, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+   %d = ashr <16 x i32> %c, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+   ret <16 x i32> %d
+}
+
+;CHECK-LABEL: shift_8_i64
+;CHECK: vpsrlq
+;CHECK: vpsllq
+;CHECK: vpsraq
+;CHECK: ret
+define <8 x i64> @shift_8_i64(<8 x i64> %a) { ; lshr, shl, ashr by splat constants, in that order
+   %b = lshr <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+   %c = shl <8 x i64> %b,  <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12>
+   %d = ashr <8 x i64> %c, <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12>
+   ret <8 x i64> %d
+}
+
+; CHECK-LABEL: variable_shl4
+; CHECK: vpsllvq %zmm
+; CHECK: ret
+define <8 x i64> @variable_shl4(<8 x i64> %x, <8 x i64> %y) {
+  %k = shl <8 x i64> %x, %y
+  ret <8 x i64> %k
+}
+
+; CHECK-LABEL: variable_shl5
+; CHECK: vpsllvd %zmm
+; CHECK: ret
+define <16 x i32> @variable_shl5(<16 x i32> %x, <16 x i32> %y) {
+  %k = shl <16 x i32> %x, %y
+  ret <16 x i32> %k
+}
+
+; CHECK-LABEL: variable_srl0
+; CHECK: vpsrlvd
+; CHECK: ret
+define <16 x i32> @variable_srl0(<16 x i32> %x, <16 x i32> %y) {
+  %k = lshr <16 x i32> %x, %y
+  ret <16 x i32> %k
+}
+
+; CHECK-LABEL: variable_srl2
+; CHECK: vpsrlvq
+; CHECK: ret
+define <8 x i64> @variable_srl2(<8 x i64> %x, <8 x i64> %y) { ; per-lane logical right shift, 64-bit elements
+  %k = lshr <8 x i64> %x, %y
+  ret <8 x i64> %k
+}
+
+; CHECK-LABEL: variable_sra1
+; CHECK: vpsravd
+; CHECK: ret
+define <16 x i32> @variable_sra1(<16 x i32> %x, <16 x i32> %y) {
+  %k = ashr <16 x i32> %x, %y
+  ret <16 x i32> %k
+}
+
+; CHECK-LABEL: variable_sra2
+; CHECK: vpsravq %zmm
+; CHECK: ret
+define <8 x i64> @variable_sra2(<8 x i64> %x, <8 x i64> %y) {
+  %k = ashr <8 x i64> %x, %y
+  ret <8 x i64> %k
+}
+
+; CHECK-LABEL: variable_sra01_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <16 x i32> @variable_sra01_load(<16 x i32> %x, <16 x i32>* %y) {
+  %y1 = load <16 x i32>* %y
+  %k = ashr <16 x i32> %x, %y1
+  ret <16 x i32> %k
+}
+
+; CHECK-LABEL: variable_shl1_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <16 x i32> @variable_shl1_load(<16 x i32> %x, <16 x i32>* %y) {
+  %y1 = load <16 x i32>* %y
+  %k = shl <16 x i32> %x, %y1
+  ret <16 x i32> %k
+}
+; CHECK-LABEL: variable_srl0_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <16 x i32> @variable_srl0_load(<16 x i32> %x, <16 x i32>* %y) { ; shift amounts come from memory
+  %y1 = load <16 x i32>* %y
+  %k = lshr <16 x i32> %x, %y1
+  ret <16 x i32> %k
+}
+
+; CHECK-LABEL: variable_srl3_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <8 x i64> @variable_srl3_load(<8 x i64> %x, <8 x i64>* %y) { ; shift amounts come from memory
+  %y1 = load <8 x i64>* %y
+  %k = lshr <8 x i64> %x, %y1
+  ret <8 x i64> %k
+}
diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll
new file mode 100644
index 000000000000..c9e0c2b992d9
--- /dev/null
+++ b/test/CodeGen/X86/avx512-shuffle.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; CHECK: LCP
+; CHECK: .long 2
+; CHECK: .long 5
+; CHECK: .long 0
+; CHECK: .long 0
+; CHECK: .long 7
+; CHECK: .long 0
+; CHECK: .long 10
+; CHECK: .long 1
+; CHECK: .long 0
+; CHECK: .long 5
+; CHECK: .long 0
+; CHECK: .long 4
+; CHECK: .long 7
+; CHECK: .long 0
+; CHECK: .long 10
+; CHECK: .long 1
+; CHECK-LABEL: test1:
+; CHECK: vpermps
+; CHECK: ret
+define <16 x float> @test1(<16 x float> %a) nounwind {
+  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
+  ret <16 x float> %c
+}
+
+; CHECK-LABEL: test2:
+; CHECK: vpermd
+; CHECK: ret
+define <16 x i32> @test2(<16 x i32> %a) nounwind {
+  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
+  ret <16 x i32> %c
+}
+
+; CHECK-LABEL: test3:
+; CHECK: vpermq
+; CHECK: ret
+define <8 x i64> @test3(<8 x i64> %a) nounwind {
+  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
+  ret <8 x i64> %c
+}
+
+; CHECK-LABEL: test4:
+; CHECK: vpermpd
+; CHECK: ret
+define <8 x double> @test4(<8 x double> %a) nounwind {
+  %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x double> %c
+}
+
+; CHECK-LABEL: test5:
+; CHECK: vpermi2pd
+; CHECK: ret
+define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
+  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+  ret <8 x double> %c
+}
+
+; CHECK-LABEL: test6:
+; CHECK: vpermq $30
+; CHECK: ret
+define <8 x i64> @test6(<8 x i64> %a) nounwind {
+  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
+  ret <8 x i64> %c
+}
+
+; CHECK-LABEL: test7:
+; CHECK: vpermi2q
+; CHECK: ret
+define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
+  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+  ret <8 x i64> %c
+}
+
+; CHECK-LABEL: test8:
+; CHECK: vpermi2d
+; CHECK: ret
+define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
+  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x i32> %c
+}
+
+; CHECK-LABEL: test9:
+; CHECK: vpermi2ps
+; CHECK: ret
+define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
+  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x float> %c
+}
+
+; CHECK-LABEL: test10:
+; CHECK: vpermi2ps (
+; CHECK: ret
+define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
+  %c = load <16 x float>* %b
+  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x float> %d
+}
+
+; CHECK-LABEL: test11:
+; CHECK: vpermi2d (
+; CHECK: ret
+define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
+  %c = load <16 x i32>* %b
+  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x i32> %d
+}
+
+; CHECK-LABEL: test12
+; CHECK: vmovlhpsz %xmm
+; CHECK: ret
+define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind {
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x i32> %c
+}
+
+; CHECK-LABEL: test13
+; CHECK: vpermilps $-79, %zmm
+; CHECK: ret
+define <16 x float> @test13(<16 x float> %a) {
+ %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x float> %b
+}
+
+; CHECK-LABEL: test14
+; CHECK: vpermilpd $-53, %zmm
+; CHECK: ret
+define <8 x double> @test14(<8 x double> %a) {
+ %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
+ ret <8 x double> %b
+}
+
+; CHECK-LABEL: test15
+; CHECK: vpshufd $-79, %zmm
+; CHECK: ret
+define <16 x i32> @test15(<16 x i32> %a) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i32> %b
+}
+; CHECK-LABEL: test16
+; CHECK: valignq $2, %zmm0, %zmm1
+; CHECK: ret
+define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
+  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  ret <8 x double> %c
+}
+
+; CHECK-LABEL: test17
+; CHECK: vshufpd $19, %zmm1, %zmm0
+; CHECK: ret
+define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
+  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
+  ret <8 x double> %c
+}
+
+; CHECK-LABEL: test18
+; CHECK: vpunpckhdq %zmm
+; CHECK: ret
+define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15, i32 18, i32 26, i32 19, i32 27, i32 22, i32 30, i32 23, i32 31>
+ ret <16 x i32> %b
+}
+
+; CHECK-LABEL: test19
+; CHECK: vpunpckldq %zmm
+; CHECK: ret
+define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
+ %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
+ ret <16 x i32> %b
+}
+
+; CHECK-LABEL: test20
+; CHECK: vpunpckhqdq  %zmm
+; CHECK: ret
+define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
+ %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 5, i32 3, i32 7, i32 9, i32 13, i32 11, i32 15>
+ ret <8 x i64> %b
+}
+
+; CHECK-LABEL: test21
+; CHECK: vunpcklps %zmm
+; CHECK: ret
+define <16 x float> @test21(<16 x float> %a, <16 x float> %c) {
+ %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
+ ret <16 x float> %b
+}
+
+; CHECK-LABEL: test22
+; CHECK: vmovhlpsz %xmm
+; CHECK: ret
+define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind {
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  ret <4 x i32> %c
+}
+
+; CHECK-LABEL: @test23
+; CHECK: vshufps $-112, %zmm
+; CHECK: ret
+define <16 x float> @test23(<16 x float> %a, <16 x float> %c) {
+ %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
+ ret <16 x float> %b
+}
+
+; CHECK-LABEL: @test24
+; CHECK: vpermi2d
+; CHECK: ret
+define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
+  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i32> %c
+}
+
+; CHECK-LABEL: @test25
+; CHECK: vshufps  $52
+; CHECK: ret
+define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
+  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 undef, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
+  ret <16 x i32> %c
+}
+
+; CHECK-LABEL: @test26
+; CHECK: vmovshdup
+; CHECK: ret
+define <16 x i32> @test26(<16 x i32> %a) nounwind { ; every defined mask entry picks the odd element of its pair
+  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
+  ret <16 x i32> %c
+}
diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll
new file mode 100644
index 000000000000..31db68cc582b
--- /dev/null
+++ b/test/CodeGen/X86/avx512-trunc-ext.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: trunc_16x32_to_16x8
+; CHECK: vpmovdb
+; CHECK: ret
+define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
+  %x = trunc <16 x i32> %i to <16 x i8>
+  ret <16 x i8> %x
+}
+
+; CHECK-LABEL: trunc_8x64_to_8x16
+; CHECK: vpmovqw
+; CHECK: ret
+define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
+  %x = trunc <8 x i64> %i to <8 x i16>
+  ret <8 x i16> %x
+}
+
+
+; CHECK-LABEL: zext_16x8_to_16x32
+; CHECK: vpmovzxbd {{.*}}%zmm
+; CHECK: ret
+define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
+  %x = zext <16 x i8> %i to <16 x i32>
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: sext_16x8_to_16x32
+; CHECK: vpmovsxbd {{.*}}%zmm
+; CHECK: ret
+define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
+  %x = sext <16 x i8> %i to <16 x i32>
+  ret <16 x i32> %x
+}
+
+
+; CHECK-LABEL: zext_16x16_to_16x32
+; CHECK: vpmovzxwd {{.*}}%zmm
+; CHECK: ret
+define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
+  %x = zext <16 x i16> %i to <16 x i32>
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: zext_8x16_to_8x64
+; CHECK: vpmovzxwq
+; CHECK: ret
+define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
+  %x = zext <8 x i16> %i to <8 x i64>
+  ret <8 x i64> %x
+}
+
+;CHECK-LABEL: fptrunc_test
+;CHECK: vcvtpd2ps {{.*}}%zmm
+;CHECK: ret
+define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
+  %b = fptrunc <8 x double> %a to <8 x float>
+  ret <8 x float> %b
+}
+
+;CHECK-LABEL: fpext_test
+;CHECK: vcvtps2pd {{.*}}%zmm
+;CHECK: ret
+define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
+  %b = fpext <8 x float> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+; CHECK-LABEL: zext_16i1_to_16xi32
+; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; CHECK: ret
+define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
+  %a = bitcast i16 %b to <16 x i1>
+  %c = zext <16 x i1> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+; CHECK-LABEL: zext_8i1_to_8xi64
+; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; CHECK: ret
+define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
+  %a = bitcast i8 %b to <8 x i1>
+  %c = zext <8 x i1> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+; CHECK-LABEL: trunc_16i8_to_16i1
+; CHECK: vpmovsxbd
+; CHECK: vpandd
+; CHECK: vptestmd
+; CHECK: ret
+define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
+  %mask_b = trunc <16 x i8>%a to <16 x i1>
+  %mask = bitcast <16 x i1> %mask_b to i16
+  ret i16 %mask
+}
+
+; CHECK-LABEL: trunc_16i32_to_16i1
+; CHECK: vpandd
+; CHECK: vptestmd
+; CHECK: ret
+define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
+  %mask_b = trunc <16 x i32>%a to <16 x i1>
+  %mask = bitcast <16 x i1> %mask_b to i16
+  ret i16 %mask
+}
+
+; CHECK-LABEL: trunc_8i16_to_8i1
+; CHECK: vpmovsxwq
+; CHECK: vpandq LCP{{.*}}(%rip){1to8}
+; CHECK: vptestmq
+; CHECK: ret
+define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
+  %mask_b = trunc <8 x i16>%a to <8 x i1>
+  %mask = bitcast <8 x i1> %mask_b to i8
+  ret i8 %mask
+}
+
+; CHECK-LABEL: sext_8i1_8i32
+; CHECK: vpbroadcastq  LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; CHECK: ret
+define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+  %x = icmp slt <8 x i32> %a1, %a2
+  %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  %y = sext <8 x i1> %x1 to <8 x i32>
+  ret <8 x i32> %y
+}
diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll
new file mode 100644
index 000000000000..6f89d6ce2342
--- /dev/null
+++ b/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+;CHECK-LABEL: _inreg16xi32:
+;CHECK: vpbroadcastd {{.*}}, %zmm
+;CHECK: ret
+define   <16 x i32> @_inreg16xi32(i32 %a) {
+  %b = insertelement <16 x i32> undef, i32 %a, i32 0
+  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+  ret <16 x i32> %c
+}
+
+;CHECK-LABEL: _inreg8xi64:
+;CHECK: vpbroadcastq {{.*}}, %zmm
+;CHECK: ret
+define   <8 x i64> @_inreg8xi64(i64 %a) {
+  %b = insertelement <8 x i64> undef, i64 %a, i32 0
+  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+  ret <8 x i64> %c
+}
+
+;CHECK-LABEL: _inreg16xfloat:
+;CHECK: vbroadcastssz {{.*}}, %zmm
+;CHECK: ret
+define   <16 x float> @_inreg16xfloat(float %a) {
+  %b = insertelement <16 x float> undef, float %a, i32 0
+  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
+  ret <16 x float> %c
+}
+
+;CHECK-LABEL: _inreg8xdouble:
+;CHECK: vbroadcastsdz {{.*}}, %zmm
+;CHECK: ret
+define   <8 x double> @_inreg8xdouble(double %a) {
+  %b = insertelement <8 x double> undef, double %a, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
+  ret <8 x double> %c
+}
+
+;CHECK-LABEL: _xmm16xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define   <16 x i32> @_xmm16xi32(<16 x i32> %a) {
+  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
+  ret <16 x i32> %b
+}
+
+;CHECK-LABEL: _xmm16xfloat
+;CHECK: vbroadcastssz
+;CHECK: ret
+define   <16 x float> @_xmm16xfloat(<16 x float> %a) {
+  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
+  ret <16 x float> %b
+}
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
new file mode 100644
index 000000000000..6ca5bcc3b862
--- /dev/null
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vcmpleps
+; CHECK: vmovups
+; CHECK: ret
+define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
+	%mask = fcmp ole <16 x float> %x, %y
+	%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
+	ret <16 x float> %max
+}
+
+; CHECK-LABEL: test2
+; CHECK: vcmplepd
+; CHECK: vmovupd
+; CHECK: ret
+define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
+	%mask = fcmp ole <8 x double> %x, %y
+	%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
+	ret <8 x double> %max
+}
+
+; CHECK-LABEL: test3
+; CHECK: vpcmpeqd  (%rdi)
+; CHECK: vmovdqu32
+; CHECK: ret
+define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
+  %y = load <16 x i32>* %yp, align 4
+	%mask = icmp eq <16 x i32> %x, %y
+	%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+	ret <16 x i32> %max
+}
+
+; CHECK-LABEL: @test4_unsigned
+; CHECK: vpcmpnltud
+; CHECK: vmovdqu32
+; CHECK: ret
+define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
+	%mask = icmp uge <16 x i32> %x, %y
+	%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+	ret <16 x i32> %max
+}
+
+; CHECK-LABEL: test5
+; CHECK: vpcmpeqq {{.*}}%k1
+; CHECK: vmovdqu64 {{.*}}%k1
+; CHECK: ret
+define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
+	%mask = icmp eq <8 x i64> %x, %y
+	%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
+	ret <8 x i64> %max
+}
+
+; CHECK-LABEL: test6_unsigned
+; CHECK: vpcmpnleuq {{.*}}%k1
+; CHECK: vmovdqu64 {{.*}}%k1
+; CHECK: ret
+define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
+	%mask = icmp ugt <8 x i64> %x, %y
+	%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
+	ret <8 x i64> %max
+}
+
+; CHECK-LABEL: test7
+; CHECK: xor
+; CHECK: vcmpltps
+; CHECK: vblendvps
+; CHECK: ret
+define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
+  %mask = fcmp olt <4 x float> %a, zeroinitializer
+  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
+  ret <4 x float>%c
+}
+
+; CHECK-LABEL: test8
+; CHECK: xor
+; CHECK: vcmpltpd
+; CHECK: vblendvpd
+; CHECK: ret
+define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
+  %mask = fcmp olt <2 x double> %a, zeroinitializer
+  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
+  ret <2 x double>%c
+}
+
+; CHECK-LABEL: test9
+; CHECK: vpcmpeqd
+; CHECK: vpblendmd
+; CHECK: ret
+define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
+  %mask = icmp eq <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test10
+; CHECK: vcmpeqps
+; CHECK: vblendmps
+; CHECK: ret
+define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
+  %mask = fcmp oeq <8 x float> %x, %y
+  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
+  ret <8 x float> %max
+}
+
+; CHECK-LABEL: test11_unsigned
+; CHECK: vpmaxud
+; CHECK: ret
+define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
+  %mask = icmp ugt <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
+  ret <8 x i32> %max
+}
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
index ceabcb71a78b..a1c0f5ae527c 100644
--- a/test/CodeGen/X86/bc-extract.ll
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 |  FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 |  FileCheck %s
 
 
 define float @extractFloat1() nounwind {
diff --git a/test/CodeGen/X86/bigstructret2.ll b/test/CodeGen/X86/bigstructret2.ll
index 46e0fd217646..6a79139d9bcf 100644
--- a/test/CodeGen/X86/bigstructret2.ll
+++ b/test/CodeGen/X86/bigstructret2.ll
@@ -1,4 +1,12 @@
-; RUN: llc < %s -march=x86 -o %t
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | FileCheck %s
+
+; CHECK: .cfi_startproc
+; CHECK: .cfi_def_cfa_offset 8
+; CHECK: .cfi_def_cfa_offset 12
+; CHECK: .cfi_def_cfa_offset 32
+; CHECK: .cfi_offset %esi, -12
+; CHECK: .cfi_offset %edi, -8
+; CHECK: .cfi_endproc
 
 %0 = type { i64, i64 }
 
diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll
index 48922b5f5a13..12aa863a37a1 100644
--- a/test/CodeGen/X86/bitcast2.ll
+++ b/test/CodeGen/X86/bitcast2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep movd | count 2
-; RUN: llc < %s -march=x86-64 | not grep rsp
+; RUN: llc < %s -march=x86-64 -mattr=-avx | grep movd | count 2
+; RUN: llc < %s -march=x86-64 -mattr=-avx | not grep rsp
 
 define i64 @test1(double %A) {
    %B = bitcast double %A to i64
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index e565da74a082..4f2060f7012b 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
 
 
 ; In this test we check that sign-extend of the mask bit is performed by
 ; shifting the needed bit to the MSB, and not using shl+sra.
 
-;CHECK: vsel_float
+;CHECK-LABEL: vsel_float:
 ;CHECK: movl $-2147483648
 ;CHECK-NEXT: movd
 ;CHECK-NEXT: blendvps
@@ -14,7 +14,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
   ret <4 x float> %vsel
 }
 
-;CHECK: vsel_4xi8
+;CHECK-LABEL: vsel_4xi8:
 ;CHECK: movl $-2147483648
 ;CHECK-NEXT: movd
 ;CHECK-NEXT: blendvps
@@ -28,7 +28,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 ; We do not have native support for v8i16 blends and we have to use the
 ; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not r
 ; reduce the mask in this case.
-;CHECK: vsel_8xi16
+;CHECK-LABEL: vsel_8xi16:
 ;CHECK: psllw
 ;CHECK: psraw
 ;CHECK: pblendvb
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 271fb4250517..d3e05d6fbed2 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
+; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s
 
 declare void @error(i32 %i, i32 %a, i32 %b)
 
 define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
 ; Test a chain of ifs, where the block guarded by the if is error handling code
 ; that is not expected to run.
-; CHECK: test_ifchains:
+; CHECK-LABEL: test_ifchains:
 ; CHECK: %entry
 ; CHECK-NOT: .align
 ; CHECK: %else1
@@ -79,7 +79,7 @@ exit:
 
 define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
 ; Check that we sink cold loop blocks after the hot loop body.
-; CHECK: test_loop_cold_blocks:
+; CHECK-LABEL: test_loop_cold_blocks:
 ; CHECK: %entry
 ; CHECK-NOT: .align
 ; CHECK: %unlikely1
@@ -128,7 +128,7 @@ exit:
 
 define i32 @test_loop_early_exits(i32 %i, i32* %a) {
 ; Check that we sink early exit blocks out of loop bodies.
-; CHECK: test_loop_early_exits:
+; CHECK-LABEL: test_loop_early_exits:
 ; CHECK: %entry
 ; CHECK: %body1
 ; CHECK: %body2
@@ -180,7 +180,7 @@ exit:
 define i32 @test_loop_rotate(i32 %i, i32* %a) {
 ; Check that we rotate conditional exits from the loop to the bottom of the
 ; loop, eliminating unconditional branches to the top.
-; CHECK: test_loop_rotate:
+; CHECK-LABEL: test_loop_rotate:
 ; CHECK: %entry
 ; CHECK: %body1
 ; CHECK: %body0
@@ -210,7 +210,7 @@ exit:
 define i32 @test_no_loop_rotate(i32 %i, i32* %a) {
 ; Check that we don't try to rotate a loop which is already laid out with
 ; fallthrough opportunities into the top and out of the bottom.
-; CHECK: test_no_loop_rotate:
+; CHECK-LABEL: test_no_loop_rotate:
 ; CHECK: %entry
 ; CHECK: %body0
 ; CHECK: %body1
@@ -278,7 +278,7 @@ exit:
 define i32 @test_loop_align(i32 %i, i32* %a) {
 ; Check that we provide basic loop body alignment with the block placement
 ; pass.
-; CHECK: test_loop_align:
+; CHECK-LABEL: test_loop_align:
 ; CHECK: %entry
 ; CHECK: .align [[ALIGN:[0-9]+]],
 ; CHECK-NEXT: %body
@@ -303,7 +303,7 @@ exit:
 
 define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
 ; Check that we provide nested loop body alignment.
-; CHECK: test_nested_loop_align:
+; CHECK-LABEL: test_nested_loop_align:
 ; CHECK: %entry
 ; CHECK: .align [[ALIGN]],
 ; CHECK-NEXT: %loop.body.1
@@ -997,7 +997,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
 ; CHECK: %while.body
 ; CHECK: %land.lhs.true
 ; CHECK: %if.then19
-; CHECK: %if.then19
+; CHECK: %if.end20
 ; CHECK: %if.then8
 ; CHECK: ret
 
@@ -1089,3 +1089,35 @@ while.end:
   store double %rra.0, double* %arrayidx34, align 8
   br label %for.cond
 }
+
+declare void @cold_function() cold
+
+define i32 @test_cold_calls(i32* %a) {
+; Test that edges to blocks post-dominated by cold calls are
+; marked as not expected to be taken.  They should be laid out
+; at the bottom.
+; CHECK-LABEL: test_cold_calls:
+; CHECK: %entry
+; CHECK: %else
+; CHECK: %exit
+; CHECK: %then
+
+entry:
+  %gep1 = getelementptr i32* %a, i32 1
+  %val1 = load i32* %gep1
+  %cond1 = icmp ugt i32 %val1, 1
+  br i1 %cond1, label %then, label %else
+
+then:
+  call void @cold_function()
+  br label %exit
+
+else:
+  %gep2 = getelementptr i32* %a, i32 2
+  %val2 = load i32* %gep2
+  br label %exit
+
+exit:
+  %ret = phi i32 [ %val1, %then ], [ %val2, %else ]
+  ret i32 %ret
+}
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index b89e648c52d9..242075a878bb 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -8,21 +8,21 @@ declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
 define i8 @t1(i8 %x) nounwind  {
   %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
   ret i8 %tmp
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: tzcntl
 }
 
 define i16 @t2(i16 %x) nounwind  {
   %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
   ret i16 %tmp
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: tzcntw
 }
 
 define i32 @t3(i32 %x) nounwind  {
   %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
   ret i32 %tmp
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: tzcntl
 }
 
@@ -30,42 +30,42 @@ define i32 @tzcnt32_load(i32* %x) nounwind  {
   %x1 = load i32* %x
   %tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
   ret i32 %tmp
-; CHECK: tzcnt32_load:
+; CHECK-LABEL: tzcnt32_load:
 ; CHECK: tzcntl ({{.*}})
 }
 
 define i64 @t4(i64 %x) nounwind  {
   %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
   ret i64 %tmp
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK: tzcntq
 }
 
 define i8 @t5(i8 %x) nounwind  {
   %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
   ret i8 %tmp
-; CHECK: t5:
+; CHECK-LABEL: t5:
 ; CHECK: tzcntl
 }
 
 define i16 @t6(i16 %x) nounwind  {
   %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
   ret i16 %tmp
-; CHECK: t6:
+; CHECK-LABEL: t6:
 ; CHECK: tzcntw
 }
 
 define i32 @t7(i32 %x) nounwind  {
   %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
   ret i32 %tmp
-; CHECK: t7:
+; CHECK-LABEL: t7:
 ; CHECK: tzcntl
 }
 
 define i64 @t8(i64 %x) nounwind  {
   %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
   ret i64 %tmp
-; CHECK: t8:
+; CHECK-LABEL: t8:
 ; CHECK: tzcntq
 }
 
@@ -73,7 +73,7 @@ define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
   %tmp1 = xor i32 %x, -1
   %tmp2 = and i32 %y, %tmp1
   ret i32 %tmp2
-; CHECK: andn32:
+; CHECK-LABEL: andn32:
 ; CHECK: andnl
 }
 
@@ -82,7 +82,7 @@ define i32 @andn32_load(i32 %x, i32* %y) nounwind readnone {
   %tmp1 = xor i32 %x, -1
   %tmp2 = and i32 %y1, %tmp1
   ret i32 %tmp2
-; CHECK: andn32_load:
+; CHECK-LABEL: andn32_load:
 ; CHECK: andnl ({{.*}})
 }
 
@@ -90,14 +90,14 @@ define i64 @andn64(i64 %x, i64 %y) nounwind readnone {
   %tmp1 = xor i64 %x, -1
   %tmp2 = and i64 %tmp1, %y
   ret i64 %tmp2
-; CHECK: andn64:
+; CHECK-LABEL: andn64:
 ; CHECK: andnq
 }
 
 define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
   %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
   ret i32 %tmp
-; CHECK: bextr32:
+; CHECK-LABEL: bextr32:
 ; CHECK: bextrl
 }
 
@@ -105,25 +105,50 @@ define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone {
   %x1 = load i32* %x
   %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
   ret i32 %tmp
-; CHECK: bextr32_load:
+; CHECK-LABEL: bextr32_load:
 ; CHECK: bextrl {{.*}}, ({{.*}}), {{.*}}
 }
 
 declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
 
+define i32 @bextr32b(i32 %x) nounwind uwtable readnone ssp {
+  %1 = lshr i32 %x, 4
+  %2 = and i32 %1, 4095
+  ret i32 %2
+; CHECK-LABEL: bextr32b:
+; CHECK: bextrl
+}
+
+define i32 @bextr32b_load(i32* %x) nounwind uwtable readnone ssp {
+  %1 = load i32* %x
+  %2 = lshr i32 %1, 4
+  %3 = and i32 %2, 4095
+  ret i32 %3
+; CHECK-LABEL: bextr32b_load:
+; CHECK: bextrl {{.*}}, ({{.*}}), {{.*}}
+}
+
 define i64 @bextr64(i64 %x, i64 %y) nounwind readnone {
   %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
   ret i64 %tmp
-; CHECK: bextr64:
+; CHECK-LABEL: bextr64:
 ; CHECK: bextrq
 }
 
 declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
 
+define i64 @bextr64b(i64 %x) nounwind uwtable readnone ssp {
+  %1 = lshr i64 %x, 4
+  %2 = and i64 %1, 4095
+  ret i64 %2
+; CHECK-LABEL: bextr64b:
+; CHECK: bextrq
+}
+
 define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
   %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
   ret i32 %tmp
-; CHECK: bzhi32:
+; CHECK-LABEL: bzhi32:
 ; CHECK: bzhil
 }
 
@@ -131,7 +156,7 @@ define i32 @bzhi32_load(i32* %x, i32 %y) nounwind readnone {
   %x1 = load i32* %x
   %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
   ret i32 %tmp
-; CHECK: bzhi32_load:
+; CHECK-LABEL: bzhi32_load:
 ; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}}
 }
 
@@ -140,17 +165,62 @@ declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
 define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone {
   %tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
   ret i64 %tmp
-; CHECK: bzhi64:
+; CHECK-LABEL: bzhi64:
 ; CHECK: bzhiq
 }
 
 declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
 
+define i32 @bzhi32b(i32 %x, i8 zeroext %index) #0 {
+entry:
+  %conv = zext i8 %index to i32
+  %shl = shl i32 1, %conv
+  %sub = add nsw i32 %shl, -1
+  %and = and i32 %sub, %x
+  ret i32 %and
+; CHECK-LABEL: bzhi32b:
+; CHECK: bzhil
+}
+
+define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) #0 {
+entry:
+  %x = load i32* %w
+  %conv = zext i8 %index to i32
+  %shl = shl i32 1, %conv
+  %sub = add nsw i32 %shl, -1
+  %and = and i32 %sub, %x
+  ret i32 %and
+; CHECK-LABEL: bzhi32b_load:
+; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}}
+}
+
+define i32 @bzhi32c(i32 %x, i8 zeroext %index) #0 {
+entry:
+  %conv = zext i8 %index to i32
+  %shl = shl i32 1, %conv
+  %sub = add nsw i32 %shl, -1
+  %and = and i32 %x, %sub
+  ret i32 %and
+; CHECK-LABEL: bzhi32c:
+; CHECK: bzhil
+}
+
+define i64 @bzhi64b(i64 %x, i8 zeroext %index) #0 {
+entry:
+  %conv = zext i8 %index to i64
+  %shl = shl i64 1, %conv
+  %sub = add nsw i64 %shl, -1
+  %and = and i64 %x, %sub
+  ret i64 %and
+; CHECK-LABEL: bzhi64b:
+; CHECK: bzhiq
+}
+
 define i32 @blsi32(i32 %x) nounwind readnone {
   %tmp = sub i32 0, %x
   %tmp2 = and i32 %x, %tmp
   ret i32 %tmp2
-; CHECK: blsi32:
+; CHECK-LABEL: blsi32:
 ; CHECK: blsil
 }
 
@@ -159,7 +229,7 @@ define i32 @blsi32_load(i32* %x) nounwind readnone {
   %tmp = sub i32 0, %x1
   %tmp2 = and i32 %x1, %tmp
   ret i32 %tmp2
-; CHECK: blsi32_load:
+; CHECK-LABEL: blsi32_load:
 ; CHECK: blsil ({{.*}})
 }
 
@@ -167,7 +237,7 @@ define i64 @blsi64(i64 %x) nounwind readnone {
   %tmp = sub i64 0, %x
   %tmp2 = and i64 %tmp, %x
   ret i64 %tmp2
-; CHECK: blsi64:
+; CHECK-LABEL: blsi64:
 ; CHECK: blsiq
 }
 
@@ -175,7 +245,7 @@ define i32 @blsmsk32(i32 %x) nounwind readnone {
   %tmp = sub i32 %x, 1
   %tmp2 = xor i32 %x, %tmp
   ret i32 %tmp2
-; CHECK: blsmsk32:
+; CHECK-LABEL: blsmsk32:
 ; CHECK: blsmskl
 }
 
@@ -184,7 +254,7 @@ define i32 @blsmsk32_load(i32* %x) nounwind readnone {
   %tmp = sub i32 %x1, 1
   %tmp2 = xor i32 %x1, %tmp
   ret i32 %tmp2
-; CHECK: blsmsk32_load:
+; CHECK-LABEL: blsmsk32_load:
 ; CHECK: blsmskl ({{.*}})
 }
 
@@ -192,7 +262,7 @@ define i64 @blsmsk64(i64 %x) nounwind readnone {
   %tmp = sub i64 %x, 1
   %tmp2 = xor i64 %tmp, %x
   ret i64 %tmp2
-; CHECK: blsmsk64:
+; CHECK-LABEL: blsmsk64:
 ; CHECK: blsmskq
 }
 
@@ -200,7 +270,7 @@ define i32 @blsr32(i32 %x) nounwind readnone {
   %tmp = sub i32 %x, 1
   %tmp2 = and i32 %x, %tmp
   ret i32 %tmp2
-; CHECK: blsr32:
+; CHECK-LABEL: blsr32:
 ; CHECK: blsrl
 }
 
@@ -209,7 +279,7 @@ define i32 @blsr32_load(i32* %x) nounwind readnone {
   %tmp = sub i32 %x1, 1
   %tmp2 = and i32 %x1, %tmp
   ret i32 %tmp2
-; CHECK: blsr32_load:
+; CHECK-LABEL: blsr32_load:
 ; CHECK: blsrl ({{.*}})
 }
 
@@ -217,14 +287,14 @@ define i64 @blsr64(i64 %x) nounwind readnone {
   %tmp = sub i64 %x, 1
   %tmp2 = and i64 %tmp, %x
   ret i64 %tmp2
-; CHECK: blsr64:
+; CHECK-LABEL: blsr64:
 ; CHECK: blsrq
 }
 
 define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y)
   ret i32 %tmp
-; CHECK: pdep32:
+; CHECK-LABEL: pdep32:
 ; CHECK: pdepl
 }
 
@@ -232,7 +302,7 @@ define i32 @pdep32_load(i32 %x, i32* %y) nounwind readnone {
   %y1 = load i32* %y
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
   ret i32 %tmp
-; CHECK: pdep32_load:
+; CHECK-LABEL: pdep32_load:
 ; CHECK: pdepl ({{.*}})
 }
 
@@ -241,7 +311,7 @@ declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
 define i64 @pdep64(i64 %x, i64 %y) nounwind readnone {
   %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
   ret i64 %tmp
-; CHECK: pdep64:
+; CHECK-LABEL: pdep64:
 ; CHECK: pdepq
 }
 
@@ -250,7 +320,7 @@ declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
 define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
   %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y)
   ret i32 %tmp
-; CHECK: pext32:
+; CHECK-LABEL: pext32:
 ; CHECK: pextl
 }
 
@@ -258,7 +328,7 @@ define i32 @pext32_load(i32 %x, i32* %y) nounwind readnone {
   %y1 = load i32* %y
   %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
   ret i32 %tmp
-; CHECK: pext32_load:
+; CHECK-LABEL: pext32_load:
 ; CHECK: pextl ({{.*}})
 }
 
@@ -267,7 +337,7 @@ declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
 define i64 @pext64(i64 %x, i64 %y) nounwind readnone {
   %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
   ret i64 %tmp
-; CHECK: pext64:
+; CHECK-LABEL: pext64:
 ; CHECK: pextq
 }
 
diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll
index fa6f6e85e9b8..a0a1c3646624 100644
--- a/test/CodeGen/X86/bool-simplify.ll
+++ b/test/CodeGen/X86/bool-simplify.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand,+rdseed | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx,+rdrnd,+rdseed | FileCheck %s
 
 define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index bc4032b13cc0..3ebe1a1d2357 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -4,7 +4,7 @@
 
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: xorb
 ; CHECK-NOT: andb
 ; CHECK-NOT: shrb
@@ -44,7 +44,7 @@ bb1:                                              ; preds = %entry
 
 return:                                           ; preds = %entry
   ret i32 192
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK:	movl	4(%esp), %eax
 ; CHECK-NEXT:	orl	8(%esp), %eax
 ; CHECK-NEXT:	jne	LBB1_2
@@ -63,7 +63,7 @@ bb1:                                              ; preds = %entry
 
 return:                                           ; preds = %entry
   ret i32 192
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK:	movl	4(%esp), %eax
 ; CHECK-NEXT:	orl	8(%esp), %eax
 ; CHECK-NEXT:	je	LBB2_2
@@ -113,7 +113,7 @@ declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
 
 define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: ptest
 ; CHECK-NEXT:	jne
 ; CHECK: ret
@@ -137,7 +137,7 @@ return:
 
 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: ptest
 ; CHECK-NEXT:	jne
 ; CHECK: ret
@@ -161,7 +161,7 @@ return:
 
 define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: ptest
 ; CHECK-NEXT:	jae
 ; CHECK: ret
@@ -185,7 +185,7 @@ return:
 
 define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: ptest
 ; CHECK-NEXT:	jae
 ; CHECK: ret
@@ -209,7 +209,7 @@ return:
 
 define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: ptest
 ; CHECK-NEXT:	jne
 ; CHECK: ret
@@ -233,7 +233,7 @@ return:
 
 define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
 entry:
-; CHECK: test12:
+; CHECK-LABEL: test12:
 ; CHECK: ptest
 ; CHECK-NEXT:	je
 ; CHECK: ret
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index c94261467c9d..614d0adc7271 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -1,7 +1,7 @@
 ; Without list-burr scheduling we may not see the difference in codegen here.
 ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
 ; breaker requires liveness information to be kept.
-; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
+; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
 ; RUN:   grep "%xmm0" %t | count 14
 ; RUN:   not grep "%xmm1" %t
 ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t
diff --git a/test/CodeGen/X86/break-avx-dep.ll b/test/CodeGen/X86/break-avx-dep.ll
new file mode 100644
index 000000000000..210bda136b57
--- /dev/null
+++ b/test/CodeGen/X86/break-avx-dep.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+;
+; rdar:15221834 False AVX register dependencies cause 5x slowdown on
+; flops-6. Make sure the unused register read by vcvtsi2sdq is zeroed
+; to avoid cyclic dependence on a write to the same register in a
+; previous iteration.
+
+; CHECK-LABEL: t1:
+; CHECK-LABEL: %loop
+; CHECK: vxorps %[[REG:xmm.]], %{{xmm.}}, %{{xmm.}}
+; CHECK: vcvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]], %{{xmm.}}
+define i64 @t1(i64* nocapture %x, double* nocapture %y) nounwind {
+entry:
+  %vx = load i64* %x
+  br label %loop
+loop:
+  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
+  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
+  %fi = sitofp i64 %i to double
+  %vy = load double* %y
+  %fipy = fadd double %fi, %vy
+  %iipy = fptosi double %fipy to i64
+  %s2 = add i64 %s1, %iipy
+  %inc = add nsw i64 %i, 1
+  %exitcond = icmp eq i64 %inc, 156250000
+  br i1 %exitcond, label %ret, label %loop
+ret:
+  ret i64 %s2
+}
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
index 4d801891da5c..8124d6f52263 100644
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -3,7 +3,7 @@
 
 define double @t1(float* nocapture %x) nounwind readonly ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: movss ([[A0:%rdi|%rcx]]), %xmm0
 ; CHECK: cvtss2sd %xmm0, %xmm0
 
@@ -14,7 +14,7 @@ entry:
 
 define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: cvtsd2ss ([[A0]]), %xmm0
   %0 = load double* %x, align 8
   %1 = fptrunc double %0 to float
@@ -23,7 +23,7 @@ entry:
 
 define float @squirtf(float* %x) nounwind {
 entry:
-; CHECK: squirtf:
+; CHECK-LABEL: squirtf:
 ; CHECK: movss ([[A0]]), %xmm0
 ; CHECK: sqrtss %xmm0, %xmm0
   %z = load float* %x
@@ -33,7 +33,7 @@ entry:
 
 define double @squirt(double* %x) nounwind {
 entry:
-; CHECK: squirt:
+; CHECK-LABEL: squirt:
 ; CHECK: sqrtsd ([[A0]]), %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
@@ -42,7 +42,7 @@ entry:
 
 define float @squirtf_size(float* %x) nounwind optsize {
 entry:
-; CHECK: squirtf_size:
+; CHECK-LABEL: squirtf_size:
 ; CHECK: sqrtss ([[A0]]), %xmm0
   %z = load float* %x
   %t = call float @llvm.sqrt.f32(float %z)
@@ -51,7 +51,7 @@ entry:
 
 define double @squirt_size(double* %x) nounwind optsize {
 entry:
-; CHECK: squirt_size:
+; CHECK-LABEL: squirt_size:
 ; CHECK: sqrtsd ([[A0]]), %xmm0
   %z = load double* %x
   %t = call double @llvm.sqrt.f64(double %z)
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index d69bfa6e7eb7..f8f154c0688f 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -3,84 +3,84 @@
 
 ; CHK-NOT: InlineAsm
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: bswapq
 define i64 @foo(i64 %x) nounwind {
 	%asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
 	ret i64 %asmtmp
 }
 
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: bswapq
 define i64 @bar(i64 %x) nounwind {
 	%asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
 	ret i64 %asmtmp
 }
 
-; CHECK: pen:
+; CHECK-LABEL: pen:
 ; CHECK: bswapl
 define i32 @pen(i32 %x) nounwind {
 	%asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
 	ret i32 %asmtmp
 }
 
-; CHECK: s16:
+; CHECK-LABEL: s16:
 ; CHECK: rolw    $8,
 define zeroext i16 @s16(i16 zeroext %x) nounwind {
   %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
   ret i16 %asmtmp
 }
 
-; CHECK: t16:
+; CHECK-LABEL: t16:
 ; CHECK: rolw    $8,
 define zeroext i16 @t16(i16 zeroext %x) nounwind {
   %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
   ret i16 %asmtmp
 }
 
-; CHECK: u16:
+; CHECK-LABEL: u16:
 ; CHECK: rolw    $8,
 define zeroext i16 @u16(i16 zeroext %x) nounwind {
   %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
   ret i16 %asmtmp
 }
 
-; CHECK: v16:
+; CHECK-LABEL: v16:
 ; CHECK: rolw    $8,
 define zeroext i16 @v16(i16 zeroext %x) nounwind {
   %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
   ret i16 %asmtmp
 }
 
-; CHECK: s32:
+; CHECK-LABEL: s32:
 ; CHECK: bswapl
 define i32 @s32(i32 %x) nounwind {
   %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
   ret i32 %asmtmp
 }
 
-; CHECK: t32:
+; CHECK-LABEL: t32:
 ; CHECK: bswapl
 define i32 @t32(i32 %x) nounwind {
   %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
   ret i32 %asmtmp
 }
 
-; CHECK: u32:
+; CHECK-LABEL: u32:
 ; CHECK: bswapl
 define i32 @u32(i32 %x) nounwind {
   %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
   ret i32 %asmtmp
 }
 
-; CHECK: s64:
+; CHECK-LABEL: s64:
 ; CHECK: bswapq
 define i64 @s64(i64 %x) nounwind {
   %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
   ret i64 %asmtmp
 }
 
-; CHECK: t64:
+; CHECK-LABEL: t64:
 ; CHECK: bswapq
 define i64 @t64(i64 %x) nounwind {
   %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index d2d6f9099a1b..e6a456c39ddd 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,6 +1,7 @@
 ; bswap should be constant folded when it is passed a constant argument
 
 ; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64
 
 declare i16 @llvm.bswap.i16(i16)
 
@@ -9,23 +10,32 @@ declare i32 @llvm.bswap.i32(i32)
 declare i64 @llvm.bswap.i64(i64)
 
 define i16 @W(i16 %A) {
-; CHECK: W:
+; CHECK-LABEL: W:
 ; CHECK: rolw $8, %ax
+
+; CHECK64-LABEL: W:
+; CHECK64: rolw $8, %
         %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
         ret i16 %Z
 }
 
 define i32 @X(i32 %A) {
-; CHECK: X:
+; CHECK-LABEL: X:
 ; CHECK: bswapl %eax
+
+; CHECK64-LABEL: X:
+; CHECK64: bswapl %
         %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
         ret i32 %Z
 }
 
 define i64 @Y(i64 %A) {
-; CHECK: Y:
+; CHECK-LABEL: Y:
 ; CHECK: bswapl %eax
 ; CHECK: bswapl %edx
+
+; CHECK64-LABEL: Y:
+; CHECK64: bswapq %
         %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
         ret i64 %Z
 }
@@ -33,9 +43,13 @@ define i64 @Y(i64 %A) {
 ; rdar://9164521
 define i32 @test1(i32 %a) nounwind readnone {
 entry:
-; CHECK: test1
-; CHECK: bswapl %eax
-; CHECK: shrl $16, %eax
+; CHECK-LABEL: test1:
+; CHECK: bswapl [[REG:%.*]]
+; CHECK: shrl $16, [[REG]]
+
+; CHECK64-LABEL: test1:
+; CHECK64: bswapl [[REG:%.*]]
+; CHECK64: shrl $16, [[REG]]
   %and = lshr i32 %a, 8
   %shr3 = and i32 %and, 255
   %and2 = shl i32 %a, 8
@@ -46,9 +60,13 @@ entry:
 
 define i32 @test2(i32 %a) nounwind readnone {
 entry:
-; CHECK: test2
-; CHECK: bswapl %eax
-; CHECK: sarl $16, %eax
+; CHECK-LABEL: test2:
+; CHECK: bswapl [[REG:%.*]]
+; CHECK: sarl $16, [[REG]]
+
+; CHECK64-LABEL: test2:
+; CHECK64: bswapl [[REG:%.*]]
+; CHECK64: sarl $16, [[REG]]
   %and = lshr i32 %a, 8
   %shr4 = and i32 %and, 255
   %and2 = shl i32 %a, 8
@@ -57,3 +75,80 @@ entry:
   %conv3 = ashr exact i32 %sext, 16
   ret i32 %conv3
 }
+
+@var8 = global i8 0
+@var16 = global i16 0
+
+; The "shl" below is unmasked, so it also moves the high byte of the 16-bit
+; value up into bits 16-23; the operation is not a "bswap, shr" pair.
+
+; rdar://problem/14814049
+define i64 @not_bswap() {
+; CHECK-LABEL: not_bswap:
+; CHECK-NOT: bswapl
+; CHECK: ret
+
+; CHECK64-LABEL: not_bswap:
+; CHECK64-NOT: bswapq
+; CHECK64: ret
+  %init = load i16* @var16
+  %big = zext i16 %init to i64         ; bits 16-63 are zero
+
+  %hishifted = lshr i64 %big, 8        ; high byte -> bits 0-7
+  %loshifted = shl i64 %big, 8         ; all 16 bits -> bits 8-23 (no mask)
+
+  %notswapped = or i64 %hishifted, %loshifted
+
+  ret i64 %notswapped
+}
+
+; This time, the lshr (and subsequent or) is completely useless. While it's
+; technically correct to convert this into a "bswap, shr", it's suboptimal. A
+; simple shl works better.
+
+define i64 @not_useful_bswap() {
+; CHECK-LABEL: not_useful_bswap:
+; CHECK-NOT: bswapl
+; CHECK: ret
+
+; CHECK64-LABEL: not_useful_bswap:
+; CHECK64-NOT: bswapq
+; CHECK64: ret
+
+  %init = load i8* @var8
+  %big = zext i8 %init to i64          ; only bits 0-7 can be set
+
+  %hishifted = lshr i64 %big, 8        ; always zero: all 8 live bits shifted out
+  %loshifted = shl i64 %big, 8         ; the byte -> bits 8-15
+
+  %notswapped = or i64 %hishifted, %loshifted ; same value as %loshifted alone
+
+  ret i64 %notswapped
+}
+
+; Finally, it *is* OK when the shl input is masked to its low byte and the value
+; is known to be zero beyond 16 bits. This is a legitimate bswap.
+
+define i64 @finally_useful_bswap() {
+; CHECK-LABEL: finally_useful_bswap:
+; CHECK: bswapl [[REG:%.*]]
+; CHECK: shrl $16, [[REG]]
+; CHECK: ret
+
+; CHECK64-LABEL: finally_useful_bswap:
+; CHECK64: bswapq [[REG:%.*]]
+; CHECK64: shrq $48, [[REG]]
+; CHECK64: ret
+
+  %init = load i16* @var16
+  %big = zext i16 %init to i64         ; bits 16-63 are zero
+
+  %hishifted = lshr i64 %big, 8        ; high byte -> bits 0-7
+  %lomasked = and i64 %big, 255        ; keep only the low byte
+  %loshifted = shl i64 %lomasked, 8    ; low byte -> bits 8-15
+
+  %swapped = or i64 %hishifted, %loshifted ; the two bytes of %init exchanged
+
+  ret i64 %swapped
+}
+
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index e28923bb21d2..f12a3543b072 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -38,7 +38,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @test2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: test2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -56,7 +56,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atest2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atest2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -74,7 +74,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atest2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atest2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -91,7 +91,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @test3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: test3
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -109,7 +109,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @test3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: test3b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -127,7 +127,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -145,7 +145,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -163,7 +163,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atestne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atestne2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -181,7 +181,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @atestne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: atestne2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -199,7 +199,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne3
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -217,7 +217,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @testne3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: testne3b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -235,7 +235,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -253,7 +253,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -271,7 +271,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aquery2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aquery2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -289,7 +289,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aquery2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aquery2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -307,7 +307,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -325,7 +325,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -343,7 +343,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3x(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3x
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -361,7 +361,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @query3bx(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: query3bx
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -379,7 +379,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -397,7 +397,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -415,7 +415,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aqueryne2(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aqueryne2
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
@@ -433,7 +433,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @aqueryne2b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: aqueryne2b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
@@ -451,7 +451,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -469,7 +469,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3b(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3b
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
@@ -487,7 +487,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3x(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3x
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
@@ -505,7 +505,7 @@ UnifiedReturnBlock:		; preds = %entry
 define void @queryne3bx(i32 %x, i32 %n) nounwind {
 entry:
 ; CHECK: queryne3bx
-; CHECK: btl %eax, %ecx
+; CHECK: btl %e{{..}}, %e{{..}}
 ; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
diff --git a/test/CodeGen/X86/btq.ll b/test/CodeGen/X86/btq.ll
index 9c137a7239bb..add65765e389 100644
--- a/test/CodeGen/X86/btq.ll
+++ b/test/CodeGen/X86/btq.ll
@@ -7,7 +7,7 @@ define void @test1(i64 %foo) nounwind {
   %tobool = icmp eq i64 %and, 0
   br i1 %tobool, label %if.end, label %if.then
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: btq $32
 
 if.then:
@@ -23,7 +23,7 @@ define void @test2(i64 %foo) nounwind {
   %tobool = icmp eq i64 %and, 0
   br i1 %tobool, label %if.end, label %if.then
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: testl $-2147483648
 
 if.then:
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 98a26e47ab76..42751d7dbc93 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -6,15 +6,15 @@
 
 define i32 @main() nounwind  {
 entry:
-; CHECK: main:
-; CHECK: movl $1, (%esp)
+; CHECK-LABEL: main:
 ; CHECK: leal 16(%esp), %edi
 ; CHECK: leal 160(%esp), %esi
 ; CHECK: rep;movsl
+; CHECK: movl $1, (%esp)
 	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
 	%tmp15 = getelementptr %struct.S* %s, i32 0, i32 0		; <<2 x i64>*> [#uses=1]
 	store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
-	call void @t( i32 1, %struct.S* byval  %s ) nounwind 
+	call void @t( i32 1, %struct.S* byval  %s ) nounwind
 	ret i32 0
 }
 
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index e69f8c1ebf79..ccb98fefae2a 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -4,7 +4,7 @@
         %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
 
 define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
-; CHECK: decode_byte:
+; CHECK-LABEL: decode_byte:
 ; CHECK: pushl
 ; CHECK: popl
 ; CHECK: jmp
diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll
new file mode 100644
index 000000000000..c88726e75a81
--- /dev/null
+++ b/test/CodeGen/X86/chain_order.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mcpu=corei7-avx -mtriple=x86_64-linux | FileCheck %s
+
+;CHECK-LABEL: cftx020:
+;CHECK: vmovsd  (%rdi), %xmm{{.*}}
+;CHECK: vmovsd  16(%rdi), %xmm{{.*}}
+;CHECK: vmovsd  24(%rdi), %xmm{{.*}}
+;CHECK: vmovhpd  8(%rdi), %xmm{{.*}}
+;CHECK: vmovupd %xmm{{.*}}, (%rdi)
+;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
+;CHECK: ret
+
+; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
+define void @cftx020(double* nocapture %a) {
+entry:
+  %0 = load double* %a, align 8                            ; a[0]
+  %arrayidx1 = getelementptr inbounds double* %a, i64 2
+  %1 = load double* %arrayidx1, align 8                    ; a[2]
+  %arrayidx2 = getelementptr inbounds double* %a, i64 1
+  %2 = load double* %arrayidx2, align 8                    ; a[1]
+  %arrayidx3 = getelementptr inbounds double* %a, i64 3
+  %3 = load double* %arrayidx3, align 8                    ; a[3]
+  %4 = insertelement <2 x double> undef, double %0, i32 0
+  %5 = insertelement <2 x double> %4, double %3, i32 1     ; <a[0], a[3]>
+  %6 = insertelement <2 x double> undef, double %1, i32 0
+  %7 = insertelement <2 x double> %6, double %2, i32 1     ; <a[2], a[1]>
+  %8 = fadd <2 x double> %5, %7
+  %9 = bitcast double* %a to <2 x double>*
+  store <2 x double> %8, <2 x double>* %9, align 8         ; overwrites a[0..1]
+  %10 = insertelement <2 x double> undef, double %0, i32 0
+  %11 = insertelement <2 x double> %10, double %2, i32 1   ; <a[0], a[1]> as loaded above
+  %12 = insertelement <2 x double> undef, double %1, i32 0
+  %13 = insertelement <2 x double> %12, double %3, i32 1   ; <a[2], a[3]> as loaded above
+  %14 = fsub <2 x double> %11, %13
+  %15 = bitcast double* %arrayidx1 to <2 x double>*
+  store <2 x double> %14, <2 x double>* %15, align 8       ; overwrites a[2..3]
+  ret void
+}
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 1c5c113a7232..b45b404c0f3c 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -8,7 +8,7 @@
 ; XFAIL: *
 
 define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
-; CHECK: borf:
+; CHECK-LABEL: borf:
 ; CHECK-NOT: inc
 ; CHECK-NOT: leal 1(
 ; CHECK-NOT: leal -1(
diff --git a/test/CodeGen/X86/change-compare-stride-trickiness-0.ll b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
index 1f7f6ecafafb..be9e70919167 100644
--- a/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin9"
 ; The comparison happens before the relevant use, but it can still be rewritten
 ; to compare with zero.
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: align
 ; CHECK: incl  %eax
 ; CHECK-NEXT: decl  %ecx
diff --git a/test/CodeGen/X86/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
index a3933e2e00a4..63733abc5f34 100644
--- a/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
@@ -7,7 +7,7 @@
 ; could be made simpler.
 
 define void @foo() nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: ret
 ; CHECK: cmpl $10
 ; CHECK: ret
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 763079f3446f..6a6f5256f44d 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -12,7 +12,7 @@ declare i64 @llvm.ctlz.i64(i64, i1)
 define i8 @cttz_i8(i8 %x)  {
   %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
   ret i8 %tmp
-; CHECK: cttz_i8:
+; CHECK-LABEL: cttz_i8:
 ; CHECK: bsfl
 ; CHECK-NOT: cmov
 ; CHECK: ret
@@ -21,7 +21,7 @@ define i8 @cttz_i8(i8 %x)  {
 define i16 @cttz_i16(i16 %x)  {
   %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
   ret i16 %tmp
-; CHECK: cttz_i16:
+; CHECK-LABEL: cttz_i16:
 ; CHECK: bsfw
 ; CHECK-NOT: cmov
 ; CHECK: ret
@@ -30,7 +30,7 @@ define i16 @cttz_i16(i16 %x)  {
 define i32 @cttz_i32(i32 %x)  {
   %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
   ret i32 %tmp
-; CHECK: cttz_i32:
+; CHECK-LABEL: cttz_i32:
 ; CHECK: bsfl
 ; CHECK-NOT: cmov
 ; CHECK: ret
@@ -39,7 +39,7 @@ define i32 @cttz_i32(i32 %x)  {
 define i64 @cttz_i64(i64 %x)  {
   %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
   ret i64 %tmp
-; CHECK: cttz_i64:
+; CHECK-LABEL: cttz_i64:
 ; CHECK: bsfq
 ; CHECK-NOT: cmov
 ; CHECK: ret
@@ -49,7 +49,7 @@ define i8 @ctlz_i8(i8 %x) {
 entry:
   %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
   ret i8 %tmp2
-; CHECK: ctlz_i8:
+; CHECK-LABEL: ctlz_i8:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
 ; CHECK: xorl $7,
@@ -60,7 +60,7 @@ define i16 @ctlz_i16(i16 %x) {
 entry:
   %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
   ret i16 %tmp2
-; CHECK: ctlz_i16:
+; CHECK-LABEL: ctlz_i16:
 ; CHECK: bsrw
 ; CHECK-NOT: cmov
 ; CHECK: xorl $15,
@@ -70,7 +70,7 @@ entry:
 define i32 @ctlz_i32(i32 %x) {
   %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
   ret i32 %tmp
-; CHECK: ctlz_i32:
+; CHECK-LABEL: ctlz_i32:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
 ; CHECK: xorl $31,
@@ -80,7 +80,7 @@ define i32 @ctlz_i32(i32 %x) {
 define i64 @ctlz_i64(i64 %x) {
   %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
   ret i64 %tmp
-; CHECK: ctlz_i64:
+; CHECK-LABEL: ctlz_i64:
 ; CHECK: bsrq
 ; CHECK-NOT: cmov
 ; CHECK: xorq $63,
@@ -90,7 +90,7 @@ define i64 @ctlz_i64(i64 %x) {
 define i32 @ctlz_i32_cmov(i32 %n) {
 entry:
 ; Generate a cmov to handle zero inputs when necessary.
-; CHECK: ctlz_i32_cmov:
+; CHECK-LABEL: ctlz_i32_cmov:
 ; CHECK: bsrl
 ; CHECK: cmov
 ; CHECK: xorl $31,
@@ -104,7 +104,7 @@ entry:
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-; CHECK: ctlz_i32_fold_cmov:
+; CHECK-LABEL: ctlz_i32_fold_cmov:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
 ; CHECK: xorl $31,
@@ -118,7 +118,7 @@ define i32 @ctlz_bsr(i32 %n) {
 entry:
 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
 ; the most significant bit, which is what 'bsr' does natively.
-; CHECK: ctlz_bsr:
+; CHECK-LABEL: ctlz_bsr:
 ; CHECK: bsrl
 ; CHECK-NOT: xorl
 ; CHECK: ret
@@ -131,7 +131,7 @@ define i32 @ctlz_bsr_cmov(i32 %n) {
 entry:
 ; Same as ctlz_bsr, but ensure this happens even when there is a potential
 ; zero.
-; CHECK: ctlz_bsr_cmov:
+; CHECK-LABEL: ctlz_bsr_cmov:
 ; CHECK: bsrl
 ; CHECK-NOT: xorl
 ; CHECK: ret
diff --git a/test/CodeGen/X86/cmov-fp.ll b/test/CodeGen/X86/cmov-fp.ll
index ca91f9ea2c2b..768af943eb49 100644
--- a/test/CodeGen/X86/cmov-fp.ll
+++ b/test/CodeGen/X86/cmov-fp.ll
@@ -9,16 +9,16 @@ define double @test1(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test1:
+; SSE-LABEL: test1:
 ; SSE: movsd
 
-; NOSSE2: test1:
+; NOSSE2-LABEL: test1:
 ; NOSSE2: fcmovnbe
 
-; NOSSE1: test1:
+; NOSSE1-LABEL: test1:
 ; NOSSE1: fcmovnbe
 
-; NOCMOV: test1:
+; NOCMOV-LABEL: test1:
 ; NOCMOV: fstp
 
 }
@@ -28,16 +28,16 @@ define double @test2(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test2:
+; SSE-LABEL: test2:
 ; SSE: movsd
 
-; NOSSE2: test2:
+; NOSSE2-LABEL: test2:
 ; NOSSE2: fcmovnb
 
-; NOSSE1: test2:
+; NOSSE1-LABEL: test2:
 ; NOSSE1: fcmovnb
 
-; NOCMOV: test2:
+; NOCMOV-LABEL: test2:
 ; NOCMOV: fstp
 }
 
@@ -46,16 +46,16 @@ define double @test3(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test3:
+; SSE-LABEL: test3:
 ; SSE: movsd
 
-; NOSSE2: test3:
+; NOSSE2-LABEL: test3:
 ; NOSSE2: fcmovb
 
-; NOSSE1: test3:
+; NOSSE1-LABEL: test3:
 ; NOSSE1: fcmovb
 
-; NOCMOV: test3:
+; NOCMOV-LABEL: test3:
 ; NOCMOV: fstp
 }
 
@@ -64,16 +64,16 @@ define double @test4(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test4:
+; SSE-LABEL: test4:
 ; SSE: movsd
 
-; NOSSE2: test4:
+; NOSSE2-LABEL: test4:
 ; NOSSE2: fcmovbe
 
-; NOSSE1: test4:
+; NOSSE1-LABEL: test4:
 ; NOSSE1: fcmovbe
 
-; NOCMOV: test4:
+; NOCMOV-LABEL: test4:
 ; NOCMOV: fstp
 }
 
@@ -82,16 +82,16 @@ define double @test5(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test5:
+; SSE-LABEL: test5:
 ; SSE: movsd
 
-; NOSSE2: test5:
+; NOSSE2-LABEL: test5:
 ; NOSSE2: fstp
 
-; NOSSE1: test5:
+; NOSSE1-LABEL: test5:
 ; NOSSE1: fstp
 
-; NOCMOV: test5:
+; NOCMOV-LABEL: test5:
 ; NOCMOV: fstp
 }
 
@@ -100,16 +100,16 @@ define double @test6(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test6:
+; SSE-LABEL: test6:
 ; SSE: movsd
 
-; NOSSE2: test6:
+; NOSSE2-LABEL: test6:
 ; NOSSE2: fstp
 
-; NOSSE1: test6:
+; NOSSE1-LABEL: test6:
 ; NOSSE1: fstp
 
-; NOCMOV: test6:
+; NOCMOV-LABEL: test6:
 ; NOCMOV: fstp
 }
 
@@ -118,16 +118,16 @@ define double @test7(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test7:
+; SSE-LABEL: test7:
 ; SSE: movsd
 
-; NOSSE2: test7:
+; NOSSE2-LABEL: test7:
 ; NOSSE2: fstp
 
-; NOSSE1: test7:
+; NOSSE1-LABEL: test7:
 ; NOSSE1: fstp
 
-; NOCMOV: test7:
+; NOCMOV-LABEL: test7:
 ; NOCMOV: fstp
 }
 
@@ -136,16 +136,16 @@ define double @test8(i32 %a, i32 %b, double %x) nounwind {
   %sel = select i1 %cmp, double 99.0, double %x
   ret double %sel
 
-; SSE: test8:
+; SSE-LABEL: test8:
 ; SSE: movsd
 
-; NOSSE2: test8:
+; NOSSE2-LABEL: test8:
 ; NOSSE2: fstp
 
-; NOSSE1: test8:
+; NOSSE1-LABEL: test8:
 ; NOSSE1: fstp
 
-; NOCMOV: test8:
+; NOCMOV-LABEL: test8:
 ; NOCMOV: fstp
 }
 
@@ -154,16 +154,16 @@ define float @test9(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test9:
+; SSE-LABEL: test9:
 ; SSE: movss
 
-; NOSSE2: test9:
+; NOSSE2-LABEL: test9:
 ; NOSSE2: movss
 
-; NOSSE1: test9:
+; NOSSE1-LABEL: test9:
 ; NOSSE1: fcmovnbe
 
-; NOCMOV: test9:
+; NOCMOV-LABEL: test9:
 ; NOCMOV: fstp
 }
 
@@ -172,16 +172,16 @@ define float @test10(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test10:
+; SSE-LABEL: test10:
 ; SSE: movss
 
-; NOSSE2: test10:
+; NOSSE2-LABEL: test10:
 ; NOSSE2: movss
 
-; NOSSE1: test10:
+; NOSSE1-LABEL: test10:
 ; NOSSE1: fcmovnb
 
-; NOCMOV: test10:
+; NOCMOV-LABEL: test10:
 ; NOCMOV: fstp
 }
 
@@ -190,16 +190,16 @@ define float @test11(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test11:
+; SSE-LABEL: test11:
 ; SSE: movss
 
-; NOSSE2: test11:
+; NOSSE2-LABEL: test11:
 ; NOSSE2: movss
 
-; NOSSE1: test11:
+; NOSSE1-LABEL: test11:
 ; NOSSE1: fcmovb
 
-; NOCMOV: test11:
+; NOCMOV-LABEL: test11:
 ; NOCMOV: fstp
 }
 
@@ -208,16 +208,16 @@ define float @test12(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test12:
+; SSE-LABEL: test12:
 ; SSE: movss
 
-; NOSSE2: test12:
+; NOSSE2-LABEL: test12:
 ; NOSSE2: movss
 
-; NOSSE1: test12:
+; NOSSE1-LABEL: test12:
 ; NOSSE1: fcmovbe
 
-; NOCMOV: test12:
+; NOCMOV-LABEL: test12:
 ; NOCMOV: fstp
 }
 
@@ -226,16 +226,16 @@ define float @test13(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test13:
+; SSE-LABEL: test13:
 ; SSE: movss
 
-; NOSSE2: test13:
+; NOSSE2-LABEL: test13:
 ; NOSSE2: movss
 
-; NOSSE1: test13:
+; NOSSE1-LABEL: test13:
 ; NOSSE1: fstp
 
-; NOCMOV: test13:
+; NOCMOV-LABEL: test13:
 ; NOCMOV: fstp
 }
 
@@ -244,16 +244,16 @@ define float @test14(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test14:
+; SSE-LABEL: test14:
 ; SSE: movss
 
-; NOSSE2: test14:
+; NOSSE2-LABEL: test14:
 ; NOSSE2: movss
 
-; NOSSE1: test14:
+; NOSSE1-LABEL: test14:
 ; NOSSE1: fstp
 
-; NOCMOV: test14:
+; NOCMOV-LABEL: test14:
 ; NOCMOV: fstp
 }
 
@@ -262,16 +262,16 @@ define float @test15(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test15:
+; SSE-LABEL: test15:
 ; SSE: movss
 
-; NOSSE2: test15:
+; NOSSE2-LABEL: test15:
 ; NOSSE2: movss
 
-; NOSSE1: test15:
+; NOSSE1-LABEL: test15:
 ; NOSSE1: fstp
 
-; NOCMOV: test15:
+; NOCMOV-LABEL: test15:
 ; NOCMOV: fstp
 }
 
@@ -280,16 +280,16 @@ define float @test16(i32 %a, i32 %b, float %x) nounwind {
   %sel = select i1 %cmp, float 99.0, float %x
   ret float %sel
 
-; SSE: test16:
+; SSE-LABEL: test16:
 ; SSE: movss
 
-; NOSSE2: test16:
+; NOSSE2-LABEL: test16:
 ; NOSSE2: movss
 
-; NOSSE1: test16:
+; NOSSE1-LABEL: test16:
 ; NOSSE1: fstp
 
-; NOCMOV: test16:
+; NOCMOV-LABEL: test16:
 ; NOCMOV: fstp
 }
 
@@ -298,16 +298,16 @@ define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test17:
+; SSE-LABEL: test17:
 ; SSE: fcmovnbe
 
-; NOSSE2: test17:
+; NOSSE2-LABEL: test17:
 ; NOSSE2: fcmovnbe
 
-; NOSSE1: test17:
+; NOSSE1-LABEL: test17:
 ; NOSSE1: fcmovnbe
 
-; NOCMOV: test17:
+; NOCMOV-LABEL: test17:
 ; NOCMOV: fstp
 }
 
@@ -316,16 +316,16 @@ define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test18:
+; SSE-LABEL: test18:
 ; SSE: fcmovnb
 
-; NOSSE2: test18:
+; NOSSE2-LABEL: test18:
 ; NOSSE2: fcmovnb
 
-; NOSSE1: test18:
+; NOSSE1-LABEL: test18:
 ; NOSSE1: fcmovnb
 
-; NOCMOV: test18:
+; NOCMOV-LABEL: test18:
 ; NOCMOV: fstp
 }
 
@@ -334,16 +334,16 @@ define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test19:
+; SSE-LABEL: test19:
 ; SSE: fcmovb
 
-; NOSSE2: test19:
+; NOSSE2-LABEL: test19:
 ; NOSSE2: fcmovb
 
-; NOSSE1: test19:
+; NOSSE1-LABEL: test19:
 ; NOSSE1: fcmovb
 
-; NOCMOV: test19:
+; NOCMOV-LABEL: test19:
 ; NOCMOV: fstp
 }
 
@@ -352,16 +352,16 @@ define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test20:
+; SSE-LABEL: test20:
 ; SSE: fcmovbe
 
-; NOSSE2: test20:
+; NOSSE2-LABEL: test20:
 ; NOSSE2: fcmovbe
 
-; NOSSE1: test20:
+; NOSSE1-LABEL: test20:
 ; NOSSE1: fcmovbe
 
-; NOCMOV: test20:
+; NOCMOV-LABEL: test20:
 ; NOCMOV: fstp
 }
 
@@ -371,19 +371,19 @@ define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   ret x86_fp80 %sel
 
 ; We don't emit a branch for fp80, why?
-; SSE: test21:
+; SSE-LABEL: test21:
 ; SSE: testb
 ; SSE: fcmovne
 
-; NOSSE2: test21:
+; NOSSE2-LABEL: test21:
 ; NOSSE2: testb
 ; NOSSE2: fcmovne
 
-; NOSSE1: test21:
+; NOSSE1-LABEL: test21:
 ; NOSSE1: testb
 ; NOSSE1: fcmovne
 
-; NOCMOV: test21:
+; NOCMOV-LABEL: test21:
 ; NOCMOV: fstp
 }
 
@@ -392,19 +392,19 @@ define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test22:
+; SSE-LABEL: test22:
 ; SSE: testb
 ; SSE: fcmovne
 
-; NOSSE2: test22:
+; NOSSE2-LABEL: test22:
 ; NOSSE2: testb
 ; NOSSE2: fcmovne
 
-; NOSSE1: test22:
+; NOSSE1-LABEL: test22:
 ; NOSSE1: testb
 ; NOSSE1: fcmovne
 
-; NOCMOV: test22:
+; NOCMOV-LABEL: test22:
 ; NOCMOV: fstp
 }
 
@@ -413,19 +413,19 @@ define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test23:
+; SSE-LABEL: test23:
 ; SSE: testb
 ; SSE: fcmovne
 
-; NOSSE2: test23:
+; NOSSE2-LABEL: test23:
 ; NOSSE2: testb
 ; NOSSE2: fcmovne
 
-; NOSSE1: test23:
+; NOSSE1-LABEL: test23:
 ; NOSSE1: testb
 ; NOSSE1: fcmovne
 
-; NOCMOV: test23:
+; NOCMOV-LABEL: test23:
 ; NOCMOV: fstp
 }
 
@@ -434,18 +434,18 @@ define x86_fp80 @test24(i32 %a, i32 %b, x86_fp80 %x) nounwind {
   %sel = select i1 %cmp, x86_fp80 0xK4005C600000000000000, x86_fp80 %x
   ret x86_fp80 %sel
 
-; SSE: test24:
+; SSE-LABEL: test24:
 ; SSE: testb
 ; SSE: fcmovne
 
-; NOSSE2: test24:
+; NOSSE2-LABEL: test24:
 ; NOSSE2: testb
 ; NOSSE2: fcmovne
 
-; NOSSE1: test24:
+; NOSSE1-LABEL: test24:
 ; NOSSE1: testb
 ; NOSSE1: fcmovne
 
-; NOCMOV: test24:
+; NOCMOV-LABEL: test24:
 ; NOCMOV: fstp
 }
diff --git a/test/CodeGen/X86/cmov-into-branch.ll b/test/CodeGen/X86/cmov-into-branch.ll
index 780746ab1ae4..cad8dd307b34 100644
--- a/test/CodeGen/X86/cmov-into-branch.ll
+++ b/test/CodeGen/X86/cmov-into-branch.ll
@@ -6,7 +6,7 @@ define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y)  {
   %cmp = fcmp olt double %load, %a
   %cond = select i1 %cmp, i32 %x, i32 %y
   ret i32 %cond
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: ucomisd
 ; CHECK-NOT: cmov
 ; CHECK: j
@@ -18,7 +18,7 @@ define i32 @test2(double %a, double %b, i32 %x, i32 %y)  {
   %cmp = fcmp ogt double %a, %b
   %cond = select i1 %cmp, i32 %x, i32 %y
   ret i32 %cond
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: ucomisd
 ; CHECK: cmov
 }
@@ -29,7 +29,7 @@ define i32 @test3(i32 %a, i32* nocapture %b, i32 %x)  {
   %cmp = icmp ult i32 %load, %a
   %cond = select i1 %cmp, i32 %a, i32 %x
   ret i32 %cond
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: cmpl
 ; CHECK-NOT: cmov
 ; CHECK: j
@@ -43,7 +43,7 @@ define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y)  {
   %cond = select i1 %cmp, i32 %x, i32 %y
   %add = add i32 %cond, %load
   ret i32 %add
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: cmpl
 ; CHECK: cmov
 }
@@ -56,7 +56,7 @@ define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
   %cond = select i1 %cmp1, i32 %a, i32 %y
   %cond5 = select i1 %cmp, i32 %cond, i32 %x
   ret i32 %cond5
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: cmpl
 ; CHECK: cmov
 ; CHECK: cmov
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index ed25c82fddac..215b86267a47 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -3,9 +3,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
 entry:
-; CHECK: test1:
-; CHECK: movl	$12, %eax
-; CHECK-NEXT: btl
+; CHECK-LABEL: test1:
+; CHECK: btl
+; CHECK-NEXT: movl	$12, %eax
 ; CHECK-NEXT: cmovael	(%rcx), %eax
 ; CHECK-NEXT: ret
 
@@ -18,9 +18,9 @@ entry:
 }
 define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
 entry:
-; CHECK: test2:
-; CHECK: movl	$12, %eax
-; CHECK-NEXT: btl
+; CHECK-LABEL: test2:
+; CHECK: btl
+; CHECK-NEXT: movl	$12, %eax
 ; CHECK-NEXT: cmovbl	(%rcx), %eax
 ; CHECK-NEXT: ret
 
@@ -40,7 +40,7 @@ entry:
 declare void @bar(i64) nounwind
 
 define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK:      cmovnel %edi, %esi
 ; CHECK-NEXT: movl    %esi, %edi
 
@@ -87,12 +87,12 @@ bb.i.i.i:                                         ; preds = %entry
   %4 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_4.exit.i
 
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: g_100
 ; CHECK: testb
 ; CHECK-NOT: xor
 ; CHECK: setne
-; CHECK-NEXT: testb
+; CHECK: testb
 
 func_4.exit.i:                                    ; preds = %bb.i.i.i, %entry
   %.not.i = xor i1 %2, true                       ; <i1> [#uses=1]
@@ -119,7 +119,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 ; rdar://6668608
 define i32 @test5(i32* nocapture %P) nounwind readonly {
 entry:
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: 	setg	%al
 ; CHECK:	movzbl	%al, %eax
 ; CHECK:	orl	$-2, %eax
@@ -133,7 +133,7 @@ entry:
 
 define i32 @test6(i32* nocapture %P) nounwind readonly {
 entry:
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: 	setl	%al
 ; CHECK:	movzbl	%al, %eax
 ; CHECK:	leal	4(%rax,%rax,8), %eax
@@ -148,7 +148,7 @@ entry:
 ; Don't try to use a 16-bit conditional move to do an 8-bit select,
 ; because it isn't worth it. Just use a branch instead.
 define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK:     testb	$1, %dil
 ; CHECK-NEXT:     jne	LBB
 
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index 1855fe2fb89e..551d9bc6074b 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -10,7 +10,7 @@ cond_true:		; preds = %0
 
 ReturnBlock:		; preds = %0
 	ret i32 0
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: cmpl	$0, (%rsi)
 }
 
@@ -25,7 +25,7 @@ cond_true:		; preds = %0
 
 ReturnBlock:		; preds = %0
 	ret i32 0
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movl	(%rsi), %eax
 ; CHECK: shll	$3, %eax
 ; CHECK: testl	%eax, %eax
@@ -35,7 +35,7 @@ define i64 @test3(i64 %x) nounwind {
   %t = icmp eq i64 %x, 0
   %r = zext i1 %t to i64
   ret i64 %r
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: 	testq	%rdi, %rdi
 ; CHECK: 	sete	%al
 ; CHECK: 	movzbl	%al, %eax
@@ -46,7 +46,7 @@ define i64 @test4(i64 %x) nounwind {
   %t = icmp slt i64 %x, 1
   %r = zext i1 %t to i64
   ret i64 %r
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: 	testq	%rdi, %rdi
 ; CHECK: 	setle	%al
 ; CHECK: 	movzbl	%al, %eax
@@ -67,7 +67,7 @@ define i32 @test5(double %A) nounwind  {
 
  bb12:; preds = %entry
  ret i32 32
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: ucomisd	LCPI4_0(%rip), %xmm0
 ; CHECK: ucomisd	LCPI4_1(%rip), %xmm0
 }
@@ -85,7 +85,7 @@ T:
   
 F:
   ret i32 0
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: cmpq	$0, -8(%rsp)
 ; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
 }
@@ -93,10 +93,9 @@ F:
 ; rdar://11866926
 define i32 @test7(i64 %res) nounwind {
 entry:
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK-NOT: movabsq
 ; CHECK: shrq $32, %rdi
-; CHECK: testq %rdi, %rdi
 ; CHECK: sete
   %lnot = icmp ult i64 %res, 4294967296
   %lnot.ext = zext i1 %lnot to i32
@@ -105,7 +104,7 @@ entry:
 
 define i32 @test8(i64 %res) nounwind {
 entry:
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK-NOT: movabsq
 ; CHECK: shrq $32, %rdi
 ; CHECK: cmpq $3, %rdi
@@ -116,10 +115,9 @@ entry:
 
 define i32 @test9(i64 %res) nounwind {
 entry:
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK-NOT: movabsq
 ; CHECK: shrq $33, %rdi
-; CHECK: testq %rdi, %rdi
 ; CHECK: sete
   %lnot = icmp ult i64 %res, 8589934592
   %lnot.ext = zext i1 %lnot to i32
@@ -128,10 +126,9 @@ entry:
 
 define i32 @test10(i64 %res) nounwind {
 entry:
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK-NOT: movabsq
 ; CHECK: shrq $32, %rdi
-; CHECK: testq %rdi, %rdi
 ; CHECK: setne
   %lnot = icmp uge i64 %res, 4294967296
   %lnot.ext = zext i1 %lnot to i32
@@ -141,7 +138,7 @@ entry:
 ; rdar://9758774
 define i32 @test11(i64 %l) nounwind {
 entry:
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK-NOT: movabsq
 ; CHECK-NOT: andq
 ; CHECK: shrq $47, %rdi
@@ -153,7 +150,7 @@ entry:
 }
 
 define i32 @test12() uwtable ssp {
-; CHECK: test12:
+; CHECK-LABEL: test12:
 ; CHECK: testb
   %1 = call zeroext i1 @test12b()
   br i1 %1, label %2, label %3
diff --git a/test/CodeGen/X86/coalesce-implicitdef.ll b/test/CodeGen/X86/coalesce-implicitdef.ll
index 19cd08cf3797..9be045271d8d 100644
--- a/test/CodeGen/X86/coalesce-implicitdef.ll
+++ b/test/CodeGen/X86/coalesce-implicitdef.ll
@@ -26,7 +26,7 @@ for.cond:                                         ; preds = %for.inc34, %entry
   br i1 %tobool, label %for.end36, label %for.body
 
 for.body:                                         ; preds = %for.cond
-  store i32 0, i32* @c, align 4, !tbaa !0
+  store i32 0, i32* @c, align 4
   br label %for.body2
 
 for.body2:                                        ; preds = %for.body, %for.inc
@@ -35,7 +35,7 @@ for.body2:                                        ; preds = %for.body, %for.inc
   br i1 %tobool3, label %if.then10, label %if.then
 
 if.then:                                          ; preds = %for.body2
-  store i32 0, i32* %i, align 4, !tbaa !0
+  store i32 0, i32* %i, align 4
   br label %for.body6
 
 for.body6:                                        ; preds = %if.then, %for.body6
@@ -43,7 +43,7 @@ for.body6:                                        ; preds = %if.then, %for.body6
   br i1 true, label %for.body6, label %for.inc
 
 if.then10:                                        ; preds = %for.body2
-  store i32 1, i32* @b, align 4, !tbaa !0
+  store i32 1, i32* @b, align 4
   ret void
 
 for.inc:                                          ; preds = %for.body6
@@ -66,30 +66,30 @@ while.end:                                        ; preds = %while.cond
 
 for.inc27.backedge:                               ; preds = %while.end, %if.then22
   %inc28 = add nsw i32 %0, 1
-  store i32 %inc28, i32* @b, align 4, !tbaa !0
+  store i32 %inc28, i32* @b, align 4
   %tobool17 = icmp eq i32 %inc28, 0
   br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split
 
 if.then22:                                        ; preds = %while.end
-  %1 = load i16* %p2.1, align 2, !tbaa !3
+  %1 = load i16* %p2.1, align 2
   %tobool23 = icmp eq i16 %1, 0
   br i1 %tobool23, label %for.inc27.backedge, label %label.loopexit
 
 label.loopexit:                                   ; preds = %if.then22
-  store i32 %inc20, i32* @a, align 4, !tbaa !0
+  store i32 %inc20, i32* @a, align 4
   %inc2858 = add nsw i32 %0, 1
-  store i32 %inc2858, i32* @b, align 4, !tbaa !0
+  store i32 %inc2858, i32* @b, align 4
   %tobool1759 = icmp eq i32 %inc2858, 0
   br i1 %tobool1759, label %if.end30, label %while.condthread-pre-split
 
 for.inc27.if.end30.loopexit56_crit_edge:          ; preds = %for.inc27.backedge
-  store i32 %inc20, i32* @a, align 4, !tbaa !0
+  store i32 %inc20, i32* @a, align 4
   br label %if.end30
 
 if.end30:                                         ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc
   %i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ]
   %pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ]
-  %2 = load i32* %pi.4, align 4, !tbaa !0
+  %2 = load i32* %pi.4, align 4
   %tobool31 = icmp eq i32 %2, 0
   br i1 %tobool31, label %for.inc34, label %label.preheader
 
@@ -100,31 +100,26 @@ for.inc34:                                        ; preds = %if.end30
 
 for.end36:                                        ; preds = %for.cond
   store i32 1, i32* %i, align 4
-  %3 = load i32* @c, align 4, !tbaa !0
+  %3 = load i32* @c, align 4
   %tobool37 = icmp eq i32 %3, 0
   br i1 %tobool37, label %label.preheader, label %land.rhs
 
 land.rhs:                                         ; preds = %for.end36
-  store i32 0, i32* @a, align 4, !tbaa !0
+  store i32 0, i32* @a, align 4
   br label %label.preheader
 
 label.preheader:                                  ; preds = %for.end36, %if.end30, %land.rhs
   %i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ]
   %pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ]
-  %4 = load i32* @b, align 4, !tbaa !0
+  %4 = load i32* @b, align 4
   %inc285863 = add nsw i32 %4, 1
-  store i32 %inc285863, i32* @b, align 4, !tbaa !0
+  store i32 %inc285863, i32* @b, align 4
   %tobool175964 = icmp eq i32 %inc285863, 0
   br i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph
 
 while.condthread-pre-split.lr.ph.lr.ph:           ; preds = %label.preheader
-  %.pr50 = load i32* @d, align 4, !tbaa !0
+  %.pr50 = load i32* @d, align 4
   %tobool19 = icmp eq i32 %.pr50, 0
-  %a.promoted.pre = load i32* @a, align 4, !tbaa !0
+  %a.promoted.pre = load i32* @a, align 4
   br label %while.condthread-pre-split
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index d9e0778102cb..cbcb89031b5b 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+sse2 | not grep movaps
 ; PR1877
 
 @NNTOT = weak global i32 0		; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/code_placement_align_all.ll b/test/CodeGen/X86/code_placement_align_all.ll
index 1e5e8f780b70..53df90620204 100644
--- a/test/CodeGen/X86/code_placement_align_all.ll
+++ b/test/CodeGen/X86/code_placement_align_all.ll
@@ -1,6 +1,6 @@
 ; RUN: llc  -mcpu=corei7 -mtriple=x86_64-linux -align-all-blocks=16 < %s | FileCheck %s
 
-;CHECK: foo
+;CHECK-LABEL: foo:
 ;CHECK: .align  65536, 0x90
 ;CHECK: .align  65536, 0x90
 ;CHECK: .align  65536, 0x90
diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll
index e8ee07063531..316accfa41ac 100644
--- a/test/CodeGen/X86/codegen-prepare.ll
+++ b/test/CodeGen/X86/codegen-prepare.ll
@@ -38,7 +38,7 @@ if.end:                                           ; preds = %if.then, %if.else,
   ret void
 }
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: movss 12([[THIS:%[a-zA-Z0-9]+]]), [[REGISTER:%[a-zA-Z0-9]+]]
 ; CHECK-NEXT: movss [[REGISTER]], 60([[THIS]])
 
diff --git a/test/CodeGen/X86/codemodel.ll b/test/CodeGen/X86/codemodel.ll
index b6ca1cedc22e..3aebc13f8740 100644
--- a/test/CodeGen/X86/codemodel.ll
+++ b/test/CodeGen/X86/codemodel.ll
@@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define i32 @foo() nounwind readonly {
 entry:
-; CHECK-SMALL:  foo:
+; CHECK-SMALL-LABEL:  foo:
 ; CHECK-SMALL:   movl data(%rip), %eax
-; CHECK-KERNEL: foo:
+; CHECK-KERNEL-LABEL: foo:
 ; CHECK-KERNEL:  movl data, %eax
 	%0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4		; <i32> [#uses=1]
 	ret i32 %0
@@ -17,9 +17,9 @@ entry:
 
 define i32 @foo2() nounwind readonly {
 entry:
-; CHECK-SMALL:  foo2:
+; CHECK-SMALL-LABEL:  foo2:
 ; CHECK-SMALL:   movl data+40(%rip), %eax
-; CHECK-KERNEL: foo2:
+; CHECK-KERNEL-LABEL: foo2:
 ; CHECK-KERNEL:  movl data+40, %eax
 	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4		; <i32> [#uses=1]
 	ret i32 %0
@@ -27,9 +27,9 @@ entry:
 
 define i32 @foo3() nounwind readonly {
 entry:
-; CHECK-SMALL:  foo3:
+; CHECK-SMALL-LABEL:  foo3:
 ; CHECK-SMALL:   movl data-40(%rip), %eax
-; CHECK-KERNEL: foo3:
+; CHECK-KERNEL-LABEL: foo3:
 ; CHECK-KERNEL:  movq $-40, %rax
 	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4		; <i32> [#uses=1]
 	ret i32 %0
@@ -38,10 +38,10 @@ entry:
 define i32 @foo4() nounwind readonly {
 entry:
 ; FIXME: We really can use movabsl here!
-; CHECK-SMALL:  foo4:
+; CHECK-SMALL-LABEL:  foo4:
 ; CHECK-SMALL:   movl $16777216, %eax
 ; CHECK-SMALL:   movl data(%rax), %eax
-; CHECK-KERNEL: foo4:
+; CHECK-KERNEL-LABEL: foo4:
 ; CHECK-KERNEL:  movl data+16777216, %eax
 	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4		; <i32> [#uses=1]
 	ret i32 %0
@@ -49,18 +49,18 @@ entry:
 
 define i32 @foo1() nounwind readonly {
 entry:
-; CHECK-SMALL:  foo1:
+; CHECK-SMALL-LABEL:  foo1:
 ; CHECK-SMALL:   movl data+16777212(%rip), %eax
-; CHECK-KERNEL: foo1:
+; CHECK-KERNEL-LABEL: foo1:
 ; CHECK-KERNEL:  movl data+16777212, %eax
         %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4            ; <i32> [#uses=1]
         ret i32 %0
 }
 define i32 @foo5() nounwind readonly {
 entry:
-; CHECK-SMALL:  foo5:
+; CHECK-SMALL-LABEL:  foo5:
 ; CHECK-SMALL:   movl data-16777216(%rip), %eax
-; CHECK-KERNEL: foo5:
+; CHECK-KERNEL-LABEL: foo5:
 ; CHECK-KERNEL:  movq $-16777216, %rax
 	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4		; <i32> [#uses=1]
 	ret i32 %0
diff --git a/test/CodeGen/X86/coff-feat00.ll b/test/CodeGen/X86/coff-feat00.ll
new file mode 100644
index 000000000000..1dcd4276399a
--- /dev/null
+++ b/test/CodeGen/X86/coff-feat00.ll
@@ -0,0 +1,7 @@
+; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s
+
+define i32 @foo() {
+  ret i32 0
+}
+
+; CHECK: @feat.00 = 1
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index ef44a3d119b8..656c385e2bc7 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -2,8 +2,8 @@
 ; insertion of register-register copies.
 
 ; Make sure there are only 3 mov's for each testcase
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu   | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu   -mcpu=corei7 | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s -check-prefix=DARWIN
 
 
 @G = external global i32                ; <i32*> [#uses=2]
@@ -11,7 +11,7 @@
 declare void @ext(i32)
 
 define i32 @t1(i32 %X, i32 %Y) nounwind {
-; LINUX: t1:
+; LINUX-LABEL: t1:
 ; LINUX: movl 4(%esp), %eax
 ; LINUX: movl 8(%esp), %ecx
 ; LINUX: addl %eax, %ecx
@@ -22,7 +22,7 @@ define i32 @t1(i32 %X, i32 %Y) nounwind {
 }
 
 define i32 @t2(i32 %X, i32 %Y) nounwind {
-; LINUX: t2:
+; LINUX-LABEL: t2:
 ; LINUX: movl 4(%esp), %eax
 ; LINUX: movl 8(%esp), %ecx
 ; LINUX: xorl %eax, %ecx
@@ -37,11 +37,11 @@ define i32 @t2(i32 %X, i32 %Y) nounwind {
 
 define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
 entry:
-; DARWIN: t3:
-; DARWIN: shll $16
+; DARWIN-LABEL: t3:
 ; DARWIN: shlq $32, %rcx
+; DARWIN-NEXT: orq %rcx, %rax
+; DARWIN-NEXT: shll $8
 ; DARWIN-NOT: leaq
-; DARWIN: orq %rcx, %rax
   %tmp21 = zext i32 %lb to i64
   %tmp23 = zext i32 %ub to i64
   %tmp24 = shl i64 %tmp23, 32
diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll
index 8c4fa27da560..9d3a1257288c 100644
--- a/test/CodeGen/X86/compact-unwind.ll
+++ b/test/CodeGen/X86/compact-unwind.ll
@@ -1,18 +1,29 @@
-; RUN: llc < %s -disable-cfi -disable-fp-elim -mtriple x86_64-apple-darwin11 | FileCheck %s
+; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 | FileCheck -check-prefix=ASM %s
+; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 -filetype=obj -o - \
+; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -s - \
+; RUN:  | FileCheck -check-prefix=CU %s
+; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 \
+; RUN:  | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \
+; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -s - \
+; RUN:  | FileCheck -check-prefix=FROM-ASM %s
 
 %ty = type { i8* }
 
 @gv = external global i32
 
 ; This is aligning the stack with a push of a random register.
-; CHECK: pushq %rax
+; ASM: pushq %rax
 
 ; Even though we can't encode %rax into the compact unwind, We still want to be
 ; able to generate a compact unwind encoding in this particular case.
-;
-; CHECK: __LD,__compact_unwind
-; CHECK: _foo ## Range Start
-; CHECK: 16842753 ## Compact Unwind Encoding: 0x1010001
+
+; CU:      Contents of section __compact_unwind:
+; CU-NEXT: 0020 00000000 00000000 1e000000 01000101
+; CU-NEXT: 0030 00000000 00000000 00000000 00000000
+
+; FROM-ASM:      Contents of section __compact_unwind:
+; FROM-ASM-NEXT: 0020 00000000 00000000 1e000000 01000101
+; FROM-ASM-NEXT: 0030 00000000 00000000 00000000 00000000
 
 define i8* @foo(i64 %size) {
   %addr = alloca i64, align 8
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
index 9aa44a30af57..5eb0135277d3 100644
--- a/test/CodeGen/X86/compare-inf.ll
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -3,74 +3,124 @@
 ; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
 ; and negative infinity, because those are more efficient on x86.
 
-; CHECK: oeq_inff:
+declare void @f() nounwind
+
+; CHECK-LABEL: oeq_inff:
 ; CHECK: ucomiss
 ; CHECK: jb
-define float @oeq_inff(float %x, float %y) nounwind readonly {
+define void @oeq_inff(float %x) nounwind {
   %t0 = fcmp oeq float %x, 0x7FF0000000000000
-  %t1 = select i1 %t0, float 1.0, float %y
-  ret float %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: oeq_inf:
+; CHECK-LABEL: oeq_inf:
 ; CHECK: ucomisd
 ; CHECK: jb
-define double @oeq_inf(double %x, double %y) nounwind readonly {
+define void @oeq_inf(double %x) nounwind {
   %t0 = fcmp oeq double %x, 0x7FF0000000000000
-  %t1 = select i1 %t0, double 1.0, double %y
-  ret double %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: une_inff:
+; CHECK-LABEL: une_inff:
 ; CHECK: ucomiss
 ; CHECK: jae
-define float @une_inff(float %x, float %y) nounwind readonly {
+define void @une_inff(float %x) nounwind {
   %t0 = fcmp une float %x, 0x7FF0000000000000
-  %t1 = select i1 %t0, float 1.0, float %y
-  ret float %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: une_inf:
+; CHECK-LABEL: une_inf:
 ; CHECK: ucomisd
 ; CHECK: jae
-define double @une_inf(double %x, double %y) nounwind readonly {
+define void @une_inf(double %x) nounwind {
   %t0 = fcmp une double %x, 0x7FF0000000000000
-  %t1 = select i1 %t0, double 1.0, double %y
-  ret double %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: oeq_neg_inff:
+; CHECK-LABEL: oeq_neg_inff:
 ; CHECK: ucomiss
 ; CHECK: jb
-define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
+define void @oeq_neg_inff(float %x) nounwind {
   %t0 = fcmp oeq float %x, 0xFFF0000000000000
-  %t1 = select i1 %t0, float 1.0, float %y
-  ret float %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: oeq_neg_inf:
+; CHECK-LABEL: oeq_neg_inf:
 ; CHECK: ucomisd
 ; CHECK: jb
-define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
+define void @oeq_neg_inf(double %x) nounwind {
   %t0 = fcmp oeq double %x, 0xFFF0000000000000
-  %t1 = select i1 %t0, double 1.0, double %y
-  ret double %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: une_neg_inff:
+; CHECK-LABEL: une_neg_inff:
 ; CHECK: ucomiss
 ; CHECK: jae
-define float @une_neg_inff(float %x, float %y) nounwind readonly {
+define void @une_neg_inff(float %x) nounwind {
   %t0 = fcmp une float %x, 0xFFF0000000000000
-  %t1 = select i1 %t0, float 1.0, float %y
-  ret float %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
 
-; CHECK: une_neg_inf:
+; CHECK-LABEL: une_neg_inf:
 ; CHECK: ucomisd
 ; CHECK: jae
-define double @une_neg_inf(double %x, double %y) nounwind readonly {
+define void @une_neg_inf(double %x) nounwind {
   %t0 = fcmp une double %x, 0xFFF0000000000000
-  %t1 = select i1 %t0, double 1.0, double %y
-  ret double %t1
+  br i1 %t0, label %true, label %false
+
+true:
+  call void @f() nounwind
+  br label %false
+
+false:
+  ret void
 }
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
index d38ce91310b1..af5c86c451a0 100644
--- a/test/CodeGen/X86/compiler_used.ll
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -5,7 +5,7 @@
 @Z = internal global i8 4
 
 @llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
-@llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
+@llvm.compiler.used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
 
 ; CHECK-NOT: .no_dead_strip
 ; CHECK: .no_dead_strip	_Z
diff --git a/test/CodeGen/X86/conditional-indecrement.ll b/test/CodeGen/X86/conditional-indecrement.ll
index a3a0c39905aa..c3e71180bb18 100644
--- a/test/CodeGen/X86/conditional-indecrement.ll
+++ b/test/CodeGen/X86/conditional-indecrement.ll
@@ -5,7 +5,7 @@ define i32 @test1(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %not.cmp to i32
   %retval.0 = add i32 %inc, %b
   ret i32 %retval.0
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: cmpl $1
 ; CHECK: sbbl $-1
 ; CHECK: ret
@@ -16,7 +16,7 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %cmp to i32
   %retval.0 = add i32 %inc, %b
   ret i32 %retval.0
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: cmpl $1
 ; CHECK: adcl $0
 ; CHECK: ret
@@ -27,7 +27,7 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %cmp to i32
   %retval.0 = add i32 %inc, %b
   ret i32 %retval.0
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: cmpl $1
 ; CHECK: adcl $0
 ; CHECK: ret
@@ -38,7 +38,7 @@ define i32 @test4(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %not.cmp to i32
   %retval.0 = add i32 %inc, %b
   ret i32 %retval.0
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: cmpl $1
 ; CHECK: sbbl $-1
 ; CHECK: ret
@@ -49,7 +49,7 @@ define i32 @test5(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %not.cmp to i32
   %retval.0 = sub i32 %b, %inc
   ret i32 %retval.0
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: cmpl $1
 ; CHECK: adcl $-1
 ; CHECK: ret
@@ -60,7 +60,7 @@ define i32 @test6(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %cmp to i32
   %retval.0 = sub i32 %b, %inc
   ret i32 %retval.0
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: cmpl $1
 ; CHECK: sbbl $0
 ; CHECK: ret
@@ -71,7 +71,7 @@ define i32 @test7(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %cmp to i32
   %retval.0 = sub i32 %b, %inc
   ret i32 %retval.0
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: cmpl $1
 ; CHECK: sbbl $0
 ; CHECK: ret
@@ -82,7 +82,7 @@ define i32 @test8(i32 %a, i32 %b) nounwind readnone {
   %inc = zext i1 %not.cmp to i32
   %retval.0 = sub i32 %b, %inc
   ret i32 %retval.0
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: cmpl $1
 ; CHECK: adcl $-1
 ; CHECK: ret
diff --git a/test/CodeGen/X86/crash-nosse.ll b/test/CodeGen/X86/crash-nosse.ll
index 1cec25b3347b..b1e01f94c9e6 100644
--- a/test/CodeGen/X86/crash-nosse.ll
+++ b/test/CodeGen/X86/crash-nosse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mattr=-sse2,-sse41 -verify-machineinstrs
+; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse4.1 -verify-machineinstrs
 target triple = "x86_64-unknown-linux-gnu"
 
 ; PR10503
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 852b642de68d..051150e227aa 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=x86 < %s -verify-machineinstrs
-; RUN: llc -march=x86-64 < %s -verify-machineinstrs
+; REQUIRES: asserts
+; RUN: llc -march=x86 < %s -verify-machineinstrs -precompute-phys-liveness
+; RUN: llc -march=x86-64 < %s -verify-machineinstrs -precompute-phys-liveness
 
 ; PR6497
 
@@ -107,8 +108,8 @@ do.body92:                                        ; preds = %if.then66
   ret void
 }
 
-!0 = metadata !{i32 633550}                       
-!1 = metadata !{i32 634261}                       
+!0 = metadata !{i32 633550}
+!1 = metadata !{i32 634261}
 
 
 ; Crash during XOR optimization.
@@ -203,7 +204,7 @@ entry:
 ; <rdar://problem/9187792>
 define fastcc void @func_61() nounwind sspreq {
 entry:
-  %t1 = tail call i64 @llvm.objectsize.i64(i8* undef, i1 false)
+  %t1 = tail call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false)
   %t2 = icmp eq i64 %t1, -1
   br i1 %t2, label %bb2, label %bb1
 
@@ -214,7 +215,7 @@ bb2:
   ret void
 }
 
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
 ; PR10277
 ; This test has dead code elimination caused by remat during spilling.
diff --git a/test/CodeGen/X86/critical-edge-split-2.ll b/test/CodeGen/X86/critical-edge-split-2.ll
index 70301cd9bcc4..44205d6829dd 100644
--- a/test/CodeGen/X86/critical-edge-split-2.ll
+++ b/test/CodeGen/X86/critical-edge-split-2.ll
@@ -22,7 +22,7 @@ cond.end.i:                                       ; preds = %entry
   ret i16 %call1
 }
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: testb %dil, %dil
 ; CHECK: jne LBB0_2
 ; CHECK: divl
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
index 0a3dfca228c1..786f7f9b1cc8 100644
--- a/test/CodeGen/X86/ctpop-combine.ll
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -8,7 +8,7 @@ define i32 @test1(i64 %x) nounwind readnone {
   %cmp = icmp ugt i32 %cast, 1
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: leaq -1([[A0:%rdi|%rcx]])
 ; CHECK-NEXT: testq
 ; CHECK-NEXT: setne
@@ -21,7 +21,7 @@ define i32 @test2(i64 %x) nounwind readnone {
   %cmp = icmp ult i64 %count, 2
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: leaq -1([[A0]])
 ; CHECK-NEXT: testq
 ; CHECK-NEXT: sete
@@ -34,7 +34,7 @@ define i32 @test3(i64 %x) nounwind readnone {
   %cmp = icmp ult i6 %cast, 2
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: cmpb $2
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/dag-rauw-cse.ll b/test/CodeGen/X86/dag-rauw-cse.ll
index eca8c8641a20..12a2e626687b 100644
--- a/test/CodeGen/X86/dag-rauw-cse.ll
+++ b/test/CodeGen/X86/dag-rauw-cse.ll
@@ -2,7 +2,7 @@
 ; PR3018
 
 define i32 @test(i32 %A) nounwind {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK: orl $1
 ; CHECK: ret
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index dae91d5ccdd6..cf631c353fce 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -3,7 +3,7 @@
 ; Shows a dag combine bug that will generate an illegal build vector
 ; with v2i64 build_vector i32, i32.
 
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: unpcklpd
 ; CHECK: movapd
 define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
@@ -13,7 +13,7 @@ entry:
         ret void
 }
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movdqa
 define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
 entry:
diff --git a/test/CodeGen/X86/dagcombine-shifts.ll b/test/CodeGen/X86/dagcombine-shifts.ll
new file mode 100644
index 000000000000..905cf052c39c
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine-shifts.ll
@@ -0,0 +1,209 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))
+
+; Canonicalize the sequence shl/zext/lshr performing the zeroextend
+; as the last instruction of the sequence.
+; This will help DAGCombiner to identify and then fold the sequence
+; of shifts into a single AND.
+; This transformation is profitable if the shift amounts are the same
+; and if there is only one use of the zext.
+
+define i16 @fun1(i8 zeroext %v) {
+entry:
+  %shr = lshr i8 %v, 4
+  %ext = zext i8 %shr to i16
+  %shl = shl i16 %ext, 4
+  ret i16 %shl
+}
+
+; CHECK-LABEL: @fun1
+; CHECK: and
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: ret
+
+define i32 @fun2(i8 zeroext %v) {
+entry:
+  %shr = lshr i8 %v, 4
+  %ext = zext i8 %shr to i32
+  %shl = shl i32 %ext, 4
+  ret i32 %shl
+}
+
+; CHECK-LABEL: @fun2
+; CHECK: and
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: ret
+
+define i32 @fun3(i16 zeroext %v) {
+entry:
+  %shr = lshr i16 %v, 4
+  %ext = zext i16 %shr to i32
+  %shl = shl i32 %ext, 4
+  ret i32 %shl
+}
+
+; CHECK-LABEL: @fun3
+; CHECK: and
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: ret
+
+define i64 @fun4(i8 zeroext %v) {
+entry:
+  %shr = lshr i8 %v, 4
+  %ext = zext i8 %shr to i64
+  %shl = shl i64 %ext, 4
+  ret i64 %shl
+}
+
+; CHECK-LABEL: @fun4
+; CHECK: and
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: ret
+
+define i64 @fun5(i16 zeroext %v) {
+entry:
+  %shr = lshr i16 %v, 4
+  %ext = zext i16 %shr to i64
+  %shl = shl i64 %ext, 4
+  ret i64 %shl
+}
+
+; CHECK-LABEL: @fun5
+; CHECK: and
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: ret
+
+define i64 @fun6(i32 zeroext %v) {
+entry:
+  %shr = lshr i32 %v, 4
+  %ext = zext i32 %shr to i64
+  %shl = shl i64 %ext, 4
+  ret i64 %shl
+}
+
+; CHECK-LABEL: @fun6
+; CHECK: and
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: ret
+
+; Don't fold the pattern if we use arithmetic shifts.
+
+define i64 @fun7(i8 zeroext %v) {
+entry:
+  %shr = ashr i8 %v, 4
+  %ext = zext i8 %shr to i64
+  %shl = shl i64 %ext, 4
+  ret i64 %shl
+}
+
+; CHECK-LABEL: @fun7
+; CHECK: sar
+; CHECK: shl
+; CHECK: ret
+
+define i64 @fun8(i16 zeroext %v) {
+entry:
+  %shr = ashr i16 %v, 4
+  %ext = zext i16 %shr to i64
+  %shl = shl i64 %ext, 4
+  ret i64 %shl
+}
+
+; CHECK-LABEL: @fun8
+; CHECK: sar
+; CHECK: shl
+; CHECK: ret
+
+define i64 @fun9(i32 zeroext %v) {
+entry:
+  %shr = ashr i32 %v, 4
+  %ext = zext i32 %shr to i64
+  %shl = shl i64 %ext, 4
+  ret i64 %shl
+}
+
+; CHECK-LABEL: @fun9
+; CHECK: sar
+; CHECK: shl
+; CHECK: ret
+
+; Don't fold the pattern if there is more than one use of the
+; operand in input to the shift left.
+
+define i64 @fun10(i8 zeroext %v) {
+entry:
+  %shr = lshr i8 %v, 4
+  %ext = zext i8 %shr to i64
+  %shl = shl i64 %ext, 4
+  %add = add i64 %shl, %ext
+  ret i64 %add
+}
+
+; CHECK-LABEL: @fun10
+; CHECK: shr
+; CHECK: shl
+; CHECK: ret
+
+define i64 @fun11(i16 zeroext %v) {
+entry:
+  %shr = lshr i16 %v, 4
+  %ext = zext i16 %shr to i64
+  %shl = shl i64 %ext, 4
+  %add = add i64 %shl, %ext
+  ret i64 %add
+}
+
+; CHECK-LABEL: @fun11
+; CHECK: shr
+; CHECK: shl
+; CHECK: ret
+
+define i64 @fun12(i32 zeroext %v) {
+entry:
+  %shr = lshr i32 %v, 4
+  %ext = zext i32 %shr to i64
+  %shl = shl i64 %ext, 4
+  %add = add i64 %shl, %ext
+  ret i64 %add
+}
+
+; CHECK-LABEL: @fun12
+; CHECK: shr
+; CHECK: shl
+; CHECK: ret
+
+; PR17380
+; Make sure that the combined dags are legal if we run the DAGCombiner after
+; Legalization took place. The add instruction is redundant and increases by 
+; one the number of uses of the zext. This prevents the transformation from
+; firing before dags are legalized and optimized.
+; Once the add is removed, the number of uses becomes one and therefore the
+; dags are canonicalized. After Legalization, we need to make sure that the
+; valuetype for the shift count is legal.
+; Verify also that we correctly fold the shl-shr sequence into an 
+; AND with bitmask.
+
+define void @g(i32 %a) {
+  %b = lshr i32 %a, 2
+  %c = zext i32 %b to i64
+  %d = add i64 %c, 1          ; result is never used; it only gives the zext %c a second use
+  %e = shl i64 %c, 2          ; shifts left by the same amount (2) as the lshr above
+  tail call void @f(i64 %e)
+  ret void
+}
+
+; CHECK-LABEL: @g
+; CHECK-NOT: shr
+; CHECK-NOT: shl
+; CHECK: and
+; CHECK-NEXT: jmp
+
+declare void @f(i64)
+
diff --git a/test/CodeGen/X86/dagcombine-unsafe-math.ll b/test/CodeGen/X86/dagcombine-unsafe-math.ll
new file mode 100644
index 000000000000..f06d9f1dc4b9
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine-unsafe-math.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s 
+
+
+; rdar://13126763
+; Expression "x + x*x" was mistakenly transformed into "x * 3.0f".
+
+define float @test1(float %x) {
+  %t1 = fmul fast float %x, %x
+  %t2 = fadd fast float %t1, %x
+  ret float %t2
+; CHECK: test1
+; CHECK: vaddss
+}
+
+; (x + x) + x => x * 3.0
+define float @test2(float %x) {
+  %t1 = fadd fast float %x, %x
+  %t2 = fadd fast float %t1, %x
+  ret float %t2
+; CHECK: .long  1077936128
+; CHECK: test2
+; CHECK: vmulss LCPI1_0(%rip), %xmm0, %xmm0
+}
+
+; x + (x + x) => x * 3.0
+define float @test3(float %x) {
+  %t1 = fadd fast float %x, %x
+  %t2 = fadd fast float %x, %t1   ; x + (x + x): the inner sum is the right-hand operand (test2 covers the left-hand form)
+  ret float %t2
+; CHECK: .long  1077936128
+; CHECK: test3
+; CHECK: vmulss LCPI2_0(%rip), %xmm0, %xmm0
+}
+
+; (y + x) + x != x * 3.0
+define float @test4(float %x, float %y) {
+  %t1 = fadd fast float %x, %y
+  %t2 = fadd fast float %t1, %x
+  ret float %t2
+; CHECK: test4
+; CHECK: vaddss
+}
+
+; rdar://13445387
+; "x + x + x => 3.0 * x" should be disabled after legalization because 
+; Instruction-Selection doesn't know how to handle "3.0"
+; 
+define float @test5() {
+  %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer
+  %vecext.i8.i152 = extractelement <4 x float> %mul.i.i151, i32 1
+  %vecext1.i9.i153 = extractelement <4 x float> %mul.i.i151, i32 0
+  %add.i10.i154 = fadd float %vecext1.i9.i153, %vecext.i8.i152
+  %vecext.i7.i155 = extractelement <4 x float> %mul.i.i151, i32 2
+  %add.i.i156 = fadd float %vecext.i7.i155, %add.i10.i154
+  ret float %add.i.i156
+}
diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine_unsafe_math.ll
deleted file mode 100644
index 592cf1bec2e5..000000000000
--- a/test/CodeGen/X86/dagcombine_unsafe_math.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s 
-
-
-; rdar://13126763
-; Expression "x + x*x" was mistakenly transformed into "x * 3.0f".
-
-define float @test1(float %x) {
-  %t1 = fmul fast float %x, %x
-  %t2 = fadd fast float %t1, %x
-  ret float %t2
-; CHECK: test1
-; CHECK: vaddss
-}
-
-; (x + x) + x => x * 3.0
-define float @test2(float %x) {
-  %t1 = fadd fast float %x, %x
-  %t2 = fadd fast float %t1, %x
-  ret float %t2
-; CHECK: .long  1077936128
-; CHECK: test2
-; CHECK: vmulss LCPI1_0(%rip), %xmm0, %xmm0
-}
-
-; x + (x + x) => x * 3.0
-define float @test3(float %x) {
-  %t1 = fadd fast float %x, %x
-  %t2 = fadd fast float %t1, %x
-  ret float %t2
-; CHECK: .long  1077936128
-; CHECK: test3
-; CHECK: vmulss LCPI2_0(%rip), %xmm0, %xmm0
-}
-
-; (y + x) + x != x * 3.0
-define float @test4(float %x, float %y) {
-  %t1 = fadd fast float %x, %y
-  %t2 = fadd fast float %t1, %x
-  ret float %t2
-; CHECK: test4
-; CHECK: vaddss
-}
-
-; rdar://13445387
-; "x + x + x => 3.0 * x" should be disabled after legalization because 
-; Instruction-Selection dosen't know how to handle "3.0"
-; 
-define float @test5() {
-  %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer
-  %vecext.i8.i152 = extractelement <4 x float> %mul.i.i151, i32 1
-  %vecext1.i9.i153 = extractelement <4 x float> %mul.i.i151, i32 0
-  %add.i10.i154 = fadd float %vecext1.i9.i153, %vecext.i8.i152
-  %vecext.i7.i155 = extractelement <4 x float> %mul.i.i151, i32 2
-  %add.i.i156 = fadd float %vecext.i7.i155, %add.i10.i154
-  ret float %add.i.i156
-}
diff --git a/test/CodeGen/X86/dbg-at-specficiation.ll b/test/CodeGen/X86/dbg-at-specficiation.ll
deleted file mode 100644
index 48b8202bd5fa..000000000000
--- a/test/CodeGen/X86/dbg-at-specficiation.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc  < %s | FileCheck %s
-; Radar 10147769
-; Do not unnecessarily use AT_specification DIE.
-; CHECK-NOT: AT_specification
-
-@a = common global [10 x i32] zeroinitializer, align 16
-
-!llvm.dbg.cu = !{!0}
-
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"x.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 140253)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, [10 x i32]* @a} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !"x.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!8 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll
deleted file mode 100644
index 719a526cc892..000000000000
--- a/test/CodeGen/X86/dbg-byval-parameter.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc  -march=x86 -asm-verbose < %s | grep DW_TAG_formal_parameter
-
-
-%struct.Pt = type { double, double }
-%struct.Rect = type { %struct.Pt, %struct.Pt }
-
-define double @foo(%struct.Rect* byval %my_r0) nounwind ssp {
-entry:
-  %retval = alloca double                         ; <double*> [#uses=2]
-  %0 = alloca double                              ; <double*> [#uses=2]
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15
-  %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
-  %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
-  %3 = load double* %2, align 8, !dbg !16         ; <double> [#uses=1]
-  store double %3, double* %0, align 8, !dbg !16
-  %4 = load double* %0, align 8, !dbg !16         ; <double> [#uses=1]
-  store double %4, double* %retval, align 8, !dbg !16
-  br label %return, !dbg !16
-
-return:                                           ; preds = %entry
-  %retval1 = load double* %retval, !dbg !16       ; <double> [#uses=1]
-  ret double %retval1, !dbg !16
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!3}
-
-!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
-!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
-!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 11, i32 0, metadata !1, null}
-!16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{metadata !1}
-!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
deleted file mode 100644
index f72729c5fee1..000000000000
--- a/test/CodeGen/X86/dbg-const-int.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s - | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.6.7"
-; Radar 9511391
-
-;CHECK:         .byte   4                       ## DW_AT_const_value
-define i32 @foo() nounwind uwtable readnone optsize ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
-  ret i32 42, !dbg !10
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-
-!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 42}
-!9 = metadata !{i32 2, i32 12, metadata !7, null}
-!10 = metadata !{i32 3, i32 2, metadata !7, null}
-!11 = metadata !{metadata !1}
-!12 = metadata !{metadata !6}
-!13 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
deleted file mode 100644
index 5c2e62bc9168..000000000000
--- a/test/CodeGen/X86/dbg-const.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc < %s - | FileCheck %s
-target triple = "x86_64-apple-darwin10.0.0"
-
-;CHECK:        ## DW_OP_constu
-;CHECK-NEXT:  .byte	42
-define i32 @foobar() nounwind readonly noinline ssp {
-entry:
-  %call = tail call i32 @bar(), !dbg !11
-  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
-  %call2 = tail call i32 @bar(), !dbg !11
-  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !6), !dbg !11
-  %add = add nsw i32 %call2, %call, !dbg !12
-  ret i32 %add, !dbg !10
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-declare i32 @bar() nounwind readnone
-
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null}
-!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 42}
-!9 = metadata !{i32 15, i32 12, metadata !7, null}
-!10 = metadata !{i32 23, i32 3, metadata !7, null}
-!11 = metadata !{i32 17, i32 3, metadata !7, null}
-!12 = metadata !{i32 18, i32 3, metadata !7, null}
-!13 = metadata !{metadata !0}
-!14 = metadata !{metadata !6}
-!15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/CodeGen/X86/dbg-declare-arg.ll
deleted file mode 100644
index f7e0c91cdff2..000000000000
--- a/test/CodeGen/X86/dbg-declare-arg.ll
+++ /dev/null
@@ -1,125 +0,0 @@
-; RUN: llc -O0 -fast-isel=false < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.6.7"
-;Radar 9321650
-
-;CHECK: ##DEBUG_VALUE: my_a 
-
-%class.A = type { i32, i32, i32, i32 }
-
-define void @_Z3fooi(%class.A* sret %agg.result, i32 %i) ssp {
-entry:
-  %i.addr = alloca i32, align 4
-  %j = alloca i32, align 4
-  %nrvo = alloca i1
-  %cleanup.dest.slot = alloca i32
-  store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !26), !dbg !27
-  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !28), !dbg !30
-  store i32 0, i32* %j, align 4, !dbg !31
-  %tmp = load i32* %i.addr, align 4, !dbg !32
-  %cmp = icmp eq i32 %tmp, 42, !dbg !32
-  br i1 %cmp, label %if.then, label %if.end, !dbg !32
-
-if.then:                                          ; preds = %entry
-  %tmp1 = load i32* %i.addr, align 4, !dbg !33
-  %add = add nsw i32 %tmp1, 1, !dbg !33
-  store i32 %add, i32* %j, align 4, !dbg !33
-  br label %if.end, !dbg !35
-
-if.end:                                           ; preds = %if.then, %entry
-  store i1 false, i1* %nrvo, !dbg !36
-  call void @llvm.dbg.declare(metadata !{%class.A* %agg.result}, metadata !37), !dbg !39
-  %tmp2 = load i32* %j, align 4, !dbg !40
-  %x = getelementptr inbounds %class.A* %agg.result, i32 0, i32 0, !dbg !40
-  store i32 %tmp2, i32* %x, align 4, !dbg !40
-  store i1 true, i1* %nrvo, !dbg !41
-  store i32 1, i32* %cleanup.dest.slot
-  %nrvo.val = load i1* %nrvo, !dbg !42
-  br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !42
-
-nrvo.unused:                                      ; preds = %if.end
-  call void @_ZN1AD1Ev(%class.A* %agg.result), !dbg !42
-  br label %nrvo.skipdtor, !dbg !42
-
-nrvo.skipdtor:                                    ; preds = %nrvo.unused, %if.end
-  ret void, !dbg !42
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 {
-entry:
-  %this.addr = alloca %class.A*, align 8
-  store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !43), !dbg !44
-  %this1 = load %class.A** %this.addr
-  call void @_ZN1AD2Ev(%class.A* %this1)
-  ret void, !dbg !45
-}
-
-define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp align 2 {
-entry:
-  %this.addr = alloca %class.A*, align 8
-  store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !46), !dbg !47
-  %this1 = load %class.A** %this.addr
-  %x = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !48
-  store i32 1, i32* %x, align 4, !dbg !48
-  ret void, !dbg !48
-}
-
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
-!2 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14}
-!5 = metadata !{i32 786445, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786445, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
-!8 = metadata !{i32 786445, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!9 = metadata !{i32 786445, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786447, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!16 = metadata !{null, metadata !13, metadata !17}
-!17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ]
-!18 = metadata !{i32 786470, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
-!19 = metadata !{i32 786478, metadata !"_Z3fooi", i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ]
-!20 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!21 = metadata !{metadata !1}
-!22 = metadata !{i32 786478, metadata !"_ZN1AD1Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ]
-!23 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!24 = metadata !{null}
-!25 = metadata !{i32 786478, metadata !"_ZN1AD2Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 4, i32 11, metadata !19, null}
-!28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786443, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
-!30 = metadata !{i32 5, i32 7, metadata !29, null}
-!31 = metadata !{i32 5, i32 12, metadata !29, null}
-!32 = metadata !{i32 6, i32 3, metadata !29, null}
-!33 = metadata !{i32 7, i32 5, metadata !34, null}
-!34 = metadata !{i32 786443, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 8, i32 3, metadata !34, null}
-!36 = metadata !{i32 9, i32 9, metadata !29, null}
-!37 = metadata !{i32 786688, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0, null} ; [ DW_TAG_auto_variable ]
-!38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
-!39 = metadata !{i32 9, i32 5, metadata !29, null}
-!40 = metadata !{i32 10, i32 3, metadata !29, null}
-!41 = metadata !{i32 11, i32 3, metadata !29, null}
-!42 = metadata !{i32 12, i32 1, metadata !29, null}
-!43 = metadata !{i32 786689, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
-!44 = metadata !{i32 2, i32 47, metadata !22, null}
-!45 = metadata !{i32 2, i32 61, metadata !22, null}
-!46 = metadata !{i32 786689, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
-!47 = metadata !{i32 2, i32 47, metadata !25, null}
-!48 = metadata !{i32 2, i32 54, metadata !49, null}
-!49 = metadata !{i32 786443, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
-!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25}
-!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll
deleted file mode 100644
index 6ac397ac42e1..000000000000
--- a/test/CodeGen/X86/dbg-declare.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -O0 -mtriple x86_64-apple-darwin
-; <rdar://problem/11134152>
-
-define i32 @foo(i32* %x) nounwind uwtable ssp {
-entry:
-  %x.addr = alloca i32*, align 8
-  %saved_stack = alloca i8*
-  %cleanup.dest.slot = alloca i32
-  store i32* %x, i32** %x.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15
-  %0 = load i32** %x.addr, align 8, !dbg !16
-  %1 = load i32* %0, align 4, !dbg !16
-  %2 = zext i32 %1 to i64, !dbg !16
-  %3 = call i8* @llvm.stacksave(), !dbg !16
-  store i8* %3, i8** %saved_stack, !dbg !16
-  %vla = alloca i8, i64 %2, align 16, !dbg !16
-  call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23
-  store i32 1, i32* %cleanup.dest.slot
-  %4 = load i8** %saved_stack, !dbg !24
-  call void @llvm.stackrestore(i8* %4), !dbg !24
-  ret i32 0, !dbg !25
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare i8* @llvm.stacksave() nounwind
-
-declare void @llvm.stackrestore(i8*) nounwind
-
-!llvm.dbg.cu = !{!0}
-
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 5, i32 21, metadata !5, null}
-!16 = metadata !{i32 7, i32 13, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
-!23 = metadata !{i32 7, i32 8, metadata !17, null}
-!24 = metadata !{i32 9, i32 1, metadata !17, null}
-!25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
deleted file mode 100644
index 1bd3d77522a3..000000000000
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc -enable-dwarf-directory -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
-
-; Radar 8884898
-; CHECK: file	1 "simple.c"
-
-declare i32 @printf(i8*, ...) nounwind
-
-define i32 @main() nounwind {
-  ret i32 0
-}
-
-!llvm.dbg.cu = !{!2}
-
-!1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !5}
-!9 = metadata !{metadata !6}
-!10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"}
diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll
deleted file mode 100644
index cc612b2ca53e..000000000000
--- a/test/CodeGen/X86/dbg-i128-const.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
-
-; CHECK: DW_AT_const_value
-; CHECK-NEXT: 42
-
-define i128 @__foo(i128 %a, i128 %b) nounwind {
-entry:
-  tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11
-  %add = add i128 %a, %b, !dbg !11
-  ret i128 %add, !dbg !11
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!5}
-
-!0 = metadata !{i128 42 }
-!1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!7 = metadata !{metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 29, i32 0, metadata !2, null}
-!12 = metadata !{metadata !3}
-!13 = metadata !{metadata !"foo.c", metadata !"/tmp"}
-!14 = metadata !{metadata !"myint.h", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll
deleted file mode 100644
index c381cd754cfe..000000000000
--- a/test/CodeGen/X86/dbg-large-unsigned-const.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc -filetype=obj %s -o /dev/null
-; Hanle large unsigned constant values.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-macosx10.7.0"
-
-define zeroext i1 @_Z3iseRKxS0_(i64* nocapture %LHS, i64* nocapture %RHS) nounwind readonly optsize ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i64* %LHS}, i64 0, metadata !7), !dbg !13
-  tail call void @llvm.dbg.value(metadata !{i64* %RHS}, i64 0, metadata !11), !dbg !14
-  %tmp1 = load i64* %LHS, align 4, !dbg !15
-  %tmp3 = load i64* %RHS, align 4, !dbg !15
-  %cmp = icmp eq i64 %tmp1, %tmp3, !dbg !15
-  ret i1 %cmp, !dbg !15
-}
-
-define zeroext i1 @_Z2fnx(i64 %a) nounwind readnone optsize ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !21), !dbg !24
-  tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !26), !dbg !27
-  %cmp.i = icmp eq i64 %a, 9223372036854775807, !dbg !28
-  ret i1 %cmp.i, !dbg !22
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!29 = metadata !{metadata !1, metadata !6}
-!30 = metadata !{metadata !7, metadata !11}
-!31 = metadata !{metadata !12}
-
-!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, null, null, metadata !29, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !"_Z3iseRKxS0_", i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !"_Z2fnx", i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!9 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ]
-!10 = metadata !{i32 786468, metadata !0, metadata !"long long int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 2, i32 27, metadata !1, null}
-!14 = metadata !{i32 2, i32 49, metadata !1, null}
-!15 = metadata !{i32 3, i32 3, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
-!20 = metadata !{i32 6, i32 19, metadata !6, null}
-!21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 7, i32 10, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !2, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{i32 2, i32 27, metadata !1, metadata !22}
-!25 = metadata !{i64 9223372036854775807}         
-!26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 2, i32 49, metadata !1, metadata !22}
-!28 = metadata !{i32 3, i32 3, metadata !16, metadata !22}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
deleted file mode 100644
index 30d03054a104..000000000000
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=basic | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin8"
-
-;CHECK: Ldebug_loc0:
-;CHECK-NEXT:	.quad	Lfunc_begin0
-;CHECK-NEXT:	.quad	L
-;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}          ## Loc expr size
-;CHECK-NEXT:    .short  Lset
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT:	.quad	0
-;CHECK-NEXT:	.quad	0
-
-%0 = type { i64, i1 }
-
-@__clz_tab = external constant [256 x i8]
-
-define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15
-  tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21
-  br i1 undef, label %bb2, label %bb4, !dbg !22
-
-bb2:                                              ; preds = %entry
-  br label %bb4, !dbg !23
-
-bb4:                                              ; preds = %bb2, %entry
-  br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24
-
-bb82.i:                                           ; preds = %bb4
-  unreachable
-
-__udivmodti4.exit:                                ; preds = %bb4
-  ret i128 undef, !dbg !27
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
-!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 786478, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!11 = metadata !{metadata !12, metadata !12, metadata !12}
-!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
-!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 1093, i32 0, metadata !9, null}
-!16 = metadata !{i64 0}
-!17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !1, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
-!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 1095, i32 0, metadata !18, null}
-!22 = metadata !{i32 1103, i32 0, metadata !18, null}
-!23 = metadata !{i32 1104, i32 0, metadata !18, null}
-!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
-!25 = metadata !{i32 786443, metadata !1, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 1107, i32 0, metadata !18, null}
-!27 = metadata !{i32 1111, i32 0, metadata !18, null}
-!28 = metadata !{metadata !0, metadata !9}
-!29 = metadata !{metadata !"foobar.c", metadata !"/tmp"}
-!30 = metadata !{metadata !"foobar.h", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
deleted file mode 100644
index d1774cc7bcaf..000000000000
--- a/test/CodeGen/X86/dbg-prolog-end.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.6.7"
-
-;CHECK: .loc	1 2 11 prologue_end
-define i32 @foo(i32 %i) nounwind ssp {
-entry:
-  %i.addr = alloca i32, align 4
-  %j = alloca i32, align 4
-  store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !7), !dbg !8
-  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !9), !dbg !11
-  store i32 2, i32* %j, align 4, !dbg !12
-  %tmp = load i32* %j, align 4, !dbg !13
-  %inc = add nsw i32 %tmp, 1, !dbg !13
-  store i32 %inc, i32* %j, align 4, !dbg !13
-  %tmp1 = load i32* %j, align 4, !dbg !14
-  %tmp2 = load i32* %i.addr, align 4, !dbg !14
-  %add = add nsw i32 %tmp1, %tmp2, !dbg !14
-  store i32 %add, i32* %j, align 4, !dbg !14
-  %tmp3 = load i32* %j, align 4, !dbg !15
-  ret i32 %tmp3, !dbg !15
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-define i32 @main() nounwind ssp {
-entry:
-  %retval = alloca i32, align 4
-  store i32 0, i32* %retval
-  %call = call i32 @foo(i32 21), !dbg !16
-  ret i32 %call, !dbg !16
-}
-
-!llvm.dbg.cu = !{!0}
-!18 = metadata !{metadata !1, metadata !6}
-
-!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 1, i32 13, metadata !1, null}
-!9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 2, i32 6, metadata !10, null}
-!12 = metadata !{i32 2, i32 11, metadata !10, null}
-!13 = metadata !{i32 3, i32 2, metadata !10, null}
-!14 = metadata !{i32 4, i32 2, metadata !10, null}
-!15 = metadata !{i32 5, i32 2, metadata !10, null}
-!16 = metadata !{i32 8, i32 2, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
deleted file mode 100644
index b08d68a6643d..000000000000
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-; Radar 10464995
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7.2"
-
-@s = common global [4294967296 x i8] zeroinitializer, align 16
-;CHECK: .long	4294967295
-
-define void @bar() nounwind uwtable ssp {
-entry:
-  store i8 97, i8* getelementptr inbounds ([4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18
-  ret void, !dbg !20
-}
-
-!llvm.dbg.cu = !{!0}
-
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{null}
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!11 = metadata !{metadata !13}
-!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ]
-!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ]
-!18 = metadata !{i32 5, i32 3, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-!20 = metadata !{i32 6, i32 1, metadata !19, null}
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
deleted file mode 100644
index c63235e7ad65..000000000000
--- a/test/CodeGen/X86/dbg-value-dag-combine.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-; PR 9817
-
-
-declare  <4 x i32> @__amdil_get_global_id_int()
-declare  void @llvm.dbg.value(metadata , i64 , metadata )
-define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind {
-entry:
-  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata
-!7), !dbg !8
-  %0 = call <4 x i32> @__amdil_get_global_id_int() nounwind
-  %1 = extractelement <4 x i32> %0, i32 0
-  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !9), !dbg !11
-  call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
-  %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
-  %tmp3 = add i32 0, %tmp2, !dbg !15
-; CHECK:  ##DEBUG_VALUE: idx <- EAX+0
-  call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
-!15
-  %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
-  store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
-  ret void, !dbg !17
-}
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 1, i32 42, metadata !0, null}
-!9 = metadata !{i32 786688, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 3, i32 41, metadata !10, null}
-!12 = metadata !{i32 0}
-!13 = metadata !{i32 786688, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!14 = metadata !{i32 4, i32 20, metadata !10, null}
-!15 = metadata !{i32 5, i32 15, metadata !10, null}
-!16 = metadata !{i32 6, i32 18, metadata !10, null}
-!17 = metadata !{i32 7, i32 1, metadata !0, null}
-!18 = metadata !{metadata !0}
-!19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"}
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
deleted file mode 100644
index acc360e90cd2..000000000000
--- a/test/CodeGen/X86/dbg-value-isel.ll
+++ /dev/null
@@ -1,103 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-; PR 9879
-
-; CHECK: ##DEBUG_VALUE: tid <-
-%0 = type { i8*, i8*, i8*, i8*, i32 }
-
-@sgv = internal addrspace(2) constant [1 x i8] zeroinitializer
-@fgv = internal addrspace(2) constant [1 x i8] zeroinitializer
-@lvgv = internal constant [0 x i8*] zeroinitializer
-@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* bitcast ([1 x i8] addrspace(2)* @sgv to i8*), i8* bitcast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata"
-
-define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind {
-entry:
-  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata !8), !dbg !9
-  %0 = call <4 x i32> @__amdil_get_local_id_int() nounwind
-  %1 = extractelement <4 x i32> %0, i32 0
-  br label %2
-
-; <label>:2                                       ; preds = %entry
-  %3 = phi i32 [ %1, %entry ]
-  br label %4
-
-; <label>:4                                       ; preds = %2
-  %5 = phi i32 [ %3, %2 ]
-  br label %get_local_id.exit
-
-get_local_id.exit:                                ; preds = %4
-  %6 = phi i32 [ %5, %4 ]
-  call void @llvm.dbg.value(metadata !{i32 %6}, i64 0, metadata !10), !dbg !12
-  %7 = call <4 x i32> @__amdil_get_global_id_int() nounwind, !dbg !12
-  %8 = extractelement <4 x i32> %7, i32 0, !dbg !12
-  br label %9
-
-; <label>:9                                       ; preds = %get_local_id.exit
-  %10 = phi i32 [ %8, %get_local_id.exit ]
-  br label %11
-
-; <label>:11                                      ; preds = %9
-  %12 = phi i32 [ %10, %9 ]
-  br label %get_global_id.exit
-
-get_global_id.exit:                               ; preds = %11
-  %13 = phi i32 [ %12, %11 ]
-  call void @llvm.dbg.value(metadata !{i32 %13}, i64 0, metadata !13), !dbg !14
-  %14 = call <4 x i32> @__amdil_get_local_size_int() nounwind
-  %15 = extractelement <4 x i32> %14, i32 0
-  br label %16
-
-; <label>:16                                      ; preds = %get_global_id.exit
-  %17 = phi i32 [ %15, %get_global_id.exit ]
-  br label %18
-
-; <label>:18                                      ; preds = %16
-  %19 = phi i32 [ %17, %16 ]
-  br label %get_local_size.exit
-
-get_local_size.exit:                              ; preds = %18
-  %20 = phi i32 [ %19, %18 ]
-  call void @llvm.dbg.value(metadata !{i32 %20}, i64 0, metadata !15), !dbg !16
-  %tmp5 = add i32 %6, %13, !dbg !17
-  %tmp7 = add i32 %tmp5, %20, !dbg !17
-  store i32 %tmp7, i32 addrspace(1)* %ip, align 4, !dbg !17
-  br label %return, !dbg !17
-
-return:                                           ; preds = %get_local_size.exit
-  ret void, !dbg !18
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare <4 x i32> @__amdil_get_local_size_int() nounwind
-
-declare <4 x i32> @__amdil_get_local_id_int() nounwind
-
-declare <4 x i32> @__amdil_get_global_id_int() nounwind
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 1, i32 32, metadata !0, null}
-!10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 5, i32 24, metadata !11, null}
-!13 = metadata !{i32 786688, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!14 = metadata !{i32 6, i32 25, metadata !11, null}
-!15 = metadata !{i32 786688, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 7, i32 26, metadata !11, null}
-!17 = metadata !{i32 9, i32 24, metadata !11, null}
-!18 = metadata !{i32 10, i32 1, metadata !0, null}
-!19 = metadata !{metadata !0}
-!20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
deleted file mode 100644
index a6c3e13621c9..000000000000
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=basic | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-;Radar 8950491
-
-;CHECK: .long Lset5
-;CHECK-NEXT:        ## DW_AT_decl_file
-;CHECK-NEXT:        ## DW_AT_decl_line
-;CHECK-NEXT:        ## DW_AT_type
-;CHECK-NEXT:        ## DW_AT_location
-
-@dfm = external global i32, align 4
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
-entry:
-  call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13
-  %tmp.i = load i32* @dfm, align 4, !dbg !14
-  %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
-  br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
-
-if.end.i:                                         ; preds = %entry
-  switch i64 %cmd, label %if.then [
-    i64 2147772420, label %bb.i
-    i64 536897538, label %bb116.i
-  ], !dbg !22
-
-bb.i:                                             ; preds = %if.end.i
-  unreachable
-
-bb116.i:                                          ; preds = %if.end.i
-  unreachable
-
-if.then:                                          ; preds = %if.end.i
-  ret i32 undef, !dbg !23
-
-if.else:                                          ; preds = %entry
-  ret i32 0
-}
-
-declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp
-declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
-declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 19509, i32 20, metadata !0, null}
-!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
-!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 19514, i32 2, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !1, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ]
-!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
-!23 = metadata !{i32 19524, i32 1, metadata !18, null}
-!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8}
-!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"}
-!27 = metadata !{metadata !"f.i", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
deleted file mode 100644
index b068bbbe784a..000000000000
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
-
-%struct.a = type { i32 }
-
-define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
-  %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
-  %tmp2 = load i32* %tmp1, align 4, !dbg !14
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
-  %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
-  %add = add nsw i32 %tmp2, 1, !dbg !19
-  ret i32 %add, !dbg !19
-}
-
-declare i32 @foo(...) 
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!2}
-
-!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786445, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 5, i32 19, metadata !0, null}
-!14 = metadata !{i32 6, i32 14, metadata !12, null}
-!18 = metadata !{i32 7, i32 2, metadata !12, null}
-!19 = metadata !{i32 8, i32 2, metadata !12, null}
-!20 = metadata !{metadata !0}
-!21 = metadata !{metadata !6, metadata !11}
-!22 = metadata !{metadata !"bar.c", metadata !"/private/tmp"}
-
-; Check that variable bar:b value range is appropriately trucated in debug info.
-; The variable is in %rdi which is clobbered by 'movl %ebx, %edi'
-; Here Ltmp7 is the end of the location range.
-
-;CHECK: .loc	1 7 2
-;CHECK: movl
-;CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]]
-
-;CHECK:Ldebug_loc0:
-;CHECK-NEXT:	.quad
-;CHECK-NEXT:	.quad	[[CLOBBER]]
-;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}
-;CHECK-NEXT:    .short  Lset
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT:	.byte	85
-;CHECK-NEXT: Ltmp
-;CHECK-NEXT:	.quad	0
-;CHECK-NEXT:	.quad	0
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 9669d97cb7fa..98ae1d51db21 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -6,7 +6,7 @@ define zeroext i16 @test1(i16 zeroext %x) nounwind {
 entry:
 	%div = udiv i16 %x, 33
 	ret i16 %div
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: imull	$63551, %eax, %eax
 ; CHECK-NEXT: shrl	$21, %eax
 ; CHECK-NEXT: ret
@@ -17,7 +17,7 @@ entry:
   %div = udiv i16 %c, 3
   ret i16 %div
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: imull	$43691, %eax, %eax
 ; CHECK-NEXT: shrl	$17, %eax
 ; CHECK-NEXT: ret
@@ -28,7 +28,7 @@ entry:
   %div = udiv i8 %c, 3
   ret i8 %div
 
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: movzbl  8(%esp), %eax
 ; CHECK-NEXT: imull	$171, %eax, %eax
 ; CHECK-NEXT: shrl	$9, %eax
@@ -39,14 +39,14 @@ define signext i16 @test4(i16 signext %x) nounwind {
 entry:
 	%div = sdiv i16 %x, 33		; <i32> [#uses=1]
 	ret i16 %div
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: imull	$1986, %eax, %
 }
 
 define i32 @test5(i32 %A) nounwind {
         %tmp1 = udiv i32 %A, 1577682821         ; <i32> [#uses=1]
         ret i32 %tmp1
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: movl	$365384439, %eax
 ; CHECK: mull	4(%esp)
 }
@@ -55,7 +55,7 @@ define signext i16 @test6(i16 signext %x) nounwind {
 entry:
   %div = sdiv i16 %x, 10
   ret i16 %div
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: imull $26215, %eax, %ecx
 ; CHECK: sarl $18, %ecx
 ; CHECK: shrl $15, %eax
@@ -64,7 +64,7 @@ entry:
 define i32 @test7(i32 %x) nounwind {
   %div = udiv i32 %x, 28
   ret i32 %div
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: shrl $2
 ; CHECK: movl $613566757
 ; CHECK: mull
@@ -76,7 +76,7 @@ define i32 @test7(i32 %x) nounwind {
 define i8 @test8(i8 %x) nounwind {
   %div = udiv i8 %x, 78
   ret i8 %div
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: shrb %
 ; CHECK: imull $211
 ; CHECK: shrl $13
@@ -86,7 +86,7 @@ define i8 @test8(i8 %x) nounwind {
 define i8 @test9(i8 %x) nounwind {
   %div = udiv i8 %x, 116
   ret i8 %div
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: shrb $2
 ; CHECK: imull $71
 ; CHECK: shrl $11
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index 3bc2957963eb..3b4a8689060d 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -5,9 +5,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-unknown-linux-gnu"
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5}
 
-!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
+!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !2 = metadata !{i32 0}
 !3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ]
 !4 = metadata !{metadata !"empty.c", metadata !"/home/nlewycky"}
@@ -16,3 +16,4 @@ target triple = "x86_64-unknown-linux-gnu"
 ;                        Dir  Mod Time   File Len   File Name
 ;                        ---- ---------- ---------- ---------------------------
 ; CHECK: file_names[  1]    0 0x00000000 0x00000000 empty.c
+!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/dyn_alloca_aligned.ll b/test/CodeGen/X86/dyn_alloca_aligned.ll
new file mode 100644
index 000000000000..993f4d2af75d
--- /dev/null
+++ b/test/CodeGen/X86/dyn_alloca_aligned.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+define i32 @A(i32 %Size) {
+; CHECK:  subq    %rcx, %rax
+; CHECK:  andq    $-128, %rax
+; CHECK:  movq    %rax, %rsp
+  %A = alloca i8, i32 %Size, align 128
+  %A_addr = ptrtoint i8* %A to i32
+  ret i32 %A_addr
+}
diff --git a/test/CodeGen/X86/emit-big-cst.ll b/test/CodeGen/X86/emit-big-cst.ll
new file mode 100644
index 000000000000..96c15d4a3658
--- /dev/null
+++ b/test/CodeGen/X86/emit-big-cst.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; Check assembly printing of odd constants.
+
+; CHECK: bigCst:
+; CHECK-NEXT: .quad 12713950999227904
+; CHECK-NEXT: .quad 26220
+; CHECK-NEXT: .size bigCst, 16
+
+@bigCst = internal constant i82 483673642326615442599424
+
+define void @accessBig(i64* %storage) {
+  %addr = bitcast i64* %storage to i82*
+  %bigLoadedCst = load volatile i82* @bigCst
+  %tmp = add i82 %bigLoadedCst, 1
+  store i82 %tmp, i82* %addr
+  ret void
+}
diff --git a/test/CodeGen/X86/extended-fma-contraction.ll b/test/CodeGen/X86/extended-fma-contraction.ll
new file mode 100644
index 000000000000..858eabcb7dc6
--- /dev/null
+++ b/test/CodeGen/X86/extended-fma-contraction.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
+
+; CHECK-LABEL: fmafunc
+define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
+
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
+  %ret = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
+  ret <3 x float> %ret
+}
+
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index 06d739ceed9e..cadc0fb723f9 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | FileCheck %s
 
 define i32 @t(<2 x i64>* %val) nounwind  {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK-NOT: movd
 ; CHECK: movl 8(
 ; CHECK-NEXT: ret
@@ -15,7 +15,7 @@ define i32 @t(<2 x i64>* %val) nounwind  {
 ; Case where extractelement of load ends up as undef.
 ; (Making sure this doesn't crash.)
 define i32 @t2(<8 x i32>* %xp) {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: ret
   %x = load <8 x i32>* %xp
   %Shuff68 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index af1867fc51cc..e330ee79430e 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -7,9 +7,9 @@ declare float @fabsf(float)
 
 declare x86_fp80 @fabsl(x86_fp80)
 
-; CHECK:  test1:
-; UNSAFE: test1:
-; NOOPT:  test1:
+; CHECK-LABEL:  test1:
+; UNSAFE-LABEL: test1:
+; NOOPT-LABEL:  test1:
 define float @test1(float %X) {
         %Y = call float @fabsf(float %X) readnone
         ret float %Y
@@ -21,9 +21,9 @@ define float @test1(float %X) {
 ; UNSAFE-NOT: fabs
 ; NOOPT-NOT:  fabsf
 
-; CHECK:  test2:
-; UNSAFE: test2:
-; NOOPT:  test2:
+; CHECK-LABEL:  test2:
+; UNSAFE-LABEL: test2:
+; NOOPT-LABEL:  test2:
 define double @test2(double %X) {
         %Y = fcmp oge double %X, -0.0
         %Z = fsub double -0.0, %X
@@ -38,9 +38,9 @@ define double @test2(double %X) {
 
 ; UNSAFE-NOT: fabs
 
-; CHECK:  test3:
-; UNSAFE: test3:
-; NOOPT:  test3:
+; CHECK-LABEL:  test3:
+; UNSAFE-LABEL: test3:
+; NOOPT-LABEL:  test3:
 define x86_fp80 @test3(x86_fp80 %X) {
         %Y = call x86_fp80 @fabsl(x86_fp80 %X) readnone
         ret x86_fp80 %Y
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index 5121ed13228d..d9cfaa4c2656 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | FileCheck %s
-; CHECK: add ESP, 8
+; CHECK: add esp, 8
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
index b60b68bd388d..ac898e69dfe1 100644
--- a/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -5,25 +5,25 @@ declare x86_fastcallcc i64 @callee(i64 inreg)
 
 define i64 @caller() {
         %X = call x86_fastcallcc  i64 @callee( i64 4294967299 )          ; <i64> [#uses=1]
-; CHECK: mov{{.*}}EDX, 1
+; CHECK: mov{{.*}}edx, 1
         ret i64 %X
 }
 
 define x86_fastcallcc i64 @caller2(i64 inreg %X) {
         ret i64 %X
-; CHECK: mov{{.*}}EAX, ECX
+; CHECK: mov{{.*}}eax, ecx
 }
 
 declare x86_thiscallcc i64 @callee2(i32)
 
 define i64 @caller3() {
         %X = call x86_thiscallcc i64 @callee2( i32 3 )
-; CHECK: mov{{.*}}ECX, 3
+; CHECK: mov{{.*}}ecx, 3
         ret i64 %X
 }
 
 define x86_thiscallcc i32 @caller4(i32 %X) {
         ret i32 %X
-; CHECK: mov{{.*}}EAX, ECX
+; CHECK: mov{{.*}}eax, ecx
 }
 
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 3159741cd9c4..42d2b8bed654 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -10,7 +10,7 @@ BB1:
 	ret i32 1
 BB2:
 	ret i32 0
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: calll
 ; CHECK-NEXT: testb	$1
 }
@@ -21,7 +21,7 @@ declare void @foo2(%struct.s* byval)
 define void @test2(%struct.s* %d) nounwind {
   call void @foo2(%struct.s* byval %d )
   ret void
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movl	(%eax)
 ; CHECK: movl {{.*}}, (%esp)
 ; CHECK: movl	4(%eax)
@@ -35,7 +35,7 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 define void @test3(i8* %a) {
   call void @llvm.memset.p0i8.i32(i8* %a, i8 0, i32 100, i32 1, i1 false)
   ret void
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK:   movl	{{.*}}, (%esp)
 ; CHECK:   movl	$0, 4(%esp)
 ; CHECK:   movl	$100, 8(%esp)
@@ -47,7 +47,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 define void @test4(i8* %a, i8* %b) {
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 100, i32 1, i1 false)
   ret void
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK:   movl	{{.*}}, (%esp)
 ; CHECK:   movl	{{.*}}, 4(%esp)
 ; CHECK:   movl	$100, 8(%esp)
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
index 45494f139e24..0fd0561e2046 100644
--- a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
 
 define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
 entry:
@@ -6,7 +6,7 @@ entry:
   ret i64 %result
 }
 
-; CHECK: test_sdiv64:
+; CHECK-LABEL: test_sdiv64:
 ; CHECK: cqto
 ; CHECK: idivq
 
@@ -16,7 +16,7 @@ entry:
   ret i64 %result
 }
 
-; CHECK: test_srem64:
+; CHECK-LABEL: test_srem64:
 ; CHECK: cqto
 ; CHECK: idivq
 
@@ -26,7 +26,7 @@ entry:
   ret i64 %result
 }
 
-; CHECK: test_udiv64:
+; CHECK-LABEL: test_udiv64:
 ; CHECK: xorl
 ; CHECK: divq
 
@@ -36,6 +36,6 @@ entry:
   ret i64 %result
 }
 
-; CHECK: test_urem64:
+; CHECK-LABEL: test_urem64:
 ; CHECK: xorl
 ; CHECK: divq
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
index 7aba7f7b7953..5828becb3c33 100644
--- a/test/CodeGen/X86/fast-isel-divrem.ll
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
-; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
 
 define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
 entry:
@@ -7,7 +7,7 @@ entry:
   ret i8 %result
 }
 
-; CHECK: test_sdiv8:
+; CHECK-LABEL: test_sdiv8:
 ; CHECK: movsbw
 ; CHECK: idivb
 
@@ -17,7 +17,7 @@ entry:
   ret i8 %result
 }
 
-; CHECK: test_srem8:
+; CHECK-LABEL: test_srem8:
 ; CHECK: movsbw
 ; CHECK: idivb
 
@@ -27,7 +27,7 @@ entry:
   ret i8 %result
 }
 
-; CHECK: test_udiv8:
+; CHECK-LABEL: test_udiv8:
 ; CHECK: movzbw
 ; CHECK: divb
 
@@ -37,7 +37,7 @@ entry:
   ret i8 %result
 }
 
-; CHECK: test_urem8:
+; CHECK-LABEL: test_urem8:
 ; CHECK: movzbw
 ; CHECK: divb
 
@@ -47,7 +47,7 @@ entry:
   ret i16 %result
 }
 
-; CHECK: test_sdiv16:
+; CHECK-LABEL: test_sdiv16:
 ; CHECK: cwtd
 ; CHECK: idivw
 
@@ -57,7 +57,7 @@ entry:
   ret i16 %result
 }
 
-; CHECK: test_srem16:
+; CHECK-LABEL: test_srem16:
 ; CHECK: cwtd
 ; CHECK: idivw
 
@@ -67,7 +67,7 @@ entry:
   ret i16 %result
 }
 
-; CHECK: test_udiv16:
+; CHECK-LABEL: test_udiv16:
 ; CHECK: xorl
 ; CHECK: divw
 
@@ -77,7 +77,7 @@ entry:
   ret i16 %result
 }
 
-; CHECK: test_urem16:
+; CHECK-LABEL: test_urem16:
 ; CHECK: xorl
 ; CHECK: divw
 
@@ -87,7 +87,7 @@ entry:
   ret i32 %result
 }
 
-; CHECK: test_sdiv32:
+; CHECK-LABEL: test_sdiv32:
 ; CHECK: cltd
 ; CHECK: idivl
 
@@ -97,7 +97,7 @@ entry:
   ret i32 %result
 }
 
-; CHECK: test_srem32:
+; CHECK-LABEL: test_srem32:
 ; CHECK: cltd
 ; CHECK: idivl
 
@@ -107,7 +107,7 @@ entry:
   ret i32 %result
 }
 
-; CHECK: test_udiv32:
+; CHECK-LABEL: test_udiv32:
 ; CHECK: xorl
 ; CHECK: divl
 
@@ -117,6 +117,6 @@ entry:
   ret i32 %result
 }
 
-; CHECK: test_urem32:
+; CHECK-LABEL: test_urem32:
 ; CHECK: xorl
 ; CHECK: divl
diff --git a/test/CodeGen/X86/fast-isel-extract.ll b/test/CodeGen/X86/fast-isel-extract.ll
index f63396e40ca4..3a4b2a685504 100644
--- a/test/CodeGen/X86/fast-isel-extract.ll
+++ b/test/CodeGen/X86/fast-isel-extract.ll
@@ -10,7 +10,7 @@ define void @test1(i64*) nounwind ssp {
   %4 = add i64 %3, 10
   store i64 %4, i64* %0
   ret void
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: callq _f
 ; CHECK-NEXT: addq	$10, %rax
 }
@@ -21,7 +21,7 @@ define void @test2(i64*) nounwind ssp {
   %4 = add i64 %3, 10
   store i64 %4, i64* %0
   ret void
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: callq _f
 ; CHECK-NEXT: addq	$10, %rdx
 }
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index 67fdad299369..8b38587164fe 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -5,14 +5,14 @@
 ; SSE2: xor
 ; SSE2-NOT: xor
 
-; CHECK: doo:
+; CHECK-LABEL: doo:
 ; CHECK: xor
 define double @doo(double %x) nounwind {
   %y = fsub double -0.0, %x
   ret double %y
 }
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: xor
 define float @foo(float %x) nounwind {
   %y = fsub float -0.0, %x
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index f0375f86028c..4e47c7455c53 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -9,11 +9,11 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind {
        %t9 = getelementptr i32* %t1, i32 %t3           ; <i32*> [#uses=1]
        %t15 = load i32* %t9            ; <i32> [#uses=1]
        ret i32 %t15
-; X32: test1:
+; X32-LABEL: test1:
 ; X32:  	movl	(%eax,%ecx,4), %eax
 ; X32:  	ret
 
-; X64: test1:
+; X64-LABEL: test1:
 ; X64:  	movslq	%e[[A0:di|cx]], %rax
 ; X64:  	movl	(%r[[A1:si|dx]],%rax,4), %eax
 ; X64:  	ret
@@ -23,11 +23,11 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind {
        %t9 = getelementptr i32* %t1, i64 %t3           ; <i32*> [#uses=1]
        %t15 = load i32* %t9            ; <i32> [#uses=1]
        ret i32 %t15
-; X32: test2:
+; X32-LABEL: test2:
 ; X32:  	movl	(%edx,%ecx,4), %e
 ; X32:  	ret
 
-; X64: test2:
+; X64-LABEL: test2:
 ; X64:  	movl	(%r[[A1]],%r[[A0]],4), %eax
 ; X64:  	ret
 }
@@ -42,12 +42,12 @@ entry:
   ret i8 %B
   
   
-; X32: test3:
+; X32-LABEL: test3:
 ; X32:  	movl	4(%esp), %eax
 ; X32:  	movb	-2(%eax), %al
 ; X32:  	ret
 
-; X64: test3:
+; X64-LABEL: test3:
 ; X64:  	movb	-2(%r[[A0]]), %al
 ; X64:  	ret
 
@@ -66,9 +66,9 @@ entry:
   %tmp2 = load double* %arrayidx                  ; <double> [#uses=1]
   ret double %tmp2
 
-; X32: test4:
+; X32-LABEL: test4:
 ; X32: 128(%e{{.*}},%e{{.*}},8)
-; X64: test4:
+; X64-LABEL: test4:
 ; X64: 128(%r{{.*}},%r{{.*}},8)
 }
 
@@ -80,7 +80,7 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
   %v10 = load i64* %v9
   %v11 = add i64 %B, %v10
   ret i64 %v11
-; X64: test5:
+; X64-LABEL: test5:
 ; X64: movslq	%e[[A1]], %rax
 ; X64-NEXT: (%r[[A0]],%rax),
 ; X64: ret
@@ -113,7 +113,7 @@ declare i8* @_ZNK18G__FastAllocString4dataEv() nounwind
 ; PR10605 / rdar://9930964 - Don't fold loads incorrectly.  The load should
 ; happen before the store.  
 define i32 @test7({i32,i32,i32}* %tmp1, i32 %tmp71, i32 %tmp63) nounwind  {
-; X64: test7:
+; X64-LABEL: test7:
 ; X64:    movl	8({{%rdi|%rcx}}), %eax
 ; X64:     movl	$4, 8({{%rdi|%rcx}})
 
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index bea18a195006..9c042d30e78c 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -4,7 +4,7 @@
 declare i32 @test1a(i32)
 
 define i32 @test1(i32 %x) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: andb $1, %
 	%y = add i32 %x, -3
 	%t = call i32 @test1a(i32 %y)
@@ -23,7 +23,7 @@ exit:		; preds = %next
 
 define void @test2(i8* %a) nounwind {
 entry:
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movb {{.*}} %al
 ; CHECK-NEXT: xorb $1, %al
 ; CHECK-NEXT: testb $1
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 52b1e8564338..cd2dc1d02c8a 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -12,7 +12,7 @@ entry:
         store i32 %2, i32* @src
 	ret i32 %2
 ; This should fold one of the loads into the add.
-; CHECK: loadgv:
+; CHECK-LABEL: loadgv:
 ; CHECK: 	movl	L_src$non_lazy_ptr, %ecx
 ; CHECK: 	movl	(%ecx), %eax
 ; CHECK: 	addl	(%ecx), %eax
@@ -40,7 +40,7 @@ entry:
 ; CHECK:	movl	L_LotsStuff$non_lazy_ptr, %ecx
 
 ; ATOM: _t:
-; ATOM:         movl    L_LotsStuff$non_lazy_ptr, %ecx
-; ATOM:         movl    $0, %eax
+; ATOM:         movl    L_LotsStuff$non_lazy_ptr, %e{{..}}
+; ATOM:         movl    $0, %e{{..}}
 
 }
diff --git a/test/CodeGen/X86/fast-isel-ret-ext.ll b/test/CodeGen/X86/fast-isel-ret-ext.ll
index fd768cb9b336..0370d99f906c 100644
--- a/test/CodeGen/X86/fast-isel-ret-ext.ll
+++ b/test/CodeGen/X86/fast-isel-ret-ext.ll
@@ -4,35 +4,35 @@
 define zeroext i8 @test1(i32 %y) nounwind {
   %conv = trunc i32 %y to i8
   ret i8 %conv
-  ; CHECK: test1:
+  ; CHECK-LABEL: test1:
   ; CHECK: movzbl {{.*}}, %eax
 }
 
 define signext i8 @test2(i32 %y) nounwind {
   %conv = trunc i32 %y to i8
   ret i8 %conv
-  ; CHECK: test2:
+  ; CHECK-LABEL: test2:
   ; CHECK: movsbl {{.*}}, %eax
 }
 
 define zeroext i16 @test3(i32 %y) nounwind {
   %conv = trunc i32 %y to i16
   ret i16 %conv
-  ; CHECK: test3:
+  ; CHECK-LABEL: test3:
   ; CHECK: movzwl {{.*}}, %eax
 }
 
 define signext i16 @test4(i32 %y) nounwind {
   %conv = trunc i32 %y to i16
   ret i16 %conv
-  ; CHECK: test4:
-  ; CHECK: movswl {{.*}}, %eax
+  ; CHECK-LABEL: test4:
+  ; CHECK: {{(movswl.%.x, %eax|cwtl)}}
 }
 
 define zeroext i1 @test5(i32 %y) nounwind {
   %conv = trunc i32 %y to i1
   ret i1 %conv
-  ; CHECK: test5:
+  ; CHECK-LABEL: test5:
   ; CHECK: andb $1
   ; CHECK: movzbl {{.*}}, %eax
 }
diff --git a/test/CodeGen/X86/fast-isel-store.ll b/test/CodeGen/X86/fast-isel-store.ll
new file mode 100644
index 000000000000..3d2a46c2bd1b
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-store.ll
@@ -0,0 +1,64 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
+
+define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
+entry:
+  store i32 %value, i32* %addr, align 1
+  ret i32 %value
+}
+
+; CHECK: ret
+
+define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
+entry:
+  store i16 %value, i16* %addr, align 1
+  ret i16 %value
+}
+
+; CHECK: ret
+
+define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
+; CHECK: movdqu
+; CHECK: ret
+  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
+  store <4 x i32> %foo, <4 x i32>* %addr, align 1
+  ret <4 x i32> %foo
+}
+
+define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
+; CHECK: movdqa
+; CHECK: ret
+  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
+  store <4 x i32> %foo, <4 x i32>* %addr, align 16
+  ret <4 x i32> %foo
+}
+
+define <4 x float> @test_store_4xf32(<4 x float>* nocapture %addr, <4 x float> %value) {
+; CHECK: movups
+; CHECK: ret
+  store <4 x float> %value, <4 x float>* %addr, align 1
+  ret <4 x float> %value
+}
+
+define <4 x float> @test_store_4xf32_aligned(<4 x float>* nocapture %addr, <4 x float> %value) {
+; CHECK: movaps
+; CHECK: ret
+  store <4 x float> %value, <4 x float>* %addr, align 16
+  ret <4 x float> %value
+}
+
+define <2 x double> @test_store_2xf64(<2 x double>* nocapture %addr, <2 x double> %value, <2 x double> %value2) {
+; CHECK: movupd
+; CHECK: ret
+  %foo = fadd <2 x double> %value, %value2 ; to force double type on store
+  store <2 x double> %foo, <2 x double>* %addr, align 1
+  ret <2 x double> %foo
+}
+
+define <2 x double> @test_store_2xf64_aligned(<2 x double>* nocapture %addr, <2 x double> %value, <2 x double> %value2) {
+; CHECK: movapd
+; CHECK: ret
+  %foo = fadd <2 x double> %value, %value2 ; to force double type on store
+  store <2 x double> %foo, <2 x double>* %addr, align 16
+  ret <2 x double> %foo
+}
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index 0963c5201c25..f71abd2fec01 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -9,7 +9,7 @@ entry:
           ret i32 %s
 }
 
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: leal	v@TLSGD
 ; CHECK: __tls_get_addr
 
@@ -21,6 +21,6 @@ entry:
           ret i32 %s
 }
 
-; CHECK: f_alias:
+; CHECK-LABEL: f_alias:
 ; CHECK: leal	v@TLSGD
 ; CHECK: __tls_get_addr
diff --git a/test/CodeGen/X86/fast-isel-unaligned-store.ll b/test/CodeGen/X86/fast-isel-unaligned-store.ll
deleted file mode 100644
index 7ce7f676add0..000000000000
--- a/test/CodeGen/X86/fast-isel-unaligned-store.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
-; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
-
-define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
-entry:
-  store i32 %value, i32* %addr, align 1
-  ret i32 %value
-}
-
-; CHECK: ret
-
-define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
-entry:
-  store i16 %value, i16* %addr, align 1
-  ret i16 %value
-}
-
-; CHECK: ret
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index ad1520ef8194..f7d2750b5b81 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -11,7 +11,7 @@ define i32 @test1(i32 %i) nounwind ssp {
   ret i32 %and
 }
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: andl	$8, 
 
 
@@ -29,7 +29,7 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %if.then, %entry
   ret void
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movq	%rdi, -8(%rsp)
 ; CHECK: cmpq	$42, -8(%rsp)
 }
@@ -41,7 +41,7 @@ if.end:                                           ; preds = %if.then, %entry
 define i64 @test3() nounwind {
   %A = ptrtoint i32* @G to i64
   ret i64 %A
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: movq _G@GOTPCREL(%rip), %rax
 ; CHECK-NEXT: ret
 }
@@ -57,7 +57,7 @@ define i32 @test4(i64 %idxprom9) nounwind {
   %conv = zext i8 %tmp11 to i32
   ret i32 %conv
 
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movq	_rtx_length@GOTPCREL(%rip), %rax
 ; CHECK-NEXT: movzbl	(%rax,%rdi), %eax
 ; CHECK-NEXT: ret
@@ -70,7 +70,7 @@ define void @test5(i32 %x, i32* %p) nounwind {
   store i32 %y, i32* %p
   ret void
 
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: movl	$50000, %ecx
 ; CHECK: sarl	%cl, %edi
 ; CHECK: ret
@@ -82,7 +82,7 @@ entry:
   %mul = mul nsw i64 %x, 8
   ret i64 %mul
 
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: shlq	$3, %rdi
 }
 
@@ -90,7 +90,7 @@ define i32 @test7(i32 %x) nounwind ssp {
 entry:
   %mul = mul nsw i32 %x, 8
   ret i32 %mul
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: shll	$3, %edi
 }
 
@@ -101,7 +101,7 @@ entry:
   %add = add nsw i64 %x, 7
   ret i64 %add
 
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: addq	$7, %rdi
 }
 
@@ -109,7 +109,7 @@ define i64 @test9(i64 %x) nounwind ssp {
 entry:
   %add = mul nsw i64 %x, 7
   ret i64 %add
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: imulq	$7, %rdi, %rax
 }
 
@@ -117,14 +117,14 @@ entry:
 define i32 @test10(i32 %X) nounwind {
   %Y = udiv i32 %X, 8
   ret i32 %Y
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: shrl	$3, 
 }
 
 define i32 @test11(i32 %X) nounwind {
   %Y = sdiv exact i32 %X, 8
   ret i32 %Y
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: sarl	$3, 
 }
 
@@ -141,7 +141,7 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %if.then, %entry
   ret void
-; CHECK: test12:
+; CHECK-LABEL: test12:
 ; CHECK: testb	$1,
 ; CHECK-NEXT: je L
 ; CHECK-NEXT: movl $0, %edi
@@ -153,7 +153,7 @@ declare void @test13f(i1 %X)
 define void @test13() nounwind {
   call void @test13f(i1 0)
   ret void
-; CHECK: test13:
+; CHECK-LABEL: test13:
 ; CHECK: movl $0, %edi
 ; CHECK-NEXT: callq
 }
@@ -166,7 +166,7 @@ entry:
   %tobool = trunc i8 %tmp to i1
   call void @test13f(i1 zeroext %tobool) noredzone
   ret void
-; CHECK: test14:
+; CHECK-LABEL: test14:
 ; CHECK: andb	$1, 
 ; CHECK: callq
 }
@@ -177,7 +177,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
 define void @test15(i8* %a, i8* %b) nounwind {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false)
   ret void
-; CHECK: test15:
+; CHECK-LABEL: test15:
 ; CHECK-NEXT: movl	(%rsi), %eax
 ; CHECK-NEXT: movl	%eax, (%rdi)
 ; CHECK-NEXT: ret
@@ -186,7 +186,7 @@ define void @test15(i8* %a, i8* %b) nounwind {
 ; Handling for varargs calls
 declare void @test16callee(...) nounwind
 define void @test16() nounwind {
-; CHECK: test16:
+; CHECK-LABEL: test16:
 ; CHECK: movl $1, %edi
 ; CHECK: movb $0, %al
 ; CHECK: callq _test16callee
@@ -224,7 +224,7 @@ if.then:                                          ; preds = %entry
 
 if.else:                                          ; preds = %entry
   ret i32 2
-; CHECK: test17:
+; CHECK-LABEL: test17:
 ; CHECK: movl	(%rdi), %eax
 ; CHECK: callq _foo
 ; CHECK: cmpl	$5, %eax
@@ -235,7 +235,7 @@ if.else:                                          ; preds = %entry
 define void @test18(float* %p1) {
   store float 0.0, float* %p1
   ret void
-; CHECK: test18:
+; CHECK-LABEL: test18:
 ; CHECK: xorps
 }
 
@@ -243,7 +243,7 @@ define void @test18(float* %p1) {
 define void @test19(double* %p1) {
   store double 0.0, double* %p1
   ret void
-; CHECK: test19:
+; CHECK-LABEL: test19:
 ; CHECK: xorps
 }
 
@@ -254,7 +254,7 @@ entry:
   %tmp = alloca %struct.a, align 8
   call void @test20sret(%struct.a* sret %tmp)
   ret void
-; CHECK: test20:
+; CHECK-LABEL: test20:
 ; CHECK: leaq (%rsp), %rdi
 ; CHECK: callq _test20sret
 }
@@ -264,7 +264,7 @@ declare void @test20sret(%struct.a* sret)
 define void @test21(double* %p1) {
   store double -0.0, double* %p1
   ret void
-; CHECK: test21:
+; CHECK-LABEL: test21:
 ; CHECK-NOT: xor
 ; CHECK: movsd	LCPI
 }
@@ -279,7 +279,7 @@ entry:
   call void @foo22(i32 2)
   call void @foo22(i32 3)
   ret void
-; CHECK: test22:
+; CHECK-LABEL: test22:
 ; CHECK: movl	$0, %edi
 ; CHECK: callq	_foo22
 ; CHECK: movl	$1, %edi
@@ -297,7 +297,7 @@ define void @test23(i8* noalias sret %result) {
   %a = alloca i8
   %b = call i8* @foo23()
   ret void
-; CHECK: test23:
+; CHECK-LABEL: test23:
 ; CHECK: call
 ; CHECK: movq  %rdi, %rax
 ; CHECK: ret
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 4caa3a039d6a..ba86e888cdde 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
 
 ; This should use flds to set the return value.
-; CHECK: test0:
+; CHECK-LABEL: test0:
 ; CHECK: flds
 ; CHECK: ret
 @G = external global float
@@ -11,7 +11,7 @@ define float @test0() nounwind {
 }
 
 ; This should pop 4 bytes on return.
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: ret $4
 define void @test1({i32, i32, i32, i32}* sret %p) nounwind {
   store {i32, i32, i32, i32} zeroinitializer, {i32, i32, i32, i32}* %p
@@ -19,7 +19,7 @@ define void @test1({i32, i32, i32, i32}* sret %p) nounwind {
 }
 
 ; Properly initialize the pic base.
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK-NOT: HHH
 ; CHECK: call{{.*}}L2$pb
 ; CHECK-NEXT: L2$pb:
@@ -39,7 +39,7 @@ entry:
   %tmp = alloca %struct.a, align 8
   call void @test3sret(%struct.a* sret %tmp)
   ret void
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: subl $44
 ; CHECK: leal 16(%esp)
 ; CHECK: calll _test3sret
@@ -53,7 +53,7 @@ entry:
   %tmp = alloca %struct.a, align 8
   call fastcc void @test4fastccsret(%struct.a* sret %tmp)
   ret void
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: subl $28
 ; CHECK: leal (%esp), %ecx
 ; CHECK: calll _test4fastccsret
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
index 705ab7bada7c..a362f8d1ca7e 100644
--- a/test/CodeGen/X86/fastcc.ll
+++ b/test/CodeGen/X86/fastcc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s
-; CHECK: movsd %xmm0, 8(%esp)
-; CHECK: xorl %ecx, %ecx
+; CHECK: movsd %xmm{{[0-9]}}, 8(%esp)
+; CHECK: xorl %eax, %eax
 
 @d = external global double		; <double*> [#uses=1]
 @c = external global double		; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/fastisel-gep-promote-before-add.ll b/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
new file mode 100644
index 000000000000..f87a34c4abde
--- /dev/null
+++ b/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
@@ -0,0 +1,37 @@
+; FastISel should not fold an add whose bit width differs from the pointer
+; width, because sext(a) + sext(b) != sext(a + b) in general.
+; RUN: llc -mtriple=x86_64-apple-darwin %s -O0 -o - | FileCheck %s
+
+define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
+entry:
+  %ptr.addr = alloca i8*, align 8
+  %add = add i8 64, 64 ; 0x40 + 0x40
+  %0 = load i8** %ptr.addr, align 8
+
+  ; CHECK-LABEL: _gep_promotion:
+  ; CHECK: movzbl ({{.*}})
+  %arrayidx = getelementptr inbounds i8* %0, i8 %add
+
+  %1 = load i8* %arrayidx, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @gep_promotion_nonconst(i8 %i, i8* %ptr) nounwind uwtable ssp {
+entry:
+  %i.addr = alloca i8, align 4
+  %ptr.addr = alloca i8*, align 8
+  store i8 %i, i8* %i.addr, align 4
+  store i8* %ptr, i8** %ptr.addr, align 8
+  %0 = load i8* %i.addr, align 4
+  ; CHECK-LABEL: _gep_promotion_nonconst:
+  ; CHECK: movzbl ({{.*}})
+  %xor = xor i8 %0, -128   ; %0   ^ 0x80
+  %add = add i8 %xor, -127 ; %xor + 0x81
+  %1 = load i8** %ptr.addr, align 8
+
+  %arrayidx = getelementptr inbounds i8* %1, i8 %add
+
+  %2 = load i8* %arrayidx, align 1
+  ret i8 %2
+}
+
diff --git a/test/CodeGen/X86/floor-soft-float.ll b/test/CodeGen/X86/floor-soft-float.ll
new file mode 100644
index 000000000000..5644509a86f7
--- /dev/null
+++ b/test/CodeGen/X86/floor-soft-float.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare float @llvm.floor.f32(float)
+
+; CHECK-SOFT-FLOAT: callq floorf
+; CHECK-HARD-FLOAT: roundss $1, %xmm0, %xmm0
+define float @myfloor(float %a) {
+  %val = tail call float @llvm.floor.f32(float %a)
+  ret float %val
+}
diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
index bd3514cc3f73..917eac0ca32d 100644
--- a/test/CodeGen/X86/fma.ll
+++ b/test/CodeGen/X86/fma.ll
@@ -34,6 +34,14 @@ entry:
   ret x86_fp80 %call
 }
 
+; CHECK: test_f32_cst
+; CHECK-NOT: fma
+define float @test_f32_cst() nounwind readnone ssp {
+entry:
+  %call = tail call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0) nounwind readnone
+  ret float %call
+}
+
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
index 6d98d59b3822..cfb598df634c 100644
--- a/test/CodeGen/X86/fma_patterns.ll
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -182,11 +182,11 @@ define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
   ret float %res
 }
 
-; CHECK: test_x86_fmadd_ps
+; CHECK: test_x86_fmadd_ps_load
 ; CHECK: vmovaps         (%rdi), %xmm2
 ; CHECK: vfmadd213ps     %xmm1, %xmm0, %xmm2
 ; CHECK: ret
-; CHECK_FMA4: test_x86_fmadd_ps
+; CHECK_FMA4: test_x86_fmadd_ps_load
 ; CHECK_FMA4: vfmaddps     %xmm1, (%rdi), %xmm0, %xmm0
 ; CHECK_FMA4: ret
 define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
@@ -196,11 +196,11 @@ define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4
   ret <4 x float> %res
 }
 
-; CHECK: test_x86_fmsub_ps
+; CHECK: test_x86_fmsub_ps_load
 ; CHECK: vmovaps         (%rdi), %xmm2
 ; CHECK: fmsub213ps     %xmm1, %xmm0, %xmm2
 ; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_ps
+; CHECK_FMA4: test_x86_fmsub_ps_load
 ; CHECK_FMA4: vfmsubps     %xmm1, (%rdi), %xmm0, %xmm0
 ; CHECK_FMA4: ret
 define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
diff --git a/test/CodeGen/X86/fma_patterns_wide.ll b/test/CodeGen/X86/fma_patterns_wide.ll
new file mode 100644
index 000000000000..04db2d76cd8c
--- /dev/null
+++ b/test/CodeGen/X86/fma_patterns_wide.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
+
+; CHECK-LABEL: test_x86_fmadd_ps_y_wide
+; CHECK: vfmadd213ps
+; CHECK: vfmadd213ps
+; CHECK: ret
+; CHECK_FMA4-LABEL: test_x86_fmadd_ps_y_wide
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fadd <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fmsub_ps_y_wide
+; CHECK: vfmsub213ps
+; CHECK: vfmsub213ps
+; CHECK: ret
+; CHECK_FMA4-LABEL: test_x86_fmsub_ps_y_wide
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fnmadd_ps_y_wide
+; CHECK: vfnmadd213ps
+; CHECK: vfnmadd213ps
+; CHECK: ret
+; CHECK_FMA4-LABEL: test_x86_fnmadd_ps_y_wide
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fnmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %a2, %x
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fnmsub_ps_y_wide
+; CHECK: vfnmsub213ps
+; CHECK: vfnmsub213ps
+; CHECK: ret
+define <16 x float> @test_x86_fnmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+  %res = fsub <16 x float> %y, %a2
+  ret <16 x float> %res
+}
+
+; CHECK-LABEL: test_x86_fmadd_pd_y_wide
+; CHECK: vfmadd213pd
+; CHECK: vfmadd213pd
+; CHECK: ret
+; CHECK_FMA4-LABEL: test_x86_fmadd_pd_y_wide
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmadd_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fadd <8 x double> %x, %a2
+  ret <8 x double> %res
+}
+
+; CHECK-LABEL: test_x86_fmsub_pd_y_wide
+; CHECK: vfmsub213pd
+; CHECK: vfmsub213pd
+; CHECK: ret
+; CHECK_FMA4-LABEL: test_x86_fmsub_pd_y_wide
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmsub_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fsub <8 x double> %x, %a2
+  ret <8 x double> %res
+}
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
index 63e7d36ada25..0b27387b73bf 100644
--- a/test/CodeGen/X86/fold-add.ll
+++ b/test/CodeGen/X86/fold-add.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-darwin9.6"
 @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @longest_match to i8*)]		; <[1 x i8*]*> [#uses=0]
 
 define fastcc i32 @longest_match(i32 %cur_match) nounwind {
-; CHECK: longest_match:
+; CHECK-LABEL: longest_match:
 ; CHECK-NOT: ret
 ; CHECK: cmpb $0, (%r{{.*}},%r{{.*}})
 ; CHECK: ret
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 93baa0e0eee0..a5eb8b5de3a4 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @t1(i8* %X, i32 %i) {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK-NOT: and
 ; CHECK: movzbl
 ; CHECK: movl (%{{...}},%{{...}},4),
@@ -17,7 +17,7 @@ entry:
 }
 
 define i32 @t2(i16* %X, i32 %i) {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK-NOT: and
 ; CHECK: movzwl
 ; CHECK: movl (%{{...}},%{{...}},4),
@@ -39,7 +39,7 @@ define i32 @t3(i16* %i.ptr, i32* %arr) {
 ; To make matters worse, because of the two-phase zext of %i and their reuse in
 ; the function, the DAG can get confusing trying to re-use both of them and
 ; prevent easy analysis of the mask in order to match this.
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK-NOT: and
 ; CHECK: shrl
 ; CHECK: addl (%{{...}},%{{...}},4),
@@ -58,7 +58,7 @@ entry:
 define i32 @t4(i16* %i.ptr, i32* %arr) {
 ; A version of @t3 that has more zero extends and more re-use of intermediate
 ; values. This exercise slightly different bits of canonicalization.
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK-NOT: and
 ; CHECK: shrl
 ; CHECK: addl (%{{...}},%{{...}},4),
diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll
index c1756d5e2e1a..e85d8f78c052 100644
--- a/test/CodeGen/X86/fold-load-vec.ll
+++ b/test/CodeGen/X86/fold-load-vec.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
 
 ; rdar://12721174
 ; We should not fold movss into pshufd since pshufd expects m128 while movss
 ; loads from m32.
 define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind {
 ; CHECK: sample_test
-; CHECK: movss
-; CHECK: pshufd
+; CHECK: movaps
+; CHECK: insertps
 entry:
   %source.addr = alloca <4 x float>*, align 8
   %dest.addr = alloca <2 x float>*, align 8
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index d8366654c01c..dde0a2d1c5d3 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -38,21 +38,21 @@ L:
 
   store i16 %A, i16* %Q
   ret i32 %D
-  
-; CHECK: test2:
+
+; CHECK-LABEL: test2:
 ; CHECK: 	movl	4(%esp), %eax
-; CHECK-NEXT:	movzwl	(%eax), %ecx
+; CHECK-NEXT:	movzwl	(%eax), %e{{..}}
 
 }
 
 ; rdar://10554090
 ; xor in exit block will be CSE'ed and load will be folded to xor in entry.
 define i1 @test3(i32* %P, i32* %Q) nounwind {
-; CHECK: test3:
-; CHECK: movl 8(%esp), %eax
-; CHECK: xorl (%eax),
+; CHECK-LABEL: test3:
+; CHECK: movl 8(%esp), %e
+; CHECK: movl 4(%esp), %e
+; CHECK: xorl (%e
 ; CHECK: j
-; CHECK-NOT: xor
 entry:
   %0 = load i32* %P, align 4
   %1 = load i32* %Q, align 4
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
index d850630a4d08..663e2afe22c7 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-1.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -2,14 +2,14 @@
 
 define <2 x double> @foo() nounwind {
   ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: pcmpeqd %xmm0, %xmm0
 ; CHECK-NOT: %xmm
 ; CHECK: ret
 }
 define <2 x double> @bar() nounwind {
   ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: xorps %xmm0, %xmm0
 ; CHECK-NOT: %xmm
 ; CHECK: ret
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 2bde76efd2ae..60a6844b39b2 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -11,7 +11,7 @@
 ; CHECK: .space 16,255
 
 ; No pcmpeqd instructions, everybody uses the constant pool.
-; CHECK: program_1:
+; CHECK-LABEL: program_1:
 ; CHECK-NOT: pcmpeqd
 
 	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
@@ -54,22 +54,27 @@ forbody:		; preds = %forcond
 	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
 	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
+	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer		; <<4 x i32>> [#uses=1]
+
+	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
+
+	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+
 	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
 	%andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13		; <<4 x i32>> [#uses=1]
 	%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16		; <<4 x i32>> [#uses=1]
 	%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14		; <<4 x i32>> [#uses=1]
 	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer		; <<4 x i32>> [#uses=1]
+
 	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
-	call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
 	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
 	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+
 	%bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=2]
 	%andps.i = and <4 x i32> zeroinitializer, %bitcast6.i		; <<4 x i32>> [#uses=1]
 	%bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32>		; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
index 2ada194f891f..95defc83db1f 100644
--- a/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -16,7 +16,7 @@ entry:
 }
 
 define i64 @g(i32 %i) nounwind {
-; CHECK: g:
+; CHECK-LABEL: g:
 ; CHECK:      pushl  %ebp
 ; CHECK-NEXT: movl   %esp, %ebp
 ; CHECK-NEXT: pushl
diff --git a/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll b/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
index 3468a457e95f..c3b2dfb5d6c6 100644
--- a/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
+++ b/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
 
 define void @bar(i32 %argc) #0 {
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: pushq %rbp
 entry:
   %conv = sitofp i32 %argc to double
@@ -14,7 +14,7 @@ entry:
 }
 
 define void @qux(i32 %argc) #1 {
-; CHECK: qux:
+; CHECK-LABEL: qux:
 ; CHECK-NOT: pushq %rbp
 entry:
   %conv = sitofp i32 %argc to double
diff --git a/test/CodeGen/X86/fp-elim.ll b/test/CodeGen/X86/fp-elim.ll
index 60892a2352fb..2c50bd1be75a 100644
--- a/test/CodeGen/X86/fp-elim.ll
+++ b/test/CodeGen/X86/fp-elim.ll
@@ -1,42 +1,60 @@
 ; RUN: llc < %s -march=x86 -asm-verbose=false                           | FileCheck %s -check-prefix=FP-ELIM
 ; RUN: llc < %s -march=x86 -asm-verbose=false -disable-fp-elim          | FileCheck %s -check-prefix=NO-ELIM
-; RUN: llc < %s -march=x86 -asm-verbose=false -disable-non-leaf-fp-elim | FileCheck %s -check-prefix=NON-LEAF
 
 ; Implement -momit-leaf-frame-pointer
 ; rdar://7886181
 
 define i32 @t1() nounwind readnone {
 entry:
-; FP-ELIM:      t1:
-; FP-ELIM-NEXT: movl
-; FP-ELIM-NEXT: ret
-
-; NO-ELIM:      t1:
-; NO-ELIM-NEXT: pushl %ebp
-; NO-ELIM:      popl %ebp
-; NO-ELIM-NEXT: ret
-
-; NON-LEAF:      t1:
-; NON-LEAF-NEXT: movl
-; NON-LEAF-NEXT: ret
+; FP-ELIM-LABEL:  t1:
+; FP-ELIM-NEXT:     movl
+; FP-ELIM-NEXT:     ret
+
+; NO-ELIM-LABEL:  t1:
+; NO-ELIM-NEXT:     pushl %ebp
+; NO-ELIM:          popl %ebp
+; NO-ELIM-NEXT:     ret
   ret i32 10
 }
 
 define void @t2() nounwind {
 entry:
-; FP-ELIM:     t2:
-; FP-ELIM-NOT: pushl %ebp
-; FP-ELIM:     ret
-
-; NO-ELIM:      t2:
-; NO-ELIM-NEXT: pushl %ebp
-; NO-ELIM:      popl %ebp
-; NO-ELIM-NEXT: ret
-
-; NON-LEAF:      t2:
-; NON-LEAF-NEXT: pushl %ebp
-; NON-LEAF:      popl %ebp
-; NON-LEAF-NEXT: ret
+; FP-ELIM-LABEL:  t2:
+; FP-ELIM-NOT:      pushl %ebp
+; FP-ELIM:          ret
+
+; NO-ELIM-LABEL:  t2:
+; NO-ELIM-NEXT:     pushl %ebp
+; NO-ELIM:          popl %ebp
+; NO-ELIM-NEXT:     ret
+  tail call void @foo(i32 0) nounwind
+  ret void
+}
+
+define i32 @t3() "no-frame-pointer-elim-non-leaf" nounwind readnone {
+entry:
+; FP-ELIM-LABEL:  t3:
+; FP-ELIM-NEXT:     movl
+; FP-ELIM-NEXT:     ret
+
+; NO-ELIM-LABEL:  t3:
+; NO-ELIM-NEXT:     pushl %ebp
+; NO-ELIM:          popl %ebp
+; NO-ELIM-NEXT:     ret
+  ret i32 10
+}
+
+define void @t4() "no-frame-pointer-elim-non-leaf" nounwind {
+entry:
+; FP-ELIM-LABEL:  t4:
+; FP-ELIM-NEXT:     pushl %ebp
+; FP-ELIM:          popl %ebp
+; FP-ELIM-NEXT:     ret
+
+; NO-ELIM-LABEL:  t4:
+; NO-ELIM-NEXT:     pushl %ebp
+; NO-ELIM:          popl %ebp
+; NO-ELIM-NEXT:     ret
   tail call void @foo(i32 0) nounwind
   ret void
 }
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 287504801d04..07baca84804e 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
 
-; CHECK: test1
+; CHECK-LABEL: test1
 define float @test1(float %a) {
 ; CHECK-NOT: addss
 ; CHECK: mulss
@@ -11,7 +11,7 @@ define float @test1(float %a) {
   ret float %r
 }
 
-; CHECK: test2
+; CHECK-LABEL: test2
 define float @test2(float %a) {
 ; CHECK-NOT: addss
 ; CHECK: mulss
@@ -23,9 +23,45 @@ define float @test2(float %a) {
   ret float %r
 }
 
-; CHECK: test3
+; CHECK-LABEL: test3
 define float @test3(float %a) {
 ; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fmul float %a, 4.0
+  %t2 = fadd float %a, %a
+  %r = fadd float %t1, %t2
+  ret float %r
+}
+
+; CHECK-LABEL: test4
+define float @test4(float %a) {
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fadd float %a, %a
+  %t2 = fmul float 4.0, %a
+  %r = fadd float %t1, %t2
+  ret float %r
+}
+
+; CHECK-LABEL: test5
+define float @test5(float %a) {
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fadd float %a, %a
+  %t2 = fmul float %a, 4.0
+  %r = fadd float %t1, %t2
+  ret float %r
+}
+
+; CHECK-LABEL: test6
+define float @test6(float %a) {
+; CHECK-NOT: addss
 ; CHECK: xorps
 ; CHECK-NOT: addss
 ; CHECK: ret
@@ -35,8 +71,20 @@ define float @test3(float %a) {
   ret float %r
 }
 
-; CHECK: test4
-define float @test4(float %a) {
+; CHECK-LABEL: test7
+define float @test7(float %a) {
+; CHECK-NOT: addss
+; CHECK: xorps
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fmul float %a, 2.0
+  %t2 = fadd float %a, %a
+  %r = fsub float %t1, %t2
+  ret float %r
+}
+
+; CHECK-LABEL: test8
+define float @test8(float %a) {
 ; CHECK-NOT: fma
 ; CHECK-NOT: mul
 ; CHECK-NOT: add
@@ -46,8 +94,29 @@ define float @test4(float %a) {
   ret float %t2
 }
 
-; CHECK: test5
-define float @test5(float %a) {
+; CHECK-LABEL: test9
+define float @test9(float %a) {
+; CHECK-NOT: fma
+; CHECK-NOT: mul
+; CHECK-NOT: add
+; CHECK: ret
+  %t1 = fmul float 0.0, %a
+  %t2 = fadd float %t1, %a
+  ret float %t2
+}
+
+; CHECK-LABEL: test10
+define float @test10(float %a) {
+; CHECK-NOT: add
+; CHECK: vxorps
+; CHECK: ret
+  %t1 = fsub float -0.0, %a
+  %t2 = fadd float %a, %t1
+  ret float %t2
+}
+
+; CHECK-LABEL: test11
+define float @test11(float %a) {
 ; CHECK-NOT: add
 ; CHECK: vxorps
 ; CHECK: ret
diff --git a/test/CodeGen/X86/fp-select-cmp-and.ll b/test/CodeGen/X86/fp-select-cmp-and.ll
new file mode 100644
index 000000000000..cc76b43e8761
--- /dev/null
+++ b/test/CodeGen/X86/fp-select-cmp-and.ll
@@ -0,0 +1,185 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s
+
+define double @test1(double %a, double %b, double %eps) {
+  %cmp = fcmp olt double %a, %eps
+  %cond = select i1 %cmp, double %b, double 0.000000e+00
+  ret double %cond
+
+; CHECK-LABEL: @test1
+; CHECK:	cmpltsd	%xmm2, %xmm0
+; CHECK-NEXT:	andpd	%xmm1, %xmm0
+}
+
+define double @test2(double %a, double %b, double %eps) {
+  %cmp = fcmp ole double %a, %eps
+  %cond = select i1 %cmp, double %b, double 0.000000e+00
+  ret double %cond
+
+; CHECK-LABEL: @test2
+; CHECK:	cmplesd	%xmm2, %xmm0
+; CHECK-NEXT:	andpd	%xmm1, %xmm0
+}
+
+define double @test3(double %a, double %b, double %eps) {
+  %cmp = fcmp ogt double %a, %eps
+  %cond = select i1 %cmp, double %b, double 0.000000e+00
+  ret double %cond
+
+; CHECK-LABEL: @test3
+; CHECK:	cmpltsd	%xmm0, %xmm2
+; CHECK-NEXT:	andpd	%xmm1, %xmm2
+}
+
+define double @test4(double %a, double %b, double %eps) {
+  %cmp = fcmp oge double %a, %eps
+  %cond = select i1 %cmp, double %b, double 0.000000e+00
+  ret double %cond
+
+; CHECK-LABEL: @test4
+; CHECK:	cmplesd	%xmm0, %xmm2
+; CHECK-NEXT:	andpd	%xmm1, %xmm2
+}
+
+define double @test5(double %a, double %b, double %eps) {
+  %cmp = fcmp olt double %a, %eps
+  %cond = select i1 %cmp, double 0.000000e+00, double %b
+  ret double %cond
+
+; CHECK-LABEL: @test5
+; CHECK:	cmpltsd	%xmm2, %xmm0
+; CHECK-NEXT:	andnpd	%xmm1, %xmm0
+}
+
+define double @test6(double %a, double %b, double %eps) {
+  %cmp = fcmp ole double %a, %eps
+  %cond = select i1 %cmp, double 0.000000e+00, double %b
+  ret double %cond
+
+; CHECK-LABEL: @test6
+; CHECK:	cmplesd	%xmm2, %xmm0
+; CHECK-NEXT:	andnpd	%xmm1, %xmm0
+}
+
+define double @test7(double %a, double %b, double %eps) {
+  %cmp = fcmp ogt double %a, %eps
+  %cond = select i1 %cmp, double 0.000000e+00, double %b
+  ret double %cond
+
+; CHECK-LABEL: @test7
+; CHECK:	cmpltsd	%xmm0, %xmm2
+; CHECK-NEXT:	andnpd	%xmm1, %xmm2
+}
+
+define double @test8(double %a, double %b, double %eps) {
+  %cmp = fcmp oge double %a, %eps
+  %cond = select i1 %cmp, double 0.000000e+00, double %b
+  ret double %cond
+
+; CHECK-LABEL: @test8
+; CHECK:	cmplesd	%xmm0, %xmm2
+; CHECK-NEXT:	andnpd	%xmm1, %xmm2
+}
+
+define float @test9(float %a, float %b, float %eps) {
+  %cmp = fcmp olt float %a, %eps
+  %cond = select i1 %cmp, float %b, float 0.000000e+00
+  ret float %cond
+
+; CHECK-LABEL: @test9
+; CHECK:	cmpltss	%xmm2, %xmm0
+; CHECK-NEXT:	andps	%xmm1, %xmm0
+}
+
+define float @test10(float %a, float %b, float %eps) {
+  %cmp = fcmp ole float %a, %eps
+  %cond = select i1 %cmp, float %b, float 0.000000e+00
+  ret float %cond
+
+; CHECK-LABEL: @test10
+; CHECK:	cmpless	%xmm2, %xmm0
+; CHECK-NEXT:	andps	%xmm1, %xmm0
+}
+
+define float @test11(float %a, float %b, float %eps) {
+  %cmp = fcmp ogt float %a, %eps
+  %cond = select i1 %cmp, float %b, float 0.000000e+00
+  ret float %cond
+
+; CHECK-LABEL: @test11
+; CHECK:	cmpltss	%xmm0, %xmm2
+; CHECK-NEXT:	andps	%xmm1, %xmm2
+}
+
+define float @test12(float %a, float %b, float %eps) {
+  %cmp = fcmp oge float %a, %eps
+  %cond = select i1 %cmp, float %b, float 0.000000e+00
+  ret float %cond
+
+; CHECK-LABEL: @test12
+; CHECK:	cmpless	%xmm0, %xmm2
+; CHECK-NEXT:	andps	%xmm1, %xmm2
+}
+
+define float @test13(float %a, float %b, float %eps) {
+  %cmp = fcmp olt float %a, %eps
+  %cond = select i1 %cmp, float 0.000000e+00, float %b
+  ret float %cond
+
+; CHECK-LABEL: @test13
+; CHECK:	cmpltss	%xmm2, %xmm0
+; CHECK-NEXT:	andnps	%xmm1, %xmm0
+}
+
+define float @test14(float %a, float %b, float %eps) {
+  %cmp = fcmp ole float %a, %eps
+  %cond = select i1 %cmp, float 0.000000e+00, float %b
+  ret float %cond
+
+; CHECK-LABEL: @test14
+; CHECK:	cmpless	%xmm2, %xmm0
+; CHECK-NEXT:	andnps	%xmm1, %xmm0
+}
+
+define float @test15(float %a, float %b, float %eps) {
+  %cmp = fcmp ogt float %a, %eps
+  %cond = select i1 %cmp, float 0.000000e+00, float %b
+  ret float %cond
+
+; CHECK-LABEL: @test15
+; CHECK:	cmpltss	%xmm0, %xmm2
+; CHECK-NEXT:	andnps	%xmm1, %xmm2
+}
+
+define float @test16(float %a, float %b, float %eps) {
+  %cmp = fcmp oge float %a, %eps
+  %cond = select i1 %cmp, float 0.000000e+00, float %b
+  ret float %cond
+
+; CHECK-LABEL: @test16
+; CHECK:	cmpless	%xmm0, %xmm2
+; CHECK-NEXT:	andnps	%xmm1, %xmm2
+}
+
+define float @test17(float %a, float %b, float %c, float %eps) {
+  %cmp = fcmp oge float %a, %eps
+  %cond = select i1 %cmp, float %c, float %b
+  ret float %cond
+
+; CHECK-LABEL: @test17
+; CHECK: cmpless	%xmm0, %xmm3
+; CHECK-NEXT: andps	%xmm3, %xmm2
+; CHECK-NEXT: andnps	%xmm1, %xmm3
+; CHECK-NEXT: orps	%xmm2, %xmm3
+}
+
+define double @test18(double %a, double %b, double %c, double %eps) {
+  %cmp = fcmp oge double %a, %eps
+  %cond = select i1 %cmp, double %c, double %b
+  ret double %cond
+
+; CHECK-LABEL: @test18
+; CHECK: cmplesd	%xmm0, %xmm3
+; CHECK-NEXT: andpd	%xmm3, %xmm2
+; CHECK-NEXT: andnpd	%xmm1, %xmm3
+; CHECK-NEXT: orpd	%xmm2, %xmm3
+}
diff --git a/test/CodeGen/X86/fp-une-cmp.ll b/test/CodeGen/X86/fp-une-cmp.ll
new file mode 100644
index 000000000000..7f772d11da9a
--- /dev/null
+++ b/test/CodeGen/X86/fp-une-cmp.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mattr=sse4.1 | FileCheck %s
+; <rdar://problem/7859988>
+
+; Make sure we don't generate more jumps than we need to. We used to generate
+; something like this:
+;
+;       jne  LBB0_1
+;       jnp  LBB0_2
+;   LBB0_1:
+;       jmp  LBB0_3
+;   LBB0_2:
+;       addsd ...
+;   LBB0_3:
+;
+; Now we generate this:
+;
+;       jne  LBB0_2
+;       jp   LBB0_2
+;       addsd ...
+;   LBB0_2:
+
+; CHECK:       func
+; CHECK:       jne [[LABEL:.*]]
+; CHECK-NEXT:  jp  [[LABEL]]
+; CHECK-NOT:   jmp
+
+define float @func(float %x, float %y) nounwind readnone optsize ssp {
+entry:
+  %0 = fpext float %x to double
+  %1 = fpext float %y to double
+  %2 = fmul double %0, %1
+  %3 = fcmp une double %2, 0.000000e+00
+  br i1 %3, label %bb2, label %bb1
+
+bb1:
+  %4 = fadd double %2, -1.000000e+00
+  br label %bb2
+
+bb2:
+  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ]
+  %.0 = fptrunc double %.0.in to float
+  ret float %.0
+}
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
index b3ec5388d704..9a1337ab6cdb 100644
--- a/test/CodeGen/X86/fp_constant_op.ll
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -6,41 +6,41 @@ define double @foo_add(double %P) {
 	%tmp.1 = fadd double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
-; CHECK: foo_add:
-; CHECK: fadd DWORD PTR
+; CHECK-LABEL: foo_add:
+; CHECK: fadd dword ptr
 
 define double @foo_mul(double %P) {
 	%tmp.1 = fmul double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
-; CHECK: foo_mul:
-; CHECK: fmul DWORD PTR
+; CHECK-LABEL: foo_mul:
+; CHECK: fmul dword ptr
 
 define double @foo_sub(double %P) {
 	%tmp.1 = fsub double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
-; CHECK: foo_sub:
-; CHECK: fadd DWORD PTR
+; CHECK-LABEL: foo_sub:
+; CHECK: fadd dword ptr
 
 define double @foo_subr(double %P) {
 	%tmp.1 = fsub double 1.230000e+02, %P		; <double> [#uses=1]
 	ret double %tmp.1
 }
-; CHECK: foo_subr:
-; CHECK: fsub QWORD PTR
+; CHECK-LABEL: foo_subr:
+; CHECK: fsub qword ptr
 
 define double @foo_div(double %P) {
 	%tmp.1 = fdiv double %P, 1.230000e+02		; <double> [#uses=1]
 	ret double %tmp.1
 }
-; CHECK: foo_div:
-; CHECK: fdiv DWORD PTR
+; CHECK-LABEL: foo_div:
+; CHECK: fdiv dword ptr
 
 define double @foo_divr(double %P) {
 	%tmp.1 = fdiv double 1.230000e+02, %P		; <double> [#uses=1]
 	ret double %tmp.1
 }
-; CHECK: foo_divr:
-; CHECK: fdiv QWORD PTR
+; CHECK-LABEL: foo_divr:
+; CHECK: fdiv qword ptr
 
diff --git a/test/CodeGen/X86/frame-base.ll b/test/CodeGen/X86/frame-base.ll
new file mode 100644
index 000000000000..a6bd2a51c9a5
--- /dev/null
+++ b/test/CodeGen/X86/frame-base.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -o - %s | FileCheck %s
+
+; The issue here was a conflict between forming a %rip-relative lea and a
+; FrameIndex lea. The %rip sanity-checks didn't consider that a base register
+; had been set if we'd already matched a FrameIndex, when in reality it had.
+
+@var = global i32 0
+
+define void @test_frame_rip_conflict() {
+; CHECK-LABEL: test_frame_rip_conflict:
+; CHECK: leaq _var(%rip), [[TMPADDR:%r.*]]
+; CHECK: leaq {{-?[0-9]+}}(%rsp,[[TMPADDR]]),
+  %stackvar = alloca i32
+
+  %stackint = ptrtoint i32* %stackvar to i64
+  %addr = add i64 ptrtoint(i32* @var to i64), %stackint
+
+  call void @eat_i64(i64 %addr)
+  ret void
+}
+
+declare void @eat_i64(i64)
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 0729dda4a12b..cbcc62a7011a 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -4,7 +4,7 @@
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
 ; ATOM: foo
 ; ATOM: addl
-; ATOM: leal
+; ATOM: addl
 ; ATOM: leal
 
 ; CHECK: foo
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 72a50961b2ff..5f48b1e32b16 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,21 +1,35 @@
-; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
+; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
 ; rdar://7398554
 
 ; When doing vector gather-scatter index calculation with 32-bit indices,
 ; bounce the vector off of cache rather than shuffling each individual
 ; element out of the index vector.
 
-; CHECK: andps    ([[H:%rdx|%r8]]), %xmm0
-; CHECK: movaps   %xmm0, {{(-24)?}}(%rsp)
-; CHECK: movslq   {{(-24)?}}(%rsp), %rax
-; CHECK: movsd    ([[P:%rdi|%rcx]],%rax,8), %xmm0
-; CHECK: movslq   {{-20|4}}(%rsp), %rax
-; CHECK: movhpd   ([[P]],%rax,8), %xmm0
-; CHECK: movslq   {{-16|8}}(%rsp), %rax
-; CHECK: movsd    ([[P]],%rax,8), %xmm1
-; CHECK: movslq   {{-12|12}}(%rsp), %rax
-; CHECK: movhpd   ([[P]],%rax,8), %xmm1
+; LIN-LABEL: foo:
+; LIN: movaps	(%rsi), %xmm0
+; LIN: andps	(%rdx), %xmm0
+; LIN: movaps	%xmm0, -24(%rsp)
+; LIN: movslq	-24(%rsp), %[[REG1:r.+]]
+; LIN: movslq	-20(%rsp), %[[REG2:r.+]]
+; LIN: movslq	-16(%rsp), %[[REG3:r.+]]
+; LIN: movslq	-12(%rsp), %[[REG4:r.+]]
+; LIN: movsd	(%rdi,%[[REG1]],8), %xmm0
+; LIN: movhpd	(%rdi,%[[REG2]],8), %xmm0
+; LIN: movsd	(%rdi,%[[REG3]],8), %xmm1
+; LIN: movhpd	(%rdi,%[[REG4]],8), %xmm1
+
+; WIN: movaps	(%rdx), %xmm0
+; WIN: andps	(%r8), %xmm0
+; WIN: movaps	%xmm0, (%rsp)
+; WIN: movslq	(%rsp), %[[REG1:r.+]]
+; WIN: movslq	4(%rsp), %[[REG2:r.+]]
+; WIN: movslq	8(%rsp), %[[REG3:r.+]]
+; WIN: movslq	12(%rsp), %[[REG4:r.+]]
+; WIN: movsd	(%rcx,%[[REG1]],8), %xmm0
+; WIN: movhpd	(%rcx,%[[REG2]],8), %xmm0
+; WIN: movsd	(%rcx,%[[REG3]],8), %xmm1
+; WIN: movhpd	(%rcx,%[[REG4]],8), %xmm1
 
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
   %a = load <4 x i32>* %i
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index d89e9dca33d1..fcc4e9f0b89b 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -2,6 +2,12 @@
 @_ZTIi = external constant i8*
 
 define i32 @main() uwtable optsize ssp {
+; CHECK: .cfi_startproc
+; CHECK: .cfi_personality 155, ___gxx_personality_v0
+; CHECK: .cfi_lsda 16, Lexception0
+; CHECK: .cfi_def_cfa_offset 16
+; CHECK: .cfi_endproc
+
 entry:
   invoke void @_Z1fv() optsize
           to label %try.cont unwind label %lpad
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 0e65cfdbae30..4dba2c086329 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -28,10 +28,10 @@ entry:
 
 define cc 10 void @foo() nounwind {
 entry:
-  ; CHECK: movl base, %ebx
-  ; CHECK-NEXT: movl sp, %ebp
+  ; CHECK:      movl r1, %esi
   ; CHECK-NEXT: movl hp, %edi
-  ; CHECK-NEXT: movl r1, %esi
+  ; CHECK-NEXT: movl sp, %ebp
+  ; CHECK-NEXT: movl base, %ebx
   %0 = load i32* @r1
   %1 = load i32* @hp
   %2 = load i32* @sp
@@ -42,4 +42,3 @@ entry:
 }
 
 declare cc 10 void @bar(i32, i32, i32, i32)
-
diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll
index fcf7e1797ad8..403391e81658 100644
--- a/test/CodeGen/X86/ghc-cc64.ll
+++ b/test/CodeGen/X86/ghc-cc64.ll
@@ -41,22 +41,22 @@ entry:
 
 define cc 10 void @foo() nounwind {
 entry:
-  ; CHECK: movq base(%rip), %r13
-  ; CHECK-NEXT: movq sp(%rip), %rbp
-  ; CHECK-NEXT: movq hp(%rip), %r12
-  ; CHECK-NEXT: movq r1(%rip), %rbx
-  ; CHECK-NEXT: movq r2(%rip), %r14
-  ; CHECK-NEXT: movq r3(%rip), %rsi
-  ; CHECK-NEXT: movq r4(%rip), %rdi
-  ; CHECK-NEXT: movq r5(%rip), %r8
-  ; CHECK-NEXT: movq r6(%rip), %r9
-  ; CHECK-NEXT: movq splim(%rip), %r15
-  ; CHECK-NEXT: movss f1(%rip), %xmm1
-  ; CHECK-NEXT: movss f2(%rip), %xmm2
-  ; CHECK-NEXT: movss f3(%rip), %xmm3
-  ; CHECK-NEXT: movss f4(%rip), %xmm4
+  ; CHECK:      movsd d2(%rip), %xmm6
   ; CHECK-NEXT: movsd d1(%rip), %xmm5
-  ; CHECK-NEXT: movsd d2(%rip), %xmm6
+  ; CHECK-NEXT: movss f4(%rip), %xmm4
+  ; CHECK-NEXT: movss f3(%rip), %xmm3
+  ; CHECK-NEXT: movss f2(%rip), %xmm2
+  ; CHECK-NEXT: movss f1(%rip), %xmm1
+  ; CHECK-NEXT: movq splim(%rip), %r15
+  ; CHECK-NEXT: movq r6(%rip), %r9
+  ; CHECK-NEXT: movq r5(%rip), %r8
+  ; CHECK-NEXT: movq r4(%rip), %rdi
+  ; CHECK-NEXT: movq r3(%rip), %rsi
+  ; CHECK-NEXT: movq r2(%rip), %r14
+  ; CHECK-NEXT: movq r1(%rip), %rbx
+  ; CHECK-NEXT: movq hp(%rip), %r12
+  ; CHECK-NEXT: movq sp(%rip), %rbp
+  ; CHECK-NEXT: movq base(%rip), %r13
   %0 = load double* @d2
   %1 = load double* @d1
   %2 = load float* @f4
@@ -83,4 +83,3 @@ entry:
 
 declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
                         float, float, float, float, double, double)
-
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 194f59765e5b..d8743ac31814 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -65,10 +65,10 @@
 ; PR4584
 @"foo bar" = linkonce global i32 42
 
-; LINUX: .type	foo_20_bar,@object
-; LINUX: .section .data.foo_20_bar,"aGw",@progbits,foo_20_bar,comdat
-; LINUX: .weak	foo_20_bar
-; LINUX: foo_20_bar:
+; LINUX: .type	"foo bar",@object
+; LINUX: .section ".data.foo bar","aGw",@progbits,"foo bar",comdat
+; LINUX: .weak	"foo bar"
+; LINUX: "foo bar":
 
 ; DARWIN: .section		__DATA,__datacoal_nt,coalesced
 ; DARWIN: .globl	"_foo bar"
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 968a9e88c0e9..68e8c605f678 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep "movzbl	%[abcd]h," | count 7
+; RUN: llc < %s -march=x86 -mattr=-bmi | FileCheck %s
 
 ; Use h-register extract and zero-extend.
 
@@ -9,6 +9,9 @@ define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly
   %t3 = load double* %t2, align 8
   ret double %t3
 }
+; CHECK: foo8:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
@@ -16,6 +19,9 @@ define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t3 = load float* %t2, align 8
   ret float %t3
 }
+; CHECK: foo4:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
@@ -23,6 +29,9 @@ define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t3 = load i16* %t2, align 8
   ret i16 %t3
 }
+; CHECK: foo2:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
@@ -30,6 +39,9 @@ define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: foo1:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t0 = lshr i32 %x, 5
   %t1 = and i32 %t0, 2040
@@ -37,6 +49,9 @@ define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: bar8:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t0 = lshr i32 %x, 6
   %t1 = and i32 %t0, 1020
@@ -44,6 +59,9 @@ define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: bar4:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t0 = lshr i32 %x, 7
   %t1 = and i32 %t0, 510
@@ -51,3 +69,6 @@ define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: bar2:
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: ret
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index a19fca555811..3f549d26c2fe 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep "movzbl	%[abcd]h," | count 7
+; RUN: llc < %s -march=x86-64 -mattr=-bmi | FileCheck %s
 
 ; Use h-register extract and zero-extend.
 
@@ -9,6 +9,9 @@ define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly
   %t3 = load double* %t2, align 8
   ret double %t3
 }
+; CHECK: foo8:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
@@ -16,6 +19,9 @@ define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t3 = load float* %t2, align 8
   ret float %t3
 }
+; CHECK: foo4:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
@@ -23,6 +29,9 @@ define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t3 = load i16* %t2, align 8
   ret i16 %t3
 }
+; CHECK: foo2:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
@@ -30,6 +39,9 @@ define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: foo1:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t0 = lshr i64 %x, 5
   %t1 = and i64 %t0, 2040
@@ -37,6 +49,9 @@ define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: bar8:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t0 = lshr i64 %x, 6
   %t1 = and i64 %t0, 1020
@@ -44,6 +59,9 @@ define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: bar4:
+; CHECK: movzbl %{{[abcd]}}h, %e
+
 define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t0 = lshr i64 %x, 7
   %t1 = and i64 %t0, 510
@@ -51,3 +69,6 @@ define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
   %t3 = load i8* %t2, align 8
   ret i8 %t3
 }
+; CHECK: bar2:
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: ret
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index cdc75af92e43..6a5ccaa1e76f 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,22 +1,22 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
-; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=X86-32
+; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mattr=-bmi -march=x86    | FileCheck %s -check-prefix=X86-32
 
 ; Use h registers. On x86-64, codegen doesn't support general allocation
 ; of h registers yet, due to x86 encoding complications.
 
 define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
-; X86-64: bar64:
+; X86-64-LABEL: bar64:
 ; X86-64: shrq $8, %rdi
 ; X86-64: incb %dil
 
 ; See FIXME: on regclass GR8.
 ; It could be optimally transformed like; incb %ch; movb %ch, (%rdx)
-; WIN64:  bar64:
+; WIN64-LABEL:  bar64:
 ; WIN64:  shrq $8, %rcx
 ; WIN64:  incb %cl
 
-; X86-32: bar64:
+; X86-32-LABEL: bar64:
 ; X86-32: incb %ah
   %t0 = lshr i64 %x, 8
   %t1 = trunc i64 %t0 to i8
@@ -26,15 +26,15 @@ define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
 }
 
 define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
-; X86-64: bar32:
+; X86-64-LABEL: bar32:
 ; X86-64: shrl $8, %edi
 ; X86-64: incb %dil
 
-; WIN64:  bar32:
+; WIN64-LABEL:  bar32:
 ; WIN64:  shrl $8, %ecx
 ; WIN64:  incb %cl
 
-; X86-32: bar32:
+; X86-32-LABEL: bar32:
 ; X86-32: incb %ah
   %t0 = lshr i32 %x, 8
   %t1 = trunc i32 %t0 to i8
@@ -44,15 +44,15 @@ define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
 }
 
 define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
-; X86-64: bar16:
+; X86-64-LABEL: bar16:
 ; X86-64: shrl $8, %edi
 ; X86-64: incb %dil
 
-; WIN64:  bar16:
+; WIN64-LABEL:  bar16:
 ; WIN64:  shrl $8, %ecx
 ; WIN64:  incb %cl
 
-; X86-32: bar16:
+; X86-32-LABEL: bar16:
 ; X86-32: incb %ah
   %t0 = lshr i16 %x, 8
   %t1 = trunc i16 %t0 to i8
@@ -62,14 +62,14 @@ define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
 }
 
 define i64 @qux64(i64 inreg %x) nounwind {
-; X86-64: qux64:
+; X86-64-LABEL: qux64:
 ; X86-64: movq %rdi, %rax
 ; X86-64: movzbl %ah, %eax
 
-; WIN64:  qux64:
+; WIN64-LABEL:  qux64:
 ; WIN64:  movzbl %ch, %eax
 
-; X86-32: qux64:
+; X86-32-LABEL: qux64:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
@@ -77,14 +77,14 @@ define i64 @qux64(i64 inreg %x) nounwind {
 }
 
 define i32 @qux32(i32 inreg %x) nounwind {
-; X86-64: qux32:
+; X86-64-LABEL: qux32:
 ; X86-64: movl %edi, %eax
 ; X86-64: movzbl %ah, %eax
 
-; WIN64:  qux32:
+; WIN64-LABEL:  qux32:
 ; WIN64:  movzbl %ch, %eax
 
-; X86-32: qux32:
+; X86-32-LABEL: qux32:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
@@ -92,14 +92,14 @@ define i32 @qux32(i32 inreg %x) nounwind {
 }
 
 define i16 @qux16(i16 inreg %x) nounwind {
-; X86-64: qux16:
+; X86-64-LABEL: qux16:
 ; X86-64: movl %edi, %eax
 ; X86-64: movzbl %ah, %eax
 
-; WIN64:  qux16:
+; WIN64-LABEL:  qux16:
 ; WIN64:  movzbl %ch, %eax
 
-; X86-32: qux16:
+; X86-32-LABEL: qux16:
 ; X86-32: movzbl %ah, %eax
   %t0 = lshr i16 %x, 8
   ret i16 %t0
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 903c4538aba7..7254325a9265 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,12 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-linux > %t
-; RUN: grep "movzbl	%[abcd]h," %t | count 8
-; RUN: grep "%[abcd]h" %t | not grep "%r[[:digit:]]*d"
+; RUN: llc -mattr=-bmi < %s -mtriple=x86_64-linux | FileCheck %s
 
 ; LLVM creates virtual registers for values live across blocks
 ; based on the type of the value. Make sure that the extracts
 ; here use the GR64_NOREX register class for their result,
 ; instead of plain GR64.
 
+; CHECK: foo:
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: ret
+
 define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d,
                 i64 %e, i64 %f, i64 %g, i64 %h) {
   %sa = lshr i64 %a, 8
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
index 488444c15d3b..91acb7d5bb1c 100644
--- a/test/CodeGen/X86/h-registers-2.ll
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -4,7 +4,7 @@
 ; non-address use(s).
 
 define i32 @foo(i8* %x, i32 %y) nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: ret
 ; CHECK: movzbl %{{[abcd]h}},
 ; CHECK-NOT: ret
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 5f1f4fd8f76d..9feb5f6ea6e7 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3
 ; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX
 
-; SSE3: haddpd1:
+; SSE3-LABEL: haddpd1:
 ; SSE3-NOT: vhaddpd
 ; SSE3: haddpd
-; AVX: haddpd1:
+; AVX-LABEL: haddpd1:
 ; AVX: vhaddpd
 define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
   %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
@@ -13,10 +13,10 @@ define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %r
 }
 
-; SSE3: haddpd2:
+; SSE3-LABEL: haddpd2:
 ; SSE3-NOT: vhaddpd
 ; SSE3: haddpd
-; AVX: haddpd2:
+; AVX-LABEL: haddpd2:
 ; AVX: vhaddpd
 define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
   %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
@@ -25,10 +25,10 @@ define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %r
 }
 
-; SSE3: haddpd3:
+; SSE3-LABEL: haddpd3:
 ; SSE3-NOT: vhaddpd
 ; SSE3: haddpd
-; AVX: haddpd3:
+; AVX-LABEL: haddpd3:
 ; AVX: vhaddpd
 define <2 x double> @haddpd3(<2 x double> %x) {
   %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
@@ -37,10 +37,10 @@ define <2 x double> @haddpd3(<2 x double> %x) {
   ret <2 x double> %r
 }
 
-; SSE3: haddps1:
+; SSE3-LABEL: haddps1:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps1:
+; AVX-LABEL: haddps1:
 ; AVX: vhaddps
 define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
   %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -49,10 +49,10 @@ define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
-; SSE3: haddps2:
+; SSE3-LABEL: haddps2:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps2:
+; AVX-LABEL: haddps2:
 ; AVX: vhaddps
 define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
   %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
@@ -61,10 +61,10 @@ define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
-; SSE3: haddps3:
+; SSE3-LABEL: haddps3:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps3:
+; AVX-LABEL: haddps3:
 ; AVX: vhaddps
 define <4 x float> @haddps3(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
@@ -73,10 +73,10 @@ define <4 x float> @haddps3(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: haddps4:
+; SSE3-LABEL: haddps4:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps4:
+; AVX-LABEL: haddps4:
 ; AVX: vhaddps
 define <4 x float> @haddps4(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -85,10 +85,10 @@ define <4 x float> @haddps4(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: haddps5:
+; SSE3-LABEL: haddps5:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps5:
+; AVX-LABEL: haddps5:
 ; AVX: vhaddps
 define <4 x float> @haddps5(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
@@ -97,10 +97,10 @@ define <4 x float> @haddps5(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: haddps6:
+; SSE3-LABEL: haddps6:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps6:
+; AVX-LABEL: haddps6:
 ; AVX: vhaddps
 define <4 x float> @haddps6(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -109,10 +109,10 @@ define <4 x float> @haddps6(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: haddps7:
+; SSE3-LABEL: haddps7:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
-; AVX: haddps7:
+; AVX-LABEL: haddps7:
 ; AVX: vhaddps
 define <4 x float> @haddps7(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
@@ -121,10 +121,10 @@ define <4 x float> @haddps7(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: hsubpd1:
+; SSE3-LABEL: hsubpd1:
 ; SSE3-NOT: vhsubpd
 ; SSE3: hsubpd
-; AVX: hsubpd1:
+; AVX-LABEL: hsubpd1:
 ; AVX: vhsubpd
 define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
   %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
@@ -133,10 +133,10 @@ define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %r
 }
 
-; SSE3: hsubpd2:
+; SSE3-LABEL: hsubpd2:
 ; SSE3-NOT: vhsubpd
 ; SSE3: hsubpd
-; AVX: hsubpd2:
+; AVX-LABEL: hsubpd2:
 ; AVX: vhsubpd
 define <2 x double> @hsubpd2(<2 x double> %x) {
   %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
@@ -145,10 +145,10 @@ define <2 x double> @hsubpd2(<2 x double> %x) {
   ret <2 x double> %r
 }
 
-; SSE3: hsubps1:
+; SSE3-LABEL: hsubps1:
 ; SSE3-NOT: vhsubps
 ; SSE3: hsubps
-; AVX: hsubps1:
+; AVX-LABEL: hsubps1:
 ; AVX: vhsubps
 define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
   %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -157,10 +157,10 @@ define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %r
 }
 
-; SSE3: hsubps2:
+; SSE3-LABEL: hsubps2:
 ; SSE3-NOT: vhsubps
 ; SSE3: hsubps
-; AVX: hsubps2:
+; AVX-LABEL: hsubps2:
 ; AVX: vhsubps
 define <4 x float> @hsubps2(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
@@ -169,10 +169,10 @@ define <4 x float> @hsubps2(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: hsubps3:
+; SSE3-LABEL: hsubps3:
 ; SSE3-NOT: vhsubps
 ; SSE3: hsubps
-; AVX: hsubps3:
+; AVX-LABEL: hsubps3:
 ; AVX: vhsubps
 define <4 x float> @hsubps3(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -181,10 +181,10 @@ define <4 x float> @hsubps3(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: hsubps4:
+; SSE3-LABEL: hsubps4:
 ; SSE3-NOT: vhsubps
 ; SSE3: hsubps
-; AVX: hsubps4:
+; AVX-LABEL: hsubps4:
 ; AVX: vhsubps
 define <4 x float> @hsubps4(<4 x float> %x) {
   %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -193,11 +193,11 @@ define <4 x float> @hsubps4(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; SSE3: vhaddps1:
+; SSE3-LABEL: vhaddps1:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
 ; SSE3: haddps
-; AVX: vhaddps1:
+; AVX-LABEL: vhaddps1:
 ; AVX: vhaddps
 define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
   %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -206,11 +206,11 @@ define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %r
 }
 
-; SSE3: vhaddps2:
+; SSE3-LABEL: vhaddps2:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
 ; SSE3: haddps
-; AVX: vhaddps2:
+; AVX-LABEL: vhaddps2:
 ; AVX: vhaddps
 define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
   %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
@@ -219,11 +219,11 @@ define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %r
 }
 
-; SSE3: vhaddps3:
+; SSE3-LABEL: vhaddps3:
 ; SSE3-NOT: vhaddps
 ; SSE3: haddps
 ; SSE3: haddps
-; AVX: vhaddps3:
+; AVX-LABEL: vhaddps3:
 ; AVX: vhaddps
 define <8 x float> @vhaddps3(<8 x float> %x) {
   %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
@@ -232,11 +232,11 @@ define <8 x float> @vhaddps3(<8 x float> %x) {
   ret <8 x float> %r
 }
 
-; SSE3: vhsubps1:
+; SSE3-LABEL: vhsubps1:
 ; SSE3-NOT: vhsubps
 ; SSE3: hsubps
 ; SSE3: hsubps
-; AVX: vhsubps1:
+; AVX-LABEL: vhsubps1:
 ; AVX: vhsubps
 define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
   %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -245,11 +245,11 @@ define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
   ret <8 x float> %r
 }
 
-; SSE3: vhsubps3:
+; SSE3-LABEL: vhsubps3:
 ; SSE3-NOT: vhsubps
 ; SSE3: hsubps
 ; SSE3: hsubps
-; AVX: vhsubps3:
+; AVX-LABEL: vhsubps3:
 ; AVX: vhsubps
 define <8 x float> @vhsubps3(<8 x float> %x) {
   %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
@@ -258,11 +258,11 @@ define <8 x float> @vhsubps3(<8 x float> %x) {
   ret <8 x float> %r
 }
 
-; SSE3: vhaddpd1:
+; SSE3-LABEL: vhaddpd1:
 ; SSE3-NOT: vhaddpd
 ; SSE3: haddpd
 ; SSE3: haddpd
-; AVX: vhaddpd1:
+; AVX-LABEL: vhaddpd1:
 ; AVX: vhaddpd
 define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
   %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -271,11 +271,11 @@ define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
   ret <4 x double> %r
 }
 
-; SSE3: vhsubpd1:
+; SSE3-LABEL: vhsubpd1:
 ; SSE3-NOT: vhsubpd
 ; SSE3: hsubpd
 ; SSE3: hsubpd
-; AVX: vhsubpd1:
+; AVX-LABEL: vhsubpd1:
 ; AVX: vhsubpd
 define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
   %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
diff --git a/test/CodeGen/X86/hidden-vis-4.ll b/test/CodeGen/X86/hidden-vis-4.ll
index a8aede52accd..25a87b905bc1 100644
--- a/test/CodeGen/X86/hidden-vis-4.ll
+++ b/test/CodeGen/X86/hidden-vis-4.ll
@@ -4,7 +4,7 @@
 
 define i32 @t() nounwind readonly {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movl _x, %eax
 ; CHECK: .comm _x,4
 	%0 = load i32* @x, align 4		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/hidden-vis.ll b/test/CodeGen/X86/hidden-vis.ll
index fcb74fc9de52..a072cb08c99d 100644
--- a/test/CodeGen/X86/hidden-vis.ll
+++ b/test/CodeGen/X86/hidden-vis.ll
@@ -9,12 +9,12 @@
 
 define weak hidden void @t1() nounwind {
 ; LINUX: .hidden t1
-; LINUX: t1:
+; LINUX-LABEL: t1:
 
 ; DARWIN: .private_extern _t1
-; DARWIN: t1:
+; DARWIN-LABEL: t1:
 
-; WINDOWS: t1:
+; WINDOWS-LABEL: t1:
 ; WINDOWS-NOT: hidden
   ret void
 }
diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll
index 76d17a09d54e..b34417ebf69b 100644
--- a/test/CodeGen/X86/hipe-cc.ll
+++ b/test/CodeGen/X86/hipe-cc.ll
@@ -49,10 +49,10 @@ entry:
   store i32 %arg1, i32* %arg1_var
   store i32 %arg2, i32* %arg2_var
 
-  ; CHECK:      movl   4(%esp), %edx
-  ; CHECK-NEXT: movl   8(%esp), %eax
+  ; CHECK:      movl  16(%esp), %esi
   ; CHECK-NEXT: movl  12(%esp), %ebp
-  ; CHECK-NEXT: movl  16(%esp), %esi
+  ; CHECK-NEXT: movl   8(%esp), %eax
+  ; CHECK-NEXT: movl   4(%esp), %edx
   %0 = load i32* %hp_var
   %1 = load i32* %p_var
   %2 = load i32* %arg0_var
diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll
index 5dbb5a25cbeb..27e1c723a8f7 100644
--- a/test/CodeGen/X86/hipe-cc64.ll
+++ b/test/CodeGen/X86/hipe-cc64.ll
@@ -5,10 +5,10 @@
 define void @zap(i64 %a, i64 %b) nounwind {
 entry:
   ; CHECK:      movq %rsi, %rax
-  ; CHECK-NEXT: movq %rdi, %rsi
-  ; CHECK-NEXT: movq %rax, %rdx
   ; CHECK-NEXT: movl $8, %ecx
   ; CHECK-NEXT: movl $9, %r8d
+  ; CHECK-NEXT: movq %rdi, %rsi
+  ; CHECK-NEXT: movq %rax, %rdx
   ; CHECK-NEXT: callq addfour
   %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
   %res = extractvalue {i64, i64, i64} %0, 2
@@ -57,11 +57,11 @@ entry:
   store i64 %arg2, i64* %arg2_var
   store i64 %arg3, i64* %arg3_var
 
-  ; CHECK:      movq  8(%rsp), %rcx
-  ; CHECK-NEXT: movq  16(%rsp), %rdx
-  ; CHECK-NEXT: movq  24(%rsp), %rsi
+  ; CHECK:      movq  40(%rsp), %r15
   ; CHECK-NEXT: movq  32(%rsp), %rbp
-  ; CHECK-NEXT: movq  40(%rsp), %r15
+  ; CHECK-NEXT: movq  24(%rsp), %rsi
+  ; CHECK-NEXT: movq  16(%rsp), %rdx
+  ; CHECK-NEXT: movq  8(%rsp), %rcx
   %0 = load i64* %hp_var
   %1 = load i64* %p_var
   %2 = load i64* %arg0_var
diff --git a/test/CodeGen/X86/hipe-prologue.ll b/test/CodeGen/X86/hipe-prologue.ll
index ff3c5c803c90..2f16423600c9 100644
--- a/test/CodeGen/X86/hipe-prologue.ll
+++ b/test/CodeGen/X86/hipe-prologue.ll
@@ -9,10 +9,10 @@
 declare void @dummy_use(i32*, i32)
 
 define {i32, i32} @test_basic(i32 %hp, i32 %p) {
-  ; X32-Linux:       test_basic:
+  ; X32-Linux-LABEL:       test_basic:
   ; X32-Linux-NOT:   calll inc_stack_0
 
-  ; X64-Linux:       test_basic:
+  ; X64-Linux-LABEL:       test_basic:
   ; X64-Linux-NOT:   callq inc_stack_0
 
   %mem = alloca i32, i32 10
@@ -23,7 +23,7 @@ define {i32, i32} @test_basic(i32 %hp, i32 %p) {
 }
 
 define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {
-  ; X32-Linux:       test_basic_hipecc:
+  ; X32-Linux-LABEL:       test_basic_hipecc:
   ; X32-Linux:       leal -156(%esp), %ebx
   ; X32-Linux-NEXT:  cmpl 76(%ebp), %ebx
   ; X32-Linux-NEXT:  jb .LBB1_1
@@ -33,7 +33,7 @@ define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {
   ; X32-Linux:       .LBB1_1:
   ; X32-Linux-NEXT:  calll inc_stack_0
 
-  ; X64-Linux:       test_basic_hipecc:
+  ; X64-Linux-LABEL:       test_basic_hipecc:
   ; X64-Linux:       leaq -232(%rsp), %r14
   ; X64-Linux-NEXT:  cmpq 144(%rbp), %r14
   ; X64-Linux-NEXT:  jb .LBB1_1
@@ -51,10 +51,10 @@ define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {
 }
 
 define cc 11 {i32,i32,i32} @test_nocall_hipecc(i32 %hp,i32 %p,i32 %x,i32 %y) {
-  ; X32-Linux:       test_nocall_hipecc:
+  ; X32-Linux-LABEL:       test_nocall_hipecc:
   ; X32-Linux-NOT:   calll inc_stack_0
 
-  ; X64-Linux:       test_nocall_hipecc:
+  ; X64-Linux-LABEL:       test_nocall_hipecc:
   ; X64-Linux-NOT:   callq inc_stack_0
 
   %1 = add i32 %x, %y
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
index 72e17c065b63..01d1b8c034e3 100644
--- a/test/CodeGen/X86/hoist-common.ll
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -1,4 +1,14 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx  | FileCheck %s
+; This is supposed to be testing BranchFolding's common
+; code hoisting logic, but has been erroneously passing due
+; to there being a redundant xorl in the entry block
+; and no common code to hoist.
+; However, now that MachineSink sinks the redundant xor
+; hoist-common looks at it and rejects it for hoisting,
+; which causes this test to fail.
+; Since it seems this test is broken, marking XFAIL for now
+; until someone decides to remove it or fix what it tests.
+; XFAIL: *
 
 ; Common "xorb al, al" instruction in the two successor blocks should be
 ; moved to the entry block above the test + je.
@@ -7,8 +17,8 @@
 
 define zeroext i1 @t(i32 %c) nounwind ssp {
 entry:
-; CHECK: t:
-; CHECK: xorb %al, %al
+; CHECK-LABEL: t:
+; CHECK: xorl %eax, %eax
 ; CHECK: test
 ; CHECK: je
   %tobool = icmp eq i32 %c, 0
diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll
index e9d30d67019e..8cfda85ce46d 100644
--- a/test/CodeGen/X86/i128-mul.ll
+++ b/test/CodeGen/X86/i128-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
 ; PR1198
 
 define i64 @foo(i64 %x, i64 %y) {
@@ -10,3 +10,37 @@ define i64 @foo(i64 %x, i64 %y) {
         %tmp4 = trunc i128 %tmp3 to i64
         ret i64 %tmp4
 }
+
+; <rdar://problem/14096009> superfluous multiply by high part of
+; zero-extended value.
+; CHECK: @mul1
+; CHECK-NOT: imulq
+; CHECK: mulq
+; CHECK-NOT: imulq
+define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) {
+entry:
+  %conv = zext i64 %y to i128
+  %cmp11 = icmp eq i64 %n, 0
+  br i1 %cmp11, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
+  %i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i64* %x, i64 %i.012
+  %0 = load i64* %arrayidx, align 8
+  %conv2 = zext i64 %0 to i128
+  %mul = mul i128 %conv2, %conv
+  %conv3 = zext i64 %carry.013 to i128
+  %add = add i128 %mul, %conv3
+  %conv4 = trunc i128 %add to i64
+  %arrayidx5 = getelementptr inbounds i64* %z, i64 %i.012
+  store i64 %conv4, i64* %arrayidx5, align 8
+  %shr = lshr i128 %add, 64
+  %conv6 = trunc i128 %shr to i64
+  %inc = add i64 %i.012, 1
+  %exitcond = icmp eq i64 %inc, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i64 0
+}
diff --git a/test/CodeGen/X86/i128-sdiv.ll b/test/CodeGen/X86/i128-sdiv.ll
index ab5cdda0ce22..89cd495aa8b6 100644
--- a/test/CodeGen/X86/i128-sdiv.ll
+++ b/test/CodeGen/X86/i128-sdiv.ll
@@ -3,21 +3,21 @@
 ; trigger correctly.
 
 define i128 @test1(i128 %x) {
-  ; CHECK: test1:
+  ; CHECK-LABEL: test1:
   ; CHECK-NOT: call
   %tmp = sdiv i128 %x, 73786976294838206464
   ret i128 %tmp
 }
 
 define i128 @test2(i128 %x) {
-  ; CHECK: test2:
+  ; CHECK-LABEL: test2:
   ; CHECK-NOT: call
   %tmp = sdiv i128 %x, -73786976294838206464
   ret i128 %tmp
 }
 
 define i128 @test3(i128 %x) {
-  ; CHECK: test3:
+  ; CHECK-LABEL: test3:
   ; CHECK: call
   %tmp = sdiv i128 %x, -73786976294838206467
   ret i128 %tmp
diff --git a/test/CodeGen/X86/i486-fence-loop.ll b/test/CodeGen/X86/i486-fence-loop.ll
new file mode 100644
index 000000000000..d8096197b0df
--- /dev/null
+++ b/test/CodeGen/X86/i486-fence-loop.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=x86 -mcpu=i486 -o - %s | FileCheck %s
+
+; Main test here was that ISelDAG could cope with a MachineNode in the chain
+; from the first load to the "X86ISD::SUB". Previously it thought that meant no
+; cycle could be formed so it tried to use "sub (%eax), [[RHS]]".
+
+define void @gst_atomic_queue_push(i32* %addr) {   ; loop: two volatile loads of %addr separated by a seq_cst fence
+; CHECK-LABEL: gst_atomic_queue_push:
+; CHECK: movl (%eax), [[LHS:%e[a-z]+]]
+; CHECK: lock
+; CHECK-NEXT: orl
+; CHECK: movl (%eax), [[RHS:%e[a-z]+]]
+; CHECK: cmpl [[LHS]], [[RHS]]
+
+entry:
+  br label %while.body
+
+while.body:
+  %0 = load volatile i32* %addr, align 4    ; first volatile read of *%addr
+  fence seq_cst                             ; sits between the two loads
+  %1 = load volatile i32* %addr, align 4    ; second volatile read of the same address
+  %cmp = icmp sgt i32 %1, %0
+  br i1 %cmp, label %while.body, label %if.then   ; repeat while the second read is greater (signed) than the first
+
+if.then:
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
index 9196cce1ae5a..f47bd7b2defb 100644
--- a/test/CodeGen/X86/iabs.ll
+++ b/test/CodeGen/X86/iabs.ll
@@ -7,7 +7,7 @@
 ;;       ret
 ; rdar://10695237
 define i32 @test(i32 %a) nounwind {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: mov
 ; CHECK-NEXT: neg
 ; CHECK-NEXT: cmov
diff --git a/test/CodeGen/X86/ident-metadata.ll b/test/CodeGen/X86/ident-metadata.ll
new file mode 100644
index 000000000000..a5686730cee9
--- /dev/null
+++ b/test/CodeGen/X86/ident-metadata.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+; Verify that llvm.ident metadata is emitted as .ident
+; directives in assembly files, and in the .comment section in ELF object files.
+
+; CHECK: .ident  "clang version x.x"
+; CHECK-NEXT: .ident  "something else"
+!llvm.ident = !{!0, !1}
+!0 = metadata !{metadata !"clang version x.x"}
+!1 = metadata !{metadata !"something else"}
diff --git a/test/CodeGen/X86/inline-asm-R-constraint.ll b/test/CodeGen/X86/inline-asm-R-constraint.ll
index 66c27ac87712..d17e04dd7949 100644
--- a/test/CodeGen/X86/inline-asm-R-constraint.ll
+++ b/test/CodeGen/X86/inline-asm-R-constraint.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10.0"
 
 define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp {
 entry:
-; CHECK: udiv8:
+; CHECK-LABEL: udiv8:
 ; CHECK-NOT: movb %ah, (%r8)
   %a_addr = alloca i16, align 2                   ; <i16*> [#uses=2]
   %b_addr = alloca i8, align 1                    ; <i8*> [#uses=2]
diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll
index 747a5891cf04..31fb190daf83 100644
--- a/test/CodeGen/X86/inline-asm-error.ll
+++ b/test/CodeGen/X86/inline-asm-error.ll
@@ -6,7 +6,7 @@
 ; RUN: FileCheck %s < %t3
 
 ; The register allocator must fail on this function.
-; CHECK: error: ran out of registers during register allocation
+; CHECK: error: inline assembly requires more registers than available
 
 define void @f(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind ssp {
 entry:
diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll
index 51ea843712d1..45f4d2f38a46 100644
--- a/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -2,18 +2,31 @@
 ; PR3701
 
 define i64 @t(i64* %arg) nounwind {
-	br i1 true, label %1, label %5
+        br i1 true, label %1, label %5
 
-; <label>:1		; preds = %0
-	%2 = icmp eq i64* null, %arg		; <i1> [#uses=1]
-	%3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind		; <%struct.thread*> [#uses=0]
+; <label>:1             ; preds = %0
+        %2 = icmp eq i64* null, %arg            ; <i1> [#uses=1]
+        %3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind         ; <%struct.thread*> [#uses=0]
 ; CHECK: test
 ; CHECK-NEXT: j
-	br i1 %2, label %4, label %5
+        br i1 %2, label %4, label %5
 
-; <label>:4		; preds = %1
-	ret i64 1
+; <label>:4             ; preds = %1
+        ret i64 1
 
-; <label>:5		; preds = %1
-	ret i64 0
+; <label>:5             ; preds = %1
+        ret i64 0
 }
+
+; Make sure that we translate this to the bswap intrinsic, which lowers without
+; emitting the inline assembly.
+; CHECK-NOT: #APP
+define i32 @s(i32 %argc, i8** nocapture %argv) unnamed_addr nounwind {
+entry:
+  %0 = trunc i32 %argc to i16              ; keep only the low 16 bits of %argc
+  %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{fpsr},~{flags},~{cc}"(i16 %0) nounwind, !srcloc !0   ; 16-bit rotate right by 8; the "0" constraint ties the input to the output register
+  %1 = zext i16 %asmtmp to i32             ; widen the 16-bit result back to i32
+  ret i32 %1
+}
+
+!0 = metadata !{i64 935930}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 2249618c8a9e..e83c065632dc 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -147,7 +147,7 @@ declare x86_fp80 @ceil(x86_fp80)
 ; PR4484
 ; test1 leaves a value on the stack that is needed after the asm.
 ; CHECK: testPR4484
-; CHECK: test1
+; CHECK: calll _test1
 ; CHECK-NOT: fstp
 ; Load %a from stack after ceil
 ; CHECK: fldt
diff --git a/test/CodeGen/X86/inreg.ll b/test/CodeGen/X86/inreg.ll
index 6653cfb14ed8..e4610e360257 100644
--- a/test/CodeGen/X86/inreg.ll
+++ b/test/CodeGen/X86/inreg.ll
@@ -8,7 +8,7 @@ entry:
   %tmp = alloca %struct.s1, align 4
   call void @f(%struct.s1* inreg sret %tmp, i32 inreg 41, i32 inreg 42, i32 43)
   ret void
-  ; DAG: g1:
+  ; DAG-LABEL: g1:
   ; DAG: subl $[[AMT:.*]], %esp
   ; DAG-NEXT: $43, (%esp)
   ; DAG-NEXT: leal    16(%esp), %eax
@@ -18,7 +18,7 @@ entry:
   ; DAG-NEXT: addl $[[AMT]], %esp
   ; DAG-NEXT: ret
 
-  ; FAST: g1:
+  ; FAST-LABEL: g1:
   ; FAST: subl $[[AMT:.*]], %esp
   ; FAST-NEXT: leal    8(%esp), %eax
   ; FAST-NEXT: movl    $41, %edx
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index 83674361a773..a74e3f20c41a 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=-bmi | FileCheck %s
 
 define fastcc i32 @t() nounwind  {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movzwl 0, %eax
 ; CHECK: orl $2, %eax
 ; CHECK: movw %ax, 0
diff --git a/test/CodeGen/X86/isel-optnone.ll b/test/CodeGen/X86/isel-optnone.ll
new file mode 100644
index 000000000000..d2f062832e0c
--- /dev/null
+++ b/test/CodeGen/X86/isel-optnone.ll
@@ -0,0 +1,42 @@
+; RUN: llc -O2 -march=x86 < %s | FileCheck %s
+
+define i32* @fooOptnone(i32* %p, i32* %q, i32** %z) #0 {   ; #0 = nounwind optnone noinline (attribute group at end of file)
+entry:
+  %r = load i32* %p
+  %s = load i32* %q
+  %y = load i32** %z                       ; base pointer read through %z
+
+  %t0 = add i32 %r, %s
+  %t1 = add i32 %t0, 1                     ; %r + %s + 1
+  %t2 = getelementptr i32* %y, i32 1
+  %t3 = getelementptr i32* %t2, i32 %t1    ; %y advanced by 1, then by %t1 elements
+
+  ret i32* %t3
+
+; 'optnone' should use fast-isel which will not produce 'lea'.
+; CHECK-LABEL: fooOptnone:
+; CHECK-NOT:   lea
+; CHECK:       ret
+}
+
+define i32* @fooNormal(i32* %p, i32* %q, i32** %z) #1 {   ; #1 = nounwind only (attribute group at end of file)
+entry:
+  %r = load i32* %p
+  %s = load i32* %q
+  %y = load i32** %z                       ; base pointer read through %z
+
+  %t0 = add i32 %r, %s
+  %t1 = add i32 %t0, 1                     ; %r + %s + 1
+  %t2 = getelementptr i32* %y, i32 1
+  %t3 = getelementptr i32* %t2, i32 %t1    ; %y advanced by 1, then by %t1 elements
+
+  ret i32* %t3
+
+; Normal ISel will produce 'lea'.
+; CHECK-LABEL: fooNormal:
+; CHECK:       lea
+; CHECK:       ret
+}
+
+attributes #0 = { nounwind optnone noinline }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index d2755331fe8d..458f19dfc4f7 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @test(i32* %X, i32 %B) {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK-NOT: lea
 ; CHECK: mov{{.}} $4, ({{.*}},{{.*}},4)
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 0e34222b945f..d4174539f2f9 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=x86 -mcpu=pentiumpro -verify-machineinstrs | FileCheck %s
 
-define i32 @f(i32 %X) {
+define i32 @func_f(i32 %X) {
 entry:
-; CHECK: f:
+; CHECK-LABEL: func_f:
 ; CHECK: jns
 	%tmp1 = add i32 %X, 1		; <i32> [#uses=1]
 	%tmp = icmp slt i32 %tmp1, 0		; <i1> [#uses=1]
@@ -23,9 +23,9 @@ declare i32 @baz(...)
 
 ; rdar://10633221
 ; rdar://11355268
-define i32 @g(i32 %a, i32 %b) nounwind {
+define i32 @func_g(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: g:
+; CHECK-LABEL: func_g:
 ; CHECK-NOT: test
 ; CHECK: cmovs
   %sub = sub nsw i32 %a, %b
@@ -35,9 +35,9 @@ entry:
 }
 
 ; rdar://10734411
-define i32 @h(i32 %a, i32 %b) nounwind {
+define i32 @func_h(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: h:
+; CHECK-LABEL: func_h:
 ; CHECK-NOT: cmp
 ; CHECK: cmov
 ; CHECK-NOT: movl
@@ -47,9 +47,9 @@ entry:
   %cond = select i1 %cmp, i32 %sub, i32 0
   ret i32 %cond
 }
-define i32 @i(i32 %a, i32 %b) nounwind {
+define i32 @func_i(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: i:
+; CHECK-LABEL: func_i:
 ; CHECK-NOT: cmp
 ; CHECK: cmov
 ; CHECK-NOT: movl
@@ -59,9 +59,9 @@ entry:
   %cond = select i1 %cmp, i32 %sub, i32 0
   ret i32 %cond
 }
-define i32 @j(i32 %a, i32 %b) nounwind {
+define i32 @func_j(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: j:
+; CHECK-LABEL: func_j:
 ; CHECK-NOT: cmp
 ; CHECK: cmov
 ; CHECK-NOT: movl
@@ -71,9 +71,9 @@ entry:
   %cond = select i1 %cmp, i32 %sub, i32 0
   ret i32 %cond
 }
-define i32 @k(i32 %a, i32 %b) nounwind {
+define i32 @func_k(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: k:
+; CHECK-LABEL: func_k:
 ; CHECK-NOT: cmp
 ; CHECK: cmov
 ; CHECK-NOT: movl
@@ -84,18 +84,18 @@ entry:
   ret i32 %cond
 }
 ; redundant cmp instruction
-define i32 @l(i32 %a, i32 %b) nounwind {
+define i32 @func_l(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: l:
+; CHECK-LABEL: func_l:
 ; CHECK-NOT: cmp
   %cmp = icmp slt i32 %b, %a
   %sub = sub nsw i32 %a, %b
   %cond = select i1 %cmp, i32 %sub, i32 %a
   ret i32 %cond
 }
-define i32 @m(i32 %a, i32 %b) nounwind {
+define i32 @func_m(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: m:
+; CHECK-LABEL: func_m:
 ; CHECK-NOT: cmp
   %cmp = icmp sgt i32 %a, %b
   %sub = sub nsw i32 %a, %b
@@ -104,9 +104,9 @@ entry:
 }
 ; If EFLAGS is live-out, we can't remove cmp if there exists
 ; a swapped sub.
-define i32 @l2(i32 %a, i32 %b) nounwind {
+define i32 @func_l2(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: l2:
+; CHECK-LABEL: func_l2:
 ; CHECK: cmp
   %cmp = icmp eq i32 %b, %a
   %sub = sub nsw i32 %a, %b
@@ -120,9 +120,9 @@ if.then:
 if.else:
   ret i32 %sub
 }
-define i32 @l3(i32 %a, i32 %b) nounwind {
+define i32 @func_l3(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: l3:
+; CHECK-LABEL: func_l3:
 ; CHECK: sub
 ; CHECK-NOT: cmp
 ; CHECK: jge
@@ -139,9 +139,9 @@ if.else:
 }
 ; rdar://11830760
 ; When Movr0 is between sub and cmp, we need to move "Movr0" before sub.
-define i32 @l4(i32 %a, i32 %b) nounwind {
+define i32 @func_l4(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: l4:
+; CHECK-LABEL: func_l4:
 ; CHECK: xor
 ; CHECK: sub
 ; CHECK-NOT: cmp
@@ -151,9 +151,9 @@ entry:
   ret i32 %.sub
 }
 ; rdar://11540023
-define i32 @n(i32 %x, i32 %y) nounwind {
+define i32 @func_n(i32 %x, i32 %y) nounwind {
 entry:
-; CHECK: n:
+; CHECK-LABEL: func_n:
 ; CHECK-NOT: sub
 ; CHECK: cmp
   %sub = sub nsw i32 %x, %y
@@ -162,7 +162,7 @@ entry:
   ret i32 %y.x
 }
 ; PR://13046
-define void @o() nounwind uwtable {
+define void @func_o() nounwind uwtable {
 entry:
   %0 = load i16* undef, align 2
   br i1 undef, label %if.then.i, label %if.end.i
@@ -177,7 +177,7 @@ sw.bb:                                            ; preds = %if.end.i
   br i1 undef, label %if.then44, label %if.end29
 
 if.end29:                                         ; preds = %sw.bb
-; CHECK: o:
+; CHECK-LABEL: func_o:
 ; CHECK: cmp
   %1 = urem i16 %0, 10
   %cmp25 = icmp eq i16 %1, 0
@@ -204,9 +204,9 @@ if.else.i104:                                     ; preds = %if.then44
   ret void
 }
 ; rdar://11855129
-define i32 @p(i32 %a, i32 %b) nounwind {
+define i32 @func_p(i32 %a, i32 %b) nounwind {
 entry:
-; CHECK: p:
+; CHECK-LABEL: func_p:
 ; CHECK-NOT: test
 ; CHECK: cmovs
   %add = add nsw i32 %b, %a
@@ -217,8 +217,8 @@ entry:
 ; PR13475
 ; If we have sub a, b and cmp b, a and the result of cmp is used
 ; by sbb, we should not optimize cmp away.
-define i32 @q(i32 %j.4, i32 %w, i32 %el) {
-; CHECK: q:
+define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+; CHECK-LABEL: func_q:
 ; CHECK: cmp
 ; CHECK-NEXT: sbb
   %tmp532 = add i32 %j.4, %w
@@ -230,9 +230,9 @@ define i32 @q(i32 %j.4, i32 %w, i32 %el) {
   ret i32 %j.5
 }
 ; rdar://11873276
-define i8* @r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
 entry:
-; CHECK: r:
+; CHECK-LABEL: func_r:
 ; CHECK: sub
 ; CHECK-NOT: cmp
 ; CHECK: j
@@ -254,9 +254,9 @@ return:
 }
 
 ; Test optimizations of dec/inc.
-define i32 @dec(i32 %a) nounwind {
+define i32 @func_dec(i32 %a) nounwind {
 entry:
-; CHECK: dec:
+; CHECK-LABEL: func_dec:
 ; CHECK: decl
 ; CHECK-NOT: test
 ; CHECK: cmovsl
@@ -266,9 +266,9 @@ entry:
   ret i32 %cond
 }
 
-define i32 @inc(i32 %a) nounwind {
+define i32 @func_inc(i32 %a) nounwind {
 entry:
-; CHECK: inc:
+; CHECK-LABEL: func_inc:
 ; CHECK: incl
 ; CHECK-NOT: test
 ; CHECK: cmovsl
@@ -281,9 +281,9 @@ entry:
 ; PR13966
 @b = common global i32 0, align 4
 @a = common global i32 0, align 4
-define i32 @test1(i32 %p1) nounwind uwtable {
+define i32 @func_test1(i32 %p1) nounwind uwtable {
 entry:
-; CHECK: test1:
+; CHECK-LABEL: func_test1:
 ; CHECK: testb
 ; CHECK: j
 ; CHECK: ret
diff --git a/test/CodeGen/X86/large-gep-chain.ll b/test/CodeGen/X86/large-gep-chain.ll
new file mode 100644
index 000000000000..5cf4661f8ff9
--- /dev/null
+++ b/test/CodeGen/X86/large-gep-chain.ll
@@ -0,0 +1,25607 @@
+; RUN: llc < %s -O0 -march x86 -o /dev/null
+; <rdar://problem/12445434>
+
+%0 = type { i32, float* }
+
+@0 = external unnamed_addr constant [27 x i8], align 1
+@1 = external unnamed_addr constant [26 x i8], align 1
+@2 = external unnamed_addr constant [18 x i8], align 1
+@3 = external unnamed_addr constant [15 x i8], align 1
+@4 = external unnamed_addr constant [20 x i8], align 1
+@5 = external unnamed_addr constant [21 x i8], align 1
+@6 = external unnamed_addr constant [12 x i8], align 1
+@7 = external unnamed_addr constant [27 x i8], align 1
+@8 = external unnamed_addr constant [63 x i8], align 1
+
+define void @main() uwtable ssp {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  br label %bb25362
+
+bb2:                                              ; preds = %bb
+  %tmp = getelementptr inbounds float* null, i64 1
+  %tmp3 = getelementptr inbounds float* %tmp, i64 1
+  %tmp4 = getelementptr inbounds float* %tmp3, i64 1
+  %tmp5 = getelementptr inbounds float* %tmp4, i64 1
+  %tmp6 = getelementptr inbounds float* %tmp5, i64 1
+  %tmp7 = getelementptr inbounds float* %tmp6, i64 1
+  %tmp8 = getelementptr inbounds float* %tmp7, i64 1
+  %tmp9 = getelementptr inbounds float* %tmp8, i64 1
+  %tmp10 = getelementptr inbounds float* %tmp9, i64 1
+  %tmp11 = getelementptr inbounds float* %tmp10, i64 1
+  %tmp12 = getelementptr inbounds float* %tmp11, i64 1
+  %tmp13 = getelementptr inbounds float* %tmp12, i64 1
+  %tmp14 = getelementptr inbounds float* %tmp13, i64 1
+  %tmp15 = getelementptr inbounds float* %tmp14, i64 1
+  %tmp16 = getelementptr inbounds float* %tmp15, i64 1
+  %tmp17 = getelementptr inbounds float* %tmp16, i64 1
+  %tmp18 = getelementptr inbounds float* %tmp17, i64 1
+  %tmp19 = getelementptr inbounds float* %tmp18, i64 1
+  %tmp20 = getelementptr inbounds float* %tmp19, i64 1
+  %tmp21 = getelementptr inbounds float* %tmp20, i64 1
+  %tmp22 = getelementptr inbounds float* %tmp21, i64 1
+  %tmp23 = getelementptr inbounds float* %tmp22, i64 1
+  %tmp24 = getelementptr inbounds float* %tmp23, i64 1
+  %tmp25 = getelementptr inbounds float* %tmp24, i64 1
+  %tmp26 = getelementptr inbounds float* %tmp25, i64 1
+  %tmp27 = getelementptr inbounds float* %tmp26, i64 1
+  %tmp28 = getelementptr inbounds float* %tmp27, i64 1
+  %tmp29 = getelementptr inbounds float* %tmp28, i64 1
+  %tmp30 = getelementptr inbounds float* %tmp29, i64 1
+  %tmp31 = getelementptr inbounds float* %tmp30, i64 1
+  %tmp32 = getelementptr inbounds float* %tmp31, i64 1
+  %tmp33 = getelementptr inbounds float* %tmp32, i64 1
+  %tmp34 = getelementptr inbounds float* %tmp33, i64 1
+  %tmp35 = getelementptr inbounds float* %tmp34, i64 1
+  %tmp36 = getelementptr inbounds float* %tmp35, i64 1
+  %tmp37 = getelementptr inbounds float* %tmp36, i64 1
+  %tmp38 = getelementptr inbounds float* %tmp37, i64 1
+  %tmp39 = getelementptr inbounds float* %tmp38, i64 1
+  %tmp40 = getelementptr inbounds float* %tmp39, i64 1
+  %tmp41 = getelementptr inbounds float* %tmp40, i64 1
+  %tmp42 = getelementptr inbounds float* %tmp41, i64 1
+  %tmp43 = getelementptr inbounds float* %tmp42, i64 1
+  %tmp44 = getelementptr inbounds float* %tmp43, i64 1
+  %tmp45 = getelementptr inbounds float* %tmp44, i64 1
+  %tmp46 = getelementptr inbounds float* %tmp45, i64 1
+  %tmp47 = getelementptr inbounds float* %tmp46, i64 1
+  %tmp48 = getelementptr inbounds float* %tmp47, i64 1
+  %tmp49 = getelementptr inbounds float* %tmp48, i64 1
+  %tmp50 = getelementptr inbounds float* %tmp49, i64 1
+  %tmp51 = getelementptr inbounds float* %tmp50, i64 1
+  %tmp52 = getelementptr inbounds float* %tmp51, i64 1
+  %tmp53 = getelementptr inbounds float* %tmp52, i64 1
+  %tmp54 = getelementptr inbounds float* %tmp53, i64 1
+  %tmp55 = getelementptr inbounds float* %tmp54, i64 1
+  %tmp56 = getelementptr inbounds float* %tmp55, i64 1
+  %tmp57 = getelementptr inbounds float* %tmp56, i64 1
+  %tmp58 = getelementptr inbounds float* %tmp57, i64 1
+  %tmp59 = getelementptr inbounds float* %tmp58, i64 1
+  %tmp60 = getelementptr inbounds float* %tmp59, i64 1
+  %tmp61 = getelementptr inbounds float* %tmp60, i64 1
+  %tmp62 = getelementptr inbounds float* %tmp61, i64 1
+  %tmp63 = getelementptr inbounds float* %tmp62, i64 1
+  %tmp64 = getelementptr inbounds float* %tmp63, i64 1
+  %tmp65 = getelementptr inbounds float* %tmp64, i64 1
+  %tmp66 = getelementptr inbounds float* %tmp65, i64 1
+  %tmp67 = getelementptr inbounds float* %tmp66, i64 1
+  %tmp68 = getelementptr inbounds float* %tmp67, i64 1
+  %tmp69 = getelementptr inbounds float* %tmp68, i64 1
+  %tmp70 = getelementptr inbounds float* %tmp69, i64 1
+  %tmp71 = getelementptr inbounds float* %tmp70, i64 1
+  %tmp72 = getelementptr inbounds float* %tmp71, i64 1
+  %tmp73 = getelementptr inbounds float* %tmp72, i64 1
+  %tmp74 = getelementptr inbounds float* %tmp73, i64 1
+  %tmp75 = getelementptr inbounds float* %tmp74, i64 1
+  %tmp76 = getelementptr inbounds float* %tmp75, i64 1
+  %tmp77 = getelementptr inbounds float* %tmp76, i64 1
+  %tmp78 = getelementptr inbounds float* %tmp77, i64 1
+  %tmp79 = getelementptr inbounds float* %tmp78, i64 1
+  %tmp80 = getelementptr inbounds float* %tmp79, i64 1
+  %tmp81 = getelementptr inbounds float* %tmp80, i64 1
+  %tmp82 = getelementptr inbounds float* %tmp81, i64 1
+  %tmp83 = getelementptr inbounds float* %tmp82, i64 1
+  %tmp84 = getelementptr inbounds float* %tmp83, i64 1
+  %tmp85 = getelementptr inbounds float* %tmp84, i64 1
+  %tmp86 = getelementptr inbounds float* %tmp85, i64 1
+  %tmp87 = getelementptr inbounds float* %tmp86, i64 1
+  %tmp88 = getelementptr inbounds float* %tmp87, i64 1
+  %tmp89 = getelementptr inbounds float* %tmp88, i64 1
+  %tmp90 = getelementptr inbounds float* %tmp89, i64 1
+  %tmp91 = getelementptr inbounds float* %tmp90, i64 1
+  %tmp92 = getelementptr inbounds float* %tmp91, i64 1
+  %tmp93 = getelementptr inbounds float* %tmp92, i64 1
+  %tmp94 = getelementptr inbounds float* %tmp93, i64 1
+  %tmp95 = getelementptr inbounds float* %tmp94, i64 1
+  %tmp96 = getelementptr inbounds float* %tmp95, i64 1
+  %tmp97 = getelementptr inbounds float* %tmp96, i64 1
+  %tmp98 = getelementptr inbounds float* %tmp97, i64 1
+  %tmp99 = getelementptr inbounds float* %tmp98, i64 1
+  %tmp100 = getelementptr inbounds float* %tmp99, i64 1
+  %tmp101 = getelementptr inbounds float* %tmp100, i64 1
+  %tmp102 = getelementptr inbounds float* %tmp101, i64 1
+  %tmp103 = getelementptr inbounds float* %tmp102, i64 1
+  %tmp104 = getelementptr inbounds float* %tmp103, i64 1
+  %tmp105 = getelementptr inbounds float* %tmp104, i64 1
+  %tmp106 = getelementptr inbounds float* %tmp105, i64 1
+  %tmp107 = getelementptr inbounds float* %tmp106, i64 1
+  %tmp108 = getelementptr inbounds float* %tmp107, i64 1
+  %tmp109 = getelementptr inbounds float* %tmp108, i64 1
+  %tmp110 = getelementptr inbounds float* %tmp109, i64 1
+  %tmp111 = getelementptr inbounds float* %tmp110, i64 1
+  %tmp112 = getelementptr inbounds float* %tmp111, i64 1
+  %tmp113 = getelementptr inbounds float* %tmp112, i64 1
+  %tmp114 = getelementptr inbounds float* %tmp113, i64 1
+  %tmp115 = getelementptr inbounds float* %tmp114, i64 1
+  %tmp116 = getelementptr inbounds float* %tmp115, i64 1
+  %tmp117 = getelementptr inbounds float* %tmp116, i64 1
+  %tmp118 = getelementptr inbounds float* %tmp117, i64 1
+  %tmp119 = getelementptr inbounds float* %tmp118, i64 1
+  %tmp120 = getelementptr inbounds float* %tmp119, i64 1
+  %tmp121 = getelementptr inbounds float* %tmp120, i64 1
+  %tmp122 = getelementptr inbounds float* %tmp121, i64 1
+  %tmp123 = getelementptr inbounds float* %tmp122, i64 1
+  %tmp124 = getelementptr inbounds float* %tmp123, i64 1
+  %tmp125 = getelementptr inbounds float* %tmp124, i64 1
+  %tmp126 = getelementptr inbounds float* %tmp125, i64 1
+  %tmp127 = getelementptr inbounds float* %tmp126, i64 1
+  %tmp128 = getelementptr inbounds float* %tmp127, i64 1
+  %tmp129 = getelementptr inbounds float* %tmp128, i64 1
+  %tmp130 = getelementptr inbounds float* %tmp129, i64 1
+  %tmp131 = getelementptr inbounds float* %tmp130, i64 1
+  %tmp132 = getelementptr inbounds float* %tmp131, i64 1
+  %tmp133 = getelementptr inbounds float* %tmp132, i64 1
+  %tmp134 = getelementptr inbounds float* %tmp133, i64 1
+  %tmp135 = getelementptr inbounds float* %tmp134, i64 1
+  %tmp136 = getelementptr inbounds float* %tmp135, i64 1
+  %tmp137 = getelementptr inbounds float* %tmp136, i64 1
+  %tmp138 = getelementptr inbounds float* %tmp137, i64 1
+  %tmp139 = getelementptr inbounds float* %tmp138, i64 1
+  %tmp140 = getelementptr inbounds float* %tmp139, i64 1
+  %tmp141 = getelementptr inbounds float* %tmp140, i64 1
+  %tmp142 = getelementptr inbounds float* %tmp141, i64 1
+  %tmp143 = getelementptr inbounds float* %tmp142, i64 1
+  %tmp144 = getelementptr inbounds float* %tmp143, i64 1
+  %tmp145 = getelementptr inbounds float* %tmp144, i64 1
+  %tmp146 = getelementptr inbounds float* %tmp145, i64 1
+  %tmp147 = getelementptr inbounds float* %tmp146, i64 1
+  %tmp148 = getelementptr inbounds float* %tmp147, i64 1
+  %tmp149 = getelementptr inbounds float* %tmp148, i64 1
+  %tmp150 = getelementptr inbounds float* %tmp149, i64 1
+  %tmp151 = getelementptr inbounds float* %tmp150, i64 1
+  %tmp152 = getelementptr inbounds float* %tmp151, i64 1
+  %tmp153 = getelementptr inbounds float* %tmp152, i64 1
+  %tmp154 = getelementptr inbounds float* %tmp153, i64 1
+  %tmp155 = getelementptr inbounds float* %tmp154, i64 1
+  %tmp156 = getelementptr inbounds float* %tmp155, i64 1
+  %tmp157 = getelementptr inbounds float* %tmp156, i64 1
+  %tmp158 = getelementptr inbounds float* %tmp157, i64 1
+  %tmp159 = getelementptr inbounds float* %tmp158, i64 1
+  %tmp160 = getelementptr inbounds float* %tmp159, i64 1
+  %tmp161 = getelementptr inbounds float* %tmp160, i64 1
+  %tmp162 = getelementptr inbounds float* %tmp161, i64 1
+  %tmp163 = getelementptr inbounds float* %tmp162, i64 1
+  %tmp164 = getelementptr inbounds float* %tmp163, i64 1
+  %tmp165 = getelementptr inbounds float* %tmp164, i64 1
+  %tmp166 = getelementptr inbounds float* %tmp165, i64 1
+  %tmp167 = getelementptr inbounds float* %tmp166, i64 1
+  %tmp168 = getelementptr inbounds float* %tmp167, i64 1
+  %tmp169 = getelementptr inbounds float* %tmp168, i64 1
+  %tmp170 = getelementptr inbounds float* %tmp169, i64 1
+  %tmp171 = getelementptr inbounds float* %tmp170, i64 1
+  %tmp172 = getelementptr inbounds float* %tmp171, i64 1
+  %tmp173 = getelementptr inbounds float* %tmp172, i64 1
+  %tmp174 = getelementptr inbounds float* %tmp173, i64 1
+  %tmp175 = getelementptr inbounds float* %tmp174, i64 1
+  %tmp176 = getelementptr inbounds float* %tmp175, i64 1
+  %tmp177 = getelementptr inbounds float* %tmp176, i64 1
+  %tmp178 = getelementptr inbounds float* %tmp177, i64 1
+  %tmp179 = getelementptr inbounds float* %tmp178, i64 1
+  %tmp180 = getelementptr inbounds float* %tmp179, i64 1
+  %tmp181 = getelementptr inbounds float* %tmp180, i64 1
+  %tmp182 = getelementptr inbounds float* %tmp181, i64 1
+  %tmp183 = getelementptr inbounds float* %tmp182, i64 1
+  %tmp184 = getelementptr inbounds float* %tmp183, i64 1
+  %tmp185 = getelementptr inbounds float* %tmp184, i64 1
+  %tmp186 = getelementptr inbounds float* %tmp185, i64 1
+  %tmp187 = getelementptr inbounds float* %tmp186, i64 1
+  %tmp188 = getelementptr inbounds float* %tmp187, i64 1
+  %tmp189 = getelementptr inbounds float* %tmp188, i64 1
+  %tmp190 = getelementptr inbounds float* %tmp189, i64 1
+  %tmp191 = getelementptr inbounds float* %tmp190, i64 1
+  %tmp192 = getelementptr inbounds float* %tmp191, i64 1
+  %tmp193 = getelementptr inbounds float* %tmp192, i64 1
+  %tmp194 = getelementptr inbounds float* %tmp193, i64 1
+  %tmp195 = getelementptr inbounds float* %tmp194, i64 1
+  %tmp196 = getelementptr inbounds float* %tmp195, i64 1
+  %tmp197 = getelementptr inbounds float* %tmp196, i64 1
+  %tmp198 = getelementptr inbounds float* %tmp197, i64 1
+  %tmp199 = getelementptr inbounds float* %tmp198, i64 1
+  %tmp200 = getelementptr inbounds float* %tmp199, i64 1
+  %tmp201 = getelementptr inbounds float* %tmp200, i64 1
+  %tmp202 = getelementptr inbounds float* %tmp201, i64 1
+  %tmp203 = getelementptr inbounds float* %tmp202, i64 1
+  %tmp204 = getelementptr inbounds float* %tmp203, i64 1
+  %tmp205 = getelementptr inbounds float* %tmp204, i64 1
+  %tmp206 = getelementptr inbounds float* %tmp205, i64 1
+  %tmp207 = getelementptr inbounds float* %tmp206, i64 1
+  %tmp208 = getelementptr inbounds float* %tmp207, i64 1
+  %tmp209 = getelementptr inbounds float* %tmp208, i64 1
+  %tmp210 = getelementptr inbounds float* %tmp209, i64 1
+  %tmp211 = getelementptr inbounds float* %tmp210, i64 1
+  %tmp212 = getelementptr inbounds float* %tmp211, i64 1
+  %tmp213 = getelementptr inbounds float* %tmp212, i64 1
+  %tmp214 = getelementptr inbounds float* %tmp213, i64 1
+  %tmp215 = getelementptr inbounds float* %tmp214, i64 1
+  %tmp216 = getelementptr inbounds float* %tmp215, i64 1
+  %tmp217 = getelementptr inbounds float* %tmp216, i64 1
+  %tmp218 = getelementptr inbounds float* %tmp217, i64 1
+  %tmp219 = getelementptr inbounds float* %tmp218, i64 1
+  %tmp220 = getelementptr inbounds float* %tmp219, i64 1
+  %tmp221 = getelementptr inbounds float* %tmp220, i64 1
+  %tmp222 = getelementptr inbounds float* %tmp221, i64 1
+  %tmp223 = getelementptr inbounds float* %tmp222, i64 1
+  %tmp224 = getelementptr inbounds float* %tmp223, i64 1
+  %tmp225 = getelementptr inbounds float* %tmp224, i64 1
+  %tmp226 = getelementptr inbounds float* %tmp225, i64 1
+  %tmp227 = getelementptr inbounds float* %tmp226, i64 1
+  %tmp228 = getelementptr inbounds float* %tmp227, i64 1
+  %tmp229 = getelementptr inbounds float* %tmp228, i64 1
+  %tmp230 = getelementptr inbounds float* %tmp229, i64 1
+  %tmp231 = getelementptr inbounds float* %tmp230, i64 1
+  %tmp232 = getelementptr inbounds float* %tmp231, i64 1
+  %tmp233 = getelementptr inbounds float* %tmp232, i64 1
+  %tmp234 = getelementptr inbounds float* %tmp233, i64 1
+  %tmp235 = getelementptr inbounds float* %tmp234, i64 1
+  %tmp236 = getelementptr inbounds float* %tmp235, i64 1
+  %tmp237 = getelementptr inbounds float* %tmp236, i64 1
+  %tmp238 = getelementptr inbounds float* %tmp237, i64 1
+  %tmp239 = getelementptr inbounds float* %tmp238, i64 1
+  %tmp240 = getelementptr inbounds float* %tmp239, i64 1
+  %tmp241 = getelementptr inbounds float* %tmp240, i64 1
+  %tmp242 = getelementptr inbounds float* %tmp241, i64 1
+  %tmp243 = getelementptr inbounds float* %tmp242, i64 1
+  %tmp244 = getelementptr inbounds float* %tmp243, i64 1
+  %tmp245 = getelementptr inbounds float* %tmp244, i64 1
+  %tmp246 = getelementptr inbounds float* %tmp245, i64 1
+  %tmp247 = getelementptr inbounds float* %tmp246, i64 1
+  %tmp248 = getelementptr inbounds float* %tmp247, i64 1
+  %tmp249 = getelementptr inbounds float* %tmp248, i64 1
+  %tmp250 = getelementptr inbounds float* %tmp249, i64 1
+  %tmp251 = getelementptr inbounds float* %tmp250, i64 1
+  %tmp252 = getelementptr inbounds float* %tmp251, i64 1
+  %tmp253 = getelementptr inbounds float* %tmp252, i64 1
+  %tmp254 = getelementptr inbounds float* %tmp253, i64 1
+  %tmp255 = getelementptr inbounds float* %tmp254, i64 1
+  %tmp256 = getelementptr inbounds float* %tmp255, i64 1
+  %tmp257 = getelementptr inbounds float* %tmp256, i64 1
+  %tmp258 = getelementptr inbounds float* %tmp257, i64 1
+  %tmp259 = getelementptr inbounds float* %tmp258, i64 1
+  %tmp260 = getelementptr inbounds float* %tmp259, i64 1
+  %tmp261 = getelementptr inbounds float* %tmp260, i64 1
+  %tmp262 = getelementptr inbounds float* %tmp261, i64 1
+  %tmp263 = getelementptr inbounds float* %tmp262, i64 1
+  %tmp264 = getelementptr inbounds float* %tmp263, i64 1
+  %tmp265 = getelementptr inbounds float* %tmp264, i64 1
+  %tmp266 = getelementptr inbounds float* %tmp265, i64 1
+  %tmp267 = getelementptr inbounds float* %tmp266, i64 1
+  %tmp268 = getelementptr inbounds float* %tmp267, i64 1
+  %tmp269 = getelementptr inbounds float* %tmp268, i64 1
+  %tmp270 = getelementptr inbounds float* %tmp269, i64 1
+  %tmp271 = getelementptr inbounds float* %tmp270, i64 1
+  %tmp272 = getelementptr inbounds float* %tmp271, i64 1
+  %tmp273 = getelementptr inbounds float* %tmp272, i64 1
+  %tmp274 = getelementptr inbounds float* %tmp273, i64 1
+  %tmp275 = getelementptr inbounds float* %tmp274, i64 1
+  %tmp276 = getelementptr inbounds float* %tmp275, i64 1
+  %tmp277 = getelementptr inbounds float* %tmp276, i64 1
+  %tmp278 = getelementptr inbounds float* %tmp277, i64 1
+  %tmp279 = getelementptr inbounds float* %tmp278, i64 1
+  %tmp280 = getelementptr inbounds float* %tmp279, i64 1
+  %tmp281 = getelementptr inbounds float* %tmp280, i64 1
+  %tmp282 = getelementptr inbounds float* %tmp281, i64 1
+  %tmp283 = getelementptr inbounds float* %tmp282, i64 1
+  %tmp284 = getelementptr inbounds float* %tmp283, i64 1
+  %tmp285 = getelementptr inbounds float* %tmp284, i64 1
+  %tmp286 = getelementptr inbounds float* %tmp285, i64 1
+  %tmp287 = getelementptr inbounds float* %tmp286, i64 1
+  %tmp288 = getelementptr inbounds float* %tmp287, i64 1
+  %tmp289 = getelementptr inbounds float* %tmp288, i64 1
+  %tmp290 = getelementptr inbounds float* %tmp289, i64 1
+  %tmp291 = getelementptr inbounds float* %tmp290, i64 1
+  %tmp292 = getelementptr inbounds float* %tmp291, i64 1
+  %tmp293 = getelementptr inbounds float* %tmp292, i64 1
+  %tmp294 = getelementptr inbounds float* %tmp293, i64 1
+  %tmp295 = getelementptr inbounds float* %tmp294, i64 1
+  %tmp296 = getelementptr inbounds float* %tmp295, i64 1
+  %tmp297 = getelementptr inbounds float* %tmp296, i64 1
+  %tmp298 = getelementptr inbounds float* %tmp297, i64 1
+  %tmp299 = getelementptr inbounds float* %tmp298, i64 1
+  %tmp300 = getelementptr inbounds float* %tmp299, i64 1
+  %tmp301 = getelementptr inbounds float* %tmp300, i64 1
+  %tmp302 = getelementptr inbounds float* %tmp301, i64 1
+  %tmp303 = getelementptr inbounds float* %tmp302, i64 1
+  %tmp304 = getelementptr inbounds float* %tmp303, i64 1
+  %tmp305 = getelementptr inbounds float* %tmp304, i64 1
+  %tmp306 = getelementptr inbounds float* %tmp305, i64 1
+  %tmp307 = getelementptr inbounds float* %tmp306, i64 1
+  %tmp308 = getelementptr inbounds float* %tmp307, i64 1
+  %tmp309 = getelementptr inbounds float* %tmp308, i64 1
+  %tmp310 = getelementptr inbounds float* %tmp309, i64 1
+  %tmp311 = getelementptr inbounds float* %tmp310, i64 1
+  %tmp312 = getelementptr inbounds float* %tmp311, i64 1
+  %tmp313 = getelementptr inbounds float* %tmp312, i64 1
+  %tmp314 = getelementptr inbounds float* %tmp313, i64 1
+  %tmp315 = getelementptr inbounds float* %tmp314, i64 1
+  %tmp316 = getelementptr inbounds float* %tmp315, i64 1
+  %tmp317 = getelementptr inbounds float* %tmp316, i64 1
+  %tmp318 = getelementptr inbounds float* %tmp317, i64 1
+  %tmp319 = getelementptr inbounds float* %tmp318, i64 1
+  %tmp320 = getelementptr inbounds float* %tmp319, i64 1
+  %tmp321 = getelementptr inbounds float* %tmp320, i64 1
+  %tmp322 = getelementptr inbounds float* %tmp321, i64 1
+  %tmp323 = getelementptr inbounds float* %tmp322, i64 1
+  %tmp324 = getelementptr inbounds float* %tmp323, i64 1
+  %tmp325 = getelementptr inbounds float* %tmp324, i64 1
+  %tmp326 = getelementptr inbounds float* %tmp325, i64 1
+  %tmp327 = getelementptr inbounds float* %tmp326, i64 1
+  %tmp328 = getelementptr inbounds float* %tmp327, i64 1
+  %tmp329 = getelementptr inbounds float* %tmp328, i64 1
+  %tmp330 = getelementptr inbounds float* %tmp329, i64 1
+  %tmp331 = getelementptr inbounds float* %tmp330, i64 1
+  %tmp332 = getelementptr inbounds float* %tmp331, i64 1
+  %tmp333 = getelementptr inbounds float* %tmp332, i64 1
+  %tmp334 = getelementptr inbounds float* %tmp333, i64 1
+  %tmp335 = getelementptr inbounds float* %tmp334, i64 1
+  %tmp336 = getelementptr inbounds float* %tmp335, i64 1
+  %tmp337 = getelementptr inbounds float* %tmp336, i64 1
+  %tmp338 = getelementptr inbounds float* %tmp337, i64 1
+  %tmp339 = getelementptr inbounds float* %tmp338, i64 1
+  %tmp340 = getelementptr inbounds float* %tmp339, i64 1
+  %tmp341 = getelementptr inbounds float* %tmp340, i64 1
+  %tmp342 = getelementptr inbounds float* %tmp341, i64 1
+  %tmp343 = getelementptr inbounds float* %tmp342, i64 1
+  %tmp344 = getelementptr inbounds float* %tmp343, i64 1
+  %tmp345 = getelementptr inbounds float* %tmp344, i64 1
+  %tmp346 = getelementptr inbounds float* %tmp345, i64 1
+  %tmp347 = getelementptr inbounds float* %tmp346, i64 1
+  %tmp348 = getelementptr inbounds float* %tmp347, i64 1
+  %tmp349 = getelementptr inbounds float* %tmp348, i64 1
+  %tmp350 = getelementptr inbounds float* %tmp349, i64 1
+  %tmp351 = getelementptr inbounds float* %tmp350, i64 1
+  %tmp352 = getelementptr inbounds float* %tmp351, i64 1
+  %tmp353 = getelementptr inbounds float* %tmp352, i64 1
+  %tmp354 = getelementptr inbounds float* %tmp353, i64 1
+  %tmp355 = getelementptr inbounds float* %tmp354, i64 1
+  %tmp356 = getelementptr inbounds float* %tmp355, i64 1
+  %tmp357 = getelementptr inbounds float* %tmp356, i64 1
+  %tmp358 = getelementptr inbounds float* %tmp357, i64 1
+  %tmp359 = getelementptr inbounds float* %tmp358, i64 1
+  %tmp360 = getelementptr inbounds float* %tmp359, i64 1
+  %tmp361 = getelementptr inbounds float* %tmp360, i64 1
+  %tmp362 = getelementptr inbounds float* %tmp361, i64 1
+  %tmp363 = getelementptr inbounds float* %tmp362, i64 1
+  %tmp364 = getelementptr inbounds float* %tmp363, i64 1
+  %tmp365 = getelementptr inbounds float* %tmp364, i64 1
+  %tmp366 = getelementptr inbounds float* %tmp365, i64 1
+  %tmp367 = getelementptr inbounds float* %tmp366, i64 1
+  %tmp368 = getelementptr inbounds float* %tmp367, i64 1
+  %tmp369 = getelementptr inbounds float* %tmp368, i64 1
+  %tmp370 = getelementptr inbounds float* %tmp369, i64 1
+  %tmp371 = getelementptr inbounds float* %tmp370, i64 1
+  %tmp372 = getelementptr inbounds float* %tmp371, i64 1
+  %tmp373 = getelementptr inbounds float* %tmp372, i64 1
+  %tmp374 = getelementptr inbounds float* %tmp373, i64 1
+  %tmp375 = getelementptr inbounds float* %tmp374, i64 1
+  %tmp376 = getelementptr inbounds float* %tmp375, i64 1
+  %tmp377 = getelementptr inbounds float* %tmp376, i64 1
+  %tmp378 = getelementptr inbounds float* %tmp377, i64 1
+  %tmp379 = getelementptr inbounds float* %tmp378, i64 1
+  %tmp380 = getelementptr inbounds float* %tmp379, i64 1
+  %tmp381 = getelementptr inbounds float* %tmp380, i64 1
+  %tmp382 = getelementptr inbounds float* %tmp381, i64 1
+  %tmp383 = getelementptr inbounds float* %tmp382, i64 1
+  %tmp384 = getelementptr inbounds float* %tmp383, i64 1
+  %tmp385 = getelementptr inbounds float* %tmp384, i64 1
+  %tmp386 = getelementptr inbounds float* %tmp385, i64 1
+  %tmp387 = getelementptr inbounds float* %tmp386, i64 1
+  %tmp388 = getelementptr inbounds float* %tmp387, i64 1
+  %tmp389 = getelementptr inbounds float* %tmp388, i64 1
+  %tmp390 = getelementptr inbounds float* %tmp389, i64 1
+  %tmp391 = getelementptr inbounds float* %tmp390, i64 1
+  %tmp392 = getelementptr inbounds float* %tmp391, i64 1
+  %tmp393 = getelementptr inbounds float* %tmp392, i64 1
+  %tmp394 = getelementptr inbounds float* %tmp393, i64 1
+  %tmp395 = getelementptr inbounds float* %tmp394, i64 1
+  %tmp396 = getelementptr inbounds float* %tmp395, i64 1
+  %tmp397 = getelementptr inbounds float* %tmp396, i64 1
+  %tmp398 = getelementptr inbounds float* %tmp397, i64 1
+  %tmp399 = getelementptr inbounds float* %tmp398, i64 1
+  %tmp400 = getelementptr inbounds float* %tmp399, i64 1
+  %tmp401 = getelementptr inbounds float* %tmp400, i64 1
+  %tmp402 = getelementptr inbounds float* %tmp401, i64 1
+  %tmp403 = getelementptr inbounds float* %tmp402, i64 1
+  %tmp404 = getelementptr inbounds float* %tmp403, i64 1
+  %tmp405 = getelementptr inbounds float* %tmp404, i64 1
+  %tmp406 = getelementptr inbounds float* %tmp405, i64 1
+  %tmp407 = getelementptr inbounds float* %tmp406, i64 1
+  %tmp408 = getelementptr inbounds float* %tmp407, i64 1
+  %tmp409 = getelementptr inbounds float* %tmp408, i64 1
+  %tmp410 = getelementptr inbounds float* %tmp409, i64 1
+  %tmp411 = getelementptr inbounds float* %tmp410, i64 1
+  %tmp412 = getelementptr inbounds float* %tmp411, i64 1
+  %tmp413 = getelementptr inbounds float* %tmp412, i64 1
+  %tmp414 = getelementptr inbounds float* %tmp413, i64 1
+  %tmp415 = getelementptr inbounds float* %tmp414, i64 1
+  %tmp416 = getelementptr inbounds float* %tmp415, i64 1
+  %tmp417 = getelementptr inbounds float* %tmp416, i64 1
+  %tmp418 = getelementptr inbounds float* %tmp417, i64 1
+  %tmp419 = getelementptr inbounds float* %tmp418, i64 1
+  %tmp420 = getelementptr inbounds float* %tmp419, i64 1
+  %tmp421 = getelementptr inbounds float* %tmp420, i64 1
+  %tmp422 = getelementptr inbounds float* %tmp421, i64 1
+  %tmp423 = getelementptr inbounds float* %tmp422, i64 1
+  %tmp424 = getelementptr inbounds float* %tmp423, i64 1
+  %tmp425 = getelementptr inbounds float* %tmp424, i64 1
+  %tmp426 = getelementptr inbounds float* %tmp425, i64 1
+  %tmp427 = getelementptr inbounds float* %tmp426, i64 1
+  %tmp428 = getelementptr inbounds float* %tmp427, i64 1
+  %tmp429 = getelementptr inbounds float* %tmp428, i64 1
+  %tmp430 = getelementptr inbounds float* %tmp429, i64 1
+  %tmp431 = getelementptr inbounds float* %tmp430, i64 1
+  %tmp432 = getelementptr inbounds float* %tmp431, i64 1
+  %tmp433 = getelementptr inbounds float* %tmp432, i64 1
+  %tmp434 = getelementptr inbounds float* %tmp433, i64 1
+  %tmp435 = getelementptr inbounds float* %tmp434, i64 1
+  %tmp436 = getelementptr inbounds float* %tmp435, i64 1
+  %tmp437 = getelementptr inbounds float* %tmp436, i64 1
+  %tmp438 = getelementptr inbounds float* %tmp437, i64 1
+  %tmp439 = getelementptr inbounds float* %tmp438, i64 1
+  %tmp440 = getelementptr inbounds float* %tmp439, i64 1
+  %tmp441 = getelementptr inbounds float* %tmp440, i64 1
+  %tmp442 = getelementptr inbounds float* %tmp441, i64 1
+  %tmp443 = getelementptr inbounds float* %tmp442, i64 1
+  %tmp444 = getelementptr inbounds float* %tmp443, i64 1
+  %tmp445 = getelementptr inbounds float* %tmp444, i64 1
+  %tmp446 = getelementptr inbounds float* %tmp445, i64 1
+  %tmp447 = getelementptr inbounds float* %tmp446, i64 1
+  %tmp448 = getelementptr inbounds float* %tmp447, i64 1
+  %tmp449 = getelementptr inbounds float* %tmp448, i64 1
+  %tmp450 = getelementptr inbounds float* %tmp449, i64 1
+  %tmp451 = getelementptr inbounds float* %tmp450, i64 1
+  %tmp452 = getelementptr inbounds float* %tmp451, i64 1
+  %tmp453 = getelementptr inbounds float* %tmp452, i64 1
+  %tmp454 = getelementptr inbounds float* %tmp453, i64 1
+  %tmp455 = getelementptr inbounds float* %tmp454, i64 1
+  %tmp456 = getelementptr inbounds float* %tmp455, i64 1
+  %tmp457 = getelementptr inbounds float* %tmp456, i64 1
+  %tmp458 = getelementptr inbounds float* %tmp457, i64 1
+  %tmp459 = getelementptr inbounds float* %tmp458, i64 1
+  %tmp460 = getelementptr inbounds float* %tmp459, i64 1
+  %tmp461 = getelementptr inbounds float* %tmp460, i64 1
+  %tmp462 = getelementptr inbounds float* %tmp461, i64 1
+  %tmp463 = getelementptr inbounds float* %tmp462, i64 1
+  %tmp464 = getelementptr inbounds float* %tmp463, i64 1
+  %tmp465 = getelementptr inbounds float* %tmp464, i64 1
+  %tmp466 = getelementptr inbounds float* %tmp465, i64 1
+  %tmp467 = getelementptr inbounds float* %tmp466, i64 1
+  %tmp468 = getelementptr inbounds float* %tmp467, i64 1
+  %tmp469 = getelementptr inbounds float* %tmp468, i64 1
+  %tmp470 = getelementptr inbounds float* %tmp469, i64 1
+  %tmp471 = getelementptr inbounds float* %tmp470, i64 1
+  %tmp472 = getelementptr inbounds float* %tmp471, i64 1
+  %tmp473 = getelementptr inbounds float* %tmp472, i64 1
+  %tmp474 = getelementptr inbounds float* %tmp473, i64 1
+  %tmp475 = getelementptr inbounds float* %tmp474, i64 1
+  %tmp476 = getelementptr inbounds float* %tmp475, i64 1
+  %tmp477 = getelementptr inbounds float* %tmp476, i64 1
+  %tmp478 = getelementptr inbounds float* %tmp477, i64 1
+  %tmp479 = getelementptr inbounds float* %tmp478, i64 1
+  %tmp480 = getelementptr inbounds float* %tmp479, i64 1
+  %tmp481 = getelementptr inbounds float* %tmp480, i64 1
+  %tmp482 = getelementptr inbounds float* %tmp481, i64 1
+  %tmp483 = getelementptr inbounds float* %tmp482, i64 1
+  %tmp484 = getelementptr inbounds float* %tmp483, i64 1
+  %tmp485 = getelementptr inbounds float* %tmp484, i64 1
+  %tmp486 = getelementptr inbounds float* %tmp485, i64 1
+  %tmp487 = getelementptr inbounds float* %tmp486, i64 1
+  %tmp488 = getelementptr inbounds float* %tmp487, i64 1
+  %tmp489 = getelementptr inbounds float* %tmp488, i64 1
+  %tmp490 = getelementptr inbounds float* %tmp489, i64 1
+  %tmp491 = getelementptr inbounds float* %tmp490, i64 1
+  %tmp492 = getelementptr inbounds float* %tmp491, i64 1
+  %tmp493 = getelementptr inbounds float* %tmp492, i64 1
+  %tmp494 = getelementptr inbounds float* %tmp493, i64 1
+  %tmp495 = getelementptr inbounds float* %tmp494, i64 1
+  %tmp496 = getelementptr inbounds float* %tmp495, i64 1
+  %tmp497 = getelementptr inbounds float* %tmp496, i64 1
+  %tmp498 = getelementptr inbounds float* %tmp497, i64 1
+  %tmp499 = getelementptr inbounds float* %tmp498, i64 1
+  %tmp500 = getelementptr inbounds float* %tmp499, i64 1
+  %tmp501 = getelementptr inbounds float* %tmp500, i64 1
+  %tmp502 = getelementptr inbounds float* %tmp501, i64 1
+  %tmp503 = getelementptr inbounds float* %tmp502, i64 1
+  %tmp504 = getelementptr inbounds float* %tmp503, i64 1
+  %tmp505 = getelementptr inbounds float* %tmp504, i64 1
+  %tmp506 = getelementptr inbounds float* %tmp505, i64 1
+  %tmp507 = getelementptr inbounds float* %tmp506, i64 1
+  %tmp508 = getelementptr inbounds float* %tmp507, i64 1
+  %tmp509 = getelementptr inbounds float* %tmp508, i64 1
+  %tmp510 = getelementptr inbounds float* %tmp509, i64 1
+  %tmp511 = getelementptr inbounds float* %tmp510, i64 1
+  %tmp512 = getelementptr inbounds float* %tmp511, i64 1
+  %tmp513 = getelementptr inbounds float* %tmp512, i64 1
+  %tmp514 = getelementptr inbounds float* %tmp513, i64 1
+  %tmp515 = getelementptr inbounds float* %tmp514, i64 1
+  %tmp516 = getelementptr inbounds float* %tmp515, i64 1
+  %tmp517 = getelementptr inbounds float* %tmp516, i64 1
+  %tmp518 = getelementptr inbounds float* %tmp517, i64 1
+  %tmp519 = getelementptr inbounds float* %tmp518, i64 1
+  %tmp520 = getelementptr inbounds float* %tmp519, i64 1
+  %tmp521 = getelementptr inbounds float* %tmp520, i64 1
+  %tmp522 = getelementptr inbounds float* %tmp521, i64 1
+  %tmp523 = getelementptr inbounds float* %tmp522, i64 1
+  %tmp524 = getelementptr inbounds float* %tmp523, i64 1
+  %tmp525 = getelementptr inbounds float* %tmp524, i64 1
+  %tmp526 = getelementptr inbounds float* %tmp525, i64 1
+  %tmp527 = getelementptr inbounds float* %tmp526, i64 1
+  %tmp528 = getelementptr inbounds float* %tmp527, i64 1
+  %tmp529 = getelementptr inbounds float* %tmp528, i64 1
+  %tmp530 = getelementptr inbounds float* %tmp529, i64 1
+  %tmp531 = getelementptr inbounds float* %tmp530, i64 1
+  %tmp532 = getelementptr inbounds float* %tmp531, i64 1
+  %tmp533 = getelementptr inbounds float* %tmp532, i64 1
+  %tmp534 = getelementptr inbounds float* %tmp533, i64 1
+  %tmp535 = getelementptr inbounds float* %tmp534, i64 1
+  %tmp536 = getelementptr inbounds float* %tmp535, i64 1
+  %tmp537 = getelementptr inbounds float* %tmp536, i64 1
+  %tmp538 = getelementptr inbounds float* %tmp537, i64 1
+  %tmp539 = getelementptr inbounds float* %tmp538, i64 1
+  %tmp540 = getelementptr inbounds float* %tmp539, i64 1
+  %tmp541 = getelementptr inbounds float* %tmp540, i64 1
+  %tmp542 = getelementptr inbounds float* %tmp541, i64 1
+  %tmp543 = getelementptr inbounds float* %tmp542, i64 1
+  %tmp544 = getelementptr inbounds float* %tmp543, i64 1
+  %tmp545 = getelementptr inbounds float* %tmp544, i64 1
+  %tmp546 = getelementptr inbounds float* %tmp545, i64 1
+  %tmp547 = getelementptr inbounds float* %tmp546, i64 1
+  %tmp548 = getelementptr inbounds float* %tmp547, i64 1
+  %tmp549 = getelementptr inbounds float* %tmp548, i64 1
+  %tmp550 = getelementptr inbounds float* %tmp549, i64 1
+  %tmp551 = getelementptr inbounds float* %tmp550, i64 1
+  %tmp552 = getelementptr inbounds float* %tmp551, i64 1
+  %tmp553 = getelementptr inbounds float* %tmp552, i64 1
+  %tmp554 = getelementptr inbounds float* %tmp553, i64 1
+  %tmp555 = getelementptr inbounds float* %tmp554, i64 1
+  %tmp556 = getelementptr inbounds float* %tmp555, i64 1
+  %tmp557 = getelementptr inbounds float* %tmp556, i64 1
+  %tmp558 = getelementptr inbounds float* %tmp557, i64 1
+  %tmp559 = getelementptr inbounds float* %tmp558, i64 1
+  %tmp560 = getelementptr inbounds float* %tmp559, i64 1
+  %tmp561 = getelementptr inbounds float* %tmp560, i64 1
+  %tmp562 = getelementptr inbounds float* %tmp561, i64 1
+  %tmp563 = getelementptr inbounds float* %tmp562, i64 1
+  %tmp564 = getelementptr inbounds float* %tmp563, i64 1
+  %tmp565 = getelementptr inbounds float* %tmp564, i64 1
+  %tmp566 = getelementptr inbounds float* %tmp565, i64 1
+  %tmp567 = getelementptr inbounds float* %tmp566, i64 1
+  %tmp568 = getelementptr inbounds float* %tmp567, i64 1
+  %tmp569 = getelementptr inbounds float* %tmp568, i64 1
+  %tmp570 = getelementptr inbounds float* %tmp569, i64 1
+  %tmp571 = getelementptr inbounds float* %tmp570, i64 1
+  %tmp572 = getelementptr inbounds float* %tmp571, i64 1
+  %tmp573 = getelementptr inbounds float* %tmp572, i64 1
+  %tmp574 = getelementptr inbounds float* %tmp573, i64 1
+  %tmp575 = getelementptr inbounds float* %tmp574, i64 1
+  %tmp576 = getelementptr inbounds float* %tmp575, i64 1
+  %tmp577 = getelementptr inbounds float* %tmp576, i64 1
+  %tmp578 = getelementptr inbounds float* %tmp577, i64 1
+  %tmp579 = getelementptr inbounds float* %tmp578, i64 1
+  %tmp580 = getelementptr inbounds float* %tmp579, i64 1
+  %tmp581 = getelementptr inbounds float* %tmp580, i64 1
+  %tmp582 = getelementptr inbounds float* %tmp581, i64 1
+  %tmp583 = getelementptr inbounds float* %tmp582, i64 1
+  %tmp584 = getelementptr inbounds float* %tmp583, i64 1
+  %tmp585 = getelementptr inbounds float* %tmp584, i64 1
+  %tmp586 = getelementptr inbounds float* %tmp585, i64 1
+  %tmp587 = getelementptr inbounds float* %tmp586, i64 1
+  %tmp588 = getelementptr inbounds float* %tmp587, i64 1
+  %tmp589 = getelementptr inbounds float* %tmp588, i64 1
+  %tmp590 = getelementptr inbounds float* %tmp589, i64 1
+  %tmp591 = getelementptr inbounds float* %tmp590, i64 1
+  %tmp592 = getelementptr inbounds float* %tmp591, i64 1
+  %tmp593 = getelementptr inbounds float* %tmp592, i64 1
+  %tmp594 = getelementptr inbounds float* %tmp593, i64 1
+  %tmp595 = getelementptr inbounds float* %tmp594, i64 1
+  %tmp596 = getelementptr inbounds float* %tmp595, i64 1
+  %tmp597 = getelementptr inbounds float* %tmp596, i64 1
+  %tmp598 = getelementptr inbounds float* %tmp597, i64 1
+  %tmp599 = getelementptr inbounds float* %tmp598, i64 1
+  %tmp600 = getelementptr inbounds float* %tmp599, i64 1
+  %tmp601 = getelementptr inbounds float* %tmp600, i64 1
+  %tmp602 = getelementptr inbounds float* %tmp601, i64 1
+  %tmp603 = getelementptr inbounds float* %tmp602, i64 1
+  %tmp604 = getelementptr inbounds float* %tmp603, i64 1
+  %tmp605 = getelementptr inbounds float* %tmp604, i64 1
+  %tmp606 = getelementptr inbounds float* %tmp605, i64 1
+  %tmp607 = getelementptr inbounds float* %tmp606, i64 1
+  %tmp608 = getelementptr inbounds float* %tmp607, i64 1
+  %tmp609 = getelementptr inbounds float* %tmp608, i64 1
+  %tmp610 = getelementptr inbounds float* %tmp609, i64 1
+  %tmp611 = getelementptr inbounds float* %tmp610, i64 1
+  %tmp612 = getelementptr inbounds float* %tmp611, i64 1
+  %tmp613 = getelementptr inbounds float* %tmp612, i64 1
+  %tmp614 = getelementptr inbounds float* %tmp613, i64 1
+  %tmp615 = getelementptr inbounds float* %tmp614, i64 1
+  %tmp616 = getelementptr inbounds float* %tmp615, i64 1
+  %tmp617 = getelementptr inbounds float* %tmp616, i64 1
+  %tmp618 = getelementptr inbounds float* %tmp617, i64 1
+  %tmp619 = getelementptr inbounds float* %tmp618, i64 1
+  %tmp620 = getelementptr inbounds float* %tmp619, i64 1
+  %tmp621 = getelementptr inbounds float* %tmp620, i64 1
+  %tmp622 = getelementptr inbounds float* %tmp621, i64 1
+  %tmp623 = getelementptr inbounds float* %tmp622, i64 1
+  %tmp624 = getelementptr inbounds float* %tmp623, i64 1
+  %tmp625 = getelementptr inbounds float* %tmp624, i64 1
+  %tmp626 = getelementptr inbounds float* %tmp625, i64 1
+  %tmp627 = getelementptr inbounds float* %tmp626, i64 1
+  %tmp628 = getelementptr inbounds float* %tmp627, i64 1
+  %tmp629 = getelementptr inbounds float* %tmp628, i64 1
+  %tmp630 = getelementptr inbounds float* %tmp629, i64 1
+  %tmp631 = getelementptr inbounds float* %tmp630, i64 1
+  %tmp632 = getelementptr inbounds float* %tmp631, i64 1
+  %tmp633 = getelementptr inbounds float* %tmp632, i64 1
+  %tmp634 = getelementptr inbounds float* %tmp633, i64 1
+  %tmp635 = getelementptr inbounds float* %tmp634, i64 1
+  %tmp636 = getelementptr inbounds float* %tmp635, i64 1
+  %tmp637 = getelementptr inbounds float* %tmp636, i64 1
+  %tmp638 = getelementptr inbounds float* %tmp637, i64 1
+  %tmp639 = getelementptr inbounds float* %tmp638, i64 1
+  %tmp640 = getelementptr inbounds float* %tmp639, i64 1
+  %tmp641 = getelementptr inbounds float* %tmp640, i64 1
+  %tmp642 = getelementptr inbounds float* %tmp641, i64 1
+  %tmp643 = getelementptr inbounds float* %tmp642, i64 1
+  %tmp644 = getelementptr inbounds float* %tmp643, i64 1
+  %tmp645 = getelementptr inbounds float* %tmp644, i64 1
+  %tmp646 = getelementptr inbounds float* %tmp645, i64 1
+  %tmp647 = getelementptr inbounds float* %tmp646, i64 1
+  %tmp648 = getelementptr inbounds float* %tmp647, i64 1
+  %tmp649 = getelementptr inbounds float* %tmp648, i64 1
+  %tmp650 = getelementptr inbounds float* %tmp649, i64 1
+  %tmp651 = getelementptr inbounds float* %tmp650, i64 1
+  %tmp652 = getelementptr inbounds float* %tmp651, i64 1
+  %tmp653 = getelementptr inbounds float* %tmp652, i64 1
+  %tmp654 = getelementptr inbounds float* %tmp653, i64 1
+  %tmp655 = getelementptr inbounds float* %tmp654, i64 1
+  %tmp656 = getelementptr inbounds float* %tmp655, i64 1
+  %tmp657 = getelementptr inbounds float* %tmp656, i64 1
+  %tmp658 = getelementptr inbounds float* %tmp657, i64 1
+  %tmp659 = getelementptr inbounds float* %tmp658, i64 1
+  %tmp660 = getelementptr inbounds float* %tmp659, i64 1
+  %tmp661 = getelementptr inbounds float* %tmp660, i64 1
+  %tmp662 = getelementptr inbounds float* %tmp661, i64 1
+  %tmp663 = getelementptr inbounds float* %tmp662, i64 1
+  %tmp664 = getelementptr inbounds float* %tmp663, i64 1
+  %tmp665 = getelementptr inbounds float* %tmp664, i64 1
+  %tmp666 = getelementptr inbounds float* %tmp665, i64 1
+  %tmp667 = getelementptr inbounds float* %tmp666, i64 1
+  %tmp668 = getelementptr inbounds float* %tmp667, i64 1
+  %tmp669 = getelementptr inbounds float* %tmp668, i64 1
+  %tmp670 = getelementptr inbounds float* %tmp669, i64 1
+  %tmp671 = getelementptr inbounds float* %tmp670, i64 1
+  %tmp672 = getelementptr inbounds float* %tmp671, i64 1
+  %tmp673 = getelementptr inbounds float* %tmp672, i64 1
+  %tmp674 = getelementptr inbounds float* %tmp673, i64 1
+  %tmp675 = getelementptr inbounds float* %tmp674, i64 1
+  %tmp676 = getelementptr inbounds float* %tmp675, i64 1
+  %tmp677 = getelementptr inbounds float* %tmp676, i64 1
+  %tmp678 = getelementptr inbounds float* %tmp677, i64 1
+  %tmp679 = getelementptr inbounds float* %tmp678, i64 1
+  %tmp680 = getelementptr inbounds float* %tmp679, i64 1
+  %tmp681 = getelementptr inbounds float* %tmp680, i64 1
+  %tmp682 = getelementptr inbounds float* %tmp681, i64 1
+  %tmp683 = getelementptr inbounds float* %tmp682, i64 1
+  %tmp684 = getelementptr inbounds float* %tmp683, i64 1
+  %tmp685 = getelementptr inbounds float* %tmp684, i64 1
+  %tmp686 = getelementptr inbounds float* %tmp685, i64 1
+  %tmp687 = getelementptr inbounds float* %tmp686, i64 1
+  %tmp688 = getelementptr inbounds float* %tmp687, i64 1
+  %tmp689 = getelementptr inbounds float* %tmp688, i64 1
+  %tmp690 = getelementptr inbounds float* %tmp689, i64 1
+  %tmp691 = getelementptr inbounds float* %tmp690, i64 1
+  %tmp692 = getelementptr inbounds float* %tmp691, i64 1
+  %tmp693 = getelementptr inbounds float* %tmp692, i64 1
+  %tmp694 = getelementptr inbounds float* %tmp693, i64 1
+  %tmp695 = getelementptr inbounds float* %tmp694, i64 1
+  %tmp696 = getelementptr inbounds float* %tmp695, i64 1
+  %tmp697 = getelementptr inbounds float* %tmp696, i64 1
+  %tmp698 = getelementptr inbounds float* %tmp697, i64 1
+  %tmp699 = getelementptr inbounds float* %tmp698, i64 1
+  %tmp700 = getelementptr inbounds float* %tmp699, i64 1
+  %tmp701 = getelementptr inbounds float* %tmp700, i64 1
+  %tmp702 = getelementptr inbounds float* %tmp701, i64 1
+  %tmp703 = getelementptr inbounds float* %tmp702, i64 1
+  %tmp704 = getelementptr inbounds float* %tmp703, i64 1
+  %tmp705 = getelementptr inbounds float* %tmp704, i64 1
+  %tmp706 = getelementptr inbounds float* %tmp705, i64 1
+  %tmp707 = getelementptr inbounds float* %tmp706, i64 1
+  %tmp708 = getelementptr inbounds float* %tmp707, i64 1
+  %tmp709 = getelementptr inbounds float* %tmp708, i64 1
+  %tmp710 = getelementptr inbounds float* %tmp709, i64 1
+  %tmp711 = getelementptr inbounds float* %tmp710, i64 1
+  %tmp712 = getelementptr inbounds float* %tmp711, i64 1
+  %tmp713 = getelementptr inbounds float* %tmp712, i64 1
+  %tmp714 = getelementptr inbounds float* %tmp713, i64 1
+  %tmp715 = getelementptr inbounds float* %tmp714, i64 1
+  %tmp716 = getelementptr inbounds float* %tmp715, i64 1
+  %tmp717 = getelementptr inbounds float* %tmp716, i64 1
+  %tmp718 = getelementptr inbounds float* %tmp717, i64 1
+  %tmp719 = getelementptr inbounds float* %tmp718, i64 1
+  %tmp720 = getelementptr inbounds float* %tmp719, i64 1
+  %tmp721 = getelementptr inbounds float* %tmp720, i64 1
+  %tmp722 = getelementptr inbounds float* %tmp721, i64 1
+  %tmp723 = getelementptr inbounds float* %tmp722, i64 1
+  %tmp724 = getelementptr inbounds float* %tmp723, i64 1
+  %tmp725 = getelementptr inbounds float* %tmp724, i64 1
+  %tmp726 = getelementptr inbounds float* %tmp725, i64 1
+  %tmp727 = getelementptr inbounds float* %tmp726, i64 1
+  %tmp728 = getelementptr inbounds float* %tmp727, i64 1
+  %tmp729 = getelementptr inbounds float* %tmp728, i64 1
+  %tmp730 = getelementptr inbounds float* %tmp729, i64 1
+  %tmp731 = getelementptr inbounds float* %tmp730, i64 1
+  %tmp732 = getelementptr inbounds float* %tmp731, i64 1
+  %tmp733 = getelementptr inbounds float* %tmp732, i64 1
+  %tmp734 = getelementptr inbounds float* %tmp733, i64 1
+  %tmp735 = getelementptr inbounds float* %tmp734, i64 1
+  %tmp736 = getelementptr inbounds float* %tmp735, i64 1
+  %tmp737 = getelementptr inbounds float* %tmp736, i64 1
+  %tmp738 = getelementptr inbounds float* %tmp737, i64 1
+  %tmp739 = getelementptr inbounds float* %tmp738, i64 1
+  %tmp740 = getelementptr inbounds float* %tmp739, i64 1
+  %tmp741 = getelementptr inbounds float* %tmp740, i64 1
+  %tmp742 = getelementptr inbounds float* %tmp741, i64 1
+  %tmp743 = getelementptr inbounds float* %tmp742, i64 1
+  %tmp744 = getelementptr inbounds float* %tmp743, i64 1
+  %tmp745 = getelementptr inbounds float* %tmp744, i64 1
+  %tmp746 = getelementptr inbounds float* %tmp745, i64 1
+  %tmp747 = getelementptr inbounds float* %tmp746, i64 1
+  %tmp748 = getelementptr inbounds float* %tmp747, i64 1
+  %tmp749 = getelementptr inbounds float* %tmp748, i64 1
+  %tmp750 = getelementptr inbounds float* %tmp749, i64 1
+  %tmp751 = getelementptr inbounds float* %tmp750, i64 1
+  %tmp752 = getelementptr inbounds float* %tmp751, i64 1
+  %tmp753 = getelementptr inbounds float* %tmp752, i64 1
+  %tmp754 = getelementptr inbounds float* %tmp753, i64 1
+  %tmp755 = getelementptr inbounds float* %tmp754, i64 1
+  %tmp756 = getelementptr inbounds float* %tmp755, i64 1
+  %tmp757 = getelementptr inbounds float* %tmp756, i64 1
+  %tmp758 = getelementptr inbounds float* %tmp757, i64 1
+  %tmp759 = getelementptr inbounds float* %tmp758, i64 1
+  %tmp760 = getelementptr inbounds float* %tmp759, i64 1
+  %tmp761 = getelementptr inbounds float* %tmp760, i64 1
+  %tmp762 = getelementptr inbounds float* %tmp761, i64 1
+  %tmp763 = getelementptr inbounds float* %tmp762, i64 1
+  %tmp764 = getelementptr inbounds float* %tmp763, i64 1
+  %tmp765 = getelementptr inbounds float* %tmp764, i64 1
+  %tmp766 = getelementptr inbounds float* %tmp765, i64 1
+  %tmp767 = getelementptr inbounds float* %tmp766, i64 1
+  %tmp768 = getelementptr inbounds float* %tmp767, i64 1
+  %tmp769 = getelementptr inbounds float* %tmp768, i64 1
+  %tmp770 = getelementptr inbounds float* %tmp769, i64 1
+  %tmp771 = getelementptr inbounds float* %tmp770, i64 1
+  %tmp772 = getelementptr inbounds float* %tmp771, i64 1
+  %tmp773 = getelementptr inbounds float* %tmp772, i64 1
+  %tmp774 = getelementptr inbounds float* %tmp773, i64 1
+  %tmp775 = getelementptr inbounds float* %tmp774, i64 1
+  %tmp776 = getelementptr inbounds float* %tmp775, i64 1
+  %tmp777 = getelementptr inbounds float* %tmp776, i64 1
+  %tmp778 = getelementptr inbounds float* %tmp777, i64 1
+  %tmp779 = getelementptr inbounds float* %tmp778, i64 1
+  %tmp780 = getelementptr inbounds float* %tmp779, i64 1
+  %tmp781 = getelementptr inbounds float* %tmp780, i64 1
+  %tmp782 = getelementptr inbounds float* %tmp781, i64 1
+  %tmp783 = getelementptr inbounds float* %tmp782, i64 1
+  %tmp784 = getelementptr inbounds float* %tmp783, i64 1
+  %tmp785 = getelementptr inbounds float* %tmp784, i64 1
+  %tmp786 = getelementptr inbounds float* %tmp785, i64 1
+  %tmp787 = getelementptr inbounds float* %tmp786, i64 1
+  %tmp788 = getelementptr inbounds float* %tmp787, i64 1
+  %tmp789 = getelementptr inbounds float* %tmp788, i64 1
+  %tmp790 = getelementptr inbounds float* %tmp789, i64 1
+  %tmp791 = getelementptr inbounds float* %tmp790, i64 1
+  %tmp792 = getelementptr inbounds float* %tmp791, i64 1
+  %tmp793 = getelementptr inbounds float* %tmp792, i64 1
+  %tmp794 = getelementptr inbounds float* %tmp793, i64 1
+  %tmp795 = getelementptr inbounds float* %tmp794, i64 1
+  %tmp796 = getelementptr inbounds float* %tmp795, i64 1
+  %tmp797 = getelementptr inbounds float* %tmp796, i64 1
+  %tmp798 = getelementptr inbounds float* %tmp797, i64 1
+  %tmp799 = getelementptr inbounds float* %tmp798, i64 1
+  %tmp800 = getelementptr inbounds float* %tmp799, i64 1
+  %tmp801 = getelementptr inbounds float* %tmp800, i64 1
+  %tmp802 = getelementptr inbounds float* %tmp801, i64 1
+  %tmp803 = getelementptr inbounds float* %tmp802, i64 1
+  %tmp804 = getelementptr inbounds float* %tmp803, i64 1
+  %tmp805 = getelementptr inbounds float* %tmp804, i64 1
+  %tmp806 = getelementptr inbounds float* %tmp805, i64 1
+  %tmp807 = getelementptr inbounds float* %tmp806, i64 1
+  %tmp808 = getelementptr inbounds float* %tmp807, i64 1
+  %tmp809 = getelementptr inbounds float* %tmp808, i64 1
+  %tmp810 = getelementptr inbounds float* %tmp809, i64 1
+  %tmp811 = getelementptr inbounds float* %tmp810, i64 1
+  %tmp812 = getelementptr inbounds float* %tmp811, i64 1
+  %tmp813 = getelementptr inbounds float* %tmp812, i64 1
+  %tmp814 = getelementptr inbounds float* %tmp813, i64 1
+  %tmp815 = getelementptr inbounds float* %tmp814, i64 1
+  %tmp816 = getelementptr inbounds float* %tmp815, i64 1
+  %tmp817 = getelementptr inbounds float* %tmp816, i64 1
+  %tmp818 = getelementptr inbounds float* %tmp817, i64 1
+  %tmp819 = getelementptr inbounds float* %tmp818, i64 1
+  %tmp820 = getelementptr inbounds float* %tmp819, i64 1
+  %tmp821 = getelementptr inbounds float* %tmp820, i64 1
+  %tmp822 = getelementptr inbounds float* %tmp821, i64 1
+  %tmp823 = getelementptr inbounds float* %tmp822, i64 1
+  %tmp824 = getelementptr inbounds float* %tmp823, i64 1
+  %tmp825 = getelementptr inbounds float* %tmp824, i64 1
+  %tmp826 = getelementptr inbounds float* %tmp825, i64 1
+  %tmp827 = getelementptr inbounds float* %tmp826, i64 1
+  %tmp828 = getelementptr inbounds float* %tmp827, i64 1
+  %tmp829 = getelementptr inbounds float* %tmp828, i64 1
+  %tmp830 = getelementptr inbounds float* %tmp829, i64 1
+  %tmp831 = getelementptr inbounds float* %tmp830, i64 1
+  %tmp832 = getelementptr inbounds float* %tmp831, i64 1
+  %tmp833 = getelementptr inbounds float* %tmp832, i64 1
+  %tmp834 = getelementptr inbounds float* %tmp833, i64 1
+  %tmp835 = getelementptr inbounds float* %tmp834, i64 1
+  %tmp836 = getelementptr inbounds float* %tmp835, i64 1
+  %tmp837 = getelementptr inbounds float* %tmp836, i64 1
+  %tmp838 = getelementptr inbounds float* %tmp837, i64 1
+  %tmp839 = getelementptr inbounds float* %tmp838, i64 1
+  %tmp840 = getelementptr inbounds float* %tmp839, i64 1
+  %tmp841 = getelementptr inbounds float* %tmp840, i64 1
+  %tmp842 = getelementptr inbounds float* %tmp841, i64 1
+  %tmp843 = getelementptr inbounds float* %tmp842, i64 1
+  %tmp844 = getelementptr inbounds float* %tmp843, i64 1
+  %tmp845 = getelementptr inbounds float* %tmp844, i64 1
+  %tmp846 = getelementptr inbounds float* %tmp845, i64 1
+  %tmp847 = getelementptr inbounds float* %tmp846, i64 1
+  %tmp848 = getelementptr inbounds float* %tmp847, i64 1
+  %tmp849 = getelementptr inbounds float* %tmp848, i64 1
+  %tmp850 = getelementptr inbounds float* %tmp849, i64 1
+  %tmp851 = getelementptr inbounds float* %tmp850, i64 1
+  %tmp852 = getelementptr inbounds float* %tmp851, i64 1
+  %tmp853 = getelementptr inbounds float* %tmp852, i64 1
+  %tmp854 = getelementptr inbounds float* %tmp853, i64 1
+  %tmp855 = getelementptr inbounds float* %tmp854, i64 1
+  %tmp856 = getelementptr inbounds float* %tmp855, i64 1
+  %tmp857 = getelementptr inbounds float* %tmp856, i64 1
+  %tmp858 = getelementptr inbounds float* %tmp857, i64 1
+  %tmp859 = getelementptr inbounds float* %tmp858, i64 1
+  %tmp860 = getelementptr inbounds float* %tmp859, i64 1
+  %tmp861 = getelementptr inbounds float* %tmp860, i64 1
+  %tmp862 = getelementptr inbounds float* %tmp861, i64 1
+  %tmp863 = getelementptr inbounds float* %tmp862, i64 1
+  %tmp864 = getelementptr inbounds float* %tmp863, i64 1
+  %tmp865 = getelementptr inbounds float* %tmp864, i64 1
+  %tmp866 = getelementptr inbounds float* %tmp865, i64 1
+  %tmp867 = getelementptr inbounds float* %tmp866, i64 1
+  %tmp868 = getelementptr inbounds float* %tmp867, i64 1
+  %tmp869 = getelementptr inbounds float* %tmp868, i64 1
+  %tmp870 = getelementptr inbounds float* %tmp869, i64 1
+  %tmp871 = getelementptr inbounds float* %tmp870, i64 1
+  %tmp872 = getelementptr inbounds float* %tmp871, i64 1
+  %tmp873 = getelementptr inbounds float* %tmp872, i64 1
+  %tmp874 = getelementptr inbounds float* %tmp873, i64 1
+  %tmp875 = getelementptr inbounds float* %tmp874, i64 1
+  %tmp876 = getelementptr inbounds float* %tmp875, i64 1
+  %tmp877 = getelementptr inbounds float* %tmp876, i64 1
+  %tmp878 = getelementptr inbounds float* %tmp877, i64 1
+  %tmp879 = getelementptr inbounds float* %tmp878, i64 1
+  %tmp880 = getelementptr inbounds float* %tmp879, i64 1
+  %tmp881 = getelementptr inbounds float* %tmp880, i64 1
+  %tmp882 = getelementptr inbounds float* %tmp881, i64 1
+  %tmp883 = getelementptr inbounds float* %tmp882, i64 1
+  %tmp884 = getelementptr inbounds float* %tmp883, i64 1
+  %tmp885 = getelementptr inbounds float* %tmp884, i64 1
+  %tmp886 = getelementptr inbounds float* %tmp885, i64 1
+  %tmp887 = getelementptr inbounds float* %tmp886, i64 1
+  %tmp888 = getelementptr inbounds float* %tmp887, i64 1
+  %tmp889 = getelementptr inbounds float* %tmp888, i64 1
+  %tmp890 = getelementptr inbounds float* %tmp889, i64 1
+  %tmp891 = getelementptr inbounds float* %tmp890, i64 1
+  %tmp892 = getelementptr inbounds float* %tmp891, i64 1
+  %tmp893 = getelementptr inbounds float* %tmp892, i64 1
+  %tmp894 = getelementptr inbounds float* %tmp893, i64 1
+  %tmp895 = getelementptr inbounds float* %tmp894, i64 1
+  %tmp896 = getelementptr inbounds float* %tmp895, i64 1
+  %tmp897 = getelementptr inbounds float* %tmp896, i64 1
+  %tmp898 = getelementptr inbounds float* %tmp897, i64 1
+  %tmp899 = getelementptr inbounds float* %tmp898, i64 1
+  %tmp900 = getelementptr inbounds float* %tmp899, i64 1
+  %tmp901 = getelementptr inbounds float* %tmp900, i64 1
+  %tmp902 = getelementptr inbounds float* %tmp901, i64 1
+  %tmp903 = getelementptr inbounds float* %tmp902, i64 1
+  %tmp904 = getelementptr inbounds float* %tmp903, i64 1
+  %tmp905 = getelementptr inbounds float* %tmp904, i64 1
+  %tmp906 = getelementptr inbounds float* %tmp905, i64 1
+  %tmp907 = getelementptr inbounds float* %tmp906, i64 1
+  %tmp908 = getelementptr inbounds float* %tmp907, i64 1
+  %tmp909 = getelementptr inbounds float* %tmp908, i64 1
+  %tmp910 = getelementptr inbounds float* %tmp909, i64 1
+  %tmp911 = getelementptr inbounds float* %tmp910, i64 1
+  %tmp912 = getelementptr inbounds float* %tmp911, i64 1
+  %tmp913 = getelementptr inbounds float* %tmp912, i64 1
+  %tmp914 = getelementptr inbounds float* %tmp913, i64 1
+  %tmp915 = getelementptr inbounds float* %tmp914, i64 1
+  %tmp916 = getelementptr inbounds float* %tmp915, i64 1
+  %tmp917 = getelementptr inbounds float* %tmp916, i64 1
+  %tmp918 = getelementptr inbounds float* %tmp917, i64 1
+  %tmp919 = getelementptr inbounds float* %tmp918, i64 1
+  %tmp920 = getelementptr inbounds float* %tmp919, i64 1
+  %tmp921 = getelementptr inbounds float* %tmp920, i64 1
+  %tmp922 = getelementptr inbounds float* %tmp921, i64 1
+  %tmp923 = getelementptr inbounds float* %tmp922, i64 1
+  %tmp924 = getelementptr inbounds float* %tmp923, i64 1
+  %tmp925 = getelementptr inbounds float* %tmp924, i64 1
+  %tmp926 = getelementptr inbounds float* %tmp925, i64 1
+  %tmp927 = getelementptr inbounds float* %tmp926, i64 1
+  %tmp928 = getelementptr inbounds float* %tmp927, i64 1
+  %tmp929 = getelementptr inbounds float* %tmp928, i64 1
+  %tmp930 = getelementptr inbounds float* %tmp929, i64 1
+  %tmp931 = getelementptr inbounds float* %tmp930, i64 1
+  %tmp932 = getelementptr inbounds float* %tmp931, i64 1
+  %tmp933 = getelementptr inbounds float* %tmp932, i64 1
+  %tmp934 = getelementptr inbounds float* %tmp933, i64 1
+  %tmp935 = getelementptr inbounds float* %tmp934, i64 1
+  %tmp936 = getelementptr inbounds float* %tmp935, i64 1
+  %tmp937 = getelementptr inbounds float* %tmp936, i64 1
+  %tmp938 = getelementptr inbounds float* %tmp937, i64 1
+  %tmp939 = getelementptr inbounds float* %tmp938, i64 1
+  %tmp940 = getelementptr inbounds float* %tmp939, i64 1
+  %tmp941 = getelementptr inbounds float* %tmp940, i64 1
+  %tmp942 = getelementptr inbounds float* %tmp941, i64 1
+  %tmp943 = getelementptr inbounds float* %tmp942, i64 1
+  %tmp944 = getelementptr inbounds float* %tmp943, i64 1
+  %tmp945 = getelementptr inbounds float* %tmp944, i64 1
+  %tmp946 = getelementptr inbounds float* %tmp945, i64 1
+  %tmp947 = getelementptr inbounds float* %tmp946, i64 1
+  %tmp948 = getelementptr inbounds float* %tmp947, i64 1
+  %tmp949 = getelementptr inbounds float* %tmp948, i64 1
+  %tmp950 = getelementptr inbounds float* %tmp949, i64 1
+  %tmp951 = getelementptr inbounds float* %tmp950, i64 1
+  %tmp952 = getelementptr inbounds float* %tmp951, i64 1
+  %tmp953 = getelementptr inbounds float* %tmp952, i64 1
+  %tmp954 = getelementptr inbounds float* %tmp953, i64 1
+  %tmp955 = getelementptr inbounds float* %tmp954, i64 1
+  %tmp956 = getelementptr inbounds float* %tmp955, i64 1
+  %tmp957 = getelementptr inbounds float* %tmp956, i64 1
+  %tmp958 = getelementptr inbounds float* %tmp957, i64 1
+  %tmp959 = getelementptr inbounds float* %tmp958, i64 1
+  %tmp960 = getelementptr inbounds float* %tmp959, i64 1
+  %tmp961 = getelementptr inbounds float* %tmp960, i64 1
+  %tmp962 = getelementptr inbounds float* %tmp961, i64 1
+  %tmp963 = getelementptr inbounds float* %tmp962, i64 1
+  %tmp964 = getelementptr inbounds float* %tmp963, i64 1
+  %tmp965 = getelementptr inbounds float* %tmp964, i64 1
+  %tmp966 = getelementptr inbounds float* %tmp965, i64 1
+  %tmp967 = getelementptr inbounds float* %tmp966, i64 1
+  %tmp968 = getelementptr inbounds float* %tmp967, i64 1
+  %tmp969 = getelementptr inbounds float* %tmp968, i64 1
+  %tmp970 = getelementptr inbounds float* %tmp969, i64 1
+  %tmp971 = getelementptr inbounds float* %tmp970, i64 1
+  %tmp972 = getelementptr inbounds float* %tmp971, i64 1
+  %tmp973 = getelementptr inbounds float* %tmp972, i64 1
+  %tmp974 = getelementptr inbounds float* %tmp973, i64 1
+  %tmp975 = getelementptr inbounds float* %tmp974, i64 1
+  %tmp976 = getelementptr inbounds float* %tmp975, i64 1
+  %tmp977 = getelementptr inbounds float* %tmp976, i64 1
+  %tmp978 = getelementptr inbounds float* %tmp977, i64 1
+  %tmp979 = getelementptr inbounds float* %tmp978, i64 1
+  %tmp980 = getelementptr inbounds float* %tmp979, i64 1
+  %tmp981 = getelementptr inbounds float* %tmp980, i64 1
+  %tmp982 = getelementptr inbounds float* %tmp981, i64 1
+  %tmp983 = getelementptr inbounds float* %tmp982, i64 1
+  %tmp984 = getelementptr inbounds float* %tmp983, i64 1
+  %tmp985 = getelementptr inbounds float* %tmp984, i64 1
+  %tmp986 = getelementptr inbounds float* %tmp985, i64 1
+  %tmp987 = getelementptr inbounds float* %tmp986, i64 1
+  %tmp988 = getelementptr inbounds float* %tmp987, i64 1
+  %tmp989 = getelementptr inbounds float* %tmp988, i64 1
+  %tmp990 = getelementptr inbounds float* %tmp989, i64 1
+  %tmp991 = getelementptr inbounds float* %tmp990, i64 1
+  %tmp992 = getelementptr inbounds float* %tmp991, i64 1
+  %tmp993 = getelementptr inbounds float* %tmp992, i64 1
+  %tmp994 = getelementptr inbounds float* %tmp993, i64 1
+  %tmp995 = getelementptr inbounds float* %tmp994, i64 1
+  %tmp996 = getelementptr inbounds float* %tmp995, i64 1
+  %tmp997 = getelementptr inbounds float* %tmp996, i64 1
+  %tmp998 = getelementptr inbounds float* %tmp997, i64 1
+  %tmp999 = getelementptr inbounds float* %tmp998, i64 1
+  %tmp1000 = getelementptr inbounds float* %tmp999, i64 1
+  %tmp1001 = getelementptr inbounds float* %tmp1000, i64 1
+  %tmp1002 = getelementptr inbounds float* %tmp1001, i64 1
+  %tmp1003 = getelementptr inbounds float* %tmp1002, i64 1
+  %tmp1004 = getelementptr inbounds float* %tmp1003, i64 1
+  %tmp1005 = getelementptr inbounds float* %tmp1004, i64 1
+  %tmp1006 = getelementptr inbounds float* %tmp1005, i64 1
+  %tmp1007 = getelementptr inbounds float* %tmp1006, i64 1
+  %tmp1008 = getelementptr inbounds float* %tmp1007, i64 1
+  %tmp1009 = getelementptr inbounds float* %tmp1008, i64 1
+  %tmp1010 = getelementptr inbounds float* %tmp1009, i64 1
+  %tmp1011 = getelementptr inbounds float* %tmp1010, i64 1
+  %tmp1012 = getelementptr inbounds float* %tmp1011, i64 1
+  %tmp1013 = getelementptr inbounds float* %tmp1012, i64 1
+  %tmp1014 = getelementptr inbounds float* %tmp1013, i64 1
+  %tmp1015 = getelementptr inbounds float* %tmp1014, i64 1
+  %tmp1016 = getelementptr inbounds float* %tmp1015, i64 1
+  %tmp1017 = getelementptr inbounds float* %tmp1016, i64 1
+  %tmp1018 = getelementptr inbounds float* %tmp1017, i64 1
+  %tmp1019 = getelementptr inbounds float* %tmp1018, i64 1
+  %tmp1020 = getelementptr inbounds float* %tmp1019, i64 1
+  %tmp1021 = getelementptr inbounds float* %tmp1020, i64 1
+  %tmp1022 = getelementptr inbounds float* %tmp1021, i64 1
+  %tmp1023 = getelementptr inbounds float* %tmp1022, i64 1
+  %tmp1024 = getelementptr inbounds float* %tmp1023, i64 1
+  %tmp1025 = getelementptr inbounds float* %tmp1024, i64 1
+  %tmp1026 = getelementptr inbounds float* %tmp1025, i64 1
+  %tmp1027 = getelementptr inbounds float* %tmp1026, i64 1
+  %tmp1028 = getelementptr inbounds float* %tmp1027, i64 1
+  %tmp1029 = getelementptr inbounds float* %tmp1028, i64 1
+  %tmp1030 = getelementptr inbounds float* %tmp1029, i64 1
+  %tmp1031 = getelementptr inbounds float* %tmp1030, i64 1
+  %tmp1032 = getelementptr inbounds float* %tmp1031, i64 1
+  %tmp1033 = getelementptr inbounds float* %tmp1032, i64 1
+  %tmp1034 = getelementptr inbounds float* %tmp1033, i64 1
+  %tmp1035 = getelementptr inbounds float* %tmp1034, i64 1
+  %tmp1036 = getelementptr inbounds float* %tmp1035, i64 1
+  %tmp1037 = getelementptr inbounds float* %tmp1036, i64 1
+  %tmp1038 = getelementptr inbounds float* %tmp1037, i64 1
+  %tmp1039 = getelementptr inbounds float* %tmp1038, i64 1
+  %tmp1040 = getelementptr inbounds float* %tmp1039, i64 1
+  %tmp1041 = getelementptr inbounds float* %tmp1040, i64 1
+  %tmp1042 = getelementptr inbounds float* %tmp1041, i64 1
+  %tmp1043 = getelementptr inbounds float* %tmp1042, i64 1
+  %tmp1044 = getelementptr inbounds float* %tmp1043, i64 1
+  %tmp1045 = getelementptr inbounds float* %tmp1044, i64 1
+  %tmp1046 = getelementptr inbounds float* %tmp1045, i64 1
+  %tmp1047 = getelementptr inbounds float* %tmp1046, i64 1
+  %tmp1048 = getelementptr inbounds float* %tmp1047, i64 1
+  %tmp1049 = getelementptr inbounds float* %tmp1048, i64 1
+  %tmp1050 = getelementptr inbounds float* %tmp1049, i64 1
+  %tmp1051 = getelementptr inbounds float* %tmp1050, i64 1
+  %tmp1052 = getelementptr inbounds float* %tmp1051, i64 1
+  %tmp1053 = getelementptr inbounds float* %tmp1052, i64 1
+  %tmp1054 = getelementptr inbounds float* %tmp1053, i64 1
+  %tmp1055 = getelementptr inbounds float* %tmp1054, i64 1
+  %tmp1056 = getelementptr inbounds float* %tmp1055, i64 1
+  %tmp1057 = getelementptr inbounds float* %tmp1056, i64 1
+  %tmp1058 = getelementptr inbounds float* %tmp1057, i64 1
+  %tmp1059 = getelementptr inbounds float* %tmp1058, i64 1
+  %tmp1060 = getelementptr inbounds float* %tmp1059, i64 1
+  %tmp1061 = getelementptr inbounds float* %tmp1060, i64 1
+  %tmp1062 = getelementptr inbounds float* %tmp1061, i64 1
+  %tmp1063 = getelementptr inbounds float* %tmp1062, i64 1
+  %tmp1064 = getelementptr inbounds float* %tmp1063, i64 1
+  %tmp1065 = getelementptr inbounds float* %tmp1064, i64 1
+  %tmp1066 = getelementptr inbounds float* %tmp1065, i64 1
+  %tmp1067 = getelementptr inbounds float* %tmp1066, i64 1
+  %tmp1068 = getelementptr inbounds float* %tmp1067, i64 1
+  %tmp1069 = getelementptr inbounds float* %tmp1068, i64 1
+  %tmp1070 = getelementptr inbounds float* %tmp1069, i64 1
+  %tmp1071 = getelementptr inbounds float* %tmp1070, i64 1
+  %tmp1072 = getelementptr inbounds float* %tmp1071, i64 1
+  %tmp1073 = getelementptr inbounds float* %tmp1072, i64 1
+  %tmp1074 = getelementptr inbounds float* %tmp1073, i64 1
+  %tmp1075 = getelementptr inbounds float* %tmp1074, i64 1
+  %tmp1076 = getelementptr inbounds float* %tmp1075, i64 1
+  %tmp1077 = getelementptr inbounds float* %tmp1076, i64 1
+  %tmp1078 = getelementptr inbounds float* %tmp1077, i64 1
+  %tmp1079 = getelementptr inbounds float* %tmp1078, i64 1
+  %tmp1080 = getelementptr inbounds float* %tmp1079, i64 1
+  %tmp1081 = getelementptr inbounds float* %tmp1080, i64 1
+  %tmp1082 = getelementptr inbounds float* %tmp1081, i64 1
+  %tmp1083 = getelementptr inbounds float* %tmp1082, i64 1
+  %tmp1084 = getelementptr inbounds float* %tmp1083, i64 1
+  %tmp1085 = getelementptr inbounds float* %tmp1084, i64 1
+  %tmp1086 = getelementptr inbounds float* %tmp1085, i64 1
+  %tmp1087 = getelementptr inbounds float* %tmp1086, i64 1
+  %tmp1088 = getelementptr inbounds float* %tmp1087, i64 1
+  %tmp1089 = getelementptr inbounds float* %tmp1088, i64 1
+  %tmp1090 = getelementptr inbounds float* %tmp1089, i64 1
+  %tmp1091 = getelementptr inbounds float* %tmp1090, i64 1
+  %tmp1092 = getelementptr inbounds float* %tmp1091, i64 1
+  %tmp1093 = getelementptr inbounds float* %tmp1092, i64 1
+  %tmp1094 = getelementptr inbounds float* %tmp1093, i64 1
+  %tmp1095 = getelementptr inbounds float* %tmp1094, i64 1
+  %tmp1096 = getelementptr inbounds float* %tmp1095, i64 1
+  %tmp1097 = getelementptr inbounds float* %tmp1096, i64 1
+  %tmp1098 = getelementptr inbounds float* %tmp1097, i64 1
+  %tmp1099 = getelementptr inbounds float* %tmp1098, i64 1
+  %tmp1100 = getelementptr inbounds float* %tmp1099, i64 1
+  %tmp1101 = getelementptr inbounds float* %tmp1100, i64 1
+  %tmp1102 = getelementptr inbounds float* %tmp1101, i64 1
+  %tmp1103 = getelementptr inbounds float* %tmp1102, i64 1
+  %tmp1104 = getelementptr inbounds float* %tmp1103, i64 1
+  %tmp1105 = getelementptr inbounds float* %tmp1104, i64 1
+  %tmp1106 = getelementptr inbounds float* %tmp1105, i64 1
+  %tmp1107 = getelementptr inbounds float* %tmp1106, i64 1
+  %tmp1108 = getelementptr inbounds float* %tmp1107, i64 1
+  %tmp1109 = getelementptr inbounds float* %tmp1108, i64 1
+  %tmp1110 = getelementptr inbounds float* %tmp1109, i64 1
+  %tmp1111 = getelementptr inbounds float* %tmp1110, i64 1
+  %tmp1112 = getelementptr inbounds float* %tmp1111, i64 1
+  %tmp1113 = getelementptr inbounds float* %tmp1112, i64 1
+  %tmp1114 = getelementptr inbounds float* %tmp1113, i64 1
+  %tmp1115 = getelementptr inbounds float* %tmp1114, i64 1
+  %tmp1116 = getelementptr inbounds float* %tmp1115, i64 1
+  %tmp1117 = getelementptr inbounds float* %tmp1116, i64 1
+  %tmp1118 = getelementptr inbounds float* %tmp1117, i64 1
+  %tmp1119 = getelementptr inbounds float* %tmp1118, i64 1
+  %tmp1120 = getelementptr inbounds float* %tmp1119, i64 1
+  %tmp1121 = getelementptr inbounds float* %tmp1120, i64 1
+  %tmp1122 = getelementptr inbounds float* %tmp1121, i64 1
+  %tmp1123 = getelementptr inbounds float* %tmp1122, i64 1
+  %tmp1124 = getelementptr inbounds float* %tmp1123, i64 1
+  %tmp1125 = getelementptr inbounds float* %tmp1124, i64 1
+  %tmp1126 = getelementptr inbounds float* %tmp1125, i64 1
+  %tmp1127 = getelementptr inbounds float* %tmp1126, i64 1
+  %tmp1128 = getelementptr inbounds float* %tmp1127, i64 1
+  %tmp1129 = getelementptr inbounds float* %tmp1128, i64 1
+  %tmp1130 = getelementptr inbounds float* %tmp1129, i64 1
+  %tmp1131 = getelementptr inbounds float* %tmp1130, i64 1
+  %tmp1132 = getelementptr inbounds float* %tmp1131, i64 1
+  %tmp1133 = getelementptr inbounds float* %tmp1132, i64 1
+  %tmp1134 = getelementptr inbounds float* %tmp1133, i64 1
+  %tmp1135 = getelementptr inbounds float* %tmp1134, i64 1
+  %tmp1136 = getelementptr inbounds float* %tmp1135, i64 1
+  %tmp1137 = getelementptr inbounds float* %tmp1136, i64 1
+  %tmp1138 = getelementptr inbounds float* %tmp1137, i64 1
+  %tmp1139 = getelementptr inbounds float* %tmp1138, i64 1
+  %tmp1140 = getelementptr inbounds float* %tmp1139, i64 1
+  %tmp1141 = getelementptr inbounds float* %tmp1140, i64 1
+  %tmp1142 = getelementptr inbounds float* %tmp1141, i64 1
+  %tmp1143 = getelementptr inbounds float* %tmp1142, i64 1
+  %tmp1144 = getelementptr inbounds float* %tmp1143, i64 1
+  %tmp1145 = getelementptr inbounds float* %tmp1144, i64 1
+  %tmp1146 = getelementptr inbounds float* %tmp1145, i64 1
+  %tmp1147 = getelementptr inbounds float* %tmp1146, i64 1
+  %tmp1148 = getelementptr inbounds float* %tmp1147, i64 1
+  %tmp1149 = getelementptr inbounds float* %tmp1148, i64 1
+  %tmp1150 = getelementptr inbounds float* %tmp1149, i64 1
+  %tmp1151 = getelementptr inbounds float* %tmp1150, i64 1
+  %tmp1152 = getelementptr inbounds float* %tmp1151, i64 1
+  %tmp1153 = getelementptr inbounds float* %tmp1152, i64 1
+  %tmp1154 = getelementptr inbounds float* %tmp1153, i64 1
+  %tmp1155 = getelementptr inbounds float* %tmp1154, i64 1
+  %tmp1156 = getelementptr inbounds float* %tmp1155, i64 1
+  %tmp1157 = getelementptr inbounds float* %tmp1156, i64 1
+  %tmp1158 = getelementptr inbounds float* %tmp1157, i64 1
+  %tmp1159 = getelementptr inbounds float* %tmp1158, i64 1
+  %tmp1160 = getelementptr inbounds float* %tmp1159, i64 1
+  %tmp1161 = getelementptr inbounds float* %tmp1160, i64 1
+  %tmp1162 = getelementptr inbounds float* %tmp1161, i64 1
+  %tmp1163 = getelementptr inbounds float* %tmp1162, i64 1
+  %tmp1164 = getelementptr inbounds float* %tmp1163, i64 1
+  %tmp1165 = getelementptr inbounds float* %tmp1164, i64 1
+  %tmp1166 = getelementptr inbounds float* %tmp1165, i64 1
+  %tmp1167 = getelementptr inbounds float* %tmp1166, i64 1
+  %tmp1168 = getelementptr inbounds float* %tmp1167, i64 1
+  %tmp1169 = getelementptr inbounds float* %tmp1168, i64 1
+  %tmp1170 = getelementptr inbounds float* %tmp1169, i64 1
+  %tmp1171 = getelementptr inbounds float* %tmp1170, i64 1
+  %tmp1172 = getelementptr inbounds float* %tmp1171, i64 1
+  %tmp1173 = getelementptr inbounds float* %tmp1172, i64 1
+  %tmp1174 = getelementptr inbounds float* %tmp1173, i64 1
+  %tmp1175 = getelementptr inbounds float* %tmp1174, i64 1
+  %tmp1176 = getelementptr inbounds float* %tmp1175, i64 1
+  %tmp1177 = getelementptr inbounds float* %tmp1176, i64 1
+  %tmp1178 = getelementptr inbounds float* %tmp1177, i64 1
+  %tmp1179 = getelementptr inbounds float* %tmp1178, i64 1
+  %tmp1180 = getelementptr inbounds float* %tmp1179, i64 1
+  %tmp1181 = getelementptr inbounds float* %tmp1180, i64 1
+  %tmp1182 = getelementptr inbounds float* %tmp1181, i64 1
+  %tmp1183 = getelementptr inbounds float* %tmp1182, i64 1
+  %tmp1184 = getelementptr inbounds float* %tmp1183, i64 1
+  %tmp1185 = getelementptr inbounds float* %tmp1184, i64 1
+  %tmp1186 = getelementptr inbounds float* %tmp1185, i64 1
+  %tmp1187 = getelementptr inbounds float* %tmp1186, i64 1
+  %tmp1188 = getelementptr inbounds float* %tmp1187, i64 1
+  %tmp1189 = getelementptr inbounds float* %tmp1188, i64 1
+  %tmp1190 = getelementptr inbounds float* %tmp1189, i64 1
+  %tmp1191 = getelementptr inbounds float* %tmp1190, i64 1
+  %tmp1192 = getelementptr inbounds float* %tmp1191, i64 1
+  %tmp1193 = getelementptr inbounds float* %tmp1192, i64 1
+  %tmp1194 = getelementptr inbounds float* %tmp1193, i64 1
+  %tmp1195 = getelementptr inbounds float* %tmp1194, i64 1
+  %tmp1196 = getelementptr inbounds float* %tmp1195, i64 1
+  %tmp1197 = getelementptr inbounds float* %tmp1196, i64 1
+  %tmp1198 = getelementptr inbounds float* %tmp1197, i64 1
+  %tmp1199 = getelementptr inbounds float* %tmp1198, i64 1
+  %tmp1200 = getelementptr inbounds float* %tmp1199, i64 1
+  %tmp1201 = getelementptr inbounds float* %tmp1200, i64 1
+  %tmp1202 = getelementptr inbounds float* %tmp1201, i64 1
+  %tmp1203 = getelementptr inbounds float* %tmp1202, i64 1
+  %tmp1204 = getelementptr inbounds float* %tmp1203, i64 1
+  %tmp1205 = getelementptr inbounds float* %tmp1204, i64 1
+  %tmp1206 = getelementptr inbounds float* %tmp1205, i64 1
+  %tmp1207 = getelementptr inbounds float* %tmp1206, i64 1
+  %tmp1208 = getelementptr inbounds float* %tmp1207, i64 1
+  %tmp1209 = getelementptr inbounds float* %tmp1208, i64 1
+  %tmp1210 = getelementptr inbounds float* %tmp1209, i64 1
+  %tmp1211 = getelementptr inbounds float* %tmp1210, i64 1
+  %tmp1212 = getelementptr inbounds float* %tmp1211, i64 1
+  %tmp1213 = getelementptr inbounds float* %tmp1212, i64 1
+  %tmp1214 = getelementptr inbounds float* %tmp1213, i64 1
+  %tmp1215 = getelementptr inbounds float* %tmp1214, i64 1
+  %tmp1216 = getelementptr inbounds float* %tmp1215, i64 1
+  %tmp1217 = getelementptr inbounds float* %tmp1216, i64 1
+  %tmp1218 = getelementptr inbounds float* %tmp1217, i64 1
+  %tmp1219 = getelementptr inbounds float* %tmp1218, i64 1
+  %tmp1220 = getelementptr inbounds float* %tmp1219, i64 1
+  %tmp1221 = getelementptr inbounds float* %tmp1220, i64 1
+  %tmp1222 = getelementptr inbounds float* %tmp1221, i64 1
+  %tmp1223 = getelementptr inbounds float* %tmp1222, i64 1
+  %tmp1224 = getelementptr inbounds float* %tmp1223, i64 1
+  %tmp1225 = getelementptr inbounds float* %tmp1224, i64 1
+  %tmp1226 = getelementptr inbounds float* %tmp1225, i64 1
+  %tmp1227 = getelementptr inbounds float* %tmp1226, i64 1
+  %tmp1228 = getelementptr inbounds float* %tmp1227, i64 1
+  %tmp1229 = getelementptr inbounds float* %tmp1228, i64 1
+  %tmp1230 = getelementptr inbounds float* %tmp1229, i64 1
+  %tmp1231 = getelementptr inbounds float* %tmp1230, i64 1
+  %tmp1232 = getelementptr inbounds float* %tmp1231, i64 1
+  %tmp1233 = getelementptr inbounds float* %tmp1232, i64 1
+  %tmp1234 = getelementptr inbounds float* %tmp1233, i64 1
+  %tmp1235 = getelementptr inbounds float* %tmp1234, i64 1
+  %tmp1236 = getelementptr inbounds float* %tmp1235, i64 1
+  %tmp1237 = getelementptr inbounds float* %tmp1236, i64 1
+  %tmp1238 = getelementptr inbounds float* %tmp1237, i64 1
+  %tmp1239 = getelementptr inbounds float* %tmp1238, i64 1
+  %tmp1240 = getelementptr inbounds float* %tmp1239, i64 1
+  %tmp1241 = getelementptr inbounds float* %tmp1240, i64 1
+  %tmp1242 = getelementptr inbounds float* %tmp1241, i64 1
+  %tmp1243 = getelementptr inbounds float* %tmp1242, i64 1
+  %tmp1244 = getelementptr inbounds float* %tmp1243, i64 1
+  %tmp1245 = getelementptr inbounds float* %tmp1244, i64 1
+  %tmp1246 = getelementptr inbounds float* %tmp1245, i64 1
+  %tmp1247 = getelementptr inbounds float* %tmp1246, i64 1
+  %tmp1248 = getelementptr inbounds float* %tmp1247, i64 1
+  %tmp1249 = getelementptr inbounds float* %tmp1248, i64 1
+  %tmp1250 = getelementptr inbounds float* %tmp1249, i64 1
+  %tmp1251 = getelementptr inbounds float* %tmp1250, i64 1
+  %tmp1252 = getelementptr inbounds float* %tmp1251, i64 1
+  %tmp1253 = getelementptr inbounds float* %tmp1252, i64 1
+  %tmp1254 = getelementptr inbounds float* %tmp1253, i64 1
+  %tmp1255 = getelementptr inbounds float* %tmp1254, i64 1
+  %tmp1256 = getelementptr inbounds float* %tmp1255, i64 1
+  %tmp1257 = getelementptr inbounds float* %tmp1256, i64 1
+  %tmp1258 = getelementptr inbounds float* %tmp1257, i64 1
+  %tmp1259 = getelementptr inbounds float* %tmp1258, i64 1
+  %tmp1260 = getelementptr inbounds float* %tmp1259, i64 1
+  %tmp1261 = getelementptr inbounds float* %tmp1260, i64 1
+  %tmp1262 = getelementptr inbounds float* %tmp1261, i64 1
+  %tmp1263 = getelementptr inbounds float* %tmp1262, i64 1
+  %tmp1264 = getelementptr inbounds float* %tmp1263, i64 1
+  %tmp1265 = getelementptr inbounds float* %tmp1264, i64 1
+  %tmp1266 = getelementptr inbounds float* %tmp1265, i64 1
+  %tmp1267 = getelementptr inbounds float* %tmp1266, i64 1
+  %tmp1268 = getelementptr inbounds float* %tmp1267, i64 1
+  %tmp1269 = getelementptr inbounds float* %tmp1268, i64 1
+  %tmp1270 = getelementptr inbounds float* %tmp1269, i64 1
+  %tmp1271 = getelementptr inbounds float* %tmp1270, i64 1
+  %tmp1272 = getelementptr inbounds float* %tmp1271, i64 1
+  %tmp1273 = getelementptr inbounds float* %tmp1272, i64 1
+  %tmp1274 = getelementptr inbounds float* %tmp1273, i64 1
+  %tmp1275 = getelementptr inbounds float* %tmp1274, i64 1
+  %tmp1276 = getelementptr inbounds float* %tmp1275, i64 1
+  %tmp1277 = getelementptr inbounds float* %tmp1276, i64 1
+  %tmp1278 = getelementptr inbounds float* %tmp1277, i64 1
+  %tmp1279 = getelementptr inbounds float* %tmp1278, i64 1
+  %tmp1280 = getelementptr inbounds float* %tmp1279, i64 1
+  %tmp1281 = getelementptr inbounds float* %tmp1280, i64 1
+  %tmp1282 = getelementptr inbounds float* %tmp1281, i64 1
+  %tmp1283 = getelementptr inbounds float* %tmp1282, i64 1
+  %tmp1284 = getelementptr inbounds float* %tmp1283, i64 1
+  %tmp1285 = getelementptr inbounds float* %tmp1284, i64 1
+  %tmp1286 = getelementptr inbounds float* %tmp1285, i64 1
+  %tmp1287 = getelementptr inbounds float* %tmp1286, i64 1
+  %tmp1288 = getelementptr inbounds float* %tmp1287, i64 1
+  %tmp1289 = getelementptr inbounds float* %tmp1288, i64 1
+  %tmp1290 = getelementptr inbounds float* %tmp1289, i64 1
+  %tmp1291 = getelementptr inbounds float* %tmp1290, i64 1
+  %tmp1292 = getelementptr inbounds float* %tmp1291, i64 1
+  %tmp1293 = getelementptr inbounds float* %tmp1292, i64 1
+  %tmp1294 = getelementptr inbounds float* %tmp1293, i64 1
+  %tmp1295 = getelementptr inbounds float* %tmp1294, i64 1
+  %tmp1296 = getelementptr inbounds float* %tmp1295, i64 1
+  %tmp1297 = getelementptr inbounds float* %tmp1296, i64 1
+  %tmp1298 = getelementptr inbounds float* %tmp1297, i64 1
+  %tmp1299 = getelementptr inbounds float* %tmp1298, i64 1
+  %tmp1300 = getelementptr inbounds float* %tmp1299, i64 1
+  %tmp1301 = getelementptr inbounds float* %tmp1300, i64 1
+  %tmp1302 = getelementptr inbounds float* %tmp1301, i64 1
+  %tmp1303 = getelementptr inbounds float* %tmp1302, i64 1
+  %tmp1304 = getelementptr inbounds float* %tmp1303, i64 1
+  %tmp1305 = getelementptr inbounds float* %tmp1304, i64 1
+  %tmp1306 = getelementptr inbounds float* %tmp1305, i64 1
+  %tmp1307 = getelementptr inbounds float* %tmp1306, i64 1
+  %tmp1308 = getelementptr inbounds float* %tmp1307, i64 1
+  %tmp1309 = getelementptr inbounds float* %tmp1308, i64 1
+  %tmp1310 = getelementptr inbounds float* %tmp1309, i64 1
+  %tmp1311 = getelementptr inbounds float* %tmp1310, i64 1
+  %tmp1312 = getelementptr inbounds float* %tmp1311, i64 1
+  %tmp1313 = getelementptr inbounds float* %tmp1312, i64 1
+  %tmp1314 = getelementptr inbounds float* %tmp1313, i64 1
+  %tmp1315 = getelementptr inbounds float* %tmp1314, i64 1
+  %tmp1316 = getelementptr inbounds float* %tmp1315, i64 1
+  %tmp1317 = getelementptr inbounds float* %tmp1316, i64 1
+  %tmp1318 = getelementptr inbounds float* %tmp1317, i64 1
+  %tmp1319 = getelementptr inbounds float* %tmp1318, i64 1
+  %tmp1320 = getelementptr inbounds float* %tmp1319, i64 1
+  %tmp1321 = getelementptr inbounds float* %tmp1320, i64 1
+  %tmp1322 = getelementptr inbounds float* %tmp1321, i64 1
+  %tmp1323 = getelementptr inbounds float* %tmp1322, i64 1
+  %tmp1324 = getelementptr inbounds float* %tmp1323, i64 1
+  %tmp1325 = getelementptr inbounds float* %tmp1324, i64 1
+  %tmp1326 = getelementptr inbounds float* %tmp1325, i64 1
+  %tmp1327 = getelementptr inbounds float* %tmp1326, i64 1
+  %tmp1328 = getelementptr inbounds float* %tmp1327, i64 1
+  %tmp1329 = getelementptr inbounds float* %tmp1328, i64 1
+  %tmp1330 = getelementptr inbounds float* %tmp1329, i64 1
+  %tmp1331 = getelementptr inbounds float* %tmp1330, i64 1
+  %tmp1332 = getelementptr inbounds float* %tmp1331, i64 1
+  %tmp1333 = getelementptr inbounds float* %tmp1332, i64 1
+  %tmp1334 = getelementptr inbounds float* %tmp1333, i64 1
+  %tmp1335 = getelementptr inbounds float* %tmp1334, i64 1
+  %tmp1336 = getelementptr inbounds float* %tmp1335, i64 1
+  %tmp1337 = getelementptr inbounds float* %tmp1336, i64 1
+  %tmp1338 = getelementptr inbounds float* %tmp1337, i64 1
+  %tmp1339 = getelementptr inbounds float* %tmp1338, i64 1
+  %tmp1340 = getelementptr inbounds float* %tmp1339, i64 1
+  %tmp1341 = getelementptr inbounds float* %tmp1340, i64 1
+  %tmp1342 = getelementptr inbounds float* %tmp1341, i64 1
+  %tmp1343 = getelementptr inbounds float* %tmp1342, i64 1
+  %tmp1344 = getelementptr inbounds float* %tmp1343, i64 1
+  %tmp1345 = getelementptr inbounds float* %tmp1344, i64 1
+  %tmp1346 = getelementptr inbounds float* %tmp1345, i64 1
+  %tmp1347 = getelementptr inbounds float* %tmp1346, i64 1
+  %tmp1348 = getelementptr inbounds float* %tmp1347, i64 1
+  %tmp1349 = getelementptr inbounds float* %tmp1348, i64 1
+  %tmp1350 = getelementptr inbounds float* %tmp1349, i64 1
+  %tmp1351 = getelementptr inbounds float* %tmp1350, i64 1
+  %tmp1352 = getelementptr inbounds float* %tmp1351, i64 1
+  %tmp1353 = getelementptr inbounds float* %tmp1352, i64 1
+  %tmp1354 = getelementptr inbounds float* %tmp1353, i64 1
+  %tmp1355 = getelementptr inbounds float* %tmp1354, i64 1
+  %tmp1356 = getelementptr inbounds float* %tmp1355, i64 1
+  %tmp1357 = getelementptr inbounds float* %tmp1356, i64 1
+  %tmp1358 = getelementptr inbounds float* %tmp1357, i64 1
+  %tmp1359 = getelementptr inbounds float* %tmp1358, i64 1
+  %tmp1360 = getelementptr inbounds float* %tmp1359, i64 1
+  %tmp1361 = getelementptr inbounds float* %tmp1360, i64 1
+  %tmp1362 = getelementptr inbounds float* %tmp1361, i64 1
+  %tmp1363 = getelementptr inbounds float* %tmp1362, i64 1
+  %tmp1364 = getelementptr inbounds float* %tmp1363, i64 1
+  %tmp1365 = getelementptr inbounds float* %tmp1364, i64 1
+  %tmp1366 = getelementptr inbounds float* %tmp1365, i64 1
+  %tmp1367 = getelementptr inbounds float* %tmp1366, i64 1
+  %tmp1368 = getelementptr inbounds float* %tmp1367, i64 1
+  %tmp1369 = getelementptr inbounds float* %tmp1368, i64 1
+  %tmp1370 = getelementptr inbounds float* %tmp1369, i64 1
+  %tmp1371 = getelementptr inbounds float* %tmp1370, i64 1
+  %tmp1372 = getelementptr inbounds float* %tmp1371, i64 1
+  %tmp1373 = getelementptr inbounds float* %tmp1372, i64 1
+  %tmp1374 = getelementptr inbounds float* %tmp1373, i64 1
+  %tmp1375 = getelementptr inbounds float* %tmp1374, i64 1
+  %tmp1376 = getelementptr inbounds float* %tmp1375, i64 1
+  %tmp1377 = getelementptr inbounds float* %tmp1376, i64 1
+  %tmp1378 = getelementptr inbounds float* %tmp1377, i64 1
+  %tmp1379 = getelementptr inbounds float* %tmp1378, i64 1
+  %tmp1380 = getelementptr inbounds float* %tmp1379, i64 1
+  %tmp1381 = getelementptr inbounds float* %tmp1380, i64 1
+  %tmp1382 = getelementptr inbounds float* %tmp1381, i64 1
+  %tmp1383 = getelementptr inbounds float* %tmp1382, i64 1
+  %tmp1384 = getelementptr inbounds float* %tmp1383, i64 1
+  %tmp1385 = getelementptr inbounds float* %tmp1384, i64 1
+  %tmp1386 = getelementptr inbounds float* %tmp1385, i64 1
+  %tmp1387 = getelementptr inbounds float* %tmp1386, i64 1
+  %tmp1388 = getelementptr inbounds float* %tmp1387, i64 1
+  %tmp1389 = getelementptr inbounds float* %tmp1388, i64 1
+  %tmp1390 = getelementptr inbounds float* %tmp1389, i64 1
+  %tmp1391 = getelementptr inbounds float* %tmp1390, i64 1
+  %tmp1392 = getelementptr inbounds float* %tmp1391, i64 1
+  %tmp1393 = getelementptr inbounds float* %tmp1392, i64 1
+  %tmp1394 = getelementptr inbounds float* %tmp1393, i64 1
+  %tmp1395 = getelementptr inbounds float* %tmp1394, i64 1
+  %tmp1396 = getelementptr inbounds float* %tmp1395, i64 1
+  %tmp1397 = getelementptr inbounds float* %tmp1396, i64 1
+  %tmp1398 = getelementptr inbounds float* %tmp1397, i64 1
+  %tmp1399 = getelementptr inbounds float* %tmp1398, i64 1
+  %tmp1400 = getelementptr inbounds float* %tmp1399, i64 1
+  %tmp1401 = getelementptr inbounds float* %tmp1400, i64 1
+  %tmp1402 = getelementptr inbounds float* %tmp1401, i64 1
+  %tmp1403 = getelementptr inbounds float* %tmp1402, i64 1
+  %tmp1404 = getelementptr inbounds float* %tmp1403, i64 1
+  %tmp1405 = getelementptr inbounds float* %tmp1404, i64 1
+  %tmp1406 = getelementptr inbounds float* %tmp1405, i64 1
+  %tmp1407 = getelementptr inbounds float* %tmp1406, i64 1
+  %tmp1408 = getelementptr inbounds float* %tmp1407, i64 1
+  %tmp1409 = getelementptr inbounds float* %tmp1408, i64 1
+  %tmp1410 = getelementptr inbounds float* %tmp1409, i64 1
+  %tmp1411 = getelementptr inbounds float* %tmp1410, i64 1
+  %tmp1412 = getelementptr inbounds float* %tmp1411, i64 1
+  %tmp1413 = getelementptr inbounds float* %tmp1412, i64 1
+  %tmp1414 = getelementptr inbounds float* %tmp1413, i64 1
+  %tmp1415 = getelementptr inbounds float* %tmp1414, i64 1
+  %tmp1416 = getelementptr inbounds float* %tmp1415, i64 1
+  %tmp1417 = getelementptr inbounds float* %tmp1416, i64 1
+  %tmp1418 = getelementptr inbounds float* %tmp1417, i64 1
+  %tmp1419 = getelementptr inbounds float* %tmp1418, i64 1
+  %tmp1420 = getelementptr inbounds float* %tmp1419, i64 1
+  %tmp1421 = getelementptr inbounds float* %tmp1420, i64 1
+  %tmp1422 = getelementptr inbounds float* %tmp1421, i64 1
+  %tmp1423 = getelementptr inbounds float* %tmp1422, i64 1
+  %tmp1424 = getelementptr inbounds float* %tmp1423, i64 1
+  %tmp1425 = getelementptr inbounds float* %tmp1424, i64 1
+  %tmp1426 = getelementptr inbounds float* %tmp1425, i64 1
+  %tmp1427 = getelementptr inbounds float* %tmp1426, i64 1
+  %tmp1428 = getelementptr inbounds float* %tmp1427, i64 1
+  %tmp1429 = getelementptr inbounds float* %tmp1428, i64 1
+  %tmp1430 = getelementptr inbounds float* %tmp1429, i64 1
+  %tmp1431 = getelementptr inbounds float* %tmp1430, i64 1
+  %tmp1432 = getelementptr inbounds float* %tmp1431, i64 1
+  %tmp1433 = getelementptr inbounds float* %tmp1432, i64 1
+  %tmp1434 = getelementptr inbounds float* %tmp1433, i64 1
+  %tmp1435 = getelementptr inbounds float* %tmp1434, i64 1
+  %tmp1436 = getelementptr inbounds float* %tmp1435, i64 1
+  %tmp1437 = getelementptr inbounds float* %tmp1436, i64 1
+  %tmp1438 = getelementptr inbounds float* %tmp1437, i64 1
+  %tmp1439 = getelementptr inbounds float* %tmp1438, i64 1
+  %tmp1440 = getelementptr inbounds float* %tmp1439, i64 1
+  %tmp1441 = getelementptr inbounds float* %tmp1440, i64 1
+  %tmp1442 = getelementptr inbounds float* %tmp1441, i64 1
+  %tmp1443 = getelementptr inbounds float* %tmp1442, i64 1
+  %tmp1444 = getelementptr inbounds float* %tmp1443, i64 1
+  %tmp1445 = getelementptr inbounds float* %tmp1444, i64 1
+  %tmp1446 = getelementptr inbounds float* %tmp1445, i64 1
+  %tmp1447 = getelementptr inbounds float* %tmp1446, i64 1
+  %tmp1448 = getelementptr inbounds float* %tmp1447, i64 1
+  %tmp1449 = getelementptr inbounds float* %tmp1448, i64 1
+  %tmp1450 = getelementptr inbounds float* %tmp1449, i64 1
+  %tmp1451 = getelementptr inbounds float* %tmp1450, i64 1
+  %tmp1452 = getelementptr inbounds float* %tmp1451, i64 1
+  %tmp1453 = getelementptr inbounds float* %tmp1452, i64 1
+  %tmp1454 = getelementptr inbounds float* %tmp1453, i64 1
+  %tmp1455 = getelementptr inbounds float* %tmp1454, i64 1
+  %tmp1456 = getelementptr inbounds float* %tmp1455, i64 1
+  %tmp1457 = getelementptr inbounds float* %tmp1456, i64 1
+  %tmp1458 = getelementptr inbounds float* %tmp1457, i64 1
+  %tmp1459 = getelementptr inbounds float* %tmp1458, i64 1
+  %tmp1460 = getelementptr inbounds float* %tmp1459, i64 1
+  %tmp1461 = getelementptr inbounds float* %tmp1460, i64 1
+  %tmp1462 = getelementptr inbounds float* %tmp1461, i64 1
+  %tmp1463 = getelementptr inbounds float* %tmp1462, i64 1
+  %tmp1464 = getelementptr inbounds float* %tmp1463, i64 1
+  %tmp1465 = getelementptr inbounds float* %tmp1464, i64 1
+  %tmp1466 = getelementptr inbounds float* %tmp1465, i64 1
+  %tmp1467 = getelementptr inbounds float* %tmp1466, i64 1
+  %tmp1468 = getelementptr inbounds float* %tmp1467, i64 1
+  %tmp1469 = getelementptr inbounds float* %tmp1468, i64 1
+  %tmp1470 = getelementptr inbounds float* %tmp1469, i64 1
+  %tmp1471 = getelementptr inbounds float* %tmp1470, i64 1
+  %tmp1472 = getelementptr inbounds float* %tmp1471, i64 1
+  %tmp1473 = getelementptr inbounds float* %tmp1472, i64 1
+  %tmp1474 = getelementptr inbounds float* %tmp1473, i64 1
+  %tmp1475 = getelementptr inbounds float* %tmp1474, i64 1
+  %tmp1476 = getelementptr inbounds float* %tmp1475, i64 1
+  %tmp1477 = getelementptr inbounds float* %tmp1476, i64 1
+  %tmp1478 = getelementptr inbounds float* %tmp1477, i64 1
+  %tmp1479 = getelementptr inbounds float* %tmp1478, i64 1
+  %tmp1480 = getelementptr inbounds float* %tmp1479, i64 1
+  %tmp1481 = getelementptr inbounds float* %tmp1480, i64 1
+  %tmp1482 = getelementptr inbounds float* %tmp1481, i64 1
+  %tmp1483 = getelementptr inbounds float* %tmp1482, i64 1
+  %tmp1484 = getelementptr inbounds float* %tmp1483, i64 1
+  %tmp1485 = getelementptr inbounds float* %tmp1484, i64 1
+  %tmp1486 = getelementptr inbounds float* %tmp1485, i64 1
+  %tmp1487 = getelementptr inbounds float* %tmp1486, i64 1
+  %tmp1488 = getelementptr inbounds float* %tmp1487, i64 1
+  %tmp1489 = getelementptr inbounds float* %tmp1488, i64 1
+  %tmp1490 = getelementptr inbounds float* %tmp1489, i64 1
+  %tmp1491 = getelementptr inbounds float* %tmp1490, i64 1
+  %tmp1492 = getelementptr inbounds float* %tmp1491, i64 1
+  %tmp1493 = getelementptr inbounds float* %tmp1492, i64 1
+  %tmp1494 = getelementptr inbounds float* %tmp1493, i64 1
+  %tmp1495 = getelementptr inbounds float* %tmp1494, i64 1
+  %tmp1496 = getelementptr inbounds float* %tmp1495, i64 1
+  %tmp1497 = getelementptr inbounds float* %tmp1496, i64 1
+  %tmp1498 = getelementptr inbounds float* %tmp1497, i64 1
+  %tmp1499 = getelementptr inbounds float* %tmp1498, i64 1
+  %tmp1500 = getelementptr inbounds float* %tmp1499, i64 1
+  %tmp1501 = getelementptr inbounds float* %tmp1500, i64 1
+  %tmp1502 = getelementptr inbounds float* %tmp1501, i64 1
+  %tmp1503 = getelementptr inbounds float* %tmp1502, i64 1
+  %tmp1504 = getelementptr inbounds float* %tmp1503, i64 1
+  %tmp1505 = getelementptr inbounds float* %tmp1504, i64 1
+  %tmp1506 = getelementptr inbounds float* %tmp1505, i64 1
+  %tmp1507 = getelementptr inbounds float* %tmp1506, i64 1
+  %tmp1508 = getelementptr inbounds float* %tmp1507, i64 1
+  %tmp1509 = getelementptr inbounds float* %tmp1508, i64 1
+  %tmp1510 = getelementptr inbounds float* %tmp1509, i64 1
+  %tmp1511 = getelementptr inbounds float* %tmp1510, i64 1
+  %tmp1512 = getelementptr inbounds float* %tmp1511, i64 1
+  %tmp1513 = getelementptr inbounds float* %tmp1512, i64 1
+  %tmp1514 = getelementptr inbounds float* %tmp1513, i64 1
+  %tmp1515 = getelementptr inbounds float* %tmp1514, i64 1
+  %tmp1516 = getelementptr inbounds float* %tmp1515, i64 1
+  %tmp1517 = getelementptr inbounds float* %tmp1516, i64 1
+  %tmp1518 = getelementptr inbounds float* %tmp1517, i64 1
+  %tmp1519 = getelementptr inbounds float* %tmp1518, i64 1
+  %tmp1520 = getelementptr inbounds float* %tmp1519, i64 1
+  %tmp1521 = getelementptr inbounds float* %tmp1520, i64 1
+  %tmp1522 = getelementptr inbounds float* %tmp1521, i64 1
+  %tmp1523 = getelementptr inbounds float* %tmp1522, i64 1
+  %tmp1524 = getelementptr inbounds float* %tmp1523, i64 1
+  %tmp1525 = getelementptr inbounds float* %tmp1524, i64 1
+  %tmp1526 = getelementptr inbounds float* %tmp1525, i64 1
+  %tmp1527 = getelementptr inbounds float* %tmp1526, i64 1
+  %tmp1528 = getelementptr inbounds float* %tmp1527, i64 1
+  %tmp1529 = getelementptr inbounds float* %tmp1528, i64 1
+  %tmp1530 = getelementptr inbounds float* %tmp1529, i64 1
+  %tmp1531 = getelementptr inbounds float* %tmp1530, i64 1
+  %tmp1532 = getelementptr inbounds float* %tmp1531, i64 1
+  %tmp1533 = getelementptr inbounds float* %tmp1532, i64 1
+  %tmp1534 = getelementptr inbounds float* %tmp1533, i64 1
+  %tmp1535 = getelementptr inbounds float* %tmp1534, i64 1
+  %tmp1536 = getelementptr inbounds float* %tmp1535, i64 1
+  %tmp1537 = getelementptr inbounds float* %tmp1536, i64 1
+  %tmp1538 = getelementptr inbounds float* %tmp1537, i64 1
+  %tmp1539 = getelementptr inbounds float* %tmp1538, i64 1
+  %tmp1540 = getelementptr inbounds float* %tmp1539, i64 1
+  %tmp1541 = getelementptr inbounds float* %tmp1540, i64 1
+  %tmp1542 = getelementptr inbounds float* %tmp1541, i64 1
+  %tmp1543 = getelementptr inbounds float* %tmp1542, i64 1
+  %tmp1544 = getelementptr inbounds float* %tmp1543, i64 1
+  %tmp1545 = getelementptr inbounds float* %tmp1544, i64 1
+  %tmp1546 = getelementptr inbounds float* %tmp1545, i64 1
+  %tmp1547 = getelementptr inbounds float* %tmp1546, i64 1
+  %tmp1548 = getelementptr inbounds float* %tmp1547, i64 1
+  %tmp1549 = getelementptr inbounds float* %tmp1548, i64 1
+  %tmp1550 = getelementptr inbounds float* %tmp1549, i64 1
+  %tmp1551 = getelementptr inbounds float* %tmp1550, i64 1
+  %tmp1552 = getelementptr inbounds float* %tmp1551, i64 1
+  %tmp1553 = getelementptr inbounds float* %tmp1552, i64 1
+  %tmp1554 = getelementptr inbounds float* %tmp1553, i64 1
+  %tmp1555 = getelementptr inbounds float* %tmp1554, i64 1
+  %tmp1556 = getelementptr inbounds float* %tmp1555, i64 1
+  %tmp1557 = getelementptr inbounds float* %tmp1556, i64 1
+  %tmp1558 = getelementptr inbounds float* %tmp1557, i64 1
+  %tmp1559 = getelementptr inbounds float* %tmp1558, i64 1
+  %tmp1560 = getelementptr inbounds float* %tmp1559, i64 1
+  %tmp1561 = getelementptr inbounds float* %tmp1560, i64 1
+  %tmp1562 = getelementptr inbounds float* %tmp1561, i64 1
+  %tmp1563 = getelementptr inbounds float* %tmp1562, i64 1
+  %tmp1564 = getelementptr inbounds float* %tmp1563, i64 1
+  %tmp1565 = getelementptr inbounds float* %tmp1564, i64 1
+  %tmp1566 = getelementptr inbounds float* %tmp1565, i64 1
+  %tmp1567 = getelementptr inbounds float* %tmp1566, i64 1
+  %tmp1568 = getelementptr inbounds float* %tmp1567, i64 1
+  %tmp1569 = getelementptr inbounds float* %tmp1568, i64 1
+  %tmp1570 = getelementptr inbounds float* %tmp1569, i64 1
+  %tmp1571 = getelementptr inbounds float* %tmp1570, i64 1
+  %tmp1572 = getelementptr inbounds float* %tmp1571, i64 1
+  %tmp1573 = getelementptr inbounds float* %tmp1572, i64 1
+  %tmp1574 = getelementptr inbounds float* %tmp1573, i64 1
+  %tmp1575 = getelementptr inbounds float* %tmp1574, i64 1
+  %tmp1576 = getelementptr inbounds float* %tmp1575, i64 1
+  %tmp1577 = getelementptr inbounds float* %tmp1576, i64 1
+  %tmp1578 = getelementptr inbounds float* %tmp1577, i64 1
+  %tmp1579 = getelementptr inbounds float* %tmp1578, i64 1
+  %tmp1580 = getelementptr inbounds float* %tmp1579, i64 1
+  %tmp1581 = getelementptr inbounds float* %tmp1580, i64 1
+  %tmp1582 = getelementptr inbounds float* %tmp1581, i64 1
+  %tmp1583 = getelementptr inbounds float* %tmp1582, i64 1
+  %tmp1584 = getelementptr inbounds float* %tmp1583, i64 1
+  %tmp1585 = getelementptr inbounds float* %tmp1584, i64 1
+  %tmp1586 = getelementptr inbounds float* %tmp1585, i64 1
+  %tmp1587 = getelementptr inbounds float* %tmp1586, i64 1
+  %tmp1588 = getelementptr inbounds float* %tmp1587, i64 1
+  %tmp1589 = getelementptr inbounds float* %tmp1588, i64 1
+  %tmp1590 = getelementptr inbounds float* %tmp1589, i64 1
+  %tmp1591 = getelementptr inbounds float* %tmp1590, i64 1
+  %tmp1592 = getelementptr inbounds float* %tmp1591, i64 1
+  %tmp1593 = getelementptr inbounds float* %tmp1592, i64 1
+  %tmp1594 = getelementptr inbounds float* %tmp1593, i64 1
+  %tmp1595 = getelementptr inbounds float* %tmp1594, i64 1
+  %tmp1596 = getelementptr inbounds float* %tmp1595, i64 1
+  %tmp1597 = getelementptr inbounds float* %tmp1596, i64 1
+  %tmp1598 = getelementptr inbounds float* %tmp1597, i64 1
+  %tmp1599 = getelementptr inbounds float* %tmp1598, i64 1
+  %tmp1600 = getelementptr inbounds float* %tmp1599, i64 1
+  %tmp1601 = getelementptr inbounds float* %tmp1600, i64 1
+  %tmp1602 = getelementptr inbounds float* %tmp1601, i64 1
+  %tmp1603 = getelementptr inbounds float* %tmp1602, i64 1
+  %tmp1604 = getelementptr inbounds float* %tmp1603, i64 1
+  %tmp1605 = getelementptr inbounds float* %tmp1604, i64 1
+  %tmp1606 = getelementptr inbounds float* %tmp1605, i64 1
+  %tmp1607 = getelementptr inbounds float* %tmp1606, i64 1
+  %tmp1608 = getelementptr inbounds float* %tmp1607, i64 1
+  %tmp1609 = getelementptr inbounds float* %tmp1608, i64 1
+  %tmp1610 = getelementptr inbounds float* %tmp1609, i64 1
+  %tmp1611 = getelementptr inbounds float* %tmp1610, i64 1
+  %tmp1612 = getelementptr inbounds float* %tmp1611, i64 1
+  %tmp1613 = getelementptr inbounds float* %tmp1612, i64 1
+  %tmp1614 = getelementptr inbounds float* %tmp1613, i64 1
+  %tmp1615 = getelementptr inbounds float* %tmp1614, i64 1
+  %tmp1616 = getelementptr inbounds float* %tmp1615, i64 1
+  %tmp1617 = getelementptr inbounds float* %tmp1616, i64 1
+  %tmp1618 = getelementptr inbounds float* %tmp1617, i64 1
+  %tmp1619 = getelementptr inbounds float* %tmp1618, i64 1
+  %tmp1620 = getelementptr inbounds float* %tmp1619, i64 1
+  %tmp1621 = getelementptr inbounds float* %tmp1620, i64 1
+  %tmp1622 = getelementptr inbounds float* %tmp1621, i64 1
+  %tmp1623 = getelementptr inbounds float* %tmp1622, i64 1
+  %tmp1624 = getelementptr inbounds float* %tmp1623, i64 1
+  %tmp1625 = getelementptr inbounds float* %tmp1624, i64 1
+  %tmp1626 = getelementptr inbounds float* %tmp1625, i64 1
+  %tmp1627 = getelementptr inbounds float* %tmp1626, i64 1
+  %tmp1628 = getelementptr inbounds float* %tmp1627, i64 1
+  %tmp1629 = getelementptr inbounds float* %tmp1628, i64 1
+  %tmp1630 = getelementptr inbounds float* %tmp1629, i64 1
+  %tmp1631 = getelementptr inbounds float* %tmp1630, i64 1
+  %tmp1632 = getelementptr inbounds float* %tmp1631, i64 1
+  %tmp1633 = getelementptr inbounds float* %tmp1632, i64 1
+  %tmp1634 = getelementptr inbounds float* %tmp1633, i64 1
+  %tmp1635 = getelementptr inbounds float* %tmp1634, i64 1
+  %tmp1636 = getelementptr inbounds float* %tmp1635, i64 1
+  %tmp1637 = getelementptr inbounds float* %tmp1636, i64 1
+  %tmp1638 = getelementptr inbounds float* %tmp1637, i64 1
+  %tmp1639 = getelementptr inbounds float* %tmp1638, i64 1
+  %tmp1640 = getelementptr inbounds float* %tmp1639, i64 1
+  %tmp1641 = getelementptr inbounds float* %tmp1640, i64 1
+  %tmp1642 = getelementptr inbounds float* %tmp1641, i64 1
+  %tmp1643 = getelementptr inbounds float* %tmp1642, i64 1
+  %tmp1644 = getelementptr inbounds float* %tmp1643, i64 1
+  %tmp1645 = getelementptr inbounds float* %tmp1644, i64 1
+  %tmp1646 = getelementptr inbounds float* %tmp1645, i64 1
+  %tmp1647 = getelementptr inbounds float* %tmp1646, i64 1
+  %tmp1648 = getelementptr inbounds float* %tmp1647, i64 1
+  %tmp1649 = getelementptr inbounds float* %tmp1648, i64 1
+  %tmp1650 = getelementptr inbounds float* %tmp1649, i64 1
+  %tmp1651 = getelementptr inbounds float* %tmp1650, i64 1
+  %tmp1652 = getelementptr inbounds float* %tmp1651, i64 1
+  %tmp1653 = getelementptr inbounds float* %tmp1652, i64 1
+  %tmp1654 = getelementptr inbounds float* %tmp1653, i64 1
+  %tmp1655 = getelementptr inbounds float* %tmp1654, i64 1
+  %tmp1656 = getelementptr inbounds float* %tmp1655, i64 1
+  %tmp1657 = getelementptr inbounds float* %tmp1656, i64 1
+  %tmp1658 = getelementptr inbounds float* %tmp1657, i64 1
+  %tmp1659 = getelementptr inbounds float* %tmp1658, i64 1
+  %tmp1660 = getelementptr inbounds float* %tmp1659, i64 1
+  %tmp1661 = getelementptr inbounds float* %tmp1660, i64 1
+  %tmp1662 = getelementptr inbounds float* %tmp1661, i64 1
+  %tmp1663 = getelementptr inbounds float* %tmp1662, i64 1
+  %tmp1664 = getelementptr inbounds float* %tmp1663, i64 1
+  %tmp1665 = getelementptr inbounds float* %tmp1664, i64 1
+  %tmp1666 = getelementptr inbounds float* %tmp1665, i64 1
+  %tmp1667 = getelementptr inbounds float* %tmp1666, i64 1
+  %tmp1668 = getelementptr inbounds float* %tmp1667, i64 1
+  %tmp1669 = getelementptr inbounds float* %tmp1668, i64 1
+  %tmp1670 = getelementptr inbounds float* %tmp1669, i64 1
+  %tmp1671 = getelementptr inbounds float* %tmp1670, i64 1
+  %tmp1672 = getelementptr inbounds float* %tmp1671, i64 1
+  %tmp1673 = getelementptr inbounds float* %tmp1672, i64 1
+  %tmp1674 = getelementptr inbounds float* %tmp1673, i64 1
+  %tmp1675 = getelementptr inbounds float* %tmp1674, i64 1
+  %tmp1676 = getelementptr inbounds float* %tmp1675, i64 1
+  %tmp1677 = getelementptr inbounds float* %tmp1676, i64 1
+  %tmp1678 = getelementptr inbounds float* %tmp1677, i64 1
+  %tmp1679 = getelementptr inbounds float* %tmp1678, i64 1
+  %tmp1680 = getelementptr inbounds float* %tmp1679, i64 1
+  %tmp1681 = getelementptr inbounds float* %tmp1680, i64 1
+  %tmp1682 = getelementptr inbounds float* %tmp1681, i64 1
+  %tmp1683 = getelementptr inbounds float* %tmp1682, i64 1
+  %tmp1684 = getelementptr inbounds float* %tmp1683, i64 1
+  %tmp1685 = getelementptr inbounds float* %tmp1684, i64 1
+  %tmp1686 = getelementptr inbounds float* %tmp1685, i64 1
+  %tmp1687 = getelementptr inbounds float* %tmp1686, i64 1
+  %tmp1688 = getelementptr inbounds float* %tmp1687, i64 1
+  %tmp1689 = getelementptr inbounds float* %tmp1688, i64 1
+  %tmp1690 = getelementptr inbounds float* %tmp1689, i64 1
+  %tmp1691 = getelementptr inbounds float* %tmp1690, i64 1
+  %tmp1692 = getelementptr inbounds float* %tmp1691, i64 1
+  %tmp1693 = getelementptr inbounds float* %tmp1692, i64 1
+  %tmp1694 = getelementptr inbounds float* %tmp1693, i64 1
+  %tmp1695 = getelementptr inbounds float* %tmp1694, i64 1
+  %tmp1696 = getelementptr inbounds float* %tmp1695, i64 1
+  %tmp1697 = getelementptr inbounds float* %tmp1696, i64 1
+  %tmp1698 = getelementptr inbounds float* %tmp1697, i64 1
+  %tmp1699 = getelementptr inbounds float* %tmp1698, i64 1
+  %tmp1700 = getelementptr inbounds float* %tmp1699, i64 1
+  %tmp1701 = getelementptr inbounds float* %tmp1700, i64 1
+  %tmp1702 = getelementptr inbounds float* %tmp1701, i64 1
+  %tmp1703 = getelementptr inbounds float* %tmp1702, i64 1
+  %tmp1704 = getelementptr inbounds float* %tmp1703, i64 1
+  %tmp1705 = getelementptr inbounds float* %tmp1704, i64 1
+  %tmp1706 = getelementptr inbounds float* %tmp1705, i64 1
+  %tmp1707 = getelementptr inbounds float* %tmp1706, i64 1
+  %tmp1708 = getelementptr inbounds float* %tmp1707, i64 1
+  %tmp1709 = getelementptr inbounds float* %tmp1708, i64 1
+  %tmp1710 = getelementptr inbounds float* %tmp1709, i64 1
+  %tmp1711 = getelementptr inbounds float* %tmp1710, i64 1
+  %tmp1712 = getelementptr inbounds float* %tmp1711, i64 1
+  %tmp1713 = getelementptr inbounds float* %tmp1712, i64 1
+  %tmp1714 = getelementptr inbounds float* %tmp1713, i64 1
+  %tmp1715 = getelementptr inbounds float* %tmp1714, i64 1
+  %tmp1716 = getelementptr inbounds float* %tmp1715, i64 1
+  %tmp1717 = getelementptr inbounds float* %tmp1716, i64 1
+  %tmp1718 = getelementptr inbounds float* %tmp1717, i64 1
+  %tmp1719 = getelementptr inbounds float* %tmp1718, i64 1
+  %tmp1720 = getelementptr inbounds float* %tmp1719, i64 1
+  %tmp1721 = getelementptr inbounds float* %tmp1720, i64 1
+  %tmp1722 = getelementptr inbounds float* %tmp1721, i64 1
+  %tmp1723 = getelementptr inbounds float* %tmp1722, i64 1
+  %tmp1724 = getelementptr inbounds float* %tmp1723, i64 1
+  %tmp1725 = getelementptr inbounds float* %tmp1724, i64 1
+  %tmp1726 = getelementptr inbounds float* %tmp1725, i64 1
+  %tmp1727 = getelementptr inbounds float* %tmp1726, i64 1
+  %tmp1728 = getelementptr inbounds float* %tmp1727, i64 1
+  %tmp1729 = getelementptr inbounds float* %tmp1728, i64 1
+  %tmp1730 = getelementptr inbounds float* %tmp1729, i64 1
+  %tmp1731 = getelementptr inbounds float* %tmp1730, i64 1
+  %tmp1732 = getelementptr inbounds float* %tmp1731, i64 1
+  %tmp1733 = getelementptr inbounds float* %tmp1732, i64 1
+  %tmp1734 = getelementptr inbounds float* %tmp1733, i64 1
+  %tmp1735 = getelementptr inbounds float* %tmp1734, i64 1
+  %tmp1736 = getelementptr inbounds float* %tmp1735, i64 1
+  %tmp1737 = getelementptr inbounds float* %tmp1736, i64 1
+  %tmp1738 = getelementptr inbounds float* %tmp1737, i64 1
+  %tmp1739 = getelementptr inbounds float* %tmp1738, i64 1
+  %tmp1740 = getelementptr inbounds float* %tmp1739, i64 1
+  %tmp1741 = getelementptr inbounds float* %tmp1740, i64 1
+  %tmp1742 = getelementptr inbounds float* %tmp1741, i64 1
+  %tmp1743 = getelementptr inbounds float* %tmp1742, i64 1
+  %tmp1744 = getelementptr inbounds float* %tmp1743, i64 1
+  %tmp1745 = getelementptr inbounds float* %tmp1744, i64 1
+  %tmp1746 = getelementptr inbounds float* %tmp1745, i64 1
+  %tmp1747 = getelementptr inbounds float* %tmp1746, i64 1
+  %tmp1748 = getelementptr inbounds float* %tmp1747, i64 1
+  %tmp1749 = getelementptr inbounds float* %tmp1748, i64 1
+  %tmp1750 = getelementptr inbounds float* %tmp1749, i64 1
+  %tmp1751 = getelementptr inbounds float* %tmp1750, i64 1
+  %tmp1752 = getelementptr inbounds float* %tmp1751, i64 1
+  %tmp1753 = getelementptr inbounds float* %tmp1752, i64 1
+  %tmp1754 = getelementptr inbounds float* %tmp1753, i64 1
+  %tmp1755 = getelementptr inbounds float* %tmp1754, i64 1
+  %tmp1756 = getelementptr inbounds float* %tmp1755, i64 1
+  %tmp1757 = getelementptr inbounds float* %tmp1756, i64 1
+  %tmp1758 = getelementptr inbounds float* %tmp1757, i64 1
+  %tmp1759 = getelementptr inbounds float* %tmp1758, i64 1
+  %tmp1760 = getelementptr inbounds float* %tmp1759, i64 1
+  %tmp1761 = getelementptr inbounds float* %tmp1760, i64 1
+  %tmp1762 = getelementptr inbounds float* %tmp1761, i64 1
+  %tmp1763 = getelementptr inbounds float* %tmp1762, i64 1
+  %tmp1764 = getelementptr inbounds float* %tmp1763, i64 1
+  %tmp1765 = getelementptr inbounds float* %tmp1764, i64 1
+  %tmp1766 = getelementptr inbounds float* %tmp1765, i64 1
+  %tmp1767 = getelementptr inbounds float* %tmp1766, i64 1
+  %tmp1768 = getelementptr inbounds float* %tmp1767, i64 1
+  %tmp1769 = getelementptr inbounds float* %tmp1768, i64 1
+  %tmp1770 = getelementptr inbounds float* %tmp1769, i64 1
+  %tmp1771 = getelementptr inbounds float* %tmp1770, i64 1
+  %tmp1772 = getelementptr inbounds float* %tmp1771, i64 1
+  %tmp1773 = getelementptr inbounds float* %tmp1772, i64 1
+  %tmp1774 = getelementptr inbounds float* %tmp1773, i64 1
+  %tmp1775 = getelementptr inbounds float* %tmp1774, i64 1
+  %tmp1776 = getelementptr inbounds float* %tmp1775, i64 1
+  %tmp1777 = getelementptr inbounds float* %tmp1776, i64 1
+  %tmp1778 = getelementptr inbounds float* %tmp1777, i64 1
+  %tmp1779 = getelementptr inbounds float* %tmp1778, i64 1
+  %tmp1780 = getelementptr inbounds float* %tmp1779, i64 1
+  %tmp1781 = getelementptr inbounds float* %tmp1780, i64 1
+  %tmp1782 = getelementptr inbounds float* %tmp1781, i64 1
+  %tmp1783 = getelementptr inbounds float* %tmp1782, i64 1
+  %tmp1784 = getelementptr inbounds float* %tmp1783, i64 1
+  %tmp1785 = getelementptr inbounds float* %tmp1784, i64 1
+  %tmp1786 = getelementptr inbounds float* %tmp1785, i64 1
+  %tmp1787 = getelementptr inbounds float* %tmp1786, i64 1
+  %tmp1788 = getelementptr inbounds float* %tmp1787, i64 1
+  %tmp1789 = getelementptr inbounds float* %tmp1788, i64 1
+  %tmp1790 = getelementptr inbounds float* %tmp1789, i64 1
+  %tmp1791 = getelementptr inbounds float* %tmp1790, i64 1
+  %tmp1792 = getelementptr inbounds float* %tmp1791, i64 1
+  %tmp1793 = getelementptr inbounds float* %tmp1792, i64 1
+  %tmp1794 = getelementptr inbounds float* %tmp1793, i64 1
+  %tmp1795 = getelementptr inbounds float* %tmp1794, i64 1
+  %tmp1796 = getelementptr inbounds float* %tmp1795, i64 1
+  %tmp1797 = getelementptr inbounds float* %tmp1796, i64 1
+  %tmp1798 = getelementptr inbounds float* %tmp1797, i64 1
+  %tmp1799 = getelementptr inbounds float* %tmp1798, i64 1
+  %tmp1800 = getelementptr inbounds float* %tmp1799, i64 1
+  %tmp1801 = getelementptr inbounds float* %tmp1800, i64 1
+  %tmp1802 = getelementptr inbounds float* %tmp1801, i64 1
+  %tmp1803 = getelementptr inbounds float* %tmp1802, i64 1
+  %tmp1804 = getelementptr inbounds float* %tmp1803, i64 1
+  %tmp1805 = getelementptr inbounds float* %tmp1804, i64 1
+  %tmp1806 = getelementptr inbounds float* %tmp1805, i64 1
+  %tmp1807 = getelementptr inbounds float* %tmp1806, i64 1
+  %tmp1808 = getelementptr inbounds float* %tmp1807, i64 1
+  %tmp1809 = getelementptr inbounds float* %tmp1808, i64 1
+  %tmp1810 = getelementptr inbounds float* %tmp1809, i64 1
+  %tmp1811 = getelementptr inbounds float* %tmp1810, i64 1
+  %tmp1812 = getelementptr inbounds float* %tmp1811, i64 1
+  %tmp1813 = getelementptr inbounds float* %tmp1812, i64 1
+  %tmp1814 = getelementptr inbounds float* %tmp1813, i64 1
+  %tmp1815 = getelementptr inbounds float* %tmp1814, i64 1
+  %tmp1816 = getelementptr inbounds float* %tmp1815, i64 1
+  %tmp1817 = getelementptr inbounds float* %tmp1816, i64 1
+  %tmp1818 = getelementptr inbounds float* %tmp1817, i64 1
+  %tmp1819 = getelementptr inbounds float* %tmp1818, i64 1
+  %tmp1820 = getelementptr inbounds float* %tmp1819, i64 1
+  %tmp1821 = getelementptr inbounds float* %tmp1820, i64 1
+  %tmp1822 = getelementptr inbounds float* %tmp1821, i64 1
+  %tmp1823 = getelementptr inbounds float* %tmp1822, i64 1
+  %tmp1824 = getelementptr inbounds float* %tmp1823, i64 1
+  %tmp1825 = getelementptr inbounds float* %tmp1824, i64 1
+  %tmp1826 = getelementptr inbounds float* %tmp1825, i64 1
+  %tmp1827 = getelementptr inbounds float* %tmp1826, i64 1
+  %tmp1828 = getelementptr inbounds float* %tmp1827, i64 1
+  %tmp1829 = getelementptr inbounds float* %tmp1828, i64 1
+  %tmp1830 = getelementptr inbounds float* %tmp1829, i64 1
+  %tmp1831 = getelementptr inbounds float* %tmp1830, i64 1
+  %tmp1832 = getelementptr inbounds float* %tmp1831, i64 1
+  %tmp1833 = getelementptr inbounds float* %tmp1832, i64 1
+  %tmp1834 = getelementptr inbounds float* %tmp1833, i64 1
+  %tmp1835 = getelementptr inbounds float* %tmp1834, i64 1
+  %tmp1836 = getelementptr inbounds float* %tmp1835, i64 1
+  %tmp1837 = getelementptr inbounds float* %tmp1836, i64 1
+  %tmp1838 = getelementptr inbounds float* %tmp1837, i64 1
+  %tmp1839 = getelementptr inbounds float* %tmp1838, i64 1
+  %tmp1840 = getelementptr inbounds float* %tmp1839, i64 1
+  %tmp1841 = getelementptr inbounds float* %tmp1840, i64 1
+  %tmp1842 = getelementptr inbounds float* %tmp1841, i64 1
+  %tmp1843 = getelementptr inbounds float* %tmp1842, i64 1
+  %tmp1844 = getelementptr inbounds float* %tmp1843, i64 1
+  %tmp1845 = getelementptr inbounds float* %tmp1844, i64 1
+  %tmp1846 = getelementptr inbounds float* %tmp1845, i64 1
+  %tmp1847 = getelementptr inbounds float* %tmp1846, i64 1
+  %tmp1848 = getelementptr inbounds float* %tmp1847, i64 1
+  %tmp1849 = getelementptr inbounds float* %tmp1848, i64 1
+  %tmp1850 = getelementptr inbounds float* %tmp1849, i64 1
+  %tmp1851 = getelementptr inbounds float* %tmp1850, i64 1
+  %tmp1852 = getelementptr inbounds float* %tmp1851, i64 1
+  %tmp1853 = getelementptr inbounds float* %tmp1852, i64 1
+  %tmp1854 = getelementptr inbounds float* %tmp1853, i64 1
+  %tmp1855 = getelementptr inbounds float* %tmp1854, i64 1
+  %tmp1856 = getelementptr inbounds float* %tmp1855, i64 1
+  %tmp1857 = getelementptr inbounds float* %tmp1856, i64 1
+  %tmp1858 = getelementptr inbounds float* %tmp1857, i64 1
+  %tmp1859 = getelementptr inbounds float* %tmp1858, i64 1
+  %tmp1860 = getelementptr inbounds float* %tmp1859, i64 1
+  %tmp1861 = getelementptr inbounds float* %tmp1860, i64 1
+  %tmp1862 = getelementptr inbounds float* %tmp1861, i64 1
+  %tmp1863 = getelementptr inbounds float* %tmp1862, i64 1
+  %tmp1864 = getelementptr inbounds float* %tmp1863, i64 1
+  %tmp1865 = getelementptr inbounds float* %tmp1864, i64 1
+  %tmp1866 = getelementptr inbounds float* %tmp1865, i64 1
+  %tmp1867 = getelementptr inbounds float* %tmp1866, i64 1
+  %tmp1868 = getelementptr inbounds float* %tmp1867, i64 1
+  %tmp1869 = getelementptr inbounds float* %tmp1868, i64 1
+  %tmp1870 = getelementptr inbounds float* %tmp1869, i64 1
+  %tmp1871 = getelementptr inbounds float* %tmp1870, i64 1
+  %tmp1872 = getelementptr inbounds float* %tmp1871, i64 1
+  %tmp1873 = getelementptr inbounds float* %tmp1872, i64 1
+  %tmp1874 = getelementptr inbounds float* %tmp1873, i64 1
+  %tmp1875 = getelementptr inbounds float* %tmp1874, i64 1
+  %tmp1876 = getelementptr inbounds float* %tmp1875, i64 1
+  %tmp1877 = getelementptr inbounds float* %tmp1876, i64 1
+  %tmp1878 = getelementptr inbounds float* %tmp1877, i64 1
+  %tmp1879 = getelementptr inbounds float* %tmp1878, i64 1
+  %tmp1880 = getelementptr inbounds float* %tmp1879, i64 1
+  %tmp1881 = getelementptr inbounds float* %tmp1880, i64 1
+  %tmp1882 = getelementptr inbounds float* %tmp1881, i64 1
+  %tmp1883 = getelementptr inbounds float* %tmp1882, i64 1
+  %tmp1884 = getelementptr inbounds float* %tmp1883, i64 1
+  %tmp1885 = getelementptr inbounds float* %tmp1884, i64 1
+  %tmp1886 = getelementptr inbounds float* %tmp1885, i64 1
+  %tmp1887 = getelementptr inbounds float* %tmp1886, i64 1
+  %tmp1888 = getelementptr inbounds float* %tmp1887, i64 1
+  %tmp1889 = getelementptr inbounds float* %tmp1888, i64 1
+  %tmp1890 = getelementptr inbounds float* %tmp1889, i64 1
+  %tmp1891 = getelementptr inbounds float* %tmp1890, i64 1
+  %tmp1892 = getelementptr inbounds float* %tmp1891, i64 1
+  %tmp1893 = getelementptr inbounds float* %tmp1892, i64 1
+  %tmp1894 = getelementptr inbounds float* %tmp1893, i64 1
+  %tmp1895 = getelementptr inbounds float* %tmp1894, i64 1
+  %tmp1896 = getelementptr inbounds float* %tmp1895, i64 1
+  %tmp1897 = getelementptr inbounds float* %tmp1896, i64 1
+  %tmp1898 = getelementptr inbounds float* %tmp1897, i64 1
+  %tmp1899 = getelementptr inbounds float* %tmp1898, i64 1
+  %tmp1900 = getelementptr inbounds float* %tmp1899, i64 1
+  %tmp1901 = getelementptr inbounds float* %tmp1900, i64 1
+  %tmp1902 = getelementptr inbounds float* %tmp1901, i64 1
+  %tmp1903 = getelementptr inbounds float* %tmp1902, i64 1
+  %tmp1904 = getelementptr inbounds float* %tmp1903, i64 1
+  %tmp1905 = getelementptr inbounds float* %tmp1904, i64 1
+  %tmp1906 = getelementptr inbounds float* %tmp1905, i64 1
+  %tmp1907 = getelementptr inbounds float* %tmp1906, i64 1
+  %tmp1908 = getelementptr inbounds float* %tmp1907, i64 1
+  %tmp1909 = getelementptr inbounds float* %tmp1908, i64 1
+  %tmp1910 = getelementptr inbounds float* %tmp1909, i64 1
+  %tmp1911 = getelementptr inbounds float* %tmp1910, i64 1
+  %tmp1912 = getelementptr inbounds float* %tmp1911, i64 1
+  %tmp1913 = getelementptr inbounds float* %tmp1912, i64 1
+  %tmp1914 = getelementptr inbounds float* %tmp1913, i64 1
+  %tmp1915 = getelementptr inbounds float* %tmp1914, i64 1
+  %tmp1916 = getelementptr inbounds float* %tmp1915, i64 1
+  %tmp1917 = getelementptr inbounds float* %tmp1916, i64 1
+  %tmp1918 = getelementptr inbounds float* %tmp1917, i64 1
+  %tmp1919 = getelementptr inbounds float* %tmp1918, i64 1
+  %tmp1920 = getelementptr inbounds float* %tmp1919, i64 1
+  %tmp1921 = getelementptr inbounds float* %tmp1920, i64 1
+  %tmp1922 = getelementptr inbounds float* %tmp1921, i64 1
+  %tmp1923 = getelementptr inbounds float* %tmp1922, i64 1
+  %tmp1924 = getelementptr inbounds float* %tmp1923, i64 1
+  %tmp1925 = getelementptr inbounds float* %tmp1924, i64 1
+  %tmp1926 = getelementptr inbounds float* %tmp1925, i64 1
+  %tmp1927 = getelementptr inbounds float* %tmp1926, i64 1
+  %tmp1928 = getelementptr inbounds float* %tmp1927, i64 1
+  %tmp1929 = getelementptr inbounds float* %tmp1928, i64 1
+  %tmp1930 = getelementptr inbounds float* %tmp1929, i64 1
+  %tmp1931 = getelementptr inbounds float* %tmp1930, i64 1
+  %tmp1932 = getelementptr inbounds float* %tmp1931, i64 1
+  %tmp1933 = getelementptr inbounds float* %tmp1932, i64 1
+  %tmp1934 = getelementptr inbounds float* %tmp1933, i64 1
+  %tmp1935 = getelementptr inbounds float* %tmp1934, i64 1
+  %tmp1936 = getelementptr inbounds float* %tmp1935, i64 1
+  %tmp1937 = getelementptr inbounds float* %tmp1936, i64 1
+  %tmp1938 = getelementptr inbounds float* %tmp1937, i64 1
+  %tmp1939 = getelementptr inbounds float* %tmp1938, i64 1
+  %tmp1940 = getelementptr inbounds float* %tmp1939, i64 1
+  %tmp1941 = getelementptr inbounds float* %tmp1940, i64 1
+  %tmp1942 = getelementptr inbounds float* %tmp1941, i64 1
+  %tmp1943 = getelementptr inbounds float* %tmp1942, i64 1
+  %tmp1944 = getelementptr inbounds float* %tmp1943, i64 1
+  %tmp1945 = getelementptr inbounds float* %tmp1944, i64 1
+  %tmp1946 = getelementptr inbounds float* %tmp1945, i64 1
+  %tmp1947 = getelementptr inbounds float* %tmp1946, i64 1
+  %tmp1948 = getelementptr inbounds float* %tmp1947, i64 1
+  %tmp1949 = getelementptr inbounds float* %tmp1948, i64 1
+  %tmp1950 = getelementptr inbounds float* %tmp1949, i64 1
+  %tmp1951 = getelementptr inbounds float* %tmp1950, i64 1
+  %tmp1952 = getelementptr inbounds float* %tmp1951, i64 1
+  %tmp1953 = getelementptr inbounds float* %tmp1952, i64 1
+  %tmp1954 = getelementptr inbounds float* %tmp1953, i64 1
+  %tmp1955 = getelementptr inbounds float* %tmp1954, i64 1
+  %tmp1956 = getelementptr inbounds float* %tmp1955, i64 1
+  %tmp1957 = getelementptr inbounds float* %tmp1956, i64 1
+  %tmp1958 = getelementptr inbounds float* %tmp1957, i64 1
+  %tmp1959 = getelementptr inbounds float* %tmp1958, i64 1
+  %tmp1960 = getelementptr inbounds float* %tmp1959, i64 1
+  %tmp1961 = getelementptr inbounds float* %tmp1960, i64 1
+  %tmp1962 = getelementptr inbounds float* %tmp1961, i64 1
+  %tmp1963 = getelementptr inbounds float* %tmp1962, i64 1
+  %tmp1964 = getelementptr inbounds float* %tmp1963, i64 1
+  %tmp1965 = getelementptr inbounds float* %tmp1964, i64 1
+  %tmp1966 = getelementptr inbounds float* %tmp1965, i64 1
+  %tmp1967 = getelementptr inbounds float* %tmp1966, i64 1
+  %tmp1968 = getelementptr inbounds float* %tmp1967, i64 1
+  %tmp1969 = getelementptr inbounds float* %tmp1968, i64 1
+  %tmp1970 = getelementptr inbounds float* %tmp1969, i64 1
+  %tmp1971 = getelementptr inbounds float* %tmp1970, i64 1
+  %tmp1972 = getelementptr inbounds float* %tmp1971, i64 1
+  %tmp1973 = getelementptr inbounds float* %tmp1972, i64 1
+  %tmp1974 = getelementptr inbounds float* %tmp1973, i64 1
+  %tmp1975 = getelementptr inbounds float* %tmp1974, i64 1
+  %tmp1976 = getelementptr inbounds float* %tmp1975, i64 1
+  %tmp1977 = getelementptr inbounds float* %tmp1976, i64 1
+  %tmp1978 = getelementptr inbounds float* %tmp1977, i64 1
+  %tmp1979 = getelementptr inbounds float* %tmp1978, i64 1
+  %tmp1980 = getelementptr inbounds float* %tmp1979, i64 1
+  %tmp1981 = getelementptr inbounds float* %tmp1980, i64 1
+  %tmp1982 = getelementptr inbounds float* %tmp1981, i64 1
+  %tmp1983 = getelementptr inbounds float* %tmp1982, i64 1
+  %tmp1984 = getelementptr inbounds float* %tmp1983, i64 1
+  %tmp1985 = getelementptr inbounds float* %tmp1984, i64 1
+  %tmp1986 = getelementptr inbounds float* %tmp1985, i64 1
+  %tmp1987 = getelementptr inbounds float* %tmp1986, i64 1
+  %tmp1988 = getelementptr inbounds float* %tmp1987, i64 1
+  %tmp1989 = getelementptr inbounds float* %tmp1988, i64 1
+  %tmp1990 = getelementptr inbounds float* %tmp1989, i64 1
+  %tmp1991 = getelementptr inbounds float* %tmp1990, i64 1
+  %tmp1992 = getelementptr inbounds float* %tmp1991, i64 1
+  %tmp1993 = getelementptr inbounds float* %tmp1992, i64 1
+  %tmp1994 = getelementptr inbounds float* %tmp1993, i64 1
+  %tmp1995 = getelementptr inbounds float* %tmp1994, i64 1
+  %tmp1996 = getelementptr inbounds float* %tmp1995, i64 1
+  %tmp1997 = getelementptr inbounds float* %tmp1996, i64 1
+  %tmp1998 = getelementptr inbounds float* %tmp1997, i64 1
+  %tmp1999 = getelementptr inbounds float* %tmp1998, i64 1
+  %tmp2000 = getelementptr inbounds float* %tmp1999, i64 1
+  %tmp2001 = getelementptr inbounds float* %tmp2000, i64 1
+  %tmp2002 = getelementptr inbounds float* %tmp2001, i64 1
+  %tmp2003 = getelementptr inbounds float* %tmp2002, i64 1
+  %tmp2004 = getelementptr inbounds float* %tmp2003, i64 1
+  %tmp2005 = getelementptr inbounds float* %tmp2004, i64 1
+  %tmp2006 = getelementptr inbounds float* %tmp2005, i64 1
+  %tmp2007 = getelementptr inbounds float* %tmp2006, i64 1
+  %tmp2008 = getelementptr inbounds float* %tmp2007, i64 1
+  %tmp2009 = getelementptr inbounds float* %tmp2008, i64 1
+  %tmp2010 = getelementptr inbounds float* %tmp2009, i64 1
+  %tmp2011 = getelementptr inbounds float* %tmp2010, i64 1
+  %tmp2012 = getelementptr inbounds float* %tmp2011, i64 1
+  %tmp2013 = getelementptr inbounds float* %tmp2012, i64 1
+  %tmp2014 = getelementptr inbounds float* %tmp2013, i64 1
+  %tmp2015 = getelementptr inbounds float* %tmp2014, i64 1
+  %tmp2016 = getelementptr inbounds float* %tmp2015, i64 1
+  %tmp2017 = getelementptr inbounds float* %tmp2016, i64 1
+  %tmp2018 = getelementptr inbounds float* %tmp2017, i64 1
+  %tmp2019 = getelementptr inbounds float* %tmp2018, i64 1
+  %tmp2020 = getelementptr inbounds float* %tmp2019, i64 1
+  %tmp2021 = getelementptr inbounds float* %tmp2020, i64 1
+  %tmp2022 = getelementptr inbounds float* %tmp2021, i64 1
+  %tmp2023 = getelementptr inbounds float* %tmp2022, i64 1
+  %tmp2024 = getelementptr inbounds float* %tmp2023, i64 1
+  %tmp2025 = getelementptr inbounds float* %tmp2024, i64 1
+  %tmp2026 = getelementptr inbounds float* %tmp2025, i64 1
+  %tmp2027 = getelementptr inbounds float* %tmp2026, i64 1
+  %tmp2028 = getelementptr inbounds float* %tmp2027, i64 1
+  %tmp2029 = getelementptr inbounds float* %tmp2028, i64 1
+  %tmp2030 = getelementptr inbounds float* %tmp2029, i64 1
+  %tmp2031 = getelementptr inbounds float* %tmp2030, i64 1
+  %tmp2032 = getelementptr inbounds float* %tmp2031, i64 1
+  %tmp2033 = getelementptr inbounds float* %tmp2032, i64 1
+  %tmp2034 = getelementptr inbounds float* %tmp2033, i64 1
+  %tmp2035 = getelementptr inbounds float* %tmp2034, i64 1
+  %tmp2036 = getelementptr inbounds float* %tmp2035, i64 1
+  %tmp2037 = getelementptr inbounds float* %tmp2036, i64 1
+  %tmp2038 = getelementptr inbounds float* %tmp2037, i64 1
+  %tmp2039 = getelementptr inbounds float* %tmp2038, i64 1
+  %tmp2040 = getelementptr inbounds float* %tmp2039, i64 1
+  %tmp2041 = getelementptr inbounds float* %tmp2040, i64 1
+  %tmp2042 = getelementptr inbounds float* %tmp2041, i64 1
+  %tmp2043 = getelementptr inbounds float* %tmp2042, i64 1
+  %tmp2044 = getelementptr inbounds float* %tmp2043, i64 1
+  %tmp2045 = getelementptr inbounds float* %tmp2044, i64 1
+  %tmp2046 = getelementptr inbounds float* %tmp2045, i64 1
+  %tmp2047 = getelementptr inbounds float* %tmp2046, i64 1
+  %tmp2048 = getelementptr inbounds float* %tmp2047, i64 1
+  %tmp2049 = getelementptr inbounds float* %tmp2048, i64 1
+  %tmp2050 = getelementptr inbounds float* %tmp2049, i64 1
+  %tmp2051 = getelementptr inbounds float* %tmp2050, i64 1
+  %tmp2052 = getelementptr inbounds float* %tmp2051, i64 1
+  %tmp2053 = getelementptr inbounds float* %tmp2052, i64 1
+  %tmp2054 = getelementptr inbounds float* %tmp2053, i64 1
+  %tmp2055 = getelementptr inbounds float* %tmp2054, i64 1
+  %tmp2056 = getelementptr inbounds float* %tmp2055, i64 1
+  %tmp2057 = getelementptr inbounds float* %tmp2056, i64 1
+  %tmp2058 = getelementptr inbounds float* %tmp2057, i64 1
+  %tmp2059 = getelementptr inbounds float* %tmp2058, i64 1
+  %tmp2060 = getelementptr inbounds float* %tmp2059, i64 1
+  %tmp2061 = getelementptr inbounds float* %tmp2060, i64 1
+  %tmp2062 = getelementptr inbounds float* %tmp2061, i64 1
+  %tmp2063 = getelementptr inbounds float* %tmp2062, i64 1
+  %tmp2064 = getelementptr inbounds float* %tmp2063, i64 1
+  %tmp2065 = getelementptr inbounds float* %tmp2064, i64 1
+  %tmp2066 = getelementptr inbounds float* %tmp2065, i64 1
+  %tmp2067 = getelementptr inbounds float* %tmp2066, i64 1
+  %tmp2068 = getelementptr inbounds float* %tmp2067, i64 1
+  %tmp2069 = getelementptr inbounds float* %tmp2068, i64 1
+  %tmp2070 = getelementptr inbounds float* %tmp2069, i64 1
+  %tmp2071 = getelementptr inbounds float* %tmp2070, i64 1
+  %tmp2072 = getelementptr inbounds float* %tmp2071, i64 1
+  %tmp2073 = getelementptr inbounds float* %tmp2072, i64 1
+  %tmp2074 = getelementptr inbounds float* %tmp2073, i64 1
+  %tmp2075 = getelementptr inbounds float* %tmp2074, i64 1
+  %tmp2076 = getelementptr inbounds float* %tmp2075, i64 1
+  %tmp2077 = getelementptr inbounds float* %tmp2076, i64 1
+  %tmp2078 = getelementptr inbounds float* %tmp2077, i64 1
+  %tmp2079 = getelementptr inbounds float* %tmp2078, i64 1
+  %tmp2080 = getelementptr inbounds float* %tmp2079, i64 1
+  %tmp2081 = getelementptr inbounds float* %tmp2080, i64 1
+  %tmp2082 = getelementptr inbounds float* %tmp2081, i64 1
+  %tmp2083 = getelementptr inbounds float* %tmp2082, i64 1
+  %tmp2084 = getelementptr inbounds float* %tmp2083, i64 1
+  %tmp2085 = getelementptr inbounds float* %tmp2084, i64 1
+  %tmp2086 = getelementptr inbounds float* %tmp2085, i64 1
+  %tmp2087 = getelementptr inbounds float* %tmp2086, i64 1
+  %tmp2088 = getelementptr inbounds float* %tmp2087, i64 1
+  %tmp2089 = getelementptr inbounds float* %tmp2088, i64 1
+  %tmp2090 = getelementptr inbounds float* %tmp2089, i64 1
+  %tmp2091 = getelementptr inbounds float* %tmp2090, i64 1
+  %tmp2092 = getelementptr inbounds float* %tmp2091, i64 1
+  %tmp2093 = getelementptr inbounds float* %tmp2092, i64 1
+  %tmp2094 = getelementptr inbounds float* %tmp2093, i64 1
+  %tmp2095 = getelementptr inbounds float* %tmp2094, i64 1
+  %tmp2096 = getelementptr inbounds float* %tmp2095, i64 1
+  %tmp2097 = getelementptr inbounds float* %tmp2096, i64 1
+  %tmp2098 = getelementptr inbounds float* %tmp2097, i64 1
+  %tmp2099 = getelementptr inbounds float* %tmp2098, i64 1
+  %tmp2100 = getelementptr inbounds float* %tmp2099, i64 1
+  %tmp2101 = getelementptr inbounds float* %tmp2100, i64 1
+  %tmp2102 = getelementptr inbounds float* %tmp2101, i64 1
+  %tmp2103 = getelementptr inbounds float* %tmp2102, i64 1
+  %tmp2104 = getelementptr inbounds float* %tmp2103, i64 1
+  %tmp2105 = getelementptr inbounds float* %tmp2104, i64 1
+  %tmp2106 = getelementptr inbounds float* %tmp2105, i64 1
+  %tmp2107 = getelementptr inbounds float* %tmp2106, i64 1
+  %tmp2108 = getelementptr inbounds float* %tmp2107, i64 1
+  %tmp2109 = getelementptr inbounds float* %tmp2108, i64 1
+  %tmp2110 = getelementptr inbounds float* %tmp2109, i64 1
+  %tmp2111 = getelementptr inbounds float* %tmp2110, i64 1
+  %tmp2112 = getelementptr inbounds float* %tmp2111, i64 1
+  %tmp2113 = getelementptr inbounds float* %tmp2112, i64 1
+  %tmp2114 = getelementptr inbounds float* %tmp2113, i64 1
+  %tmp2115 = getelementptr inbounds float* %tmp2114, i64 1
+  %tmp2116 = getelementptr inbounds float* %tmp2115, i64 1
+  %tmp2117 = getelementptr inbounds float* %tmp2116, i64 1
+  %tmp2118 = getelementptr inbounds float* %tmp2117, i64 1
+  %tmp2119 = getelementptr inbounds float* %tmp2118, i64 1
+  %tmp2120 = getelementptr inbounds float* %tmp2119, i64 1
+  %tmp2121 = getelementptr inbounds float* %tmp2120, i64 1
+  %tmp2122 = getelementptr inbounds float* %tmp2121, i64 1
+  %tmp2123 = getelementptr inbounds float* %tmp2122, i64 1
+  %tmp2124 = getelementptr inbounds float* %tmp2123, i64 1
+  %tmp2125 = getelementptr inbounds float* %tmp2124, i64 1
+  %tmp2126 = getelementptr inbounds float* %tmp2125, i64 1
+  %tmp2127 = getelementptr inbounds float* %tmp2126, i64 1
+  %tmp2128 = getelementptr inbounds float* %tmp2127, i64 1
+  %tmp2129 = getelementptr inbounds float* %tmp2128, i64 1
+  %tmp2130 = getelementptr inbounds float* %tmp2129, i64 1
+  %tmp2131 = getelementptr inbounds float* %tmp2130, i64 1
+  %tmp2132 = getelementptr inbounds float* %tmp2131, i64 1
+  %tmp2133 = getelementptr inbounds float* %tmp2132, i64 1
+  %tmp2134 = getelementptr inbounds float* %tmp2133, i64 1
+  %tmp2135 = getelementptr inbounds float* %tmp2134, i64 1
+  %tmp2136 = getelementptr inbounds float* %tmp2135, i64 1
+  %tmp2137 = getelementptr inbounds float* %tmp2136, i64 1
+  %tmp2138 = getelementptr inbounds float* %tmp2137, i64 1
+  %tmp2139 = getelementptr inbounds float* %tmp2138, i64 1
+  %tmp2140 = getelementptr inbounds float* %tmp2139, i64 1
+  %tmp2141 = getelementptr inbounds float* %tmp2140, i64 1
+  %tmp2142 = getelementptr inbounds float* %tmp2141, i64 1
+  %tmp2143 = getelementptr inbounds float* %tmp2142, i64 1
+  %tmp2144 = getelementptr inbounds float* %tmp2143, i64 1
+  %tmp2145 = getelementptr inbounds float* %tmp2144, i64 1
+  %tmp2146 = getelementptr inbounds float* %tmp2145, i64 1
+  %tmp2147 = getelementptr inbounds float* %tmp2146, i64 1
+  %tmp2148 = getelementptr inbounds float* %tmp2147, i64 1
+  %tmp2149 = getelementptr inbounds float* %tmp2148, i64 1
+  %tmp2150 = getelementptr inbounds float* %tmp2149, i64 1
+  %tmp2151 = getelementptr inbounds float* %tmp2150, i64 1
+  %tmp2152 = getelementptr inbounds float* %tmp2151, i64 1
+  %tmp2153 = getelementptr inbounds float* %tmp2152, i64 1
+  %tmp2154 = getelementptr inbounds float* %tmp2153, i64 1
+  %tmp2155 = getelementptr inbounds float* %tmp2154, i64 1
+  %tmp2156 = getelementptr inbounds float* %tmp2155, i64 1
+  %tmp2157 = getelementptr inbounds float* %tmp2156, i64 1
+  %tmp2158 = getelementptr inbounds float* %tmp2157, i64 1
+  %tmp2159 = getelementptr inbounds float* %tmp2158, i64 1
+  %tmp2160 = getelementptr inbounds float* %tmp2159, i64 1
+  %tmp2161 = getelementptr inbounds float* %tmp2160, i64 1
+  %tmp2162 = getelementptr inbounds float* %tmp2161, i64 1
+  %tmp2163 = getelementptr inbounds float* %tmp2162, i64 1
+  %tmp2164 = getelementptr inbounds float* %tmp2163, i64 1
+  %tmp2165 = getelementptr inbounds float* %tmp2164, i64 1
+  %tmp2166 = getelementptr inbounds float* %tmp2165, i64 1
+  %tmp2167 = getelementptr inbounds float* %tmp2166, i64 1
+  %tmp2168 = getelementptr inbounds float* %tmp2167, i64 1
+  %tmp2169 = getelementptr inbounds float* %tmp2168, i64 1
+  %tmp2170 = getelementptr inbounds float* %tmp2169, i64 1
+  %tmp2171 = getelementptr inbounds float* %tmp2170, i64 1
+  %tmp2172 = getelementptr inbounds float* %tmp2171, i64 1
+  %tmp2173 = getelementptr inbounds float* %tmp2172, i64 1
+  %tmp2174 = getelementptr inbounds float* %tmp2173, i64 1
+  %tmp2175 = getelementptr inbounds float* %tmp2174, i64 1
+  %tmp2176 = getelementptr inbounds float* %tmp2175, i64 1
+  %tmp2177 = getelementptr inbounds float* %tmp2176, i64 1
+  %tmp2178 = getelementptr inbounds float* %tmp2177, i64 1
+  %tmp2179 = getelementptr inbounds float* %tmp2178, i64 1
+  %tmp2180 = getelementptr inbounds float* %tmp2179, i64 1
+  %tmp2181 = getelementptr inbounds float* %tmp2180, i64 1
+  %tmp2182 = getelementptr inbounds float* %tmp2181, i64 1
+  %tmp2183 = getelementptr inbounds float* %tmp2182, i64 1
+  %tmp2184 = getelementptr inbounds float* %tmp2183, i64 1
+  %tmp2185 = getelementptr inbounds float* %tmp2184, i64 1
+  %tmp2186 = getelementptr inbounds float* %tmp2185, i64 1
+  %tmp2187 = getelementptr inbounds float* %tmp2186, i64 1
+  %tmp2188 = getelementptr inbounds float* %tmp2187, i64 1
+  %tmp2189 = getelementptr inbounds float* %tmp2188, i64 1
+  %tmp2190 = getelementptr inbounds float* %tmp2189, i64 1
+  %tmp2191 = getelementptr inbounds float* %tmp2190, i64 1
+  %tmp2192 = getelementptr inbounds float* %tmp2191, i64 1
+  %tmp2193 = getelementptr inbounds float* %tmp2192, i64 1
+  %tmp2194 = getelementptr inbounds float* %tmp2193, i64 1
+  %tmp2195 = getelementptr inbounds float* %tmp2194, i64 1
+  %tmp2196 = getelementptr inbounds float* %tmp2195, i64 1
+  %tmp2197 = getelementptr inbounds float* %tmp2196, i64 1
+  %tmp2198 = getelementptr inbounds float* %tmp2197, i64 1
+  %tmp2199 = getelementptr inbounds float* %tmp2198, i64 1
+  %tmp2200 = getelementptr inbounds float* %tmp2199, i64 1
+  %tmp2201 = getelementptr inbounds float* %tmp2200, i64 1
+  %tmp2202 = getelementptr inbounds float* %tmp2201, i64 1
+  %tmp2203 = getelementptr inbounds float* %tmp2202, i64 1
+  %tmp2204 = getelementptr inbounds float* %tmp2203, i64 1
+  %tmp2205 = getelementptr inbounds float* %tmp2204, i64 1
+  %tmp2206 = getelementptr inbounds float* %tmp2205, i64 1
+  %tmp2207 = getelementptr inbounds float* %tmp2206, i64 1
+  %tmp2208 = getelementptr inbounds float* %tmp2207, i64 1
+  %tmp2209 = getelementptr inbounds float* %tmp2208, i64 1
+  %tmp2210 = getelementptr inbounds float* %tmp2209, i64 1
+  %tmp2211 = getelementptr inbounds float* %tmp2210, i64 1
+  %tmp2212 = getelementptr inbounds float* %tmp2211, i64 1
+  %tmp2213 = getelementptr inbounds float* %tmp2212, i64 1
+  %tmp2214 = getelementptr inbounds float* %tmp2213, i64 1
+  %tmp2215 = getelementptr inbounds float* %tmp2214, i64 1
+  %tmp2216 = getelementptr inbounds float* %tmp2215, i64 1
+  %tmp2217 = getelementptr inbounds float* %tmp2216, i64 1
+  %tmp2218 = getelementptr inbounds float* %tmp2217, i64 1
+  %tmp2219 = getelementptr inbounds float* %tmp2218, i64 1
+  %tmp2220 = getelementptr inbounds float* %tmp2219, i64 1
+  %tmp2221 = getelementptr inbounds float* %tmp2220, i64 1
+  %tmp2222 = getelementptr inbounds float* %tmp2221, i64 1
+  %tmp2223 = getelementptr inbounds float* %tmp2222, i64 1
+  %tmp2224 = getelementptr inbounds float* %tmp2223, i64 1
+  %tmp2225 = getelementptr inbounds float* %tmp2224, i64 1
+  %tmp2226 = getelementptr inbounds float* %tmp2225, i64 1
+  %tmp2227 = getelementptr inbounds float* %tmp2226, i64 1
+  %tmp2228 = getelementptr inbounds float* %tmp2227, i64 1
+  %tmp2229 = getelementptr inbounds float* %tmp2228, i64 1
+  %tmp2230 = getelementptr inbounds float* %tmp2229, i64 1
+  %tmp2231 = getelementptr inbounds float* %tmp2230, i64 1
+  %tmp2232 = getelementptr inbounds float* %tmp2231, i64 1
+  %tmp2233 = getelementptr inbounds float* %tmp2232, i64 1
+  %tmp2234 = getelementptr inbounds float* %tmp2233, i64 1
+  %tmp2235 = getelementptr inbounds float* %tmp2234, i64 1
+  %tmp2236 = getelementptr inbounds float* %tmp2235, i64 1
+  %tmp2237 = getelementptr inbounds float* %tmp2236, i64 1
+  %tmp2238 = getelementptr inbounds float* %tmp2237, i64 1
+  %tmp2239 = getelementptr inbounds float* %tmp2238, i64 1
+  %tmp2240 = getelementptr inbounds float* %tmp2239, i64 1
+  %tmp2241 = getelementptr inbounds float* %tmp2240, i64 1
+  %tmp2242 = getelementptr inbounds float* %tmp2241, i64 1
+  %tmp2243 = getelementptr inbounds float* %tmp2242, i64 1
+  %tmp2244 = getelementptr inbounds float* %tmp2243, i64 1
+  %tmp2245 = getelementptr inbounds float* %tmp2244, i64 1
+  %tmp2246 = getelementptr inbounds float* %tmp2245, i64 1
+  %tmp2247 = getelementptr inbounds float* %tmp2246, i64 1
+  %tmp2248 = getelementptr inbounds float* %tmp2247, i64 1
+  %tmp2249 = getelementptr inbounds float* %tmp2248, i64 1
+  %tmp2250 = getelementptr inbounds float* %tmp2249, i64 1
+  %tmp2251 = getelementptr inbounds float* %tmp2250, i64 1
+  %tmp2252 = getelementptr inbounds float* %tmp2251, i64 1
+  %tmp2253 = getelementptr inbounds float* %tmp2252, i64 1
+  %tmp2254 = getelementptr inbounds float* %tmp2253, i64 1
+  %tmp2255 = getelementptr inbounds float* %tmp2254, i64 1
+  %tmp2256 = getelementptr inbounds float* %tmp2255, i64 1
+  %tmp2257 = getelementptr inbounds float* %tmp2256, i64 1
+  %tmp2258 = getelementptr inbounds float* %tmp2257, i64 1
+  %tmp2259 = getelementptr inbounds float* %tmp2258, i64 1
+  %tmp2260 = getelementptr inbounds float* %tmp2259, i64 1
+  %tmp2261 = getelementptr inbounds float* %tmp2260, i64 1
+  %tmp2262 = getelementptr inbounds float* %tmp2261, i64 1
+  %tmp2263 = getelementptr inbounds float* %tmp2262, i64 1
+  %tmp2264 = getelementptr inbounds float* %tmp2263, i64 1
+  %tmp2265 = getelementptr inbounds float* %tmp2264, i64 1
+  %tmp2266 = getelementptr inbounds float* %tmp2265, i64 1
+  %tmp2267 = getelementptr inbounds float* %tmp2266, i64 1
+  %tmp2268 = getelementptr inbounds float* %tmp2267, i64 1
+  %tmp2269 = getelementptr inbounds float* %tmp2268, i64 1
+  %tmp2270 = getelementptr inbounds float* %tmp2269, i64 1
+  %tmp2271 = getelementptr inbounds float* %tmp2270, i64 1
+  %tmp2272 = getelementptr inbounds float* %tmp2271, i64 1
+  %tmp2273 = getelementptr inbounds float* %tmp2272, i64 1
+  %tmp2274 = getelementptr inbounds float* %tmp2273, i64 1
+  %tmp2275 = getelementptr inbounds float* %tmp2274, i64 1
+  %tmp2276 = getelementptr inbounds float* %tmp2275, i64 1
+  %tmp2277 = getelementptr inbounds float* %tmp2276, i64 1
+  %tmp2278 = getelementptr inbounds float* %tmp2277, i64 1
+  %tmp2279 = getelementptr inbounds float* %tmp2278, i64 1
+  %tmp2280 = getelementptr inbounds float* %tmp2279, i64 1
+  %tmp2281 = getelementptr inbounds float* %tmp2280, i64 1
+  %tmp2282 = getelementptr inbounds float* %tmp2281, i64 1
+  %tmp2283 = getelementptr inbounds float* %tmp2282, i64 1
+  %tmp2284 = getelementptr inbounds float* %tmp2283, i64 1
+  %tmp2285 = getelementptr inbounds float* %tmp2284, i64 1
+  %tmp2286 = getelementptr inbounds float* %tmp2285, i64 1
+  %tmp2287 = getelementptr inbounds float* %tmp2286, i64 1
+  %tmp2288 = getelementptr inbounds float* %tmp2287, i64 1
+  %tmp2289 = getelementptr inbounds float* %tmp2288, i64 1
+  %tmp2290 = getelementptr inbounds float* %tmp2289, i64 1
+  %tmp2291 = getelementptr inbounds float* %tmp2290, i64 1
+  %tmp2292 = getelementptr inbounds float* %tmp2291, i64 1
+  %tmp2293 = getelementptr inbounds float* %tmp2292, i64 1
+  %tmp2294 = getelementptr inbounds float* %tmp2293, i64 1
+  %tmp2295 = getelementptr inbounds float* %tmp2294, i64 1
+  %tmp2296 = getelementptr inbounds float* %tmp2295, i64 1
+  %tmp2297 = getelementptr inbounds float* %tmp2296, i64 1
+  %tmp2298 = getelementptr inbounds float* %tmp2297, i64 1
+  %tmp2299 = getelementptr inbounds float* %tmp2298, i64 1
+  %tmp2300 = getelementptr inbounds float* %tmp2299, i64 1
+  %tmp2301 = getelementptr inbounds float* %tmp2300, i64 1
+  %tmp2302 = getelementptr inbounds float* %tmp2301, i64 1
+  %tmp2303 = getelementptr inbounds float* %tmp2302, i64 1
+  %tmp2304 = getelementptr inbounds float* %tmp2303, i64 1
+  %tmp2305 = getelementptr inbounds float* %tmp2304, i64 1
+  %tmp2306 = getelementptr inbounds float* %tmp2305, i64 1
+  %tmp2307 = getelementptr inbounds float* %tmp2306, i64 1
+  %tmp2308 = getelementptr inbounds float* %tmp2307, i64 1
+  %tmp2309 = getelementptr inbounds float* %tmp2308, i64 1
+  %tmp2310 = getelementptr inbounds float* %tmp2309, i64 1
+  %tmp2311 = getelementptr inbounds float* %tmp2310, i64 1
+  %tmp2312 = getelementptr inbounds float* %tmp2311, i64 1
+  %tmp2313 = getelementptr inbounds float* %tmp2312, i64 1
+  %tmp2314 = getelementptr inbounds float* %tmp2313, i64 1
+  %tmp2315 = getelementptr inbounds float* %tmp2314, i64 1
+  %tmp2316 = getelementptr inbounds float* %tmp2315, i64 1
+  %tmp2317 = getelementptr inbounds float* %tmp2316, i64 1
+  %tmp2318 = getelementptr inbounds float* %tmp2317, i64 1
+  %tmp2319 = getelementptr inbounds float* %tmp2318, i64 1
+  %tmp2320 = getelementptr inbounds float* %tmp2319, i64 1
+  %tmp2321 = getelementptr inbounds float* %tmp2320, i64 1
+  %tmp2322 = getelementptr inbounds float* %tmp2321, i64 1
+  %tmp2323 = getelementptr inbounds float* %tmp2322, i64 1
+  %tmp2324 = getelementptr inbounds float* %tmp2323, i64 1
+  %tmp2325 = getelementptr inbounds float* %tmp2324, i64 1
+  %tmp2326 = getelementptr inbounds float* %tmp2325, i64 1
+  %tmp2327 = getelementptr inbounds float* %tmp2326, i64 1
+  %tmp2328 = getelementptr inbounds float* %tmp2327, i64 1
+  %tmp2329 = getelementptr inbounds float* %tmp2328, i64 1
+  %tmp2330 = getelementptr inbounds float* %tmp2329, i64 1
+  %tmp2331 = getelementptr inbounds float* %tmp2330, i64 1
+  %tmp2332 = getelementptr inbounds float* %tmp2331, i64 1
+  %tmp2333 = getelementptr inbounds float* %tmp2332, i64 1
+  %tmp2334 = getelementptr inbounds float* %tmp2333, i64 1
+  %tmp2335 = getelementptr inbounds float* %tmp2334, i64 1
+  %tmp2336 = getelementptr inbounds float* %tmp2335, i64 1
+  %tmp2337 = getelementptr inbounds float* %tmp2336, i64 1
+  %tmp2338 = getelementptr inbounds float* %tmp2337, i64 1
+  %tmp2339 = getelementptr inbounds float* %tmp2338, i64 1
+  %tmp2340 = getelementptr inbounds float* %tmp2339, i64 1
+  %tmp2341 = getelementptr inbounds float* %tmp2340, i64 1
+  %tmp2342 = getelementptr inbounds float* %tmp2341, i64 1
+  %tmp2343 = getelementptr inbounds float* %tmp2342, i64 1
+  %tmp2344 = getelementptr inbounds float* %tmp2343, i64 1
+  %tmp2345 = getelementptr inbounds float* %tmp2344, i64 1
+  %tmp2346 = getelementptr inbounds float* %tmp2345, i64 1
+  %tmp2347 = getelementptr inbounds float* %tmp2346, i64 1
+  %tmp2348 = getelementptr inbounds float* %tmp2347, i64 1
+  %tmp2349 = getelementptr inbounds float* %tmp2348, i64 1
+  %tmp2350 = getelementptr inbounds float* %tmp2349, i64 1
+  %tmp2351 = getelementptr inbounds float* %tmp2350, i64 1
+  %tmp2352 = getelementptr inbounds float* %tmp2351, i64 1
+  %tmp2353 = getelementptr inbounds float* %tmp2352, i64 1
+  %tmp2354 = getelementptr inbounds float* %tmp2353, i64 1
+  %tmp2355 = getelementptr inbounds float* %tmp2354, i64 1
+  %tmp2356 = getelementptr inbounds float* %tmp2355, i64 1
+  %tmp2357 = getelementptr inbounds float* %tmp2356, i64 1
+  %tmp2358 = getelementptr inbounds float* %tmp2357, i64 1
+  %tmp2359 = getelementptr inbounds float* %tmp2358, i64 1
+  %tmp2360 = getelementptr inbounds float* %tmp2359, i64 1
+  %tmp2361 = getelementptr inbounds float* %tmp2360, i64 1
+  %tmp2362 = getelementptr inbounds float* %tmp2361, i64 1
+  %tmp2363 = getelementptr inbounds float* %tmp2362, i64 1
+  %tmp2364 = getelementptr inbounds float* %tmp2363, i64 1
+  %tmp2365 = getelementptr inbounds float* %tmp2364, i64 1
+  %tmp2366 = getelementptr inbounds float* %tmp2365, i64 1
+  %tmp2367 = getelementptr inbounds float* %tmp2366, i64 1
+  %tmp2368 = getelementptr inbounds float* %tmp2367, i64 1
+  %tmp2369 = getelementptr inbounds float* %tmp2368, i64 1
+  %tmp2370 = getelementptr inbounds float* %tmp2369, i64 1
+  %tmp2371 = getelementptr inbounds float* %tmp2370, i64 1
+  %tmp2372 = getelementptr inbounds float* %tmp2371, i64 1
+  %tmp2373 = getelementptr inbounds float* %tmp2372, i64 1
+  %tmp2374 = getelementptr inbounds float* %tmp2373, i64 1
+  %tmp2375 = getelementptr inbounds float* %tmp2374, i64 1
+  %tmp2376 = getelementptr inbounds float* %tmp2375, i64 1
+  %tmp2377 = getelementptr inbounds float* %tmp2376, i64 1
+  %tmp2378 = getelementptr inbounds float* %tmp2377, i64 1
+  %tmp2379 = getelementptr inbounds float* %tmp2378, i64 1
+  %tmp2380 = getelementptr inbounds float* %tmp2379, i64 1
+  %tmp2381 = getelementptr inbounds float* %tmp2380, i64 1
+  %tmp2382 = getelementptr inbounds float* %tmp2381, i64 1
+  %tmp2383 = getelementptr inbounds float* %tmp2382, i64 1
+  %tmp2384 = getelementptr inbounds float* %tmp2383, i64 1
+  %tmp2385 = getelementptr inbounds float* %tmp2384, i64 1
+  %tmp2386 = getelementptr inbounds float* %tmp2385, i64 1
+  %tmp2387 = getelementptr inbounds float* %tmp2386, i64 1
+  %tmp2388 = getelementptr inbounds float* %tmp2387, i64 1
+  %tmp2389 = getelementptr inbounds float* %tmp2388, i64 1
+  %tmp2390 = getelementptr inbounds float* %tmp2389, i64 1
+  %tmp2391 = getelementptr inbounds float* %tmp2390, i64 1
+  %tmp2392 = getelementptr inbounds float* %tmp2391, i64 1
+  %tmp2393 = getelementptr inbounds float* %tmp2392, i64 1
+  %tmp2394 = getelementptr inbounds float* %tmp2393, i64 1
+  %tmp2395 = getelementptr inbounds float* %tmp2394, i64 1
+  %tmp2396 = getelementptr inbounds float* %tmp2395, i64 1
+  %tmp2397 = getelementptr inbounds float* %tmp2396, i64 1
+  %tmp2398 = getelementptr inbounds float* %tmp2397, i64 1
+  %tmp2399 = getelementptr inbounds float* %tmp2398, i64 1
+  %tmp2400 = getelementptr inbounds float* %tmp2399, i64 1
+  %tmp2401 = getelementptr inbounds float* %tmp2400, i64 1
+  %tmp2402 = getelementptr inbounds float* %tmp2401, i64 1
+  %tmp2403 = getelementptr inbounds float* %tmp2402, i64 1
+  %tmp2404 = getelementptr inbounds float* %tmp2403, i64 1
+  %tmp2405 = getelementptr inbounds float* %tmp2404, i64 1
+  %tmp2406 = getelementptr inbounds float* %tmp2405, i64 1
+  %tmp2407 = getelementptr inbounds float* %tmp2406, i64 1
+  %tmp2408 = getelementptr inbounds float* %tmp2407, i64 1
+  %tmp2409 = getelementptr inbounds float* %tmp2408, i64 1
+  %tmp2410 = getelementptr inbounds float* %tmp2409, i64 1
+  %tmp2411 = getelementptr inbounds float* %tmp2410, i64 1
+  %tmp2412 = getelementptr inbounds float* %tmp2411, i64 1
+  %tmp2413 = getelementptr inbounds float* %tmp2412, i64 1
+  %tmp2414 = getelementptr inbounds float* %tmp2413, i64 1
+  %tmp2415 = getelementptr inbounds float* %tmp2414, i64 1
+  %tmp2416 = getelementptr inbounds float* %tmp2415, i64 1
+  %tmp2417 = getelementptr inbounds float* %tmp2416, i64 1
+  %tmp2418 = getelementptr inbounds float* %tmp2417, i64 1
+  %tmp2419 = getelementptr inbounds float* %tmp2418, i64 1
+  %tmp2420 = getelementptr inbounds float* %tmp2419, i64 1
+  %tmp2421 = getelementptr inbounds float* %tmp2420, i64 1
+  %tmp2422 = getelementptr inbounds float* %tmp2421, i64 1
+  %tmp2423 = getelementptr inbounds float* %tmp2422, i64 1
+  %tmp2424 = getelementptr inbounds float* %tmp2423, i64 1
+  %tmp2425 = getelementptr inbounds float* %tmp2424, i64 1
+  %tmp2426 = getelementptr inbounds float* %tmp2425, i64 1
+  %tmp2427 = getelementptr inbounds float* %tmp2426, i64 1
+  %tmp2428 = getelementptr inbounds float* %tmp2427, i64 1
+  %tmp2429 = getelementptr inbounds float* %tmp2428, i64 1
+  %tmp2430 = getelementptr inbounds float* %tmp2429, i64 1
+  %tmp2431 = getelementptr inbounds float* %tmp2430, i64 1
+  %tmp2432 = getelementptr inbounds float* %tmp2431, i64 1
+  %tmp2433 = getelementptr inbounds float* %tmp2432, i64 1
+  %tmp2434 = getelementptr inbounds float* %tmp2433, i64 1
+  %tmp2435 = getelementptr inbounds float* %tmp2434, i64 1
+  %tmp2436 = getelementptr inbounds float* %tmp2435, i64 1
+  %tmp2437 = getelementptr inbounds float* %tmp2436, i64 1
+  %tmp2438 = getelementptr inbounds float* %tmp2437, i64 1
+  %tmp2439 = getelementptr inbounds float* %tmp2438, i64 1
+  %tmp2440 = getelementptr inbounds float* %tmp2439, i64 1
+  %tmp2441 = getelementptr inbounds float* %tmp2440, i64 1
+  %tmp2442 = getelementptr inbounds float* %tmp2441, i64 1
+  %tmp2443 = getelementptr inbounds float* %tmp2442, i64 1
+  %tmp2444 = getelementptr inbounds float* %tmp2443, i64 1
+  %tmp2445 = getelementptr inbounds float* %tmp2444, i64 1
+  %tmp2446 = getelementptr inbounds float* %tmp2445, i64 1
+  %tmp2447 = getelementptr inbounds float* %tmp2446, i64 1
+  %tmp2448 = getelementptr inbounds float* %tmp2447, i64 1
+  %tmp2449 = getelementptr inbounds float* %tmp2448, i64 1
+  %tmp2450 = getelementptr inbounds float* %tmp2449, i64 1
+  %tmp2451 = getelementptr inbounds float* %tmp2450, i64 1
+  %tmp2452 = getelementptr inbounds float* %tmp2451, i64 1
+  %tmp2453 = getelementptr inbounds float* %tmp2452, i64 1
+  %tmp2454 = getelementptr inbounds float* %tmp2453, i64 1
+  %tmp2455 = getelementptr inbounds float* %tmp2454, i64 1
+  %tmp2456 = getelementptr inbounds float* %tmp2455, i64 1
+  %tmp2457 = getelementptr inbounds float* %tmp2456, i64 1
+  %tmp2458 = getelementptr inbounds float* %tmp2457, i64 1
+  %tmp2459 = getelementptr inbounds float* %tmp2458, i64 1
+  %tmp2460 = getelementptr inbounds float* %tmp2459, i64 1
+  %tmp2461 = getelementptr inbounds float* %tmp2460, i64 1
+  %tmp2462 = getelementptr inbounds float* %tmp2461, i64 1
+  %tmp2463 = getelementptr inbounds float* %tmp2462, i64 1
+  %tmp2464 = getelementptr inbounds float* %tmp2463, i64 1
+  %tmp2465 = getelementptr inbounds float* %tmp2464, i64 1
+  %tmp2466 = getelementptr inbounds float* %tmp2465, i64 1
+  %tmp2467 = getelementptr inbounds float* %tmp2466, i64 1
+  %tmp2468 = getelementptr inbounds float* %tmp2467, i64 1
+  %tmp2469 = getelementptr inbounds float* %tmp2468, i64 1
+  %tmp2470 = getelementptr inbounds float* %tmp2469, i64 1
+  %tmp2471 = getelementptr inbounds float* %tmp2470, i64 1
+  %tmp2472 = getelementptr inbounds float* %tmp2471, i64 1
+  %tmp2473 = getelementptr inbounds float* %tmp2472, i64 1
+  %tmp2474 = getelementptr inbounds float* %tmp2473, i64 1
+  %tmp2475 = getelementptr inbounds float* %tmp2474, i64 1
+  %tmp2476 = getelementptr inbounds float* %tmp2475, i64 1
+  %tmp2477 = getelementptr inbounds float* %tmp2476, i64 1
+  %tmp2478 = getelementptr inbounds float* %tmp2477, i64 1
+  %tmp2479 = getelementptr inbounds float* %tmp2478, i64 1
+  %tmp2480 = getelementptr inbounds float* %tmp2479, i64 1
+  %tmp2481 = getelementptr inbounds float* %tmp2480, i64 1
+  %tmp2482 = getelementptr inbounds float* %tmp2481, i64 1
+  %tmp2483 = getelementptr inbounds float* %tmp2482, i64 1
+  %tmp2484 = getelementptr inbounds float* %tmp2483, i64 1
+  %tmp2485 = getelementptr inbounds float* %tmp2484, i64 1
+  %tmp2486 = getelementptr inbounds float* %tmp2485, i64 1
+  %tmp2487 = getelementptr inbounds float* %tmp2486, i64 1
+  %tmp2488 = getelementptr inbounds float* %tmp2487, i64 1
+  %tmp2489 = getelementptr inbounds float* %tmp2488, i64 1
+  %tmp2490 = getelementptr inbounds float* %tmp2489, i64 1
+  %tmp2491 = getelementptr inbounds float* %tmp2490, i64 1
+  %tmp2492 = getelementptr inbounds float* %tmp2491, i64 1
+  %tmp2493 = getelementptr inbounds float* %tmp2492, i64 1
+  %tmp2494 = getelementptr inbounds float* %tmp2493, i64 1
+  %tmp2495 = getelementptr inbounds float* %tmp2494, i64 1
+  %tmp2496 = getelementptr inbounds float* %tmp2495, i64 1
+  %tmp2497 = getelementptr inbounds float* %tmp2496, i64 1
+  %tmp2498 = getelementptr inbounds float* %tmp2497, i64 1
+  %tmp2499 = getelementptr inbounds float* %tmp2498, i64 1
+  %tmp2500 = getelementptr inbounds float* %tmp2499, i64 1
+  %tmp2501 = getelementptr inbounds float* %tmp2500, i64 1
+  %tmp2502 = getelementptr inbounds float* %tmp2501, i64 1
+  %tmp2503 = getelementptr inbounds float* %tmp2502, i64 1
+  %tmp2504 = getelementptr inbounds float* %tmp2503, i64 1
+  %tmp2505 = getelementptr inbounds float* %tmp2504, i64 1
+  %tmp2506 = getelementptr inbounds float* %tmp2505, i64 1
+  %tmp2507 = getelementptr inbounds float* %tmp2506, i64 1
+  %tmp2508 = getelementptr inbounds float* %tmp2507, i64 1
+  %tmp2509 = getelementptr inbounds float* %tmp2508, i64 1
+  %tmp2510 = getelementptr inbounds float* %tmp2509, i64 1
+  %tmp2511 = getelementptr inbounds float* %tmp2510, i64 1
+  %tmp2512 = getelementptr inbounds float* %tmp2511, i64 1
+  %tmp2513 = getelementptr inbounds float* %tmp2512, i64 1
+  %tmp2514 = getelementptr inbounds float* %tmp2513, i64 1
+  %tmp2515 = getelementptr inbounds float* %tmp2514, i64 1
+  %tmp2516 = getelementptr inbounds float* %tmp2515, i64 1
+  %tmp2517 = getelementptr inbounds float* %tmp2516, i64 1
+  %tmp2518 = getelementptr inbounds float* %tmp2517, i64 1
+  %tmp2519 = getelementptr inbounds float* %tmp2518, i64 1
+  %tmp2520 = getelementptr inbounds float* %tmp2519, i64 1
+  %tmp2521 = getelementptr inbounds float* %tmp2520, i64 1
+  %tmp2522 = getelementptr inbounds float* %tmp2521, i64 1
+  %tmp2523 = getelementptr inbounds float* %tmp2522, i64 1
+  %tmp2524 = getelementptr inbounds float* %tmp2523, i64 1
+  %tmp2525 = getelementptr inbounds float* %tmp2524, i64 1
+  %tmp2526 = getelementptr inbounds float* %tmp2525, i64 1
+  %tmp2527 = getelementptr inbounds float* %tmp2526, i64 1
+  %tmp2528 = getelementptr inbounds float* %tmp2527, i64 1
+  %tmp2529 = getelementptr inbounds float* %tmp2528, i64 1
+  %tmp2530 = getelementptr inbounds float* %tmp2529, i64 1
+  %tmp2531 = getelementptr inbounds float* %tmp2530, i64 1
+  %tmp2532 = getelementptr inbounds float* %tmp2531, i64 1
+  %tmp2533 = getelementptr inbounds float* %tmp2532, i64 1
+  %tmp2534 = getelementptr inbounds float* %tmp2533, i64 1
+  %tmp2535 = getelementptr inbounds float* %tmp2534, i64 1
+  %tmp2536 = getelementptr inbounds float* %tmp2535, i64 1
+  %tmp2537 = getelementptr inbounds float* %tmp2536, i64 1
+  %tmp2538 = getelementptr inbounds float* %tmp2537, i64 1
+  %tmp2539 = getelementptr inbounds float* %tmp2538, i64 1
+  %tmp2540 = getelementptr inbounds float* %tmp2539, i64 1
+  %tmp2541 = getelementptr inbounds float* %tmp2540, i64 1
+  %tmp2542 = getelementptr inbounds float* %tmp2541, i64 1
+  %tmp2543 = getelementptr inbounds float* %tmp2542, i64 1
+  %tmp2544 = getelementptr inbounds float* %tmp2543, i64 1
+  %tmp2545 = getelementptr inbounds float* %tmp2544, i64 1
+  %tmp2546 = getelementptr inbounds float* %tmp2545, i64 1
+  %tmp2547 = getelementptr inbounds float* %tmp2546, i64 1
+  %tmp2548 = getelementptr inbounds float* %tmp2547, i64 1
+  %tmp2549 = getelementptr inbounds float* %tmp2548, i64 1
+  %tmp2550 = getelementptr inbounds float* %tmp2549, i64 1
+  %tmp2551 = getelementptr inbounds float* %tmp2550, i64 1
+  %tmp2552 = getelementptr inbounds float* %tmp2551, i64 1
+  %tmp2553 = getelementptr inbounds float* %tmp2552, i64 1
+  %tmp2554 = getelementptr inbounds float* %tmp2553, i64 1
+  %tmp2555 = getelementptr inbounds float* %tmp2554, i64 1
+  %tmp2556 = getelementptr inbounds float* %tmp2555, i64 1
+  %tmp2557 = getelementptr inbounds float* %tmp2556, i64 1
+  %tmp2558 = getelementptr inbounds float* %tmp2557, i64 1
+  %tmp2559 = getelementptr inbounds float* %tmp2558, i64 1
+  %tmp2560 = getelementptr inbounds float* %tmp2559, i64 1
+  %tmp2561 = getelementptr inbounds float* %tmp2560, i64 1
+  %tmp2562 = getelementptr inbounds float* %tmp2561, i64 1
+  %tmp2563 = getelementptr inbounds float* %tmp2562, i64 1
+  %tmp2564 = getelementptr inbounds float* %tmp2563, i64 1
+  %tmp2565 = getelementptr inbounds float* %tmp2564, i64 1
+  %tmp2566 = getelementptr inbounds float* %tmp2565, i64 1
+  %tmp2567 = getelementptr inbounds float* %tmp2566, i64 1
+  %tmp2568 = getelementptr inbounds float* %tmp2567, i64 1
+  %tmp2569 = getelementptr inbounds float* %tmp2568, i64 1
+  %tmp2570 = getelementptr inbounds float* %tmp2569, i64 1
+  %tmp2571 = getelementptr inbounds float* %tmp2570, i64 1
+  %tmp2572 = getelementptr inbounds float* %tmp2571, i64 1
+  %tmp2573 = getelementptr inbounds float* %tmp2572, i64 1
+  %tmp2574 = getelementptr inbounds float* %tmp2573, i64 1
+  %tmp2575 = getelementptr inbounds float* %tmp2574, i64 1
+  %tmp2576 = getelementptr inbounds float* %tmp2575, i64 1
+  %tmp2577 = getelementptr inbounds float* %tmp2576, i64 1
+  %tmp2578 = getelementptr inbounds float* %tmp2577, i64 1
+  %tmp2579 = getelementptr inbounds float* %tmp2578, i64 1
+  %tmp2580 = getelementptr inbounds float* %tmp2579, i64 1
+  %tmp2581 = getelementptr inbounds float* %tmp2580, i64 1
+  %tmp2582 = getelementptr inbounds float* %tmp2581, i64 1
+  %tmp2583 = getelementptr inbounds float* %tmp2582, i64 1
+  %tmp2584 = getelementptr inbounds float* %tmp2583, i64 1
+  %tmp2585 = getelementptr inbounds float* %tmp2584, i64 1
+  %tmp2586 = getelementptr inbounds float* %tmp2585, i64 1
+  %tmp2587 = getelementptr inbounds float* %tmp2586, i64 1
+  %tmp2588 = getelementptr inbounds float* %tmp2587, i64 1
+  %tmp2589 = getelementptr inbounds float* %tmp2588, i64 1
+  %tmp2590 = getelementptr inbounds float* %tmp2589, i64 1
+  %tmp2591 = getelementptr inbounds float* %tmp2590, i64 1
+  %tmp2592 = getelementptr inbounds float* %tmp2591, i64 1
+  %tmp2593 = getelementptr inbounds float* %tmp2592, i64 1
+  %tmp2594 = getelementptr inbounds float* %tmp2593, i64 1
+  %tmp2595 = getelementptr inbounds float* %tmp2594, i64 1
+  %tmp2596 = getelementptr inbounds float* %tmp2595, i64 1
+  %tmp2597 = getelementptr inbounds float* %tmp2596, i64 1
+  %tmp2598 = getelementptr inbounds float* %tmp2597, i64 1
+  %tmp2599 = getelementptr inbounds float* %tmp2598, i64 1
+  %tmp2600 = getelementptr inbounds float* %tmp2599, i64 1
+  %tmp2601 = getelementptr inbounds float* %tmp2600, i64 1
+  %tmp2602 = getelementptr inbounds float* %tmp2601, i64 1
+  %tmp2603 = getelementptr inbounds float* %tmp2602, i64 1
+  %tmp2604 = getelementptr inbounds float* %tmp2603, i64 1
+  %tmp2605 = getelementptr inbounds float* %tmp2604, i64 1
+  %tmp2606 = getelementptr inbounds float* %tmp2605, i64 1
+  %tmp2607 = getelementptr inbounds float* %tmp2606, i64 1
+  %tmp2608 = getelementptr inbounds float* %tmp2607, i64 1
+  %tmp2609 = getelementptr inbounds float* %tmp2608, i64 1
+  %tmp2610 = getelementptr inbounds float* %tmp2609, i64 1
+  %tmp2611 = getelementptr inbounds float* %tmp2610, i64 1
+  %tmp2612 = getelementptr inbounds float* %tmp2611, i64 1
+  %tmp2613 = getelementptr inbounds float* %tmp2612, i64 1
+  %tmp2614 = getelementptr inbounds float* %tmp2613, i64 1
+  %tmp2615 = getelementptr inbounds float* %tmp2614, i64 1
+  %tmp2616 = getelementptr inbounds float* %tmp2615, i64 1
+  %tmp2617 = getelementptr inbounds float* %tmp2616, i64 1
+  %tmp2618 = getelementptr inbounds float* %tmp2617, i64 1
+  %tmp2619 = getelementptr inbounds float* %tmp2618, i64 1
+  %tmp2620 = getelementptr inbounds float* %tmp2619, i64 1
+  %tmp2621 = getelementptr inbounds float* %tmp2620, i64 1
+  %tmp2622 = getelementptr inbounds float* %tmp2621, i64 1
+  %tmp2623 = getelementptr inbounds float* %tmp2622, i64 1
+  %tmp2624 = getelementptr inbounds float* %tmp2623, i64 1
+  %tmp2625 = getelementptr inbounds float* %tmp2624, i64 1
+  %tmp2626 = getelementptr inbounds float* %tmp2625, i64 1
+  %tmp2627 = getelementptr inbounds float* %tmp2626, i64 1
+  %tmp2628 = getelementptr inbounds float* %tmp2627, i64 1
+  %tmp2629 = getelementptr inbounds float* %tmp2628, i64 1
+  %tmp2630 = getelementptr inbounds float* %tmp2629, i64 1
+  %tmp2631 = getelementptr inbounds float* %tmp2630, i64 1
+  %tmp2632 = getelementptr inbounds float* %tmp2631, i64 1
+  %tmp2633 = getelementptr inbounds float* %tmp2632, i64 1
+  %tmp2634 = getelementptr inbounds float* %tmp2633, i64 1
+  %tmp2635 = getelementptr inbounds float* %tmp2634, i64 1
+  %tmp2636 = getelementptr inbounds float* %tmp2635, i64 1
+  %tmp2637 = getelementptr inbounds float* %tmp2636, i64 1
+  %tmp2638 = getelementptr inbounds float* %tmp2637, i64 1
+  %tmp2639 = getelementptr inbounds float* %tmp2638, i64 1
+  %tmp2640 = getelementptr inbounds float* %tmp2639, i64 1
+  %tmp2641 = getelementptr inbounds float* %tmp2640, i64 1
+  %tmp2642 = getelementptr inbounds float* %tmp2641, i64 1
+  %tmp2643 = getelementptr inbounds float* %tmp2642, i64 1
+  %tmp2644 = getelementptr inbounds float* %tmp2643, i64 1
+  %tmp2645 = getelementptr inbounds float* %tmp2644, i64 1
+  %tmp2646 = getelementptr inbounds float* %tmp2645, i64 1
+  %tmp2647 = getelementptr inbounds float* %tmp2646, i64 1
+  %tmp2648 = getelementptr inbounds float* %tmp2647, i64 1
+  %tmp2649 = getelementptr inbounds float* %tmp2648, i64 1
+  %tmp2650 = getelementptr inbounds float* %tmp2649, i64 1
+  %tmp2651 = getelementptr inbounds float* %tmp2650, i64 1
+  %tmp2652 = getelementptr inbounds float* %tmp2651, i64 1
+  %tmp2653 = getelementptr inbounds float* %tmp2652, i64 1
+  %tmp2654 = getelementptr inbounds float* %tmp2653, i64 1
+  %tmp2655 = getelementptr inbounds float* %tmp2654, i64 1
+  %tmp2656 = getelementptr inbounds float* %tmp2655, i64 1
+  %tmp2657 = getelementptr inbounds float* %tmp2656, i64 1
+  %tmp2658 = getelementptr inbounds float* %tmp2657, i64 1
+  %tmp2659 = getelementptr inbounds float* %tmp2658, i64 1
+  %tmp2660 = getelementptr inbounds float* %tmp2659, i64 1
+  %tmp2661 = getelementptr inbounds float* %tmp2660, i64 1
+  %tmp2662 = getelementptr inbounds float* %tmp2661, i64 1
+  %tmp2663 = getelementptr inbounds float* %tmp2662, i64 1
+  %tmp2664 = getelementptr inbounds float* %tmp2663, i64 1
+  %tmp2665 = getelementptr inbounds float* %tmp2664, i64 1
+  %tmp2666 = getelementptr inbounds float* %tmp2665, i64 1
+  %tmp2667 = getelementptr inbounds float* %tmp2666, i64 1
+  %tmp2668 = getelementptr inbounds float* %tmp2667, i64 1
+  %tmp2669 = getelementptr inbounds float* %tmp2668, i64 1
+  %tmp2670 = getelementptr inbounds float* %tmp2669, i64 1
+  %tmp2671 = getelementptr inbounds float* %tmp2670, i64 1
+  %tmp2672 = getelementptr inbounds float* %tmp2671, i64 1
+  %tmp2673 = getelementptr inbounds float* %tmp2672, i64 1
+  %tmp2674 = getelementptr inbounds float* %tmp2673, i64 1
+  %tmp2675 = getelementptr inbounds float* %tmp2674, i64 1
+  %tmp2676 = getelementptr inbounds float* %tmp2675, i64 1
+  %tmp2677 = getelementptr inbounds float* %tmp2676, i64 1
+  %tmp2678 = getelementptr inbounds float* %tmp2677, i64 1
+  %tmp2679 = getelementptr inbounds float* %tmp2678, i64 1
+  %tmp2680 = getelementptr inbounds float* %tmp2679, i64 1
+  %tmp2681 = getelementptr inbounds float* %tmp2680, i64 1
+  %tmp2682 = getelementptr inbounds float* %tmp2681, i64 1
+  %tmp2683 = getelementptr inbounds float* %tmp2682, i64 1
+  %tmp2684 = getelementptr inbounds float* %tmp2683, i64 1
+  %tmp2685 = getelementptr inbounds float* %tmp2684, i64 1
+  %tmp2686 = getelementptr inbounds float* %tmp2685, i64 1
+  %tmp2687 = getelementptr inbounds float* %tmp2686, i64 1
+  %tmp2688 = getelementptr inbounds float* %tmp2687, i64 1
+  %tmp2689 = getelementptr inbounds float* %tmp2688, i64 1
+  %tmp2690 = getelementptr inbounds float* %tmp2689, i64 1
+  %tmp2691 = getelementptr inbounds float* %tmp2690, i64 1
+  %tmp2692 = getelementptr inbounds float* %tmp2691, i64 1
+  %tmp2693 = getelementptr inbounds float* %tmp2692, i64 1
+  %tmp2694 = getelementptr inbounds float* %tmp2693, i64 1
+  %tmp2695 = getelementptr inbounds float* %tmp2694, i64 1
+  %tmp2696 = getelementptr inbounds float* %tmp2695, i64 1
+  %tmp2697 = getelementptr inbounds float* %tmp2696, i64 1
+  %tmp2698 = getelementptr inbounds float* %tmp2697, i64 1
+  %tmp2699 = getelementptr inbounds float* %tmp2698, i64 1
+  %tmp2700 = getelementptr inbounds float* %tmp2699, i64 1
+  %tmp2701 = getelementptr inbounds float* %tmp2700, i64 1
+  %tmp2702 = getelementptr inbounds float* %tmp2701, i64 1
+  %tmp2703 = getelementptr inbounds float* %tmp2702, i64 1
+  %tmp2704 = getelementptr inbounds float* %tmp2703, i64 1
+  %tmp2705 = getelementptr inbounds float* %tmp2704, i64 1
+  %tmp2706 = getelementptr inbounds float* %tmp2705, i64 1
+  %tmp2707 = getelementptr inbounds float* %tmp2706, i64 1
+  %tmp2708 = getelementptr inbounds float* %tmp2707, i64 1
+  %tmp2709 = getelementptr inbounds float* %tmp2708, i64 1
+  %tmp2710 = getelementptr inbounds float* %tmp2709, i64 1
+  %tmp2711 = getelementptr inbounds float* %tmp2710, i64 1
+  %tmp2712 = getelementptr inbounds float* %tmp2711, i64 1
+  %tmp2713 = getelementptr inbounds float* %tmp2712, i64 1
+  %tmp2714 = getelementptr inbounds float* %tmp2713, i64 1
+  %tmp2715 = getelementptr inbounds float* %tmp2714, i64 1
+  %tmp2716 = getelementptr inbounds float* %tmp2715, i64 1
+  %tmp2717 = getelementptr inbounds float* %tmp2716, i64 1
+  %tmp2718 = getelementptr inbounds float* %tmp2717, i64 1
+  %tmp2719 = getelementptr inbounds float* %tmp2718, i64 1
+  %tmp2720 = getelementptr inbounds float* %tmp2719, i64 1
+  %tmp2721 = getelementptr inbounds float* %tmp2720, i64 1
+  %tmp2722 = getelementptr inbounds float* %tmp2721, i64 1
+  %tmp2723 = getelementptr inbounds float* %tmp2722, i64 1
+  %tmp2724 = getelementptr inbounds float* %tmp2723, i64 1
+  %tmp2725 = getelementptr inbounds float* %tmp2724, i64 1
+  %tmp2726 = getelementptr inbounds float* %tmp2725, i64 1
+  %tmp2727 = getelementptr inbounds float* %tmp2726, i64 1
+  %tmp2728 = getelementptr inbounds float* %tmp2727, i64 1
+  %tmp2729 = getelementptr inbounds float* %tmp2728, i64 1
+  %tmp2730 = getelementptr inbounds float* %tmp2729, i64 1
+  %tmp2731 = getelementptr inbounds float* %tmp2730, i64 1
+  %tmp2732 = getelementptr inbounds float* %tmp2731, i64 1
+  %tmp2733 = getelementptr inbounds float* %tmp2732, i64 1
+  %tmp2734 = getelementptr inbounds float* %tmp2733, i64 1
+  %tmp2735 = getelementptr inbounds float* %tmp2734, i64 1
+  %tmp2736 = getelementptr inbounds float* %tmp2735, i64 1
+  %tmp2737 = getelementptr inbounds float* %tmp2736, i64 1
+  %tmp2738 = getelementptr inbounds float* %tmp2737, i64 1
+  %tmp2739 = getelementptr inbounds float* %tmp2738, i64 1
+  %tmp2740 = getelementptr inbounds float* %tmp2739, i64 1
+  %tmp2741 = getelementptr inbounds float* %tmp2740, i64 1
+  %tmp2742 = getelementptr inbounds float* %tmp2741, i64 1
+  %tmp2743 = getelementptr inbounds float* %tmp2742, i64 1
+  %tmp2744 = getelementptr inbounds float* %tmp2743, i64 1
+  %tmp2745 = getelementptr inbounds float* %tmp2744, i64 1
+  %tmp2746 = getelementptr inbounds float* %tmp2745, i64 1
+  %tmp2747 = getelementptr inbounds float* %tmp2746, i64 1
+  %tmp2748 = getelementptr inbounds float* %tmp2747, i64 1
+  %tmp2749 = getelementptr inbounds float* %tmp2748, i64 1
+  %tmp2750 = getelementptr inbounds float* %tmp2749, i64 1
+  %tmp2751 = getelementptr inbounds float* %tmp2750, i64 1
+  %tmp2752 = getelementptr inbounds float* %tmp2751, i64 1
+  %tmp2753 = getelementptr inbounds float* %tmp2752, i64 1
+  %tmp2754 = getelementptr inbounds float* %tmp2753, i64 1
+  %tmp2755 = getelementptr inbounds float* %tmp2754, i64 1
+  %tmp2756 = getelementptr inbounds float* %tmp2755, i64 1
+  %tmp2757 = getelementptr inbounds float* %tmp2756, i64 1
+  %tmp2758 = getelementptr inbounds float* %tmp2757, i64 1
+  %tmp2759 = getelementptr inbounds float* %tmp2758, i64 1
+  %tmp2760 = getelementptr inbounds float* %tmp2759, i64 1
+  %tmp2761 = getelementptr inbounds float* %tmp2760, i64 1
+  %tmp2762 = getelementptr inbounds float* %tmp2761, i64 1
+  %tmp2763 = getelementptr inbounds float* %tmp2762, i64 1
+  %tmp2764 = getelementptr inbounds float* %tmp2763, i64 1
+  %tmp2765 = getelementptr inbounds float* %tmp2764, i64 1
+  %tmp2766 = getelementptr inbounds float* %tmp2765, i64 1
+  %tmp2767 = getelementptr inbounds float* %tmp2766, i64 1
+  %tmp2768 = getelementptr inbounds float* %tmp2767, i64 1
+  %tmp2769 = getelementptr inbounds float* %tmp2768, i64 1
+  %tmp2770 = getelementptr inbounds float* %tmp2769, i64 1
+  %tmp2771 = getelementptr inbounds float* %tmp2770, i64 1
+  %tmp2772 = getelementptr inbounds float* %tmp2771, i64 1
+  %tmp2773 = getelementptr inbounds float* %tmp2772, i64 1
+  %tmp2774 = getelementptr inbounds float* %tmp2773, i64 1
+  %tmp2775 = getelementptr inbounds float* %tmp2774, i64 1
+  %tmp2776 = getelementptr inbounds float* %tmp2775, i64 1
+  %tmp2777 = getelementptr inbounds float* %tmp2776, i64 1
+  %tmp2778 = getelementptr inbounds float* %tmp2777, i64 1
+  %tmp2779 = getelementptr inbounds float* %tmp2778, i64 1
+  %tmp2780 = getelementptr inbounds float* %tmp2779, i64 1
+  %tmp2781 = getelementptr inbounds float* %tmp2780, i64 1
+  %tmp2782 = getelementptr inbounds float* %tmp2781, i64 1
+  %tmp2783 = getelementptr inbounds float* %tmp2782, i64 1
+  %tmp2784 = getelementptr inbounds float* %tmp2783, i64 1
+  %tmp2785 = getelementptr inbounds float* %tmp2784, i64 1
+  %tmp2786 = getelementptr inbounds float* %tmp2785, i64 1
+  %tmp2787 = getelementptr inbounds float* %tmp2786, i64 1
+  %tmp2788 = getelementptr inbounds float* %tmp2787, i64 1
+  %tmp2789 = getelementptr inbounds float* %tmp2788, i64 1
+  %tmp2790 = getelementptr inbounds float* %tmp2789, i64 1
+  %tmp2791 = getelementptr inbounds float* %tmp2790, i64 1
+  %tmp2792 = getelementptr inbounds float* %tmp2791, i64 1
+  %tmp2793 = getelementptr inbounds float* %tmp2792, i64 1
+  %tmp2794 = getelementptr inbounds float* %tmp2793, i64 1
+  %tmp2795 = getelementptr inbounds float* %tmp2794, i64 1
+  %tmp2796 = getelementptr inbounds float* %tmp2795, i64 1
+  %tmp2797 = getelementptr inbounds float* %tmp2796, i64 1
+  %tmp2798 = getelementptr inbounds float* %tmp2797, i64 1
+  %tmp2799 = getelementptr inbounds float* %tmp2798, i64 1
+  %tmp2800 = getelementptr inbounds float* %tmp2799, i64 1
+  %tmp2801 = getelementptr inbounds float* %tmp2800, i64 1
+  %tmp2802 = getelementptr inbounds float* %tmp2801, i64 1
+  %tmp2803 = getelementptr inbounds float* %tmp2802, i64 1
+  %tmp2804 = getelementptr inbounds float* %tmp2803, i64 1
+  %tmp2805 = getelementptr inbounds float* %tmp2804, i64 1
+  %tmp2806 = getelementptr inbounds float* %tmp2805, i64 1
+  %tmp2807 = getelementptr inbounds float* %tmp2806, i64 1
+  %tmp2808 = getelementptr inbounds float* %tmp2807, i64 1
+  %tmp2809 = getelementptr inbounds float* %tmp2808, i64 1
+  %tmp2810 = getelementptr inbounds float* %tmp2809, i64 1
+  %tmp2811 = getelementptr inbounds float* %tmp2810, i64 1
+  %tmp2812 = getelementptr inbounds float* %tmp2811, i64 1
+  %tmp2813 = getelementptr inbounds float* %tmp2812, i64 1
+  %tmp2814 = getelementptr inbounds float* %tmp2813, i64 1
+  %tmp2815 = getelementptr inbounds float* %tmp2814, i64 1
+  %tmp2816 = getelementptr inbounds float* %tmp2815, i64 1
+  %tmp2817 = getelementptr inbounds float* %tmp2816, i64 1
+  %tmp2818 = getelementptr inbounds float* %tmp2817, i64 1
+  %tmp2819 = getelementptr inbounds float* %tmp2818, i64 1
+  %tmp2820 = getelementptr inbounds float* %tmp2819, i64 1
+  %tmp2821 = getelementptr inbounds float* %tmp2820, i64 1
+  %tmp2822 = getelementptr inbounds float* %tmp2821, i64 1
+  %tmp2823 = getelementptr inbounds float* %tmp2822, i64 1
+  %tmp2824 = getelementptr inbounds float* %tmp2823, i64 1
+  %tmp2825 = getelementptr inbounds float* %tmp2824, i64 1
+  %tmp2826 = getelementptr inbounds float* %tmp2825, i64 1
+  %tmp2827 = getelementptr inbounds float* %tmp2826, i64 1
+  %tmp2828 = getelementptr inbounds float* %tmp2827, i64 1
+  %tmp2829 = getelementptr inbounds float* %tmp2828, i64 1
+  %tmp2830 = getelementptr inbounds float* %tmp2829, i64 1
+  %tmp2831 = getelementptr inbounds float* %tmp2830, i64 1
+  %tmp2832 = getelementptr inbounds float* %tmp2831, i64 1
+  %tmp2833 = getelementptr inbounds float* %tmp2832, i64 1
+  %tmp2834 = getelementptr inbounds float* %tmp2833, i64 1
+  %tmp2835 = getelementptr inbounds float* %tmp2834, i64 1
+  %tmp2836 = getelementptr inbounds float* %tmp2835, i64 1
+  %tmp2837 = getelementptr inbounds float* %tmp2836, i64 1
+  %tmp2838 = getelementptr inbounds float* %tmp2837, i64 1
+  %tmp2839 = getelementptr inbounds float* %tmp2838, i64 1
+  %tmp2840 = getelementptr inbounds float* %tmp2839, i64 1
+  %tmp2841 = getelementptr inbounds float* %tmp2840, i64 1
+  %tmp2842 = getelementptr inbounds float* %tmp2841, i64 1
+  %tmp2843 = getelementptr inbounds float* %tmp2842, i64 1
+  %tmp2844 = getelementptr inbounds float* %tmp2843, i64 1
+  %tmp2845 = getelementptr inbounds float* %tmp2844, i64 1
+  %tmp2846 = getelementptr inbounds float* %tmp2845, i64 1
+  %tmp2847 = getelementptr inbounds float* %tmp2846, i64 1
+  %tmp2848 = getelementptr inbounds float* %tmp2847, i64 1
+  %tmp2849 = getelementptr inbounds float* %tmp2848, i64 1
+  %tmp2850 = getelementptr inbounds float* %tmp2849, i64 1
+  %tmp2851 = getelementptr inbounds float* %tmp2850, i64 1
+  %tmp2852 = getelementptr inbounds float* %tmp2851, i64 1
+  %tmp2853 = getelementptr inbounds float* %tmp2852, i64 1
+  %tmp2854 = getelementptr inbounds float* %tmp2853, i64 1
+  %tmp2855 = getelementptr inbounds float* %tmp2854, i64 1
+  %tmp2856 = getelementptr inbounds float* %tmp2855, i64 1
+  %tmp2857 = getelementptr inbounds float* %tmp2856, i64 1
+  %tmp2858 = getelementptr inbounds float* %tmp2857, i64 1
+  %tmp2859 = getelementptr inbounds float* %tmp2858, i64 1
+  %tmp2860 = getelementptr inbounds float* %tmp2859, i64 1
+  %tmp2861 = getelementptr inbounds float* %tmp2860, i64 1
+  %tmp2862 = getelementptr inbounds float* %tmp2861, i64 1
+  %tmp2863 = getelementptr inbounds float* %tmp2862, i64 1
+  %tmp2864 = getelementptr inbounds float* %tmp2863, i64 1
+  %tmp2865 = getelementptr inbounds float* %tmp2864, i64 1
+  %tmp2866 = getelementptr inbounds float* %tmp2865, i64 1
+  %tmp2867 = getelementptr inbounds float* %tmp2866, i64 1
+  %tmp2868 = getelementptr inbounds float* %tmp2867, i64 1
+  %tmp2869 = getelementptr inbounds float* %tmp2868, i64 1
+  %tmp2870 = getelementptr inbounds float* %tmp2869, i64 1
+  %tmp2871 = getelementptr inbounds float* %tmp2870, i64 1
+  %tmp2872 = getelementptr inbounds float* %tmp2871, i64 1
+  %tmp2873 = getelementptr inbounds float* %tmp2872, i64 1
+  %tmp2874 = getelementptr inbounds float* %tmp2873, i64 1
+  %tmp2875 = getelementptr inbounds float* %tmp2874, i64 1
+  %tmp2876 = getelementptr inbounds float* %tmp2875, i64 1
+  %tmp2877 = getelementptr inbounds float* %tmp2876, i64 1
+  %tmp2878 = getelementptr inbounds float* %tmp2877, i64 1
+  %tmp2879 = getelementptr inbounds float* %tmp2878, i64 1
+  %tmp2880 = getelementptr inbounds float* %tmp2879, i64 1
+  %tmp2881 = getelementptr inbounds float* %tmp2880, i64 1
+  %tmp2882 = getelementptr inbounds float* %tmp2881, i64 1
+  %tmp2883 = getelementptr inbounds float* %tmp2882, i64 1
+  %tmp2884 = getelementptr inbounds float* %tmp2883, i64 1
+  %tmp2885 = getelementptr inbounds float* %tmp2884, i64 1
+  %tmp2886 = getelementptr inbounds float* %tmp2885, i64 1
+  %tmp2887 = getelementptr inbounds float* %tmp2886, i64 1
+  %tmp2888 = getelementptr inbounds float* %tmp2887, i64 1
+  %tmp2889 = getelementptr inbounds float* %tmp2888, i64 1
+  %tmp2890 = getelementptr inbounds float* %tmp2889, i64 1
+  %tmp2891 = getelementptr inbounds float* %tmp2890, i64 1
+  %tmp2892 = getelementptr inbounds float* %tmp2891, i64 1
+  %tmp2893 = getelementptr inbounds float* %tmp2892, i64 1
+  %tmp2894 = getelementptr inbounds float* %tmp2893, i64 1
+  %tmp2895 = getelementptr inbounds float* %tmp2894, i64 1
+  %tmp2896 = getelementptr inbounds float* %tmp2895, i64 1
+  %tmp2897 = getelementptr inbounds float* %tmp2896, i64 1
+  %tmp2898 = getelementptr inbounds float* %tmp2897, i64 1
+  %tmp2899 = getelementptr inbounds float* %tmp2898, i64 1
+  %tmp2900 = getelementptr inbounds float* %tmp2899, i64 1
+  %tmp2901 = getelementptr inbounds float* %tmp2900, i64 1
+  %tmp2902 = getelementptr inbounds float* %tmp2901, i64 1
+  %tmp2903 = getelementptr inbounds float* %tmp2902, i64 1
+  %tmp2904 = getelementptr inbounds float* %tmp2903, i64 1
+  %tmp2905 = getelementptr inbounds float* %tmp2904, i64 1
+  %tmp2906 = getelementptr inbounds float* %tmp2905, i64 1
+  %tmp2907 = getelementptr inbounds float* %tmp2906, i64 1
+  %tmp2908 = getelementptr inbounds float* %tmp2907, i64 1
+  %tmp2909 = getelementptr inbounds float* %tmp2908, i64 1
+  %tmp2910 = getelementptr inbounds float* %tmp2909, i64 1
+  %tmp2911 = getelementptr inbounds float* %tmp2910, i64 1
+  %tmp2912 = getelementptr inbounds float* %tmp2911, i64 1
+  %tmp2913 = getelementptr inbounds float* %tmp2912, i64 1
+  %tmp2914 = getelementptr inbounds float* %tmp2913, i64 1
+  %tmp2915 = getelementptr inbounds float* %tmp2914, i64 1
+  %tmp2916 = getelementptr inbounds float* %tmp2915, i64 1
+  %tmp2917 = getelementptr inbounds float* %tmp2916, i64 1
+  %tmp2918 = getelementptr inbounds float* %tmp2917, i64 1
+  %tmp2919 = getelementptr inbounds float* %tmp2918, i64 1
+  %tmp2920 = getelementptr inbounds float* %tmp2919, i64 1
+  %tmp2921 = getelementptr inbounds float* %tmp2920, i64 1
+  %tmp2922 = getelementptr inbounds float* %tmp2921, i64 1
+  %tmp2923 = getelementptr inbounds float* %tmp2922, i64 1
+  %tmp2924 = getelementptr inbounds float* %tmp2923, i64 1
+  %tmp2925 = getelementptr inbounds float* %tmp2924, i64 1
+  %tmp2926 = getelementptr inbounds float* %tmp2925, i64 1
+  %tmp2927 = getelementptr inbounds float* %tmp2926, i64 1
+  %tmp2928 = getelementptr inbounds float* %tmp2927, i64 1
+  %tmp2929 = getelementptr inbounds float* %tmp2928, i64 1
+  %tmp2930 = getelementptr inbounds float* %tmp2929, i64 1
+  %tmp2931 = getelementptr inbounds float* %tmp2930, i64 1
+  %tmp2932 = getelementptr inbounds float* %tmp2931, i64 1
+  %tmp2933 = getelementptr inbounds float* %tmp2932, i64 1
+  %tmp2934 = getelementptr inbounds float* %tmp2933, i64 1
+  %tmp2935 = getelementptr inbounds float* %tmp2934, i64 1
+  %tmp2936 = getelementptr inbounds float* %tmp2935, i64 1
+  %tmp2937 = getelementptr inbounds float* %tmp2936, i64 1
+  %tmp2938 = getelementptr inbounds float* %tmp2937, i64 1
+  %tmp2939 = getelementptr inbounds float* %tmp2938, i64 1
+  %tmp2940 = getelementptr inbounds float* %tmp2939, i64 1
+  %tmp2941 = getelementptr inbounds float* %tmp2940, i64 1
+  %tmp2942 = getelementptr inbounds float* %tmp2941, i64 1
+  %tmp2943 = getelementptr inbounds float* %tmp2942, i64 1
+  %tmp2944 = getelementptr inbounds float* %tmp2943, i64 1
+  %tmp2945 = getelementptr inbounds float* %tmp2944, i64 1
+  %tmp2946 = getelementptr inbounds float* %tmp2945, i64 1
+  %tmp2947 = getelementptr inbounds float* %tmp2946, i64 1
+  %tmp2948 = getelementptr inbounds float* %tmp2947, i64 1
+  %tmp2949 = getelementptr inbounds float* %tmp2948, i64 1
+  %tmp2950 = getelementptr inbounds float* %tmp2949, i64 1
+  %tmp2951 = getelementptr inbounds float* %tmp2950, i64 1
+  %tmp2952 = getelementptr inbounds float* %tmp2951, i64 1
+  %tmp2953 = getelementptr inbounds float* %tmp2952, i64 1
+  %tmp2954 = getelementptr inbounds float* %tmp2953, i64 1
+  %tmp2955 = getelementptr inbounds float* %tmp2954, i64 1
+  %tmp2956 = getelementptr inbounds float* %tmp2955, i64 1
+  %tmp2957 = getelementptr inbounds float* %tmp2956, i64 1
+  %tmp2958 = getelementptr inbounds float* %tmp2957, i64 1
+  %tmp2959 = getelementptr inbounds float* %tmp2958, i64 1
+  %tmp2960 = getelementptr inbounds float* %tmp2959, i64 1
+  %tmp2961 = getelementptr inbounds float* %tmp2960, i64 1
+  %tmp2962 = getelementptr inbounds float* %tmp2961, i64 1
+  %tmp2963 = getelementptr inbounds float* %tmp2962, i64 1
+  %tmp2964 = getelementptr inbounds float* %tmp2963, i64 1
+  %tmp2965 = getelementptr inbounds float* %tmp2964, i64 1
+  %tmp2966 = getelementptr inbounds float* %tmp2965, i64 1
+  %tmp2967 = getelementptr inbounds float* %tmp2966, i64 1
+  %tmp2968 = getelementptr inbounds float* %tmp2967, i64 1
+  %tmp2969 = getelementptr inbounds float* %tmp2968, i64 1
+  %tmp2970 = getelementptr inbounds float* %tmp2969, i64 1
+  %tmp2971 = getelementptr inbounds float* %tmp2970, i64 1
+  %tmp2972 = getelementptr inbounds float* %tmp2971, i64 1
+  %tmp2973 = getelementptr inbounds float* %tmp2972, i64 1
+  %tmp2974 = getelementptr inbounds float* %tmp2973, i64 1
+  %tmp2975 = getelementptr inbounds float* %tmp2974, i64 1
+  %tmp2976 = getelementptr inbounds float* %tmp2975, i64 1
+  %tmp2977 = getelementptr inbounds float* %tmp2976, i64 1
+  %tmp2978 = getelementptr inbounds float* %tmp2977, i64 1
+  %tmp2979 = getelementptr inbounds float* %tmp2978, i64 1
+  %tmp2980 = getelementptr inbounds float* %tmp2979, i64 1
+  %tmp2981 = getelementptr inbounds float* %tmp2980, i64 1
+  %tmp2982 = getelementptr inbounds float* %tmp2981, i64 1
+  %tmp2983 = getelementptr inbounds float* %tmp2982, i64 1
+  %tmp2984 = getelementptr inbounds float* %tmp2983, i64 1
+  %tmp2985 = getelementptr inbounds float* %tmp2984, i64 1
+  %tmp2986 = getelementptr inbounds float* %tmp2985, i64 1
+  %tmp2987 = getelementptr inbounds float* %tmp2986, i64 1
+  %tmp2988 = getelementptr inbounds float* %tmp2987, i64 1
+  %tmp2989 = getelementptr inbounds float* %tmp2988, i64 1
+  %tmp2990 = getelementptr inbounds float* %tmp2989, i64 1
+  %tmp2991 = getelementptr inbounds float* %tmp2990, i64 1
+  %tmp2992 = getelementptr inbounds float* %tmp2991, i64 1
+  %tmp2993 = getelementptr inbounds float* %tmp2992, i64 1
+  %tmp2994 = getelementptr inbounds float* %tmp2993, i64 1
+  %tmp2995 = getelementptr inbounds float* %tmp2994, i64 1
+  %tmp2996 = getelementptr inbounds float* %tmp2995, i64 1
+  %tmp2997 = getelementptr inbounds float* %tmp2996, i64 1
+  %tmp2998 = getelementptr inbounds float* %tmp2997, i64 1
+  %tmp2999 = getelementptr inbounds float* %tmp2998, i64 1
+  %tmp3000 = getelementptr inbounds float* %tmp2999, i64 1
+  %tmp3001 = getelementptr inbounds float* %tmp3000, i64 1
+  %tmp3002 = getelementptr inbounds float* %tmp3001, i64 1
+  %tmp3003 = getelementptr inbounds float* %tmp3002, i64 1
+  %tmp3004 = getelementptr inbounds float* %tmp3003, i64 1
+  %tmp3005 = getelementptr inbounds float* %tmp3004, i64 1
+  %tmp3006 = getelementptr inbounds float* %tmp3005, i64 1
+  %tmp3007 = getelementptr inbounds float* %tmp3006, i64 1
+  %tmp3008 = getelementptr inbounds float* %tmp3007, i64 1
+  %tmp3009 = getelementptr inbounds float* %tmp3008, i64 1
+  %tmp3010 = getelementptr inbounds float* %tmp3009, i64 1
+  %tmp3011 = getelementptr inbounds float* %tmp3010, i64 1
+  %tmp3012 = getelementptr inbounds float* %tmp3011, i64 1
+  %tmp3013 = getelementptr inbounds float* %tmp3012, i64 1
+  %tmp3014 = getelementptr inbounds float* %tmp3013, i64 1
+  %tmp3015 = getelementptr inbounds float* %tmp3014, i64 1
+  %tmp3016 = getelementptr inbounds float* %tmp3015, i64 1
+  %tmp3017 = getelementptr inbounds float* %tmp3016, i64 1
+  %tmp3018 = getelementptr inbounds float* %tmp3017, i64 1
+  %tmp3019 = getelementptr inbounds float* %tmp3018, i64 1
+  %tmp3020 = getelementptr inbounds float* %tmp3019, i64 1
+  %tmp3021 = getelementptr inbounds float* %tmp3020, i64 1
+  %tmp3022 = getelementptr inbounds float* %tmp3021, i64 1
+  %tmp3023 = getelementptr inbounds float* %tmp3022, i64 1
+  %tmp3024 = getelementptr inbounds float* %tmp3023, i64 1
+  %tmp3025 = getelementptr inbounds float* %tmp3024, i64 1
+  %tmp3026 = getelementptr inbounds float* %tmp3025, i64 1
+  %tmp3027 = getelementptr inbounds float* %tmp3026, i64 1
+  %tmp3028 = getelementptr inbounds float* %tmp3027, i64 1
+  %tmp3029 = getelementptr inbounds float* %tmp3028, i64 1
+  %tmp3030 = getelementptr inbounds float* %tmp3029, i64 1
+  %tmp3031 = getelementptr inbounds float* %tmp3030, i64 1
+  %tmp3032 = getelementptr inbounds float* %tmp3031, i64 1
+  %tmp3033 = getelementptr inbounds float* %tmp3032, i64 1
+  %tmp3034 = getelementptr inbounds float* %tmp3033, i64 1
+  %tmp3035 = getelementptr inbounds float* %tmp3034, i64 1
+  %tmp3036 = getelementptr inbounds float* %tmp3035, i64 1
+  %tmp3037 = getelementptr inbounds float* %tmp3036, i64 1
+  %tmp3038 = getelementptr inbounds float* %tmp3037, i64 1
+  %tmp3039 = getelementptr inbounds float* %tmp3038, i64 1
+  %tmp3040 = getelementptr inbounds float* %tmp3039, i64 1
+  %tmp3041 = getelementptr inbounds float* %tmp3040, i64 1
+  %tmp3042 = getelementptr inbounds float* %tmp3041, i64 1
+  %tmp3043 = getelementptr inbounds float* %tmp3042, i64 1
+  %tmp3044 = getelementptr inbounds float* %tmp3043, i64 1
+  %tmp3045 = getelementptr inbounds float* %tmp3044, i64 1
+  %tmp3046 = getelementptr inbounds float* %tmp3045, i64 1
+  %tmp3047 = getelementptr inbounds float* %tmp3046, i64 1
+  %tmp3048 = getelementptr inbounds float* %tmp3047, i64 1
+  %tmp3049 = getelementptr inbounds float* %tmp3048, i64 1
+  %tmp3050 = getelementptr inbounds float* %tmp3049, i64 1
+  %tmp3051 = getelementptr inbounds float* %tmp3050, i64 1
+  %tmp3052 = getelementptr inbounds float* %tmp3051, i64 1
+  %tmp3053 = getelementptr inbounds float* %tmp3052, i64 1
+  %tmp3054 = getelementptr inbounds float* %tmp3053, i64 1
+  %tmp3055 = getelementptr inbounds float* %tmp3054, i64 1
+  %tmp3056 = getelementptr inbounds float* %tmp3055, i64 1
+  %tmp3057 = getelementptr inbounds float* %tmp3056, i64 1
+  %tmp3058 = getelementptr inbounds float* %tmp3057, i64 1
+  %tmp3059 = getelementptr inbounds float* %tmp3058, i64 1
+  %tmp3060 = getelementptr inbounds float* %tmp3059, i64 1
+  %tmp3061 = getelementptr inbounds float* %tmp3060, i64 1
+  %tmp3062 = getelementptr inbounds float* %tmp3061, i64 1
+  %tmp3063 = getelementptr inbounds float* %tmp3062, i64 1
+  %tmp3064 = getelementptr inbounds float* %tmp3063, i64 1
+  %tmp3065 = getelementptr inbounds float* %tmp3064, i64 1
+  %tmp3066 = getelementptr inbounds float* %tmp3065, i64 1
+  %tmp3067 = getelementptr inbounds float* %tmp3066, i64 1
+  %tmp3068 = getelementptr inbounds float* %tmp3067, i64 1
+  %tmp3069 = getelementptr inbounds float* %tmp3068, i64 1
+  %tmp3070 = getelementptr inbounds float* %tmp3069, i64 1
+  %tmp3071 = getelementptr inbounds float* %tmp3070, i64 1
+  %tmp3072 = getelementptr inbounds float* %tmp3071, i64 1
+  %tmp3073 = getelementptr inbounds float* %tmp3072, i64 1
+  %tmp3074 = getelementptr inbounds float* %tmp3073, i64 1
+  %tmp3075 = getelementptr inbounds float* %tmp3074, i64 1
+  %tmp3076 = getelementptr inbounds float* %tmp3075, i64 1
+  %tmp3077 = getelementptr inbounds float* %tmp3076, i64 1
+  %tmp3078 = getelementptr inbounds float* %tmp3077, i64 1
+  %tmp3079 = getelementptr inbounds float* %tmp3078, i64 1
+  %tmp3080 = getelementptr inbounds float* %tmp3079, i64 1
+  %tmp3081 = getelementptr inbounds float* %tmp3080, i64 1
+  %tmp3082 = getelementptr inbounds float* %tmp3081, i64 1
+  %tmp3083 = getelementptr inbounds float* %tmp3082, i64 1
+  %tmp3084 = getelementptr inbounds float* %tmp3083, i64 1
+  %tmp3085 = getelementptr inbounds float* %tmp3084, i64 1
+  %tmp3086 = getelementptr inbounds float* %tmp3085, i64 1
+  %tmp3087 = getelementptr inbounds float* %tmp3086, i64 1
+  %tmp3088 = getelementptr inbounds float* %tmp3087, i64 1
+  %tmp3089 = getelementptr inbounds float* %tmp3088, i64 1
+  %tmp3090 = getelementptr inbounds float* %tmp3089, i64 1
+  %tmp3091 = getelementptr inbounds float* %tmp3090, i64 1
+  %tmp3092 = getelementptr inbounds float* %tmp3091, i64 1
+  %tmp3093 = getelementptr inbounds float* %tmp3092, i64 1
+  %tmp3094 = getelementptr inbounds float* %tmp3093, i64 1
+  %tmp3095 = getelementptr inbounds float* %tmp3094, i64 1
+  %tmp3096 = getelementptr inbounds float* %tmp3095, i64 1
+  %tmp3097 = getelementptr inbounds float* %tmp3096, i64 1
+  %tmp3098 = getelementptr inbounds float* %tmp3097, i64 1
+  %tmp3099 = getelementptr inbounds float* %tmp3098, i64 1
+  %tmp3100 = getelementptr inbounds float* %tmp3099, i64 1
+  %tmp3101 = getelementptr inbounds float* %tmp3100, i64 1
+  %tmp3102 = getelementptr inbounds float* %tmp3101, i64 1
+  %tmp3103 = getelementptr inbounds float* %tmp3102, i64 1
+  %tmp3104 = getelementptr inbounds float* %tmp3103, i64 1
+  %tmp3105 = getelementptr inbounds float* %tmp3104, i64 1
+  %tmp3106 = getelementptr inbounds float* %tmp3105, i64 1
+  %tmp3107 = getelementptr inbounds float* %tmp3106, i64 1
+  %tmp3108 = getelementptr inbounds float* %tmp3107, i64 1
+  %tmp3109 = getelementptr inbounds float* %tmp3108, i64 1
+  %tmp3110 = getelementptr inbounds float* %tmp3109, i64 1
+  %tmp3111 = getelementptr inbounds float* %tmp3110, i64 1
+  %tmp3112 = getelementptr inbounds float* %tmp3111, i64 1
+  %tmp3113 = getelementptr inbounds float* %tmp3112, i64 1
+  %tmp3114 = getelementptr inbounds float* %tmp3113, i64 1
+  %tmp3115 = getelementptr inbounds float* %tmp3114, i64 1
+  %tmp3116 = getelementptr inbounds float* %tmp3115, i64 1
+  %tmp3117 = getelementptr inbounds float* %tmp3116, i64 1
+  %tmp3118 = getelementptr inbounds float* %tmp3117, i64 1
+  %tmp3119 = getelementptr inbounds float* %tmp3118, i64 1
+  %tmp3120 = getelementptr inbounds float* %tmp3119, i64 1
+  %tmp3121 = getelementptr inbounds float* %tmp3120, i64 1
+  %tmp3122 = getelementptr inbounds float* %tmp3121, i64 1
+  %tmp3123 = getelementptr inbounds float* %tmp3122, i64 1
+  %tmp3124 = getelementptr inbounds float* %tmp3123, i64 1
+  %tmp3125 = getelementptr inbounds float* %tmp3124, i64 1
+  %tmp3126 = getelementptr inbounds float* %tmp3125, i64 1
+  %tmp3127 = getelementptr inbounds float* %tmp3126, i64 1
+  %tmp3128 = getelementptr inbounds float* %tmp3127, i64 1
+  %tmp3129 = getelementptr inbounds float* %tmp3128, i64 1
+  %tmp3130 = getelementptr inbounds float* %tmp3129, i64 1
+  %tmp3131 = getelementptr inbounds float* %tmp3130, i64 1
+  %tmp3132 = getelementptr inbounds float* %tmp3131, i64 1
+  %tmp3133 = getelementptr inbounds float* %tmp3132, i64 1
+  %tmp3134 = getelementptr inbounds float* %tmp3133, i64 1
+  %tmp3135 = getelementptr inbounds float* %tmp3134, i64 1
+  %tmp3136 = getelementptr inbounds float* %tmp3135, i64 1
+  %tmp3137 = getelementptr inbounds float* %tmp3136, i64 1
+  %tmp3138 = getelementptr inbounds float* %tmp3137, i64 1
+  %tmp3139 = getelementptr inbounds float* %tmp3138, i64 1
+  %tmp3140 = getelementptr inbounds float* %tmp3139, i64 1
+  %tmp3141 = getelementptr inbounds float* %tmp3140, i64 1
+  %tmp3142 = getelementptr inbounds float* %tmp3141, i64 1
+  %tmp3143 = getelementptr inbounds float* %tmp3142, i64 1
+  %tmp3144 = getelementptr inbounds float* %tmp3143, i64 1
+  %tmp3145 = getelementptr inbounds float* %tmp3144, i64 1
+  %tmp3146 = getelementptr inbounds float* %tmp3145, i64 1
+  %tmp3147 = getelementptr inbounds float* %tmp3146, i64 1
+  %tmp3148 = getelementptr inbounds float* %tmp3147, i64 1
+  %tmp3149 = getelementptr inbounds float* %tmp3148, i64 1
+  %tmp3150 = getelementptr inbounds float* %tmp3149, i64 1
+  %tmp3151 = getelementptr inbounds float* %tmp3150, i64 1
+  %tmp3152 = getelementptr inbounds float* %tmp3151, i64 1
+  %tmp3153 = getelementptr inbounds float* %tmp3152, i64 1
+  %tmp3154 = getelementptr inbounds float* %tmp3153, i64 1
+  %tmp3155 = getelementptr inbounds float* %tmp3154, i64 1
+  %tmp3156 = getelementptr inbounds float* %tmp3155, i64 1
+  %tmp3157 = getelementptr inbounds float* %tmp3156, i64 1
+  %tmp3158 = getelementptr inbounds float* %tmp3157, i64 1
+  %tmp3159 = getelementptr inbounds float* %tmp3158, i64 1
+  %tmp3160 = getelementptr inbounds float* %tmp3159, i64 1
+  %tmp3161 = getelementptr inbounds float* %tmp3160, i64 1
+  %tmp3162 = getelementptr inbounds float* %tmp3161, i64 1
+  %tmp3163 = getelementptr inbounds float* %tmp3162, i64 1
+  %tmp3164 = getelementptr inbounds float* %tmp3163, i64 1
+  %tmp3165 = getelementptr inbounds float* %tmp3164, i64 1
+  %tmp3166 = getelementptr inbounds float* %tmp3165, i64 1
+  %tmp3167 = getelementptr inbounds float* %tmp3166, i64 1
+  %tmp3168 = getelementptr inbounds float* %tmp3167, i64 1
+  %tmp3169 = getelementptr inbounds float* %tmp3168, i64 1
+  %tmp3170 = getelementptr inbounds float* %tmp3169, i64 1
+  %tmp3171 = getelementptr inbounds float* %tmp3170, i64 1
+  %tmp3172 = getelementptr inbounds float* %tmp3171, i64 1
+  %tmp3173 = getelementptr inbounds float* %tmp3172, i64 1
+  %tmp3174 = getelementptr inbounds float* %tmp3173, i64 1
+  %tmp3175 = getelementptr inbounds float* %tmp3174, i64 1
+  %tmp3176 = getelementptr inbounds float* %tmp3175, i64 1
+  %tmp3177 = getelementptr inbounds float* %tmp3176, i64 1
+  %tmp3178 = getelementptr inbounds float* %tmp3177, i64 1
+  %tmp3179 = getelementptr inbounds float* %tmp3178, i64 1
+  %tmp3180 = getelementptr inbounds float* %tmp3179, i64 1
+  %tmp3181 = getelementptr inbounds float* %tmp3180, i64 1
+  %tmp3182 = getelementptr inbounds float* %tmp3181, i64 1
+  %tmp3183 = getelementptr inbounds float* %tmp3182, i64 1
+  %tmp3184 = getelementptr inbounds float* %tmp3183, i64 1
+  %tmp3185 = getelementptr inbounds float* %tmp3184, i64 1
+  %tmp3186 = getelementptr inbounds float* %tmp3185, i64 1
+  %tmp3187 = getelementptr inbounds float* %tmp3186, i64 1
+  %tmp3188 = getelementptr inbounds float* %tmp3187, i64 1
+  %tmp3189 = getelementptr inbounds float* %tmp3188, i64 1
+  %tmp3190 = getelementptr inbounds float* %tmp3189, i64 1
+  %tmp3191 = getelementptr inbounds float* %tmp3190, i64 1
+  %tmp3192 = getelementptr inbounds float* %tmp3191, i64 1
+  %tmp3193 = getelementptr inbounds float* %tmp3192, i64 1
+  %tmp3194 = getelementptr inbounds float* %tmp3193, i64 1
+  %tmp3195 = getelementptr inbounds float* %tmp3194, i64 1
+  %tmp3196 = getelementptr inbounds float* %tmp3195, i64 1
+  %tmp3197 = getelementptr inbounds float* %tmp3196, i64 1
+  %tmp3198 = getelementptr inbounds float* %tmp3197, i64 1
+  %tmp3199 = getelementptr inbounds float* %tmp3198, i64 1
+  %tmp3200 = getelementptr inbounds float* %tmp3199, i64 1
+  %tmp3201 = getelementptr inbounds float* %tmp3200, i64 1
+  %tmp3202 = getelementptr inbounds float* %tmp3201, i64 1
+  %tmp3203 = getelementptr inbounds float* %tmp3202, i64 1
+  %tmp3204 = getelementptr inbounds float* %tmp3203, i64 1
+  %tmp3205 = getelementptr inbounds float* %tmp3204, i64 1
+  %tmp3206 = getelementptr inbounds float* %tmp3205, i64 1
+  %tmp3207 = getelementptr inbounds float* %tmp3206, i64 1
+  %tmp3208 = getelementptr inbounds float* %tmp3207, i64 1
+  %tmp3209 = getelementptr inbounds float* %tmp3208, i64 1
+  %tmp3210 = getelementptr inbounds float* %tmp3209, i64 1
+  %tmp3211 = getelementptr inbounds float* %tmp3210, i64 1
+  %tmp3212 = getelementptr inbounds float* %tmp3211, i64 1
+  %tmp3213 = getelementptr inbounds float* %tmp3212, i64 1
+  %tmp3214 = getelementptr inbounds float* %tmp3213, i64 1
+  %tmp3215 = getelementptr inbounds float* %tmp3214, i64 1
+  %tmp3216 = getelementptr inbounds float* %tmp3215, i64 1
+  %tmp3217 = getelementptr inbounds float* %tmp3216, i64 1
+  %tmp3218 = getelementptr inbounds float* %tmp3217, i64 1
+  %tmp3219 = getelementptr inbounds float* %tmp3218, i64 1
+  %tmp3220 = getelementptr inbounds float* %tmp3219, i64 1
+  %tmp3221 = getelementptr inbounds float* %tmp3220, i64 1
+  %tmp3222 = getelementptr inbounds float* %tmp3221, i64 1
+  %tmp3223 = getelementptr inbounds float* %tmp3222, i64 1
+  %tmp3224 = getelementptr inbounds float* %tmp3223, i64 1
+  %tmp3225 = getelementptr inbounds float* %tmp3224, i64 1
+  %tmp3226 = getelementptr inbounds float* %tmp3225, i64 1
+  %tmp3227 = getelementptr inbounds float* %tmp3226, i64 1
+  %tmp3228 = getelementptr inbounds float* %tmp3227, i64 1
+  %tmp3229 = getelementptr inbounds float* %tmp3228, i64 1
+  %tmp3230 = getelementptr inbounds float* %tmp3229, i64 1
+  %tmp3231 = getelementptr inbounds float* %tmp3230, i64 1
+  %tmp3232 = getelementptr inbounds float* %tmp3231, i64 1
+  %tmp3233 = getelementptr inbounds float* %tmp3232, i64 1
+  %tmp3234 = getelementptr inbounds float* %tmp3233, i64 1
+  %tmp3235 = getelementptr inbounds float* %tmp3234, i64 1
+  %tmp3236 = getelementptr inbounds float* %tmp3235, i64 1
+  %tmp3237 = getelementptr inbounds float* %tmp3236, i64 1
+  %tmp3238 = getelementptr inbounds float* %tmp3237, i64 1
+  %tmp3239 = getelementptr inbounds float* %tmp3238, i64 1
+  %tmp3240 = getelementptr inbounds float* %tmp3239, i64 1
+  %tmp3241 = getelementptr inbounds float* %tmp3240, i64 1
+  %tmp3242 = getelementptr inbounds float* %tmp3241, i64 1
+  %tmp3243 = getelementptr inbounds float* %tmp3242, i64 1
+  %tmp3244 = getelementptr inbounds float* %tmp3243, i64 1
+  %tmp3245 = getelementptr inbounds float* %tmp3244, i64 1
+  %tmp3246 = getelementptr inbounds float* %tmp3245, i64 1
+  %tmp3247 = getelementptr inbounds float* %tmp3246, i64 1
+  %tmp3248 = getelementptr inbounds float* %tmp3247, i64 1
+  %tmp3249 = getelementptr inbounds float* %tmp3248, i64 1
+  %tmp3250 = getelementptr inbounds float* %tmp3249, i64 1
+  %tmp3251 = getelementptr inbounds float* %tmp3250, i64 1
+  %tmp3252 = getelementptr inbounds float* %tmp3251, i64 1
+  %tmp3253 = getelementptr inbounds float* %tmp3252, i64 1
+  %tmp3254 = getelementptr inbounds float* %tmp3253, i64 1
+  %tmp3255 = getelementptr inbounds float* %tmp3254, i64 1
+  %tmp3256 = getelementptr inbounds float* %tmp3255, i64 1
+  %tmp3257 = getelementptr inbounds float* %tmp3256, i64 1
+  %tmp3258 = getelementptr inbounds float* %tmp3257, i64 1
+  %tmp3259 = getelementptr inbounds float* %tmp3258, i64 1
+  %tmp3260 = getelementptr inbounds float* %tmp3259, i64 1
+  %tmp3261 = getelementptr inbounds float* %tmp3260, i64 1
+  %tmp3262 = getelementptr inbounds float* %tmp3261, i64 1
+  %tmp3263 = getelementptr inbounds float* %tmp3262, i64 1
+  %tmp3264 = getelementptr inbounds float* %tmp3263, i64 1
+  %tmp3265 = getelementptr inbounds float* %tmp3264, i64 1
+  %tmp3266 = getelementptr inbounds float* %tmp3265, i64 1
+  %tmp3267 = getelementptr inbounds float* %tmp3266, i64 1
+  %tmp3268 = getelementptr inbounds float* %tmp3267, i64 1
+  %tmp3269 = getelementptr inbounds float* %tmp3268, i64 1
+  %tmp3270 = getelementptr inbounds float* %tmp3269, i64 1
+  %tmp3271 = getelementptr inbounds float* %tmp3270, i64 1
+  %tmp3272 = getelementptr inbounds float* %tmp3271, i64 1
+  %tmp3273 = getelementptr inbounds float* %tmp3272, i64 1
+  %tmp3274 = getelementptr inbounds float* %tmp3273, i64 1
+  %tmp3275 = getelementptr inbounds float* %tmp3274, i64 1
+  %tmp3276 = getelementptr inbounds float* %tmp3275, i64 1
+  %tmp3277 = getelementptr inbounds float* %tmp3276, i64 1
+  %tmp3278 = getelementptr inbounds float* %tmp3277, i64 1
+  %tmp3279 = getelementptr inbounds float* %tmp3278, i64 1
+  %tmp3280 = getelementptr inbounds float* %tmp3279, i64 1
+  %tmp3281 = getelementptr inbounds float* %tmp3280, i64 1
+  %tmp3282 = getelementptr inbounds float* %tmp3281, i64 1
+  %tmp3283 = getelementptr inbounds float* %tmp3282, i64 1
+  %tmp3284 = getelementptr inbounds float* %tmp3283, i64 1
+  %tmp3285 = getelementptr inbounds float* %tmp3284, i64 1
+  %tmp3286 = getelementptr inbounds float* %tmp3285, i64 1
+  %tmp3287 = getelementptr inbounds float* %tmp3286, i64 1
+  %tmp3288 = getelementptr inbounds float* %tmp3287, i64 1
+  %tmp3289 = getelementptr inbounds float* %tmp3288, i64 1
+  %tmp3290 = getelementptr inbounds float* %tmp3289, i64 1
+  %tmp3291 = getelementptr inbounds float* %tmp3290, i64 1
+  %tmp3292 = getelementptr inbounds float* %tmp3291, i64 1
+  %tmp3293 = getelementptr inbounds float* %tmp3292, i64 1
+  %tmp3294 = getelementptr inbounds float* %tmp3293, i64 1
+  %tmp3295 = getelementptr inbounds float* %tmp3294, i64 1
+  %tmp3296 = getelementptr inbounds float* %tmp3295, i64 1
+  %tmp3297 = getelementptr inbounds float* %tmp3296, i64 1
+  %tmp3298 = getelementptr inbounds float* %tmp3297, i64 1
+  %tmp3299 = getelementptr inbounds float* %tmp3298, i64 1
+  %tmp3300 = getelementptr inbounds float* %tmp3299, i64 1
+  %tmp3301 = getelementptr inbounds float* %tmp3300, i64 1
+  %tmp3302 = getelementptr inbounds float* %tmp3301, i64 1
+  %tmp3303 = getelementptr inbounds float* %tmp3302, i64 1
+  %tmp3304 = getelementptr inbounds float* %tmp3303, i64 1
+  %tmp3305 = getelementptr inbounds float* %tmp3304, i64 1
+  %tmp3306 = getelementptr inbounds float* %tmp3305, i64 1
+  %tmp3307 = getelementptr inbounds float* %tmp3306, i64 1
+  %tmp3308 = getelementptr inbounds float* %tmp3307, i64 1
+  %tmp3309 = getelementptr inbounds float* %tmp3308, i64 1
+  %tmp3310 = getelementptr inbounds float* %tmp3309, i64 1
+  %tmp3311 = getelementptr inbounds float* %tmp3310, i64 1
+  %tmp3312 = getelementptr inbounds float* %tmp3311, i64 1
+  %tmp3313 = getelementptr inbounds float* %tmp3312, i64 1
+  %tmp3314 = getelementptr inbounds float* %tmp3313, i64 1
+  %tmp3315 = getelementptr inbounds float* %tmp3314, i64 1
+  %tmp3316 = getelementptr inbounds float* %tmp3315, i64 1
+  %tmp3317 = getelementptr inbounds float* %tmp3316, i64 1
+  %tmp3318 = getelementptr inbounds float* %tmp3317, i64 1
+  %tmp3319 = getelementptr inbounds float* %tmp3318, i64 1
+  %tmp3320 = getelementptr inbounds float* %tmp3319, i64 1
+  %tmp3321 = getelementptr inbounds float* %tmp3320, i64 1
+  %tmp3322 = getelementptr inbounds float* %tmp3321, i64 1
+  %tmp3323 = getelementptr inbounds float* %tmp3322, i64 1
+  %tmp3324 = getelementptr inbounds float* %tmp3323, i64 1
+  %tmp3325 = getelementptr inbounds float* %tmp3324, i64 1
+  %tmp3326 = getelementptr inbounds float* %tmp3325, i64 1
+  %tmp3327 = getelementptr inbounds float* %tmp3326, i64 1
+  %tmp3328 = getelementptr inbounds float* %tmp3327, i64 1
+  %tmp3329 = getelementptr inbounds float* %tmp3328, i64 1
+  %tmp3330 = getelementptr inbounds float* %tmp3329, i64 1
+  %tmp3331 = getelementptr inbounds float* %tmp3330, i64 1
+  %tmp3332 = getelementptr inbounds float* %tmp3331, i64 1
+  %tmp3333 = getelementptr inbounds float* %tmp3332, i64 1
+  %tmp3334 = getelementptr inbounds float* %tmp3333, i64 1
+  %tmp3335 = getelementptr inbounds float* %tmp3334, i64 1
+  %tmp3336 = getelementptr inbounds float* %tmp3335, i64 1
+  %tmp3337 = getelementptr inbounds float* %tmp3336, i64 1
+  %tmp3338 = getelementptr inbounds float* %tmp3337, i64 1
+  %tmp3339 = getelementptr inbounds float* %tmp3338, i64 1
+  %tmp3340 = getelementptr inbounds float* %tmp3339, i64 1
+  %tmp3341 = getelementptr inbounds float* %tmp3340, i64 1
+  %tmp3342 = getelementptr inbounds float* %tmp3341, i64 1
+  %tmp3343 = getelementptr inbounds float* %tmp3342, i64 1
+  %tmp3344 = getelementptr inbounds float* %tmp3343, i64 1
+  %tmp3345 = getelementptr inbounds float* %tmp3344, i64 1
+  %tmp3346 = getelementptr inbounds float* %tmp3345, i64 1
+  %tmp3347 = getelementptr inbounds float* %tmp3346, i64 1
+  %tmp3348 = getelementptr inbounds float* %tmp3347, i64 1
+  %tmp3349 = getelementptr inbounds float* %tmp3348, i64 1
+  %tmp3350 = getelementptr inbounds float* %tmp3349, i64 1
+  %tmp3351 = getelementptr inbounds float* %tmp3350, i64 1
+  %tmp3352 = getelementptr inbounds float* %tmp3351, i64 1
+  %tmp3353 = getelementptr inbounds float* %tmp3352, i64 1
+  %tmp3354 = getelementptr inbounds float* %tmp3353, i64 1
+  %tmp3355 = getelementptr inbounds float* %tmp3354, i64 1
+  %tmp3356 = getelementptr inbounds float* %tmp3355, i64 1
+  %tmp3357 = getelementptr inbounds float* %tmp3356, i64 1
+  %tmp3358 = getelementptr inbounds float* %tmp3357, i64 1
+  %tmp3359 = getelementptr inbounds float* %tmp3358, i64 1
+  %tmp3360 = getelementptr inbounds float* %tmp3359, i64 1
+  %tmp3361 = getelementptr inbounds float* %tmp3360, i64 1
+  %tmp3362 = getelementptr inbounds float* %tmp3361, i64 1
+  %tmp3363 = getelementptr inbounds float* %tmp3362, i64 1
+  %tmp3364 = getelementptr inbounds float* %tmp3363, i64 1
+  %tmp3365 = getelementptr inbounds float* %tmp3364, i64 1
+  %tmp3366 = getelementptr inbounds float* %tmp3365, i64 1
+  %tmp3367 = getelementptr inbounds float* %tmp3366, i64 1
+  %tmp3368 = getelementptr inbounds float* %tmp3367, i64 1
+  %tmp3369 = getelementptr inbounds float* %tmp3368, i64 1
+  %tmp3370 = getelementptr inbounds float* %tmp3369, i64 1
+  %tmp3371 = getelementptr inbounds float* %tmp3370, i64 1
+  %tmp3372 = getelementptr inbounds float* %tmp3371, i64 1
+  %tmp3373 = getelementptr inbounds float* %tmp3372, i64 1
+  %tmp3374 = getelementptr inbounds float* %tmp3373, i64 1
+  %tmp3375 = getelementptr inbounds float* %tmp3374, i64 1
+  %tmp3376 = getelementptr inbounds float* %tmp3375, i64 1
+  %tmp3377 = getelementptr inbounds float* %tmp3376, i64 1
+  %tmp3378 = getelementptr inbounds float* %tmp3377, i64 1
+  %tmp3379 = getelementptr inbounds float* %tmp3378, i64 1
+  %tmp3380 = getelementptr inbounds float* %tmp3379, i64 1
+  %tmp3381 = getelementptr inbounds float* %tmp3380, i64 1
+  %tmp3382 = getelementptr inbounds float* %tmp3381, i64 1
+  %tmp3383 = getelementptr inbounds float* %tmp3382, i64 1
+  %tmp3384 = getelementptr inbounds float* %tmp3383, i64 1
+  %tmp3385 = getelementptr inbounds float* %tmp3384, i64 1
+  %tmp3386 = getelementptr inbounds float* %tmp3385, i64 1
+  %tmp3387 = getelementptr inbounds float* %tmp3386, i64 1
+  %tmp3388 = getelementptr inbounds float* %tmp3387, i64 1
+  %tmp3389 = getelementptr inbounds float* %tmp3388, i64 1
+  %tmp3390 = getelementptr inbounds float* %tmp3389, i64 1
+  %tmp3391 = getelementptr inbounds float* %tmp3390, i64 1
+  %tmp3392 = getelementptr inbounds float* %tmp3391, i64 1
+  %tmp3393 = getelementptr inbounds float* %tmp3392, i64 1
+  %tmp3394 = getelementptr inbounds float* %tmp3393, i64 1
+  %tmp3395 = getelementptr inbounds float* %tmp3394, i64 1
+  %tmp3396 = getelementptr inbounds float* %tmp3395, i64 1
+  %tmp3397 = getelementptr inbounds float* %tmp3396, i64 1
+  %tmp3398 = getelementptr inbounds float* %tmp3397, i64 1
+  %tmp3399 = getelementptr inbounds float* %tmp3398, i64 1
+  %tmp3400 = getelementptr inbounds float* %tmp3399, i64 1
+  %tmp3401 = getelementptr inbounds float* %tmp3400, i64 1
+  %tmp3402 = getelementptr inbounds float* %tmp3401, i64 1
+  %tmp3403 = getelementptr inbounds float* %tmp3402, i64 1
+  %tmp3404 = getelementptr inbounds float* %tmp3403, i64 1
+  %tmp3405 = getelementptr inbounds float* %tmp3404, i64 1
+  %tmp3406 = getelementptr inbounds float* %tmp3405, i64 1
+  %tmp3407 = getelementptr inbounds float* %tmp3406, i64 1
+  %tmp3408 = getelementptr inbounds float* %tmp3407, i64 1
+  %tmp3409 = getelementptr inbounds float* %tmp3408, i64 1
+  %tmp3410 = getelementptr inbounds float* %tmp3409, i64 1
+  %tmp3411 = getelementptr inbounds float* %tmp3410, i64 1
+  %tmp3412 = getelementptr inbounds float* %tmp3411, i64 1
+  %tmp3413 = getelementptr inbounds float* %tmp3412, i64 1
+  %tmp3414 = getelementptr inbounds float* %tmp3413, i64 1
+  %tmp3415 = getelementptr inbounds float* %tmp3414, i64 1
+  %tmp3416 = getelementptr inbounds float* %tmp3415, i64 1
+  %tmp3417 = getelementptr inbounds float* %tmp3416, i64 1
+  %tmp3418 = getelementptr inbounds float* %tmp3417, i64 1
+  %tmp3419 = getelementptr inbounds float* %tmp3418, i64 1
+  %tmp3420 = getelementptr inbounds float* %tmp3419, i64 1
+  %tmp3421 = getelementptr inbounds float* %tmp3420, i64 1
+  %tmp3422 = getelementptr inbounds float* %tmp3421, i64 1
+  %tmp3423 = getelementptr inbounds float* %tmp3422, i64 1
+  %tmp3424 = getelementptr inbounds float* %tmp3423, i64 1
+  %tmp3425 = getelementptr inbounds float* %tmp3424, i64 1
+  %tmp3426 = getelementptr inbounds float* %tmp3425, i64 1
+  %tmp3427 = getelementptr inbounds float* %tmp3426, i64 1
+  %tmp3428 = getelementptr inbounds float* %tmp3427, i64 1
+  %tmp3429 = getelementptr inbounds float* %tmp3428, i64 1
+  %tmp3430 = getelementptr inbounds float* %tmp3429, i64 1
+  %tmp3431 = getelementptr inbounds float* %tmp3430, i64 1
+  %tmp3432 = getelementptr inbounds float* %tmp3431, i64 1
+  %tmp3433 = getelementptr inbounds float* %tmp3432, i64 1
+  %tmp3434 = getelementptr inbounds float* %tmp3433, i64 1
+  %tmp3435 = getelementptr inbounds float* %tmp3434, i64 1
+  %tmp3436 = getelementptr inbounds float* %tmp3435, i64 1
+  %tmp3437 = getelementptr inbounds float* %tmp3436, i64 1
+  %tmp3438 = getelementptr inbounds float* %tmp3437, i64 1
+  %tmp3439 = getelementptr inbounds float* %tmp3438, i64 1
+  %tmp3440 = getelementptr inbounds float* %tmp3439, i64 1
+  %tmp3441 = getelementptr inbounds float* %tmp3440, i64 1
+  %tmp3442 = getelementptr inbounds float* %tmp3441, i64 1
+  %tmp3443 = getelementptr inbounds float* %tmp3442, i64 1
+  %tmp3444 = getelementptr inbounds float* %tmp3443, i64 1
+  %tmp3445 = getelementptr inbounds float* %tmp3444, i64 1
+  %tmp3446 = getelementptr inbounds float* %tmp3445, i64 1
+  %tmp3447 = getelementptr inbounds float* %tmp3446, i64 1
+  %tmp3448 = getelementptr inbounds float* %tmp3447, i64 1
+  %tmp3449 = getelementptr inbounds float* %tmp3448, i64 1
+  %tmp3450 = getelementptr inbounds float* %tmp3449, i64 1
+  %tmp3451 = getelementptr inbounds float* %tmp3450, i64 1
+  %tmp3452 = getelementptr inbounds float* %tmp3451, i64 1
+  %tmp3453 = getelementptr inbounds float* %tmp3452, i64 1
+  %tmp3454 = getelementptr inbounds float* %tmp3453, i64 1
+  %tmp3455 = getelementptr inbounds float* %tmp3454, i64 1
+  %tmp3456 = getelementptr inbounds float* %tmp3455, i64 1
+  %tmp3457 = getelementptr inbounds float* %tmp3456, i64 1
+  %tmp3458 = getelementptr inbounds float* %tmp3457, i64 1
+  %tmp3459 = getelementptr inbounds float* %tmp3458, i64 1
+  %tmp3460 = getelementptr inbounds float* %tmp3459, i64 1
+  %tmp3461 = getelementptr inbounds float* %tmp3460, i64 1
+  %tmp3462 = getelementptr inbounds float* %tmp3461, i64 1
+  %tmp3463 = getelementptr inbounds float* %tmp3462, i64 1
+  %tmp3464 = getelementptr inbounds float* %tmp3463, i64 1
+  %tmp3465 = getelementptr inbounds float* %tmp3464, i64 1
+  %tmp3466 = getelementptr inbounds float* %tmp3465, i64 1
+  %tmp3467 = getelementptr inbounds float* %tmp3466, i64 1
+  %tmp3468 = getelementptr inbounds float* %tmp3467, i64 1
+  %tmp3469 = getelementptr inbounds float* %tmp3468, i64 1
+  %tmp3470 = getelementptr inbounds float* %tmp3469, i64 1
+  %tmp3471 = getelementptr inbounds float* %tmp3470, i64 1
+  %tmp3472 = getelementptr inbounds float* %tmp3471, i64 1
+  %tmp3473 = getelementptr inbounds float* %tmp3472, i64 1
+  %tmp3474 = getelementptr inbounds float* %tmp3473, i64 1
+  %tmp3475 = getelementptr inbounds float* %tmp3474, i64 1
+  %tmp3476 = getelementptr inbounds float* %tmp3475, i64 1
+  %tmp3477 = getelementptr inbounds float* %tmp3476, i64 1
+  %tmp3478 = getelementptr inbounds float* %tmp3477, i64 1
+  %tmp3479 = getelementptr inbounds float* %tmp3478, i64 1
+  %tmp3480 = getelementptr inbounds float* %tmp3479, i64 1
+  %tmp3481 = getelementptr inbounds float* %tmp3480, i64 1
+  %tmp3482 = getelementptr inbounds float* %tmp3481, i64 1
+  %tmp3483 = getelementptr inbounds float* %tmp3482, i64 1
+  %tmp3484 = getelementptr inbounds float* %tmp3483, i64 1
+  %tmp3485 = getelementptr inbounds float* %tmp3484, i64 1
+  %tmp3486 = getelementptr inbounds float* %tmp3485, i64 1
+  %tmp3487 = getelementptr inbounds float* %tmp3486, i64 1
+  %tmp3488 = getelementptr inbounds float* %tmp3487, i64 1
+  %tmp3489 = getelementptr inbounds float* %tmp3488, i64 1
+  %tmp3490 = getelementptr inbounds float* %tmp3489, i64 1
+  %tmp3491 = getelementptr inbounds float* %tmp3490, i64 1
+  %tmp3492 = getelementptr inbounds float* %tmp3491, i64 1
+  %tmp3493 = getelementptr inbounds float* %tmp3492, i64 1
+  %tmp3494 = getelementptr inbounds float* %tmp3493, i64 1
+  %tmp3495 = getelementptr inbounds float* %tmp3494, i64 1
+  %tmp3496 = getelementptr inbounds float* %tmp3495, i64 1
+  %tmp3497 = getelementptr inbounds float* %tmp3496, i64 1
+  %tmp3498 = getelementptr inbounds float* %tmp3497, i64 1
+  %tmp3499 = getelementptr inbounds float* %tmp3498, i64 1
+  %tmp3500 = getelementptr inbounds float* %tmp3499, i64 1
+  %tmp3501 = getelementptr inbounds float* %tmp3500, i64 1
+  %tmp3502 = getelementptr inbounds float* %tmp3501, i64 1
+  %tmp3503 = getelementptr inbounds float* %tmp3502, i64 1
+  %tmp3504 = getelementptr inbounds float* %tmp3503, i64 1
+  %tmp3505 = getelementptr inbounds float* %tmp3504, i64 1
+  %tmp3506 = getelementptr inbounds float* %tmp3505, i64 1
+  %tmp3507 = getelementptr inbounds float* %tmp3506, i64 1
+  %tmp3508 = getelementptr inbounds float* %tmp3507, i64 1
+  %tmp3509 = getelementptr inbounds float* %tmp3508, i64 1
+  %tmp3510 = getelementptr inbounds float* %tmp3509, i64 1
+  %tmp3511 = getelementptr inbounds float* %tmp3510, i64 1
+  %tmp3512 = getelementptr inbounds float* %tmp3511, i64 1
+  %tmp3513 = getelementptr inbounds float* %tmp3512, i64 1
+  %tmp3514 = getelementptr inbounds float* %tmp3513, i64 1
+  %tmp3515 = getelementptr inbounds float* %tmp3514, i64 1
+  %tmp3516 = getelementptr inbounds float* %tmp3515, i64 1
+  %tmp3517 = getelementptr inbounds float* %tmp3516, i64 1
+  %tmp3518 = getelementptr inbounds float* %tmp3517, i64 1
+  %tmp3519 = getelementptr inbounds float* %tmp3518, i64 1
+  %tmp3520 = getelementptr inbounds float* %tmp3519, i64 1
+  %tmp3521 = getelementptr inbounds float* %tmp3520, i64 1
+  %tmp3522 = getelementptr inbounds float* %tmp3521, i64 1
+  %tmp3523 = getelementptr inbounds float* %tmp3522, i64 1
+  %tmp3524 = getelementptr inbounds float* %tmp3523, i64 1
+  %tmp3525 = getelementptr inbounds float* %tmp3524, i64 1
+  %tmp3526 = getelementptr inbounds float* %tmp3525, i64 1
+  %tmp3527 = getelementptr inbounds float* %tmp3526, i64 1
+  %tmp3528 = getelementptr inbounds float* %tmp3527, i64 1
+  %tmp3529 = getelementptr inbounds float* %tmp3528, i64 1
+  %tmp3530 = getelementptr inbounds float* %tmp3529, i64 1
+  %tmp3531 = getelementptr inbounds float* %tmp3530, i64 1
+  %tmp3532 = getelementptr inbounds float* %tmp3531, i64 1
+  %tmp3533 = getelementptr inbounds float* %tmp3532, i64 1
+  %tmp3534 = getelementptr inbounds float* %tmp3533, i64 1
+  %tmp3535 = getelementptr inbounds float* %tmp3534, i64 1
+  %tmp3536 = getelementptr inbounds float* %tmp3535, i64 1
+  %tmp3537 = getelementptr inbounds float* %tmp3536, i64 1
+  %tmp3538 = getelementptr inbounds float* %tmp3537, i64 1
+  %tmp3539 = getelementptr inbounds float* %tmp3538, i64 1
+  %tmp3540 = getelementptr inbounds float* %tmp3539, i64 1
+  %tmp3541 = getelementptr inbounds float* %tmp3540, i64 1
+  %tmp3542 = getelementptr inbounds float* %tmp3541, i64 1
+  %tmp3543 = getelementptr inbounds float* %tmp3542, i64 1
+  %tmp3544 = getelementptr inbounds float* %tmp3543, i64 1
+  %tmp3545 = getelementptr inbounds float* %tmp3544, i64 1
+  %tmp3546 = getelementptr inbounds float* %tmp3545, i64 1
+  %tmp3547 = getelementptr inbounds float* %tmp3546, i64 1
+  %tmp3548 = getelementptr inbounds float* %tmp3547, i64 1
+  %tmp3549 = getelementptr inbounds float* %tmp3548, i64 1
+  %tmp3550 = getelementptr inbounds float* %tmp3549, i64 1
+  %tmp3551 = getelementptr inbounds float* %tmp3550, i64 1
+  %tmp3552 = getelementptr inbounds float* %tmp3551, i64 1
+  %tmp3553 = getelementptr inbounds float* %tmp3552, i64 1
+  %tmp3554 = getelementptr inbounds float* %tmp3553, i64 1
+  %tmp3555 = getelementptr inbounds float* %tmp3554, i64 1
+  %tmp3556 = getelementptr inbounds float* %tmp3555, i64 1
+  %tmp3557 = getelementptr inbounds float* %tmp3556, i64 1
+  %tmp3558 = getelementptr inbounds float* %tmp3557, i64 1
+  %tmp3559 = getelementptr inbounds float* %tmp3558, i64 1
+  %tmp3560 = getelementptr inbounds float* %tmp3559, i64 1
+  %tmp3561 = getelementptr inbounds float* %tmp3560, i64 1
+  %tmp3562 = getelementptr inbounds float* %tmp3561, i64 1
+  %tmp3563 = getelementptr inbounds float* %tmp3562, i64 1
+  %tmp3564 = getelementptr inbounds float* %tmp3563, i64 1
+  %tmp3565 = getelementptr inbounds float* %tmp3564, i64 1
+  %tmp3566 = getelementptr inbounds float* %tmp3565, i64 1
+  %tmp3567 = getelementptr inbounds float* %tmp3566, i64 1
+  %tmp3568 = getelementptr inbounds float* %tmp3567, i64 1
+  %tmp3569 = getelementptr inbounds float* %tmp3568, i64 1
+  %tmp3570 = getelementptr inbounds float* %tmp3569, i64 1
+  %tmp3571 = getelementptr inbounds float* %tmp3570, i64 1
+  %tmp3572 = getelementptr inbounds float* %tmp3571, i64 1
+  %tmp3573 = getelementptr inbounds float* %tmp3572, i64 1
+  %tmp3574 = getelementptr inbounds float* %tmp3573, i64 1
+  %tmp3575 = getelementptr inbounds float* %tmp3574, i64 1
+  %tmp3576 = getelementptr inbounds float* %tmp3575, i64 1
+  %tmp3577 = getelementptr inbounds float* %tmp3576, i64 1
+  %tmp3578 = getelementptr inbounds float* %tmp3577, i64 1
+  %tmp3579 = getelementptr inbounds float* %tmp3578, i64 1
+  %tmp3580 = getelementptr inbounds float* %tmp3579, i64 1
+  %tmp3581 = getelementptr inbounds float* %tmp3580, i64 1
+  %tmp3582 = getelementptr inbounds float* %tmp3581, i64 1
+  %tmp3583 = getelementptr inbounds float* %tmp3582, i64 1
+  %tmp3584 = getelementptr inbounds float* %tmp3583, i64 1
+  %tmp3585 = getelementptr inbounds float* %tmp3584, i64 1
+  %tmp3586 = getelementptr inbounds float* %tmp3585, i64 1
+  %tmp3587 = getelementptr inbounds float* %tmp3586, i64 1
+  %tmp3588 = getelementptr inbounds float* %tmp3587, i64 1
+  %tmp3589 = getelementptr inbounds float* %tmp3588, i64 1
+  %tmp3590 = getelementptr inbounds float* %tmp3589, i64 1
+  %tmp3591 = getelementptr inbounds float* %tmp3590, i64 1
+  %tmp3592 = getelementptr inbounds float* %tmp3591, i64 1
+  %tmp3593 = getelementptr inbounds float* %tmp3592, i64 1
+  %tmp3594 = getelementptr inbounds float* %tmp3593, i64 1
+  %tmp3595 = getelementptr inbounds float* %tmp3594, i64 1
+  %tmp3596 = getelementptr inbounds float* %tmp3595, i64 1
+  %tmp3597 = getelementptr inbounds float* %tmp3596, i64 1
+  %tmp3598 = getelementptr inbounds float* %tmp3597, i64 1
+  %tmp3599 = getelementptr inbounds float* %tmp3598, i64 1
+  %tmp3600 = getelementptr inbounds float* %tmp3599, i64 1
+  %tmp3601 = getelementptr inbounds float* %tmp3600, i64 1
+  %tmp3602 = getelementptr inbounds float* %tmp3601, i64 1
+  %tmp3603 = getelementptr inbounds float* %tmp3602, i64 1
+  %tmp3604 = getelementptr inbounds float* %tmp3603, i64 1
+  %tmp3605 = getelementptr inbounds float* %tmp3604, i64 1
+  %tmp3606 = getelementptr inbounds float* %tmp3605, i64 1
+  %tmp3607 = getelementptr inbounds float* %tmp3606, i64 1
+  %tmp3608 = getelementptr inbounds float* %tmp3607, i64 1
+  %tmp3609 = getelementptr inbounds float* %tmp3608, i64 1
+  %tmp3610 = getelementptr inbounds float* %tmp3609, i64 1
+  %tmp3611 = getelementptr inbounds float* %tmp3610, i64 1
+  %tmp3612 = getelementptr inbounds float* %tmp3611, i64 1
+  %tmp3613 = getelementptr inbounds float* %tmp3612, i64 1
+  %tmp3614 = getelementptr inbounds float* %tmp3613, i64 1
+  %tmp3615 = getelementptr inbounds float* %tmp3614, i64 1
+  %tmp3616 = getelementptr inbounds float* %tmp3615, i64 1
+  %tmp3617 = getelementptr inbounds float* %tmp3616, i64 1
+  %tmp3618 = getelementptr inbounds float* %tmp3617, i64 1
+  %tmp3619 = getelementptr inbounds float* %tmp3618, i64 1
+  %tmp3620 = getelementptr inbounds float* %tmp3619, i64 1
+  %tmp3621 = getelementptr inbounds float* %tmp3620, i64 1
+  %tmp3622 = getelementptr inbounds float* %tmp3621, i64 1
+  %tmp3623 = getelementptr inbounds float* %tmp3622, i64 1
+  %tmp3624 = getelementptr inbounds float* %tmp3623, i64 1
+  %tmp3625 = getelementptr inbounds float* %tmp3624, i64 1
+  %tmp3626 = getelementptr inbounds float* %tmp3625, i64 1
+  %tmp3627 = getelementptr inbounds float* %tmp3626, i64 1
+  %tmp3628 = getelementptr inbounds float* %tmp3627, i64 1
+  %tmp3629 = getelementptr inbounds float* %tmp3628, i64 1
+  %tmp3630 = getelementptr inbounds float* %tmp3629, i64 1
+  %tmp3631 = getelementptr inbounds float* %tmp3630, i64 1
+  %tmp3632 = getelementptr inbounds float* %tmp3631, i64 1
+  %tmp3633 = getelementptr inbounds float* %tmp3632, i64 1
+  %tmp3634 = getelementptr inbounds float* %tmp3633, i64 1
+  %tmp3635 = getelementptr inbounds float* %tmp3634, i64 1
+  %tmp3636 = getelementptr inbounds float* %tmp3635, i64 1
+  %tmp3637 = getelementptr inbounds float* %tmp3636, i64 1
+  %tmp3638 = getelementptr inbounds float* %tmp3637, i64 1
+  %tmp3639 = getelementptr inbounds float* %tmp3638, i64 1
+  %tmp3640 = getelementptr inbounds float* %tmp3639, i64 1
+  %tmp3641 = getelementptr inbounds float* %tmp3640, i64 1
+  %tmp3642 = getelementptr inbounds float* %tmp3641, i64 1
+  %tmp3643 = getelementptr inbounds float* %tmp3642, i64 1
+  %tmp3644 = getelementptr inbounds float* %tmp3643, i64 1
+  %tmp3645 = getelementptr inbounds float* %tmp3644, i64 1
+  %tmp3646 = getelementptr inbounds float* %tmp3645, i64 1
+  %tmp3647 = getelementptr inbounds float* %tmp3646, i64 1
+  %tmp3648 = getelementptr inbounds float* %tmp3647, i64 1
+  %tmp3649 = getelementptr inbounds float* %tmp3648, i64 1
+  %tmp3650 = getelementptr inbounds float* %tmp3649, i64 1
+  %tmp3651 = getelementptr inbounds float* %tmp3650, i64 1
+  %tmp3652 = getelementptr inbounds float* %tmp3651, i64 1
+  %tmp3653 = getelementptr inbounds float* %tmp3652, i64 1
+  %tmp3654 = getelementptr inbounds float* %tmp3653, i64 1
+  %tmp3655 = getelementptr inbounds float* %tmp3654, i64 1
+  %tmp3656 = getelementptr inbounds float* %tmp3655, i64 1
+  %tmp3657 = getelementptr inbounds float* %tmp3656, i64 1
+  %tmp3658 = getelementptr inbounds float* %tmp3657, i64 1
+  %tmp3659 = getelementptr inbounds float* %tmp3658, i64 1
+  %tmp3660 = getelementptr inbounds float* %tmp3659, i64 1
+  %tmp3661 = getelementptr inbounds float* %tmp3660, i64 1
+  %tmp3662 = getelementptr inbounds float* %tmp3661, i64 1
+  %tmp3663 = getelementptr inbounds float* %tmp3662, i64 1
+  %tmp3664 = getelementptr inbounds float* %tmp3663, i64 1
+  %tmp3665 = getelementptr inbounds float* %tmp3664, i64 1
+  %tmp3666 = getelementptr inbounds float* %tmp3665, i64 1
+  %tmp3667 = getelementptr inbounds float* %tmp3666, i64 1
+  %tmp3668 = getelementptr inbounds float* %tmp3667, i64 1
+  %tmp3669 = getelementptr inbounds float* %tmp3668, i64 1
+  %tmp3670 = getelementptr inbounds float* %tmp3669, i64 1
+  %tmp3671 = getelementptr inbounds float* %tmp3670, i64 1
+  %tmp3672 = getelementptr inbounds float* %tmp3671, i64 1
+  %tmp3673 = getelementptr inbounds float* %tmp3672, i64 1
+  %tmp3674 = getelementptr inbounds float* %tmp3673, i64 1
+  %tmp3675 = getelementptr inbounds float* %tmp3674, i64 1
+  %tmp3676 = getelementptr inbounds float* %tmp3675, i64 1
+  %tmp3677 = getelementptr inbounds float* %tmp3676, i64 1
+  %tmp3678 = getelementptr inbounds float* %tmp3677, i64 1
+  %tmp3679 = getelementptr inbounds float* %tmp3678, i64 1
+  %tmp3680 = getelementptr inbounds float* %tmp3679, i64 1
+  %tmp3681 = getelementptr inbounds float* %tmp3680, i64 1
+  %tmp3682 = getelementptr inbounds float* %tmp3681, i64 1
+  %tmp3683 = getelementptr inbounds float* %tmp3682, i64 1
+  %tmp3684 = getelementptr inbounds float* %tmp3683, i64 1
+  %tmp3685 = getelementptr inbounds float* %tmp3684, i64 1
+  %tmp3686 = getelementptr inbounds float* %tmp3685, i64 1
+  %tmp3687 = getelementptr inbounds float* %tmp3686, i64 1
+  %tmp3688 = getelementptr inbounds float* %tmp3687, i64 1
+  %tmp3689 = getelementptr inbounds float* %tmp3688, i64 1
+  %tmp3690 = getelementptr inbounds float* %tmp3689, i64 1
+  %tmp3691 = getelementptr inbounds float* %tmp3690, i64 1
+  %tmp3692 = getelementptr inbounds float* %tmp3691, i64 1
+  %tmp3693 = getelementptr inbounds float* %tmp3692, i64 1
+  %tmp3694 = getelementptr inbounds float* %tmp3693, i64 1
+  %tmp3695 = getelementptr inbounds float* %tmp3694, i64 1
+  %tmp3696 = getelementptr inbounds float* %tmp3695, i64 1
+  %tmp3697 = getelementptr inbounds float* %tmp3696, i64 1
+  %tmp3698 = getelementptr inbounds float* %tmp3697, i64 1
+  %tmp3699 = getelementptr inbounds float* %tmp3698, i64 1
+  %tmp3700 = getelementptr inbounds float* %tmp3699, i64 1
+  %tmp3701 = getelementptr inbounds float* %tmp3700, i64 1
+  %tmp3702 = getelementptr inbounds float* %tmp3701, i64 1
+  %tmp3703 = getelementptr inbounds float* %tmp3702, i64 1
+  %tmp3704 = getelementptr inbounds float* %tmp3703, i64 1
+  %tmp3705 = getelementptr inbounds float* %tmp3704, i64 1
+  %tmp3706 = getelementptr inbounds float* %tmp3705, i64 1
+  %tmp3707 = getelementptr inbounds float* %tmp3706, i64 1
+  %tmp3708 = getelementptr inbounds float* %tmp3707, i64 1
+  %tmp3709 = getelementptr inbounds float* %tmp3708, i64 1
+  %tmp3710 = getelementptr inbounds float* %tmp3709, i64 1
+  %tmp3711 = getelementptr inbounds float* %tmp3710, i64 1
+  %tmp3712 = getelementptr inbounds float* %tmp3711, i64 1
+  %tmp3713 = getelementptr inbounds float* %tmp3712, i64 1
+  %tmp3714 = getelementptr inbounds float* %tmp3713, i64 1
+  %tmp3715 = getelementptr inbounds float* %tmp3714, i64 1
+  %tmp3716 = getelementptr inbounds float* %tmp3715, i64 1
+  %tmp3717 = getelementptr inbounds float* %tmp3716, i64 1
+  %tmp3718 = getelementptr inbounds float* %tmp3717, i64 1
+  %tmp3719 = getelementptr inbounds float* %tmp3718, i64 1
+  %tmp3720 = getelementptr inbounds float* %tmp3719, i64 1
+  %tmp3721 = getelementptr inbounds float* %tmp3720, i64 1
+  %tmp3722 = getelementptr inbounds float* %tmp3721, i64 1
+  %tmp3723 = getelementptr inbounds float* %tmp3722, i64 1
+  %tmp3724 = getelementptr inbounds float* %tmp3723, i64 1
+  %tmp3725 = getelementptr inbounds float* %tmp3724, i64 1
+  %tmp3726 = getelementptr inbounds float* %tmp3725, i64 1
+  %tmp3727 = getelementptr inbounds float* %tmp3726, i64 1
+  %tmp3728 = getelementptr inbounds float* %tmp3727, i64 1
+  %tmp3729 = getelementptr inbounds float* %tmp3728, i64 1
+  %tmp3730 = getelementptr inbounds float* %tmp3729, i64 1
+  %tmp3731 = getelementptr inbounds float* %tmp3730, i64 1
+  %tmp3732 = getelementptr inbounds float* %tmp3731, i64 1
+  %tmp3733 = getelementptr inbounds float* %tmp3732, i64 1
+  %tmp3734 = getelementptr inbounds float* %tmp3733, i64 1
+  %tmp3735 = getelementptr inbounds float* %tmp3734, i64 1
+  %tmp3736 = getelementptr inbounds float* %tmp3735, i64 1
+  %tmp3737 = getelementptr inbounds float* %tmp3736, i64 1
+  %tmp3738 = getelementptr inbounds float* %tmp3737, i64 1
+  %tmp3739 = getelementptr inbounds float* %tmp3738, i64 1
+  %tmp3740 = getelementptr inbounds float* %tmp3739, i64 1
+  %tmp3741 = getelementptr inbounds float* %tmp3740, i64 1
+  %tmp3742 = getelementptr inbounds float* %tmp3741, i64 1
+  %tmp3743 = getelementptr inbounds float* %tmp3742, i64 1
+  %tmp3744 = getelementptr inbounds float* %tmp3743, i64 1
+  %tmp3745 = getelementptr inbounds float* %tmp3744, i64 1
+  %tmp3746 = getelementptr inbounds float* %tmp3745, i64 1
+  %tmp3747 = getelementptr inbounds float* %tmp3746, i64 1
+  %tmp3748 = getelementptr inbounds float* %tmp3747, i64 1
+  %tmp3749 = getelementptr inbounds float* %tmp3748, i64 1
+  %tmp3750 = getelementptr inbounds float* %tmp3749, i64 1
+  %tmp3751 = getelementptr inbounds float* %tmp3750, i64 1
+  %tmp3752 = getelementptr inbounds float* %tmp3751, i64 1
+  %tmp3753 = getelementptr inbounds float* %tmp3752, i64 1
+  %tmp3754 = getelementptr inbounds float* %tmp3753, i64 1
+  %tmp3755 = getelementptr inbounds float* %tmp3754, i64 1
+  %tmp3756 = getelementptr inbounds float* %tmp3755, i64 1
+  %tmp3757 = getelementptr inbounds float* %tmp3756, i64 1
+  %tmp3758 = getelementptr inbounds float* %tmp3757, i64 1
+  %tmp3759 = getelementptr inbounds float* %tmp3758, i64 1
+  %tmp3760 = getelementptr inbounds float* %tmp3759, i64 1
+  %tmp3761 = getelementptr inbounds float* %tmp3760, i64 1
+  %tmp3762 = getelementptr inbounds float* %tmp3761, i64 1
+  %tmp3763 = getelementptr inbounds float* %tmp3762, i64 1
+  %tmp3764 = getelementptr inbounds float* %tmp3763, i64 1
+  %tmp3765 = getelementptr inbounds float* %tmp3764, i64 1
+  %tmp3766 = getelementptr inbounds float* %tmp3765, i64 1
+  %tmp3767 = getelementptr inbounds float* %tmp3766, i64 1
+  %tmp3768 = getelementptr inbounds float* %tmp3767, i64 1
+  %tmp3769 = getelementptr inbounds float* %tmp3768, i64 1
+  %tmp3770 = getelementptr inbounds float* %tmp3769, i64 1
+  %tmp3771 = getelementptr inbounds float* %tmp3770, i64 1
+  %tmp3772 = getelementptr inbounds float* %tmp3771, i64 1
+  %tmp3773 = getelementptr inbounds float* %tmp3772, i64 1
+  %tmp3774 = getelementptr inbounds float* %tmp3773, i64 1
+  %tmp3775 = getelementptr inbounds float* %tmp3774, i64 1
+  %tmp3776 = getelementptr inbounds float* %tmp3775, i64 1
+  %tmp3777 = getelementptr inbounds float* %tmp3776, i64 1
+  %tmp3778 = getelementptr inbounds float* %tmp3777, i64 1
+  %tmp3779 = getelementptr inbounds float* %tmp3778, i64 1
+  %tmp3780 = getelementptr inbounds float* %tmp3779, i64 1
+  %tmp3781 = getelementptr inbounds float* %tmp3780, i64 1
+  %tmp3782 = getelementptr inbounds float* %tmp3781, i64 1
+  %tmp3783 = getelementptr inbounds float* %tmp3782, i64 1
+  %tmp3784 = getelementptr inbounds float* %tmp3783, i64 1
+  %tmp3785 = getelementptr inbounds float* %tmp3784, i64 1
+  %tmp3786 = getelementptr inbounds float* %tmp3785, i64 1
+  %tmp3787 = getelementptr inbounds float* %tmp3786, i64 1
+  %tmp3788 = getelementptr inbounds float* %tmp3787, i64 1
+  %tmp3789 = getelementptr inbounds float* %tmp3788, i64 1
+  %tmp3790 = getelementptr inbounds float* %tmp3789, i64 1
+  %tmp3791 = getelementptr inbounds float* %tmp3790, i64 1
+  %tmp3792 = getelementptr inbounds float* %tmp3791, i64 1
+  %tmp3793 = getelementptr inbounds float* %tmp3792, i64 1
+  %tmp3794 = getelementptr inbounds float* %tmp3793, i64 1
+  %tmp3795 = getelementptr inbounds float* %tmp3794, i64 1
+  %tmp3796 = getelementptr inbounds float* %tmp3795, i64 1
+  %tmp3797 = getelementptr inbounds float* %tmp3796, i64 1
+  %tmp3798 = getelementptr inbounds float* %tmp3797, i64 1
+  %tmp3799 = getelementptr inbounds float* %tmp3798, i64 1
+  %tmp3800 = getelementptr inbounds float* %tmp3799, i64 1
+  %tmp3801 = getelementptr inbounds float* %tmp3800, i64 1
+  %tmp3802 = getelementptr inbounds float* %tmp3801, i64 1
+  %tmp3803 = getelementptr inbounds float* %tmp3802, i64 1
+  %tmp3804 = getelementptr inbounds float* %tmp3803, i64 1
+  %tmp3805 = getelementptr inbounds float* %tmp3804, i64 1
+  %tmp3806 = getelementptr inbounds float* %tmp3805, i64 1
+  %tmp3807 = getelementptr inbounds float* %tmp3806, i64 1
+  %tmp3808 = getelementptr inbounds float* %tmp3807, i64 1
+  %tmp3809 = getelementptr inbounds float* %tmp3808, i64 1
+  %tmp3810 = getelementptr inbounds float* %tmp3809, i64 1
+  %tmp3811 = getelementptr inbounds float* %tmp3810, i64 1
+  %tmp3812 = getelementptr inbounds float* %tmp3811, i64 1
+  %tmp3813 = getelementptr inbounds float* %tmp3812, i64 1
+  %tmp3814 = getelementptr inbounds float* %tmp3813, i64 1
+  %tmp3815 = getelementptr inbounds float* %tmp3814, i64 1
+  %tmp3816 = getelementptr inbounds float* %tmp3815, i64 1
+  %tmp3817 = getelementptr inbounds float* %tmp3816, i64 1
+  %tmp3818 = getelementptr inbounds float* %tmp3817, i64 1
+  %tmp3819 = getelementptr inbounds float* %tmp3818, i64 1
+  %tmp3820 = getelementptr inbounds float* %tmp3819, i64 1
+  %tmp3821 = getelementptr inbounds float* %tmp3820, i64 1
+  %tmp3822 = getelementptr inbounds float* %tmp3821, i64 1
+  %tmp3823 = getelementptr inbounds float* %tmp3822, i64 1
+  %tmp3824 = getelementptr inbounds float* %tmp3823, i64 1
+  %tmp3825 = getelementptr inbounds float* %tmp3824, i64 1
+  %tmp3826 = getelementptr inbounds float* %tmp3825, i64 1
+  %tmp3827 = getelementptr inbounds float* %tmp3826, i64 1
+  %tmp3828 = getelementptr inbounds float* %tmp3827, i64 1
+  %tmp3829 = getelementptr inbounds float* %tmp3828, i64 1
+  %tmp3830 = getelementptr inbounds float* %tmp3829, i64 1
+  %tmp3831 = getelementptr inbounds float* %tmp3830, i64 1
+  %tmp3832 = getelementptr inbounds float* %tmp3831, i64 1
+  %tmp3833 = getelementptr inbounds float* %tmp3832, i64 1
+  %tmp3834 = getelementptr inbounds float* %tmp3833, i64 1
+  %tmp3835 = getelementptr inbounds float* %tmp3834, i64 1
+  %tmp3836 = getelementptr inbounds float* %tmp3835, i64 1
+  %tmp3837 = getelementptr inbounds float* %tmp3836, i64 1
+  %tmp3838 = getelementptr inbounds float* %tmp3837, i64 1
+  %tmp3839 = getelementptr inbounds float* %tmp3838, i64 1
+  %tmp3840 = getelementptr inbounds float* %tmp3839, i64 1
+  %tmp3841 = getelementptr inbounds float* %tmp3840, i64 1
+  %tmp3842 = getelementptr inbounds float* %tmp3841, i64 1
+  %tmp3843 = getelementptr inbounds float* %tmp3842, i64 1
+  %tmp3844 = getelementptr inbounds float* %tmp3843, i64 1
+  %tmp3845 = getelementptr inbounds float* %tmp3844, i64 1
+  %tmp3846 = getelementptr inbounds float* %tmp3845, i64 1
+  %tmp3847 = getelementptr inbounds float* %tmp3846, i64 1
+  %tmp3848 = getelementptr inbounds float* %tmp3847, i64 1
+  %tmp3849 = getelementptr inbounds float* %tmp3848, i64 1
+  %tmp3850 = getelementptr inbounds float* %tmp3849, i64 1
+  %tmp3851 = getelementptr inbounds float* %tmp3850, i64 1
+  %tmp3852 = getelementptr inbounds float* %tmp3851, i64 1
+  %tmp3853 = getelementptr inbounds float* %tmp3852, i64 1
+  %tmp3854 = getelementptr inbounds float* %tmp3853, i64 1
+  %tmp3855 = getelementptr inbounds float* %tmp3854, i64 1
+  %tmp3856 = getelementptr inbounds float* %tmp3855, i64 1
+  %tmp3857 = getelementptr inbounds float* %tmp3856, i64 1
+  %tmp3858 = getelementptr inbounds float* %tmp3857, i64 1
+  %tmp3859 = getelementptr inbounds float* %tmp3858, i64 1
+  %tmp3860 = getelementptr inbounds float* %tmp3859, i64 1
+  %tmp3861 = getelementptr inbounds float* %tmp3860, i64 1
+  %tmp3862 = getelementptr inbounds float* %tmp3861, i64 1
+  %tmp3863 = getelementptr inbounds float* %tmp3862, i64 1
+  %tmp3864 = getelementptr inbounds float* %tmp3863, i64 1
+  %tmp3865 = getelementptr inbounds float* %tmp3864, i64 1
+  %tmp3866 = getelementptr inbounds float* %tmp3865, i64 1
+  %tmp3867 = getelementptr inbounds float* %tmp3866, i64 1
+  %tmp3868 = getelementptr inbounds float* %tmp3867, i64 1
+  %tmp3869 = getelementptr inbounds float* %tmp3868, i64 1
+  %tmp3870 = getelementptr inbounds float* %tmp3869, i64 1
+  %tmp3871 = getelementptr inbounds float* %tmp3870, i64 1
+  %tmp3872 = getelementptr inbounds float* %tmp3871, i64 1
+  %tmp3873 = getelementptr inbounds float* %tmp3872, i64 1
+  %tmp3874 = getelementptr inbounds float* %tmp3873, i64 1
+  %tmp3875 = getelementptr inbounds float* %tmp3874, i64 1
+  %tmp3876 = getelementptr inbounds float* %tmp3875, i64 1
+  %tmp3877 = getelementptr inbounds float* %tmp3876, i64 1
+  %tmp3878 = getelementptr inbounds float* %tmp3877, i64 1
+  %tmp3879 = getelementptr inbounds float* %tmp3878, i64 1
+  %tmp3880 = getelementptr inbounds float* %tmp3879, i64 1
+  %tmp3881 = getelementptr inbounds float* %tmp3880, i64 1
+  %tmp3882 = getelementptr inbounds float* %tmp3881, i64 1
+  %tmp3883 = getelementptr inbounds float* %tmp3882, i64 1
+  %tmp3884 = getelementptr inbounds float* %tmp3883, i64 1
+  %tmp3885 = getelementptr inbounds float* %tmp3884, i64 1
+  %tmp3886 = getelementptr inbounds float* %tmp3885, i64 1
+  %tmp3887 = getelementptr inbounds float* %tmp3886, i64 1
+  %tmp3888 = getelementptr inbounds float* %tmp3887, i64 1
+  %tmp3889 = getelementptr inbounds float* %tmp3888, i64 1
+  %tmp3890 = getelementptr inbounds float* %tmp3889, i64 1
+  %tmp3891 = getelementptr inbounds float* %tmp3890, i64 1
+  %tmp3892 = getelementptr inbounds float* %tmp3891, i64 1
+  %tmp3893 = getelementptr inbounds float* %tmp3892, i64 1
+  %tmp3894 = getelementptr inbounds float* %tmp3893, i64 1
+  %tmp3895 = getelementptr inbounds float* %tmp3894, i64 1
+  %tmp3896 = getelementptr inbounds float* %tmp3895, i64 1
+  %tmp3897 = getelementptr inbounds float* %tmp3896, i64 1
+  %tmp3898 = getelementptr inbounds float* %tmp3897, i64 1
+  %tmp3899 = getelementptr inbounds float* %tmp3898, i64 1
+  %tmp3900 = getelementptr inbounds float* %tmp3899, i64 1
+  %tmp3901 = getelementptr inbounds float* %tmp3900, i64 1
+  %tmp3902 = getelementptr inbounds float* %tmp3901, i64 1
+  %tmp3903 = getelementptr inbounds float* %tmp3902, i64 1
+  %tmp3904 = getelementptr inbounds float* %tmp3903, i64 1
+  %tmp3905 = getelementptr inbounds float* %tmp3904, i64 1
+  %tmp3906 = getelementptr inbounds float* %tmp3905, i64 1
+  %tmp3907 = getelementptr inbounds float* %tmp3906, i64 1
+  %tmp3908 = getelementptr inbounds float* %tmp3907, i64 1
+  %tmp3909 = getelementptr inbounds float* %tmp3908, i64 1
+  %tmp3910 = getelementptr inbounds float* %tmp3909, i64 1
+  %tmp3911 = getelementptr inbounds float* %tmp3910, i64 1
+  %tmp3912 = getelementptr inbounds float* %tmp3911, i64 1
+  %tmp3913 = getelementptr inbounds float* %tmp3912, i64 1
+  %tmp3914 = getelementptr inbounds float* %tmp3913, i64 1
+  %tmp3915 = getelementptr inbounds float* %tmp3914, i64 1
+  %tmp3916 = getelementptr inbounds float* %tmp3915, i64 1
+  %tmp3917 = getelementptr inbounds float* %tmp3916, i64 1
+  %tmp3918 = getelementptr inbounds float* %tmp3917, i64 1
+  %tmp3919 = getelementptr inbounds float* %tmp3918, i64 1
+  %tmp3920 = getelementptr inbounds float* %tmp3919, i64 1
+  %tmp3921 = getelementptr inbounds float* %tmp3920, i64 1
+  %tmp3922 = getelementptr inbounds float* %tmp3921, i64 1
+  %tmp3923 = getelementptr inbounds float* %tmp3922, i64 1
+  %tmp3924 = getelementptr inbounds float* %tmp3923, i64 1
+  %tmp3925 = getelementptr inbounds float* %tmp3924, i64 1
+  %tmp3926 = getelementptr inbounds float* %tmp3925, i64 1
+  %tmp3927 = getelementptr inbounds float* %tmp3926, i64 1
+  %tmp3928 = getelementptr inbounds float* %tmp3927, i64 1
+  %tmp3929 = getelementptr inbounds float* %tmp3928, i64 1
+  %tmp3930 = getelementptr inbounds float* %tmp3929, i64 1
+  %tmp3931 = getelementptr inbounds float* %tmp3930, i64 1
+  %tmp3932 = getelementptr inbounds float* %tmp3931, i64 1
+  %tmp3933 = getelementptr inbounds float* %tmp3932, i64 1
+  %tmp3934 = getelementptr inbounds float* %tmp3933, i64 1
+  %tmp3935 = getelementptr inbounds float* %tmp3934, i64 1
+  %tmp3936 = getelementptr inbounds float* %tmp3935, i64 1
+  %tmp3937 = getelementptr inbounds float* %tmp3936, i64 1
+  %tmp3938 = getelementptr inbounds float* %tmp3937, i64 1
+  %tmp3939 = getelementptr inbounds float* %tmp3938, i64 1
+  %tmp3940 = getelementptr inbounds float* %tmp3939, i64 1
+  %tmp3941 = getelementptr inbounds float* %tmp3940, i64 1
+  %tmp3942 = getelementptr inbounds float* %tmp3941, i64 1
+  %tmp3943 = getelementptr inbounds float* %tmp3942, i64 1
+  %tmp3944 = getelementptr inbounds float* %tmp3943, i64 1
+  %tmp3945 = getelementptr inbounds float* %tmp3944, i64 1
+  %tmp3946 = getelementptr inbounds float* %tmp3945, i64 1
+  %tmp3947 = getelementptr inbounds float* %tmp3946, i64 1
+  %tmp3948 = getelementptr inbounds float* %tmp3947, i64 1
+  %tmp3949 = getelementptr inbounds float* %tmp3948, i64 1
+  %tmp3950 = getelementptr inbounds float* %tmp3949, i64 1
+  %tmp3951 = getelementptr inbounds float* %tmp3950, i64 1
+  %tmp3952 = getelementptr inbounds float* %tmp3951, i64 1
+  %tmp3953 = getelementptr inbounds float* %tmp3952, i64 1
+  %tmp3954 = getelementptr inbounds float* %tmp3953, i64 1
+  %tmp3955 = getelementptr inbounds float* %tmp3954, i64 1
+  %tmp3956 = getelementptr inbounds float* %tmp3955, i64 1
+  %tmp3957 = getelementptr inbounds float* %tmp3956, i64 1
+  %tmp3958 = getelementptr inbounds float* %tmp3957, i64 1
+  %tmp3959 = getelementptr inbounds float* %tmp3958, i64 1
+  %tmp3960 = getelementptr inbounds float* %tmp3959, i64 1
+  %tmp3961 = getelementptr inbounds float* %tmp3960, i64 1
+  %tmp3962 = getelementptr inbounds float* %tmp3961, i64 1
+  %tmp3963 = getelementptr inbounds float* %tmp3962, i64 1
+  %tmp3964 = getelementptr inbounds float* %tmp3963, i64 1
+  %tmp3965 = getelementptr inbounds float* %tmp3964, i64 1
+  %tmp3966 = getelementptr inbounds float* %tmp3965, i64 1
+  %tmp3967 = getelementptr inbounds float* %tmp3966, i64 1
+  %tmp3968 = getelementptr inbounds float* %tmp3967, i64 1
+  %tmp3969 = getelementptr inbounds float* %tmp3968, i64 1
+  %tmp3970 = getelementptr inbounds float* %tmp3969, i64 1
+  %tmp3971 = getelementptr inbounds float* %tmp3970, i64 1
+  %tmp3972 = getelementptr inbounds float* %tmp3971, i64 1
+  %tmp3973 = getelementptr inbounds float* %tmp3972, i64 1
+  %tmp3974 = getelementptr inbounds float* %tmp3973, i64 1
+  %tmp3975 = getelementptr inbounds float* %tmp3974, i64 1
+  %tmp3976 = getelementptr inbounds float* %tmp3975, i64 1
+  %tmp3977 = getelementptr inbounds float* %tmp3976, i64 1
+  %tmp3978 = getelementptr inbounds float* %tmp3977, i64 1
+  %tmp3979 = getelementptr inbounds float* %tmp3978, i64 1
+  %tmp3980 = getelementptr inbounds float* %tmp3979, i64 1
+  %tmp3981 = getelementptr inbounds float* %tmp3980, i64 1
+  %tmp3982 = getelementptr inbounds float* %tmp3981, i64 1
+  %tmp3983 = getelementptr inbounds float* %tmp3982, i64 1
+  %tmp3984 = getelementptr inbounds float* %tmp3983, i64 1
+  %tmp3985 = getelementptr inbounds float* %tmp3984, i64 1
+  %tmp3986 = getelementptr inbounds float* %tmp3985, i64 1
+  %tmp3987 = getelementptr inbounds float* %tmp3986, i64 1
+  %tmp3988 = getelementptr inbounds float* %tmp3987, i64 1
+  %tmp3989 = getelementptr inbounds float* %tmp3988, i64 1
+  %tmp3990 = getelementptr inbounds float* %tmp3989, i64 1
+  %tmp3991 = getelementptr inbounds float* %tmp3990, i64 1
+  %tmp3992 = getelementptr inbounds float* %tmp3991, i64 1
+  %tmp3993 = getelementptr inbounds float* %tmp3992, i64 1
+  %tmp3994 = getelementptr inbounds float* %tmp3993, i64 1
+  %tmp3995 = getelementptr inbounds float* %tmp3994, i64 1
+  %tmp3996 = getelementptr inbounds float* %tmp3995, i64 1
+  %tmp3997 = getelementptr inbounds float* %tmp3996, i64 1
+  %tmp3998 = getelementptr inbounds float* %tmp3997, i64 1
+  %tmp3999 = getelementptr inbounds float* %tmp3998, i64 1
+  %tmp4000 = getelementptr inbounds float* %tmp3999, i64 1
+  %tmp4001 = getelementptr inbounds float* %tmp4000, i64 1
+  %tmp4002 = getelementptr inbounds float* %tmp4001, i64 1
+  %tmp4003 = getelementptr inbounds float* %tmp4002, i64 1
+  %tmp4004 = getelementptr inbounds float* %tmp4003, i64 1
+  %tmp4005 = getelementptr inbounds float* %tmp4004, i64 1
+  %tmp4006 = getelementptr inbounds float* %tmp4005, i64 1
+  %tmp4007 = getelementptr inbounds float* %tmp4006, i64 1
+  %tmp4008 = getelementptr inbounds float* %tmp4007, i64 1
+  %tmp4009 = getelementptr inbounds float* %tmp4008, i64 1
+  %tmp4010 = getelementptr inbounds float* %tmp4009, i64 1
+  %tmp4011 = getelementptr inbounds float* %tmp4010, i64 1
+  %tmp4012 = getelementptr inbounds float* %tmp4011, i64 1
+  %tmp4013 = getelementptr inbounds float* %tmp4012, i64 1
+  %tmp4014 = getelementptr inbounds float* %tmp4013, i64 1
+  %tmp4015 = getelementptr inbounds float* %tmp4014, i64 1
+  %tmp4016 = getelementptr inbounds float* %tmp4015, i64 1
+  %tmp4017 = getelementptr inbounds float* %tmp4016, i64 1
+  %tmp4018 = getelementptr inbounds float* %tmp4017, i64 1
+  %tmp4019 = getelementptr inbounds float* %tmp4018, i64 1
+  %tmp4020 = getelementptr inbounds float* %tmp4019, i64 1
+  %tmp4021 = getelementptr inbounds float* %tmp4020, i64 1
+  %tmp4022 = getelementptr inbounds float* %tmp4021, i64 1
+  %tmp4023 = getelementptr inbounds float* %tmp4022, i64 1
+  %tmp4024 = getelementptr inbounds float* %tmp4023, i64 1
+  %tmp4025 = getelementptr inbounds float* %tmp4024, i64 1
+  %tmp4026 = getelementptr inbounds float* %tmp4025, i64 1
+  %tmp4027 = getelementptr inbounds float* %tmp4026, i64 1
+  %tmp4028 = getelementptr inbounds float* %tmp4027, i64 1
+  %tmp4029 = getelementptr inbounds float* %tmp4028, i64 1
+  %tmp4030 = getelementptr inbounds float* %tmp4029, i64 1
+  %tmp4031 = getelementptr inbounds float* %tmp4030, i64 1
+  %tmp4032 = getelementptr inbounds float* %tmp4031, i64 1
+  %tmp4033 = getelementptr inbounds float* %tmp4032, i64 1
+  %tmp4034 = getelementptr inbounds float* %tmp4033, i64 1
+  %tmp4035 = getelementptr inbounds float* %tmp4034, i64 1
+  %tmp4036 = getelementptr inbounds float* %tmp4035, i64 1
+  %tmp4037 = getelementptr inbounds float* %tmp4036, i64 1
+  %tmp4038 = getelementptr inbounds float* %tmp4037, i64 1
+  %tmp4039 = getelementptr inbounds float* %tmp4038, i64 1
+  %tmp4040 = getelementptr inbounds float* %tmp4039, i64 1
+  %tmp4041 = getelementptr inbounds float* %tmp4040, i64 1
+  %tmp4042 = getelementptr inbounds float* %tmp4041, i64 1
+  %tmp4043 = getelementptr inbounds float* %tmp4042, i64 1
+  %tmp4044 = getelementptr inbounds float* %tmp4043, i64 1
+  %tmp4045 = getelementptr inbounds float* %tmp4044, i64 1
+  %tmp4046 = getelementptr inbounds float* %tmp4045, i64 1
+  %tmp4047 = getelementptr inbounds float* %tmp4046, i64 1
+  %tmp4048 = getelementptr inbounds float* %tmp4047, i64 1
+  %tmp4049 = getelementptr inbounds float* %tmp4048, i64 1
+  %tmp4050 = getelementptr inbounds float* %tmp4049, i64 1
+  %tmp4051 = getelementptr inbounds float* %tmp4050, i64 1
+  %tmp4052 = getelementptr inbounds float* %tmp4051, i64 1
+  %tmp4053 = getelementptr inbounds float* %tmp4052, i64 1
+  %tmp4054 = getelementptr inbounds float* %tmp4053, i64 1
+  %tmp4055 = getelementptr inbounds float* %tmp4054, i64 1
+  %tmp4056 = getelementptr inbounds float* %tmp4055, i64 1
+  %tmp4057 = getelementptr inbounds float* %tmp4056, i64 1
+  %tmp4058 = getelementptr inbounds float* %tmp4057, i64 1
+  %tmp4059 = getelementptr inbounds float* %tmp4058, i64 1
+  %tmp4060 = getelementptr inbounds float* %tmp4059, i64 1
+  %tmp4061 = getelementptr inbounds float* %tmp4060, i64 1
+  %tmp4062 = getelementptr inbounds float* %tmp4061, i64 1
+  %tmp4063 = getelementptr inbounds float* %tmp4062, i64 1
+  %tmp4064 = getelementptr inbounds float* %tmp4063, i64 1
+  %tmp4065 = getelementptr inbounds float* %tmp4064, i64 1
+  %tmp4066 = getelementptr inbounds float* %tmp4065, i64 1
+  %tmp4067 = getelementptr inbounds float* %tmp4066, i64 1
+  %tmp4068 = getelementptr inbounds float* %tmp4067, i64 1
+  %tmp4069 = getelementptr inbounds float* %tmp4068, i64 1
+  %tmp4070 = getelementptr inbounds float* %tmp4069, i64 1
+  %tmp4071 = getelementptr inbounds float* %tmp4070, i64 1
+  %tmp4072 = getelementptr inbounds float* %tmp4071, i64 1
+  %tmp4073 = getelementptr inbounds float* %tmp4072, i64 1
+  %tmp4074 = getelementptr inbounds float* %tmp4073, i64 1
+  %tmp4075 = getelementptr inbounds float* %tmp4074, i64 1
+  %tmp4076 = getelementptr inbounds float* %tmp4075, i64 1
+  %tmp4077 = getelementptr inbounds float* %tmp4076, i64 1
+  %tmp4078 = getelementptr inbounds float* %tmp4077, i64 1
+  %tmp4079 = getelementptr inbounds float* %tmp4078, i64 1
+  %tmp4080 = getelementptr inbounds float* %tmp4079, i64 1
+  %tmp4081 = getelementptr inbounds float* %tmp4080, i64 1
+  %tmp4082 = getelementptr inbounds float* %tmp4081, i64 1
+  %tmp4083 = getelementptr inbounds float* %tmp4082, i64 1
+  %tmp4084 = getelementptr inbounds float* %tmp4083, i64 1
+  %tmp4085 = getelementptr inbounds float* %tmp4084, i64 1
+  %tmp4086 = getelementptr inbounds float* %tmp4085, i64 1
+  %tmp4087 = getelementptr inbounds float* %tmp4086, i64 1
+  %tmp4088 = getelementptr inbounds float* %tmp4087, i64 1
+  %tmp4089 = getelementptr inbounds float* %tmp4088, i64 1
+  %tmp4090 = getelementptr inbounds float* %tmp4089, i64 1
+  %tmp4091 = getelementptr inbounds float* %tmp4090, i64 1
+  %tmp4092 = getelementptr inbounds float* %tmp4091, i64 1
+  %tmp4093 = getelementptr inbounds float* %tmp4092, i64 1
+  %tmp4094 = getelementptr inbounds float* %tmp4093, i64 1
+  %tmp4095 = getelementptr inbounds float* %tmp4094, i64 1
+  %tmp4096 = getelementptr inbounds float* %tmp4095, i64 1
+  %tmp4097 = getelementptr inbounds float* %tmp4096, i64 1
+  %tmp4098 = getelementptr inbounds float* %tmp4097, i64 1
+  %tmp4099 = getelementptr inbounds float* %tmp4098, i64 1
+  %tmp4100 = getelementptr inbounds float* %tmp4099, i64 1
+  %tmp4101 = getelementptr inbounds float* %tmp4100, i64 1
+  %tmp4102 = getelementptr inbounds float* %tmp4101, i64 1
+  %tmp4103 = getelementptr inbounds float* %tmp4102, i64 1
+  %tmp4104 = getelementptr inbounds float* %tmp4103, i64 1
+  %tmp4105 = getelementptr inbounds float* %tmp4104, i64 1
+  %tmp4106 = getelementptr inbounds float* %tmp4105, i64 1
+  %tmp4107 = getelementptr inbounds float* %tmp4106, i64 1
+  %tmp4108 = getelementptr inbounds float* %tmp4107, i64 1
+  %tmp4109 = getelementptr inbounds float* %tmp4108, i64 1
+  %tmp4110 = getelementptr inbounds float* %tmp4109, i64 1
+  %tmp4111 = getelementptr inbounds float* %tmp4110, i64 1
+  %tmp4112 = getelementptr inbounds float* %tmp4111, i64 1
+  %tmp4113 = getelementptr inbounds float* %tmp4112, i64 1
+  %tmp4114 = getelementptr inbounds float* %tmp4113, i64 1
+  %tmp4115 = getelementptr inbounds float* %tmp4114, i64 1
+  %tmp4116 = getelementptr inbounds float* %tmp4115, i64 1
+  %tmp4117 = getelementptr inbounds float* %tmp4116, i64 1
+  %tmp4118 = getelementptr inbounds float* %tmp4117, i64 1
+  %tmp4119 = getelementptr inbounds float* %tmp4118, i64 1
+  %tmp4120 = getelementptr inbounds float* %tmp4119, i64 1
+  %tmp4121 = getelementptr inbounds float* %tmp4120, i64 1
+  %tmp4122 = getelementptr inbounds float* %tmp4121, i64 1
+  %tmp4123 = getelementptr inbounds float* %tmp4122, i64 1
+  %tmp4124 = getelementptr inbounds float* %tmp4123, i64 1
+  %tmp4125 = getelementptr inbounds float* %tmp4124, i64 1
+  %tmp4126 = getelementptr inbounds float* %tmp4125, i64 1
+  %tmp4127 = getelementptr inbounds float* %tmp4126, i64 1
+  %tmp4128 = getelementptr inbounds float* %tmp4127, i64 1
+  %tmp4129 = getelementptr inbounds float* %tmp4128, i64 1
+  %tmp4130 = getelementptr inbounds float* %tmp4129, i64 1
+  %tmp4131 = getelementptr inbounds float* %tmp4130, i64 1
+  %tmp4132 = getelementptr inbounds float* %tmp4131, i64 1
+  %tmp4133 = getelementptr inbounds float* %tmp4132, i64 1
+  %tmp4134 = getelementptr inbounds float* %tmp4133, i64 1
+  %tmp4135 = getelementptr inbounds float* %tmp4134, i64 1
+  %tmp4136 = getelementptr inbounds float* %tmp4135, i64 1
+  %tmp4137 = getelementptr inbounds float* %tmp4136, i64 1
+  %tmp4138 = getelementptr inbounds float* %tmp4137, i64 1
+  %tmp4139 = getelementptr inbounds float* %tmp4138, i64 1
+  %tmp4140 = getelementptr inbounds float* %tmp4139, i64 1
+  %tmp4141 = getelementptr inbounds float* %tmp4140, i64 1
+  %tmp4142 = getelementptr inbounds float* %tmp4141, i64 1
+  %tmp4143 = getelementptr inbounds float* %tmp4142, i64 1
+  %tmp4144 = getelementptr inbounds float* %tmp4143, i64 1
+  %tmp4145 = getelementptr inbounds float* %tmp4144, i64 1
+  %tmp4146 = getelementptr inbounds float* %tmp4145, i64 1
+  %tmp4147 = getelementptr inbounds float* %tmp4146, i64 1
+  %tmp4148 = getelementptr inbounds float* %tmp4147, i64 1
+  %tmp4149 = getelementptr inbounds float* %tmp4148, i64 1
+  %tmp4150 = getelementptr inbounds float* %tmp4149, i64 1
+  %tmp4151 = getelementptr inbounds float* %tmp4150, i64 1
+  %tmp4152 = getelementptr inbounds float* %tmp4151, i64 1
+  %tmp4153 = getelementptr inbounds float* %tmp4152, i64 1
+  %tmp4154 = getelementptr inbounds float* %tmp4153, i64 1
+  %tmp4155 = getelementptr inbounds float* %tmp4154, i64 1
+  %tmp4156 = getelementptr inbounds float* %tmp4155, i64 1
+  %tmp4157 = getelementptr inbounds float* %tmp4156, i64 1
+  %tmp4158 = getelementptr inbounds float* %tmp4157, i64 1
+  %tmp4159 = getelementptr inbounds float* %tmp4158, i64 1
+  %tmp4160 = getelementptr inbounds float* %tmp4159, i64 1
+  %tmp4161 = getelementptr inbounds float* %tmp4160, i64 1
+  %tmp4162 = getelementptr inbounds float* %tmp4161, i64 1
+  %tmp4163 = getelementptr inbounds float* %tmp4162, i64 1
+  %tmp4164 = getelementptr inbounds float* %tmp4163, i64 1
+  %tmp4165 = getelementptr inbounds float* %tmp4164, i64 1
+  %tmp4166 = getelementptr inbounds float* %tmp4165, i64 1
+  %tmp4167 = getelementptr inbounds float* %tmp4166, i64 1
+  %tmp4168 = getelementptr inbounds float* %tmp4167, i64 1
+  %tmp4169 = getelementptr inbounds float* %tmp4168, i64 1
+  %tmp4170 = getelementptr inbounds float* %tmp4169, i64 1
+  %tmp4171 = getelementptr inbounds float* %tmp4170, i64 1
+  %tmp4172 = getelementptr inbounds float* %tmp4171, i64 1
+  %tmp4173 = getelementptr inbounds float* %tmp4172, i64 1
+  %tmp4174 = getelementptr inbounds float* %tmp4173, i64 1
+  %tmp4175 = getelementptr inbounds float* %tmp4174, i64 1
+  %tmp4176 = getelementptr inbounds float* %tmp4175, i64 1
+  %tmp4177 = getelementptr inbounds float* %tmp4176, i64 1
+  %tmp4178 = getelementptr inbounds float* %tmp4177, i64 1
+  %tmp4179 = getelementptr inbounds float* %tmp4178, i64 1
+  %tmp4180 = getelementptr inbounds float* %tmp4179, i64 1
+  %tmp4181 = getelementptr inbounds float* %tmp4180, i64 1
+  %tmp4182 = getelementptr inbounds float* %tmp4181, i64 1
+  %tmp4183 = getelementptr inbounds float* %tmp4182, i64 1
+  %tmp4184 = getelementptr inbounds float* %tmp4183, i64 1
+  %tmp4185 = getelementptr inbounds float* %tmp4184, i64 1
+  %tmp4186 = getelementptr inbounds float* %tmp4185, i64 1
+  %tmp4187 = getelementptr inbounds float* %tmp4186, i64 1
+  %tmp4188 = getelementptr inbounds float* %tmp4187, i64 1
+  %tmp4189 = getelementptr inbounds float* %tmp4188, i64 1
+  %tmp4190 = getelementptr inbounds float* %tmp4189, i64 1
+  %tmp4191 = getelementptr inbounds float* %tmp4190, i64 1
+  %tmp4192 = getelementptr inbounds float* %tmp4191, i64 1
+  %tmp4193 = getelementptr inbounds float* %tmp4192, i64 1
+  %tmp4194 = getelementptr inbounds float* %tmp4193, i64 1
+  %tmp4195 = getelementptr inbounds float* %tmp4194, i64 1
+  %tmp4196 = getelementptr inbounds float* %tmp4195, i64 1
+  %tmp4197 = getelementptr inbounds float* %tmp4196, i64 1
+  %tmp4198 = getelementptr inbounds float* %tmp4197, i64 1
+  %tmp4199 = getelementptr inbounds float* %tmp4198, i64 1
+  %tmp4200 = getelementptr inbounds float* %tmp4199, i64 1
+  %tmp4201 = getelementptr inbounds float* %tmp4200, i64 1
+  %tmp4202 = getelementptr inbounds float* %tmp4201, i64 1
+  %tmp4203 = getelementptr inbounds float* %tmp4202, i64 1
+  %tmp4204 = getelementptr inbounds float* %tmp4203, i64 1
+  %tmp4205 = getelementptr inbounds float* %tmp4204, i64 1
+  %tmp4206 = getelementptr inbounds float* %tmp4205, i64 1
+  %tmp4207 = getelementptr inbounds float* %tmp4206, i64 1
+  %tmp4208 = getelementptr inbounds float* %tmp4207, i64 1
+  %tmp4209 = getelementptr inbounds float* %tmp4208, i64 1
+  %tmp4210 = getelementptr inbounds float* %tmp4209, i64 1
+  %tmp4211 = getelementptr inbounds float* %tmp4210, i64 1
+  %tmp4212 = getelementptr inbounds float* %tmp4211, i64 1
+  %tmp4213 = getelementptr inbounds float* %tmp4212, i64 1
+  %tmp4214 = getelementptr inbounds float* %tmp4213, i64 1
+  %tmp4215 = getelementptr inbounds float* %tmp4214, i64 1
+  %tmp4216 = getelementptr inbounds float* %tmp4215, i64 1
+  %tmp4217 = getelementptr inbounds float* %tmp4216, i64 1
+  %tmp4218 = getelementptr inbounds float* %tmp4217, i64 1
+  %tmp4219 = getelementptr inbounds float* %tmp4218, i64 1
+  %tmp4220 = getelementptr inbounds float* %tmp4219, i64 1
+  %tmp4221 = getelementptr inbounds float* %tmp4220, i64 1
+  %tmp4222 = getelementptr inbounds float* %tmp4221, i64 1
+  %tmp4223 = getelementptr inbounds float* %tmp4222, i64 1
+  %tmp4224 = getelementptr inbounds float* %tmp4223, i64 1
+  %tmp4225 = getelementptr inbounds float* %tmp4224, i64 1
+  %tmp4226 = getelementptr inbounds float* %tmp4225, i64 1
+  %tmp4227 = getelementptr inbounds float* %tmp4226, i64 1
+  %tmp4228 = getelementptr inbounds float* %tmp4227, i64 1
+  %tmp4229 = getelementptr inbounds float* %tmp4228, i64 1
+  %tmp4230 = getelementptr inbounds float* %tmp4229, i64 1
+  %tmp4231 = getelementptr inbounds float* %tmp4230, i64 1
+  %tmp4232 = getelementptr inbounds float* %tmp4231, i64 1
+  %tmp4233 = getelementptr inbounds float* %tmp4232, i64 1
+  %tmp4234 = getelementptr inbounds float* %tmp4233, i64 1
+  %tmp4235 = getelementptr inbounds float* %tmp4234, i64 1
+  %tmp4236 = getelementptr inbounds float* %tmp4235, i64 1
+  %tmp4237 = getelementptr inbounds float* %tmp4236, i64 1
+  %tmp4238 = getelementptr inbounds float* %tmp4237, i64 1
+  %tmp4239 = getelementptr inbounds float* %tmp4238, i64 1
+  %tmp4240 = getelementptr inbounds float* %tmp4239, i64 1
+  %tmp4241 = getelementptr inbounds float* %tmp4240, i64 1
+  %tmp4242 = getelementptr inbounds float* %tmp4241, i64 1
+  %tmp4243 = getelementptr inbounds float* %tmp4242, i64 1
+  %tmp4244 = getelementptr inbounds float* %tmp4243, i64 1
+  %tmp4245 = getelementptr inbounds float* %tmp4244, i64 1
+  %tmp4246 = getelementptr inbounds float* %tmp4245, i64 1
+  %tmp4247 = getelementptr inbounds float* %tmp4246, i64 1
+  %tmp4248 = getelementptr inbounds float* %tmp4247, i64 1
+  %tmp4249 = getelementptr inbounds float* %tmp4248, i64 1
+  %tmp4250 = getelementptr inbounds float* %tmp4249, i64 1
+  %tmp4251 = getelementptr inbounds float* %tmp4250, i64 1
+  %tmp4252 = getelementptr inbounds float* %tmp4251, i64 1
+  %tmp4253 = getelementptr inbounds float* %tmp4252, i64 1
+  %tmp4254 = getelementptr inbounds float* %tmp4253, i64 1
+  %tmp4255 = getelementptr inbounds float* %tmp4254, i64 1
+  %tmp4256 = getelementptr inbounds float* %tmp4255, i64 1
+  %tmp4257 = getelementptr inbounds float* %tmp4256, i64 1
+  %tmp4258 = getelementptr inbounds float* %tmp4257, i64 1
+  %tmp4259 = getelementptr inbounds float* %tmp4258, i64 1
+  %tmp4260 = getelementptr inbounds float* %tmp4259, i64 1
+  %tmp4261 = getelementptr inbounds float* %tmp4260, i64 1
+  %tmp4262 = getelementptr inbounds float* %tmp4261, i64 1
+  %tmp4263 = getelementptr inbounds float* %tmp4262, i64 1
+  %tmp4264 = getelementptr inbounds float* %tmp4263, i64 1
+  %tmp4265 = getelementptr inbounds float* %tmp4264, i64 1
+  %tmp4266 = getelementptr inbounds float* %tmp4265, i64 1
+  %tmp4267 = getelementptr inbounds float* %tmp4266, i64 1
+  %tmp4268 = getelementptr inbounds float* %tmp4267, i64 1
+  %tmp4269 = getelementptr inbounds float* %tmp4268, i64 1
+  %tmp4270 = getelementptr inbounds float* %tmp4269, i64 1
+  %tmp4271 = getelementptr inbounds float* %tmp4270, i64 1
+  %tmp4272 = getelementptr inbounds float* %tmp4271, i64 1
+  %tmp4273 = getelementptr inbounds float* %tmp4272, i64 1
+  %tmp4274 = getelementptr inbounds float* %tmp4273, i64 1
+  %tmp4275 = getelementptr inbounds float* %tmp4274, i64 1
+  %tmp4276 = getelementptr inbounds float* %tmp4275, i64 1
+  %tmp4277 = getelementptr inbounds float* %tmp4276, i64 1
+  %tmp4278 = getelementptr inbounds float* %tmp4277, i64 1
+  %tmp4279 = getelementptr inbounds float* %tmp4278, i64 1
+  %tmp4280 = getelementptr inbounds float* %tmp4279, i64 1
+  %tmp4281 = getelementptr inbounds float* %tmp4280, i64 1
+  %tmp4282 = getelementptr inbounds float* %tmp4281, i64 1
+  %tmp4283 = getelementptr inbounds float* %tmp4282, i64 1
+  %tmp4284 = getelementptr inbounds float* %tmp4283, i64 1
+  %tmp4285 = getelementptr inbounds float* %tmp4284, i64 1
+  %tmp4286 = getelementptr inbounds float* %tmp4285, i64 1
+  %tmp4287 = getelementptr inbounds float* %tmp4286, i64 1
+  %tmp4288 = getelementptr inbounds float* %tmp4287, i64 1
+  %tmp4289 = getelementptr inbounds float* %tmp4288, i64 1
+  %tmp4290 = getelementptr inbounds float* %tmp4289, i64 1
+  %tmp4291 = getelementptr inbounds float* %tmp4290, i64 1
+  %tmp4292 = getelementptr inbounds float* %tmp4291, i64 1
+  %tmp4293 = getelementptr inbounds float* %tmp4292, i64 1
+  %tmp4294 = getelementptr inbounds float* %tmp4293, i64 1
+  %tmp4295 = getelementptr inbounds float* %tmp4294, i64 1
+  %tmp4296 = getelementptr inbounds float* %tmp4295, i64 1
+  %tmp4297 = getelementptr inbounds float* %tmp4296, i64 1
+  %tmp4298 = getelementptr inbounds float* %tmp4297, i64 1
+  %tmp4299 = getelementptr inbounds float* %tmp4298, i64 1
+  %tmp4300 = getelementptr inbounds float* %tmp4299, i64 1
+  %tmp4301 = getelementptr inbounds float* %tmp4300, i64 1
+  %tmp4302 = getelementptr inbounds float* %tmp4301, i64 1
+  %tmp4303 = getelementptr inbounds float* %tmp4302, i64 1
+  %tmp4304 = getelementptr inbounds float* %tmp4303, i64 1
+  %tmp4305 = getelementptr inbounds float* %tmp4304, i64 1
+  %tmp4306 = getelementptr inbounds float* %tmp4305, i64 1
+  %tmp4307 = getelementptr inbounds float* %tmp4306, i64 1
+  %tmp4308 = getelementptr inbounds float* %tmp4307, i64 1
+  %tmp4309 = getelementptr inbounds float* %tmp4308, i64 1
+  %tmp4310 = getelementptr inbounds float* %tmp4309, i64 1
+  %tmp4311 = getelementptr inbounds float* %tmp4310, i64 1
+  %tmp4312 = getelementptr inbounds float* %tmp4311, i64 1
+  %tmp4313 = getelementptr inbounds float* %tmp4312, i64 1
+  %tmp4314 = getelementptr inbounds float* %tmp4313, i64 1
+  %tmp4315 = getelementptr inbounds float* %tmp4314, i64 1
+  %tmp4316 = getelementptr inbounds float* %tmp4315, i64 1
+  %tmp4317 = getelementptr inbounds float* %tmp4316, i64 1
+  %tmp4318 = getelementptr inbounds float* %tmp4317, i64 1
+  %tmp4319 = getelementptr inbounds float* %tmp4318, i64 1
+  %tmp4320 = getelementptr inbounds float* %tmp4319, i64 1
+  %tmp4321 = getelementptr inbounds float* %tmp4320, i64 1
+  %tmp4322 = getelementptr inbounds float* %tmp4321, i64 1
+  %tmp4323 = getelementptr inbounds float* %tmp4322, i64 1
+  %tmp4324 = getelementptr inbounds float* %tmp4323, i64 1
+  %tmp4325 = getelementptr inbounds float* %tmp4324, i64 1
+  %tmp4326 = getelementptr inbounds float* %tmp4325, i64 1
+  %tmp4327 = getelementptr inbounds float* %tmp4326, i64 1
+  %tmp4328 = getelementptr inbounds float* %tmp4327, i64 1
+  %tmp4329 = getelementptr inbounds float* %tmp4328, i64 1
+  %tmp4330 = getelementptr inbounds float* %tmp4329, i64 1
+  %tmp4331 = getelementptr inbounds float* %tmp4330, i64 1
+  %tmp4332 = getelementptr inbounds float* %tmp4331, i64 1
+  %tmp4333 = getelementptr inbounds float* %tmp4332, i64 1
+  %tmp4334 = getelementptr inbounds float* %tmp4333, i64 1
+  %tmp4335 = getelementptr inbounds float* %tmp4334, i64 1
+  %tmp4336 = getelementptr inbounds float* %tmp4335, i64 1
+  %tmp4337 = getelementptr inbounds float* %tmp4336, i64 1
+  %tmp4338 = getelementptr inbounds float* %tmp4337, i64 1
+  %tmp4339 = getelementptr inbounds float* %tmp4338, i64 1
+  %tmp4340 = getelementptr inbounds float* %tmp4339, i64 1
+  %tmp4341 = getelementptr inbounds float* %tmp4340, i64 1
+  %tmp4342 = getelementptr inbounds float* %tmp4341, i64 1
+  %tmp4343 = getelementptr inbounds float* %tmp4342, i64 1
+  %tmp4344 = getelementptr inbounds float* %tmp4343, i64 1
+  %tmp4345 = getelementptr inbounds float* %tmp4344, i64 1
+  %tmp4346 = getelementptr inbounds float* %tmp4345, i64 1
+  %tmp4347 = getelementptr inbounds float* %tmp4346, i64 1
+  %tmp4348 = getelementptr inbounds float* %tmp4347, i64 1
+  %tmp4349 = getelementptr inbounds float* %tmp4348, i64 1
+  %tmp4350 = getelementptr inbounds float* %tmp4349, i64 1
+  %tmp4351 = getelementptr inbounds float* %tmp4350, i64 1
+  %tmp4352 = getelementptr inbounds float* %tmp4351, i64 1
+  %tmp4353 = getelementptr inbounds float* %tmp4352, i64 1
+  %tmp4354 = getelementptr inbounds float* %tmp4353, i64 1
+  %tmp4355 = getelementptr inbounds float* %tmp4354, i64 1
+  %tmp4356 = getelementptr inbounds float* %tmp4355, i64 1
+  %tmp4357 = getelementptr inbounds float* %tmp4356, i64 1
+  %tmp4358 = getelementptr inbounds float* %tmp4357, i64 1
+  %tmp4359 = getelementptr inbounds float* %tmp4358, i64 1
+  %tmp4360 = getelementptr inbounds float* %tmp4359, i64 1
+  %tmp4361 = getelementptr inbounds float* %tmp4360, i64 1
+  %tmp4362 = getelementptr inbounds float* %tmp4361, i64 1
+  %tmp4363 = getelementptr inbounds float* %tmp4362, i64 1
+  %tmp4364 = getelementptr inbounds float* %tmp4363, i64 1
+  %tmp4365 = getelementptr inbounds float* %tmp4364, i64 1
+  %tmp4366 = getelementptr inbounds float* %tmp4365, i64 1
+  %tmp4367 = getelementptr inbounds float* %tmp4366, i64 1
+  %tmp4368 = getelementptr inbounds float* %tmp4367, i64 1
+  %tmp4369 = getelementptr inbounds float* %tmp4368, i64 1
+  %tmp4370 = getelementptr inbounds float* %tmp4369, i64 1
+  %tmp4371 = getelementptr inbounds float* %tmp4370, i64 1
+  %tmp4372 = getelementptr inbounds float* %tmp4371, i64 1
+  %tmp4373 = getelementptr inbounds float* %tmp4372, i64 1
+  %tmp4374 = getelementptr inbounds float* %tmp4373, i64 1
+  %tmp4375 = getelementptr inbounds float* %tmp4374, i64 1
+  %tmp4376 = getelementptr inbounds float* %tmp4375, i64 1
+  %tmp4377 = getelementptr inbounds float* %tmp4376, i64 1
+  %tmp4378 = getelementptr inbounds float* %tmp4377, i64 1
+  %tmp4379 = getelementptr inbounds float* %tmp4378, i64 1
+  %tmp4380 = getelementptr inbounds float* %tmp4379, i64 1
+  %tmp4381 = getelementptr inbounds float* %tmp4380, i64 1
+  %tmp4382 = getelementptr inbounds float* %tmp4381, i64 1
+  %tmp4383 = getelementptr inbounds float* %tmp4382, i64 1
+  %tmp4384 = getelementptr inbounds float* %tmp4383, i64 1
+  %tmp4385 = getelementptr inbounds float* %tmp4384, i64 1
+  %tmp4386 = getelementptr inbounds float* %tmp4385, i64 1
+  %tmp4387 = getelementptr inbounds float* %tmp4386, i64 1
+  %tmp4388 = getelementptr inbounds float* %tmp4387, i64 1
+  %tmp4389 = getelementptr inbounds float* %tmp4388, i64 1
+  %tmp4390 = getelementptr inbounds float* %tmp4389, i64 1
+  %tmp4391 = getelementptr inbounds float* %tmp4390, i64 1
+  %tmp4392 = getelementptr inbounds float* %tmp4391, i64 1
+  %tmp4393 = getelementptr inbounds float* %tmp4392, i64 1
+  %tmp4394 = getelementptr inbounds float* %tmp4393, i64 1
+  %tmp4395 = getelementptr inbounds float* %tmp4394, i64 1
+  %tmp4396 = getelementptr inbounds float* %tmp4395, i64 1
+  %tmp4397 = getelementptr inbounds float* %tmp4396, i64 1
+  %tmp4398 = getelementptr inbounds float* %tmp4397, i64 1
+  %tmp4399 = getelementptr inbounds float* %tmp4398, i64 1
+  %tmp4400 = getelementptr inbounds float* %tmp4399, i64 1
+  %tmp4401 = getelementptr inbounds float* %tmp4400, i64 1
+  %tmp4402 = getelementptr inbounds float* %tmp4401, i64 1
+  %tmp4403 = getelementptr inbounds float* %tmp4402, i64 1
+  %tmp4404 = getelementptr inbounds float* %tmp4403, i64 1
+  %tmp4405 = getelementptr inbounds float* %tmp4404, i64 1
+  %tmp4406 = getelementptr inbounds float* %tmp4405, i64 1
+  %tmp4407 = getelementptr inbounds float* %tmp4406, i64 1
+  %tmp4408 = getelementptr inbounds float* %tmp4407, i64 1
+  %tmp4409 = getelementptr inbounds float* %tmp4408, i64 1
+  %tmp4410 = getelementptr inbounds float* %tmp4409, i64 1
+  %tmp4411 = getelementptr inbounds float* %tmp4410, i64 1
+  %tmp4412 = getelementptr inbounds float* %tmp4411, i64 1
+  %tmp4413 = getelementptr inbounds float* %tmp4412, i64 1
+  %tmp4414 = getelementptr inbounds float* %tmp4413, i64 1
+  %tmp4415 = getelementptr inbounds float* %tmp4414, i64 1
+  %tmp4416 = getelementptr inbounds float* %tmp4415, i64 1
+  %tmp4417 = getelementptr inbounds float* %tmp4416, i64 1
+  %tmp4418 = getelementptr inbounds float* %tmp4417, i64 1
+  %tmp4419 = getelementptr inbounds float* %tmp4418, i64 1
+  %tmp4420 = getelementptr inbounds float* %tmp4419, i64 1
+  %tmp4421 = getelementptr inbounds float* %tmp4420, i64 1
+  %tmp4422 = getelementptr inbounds float* %tmp4421, i64 1
+  %tmp4423 = getelementptr inbounds float* %tmp4422, i64 1
+  %tmp4424 = getelementptr inbounds float* %tmp4423, i64 1
+  %tmp4425 = getelementptr inbounds float* %tmp4424, i64 1
+  %tmp4426 = getelementptr inbounds float* %tmp4425, i64 1
+  %tmp4427 = getelementptr inbounds float* %tmp4426, i64 1
+  %tmp4428 = getelementptr inbounds float* %tmp4427, i64 1
+  %tmp4429 = getelementptr inbounds float* %tmp4428, i64 1
+  %tmp4430 = getelementptr inbounds float* %tmp4429, i64 1
+  %tmp4431 = getelementptr inbounds float* %tmp4430, i64 1
+  %tmp4432 = getelementptr inbounds float* %tmp4431, i64 1
+  %tmp4433 = getelementptr inbounds float* %tmp4432, i64 1
+  %tmp4434 = getelementptr inbounds float* %tmp4433, i64 1
+  %tmp4435 = getelementptr inbounds float* %tmp4434, i64 1
+  %tmp4436 = getelementptr inbounds float* %tmp4435, i64 1
+  %tmp4437 = getelementptr inbounds float* %tmp4436, i64 1
+  %tmp4438 = getelementptr inbounds float* %tmp4437, i64 1
+  %tmp4439 = getelementptr inbounds float* %tmp4438, i64 1
+  %tmp4440 = getelementptr inbounds float* %tmp4439, i64 1
+  %tmp4441 = getelementptr inbounds float* %tmp4440, i64 1
+  %tmp4442 = getelementptr inbounds float* %tmp4441, i64 1
+  %tmp4443 = getelementptr inbounds float* %tmp4442, i64 1
+  %tmp4444 = getelementptr inbounds float* %tmp4443, i64 1
+  %tmp4445 = getelementptr inbounds float* %tmp4444, i64 1
+  %tmp4446 = getelementptr inbounds float* %tmp4445, i64 1
+  %tmp4447 = getelementptr inbounds float* %tmp4446, i64 1
+  %tmp4448 = getelementptr inbounds float* %tmp4447, i64 1
+  %tmp4449 = getelementptr inbounds float* %tmp4448, i64 1
+  %tmp4450 = getelementptr inbounds float* %tmp4449, i64 1
+  %tmp4451 = getelementptr inbounds float* %tmp4450, i64 1
+  %tmp4452 = getelementptr inbounds float* %tmp4451, i64 1
+  %tmp4453 = getelementptr inbounds float* %tmp4452, i64 1
+  %tmp4454 = getelementptr inbounds float* %tmp4453, i64 1
+  %tmp4455 = getelementptr inbounds float* %tmp4454, i64 1
+  %tmp4456 = getelementptr inbounds float* %tmp4455, i64 1
+  %tmp4457 = getelementptr inbounds float* %tmp4456, i64 1
+  %tmp4458 = getelementptr inbounds float* %tmp4457, i64 1
+  %tmp4459 = getelementptr inbounds float* %tmp4458, i64 1
+  %tmp4460 = getelementptr inbounds float* %tmp4459, i64 1
+  %tmp4461 = getelementptr inbounds float* %tmp4460, i64 1
+  %tmp4462 = getelementptr inbounds float* %tmp4461, i64 1
+  %tmp4463 = getelementptr inbounds float* %tmp4462, i64 1
+  %tmp4464 = getelementptr inbounds float* %tmp4463, i64 1
+  %tmp4465 = getelementptr inbounds float* %tmp4464, i64 1
+  %tmp4466 = getelementptr inbounds float* %tmp4465, i64 1
+  %tmp4467 = getelementptr inbounds float* %tmp4466, i64 1
+  %tmp4468 = getelementptr inbounds float* %tmp4467, i64 1
+  %tmp4469 = getelementptr inbounds float* %tmp4468, i64 1
+  %tmp4470 = getelementptr inbounds float* %tmp4469, i64 1
+  %tmp4471 = getelementptr inbounds float* %tmp4470, i64 1
+  %tmp4472 = getelementptr inbounds float* %tmp4471, i64 1
+  %tmp4473 = getelementptr inbounds float* %tmp4472, i64 1
+  %tmp4474 = getelementptr inbounds float* %tmp4473, i64 1
+  %tmp4475 = getelementptr inbounds float* %tmp4474, i64 1
+  %tmp4476 = getelementptr inbounds float* %tmp4475, i64 1
+  %tmp4477 = getelementptr inbounds float* %tmp4476, i64 1
+  %tmp4478 = getelementptr inbounds float* %tmp4477, i64 1
+  %tmp4479 = getelementptr inbounds float* %tmp4478, i64 1
+  %tmp4480 = getelementptr inbounds float* %tmp4479, i64 1
+  %tmp4481 = getelementptr inbounds float* %tmp4480, i64 1
+  %tmp4482 = getelementptr inbounds float* %tmp4481, i64 1
+  %tmp4483 = getelementptr inbounds float* %tmp4482, i64 1
+  %tmp4484 = getelementptr inbounds float* %tmp4483, i64 1
+  %tmp4485 = getelementptr inbounds float* %tmp4484, i64 1
+  %tmp4486 = getelementptr inbounds float* %tmp4485, i64 1
+  %tmp4487 = getelementptr inbounds float* %tmp4486, i64 1
+  %tmp4488 = getelementptr inbounds float* %tmp4487, i64 1
+  %tmp4489 = getelementptr inbounds float* %tmp4488, i64 1
+  %tmp4490 = getelementptr inbounds float* %tmp4489, i64 1
+  %tmp4491 = getelementptr inbounds float* %tmp4490, i64 1
+  %tmp4492 = getelementptr inbounds float* %tmp4491, i64 1
+  %tmp4493 = getelementptr inbounds float* %tmp4492, i64 1
+  %tmp4494 = getelementptr inbounds float* %tmp4493, i64 1
+  %tmp4495 = getelementptr inbounds float* %tmp4494, i64 1
+  %tmp4496 = getelementptr inbounds float* %tmp4495, i64 1
+  %tmp4497 = getelementptr inbounds float* %tmp4496, i64 1
+  %tmp4498 = getelementptr inbounds float* %tmp4497, i64 1
+  %tmp4499 = getelementptr inbounds float* %tmp4498, i64 1
+  %tmp4500 = getelementptr inbounds float* %tmp4499, i64 1
+  %tmp4501 = getelementptr inbounds float* %tmp4500, i64 1
+  %tmp4502 = getelementptr inbounds float* %tmp4501, i64 1
+  %tmp4503 = getelementptr inbounds float* %tmp4502, i64 1
+  %tmp4504 = getelementptr inbounds float* %tmp4503, i64 1
+  %tmp4505 = getelementptr inbounds float* %tmp4504, i64 1
+  %tmp4506 = getelementptr inbounds float* %tmp4505, i64 1
+  %tmp4507 = getelementptr inbounds float* %tmp4506, i64 1
+  %tmp4508 = getelementptr inbounds float* %tmp4507, i64 1
+  %tmp4509 = getelementptr inbounds float* %tmp4508, i64 1
+  %tmp4510 = getelementptr inbounds float* %tmp4509, i64 1
+  %tmp4511 = getelementptr inbounds float* %tmp4510, i64 1
+  %tmp4512 = getelementptr inbounds float* %tmp4511, i64 1
+  %tmp4513 = getelementptr inbounds float* %tmp4512, i64 1
+  %tmp4514 = getelementptr inbounds float* %tmp4513, i64 1
+  %tmp4515 = getelementptr inbounds float* %tmp4514, i64 1
+  %tmp4516 = getelementptr inbounds float* %tmp4515, i64 1
+  %tmp4517 = getelementptr inbounds float* %tmp4516, i64 1
+  %tmp4518 = getelementptr inbounds float* %tmp4517, i64 1
+  %tmp4519 = getelementptr inbounds float* %tmp4518, i64 1
+  %tmp4520 = getelementptr inbounds float* %tmp4519, i64 1
+  %tmp4521 = getelementptr inbounds float* %tmp4520, i64 1
+  %tmp4522 = getelementptr inbounds float* %tmp4521, i64 1
+  %tmp4523 = getelementptr inbounds float* %tmp4522, i64 1
+  %tmp4524 = getelementptr inbounds float* %tmp4523, i64 1
+  %tmp4525 = getelementptr inbounds float* %tmp4524, i64 1
+  %tmp4526 = getelementptr inbounds float* %tmp4525, i64 1
+  %tmp4527 = getelementptr inbounds float* %tmp4526, i64 1
+  %tmp4528 = getelementptr inbounds float* %tmp4527, i64 1
+  %tmp4529 = getelementptr inbounds float* %tmp4528, i64 1
+  %tmp4530 = getelementptr inbounds float* %tmp4529, i64 1
+  %tmp4531 = getelementptr inbounds float* %tmp4530, i64 1
+  %tmp4532 = getelementptr inbounds float* %tmp4531, i64 1
+  %tmp4533 = getelementptr inbounds float* %tmp4532, i64 1
+  %tmp4534 = getelementptr inbounds float* %tmp4533, i64 1
+  %tmp4535 = getelementptr inbounds float* %tmp4534, i64 1
+  %tmp4536 = getelementptr inbounds float* %tmp4535, i64 1
+  %tmp4537 = getelementptr inbounds float* %tmp4536, i64 1
+  %tmp4538 = getelementptr inbounds float* %tmp4537, i64 1
+  %tmp4539 = getelementptr inbounds float* %tmp4538, i64 1
+  %tmp4540 = getelementptr inbounds float* %tmp4539, i64 1
+  %tmp4541 = getelementptr inbounds float* %tmp4540, i64 1
+  %tmp4542 = getelementptr inbounds float* %tmp4541, i64 1
+  %tmp4543 = getelementptr inbounds float* %tmp4542, i64 1
+  %tmp4544 = getelementptr inbounds float* %tmp4543, i64 1
+  %tmp4545 = getelementptr inbounds float* %tmp4544, i64 1
+  %tmp4546 = getelementptr inbounds float* %tmp4545, i64 1
+  %tmp4547 = getelementptr inbounds float* %tmp4546, i64 1
+  %tmp4548 = getelementptr inbounds float* %tmp4547, i64 1
+  %tmp4549 = getelementptr inbounds float* %tmp4548, i64 1
+  %tmp4550 = getelementptr inbounds float* %tmp4549, i64 1
+  %tmp4551 = getelementptr inbounds float* %tmp4550, i64 1
+  %tmp4552 = getelementptr inbounds float* %tmp4551, i64 1
+  %tmp4553 = getelementptr inbounds float* %tmp4552, i64 1
+  %tmp4554 = getelementptr inbounds float* %tmp4553, i64 1
+  %tmp4555 = getelementptr inbounds float* %tmp4554, i64 1
+  %tmp4556 = getelementptr inbounds float* %tmp4555, i64 1
+  %tmp4557 = getelementptr inbounds float* %tmp4556, i64 1
+  %tmp4558 = getelementptr inbounds float* %tmp4557, i64 1
+  %tmp4559 = getelementptr inbounds float* %tmp4558, i64 1
+  %tmp4560 = getelementptr inbounds float* %tmp4559, i64 1
+  %tmp4561 = getelementptr inbounds float* %tmp4560, i64 1
+  %tmp4562 = getelementptr inbounds float* %tmp4561, i64 1
+  %tmp4563 = getelementptr inbounds float* %tmp4562, i64 1
+  %tmp4564 = getelementptr inbounds float* %tmp4563, i64 1
+  %tmp4565 = getelementptr inbounds float* %tmp4564, i64 1
+  %tmp4566 = getelementptr inbounds float* %tmp4565, i64 1
+  %tmp4567 = getelementptr inbounds float* %tmp4566, i64 1
+  %tmp4568 = getelementptr inbounds float* %tmp4567, i64 1
+  %tmp4569 = getelementptr inbounds float* %tmp4568, i64 1
+  %tmp4570 = getelementptr inbounds float* %tmp4569, i64 1
+  %tmp4571 = getelementptr inbounds float* %tmp4570, i64 1
+  %tmp4572 = getelementptr inbounds float* %tmp4571, i64 1
+  %tmp4573 = getelementptr inbounds float* %tmp4572, i64 1
+  %tmp4574 = getelementptr inbounds float* %tmp4573, i64 1
+  %tmp4575 = getelementptr inbounds float* %tmp4574, i64 1
+  %tmp4576 = getelementptr inbounds float* %tmp4575, i64 1
+  %tmp4577 = getelementptr inbounds float* %tmp4576, i64 1
+  %tmp4578 = getelementptr inbounds float* %tmp4577, i64 1
+  %tmp4579 = getelementptr inbounds float* %tmp4578, i64 1
+  %tmp4580 = getelementptr inbounds float* %tmp4579, i64 1
+  %tmp4581 = getelementptr inbounds float* %tmp4580, i64 1
+  %tmp4582 = getelementptr inbounds float* %tmp4581, i64 1
+  %tmp4583 = getelementptr inbounds float* %tmp4582, i64 1
+  %tmp4584 = getelementptr inbounds float* %tmp4583, i64 1
+  %tmp4585 = getelementptr inbounds float* %tmp4584, i64 1
+  %tmp4586 = getelementptr inbounds float* %tmp4585, i64 1
+  %tmp4587 = getelementptr inbounds float* %tmp4586, i64 1
+  %tmp4588 = getelementptr inbounds float* %tmp4587, i64 1
+  %tmp4589 = getelementptr inbounds float* %tmp4588, i64 1
+  %tmp4590 = getelementptr inbounds float* %tmp4589, i64 1
+  %tmp4591 = getelementptr inbounds float* %tmp4590, i64 1
+  %tmp4592 = getelementptr inbounds float* %tmp4591, i64 1
+  %tmp4593 = getelementptr inbounds float* %tmp4592, i64 1
+  %tmp4594 = getelementptr inbounds float* %tmp4593, i64 1
+  %tmp4595 = getelementptr inbounds float* %tmp4594, i64 1
+  %tmp4596 = getelementptr inbounds float* %tmp4595, i64 1
+  %tmp4597 = getelementptr inbounds float* %tmp4596, i64 1
+  %tmp4598 = getelementptr inbounds float* %tmp4597, i64 1
+  %tmp4599 = getelementptr inbounds float* %tmp4598, i64 1
+  %tmp4600 = getelementptr inbounds float* %tmp4599, i64 1
+  %tmp4601 = getelementptr inbounds float* %tmp4600, i64 1
+  %tmp4602 = getelementptr inbounds float* %tmp4601, i64 1
+  %tmp4603 = getelementptr inbounds float* %tmp4602, i64 1
+  %tmp4604 = getelementptr inbounds float* %tmp4603, i64 1
+  %tmp4605 = getelementptr inbounds float* %tmp4604, i64 1
+  %tmp4606 = getelementptr inbounds float* %tmp4605, i64 1
+  %tmp4607 = getelementptr inbounds float* %tmp4606, i64 1
+  %tmp4608 = getelementptr inbounds float* %tmp4607, i64 1
+  %tmp4609 = getelementptr inbounds float* %tmp4608, i64 1
+  %tmp4610 = getelementptr inbounds float* %tmp4609, i64 1
+  %tmp4611 = getelementptr inbounds float* %tmp4610, i64 1
+  %tmp4612 = getelementptr inbounds float* %tmp4611, i64 1
+  %tmp4613 = getelementptr inbounds float* %tmp4612, i64 1
+  %tmp4614 = getelementptr inbounds float* %tmp4613, i64 1
+  %tmp4615 = getelementptr inbounds float* %tmp4614, i64 1
+  %tmp4616 = getelementptr inbounds float* %tmp4615, i64 1
+  %tmp4617 = getelementptr inbounds float* %tmp4616, i64 1
+  %tmp4618 = getelementptr inbounds float* %tmp4617, i64 1
+  %tmp4619 = getelementptr inbounds float* %tmp4618, i64 1
+  %tmp4620 = getelementptr inbounds float* %tmp4619, i64 1
+  %tmp4621 = getelementptr inbounds float* %tmp4620, i64 1
+  %tmp4622 = getelementptr inbounds float* %tmp4621, i64 1
+  %tmp4623 = getelementptr inbounds float* %tmp4622, i64 1
+  %tmp4624 = getelementptr inbounds float* %tmp4623, i64 1
+  %tmp4625 = getelementptr inbounds float* %tmp4624, i64 1
+  %tmp4626 = getelementptr inbounds float* %tmp4625, i64 1
+  %tmp4627 = getelementptr inbounds float* %tmp4626, i64 1
+  %tmp4628 = getelementptr inbounds float* %tmp4627, i64 1
+  %tmp4629 = getelementptr inbounds float* %tmp4628, i64 1
+  %tmp4630 = getelementptr inbounds float* %tmp4629, i64 1
+  %tmp4631 = getelementptr inbounds float* %tmp4630, i64 1
+  %tmp4632 = getelementptr inbounds float* %tmp4631, i64 1
+  %tmp4633 = getelementptr inbounds float* %tmp4632, i64 1
+  %tmp4634 = getelementptr inbounds float* %tmp4633, i64 1
+  %tmp4635 = getelementptr inbounds float* %tmp4634, i64 1
+  %tmp4636 = getelementptr inbounds float* %tmp4635, i64 1
+  %tmp4637 = getelementptr inbounds float* %tmp4636, i64 1
+  %tmp4638 = getelementptr inbounds float* %tmp4637, i64 1
+  %tmp4639 = getelementptr inbounds float* %tmp4638, i64 1
+  %tmp4640 = getelementptr inbounds float* %tmp4639, i64 1
+  %tmp4641 = getelementptr inbounds float* %tmp4640, i64 1
+  %tmp4642 = getelementptr inbounds float* %tmp4641, i64 1
+  %tmp4643 = getelementptr inbounds float* %tmp4642, i64 1
+  %tmp4644 = getelementptr inbounds float* %tmp4643, i64 1
+  %tmp4645 = getelementptr inbounds float* %tmp4644, i64 1
+  %tmp4646 = getelementptr inbounds float* %tmp4645, i64 1
+  %tmp4647 = getelementptr inbounds float* %tmp4646, i64 1
+  %tmp4648 = getelementptr inbounds float* %tmp4647, i64 1
+  %tmp4649 = getelementptr inbounds float* %tmp4648, i64 1
+  %tmp4650 = getelementptr inbounds float* %tmp4649, i64 1
+  %tmp4651 = getelementptr inbounds float* %tmp4650, i64 1
+  %tmp4652 = getelementptr inbounds float* %tmp4651, i64 1
+  %tmp4653 = getelementptr inbounds float* %tmp4652, i64 1
+  %tmp4654 = getelementptr inbounds float* %tmp4653, i64 1
+  %tmp4655 = getelementptr inbounds float* %tmp4654, i64 1
+  %tmp4656 = getelementptr inbounds float* %tmp4655, i64 1
+  %tmp4657 = getelementptr inbounds float* %tmp4656, i64 1
+  %tmp4658 = getelementptr inbounds float* %tmp4657, i64 1
+  %tmp4659 = getelementptr inbounds float* %tmp4658, i64 1
+  %tmp4660 = getelementptr inbounds float* %tmp4659, i64 1
+  %tmp4661 = getelementptr inbounds float* %tmp4660, i64 1
+  %tmp4662 = getelementptr inbounds float* %tmp4661, i64 1
+  %tmp4663 = getelementptr inbounds float* %tmp4662, i64 1
+  %tmp4664 = getelementptr inbounds float* %tmp4663, i64 1
+  %tmp4665 = getelementptr inbounds float* %tmp4664, i64 1
+  %tmp4666 = getelementptr inbounds float* %tmp4665, i64 1
+  %tmp4667 = getelementptr inbounds float* %tmp4666, i64 1
+  %tmp4668 = getelementptr inbounds float* %tmp4667, i64 1
+  %tmp4669 = getelementptr inbounds float* %tmp4668, i64 1
+  %tmp4670 = getelementptr inbounds float* %tmp4669, i64 1
+  %tmp4671 = getelementptr inbounds float* %tmp4670, i64 1
+  %tmp4672 = getelementptr inbounds float* %tmp4671, i64 1
+  %tmp4673 = getelementptr inbounds float* %tmp4672, i64 1
+  %tmp4674 = getelementptr inbounds float* %tmp4673, i64 1
+  %tmp4675 = getelementptr inbounds float* %tmp4674, i64 1
+  %tmp4676 = getelementptr inbounds float* %tmp4675, i64 1
+  %tmp4677 = getelementptr inbounds float* %tmp4676, i64 1
+  %tmp4678 = getelementptr inbounds float* %tmp4677, i64 1
+  %tmp4679 = getelementptr inbounds float* %tmp4678, i64 1
+  %tmp4680 = getelementptr inbounds float* %tmp4679, i64 1
+  %tmp4681 = getelementptr inbounds float* %tmp4680, i64 1
+  %tmp4682 = getelementptr inbounds float* %tmp4681, i64 1
+  %tmp4683 = getelementptr inbounds float* %tmp4682, i64 1
+  %tmp4684 = getelementptr inbounds float* %tmp4683, i64 1
+  %tmp4685 = getelementptr inbounds float* %tmp4684, i64 1
+  %tmp4686 = getelementptr inbounds float* %tmp4685, i64 1
+  %tmp4687 = getelementptr inbounds float* %tmp4686, i64 1
+  %tmp4688 = getelementptr inbounds float* %tmp4687, i64 1
+  %tmp4689 = getelementptr inbounds float* %tmp4688, i64 1
+  %tmp4690 = getelementptr inbounds float* %tmp4689, i64 1
+  %tmp4691 = getelementptr inbounds float* %tmp4690, i64 1
+  %tmp4692 = getelementptr inbounds float* %tmp4691, i64 1
+  %tmp4693 = getelementptr inbounds float* %tmp4692, i64 1
+  %tmp4694 = getelementptr inbounds float* %tmp4693, i64 1
+  %tmp4695 = getelementptr inbounds float* %tmp4694, i64 1
+  %tmp4696 = getelementptr inbounds float* %tmp4695, i64 1
+  %tmp4697 = getelementptr inbounds float* %tmp4696, i64 1
+  %tmp4698 = getelementptr inbounds float* %tmp4697, i64 1
+  %tmp4699 = getelementptr inbounds float* %tmp4698, i64 1
+  %tmp4700 = getelementptr inbounds float* %tmp4699, i64 1
+  %tmp4701 = getelementptr inbounds float* %tmp4700, i64 1
+  %tmp4702 = getelementptr inbounds float* %tmp4701, i64 1
+  %tmp4703 = getelementptr inbounds float* %tmp4702, i64 1
+  %tmp4704 = getelementptr inbounds float* %tmp4703, i64 1
+  %tmp4705 = getelementptr inbounds float* %tmp4704, i64 1
+  %tmp4706 = getelementptr inbounds float* %tmp4705, i64 1
+  %tmp4707 = getelementptr inbounds float* %tmp4706, i64 1
+  %tmp4708 = getelementptr inbounds float* %tmp4707, i64 1
+  %tmp4709 = getelementptr inbounds float* %tmp4708, i64 1
+  %tmp4710 = getelementptr inbounds float* %tmp4709, i64 1
+  %tmp4711 = getelementptr inbounds float* %tmp4710, i64 1
+  %tmp4712 = getelementptr inbounds float* %tmp4711, i64 1
+  %tmp4713 = getelementptr inbounds float* %tmp4712, i64 1
+  %tmp4714 = getelementptr inbounds float* %tmp4713, i64 1
+  %tmp4715 = getelementptr inbounds float* %tmp4714, i64 1
+  %tmp4716 = getelementptr inbounds float* %tmp4715, i64 1
+  %tmp4717 = getelementptr inbounds float* %tmp4716, i64 1
+  %tmp4718 = getelementptr inbounds float* %tmp4717, i64 1
+  %tmp4719 = getelementptr inbounds float* %tmp4718, i64 1
+  %tmp4720 = getelementptr inbounds float* %tmp4719, i64 1
+  %tmp4721 = getelementptr inbounds float* %tmp4720, i64 1
+  %tmp4722 = getelementptr inbounds float* %tmp4721, i64 1
+  %tmp4723 = getelementptr inbounds float* %tmp4722, i64 1
+  %tmp4724 = getelementptr inbounds float* %tmp4723, i64 1
+  %tmp4725 = getelementptr inbounds float* %tmp4724, i64 1
+  %tmp4726 = getelementptr inbounds float* %tmp4725, i64 1
+  %tmp4727 = getelementptr inbounds float* %tmp4726, i64 1
+  %tmp4728 = getelementptr inbounds float* %tmp4727, i64 1
+  %tmp4729 = getelementptr inbounds float* %tmp4728, i64 1
+  %tmp4730 = getelementptr inbounds float* %tmp4729, i64 1
+  %tmp4731 = getelementptr inbounds float* %tmp4730, i64 1
+  %tmp4732 = getelementptr inbounds float* %tmp4731, i64 1
+  %tmp4733 = getelementptr inbounds float* %tmp4732, i64 1
+  %tmp4734 = getelementptr inbounds float* %tmp4733, i64 1
+  %tmp4735 = getelementptr inbounds float* %tmp4734, i64 1
+  %tmp4736 = getelementptr inbounds float* %tmp4735, i64 1
+  %tmp4737 = getelementptr inbounds float* %tmp4736, i64 1
+  %tmp4738 = getelementptr inbounds float* %tmp4737, i64 1
+  %tmp4739 = getelementptr inbounds float* %tmp4738, i64 1
+  %tmp4740 = getelementptr inbounds float* %tmp4739, i64 1
+  %tmp4741 = getelementptr inbounds float* %tmp4740, i64 1
+  %tmp4742 = getelementptr inbounds float* %tmp4741, i64 1
+  %tmp4743 = getelementptr inbounds float* %tmp4742, i64 1
+  %tmp4744 = getelementptr inbounds float* %tmp4743, i64 1
+  %tmp4745 = getelementptr inbounds float* %tmp4744, i64 1
+  %tmp4746 = getelementptr inbounds float* %tmp4745, i64 1
+  %tmp4747 = getelementptr inbounds float* %tmp4746, i64 1
+  %tmp4748 = getelementptr inbounds float* %tmp4747, i64 1
+  %tmp4749 = getelementptr inbounds float* %tmp4748, i64 1
+  %tmp4750 = getelementptr inbounds float* %tmp4749, i64 1
+  %tmp4751 = getelementptr inbounds float* %tmp4750, i64 1
+  %tmp4752 = getelementptr inbounds float* %tmp4751, i64 1
+  %tmp4753 = getelementptr inbounds float* %tmp4752, i64 1
+  %tmp4754 = getelementptr inbounds float* %tmp4753, i64 1
+  %tmp4755 = getelementptr inbounds float* %tmp4754, i64 1
+  %tmp4756 = getelementptr inbounds float* %tmp4755, i64 1
+  %tmp4757 = getelementptr inbounds float* %tmp4756, i64 1
+  %tmp4758 = getelementptr inbounds float* %tmp4757, i64 1
+  %tmp4759 = getelementptr inbounds float* %tmp4758, i64 1
+  %tmp4760 = getelementptr inbounds float* %tmp4759, i64 1
+  %tmp4761 = getelementptr inbounds float* %tmp4760, i64 1
+  %tmp4762 = getelementptr inbounds float* %tmp4761, i64 1
+  %tmp4763 = getelementptr inbounds float* %tmp4762, i64 1
+  %tmp4764 = getelementptr inbounds float* %tmp4763, i64 1
+  %tmp4765 = getelementptr inbounds float* %tmp4764, i64 1
+  %tmp4766 = getelementptr inbounds float* %tmp4765, i64 1
+  %tmp4767 = getelementptr inbounds float* %tmp4766, i64 1
+  %tmp4768 = getelementptr inbounds float* %tmp4767, i64 1
+  %tmp4769 = getelementptr inbounds float* %tmp4768, i64 1
+  %tmp4770 = getelementptr inbounds float* %tmp4769, i64 1
+  %tmp4771 = getelementptr inbounds float* %tmp4770, i64 1
+  %tmp4772 = getelementptr inbounds float* %tmp4771, i64 1
+  %tmp4773 = getelementptr inbounds float* %tmp4772, i64 1
+  %tmp4774 = getelementptr inbounds float* %tmp4773, i64 1
+  %tmp4775 = getelementptr inbounds float* %tmp4774, i64 1
+  %tmp4776 = getelementptr inbounds float* %tmp4775, i64 1
+  %tmp4777 = getelementptr inbounds float* %tmp4776, i64 1
+  %tmp4778 = getelementptr inbounds float* %tmp4777, i64 1
+  %tmp4779 = getelementptr inbounds float* %tmp4778, i64 1
+  %tmp4780 = getelementptr inbounds float* %tmp4779, i64 1
+  %tmp4781 = getelementptr inbounds float* %tmp4780, i64 1
+  %tmp4782 = getelementptr inbounds float* %tmp4781, i64 1
+  %tmp4783 = getelementptr inbounds float* %tmp4782, i64 1
+  %tmp4784 = getelementptr inbounds float* %tmp4783, i64 1
+  %tmp4785 = getelementptr inbounds float* %tmp4784, i64 1
+  %tmp4786 = getelementptr inbounds float* %tmp4785, i64 1
+  %tmp4787 = getelementptr inbounds float* %tmp4786, i64 1
+  %tmp4788 = getelementptr inbounds float* %tmp4787, i64 1
+  %tmp4789 = getelementptr inbounds float* %tmp4788, i64 1
+  %tmp4790 = getelementptr inbounds float* %tmp4789, i64 1
+  %tmp4791 = getelementptr inbounds float* %tmp4790, i64 1
+  %tmp4792 = getelementptr inbounds float* %tmp4791, i64 1
+  %tmp4793 = getelementptr inbounds float* %tmp4792, i64 1
+  %tmp4794 = getelementptr inbounds float* %tmp4793, i64 1
+  %tmp4795 = getelementptr inbounds float* %tmp4794, i64 1
+  %tmp4796 = getelementptr inbounds float* %tmp4795, i64 1
+  %tmp4797 = getelementptr inbounds float* %tmp4796, i64 1
+  %tmp4798 = getelementptr inbounds float* %tmp4797, i64 1
+  %tmp4799 = getelementptr inbounds float* %tmp4798, i64 1
+  %tmp4800 = getelementptr inbounds float* %tmp4799, i64 1
+  %tmp4801 = getelementptr inbounds float* %tmp4800, i64 1
+  %tmp4802 = getelementptr inbounds float* %tmp4801, i64 1
+  %tmp4803 = getelementptr inbounds float* %tmp4802, i64 1
+  %tmp4804 = getelementptr inbounds float* %tmp4803, i64 1
+  %tmp4805 = getelementptr inbounds float* %tmp4804, i64 1
+  %tmp4806 = getelementptr inbounds float* %tmp4805, i64 1
+  %tmp4807 = getelementptr inbounds float* %tmp4806, i64 1
+  %tmp4808 = getelementptr inbounds float* %tmp4807, i64 1
+  %tmp4809 = getelementptr inbounds float* %tmp4808, i64 1
+  %tmp4810 = getelementptr inbounds float* %tmp4809, i64 1
+  %tmp4811 = getelementptr inbounds float* %tmp4810, i64 1
+  %tmp4812 = getelementptr inbounds float* %tmp4811, i64 1
+  %tmp4813 = getelementptr inbounds float* %tmp4812, i64 1
+  %tmp4814 = getelementptr inbounds float* %tmp4813, i64 1
+  %tmp4815 = getelementptr inbounds float* %tmp4814, i64 1
+  %tmp4816 = getelementptr inbounds float* %tmp4815, i64 1
+  %tmp4817 = getelementptr inbounds float* %tmp4816, i64 1
+  %tmp4818 = getelementptr inbounds float* %tmp4817, i64 1
+  %tmp4819 = getelementptr inbounds float* %tmp4818, i64 1
+  %tmp4820 = getelementptr inbounds float* %tmp4819, i64 1
+  %tmp4821 = getelementptr inbounds float* %tmp4820, i64 1
+  %tmp4822 = getelementptr inbounds float* %tmp4821, i64 1
+  %tmp4823 = getelementptr inbounds float* %tmp4822, i64 1
+  %tmp4824 = getelementptr inbounds float* %tmp4823, i64 1
+  %tmp4825 = getelementptr inbounds float* %tmp4824, i64 1
+  %tmp4826 = getelementptr inbounds float* %tmp4825, i64 1
+  %tmp4827 = getelementptr inbounds float* %tmp4826, i64 1
+  %tmp4828 = getelementptr inbounds float* %tmp4827, i64 1
+  %tmp4829 = getelementptr inbounds float* %tmp4828, i64 1
+  %tmp4830 = getelementptr inbounds float* %tmp4829, i64 1
+  %tmp4831 = getelementptr inbounds float* %tmp4830, i64 1
+  %tmp4832 = getelementptr inbounds float* %tmp4831, i64 1
+  %tmp4833 = getelementptr inbounds float* %tmp4832, i64 1
+  %tmp4834 = getelementptr inbounds float* %tmp4833, i64 1
+  %tmp4835 = getelementptr inbounds float* %tmp4834, i64 1
+  %tmp4836 = getelementptr inbounds float* %tmp4835, i64 1
+  %tmp4837 = getelementptr inbounds float* %tmp4836, i64 1
+  %tmp4838 = getelementptr inbounds float* %tmp4837, i64 1
+  %tmp4839 = getelementptr inbounds float* %tmp4838, i64 1
+  %tmp4840 = getelementptr inbounds float* %tmp4839, i64 1
+  %tmp4841 = getelementptr inbounds float* %tmp4840, i64 1
+  %tmp4842 = getelementptr inbounds float* %tmp4841, i64 1
+  %tmp4843 = getelementptr inbounds float* %tmp4842, i64 1
+  %tmp4844 = getelementptr inbounds float* %tmp4843, i64 1
+  %tmp4845 = getelementptr inbounds float* %tmp4844, i64 1
+  %tmp4846 = getelementptr inbounds float* %tmp4845, i64 1
+  %tmp4847 = getelementptr inbounds float* %tmp4846, i64 1
+  %tmp4848 = getelementptr inbounds float* %tmp4847, i64 1
+  %tmp4849 = getelementptr inbounds float* %tmp4848, i64 1
+  %tmp4850 = getelementptr inbounds float* %tmp4849, i64 1
+  %tmp4851 = getelementptr inbounds float* %tmp4850, i64 1
+  %tmp4852 = getelementptr inbounds float* %tmp4851, i64 1
+  %tmp4853 = getelementptr inbounds float* %tmp4852, i64 1
+  %tmp4854 = getelementptr inbounds float* %tmp4853, i64 1
+  %tmp4855 = getelementptr inbounds float* %tmp4854, i64 1
+  %tmp4856 = getelementptr inbounds float* %tmp4855, i64 1
+  %tmp4857 = getelementptr inbounds float* %tmp4856, i64 1
+  %tmp4858 = getelementptr inbounds float* %tmp4857, i64 1
+  %tmp4859 = getelementptr inbounds float* %tmp4858, i64 1
+  %tmp4860 = getelementptr inbounds float* %tmp4859, i64 1
+  %tmp4861 = getelementptr inbounds float* %tmp4860, i64 1
+  %tmp4862 = getelementptr inbounds float* %tmp4861, i64 1
+  %tmp4863 = getelementptr inbounds float* %tmp4862, i64 1
+  %tmp4864 = getelementptr inbounds float* %tmp4863, i64 1
+  %tmp4865 = getelementptr inbounds float* %tmp4864, i64 1
+  %tmp4866 = getelementptr inbounds float* %tmp4865, i64 1
+  %tmp4867 = getelementptr inbounds float* %tmp4866, i64 1
+  %tmp4868 = getelementptr inbounds float* %tmp4867, i64 1
+  %tmp4869 = getelementptr inbounds float* %tmp4868, i64 1
+  %tmp4870 = getelementptr inbounds float* %tmp4869, i64 1
+  %tmp4871 = getelementptr inbounds float* %tmp4870, i64 1
+  %tmp4872 = getelementptr inbounds float* %tmp4871, i64 1
+  %tmp4873 = getelementptr inbounds float* %tmp4872, i64 1
+  %tmp4874 = getelementptr inbounds float* %tmp4873, i64 1
+  %tmp4875 = getelementptr inbounds float* %tmp4874, i64 1
+  %tmp4876 = getelementptr inbounds float* %tmp4875, i64 1
+  %tmp4877 = getelementptr inbounds float* %tmp4876, i64 1
+  %tmp4878 = getelementptr inbounds float* %tmp4877, i64 1
+  %tmp4879 = getelementptr inbounds float* %tmp4878, i64 1
+  %tmp4880 = getelementptr inbounds float* %tmp4879, i64 1
+  %tmp4881 = getelementptr inbounds float* %tmp4880, i64 1
+  %tmp4882 = getelementptr inbounds float* %tmp4881, i64 1
+  %tmp4883 = getelementptr inbounds float* %tmp4882, i64 1
+  %tmp4884 = getelementptr inbounds float* %tmp4883, i64 1
+  %tmp4885 = getelementptr inbounds float* %tmp4884, i64 1
+  %tmp4886 = getelementptr inbounds float* %tmp4885, i64 1
+  %tmp4887 = getelementptr inbounds float* %tmp4886, i64 1
+  %tmp4888 = getelementptr inbounds float* %tmp4887, i64 1
+  %tmp4889 = getelementptr inbounds float* %tmp4888, i64 1
+  %tmp4890 = getelementptr inbounds float* %tmp4889, i64 1
+  %tmp4891 = getelementptr inbounds float* %tmp4890, i64 1
+  %tmp4892 = getelementptr inbounds float* %tmp4891, i64 1
+  %tmp4893 = getelementptr inbounds float* %tmp4892, i64 1
+  %tmp4894 = getelementptr inbounds float* %tmp4893, i64 1
+  %tmp4895 = getelementptr inbounds float* %tmp4894, i64 1
+  %tmp4896 = getelementptr inbounds float* %tmp4895, i64 1
+  %tmp4897 = getelementptr inbounds float* %tmp4896, i64 1
+  %tmp4898 = getelementptr inbounds float* %tmp4897, i64 1
+  %tmp4899 = getelementptr inbounds float* %tmp4898, i64 1
+  %tmp4900 = getelementptr inbounds float* %tmp4899, i64 1
+  %tmp4901 = getelementptr inbounds float* %tmp4900, i64 1
+  %tmp4902 = getelementptr inbounds float* %tmp4901, i64 1
+  %tmp4903 = getelementptr inbounds float* %tmp4902, i64 1
+  %tmp4904 = getelementptr inbounds float* %tmp4903, i64 1
+  %tmp4905 = getelementptr inbounds float* %tmp4904, i64 1
+  %tmp4906 = getelementptr inbounds float* %tmp4905, i64 1
+  %tmp4907 = getelementptr inbounds float* %tmp4906, i64 1
+  %tmp4908 = getelementptr inbounds float* %tmp4907, i64 1
+  %tmp4909 = getelementptr inbounds float* %tmp4908, i64 1
+  %tmp4910 = getelementptr inbounds float* %tmp4909, i64 1
+  %tmp4911 = getelementptr inbounds float* %tmp4910, i64 1
+  %tmp4912 = getelementptr inbounds float* %tmp4911, i64 1
+  %tmp4913 = getelementptr inbounds float* %tmp4912, i64 1
+  %tmp4914 = getelementptr inbounds float* %tmp4913, i64 1
+  %tmp4915 = getelementptr inbounds float* %tmp4914, i64 1
+  %tmp4916 = getelementptr inbounds float* %tmp4915, i64 1
+  %tmp4917 = getelementptr inbounds float* %tmp4916, i64 1
+  %tmp4918 = getelementptr inbounds float* %tmp4917, i64 1
+  %tmp4919 = getelementptr inbounds float* %tmp4918, i64 1
+  %tmp4920 = getelementptr inbounds float* %tmp4919, i64 1
+  %tmp4921 = getelementptr inbounds float* %tmp4920, i64 1
+  %tmp4922 = getelementptr inbounds float* %tmp4921, i64 1
+  %tmp4923 = getelementptr inbounds float* %tmp4922, i64 1
+  %tmp4924 = getelementptr inbounds float* %tmp4923, i64 1
+  %tmp4925 = getelementptr inbounds float* %tmp4924, i64 1
+  %tmp4926 = getelementptr inbounds float* %tmp4925, i64 1
+  %tmp4927 = getelementptr inbounds float* %tmp4926, i64 1
+  %tmp4928 = getelementptr inbounds float* %tmp4927, i64 1
+  %tmp4929 = getelementptr inbounds float* %tmp4928, i64 1
+  %tmp4930 = getelementptr inbounds float* %tmp4929, i64 1
+  %tmp4931 = getelementptr inbounds float* %tmp4930, i64 1
+  %tmp4932 = getelementptr inbounds float* %tmp4931, i64 1
+  %tmp4933 = getelementptr inbounds float* %tmp4932, i64 1
+  %tmp4934 = getelementptr inbounds float* %tmp4933, i64 1
+  %tmp4935 = getelementptr inbounds float* %tmp4934, i64 1
+  %tmp4936 = getelementptr inbounds float* %tmp4935, i64 1
+  %tmp4937 = getelementptr inbounds float* %tmp4936, i64 1
+  %tmp4938 = getelementptr inbounds float* %tmp4937, i64 1
+  %tmp4939 = getelementptr inbounds float* %tmp4938, i64 1
+  %tmp4940 = getelementptr inbounds float* %tmp4939, i64 1
+  %tmp4941 = getelementptr inbounds float* %tmp4940, i64 1
+  %tmp4942 = getelementptr inbounds float* %tmp4941, i64 1
+  %tmp4943 = getelementptr inbounds float* %tmp4942, i64 1
+  %tmp4944 = getelementptr inbounds float* %tmp4943, i64 1
+  %tmp4945 = getelementptr inbounds float* %tmp4944, i64 1
+  %tmp4946 = getelementptr inbounds float* %tmp4945, i64 1
+  %tmp4947 = getelementptr inbounds float* %tmp4946, i64 1
+  %tmp4948 = getelementptr inbounds float* %tmp4947, i64 1
+  %tmp4949 = getelementptr inbounds float* %tmp4948, i64 1
+  %tmp4950 = getelementptr inbounds float* %tmp4949, i64 1
+  %tmp4951 = getelementptr inbounds float* %tmp4950, i64 1
+  %tmp4952 = getelementptr inbounds float* %tmp4951, i64 1
+  %tmp4953 = getelementptr inbounds float* %tmp4952, i64 1
+  %tmp4954 = getelementptr inbounds float* %tmp4953, i64 1
+  %tmp4955 = getelementptr inbounds float* %tmp4954, i64 1
+  %tmp4956 = getelementptr inbounds float* %tmp4955, i64 1
+  %tmp4957 = getelementptr inbounds float* %tmp4956, i64 1
+  %tmp4958 = getelementptr inbounds float* %tmp4957, i64 1
+  %tmp4959 = getelementptr inbounds float* %tmp4958, i64 1
+  %tmp4960 = getelementptr inbounds float* %tmp4959, i64 1
+  %tmp4961 = getelementptr inbounds float* %tmp4960, i64 1
+  %tmp4962 = getelementptr inbounds float* %tmp4961, i64 1
+  %tmp4963 = getelementptr inbounds float* %tmp4962, i64 1
+  %tmp4964 = getelementptr inbounds float* %tmp4963, i64 1
+  %tmp4965 = getelementptr inbounds float* %tmp4964, i64 1
+  %tmp4966 = getelementptr inbounds float* %tmp4965, i64 1
+  %tmp4967 = getelementptr inbounds float* %tmp4966, i64 1
+  %tmp4968 = getelementptr inbounds float* %tmp4967, i64 1
+  %tmp4969 = getelementptr inbounds float* %tmp4968, i64 1
+  %tmp4970 = getelementptr inbounds float* %tmp4969, i64 1
+  %tmp4971 = getelementptr inbounds float* %tmp4970, i64 1
+  %tmp4972 = getelementptr inbounds float* %tmp4971, i64 1
+  %tmp4973 = getelementptr inbounds float* %tmp4972, i64 1
+  %tmp4974 = getelementptr inbounds float* %tmp4973, i64 1
+  %tmp4975 = getelementptr inbounds float* %tmp4974, i64 1
+  %tmp4976 = getelementptr inbounds float* %tmp4975, i64 1
+  %tmp4977 = getelementptr inbounds float* %tmp4976, i64 1
+  %tmp4978 = getelementptr inbounds float* %tmp4977, i64 1
+  %tmp4979 = getelementptr inbounds float* %tmp4978, i64 1
+  %tmp4980 = getelementptr inbounds float* %tmp4979, i64 1
+  %tmp4981 = getelementptr inbounds float* %tmp4980, i64 1
+  %tmp4982 = getelementptr inbounds float* %tmp4981, i64 1
+  %tmp4983 = getelementptr inbounds float* %tmp4982, i64 1
+  %tmp4984 = getelementptr inbounds float* %tmp4983, i64 1
+  %tmp4985 = getelementptr inbounds float* %tmp4984, i64 1
+  %tmp4986 = getelementptr inbounds float* %tmp4985, i64 1
+  %tmp4987 = getelementptr inbounds float* %tmp4986, i64 1
+  %tmp4988 = getelementptr inbounds float* %tmp4987, i64 1
+  %tmp4989 = getelementptr inbounds float* %tmp4988, i64 1
+  %tmp4990 = getelementptr inbounds float* %tmp4989, i64 1
+  %tmp4991 = getelementptr inbounds float* %tmp4990, i64 1
+  %tmp4992 = getelementptr inbounds float* %tmp4991, i64 1
+  %tmp4993 = getelementptr inbounds float* %tmp4992, i64 1
+  %tmp4994 = getelementptr inbounds float* %tmp4993, i64 1
+  %tmp4995 = getelementptr inbounds float* %tmp4994, i64 1
+  %tmp4996 = getelementptr inbounds float* %tmp4995, i64 1
+  %tmp4997 = getelementptr inbounds float* %tmp4996, i64 1
+  %tmp4998 = getelementptr inbounds float* %tmp4997, i64 1
+  %tmp4999 = getelementptr inbounds float* %tmp4998, i64 1
+  %tmp5000 = getelementptr inbounds float* %tmp4999, i64 1
+  %tmp5001 = getelementptr inbounds float* %tmp5000, i64 1
+  %tmp5002 = getelementptr inbounds float* %tmp5001, i64 1
+  %tmp5003 = getelementptr inbounds float* %tmp5002, i64 1
+  %tmp5004 = getelementptr inbounds float* %tmp5003, i64 1
+  %tmp5005 = getelementptr inbounds float* %tmp5004, i64 1
+  %tmp5006 = getelementptr inbounds float* %tmp5005, i64 1
+  %tmp5007 = getelementptr inbounds float* %tmp5006, i64 1
+  %tmp5008 = getelementptr inbounds float* %tmp5007, i64 1
+  %tmp5009 = getelementptr inbounds float* %tmp5008, i64 1
+  %tmp5010 = getelementptr inbounds float* %tmp5009, i64 1
+  %tmp5011 = getelementptr inbounds float* %tmp5010, i64 1
+  %tmp5012 = getelementptr inbounds float* %tmp5011, i64 1
+  %tmp5013 = getelementptr inbounds float* %tmp5012, i64 1
+  %tmp5014 = getelementptr inbounds float* %tmp5013, i64 1
+  %tmp5015 = getelementptr inbounds float* %tmp5014, i64 1
+  %tmp5016 = getelementptr inbounds float* %tmp5015, i64 1
+  %tmp5017 = getelementptr inbounds float* %tmp5016, i64 1
+  %tmp5018 = getelementptr inbounds float* %tmp5017, i64 1
+  %tmp5019 = getelementptr inbounds float* %tmp5018, i64 1
+  %tmp5020 = getelementptr inbounds float* %tmp5019, i64 1
+  %tmp5021 = getelementptr inbounds float* %tmp5020, i64 1
+  %tmp5022 = getelementptr inbounds float* %tmp5021, i64 1
+  %tmp5023 = getelementptr inbounds float* %tmp5022, i64 1
+  %tmp5024 = getelementptr inbounds float* %tmp5023, i64 1
+  %tmp5025 = getelementptr inbounds float* %tmp5024, i64 1
+  %tmp5026 = getelementptr inbounds float* %tmp5025, i64 1
+  %tmp5027 = getelementptr inbounds float* %tmp5026, i64 1
+  %tmp5028 = getelementptr inbounds float* %tmp5027, i64 1
+  %tmp5029 = getelementptr inbounds float* %tmp5028, i64 1
+  %tmp5030 = getelementptr inbounds float* %tmp5029, i64 1
+  %tmp5031 = getelementptr inbounds float* %tmp5030, i64 1
+  %tmp5032 = getelementptr inbounds float* %tmp5031, i64 1
+  %tmp5033 = getelementptr inbounds float* %tmp5032, i64 1
+  %tmp5034 = getelementptr inbounds float* %tmp5033, i64 1
+  %tmp5035 = getelementptr inbounds float* %tmp5034, i64 1
+  %tmp5036 = getelementptr inbounds float* %tmp5035, i64 1
+  %tmp5037 = getelementptr inbounds float* %tmp5036, i64 1
+  %tmp5038 = getelementptr inbounds float* %tmp5037, i64 1
+  %tmp5039 = getelementptr inbounds float* %tmp5038, i64 1
+  %tmp5040 = getelementptr inbounds float* %tmp5039, i64 1
+  %tmp5041 = getelementptr inbounds float* %tmp5040, i64 1
+  %tmp5042 = getelementptr inbounds float* %tmp5041, i64 1
+  %tmp5043 = getelementptr inbounds float* %tmp5042, i64 1
+  %tmp5044 = getelementptr inbounds float* %tmp5043, i64 1
+  %tmp5045 = getelementptr inbounds float* %tmp5044, i64 1
+  %tmp5046 = getelementptr inbounds float* %tmp5045, i64 1
+  %tmp5047 = getelementptr inbounds float* %tmp5046, i64 1
+  %tmp5048 = getelementptr inbounds float* %tmp5047, i64 1
+  %tmp5049 = getelementptr inbounds float* %tmp5048, i64 1
+  %tmp5050 = getelementptr inbounds float* %tmp5049, i64 1
+  %tmp5051 = getelementptr inbounds float* %tmp5050, i64 1
+  %tmp5052 = getelementptr inbounds float* %tmp5051, i64 1
+  %tmp5053 = getelementptr inbounds float* %tmp5052, i64 1
+  %tmp5054 = getelementptr inbounds float* %tmp5053, i64 1
+  %tmp5055 = getelementptr inbounds float* %tmp5054, i64 1
+  %tmp5056 = getelementptr inbounds float* %tmp5055, i64 1
+  %tmp5057 = getelementptr inbounds float* %tmp5056, i64 1
+  %tmp5058 = getelementptr inbounds float* %tmp5057, i64 1
+  %tmp5059 = getelementptr inbounds float* %tmp5058, i64 1
+  %tmp5060 = getelementptr inbounds float* %tmp5059, i64 1
+  %tmp5061 = getelementptr inbounds float* %tmp5060, i64 1
+  %tmp5062 = getelementptr inbounds float* %tmp5061, i64 1
+  %tmp5063 = getelementptr inbounds float* %tmp5062, i64 1
+  %tmp5064 = getelementptr inbounds float* %tmp5063, i64 1
+  %tmp5065 = getelementptr inbounds float* %tmp5064, i64 1
+  %tmp5066 = getelementptr inbounds float* %tmp5065, i64 1
+  %tmp5067 = getelementptr inbounds float* %tmp5066, i64 1
+  %tmp5068 = getelementptr inbounds float* %tmp5067, i64 1
+  %tmp5069 = getelementptr inbounds float* %tmp5068, i64 1
+  %tmp5070 = getelementptr inbounds float* %tmp5069, i64 1
+  %tmp5071 = getelementptr inbounds float* %tmp5070, i64 1
+  %tmp5072 = getelementptr inbounds float* %tmp5071, i64 1
+  %tmp5073 = getelementptr inbounds float* %tmp5072, i64 1
+  %tmp5074 = getelementptr inbounds float* %tmp5073, i64 1
+  %tmp5075 = getelementptr inbounds float* %tmp5074, i64 1
+  %tmp5076 = getelementptr inbounds float* %tmp5075, i64 1
+  %tmp5077 = getelementptr inbounds float* %tmp5076, i64 1
+  %tmp5078 = getelementptr inbounds float* %tmp5077, i64 1
+  %tmp5079 = getelementptr inbounds float* %tmp5078, i64 1
+  %tmp5080 = getelementptr inbounds float* %tmp5079, i64 1
+  %tmp5081 = getelementptr inbounds float* %tmp5080, i64 1
+  %tmp5082 = getelementptr inbounds float* %tmp5081, i64 1
+  %tmp5083 = getelementptr inbounds float* %tmp5082, i64 1
+  %tmp5084 = getelementptr inbounds float* %tmp5083, i64 1
+  %tmp5085 = getelementptr inbounds float* %tmp5084, i64 1
+  %tmp5086 = getelementptr inbounds float* %tmp5085, i64 1
+  %tmp5087 = getelementptr inbounds float* %tmp5086, i64 1
+  %tmp5088 = getelementptr inbounds float* %tmp5087, i64 1
+  %tmp5089 = getelementptr inbounds float* %tmp5088, i64 1
+  %tmp5090 = getelementptr inbounds float* %tmp5089, i64 1
+  %tmp5091 = getelementptr inbounds float* %tmp5090, i64 1
+  %tmp5092 = getelementptr inbounds float* %tmp5091, i64 1
+  %tmp5093 = getelementptr inbounds float* %tmp5092, i64 1
+  %tmp5094 = getelementptr inbounds float* %tmp5093, i64 1
+  %tmp5095 = getelementptr inbounds float* %tmp5094, i64 1
+  %tmp5096 = getelementptr inbounds float* %tmp5095, i64 1
+  %tmp5097 = getelementptr inbounds float* %tmp5096, i64 1
+  %tmp5098 = getelementptr inbounds float* %tmp5097, i64 1
+  %tmp5099 = getelementptr inbounds float* %tmp5098, i64 1
+  %tmp5100 = getelementptr inbounds float* %tmp5099, i64 1
+  %tmp5101 = getelementptr inbounds float* %tmp5100, i64 1
+  %tmp5102 = getelementptr inbounds float* %tmp5101, i64 1
+  %tmp5103 = getelementptr inbounds float* %tmp5102, i64 1
+  %tmp5104 = getelementptr inbounds float* %tmp5103, i64 1
+  %tmp5105 = getelementptr inbounds float* %tmp5104, i64 1
+  %tmp5106 = getelementptr inbounds float* %tmp5105, i64 1
+  %tmp5107 = getelementptr inbounds float* %tmp5106, i64 1
+  %tmp5108 = getelementptr inbounds float* %tmp5107, i64 1
+  %tmp5109 = getelementptr inbounds float* %tmp5108, i64 1
+  %tmp5110 = getelementptr inbounds float* %tmp5109, i64 1
+  %tmp5111 = getelementptr inbounds float* %tmp5110, i64 1
+  %tmp5112 = getelementptr inbounds float* %tmp5111, i64 1
+  %tmp5113 = getelementptr inbounds float* %tmp5112, i64 1
+  %tmp5114 = getelementptr inbounds float* %tmp5113, i64 1
+  %tmp5115 = getelementptr inbounds float* %tmp5114, i64 1
+  %tmp5116 = getelementptr inbounds float* %tmp5115, i64 1
+  %tmp5117 = getelementptr inbounds float* %tmp5116, i64 1
+  %tmp5118 = getelementptr inbounds float* %tmp5117, i64 1
+  %tmp5119 = getelementptr inbounds float* %tmp5118, i64 1
+  %tmp5120 = getelementptr inbounds float* %tmp5119, i64 1
+  %tmp5121 = getelementptr inbounds float* %tmp5120, i64 1
+  %tmp5122 = getelementptr inbounds float* %tmp5121, i64 1
+  %tmp5123 = getelementptr inbounds float* %tmp5122, i64 1
+  %tmp5124 = getelementptr inbounds float* %tmp5123, i64 1
+  %tmp5125 = getelementptr inbounds float* %tmp5124, i64 1
+  %tmp5126 = getelementptr inbounds float* %tmp5125, i64 1
+  %tmp5127 = getelementptr inbounds float* %tmp5126, i64 1
+  %tmp5128 = getelementptr inbounds float* %tmp5127, i64 1
+  %tmp5129 = getelementptr inbounds float* %tmp5128, i64 1
+  %tmp5130 = getelementptr inbounds float* %tmp5129, i64 1
+  %tmp5131 = getelementptr inbounds float* %tmp5130, i64 1
+  %tmp5132 = getelementptr inbounds float* %tmp5131, i64 1
+  %tmp5133 = getelementptr inbounds float* %tmp5132, i64 1
+  %tmp5134 = getelementptr inbounds float* %tmp5133, i64 1
+  %tmp5135 = getelementptr inbounds float* %tmp5134, i64 1
+  %tmp5136 = getelementptr inbounds float* %tmp5135, i64 1
+  %tmp5137 = getelementptr inbounds float* %tmp5136, i64 1
+  %tmp5138 = getelementptr inbounds float* %tmp5137, i64 1
+  %tmp5139 = getelementptr inbounds float* %tmp5138, i64 1
+  %tmp5140 = getelementptr inbounds float* %tmp5139, i64 1
+  %tmp5141 = getelementptr inbounds float* %tmp5140, i64 1
+  %tmp5142 = getelementptr inbounds float* %tmp5141, i64 1
+  %tmp5143 = getelementptr inbounds float* %tmp5142, i64 1
+  %tmp5144 = getelementptr inbounds float* %tmp5143, i64 1
+  %tmp5145 = getelementptr inbounds float* %tmp5144, i64 1
+  %tmp5146 = getelementptr inbounds float* %tmp5145, i64 1
+  %tmp5147 = getelementptr inbounds float* %tmp5146, i64 1
+  %tmp5148 = getelementptr inbounds float* %tmp5147, i64 1
+  %tmp5149 = getelementptr inbounds float* %tmp5148, i64 1
+  %tmp5150 = getelementptr inbounds float* %tmp5149, i64 1
+  %tmp5151 = getelementptr inbounds float* %tmp5150, i64 1
+  %tmp5152 = getelementptr inbounds float* %tmp5151, i64 1
+  %tmp5153 = getelementptr inbounds float* %tmp5152, i64 1
+  %tmp5154 = getelementptr inbounds float* %tmp5153, i64 1
+  %tmp5155 = getelementptr inbounds float* %tmp5154, i64 1
+  %tmp5156 = getelementptr inbounds float* %tmp5155, i64 1
+  %tmp5157 = getelementptr inbounds float* %tmp5156, i64 1
+  %tmp5158 = getelementptr inbounds float* %tmp5157, i64 1
+  %tmp5159 = getelementptr inbounds float* %tmp5158, i64 1
+  %tmp5160 = getelementptr inbounds float* %tmp5159, i64 1
+  %tmp5161 = getelementptr inbounds float* %tmp5160, i64 1
+  %tmp5162 = getelementptr inbounds float* %tmp5161, i64 1
+  %tmp5163 = getelementptr inbounds float* %tmp5162, i64 1
+  %tmp5164 = getelementptr inbounds float* %tmp5163, i64 1
+  %tmp5165 = getelementptr inbounds float* %tmp5164, i64 1
+  %tmp5166 = getelementptr inbounds float* %tmp5165, i64 1
+  %tmp5167 = getelementptr inbounds float* %tmp5166, i64 1
+  %tmp5168 = getelementptr inbounds float* %tmp5167, i64 1
+  %tmp5169 = getelementptr inbounds float* %tmp5168, i64 1
+  %tmp5170 = getelementptr inbounds float* %tmp5169, i64 1
+  %tmp5171 = getelementptr inbounds float* %tmp5170, i64 1
+  %tmp5172 = getelementptr inbounds float* %tmp5171, i64 1
+  %tmp5173 = getelementptr inbounds float* %tmp5172, i64 1
+  %tmp5174 = getelementptr inbounds float* %tmp5173, i64 1
+  %tmp5175 = getelementptr inbounds float* %tmp5174, i64 1
+  %tmp5176 = getelementptr inbounds float* %tmp5175, i64 1
+  %tmp5177 = getelementptr inbounds float* %tmp5176, i64 1
+  %tmp5178 = getelementptr inbounds float* %tmp5177, i64 1
+  %tmp5179 = getelementptr inbounds float* %tmp5178, i64 1
+  %tmp5180 = getelementptr inbounds float* %tmp5179, i64 1
+  %tmp5181 = getelementptr inbounds float* %tmp5180, i64 1
+  %tmp5182 = getelementptr inbounds float* %tmp5181, i64 1
+  %tmp5183 = getelementptr inbounds float* %tmp5182, i64 1
+  %tmp5184 = getelementptr inbounds float* %tmp5183, i64 1
+  %tmp5185 = getelementptr inbounds float* %tmp5184, i64 1
+  %tmp5186 = getelementptr inbounds float* %tmp5185, i64 1
+  %tmp5187 = getelementptr inbounds float* %tmp5186, i64 1
+  %tmp5188 = getelementptr inbounds float* %tmp5187, i64 1
+  %tmp5189 = getelementptr inbounds float* %tmp5188, i64 1
+  %tmp5190 = getelementptr inbounds float* %tmp5189, i64 1
+  %tmp5191 = getelementptr inbounds float* %tmp5190, i64 1
+  %tmp5192 = getelementptr inbounds float* %tmp5191, i64 1
+  %tmp5193 = getelementptr inbounds float* %tmp5192, i64 1
+  %tmp5194 = getelementptr inbounds float* %tmp5193, i64 1
+  %tmp5195 = getelementptr inbounds float* %tmp5194, i64 1
+  %tmp5196 = getelementptr inbounds float* %tmp5195, i64 1
+  %tmp5197 = getelementptr inbounds float* %tmp5196, i64 1
+  %tmp5198 = getelementptr inbounds float* %tmp5197, i64 1
+  %tmp5199 = getelementptr inbounds float* %tmp5198, i64 1
+  %tmp5200 = getelementptr inbounds float* %tmp5199, i64 1
+  %tmp5201 = getelementptr inbounds float* %tmp5200, i64 1
+  %tmp5202 = getelementptr inbounds float* %tmp5201, i64 1
+  %tmp5203 = getelementptr inbounds float* %tmp5202, i64 1
+  %tmp5204 = getelementptr inbounds float* %tmp5203, i64 1
+  %tmp5205 = getelementptr inbounds float* %tmp5204, i64 1
+  %tmp5206 = getelementptr inbounds float* %tmp5205, i64 1
+  %tmp5207 = getelementptr inbounds float* %tmp5206, i64 1
+  %tmp5208 = getelementptr inbounds float* %tmp5207, i64 1
+  %tmp5209 = getelementptr inbounds float* %tmp5208, i64 1
+  %tmp5210 = getelementptr inbounds float* %tmp5209, i64 1
+  %tmp5211 = getelementptr inbounds float* %tmp5210, i64 1
+  %tmp5212 = getelementptr inbounds float* %tmp5211, i64 1
+  %tmp5213 = getelementptr inbounds float* %tmp5212, i64 1
+  %tmp5214 = getelementptr inbounds float* %tmp5213, i64 1
+  %tmp5215 = getelementptr inbounds float* %tmp5214, i64 1
+  %tmp5216 = getelementptr inbounds float* %tmp5215, i64 1
+  %tmp5217 = getelementptr inbounds float* %tmp5216, i64 1
+  %tmp5218 = getelementptr inbounds float* %tmp5217, i64 1
+  %tmp5219 = getelementptr inbounds float* %tmp5218, i64 1
+  %tmp5220 = getelementptr inbounds float* %tmp5219, i64 1
+  %tmp5221 = getelementptr inbounds float* %tmp5220, i64 1
+  %tmp5222 = getelementptr inbounds float* %tmp5221, i64 1
+  %tmp5223 = getelementptr inbounds float* %tmp5222, i64 1
+  %tmp5224 = getelementptr inbounds float* %tmp5223, i64 1
+  %tmp5225 = getelementptr inbounds float* %tmp5224, i64 1
+  %tmp5226 = getelementptr inbounds float* %tmp5225, i64 1
+  %tmp5227 = getelementptr inbounds float* %tmp5226, i64 1
+  %tmp5228 = getelementptr inbounds float* %tmp5227, i64 1
+  %tmp5229 = getelementptr inbounds float* %tmp5228, i64 1
+  %tmp5230 = getelementptr inbounds float* %tmp5229, i64 1
+  %tmp5231 = getelementptr inbounds float* %tmp5230, i64 1
+  %tmp5232 = getelementptr inbounds float* %tmp5231, i64 1
+  %tmp5233 = getelementptr inbounds float* %tmp5232, i64 1
+  %tmp5234 = getelementptr inbounds float* %tmp5233, i64 1
+  %tmp5235 = getelementptr inbounds float* %tmp5234, i64 1
+  %tmp5236 = getelementptr inbounds float* %tmp5235, i64 1
+  %tmp5237 = getelementptr inbounds float* %tmp5236, i64 1
+  %tmp5238 = getelementptr inbounds float* %tmp5237, i64 1
+  %tmp5239 = getelementptr inbounds float* %tmp5238, i64 1
+  %tmp5240 = getelementptr inbounds float* %tmp5239, i64 1
+  %tmp5241 = getelementptr inbounds float* %tmp5240, i64 1
+  %tmp5242 = getelementptr inbounds float* %tmp5241, i64 1
+  %tmp5243 = getelementptr inbounds float* %tmp5242, i64 1
+  %tmp5244 = getelementptr inbounds float* %tmp5243, i64 1
+  %tmp5245 = getelementptr inbounds float* %tmp5244, i64 1
+  %tmp5246 = getelementptr inbounds float* %tmp5245, i64 1
+  %tmp5247 = getelementptr inbounds float* %tmp5246, i64 1
+  %tmp5248 = getelementptr inbounds float* %tmp5247, i64 1
+  %tmp5249 = getelementptr inbounds float* %tmp5248, i64 1
+  %tmp5250 = getelementptr inbounds float* %tmp5249, i64 1
+  %tmp5251 = getelementptr inbounds float* %tmp5250, i64 1
+  %tmp5252 = getelementptr inbounds float* %tmp5251, i64 1
+  %tmp5253 = getelementptr inbounds float* %tmp5252, i64 1
+  %tmp5254 = getelementptr inbounds float* %tmp5253, i64 1
+  %tmp5255 = getelementptr inbounds float* %tmp5254, i64 1
+  %tmp5256 = getelementptr inbounds float* %tmp5255, i64 1
+  %tmp5257 = getelementptr inbounds float* %tmp5256, i64 1
+  %tmp5258 = getelementptr inbounds float* %tmp5257, i64 1
+  %tmp5259 = getelementptr inbounds float* %tmp5258, i64 1
+  %tmp5260 = getelementptr inbounds float* %tmp5259, i64 1
+  %tmp5261 = getelementptr inbounds float* %tmp5260, i64 1
+  %tmp5262 = getelementptr inbounds float* %tmp5261, i64 1
+  %tmp5263 = getelementptr inbounds float* %tmp5262, i64 1
+  %tmp5264 = getelementptr inbounds float* %tmp5263, i64 1
+  %tmp5265 = getelementptr inbounds float* %tmp5264, i64 1
+  %tmp5266 = getelementptr inbounds float* %tmp5265, i64 1
+  %tmp5267 = getelementptr inbounds float* %tmp5266, i64 1
+  %tmp5268 = getelementptr inbounds float* %tmp5267, i64 1
+  %tmp5269 = getelementptr inbounds float* %tmp5268, i64 1
+  %tmp5270 = getelementptr inbounds float* %tmp5269, i64 1
+  %tmp5271 = getelementptr inbounds float* %tmp5270, i64 1
+  %tmp5272 = getelementptr inbounds float* %tmp5271, i64 1
+  %tmp5273 = getelementptr inbounds float* %tmp5272, i64 1
+  %tmp5274 = getelementptr inbounds float* %tmp5273, i64 1
+  %tmp5275 = getelementptr inbounds float* %tmp5274, i64 1
+  %tmp5276 = getelementptr inbounds float* %tmp5275, i64 1
+  %tmp5277 = getelementptr inbounds float* %tmp5276, i64 1
+  %tmp5278 = getelementptr inbounds float* %tmp5277, i64 1
+  %tmp5279 = getelementptr inbounds float* %tmp5278, i64 1
+  %tmp5280 = getelementptr inbounds float* %tmp5279, i64 1
+  %tmp5281 = getelementptr inbounds float* %tmp5280, i64 1
+  %tmp5282 = getelementptr inbounds float* %tmp5281, i64 1
+  %tmp5283 = getelementptr inbounds float* %tmp5282, i64 1
+  %tmp5284 = getelementptr inbounds float* %tmp5283, i64 1
+  %tmp5285 = getelementptr inbounds float* %tmp5284, i64 1
+  %tmp5286 = getelementptr inbounds float* %tmp5285, i64 1
+  %tmp5287 = getelementptr inbounds float* %tmp5286, i64 1
+  %tmp5288 = getelementptr inbounds float* %tmp5287, i64 1
+  %tmp5289 = getelementptr inbounds float* %tmp5288, i64 1
+  %tmp5290 = getelementptr inbounds float* %tmp5289, i64 1
+  %tmp5291 = getelementptr inbounds float* %tmp5290, i64 1
+  %tmp5292 = getelementptr inbounds float* %tmp5291, i64 1
+  %tmp5293 = getelementptr inbounds float* %tmp5292, i64 1
+  %tmp5294 = getelementptr inbounds float* %tmp5293, i64 1
+  %tmp5295 = getelementptr inbounds float* %tmp5294, i64 1
+  %tmp5296 = getelementptr inbounds float* %tmp5295, i64 1
+  %tmp5297 = getelementptr inbounds float* %tmp5296, i64 1
+  %tmp5298 = getelementptr inbounds float* %tmp5297, i64 1
+  %tmp5299 = getelementptr inbounds float* %tmp5298, i64 1
+  %tmp5300 = getelementptr inbounds float* %tmp5299, i64 1
+  %tmp5301 = getelementptr inbounds float* %tmp5300, i64 1
+  %tmp5302 = getelementptr inbounds float* %tmp5301, i64 1
+  %tmp5303 = getelementptr inbounds float* %tmp5302, i64 1
+  %tmp5304 = getelementptr inbounds float* %tmp5303, i64 1
+  %tmp5305 = getelementptr inbounds float* %tmp5304, i64 1
+  %tmp5306 = getelementptr inbounds float* %tmp5305, i64 1
+  %tmp5307 = getelementptr inbounds float* %tmp5306, i64 1
+  %tmp5308 = getelementptr inbounds float* %tmp5307, i64 1
+  %tmp5309 = getelementptr inbounds float* %tmp5308, i64 1
+  %tmp5310 = getelementptr inbounds float* %tmp5309, i64 1
+  %tmp5311 = getelementptr inbounds float* %tmp5310, i64 1
+  %tmp5312 = getelementptr inbounds float* %tmp5311, i64 1
+  %tmp5313 = getelementptr inbounds float* %tmp5312, i64 1
+  %tmp5314 = getelementptr inbounds float* %tmp5313, i64 1
+  %tmp5315 = getelementptr inbounds float* %tmp5314, i64 1
+  %tmp5316 = getelementptr inbounds float* %tmp5315, i64 1
+  %tmp5317 = getelementptr inbounds float* %tmp5316, i64 1
+  %tmp5318 = getelementptr inbounds float* %tmp5317, i64 1
+  %tmp5319 = getelementptr inbounds float* %tmp5318, i64 1
+  %tmp5320 = getelementptr inbounds float* %tmp5319, i64 1
+  %tmp5321 = getelementptr inbounds float* %tmp5320, i64 1
+  %tmp5322 = getelementptr inbounds float* %tmp5321, i64 1
+  %tmp5323 = getelementptr inbounds float* %tmp5322, i64 1
+  %tmp5324 = getelementptr inbounds float* %tmp5323, i64 1
+  %tmp5325 = getelementptr inbounds float* %tmp5324, i64 1
+  %tmp5326 = getelementptr inbounds float* %tmp5325, i64 1
+  %tmp5327 = getelementptr inbounds float* %tmp5326, i64 1
+  %tmp5328 = getelementptr inbounds float* %tmp5327, i64 1
+  %tmp5329 = getelementptr inbounds float* %tmp5328, i64 1
+  %tmp5330 = getelementptr inbounds float* %tmp5329, i64 1
+  %tmp5331 = getelementptr inbounds float* %tmp5330, i64 1
+  %tmp5332 = getelementptr inbounds float* %tmp5331, i64 1
+  %tmp5333 = getelementptr inbounds float* %tmp5332, i64 1
+  %tmp5334 = getelementptr inbounds float* %tmp5333, i64 1
+  %tmp5335 = getelementptr inbounds float* %tmp5334, i64 1
+  %tmp5336 = getelementptr inbounds float* %tmp5335, i64 1
+  %tmp5337 = getelementptr inbounds float* %tmp5336, i64 1
+  %tmp5338 = getelementptr inbounds float* %tmp5337, i64 1
+  %tmp5339 = getelementptr inbounds float* %tmp5338, i64 1
+  %tmp5340 = getelementptr inbounds float* %tmp5339, i64 1
+  %tmp5341 = getelementptr inbounds float* %tmp5340, i64 1
+  %tmp5342 = getelementptr inbounds float* %tmp5341, i64 1
+  %tmp5343 = getelementptr inbounds float* %tmp5342, i64 1
+  %tmp5344 = getelementptr inbounds float* %tmp5343, i64 1
+  %tmp5345 = getelementptr inbounds float* %tmp5344, i64 1
+  %tmp5346 = getelementptr inbounds float* %tmp5345, i64 1
+  %tmp5347 = getelementptr inbounds float* %tmp5346, i64 1
+  %tmp5348 = getelementptr inbounds float* %tmp5347, i64 1
+  %tmp5349 = getelementptr inbounds float* %tmp5348, i64 1
+  %tmp5350 = getelementptr inbounds float* %tmp5349, i64 1
+  %tmp5351 = getelementptr inbounds float* %tmp5350, i64 1
+  %tmp5352 = getelementptr inbounds float* %tmp5351, i64 1
+  %tmp5353 = getelementptr inbounds float* %tmp5352, i64 1
+  %tmp5354 = getelementptr inbounds float* %tmp5353, i64 1
+  %tmp5355 = getelementptr inbounds float* %tmp5354, i64 1
+  %tmp5356 = getelementptr inbounds float* %tmp5355, i64 1
+  %tmp5357 = getelementptr inbounds float* %tmp5356, i64 1
+  %tmp5358 = getelementptr inbounds float* %tmp5357, i64 1
+  %tmp5359 = getelementptr inbounds float* %tmp5358, i64 1
+  %tmp5360 = getelementptr inbounds float* %tmp5359, i64 1
+  %tmp5361 = getelementptr inbounds float* %tmp5360, i64 1
+  %tmp5362 = getelementptr inbounds float* %tmp5361, i64 1
+  %tmp5363 = getelementptr inbounds float* %tmp5362, i64 1
+  %tmp5364 = getelementptr inbounds float* %tmp5363, i64 1
+  %tmp5365 = getelementptr inbounds float* %tmp5364, i64 1
+  %tmp5366 = getelementptr inbounds float* %tmp5365, i64 1
+  %tmp5367 = getelementptr inbounds float* %tmp5366, i64 1
+  %tmp5368 = getelementptr inbounds float* %tmp5367, i64 1
+  %tmp5369 = getelementptr inbounds float* %tmp5368, i64 1
+  %tmp5370 = getelementptr inbounds float* %tmp5369, i64 1
+  %tmp5371 = getelementptr inbounds float* %tmp5370, i64 1
+  %tmp5372 = getelementptr inbounds float* %tmp5371, i64 1
+  %tmp5373 = getelementptr inbounds float* %tmp5372, i64 1
+  %tmp5374 = getelementptr inbounds float* %tmp5373, i64 1
+  %tmp5375 = getelementptr inbounds float* %tmp5374, i64 1
+  %tmp5376 = getelementptr inbounds float* %tmp5375, i64 1
+  %tmp5377 = getelementptr inbounds float* %tmp5376, i64 1
+  %tmp5378 = getelementptr inbounds float* %tmp5377, i64 1
+  %tmp5379 = getelementptr inbounds float* %tmp5378, i64 1
+  %tmp5380 = getelementptr inbounds float* %tmp5379, i64 1
+  %tmp5381 = getelementptr inbounds float* %tmp5380, i64 1
+  %tmp5382 = getelementptr inbounds float* %tmp5381, i64 1
+  %tmp5383 = getelementptr inbounds float* %tmp5382, i64 1
+  %tmp5384 = getelementptr inbounds float* %tmp5383, i64 1
+  %tmp5385 = getelementptr inbounds float* %tmp5384, i64 1
+  %tmp5386 = getelementptr inbounds float* %tmp5385, i64 1
+  %tmp5387 = getelementptr inbounds float* %tmp5386, i64 1
+  %tmp5388 = getelementptr inbounds float* %tmp5387, i64 1
+  %tmp5389 = getelementptr inbounds float* %tmp5388, i64 1
+  %tmp5390 = getelementptr inbounds float* %tmp5389, i64 1
+  %tmp5391 = getelementptr inbounds float* %tmp5390, i64 1
+  %tmp5392 = getelementptr inbounds float* %tmp5391, i64 1
+  %tmp5393 = getelementptr inbounds float* %tmp5392, i64 1
+  %tmp5394 = getelementptr inbounds float* %tmp5393, i64 1
+  %tmp5395 = getelementptr inbounds float* %tmp5394, i64 1
+  %tmp5396 = getelementptr inbounds float* %tmp5395, i64 1
+  %tmp5397 = getelementptr inbounds float* %tmp5396, i64 1
+  %tmp5398 = getelementptr inbounds float* %tmp5397, i64 1
+  %tmp5399 = getelementptr inbounds float* %tmp5398, i64 1
+  %tmp5400 = getelementptr inbounds float* %tmp5399, i64 1
+  %tmp5401 = getelementptr inbounds float* %tmp5400, i64 1
+  %tmp5402 = getelementptr inbounds float* %tmp5401, i64 1
+  %tmp5403 = getelementptr inbounds float* %tmp5402, i64 1
+  %tmp5404 = getelementptr inbounds float* %tmp5403, i64 1
+  %tmp5405 = getelementptr inbounds float* %tmp5404, i64 1
+  %tmp5406 = getelementptr inbounds float* %tmp5405, i64 1
+  %tmp5407 = getelementptr inbounds float* %tmp5406, i64 1
+  %tmp5408 = getelementptr inbounds float* %tmp5407, i64 1
+  %tmp5409 = getelementptr inbounds float* %tmp5408, i64 1
+  %tmp5410 = getelementptr inbounds float* %tmp5409, i64 1
+  %tmp5411 = getelementptr inbounds float* %tmp5410, i64 1
+  %tmp5412 = getelementptr inbounds float* %tmp5411, i64 1
+  %tmp5413 = getelementptr inbounds float* %tmp5412, i64 1
+  %tmp5414 = getelementptr inbounds float* %tmp5413, i64 1
+  %tmp5415 = getelementptr inbounds float* %tmp5414, i64 1
+  %tmp5416 = getelementptr inbounds float* %tmp5415, i64 1
+  %tmp5417 = getelementptr inbounds float* %tmp5416, i64 1
+  %tmp5418 = getelementptr inbounds float* %tmp5417, i64 1
+  %tmp5419 = getelementptr inbounds float* %tmp5418, i64 1
+  %tmp5420 = getelementptr inbounds float* %tmp5419, i64 1
+  %tmp5421 = getelementptr inbounds float* %tmp5420, i64 1
+  %tmp5422 = getelementptr inbounds float* %tmp5421, i64 1
+  %tmp5423 = getelementptr inbounds float* %tmp5422, i64 1
+  %tmp5424 = getelementptr inbounds float* %tmp5423, i64 1
+  %tmp5425 = getelementptr inbounds float* %tmp5424, i64 1
+  %tmp5426 = getelementptr inbounds float* %tmp5425, i64 1
+  %tmp5427 = getelementptr inbounds float* %tmp5426, i64 1
+  %tmp5428 = getelementptr inbounds float* %tmp5427, i64 1
+  %tmp5429 = getelementptr inbounds float* %tmp5428, i64 1
+  %tmp5430 = getelementptr inbounds float* %tmp5429, i64 1
+  %tmp5431 = getelementptr inbounds float* %tmp5430, i64 1
+  %tmp5432 = getelementptr inbounds float* %tmp5431, i64 1
+  %tmp5433 = getelementptr inbounds float* %tmp5432, i64 1
+  %tmp5434 = getelementptr inbounds float* %tmp5433, i64 1
+  %tmp5435 = getelementptr inbounds float* %tmp5434, i64 1
+  %tmp5436 = getelementptr inbounds float* %tmp5435, i64 1
+  %tmp5437 = getelementptr inbounds float* %tmp5436, i64 1
+  %tmp5438 = getelementptr inbounds float* %tmp5437, i64 1
+  %tmp5439 = getelementptr inbounds float* %tmp5438, i64 1
+  %tmp5440 = getelementptr inbounds float* %tmp5439, i64 1
+  %tmp5441 = getelementptr inbounds float* %tmp5440, i64 1
+  %tmp5442 = getelementptr inbounds float* %tmp5441, i64 1
+  %tmp5443 = getelementptr inbounds float* %tmp5442, i64 1
+  %tmp5444 = getelementptr inbounds float* %tmp5443, i64 1
+  %tmp5445 = getelementptr inbounds float* %tmp5444, i64 1
+  %tmp5446 = getelementptr inbounds float* %tmp5445, i64 1
+  %tmp5447 = getelementptr inbounds float* %tmp5446, i64 1
+  %tmp5448 = getelementptr inbounds float* %tmp5447, i64 1
+  %tmp5449 = getelementptr inbounds float* %tmp5448, i64 1
+  %tmp5450 = getelementptr inbounds float* %tmp5449, i64 1
+  %tmp5451 = getelementptr inbounds float* %tmp5450, i64 1
+  %tmp5452 = getelementptr inbounds float* %tmp5451, i64 1
+  %tmp5453 = getelementptr inbounds float* %tmp5452, i64 1
+  %tmp5454 = getelementptr inbounds float* %tmp5453, i64 1
+  %tmp5455 = getelementptr inbounds float* %tmp5454, i64 1
+  %tmp5456 = getelementptr inbounds float* %tmp5455, i64 1
+  %tmp5457 = getelementptr inbounds float* %tmp5456, i64 1
+  %tmp5458 = getelementptr inbounds float* %tmp5457, i64 1
+  %tmp5459 = getelementptr inbounds float* %tmp5458, i64 1
+  %tmp5460 = getelementptr inbounds float* %tmp5459, i64 1
+  %tmp5461 = getelementptr inbounds float* %tmp5460, i64 1
+  %tmp5462 = getelementptr inbounds float* %tmp5461, i64 1
+  %tmp5463 = getelementptr inbounds float* %tmp5462, i64 1
+  %tmp5464 = getelementptr inbounds float* %tmp5463, i64 1
+  %tmp5465 = getelementptr inbounds float* %tmp5464, i64 1
+  %tmp5466 = getelementptr inbounds float* %tmp5465, i64 1
+  %tmp5467 = getelementptr inbounds float* %tmp5466, i64 1
+  %tmp5468 = getelementptr inbounds float* %tmp5467, i64 1
+  %tmp5469 = getelementptr inbounds float* %tmp5468, i64 1
+  %tmp5470 = getelementptr inbounds float* %tmp5469, i64 1
+  %tmp5471 = getelementptr inbounds float* %tmp5470, i64 1
+  %tmp5472 = getelementptr inbounds float* %tmp5471, i64 1
+  %tmp5473 = getelementptr inbounds float* %tmp5472, i64 1
+  %tmp5474 = getelementptr inbounds float* %tmp5473, i64 1
+  %tmp5475 = getelementptr inbounds float* %tmp5474, i64 1
+  %tmp5476 = getelementptr inbounds float* %tmp5475, i64 1
+  %tmp5477 = getelementptr inbounds float* %tmp5476, i64 1
+  %tmp5478 = getelementptr inbounds float* %tmp5477, i64 1
+  %tmp5479 = getelementptr inbounds float* %tmp5478, i64 1
+  %tmp5480 = getelementptr inbounds float* %tmp5479, i64 1
+  %tmp5481 = getelementptr inbounds float* %tmp5480, i64 1
+  %tmp5482 = getelementptr inbounds float* %tmp5481, i64 1
+  %tmp5483 = getelementptr inbounds float* %tmp5482, i64 1
+  %tmp5484 = getelementptr inbounds float* %tmp5483, i64 1
+  %tmp5485 = getelementptr inbounds float* %tmp5484, i64 1
+  %tmp5486 = getelementptr inbounds float* %tmp5485, i64 1
+  %tmp5487 = getelementptr inbounds float* %tmp5486, i64 1
+  %tmp5488 = getelementptr inbounds float* %tmp5487, i64 1
+  %tmp5489 = getelementptr inbounds float* %tmp5488, i64 1
+  %tmp5490 = getelementptr inbounds float* %tmp5489, i64 1
+  %tmp5491 = getelementptr inbounds float* %tmp5490, i64 1
+  %tmp5492 = getelementptr inbounds float* %tmp5491, i64 1
+  %tmp5493 = getelementptr inbounds float* %tmp5492, i64 1
+  %tmp5494 = getelementptr inbounds float* %tmp5493, i64 1
+  %tmp5495 = getelementptr inbounds float* %tmp5494, i64 1
+  %tmp5496 = getelementptr inbounds float* %tmp5495, i64 1
+  %tmp5497 = getelementptr inbounds float* %tmp5496, i64 1
+  %tmp5498 = getelementptr inbounds float* %tmp5497, i64 1
+  %tmp5499 = getelementptr inbounds float* %tmp5498, i64 1
+  %tmp5500 = getelementptr inbounds float* %tmp5499, i64 1
+  %tmp5501 = getelementptr inbounds float* %tmp5500, i64 1
+  %tmp5502 = getelementptr inbounds float* %tmp5501, i64 1
+  %tmp5503 = getelementptr inbounds float* %tmp5502, i64 1
+  %tmp5504 = getelementptr inbounds float* %tmp5503, i64 1
+  %tmp5505 = getelementptr inbounds float* %tmp5504, i64 1
+  %tmp5506 = getelementptr inbounds float* %tmp5505, i64 1
+  %tmp5507 = getelementptr inbounds float* %tmp5506, i64 1
+  %tmp5508 = getelementptr inbounds float* %tmp5507, i64 1
+  %tmp5509 = getelementptr inbounds float* %tmp5508, i64 1
+  %tmp5510 = getelementptr inbounds float* %tmp5509, i64 1
+  %tmp5511 = getelementptr inbounds float* %tmp5510, i64 1
+  %tmp5512 = getelementptr inbounds float* %tmp5511, i64 1
+  %tmp5513 = getelementptr inbounds float* %tmp5512, i64 1
+  %tmp5514 = getelementptr inbounds float* %tmp5513, i64 1
+  %tmp5515 = getelementptr inbounds float* %tmp5514, i64 1
+  %tmp5516 = getelementptr inbounds float* %tmp5515, i64 1
+  %tmp5517 = getelementptr inbounds float* %tmp5516, i64 1
+  %tmp5518 = getelementptr inbounds float* %tmp5517, i64 1
+  %tmp5519 = getelementptr inbounds float* %tmp5518, i64 1
+  %tmp5520 = getelementptr inbounds float* %tmp5519, i64 1
+  %tmp5521 = getelementptr inbounds float* %tmp5520, i64 1
+  %tmp5522 = getelementptr inbounds float* %tmp5521, i64 1
+  %tmp5523 = getelementptr inbounds float* %tmp5522, i64 1
+  %tmp5524 = getelementptr inbounds float* %tmp5523, i64 1
+  %tmp5525 = getelementptr inbounds float* %tmp5524, i64 1
+  %tmp5526 = getelementptr inbounds float* %tmp5525, i64 1
+  %tmp5527 = getelementptr inbounds float* %tmp5526, i64 1
+  %tmp5528 = getelementptr inbounds float* %tmp5527, i64 1
+  %tmp5529 = getelementptr inbounds float* %tmp5528, i64 1
+  %tmp5530 = getelementptr inbounds float* %tmp5529, i64 1
+  %tmp5531 = getelementptr inbounds float* %tmp5530, i64 1
+  %tmp5532 = getelementptr inbounds float* %tmp5531, i64 1
+  %tmp5533 = getelementptr inbounds float* %tmp5532, i64 1
+  %tmp5534 = getelementptr inbounds float* %tmp5533, i64 1
+  %tmp5535 = getelementptr inbounds float* %tmp5534, i64 1
+  %tmp5536 = getelementptr inbounds float* %tmp5535, i64 1
+  %tmp5537 = getelementptr inbounds float* %tmp5536, i64 1
+  %tmp5538 = getelementptr inbounds float* %tmp5537, i64 1
+  %tmp5539 = getelementptr inbounds float* %tmp5538, i64 1
+  %tmp5540 = getelementptr inbounds float* %tmp5539, i64 1
+  %tmp5541 = getelementptr inbounds float* %tmp5540, i64 1
+  %tmp5542 = getelementptr inbounds float* %tmp5541, i64 1
+  %tmp5543 = getelementptr inbounds float* %tmp5542, i64 1
+  %tmp5544 = getelementptr inbounds float* %tmp5543, i64 1
+  %tmp5545 = getelementptr inbounds float* %tmp5544, i64 1
+  %tmp5546 = getelementptr inbounds float* %tmp5545, i64 1
+  %tmp5547 = getelementptr inbounds float* %tmp5546, i64 1
+  %tmp5548 = getelementptr inbounds float* %tmp5547, i64 1
+  %tmp5549 = getelementptr inbounds float* %tmp5548, i64 1
+  %tmp5550 = getelementptr inbounds float* %tmp5549, i64 1
+  %tmp5551 = getelementptr inbounds float* %tmp5550, i64 1
+  %tmp5552 = getelementptr inbounds float* %tmp5551, i64 1
+  %tmp5553 = getelementptr inbounds float* %tmp5552, i64 1
+  %tmp5554 = getelementptr inbounds float* %tmp5553, i64 1
+  %tmp5555 = getelementptr inbounds float* %tmp5554, i64 1
+  %tmp5556 = getelementptr inbounds float* %tmp5555, i64 1
+  %tmp5557 = getelementptr inbounds float* %tmp5556, i64 1
+  %tmp5558 = getelementptr inbounds float* %tmp5557, i64 1
+  %tmp5559 = getelementptr inbounds float* %tmp5558, i64 1
+  %tmp5560 = getelementptr inbounds float* %tmp5559, i64 1
+  %tmp5561 = getelementptr inbounds float* %tmp5560, i64 1
+  %tmp5562 = getelementptr inbounds float* %tmp5561, i64 1
+  %tmp5563 = getelementptr inbounds float* %tmp5562, i64 1
+  %tmp5564 = getelementptr inbounds float* %tmp5563, i64 1
+  %tmp5565 = getelementptr inbounds float* %tmp5564, i64 1
+  %tmp5566 = getelementptr inbounds float* %tmp5565, i64 1
+  %tmp5567 = getelementptr inbounds float* %tmp5566, i64 1
+  %tmp5568 = getelementptr inbounds float* %tmp5567, i64 1
+  %tmp5569 = getelementptr inbounds float* %tmp5568, i64 1
+  %tmp5570 = getelementptr inbounds float* %tmp5569, i64 1
+  %tmp5571 = getelementptr inbounds float* %tmp5570, i64 1
+  %tmp5572 = getelementptr inbounds float* %tmp5571, i64 1
+  %tmp5573 = getelementptr inbounds float* %tmp5572, i64 1
+  %tmp5574 = getelementptr inbounds float* %tmp5573, i64 1
+  %tmp5575 = getelementptr inbounds float* %tmp5574, i64 1
+  %tmp5576 = getelementptr inbounds float* %tmp5575, i64 1
+  %tmp5577 = getelementptr inbounds float* %tmp5576, i64 1
+  %tmp5578 = getelementptr inbounds float* %tmp5577, i64 1
+  %tmp5579 = getelementptr inbounds float* %tmp5578, i64 1
+  %tmp5580 = getelementptr inbounds float* %tmp5579, i64 1
+  %tmp5581 = getelementptr inbounds float* %tmp5580, i64 1
+  %tmp5582 = getelementptr inbounds float* %tmp5581, i64 1
+  %tmp5583 = getelementptr inbounds float* %tmp5582, i64 1
+  %tmp5584 = getelementptr inbounds float* %tmp5583, i64 1
+  %tmp5585 = getelementptr inbounds float* %tmp5584, i64 1
+  %tmp5586 = getelementptr inbounds float* %tmp5585, i64 1
+  %tmp5587 = getelementptr inbounds float* %tmp5586, i64 1
+  %tmp5588 = getelementptr inbounds float* %tmp5587, i64 1
+  %tmp5589 = getelementptr inbounds float* %tmp5588, i64 1
+  %tmp5590 = getelementptr inbounds float* %tmp5589, i64 1
+  %tmp5591 = getelementptr inbounds float* %tmp5590, i64 1
+  %tmp5592 = getelementptr inbounds float* %tmp5591, i64 1
+  %tmp5593 = getelementptr inbounds float* %tmp5592, i64 1
+  %tmp5594 = getelementptr inbounds float* %tmp5593, i64 1
+  %tmp5595 = getelementptr inbounds float* %tmp5594, i64 1
+  %tmp5596 = getelementptr inbounds float* %tmp5595, i64 1
+  %tmp5597 = getelementptr inbounds float* %tmp5596, i64 1
+  %tmp5598 = getelementptr inbounds float* %tmp5597, i64 1
+  %tmp5599 = getelementptr inbounds float* %tmp5598, i64 1
+  %tmp5600 = getelementptr inbounds float* %tmp5599, i64 1
+  %tmp5601 = getelementptr inbounds float* %tmp5600, i64 1
+  %tmp5602 = getelementptr inbounds float* %tmp5601, i64 1
+  %tmp5603 = getelementptr inbounds float* %tmp5602, i64 1
+  %tmp5604 = getelementptr inbounds float* %tmp5603, i64 1
+  %tmp5605 = getelementptr inbounds float* %tmp5604, i64 1
+  %tmp5606 = getelementptr inbounds float* %tmp5605, i64 1
+  %tmp5607 = getelementptr inbounds float* %tmp5606, i64 1
+  %tmp5608 = getelementptr inbounds float* %tmp5607, i64 1
+  %tmp5609 = getelementptr inbounds float* %tmp5608, i64 1
+  %tmp5610 = getelementptr inbounds float* %tmp5609, i64 1
+  %tmp5611 = getelementptr inbounds float* %tmp5610, i64 1
+  %tmp5612 = getelementptr inbounds float* %tmp5611, i64 1
+  %tmp5613 = getelementptr inbounds float* %tmp5612, i64 1
+  %tmp5614 = getelementptr inbounds float* %tmp5613, i64 1
+  %tmp5615 = getelementptr inbounds float* %tmp5614, i64 1
+  %tmp5616 = getelementptr inbounds float* %tmp5615, i64 1
+  %tmp5617 = getelementptr inbounds float* %tmp5616, i64 1
+  %tmp5618 = getelementptr inbounds float* %tmp5617, i64 1
+  %tmp5619 = getelementptr inbounds float* %tmp5618, i64 1
+  %tmp5620 = getelementptr inbounds float* %tmp5619, i64 1
+  %tmp5621 = getelementptr inbounds float* %tmp5620, i64 1
+  %tmp5622 = getelementptr inbounds float* %tmp5621, i64 1
+  %tmp5623 = getelementptr inbounds float* %tmp5622, i64 1
+  %tmp5624 = getelementptr inbounds float* %tmp5623, i64 1
+  %tmp5625 = getelementptr inbounds float* %tmp5624, i64 1
+  %tmp5626 = getelementptr inbounds float* %tmp5625, i64 1
+  %tmp5627 = getelementptr inbounds float* %tmp5626, i64 1
+  %tmp5628 = getelementptr inbounds float* %tmp5627, i64 1
+  %tmp5629 = getelementptr inbounds float* %tmp5628, i64 1
+  %tmp5630 = getelementptr inbounds float* %tmp5629, i64 1
+  %tmp5631 = getelementptr inbounds float* %tmp5630, i64 1
+  %tmp5632 = getelementptr inbounds float* %tmp5631, i64 1
+  %tmp5633 = getelementptr inbounds float* %tmp5632, i64 1
+  %tmp5634 = getelementptr inbounds float* %tmp5633, i64 1
+  %tmp5635 = getelementptr inbounds float* %tmp5634, i64 1
+  %tmp5636 = getelementptr inbounds float* %tmp5635, i64 1
+  %tmp5637 = getelementptr inbounds float* %tmp5636, i64 1
+  %tmp5638 = getelementptr inbounds float* %tmp5637, i64 1
+  %tmp5639 = getelementptr inbounds float* %tmp5638, i64 1
+  %tmp5640 = getelementptr inbounds float* %tmp5639, i64 1
+  %tmp5641 = getelementptr inbounds float* %tmp5640, i64 1
+  %tmp5642 = getelementptr inbounds float* %tmp5641, i64 1
+  %tmp5643 = getelementptr inbounds float* %tmp5642, i64 1
+  %tmp5644 = getelementptr inbounds float* %tmp5643, i64 1
+  %tmp5645 = getelementptr inbounds float* %tmp5644, i64 1
+  %tmp5646 = getelementptr inbounds float* %tmp5645, i64 1
+  %tmp5647 = getelementptr inbounds float* %tmp5646, i64 1
+  %tmp5648 = getelementptr inbounds float* %tmp5647, i64 1
+  %tmp5649 = getelementptr inbounds float* %tmp5648, i64 1
+  %tmp5650 = getelementptr inbounds float* %tmp5649, i64 1
+  %tmp5651 = getelementptr inbounds float* %tmp5650, i64 1
+  %tmp5652 = getelementptr inbounds float* %tmp5651, i64 1
+  %tmp5653 = getelementptr inbounds float* %tmp5652, i64 1
+  %tmp5654 = getelementptr inbounds float* %tmp5653, i64 1
+  %tmp5655 = getelementptr inbounds float* %tmp5654, i64 1
+  %tmp5656 = getelementptr inbounds float* %tmp5655, i64 1
+  %tmp5657 = getelementptr inbounds float* %tmp5656, i64 1
+  %tmp5658 = getelementptr inbounds float* %tmp5657, i64 1
+  %tmp5659 = getelementptr inbounds float* %tmp5658, i64 1
+  %tmp5660 = getelementptr inbounds float* %tmp5659, i64 1
+  %tmp5661 = getelementptr inbounds float* %tmp5660, i64 1
+  %tmp5662 = getelementptr inbounds float* %tmp5661, i64 1
+  %tmp5663 = getelementptr inbounds float* %tmp5662, i64 1
+  %tmp5664 = getelementptr inbounds float* %tmp5663, i64 1
+  %tmp5665 = getelementptr inbounds float* %tmp5664, i64 1
+  %tmp5666 = getelementptr inbounds float* %tmp5665, i64 1
+  %tmp5667 = getelementptr inbounds float* %tmp5666, i64 1
+  %tmp5668 = getelementptr inbounds float* %tmp5667, i64 1
+  %tmp5669 = getelementptr inbounds float* %tmp5668, i64 1
+  %tmp5670 = getelementptr inbounds float* %tmp5669, i64 1
+  %tmp5671 = getelementptr inbounds float* %tmp5670, i64 1
+  %tmp5672 = getelementptr inbounds float* %tmp5671, i64 1
+  %tmp5673 = getelementptr inbounds float* %tmp5672, i64 1
+  %tmp5674 = getelementptr inbounds float* %tmp5673, i64 1
+  %tmp5675 = getelementptr inbounds float* %tmp5674, i64 1
+  %tmp5676 = getelementptr inbounds float* %tmp5675, i64 1
+  %tmp5677 = getelementptr inbounds float* %tmp5676, i64 1
+  %tmp5678 = getelementptr inbounds float* %tmp5677, i64 1
+  %tmp5679 = getelementptr inbounds float* %tmp5678, i64 1
+  %tmp5680 = getelementptr inbounds float* %tmp5679, i64 1
+  %tmp5681 = getelementptr inbounds float* %tmp5680, i64 1
+  %tmp5682 = getelementptr inbounds float* %tmp5681, i64 1
+  %tmp5683 = getelementptr inbounds float* %tmp5682, i64 1
+  %tmp5684 = getelementptr inbounds float* %tmp5683, i64 1
+  %tmp5685 = getelementptr inbounds float* %tmp5684, i64 1
+  %tmp5686 = getelementptr inbounds float* %tmp5685, i64 1
+  %tmp5687 = getelementptr inbounds float* %tmp5686, i64 1
+  %tmp5688 = getelementptr inbounds float* %tmp5687, i64 1
+  %tmp5689 = getelementptr inbounds float* %tmp5688, i64 1
+  %tmp5690 = getelementptr inbounds float* %tmp5689, i64 1
+  %tmp5691 = getelementptr inbounds float* %tmp5690, i64 1
+  %tmp5692 = getelementptr inbounds float* %tmp5691, i64 1
+  %tmp5693 = getelementptr inbounds float* %tmp5692, i64 1
+  %tmp5694 = getelementptr inbounds float* %tmp5693, i64 1
+  %tmp5695 = getelementptr inbounds float* %tmp5694, i64 1
+  %tmp5696 = getelementptr inbounds float* %tmp5695, i64 1
+  %tmp5697 = getelementptr inbounds float* %tmp5696, i64 1
+  %tmp5698 = getelementptr inbounds float* %tmp5697, i64 1
+  %tmp5699 = getelementptr inbounds float* %tmp5698, i64 1
+  %tmp5700 = getelementptr inbounds float* %tmp5699, i64 1
+  %tmp5701 = getelementptr inbounds float* %tmp5700, i64 1
+  %tmp5702 = getelementptr inbounds float* %tmp5701, i64 1
+  %tmp5703 = getelementptr inbounds float* %tmp5702, i64 1
+  %tmp5704 = getelementptr inbounds float* %tmp5703, i64 1
+  %tmp5705 = getelementptr inbounds float* %tmp5704, i64 1
+  %tmp5706 = getelementptr inbounds float* %tmp5705, i64 1
+  %tmp5707 = getelementptr inbounds float* %tmp5706, i64 1
+  %tmp5708 = getelementptr inbounds float* %tmp5707, i64 1
+  %tmp5709 = getelementptr inbounds float* %tmp5708, i64 1
+  %tmp5710 = getelementptr inbounds float* %tmp5709, i64 1
+  %tmp5711 = getelementptr inbounds float* %tmp5710, i64 1
+  %tmp5712 = getelementptr inbounds float* %tmp5711, i64 1
+  %tmp5713 = getelementptr inbounds float* %tmp5712, i64 1
+  %tmp5714 = getelementptr inbounds float* %tmp5713, i64 1
+  %tmp5715 = getelementptr inbounds float* %tmp5714, i64 1
+  %tmp5716 = getelementptr inbounds float* %tmp5715, i64 1
+  %tmp5717 = getelementptr inbounds float* %tmp5716, i64 1
+  %tmp5718 = getelementptr inbounds float* %tmp5717, i64 1
+  %tmp5719 = getelementptr inbounds float* %tmp5718, i64 1
+  %tmp5720 = getelementptr inbounds float* %tmp5719, i64 1
+  %tmp5721 = getelementptr inbounds float* %tmp5720, i64 1
+  %tmp5722 = getelementptr inbounds float* %tmp5721, i64 1
+  %tmp5723 = getelementptr inbounds float* %tmp5722, i64 1
+  %tmp5724 = getelementptr inbounds float* %tmp5723, i64 1
+  %tmp5725 = getelementptr inbounds float* %tmp5724, i64 1
+  %tmp5726 = getelementptr inbounds float* %tmp5725, i64 1
+  %tmp5727 = getelementptr inbounds float* %tmp5726, i64 1
+  %tmp5728 = getelementptr inbounds float* %tmp5727, i64 1
+  %tmp5729 = getelementptr inbounds float* %tmp5728, i64 1
+  %tmp5730 = getelementptr inbounds float* %tmp5729, i64 1
+  %tmp5731 = getelementptr inbounds float* %tmp5730, i64 1
+  %tmp5732 = getelementptr inbounds float* %tmp5731, i64 1
+  %tmp5733 = getelementptr inbounds float* %tmp5732, i64 1
+  %tmp5734 = getelementptr inbounds float* %tmp5733, i64 1
+  %tmp5735 = getelementptr inbounds float* %tmp5734, i64 1
+  %tmp5736 = getelementptr inbounds float* %tmp5735, i64 1
+  %tmp5737 = getelementptr inbounds float* %tmp5736, i64 1
+  %tmp5738 = getelementptr inbounds float* %tmp5737, i64 1
+  %tmp5739 = getelementptr inbounds float* %tmp5738, i64 1
+  %tmp5740 = getelementptr inbounds float* %tmp5739, i64 1
+  %tmp5741 = getelementptr inbounds float* %tmp5740, i64 1
+  %tmp5742 = getelementptr inbounds float* %tmp5741, i64 1
+  %tmp5743 = getelementptr inbounds float* %tmp5742, i64 1
+  %tmp5744 = getelementptr inbounds float* %tmp5743, i64 1
+  %tmp5745 = getelementptr inbounds float* %tmp5744, i64 1
+  %tmp5746 = getelementptr inbounds float* %tmp5745, i64 1
+  %tmp5747 = getelementptr inbounds float* %tmp5746, i64 1
+  %tmp5748 = getelementptr inbounds float* %tmp5747, i64 1
+  %tmp5749 = getelementptr inbounds float* %tmp5748, i64 1
+  %tmp5750 = getelementptr inbounds float* %tmp5749, i64 1
+  %tmp5751 = getelementptr inbounds float* %tmp5750, i64 1
+  %tmp5752 = getelementptr inbounds float* %tmp5751, i64 1
+  %tmp5753 = getelementptr inbounds float* %tmp5752, i64 1
+  %tmp5754 = getelementptr inbounds float* %tmp5753, i64 1
+  %tmp5755 = getelementptr inbounds float* %tmp5754, i64 1
+  %tmp5756 = getelementptr inbounds float* %tmp5755, i64 1
+  %tmp5757 = getelementptr inbounds float* %tmp5756, i64 1
+  %tmp5758 = getelementptr inbounds float* %tmp5757, i64 1
+  %tmp5759 = getelementptr inbounds float* %tmp5758, i64 1
+  %tmp5760 = getelementptr inbounds float* %tmp5759, i64 1
+  %tmp5761 = getelementptr inbounds float* %tmp5760, i64 1
+  %tmp5762 = getelementptr inbounds float* %tmp5761, i64 1
+  %tmp5763 = getelementptr inbounds float* %tmp5762, i64 1
+  %tmp5764 = getelementptr inbounds float* %tmp5763, i64 1
+  %tmp5765 = getelementptr inbounds float* %tmp5764, i64 1
+  %tmp5766 = getelementptr inbounds float* %tmp5765, i64 1
+  %tmp5767 = getelementptr inbounds float* %tmp5766, i64 1
+  %tmp5768 = getelementptr inbounds float* %tmp5767, i64 1
+  %tmp5769 = getelementptr inbounds float* %tmp5768, i64 1
+  %tmp5770 = getelementptr inbounds float* %tmp5769, i64 1
+  %tmp5771 = getelementptr inbounds float* %tmp5770, i64 1
+  %tmp5772 = getelementptr inbounds float* %tmp5771, i64 1
+  %tmp5773 = getelementptr inbounds float* %tmp5772, i64 1
+  %tmp5774 = getelementptr inbounds float* %tmp5773, i64 1
+  %tmp5775 = getelementptr inbounds float* %tmp5774, i64 1
+  %tmp5776 = getelementptr inbounds float* %tmp5775, i64 1
+  %tmp5777 = getelementptr inbounds float* %tmp5776, i64 1
+  %tmp5778 = getelementptr inbounds float* %tmp5777, i64 1
+  %tmp5779 = getelementptr inbounds float* %tmp5778, i64 1
+  %tmp5780 = getelementptr inbounds float* %tmp5779, i64 1
+  %tmp5781 = getelementptr inbounds float* %tmp5780, i64 1
+  %tmp5782 = getelementptr inbounds float* %tmp5781, i64 1
+  %tmp5783 = getelementptr inbounds float* %tmp5782, i64 1
+  %tmp5784 = getelementptr inbounds float* %tmp5783, i64 1
+  %tmp5785 = getelementptr inbounds float* %tmp5784, i64 1
+  %tmp5786 = getelementptr inbounds float* %tmp5785, i64 1
+  %tmp5787 = getelementptr inbounds float* %tmp5786, i64 1
+  %tmp5788 = getelementptr inbounds float* %tmp5787, i64 1
+  %tmp5789 = getelementptr inbounds float* %tmp5788, i64 1
+  %tmp5790 = getelementptr inbounds float* %tmp5789, i64 1
+  %tmp5791 = getelementptr inbounds float* %tmp5790, i64 1
+  %tmp5792 = getelementptr inbounds float* %tmp5791, i64 1
+  %tmp5793 = getelementptr inbounds float* %tmp5792, i64 1
+  %tmp5794 = getelementptr inbounds float* %tmp5793, i64 1
+  %tmp5795 = getelementptr inbounds float* %tmp5794, i64 1
+  %tmp5796 = getelementptr inbounds float* %tmp5795, i64 1
+  %tmp5797 = getelementptr inbounds float* %tmp5796, i64 1
+  %tmp5798 = getelementptr inbounds float* %tmp5797, i64 1
+  %tmp5799 = getelementptr inbounds float* %tmp5798, i64 1
+  %tmp5800 = getelementptr inbounds float* %tmp5799, i64 1
+  %tmp5801 = getelementptr inbounds float* %tmp5800, i64 1
+  %tmp5802 = getelementptr inbounds float* %tmp5801, i64 1
+  %tmp5803 = getelementptr inbounds float* %tmp5802, i64 1
+  %tmp5804 = getelementptr inbounds float* %tmp5803, i64 1
+  %tmp5805 = getelementptr inbounds float* %tmp5804, i64 1
+  %tmp5806 = getelementptr inbounds float* %tmp5805, i64 1
+  %tmp5807 = getelementptr inbounds float* %tmp5806, i64 1
+  %tmp5808 = getelementptr inbounds float* %tmp5807, i64 1
+  %tmp5809 = getelementptr inbounds float* %tmp5808, i64 1
+  %tmp5810 = getelementptr inbounds float* %tmp5809, i64 1
+  %tmp5811 = getelementptr inbounds float* %tmp5810, i64 1
+  %tmp5812 = getelementptr inbounds float* %tmp5811, i64 1
+  %tmp5813 = getelementptr inbounds float* %tmp5812, i64 1
+  %tmp5814 = getelementptr inbounds float* %tmp5813, i64 1
+  %tmp5815 = getelementptr inbounds float* %tmp5814, i64 1
+  %tmp5816 = getelementptr inbounds float* %tmp5815, i64 1
+  %tmp5817 = getelementptr inbounds float* %tmp5816, i64 1
+  %tmp5818 = getelementptr inbounds float* %tmp5817, i64 1
+  %tmp5819 = getelementptr inbounds float* %tmp5818, i64 1
+  %tmp5820 = getelementptr inbounds float* %tmp5819, i64 1
+  %tmp5821 = getelementptr inbounds float* %tmp5820, i64 1
+  %tmp5822 = getelementptr inbounds float* %tmp5821, i64 1
+  %tmp5823 = getelementptr inbounds float* %tmp5822, i64 1
+  %tmp5824 = getelementptr inbounds float* %tmp5823, i64 1
+  %tmp5825 = getelementptr inbounds float* %tmp5824, i64 1
+  %tmp5826 = getelementptr inbounds float* %tmp5825, i64 1
+  %tmp5827 = getelementptr inbounds float* %tmp5826, i64 1
+  %tmp5828 = getelementptr inbounds float* %tmp5827, i64 1
+  %tmp5829 = getelementptr inbounds float* %tmp5828, i64 1
+  %tmp5830 = getelementptr inbounds float* %tmp5829, i64 1
+  %tmp5831 = getelementptr inbounds float* %tmp5830, i64 1
+  %tmp5832 = getelementptr inbounds float* %tmp5831, i64 1
+  %tmp5833 = getelementptr inbounds float* %tmp5832, i64 1
+  %tmp5834 = getelementptr inbounds float* %tmp5833, i64 1
+  %tmp5835 = getelementptr inbounds float* %tmp5834, i64 1
+  %tmp5836 = getelementptr inbounds float* %tmp5835, i64 1
+  %tmp5837 = getelementptr inbounds float* %tmp5836, i64 1
+  %tmp5838 = getelementptr inbounds float* %tmp5837, i64 1
+  %tmp5839 = getelementptr inbounds float* %tmp5838, i64 1
+  %tmp5840 = getelementptr inbounds float* %tmp5839, i64 1
+  %tmp5841 = getelementptr inbounds float* %tmp5840, i64 1
+  %tmp5842 = getelementptr inbounds float* %tmp5841, i64 1
+  %tmp5843 = getelementptr inbounds float* %tmp5842, i64 1
+  %tmp5844 = getelementptr inbounds float* %tmp5843, i64 1
+  %tmp5845 = getelementptr inbounds float* %tmp5844, i64 1
+  %tmp5846 = getelementptr inbounds float* %tmp5845, i64 1
+  %tmp5847 = getelementptr inbounds float* %tmp5846, i64 1
+  %tmp5848 = getelementptr inbounds float* %tmp5847, i64 1
+  %tmp5849 = getelementptr inbounds float* %tmp5848, i64 1
+  %tmp5850 = getelementptr inbounds float* %tmp5849, i64 1
+  %tmp5851 = getelementptr inbounds float* %tmp5850, i64 1
+  %tmp5852 = getelementptr inbounds float* %tmp5851, i64 1
+  %tmp5853 = getelementptr inbounds float* %tmp5852, i64 1
+  %tmp5854 = getelementptr inbounds float* %tmp5853, i64 1
+  %tmp5855 = getelementptr inbounds float* %tmp5854, i64 1
+  %tmp5856 = getelementptr inbounds float* %tmp5855, i64 1
+  %tmp5857 = getelementptr inbounds float* %tmp5856, i64 1
+  %tmp5858 = getelementptr inbounds float* %tmp5857, i64 1
+  %tmp5859 = getelementptr inbounds float* %tmp5858, i64 1
+  %tmp5860 = getelementptr inbounds float* %tmp5859, i64 1
+  %tmp5861 = getelementptr inbounds float* %tmp5860, i64 1
+  %tmp5862 = getelementptr inbounds float* %tmp5861, i64 1
+  %tmp5863 = getelementptr inbounds float* %tmp5862, i64 1
+  %tmp5864 = getelementptr inbounds float* %tmp5863, i64 1
+  %tmp5865 = getelementptr inbounds float* %tmp5864, i64 1
+  %tmp5866 = getelementptr inbounds float* %tmp5865, i64 1
+  %tmp5867 = getelementptr inbounds float* %tmp5866, i64 1
+  %tmp5868 = getelementptr inbounds float* %tmp5867, i64 1
+  %tmp5869 = getelementptr inbounds float* %tmp5868, i64 1
+  %tmp5870 = getelementptr inbounds float* %tmp5869, i64 1
+  %tmp5871 = getelementptr inbounds float* %tmp5870, i64 1
+  %tmp5872 = getelementptr inbounds float* %tmp5871, i64 1
+  %tmp5873 = getelementptr inbounds float* %tmp5872, i64 1
+  %tmp5874 = getelementptr inbounds float* %tmp5873, i64 1
+  %tmp5875 = getelementptr inbounds float* %tmp5874, i64 1
+  %tmp5876 = getelementptr inbounds float* %tmp5875, i64 1
+  %tmp5877 = getelementptr inbounds float* %tmp5876, i64 1
+  %tmp5878 = getelementptr inbounds float* %tmp5877, i64 1
+  %tmp5879 = getelementptr inbounds float* %tmp5878, i64 1
+  %tmp5880 = getelementptr inbounds float* %tmp5879, i64 1
+  %tmp5881 = getelementptr inbounds float* %tmp5880, i64 1
+  %tmp5882 = getelementptr inbounds float* %tmp5881, i64 1
+  %tmp5883 = getelementptr inbounds float* %tmp5882, i64 1
+  %tmp5884 = getelementptr inbounds float* %tmp5883, i64 1
+  %tmp5885 = getelementptr inbounds float* %tmp5884, i64 1
+  %tmp5886 = getelementptr inbounds float* %tmp5885, i64 1
+  %tmp5887 = getelementptr inbounds float* %tmp5886, i64 1
+  %tmp5888 = getelementptr inbounds float* %tmp5887, i64 1
+  %tmp5889 = getelementptr inbounds float* %tmp5888, i64 1
+  %tmp5890 = getelementptr inbounds float* %tmp5889, i64 1
+  %tmp5891 = getelementptr inbounds float* %tmp5890, i64 1
+  %tmp5892 = getelementptr inbounds float* %tmp5891, i64 1
+  %tmp5893 = getelementptr inbounds float* %tmp5892, i64 1
+  %tmp5894 = getelementptr inbounds float* %tmp5893, i64 1
+  %tmp5895 = getelementptr inbounds float* %tmp5894, i64 1
+  %tmp5896 = getelementptr inbounds float* %tmp5895, i64 1
+  %tmp5897 = getelementptr inbounds float* %tmp5896, i64 1
+  %tmp5898 = getelementptr inbounds float* %tmp5897, i64 1
+  %tmp5899 = getelementptr inbounds float* %tmp5898, i64 1
+  %tmp5900 = getelementptr inbounds float* %tmp5899, i64 1
+  %tmp5901 = getelementptr inbounds float* %tmp5900, i64 1
+  %tmp5902 = getelementptr inbounds float* %tmp5901, i64 1
+  %tmp5903 = getelementptr inbounds float* %tmp5902, i64 1
+  %tmp5904 = getelementptr inbounds float* %tmp5903, i64 1
+  %tmp5905 = getelementptr inbounds float* %tmp5904, i64 1
+  %tmp5906 = getelementptr inbounds float* %tmp5905, i64 1
+  %tmp5907 = getelementptr inbounds float* %tmp5906, i64 1
+  %tmp5908 = getelementptr inbounds float* %tmp5907, i64 1
+  %tmp5909 = getelementptr inbounds float* %tmp5908, i64 1
+  %tmp5910 = getelementptr inbounds float* %tmp5909, i64 1
+  %tmp5911 = getelementptr inbounds float* %tmp5910, i64 1
+  %tmp5912 = getelementptr inbounds float* %tmp5911, i64 1
+  %tmp5913 = getelementptr inbounds float* %tmp5912, i64 1
+  %tmp5914 = getelementptr inbounds float* %tmp5913, i64 1
+  %tmp5915 = getelementptr inbounds float* %tmp5914, i64 1
+  %tmp5916 = getelementptr inbounds float* %tmp5915, i64 1
+  %tmp5917 = getelementptr inbounds float* %tmp5916, i64 1
+  %tmp5918 = getelementptr inbounds float* %tmp5917, i64 1
+  %tmp5919 = getelementptr inbounds float* %tmp5918, i64 1
+  %tmp5920 = getelementptr inbounds float* %tmp5919, i64 1
+  %tmp5921 = getelementptr inbounds float* %tmp5920, i64 1
+  %tmp5922 = getelementptr inbounds float* %tmp5921, i64 1
+  %tmp5923 = getelementptr inbounds float* %tmp5922, i64 1
+  %tmp5924 = getelementptr inbounds float* %tmp5923, i64 1
+  %tmp5925 = getelementptr inbounds float* %tmp5924, i64 1
+  %tmp5926 = getelementptr inbounds float* %tmp5925, i64 1
+  %tmp5927 = getelementptr inbounds float* %tmp5926, i64 1
+  %tmp5928 = getelementptr inbounds float* %tmp5927, i64 1
+  %tmp5929 = getelementptr inbounds float* %tmp5928, i64 1
+  %tmp5930 = getelementptr inbounds float* %tmp5929, i64 1
+  %tmp5931 = getelementptr inbounds float* %tmp5930, i64 1
+  %tmp5932 = getelementptr inbounds float* %tmp5931, i64 1
+  %tmp5933 = getelementptr inbounds float* %tmp5932, i64 1
+  %tmp5934 = getelementptr inbounds float* %tmp5933, i64 1
+  %tmp5935 = getelementptr inbounds float* %tmp5934, i64 1
+  %tmp5936 = getelementptr inbounds float* %tmp5935, i64 1
+  %tmp5937 = getelementptr inbounds float* %tmp5936, i64 1
+  %tmp5938 = getelementptr inbounds float* %tmp5937, i64 1
+  %tmp5939 = getelementptr inbounds float* %tmp5938, i64 1
+  %tmp5940 = getelementptr inbounds float* %tmp5939, i64 1
+  %tmp5941 = getelementptr inbounds float* %tmp5940, i64 1
+  %tmp5942 = getelementptr inbounds float* %tmp5941, i64 1
+  %tmp5943 = getelementptr inbounds float* %tmp5942, i64 1
+  %tmp5944 = getelementptr inbounds float* %tmp5943, i64 1
+  %tmp5945 = getelementptr inbounds float* %tmp5944, i64 1
+  %tmp5946 = getelementptr inbounds float* %tmp5945, i64 1
+  %tmp5947 = getelementptr inbounds float* %tmp5946, i64 1
+  %tmp5948 = getelementptr inbounds float* %tmp5947, i64 1
+  %tmp5949 = getelementptr inbounds float* %tmp5948, i64 1
+  %tmp5950 = getelementptr inbounds float* %tmp5949, i64 1
+  %tmp5951 = getelementptr inbounds float* %tmp5950, i64 1
+  %tmp5952 = getelementptr inbounds float* %tmp5951, i64 1
+  %tmp5953 = getelementptr inbounds float* %tmp5952, i64 1
+  %tmp5954 = getelementptr inbounds float* %tmp5953, i64 1
+  %tmp5955 = getelementptr inbounds float* %tmp5954, i64 1
+  %tmp5956 = getelementptr inbounds float* %tmp5955, i64 1
+  %tmp5957 = getelementptr inbounds float* %tmp5956, i64 1
+  %tmp5958 = getelementptr inbounds float* %tmp5957, i64 1
+  %tmp5959 = getelementptr inbounds float* %tmp5958, i64 1
+  %tmp5960 = getelementptr inbounds float* %tmp5959, i64 1
+  %tmp5961 = getelementptr inbounds float* %tmp5960, i64 1
+  %tmp5962 = getelementptr inbounds float* %tmp5961, i64 1
+  %tmp5963 = getelementptr inbounds float* %tmp5962, i64 1
+  %tmp5964 = getelementptr inbounds float* %tmp5963, i64 1
+  %tmp5965 = getelementptr inbounds float* %tmp5964, i64 1
+  %tmp5966 = getelementptr inbounds float* %tmp5965, i64 1
+  %tmp5967 = getelementptr inbounds float* %tmp5966, i64 1
+  %tmp5968 = getelementptr inbounds float* %tmp5967, i64 1
+  %tmp5969 = getelementptr inbounds float* %tmp5968, i64 1
+  %tmp5970 = getelementptr inbounds float* %tmp5969, i64 1
+  %tmp5971 = getelementptr inbounds float* %tmp5970, i64 1
+  %tmp5972 = getelementptr inbounds float* %tmp5971, i64 1
+  %tmp5973 = getelementptr inbounds float* %tmp5972, i64 1
+  %tmp5974 = getelementptr inbounds float* %tmp5973, i64 1
+  %tmp5975 = getelementptr inbounds float* %tmp5974, i64 1
+  %tmp5976 = getelementptr inbounds float* %tmp5975, i64 1
+  %tmp5977 = getelementptr inbounds float* %tmp5976, i64 1
+  %tmp5978 = getelementptr inbounds float* %tmp5977, i64 1
+  %tmp5979 = getelementptr inbounds float* %tmp5978, i64 1
+  %tmp5980 = getelementptr inbounds float* %tmp5979, i64 1
+  %tmp5981 = getelementptr inbounds float* %tmp5980, i64 1
+  %tmp5982 = getelementptr inbounds float* %tmp5981, i64 1
+  %tmp5983 = getelementptr inbounds float* %tmp5982, i64 1
+  %tmp5984 = getelementptr inbounds float* %tmp5983, i64 1
+  %tmp5985 = getelementptr inbounds float* %tmp5984, i64 1
+  %tmp5986 = getelementptr inbounds float* %tmp5985, i64 1
+  %tmp5987 = getelementptr inbounds float* %tmp5986, i64 1
+  %tmp5988 = getelementptr inbounds float* %tmp5987, i64 1
+  %tmp5989 = getelementptr inbounds float* %tmp5988, i64 1
+  %tmp5990 = getelementptr inbounds float* %tmp5989, i64 1
+  %tmp5991 = getelementptr inbounds float* %tmp5990, i64 1
+  %tmp5992 = getelementptr inbounds float* %tmp5991, i64 1
+  %tmp5993 = getelementptr inbounds float* %tmp5992, i64 1
+  %tmp5994 = getelementptr inbounds float* %tmp5993, i64 1
+  %tmp5995 = getelementptr inbounds float* %tmp5994, i64 1
+  %tmp5996 = getelementptr inbounds float* %tmp5995, i64 1
+  %tmp5997 = getelementptr inbounds float* %tmp5996, i64 1
+  %tmp5998 = getelementptr inbounds float* %tmp5997, i64 1
+  %tmp5999 = getelementptr inbounds float* %tmp5998, i64 1
+  %tmp6000 = getelementptr inbounds float* %tmp5999, i64 1
+  %tmp6001 = getelementptr inbounds float* %tmp6000, i64 1
+  %tmp6002 = getelementptr inbounds float* %tmp6001, i64 1
+  %tmp6003 = getelementptr inbounds float* %tmp6002, i64 1
+  %tmp6004 = getelementptr inbounds float* %tmp6003, i64 1
+  %tmp6005 = getelementptr inbounds float* %tmp6004, i64 1
+  %tmp6006 = getelementptr inbounds float* %tmp6005, i64 1
+  %tmp6007 = getelementptr inbounds float* %tmp6006, i64 1
+  %tmp6008 = getelementptr inbounds float* %tmp6007, i64 1
+  %tmp6009 = getelementptr inbounds float* %tmp6008, i64 1
+  %tmp6010 = getelementptr inbounds float* %tmp6009, i64 1
+  %tmp6011 = getelementptr inbounds float* %tmp6010, i64 1
+  %tmp6012 = getelementptr inbounds float* %tmp6011, i64 1
+  %tmp6013 = getelementptr inbounds float* %tmp6012, i64 1
+  %tmp6014 = getelementptr inbounds float* %tmp6013, i64 1
+  %tmp6015 = getelementptr inbounds float* %tmp6014, i64 1
+  %tmp6016 = getelementptr inbounds float* %tmp6015, i64 1
+  %tmp6017 = getelementptr inbounds float* %tmp6016, i64 1
+  %tmp6018 = getelementptr inbounds float* %tmp6017, i64 1
+  %tmp6019 = getelementptr inbounds float* %tmp6018, i64 1
+  %tmp6020 = getelementptr inbounds float* %tmp6019, i64 1
+  %tmp6021 = getelementptr inbounds float* %tmp6020, i64 1
+  %tmp6022 = getelementptr inbounds float* %tmp6021, i64 1
+  %tmp6023 = getelementptr inbounds float* %tmp6022, i64 1
+  %tmp6024 = getelementptr inbounds float* %tmp6023, i64 1
+  %tmp6025 = getelementptr inbounds float* %tmp6024, i64 1
+  %tmp6026 = getelementptr inbounds float* %tmp6025, i64 1
+  %tmp6027 = getelementptr inbounds float* %tmp6026, i64 1
+  %tmp6028 = getelementptr inbounds float* %tmp6027, i64 1
+  %tmp6029 = getelementptr inbounds float* %tmp6028, i64 1
+  %tmp6030 = getelementptr inbounds float* %tmp6029, i64 1
+  %tmp6031 = getelementptr inbounds float* %tmp6030, i64 1
+  %tmp6032 = getelementptr inbounds float* %tmp6031, i64 1
+  %tmp6033 = getelementptr inbounds float* %tmp6032, i64 1
+  %tmp6034 = getelementptr inbounds float* %tmp6033, i64 1
+  %tmp6035 = getelementptr inbounds float* %tmp6034, i64 1
+  %tmp6036 = getelementptr inbounds float* %tmp6035, i64 1
+  %tmp6037 = getelementptr inbounds float* %tmp6036, i64 1
+  %tmp6038 = getelementptr inbounds float* %tmp6037, i64 1
+  %tmp6039 = getelementptr inbounds float* %tmp6038, i64 1
+  %tmp6040 = getelementptr inbounds float* %tmp6039, i64 1
+  %tmp6041 = getelementptr inbounds float* %tmp6040, i64 1
+  %tmp6042 = getelementptr inbounds float* %tmp6041, i64 1
+  %tmp6043 = getelementptr inbounds float* %tmp6042, i64 1
+  %tmp6044 = getelementptr inbounds float* %tmp6043, i64 1
+  %tmp6045 = getelementptr inbounds float* %tmp6044, i64 1
+  %tmp6046 = getelementptr inbounds float* %tmp6045, i64 1
+  %tmp6047 = getelementptr inbounds float* %tmp6046, i64 1
+  %tmp6048 = getelementptr inbounds float* %tmp6047, i64 1
+  %tmp6049 = getelementptr inbounds float* %tmp6048, i64 1
+  %tmp6050 = getelementptr inbounds float* %tmp6049, i64 1
+  %tmp6051 = getelementptr inbounds float* %tmp6050, i64 1
+  %tmp6052 = getelementptr inbounds float* %tmp6051, i64 1
+  %tmp6053 = getelementptr inbounds float* %tmp6052, i64 1
+  %tmp6054 = getelementptr inbounds float* %tmp6053, i64 1
+  %tmp6055 = getelementptr inbounds float* %tmp6054, i64 1
+  %tmp6056 = getelementptr inbounds float* %tmp6055, i64 1
+  %tmp6057 = getelementptr inbounds float* %tmp6056, i64 1
+  %tmp6058 = getelementptr inbounds float* %tmp6057, i64 1
+  %tmp6059 = getelementptr inbounds float* %tmp6058, i64 1
+  %tmp6060 = getelementptr inbounds float* %tmp6059, i64 1
+  %tmp6061 = getelementptr inbounds float* %tmp6060, i64 1
+  %tmp6062 = getelementptr inbounds float* %tmp6061, i64 1
+  %tmp6063 = getelementptr inbounds float* %tmp6062, i64 1
+  %tmp6064 = getelementptr inbounds float* %tmp6063, i64 1
+  %tmp6065 = getelementptr inbounds float* %tmp6064, i64 1
+  %tmp6066 = getelementptr inbounds float* %tmp6065, i64 1
+  %tmp6067 = getelementptr inbounds float* %tmp6066, i64 1
+  %tmp6068 = getelementptr inbounds float* %tmp6067, i64 1
+  %tmp6069 = getelementptr inbounds float* %tmp6068, i64 1
+  %tmp6070 = getelementptr inbounds float* %tmp6069, i64 1
+  %tmp6071 = getelementptr inbounds float* %tmp6070, i64 1
+  %tmp6072 = getelementptr inbounds float* %tmp6071, i64 1
+  %tmp6073 = getelementptr inbounds float* %tmp6072, i64 1
+  %tmp6074 = getelementptr inbounds float* %tmp6073, i64 1
+  %tmp6075 = getelementptr inbounds float* %tmp6074, i64 1
+  %tmp6076 = getelementptr inbounds float* %tmp6075, i64 1
+  %tmp6077 = getelementptr inbounds float* %tmp6076, i64 1
+  %tmp6078 = getelementptr inbounds float* %tmp6077, i64 1
+  %tmp6079 = getelementptr inbounds float* %tmp6078, i64 1
+  %tmp6080 = getelementptr inbounds float* %tmp6079, i64 1
+  %tmp6081 = getelementptr inbounds float* %tmp6080, i64 1
+  %tmp6082 = getelementptr inbounds float* %tmp6081, i64 1
+  %tmp6083 = getelementptr inbounds float* %tmp6082, i64 1
+  %tmp6084 = getelementptr inbounds float* %tmp6083, i64 1
+  %tmp6085 = getelementptr inbounds float* %tmp6084, i64 1
+  %tmp6086 = getelementptr inbounds float* %tmp6085, i64 1
+  %tmp6087 = getelementptr inbounds float* %tmp6086, i64 1
+  %tmp6088 = getelementptr inbounds float* %tmp6087, i64 1
+  %tmp6089 = getelementptr inbounds float* %tmp6088, i64 1
+  %tmp6090 = getelementptr inbounds float* %tmp6089, i64 1
+  %tmp6091 = getelementptr inbounds float* %tmp6090, i64 1
+  %tmp6092 = getelementptr inbounds float* %tmp6091, i64 1
+  %tmp6093 = getelementptr inbounds float* %tmp6092, i64 1
+  %tmp6094 = getelementptr inbounds float* %tmp6093, i64 1
+  %tmp6095 = getelementptr inbounds float* %tmp6094, i64 1
+  %tmp6096 = getelementptr inbounds float* %tmp6095, i64 1
+  %tmp6097 = getelementptr inbounds float* %tmp6096, i64 1
+  %tmp6098 = getelementptr inbounds float* %tmp6097, i64 1
+  %tmp6099 = getelementptr inbounds float* %tmp6098, i64 1
+  %tmp6100 = getelementptr inbounds float* %tmp6099, i64 1
+  %tmp6101 = getelementptr inbounds float* %tmp6100, i64 1
+  %tmp6102 = getelementptr inbounds float* %tmp6101, i64 1
+  %tmp6103 = getelementptr inbounds float* %tmp6102, i64 1
+  %tmp6104 = getelementptr inbounds float* %tmp6103, i64 1
+  %tmp6105 = getelementptr inbounds float* %tmp6104, i64 1
+  %tmp6106 = getelementptr inbounds float* %tmp6105, i64 1
+  %tmp6107 = getelementptr inbounds float* %tmp6106, i64 1
+  %tmp6108 = getelementptr inbounds float* %tmp6107, i64 1
+  %tmp6109 = getelementptr inbounds float* %tmp6108, i64 1
+  %tmp6110 = getelementptr inbounds float* %tmp6109, i64 1
+  %tmp6111 = getelementptr inbounds float* %tmp6110, i64 1
+  %tmp6112 = getelementptr inbounds float* %tmp6111, i64 1
+  %tmp6113 = getelementptr inbounds float* %tmp6112, i64 1
+  %tmp6114 = getelementptr inbounds float* %tmp6113, i64 1
+  %tmp6115 = getelementptr inbounds float* %tmp6114, i64 1
+  %tmp6116 = getelementptr inbounds float* %tmp6115, i64 1
+  %tmp6117 = getelementptr inbounds float* %tmp6116, i64 1
+  %tmp6118 = getelementptr inbounds float* %tmp6117, i64 1
+  %tmp6119 = getelementptr inbounds float* %tmp6118, i64 1
+  %tmp6120 = getelementptr inbounds float* %tmp6119, i64 1
+  %tmp6121 = getelementptr inbounds float* %tmp6120, i64 1
+  %tmp6122 = getelementptr inbounds float* %tmp6121, i64 1
+  %tmp6123 = getelementptr inbounds float* %tmp6122, i64 1
+  %tmp6124 = getelementptr inbounds float* %tmp6123, i64 1
+  %tmp6125 = getelementptr inbounds float* %tmp6124, i64 1
+  %tmp6126 = getelementptr inbounds float* %tmp6125, i64 1
+  %tmp6127 = getelementptr inbounds float* %tmp6126, i64 1
+  %tmp6128 = getelementptr inbounds float* %tmp6127, i64 1
+  %tmp6129 = getelementptr inbounds float* %tmp6128, i64 1
+  %tmp6130 = getelementptr inbounds float* %tmp6129, i64 1
+  %tmp6131 = getelementptr inbounds float* %tmp6130, i64 1
+  %tmp6132 = getelementptr inbounds float* %tmp6131, i64 1
+  %tmp6133 = getelementptr inbounds float* %tmp6132, i64 1
+  %tmp6134 = getelementptr inbounds float* %tmp6133, i64 1
+  %tmp6135 = getelementptr inbounds float* %tmp6134, i64 1
+  %tmp6136 = getelementptr inbounds float* %tmp6135, i64 1
+  %tmp6137 = getelementptr inbounds float* %tmp6136, i64 1
+  %tmp6138 = getelementptr inbounds float* %tmp6137, i64 1
+  %tmp6139 = getelementptr inbounds float* %tmp6138, i64 1
+  %tmp6140 = getelementptr inbounds float* %tmp6139, i64 1
+  %tmp6141 = getelementptr inbounds float* %tmp6140, i64 1
+  %tmp6142 = getelementptr inbounds float* %tmp6141, i64 1
+  %tmp6143 = getelementptr inbounds float* %tmp6142, i64 1
+  %tmp6144 = getelementptr inbounds float* %tmp6143, i64 1
+  %tmp6145 = getelementptr inbounds float* %tmp6144, i64 1
+  %tmp6146 = getelementptr inbounds float* %tmp6145, i64 1
+  %tmp6147 = getelementptr inbounds float* %tmp6146, i64 1
+  %tmp6148 = getelementptr inbounds float* %tmp6147, i64 1
+  %tmp6149 = getelementptr inbounds float* %tmp6148, i64 1
+  %tmp6150 = getelementptr inbounds float* %tmp6149, i64 1
+  %tmp6151 = getelementptr inbounds float* %tmp6150, i64 1
+  %tmp6152 = getelementptr inbounds float* %tmp6151, i64 1
+  %tmp6153 = getelementptr inbounds float* %tmp6152, i64 1
+  %tmp6154 = getelementptr inbounds float* %tmp6153, i64 1
+  %tmp6155 = getelementptr inbounds float* %tmp6154, i64 1
+  %tmp6156 = getelementptr inbounds float* %tmp6155, i64 1
+  %tmp6157 = getelementptr inbounds float* %tmp6156, i64 1
+  %tmp6158 = getelementptr inbounds float* %tmp6157, i64 1
+  %tmp6159 = getelementptr inbounds float* %tmp6158, i64 1
+  %tmp6160 = getelementptr inbounds float* %tmp6159, i64 1
+  %tmp6161 = getelementptr inbounds float* %tmp6160, i64 1
+  %tmp6162 = getelementptr inbounds float* %tmp6161, i64 1
+  %tmp6163 = getelementptr inbounds float* %tmp6162, i64 1
+  %tmp6164 = getelementptr inbounds float* %tmp6163, i64 1
+  %tmp6165 = getelementptr inbounds float* %tmp6164, i64 1
+  %tmp6166 = getelementptr inbounds float* %tmp6165, i64 1
+  %tmp6167 = getelementptr inbounds float* %tmp6166, i64 1
+  %tmp6168 = getelementptr inbounds float* %tmp6167, i64 1
+  %tmp6169 = getelementptr inbounds float* %tmp6168, i64 1
+  %tmp6170 = getelementptr inbounds float* %tmp6169, i64 1
+  %tmp6171 = getelementptr inbounds float* %tmp6170, i64 1
+  %tmp6172 = getelementptr inbounds float* %tmp6171, i64 1
+  %tmp6173 = getelementptr inbounds float* %tmp6172, i64 1
+  %tmp6174 = getelementptr inbounds float* %tmp6173, i64 1
+  %tmp6175 = getelementptr inbounds float* %tmp6174, i64 1
+  %tmp6176 = getelementptr inbounds float* %tmp6175, i64 1
+  %tmp6177 = getelementptr inbounds float* %tmp6176, i64 1
+  %tmp6178 = getelementptr inbounds float* %tmp6177, i64 1
+  %tmp6179 = getelementptr inbounds float* %tmp6178, i64 1
+  %tmp6180 = getelementptr inbounds float* %tmp6179, i64 1
+  %tmp6181 = getelementptr inbounds float* %tmp6180, i64 1
+  %tmp6182 = getelementptr inbounds float* %tmp6181, i64 1
+  %tmp6183 = getelementptr inbounds float* %tmp6182, i64 1
+  %tmp6184 = getelementptr inbounds float* %tmp6183, i64 1
+  %tmp6185 = getelementptr inbounds float* %tmp6184, i64 1
+  %tmp6186 = getelementptr inbounds float* %tmp6185, i64 1
+  %tmp6187 = getelementptr inbounds float* %tmp6186, i64 1
+  %tmp6188 = getelementptr inbounds float* %tmp6187, i64 1
+  %tmp6189 = getelementptr inbounds float* %tmp6188, i64 1
+  %tmp6190 = getelementptr inbounds float* %tmp6189, i64 1
+  %tmp6191 = getelementptr inbounds float* %tmp6190, i64 1
+  %tmp6192 = getelementptr inbounds float* %tmp6191, i64 1
+  %tmp6193 = getelementptr inbounds float* %tmp6192, i64 1
+  %tmp6194 = getelementptr inbounds float* %tmp6193, i64 1
+  %tmp6195 = getelementptr inbounds float* %tmp6194, i64 1
+  %tmp6196 = getelementptr inbounds float* %tmp6195, i64 1
+  %tmp6197 = getelementptr inbounds float* %tmp6196, i64 1
+  %tmp6198 = getelementptr inbounds float* %tmp6197, i64 1
+  %tmp6199 = getelementptr inbounds float* %tmp6198, i64 1
+  %tmp6200 = getelementptr inbounds float* %tmp6199, i64 1
+  %tmp6201 = getelementptr inbounds float* %tmp6200, i64 1
+  %tmp6202 = getelementptr inbounds float* %tmp6201, i64 1
+  %tmp6203 = getelementptr inbounds float* %tmp6202, i64 1
+  %tmp6204 = getelementptr inbounds float* %tmp6203, i64 1
+  %tmp6205 = getelementptr inbounds float* %tmp6204, i64 1
+  %tmp6206 = getelementptr inbounds float* %tmp6205, i64 1
+  %tmp6207 = getelementptr inbounds float* %tmp6206, i64 1
+  %tmp6208 = getelementptr inbounds float* %tmp6207, i64 1
+  %tmp6209 = getelementptr inbounds float* %tmp6208, i64 1
+  %tmp6210 = getelementptr inbounds float* %tmp6209, i64 1
+  %tmp6211 = getelementptr inbounds float* %tmp6210, i64 1
+  %tmp6212 = getelementptr inbounds float* %tmp6211, i64 1
+  %tmp6213 = getelementptr inbounds float* %tmp6212, i64 1
+  %tmp6214 = getelementptr inbounds float* %tmp6213, i64 1
+  %tmp6215 = getelementptr inbounds float* %tmp6214, i64 1
+  %tmp6216 = getelementptr inbounds float* %tmp6215, i64 1
+  %tmp6217 = getelementptr inbounds float* %tmp6216, i64 1
+  %tmp6218 = getelementptr inbounds float* %tmp6217, i64 1
+  %tmp6219 = getelementptr inbounds float* %tmp6218, i64 1
+  %tmp6220 = getelementptr inbounds float* %tmp6219, i64 1
+  %tmp6221 = getelementptr inbounds float* %tmp6220, i64 1
+  %tmp6222 = getelementptr inbounds float* %tmp6221, i64 1
+  %tmp6223 = getelementptr inbounds float* %tmp6222, i64 1
+  %tmp6224 = getelementptr inbounds float* %tmp6223, i64 1
+  %tmp6225 = getelementptr inbounds float* %tmp6224, i64 1
+  %tmp6226 = getelementptr inbounds float* %tmp6225, i64 1
+  %tmp6227 = getelementptr inbounds float* %tmp6226, i64 1
+  %tmp6228 = getelementptr inbounds float* %tmp6227, i64 1
+  %tmp6229 = getelementptr inbounds float* %tmp6228, i64 1
+  %tmp6230 = getelementptr inbounds float* %tmp6229, i64 1
+  %tmp6231 = getelementptr inbounds float* %tmp6230, i64 1
+  %tmp6232 = getelementptr inbounds float* %tmp6231, i64 1
+  %tmp6233 = getelementptr inbounds float* %tmp6232, i64 1
+  %tmp6234 = getelementptr inbounds float* %tmp6233, i64 1
+  %tmp6235 = getelementptr inbounds float* %tmp6234, i64 1
+  %tmp6236 = getelementptr inbounds float* %tmp6235, i64 1
+  %tmp6237 = getelementptr inbounds float* %tmp6236, i64 1
+  %tmp6238 = getelementptr inbounds float* %tmp6237, i64 1
+  %tmp6239 = getelementptr inbounds float* %tmp6238, i64 1
+  %tmp6240 = getelementptr inbounds float* %tmp6239, i64 1
+  %tmp6241 = getelementptr inbounds float* %tmp6240, i64 1
+  %tmp6242 = getelementptr inbounds float* %tmp6241, i64 1
+  %tmp6243 = getelementptr inbounds float* %tmp6242, i64 1
+  %tmp6244 = getelementptr inbounds float* %tmp6243, i64 1
+  %tmp6245 = getelementptr inbounds float* %tmp6244, i64 1
+  %tmp6246 = getelementptr inbounds float* %tmp6245, i64 1
+  %tmp6247 = getelementptr inbounds float* %tmp6246, i64 1
+  %tmp6248 = getelementptr inbounds float* %tmp6247, i64 1
+  %tmp6249 = getelementptr inbounds float* %tmp6248, i64 1
+  %tmp6250 = getelementptr inbounds float* %tmp6249, i64 1
+  %tmp6251 = getelementptr inbounds float* %tmp6250, i64 1
+  %tmp6252 = getelementptr inbounds float* %tmp6251, i64 1
+  %tmp6253 = getelementptr inbounds float* %tmp6252, i64 1
+  %tmp6254 = getelementptr inbounds float* %tmp6253, i64 1
+  %tmp6255 = getelementptr inbounds float* %tmp6254, i64 1
+  %tmp6256 = getelementptr inbounds float* %tmp6255, i64 1
+  %tmp6257 = getelementptr inbounds float* %tmp6256, i64 1
+  %tmp6258 = getelementptr inbounds float* %tmp6257, i64 1
+  %tmp6259 = getelementptr inbounds float* %tmp6258, i64 1
+  %tmp6260 = getelementptr inbounds float* %tmp6259, i64 1
+  %tmp6261 = getelementptr inbounds float* %tmp6260, i64 1
+  %tmp6262 = getelementptr inbounds float* %tmp6261, i64 1
+  %tmp6263 = getelementptr inbounds float* %tmp6262, i64 1
+  %tmp6264 = getelementptr inbounds float* %tmp6263, i64 1
+  %tmp6265 = getelementptr inbounds float* %tmp6264, i64 1
+  %tmp6266 = getelementptr inbounds float* %tmp6265, i64 1
+  %tmp6267 = getelementptr inbounds float* %tmp6266, i64 1
+  %tmp6268 = getelementptr inbounds float* %tmp6267, i64 1
+  %tmp6269 = getelementptr inbounds float* %tmp6268, i64 1
+  %tmp6270 = getelementptr inbounds float* %tmp6269, i64 1
+  %tmp6271 = getelementptr inbounds float* %tmp6270, i64 1
+  %tmp6272 = getelementptr inbounds float* %tmp6271, i64 1
+  %tmp6273 = getelementptr inbounds float* %tmp6272, i64 1
+  %tmp6274 = getelementptr inbounds float* %tmp6273, i64 1
+  %tmp6275 = getelementptr inbounds float* %tmp6274, i64 1
+  %tmp6276 = getelementptr inbounds float* %tmp6275, i64 1
+  %tmp6277 = getelementptr inbounds float* %tmp6276, i64 1
+  %tmp6278 = getelementptr inbounds float* %tmp6277, i64 1
+  %tmp6279 = getelementptr inbounds float* %tmp6278, i64 1
+  %tmp6280 = getelementptr inbounds float* %tmp6279, i64 1
+  %tmp6281 = getelementptr inbounds float* %tmp6280, i64 1
+  %tmp6282 = getelementptr inbounds float* %tmp6281, i64 1
+  %tmp6283 = getelementptr inbounds float* %tmp6282, i64 1
+  %tmp6284 = getelementptr inbounds float* %tmp6283, i64 1
+  %tmp6285 = getelementptr inbounds float* %tmp6284, i64 1
+  %tmp6286 = getelementptr inbounds float* %tmp6285, i64 1
+  %tmp6287 = getelementptr inbounds float* %tmp6286, i64 1
+  %tmp6288 = getelementptr inbounds float* %tmp6287, i64 1
+  %tmp6289 = getelementptr inbounds float* %tmp6288, i64 1
+  %tmp6290 = getelementptr inbounds float* %tmp6289, i64 1
+  %tmp6291 = getelementptr inbounds float* %tmp6290, i64 1
+  %tmp6292 = getelementptr inbounds float* %tmp6291, i64 1
+  %tmp6293 = getelementptr inbounds float* %tmp6292, i64 1
+  %tmp6294 = getelementptr inbounds float* %tmp6293, i64 1
+  %tmp6295 = getelementptr inbounds float* %tmp6294, i64 1
+  %tmp6296 = getelementptr inbounds float* %tmp6295, i64 1
+  %tmp6297 = getelementptr inbounds float* %tmp6296, i64 1
+  %tmp6298 = getelementptr inbounds float* %tmp6297, i64 1
+  %tmp6299 = getelementptr inbounds float* %tmp6298, i64 1
+  %tmp6300 = getelementptr inbounds float* %tmp6299, i64 1
+  %tmp6301 = getelementptr inbounds float* %tmp6300, i64 1
+  %tmp6302 = getelementptr inbounds float* %tmp6301, i64 1
+  %tmp6303 = getelementptr inbounds float* %tmp6302, i64 1
+  %tmp6304 = getelementptr inbounds float* %tmp6303, i64 1
+  %tmp6305 = getelementptr inbounds float* %tmp6304, i64 1
+  %tmp6306 = getelementptr inbounds float* %tmp6305, i64 1
+  %tmp6307 = getelementptr inbounds float* %tmp6306, i64 1
+  %tmp6308 = getelementptr inbounds float* %tmp6307, i64 1
+  %tmp6309 = getelementptr inbounds float* %tmp6308, i64 1
+  %tmp6310 = getelementptr inbounds float* %tmp6309, i64 1
+  %tmp6311 = getelementptr inbounds float* %tmp6310, i64 1
+  %tmp6312 = getelementptr inbounds float* %tmp6311, i64 1
+  %tmp6313 = getelementptr inbounds float* %tmp6312, i64 1
+  %tmp6314 = getelementptr inbounds float* %tmp6313, i64 1
+  %tmp6315 = getelementptr inbounds float* %tmp6314, i64 1
+  %tmp6316 = getelementptr inbounds float* %tmp6315, i64 1
+  %tmp6317 = getelementptr inbounds float* %tmp6316, i64 1
+  %tmp6318 = getelementptr inbounds float* %tmp6317, i64 1
+  %tmp6319 = getelementptr inbounds float* %tmp6318, i64 1
+  %tmp6320 = getelementptr inbounds float* %tmp6319, i64 1
+  %tmp6321 = getelementptr inbounds float* %tmp6320, i64 1
+  %tmp6322 = getelementptr inbounds float* %tmp6321, i64 1
+  %tmp6323 = getelementptr inbounds float* %tmp6322, i64 1
+  %tmp6324 = getelementptr inbounds float* %tmp6323, i64 1
+  %tmp6325 = getelementptr inbounds float* %tmp6324, i64 1
+  %tmp6326 = getelementptr inbounds float* %tmp6325, i64 1
+  %tmp6327 = getelementptr inbounds float* %tmp6326, i64 1
+  %tmp6328 = getelementptr inbounds float* %tmp6327, i64 1
+  %tmp6329 = getelementptr inbounds float* %tmp6328, i64 1
+  %tmp6330 = getelementptr inbounds float* %tmp6329, i64 1
+  %tmp6331 = getelementptr inbounds float* %tmp6330, i64 1
+  %tmp6332 = getelementptr inbounds float* %tmp6331, i64 1
+  %tmp6333 = getelementptr inbounds float* %tmp6332, i64 1
+  %tmp6334 = getelementptr inbounds float* %tmp6333, i64 1
+  %tmp6335 = getelementptr inbounds float* %tmp6334, i64 1
+  %tmp6336 = getelementptr inbounds float* %tmp6335, i64 1
+  %tmp6337 = getelementptr inbounds float* %tmp6336, i64 1
+  %tmp6338 = getelementptr inbounds float* %tmp6337, i64 1
+  %tmp6339 = getelementptr inbounds float* %tmp6338, i64 1
+  %tmp6340 = getelementptr inbounds float* %tmp6339, i64 1
+  %tmp6341 = getelementptr inbounds float* %tmp6340, i64 1
+  %tmp6342 = getelementptr inbounds float* %tmp6341, i64 1
+  %tmp6343 = getelementptr inbounds float* %tmp6342, i64 1
+  %tmp6344 = getelementptr inbounds float* %tmp6343, i64 1
+  %tmp6345 = getelementptr inbounds float* %tmp6344, i64 1
+  %tmp6346 = getelementptr inbounds float* %tmp6345, i64 1
+  %tmp6347 = getelementptr inbounds float* %tmp6346, i64 1
+  %tmp6348 = getelementptr inbounds float* %tmp6347, i64 1
+  %tmp6349 = getelementptr inbounds float* %tmp6348, i64 1
+  %tmp6350 = getelementptr inbounds float* %tmp6349, i64 1
+  %tmp6351 = getelementptr inbounds float* %tmp6350, i64 1
+  %tmp6352 = getelementptr inbounds float* %tmp6351, i64 1
+  %tmp6353 = getelementptr inbounds float* %tmp6352, i64 1
+  %tmp6354 = getelementptr inbounds float* %tmp6353, i64 1
+  %tmp6355 = getelementptr inbounds float* %tmp6354, i64 1
+  %tmp6356 = getelementptr inbounds float* %tmp6355, i64 1
+  %tmp6357 = getelementptr inbounds float* %tmp6356, i64 1
+  %tmp6358 = getelementptr inbounds float* %tmp6357, i64 1
+  %tmp6359 = getelementptr inbounds float* %tmp6358, i64 1
+  %tmp6360 = getelementptr inbounds float* %tmp6359, i64 1
+  %tmp6361 = getelementptr inbounds float* %tmp6360, i64 1
+  %tmp6362 = getelementptr inbounds float* %tmp6361, i64 1
+  %tmp6363 = getelementptr inbounds float* %tmp6362, i64 1
+  %tmp6364 = getelementptr inbounds float* %tmp6363, i64 1
+  %tmp6365 = getelementptr inbounds float* %tmp6364, i64 1
+  %tmp6366 = getelementptr inbounds float* %tmp6365, i64 1
+  %tmp6367 = getelementptr inbounds float* %tmp6366, i64 1
+  %tmp6368 = getelementptr inbounds float* %tmp6367, i64 1
+  %tmp6369 = getelementptr inbounds float* %tmp6368, i64 1
+  %tmp6370 = getelementptr inbounds float* %tmp6369, i64 1
+  %tmp6371 = getelementptr inbounds float* %tmp6370, i64 1
+  %tmp6372 = getelementptr inbounds float* %tmp6371, i64 1
+  %tmp6373 = getelementptr inbounds float* %tmp6372, i64 1
+  %tmp6374 = getelementptr inbounds float* %tmp6373, i64 1
+  %tmp6375 = getelementptr inbounds float* %tmp6374, i64 1
+  %tmp6376 = getelementptr inbounds float* %tmp6375, i64 1
+  %tmp6377 = getelementptr inbounds float* %tmp6376, i64 1
+  %tmp6378 = getelementptr inbounds float* %tmp6377, i64 1
+  %tmp6379 = getelementptr inbounds float* %tmp6378, i64 1
+  %tmp6380 = getelementptr inbounds float* %tmp6379, i64 1
+  %tmp6381 = getelementptr inbounds float* %tmp6380, i64 1
+  %tmp6382 = getelementptr inbounds float* %tmp6381, i64 1
+  %tmp6383 = getelementptr inbounds float* %tmp6382, i64 1
+  %tmp6384 = getelementptr inbounds float* %tmp6383, i64 1
+  %tmp6385 = getelementptr inbounds float* %tmp6384, i64 1
+  %tmp6386 = getelementptr inbounds float* %tmp6385, i64 1
+  %tmp6387 = getelementptr inbounds float* %tmp6386, i64 1
+  %tmp6388 = getelementptr inbounds float* %tmp6387, i64 1
+  %tmp6389 = getelementptr inbounds float* %tmp6388, i64 1
+  %tmp6390 = getelementptr inbounds float* %tmp6389, i64 1
+  %tmp6391 = getelementptr inbounds float* %tmp6390, i64 1
+  %tmp6392 = getelementptr inbounds float* %tmp6391, i64 1
+  %tmp6393 = getelementptr inbounds float* %tmp6392, i64 1
+  %tmp6394 = getelementptr inbounds float* %tmp6393, i64 1
+  %tmp6395 = getelementptr inbounds float* %tmp6394, i64 1
+  %tmp6396 = getelementptr inbounds float* %tmp6395, i64 1
+  %tmp6397 = getelementptr inbounds float* %tmp6396, i64 1
+  %tmp6398 = getelementptr inbounds float* %tmp6397, i64 1
+  %tmp6399 = getelementptr inbounds float* %tmp6398, i64 1
+  %tmp6400 = getelementptr inbounds float* %tmp6399, i64 1
+  %tmp6401 = getelementptr inbounds float* %tmp6400, i64 1
+  %tmp6402 = getelementptr inbounds float* %tmp6401, i64 1
+  %tmp6403 = getelementptr inbounds float* %tmp6402, i64 1
+  %tmp6404 = getelementptr inbounds float* %tmp6403, i64 1
+  %tmp6405 = getelementptr inbounds float* %tmp6404, i64 1
+  %tmp6406 = getelementptr inbounds float* %tmp6405, i64 1
+  %tmp6407 = getelementptr inbounds float* %tmp6406, i64 1
+  %tmp6408 = getelementptr inbounds float* %tmp6407, i64 1
+  %tmp6409 = getelementptr inbounds float* %tmp6408, i64 1
+  %tmp6410 = getelementptr inbounds float* %tmp6409, i64 1
+  %tmp6411 = getelementptr inbounds float* %tmp6410, i64 1
+  %tmp6412 = getelementptr inbounds float* %tmp6411, i64 1
+  %tmp6413 = getelementptr inbounds float* %tmp6412, i64 1
+  %tmp6414 = getelementptr inbounds float* %tmp6413, i64 1
+  %tmp6415 = getelementptr inbounds float* %tmp6414, i64 1
+  %tmp6416 = getelementptr inbounds float* %tmp6415, i64 1
+  %tmp6417 = getelementptr inbounds float* %tmp6416, i64 1
+  %tmp6418 = getelementptr inbounds float* %tmp6417, i64 1
+  %tmp6419 = getelementptr inbounds float* %tmp6418, i64 1
+  %tmp6420 = getelementptr inbounds float* %tmp6419, i64 1
+  %tmp6421 = getelementptr inbounds float* %tmp6420, i64 1
+  %tmp6422 = getelementptr inbounds float* %tmp6421, i64 1
+  %tmp6423 = getelementptr inbounds float* %tmp6422, i64 1
+  %tmp6424 = getelementptr inbounds float* %tmp6423, i64 1
+  %tmp6425 = getelementptr inbounds float* %tmp6424, i64 1
+  %tmp6426 = getelementptr inbounds float* %tmp6425, i64 1
+  %tmp6427 = getelementptr inbounds float* %tmp6426, i64 1
+  %tmp6428 = getelementptr inbounds float* %tmp6427, i64 1
+  %tmp6429 = getelementptr inbounds float* %tmp6428, i64 1
+  %tmp6430 = getelementptr inbounds float* %tmp6429, i64 1
+  %tmp6431 = getelementptr inbounds float* %tmp6430, i64 1
+  %tmp6432 = getelementptr inbounds float* %tmp6431, i64 1
+  %tmp6433 = getelementptr inbounds float* %tmp6432, i64 1
+  %tmp6434 = getelementptr inbounds float* %tmp6433, i64 1
+  %tmp6435 = getelementptr inbounds float* %tmp6434, i64 1
+  %tmp6436 = getelementptr inbounds float* %tmp6435, i64 1
+  %tmp6437 = getelementptr inbounds float* %tmp6436, i64 1
+  %tmp6438 = getelementptr inbounds float* %tmp6437, i64 1
+  %tmp6439 = getelementptr inbounds float* %tmp6438, i64 1
+  %tmp6440 = getelementptr inbounds float* %tmp6439, i64 1
+  %tmp6441 = getelementptr inbounds float* %tmp6440, i64 1
+  %tmp6442 = getelementptr inbounds float* %tmp6441, i64 1
+  %tmp6443 = getelementptr inbounds float* %tmp6442, i64 1
+  %tmp6444 = getelementptr inbounds float* %tmp6443, i64 1
+  %tmp6445 = getelementptr inbounds float* %tmp6444, i64 1
+  %tmp6446 = getelementptr inbounds float* %tmp6445, i64 1
+  %tmp6447 = getelementptr inbounds float* %tmp6446, i64 1
+  %tmp6448 = getelementptr inbounds float* %tmp6447, i64 1
+  %tmp6449 = getelementptr inbounds float* %tmp6448, i64 1
+  %tmp6450 = getelementptr inbounds float* %tmp6449, i64 1
+  %tmp6451 = getelementptr inbounds float* %tmp6450, i64 1
+  %tmp6452 = getelementptr inbounds float* %tmp6451, i64 1
+  %tmp6453 = getelementptr inbounds float* %tmp6452, i64 1
+  %tmp6454 = getelementptr inbounds float* %tmp6453, i64 1
+  %tmp6455 = getelementptr inbounds float* %tmp6454, i64 1
+  %tmp6456 = getelementptr inbounds float* %tmp6455, i64 1
+  %tmp6457 = getelementptr inbounds float* %tmp6456, i64 1
+  %tmp6458 = getelementptr inbounds float* %tmp6457, i64 1
+  %tmp6459 = getelementptr inbounds float* %tmp6458, i64 1
+  %tmp6460 = getelementptr inbounds float* %tmp6459, i64 1
+  %tmp6461 = getelementptr inbounds float* %tmp6460, i64 1
+  %tmp6462 = getelementptr inbounds float* %tmp6461, i64 1
+  %tmp6463 = getelementptr inbounds float* %tmp6462, i64 1
+  %tmp6464 = getelementptr inbounds float* %tmp6463, i64 1
+  %tmp6465 = getelementptr inbounds float* %tmp6464, i64 1
+  %tmp6466 = getelementptr inbounds float* %tmp6465, i64 1
+  %tmp6467 = getelementptr inbounds float* %tmp6466, i64 1
+  %tmp6468 = getelementptr inbounds float* %tmp6467, i64 1
+  %tmp6469 = getelementptr inbounds float* %tmp6468, i64 1
+  %tmp6470 = getelementptr inbounds float* %tmp6469, i64 1
+  %tmp6471 = getelementptr inbounds float* %tmp6470, i64 1
+  %tmp6472 = getelementptr inbounds float* %tmp6471, i64 1
+  %tmp6473 = getelementptr inbounds float* %tmp6472, i64 1
+  %tmp6474 = getelementptr inbounds float* %tmp6473, i64 1
+  %tmp6475 = getelementptr inbounds float* %tmp6474, i64 1
+  %tmp6476 = getelementptr inbounds float* %tmp6475, i64 1
+  %tmp6477 = getelementptr inbounds float* %tmp6476, i64 1
+  %tmp6478 = getelementptr inbounds float* %tmp6477, i64 1
+  %tmp6479 = getelementptr inbounds float* %tmp6478, i64 1
+  %tmp6480 = getelementptr inbounds float* %tmp6479, i64 1
+  %tmp6481 = getelementptr inbounds float* %tmp6480, i64 1
+  %tmp6482 = getelementptr inbounds float* %tmp6481, i64 1
+  %tmp6483 = getelementptr inbounds float* %tmp6482, i64 1
+  %tmp6484 = getelementptr inbounds float* %tmp6483, i64 1
+  %tmp6485 = getelementptr inbounds float* %tmp6484, i64 1
+  %tmp6486 = getelementptr inbounds float* %tmp6485, i64 1
+  %tmp6487 = getelementptr inbounds float* %tmp6486, i64 1
+  %tmp6488 = getelementptr inbounds float* %tmp6487, i64 1
+  %tmp6489 = getelementptr inbounds float* %tmp6488, i64 1
+  %tmp6490 = getelementptr inbounds float* %tmp6489, i64 1
+  %tmp6491 = getelementptr inbounds float* %tmp6490, i64 1
+  %tmp6492 = getelementptr inbounds float* %tmp6491, i64 1
+  %tmp6493 = getelementptr inbounds float* %tmp6492, i64 1
+  %tmp6494 = getelementptr inbounds float* %tmp6493, i64 1
+  %tmp6495 = getelementptr inbounds float* %tmp6494, i64 1
+  %tmp6496 = getelementptr inbounds float* %tmp6495, i64 1
+  %tmp6497 = getelementptr inbounds float* %tmp6496, i64 1
+  %tmp6498 = getelementptr inbounds float* %tmp6497, i64 1
+  %tmp6499 = getelementptr inbounds float* %tmp6498, i64 1
+  %tmp6500 = getelementptr inbounds float* %tmp6499, i64 1
+  %tmp6501 = getelementptr inbounds float* %tmp6500, i64 1
+  %tmp6502 = getelementptr inbounds float* %tmp6501, i64 1
+  %tmp6503 = getelementptr inbounds float* %tmp6502, i64 1
+  %tmp6504 = getelementptr inbounds float* %tmp6503, i64 1
+  %tmp6505 = getelementptr inbounds float* %tmp6504, i64 1
+  %tmp6506 = getelementptr inbounds float* %tmp6505, i64 1
+  %tmp6507 = getelementptr inbounds float* %tmp6506, i64 1
+  %tmp6508 = getelementptr inbounds float* %tmp6507, i64 1
+  %tmp6509 = getelementptr inbounds float* %tmp6508, i64 1
+  %tmp6510 = getelementptr inbounds float* %tmp6509, i64 1
+  %tmp6511 = getelementptr inbounds float* %tmp6510, i64 1
+  %tmp6512 = getelementptr inbounds float* %tmp6511, i64 1
+  %tmp6513 = getelementptr inbounds float* %tmp6512, i64 1
+  %tmp6514 = getelementptr inbounds float* %tmp6513, i64 1
+  %tmp6515 = getelementptr inbounds float* %tmp6514, i64 1
+  %tmp6516 = getelementptr inbounds float* %tmp6515, i64 1
+  %tmp6517 = getelementptr inbounds float* %tmp6516, i64 1
+  %tmp6518 = getelementptr inbounds float* %tmp6517, i64 1
+  %tmp6519 = getelementptr inbounds float* %tmp6518, i64 1
+  %tmp6520 = getelementptr inbounds float* %tmp6519, i64 1
+  %tmp6521 = getelementptr inbounds float* %tmp6520, i64 1
+  %tmp6522 = getelementptr inbounds float* %tmp6521, i64 1
+  %tmp6523 = getelementptr inbounds float* %tmp6522, i64 1
+  %tmp6524 = getelementptr inbounds float* %tmp6523, i64 1
+  %tmp6525 = getelementptr inbounds float* %tmp6524, i64 1
+  %tmp6526 = getelementptr inbounds float* %tmp6525, i64 1
+  %tmp6527 = getelementptr inbounds float* %tmp6526, i64 1
+  %tmp6528 = getelementptr inbounds float* %tmp6527, i64 1
+  %tmp6529 = getelementptr inbounds float* %tmp6528, i64 1
+  %tmp6530 = getelementptr inbounds float* %tmp6529, i64 1
+  %tmp6531 = getelementptr inbounds float* %tmp6530, i64 1
+  %tmp6532 = getelementptr inbounds float* %tmp6531, i64 1
+  %tmp6533 = getelementptr inbounds float* %tmp6532, i64 1
+  %tmp6534 = getelementptr inbounds float* %tmp6533, i64 1
+  %tmp6535 = getelementptr inbounds float* %tmp6534, i64 1
+  %tmp6536 = getelementptr inbounds float* %tmp6535, i64 1
+  %tmp6537 = getelementptr inbounds float* %tmp6536, i64 1
+  %tmp6538 = getelementptr inbounds float* %tmp6537, i64 1
+  %tmp6539 = getelementptr inbounds float* %tmp6538, i64 1
+  %tmp6540 = getelementptr inbounds float* %tmp6539, i64 1
+  %tmp6541 = getelementptr inbounds float* %tmp6540, i64 1
+  %tmp6542 = getelementptr inbounds float* %tmp6541, i64 1
+  %tmp6543 = getelementptr inbounds float* %tmp6542, i64 1
+  %tmp6544 = getelementptr inbounds float* %tmp6543, i64 1
+  %tmp6545 = getelementptr inbounds float* %tmp6544, i64 1
+  %tmp6546 = getelementptr inbounds float* %tmp6545, i64 1
+  %tmp6547 = getelementptr inbounds float* %tmp6546, i64 1
+  %tmp6548 = getelementptr inbounds float* %tmp6547, i64 1
+  %tmp6549 = getelementptr inbounds float* %tmp6548, i64 1
+  %tmp6550 = getelementptr inbounds float* %tmp6549, i64 1
+  %tmp6551 = getelementptr inbounds float* %tmp6550, i64 1
+  %tmp6552 = getelementptr inbounds float* %tmp6551, i64 1
+  %tmp6553 = getelementptr inbounds float* %tmp6552, i64 1
+  %tmp6554 = getelementptr inbounds float* %tmp6553, i64 1
+  %tmp6555 = getelementptr inbounds float* %tmp6554, i64 1
+  %tmp6556 = getelementptr inbounds float* %tmp6555, i64 1
+  %tmp6557 = getelementptr inbounds float* %tmp6556, i64 1
+  %tmp6558 = getelementptr inbounds float* %tmp6557, i64 1
+  %tmp6559 = getelementptr inbounds float* %tmp6558, i64 1
+  %tmp6560 = getelementptr inbounds float* %tmp6559, i64 1
+  %tmp6561 = getelementptr inbounds float* %tmp6560, i64 1
+  %tmp6562 = getelementptr inbounds float* %tmp6561, i64 1
+  %tmp6563 = getelementptr inbounds float* %tmp6562, i64 1
+  %tmp6564 = getelementptr inbounds float* %tmp6563, i64 1
+  %tmp6565 = getelementptr inbounds float* %tmp6564, i64 1
+  %tmp6566 = getelementptr inbounds float* %tmp6565, i64 1
+  %tmp6567 = getelementptr inbounds float* %tmp6566, i64 1
+  %tmp6568 = getelementptr inbounds float* %tmp6567, i64 1
+  %tmp6569 = getelementptr inbounds float* %tmp6568, i64 1
+  %tmp6570 = getelementptr inbounds float* %tmp6569, i64 1
+  %tmp6571 = getelementptr inbounds float* %tmp6570, i64 1
+  %tmp6572 = getelementptr inbounds float* %tmp6571, i64 1
+  %tmp6573 = getelementptr inbounds float* %tmp6572, i64 1
+  %tmp6574 = getelementptr inbounds float* %tmp6573, i64 1
+  %tmp6575 = getelementptr inbounds float* %tmp6574, i64 1
+  %tmp6576 = getelementptr inbounds float* %tmp6575, i64 1
+  %tmp6577 = getelementptr inbounds float* %tmp6576, i64 1
+  %tmp6578 = getelementptr inbounds float* %tmp6577, i64 1
+  %tmp6579 = getelementptr inbounds float* %tmp6578, i64 1
+  %tmp6580 = getelementptr inbounds float* %tmp6579, i64 1
+  %tmp6581 = getelementptr inbounds float* %tmp6580, i64 1
+  %tmp6582 = getelementptr inbounds float* %tmp6581, i64 1
+  %tmp6583 = getelementptr inbounds float* %tmp6582, i64 1
+  %tmp6584 = getelementptr inbounds float* %tmp6583, i64 1
+  %tmp6585 = getelementptr inbounds float* %tmp6584, i64 1
+  %tmp6586 = getelementptr inbounds float* %tmp6585, i64 1
+  %tmp6587 = getelementptr inbounds float* %tmp6586, i64 1
+  %tmp6588 = getelementptr inbounds float* %tmp6587, i64 1
+  %tmp6589 = getelementptr inbounds float* %tmp6588, i64 1
+  %tmp6590 = getelementptr inbounds float* %tmp6589, i64 1
+  %tmp6591 = getelementptr inbounds float* %tmp6590, i64 1
+  %tmp6592 = getelementptr inbounds float* %tmp6591, i64 1
+  %tmp6593 = getelementptr inbounds float* %tmp6592, i64 1
+  %tmp6594 = getelementptr inbounds float* %tmp6593, i64 1
+  %tmp6595 = getelementptr inbounds float* %tmp6594, i64 1
+  %tmp6596 = getelementptr inbounds float* %tmp6595, i64 1
+  %tmp6597 = getelementptr inbounds float* %tmp6596, i64 1
+  %tmp6598 = getelementptr inbounds float* %tmp6597, i64 1
+  %tmp6599 = getelementptr inbounds float* %tmp6598, i64 1
+  %tmp6600 = getelementptr inbounds float* %tmp6599, i64 1
+  %tmp6601 = getelementptr inbounds float* %tmp6600, i64 1
+  %tmp6602 = getelementptr inbounds float* %tmp6601, i64 1
+  %tmp6603 = getelementptr inbounds float* %tmp6602, i64 1
+  %tmp6604 = getelementptr inbounds float* %tmp6603, i64 1
+  %tmp6605 = getelementptr inbounds float* %tmp6604, i64 1
+  %tmp6606 = getelementptr inbounds float* %tmp6605, i64 1
+  %tmp6607 = getelementptr inbounds float* %tmp6606, i64 1
+  %tmp6608 = getelementptr inbounds float* %tmp6607, i64 1
+  %tmp6609 = getelementptr inbounds float* %tmp6608, i64 1
+  %tmp6610 = getelementptr inbounds float* %tmp6609, i64 1
+  %tmp6611 = getelementptr inbounds float* %tmp6610, i64 1
+  %tmp6612 = getelementptr inbounds float* %tmp6611, i64 1
+  %tmp6613 = getelementptr inbounds float* %tmp6612, i64 1
+  %tmp6614 = getelementptr inbounds float* %tmp6613, i64 1
+  %tmp6615 = getelementptr inbounds float* %tmp6614, i64 1
+  %tmp6616 = getelementptr inbounds float* %tmp6615, i64 1
+  %tmp6617 = getelementptr inbounds float* %tmp6616, i64 1
+  %tmp6618 = getelementptr inbounds float* %tmp6617, i64 1
+  %tmp6619 = getelementptr inbounds float* %tmp6618, i64 1
+  %tmp6620 = getelementptr inbounds float* %tmp6619, i64 1
+  %tmp6621 = getelementptr inbounds float* %tmp6620, i64 1
+  %tmp6622 = getelementptr inbounds float* %tmp6621, i64 1
+  %tmp6623 = getelementptr inbounds float* %tmp6622, i64 1
+  %tmp6624 = getelementptr inbounds float* %tmp6623, i64 1
+  %tmp6625 = getelementptr inbounds float* %tmp6624, i64 1
+  %tmp6626 = getelementptr inbounds float* %tmp6625, i64 1
+  %tmp6627 = getelementptr inbounds float* %tmp6626, i64 1
+  %tmp6628 = getelementptr inbounds float* %tmp6627, i64 1
+  %tmp6629 = getelementptr inbounds float* %tmp6628, i64 1
+  %tmp6630 = getelementptr inbounds float* %tmp6629, i64 1
+  %tmp6631 = getelementptr inbounds float* %tmp6630, i64 1
+  %tmp6632 = getelementptr inbounds float* %tmp6631, i64 1
+  %tmp6633 = getelementptr inbounds float* %tmp6632, i64 1
+  %tmp6634 = getelementptr inbounds float* %tmp6633, i64 1
+  %tmp6635 = getelementptr inbounds float* %tmp6634, i64 1
+  %tmp6636 = getelementptr inbounds float* %tmp6635, i64 1
+  %tmp6637 = getelementptr inbounds float* %tmp6636, i64 1
+  %tmp6638 = getelementptr inbounds float* %tmp6637, i64 1
+  %tmp6639 = getelementptr inbounds float* %tmp6638, i64 1
+  %tmp6640 = getelementptr inbounds float* %tmp6639, i64 1
+  %tmp6641 = getelementptr inbounds float* %tmp6640, i64 1
+  %tmp6642 = getelementptr inbounds float* %tmp6641, i64 1
+  %tmp6643 = getelementptr inbounds float* %tmp6642, i64 1
+  %tmp6644 = getelementptr inbounds float* %tmp6643, i64 1
+  %tmp6645 = getelementptr inbounds float* %tmp6644, i64 1
+  %tmp6646 = getelementptr inbounds float* %tmp6645, i64 1
+  %tmp6647 = getelementptr inbounds float* %tmp6646, i64 1
+  %tmp6648 = getelementptr inbounds float* %tmp6647, i64 1
+  %tmp6649 = getelementptr inbounds float* %tmp6648, i64 1
+  %tmp6650 = getelementptr inbounds float* %tmp6649, i64 1
+  %tmp6651 = getelementptr inbounds float* %tmp6650, i64 1
+  %tmp6652 = getelementptr inbounds float* %tmp6651, i64 1
+  %tmp6653 = getelementptr inbounds float* %tmp6652, i64 1
+  %tmp6654 = getelementptr inbounds float* %tmp6653, i64 1
+  %tmp6655 = getelementptr inbounds float* %tmp6654, i64 1
+  %tmp6656 = getelementptr inbounds float* %tmp6655, i64 1
+  %tmp6657 = getelementptr inbounds float* %tmp6656, i64 1
+  %tmp6658 = getelementptr inbounds float* %tmp6657, i64 1
+  %tmp6659 = getelementptr inbounds float* %tmp6658, i64 1
+  %tmp6660 = getelementptr inbounds float* %tmp6659, i64 1
+  %tmp6661 = getelementptr inbounds float* %tmp6660, i64 1
+  %tmp6662 = getelementptr inbounds float* %tmp6661, i64 1
+  %tmp6663 = getelementptr inbounds float* %tmp6662, i64 1
+  %tmp6664 = getelementptr inbounds float* %tmp6663, i64 1
+  %tmp6665 = getelementptr inbounds float* %tmp6664, i64 1
+  %tmp6666 = getelementptr inbounds float* %tmp6665, i64 1
+  %tmp6667 = getelementptr inbounds float* %tmp6666, i64 1
+  %tmp6668 = getelementptr inbounds float* %tmp6667, i64 1
+  %tmp6669 = getelementptr inbounds float* %tmp6668, i64 1
+  %tmp6670 = getelementptr inbounds float* %tmp6669, i64 1
+  %tmp6671 = getelementptr inbounds float* %tmp6670, i64 1
+  %tmp6672 = getelementptr inbounds float* %tmp6671, i64 1
+  %tmp6673 = getelementptr inbounds float* %tmp6672, i64 1
+  %tmp6674 = getelementptr inbounds float* %tmp6673, i64 1
+  %tmp6675 = getelementptr inbounds float* %tmp6674, i64 1
+  %tmp6676 = getelementptr inbounds float* %tmp6675, i64 1
+  %tmp6677 = getelementptr inbounds float* %tmp6676, i64 1
+  %tmp6678 = getelementptr inbounds float* %tmp6677, i64 1
+  %tmp6679 = getelementptr inbounds float* %tmp6678, i64 1
+  %tmp6680 = getelementptr inbounds float* %tmp6679, i64 1
+  %tmp6681 = getelementptr inbounds float* %tmp6680, i64 1
+  %tmp6682 = getelementptr inbounds float* %tmp6681, i64 1
+  %tmp6683 = getelementptr inbounds float* %tmp6682, i64 1
+  %tmp6684 = getelementptr inbounds float* %tmp6683, i64 1
+  %tmp6685 = getelementptr inbounds float* %tmp6684, i64 1
+  %tmp6686 = getelementptr inbounds float* %tmp6685, i64 1
+  %tmp6687 = getelementptr inbounds float* %tmp6686, i64 1
+  %tmp6688 = getelementptr inbounds float* %tmp6687, i64 1
+  %tmp6689 = getelementptr inbounds float* %tmp6688, i64 1
+  %tmp6690 = getelementptr inbounds float* %tmp6689, i64 1
+  %tmp6691 = getelementptr inbounds float* %tmp6690, i64 1
+  %tmp6692 = getelementptr inbounds float* %tmp6691, i64 1
+  %tmp6693 = getelementptr inbounds float* %tmp6692, i64 1
+  %tmp6694 = getelementptr inbounds float* %tmp6693, i64 1
+  %tmp6695 = getelementptr inbounds float* %tmp6694, i64 1
+  %tmp6696 = getelementptr inbounds float* %tmp6695, i64 1
+  %tmp6697 = getelementptr inbounds float* %tmp6696, i64 1
+  %tmp6698 = getelementptr inbounds float* %tmp6697, i64 1
+  %tmp6699 = getelementptr inbounds float* %tmp6698, i64 1
+  %tmp6700 = getelementptr inbounds float* %tmp6699, i64 1
+  %tmp6701 = getelementptr inbounds float* %tmp6700, i64 1
+  %tmp6702 = getelementptr inbounds float* %tmp6701, i64 1
+  %tmp6703 = getelementptr inbounds float* %tmp6702, i64 1
+  %tmp6704 = getelementptr inbounds float* %tmp6703, i64 1
+  %tmp6705 = getelementptr inbounds float* %tmp6704, i64 1
+  %tmp6706 = getelementptr inbounds float* %tmp6705, i64 1
+  %tmp6707 = getelementptr inbounds float* %tmp6706, i64 1
+  %tmp6708 = getelementptr inbounds float* %tmp6707, i64 1
+  %tmp6709 = getelementptr inbounds float* %tmp6708, i64 1
+  %tmp6710 = getelementptr inbounds float* %tmp6709, i64 1
+  %tmp6711 = getelementptr inbounds float* %tmp6710, i64 1
+  %tmp6712 = getelementptr inbounds float* %tmp6711, i64 1
+  %tmp6713 = getelementptr inbounds float* %tmp6712, i64 1
+  %tmp6714 = getelementptr inbounds float* %tmp6713, i64 1
+  %tmp6715 = getelementptr inbounds float* %tmp6714, i64 1
+  %tmp6716 = getelementptr inbounds float* %tmp6715, i64 1
+  %tmp6717 = getelementptr inbounds float* %tmp6716, i64 1
+  %tmp6718 = getelementptr inbounds float* %tmp6717, i64 1
+  %tmp6719 = getelementptr inbounds float* %tmp6718, i64 1
+  %tmp6720 = getelementptr inbounds float* %tmp6719, i64 1
+  %tmp6721 = getelementptr inbounds float* %tmp6720, i64 1
+  %tmp6722 = getelementptr inbounds float* %tmp6721, i64 1
+  %tmp6723 = getelementptr inbounds float* %tmp6722, i64 1
+  %tmp6724 = getelementptr inbounds float* %tmp6723, i64 1
+  %tmp6725 = getelementptr inbounds float* %tmp6724, i64 1
+  %tmp6726 = getelementptr inbounds float* %tmp6725, i64 1
+  %tmp6727 = getelementptr inbounds float* %tmp6726, i64 1
+  %tmp6728 = getelementptr inbounds float* %tmp6727, i64 1
+  %tmp6729 = getelementptr inbounds float* %tmp6728, i64 1
+  %tmp6730 = getelementptr inbounds float* %tmp6729, i64 1
+  %tmp6731 = getelementptr inbounds float* %tmp6730, i64 1
+  %tmp6732 = getelementptr inbounds float* %tmp6731, i64 1
+  %tmp6733 = getelementptr inbounds float* %tmp6732, i64 1
+  %tmp6734 = getelementptr inbounds float* %tmp6733, i64 1
+  %tmp6735 = getelementptr inbounds float* %tmp6734, i64 1
+  %tmp6736 = getelementptr inbounds float* %tmp6735, i64 1
+  %tmp6737 = getelementptr inbounds float* %tmp6736, i64 1
+  %tmp6738 = getelementptr inbounds float* %tmp6737, i64 1
+  %tmp6739 = getelementptr inbounds float* %tmp6738, i64 1
+  %tmp6740 = getelementptr inbounds float* %tmp6739, i64 1
+  %tmp6741 = getelementptr inbounds float* %tmp6740, i64 1
+  %tmp6742 = getelementptr inbounds float* %tmp6741, i64 1
+  %tmp6743 = getelementptr inbounds float* %tmp6742, i64 1
+  %tmp6744 = getelementptr inbounds float* %tmp6743, i64 1
+  %tmp6745 = getelementptr inbounds float* %tmp6744, i64 1
+  %tmp6746 = getelementptr inbounds float* %tmp6745, i64 1
+  %tmp6747 = getelementptr inbounds float* %tmp6746, i64 1
+  %tmp6748 = getelementptr inbounds float* %tmp6747, i64 1
+  %tmp6749 = getelementptr inbounds float* %tmp6748, i64 1
+  %tmp6750 = getelementptr inbounds float* %tmp6749, i64 1
+  %tmp6751 = getelementptr inbounds float* %tmp6750, i64 1
+  %tmp6752 = getelementptr inbounds float* %tmp6751, i64 1
+  %tmp6753 = getelementptr inbounds float* %tmp6752, i64 1
+  %tmp6754 = getelementptr inbounds float* %tmp6753, i64 1
+  %tmp6755 = getelementptr inbounds float* %tmp6754, i64 1
+  %tmp6756 = getelementptr inbounds float* %tmp6755, i64 1
+  %tmp6757 = getelementptr inbounds float* %tmp6756, i64 1
+  %tmp6758 = getelementptr inbounds float* %tmp6757, i64 1
+  %tmp6759 = getelementptr inbounds float* %tmp6758, i64 1
+  %tmp6760 = getelementptr inbounds float* %tmp6759, i64 1
+  %tmp6761 = getelementptr inbounds float* %tmp6760, i64 1
+  %tmp6762 = getelementptr inbounds float* %tmp6761, i64 1
+  %tmp6763 = getelementptr inbounds float* %tmp6762, i64 1
+  %tmp6764 = getelementptr inbounds float* %tmp6763, i64 1
+  %tmp6765 = getelementptr inbounds float* %tmp6764, i64 1
+  %tmp6766 = getelementptr inbounds float* %tmp6765, i64 1
+  %tmp6767 = getelementptr inbounds float* %tmp6766, i64 1
+  %tmp6768 = getelementptr inbounds float* %tmp6767, i64 1
+  %tmp6769 = getelementptr inbounds float* %tmp6768, i64 1
+  %tmp6770 = getelementptr inbounds float* %tmp6769, i64 1
+  %tmp6771 = getelementptr inbounds float* %tmp6770, i64 1
+  %tmp6772 = getelementptr inbounds float* %tmp6771, i64 1
+  %tmp6773 = getelementptr inbounds float* %tmp6772, i64 1
+  %tmp6774 = getelementptr inbounds float* %tmp6773, i64 1
+  %tmp6775 = getelementptr inbounds float* %tmp6774, i64 1
+  %tmp6776 = getelementptr inbounds float* %tmp6775, i64 1
+  %tmp6777 = getelementptr inbounds float* %tmp6776, i64 1
+  %tmp6778 = getelementptr inbounds float* %tmp6777, i64 1
+  %tmp6779 = getelementptr inbounds float* %tmp6778, i64 1
+  %tmp6780 = getelementptr inbounds float* %tmp6779, i64 1
+  %tmp6781 = getelementptr inbounds float* %tmp6780, i64 1
+  %tmp6782 = getelementptr inbounds float* %tmp6781, i64 1
+  %tmp6783 = getelementptr inbounds float* %tmp6782, i64 1
+  %tmp6784 = getelementptr inbounds float* %tmp6783, i64 1
+  %tmp6785 = getelementptr inbounds float* %tmp6784, i64 1
+  %tmp6786 = getelementptr inbounds float* %tmp6785, i64 1
+  %tmp6787 = getelementptr inbounds float* %tmp6786, i64 1
+  %tmp6788 = getelementptr inbounds float* %tmp6787, i64 1
+  %tmp6789 = getelementptr inbounds float* %tmp6788, i64 1
+  %tmp6790 = getelementptr inbounds float* %tmp6789, i64 1
+  %tmp6791 = getelementptr inbounds float* %tmp6790, i64 1
+  %tmp6792 = getelementptr inbounds float* %tmp6791, i64 1
+  %tmp6793 = getelementptr inbounds float* %tmp6792, i64 1
+  %tmp6794 = getelementptr inbounds float* %tmp6793, i64 1
+  %tmp6795 = getelementptr inbounds float* %tmp6794, i64 1
+  %tmp6796 = getelementptr inbounds float* %tmp6795, i64 1
+  %tmp6797 = getelementptr inbounds float* %tmp6796, i64 1
+  %tmp6798 = getelementptr inbounds float* %tmp6797, i64 1
+  %tmp6799 = getelementptr inbounds float* %tmp6798, i64 1
+  %tmp6800 = getelementptr inbounds float* %tmp6799, i64 1
+  %tmp6801 = getelementptr inbounds float* %tmp6800, i64 1
+  %tmp6802 = getelementptr inbounds float* %tmp6801, i64 1
+  %tmp6803 = getelementptr inbounds float* %tmp6802, i64 1
+  %tmp6804 = getelementptr inbounds float* %tmp6803, i64 1
+  %tmp6805 = getelementptr inbounds float* %tmp6804, i64 1
+  %tmp6806 = getelementptr inbounds float* %tmp6805, i64 1
+  %tmp6807 = getelementptr inbounds float* %tmp6806, i64 1
+  %tmp6808 = getelementptr inbounds float* %tmp6807, i64 1
+  %tmp6809 = getelementptr inbounds float* %tmp6808, i64 1
+  %tmp6810 = getelementptr inbounds float* %tmp6809, i64 1
+  %tmp6811 = getelementptr inbounds float* %tmp6810, i64 1
+  %tmp6812 = getelementptr inbounds float* %tmp6811, i64 1
+  %tmp6813 = getelementptr inbounds float* %tmp6812, i64 1
+  %tmp6814 = getelementptr inbounds float* %tmp6813, i64 1
+  %tmp6815 = getelementptr inbounds float* %tmp6814, i64 1
+  %tmp6816 = getelementptr inbounds float* %tmp6815, i64 1
+  %tmp6817 = getelementptr inbounds float* %tmp6816, i64 1
+  %tmp6818 = getelementptr inbounds float* %tmp6817, i64 1
+  %tmp6819 = getelementptr inbounds float* %tmp6818, i64 1
+  %tmp6820 = getelementptr inbounds float* %tmp6819, i64 1
+  %tmp6821 = getelementptr inbounds float* %tmp6820, i64 1
+  %tmp6822 = getelementptr inbounds float* %tmp6821, i64 1
+  %tmp6823 = getelementptr inbounds float* %tmp6822, i64 1
+  %tmp6824 = getelementptr inbounds float* %tmp6823, i64 1
+  %tmp6825 = getelementptr inbounds float* %tmp6824, i64 1
+  %tmp6826 = getelementptr inbounds float* %tmp6825, i64 1
+  %tmp6827 = getelementptr inbounds float* %tmp6826, i64 1
+  %tmp6828 = getelementptr inbounds float* %tmp6827, i64 1
+  %tmp6829 = getelementptr inbounds float* %tmp6828, i64 1
+  %tmp6830 = getelementptr inbounds float* %tmp6829, i64 1
+  %tmp6831 = getelementptr inbounds float* %tmp6830, i64 1
+  %tmp6832 = getelementptr inbounds float* %tmp6831, i64 1
+  %tmp6833 = getelementptr inbounds float* %tmp6832, i64 1
+  %tmp6834 = getelementptr inbounds float* %tmp6833, i64 1
+  %tmp6835 = getelementptr inbounds float* %tmp6834, i64 1
+  %tmp6836 = getelementptr inbounds float* %tmp6835, i64 1
+  %tmp6837 = getelementptr inbounds float* %tmp6836, i64 1
+  %tmp6838 = getelementptr inbounds float* %tmp6837, i64 1
+  %tmp6839 = getelementptr inbounds float* %tmp6838, i64 1
+  %tmp6840 = getelementptr inbounds float* %tmp6839, i64 1
+  %tmp6841 = getelementptr inbounds float* %tmp6840, i64 1
+  %tmp6842 = getelementptr inbounds float* %tmp6841, i64 1
+  %tmp6843 = getelementptr inbounds float* %tmp6842, i64 1
+  %tmp6844 = getelementptr inbounds float* %tmp6843, i64 1
+  %tmp6845 = getelementptr inbounds float* %tmp6844, i64 1
+  %tmp6846 = getelementptr inbounds float* %tmp6845, i64 1
+  %tmp6847 = getelementptr inbounds float* %tmp6846, i64 1
+  %tmp6848 = getelementptr inbounds float* %tmp6847, i64 1
+  %tmp6849 = getelementptr inbounds float* %tmp6848, i64 1
+  %tmp6850 = getelementptr inbounds float* %tmp6849, i64 1
+  %tmp6851 = getelementptr inbounds float* %tmp6850, i64 1
+  %tmp6852 = getelementptr inbounds float* %tmp6851, i64 1
+  %tmp6853 = getelementptr inbounds float* %tmp6852, i64 1
+  %tmp6854 = getelementptr inbounds float* %tmp6853, i64 1
+  %tmp6855 = getelementptr inbounds float* %tmp6854, i64 1
+  %tmp6856 = getelementptr inbounds float* %tmp6855, i64 1
+  %tmp6857 = getelementptr inbounds float* %tmp6856, i64 1
+  %tmp6858 = getelementptr inbounds float* %tmp6857, i64 1
+  %tmp6859 = getelementptr inbounds float* %tmp6858, i64 1
+  %tmp6860 = getelementptr inbounds float* %tmp6859, i64 1
+  %tmp6861 = getelementptr inbounds float* %tmp6860, i64 1
+  %tmp6862 = getelementptr inbounds float* %tmp6861, i64 1
+  %tmp6863 = getelementptr inbounds float* %tmp6862, i64 1
+  %tmp6864 = getelementptr inbounds float* %tmp6863, i64 1
+  %tmp6865 = getelementptr inbounds float* %tmp6864, i64 1
+  %tmp6866 = getelementptr inbounds float* %tmp6865, i64 1
+  %tmp6867 = getelementptr inbounds float* %tmp6866, i64 1
+  %tmp6868 = getelementptr inbounds float* %tmp6867, i64 1
+  %tmp6869 = getelementptr inbounds float* %tmp6868, i64 1
+  %tmp6870 = getelementptr inbounds float* %tmp6869, i64 1
+  %tmp6871 = getelementptr inbounds float* %tmp6870, i64 1
+  %tmp6872 = getelementptr inbounds float* %tmp6871, i64 1
+  %tmp6873 = getelementptr inbounds float* %tmp6872, i64 1
+  %tmp6874 = getelementptr inbounds float* %tmp6873, i64 1
+  %tmp6875 = getelementptr inbounds float* %tmp6874, i64 1
+  %tmp6876 = getelementptr inbounds float* %tmp6875, i64 1
+  %tmp6877 = getelementptr inbounds float* %tmp6876, i64 1
+  %tmp6878 = getelementptr inbounds float* %tmp6877, i64 1
+  %tmp6879 = getelementptr inbounds float* %tmp6878, i64 1
+  %tmp6880 = getelementptr inbounds float* %tmp6879, i64 1
+  %tmp6881 = getelementptr inbounds float* %tmp6880, i64 1
+  %tmp6882 = getelementptr inbounds float* %tmp6881, i64 1
+  %tmp6883 = getelementptr inbounds float* %tmp6882, i64 1
+  %tmp6884 = getelementptr inbounds float* %tmp6883, i64 1
+  %tmp6885 = getelementptr inbounds float* %tmp6884, i64 1
+  %tmp6886 = getelementptr inbounds float* %tmp6885, i64 1
+  %tmp6887 = getelementptr inbounds float* %tmp6886, i64 1
+  %tmp6888 = getelementptr inbounds float* %tmp6887, i64 1
+  %tmp6889 = getelementptr inbounds float* %tmp6888, i64 1
+  %tmp6890 = getelementptr inbounds float* %tmp6889, i64 1
+  %tmp6891 = getelementptr inbounds float* %tmp6890, i64 1
+  %tmp6892 = getelementptr inbounds float* %tmp6891, i64 1
+  %tmp6893 = getelementptr inbounds float* %tmp6892, i64 1
+  %tmp6894 = getelementptr inbounds float* %tmp6893, i64 1
+  %tmp6895 = getelementptr inbounds float* %tmp6894, i64 1
+  %tmp6896 = getelementptr inbounds float* %tmp6895, i64 1
+  %tmp6897 = getelementptr inbounds float* %tmp6896, i64 1
+  %tmp6898 = getelementptr inbounds float* %tmp6897, i64 1
+  %tmp6899 = getelementptr inbounds float* %tmp6898, i64 1
+  %tmp6900 = getelementptr inbounds float* %tmp6899, i64 1
+  %tmp6901 = getelementptr inbounds float* %tmp6900, i64 1
+  %tmp6902 = getelementptr inbounds float* %tmp6901, i64 1
+  %tmp6903 = getelementptr inbounds float* %tmp6902, i64 1
+  %tmp6904 = getelementptr inbounds float* %tmp6903, i64 1
+  %tmp6905 = getelementptr inbounds float* %tmp6904, i64 1
+  %tmp6906 = getelementptr inbounds float* %tmp6905, i64 1
+  %tmp6907 = getelementptr inbounds float* %tmp6906, i64 1
+  %tmp6908 = getelementptr inbounds float* %tmp6907, i64 1
+  %tmp6909 = getelementptr inbounds float* %tmp6908, i64 1
+  %tmp6910 = getelementptr inbounds float* %tmp6909, i64 1
+  %tmp6911 = getelementptr inbounds float* %tmp6910, i64 1
+  %tmp6912 = getelementptr inbounds float* %tmp6911, i64 1
+  %tmp6913 = getelementptr inbounds float* %tmp6912, i64 1
+  %tmp6914 = getelementptr inbounds float* %tmp6913, i64 1
+  %tmp6915 = getelementptr inbounds float* %tmp6914, i64 1
+  %tmp6916 = getelementptr inbounds float* %tmp6915, i64 1
+  %tmp6917 = getelementptr inbounds float* %tmp6916, i64 1
+  %tmp6918 = getelementptr inbounds float* %tmp6917, i64 1
+  %tmp6919 = getelementptr inbounds float* %tmp6918, i64 1
+  %tmp6920 = getelementptr inbounds float* %tmp6919, i64 1
+  %tmp6921 = getelementptr inbounds float* %tmp6920, i64 1
+  %tmp6922 = getelementptr inbounds float* %tmp6921, i64 1
+  %tmp6923 = getelementptr inbounds float* %tmp6922, i64 1
+  %tmp6924 = getelementptr inbounds float* %tmp6923, i64 1
+  %tmp6925 = getelementptr inbounds float* %tmp6924, i64 1
+  %tmp6926 = getelementptr inbounds float* %tmp6925, i64 1
+  %tmp6927 = getelementptr inbounds float* %tmp6926, i64 1
+  %tmp6928 = getelementptr inbounds float* %tmp6927, i64 1
+  %tmp6929 = getelementptr inbounds float* %tmp6928, i64 1
+  %tmp6930 = getelementptr inbounds float* %tmp6929, i64 1
+  %tmp6931 = getelementptr inbounds float* %tmp6930, i64 1
+  %tmp6932 = getelementptr inbounds float* %tmp6931, i64 1
+  %tmp6933 = getelementptr inbounds float* %tmp6932, i64 1
+  %tmp6934 = getelementptr inbounds float* %tmp6933, i64 1
+  %tmp6935 = getelementptr inbounds float* %tmp6934, i64 1
+  %tmp6936 = getelementptr inbounds float* %tmp6935, i64 1
+  %tmp6937 = getelementptr inbounds float* %tmp6936, i64 1
+  %tmp6938 = getelementptr inbounds float* %tmp6937, i64 1
+  %tmp6939 = getelementptr inbounds float* %tmp6938, i64 1
+  %tmp6940 = getelementptr inbounds float* %tmp6939, i64 1
+  %tmp6941 = getelementptr inbounds float* %tmp6940, i64 1
+  %tmp6942 = getelementptr inbounds float* %tmp6941, i64 1
+  %tmp6943 = getelementptr inbounds float* %tmp6942, i64 1
+  %tmp6944 = getelementptr inbounds float* %tmp6943, i64 1
+  %tmp6945 = getelementptr inbounds float* %tmp6944, i64 1
+  %tmp6946 = getelementptr inbounds float* %tmp6945, i64 1
+  %tmp6947 = getelementptr inbounds float* %tmp6946, i64 1
+  %tmp6948 = getelementptr inbounds float* %tmp6947, i64 1
+  %tmp6949 = getelementptr inbounds float* %tmp6948, i64 1
+  %tmp6950 = getelementptr inbounds float* %tmp6949, i64 1
+  %tmp6951 = getelementptr inbounds float* %tmp6950, i64 1
+  %tmp6952 = getelementptr inbounds float* %tmp6951, i64 1
+  %tmp6953 = getelementptr inbounds float* %tmp6952, i64 1
+  %tmp6954 = getelementptr inbounds float* %tmp6953, i64 1
+  %tmp6955 = getelementptr inbounds float* %tmp6954, i64 1
+  %tmp6956 = getelementptr inbounds float* %tmp6955, i64 1
+  %tmp6957 = getelementptr inbounds float* %tmp6956, i64 1
+  %tmp6958 = getelementptr inbounds float* %tmp6957, i64 1
+  %tmp6959 = getelementptr inbounds float* %tmp6958, i64 1
+  %tmp6960 = getelementptr inbounds float* %tmp6959, i64 1
+  %tmp6961 = getelementptr inbounds float* %tmp6960, i64 1
+  %tmp6962 = getelementptr inbounds float* %tmp6961, i64 1
+  %tmp6963 = getelementptr inbounds float* %tmp6962, i64 1
+  %tmp6964 = getelementptr inbounds float* %tmp6963, i64 1
+  %tmp6965 = getelementptr inbounds float* %tmp6964, i64 1
+  %tmp6966 = getelementptr inbounds float* %tmp6965, i64 1
+  %tmp6967 = getelementptr inbounds float* %tmp6966, i64 1
+  %tmp6968 = getelementptr inbounds float* %tmp6967, i64 1
+  %tmp6969 = getelementptr inbounds float* %tmp6968, i64 1
+  %tmp6970 = getelementptr inbounds float* %tmp6969, i64 1
+  %tmp6971 = getelementptr inbounds float* %tmp6970, i64 1
+  %tmp6972 = getelementptr inbounds float* %tmp6971, i64 1
+  %tmp6973 = getelementptr inbounds float* %tmp6972, i64 1
+  %tmp6974 = getelementptr inbounds float* %tmp6973, i64 1
+  %tmp6975 = getelementptr inbounds float* %tmp6974, i64 1
+  %tmp6976 = getelementptr inbounds float* %tmp6975, i64 1
+  %tmp6977 = getelementptr inbounds float* %tmp6976, i64 1
+  %tmp6978 = getelementptr inbounds float* %tmp6977, i64 1
+  %tmp6979 = getelementptr inbounds float* %tmp6978, i64 1
+  %tmp6980 = getelementptr inbounds float* %tmp6979, i64 1
+  %tmp6981 = getelementptr inbounds float* %tmp6980, i64 1
+  %tmp6982 = getelementptr inbounds float* %tmp6981, i64 1
+  %tmp6983 = getelementptr inbounds float* %tmp6982, i64 1
+  %tmp6984 = getelementptr inbounds float* %tmp6983, i64 1
+  %tmp6985 = getelementptr inbounds float* %tmp6984, i64 1
+  %tmp6986 = getelementptr inbounds float* %tmp6985, i64 1
+  %tmp6987 = getelementptr inbounds float* %tmp6986, i64 1
+  %tmp6988 = getelementptr inbounds float* %tmp6987, i64 1
+  %tmp6989 = getelementptr inbounds float* %tmp6988, i64 1
+  %tmp6990 = getelementptr inbounds float* %tmp6989, i64 1
+  %tmp6991 = getelementptr inbounds float* %tmp6990, i64 1
+  %tmp6992 = getelementptr inbounds float* %tmp6991, i64 1
+  %tmp6993 = getelementptr inbounds float* %tmp6992, i64 1
+  %tmp6994 = getelementptr inbounds float* %tmp6993, i64 1
+  %tmp6995 = getelementptr inbounds float* %tmp6994, i64 1
+  %tmp6996 = getelementptr inbounds float* %tmp6995, i64 1
+  %tmp6997 = getelementptr inbounds float* %tmp6996, i64 1
+  %tmp6998 = getelementptr inbounds float* %tmp6997, i64 1
+  %tmp6999 = getelementptr inbounds float* %tmp6998, i64 1
+  %tmp7000 = getelementptr inbounds float* %tmp6999, i64 1
+  %tmp7001 = getelementptr inbounds float* %tmp7000, i64 1
+  %tmp7002 = getelementptr inbounds float* %tmp7001, i64 1
+  %tmp7003 = getelementptr inbounds float* %tmp7002, i64 1
+  %tmp7004 = getelementptr inbounds float* %tmp7003, i64 1
+  %tmp7005 = getelementptr inbounds float* %tmp7004, i64 1
+  %tmp7006 = getelementptr inbounds float* %tmp7005, i64 1
+  %tmp7007 = getelementptr inbounds float* %tmp7006, i64 1
+  %tmp7008 = getelementptr inbounds float* %tmp7007, i64 1
+  %tmp7009 = getelementptr inbounds float* %tmp7008, i64 1
+  %tmp7010 = getelementptr inbounds float* %tmp7009, i64 1
+  %tmp7011 = getelementptr inbounds float* %tmp7010, i64 1
+  %tmp7012 = getelementptr inbounds float* %tmp7011, i64 1
+  %tmp7013 = getelementptr inbounds float* %tmp7012, i64 1
+  %tmp7014 = getelementptr inbounds float* %tmp7013, i64 1
+  %tmp7015 = getelementptr inbounds float* %tmp7014, i64 1
+  %tmp7016 = getelementptr inbounds float* %tmp7015, i64 1
+  %tmp7017 = getelementptr inbounds float* %tmp7016, i64 1
+  %tmp7018 = getelementptr inbounds float* %tmp7017, i64 1
+  %tmp7019 = getelementptr inbounds float* %tmp7018, i64 1
+  %tmp7020 = getelementptr inbounds float* %tmp7019, i64 1
+  %tmp7021 = getelementptr inbounds float* %tmp7020, i64 1
+  %tmp7022 = getelementptr inbounds float* %tmp7021, i64 1
+  %tmp7023 = getelementptr inbounds float* %tmp7022, i64 1
+  %tmp7024 = getelementptr inbounds float* %tmp7023, i64 1
+  %tmp7025 = getelementptr inbounds float* %tmp7024, i64 1
+  %tmp7026 = getelementptr inbounds float* %tmp7025, i64 1
+  %tmp7027 = getelementptr inbounds float* %tmp7026, i64 1
+  %tmp7028 = getelementptr inbounds float* %tmp7027, i64 1
+  %tmp7029 = getelementptr inbounds float* %tmp7028, i64 1
+  %tmp7030 = getelementptr inbounds float* %tmp7029, i64 1
+  %tmp7031 = getelementptr inbounds float* %tmp7030, i64 1
+  %tmp7032 = getelementptr inbounds float* %tmp7031, i64 1
+  %tmp7033 = getelementptr inbounds float* %tmp7032, i64 1
+  %tmp7034 = getelementptr inbounds float* %tmp7033, i64 1
+  %tmp7035 = getelementptr inbounds float* %tmp7034, i64 1
+  %tmp7036 = getelementptr inbounds float* %tmp7035, i64 1
+  %tmp7037 = getelementptr inbounds float* %tmp7036, i64 1
+  %tmp7038 = getelementptr inbounds float* %tmp7037, i64 1
+  %tmp7039 = getelementptr inbounds float* %tmp7038, i64 1
+  %tmp7040 = getelementptr inbounds float* %tmp7039, i64 1
+  %tmp7041 = getelementptr inbounds float* %tmp7040, i64 1
+  %tmp7042 = getelementptr inbounds float* %tmp7041, i64 1
+  %tmp7043 = getelementptr inbounds float* %tmp7042, i64 1
+  %tmp7044 = getelementptr inbounds float* %tmp7043, i64 1
+  %tmp7045 = getelementptr inbounds float* %tmp7044, i64 1
+  %tmp7046 = getelementptr inbounds float* %tmp7045, i64 1
+  %tmp7047 = getelementptr inbounds float* %tmp7046, i64 1
+  %tmp7048 = getelementptr inbounds float* %tmp7047, i64 1
+  %tmp7049 = getelementptr inbounds float* %tmp7048, i64 1
+  %tmp7050 = getelementptr inbounds float* %tmp7049, i64 1
+  %tmp7051 = getelementptr inbounds float* %tmp7050, i64 1
+  %tmp7052 = getelementptr inbounds float* %tmp7051, i64 1
+  %tmp7053 = getelementptr inbounds float* %tmp7052, i64 1
+  %tmp7054 = getelementptr inbounds float* %tmp7053, i64 1
+  %tmp7055 = getelementptr inbounds float* %tmp7054, i64 1
+  %tmp7056 = getelementptr inbounds float* %tmp7055, i64 1
+  %tmp7057 = getelementptr inbounds float* %tmp7056, i64 1
+  %tmp7058 = getelementptr inbounds float* %tmp7057, i64 1
+  %tmp7059 = getelementptr inbounds float* %tmp7058, i64 1
+  %tmp7060 = getelementptr inbounds float* %tmp7059, i64 1
+  %tmp7061 = getelementptr inbounds float* %tmp7060, i64 1
+  %tmp7062 = getelementptr inbounds float* %tmp7061, i64 1
+  %tmp7063 = getelementptr inbounds float* %tmp7062, i64 1
+  %tmp7064 = getelementptr inbounds float* %tmp7063, i64 1
+  %tmp7065 = getelementptr inbounds float* %tmp7064, i64 1
+  %tmp7066 = getelementptr inbounds float* %tmp7065, i64 1
+  %tmp7067 = getelementptr inbounds float* %tmp7066, i64 1
+  %tmp7068 = getelementptr inbounds float* %tmp7067, i64 1
+  %tmp7069 = getelementptr inbounds float* %tmp7068, i64 1
+  %tmp7070 = getelementptr inbounds float* %tmp7069, i64 1
+  %tmp7071 = getelementptr inbounds float* %tmp7070, i64 1
+  %tmp7072 = getelementptr inbounds float* %tmp7071, i64 1
+  %tmp7073 = getelementptr inbounds float* %tmp7072, i64 1
+  %tmp7074 = getelementptr inbounds float* %tmp7073, i64 1
+  %tmp7075 = getelementptr inbounds float* %tmp7074, i64 1
+  %tmp7076 = getelementptr inbounds float* %tmp7075, i64 1
+  %tmp7077 = getelementptr inbounds float* %tmp7076, i64 1
+  %tmp7078 = getelementptr inbounds float* %tmp7077, i64 1
+  %tmp7079 = getelementptr inbounds float* %tmp7078, i64 1
+  %tmp7080 = getelementptr inbounds float* %tmp7079, i64 1
+  %tmp7081 = getelementptr inbounds float* %tmp7080, i64 1
+  %tmp7082 = getelementptr inbounds float* %tmp7081, i64 1
+  %tmp7083 = getelementptr inbounds float* %tmp7082, i64 1
+  %tmp7084 = getelementptr inbounds float* %tmp7083, i64 1
+  %tmp7085 = getelementptr inbounds float* %tmp7084, i64 1
+  %tmp7086 = getelementptr inbounds float* %tmp7085, i64 1
+  %tmp7087 = getelementptr inbounds float* %tmp7086, i64 1
+  %tmp7088 = getelementptr inbounds float* %tmp7087, i64 1
+  %tmp7089 = getelementptr inbounds float* %tmp7088, i64 1
+  %tmp7090 = getelementptr inbounds float* %tmp7089, i64 1
+  %tmp7091 = getelementptr inbounds float* %tmp7090, i64 1
+  %tmp7092 = getelementptr inbounds float* %tmp7091, i64 1
+  %tmp7093 = getelementptr inbounds float* %tmp7092, i64 1
+  %tmp7094 = getelementptr inbounds float* %tmp7093, i64 1
+  %tmp7095 = getelementptr inbounds float* %tmp7094, i64 1
+  %tmp7096 = getelementptr inbounds float* %tmp7095, i64 1
+  %tmp7097 = getelementptr inbounds float* %tmp7096, i64 1
+  %tmp7098 = getelementptr inbounds float* %tmp7097, i64 1
+  %tmp7099 = getelementptr inbounds float* %tmp7098, i64 1
+  %tmp7100 = getelementptr inbounds float* %tmp7099, i64 1
+  %tmp7101 = getelementptr inbounds float* %tmp7100, i64 1
+  %tmp7102 = getelementptr inbounds float* %tmp7101, i64 1
+  %tmp7103 = getelementptr inbounds float* %tmp7102, i64 1
+  %tmp7104 = getelementptr inbounds float* %tmp7103, i64 1
+  %tmp7105 = getelementptr inbounds float* %tmp7104, i64 1
+  %tmp7106 = getelementptr inbounds float* %tmp7105, i64 1
+  %tmp7107 = getelementptr inbounds float* %tmp7106, i64 1
+  %tmp7108 = getelementptr inbounds float* %tmp7107, i64 1
+  %tmp7109 = getelementptr inbounds float* %tmp7108, i64 1
+  %tmp7110 = getelementptr inbounds float* %tmp7109, i64 1
+  %tmp7111 = getelementptr inbounds float* %tmp7110, i64 1
+  %tmp7112 = getelementptr inbounds float* %tmp7111, i64 1
+  %tmp7113 = getelementptr inbounds float* %tmp7112, i64 1
+  %tmp7114 = getelementptr inbounds float* %tmp7113, i64 1
+  %tmp7115 = getelementptr inbounds float* %tmp7114, i64 1
+  %tmp7116 = getelementptr inbounds float* %tmp7115, i64 1
+  %tmp7117 = getelementptr inbounds float* %tmp7116, i64 1
+  %tmp7118 = getelementptr inbounds float* %tmp7117, i64 1
+  %tmp7119 = getelementptr inbounds float* %tmp7118, i64 1
+  %tmp7120 = getelementptr inbounds float* %tmp7119, i64 1
+  %tmp7121 = getelementptr inbounds float* %tmp7120, i64 1
+  %tmp7122 = getelementptr inbounds float* %tmp7121, i64 1
+  %tmp7123 = getelementptr inbounds float* %tmp7122, i64 1
+  %tmp7124 = getelementptr inbounds float* %tmp7123, i64 1
+  %tmp7125 = getelementptr inbounds float* %tmp7124, i64 1
+  %tmp7126 = getelementptr inbounds float* %tmp7125, i64 1
+  %tmp7127 = getelementptr inbounds float* %tmp7126, i64 1
+  %tmp7128 = getelementptr inbounds float* %tmp7127, i64 1
+  %tmp7129 = getelementptr inbounds float* %tmp7128, i64 1
+  %tmp7130 = getelementptr inbounds float* %tmp7129, i64 1
+  %tmp7131 = getelementptr inbounds float* %tmp7130, i64 1
+  %tmp7132 = getelementptr inbounds float* %tmp7131, i64 1
+  %tmp7133 = getelementptr inbounds float* %tmp7132, i64 1
+  %tmp7134 = getelementptr inbounds float* %tmp7133, i64 1
+  %tmp7135 = getelementptr inbounds float* %tmp7134, i64 1
+  %tmp7136 = getelementptr inbounds float* %tmp7135, i64 1
+  %tmp7137 = getelementptr inbounds float* %tmp7136, i64 1
+  %tmp7138 = getelementptr inbounds float* %tmp7137, i64 1
+  %tmp7139 = getelementptr inbounds float* %tmp7138, i64 1
+  %tmp7140 = getelementptr inbounds float* %tmp7139, i64 1
+  %tmp7141 = getelementptr inbounds float* %tmp7140, i64 1
+  %tmp7142 = getelementptr inbounds float* %tmp7141, i64 1
+  %tmp7143 = getelementptr inbounds float* %tmp7142, i64 1
+  %tmp7144 = getelementptr inbounds float* %tmp7143, i64 1
+  %tmp7145 = getelementptr inbounds float* %tmp7144, i64 1
+  %tmp7146 = getelementptr inbounds float* %tmp7145, i64 1
+  %tmp7147 = getelementptr inbounds float* %tmp7146, i64 1
+  %tmp7148 = getelementptr inbounds float* %tmp7147, i64 1
+  %tmp7149 = getelementptr inbounds float* %tmp7148, i64 1
+  %tmp7150 = getelementptr inbounds float* %tmp7149, i64 1
+  %tmp7151 = getelementptr inbounds float* %tmp7150, i64 1
+  %tmp7152 = getelementptr inbounds float* %tmp7151, i64 1
+  %tmp7153 = getelementptr inbounds float* %tmp7152, i64 1
+  %tmp7154 = getelementptr inbounds float* %tmp7153, i64 1
+  %tmp7155 = getelementptr inbounds float* %tmp7154, i64 1
+  %tmp7156 = getelementptr inbounds float* %tmp7155, i64 1
+  %tmp7157 = getelementptr inbounds float* %tmp7156, i64 1
+  %tmp7158 = getelementptr inbounds float* %tmp7157, i64 1
+  %tmp7159 = getelementptr inbounds float* %tmp7158, i64 1
+  %tmp7160 = getelementptr inbounds float* %tmp7159, i64 1
+  %tmp7161 = getelementptr inbounds float* %tmp7160, i64 1
+  %tmp7162 = getelementptr inbounds float* %tmp7161, i64 1
+  %tmp7163 = getelementptr inbounds float* %tmp7162, i64 1
+  %tmp7164 = getelementptr inbounds float* %tmp7163, i64 1
+  %tmp7165 = getelementptr inbounds float* %tmp7164, i64 1
+  %tmp7166 = getelementptr inbounds float* %tmp7165, i64 1
+  %tmp7167 = getelementptr inbounds float* %tmp7166, i64 1
+  %tmp7168 = getelementptr inbounds float* %tmp7167, i64 1
+  %tmp7169 = getelementptr inbounds float* %tmp7168, i64 1
+  %tmp7170 = getelementptr inbounds float* %tmp7169, i64 1
+  %tmp7171 = getelementptr inbounds float* %tmp7170, i64 1
+  %tmp7172 = getelementptr inbounds float* %tmp7171, i64 1
+  %tmp7173 = getelementptr inbounds float* %tmp7172, i64 1
+  %tmp7174 = getelementptr inbounds float* %tmp7173, i64 1
+  %tmp7175 = getelementptr inbounds float* %tmp7174, i64 1
+  %tmp7176 = getelementptr inbounds float* %tmp7175, i64 1
+  %tmp7177 = getelementptr inbounds float* %tmp7176, i64 1
+  %tmp7178 = getelementptr inbounds float* %tmp7177, i64 1
+  %tmp7179 = getelementptr inbounds float* %tmp7178, i64 1
+  %tmp7180 = getelementptr inbounds float* %tmp7179, i64 1
+  %tmp7181 = getelementptr inbounds float* %tmp7180, i64 1
+  %tmp7182 = getelementptr inbounds float* %tmp7181, i64 1
+  %tmp7183 = getelementptr inbounds float* %tmp7182, i64 1
+  %tmp7184 = getelementptr inbounds float* %tmp7183, i64 1
+  %tmp7185 = getelementptr inbounds float* %tmp7184, i64 1
+  %tmp7186 = getelementptr inbounds float* %tmp7185, i64 1
+  %tmp7187 = getelementptr inbounds float* %tmp7186, i64 1
+  %tmp7188 = getelementptr inbounds float* %tmp7187, i64 1
+  %tmp7189 = getelementptr inbounds float* %tmp7188, i64 1
+  %tmp7190 = getelementptr inbounds float* %tmp7189, i64 1
+  %tmp7191 = getelementptr inbounds float* %tmp7190, i64 1
+  %tmp7192 = getelementptr inbounds float* %tmp7191, i64 1
+  %tmp7193 = getelementptr inbounds float* %tmp7192, i64 1
+  %tmp7194 = getelementptr inbounds float* %tmp7193, i64 1
+  %tmp7195 = getelementptr inbounds float* %tmp7194, i64 1
+  %tmp7196 = getelementptr inbounds float* %tmp7195, i64 1
+  %tmp7197 = getelementptr inbounds float* %tmp7196, i64 1
+  %tmp7198 = getelementptr inbounds float* %tmp7197, i64 1
+  %tmp7199 = getelementptr inbounds float* %tmp7198, i64 1
+  %tmp7200 = getelementptr inbounds float* %tmp7199, i64 1
+  %tmp7201 = getelementptr inbounds float* %tmp7200, i64 1
+  %tmp7202 = getelementptr inbounds float* %tmp7201, i64 1
+  %tmp7203 = getelementptr inbounds float* %tmp7202, i64 1
+  %tmp7204 = getelementptr inbounds float* %tmp7203, i64 1
+  %tmp7205 = getelementptr inbounds float* %tmp7204, i64 1
+  %tmp7206 = getelementptr inbounds float* %tmp7205, i64 1
+  %tmp7207 = getelementptr inbounds float* %tmp7206, i64 1
+  %tmp7208 = getelementptr inbounds float* %tmp7207, i64 1
+  %tmp7209 = getelementptr inbounds float* %tmp7208, i64 1
+  %tmp7210 = getelementptr inbounds float* %tmp7209, i64 1
+  %tmp7211 = getelementptr inbounds float* %tmp7210, i64 1
+  %tmp7212 = getelementptr inbounds float* %tmp7211, i64 1
+  %tmp7213 = getelementptr inbounds float* %tmp7212, i64 1
+  %tmp7214 = getelementptr inbounds float* %tmp7213, i64 1
+  %tmp7215 = getelementptr inbounds float* %tmp7214, i64 1
+  %tmp7216 = getelementptr inbounds float* %tmp7215, i64 1
+  %tmp7217 = getelementptr inbounds float* %tmp7216, i64 1
+  %tmp7218 = getelementptr inbounds float* %tmp7217, i64 1
+  %tmp7219 = getelementptr inbounds float* %tmp7218, i64 1
+  %tmp7220 = getelementptr inbounds float* %tmp7219, i64 1
+  %tmp7221 = getelementptr inbounds float* %tmp7220, i64 1
+  %tmp7222 = getelementptr inbounds float* %tmp7221, i64 1
+  %tmp7223 = getelementptr inbounds float* %tmp7222, i64 1
+  %tmp7224 = getelementptr inbounds float* %tmp7223, i64 1
+  %tmp7225 = getelementptr inbounds float* %tmp7224, i64 1
+  %tmp7226 = getelementptr inbounds float* %tmp7225, i64 1
+  %tmp7227 = getelementptr inbounds float* %tmp7226, i64 1
+  %tmp7228 = getelementptr inbounds float* %tmp7227, i64 1
+  %tmp7229 = getelementptr inbounds float* %tmp7228, i64 1
+  %tmp7230 = getelementptr inbounds float* %tmp7229, i64 1
+  %tmp7231 = getelementptr inbounds float* %tmp7230, i64 1
+  %tmp7232 = getelementptr inbounds float* %tmp7231, i64 1
+  %tmp7233 = getelementptr inbounds float* %tmp7232, i64 1
+  %tmp7234 = getelementptr inbounds float* %tmp7233, i64 1
+  %tmp7235 = getelementptr inbounds float* %tmp7234, i64 1
+  %tmp7236 = getelementptr inbounds float* %tmp7235, i64 1
+  %tmp7237 = getelementptr inbounds float* %tmp7236, i64 1
+  %tmp7238 = getelementptr inbounds float* %tmp7237, i64 1
+  %tmp7239 = getelementptr inbounds float* %tmp7238, i64 1
+  %tmp7240 = getelementptr inbounds float* %tmp7239, i64 1
+  %tmp7241 = getelementptr inbounds float* %tmp7240, i64 1
+  %tmp7242 = getelementptr inbounds float* %tmp7241, i64 1
+  %tmp7243 = getelementptr inbounds float* %tmp7242, i64 1
+  %tmp7244 = getelementptr inbounds float* %tmp7243, i64 1
+  %tmp7245 = getelementptr inbounds float* %tmp7244, i64 1
+  %tmp7246 = getelementptr inbounds float* %tmp7245, i64 1
+  %tmp7247 = getelementptr inbounds float* %tmp7246, i64 1
+  %tmp7248 = getelementptr inbounds float* %tmp7247, i64 1
+  %tmp7249 = getelementptr inbounds float* %tmp7248, i64 1
+  %tmp7250 = getelementptr inbounds float* %tmp7249, i64 1
+  %tmp7251 = getelementptr inbounds float* %tmp7250, i64 1
+  %tmp7252 = getelementptr inbounds float* %tmp7251, i64 1
+  %tmp7253 = getelementptr inbounds float* %tmp7252, i64 1
+  %tmp7254 = getelementptr inbounds float* %tmp7253, i64 1
+  %tmp7255 = getelementptr inbounds float* %tmp7254, i64 1
+  %tmp7256 = getelementptr inbounds float* %tmp7255, i64 1
+  %tmp7257 = getelementptr inbounds float* %tmp7256, i64 1
+  %tmp7258 = getelementptr inbounds float* %tmp7257, i64 1
+  %tmp7259 = getelementptr inbounds float* %tmp7258, i64 1
+  %tmp7260 = getelementptr inbounds float* %tmp7259, i64 1
+  %tmp7261 = getelementptr inbounds float* %tmp7260, i64 1
+  %tmp7262 = getelementptr inbounds float* %tmp7261, i64 1
+  %tmp7263 = getelementptr inbounds float* %tmp7262, i64 1
+  %tmp7264 = getelementptr inbounds float* %tmp7263, i64 1
+  %tmp7265 = getelementptr inbounds float* %tmp7264, i64 1
+  %tmp7266 = getelementptr inbounds float* %tmp7265, i64 1
+  %tmp7267 = getelementptr inbounds float* %tmp7266, i64 1
+  %tmp7268 = getelementptr inbounds float* %tmp7267, i64 1
+  %tmp7269 = getelementptr inbounds float* %tmp7268, i64 1
+  %tmp7270 = getelementptr inbounds float* %tmp7269, i64 1
+  %tmp7271 = getelementptr inbounds float* %tmp7270, i64 1
+  %tmp7272 = getelementptr inbounds float* %tmp7271, i64 1
+  %tmp7273 = getelementptr inbounds float* %tmp7272, i64 1
+  %tmp7274 = getelementptr inbounds float* %tmp7273, i64 1
+  %tmp7275 = getelementptr inbounds float* %tmp7274, i64 1
+  %tmp7276 = getelementptr inbounds float* %tmp7275, i64 1
+  %tmp7277 = getelementptr inbounds float* %tmp7276, i64 1
+  %tmp7278 = getelementptr inbounds float* %tmp7277, i64 1
+  %tmp7279 = getelementptr inbounds float* %tmp7278, i64 1
+  %tmp7280 = getelementptr inbounds float* %tmp7279, i64 1
+  %tmp7281 = getelementptr inbounds float* %tmp7280, i64 1
+  %tmp7282 = getelementptr inbounds float* %tmp7281, i64 1
+  %tmp7283 = getelementptr inbounds float* %tmp7282, i64 1
+  %tmp7284 = getelementptr inbounds float* %tmp7283, i64 1
+  %tmp7285 = getelementptr inbounds float* %tmp7284, i64 1
+  %tmp7286 = getelementptr inbounds float* %tmp7285, i64 1
+  %tmp7287 = getelementptr inbounds float* %tmp7286, i64 1
+  %tmp7288 = getelementptr inbounds float* %tmp7287, i64 1
+  %tmp7289 = getelementptr inbounds float* %tmp7288, i64 1
+  %tmp7290 = getelementptr inbounds float* %tmp7289, i64 1
+  %tmp7291 = getelementptr inbounds float* %tmp7290, i64 1
+  %tmp7292 = getelementptr inbounds float* %tmp7291, i64 1
+  %tmp7293 = getelementptr inbounds float* %tmp7292, i64 1
+  %tmp7294 = getelementptr inbounds float* %tmp7293, i64 1
+  %tmp7295 = getelementptr inbounds float* %tmp7294, i64 1
+  %tmp7296 = getelementptr inbounds float* %tmp7295, i64 1
+  %tmp7297 = getelementptr inbounds float* %tmp7296, i64 1
+  %tmp7298 = getelementptr inbounds float* %tmp7297, i64 1
+  %tmp7299 = getelementptr inbounds float* %tmp7298, i64 1
+  %tmp7300 = getelementptr inbounds float* %tmp7299, i64 1
+  %tmp7301 = getelementptr inbounds float* %tmp7300, i64 1
+  %tmp7302 = getelementptr inbounds float* %tmp7301, i64 1
+  %tmp7303 = getelementptr inbounds float* %tmp7302, i64 1
+  %tmp7304 = getelementptr inbounds float* %tmp7303, i64 1
+  %tmp7305 = getelementptr inbounds float* %tmp7304, i64 1
+  %tmp7306 = getelementptr inbounds float* %tmp7305, i64 1
+  %tmp7307 = getelementptr inbounds float* %tmp7306, i64 1
+  %tmp7308 = getelementptr inbounds float* %tmp7307, i64 1
+  %tmp7309 = getelementptr inbounds float* %tmp7308, i64 1
+  %tmp7310 = getelementptr inbounds float* %tmp7309, i64 1
+  %tmp7311 = getelementptr inbounds float* %tmp7310, i64 1
+  %tmp7312 = getelementptr inbounds float* %tmp7311, i64 1
+  %tmp7313 = getelementptr inbounds float* %tmp7312, i64 1
+  %tmp7314 = getelementptr inbounds float* %tmp7313, i64 1
+  %tmp7315 = getelementptr inbounds float* %tmp7314, i64 1
+  %tmp7316 = getelementptr inbounds float* %tmp7315, i64 1
+  %tmp7317 = getelementptr inbounds float* %tmp7316, i64 1
+  %tmp7318 = getelementptr inbounds float* %tmp7317, i64 1
+  %tmp7319 = getelementptr inbounds float* %tmp7318, i64 1
+  %tmp7320 = getelementptr inbounds float* %tmp7319, i64 1
+  %tmp7321 = getelementptr inbounds float* %tmp7320, i64 1
+  %tmp7322 = getelementptr inbounds float* %tmp7321, i64 1
+  %tmp7323 = getelementptr inbounds float* %tmp7322, i64 1
+  %tmp7324 = getelementptr inbounds float* %tmp7323, i64 1
+  %tmp7325 = getelementptr inbounds float* %tmp7324, i64 1
+  %tmp7326 = getelementptr inbounds float* %tmp7325, i64 1
+  %tmp7327 = getelementptr inbounds float* %tmp7326, i64 1
+  %tmp7328 = getelementptr inbounds float* %tmp7327, i64 1
+  %tmp7329 = getelementptr inbounds float* %tmp7328, i64 1
+  %tmp7330 = getelementptr inbounds float* %tmp7329, i64 1
+  %tmp7331 = getelementptr inbounds float* %tmp7330, i64 1
+  %tmp7332 = getelementptr inbounds float* %tmp7331, i64 1
+  %tmp7333 = getelementptr inbounds float* %tmp7332, i64 1
+  %tmp7334 = getelementptr inbounds float* %tmp7333, i64 1
+  %tmp7335 = getelementptr inbounds float* %tmp7334, i64 1
+  %tmp7336 = getelementptr inbounds float* %tmp7335, i64 1
+  %tmp7337 = getelementptr inbounds float* %tmp7336, i64 1
+  %tmp7338 = getelementptr inbounds float* %tmp7337, i64 1
+  %tmp7339 = getelementptr inbounds float* %tmp7338, i64 1
+  %tmp7340 = getelementptr inbounds float* %tmp7339, i64 1
+  %tmp7341 = getelementptr inbounds float* %tmp7340, i64 1
+  %tmp7342 = getelementptr inbounds float* %tmp7341, i64 1
+  %tmp7343 = getelementptr inbounds float* %tmp7342, i64 1
+  %tmp7344 = getelementptr inbounds float* %tmp7343, i64 1
+  %tmp7345 = getelementptr inbounds float* %tmp7344, i64 1
+  %tmp7346 = getelementptr inbounds float* %tmp7345, i64 1
+  %tmp7347 = getelementptr inbounds float* %tmp7346, i64 1
+  %tmp7348 = getelementptr inbounds float* %tmp7347, i64 1
+  %tmp7349 = getelementptr inbounds float* %tmp7348, i64 1
+  %tmp7350 = getelementptr inbounds float* %tmp7349, i64 1
+  %tmp7351 = getelementptr inbounds float* %tmp7350, i64 1
+  %tmp7352 = getelementptr inbounds float* %tmp7351, i64 1
+  %tmp7353 = getelementptr inbounds float* %tmp7352, i64 1
+  %tmp7354 = getelementptr inbounds float* %tmp7353, i64 1
+  %tmp7355 = getelementptr inbounds float* %tmp7354, i64 1
+  %tmp7356 = getelementptr inbounds float* %tmp7355, i64 1
+  %tmp7357 = getelementptr inbounds float* %tmp7356, i64 1
+  %tmp7358 = getelementptr inbounds float* %tmp7357, i64 1
+  %tmp7359 = getelementptr inbounds float* %tmp7358, i64 1
+  %tmp7360 = getelementptr inbounds float* %tmp7359, i64 1
+  %tmp7361 = getelementptr inbounds float* %tmp7360, i64 1
+  %tmp7362 = getelementptr inbounds float* %tmp7361, i64 1
+  %tmp7363 = getelementptr inbounds float* %tmp7362, i64 1
+  %tmp7364 = getelementptr inbounds float* %tmp7363, i64 1
+  %tmp7365 = getelementptr inbounds float* %tmp7364, i64 1
+  %tmp7366 = getelementptr inbounds float* %tmp7365, i64 1
+  %tmp7367 = getelementptr inbounds float* %tmp7366, i64 1
+  %tmp7368 = getelementptr inbounds float* %tmp7367, i64 1
+  %tmp7369 = getelementptr inbounds float* %tmp7368, i64 1
+  %tmp7370 = getelementptr inbounds float* %tmp7369, i64 1
+  %tmp7371 = getelementptr inbounds float* %tmp7370, i64 1
+  %tmp7372 = getelementptr inbounds float* %tmp7371, i64 1
+  %tmp7373 = getelementptr inbounds float* %tmp7372, i64 1
+  %tmp7374 = getelementptr inbounds float* %tmp7373, i64 1
+  %tmp7375 = getelementptr inbounds float* %tmp7374, i64 1
+  %tmp7376 = getelementptr inbounds float* %tmp7375, i64 1
+  %tmp7377 = getelementptr inbounds float* %tmp7376, i64 1
+  %tmp7378 = getelementptr inbounds float* %tmp7377, i64 1
+  %tmp7379 = getelementptr inbounds float* %tmp7378, i64 1
+  %tmp7380 = getelementptr inbounds float* %tmp7379, i64 1
+  %tmp7381 = getelementptr inbounds float* %tmp7380, i64 1
+  %tmp7382 = getelementptr inbounds float* %tmp7381, i64 1
+  %tmp7383 = getelementptr inbounds float* %tmp7382, i64 1
+  %tmp7384 = getelementptr inbounds float* %tmp7383, i64 1
+  %tmp7385 = getelementptr inbounds float* %tmp7384, i64 1
+  %tmp7386 = getelementptr inbounds float* %tmp7385, i64 1
+  %tmp7387 = getelementptr inbounds float* %tmp7386, i64 1
+  %tmp7388 = getelementptr inbounds float* %tmp7387, i64 1
+  %tmp7389 = getelementptr inbounds float* %tmp7388, i64 1
+  %tmp7390 = getelementptr inbounds float* %tmp7389, i64 1
+  %tmp7391 = getelementptr inbounds float* %tmp7390, i64 1
+  %tmp7392 = getelementptr inbounds float* %tmp7391, i64 1
+  %tmp7393 = getelementptr inbounds float* %tmp7392, i64 1
+  %tmp7394 = getelementptr inbounds float* %tmp7393, i64 1
+  %tmp7395 = getelementptr inbounds float* %tmp7394, i64 1
+  %tmp7396 = getelementptr inbounds float* %tmp7395, i64 1
+  %tmp7397 = getelementptr inbounds float* %tmp7396, i64 1
+  %tmp7398 = getelementptr inbounds float* %tmp7397, i64 1
+  %tmp7399 = getelementptr inbounds float* %tmp7398, i64 1
+  %tmp7400 = getelementptr inbounds float* %tmp7399, i64 1
+  %tmp7401 = getelementptr inbounds float* %tmp7400, i64 1
+  %tmp7402 = getelementptr inbounds float* %tmp7401, i64 1
+  %tmp7403 = getelementptr inbounds float* %tmp7402, i64 1
+  %tmp7404 = getelementptr inbounds float* %tmp7403, i64 1
+  %tmp7405 = getelementptr inbounds float* %tmp7404, i64 1
+  %tmp7406 = getelementptr inbounds float* %tmp7405, i64 1
+  %tmp7407 = getelementptr inbounds float* %tmp7406, i64 1
+  %tmp7408 = getelementptr inbounds float* %tmp7407, i64 1
+  %tmp7409 = getelementptr inbounds float* %tmp7408, i64 1
+  %tmp7410 = getelementptr inbounds float* %tmp7409, i64 1
+  %tmp7411 = getelementptr inbounds float* %tmp7410, i64 1
+  %tmp7412 = getelementptr inbounds float* %tmp7411, i64 1
+  %tmp7413 = getelementptr inbounds float* %tmp7412, i64 1
+  %tmp7414 = getelementptr inbounds float* %tmp7413, i64 1
+  %tmp7415 = getelementptr inbounds float* %tmp7414, i64 1
+  %tmp7416 = getelementptr inbounds float* %tmp7415, i64 1
+  %tmp7417 = getelementptr inbounds float* %tmp7416, i64 1
+  %tmp7418 = getelementptr inbounds float* %tmp7417, i64 1
+  %tmp7419 = getelementptr inbounds float* %tmp7418, i64 1
+  %tmp7420 = getelementptr inbounds float* %tmp7419, i64 1
+  %tmp7421 = getelementptr inbounds float* %tmp7420, i64 1
+  %tmp7422 = getelementptr inbounds float* %tmp7421, i64 1
+  %tmp7423 = getelementptr inbounds float* %tmp7422, i64 1
+  %tmp7424 = getelementptr inbounds float* %tmp7423, i64 1
+  %tmp7425 = getelementptr inbounds float* %tmp7424, i64 1
+  %tmp7426 = getelementptr inbounds float* %tmp7425, i64 1
+  %tmp7427 = getelementptr inbounds float* %tmp7426, i64 1
+  %tmp7428 = getelementptr inbounds float* %tmp7427, i64 1
+  %tmp7429 = getelementptr inbounds float* %tmp7428, i64 1
+  %tmp7430 = getelementptr inbounds float* %tmp7429, i64 1
+  %tmp7431 = getelementptr inbounds float* %tmp7430, i64 1
+  %tmp7432 = getelementptr inbounds float* %tmp7431, i64 1
+  %tmp7433 = getelementptr inbounds float* %tmp7432, i64 1
+  %tmp7434 = getelementptr inbounds float* %tmp7433, i64 1
+  %tmp7435 = getelementptr inbounds float* %tmp7434, i64 1
+  %tmp7436 = getelementptr inbounds float* %tmp7435, i64 1
+  %tmp7437 = getelementptr inbounds float* %tmp7436, i64 1
+  %tmp7438 = getelementptr inbounds float* %tmp7437, i64 1
+  %tmp7439 = getelementptr inbounds float* %tmp7438, i64 1
+  %tmp7440 = getelementptr inbounds float* %tmp7439, i64 1
+  %tmp7441 = getelementptr inbounds float* %tmp7440, i64 1
+  %tmp7442 = getelementptr inbounds float* %tmp7441, i64 1
+  %tmp7443 = getelementptr inbounds float* %tmp7442, i64 1
+  %tmp7444 = getelementptr inbounds float* %tmp7443, i64 1
+  %tmp7445 = getelementptr inbounds float* %tmp7444, i64 1
+  %tmp7446 = getelementptr inbounds float* %tmp7445, i64 1
+  %tmp7447 = getelementptr inbounds float* %tmp7446, i64 1
+  %tmp7448 = getelementptr inbounds float* %tmp7447, i64 1
+  %tmp7449 = getelementptr inbounds float* %tmp7448, i64 1
+  %tmp7450 = getelementptr inbounds float* %tmp7449, i64 1
+  %tmp7451 = getelementptr inbounds float* %tmp7450, i64 1
+  %tmp7452 = getelementptr inbounds float* %tmp7451, i64 1
+  %tmp7453 = getelementptr inbounds float* %tmp7452, i64 1
+  %tmp7454 = getelementptr inbounds float* %tmp7453, i64 1
+  %tmp7455 = getelementptr inbounds float* %tmp7454, i64 1
+  %tmp7456 = getelementptr inbounds float* %tmp7455, i64 1
+  %tmp7457 = getelementptr inbounds float* %tmp7456, i64 1
+  %tmp7458 = getelementptr inbounds float* %tmp7457, i64 1
+  %tmp7459 = getelementptr inbounds float* %tmp7458, i64 1
+  %tmp7460 = getelementptr inbounds float* %tmp7459, i64 1
+  %tmp7461 = getelementptr inbounds float* %tmp7460, i64 1
+  %tmp7462 = getelementptr inbounds float* %tmp7461, i64 1
+  %tmp7463 = getelementptr inbounds float* %tmp7462, i64 1
+  %tmp7464 = getelementptr inbounds float* %tmp7463, i64 1
+  %tmp7465 = getelementptr inbounds float* %tmp7464, i64 1
+  %tmp7466 = getelementptr inbounds float* %tmp7465, i64 1
+  %tmp7467 = getelementptr inbounds float* %tmp7466, i64 1
+  %tmp7468 = getelementptr inbounds float* %tmp7467, i64 1
+  %tmp7469 = getelementptr inbounds float* %tmp7468, i64 1
+  %tmp7470 = getelementptr inbounds float* %tmp7469, i64 1
+  %tmp7471 = getelementptr inbounds float* %tmp7470, i64 1
+  %tmp7472 = getelementptr inbounds float* %tmp7471, i64 1
+  %tmp7473 = getelementptr inbounds float* %tmp7472, i64 1
+  %tmp7474 = getelementptr inbounds float* %tmp7473, i64 1
+  %tmp7475 = getelementptr inbounds float* %tmp7474, i64 1
+  %tmp7476 = getelementptr inbounds float* %tmp7475, i64 1
+  %tmp7477 = getelementptr inbounds float* %tmp7476, i64 1
+  %tmp7478 = getelementptr inbounds float* %tmp7477, i64 1
+  %tmp7479 = getelementptr inbounds float* %tmp7478, i64 1
+  %tmp7480 = getelementptr inbounds float* %tmp7479, i64 1
+  %tmp7481 = getelementptr inbounds float* %tmp7480, i64 1
+  %tmp7482 = getelementptr inbounds float* %tmp7481, i64 1
+  %tmp7483 = getelementptr inbounds float* %tmp7482, i64 1
+  %tmp7484 = getelementptr inbounds float* %tmp7483, i64 1
+  %tmp7485 = getelementptr inbounds float* %tmp7484, i64 1
+  %tmp7486 = getelementptr inbounds float* %tmp7485, i64 1
+  %tmp7487 = getelementptr inbounds float* %tmp7486, i64 1
+  %tmp7488 = getelementptr inbounds float* %tmp7487, i64 1
+  %tmp7489 = getelementptr inbounds float* %tmp7488, i64 1
+  %tmp7490 = getelementptr inbounds float* %tmp7489, i64 1
+  %tmp7491 = getelementptr inbounds float* %tmp7490, i64 1
+  %tmp7492 = getelementptr inbounds float* %tmp7491, i64 1
+  %tmp7493 = getelementptr inbounds float* %tmp7492, i64 1
+  %tmp7494 = getelementptr inbounds float* %tmp7493, i64 1
+  %tmp7495 = getelementptr inbounds float* %tmp7494, i64 1
+  %tmp7496 = getelementptr inbounds float* %tmp7495, i64 1
+  %tmp7497 = getelementptr inbounds float* %tmp7496, i64 1
+  %tmp7498 = getelementptr inbounds float* %tmp7497, i64 1
+  %tmp7499 = getelementptr inbounds float* %tmp7498, i64 1
+  %tmp7500 = getelementptr inbounds float* %tmp7499, i64 1
+  %tmp7501 = getelementptr inbounds float* %tmp7500, i64 1
+  %tmp7502 = getelementptr inbounds float* %tmp7501, i64 1
+  %tmp7503 = getelementptr inbounds float* %tmp7502, i64 1
+  %tmp7504 = getelementptr inbounds float* %tmp7503, i64 1
+  %tmp7505 = getelementptr inbounds float* %tmp7504, i64 1
+  %tmp7506 = getelementptr inbounds float* %tmp7505, i64 1
+  %tmp7507 = getelementptr inbounds float* %tmp7506, i64 1
+  %tmp7508 = getelementptr inbounds float* %tmp7507, i64 1
+  %tmp7509 = getelementptr inbounds float* %tmp7508, i64 1
+  %tmp7510 = getelementptr inbounds float* %tmp7509, i64 1
+  %tmp7511 = getelementptr inbounds float* %tmp7510, i64 1
+  %tmp7512 = getelementptr inbounds float* %tmp7511, i64 1
+  %tmp7513 = getelementptr inbounds float* %tmp7512, i64 1
+  %tmp7514 = getelementptr inbounds float* %tmp7513, i64 1
+  %tmp7515 = getelementptr inbounds float* %tmp7514, i64 1
+  %tmp7516 = getelementptr inbounds float* %tmp7515, i64 1
+  %tmp7517 = getelementptr inbounds float* %tmp7516, i64 1
+  %tmp7518 = getelementptr inbounds float* %tmp7517, i64 1
+  %tmp7519 = getelementptr inbounds float* %tmp7518, i64 1
+  %tmp7520 = getelementptr inbounds float* %tmp7519, i64 1
+  %tmp7521 = getelementptr inbounds float* %tmp7520, i64 1
+  %tmp7522 = getelementptr inbounds float* %tmp7521, i64 1
+  %tmp7523 = getelementptr inbounds float* %tmp7522, i64 1
+  %tmp7524 = getelementptr inbounds float* %tmp7523, i64 1
+  %tmp7525 = getelementptr inbounds float* %tmp7524, i64 1
+  %tmp7526 = getelementptr inbounds float* %tmp7525, i64 1
+  %tmp7527 = getelementptr inbounds float* %tmp7526, i64 1
+  %tmp7528 = getelementptr inbounds float* %tmp7527, i64 1
+  %tmp7529 = getelementptr inbounds float* %tmp7528, i64 1
+  %tmp7530 = getelementptr inbounds float* %tmp7529, i64 1
+  %tmp7531 = getelementptr inbounds float* %tmp7530, i64 1
+  %tmp7532 = getelementptr inbounds float* %tmp7531, i64 1
+  %tmp7533 = getelementptr inbounds float* %tmp7532, i64 1
+  %tmp7534 = getelementptr inbounds float* %tmp7533, i64 1
+  %tmp7535 = getelementptr inbounds float* %tmp7534, i64 1
+  %tmp7536 = getelementptr inbounds float* %tmp7535, i64 1
+  %tmp7537 = getelementptr inbounds float* %tmp7536, i64 1
+  %tmp7538 = getelementptr inbounds float* %tmp7537, i64 1
+  %tmp7539 = getelementptr inbounds float* %tmp7538, i64 1
+  %tmp7540 = getelementptr inbounds float* %tmp7539, i64 1
+  %tmp7541 = getelementptr inbounds float* %tmp7540, i64 1
+  %tmp7542 = getelementptr inbounds float* %tmp7541, i64 1
+  %tmp7543 = getelementptr inbounds float* %tmp7542, i64 1
+  %tmp7544 = getelementptr inbounds float* %tmp7543, i64 1
+  %tmp7545 = getelementptr inbounds float* %tmp7544, i64 1
+  %tmp7546 = getelementptr inbounds float* %tmp7545, i64 1
+  %tmp7547 = getelementptr inbounds float* %tmp7546, i64 1
+  %tmp7548 = getelementptr inbounds float* %tmp7547, i64 1
+  %tmp7549 = getelementptr inbounds float* %tmp7548, i64 1
+  %tmp7550 = getelementptr inbounds float* %tmp7549, i64 1
+  %tmp7551 = getelementptr inbounds float* %tmp7550, i64 1
+  %tmp7552 = getelementptr inbounds float* %tmp7551, i64 1
+  %tmp7553 = getelementptr inbounds float* %tmp7552, i64 1
+  %tmp7554 = getelementptr inbounds float* %tmp7553, i64 1
+  %tmp7555 = getelementptr inbounds float* %tmp7554, i64 1
+  %tmp7556 = getelementptr inbounds float* %tmp7555, i64 1
+  %tmp7557 = getelementptr inbounds float* %tmp7556, i64 1
+  %tmp7558 = getelementptr inbounds float* %tmp7557, i64 1
+  %tmp7559 = getelementptr inbounds float* %tmp7558, i64 1
+  %tmp7560 = getelementptr inbounds float* %tmp7559, i64 1
+  %tmp7561 = getelementptr inbounds float* %tmp7560, i64 1
+  %tmp7562 = getelementptr inbounds float* %tmp7561, i64 1
+  %tmp7563 = getelementptr inbounds float* %tmp7562, i64 1
+  %tmp7564 = getelementptr inbounds float* %tmp7563, i64 1
+  %tmp7565 = getelementptr inbounds float* %tmp7564, i64 1
+  %tmp7566 = getelementptr inbounds float* %tmp7565, i64 1
+  %tmp7567 = getelementptr inbounds float* %tmp7566, i64 1
+  %tmp7568 = getelementptr inbounds float* %tmp7567, i64 1
+  %tmp7569 = getelementptr inbounds float* %tmp7568, i64 1
+  %tmp7570 = getelementptr inbounds float* %tmp7569, i64 1
+  %tmp7571 = getelementptr inbounds float* %tmp7570, i64 1
+  %tmp7572 = getelementptr inbounds float* %tmp7571, i64 1
+  %tmp7573 = getelementptr inbounds float* %tmp7572, i64 1
+  %tmp7574 = getelementptr inbounds float* %tmp7573, i64 1
+  %tmp7575 = getelementptr inbounds float* %tmp7574, i64 1
+  %tmp7576 = getelementptr inbounds float* %tmp7575, i64 1
+  %tmp7577 = getelementptr inbounds float* %tmp7576, i64 1
+  %tmp7578 = getelementptr inbounds float* %tmp7577, i64 1
+  %tmp7579 = getelementptr inbounds float* %tmp7578, i64 1
+  %tmp7580 = getelementptr inbounds float* %tmp7579, i64 1
+  %tmp7581 = getelementptr inbounds float* %tmp7580, i64 1
+  %tmp7582 = getelementptr inbounds float* %tmp7581, i64 1
+  %tmp7583 = getelementptr inbounds float* %tmp7582, i64 1
+  %tmp7584 = getelementptr inbounds float* %tmp7583, i64 1
+  %tmp7585 = getelementptr inbounds float* %tmp7584, i64 1
+  %tmp7586 = getelementptr inbounds float* %tmp7585, i64 1
+  %tmp7587 = getelementptr inbounds float* %tmp7586, i64 1
+  %tmp7588 = getelementptr inbounds float* %tmp7587, i64 1
+  %tmp7589 = getelementptr inbounds float* %tmp7588, i64 1
+  %tmp7590 = getelementptr inbounds float* %tmp7589, i64 1
+  %tmp7591 = getelementptr inbounds float* %tmp7590, i64 1
+  %tmp7592 = getelementptr inbounds float* %tmp7591, i64 1
+  %tmp7593 = getelementptr inbounds float* %tmp7592, i64 1
+  %tmp7594 = getelementptr inbounds float* %tmp7593, i64 1
+  %tmp7595 = getelementptr inbounds float* %tmp7594, i64 1
+  %tmp7596 = getelementptr inbounds float* %tmp7595, i64 1
+  %tmp7597 = getelementptr inbounds float* %tmp7596, i64 1
+  %tmp7598 = getelementptr inbounds float* %tmp7597, i64 1
+  %tmp7599 = getelementptr inbounds float* %tmp7598, i64 1
+  %tmp7600 = getelementptr inbounds float* %tmp7599, i64 1
+  %tmp7601 = getelementptr inbounds float* %tmp7600, i64 1
+  %tmp7602 = getelementptr inbounds float* %tmp7601, i64 1
+  %tmp7603 = getelementptr inbounds float* %tmp7602, i64 1
+  %tmp7604 = getelementptr inbounds float* %tmp7603, i64 1
+  %tmp7605 = getelementptr inbounds float* %tmp7604, i64 1
+  %tmp7606 = getelementptr inbounds float* %tmp7605, i64 1
+  %tmp7607 = getelementptr inbounds float* %tmp7606, i64 1
+  %tmp7608 = getelementptr inbounds float* %tmp7607, i64 1
+  %tmp7609 = getelementptr inbounds float* %tmp7608, i64 1
+  %tmp7610 = getelementptr inbounds float* %tmp7609, i64 1
+  %tmp7611 = getelementptr inbounds float* %tmp7610, i64 1
+  %tmp7612 = getelementptr inbounds float* %tmp7611, i64 1
+  %tmp7613 = getelementptr inbounds float* %tmp7612, i64 1
+  %tmp7614 = getelementptr inbounds float* %tmp7613, i64 1
+  %tmp7615 = getelementptr inbounds float* %tmp7614, i64 1
+  %tmp7616 = getelementptr inbounds float* %tmp7615, i64 1
+  %tmp7617 = getelementptr inbounds float* %tmp7616, i64 1
+  %tmp7618 = getelementptr inbounds float* %tmp7617, i64 1
+  %tmp7619 = getelementptr inbounds float* %tmp7618, i64 1
+  %tmp7620 = getelementptr inbounds float* %tmp7619, i64 1
+  %tmp7621 = getelementptr inbounds float* %tmp7620, i64 1
+  %tmp7622 = getelementptr inbounds float* %tmp7621, i64 1
+  %tmp7623 = getelementptr inbounds float* %tmp7622, i64 1
+  %tmp7624 = getelementptr inbounds float* %tmp7623, i64 1
+  %tmp7625 = getelementptr inbounds float* %tmp7624, i64 1
+  %tmp7626 = getelementptr inbounds float* %tmp7625, i64 1
+  %tmp7627 = getelementptr inbounds float* %tmp7626, i64 1
+  %tmp7628 = getelementptr inbounds float* %tmp7627, i64 1
+  %tmp7629 = getelementptr inbounds float* %tmp7628, i64 1
+  %tmp7630 = getelementptr inbounds float* %tmp7629, i64 1
+  %tmp7631 = getelementptr inbounds float* %tmp7630, i64 1
+  %tmp7632 = getelementptr inbounds float* %tmp7631, i64 1
+  %tmp7633 = getelementptr inbounds float* %tmp7632, i64 1
+  %tmp7634 = getelementptr inbounds float* %tmp7633, i64 1
+  %tmp7635 = getelementptr inbounds float* %tmp7634, i64 1
+  %tmp7636 = getelementptr inbounds float* %tmp7635, i64 1
+  %tmp7637 = getelementptr inbounds float* %tmp7636, i64 1
+  %tmp7638 = getelementptr inbounds float* %tmp7637, i64 1
+  %tmp7639 = getelementptr inbounds float* %tmp7638, i64 1
+  %tmp7640 = getelementptr inbounds float* %tmp7639, i64 1
+  %tmp7641 = getelementptr inbounds float* %tmp7640, i64 1
+  %tmp7642 = getelementptr inbounds float* %tmp7641, i64 1
+  %tmp7643 = getelementptr inbounds float* %tmp7642, i64 1
+  %tmp7644 = getelementptr inbounds float* %tmp7643, i64 1
+  %tmp7645 = getelementptr inbounds float* %tmp7644, i64 1
+  %tmp7646 = getelementptr inbounds float* %tmp7645, i64 1
+  %tmp7647 = getelementptr inbounds float* %tmp7646, i64 1
+  %tmp7648 = getelementptr inbounds float* %tmp7647, i64 1
+  %tmp7649 = getelementptr inbounds float* %tmp7648, i64 1
+  %tmp7650 = getelementptr inbounds float* %tmp7649, i64 1
+  %tmp7651 = getelementptr inbounds float* %tmp7650, i64 1
+  %tmp7652 = getelementptr inbounds float* %tmp7651, i64 1
+  %tmp7653 = getelementptr inbounds float* %tmp7652, i64 1
+  %tmp7654 = getelementptr inbounds float* %tmp7653, i64 1
+  %tmp7655 = getelementptr inbounds float* %tmp7654, i64 1
+  %tmp7656 = getelementptr inbounds float* %tmp7655, i64 1
+  %tmp7657 = getelementptr inbounds float* %tmp7656, i64 1
+  %tmp7658 = getelementptr inbounds float* %tmp7657, i64 1
+  %tmp7659 = getelementptr inbounds float* %tmp7658, i64 1
+  %tmp7660 = getelementptr inbounds float* %tmp7659, i64 1
+  %tmp7661 = getelementptr inbounds float* %tmp7660, i64 1
+  %tmp7662 = getelementptr inbounds float* %tmp7661, i64 1
+  %tmp7663 = getelementptr inbounds float* %tmp7662, i64 1
+  %tmp7664 = getelementptr inbounds float* %tmp7663, i64 1
+  %tmp7665 = getelementptr inbounds float* %tmp7664, i64 1
+  %tmp7666 = getelementptr inbounds float* %tmp7665, i64 1
+  %tmp7667 = getelementptr inbounds float* %tmp7666, i64 1
+  %tmp7668 = getelementptr inbounds float* %tmp7667, i64 1
+  %tmp7669 = getelementptr inbounds float* %tmp7668, i64 1
+  %tmp7670 = getelementptr inbounds float* %tmp7669, i64 1
+  %tmp7671 = getelementptr inbounds float* %tmp7670, i64 1
+  %tmp7672 = getelementptr inbounds float* %tmp7671, i64 1
+  %tmp7673 = getelementptr inbounds float* %tmp7672, i64 1
+  %tmp7674 = getelementptr inbounds float* %tmp7673, i64 1
+  %tmp7675 = getelementptr inbounds float* %tmp7674, i64 1
+  %tmp7676 = getelementptr inbounds float* %tmp7675, i64 1
+  %tmp7677 = getelementptr inbounds float* %tmp7676, i64 1
+  %tmp7678 = getelementptr inbounds float* %tmp7677, i64 1
+  %tmp7679 = getelementptr inbounds float* %tmp7678, i64 1
+  %tmp7680 = getelementptr inbounds float* %tmp7679, i64 1
+  %tmp7681 = getelementptr inbounds float* %tmp7680, i64 1
+  %tmp7682 = getelementptr inbounds float* %tmp7681, i64 1
+  %tmp7683 = getelementptr inbounds float* %tmp7682, i64 1
+  %tmp7684 = getelementptr inbounds float* %tmp7683, i64 1
+  %tmp7685 = getelementptr inbounds float* %tmp7684, i64 1
+  %tmp7686 = getelementptr inbounds float* %tmp7685, i64 1
+  %tmp7687 = getelementptr inbounds float* %tmp7686, i64 1
+  %tmp7688 = getelementptr inbounds float* %tmp7687, i64 1
+  %tmp7689 = getelementptr inbounds float* %tmp7688, i64 1
+  %tmp7690 = getelementptr inbounds float* %tmp7689, i64 1
+  %tmp7691 = getelementptr inbounds float* %tmp7690, i64 1
+  %tmp7692 = getelementptr inbounds float* %tmp7691, i64 1
+  %tmp7693 = getelementptr inbounds float* %tmp7692, i64 1
+  %tmp7694 = getelementptr inbounds float* %tmp7693, i64 1
+  %tmp7695 = getelementptr inbounds float* %tmp7694, i64 1
+  %tmp7696 = getelementptr inbounds float* %tmp7695, i64 1
+  %tmp7697 = getelementptr inbounds float* %tmp7696, i64 1
+  %tmp7698 = getelementptr inbounds float* %tmp7697, i64 1
+  %tmp7699 = getelementptr inbounds float* %tmp7698, i64 1
+  %tmp7700 = getelementptr inbounds float* %tmp7699, i64 1
+  %tmp7701 = getelementptr inbounds float* %tmp7700, i64 1
+  %tmp7702 = getelementptr inbounds float* %tmp7701, i64 1
+  %tmp7703 = getelementptr inbounds float* %tmp7702, i64 1
+  %tmp7704 = getelementptr inbounds float* %tmp7703, i64 1
+  %tmp7705 = getelementptr inbounds float* %tmp7704, i64 1
+  %tmp7706 = getelementptr inbounds float* %tmp7705, i64 1
+  %tmp7707 = getelementptr inbounds float* %tmp7706, i64 1
+  %tmp7708 = getelementptr inbounds float* %tmp7707, i64 1
+  %tmp7709 = getelementptr inbounds float* %tmp7708, i64 1
+  %tmp7710 = getelementptr inbounds float* %tmp7709, i64 1
+  %tmp7711 = getelementptr inbounds float* %tmp7710, i64 1
+  %tmp7712 = getelementptr inbounds float* %tmp7711, i64 1
+  %tmp7713 = getelementptr inbounds float* %tmp7712, i64 1
+  %tmp7714 = getelementptr inbounds float* %tmp7713, i64 1
+  %tmp7715 = getelementptr inbounds float* %tmp7714, i64 1
+  %tmp7716 = getelementptr inbounds float* %tmp7715, i64 1
+  %tmp7717 = getelementptr inbounds float* %tmp7716, i64 1
+  %tmp7718 = getelementptr inbounds float* %tmp7717, i64 1
+  %tmp7719 = getelementptr inbounds float* %tmp7718, i64 1
+  %tmp7720 = getelementptr inbounds float* %tmp7719, i64 1
+  %tmp7721 = getelementptr inbounds float* %tmp7720, i64 1
+  %tmp7722 = getelementptr inbounds float* %tmp7721, i64 1
+  %tmp7723 = getelementptr inbounds float* %tmp7722, i64 1
+  %tmp7724 = getelementptr inbounds float* %tmp7723, i64 1
+  %tmp7725 = getelementptr inbounds float* %tmp7724, i64 1
+  %tmp7726 = getelementptr inbounds float* %tmp7725, i64 1
+  %tmp7727 = getelementptr inbounds float* %tmp7726, i64 1
+  %tmp7728 = getelementptr inbounds float* %tmp7727, i64 1
+  %tmp7729 = getelementptr inbounds float* %tmp7728, i64 1
+  %tmp7730 = getelementptr inbounds float* %tmp7729, i64 1
+  %tmp7731 = getelementptr inbounds float* %tmp7730, i64 1
+  %tmp7732 = getelementptr inbounds float* %tmp7731, i64 1
+  %tmp7733 = getelementptr inbounds float* %tmp7732, i64 1
+  %tmp7734 = getelementptr inbounds float* %tmp7733, i64 1
+  %tmp7735 = getelementptr inbounds float* %tmp7734, i64 1
+  %tmp7736 = getelementptr inbounds float* %tmp7735, i64 1
+  %tmp7737 = getelementptr inbounds float* %tmp7736, i64 1
+  %tmp7738 = getelementptr inbounds float* %tmp7737, i64 1
+  %tmp7739 = getelementptr inbounds float* %tmp7738, i64 1
+  %tmp7740 = getelementptr inbounds float* %tmp7739, i64 1
+  %tmp7741 = getelementptr inbounds float* %tmp7740, i64 1
+  %tmp7742 = getelementptr inbounds float* %tmp7741, i64 1
+  %tmp7743 = getelementptr inbounds float* %tmp7742, i64 1
+  %tmp7744 = getelementptr inbounds float* %tmp7743, i64 1
+  %tmp7745 = getelementptr inbounds float* %tmp7744, i64 1
+  %tmp7746 = getelementptr inbounds float* %tmp7745, i64 1
+  %tmp7747 = getelementptr inbounds float* %tmp7746, i64 1
+  %tmp7748 = getelementptr inbounds float* %tmp7747, i64 1
+  %tmp7749 = getelementptr inbounds float* %tmp7748, i64 1
+  %tmp7750 = getelementptr inbounds float* %tmp7749, i64 1
+  %tmp7751 = getelementptr inbounds float* %tmp7750, i64 1
+  %tmp7752 = getelementptr inbounds float* %tmp7751, i64 1
+  %tmp7753 = getelementptr inbounds float* %tmp7752, i64 1
+  %tmp7754 = getelementptr inbounds float* %tmp7753, i64 1
+  %tmp7755 = getelementptr inbounds float* %tmp7754, i64 1
+  %tmp7756 = getelementptr inbounds float* %tmp7755, i64 1
+  %tmp7757 = getelementptr inbounds float* %tmp7756, i64 1
+  %tmp7758 = getelementptr inbounds float* %tmp7757, i64 1
+  %tmp7759 = getelementptr inbounds float* %tmp7758, i64 1
+  %tmp7760 = getelementptr inbounds float* %tmp7759, i64 1
+  %tmp7761 = getelementptr inbounds float* %tmp7760, i64 1
+  %tmp7762 = getelementptr inbounds float* %tmp7761, i64 1
+  %tmp7763 = getelementptr inbounds float* %tmp7762, i64 1
+  %tmp7764 = getelementptr inbounds float* %tmp7763, i64 1
+  %tmp7765 = getelementptr inbounds float* %tmp7764, i64 1
+  %tmp7766 = getelementptr inbounds float* %tmp7765, i64 1
+  %tmp7767 = getelementptr inbounds float* %tmp7766, i64 1
+  %tmp7768 = getelementptr inbounds float* %tmp7767, i64 1
+  %tmp7769 = getelementptr inbounds float* %tmp7768, i64 1
+  %tmp7770 = getelementptr inbounds float* %tmp7769, i64 1
+  %tmp7771 = getelementptr inbounds float* %tmp7770, i64 1
+  %tmp7772 = getelementptr inbounds float* %tmp7771, i64 1
+  %tmp7773 = getelementptr inbounds float* %tmp7772, i64 1
+  %tmp7774 = getelementptr inbounds float* %tmp7773, i64 1
+  %tmp7775 = getelementptr inbounds float* %tmp7774, i64 1
+  %tmp7776 = getelementptr inbounds float* %tmp7775, i64 1
+  %tmp7777 = getelementptr inbounds float* %tmp7776, i64 1
+  %tmp7778 = getelementptr inbounds float* %tmp7777, i64 1
+  %tmp7779 = getelementptr inbounds float* %tmp7778, i64 1
+  %tmp7780 = getelementptr inbounds float* %tmp7779, i64 1
+  %tmp7781 = getelementptr inbounds float* %tmp7780, i64 1
+  %tmp7782 = getelementptr inbounds float* %tmp7781, i64 1
+  %tmp7783 = getelementptr inbounds float* %tmp7782, i64 1
+  %tmp7784 = getelementptr inbounds float* %tmp7783, i64 1
+  %tmp7785 = getelementptr inbounds float* %tmp7784, i64 1
+  %tmp7786 = getelementptr inbounds float* %tmp7785, i64 1
+  %tmp7787 = getelementptr inbounds float* %tmp7786, i64 1
+  %tmp7788 = getelementptr inbounds float* %tmp7787, i64 1
+  %tmp7789 = getelementptr inbounds float* %tmp7788, i64 1
+  %tmp7790 = getelementptr inbounds float* %tmp7789, i64 1
+  %tmp7791 = getelementptr inbounds float* %tmp7790, i64 1
+  %tmp7792 = getelementptr inbounds float* %tmp7791, i64 1
+  %tmp7793 = getelementptr inbounds float* %tmp7792, i64 1
+  %tmp7794 = getelementptr inbounds float* %tmp7793, i64 1
+  %tmp7795 = getelementptr inbounds float* %tmp7794, i64 1
+  %tmp7796 = getelementptr inbounds float* %tmp7795, i64 1
+  %tmp7797 = getelementptr inbounds float* %tmp7796, i64 1
+  %tmp7798 = getelementptr inbounds float* %tmp7797, i64 1
+  %tmp7799 = getelementptr inbounds float* %tmp7798, i64 1
+  %tmp7800 = getelementptr inbounds float* %tmp7799, i64 1
+  %tmp7801 = getelementptr inbounds float* %tmp7800, i64 1
+  %tmp7802 = getelementptr inbounds float* %tmp7801, i64 1
+  %tmp7803 = getelementptr inbounds float* %tmp7802, i64 1
+  %tmp7804 = getelementptr inbounds float* %tmp7803, i64 1
+  %tmp7805 = getelementptr inbounds float* %tmp7804, i64 1
+  %tmp7806 = getelementptr inbounds float* %tmp7805, i64 1
+  %tmp7807 = getelementptr inbounds float* %tmp7806, i64 1
+  %tmp7808 = getelementptr inbounds float* %tmp7807, i64 1
+  %tmp7809 = getelementptr inbounds float* %tmp7808, i64 1
+  %tmp7810 = getelementptr inbounds float* %tmp7809, i64 1
+  %tmp7811 = getelementptr inbounds float* %tmp7810, i64 1
+  %tmp7812 = getelementptr inbounds float* %tmp7811, i64 1
+  %tmp7813 = getelementptr inbounds float* %tmp7812, i64 1
+  %tmp7814 = getelementptr inbounds float* %tmp7813, i64 1
+  %tmp7815 = getelementptr inbounds float* %tmp7814, i64 1
+  %tmp7816 = getelementptr inbounds float* %tmp7815, i64 1
+  %tmp7817 = getelementptr inbounds float* %tmp7816, i64 1
+  %tmp7818 = getelementptr inbounds float* %tmp7817, i64 1
+  %tmp7819 = getelementptr inbounds float* %tmp7818, i64 1
+  %tmp7820 = getelementptr inbounds float* %tmp7819, i64 1
+  %tmp7821 = getelementptr inbounds float* %tmp7820, i64 1
+  %tmp7822 = getelementptr inbounds float* %tmp7821, i64 1
+  %tmp7823 = getelementptr inbounds float* %tmp7822, i64 1
+  %tmp7824 = getelementptr inbounds float* %tmp7823, i64 1
+  %tmp7825 = getelementptr inbounds float* %tmp7824, i64 1
+  %tmp7826 = getelementptr inbounds float* %tmp7825, i64 1
+  %tmp7827 = getelementptr inbounds float* %tmp7826, i64 1
+  %tmp7828 = getelementptr inbounds float* %tmp7827, i64 1
+  %tmp7829 = getelementptr inbounds float* %tmp7828, i64 1
+  %tmp7830 = getelementptr inbounds float* %tmp7829, i64 1
+  %tmp7831 = getelementptr inbounds float* %tmp7830, i64 1
+  %tmp7832 = getelementptr inbounds float* %tmp7831, i64 1
+  %tmp7833 = getelementptr inbounds float* %tmp7832, i64 1
+  %tmp7834 = getelementptr inbounds float* %tmp7833, i64 1
+  %tmp7835 = getelementptr inbounds float* %tmp7834, i64 1
+  %tmp7836 = getelementptr inbounds float* %tmp7835, i64 1
+  %tmp7837 = getelementptr inbounds float* %tmp7836, i64 1
+  %tmp7838 = getelementptr inbounds float* %tmp7837, i64 1
+  %tmp7839 = getelementptr inbounds float* %tmp7838, i64 1
+  %tmp7840 = getelementptr inbounds float* %tmp7839, i64 1
+  %tmp7841 = getelementptr inbounds float* %tmp7840, i64 1
+  %tmp7842 = getelementptr inbounds float* %tmp7841, i64 1
+  %tmp7843 = getelementptr inbounds float* %tmp7842, i64 1
+  %tmp7844 = getelementptr inbounds float* %tmp7843, i64 1
+  %tmp7845 = getelementptr inbounds float* %tmp7844, i64 1
+  %tmp7846 = getelementptr inbounds float* %tmp7845, i64 1
+  %tmp7847 = getelementptr inbounds float* %tmp7846, i64 1
+  %tmp7848 = getelementptr inbounds float* %tmp7847, i64 1
+  %tmp7849 = getelementptr inbounds float* %tmp7848, i64 1
+  %tmp7850 = getelementptr inbounds float* %tmp7849, i64 1
+  %tmp7851 = getelementptr inbounds float* %tmp7850, i64 1
+  %tmp7852 = getelementptr inbounds float* %tmp7851, i64 1
+  %tmp7853 = getelementptr inbounds float* %tmp7852, i64 1
+  %tmp7854 = getelementptr inbounds float* %tmp7853, i64 1
+  %tmp7855 = getelementptr inbounds float* %tmp7854, i64 1
+  %tmp7856 = getelementptr inbounds float* %tmp7855, i64 1
+  %tmp7857 = getelementptr inbounds float* %tmp7856, i64 1
+  %tmp7858 = getelementptr inbounds float* %tmp7857, i64 1
+  %tmp7859 = getelementptr inbounds float* %tmp7858, i64 1
+  %tmp7860 = getelementptr inbounds float* %tmp7859, i64 1
+  %tmp7861 = getelementptr inbounds float* %tmp7860, i64 1
+  %tmp7862 = getelementptr inbounds float* %tmp7861, i64 1
+  %tmp7863 = getelementptr inbounds float* %tmp7862, i64 1
+  %tmp7864 = getelementptr inbounds float* %tmp7863, i64 1
+  %tmp7865 = getelementptr inbounds float* %tmp7864, i64 1
+  %tmp7866 = getelementptr inbounds float* %tmp7865, i64 1
+  %tmp7867 = getelementptr inbounds float* %tmp7866, i64 1
+  %tmp7868 = getelementptr inbounds float* %tmp7867, i64 1
+  %tmp7869 = getelementptr inbounds float* %tmp7868, i64 1
+  %tmp7870 = getelementptr inbounds float* %tmp7869, i64 1
+  %tmp7871 = getelementptr inbounds float* %tmp7870, i64 1
+  %tmp7872 = getelementptr inbounds float* %tmp7871, i64 1
+  %tmp7873 = getelementptr inbounds float* %tmp7872, i64 1
+  %tmp7874 = getelementptr inbounds float* %tmp7873, i64 1
+  %tmp7875 = getelementptr inbounds float* %tmp7874, i64 1
+  %tmp7876 = getelementptr inbounds float* %tmp7875, i64 1
+  %tmp7877 = getelementptr inbounds float* %tmp7876, i64 1
+  %tmp7878 = getelementptr inbounds float* %tmp7877, i64 1
+  %tmp7879 = getelementptr inbounds float* %tmp7878, i64 1
+  %tmp7880 = getelementptr inbounds float* %tmp7879, i64 1
+  %tmp7881 = getelementptr inbounds float* %tmp7880, i64 1
+  %tmp7882 = getelementptr inbounds float* %tmp7881, i64 1
+  %tmp7883 = getelementptr inbounds float* %tmp7882, i64 1
+  %tmp7884 = getelementptr inbounds float* %tmp7883, i64 1
+  %tmp7885 = getelementptr inbounds float* %tmp7884, i64 1
+  %tmp7886 = getelementptr inbounds float* %tmp7885, i64 1
+  %tmp7887 = getelementptr inbounds float* %tmp7886, i64 1
+  %tmp7888 = getelementptr inbounds float* %tmp7887, i64 1
+  %tmp7889 = getelementptr inbounds float* %tmp7888, i64 1
+  %tmp7890 = getelementptr inbounds float* %tmp7889, i64 1
+  %tmp7891 = getelementptr inbounds float* %tmp7890, i64 1
+  %tmp7892 = getelementptr inbounds float* %tmp7891, i64 1
+  %tmp7893 = getelementptr inbounds float* %tmp7892, i64 1
+  %tmp7894 = getelementptr inbounds float* %tmp7893, i64 1
+  %tmp7895 = getelementptr inbounds float* %tmp7894, i64 1
+  %tmp7896 = getelementptr inbounds float* %tmp7895, i64 1
+  %tmp7897 = getelementptr inbounds float* %tmp7896, i64 1
+  %tmp7898 = getelementptr inbounds float* %tmp7897, i64 1
+  %tmp7899 = getelementptr inbounds float* %tmp7898, i64 1
+  %tmp7900 = getelementptr inbounds float* %tmp7899, i64 1
+  %tmp7901 = getelementptr inbounds float* %tmp7900, i64 1
+  %tmp7902 = getelementptr inbounds float* %tmp7901, i64 1
+  %tmp7903 = getelementptr inbounds float* %tmp7902, i64 1
+  %tmp7904 = getelementptr inbounds float* %tmp7903, i64 1
+  %tmp7905 = getelementptr inbounds float* %tmp7904, i64 1
+  %tmp7906 = getelementptr inbounds float* %tmp7905, i64 1
+  %tmp7907 = getelementptr inbounds float* %tmp7906, i64 1
+  %tmp7908 = getelementptr inbounds float* %tmp7907, i64 1
+  %tmp7909 = getelementptr inbounds float* %tmp7908, i64 1
+  %tmp7910 = getelementptr inbounds float* %tmp7909, i64 1
+  %tmp7911 = getelementptr inbounds float* %tmp7910, i64 1
+  %tmp7912 = getelementptr inbounds float* %tmp7911, i64 1
+  %tmp7913 = getelementptr inbounds float* %tmp7912, i64 1
+  %tmp7914 = getelementptr inbounds float* %tmp7913, i64 1
+  %tmp7915 = getelementptr inbounds float* %tmp7914, i64 1
+  %tmp7916 = getelementptr inbounds float* %tmp7915, i64 1
+  %tmp7917 = getelementptr inbounds float* %tmp7916, i64 1
+  %tmp7918 = getelementptr inbounds float* %tmp7917, i64 1
+  %tmp7919 = getelementptr inbounds float* %tmp7918, i64 1
+  %tmp7920 = getelementptr inbounds float* %tmp7919, i64 1
+  %tmp7921 = getelementptr inbounds float* %tmp7920, i64 1
+  %tmp7922 = getelementptr inbounds float* %tmp7921, i64 1
+  %tmp7923 = getelementptr inbounds float* %tmp7922, i64 1
+  %tmp7924 = getelementptr inbounds float* %tmp7923, i64 1
+  %tmp7925 = getelementptr inbounds float* %tmp7924, i64 1
+  %tmp7926 = getelementptr inbounds float* %tmp7925, i64 1
+  %tmp7927 = getelementptr inbounds float* %tmp7926, i64 1
+  %tmp7928 = getelementptr inbounds float* %tmp7927, i64 1
+  %tmp7929 = getelementptr inbounds float* %tmp7928, i64 1
+  %tmp7930 = getelementptr inbounds float* %tmp7929, i64 1
+  %tmp7931 = getelementptr inbounds float* %tmp7930, i64 1
+  %tmp7932 = getelementptr inbounds float* %tmp7931, i64 1
+  %tmp7933 = getelementptr inbounds float* %tmp7932, i64 1
+  %tmp7934 = getelementptr inbounds float* %tmp7933, i64 1
+  %tmp7935 = getelementptr inbounds float* %tmp7934, i64 1
+  %tmp7936 = getelementptr inbounds float* %tmp7935, i64 1
+  %tmp7937 = getelementptr inbounds float* %tmp7936, i64 1
+  %tmp7938 = getelementptr inbounds float* %tmp7937, i64 1
+  %tmp7939 = getelementptr inbounds float* %tmp7938, i64 1
+  %tmp7940 = getelementptr inbounds float* %tmp7939, i64 1
+  %tmp7941 = getelementptr inbounds float* %tmp7940, i64 1
+  %tmp7942 = getelementptr inbounds float* %tmp7941, i64 1
+  %tmp7943 = getelementptr inbounds float* %tmp7942, i64 1
+  %tmp7944 = getelementptr inbounds float* %tmp7943, i64 1
+  %tmp7945 = getelementptr inbounds float* %tmp7944, i64 1
+  %tmp7946 = getelementptr inbounds float* %tmp7945, i64 1
+  %tmp7947 = getelementptr inbounds float* %tmp7946, i64 1
+  %tmp7948 = getelementptr inbounds float* %tmp7947, i64 1
+  %tmp7949 = getelementptr inbounds float* %tmp7948, i64 1
+  %tmp7950 = getelementptr inbounds float* %tmp7949, i64 1
+  %tmp7951 = getelementptr inbounds float* %tmp7950, i64 1
+  %tmp7952 = getelementptr inbounds float* %tmp7951, i64 1
+  %tmp7953 = getelementptr inbounds float* %tmp7952, i64 1
+  %tmp7954 = getelementptr inbounds float* %tmp7953, i64 1
+  %tmp7955 = getelementptr inbounds float* %tmp7954, i64 1
+  %tmp7956 = getelementptr inbounds float* %tmp7955, i64 1
+  %tmp7957 = getelementptr inbounds float* %tmp7956, i64 1
+  %tmp7958 = getelementptr inbounds float* %tmp7957, i64 1
+  %tmp7959 = getelementptr inbounds float* %tmp7958, i64 1
+  %tmp7960 = getelementptr inbounds float* %tmp7959, i64 1
+  %tmp7961 = getelementptr inbounds float* %tmp7960, i64 1
+  %tmp7962 = getelementptr inbounds float* %tmp7961, i64 1
+  %tmp7963 = getelementptr inbounds float* %tmp7962, i64 1
+  %tmp7964 = getelementptr inbounds float* %tmp7963, i64 1
+  %tmp7965 = getelementptr inbounds float* %tmp7964, i64 1
+  %tmp7966 = getelementptr inbounds float* %tmp7965, i64 1
+  %tmp7967 = getelementptr inbounds float* %tmp7966, i64 1
+  %tmp7968 = getelementptr inbounds float* %tmp7967, i64 1
+  %tmp7969 = getelementptr inbounds float* %tmp7968, i64 1
+  %tmp7970 = getelementptr inbounds float* %tmp7969, i64 1
+  %tmp7971 = getelementptr inbounds float* %tmp7970, i64 1
+  %tmp7972 = getelementptr inbounds float* %tmp7971, i64 1
+  %tmp7973 = getelementptr inbounds float* %tmp7972, i64 1
+  %tmp7974 = getelementptr inbounds float* %tmp7973, i64 1
+  %tmp7975 = getelementptr inbounds float* %tmp7974, i64 1
+  %tmp7976 = getelementptr inbounds float* %tmp7975, i64 1
+  %tmp7977 = getelementptr inbounds float* %tmp7976, i64 1
+  %tmp7978 = getelementptr inbounds float* %tmp7977, i64 1
+  %tmp7979 = getelementptr inbounds float* %tmp7978, i64 1
+  %tmp7980 = getelementptr inbounds float* %tmp7979, i64 1
+  %tmp7981 = getelementptr inbounds float* %tmp7980, i64 1
+  %tmp7982 = getelementptr inbounds float* %tmp7981, i64 1
+  %tmp7983 = getelementptr inbounds float* %tmp7982, i64 1
+  %tmp7984 = getelementptr inbounds float* %tmp7983, i64 1
+  %tmp7985 = getelementptr inbounds float* %tmp7984, i64 1
+  %tmp7986 = getelementptr inbounds float* %tmp7985, i64 1
+  %tmp7987 = getelementptr inbounds float* %tmp7986, i64 1
+  %tmp7988 = getelementptr inbounds float* %tmp7987, i64 1
+  %tmp7989 = getelementptr inbounds float* %tmp7988, i64 1
+  %tmp7990 = getelementptr inbounds float* %tmp7989, i64 1
+  %tmp7991 = getelementptr inbounds float* %tmp7990, i64 1
+  %tmp7992 = getelementptr inbounds float* %tmp7991, i64 1
+  %tmp7993 = getelementptr inbounds float* %tmp7992, i64 1
+  %tmp7994 = getelementptr inbounds float* %tmp7993, i64 1
+  %tmp7995 = getelementptr inbounds float* %tmp7994, i64 1
+  %tmp7996 = getelementptr inbounds float* %tmp7995, i64 1
+  %tmp7997 = getelementptr inbounds float* %tmp7996, i64 1
+  %tmp7998 = getelementptr inbounds float* %tmp7997, i64 1
+  %tmp7999 = getelementptr inbounds float* %tmp7998, i64 1
+  %tmp8000 = getelementptr inbounds float* %tmp7999, i64 1
+  %tmp8001 = getelementptr inbounds float* %tmp8000, i64 1
+  %tmp8002 = getelementptr inbounds float* %tmp8001, i64 1
+  %tmp8003 = getelementptr inbounds float* %tmp8002, i64 1
+  %tmp8004 = getelementptr inbounds float* %tmp8003, i64 1
+  %tmp8005 = getelementptr inbounds float* %tmp8004, i64 1
+  %tmp8006 = getelementptr inbounds float* %tmp8005, i64 1
+  %tmp8007 = getelementptr inbounds float* %tmp8006, i64 1
+  %tmp8008 = getelementptr inbounds float* %tmp8007, i64 1
+  %tmp8009 = getelementptr inbounds float* %tmp8008, i64 1
+  %tmp8010 = getelementptr inbounds float* %tmp8009, i64 1
+  %tmp8011 = getelementptr inbounds float* %tmp8010, i64 1
+  %tmp8012 = getelementptr inbounds float* %tmp8011, i64 1
+  %tmp8013 = getelementptr inbounds float* %tmp8012, i64 1
+  %tmp8014 = getelementptr inbounds float* %tmp8013, i64 1
+  %tmp8015 = getelementptr inbounds float* %tmp8014, i64 1
+  %tmp8016 = getelementptr inbounds float* %tmp8015, i64 1
+  %tmp8017 = getelementptr inbounds float* %tmp8016, i64 1
+  %tmp8018 = getelementptr inbounds float* %tmp8017, i64 1
+  %tmp8019 = getelementptr inbounds float* %tmp8018, i64 1
+  %tmp8020 = getelementptr inbounds float* %tmp8019, i64 1
+  %tmp8021 = getelementptr inbounds float* %tmp8020, i64 1
+  %tmp8022 = getelementptr inbounds float* %tmp8021, i64 1
+  %tmp8023 = getelementptr inbounds float* %tmp8022, i64 1
+  %tmp8024 = getelementptr inbounds float* %tmp8023, i64 1
+  %tmp8025 = getelementptr inbounds float* %tmp8024, i64 1
+  %tmp8026 = getelementptr inbounds float* %tmp8025, i64 1
+  %tmp8027 = getelementptr inbounds float* %tmp8026, i64 1
+  %tmp8028 = getelementptr inbounds float* %tmp8027, i64 1
+  %tmp8029 = getelementptr inbounds float* %tmp8028, i64 1
+  %tmp8030 = getelementptr inbounds float* %tmp8029, i64 1
+  %tmp8031 = getelementptr inbounds float* %tmp8030, i64 1
+  %tmp8032 = getelementptr inbounds float* %tmp8031, i64 1
+  %tmp8033 = getelementptr inbounds float* %tmp8032, i64 1
+  %tmp8034 = getelementptr inbounds float* %tmp8033, i64 1
+  %tmp8035 = getelementptr inbounds float* %tmp8034, i64 1
+  %tmp8036 = getelementptr inbounds float* %tmp8035, i64 1
+  %tmp8037 = getelementptr inbounds float* %tmp8036, i64 1
+  %tmp8038 = getelementptr inbounds float* %tmp8037, i64 1
+  %tmp8039 = getelementptr inbounds float* %tmp8038, i64 1
+  %tmp8040 = getelementptr inbounds float* %tmp8039, i64 1
+  %tmp8041 = getelementptr inbounds float* %tmp8040, i64 1
+  %tmp8042 = getelementptr inbounds float* %tmp8041, i64 1
+  %tmp8043 = getelementptr inbounds float* %tmp8042, i64 1
+  %tmp8044 = getelementptr inbounds float* %tmp8043, i64 1
+  %tmp8045 = getelementptr inbounds float* %tmp8044, i64 1
+  %tmp8046 = getelementptr inbounds float* %tmp8045, i64 1
+  %tmp8047 = getelementptr inbounds float* %tmp8046, i64 1
+  %tmp8048 = getelementptr inbounds float* %tmp8047, i64 1
+  %tmp8049 = getelementptr inbounds float* %tmp8048, i64 1
+  %tmp8050 = getelementptr inbounds float* %tmp8049, i64 1
+  %tmp8051 = getelementptr inbounds float* %tmp8050, i64 1
+  %tmp8052 = getelementptr inbounds float* %tmp8051, i64 1
+  %tmp8053 = getelementptr inbounds float* %tmp8052, i64 1
+  %tmp8054 = getelementptr inbounds float* %tmp8053, i64 1
+  %tmp8055 = getelementptr inbounds float* %tmp8054, i64 1
+  %tmp8056 = getelementptr inbounds float* %tmp8055, i64 1
+  %tmp8057 = getelementptr inbounds float* %tmp8056, i64 1
+  %tmp8058 = getelementptr inbounds float* %tmp8057, i64 1
+  %tmp8059 = getelementptr inbounds float* %tmp8058, i64 1
+  %tmp8060 = getelementptr inbounds float* %tmp8059, i64 1
+  %tmp8061 = getelementptr inbounds float* %tmp8060, i64 1
+  %tmp8062 = getelementptr inbounds float* %tmp8061, i64 1
+  %tmp8063 = getelementptr inbounds float* %tmp8062, i64 1
+  %tmp8064 = getelementptr inbounds float* %tmp8063, i64 1
+  %tmp8065 = getelementptr inbounds float* %tmp8064, i64 1
+  %tmp8066 = getelementptr inbounds float* %tmp8065, i64 1
+  %tmp8067 = getelementptr inbounds float* %tmp8066, i64 1
+  %tmp8068 = getelementptr inbounds float* %tmp8067, i64 1
+  %tmp8069 = getelementptr inbounds float* %tmp8068, i64 1
+  %tmp8070 = getelementptr inbounds float* %tmp8069, i64 1
+  %tmp8071 = getelementptr inbounds float* %tmp8070, i64 1
+  %tmp8072 = getelementptr inbounds float* %tmp8071, i64 1
+  %tmp8073 = getelementptr inbounds float* %tmp8072, i64 1
+  %tmp8074 = getelementptr inbounds float* %tmp8073, i64 1
+  %tmp8075 = getelementptr inbounds float* %tmp8074, i64 1
+  %tmp8076 = getelementptr inbounds float* %tmp8075, i64 1
+  %tmp8077 = getelementptr inbounds float* %tmp8076, i64 1
+  %tmp8078 = getelementptr inbounds float* %tmp8077, i64 1
+  %tmp8079 = getelementptr inbounds float* %tmp8078, i64 1
+  %tmp8080 = getelementptr inbounds float* %tmp8079, i64 1
+  %tmp8081 = getelementptr inbounds float* %tmp8080, i64 1
+  %tmp8082 = getelementptr inbounds float* %tmp8081, i64 1
+  %tmp8083 = getelementptr inbounds float* %tmp8082, i64 1
+  %tmp8084 = getelementptr inbounds float* %tmp8083, i64 1
+  %tmp8085 = getelementptr inbounds float* %tmp8084, i64 1
+  %tmp8086 = getelementptr inbounds float* %tmp8085, i64 1
+  %tmp8087 = getelementptr inbounds float* %tmp8086, i64 1
+  %tmp8088 = getelementptr inbounds float* %tmp8087, i64 1
+  %tmp8089 = getelementptr inbounds float* %tmp8088, i64 1
+  %tmp8090 = getelementptr inbounds float* %tmp8089, i64 1
+  %tmp8091 = getelementptr inbounds float* %tmp8090, i64 1
+  %tmp8092 = getelementptr inbounds float* %tmp8091, i64 1
+  %tmp8093 = getelementptr inbounds float* %tmp8092, i64 1
+  %tmp8094 = getelementptr inbounds float* %tmp8093, i64 1
+  %tmp8095 = getelementptr inbounds float* %tmp8094, i64 1
+  %tmp8096 = getelementptr inbounds float* %tmp8095, i64 1
+  %tmp8097 = getelementptr inbounds float* %tmp8096, i64 1
+  %tmp8098 = getelementptr inbounds float* %tmp8097, i64 1
+  %tmp8099 = getelementptr inbounds float* %tmp8098, i64 1
+  %tmp8100 = getelementptr inbounds float* %tmp8099, i64 1
+  %tmp8101 = getelementptr inbounds float* %tmp8100, i64 1
+  %tmp8102 = getelementptr inbounds float* %tmp8101, i64 1
+  %tmp8103 = getelementptr inbounds float* %tmp8102, i64 1
+  %tmp8104 = getelementptr inbounds float* %tmp8103, i64 1
+  %tmp8105 = getelementptr inbounds float* %tmp8104, i64 1
+  %tmp8106 = getelementptr inbounds float* %tmp8105, i64 1
+  %tmp8107 = getelementptr inbounds float* %tmp8106, i64 1
+  %tmp8108 = getelementptr inbounds float* %tmp8107, i64 1
+  %tmp8109 = getelementptr inbounds float* %tmp8108, i64 1
+  %tmp8110 = getelementptr inbounds float* %tmp8109, i64 1
+  %tmp8111 = getelementptr inbounds float* %tmp8110, i64 1
+  %tmp8112 = getelementptr inbounds float* %tmp8111, i64 1
+  %tmp8113 = getelementptr inbounds float* %tmp8112, i64 1
+  %tmp8114 = getelementptr inbounds float* %tmp8113, i64 1
+  %tmp8115 = getelementptr inbounds float* %tmp8114, i64 1
+  %tmp8116 = getelementptr inbounds float* %tmp8115, i64 1
+  %tmp8117 = getelementptr inbounds float* %tmp8116, i64 1
+  %tmp8118 = getelementptr inbounds float* %tmp8117, i64 1
+  %tmp8119 = getelementptr inbounds float* %tmp8118, i64 1
+  %tmp8120 = getelementptr inbounds float* %tmp8119, i64 1
+  %tmp8121 = getelementptr inbounds float* %tmp8120, i64 1
+  %tmp8122 = getelementptr inbounds float* %tmp8121, i64 1
+  %tmp8123 = getelementptr inbounds float* %tmp8122, i64 1
+  %tmp8124 = getelementptr inbounds float* %tmp8123, i64 1
+  %tmp8125 = getelementptr inbounds float* %tmp8124, i64 1
+  %tmp8126 = getelementptr inbounds float* %tmp8125, i64 1
+  %tmp8127 = getelementptr inbounds float* %tmp8126, i64 1
+  %tmp8128 = getelementptr inbounds float* %tmp8127, i64 1
+  %tmp8129 = getelementptr inbounds float* %tmp8128, i64 1
+  %tmp8130 = getelementptr inbounds float* %tmp8129, i64 1
+  %tmp8131 = getelementptr inbounds float* %tmp8130, i64 1
+  %tmp8132 = getelementptr inbounds float* %tmp8131, i64 1
+  %tmp8133 = getelementptr inbounds float* %tmp8132, i64 1
+  %tmp8134 = getelementptr inbounds float* %tmp8133, i64 1
+  %tmp8135 = getelementptr inbounds float* %tmp8134, i64 1
+  %tmp8136 = getelementptr inbounds float* %tmp8135, i64 1
+  %tmp8137 = getelementptr inbounds float* %tmp8136, i64 1
+  %tmp8138 = getelementptr inbounds float* %tmp8137, i64 1
+  %tmp8139 = getelementptr inbounds float* %tmp8138, i64 1
+  %tmp8140 = getelementptr inbounds float* %tmp8139, i64 1
+  %tmp8141 = getelementptr inbounds float* %tmp8140, i64 1
+  %tmp8142 = getelementptr inbounds float* %tmp8141, i64 1
+  %tmp8143 = getelementptr inbounds float* %tmp8142, i64 1
+  %tmp8144 = getelementptr inbounds float* %tmp8143, i64 1
+  %tmp8145 = getelementptr inbounds float* %tmp8144, i64 1
+  %tmp8146 = getelementptr inbounds float* %tmp8145, i64 1
+  %tmp8147 = getelementptr inbounds float* %tmp8146, i64 1
+  %tmp8148 = getelementptr inbounds float* %tmp8147, i64 1
+  %tmp8149 = getelementptr inbounds float* %tmp8148, i64 1
+  %tmp8150 = getelementptr inbounds float* %tmp8149, i64 1
+  %tmp8151 = getelementptr inbounds float* %tmp8150, i64 1
+  %tmp8152 = getelementptr inbounds float* %tmp8151, i64 1
+  %tmp8153 = getelementptr inbounds float* %tmp8152, i64 1
+  %tmp8154 = getelementptr inbounds float* %tmp8153, i64 1
+  %tmp8155 = getelementptr inbounds float* %tmp8154, i64 1
+  %tmp8156 = getelementptr inbounds float* %tmp8155, i64 1
+  %tmp8157 = getelementptr inbounds float* %tmp8156, i64 1
+  %tmp8158 = getelementptr inbounds float* %tmp8157, i64 1
+  %tmp8159 = getelementptr inbounds float* %tmp8158, i64 1
+  %tmp8160 = getelementptr inbounds float* %tmp8159, i64 1
+  %tmp8161 = getelementptr inbounds float* %tmp8160, i64 1
+  %tmp8162 = getelementptr inbounds float* %tmp8161, i64 1
+  %tmp8163 = getelementptr inbounds float* %tmp8162, i64 1
+  %tmp8164 = getelementptr inbounds float* %tmp8163, i64 1
+  %tmp8165 = getelementptr inbounds float* %tmp8164, i64 1
+  %tmp8166 = getelementptr inbounds float* %tmp8165, i64 1
+  %tmp8167 = getelementptr inbounds float* %tmp8166, i64 1
+  %tmp8168 = getelementptr inbounds float* %tmp8167, i64 1
+  %tmp8169 = getelementptr inbounds float* %tmp8168, i64 1
+  %tmp8170 = getelementptr inbounds float* %tmp8169, i64 1
+  %tmp8171 = getelementptr inbounds float* %tmp8170, i64 1
+  %tmp8172 = getelementptr inbounds float* %tmp8171, i64 1
+  %tmp8173 = getelementptr inbounds float* %tmp8172, i64 1
+  %tmp8174 = getelementptr inbounds float* %tmp8173, i64 1
+  %tmp8175 = getelementptr inbounds float* %tmp8174, i64 1
+  %tmp8176 = getelementptr inbounds float* %tmp8175, i64 1
+  %tmp8177 = getelementptr inbounds float* %tmp8176, i64 1
+  %tmp8178 = getelementptr inbounds float* %tmp8177, i64 1
+  %tmp8179 = getelementptr inbounds float* %tmp8178, i64 1
+  %tmp8180 = getelementptr inbounds float* %tmp8179, i64 1
+  %tmp8181 = getelementptr inbounds float* %tmp8180, i64 1
+  %tmp8182 = getelementptr inbounds float* %tmp8181, i64 1
+  %tmp8183 = getelementptr inbounds float* %tmp8182, i64 1
+  %tmp8184 = getelementptr inbounds float* %tmp8183, i64 1
+  %tmp8185 = getelementptr inbounds float* %tmp8184, i64 1
+  %tmp8186 = getelementptr inbounds float* %tmp8185, i64 1
+  %tmp8187 = getelementptr inbounds float* %tmp8186, i64 1
+  %tmp8188 = getelementptr inbounds float* %tmp8187, i64 1
+  %tmp8189 = getelementptr inbounds float* %tmp8188, i64 1
+  %tmp8190 = getelementptr inbounds float* %tmp8189, i64 1
+  %tmp8191 = getelementptr inbounds float* %tmp8190, i64 1
+  %tmp8192 = getelementptr inbounds float* %tmp8191, i64 1
+  %tmp8193 = getelementptr inbounds float* %tmp8192, i64 1
+  %tmp8194 = getelementptr inbounds float* %tmp8193, i64 1
+  %tmp8195 = getelementptr inbounds float* %tmp8194, i64 1
+  %tmp8196 = getelementptr inbounds float* %tmp8195, i64 1
+  %tmp8197 = getelementptr inbounds float* %tmp8196, i64 1
+  %tmp8198 = getelementptr inbounds float* %tmp8197, i64 1
+  %tmp8199 = getelementptr inbounds float* %tmp8198, i64 1
+  %tmp8200 = getelementptr inbounds float* %tmp8199, i64 1
+  %tmp8201 = getelementptr inbounds float* %tmp8200, i64 1
+  %tmp8202 = getelementptr inbounds float* %tmp8201, i64 1
+  %tmp8203 = getelementptr inbounds float* %tmp8202, i64 1
+  %tmp8204 = getelementptr inbounds float* %tmp8203, i64 1
+  %tmp8205 = getelementptr inbounds float* %tmp8204, i64 1
+  %tmp8206 = getelementptr inbounds float* %tmp8205, i64 1
+  %tmp8207 = getelementptr inbounds float* %tmp8206, i64 1
+  %tmp8208 = getelementptr inbounds float* %tmp8207, i64 1
+  %tmp8209 = getelementptr inbounds float* %tmp8208, i64 1
+  %tmp8210 = getelementptr inbounds float* %tmp8209, i64 1
+  %tmp8211 = getelementptr inbounds float* %tmp8210, i64 1
+  %tmp8212 = getelementptr inbounds float* %tmp8211, i64 1
+  %tmp8213 = getelementptr inbounds float* %tmp8212, i64 1
+  %tmp8214 = getelementptr inbounds float* %tmp8213, i64 1
+  %tmp8215 = getelementptr inbounds float* %tmp8214, i64 1
+  %tmp8216 = getelementptr inbounds float* %tmp8215, i64 1
+  %tmp8217 = getelementptr inbounds float* %tmp8216, i64 1
+  %tmp8218 = getelementptr inbounds float* %tmp8217, i64 1
+  %tmp8219 = getelementptr inbounds float* %tmp8218, i64 1
+  %tmp8220 = getelementptr inbounds float* %tmp8219, i64 1
+  %tmp8221 = getelementptr inbounds float* %tmp8220, i64 1
+  %tmp8222 = getelementptr inbounds float* %tmp8221, i64 1
+  %tmp8223 = getelementptr inbounds float* %tmp8222, i64 1
+  %tmp8224 = getelementptr inbounds float* %tmp8223, i64 1
+  %tmp8225 = getelementptr inbounds float* %tmp8224, i64 1
+  %tmp8226 = getelementptr inbounds float* %tmp8225, i64 1
+  %tmp8227 = getelementptr inbounds float* %tmp8226, i64 1
+  %tmp8228 = getelementptr inbounds float* %tmp8227, i64 1
+  %tmp8229 = getelementptr inbounds float* %tmp8228, i64 1
+  %tmp8230 = getelementptr inbounds float* %tmp8229, i64 1
+  %tmp8231 = getelementptr inbounds float* %tmp8230, i64 1
+  %tmp8232 = getelementptr inbounds float* %tmp8231, i64 1
+  %tmp8233 = getelementptr inbounds float* %tmp8232, i64 1
+  %tmp8234 = getelementptr inbounds float* %tmp8233, i64 1
+  %tmp8235 = getelementptr inbounds float* %tmp8234, i64 1
+  %tmp8236 = getelementptr inbounds float* %tmp8235, i64 1
+  %tmp8237 = getelementptr inbounds float* %tmp8236, i64 1
+  %tmp8238 = getelementptr inbounds float* %tmp8237, i64 1
+  %tmp8239 = getelementptr inbounds float* %tmp8238, i64 1
+  %tmp8240 = getelementptr inbounds float* %tmp8239, i64 1
+  %tmp8241 = getelementptr inbounds float* %tmp8240, i64 1
+  %tmp8242 = getelementptr inbounds float* %tmp8241, i64 1
+  %tmp8243 = getelementptr inbounds float* %tmp8242, i64 1
+  %tmp8244 = getelementptr inbounds float* %tmp8243, i64 1
+  %tmp8245 = getelementptr inbounds float* %tmp8244, i64 1
+  %tmp8246 = getelementptr inbounds float* %tmp8245, i64 1
+  %tmp8247 = getelementptr inbounds float* %tmp8246, i64 1
+  %tmp8248 = getelementptr inbounds float* %tmp8247, i64 1
+  %tmp8249 = getelementptr inbounds float* %tmp8248, i64 1
+  %tmp8250 = getelementptr inbounds float* %tmp8249, i64 1
+  %tmp8251 = getelementptr inbounds float* %tmp8250, i64 1
+  %tmp8252 = getelementptr inbounds float* %tmp8251, i64 1
+  %tmp8253 = getelementptr inbounds float* %tmp8252, i64 1
+  %tmp8254 = getelementptr inbounds float* %tmp8253, i64 1
+  %tmp8255 = getelementptr inbounds float* %tmp8254, i64 1
+  %tmp8256 = getelementptr inbounds float* %tmp8255, i64 1
+  %tmp8257 = getelementptr inbounds float* %tmp8256, i64 1
+  %tmp8258 = getelementptr inbounds float* %tmp8257, i64 1
+  %tmp8259 = getelementptr inbounds float* %tmp8258, i64 1
+  %tmp8260 = getelementptr inbounds float* %tmp8259, i64 1
+  %tmp8261 = getelementptr inbounds float* %tmp8260, i64 1
+  %tmp8262 = getelementptr inbounds float* %tmp8261, i64 1
+  %tmp8263 = getelementptr inbounds float* %tmp8262, i64 1
+  %tmp8264 = getelementptr inbounds float* %tmp8263, i64 1
+  %tmp8265 = getelementptr inbounds float* %tmp8264, i64 1
+  %tmp8266 = getelementptr inbounds float* %tmp8265, i64 1
+  %tmp8267 = getelementptr inbounds float* %tmp8266, i64 1
+  %tmp8268 = getelementptr inbounds float* %tmp8267, i64 1
+  %tmp8269 = getelementptr inbounds float* %tmp8268, i64 1
+  %tmp8270 = getelementptr inbounds float* %tmp8269, i64 1
+  %tmp8271 = getelementptr inbounds float* %tmp8270, i64 1
+  %tmp8272 = getelementptr inbounds float* %tmp8271, i64 1
+  %tmp8273 = getelementptr inbounds float* %tmp8272, i64 1
+  %tmp8274 = getelementptr inbounds float* %tmp8273, i64 1
+  %tmp8275 = getelementptr inbounds float* %tmp8274, i64 1
+  %tmp8276 = getelementptr inbounds float* %tmp8275, i64 1
+  %tmp8277 = getelementptr inbounds float* %tmp8276, i64 1
+  %tmp8278 = getelementptr inbounds float* %tmp8277, i64 1
+  %tmp8279 = getelementptr inbounds float* %tmp8278, i64 1
+  %tmp8280 = getelementptr inbounds float* %tmp8279, i64 1
+  %tmp8281 = getelementptr inbounds float* %tmp8280, i64 1
+  %tmp8282 = getelementptr inbounds float* %tmp8281, i64 1
+  %tmp8283 = getelementptr inbounds float* %tmp8282, i64 1
+  %tmp8284 = getelementptr inbounds float* %tmp8283, i64 1
+  %tmp8285 = getelementptr inbounds float* %tmp8284, i64 1
+  %tmp8286 = getelementptr inbounds float* %tmp8285, i64 1
+  %tmp8287 = getelementptr inbounds float* %tmp8286, i64 1
+  %tmp8288 = getelementptr inbounds float* %tmp8287, i64 1
+  %tmp8289 = getelementptr inbounds float* %tmp8288, i64 1
+  %tmp8290 = getelementptr inbounds float* %tmp8289, i64 1
+  %tmp8291 = getelementptr inbounds float* %tmp8290, i64 1
+  %tmp8292 = getelementptr inbounds float* %tmp8291, i64 1
+  %tmp8293 = getelementptr inbounds float* %tmp8292, i64 1
+  %tmp8294 = getelementptr inbounds float* %tmp8293, i64 1
+  %tmp8295 = getelementptr inbounds float* %tmp8294, i64 1
+  %tmp8296 = getelementptr inbounds float* %tmp8295, i64 1
+  %tmp8297 = getelementptr inbounds float* %tmp8296, i64 1
+  %tmp8298 = getelementptr inbounds float* %tmp8297, i64 1
+  %tmp8299 = getelementptr inbounds float* %tmp8298, i64 1
+  %tmp8300 = getelementptr inbounds float* %tmp8299, i64 1
+  %tmp8301 = getelementptr inbounds float* %tmp8300, i64 1
+  %tmp8302 = getelementptr inbounds float* %tmp8301, i64 1
+  %tmp8303 = getelementptr inbounds float* %tmp8302, i64 1
+  %tmp8304 = getelementptr inbounds float* %tmp8303, i64 1
+  %tmp8305 = getelementptr inbounds float* %tmp8304, i64 1
+  %tmp8306 = getelementptr inbounds float* %tmp8305, i64 1
+  %tmp8307 = getelementptr inbounds float* %tmp8306, i64 1
+  %tmp8308 = getelementptr inbounds float* %tmp8307, i64 1
+  %tmp8309 = getelementptr inbounds float* %tmp8308, i64 1
+  %tmp8310 = getelementptr inbounds float* %tmp8309, i64 1
+  %tmp8311 = getelementptr inbounds float* %tmp8310, i64 1
+  %tmp8312 = getelementptr inbounds float* %tmp8311, i64 1
+  %tmp8313 = getelementptr inbounds float* %tmp8312, i64 1
+  %tmp8314 = getelementptr inbounds float* %tmp8313, i64 1
+  %tmp8315 = getelementptr inbounds float* %tmp8314, i64 1
+  %tmp8316 = getelementptr inbounds float* %tmp8315, i64 1
+  %tmp8317 = getelementptr inbounds float* %tmp8316, i64 1
+  %tmp8318 = getelementptr inbounds float* %tmp8317, i64 1
+  %tmp8319 = getelementptr inbounds float* %tmp8318, i64 1
+  %tmp8320 = getelementptr inbounds float* %tmp8319, i64 1
+  %tmp8321 = getelementptr inbounds float* %tmp8320, i64 1
+  %tmp8322 = getelementptr inbounds float* %tmp8321, i64 1
+  %tmp8323 = getelementptr inbounds float* %tmp8322, i64 1
+  %tmp8324 = getelementptr inbounds float* %tmp8323, i64 1
+  %tmp8325 = getelementptr inbounds float* %tmp8324, i64 1
+  %tmp8326 = getelementptr inbounds float* %tmp8325, i64 1
+  %tmp8327 = getelementptr inbounds float* %tmp8326, i64 1
+  %tmp8328 = getelementptr inbounds float* %tmp8327, i64 1
+  %tmp8329 = getelementptr inbounds float* %tmp8328, i64 1
+  %tmp8330 = getelementptr inbounds float* %tmp8329, i64 1
+  %tmp8331 = getelementptr inbounds float* %tmp8330, i64 1
+  %tmp8332 = getelementptr inbounds float* %tmp8331, i64 1
+  %tmp8333 = getelementptr inbounds float* %tmp8332, i64 1
+  %tmp8334 = getelementptr inbounds float* %tmp8333, i64 1
+  %tmp8335 = getelementptr inbounds float* %tmp8334, i64 1
+  %tmp8336 = getelementptr inbounds float* %tmp8335, i64 1
+  %tmp8337 = getelementptr inbounds float* %tmp8336, i64 1
+  %tmp8338 = getelementptr inbounds float* %tmp8337, i64 1
+  %tmp8339 = getelementptr inbounds float* %tmp8338, i64 1
+  %tmp8340 = getelementptr inbounds float* %tmp8339, i64 1
+  %tmp8341 = getelementptr inbounds float* %tmp8340, i64 1
+  %tmp8342 = getelementptr inbounds float* %tmp8341, i64 1
+  %tmp8343 = getelementptr inbounds float* %tmp8342, i64 1
+  %tmp8344 = getelementptr inbounds float* %tmp8343, i64 1
+  %tmp8345 = getelementptr inbounds float* %tmp8344, i64 1
+  %tmp8346 = getelementptr inbounds float* %tmp8345, i64 1
+  %tmp8347 = getelementptr inbounds float* %tmp8346, i64 1
+  %tmp8348 = getelementptr inbounds float* %tmp8347, i64 1
+  %tmp8349 = getelementptr inbounds float* %tmp8348, i64 1
+  %tmp8350 = getelementptr inbounds float* %tmp8349, i64 1
+  %tmp8351 = getelementptr inbounds float* %tmp8350, i64 1
+  %tmp8352 = getelementptr inbounds float* %tmp8351, i64 1
+  %tmp8353 = getelementptr inbounds float* %tmp8352, i64 1
+  %tmp8354 = getelementptr inbounds float* %tmp8353, i64 1
+  %tmp8355 = getelementptr inbounds float* %tmp8354, i64 1
+  %tmp8356 = getelementptr inbounds float* %tmp8355, i64 1
+  %tmp8357 = getelementptr inbounds float* %tmp8356, i64 1
+  %tmp8358 = getelementptr inbounds float* %tmp8357, i64 1
+  %tmp8359 = getelementptr inbounds float* %tmp8358, i64 1
+  %tmp8360 = getelementptr inbounds float* %tmp8359, i64 1
+  %tmp8361 = getelementptr inbounds float* %tmp8360, i64 1
+  %tmp8362 = getelementptr inbounds float* %tmp8361, i64 1
+  %tmp8363 = getelementptr inbounds float* %tmp8362, i64 1
+  %tmp8364 = getelementptr inbounds float* %tmp8363, i64 1
+  %tmp8365 = getelementptr inbounds float* %tmp8364, i64 1
+  %tmp8366 = getelementptr inbounds float* %tmp8365, i64 1
+  %tmp8367 = getelementptr inbounds float* %tmp8366, i64 1
+  %tmp8368 = getelementptr inbounds float* %tmp8367, i64 1
+  %tmp8369 = getelementptr inbounds float* %tmp8368, i64 1
+  %tmp8370 = getelementptr inbounds float* %tmp8369, i64 1
+  %tmp8371 = getelementptr inbounds float* %tmp8370, i64 1
+  %tmp8372 = getelementptr inbounds float* %tmp8371, i64 1
+  %tmp8373 = getelementptr inbounds float* %tmp8372, i64 1
+  %tmp8374 = getelementptr inbounds float* %tmp8373, i64 1
+  %tmp8375 = getelementptr inbounds float* %tmp8374, i64 1
+  %tmp8376 = getelementptr inbounds float* %tmp8375, i64 1
+  %tmp8377 = getelementptr inbounds float* %tmp8376, i64 1
+  %tmp8378 = getelementptr inbounds float* %tmp8377, i64 1
+  %tmp8379 = getelementptr inbounds float* %tmp8378, i64 1
+  %tmp8380 = getelementptr inbounds float* %tmp8379, i64 1
+  %tmp8381 = getelementptr inbounds float* %tmp8380, i64 1
+  %tmp8382 = getelementptr inbounds float* %tmp8381, i64 1
+  %tmp8383 = getelementptr inbounds float* %tmp8382, i64 1
+  %tmp8384 = getelementptr inbounds float* %tmp8383, i64 1
+  %tmp8385 = getelementptr inbounds float* %tmp8384, i64 1
+  %tmp8386 = getelementptr inbounds float* %tmp8385, i64 1
+  %tmp8387 = getelementptr inbounds float* %tmp8386, i64 1
+  %tmp8388 = getelementptr inbounds float* %tmp8387, i64 1
+  %tmp8389 = getelementptr inbounds float* %tmp8388, i64 1
+  %tmp8390 = getelementptr inbounds float* %tmp8389, i64 1
+  %tmp8391 = getelementptr inbounds float* %tmp8390, i64 1
+  %tmp8392 = getelementptr inbounds float* %tmp8391, i64 1
+  %tmp8393 = getelementptr inbounds float* %tmp8392, i64 1
+  %tmp8394 = getelementptr inbounds float* %tmp8393, i64 1
+  %tmp8395 = getelementptr inbounds float* %tmp8394, i64 1
+  %tmp8396 = getelementptr inbounds float* %tmp8395, i64 1
+  %tmp8397 = getelementptr inbounds float* %tmp8396, i64 1
+  %tmp8398 = getelementptr inbounds float* %tmp8397, i64 1
+  %tmp8399 = getelementptr inbounds float* %tmp8398, i64 1
+  %tmp8400 = getelementptr inbounds float* %tmp8399, i64 1
+  %tmp8401 = getelementptr inbounds float* %tmp8400, i64 1
+  %tmp8402 = getelementptr inbounds float* %tmp8401, i64 1
+  %tmp8403 = getelementptr inbounds float* %tmp8402, i64 1
+  %tmp8404 = getelementptr inbounds float* %tmp8403, i64 1
+  %tmp8405 = getelementptr inbounds float* %tmp8404, i64 1
+  %tmp8406 = getelementptr inbounds float* %tmp8405, i64 1
+  %tmp8407 = getelementptr inbounds float* %tmp8406, i64 1
+  %tmp8408 = getelementptr inbounds float* %tmp8407, i64 1
+  %tmp8409 = getelementptr inbounds float* %tmp8408, i64 1
+  %tmp8410 = getelementptr inbounds float* %tmp8409, i64 1
+  %tmp8411 = getelementptr inbounds float* %tmp8410, i64 1
+  %tmp8412 = getelementptr inbounds float* %tmp8411, i64 1
+  %tmp8413 = getelementptr inbounds float* %tmp8412, i64 1
+  %tmp8414 = getelementptr inbounds float* %tmp8413, i64 1
+  %tmp8415 = getelementptr inbounds float* %tmp8414, i64 1
+  %tmp8416 = getelementptr inbounds float* %tmp8415, i64 1
+  %tmp8417 = getelementptr inbounds float* %tmp8416, i64 1
+  %tmp8418 = getelementptr inbounds float* %tmp8417, i64 1
+  %tmp8419 = getelementptr inbounds float* %tmp8418, i64 1
+  %tmp8420 = getelementptr inbounds float* %tmp8419, i64 1
+  %tmp8421 = getelementptr inbounds float* %tmp8420, i64 1
+  %tmp8422 = getelementptr inbounds float* %tmp8421, i64 1
+  %tmp8423 = getelementptr inbounds float* %tmp8422, i64 1
+  %tmp8424 = getelementptr inbounds float* %tmp8423, i64 1
+  %tmp8425 = getelementptr inbounds float* %tmp8424, i64 1
+  %tmp8426 = getelementptr inbounds float* %tmp8425, i64 1
+  %tmp8427 = getelementptr inbounds float* %tmp8426, i64 1
+  %tmp8428 = getelementptr inbounds float* %tmp8427, i64 1
+  %tmp8429 = getelementptr inbounds float* %tmp8428, i64 1
+  %tmp8430 = getelementptr inbounds float* %tmp8429, i64 1
+  %tmp8431 = getelementptr inbounds float* %tmp8430, i64 1
+  %tmp8432 = getelementptr inbounds float* %tmp8431, i64 1
+  %tmp8433 = getelementptr inbounds float* %tmp8432, i64 1
+  %tmp8434 = getelementptr inbounds float* %tmp8433, i64 1
+  %tmp8435 = getelementptr inbounds float* %tmp8434, i64 1
+  %tmp8436 = getelementptr inbounds float* %tmp8435, i64 1
+  %tmp8437 = getelementptr inbounds float* %tmp8436, i64 1
+  %tmp8438 = getelementptr inbounds float* %tmp8437, i64 1
+  %tmp8439 = getelementptr inbounds float* %tmp8438, i64 1
+  %tmp8440 = getelementptr inbounds float* %tmp8439, i64 1
+  %tmp8441 = getelementptr inbounds float* %tmp8440, i64 1
+  %tmp8442 = getelementptr inbounds float* %tmp8441, i64 1
+  %tmp8443 = getelementptr inbounds float* %tmp8442, i64 1
+  %tmp8444 = getelementptr inbounds float* %tmp8443, i64 1
+  %tmp8445 = getelementptr inbounds float* %tmp8444, i64 1
+  %tmp8446 = getelementptr inbounds float* %tmp8445, i64 1
+  %tmp8447 = getelementptr inbounds float* %tmp8446, i64 1
+  %tmp8448 = getelementptr inbounds float* %tmp8447, i64 1
+  %tmp8449 = getelementptr inbounds float* %tmp8448, i64 1
+  %tmp8450 = getelementptr inbounds float* %tmp8449, i64 1
+  %tmp8451 = getelementptr inbounds float* %tmp8450, i64 1
+  %tmp8452 = getelementptr inbounds float* %tmp8451, i64 1
+  %tmp8453 = getelementptr inbounds float* %tmp8452, i64 1
+  %tmp8454 = getelementptr inbounds float* %tmp8453, i64 1
+  %tmp8455 = getelementptr inbounds float* %tmp8454, i64 1
+  %tmp8456 = getelementptr inbounds float* %tmp8455, i64 1
+  %tmp8457 = getelementptr inbounds float* %tmp8456, i64 1
+  %tmp8458 = getelementptr inbounds float* %tmp8457, i64 1
+  %tmp8459 = getelementptr inbounds float* %tmp8458, i64 1
+  %tmp8460 = getelementptr inbounds float* %tmp8459, i64 1
+  %tmp8461 = getelementptr inbounds float* %tmp8460, i64 1
+  %tmp8462 = getelementptr inbounds float* %tmp8461, i64 1
+  %tmp8463 = getelementptr inbounds float* %tmp8462, i64 1
+  %tmp8464 = getelementptr inbounds float* %tmp8463, i64 1
+  %tmp8465 = getelementptr inbounds float* %tmp8464, i64 1
+  %tmp8466 = getelementptr inbounds float* %tmp8465, i64 1
+  %tmp8467 = getelementptr inbounds float* %tmp8466, i64 1
+  %tmp8468 = getelementptr inbounds float* %tmp8467, i64 1
+  %tmp8469 = getelementptr inbounds float* %tmp8468, i64 1
+  %tmp8470 = getelementptr inbounds float* %tmp8469, i64 1
+  %tmp8471 = getelementptr inbounds float* %tmp8470, i64 1
+  %tmp8472 = getelementptr inbounds float* %tmp8471, i64 1
+  %tmp8473 = getelementptr inbounds float* %tmp8472, i64 1
+  %tmp8474 = getelementptr inbounds float* %tmp8473, i64 1
+  %tmp8475 = getelementptr inbounds float* %tmp8474, i64 1
+  %tmp8476 = getelementptr inbounds float* %tmp8475, i64 1
+  %tmp8477 = getelementptr inbounds float* %tmp8476, i64 1
+  %tmp8478 = getelementptr inbounds float* %tmp8477, i64 1
+  %tmp8479 = getelementptr inbounds float* %tmp8478, i64 1
+  %tmp8480 = getelementptr inbounds float* %tmp8479, i64 1
+  %tmp8481 = getelementptr inbounds float* %tmp8480, i64 1
+  %tmp8482 = getelementptr inbounds float* %tmp8481, i64 1
+  %tmp8483 = getelementptr inbounds float* %tmp8482, i64 1
+  %tmp8484 = getelementptr inbounds float* %tmp8483, i64 1
+  %tmp8485 = getelementptr inbounds float* %tmp8484, i64 1
+  %tmp8486 = getelementptr inbounds float* %tmp8485, i64 1
+  %tmp8487 = getelementptr inbounds float* %tmp8486, i64 1
+  %tmp8488 = getelementptr inbounds float* %tmp8487, i64 1
+  %tmp8489 = getelementptr inbounds float* %tmp8488, i64 1
+  %tmp8490 = getelementptr inbounds float* %tmp8489, i64 1
+  %tmp8491 = getelementptr inbounds float* %tmp8490, i64 1
+  %tmp8492 = getelementptr inbounds float* %tmp8491, i64 1
+  %tmp8493 = getelementptr inbounds float* %tmp8492, i64 1
+  %tmp8494 = getelementptr inbounds float* %tmp8493, i64 1
+  %tmp8495 = getelementptr inbounds float* %tmp8494, i64 1
+  %tmp8496 = getelementptr inbounds float* %tmp8495, i64 1
+  %tmp8497 = getelementptr inbounds float* %tmp8496, i64 1
+  %tmp8498 = getelementptr inbounds float* %tmp8497, i64 1
+  %tmp8499 = getelementptr inbounds float* %tmp8498, i64 1
+  %tmp8500 = getelementptr inbounds float* %tmp8499, i64 1
+  %tmp8501 = getelementptr inbounds float* %tmp8500, i64 1
+  %tmp8502 = getelementptr inbounds float* %tmp8501, i64 1
+  %tmp8503 = getelementptr inbounds float* %tmp8502, i64 1
+  %tmp8504 = getelementptr inbounds float* %tmp8503, i64 1
+  %tmp8505 = getelementptr inbounds float* %tmp8504, i64 1
+  %tmp8506 = getelementptr inbounds float* %tmp8505, i64 1
+  %tmp8507 = getelementptr inbounds float* %tmp8506, i64 1
+  %tmp8508 = getelementptr inbounds float* %tmp8507, i64 1
+  %tmp8509 = getelementptr inbounds float* %tmp8508, i64 1
+  %tmp8510 = getelementptr inbounds float* %tmp8509, i64 1
+  %tmp8511 = getelementptr inbounds float* %tmp8510, i64 1
+  %tmp8512 = getelementptr inbounds float* %tmp8511, i64 1
+  %tmp8513 = getelementptr inbounds float* %tmp8512, i64 1
+  %tmp8514 = getelementptr inbounds float* %tmp8513, i64 1
+  %tmp8515 = getelementptr inbounds float* %tmp8514, i64 1
+  %tmp8516 = getelementptr inbounds float* %tmp8515, i64 1
+  %tmp8517 = getelementptr inbounds float* %tmp8516, i64 1
+  %tmp8518 = getelementptr inbounds float* %tmp8517, i64 1
+  %tmp8519 = getelementptr inbounds float* %tmp8518, i64 1
+  %tmp8520 = getelementptr inbounds float* %tmp8519, i64 1
+  %tmp8521 = getelementptr inbounds float* %tmp8520, i64 1
+  %tmp8522 = getelementptr inbounds float* %tmp8521, i64 1
+  %tmp8523 = getelementptr inbounds float* %tmp8522, i64 1
+  %tmp8524 = getelementptr inbounds float* %tmp8523, i64 1
+  %tmp8525 = getelementptr inbounds float* %tmp8524, i64 1
+  %tmp8526 = getelementptr inbounds float* %tmp8525, i64 1
+  %tmp8527 = getelementptr inbounds float* %tmp8526, i64 1
+  %tmp8528 = getelementptr inbounds float* %tmp8527, i64 1
+  %tmp8529 = getelementptr inbounds float* %tmp8528, i64 1
+  %tmp8530 = getelementptr inbounds float* %tmp8529, i64 1
+  %tmp8531 = getelementptr inbounds float* %tmp8530, i64 1
+  %tmp8532 = getelementptr inbounds float* %tmp8531, i64 1
+  %tmp8533 = getelementptr inbounds float* %tmp8532, i64 1
+  %tmp8534 = getelementptr inbounds float* %tmp8533, i64 1
+  %tmp8535 = getelementptr inbounds float* %tmp8534, i64 1
+  %tmp8536 = getelementptr inbounds float* %tmp8535, i64 1
+  %tmp8537 = getelementptr inbounds float* %tmp8536, i64 1
+  %tmp8538 = getelementptr inbounds float* %tmp8537, i64 1
+  %tmp8539 = getelementptr inbounds float* %tmp8538, i64 1
+  %tmp8540 = getelementptr inbounds float* %tmp8539, i64 1
+  %tmp8541 = getelementptr inbounds float* %tmp8540, i64 1
+  %tmp8542 = getelementptr inbounds float* %tmp8541, i64 1
+  %tmp8543 = getelementptr inbounds float* %tmp8542, i64 1
+  %tmp8544 = getelementptr inbounds float* %tmp8543, i64 1
+  %tmp8545 = getelementptr inbounds float* %tmp8544, i64 1
+  %tmp8546 = getelementptr inbounds float* %tmp8545, i64 1
+  %tmp8547 = getelementptr inbounds float* %tmp8546, i64 1
+  %tmp8548 = getelementptr inbounds float* %tmp8547, i64 1
+  %tmp8549 = getelementptr inbounds float* %tmp8548, i64 1
+  %tmp8550 = getelementptr inbounds float* %tmp8549, i64 1
+  %tmp8551 = getelementptr inbounds float* %tmp8550, i64 1
+  %tmp8552 = getelementptr inbounds float* %tmp8551, i64 1
+  %tmp8553 = getelementptr inbounds float* %tmp8552, i64 1
+  %tmp8554 = getelementptr inbounds float* %tmp8553, i64 1
+  %tmp8555 = getelementptr inbounds float* %tmp8554, i64 1
+  %tmp8556 = getelementptr inbounds float* %tmp8555, i64 1
+  %tmp8557 = getelementptr inbounds float* %tmp8556, i64 1
+  %tmp8558 = getelementptr inbounds float* %tmp8557, i64 1
+  %tmp8559 = getelementptr inbounds float* %tmp8558, i64 1
+  %tmp8560 = getelementptr inbounds float* %tmp8559, i64 1
+  %tmp8561 = getelementptr inbounds float* %tmp8560, i64 1
+  %tmp8562 = getelementptr inbounds float* %tmp8561, i64 1
+  %tmp8563 = getelementptr inbounds float* %tmp8562, i64 1
+  %tmp8564 = getelementptr inbounds float* %tmp8563, i64 1
+  %tmp8565 = getelementptr inbounds float* %tmp8564, i64 1
+  %tmp8566 = getelementptr inbounds float* %tmp8565, i64 1
+  %tmp8567 = getelementptr inbounds float* %tmp8566, i64 1
+  %tmp8568 = getelementptr inbounds float* %tmp8567, i64 1
+  %tmp8569 = getelementptr inbounds float* %tmp8568, i64 1
+  %tmp8570 = getelementptr inbounds float* %tmp8569, i64 1
+  %tmp8571 = getelementptr inbounds float* %tmp8570, i64 1
+  %tmp8572 = getelementptr inbounds float* %tmp8571, i64 1
+  %tmp8573 = getelementptr inbounds float* %tmp8572, i64 1
+  %tmp8574 = getelementptr inbounds float* %tmp8573, i64 1
+  %tmp8575 = getelementptr inbounds float* %tmp8574, i64 1
+  %tmp8576 = getelementptr inbounds float* %tmp8575, i64 1
+  %tmp8577 = getelementptr inbounds float* %tmp8576, i64 1
+  %tmp8578 = getelementptr inbounds float* %tmp8577, i64 1
+  %tmp8579 = getelementptr inbounds float* %tmp8578, i64 1
+  %tmp8580 = getelementptr inbounds float* %tmp8579, i64 1
+  %tmp8581 = getelementptr inbounds float* %tmp8580, i64 1
+  %tmp8582 = getelementptr inbounds float* %tmp8581, i64 1
+  %tmp8583 = getelementptr inbounds float* %tmp8582, i64 1
+  %tmp8584 = getelementptr inbounds float* %tmp8583, i64 1
+  %tmp8585 = getelementptr inbounds float* %tmp8584, i64 1
+  %tmp8586 = getelementptr inbounds float* %tmp8585, i64 1
+  %tmp8587 = getelementptr inbounds float* %tmp8586, i64 1
+  %tmp8588 = getelementptr inbounds float* %tmp8587, i64 1
+  %tmp8589 = getelementptr inbounds float* %tmp8588, i64 1
+  %tmp8590 = getelementptr inbounds float* %tmp8589, i64 1
+  %tmp8591 = getelementptr inbounds float* %tmp8590, i64 1
+  %tmp8592 = getelementptr inbounds float* %tmp8591, i64 1
+  %tmp8593 = getelementptr inbounds float* %tmp8592, i64 1
+  %tmp8594 = getelementptr inbounds float* %tmp8593, i64 1
+  %tmp8595 = getelementptr inbounds float* %tmp8594, i64 1
+  %tmp8596 = getelementptr inbounds float* %tmp8595, i64 1
+  %tmp8597 = getelementptr inbounds float* %tmp8596, i64 1
+  %tmp8598 = getelementptr inbounds float* %tmp8597, i64 1
+  %tmp8599 = getelementptr inbounds float* %tmp8598, i64 1
+  %tmp8600 = getelementptr inbounds float* %tmp8599, i64 1
+  %tmp8601 = getelementptr inbounds float* %tmp8600, i64 1
+  %tmp8602 = getelementptr inbounds float* %tmp8601, i64 1
+  %tmp8603 = getelementptr inbounds float* %tmp8602, i64 1
+  %tmp8604 = getelementptr inbounds float* %tmp8603, i64 1
+  %tmp8605 = getelementptr inbounds float* %tmp8604, i64 1
+  %tmp8606 = getelementptr inbounds float* %tmp8605, i64 1
+  %tmp8607 = getelementptr inbounds float* %tmp8606, i64 1
+  %tmp8608 = getelementptr inbounds float* %tmp8607, i64 1
+  %tmp8609 = getelementptr inbounds float* %tmp8608, i64 1
+  %tmp8610 = getelementptr inbounds float* %tmp8609, i64 1
+  %tmp8611 = getelementptr inbounds float* %tmp8610, i64 1
+  %tmp8612 = getelementptr inbounds float* %tmp8611, i64 1
+  %tmp8613 = getelementptr inbounds float* %tmp8612, i64 1
+  %tmp8614 = getelementptr inbounds float* %tmp8613, i64 1
+  %tmp8615 = getelementptr inbounds float* %tmp8614, i64 1
+  %tmp8616 = getelementptr inbounds float* %tmp8615, i64 1
+  %tmp8617 = getelementptr inbounds float* %tmp8616, i64 1
+  %tmp8618 = getelementptr inbounds float* %tmp8617, i64 1
+  %tmp8619 = getelementptr inbounds float* %tmp8618, i64 1
+  %tmp8620 = getelementptr inbounds float* %tmp8619, i64 1
+  %tmp8621 = getelementptr inbounds float* %tmp8620, i64 1
+  %tmp8622 = getelementptr inbounds float* %tmp8621, i64 1
+  %tmp8623 = getelementptr inbounds float* %tmp8622, i64 1
+  %tmp8624 = getelementptr inbounds float* %tmp8623, i64 1
+  %tmp8625 = getelementptr inbounds float* %tmp8624, i64 1
+  %tmp8626 = getelementptr inbounds float* %tmp8625, i64 1
+  %tmp8627 = getelementptr inbounds float* %tmp8626, i64 1
+  %tmp8628 = getelementptr inbounds float* %tmp8627, i64 1
+  %tmp8629 = getelementptr inbounds float* %tmp8628, i64 1
+  %tmp8630 = getelementptr inbounds float* %tmp8629, i64 1
+  %tmp8631 = getelementptr inbounds float* %tmp8630, i64 1
+  %tmp8632 = getelementptr inbounds float* %tmp8631, i64 1
+  %tmp8633 = getelementptr inbounds float* %tmp8632, i64 1
+  %tmp8634 = getelementptr inbounds float* %tmp8633, i64 1
+  %tmp8635 = getelementptr inbounds float* %tmp8634, i64 1
+  %tmp8636 = getelementptr inbounds float* %tmp8635, i64 1
+  %tmp8637 = getelementptr inbounds float* %tmp8636, i64 1
+  %tmp8638 = getelementptr inbounds float* %tmp8637, i64 1
+  %tmp8639 = getelementptr inbounds float* %tmp8638, i64 1
+  %tmp8640 = getelementptr inbounds float* %tmp8639, i64 1
+  %tmp8641 = getelementptr inbounds float* %tmp8640, i64 1
+  %tmp8642 = getelementptr inbounds float* %tmp8641, i64 1
+  %tmp8643 = getelementptr inbounds float* %tmp8642, i64 1
+  %tmp8644 = getelementptr inbounds float* %tmp8643, i64 1
+  %tmp8645 = getelementptr inbounds float* %tmp8644, i64 1
+  %tmp8646 = getelementptr inbounds float* %tmp8645, i64 1
+  %tmp8647 = getelementptr inbounds float* %tmp8646, i64 1
+  %tmp8648 = getelementptr inbounds float* %tmp8647, i64 1
+  %tmp8649 = getelementptr inbounds float* %tmp8648, i64 1
+  %tmp8650 = getelementptr inbounds float* %tmp8649, i64 1
+  %tmp8651 = getelementptr inbounds float* %tmp8650, i64 1
+  %tmp8652 = getelementptr inbounds float* %tmp8651, i64 1
+  %tmp8653 = getelementptr inbounds float* %tmp8652, i64 1
+  %tmp8654 = getelementptr inbounds float* %tmp8653, i64 1
+  %tmp8655 = getelementptr inbounds float* %tmp8654, i64 1
+  %tmp8656 = getelementptr inbounds float* %tmp8655, i64 1
+  %tmp8657 = getelementptr inbounds float* %tmp8656, i64 1
+  %tmp8658 = getelementptr inbounds float* %tmp8657, i64 1
+  %tmp8659 = getelementptr inbounds float* %tmp8658, i64 1
+  %tmp8660 = getelementptr inbounds float* %tmp8659, i64 1
+  %tmp8661 = getelementptr inbounds float* %tmp8660, i64 1
+  %tmp8662 = getelementptr inbounds float* %tmp8661, i64 1
+  %tmp8663 = getelementptr inbounds float* %tmp8662, i64 1
+  %tmp8664 = getelementptr inbounds float* %tmp8663, i64 1
+  %tmp8665 = getelementptr inbounds float* %tmp8664, i64 1
+  %tmp8666 = getelementptr inbounds float* %tmp8665, i64 1
+  %tmp8667 = getelementptr inbounds float* %tmp8666, i64 1
+  %tmp8668 = getelementptr inbounds float* %tmp8667, i64 1
+  %tmp8669 = getelementptr inbounds float* %tmp8668, i64 1
+  %tmp8670 = getelementptr inbounds float* %tmp8669, i64 1
+  %tmp8671 = getelementptr inbounds float* %tmp8670, i64 1
+  %tmp8672 = getelementptr inbounds float* %tmp8671, i64 1
+  %tmp8673 = getelementptr inbounds float* %tmp8672, i64 1
+  %tmp8674 = getelementptr inbounds float* %tmp8673, i64 1
+  %tmp8675 = getelementptr inbounds float* %tmp8674, i64 1
+  %tmp8676 = getelementptr inbounds float* %tmp8675, i64 1
+  %tmp8677 = getelementptr inbounds float* %tmp8676, i64 1
+  %tmp8678 = getelementptr inbounds float* %tmp8677, i64 1
+  %tmp8679 = getelementptr inbounds float* %tmp8678, i64 1
+  %tmp8680 = getelementptr inbounds float* %tmp8679, i64 1
+  %tmp8681 = getelementptr inbounds float* %tmp8680, i64 1
+  %tmp8682 = getelementptr inbounds float* %tmp8681, i64 1
+  %tmp8683 = getelementptr inbounds float* %tmp8682, i64 1
+  %tmp8684 = getelementptr inbounds float* %tmp8683, i64 1
+  %tmp8685 = getelementptr inbounds float* %tmp8684, i64 1
+  %tmp8686 = getelementptr inbounds float* %tmp8685, i64 1
+  %tmp8687 = getelementptr inbounds float* %tmp8686, i64 1
+  %tmp8688 = getelementptr inbounds float* %tmp8687, i64 1
+  %tmp8689 = getelementptr inbounds float* %tmp8688, i64 1
+  %tmp8690 = getelementptr inbounds float* %tmp8689, i64 1
+  %tmp8691 = getelementptr inbounds float* %tmp8690, i64 1
+  %tmp8692 = getelementptr inbounds float* %tmp8691, i64 1
+  %tmp8693 = getelementptr inbounds float* %tmp8692, i64 1
+  %tmp8694 = getelementptr inbounds float* %tmp8693, i64 1
+  %tmp8695 = getelementptr inbounds float* %tmp8694, i64 1
+  %tmp8696 = getelementptr inbounds float* %tmp8695, i64 1
+  %tmp8697 = getelementptr inbounds float* %tmp8696, i64 1
+  %tmp8698 = getelementptr inbounds float* %tmp8697, i64 1
+  %tmp8699 = getelementptr inbounds float* %tmp8698, i64 1
+  %tmp8700 = getelementptr inbounds float* %tmp8699, i64 1
+  %tmp8701 = getelementptr inbounds float* %tmp8700, i64 1
+  %tmp8702 = getelementptr inbounds float* %tmp8701, i64 1
+  %tmp8703 = getelementptr inbounds float* %tmp8702, i64 1
+  %tmp8704 = getelementptr inbounds float* %tmp8703, i64 1
+  %tmp8705 = getelementptr inbounds float* %tmp8704, i64 1
+  %tmp8706 = getelementptr inbounds float* %tmp8705, i64 1
+  %tmp8707 = getelementptr inbounds float* %tmp8706, i64 1
+  %tmp8708 = getelementptr inbounds float* %tmp8707, i64 1
+  %tmp8709 = getelementptr inbounds float* %tmp8708, i64 1
+  %tmp8710 = getelementptr inbounds float* %tmp8709, i64 1
+  %tmp8711 = getelementptr inbounds float* %tmp8710, i64 1
+  %tmp8712 = getelementptr inbounds float* %tmp8711, i64 1
+  %tmp8713 = getelementptr inbounds float* %tmp8712, i64 1
+  %tmp8714 = getelementptr inbounds float* %tmp8713, i64 1
+  %tmp8715 = getelementptr inbounds float* %tmp8714, i64 1
+  %tmp8716 = getelementptr inbounds float* %tmp8715, i64 1
+  %tmp8717 = getelementptr inbounds float* %tmp8716, i64 1
+  %tmp8718 = getelementptr inbounds float* %tmp8717, i64 1
+  %tmp8719 = getelementptr inbounds float* %tmp8718, i64 1
+  %tmp8720 = getelementptr inbounds float* %tmp8719, i64 1
+  %tmp8721 = getelementptr inbounds float* %tmp8720, i64 1
+  %tmp8722 = getelementptr inbounds float* %tmp8721, i64 1
+  %tmp8723 = getelementptr inbounds float* %tmp8722, i64 1
+  %tmp8724 = getelementptr inbounds float* %tmp8723, i64 1
+  %tmp8725 = getelementptr inbounds float* %tmp8724, i64 1
+  %tmp8726 = getelementptr inbounds float* %tmp8725, i64 1
+  %tmp8727 = getelementptr inbounds float* %tmp8726, i64 1
+  %tmp8728 = getelementptr inbounds float* %tmp8727, i64 1
+  %tmp8729 = getelementptr inbounds float* %tmp8728, i64 1
+  %tmp8730 = getelementptr inbounds float* %tmp8729, i64 1
+  %tmp8731 = getelementptr inbounds float* %tmp8730, i64 1
+  %tmp8732 = getelementptr inbounds float* %tmp8731, i64 1
+  %tmp8733 = getelementptr inbounds float* %tmp8732, i64 1
+  %tmp8734 = getelementptr inbounds float* %tmp8733, i64 1
+  %tmp8735 = getelementptr inbounds float* %tmp8734, i64 1
+  %tmp8736 = getelementptr inbounds float* %tmp8735, i64 1
+  %tmp8737 = getelementptr inbounds float* %tmp8736, i64 1
+  %tmp8738 = getelementptr inbounds float* %tmp8737, i64 1
+  %tmp8739 = getelementptr inbounds float* %tmp8738, i64 1
+  %tmp8740 = getelementptr inbounds float* %tmp8739, i64 1
+  %tmp8741 = getelementptr inbounds float* %tmp8740, i64 1
+  %tmp8742 = getelementptr inbounds float* %tmp8741, i64 1
+  %tmp8743 = getelementptr inbounds float* %tmp8742, i64 1
+  %tmp8744 = getelementptr inbounds float* %tmp8743, i64 1
+  %tmp8745 = getelementptr inbounds float* %tmp8744, i64 1
+  %tmp8746 = getelementptr inbounds float* %tmp8745, i64 1
+  %tmp8747 = getelementptr inbounds float* %tmp8746, i64 1
+  %tmp8748 = getelementptr inbounds float* %tmp8747, i64 1
+  %tmp8749 = getelementptr inbounds float* %tmp8748, i64 1
+  %tmp8750 = getelementptr inbounds float* %tmp8749, i64 1
+  %tmp8751 = getelementptr inbounds float* %tmp8750, i64 1
+  %tmp8752 = getelementptr inbounds float* %tmp8751, i64 1
+  %tmp8753 = getelementptr inbounds float* %tmp8752, i64 1
+  %tmp8754 = getelementptr inbounds float* %tmp8753, i64 1
+  %tmp8755 = getelementptr inbounds float* %tmp8754, i64 1
+  %tmp8756 = getelementptr inbounds float* %tmp8755, i64 1
+  %tmp8757 = getelementptr inbounds float* %tmp8756, i64 1
+  %tmp8758 = getelementptr inbounds float* %tmp8757, i64 1
+  %tmp8759 = getelementptr inbounds float* %tmp8758, i64 1
+  %tmp8760 = getelementptr inbounds float* %tmp8759, i64 1
+  %tmp8761 = getelementptr inbounds float* %tmp8760, i64 1
+  %tmp8762 = getelementptr inbounds float* %tmp8761, i64 1
+  %tmp8763 = getelementptr inbounds float* %tmp8762, i64 1
+  %tmp8764 = getelementptr inbounds float* %tmp8763, i64 1
+  %tmp8765 = getelementptr inbounds float* %tmp8764, i64 1
+  %tmp8766 = getelementptr inbounds float* %tmp8765, i64 1
+  %tmp8767 = getelementptr inbounds float* %tmp8766, i64 1
+  %tmp8768 = getelementptr inbounds float* %tmp8767, i64 1
+  %tmp8769 = getelementptr inbounds float* %tmp8768, i64 1
+  %tmp8770 = getelementptr inbounds float* %tmp8769, i64 1
+  %tmp8771 = getelementptr inbounds float* %tmp8770, i64 1
+  %tmp8772 = getelementptr inbounds float* %tmp8771, i64 1
+  %tmp8773 = getelementptr inbounds float* %tmp8772, i64 1
+  %tmp8774 = getelementptr inbounds float* %tmp8773, i64 1
+  %tmp8775 = getelementptr inbounds float* %tmp8774, i64 1
+  %tmp8776 = getelementptr inbounds float* %tmp8775, i64 1
+  %tmp8777 = getelementptr inbounds float* %tmp8776, i64 1
+  %tmp8778 = getelementptr inbounds float* %tmp8777, i64 1
+  %tmp8779 = getelementptr inbounds float* %tmp8778, i64 1
+  %tmp8780 = getelementptr inbounds float* %tmp8779, i64 1
+  %tmp8781 = getelementptr inbounds float* %tmp8780, i64 1
+  %tmp8782 = getelementptr inbounds float* %tmp8781, i64 1
+  %tmp8783 = getelementptr inbounds float* %tmp8782, i64 1
+  %tmp8784 = getelementptr inbounds float* %tmp8783, i64 1
+  %tmp8785 = getelementptr inbounds float* %tmp8784, i64 1
+  %tmp8786 = getelementptr inbounds float* %tmp8785, i64 1
+  %tmp8787 = getelementptr inbounds float* %tmp8786, i64 1
+  %tmp8788 = getelementptr inbounds float* %tmp8787, i64 1
+  %tmp8789 = getelementptr inbounds float* %tmp8788, i64 1
+  %tmp8790 = getelementptr inbounds float* %tmp8789, i64 1
+  %tmp8791 = getelementptr inbounds float* %tmp8790, i64 1
+  %tmp8792 = getelementptr inbounds float* %tmp8791, i64 1
+  %tmp8793 = getelementptr inbounds float* %tmp8792, i64 1
+  %tmp8794 = getelementptr inbounds float* %tmp8793, i64 1
+  %tmp8795 = getelementptr inbounds float* %tmp8794, i64 1
+  %tmp8796 = getelementptr inbounds float* %tmp8795, i64 1
+  %tmp8797 = getelementptr inbounds float* %tmp8796, i64 1
+  %tmp8798 = getelementptr inbounds float* %tmp8797, i64 1
+  %tmp8799 = getelementptr inbounds float* %tmp8798, i64 1
+  %tmp8800 = getelementptr inbounds float* %tmp8799, i64 1
+  %tmp8801 = getelementptr inbounds float* %tmp8800, i64 1
+  %tmp8802 = getelementptr inbounds float* %tmp8801, i64 1
+  %tmp8803 = getelementptr inbounds float* %tmp8802, i64 1
+  %tmp8804 = getelementptr inbounds float* %tmp8803, i64 1
+  %tmp8805 = getelementptr inbounds float* %tmp8804, i64 1
+  %tmp8806 = getelementptr inbounds float* %tmp8805, i64 1
+  %tmp8807 = getelementptr inbounds float* %tmp8806, i64 1
+  %tmp8808 = getelementptr inbounds float* %tmp8807, i64 1
+  %tmp8809 = getelementptr inbounds float* %tmp8808, i64 1
+  %tmp8810 = getelementptr inbounds float* %tmp8809, i64 1
+  %tmp8811 = getelementptr inbounds float* %tmp8810, i64 1
+  %tmp8812 = getelementptr inbounds float* %tmp8811, i64 1
+  %tmp8813 = getelementptr inbounds float* %tmp8812, i64 1
+  %tmp8814 = getelementptr inbounds float* %tmp8813, i64 1
+  %tmp8815 = getelementptr inbounds float* %tmp8814, i64 1
+  %tmp8816 = getelementptr inbounds float* %tmp8815, i64 1
+  %tmp8817 = getelementptr inbounds float* %tmp8816, i64 1
+  %tmp8818 = getelementptr inbounds float* %tmp8817, i64 1
+  %tmp8819 = getelementptr inbounds float* %tmp8818, i64 1
+  %tmp8820 = getelementptr inbounds float* %tmp8819, i64 1
+  %tmp8821 = getelementptr inbounds float* %tmp8820, i64 1
+  %tmp8822 = getelementptr inbounds float* %tmp8821, i64 1
+  %tmp8823 = getelementptr inbounds float* %tmp8822, i64 1
+  %tmp8824 = getelementptr inbounds float* %tmp8823, i64 1
+  %tmp8825 = getelementptr inbounds float* %tmp8824, i64 1
+  %tmp8826 = getelementptr inbounds float* %tmp8825, i64 1
+  %tmp8827 = getelementptr inbounds float* %tmp8826, i64 1
+  %tmp8828 = getelementptr inbounds float* %tmp8827, i64 1
+  %tmp8829 = getelementptr inbounds float* %tmp8828, i64 1
+  %tmp8830 = getelementptr inbounds float* %tmp8829, i64 1
+  %tmp8831 = getelementptr inbounds float* %tmp8830, i64 1
+  %tmp8832 = getelementptr inbounds float* %tmp8831, i64 1
+  %tmp8833 = getelementptr inbounds float* %tmp8832, i64 1
+  %tmp8834 = getelementptr inbounds float* %tmp8833, i64 1
+  %tmp8835 = getelementptr inbounds float* %tmp8834, i64 1
+  %tmp8836 = getelementptr inbounds float* %tmp8835, i64 1
+  %tmp8837 = getelementptr inbounds float* %tmp8836, i64 1
+  %tmp8838 = getelementptr inbounds float* %tmp8837, i64 1
+  %tmp8839 = getelementptr inbounds float* %tmp8838, i64 1
+  %tmp8840 = getelementptr inbounds float* %tmp8839, i64 1
+  %tmp8841 = getelementptr inbounds float* %tmp8840, i64 1
+  %tmp8842 = getelementptr inbounds float* %tmp8841, i64 1
+  %tmp8843 = getelementptr inbounds float* %tmp8842, i64 1
+  %tmp8844 = getelementptr inbounds float* %tmp8843, i64 1
+  %tmp8845 = getelementptr inbounds float* %tmp8844, i64 1
+  %tmp8846 = getelementptr inbounds float* %tmp8845, i64 1
+  %tmp8847 = getelementptr inbounds float* %tmp8846, i64 1
+  %tmp8848 = getelementptr inbounds float* %tmp8847, i64 1
+  %tmp8849 = getelementptr inbounds float* %tmp8848, i64 1
+  %tmp8850 = getelementptr inbounds float* %tmp8849, i64 1
+  %tmp8851 = getelementptr inbounds float* %tmp8850, i64 1
+  %tmp8852 = getelementptr inbounds float* %tmp8851, i64 1
+  %tmp8853 = getelementptr inbounds float* %tmp8852, i64 1
+  %tmp8854 = getelementptr inbounds float* %tmp8853, i64 1
+  %tmp8855 = getelementptr inbounds float* %tmp8854, i64 1
+  %tmp8856 = getelementptr inbounds float* %tmp8855, i64 1
+  %tmp8857 = getelementptr inbounds float* %tmp8856, i64 1
+  %tmp8858 = getelementptr inbounds float* %tmp8857, i64 1
+  %tmp8859 = getelementptr inbounds float* %tmp8858, i64 1
+  %tmp8860 = getelementptr inbounds float* %tmp8859, i64 1
+  %tmp8861 = getelementptr inbounds float* %tmp8860, i64 1
+  %tmp8862 = getelementptr inbounds float* %tmp8861, i64 1
+  %tmp8863 = getelementptr inbounds float* %tmp8862, i64 1
+  %tmp8864 = getelementptr inbounds float* %tmp8863, i64 1
+  %tmp8865 = getelementptr inbounds float* %tmp8864, i64 1
+  %tmp8866 = getelementptr inbounds float* %tmp8865, i64 1
+  %tmp8867 = getelementptr inbounds float* %tmp8866, i64 1
+  %tmp8868 = getelementptr inbounds float* %tmp8867, i64 1
+  %tmp8869 = getelementptr inbounds float* %tmp8868, i64 1
+  %tmp8870 = getelementptr inbounds float* %tmp8869, i64 1
+  %tmp8871 = getelementptr inbounds float* %tmp8870, i64 1
+  %tmp8872 = getelementptr inbounds float* %tmp8871, i64 1
+  %tmp8873 = getelementptr inbounds float* %tmp8872, i64 1
+  %tmp8874 = getelementptr inbounds float* %tmp8873, i64 1
+  %tmp8875 = getelementptr inbounds float* %tmp8874, i64 1
+  %tmp8876 = getelementptr inbounds float* %tmp8875, i64 1
+  %tmp8877 = getelementptr inbounds float* %tmp8876, i64 1
+  %tmp8878 = getelementptr inbounds float* %tmp8877, i64 1
+  %tmp8879 = getelementptr inbounds float* %tmp8878, i64 1
+  %tmp8880 = getelementptr inbounds float* %tmp8879, i64 1
+  %tmp8881 = getelementptr inbounds float* %tmp8880, i64 1
+  %tmp8882 = getelementptr inbounds float* %tmp8881, i64 1
+  %tmp8883 = getelementptr inbounds float* %tmp8882, i64 1
+  %tmp8884 = getelementptr inbounds float* %tmp8883, i64 1
+  %tmp8885 = getelementptr inbounds float* %tmp8884, i64 1
+  %tmp8886 = getelementptr inbounds float* %tmp8885, i64 1
+  %tmp8887 = getelementptr inbounds float* %tmp8886, i64 1
+  %tmp8888 = getelementptr inbounds float* %tmp8887, i64 1
+  %tmp8889 = getelementptr inbounds float* %tmp8888, i64 1
+  %tmp8890 = getelementptr inbounds float* %tmp8889, i64 1
+  %tmp8891 = getelementptr inbounds float* %tmp8890, i64 1
+  %tmp8892 = getelementptr inbounds float* %tmp8891, i64 1
+  %tmp8893 = getelementptr inbounds float* %tmp8892, i64 1
+  %tmp8894 = getelementptr inbounds float* %tmp8893, i64 1
+  %tmp8895 = getelementptr inbounds float* %tmp8894, i64 1
+  %tmp8896 = getelementptr inbounds float* %tmp8895, i64 1
+  %tmp8897 = getelementptr inbounds float* %tmp8896, i64 1
+  %tmp8898 = getelementptr inbounds float* %tmp8897, i64 1
+  %tmp8899 = getelementptr inbounds float* %tmp8898, i64 1
+  %tmp8900 = getelementptr inbounds float* %tmp8899, i64 1
+  %tmp8901 = getelementptr inbounds float* %tmp8900, i64 1
+  %tmp8902 = getelementptr inbounds float* %tmp8901, i64 1
+  %tmp8903 = getelementptr inbounds float* %tmp8902, i64 1
+  %tmp8904 = getelementptr inbounds float* %tmp8903, i64 1
+  %tmp8905 = getelementptr inbounds float* %tmp8904, i64 1
+  %tmp8906 = getelementptr inbounds float* %tmp8905, i64 1
+  %tmp8907 = getelementptr inbounds float* %tmp8906, i64 1
+  %tmp8908 = getelementptr inbounds float* %tmp8907, i64 1
+  %tmp8909 = getelementptr inbounds float* %tmp8908, i64 1
+  %tmp8910 = getelementptr inbounds float* %tmp8909, i64 1
+  %tmp8911 = getelementptr inbounds float* %tmp8910, i64 1
+  %tmp8912 = getelementptr inbounds float* %tmp8911, i64 1
+  %tmp8913 = getelementptr inbounds float* %tmp8912, i64 1
+  %tmp8914 = getelementptr inbounds float* %tmp8913, i64 1
+  %tmp8915 = getelementptr inbounds float* %tmp8914, i64 1
+  %tmp8916 = getelementptr inbounds float* %tmp8915, i64 1
+  %tmp8917 = getelementptr inbounds float* %tmp8916, i64 1
+  %tmp8918 = getelementptr inbounds float* %tmp8917, i64 1
+  %tmp8919 = getelementptr inbounds float* %tmp8918, i64 1
+  %tmp8920 = getelementptr inbounds float* %tmp8919, i64 1
+  %tmp8921 = getelementptr inbounds float* %tmp8920, i64 1
+  %tmp8922 = getelementptr inbounds float* %tmp8921, i64 1
+  %tmp8923 = getelementptr inbounds float* %tmp8922, i64 1
+  %tmp8924 = getelementptr inbounds float* %tmp8923, i64 1
+  %tmp8925 = getelementptr inbounds float* %tmp8924, i64 1
+  %tmp8926 = getelementptr inbounds float* %tmp8925, i64 1
+  %tmp8927 = getelementptr inbounds float* %tmp8926, i64 1
+  %tmp8928 = getelementptr inbounds float* %tmp8927, i64 1
+  %tmp8929 = getelementptr inbounds float* %tmp8928, i64 1
+  %tmp8930 = getelementptr inbounds float* %tmp8929, i64 1
+  %tmp8931 = getelementptr inbounds float* %tmp8930, i64 1
+  %tmp8932 = getelementptr inbounds float* %tmp8931, i64 1
+  %tmp8933 = getelementptr inbounds float* %tmp8932, i64 1
+  %tmp8934 = getelementptr inbounds float* %tmp8933, i64 1
+  %tmp8935 = getelementptr inbounds float* %tmp8934, i64 1
+  %tmp8936 = getelementptr inbounds float* %tmp8935, i64 1
+  %tmp8937 = getelementptr inbounds float* %tmp8936, i64 1
+  %tmp8938 = getelementptr inbounds float* %tmp8937, i64 1
+  %tmp8939 = getelementptr inbounds float* %tmp8938, i64 1
+  %tmp8940 = getelementptr inbounds float* %tmp8939, i64 1
+  %tmp8941 = getelementptr inbounds float* %tmp8940, i64 1
+  %tmp8942 = getelementptr inbounds float* %tmp8941, i64 1
+  %tmp8943 = getelementptr inbounds float* %tmp8942, i64 1
+  %tmp8944 = getelementptr inbounds float* %tmp8943, i64 1
+  %tmp8945 = getelementptr inbounds float* %tmp8944, i64 1
+  %tmp8946 = getelementptr inbounds float* %tmp8945, i64 1
+  %tmp8947 = getelementptr inbounds float* %tmp8946, i64 1
+  %tmp8948 = getelementptr inbounds float* %tmp8947, i64 1
+  %tmp8949 = getelementptr inbounds float* %tmp8948, i64 1
+  %tmp8950 = getelementptr inbounds float* %tmp8949, i64 1
+  %tmp8951 = getelementptr inbounds float* %tmp8950, i64 1
+  %tmp8952 = getelementptr inbounds float* %tmp8951, i64 1
+  %tmp8953 = getelementptr inbounds float* %tmp8952, i64 1
+  %tmp8954 = getelementptr inbounds float* %tmp8953, i64 1
+  %tmp8955 = getelementptr inbounds float* %tmp8954, i64 1
+  %tmp8956 = getelementptr inbounds float* %tmp8955, i64 1
+  %tmp8957 = getelementptr inbounds float* %tmp8956, i64 1
+  %tmp8958 = getelementptr inbounds float* %tmp8957, i64 1
+  %tmp8959 = getelementptr inbounds float* %tmp8958, i64 1
+  %tmp8960 = getelementptr inbounds float* %tmp8959, i64 1
+  %tmp8961 = getelementptr inbounds float* %tmp8960, i64 1
+  %tmp8962 = getelementptr inbounds float* %tmp8961, i64 1
+  %tmp8963 = getelementptr inbounds float* %tmp8962, i64 1
+  %tmp8964 = getelementptr inbounds float* %tmp8963, i64 1
+  %tmp8965 = getelementptr inbounds float* %tmp8964, i64 1
+  %tmp8966 = getelementptr inbounds float* %tmp8965, i64 1
+  %tmp8967 = getelementptr inbounds float* %tmp8966, i64 1
+  %tmp8968 = getelementptr inbounds float* %tmp8967, i64 1
+  %tmp8969 = getelementptr inbounds float* %tmp8968, i64 1
+  %tmp8970 = getelementptr inbounds float* %tmp8969, i64 1
+  %tmp8971 = getelementptr inbounds float* %tmp8970, i64 1
+  %tmp8972 = getelementptr inbounds float* %tmp8971, i64 1
+  %tmp8973 = getelementptr inbounds float* %tmp8972, i64 1
+  %tmp8974 = getelementptr inbounds float* %tmp8973, i64 1
+  %tmp8975 = getelementptr inbounds float* %tmp8974, i64 1
+  %tmp8976 = getelementptr inbounds float* %tmp8975, i64 1
+  %tmp8977 = getelementptr inbounds float* %tmp8976, i64 1
+  %tmp8978 = getelementptr inbounds float* %tmp8977, i64 1
+  %tmp8979 = getelementptr inbounds float* %tmp8978, i64 1
+  %tmp8980 = getelementptr inbounds float* %tmp8979, i64 1
+  %tmp8981 = getelementptr inbounds float* %tmp8980, i64 1
+  %tmp8982 = getelementptr inbounds float* %tmp8981, i64 1
+  %tmp8983 = getelementptr inbounds float* %tmp8982, i64 1
+  %tmp8984 = getelementptr inbounds float* %tmp8983, i64 1
+  %tmp8985 = getelementptr inbounds float* %tmp8984, i64 1
+  %tmp8986 = getelementptr inbounds float* %tmp8985, i64 1
+  %tmp8987 = getelementptr inbounds float* %tmp8986, i64 1
+  %tmp8988 = getelementptr inbounds float* %tmp8987, i64 1
+  %tmp8989 = getelementptr inbounds float* %tmp8988, i64 1
+  %tmp8990 = getelementptr inbounds float* %tmp8989, i64 1
+  %tmp8991 = getelementptr inbounds float* %tmp8990, i64 1
+  %tmp8992 = getelementptr inbounds float* %tmp8991, i64 1
+  %tmp8993 = getelementptr inbounds float* %tmp8992, i64 1
+  %tmp8994 = getelementptr inbounds float* %tmp8993, i64 1
+  %tmp8995 = getelementptr inbounds float* %tmp8994, i64 1
+  %tmp8996 = getelementptr inbounds float* %tmp8995, i64 1
+  %tmp8997 = getelementptr inbounds float* %tmp8996, i64 1
+  %tmp8998 = getelementptr inbounds float* %tmp8997, i64 1
+  %tmp8999 = getelementptr inbounds float* %tmp8998, i64 1
+  %tmp9000 = getelementptr inbounds float* %tmp8999, i64 1
+  %tmp9001 = getelementptr inbounds float* %tmp9000, i64 1
+  %tmp9002 = getelementptr inbounds float* %tmp9001, i64 1
+  %tmp9003 = getelementptr inbounds float* %tmp9002, i64 1
+  %tmp9004 = getelementptr inbounds float* %tmp9003, i64 1
+  %tmp9005 = getelementptr inbounds float* %tmp9004, i64 1
+  %tmp9006 = getelementptr inbounds float* %tmp9005, i64 1
+  %tmp9007 = getelementptr inbounds float* %tmp9006, i64 1
+  %tmp9008 = getelementptr inbounds float* %tmp9007, i64 1
+  %tmp9009 = getelementptr inbounds float* %tmp9008, i64 1
+  %tmp9010 = getelementptr inbounds float* %tmp9009, i64 1
+  %tmp9011 = getelementptr inbounds float* %tmp9010, i64 1
+  %tmp9012 = getelementptr inbounds float* %tmp9011, i64 1
+  %tmp9013 = getelementptr inbounds float* %tmp9012, i64 1
+  %tmp9014 = getelementptr inbounds float* %tmp9013, i64 1
+  %tmp9015 = getelementptr inbounds float* %tmp9014, i64 1
+  %tmp9016 = getelementptr inbounds float* %tmp9015, i64 1
+  %tmp9017 = getelementptr inbounds float* %tmp9016, i64 1
+  %tmp9018 = getelementptr inbounds float* %tmp9017, i64 1
+  %tmp9019 = getelementptr inbounds float* %tmp9018, i64 1
+  %tmp9020 = getelementptr inbounds float* %tmp9019, i64 1
+  %tmp9021 = getelementptr inbounds float* %tmp9020, i64 1
+  %tmp9022 = getelementptr inbounds float* %tmp9021, i64 1
+  %tmp9023 = getelementptr inbounds float* %tmp9022, i64 1
+  %tmp9024 = getelementptr inbounds float* %tmp9023, i64 1
+  %tmp9025 = getelementptr inbounds float* %tmp9024, i64 1
+  %tmp9026 = getelementptr inbounds float* %tmp9025, i64 1
+  %tmp9027 = getelementptr inbounds float* %tmp9026, i64 1
+  %tmp9028 = getelementptr inbounds float* %tmp9027, i64 1
+  %tmp9029 = getelementptr inbounds float* %tmp9028, i64 1
+  %tmp9030 = getelementptr inbounds float* %tmp9029, i64 1
+  %tmp9031 = getelementptr inbounds float* %tmp9030, i64 1
+  %tmp9032 = getelementptr inbounds float* %tmp9031, i64 1
+  %tmp9033 = getelementptr inbounds float* %tmp9032, i64 1
+  %tmp9034 = getelementptr inbounds float* %tmp9033, i64 1
+  %tmp9035 = getelementptr inbounds float* %tmp9034, i64 1
+  %tmp9036 = getelementptr inbounds float* %tmp9035, i64 1
+  %tmp9037 = getelementptr inbounds float* %tmp9036, i64 1
+  %tmp9038 = getelementptr inbounds float* %tmp9037, i64 1
+  %tmp9039 = getelementptr inbounds float* %tmp9038, i64 1
+  %tmp9040 = getelementptr inbounds float* %tmp9039, i64 1
+  %tmp9041 = getelementptr inbounds float* %tmp9040, i64 1
+  %tmp9042 = getelementptr inbounds float* %tmp9041, i64 1
+  %tmp9043 = getelementptr inbounds float* %tmp9042, i64 1
+  %tmp9044 = getelementptr inbounds float* %tmp9043, i64 1
+  %tmp9045 = getelementptr inbounds float* %tmp9044, i64 1
+  %tmp9046 = getelementptr inbounds float* %tmp9045, i64 1
+  %tmp9047 = getelementptr inbounds float* %tmp9046, i64 1
+  %tmp9048 = getelementptr inbounds float* %tmp9047, i64 1
+  %tmp9049 = getelementptr inbounds float* %tmp9048, i64 1
+  %tmp9050 = getelementptr inbounds float* %tmp9049, i64 1
+  %tmp9051 = getelementptr inbounds float* %tmp9050, i64 1
+  %tmp9052 = getelementptr inbounds float* %tmp9051, i64 1
+  %tmp9053 = getelementptr inbounds float* %tmp9052, i64 1
+  %tmp9054 = getelementptr inbounds float* %tmp9053, i64 1
+  %tmp9055 = getelementptr inbounds float* %tmp9054, i64 1
+  %tmp9056 = getelementptr inbounds float* %tmp9055, i64 1
+  %tmp9057 = getelementptr inbounds float* %tmp9056, i64 1
+  %tmp9058 = getelementptr inbounds float* %tmp9057, i64 1
+  %tmp9059 = getelementptr inbounds float* %tmp9058, i64 1
+  %tmp9060 = getelementptr inbounds float* %tmp9059, i64 1
+  %tmp9061 = getelementptr inbounds float* %tmp9060, i64 1
+  %tmp9062 = getelementptr inbounds float* %tmp9061, i64 1
+  %tmp9063 = getelementptr inbounds float* %tmp9062, i64 1
+  %tmp9064 = getelementptr inbounds float* %tmp9063, i64 1
+  %tmp9065 = getelementptr inbounds float* %tmp9064, i64 1
+  %tmp9066 = getelementptr inbounds float* %tmp9065, i64 1
+  %tmp9067 = getelementptr inbounds float* %tmp9066, i64 1
+  %tmp9068 = getelementptr inbounds float* %tmp9067, i64 1
+  %tmp9069 = getelementptr inbounds float* %tmp9068, i64 1
+  %tmp9070 = getelementptr inbounds float* %tmp9069, i64 1
+  %tmp9071 = getelementptr inbounds float* %tmp9070, i64 1
+  %tmp9072 = getelementptr inbounds float* %tmp9071, i64 1
+  %tmp9073 = getelementptr inbounds float* %tmp9072, i64 1
+  %tmp9074 = getelementptr inbounds float* %tmp9073, i64 1
+  %tmp9075 = getelementptr inbounds float* %tmp9074, i64 1
+  %tmp9076 = getelementptr inbounds float* %tmp9075, i64 1
+  %tmp9077 = getelementptr inbounds float* %tmp9076, i64 1
+  %tmp9078 = getelementptr inbounds float* %tmp9077, i64 1
+  %tmp9079 = getelementptr inbounds float* %tmp9078, i64 1
+  %tmp9080 = getelementptr inbounds float* %tmp9079, i64 1
+  %tmp9081 = getelementptr inbounds float* %tmp9080, i64 1
+  %tmp9082 = getelementptr inbounds float* %tmp9081, i64 1
+  %tmp9083 = getelementptr inbounds float* %tmp9082, i64 1
+  %tmp9084 = getelementptr inbounds float* %tmp9083, i64 1
+  %tmp9085 = getelementptr inbounds float* %tmp9084, i64 1
+  %tmp9086 = getelementptr inbounds float* %tmp9085, i64 1
+  %tmp9087 = getelementptr inbounds float* %tmp9086, i64 1
+  %tmp9088 = getelementptr inbounds float* %tmp9087, i64 1
+  %tmp9089 = getelementptr inbounds float* %tmp9088, i64 1
+  %tmp9090 = getelementptr inbounds float* %tmp9089, i64 1
+  %tmp9091 = getelementptr inbounds float* %tmp9090, i64 1
+  %tmp9092 = getelementptr inbounds float* %tmp9091, i64 1
+  %tmp9093 = getelementptr inbounds float* %tmp9092, i64 1
+  %tmp9094 = getelementptr inbounds float* %tmp9093, i64 1
+  %tmp9095 = getelementptr inbounds float* %tmp9094, i64 1
+  %tmp9096 = getelementptr inbounds float* %tmp9095, i64 1
+  %tmp9097 = getelementptr inbounds float* %tmp9096, i64 1
+  %tmp9098 = getelementptr inbounds float* %tmp9097, i64 1
+  %tmp9099 = getelementptr inbounds float* %tmp9098, i64 1
+  %tmp9100 = getelementptr inbounds float* %tmp9099, i64 1
+  %tmp9101 = getelementptr inbounds float* %tmp9100, i64 1
+  %tmp9102 = getelementptr inbounds float* %tmp9101, i64 1
+  %tmp9103 = getelementptr inbounds float* %tmp9102, i64 1
+  %tmp9104 = getelementptr inbounds float* %tmp9103, i64 1
+  %tmp9105 = getelementptr inbounds float* %tmp9104, i64 1
+  %tmp9106 = getelementptr inbounds float* %tmp9105, i64 1
+  %tmp9107 = getelementptr inbounds float* %tmp9106, i64 1
+  %tmp9108 = getelementptr inbounds float* %tmp9107, i64 1
+  %tmp9109 = getelementptr inbounds float* %tmp9108, i64 1
+  %tmp9110 = getelementptr inbounds float* %tmp9109, i64 1
+  %tmp9111 = getelementptr inbounds float* %tmp9110, i64 1
+  %tmp9112 = getelementptr inbounds float* %tmp9111, i64 1
+  %tmp9113 = getelementptr inbounds float* %tmp9112, i64 1
+  %tmp9114 = getelementptr inbounds float* %tmp9113, i64 1
+  %tmp9115 = getelementptr inbounds float* %tmp9114, i64 1
+  %tmp9116 = getelementptr inbounds float* %tmp9115, i64 1
+  %tmp9117 = getelementptr inbounds float* %tmp9116, i64 1
+  %tmp9118 = getelementptr inbounds float* %tmp9117, i64 1
+  %tmp9119 = getelementptr inbounds float* %tmp9118, i64 1
+  %tmp9120 = getelementptr inbounds float* %tmp9119, i64 1
+  %tmp9121 = getelementptr inbounds float* %tmp9120, i64 1
+  %tmp9122 = getelementptr inbounds float* %tmp9121, i64 1
+  %tmp9123 = getelementptr inbounds float* %tmp9122, i64 1
+  %tmp9124 = getelementptr inbounds float* %tmp9123, i64 1
+  %tmp9125 = getelementptr inbounds float* %tmp9124, i64 1
+  %tmp9126 = getelementptr inbounds float* %tmp9125, i64 1
+  %tmp9127 = getelementptr inbounds float* %tmp9126, i64 1
+  %tmp9128 = getelementptr inbounds float* %tmp9127, i64 1
+  %tmp9129 = getelementptr inbounds float* %tmp9128, i64 1
+  %tmp9130 = getelementptr inbounds float* %tmp9129, i64 1
+  %tmp9131 = getelementptr inbounds float* %tmp9130, i64 1
+  %tmp9132 = getelementptr inbounds float* %tmp9131, i64 1
+  %tmp9133 = getelementptr inbounds float* %tmp9132, i64 1
+  %tmp9134 = getelementptr inbounds float* %tmp9133, i64 1
+  %tmp9135 = getelementptr inbounds float* %tmp9134, i64 1
+  %tmp9136 = getelementptr inbounds float* %tmp9135, i64 1
+  %tmp9137 = getelementptr inbounds float* %tmp9136, i64 1
+  %tmp9138 = getelementptr inbounds float* %tmp9137, i64 1
+  %tmp9139 = getelementptr inbounds float* %tmp9138, i64 1
+  %tmp9140 = getelementptr inbounds float* %tmp9139, i64 1
+  %tmp9141 = getelementptr inbounds float* %tmp9140, i64 1
+  %tmp9142 = getelementptr inbounds float* %tmp9141, i64 1
+  %tmp9143 = getelementptr inbounds float* %tmp9142, i64 1
+  %tmp9144 = getelementptr inbounds float* %tmp9143, i64 1
+  %tmp9145 = getelementptr inbounds float* %tmp9144, i64 1
+  %tmp9146 = getelementptr inbounds float* %tmp9145, i64 1
+  %tmp9147 = getelementptr inbounds float* %tmp9146, i64 1
+  %tmp9148 = getelementptr inbounds float* %tmp9147, i64 1
+  %tmp9149 = getelementptr inbounds float* %tmp9148, i64 1
+  %tmp9150 = getelementptr inbounds float* %tmp9149, i64 1
+  %tmp9151 = getelementptr inbounds float* %tmp9150, i64 1
+  %tmp9152 = getelementptr inbounds float* %tmp9151, i64 1
+  %tmp9153 = getelementptr inbounds float* %tmp9152, i64 1
+  %tmp9154 = getelementptr inbounds float* %tmp9153, i64 1
+  %tmp9155 = getelementptr inbounds float* %tmp9154, i64 1
+  %tmp9156 = getelementptr inbounds float* %tmp9155, i64 1
+  %tmp9157 = getelementptr inbounds float* %tmp9156, i64 1
+  %tmp9158 = getelementptr inbounds float* %tmp9157, i64 1
+  %tmp9159 = getelementptr inbounds float* %tmp9158, i64 1
+  %tmp9160 = getelementptr inbounds float* %tmp9159, i64 1
+  %tmp9161 = getelementptr inbounds float* %tmp9160, i64 1
+  %tmp9162 = getelementptr inbounds float* %tmp9161, i64 1
+  %tmp9163 = getelementptr inbounds float* %tmp9162, i64 1
+  %tmp9164 = getelementptr inbounds float* %tmp9163, i64 1
+  %tmp9165 = getelementptr inbounds float* %tmp9164, i64 1
+  %tmp9166 = getelementptr inbounds float* %tmp9165, i64 1
+  %tmp9167 = getelementptr inbounds float* %tmp9166, i64 1
+  %tmp9168 = getelementptr inbounds float* %tmp9167, i64 1
+  %tmp9169 = getelementptr inbounds float* %tmp9168, i64 1
+  %tmp9170 = getelementptr inbounds float* %tmp9169, i64 1
+  %tmp9171 = getelementptr inbounds float* %tmp9170, i64 1
+  %tmp9172 = getelementptr inbounds float* %tmp9171, i64 1
+  %tmp9173 = getelementptr inbounds float* %tmp9172, i64 1
+  %tmp9174 = getelementptr inbounds float* %tmp9173, i64 1
+  %tmp9175 = getelementptr inbounds float* %tmp9174, i64 1
+  %tmp9176 = getelementptr inbounds float* %tmp9175, i64 1
+  %tmp9177 = getelementptr inbounds float* %tmp9176, i64 1
+  %tmp9178 = getelementptr inbounds float* %tmp9177, i64 1
+  %tmp9179 = getelementptr inbounds float* %tmp9178, i64 1
+  %tmp9180 = getelementptr inbounds float* %tmp9179, i64 1
+  %tmp9181 = getelementptr inbounds float* %tmp9180, i64 1
+  %tmp9182 = getelementptr inbounds float* %tmp9181, i64 1
+  %tmp9183 = getelementptr inbounds float* %tmp9182, i64 1
+  %tmp9184 = getelementptr inbounds float* %tmp9183, i64 1
+  %tmp9185 = getelementptr inbounds float* %tmp9184, i64 1
+  %tmp9186 = getelementptr inbounds float* %tmp9185, i64 1
+  %tmp9187 = getelementptr inbounds float* %tmp9186, i64 1
+  %tmp9188 = getelementptr inbounds float* %tmp9187, i64 1
+  %tmp9189 = getelementptr inbounds float* %tmp9188, i64 1
+  %tmp9190 = getelementptr inbounds float* %tmp9189, i64 1
+  %tmp9191 = getelementptr inbounds float* %tmp9190, i64 1
+  %tmp9192 = getelementptr inbounds float* %tmp9191, i64 1
+  %tmp9193 = getelementptr inbounds float* %tmp9192, i64 1
+  %tmp9194 = getelementptr inbounds float* %tmp9193, i64 1
+  %tmp9195 = getelementptr inbounds float* %tmp9194, i64 1
+  %tmp9196 = getelementptr inbounds float* %tmp9195, i64 1
+  %tmp9197 = getelementptr inbounds float* %tmp9196, i64 1
+  %tmp9198 = getelementptr inbounds float* %tmp9197, i64 1
+  %tmp9199 = getelementptr inbounds float* %tmp9198, i64 1
+  %tmp9200 = getelementptr inbounds float* %tmp9199, i64 1
+  %tmp9201 = getelementptr inbounds float* %tmp9200, i64 1
+  %tmp9202 = getelementptr inbounds float* %tmp9201, i64 1
+  %tmp9203 = getelementptr inbounds float* %tmp9202, i64 1
+  %tmp9204 = getelementptr inbounds float* %tmp9203, i64 1
+  %tmp9205 = getelementptr inbounds float* %tmp9204, i64 1
+  %tmp9206 = getelementptr inbounds float* %tmp9205, i64 1
+  %tmp9207 = getelementptr inbounds float* %tmp9206, i64 1
+  %tmp9208 = getelementptr inbounds float* %tmp9207, i64 1
+  %tmp9209 = getelementptr inbounds float* %tmp9208, i64 1
+  %tmp9210 = getelementptr inbounds float* %tmp9209, i64 1
+  %tmp9211 = getelementptr inbounds float* %tmp9210, i64 1
+  %tmp9212 = getelementptr inbounds float* %tmp9211, i64 1
+  %tmp9213 = getelementptr inbounds float* %tmp9212, i64 1
+  %tmp9214 = getelementptr inbounds float* %tmp9213, i64 1
+  %tmp9215 = getelementptr inbounds float* %tmp9214, i64 1
+  %tmp9216 = getelementptr inbounds float* %tmp9215, i64 1
+  %tmp9217 = getelementptr inbounds float* %tmp9216, i64 1
+  %tmp9218 = getelementptr inbounds float* %tmp9217, i64 1
+  %tmp9219 = getelementptr inbounds float* %tmp9218, i64 1
+  %tmp9220 = getelementptr inbounds float* %tmp9219, i64 1
+  %tmp9221 = getelementptr inbounds float* %tmp9220, i64 1
+  %tmp9222 = getelementptr inbounds float* %tmp9221, i64 1
+  %tmp9223 = getelementptr inbounds float* %tmp9222, i64 1
+  %tmp9224 = getelementptr inbounds float* %tmp9223, i64 1
+  %tmp9225 = getelementptr inbounds float* %tmp9224, i64 1
+  %tmp9226 = getelementptr inbounds float* %tmp9225, i64 1
+  %tmp9227 = getelementptr inbounds float* %tmp9226, i64 1
+  %tmp9228 = getelementptr inbounds float* %tmp9227, i64 1
+  %tmp9229 = getelementptr inbounds float* %tmp9228, i64 1
+  %tmp9230 = getelementptr inbounds float* %tmp9229, i64 1
+  %tmp9231 = getelementptr inbounds float* %tmp9230, i64 1
+  %tmp9232 = getelementptr inbounds float* %tmp9231, i64 1
+  %tmp9233 = getelementptr inbounds float* %tmp9232, i64 1
+  %tmp9234 = getelementptr inbounds float* %tmp9233, i64 1
+  %tmp9235 = getelementptr inbounds float* %tmp9234, i64 1
+  %tmp9236 = getelementptr inbounds float* %tmp9235, i64 1
+  %tmp9237 = getelementptr inbounds float* %tmp9236, i64 1
+  %tmp9238 = getelementptr inbounds float* %tmp9237, i64 1
+  %tmp9239 = getelementptr inbounds float* %tmp9238, i64 1
+  %tmp9240 = getelementptr inbounds float* %tmp9239, i64 1
+  %tmp9241 = getelementptr inbounds float* %tmp9240, i64 1
+  %tmp9242 = getelementptr inbounds float* %tmp9241, i64 1
+  %tmp9243 = getelementptr inbounds float* %tmp9242, i64 1
+  %tmp9244 = getelementptr inbounds float* %tmp9243, i64 1
+  %tmp9245 = getelementptr inbounds float* %tmp9244, i64 1
+  %tmp9246 = getelementptr inbounds float* %tmp9245, i64 1
+  %tmp9247 = getelementptr inbounds float* %tmp9246, i64 1
+  %tmp9248 = getelementptr inbounds float* %tmp9247, i64 1
+  %tmp9249 = getelementptr inbounds float* %tmp9248, i64 1
+  %tmp9250 = getelementptr inbounds float* %tmp9249, i64 1
+  %tmp9251 = getelementptr inbounds float* %tmp9250, i64 1
+  %tmp9252 = getelementptr inbounds float* %tmp9251, i64 1
+  %tmp9253 = getelementptr inbounds float* %tmp9252, i64 1
+  %tmp9254 = getelementptr inbounds float* %tmp9253, i64 1
+  %tmp9255 = getelementptr inbounds float* %tmp9254, i64 1
+  %tmp9256 = getelementptr inbounds float* %tmp9255, i64 1
+  %tmp9257 = getelementptr inbounds float* %tmp9256, i64 1
+  %tmp9258 = getelementptr inbounds float* %tmp9257, i64 1
+  %tmp9259 = getelementptr inbounds float* %tmp9258, i64 1
+  %tmp9260 = getelementptr inbounds float* %tmp9259, i64 1
+  %tmp9261 = getelementptr inbounds float* %tmp9260, i64 1
+  %tmp9262 = getelementptr inbounds float* %tmp9261, i64 1
+  %tmp9263 = getelementptr inbounds float* %tmp9262, i64 1
+  %tmp9264 = getelementptr inbounds float* %tmp9263, i64 1
+  %tmp9265 = getelementptr inbounds float* %tmp9264, i64 1
+  %tmp9266 = getelementptr inbounds float* %tmp9265, i64 1
+  %tmp9267 = getelementptr inbounds float* %tmp9266, i64 1
+  %tmp9268 = getelementptr inbounds float* %tmp9267, i64 1
+  %tmp9269 = getelementptr inbounds float* %tmp9268, i64 1
+  %tmp9270 = getelementptr inbounds float* %tmp9269, i64 1
+  %tmp9271 = getelementptr inbounds float* %tmp9270, i64 1
+  %tmp9272 = getelementptr inbounds float* %tmp9271, i64 1
+  %tmp9273 = getelementptr inbounds float* %tmp9272, i64 1
+  %tmp9274 = getelementptr inbounds float* %tmp9273, i64 1
+  %tmp9275 = getelementptr inbounds float* %tmp9274, i64 1
+  %tmp9276 = getelementptr inbounds float* %tmp9275, i64 1
+  %tmp9277 = getelementptr inbounds float* %tmp9276, i64 1
+  %tmp9278 = getelementptr inbounds float* %tmp9277, i64 1
+  %tmp9279 = getelementptr inbounds float* %tmp9278, i64 1
+  %tmp9280 = getelementptr inbounds float* %tmp9279, i64 1
+  %tmp9281 = getelementptr inbounds float* %tmp9280, i64 1
+  %tmp9282 = getelementptr inbounds float* %tmp9281, i64 1
+  %tmp9283 = getelementptr inbounds float* %tmp9282, i64 1
+  %tmp9284 = getelementptr inbounds float* %tmp9283, i64 1
+  %tmp9285 = getelementptr inbounds float* %tmp9284, i64 1
+  %tmp9286 = getelementptr inbounds float* %tmp9285, i64 1
+  %tmp9287 = getelementptr inbounds float* %tmp9286, i64 1
+  %tmp9288 = getelementptr inbounds float* %tmp9287, i64 1
+  %tmp9289 = getelementptr inbounds float* %tmp9288, i64 1
+  %tmp9290 = getelementptr inbounds float* %tmp9289, i64 1
+  %tmp9291 = getelementptr inbounds float* %tmp9290, i64 1
+  %tmp9292 = getelementptr inbounds float* %tmp9291, i64 1
+  %tmp9293 = getelementptr inbounds float* %tmp9292, i64 1
+  %tmp9294 = getelementptr inbounds float* %tmp9293, i64 1
+  %tmp9295 = getelementptr inbounds float* %tmp9294, i64 1
+  %tmp9296 = getelementptr inbounds float* %tmp9295, i64 1
+  %tmp9297 = getelementptr inbounds float* %tmp9296, i64 1
+  %tmp9298 = getelementptr inbounds float* %tmp9297, i64 1
+  %tmp9299 = getelementptr inbounds float* %tmp9298, i64 1
+  %tmp9300 = getelementptr inbounds float* %tmp9299, i64 1
+  %tmp9301 = getelementptr inbounds float* %tmp9300, i64 1
+  %tmp9302 = getelementptr inbounds float* %tmp9301, i64 1
+  %tmp9303 = getelementptr inbounds float* %tmp9302, i64 1
+  %tmp9304 = getelementptr inbounds float* %tmp9303, i64 1
+  %tmp9305 = getelementptr inbounds float* %tmp9304, i64 1
+  %tmp9306 = getelementptr inbounds float* %tmp9305, i64 1
+  %tmp9307 = getelementptr inbounds float* %tmp9306, i64 1
+  %tmp9308 = getelementptr inbounds float* %tmp9307, i64 1
+  %tmp9309 = getelementptr inbounds float* %tmp9308, i64 1
+  %tmp9310 = getelementptr inbounds float* %tmp9309, i64 1
+  %tmp9311 = getelementptr inbounds float* %tmp9310, i64 1
+  %tmp9312 = getelementptr inbounds float* %tmp9311, i64 1
+  %tmp9313 = getelementptr inbounds float* %tmp9312, i64 1
+  %tmp9314 = getelementptr inbounds float* %tmp9313, i64 1
+  %tmp9315 = getelementptr inbounds float* %tmp9314, i64 1
+  %tmp9316 = getelementptr inbounds float* %tmp9315, i64 1
+  %tmp9317 = getelementptr inbounds float* %tmp9316, i64 1
+  %tmp9318 = getelementptr inbounds float* %tmp9317, i64 1
+  %tmp9319 = getelementptr inbounds float* %tmp9318, i64 1
+  %tmp9320 = getelementptr inbounds float* %tmp9319, i64 1
+  %tmp9321 = getelementptr inbounds float* %tmp9320, i64 1
+  %tmp9322 = getelementptr inbounds float* %tmp9321, i64 1
+  %tmp9323 = getelementptr inbounds float* %tmp9322, i64 1
+  %tmp9324 = getelementptr inbounds float* %tmp9323, i64 1
+  %tmp9325 = getelementptr inbounds float* %tmp9324, i64 1
+  %tmp9326 = getelementptr inbounds float* %tmp9325, i64 1
+  %tmp9327 = getelementptr inbounds float* %tmp9326, i64 1
+  %tmp9328 = getelementptr inbounds float* %tmp9327, i64 1
+  %tmp9329 = getelementptr inbounds float* %tmp9328, i64 1
+  %tmp9330 = getelementptr inbounds float* %tmp9329, i64 1
+  %tmp9331 = getelementptr inbounds float* %tmp9330, i64 1
+  %tmp9332 = getelementptr inbounds float* %tmp9331, i64 1
+  %tmp9333 = getelementptr inbounds float* %tmp9332, i64 1
+  %tmp9334 = getelementptr inbounds float* %tmp9333, i64 1
+  %tmp9335 = getelementptr inbounds float* %tmp9334, i64 1
+  %tmp9336 = getelementptr inbounds float* %tmp9335, i64 1
+  %tmp9337 = getelementptr inbounds float* %tmp9336, i64 1
+  %tmp9338 = getelementptr inbounds float* %tmp9337, i64 1
+  %tmp9339 = getelementptr inbounds float* %tmp9338, i64 1
+  %tmp9340 = getelementptr inbounds float* %tmp9339, i64 1
+  %tmp9341 = getelementptr inbounds float* %tmp9340, i64 1
+  %tmp9342 = getelementptr inbounds float* %tmp9341, i64 1
+  %tmp9343 = getelementptr inbounds float* %tmp9342, i64 1
+  %tmp9344 = getelementptr inbounds float* %tmp9343, i64 1
+  %tmp9345 = getelementptr inbounds float* %tmp9344, i64 1
+  %tmp9346 = getelementptr inbounds float* %tmp9345, i64 1
+  %tmp9347 = getelementptr inbounds float* %tmp9346, i64 1
+  %tmp9348 = getelementptr inbounds float* %tmp9347, i64 1
+  %tmp9349 = getelementptr inbounds float* %tmp9348, i64 1
+  %tmp9350 = getelementptr inbounds float* %tmp9349, i64 1
+  %tmp9351 = getelementptr inbounds float* %tmp9350, i64 1
+  %tmp9352 = getelementptr inbounds float* %tmp9351, i64 1
+  %tmp9353 = getelementptr inbounds float* %tmp9352, i64 1
+  %tmp9354 = getelementptr inbounds float* %tmp9353, i64 1
+  %tmp9355 = getelementptr inbounds float* %tmp9354, i64 1
+  %tmp9356 = getelementptr inbounds float* %tmp9355, i64 1
+  %tmp9357 = getelementptr inbounds float* %tmp9356, i64 1
+  %tmp9358 = getelementptr inbounds float* %tmp9357, i64 1
+  %tmp9359 = getelementptr inbounds float* %tmp9358, i64 1
+  %tmp9360 = getelementptr inbounds float* %tmp9359, i64 1
+  %tmp9361 = getelementptr inbounds float* %tmp9360, i64 1
+  %tmp9362 = getelementptr inbounds float* %tmp9361, i64 1
+  %tmp9363 = getelementptr inbounds float* %tmp9362, i64 1
+  %tmp9364 = getelementptr inbounds float* %tmp9363, i64 1
+  %tmp9365 = getelementptr inbounds float* %tmp9364, i64 1
+  %tmp9366 = getelementptr inbounds float* %tmp9365, i64 1
+  %tmp9367 = getelementptr inbounds float* %tmp9366, i64 1
+  %tmp9368 = getelementptr inbounds float* %tmp9367, i64 1
+  %tmp9369 = getelementptr inbounds float* %tmp9368, i64 1
+  %tmp9370 = getelementptr inbounds float* %tmp9369, i64 1
+  %tmp9371 = getelementptr inbounds float* %tmp9370, i64 1
+  %tmp9372 = getelementptr inbounds float* %tmp9371, i64 1
+  %tmp9373 = getelementptr inbounds float* %tmp9372, i64 1
+  %tmp9374 = getelementptr inbounds float* %tmp9373, i64 1
+  %tmp9375 = getelementptr inbounds float* %tmp9374, i64 1
+  %tmp9376 = getelementptr inbounds float* %tmp9375, i64 1
+  %tmp9377 = getelementptr inbounds float* %tmp9376, i64 1
+  %tmp9378 = getelementptr inbounds float* %tmp9377, i64 1
+  %tmp9379 = getelementptr inbounds float* %tmp9378, i64 1
+  %tmp9380 = getelementptr inbounds float* %tmp9379, i64 1
+  %tmp9381 = getelementptr inbounds float* %tmp9380, i64 1
+  %tmp9382 = getelementptr inbounds float* %tmp9381, i64 1
+  %tmp9383 = getelementptr inbounds float* %tmp9382, i64 1
+  %tmp9384 = getelementptr inbounds float* %tmp9383, i64 1
+  %tmp9385 = getelementptr inbounds float* %tmp9384, i64 1
+  %tmp9386 = getelementptr inbounds float* %tmp9385, i64 1
+  %tmp9387 = getelementptr inbounds float* %tmp9386, i64 1
+  %tmp9388 = getelementptr inbounds float* %tmp9387, i64 1
+  %tmp9389 = getelementptr inbounds float* %tmp9388, i64 1
+  %tmp9390 = getelementptr inbounds float* %tmp9389, i64 1
+  %tmp9391 = getelementptr inbounds float* %tmp9390, i64 1
+  %tmp9392 = getelementptr inbounds float* %tmp9391, i64 1
+  %tmp9393 = getelementptr inbounds float* %tmp9392, i64 1
+  %tmp9394 = getelementptr inbounds float* %tmp9393, i64 1
+  %tmp9395 = getelementptr inbounds float* %tmp9394, i64 1
+  %tmp9396 = getelementptr inbounds float* %tmp9395, i64 1
+  %tmp9397 = getelementptr inbounds float* %tmp9396, i64 1
+  %tmp9398 = getelementptr inbounds float* %tmp9397, i64 1
+  %tmp9399 = getelementptr inbounds float* %tmp9398, i64 1
+  %tmp9400 = getelementptr inbounds float* %tmp9399, i64 1
+  %tmp9401 = getelementptr inbounds float* %tmp9400, i64 1
+  %tmp9402 = getelementptr inbounds float* %tmp9401, i64 1
+  %tmp9403 = getelementptr inbounds float* %tmp9402, i64 1
+  %tmp9404 = getelementptr inbounds float* %tmp9403, i64 1
+  %tmp9405 = getelementptr inbounds float* %tmp9404, i64 1
+  %tmp9406 = getelementptr inbounds float* %tmp9405, i64 1
+  %tmp9407 = getelementptr inbounds float* %tmp9406, i64 1
+  %tmp9408 = getelementptr inbounds float* %tmp9407, i64 1
+  %tmp9409 = getelementptr inbounds float* %tmp9408, i64 1
+  %tmp9410 = getelementptr inbounds float* %tmp9409, i64 1
+  %tmp9411 = getelementptr inbounds float* %tmp9410, i64 1
+  %tmp9412 = getelementptr inbounds float* %tmp9411, i64 1
+  %tmp9413 = getelementptr inbounds float* %tmp9412, i64 1
+  %tmp9414 = getelementptr inbounds float* %tmp9413, i64 1
+  %tmp9415 = getelementptr inbounds float* %tmp9414, i64 1
+  %tmp9416 = getelementptr inbounds float* %tmp9415, i64 1
+  %tmp9417 = getelementptr inbounds float* %tmp9416, i64 1
+  %tmp9418 = getelementptr inbounds float* %tmp9417, i64 1
+  %tmp9419 = getelementptr inbounds float* %tmp9418, i64 1
+  %tmp9420 = getelementptr inbounds float* %tmp9419, i64 1
+  %tmp9421 = getelementptr inbounds float* %tmp9420, i64 1
+  %tmp9422 = getelementptr inbounds float* %tmp9421, i64 1
+  %tmp9423 = getelementptr inbounds float* %tmp9422, i64 1
+  %tmp9424 = getelementptr inbounds float* %tmp9423, i64 1
+  %tmp9425 = getelementptr inbounds float* %tmp9424, i64 1
+  %tmp9426 = getelementptr inbounds float* %tmp9425, i64 1
+  %tmp9427 = getelementptr inbounds float* %tmp9426, i64 1
+  %tmp9428 = getelementptr inbounds float* %tmp9427, i64 1
+  %tmp9429 = getelementptr inbounds float* %tmp9428, i64 1
+  %tmp9430 = getelementptr inbounds float* %tmp9429, i64 1
+  %tmp9431 = getelementptr inbounds float* %tmp9430, i64 1
+  %tmp9432 = getelementptr inbounds float* %tmp9431, i64 1
+  %tmp9433 = getelementptr inbounds float* %tmp9432, i64 1
+  %tmp9434 = getelementptr inbounds float* %tmp9433, i64 1
+  %tmp9435 = getelementptr inbounds float* %tmp9434, i64 1
+  %tmp9436 = getelementptr inbounds float* %tmp9435, i64 1
+  %tmp9437 = getelementptr inbounds float* %tmp9436, i64 1
+  %tmp9438 = getelementptr inbounds float* %tmp9437, i64 1
+  %tmp9439 = getelementptr inbounds float* %tmp9438, i64 1
+  %tmp9440 = getelementptr inbounds float* %tmp9439, i64 1
+  %tmp9441 = getelementptr inbounds float* %tmp9440, i64 1
+  %tmp9442 = getelementptr inbounds float* %tmp9441, i64 1
+  %tmp9443 = getelementptr inbounds float* %tmp9442, i64 1
+  %tmp9444 = getelementptr inbounds float* %tmp9443, i64 1
+  %tmp9445 = getelementptr inbounds float* %tmp9444, i64 1
+  %tmp9446 = getelementptr inbounds float* %tmp9445, i64 1
+  %tmp9447 = getelementptr inbounds float* %tmp9446, i64 1
+  %tmp9448 = getelementptr inbounds float* %tmp9447, i64 1
+  %tmp9449 = getelementptr inbounds float* %tmp9448, i64 1
+  %tmp9450 = getelementptr inbounds float* %tmp9449, i64 1
+  %tmp9451 = getelementptr inbounds float* %tmp9450, i64 1
+  %tmp9452 = getelementptr inbounds float* %tmp9451, i64 1
+  %tmp9453 = getelementptr inbounds float* %tmp9452, i64 1
+  %tmp9454 = getelementptr inbounds float* %tmp9453, i64 1
+  %tmp9455 = getelementptr inbounds float* %tmp9454, i64 1
+  %tmp9456 = getelementptr inbounds float* %tmp9455, i64 1
+  %tmp9457 = getelementptr inbounds float* %tmp9456, i64 1
+  %tmp9458 = getelementptr inbounds float* %tmp9457, i64 1
+  %tmp9459 = getelementptr inbounds float* %tmp9458, i64 1
+  %tmp9460 = getelementptr inbounds float* %tmp9459, i64 1
+  %tmp9461 = getelementptr inbounds float* %tmp9460, i64 1
+  %tmp9462 = getelementptr inbounds float* %tmp9461, i64 1
+  %tmp9463 = getelementptr inbounds float* %tmp9462, i64 1
+  %tmp9464 = getelementptr inbounds float* %tmp9463, i64 1
+  %tmp9465 = getelementptr inbounds float* %tmp9464, i64 1
+  %tmp9466 = getelementptr inbounds float* %tmp9465, i64 1
+  %tmp9467 = getelementptr inbounds float* %tmp9466, i64 1
+  %tmp9468 = getelementptr inbounds float* %tmp9467, i64 1
+  %tmp9469 = getelementptr inbounds float* %tmp9468, i64 1
+  %tmp9470 = getelementptr inbounds float* %tmp9469, i64 1
+  %tmp9471 = getelementptr inbounds float* %tmp9470, i64 1
+  %tmp9472 = getelementptr inbounds float* %tmp9471, i64 1
+  %tmp9473 = getelementptr inbounds float* %tmp9472, i64 1
+  %tmp9474 = getelementptr inbounds float* %tmp9473, i64 1
+  %tmp9475 = getelementptr inbounds float* %tmp9474, i64 1
+  %tmp9476 = getelementptr inbounds float* %tmp9475, i64 1
+  %tmp9477 = getelementptr inbounds float* %tmp9476, i64 1
+  %tmp9478 = getelementptr inbounds float* %tmp9477, i64 1
+  %tmp9479 = getelementptr inbounds float* %tmp9478, i64 1
+  %tmp9480 = getelementptr inbounds float* %tmp9479, i64 1
+  %tmp9481 = getelementptr inbounds float* %tmp9480, i64 1
+  %tmp9482 = getelementptr inbounds float* %tmp9481, i64 1
+  %tmp9483 = getelementptr inbounds float* %tmp9482, i64 1
+  %tmp9484 = getelementptr inbounds float* %tmp9483, i64 1
+  %tmp9485 = getelementptr inbounds float* %tmp9484, i64 1
+  %tmp9486 = getelementptr inbounds float* %tmp9485, i64 1
+  %tmp9487 = getelementptr inbounds float* %tmp9486, i64 1
+  %tmp9488 = getelementptr inbounds float* %tmp9487, i64 1
+  %tmp9489 = getelementptr inbounds float* %tmp9488, i64 1
+  %tmp9490 = getelementptr inbounds float* %tmp9489, i64 1
+  %tmp9491 = getelementptr inbounds float* %tmp9490, i64 1
+  %tmp9492 = getelementptr inbounds float* %tmp9491, i64 1
+  %tmp9493 = getelementptr inbounds float* %tmp9492, i64 1
+  %tmp9494 = getelementptr inbounds float* %tmp9493, i64 1
+  %tmp9495 = getelementptr inbounds float* %tmp9494, i64 1
+  %tmp9496 = getelementptr inbounds float* %tmp9495, i64 1
+  %tmp9497 = getelementptr inbounds float* %tmp9496, i64 1
+  %tmp9498 = getelementptr inbounds float* %tmp9497, i64 1
+  %tmp9499 = getelementptr inbounds float* %tmp9498, i64 1
+  %tmp9500 = getelementptr inbounds float* %tmp9499, i64 1
+  %tmp9501 = getelementptr inbounds float* %tmp9500, i64 1
+  %tmp9502 = getelementptr inbounds float* %tmp9501, i64 1
+  %tmp9503 = getelementptr inbounds float* %tmp9502, i64 1
+  %tmp9504 = getelementptr inbounds float* %tmp9503, i64 1
+  %tmp9505 = getelementptr inbounds float* %tmp9504, i64 1
+  %tmp9506 = getelementptr inbounds float* %tmp9505, i64 1
+  %tmp9507 = getelementptr inbounds float* %tmp9506, i64 1
+  %tmp9508 = getelementptr inbounds float* %tmp9507, i64 1
+  %tmp9509 = getelementptr inbounds float* %tmp9508, i64 1
+  %tmp9510 = getelementptr inbounds float* %tmp9509, i64 1
+  %tmp9511 = getelementptr inbounds float* %tmp9510, i64 1
+  %tmp9512 = getelementptr inbounds float* %tmp9511, i64 1
+  %tmp9513 = getelementptr inbounds float* %tmp9512, i64 1
+  %tmp9514 = getelementptr inbounds float* %tmp9513, i64 1
+  %tmp9515 = getelementptr inbounds float* %tmp9514, i64 1
+  %tmp9516 = getelementptr inbounds float* %tmp9515, i64 1
+  %tmp9517 = getelementptr inbounds float* %tmp9516, i64 1
+  %tmp9518 = getelementptr inbounds float* %tmp9517, i64 1
+  %tmp9519 = getelementptr inbounds float* %tmp9518, i64 1
+  %tmp9520 = getelementptr inbounds float* %tmp9519, i64 1
+  %tmp9521 = getelementptr inbounds float* %tmp9520, i64 1
+  %tmp9522 = getelementptr inbounds float* %tmp9521, i64 1
+  %tmp9523 = getelementptr inbounds float* %tmp9522, i64 1
+  %tmp9524 = getelementptr inbounds float* %tmp9523, i64 1
+  %tmp9525 = getelementptr inbounds float* %tmp9524, i64 1
+  %tmp9526 = getelementptr inbounds float* %tmp9525, i64 1
+  %tmp9527 = getelementptr inbounds float* %tmp9526, i64 1
+  %tmp9528 = getelementptr inbounds float* %tmp9527, i64 1
+  %tmp9529 = getelementptr inbounds float* %tmp9528, i64 1
+  %tmp9530 = getelementptr inbounds float* %tmp9529, i64 1
+  %tmp9531 = getelementptr inbounds float* %tmp9530, i64 1
+  %tmp9532 = getelementptr inbounds float* %tmp9531, i64 1
+  %tmp9533 = getelementptr inbounds float* %tmp9532, i64 1
+  %tmp9534 = getelementptr inbounds float* %tmp9533, i64 1
+  %tmp9535 = getelementptr inbounds float* %tmp9534, i64 1
+  %tmp9536 = getelementptr inbounds float* %tmp9535, i64 1
+  %tmp9537 = getelementptr inbounds float* %tmp9536, i64 1
+  %tmp9538 = getelementptr inbounds float* %tmp9537, i64 1
+  %tmp9539 = getelementptr inbounds float* %tmp9538, i64 1
+  %tmp9540 = getelementptr inbounds float* %tmp9539, i64 1
+  %tmp9541 = getelementptr inbounds float* %tmp9540, i64 1
+  %tmp9542 = getelementptr inbounds float* %tmp9541, i64 1
+  %tmp9543 = getelementptr inbounds float* %tmp9542, i64 1
+  %tmp9544 = getelementptr inbounds float* %tmp9543, i64 1
+  %tmp9545 = getelementptr inbounds float* %tmp9544, i64 1
+  %tmp9546 = getelementptr inbounds float* %tmp9545, i64 1
+  %tmp9547 = getelementptr inbounds float* %tmp9546, i64 1
+  %tmp9548 = getelementptr inbounds float* %tmp9547, i64 1
+  %tmp9549 = getelementptr inbounds float* %tmp9548, i64 1
+  %tmp9550 = getelementptr inbounds float* %tmp9549, i64 1
+  %tmp9551 = getelementptr inbounds float* %tmp9550, i64 1
+  %tmp9552 = getelementptr inbounds float* %tmp9551, i64 1
+  %tmp9553 = getelementptr inbounds float* %tmp9552, i64 1
+  %tmp9554 = getelementptr inbounds float* %tmp9553, i64 1
+  %tmp9555 = getelementptr inbounds float* %tmp9554, i64 1
+  %tmp9556 = getelementptr inbounds float* %tmp9555, i64 1
+  %tmp9557 = getelementptr inbounds float* %tmp9556, i64 1
+  %tmp9558 = getelementptr inbounds float* %tmp9557, i64 1
+  %tmp9559 = getelementptr inbounds float* %tmp9558, i64 1
+  %tmp9560 = getelementptr inbounds float* %tmp9559, i64 1
+  %tmp9561 = getelementptr inbounds float* %tmp9560, i64 1
+  %tmp9562 = getelementptr inbounds float* %tmp9561, i64 1
+  %tmp9563 = getelementptr inbounds float* %tmp9562, i64 1
+  %tmp9564 = getelementptr inbounds float* %tmp9563, i64 1
+  %tmp9565 = getelementptr inbounds float* %tmp9564, i64 1
+  %tmp9566 = getelementptr inbounds float* %tmp9565, i64 1
+  %tmp9567 = getelementptr inbounds float* %tmp9566, i64 1
+  %tmp9568 = getelementptr inbounds float* %tmp9567, i64 1
+  %tmp9569 = getelementptr inbounds float* %tmp9568, i64 1
+  %tmp9570 = getelementptr inbounds float* %tmp9569, i64 1
+  %tmp9571 = getelementptr inbounds float* %tmp9570, i64 1
+  %tmp9572 = getelementptr inbounds float* %tmp9571, i64 1
+  %tmp9573 = getelementptr inbounds float* %tmp9572, i64 1
+  %tmp9574 = getelementptr inbounds float* %tmp9573, i64 1
+  %tmp9575 = getelementptr inbounds float* %tmp9574, i64 1
+  %tmp9576 = getelementptr inbounds float* %tmp9575, i64 1
+  %tmp9577 = getelementptr inbounds float* %tmp9576, i64 1
+  %tmp9578 = getelementptr inbounds float* %tmp9577, i64 1
+  %tmp9579 = getelementptr inbounds float* %tmp9578, i64 1
+  %tmp9580 = getelementptr inbounds float* %tmp9579, i64 1
+  %tmp9581 = getelementptr inbounds float* %tmp9580, i64 1
+  %tmp9582 = getelementptr inbounds float* %tmp9581, i64 1
+  %tmp9583 = getelementptr inbounds float* %tmp9582, i64 1
+  %tmp9584 = getelementptr inbounds float* %tmp9583, i64 1
+  %tmp9585 = getelementptr inbounds float* %tmp9584, i64 1
+  %tmp9586 = getelementptr inbounds float* %tmp9585, i64 1
+  %tmp9587 = getelementptr inbounds float* %tmp9586, i64 1
+  %tmp9588 = getelementptr inbounds float* %tmp9587, i64 1
+  %tmp9589 = getelementptr inbounds float* %tmp9588, i64 1
+  %tmp9590 = getelementptr inbounds float* %tmp9589, i64 1
+  %tmp9591 = getelementptr inbounds float* %tmp9590, i64 1
+  %tmp9592 = getelementptr inbounds float* %tmp9591, i64 1
+  %tmp9593 = getelementptr inbounds float* %tmp9592, i64 1
+  %tmp9594 = getelementptr inbounds float* %tmp9593, i64 1
+  %tmp9595 = getelementptr inbounds float* %tmp9594, i64 1
+  %tmp9596 = getelementptr inbounds float* %tmp9595, i64 1
+  %tmp9597 = getelementptr inbounds float* %tmp9596, i64 1
+  %tmp9598 = getelementptr inbounds float* %tmp9597, i64 1
+  %tmp9599 = getelementptr inbounds float* %tmp9598, i64 1
+  %tmp9600 = getelementptr inbounds float* %tmp9599, i64 1
+  %tmp9601 = getelementptr inbounds float* %tmp9600, i64 1
+  %tmp9602 = getelementptr inbounds float* %tmp9601, i64 1
+  %tmp9603 = getelementptr inbounds float* %tmp9602, i64 1
+  %tmp9604 = getelementptr inbounds float* %tmp9603, i64 1
+  %tmp9605 = getelementptr inbounds float* %tmp9604, i64 1
+  %tmp9606 = getelementptr inbounds float* %tmp9605, i64 1
+  %tmp9607 = getelementptr inbounds float* %tmp9606, i64 1
+  %tmp9608 = getelementptr inbounds float* %tmp9607, i64 1
+  %tmp9609 = getelementptr inbounds float* %tmp9608, i64 1
+  %tmp9610 = getelementptr inbounds float* %tmp9609, i64 1
+  %tmp9611 = getelementptr inbounds float* %tmp9610, i64 1
+  %tmp9612 = getelementptr inbounds float* %tmp9611, i64 1
+  %tmp9613 = getelementptr inbounds float* %tmp9612, i64 1
+  %tmp9614 = getelementptr inbounds float* %tmp9613, i64 1
+  %tmp9615 = getelementptr inbounds float* %tmp9614, i64 1
+  %tmp9616 = getelementptr inbounds float* %tmp9615, i64 1
+  %tmp9617 = getelementptr inbounds float* %tmp9616, i64 1
+  %tmp9618 = getelementptr inbounds float* %tmp9617, i64 1
+  %tmp9619 = getelementptr inbounds float* %tmp9618, i64 1
+  %tmp9620 = getelementptr inbounds float* %tmp9619, i64 1
+  %tmp9621 = getelementptr inbounds float* %tmp9620, i64 1
+  %tmp9622 = getelementptr inbounds float* %tmp9621, i64 1
+  %tmp9623 = getelementptr inbounds float* %tmp9622, i64 1
+  %tmp9624 = getelementptr inbounds float* %tmp9623, i64 1
+  %tmp9625 = getelementptr inbounds float* %tmp9624, i64 1
+  %tmp9626 = getelementptr inbounds float* %tmp9625, i64 1
+  %tmp9627 = getelementptr inbounds float* %tmp9626, i64 1
+  %tmp9628 = getelementptr inbounds float* %tmp9627, i64 1
+  %tmp9629 = getelementptr inbounds float* %tmp9628, i64 1
+  %tmp9630 = getelementptr inbounds float* %tmp9629, i64 1
+  %tmp9631 = getelementptr inbounds float* %tmp9630, i64 1
+  %tmp9632 = getelementptr inbounds float* %tmp9631, i64 1
+  %tmp9633 = getelementptr inbounds float* %tmp9632, i64 1
+  %tmp9634 = getelementptr inbounds float* %tmp9633, i64 1
+  %tmp9635 = getelementptr inbounds float* %tmp9634, i64 1
+  %tmp9636 = getelementptr inbounds float* %tmp9635, i64 1
+  %tmp9637 = getelementptr inbounds float* %tmp9636, i64 1
+  %tmp9638 = getelementptr inbounds float* %tmp9637, i64 1
+  %tmp9639 = getelementptr inbounds float* %tmp9638, i64 1
+  %tmp9640 = getelementptr inbounds float* %tmp9639, i64 1
+  %tmp9641 = getelementptr inbounds float* %tmp9640, i64 1
+  %tmp9642 = getelementptr inbounds float* %tmp9641, i64 1
+  %tmp9643 = getelementptr inbounds float* %tmp9642, i64 1
+  %tmp9644 = getelementptr inbounds float* %tmp9643, i64 1
+  %tmp9645 = getelementptr inbounds float* %tmp9644, i64 1
+  %tmp9646 = getelementptr inbounds float* %tmp9645, i64 1
+  %tmp9647 = getelementptr inbounds float* %tmp9646, i64 1
+  %tmp9648 = getelementptr inbounds float* %tmp9647, i64 1
+  %tmp9649 = getelementptr inbounds float* %tmp9648, i64 1
+  %tmp9650 = getelementptr inbounds float* %tmp9649, i64 1
+  %tmp9651 = getelementptr inbounds float* %tmp9650, i64 1
+  %tmp9652 = getelementptr inbounds float* %tmp9651, i64 1
+  %tmp9653 = getelementptr inbounds float* %tmp9652, i64 1
+  %tmp9654 = getelementptr inbounds float* %tmp9653, i64 1
+  %tmp9655 = getelementptr inbounds float* %tmp9654, i64 1
+  %tmp9656 = getelementptr inbounds float* %tmp9655, i64 1
+  %tmp9657 = getelementptr inbounds float* %tmp9656, i64 1
+  %tmp9658 = getelementptr inbounds float* %tmp9657, i64 1
+  %tmp9659 = getelementptr inbounds float* %tmp9658, i64 1
+  %tmp9660 = getelementptr inbounds float* %tmp9659, i64 1
+  %tmp9661 = getelementptr inbounds float* %tmp9660, i64 1
+  %tmp9662 = getelementptr inbounds float* %tmp9661, i64 1
+  %tmp9663 = getelementptr inbounds float* %tmp9662, i64 1
+  %tmp9664 = getelementptr inbounds float* %tmp9663, i64 1
+  %tmp9665 = getelementptr inbounds float* %tmp9664, i64 1
+  %tmp9666 = getelementptr inbounds float* %tmp9665, i64 1
+  %tmp9667 = getelementptr inbounds float* %tmp9666, i64 1
+  %tmp9668 = getelementptr inbounds float* %tmp9667, i64 1
+  %tmp9669 = getelementptr inbounds float* %tmp9668, i64 1
+  %tmp9670 = getelementptr inbounds float* %tmp9669, i64 1
+  %tmp9671 = getelementptr inbounds float* %tmp9670, i64 1
+  %tmp9672 = getelementptr inbounds float* %tmp9671, i64 1
+  %tmp9673 = getelementptr inbounds float* %tmp9672, i64 1
+  %tmp9674 = getelementptr inbounds float* %tmp9673, i64 1
+  %tmp9675 = getelementptr inbounds float* %tmp9674, i64 1
+  %tmp9676 = getelementptr inbounds float* %tmp9675, i64 1
+  %tmp9677 = getelementptr inbounds float* %tmp9676, i64 1
+  %tmp9678 = getelementptr inbounds float* %tmp9677, i64 1
+  %tmp9679 = getelementptr inbounds float* %tmp9678, i64 1
+  %tmp9680 = getelementptr inbounds float* %tmp9679, i64 1
+  %tmp9681 = getelementptr inbounds float* %tmp9680, i64 1
+  %tmp9682 = getelementptr inbounds float* %tmp9681, i64 1
+  %tmp9683 = getelementptr inbounds float* %tmp9682, i64 1
+  %tmp9684 = getelementptr inbounds float* %tmp9683, i64 1
+  %tmp9685 = getelementptr inbounds float* %tmp9684, i64 1
+  %tmp9686 = getelementptr inbounds float* %tmp9685, i64 1
+  %tmp9687 = getelementptr inbounds float* %tmp9686, i64 1
+  %tmp9688 = getelementptr inbounds float* %tmp9687, i64 1
+  %tmp9689 = getelementptr inbounds float* %tmp9688, i64 1
+  %tmp9690 = getelementptr inbounds float* %tmp9689, i64 1
+  %tmp9691 = getelementptr inbounds float* %tmp9690, i64 1
+  %tmp9692 = getelementptr inbounds float* %tmp9691, i64 1
+  %tmp9693 = getelementptr inbounds float* %tmp9692, i64 1
+  %tmp9694 = getelementptr inbounds float* %tmp9693, i64 1
+  %tmp9695 = getelementptr inbounds float* %tmp9694, i64 1
+  %tmp9696 = getelementptr inbounds float* %tmp9695, i64 1
+  %tmp9697 = getelementptr inbounds float* %tmp9696, i64 1
+  %tmp9698 = getelementptr inbounds float* %tmp9697, i64 1
+  %tmp9699 = getelementptr inbounds float* %tmp9698, i64 1
+  %tmp9700 = getelementptr inbounds float* %tmp9699, i64 1
+  %tmp9701 = getelementptr inbounds float* %tmp9700, i64 1
+  %tmp9702 = getelementptr inbounds float* %tmp9701, i64 1
+  %tmp9703 = getelementptr inbounds float* %tmp9702, i64 1
+  %tmp9704 = getelementptr inbounds float* %tmp9703, i64 1
+  %tmp9705 = getelementptr inbounds float* %tmp9704, i64 1
+  %tmp9706 = getelementptr inbounds float* %tmp9705, i64 1
+  %tmp9707 = getelementptr inbounds float* %tmp9706, i64 1
+  %tmp9708 = getelementptr inbounds float* %tmp9707, i64 1
+  %tmp9709 = getelementptr inbounds float* %tmp9708, i64 1
+  %tmp9710 = getelementptr inbounds float* %tmp9709, i64 1
+  %tmp9711 = getelementptr inbounds float* %tmp9710, i64 1
+  %tmp9712 = getelementptr inbounds float* %tmp9711, i64 1
+  %tmp9713 = getelementptr inbounds float* %tmp9712, i64 1
+  %tmp9714 = getelementptr inbounds float* %tmp9713, i64 1
+  %tmp9715 = getelementptr inbounds float* %tmp9714, i64 1
+  %tmp9716 = getelementptr inbounds float* %tmp9715, i64 1
+  %tmp9717 = getelementptr inbounds float* %tmp9716, i64 1
+  %tmp9718 = getelementptr inbounds float* %tmp9717, i64 1
+  %tmp9719 = getelementptr inbounds float* %tmp9718, i64 1
+  %tmp9720 = getelementptr inbounds float* %tmp9719, i64 1
+  %tmp9721 = getelementptr inbounds float* %tmp9720, i64 1
+  %tmp9722 = getelementptr inbounds float* %tmp9721, i64 1
+  %tmp9723 = getelementptr inbounds float* %tmp9722, i64 1
+  %tmp9724 = getelementptr inbounds float* %tmp9723, i64 1
+  %tmp9725 = getelementptr inbounds float* %tmp9724, i64 1
+  %tmp9726 = getelementptr inbounds float* %tmp9725, i64 1
+  %tmp9727 = getelementptr inbounds float* %tmp9726, i64 1
+  %tmp9728 = getelementptr inbounds float* %tmp9727, i64 1
+  %tmp9729 = getelementptr inbounds float* %tmp9728, i64 1
+  %tmp9730 = getelementptr inbounds float* %tmp9729, i64 1
+  %tmp9731 = getelementptr inbounds float* %tmp9730, i64 1
+  %tmp9732 = getelementptr inbounds float* %tmp9731, i64 1
+  %tmp9733 = getelementptr inbounds float* %tmp9732, i64 1
+  %tmp9734 = getelementptr inbounds float* %tmp9733, i64 1
+  %tmp9735 = getelementptr inbounds float* %tmp9734, i64 1
+  %tmp9736 = getelementptr inbounds float* %tmp9735, i64 1
+  %tmp9737 = getelementptr inbounds float* %tmp9736, i64 1
+  %tmp9738 = getelementptr inbounds float* %tmp9737, i64 1
+  %tmp9739 = getelementptr inbounds float* %tmp9738, i64 1
+  %tmp9740 = getelementptr inbounds float* %tmp9739, i64 1
+  %tmp9741 = getelementptr inbounds float* %tmp9740, i64 1
+  %tmp9742 = getelementptr inbounds float* %tmp9741, i64 1
+  %tmp9743 = getelementptr inbounds float* %tmp9742, i64 1
+  %tmp9744 = getelementptr inbounds float* %tmp9743, i64 1
+  %tmp9745 = getelementptr inbounds float* %tmp9744, i64 1
+  %tmp9746 = getelementptr inbounds float* %tmp9745, i64 1
+  %tmp9747 = getelementptr inbounds float* %tmp9746, i64 1
+  %tmp9748 = getelementptr inbounds float* %tmp9747, i64 1
+  %tmp9749 = getelementptr inbounds float* %tmp9748, i64 1
+  %tmp9750 = getelementptr inbounds float* %tmp9749, i64 1
+  %tmp9751 = getelementptr inbounds float* %tmp9750, i64 1
+  %tmp9752 = getelementptr inbounds float* %tmp9751, i64 1
+  %tmp9753 = getelementptr inbounds float* %tmp9752, i64 1
+  %tmp9754 = getelementptr inbounds float* %tmp9753, i64 1
+  %tmp9755 = getelementptr inbounds float* %tmp9754, i64 1
+  %tmp9756 = getelementptr inbounds float* %tmp9755, i64 1
+  %tmp9757 = getelementptr inbounds float* %tmp9756, i64 1
+  %tmp9758 = getelementptr inbounds float* %tmp9757, i64 1
+  %tmp9759 = getelementptr inbounds float* %tmp9758, i64 1
+  %tmp9760 = getelementptr inbounds float* %tmp9759, i64 1
+  %tmp9761 = getelementptr inbounds float* %tmp9760, i64 1
+  %tmp9762 = getelementptr inbounds float* %tmp9761, i64 1
+  %tmp9763 = getelementptr inbounds float* %tmp9762, i64 1
+  %tmp9764 = getelementptr inbounds float* %tmp9763, i64 1
+  %tmp9765 = getelementptr inbounds float* %tmp9764, i64 1
+  %tmp9766 = getelementptr inbounds float* %tmp9765, i64 1
+  %tmp9767 = getelementptr inbounds float* %tmp9766, i64 1
+  %tmp9768 = getelementptr inbounds float* %tmp9767, i64 1
+  %tmp9769 = getelementptr inbounds float* %tmp9768, i64 1
+  %tmp9770 = getelementptr inbounds float* %tmp9769, i64 1
+  %tmp9771 = getelementptr inbounds float* %tmp9770, i64 1
+  %tmp9772 = getelementptr inbounds float* %tmp9771, i64 1
+  %tmp9773 = getelementptr inbounds float* %tmp9772, i64 1
+  %tmp9774 = getelementptr inbounds float* %tmp9773, i64 1
+  %tmp9775 = getelementptr inbounds float* %tmp9774, i64 1
+  %tmp9776 = getelementptr inbounds float* %tmp9775, i64 1
+  %tmp9777 = getelementptr inbounds float* %tmp9776, i64 1
+  %tmp9778 = getelementptr inbounds float* %tmp9777, i64 1
+  %tmp9779 = getelementptr inbounds float* %tmp9778, i64 1
+  %tmp9780 = getelementptr inbounds float* %tmp9779, i64 1
+  %tmp9781 = getelementptr inbounds float* %tmp9780, i64 1
+  %tmp9782 = getelementptr inbounds float* %tmp9781, i64 1
+  %tmp9783 = getelementptr inbounds float* %tmp9782, i64 1
+  %tmp9784 = getelementptr inbounds float* %tmp9783, i64 1
+  %tmp9785 = getelementptr inbounds float* %tmp9784, i64 1
+  %tmp9786 = getelementptr inbounds float* %tmp9785, i64 1
+  %tmp9787 = getelementptr inbounds float* %tmp9786, i64 1
+  %tmp9788 = getelementptr inbounds float* %tmp9787, i64 1
+  %tmp9789 = getelementptr inbounds float* %tmp9788, i64 1
+  %tmp9790 = getelementptr inbounds float* %tmp9789, i64 1
+  %tmp9791 = getelementptr inbounds float* %tmp9790, i64 1
+  %tmp9792 = getelementptr inbounds float* %tmp9791, i64 1
+  %tmp9793 = getelementptr inbounds float* %tmp9792, i64 1
+  %tmp9794 = getelementptr inbounds float* %tmp9793, i64 1
+  %tmp9795 = getelementptr inbounds float* %tmp9794, i64 1
+  %tmp9796 = getelementptr inbounds float* %tmp9795, i64 1
+  %tmp9797 = getelementptr inbounds float* %tmp9796, i64 1
+  %tmp9798 = getelementptr inbounds float* %tmp9797, i64 1
+  %tmp9799 = getelementptr inbounds float* %tmp9798, i64 1
+  %tmp9800 = getelementptr inbounds float* %tmp9799, i64 1
+  %tmp9801 = getelementptr inbounds float* %tmp9800, i64 1
+  %tmp9802 = getelementptr inbounds float* %tmp9801, i64 1
+  %tmp9803 = getelementptr inbounds float* %tmp9802, i64 1
+  %tmp9804 = getelementptr inbounds float* %tmp9803, i64 1
+  %tmp9805 = getelementptr inbounds float* %tmp9804, i64 1
+  %tmp9806 = getelementptr inbounds float* %tmp9805, i64 1
+  %tmp9807 = getelementptr inbounds float* %tmp9806, i64 1
+  %tmp9808 = getelementptr inbounds float* %tmp9807, i64 1
+  %tmp9809 = getelementptr inbounds float* %tmp9808, i64 1
+  %tmp9810 = getelementptr inbounds float* %tmp9809, i64 1
+  %tmp9811 = getelementptr inbounds float* %tmp9810, i64 1
+  %tmp9812 = getelementptr inbounds float* %tmp9811, i64 1
+  %tmp9813 = getelementptr inbounds float* %tmp9812, i64 1
+  %tmp9814 = getelementptr inbounds float* %tmp9813, i64 1
+  %tmp9815 = getelementptr inbounds float* %tmp9814, i64 1
+  %tmp9816 = getelementptr inbounds float* %tmp9815, i64 1
+  %tmp9817 = getelementptr inbounds float* %tmp9816, i64 1
+  %tmp9818 = getelementptr inbounds float* %tmp9817, i64 1
+  %tmp9819 = getelementptr inbounds float* %tmp9818, i64 1
+  %tmp9820 = getelementptr inbounds float* %tmp9819, i64 1
+  %tmp9821 = getelementptr inbounds float* %tmp9820, i64 1
+  %tmp9822 = getelementptr inbounds float* %tmp9821, i64 1
+  %tmp9823 = getelementptr inbounds float* %tmp9822, i64 1
+  %tmp9824 = getelementptr inbounds float* %tmp9823, i64 1
+  %tmp9825 = getelementptr inbounds float* %tmp9824, i64 1
+  %tmp9826 = getelementptr inbounds float* %tmp9825, i64 1
+  %tmp9827 = getelementptr inbounds float* %tmp9826, i64 1
+  %tmp9828 = getelementptr inbounds float* %tmp9827, i64 1
+  %tmp9829 = getelementptr inbounds float* %tmp9828, i64 1
+  %tmp9830 = getelementptr inbounds float* %tmp9829, i64 1
+  %tmp9831 = getelementptr inbounds float* %tmp9830, i64 1
+  %tmp9832 = getelementptr inbounds float* %tmp9831, i64 1
+  %tmp9833 = getelementptr inbounds float* %tmp9832, i64 1
+  %tmp9834 = getelementptr inbounds float* %tmp9833, i64 1
+  %tmp9835 = getelementptr inbounds float* %tmp9834, i64 1
+  %tmp9836 = getelementptr inbounds float* %tmp9835, i64 1
+  %tmp9837 = getelementptr inbounds float* %tmp9836, i64 1
+  %tmp9838 = getelementptr inbounds float* %tmp9837, i64 1
+  %tmp9839 = getelementptr inbounds float* %tmp9838, i64 1
+  %tmp9840 = getelementptr inbounds float* %tmp9839, i64 1
+  %tmp9841 = getelementptr inbounds float* %tmp9840, i64 1
+  %tmp9842 = getelementptr inbounds float* %tmp9841, i64 1
+  %tmp9843 = getelementptr inbounds float* %tmp9842, i64 1
+  %tmp9844 = getelementptr inbounds float* %tmp9843, i64 1
+  %tmp9845 = getelementptr inbounds float* %tmp9844, i64 1
+  %tmp9846 = getelementptr inbounds float* %tmp9845, i64 1
+  %tmp9847 = getelementptr inbounds float* %tmp9846, i64 1
+  %tmp9848 = getelementptr inbounds float* %tmp9847, i64 1
+  %tmp9849 = getelementptr inbounds float* %tmp9848, i64 1
+  %tmp9850 = getelementptr inbounds float* %tmp9849, i64 1
+  %tmp9851 = getelementptr inbounds float* %tmp9850, i64 1
+  %tmp9852 = getelementptr inbounds float* %tmp9851, i64 1
+  %tmp9853 = getelementptr inbounds float* %tmp9852, i64 1
+  %tmp9854 = getelementptr inbounds float* %tmp9853, i64 1
+  %tmp9855 = getelementptr inbounds float* %tmp9854, i64 1
+  %tmp9856 = getelementptr inbounds float* %tmp9855, i64 1
+  %tmp9857 = getelementptr inbounds float* %tmp9856, i64 1
+  %tmp9858 = getelementptr inbounds float* %tmp9857, i64 1
+  %tmp9859 = getelementptr inbounds float* %tmp9858, i64 1
+  %tmp9860 = getelementptr inbounds float* %tmp9859, i64 1
+  %tmp9861 = getelementptr inbounds float* %tmp9860, i64 1
+  %tmp9862 = getelementptr inbounds float* %tmp9861, i64 1
+  %tmp9863 = getelementptr inbounds float* %tmp9862, i64 1
+  %tmp9864 = getelementptr inbounds float* %tmp9863, i64 1
+  %tmp9865 = getelementptr inbounds float* %tmp9864, i64 1
+  %tmp9866 = getelementptr inbounds float* %tmp9865, i64 1
+  %tmp9867 = getelementptr inbounds float* %tmp9866, i64 1
+  %tmp9868 = getelementptr inbounds float* %tmp9867, i64 1
+  %tmp9869 = getelementptr inbounds float* %tmp9868, i64 1
+  %tmp9870 = getelementptr inbounds float* %tmp9869, i64 1
+  %tmp9871 = getelementptr inbounds float* %tmp9870, i64 1
+  %tmp9872 = getelementptr inbounds float* %tmp9871, i64 1
+  %tmp9873 = getelementptr inbounds float* %tmp9872, i64 1
+  %tmp9874 = getelementptr inbounds float* %tmp9873, i64 1
+  %tmp9875 = getelementptr inbounds float* %tmp9874, i64 1
+  %tmp9876 = getelementptr inbounds float* %tmp9875, i64 1
+  %tmp9877 = getelementptr inbounds float* %tmp9876, i64 1
+  %tmp9878 = getelementptr inbounds float* %tmp9877, i64 1
+  %tmp9879 = getelementptr inbounds float* %tmp9878, i64 1
+  %tmp9880 = getelementptr inbounds float* %tmp9879, i64 1
+  %tmp9881 = getelementptr inbounds float* %tmp9880, i64 1
+  %tmp9882 = getelementptr inbounds float* %tmp9881, i64 1
+  %tmp9883 = getelementptr inbounds float* %tmp9882, i64 1
+  %tmp9884 = getelementptr inbounds float* %tmp9883, i64 1
+  %tmp9885 = getelementptr inbounds float* %tmp9884, i64 1
+  %tmp9886 = getelementptr inbounds float* %tmp9885, i64 1
+  %tmp9887 = getelementptr inbounds float* %tmp9886, i64 1
+  %tmp9888 = getelementptr inbounds float* %tmp9887, i64 1
+  %tmp9889 = getelementptr inbounds float* %tmp9888, i64 1
+  %tmp9890 = getelementptr inbounds float* %tmp9889, i64 1
+  %tmp9891 = getelementptr inbounds float* %tmp9890, i64 1
+  %tmp9892 = getelementptr inbounds float* %tmp9891, i64 1
+  %tmp9893 = getelementptr inbounds float* %tmp9892, i64 1
+  %tmp9894 = getelementptr inbounds float* %tmp9893, i64 1
+  %tmp9895 = getelementptr inbounds float* %tmp9894, i64 1
+  %tmp9896 = getelementptr inbounds float* %tmp9895, i64 1
+  %tmp9897 = getelementptr inbounds float* %tmp9896, i64 1
+  %tmp9898 = getelementptr inbounds float* %tmp9897, i64 1
+  %tmp9899 = getelementptr inbounds float* %tmp9898, i64 1
+  %tmp9900 = getelementptr inbounds float* %tmp9899, i64 1
+  %tmp9901 = getelementptr inbounds float* %tmp9900, i64 1
+  %tmp9902 = getelementptr inbounds float* %tmp9901, i64 1
+  %tmp9903 = getelementptr inbounds float* %tmp9902, i64 1
+  %tmp9904 = getelementptr inbounds float* %tmp9903, i64 1
+  %tmp9905 = getelementptr inbounds float* %tmp9904, i64 1
+  %tmp9906 = getelementptr inbounds float* %tmp9905, i64 1
+  %tmp9907 = getelementptr inbounds float* %tmp9906, i64 1
+  %tmp9908 = getelementptr inbounds float* %tmp9907, i64 1
+  %tmp9909 = getelementptr inbounds float* %tmp9908, i64 1
+  %tmp9910 = getelementptr inbounds float* %tmp9909, i64 1
+  %tmp9911 = getelementptr inbounds float* %tmp9910, i64 1
+  %tmp9912 = getelementptr inbounds float* %tmp9911, i64 1
+  %tmp9913 = getelementptr inbounds float* %tmp9912, i64 1
+  %tmp9914 = getelementptr inbounds float* %tmp9913, i64 1
+  %tmp9915 = getelementptr inbounds float* %tmp9914, i64 1
+  %tmp9916 = getelementptr inbounds float* %tmp9915, i64 1
+  %tmp9917 = getelementptr inbounds float* %tmp9916, i64 1
+  %tmp9918 = getelementptr inbounds float* %tmp9917, i64 1
+  %tmp9919 = getelementptr inbounds float* %tmp9918, i64 1
+  %tmp9920 = getelementptr inbounds float* %tmp9919, i64 1
+  %tmp9921 = getelementptr inbounds float* %tmp9920, i64 1
+  %tmp9922 = getelementptr inbounds float* %tmp9921, i64 1
+  %tmp9923 = getelementptr inbounds float* %tmp9922, i64 1
+  %tmp9924 = getelementptr inbounds float* %tmp9923, i64 1
+  %tmp9925 = getelementptr inbounds float* %tmp9924, i64 1
+  %tmp9926 = getelementptr inbounds float* %tmp9925, i64 1
+  %tmp9927 = getelementptr inbounds float* %tmp9926, i64 1
+  %tmp9928 = getelementptr inbounds float* %tmp9927, i64 1
+  %tmp9929 = getelementptr inbounds float* %tmp9928, i64 1
+  %tmp9930 = getelementptr inbounds float* %tmp9929, i64 1
+  %tmp9931 = getelementptr inbounds float* %tmp9930, i64 1
+  %tmp9932 = getelementptr inbounds float* %tmp9931, i64 1
+  %tmp9933 = getelementptr inbounds float* %tmp9932, i64 1
+  %tmp9934 = getelementptr inbounds float* %tmp9933, i64 1
+  %tmp9935 = getelementptr inbounds float* %tmp9934, i64 1
+  %tmp9936 = getelementptr inbounds float* %tmp9935, i64 1
+  %tmp9937 = getelementptr inbounds float* %tmp9936, i64 1
+  %tmp9938 = getelementptr inbounds float* %tmp9937, i64 1
+  %tmp9939 = getelementptr inbounds float* %tmp9938, i64 1
+  %tmp9940 = getelementptr inbounds float* %tmp9939, i64 1
+  %tmp9941 = getelementptr inbounds float* %tmp9940, i64 1
+  %tmp9942 = getelementptr inbounds float* %tmp9941, i64 1
+  %tmp9943 = getelementptr inbounds float* %tmp9942, i64 1
+  %tmp9944 = getelementptr inbounds float* %tmp9943, i64 1
+  %tmp9945 = getelementptr inbounds float* %tmp9944, i64 1
+  %tmp9946 = getelementptr inbounds float* %tmp9945, i64 1
+  %tmp9947 = getelementptr inbounds float* %tmp9946, i64 1
+  %tmp9948 = getelementptr inbounds float* %tmp9947, i64 1
+  %tmp9949 = getelementptr inbounds float* %tmp9948, i64 1
+  %tmp9950 = getelementptr inbounds float* %tmp9949, i64 1
+  %tmp9951 = getelementptr inbounds float* %tmp9950, i64 1
+  %tmp9952 = getelementptr inbounds float* %tmp9951, i64 1
+  %tmp9953 = getelementptr inbounds float* %tmp9952, i64 1
+  %tmp9954 = getelementptr inbounds float* %tmp9953, i64 1
+  %tmp9955 = getelementptr inbounds float* %tmp9954, i64 1
+  %tmp9956 = getelementptr inbounds float* %tmp9955, i64 1
+  %tmp9957 = getelementptr inbounds float* %tmp9956, i64 1
+  %tmp9958 = getelementptr inbounds float* %tmp9957, i64 1
+  %tmp9959 = getelementptr inbounds float* %tmp9958, i64 1
+  %tmp9960 = getelementptr inbounds float* %tmp9959, i64 1
+  %tmp9961 = getelementptr inbounds float* %tmp9960, i64 1
+  %tmp9962 = getelementptr inbounds float* %tmp9961, i64 1
+  %tmp9963 = getelementptr inbounds float* %tmp9962, i64 1
+  %tmp9964 = getelementptr inbounds float* %tmp9963, i64 1
+  %tmp9965 = getelementptr inbounds float* %tmp9964, i64 1
+  %tmp9966 = getelementptr inbounds float* %tmp9965, i64 1
+  %tmp9967 = getelementptr inbounds float* %tmp9966, i64 1
+  %tmp9968 = getelementptr inbounds float* %tmp9967, i64 1
+  %tmp9969 = getelementptr inbounds float* %tmp9968, i64 1
+  %tmp9970 = getelementptr inbounds float* %tmp9969, i64 1
+  %tmp9971 = getelementptr inbounds float* %tmp9970, i64 1
+  %tmp9972 = getelementptr inbounds float* %tmp9971, i64 1
+  %tmp9973 = getelementptr inbounds float* %tmp9972, i64 1
+  %tmp9974 = getelementptr inbounds float* %tmp9973, i64 1
+  %tmp9975 = getelementptr inbounds float* %tmp9974, i64 1
+  %tmp9976 = getelementptr inbounds float* %tmp9975, i64 1
+  %tmp9977 = getelementptr inbounds float* %tmp9976, i64 1
+  %tmp9978 = getelementptr inbounds float* %tmp9977, i64 1
+  %tmp9979 = getelementptr inbounds float* %tmp9978, i64 1
+  %tmp9980 = getelementptr inbounds float* %tmp9979, i64 1
+  %tmp9981 = getelementptr inbounds float* %tmp9980, i64 1
+  %tmp9982 = getelementptr inbounds float* %tmp9981, i64 1
+  %tmp9983 = getelementptr inbounds float* %tmp9982, i64 1
+  %tmp9984 = getelementptr inbounds float* %tmp9983, i64 1
+  %tmp9985 = getelementptr inbounds float* %tmp9984, i64 1
+  %tmp9986 = getelementptr inbounds float* %tmp9985, i64 1
+  %tmp9987 = getelementptr inbounds float* %tmp9986, i64 1
+  %tmp9988 = getelementptr inbounds float* %tmp9987, i64 1
+  %tmp9989 = getelementptr inbounds float* %tmp9988, i64 1
+  %tmp9990 = getelementptr inbounds float* %tmp9989, i64 1
+  %tmp9991 = getelementptr inbounds float* %tmp9990, i64 1
+  %tmp9992 = getelementptr inbounds float* %tmp9991, i64 1
+  %tmp9993 = getelementptr inbounds float* %tmp9992, i64 1
+  %tmp9994 = getelementptr inbounds float* %tmp9993, i64 1
+  %tmp9995 = getelementptr inbounds float* %tmp9994, i64 1
+  %tmp9996 = getelementptr inbounds float* %tmp9995, i64 1
+  %tmp9997 = getelementptr inbounds float* %tmp9996, i64 1
+  %tmp9998 = getelementptr inbounds float* %tmp9997, i64 1
+  %tmp9999 = getelementptr inbounds float* %tmp9998, i64 1
+  %tmp10000 = getelementptr inbounds float* %tmp9999, i64 1
+  %tmp10001 = getelementptr inbounds float* %tmp10000, i64 1
+  %tmp10002 = getelementptr inbounds float* %tmp10001, i64 1
+  %tmp10003 = getelementptr inbounds float* %tmp10002, i64 1
+  %tmp10004 = getelementptr inbounds float* %tmp10003, i64 1
+  %tmp10005 = getelementptr inbounds float* %tmp10004, i64 1
+  %tmp10006 = getelementptr inbounds float* %tmp10005, i64 1
+  %tmp10007 = getelementptr inbounds float* %tmp10006, i64 1
+  %tmp10008 = getelementptr inbounds float* %tmp10007, i64 1
+  %tmp10009 = getelementptr inbounds float* %tmp10008, i64 1
+  %tmp10010 = getelementptr inbounds float* %tmp10009, i64 1
+  %tmp10011 = getelementptr inbounds float* %tmp10010, i64 1
+  %tmp10012 = getelementptr inbounds float* %tmp10011, i64 1
+  %tmp10013 = getelementptr inbounds float* %tmp10012, i64 1
+  %tmp10014 = getelementptr inbounds float* %tmp10013, i64 1
+  %tmp10015 = getelementptr inbounds float* %tmp10014, i64 1
+  %tmp10016 = getelementptr inbounds float* %tmp10015, i64 1
+  %tmp10017 = getelementptr inbounds float* %tmp10016, i64 1
+  %tmp10018 = getelementptr inbounds float* %tmp10017, i64 1
+  %tmp10019 = getelementptr inbounds float* %tmp10018, i64 1
+  %tmp10020 = getelementptr inbounds float* %tmp10019, i64 1
+  %tmp10021 = getelementptr inbounds float* %tmp10020, i64 1
+  %tmp10022 = getelementptr inbounds float* %tmp10021, i64 1
+  %tmp10023 = getelementptr inbounds float* %tmp10022, i64 1
+  %tmp10024 = getelementptr inbounds float* %tmp10023, i64 1
+  %tmp10025 = getelementptr inbounds float* %tmp10024, i64 1
+  %tmp10026 = getelementptr inbounds float* %tmp10025, i64 1
+  %tmp10027 = getelementptr inbounds float* %tmp10026, i64 1
+  %tmp10028 = getelementptr inbounds float* %tmp10027, i64 1
+  %tmp10029 = getelementptr inbounds float* %tmp10028, i64 1
+  %tmp10030 = getelementptr inbounds float* %tmp10029, i64 1
+  %tmp10031 = getelementptr inbounds float* %tmp10030, i64 1
+  %tmp10032 = getelementptr inbounds float* %tmp10031, i64 1
+  %tmp10033 = getelementptr inbounds float* %tmp10032, i64 1
+  %tmp10034 = getelementptr inbounds float* %tmp10033, i64 1
+  %tmp10035 = getelementptr inbounds float* %tmp10034, i64 1
+  %tmp10036 = getelementptr inbounds float* %tmp10035, i64 1
+  %tmp10037 = getelementptr inbounds float* %tmp10036, i64 1
+  %tmp10038 = getelementptr inbounds float* %tmp10037, i64 1
+  %tmp10039 = getelementptr inbounds float* %tmp10038, i64 1
+  %tmp10040 = getelementptr inbounds float* %tmp10039, i64 1
+  %tmp10041 = getelementptr inbounds float* %tmp10040, i64 1
+  %tmp10042 = getelementptr inbounds float* %tmp10041, i64 1
+  %tmp10043 = getelementptr inbounds float* %tmp10042, i64 1
+  %tmp10044 = getelementptr inbounds float* %tmp10043, i64 1
+  %tmp10045 = getelementptr inbounds float* %tmp10044, i64 1
+  %tmp10046 = getelementptr inbounds float* %tmp10045, i64 1
+  %tmp10047 = getelementptr inbounds float* %tmp10046, i64 1
+  %tmp10048 = getelementptr inbounds float* %tmp10047, i64 1
+  %tmp10049 = getelementptr inbounds float* %tmp10048, i64 1
+  %tmp10050 = getelementptr inbounds float* %tmp10049, i64 1
+  %tmp10051 = getelementptr inbounds float* %tmp10050, i64 1
+  %tmp10052 = getelementptr inbounds float* %tmp10051, i64 1
+  %tmp10053 = getelementptr inbounds float* %tmp10052, i64 1
+  %tmp10054 = getelementptr inbounds float* %tmp10053, i64 1
+  %tmp10055 = getelementptr inbounds float* %tmp10054, i64 1
+  %tmp10056 = getelementptr inbounds float* %tmp10055, i64 1
+  %tmp10057 = getelementptr inbounds float* %tmp10056, i64 1
+  %tmp10058 = getelementptr inbounds float* %tmp10057, i64 1
+  %tmp10059 = getelementptr inbounds float* %tmp10058, i64 1
+  %tmp10060 = getelementptr inbounds float* %tmp10059, i64 1
+  %tmp10061 = getelementptr inbounds float* %tmp10060, i64 1
+  %tmp10062 = getelementptr inbounds float* %tmp10061, i64 1
+  %tmp10063 = getelementptr inbounds float* %tmp10062, i64 1
+  %tmp10064 = getelementptr inbounds float* %tmp10063, i64 1
+  %tmp10065 = getelementptr inbounds float* %tmp10064, i64 1
+  %tmp10066 = getelementptr inbounds float* %tmp10065, i64 1
+  %tmp10067 = getelementptr inbounds float* %tmp10066, i64 1
+  %tmp10068 = getelementptr inbounds float* %tmp10067, i64 1
+  %tmp10069 = getelementptr inbounds float* %tmp10068, i64 1
+  %tmp10070 = getelementptr inbounds float* %tmp10069, i64 1
+  %tmp10071 = getelementptr inbounds float* %tmp10070, i64 1
+  %tmp10072 = getelementptr inbounds float* %tmp10071, i64 1
+  %tmp10073 = getelementptr inbounds float* %tmp10072, i64 1
+  %tmp10074 = getelementptr inbounds float* %tmp10073, i64 1
+  %tmp10075 = getelementptr inbounds float* %tmp10074, i64 1
+  %tmp10076 = getelementptr inbounds float* %tmp10075, i64 1
+  %tmp10077 = getelementptr inbounds float* %tmp10076, i64 1
+  %tmp10078 = getelementptr inbounds float* %tmp10077, i64 1
+  %tmp10079 = getelementptr inbounds float* %tmp10078, i64 1
+  %tmp10080 = getelementptr inbounds float* %tmp10079, i64 1
+  %tmp10081 = getelementptr inbounds float* %tmp10080, i64 1
+  %tmp10082 = getelementptr inbounds float* %tmp10081, i64 1
+  %tmp10083 = getelementptr inbounds float* %tmp10082, i64 1
+  %tmp10084 = getelementptr inbounds float* %tmp10083, i64 1
+  %tmp10085 = getelementptr inbounds float* %tmp10084, i64 1
+  %tmp10086 = getelementptr inbounds float* %tmp10085, i64 1
+  %tmp10087 = getelementptr inbounds float* %tmp10086, i64 1
+  %tmp10088 = getelementptr inbounds float* %tmp10087, i64 1
+  %tmp10089 = getelementptr inbounds float* %tmp10088, i64 1
+  %tmp10090 = getelementptr inbounds float* %tmp10089, i64 1
+  %tmp10091 = getelementptr inbounds float* %tmp10090, i64 1
+  %tmp10092 = getelementptr inbounds float* %tmp10091, i64 1
+  %tmp10093 = getelementptr inbounds float* %tmp10092, i64 1
+  %tmp10094 = getelementptr inbounds float* %tmp10093, i64 1
+  %tmp10095 = getelementptr inbounds float* %tmp10094, i64 1
+  %tmp10096 = getelementptr inbounds float* %tmp10095, i64 1
+  %tmp10097 = getelementptr inbounds float* %tmp10096, i64 1
+  %tmp10098 = getelementptr inbounds float* %tmp10097, i64 1
+  %tmp10099 = getelementptr inbounds float* %tmp10098, i64 1
+  %tmp10100 = getelementptr inbounds float* %tmp10099, i64 1
+  %tmp10101 = getelementptr inbounds float* %tmp10100, i64 1
+  %tmp10102 = getelementptr inbounds float* %tmp10101, i64 1
+  %tmp10103 = getelementptr inbounds float* %tmp10102, i64 1
+  %tmp10104 = getelementptr inbounds float* %tmp10103, i64 1
+  %tmp10105 = getelementptr inbounds float* %tmp10104, i64 1
+  %tmp10106 = getelementptr inbounds float* %tmp10105, i64 1
+  %tmp10107 = getelementptr inbounds float* %tmp10106, i64 1
+  %tmp10108 = getelementptr inbounds float* %tmp10107, i64 1
+  %tmp10109 = getelementptr inbounds float* %tmp10108, i64 1
+  %tmp10110 = getelementptr inbounds float* %tmp10109, i64 1
+  %tmp10111 = getelementptr inbounds float* %tmp10110, i64 1
+  %tmp10112 = getelementptr inbounds float* %tmp10111, i64 1
+  %tmp10113 = getelementptr inbounds float* %tmp10112, i64 1
+  %tmp10114 = getelementptr inbounds float* %tmp10113, i64 1
+  %tmp10115 = getelementptr inbounds float* %tmp10114, i64 1
+  %tmp10116 = getelementptr inbounds float* %tmp10115, i64 1
+  %tmp10117 = getelementptr inbounds float* %tmp10116, i64 1
+  %tmp10118 = getelementptr inbounds float* %tmp10117, i64 1
+  %tmp10119 = getelementptr inbounds float* %tmp10118, i64 1
+  %tmp10120 = getelementptr inbounds float* %tmp10119, i64 1
+  %tmp10121 = getelementptr inbounds float* %tmp10120, i64 1
+  %tmp10122 = getelementptr inbounds float* %tmp10121, i64 1
+  %tmp10123 = getelementptr inbounds float* %tmp10122, i64 1
+  %tmp10124 = getelementptr inbounds float* %tmp10123, i64 1
+  %tmp10125 = getelementptr inbounds float* %tmp10124, i64 1
+  %tmp10126 = getelementptr inbounds float* %tmp10125, i64 1
+  %tmp10127 = getelementptr inbounds float* %tmp10126, i64 1
+  %tmp10128 = getelementptr inbounds float* %tmp10127, i64 1
+  %tmp10129 = getelementptr inbounds float* %tmp10128, i64 1
+  %tmp10130 = getelementptr inbounds float* %tmp10129, i64 1
+  %tmp10131 = getelementptr inbounds float* %tmp10130, i64 1
+  %tmp10132 = getelementptr inbounds float* %tmp10131, i64 1
+  %tmp10133 = getelementptr inbounds float* %tmp10132, i64 1
+  %tmp10134 = getelementptr inbounds float* %tmp10133, i64 1
+  %tmp10135 = getelementptr inbounds float* %tmp10134, i64 1
+  %tmp10136 = getelementptr inbounds float* %tmp10135, i64 1
+  %tmp10137 = getelementptr inbounds float* %tmp10136, i64 1
+  %tmp10138 = getelementptr inbounds float* %tmp10137, i64 1
+  %tmp10139 = getelementptr inbounds float* %tmp10138, i64 1
+  %tmp10140 = getelementptr inbounds float* %tmp10139, i64 1
+  %tmp10141 = getelementptr inbounds float* %tmp10140, i64 1
+  %tmp10142 = getelementptr inbounds float* %tmp10141, i64 1
+  %tmp10143 = getelementptr inbounds float* %tmp10142, i64 1
+  %tmp10144 = getelementptr inbounds float* %tmp10143, i64 1
+  %tmp10145 = getelementptr inbounds float* %tmp10144, i64 1
+  %tmp10146 = getelementptr inbounds float* %tmp10145, i64 1
+  %tmp10147 = getelementptr inbounds float* %tmp10146, i64 1
+  %tmp10148 = getelementptr inbounds float* %tmp10147, i64 1
+  %tmp10149 = getelementptr inbounds float* %tmp10148, i64 1
+  %tmp10150 = getelementptr inbounds float* %tmp10149, i64 1
+  %tmp10151 = getelementptr inbounds float* %tmp10150, i64 1
+  %tmp10152 = getelementptr inbounds float* %tmp10151, i64 1
+  %tmp10153 = getelementptr inbounds float* %tmp10152, i64 1
+  %tmp10154 = getelementptr inbounds float* %tmp10153, i64 1
+  %tmp10155 = getelementptr inbounds float* %tmp10154, i64 1
+  %tmp10156 = getelementptr inbounds float* %tmp10155, i64 1
+  %tmp10157 = getelementptr inbounds float* %tmp10156, i64 1
+  %tmp10158 = getelementptr inbounds float* %tmp10157, i64 1
+  %tmp10159 = getelementptr inbounds float* %tmp10158, i64 1
+  %tmp10160 = getelementptr inbounds float* %tmp10159, i64 1
+  %tmp10161 = getelementptr inbounds float* %tmp10160, i64 1
+  %tmp10162 = getelementptr inbounds float* %tmp10161, i64 1
+  %tmp10163 = getelementptr inbounds float* %tmp10162, i64 1
+  %tmp10164 = getelementptr inbounds float* %tmp10163, i64 1
+  %tmp10165 = getelementptr inbounds float* %tmp10164, i64 1
+  %tmp10166 = getelementptr inbounds float* %tmp10165, i64 1
+  %tmp10167 = getelementptr inbounds float* %tmp10166, i64 1
+  %tmp10168 = getelementptr inbounds float* %tmp10167, i64 1
+  %tmp10169 = getelementptr inbounds float* %tmp10168, i64 1
+  %tmp10170 = getelementptr inbounds float* %tmp10169, i64 1
+  %tmp10171 = getelementptr inbounds float* %tmp10170, i64 1
+  %tmp10172 = getelementptr inbounds float* %tmp10171, i64 1
+  %tmp10173 = getelementptr inbounds float* %tmp10172, i64 1
+  %tmp10174 = getelementptr inbounds float* %tmp10173, i64 1
+  %tmp10175 = getelementptr inbounds float* %tmp10174, i64 1
+  %tmp10176 = getelementptr inbounds float* %tmp10175, i64 1
+  %tmp10177 = getelementptr inbounds float* %tmp10176, i64 1
+  %tmp10178 = getelementptr inbounds float* %tmp10177, i64 1
+  %tmp10179 = getelementptr inbounds float* %tmp10178, i64 1
+  %tmp10180 = getelementptr inbounds float* %tmp10179, i64 1
+  %tmp10181 = getelementptr inbounds float* %tmp10180, i64 1
+  %tmp10182 = getelementptr inbounds float* %tmp10181, i64 1
+  %tmp10183 = getelementptr inbounds float* %tmp10182, i64 1
+  %tmp10184 = getelementptr inbounds float* %tmp10183, i64 1
+  %tmp10185 = getelementptr inbounds float* %tmp10184, i64 1
+  %tmp10186 = getelementptr inbounds float* %tmp10185, i64 1
+  %tmp10187 = getelementptr inbounds float* %tmp10186, i64 1
+  %tmp10188 = getelementptr inbounds float* %tmp10187, i64 1
+  %tmp10189 = getelementptr inbounds float* %tmp10188, i64 1
+  %tmp10190 = getelementptr inbounds float* %tmp10189, i64 1
+  %tmp10191 = getelementptr inbounds float* %tmp10190, i64 1
+  %tmp10192 = getelementptr inbounds float* %tmp10191, i64 1
+  %tmp10193 = getelementptr inbounds float* %tmp10192, i64 1
+  %tmp10194 = getelementptr inbounds float* %tmp10193, i64 1
+  %tmp10195 = getelementptr inbounds float* %tmp10194, i64 1
+  %tmp10196 = getelementptr inbounds float* %tmp10195, i64 1
+  %tmp10197 = getelementptr inbounds float* %tmp10196, i64 1
+  %tmp10198 = getelementptr inbounds float* %tmp10197, i64 1
+  %tmp10199 = getelementptr inbounds float* %tmp10198, i64 1
+  %tmp10200 = getelementptr inbounds float* %tmp10199, i64 1
+  %tmp10201 = getelementptr inbounds float* %tmp10200, i64 1
+  %tmp10202 = getelementptr inbounds float* %tmp10201, i64 1
+  %tmp10203 = getelementptr inbounds float* %tmp10202, i64 1
+  %tmp10204 = getelementptr inbounds float* %tmp10203, i64 1
+  %tmp10205 = getelementptr inbounds float* %tmp10204, i64 1
+  %tmp10206 = getelementptr inbounds float* %tmp10205, i64 1
+  %tmp10207 = getelementptr inbounds float* %tmp10206, i64 1
+  %tmp10208 = getelementptr inbounds float* %tmp10207, i64 1
+  %tmp10209 = getelementptr inbounds float* %tmp10208, i64 1
+  %tmp10210 = getelementptr inbounds float* %tmp10209, i64 1
+  %tmp10211 = getelementptr inbounds float* %tmp10210, i64 1
+  %tmp10212 = getelementptr inbounds float* %tmp10211, i64 1
+  %tmp10213 = getelementptr inbounds float* %tmp10212, i64 1
+  %tmp10214 = getelementptr inbounds float* %tmp10213, i64 1
+  %tmp10215 = getelementptr inbounds float* %tmp10214, i64 1
+  %tmp10216 = getelementptr inbounds float* %tmp10215, i64 1
+  %tmp10217 = getelementptr inbounds float* %tmp10216, i64 1
+  %tmp10218 = getelementptr inbounds float* %tmp10217, i64 1
+  %tmp10219 = getelementptr inbounds float* %tmp10218, i64 1
+  %tmp10220 = getelementptr inbounds float* %tmp10219, i64 1
+  %tmp10221 = getelementptr inbounds float* %tmp10220, i64 1
+  %tmp10222 = getelementptr inbounds float* %tmp10221, i64 1
+  %tmp10223 = getelementptr inbounds float* %tmp10222, i64 1
+  %tmp10224 = getelementptr inbounds float* %tmp10223, i64 1
+  %tmp10225 = getelementptr inbounds float* %tmp10224, i64 1
+  %tmp10226 = getelementptr inbounds float* %tmp10225, i64 1
+  %tmp10227 = getelementptr inbounds float* %tmp10226, i64 1
+  %tmp10228 = getelementptr inbounds float* %tmp10227, i64 1
+  %tmp10229 = getelementptr inbounds float* %tmp10228, i64 1
+  %tmp10230 = getelementptr inbounds float* %tmp10229, i64 1
+  %tmp10231 = getelementptr inbounds float* %tmp10230, i64 1
+  %tmp10232 = getelementptr inbounds float* %tmp10231, i64 1
+  %tmp10233 = getelementptr inbounds float* %tmp10232, i64 1
+  %tmp10234 = getelementptr inbounds float* %tmp10233, i64 1
+  %tmp10235 = getelementptr inbounds float* %tmp10234, i64 1
+  %tmp10236 = getelementptr inbounds float* %tmp10235, i64 1
+  %tmp10237 = getelementptr inbounds float* %tmp10236, i64 1
+  %tmp10238 = getelementptr inbounds float* %tmp10237, i64 1
+  %tmp10239 = getelementptr inbounds float* %tmp10238, i64 1
+  %tmp10240 = getelementptr inbounds float* %tmp10239, i64 1
+  %tmp10241 = getelementptr inbounds float* %tmp10240, i64 1
+  %tmp10242 = getelementptr inbounds float* %tmp10241, i64 1
+  %tmp10243 = getelementptr inbounds float* %tmp10242, i64 1
+  %tmp10244 = getelementptr inbounds float* %tmp10243, i64 1
+  %tmp10245 = getelementptr inbounds float* %tmp10244, i64 1
+  %tmp10246 = getelementptr inbounds float* %tmp10245, i64 1
+  %tmp10247 = getelementptr inbounds float* %tmp10246, i64 1
+  %tmp10248 = getelementptr inbounds float* %tmp10247, i64 1
+  %tmp10249 = getelementptr inbounds float* %tmp10248, i64 1
+  %tmp10250 = getelementptr inbounds float* %tmp10249, i64 1
+  %tmp10251 = getelementptr inbounds float* %tmp10250, i64 1
+  %tmp10252 = getelementptr inbounds float* %tmp10251, i64 1
+  %tmp10253 = getelementptr inbounds float* %tmp10252, i64 1
+  %tmp10254 = getelementptr inbounds float* %tmp10253, i64 1
+  %tmp10255 = getelementptr inbounds float* %tmp10254, i64 1
+  %tmp10256 = getelementptr inbounds float* %tmp10255, i64 1
+  %tmp10257 = getelementptr inbounds float* %tmp10256, i64 1
+  %tmp10258 = getelementptr inbounds float* %tmp10257, i64 1
+  %tmp10259 = getelementptr inbounds float* %tmp10258, i64 1
+  %tmp10260 = getelementptr inbounds float* %tmp10259, i64 1
+  %tmp10261 = getelementptr inbounds float* %tmp10260, i64 1
+  %tmp10262 = getelementptr inbounds float* %tmp10261, i64 1
+  %tmp10263 = getelementptr inbounds float* %tmp10262, i64 1
+  %tmp10264 = getelementptr inbounds float* %tmp10263, i64 1
+  %tmp10265 = getelementptr inbounds float* %tmp10264, i64 1
+  %tmp10266 = getelementptr inbounds float* %tmp10265, i64 1
+  %tmp10267 = getelementptr inbounds float* %tmp10266, i64 1
+  %tmp10268 = getelementptr inbounds float* %tmp10267, i64 1
+  %tmp10269 = getelementptr inbounds float* %tmp10268, i64 1
+  %tmp10270 = getelementptr inbounds float* %tmp10269, i64 1
+  %tmp10271 = getelementptr inbounds float* %tmp10270, i64 1
+  %tmp10272 = getelementptr inbounds float* %tmp10271, i64 1
+  %tmp10273 = getelementptr inbounds float* %tmp10272, i64 1
+  %tmp10274 = getelementptr inbounds float* %tmp10273, i64 1
+  %tmp10275 = getelementptr inbounds float* %tmp10274, i64 1
+  %tmp10276 = getelementptr inbounds float* %tmp10275, i64 1
+  %tmp10277 = getelementptr inbounds float* %tmp10276, i64 1
+  %tmp10278 = getelementptr inbounds float* %tmp10277, i64 1
+  %tmp10279 = getelementptr inbounds float* %tmp10278, i64 1
+  %tmp10280 = getelementptr inbounds float* %tmp10279, i64 1
+  %tmp10281 = getelementptr inbounds float* %tmp10280, i64 1
+  %tmp10282 = getelementptr inbounds float* %tmp10281, i64 1
+  %tmp10283 = getelementptr inbounds float* %tmp10282, i64 1
+  %tmp10284 = getelementptr inbounds float* %tmp10283, i64 1
+  %tmp10285 = getelementptr inbounds float* %tmp10284, i64 1
+  %tmp10286 = getelementptr inbounds float* %tmp10285, i64 1
+  %tmp10287 = getelementptr inbounds float* %tmp10286, i64 1
+  %tmp10288 = getelementptr inbounds float* %tmp10287, i64 1
+  %tmp10289 = getelementptr inbounds float* %tmp10288, i64 1
+  %tmp10290 = getelementptr inbounds float* %tmp10289, i64 1
+  %tmp10291 = getelementptr inbounds float* %tmp10290, i64 1
+  %tmp10292 = getelementptr inbounds float* %tmp10291, i64 1
+  %tmp10293 = getelementptr inbounds float* %tmp10292, i64 1
+  %tmp10294 = getelementptr inbounds float* %tmp10293, i64 1
+  %tmp10295 = getelementptr inbounds float* %tmp10294, i64 1
+  %tmp10296 = getelementptr inbounds float* %tmp10295, i64 1
+  %tmp10297 = getelementptr inbounds float* %tmp10296, i64 1
+  %tmp10298 = getelementptr inbounds float* %tmp10297, i64 1
+  %tmp10299 = getelementptr inbounds float* %tmp10298, i64 1
+  %tmp10300 = getelementptr inbounds float* %tmp10299, i64 1
+  %tmp10301 = getelementptr inbounds float* %tmp10300, i64 1
+  %tmp10302 = getelementptr inbounds float* %tmp10301, i64 1
+  %tmp10303 = getelementptr inbounds float* %tmp10302, i64 1
+  %tmp10304 = getelementptr inbounds float* %tmp10303, i64 1
+  %tmp10305 = getelementptr inbounds float* %tmp10304, i64 1
+  %tmp10306 = getelementptr inbounds float* %tmp10305, i64 1
+  %tmp10307 = getelementptr inbounds float* %tmp10306, i64 1
+  %tmp10308 = getelementptr inbounds float* %tmp10307, i64 1
+  %tmp10309 = getelementptr inbounds float* %tmp10308, i64 1
+  %tmp10310 = getelementptr inbounds float* %tmp10309, i64 1
+  %tmp10311 = getelementptr inbounds float* %tmp10310, i64 1
+  %tmp10312 = getelementptr inbounds float* %tmp10311, i64 1
+  %tmp10313 = getelementptr inbounds float* %tmp10312, i64 1
+  %tmp10314 = getelementptr inbounds float* %tmp10313, i64 1
+  %tmp10315 = getelementptr inbounds float* %tmp10314, i64 1
+  %tmp10316 = getelementptr inbounds float* %tmp10315, i64 1
+  %tmp10317 = getelementptr inbounds float* %tmp10316, i64 1
+  %tmp10318 = getelementptr inbounds float* %tmp10317, i64 1
+  %tmp10319 = getelementptr inbounds float* %tmp10318, i64 1
+  %tmp10320 = getelementptr inbounds float* %tmp10319, i64 1
+  %tmp10321 = getelementptr inbounds float* %tmp10320, i64 1
+  %tmp10322 = getelementptr inbounds float* %tmp10321, i64 1
+  %tmp10323 = getelementptr inbounds float* %tmp10322, i64 1
+  %tmp10324 = getelementptr inbounds float* %tmp10323, i64 1
+  %tmp10325 = getelementptr inbounds float* %tmp10324, i64 1
+  %tmp10326 = getelementptr inbounds float* %tmp10325, i64 1
+  %tmp10327 = getelementptr inbounds float* %tmp10326, i64 1
+  %tmp10328 = getelementptr inbounds float* %tmp10327, i64 1
+  %tmp10329 = getelementptr inbounds float* %tmp10328, i64 1
+  %tmp10330 = getelementptr inbounds float* %tmp10329, i64 1
+  %tmp10331 = getelementptr inbounds float* %tmp10330, i64 1
+  %tmp10332 = getelementptr inbounds float* %tmp10331, i64 1
+  %tmp10333 = getelementptr inbounds float* %tmp10332, i64 1
+  %tmp10334 = getelementptr inbounds float* %tmp10333, i64 1
+  %tmp10335 = getelementptr inbounds float* %tmp10334, i64 1
+  %tmp10336 = getelementptr inbounds float* %tmp10335, i64 1
+  %tmp10337 = getelementptr inbounds float* %tmp10336, i64 1
+  %tmp10338 = getelementptr inbounds float* %tmp10337, i64 1
+  %tmp10339 = getelementptr inbounds float* %tmp10338, i64 1
+  %tmp10340 = getelementptr inbounds float* %tmp10339, i64 1
+  %tmp10341 = getelementptr inbounds float* %tmp10340, i64 1
+  %tmp10342 = getelementptr inbounds float* %tmp10341, i64 1
+  %tmp10343 = getelementptr inbounds float* %tmp10342, i64 1
+  %tmp10344 = getelementptr inbounds float* %tmp10343, i64 1
+  %tmp10345 = getelementptr inbounds float* %tmp10344, i64 1
+  %tmp10346 = getelementptr inbounds float* %tmp10345, i64 1
+  %tmp10347 = getelementptr inbounds float* %tmp10346, i64 1
+  %tmp10348 = getelementptr inbounds float* %tmp10347, i64 1
+  %tmp10349 = getelementptr inbounds float* %tmp10348, i64 1
+  %tmp10350 = getelementptr inbounds float* %tmp10349, i64 1
+  %tmp10351 = getelementptr inbounds float* %tmp10350, i64 1
+  %tmp10352 = getelementptr inbounds float* %tmp10351, i64 1
+  %tmp10353 = getelementptr inbounds float* %tmp10352, i64 1
+  %tmp10354 = getelementptr inbounds float* %tmp10353, i64 1
+  %tmp10355 = getelementptr inbounds float* %tmp10354, i64 1
+  %tmp10356 = getelementptr inbounds float* %tmp10355, i64 1
+  %tmp10357 = getelementptr inbounds float* %tmp10356, i64 1
+  %tmp10358 = getelementptr inbounds float* %tmp10357, i64 1
+  %tmp10359 = getelementptr inbounds float* %tmp10358, i64 1
+  %tmp10360 = getelementptr inbounds float* %tmp10359, i64 1
+  %tmp10361 = getelementptr inbounds float* %tmp10360, i64 1
+  %tmp10362 = getelementptr inbounds float* %tmp10361, i64 1
+  %tmp10363 = getelementptr inbounds float* %tmp10362, i64 1
+  %tmp10364 = getelementptr inbounds float* %tmp10363, i64 1
+  %tmp10365 = getelementptr inbounds float* %tmp10364, i64 1
+  %tmp10366 = getelementptr inbounds float* %tmp10365, i64 1
+  %tmp10367 = getelementptr inbounds float* %tmp10366, i64 1
+  %tmp10368 = getelementptr inbounds float* %tmp10367, i64 1
+  %tmp10369 = getelementptr inbounds float* %tmp10368, i64 1
+  %tmp10370 = getelementptr inbounds float* %tmp10369, i64 1
+  %tmp10371 = getelementptr inbounds float* %tmp10370, i64 1
+  %tmp10372 = getelementptr inbounds float* %tmp10371, i64 1
+  %tmp10373 = getelementptr inbounds float* %tmp10372, i64 1
+  %tmp10374 = getelementptr inbounds float* %tmp10373, i64 1
+  %tmp10375 = getelementptr inbounds float* %tmp10374, i64 1
+  %tmp10376 = getelementptr inbounds float* %tmp10375, i64 1
+  %tmp10377 = getelementptr inbounds float* %tmp10376, i64 1
+  %tmp10378 = getelementptr inbounds float* %tmp10377, i64 1
+  %tmp10379 = getelementptr inbounds float* %tmp10378, i64 1
+  %tmp10380 = getelementptr inbounds float* %tmp10379, i64 1
+  %tmp10381 = getelementptr inbounds float* %tmp10380, i64 1
+  %tmp10382 = getelementptr inbounds float* %tmp10381, i64 1
+  %tmp10383 = getelementptr inbounds float* %tmp10382, i64 1
+  %tmp10384 = getelementptr inbounds float* %tmp10383, i64 1
+  %tmp10385 = getelementptr inbounds float* %tmp10384, i64 1
+  %tmp10386 = getelementptr inbounds float* %tmp10385, i64 1
+  %tmp10387 = getelementptr inbounds float* %tmp10386, i64 1
+  %tmp10388 = getelementptr inbounds float* %tmp10387, i64 1
+  %tmp10389 = getelementptr inbounds float* %tmp10388, i64 1
+  %tmp10390 = getelementptr inbounds float* %tmp10389, i64 1
+  %tmp10391 = getelementptr inbounds float* %tmp10390, i64 1
+  %tmp10392 = getelementptr inbounds float* %tmp10391, i64 1
+  %tmp10393 = getelementptr inbounds float* %tmp10392, i64 1
+  %tmp10394 = getelementptr inbounds float* %tmp10393, i64 1
+  %tmp10395 = getelementptr inbounds float* %tmp10394, i64 1
+  %tmp10396 = getelementptr inbounds float* %tmp10395, i64 1
+  %tmp10397 = getelementptr inbounds float* %tmp10396, i64 1
+  %tmp10398 = getelementptr inbounds float* %tmp10397, i64 1
+  %tmp10399 = getelementptr inbounds float* %tmp10398, i64 1
+  %tmp10400 = getelementptr inbounds float* %tmp10399, i64 1
+  %tmp10401 = getelementptr inbounds float* %tmp10400, i64 1
+  %tmp10402 = getelementptr inbounds float* %tmp10401, i64 1
+  %tmp10403 = getelementptr inbounds float* %tmp10402, i64 1
+  %tmp10404 = getelementptr inbounds float* %tmp10403, i64 1
+  %tmp10405 = getelementptr inbounds float* %tmp10404, i64 1
+  %tmp10406 = getelementptr inbounds float* %tmp10405, i64 1
+  %tmp10407 = getelementptr inbounds float* %tmp10406, i64 1
+  %tmp10408 = getelementptr inbounds float* %tmp10407, i64 1
+  %tmp10409 = getelementptr inbounds float* %tmp10408, i64 1
+  %tmp10410 = getelementptr inbounds float* %tmp10409, i64 1
+  %tmp10411 = getelementptr inbounds float* %tmp10410, i64 1
+  %tmp10412 = getelementptr inbounds float* %tmp10411, i64 1
+  %tmp10413 = getelementptr inbounds float* %tmp10412, i64 1
+  %tmp10414 = getelementptr inbounds float* %tmp10413, i64 1
+  %tmp10415 = getelementptr inbounds float* %tmp10414, i64 1
+  %tmp10416 = getelementptr inbounds float* %tmp10415, i64 1
+  %tmp10417 = getelementptr inbounds float* %tmp10416, i64 1
+  %tmp10418 = getelementptr inbounds float* %tmp10417, i64 1
+  %tmp10419 = getelementptr inbounds float* %tmp10418, i64 1
+  %tmp10420 = getelementptr inbounds float* %tmp10419, i64 1
+  %tmp10421 = getelementptr inbounds float* %tmp10420, i64 1
+  %tmp10422 = getelementptr inbounds float* %tmp10421, i64 1
+  %tmp10423 = getelementptr inbounds float* %tmp10422, i64 1
+  %tmp10424 = getelementptr inbounds float* %tmp10423, i64 1
+  %tmp10425 = getelementptr inbounds float* %tmp10424, i64 1
+  %tmp10426 = getelementptr inbounds float* %tmp10425, i64 1
+  %tmp10427 = getelementptr inbounds float* %tmp10426, i64 1
+  %tmp10428 = getelementptr inbounds float* %tmp10427, i64 1
+  %tmp10429 = getelementptr inbounds float* %tmp10428, i64 1
+  %tmp10430 = getelementptr inbounds float* %tmp10429, i64 1
+  %tmp10431 = getelementptr inbounds float* %tmp10430, i64 1
+  %tmp10432 = getelementptr inbounds float* %tmp10431, i64 1
+  %tmp10433 = getelementptr inbounds float* %tmp10432, i64 1
+  %tmp10434 = getelementptr inbounds float* %tmp10433, i64 1
+  %tmp10435 = getelementptr inbounds float* %tmp10434, i64 1
+  %tmp10436 = getelementptr inbounds float* %tmp10435, i64 1
+  %tmp10437 = getelementptr inbounds float* %tmp10436, i64 1
+  %tmp10438 = getelementptr inbounds float* %tmp10437, i64 1
+  %tmp10439 = getelementptr inbounds float* %tmp10438, i64 1
+  %tmp10440 = getelementptr inbounds float* %tmp10439, i64 1
+  %tmp10441 = getelementptr inbounds float* %tmp10440, i64 1
+  %tmp10442 = getelementptr inbounds float* %tmp10441, i64 1
+  %tmp10443 = getelementptr inbounds float* %tmp10442, i64 1
+  %tmp10444 = getelementptr inbounds float* %tmp10443, i64 1
+  %tmp10445 = getelementptr inbounds float* %tmp10444, i64 1
+  %tmp10446 = getelementptr inbounds float* %tmp10445, i64 1
+  %tmp10447 = getelementptr inbounds float* %tmp10446, i64 1
+  %tmp10448 = getelementptr inbounds float* %tmp10447, i64 1
+  %tmp10449 = getelementptr inbounds float* %tmp10448, i64 1
+  %tmp10450 = getelementptr inbounds float* %tmp10449, i64 1
+  %tmp10451 = getelementptr inbounds float* %tmp10450, i64 1
+  %tmp10452 = getelementptr inbounds float* %tmp10451, i64 1
+  %tmp10453 = getelementptr inbounds float* %tmp10452, i64 1
+  %tmp10454 = getelementptr inbounds float* %tmp10453, i64 1
+  %tmp10455 = getelementptr inbounds float* %tmp10454, i64 1
+  %tmp10456 = getelementptr inbounds float* %tmp10455, i64 1
+  %tmp10457 = getelementptr inbounds float* %tmp10456, i64 1
+  %tmp10458 = getelementptr inbounds float* %tmp10457, i64 1
+  %tmp10459 = getelementptr inbounds float* %tmp10458, i64 1
+  %tmp10460 = getelementptr inbounds float* %tmp10459, i64 1
+  %tmp10461 = getelementptr inbounds float* %tmp10460, i64 1
+  %tmp10462 = getelementptr inbounds float* %tmp10461, i64 1
+  %tmp10463 = getelementptr inbounds float* %tmp10462, i64 1
+  %tmp10464 = getelementptr inbounds float* %tmp10463, i64 1
+  %tmp10465 = getelementptr inbounds float* %tmp10464, i64 1
+  %tmp10466 = getelementptr inbounds float* %tmp10465, i64 1
+  %tmp10467 = getelementptr inbounds float* %tmp10466, i64 1
+  %tmp10468 = getelementptr inbounds float* %tmp10467, i64 1
+  %tmp10469 = getelementptr inbounds float* %tmp10468, i64 1
+  %tmp10470 = getelementptr inbounds float* %tmp10469, i64 1
+  %tmp10471 = getelementptr inbounds float* %tmp10470, i64 1
+  %tmp10472 = getelementptr inbounds float* %tmp10471, i64 1
+  %tmp10473 = getelementptr inbounds float* %tmp10472, i64 1
+  %tmp10474 = getelementptr inbounds float* %tmp10473, i64 1
+  %tmp10475 = getelementptr inbounds float* %tmp10474, i64 1
+  %tmp10476 = getelementptr inbounds float* %tmp10475, i64 1
+  %tmp10477 = getelementptr inbounds float* %tmp10476, i64 1
+  %tmp10478 = getelementptr inbounds float* %tmp10477, i64 1
+  %tmp10479 = getelementptr inbounds float* %tmp10478, i64 1
+  %tmp10480 = getelementptr inbounds float* %tmp10479, i64 1
+  %tmp10481 = getelementptr inbounds float* %tmp10480, i64 1
+  %tmp10482 = getelementptr inbounds float* %tmp10481, i64 1
+  %tmp10483 = getelementptr inbounds float* %tmp10482, i64 1
+  %tmp10484 = getelementptr inbounds float* %tmp10483, i64 1
+  %tmp10485 = getelementptr inbounds float* %tmp10484, i64 1
+  %tmp10486 = getelementptr inbounds float* %tmp10485, i64 1
+  %tmp10487 = getelementptr inbounds float* %tmp10486, i64 1
+  %tmp10488 = getelementptr inbounds float* %tmp10487, i64 1
+  %tmp10489 = getelementptr inbounds float* %tmp10488, i64 1
+  %tmp10490 = getelementptr inbounds float* %tmp10489, i64 1
+  %tmp10491 = getelementptr inbounds float* %tmp10490, i64 1
+  %tmp10492 = getelementptr inbounds float* %tmp10491, i64 1
+  %tmp10493 = getelementptr inbounds float* %tmp10492, i64 1
+  %tmp10494 = getelementptr inbounds float* %tmp10493, i64 1
+  %tmp10495 = getelementptr inbounds float* %tmp10494, i64 1
+  %tmp10496 = getelementptr inbounds float* %tmp10495, i64 1
+  %tmp10497 = getelementptr inbounds float* %tmp10496, i64 1
+  %tmp10498 = getelementptr inbounds float* %tmp10497, i64 1
+  %tmp10499 = getelementptr inbounds float* %tmp10498, i64 1
+  %tmp10500 = getelementptr inbounds float* %tmp10499, i64 1
+  %tmp10501 = getelementptr inbounds float* %tmp10500, i64 1
+  %tmp10502 = getelementptr inbounds float* %tmp10501, i64 1
+  %tmp10503 = getelementptr inbounds float* %tmp10502, i64 1
+  %tmp10504 = getelementptr inbounds float* %tmp10503, i64 1
+  %tmp10505 = getelementptr inbounds float* %tmp10504, i64 1
+  %tmp10506 = getelementptr inbounds float* %tmp10505, i64 1
+  %tmp10507 = getelementptr inbounds float* %tmp10506, i64 1
+  %tmp10508 = getelementptr inbounds float* %tmp10507, i64 1
+  %tmp10509 = getelementptr inbounds float* %tmp10508, i64 1
+  %tmp10510 = getelementptr inbounds float* %tmp10509, i64 1
+  %tmp10511 = getelementptr inbounds float* %tmp10510, i64 1
+  %tmp10512 = getelementptr inbounds float* %tmp10511, i64 1
+  %tmp10513 = getelementptr inbounds float* %tmp10512, i64 1
+  %tmp10514 = getelementptr inbounds float* %tmp10513, i64 1
+  %tmp10515 = getelementptr inbounds float* %tmp10514, i64 1
+  %tmp10516 = getelementptr inbounds float* %tmp10515, i64 1
+  %tmp10517 = getelementptr inbounds float* %tmp10516, i64 1
+  %tmp10518 = getelementptr inbounds float* %tmp10517, i64 1
+  %tmp10519 = getelementptr inbounds float* %tmp10518, i64 1
+  %tmp10520 = getelementptr inbounds float* %tmp10519, i64 1
+  %tmp10521 = getelementptr inbounds float* %tmp10520, i64 1
+  %tmp10522 = getelementptr inbounds float* %tmp10521, i64 1
+  %tmp10523 = getelementptr inbounds float* %tmp10522, i64 1
+  %tmp10524 = getelementptr inbounds float* %tmp10523, i64 1
+  %tmp10525 = getelementptr inbounds float* %tmp10524, i64 1
+  %tmp10526 = getelementptr inbounds float* %tmp10525, i64 1
+  %tmp10527 = getelementptr inbounds float* %tmp10526, i64 1
+  %tmp10528 = getelementptr inbounds float* %tmp10527, i64 1
+  %tmp10529 = getelementptr inbounds float* %tmp10528, i64 1
+  %tmp10530 = getelementptr inbounds float* %tmp10529, i64 1
+  %tmp10531 = getelementptr inbounds float* %tmp10530, i64 1
+  %tmp10532 = getelementptr inbounds float* %tmp10531, i64 1
+  %tmp10533 = getelementptr inbounds float* %tmp10532, i64 1
+  %tmp10534 = getelementptr inbounds float* %tmp10533, i64 1
+  %tmp10535 = getelementptr inbounds float* %tmp10534, i64 1
+  %tmp10536 = getelementptr inbounds float* %tmp10535, i64 1
+  %tmp10537 = getelementptr inbounds float* %tmp10536, i64 1
+  %tmp10538 = getelementptr inbounds float* %tmp10537, i64 1
+  %tmp10539 = getelementptr inbounds float* %tmp10538, i64 1
+  %tmp10540 = getelementptr inbounds float* %tmp10539, i64 1
+  %tmp10541 = getelementptr inbounds float* %tmp10540, i64 1
+  %tmp10542 = getelementptr inbounds float* %tmp10541, i64 1
+  %tmp10543 = getelementptr inbounds float* %tmp10542, i64 1
+  %tmp10544 = getelementptr inbounds float* %tmp10543, i64 1
+  %tmp10545 = getelementptr inbounds float* %tmp10544, i64 1
+  %tmp10546 = getelementptr inbounds float* %tmp10545, i64 1
+  %tmp10547 = getelementptr inbounds float* %tmp10546, i64 1
+  %tmp10548 = getelementptr inbounds float* %tmp10547, i64 1
+  %tmp10549 = getelementptr inbounds float* %tmp10548, i64 1
+  %tmp10550 = getelementptr inbounds float* %tmp10549, i64 1
+  %tmp10551 = getelementptr inbounds float* %tmp10550, i64 1
+  %tmp10552 = getelementptr inbounds float* %tmp10551, i64 1
+  %tmp10553 = getelementptr inbounds float* %tmp10552, i64 1
+  %tmp10554 = getelementptr inbounds float* %tmp10553, i64 1
+  %tmp10555 = getelementptr inbounds float* %tmp10554, i64 1
+  %tmp10556 = getelementptr inbounds float* %tmp10555, i64 1
+  %tmp10557 = getelementptr inbounds float* %tmp10556, i64 1
+  %tmp10558 = getelementptr inbounds float* %tmp10557, i64 1
+  %tmp10559 = getelementptr inbounds float* %tmp10558, i64 1
+  %tmp10560 = getelementptr inbounds float* %tmp10559, i64 1
+  %tmp10561 = getelementptr inbounds float* %tmp10560, i64 1
+  %tmp10562 = getelementptr inbounds float* %tmp10561, i64 1
+  %tmp10563 = getelementptr inbounds float* %tmp10562, i64 1
+  %tmp10564 = getelementptr inbounds float* %tmp10563, i64 1
+  %tmp10565 = getelementptr inbounds float* %tmp10564, i64 1
+  %tmp10566 = getelementptr inbounds float* %tmp10565, i64 1
+  %tmp10567 = getelementptr inbounds float* %tmp10566, i64 1
+  %tmp10568 = getelementptr inbounds float* %tmp10567, i64 1
+  %tmp10569 = getelementptr inbounds float* %tmp10568, i64 1
+  %tmp10570 = getelementptr inbounds float* %tmp10569, i64 1
+  %tmp10571 = getelementptr inbounds float* %tmp10570, i64 1
+  %tmp10572 = getelementptr inbounds float* %tmp10571, i64 1
+  %tmp10573 = getelementptr inbounds float* %tmp10572, i64 1
+  %tmp10574 = getelementptr inbounds float* %tmp10573, i64 1
+  %tmp10575 = getelementptr inbounds float* %tmp10574, i64 1
+  %tmp10576 = getelementptr inbounds float* %tmp10575, i64 1
+  %tmp10577 = getelementptr inbounds float* %tmp10576, i64 1
+  %tmp10578 = getelementptr inbounds float* %tmp10577, i64 1
+  %tmp10579 = getelementptr inbounds float* %tmp10578, i64 1
+  %tmp10580 = getelementptr inbounds float* %tmp10579, i64 1
+  %tmp10581 = getelementptr inbounds float* %tmp10580, i64 1
+  %tmp10582 = getelementptr inbounds float* %tmp10581, i64 1
+  %tmp10583 = getelementptr inbounds float* %tmp10582, i64 1
+  %tmp10584 = getelementptr inbounds float* %tmp10583, i64 1
+  %tmp10585 = getelementptr inbounds float* %tmp10584, i64 1
+  %tmp10586 = getelementptr inbounds float* %tmp10585, i64 1
+  %tmp10587 = getelementptr inbounds float* %tmp10586, i64 1
+  %tmp10588 = getelementptr inbounds float* %tmp10587, i64 1
+  %tmp10589 = getelementptr inbounds float* %tmp10588, i64 1
+  %tmp10590 = getelementptr inbounds float* %tmp10589, i64 1
+  %tmp10591 = getelementptr inbounds float* %tmp10590, i64 1
+  %tmp10592 = getelementptr inbounds float* %tmp10591, i64 1
+  %tmp10593 = getelementptr inbounds float* %tmp10592, i64 1
+  %tmp10594 = getelementptr inbounds float* %tmp10593, i64 1
+  %tmp10595 = getelementptr inbounds float* %tmp10594, i64 1
+  %tmp10596 = getelementptr inbounds float* %tmp10595, i64 1
+  %tmp10597 = getelementptr inbounds float* %tmp10596, i64 1
+  %tmp10598 = getelementptr inbounds float* %tmp10597, i64 1
+  %tmp10599 = getelementptr inbounds float* %tmp10598, i64 1
+  %tmp10600 = getelementptr inbounds float* %tmp10599, i64 1
+  %tmp10601 = getelementptr inbounds float* %tmp10600, i64 1
+  %tmp10602 = getelementptr inbounds float* %tmp10601, i64 1
+  %tmp10603 = getelementptr inbounds float* %tmp10602, i64 1
+  %tmp10604 = getelementptr inbounds float* %tmp10603, i64 1
+  %tmp10605 = getelementptr inbounds float* %tmp10604, i64 1
+  %tmp10606 = getelementptr inbounds float* %tmp10605, i64 1
+  %tmp10607 = getelementptr inbounds float* %tmp10606, i64 1
+  %tmp10608 = getelementptr inbounds float* %tmp10607, i64 1
+  %tmp10609 = getelementptr inbounds float* %tmp10608, i64 1
+  %tmp10610 = getelementptr inbounds float* %tmp10609, i64 1
+  %tmp10611 = getelementptr inbounds float* %tmp10610, i64 1
+  %tmp10612 = getelementptr inbounds float* %tmp10611, i64 1
+  %tmp10613 = getelementptr inbounds float* %tmp10612, i64 1
+  %tmp10614 = getelementptr inbounds float* %tmp10613, i64 1
+  %tmp10615 = getelementptr inbounds float* %tmp10614, i64 1
+  %tmp10616 = getelementptr inbounds float* %tmp10615, i64 1
+  %tmp10617 = getelementptr inbounds float* %tmp10616, i64 1
+  %tmp10618 = getelementptr inbounds float* %tmp10617, i64 1
+  %tmp10619 = getelementptr inbounds float* %tmp10618, i64 1
+  %tmp10620 = getelementptr inbounds float* %tmp10619, i64 1
+  %tmp10621 = getelementptr inbounds float* %tmp10620, i64 1
+  %tmp10622 = getelementptr inbounds float* %tmp10621, i64 1
+  %tmp10623 = getelementptr inbounds float* %tmp10622, i64 1
+  %tmp10624 = getelementptr inbounds float* %tmp10623, i64 1
+  %tmp10625 = getelementptr inbounds float* %tmp10624, i64 1
+  %tmp10626 = getelementptr inbounds float* %tmp10625, i64 1
+  %tmp10627 = getelementptr inbounds float* %tmp10626, i64 1
+  %tmp10628 = getelementptr inbounds float* %tmp10627, i64 1
+  %tmp10629 = getelementptr inbounds float* %tmp10628, i64 1
+  %tmp10630 = getelementptr inbounds float* %tmp10629, i64 1
+  %tmp10631 = getelementptr inbounds float* %tmp10630, i64 1
+  %tmp10632 = getelementptr inbounds float* %tmp10631, i64 1
+  %tmp10633 = getelementptr inbounds float* %tmp10632, i64 1
+  %tmp10634 = getelementptr inbounds float* %tmp10633, i64 1
+  %tmp10635 = getelementptr inbounds float* %tmp10634, i64 1
+  %tmp10636 = getelementptr inbounds float* %tmp10635, i64 1
+  %tmp10637 = getelementptr inbounds float* %tmp10636, i64 1
+  %tmp10638 = getelementptr inbounds float* %tmp10637, i64 1
+  %tmp10639 = getelementptr inbounds float* %tmp10638, i64 1
+  %tmp10640 = getelementptr inbounds float* %tmp10639, i64 1
+  %tmp10641 = getelementptr inbounds float* %tmp10640, i64 1
+  %tmp10642 = getelementptr inbounds float* %tmp10641, i64 1
+  %tmp10643 = getelementptr inbounds float* %tmp10642, i64 1
+  %tmp10644 = getelementptr inbounds float* %tmp10643, i64 1
+  %tmp10645 = getelementptr inbounds float* %tmp10644, i64 1
+  %tmp10646 = getelementptr inbounds float* %tmp10645, i64 1
+  %tmp10647 = getelementptr inbounds float* %tmp10646, i64 1
+  %tmp10648 = getelementptr inbounds float* %tmp10647, i64 1
+  %tmp10649 = getelementptr inbounds float* %tmp10648, i64 1
+  %tmp10650 = getelementptr inbounds float* %tmp10649, i64 1
+  %tmp10651 = getelementptr inbounds float* %tmp10650, i64 1
+  %tmp10652 = getelementptr inbounds float* %tmp10651, i64 1
+  %tmp10653 = getelementptr inbounds float* %tmp10652, i64 1
+  %tmp10654 = getelementptr inbounds float* %tmp10653, i64 1
+  %tmp10655 = getelementptr inbounds float* %tmp10654, i64 1
+  %tmp10656 = getelementptr inbounds float* %tmp10655, i64 1
+  %tmp10657 = getelementptr inbounds float* %tmp10656, i64 1
+  %tmp10658 = getelementptr inbounds float* %tmp10657, i64 1
+  %tmp10659 = getelementptr inbounds float* %tmp10658, i64 1
+  %tmp10660 = getelementptr inbounds float* %tmp10659, i64 1
+  %tmp10661 = getelementptr inbounds float* %tmp10660, i64 1
+  %tmp10662 = getelementptr inbounds float* %tmp10661, i64 1
+  %tmp10663 = getelementptr inbounds float* %tmp10662, i64 1
+  %tmp10664 = getelementptr inbounds float* %tmp10663, i64 1
+  %tmp10665 = getelementptr inbounds float* %tmp10664, i64 1
+  %tmp10666 = getelementptr inbounds float* %tmp10665, i64 1
+  %tmp10667 = getelementptr inbounds float* %tmp10666, i64 1
+  %tmp10668 = getelementptr inbounds float* %tmp10667, i64 1
+  %tmp10669 = getelementptr inbounds float* %tmp10668, i64 1
+  %tmp10670 = getelementptr inbounds float* %tmp10669, i64 1
+  %tmp10671 = getelementptr inbounds float* %tmp10670, i64 1
+  %tmp10672 = getelementptr inbounds float* %tmp10671, i64 1
+  %tmp10673 = getelementptr inbounds float* %tmp10672, i64 1
+  %tmp10674 = getelementptr inbounds float* %tmp10673, i64 1
+  %tmp10675 = getelementptr inbounds float* %tmp10674, i64 1
+  %tmp10676 = getelementptr inbounds float* %tmp10675, i64 1
+  %tmp10677 = getelementptr inbounds float* %tmp10676, i64 1
+  %tmp10678 = getelementptr inbounds float* %tmp10677, i64 1
+  %tmp10679 = getelementptr inbounds float* %tmp10678, i64 1
+  %tmp10680 = getelementptr inbounds float* %tmp10679, i64 1
+  %tmp10681 = getelementptr inbounds float* %tmp10680, i64 1
+  %tmp10682 = getelementptr inbounds float* %tmp10681, i64 1
+  %tmp10683 = getelementptr inbounds float* %tmp10682, i64 1
+  %tmp10684 = getelementptr inbounds float* %tmp10683, i64 1
+  %tmp10685 = getelementptr inbounds float* %tmp10684, i64 1
+  %tmp10686 = getelementptr inbounds float* %tmp10685, i64 1
+  %tmp10687 = getelementptr inbounds float* %tmp10686, i64 1
+  %tmp10688 = getelementptr inbounds float* %tmp10687, i64 1
+  %tmp10689 = getelementptr inbounds float* %tmp10688, i64 1
+  %tmp10690 = getelementptr inbounds float* %tmp10689, i64 1
+  %tmp10691 = getelementptr inbounds float* %tmp10690, i64 1
+  %tmp10692 = getelementptr inbounds float* %tmp10691, i64 1
+  %tmp10693 = getelementptr inbounds float* %tmp10692, i64 1
+  %tmp10694 = getelementptr inbounds float* %tmp10693, i64 1
+  %tmp10695 = getelementptr inbounds float* %tmp10694, i64 1
+  %tmp10696 = getelementptr inbounds float* %tmp10695, i64 1
+  %tmp10697 = getelementptr inbounds float* %tmp10696, i64 1
+  %tmp10698 = getelementptr inbounds float* %tmp10697, i64 1
+  %tmp10699 = getelementptr inbounds float* %tmp10698, i64 1
+  %tmp10700 = getelementptr inbounds float* %tmp10699, i64 1
+  %tmp10701 = getelementptr inbounds float* %tmp10700, i64 1
+  %tmp10702 = getelementptr inbounds float* %tmp10701, i64 1
+  %tmp10703 = getelementptr inbounds float* %tmp10702, i64 1
+  %tmp10704 = getelementptr inbounds float* %tmp10703, i64 1
+  %tmp10705 = getelementptr inbounds float* %tmp10704, i64 1
+  %tmp10706 = getelementptr inbounds float* %tmp10705, i64 1
+  %tmp10707 = getelementptr inbounds float* %tmp10706, i64 1
+  %tmp10708 = getelementptr inbounds float* %tmp10707, i64 1
+  %tmp10709 = getelementptr inbounds float* %tmp10708, i64 1
+  %tmp10710 = getelementptr inbounds float* %tmp10709, i64 1
+  %tmp10711 = getelementptr inbounds float* %tmp10710, i64 1
+  %tmp10712 = getelementptr inbounds float* %tmp10711, i64 1
+  %tmp10713 = getelementptr inbounds float* %tmp10712, i64 1
+  %tmp10714 = getelementptr inbounds float* %tmp10713, i64 1
+  %tmp10715 = getelementptr inbounds float* %tmp10714, i64 1
+  %tmp10716 = getelementptr inbounds float* %tmp10715, i64 1
+  %tmp10717 = getelementptr inbounds float* %tmp10716, i64 1
+  %tmp10718 = getelementptr inbounds float* %tmp10717, i64 1
+  %tmp10719 = getelementptr inbounds float* %tmp10718, i64 1
+  %tmp10720 = getelementptr inbounds float* %tmp10719, i64 1
+  %tmp10721 = getelementptr inbounds float* %tmp10720, i64 1
+  %tmp10722 = getelementptr inbounds float* %tmp10721, i64 1
+  %tmp10723 = getelementptr inbounds float* %tmp10722, i64 1
+  %tmp10724 = getelementptr inbounds float* %tmp10723, i64 1
+  %tmp10725 = getelementptr inbounds float* %tmp10724, i64 1
+  %tmp10726 = getelementptr inbounds float* %tmp10725, i64 1
+  %tmp10727 = getelementptr inbounds float* %tmp10726, i64 1
+  %tmp10728 = getelementptr inbounds float* %tmp10727, i64 1
+  %tmp10729 = getelementptr inbounds float* %tmp10728, i64 1
+  %tmp10730 = getelementptr inbounds float* %tmp10729, i64 1
+  %tmp10731 = getelementptr inbounds float* %tmp10730, i64 1
+  %tmp10732 = getelementptr inbounds float* %tmp10731, i64 1
+  %tmp10733 = getelementptr inbounds float* %tmp10732, i64 1
+  %tmp10734 = getelementptr inbounds float* %tmp10733, i64 1
+  %tmp10735 = getelementptr inbounds float* %tmp10734, i64 1
+  %tmp10736 = getelementptr inbounds float* %tmp10735, i64 1
+  %tmp10737 = getelementptr inbounds float* %tmp10736, i64 1
+  %tmp10738 = getelementptr inbounds float* %tmp10737, i64 1
+  %tmp10739 = getelementptr inbounds float* %tmp10738, i64 1
+  %tmp10740 = getelementptr inbounds float* %tmp10739, i64 1
+  %tmp10741 = getelementptr inbounds float* %tmp10740, i64 1
+  %tmp10742 = getelementptr inbounds float* %tmp10741, i64 1
+  %tmp10743 = getelementptr inbounds float* %tmp10742, i64 1
+  %tmp10744 = getelementptr inbounds float* %tmp10743, i64 1
+  %tmp10745 = getelementptr inbounds float* %tmp10744, i64 1
+  %tmp10746 = getelementptr inbounds float* %tmp10745, i64 1
+  %tmp10747 = getelementptr inbounds float* %tmp10746, i64 1
+  %tmp10748 = getelementptr inbounds float* %tmp10747, i64 1
+  %tmp10749 = getelementptr inbounds float* %tmp10748, i64 1
+  %tmp10750 = getelementptr inbounds float* %tmp10749, i64 1
+  %tmp10751 = getelementptr inbounds float* %tmp10750, i64 1
+  %tmp10752 = getelementptr inbounds float* %tmp10751, i64 1
+  %tmp10753 = getelementptr inbounds float* %tmp10752, i64 1
+  %tmp10754 = getelementptr inbounds float* %tmp10753, i64 1
+  %tmp10755 = getelementptr inbounds float* %tmp10754, i64 1
+  %tmp10756 = getelementptr inbounds float* %tmp10755, i64 1
+  %tmp10757 = getelementptr inbounds float* %tmp10756, i64 1
+  %tmp10758 = getelementptr inbounds float* %tmp10757, i64 1
+  %tmp10759 = getelementptr inbounds float* %tmp10758, i64 1
+  %tmp10760 = getelementptr inbounds float* %tmp10759, i64 1
+  %tmp10761 = getelementptr inbounds float* %tmp10760, i64 1
+  %tmp10762 = getelementptr inbounds float* %tmp10761, i64 1
+  %tmp10763 = getelementptr inbounds float* %tmp10762, i64 1
+  %tmp10764 = getelementptr inbounds float* %tmp10763, i64 1
+  %tmp10765 = getelementptr inbounds float* %tmp10764, i64 1
+  %tmp10766 = getelementptr inbounds float* %tmp10765, i64 1
+  %tmp10767 = getelementptr inbounds float* %tmp10766, i64 1
+  %tmp10768 = getelementptr inbounds float* %tmp10767, i64 1
+  %tmp10769 = getelementptr inbounds float* %tmp10768, i64 1
+  %tmp10770 = getelementptr inbounds float* %tmp10769, i64 1
+  %tmp10771 = getelementptr inbounds float* %tmp10770, i64 1
+  %tmp10772 = getelementptr inbounds float* %tmp10771, i64 1
+  %tmp10773 = getelementptr inbounds float* %tmp10772, i64 1
+  %tmp10774 = getelementptr inbounds float* %tmp10773, i64 1
+  %tmp10775 = getelementptr inbounds float* %tmp10774, i64 1
+  %tmp10776 = getelementptr inbounds float* %tmp10775, i64 1
+  %tmp10777 = getelementptr inbounds float* %tmp10776, i64 1
+  %tmp10778 = getelementptr inbounds float* %tmp10777, i64 1
+  %tmp10779 = getelementptr inbounds float* %tmp10778, i64 1
+  %tmp10780 = getelementptr inbounds float* %tmp10779, i64 1
+  %tmp10781 = getelementptr inbounds float* %tmp10780, i64 1
+  %tmp10782 = getelementptr inbounds float* %tmp10781, i64 1
+  %tmp10783 = getelementptr inbounds float* %tmp10782, i64 1
+  %tmp10784 = getelementptr inbounds float* %tmp10783, i64 1
+  %tmp10785 = getelementptr inbounds float* %tmp10784, i64 1
+  %tmp10786 = getelementptr inbounds float* %tmp10785, i64 1
+  %tmp10787 = getelementptr inbounds float* %tmp10786, i64 1
+  %tmp10788 = getelementptr inbounds float* %tmp10787, i64 1
+  %tmp10789 = getelementptr inbounds float* %tmp10788, i64 1
+  %tmp10790 = getelementptr inbounds float* %tmp10789, i64 1
+  %tmp10791 = getelementptr inbounds float* %tmp10790, i64 1
+  %tmp10792 = getelementptr inbounds float* %tmp10791, i64 1
+  %tmp10793 = getelementptr inbounds float* %tmp10792, i64 1
+  %tmp10794 = getelementptr inbounds float* %tmp10793, i64 1
+  %tmp10795 = getelementptr inbounds float* %tmp10794, i64 1
+  %tmp10796 = getelementptr inbounds float* %tmp10795, i64 1
+  %tmp10797 = getelementptr inbounds float* %tmp10796, i64 1
+  %tmp10798 = getelementptr inbounds float* %tmp10797, i64 1
+  %tmp10799 = getelementptr inbounds float* %tmp10798, i64 1
+  %tmp10800 = getelementptr inbounds float* %tmp10799, i64 1
+  %tmp10801 = getelementptr inbounds float* %tmp10800, i64 1
+  %tmp10802 = getelementptr inbounds float* %tmp10801, i64 1
+  %tmp10803 = getelementptr inbounds float* %tmp10802, i64 1
+  %tmp10804 = getelementptr inbounds float* %tmp10803, i64 1
+  %tmp10805 = getelementptr inbounds float* %tmp10804, i64 1
+  %tmp10806 = getelementptr inbounds float* %tmp10805, i64 1
+  %tmp10807 = getelementptr inbounds float* %tmp10806, i64 1
+  %tmp10808 = getelementptr inbounds float* %tmp10807, i64 1
+  %tmp10809 = getelementptr inbounds float* %tmp10808, i64 1
+  %tmp10810 = getelementptr inbounds float* %tmp10809, i64 1
+  %tmp10811 = getelementptr inbounds float* %tmp10810, i64 1
+  %tmp10812 = getelementptr inbounds float* %tmp10811, i64 1
+  %tmp10813 = getelementptr inbounds float* %tmp10812, i64 1
+  %tmp10814 = getelementptr inbounds float* %tmp10813, i64 1
+  %tmp10815 = getelementptr inbounds float* %tmp10814, i64 1
+  %tmp10816 = getelementptr inbounds float* %tmp10815, i64 1
+  %tmp10817 = getelementptr inbounds float* %tmp10816, i64 1
+  %tmp10818 = getelementptr inbounds float* %tmp10817, i64 1
+  %tmp10819 = getelementptr inbounds float* %tmp10818, i64 1
+  %tmp10820 = getelementptr inbounds float* %tmp10819, i64 1
+  %tmp10821 = getelementptr inbounds float* %tmp10820, i64 1
+  %tmp10822 = getelementptr inbounds float* %tmp10821, i64 1
+  %tmp10823 = getelementptr inbounds float* %tmp10822, i64 1
+  %tmp10824 = getelementptr inbounds float* %tmp10823, i64 1
+  %tmp10825 = getelementptr inbounds float* %tmp10824, i64 1
+  %tmp10826 = getelementptr inbounds float* %tmp10825, i64 1
+  %tmp10827 = getelementptr inbounds float* %tmp10826, i64 1
+  %tmp10828 = getelementptr inbounds float* %tmp10827, i64 1
+  %tmp10829 = getelementptr inbounds float* %tmp10828, i64 1
+  %tmp10830 = getelementptr inbounds float* %tmp10829, i64 1
+  %tmp10831 = getelementptr inbounds float* %tmp10830, i64 1
+  %tmp10832 = getelementptr inbounds float* %tmp10831, i64 1
+  %tmp10833 = getelementptr inbounds float* %tmp10832, i64 1
+  %tmp10834 = getelementptr inbounds float* %tmp10833, i64 1
+  %tmp10835 = getelementptr inbounds float* %tmp10834, i64 1
+  %tmp10836 = getelementptr inbounds float* %tmp10835, i64 1
+  %tmp10837 = getelementptr inbounds float* %tmp10836, i64 1
+  %tmp10838 = getelementptr inbounds float* %tmp10837, i64 1
+  %tmp10839 = getelementptr inbounds float* %tmp10838, i64 1
+  %tmp10840 = getelementptr inbounds float* %tmp10839, i64 1
+  %tmp10841 = getelementptr inbounds float* %tmp10840, i64 1
+  %tmp10842 = getelementptr inbounds float* %tmp10841, i64 1
+  %tmp10843 = getelementptr inbounds float* %tmp10842, i64 1
+  %tmp10844 = getelementptr inbounds float* %tmp10843, i64 1
+  %tmp10845 = getelementptr inbounds float* %tmp10844, i64 1
+  %tmp10846 = getelementptr inbounds float* %tmp10845, i64 1
+  %tmp10847 = getelementptr inbounds float* %tmp10846, i64 1
+  %tmp10848 = getelementptr inbounds float* %tmp10847, i64 1
+  %tmp10849 = getelementptr inbounds float* %tmp10848, i64 1
+  %tmp10850 = getelementptr inbounds float* %tmp10849, i64 1
+  %tmp10851 = getelementptr inbounds float* %tmp10850, i64 1
+  %tmp10852 = getelementptr inbounds float* %tmp10851, i64 1
+  %tmp10853 = getelementptr inbounds float* %tmp10852, i64 1
+  %tmp10854 = getelementptr inbounds float* %tmp10853, i64 1
+  %tmp10855 = getelementptr inbounds float* %tmp10854, i64 1
+  %tmp10856 = getelementptr inbounds float* %tmp10855, i64 1
+  %tmp10857 = getelementptr inbounds float* %tmp10856, i64 1
+  %tmp10858 = getelementptr inbounds float* %tmp10857, i64 1
+  %tmp10859 = getelementptr inbounds float* %tmp10858, i64 1
+  %tmp10860 = getelementptr inbounds float* %tmp10859, i64 1
+  %tmp10861 = getelementptr inbounds float* %tmp10860, i64 1
+  %tmp10862 = getelementptr inbounds float* %tmp10861, i64 1
+  %tmp10863 = getelementptr inbounds float* %tmp10862, i64 1
+  %tmp10864 = getelementptr inbounds float* %tmp10863, i64 1
+  %tmp10865 = getelementptr inbounds float* %tmp10864, i64 1
+  %tmp10866 = getelementptr inbounds float* %tmp10865, i64 1
+  %tmp10867 = getelementptr inbounds float* %tmp10866, i64 1
+  %tmp10868 = getelementptr inbounds float* %tmp10867, i64 1
+  %tmp10869 = getelementptr inbounds float* %tmp10868, i64 1
+  %tmp10870 = getelementptr inbounds float* %tmp10869, i64 1
+  %tmp10871 = getelementptr inbounds float* %tmp10870, i64 1
+  %tmp10872 = getelementptr inbounds float* %tmp10871, i64 1
+  %tmp10873 = getelementptr inbounds float* %tmp10872, i64 1
+  %tmp10874 = getelementptr inbounds float* %tmp10873, i64 1
+  %tmp10875 = getelementptr inbounds float* %tmp10874, i64 1
+  %tmp10876 = getelementptr inbounds float* %tmp10875, i64 1
+  %tmp10877 = getelementptr inbounds float* %tmp10876, i64 1
+  %tmp10878 = getelementptr inbounds float* %tmp10877, i64 1
+  %tmp10879 = getelementptr inbounds float* %tmp10878, i64 1
+  %tmp10880 = getelementptr inbounds float* %tmp10879, i64 1
+  %tmp10881 = getelementptr inbounds float* %tmp10880, i64 1
+  %tmp10882 = getelementptr inbounds float* %tmp10881, i64 1
+  %tmp10883 = getelementptr inbounds float* %tmp10882, i64 1
+  %tmp10884 = getelementptr inbounds float* %tmp10883, i64 1
+  %tmp10885 = getelementptr inbounds float* %tmp10884, i64 1
+  %tmp10886 = getelementptr inbounds float* %tmp10885, i64 1
+  %tmp10887 = getelementptr inbounds float* %tmp10886, i64 1
+  %tmp10888 = getelementptr inbounds float* %tmp10887, i64 1
+  %tmp10889 = getelementptr inbounds float* %tmp10888, i64 1
+  %tmp10890 = getelementptr inbounds float* %tmp10889, i64 1
+  %tmp10891 = getelementptr inbounds float* %tmp10890, i64 1
+  %tmp10892 = getelementptr inbounds float* %tmp10891, i64 1
+  %tmp10893 = getelementptr inbounds float* %tmp10892, i64 1
+  %tmp10894 = getelementptr inbounds float* %tmp10893, i64 1
+  %tmp10895 = getelementptr inbounds float* %tmp10894, i64 1
+  %tmp10896 = getelementptr inbounds float* %tmp10895, i64 1
+  %tmp10897 = getelementptr inbounds float* %tmp10896, i64 1
+  %tmp10898 = getelementptr inbounds float* %tmp10897, i64 1
+  %tmp10899 = getelementptr inbounds float* %tmp10898, i64 1
+  %tmp10900 = getelementptr inbounds float* %tmp10899, i64 1
+  %tmp10901 = getelementptr inbounds float* %tmp10900, i64 1
+  %tmp10902 = getelementptr inbounds float* %tmp10901, i64 1
+  %tmp10903 = getelementptr inbounds float* %tmp10902, i64 1
+  %tmp10904 = getelementptr inbounds float* %tmp10903, i64 1
+  %tmp10905 = getelementptr inbounds float* %tmp10904, i64 1
+  %tmp10906 = getelementptr inbounds float* %tmp10905, i64 1
+  %tmp10907 = getelementptr inbounds float* %tmp10906, i64 1
+  %tmp10908 = getelementptr inbounds float* %tmp10907, i64 1
+  %tmp10909 = getelementptr inbounds float* %tmp10908, i64 1
+  %tmp10910 = getelementptr inbounds float* %tmp10909, i64 1
+  %tmp10911 = getelementptr inbounds float* %tmp10910, i64 1
+  %tmp10912 = getelementptr inbounds float* %tmp10911, i64 1
+  %tmp10913 = getelementptr inbounds float* %tmp10912, i64 1
+  %tmp10914 = getelementptr inbounds float* %tmp10913, i64 1
+  %tmp10915 = getelementptr inbounds float* %tmp10914, i64 1
+  %tmp10916 = getelementptr inbounds float* %tmp10915, i64 1
+  %tmp10917 = getelementptr inbounds float* %tmp10916, i64 1
+  %tmp10918 = getelementptr inbounds float* %tmp10917, i64 1
+  %tmp10919 = getelementptr inbounds float* %tmp10918, i64 1
+  %tmp10920 = getelementptr inbounds float* %tmp10919, i64 1
+  %tmp10921 = getelementptr inbounds float* %tmp10920, i64 1
+  %tmp10922 = getelementptr inbounds float* %tmp10921, i64 1
+  %tmp10923 = getelementptr inbounds float* %tmp10922, i64 1
+  %tmp10924 = getelementptr inbounds float* %tmp10923, i64 1
+  %tmp10925 = getelementptr inbounds float* %tmp10924, i64 1
+  %tmp10926 = getelementptr inbounds float* %tmp10925, i64 1
+  %tmp10927 = getelementptr inbounds float* %tmp10926, i64 1
+  %tmp10928 = getelementptr inbounds float* %tmp10927, i64 1
+  %tmp10929 = getelementptr inbounds float* %tmp10928, i64 1
+  %tmp10930 = getelementptr inbounds float* %tmp10929, i64 1
+  %tmp10931 = getelementptr inbounds float* %tmp10930, i64 1
+  %tmp10932 = getelementptr inbounds float* %tmp10931, i64 1
+  %tmp10933 = getelementptr inbounds float* %tmp10932, i64 1
+  %tmp10934 = getelementptr inbounds float* %tmp10933, i64 1
+  %tmp10935 = getelementptr inbounds float* %tmp10934, i64 1
+  %tmp10936 = getelementptr inbounds float* %tmp10935, i64 1
+  %tmp10937 = getelementptr inbounds float* %tmp10936, i64 1
+  %tmp10938 = getelementptr inbounds float* %tmp10937, i64 1
+  %tmp10939 = getelementptr inbounds float* %tmp10938, i64 1
+  %tmp10940 = getelementptr inbounds float* %tmp10939, i64 1
+  %tmp10941 = getelementptr inbounds float* %tmp10940, i64 1
+  %tmp10942 = getelementptr inbounds float* %tmp10941, i64 1
+  %tmp10943 = getelementptr inbounds float* %tmp10942, i64 1
+  %tmp10944 = getelementptr inbounds float* %tmp10943, i64 1
+  %tmp10945 = getelementptr inbounds float* %tmp10944, i64 1
+  %tmp10946 = getelementptr inbounds float* %tmp10945, i64 1
+  %tmp10947 = getelementptr inbounds float* %tmp10946, i64 1
+  %tmp10948 = getelementptr inbounds float* %tmp10947, i64 1
+  %tmp10949 = getelementptr inbounds float* %tmp10948, i64 1
+  %tmp10950 = getelementptr inbounds float* %tmp10949, i64 1
+  %tmp10951 = getelementptr inbounds float* %tmp10950, i64 1
+  %tmp10952 = getelementptr inbounds float* %tmp10951, i64 1
+  %tmp10953 = getelementptr inbounds float* %tmp10952, i64 1
+  %tmp10954 = getelementptr inbounds float* %tmp10953, i64 1
+  %tmp10955 = getelementptr inbounds float* %tmp10954, i64 1
+  %tmp10956 = getelementptr inbounds float* %tmp10955, i64 1
+  %tmp10957 = getelementptr inbounds float* %tmp10956, i64 1
+  %tmp10958 = getelementptr inbounds float* %tmp10957, i64 1
+  %tmp10959 = getelementptr inbounds float* %tmp10958, i64 1
+  %tmp10960 = getelementptr inbounds float* %tmp10959, i64 1
+  %tmp10961 = getelementptr inbounds float* %tmp10960, i64 1
+  %tmp10962 = getelementptr inbounds float* %tmp10961, i64 1
+  %tmp10963 = getelementptr inbounds float* %tmp10962, i64 1
+  %tmp10964 = getelementptr inbounds float* %tmp10963, i64 1
+  %tmp10965 = getelementptr inbounds float* %tmp10964, i64 1
+  %tmp10966 = getelementptr inbounds float* %tmp10965, i64 1
+  %tmp10967 = getelementptr inbounds float* %tmp10966, i64 1
+  %tmp10968 = getelementptr inbounds float* %tmp10967, i64 1
+  %tmp10969 = getelementptr inbounds float* %tmp10968, i64 1
+  %tmp10970 = getelementptr inbounds float* %tmp10969, i64 1
+  %tmp10971 = getelementptr inbounds float* %tmp10970, i64 1
+  %tmp10972 = getelementptr inbounds float* %tmp10971, i64 1
+  %tmp10973 = getelementptr inbounds float* %tmp10972, i64 1
+  %tmp10974 = getelementptr inbounds float* %tmp10973, i64 1
+  %tmp10975 = getelementptr inbounds float* %tmp10974, i64 1
+  %tmp10976 = getelementptr inbounds float* %tmp10975, i64 1
+  %tmp10977 = getelementptr inbounds float* %tmp10976, i64 1
+  %tmp10978 = getelementptr inbounds float* %tmp10977, i64 1
+  %tmp10979 = getelementptr inbounds float* %tmp10978, i64 1
+  %tmp10980 = getelementptr inbounds float* %tmp10979, i64 1
+  %tmp10981 = getelementptr inbounds float* %tmp10980, i64 1
+  %tmp10982 = getelementptr inbounds float* %tmp10981, i64 1
+  %tmp10983 = getelementptr inbounds float* %tmp10982, i64 1
+  %tmp10984 = getelementptr inbounds float* %tmp10983, i64 1
+  %tmp10985 = getelementptr inbounds float* %tmp10984, i64 1
+  %tmp10986 = getelementptr inbounds float* %tmp10985, i64 1
+  %tmp10987 = getelementptr inbounds float* %tmp10986, i64 1
+  %tmp10988 = getelementptr inbounds float* %tmp10987, i64 1
+  %tmp10989 = getelementptr inbounds float* %tmp10988, i64 1
+  %tmp10990 = getelementptr inbounds float* %tmp10989, i64 1
+  %tmp10991 = getelementptr inbounds float* %tmp10990, i64 1
+  %tmp10992 = getelementptr inbounds float* %tmp10991, i64 1
+  %tmp10993 = getelementptr inbounds float* %tmp10992, i64 1
+  %tmp10994 = getelementptr inbounds float* %tmp10993, i64 1
+  %tmp10995 = getelementptr inbounds float* %tmp10994, i64 1
+  %tmp10996 = getelementptr inbounds float* %tmp10995, i64 1
+  %tmp10997 = getelementptr inbounds float* %tmp10996, i64 1
+  %tmp10998 = getelementptr inbounds float* %tmp10997, i64 1
+  %tmp10999 = getelementptr inbounds float* %tmp10998, i64 1
+  %tmp11000 = getelementptr inbounds float* %tmp10999, i64 1
+  %tmp11001 = getelementptr inbounds float* %tmp11000, i64 1
+  %tmp11002 = getelementptr inbounds float* %tmp11001, i64 1
+  %tmp11003 = getelementptr inbounds float* %tmp11002, i64 1
+  %tmp11004 = getelementptr inbounds float* %tmp11003, i64 1
+  %tmp11005 = getelementptr inbounds float* %tmp11004, i64 1
+  %tmp11006 = getelementptr inbounds float* %tmp11005, i64 1
+  %tmp11007 = getelementptr inbounds float* %tmp11006, i64 1
+  %tmp11008 = getelementptr inbounds float* %tmp11007, i64 1
+  %tmp11009 = getelementptr inbounds float* %tmp11008, i64 1
+  %tmp11010 = getelementptr inbounds float* %tmp11009, i64 1
+  %tmp11011 = getelementptr inbounds float* %tmp11010, i64 1
+  %tmp11012 = getelementptr inbounds float* %tmp11011, i64 1
+  %tmp11013 = getelementptr inbounds float* %tmp11012, i64 1
+  %tmp11014 = getelementptr inbounds float* %tmp11013, i64 1
+  %tmp11015 = getelementptr inbounds float* %tmp11014, i64 1
+  %tmp11016 = getelementptr inbounds float* %tmp11015, i64 1
+  %tmp11017 = getelementptr inbounds float* %tmp11016, i64 1
+  %tmp11018 = getelementptr inbounds float* %tmp11017, i64 1
+  %tmp11019 = getelementptr inbounds float* %tmp11018, i64 1
+  %tmp11020 = getelementptr inbounds float* %tmp11019, i64 1
+  %tmp11021 = getelementptr inbounds float* %tmp11020, i64 1
+  %tmp11022 = getelementptr inbounds float* %tmp11021, i64 1
+  %tmp11023 = getelementptr inbounds float* %tmp11022, i64 1
+  %tmp11024 = getelementptr inbounds float* %tmp11023, i64 1
+  %tmp11025 = getelementptr inbounds float* %tmp11024, i64 1
+  %tmp11026 = getelementptr inbounds float* %tmp11025, i64 1
+  %tmp11027 = getelementptr inbounds float* %tmp11026, i64 1
+  %tmp11028 = getelementptr inbounds float* %tmp11027, i64 1
+  %tmp11029 = getelementptr inbounds float* %tmp11028, i64 1
+  %tmp11030 = getelementptr inbounds float* %tmp11029, i64 1
+  %tmp11031 = getelementptr inbounds float* %tmp11030, i64 1
+  %tmp11032 = getelementptr inbounds float* %tmp11031, i64 1
+  %tmp11033 = getelementptr inbounds float* %tmp11032, i64 1
+  %tmp11034 = getelementptr inbounds float* %tmp11033, i64 1
+  %tmp11035 = getelementptr inbounds float* %tmp11034, i64 1
+  %tmp11036 = getelementptr inbounds float* %tmp11035, i64 1
+  %tmp11037 = getelementptr inbounds float* %tmp11036, i64 1
+  %tmp11038 = getelementptr inbounds float* %tmp11037, i64 1
+  %tmp11039 = getelementptr inbounds float* %tmp11038, i64 1
+  %tmp11040 = getelementptr inbounds float* %tmp11039, i64 1
+  %tmp11041 = getelementptr inbounds float* %tmp11040, i64 1
+  %tmp11042 = getelementptr inbounds float* %tmp11041, i64 1
+  %tmp11043 = getelementptr inbounds float* %tmp11042, i64 1
+  %tmp11044 = getelementptr inbounds float* %tmp11043, i64 1
+  %tmp11045 = getelementptr inbounds float* %tmp11044, i64 1
+  %tmp11046 = getelementptr inbounds float* %tmp11045, i64 1
+  %tmp11047 = getelementptr inbounds float* %tmp11046, i64 1
+  %tmp11048 = getelementptr inbounds float* %tmp11047, i64 1
+  %tmp11049 = getelementptr inbounds float* %tmp11048, i64 1
+  %tmp11050 = getelementptr inbounds float* %tmp11049, i64 1
+  %tmp11051 = getelementptr inbounds float* %tmp11050, i64 1
+  %tmp11052 = getelementptr inbounds float* %tmp11051, i64 1
+  %tmp11053 = getelementptr inbounds float* %tmp11052, i64 1
+  %tmp11054 = getelementptr inbounds float* %tmp11053, i64 1
+  %tmp11055 = getelementptr inbounds float* %tmp11054, i64 1
+  %tmp11056 = getelementptr inbounds float* %tmp11055, i64 1
+  %tmp11057 = getelementptr inbounds float* %tmp11056, i64 1
+  %tmp11058 = getelementptr inbounds float* %tmp11057, i64 1
+  %tmp11059 = getelementptr inbounds float* %tmp11058, i64 1
+  %tmp11060 = getelementptr inbounds float* %tmp11059, i64 1
+  %tmp11061 = getelementptr inbounds float* %tmp11060, i64 1
+  %tmp11062 = getelementptr inbounds float* %tmp11061, i64 1
+  %tmp11063 = getelementptr inbounds float* %tmp11062, i64 1
+  %tmp11064 = getelementptr inbounds float* %tmp11063, i64 1
+  %tmp11065 = getelementptr inbounds float* %tmp11064, i64 1
+  %tmp11066 = getelementptr inbounds float* %tmp11065, i64 1
+  %tmp11067 = getelementptr inbounds float* %tmp11066, i64 1
+  %tmp11068 = getelementptr inbounds float* %tmp11067, i64 1
+  %tmp11069 = getelementptr inbounds float* %tmp11068, i64 1
+  %tmp11070 = getelementptr inbounds float* %tmp11069, i64 1
+  %tmp11071 = getelementptr inbounds float* %tmp11070, i64 1
+  %tmp11072 = getelementptr inbounds float* %tmp11071, i64 1
+  %tmp11073 = getelementptr inbounds float* %tmp11072, i64 1
+  %tmp11074 = getelementptr inbounds float* %tmp11073, i64 1
+  %tmp11075 = getelementptr inbounds float* %tmp11074, i64 1
+  %tmp11076 = getelementptr inbounds float* %tmp11075, i64 1
+  %tmp11077 = getelementptr inbounds float* %tmp11076, i64 1
+  %tmp11078 = getelementptr inbounds float* %tmp11077, i64 1
+  %tmp11079 = getelementptr inbounds float* %tmp11078, i64 1
+  %tmp11080 = getelementptr inbounds float* %tmp11079, i64 1
+  %tmp11081 = getelementptr inbounds float* %tmp11080, i64 1
+  %tmp11082 = getelementptr inbounds float* %tmp11081, i64 1
+  %tmp11083 = getelementptr inbounds float* %tmp11082, i64 1
+  %tmp11084 = getelementptr inbounds float* %tmp11083, i64 1
+  %tmp11085 = getelementptr inbounds float* %tmp11084, i64 1
+  %tmp11086 = getelementptr inbounds float* %tmp11085, i64 1
+  %tmp11087 = getelementptr inbounds float* %tmp11086, i64 1
+  %tmp11088 = getelementptr inbounds float* %tmp11087, i64 1
+  %tmp11089 = getelementptr inbounds float* %tmp11088, i64 1
+  %tmp11090 = getelementptr inbounds float* %tmp11089, i64 1
+  %tmp11091 = getelementptr inbounds float* %tmp11090, i64 1
+  %tmp11092 = getelementptr inbounds float* %tmp11091, i64 1
+  %tmp11093 = getelementptr inbounds float* %tmp11092, i64 1
+  %tmp11094 = getelementptr inbounds float* %tmp11093, i64 1
+  %tmp11095 = getelementptr inbounds float* %tmp11094, i64 1
+  %tmp11096 = getelementptr inbounds float* %tmp11095, i64 1
+  %tmp11097 = getelementptr inbounds float* %tmp11096, i64 1
+  %tmp11098 = getelementptr inbounds float* %tmp11097, i64 1
+  %tmp11099 = getelementptr inbounds float* %tmp11098, i64 1
+  %tmp11100 = getelementptr inbounds float* %tmp11099, i64 1
+  %tmp11101 = getelementptr inbounds float* %tmp11100, i64 1
+  %tmp11102 = getelementptr inbounds float* %tmp11101, i64 1
+  %tmp11103 = getelementptr inbounds float* %tmp11102, i64 1
+  %tmp11104 = getelementptr inbounds float* %tmp11103, i64 1
+  %tmp11105 = getelementptr inbounds float* %tmp11104, i64 1
+  %tmp11106 = getelementptr inbounds float* %tmp11105, i64 1
+  %tmp11107 = getelementptr inbounds float* %tmp11106, i64 1
+  %tmp11108 = getelementptr inbounds float* %tmp11107, i64 1
+  %tmp11109 = getelementptr inbounds float* %tmp11108, i64 1
+  %tmp11110 = getelementptr inbounds float* %tmp11109, i64 1
+  %tmp11111 = getelementptr inbounds float* %tmp11110, i64 1
+  %tmp11112 = getelementptr inbounds float* %tmp11111, i64 1
+  %tmp11113 = getelementptr inbounds float* %tmp11112, i64 1
+  %tmp11114 = getelementptr inbounds float* %tmp11113, i64 1
+  %tmp11115 = getelementptr inbounds float* %tmp11114, i64 1
+  %tmp11116 = getelementptr inbounds float* %tmp11115, i64 1
+  %tmp11117 = getelementptr inbounds float* %tmp11116, i64 1
+  %tmp11118 = getelementptr inbounds float* %tmp11117, i64 1
+  %tmp11119 = getelementptr inbounds float* %tmp11118, i64 1
+  %tmp11120 = getelementptr inbounds float* %tmp11119, i64 1
+  %tmp11121 = getelementptr inbounds float* %tmp11120, i64 1
+  %tmp11122 = getelementptr inbounds float* %tmp11121, i64 1
+  %tmp11123 = getelementptr inbounds float* %tmp11122, i64 1
+  %tmp11124 = getelementptr inbounds float* %tmp11123, i64 1
+  %tmp11125 = getelementptr inbounds float* %tmp11124, i64 1
+  %tmp11126 = getelementptr inbounds float* %tmp11125, i64 1
+  %tmp11127 = getelementptr inbounds float* %tmp11126, i64 1
+  %tmp11128 = getelementptr inbounds float* %tmp11127, i64 1
+  %tmp11129 = getelementptr inbounds float* %tmp11128, i64 1
+  %tmp11130 = getelementptr inbounds float* %tmp11129, i64 1
+  %tmp11131 = getelementptr inbounds float* %tmp11130, i64 1
+  %tmp11132 = getelementptr inbounds float* %tmp11131, i64 1
+  %tmp11133 = getelementptr inbounds float* %tmp11132, i64 1
+  %tmp11134 = getelementptr inbounds float* %tmp11133, i64 1
+  %tmp11135 = getelementptr inbounds float* %tmp11134, i64 1
+  %tmp11136 = getelementptr inbounds float* %tmp11135, i64 1
+  %tmp11137 = getelementptr inbounds float* %tmp11136, i64 1
+  %tmp11138 = getelementptr inbounds float* %tmp11137, i64 1
+  %tmp11139 = getelementptr inbounds float* %tmp11138, i64 1
+  %tmp11140 = getelementptr inbounds float* %tmp11139, i64 1
+  %tmp11141 = getelementptr inbounds float* %tmp11140, i64 1
+  %tmp11142 = getelementptr inbounds float* %tmp11141, i64 1
+  %tmp11143 = getelementptr inbounds float* %tmp11142, i64 1
+  %tmp11144 = getelementptr inbounds float* %tmp11143, i64 1
+  %tmp11145 = getelementptr inbounds float* %tmp11144, i64 1
+  %tmp11146 = getelementptr inbounds float* %tmp11145, i64 1
+  %tmp11147 = getelementptr inbounds float* %tmp11146, i64 1
+  %tmp11148 = getelementptr inbounds float* %tmp11147, i64 1
+  %tmp11149 = getelementptr inbounds float* %tmp11148, i64 1
+  %tmp11150 = getelementptr inbounds float* %tmp11149, i64 1
+  %tmp11151 = getelementptr inbounds float* %tmp11150, i64 1
+  %tmp11152 = getelementptr inbounds float* %tmp11151, i64 1
+  %tmp11153 = getelementptr inbounds float* %tmp11152, i64 1
+  %tmp11154 = getelementptr inbounds float* %tmp11153, i64 1
+  %tmp11155 = getelementptr inbounds float* %tmp11154, i64 1
+  %tmp11156 = getelementptr inbounds float* %tmp11155, i64 1
+  %tmp11157 = getelementptr inbounds float* %tmp11156, i64 1
+  %tmp11158 = getelementptr inbounds float* %tmp11157, i64 1
+  %tmp11159 = getelementptr inbounds float* %tmp11158, i64 1
+  %tmp11160 = getelementptr inbounds float* %tmp11159, i64 1
+  %tmp11161 = getelementptr inbounds float* %tmp11160, i64 1
+  %tmp11162 = getelementptr inbounds float* %tmp11161, i64 1
+  %tmp11163 = getelementptr inbounds float* %tmp11162, i64 1
+  %tmp11164 = getelementptr inbounds float* %tmp11163, i64 1
+  %tmp11165 = getelementptr inbounds float* %tmp11164, i64 1
+  %tmp11166 = getelementptr inbounds float* %tmp11165, i64 1
+  %tmp11167 = getelementptr inbounds float* %tmp11166, i64 1
+  %tmp11168 = getelementptr inbounds float* %tmp11167, i64 1
+  %tmp11169 = getelementptr inbounds float* %tmp11168, i64 1
+  %tmp11170 = getelementptr inbounds float* %tmp11169, i64 1
+  %tmp11171 = getelementptr inbounds float* %tmp11170, i64 1
+  %tmp11172 = getelementptr inbounds float* %tmp11171, i64 1
+  %tmp11173 = getelementptr inbounds float* %tmp11172, i64 1
+  %tmp11174 = getelementptr inbounds float* %tmp11173, i64 1
+  %tmp11175 = getelementptr inbounds float* %tmp11174, i64 1
+  %tmp11176 = getelementptr inbounds float* %tmp11175, i64 1
+  %tmp11177 = getelementptr inbounds float* %tmp11176, i64 1
+  %tmp11178 = getelementptr inbounds float* %tmp11177, i64 1
+  %tmp11179 = getelementptr inbounds float* %tmp11178, i64 1
+  %tmp11180 = getelementptr inbounds float* %tmp11179, i64 1
+  %tmp11181 = getelementptr inbounds float* %tmp11180, i64 1
+  %tmp11182 = getelementptr inbounds float* %tmp11181, i64 1
+  %tmp11183 = getelementptr inbounds float* %tmp11182, i64 1
+  %tmp11184 = getelementptr inbounds float* %tmp11183, i64 1
+  %tmp11185 = getelementptr inbounds float* %tmp11184, i64 1
+  %tmp11186 = getelementptr inbounds float* %tmp11185, i64 1
+  %tmp11187 = getelementptr inbounds float* %tmp11186, i64 1
+  %tmp11188 = getelementptr inbounds float* %tmp11187, i64 1
+  %tmp11189 = getelementptr inbounds float* %tmp11188, i64 1
+  %tmp11190 = getelementptr inbounds float* %tmp11189, i64 1
+  %tmp11191 = getelementptr inbounds float* %tmp11190, i64 1
+  %tmp11192 = getelementptr inbounds float* %tmp11191, i64 1
+  %tmp11193 = getelementptr inbounds float* %tmp11192, i64 1
+  %tmp11194 = getelementptr inbounds float* %tmp11193, i64 1
+  %tmp11195 = getelementptr inbounds float* %tmp11194, i64 1
+  %tmp11196 = getelementptr inbounds float* %tmp11195, i64 1
+  %tmp11197 = getelementptr inbounds float* %tmp11196, i64 1
+  %tmp11198 = getelementptr inbounds float* %tmp11197, i64 1
+  %tmp11199 = getelementptr inbounds float* %tmp11198, i64 1
+  %tmp11200 = getelementptr inbounds float* %tmp11199, i64 1
+  %tmp11201 = getelementptr inbounds float* %tmp11200, i64 1
+  %tmp11202 = getelementptr inbounds float* %tmp11201, i64 1
+  %tmp11203 = getelementptr inbounds float* %tmp11202, i64 1
+  %tmp11204 = getelementptr inbounds float* %tmp11203, i64 1
+  %tmp11205 = getelementptr inbounds float* %tmp11204, i64 1
+  %tmp11206 = getelementptr inbounds float* %tmp11205, i64 1
+  %tmp11207 = getelementptr inbounds float* %tmp11206, i64 1
+  %tmp11208 = getelementptr inbounds float* %tmp11207, i64 1
+  %tmp11209 = getelementptr inbounds float* %tmp11208, i64 1
+  %tmp11210 = getelementptr inbounds float* %tmp11209, i64 1
+  %tmp11211 = getelementptr inbounds float* %tmp11210, i64 1
+  %tmp11212 = getelementptr inbounds float* %tmp11211, i64 1
+  %tmp11213 = getelementptr inbounds float* %tmp11212, i64 1
+  %tmp11214 = getelementptr inbounds float* %tmp11213, i64 1
+  %tmp11215 = getelementptr inbounds float* %tmp11214, i64 1
+  %tmp11216 = getelementptr inbounds float* %tmp11215, i64 1
+  %tmp11217 = getelementptr inbounds float* %tmp11216, i64 1
+  %tmp11218 = getelementptr inbounds float* %tmp11217, i64 1
+  %tmp11219 = getelementptr inbounds float* %tmp11218, i64 1
+  %tmp11220 = getelementptr inbounds float* %tmp11219, i64 1
+  %tmp11221 = getelementptr inbounds float* %tmp11220, i64 1
+  %tmp11222 = getelementptr inbounds float* %tmp11221, i64 1
+  %tmp11223 = getelementptr inbounds float* %tmp11222, i64 1
+  %tmp11224 = getelementptr inbounds float* %tmp11223, i64 1
+  %tmp11225 = getelementptr inbounds float* %tmp11224, i64 1
+  %tmp11226 = getelementptr inbounds float* %tmp11225, i64 1
+  %tmp11227 = getelementptr inbounds float* %tmp11226, i64 1
+  %tmp11228 = getelementptr inbounds float* %tmp11227, i64 1
+  %tmp11229 = getelementptr inbounds float* %tmp11228, i64 1
+  %tmp11230 = getelementptr inbounds float* %tmp11229, i64 1
+  %tmp11231 = getelementptr inbounds float* %tmp11230, i64 1
+  %tmp11232 = getelementptr inbounds float* %tmp11231, i64 1
+  %tmp11233 = getelementptr inbounds float* %tmp11232, i64 1
+  %tmp11234 = getelementptr inbounds float* %tmp11233, i64 1
+  %tmp11235 = getelementptr inbounds float* %tmp11234, i64 1
+  %tmp11236 = getelementptr inbounds float* %tmp11235, i64 1
+  %tmp11237 = getelementptr inbounds float* %tmp11236, i64 1
+  %tmp11238 = getelementptr inbounds float* %tmp11237, i64 1
+  %tmp11239 = getelementptr inbounds float* %tmp11238, i64 1
+  %tmp11240 = getelementptr inbounds float* %tmp11239, i64 1
+  %tmp11241 = getelementptr inbounds float* %tmp11240, i64 1
+  %tmp11242 = getelementptr inbounds float* %tmp11241, i64 1
+  %tmp11243 = getelementptr inbounds float* %tmp11242, i64 1
+  %tmp11244 = getelementptr inbounds float* %tmp11243, i64 1
+  %tmp11245 = getelementptr inbounds float* %tmp11244, i64 1
+  %tmp11246 = getelementptr inbounds float* %tmp11245, i64 1
+  %tmp11247 = getelementptr inbounds float* %tmp11246, i64 1
+  %tmp11248 = getelementptr inbounds float* %tmp11247, i64 1
+  %tmp11249 = getelementptr inbounds float* %tmp11248, i64 1
+  %tmp11250 = getelementptr inbounds float* %tmp11249, i64 1
+  %tmp11251 = getelementptr inbounds float* %tmp11250, i64 1
+  %tmp11252 = getelementptr inbounds float* %tmp11251, i64 1
+  %tmp11253 = getelementptr inbounds float* %tmp11252, i64 1
+  %tmp11254 = getelementptr inbounds float* %tmp11253, i64 1
+  %tmp11255 = getelementptr inbounds float* %tmp11254, i64 1
+  %tmp11256 = getelementptr inbounds float* %tmp11255, i64 1
+  %tmp11257 = getelementptr inbounds float* %tmp11256, i64 1
+  %tmp11258 = getelementptr inbounds float* %tmp11257, i64 1
+  %tmp11259 = getelementptr inbounds float* %tmp11258, i64 1
+  %tmp11260 = getelementptr inbounds float* %tmp11259, i64 1
+  %tmp11261 = getelementptr inbounds float* %tmp11260, i64 1
+  %tmp11262 = getelementptr inbounds float* %tmp11261, i64 1
+  %tmp11263 = getelementptr inbounds float* %tmp11262, i64 1
+  %tmp11264 = getelementptr inbounds float* %tmp11263, i64 1
+  %tmp11265 = getelementptr inbounds float* %tmp11264, i64 1
+  %tmp11266 = getelementptr inbounds float* %tmp11265, i64 1
+  %tmp11267 = getelementptr inbounds float* %tmp11266, i64 1
+  %tmp11268 = getelementptr inbounds float* %tmp11267, i64 1
+  %tmp11269 = getelementptr inbounds float* %tmp11268, i64 1
+  %tmp11270 = getelementptr inbounds float* %tmp11269, i64 1
+  %tmp11271 = getelementptr inbounds float* %tmp11270, i64 1
+  %tmp11272 = getelementptr inbounds float* %tmp11271, i64 1
+  %tmp11273 = getelementptr inbounds float* %tmp11272, i64 1
+  %tmp11274 = getelementptr inbounds float* %tmp11273, i64 1
+  %tmp11275 = getelementptr inbounds float* %tmp11274, i64 1
+  %tmp11276 = getelementptr inbounds float* %tmp11275, i64 1
+  %tmp11277 = getelementptr inbounds float* %tmp11276, i64 1
+  %tmp11278 = getelementptr inbounds float* %tmp11277, i64 1
+  %tmp11279 = getelementptr inbounds float* %tmp11278, i64 1
+  %tmp11280 = getelementptr inbounds float* %tmp11279, i64 1
+  %tmp11281 = getelementptr inbounds float* %tmp11280, i64 1
+  %tmp11282 = getelementptr inbounds float* %tmp11281, i64 1
+  %tmp11283 = getelementptr inbounds float* %tmp11282, i64 1
+  %tmp11284 = getelementptr inbounds float* %tmp11283, i64 1
+  %tmp11285 = getelementptr inbounds float* %tmp11284, i64 1
+  %tmp11286 = getelementptr inbounds float* %tmp11285, i64 1
+  %tmp11287 = getelementptr inbounds float* %tmp11286, i64 1
+  %tmp11288 = getelementptr inbounds float* %tmp11287, i64 1
+  %tmp11289 = getelementptr inbounds float* %tmp11288, i64 1
+  %tmp11290 = getelementptr inbounds float* %tmp11289, i64 1
+  %tmp11291 = getelementptr inbounds float* %tmp11290, i64 1
+  %tmp11292 = getelementptr inbounds float* %tmp11291, i64 1
+  %tmp11293 = getelementptr inbounds float* %tmp11292, i64 1
+  %tmp11294 = getelementptr inbounds float* %tmp11293, i64 1
+  %tmp11295 = getelementptr inbounds float* %tmp11294, i64 1
+  %tmp11296 = getelementptr inbounds float* %tmp11295, i64 1
+  %tmp11297 = getelementptr inbounds float* %tmp11296, i64 1
+  %tmp11298 = getelementptr inbounds float* %tmp11297, i64 1
+  %tmp11299 = getelementptr inbounds float* %tmp11298, i64 1
+  %tmp11300 = getelementptr inbounds float* %tmp11299, i64 1
+  %tmp11301 = getelementptr inbounds float* %tmp11300, i64 1
+  %tmp11302 = getelementptr inbounds float* %tmp11301, i64 1
+  %tmp11303 = getelementptr inbounds float* %tmp11302, i64 1
+  %tmp11304 = getelementptr inbounds float* %tmp11303, i64 1
+  %tmp11305 = getelementptr inbounds float* %tmp11304, i64 1
+  %tmp11306 = getelementptr inbounds float* %tmp11305, i64 1
+  %tmp11307 = getelementptr inbounds float* %tmp11306, i64 1
+  %tmp11308 = getelementptr inbounds float* %tmp11307, i64 1
+  %tmp11309 = getelementptr inbounds float* %tmp11308, i64 1
+  %tmp11310 = getelementptr inbounds float* %tmp11309, i64 1
+  %tmp11311 = getelementptr inbounds float* %tmp11310, i64 1
+  %tmp11312 = getelementptr inbounds float* %tmp11311, i64 1
+  %tmp11313 = getelementptr inbounds float* %tmp11312, i64 1
+  %tmp11314 = getelementptr inbounds float* %tmp11313, i64 1
+  %tmp11315 = getelementptr inbounds float* %tmp11314, i64 1
+  %tmp11316 = getelementptr inbounds float* %tmp11315, i64 1
+  %tmp11317 = getelementptr inbounds float* %tmp11316, i64 1
+  %tmp11318 = getelementptr inbounds float* %tmp11317, i64 1
+  %tmp11319 = getelementptr inbounds float* %tmp11318, i64 1
+  %tmp11320 = getelementptr inbounds float* %tmp11319, i64 1
+  %tmp11321 = getelementptr inbounds float* %tmp11320, i64 1
+  %tmp11322 = getelementptr inbounds float* %tmp11321, i64 1
+  %tmp11323 = getelementptr inbounds float* %tmp11322, i64 1
+  %tmp11324 = getelementptr inbounds float* %tmp11323, i64 1
+  %tmp11325 = getelementptr inbounds float* %tmp11324, i64 1
+  %tmp11326 = getelementptr inbounds float* %tmp11325, i64 1
+  %tmp11327 = getelementptr inbounds float* %tmp11326, i64 1
+  %tmp11328 = getelementptr inbounds float* %tmp11327, i64 1
+  %tmp11329 = getelementptr inbounds float* %tmp11328, i64 1
+  %tmp11330 = getelementptr inbounds float* %tmp11329, i64 1
+  %tmp11331 = getelementptr inbounds float* %tmp11330, i64 1
+  %tmp11332 = getelementptr inbounds float* %tmp11331, i64 1
+  %tmp11333 = getelementptr inbounds float* %tmp11332, i64 1
+  %tmp11334 = getelementptr inbounds float* %tmp11333, i64 1
+  %tmp11335 = getelementptr inbounds float* %tmp11334, i64 1
+  %tmp11336 = getelementptr inbounds float* %tmp11335, i64 1
+  %tmp11337 = getelementptr inbounds float* %tmp11336, i64 1
+  %tmp11338 = getelementptr inbounds float* %tmp11337, i64 1
+  %tmp11339 = getelementptr inbounds float* %tmp11338, i64 1
+  %tmp11340 = getelementptr inbounds float* %tmp11339, i64 1
+  %tmp11341 = getelementptr inbounds float* %tmp11340, i64 1
+  %tmp11342 = getelementptr inbounds float* %tmp11341, i64 1
+  %tmp11343 = getelementptr inbounds float* %tmp11342, i64 1
+  %tmp11344 = getelementptr inbounds float* %tmp11343, i64 1
+  %tmp11345 = getelementptr inbounds float* %tmp11344, i64 1
+  %tmp11346 = getelementptr inbounds float* %tmp11345, i64 1
+  %tmp11347 = getelementptr inbounds float* %tmp11346, i64 1
+  %tmp11348 = getelementptr inbounds float* %tmp11347, i64 1
+  %tmp11349 = getelementptr inbounds float* %tmp11348, i64 1
+  %tmp11350 = getelementptr inbounds float* %tmp11349, i64 1
+  %tmp11351 = getelementptr inbounds float* %tmp11350, i64 1
+  %tmp11352 = getelementptr inbounds float* %tmp11351, i64 1
+  %tmp11353 = getelementptr inbounds float* %tmp11352, i64 1
+  %tmp11354 = getelementptr inbounds float* %tmp11353, i64 1
+  %tmp11355 = getelementptr inbounds float* %tmp11354, i64 1
+  %tmp11356 = getelementptr inbounds float* %tmp11355, i64 1
+  %tmp11357 = getelementptr inbounds float* %tmp11356, i64 1
+  %tmp11358 = getelementptr inbounds float* %tmp11357, i64 1
+  %tmp11359 = getelementptr inbounds float* %tmp11358, i64 1
+  %tmp11360 = getelementptr inbounds float* %tmp11359, i64 1
+  %tmp11361 = getelementptr inbounds float* %tmp11360, i64 1
+  %tmp11362 = getelementptr inbounds float* %tmp11361, i64 1
+  %tmp11363 = getelementptr inbounds float* %tmp11362, i64 1
+  %tmp11364 = getelementptr inbounds float* %tmp11363, i64 1
+  %tmp11365 = getelementptr inbounds float* %tmp11364, i64 1
+  %tmp11366 = getelementptr inbounds float* %tmp11365, i64 1
+  %tmp11367 = getelementptr inbounds float* %tmp11366, i64 1
+  %tmp11368 = getelementptr inbounds float* %tmp11367, i64 1
+  %tmp11369 = getelementptr inbounds float* %tmp11368, i64 1
+  %tmp11370 = getelementptr inbounds float* %tmp11369, i64 1
+  %tmp11371 = getelementptr inbounds float* %tmp11370, i64 1
+  %tmp11372 = getelementptr inbounds float* %tmp11371, i64 1
+  %tmp11373 = getelementptr inbounds float* %tmp11372, i64 1
+  %tmp11374 = getelementptr inbounds float* %tmp11373, i64 1
+  %tmp11375 = getelementptr inbounds float* %tmp11374, i64 1
+  %tmp11376 = getelementptr inbounds float* %tmp11375, i64 1
+  %tmp11377 = getelementptr inbounds float* %tmp11376, i64 1
+  %tmp11378 = getelementptr inbounds float* %tmp11377, i64 1
+  %tmp11379 = getelementptr inbounds float* %tmp11378, i64 1
+  %tmp11380 = getelementptr inbounds float* %tmp11379, i64 1
+  %tmp11381 = getelementptr inbounds float* %tmp11380, i64 1
+  %tmp11382 = getelementptr inbounds float* %tmp11381, i64 1
+  %tmp11383 = getelementptr inbounds float* %tmp11382, i64 1
+  %tmp11384 = getelementptr inbounds float* %tmp11383, i64 1
+  %tmp11385 = getelementptr inbounds float* %tmp11384, i64 1
+  %tmp11386 = getelementptr inbounds float* %tmp11385, i64 1
+  %tmp11387 = getelementptr inbounds float* %tmp11386, i64 1
+  %tmp11388 = getelementptr inbounds float* %tmp11387, i64 1
+  %tmp11389 = getelementptr inbounds float* %tmp11388, i64 1
+  %tmp11390 = getelementptr inbounds float* %tmp11389, i64 1
+  %tmp11391 = getelementptr inbounds float* %tmp11390, i64 1
+  %tmp11392 = getelementptr inbounds float* %tmp11391, i64 1
+  %tmp11393 = getelementptr inbounds float* %tmp11392, i64 1
+  %tmp11394 = getelementptr inbounds float* %tmp11393, i64 1
+  %tmp11395 = getelementptr inbounds float* %tmp11394, i64 1
+  %tmp11396 = getelementptr inbounds float* %tmp11395, i64 1
+  %tmp11397 = getelementptr inbounds float* %tmp11396, i64 1
+  %tmp11398 = getelementptr inbounds float* %tmp11397, i64 1
+  %tmp11399 = getelementptr inbounds float* %tmp11398, i64 1
+  %tmp11400 = getelementptr inbounds float* %tmp11399, i64 1
+  %tmp11401 = getelementptr inbounds float* %tmp11400, i64 1
+  %tmp11402 = getelementptr inbounds float* %tmp11401, i64 1
+  %tmp11403 = getelementptr inbounds float* %tmp11402, i64 1
+  %tmp11404 = getelementptr inbounds float* %tmp11403, i64 1
+  %tmp11405 = getelementptr inbounds float* %tmp11404, i64 1
+  %tmp11406 = getelementptr inbounds float* %tmp11405, i64 1
+  %tmp11407 = getelementptr inbounds float* %tmp11406, i64 1
+  %tmp11408 = getelementptr inbounds float* %tmp11407, i64 1
+  %tmp11409 = getelementptr inbounds float* %tmp11408, i64 1
+  %tmp11410 = getelementptr inbounds float* %tmp11409, i64 1
+  %tmp11411 = getelementptr inbounds float* %tmp11410, i64 1
+  %tmp11412 = getelementptr inbounds float* %tmp11411, i64 1
+  %tmp11413 = getelementptr inbounds float* %tmp11412, i64 1
+  %tmp11414 = getelementptr inbounds float* %tmp11413, i64 1
+  %tmp11415 = getelementptr inbounds float* %tmp11414, i64 1
+  %tmp11416 = getelementptr inbounds float* %tmp11415, i64 1
+  %tmp11417 = getelementptr inbounds float* %tmp11416, i64 1
+  %tmp11418 = getelementptr inbounds float* %tmp11417, i64 1
+  %tmp11419 = getelementptr inbounds float* %tmp11418, i64 1
+  %tmp11420 = getelementptr inbounds float* %tmp11419, i64 1
+  %tmp11421 = getelementptr inbounds float* %tmp11420, i64 1
+  %tmp11422 = getelementptr inbounds float* %tmp11421, i64 1
+  %tmp11423 = getelementptr inbounds float* %tmp11422, i64 1
+  %tmp11424 = getelementptr inbounds float* %tmp11423, i64 1
+  %tmp11425 = getelementptr inbounds float* %tmp11424, i64 1
+  %tmp11426 = getelementptr inbounds float* %tmp11425, i64 1
+  %tmp11427 = getelementptr inbounds float* %tmp11426, i64 1
+  %tmp11428 = getelementptr inbounds float* %tmp11427, i64 1
+  %tmp11429 = getelementptr inbounds float* %tmp11428, i64 1
+  %tmp11430 = getelementptr inbounds float* %tmp11429, i64 1
+  %tmp11431 = getelementptr inbounds float* %tmp11430, i64 1
+  %tmp11432 = getelementptr inbounds float* %tmp11431, i64 1
+  %tmp11433 = getelementptr inbounds float* %tmp11432, i64 1
+  %tmp11434 = getelementptr inbounds float* %tmp11433, i64 1
+  %tmp11435 = getelementptr inbounds float* %tmp11434, i64 1
+  %tmp11436 = getelementptr inbounds float* %tmp11435, i64 1
+  %tmp11437 = getelementptr inbounds float* %tmp11436, i64 1
+  %tmp11438 = getelementptr inbounds float* %tmp11437, i64 1
+  %tmp11439 = getelementptr inbounds float* %tmp11438, i64 1
+  %tmp11440 = getelementptr inbounds float* %tmp11439, i64 1
+  %tmp11441 = getelementptr inbounds float* %tmp11440, i64 1
+  %tmp11442 = getelementptr inbounds float* %tmp11441, i64 1
+  %tmp11443 = getelementptr inbounds float* %tmp11442, i64 1
+  %tmp11444 = getelementptr inbounds float* %tmp11443, i64 1
+  %tmp11445 = getelementptr inbounds float* %tmp11444, i64 1
+  %tmp11446 = getelementptr inbounds float* %tmp11445, i64 1
+  %tmp11447 = getelementptr inbounds float* %tmp11446, i64 1
+  %tmp11448 = getelementptr inbounds float* %tmp11447, i64 1
+  %tmp11449 = getelementptr inbounds float* %tmp11448, i64 1
+  %tmp11450 = getelementptr inbounds float* %tmp11449, i64 1
+  %tmp11451 = getelementptr inbounds float* %tmp11450, i64 1
+  %tmp11452 = getelementptr inbounds float* %tmp11451, i64 1
+  %tmp11453 = getelementptr inbounds float* %tmp11452, i64 1
+  %tmp11454 = getelementptr inbounds float* %tmp11453, i64 1
+  %tmp11455 = getelementptr inbounds float* %tmp11454, i64 1
+  %tmp11456 = getelementptr inbounds float* %tmp11455, i64 1
+  %tmp11457 = getelementptr inbounds float* %tmp11456, i64 1
+  %tmp11458 = getelementptr inbounds float* %tmp11457, i64 1
+  %tmp11459 = getelementptr inbounds float* %tmp11458, i64 1
+  %tmp11460 = getelementptr inbounds float* %tmp11459, i64 1
+  %tmp11461 = getelementptr inbounds float* %tmp11460, i64 1
+  %tmp11462 = getelementptr inbounds float* %tmp11461, i64 1
+  %tmp11463 = getelementptr inbounds float* %tmp11462, i64 1
+  %tmp11464 = getelementptr inbounds float* %tmp11463, i64 1
+  %tmp11465 = getelementptr inbounds float* %tmp11464, i64 1
+  %tmp11466 = getelementptr inbounds float* %tmp11465, i64 1
+  %tmp11467 = getelementptr inbounds float* %tmp11466, i64 1
+  %tmp11468 = getelementptr inbounds float* %tmp11467, i64 1
+  %tmp11469 = getelementptr inbounds float* %tmp11468, i64 1
+  %tmp11470 = getelementptr inbounds float* %tmp11469, i64 1
+  %tmp11471 = getelementptr inbounds float* %tmp11470, i64 1
+  %tmp11472 = getelementptr inbounds float* %tmp11471, i64 1
+  %tmp11473 = getelementptr inbounds float* %tmp11472, i64 1
+  %tmp11474 = getelementptr inbounds float* %tmp11473, i64 1
+  %tmp11475 = getelementptr inbounds float* %tmp11474, i64 1
+  %tmp11476 = getelementptr inbounds float* %tmp11475, i64 1
+  %tmp11477 = getelementptr inbounds float* %tmp11476, i64 1
+  %tmp11478 = getelementptr inbounds float* %tmp11477, i64 1
+  %tmp11479 = getelementptr inbounds float* %tmp11478, i64 1
+  %tmp11480 = getelementptr inbounds float* %tmp11479, i64 1
+  %tmp11481 = getelementptr inbounds float* %tmp11480, i64 1
+  %tmp11482 = getelementptr inbounds float* %tmp11481, i64 1
+  %tmp11483 = getelementptr inbounds float* %tmp11482, i64 1
+  %tmp11484 = getelementptr inbounds float* %tmp11483, i64 1
+  %tmp11485 = getelementptr inbounds float* %tmp11484, i64 1
+  %tmp11486 = getelementptr inbounds float* %tmp11485, i64 1
+  %tmp11487 = getelementptr inbounds float* %tmp11486, i64 1
+  %tmp11488 = getelementptr inbounds float* %tmp11487, i64 1
+  %tmp11489 = getelementptr inbounds float* %tmp11488, i64 1
+  %tmp11490 = getelementptr inbounds float* %tmp11489, i64 1
+  %tmp11491 = getelementptr inbounds float* %tmp11490, i64 1
+  %tmp11492 = getelementptr inbounds float* %tmp11491, i64 1
+  %tmp11493 = getelementptr inbounds float* %tmp11492, i64 1
+  %tmp11494 = getelementptr inbounds float* %tmp11493, i64 1
+  %tmp11495 = getelementptr inbounds float* %tmp11494, i64 1
+  %tmp11496 = getelementptr inbounds float* %tmp11495, i64 1
+  %tmp11497 = getelementptr inbounds float* %tmp11496, i64 1
+  %tmp11498 = getelementptr inbounds float* %tmp11497, i64 1
+  %tmp11499 = getelementptr inbounds float* %tmp11498, i64 1
+  %tmp11500 = getelementptr inbounds float* %tmp11499, i64 1
+  %tmp11501 = getelementptr inbounds float* %tmp11500, i64 1
+  %tmp11502 = getelementptr inbounds float* %tmp11501, i64 1
+  %tmp11503 = getelementptr inbounds float* %tmp11502, i64 1
+  %tmp11504 = getelementptr inbounds float* %tmp11503, i64 1
+  %tmp11505 = getelementptr inbounds float* %tmp11504, i64 1
+  %tmp11506 = getelementptr inbounds float* %tmp11505, i64 1
+  %tmp11507 = getelementptr inbounds float* %tmp11506, i64 1
+  %tmp11508 = getelementptr inbounds float* %tmp11507, i64 1
+  %tmp11509 = getelementptr inbounds float* %tmp11508, i64 1
+  %tmp11510 = getelementptr inbounds float* %tmp11509, i64 1
+  %tmp11511 = getelementptr inbounds float* %tmp11510, i64 1
+  %tmp11512 = getelementptr inbounds float* %tmp11511, i64 1
+  %tmp11513 = getelementptr inbounds float* %tmp11512, i64 1
+  %tmp11514 = getelementptr inbounds float* %tmp11513, i64 1
+  %tmp11515 = getelementptr inbounds float* %tmp11514, i64 1
+  %tmp11516 = getelementptr inbounds float* %tmp11515, i64 1
+  %tmp11517 = getelementptr inbounds float* %tmp11516, i64 1
+  %tmp11518 = getelementptr inbounds float* %tmp11517, i64 1
+  %tmp11519 = getelementptr inbounds float* %tmp11518, i64 1
+  %tmp11520 = getelementptr inbounds float* %tmp11519, i64 1
+  %tmp11521 = getelementptr inbounds float* %tmp11520, i64 1
+  %tmp11522 = getelementptr inbounds float* %tmp11521, i64 1
+  %tmp11523 = getelementptr inbounds float* %tmp11522, i64 1
+  %tmp11524 = getelementptr inbounds float* %tmp11523, i64 1
+  %tmp11525 = getelementptr inbounds float* %tmp11524, i64 1
+  %tmp11526 = getelementptr inbounds float* %tmp11525, i64 1
+  %tmp11527 = getelementptr inbounds float* %tmp11526, i64 1
+  %tmp11528 = getelementptr inbounds float* %tmp11527, i64 1
+  %tmp11529 = getelementptr inbounds float* %tmp11528, i64 1
+  %tmp11530 = getelementptr inbounds float* %tmp11529, i64 1
+  %tmp11531 = getelementptr inbounds float* %tmp11530, i64 1
+  %tmp11532 = getelementptr inbounds float* %tmp11531, i64 1
+  %tmp11533 = getelementptr inbounds float* %tmp11532, i64 1
+  %tmp11534 = getelementptr inbounds float* %tmp11533, i64 1
+  %tmp11535 = getelementptr inbounds float* %tmp11534, i64 1
+  %tmp11536 = getelementptr inbounds float* %tmp11535, i64 1
+  %tmp11537 = getelementptr inbounds float* %tmp11536, i64 1
+  %tmp11538 = getelementptr inbounds float* %tmp11537, i64 1
+  %tmp11539 = getelementptr inbounds float* %tmp11538, i64 1
+  %tmp11540 = getelementptr inbounds float* %tmp11539, i64 1
+  %tmp11541 = getelementptr inbounds float* %tmp11540, i64 1
+  %tmp11542 = getelementptr inbounds float* %tmp11541, i64 1
+  %tmp11543 = getelementptr inbounds float* %tmp11542, i64 1
+  %tmp11544 = getelementptr inbounds float* %tmp11543, i64 1
+  %tmp11545 = getelementptr inbounds float* %tmp11544, i64 1
+  %tmp11546 = getelementptr inbounds float* %tmp11545, i64 1
+  %tmp11547 = getelementptr inbounds float* %tmp11546, i64 1
+  %tmp11548 = getelementptr inbounds float* %tmp11547, i64 1
+  %tmp11549 = getelementptr inbounds float* %tmp11548, i64 1
+  %tmp11550 = getelementptr inbounds float* %tmp11549, i64 1
+  %tmp11551 = getelementptr inbounds float* %tmp11550, i64 1
+  %tmp11552 = getelementptr inbounds float* %tmp11551, i64 1
+  %tmp11553 = getelementptr inbounds float* %tmp11552, i64 1
+  %tmp11554 = getelementptr inbounds float* %tmp11553, i64 1
+  %tmp11555 = getelementptr inbounds float* %tmp11554, i64 1
+  %tmp11556 = getelementptr inbounds float* %tmp11555, i64 1
+  %tmp11557 = getelementptr inbounds float* %tmp11556, i64 1
+  %tmp11558 = getelementptr inbounds float* %tmp11557, i64 1
+  %tmp11559 = getelementptr inbounds float* %tmp11558, i64 1
+  %tmp11560 = getelementptr inbounds float* %tmp11559, i64 1
+  %tmp11561 = getelementptr inbounds float* %tmp11560, i64 1
+  %tmp11562 = getelementptr inbounds float* %tmp11561, i64 1
+  %tmp11563 = getelementptr inbounds float* %tmp11562, i64 1
+  %tmp11564 = getelementptr inbounds float* %tmp11563, i64 1
+  %tmp11565 = getelementptr inbounds float* %tmp11564, i64 1
+  %tmp11566 = getelementptr inbounds float* %tmp11565, i64 1
+  %tmp11567 = getelementptr inbounds float* %tmp11566, i64 1
+  %tmp11568 = getelementptr inbounds float* %tmp11567, i64 1
+  %tmp11569 = getelementptr inbounds float* %tmp11568, i64 1
+  %tmp11570 = getelementptr inbounds float* %tmp11569, i64 1
+  %tmp11571 = getelementptr inbounds float* %tmp11570, i64 1
+  %tmp11572 = getelementptr inbounds float* %tmp11571, i64 1
+  %tmp11573 = getelementptr inbounds float* %tmp11572, i64 1
+  %tmp11574 = getelementptr inbounds float* %tmp11573, i64 1
+  %tmp11575 = getelementptr inbounds float* %tmp11574, i64 1
+  %tmp11576 = getelementptr inbounds float* %tmp11575, i64 1
+  %tmp11577 = getelementptr inbounds float* %tmp11576, i64 1
+  %tmp11578 = getelementptr inbounds float* %tmp11577, i64 1
+  %tmp11579 = getelementptr inbounds float* %tmp11578, i64 1
+  %tmp11580 = getelementptr inbounds float* %tmp11579, i64 1
+  %tmp11581 = getelementptr inbounds float* %tmp11580, i64 1
+  %tmp11582 = getelementptr inbounds float* %tmp11581, i64 1
+  %tmp11583 = getelementptr inbounds float* %tmp11582, i64 1
+  %tmp11584 = getelementptr inbounds float* %tmp11583, i64 1
+  %tmp11585 = getelementptr inbounds float* %tmp11584, i64 1
+  %tmp11586 = getelementptr inbounds float* %tmp11585, i64 1
+  %tmp11587 = getelementptr inbounds float* %tmp11586, i64 1
+  %tmp11588 = getelementptr inbounds float* %tmp11587, i64 1
+  %tmp11589 = getelementptr inbounds float* %tmp11588, i64 1
+  %tmp11590 = getelementptr inbounds float* %tmp11589, i64 1
+  %tmp11591 = getelementptr inbounds float* %tmp11590, i64 1
+  %tmp11592 = getelementptr inbounds float* %tmp11591, i64 1
+  %tmp11593 = getelementptr inbounds float* %tmp11592, i64 1
+  %tmp11594 = getelementptr inbounds float* %tmp11593, i64 1
+  %tmp11595 = getelementptr inbounds float* %tmp11594, i64 1
+  %tmp11596 = getelementptr inbounds float* %tmp11595, i64 1
+  %tmp11597 = getelementptr inbounds float* %tmp11596, i64 1
+  %tmp11598 = getelementptr inbounds float* %tmp11597, i64 1
+  %tmp11599 = getelementptr inbounds float* %tmp11598, i64 1
+  %tmp11600 = getelementptr inbounds float* %tmp11599, i64 1
+  %tmp11601 = getelementptr inbounds float* %tmp11600, i64 1
+  %tmp11602 = getelementptr inbounds float* %tmp11601, i64 1
+  %tmp11603 = getelementptr inbounds float* %tmp11602, i64 1
+  %tmp11604 = getelementptr inbounds float* %tmp11603, i64 1
+  %tmp11605 = getelementptr inbounds float* %tmp11604, i64 1
+  %tmp11606 = getelementptr inbounds float* %tmp11605, i64 1
+  %tmp11607 = getelementptr inbounds float* %tmp11606, i64 1
+  %tmp11608 = getelementptr inbounds float* %tmp11607, i64 1
+  %tmp11609 = getelementptr inbounds float* %tmp11608, i64 1
+  %tmp11610 = getelementptr inbounds float* %tmp11609, i64 1
+  %tmp11611 = getelementptr inbounds float* %tmp11610, i64 1
+  %tmp11612 = getelementptr inbounds float* %tmp11611, i64 1
+  %tmp11613 = getelementptr inbounds float* %tmp11612, i64 1
+  %tmp11614 = getelementptr inbounds float* %tmp11613, i64 1
+  %tmp11615 = getelementptr inbounds float* %tmp11614, i64 1
+  %tmp11616 = getelementptr inbounds float* %tmp11615, i64 1
+  %tmp11617 = getelementptr inbounds float* %tmp11616, i64 1
+  %tmp11618 = getelementptr inbounds float* %tmp11617, i64 1
+  %tmp11619 = getelementptr inbounds float* %tmp11618, i64 1
+  %tmp11620 = getelementptr inbounds float* %tmp11619, i64 1
+  %tmp11621 = getelementptr inbounds float* %tmp11620, i64 1
+  %tmp11622 = getelementptr inbounds float* %tmp11621, i64 1
+  %tmp11623 = getelementptr inbounds float* %tmp11622, i64 1
+  %tmp11624 = getelementptr inbounds float* %tmp11623, i64 1
+  %tmp11625 = getelementptr inbounds float* %tmp11624, i64 1
+  %tmp11626 = getelementptr inbounds float* %tmp11625, i64 1
+  %tmp11627 = getelementptr inbounds float* %tmp11626, i64 1
+  %tmp11628 = getelementptr inbounds float* %tmp11627, i64 1
+  %tmp11629 = getelementptr inbounds float* %tmp11628, i64 1
+  %tmp11630 = getelementptr inbounds float* %tmp11629, i64 1
+  %tmp11631 = getelementptr inbounds float* %tmp11630, i64 1
+  %tmp11632 = getelementptr inbounds float* %tmp11631, i64 1
+  %tmp11633 = getelementptr inbounds float* %tmp11632, i64 1
+  %tmp11634 = getelementptr inbounds float* %tmp11633, i64 1
+  %tmp11635 = getelementptr inbounds float* %tmp11634, i64 1
+  %tmp11636 = getelementptr inbounds float* %tmp11635, i64 1
+  %tmp11637 = getelementptr inbounds float* %tmp11636, i64 1
+  %tmp11638 = getelementptr inbounds float* %tmp11637, i64 1
+  %tmp11639 = getelementptr inbounds float* %tmp11638, i64 1
+  %tmp11640 = getelementptr inbounds float* %tmp11639, i64 1
+  %tmp11641 = getelementptr inbounds float* %tmp11640, i64 1
+  %tmp11642 = getelementptr inbounds float* %tmp11641, i64 1
+  %tmp11643 = getelementptr inbounds float* %tmp11642, i64 1
+  %tmp11644 = getelementptr inbounds float* %tmp11643, i64 1
+  %tmp11645 = getelementptr inbounds float* %tmp11644, i64 1
+  %tmp11646 = getelementptr inbounds float* %tmp11645, i64 1
+  %tmp11647 = getelementptr inbounds float* %tmp11646, i64 1
+  %tmp11648 = getelementptr inbounds float* %tmp11647, i64 1
+  %tmp11649 = getelementptr inbounds float* %tmp11648, i64 1
+  %tmp11650 = getelementptr inbounds float* %tmp11649, i64 1
+  %tmp11651 = getelementptr inbounds float* %tmp11650, i64 1
+  %tmp11652 = getelementptr inbounds float* %tmp11651, i64 1
+  %tmp11653 = getelementptr inbounds float* %tmp11652, i64 1
+  %tmp11654 = getelementptr inbounds float* %tmp11653, i64 1
+  %tmp11655 = getelementptr inbounds float* %tmp11654, i64 1
+  %tmp11656 = getelementptr inbounds float* %tmp11655, i64 1
+  %tmp11657 = getelementptr inbounds float* %tmp11656, i64 1
+  %tmp11658 = getelementptr inbounds float* %tmp11657, i64 1
+  %tmp11659 = getelementptr inbounds float* %tmp11658, i64 1
+  %tmp11660 = getelementptr inbounds float* %tmp11659, i64 1
+  %tmp11661 = getelementptr inbounds float* %tmp11660, i64 1
+  %tmp11662 = getelementptr inbounds float* %tmp11661, i64 1
+  %tmp11663 = getelementptr inbounds float* %tmp11662, i64 1
+  %tmp11664 = getelementptr inbounds float* %tmp11663, i64 1
+  %tmp11665 = getelementptr inbounds float* %tmp11664, i64 1
+  %tmp11666 = getelementptr inbounds float* %tmp11665, i64 1
+  %tmp11667 = getelementptr inbounds float* %tmp11666, i64 1
+  %tmp11668 = getelementptr inbounds float* %tmp11667, i64 1
+  %tmp11669 = getelementptr inbounds float* %tmp11668, i64 1
+  %tmp11670 = getelementptr inbounds float* %tmp11669, i64 1
+  %tmp11671 = getelementptr inbounds float* %tmp11670, i64 1
+  %tmp11672 = getelementptr inbounds float* %tmp11671, i64 1
+  %tmp11673 = getelementptr inbounds float* %tmp11672, i64 1
+  %tmp11674 = getelementptr inbounds float* %tmp11673, i64 1
+  %tmp11675 = getelementptr inbounds float* %tmp11674, i64 1
+  %tmp11676 = getelementptr inbounds float* %tmp11675, i64 1
+  %tmp11677 = getelementptr inbounds float* %tmp11676, i64 1
+  %tmp11678 = getelementptr inbounds float* %tmp11677, i64 1
+  %tmp11679 = getelementptr inbounds float* %tmp11678, i64 1
+  %tmp11680 = getelementptr inbounds float* %tmp11679, i64 1
+  %tmp11681 = getelementptr inbounds float* %tmp11680, i64 1
+  %tmp11682 = getelementptr inbounds float* %tmp11681, i64 1
+  %tmp11683 = getelementptr inbounds float* %tmp11682, i64 1
+  %tmp11684 = getelementptr inbounds float* %tmp11683, i64 1
+  %tmp11685 = getelementptr inbounds float* %tmp11684, i64 1
+  %tmp11686 = getelementptr inbounds float* %tmp11685, i64 1
+  %tmp11687 = getelementptr inbounds float* %tmp11686, i64 1
+  %tmp11688 = getelementptr inbounds float* %tmp11687, i64 1
+  %tmp11689 = getelementptr inbounds float* %tmp11688, i64 1
+  %tmp11690 = getelementptr inbounds float* %tmp11689, i64 1
+  %tmp11691 = getelementptr inbounds float* %tmp11690, i64 1
+  %tmp11692 = getelementptr inbounds float* %tmp11691, i64 1
+  %tmp11693 = getelementptr inbounds float* %tmp11692, i64 1
+  %tmp11694 = getelementptr inbounds float* %tmp11693, i64 1
+  %tmp11695 = getelementptr inbounds float* %tmp11694, i64 1
+  %tmp11696 = getelementptr inbounds float* %tmp11695, i64 1
+  %tmp11697 = getelementptr inbounds float* %tmp11696, i64 1
+  %tmp11698 = getelementptr inbounds float* %tmp11697, i64 1
+  %tmp11699 = getelementptr inbounds float* %tmp11698, i64 1
+  %tmp11700 = getelementptr inbounds float* %tmp11699, i64 1
+  %tmp11701 = getelementptr inbounds float* %tmp11700, i64 1
+  %tmp11702 = getelementptr inbounds float* %tmp11701, i64 1
+  %tmp11703 = getelementptr inbounds float* %tmp11702, i64 1
+  %tmp11704 = getelementptr inbounds float* %tmp11703, i64 1
+  %tmp11705 = getelementptr inbounds float* %tmp11704, i64 1
+  %tmp11706 = getelementptr inbounds float* %tmp11705, i64 1
+  %tmp11707 = getelementptr inbounds float* %tmp11706, i64 1
+  %tmp11708 = getelementptr inbounds float* %tmp11707, i64 1
+  %tmp11709 = getelementptr inbounds float* %tmp11708, i64 1
+  %tmp11710 = getelementptr inbounds float* %tmp11709, i64 1
+  %tmp11711 = getelementptr inbounds float* %tmp11710, i64 1
+  %tmp11712 = getelementptr inbounds float* %tmp11711, i64 1
+  %tmp11713 = getelementptr inbounds float* %tmp11712, i64 1
+  %tmp11714 = getelementptr inbounds float* %tmp11713, i64 1
+  %tmp11715 = getelementptr inbounds float* %tmp11714, i64 1
+  %tmp11716 = getelementptr inbounds float* %tmp11715, i64 1
+  %tmp11717 = getelementptr inbounds float* %tmp11716, i64 1
+  %tmp11718 = getelementptr inbounds float* %tmp11717, i64 1
+  %tmp11719 = getelementptr inbounds float* %tmp11718, i64 1
+  %tmp11720 = getelementptr inbounds float* %tmp11719, i64 1
+  %tmp11721 = getelementptr inbounds float* %tmp11720, i64 1
+  %tmp11722 = getelementptr inbounds float* %tmp11721, i64 1
+  %tmp11723 = getelementptr inbounds float* %tmp11722, i64 1
+  %tmp11724 = getelementptr inbounds float* %tmp11723, i64 1
+  %tmp11725 = getelementptr inbounds float* %tmp11724, i64 1
+  %tmp11726 = getelementptr inbounds float* %tmp11725, i64 1
+  %tmp11727 = getelementptr inbounds float* %tmp11726, i64 1
+  %tmp11728 = getelementptr inbounds float* %tmp11727, i64 1
+  %tmp11729 = getelementptr inbounds float* %tmp11728, i64 1
+  %tmp11730 = getelementptr inbounds float* %tmp11729, i64 1
+  %tmp11731 = getelementptr inbounds float* %tmp11730, i64 1
+  %tmp11732 = getelementptr inbounds float* %tmp11731, i64 1
+  %tmp11733 = getelementptr inbounds float* %tmp11732, i64 1
+  %tmp11734 = getelementptr inbounds float* %tmp11733, i64 1
+  %tmp11735 = getelementptr inbounds float* %tmp11734, i64 1
+  %tmp11736 = getelementptr inbounds float* %tmp11735, i64 1
+  %tmp11737 = getelementptr inbounds float* %tmp11736, i64 1
+  %tmp11738 = getelementptr inbounds float* %tmp11737, i64 1
+  %tmp11739 = getelementptr inbounds float* %tmp11738, i64 1
+  %tmp11740 = getelementptr inbounds float* %tmp11739, i64 1
+  %tmp11741 = getelementptr inbounds float* %tmp11740, i64 1
+  %tmp11742 = getelementptr inbounds float* %tmp11741, i64 1
+  %tmp11743 = getelementptr inbounds float* %tmp11742, i64 1
+  %tmp11744 = getelementptr inbounds float* %tmp11743, i64 1
+  %tmp11745 = getelementptr inbounds float* %tmp11744, i64 1
+  %tmp11746 = getelementptr inbounds float* %tmp11745, i64 1
+  %tmp11747 = getelementptr inbounds float* %tmp11746, i64 1
+  %tmp11748 = getelementptr inbounds float* %tmp11747, i64 1
+  %tmp11749 = getelementptr inbounds float* %tmp11748, i64 1
+  %tmp11750 = getelementptr inbounds float* %tmp11749, i64 1
+  %tmp11751 = getelementptr inbounds float* %tmp11750, i64 1
+  %tmp11752 = getelementptr inbounds float* %tmp11751, i64 1
+  %tmp11753 = getelementptr inbounds float* %tmp11752, i64 1
+  %tmp11754 = getelementptr inbounds float* %tmp11753, i64 1
+  %tmp11755 = getelementptr inbounds float* %tmp11754, i64 1
+  %tmp11756 = getelementptr inbounds float* %tmp11755, i64 1
+  %tmp11757 = getelementptr inbounds float* %tmp11756, i64 1
+  %tmp11758 = getelementptr inbounds float* %tmp11757, i64 1
+  %tmp11759 = getelementptr inbounds float* %tmp11758, i64 1
+  %tmp11760 = getelementptr inbounds float* %tmp11759, i64 1
+  %tmp11761 = getelementptr inbounds float* %tmp11760, i64 1
+  %tmp11762 = getelementptr inbounds float* %tmp11761, i64 1
+  %tmp11763 = getelementptr inbounds float* %tmp11762, i64 1
+  %tmp11764 = getelementptr inbounds float* %tmp11763, i64 1
+  %tmp11765 = getelementptr inbounds float* %tmp11764, i64 1
+  %tmp11766 = getelementptr inbounds float* %tmp11765, i64 1
+  %tmp11767 = getelementptr inbounds float* %tmp11766, i64 1
+  %tmp11768 = getelementptr inbounds float* %tmp11767, i64 1
+  %tmp11769 = getelementptr inbounds float* %tmp11768, i64 1
+  %tmp11770 = getelementptr inbounds float* %tmp11769, i64 1
+  %tmp11771 = getelementptr inbounds float* %tmp11770, i64 1
+  %tmp11772 = getelementptr inbounds float* %tmp11771, i64 1
+  %tmp11773 = getelementptr inbounds float* %tmp11772, i64 1
+  %tmp11774 = getelementptr inbounds float* %tmp11773, i64 1
+  %tmp11775 = getelementptr inbounds float* %tmp11774, i64 1
+  %tmp11776 = getelementptr inbounds float* %tmp11775, i64 1
+  %tmp11777 = getelementptr inbounds float* %tmp11776, i64 1
+  %tmp11778 = getelementptr inbounds float* %tmp11777, i64 1
+  %tmp11779 = getelementptr inbounds float* %tmp11778, i64 1
+  %tmp11780 = getelementptr inbounds float* %tmp11779, i64 1
+  %tmp11781 = getelementptr inbounds float* %tmp11780, i64 1
+  %tmp11782 = getelementptr inbounds float* %tmp11781, i64 1
+  %tmp11783 = getelementptr inbounds float* %tmp11782, i64 1
+  %tmp11784 = getelementptr inbounds float* %tmp11783, i64 1
+  %tmp11785 = getelementptr inbounds float* %tmp11784, i64 1
+  %tmp11786 = getelementptr inbounds float* %tmp11785, i64 1
+  %tmp11787 = getelementptr inbounds float* %tmp11786, i64 1
+  %tmp11788 = getelementptr inbounds float* %tmp11787, i64 1
+  %tmp11789 = getelementptr inbounds float* %tmp11788, i64 1
+  %tmp11790 = getelementptr inbounds float* %tmp11789, i64 1
+  %tmp11791 = getelementptr inbounds float* %tmp11790, i64 1
+  %tmp11792 = getelementptr inbounds float* %tmp11791, i64 1
+  %tmp11793 = getelementptr inbounds float* %tmp11792, i64 1
+  %tmp11794 = getelementptr inbounds float* %tmp11793, i64 1
+  %tmp11795 = getelementptr inbounds float* %tmp11794, i64 1
+  %tmp11796 = getelementptr inbounds float* %tmp11795, i64 1
+  %tmp11797 = getelementptr inbounds float* %tmp11796, i64 1
+  %tmp11798 = getelementptr inbounds float* %tmp11797, i64 1
+  %tmp11799 = getelementptr inbounds float* %tmp11798, i64 1
+  %tmp11800 = getelementptr inbounds float* %tmp11799, i64 1
+  %tmp11801 = getelementptr inbounds float* %tmp11800, i64 1
+  %tmp11802 = getelementptr inbounds float* %tmp11801, i64 1
+  %tmp11803 = getelementptr inbounds float* %tmp11802, i64 1
+  %tmp11804 = getelementptr inbounds float* %tmp11803, i64 1
+  %tmp11805 = getelementptr inbounds float* %tmp11804, i64 1
+  %tmp11806 = getelementptr inbounds float* %tmp11805, i64 1
+  %tmp11807 = getelementptr inbounds float* %tmp11806, i64 1
+  %tmp11808 = getelementptr inbounds float* %tmp11807, i64 1
+  %tmp11809 = getelementptr inbounds float* %tmp11808, i64 1
+  %tmp11810 = getelementptr inbounds float* %tmp11809, i64 1
+  %tmp11811 = getelementptr inbounds float* %tmp11810, i64 1
+  %tmp11812 = getelementptr inbounds float* %tmp11811, i64 1
+  %tmp11813 = getelementptr inbounds float* %tmp11812, i64 1
+  %tmp11814 = getelementptr inbounds float* %tmp11813, i64 1
+  %tmp11815 = getelementptr inbounds float* %tmp11814, i64 1
+  %tmp11816 = getelementptr inbounds float* %tmp11815, i64 1
+  %tmp11817 = getelementptr inbounds float* %tmp11816, i64 1
+  %tmp11818 = getelementptr inbounds float* %tmp11817, i64 1
+  %tmp11819 = getelementptr inbounds float* %tmp11818, i64 1
+  %tmp11820 = getelementptr inbounds float* %tmp11819, i64 1
+  %tmp11821 = getelementptr inbounds float* %tmp11820, i64 1
+  %tmp11822 = getelementptr inbounds float* %tmp11821, i64 1
+  %tmp11823 = getelementptr inbounds float* %tmp11822, i64 1
+  %tmp11824 = getelementptr inbounds float* %tmp11823, i64 1
+  %tmp11825 = getelementptr inbounds float* %tmp11824, i64 1
+  %tmp11826 = getelementptr inbounds float* %tmp11825, i64 1
+  %tmp11827 = getelementptr inbounds float* %tmp11826, i64 1
+  %tmp11828 = getelementptr inbounds float* %tmp11827, i64 1
+  %tmp11829 = getelementptr inbounds float* %tmp11828, i64 1
+  %tmp11830 = getelementptr inbounds float* %tmp11829, i64 1
+  %tmp11831 = getelementptr inbounds float* %tmp11830, i64 1
+  %tmp11832 = getelementptr inbounds float* %tmp11831, i64 1
+  %tmp11833 = getelementptr inbounds float* %tmp11832, i64 1
+  %tmp11834 = getelementptr inbounds float* %tmp11833, i64 1
+  %tmp11835 = getelementptr inbounds float* %tmp11834, i64 1
+  %tmp11836 = getelementptr inbounds float* %tmp11835, i64 1
+  %tmp11837 = getelementptr inbounds float* %tmp11836, i64 1
+  %tmp11838 = getelementptr inbounds float* %tmp11837, i64 1
+  %tmp11839 = getelementptr inbounds float* %tmp11838, i64 1
+  %tmp11840 = getelementptr inbounds float* %tmp11839, i64 1
+  %tmp11841 = getelementptr inbounds float* %tmp11840, i64 1
+  %tmp11842 = getelementptr inbounds float* %tmp11841, i64 1
+  %tmp11843 = getelementptr inbounds float* %tmp11842, i64 1
+  %tmp11844 = getelementptr inbounds float* %tmp11843, i64 1
+  %tmp11845 = getelementptr inbounds float* %tmp11844, i64 1
+  %tmp11846 = getelementptr inbounds float* %tmp11845, i64 1
+  %tmp11847 = getelementptr inbounds float* %tmp11846, i64 1
+  %tmp11848 = getelementptr inbounds float* %tmp11847, i64 1
+  %tmp11849 = getelementptr inbounds float* %tmp11848, i64 1
+  %tmp11850 = getelementptr inbounds float* %tmp11849, i64 1
+  %tmp11851 = getelementptr inbounds float* %tmp11850, i64 1
+  %tmp11852 = getelementptr inbounds float* %tmp11851, i64 1
+  %tmp11853 = getelementptr inbounds float* %tmp11852, i64 1
+  %tmp11854 = getelementptr inbounds float* %tmp11853, i64 1
+  %tmp11855 = getelementptr inbounds float* %tmp11854, i64 1
+  %tmp11856 = getelementptr inbounds float* %tmp11855, i64 1
+  %tmp11857 = getelementptr inbounds float* %tmp11856, i64 1
+  %tmp11858 = getelementptr inbounds float* %tmp11857, i64 1
+  %tmp11859 = getelementptr inbounds float* %tmp11858, i64 1
+  %tmp11860 = getelementptr inbounds float* %tmp11859, i64 1
+  %tmp11861 = getelementptr inbounds float* %tmp11860, i64 1
+  %tmp11862 = getelementptr inbounds float* %tmp11861, i64 1
+  %tmp11863 = getelementptr inbounds float* %tmp11862, i64 1
+  %tmp11864 = getelementptr inbounds float* %tmp11863, i64 1
+  %tmp11865 = getelementptr inbounds float* %tmp11864, i64 1
+  %tmp11866 = getelementptr inbounds float* %tmp11865, i64 1
+  %tmp11867 = getelementptr inbounds float* %tmp11866, i64 1
+  %tmp11868 = getelementptr inbounds float* %tmp11867, i64 1
+  %tmp11869 = getelementptr inbounds float* %tmp11868, i64 1
+  %tmp11870 = getelementptr inbounds float* %tmp11869, i64 1
+  %tmp11871 = getelementptr inbounds float* %tmp11870, i64 1
+  %tmp11872 = getelementptr inbounds float* %tmp11871, i64 1
+  %tmp11873 = getelementptr inbounds float* %tmp11872, i64 1
+  %tmp11874 = getelementptr inbounds float* %tmp11873, i64 1
+  %tmp11875 = getelementptr inbounds float* %tmp11874, i64 1
+  %tmp11876 = getelementptr inbounds float* %tmp11875, i64 1
+  %tmp11877 = getelementptr inbounds float* %tmp11876, i64 1
+  %tmp11878 = getelementptr inbounds float* %tmp11877, i64 1
+  %tmp11879 = getelementptr inbounds float* %tmp11878, i64 1
+  %tmp11880 = getelementptr inbounds float* %tmp11879, i64 1
+  %tmp11881 = getelementptr inbounds float* %tmp11880, i64 1
+  %tmp11882 = getelementptr inbounds float* %tmp11881, i64 1
+  %tmp11883 = getelementptr inbounds float* %tmp11882, i64 1
+  %tmp11884 = getelementptr inbounds float* %tmp11883, i64 1
+  %tmp11885 = getelementptr inbounds float* %tmp11884, i64 1
+  %tmp11886 = getelementptr inbounds float* %tmp11885, i64 1
+  %tmp11887 = getelementptr inbounds float* %tmp11886, i64 1
+  %tmp11888 = getelementptr inbounds float* %tmp11887, i64 1
+  %tmp11889 = getelementptr inbounds float* %tmp11888, i64 1
+  %tmp11890 = getelementptr inbounds float* %tmp11889, i64 1
+  %tmp11891 = getelementptr inbounds float* %tmp11890, i64 1
+  %tmp11892 = getelementptr inbounds float* %tmp11891, i64 1
+  %tmp11893 = getelementptr inbounds float* %tmp11892, i64 1
+  %tmp11894 = getelementptr inbounds float* %tmp11893, i64 1
+  %tmp11895 = getelementptr inbounds float* %tmp11894, i64 1
+  %tmp11896 = getelementptr inbounds float* %tmp11895, i64 1
+  %tmp11897 = getelementptr inbounds float* %tmp11896, i64 1
+  %tmp11898 = getelementptr inbounds float* %tmp11897, i64 1
+  %tmp11899 = getelementptr inbounds float* %tmp11898, i64 1
+  %tmp11900 = getelementptr inbounds float* %tmp11899, i64 1
+  %tmp11901 = getelementptr inbounds float* %tmp11900, i64 1
+  %tmp11902 = getelementptr inbounds float* %tmp11901, i64 1
+  %tmp11903 = getelementptr inbounds float* %tmp11902, i64 1
+  %tmp11904 = getelementptr inbounds float* %tmp11903, i64 1
+  %tmp11905 = getelementptr inbounds float* %tmp11904, i64 1
+  %tmp11906 = getelementptr inbounds float* %tmp11905, i64 1
+  %tmp11907 = getelementptr inbounds float* %tmp11906, i64 1
+  %tmp11908 = getelementptr inbounds float* %tmp11907, i64 1
+  %tmp11909 = getelementptr inbounds float* %tmp11908, i64 1
+  %tmp11910 = getelementptr inbounds float* %tmp11909, i64 1
+  %tmp11911 = getelementptr inbounds float* %tmp11910, i64 1
+  %tmp11912 = getelementptr inbounds float* %tmp11911, i64 1
+  %tmp11913 = getelementptr inbounds float* %tmp11912, i64 1
+  %tmp11914 = getelementptr inbounds float* %tmp11913, i64 1
+  %tmp11915 = getelementptr inbounds float* %tmp11914, i64 1
+  %tmp11916 = getelementptr inbounds float* %tmp11915, i64 1
+  %tmp11917 = getelementptr inbounds float* %tmp11916, i64 1
+  %tmp11918 = getelementptr inbounds float* %tmp11917, i64 1
+  %tmp11919 = getelementptr inbounds float* %tmp11918, i64 1
+  %tmp11920 = getelementptr inbounds float* %tmp11919, i64 1
+  %tmp11921 = getelementptr inbounds float* %tmp11920, i64 1
+  %tmp11922 = getelementptr inbounds float* %tmp11921, i64 1
+  %tmp11923 = getelementptr inbounds float* %tmp11922, i64 1
+  %tmp11924 = getelementptr inbounds float* %tmp11923, i64 1
+  %tmp11925 = getelementptr inbounds float* %tmp11924, i64 1
+  %tmp11926 = getelementptr inbounds float* %tmp11925, i64 1
+  %tmp11927 = getelementptr inbounds float* %tmp11926, i64 1
+  %tmp11928 = getelementptr inbounds float* %tmp11927, i64 1
+  %tmp11929 = getelementptr inbounds float* %tmp11928, i64 1
+  %tmp11930 = getelementptr inbounds float* %tmp11929, i64 1
+  %tmp11931 = getelementptr inbounds float* %tmp11930, i64 1
+  %tmp11932 = getelementptr inbounds float* %tmp11931, i64 1
+  %tmp11933 = getelementptr inbounds float* %tmp11932, i64 1
+  %tmp11934 = getelementptr inbounds float* %tmp11933, i64 1
+  %tmp11935 = getelementptr inbounds float* %tmp11934, i64 1
+  %tmp11936 = getelementptr inbounds float* %tmp11935, i64 1
+  %tmp11937 = getelementptr inbounds float* %tmp11936, i64 1
+  %tmp11938 = getelementptr inbounds float* %tmp11937, i64 1
+  %tmp11939 = getelementptr inbounds float* %tmp11938, i64 1
+  %tmp11940 = getelementptr inbounds float* %tmp11939, i64 1
+  %tmp11941 = getelementptr inbounds float* %tmp11940, i64 1
+  %tmp11942 = getelementptr inbounds float* %tmp11941, i64 1
+  %tmp11943 = getelementptr inbounds float* %tmp11942, i64 1
+  %tmp11944 = getelementptr inbounds float* %tmp11943, i64 1
+  %tmp11945 = getelementptr inbounds float* %tmp11944, i64 1
+  %tmp11946 = getelementptr inbounds float* %tmp11945, i64 1
+  %tmp11947 = getelementptr inbounds float* %tmp11946, i64 1
+  %tmp11948 = getelementptr inbounds float* %tmp11947, i64 1
+  %tmp11949 = getelementptr inbounds float* %tmp11948, i64 1
+  %tmp11950 = getelementptr inbounds float* %tmp11949, i64 1
+  %tmp11951 = getelementptr inbounds float* %tmp11950, i64 1
+  %tmp11952 = getelementptr inbounds float* %tmp11951, i64 1
+  %tmp11953 = getelementptr inbounds float* %tmp11952, i64 1
+  %tmp11954 = getelementptr inbounds float* %tmp11953, i64 1
+  %tmp11955 = getelementptr inbounds float* %tmp11954, i64 1
+  %tmp11956 = getelementptr inbounds float* %tmp11955, i64 1
+  %tmp11957 = getelementptr inbounds float* %tmp11956, i64 1
+  %tmp11958 = getelementptr inbounds float* %tmp11957, i64 1
+  %tmp11959 = getelementptr inbounds float* %tmp11958, i64 1
+  %tmp11960 = getelementptr inbounds float* %tmp11959, i64 1
+  %tmp11961 = getelementptr inbounds float* %tmp11960, i64 1
+  %tmp11962 = getelementptr inbounds float* %tmp11961, i64 1
+  %tmp11963 = getelementptr inbounds float* %tmp11962, i64 1
+  %tmp11964 = getelementptr inbounds float* %tmp11963, i64 1
+  %tmp11965 = getelementptr inbounds float* %tmp11964, i64 1
+  %tmp11966 = getelementptr inbounds float* %tmp11965, i64 1
+  %tmp11967 = getelementptr inbounds float* %tmp11966, i64 1
+  %tmp11968 = getelementptr inbounds float* %tmp11967, i64 1
+  %tmp11969 = getelementptr inbounds float* %tmp11968, i64 1
+  %tmp11970 = getelementptr inbounds float* %tmp11969, i64 1
+  %tmp11971 = getelementptr inbounds float* %tmp11970, i64 1
+  %tmp11972 = getelementptr inbounds float* %tmp11971, i64 1
+  %tmp11973 = getelementptr inbounds float* %tmp11972, i64 1
+  %tmp11974 = getelementptr inbounds float* %tmp11973, i64 1
+  %tmp11975 = getelementptr inbounds float* %tmp11974, i64 1
+  %tmp11976 = getelementptr inbounds float* %tmp11975, i64 1
+  %tmp11977 = getelementptr inbounds float* %tmp11976, i64 1
+  %tmp11978 = getelementptr inbounds float* %tmp11977, i64 1
+  %tmp11979 = getelementptr inbounds float* %tmp11978, i64 1
+  %tmp11980 = getelementptr inbounds float* %tmp11979, i64 1
+  %tmp11981 = getelementptr inbounds float* %tmp11980, i64 1
+  %tmp11982 = getelementptr inbounds float* %tmp11981, i64 1
+  %tmp11983 = getelementptr inbounds float* %tmp11982, i64 1
+  %tmp11984 = getelementptr inbounds float* %tmp11983, i64 1
+  %tmp11985 = getelementptr inbounds float* %tmp11984, i64 1
+  %tmp11986 = getelementptr inbounds float* %tmp11985, i64 1
+  %tmp11987 = getelementptr inbounds float* %tmp11986, i64 1
+  %tmp11988 = getelementptr inbounds float* %tmp11987, i64 1
+  %tmp11989 = getelementptr inbounds float* %tmp11988, i64 1
+  %tmp11990 = getelementptr inbounds float* %tmp11989, i64 1
+  %tmp11991 = getelementptr inbounds float* %tmp11990, i64 1
+  %tmp11992 = getelementptr inbounds float* %tmp11991, i64 1
+  %tmp11993 = getelementptr inbounds float* %tmp11992, i64 1
+  %tmp11994 = getelementptr inbounds float* %tmp11993, i64 1
+  %tmp11995 = getelementptr inbounds float* %tmp11994, i64 1
+  %tmp11996 = getelementptr inbounds float* %tmp11995, i64 1
+  %tmp11997 = getelementptr inbounds float* %tmp11996, i64 1
+  %tmp11998 = getelementptr inbounds float* %tmp11997, i64 1
+  %tmp11999 = getelementptr inbounds float* %tmp11998, i64 1
+  %tmp12000 = getelementptr inbounds float* %tmp11999, i64 1
+  %tmp12001 = getelementptr inbounds float* %tmp12000, i64 1
+  %tmp12002 = getelementptr inbounds float* %tmp12001, i64 1
+  %tmp12003 = getelementptr inbounds float* %tmp12002, i64 1
+  %tmp12004 = getelementptr inbounds float* %tmp12003, i64 1
+  %tmp12005 = getelementptr inbounds float* %tmp12004, i64 1
+  %tmp12006 = getelementptr inbounds float* %tmp12005, i64 1
+  %tmp12007 = getelementptr inbounds float* %tmp12006, i64 1
+  %tmp12008 = getelementptr inbounds float* %tmp12007, i64 1
+  %tmp12009 = getelementptr inbounds float* %tmp12008, i64 1
+  %tmp12010 = getelementptr inbounds float* %tmp12009, i64 1
+  %tmp12011 = getelementptr inbounds float* %tmp12010, i64 1
+  %tmp12012 = getelementptr inbounds float* %tmp12011, i64 1
+  %tmp12013 = getelementptr inbounds float* %tmp12012, i64 1
+  %tmp12014 = getelementptr inbounds float* %tmp12013, i64 1
+  %tmp12015 = getelementptr inbounds float* %tmp12014, i64 1
+  %tmp12016 = getelementptr inbounds float* %tmp12015, i64 1
+  %tmp12017 = getelementptr inbounds float* %tmp12016, i64 1
+  %tmp12018 = getelementptr inbounds float* %tmp12017, i64 1
+  %tmp12019 = getelementptr inbounds float* %tmp12018, i64 1
+  %tmp12020 = getelementptr inbounds float* %tmp12019, i64 1
+  %tmp12021 = getelementptr inbounds float* %tmp12020, i64 1
+  %tmp12022 = getelementptr inbounds float* %tmp12021, i64 1
+  %tmp12023 = getelementptr inbounds float* %tmp12022, i64 1
+  %tmp12024 = getelementptr inbounds float* %tmp12023, i64 1
+  %tmp12025 = getelementptr inbounds float* %tmp12024, i64 1
+  %tmp12026 = getelementptr inbounds float* %tmp12025, i64 1
+  %tmp12027 = getelementptr inbounds float* %tmp12026, i64 1
+  %tmp12028 = getelementptr inbounds float* %tmp12027, i64 1
+  %tmp12029 = getelementptr inbounds float* %tmp12028, i64 1
+  %tmp12030 = getelementptr inbounds float* %tmp12029, i64 1
+  %tmp12031 = getelementptr inbounds float* %tmp12030, i64 1
+  %tmp12032 = getelementptr inbounds float* %tmp12031, i64 1
+  %tmp12033 = getelementptr inbounds float* %tmp12032, i64 1
+  %tmp12034 = getelementptr inbounds float* %tmp12033, i64 1
+  %tmp12035 = getelementptr inbounds float* %tmp12034, i64 1
+  %tmp12036 = getelementptr inbounds float* %tmp12035, i64 1
+  %tmp12037 = getelementptr inbounds float* %tmp12036, i64 1
+  %tmp12038 = getelementptr inbounds float* %tmp12037, i64 1
+  %tmp12039 = getelementptr inbounds float* %tmp12038, i64 1
+  %tmp12040 = getelementptr inbounds float* %tmp12039, i64 1
+  %tmp12041 = getelementptr inbounds float* %tmp12040, i64 1
+  %tmp12042 = getelementptr inbounds float* %tmp12041, i64 1
+  %tmp12043 = getelementptr inbounds float* %tmp12042, i64 1
+  %tmp12044 = getelementptr inbounds float* %tmp12043, i64 1
+  %tmp12045 = getelementptr inbounds float* %tmp12044, i64 1
+  %tmp12046 = getelementptr inbounds float* %tmp12045, i64 1
+  %tmp12047 = getelementptr inbounds float* %tmp12046, i64 1
+  %tmp12048 = getelementptr inbounds float* %tmp12047, i64 1
+  %tmp12049 = getelementptr inbounds float* %tmp12048, i64 1
+  %tmp12050 = getelementptr inbounds float* %tmp12049, i64 1
+  %tmp12051 = getelementptr inbounds float* %tmp12050, i64 1
+  %tmp12052 = getelementptr inbounds float* %tmp12051, i64 1
+  %tmp12053 = getelementptr inbounds float* %tmp12052, i64 1
+  %tmp12054 = getelementptr inbounds float* %tmp12053, i64 1
+  %tmp12055 = getelementptr inbounds float* %tmp12054, i64 1
+  %tmp12056 = getelementptr inbounds float* %tmp12055, i64 1
+  %tmp12057 = getelementptr inbounds float* %tmp12056, i64 1
+  %tmp12058 = getelementptr inbounds float* %tmp12057, i64 1
+  %tmp12059 = getelementptr inbounds float* %tmp12058, i64 1
+  %tmp12060 = getelementptr inbounds float* %tmp12059, i64 1
+  %tmp12061 = getelementptr inbounds float* %tmp12060, i64 1
+  %tmp12062 = getelementptr inbounds float* %tmp12061, i64 1
+  %tmp12063 = getelementptr inbounds float* %tmp12062, i64 1
+  %tmp12064 = getelementptr inbounds float* %tmp12063, i64 1
+  %tmp12065 = getelementptr inbounds float* %tmp12064, i64 1
+  %tmp12066 = getelementptr inbounds float* %tmp12065, i64 1
+  %tmp12067 = getelementptr inbounds float* %tmp12066, i64 1
+  %tmp12068 = getelementptr inbounds float* %tmp12067, i64 1
+  %tmp12069 = getelementptr inbounds float* %tmp12068, i64 1
+  %tmp12070 = getelementptr inbounds float* %tmp12069, i64 1
+  %tmp12071 = getelementptr inbounds float* %tmp12070, i64 1
+  %tmp12072 = getelementptr inbounds float* %tmp12071, i64 1
+  %tmp12073 = getelementptr inbounds float* %tmp12072, i64 1
+  %tmp12074 = getelementptr inbounds float* %tmp12073, i64 1
+  %tmp12075 = getelementptr inbounds float* %tmp12074, i64 1
+  %tmp12076 = getelementptr inbounds float* %tmp12075, i64 1
+  %tmp12077 = getelementptr inbounds float* %tmp12076, i64 1
+  %tmp12078 = getelementptr inbounds float* %tmp12077, i64 1
+  %tmp12079 = getelementptr inbounds float* %tmp12078, i64 1
+  %tmp12080 = getelementptr inbounds float* %tmp12079, i64 1
+  %tmp12081 = getelementptr inbounds float* %tmp12080, i64 1
+  %tmp12082 = getelementptr inbounds float* %tmp12081, i64 1
+  %tmp12083 = getelementptr inbounds float* %tmp12082, i64 1
+  %tmp12084 = getelementptr inbounds float* %tmp12083, i64 1
+  %tmp12085 = getelementptr inbounds float* %tmp12084, i64 1
+  %tmp12086 = getelementptr inbounds float* %tmp12085, i64 1
+  %tmp12087 = getelementptr inbounds float* %tmp12086, i64 1
+  %tmp12088 = getelementptr inbounds float* %tmp12087, i64 1
+  %tmp12089 = getelementptr inbounds float* %tmp12088, i64 1
+  %tmp12090 = getelementptr inbounds float* %tmp12089, i64 1
+  %tmp12091 = getelementptr inbounds float* %tmp12090, i64 1
+  %tmp12092 = getelementptr inbounds float* %tmp12091, i64 1
+  %tmp12093 = getelementptr inbounds float* %tmp12092, i64 1
+  %tmp12094 = getelementptr inbounds float* %tmp12093, i64 1
+  %tmp12095 = getelementptr inbounds float* %tmp12094, i64 1
+  %tmp12096 = getelementptr inbounds float* %tmp12095, i64 1
+  %tmp12097 = getelementptr inbounds float* %tmp12096, i64 1
+  %tmp12098 = getelementptr inbounds float* %tmp12097, i64 1
+  %tmp12099 = getelementptr inbounds float* %tmp12098, i64 1
+  %tmp12100 = getelementptr inbounds float* %tmp12099, i64 1
+  %tmp12101 = getelementptr inbounds float* %tmp12100, i64 1
+  %tmp12102 = getelementptr inbounds float* %tmp12101, i64 1
+  %tmp12103 = getelementptr inbounds float* %tmp12102, i64 1
+  %tmp12104 = getelementptr inbounds float* %tmp12103, i64 1
+  %tmp12105 = getelementptr inbounds float* %tmp12104, i64 1
+  %tmp12106 = getelementptr inbounds float* %tmp12105, i64 1
+  %tmp12107 = getelementptr inbounds float* %tmp12106, i64 1
+  %tmp12108 = getelementptr inbounds float* %tmp12107, i64 1
+  %tmp12109 = getelementptr inbounds float* %tmp12108, i64 1
+  %tmp12110 = getelementptr inbounds float* %tmp12109, i64 1
+  %tmp12111 = getelementptr inbounds float* %tmp12110, i64 1
+  %tmp12112 = getelementptr inbounds float* %tmp12111, i64 1
+  %tmp12113 = getelementptr inbounds float* %tmp12112, i64 1
+  %tmp12114 = getelementptr inbounds float* %tmp12113, i64 1
+  %tmp12115 = getelementptr inbounds float* %tmp12114, i64 1
+  %tmp12116 = getelementptr inbounds float* %tmp12115, i64 1
+  %tmp12117 = getelementptr inbounds float* %tmp12116, i64 1
+  %tmp12118 = getelementptr inbounds float* %tmp12117, i64 1
+  %tmp12119 = getelementptr inbounds float* %tmp12118, i64 1
+  %tmp12120 = getelementptr inbounds float* %tmp12119, i64 1
+  %tmp12121 = getelementptr inbounds float* %tmp12120, i64 1
+  %tmp12122 = getelementptr inbounds float* %tmp12121, i64 1
+  %tmp12123 = getelementptr inbounds float* %tmp12122, i64 1
+  %tmp12124 = getelementptr inbounds float* %tmp12123, i64 1
+  %tmp12125 = getelementptr inbounds float* %tmp12124, i64 1
+  %tmp12126 = getelementptr inbounds float* %tmp12125, i64 1
+  %tmp12127 = getelementptr inbounds float* %tmp12126, i64 1
+  %tmp12128 = getelementptr inbounds float* %tmp12127, i64 1
+  %tmp12129 = getelementptr inbounds float* %tmp12128, i64 1
+  %tmp12130 = getelementptr inbounds float* %tmp12129, i64 1
+  %tmp12131 = getelementptr inbounds float* %tmp12130, i64 1
+  %tmp12132 = getelementptr inbounds float* %tmp12131, i64 1
+  %tmp12133 = getelementptr inbounds float* %tmp12132, i64 1
+  %tmp12134 = getelementptr inbounds float* %tmp12133, i64 1
+  %tmp12135 = getelementptr inbounds float* %tmp12134, i64 1
+  %tmp12136 = getelementptr inbounds float* %tmp12135, i64 1
+  %tmp12137 = getelementptr inbounds float* %tmp12136, i64 1
+  %tmp12138 = getelementptr inbounds float* %tmp12137, i64 1
+  %tmp12139 = getelementptr inbounds float* %tmp12138, i64 1
+  %tmp12140 = getelementptr inbounds float* %tmp12139, i64 1
+  %tmp12141 = getelementptr inbounds float* %tmp12140, i64 1
+  %tmp12142 = getelementptr inbounds float* %tmp12141, i64 1
+  %tmp12143 = getelementptr inbounds float* %tmp12142, i64 1
+  %tmp12144 = getelementptr inbounds float* %tmp12143, i64 1
+  %tmp12145 = getelementptr inbounds float* %tmp12144, i64 1
+  %tmp12146 = getelementptr inbounds float* %tmp12145, i64 1
+  %tmp12147 = getelementptr inbounds float* %tmp12146, i64 1
+  %tmp12148 = getelementptr inbounds float* %tmp12147, i64 1
+  %tmp12149 = getelementptr inbounds float* %tmp12148, i64 1
+  %tmp12150 = getelementptr inbounds float* %tmp12149, i64 1
+  %tmp12151 = getelementptr inbounds float* %tmp12150, i64 1
+  %tmp12152 = getelementptr inbounds float* %tmp12151, i64 1
+  %tmp12153 = getelementptr inbounds float* %tmp12152, i64 1
+  %tmp12154 = getelementptr inbounds float* %tmp12153, i64 1
+  %tmp12155 = getelementptr inbounds float* %tmp12154, i64 1
+  %tmp12156 = getelementptr inbounds float* %tmp12155, i64 1
+  %tmp12157 = getelementptr inbounds float* %tmp12156, i64 1
+  %tmp12158 = getelementptr inbounds float* %tmp12157, i64 1
+  %tmp12159 = getelementptr inbounds float* %tmp12158, i64 1
+  %tmp12160 = getelementptr inbounds float* %tmp12159, i64 1
+  %tmp12161 = getelementptr inbounds float* %tmp12160, i64 1
+  %tmp12162 = getelementptr inbounds float* %tmp12161, i64 1
+  %tmp12163 = getelementptr inbounds float* %tmp12162, i64 1
+  %tmp12164 = getelementptr inbounds float* %tmp12163, i64 1
+  %tmp12165 = getelementptr inbounds float* %tmp12164, i64 1
+  %tmp12166 = getelementptr inbounds float* %tmp12165, i64 1
+  %tmp12167 = getelementptr inbounds float* %tmp12166, i64 1
+  %tmp12168 = getelementptr inbounds float* %tmp12167, i64 1
+  %tmp12169 = getelementptr inbounds float* %tmp12168, i64 1
+  %tmp12170 = getelementptr inbounds float* %tmp12169, i64 1
+  %tmp12171 = getelementptr inbounds float* %tmp12170, i64 1
+  %tmp12172 = getelementptr inbounds float* %tmp12171, i64 1
+  %tmp12173 = getelementptr inbounds float* %tmp12172, i64 1
+  %tmp12174 = getelementptr inbounds float* %tmp12173, i64 1
+  %tmp12175 = getelementptr inbounds float* %tmp12174, i64 1
+  %tmp12176 = getelementptr inbounds float* %tmp12175, i64 1
+  %tmp12177 = getelementptr inbounds float* %tmp12176, i64 1
+  %tmp12178 = getelementptr inbounds float* %tmp12177, i64 1
+  %tmp12179 = getelementptr inbounds float* %tmp12178, i64 1
+  %tmp12180 = getelementptr inbounds float* %tmp12179, i64 1
+  %tmp12181 = getelementptr inbounds float* %tmp12180, i64 1
+  %tmp12182 = getelementptr inbounds float* %tmp12181, i64 1
+  %tmp12183 = getelementptr inbounds float* %tmp12182, i64 1
+  %tmp12184 = getelementptr inbounds float* %tmp12183, i64 1
+  %tmp12185 = getelementptr inbounds float* %tmp12184, i64 1
+  %tmp12186 = getelementptr inbounds float* %tmp12185, i64 1
+  %tmp12187 = getelementptr inbounds float* %tmp12186, i64 1
+  %tmp12188 = getelementptr inbounds float* %tmp12187, i64 1
+  %tmp12189 = getelementptr inbounds float* %tmp12188, i64 1
+  %tmp12190 = getelementptr inbounds float* %tmp12189, i64 1
+  %tmp12191 = getelementptr inbounds float* %tmp12190, i64 1
+  %tmp12192 = getelementptr inbounds float* %tmp12191, i64 1
+  %tmp12193 = getelementptr inbounds float* %tmp12192, i64 1
+  %tmp12194 = getelementptr inbounds float* %tmp12193, i64 1
+  %tmp12195 = getelementptr inbounds float* %tmp12194, i64 1
+  %tmp12196 = getelementptr inbounds float* %tmp12195, i64 1
+  %tmp12197 = getelementptr inbounds float* %tmp12196, i64 1
+  %tmp12198 = getelementptr inbounds float* %tmp12197, i64 1
+  %tmp12199 = getelementptr inbounds float* %tmp12198, i64 1
+  %tmp12200 = getelementptr inbounds float* %tmp12199, i64 1
+  %tmp12201 = getelementptr inbounds float* %tmp12200, i64 1
+  %tmp12202 = getelementptr inbounds float* %tmp12201, i64 1
+  %tmp12203 = getelementptr inbounds float* %tmp12202, i64 1
+  %tmp12204 = getelementptr inbounds float* %tmp12203, i64 1
+  %tmp12205 = getelementptr inbounds float* %tmp12204, i64 1
+  %tmp12206 = getelementptr inbounds float* %tmp12205, i64 1
+  %tmp12207 = getelementptr inbounds float* %tmp12206, i64 1
+  %tmp12208 = getelementptr inbounds float* %tmp12207, i64 1
+  %tmp12209 = getelementptr inbounds float* %tmp12208, i64 1
+  %tmp12210 = getelementptr inbounds float* %tmp12209, i64 1
+  %tmp12211 = getelementptr inbounds float* %tmp12210, i64 1
+  %tmp12212 = getelementptr inbounds float* %tmp12211, i64 1
+  %tmp12213 = getelementptr inbounds float* %tmp12212, i64 1
+  %tmp12214 = getelementptr inbounds float* %tmp12213, i64 1
+  %tmp12215 = getelementptr inbounds float* %tmp12214, i64 1
+  %tmp12216 = getelementptr inbounds float* %tmp12215, i64 1
+  %tmp12217 = getelementptr inbounds float* %tmp12216, i64 1
+  %tmp12218 = getelementptr inbounds float* %tmp12217, i64 1
+  %tmp12219 = getelementptr inbounds float* %tmp12218, i64 1
+  %tmp12220 = getelementptr inbounds float* %tmp12219, i64 1
+  %tmp12221 = getelementptr inbounds float* %tmp12220, i64 1
+  %tmp12222 = getelementptr inbounds float* %tmp12221, i64 1
+  %tmp12223 = getelementptr inbounds float* %tmp12222, i64 1
+  %tmp12224 = getelementptr inbounds float* %tmp12223, i64 1
+  %tmp12225 = getelementptr inbounds float* %tmp12224, i64 1
+  %tmp12226 = getelementptr inbounds float* %tmp12225, i64 1
+  %tmp12227 = getelementptr inbounds float* %tmp12226, i64 1
+  %tmp12228 = getelementptr inbounds float* %tmp12227, i64 1
+  %tmp12229 = getelementptr inbounds float* %tmp12228, i64 1
+  %tmp12230 = getelementptr inbounds float* %tmp12229, i64 1
+  %tmp12231 = getelementptr inbounds float* %tmp12230, i64 1
+  %tmp12232 = getelementptr inbounds float* %tmp12231, i64 1
+  %tmp12233 = getelementptr inbounds float* %tmp12232, i64 1
+  %tmp12234 = getelementptr inbounds float* %tmp12233, i64 1
+  %tmp12235 = getelementptr inbounds float* %tmp12234, i64 1
+  %tmp12236 = getelementptr inbounds float* %tmp12235, i64 1
+  %tmp12237 = getelementptr inbounds float* %tmp12236, i64 1
+  %tmp12238 = getelementptr inbounds float* %tmp12237, i64 1
+  %tmp12239 = getelementptr inbounds float* %tmp12238, i64 1
+  %tmp12240 = getelementptr inbounds float* %tmp12239, i64 1
+  %tmp12241 = getelementptr inbounds float* %tmp12240, i64 1
+  %tmp12242 = getelementptr inbounds float* %tmp12241, i64 1
+  %tmp12243 = getelementptr inbounds float* %tmp12242, i64 1
+  %tmp12244 = getelementptr inbounds float* %tmp12243, i64 1
+  %tmp12245 = getelementptr inbounds float* %tmp12244, i64 1
+  %tmp12246 = getelementptr inbounds float* %tmp12245, i64 1
+  %tmp12247 = getelementptr inbounds float* %tmp12246, i64 1
+  %tmp12248 = getelementptr inbounds float* %tmp12247, i64 1
+  %tmp12249 = getelementptr inbounds float* %tmp12248, i64 1
+  %tmp12250 = getelementptr inbounds float* %tmp12249, i64 1
+  %tmp12251 = getelementptr inbounds float* %tmp12250, i64 1
+  %tmp12252 = getelementptr inbounds float* %tmp12251, i64 1
+  %tmp12253 = getelementptr inbounds float* %tmp12252, i64 1
+  %tmp12254 = getelementptr inbounds float* %tmp12253, i64 1
+  %tmp12255 = getelementptr inbounds float* %tmp12254, i64 1
+  %tmp12256 = getelementptr inbounds float* %tmp12255, i64 1
+  %tmp12257 = getelementptr inbounds float* %tmp12256, i64 1
+  %tmp12258 = getelementptr inbounds float* %tmp12257, i64 1
+  %tmp12259 = getelementptr inbounds float* %tmp12258, i64 1
+  %tmp12260 = getelementptr inbounds float* %tmp12259, i64 1
+  %tmp12261 = getelementptr inbounds float* %tmp12260, i64 1
+  %tmp12262 = getelementptr inbounds float* %tmp12261, i64 1
+  %tmp12263 = getelementptr inbounds float* %tmp12262, i64 1
+  %tmp12264 = getelementptr inbounds float* %tmp12263, i64 1
+  %tmp12265 = getelementptr inbounds float* %tmp12264, i64 1
+  %tmp12266 = getelementptr inbounds float* %tmp12265, i64 1
+  %tmp12267 = getelementptr inbounds float* %tmp12266, i64 1
+  %tmp12268 = getelementptr inbounds float* %tmp12267, i64 1
+  %tmp12269 = getelementptr inbounds float* %tmp12268, i64 1
+  %tmp12270 = getelementptr inbounds float* %tmp12269, i64 1
+  %tmp12271 = getelementptr inbounds float* %tmp12270, i64 1
+  %tmp12272 = getelementptr inbounds float* %tmp12271, i64 1
+  %tmp12273 = getelementptr inbounds float* %tmp12272, i64 1
+  %tmp12274 = getelementptr inbounds float* %tmp12273, i64 1
+  %tmp12275 = getelementptr inbounds float* %tmp12274, i64 1
+  %tmp12276 = getelementptr inbounds float* %tmp12275, i64 1
+  %tmp12277 = getelementptr inbounds float* %tmp12276, i64 1
+  %tmp12278 = getelementptr inbounds float* %tmp12277, i64 1
+  %tmp12279 = getelementptr inbounds float* %tmp12278, i64 1
+  %tmp12280 = getelementptr inbounds float* %tmp12279, i64 1
+  %tmp12281 = getelementptr inbounds float* %tmp12280, i64 1
+  %tmp12282 = getelementptr inbounds float* %tmp12281, i64 1
+  %tmp12283 = getelementptr inbounds float* %tmp12282, i64 1
+  %tmp12284 = getelementptr inbounds float* %tmp12283, i64 1
+  %tmp12285 = getelementptr inbounds float* %tmp12284, i64 1
+  %tmp12286 = getelementptr inbounds float* %tmp12285, i64 1
+  %tmp12287 = getelementptr inbounds float* %tmp12286, i64 1
+  %tmp12288 = getelementptr inbounds float* %tmp12287, i64 1
+  %tmp12289 = getelementptr inbounds float* %tmp12288, i64 1
+  %tmp12290 = getelementptr inbounds float* %tmp12289, i64 1
+  %tmp12291 = getelementptr inbounds float* %tmp12290, i64 1
+  %tmp12292 = getelementptr inbounds float* %tmp12291, i64 1
+  %tmp12293 = getelementptr inbounds float* %tmp12292, i64 1
+  %tmp12294 = getelementptr inbounds float* %tmp12293, i64 1
+  %tmp12295 = getelementptr inbounds float* %tmp12294, i64 1
+  %tmp12296 = getelementptr inbounds float* %tmp12295, i64 1
+  %tmp12297 = getelementptr inbounds float* %tmp12296, i64 1
+  %tmp12298 = getelementptr inbounds float* %tmp12297, i64 1
+  %tmp12299 = getelementptr inbounds float* %tmp12298, i64 1
+  %tmp12300 = getelementptr inbounds float* %tmp12299, i64 1
+  %tmp12301 = getelementptr inbounds float* %tmp12300, i64 1
+  %tmp12302 = getelementptr inbounds float* %tmp12301, i64 1
+  %tmp12303 = getelementptr inbounds float* %tmp12302, i64 1
+  %tmp12304 = getelementptr inbounds float* %tmp12303, i64 1
+  %tmp12305 = getelementptr inbounds float* %tmp12304, i64 1
+  %tmp12306 = getelementptr inbounds float* %tmp12305, i64 1
+  %tmp12307 = getelementptr inbounds float* %tmp12306, i64 1
+  %tmp12308 = getelementptr inbounds float* %tmp12307, i64 1
+  %tmp12309 = getelementptr inbounds float* %tmp12308, i64 1
+  %tmp12310 = getelementptr inbounds float* %tmp12309, i64 1
+  %tmp12311 = getelementptr inbounds float* %tmp12310, i64 1
+  %tmp12312 = getelementptr inbounds float* %tmp12311, i64 1
+  %tmp12313 = getelementptr inbounds float* %tmp12312, i64 1
+  %tmp12314 = getelementptr inbounds float* %tmp12313, i64 1
+  %tmp12315 = getelementptr inbounds float* %tmp12314, i64 1
+  %tmp12316 = getelementptr inbounds float* %tmp12315, i64 1
+  %tmp12317 = getelementptr inbounds float* %tmp12316, i64 1
+  %tmp12318 = getelementptr inbounds float* %tmp12317, i64 1
+  %tmp12319 = getelementptr inbounds float* %tmp12318, i64 1
+  %tmp12320 = getelementptr inbounds float* %tmp12319, i64 1
+  %tmp12321 = getelementptr inbounds float* %tmp12320, i64 1
+  %tmp12322 = getelementptr inbounds float* %tmp12321, i64 1
+  %tmp12323 = getelementptr inbounds float* %tmp12322, i64 1
+  %tmp12324 = getelementptr inbounds float* %tmp12323, i64 1
+  %tmp12325 = getelementptr inbounds float* %tmp12324, i64 1
+  %tmp12326 = getelementptr inbounds float* %tmp12325, i64 1
+  %tmp12327 = getelementptr inbounds float* %tmp12326, i64 1
+  %tmp12328 = getelementptr inbounds float* %tmp12327, i64 1
+  %tmp12329 = getelementptr inbounds float* %tmp12328, i64 1
+  %tmp12330 = getelementptr inbounds float* %tmp12329, i64 1
+  %tmp12331 = getelementptr inbounds float* %tmp12330, i64 1
+  %tmp12332 = getelementptr inbounds float* %tmp12331, i64 1
+  %tmp12333 = getelementptr inbounds float* %tmp12332, i64 1
+  %tmp12334 = getelementptr inbounds float* %tmp12333, i64 1
+  %tmp12335 = getelementptr inbounds float* %tmp12334, i64 1
+  %tmp12336 = getelementptr inbounds float* %tmp12335, i64 1
+  %tmp12337 = getelementptr inbounds float* %tmp12336, i64 1
+  %tmp12338 = getelementptr inbounds float* %tmp12337, i64 1
+  %tmp12339 = getelementptr inbounds float* %tmp12338, i64 1
+  %tmp12340 = getelementptr inbounds float* %tmp12339, i64 1
+  %tmp12341 = getelementptr inbounds float* %tmp12340, i64 1
+  %tmp12342 = getelementptr inbounds float* %tmp12341, i64 1
+  %tmp12343 = getelementptr inbounds float* %tmp12342, i64 1
+  %tmp12344 = getelementptr inbounds float* %tmp12343, i64 1
+  %tmp12345 = getelementptr inbounds float* %tmp12344, i64 1
+  %tmp12346 = getelementptr inbounds float* %tmp12345, i64 1
+  %tmp12347 = getelementptr inbounds float* %tmp12346, i64 1
+  %tmp12348 = getelementptr inbounds float* %tmp12347, i64 1
+  %tmp12349 = getelementptr inbounds float* %tmp12348, i64 1
+  %tmp12350 = getelementptr inbounds float* %tmp12349, i64 1
+  %tmp12351 = getelementptr inbounds float* %tmp12350, i64 1
+  %tmp12352 = getelementptr inbounds float* %tmp12351, i64 1
+  %tmp12353 = getelementptr inbounds float* %tmp12352, i64 1
+  %tmp12354 = getelementptr inbounds float* %tmp12353, i64 1
+  %tmp12355 = getelementptr inbounds float* %tmp12354, i64 1
+  %tmp12356 = getelementptr inbounds float* %tmp12355, i64 1
+  %tmp12357 = getelementptr inbounds float* %tmp12356, i64 1
+  %tmp12358 = getelementptr inbounds float* %tmp12357, i64 1
+  %tmp12359 = getelementptr inbounds float* %tmp12358, i64 1
+  %tmp12360 = getelementptr inbounds float* %tmp12359, i64 1
+  %tmp12361 = getelementptr inbounds float* %tmp12360, i64 1
+  %tmp12362 = getelementptr inbounds float* %tmp12361, i64 1
+  %tmp12363 = getelementptr inbounds float* %tmp12362, i64 1
+  %tmp12364 = getelementptr inbounds float* %tmp12363, i64 1
+  %tmp12365 = getelementptr inbounds float* %tmp12364, i64 1
+  %tmp12366 = getelementptr inbounds float* %tmp12365, i64 1
+  %tmp12367 = getelementptr inbounds float* %tmp12366, i64 1
+  %tmp12368 = getelementptr inbounds float* %tmp12367, i64 1
+  %tmp12369 = getelementptr inbounds float* %tmp12368, i64 1
+  %tmp12370 = getelementptr inbounds float* %tmp12369, i64 1
+  %tmp12371 = getelementptr inbounds float* %tmp12370, i64 1
+  %tmp12372 = getelementptr inbounds float* %tmp12371, i64 1
+  %tmp12373 = getelementptr inbounds float* %tmp12372, i64 1
+  %tmp12374 = getelementptr inbounds float* %tmp12373, i64 1
+  %tmp12375 = getelementptr inbounds float* %tmp12374, i64 1
+  %tmp12376 = getelementptr inbounds float* %tmp12375, i64 1
+  %tmp12377 = getelementptr inbounds float* %tmp12376, i64 1
+  %tmp12378 = getelementptr inbounds float* %tmp12377, i64 1
+  %tmp12379 = getelementptr inbounds float* %tmp12378, i64 1
+  %tmp12380 = getelementptr inbounds float* %tmp12379, i64 1
+  %tmp12381 = getelementptr inbounds float* %tmp12380, i64 1
+  %tmp12382 = getelementptr inbounds float* %tmp12381, i64 1
+  %tmp12383 = getelementptr inbounds float* %tmp12382, i64 1
+  %tmp12384 = getelementptr inbounds float* %tmp12383, i64 1
+  %tmp12385 = getelementptr inbounds float* %tmp12384, i64 1
+  %tmp12386 = getelementptr inbounds float* %tmp12385, i64 1
+  %tmp12387 = getelementptr inbounds float* %tmp12386, i64 1
+  %tmp12388 = getelementptr inbounds float* %tmp12387, i64 1
+  %tmp12389 = getelementptr inbounds float* %tmp12388, i64 1
+  %tmp12390 = getelementptr inbounds float* %tmp12389, i64 1
+  %tmp12391 = getelementptr inbounds float* %tmp12390, i64 1
+  %tmp12392 = getelementptr inbounds float* %tmp12391, i64 1
+  %tmp12393 = getelementptr inbounds float* %tmp12392, i64 1
+  %tmp12394 = getelementptr inbounds float* %tmp12393, i64 1
+  %tmp12395 = getelementptr inbounds float* %tmp12394, i64 1
+  %tmp12396 = getelementptr inbounds float* %tmp12395, i64 1
+  %tmp12397 = getelementptr inbounds float* %tmp12396, i64 1
+  %tmp12398 = getelementptr inbounds float* %tmp12397, i64 1
+  %tmp12399 = getelementptr inbounds float* %tmp12398, i64 1
+  %tmp12400 = getelementptr inbounds float* %tmp12399, i64 1
+  %tmp12401 = getelementptr inbounds float* %tmp12400, i64 1
+  %tmp12402 = getelementptr inbounds float* %tmp12401, i64 1
+  %tmp12403 = getelementptr inbounds float* %tmp12402, i64 1
+  %tmp12404 = getelementptr inbounds float* %tmp12403, i64 1
+  %tmp12405 = getelementptr inbounds float* %tmp12404, i64 1
+  %tmp12406 = getelementptr inbounds float* %tmp12405, i64 1
+  %tmp12407 = getelementptr inbounds float* %tmp12406, i64 1
+  %tmp12408 = getelementptr inbounds float* %tmp12407, i64 1
+  %tmp12409 = getelementptr inbounds float* %tmp12408, i64 1
+  %tmp12410 = getelementptr inbounds float* %tmp12409, i64 1
+  %tmp12411 = getelementptr inbounds float* %tmp12410, i64 1
+  %tmp12412 = getelementptr inbounds float* %tmp12411, i64 1
+  %tmp12413 = getelementptr inbounds float* %tmp12412, i64 1
+  %tmp12414 = getelementptr inbounds float* %tmp12413, i64 1
+  %tmp12415 = getelementptr inbounds float* %tmp12414, i64 1
+  %tmp12416 = getelementptr inbounds float* %tmp12415, i64 1
+  %tmp12417 = getelementptr inbounds float* %tmp12416, i64 1
+  %tmp12418 = getelementptr inbounds float* %tmp12417, i64 1
+  %tmp12419 = getelementptr inbounds float* %tmp12418, i64 1
+  %tmp12420 = getelementptr inbounds float* %tmp12419, i64 1
+  %tmp12421 = getelementptr inbounds float* %tmp12420, i64 1
+  %tmp12422 = getelementptr inbounds float* %tmp12421, i64 1
+  %tmp12423 = getelementptr inbounds float* %tmp12422, i64 1
+  %tmp12424 = getelementptr inbounds float* %tmp12423, i64 1
+  %tmp12425 = getelementptr inbounds float* %tmp12424, i64 1
+  %tmp12426 = getelementptr inbounds float* %tmp12425, i64 1
+  %tmp12427 = getelementptr inbounds float* %tmp12426, i64 1
+  %tmp12428 = getelementptr inbounds float* %tmp12427, i64 1
+  %tmp12429 = getelementptr inbounds float* %tmp12428, i64 1
+  %tmp12430 = getelementptr inbounds float* %tmp12429, i64 1
+  %tmp12431 = getelementptr inbounds float* %tmp12430, i64 1
+  %tmp12432 = getelementptr inbounds float* %tmp12431, i64 1
+  %tmp12433 = getelementptr inbounds float* %tmp12432, i64 1
+  %tmp12434 = getelementptr inbounds float* %tmp12433, i64 1
+  %tmp12435 = getelementptr inbounds float* %tmp12434, i64 1
+  %tmp12436 = getelementptr inbounds float* %tmp12435, i64 1
+  %tmp12437 = getelementptr inbounds float* %tmp12436, i64 1
+  %tmp12438 = getelementptr inbounds float* %tmp12437, i64 1
+  %tmp12439 = getelementptr inbounds float* %tmp12438, i64 1
+  %tmp12440 = getelementptr inbounds float* %tmp12439, i64 1
+  %tmp12441 = getelementptr inbounds float* %tmp12440, i64 1
+  %tmp12442 = getelementptr inbounds float* %tmp12441, i64 1
+  %tmp12443 = getelementptr inbounds float* %tmp12442, i64 1
+  %tmp12444 = getelementptr inbounds float* %tmp12443, i64 1
+  %tmp12445 = getelementptr inbounds float* %tmp12444, i64 1
+  %tmp12446 = getelementptr inbounds float* %tmp12445, i64 1
+  %tmp12447 = getelementptr inbounds float* %tmp12446, i64 1
+  %tmp12448 = getelementptr inbounds float* %tmp12447, i64 1
+  %tmp12449 = getelementptr inbounds float* %tmp12448, i64 1
+  %tmp12450 = getelementptr inbounds float* %tmp12449, i64 1
+  %tmp12451 = getelementptr inbounds float* %tmp12450, i64 1
+  %tmp12452 = getelementptr inbounds float* %tmp12451, i64 1
+  %tmp12453 = getelementptr inbounds float* %tmp12452, i64 1
+  %tmp12454 = getelementptr inbounds float* %tmp12453, i64 1
+  %tmp12455 = getelementptr inbounds float* %tmp12454, i64 1
+  %tmp12456 = getelementptr inbounds float* %tmp12455, i64 1
+  %tmp12457 = getelementptr inbounds float* %tmp12456, i64 1
+  %tmp12458 = getelementptr inbounds float* %tmp12457, i64 1
+  %tmp12459 = getelementptr inbounds float* %tmp12458, i64 1
+  %tmp12460 = getelementptr inbounds float* %tmp12459, i64 1
+  %tmp12461 = getelementptr inbounds float* %tmp12460, i64 1
+  %tmp12462 = getelementptr inbounds float* %tmp12461, i64 1
+  %tmp12463 = getelementptr inbounds float* %tmp12462, i64 1
+  %tmp12464 = getelementptr inbounds float* %tmp12463, i64 1
+  %tmp12465 = getelementptr inbounds float* %tmp12464, i64 1
+  %tmp12466 = getelementptr inbounds float* %tmp12465, i64 1
+  %tmp12467 = getelementptr inbounds float* %tmp12466, i64 1
+  %tmp12468 = getelementptr inbounds float* %tmp12467, i64 1
+  %tmp12469 = getelementptr inbounds float* %tmp12468, i64 1
+  %tmp12470 = getelementptr inbounds float* %tmp12469, i64 1
+  %tmp12471 = getelementptr inbounds float* %tmp12470, i64 1
+  %tmp12472 = getelementptr inbounds float* %tmp12471, i64 1
+  %tmp12473 = getelementptr inbounds float* %tmp12472, i64 1
+  %tmp12474 = getelementptr inbounds float* %tmp12473, i64 1
+  %tmp12475 = getelementptr inbounds float* %tmp12474, i64 1
+  %tmp12476 = getelementptr inbounds float* %tmp12475, i64 1
+  %tmp12477 = getelementptr inbounds float* %tmp12476, i64 1
+  %tmp12478 = getelementptr inbounds float* %tmp12477, i64 1
+  %tmp12479 = getelementptr inbounds float* %tmp12478, i64 1
+  %tmp12480 = getelementptr inbounds float* %tmp12479, i64 1
+  %tmp12481 = getelementptr inbounds float* %tmp12480, i64 1
+  %tmp12482 = getelementptr inbounds float* %tmp12481, i64 1
+  %tmp12483 = getelementptr inbounds float* %tmp12482, i64 1
+  %tmp12484 = getelementptr inbounds float* %tmp12483, i64 1
+  %tmp12485 = getelementptr inbounds float* %tmp12484, i64 1
+  %tmp12486 = getelementptr inbounds float* %tmp12485, i64 1
+  %tmp12487 = getelementptr inbounds float* %tmp12486, i64 1
+  %tmp12488 = getelementptr inbounds float* %tmp12487, i64 1
+  %tmp12489 = getelementptr inbounds float* %tmp12488, i64 1
+  %tmp12490 = getelementptr inbounds float* %tmp12489, i64 1
+  %tmp12491 = getelementptr inbounds float* %tmp12490, i64 1
+  %tmp12492 = getelementptr inbounds float* %tmp12491, i64 1
+  %tmp12493 = getelementptr inbounds float* %tmp12492, i64 1
+  %tmp12494 = getelementptr inbounds float* %tmp12493, i64 1
+  %tmp12495 = getelementptr inbounds float* %tmp12494, i64 1
+  %tmp12496 = getelementptr inbounds float* %tmp12495, i64 1
+  %tmp12497 = getelementptr inbounds float* %tmp12496, i64 1
+  %tmp12498 = getelementptr inbounds float* %tmp12497, i64 1
+  %tmp12499 = getelementptr inbounds float* %tmp12498, i64 1
+  %tmp12500 = getelementptr inbounds float* %tmp12499, i64 1
+  %tmp12501 = getelementptr inbounds float* %tmp12500, i64 1
+  %tmp12502 = getelementptr inbounds float* %tmp12501, i64 1
+  %tmp12503 = getelementptr inbounds float* %tmp12502, i64 1
+  %tmp12504 = getelementptr inbounds float* %tmp12503, i64 1
+  %tmp12505 = getelementptr inbounds float* %tmp12504, i64 1
+  %tmp12506 = getelementptr inbounds float* %tmp12505, i64 1
+  %tmp12507 = getelementptr inbounds float* %tmp12506, i64 1
+  %tmp12508 = getelementptr inbounds float* %tmp12507, i64 1
+  %tmp12509 = getelementptr inbounds float* %tmp12508, i64 1
+  %tmp12510 = getelementptr inbounds float* %tmp12509, i64 1
+  %tmp12511 = getelementptr inbounds float* %tmp12510, i64 1
+  %tmp12512 = getelementptr inbounds float* %tmp12511, i64 1
+  %tmp12513 = getelementptr inbounds float* %tmp12512, i64 1
+  %tmp12514 = getelementptr inbounds float* %tmp12513, i64 1
+  %tmp12515 = getelementptr inbounds float* %tmp12514, i64 1
+  %tmp12516 = getelementptr inbounds float* %tmp12515, i64 1
+  %tmp12517 = getelementptr inbounds float* %tmp12516, i64 1
+  %tmp12518 = getelementptr inbounds float* %tmp12517, i64 1
+  %tmp12519 = getelementptr inbounds float* %tmp12518, i64 1
+  %tmp12520 = getelementptr inbounds float* %tmp12519, i64 1
+  %tmp12521 = getelementptr inbounds float* %tmp12520, i64 1
+  %tmp12522 = getelementptr inbounds float* %tmp12521, i64 1
+  %tmp12523 = getelementptr inbounds float* %tmp12522, i64 1
+  %tmp12524 = getelementptr inbounds float* %tmp12523, i64 1
+  %tmp12525 = getelementptr inbounds float* %tmp12524, i64 1
+  %tmp12526 = getelementptr inbounds float* %tmp12525, i64 1
+  %tmp12527 = getelementptr inbounds float* %tmp12526, i64 1
+  %tmp12528 = getelementptr inbounds float* %tmp12527, i64 1
+  %tmp12529 = getelementptr inbounds float* %tmp12528, i64 1
+  %tmp12530 = getelementptr inbounds float* %tmp12529, i64 1
+  %tmp12531 = getelementptr inbounds float* %tmp12530, i64 1
+  %tmp12532 = getelementptr inbounds float* %tmp12531, i64 1
+  %tmp12533 = getelementptr inbounds float* %tmp12532, i64 1
+  %tmp12534 = getelementptr inbounds float* %tmp12533, i64 1
+  %tmp12535 = getelementptr inbounds float* %tmp12534, i64 1
+  %tmp12536 = getelementptr inbounds float* %tmp12535, i64 1
+  %tmp12537 = getelementptr inbounds float* %tmp12536, i64 1
+  %tmp12538 = getelementptr inbounds float* %tmp12537, i64 1
+  %tmp12539 = getelementptr inbounds float* %tmp12538, i64 1
+  %tmp12540 = getelementptr inbounds float* %tmp12539, i64 1
+  %tmp12541 = getelementptr inbounds float* %tmp12540, i64 1
+  %tmp12542 = getelementptr inbounds float* %tmp12541, i64 1
+  %tmp12543 = getelementptr inbounds float* %tmp12542, i64 1
+  %tmp12544 = getelementptr inbounds float* %tmp12543, i64 1
+  %tmp12545 = getelementptr inbounds float* %tmp12544, i64 1
+  %tmp12546 = getelementptr inbounds float* %tmp12545, i64 1
+  %tmp12547 = getelementptr inbounds float* %tmp12546, i64 1
+  %tmp12548 = getelementptr inbounds float* %tmp12547, i64 1
+  %tmp12549 = getelementptr inbounds float* %tmp12548, i64 1
+  %tmp12550 = getelementptr inbounds float* %tmp12549, i64 1
+  %tmp12551 = getelementptr inbounds float* %tmp12550, i64 1
+  %tmp12552 = getelementptr inbounds float* %tmp12551, i64 1
+  %tmp12553 = getelementptr inbounds float* %tmp12552, i64 1
+  %tmp12554 = getelementptr inbounds float* %tmp12553, i64 1
+  %tmp12555 = getelementptr inbounds float* %tmp12554, i64 1
+  %tmp12556 = getelementptr inbounds float* %tmp12555, i64 1
+  %tmp12557 = getelementptr inbounds float* %tmp12556, i64 1
+  %tmp12558 = getelementptr inbounds float* %tmp12557, i64 1
+  %tmp12559 = getelementptr inbounds float* %tmp12558, i64 1
+  %tmp12560 = getelementptr inbounds float* %tmp12559, i64 1
+  %tmp12561 = getelementptr inbounds float* %tmp12560, i64 1
+  %tmp12562 = getelementptr inbounds float* %tmp12561, i64 1
+  %tmp12563 = getelementptr inbounds float* %tmp12562, i64 1
+  %tmp12564 = getelementptr inbounds float* %tmp12563, i64 1
+  %tmp12565 = getelementptr inbounds float* %tmp12564, i64 1
+  %tmp12566 = getelementptr inbounds float* %tmp12565, i64 1
+  %tmp12567 = getelementptr inbounds float* %tmp12566, i64 1
+  %tmp12568 = getelementptr inbounds float* %tmp12567, i64 1
+  %tmp12569 = getelementptr inbounds float* %tmp12568, i64 1
+  %tmp12570 = getelementptr inbounds float* %tmp12569, i64 1
+  %tmp12571 = getelementptr inbounds float* %tmp12570, i64 1
+  %tmp12572 = getelementptr inbounds float* %tmp12571, i64 1
+  %tmp12573 = getelementptr inbounds float* %tmp12572, i64 1
+  %tmp12574 = getelementptr inbounds float* %tmp12573, i64 1
+  %tmp12575 = getelementptr inbounds float* %tmp12574, i64 1
+  %tmp12576 = getelementptr inbounds float* %tmp12575, i64 1
+  %tmp12577 = getelementptr inbounds float* %tmp12576, i64 1
+  %tmp12578 = getelementptr inbounds float* %tmp12577, i64 1
+  %tmp12579 = getelementptr inbounds float* %tmp12578, i64 1
+  %tmp12580 = getelementptr inbounds float* %tmp12579, i64 1
+  %tmp12581 = getelementptr inbounds float* %tmp12580, i64 1
+  %tmp12582 = getelementptr inbounds float* %tmp12581, i64 1
+  %tmp12583 = getelementptr inbounds float* %tmp12582, i64 1
+  %tmp12584 = getelementptr inbounds float* %tmp12583, i64 1
+  %tmp12585 = getelementptr inbounds float* %tmp12584, i64 1
+  %tmp12586 = getelementptr inbounds float* %tmp12585, i64 1
+  %tmp12587 = getelementptr inbounds float* %tmp12586, i64 1
+  %tmp12588 = getelementptr inbounds float* %tmp12587, i64 1
+  %tmp12589 = getelementptr inbounds float* %tmp12588, i64 1
+  %tmp12590 = getelementptr inbounds float* %tmp12589, i64 1
+  %tmp12591 = getelementptr inbounds float* %tmp12590, i64 1
+  %tmp12592 = getelementptr inbounds float* %tmp12591, i64 1
+  %tmp12593 = getelementptr inbounds float* %tmp12592, i64 1
+  %tmp12594 = getelementptr inbounds float* %tmp12593, i64 1
+  %tmp12595 = getelementptr inbounds float* %tmp12594, i64 1
+  %tmp12596 = getelementptr inbounds float* %tmp12595, i64 1
+  %tmp12597 = getelementptr inbounds float* %tmp12596, i64 1
+  %tmp12598 = getelementptr inbounds float* %tmp12597, i64 1
+  %tmp12599 = getelementptr inbounds float* %tmp12598, i64 1
+  %tmp12600 = getelementptr inbounds float* %tmp12599, i64 1
+  %tmp12601 = getelementptr inbounds float* %tmp12600, i64 1
+  %tmp12602 = getelementptr inbounds float* %tmp12601, i64 1
+  %tmp12603 = getelementptr inbounds float* %tmp12602, i64 1
+  %tmp12604 = getelementptr inbounds float* %tmp12603, i64 1
+  %tmp12605 = getelementptr inbounds float* %tmp12604, i64 1
+  %tmp12606 = getelementptr inbounds float* %tmp12605, i64 1
+  %tmp12607 = getelementptr inbounds float* %tmp12606, i64 1
+  %tmp12608 = getelementptr inbounds float* %tmp12607, i64 1
+  %tmp12609 = getelementptr inbounds float* %tmp12608, i64 1
+  %tmp12610 = getelementptr inbounds float* %tmp12609, i64 1
+  %tmp12611 = getelementptr inbounds float* %tmp12610, i64 1
+  %tmp12612 = getelementptr inbounds float* %tmp12611, i64 1
+  %tmp12613 = getelementptr inbounds float* %tmp12612, i64 1
+  %tmp12614 = getelementptr inbounds float* %tmp12613, i64 1
+  %tmp12615 = getelementptr inbounds float* %tmp12614, i64 1
+  %tmp12616 = getelementptr inbounds float* %tmp12615, i64 1
+  %tmp12617 = getelementptr inbounds float* %tmp12616, i64 1
+  %tmp12618 = getelementptr inbounds float* %tmp12617, i64 1
+  %tmp12619 = getelementptr inbounds float* %tmp12618, i64 1
+  %tmp12620 = getelementptr inbounds float* %tmp12619, i64 1
+  %tmp12621 = getelementptr inbounds float* %tmp12620, i64 1
+  %tmp12622 = getelementptr inbounds float* %tmp12621, i64 1
+  %tmp12623 = getelementptr inbounds float* %tmp12622, i64 1
+  %tmp12624 = getelementptr inbounds float* %tmp12623, i64 1
+  %tmp12625 = getelementptr inbounds float* %tmp12624, i64 1
+  %tmp12626 = getelementptr inbounds float* %tmp12625, i64 1
+  %tmp12627 = getelementptr inbounds float* %tmp12626, i64 1
+  %tmp12628 = getelementptr inbounds float* %tmp12627, i64 1
+  %tmp12629 = getelementptr inbounds float* %tmp12628, i64 1
+  %tmp12630 = getelementptr inbounds float* %tmp12629, i64 1
+  %tmp12631 = getelementptr inbounds float* %tmp12630, i64 1
+  %tmp12632 = getelementptr inbounds float* %tmp12631, i64 1
+  %tmp12633 = getelementptr inbounds float* %tmp12632, i64 1
+  %tmp12634 = getelementptr inbounds float* %tmp12633, i64 1
+  %tmp12635 = getelementptr inbounds float* %tmp12634, i64 1
+  %tmp12636 = getelementptr inbounds float* %tmp12635, i64 1
+  %tmp12637 = getelementptr inbounds float* %tmp12636, i64 1
+  %tmp12638 = getelementptr inbounds float* %tmp12637, i64 1
+  %tmp12639 = getelementptr inbounds float* %tmp12638, i64 1
+  %tmp12640 = getelementptr inbounds float* %tmp12639, i64 1
+  %tmp12641 = getelementptr inbounds float* %tmp12640, i64 1
+  %tmp12642 = getelementptr inbounds float* %tmp12641, i64 1
+  %tmp12643 = getelementptr inbounds float* %tmp12642, i64 1
+  %tmp12644 = getelementptr inbounds float* %tmp12643, i64 1
+  %tmp12645 = getelementptr inbounds float* %tmp12644, i64 1
+  %tmp12646 = getelementptr inbounds float* %tmp12645, i64 1
+  %tmp12647 = getelementptr inbounds float* %tmp12646, i64 1
+  %tmp12648 = getelementptr inbounds float* %tmp12647, i64 1
+  %tmp12649 = getelementptr inbounds float* %tmp12648, i64 1
+  %tmp12650 = getelementptr inbounds float* %tmp12649, i64 1
+  %tmp12651 = getelementptr inbounds float* %tmp12650, i64 1
+  %tmp12652 = getelementptr inbounds float* %tmp12651, i64 1
+  %tmp12653 = getelementptr inbounds float* %tmp12652, i64 1
+  %tmp12654 = getelementptr inbounds float* %tmp12653, i64 1
+  %tmp12655 = getelementptr inbounds float* %tmp12654, i64 1
+  %tmp12656 = getelementptr inbounds float* %tmp12655, i64 1
+  %tmp12657 = getelementptr inbounds float* %tmp12656, i64 1
+  %tmp12658 = getelementptr inbounds float* %tmp12657, i64 1
+  %tmp12659 = getelementptr inbounds float* %tmp12658, i64 1
+  %tmp12660 = getelementptr inbounds float* %tmp12659, i64 1
+  %tmp12661 = getelementptr inbounds float* %tmp12660, i64 1
+  %tmp12662 = getelementptr inbounds float* %tmp12661, i64 1
+  %tmp12663 = getelementptr inbounds float* %tmp12662, i64 1
+  %tmp12664 = getelementptr inbounds float* %tmp12663, i64 1
+  %tmp12665 = getelementptr inbounds float* %tmp12664, i64 1
+  %tmp12666 = getelementptr inbounds float* %tmp12665, i64 1
+  %tmp12667 = getelementptr inbounds float* %tmp12666, i64 1
+  %tmp12668 = getelementptr inbounds float* %tmp12667, i64 1
+  %tmp12669 = getelementptr inbounds float* %tmp12668, i64 1
+  %tmp12670 = getelementptr inbounds float* %tmp12669, i64 1
+  %tmp12671 = getelementptr inbounds float* %tmp12670, i64 1
+  %tmp12672 = getelementptr inbounds float* %tmp12671, i64 1
+  %tmp12673 = getelementptr inbounds float* %tmp12672, i64 1
+  %tmp12674 = getelementptr inbounds float* %tmp12673, i64 1
+  %tmp12675 = getelementptr inbounds float* %tmp12674, i64 1
+  %tmp12676 = getelementptr inbounds float* %tmp12675, i64 1
+  %tmp12677 = getelementptr inbounds float* %tmp12676, i64 1
+  %tmp12678 = getelementptr inbounds float* %tmp12677, i64 1
+  %tmp12679 = getelementptr inbounds float* %tmp12678, i64 1
+  %tmp12680 = getelementptr inbounds float* %tmp12679, i64 1
+  %tmp12681 = getelementptr inbounds float* %tmp12680, i64 1
+  %tmp12682 = getelementptr inbounds float* %tmp12681, i64 1
+  %tmp12683 = getelementptr inbounds float* %tmp12682, i64 1
+  %tmp12684 = getelementptr inbounds float* %tmp12683, i64 1
+  %tmp12685 = getelementptr inbounds float* %tmp12684, i64 1
+  %tmp12686 = getelementptr inbounds float* %tmp12685, i64 1
+  %tmp12687 = getelementptr inbounds float* %tmp12686, i64 1
+  %tmp12688 = getelementptr inbounds float* %tmp12687, i64 1
+  %tmp12689 = getelementptr inbounds float* %tmp12688, i64 1
+  %tmp12690 = getelementptr inbounds float* %tmp12689, i64 1
+  %tmp12691 = getelementptr inbounds float* %tmp12690, i64 1
+  %tmp12692 = getelementptr inbounds float* %tmp12691, i64 1
+  %tmp12693 = getelementptr inbounds float* %tmp12692, i64 1
+  %tmp12694 = getelementptr inbounds float* %tmp12693, i64 1
+  %tmp12695 = getelementptr inbounds float* %tmp12694, i64 1
+  %tmp12696 = getelementptr inbounds float* %tmp12695, i64 1
+  %tmp12697 = getelementptr inbounds float* %tmp12696, i64 1
+  %tmp12698 = getelementptr inbounds float* %tmp12697, i64 1
+  %tmp12699 = getelementptr inbounds float* %tmp12698, i64 1
+  %tmp12700 = getelementptr inbounds float* %tmp12699, i64 1
+  %tmp12701 = getelementptr inbounds float* %tmp12700, i64 1
+  %tmp12702 = getelementptr inbounds float* %tmp12701, i64 1
+  %tmp12703 = getelementptr inbounds float* %tmp12702, i64 1
+  %tmp12704 = getelementptr inbounds float* %tmp12703, i64 1
+  %tmp12705 = getelementptr inbounds float* %tmp12704, i64 1
+  %tmp12706 = getelementptr inbounds float* %tmp12705, i64 1
+  %tmp12707 = getelementptr inbounds float* %tmp12706, i64 1
+  %tmp12708 = getelementptr inbounds float* %tmp12707, i64 1
+  %tmp12709 = getelementptr inbounds float* %tmp12708, i64 1
+  %tmp12710 = getelementptr inbounds float* %tmp12709, i64 1
+  %tmp12711 = getelementptr inbounds float* %tmp12710, i64 1
+  %tmp12712 = getelementptr inbounds float* %tmp12711, i64 1
+  %tmp12713 = getelementptr inbounds float* %tmp12712, i64 1
+  %tmp12714 = getelementptr inbounds float* %tmp12713, i64 1
+  %tmp12715 = getelementptr inbounds float* %tmp12714, i64 1
+  %tmp12716 = getelementptr inbounds float* %tmp12715, i64 1
+  %tmp12717 = getelementptr inbounds float* %tmp12716, i64 1
+  %tmp12718 = getelementptr inbounds float* %tmp12717, i64 1
+  %tmp12719 = getelementptr inbounds float* %tmp12718, i64 1
+  %tmp12720 = getelementptr inbounds float* %tmp12719, i64 1
+  %tmp12721 = getelementptr inbounds float* %tmp12720, i64 1
+  %tmp12722 = getelementptr inbounds float* %tmp12721, i64 1
+  %tmp12723 = getelementptr inbounds float* %tmp12722, i64 1
+  %tmp12724 = getelementptr inbounds float* %tmp12723, i64 1
+  %tmp12725 = getelementptr inbounds float* %tmp12724, i64 1
+  %tmp12726 = getelementptr inbounds float* %tmp12725, i64 1
+  %tmp12727 = getelementptr inbounds float* %tmp12726, i64 1
+  %tmp12728 = getelementptr inbounds float* %tmp12727, i64 1
+  %tmp12729 = getelementptr inbounds float* %tmp12728, i64 1
+  %tmp12730 = getelementptr inbounds float* %tmp12729, i64 1
+  %tmp12731 = getelementptr inbounds float* %tmp12730, i64 1
+  %tmp12732 = getelementptr inbounds float* %tmp12731, i64 1
+  %tmp12733 = getelementptr inbounds float* %tmp12732, i64 1
+  %tmp12734 = getelementptr inbounds float* %tmp12733, i64 1
+  %tmp12735 = getelementptr inbounds float* %tmp12734, i64 1
+  %tmp12736 = getelementptr inbounds float* %tmp12735, i64 1
+  %tmp12737 = getelementptr inbounds float* %tmp12736, i64 1
+  %tmp12738 = getelementptr inbounds float* %tmp12737, i64 1
+  %tmp12739 = getelementptr inbounds float* %tmp12738, i64 1
+  %tmp12740 = getelementptr inbounds float* %tmp12739, i64 1
+  %tmp12741 = getelementptr inbounds float* %tmp12740, i64 1
+  %tmp12742 = getelementptr inbounds float* %tmp12741, i64 1
+  %tmp12743 = getelementptr inbounds float* %tmp12742, i64 1
+  %tmp12744 = getelementptr inbounds float* %tmp12743, i64 1
+  %tmp12745 = getelementptr inbounds float* %tmp12744, i64 1
+  %tmp12746 = getelementptr inbounds float* %tmp12745, i64 1
+  %tmp12747 = getelementptr inbounds float* %tmp12746, i64 1
+  %tmp12748 = getelementptr inbounds float* %tmp12747, i64 1
+  %tmp12749 = getelementptr inbounds float* %tmp12748, i64 1
+  %tmp12750 = getelementptr inbounds float* %tmp12749, i64 1
+  %tmp12751 = getelementptr inbounds float* %tmp12750, i64 1
+  %tmp12752 = getelementptr inbounds float* %tmp12751, i64 1
+  %tmp12753 = getelementptr inbounds float* %tmp12752, i64 1
+  %tmp12754 = getelementptr inbounds float* %tmp12753, i64 1
+  %tmp12755 = getelementptr inbounds float* %tmp12754, i64 1
+  %tmp12756 = getelementptr inbounds float* %tmp12755, i64 1
+  %tmp12757 = getelementptr inbounds float* %tmp12756, i64 1
+  %tmp12758 = getelementptr inbounds float* %tmp12757, i64 1
+  %tmp12759 = getelementptr inbounds float* %tmp12758, i64 1
+  %tmp12760 = getelementptr inbounds float* %tmp12759, i64 1
+  %tmp12761 = getelementptr inbounds float* %tmp12760, i64 1
+  %tmp12762 = getelementptr inbounds float* %tmp12761, i64 1
+  %tmp12763 = getelementptr inbounds float* %tmp12762, i64 1
+  %tmp12764 = getelementptr inbounds float* %tmp12763, i64 1
+  %tmp12765 = getelementptr inbounds float* %tmp12764, i64 1
+  %tmp12766 = getelementptr inbounds float* %tmp12765, i64 1
+  %tmp12767 = getelementptr inbounds float* %tmp12766, i64 1
+  %tmp12768 = getelementptr inbounds float* %tmp12767, i64 1
+  %tmp12769 = getelementptr inbounds float* %tmp12768, i64 1
+  %tmp12770 = getelementptr inbounds float* %tmp12769, i64 1
+  %tmp12771 = getelementptr inbounds float* %tmp12770, i64 1
+  %tmp12772 = getelementptr inbounds float* %tmp12771, i64 1
+  %tmp12773 = getelementptr inbounds float* %tmp12772, i64 1
+  %tmp12774 = getelementptr inbounds float* %tmp12773, i64 1
+  %tmp12775 = getelementptr inbounds float* %tmp12774, i64 1
+  %tmp12776 = getelementptr inbounds float* %tmp12775, i64 1
+  %tmp12777 = getelementptr inbounds float* %tmp12776, i64 1
+  %tmp12778 = getelementptr inbounds float* %tmp12777, i64 1
+  %tmp12779 = getelementptr inbounds float* %tmp12778, i64 1
+  %tmp12780 = getelementptr inbounds float* %tmp12779, i64 1
+  %tmp12781 = getelementptr inbounds float* %tmp12780, i64 1
+  %tmp12782 = getelementptr inbounds float* %tmp12781, i64 1
+  %tmp12783 = getelementptr inbounds float* %tmp12782, i64 1
+  %tmp12784 = getelementptr inbounds float* %tmp12783, i64 1
+  %tmp12785 = getelementptr inbounds float* %tmp12784, i64 1
+  %tmp12786 = getelementptr inbounds float* %tmp12785, i64 1
+  %tmp12787 = getelementptr inbounds float* %tmp12786, i64 1
+  %tmp12788 = getelementptr inbounds float* %tmp12787, i64 1
+  %tmp12789 = getelementptr inbounds float* %tmp12788, i64 1
+  %tmp12790 = getelementptr inbounds float* %tmp12789, i64 1
+  %tmp12791 = getelementptr inbounds float* %tmp12790, i64 1
+  %tmp12792 = getelementptr inbounds float* %tmp12791, i64 1
+  %tmp12793 = getelementptr inbounds float* %tmp12792, i64 1
+  %tmp12794 = getelementptr inbounds float* %tmp12793, i64 1
+  %tmp12795 = getelementptr inbounds float* %tmp12794, i64 1
+  %tmp12796 = getelementptr inbounds float* %tmp12795, i64 1
+  %tmp12797 = getelementptr inbounds float* %tmp12796, i64 1
+  %tmp12798 = getelementptr inbounds float* %tmp12797, i64 1
+  %tmp12799 = getelementptr inbounds float* %tmp12798, i64 1
+  %tmp12800 = getelementptr inbounds float* %tmp12799, i64 1
+  %tmp12801 = getelementptr inbounds float* %tmp12800, i64 1
+  %tmp12802 = getelementptr inbounds float* %tmp12801, i64 1
+  %tmp12803 = getelementptr inbounds float* %tmp12802, i64 1
+  %tmp12804 = getelementptr inbounds float* %tmp12803, i64 1
+  %tmp12805 = getelementptr inbounds float* %tmp12804, i64 1
+  %tmp12806 = getelementptr inbounds float* %tmp12805, i64 1
+  %tmp12807 = getelementptr inbounds float* %tmp12806, i64 1
+  %tmp12808 = getelementptr inbounds float* %tmp12807, i64 1
+  %tmp12809 = getelementptr inbounds float* %tmp12808, i64 1
+  %tmp12810 = getelementptr inbounds float* %tmp12809, i64 1
+  %tmp12811 = getelementptr inbounds float* %tmp12810, i64 1
+  %tmp12812 = getelementptr inbounds float* %tmp12811, i64 1
+  %tmp12813 = getelementptr inbounds float* %tmp12812, i64 1
+  %tmp12814 = getelementptr inbounds float* %tmp12813, i64 1
+  %tmp12815 = getelementptr inbounds float* %tmp12814, i64 1
+  %tmp12816 = getelementptr inbounds float* %tmp12815, i64 1
+  %tmp12817 = getelementptr inbounds float* %tmp12816, i64 1
+  %tmp12818 = getelementptr inbounds float* %tmp12817, i64 1
+  %tmp12819 = getelementptr inbounds float* %tmp12818, i64 1
+  %tmp12820 = getelementptr inbounds float* %tmp12819, i64 1
+  %tmp12821 = getelementptr inbounds float* %tmp12820, i64 1
+  %tmp12822 = getelementptr inbounds float* %tmp12821, i64 1
+  %tmp12823 = getelementptr inbounds float* %tmp12822, i64 1
+  %tmp12824 = getelementptr inbounds float* %tmp12823, i64 1
+  %tmp12825 = getelementptr inbounds float* %tmp12824, i64 1
+  %tmp12826 = getelementptr inbounds float* %tmp12825, i64 1
+  %tmp12827 = getelementptr inbounds float* %tmp12826, i64 1
+  %tmp12828 = getelementptr inbounds float* %tmp12827, i64 1
+  %tmp12829 = getelementptr inbounds float* %tmp12828, i64 1
+  %tmp12830 = getelementptr inbounds float* %tmp12829, i64 1
+  %tmp12831 = getelementptr inbounds float* %tmp12830, i64 1
+  %tmp12832 = getelementptr inbounds float* %tmp12831, i64 1
+  %tmp12833 = getelementptr inbounds float* %tmp12832, i64 1
+  %tmp12834 = getelementptr inbounds float* %tmp12833, i64 1
+  %tmp12835 = getelementptr inbounds float* %tmp12834, i64 1
+  %tmp12836 = getelementptr inbounds float* %tmp12835, i64 1
+  %tmp12837 = getelementptr inbounds float* %tmp12836, i64 1
+  %tmp12838 = getelementptr inbounds float* %tmp12837, i64 1
+  %tmp12839 = getelementptr inbounds float* %tmp12838, i64 1
+  %tmp12840 = getelementptr inbounds float* %tmp12839, i64 1
+  %tmp12841 = getelementptr inbounds float* %tmp12840, i64 1
+  %tmp12842 = getelementptr inbounds float* %tmp12841, i64 1
+  %tmp12843 = getelementptr inbounds float* %tmp12842, i64 1
+  %tmp12844 = getelementptr inbounds float* %tmp12843, i64 1
+  %tmp12845 = getelementptr inbounds float* %tmp12844, i64 1
+  %tmp12846 = getelementptr inbounds float* %tmp12845, i64 1
+  %tmp12847 = getelementptr inbounds float* %tmp12846, i64 1
+  %tmp12848 = getelementptr inbounds float* %tmp12847, i64 1
+  %tmp12849 = getelementptr inbounds float* %tmp12848, i64 1
+  %tmp12850 = getelementptr inbounds float* %tmp12849, i64 1
+  %tmp12851 = getelementptr inbounds float* %tmp12850, i64 1
+  %tmp12852 = getelementptr inbounds float* %tmp12851, i64 1
+  %tmp12853 = getelementptr inbounds float* %tmp12852, i64 1
+  %tmp12854 = getelementptr inbounds float* %tmp12853, i64 1
+  %tmp12855 = getelementptr inbounds float* %tmp12854, i64 1
+  %tmp12856 = getelementptr inbounds float* %tmp12855, i64 1
+  %tmp12857 = getelementptr inbounds float* %tmp12856, i64 1
+  %tmp12858 = getelementptr inbounds float* %tmp12857, i64 1
+  %tmp12859 = getelementptr inbounds float* %tmp12858, i64 1
+  %tmp12860 = getelementptr inbounds float* %tmp12859, i64 1
+  %tmp12861 = getelementptr inbounds float* %tmp12860, i64 1
+  %tmp12862 = getelementptr inbounds float* %tmp12861, i64 1
+  %tmp12863 = getelementptr inbounds float* %tmp12862, i64 1
+  %tmp12864 = getelementptr inbounds float* %tmp12863, i64 1
+  %tmp12865 = getelementptr inbounds float* %tmp12864, i64 1
+  %tmp12866 = getelementptr inbounds float* %tmp12865, i64 1
+  %tmp12867 = getelementptr inbounds float* %tmp12866, i64 1
+  %tmp12868 = getelementptr inbounds float* %tmp12867, i64 1
+  %tmp12869 = getelementptr inbounds float* %tmp12868, i64 1
+  %tmp12870 = getelementptr inbounds float* %tmp12869, i64 1
+  %tmp12871 = getelementptr inbounds float* %tmp12870, i64 1
+  %tmp12872 = getelementptr inbounds float* %tmp12871, i64 1
+  %tmp12873 = getelementptr inbounds float* %tmp12872, i64 1
+  %tmp12874 = getelementptr inbounds float* %tmp12873, i64 1
+  %tmp12875 = getelementptr inbounds float* %tmp12874, i64 1
+  %tmp12876 = getelementptr inbounds float* %tmp12875, i64 1
+  %tmp12877 = getelementptr inbounds float* %tmp12876, i64 1
+  %tmp12878 = getelementptr inbounds float* %tmp12877, i64 1
+  %tmp12879 = getelementptr inbounds float* %tmp12878, i64 1
+  %tmp12880 = getelementptr inbounds float* %tmp12879, i64 1
+  %tmp12881 = getelementptr inbounds float* %tmp12880, i64 1
+  %tmp12882 = getelementptr inbounds float* %tmp12881, i64 1
+  %tmp12883 = getelementptr inbounds float* %tmp12882, i64 1
+  %tmp12884 = getelementptr inbounds float* %tmp12883, i64 1
+  %tmp12885 = getelementptr inbounds float* %tmp12884, i64 1
+  %tmp12886 = getelementptr inbounds float* %tmp12885, i64 1
+  %tmp12887 = getelementptr inbounds float* %tmp12886, i64 1
+  %tmp12888 = getelementptr inbounds float* %tmp12887, i64 1
+  %tmp12889 = getelementptr inbounds float* %tmp12888, i64 1
+  %tmp12890 = getelementptr inbounds float* %tmp12889, i64 1
+  %tmp12891 = getelementptr inbounds float* %tmp12890, i64 1
+  %tmp12892 = getelementptr inbounds float* %tmp12891, i64 1
+  %tmp12893 = getelementptr inbounds float* %tmp12892, i64 1
+  %tmp12894 = getelementptr inbounds float* %tmp12893, i64 1
+  %tmp12895 = getelementptr inbounds float* %tmp12894, i64 1
+  %tmp12896 = getelementptr inbounds float* %tmp12895, i64 1
+  %tmp12897 = getelementptr inbounds float* %tmp12896, i64 1
+  %tmp12898 = getelementptr inbounds float* %tmp12897, i64 1
+  %tmp12899 = getelementptr inbounds float* %tmp12898, i64 1
+  %tmp12900 = getelementptr inbounds float* %tmp12899, i64 1
+  %tmp12901 = getelementptr inbounds float* %tmp12900, i64 1
+  %tmp12902 = getelementptr inbounds float* %tmp12901, i64 1
+  %tmp12903 = getelementptr inbounds float* %tmp12902, i64 1
+  %tmp12904 = getelementptr inbounds float* %tmp12903, i64 1
+  %tmp12905 = getelementptr inbounds float* %tmp12904, i64 1
+  %tmp12906 = getelementptr inbounds float* %tmp12905, i64 1
+  %tmp12907 = getelementptr inbounds float* %tmp12906, i64 1
+  %tmp12908 = getelementptr inbounds float* %tmp12907, i64 1
+  %tmp12909 = getelementptr inbounds float* %tmp12908, i64 1
+  %tmp12910 = getelementptr inbounds float* %tmp12909, i64 1
+  %tmp12911 = getelementptr inbounds float* %tmp12910, i64 1
+  %tmp12912 = getelementptr inbounds float* %tmp12911, i64 1
+  %tmp12913 = getelementptr inbounds float* %tmp12912, i64 1
+  %tmp12914 = getelementptr inbounds float* %tmp12913, i64 1
+  %tmp12915 = getelementptr inbounds float* %tmp12914, i64 1
+  %tmp12916 = getelementptr inbounds float* %tmp12915, i64 1
+  %tmp12917 = getelementptr inbounds float* %tmp12916, i64 1
+  %tmp12918 = getelementptr inbounds float* %tmp12917, i64 1
+  %tmp12919 = getelementptr inbounds float* %tmp12918, i64 1
+  %tmp12920 = getelementptr inbounds float* %tmp12919, i64 1
+  %tmp12921 = getelementptr inbounds float* %tmp12920, i64 1
+  %tmp12922 = getelementptr inbounds float* %tmp12921, i64 1
+  %tmp12923 = getelementptr inbounds float* %tmp12922, i64 1
+  %tmp12924 = getelementptr inbounds float* %tmp12923, i64 1
+  %tmp12925 = getelementptr inbounds float* %tmp12924, i64 1
+  %tmp12926 = getelementptr inbounds float* %tmp12925, i64 1
+  %tmp12927 = getelementptr inbounds float* %tmp12926, i64 1
+  %tmp12928 = getelementptr inbounds float* %tmp12927, i64 1
+  %tmp12929 = getelementptr inbounds float* %tmp12928, i64 1
+  %tmp12930 = getelementptr inbounds float* %tmp12929, i64 1
+  %tmp12931 = getelementptr inbounds float* %tmp12930, i64 1
+  %tmp12932 = getelementptr inbounds float* %tmp12931, i64 1
+  %tmp12933 = getelementptr inbounds float* %tmp12932, i64 1
+  %tmp12934 = getelementptr inbounds float* %tmp12933, i64 1
+  %tmp12935 = getelementptr inbounds float* %tmp12934, i64 1
+  %tmp12936 = getelementptr inbounds float* %tmp12935, i64 1
+  %tmp12937 = getelementptr inbounds float* %tmp12936, i64 1
+  %tmp12938 = getelementptr inbounds float* %tmp12937, i64 1
+  %tmp12939 = getelementptr inbounds float* %tmp12938, i64 1
+  %tmp12940 = getelementptr inbounds float* %tmp12939, i64 1
+  %tmp12941 = getelementptr inbounds float* %tmp12940, i64 1
+  %tmp12942 = getelementptr inbounds float* %tmp12941, i64 1
+  %tmp12943 = getelementptr inbounds float* %tmp12942, i64 1
+  %tmp12944 = getelementptr inbounds float* %tmp12943, i64 1
+  %tmp12945 = getelementptr inbounds float* %tmp12944, i64 1
+  %tmp12946 = getelementptr inbounds float* %tmp12945, i64 1
+  %tmp12947 = getelementptr inbounds float* %tmp12946, i64 1
+  %tmp12948 = getelementptr inbounds float* %tmp12947, i64 1
+  %tmp12949 = getelementptr inbounds float* %tmp12948, i64 1
+  %tmp12950 = getelementptr inbounds float* %tmp12949, i64 1
+  %tmp12951 = getelementptr inbounds float* %tmp12950, i64 1
+  %tmp12952 = getelementptr inbounds float* %tmp12951, i64 1
+  %tmp12953 = getelementptr inbounds float* %tmp12952, i64 1
+  %tmp12954 = getelementptr inbounds float* %tmp12953, i64 1
+  %tmp12955 = getelementptr inbounds float* %tmp12954, i64 1
+  %tmp12956 = getelementptr inbounds float* %tmp12955, i64 1
+  %tmp12957 = getelementptr inbounds float* %tmp12956, i64 1
+  %tmp12958 = getelementptr inbounds float* %tmp12957, i64 1
+  %tmp12959 = getelementptr inbounds float* %tmp12958, i64 1
+  %tmp12960 = getelementptr inbounds float* %tmp12959, i64 1
+  %tmp12961 = getelementptr inbounds float* %tmp12960, i64 1
+  %tmp12962 = getelementptr inbounds float* %tmp12961, i64 1
+  %tmp12963 = getelementptr inbounds float* %tmp12962, i64 1
+  %tmp12964 = getelementptr inbounds float* %tmp12963, i64 1
+  %tmp12965 = getelementptr inbounds float* %tmp12964, i64 1
+  %tmp12966 = getelementptr inbounds float* %tmp12965, i64 1
+  %tmp12967 = getelementptr inbounds float* %tmp12966, i64 1
+  %tmp12968 = getelementptr inbounds float* %tmp12967, i64 1
+  %tmp12969 = getelementptr inbounds float* %tmp12968, i64 1
+  %tmp12970 = getelementptr inbounds float* %tmp12969, i64 1
+  %tmp12971 = getelementptr inbounds float* %tmp12970, i64 1
+  %tmp12972 = getelementptr inbounds float* %tmp12971, i64 1
+  %tmp12973 = getelementptr inbounds float* %tmp12972, i64 1
+  %tmp12974 = getelementptr inbounds float* %tmp12973, i64 1
+  %tmp12975 = getelementptr inbounds float* %tmp12974, i64 1
+  %tmp12976 = getelementptr inbounds float* %tmp12975, i64 1
+  %tmp12977 = getelementptr inbounds float* %tmp12976, i64 1
+  %tmp12978 = getelementptr inbounds float* %tmp12977, i64 1
+  %tmp12979 = getelementptr inbounds float* %tmp12978, i64 1
+  %tmp12980 = getelementptr inbounds float* %tmp12979, i64 1
+  %tmp12981 = getelementptr inbounds float* %tmp12980, i64 1
+  %tmp12982 = getelementptr inbounds float* %tmp12981, i64 1
+  %tmp12983 = getelementptr inbounds float* %tmp12982, i64 1
+  %tmp12984 = getelementptr inbounds float* %tmp12983, i64 1
+  %tmp12985 = getelementptr inbounds float* %tmp12984, i64 1
+  %tmp12986 = getelementptr inbounds float* %tmp12985, i64 1
+  %tmp12987 = getelementptr inbounds float* %tmp12986, i64 1
+  %tmp12988 = getelementptr inbounds float* %tmp12987, i64 1
+  %tmp12989 = getelementptr inbounds float* %tmp12988, i64 1
+  %tmp12990 = getelementptr inbounds float* %tmp12989, i64 1
+  %tmp12991 = getelementptr inbounds float* %tmp12990, i64 1
+  %tmp12992 = getelementptr inbounds float* %tmp12991, i64 1
+  %tmp12993 = getelementptr inbounds float* %tmp12992, i64 1
+  %tmp12994 = getelementptr inbounds float* %tmp12993, i64 1
+  %tmp12995 = getelementptr inbounds float* %tmp12994, i64 1
+  %tmp12996 = getelementptr inbounds float* %tmp12995, i64 1
+  %tmp12997 = getelementptr inbounds float* %tmp12996, i64 1
+  %tmp12998 = getelementptr inbounds float* %tmp12997, i64 1
+  %tmp12999 = getelementptr inbounds float* %tmp12998, i64 1
+  %tmp13000 = getelementptr inbounds float* %tmp12999, i64 1
+  %tmp13001 = getelementptr inbounds float* %tmp13000, i64 1
+  %tmp13002 = getelementptr inbounds float* %tmp13001, i64 1
+  %tmp13003 = getelementptr inbounds float* %tmp13002, i64 1
+  %tmp13004 = getelementptr inbounds float* %tmp13003, i64 1
+  %tmp13005 = getelementptr inbounds float* %tmp13004, i64 1
+  %tmp13006 = getelementptr inbounds float* %tmp13005, i64 1
+  %tmp13007 = getelementptr inbounds float* %tmp13006, i64 1
+  %tmp13008 = getelementptr inbounds float* %tmp13007, i64 1
+  %tmp13009 = getelementptr inbounds float* %tmp13008, i64 1
+  %tmp13010 = getelementptr inbounds float* %tmp13009, i64 1
+  %tmp13011 = getelementptr inbounds float* %tmp13010, i64 1
+  %tmp13012 = getelementptr inbounds float* %tmp13011, i64 1
+  %tmp13013 = getelementptr inbounds float* %tmp13012, i64 1
+  %tmp13014 = getelementptr inbounds float* %tmp13013, i64 1
+  %tmp13015 = getelementptr inbounds float* %tmp13014, i64 1
+  %tmp13016 = getelementptr inbounds float* %tmp13015, i64 1
+  %tmp13017 = getelementptr inbounds float* %tmp13016, i64 1
+  %tmp13018 = getelementptr inbounds float* %tmp13017, i64 1
+  %tmp13019 = getelementptr inbounds float* %tmp13018, i64 1
+  %tmp13020 = getelementptr inbounds float* %tmp13019, i64 1
+  %tmp13021 = getelementptr inbounds float* %tmp13020, i64 1
+  %tmp13022 = getelementptr inbounds float* %tmp13021, i64 1
+  %tmp13023 = getelementptr inbounds float* %tmp13022, i64 1
+  %tmp13024 = getelementptr inbounds float* %tmp13023, i64 1
+  %tmp13025 = getelementptr inbounds float* %tmp13024, i64 1
+  %tmp13026 = getelementptr inbounds float* %tmp13025, i64 1
+  %tmp13027 = getelementptr inbounds float* %tmp13026, i64 1
+  %tmp13028 = getelementptr inbounds float* %tmp13027, i64 1
+  %tmp13029 = getelementptr inbounds float* %tmp13028, i64 1
+  %tmp13030 = getelementptr inbounds float* %tmp13029, i64 1
+  %tmp13031 = getelementptr inbounds float* %tmp13030, i64 1
+  %tmp13032 = getelementptr inbounds float* %tmp13031, i64 1
+  %tmp13033 = getelementptr inbounds float* %tmp13032, i64 1
+  %tmp13034 = getelementptr inbounds float* %tmp13033, i64 1
+  %tmp13035 = getelementptr inbounds float* %tmp13034, i64 1
+  %tmp13036 = getelementptr inbounds float* %tmp13035, i64 1
+  %tmp13037 = getelementptr inbounds float* %tmp13036, i64 1
+  %tmp13038 = getelementptr inbounds float* %tmp13037, i64 1
+  %tmp13039 = getelementptr inbounds float* %tmp13038, i64 1
+  %tmp13040 = getelementptr inbounds float* %tmp13039, i64 1
+  %tmp13041 = getelementptr inbounds float* %tmp13040, i64 1
+  %tmp13042 = getelementptr inbounds float* %tmp13041, i64 1
+  %tmp13043 = getelementptr inbounds float* %tmp13042, i64 1
+  %tmp13044 = getelementptr inbounds float* %tmp13043, i64 1
+  %tmp13045 = getelementptr inbounds float* %tmp13044, i64 1
+  %tmp13046 = getelementptr inbounds float* %tmp13045, i64 1
+  %tmp13047 = getelementptr inbounds float* %tmp13046, i64 1
+  %tmp13048 = getelementptr inbounds float* %tmp13047, i64 1
+  %tmp13049 = getelementptr inbounds float* %tmp13048, i64 1
+  %tmp13050 = getelementptr inbounds float* %tmp13049, i64 1
+  %tmp13051 = getelementptr inbounds float* %tmp13050, i64 1
+  %tmp13052 = getelementptr inbounds float* %tmp13051, i64 1
+  %tmp13053 = getelementptr inbounds float* %tmp13052, i64 1
+  %tmp13054 = getelementptr inbounds float* %tmp13053, i64 1
+  %tmp13055 = getelementptr inbounds float* %tmp13054, i64 1
+  %tmp13056 = getelementptr inbounds float* %tmp13055, i64 1
+  %tmp13057 = getelementptr inbounds float* %tmp13056, i64 1
+  %tmp13058 = getelementptr inbounds float* %tmp13057, i64 1
+  %tmp13059 = getelementptr inbounds float* %tmp13058, i64 1
+  %tmp13060 = getelementptr inbounds float* %tmp13059, i64 1
+  %tmp13061 = getelementptr inbounds float* %tmp13060, i64 1
+  %tmp13062 = getelementptr inbounds float* %tmp13061, i64 1
+  %tmp13063 = getelementptr inbounds float* %tmp13062, i64 1
+  %tmp13064 = getelementptr inbounds float* %tmp13063, i64 1
+  %tmp13065 = getelementptr inbounds float* %tmp13064, i64 1
+  %tmp13066 = getelementptr inbounds float* %tmp13065, i64 1
+  %tmp13067 = getelementptr inbounds float* %tmp13066, i64 1
+  %tmp13068 = getelementptr inbounds float* %tmp13067, i64 1
+  %tmp13069 = getelementptr inbounds float* %tmp13068, i64 1
+  %tmp13070 = getelementptr inbounds float* %tmp13069, i64 1
+  %tmp13071 = getelementptr inbounds float* %tmp13070, i64 1
+  %tmp13072 = getelementptr inbounds float* %tmp13071, i64 1
+  %tmp13073 = getelementptr inbounds float* %tmp13072, i64 1
+  %tmp13074 = getelementptr inbounds float* %tmp13073, i64 1
+  %tmp13075 = getelementptr inbounds float* %tmp13074, i64 1
+  %tmp13076 = getelementptr inbounds float* %tmp13075, i64 1
+  %tmp13077 = getelementptr inbounds float* %tmp13076, i64 1
+  %tmp13078 = getelementptr inbounds float* %tmp13077, i64 1
+  %tmp13079 = getelementptr inbounds float* %tmp13078, i64 1
+  %tmp13080 = getelementptr inbounds float* %tmp13079, i64 1
+  %tmp13081 = getelementptr inbounds float* %tmp13080, i64 1
+  %tmp13082 = getelementptr inbounds float* %tmp13081, i64 1
+  %tmp13083 = getelementptr inbounds float* %tmp13082, i64 1
+  %tmp13084 = getelementptr inbounds float* %tmp13083, i64 1
+  %tmp13085 = getelementptr inbounds float* %tmp13084, i64 1
+  %tmp13086 = getelementptr inbounds float* %tmp13085, i64 1
+  %tmp13087 = getelementptr inbounds float* %tmp13086, i64 1
+  %tmp13088 = getelementptr inbounds float* %tmp13087, i64 1
+  %tmp13089 = getelementptr inbounds float* %tmp13088, i64 1
+  %tmp13090 = getelementptr inbounds float* %tmp13089, i64 1
+  %tmp13091 = getelementptr inbounds float* %tmp13090, i64 1
+  %tmp13092 = getelementptr inbounds float* %tmp13091, i64 1
+  %tmp13093 = getelementptr inbounds float* %tmp13092, i64 1
+  %tmp13094 = getelementptr inbounds float* %tmp13093, i64 1
+  %tmp13095 = getelementptr inbounds float* %tmp13094, i64 1
+  %tmp13096 = getelementptr inbounds float* %tmp13095, i64 1
+  %tmp13097 = getelementptr inbounds float* %tmp13096, i64 1
+  %tmp13098 = getelementptr inbounds float* %tmp13097, i64 1
+  %tmp13099 = getelementptr inbounds float* %tmp13098, i64 1
+  %tmp13100 = getelementptr inbounds float* %tmp13099, i64 1
+  %tmp13101 = getelementptr inbounds float* %tmp13100, i64 1
+  %tmp13102 = getelementptr inbounds float* %tmp13101, i64 1
+  %tmp13103 = getelementptr inbounds float* %tmp13102, i64 1
+  %tmp13104 = getelementptr inbounds float* %tmp13103, i64 1
+  %tmp13105 = getelementptr inbounds float* %tmp13104, i64 1
+  %tmp13106 = getelementptr inbounds float* %tmp13105, i64 1
+  %tmp13107 = getelementptr inbounds float* %tmp13106, i64 1
+  %tmp13108 = getelementptr inbounds float* %tmp13107, i64 1
+  %tmp13109 = getelementptr inbounds float* %tmp13108, i64 1
+  %tmp13110 = getelementptr inbounds float* %tmp13109, i64 1
+  %tmp13111 = getelementptr inbounds float* %tmp13110, i64 1
+  %tmp13112 = getelementptr inbounds float* %tmp13111, i64 1
+  %tmp13113 = getelementptr inbounds float* %tmp13112, i64 1
+  %tmp13114 = getelementptr inbounds float* %tmp13113, i64 1
+  %tmp13115 = getelementptr inbounds float* %tmp13114, i64 1
+  %tmp13116 = getelementptr inbounds float* %tmp13115, i64 1
+  %tmp13117 = getelementptr inbounds float* %tmp13116, i64 1
+  %tmp13118 = getelementptr inbounds float* %tmp13117, i64 1
+  %tmp13119 = getelementptr inbounds float* %tmp13118, i64 1
+  %tmp13120 = getelementptr inbounds float* %tmp13119, i64 1
+  %tmp13121 = getelementptr inbounds float* %tmp13120, i64 1
+  %tmp13122 = getelementptr inbounds float* %tmp13121, i64 1
+  %tmp13123 = getelementptr inbounds float* %tmp13122, i64 1
+  %tmp13124 = getelementptr inbounds float* %tmp13123, i64 1
+  %tmp13125 = getelementptr inbounds float* %tmp13124, i64 1
+  %tmp13126 = getelementptr inbounds float* %tmp13125, i64 1
+  %tmp13127 = getelementptr inbounds float* %tmp13126, i64 1
+  %tmp13128 = getelementptr inbounds float* %tmp13127, i64 1
+  %tmp13129 = getelementptr inbounds float* %tmp13128, i64 1
+  %tmp13130 = getelementptr inbounds float* %tmp13129, i64 1
+  %tmp13131 = getelementptr inbounds float* %tmp13130, i64 1
+  %tmp13132 = getelementptr inbounds float* %tmp13131, i64 1
+  %tmp13133 = getelementptr inbounds float* %tmp13132, i64 1
+  %tmp13134 = getelementptr inbounds float* %tmp13133, i64 1
+  %tmp13135 = getelementptr inbounds float* %tmp13134, i64 1
+  %tmp13136 = getelementptr inbounds float* %tmp13135, i64 1
+  %tmp13137 = getelementptr inbounds float* %tmp13136, i64 1
+  %tmp13138 = getelementptr inbounds float* %tmp13137, i64 1
+  %tmp13139 = getelementptr inbounds float* %tmp13138, i64 1
+  %tmp13140 = getelementptr inbounds float* %tmp13139, i64 1
+  %tmp13141 = getelementptr inbounds float* %tmp13140, i64 1
+  %tmp13142 = getelementptr inbounds float* %tmp13141, i64 1
+  %tmp13143 = getelementptr inbounds float* %tmp13142, i64 1
+  %tmp13144 = getelementptr inbounds float* %tmp13143, i64 1
+  %tmp13145 = getelementptr inbounds float* %tmp13144, i64 1
+  %tmp13146 = getelementptr inbounds float* %tmp13145, i64 1
+  %tmp13147 = getelementptr inbounds float* %tmp13146, i64 1
+  %tmp13148 = getelementptr inbounds float* %tmp13147, i64 1
+  %tmp13149 = getelementptr inbounds float* %tmp13148, i64 1
+  %tmp13150 = getelementptr inbounds float* %tmp13149, i64 1
+  %tmp13151 = getelementptr inbounds float* %tmp13150, i64 1
+  %tmp13152 = getelementptr inbounds float* %tmp13151, i64 1
+  %tmp13153 = getelementptr inbounds float* %tmp13152, i64 1
+  %tmp13154 = getelementptr inbounds float* %tmp13153, i64 1
+  %tmp13155 = getelementptr inbounds float* %tmp13154, i64 1
+  %tmp13156 = getelementptr inbounds float* %tmp13155, i64 1
+  %tmp13157 = getelementptr inbounds float* %tmp13156, i64 1
+  %tmp13158 = getelementptr inbounds float* %tmp13157, i64 1
+  %tmp13159 = getelementptr inbounds float* %tmp13158, i64 1
+  %tmp13160 = getelementptr inbounds float* %tmp13159, i64 1
+  %tmp13161 = getelementptr inbounds float* %tmp13160, i64 1
+  %tmp13162 = getelementptr inbounds float* %tmp13161, i64 1
+  %tmp13163 = getelementptr inbounds float* %tmp13162, i64 1
+  %tmp13164 = getelementptr inbounds float* %tmp13163, i64 1
+  %tmp13165 = getelementptr inbounds float* %tmp13164, i64 1
+  %tmp13166 = getelementptr inbounds float* %tmp13165, i64 1
+  %tmp13167 = getelementptr inbounds float* %tmp13166, i64 1
+  %tmp13168 = getelementptr inbounds float* %tmp13167, i64 1
+  %tmp13169 = getelementptr inbounds float* %tmp13168, i64 1
+  %tmp13170 = getelementptr inbounds float* %tmp13169, i64 1
+  %tmp13171 = getelementptr inbounds float* %tmp13170, i64 1
+  %tmp13172 = getelementptr inbounds float* %tmp13171, i64 1
+  %tmp13173 = getelementptr inbounds float* %tmp13172, i64 1
+  %tmp13174 = getelementptr inbounds float* %tmp13173, i64 1
+  %tmp13175 = getelementptr inbounds float* %tmp13174, i64 1
+  %tmp13176 = getelementptr inbounds float* %tmp13175, i64 1
+  %tmp13177 = getelementptr inbounds float* %tmp13176, i64 1
+  %tmp13178 = getelementptr inbounds float* %tmp13177, i64 1
+  %tmp13179 = getelementptr inbounds float* %tmp13178, i64 1
+  %tmp13180 = getelementptr inbounds float* %tmp13179, i64 1
+  %tmp13181 = getelementptr inbounds float* %tmp13180, i64 1
+  %tmp13182 = getelementptr inbounds float* %tmp13181, i64 1
+  %tmp13183 = getelementptr inbounds float* %tmp13182, i64 1
+  %tmp13184 = getelementptr inbounds float* %tmp13183, i64 1
+  %tmp13185 = getelementptr inbounds float* %tmp13184, i64 1
+  %tmp13186 = getelementptr inbounds float* %tmp13185, i64 1
+  %tmp13187 = getelementptr inbounds float* %tmp13186, i64 1
+  %tmp13188 = getelementptr inbounds float* %tmp13187, i64 1
+  %tmp13189 = getelementptr inbounds float* %tmp13188, i64 1
+  %tmp13190 = getelementptr inbounds float* %tmp13189, i64 1
+  %tmp13191 = getelementptr inbounds float* %tmp13190, i64 1
+  %tmp13192 = getelementptr inbounds float* %tmp13191, i64 1
+  %tmp13193 = getelementptr inbounds float* %tmp13192, i64 1
+  %tmp13194 = getelementptr inbounds float* %tmp13193, i64 1
+  %tmp13195 = getelementptr inbounds float* %tmp13194, i64 1
+  %tmp13196 = getelementptr inbounds float* %tmp13195, i64 1
+  %tmp13197 = getelementptr inbounds float* %tmp13196, i64 1
+  %tmp13198 = getelementptr inbounds float* %tmp13197, i64 1
+  %tmp13199 = getelementptr inbounds float* %tmp13198, i64 1
+  %tmp13200 = getelementptr inbounds float* %tmp13199, i64 1
+  %tmp13201 = getelementptr inbounds float* %tmp13200, i64 1
+  %tmp13202 = getelementptr inbounds float* %tmp13201, i64 1
+  %tmp13203 = getelementptr inbounds float* %tmp13202, i64 1
+  %tmp13204 = getelementptr inbounds float* %tmp13203, i64 1
+  %tmp13205 = getelementptr inbounds float* %tmp13204, i64 1
+  %tmp13206 = getelementptr inbounds float* %tmp13205, i64 1
+  %tmp13207 = getelementptr inbounds float* %tmp13206, i64 1
+  %tmp13208 = getelementptr inbounds float* %tmp13207, i64 1
+  %tmp13209 = getelementptr inbounds float* %tmp13208, i64 1
+  %tmp13210 = getelementptr inbounds float* %tmp13209, i64 1
+  %tmp13211 = getelementptr inbounds float* %tmp13210, i64 1
+  %tmp13212 = getelementptr inbounds float* %tmp13211, i64 1
+  %tmp13213 = getelementptr inbounds float* %tmp13212, i64 1
+  %tmp13214 = getelementptr inbounds float* %tmp13213, i64 1
+  %tmp13215 = getelementptr inbounds float* %tmp13214, i64 1
+  %tmp13216 = getelementptr inbounds float* %tmp13215, i64 1
+  %tmp13217 = getelementptr inbounds float* %tmp13216, i64 1
+  %tmp13218 = getelementptr inbounds float* %tmp13217, i64 1
+  %tmp13219 = getelementptr inbounds float* %tmp13218, i64 1
+  %tmp13220 = getelementptr inbounds float* %tmp13219, i64 1
+  %tmp13221 = getelementptr inbounds float* %tmp13220, i64 1
+  %tmp13222 = getelementptr inbounds float* %tmp13221, i64 1
+  %tmp13223 = getelementptr inbounds float* %tmp13222, i64 1
+  %tmp13224 = getelementptr inbounds float* %tmp13223, i64 1
+  %tmp13225 = getelementptr inbounds float* %tmp13224, i64 1
+  %tmp13226 = getelementptr inbounds float* %tmp13225, i64 1
+  %tmp13227 = getelementptr inbounds float* %tmp13226, i64 1
+  %tmp13228 = getelementptr inbounds float* %tmp13227, i64 1
+  %tmp13229 = getelementptr inbounds float* %tmp13228, i64 1
+  %tmp13230 = getelementptr inbounds float* %tmp13229, i64 1
+  %tmp13231 = getelementptr inbounds float* %tmp13230, i64 1
+  %tmp13232 = getelementptr inbounds float* %tmp13231, i64 1
+  %tmp13233 = getelementptr inbounds float* %tmp13232, i64 1
+  %tmp13234 = getelementptr inbounds float* %tmp13233, i64 1
+  %tmp13235 = getelementptr inbounds float* %tmp13234, i64 1
+  %tmp13236 = getelementptr inbounds float* %tmp13235, i64 1
+  %tmp13237 = getelementptr inbounds float* %tmp13236, i64 1
+  %tmp13238 = getelementptr inbounds float* %tmp13237, i64 1
+  %tmp13239 = getelementptr inbounds float* %tmp13238, i64 1
+  %tmp13240 = getelementptr inbounds float* %tmp13239, i64 1
+  %tmp13241 = getelementptr inbounds float* %tmp13240, i64 1
+  %tmp13242 = getelementptr inbounds float* %tmp13241, i64 1
+  %tmp13243 = getelementptr inbounds float* %tmp13242, i64 1
+  %tmp13244 = getelementptr inbounds float* %tmp13243, i64 1
+  %tmp13245 = getelementptr inbounds float* %tmp13244, i64 1
+  %tmp13246 = getelementptr inbounds float* %tmp13245, i64 1
+  %tmp13247 = getelementptr inbounds float* %tmp13246, i64 1
+  %tmp13248 = getelementptr inbounds float* %tmp13247, i64 1
+  %tmp13249 = getelementptr inbounds float* %tmp13248, i64 1
+  %tmp13250 = getelementptr inbounds float* %tmp13249, i64 1
+  %tmp13251 = getelementptr inbounds float* %tmp13250, i64 1
+  %tmp13252 = getelementptr inbounds float* %tmp13251, i64 1
+  %tmp13253 = getelementptr inbounds float* %tmp13252, i64 1
+  %tmp13254 = getelementptr inbounds float* %tmp13253, i64 1
+  %tmp13255 = getelementptr inbounds float* %tmp13254, i64 1
+  %tmp13256 = getelementptr inbounds float* %tmp13255, i64 1
+  %tmp13257 = getelementptr inbounds float* %tmp13256, i64 1
+  %tmp13258 = getelementptr inbounds float* %tmp13257, i64 1
+  %tmp13259 = getelementptr inbounds float* %tmp13258, i64 1
+  %tmp13260 = getelementptr inbounds float* %tmp13259, i64 1
+  %tmp13261 = getelementptr inbounds float* %tmp13260, i64 1
+  %tmp13262 = getelementptr inbounds float* %tmp13261, i64 1
+  %tmp13263 = getelementptr inbounds float* %tmp13262, i64 1
+  %tmp13264 = getelementptr inbounds float* %tmp13263, i64 1
+  %tmp13265 = getelementptr inbounds float* %tmp13264, i64 1
+  %tmp13266 = getelementptr inbounds float* %tmp13265, i64 1
+  %tmp13267 = getelementptr inbounds float* %tmp13266, i64 1
+  %tmp13268 = getelementptr inbounds float* %tmp13267, i64 1
+  %tmp13269 = getelementptr inbounds float* %tmp13268, i64 1
+  %tmp13270 = getelementptr inbounds float* %tmp13269, i64 1
+  %tmp13271 = getelementptr inbounds float* %tmp13270, i64 1
+  %tmp13272 = getelementptr inbounds float* %tmp13271, i64 1
+  %tmp13273 = getelementptr inbounds float* %tmp13272, i64 1
+  %tmp13274 = getelementptr inbounds float* %tmp13273, i64 1
+  %tmp13275 = getelementptr inbounds float* %tmp13274, i64 1
+  %tmp13276 = getelementptr inbounds float* %tmp13275, i64 1
+  %tmp13277 = getelementptr inbounds float* %tmp13276, i64 1
+  %tmp13278 = getelementptr inbounds float* %tmp13277, i64 1
+  %tmp13279 = getelementptr inbounds float* %tmp13278, i64 1
+  %tmp13280 = getelementptr inbounds float* %tmp13279, i64 1
+  %tmp13281 = getelementptr inbounds float* %tmp13280, i64 1
+  %tmp13282 = getelementptr inbounds float* %tmp13281, i64 1
+  %tmp13283 = getelementptr inbounds float* %tmp13282, i64 1
+  %tmp13284 = getelementptr inbounds float* %tmp13283, i64 1
+  %tmp13285 = getelementptr inbounds float* %tmp13284, i64 1
+  %tmp13286 = getelementptr inbounds float* %tmp13285, i64 1
+  %tmp13287 = getelementptr inbounds float* %tmp13286, i64 1
+  %tmp13288 = getelementptr inbounds float* %tmp13287, i64 1
+  %tmp13289 = getelementptr inbounds float* %tmp13288, i64 1
+  %tmp13290 = getelementptr inbounds float* %tmp13289, i64 1
+  %tmp13291 = getelementptr inbounds float* %tmp13290, i64 1
+  %tmp13292 = getelementptr inbounds float* %tmp13291, i64 1
+  %tmp13293 = getelementptr inbounds float* %tmp13292, i64 1
+  %tmp13294 = getelementptr inbounds float* %tmp13293, i64 1
+  %tmp13295 = getelementptr inbounds float* %tmp13294, i64 1
+  %tmp13296 = getelementptr inbounds float* %tmp13295, i64 1
+  %tmp13297 = getelementptr inbounds float* %tmp13296, i64 1
+  %tmp13298 = getelementptr inbounds float* %tmp13297, i64 1
+  %tmp13299 = getelementptr inbounds float* %tmp13298, i64 1
+  %tmp13300 = getelementptr inbounds float* %tmp13299, i64 1
+  %tmp13301 = getelementptr inbounds float* %tmp13300, i64 1
+  %tmp13302 = getelementptr inbounds float* %tmp13301, i64 1
+  %tmp13303 = getelementptr inbounds float* %tmp13302, i64 1
+  %tmp13304 = getelementptr inbounds float* %tmp13303, i64 1
+  %tmp13305 = getelementptr inbounds float* %tmp13304, i64 1
+  %tmp13306 = getelementptr inbounds float* %tmp13305, i64 1
+  %tmp13307 = getelementptr inbounds float* %tmp13306, i64 1
+  %tmp13308 = getelementptr inbounds float* %tmp13307, i64 1
+  %tmp13309 = getelementptr inbounds float* %tmp13308, i64 1
+  %tmp13310 = getelementptr inbounds float* %tmp13309, i64 1
+  %tmp13311 = getelementptr inbounds float* %tmp13310, i64 1
+  %tmp13312 = getelementptr inbounds float* %tmp13311, i64 1
+  %tmp13313 = getelementptr inbounds float* %tmp13312, i64 1
+  %tmp13314 = getelementptr inbounds float* %tmp13313, i64 1
+  %tmp13315 = getelementptr inbounds float* %tmp13314, i64 1
+  %tmp13316 = getelementptr inbounds float* %tmp13315, i64 1
+  %tmp13317 = getelementptr inbounds float* %tmp13316, i64 1
+  %tmp13318 = getelementptr inbounds float* %tmp13317, i64 1
+  %tmp13319 = getelementptr inbounds float* %tmp13318, i64 1
+  %tmp13320 = getelementptr inbounds float* %tmp13319, i64 1
+  %tmp13321 = getelementptr inbounds float* %tmp13320, i64 1
+  %tmp13322 = getelementptr inbounds float* %tmp13321, i64 1
+  %tmp13323 = getelementptr inbounds float* %tmp13322, i64 1
+  %tmp13324 = getelementptr inbounds float* %tmp13323, i64 1
+  %tmp13325 = getelementptr inbounds float* %tmp13324, i64 1
+  %tmp13326 = getelementptr inbounds float* %tmp13325, i64 1
+  %tmp13327 = getelementptr inbounds float* %tmp13326, i64 1
+  %tmp13328 = getelementptr inbounds float* %tmp13327, i64 1
+  %tmp13329 = getelementptr inbounds float* %tmp13328, i64 1
+  %tmp13330 = getelementptr inbounds float* %tmp13329, i64 1
+  %tmp13331 = getelementptr inbounds float* %tmp13330, i64 1
+  %tmp13332 = getelementptr inbounds float* %tmp13331, i64 1
+  %tmp13333 = getelementptr inbounds float* %tmp13332, i64 1
+  %tmp13334 = getelementptr inbounds float* %tmp13333, i64 1
+  %tmp13335 = getelementptr inbounds float* %tmp13334, i64 1
+  %tmp13336 = getelementptr inbounds float* %tmp13335, i64 1
+  %tmp13337 = getelementptr inbounds float* %tmp13336, i64 1
+  %tmp13338 = getelementptr inbounds float* %tmp13337, i64 1
+  %tmp13339 = getelementptr inbounds float* %tmp13338, i64 1
+  %tmp13340 = getelementptr inbounds float* %tmp13339, i64 1
+  %tmp13341 = getelementptr inbounds float* %tmp13340, i64 1
+  %tmp13342 = getelementptr inbounds float* %tmp13341, i64 1
+  %tmp13343 = getelementptr inbounds float* %tmp13342, i64 1
+  %tmp13344 = getelementptr inbounds float* %tmp13343, i64 1
+  %tmp13345 = getelementptr inbounds float* %tmp13344, i64 1
+  %tmp13346 = getelementptr inbounds float* %tmp13345, i64 1
+  %tmp13347 = getelementptr inbounds float* %tmp13346, i64 1
+  %tmp13348 = getelementptr inbounds float* %tmp13347, i64 1
+  %tmp13349 = getelementptr inbounds float* %tmp13348, i64 1
+  %tmp13350 = getelementptr inbounds float* %tmp13349, i64 1
+  %tmp13351 = getelementptr inbounds float* %tmp13350, i64 1
+  %tmp13352 = getelementptr inbounds float* %tmp13351, i64 1
+  %tmp13353 = getelementptr inbounds float* %tmp13352, i64 1
+  %tmp13354 = getelementptr inbounds float* %tmp13353, i64 1
+  %tmp13355 = getelementptr inbounds float* %tmp13354, i64 1
+  %tmp13356 = getelementptr inbounds float* %tmp13355, i64 1
+  %tmp13357 = getelementptr inbounds float* %tmp13356, i64 1
+  %tmp13358 = getelementptr inbounds float* %tmp13357, i64 1
+  %tmp13359 = getelementptr inbounds float* %tmp13358, i64 1
+  %tmp13360 = getelementptr inbounds float* %tmp13359, i64 1
+  %tmp13361 = getelementptr inbounds float* %tmp13360, i64 1
+  %tmp13362 = getelementptr inbounds float* %tmp13361, i64 1
+  %tmp13363 = getelementptr inbounds float* %tmp13362, i64 1
+  %tmp13364 = getelementptr inbounds float* %tmp13363, i64 1
+  %tmp13365 = getelementptr inbounds float* %tmp13364, i64 1
+  %tmp13366 = getelementptr inbounds float* %tmp13365, i64 1
+  %tmp13367 = getelementptr inbounds float* %tmp13366, i64 1
+  %tmp13368 = getelementptr inbounds float* %tmp13367, i64 1
+  %tmp13369 = getelementptr inbounds float* %tmp13368, i64 1
+  %tmp13370 = getelementptr inbounds float* %tmp13369, i64 1
+  %tmp13371 = getelementptr inbounds float* %tmp13370, i64 1
+  %tmp13372 = getelementptr inbounds float* %tmp13371, i64 1
+  %tmp13373 = getelementptr inbounds float* %tmp13372, i64 1
+  %tmp13374 = getelementptr inbounds float* %tmp13373, i64 1
+  %tmp13375 = getelementptr inbounds float* %tmp13374, i64 1
+  %tmp13376 = getelementptr inbounds float* %tmp13375, i64 1
+  %tmp13377 = getelementptr inbounds float* %tmp13376, i64 1
+  %tmp13378 = getelementptr inbounds float* %tmp13377, i64 1
+  %tmp13379 = getelementptr inbounds float* %tmp13378, i64 1
+  %tmp13380 = getelementptr inbounds float* %tmp13379, i64 1
+  %tmp13381 = getelementptr inbounds float* %tmp13380, i64 1
+  %tmp13382 = getelementptr inbounds float* %tmp13381, i64 1
+  %tmp13383 = getelementptr inbounds float* %tmp13382, i64 1
+  %tmp13384 = getelementptr inbounds float* %tmp13383, i64 1
+  %tmp13385 = getelementptr inbounds float* %tmp13384, i64 1
+  %tmp13386 = getelementptr inbounds float* %tmp13385, i64 1
+  %tmp13387 = getelementptr inbounds float* %tmp13386, i64 1
+  %tmp13388 = getelementptr inbounds float* %tmp13387, i64 1
+  %tmp13389 = getelementptr inbounds float* %tmp13388, i64 1
+  %tmp13390 = getelementptr inbounds float* %tmp13389, i64 1
+  %tmp13391 = getelementptr inbounds float* %tmp13390, i64 1
+  %tmp13392 = getelementptr inbounds float* %tmp13391, i64 1
+  %tmp13393 = getelementptr inbounds float* %tmp13392, i64 1
+  %tmp13394 = getelementptr inbounds float* %tmp13393, i64 1
+  %tmp13395 = getelementptr inbounds float* %tmp13394, i64 1
+  %tmp13396 = getelementptr inbounds float* %tmp13395, i64 1
+  %tmp13397 = getelementptr inbounds float* %tmp13396, i64 1
+  %tmp13398 = getelementptr inbounds float* %tmp13397, i64 1
+  %tmp13399 = getelementptr inbounds float* %tmp13398, i64 1
+  %tmp13400 = getelementptr inbounds float* %tmp13399, i64 1
+  %tmp13401 = getelementptr inbounds float* %tmp13400, i64 1
+  %tmp13402 = getelementptr inbounds float* %tmp13401, i64 1
+  %tmp13403 = getelementptr inbounds float* %tmp13402, i64 1
+  %tmp13404 = getelementptr inbounds float* %tmp13403, i64 1
+  %tmp13405 = getelementptr inbounds float* %tmp13404, i64 1
+  %tmp13406 = getelementptr inbounds float* %tmp13405, i64 1
+  %tmp13407 = getelementptr inbounds float* %tmp13406, i64 1
+  %tmp13408 = getelementptr inbounds float* %tmp13407, i64 1
+  %tmp13409 = getelementptr inbounds float* %tmp13408, i64 1
+  %tmp13410 = getelementptr inbounds float* %tmp13409, i64 1
+  %tmp13411 = getelementptr inbounds float* %tmp13410, i64 1
+  %tmp13412 = getelementptr inbounds float* %tmp13411, i64 1
+  %tmp13413 = getelementptr inbounds float* %tmp13412, i64 1
+  %tmp13414 = getelementptr inbounds float* %tmp13413, i64 1
+  %tmp13415 = getelementptr inbounds float* %tmp13414, i64 1
+  %tmp13416 = getelementptr inbounds float* %tmp13415, i64 1
+  %tmp13417 = getelementptr inbounds float* %tmp13416, i64 1
+  %tmp13418 = getelementptr inbounds float* %tmp13417, i64 1
+  %tmp13419 = getelementptr inbounds float* %tmp13418, i64 1
+  %tmp13420 = getelementptr inbounds float* %tmp13419, i64 1
+  %tmp13421 = getelementptr inbounds float* %tmp13420, i64 1
+  %tmp13422 = getelementptr inbounds float* %tmp13421, i64 1
+  %tmp13423 = getelementptr inbounds float* %tmp13422, i64 1
+  %tmp13424 = getelementptr inbounds float* %tmp13423, i64 1
+  %tmp13425 = getelementptr inbounds float* %tmp13424, i64 1
+  %tmp13426 = getelementptr inbounds float* %tmp13425, i64 1
+  %tmp13427 = getelementptr inbounds float* %tmp13426, i64 1
+  %tmp13428 = getelementptr inbounds float* %tmp13427, i64 1
+  %tmp13429 = getelementptr inbounds float* %tmp13428, i64 1
+  %tmp13430 = getelementptr inbounds float* %tmp13429, i64 1
+  %tmp13431 = getelementptr inbounds float* %tmp13430, i64 1
+  %tmp13432 = getelementptr inbounds float* %tmp13431, i64 1
+  %tmp13433 = getelementptr inbounds float* %tmp13432, i64 1
+  %tmp13434 = getelementptr inbounds float* %tmp13433, i64 1
+  %tmp13435 = getelementptr inbounds float* %tmp13434, i64 1
+  %tmp13436 = getelementptr inbounds float* %tmp13435, i64 1
+  %tmp13437 = getelementptr inbounds float* %tmp13436, i64 1
+  %tmp13438 = getelementptr inbounds float* %tmp13437, i64 1
+  %tmp13439 = getelementptr inbounds float* %tmp13438, i64 1
+  %tmp13440 = getelementptr inbounds float* %tmp13439, i64 1
+  %tmp13441 = getelementptr inbounds float* %tmp13440, i64 1
+  %tmp13442 = getelementptr inbounds float* %tmp13441, i64 1
+  %tmp13443 = getelementptr inbounds float* %tmp13442, i64 1
+  %tmp13444 = getelementptr inbounds float* %tmp13443, i64 1
+  %tmp13445 = getelementptr inbounds float* %tmp13444, i64 1
+  %tmp13446 = getelementptr inbounds float* %tmp13445, i64 1
+  %tmp13447 = getelementptr inbounds float* %tmp13446, i64 1
+  %tmp13448 = getelementptr inbounds float* %tmp13447, i64 1
+  %tmp13449 = getelementptr inbounds float* %tmp13448, i64 1
+  %tmp13450 = getelementptr inbounds float* %tmp13449, i64 1
+  %tmp13451 = getelementptr inbounds float* %tmp13450, i64 1
+  %tmp13452 = getelementptr inbounds float* %tmp13451, i64 1
+  %tmp13453 = getelementptr inbounds float* %tmp13452, i64 1
+  %tmp13454 = getelementptr inbounds float* %tmp13453, i64 1
+  %tmp13455 = getelementptr inbounds float* %tmp13454, i64 1
+  %tmp13456 = getelementptr inbounds float* %tmp13455, i64 1
+  %tmp13457 = getelementptr inbounds float* %tmp13456, i64 1
+  %tmp13458 = getelementptr inbounds float* %tmp13457, i64 1
+  %tmp13459 = getelementptr inbounds float* %tmp13458, i64 1
+  %tmp13460 = getelementptr inbounds float* %tmp13459, i64 1
+  %tmp13461 = getelementptr inbounds float* %tmp13460, i64 1
+  %tmp13462 = getelementptr inbounds float* %tmp13461, i64 1
+  %tmp13463 = getelementptr inbounds float* %tmp13462, i64 1
+  %tmp13464 = getelementptr inbounds float* %tmp13463, i64 1
+  %tmp13465 = getelementptr inbounds float* %tmp13464, i64 1
+  %tmp13466 = getelementptr inbounds float* %tmp13465, i64 1
+  %tmp13467 = getelementptr inbounds float* %tmp13466, i64 1
+  %tmp13468 = getelementptr inbounds float* %tmp13467, i64 1
+  %tmp13469 = getelementptr inbounds float* %tmp13468, i64 1
+  %tmp13470 = getelementptr inbounds float* %tmp13469, i64 1
+  %tmp13471 = getelementptr inbounds float* %tmp13470, i64 1
+  %tmp13472 = getelementptr inbounds float* %tmp13471, i64 1
+  %tmp13473 = getelementptr inbounds float* %tmp13472, i64 1
+  %tmp13474 = getelementptr inbounds float* %tmp13473, i64 1
+  %tmp13475 = getelementptr inbounds float* %tmp13474, i64 1
+  %tmp13476 = getelementptr inbounds float* %tmp13475, i64 1
+  %tmp13477 = getelementptr inbounds float* %tmp13476, i64 1
+  %tmp13478 = getelementptr inbounds float* %tmp13477, i64 1
+  %tmp13479 = getelementptr inbounds float* %tmp13478, i64 1
+  %tmp13480 = getelementptr inbounds float* %tmp13479, i64 1
+  %tmp13481 = getelementptr inbounds float* %tmp13480, i64 1
+  %tmp13482 = getelementptr inbounds float* %tmp13481, i64 1
+  %tmp13483 = getelementptr inbounds float* %tmp13482, i64 1
+  %tmp13484 = getelementptr inbounds float* %tmp13483, i64 1
+  %tmp13485 = getelementptr inbounds float* %tmp13484, i64 1
+  %tmp13486 = getelementptr inbounds float* %tmp13485, i64 1
+  %tmp13487 = getelementptr inbounds float* %tmp13486, i64 1
+  %tmp13488 = getelementptr inbounds float* %tmp13487, i64 1
+  %tmp13489 = getelementptr inbounds float* %tmp13488, i64 1
+  %tmp13490 = getelementptr inbounds float* %tmp13489, i64 1
+  %tmp13491 = getelementptr inbounds float* %tmp13490, i64 1
+  %tmp13492 = getelementptr inbounds float* %tmp13491, i64 1
+  %tmp13493 = getelementptr inbounds float* %tmp13492, i64 1
+  %tmp13494 = getelementptr inbounds float* %tmp13493, i64 1
+  %tmp13495 = getelementptr inbounds float* %tmp13494, i64 1
+  %tmp13496 = getelementptr inbounds float* %tmp13495, i64 1
+  %tmp13497 = getelementptr inbounds float* %tmp13496, i64 1
+  %tmp13498 = getelementptr inbounds float* %tmp13497, i64 1
+  %tmp13499 = getelementptr inbounds float* %tmp13498, i64 1
+  %tmp13500 = getelementptr inbounds float* %tmp13499, i64 1
+  %tmp13501 = getelementptr inbounds float* %tmp13500, i64 1
+  %tmp13502 = getelementptr inbounds float* %tmp13501, i64 1
+  %tmp13503 = getelementptr inbounds float* %tmp13502, i64 1
+  %tmp13504 = getelementptr inbounds float* %tmp13503, i64 1
+  %tmp13505 = getelementptr inbounds float* %tmp13504, i64 1
+  %tmp13506 = getelementptr inbounds float* %tmp13505, i64 1
+  %tmp13507 = getelementptr inbounds float* %tmp13506, i64 1
+  %tmp13508 = getelementptr inbounds float* %tmp13507, i64 1
+  %tmp13509 = getelementptr inbounds float* %tmp13508, i64 1
+  %tmp13510 = getelementptr inbounds float* %tmp13509, i64 1
+  %tmp13511 = getelementptr inbounds float* %tmp13510, i64 1
+  %tmp13512 = getelementptr inbounds float* %tmp13511, i64 1
+  %tmp13513 = getelementptr inbounds float* %tmp13512, i64 1
+  %tmp13514 = getelementptr inbounds float* %tmp13513, i64 1
+  %tmp13515 = getelementptr inbounds float* %tmp13514, i64 1
+  %tmp13516 = getelementptr inbounds float* %tmp13515, i64 1
+  %tmp13517 = getelementptr inbounds float* %tmp13516, i64 1
+  %tmp13518 = getelementptr inbounds float* %tmp13517, i64 1
+  %tmp13519 = getelementptr inbounds float* %tmp13518, i64 1
+  %tmp13520 = getelementptr inbounds float* %tmp13519, i64 1
+  %tmp13521 = getelementptr inbounds float* %tmp13520, i64 1
+  %tmp13522 = getelementptr inbounds float* %tmp13521, i64 1
+  %tmp13523 = getelementptr inbounds float* %tmp13522, i64 1
+  %tmp13524 = getelementptr inbounds float* %tmp13523, i64 1
+  %tmp13525 = getelementptr inbounds float* %tmp13524, i64 1
+  %tmp13526 = getelementptr inbounds float* %tmp13525, i64 1
+  %tmp13527 = getelementptr inbounds float* %tmp13526, i64 1
+  %tmp13528 = getelementptr inbounds float* %tmp13527, i64 1
+  %tmp13529 = getelementptr inbounds float* %tmp13528, i64 1
+  %tmp13530 = getelementptr inbounds float* %tmp13529, i64 1
+  %tmp13531 = getelementptr inbounds float* %tmp13530, i64 1
+  %tmp13532 = getelementptr inbounds float* %tmp13531, i64 1
+  %tmp13533 = getelementptr inbounds float* %tmp13532, i64 1
+  %tmp13534 = getelementptr inbounds float* %tmp13533, i64 1
+  %tmp13535 = getelementptr inbounds float* %tmp13534, i64 1
+  %tmp13536 = getelementptr inbounds float* %tmp13535, i64 1
+  %tmp13537 = getelementptr inbounds float* %tmp13536, i64 1
+  %tmp13538 = getelementptr inbounds float* %tmp13537, i64 1
+  %tmp13539 = getelementptr inbounds float* %tmp13538, i64 1
+  %tmp13540 = getelementptr inbounds float* %tmp13539, i64 1
+  %tmp13541 = getelementptr inbounds float* %tmp13540, i64 1
+  %tmp13542 = getelementptr inbounds float* %tmp13541, i64 1
+  %tmp13543 = getelementptr inbounds float* %tmp13542, i64 1
+  %tmp13544 = getelementptr inbounds float* %tmp13543, i64 1
+  %tmp13545 = getelementptr inbounds float* %tmp13544, i64 1
+  %tmp13546 = getelementptr inbounds float* %tmp13545, i64 1
+  %tmp13547 = getelementptr inbounds float* %tmp13546, i64 1
+  %tmp13548 = getelementptr inbounds float* %tmp13547, i64 1
+  %tmp13549 = getelementptr inbounds float* %tmp13548, i64 1
+  %tmp13550 = getelementptr inbounds float* %tmp13549, i64 1
+  %tmp13551 = getelementptr inbounds float* %tmp13550, i64 1
+  %tmp13552 = getelementptr inbounds float* %tmp13551, i64 1
+  %tmp13553 = getelementptr inbounds float* %tmp13552, i64 1
+  %tmp13554 = getelementptr inbounds float* %tmp13553, i64 1
+  %tmp13555 = getelementptr inbounds float* %tmp13554, i64 1
+  %tmp13556 = getelementptr inbounds float* %tmp13555, i64 1
+  %tmp13557 = getelementptr inbounds float* %tmp13556, i64 1
+  %tmp13558 = getelementptr inbounds float* %tmp13557, i64 1
+  %tmp13559 = getelementptr inbounds float* %tmp13558, i64 1
+  %tmp13560 = getelementptr inbounds float* %tmp13559, i64 1
+  %tmp13561 = getelementptr inbounds float* %tmp13560, i64 1
+  %tmp13562 = getelementptr inbounds float* %tmp13561, i64 1
+  %tmp13563 = getelementptr inbounds float* %tmp13562, i64 1
+  %tmp13564 = getelementptr inbounds float* %tmp13563, i64 1
+  %tmp13565 = getelementptr inbounds float* %tmp13564, i64 1
+  %tmp13566 = getelementptr inbounds float* %tmp13565, i64 1
+  %tmp13567 = getelementptr inbounds float* %tmp13566, i64 1
+  %tmp13568 = getelementptr inbounds float* %tmp13567, i64 1
+  %tmp13569 = getelementptr inbounds float* %tmp13568, i64 1
+  %tmp13570 = getelementptr inbounds float* %tmp13569, i64 1
+  %tmp13571 = getelementptr inbounds float* %tmp13570, i64 1
+  %tmp13572 = getelementptr inbounds float* %tmp13571, i64 1
+  %tmp13573 = getelementptr inbounds float* %tmp13572, i64 1
+  %tmp13574 = getelementptr inbounds float* %tmp13573, i64 1
+  %tmp13575 = getelementptr inbounds float* %tmp13574, i64 1
+  %tmp13576 = getelementptr inbounds float* %tmp13575, i64 1
+  %tmp13577 = getelementptr inbounds float* %tmp13576, i64 1
+  %tmp13578 = getelementptr inbounds float* %tmp13577, i64 1
+  %tmp13579 = getelementptr inbounds float* %tmp13578, i64 1
+  %tmp13580 = getelementptr inbounds float* %tmp13579, i64 1
+  %tmp13581 = getelementptr inbounds float* %tmp13580, i64 1
+  %tmp13582 = getelementptr inbounds float* %tmp13581, i64 1
+  %tmp13583 = getelementptr inbounds float* %tmp13582, i64 1
+  %tmp13584 = getelementptr inbounds float* %tmp13583, i64 1
+  %tmp13585 = getelementptr inbounds float* %tmp13584, i64 1
+  %tmp13586 = getelementptr inbounds float* %tmp13585, i64 1
+  %tmp13587 = getelementptr inbounds float* %tmp13586, i64 1
+  %tmp13588 = getelementptr inbounds float* %tmp13587, i64 1
+  %tmp13589 = getelementptr inbounds float* %tmp13588, i64 1
+  %tmp13590 = getelementptr inbounds float* %tmp13589, i64 1
+  %tmp13591 = getelementptr inbounds float* %tmp13590, i64 1
+  %tmp13592 = getelementptr inbounds float* %tmp13591, i64 1
+  %tmp13593 = getelementptr inbounds float* %tmp13592, i64 1
+  %tmp13594 = getelementptr inbounds float* %tmp13593, i64 1
+  %tmp13595 = getelementptr inbounds float* %tmp13594, i64 1
+  %tmp13596 = getelementptr inbounds float* %tmp13595, i64 1
+  %tmp13597 = getelementptr inbounds float* %tmp13596, i64 1
+  %tmp13598 = getelementptr inbounds float* %tmp13597, i64 1
+  %tmp13599 = getelementptr inbounds float* %tmp13598, i64 1
+  %tmp13600 = getelementptr inbounds float* %tmp13599, i64 1
+  %tmp13601 = getelementptr inbounds float* %tmp13600, i64 1
+  %tmp13602 = getelementptr inbounds float* %tmp13601, i64 1
+  %tmp13603 = getelementptr inbounds float* %tmp13602, i64 1
+  %tmp13604 = getelementptr inbounds float* %tmp13603, i64 1
+  %tmp13605 = getelementptr inbounds float* %tmp13604, i64 1
+  %tmp13606 = getelementptr inbounds float* %tmp13605, i64 1
+  %tmp13607 = getelementptr inbounds float* %tmp13606, i64 1
+  %tmp13608 = getelementptr inbounds float* %tmp13607, i64 1
+  %tmp13609 = getelementptr inbounds float* %tmp13608, i64 1
+  %tmp13610 = getelementptr inbounds float* %tmp13609, i64 1
+  %tmp13611 = getelementptr inbounds float* %tmp13610, i64 1
+  %tmp13612 = getelementptr inbounds float* %tmp13611, i64 1
+  %tmp13613 = getelementptr inbounds float* %tmp13612, i64 1
+  %tmp13614 = getelementptr inbounds float* %tmp13613, i64 1
+  %tmp13615 = getelementptr inbounds float* %tmp13614, i64 1
+  %tmp13616 = getelementptr inbounds float* %tmp13615, i64 1
+  %tmp13617 = getelementptr inbounds float* %tmp13616, i64 1
+  %tmp13618 = getelementptr inbounds float* %tmp13617, i64 1
+  %tmp13619 = getelementptr inbounds float* %tmp13618, i64 1
+  %tmp13620 = getelementptr inbounds float* %tmp13619, i64 1
+  %tmp13621 = getelementptr inbounds float* %tmp13620, i64 1
+  %tmp13622 = getelementptr inbounds float* %tmp13621, i64 1
+  %tmp13623 = getelementptr inbounds float* %tmp13622, i64 1
+  %tmp13624 = getelementptr inbounds float* %tmp13623, i64 1
+  %tmp13625 = getelementptr inbounds float* %tmp13624, i64 1
+  %tmp13626 = getelementptr inbounds float* %tmp13625, i64 1
+  %tmp13627 = getelementptr inbounds float* %tmp13626, i64 1
+  %tmp13628 = getelementptr inbounds float* %tmp13627, i64 1
+  %tmp13629 = getelementptr inbounds float* %tmp13628, i64 1
+  %tmp13630 = getelementptr inbounds float* %tmp13629, i64 1
+  %tmp13631 = getelementptr inbounds float* %tmp13630, i64 1
+  %tmp13632 = getelementptr inbounds float* %tmp13631, i64 1
+  %tmp13633 = getelementptr inbounds float* %tmp13632, i64 1
+  %tmp13634 = getelementptr inbounds float* %tmp13633, i64 1
+  %tmp13635 = getelementptr inbounds float* %tmp13634, i64 1
+  %tmp13636 = getelementptr inbounds float* %tmp13635, i64 1
+  %tmp13637 = getelementptr inbounds float* %tmp13636, i64 1
+  %tmp13638 = getelementptr inbounds float* %tmp13637, i64 1
+  %tmp13639 = getelementptr inbounds float* %tmp13638, i64 1
+  %tmp13640 = getelementptr inbounds float* %tmp13639, i64 1
+  %tmp13641 = getelementptr inbounds float* %tmp13640, i64 1
+  %tmp13642 = getelementptr inbounds float* %tmp13641, i64 1
+  %tmp13643 = getelementptr inbounds float* %tmp13642, i64 1
+  %tmp13644 = getelementptr inbounds float* %tmp13643, i64 1
+  %tmp13645 = getelementptr inbounds float* %tmp13644, i64 1
+  %tmp13646 = getelementptr inbounds float* %tmp13645, i64 1
+  %tmp13647 = getelementptr inbounds float* %tmp13646, i64 1
+  %tmp13648 = getelementptr inbounds float* %tmp13647, i64 1
+  %tmp13649 = getelementptr inbounds float* %tmp13648, i64 1
+  %tmp13650 = getelementptr inbounds float* %tmp13649, i64 1
+  %tmp13651 = getelementptr inbounds float* %tmp13650, i64 1
+  %tmp13652 = getelementptr inbounds float* %tmp13651, i64 1
+  %tmp13653 = getelementptr inbounds float* %tmp13652, i64 1
+  %tmp13654 = getelementptr inbounds float* %tmp13653, i64 1
+  %tmp13655 = getelementptr inbounds float* %tmp13654, i64 1
+  %tmp13656 = getelementptr inbounds float* %tmp13655, i64 1
+  %tmp13657 = getelementptr inbounds float* %tmp13656, i64 1
+  %tmp13658 = getelementptr inbounds float* %tmp13657, i64 1
+  %tmp13659 = getelementptr inbounds float* %tmp13658, i64 1
+  %tmp13660 = getelementptr inbounds float* %tmp13659, i64 1
+  %tmp13661 = getelementptr inbounds float* %tmp13660, i64 1
+  %tmp13662 = getelementptr inbounds float* %tmp13661, i64 1
+  %tmp13663 = getelementptr inbounds float* %tmp13662, i64 1
+  %tmp13664 = getelementptr inbounds float* %tmp13663, i64 1
+  %tmp13665 = getelementptr inbounds float* %tmp13664, i64 1
+  %tmp13666 = getelementptr inbounds float* %tmp13665, i64 1
+  %tmp13667 = getelementptr inbounds float* %tmp13666, i64 1
+  %tmp13668 = getelementptr inbounds float* %tmp13667, i64 1
+  %tmp13669 = getelementptr inbounds float* %tmp13668, i64 1
+  %tmp13670 = getelementptr inbounds float* %tmp13669, i64 1
+  %tmp13671 = getelementptr inbounds float* %tmp13670, i64 1
+  %tmp13672 = getelementptr inbounds float* %tmp13671, i64 1
+  %tmp13673 = getelementptr inbounds float* %tmp13672, i64 1
+  %tmp13674 = getelementptr inbounds float* %tmp13673, i64 1
+  %tmp13675 = getelementptr inbounds float* %tmp13674, i64 1
+  %tmp13676 = getelementptr inbounds float* %tmp13675, i64 1
+  %tmp13677 = getelementptr inbounds float* %tmp13676, i64 1
+  %tmp13678 = getelementptr inbounds float* %tmp13677, i64 1
+  %tmp13679 = getelementptr inbounds float* %tmp13678, i64 1
+  %tmp13680 = getelementptr inbounds float* %tmp13679, i64 1
+  %tmp13681 = getelementptr inbounds float* %tmp13680, i64 1
+  %tmp13682 = getelementptr inbounds float* %tmp13681, i64 1
+  %tmp13683 = getelementptr inbounds float* %tmp13682, i64 1
+  %tmp13684 = getelementptr inbounds float* %tmp13683, i64 1
+  %tmp13685 = getelementptr inbounds float* %tmp13684, i64 1
+  %tmp13686 = getelementptr inbounds float* %tmp13685, i64 1
+  %tmp13687 = getelementptr inbounds float* %tmp13686, i64 1
+  %tmp13688 = getelementptr inbounds float* %tmp13687, i64 1
+  %tmp13689 = getelementptr inbounds float* %tmp13688, i64 1
+  %tmp13690 = getelementptr inbounds float* %tmp13689, i64 1
+  %tmp13691 = getelementptr inbounds float* %tmp13690, i64 1
+  %tmp13692 = getelementptr inbounds float* %tmp13691, i64 1
+  %tmp13693 = getelementptr inbounds float* %tmp13692, i64 1
+  %tmp13694 = getelementptr inbounds float* %tmp13693, i64 1
+  %tmp13695 = getelementptr inbounds float* %tmp13694, i64 1
+  %tmp13696 = getelementptr inbounds float* %tmp13695, i64 1
+  %tmp13697 = getelementptr inbounds float* %tmp13696, i64 1
+  %tmp13698 = getelementptr inbounds float* %tmp13697, i64 1
+  %tmp13699 = getelementptr inbounds float* %tmp13698, i64 1
+  %tmp13700 = getelementptr inbounds float* %tmp13699, i64 1
+  %tmp13701 = getelementptr inbounds float* %tmp13700, i64 1
+  %tmp13702 = getelementptr inbounds float* %tmp13701, i64 1
+  %tmp13703 = getelementptr inbounds float* %tmp13702, i64 1
+  %tmp13704 = getelementptr inbounds float* %tmp13703, i64 1
+  %tmp13705 = getelementptr inbounds float* %tmp13704, i64 1
+  %tmp13706 = getelementptr inbounds float* %tmp13705, i64 1
+  %tmp13707 = getelementptr inbounds float* %tmp13706, i64 1
+  %tmp13708 = getelementptr inbounds float* %tmp13707, i64 1
+  %tmp13709 = getelementptr inbounds float* %tmp13708, i64 1
+  %tmp13710 = getelementptr inbounds float* %tmp13709, i64 1
+  %tmp13711 = getelementptr inbounds float* %tmp13710, i64 1
+  %tmp13712 = getelementptr inbounds float* %tmp13711, i64 1
+  %tmp13713 = getelementptr inbounds float* %tmp13712, i64 1
+  %tmp13714 = getelementptr inbounds float* %tmp13713, i64 1
+  %tmp13715 = getelementptr inbounds float* %tmp13714, i64 1
+  %tmp13716 = getelementptr inbounds float* %tmp13715, i64 1
+  %tmp13717 = getelementptr inbounds float* %tmp13716, i64 1
+  %tmp13718 = getelementptr inbounds float* %tmp13717, i64 1
+  %tmp13719 = getelementptr inbounds float* %tmp13718, i64 1
+  %tmp13720 = getelementptr inbounds float* %tmp13719, i64 1
+  %tmp13721 = getelementptr inbounds float* %tmp13720, i64 1
+  %tmp13722 = getelementptr inbounds float* %tmp13721, i64 1
+  %tmp13723 = getelementptr inbounds float* %tmp13722, i64 1
+  %tmp13724 = getelementptr inbounds float* %tmp13723, i64 1
+  %tmp13725 = getelementptr inbounds float* %tmp13724, i64 1
+  %tmp13726 = getelementptr inbounds float* %tmp13725, i64 1
+  %tmp13727 = getelementptr inbounds float* %tmp13726, i64 1
+  %tmp13728 = getelementptr inbounds float* %tmp13727, i64 1
+  %tmp13729 = getelementptr inbounds float* %tmp13728, i64 1
+  %tmp13730 = getelementptr inbounds float* %tmp13729, i64 1
+  %tmp13731 = getelementptr inbounds float* %tmp13730, i64 1
+  %tmp13732 = getelementptr inbounds float* %tmp13731, i64 1
+  %tmp13733 = getelementptr inbounds float* %tmp13732, i64 1
+  %tmp13734 = getelementptr inbounds float* %tmp13733, i64 1
+  %tmp13735 = getelementptr inbounds float* %tmp13734, i64 1
+  %tmp13736 = getelementptr inbounds float* %tmp13735, i64 1
+  %tmp13737 = getelementptr inbounds float* %tmp13736, i64 1
+  %tmp13738 = getelementptr inbounds float* %tmp13737, i64 1
+  %tmp13739 = getelementptr inbounds float* %tmp13738, i64 1
+  %tmp13740 = getelementptr inbounds float* %tmp13739, i64 1
+  %tmp13741 = getelementptr inbounds float* %tmp13740, i64 1
+  %tmp13742 = getelementptr inbounds float* %tmp13741, i64 1
+  %tmp13743 = getelementptr inbounds float* %tmp13742, i64 1
+  %tmp13744 = getelementptr inbounds float* %tmp13743, i64 1
+  %tmp13745 = getelementptr inbounds float* %tmp13744, i64 1
+  %tmp13746 = getelementptr inbounds float* %tmp13745, i64 1
+  %tmp13747 = getelementptr inbounds float* %tmp13746, i64 1
+  %tmp13748 = getelementptr inbounds float* %tmp13747, i64 1
+  %tmp13749 = getelementptr inbounds float* %tmp13748, i64 1
+  %tmp13750 = getelementptr inbounds float* %tmp13749, i64 1
+  %tmp13751 = getelementptr inbounds float* %tmp13750, i64 1
+  %tmp13752 = getelementptr inbounds float* %tmp13751, i64 1
+  %tmp13753 = getelementptr inbounds float* %tmp13752, i64 1
+  %tmp13754 = getelementptr inbounds float* %tmp13753, i64 1
+  %tmp13755 = getelementptr inbounds float* %tmp13754, i64 1
+  %tmp13756 = getelementptr inbounds float* %tmp13755, i64 1
+  %tmp13757 = getelementptr inbounds float* %tmp13756, i64 1
+  %tmp13758 = getelementptr inbounds float* %tmp13757, i64 1
+  %tmp13759 = getelementptr inbounds float* %tmp13758, i64 1
+  %tmp13760 = getelementptr inbounds float* %tmp13759, i64 1
+  %tmp13761 = getelementptr inbounds float* %tmp13760, i64 1
+  %tmp13762 = getelementptr inbounds float* %tmp13761, i64 1
+  %tmp13763 = getelementptr inbounds float* %tmp13762, i64 1
+  %tmp13764 = getelementptr inbounds float* %tmp13763, i64 1
+  %tmp13765 = getelementptr inbounds float* %tmp13764, i64 1
+  %tmp13766 = getelementptr inbounds float* %tmp13765, i64 1
+  %tmp13767 = getelementptr inbounds float* %tmp13766, i64 1
+  %tmp13768 = getelementptr inbounds float* %tmp13767, i64 1
+  %tmp13769 = getelementptr inbounds float* %tmp13768, i64 1
+  %tmp13770 = getelementptr inbounds float* %tmp13769, i64 1
+  %tmp13771 = getelementptr inbounds float* %tmp13770, i64 1
+  %tmp13772 = getelementptr inbounds float* %tmp13771, i64 1
+  %tmp13773 = getelementptr inbounds float* %tmp13772, i64 1
+  %tmp13774 = getelementptr inbounds float* %tmp13773, i64 1
+  %tmp13775 = getelementptr inbounds float* %tmp13774, i64 1
+  %tmp13776 = getelementptr inbounds float* %tmp13775, i64 1
+  %tmp13777 = getelementptr inbounds float* %tmp13776, i64 1
+  %tmp13778 = getelementptr inbounds float* %tmp13777, i64 1
+  %tmp13779 = getelementptr inbounds float* %tmp13778, i64 1
+  %tmp13780 = getelementptr inbounds float* %tmp13779, i64 1
+  %tmp13781 = getelementptr inbounds float* %tmp13780, i64 1
+  %tmp13782 = getelementptr inbounds float* %tmp13781, i64 1
+  %tmp13783 = getelementptr inbounds float* %tmp13782, i64 1
+  %tmp13784 = getelementptr inbounds float* %tmp13783, i64 1
+  %tmp13785 = getelementptr inbounds float* %tmp13784, i64 1
+  %tmp13786 = getelementptr inbounds float* %tmp13785, i64 1
+  %tmp13787 = getelementptr inbounds float* %tmp13786, i64 1
+  %tmp13788 = getelementptr inbounds float* %tmp13787, i64 1
+  %tmp13789 = getelementptr inbounds float* %tmp13788, i64 1
+  %tmp13790 = getelementptr inbounds float* %tmp13789, i64 1
+  %tmp13791 = getelementptr inbounds float* %tmp13790, i64 1
+  %tmp13792 = getelementptr inbounds float* %tmp13791, i64 1
+  %tmp13793 = getelementptr inbounds float* %tmp13792, i64 1
+  %tmp13794 = getelementptr inbounds float* %tmp13793, i64 1
+  %tmp13795 = getelementptr inbounds float* %tmp13794, i64 1
+  %tmp13796 = getelementptr inbounds float* %tmp13795, i64 1
+  %tmp13797 = getelementptr inbounds float* %tmp13796, i64 1
+  %tmp13798 = getelementptr inbounds float* %tmp13797, i64 1
+  %tmp13799 = getelementptr inbounds float* %tmp13798, i64 1
+  %tmp13800 = getelementptr inbounds float* %tmp13799, i64 1
+  %tmp13801 = getelementptr inbounds float* %tmp13800, i64 1
+  %tmp13802 = getelementptr inbounds float* %tmp13801, i64 1
+  %tmp13803 = getelementptr inbounds float* %tmp13802, i64 1
+  %tmp13804 = getelementptr inbounds float* %tmp13803, i64 1
+  %tmp13805 = getelementptr inbounds float* %tmp13804, i64 1
+  %tmp13806 = getelementptr inbounds float* %tmp13805, i64 1
+  %tmp13807 = getelementptr inbounds float* %tmp13806, i64 1
+  %tmp13808 = getelementptr inbounds float* %tmp13807, i64 1
+  %tmp13809 = getelementptr inbounds float* %tmp13808, i64 1
+  %tmp13810 = getelementptr inbounds float* %tmp13809, i64 1
+  %tmp13811 = getelementptr inbounds float* %tmp13810, i64 1
+  %tmp13812 = getelementptr inbounds float* %tmp13811, i64 1
+  %tmp13813 = getelementptr inbounds float* %tmp13812, i64 1
+  %tmp13814 = getelementptr inbounds float* %tmp13813, i64 1
+  %tmp13815 = getelementptr inbounds float* %tmp13814, i64 1
+  %tmp13816 = getelementptr inbounds float* %tmp13815, i64 1
+  %tmp13817 = getelementptr inbounds float* %tmp13816, i64 1
+  %tmp13818 = getelementptr inbounds float* %tmp13817, i64 1
+  %tmp13819 = getelementptr inbounds float* %tmp13818, i64 1
+  %tmp13820 = getelementptr inbounds float* %tmp13819, i64 1
+  %tmp13821 = getelementptr inbounds float* %tmp13820, i64 1
+  %tmp13822 = getelementptr inbounds float* %tmp13821, i64 1
+  %tmp13823 = getelementptr inbounds float* %tmp13822, i64 1
+  %tmp13824 = getelementptr inbounds float* %tmp13823, i64 1
+  %tmp13825 = getelementptr inbounds float* %tmp13824, i64 1
+  %tmp13826 = getelementptr inbounds float* %tmp13825, i64 1
+  %tmp13827 = getelementptr inbounds float* %tmp13826, i64 1
+  %tmp13828 = getelementptr inbounds float* %tmp13827, i64 1
+  %tmp13829 = getelementptr inbounds float* %tmp13828, i64 1
+  %tmp13830 = getelementptr inbounds float* %tmp13829, i64 1
+  %tmp13831 = getelementptr inbounds float* %tmp13830, i64 1
+  %tmp13832 = getelementptr inbounds float* %tmp13831, i64 1
+  %tmp13833 = getelementptr inbounds float* %tmp13832, i64 1
+  %tmp13834 = getelementptr inbounds float* %tmp13833, i64 1
+  %tmp13835 = getelementptr inbounds float* %tmp13834, i64 1
+  %tmp13836 = getelementptr inbounds float* %tmp13835, i64 1
+  %tmp13837 = getelementptr inbounds float* %tmp13836, i64 1
+  %tmp13838 = getelementptr inbounds float* %tmp13837, i64 1
+  %tmp13839 = getelementptr inbounds float* %tmp13838, i64 1
+  %tmp13840 = getelementptr inbounds float* %tmp13839, i64 1
+  %tmp13841 = getelementptr inbounds float* %tmp13840, i64 1
+  %tmp13842 = getelementptr inbounds float* %tmp13841, i64 1
+  %tmp13843 = getelementptr inbounds float* %tmp13842, i64 1
+  %tmp13844 = getelementptr inbounds float* %tmp13843, i64 1
+  %tmp13845 = getelementptr inbounds float* %tmp13844, i64 1
+  %tmp13846 = getelementptr inbounds float* %tmp13845, i64 1
+  %tmp13847 = getelementptr inbounds float* %tmp13846, i64 1
+  %tmp13848 = getelementptr inbounds float* %tmp13847, i64 1
+  %tmp13849 = getelementptr inbounds float* %tmp13848, i64 1
+  %tmp13850 = getelementptr inbounds float* %tmp13849, i64 1
+  %tmp13851 = getelementptr inbounds float* %tmp13850, i64 1
+  %tmp13852 = getelementptr inbounds float* %tmp13851, i64 1
+  %tmp13853 = getelementptr inbounds float* %tmp13852, i64 1
+  %tmp13854 = getelementptr inbounds float* %tmp13853, i64 1
+  %tmp13855 = getelementptr inbounds float* %tmp13854, i64 1
+  %tmp13856 = getelementptr inbounds float* %tmp13855, i64 1
+  %tmp13857 = getelementptr inbounds float* %tmp13856, i64 1
+  %tmp13858 = getelementptr inbounds float* %tmp13857, i64 1
+  %tmp13859 = getelementptr inbounds float* %tmp13858, i64 1
+  %tmp13860 = getelementptr inbounds float* %tmp13859, i64 1
+  %tmp13861 = getelementptr inbounds float* %tmp13860, i64 1
+  %tmp13862 = getelementptr inbounds float* %tmp13861, i64 1
+  %tmp13863 = getelementptr inbounds float* %tmp13862, i64 1
+  %tmp13864 = getelementptr inbounds float* %tmp13863, i64 1
+  %tmp13865 = getelementptr inbounds float* %tmp13864, i64 1
+  %tmp13866 = getelementptr inbounds float* %tmp13865, i64 1
+  %tmp13867 = getelementptr inbounds float* %tmp13866, i64 1
+  %tmp13868 = getelementptr inbounds float* %tmp13867, i64 1
+  %tmp13869 = getelementptr inbounds float* %tmp13868, i64 1
+  %tmp13870 = getelementptr inbounds float* %tmp13869, i64 1
+  %tmp13871 = getelementptr inbounds float* %tmp13870, i64 1
+  %tmp13872 = getelementptr inbounds float* %tmp13871, i64 1
+  %tmp13873 = getelementptr inbounds float* %tmp13872, i64 1
+  %tmp13874 = getelementptr inbounds float* %tmp13873, i64 1
+  %tmp13875 = getelementptr inbounds float* %tmp13874, i64 1
+  %tmp13876 = getelementptr inbounds float* %tmp13875, i64 1
+  %tmp13877 = getelementptr inbounds float* %tmp13876, i64 1
+  %tmp13878 = getelementptr inbounds float* %tmp13877, i64 1
+  %tmp13879 = getelementptr inbounds float* %tmp13878, i64 1
+  %tmp13880 = getelementptr inbounds float* %tmp13879, i64 1
+  %tmp13881 = getelementptr inbounds float* %tmp13880, i64 1
+  %tmp13882 = getelementptr inbounds float* %tmp13881, i64 1
+  %tmp13883 = getelementptr inbounds float* %tmp13882, i64 1
+  %tmp13884 = getelementptr inbounds float* %tmp13883, i64 1
+  %tmp13885 = getelementptr inbounds float* %tmp13884, i64 1
+  %tmp13886 = getelementptr inbounds float* %tmp13885, i64 1
+  %tmp13887 = getelementptr inbounds float* %tmp13886, i64 1
+  %tmp13888 = getelementptr inbounds float* %tmp13887, i64 1
+  %tmp13889 = getelementptr inbounds float* %tmp13888, i64 1
+  %tmp13890 = getelementptr inbounds float* %tmp13889, i64 1
+  %tmp13891 = getelementptr inbounds float* %tmp13890, i64 1
+  %tmp13892 = getelementptr inbounds float* %tmp13891, i64 1
+  %tmp13893 = getelementptr inbounds float* %tmp13892, i64 1
+  %tmp13894 = getelementptr inbounds float* %tmp13893, i64 1
+  %tmp13895 = getelementptr inbounds float* %tmp13894, i64 1
+  %tmp13896 = getelementptr inbounds float* %tmp13895, i64 1
+  %tmp13897 = getelementptr inbounds float* %tmp13896, i64 1
+  %tmp13898 = getelementptr inbounds float* %tmp13897, i64 1
+  %tmp13899 = getelementptr inbounds float* %tmp13898, i64 1
+  %tmp13900 = getelementptr inbounds float* %tmp13899, i64 1
+  %tmp13901 = getelementptr inbounds float* %tmp13900, i64 1
+  %tmp13902 = getelementptr inbounds float* %tmp13901, i64 1
+  %tmp13903 = getelementptr inbounds float* %tmp13902, i64 1
+  %tmp13904 = getelementptr inbounds float* %tmp13903, i64 1
+  %tmp13905 = getelementptr inbounds float* %tmp13904, i64 1
+  %tmp13906 = getelementptr inbounds float* %tmp13905, i64 1
+  %tmp13907 = getelementptr inbounds float* %tmp13906, i64 1
+  %tmp13908 = getelementptr inbounds float* %tmp13907, i64 1
+  %tmp13909 = getelementptr inbounds float* %tmp13908, i64 1
+  %tmp13910 = getelementptr inbounds float* %tmp13909, i64 1
+  %tmp13911 = getelementptr inbounds float* %tmp13910, i64 1
+  %tmp13912 = getelementptr inbounds float* %tmp13911, i64 1
+  %tmp13913 = getelementptr inbounds float* %tmp13912, i64 1
+  %tmp13914 = getelementptr inbounds float* %tmp13913, i64 1
+  %tmp13915 = getelementptr inbounds float* %tmp13914, i64 1
+  %tmp13916 = getelementptr inbounds float* %tmp13915, i64 1
+  %tmp13917 = getelementptr inbounds float* %tmp13916, i64 1
+  %tmp13918 = getelementptr inbounds float* %tmp13917, i64 1
+  %tmp13919 = getelementptr inbounds float* %tmp13918, i64 1
+  %tmp13920 = getelementptr inbounds float* %tmp13919, i64 1
+  %tmp13921 = getelementptr inbounds float* %tmp13920, i64 1
+  %tmp13922 = getelementptr inbounds float* %tmp13921, i64 1
+  %tmp13923 = getelementptr inbounds float* %tmp13922, i64 1
+  %tmp13924 = getelementptr inbounds float* %tmp13923, i64 1
+  %tmp13925 = getelementptr inbounds float* %tmp13924, i64 1
+  %tmp13926 = getelementptr inbounds float* %tmp13925, i64 1
+  %tmp13927 = getelementptr inbounds float* %tmp13926, i64 1
+  %tmp13928 = getelementptr inbounds float* %tmp13927, i64 1
+  %tmp13929 = getelementptr inbounds float* %tmp13928, i64 1
+  %tmp13930 = getelementptr inbounds float* %tmp13929, i64 1
+  %tmp13931 = getelementptr inbounds float* %tmp13930, i64 1
+  %tmp13932 = getelementptr inbounds float* %tmp13931, i64 1
+  %tmp13933 = getelementptr inbounds float* %tmp13932, i64 1
+  %tmp13934 = getelementptr inbounds float* %tmp13933, i64 1
+  %tmp13935 = getelementptr inbounds float* %tmp13934, i64 1
+  %tmp13936 = getelementptr inbounds float* %tmp13935, i64 1
+  %tmp13937 = getelementptr inbounds float* %tmp13936, i64 1
+  %tmp13938 = getelementptr inbounds float* %tmp13937, i64 1
+  %tmp13939 = getelementptr inbounds float* %tmp13938, i64 1
+  %tmp13940 = getelementptr inbounds float* %tmp13939, i64 1
+  %tmp13941 = getelementptr inbounds float* %tmp13940, i64 1
+  %tmp13942 = getelementptr inbounds float* %tmp13941, i64 1
+  %tmp13943 = getelementptr inbounds float* %tmp13942, i64 1
+  %tmp13944 = getelementptr inbounds float* %tmp13943, i64 1
+  %tmp13945 = getelementptr inbounds float* %tmp13944, i64 1
+  %tmp13946 = getelementptr inbounds float* %tmp13945, i64 1
+  %tmp13947 = getelementptr inbounds float* %tmp13946, i64 1
+  %tmp13948 = getelementptr inbounds float* %tmp13947, i64 1
+  %tmp13949 = getelementptr inbounds float* %tmp13948, i64 1
+  %tmp13950 = getelementptr inbounds float* %tmp13949, i64 1
+  %tmp13951 = getelementptr inbounds float* %tmp13950, i64 1
+  %tmp13952 = getelementptr inbounds float* %tmp13951, i64 1
+  %tmp13953 = getelementptr inbounds float* %tmp13952, i64 1
+  %tmp13954 = getelementptr inbounds float* %tmp13953, i64 1
+  %tmp13955 = getelementptr inbounds float* %tmp13954, i64 1
+  %tmp13956 = getelementptr inbounds float* %tmp13955, i64 1
+  %tmp13957 = getelementptr inbounds float* %tmp13956, i64 1
+  %tmp13958 = getelementptr inbounds float* %tmp13957, i64 1
+  %tmp13959 = getelementptr inbounds float* %tmp13958, i64 1
+  %tmp13960 = getelementptr inbounds float* %tmp13959, i64 1
+  %tmp13961 = getelementptr inbounds float* %tmp13960, i64 1
+  %tmp13962 = getelementptr inbounds float* %tmp13961, i64 1
+  %tmp13963 = getelementptr inbounds float* %tmp13962, i64 1
+  %tmp13964 = getelementptr inbounds float* %tmp13963, i64 1
+  %tmp13965 = getelementptr inbounds float* %tmp13964, i64 1
+  %tmp13966 = getelementptr inbounds float* %tmp13965, i64 1
+  %tmp13967 = getelementptr inbounds float* %tmp13966, i64 1
+  %tmp13968 = getelementptr inbounds float* %tmp13967, i64 1
+  %tmp13969 = getelementptr inbounds float* %tmp13968, i64 1
+  %tmp13970 = getelementptr inbounds float* %tmp13969, i64 1
+  %tmp13971 = getelementptr inbounds float* %tmp13970, i64 1
+  %tmp13972 = getelementptr inbounds float* %tmp13971, i64 1
+  %tmp13973 = getelementptr inbounds float* %tmp13972, i64 1
+  %tmp13974 = getelementptr inbounds float* %tmp13973, i64 1
+  %tmp13975 = getelementptr inbounds float* %tmp13974, i64 1
+  %tmp13976 = getelementptr inbounds float* %tmp13975, i64 1
+  %tmp13977 = getelementptr inbounds float* %tmp13976, i64 1
+  %tmp13978 = getelementptr inbounds float* %tmp13977, i64 1
+  %tmp13979 = getelementptr inbounds float* %tmp13978, i64 1
+  %tmp13980 = getelementptr inbounds float* %tmp13979, i64 1
+  %tmp13981 = getelementptr inbounds float* %tmp13980, i64 1
+  %tmp13982 = getelementptr inbounds float* %tmp13981, i64 1
+  %tmp13983 = getelementptr inbounds float* %tmp13982, i64 1
+  %tmp13984 = getelementptr inbounds float* %tmp13983, i64 1
+  %tmp13985 = getelementptr inbounds float* %tmp13984, i64 1
+  %tmp13986 = getelementptr inbounds float* %tmp13985, i64 1
+  %tmp13987 = getelementptr inbounds float* %tmp13986, i64 1
+  %tmp13988 = getelementptr inbounds float* %tmp13987, i64 1
+  %tmp13989 = getelementptr inbounds float* %tmp13988, i64 1
+  %tmp13990 = getelementptr inbounds float* %tmp13989, i64 1
+  %tmp13991 = getelementptr inbounds float* %tmp13990, i64 1
+  %tmp13992 = getelementptr inbounds float* %tmp13991, i64 1
+  %tmp13993 = getelementptr inbounds float* %tmp13992, i64 1
+  %tmp13994 = getelementptr inbounds float* %tmp13993, i64 1
+  %tmp13995 = getelementptr inbounds float* %tmp13994, i64 1
+  %tmp13996 = getelementptr inbounds float* %tmp13995, i64 1
+  %tmp13997 = getelementptr inbounds float* %tmp13996, i64 1
+  %tmp13998 = getelementptr inbounds float* %tmp13997, i64 1
+  %tmp13999 = getelementptr inbounds float* %tmp13998, i64 1
+  %tmp14000 = getelementptr inbounds float* %tmp13999, i64 1
+  %tmp14001 = getelementptr inbounds float* %tmp14000, i64 1
+  %tmp14002 = getelementptr inbounds float* %tmp14001, i64 1
+  %tmp14003 = getelementptr inbounds float* %tmp14002, i64 1
+  %tmp14004 = getelementptr inbounds float* %tmp14003, i64 1
+  %tmp14005 = getelementptr inbounds float* %tmp14004, i64 1
+  %tmp14006 = getelementptr inbounds float* %tmp14005, i64 1
+  %tmp14007 = getelementptr inbounds float* %tmp14006, i64 1
+  %tmp14008 = getelementptr inbounds float* %tmp14007, i64 1
+  %tmp14009 = getelementptr inbounds float* %tmp14008, i64 1
+  %tmp14010 = getelementptr inbounds float* %tmp14009, i64 1
+  %tmp14011 = getelementptr inbounds float* %tmp14010, i64 1
+  %tmp14012 = getelementptr inbounds float* %tmp14011, i64 1
+  %tmp14013 = getelementptr inbounds float* %tmp14012, i64 1
+  %tmp14014 = getelementptr inbounds float* %tmp14013, i64 1
+  %tmp14015 = getelementptr inbounds float* %tmp14014, i64 1
+  %tmp14016 = getelementptr inbounds float* %tmp14015, i64 1
+  %tmp14017 = getelementptr inbounds float* %tmp14016, i64 1
+  %tmp14018 = getelementptr inbounds float* %tmp14017, i64 1
+  %tmp14019 = getelementptr inbounds float* %tmp14018, i64 1
+  %tmp14020 = getelementptr inbounds float* %tmp14019, i64 1
+  %tmp14021 = getelementptr inbounds float* %tmp14020, i64 1
+  %tmp14022 = getelementptr inbounds float* %tmp14021, i64 1
+  %tmp14023 = getelementptr inbounds float* %tmp14022, i64 1
+  %tmp14024 = getelementptr inbounds float* %tmp14023, i64 1
+  %tmp14025 = getelementptr inbounds float* %tmp14024, i64 1
+  %tmp14026 = getelementptr inbounds float* %tmp14025, i64 1
+  %tmp14027 = getelementptr inbounds float* %tmp14026, i64 1
+  %tmp14028 = getelementptr inbounds float* %tmp14027, i64 1
+  %tmp14029 = getelementptr inbounds float* %tmp14028, i64 1
+  %tmp14030 = getelementptr inbounds float* %tmp14029, i64 1
+  %tmp14031 = getelementptr inbounds float* %tmp14030, i64 1
+  %tmp14032 = getelementptr inbounds float* %tmp14031, i64 1
+  %tmp14033 = getelementptr inbounds float* %tmp14032, i64 1
+  %tmp14034 = getelementptr inbounds float* %tmp14033, i64 1
+  %tmp14035 = getelementptr inbounds float* %tmp14034, i64 1
+  %tmp14036 = getelementptr inbounds float* %tmp14035, i64 1
+  %tmp14037 = getelementptr inbounds float* %tmp14036, i64 1
+  %tmp14038 = getelementptr inbounds float* %tmp14037, i64 1
+  %tmp14039 = getelementptr inbounds float* %tmp14038, i64 1
+  %tmp14040 = getelementptr inbounds float* %tmp14039, i64 1
+  %tmp14041 = getelementptr inbounds float* %tmp14040, i64 1
+  %tmp14042 = getelementptr inbounds float* %tmp14041, i64 1
+  %tmp14043 = getelementptr inbounds float* %tmp14042, i64 1
+  %tmp14044 = getelementptr inbounds float* %tmp14043, i64 1
+  %tmp14045 = getelementptr inbounds float* %tmp14044, i64 1
+  %tmp14046 = getelementptr inbounds float* %tmp14045, i64 1
+  %tmp14047 = getelementptr inbounds float* %tmp14046, i64 1
+  %tmp14048 = getelementptr inbounds float* %tmp14047, i64 1
+  %tmp14049 = getelementptr inbounds float* %tmp14048, i64 1
+  %tmp14050 = getelementptr inbounds float* %tmp14049, i64 1
+  %tmp14051 = getelementptr inbounds float* %tmp14050, i64 1
+  %tmp14052 = getelementptr inbounds float* %tmp14051, i64 1
+  %tmp14053 = getelementptr inbounds float* %tmp14052, i64 1
+  %tmp14054 = getelementptr inbounds float* %tmp14053, i64 1
+  %tmp14055 = getelementptr inbounds float* %tmp14054, i64 1
+  %tmp14056 = getelementptr inbounds float* %tmp14055, i64 1
+  %tmp14057 = getelementptr inbounds float* %tmp14056, i64 1
+  %tmp14058 = getelementptr inbounds float* %tmp14057, i64 1
+  %tmp14059 = getelementptr inbounds float* %tmp14058, i64 1
+  %tmp14060 = getelementptr inbounds float* %tmp14059, i64 1
+  %tmp14061 = getelementptr inbounds float* %tmp14060, i64 1
+  %tmp14062 = getelementptr inbounds float* %tmp14061, i64 1
+  %tmp14063 = getelementptr inbounds float* %tmp14062, i64 1
+  %tmp14064 = getelementptr inbounds float* %tmp14063, i64 1
+  %tmp14065 = getelementptr inbounds float* %tmp14064, i64 1
+  %tmp14066 = getelementptr inbounds float* %tmp14065, i64 1
+  %tmp14067 = getelementptr inbounds float* %tmp14066, i64 1
+  %tmp14068 = getelementptr inbounds float* %tmp14067, i64 1
+  %tmp14069 = getelementptr inbounds float* %tmp14068, i64 1
+  %tmp14070 = getelementptr inbounds float* %tmp14069, i64 1
+  %tmp14071 = getelementptr inbounds float* %tmp14070, i64 1
+  %tmp14072 = getelementptr inbounds float* %tmp14071, i64 1
+  %tmp14073 = getelementptr inbounds float* %tmp14072, i64 1
+  %tmp14074 = getelementptr inbounds float* %tmp14073, i64 1
+  %tmp14075 = getelementptr inbounds float* %tmp14074, i64 1
+  %tmp14076 = getelementptr inbounds float* %tmp14075, i64 1
+  %tmp14077 = getelementptr inbounds float* %tmp14076, i64 1
+  %tmp14078 = getelementptr inbounds float* %tmp14077, i64 1
+  %tmp14079 = getelementptr inbounds float* %tmp14078, i64 1
+  %tmp14080 = getelementptr inbounds float* %tmp14079, i64 1
+  %tmp14081 = getelementptr inbounds float* %tmp14080, i64 1
+  %tmp14082 = getelementptr inbounds float* %tmp14081, i64 1
+  %tmp14083 = getelementptr inbounds float* %tmp14082, i64 1
+  %tmp14084 = getelementptr inbounds float* %tmp14083, i64 1
+  %tmp14085 = getelementptr inbounds float* %tmp14084, i64 1
+  %tmp14086 = getelementptr inbounds float* %tmp14085, i64 1
+  %tmp14087 = getelementptr inbounds float* %tmp14086, i64 1
+  %tmp14088 = getelementptr inbounds float* %tmp14087, i64 1
+  %tmp14089 = getelementptr inbounds float* %tmp14088, i64 1
+  %tmp14090 = getelementptr inbounds float* %tmp14089, i64 1
+  %tmp14091 = getelementptr inbounds float* %tmp14090, i64 1
+  %tmp14092 = getelementptr inbounds float* %tmp14091, i64 1
+  %tmp14093 = getelementptr inbounds float* %tmp14092, i64 1
+  %tmp14094 = getelementptr inbounds float* %tmp14093, i64 1
+  %tmp14095 = getelementptr inbounds float* %tmp14094, i64 1
+  %tmp14096 = getelementptr inbounds float* %tmp14095, i64 1
+  %tmp14097 = getelementptr inbounds float* %tmp14096, i64 1
+  %tmp14098 = getelementptr inbounds float* %tmp14097, i64 1
+  %tmp14099 = getelementptr inbounds float* %tmp14098, i64 1
+  %tmp14100 = getelementptr inbounds float* %tmp14099, i64 1
+  %tmp14101 = getelementptr inbounds float* %tmp14100, i64 1
+  %tmp14102 = getelementptr inbounds float* %tmp14101, i64 1
+  %tmp14103 = getelementptr inbounds float* %tmp14102, i64 1
+  %tmp14104 = getelementptr inbounds float* %tmp14103, i64 1
+  %tmp14105 = getelementptr inbounds float* %tmp14104, i64 1
+  %tmp14106 = getelementptr inbounds float* %tmp14105, i64 1
+  %tmp14107 = getelementptr inbounds float* %tmp14106, i64 1
+  %tmp14108 = getelementptr inbounds float* %tmp14107, i64 1
+  %tmp14109 = getelementptr inbounds float* %tmp14108, i64 1
+  %tmp14110 = getelementptr inbounds float* %tmp14109, i64 1
+  %tmp14111 = getelementptr inbounds float* %tmp14110, i64 1
+  %tmp14112 = getelementptr inbounds float* %tmp14111, i64 1
+  %tmp14113 = getelementptr inbounds float* %tmp14112, i64 1
+  %tmp14114 = getelementptr inbounds float* %tmp14113, i64 1
+  %tmp14115 = getelementptr inbounds float* %tmp14114, i64 1
+  %tmp14116 = getelementptr inbounds float* %tmp14115, i64 1
+  %tmp14117 = getelementptr inbounds float* %tmp14116, i64 1
+  %tmp14118 = getelementptr inbounds float* %tmp14117, i64 1
+  %tmp14119 = getelementptr inbounds float* %tmp14118, i64 1
+  %tmp14120 = getelementptr inbounds float* %tmp14119, i64 1
+  %tmp14121 = getelementptr inbounds float* %tmp14120, i64 1
+  %tmp14122 = getelementptr inbounds float* %tmp14121, i64 1
+  %tmp14123 = getelementptr inbounds float* %tmp14122, i64 1
+  %tmp14124 = getelementptr inbounds float* %tmp14123, i64 1
+  %tmp14125 = getelementptr inbounds float* %tmp14124, i64 1
+  %tmp14126 = getelementptr inbounds float* %tmp14125, i64 1
+  %tmp14127 = getelementptr inbounds float* %tmp14126, i64 1
+  %tmp14128 = getelementptr inbounds float* %tmp14127, i64 1
+  %tmp14129 = getelementptr inbounds float* %tmp14128, i64 1
+  %tmp14130 = getelementptr inbounds float* %tmp14129, i64 1
+  %tmp14131 = getelementptr inbounds float* %tmp14130, i64 1
+  %tmp14132 = getelementptr inbounds float* %tmp14131, i64 1
+  %tmp14133 = getelementptr inbounds float* %tmp14132, i64 1
+  %tmp14134 = getelementptr inbounds float* %tmp14133, i64 1
+  %tmp14135 = getelementptr inbounds float* %tmp14134, i64 1
+  %tmp14136 = getelementptr inbounds float* %tmp14135, i64 1
+  %tmp14137 = getelementptr inbounds float* %tmp14136, i64 1
+  %tmp14138 = getelementptr inbounds float* %tmp14137, i64 1
+  %tmp14139 = getelementptr inbounds float* %tmp14138, i64 1
+  %tmp14140 = getelementptr inbounds float* %tmp14139, i64 1
+  %tmp14141 = getelementptr inbounds float* %tmp14140, i64 1
+  %tmp14142 = getelementptr inbounds float* %tmp14141, i64 1
+  %tmp14143 = getelementptr inbounds float* %tmp14142, i64 1
+  %tmp14144 = getelementptr inbounds float* %tmp14143, i64 1
+  %tmp14145 = getelementptr inbounds float* %tmp14144, i64 1
+  %tmp14146 = getelementptr inbounds float* %tmp14145, i64 1
+  %tmp14147 = getelementptr inbounds float* %tmp14146, i64 1
+  %tmp14148 = getelementptr inbounds float* %tmp14147, i64 1
+  %tmp14149 = getelementptr inbounds float* %tmp14148, i64 1
+  %tmp14150 = getelementptr inbounds float* %tmp14149, i64 1
+  %tmp14151 = getelementptr inbounds float* %tmp14150, i64 1
+  %tmp14152 = getelementptr inbounds float* %tmp14151, i64 1
+  %tmp14153 = getelementptr inbounds float* %tmp14152, i64 1
+  %tmp14154 = getelementptr inbounds float* %tmp14153, i64 1
+  %tmp14155 = getelementptr inbounds float* %tmp14154, i64 1
+  %tmp14156 = getelementptr inbounds float* %tmp14155, i64 1
+  %tmp14157 = getelementptr inbounds float* %tmp14156, i64 1
+  %tmp14158 = getelementptr inbounds float* %tmp14157, i64 1
+  %tmp14159 = getelementptr inbounds float* %tmp14158, i64 1
+  %tmp14160 = getelementptr inbounds float* %tmp14159, i64 1
+  %tmp14161 = getelementptr inbounds float* %tmp14160, i64 1
+  %tmp14162 = getelementptr inbounds float* %tmp14161, i64 1
+  %tmp14163 = getelementptr inbounds float* %tmp14162, i64 1
+  %tmp14164 = getelementptr inbounds float* %tmp14163, i64 1
+  %tmp14165 = getelementptr inbounds float* %tmp14164, i64 1
+  %tmp14166 = getelementptr inbounds float* %tmp14165, i64 1
+  %tmp14167 = getelementptr inbounds float* %tmp14166, i64 1
+  %tmp14168 = getelementptr inbounds float* %tmp14167, i64 1
+  %tmp14169 = getelementptr inbounds float* %tmp14168, i64 1
+  %tmp14170 = getelementptr inbounds float* %tmp14169, i64 1
+  %tmp14171 = getelementptr inbounds float* %tmp14170, i64 1
+  %tmp14172 = getelementptr inbounds float* %tmp14171, i64 1
+  %tmp14173 = getelementptr inbounds float* %tmp14172, i64 1
+  %tmp14174 = getelementptr inbounds float* %tmp14173, i64 1
+  %tmp14175 = getelementptr inbounds float* %tmp14174, i64 1
+  %tmp14176 = getelementptr inbounds float* %tmp14175, i64 1
+  %tmp14177 = getelementptr inbounds float* %tmp14176, i64 1
+  %tmp14178 = getelementptr inbounds float* %tmp14177, i64 1
+  %tmp14179 = getelementptr inbounds float* %tmp14178, i64 1
+  %tmp14180 = getelementptr inbounds float* %tmp14179, i64 1
+  %tmp14181 = getelementptr inbounds float* %tmp14180, i64 1
+  %tmp14182 = getelementptr inbounds float* %tmp14181, i64 1
+  %tmp14183 = getelementptr inbounds float* %tmp14182, i64 1
+  %tmp14184 = getelementptr inbounds float* %tmp14183, i64 1
+  %tmp14185 = getelementptr inbounds float* %tmp14184, i64 1
+  %tmp14186 = getelementptr inbounds float* %tmp14185, i64 1
+  %tmp14187 = getelementptr inbounds float* %tmp14186, i64 1
+  %tmp14188 = getelementptr inbounds float* %tmp14187, i64 1
+  %tmp14189 = getelementptr inbounds float* %tmp14188, i64 1
+  %tmp14190 = getelementptr inbounds float* %tmp14189, i64 1
+  %tmp14191 = getelementptr inbounds float* %tmp14190, i64 1
+  %tmp14192 = getelementptr inbounds float* %tmp14191, i64 1
+  %tmp14193 = getelementptr inbounds float* %tmp14192, i64 1
+  %tmp14194 = getelementptr inbounds float* %tmp14193, i64 1
+  %tmp14195 = getelementptr inbounds float* %tmp14194, i64 1
+  %tmp14196 = getelementptr inbounds float* %tmp14195, i64 1
+  %tmp14197 = getelementptr inbounds float* %tmp14196, i64 1
+  %tmp14198 = getelementptr inbounds float* %tmp14197, i64 1
+  %tmp14199 = getelementptr inbounds float* %tmp14198, i64 1
+  %tmp14200 = getelementptr inbounds float* %tmp14199, i64 1
+  %tmp14201 = getelementptr inbounds float* %tmp14200, i64 1
+  %tmp14202 = getelementptr inbounds float* %tmp14201, i64 1
+  %tmp14203 = getelementptr inbounds float* %tmp14202, i64 1
+  %tmp14204 = getelementptr inbounds float* %tmp14203, i64 1
+  %tmp14205 = getelementptr inbounds float* %tmp14204, i64 1
+  %tmp14206 = getelementptr inbounds float* %tmp14205, i64 1
+  %tmp14207 = getelementptr inbounds float* %tmp14206, i64 1
+  %tmp14208 = getelementptr inbounds float* %tmp14207, i64 1
+  %tmp14209 = getelementptr inbounds float* %tmp14208, i64 1
+  %tmp14210 = getelementptr inbounds float* %tmp14209, i64 1
+  %tmp14211 = getelementptr inbounds float* %tmp14210, i64 1
+  %tmp14212 = getelementptr inbounds float* %tmp14211, i64 1
+  %tmp14213 = getelementptr inbounds float* %tmp14212, i64 1
+  %tmp14214 = getelementptr inbounds float* %tmp14213, i64 1
+  %tmp14215 = getelementptr inbounds float* %tmp14214, i64 1
+  %tmp14216 = getelementptr inbounds float* %tmp14215, i64 1
+  %tmp14217 = getelementptr inbounds float* %tmp14216, i64 1
+  %tmp14218 = getelementptr inbounds float* %tmp14217, i64 1
+  %tmp14219 = getelementptr inbounds float* %tmp14218, i64 1
+  %tmp14220 = getelementptr inbounds float* %tmp14219, i64 1
+  %tmp14221 = getelementptr inbounds float* %tmp14220, i64 1
+  %tmp14222 = getelementptr inbounds float* %tmp14221, i64 1
+  %tmp14223 = getelementptr inbounds float* %tmp14222, i64 1
+  %tmp14224 = getelementptr inbounds float* %tmp14223, i64 1
+  %tmp14225 = getelementptr inbounds float* %tmp14224, i64 1
+  %tmp14226 = getelementptr inbounds float* %tmp14225, i64 1
+  %tmp14227 = getelementptr inbounds float* %tmp14226, i64 1
+  %tmp14228 = getelementptr inbounds float* %tmp14227, i64 1
+  %tmp14229 = getelementptr inbounds float* %tmp14228, i64 1
+  %tmp14230 = getelementptr inbounds float* %tmp14229, i64 1
+  %tmp14231 = getelementptr inbounds float* %tmp14230, i64 1
+  %tmp14232 = getelementptr inbounds float* %tmp14231, i64 1
+  %tmp14233 = getelementptr inbounds float* %tmp14232, i64 1
+  %tmp14234 = getelementptr inbounds float* %tmp14233, i64 1
+  %tmp14235 = getelementptr inbounds float* %tmp14234, i64 1
+  %tmp14236 = getelementptr inbounds float* %tmp14235, i64 1
+  %tmp14237 = getelementptr inbounds float* %tmp14236, i64 1
+  %tmp14238 = getelementptr inbounds float* %tmp14237, i64 1
+  %tmp14239 = getelementptr inbounds float* %tmp14238, i64 1
+  %tmp14240 = getelementptr inbounds float* %tmp14239, i64 1
+  %tmp14241 = getelementptr inbounds float* %tmp14240, i64 1
+  %tmp14242 = getelementptr inbounds float* %tmp14241, i64 1
+  %tmp14243 = getelementptr inbounds float* %tmp14242, i64 1
+  %tmp14244 = getelementptr inbounds float* %tmp14243, i64 1
+  %tmp14245 = getelementptr inbounds float* %tmp14244, i64 1
+  %tmp14246 = getelementptr inbounds float* %tmp14245, i64 1
+  %tmp14247 = getelementptr inbounds float* %tmp14246, i64 1
+  %tmp14248 = getelementptr inbounds float* %tmp14247, i64 1
+  %tmp14249 = getelementptr inbounds float* %tmp14248, i64 1
+  %tmp14250 = getelementptr inbounds float* %tmp14249, i64 1
+  %tmp14251 = getelementptr inbounds float* %tmp14250, i64 1
+  %tmp14252 = getelementptr inbounds float* %tmp14251, i64 1
+  %tmp14253 = getelementptr inbounds float* %tmp14252, i64 1
+  %tmp14254 = getelementptr inbounds float* %tmp14253, i64 1
+  %tmp14255 = getelementptr inbounds float* %tmp14254, i64 1
+  %tmp14256 = getelementptr inbounds float* %tmp14255, i64 1
+  %tmp14257 = getelementptr inbounds float* %tmp14256, i64 1
+  %tmp14258 = getelementptr inbounds float* %tmp14257, i64 1
+  %tmp14259 = getelementptr inbounds float* %tmp14258, i64 1
+  %tmp14260 = getelementptr inbounds float* %tmp14259, i64 1
+  %tmp14261 = getelementptr inbounds float* %tmp14260, i64 1
+  %tmp14262 = getelementptr inbounds float* %tmp14261, i64 1
+  %tmp14263 = getelementptr inbounds float* %tmp14262, i64 1
+  %tmp14264 = getelementptr inbounds float* %tmp14263, i64 1
+  %tmp14265 = getelementptr inbounds float* %tmp14264, i64 1
+  %tmp14266 = getelementptr inbounds float* %tmp14265, i64 1
+  %tmp14267 = getelementptr inbounds float* %tmp14266, i64 1
+  %tmp14268 = getelementptr inbounds float* %tmp14267, i64 1
+  %tmp14269 = getelementptr inbounds float* %tmp14268, i64 1
+  %tmp14270 = getelementptr inbounds float* %tmp14269, i64 1
+  %tmp14271 = getelementptr inbounds float* %tmp14270, i64 1
+  %tmp14272 = getelementptr inbounds float* %tmp14271, i64 1
+  %tmp14273 = getelementptr inbounds float* %tmp14272, i64 1
+  %tmp14274 = getelementptr inbounds float* %tmp14273, i64 1
+  %tmp14275 = getelementptr inbounds float* %tmp14274, i64 1
+  %tmp14276 = getelementptr inbounds float* %tmp14275, i64 1
+  %tmp14277 = getelementptr inbounds float* %tmp14276, i64 1
+  %tmp14278 = getelementptr inbounds float* %tmp14277, i64 1
+  %tmp14279 = getelementptr inbounds float* %tmp14278, i64 1
+  %tmp14280 = getelementptr inbounds float* %tmp14279, i64 1
+  %tmp14281 = getelementptr inbounds float* %tmp14280, i64 1
+  %tmp14282 = getelementptr inbounds float* %tmp14281, i64 1
+  %tmp14283 = getelementptr inbounds float* %tmp14282, i64 1
+  %tmp14284 = getelementptr inbounds float* %tmp14283, i64 1
+  %tmp14285 = getelementptr inbounds float* %tmp14284, i64 1
+  %tmp14286 = getelementptr inbounds float* %tmp14285, i64 1
+  %tmp14287 = getelementptr inbounds float* %tmp14286, i64 1
+  %tmp14288 = getelementptr inbounds float* %tmp14287, i64 1
+  %tmp14289 = getelementptr inbounds float* %tmp14288, i64 1
+  %tmp14290 = getelementptr inbounds float* %tmp14289, i64 1
+  %tmp14291 = getelementptr inbounds float* %tmp14290, i64 1
+  %tmp14292 = getelementptr inbounds float* %tmp14291, i64 1
+  %tmp14293 = getelementptr inbounds float* %tmp14292, i64 1
+  %tmp14294 = getelementptr inbounds float* %tmp14293, i64 1
+  %tmp14295 = getelementptr inbounds float* %tmp14294, i64 1
+  %tmp14296 = getelementptr inbounds float* %tmp14295, i64 1
+  %tmp14297 = getelementptr inbounds float* %tmp14296, i64 1
+  %tmp14298 = getelementptr inbounds float* %tmp14297, i64 1
+  %tmp14299 = getelementptr inbounds float* %tmp14298, i64 1
+  %tmp14300 = getelementptr inbounds float* %tmp14299, i64 1
+  %tmp14301 = getelementptr inbounds float* %tmp14300, i64 1
+  %tmp14302 = getelementptr inbounds float* %tmp14301, i64 1
+  %tmp14303 = getelementptr inbounds float* %tmp14302, i64 1
+  %tmp14304 = getelementptr inbounds float* %tmp14303, i64 1
+  %tmp14305 = getelementptr inbounds float* %tmp14304, i64 1
+  %tmp14306 = getelementptr inbounds float* %tmp14305, i64 1
+  %tmp14307 = getelementptr inbounds float* %tmp14306, i64 1
+  %tmp14308 = getelementptr inbounds float* %tmp14307, i64 1
+  %tmp14309 = getelementptr inbounds float* %tmp14308, i64 1
+  %tmp14310 = getelementptr inbounds float* %tmp14309, i64 1
+  %tmp14311 = getelementptr inbounds float* %tmp14310, i64 1
+  %tmp14312 = getelementptr inbounds float* %tmp14311, i64 1
+  %tmp14313 = getelementptr inbounds float* %tmp14312, i64 1
+  %tmp14314 = getelementptr inbounds float* %tmp14313, i64 1
+  %tmp14315 = getelementptr inbounds float* %tmp14314, i64 1
+  %tmp14316 = getelementptr inbounds float* %tmp14315, i64 1
+  %tmp14317 = getelementptr inbounds float* %tmp14316, i64 1
+  %tmp14318 = getelementptr inbounds float* %tmp14317, i64 1
+  %tmp14319 = getelementptr inbounds float* %tmp14318, i64 1
+  %tmp14320 = getelementptr inbounds float* %tmp14319, i64 1
+  %tmp14321 = getelementptr inbounds float* %tmp14320, i64 1
+  %tmp14322 = getelementptr inbounds float* %tmp14321, i64 1
+  %tmp14323 = getelementptr inbounds float* %tmp14322, i64 1
+  %tmp14324 = getelementptr inbounds float* %tmp14323, i64 1
+  %tmp14325 = getelementptr inbounds float* %tmp14324, i64 1
+  %tmp14326 = getelementptr inbounds float* %tmp14325, i64 1
+  %tmp14327 = getelementptr inbounds float* %tmp14326, i64 1
+  %tmp14328 = getelementptr inbounds float* %tmp14327, i64 1
+  %tmp14329 = getelementptr inbounds float* %tmp14328, i64 1
+  %tmp14330 = getelementptr inbounds float* %tmp14329, i64 1
+  %tmp14331 = getelementptr inbounds float* %tmp14330, i64 1
+  %tmp14332 = getelementptr inbounds float* %tmp14331, i64 1
+  %tmp14333 = getelementptr inbounds float* %tmp14332, i64 1
+  %tmp14334 = getelementptr inbounds float* %tmp14333, i64 1
+  %tmp14335 = getelementptr inbounds float* %tmp14334, i64 1
+  %tmp14336 = getelementptr inbounds float* %tmp14335, i64 1
+  %tmp14337 = getelementptr inbounds float* %tmp14336, i64 1
+  %tmp14338 = getelementptr inbounds float* %tmp14337, i64 1
+  %tmp14339 = getelementptr inbounds float* %tmp14338, i64 1
+  %tmp14340 = getelementptr inbounds float* %tmp14339, i64 1
+  %tmp14341 = getelementptr inbounds float* %tmp14340, i64 1
+  %tmp14342 = getelementptr inbounds float* %tmp14341, i64 1
+  %tmp14343 = getelementptr inbounds float* %tmp14342, i64 1
+  %tmp14344 = getelementptr inbounds float* %tmp14343, i64 1
+  %tmp14345 = getelementptr inbounds float* %tmp14344, i64 1
+  %tmp14346 = getelementptr inbounds float* %tmp14345, i64 1
+  %tmp14347 = getelementptr inbounds float* %tmp14346, i64 1
+  %tmp14348 = getelementptr inbounds float* %tmp14347, i64 1
+  %tmp14349 = getelementptr inbounds float* %tmp14348, i64 1
+  %tmp14350 = getelementptr inbounds float* %tmp14349, i64 1
+  %tmp14351 = getelementptr inbounds float* %tmp14350, i64 1
+  %tmp14352 = getelementptr inbounds float* %tmp14351, i64 1
+  %tmp14353 = getelementptr inbounds float* %tmp14352, i64 1
+  %tmp14354 = getelementptr inbounds float* %tmp14353, i64 1
+  %tmp14355 = getelementptr inbounds float* %tmp14354, i64 1
+  %tmp14356 = getelementptr inbounds float* %tmp14355, i64 1
+  %tmp14357 = getelementptr inbounds float* %tmp14356, i64 1
+  %tmp14358 = getelementptr inbounds float* %tmp14357, i64 1
+  %tmp14359 = getelementptr inbounds float* %tmp14358, i64 1
+  %tmp14360 = getelementptr inbounds float* %tmp14359, i64 1
+  %tmp14361 = getelementptr inbounds float* %tmp14360, i64 1
+  %tmp14362 = getelementptr inbounds float* %tmp14361, i64 1
+  %tmp14363 = getelementptr inbounds float* %tmp14362, i64 1
+  %tmp14364 = getelementptr inbounds float* %tmp14363, i64 1
+  %tmp14365 = getelementptr inbounds float* %tmp14364, i64 1
+  %tmp14366 = getelementptr inbounds float* %tmp14365, i64 1
+  %tmp14367 = getelementptr inbounds float* %tmp14366, i64 1
+  %tmp14368 = getelementptr inbounds float* %tmp14367, i64 1
+  %tmp14369 = getelementptr inbounds float* %tmp14368, i64 1
+  %tmp14370 = getelementptr inbounds float* %tmp14369, i64 1
+  %tmp14371 = getelementptr inbounds float* %tmp14370, i64 1
+  %tmp14372 = getelementptr inbounds float* %tmp14371, i64 1
+  %tmp14373 = getelementptr inbounds float* %tmp14372, i64 1
+  %tmp14374 = getelementptr inbounds float* %tmp14373, i64 1
+  %tmp14375 = getelementptr inbounds float* %tmp14374, i64 1
+  %tmp14376 = getelementptr inbounds float* %tmp14375, i64 1
+  %tmp14377 = getelementptr inbounds float* %tmp14376, i64 1
+  %tmp14378 = getelementptr inbounds float* %tmp14377, i64 1
+  %tmp14379 = getelementptr inbounds float* %tmp14378, i64 1
+  %tmp14380 = getelementptr inbounds float* %tmp14379, i64 1
+  %tmp14381 = getelementptr inbounds float* %tmp14380, i64 1
+  %tmp14382 = getelementptr inbounds float* %tmp14381, i64 1
+  %tmp14383 = getelementptr inbounds float* %tmp14382, i64 1
+  %tmp14384 = getelementptr inbounds float* %tmp14383, i64 1
+  %tmp14385 = getelementptr inbounds float* %tmp14384, i64 1
+  %tmp14386 = getelementptr inbounds float* %tmp14385, i64 1
+  %tmp14387 = getelementptr inbounds float* %tmp14386, i64 1
+  %tmp14388 = getelementptr inbounds float* %tmp14387, i64 1
+  %tmp14389 = getelementptr inbounds float* %tmp14388, i64 1
+  %tmp14390 = getelementptr inbounds float* %tmp14389, i64 1
+  %tmp14391 = getelementptr inbounds float* %tmp14390, i64 1
+  %tmp14392 = getelementptr inbounds float* %tmp14391, i64 1
+  %tmp14393 = getelementptr inbounds float* %tmp14392, i64 1
+  %tmp14394 = getelementptr inbounds float* %tmp14393, i64 1
+  %tmp14395 = getelementptr inbounds float* %tmp14394, i64 1
+  %tmp14396 = getelementptr inbounds float* %tmp14395, i64 1
+  %tmp14397 = getelementptr inbounds float* %tmp14396, i64 1
+  %tmp14398 = getelementptr inbounds float* %tmp14397, i64 1
+  %tmp14399 = getelementptr inbounds float* %tmp14398, i64 1
+  %tmp14400 = getelementptr inbounds float* %tmp14399, i64 1
+  %tmp14401 = getelementptr inbounds float* %tmp14400, i64 1
+  %tmp14402 = getelementptr inbounds float* %tmp14401, i64 1
+  %tmp14403 = getelementptr inbounds float* %tmp14402, i64 1
+  %tmp14404 = getelementptr inbounds float* %tmp14403, i64 1
+  %tmp14405 = getelementptr inbounds float* %tmp14404, i64 1
+  %tmp14406 = getelementptr inbounds float* %tmp14405, i64 1
+  %tmp14407 = getelementptr inbounds float* %tmp14406, i64 1
+  %tmp14408 = getelementptr inbounds float* %tmp14407, i64 1
+  %tmp14409 = getelementptr inbounds float* %tmp14408, i64 1
+  %tmp14410 = getelementptr inbounds float* %tmp14409, i64 1
+  %tmp14411 = getelementptr inbounds float* %tmp14410, i64 1
+  %tmp14412 = getelementptr inbounds float* %tmp14411, i64 1
+  %tmp14413 = getelementptr inbounds float* %tmp14412, i64 1
+  %tmp14414 = getelementptr inbounds float* %tmp14413, i64 1
+  %tmp14415 = getelementptr inbounds float* %tmp14414, i64 1
+  %tmp14416 = getelementptr inbounds float* %tmp14415, i64 1
+  %tmp14417 = getelementptr inbounds float* %tmp14416, i64 1
+  %tmp14418 = getelementptr inbounds float* %tmp14417, i64 1
+  %tmp14419 = getelementptr inbounds float* %tmp14418, i64 1
+  %tmp14420 = getelementptr inbounds float* %tmp14419, i64 1
+  %tmp14421 = getelementptr inbounds float* %tmp14420, i64 1
+  %tmp14422 = getelementptr inbounds float* %tmp14421, i64 1
+  %tmp14423 = getelementptr inbounds float* %tmp14422, i64 1
+  %tmp14424 = getelementptr inbounds float* %tmp14423, i64 1
+  %tmp14425 = getelementptr inbounds float* %tmp14424, i64 1
+  %tmp14426 = getelementptr inbounds float* %tmp14425, i64 1
+  %tmp14427 = getelementptr inbounds float* %tmp14426, i64 1
+  %tmp14428 = getelementptr inbounds float* %tmp14427, i64 1
+  %tmp14429 = getelementptr inbounds float* %tmp14428, i64 1
+  %tmp14430 = getelementptr inbounds float* %tmp14429, i64 1
+  %tmp14431 = getelementptr inbounds float* %tmp14430, i64 1
+  %tmp14432 = getelementptr inbounds float* %tmp14431, i64 1
+  %tmp14433 = getelementptr inbounds float* %tmp14432, i64 1
+  %tmp14434 = getelementptr inbounds float* %tmp14433, i64 1
+  %tmp14435 = getelementptr inbounds float* %tmp14434, i64 1
+  %tmp14436 = getelementptr inbounds float* %tmp14435, i64 1
+  %tmp14437 = getelementptr inbounds float* %tmp14436, i64 1
+  %tmp14438 = getelementptr inbounds float* %tmp14437, i64 1
+  %tmp14439 = getelementptr inbounds float* %tmp14438, i64 1
+  %tmp14440 = getelementptr inbounds float* %tmp14439, i64 1
+  %tmp14441 = getelementptr inbounds float* %tmp14440, i64 1
+  %tmp14442 = getelementptr inbounds float* %tmp14441, i64 1
+  %tmp14443 = getelementptr inbounds float* %tmp14442, i64 1
+  %tmp14444 = getelementptr inbounds float* %tmp14443, i64 1
+  %tmp14445 = getelementptr inbounds float* %tmp14444, i64 1
+  %tmp14446 = getelementptr inbounds float* %tmp14445, i64 1
+  %tmp14447 = getelementptr inbounds float* %tmp14446, i64 1
+  %tmp14448 = getelementptr inbounds float* %tmp14447, i64 1
+  %tmp14449 = getelementptr inbounds float* %tmp14448, i64 1
+  %tmp14450 = getelementptr inbounds float* %tmp14449, i64 1
+  %tmp14451 = getelementptr inbounds float* %tmp14450, i64 1
+  %tmp14452 = getelementptr inbounds float* %tmp14451, i64 1
+  %tmp14453 = getelementptr inbounds float* %tmp14452, i64 1
+  %tmp14454 = getelementptr inbounds float* %tmp14453, i64 1
+  %tmp14455 = getelementptr inbounds float* %tmp14454, i64 1
+  %tmp14456 = getelementptr inbounds float* %tmp14455, i64 1
+  %tmp14457 = getelementptr inbounds float* %tmp14456, i64 1
+  %tmp14458 = getelementptr inbounds float* %tmp14457, i64 1
+  %tmp14459 = getelementptr inbounds float* %tmp14458, i64 1
+  %tmp14460 = getelementptr inbounds float* %tmp14459, i64 1
+  %tmp14461 = getelementptr inbounds float* %tmp14460, i64 1
+  %tmp14462 = getelementptr inbounds float* %tmp14461, i64 1
+  %tmp14463 = getelementptr inbounds float* %tmp14462, i64 1
+  %tmp14464 = getelementptr inbounds float* %tmp14463, i64 1
+  %tmp14465 = getelementptr inbounds float* %tmp14464, i64 1
+  %tmp14466 = getelementptr inbounds float* %tmp14465, i64 1
+  %tmp14467 = getelementptr inbounds float* %tmp14466, i64 1
+  %tmp14468 = getelementptr inbounds float* %tmp14467, i64 1
+  %tmp14469 = getelementptr inbounds float* %tmp14468, i64 1
+  %tmp14470 = getelementptr inbounds float* %tmp14469, i64 1
+  %tmp14471 = getelementptr inbounds float* %tmp14470, i64 1
+  %tmp14472 = getelementptr inbounds float* %tmp14471, i64 1
+  %tmp14473 = getelementptr inbounds float* %tmp14472, i64 1
+  %tmp14474 = getelementptr inbounds float* %tmp14473, i64 1
+  %tmp14475 = getelementptr inbounds float* %tmp14474, i64 1
+  %tmp14476 = getelementptr inbounds float* %tmp14475, i64 1
+  %tmp14477 = getelementptr inbounds float* %tmp14476, i64 1
+  %tmp14478 = getelementptr inbounds float* %tmp14477, i64 1
+  %tmp14479 = getelementptr inbounds float* %tmp14478, i64 1
+  %tmp14480 = getelementptr inbounds float* %tmp14479, i64 1
+  %tmp14481 = getelementptr inbounds float* %tmp14480, i64 1
+  %tmp14482 = getelementptr inbounds float* %tmp14481, i64 1
+  %tmp14483 = getelementptr inbounds float* %tmp14482, i64 1
+  %tmp14484 = getelementptr inbounds float* %tmp14483, i64 1
+  %tmp14485 = getelementptr inbounds float* %tmp14484, i64 1
+  %tmp14486 = getelementptr inbounds float* %tmp14485, i64 1
+  %tmp14487 = getelementptr inbounds float* %tmp14486, i64 1
+  %tmp14488 = getelementptr inbounds float* %tmp14487, i64 1
+  %tmp14489 = getelementptr inbounds float* %tmp14488, i64 1
+  %tmp14490 = getelementptr inbounds float* %tmp14489, i64 1
+  %tmp14491 = getelementptr inbounds float* %tmp14490, i64 1
+  %tmp14492 = getelementptr inbounds float* %tmp14491, i64 1
+  %tmp14493 = getelementptr inbounds float* %tmp14492, i64 1
+  %tmp14494 = getelementptr inbounds float* %tmp14493, i64 1
+  %tmp14495 = getelementptr inbounds float* %tmp14494, i64 1
+  %tmp14496 = getelementptr inbounds float* %tmp14495, i64 1
+  %tmp14497 = getelementptr inbounds float* %tmp14496, i64 1
+  %tmp14498 = getelementptr inbounds float* %tmp14497, i64 1
+  %tmp14499 = getelementptr inbounds float* %tmp14498, i64 1
+  %tmp14500 = getelementptr inbounds float* %tmp14499, i64 1
+  %tmp14501 = getelementptr inbounds float* %tmp14500, i64 1
+  %tmp14502 = getelementptr inbounds float* %tmp14501, i64 1
+  %tmp14503 = getelementptr inbounds float* %tmp14502, i64 1
+  %tmp14504 = getelementptr inbounds float* %tmp14503, i64 1
+  %tmp14505 = getelementptr inbounds float* %tmp14504, i64 1
+  %tmp14506 = getelementptr inbounds float* %tmp14505, i64 1
+  %tmp14507 = getelementptr inbounds float* %tmp14506, i64 1
+  %tmp14508 = getelementptr inbounds float* %tmp14507, i64 1
+  %tmp14509 = getelementptr inbounds float* %tmp14508, i64 1
+  %tmp14510 = getelementptr inbounds float* %tmp14509, i64 1
+  %tmp14511 = getelementptr inbounds float* %tmp14510, i64 1
+  %tmp14512 = getelementptr inbounds float* %tmp14511, i64 1
+  %tmp14513 = getelementptr inbounds float* %tmp14512, i64 1
+  %tmp14514 = getelementptr inbounds float* %tmp14513, i64 1
+  %tmp14515 = getelementptr inbounds float* %tmp14514, i64 1
+  %tmp14516 = getelementptr inbounds float* %tmp14515, i64 1
+  %tmp14517 = getelementptr inbounds float* %tmp14516, i64 1
+  %tmp14518 = getelementptr inbounds float* %tmp14517, i64 1
+  %tmp14519 = getelementptr inbounds float* %tmp14518, i64 1
+  %tmp14520 = getelementptr inbounds float* %tmp14519, i64 1
+  %tmp14521 = getelementptr inbounds float* %tmp14520, i64 1
+  %tmp14522 = getelementptr inbounds float* %tmp14521, i64 1
+  %tmp14523 = getelementptr inbounds float* %tmp14522, i64 1
+  %tmp14524 = getelementptr inbounds float* %tmp14523, i64 1
+  %tmp14525 = getelementptr inbounds float* %tmp14524, i64 1
+  %tmp14526 = getelementptr inbounds float* %tmp14525, i64 1
+  %tmp14527 = getelementptr inbounds float* %tmp14526, i64 1
+  %tmp14528 = getelementptr inbounds float* %tmp14527, i64 1
+  %tmp14529 = getelementptr inbounds float* %tmp14528, i64 1
+  %tmp14530 = getelementptr inbounds float* %tmp14529, i64 1
+  %tmp14531 = getelementptr inbounds float* %tmp14530, i64 1
+  %tmp14532 = getelementptr inbounds float* %tmp14531, i64 1
+  %tmp14533 = getelementptr inbounds float* %tmp14532, i64 1
+  %tmp14534 = getelementptr inbounds float* %tmp14533, i64 1
+  %tmp14535 = getelementptr inbounds float* %tmp14534, i64 1
+  %tmp14536 = getelementptr inbounds float* %tmp14535, i64 1
+  %tmp14537 = getelementptr inbounds float* %tmp14536, i64 1
+  %tmp14538 = getelementptr inbounds float* %tmp14537, i64 1
+  %tmp14539 = getelementptr inbounds float* %tmp14538, i64 1
+  %tmp14540 = getelementptr inbounds float* %tmp14539, i64 1
+  %tmp14541 = getelementptr inbounds float* %tmp14540, i64 1
+  %tmp14542 = getelementptr inbounds float* %tmp14541, i64 1
+  %tmp14543 = getelementptr inbounds float* %tmp14542, i64 1
+  %tmp14544 = getelementptr inbounds float* %tmp14543, i64 1
+  %tmp14545 = getelementptr inbounds float* %tmp14544, i64 1
+  %tmp14546 = getelementptr inbounds float* %tmp14545, i64 1
+  %tmp14547 = getelementptr inbounds float* %tmp14546, i64 1
+  %tmp14548 = getelementptr inbounds float* %tmp14547, i64 1
+  %tmp14549 = getelementptr inbounds float* %tmp14548, i64 1
+  %tmp14550 = getelementptr inbounds float* %tmp14549, i64 1
+  %tmp14551 = getelementptr inbounds float* %tmp14550, i64 1
+  %tmp14552 = getelementptr inbounds float* %tmp14551, i64 1
+  %tmp14553 = getelementptr inbounds float* %tmp14552, i64 1
+  %tmp14554 = getelementptr inbounds float* %tmp14553, i64 1
+  %tmp14555 = getelementptr inbounds float* %tmp14554, i64 1
+  %tmp14556 = getelementptr inbounds float* %tmp14555, i64 1
+  %tmp14557 = getelementptr inbounds float* %tmp14556, i64 1
+  %tmp14558 = getelementptr inbounds float* %tmp14557, i64 1
+  %tmp14559 = getelementptr inbounds float* %tmp14558, i64 1
+  %tmp14560 = getelementptr inbounds float* %tmp14559, i64 1
+  %tmp14561 = getelementptr inbounds float* %tmp14560, i64 1
+  %tmp14562 = getelementptr inbounds float* %tmp14561, i64 1
+  %tmp14563 = getelementptr inbounds float* %tmp14562, i64 1
+  %tmp14564 = getelementptr inbounds float* %tmp14563, i64 1
+  %tmp14565 = getelementptr inbounds float* %tmp14564, i64 1
+  %tmp14566 = getelementptr inbounds float* %tmp14565, i64 1
+  %tmp14567 = getelementptr inbounds float* %tmp14566, i64 1
+  %tmp14568 = getelementptr inbounds float* %tmp14567, i64 1
+  %tmp14569 = getelementptr inbounds float* %tmp14568, i64 1
+  %tmp14570 = getelementptr inbounds float* %tmp14569, i64 1
+  %tmp14571 = getelementptr inbounds float* %tmp14570, i64 1
+  %tmp14572 = getelementptr inbounds float* %tmp14571, i64 1
+  %tmp14573 = getelementptr inbounds float* %tmp14572, i64 1
+  %tmp14574 = getelementptr inbounds float* %tmp14573, i64 1
+  %tmp14575 = getelementptr inbounds float* %tmp14574, i64 1
+  %tmp14576 = getelementptr inbounds float* %tmp14575, i64 1
+  %tmp14577 = getelementptr inbounds float* %tmp14576, i64 1
+  %tmp14578 = getelementptr inbounds float* %tmp14577, i64 1
+  %tmp14579 = getelementptr inbounds float* %tmp14578, i64 1
+  %tmp14580 = getelementptr inbounds float* %tmp14579, i64 1
+  %tmp14581 = getelementptr inbounds float* %tmp14580, i64 1
+  %tmp14582 = getelementptr inbounds float* %tmp14581, i64 1
+  %tmp14583 = getelementptr inbounds float* %tmp14582, i64 1
+  %tmp14584 = getelementptr inbounds float* %tmp14583, i64 1
+  %tmp14585 = getelementptr inbounds float* %tmp14584, i64 1
+  %tmp14586 = getelementptr inbounds float* %tmp14585, i64 1
+  %tmp14587 = getelementptr inbounds float* %tmp14586, i64 1
+  %tmp14588 = getelementptr inbounds float* %tmp14587, i64 1
+  %tmp14589 = getelementptr inbounds float* %tmp14588, i64 1
+  %tmp14590 = getelementptr inbounds float* %tmp14589, i64 1
+  %tmp14591 = getelementptr inbounds float* %tmp14590, i64 1
+  %tmp14592 = getelementptr inbounds float* %tmp14591, i64 1
+  %tmp14593 = getelementptr inbounds float* %tmp14592, i64 1
+  %tmp14594 = getelementptr inbounds float* %tmp14593, i64 1
+  %tmp14595 = getelementptr inbounds float* %tmp14594, i64 1
+  %tmp14596 = getelementptr inbounds float* %tmp14595, i64 1
+  %tmp14597 = getelementptr inbounds float* %tmp14596, i64 1
+  %tmp14598 = getelementptr inbounds float* %tmp14597, i64 1
+  %tmp14599 = getelementptr inbounds float* %tmp14598, i64 1
+  %tmp14600 = getelementptr inbounds float* %tmp14599, i64 1
+  %tmp14601 = getelementptr inbounds float* %tmp14600, i64 1
+  %tmp14602 = getelementptr inbounds float* %tmp14601, i64 1
+  %tmp14603 = getelementptr inbounds float* %tmp14602, i64 1
+  %tmp14604 = getelementptr inbounds float* %tmp14603, i64 1
+  %tmp14605 = getelementptr inbounds float* %tmp14604, i64 1
+  %tmp14606 = getelementptr inbounds float* %tmp14605, i64 1
+  %tmp14607 = getelementptr inbounds float* %tmp14606, i64 1
+  %tmp14608 = getelementptr inbounds float* %tmp14607, i64 1
+  %tmp14609 = getelementptr inbounds float* %tmp14608, i64 1
+  %tmp14610 = getelementptr inbounds float* %tmp14609, i64 1
+  %tmp14611 = getelementptr inbounds float* %tmp14610, i64 1
+  %tmp14612 = getelementptr inbounds float* %tmp14611, i64 1
+  %tmp14613 = getelementptr inbounds float* %tmp14612, i64 1
+  %tmp14614 = getelementptr inbounds float* %tmp14613, i64 1
+  %tmp14615 = getelementptr inbounds float* %tmp14614, i64 1
+  %tmp14616 = getelementptr inbounds float* %tmp14615, i64 1
+  %tmp14617 = getelementptr inbounds float* %tmp14616, i64 1
+  %tmp14618 = getelementptr inbounds float* %tmp14617, i64 1
+  %tmp14619 = getelementptr inbounds float* %tmp14618, i64 1
+  %tmp14620 = getelementptr inbounds float* %tmp14619, i64 1
+  %tmp14621 = getelementptr inbounds float* %tmp14620, i64 1
+  %tmp14622 = getelementptr inbounds float* %tmp14621, i64 1
+  %tmp14623 = getelementptr inbounds float* %tmp14622, i64 1
+  %tmp14624 = getelementptr inbounds float* %tmp14623, i64 1
+  %tmp14625 = getelementptr inbounds float* %tmp14624, i64 1
+  %tmp14626 = getelementptr inbounds float* %tmp14625, i64 1
+  %tmp14627 = getelementptr inbounds float* %tmp14626, i64 1
+  %tmp14628 = getelementptr inbounds float* %tmp14627, i64 1
+  %tmp14629 = getelementptr inbounds float* %tmp14628, i64 1
+  %tmp14630 = getelementptr inbounds float* %tmp14629, i64 1
+  %tmp14631 = getelementptr inbounds float* %tmp14630, i64 1
+  %tmp14632 = getelementptr inbounds float* %tmp14631, i64 1
+  %tmp14633 = getelementptr inbounds float* %tmp14632, i64 1
+  %tmp14634 = getelementptr inbounds float* %tmp14633, i64 1
+  %tmp14635 = getelementptr inbounds float* %tmp14634, i64 1
+  %tmp14636 = getelementptr inbounds float* %tmp14635, i64 1
+  %tmp14637 = getelementptr inbounds float* %tmp14636, i64 1
+  %tmp14638 = getelementptr inbounds float* %tmp14637, i64 1
+  %tmp14639 = getelementptr inbounds float* %tmp14638, i64 1
+  %tmp14640 = getelementptr inbounds float* %tmp14639, i64 1
+  %tmp14641 = getelementptr inbounds float* %tmp14640, i64 1
+  %tmp14642 = getelementptr inbounds float* %tmp14641, i64 1
+  %tmp14643 = getelementptr inbounds float* %tmp14642, i64 1
+  %tmp14644 = getelementptr inbounds float* %tmp14643, i64 1
+  %tmp14645 = getelementptr inbounds float* %tmp14644, i64 1
+  %tmp14646 = getelementptr inbounds float* %tmp14645, i64 1
+  %tmp14647 = getelementptr inbounds float* %tmp14646, i64 1
+  %tmp14648 = getelementptr inbounds float* %tmp14647, i64 1
+  %tmp14649 = getelementptr inbounds float* %tmp14648, i64 1
+  %tmp14650 = getelementptr inbounds float* %tmp14649, i64 1
+  %tmp14651 = getelementptr inbounds float* %tmp14650, i64 1
+  %tmp14652 = getelementptr inbounds float* %tmp14651, i64 1
+  %tmp14653 = getelementptr inbounds float* %tmp14652, i64 1
+  %tmp14654 = getelementptr inbounds float* %tmp14653, i64 1
+  %tmp14655 = getelementptr inbounds float* %tmp14654, i64 1
+  %tmp14656 = getelementptr inbounds float* %tmp14655, i64 1
+  %tmp14657 = getelementptr inbounds float* %tmp14656, i64 1
+  %tmp14658 = getelementptr inbounds float* %tmp14657, i64 1
+  %tmp14659 = getelementptr inbounds float* %tmp14658, i64 1
+  %tmp14660 = getelementptr inbounds float* %tmp14659, i64 1
+  %tmp14661 = getelementptr inbounds float* %tmp14660, i64 1
+  %tmp14662 = getelementptr inbounds float* %tmp14661, i64 1
+  %tmp14663 = getelementptr inbounds float* %tmp14662, i64 1
+  %tmp14664 = getelementptr inbounds float* %tmp14663, i64 1
+  %tmp14665 = getelementptr inbounds float* %tmp14664, i64 1
+  %tmp14666 = getelementptr inbounds float* %tmp14665, i64 1
+  %tmp14667 = getelementptr inbounds float* %tmp14666, i64 1
+  %tmp14668 = getelementptr inbounds float* %tmp14667, i64 1
+  %tmp14669 = getelementptr inbounds float* %tmp14668, i64 1
+  %tmp14670 = getelementptr inbounds float* %tmp14669, i64 1
+  %tmp14671 = getelementptr inbounds float* %tmp14670, i64 1
+  %tmp14672 = getelementptr inbounds float* %tmp14671, i64 1
+  %tmp14673 = getelementptr inbounds float* %tmp14672, i64 1
+  %tmp14674 = getelementptr inbounds float* %tmp14673, i64 1
+  %tmp14675 = getelementptr inbounds float* %tmp14674, i64 1
+  %tmp14676 = getelementptr inbounds float* %tmp14675, i64 1
+  %tmp14677 = getelementptr inbounds float* %tmp14676, i64 1
+  %tmp14678 = getelementptr inbounds float* %tmp14677, i64 1
+  %tmp14679 = getelementptr inbounds float* %tmp14678, i64 1
+  %tmp14680 = getelementptr inbounds float* %tmp14679, i64 1
+  %tmp14681 = getelementptr inbounds float* %tmp14680, i64 1
+  %tmp14682 = getelementptr inbounds float* %tmp14681, i64 1
+  %tmp14683 = getelementptr inbounds float* %tmp14682, i64 1
+  %tmp14684 = getelementptr inbounds float* %tmp14683, i64 1
+  %tmp14685 = getelementptr inbounds float* %tmp14684, i64 1
+  %tmp14686 = getelementptr inbounds float* %tmp14685, i64 1
+  %tmp14687 = getelementptr inbounds float* %tmp14686, i64 1
+  %tmp14688 = getelementptr inbounds float* %tmp14687, i64 1
+  %tmp14689 = getelementptr inbounds float* %tmp14688, i64 1
+  %tmp14690 = getelementptr inbounds float* %tmp14689, i64 1
+  %tmp14691 = getelementptr inbounds float* %tmp14690, i64 1
+  %tmp14692 = getelementptr inbounds float* %tmp14691, i64 1
+  %tmp14693 = getelementptr inbounds float* %tmp14692, i64 1
+  %tmp14694 = getelementptr inbounds float* %tmp14693, i64 1
+  %tmp14695 = getelementptr inbounds float* %tmp14694, i64 1
+  %tmp14696 = getelementptr inbounds float* %tmp14695, i64 1
+  %tmp14697 = getelementptr inbounds float* %tmp14696, i64 1
+  %tmp14698 = getelementptr inbounds float* %tmp14697, i64 1
+  %tmp14699 = getelementptr inbounds float* %tmp14698, i64 1
+  %tmp14700 = getelementptr inbounds float* %tmp14699, i64 1
+  %tmp14701 = getelementptr inbounds float* %tmp14700, i64 1
+  %tmp14702 = getelementptr inbounds float* %tmp14701, i64 1
+  %tmp14703 = getelementptr inbounds float* %tmp14702, i64 1
+  %tmp14704 = getelementptr inbounds float* %tmp14703, i64 1
+  %tmp14705 = getelementptr inbounds float* %tmp14704, i64 1
+  %tmp14706 = getelementptr inbounds float* %tmp14705, i64 1
+  %tmp14707 = getelementptr inbounds float* %tmp14706, i64 1
+  %tmp14708 = getelementptr inbounds float* %tmp14707, i64 1
+  %tmp14709 = getelementptr inbounds float* %tmp14708, i64 1
+  %tmp14710 = getelementptr inbounds float* %tmp14709, i64 1
+  %tmp14711 = getelementptr inbounds float* %tmp14710, i64 1
+  %tmp14712 = getelementptr inbounds float* %tmp14711, i64 1
+  %tmp14713 = getelementptr inbounds float* %tmp14712, i64 1
+  %tmp14714 = getelementptr inbounds float* %tmp14713, i64 1
+  %tmp14715 = getelementptr inbounds float* %tmp14714, i64 1
+  %tmp14716 = getelementptr inbounds float* %tmp14715, i64 1
+  %tmp14717 = getelementptr inbounds float* %tmp14716, i64 1
+  %tmp14718 = getelementptr inbounds float* %tmp14717, i64 1
+  %tmp14719 = getelementptr inbounds float* %tmp14718, i64 1
+  %tmp14720 = getelementptr inbounds float* %tmp14719, i64 1
+  %tmp14721 = getelementptr inbounds float* %tmp14720, i64 1
+  %tmp14722 = getelementptr inbounds float* %tmp14721, i64 1
+  %tmp14723 = getelementptr inbounds float* %tmp14722, i64 1
+  %tmp14724 = getelementptr inbounds float* %tmp14723, i64 1
+  %tmp14725 = getelementptr inbounds float* %tmp14724, i64 1
+  %tmp14726 = getelementptr inbounds float* %tmp14725, i64 1
+  %tmp14727 = getelementptr inbounds float* %tmp14726, i64 1
+  %tmp14728 = getelementptr inbounds float* %tmp14727, i64 1
+  %tmp14729 = getelementptr inbounds float* %tmp14728, i64 1
+  %tmp14730 = getelementptr inbounds float* %tmp14729, i64 1
+  %tmp14731 = getelementptr inbounds float* %tmp14730, i64 1
+  %tmp14732 = getelementptr inbounds float* %tmp14731, i64 1
+  %tmp14733 = getelementptr inbounds float* %tmp14732, i64 1
+  %tmp14734 = getelementptr inbounds float* %tmp14733, i64 1
+  %tmp14735 = getelementptr inbounds float* %tmp14734, i64 1
+  %tmp14736 = getelementptr inbounds float* %tmp14735, i64 1
+  %tmp14737 = getelementptr inbounds float* %tmp14736, i64 1
+  %tmp14738 = getelementptr inbounds float* %tmp14737, i64 1
+  %tmp14739 = getelementptr inbounds float* %tmp14738, i64 1
+  %tmp14740 = getelementptr inbounds float* %tmp14739, i64 1
+  %tmp14741 = getelementptr inbounds float* %tmp14740, i64 1
+  %tmp14742 = getelementptr inbounds float* %tmp14741, i64 1
+  %tmp14743 = getelementptr inbounds float* %tmp14742, i64 1
+  %tmp14744 = getelementptr inbounds float* %tmp14743, i64 1
+  %tmp14745 = getelementptr inbounds float* %tmp14744, i64 1
+  %tmp14746 = getelementptr inbounds float* %tmp14745, i64 1
+  %tmp14747 = getelementptr inbounds float* %tmp14746, i64 1
+  %tmp14748 = getelementptr inbounds float* %tmp14747, i64 1
+  %tmp14749 = getelementptr inbounds float* %tmp14748, i64 1
+  %tmp14750 = getelementptr inbounds float* %tmp14749, i64 1
+  %tmp14751 = getelementptr inbounds float* %tmp14750, i64 1
+  %tmp14752 = getelementptr inbounds float* %tmp14751, i64 1
+  %tmp14753 = getelementptr inbounds float* %tmp14752, i64 1
+  %tmp14754 = getelementptr inbounds float* %tmp14753, i64 1
+  %tmp14755 = getelementptr inbounds float* %tmp14754, i64 1
+  %tmp14756 = getelementptr inbounds float* %tmp14755, i64 1
+  %tmp14757 = getelementptr inbounds float* %tmp14756, i64 1
+  %tmp14758 = getelementptr inbounds float* %tmp14757, i64 1
+  %tmp14759 = getelementptr inbounds float* %tmp14758, i64 1
+  %tmp14760 = getelementptr inbounds float* %tmp14759, i64 1
+  %tmp14761 = getelementptr inbounds float* %tmp14760, i64 1
+  %tmp14762 = getelementptr inbounds float* %tmp14761, i64 1
+  %tmp14763 = getelementptr inbounds float* %tmp14762, i64 1
+  %tmp14764 = getelementptr inbounds float* %tmp14763, i64 1
+  %tmp14765 = getelementptr inbounds float* %tmp14764, i64 1
+  %tmp14766 = getelementptr inbounds float* %tmp14765, i64 1
+  %tmp14767 = getelementptr inbounds float* %tmp14766, i64 1
+  %tmp14768 = getelementptr inbounds float* %tmp14767, i64 1
+  %tmp14769 = getelementptr inbounds float* %tmp14768, i64 1
+  %tmp14770 = getelementptr inbounds float* %tmp14769, i64 1
+  %tmp14771 = getelementptr inbounds float* %tmp14770, i64 1
+  %tmp14772 = getelementptr inbounds float* %tmp14771, i64 1
+  %tmp14773 = getelementptr inbounds float* %tmp14772, i64 1
+  %tmp14774 = getelementptr inbounds float* %tmp14773, i64 1
+  %tmp14775 = getelementptr inbounds float* %tmp14774, i64 1
+  %tmp14776 = getelementptr inbounds float* %tmp14775, i64 1
+  %tmp14777 = getelementptr inbounds float* %tmp14776, i64 1
+  %tmp14778 = getelementptr inbounds float* %tmp14777, i64 1
+  %tmp14779 = getelementptr inbounds float* %tmp14778, i64 1
+  %tmp14780 = getelementptr inbounds float* %tmp14779, i64 1
+  %tmp14781 = getelementptr inbounds float* %tmp14780, i64 1
+  %tmp14782 = getelementptr inbounds float* %tmp14781, i64 1
+  %tmp14783 = getelementptr inbounds float* %tmp14782, i64 1
+  %tmp14784 = getelementptr inbounds float* %tmp14783, i64 1
+  %tmp14785 = getelementptr inbounds float* %tmp14784, i64 1
+  %tmp14786 = getelementptr inbounds float* %tmp14785, i64 1
+  %tmp14787 = getelementptr inbounds float* %tmp14786, i64 1
+  %tmp14788 = getelementptr inbounds float* %tmp14787, i64 1
+  %tmp14789 = getelementptr inbounds float* %tmp14788, i64 1
+  %tmp14790 = getelementptr inbounds float* %tmp14789, i64 1
+  %tmp14791 = getelementptr inbounds float* %tmp14790, i64 1
+  %tmp14792 = getelementptr inbounds float* %tmp14791, i64 1
+  %tmp14793 = getelementptr inbounds float* %tmp14792, i64 1
+  %tmp14794 = getelementptr inbounds float* %tmp14793, i64 1
+  %tmp14795 = getelementptr inbounds float* %tmp14794, i64 1
+  %tmp14796 = getelementptr inbounds float* %tmp14795, i64 1
+  %tmp14797 = getelementptr inbounds float* %tmp14796, i64 1
+  %tmp14798 = getelementptr inbounds float* %tmp14797, i64 1
+  %tmp14799 = getelementptr inbounds float* %tmp14798, i64 1
+  %tmp14800 = getelementptr inbounds float* %tmp14799, i64 1
+  %tmp14801 = getelementptr inbounds float* %tmp14800, i64 1
+  %tmp14802 = getelementptr inbounds float* %tmp14801, i64 1
+  %tmp14803 = getelementptr inbounds float* %tmp14802, i64 1
+  %tmp14804 = getelementptr inbounds float* %tmp14803, i64 1
+  %tmp14805 = getelementptr inbounds float* %tmp14804, i64 1
+  %tmp14806 = getelementptr inbounds float* %tmp14805, i64 1
+  %tmp14807 = getelementptr inbounds float* %tmp14806, i64 1
+  %tmp14808 = getelementptr inbounds float* %tmp14807, i64 1
+  %tmp14809 = getelementptr inbounds float* %tmp14808, i64 1
+  %tmp14810 = getelementptr inbounds float* %tmp14809, i64 1
+  %tmp14811 = getelementptr inbounds float* %tmp14810, i64 1
+  %tmp14812 = getelementptr inbounds float* %tmp14811, i64 1
+  %tmp14813 = getelementptr inbounds float* %tmp14812, i64 1
+  %tmp14814 = getelementptr inbounds float* %tmp14813, i64 1
+  %tmp14815 = getelementptr inbounds float* %tmp14814, i64 1
+  %tmp14816 = getelementptr inbounds float* %tmp14815, i64 1
+  %tmp14817 = getelementptr inbounds float* %tmp14816, i64 1
+  %tmp14818 = getelementptr inbounds float* %tmp14817, i64 1
+  %tmp14819 = getelementptr inbounds float* %tmp14818, i64 1
+  %tmp14820 = getelementptr inbounds float* %tmp14819, i64 1
+  %tmp14821 = getelementptr inbounds float* %tmp14820, i64 1
+  %tmp14822 = getelementptr inbounds float* %tmp14821, i64 1
+  %tmp14823 = getelementptr inbounds float* %tmp14822, i64 1
+  %tmp14824 = getelementptr inbounds float* %tmp14823, i64 1
+  %tmp14825 = getelementptr inbounds float* %tmp14824, i64 1
+  %tmp14826 = getelementptr inbounds float* %tmp14825, i64 1
+  %tmp14827 = getelementptr inbounds float* %tmp14826, i64 1
+  %tmp14828 = getelementptr inbounds float* %tmp14827, i64 1
+  %tmp14829 = getelementptr inbounds float* %tmp14828, i64 1
+  %tmp14830 = getelementptr inbounds float* %tmp14829, i64 1
+  %tmp14831 = getelementptr inbounds float* %tmp14830, i64 1
+  %tmp14832 = getelementptr inbounds float* %tmp14831, i64 1
+  %tmp14833 = getelementptr inbounds float* %tmp14832, i64 1
+  %tmp14834 = getelementptr inbounds float* %tmp14833, i64 1
+  %tmp14835 = getelementptr inbounds float* %tmp14834, i64 1
+  %tmp14836 = getelementptr inbounds float* %tmp14835, i64 1
+  %tmp14837 = getelementptr inbounds float* %tmp14836, i64 1
+  %tmp14838 = getelementptr inbounds float* %tmp14837, i64 1
+  %tmp14839 = getelementptr inbounds float* %tmp14838, i64 1
+  %tmp14840 = getelementptr inbounds float* %tmp14839, i64 1
+  %tmp14841 = getelementptr inbounds float* %tmp14840, i64 1
+  %tmp14842 = getelementptr inbounds float* %tmp14841, i64 1
+  %tmp14843 = getelementptr inbounds float* %tmp14842, i64 1
+  %tmp14844 = getelementptr inbounds float* %tmp14843, i64 1
+  %tmp14845 = getelementptr inbounds float* %tmp14844, i64 1
+  %tmp14846 = getelementptr inbounds float* %tmp14845, i64 1
+  %tmp14847 = getelementptr inbounds float* %tmp14846, i64 1
+  %tmp14848 = getelementptr inbounds float* %tmp14847, i64 1
+  %tmp14849 = getelementptr inbounds float* %tmp14848, i64 1
+  %tmp14850 = getelementptr inbounds float* %tmp14849, i64 1
+  %tmp14851 = getelementptr inbounds float* %tmp14850, i64 1
+  %tmp14852 = getelementptr inbounds float* %tmp14851, i64 1
+  %tmp14853 = getelementptr inbounds float* %tmp14852, i64 1
+  %tmp14854 = getelementptr inbounds float* %tmp14853, i64 1
+  %tmp14855 = getelementptr inbounds float* %tmp14854, i64 1
+  %tmp14856 = getelementptr inbounds float* %tmp14855, i64 1
+  %tmp14857 = getelementptr inbounds float* %tmp14856, i64 1
+  %tmp14858 = getelementptr inbounds float* %tmp14857, i64 1
+  %tmp14859 = getelementptr inbounds float* %tmp14858, i64 1
+  %tmp14860 = getelementptr inbounds float* %tmp14859, i64 1
+  %tmp14861 = getelementptr inbounds float* %tmp14860, i64 1
+  %tmp14862 = getelementptr inbounds float* %tmp14861, i64 1
+  %tmp14863 = getelementptr inbounds float* %tmp14862, i64 1
+  %tmp14864 = getelementptr inbounds float* %tmp14863, i64 1
+  %tmp14865 = getelementptr inbounds float* %tmp14864, i64 1
+  %tmp14866 = getelementptr inbounds float* %tmp14865, i64 1
+  %tmp14867 = getelementptr inbounds float* %tmp14866, i64 1
+  %tmp14868 = getelementptr inbounds float* %tmp14867, i64 1
+  %tmp14869 = getelementptr inbounds float* %tmp14868, i64 1
+  %tmp14870 = getelementptr inbounds float* %tmp14869, i64 1
+  %tmp14871 = getelementptr inbounds float* %tmp14870, i64 1
+  %tmp14872 = getelementptr inbounds float* %tmp14871, i64 1
+  %tmp14873 = getelementptr inbounds float* %tmp14872, i64 1
+  %tmp14874 = getelementptr inbounds float* %tmp14873, i64 1
+  %tmp14875 = getelementptr inbounds float* %tmp14874, i64 1
+  %tmp14876 = getelementptr inbounds float* %tmp14875, i64 1
+  %tmp14877 = getelementptr inbounds float* %tmp14876, i64 1
+  %tmp14878 = getelementptr inbounds float* %tmp14877, i64 1
+  %tmp14879 = getelementptr inbounds float* %tmp14878, i64 1
+  %tmp14880 = getelementptr inbounds float* %tmp14879, i64 1
+  %tmp14881 = getelementptr inbounds float* %tmp14880, i64 1
+  %tmp14882 = getelementptr inbounds float* %tmp14881, i64 1
+  %tmp14883 = getelementptr inbounds float* %tmp14882, i64 1
+  %tmp14884 = getelementptr inbounds float* %tmp14883, i64 1
+  %tmp14885 = getelementptr inbounds float* %tmp14884, i64 1
+  %tmp14886 = getelementptr inbounds float* %tmp14885, i64 1
+  %tmp14887 = getelementptr inbounds float* %tmp14886, i64 1
+  %tmp14888 = getelementptr inbounds float* %tmp14887, i64 1
+  %tmp14889 = getelementptr inbounds float* %tmp14888, i64 1
+  %tmp14890 = getelementptr inbounds float* %tmp14889, i64 1
+  %tmp14891 = getelementptr inbounds float* %tmp14890, i64 1
+  %tmp14892 = getelementptr inbounds float* %tmp14891, i64 1
+  %tmp14893 = getelementptr inbounds float* %tmp14892, i64 1
+  %tmp14894 = getelementptr inbounds float* %tmp14893, i64 1
+  %tmp14895 = getelementptr inbounds float* %tmp14894, i64 1
+  %tmp14896 = getelementptr inbounds float* %tmp14895, i64 1
+  %tmp14897 = getelementptr inbounds float* %tmp14896, i64 1
+  %tmp14898 = getelementptr inbounds float* %tmp14897, i64 1
+  %tmp14899 = getelementptr inbounds float* %tmp14898, i64 1
+  %tmp14900 = getelementptr inbounds float* %tmp14899, i64 1
+  %tmp14901 = getelementptr inbounds float* %tmp14900, i64 1
+  %tmp14902 = getelementptr inbounds float* %tmp14901, i64 1
+  %tmp14903 = getelementptr inbounds float* %tmp14902, i64 1
+  %tmp14904 = getelementptr inbounds float* %tmp14903, i64 1
+  %tmp14905 = getelementptr inbounds float* %tmp14904, i64 1
+  %tmp14906 = getelementptr inbounds float* %tmp14905, i64 1
+  %tmp14907 = getelementptr inbounds float* %tmp14906, i64 1
+  %tmp14908 = getelementptr inbounds float* %tmp14907, i64 1
+  %tmp14909 = getelementptr inbounds float* %tmp14908, i64 1
+  %tmp14910 = getelementptr inbounds float* %tmp14909, i64 1
+  %tmp14911 = getelementptr inbounds float* %tmp14910, i64 1
+  %tmp14912 = getelementptr inbounds float* %tmp14911, i64 1
+  %tmp14913 = getelementptr inbounds float* %tmp14912, i64 1
+  %tmp14914 = getelementptr inbounds float* %tmp14913, i64 1
+  %tmp14915 = getelementptr inbounds float* %tmp14914, i64 1
+  %tmp14916 = getelementptr inbounds float* %tmp14915, i64 1
+  %tmp14917 = getelementptr inbounds float* %tmp14916, i64 1
+  %tmp14918 = getelementptr inbounds float* %tmp14917, i64 1
+  %tmp14919 = getelementptr inbounds float* %tmp14918, i64 1
+  %tmp14920 = getelementptr inbounds float* %tmp14919, i64 1
+  %tmp14921 = getelementptr inbounds float* %tmp14920, i64 1
+  %tmp14922 = getelementptr inbounds float* %tmp14921, i64 1
+  %tmp14923 = getelementptr inbounds float* %tmp14922, i64 1
+  %tmp14924 = getelementptr inbounds float* %tmp14923, i64 1
+  %tmp14925 = getelementptr inbounds float* %tmp14924, i64 1
+  %tmp14926 = getelementptr inbounds float* %tmp14925, i64 1
+  %tmp14927 = getelementptr inbounds float* %tmp14926, i64 1
+  %tmp14928 = getelementptr inbounds float* %tmp14927, i64 1
+  %tmp14929 = getelementptr inbounds float* %tmp14928, i64 1
+  %tmp14930 = getelementptr inbounds float* %tmp14929, i64 1
+  %tmp14931 = getelementptr inbounds float* %tmp14930, i64 1
+  %tmp14932 = getelementptr inbounds float* %tmp14931, i64 1
+  %tmp14933 = getelementptr inbounds float* %tmp14932, i64 1
+  %tmp14934 = getelementptr inbounds float* %tmp14933, i64 1
+  %tmp14935 = getelementptr inbounds float* %tmp14934, i64 1
+  %tmp14936 = getelementptr inbounds float* %tmp14935, i64 1
+  %tmp14937 = getelementptr inbounds float* %tmp14936, i64 1
+  %tmp14938 = getelementptr inbounds float* %tmp14937, i64 1
+  %tmp14939 = getelementptr inbounds float* %tmp14938, i64 1
+  %tmp14940 = getelementptr inbounds float* %tmp14939, i64 1
+  %tmp14941 = getelementptr inbounds float* %tmp14940, i64 1
+  %tmp14942 = getelementptr inbounds float* %tmp14941, i64 1
+  %tmp14943 = getelementptr inbounds float* %tmp14942, i64 1
+  %tmp14944 = getelementptr inbounds float* %tmp14943, i64 1
+  %tmp14945 = getelementptr inbounds float* %tmp14944, i64 1
+  %tmp14946 = getelementptr inbounds float* %tmp14945, i64 1
+  %tmp14947 = getelementptr inbounds float* %tmp14946, i64 1
+  %tmp14948 = getelementptr inbounds float* %tmp14947, i64 1
+  %tmp14949 = getelementptr inbounds float* %tmp14948, i64 1
+  %tmp14950 = getelementptr inbounds float* %tmp14949, i64 1
+  %tmp14951 = getelementptr inbounds float* %tmp14950, i64 1
+  %tmp14952 = getelementptr inbounds float* %tmp14951, i64 1
+  %tmp14953 = getelementptr inbounds float* %tmp14952, i64 1
+  %tmp14954 = getelementptr inbounds float* %tmp14953, i64 1
+  %tmp14955 = getelementptr inbounds float* %tmp14954, i64 1
+  %tmp14956 = getelementptr inbounds float* %tmp14955, i64 1
+  %tmp14957 = getelementptr inbounds float* %tmp14956, i64 1
+  %tmp14958 = getelementptr inbounds float* %tmp14957, i64 1
+  %tmp14959 = getelementptr inbounds float* %tmp14958, i64 1
+  %tmp14960 = getelementptr inbounds float* %tmp14959, i64 1
+  %tmp14961 = getelementptr inbounds float* %tmp14960, i64 1
+  %tmp14962 = getelementptr inbounds float* %tmp14961, i64 1
+  %tmp14963 = getelementptr inbounds float* %tmp14962, i64 1
+  %tmp14964 = getelementptr inbounds float* %tmp14963, i64 1
+  %tmp14965 = getelementptr inbounds float* %tmp14964, i64 1
+  %tmp14966 = getelementptr inbounds float* %tmp14965, i64 1
+  %tmp14967 = getelementptr inbounds float* %tmp14966, i64 1
+  %tmp14968 = getelementptr inbounds float* %tmp14967, i64 1
+  %tmp14969 = getelementptr inbounds float* %tmp14968, i64 1
+  %tmp14970 = getelementptr inbounds float* %tmp14969, i64 1
+  %tmp14971 = getelementptr inbounds float* %tmp14970, i64 1
+  %tmp14972 = getelementptr inbounds float* %tmp14971, i64 1
+  %tmp14973 = getelementptr inbounds float* %tmp14972, i64 1
+  %tmp14974 = getelementptr inbounds float* %tmp14973, i64 1
+  %tmp14975 = getelementptr inbounds float* %tmp14974, i64 1
+  %tmp14976 = getelementptr inbounds float* %tmp14975, i64 1
+  %tmp14977 = getelementptr inbounds float* %tmp14976, i64 1
+  %tmp14978 = getelementptr inbounds float* %tmp14977, i64 1
+  %tmp14979 = getelementptr inbounds float* %tmp14978, i64 1
+  %tmp14980 = getelementptr inbounds float* %tmp14979, i64 1
+  %tmp14981 = getelementptr inbounds float* %tmp14980, i64 1
+  %tmp14982 = getelementptr inbounds float* %tmp14981, i64 1
+  %tmp14983 = getelementptr inbounds float* %tmp14982, i64 1
+  %tmp14984 = getelementptr inbounds float* %tmp14983, i64 1
+  %tmp14985 = getelementptr inbounds float* %tmp14984, i64 1
+  %tmp14986 = getelementptr inbounds float* %tmp14985, i64 1
+  %tmp14987 = getelementptr inbounds float* %tmp14986, i64 1
+  %tmp14988 = getelementptr inbounds float* %tmp14987, i64 1
+  %tmp14989 = getelementptr inbounds float* %tmp14988, i64 1
+  %tmp14990 = getelementptr inbounds float* %tmp14989, i64 1
+  %tmp14991 = getelementptr inbounds float* %tmp14990, i64 1
+  %tmp14992 = getelementptr inbounds float* %tmp14991, i64 1
+  %tmp14993 = getelementptr inbounds float* %tmp14992, i64 1
+  %tmp14994 = getelementptr inbounds float* %tmp14993, i64 1
+  %tmp14995 = getelementptr inbounds float* %tmp14994, i64 1
+  %tmp14996 = getelementptr inbounds float* %tmp14995, i64 1
+  %tmp14997 = getelementptr inbounds float* %tmp14996, i64 1
+  %tmp14998 = getelementptr inbounds float* %tmp14997, i64 1
+  %tmp14999 = getelementptr inbounds float* %tmp14998, i64 1
+  %tmp15000 = getelementptr inbounds float* %tmp14999, i64 1
+  %tmp15001 = getelementptr inbounds float* %tmp15000, i64 1
+  %tmp15002 = getelementptr inbounds float* %tmp15001, i64 1
+  %tmp15003 = getelementptr inbounds float* %tmp15002, i64 1
+  %tmp15004 = getelementptr inbounds float* %tmp15003, i64 1
+  %tmp15005 = getelementptr inbounds float* %tmp15004, i64 1
+  %tmp15006 = getelementptr inbounds float* %tmp15005, i64 1
+  %tmp15007 = getelementptr inbounds float* %tmp15006, i64 1
+  %tmp15008 = getelementptr inbounds float* %tmp15007, i64 1
+  %tmp15009 = getelementptr inbounds float* %tmp15008, i64 1
+  %tmp15010 = getelementptr inbounds float* %tmp15009, i64 1
+  %tmp15011 = getelementptr inbounds float* %tmp15010, i64 1
+  %tmp15012 = getelementptr inbounds float* %tmp15011, i64 1
+  %tmp15013 = getelementptr inbounds float* %tmp15012, i64 1
+  %tmp15014 = getelementptr inbounds float* %tmp15013, i64 1
+  %tmp15015 = getelementptr inbounds float* %tmp15014, i64 1
+  %tmp15016 = getelementptr inbounds float* %tmp15015, i64 1
+  %tmp15017 = getelementptr inbounds float* %tmp15016, i64 1
+  %tmp15018 = getelementptr inbounds float* %tmp15017, i64 1
+  %tmp15019 = getelementptr inbounds float* %tmp15018, i64 1
+  %tmp15020 = getelementptr inbounds float* %tmp15019, i64 1
+  %tmp15021 = getelementptr inbounds float* %tmp15020, i64 1
+  %tmp15022 = getelementptr inbounds float* %tmp15021, i64 1
+  %tmp15023 = getelementptr inbounds float* %tmp15022, i64 1
+  %tmp15024 = getelementptr inbounds float* %tmp15023, i64 1
+  %tmp15025 = getelementptr inbounds float* %tmp15024, i64 1
+  %tmp15026 = getelementptr inbounds float* %tmp15025, i64 1
+  %tmp15027 = getelementptr inbounds float* %tmp15026, i64 1
+  %tmp15028 = getelementptr inbounds float* %tmp15027, i64 1
+  %tmp15029 = getelementptr inbounds float* %tmp15028, i64 1
+  %tmp15030 = getelementptr inbounds float* %tmp15029, i64 1
+  %tmp15031 = getelementptr inbounds float* %tmp15030, i64 1
+  %tmp15032 = getelementptr inbounds float* %tmp15031, i64 1
+  %tmp15033 = getelementptr inbounds float* %tmp15032, i64 1
+  %tmp15034 = getelementptr inbounds float* %tmp15033, i64 1
+  %tmp15035 = getelementptr inbounds float* %tmp15034, i64 1
+  %tmp15036 = getelementptr inbounds float* %tmp15035, i64 1
+  %tmp15037 = getelementptr inbounds float* %tmp15036, i64 1
+  %tmp15038 = getelementptr inbounds float* %tmp15037, i64 1
+  %tmp15039 = getelementptr inbounds float* %tmp15038, i64 1
+  %tmp15040 = getelementptr inbounds float* %tmp15039, i64 1
+  %tmp15041 = getelementptr inbounds float* %tmp15040, i64 1
+  %tmp15042 = getelementptr inbounds float* %tmp15041, i64 1
+  %tmp15043 = getelementptr inbounds float* %tmp15042, i64 1
+  %tmp15044 = getelementptr inbounds float* %tmp15043, i64 1
+  %tmp15045 = getelementptr inbounds float* %tmp15044, i64 1
+  %tmp15046 = getelementptr inbounds float* %tmp15045, i64 1
+  %tmp15047 = getelementptr inbounds float* %tmp15046, i64 1
+  %tmp15048 = getelementptr inbounds float* %tmp15047, i64 1
+  %tmp15049 = getelementptr inbounds float* %tmp15048, i64 1
+  %tmp15050 = getelementptr inbounds float* %tmp15049, i64 1
+  %tmp15051 = getelementptr inbounds float* %tmp15050, i64 1
+  %tmp15052 = getelementptr inbounds float* %tmp15051, i64 1
+  %tmp15053 = getelementptr inbounds float* %tmp15052, i64 1
+  %tmp15054 = getelementptr inbounds float* %tmp15053, i64 1
+  %tmp15055 = getelementptr inbounds float* %tmp15054, i64 1
+  %tmp15056 = getelementptr inbounds float* %tmp15055, i64 1
+  %tmp15057 = getelementptr inbounds float* %tmp15056, i64 1
+  %tmp15058 = getelementptr inbounds float* %tmp15057, i64 1
+  %tmp15059 = getelementptr inbounds float* %tmp15058, i64 1
+  %tmp15060 = getelementptr inbounds float* %tmp15059, i64 1
+  %tmp15061 = getelementptr inbounds float* %tmp15060, i64 1
+  %tmp15062 = getelementptr inbounds float* %tmp15061, i64 1
+  %tmp15063 = getelementptr inbounds float* %tmp15062, i64 1
+  %tmp15064 = getelementptr inbounds float* %tmp15063, i64 1
+  %tmp15065 = getelementptr inbounds float* %tmp15064, i64 1
+  %tmp15066 = getelementptr inbounds float* %tmp15065, i64 1
+  %tmp15067 = getelementptr inbounds float* %tmp15066, i64 1
+  %tmp15068 = getelementptr inbounds float* %tmp15067, i64 1
+  %tmp15069 = getelementptr inbounds float* %tmp15068, i64 1
+  %tmp15070 = getelementptr inbounds float* %tmp15069, i64 1
+  %tmp15071 = getelementptr inbounds float* %tmp15070, i64 1
+  %tmp15072 = getelementptr inbounds float* %tmp15071, i64 1
+  %tmp15073 = getelementptr inbounds float* %tmp15072, i64 1
+  %tmp15074 = getelementptr inbounds float* %tmp15073, i64 1
+  %tmp15075 = getelementptr inbounds float* %tmp15074, i64 1
+  %tmp15076 = getelementptr inbounds float* %tmp15075, i64 1
+  %tmp15077 = getelementptr inbounds float* %tmp15076, i64 1
+  %tmp15078 = getelementptr inbounds float* %tmp15077, i64 1
+  %tmp15079 = getelementptr inbounds float* %tmp15078, i64 1
+  %tmp15080 = getelementptr inbounds float* %tmp15079, i64 1
+  %tmp15081 = getelementptr inbounds float* %tmp15080, i64 1
+  %tmp15082 = getelementptr inbounds float* %tmp15081, i64 1
+  %tmp15083 = getelementptr inbounds float* %tmp15082, i64 1
+  %tmp15084 = getelementptr inbounds float* %tmp15083, i64 1
+  %tmp15085 = getelementptr inbounds float* %tmp15084, i64 1
+  %tmp15086 = getelementptr inbounds float* %tmp15085, i64 1
+  %tmp15087 = getelementptr inbounds float* %tmp15086, i64 1
+  %tmp15088 = getelementptr inbounds float* %tmp15087, i64 1
+  %tmp15089 = getelementptr inbounds float* %tmp15088, i64 1
+  %tmp15090 = getelementptr inbounds float* %tmp15089, i64 1
+  %tmp15091 = getelementptr inbounds float* %tmp15090, i64 1
+  %tmp15092 = getelementptr inbounds float* %tmp15091, i64 1
+  %tmp15093 = getelementptr inbounds float* %tmp15092, i64 1
+  %tmp15094 = getelementptr inbounds float* %tmp15093, i64 1
+  %tmp15095 = getelementptr inbounds float* %tmp15094, i64 1
+  %tmp15096 = getelementptr inbounds float* %tmp15095, i64 1
+  %tmp15097 = getelementptr inbounds float* %tmp15096, i64 1
+  %tmp15098 = getelementptr inbounds float* %tmp15097, i64 1
+  %tmp15099 = getelementptr inbounds float* %tmp15098, i64 1
+  %tmp15100 = getelementptr inbounds float* %tmp15099, i64 1
+  %tmp15101 = getelementptr inbounds float* %tmp15100, i64 1
+  %tmp15102 = getelementptr inbounds float* %tmp15101, i64 1
+  %tmp15103 = getelementptr inbounds float* %tmp15102, i64 1
+  %tmp15104 = getelementptr inbounds float* %tmp15103, i64 1
+  %tmp15105 = getelementptr inbounds float* %tmp15104, i64 1
+  %tmp15106 = getelementptr inbounds float* %tmp15105, i64 1
+  %tmp15107 = getelementptr inbounds float* %tmp15106, i64 1
+  %tmp15108 = getelementptr inbounds float* %tmp15107, i64 1
+  %tmp15109 = getelementptr inbounds float* %tmp15108, i64 1
+  %tmp15110 = getelementptr inbounds float* %tmp15109, i64 1
+  %tmp15111 = getelementptr inbounds float* %tmp15110, i64 1
+  %tmp15112 = getelementptr inbounds float* %tmp15111, i64 1
+  %tmp15113 = getelementptr inbounds float* %tmp15112, i64 1
+  %tmp15114 = getelementptr inbounds float* %tmp15113, i64 1
+  %tmp15115 = getelementptr inbounds float* %tmp15114, i64 1
+  %tmp15116 = getelementptr inbounds float* %tmp15115, i64 1
+  %tmp15117 = getelementptr inbounds float* %tmp15116, i64 1
+  %tmp15118 = getelementptr inbounds float* %tmp15117, i64 1
+  %tmp15119 = getelementptr inbounds float* %tmp15118, i64 1
+  %tmp15120 = getelementptr inbounds float* %tmp15119, i64 1
+  %tmp15121 = getelementptr inbounds float* %tmp15120, i64 1
+  %tmp15122 = getelementptr inbounds float* %tmp15121, i64 1
+  %tmp15123 = getelementptr inbounds float* %tmp15122, i64 1
+  %tmp15124 = getelementptr inbounds float* %tmp15123, i64 1
+  %tmp15125 = getelementptr inbounds float* %tmp15124, i64 1
+  %tmp15126 = getelementptr inbounds float* %tmp15125, i64 1
+  %tmp15127 = getelementptr inbounds float* %tmp15126, i64 1
+  %tmp15128 = getelementptr inbounds float* %tmp15127, i64 1
+  %tmp15129 = getelementptr inbounds float* %tmp15128, i64 1
+  %tmp15130 = getelementptr inbounds float* %tmp15129, i64 1
+  %tmp15131 = getelementptr inbounds float* %tmp15130, i64 1
+  %tmp15132 = getelementptr inbounds float* %tmp15131, i64 1
+  %tmp15133 = getelementptr inbounds float* %tmp15132, i64 1
+  %tmp15134 = getelementptr inbounds float* %tmp15133, i64 1
+  %tmp15135 = getelementptr inbounds float* %tmp15134, i64 1
+  %tmp15136 = getelementptr inbounds float* %tmp15135, i64 1
+  %tmp15137 = getelementptr inbounds float* %tmp15136, i64 1
+  %tmp15138 = getelementptr inbounds float* %tmp15137, i64 1
+  %tmp15139 = getelementptr inbounds float* %tmp15138, i64 1
+  %tmp15140 = getelementptr inbounds float* %tmp15139, i64 1
+  %tmp15141 = getelementptr inbounds float* %tmp15140, i64 1
+  %tmp15142 = getelementptr inbounds float* %tmp15141, i64 1
+  %tmp15143 = getelementptr inbounds float* %tmp15142, i64 1
+  %tmp15144 = getelementptr inbounds float* %tmp15143, i64 1
+  %tmp15145 = getelementptr inbounds float* %tmp15144, i64 1
+  %tmp15146 = getelementptr inbounds float* %tmp15145, i64 1
+  %tmp15147 = getelementptr inbounds float* %tmp15146, i64 1
+  %tmp15148 = getelementptr inbounds float* %tmp15147, i64 1
+  %tmp15149 = getelementptr inbounds float* %tmp15148, i64 1
+  %tmp15150 = getelementptr inbounds float* %tmp15149, i64 1
+  %tmp15151 = getelementptr inbounds float* %tmp15150, i64 1
+  %tmp15152 = getelementptr inbounds float* %tmp15151, i64 1
+  %tmp15153 = getelementptr inbounds float* %tmp15152, i64 1
+  %tmp15154 = getelementptr inbounds float* %tmp15153, i64 1
+  %tmp15155 = getelementptr inbounds float* %tmp15154, i64 1
+  %tmp15156 = getelementptr inbounds float* %tmp15155, i64 1
+  %tmp15157 = getelementptr inbounds float* %tmp15156, i64 1
+  %tmp15158 = getelementptr inbounds float* %tmp15157, i64 1
+  %tmp15159 = getelementptr inbounds float* %tmp15158, i64 1
+  %tmp15160 = getelementptr inbounds float* %tmp15159, i64 1
+  %tmp15161 = getelementptr inbounds float* %tmp15160, i64 1
+  %tmp15162 = getelementptr inbounds float* %tmp15161, i64 1
+  %tmp15163 = getelementptr inbounds float* %tmp15162, i64 1
+  %tmp15164 = getelementptr inbounds float* %tmp15163, i64 1
+  %tmp15165 = getelementptr inbounds float* %tmp15164, i64 1
+  %tmp15166 = getelementptr inbounds float* %tmp15165, i64 1
+  %tmp15167 = getelementptr inbounds float* %tmp15166, i64 1
+  %tmp15168 = getelementptr inbounds float* %tmp15167, i64 1
+  %tmp15169 = getelementptr inbounds float* %tmp15168, i64 1
+  %tmp15170 = getelementptr inbounds float* %tmp15169, i64 1
+  %tmp15171 = getelementptr inbounds float* %tmp15170, i64 1
+  %tmp15172 = getelementptr inbounds float* %tmp15171, i64 1
+  %tmp15173 = getelementptr inbounds float* %tmp15172, i64 1
+  %tmp15174 = getelementptr inbounds float* %tmp15173, i64 1
+  %tmp15175 = getelementptr inbounds float* %tmp15174, i64 1
+  %tmp15176 = getelementptr inbounds float* %tmp15175, i64 1
+  %tmp15177 = getelementptr inbounds float* %tmp15176, i64 1
+  %tmp15178 = getelementptr inbounds float* %tmp15177, i64 1
+  %tmp15179 = getelementptr inbounds float* %tmp15178, i64 1
+  %tmp15180 = getelementptr inbounds float* %tmp15179, i64 1
+  %tmp15181 = getelementptr inbounds float* %tmp15180, i64 1
+  %tmp15182 = getelementptr inbounds float* %tmp15181, i64 1
+  %tmp15183 = getelementptr inbounds float* %tmp15182, i64 1
+  %tmp15184 = getelementptr inbounds float* %tmp15183, i64 1
+  %tmp15185 = getelementptr inbounds float* %tmp15184, i64 1
+  %tmp15186 = getelementptr inbounds float* %tmp15185, i64 1
+  %tmp15187 = getelementptr inbounds float* %tmp15186, i64 1
+  %tmp15188 = getelementptr inbounds float* %tmp15187, i64 1
+  %tmp15189 = getelementptr inbounds float* %tmp15188, i64 1
+  %tmp15190 = getelementptr inbounds float* %tmp15189, i64 1
+  %tmp15191 = getelementptr inbounds float* %tmp15190, i64 1
+  %tmp15192 = getelementptr inbounds float* %tmp15191, i64 1
+  %tmp15193 = getelementptr inbounds float* %tmp15192, i64 1
+  %tmp15194 = getelementptr inbounds float* %tmp15193, i64 1
+  %tmp15195 = getelementptr inbounds float* %tmp15194, i64 1
+  %tmp15196 = getelementptr inbounds float* %tmp15195, i64 1
+  %tmp15197 = getelementptr inbounds float* %tmp15196, i64 1
+  %tmp15198 = getelementptr inbounds float* %tmp15197, i64 1
+  %tmp15199 = getelementptr inbounds float* %tmp15198, i64 1
+  %tmp15200 = getelementptr inbounds float* %tmp15199, i64 1
+  %tmp15201 = getelementptr inbounds float* %tmp15200, i64 1
+  %tmp15202 = getelementptr inbounds float* %tmp15201, i64 1
+  %tmp15203 = getelementptr inbounds float* %tmp15202, i64 1
+  %tmp15204 = getelementptr inbounds float* %tmp15203, i64 1
+  %tmp15205 = getelementptr inbounds float* %tmp15204, i64 1
+  %tmp15206 = getelementptr inbounds float* %tmp15205, i64 1
+  %tmp15207 = getelementptr inbounds float* %tmp15206, i64 1
+  %tmp15208 = getelementptr inbounds float* %tmp15207, i64 1
+  %tmp15209 = getelementptr inbounds float* %tmp15208, i64 1
+  %tmp15210 = getelementptr inbounds float* %tmp15209, i64 1
+  %tmp15211 = getelementptr inbounds float* %tmp15210, i64 1
+  %tmp15212 = getelementptr inbounds float* %tmp15211, i64 1
+  %tmp15213 = getelementptr inbounds float* %tmp15212, i64 1
+  %tmp15214 = getelementptr inbounds float* %tmp15213, i64 1
+  %tmp15215 = getelementptr inbounds float* %tmp15214, i64 1
+  %tmp15216 = getelementptr inbounds float* %tmp15215, i64 1
+  %tmp15217 = getelementptr inbounds float* %tmp15216, i64 1
+  %tmp15218 = getelementptr inbounds float* %tmp15217, i64 1
+  %tmp15219 = getelementptr inbounds float* %tmp15218, i64 1
+  %tmp15220 = getelementptr inbounds float* %tmp15219, i64 1
+  %tmp15221 = getelementptr inbounds float* %tmp15220, i64 1
+  %tmp15222 = getelementptr inbounds float* %tmp15221, i64 1
+  %tmp15223 = getelementptr inbounds float* %tmp15222, i64 1
+  %tmp15224 = getelementptr inbounds float* %tmp15223, i64 1
+  %tmp15225 = getelementptr inbounds float* %tmp15224, i64 1
+  %tmp15226 = getelementptr inbounds float* %tmp15225, i64 1
+  %tmp15227 = getelementptr inbounds float* %tmp15226, i64 1
+  %tmp15228 = getelementptr inbounds float* %tmp15227, i64 1
+  %tmp15229 = getelementptr inbounds float* %tmp15228, i64 1
+  %tmp15230 = getelementptr inbounds float* %tmp15229, i64 1
+  %tmp15231 = getelementptr inbounds float* %tmp15230, i64 1
+  %tmp15232 = getelementptr inbounds float* %tmp15231, i64 1
+  %tmp15233 = getelementptr inbounds float* %tmp15232, i64 1
+  %tmp15234 = getelementptr inbounds float* %tmp15233, i64 1
+  %tmp15235 = getelementptr inbounds float* %tmp15234, i64 1
+  %tmp15236 = getelementptr inbounds float* %tmp15235, i64 1
+  %tmp15237 = getelementptr inbounds float* %tmp15236, i64 1
+  %tmp15238 = getelementptr inbounds float* %tmp15237, i64 1
+  %tmp15239 = getelementptr inbounds float* %tmp15238, i64 1
+  %tmp15240 = getelementptr inbounds float* %tmp15239, i64 1
+  %tmp15241 = getelementptr inbounds float* %tmp15240, i64 1
+  %tmp15242 = getelementptr inbounds float* %tmp15241, i64 1
+  %tmp15243 = getelementptr inbounds float* %tmp15242, i64 1
+  %tmp15244 = getelementptr inbounds float* %tmp15243, i64 1
+  %tmp15245 = getelementptr inbounds float* %tmp15244, i64 1
+  %tmp15246 = getelementptr inbounds float* %tmp15245, i64 1
+  %tmp15247 = getelementptr inbounds float* %tmp15246, i64 1
+  %tmp15248 = getelementptr inbounds float* %tmp15247, i64 1
+  %tmp15249 = getelementptr inbounds float* %tmp15248, i64 1
+  %tmp15250 = getelementptr inbounds float* %tmp15249, i64 1
+  %tmp15251 = getelementptr inbounds float* %tmp15250, i64 1
+  %tmp15252 = getelementptr inbounds float* %tmp15251, i64 1
+  %tmp15253 = getelementptr inbounds float* %tmp15252, i64 1
+  %tmp15254 = getelementptr inbounds float* %tmp15253, i64 1
+  %tmp15255 = getelementptr inbounds float* %tmp15254, i64 1
+  %tmp15256 = getelementptr inbounds float* %tmp15255, i64 1
+  %tmp15257 = getelementptr inbounds float* %tmp15256, i64 1
+  %tmp15258 = getelementptr inbounds float* %tmp15257, i64 1
+  %tmp15259 = getelementptr inbounds float* %tmp15258, i64 1
+  %tmp15260 = getelementptr inbounds float* %tmp15259, i64 1
+  %tmp15261 = getelementptr inbounds float* %tmp15260, i64 1
+  %tmp15262 = getelementptr inbounds float* %tmp15261, i64 1
+  %tmp15263 = getelementptr inbounds float* %tmp15262, i64 1
+  %tmp15264 = getelementptr inbounds float* %tmp15263, i64 1
+  %tmp15265 = getelementptr inbounds float* %tmp15264, i64 1
+  %tmp15266 = getelementptr inbounds float* %tmp15265, i64 1
+  %tmp15267 = getelementptr inbounds float* %tmp15266, i64 1
+  %tmp15268 = getelementptr inbounds float* %tmp15267, i64 1
+  %tmp15269 = getelementptr inbounds float* %tmp15268, i64 1
+  %tmp15270 = getelementptr inbounds float* %tmp15269, i64 1
+  %tmp15271 = getelementptr inbounds float* %tmp15270, i64 1
+  %tmp15272 = getelementptr inbounds float* %tmp15271, i64 1
+  %tmp15273 = getelementptr inbounds float* %tmp15272, i64 1
+  %tmp15274 = getelementptr inbounds float* %tmp15273, i64 1
+  %tmp15275 = getelementptr inbounds float* %tmp15274, i64 1
+  %tmp15276 = getelementptr inbounds float* %tmp15275, i64 1
+  %tmp15277 = getelementptr inbounds float* %tmp15276, i64 1
+  %tmp15278 = getelementptr inbounds float* %tmp15277, i64 1
+  %tmp15279 = getelementptr inbounds float* %tmp15278, i64 1
+  %tmp15280 = getelementptr inbounds float* %tmp15279, i64 1
+  %tmp15281 = getelementptr inbounds float* %tmp15280, i64 1
+  %tmp15282 = getelementptr inbounds float* %tmp15281, i64 1
+  %tmp15283 = getelementptr inbounds float* %tmp15282, i64 1
+  %tmp15284 = getelementptr inbounds float* %tmp15283, i64 1
+  %tmp15285 = getelementptr inbounds float* %tmp15284, i64 1
+  %tmp15286 = getelementptr inbounds float* %tmp15285, i64 1
+  %tmp15287 = getelementptr inbounds float* %tmp15286, i64 1
+  %tmp15288 = getelementptr inbounds float* %tmp15287, i64 1
+  %tmp15289 = getelementptr inbounds float* %tmp15288, i64 1
+  %tmp15290 = getelementptr inbounds float* %tmp15289, i64 1
+  %tmp15291 = getelementptr inbounds float* %tmp15290, i64 1
+  %tmp15292 = getelementptr inbounds float* %tmp15291, i64 1
+  %tmp15293 = getelementptr inbounds float* %tmp15292, i64 1
+  %tmp15294 = getelementptr inbounds float* %tmp15293, i64 1
+  %tmp15295 = getelementptr inbounds float* %tmp15294, i64 1
+  %tmp15296 = getelementptr inbounds float* %tmp15295, i64 1
+  %tmp15297 = getelementptr inbounds float* %tmp15296, i64 1
+  %tmp15298 = getelementptr inbounds float* %tmp15297, i64 1
+  %tmp15299 = getelementptr inbounds float* %tmp15298, i64 1
+  %tmp15300 = getelementptr inbounds float* %tmp15299, i64 1
+  %tmp15301 = getelementptr inbounds float* %tmp15300, i64 1
+  %tmp15302 = getelementptr inbounds float* %tmp15301, i64 1
+  %tmp15303 = getelementptr inbounds float* %tmp15302, i64 1
+  %tmp15304 = getelementptr inbounds float* %tmp15303, i64 1
+  %tmp15305 = getelementptr inbounds float* %tmp15304, i64 1
+  %tmp15306 = getelementptr inbounds float* %tmp15305, i64 1
+  %tmp15307 = getelementptr inbounds float* %tmp15306, i64 1
+  %tmp15308 = getelementptr inbounds float* %tmp15307, i64 1
+  %tmp15309 = getelementptr inbounds float* %tmp15308, i64 1
+  %tmp15310 = getelementptr inbounds float* %tmp15309, i64 1
+  %tmp15311 = getelementptr inbounds float* %tmp15310, i64 1
+  %tmp15312 = getelementptr inbounds float* %tmp15311, i64 1
+  %tmp15313 = getelementptr inbounds float* %tmp15312, i64 1
+  %tmp15314 = getelementptr inbounds float* %tmp15313, i64 1
+  %tmp15315 = getelementptr inbounds float* %tmp15314, i64 1
+  %tmp15316 = getelementptr inbounds float* %tmp15315, i64 1
+  %tmp15317 = getelementptr inbounds float* %tmp15316, i64 1
+  %tmp15318 = getelementptr inbounds float* %tmp15317, i64 1
+  %tmp15319 = getelementptr inbounds float* %tmp15318, i64 1
+  %tmp15320 = getelementptr inbounds float* %tmp15319, i64 1
+  %tmp15321 = getelementptr inbounds float* %tmp15320, i64 1
+  %tmp15322 = getelementptr inbounds float* %tmp15321, i64 1
+  %tmp15323 = getelementptr inbounds float* %tmp15322, i64 1
+  %tmp15324 = getelementptr inbounds float* %tmp15323, i64 1
+  %tmp15325 = getelementptr inbounds float* %tmp15324, i64 1
+  %tmp15326 = getelementptr inbounds float* %tmp15325, i64 1
+  %tmp15327 = getelementptr inbounds float* %tmp15326, i64 1
+  %tmp15328 = getelementptr inbounds float* %tmp15327, i64 1
+  %tmp15329 = getelementptr inbounds float* %tmp15328, i64 1
+  %tmp15330 = getelementptr inbounds float* %tmp15329, i64 1
+  %tmp15331 = getelementptr inbounds float* %tmp15330, i64 1
+  %tmp15332 = getelementptr inbounds float* %tmp15331, i64 1
+  %tmp15333 = getelementptr inbounds float* %tmp15332, i64 1
+  %tmp15334 = getelementptr inbounds float* %tmp15333, i64 1
+  %tmp15335 = getelementptr inbounds float* %tmp15334, i64 1
+  %tmp15336 = getelementptr inbounds float* %tmp15335, i64 1
+  %tmp15337 = getelementptr inbounds float* %tmp15336, i64 1
+  %tmp15338 = getelementptr inbounds float* %tmp15337, i64 1
+  %tmp15339 = getelementptr inbounds float* %tmp15338, i64 1
+  %tmp15340 = getelementptr inbounds float* %tmp15339, i64 1
+  %tmp15341 = getelementptr inbounds float* %tmp15340, i64 1
+  %tmp15342 = getelementptr inbounds float* %tmp15341, i64 1
+  %tmp15343 = getelementptr inbounds float* %tmp15342, i64 1
+  %tmp15344 = getelementptr inbounds float* %tmp15343, i64 1
+  %tmp15345 = getelementptr inbounds float* %tmp15344, i64 1
+  %tmp15346 = getelementptr inbounds float* %tmp15345, i64 1
+  %tmp15347 = getelementptr inbounds float* %tmp15346, i64 1
+  %tmp15348 = getelementptr inbounds float* %tmp15347, i64 1
+  %tmp15349 = getelementptr inbounds float* %tmp15348, i64 1
+  %tmp15350 = getelementptr inbounds float* %tmp15349, i64 1
+  %tmp15351 = getelementptr inbounds float* %tmp15350, i64 1
+  %tmp15352 = getelementptr inbounds float* %tmp15351, i64 1
+  %tmp15353 = getelementptr inbounds float* %tmp15352, i64 1
+  %tmp15354 = getelementptr inbounds float* %tmp15353, i64 1
+  %tmp15355 = getelementptr inbounds float* %tmp15354, i64 1
+  %tmp15356 = getelementptr inbounds float* %tmp15355, i64 1
+  %tmp15357 = getelementptr inbounds float* %tmp15356, i64 1
+  %tmp15358 = getelementptr inbounds float* %tmp15357, i64 1
+  %tmp15359 = getelementptr inbounds float* %tmp15358, i64 1
+  %tmp15360 = getelementptr inbounds float* %tmp15359, i64 1
+  %tmp15361 = getelementptr inbounds float* %tmp15360, i64 1
+  %tmp15362 = getelementptr inbounds float* %tmp15361, i64 1
+  %tmp15363 = getelementptr inbounds float* %tmp15362, i64 1
+  %tmp15364 = getelementptr inbounds float* %tmp15363, i64 1
+  %tmp15365 = getelementptr inbounds float* %tmp15364, i64 1
+  %tmp15366 = getelementptr inbounds float* %tmp15365, i64 1
+  %tmp15367 = getelementptr inbounds float* %tmp15366, i64 1
+  %tmp15368 = getelementptr inbounds float* %tmp15367, i64 1
+  %tmp15369 = getelementptr inbounds float* %tmp15368, i64 1
+  %tmp15370 = getelementptr inbounds float* %tmp15369, i64 1
+  %tmp15371 = getelementptr inbounds float* %tmp15370, i64 1
+  %tmp15372 = getelementptr inbounds float* %tmp15371, i64 1
+  %tmp15373 = getelementptr inbounds float* %tmp15372, i64 1
+  %tmp15374 = getelementptr inbounds float* %tmp15373, i64 1
+  %tmp15375 = getelementptr inbounds float* %tmp15374, i64 1
+  %tmp15376 = getelementptr inbounds float* %tmp15375, i64 1
+  %tmp15377 = getelementptr inbounds float* %tmp15376, i64 1
+  %tmp15378 = getelementptr inbounds float* %tmp15377, i64 1
+  %tmp15379 = getelementptr inbounds float* %tmp15378, i64 1
+  %tmp15380 = getelementptr inbounds float* %tmp15379, i64 1
+  %tmp15381 = getelementptr inbounds float* %tmp15380, i64 1
+  %tmp15382 = getelementptr inbounds float* %tmp15381, i64 1
+  %tmp15383 = getelementptr inbounds float* %tmp15382, i64 1
+  %tmp15384 = getelementptr inbounds float* %tmp15383, i64 1
+  %tmp15385 = getelementptr inbounds float* %tmp15384, i64 1
+  %tmp15386 = getelementptr inbounds float* %tmp15385, i64 1
+  %tmp15387 = getelementptr inbounds float* %tmp15386, i64 1
+  %tmp15388 = getelementptr inbounds float* %tmp15387, i64 1
+  %tmp15389 = getelementptr inbounds float* %tmp15388, i64 1
+  %tmp15390 = getelementptr inbounds float* %tmp15389, i64 1
+  %tmp15391 = getelementptr inbounds float* %tmp15390, i64 1
+  %tmp15392 = getelementptr inbounds float* %tmp15391, i64 1
+  %tmp15393 = getelementptr inbounds float* %tmp15392, i64 1
+  %tmp15394 = getelementptr inbounds float* %tmp15393, i64 1
+  %tmp15395 = getelementptr inbounds float* %tmp15394, i64 1
+  %tmp15396 = getelementptr inbounds float* %tmp15395, i64 1
+  %tmp15397 = getelementptr inbounds float* %tmp15396, i64 1
+  %tmp15398 = getelementptr inbounds float* %tmp15397, i64 1
+  %tmp15399 = getelementptr inbounds float* %tmp15398, i64 1
+  %tmp15400 = getelementptr inbounds float* %tmp15399, i64 1
+  %tmp15401 = getelementptr inbounds float* %tmp15400, i64 1
+  %tmp15402 = getelementptr inbounds float* %tmp15401, i64 1
+  %tmp15403 = getelementptr inbounds float* %tmp15402, i64 1
+  %tmp15404 = getelementptr inbounds float* %tmp15403, i64 1
+  %tmp15405 = getelementptr inbounds float* %tmp15404, i64 1
+  %tmp15406 = getelementptr inbounds float* %tmp15405, i64 1
+  %tmp15407 = getelementptr inbounds float* %tmp15406, i64 1
+  %tmp15408 = getelementptr inbounds float* %tmp15407, i64 1
+  %tmp15409 = getelementptr inbounds float* %tmp15408, i64 1
+  %tmp15410 = getelementptr inbounds float* %tmp15409, i64 1
+  %tmp15411 = getelementptr inbounds float* %tmp15410, i64 1
+  %tmp15412 = getelementptr inbounds float* %tmp15411, i64 1
+  %tmp15413 = getelementptr inbounds float* %tmp15412, i64 1
+  %tmp15414 = getelementptr inbounds float* %tmp15413, i64 1
+  %tmp15415 = getelementptr inbounds float* %tmp15414, i64 1
+  %tmp15416 = getelementptr inbounds float* %tmp15415, i64 1
+  %tmp15417 = getelementptr inbounds float* %tmp15416, i64 1
+  %tmp15418 = getelementptr inbounds float* %tmp15417, i64 1
+  %tmp15419 = getelementptr inbounds float* %tmp15418, i64 1
+  %tmp15420 = getelementptr inbounds float* %tmp15419, i64 1
+  %tmp15421 = getelementptr inbounds float* %tmp15420, i64 1
+  %tmp15422 = getelementptr inbounds float* %tmp15421, i64 1
+  %tmp15423 = getelementptr inbounds float* %tmp15422, i64 1
+  %tmp15424 = getelementptr inbounds float* %tmp15423, i64 1
+  %tmp15425 = getelementptr inbounds float* %tmp15424, i64 1
+  %tmp15426 = getelementptr inbounds float* %tmp15425, i64 1
+  %tmp15427 = getelementptr inbounds float* %tmp15426, i64 1
+  %tmp15428 = getelementptr inbounds float* %tmp15427, i64 1
+  %tmp15429 = getelementptr inbounds float* %tmp15428, i64 1
+  %tmp15430 = getelementptr inbounds float* %tmp15429, i64 1
+  %tmp15431 = getelementptr inbounds float* %tmp15430, i64 1
+  %tmp15432 = getelementptr inbounds float* %tmp15431, i64 1
+  %tmp15433 = getelementptr inbounds float* %tmp15432, i64 1
+  %tmp15434 = getelementptr inbounds float* %tmp15433, i64 1
+  %tmp15435 = getelementptr inbounds float* %tmp15434, i64 1
+  %tmp15436 = getelementptr inbounds float* %tmp15435, i64 1
+  %tmp15437 = getelementptr inbounds float* %tmp15436, i64 1
+  %tmp15438 = getelementptr inbounds float* %tmp15437, i64 1
+  %tmp15439 = getelementptr inbounds float* %tmp15438, i64 1
+  %tmp15440 = getelementptr inbounds float* %tmp15439, i64 1
+  %tmp15441 = getelementptr inbounds float* %tmp15440, i64 1
+  %tmp15442 = getelementptr inbounds float* %tmp15441, i64 1
+  %tmp15443 = getelementptr inbounds float* %tmp15442, i64 1
+  %tmp15444 = getelementptr inbounds float* %tmp15443, i64 1
+  %tmp15445 = getelementptr inbounds float* %tmp15444, i64 1
+  %tmp15446 = getelementptr inbounds float* %tmp15445, i64 1
+  %tmp15447 = getelementptr inbounds float* %tmp15446, i64 1
+  %tmp15448 = getelementptr inbounds float* %tmp15447, i64 1
+  %tmp15449 = getelementptr inbounds float* %tmp15448, i64 1
+  %tmp15450 = getelementptr inbounds float* %tmp15449, i64 1
+  %tmp15451 = getelementptr inbounds float* %tmp15450, i64 1
+  %tmp15452 = getelementptr inbounds float* %tmp15451, i64 1
+  %tmp15453 = getelementptr inbounds float* %tmp15452, i64 1
+  %tmp15454 = getelementptr inbounds float* %tmp15453, i64 1
+  %tmp15455 = getelementptr inbounds float* %tmp15454, i64 1
+  %tmp15456 = getelementptr inbounds float* %tmp15455, i64 1
+  %tmp15457 = getelementptr inbounds float* %tmp15456, i64 1
+  %tmp15458 = getelementptr inbounds float* %tmp15457, i64 1
+  %tmp15459 = getelementptr inbounds float* %tmp15458, i64 1
+  %tmp15460 = getelementptr inbounds float* %tmp15459, i64 1
+  %tmp15461 = getelementptr inbounds float* %tmp15460, i64 1
+  %tmp15462 = getelementptr inbounds float* %tmp15461, i64 1
+  %tmp15463 = getelementptr inbounds float* %tmp15462, i64 1
+  %tmp15464 = getelementptr inbounds float* %tmp15463, i64 1
+  %tmp15465 = getelementptr inbounds float* %tmp15464, i64 1
+  %tmp15466 = getelementptr inbounds float* %tmp15465, i64 1
+  %tmp15467 = getelementptr inbounds float* %tmp15466, i64 1
+  %tmp15468 = getelementptr inbounds float* %tmp15467, i64 1
+  %tmp15469 = getelementptr inbounds float* %tmp15468, i64 1
+  %tmp15470 = getelementptr inbounds float* %tmp15469, i64 1
+  %tmp15471 = getelementptr inbounds float* %tmp15470, i64 1
+  %tmp15472 = getelementptr inbounds float* %tmp15471, i64 1
+  %tmp15473 = getelementptr inbounds float* %tmp15472, i64 1
+  %tmp15474 = getelementptr inbounds float* %tmp15473, i64 1
+  %tmp15475 = getelementptr inbounds float* %tmp15474, i64 1
+  %tmp15476 = getelementptr inbounds float* %tmp15475, i64 1
+  %tmp15477 = getelementptr inbounds float* %tmp15476, i64 1
+  %tmp15478 = getelementptr inbounds float* %tmp15477, i64 1
+  %tmp15479 = getelementptr inbounds float* %tmp15478, i64 1
+  %tmp15480 = getelementptr inbounds float* %tmp15479, i64 1
+  %tmp15481 = getelementptr inbounds float* %tmp15480, i64 1
+  %tmp15482 = getelementptr inbounds float* %tmp15481, i64 1
+  %tmp15483 = getelementptr inbounds float* %tmp15482, i64 1
+  %tmp15484 = getelementptr inbounds float* %tmp15483, i64 1
+  %tmp15485 = getelementptr inbounds float* %tmp15484, i64 1
+  %tmp15486 = getelementptr inbounds float* %tmp15485, i64 1
+  %tmp15487 = getelementptr inbounds float* %tmp15486, i64 1
+  %tmp15488 = getelementptr inbounds float* %tmp15487, i64 1
+  %tmp15489 = getelementptr inbounds float* %tmp15488, i64 1
+  %tmp15490 = getelementptr inbounds float* %tmp15489, i64 1
+  %tmp15491 = getelementptr inbounds float* %tmp15490, i64 1
+  %tmp15492 = getelementptr inbounds float* %tmp15491, i64 1
+  %tmp15493 = getelementptr inbounds float* %tmp15492, i64 1
+  %tmp15494 = getelementptr inbounds float* %tmp15493, i64 1
+  %tmp15495 = getelementptr inbounds float* %tmp15494, i64 1
+  %tmp15496 = getelementptr inbounds float* %tmp15495, i64 1
+  %tmp15497 = getelementptr inbounds float* %tmp15496, i64 1
+  %tmp15498 = getelementptr inbounds float* %tmp15497, i64 1
+  %tmp15499 = getelementptr inbounds float* %tmp15498, i64 1
+  %tmp15500 = getelementptr inbounds float* %tmp15499, i64 1
+  %tmp15501 = getelementptr inbounds float* %tmp15500, i64 1
+  %tmp15502 = getelementptr inbounds float* %tmp15501, i64 1
+  %tmp15503 = getelementptr inbounds float* %tmp15502, i64 1
+  %tmp15504 = getelementptr inbounds float* %tmp15503, i64 1
+  %tmp15505 = getelementptr inbounds float* %tmp15504, i64 1
+  %tmp15506 = getelementptr inbounds float* %tmp15505, i64 1
+  %tmp15507 = getelementptr inbounds float* %tmp15506, i64 1
+  %tmp15508 = getelementptr inbounds float* %tmp15507, i64 1
+  %tmp15509 = getelementptr inbounds float* %tmp15508, i64 1
+  %tmp15510 = getelementptr inbounds float* %tmp15509, i64 1
+  %tmp15511 = getelementptr inbounds float* %tmp15510, i64 1
+  %tmp15512 = getelementptr inbounds float* %tmp15511, i64 1
+  %tmp15513 = getelementptr inbounds float* %tmp15512, i64 1
+  %tmp15514 = getelementptr inbounds float* %tmp15513, i64 1
+  %tmp15515 = getelementptr inbounds float* %tmp15514, i64 1
+  %tmp15516 = getelementptr inbounds float* %tmp15515, i64 1
+  %tmp15517 = getelementptr inbounds float* %tmp15516, i64 1
+  %tmp15518 = getelementptr inbounds float* %tmp15517, i64 1
+  %tmp15519 = getelementptr inbounds float* %tmp15518, i64 1
+  %tmp15520 = getelementptr inbounds float* %tmp15519, i64 1
+  %tmp15521 = getelementptr inbounds float* %tmp15520, i64 1
+  %tmp15522 = getelementptr inbounds float* %tmp15521, i64 1
+  %tmp15523 = getelementptr inbounds float* %tmp15522, i64 1
+  %tmp15524 = getelementptr inbounds float* %tmp15523, i64 1
+  %tmp15525 = getelementptr inbounds float* %tmp15524, i64 1
+  %tmp15526 = getelementptr inbounds float* %tmp15525, i64 1
+  %tmp15527 = getelementptr inbounds float* %tmp15526, i64 1
+  %tmp15528 = getelementptr inbounds float* %tmp15527, i64 1
+  %tmp15529 = getelementptr inbounds float* %tmp15528, i64 1
+  %tmp15530 = getelementptr inbounds float* %tmp15529, i64 1
+  %tmp15531 = getelementptr inbounds float* %tmp15530, i64 1
+  %tmp15532 = getelementptr inbounds float* %tmp15531, i64 1
+  %tmp15533 = getelementptr inbounds float* %tmp15532, i64 1
+  %tmp15534 = getelementptr inbounds float* %tmp15533, i64 1
+  %tmp15535 = getelementptr inbounds float* %tmp15534, i64 1
+  %tmp15536 = getelementptr inbounds float* %tmp15535, i64 1
+  %tmp15537 = getelementptr inbounds float* %tmp15536, i64 1
+  %tmp15538 = getelementptr inbounds float* %tmp15537, i64 1
+  %tmp15539 = getelementptr inbounds float* %tmp15538, i64 1
+  %tmp15540 = getelementptr inbounds float* %tmp15539, i64 1
+  %tmp15541 = getelementptr inbounds float* %tmp15540, i64 1
+  %tmp15542 = getelementptr inbounds float* %tmp15541, i64 1
+  %tmp15543 = getelementptr inbounds float* %tmp15542, i64 1
+  %tmp15544 = getelementptr inbounds float* %tmp15543, i64 1
+  %tmp15545 = getelementptr inbounds float* %tmp15544, i64 1
+  %tmp15546 = getelementptr inbounds float* %tmp15545, i64 1
+  %tmp15547 = getelementptr inbounds float* %tmp15546, i64 1
+  %tmp15548 = getelementptr inbounds float* %tmp15547, i64 1
+  %tmp15549 = getelementptr inbounds float* %tmp15548, i64 1
+  %tmp15550 = getelementptr inbounds float* %tmp15549, i64 1
+  %tmp15551 = getelementptr inbounds float* %tmp15550, i64 1
+  %tmp15552 = getelementptr inbounds float* %tmp15551, i64 1
+  %tmp15553 = getelementptr inbounds float* %tmp15552, i64 1
+  %tmp15554 = getelementptr inbounds float* %tmp15553, i64 1
+  %tmp15555 = getelementptr inbounds float* %tmp15554, i64 1
+  %tmp15556 = getelementptr inbounds float* %tmp15555, i64 1
+  %tmp15557 = getelementptr inbounds float* %tmp15556, i64 1
+  %tmp15558 = getelementptr inbounds float* %tmp15557, i64 1
+  %tmp15559 = getelementptr inbounds float* %tmp15558, i64 1
+  %tmp15560 = getelementptr inbounds float* %tmp15559, i64 1
+  %tmp15561 = getelementptr inbounds float* %tmp15560, i64 1
+  %tmp15562 = getelementptr inbounds float* %tmp15561, i64 1
+  %tmp15563 = getelementptr inbounds float* %tmp15562, i64 1
+  %tmp15564 = getelementptr inbounds float* %tmp15563, i64 1
+  %tmp15565 = getelementptr inbounds float* %tmp15564, i64 1
+  %tmp15566 = getelementptr inbounds float* %tmp15565, i64 1
+  %tmp15567 = getelementptr inbounds float* %tmp15566, i64 1
+  %tmp15568 = getelementptr inbounds float* %tmp15567, i64 1
+  %tmp15569 = getelementptr inbounds float* %tmp15568, i64 1
+  %tmp15570 = getelementptr inbounds float* %tmp15569, i64 1
+  %tmp15571 = getelementptr inbounds float* %tmp15570, i64 1
+  %tmp15572 = getelementptr inbounds float* %tmp15571, i64 1
+  %tmp15573 = getelementptr inbounds float* %tmp15572, i64 1
+  %tmp15574 = getelementptr inbounds float* %tmp15573, i64 1
+  %tmp15575 = getelementptr inbounds float* %tmp15574, i64 1
+  %tmp15576 = getelementptr inbounds float* %tmp15575, i64 1
+  %tmp15577 = getelementptr inbounds float* %tmp15576, i64 1
+  %tmp15578 = getelementptr inbounds float* %tmp15577, i64 1
+  %tmp15579 = getelementptr inbounds float* %tmp15578, i64 1
+  %tmp15580 = getelementptr inbounds float* %tmp15579, i64 1
+  %tmp15581 = getelementptr inbounds float* %tmp15580, i64 1
+  %tmp15582 = getelementptr inbounds float* %tmp15581, i64 1
+  %tmp15583 = getelementptr inbounds float* %tmp15582, i64 1
+  %tmp15584 = getelementptr inbounds float* %tmp15583, i64 1
+  %tmp15585 = getelementptr inbounds float* %tmp15584, i64 1
+  %tmp15586 = getelementptr inbounds float* %tmp15585, i64 1
+  %tmp15587 = getelementptr inbounds float* %tmp15586, i64 1
+  %tmp15588 = getelementptr inbounds float* %tmp15587, i64 1
+  %tmp15589 = getelementptr inbounds float* %tmp15588, i64 1
+  %tmp15590 = getelementptr inbounds float* %tmp15589, i64 1
+  %tmp15591 = getelementptr inbounds float* %tmp15590, i64 1
+  %tmp15592 = getelementptr inbounds float* %tmp15591, i64 1
+  %tmp15593 = getelementptr inbounds float* %tmp15592, i64 1
+  %tmp15594 = getelementptr inbounds float* %tmp15593, i64 1
+  %tmp15595 = getelementptr inbounds float* %tmp15594, i64 1
+  %tmp15596 = getelementptr inbounds float* %tmp15595, i64 1
+  %tmp15597 = getelementptr inbounds float* %tmp15596, i64 1
+  %tmp15598 = getelementptr inbounds float* %tmp15597, i64 1
+  %tmp15599 = getelementptr inbounds float* %tmp15598, i64 1
+  %tmp15600 = getelementptr inbounds float* %tmp15599, i64 1
+  %tmp15601 = getelementptr inbounds float* %tmp15600, i64 1
+  %tmp15602 = getelementptr inbounds float* %tmp15601, i64 1
+  %tmp15603 = getelementptr inbounds float* %tmp15602, i64 1
+  %tmp15604 = getelementptr inbounds float* %tmp15603, i64 1
+  %tmp15605 = getelementptr inbounds float* %tmp15604, i64 1
+  %tmp15606 = getelementptr inbounds float* %tmp15605, i64 1
+  %tmp15607 = getelementptr inbounds float* %tmp15606, i64 1
+  %tmp15608 = getelementptr inbounds float* %tmp15607, i64 1
+  %tmp15609 = getelementptr inbounds float* %tmp15608, i64 1
+  %tmp15610 = getelementptr inbounds float* %tmp15609, i64 1
+  %tmp15611 = getelementptr inbounds float* %tmp15610, i64 1
+  %tmp15612 = getelementptr inbounds float* %tmp15611, i64 1
+  %tmp15613 = getelementptr inbounds float* %tmp15612, i64 1
+  %tmp15614 = getelementptr inbounds float* %tmp15613, i64 1
+  %tmp15615 = getelementptr inbounds float* %tmp15614, i64 1
+  %tmp15616 = getelementptr inbounds float* %tmp15615, i64 1
+  %tmp15617 = getelementptr inbounds float* %tmp15616, i64 1
+  %tmp15618 = getelementptr inbounds float* %tmp15617, i64 1
+  %tmp15619 = getelementptr inbounds float* %tmp15618, i64 1
+  %tmp15620 = getelementptr inbounds float* %tmp15619, i64 1
+  %tmp15621 = getelementptr inbounds float* %tmp15620, i64 1
+  %tmp15622 = getelementptr inbounds float* %tmp15621, i64 1
+  %tmp15623 = getelementptr inbounds float* %tmp15622, i64 1
+  %tmp15624 = getelementptr inbounds float* %tmp15623, i64 1
+  %tmp15625 = getelementptr inbounds float* %tmp15624, i64 1
+  %tmp15626 = getelementptr inbounds float* %tmp15625, i64 1
+  %tmp15627 = getelementptr inbounds float* %tmp15626, i64 1
+  %tmp15628 = getelementptr inbounds float* %tmp15627, i64 1
+  %tmp15629 = getelementptr inbounds float* %tmp15628, i64 1
+  %tmp15630 = getelementptr inbounds float* %tmp15629, i64 1
+  %tmp15631 = getelementptr inbounds float* %tmp15630, i64 1
+  %tmp15632 = getelementptr inbounds float* %tmp15631, i64 1
+  %tmp15633 = getelementptr inbounds float* %tmp15632, i64 1
+  %tmp15634 = getelementptr inbounds float* %tmp15633, i64 1
+  %tmp15635 = getelementptr inbounds float* %tmp15634, i64 1
+  %tmp15636 = getelementptr inbounds float* %tmp15635, i64 1
+  %tmp15637 = getelementptr inbounds float* %tmp15636, i64 1
+  %tmp15638 = getelementptr inbounds float* %tmp15637, i64 1
+  %tmp15639 = getelementptr inbounds float* %tmp15638, i64 1
+  %tmp15640 = getelementptr inbounds float* %tmp15639, i64 1
+  %tmp15641 = getelementptr inbounds float* %tmp15640, i64 1
+  %tmp15642 = getelementptr inbounds float* %tmp15641, i64 1
+  %tmp15643 = getelementptr inbounds float* %tmp15642, i64 1
+  %tmp15644 = getelementptr inbounds float* %tmp15643, i64 1
+  %tmp15645 = getelementptr inbounds float* %tmp15644, i64 1
+  %tmp15646 = getelementptr inbounds float* %tmp15645, i64 1
+  %tmp15647 = getelementptr inbounds float* %tmp15646, i64 1
+  %tmp15648 = getelementptr inbounds float* %tmp15647, i64 1
+  %tmp15649 = getelementptr inbounds float* %tmp15648, i64 1
+  %tmp15650 = getelementptr inbounds float* %tmp15649, i64 1
+  %tmp15651 = getelementptr inbounds float* %tmp15650, i64 1
+  %tmp15652 = getelementptr inbounds float* %tmp15651, i64 1
+  %tmp15653 = getelementptr inbounds float* %tmp15652, i64 1
+  %tmp15654 = getelementptr inbounds float* %tmp15653, i64 1
+  %tmp15655 = getelementptr inbounds float* %tmp15654, i64 1
+  %tmp15656 = getelementptr inbounds float* %tmp15655, i64 1
+  %tmp15657 = getelementptr inbounds float* %tmp15656, i64 1
+  %tmp15658 = getelementptr inbounds float* %tmp15657, i64 1
+  %tmp15659 = getelementptr inbounds float* %tmp15658, i64 1
+  %tmp15660 = getelementptr inbounds float* %tmp15659, i64 1
+  %tmp15661 = getelementptr inbounds float* %tmp15660, i64 1
+  %tmp15662 = getelementptr inbounds float* %tmp15661, i64 1
+  %tmp15663 = getelementptr inbounds float* %tmp15662, i64 1
+  %tmp15664 = getelementptr inbounds float* %tmp15663, i64 1
+  %tmp15665 = getelementptr inbounds float* %tmp15664, i64 1
+  %tmp15666 = getelementptr inbounds float* %tmp15665, i64 1
+  %tmp15667 = getelementptr inbounds float* %tmp15666, i64 1
+  %tmp15668 = getelementptr inbounds float* %tmp15667, i64 1
+  %tmp15669 = getelementptr inbounds float* %tmp15668, i64 1
+  %tmp15670 = getelementptr inbounds float* %tmp15669, i64 1
+  %tmp15671 = getelementptr inbounds float* %tmp15670, i64 1
+  %tmp15672 = getelementptr inbounds float* %tmp15671, i64 1
+  %tmp15673 = getelementptr inbounds float* %tmp15672, i64 1
+  %tmp15674 = getelementptr inbounds float* %tmp15673, i64 1
+  %tmp15675 = getelementptr inbounds float* %tmp15674, i64 1
+  %tmp15676 = getelementptr inbounds float* %tmp15675, i64 1
+  %tmp15677 = getelementptr inbounds float* %tmp15676, i64 1
+  %tmp15678 = getelementptr inbounds float* %tmp15677, i64 1
+  %tmp15679 = getelementptr inbounds float* %tmp15678, i64 1
+  %tmp15680 = getelementptr inbounds float* %tmp15679, i64 1
+  %tmp15681 = getelementptr inbounds float* %tmp15680, i64 1
+  %tmp15682 = getelementptr inbounds float* %tmp15681, i64 1
+  %tmp15683 = getelementptr inbounds float* %tmp15682, i64 1
+  %tmp15684 = getelementptr inbounds float* %tmp15683, i64 1
+  %tmp15685 = getelementptr inbounds float* %tmp15684, i64 1
+  %tmp15686 = getelementptr inbounds float* %tmp15685, i64 1
+  %tmp15687 = getelementptr inbounds float* %tmp15686, i64 1
+  %tmp15688 = getelementptr inbounds float* %tmp15687, i64 1
+  %tmp15689 = getelementptr inbounds float* %tmp15688, i64 1
+  %tmp15690 = getelementptr inbounds float* %tmp15689, i64 1
+  %tmp15691 = getelementptr inbounds float* %tmp15690, i64 1
+  %tmp15692 = getelementptr inbounds float* %tmp15691, i64 1
+  %tmp15693 = getelementptr inbounds float* %tmp15692, i64 1
+  %tmp15694 = getelementptr inbounds float* %tmp15693, i64 1
+  %tmp15695 = getelementptr inbounds float* %tmp15694, i64 1
+  %tmp15696 = getelementptr inbounds float* %tmp15695, i64 1
+  %tmp15697 = getelementptr inbounds float* %tmp15696, i64 1
+  %tmp15698 = getelementptr inbounds float* %tmp15697, i64 1
+  %tmp15699 = getelementptr inbounds float* %tmp15698, i64 1
+  %tmp15700 = getelementptr inbounds float* %tmp15699, i64 1
+  %tmp15701 = getelementptr inbounds float* %tmp15700, i64 1
+  %tmp15702 = getelementptr inbounds float* %tmp15701, i64 1
+  %tmp15703 = getelementptr inbounds float* %tmp15702, i64 1
+  %tmp15704 = getelementptr inbounds float* %tmp15703, i64 1
+  %tmp15705 = getelementptr inbounds float* %tmp15704, i64 1
+  %tmp15706 = getelementptr inbounds float* %tmp15705, i64 1
+  %tmp15707 = getelementptr inbounds float* %tmp15706, i64 1
+  %tmp15708 = getelementptr inbounds float* %tmp15707, i64 1
+  %tmp15709 = getelementptr inbounds float* %tmp15708, i64 1
+  %tmp15710 = getelementptr inbounds float* %tmp15709, i64 1
+  %tmp15711 = getelementptr inbounds float* %tmp15710, i64 1
+  %tmp15712 = getelementptr inbounds float* %tmp15711, i64 1
+  %tmp15713 = getelementptr inbounds float* %tmp15712, i64 1
+  %tmp15714 = getelementptr inbounds float* %tmp15713, i64 1
+  %tmp15715 = getelementptr inbounds float* %tmp15714, i64 1
+  %tmp15716 = getelementptr inbounds float* %tmp15715, i64 1
+  %tmp15717 = getelementptr inbounds float* %tmp15716, i64 1
+  %tmp15718 = getelementptr inbounds float* %tmp15717, i64 1
+  %tmp15719 = getelementptr inbounds float* %tmp15718, i64 1
+  %tmp15720 = getelementptr inbounds float* %tmp15719, i64 1
+  %tmp15721 = getelementptr inbounds float* %tmp15720, i64 1
+  %tmp15722 = getelementptr inbounds float* %tmp15721, i64 1
+  %tmp15723 = getelementptr inbounds float* %tmp15722, i64 1
+  %tmp15724 = getelementptr inbounds float* %tmp15723, i64 1
+  %tmp15725 = getelementptr inbounds float* %tmp15724, i64 1
+  %tmp15726 = getelementptr inbounds float* %tmp15725, i64 1
+  %tmp15727 = getelementptr inbounds float* %tmp15726, i64 1
+  %tmp15728 = getelementptr inbounds float* %tmp15727, i64 1
+  %tmp15729 = getelementptr inbounds float* %tmp15728, i64 1
+  %tmp15730 = getelementptr inbounds float* %tmp15729, i64 1
+  %tmp15731 = getelementptr inbounds float* %tmp15730, i64 1
+  %tmp15732 = getelementptr inbounds float* %tmp15731, i64 1
+  %tmp15733 = getelementptr inbounds float* %tmp15732, i64 1
+  %tmp15734 = getelementptr inbounds float* %tmp15733, i64 1
+  %tmp15735 = getelementptr inbounds float* %tmp15734, i64 1
+  %tmp15736 = getelementptr inbounds float* %tmp15735, i64 1
+  %tmp15737 = getelementptr inbounds float* %tmp15736, i64 1
+  %tmp15738 = getelementptr inbounds float* %tmp15737, i64 1
+  %tmp15739 = getelementptr inbounds float* %tmp15738, i64 1
+  %tmp15740 = getelementptr inbounds float* %tmp15739, i64 1
+  %tmp15741 = getelementptr inbounds float* %tmp15740, i64 1
+  %tmp15742 = getelementptr inbounds float* %tmp15741, i64 1
+  %tmp15743 = getelementptr inbounds float* %tmp15742, i64 1
+  %tmp15744 = getelementptr inbounds float* %tmp15743, i64 1
+  %tmp15745 = getelementptr inbounds float* %tmp15744, i64 1
+  %tmp15746 = getelementptr inbounds float* %tmp15745, i64 1
+  %tmp15747 = getelementptr inbounds float* %tmp15746, i64 1
+  %tmp15748 = getelementptr inbounds float* %tmp15747, i64 1
+  %tmp15749 = getelementptr inbounds float* %tmp15748, i64 1
+  %tmp15750 = getelementptr inbounds float* %tmp15749, i64 1
+  %tmp15751 = getelementptr inbounds float* %tmp15750, i64 1
+  %tmp15752 = getelementptr inbounds float* %tmp15751, i64 1
+  %tmp15753 = getelementptr inbounds float* %tmp15752, i64 1
+  %tmp15754 = getelementptr inbounds float* %tmp15753, i64 1
+  %tmp15755 = getelementptr inbounds float* %tmp15754, i64 1
+  %tmp15756 = getelementptr inbounds float* %tmp15755, i64 1
+  %tmp15757 = getelementptr inbounds float* %tmp15756, i64 1
+  %tmp15758 = getelementptr inbounds float* %tmp15757, i64 1
+  %tmp15759 = getelementptr inbounds float* %tmp15758, i64 1
+  %tmp15760 = getelementptr inbounds float* %tmp15759, i64 1
+  %tmp15761 = getelementptr inbounds float* %tmp15760, i64 1
+  %tmp15762 = getelementptr inbounds float* %tmp15761, i64 1
+  %tmp15763 = getelementptr inbounds float* %tmp15762, i64 1
+  %tmp15764 = getelementptr inbounds float* %tmp15763, i64 1
+  %tmp15765 = getelementptr inbounds float* %tmp15764, i64 1
+  %tmp15766 = getelementptr inbounds float* %tmp15765, i64 1
+  %tmp15767 = getelementptr inbounds float* %tmp15766, i64 1
+  %tmp15768 = getelementptr inbounds float* %tmp15767, i64 1
+  %tmp15769 = getelementptr inbounds float* %tmp15768, i64 1
+  %tmp15770 = getelementptr inbounds float* %tmp15769, i64 1
+  %tmp15771 = getelementptr inbounds float* %tmp15770, i64 1
+  %tmp15772 = getelementptr inbounds float* %tmp15771, i64 1
+  %tmp15773 = getelementptr inbounds float* %tmp15772, i64 1
+  %tmp15774 = getelementptr inbounds float* %tmp15773, i64 1
+  %tmp15775 = getelementptr inbounds float* %tmp15774, i64 1
+  %tmp15776 = getelementptr inbounds float* %tmp15775, i64 1
+  %tmp15777 = getelementptr inbounds float* %tmp15776, i64 1
+  %tmp15778 = getelementptr inbounds float* %tmp15777, i64 1
+  %tmp15779 = getelementptr inbounds float* %tmp15778, i64 1
+  %tmp15780 = getelementptr inbounds float* %tmp15779, i64 1
+  %tmp15781 = getelementptr inbounds float* %tmp15780, i64 1
+  %tmp15782 = getelementptr inbounds float* %tmp15781, i64 1
+  %tmp15783 = getelementptr inbounds float* %tmp15782, i64 1
+  %tmp15784 = getelementptr inbounds float* %tmp15783, i64 1
+  %tmp15785 = getelementptr inbounds float* %tmp15784, i64 1
+  %tmp15786 = getelementptr inbounds float* %tmp15785, i64 1
+  %tmp15787 = getelementptr inbounds float* %tmp15786, i64 1
+  %tmp15788 = getelementptr inbounds float* %tmp15787, i64 1
+  %tmp15789 = getelementptr inbounds float* %tmp15788, i64 1
+  %tmp15790 = getelementptr inbounds float* %tmp15789, i64 1
+  %tmp15791 = getelementptr inbounds float* %tmp15790, i64 1
+  %tmp15792 = getelementptr inbounds float* %tmp15791, i64 1
+  %tmp15793 = getelementptr inbounds float* %tmp15792, i64 1
+  %tmp15794 = getelementptr inbounds float* %tmp15793, i64 1
+  %tmp15795 = getelementptr inbounds float* %tmp15794, i64 1
+  %tmp15796 = getelementptr inbounds float* %tmp15795, i64 1
+  %tmp15797 = getelementptr inbounds float* %tmp15796, i64 1
+  %tmp15798 = getelementptr inbounds float* %tmp15797, i64 1
+  %tmp15799 = getelementptr inbounds float* %tmp15798, i64 1
+  %tmp15800 = getelementptr inbounds float* %tmp15799, i64 1
+  %tmp15801 = getelementptr inbounds float* %tmp15800, i64 1
+  %tmp15802 = getelementptr inbounds float* %tmp15801, i64 1
+  %tmp15803 = getelementptr inbounds float* %tmp15802, i64 1
+  %tmp15804 = getelementptr inbounds float* %tmp15803, i64 1
+  %tmp15805 = getelementptr inbounds float* %tmp15804, i64 1
+  %tmp15806 = getelementptr inbounds float* %tmp15805, i64 1
+  %tmp15807 = getelementptr inbounds float* %tmp15806, i64 1
+  %tmp15808 = getelementptr inbounds float* %tmp15807, i64 1
+  %tmp15809 = getelementptr inbounds float* %tmp15808, i64 1
+  %tmp15810 = getelementptr inbounds float* %tmp15809, i64 1
+  %tmp15811 = getelementptr inbounds float* %tmp15810, i64 1
+  %tmp15812 = getelementptr inbounds float* %tmp15811, i64 1
+  %tmp15813 = getelementptr inbounds float* %tmp15812, i64 1
+  %tmp15814 = getelementptr inbounds float* %tmp15813, i64 1
+  %tmp15815 = getelementptr inbounds float* %tmp15814, i64 1
+  %tmp15816 = getelementptr inbounds float* %tmp15815, i64 1
+  %tmp15817 = getelementptr inbounds float* %tmp15816, i64 1
+  %tmp15818 = getelementptr inbounds float* %tmp15817, i64 1
+  %tmp15819 = getelementptr inbounds float* %tmp15818, i64 1
+  %tmp15820 = getelementptr inbounds float* %tmp15819, i64 1
+  %tmp15821 = getelementptr inbounds float* %tmp15820, i64 1
+  %tmp15822 = getelementptr inbounds float* %tmp15821, i64 1
+  %tmp15823 = getelementptr inbounds float* %tmp15822, i64 1
+  %tmp15824 = getelementptr inbounds float* %tmp15823, i64 1
+  %tmp15825 = getelementptr inbounds float* %tmp15824, i64 1
+  %tmp15826 = getelementptr inbounds float* %tmp15825, i64 1
+  %tmp15827 = getelementptr inbounds float* %tmp15826, i64 1
+  %tmp15828 = getelementptr inbounds float* %tmp15827, i64 1
+  %tmp15829 = getelementptr inbounds float* %tmp15828, i64 1
+  %tmp15830 = getelementptr inbounds float* %tmp15829, i64 1
+  %tmp15831 = getelementptr inbounds float* %tmp15830, i64 1
+  %tmp15832 = getelementptr inbounds float* %tmp15831, i64 1
+  %tmp15833 = getelementptr inbounds float* %tmp15832, i64 1
+  %tmp15834 = getelementptr inbounds float* %tmp15833, i64 1
+  %tmp15835 = getelementptr inbounds float* %tmp15834, i64 1
+  %tmp15836 = getelementptr inbounds float* %tmp15835, i64 1
+  %tmp15837 = getelementptr inbounds float* %tmp15836, i64 1
+  %tmp15838 = getelementptr inbounds float* %tmp15837, i64 1
+  %tmp15839 = getelementptr inbounds float* %tmp15838, i64 1
+  %tmp15840 = getelementptr inbounds float* %tmp15839, i64 1
+  %tmp15841 = getelementptr inbounds float* %tmp15840, i64 1
+  %tmp15842 = getelementptr inbounds float* %tmp15841, i64 1
+  %tmp15843 = getelementptr inbounds float* %tmp15842, i64 1
+  %tmp15844 = getelementptr inbounds float* %tmp15843, i64 1
+  %tmp15845 = getelementptr inbounds float* %tmp15844, i64 1
+  %tmp15846 = getelementptr inbounds float* %tmp15845, i64 1
+  %tmp15847 = getelementptr inbounds float* %tmp15846, i64 1
+  %tmp15848 = getelementptr inbounds float* %tmp15847, i64 1
+  %tmp15849 = getelementptr inbounds float* %tmp15848, i64 1
+  %tmp15850 = getelementptr inbounds float* %tmp15849, i64 1
+  %tmp15851 = getelementptr inbounds float* %tmp15850, i64 1
+  %tmp15852 = getelementptr inbounds float* %tmp15851, i64 1
+  %tmp15853 = getelementptr inbounds float* %tmp15852, i64 1
+  %tmp15854 = getelementptr inbounds float* %tmp15853, i64 1
+  %tmp15855 = getelementptr inbounds float* %tmp15854, i64 1
+  %tmp15856 = getelementptr inbounds float* %tmp15855, i64 1
+  %tmp15857 = getelementptr inbounds float* %tmp15856, i64 1
+  %tmp15858 = getelementptr inbounds float* %tmp15857, i64 1
+  %tmp15859 = getelementptr inbounds float* %tmp15858, i64 1
+  %tmp15860 = getelementptr inbounds float* %tmp15859, i64 1
+  %tmp15861 = getelementptr inbounds float* %tmp15860, i64 1
+  %tmp15862 = getelementptr inbounds float* %tmp15861, i64 1
+  %tmp15863 = getelementptr inbounds float* %tmp15862, i64 1
+  %tmp15864 = getelementptr inbounds float* %tmp15863, i64 1
+  %tmp15865 = getelementptr inbounds float* %tmp15864, i64 1
+  %tmp15866 = getelementptr inbounds float* %tmp15865, i64 1
+  %tmp15867 = getelementptr inbounds float* %tmp15866, i64 1
+  %tmp15868 = getelementptr inbounds float* %tmp15867, i64 1
+  %tmp15869 = getelementptr inbounds float* %tmp15868, i64 1
+  %tmp15870 = getelementptr inbounds float* %tmp15869, i64 1
+  %tmp15871 = getelementptr inbounds float* %tmp15870, i64 1
+  %tmp15872 = getelementptr inbounds float* %tmp15871, i64 1
+  %tmp15873 = getelementptr inbounds float* %tmp15872, i64 1
+  %tmp15874 = getelementptr inbounds float* %tmp15873, i64 1
+  %tmp15875 = getelementptr inbounds float* %tmp15874, i64 1
+  %tmp15876 = getelementptr inbounds float* %tmp15875, i64 1
+  %tmp15877 = getelementptr inbounds float* %tmp15876, i64 1
+  %tmp15878 = getelementptr inbounds float* %tmp15877, i64 1
+  %tmp15879 = getelementptr inbounds float* %tmp15878, i64 1
+  %tmp15880 = getelementptr inbounds float* %tmp15879, i64 1
+  %tmp15881 = getelementptr inbounds float* %tmp15880, i64 1
+  %tmp15882 = getelementptr inbounds float* %tmp15881, i64 1
+  %tmp15883 = getelementptr inbounds float* %tmp15882, i64 1
+  %tmp15884 = getelementptr inbounds float* %tmp15883, i64 1
+  %tmp15885 = getelementptr inbounds float* %tmp15884, i64 1
+  %tmp15886 = getelementptr inbounds float* %tmp15885, i64 1
+  %tmp15887 = getelementptr inbounds float* %tmp15886, i64 1
+  %tmp15888 = getelementptr inbounds float* %tmp15887, i64 1
+  %tmp15889 = getelementptr inbounds float* %tmp15888, i64 1
+  %tmp15890 = getelementptr inbounds float* %tmp15889, i64 1
+  %tmp15891 = getelementptr inbounds float* %tmp15890, i64 1
+  %tmp15892 = getelementptr inbounds float* %tmp15891, i64 1
+  %tmp15893 = getelementptr inbounds float* %tmp15892, i64 1
+  %tmp15894 = getelementptr inbounds float* %tmp15893, i64 1
+  %tmp15895 = getelementptr inbounds float* %tmp15894, i64 1
+  %tmp15896 = getelementptr inbounds float* %tmp15895, i64 1
+  %tmp15897 = getelementptr inbounds float* %tmp15896, i64 1
+  %tmp15898 = getelementptr inbounds float* %tmp15897, i64 1
+  %tmp15899 = getelementptr inbounds float* %tmp15898, i64 1
+  %tmp15900 = getelementptr inbounds float* %tmp15899, i64 1
+  %tmp15901 = getelementptr inbounds float* %tmp15900, i64 1
+  %tmp15902 = getelementptr inbounds float* %tmp15901, i64 1
+  %tmp15903 = getelementptr inbounds float* %tmp15902, i64 1
+  %tmp15904 = getelementptr inbounds float* %tmp15903, i64 1
+  %tmp15905 = getelementptr inbounds float* %tmp15904, i64 1
+  %tmp15906 = getelementptr inbounds float* %tmp15905, i64 1
+  %tmp15907 = getelementptr inbounds float* %tmp15906, i64 1
+  %tmp15908 = getelementptr inbounds float* %tmp15907, i64 1
+  %tmp15909 = getelementptr inbounds float* %tmp15908, i64 1
+  %tmp15910 = getelementptr inbounds float* %tmp15909, i64 1
+  %tmp15911 = getelementptr inbounds float* %tmp15910, i64 1
+  %tmp15912 = getelementptr inbounds float* %tmp15911, i64 1
+  %tmp15913 = getelementptr inbounds float* %tmp15912, i64 1
+  %tmp15914 = getelementptr inbounds float* %tmp15913, i64 1
+  %tmp15915 = getelementptr inbounds float* %tmp15914, i64 1
+  %tmp15916 = getelementptr inbounds float* %tmp15915, i64 1
+  %tmp15917 = getelementptr inbounds float* %tmp15916, i64 1
+  %tmp15918 = getelementptr inbounds float* %tmp15917, i64 1
+  %tmp15919 = getelementptr inbounds float* %tmp15918, i64 1
+  %tmp15920 = getelementptr inbounds float* %tmp15919, i64 1
+  %tmp15921 = getelementptr inbounds float* %tmp15920, i64 1
+  %tmp15922 = getelementptr inbounds float* %tmp15921, i64 1
+  %tmp15923 = getelementptr inbounds float* %tmp15922, i64 1
+  %tmp15924 = getelementptr inbounds float* %tmp15923, i64 1
+  %tmp15925 = getelementptr inbounds float* %tmp15924, i64 1
+  %tmp15926 = getelementptr inbounds float* %tmp15925, i64 1
+  %tmp15927 = getelementptr inbounds float* %tmp15926, i64 1
+  %tmp15928 = getelementptr inbounds float* %tmp15927, i64 1
+  %tmp15929 = getelementptr inbounds float* %tmp15928, i64 1
+  %tmp15930 = getelementptr inbounds float* %tmp15929, i64 1
+  %tmp15931 = getelementptr inbounds float* %tmp15930, i64 1
+  %tmp15932 = getelementptr inbounds float* %tmp15931, i64 1
+  %tmp15933 = getelementptr inbounds float* %tmp15932, i64 1
+  %tmp15934 = getelementptr inbounds float* %tmp15933, i64 1
+  %tmp15935 = getelementptr inbounds float* %tmp15934, i64 1
+  %tmp15936 = getelementptr inbounds float* %tmp15935, i64 1
+  %tmp15937 = getelementptr inbounds float* %tmp15936, i64 1
+  %tmp15938 = getelementptr inbounds float* %tmp15937, i64 1
+  %tmp15939 = getelementptr inbounds float* %tmp15938, i64 1
+  %tmp15940 = getelementptr inbounds float* %tmp15939, i64 1
+  %tmp15941 = getelementptr inbounds float* %tmp15940, i64 1
+  %tmp15942 = getelementptr inbounds float* %tmp15941, i64 1
+  %tmp15943 = getelementptr inbounds float* %tmp15942, i64 1
+  %tmp15944 = getelementptr inbounds float* %tmp15943, i64 1
+  %tmp15945 = getelementptr inbounds float* %tmp15944, i64 1
+  %tmp15946 = getelementptr inbounds float* %tmp15945, i64 1
+  %tmp15947 = getelementptr inbounds float* %tmp15946, i64 1
+  %tmp15948 = getelementptr inbounds float* %tmp15947, i64 1
+  %tmp15949 = getelementptr inbounds float* %tmp15948, i64 1
+  %tmp15950 = getelementptr inbounds float* %tmp15949, i64 1
+  %tmp15951 = getelementptr inbounds float* %tmp15950, i64 1
+  %tmp15952 = getelementptr inbounds float* %tmp15951, i64 1
+  %tmp15953 = getelementptr inbounds float* %tmp15952, i64 1
+  %tmp15954 = getelementptr inbounds float* %tmp15953, i64 1
+  %tmp15955 = getelementptr inbounds float* %tmp15954, i64 1
+  %tmp15956 = getelementptr inbounds float* %tmp15955, i64 1
+  %tmp15957 = getelementptr inbounds float* %tmp15956, i64 1
+  %tmp15958 = getelementptr inbounds float* %tmp15957, i64 1
+  %tmp15959 = getelementptr inbounds float* %tmp15958, i64 1
+  %tmp15960 = getelementptr inbounds float* %tmp15959, i64 1
+  %tmp15961 = getelementptr inbounds float* %tmp15960, i64 1
+  %tmp15962 = getelementptr inbounds float* %tmp15961, i64 1
+  %tmp15963 = getelementptr inbounds float* %tmp15962, i64 1
+  %tmp15964 = getelementptr inbounds float* %tmp15963, i64 1
+  %tmp15965 = getelementptr inbounds float* %tmp15964, i64 1
+  %tmp15966 = getelementptr inbounds float* %tmp15965, i64 1
+  %tmp15967 = getelementptr inbounds float* %tmp15966, i64 1
+  %tmp15968 = getelementptr inbounds float* %tmp15967, i64 1
+  %tmp15969 = getelementptr inbounds float* %tmp15968, i64 1
+  %tmp15970 = getelementptr inbounds float* %tmp15969, i64 1
+  %tmp15971 = getelementptr inbounds float* %tmp15970, i64 1
+  %tmp15972 = getelementptr inbounds float* %tmp15971, i64 1
+  %tmp15973 = getelementptr inbounds float* %tmp15972, i64 1
+  %tmp15974 = getelementptr inbounds float* %tmp15973, i64 1
+  %tmp15975 = getelementptr inbounds float* %tmp15974, i64 1
+  %tmp15976 = getelementptr inbounds float* %tmp15975, i64 1
+  %tmp15977 = getelementptr inbounds float* %tmp15976, i64 1
+  %tmp15978 = getelementptr inbounds float* %tmp15977, i64 1
+  %tmp15979 = getelementptr inbounds float* %tmp15978, i64 1
+  %tmp15980 = getelementptr inbounds float* %tmp15979, i64 1
+  %tmp15981 = getelementptr inbounds float* %tmp15980, i64 1
+  %tmp15982 = getelementptr inbounds float* %tmp15981, i64 1
+  %tmp15983 = getelementptr inbounds float* %tmp15982, i64 1
+  %tmp15984 = getelementptr inbounds float* %tmp15983, i64 1
+  %tmp15985 = getelementptr inbounds float* %tmp15984, i64 1
+  %tmp15986 = getelementptr inbounds float* %tmp15985, i64 1
+  %tmp15987 = getelementptr inbounds float* %tmp15986, i64 1
+  %tmp15988 = getelementptr inbounds float* %tmp15987, i64 1
+  %tmp15989 = getelementptr inbounds float* %tmp15988, i64 1
+  %tmp15990 = getelementptr inbounds float* %tmp15989, i64 1
+  %tmp15991 = getelementptr inbounds float* %tmp15990, i64 1
+  %tmp15992 = getelementptr inbounds float* %tmp15991, i64 1
+  %tmp15993 = getelementptr inbounds float* %tmp15992, i64 1
+  %tmp15994 = getelementptr inbounds float* %tmp15993, i64 1
+  %tmp15995 = getelementptr inbounds float* %tmp15994, i64 1
+  %tmp15996 = getelementptr inbounds float* %tmp15995, i64 1
+  %tmp15997 = getelementptr inbounds float* %tmp15996, i64 1
+  %tmp15998 = getelementptr inbounds float* %tmp15997, i64 1
+  %tmp15999 = getelementptr inbounds float* %tmp15998, i64 1
+  %tmp16000 = getelementptr inbounds float* %tmp15999, i64 1
+  %tmp16001 = getelementptr inbounds float* %tmp16000, i64 1
+  %tmp16002 = getelementptr inbounds float* %tmp16001, i64 1
+  %tmp16003 = getelementptr inbounds float* %tmp16002, i64 1
+  %tmp16004 = getelementptr inbounds float* %tmp16003, i64 1
+  %tmp16005 = getelementptr inbounds float* %tmp16004, i64 1
+  %tmp16006 = getelementptr inbounds float* %tmp16005, i64 1
+  %tmp16007 = getelementptr inbounds float* %tmp16006, i64 1
+  %tmp16008 = getelementptr inbounds float* %tmp16007, i64 1
+  %tmp16009 = getelementptr inbounds float* %tmp16008, i64 1
+  %tmp16010 = getelementptr inbounds float* %tmp16009, i64 1
+  %tmp16011 = getelementptr inbounds float* %tmp16010, i64 1
+  %tmp16012 = getelementptr inbounds float* %tmp16011, i64 1
+  %tmp16013 = getelementptr inbounds float* %tmp16012, i64 1
+  %tmp16014 = getelementptr inbounds float* %tmp16013, i64 1
+  %tmp16015 = getelementptr inbounds float* %tmp16014, i64 1
+  %tmp16016 = getelementptr inbounds float* %tmp16015, i64 1
+  %tmp16017 = getelementptr inbounds float* %tmp16016, i64 1
+  %tmp16018 = getelementptr inbounds float* %tmp16017, i64 1
+  %tmp16019 = getelementptr inbounds float* %tmp16018, i64 1
+  %tmp16020 = getelementptr inbounds float* %tmp16019, i64 1
+  %tmp16021 = getelementptr inbounds float* %tmp16020, i64 1
+  %tmp16022 = getelementptr inbounds float* %tmp16021, i64 1
+  %tmp16023 = getelementptr inbounds float* %tmp16022, i64 1
+  %tmp16024 = getelementptr inbounds float* %tmp16023, i64 1
+  %tmp16025 = getelementptr inbounds float* %tmp16024, i64 1
+  %tmp16026 = getelementptr inbounds float* %tmp16025, i64 1
+  %tmp16027 = getelementptr inbounds float* %tmp16026, i64 1
+  %tmp16028 = getelementptr inbounds float* %tmp16027, i64 1
+  %tmp16029 = getelementptr inbounds float* %tmp16028, i64 1
+  %tmp16030 = getelementptr inbounds float* %tmp16029, i64 1
+  %tmp16031 = getelementptr inbounds float* %tmp16030, i64 1
+  %tmp16032 = getelementptr inbounds float* %tmp16031, i64 1
+  %tmp16033 = getelementptr inbounds float* %tmp16032, i64 1
+  %tmp16034 = getelementptr inbounds float* %tmp16033, i64 1
+  %tmp16035 = getelementptr inbounds float* %tmp16034, i64 1
+  %tmp16036 = getelementptr inbounds float* %tmp16035, i64 1
+  %tmp16037 = getelementptr inbounds float* %tmp16036, i64 1
+  %tmp16038 = getelementptr inbounds float* %tmp16037, i64 1
+  %tmp16039 = getelementptr inbounds float* %tmp16038, i64 1
+  %tmp16040 = getelementptr inbounds float* %tmp16039, i64 1
+  %tmp16041 = getelementptr inbounds float* %tmp16040, i64 1
+  %tmp16042 = getelementptr inbounds float* %tmp16041, i64 1
+  %tmp16043 = getelementptr inbounds float* %tmp16042, i64 1
+  %tmp16044 = getelementptr inbounds float* %tmp16043, i64 1
+  %tmp16045 = getelementptr inbounds float* %tmp16044, i64 1
+  %tmp16046 = getelementptr inbounds float* %tmp16045, i64 1
+  %tmp16047 = getelementptr inbounds float* %tmp16046, i64 1
+  %tmp16048 = getelementptr inbounds float* %tmp16047, i64 1
+  %tmp16049 = getelementptr inbounds float* %tmp16048, i64 1
+  %tmp16050 = getelementptr inbounds float* %tmp16049, i64 1
+  %tmp16051 = getelementptr inbounds float* %tmp16050, i64 1
+  %tmp16052 = getelementptr inbounds float* %tmp16051, i64 1
+  %tmp16053 = getelementptr inbounds float* %tmp16052, i64 1
+  %tmp16054 = getelementptr inbounds float* %tmp16053, i64 1
+  %tmp16055 = getelementptr inbounds float* %tmp16054, i64 1
+  %tmp16056 = getelementptr inbounds float* %tmp16055, i64 1
+  %tmp16057 = getelementptr inbounds float* %tmp16056, i64 1
+  %tmp16058 = getelementptr inbounds float* %tmp16057, i64 1
+  %tmp16059 = getelementptr inbounds float* %tmp16058, i64 1
+  %tmp16060 = getelementptr inbounds float* %tmp16059, i64 1
+  %tmp16061 = getelementptr inbounds float* %tmp16060, i64 1
+  %tmp16062 = getelementptr inbounds float* %tmp16061, i64 1
+  %tmp16063 = getelementptr inbounds float* %tmp16062, i64 1
+  %tmp16064 = getelementptr inbounds float* %tmp16063, i64 1
+  %tmp16065 = getelementptr inbounds float* %tmp16064, i64 1
+  %tmp16066 = getelementptr inbounds float* %tmp16065, i64 1
+  %tmp16067 = getelementptr inbounds float* %tmp16066, i64 1
+  %tmp16068 = getelementptr inbounds float* %tmp16067, i64 1
+  %tmp16069 = getelementptr inbounds float* %tmp16068, i64 1
+  %tmp16070 = getelementptr inbounds float* %tmp16069, i64 1
+  %tmp16071 = getelementptr inbounds float* %tmp16070, i64 1
+  %tmp16072 = getelementptr inbounds float* %tmp16071, i64 1
+  %tmp16073 = getelementptr inbounds float* %tmp16072, i64 1
+  %tmp16074 = getelementptr inbounds float* %tmp16073, i64 1
+  %tmp16075 = getelementptr inbounds float* %tmp16074, i64 1
+  %tmp16076 = getelementptr inbounds float* %tmp16075, i64 1
+  %tmp16077 = getelementptr inbounds float* %tmp16076, i64 1
+  %tmp16078 = getelementptr inbounds float* %tmp16077, i64 1
+  %tmp16079 = getelementptr inbounds float* %tmp16078, i64 1
+  %tmp16080 = getelementptr inbounds float* %tmp16079, i64 1
+  %tmp16081 = getelementptr inbounds float* %tmp16080, i64 1
+  %tmp16082 = getelementptr inbounds float* %tmp16081, i64 1
+  %tmp16083 = getelementptr inbounds float* %tmp16082, i64 1
+  %tmp16084 = getelementptr inbounds float* %tmp16083, i64 1
+  %tmp16085 = getelementptr inbounds float* %tmp16084, i64 1
+  %tmp16086 = getelementptr inbounds float* %tmp16085, i64 1
+  %tmp16087 = getelementptr inbounds float* %tmp16086, i64 1
+  %tmp16088 = getelementptr inbounds float* %tmp16087, i64 1
+  %tmp16089 = getelementptr inbounds float* %tmp16088, i64 1
+  %tmp16090 = getelementptr inbounds float* %tmp16089, i64 1
+  %tmp16091 = getelementptr inbounds float* %tmp16090, i64 1
+  %tmp16092 = getelementptr inbounds float* %tmp16091, i64 1
+  %tmp16093 = getelementptr inbounds float* %tmp16092, i64 1
+  %tmp16094 = getelementptr inbounds float* %tmp16093, i64 1
+  %tmp16095 = getelementptr inbounds float* %tmp16094, i64 1
+  %tmp16096 = getelementptr inbounds float* %tmp16095, i64 1
+  %tmp16097 = getelementptr inbounds float* %tmp16096, i64 1
+  %tmp16098 = getelementptr inbounds float* %tmp16097, i64 1
+  %tmp16099 = getelementptr inbounds float* %tmp16098, i64 1
+  %tmp16100 = getelementptr inbounds float* %tmp16099, i64 1
+  %tmp16101 = getelementptr inbounds float* %tmp16100, i64 1
+  %tmp16102 = getelementptr inbounds float* %tmp16101, i64 1
+  %tmp16103 = getelementptr inbounds float* %tmp16102, i64 1
+  %tmp16104 = getelementptr inbounds float* %tmp16103, i64 1
+  %tmp16105 = getelementptr inbounds float* %tmp16104, i64 1
+  %tmp16106 = getelementptr inbounds float* %tmp16105, i64 1
+  %tmp16107 = getelementptr inbounds float* %tmp16106, i64 1
+  %tmp16108 = getelementptr inbounds float* %tmp16107, i64 1
+  %tmp16109 = getelementptr inbounds float* %tmp16108, i64 1
+  %tmp16110 = getelementptr inbounds float* %tmp16109, i64 1
+  %tmp16111 = getelementptr inbounds float* %tmp16110, i64 1
+  %tmp16112 = getelementptr inbounds float* %tmp16111, i64 1
+  %tmp16113 = getelementptr inbounds float* %tmp16112, i64 1
+  %tmp16114 = getelementptr inbounds float* %tmp16113, i64 1
+  %tmp16115 = getelementptr inbounds float* %tmp16114, i64 1
+  %tmp16116 = getelementptr inbounds float* %tmp16115, i64 1
+  %tmp16117 = getelementptr inbounds float* %tmp16116, i64 1
+  %tmp16118 = getelementptr inbounds float* %tmp16117, i64 1
+  %tmp16119 = getelementptr inbounds float* %tmp16118, i64 1
+  %tmp16120 = getelementptr inbounds float* %tmp16119, i64 1
+  %tmp16121 = getelementptr inbounds float* %tmp16120, i64 1
+  %tmp16122 = getelementptr inbounds float* %tmp16121, i64 1
+  %tmp16123 = getelementptr inbounds float* %tmp16122, i64 1
+  %tmp16124 = getelementptr inbounds float* %tmp16123, i64 1
+  %tmp16125 = getelementptr inbounds float* %tmp16124, i64 1
+  %tmp16126 = getelementptr inbounds float* %tmp16125, i64 1
+  %tmp16127 = getelementptr inbounds float* %tmp16126, i64 1
+  %tmp16128 = getelementptr inbounds float* %tmp16127, i64 1
+  %tmp16129 = getelementptr inbounds float* %tmp16128, i64 1
+  %tmp16130 = getelementptr inbounds float* %tmp16129, i64 1
+  %tmp16131 = getelementptr inbounds float* %tmp16130, i64 1
+  %tmp16132 = getelementptr inbounds float* %tmp16131, i64 1
+  %tmp16133 = getelementptr inbounds float* %tmp16132, i64 1
+  %tmp16134 = getelementptr inbounds float* %tmp16133, i64 1
+  %tmp16135 = getelementptr inbounds float* %tmp16134, i64 1
+  %tmp16136 = getelementptr inbounds float* %tmp16135, i64 1
+  %tmp16137 = getelementptr inbounds float* %tmp16136, i64 1
+  %tmp16138 = getelementptr inbounds float* %tmp16137, i64 1
+  %tmp16139 = getelementptr inbounds float* %tmp16138, i64 1
+  %tmp16140 = getelementptr inbounds float* %tmp16139, i64 1
+  %tmp16141 = getelementptr inbounds float* %tmp16140, i64 1
+  %tmp16142 = getelementptr inbounds float* %tmp16141, i64 1
+  %tmp16143 = getelementptr inbounds float* %tmp16142, i64 1
+  %tmp16144 = getelementptr inbounds float* %tmp16143, i64 1
+  %tmp16145 = getelementptr inbounds float* %tmp16144, i64 1
+  %tmp16146 = getelementptr inbounds float* %tmp16145, i64 1
+  %tmp16147 = getelementptr inbounds float* %tmp16146, i64 1
+  %tmp16148 = getelementptr inbounds float* %tmp16147, i64 1
+  %tmp16149 = getelementptr inbounds float* %tmp16148, i64 1
+  %tmp16150 = getelementptr inbounds float* %tmp16149, i64 1
+  %tmp16151 = getelementptr inbounds float* %tmp16150, i64 1
+  %tmp16152 = getelementptr inbounds float* %tmp16151, i64 1
+  %tmp16153 = getelementptr inbounds float* %tmp16152, i64 1
+  %tmp16154 = getelementptr inbounds float* %tmp16153, i64 1
+  %tmp16155 = getelementptr inbounds float* %tmp16154, i64 1
+  %tmp16156 = getelementptr inbounds float* %tmp16155, i64 1
+  %tmp16157 = getelementptr inbounds float* %tmp16156, i64 1
+  %tmp16158 = getelementptr inbounds float* %tmp16157, i64 1
+  %tmp16159 = getelementptr inbounds float* %tmp16158, i64 1
+  %tmp16160 = getelementptr inbounds float* %tmp16159, i64 1
+  %tmp16161 = getelementptr inbounds float* %tmp16160, i64 1
+  %tmp16162 = getelementptr inbounds float* %tmp16161, i64 1
+  %tmp16163 = getelementptr inbounds float* %tmp16162, i64 1
+  %tmp16164 = getelementptr inbounds float* %tmp16163, i64 1
+  %tmp16165 = getelementptr inbounds float* %tmp16164, i64 1
+  %tmp16166 = getelementptr inbounds float* %tmp16165, i64 1
+  %tmp16167 = getelementptr inbounds float* %tmp16166, i64 1
+  %tmp16168 = getelementptr inbounds float* %tmp16167, i64 1
+  %tmp16169 = getelementptr inbounds float* %tmp16168, i64 1
+  %tmp16170 = getelementptr inbounds float* %tmp16169, i64 1
+  %tmp16171 = getelementptr inbounds float* %tmp16170, i64 1
+  %tmp16172 = getelementptr inbounds float* %tmp16171, i64 1
+  %tmp16173 = getelementptr inbounds float* %tmp16172, i64 1
+  %tmp16174 = getelementptr inbounds float* %tmp16173, i64 1
+  %tmp16175 = getelementptr inbounds float* %tmp16174, i64 1
+  %tmp16176 = getelementptr inbounds float* %tmp16175, i64 1
+  %tmp16177 = getelementptr inbounds float* %tmp16176, i64 1
+  %tmp16178 = getelementptr inbounds float* %tmp16177, i64 1
+  %tmp16179 = getelementptr inbounds float* %tmp16178, i64 1
+  %tmp16180 = getelementptr inbounds float* %tmp16179, i64 1
+  %tmp16181 = getelementptr inbounds float* %tmp16180, i64 1
+  %tmp16182 = getelementptr inbounds float* %tmp16181, i64 1
+  %tmp16183 = getelementptr inbounds float* %tmp16182, i64 1
+  %tmp16184 = getelementptr inbounds float* %tmp16183, i64 1
+  %tmp16185 = getelementptr inbounds float* %tmp16184, i64 1
+  %tmp16186 = getelementptr inbounds float* %tmp16185, i64 1
+  %tmp16187 = getelementptr inbounds float* %tmp16186, i64 1
+  %tmp16188 = getelementptr inbounds float* %tmp16187, i64 1
+  %tmp16189 = getelementptr inbounds float* %tmp16188, i64 1
+  %tmp16190 = getelementptr inbounds float* %tmp16189, i64 1
+  %tmp16191 = getelementptr inbounds float* %tmp16190, i64 1
+  %tmp16192 = getelementptr inbounds float* %tmp16191, i64 1
+  %tmp16193 = getelementptr inbounds float* %tmp16192, i64 1
+  %tmp16194 = getelementptr inbounds float* %tmp16193, i64 1
+  %tmp16195 = getelementptr inbounds float* %tmp16194, i64 1
+  %tmp16196 = getelementptr inbounds float* %tmp16195, i64 1
+  %tmp16197 = getelementptr inbounds float* %tmp16196, i64 1
+  %tmp16198 = getelementptr inbounds float* %tmp16197, i64 1
+  %tmp16199 = getelementptr inbounds float* %tmp16198, i64 1
+  %tmp16200 = getelementptr inbounds float* %tmp16199, i64 1
+  %tmp16201 = getelementptr inbounds float* %tmp16200, i64 1
+  %tmp16202 = getelementptr inbounds float* %tmp16201, i64 1
+  %tmp16203 = getelementptr inbounds float* %tmp16202, i64 1
+  %tmp16204 = getelementptr inbounds float* %tmp16203, i64 1
+  %tmp16205 = getelementptr inbounds float* %tmp16204, i64 1
+  %tmp16206 = getelementptr inbounds float* %tmp16205, i64 1
+  %tmp16207 = getelementptr inbounds float* %tmp16206, i64 1
+  %tmp16208 = getelementptr inbounds float* %tmp16207, i64 1
+  %tmp16209 = getelementptr inbounds float* %tmp16208, i64 1
+  %tmp16210 = getelementptr inbounds float* %tmp16209, i64 1
+  %tmp16211 = getelementptr inbounds float* %tmp16210, i64 1
+  %tmp16212 = getelementptr inbounds float* %tmp16211, i64 1
+  %tmp16213 = getelementptr inbounds float* %tmp16212, i64 1
+  %tmp16214 = getelementptr inbounds float* %tmp16213, i64 1
+  %tmp16215 = getelementptr inbounds float* %tmp16214, i64 1
+  %tmp16216 = getelementptr inbounds float* %tmp16215, i64 1
+  %tmp16217 = getelementptr inbounds float* %tmp16216, i64 1
+  %tmp16218 = getelementptr inbounds float* %tmp16217, i64 1
+  %tmp16219 = getelementptr inbounds float* %tmp16218, i64 1
+  %tmp16220 = getelementptr inbounds float* %tmp16219, i64 1
+  %tmp16221 = getelementptr inbounds float* %tmp16220, i64 1
+  %tmp16222 = getelementptr inbounds float* %tmp16221, i64 1
+  %tmp16223 = getelementptr inbounds float* %tmp16222, i64 1
+  %tmp16224 = getelementptr inbounds float* %tmp16223, i64 1
+  %tmp16225 = getelementptr inbounds float* %tmp16224, i64 1
+  %tmp16226 = getelementptr inbounds float* %tmp16225, i64 1
+  %tmp16227 = getelementptr inbounds float* %tmp16226, i64 1
+  %tmp16228 = getelementptr inbounds float* %tmp16227, i64 1
+  %tmp16229 = getelementptr inbounds float* %tmp16228, i64 1
+  %tmp16230 = getelementptr inbounds float* %tmp16229, i64 1
+  %tmp16231 = getelementptr inbounds float* %tmp16230, i64 1
+  %tmp16232 = getelementptr inbounds float* %tmp16231, i64 1
+  %tmp16233 = getelementptr inbounds float* %tmp16232, i64 1
+  %tmp16234 = getelementptr inbounds float* %tmp16233, i64 1
+  %tmp16235 = getelementptr inbounds float* %tmp16234, i64 1
+  %tmp16236 = getelementptr inbounds float* %tmp16235, i64 1
+  %tmp16237 = getelementptr inbounds float* %tmp16236, i64 1
+  %tmp16238 = getelementptr inbounds float* %tmp16237, i64 1
+  %tmp16239 = getelementptr inbounds float* %tmp16238, i64 1
+  %tmp16240 = getelementptr inbounds float* %tmp16239, i64 1
+  %tmp16241 = getelementptr inbounds float* %tmp16240, i64 1
+  %tmp16242 = getelementptr inbounds float* %tmp16241, i64 1
+  %tmp16243 = getelementptr inbounds float* %tmp16242, i64 1
+  %tmp16244 = getelementptr inbounds float* %tmp16243, i64 1
+  %tmp16245 = getelementptr inbounds float* %tmp16244, i64 1
+  %tmp16246 = getelementptr inbounds float* %tmp16245, i64 1
+  %tmp16247 = getelementptr inbounds float* %tmp16246, i64 1
+  %tmp16248 = getelementptr inbounds float* %tmp16247, i64 1
+  %tmp16249 = getelementptr inbounds float* %tmp16248, i64 1
+  %tmp16250 = getelementptr inbounds float* %tmp16249, i64 1
+  %tmp16251 = getelementptr inbounds float* %tmp16250, i64 1
+  %tmp16252 = getelementptr inbounds float* %tmp16251, i64 1
+  %tmp16253 = getelementptr inbounds float* %tmp16252, i64 1
+  %tmp16254 = getelementptr inbounds float* %tmp16253, i64 1
+  %tmp16255 = getelementptr inbounds float* %tmp16254, i64 1
+  %tmp16256 = getelementptr inbounds float* %tmp16255, i64 1
+  %tmp16257 = getelementptr inbounds float* %tmp16256, i64 1
+  %tmp16258 = getelementptr inbounds float* %tmp16257, i64 1
+  %tmp16259 = getelementptr inbounds float* %tmp16258, i64 1
+  %tmp16260 = getelementptr inbounds float* %tmp16259, i64 1
+  %tmp16261 = getelementptr inbounds float* %tmp16260, i64 1
+  %tmp16262 = getelementptr inbounds float* %tmp16261, i64 1
+  %tmp16263 = getelementptr inbounds float* %tmp16262, i64 1
+  %tmp16264 = getelementptr inbounds float* %tmp16263, i64 1
+  %tmp16265 = getelementptr inbounds float* %tmp16264, i64 1
+  %tmp16266 = getelementptr inbounds float* %tmp16265, i64 1
+  %tmp16267 = getelementptr inbounds float* %tmp16266, i64 1
+  %tmp16268 = getelementptr inbounds float* %tmp16267, i64 1
+  %tmp16269 = getelementptr inbounds float* %tmp16268, i64 1
+  %tmp16270 = getelementptr inbounds float* %tmp16269, i64 1
+  %tmp16271 = getelementptr inbounds float* %tmp16270, i64 1
+  %tmp16272 = getelementptr inbounds float* %tmp16271, i64 1
+  %tmp16273 = getelementptr inbounds float* %tmp16272, i64 1
+  %tmp16274 = getelementptr inbounds float* %tmp16273, i64 1
+  %tmp16275 = getelementptr inbounds float* %tmp16274, i64 1
+  %tmp16276 = getelementptr inbounds float* %tmp16275, i64 1
+  %tmp16277 = getelementptr inbounds float* %tmp16276, i64 1
+  %tmp16278 = getelementptr inbounds float* %tmp16277, i64 1
+  %tmp16279 = getelementptr inbounds float* %tmp16278, i64 1
+  %tmp16280 = getelementptr inbounds float* %tmp16279, i64 1
+  %tmp16281 = getelementptr inbounds float* %tmp16280, i64 1
+  %tmp16282 = getelementptr inbounds float* %tmp16281, i64 1
+  %tmp16283 = getelementptr inbounds float* %tmp16282, i64 1
+  %tmp16284 = getelementptr inbounds float* %tmp16283, i64 1
+  %tmp16285 = getelementptr inbounds float* %tmp16284, i64 1
+  %tmp16286 = getelementptr inbounds float* %tmp16285, i64 1
+  %tmp16287 = getelementptr inbounds float* %tmp16286, i64 1
+  %tmp16288 = getelementptr inbounds float* %tmp16287, i64 1
+  %tmp16289 = getelementptr inbounds float* %tmp16288, i64 1
+  %tmp16290 = getelementptr inbounds float* %tmp16289, i64 1
+  %tmp16291 = getelementptr inbounds float* %tmp16290, i64 1
+  %tmp16292 = getelementptr inbounds float* %tmp16291, i64 1
+  %tmp16293 = getelementptr inbounds float* %tmp16292, i64 1
+  %tmp16294 = getelementptr inbounds float* %tmp16293, i64 1
+  %tmp16295 = getelementptr inbounds float* %tmp16294, i64 1
+  %tmp16296 = getelementptr inbounds float* %tmp16295, i64 1
+  %tmp16297 = getelementptr inbounds float* %tmp16296, i64 1
+  %tmp16298 = getelementptr inbounds float* %tmp16297, i64 1
+  %tmp16299 = getelementptr inbounds float* %tmp16298, i64 1
+  %tmp16300 = getelementptr inbounds float* %tmp16299, i64 1
+  %tmp16301 = getelementptr inbounds float* %tmp16300, i64 1
+  %tmp16302 = getelementptr inbounds float* %tmp16301, i64 1
+  %tmp16303 = getelementptr inbounds float* %tmp16302, i64 1
+  %tmp16304 = getelementptr inbounds float* %tmp16303, i64 1
+  %tmp16305 = getelementptr inbounds float* %tmp16304, i64 1
+  %tmp16306 = getelementptr inbounds float* %tmp16305, i64 1
+  %tmp16307 = getelementptr inbounds float* %tmp16306, i64 1
+  %tmp16308 = getelementptr inbounds float* %tmp16307, i64 1
+  %tmp16309 = getelementptr inbounds float* %tmp16308, i64 1
+  %tmp16310 = getelementptr inbounds float* %tmp16309, i64 1
+  %tmp16311 = getelementptr inbounds float* %tmp16310, i64 1
+  %tmp16312 = getelementptr inbounds float* %tmp16311, i64 1
+  %tmp16313 = getelementptr inbounds float* %tmp16312, i64 1
+  %tmp16314 = getelementptr inbounds float* %tmp16313, i64 1
+  %tmp16315 = getelementptr inbounds float* %tmp16314, i64 1
+  %tmp16316 = getelementptr inbounds float* %tmp16315, i64 1
+  %tmp16317 = getelementptr inbounds float* %tmp16316, i64 1
+  %tmp16318 = getelementptr inbounds float* %tmp16317, i64 1
+  %tmp16319 = getelementptr inbounds float* %tmp16318, i64 1
+  %tmp16320 = getelementptr inbounds float* %tmp16319, i64 1
+  %tmp16321 = getelementptr inbounds float* %tmp16320, i64 1
+  %tmp16322 = getelementptr inbounds float* %tmp16321, i64 1
+  %tmp16323 = getelementptr inbounds float* %tmp16322, i64 1
+  %tmp16324 = getelementptr inbounds float* %tmp16323, i64 1
+  %tmp16325 = getelementptr inbounds float* %tmp16324, i64 1
+  %tmp16326 = getelementptr inbounds float* %tmp16325, i64 1
+  %tmp16327 = getelementptr inbounds float* %tmp16326, i64 1
+  %tmp16328 = getelementptr inbounds float* %tmp16327, i64 1
+  %tmp16329 = getelementptr inbounds float* %tmp16328, i64 1
+  %tmp16330 = getelementptr inbounds float* %tmp16329, i64 1
+  %tmp16331 = getelementptr inbounds float* %tmp16330, i64 1
+  %tmp16332 = getelementptr inbounds float* %tmp16331, i64 1
+  %tmp16333 = getelementptr inbounds float* %tmp16332, i64 1
+  %tmp16334 = getelementptr inbounds float* %tmp16333, i64 1
+  %tmp16335 = getelementptr inbounds float* %tmp16334, i64 1
+  %tmp16336 = getelementptr inbounds float* %tmp16335, i64 1
+  %tmp16337 = getelementptr inbounds float* %tmp16336, i64 1
+  %tmp16338 = getelementptr inbounds float* %tmp16337, i64 1
+  %tmp16339 = getelementptr inbounds float* %tmp16338, i64 1
+  %tmp16340 = getelementptr inbounds float* %tmp16339, i64 1
+  %tmp16341 = getelementptr inbounds float* %tmp16340, i64 1
+  %tmp16342 = getelementptr inbounds float* %tmp16341, i64 1
+  %tmp16343 = getelementptr inbounds float* %tmp16342, i64 1
+  %tmp16344 = getelementptr inbounds float* %tmp16343, i64 1
+  %tmp16345 = getelementptr inbounds float* %tmp16344, i64 1
+  %tmp16346 = getelementptr inbounds float* %tmp16345, i64 1
+  %tmp16347 = getelementptr inbounds float* %tmp16346, i64 1
+  %tmp16348 = getelementptr inbounds float* %tmp16347, i64 1
+  %tmp16349 = getelementptr inbounds float* %tmp16348, i64 1
+  %tmp16350 = getelementptr inbounds float* %tmp16349, i64 1
+  %tmp16351 = getelementptr inbounds float* %tmp16350, i64 1
+  %tmp16352 = getelementptr inbounds float* %tmp16351, i64 1
+  %tmp16353 = getelementptr inbounds float* %tmp16352, i64 1
+  %tmp16354 = getelementptr inbounds float* %tmp16353, i64 1
+  %tmp16355 = getelementptr inbounds float* %tmp16354, i64 1
+  %tmp16356 = getelementptr inbounds float* %tmp16355, i64 1
+  %tmp16357 = getelementptr inbounds float* %tmp16356, i64 1
+  %tmp16358 = getelementptr inbounds float* %tmp16357, i64 1
+  %tmp16359 = getelementptr inbounds float* %tmp16358, i64 1
+  %tmp16360 = getelementptr inbounds float* %tmp16359, i64 1
+  %tmp16361 = getelementptr inbounds float* %tmp16360, i64 1
+  %tmp16362 = getelementptr inbounds float* %tmp16361, i64 1
+  %tmp16363 = getelementptr inbounds float* %tmp16362, i64 1
+  %tmp16364 = getelementptr inbounds float* %tmp16363, i64 1
+  %tmp16365 = getelementptr inbounds float* %tmp16364, i64 1
+  %tmp16366 = getelementptr inbounds float* %tmp16365, i64 1
+  %tmp16367 = getelementptr inbounds float* %tmp16366, i64 1
+  %tmp16368 = getelementptr inbounds float* %tmp16367, i64 1
+  %tmp16369 = getelementptr inbounds float* %tmp16368, i64 1
+  %tmp16370 = getelementptr inbounds float* %tmp16369, i64 1
+  %tmp16371 = getelementptr inbounds float* %tmp16370, i64 1
+  %tmp16372 = getelementptr inbounds float* %tmp16371, i64 1
+  %tmp16373 = getelementptr inbounds float* %tmp16372, i64 1
+  %tmp16374 = getelementptr inbounds float* %tmp16373, i64 1
+  %tmp16375 = getelementptr inbounds float* %tmp16374, i64 1
+  %tmp16376 = getelementptr inbounds float* %tmp16375, i64 1
+  %tmp16377 = getelementptr inbounds float* %tmp16376, i64 1
+  %tmp16378 = getelementptr inbounds float* %tmp16377, i64 1
+  %tmp16379 = getelementptr inbounds float* %tmp16378, i64 1
+  %tmp16380 = getelementptr inbounds float* %tmp16379, i64 1
+  %tmp16381 = getelementptr inbounds float* %tmp16380, i64 1
+  %tmp16382 = getelementptr inbounds float* %tmp16381, i64 1
+  %tmp16383 = getelementptr inbounds float* %tmp16382, i64 1
+  %tmp16384 = getelementptr inbounds float* %tmp16383, i64 1
+  %tmp16385 = getelementptr inbounds float* %tmp16384, i64 1
+  %tmp16386 = getelementptr inbounds float* %tmp16385, i64 1
+  %tmp16387 = getelementptr inbounds float* %tmp16386, i64 1
+  %tmp16388 = getelementptr inbounds float* %tmp16387, i64 1
+  %tmp16389 = getelementptr inbounds float* %tmp16388, i64 1
+  %tmp16390 = getelementptr inbounds float* %tmp16389, i64 1
+  %tmp16391 = getelementptr inbounds float* %tmp16390, i64 1
+  %tmp16392 = getelementptr inbounds float* %tmp16391, i64 1
+  %tmp16393 = getelementptr inbounds float* %tmp16392, i64 1
+  %tmp16394 = getelementptr inbounds float* %tmp16393, i64 1
+  %tmp16395 = getelementptr inbounds float* %tmp16394, i64 1
+  %tmp16396 = getelementptr inbounds float* %tmp16395, i64 1
+  %tmp16397 = getelementptr inbounds float* %tmp16396, i64 1
+  %tmp16398 = getelementptr inbounds float* %tmp16397, i64 1
+  %tmp16399 = getelementptr inbounds float* %tmp16398, i64 1
+  %tmp16400 = getelementptr inbounds float* %tmp16399, i64 1
+  %tmp16401 = getelementptr inbounds float* %tmp16400, i64 1
+  %tmp16402 = getelementptr inbounds float* %tmp16401, i64 1
+  %tmp16403 = getelementptr inbounds float* %tmp16402, i64 1
+  %tmp16404 = getelementptr inbounds float* %tmp16403, i64 1
+  %tmp16405 = getelementptr inbounds float* %tmp16404, i64 1
+  %tmp16406 = getelementptr inbounds float* %tmp16405, i64 1
+  %tmp16407 = getelementptr inbounds float* %tmp16406, i64 1
+  %tmp16408 = getelementptr inbounds float* %tmp16407, i64 1
+  %tmp16409 = getelementptr inbounds float* %tmp16408, i64 1
+  %tmp16410 = getelementptr inbounds float* %tmp16409, i64 1
+  %tmp16411 = getelementptr inbounds float* %tmp16410, i64 1
+  %tmp16412 = getelementptr inbounds float* %tmp16411, i64 1
+  %tmp16413 = getelementptr inbounds float* %tmp16412, i64 1
+  %tmp16414 = getelementptr inbounds float* %tmp16413, i64 1
+  %tmp16415 = getelementptr inbounds float* %tmp16414, i64 1
+  %tmp16416 = getelementptr inbounds float* %tmp16415, i64 1
+  %tmp16417 = getelementptr inbounds float* %tmp16416, i64 1
+  %tmp16418 = getelementptr inbounds float* %tmp16417, i64 1
+  %tmp16419 = getelementptr inbounds float* %tmp16418, i64 1
+  %tmp16420 = getelementptr inbounds float* %tmp16419, i64 1
+  %tmp16421 = getelementptr inbounds float* %tmp16420, i64 1
+  %tmp16422 = getelementptr inbounds float* %tmp16421, i64 1
+  %tmp16423 = getelementptr inbounds float* %tmp16422, i64 1
+  %tmp16424 = getelementptr inbounds float* %tmp16423, i64 1
+  %tmp16425 = getelementptr inbounds float* %tmp16424, i64 1
+  %tmp16426 = getelementptr inbounds float* %tmp16425, i64 1
+  %tmp16427 = getelementptr inbounds float* %tmp16426, i64 1
+  %tmp16428 = getelementptr inbounds float* %tmp16427, i64 1
+  %tmp16429 = getelementptr inbounds float* %tmp16428, i64 1
+  %tmp16430 = getelementptr inbounds float* %tmp16429, i64 1
+  %tmp16431 = getelementptr inbounds float* %tmp16430, i64 1
+  %tmp16432 = getelementptr inbounds float* %tmp16431, i64 1
+  %tmp16433 = getelementptr inbounds float* %tmp16432, i64 1
+  %tmp16434 = getelementptr inbounds float* %tmp16433, i64 1
+  %tmp16435 = getelementptr inbounds float* %tmp16434, i64 1
+  %tmp16436 = getelementptr inbounds float* %tmp16435, i64 1
+  %tmp16437 = getelementptr inbounds float* %tmp16436, i64 1
+  %tmp16438 = getelementptr inbounds float* %tmp16437, i64 1
+  %tmp16439 = getelementptr inbounds float* %tmp16438, i64 1
+  %tmp16440 = getelementptr inbounds float* %tmp16439, i64 1
+  %tmp16441 = getelementptr inbounds float* %tmp16440, i64 1
+  %tmp16442 = getelementptr inbounds float* %tmp16441, i64 1
+  %tmp16443 = getelementptr inbounds float* %tmp16442, i64 1
+  %tmp16444 = getelementptr inbounds float* %tmp16443, i64 1
+  %tmp16445 = getelementptr inbounds float* %tmp16444, i64 1
+  %tmp16446 = getelementptr inbounds float* %tmp16445, i64 1
+  %tmp16447 = getelementptr inbounds float* %tmp16446, i64 1
+  %tmp16448 = getelementptr inbounds float* %tmp16447, i64 1
+  %tmp16449 = getelementptr inbounds float* %tmp16448, i64 1
+  %tmp16450 = getelementptr inbounds float* %tmp16449, i64 1
+  %tmp16451 = getelementptr inbounds float* %tmp16450, i64 1
+  %tmp16452 = getelementptr inbounds float* %tmp16451, i64 1
+  %tmp16453 = getelementptr inbounds float* %tmp16452, i64 1
+  %tmp16454 = getelementptr inbounds float* %tmp16453, i64 1
+  %tmp16455 = getelementptr inbounds float* %tmp16454, i64 1
+  %tmp16456 = getelementptr inbounds float* %tmp16455, i64 1
+  %tmp16457 = getelementptr inbounds float* %tmp16456, i64 1
+  %tmp16458 = getelementptr inbounds float* %tmp16457, i64 1
+  %tmp16459 = getelementptr inbounds float* %tmp16458, i64 1
+  %tmp16460 = getelementptr inbounds float* %tmp16459, i64 1
+  %tmp16461 = getelementptr inbounds float* %tmp16460, i64 1
+  %tmp16462 = getelementptr inbounds float* %tmp16461, i64 1
+  %tmp16463 = getelementptr inbounds float* %tmp16462, i64 1
+  %tmp16464 = getelementptr inbounds float* %tmp16463, i64 1
+  %tmp16465 = getelementptr inbounds float* %tmp16464, i64 1
+  %tmp16466 = getelementptr inbounds float* %tmp16465, i64 1
+  %tmp16467 = getelementptr inbounds float* %tmp16466, i64 1
+  %tmp16468 = getelementptr inbounds float* %tmp16467, i64 1
+  %tmp16469 = getelementptr inbounds float* %tmp16468, i64 1
+  %tmp16470 = getelementptr inbounds float* %tmp16469, i64 1
+  %tmp16471 = getelementptr inbounds float* %tmp16470, i64 1
+  %tmp16472 = getelementptr inbounds float* %tmp16471, i64 1
+  %tmp16473 = getelementptr inbounds float* %tmp16472, i64 1
+  %tmp16474 = getelementptr inbounds float* %tmp16473, i64 1
+  %tmp16475 = getelementptr inbounds float* %tmp16474, i64 1
+  %tmp16476 = getelementptr inbounds float* %tmp16475, i64 1
+  %tmp16477 = getelementptr inbounds float* %tmp16476, i64 1
+  %tmp16478 = getelementptr inbounds float* %tmp16477, i64 1
+  %tmp16479 = getelementptr inbounds float* %tmp16478, i64 1
+  %tmp16480 = getelementptr inbounds float* %tmp16479, i64 1
+  %tmp16481 = getelementptr inbounds float* %tmp16480, i64 1
+  %tmp16482 = getelementptr inbounds float* %tmp16481, i64 1
+  %tmp16483 = getelementptr inbounds float* %tmp16482, i64 1
+  %tmp16484 = getelementptr inbounds float* %tmp16483, i64 1
+  %tmp16485 = getelementptr inbounds float* %tmp16484, i64 1
+  %tmp16486 = getelementptr inbounds float* %tmp16485, i64 1
+  %tmp16487 = getelementptr inbounds float* %tmp16486, i64 1
+  %tmp16488 = getelementptr inbounds float* %tmp16487, i64 1
+  %tmp16489 = getelementptr inbounds float* %tmp16488, i64 1
+  %tmp16490 = getelementptr inbounds float* %tmp16489, i64 1
+  %tmp16491 = getelementptr inbounds float* %tmp16490, i64 1
+  %tmp16492 = getelementptr inbounds float* %tmp16491, i64 1
+  %tmp16493 = getelementptr inbounds float* %tmp16492, i64 1
+  %tmp16494 = getelementptr inbounds float* %tmp16493, i64 1
+  %tmp16495 = getelementptr inbounds float* %tmp16494, i64 1
+  %tmp16496 = getelementptr inbounds float* %tmp16495, i64 1
+  %tmp16497 = getelementptr inbounds float* %tmp16496, i64 1
+  %tmp16498 = getelementptr inbounds float* %tmp16497, i64 1
+  %tmp16499 = getelementptr inbounds float* %tmp16498, i64 1
+  %tmp16500 = getelementptr inbounds float* %tmp16499, i64 1
+  %tmp16501 = getelementptr inbounds float* %tmp16500, i64 1
+  %tmp16502 = getelementptr inbounds float* %tmp16501, i64 1
+  %tmp16503 = getelementptr inbounds float* %tmp16502, i64 1
+  %tmp16504 = getelementptr inbounds float* %tmp16503, i64 1
+  %tmp16505 = getelementptr inbounds float* %tmp16504, i64 1
+  %tmp16506 = getelementptr inbounds float* %tmp16505, i64 1
+  %tmp16507 = getelementptr inbounds float* %tmp16506, i64 1
+  %tmp16508 = getelementptr inbounds float* %tmp16507, i64 1
+  %tmp16509 = getelementptr inbounds float* %tmp16508, i64 1
+  %tmp16510 = getelementptr inbounds float* %tmp16509, i64 1
+  %tmp16511 = getelementptr inbounds float* %tmp16510, i64 1
+  %tmp16512 = getelementptr inbounds float* %tmp16511, i64 1
+  %tmp16513 = getelementptr inbounds float* %tmp16512, i64 1
+  %tmp16514 = getelementptr inbounds float* %tmp16513, i64 1
+  %tmp16515 = getelementptr inbounds float* %tmp16514, i64 1
+  %tmp16516 = getelementptr inbounds float* %tmp16515, i64 1
+  %tmp16517 = getelementptr inbounds float* %tmp16516, i64 1
+  %tmp16518 = getelementptr inbounds float* %tmp16517, i64 1
+  %tmp16519 = getelementptr inbounds float* %tmp16518, i64 1
+  %tmp16520 = getelementptr inbounds float* %tmp16519, i64 1
+  %tmp16521 = getelementptr inbounds float* %tmp16520, i64 1
+  %tmp16522 = getelementptr inbounds float* %tmp16521, i64 1
+  %tmp16523 = getelementptr inbounds float* %tmp16522, i64 1
+  %tmp16524 = getelementptr inbounds float* %tmp16523, i64 1
+  %tmp16525 = getelementptr inbounds float* %tmp16524, i64 1
+  %tmp16526 = getelementptr inbounds float* %tmp16525, i64 1
+  %tmp16527 = getelementptr inbounds float* %tmp16526, i64 1
+  %tmp16528 = getelementptr inbounds float* %tmp16527, i64 1
+  %tmp16529 = getelementptr inbounds float* %tmp16528, i64 1
+  %tmp16530 = getelementptr inbounds float* %tmp16529, i64 1
+  %tmp16531 = getelementptr inbounds float* %tmp16530, i64 1
+  %tmp16532 = getelementptr inbounds float* %tmp16531, i64 1
+  %tmp16533 = getelementptr inbounds float* %tmp16532, i64 1
+  %tmp16534 = getelementptr inbounds float* %tmp16533, i64 1
+  %tmp16535 = getelementptr inbounds float* %tmp16534, i64 1
+  %tmp16536 = getelementptr inbounds float* %tmp16535, i64 1
+  %tmp16537 = getelementptr inbounds float* %tmp16536, i64 1
+  %tmp16538 = getelementptr inbounds float* %tmp16537, i64 1
+  %tmp16539 = getelementptr inbounds float* %tmp16538, i64 1
+  %tmp16540 = getelementptr inbounds float* %tmp16539, i64 1
+  %tmp16541 = getelementptr inbounds float* %tmp16540, i64 1
+  %tmp16542 = getelementptr inbounds float* %tmp16541, i64 1
+  %tmp16543 = getelementptr inbounds float* %tmp16542, i64 1
+  %tmp16544 = getelementptr inbounds float* %tmp16543, i64 1
+  %tmp16545 = getelementptr inbounds float* %tmp16544, i64 1
+  %tmp16546 = getelementptr inbounds float* %tmp16545, i64 1
+  %tmp16547 = getelementptr inbounds float* %tmp16546, i64 1
+  %tmp16548 = getelementptr inbounds float* %tmp16547, i64 1
+  %tmp16549 = getelementptr inbounds float* %tmp16548, i64 1
+  %tmp16550 = getelementptr inbounds float* %tmp16549, i64 1
+  %tmp16551 = getelementptr inbounds float* %tmp16550, i64 1
+  %tmp16552 = getelementptr inbounds float* %tmp16551, i64 1
+  %tmp16553 = getelementptr inbounds float* %tmp16552, i64 1
+  %tmp16554 = getelementptr inbounds float* %tmp16553, i64 1
+  %tmp16555 = getelementptr inbounds float* %tmp16554, i64 1
+  %tmp16556 = getelementptr inbounds float* %tmp16555, i64 1
+  %tmp16557 = getelementptr inbounds float* %tmp16556, i64 1
+  %tmp16558 = getelementptr inbounds float* %tmp16557, i64 1
+  %tmp16559 = getelementptr inbounds float* %tmp16558, i64 1
+  %tmp16560 = getelementptr inbounds float* %tmp16559, i64 1
+  %tmp16561 = getelementptr inbounds float* %tmp16560, i64 1
+  %tmp16562 = getelementptr inbounds float* %tmp16561, i64 1
+  %tmp16563 = getelementptr inbounds float* %tmp16562, i64 1
+  %tmp16564 = getelementptr inbounds float* %tmp16563, i64 1
+  %tmp16565 = getelementptr inbounds float* %tmp16564, i64 1
+  %tmp16566 = getelementptr inbounds float* %tmp16565, i64 1
+  %tmp16567 = getelementptr inbounds float* %tmp16566, i64 1
+  %tmp16568 = getelementptr inbounds float* %tmp16567, i64 1
+  %tmp16569 = getelementptr inbounds float* %tmp16568, i64 1
+  %tmp16570 = getelementptr inbounds float* %tmp16569, i64 1
+  %tmp16571 = getelementptr inbounds float* %tmp16570, i64 1
+  %tmp16572 = getelementptr inbounds float* %tmp16571, i64 1
+  %tmp16573 = getelementptr inbounds float* %tmp16572, i64 1
+  %tmp16574 = getelementptr inbounds float* %tmp16573, i64 1
+  %tmp16575 = getelementptr inbounds float* %tmp16574, i64 1
+  %tmp16576 = getelementptr inbounds float* %tmp16575, i64 1
+  %tmp16577 = getelementptr inbounds float* %tmp16576, i64 1
+  %tmp16578 = getelementptr inbounds float* %tmp16577, i64 1
+  %tmp16579 = getelementptr inbounds float* %tmp16578, i64 1
+  %tmp16580 = getelementptr inbounds float* %tmp16579, i64 1
+  %tmp16581 = getelementptr inbounds float* %tmp16580, i64 1
+  %tmp16582 = getelementptr inbounds float* %tmp16581, i64 1
+  %tmp16583 = getelementptr inbounds float* %tmp16582, i64 1
+  %tmp16584 = getelementptr inbounds float* %tmp16583, i64 1
+  %tmp16585 = getelementptr inbounds float* %tmp16584, i64 1
+  %tmp16586 = getelementptr inbounds float* %tmp16585, i64 1
+  %tmp16587 = getelementptr inbounds float* %tmp16586, i64 1
+  %tmp16588 = getelementptr inbounds float* %tmp16587, i64 1
+  %tmp16589 = getelementptr inbounds float* %tmp16588, i64 1
+  %tmp16590 = getelementptr inbounds float* %tmp16589, i64 1
+  %tmp16591 = getelementptr inbounds float* %tmp16590, i64 1
+  %tmp16592 = getelementptr inbounds float* %tmp16591, i64 1
+  %tmp16593 = getelementptr inbounds float* %tmp16592, i64 1
+  %tmp16594 = getelementptr inbounds float* %tmp16593, i64 1
+  %tmp16595 = getelementptr inbounds float* %tmp16594, i64 1
+  %tmp16596 = getelementptr inbounds float* %tmp16595, i64 1
+  %tmp16597 = getelementptr inbounds float* %tmp16596, i64 1
+  %tmp16598 = getelementptr inbounds float* %tmp16597, i64 1
+  %tmp16599 = getelementptr inbounds float* %tmp16598, i64 1
+  %tmp16600 = getelementptr inbounds float* %tmp16599, i64 1
+  %tmp16601 = getelementptr inbounds float* %tmp16600, i64 1
+  %tmp16602 = getelementptr inbounds float* %tmp16601, i64 1
+  %tmp16603 = getelementptr inbounds float* %tmp16602, i64 1
+  %tmp16604 = getelementptr inbounds float* %tmp16603, i64 1
+  %tmp16605 = getelementptr inbounds float* %tmp16604, i64 1
+  %tmp16606 = getelementptr inbounds float* %tmp16605, i64 1
+  %tmp16607 = getelementptr inbounds float* %tmp16606, i64 1
+  %tmp16608 = getelementptr inbounds float* %tmp16607, i64 1
+  %tmp16609 = getelementptr inbounds float* %tmp16608, i64 1
+  %tmp16610 = getelementptr inbounds float* %tmp16609, i64 1
+  %tmp16611 = getelementptr inbounds float* %tmp16610, i64 1
+  %tmp16612 = getelementptr inbounds float* %tmp16611, i64 1
+  %tmp16613 = getelementptr inbounds float* %tmp16612, i64 1
+  %tmp16614 = getelementptr inbounds float* %tmp16613, i64 1
+  %tmp16615 = getelementptr inbounds float* %tmp16614, i64 1
+  %tmp16616 = getelementptr inbounds float* %tmp16615, i64 1
+  %tmp16617 = getelementptr inbounds float* %tmp16616, i64 1
+  %tmp16618 = getelementptr inbounds float* %tmp16617, i64 1
+  %tmp16619 = getelementptr inbounds float* %tmp16618, i64 1
+  %tmp16620 = getelementptr inbounds float* %tmp16619, i64 1
+  %tmp16621 = getelementptr inbounds float* %tmp16620, i64 1
+  %tmp16622 = getelementptr inbounds float* %tmp16621, i64 1
+  %tmp16623 = getelementptr inbounds float* %tmp16622, i64 1
+  %tmp16624 = getelementptr inbounds float* %tmp16623, i64 1
+  %tmp16625 = getelementptr inbounds float* %tmp16624, i64 1
+  %tmp16626 = getelementptr inbounds float* %tmp16625, i64 1
+  %tmp16627 = getelementptr inbounds float* %tmp16626, i64 1
+  %tmp16628 = getelementptr inbounds float* %tmp16627, i64 1
+  %tmp16629 = getelementptr inbounds float* %tmp16628, i64 1
+  %tmp16630 = getelementptr inbounds float* %tmp16629, i64 1
+  %tmp16631 = getelementptr inbounds float* %tmp16630, i64 1
+  %tmp16632 = getelementptr inbounds float* %tmp16631, i64 1
+  %tmp16633 = getelementptr inbounds float* %tmp16632, i64 1
+  %tmp16634 = getelementptr inbounds float* %tmp16633, i64 1
+  %tmp16635 = getelementptr inbounds float* %tmp16634, i64 1
+  %tmp16636 = getelementptr inbounds float* %tmp16635, i64 1
+  %tmp16637 = getelementptr inbounds float* %tmp16636, i64 1
+  %tmp16638 = getelementptr inbounds float* %tmp16637, i64 1
+  %tmp16639 = getelementptr inbounds float* %tmp16638, i64 1
+  %tmp16640 = getelementptr inbounds float* %tmp16639, i64 1
+  %tmp16641 = getelementptr inbounds float* %tmp16640, i64 1
+  %tmp16642 = getelementptr inbounds float* %tmp16641, i64 1
+  %tmp16643 = getelementptr inbounds float* %tmp16642, i64 1
+  %tmp16644 = getelementptr inbounds float* %tmp16643, i64 1
+  %tmp16645 = getelementptr inbounds float* %tmp16644, i64 1
+  %tmp16646 = getelementptr inbounds float* %tmp16645, i64 1
+  %tmp16647 = getelementptr inbounds float* %tmp16646, i64 1
+  %tmp16648 = getelementptr inbounds float* %tmp16647, i64 1
+  %tmp16649 = getelementptr inbounds float* %tmp16648, i64 1
+  %tmp16650 = getelementptr inbounds float* %tmp16649, i64 1
+  %tmp16651 = getelementptr inbounds float* %tmp16650, i64 1
+  %tmp16652 = getelementptr inbounds float* %tmp16651, i64 1
+  %tmp16653 = getelementptr inbounds float* %tmp16652, i64 1
+  %tmp16654 = getelementptr inbounds float* %tmp16653, i64 1
+  %tmp16655 = getelementptr inbounds float* %tmp16654, i64 1
+  %tmp16656 = getelementptr inbounds float* %tmp16655, i64 1
+  %tmp16657 = getelementptr inbounds float* %tmp16656, i64 1
+  %tmp16658 = getelementptr inbounds float* %tmp16657, i64 1
+  %tmp16659 = getelementptr inbounds float* %tmp16658, i64 1
+  %tmp16660 = getelementptr inbounds float* %tmp16659, i64 1
+  %tmp16661 = getelementptr inbounds float* %tmp16660, i64 1
+  %tmp16662 = getelementptr inbounds float* %tmp16661, i64 1
+  %tmp16663 = getelementptr inbounds float* %tmp16662, i64 1
+  %tmp16664 = getelementptr inbounds float* %tmp16663, i64 1
+  %tmp16665 = getelementptr inbounds float* %tmp16664, i64 1
+  %tmp16666 = getelementptr inbounds float* %tmp16665, i64 1
+  %tmp16667 = getelementptr inbounds float* %tmp16666, i64 1
+  %tmp16668 = getelementptr inbounds float* %tmp16667, i64 1
+  %tmp16669 = getelementptr inbounds float* %tmp16668, i64 1
+  %tmp16670 = getelementptr inbounds float* %tmp16669, i64 1
+  %tmp16671 = getelementptr inbounds float* %tmp16670, i64 1
+  %tmp16672 = getelementptr inbounds float* %tmp16671, i64 1
+  %tmp16673 = getelementptr inbounds float* %tmp16672, i64 1
+  %tmp16674 = getelementptr inbounds float* %tmp16673, i64 1
+  %tmp16675 = getelementptr inbounds float* %tmp16674, i64 1
+  %tmp16676 = getelementptr inbounds float* %tmp16675, i64 1
+  %tmp16677 = getelementptr inbounds float* %tmp16676, i64 1
+  %tmp16678 = getelementptr inbounds float* %tmp16677, i64 1
+  %tmp16679 = getelementptr inbounds float* %tmp16678, i64 1
+  %tmp16680 = getelementptr inbounds float* %tmp16679, i64 1
+  %tmp16681 = getelementptr inbounds float* %tmp16680, i64 1
+  %tmp16682 = getelementptr inbounds float* %tmp16681, i64 1
+  %tmp16683 = getelementptr inbounds float* %tmp16682, i64 1
+  %tmp16684 = getelementptr inbounds float* %tmp16683, i64 1
+  %tmp16685 = getelementptr inbounds float* %tmp16684, i64 1
+  %tmp16686 = getelementptr inbounds float* %tmp16685, i64 1
+  %tmp16687 = getelementptr inbounds float* %tmp16686, i64 1
+  %tmp16688 = getelementptr inbounds float* %tmp16687, i64 1
+  %tmp16689 = getelementptr inbounds float* %tmp16688, i64 1
+  %tmp16690 = getelementptr inbounds float* %tmp16689, i64 1
+  %tmp16691 = getelementptr inbounds float* %tmp16690, i64 1
+  %tmp16692 = getelementptr inbounds float* %tmp16691, i64 1
+  %tmp16693 = getelementptr inbounds float* %tmp16692, i64 1
+  %tmp16694 = getelementptr inbounds float* %tmp16693, i64 1
+  %tmp16695 = getelementptr inbounds float* %tmp16694, i64 1
+  %tmp16696 = getelementptr inbounds float* %tmp16695, i64 1
+  %tmp16697 = getelementptr inbounds float* %tmp16696, i64 1
+  %tmp16698 = getelementptr inbounds float* %tmp16697, i64 1
+  %tmp16699 = getelementptr inbounds float* %tmp16698, i64 1
+  %tmp16700 = getelementptr inbounds float* %tmp16699, i64 1
+  %tmp16701 = getelementptr inbounds float* %tmp16700, i64 1
+  %tmp16702 = getelementptr inbounds float* %tmp16701, i64 1
+  %tmp16703 = getelementptr inbounds float* %tmp16702, i64 1
+  %tmp16704 = getelementptr inbounds float* %tmp16703, i64 1
+  %tmp16705 = getelementptr inbounds float* %tmp16704, i64 1
+  %tmp16706 = getelementptr inbounds float* %tmp16705, i64 1
+  %tmp16707 = getelementptr inbounds float* %tmp16706, i64 1
+  %tmp16708 = getelementptr inbounds float* %tmp16707, i64 1
+  %tmp16709 = getelementptr inbounds float* %tmp16708, i64 1
+  %tmp16710 = getelementptr inbounds float* %tmp16709, i64 1
+  %tmp16711 = getelementptr inbounds float* %tmp16710, i64 1
+  %tmp16712 = getelementptr inbounds float* %tmp16711, i64 1
+  %tmp16713 = getelementptr inbounds float* %tmp16712, i64 1
+  %tmp16714 = getelementptr inbounds float* %tmp16713, i64 1
+  %tmp16715 = getelementptr inbounds float* %tmp16714, i64 1
+  %tmp16716 = getelementptr inbounds float* %tmp16715, i64 1
+  %tmp16717 = getelementptr inbounds float* %tmp16716, i64 1
+  %tmp16718 = getelementptr inbounds float* %tmp16717, i64 1
+  %tmp16719 = getelementptr inbounds float* %tmp16718, i64 1
+  %tmp16720 = getelementptr inbounds float* %tmp16719, i64 1
+  %tmp16721 = getelementptr inbounds float* %tmp16720, i64 1
+  %tmp16722 = getelementptr inbounds float* %tmp16721, i64 1
+  %tmp16723 = getelementptr inbounds float* %tmp16722, i64 1
+  %tmp16724 = getelementptr inbounds float* %tmp16723, i64 1
+  %tmp16725 = getelementptr inbounds float* %tmp16724, i64 1
+  %tmp16726 = getelementptr inbounds float* %tmp16725, i64 1
+  %tmp16727 = getelementptr inbounds float* %tmp16726, i64 1
+  %tmp16728 = getelementptr inbounds float* %tmp16727, i64 1
+  %tmp16729 = getelementptr inbounds float* %tmp16728, i64 1
+  %tmp16730 = getelementptr inbounds float* %tmp16729, i64 1
+  %tmp16731 = getelementptr inbounds float* %tmp16730, i64 1
+  %tmp16732 = getelementptr inbounds float* %tmp16731, i64 1
+  %tmp16733 = getelementptr inbounds float* %tmp16732, i64 1
+  %tmp16734 = getelementptr inbounds float* %tmp16733, i64 1
+  %tmp16735 = getelementptr inbounds float* %tmp16734, i64 1
+  %tmp16736 = getelementptr inbounds float* %tmp16735, i64 1
+  %tmp16737 = getelementptr inbounds float* %tmp16736, i64 1
+  %tmp16738 = getelementptr inbounds float* %tmp16737, i64 1
+  %tmp16739 = getelementptr inbounds float* %tmp16738, i64 1
+  %tmp16740 = getelementptr inbounds float* %tmp16739, i64 1
+  %tmp16741 = getelementptr inbounds float* %tmp16740, i64 1
+  %tmp16742 = getelementptr inbounds float* %tmp16741, i64 1
+  %tmp16743 = getelementptr inbounds float* %tmp16742, i64 1
+  %tmp16744 = getelementptr inbounds float* %tmp16743, i64 1
+  %tmp16745 = getelementptr inbounds float* %tmp16744, i64 1
+  %tmp16746 = getelementptr inbounds float* %tmp16745, i64 1
+  %tmp16747 = getelementptr inbounds float* %tmp16746, i64 1
+  %tmp16748 = getelementptr inbounds float* %tmp16747, i64 1
+  %tmp16749 = getelementptr inbounds float* %tmp16748, i64 1
+  %tmp16750 = getelementptr inbounds float* %tmp16749, i64 1
+  %tmp16751 = getelementptr inbounds float* %tmp16750, i64 1
+  %tmp16752 = getelementptr inbounds float* %tmp16751, i64 1
+  %tmp16753 = getelementptr inbounds float* %tmp16752, i64 1
+  %tmp16754 = getelementptr inbounds float* %tmp16753, i64 1
+  %tmp16755 = getelementptr inbounds float* %tmp16754, i64 1
+  %tmp16756 = getelementptr inbounds float* %tmp16755, i64 1
+  %tmp16757 = getelementptr inbounds float* %tmp16756, i64 1
+  %tmp16758 = getelementptr inbounds float* %tmp16757, i64 1
+  %tmp16759 = getelementptr inbounds float* %tmp16758, i64 1
+  %tmp16760 = getelementptr inbounds float* %tmp16759, i64 1
+  %tmp16761 = getelementptr inbounds float* %tmp16760, i64 1
+  %tmp16762 = getelementptr inbounds float* %tmp16761, i64 1
+  %tmp16763 = getelementptr inbounds float* %tmp16762, i64 1
+  %tmp16764 = getelementptr inbounds float* %tmp16763, i64 1
+  %tmp16765 = getelementptr inbounds float* %tmp16764, i64 1
+  %tmp16766 = getelementptr inbounds float* %tmp16765, i64 1
+  %tmp16767 = getelementptr inbounds float* %tmp16766, i64 1
+  %tmp16768 = getelementptr inbounds float* %tmp16767, i64 1
+  %tmp16769 = getelementptr inbounds float* %tmp16768, i64 1
+  %tmp16770 = getelementptr inbounds float* %tmp16769, i64 1
+  %tmp16771 = getelementptr inbounds float* %tmp16770, i64 1
+  %tmp16772 = getelementptr inbounds float* %tmp16771, i64 1
+  %tmp16773 = getelementptr inbounds float* %tmp16772, i64 1
+  %tmp16774 = getelementptr inbounds float* %tmp16773, i64 1
+  %tmp16775 = getelementptr inbounds float* %tmp16774, i64 1
+  %tmp16776 = getelementptr inbounds float* %tmp16775, i64 1
+  %tmp16777 = getelementptr inbounds float* %tmp16776, i64 1
+  %tmp16778 = getelementptr inbounds float* %tmp16777, i64 1
+  %tmp16779 = getelementptr inbounds float* %tmp16778, i64 1
+  %tmp16780 = getelementptr inbounds float* %tmp16779, i64 1
+  %tmp16781 = getelementptr inbounds float* %tmp16780, i64 1
+  %tmp16782 = getelementptr inbounds float* %tmp16781, i64 1
+  %tmp16783 = getelementptr inbounds float* %tmp16782, i64 1
+  %tmp16784 = getelementptr inbounds float* %tmp16783, i64 1
+  %tmp16785 = getelementptr inbounds float* %tmp16784, i64 1
+  %tmp16786 = getelementptr inbounds float* %tmp16785, i64 1
+  %tmp16787 = getelementptr inbounds float* %tmp16786, i64 1
+  %tmp16788 = getelementptr inbounds float* %tmp16787, i64 1
+  %tmp16789 = getelementptr inbounds float* %tmp16788, i64 1
+  %tmp16790 = getelementptr inbounds float* %tmp16789, i64 1
+  %tmp16791 = getelementptr inbounds float* %tmp16790, i64 1
+  %tmp16792 = getelementptr inbounds float* %tmp16791, i64 1
+  %tmp16793 = getelementptr inbounds float* %tmp16792, i64 1
+  %tmp16794 = getelementptr inbounds float* %tmp16793, i64 1
+  %tmp16795 = getelementptr inbounds float* %tmp16794, i64 1
+  %tmp16796 = getelementptr inbounds float* %tmp16795, i64 1
+  %tmp16797 = getelementptr inbounds float* %tmp16796, i64 1
+  %tmp16798 = getelementptr inbounds float* %tmp16797, i64 1
+  %tmp16799 = getelementptr inbounds float* %tmp16798, i64 1
+  %tmp16800 = getelementptr inbounds float* %tmp16799, i64 1
+  %tmp16801 = getelementptr inbounds float* %tmp16800, i64 1
+  %tmp16802 = getelementptr inbounds float* %tmp16801, i64 1
+  %tmp16803 = getelementptr inbounds float* %tmp16802, i64 1
+  %tmp16804 = getelementptr inbounds float* %tmp16803, i64 1
+  %tmp16805 = getelementptr inbounds float* %tmp16804, i64 1
+  %tmp16806 = getelementptr inbounds float* %tmp16805, i64 1
+  %tmp16807 = getelementptr inbounds float* %tmp16806, i64 1
+  %tmp16808 = getelementptr inbounds float* %tmp16807, i64 1
+  %tmp16809 = getelementptr inbounds float* %tmp16808, i64 1
+  %tmp16810 = getelementptr inbounds float* %tmp16809, i64 1
+  %tmp16811 = getelementptr inbounds float* %tmp16810, i64 1
+  %tmp16812 = getelementptr inbounds float* %tmp16811, i64 1
+  %tmp16813 = getelementptr inbounds float* %tmp16812, i64 1
+  %tmp16814 = getelementptr inbounds float* %tmp16813, i64 1
+  %tmp16815 = getelementptr inbounds float* %tmp16814, i64 1
+  %tmp16816 = getelementptr inbounds float* %tmp16815, i64 1
+  %tmp16817 = getelementptr inbounds float* %tmp16816, i64 1
+  %tmp16818 = getelementptr inbounds float* %tmp16817, i64 1
+  %tmp16819 = getelementptr inbounds float* %tmp16818, i64 1
+  %tmp16820 = getelementptr inbounds float* %tmp16819, i64 1
+  %tmp16821 = getelementptr inbounds float* %tmp16820, i64 1
+  %tmp16822 = getelementptr inbounds float* %tmp16821, i64 1
+  %tmp16823 = getelementptr inbounds float* %tmp16822, i64 1
+  %tmp16824 = getelementptr inbounds float* %tmp16823, i64 1
+  %tmp16825 = getelementptr inbounds float* %tmp16824, i64 1
+  %tmp16826 = getelementptr inbounds float* %tmp16825, i64 1
+  %tmp16827 = getelementptr inbounds float* %tmp16826, i64 1
+  %tmp16828 = getelementptr inbounds float* %tmp16827, i64 1
+  %tmp16829 = getelementptr inbounds float* %tmp16828, i64 1
+  %tmp16830 = getelementptr inbounds float* %tmp16829, i64 1
+  %tmp16831 = getelementptr inbounds float* %tmp16830, i64 1
+  %tmp16832 = getelementptr inbounds float* %tmp16831, i64 1
+  %tmp16833 = getelementptr inbounds float* %tmp16832, i64 1
+  %tmp16834 = getelementptr inbounds float* %tmp16833, i64 1
+  %tmp16835 = getelementptr inbounds float* %tmp16834, i64 1
+  %tmp16836 = getelementptr inbounds float* %tmp16835, i64 1
+  %tmp16837 = getelementptr inbounds float* %tmp16836, i64 1
+  %tmp16838 = getelementptr inbounds float* %tmp16837, i64 1
+  %tmp16839 = getelementptr inbounds float* %tmp16838, i64 1
+  %tmp16840 = getelementptr inbounds float* %tmp16839, i64 1
+  %tmp16841 = getelementptr inbounds float* %tmp16840, i64 1
+  %tmp16842 = getelementptr inbounds float* %tmp16841, i64 1
+  %tmp16843 = getelementptr inbounds float* %tmp16842, i64 1
+  %tmp16844 = getelementptr inbounds float* %tmp16843, i64 1
+  %tmp16845 = getelementptr inbounds float* %tmp16844, i64 1
+  %tmp16846 = getelementptr inbounds float* %tmp16845, i64 1
+  %tmp16847 = getelementptr inbounds float* %tmp16846, i64 1
+  %tmp16848 = getelementptr inbounds float* %tmp16847, i64 1
+  %tmp16849 = getelementptr inbounds float* %tmp16848, i64 1
+  %tmp16850 = getelementptr inbounds float* %tmp16849, i64 1
+  %tmp16851 = getelementptr inbounds float* %tmp16850, i64 1
+  %tmp16852 = getelementptr inbounds float* %tmp16851, i64 1
+  %tmp16853 = getelementptr inbounds float* %tmp16852, i64 1
+  %tmp16854 = getelementptr inbounds float* %tmp16853, i64 1
+  %tmp16855 = getelementptr inbounds float* %tmp16854, i64 1
+  %tmp16856 = getelementptr inbounds float* %tmp16855, i64 1
+  %tmp16857 = getelementptr inbounds float* %tmp16856, i64 1
+  %tmp16858 = getelementptr inbounds float* %tmp16857, i64 1
+  %tmp16859 = getelementptr inbounds float* %tmp16858, i64 1
+  %tmp16860 = getelementptr inbounds float* %tmp16859, i64 1
+  %tmp16861 = getelementptr inbounds float* %tmp16860, i64 1
+  %tmp16862 = getelementptr inbounds float* %tmp16861, i64 1
+  %tmp16863 = getelementptr inbounds float* %tmp16862, i64 1
+  %tmp16864 = getelementptr inbounds float* %tmp16863, i64 1
+  %tmp16865 = getelementptr inbounds float* %tmp16864, i64 1
+  %tmp16866 = getelementptr inbounds float* %tmp16865, i64 1
+  %tmp16867 = getelementptr inbounds float* %tmp16866, i64 1
+  %tmp16868 = getelementptr inbounds float* %tmp16867, i64 1
+  %tmp16869 = getelementptr inbounds float* %tmp16868, i64 1
+  %tmp16870 = getelementptr inbounds float* %tmp16869, i64 1
+  %tmp16871 = getelementptr inbounds float* %tmp16870, i64 1
+  %tmp16872 = getelementptr inbounds float* %tmp16871, i64 1
+  %tmp16873 = getelementptr inbounds float* %tmp16872, i64 1
+  %tmp16874 = getelementptr inbounds float* %tmp16873, i64 1
+  %tmp16875 = getelementptr inbounds float* %tmp16874, i64 1
+  %tmp16876 = getelementptr inbounds float* %tmp16875, i64 1
+  %tmp16877 = getelementptr inbounds float* %tmp16876, i64 1
+  %tmp16878 = getelementptr inbounds float* %tmp16877, i64 1
+  %tmp16879 = getelementptr inbounds float* %tmp16878, i64 1
+  %tmp16880 = getelementptr inbounds float* %tmp16879, i64 1
+  %tmp16881 = getelementptr inbounds float* %tmp16880, i64 1
+  %tmp16882 = getelementptr inbounds float* %tmp16881, i64 1
+  %tmp16883 = getelementptr inbounds float* %tmp16882, i64 1
+  %tmp16884 = getelementptr inbounds float* %tmp16883, i64 1
+  %tmp16885 = getelementptr inbounds float* %tmp16884, i64 1
+  %tmp16886 = getelementptr inbounds float* %tmp16885, i64 1
+  %tmp16887 = getelementptr inbounds float* %tmp16886, i64 1
+  %tmp16888 = getelementptr inbounds float* %tmp16887, i64 1
+  %tmp16889 = getelementptr inbounds float* %tmp16888, i64 1
+  %tmp16890 = getelementptr inbounds float* %tmp16889, i64 1
+  %tmp16891 = getelementptr inbounds float* %tmp16890, i64 1
+  %tmp16892 = getelementptr inbounds float* %tmp16891, i64 1
+  %tmp16893 = getelementptr inbounds float* %tmp16892, i64 1
+  %tmp16894 = getelementptr inbounds float* %tmp16893, i64 1
+  %tmp16895 = getelementptr inbounds float* %tmp16894, i64 1
+  %tmp16896 = getelementptr inbounds float* %tmp16895, i64 1
+  %tmp16897 = getelementptr inbounds float* %tmp16896, i64 1
+  %tmp16898 = getelementptr inbounds float* %tmp16897, i64 1
+  %tmp16899 = getelementptr inbounds float* %tmp16898, i64 1
+  %tmp16900 = getelementptr inbounds float* %tmp16899, i64 1
+  %tmp16901 = getelementptr inbounds float* %tmp16900, i64 1
+  %tmp16902 = getelementptr inbounds float* %tmp16901, i64 1
+  %tmp16903 = getelementptr inbounds float* %tmp16902, i64 1
+  %tmp16904 = getelementptr inbounds float* %tmp16903, i64 1
+  %tmp16905 = getelementptr inbounds float* %tmp16904, i64 1
+  %tmp16906 = getelementptr inbounds float* %tmp16905, i64 1
+  %tmp16907 = getelementptr inbounds float* %tmp16906, i64 1
+  %tmp16908 = getelementptr inbounds float* %tmp16907, i64 1
+  %tmp16909 = getelementptr inbounds float* %tmp16908, i64 1
+  %tmp16910 = getelementptr inbounds float* %tmp16909, i64 1
+  %tmp16911 = getelementptr inbounds float* %tmp16910, i64 1
+  %tmp16912 = getelementptr inbounds float* %tmp16911, i64 1
+  %tmp16913 = getelementptr inbounds float* %tmp16912, i64 1
+  %tmp16914 = getelementptr inbounds float* %tmp16913, i64 1
+  %tmp16915 = getelementptr inbounds float* %tmp16914, i64 1
+  %tmp16916 = getelementptr inbounds float* %tmp16915, i64 1
+  %tmp16917 = getelementptr inbounds float* %tmp16916, i64 1
+  %tmp16918 = getelementptr inbounds float* %tmp16917, i64 1
+  %tmp16919 = getelementptr inbounds float* %tmp16918, i64 1
+  %tmp16920 = getelementptr inbounds float* %tmp16919, i64 1
+  %tmp16921 = getelementptr inbounds float* %tmp16920, i64 1
+  %tmp16922 = getelementptr inbounds float* %tmp16921, i64 1
+  %tmp16923 = getelementptr inbounds float* %tmp16922, i64 1
+  %tmp16924 = getelementptr inbounds float* %tmp16923, i64 1
+  %tmp16925 = getelementptr inbounds float* %tmp16924, i64 1
+  %tmp16926 = getelementptr inbounds float* %tmp16925, i64 1
+  %tmp16927 = getelementptr inbounds float* %tmp16926, i64 1
+  %tmp16928 = getelementptr inbounds float* %tmp16927, i64 1
+  %tmp16929 = getelementptr inbounds float* %tmp16928, i64 1
+  %tmp16930 = getelementptr inbounds float* %tmp16929, i64 1
+  %tmp16931 = getelementptr inbounds float* %tmp16930, i64 1
+  %tmp16932 = getelementptr inbounds float* %tmp16931, i64 1
+  %tmp16933 = getelementptr inbounds float* %tmp16932, i64 1
+  %tmp16934 = getelementptr inbounds float* %tmp16933, i64 1
+  %tmp16935 = getelementptr inbounds float* %tmp16934, i64 1
+  %tmp16936 = getelementptr inbounds float* %tmp16935, i64 1
+  %tmp16937 = getelementptr inbounds float* %tmp16936, i64 1
+  %tmp16938 = getelementptr inbounds float* %tmp16937, i64 1
+  %tmp16939 = getelementptr inbounds float* %tmp16938, i64 1
+  %tmp16940 = getelementptr inbounds float* %tmp16939, i64 1
+  %tmp16941 = getelementptr inbounds float* %tmp16940, i64 1
+  %tmp16942 = getelementptr inbounds float* %tmp16941, i64 1
+  %tmp16943 = getelementptr inbounds float* %tmp16942, i64 1
+  %tmp16944 = getelementptr inbounds float* %tmp16943, i64 1
+  %tmp16945 = getelementptr inbounds float* %tmp16944, i64 1
+  %tmp16946 = getelementptr inbounds float* %tmp16945, i64 1
+  %tmp16947 = getelementptr inbounds float* %tmp16946, i64 1
+  %tmp16948 = getelementptr inbounds float* %tmp16947, i64 1
+  %tmp16949 = getelementptr inbounds float* %tmp16948, i64 1
+  %tmp16950 = getelementptr inbounds float* %tmp16949, i64 1
+  %tmp16951 = getelementptr inbounds float* %tmp16950, i64 1
+  %tmp16952 = getelementptr inbounds float* %tmp16951, i64 1
+  %tmp16953 = getelementptr inbounds float* %tmp16952, i64 1
+  %tmp16954 = getelementptr inbounds float* %tmp16953, i64 1
+  %tmp16955 = getelementptr inbounds float* %tmp16954, i64 1
+  %tmp16956 = getelementptr inbounds float* %tmp16955, i64 1
+  %tmp16957 = getelementptr inbounds float* %tmp16956, i64 1
+  %tmp16958 = getelementptr inbounds float* %tmp16957, i64 1
+  %tmp16959 = getelementptr inbounds float* %tmp16958, i64 1
+  %tmp16960 = getelementptr inbounds float* %tmp16959, i64 1
+  %tmp16961 = getelementptr inbounds float* %tmp16960, i64 1
+  %tmp16962 = getelementptr inbounds float* %tmp16961, i64 1
+  %tmp16963 = getelementptr inbounds float* %tmp16962, i64 1
+  %tmp16964 = getelementptr inbounds float* %tmp16963, i64 1
+  %tmp16965 = getelementptr inbounds float* %tmp16964, i64 1
+  %tmp16966 = getelementptr inbounds float* %tmp16965, i64 1
+  %tmp16967 = getelementptr inbounds float* %tmp16966, i64 1
+  %tmp16968 = getelementptr inbounds float* %tmp16967, i64 1
+  %tmp16969 = getelementptr inbounds float* %tmp16968, i64 1
+  %tmp16970 = getelementptr inbounds float* %tmp16969, i64 1
+  %tmp16971 = getelementptr inbounds float* %tmp16970, i64 1
+  %tmp16972 = getelementptr inbounds float* %tmp16971, i64 1
+  %tmp16973 = getelementptr inbounds float* %tmp16972, i64 1
+  %tmp16974 = getelementptr inbounds float* %tmp16973, i64 1
+  %tmp16975 = getelementptr inbounds float* %tmp16974, i64 1
+  %tmp16976 = getelementptr inbounds float* %tmp16975, i64 1
+  %tmp16977 = getelementptr inbounds float* %tmp16976, i64 1
+  %tmp16978 = getelementptr inbounds float* %tmp16977, i64 1
+  %tmp16979 = getelementptr inbounds float* %tmp16978, i64 1
+  %tmp16980 = getelementptr inbounds float* %tmp16979, i64 1
+  %tmp16981 = getelementptr inbounds float* %tmp16980, i64 1
+  %tmp16982 = getelementptr inbounds float* %tmp16981, i64 1
+  %tmp16983 = getelementptr inbounds float* %tmp16982, i64 1
+  %tmp16984 = getelementptr inbounds float* %tmp16983, i64 1
+  %tmp16985 = getelementptr inbounds float* %tmp16984, i64 1
+  %tmp16986 = getelementptr inbounds float* %tmp16985, i64 1
+  %tmp16987 = getelementptr inbounds float* %tmp16986, i64 1
+  %tmp16988 = getelementptr inbounds float* %tmp16987, i64 1
+  %tmp16989 = getelementptr inbounds float* %tmp16988, i64 1
+  %tmp16990 = getelementptr inbounds float* %tmp16989, i64 1
+  %tmp16991 = getelementptr inbounds float* %tmp16990, i64 1
+  %tmp16992 = getelementptr inbounds float* %tmp16991, i64 1
+  %tmp16993 = getelementptr inbounds float* %tmp16992, i64 1
+  %tmp16994 = getelementptr inbounds float* %tmp16993, i64 1
+  %tmp16995 = getelementptr inbounds float* %tmp16994, i64 1
+  %tmp16996 = getelementptr inbounds float* %tmp16995, i64 1
+  %tmp16997 = getelementptr inbounds float* %tmp16996, i64 1
+  %tmp16998 = getelementptr inbounds float* %tmp16997, i64 1
+  %tmp16999 = getelementptr inbounds float* %tmp16998, i64 1
+  %tmp17000 = getelementptr inbounds float* %tmp16999, i64 1
+  %tmp17001 = getelementptr inbounds float* %tmp17000, i64 1
+  %tmp17002 = getelementptr inbounds float* %tmp17001, i64 1
+  %tmp17003 = getelementptr inbounds float* %tmp17002, i64 1
+  %tmp17004 = getelementptr inbounds float* %tmp17003, i64 1
+  %tmp17005 = getelementptr inbounds float* %tmp17004, i64 1
+  %tmp17006 = getelementptr inbounds float* %tmp17005, i64 1
+  %tmp17007 = getelementptr inbounds float* %tmp17006, i64 1
+  %tmp17008 = getelementptr inbounds float* %tmp17007, i64 1
+  %tmp17009 = getelementptr inbounds float* %tmp17008, i64 1
+  %tmp17010 = getelementptr inbounds float* %tmp17009, i64 1
+  %tmp17011 = getelementptr inbounds float* %tmp17010, i64 1
+  %tmp17012 = getelementptr inbounds float* %tmp17011, i64 1
+  %tmp17013 = getelementptr inbounds float* %tmp17012, i64 1
+  %tmp17014 = getelementptr inbounds float* %tmp17013, i64 1
+  %tmp17015 = getelementptr inbounds float* %tmp17014, i64 1
+  %tmp17016 = getelementptr inbounds float* %tmp17015, i64 1
+  %tmp17017 = getelementptr inbounds float* %tmp17016, i64 1
+  %tmp17018 = getelementptr inbounds float* %tmp17017, i64 1
+  %tmp17019 = getelementptr inbounds float* %tmp17018, i64 1
+  %tmp17020 = getelementptr inbounds float* %tmp17019, i64 1
+  %tmp17021 = getelementptr inbounds float* %tmp17020, i64 1
+  %tmp17022 = getelementptr inbounds float* %tmp17021, i64 1
+  %tmp17023 = getelementptr inbounds float* %tmp17022, i64 1
+  %tmp17024 = getelementptr inbounds float* %tmp17023, i64 1
+  %tmp17025 = getelementptr inbounds float* %tmp17024, i64 1
+  %tmp17026 = getelementptr inbounds float* %tmp17025, i64 1
+  %tmp17027 = getelementptr inbounds float* %tmp17026, i64 1
+  %tmp17028 = getelementptr inbounds float* %tmp17027, i64 1
+  %tmp17029 = getelementptr inbounds float* %tmp17028, i64 1
+  %tmp17030 = getelementptr inbounds float* %tmp17029, i64 1
+  %tmp17031 = getelementptr inbounds float* %tmp17030, i64 1
+  %tmp17032 = getelementptr inbounds float* %tmp17031, i64 1
+  %tmp17033 = getelementptr inbounds float* %tmp17032, i64 1
+  %tmp17034 = getelementptr inbounds float* %tmp17033, i64 1
+  %tmp17035 = getelementptr inbounds float* %tmp17034, i64 1
+  %tmp17036 = getelementptr inbounds float* %tmp17035, i64 1
+  %tmp17037 = getelementptr inbounds float* %tmp17036, i64 1
+  %tmp17038 = getelementptr inbounds float* %tmp17037, i64 1
+  %tmp17039 = getelementptr inbounds float* %tmp17038, i64 1
+  %tmp17040 = getelementptr inbounds float* %tmp17039, i64 1
+  %tmp17041 = getelementptr inbounds float* %tmp17040, i64 1
+  %tmp17042 = getelementptr inbounds float* %tmp17041, i64 1
+  %tmp17043 = getelementptr inbounds float* %tmp17042, i64 1
+  %tmp17044 = getelementptr inbounds float* %tmp17043, i64 1
+  %tmp17045 = getelementptr inbounds float* %tmp17044, i64 1
+  %tmp17046 = getelementptr inbounds float* %tmp17045, i64 1
+  %tmp17047 = getelementptr inbounds float* %tmp17046, i64 1
+  %tmp17048 = getelementptr inbounds float* %tmp17047, i64 1
+  %tmp17049 = getelementptr inbounds float* %tmp17048, i64 1
+  %tmp17050 = getelementptr inbounds float* %tmp17049, i64 1
+  %tmp17051 = getelementptr inbounds float* %tmp17050, i64 1
+  %tmp17052 = getelementptr inbounds float* %tmp17051, i64 1
+  %tmp17053 = getelementptr inbounds float* %tmp17052, i64 1
+  %tmp17054 = getelementptr inbounds float* %tmp17053, i64 1
+  %tmp17055 = getelementptr inbounds float* %tmp17054, i64 1
+  %tmp17056 = getelementptr inbounds float* %tmp17055, i64 1
+  %tmp17057 = getelementptr inbounds float* %tmp17056, i64 1
+  %tmp17058 = getelementptr inbounds float* %tmp17057, i64 1
+  %tmp17059 = getelementptr inbounds float* %tmp17058, i64 1
+  %tmp17060 = getelementptr inbounds float* %tmp17059, i64 1
+  %tmp17061 = getelementptr inbounds float* %tmp17060, i64 1
+  %tmp17062 = getelementptr inbounds float* %tmp17061, i64 1
+  %tmp17063 = getelementptr inbounds float* %tmp17062, i64 1
+  %tmp17064 = getelementptr inbounds float* %tmp17063, i64 1
+  %tmp17065 = getelementptr inbounds float* %tmp17064, i64 1
+  %tmp17066 = getelementptr inbounds float* %tmp17065, i64 1
+  %tmp17067 = getelementptr inbounds float* %tmp17066, i64 1
+  %tmp17068 = getelementptr inbounds float* %tmp17067, i64 1
+  %tmp17069 = getelementptr inbounds float* %tmp17068, i64 1
+  %tmp17070 = getelementptr inbounds float* %tmp17069, i64 1
+  %tmp17071 = getelementptr inbounds float* %tmp17070, i64 1
+  %tmp17072 = getelementptr inbounds float* %tmp17071, i64 1
+  %tmp17073 = getelementptr inbounds float* %tmp17072, i64 1
+  %tmp17074 = getelementptr inbounds float* %tmp17073, i64 1
+  %tmp17075 = getelementptr inbounds float* %tmp17074, i64 1
+  %tmp17076 = getelementptr inbounds float* %tmp17075, i64 1
+  %tmp17077 = getelementptr inbounds float* %tmp17076, i64 1
+  %tmp17078 = getelementptr inbounds float* %tmp17077, i64 1
+  %tmp17079 = getelementptr inbounds float* %tmp17078, i64 1
+  %tmp17080 = getelementptr inbounds float* %tmp17079, i64 1
+  %tmp17081 = getelementptr inbounds float* %tmp17080, i64 1
+  %tmp17082 = getelementptr inbounds float* %tmp17081, i64 1
+  %tmp17083 = getelementptr inbounds float* %tmp17082, i64 1
+  %tmp17084 = getelementptr inbounds float* %tmp17083, i64 1
+  %tmp17085 = getelementptr inbounds float* %tmp17084, i64 1
+  %tmp17086 = getelementptr inbounds float* %tmp17085, i64 1
+  %tmp17087 = getelementptr inbounds float* %tmp17086, i64 1
+  %tmp17088 = getelementptr inbounds float* %tmp17087, i64 1
+  %tmp17089 = getelementptr inbounds float* %tmp17088, i64 1
+  %tmp17090 = getelementptr inbounds float* %tmp17089, i64 1
+  %tmp17091 = getelementptr inbounds float* %tmp17090, i64 1
+  %tmp17092 = getelementptr inbounds float* %tmp17091, i64 1
+  %tmp17093 = getelementptr inbounds float* %tmp17092, i64 1
+  %tmp17094 = getelementptr inbounds float* %tmp17093, i64 1
+  %tmp17095 = getelementptr inbounds float* %tmp17094, i64 1
+  %tmp17096 = getelementptr inbounds float* %tmp17095, i64 1
+  %tmp17097 = getelementptr inbounds float* %tmp17096, i64 1
+  %tmp17098 = getelementptr inbounds float* %tmp17097, i64 1
+  %tmp17099 = getelementptr inbounds float* %tmp17098, i64 1
+  %tmp17100 = getelementptr inbounds float* %tmp17099, i64 1
+  %tmp17101 = getelementptr inbounds float* %tmp17100, i64 1
+  %tmp17102 = getelementptr inbounds float* %tmp17101, i64 1
+  %tmp17103 = getelementptr inbounds float* %tmp17102, i64 1
+  %tmp17104 = getelementptr inbounds float* %tmp17103, i64 1
+  %tmp17105 = getelementptr inbounds float* %tmp17104, i64 1
+  %tmp17106 = getelementptr inbounds float* %tmp17105, i64 1
+  %tmp17107 = getelementptr inbounds float* %tmp17106, i64 1
+  %tmp17108 = getelementptr inbounds float* %tmp17107, i64 1
+  %tmp17109 = getelementptr inbounds float* %tmp17108, i64 1
+  %tmp17110 = getelementptr inbounds float* %tmp17109, i64 1
+  %tmp17111 = getelementptr inbounds float* %tmp17110, i64 1
+  %tmp17112 = getelementptr inbounds float* %tmp17111, i64 1
+  %tmp17113 = getelementptr inbounds float* %tmp17112, i64 1
+  %tmp17114 = getelementptr inbounds float* %tmp17113, i64 1
+  %tmp17115 = getelementptr inbounds float* %tmp17114, i64 1
+  %tmp17116 = getelementptr inbounds float* %tmp17115, i64 1
+  %tmp17117 = getelementptr inbounds float* %tmp17116, i64 1
+  %tmp17118 = getelementptr inbounds float* %tmp17117, i64 1
+  %tmp17119 = getelementptr inbounds float* %tmp17118, i64 1
+  %tmp17120 = getelementptr inbounds float* %tmp17119, i64 1
+  %tmp17121 = getelementptr inbounds float* %tmp17120, i64 1
+  %tmp17122 = getelementptr inbounds float* %tmp17121, i64 1
+  %tmp17123 = getelementptr inbounds float* %tmp17122, i64 1
+  %tmp17124 = getelementptr inbounds float* %tmp17123, i64 1
+  %tmp17125 = getelementptr inbounds float* %tmp17124, i64 1
+  %tmp17126 = getelementptr inbounds float* %tmp17125, i64 1
+  %tmp17127 = getelementptr inbounds float* %tmp17126, i64 1
+  %tmp17128 = getelementptr inbounds float* %tmp17127, i64 1
+  %tmp17129 = getelementptr inbounds float* %tmp17128, i64 1
+  %tmp17130 = getelementptr inbounds float* %tmp17129, i64 1
+  %tmp17131 = getelementptr inbounds float* %tmp17130, i64 1
+  %tmp17132 = getelementptr inbounds float* %tmp17131, i64 1
+  %tmp17133 = getelementptr inbounds float* %tmp17132, i64 1
+  %tmp17134 = getelementptr inbounds float* %tmp17133, i64 1
+  %tmp17135 = getelementptr inbounds float* %tmp17134, i64 1
+  %tmp17136 = getelementptr inbounds float* %tmp17135, i64 1
+  %tmp17137 = getelementptr inbounds float* %tmp17136, i64 1
+  %tmp17138 = getelementptr inbounds float* %tmp17137, i64 1
+  %tmp17139 = getelementptr inbounds float* %tmp17138, i64 1
+  %tmp17140 = getelementptr inbounds float* %tmp17139, i64 1
+  %tmp17141 = getelementptr inbounds float* %tmp17140, i64 1
+  %tmp17142 = getelementptr inbounds float* %tmp17141, i64 1
+  %tmp17143 = getelementptr inbounds float* %tmp17142, i64 1
+  %tmp17144 = getelementptr inbounds float* %tmp17143, i64 1
+  %tmp17145 = getelementptr inbounds float* %tmp17144, i64 1
+  %tmp17146 = getelementptr inbounds float* %tmp17145, i64 1
+  %tmp17147 = getelementptr inbounds float* %tmp17146, i64 1
+  %tmp17148 = getelementptr inbounds float* %tmp17147, i64 1
+  %tmp17149 = getelementptr inbounds float* %tmp17148, i64 1
+  %tmp17150 = getelementptr inbounds float* %tmp17149, i64 1
+  %tmp17151 = getelementptr inbounds float* %tmp17150, i64 1
+  %tmp17152 = getelementptr inbounds float* %tmp17151, i64 1
+  %tmp17153 = getelementptr inbounds float* %tmp17152, i64 1
+  %tmp17154 = getelementptr inbounds float* %tmp17153, i64 1
+  %tmp17155 = getelementptr inbounds float* %tmp17154, i64 1
+  %tmp17156 = getelementptr inbounds float* %tmp17155, i64 1
+  %tmp17157 = getelementptr inbounds float* %tmp17156, i64 1
+  %tmp17158 = getelementptr inbounds float* %tmp17157, i64 1
+  %tmp17159 = getelementptr inbounds float* %tmp17158, i64 1
+  %tmp17160 = getelementptr inbounds float* %tmp17159, i64 1
+  %tmp17161 = getelementptr inbounds float* %tmp17160, i64 1
+  %tmp17162 = getelementptr inbounds float* %tmp17161, i64 1
+  %tmp17163 = getelementptr inbounds float* %tmp17162, i64 1
+  %tmp17164 = getelementptr inbounds float* %tmp17163, i64 1
+  %tmp17165 = getelementptr inbounds float* %tmp17164, i64 1
+  %tmp17166 = getelementptr inbounds float* %tmp17165, i64 1
+  %tmp17167 = getelementptr inbounds float* %tmp17166, i64 1
+  %tmp17168 = getelementptr inbounds float* %tmp17167, i64 1
+  %tmp17169 = getelementptr inbounds float* %tmp17168, i64 1
+  %tmp17170 = getelementptr inbounds float* %tmp17169, i64 1
+  %tmp17171 = getelementptr inbounds float* %tmp17170, i64 1
+  %tmp17172 = getelementptr inbounds float* %tmp17171, i64 1
+  %tmp17173 = getelementptr inbounds float* %tmp17172, i64 1
+  %tmp17174 = getelementptr inbounds float* %tmp17173, i64 1
+  %tmp17175 = getelementptr inbounds float* %tmp17174, i64 1
+  %tmp17176 = getelementptr inbounds float* %tmp17175, i64 1
+  %tmp17177 = getelementptr inbounds float* %tmp17176, i64 1
+  %tmp17178 = getelementptr inbounds float* %tmp17177, i64 1
+  %tmp17179 = getelementptr inbounds float* %tmp17178, i64 1
+  %tmp17180 = getelementptr inbounds float* %tmp17179, i64 1
+  %tmp17181 = getelementptr inbounds float* %tmp17180, i64 1
+  %tmp17182 = getelementptr inbounds float* %tmp17181, i64 1
+  %tmp17183 = getelementptr inbounds float* %tmp17182, i64 1
+  %tmp17184 = getelementptr inbounds float* %tmp17183, i64 1
+  %tmp17185 = getelementptr inbounds float* %tmp17184, i64 1
+  %tmp17186 = getelementptr inbounds float* %tmp17185, i64 1
+  %tmp17187 = getelementptr inbounds float* %tmp17186, i64 1
+  %tmp17188 = getelementptr inbounds float* %tmp17187, i64 1
+  %tmp17189 = getelementptr inbounds float* %tmp17188, i64 1
+  %tmp17190 = getelementptr inbounds float* %tmp17189, i64 1
+  %tmp17191 = getelementptr inbounds float* %tmp17190, i64 1
+  %tmp17192 = getelementptr inbounds float* %tmp17191, i64 1
+  %tmp17193 = getelementptr inbounds float* %tmp17192, i64 1
+  %tmp17194 = getelementptr inbounds float* %tmp17193, i64 1
+  %tmp17195 = getelementptr inbounds float* %tmp17194, i64 1
+  %tmp17196 = getelementptr inbounds float* %tmp17195, i64 1
+  %tmp17197 = getelementptr inbounds float* %tmp17196, i64 1
+  %tmp17198 = getelementptr inbounds float* %tmp17197, i64 1
+  %tmp17199 = getelementptr inbounds float* %tmp17198, i64 1
+  %tmp17200 = getelementptr inbounds float* %tmp17199, i64 1
+  %tmp17201 = getelementptr inbounds float* %tmp17200, i64 1
+  %tmp17202 = getelementptr inbounds float* %tmp17201, i64 1
+  %tmp17203 = getelementptr inbounds float* %tmp17202, i64 1
+  %tmp17204 = getelementptr inbounds float* %tmp17203, i64 1
+  %tmp17205 = getelementptr inbounds float* %tmp17204, i64 1
+  %tmp17206 = getelementptr inbounds float* %tmp17205, i64 1
+  %tmp17207 = getelementptr inbounds float* %tmp17206, i64 1
+  %tmp17208 = getelementptr inbounds float* %tmp17207, i64 1
+  %tmp17209 = getelementptr inbounds float* %tmp17208, i64 1
+  %tmp17210 = getelementptr inbounds float* %tmp17209, i64 1
+  %tmp17211 = getelementptr inbounds float* %tmp17210, i64 1
+  %tmp17212 = getelementptr inbounds float* %tmp17211, i64 1
+  %tmp17213 = getelementptr inbounds float* %tmp17212, i64 1
+  %tmp17214 = getelementptr inbounds float* %tmp17213, i64 1
+  %tmp17215 = getelementptr inbounds float* %tmp17214, i64 1
+  %tmp17216 = getelementptr inbounds float* %tmp17215, i64 1
+  %tmp17217 = getelementptr inbounds float* %tmp17216, i64 1
+  %tmp17218 = getelementptr inbounds float* %tmp17217, i64 1
+  %tmp17219 = getelementptr inbounds float* %tmp17218, i64 1
+  %tmp17220 = getelementptr inbounds float* %tmp17219, i64 1
+  %tmp17221 = getelementptr inbounds float* %tmp17220, i64 1
+  %tmp17222 = getelementptr inbounds float* %tmp17221, i64 1
+  %tmp17223 = getelementptr inbounds float* %tmp17222, i64 1
+  %tmp17224 = getelementptr inbounds float* %tmp17223, i64 1
+  %tmp17225 = getelementptr inbounds float* %tmp17224, i64 1
+  %tmp17226 = getelementptr inbounds float* %tmp17225, i64 1
+  %tmp17227 = getelementptr inbounds float* %tmp17226, i64 1
+  %tmp17228 = getelementptr inbounds float* %tmp17227, i64 1
+  %tmp17229 = getelementptr inbounds float* %tmp17228, i64 1
+  %tmp17230 = getelementptr inbounds float* %tmp17229, i64 1
+  %tmp17231 = getelementptr inbounds float* %tmp17230, i64 1
+  %tmp17232 = getelementptr inbounds float* %tmp17231, i64 1
+  %tmp17233 = getelementptr inbounds float* %tmp17232, i64 1
+  %tmp17234 = getelementptr inbounds float* %tmp17233, i64 1
+  %tmp17235 = getelementptr inbounds float* %tmp17234, i64 1
+  %tmp17236 = getelementptr inbounds float* %tmp17235, i64 1
+  %tmp17237 = getelementptr inbounds float* %tmp17236, i64 1
+  %tmp17238 = getelementptr inbounds float* %tmp17237, i64 1
+  %tmp17239 = getelementptr inbounds float* %tmp17238, i64 1
+  %tmp17240 = getelementptr inbounds float* %tmp17239, i64 1
+  %tmp17241 = getelementptr inbounds float* %tmp17240, i64 1
+  %tmp17242 = getelementptr inbounds float* %tmp17241, i64 1
+  %tmp17243 = getelementptr inbounds float* %tmp17242, i64 1
+  %tmp17244 = getelementptr inbounds float* %tmp17243, i64 1
+  %tmp17245 = getelementptr inbounds float* %tmp17244, i64 1
+  %tmp17246 = getelementptr inbounds float* %tmp17245, i64 1
+  %tmp17247 = getelementptr inbounds float* %tmp17246, i64 1
+  %tmp17248 = getelementptr inbounds float* %tmp17247, i64 1
+  %tmp17249 = getelementptr inbounds float* %tmp17248, i64 1
+  %tmp17250 = getelementptr inbounds float* %tmp17249, i64 1
+  %tmp17251 = getelementptr inbounds float* %tmp17250, i64 1
+  %tmp17252 = getelementptr inbounds float* %tmp17251, i64 1
+  %tmp17253 = getelementptr inbounds float* %tmp17252, i64 1
+  %tmp17254 = getelementptr inbounds float* %tmp17253, i64 1
+  %tmp17255 = getelementptr inbounds float* %tmp17254, i64 1
+  %tmp17256 = getelementptr inbounds float* %tmp17255, i64 1
+  %tmp17257 = getelementptr inbounds float* %tmp17256, i64 1
+  %tmp17258 = getelementptr inbounds float* %tmp17257, i64 1
+  %tmp17259 = getelementptr inbounds float* %tmp17258, i64 1
+  %tmp17260 = getelementptr inbounds float* %tmp17259, i64 1
+  %tmp17261 = getelementptr inbounds float* %tmp17260, i64 1
+  %tmp17262 = getelementptr inbounds float* %tmp17261, i64 1
+  %tmp17263 = getelementptr inbounds float* %tmp17262, i64 1
+  %tmp17264 = getelementptr inbounds float* %tmp17263, i64 1
+  %tmp17265 = getelementptr inbounds float* %tmp17264, i64 1
+  %tmp17266 = getelementptr inbounds float* %tmp17265, i64 1
+  %tmp17267 = getelementptr inbounds float* %tmp17266, i64 1
+  %tmp17268 = getelementptr inbounds float* %tmp17267, i64 1
+  %tmp17269 = getelementptr inbounds float* %tmp17268, i64 1
+  %tmp17270 = getelementptr inbounds float* %tmp17269, i64 1
+  %tmp17271 = getelementptr inbounds float* %tmp17270, i64 1
+  %tmp17272 = getelementptr inbounds float* %tmp17271, i64 1
+  %tmp17273 = getelementptr inbounds float* %tmp17272, i64 1
+  %tmp17274 = getelementptr inbounds float* %tmp17273, i64 1
+  %tmp17275 = getelementptr inbounds float* %tmp17274, i64 1
+  %tmp17276 = getelementptr inbounds float* %tmp17275, i64 1
+  %tmp17277 = getelementptr inbounds float* %tmp17276, i64 1
+  %tmp17278 = getelementptr inbounds float* %tmp17277, i64 1
+  %tmp17279 = getelementptr inbounds float* %tmp17278, i64 1
+  %tmp17280 = getelementptr inbounds float* %tmp17279, i64 1
+  %tmp17281 = getelementptr inbounds float* %tmp17280, i64 1
+  %tmp17282 = getelementptr inbounds float* %tmp17281, i64 1
+  %tmp17283 = getelementptr inbounds float* %tmp17282, i64 1
+  %tmp17284 = getelementptr inbounds float* %tmp17283, i64 1
+  %tmp17285 = getelementptr inbounds float* %tmp17284, i64 1
+  %tmp17286 = getelementptr inbounds float* %tmp17285, i64 1
+  %tmp17287 = getelementptr inbounds float* %tmp17286, i64 1
+  %tmp17288 = getelementptr inbounds float* %tmp17287, i64 1
+  %tmp17289 = getelementptr inbounds float* %tmp17288, i64 1
+  %tmp17290 = getelementptr inbounds float* %tmp17289, i64 1
+  %tmp17291 = getelementptr inbounds float* %tmp17290, i64 1
+  %tmp17292 = getelementptr inbounds float* %tmp17291, i64 1
+  %tmp17293 = getelementptr inbounds float* %tmp17292, i64 1
+  %tmp17294 = getelementptr inbounds float* %tmp17293, i64 1
+  %tmp17295 = getelementptr inbounds float* %tmp17294, i64 1
+  %tmp17296 = getelementptr inbounds float* %tmp17295, i64 1
+  %tmp17297 = getelementptr inbounds float* %tmp17296, i64 1
+  %tmp17298 = getelementptr inbounds float* %tmp17297, i64 1
+  %tmp17299 = getelementptr inbounds float* %tmp17298, i64 1
+  %tmp17300 = getelementptr inbounds float* %tmp17299, i64 1
+  %tmp17301 = getelementptr inbounds float* %tmp17300, i64 1
+  %tmp17302 = getelementptr inbounds float* %tmp17301, i64 1
+  %tmp17303 = getelementptr inbounds float* %tmp17302, i64 1
+  %tmp17304 = getelementptr inbounds float* %tmp17303, i64 1
+  %tmp17305 = getelementptr inbounds float* %tmp17304, i64 1
+  %tmp17306 = getelementptr inbounds float* %tmp17305, i64 1
+  %tmp17307 = getelementptr inbounds float* %tmp17306, i64 1
+  %tmp17308 = getelementptr inbounds float* %tmp17307, i64 1
+  %tmp17309 = getelementptr inbounds float* %tmp17308, i64 1
+  %tmp17310 = getelementptr inbounds float* %tmp17309, i64 1
+  %tmp17311 = getelementptr inbounds float* %tmp17310, i64 1
+  %tmp17312 = getelementptr inbounds float* %tmp17311, i64 1
+  %tmp17313 = getelementptr inbounds float* %tmp17312, i64 1
+  %tmp17314 = getelementptr inbounds float* %tmp17313, i64 1
+  %tmp17315 = getelementptr inbounds float* %tmp17314, i64 1
+  %tmp17316 = getelementptr inbounds float* %tmp17315, i64 1
+  %tmp17317 = getelementptr inbounds float* %tmp17316, i64 1
+  %tmp17318 = getelementptr inbounds float* %tmp17317, i64 1
+  %tmp17319 = getelementptr inbounds float* %tmp17318, i64 1
+  %tmp17320 = getelementptr inbounds float* %tmp17319, i64 1
+  %tmp17321 = getelementptr inbounds float* %tmp17320, i64 1
+  %tmp17322 = getelementptr inbounds float* %tmp17321, i64 1
+  %tmp17323 = getelementptr inbounds float* %tmp17322, i64 1
+  %tmp17324 = getelementptr inbounds float* %tmp17323, i64 1
+  %tmp17325 = getelementptr inbounds float* %tmp17324, i64 1
+  %tmp17326 = getelementptr inbounds float* %tmp17325, i64 1
+  %tmp17327 = getelementptr inbounds float* %tmp17326, i64 1
+  %tmp17328 = getelementptr inbounds float* %tmp17327, i64 1
+  %tmp17329 = getelementptr inbounds float* %tmp17328, i64 1
+  %tmp17330 = getelementptr inbounds float* %tmp17329, i64 1
+  %tmp17331 = getelementptr inbounds float* %tmp17330, i64 1
+  %tmp17332 = getelementptr inbounds float* %tmp17331, i64 1
+  %tmp17333 = getelementptr inbounds float* %tmp17332, i64 1
+  %tmp17334 = getelementptr inbounds float* %tmp17333, i64 1
+  %tmp17335 = getelementptr inbounds float* %tmp17334, i64 1
+  %tmp17336 = getelementptr inbounds float* %tmp17335, i64 1
+  %tmp17337 = getelementptr inbounds float* %tmp17336, i64 1
+  %tmp17338 = getelementptr inbounds float* %tmp17337, i64 1
+  %tmp17339 = getelementptr inbounds float* %tmp17338, i64 1
+  %tmp17340 = getelementptr inbounds float* %tmp17339, i64 1
+  %tmp17341 = getelementptr inbounds float* %tmp17340, i64 1
+  %tmp17342 = getelementptr inbounds float* %tmp17341, i64 1
+  %tmp17343 = getelementptr inbounds float* %tmp17342, i64 1
+  %tmp17344 = getelementptr inbounds float* %tmp17343, i64 1
+  %tmp17345 = getelementptr inbounds float* %tmp17344, i64 1
+  %tmp17346 = getelementptr inbounds float* %tmp17345, i64 1
+  %tmp17347 = getelementptr inbounds float* %tmp17346, i64 1
+  %tmp17348 = getelementptr inbounds float* %tmp17347, i64 1
+  %tmp17349 = getelementptr inbounds float* %tmp17348, i64 1
+  %tmp17350 = getelementptr inbounds float* %tmp17349, i64 1
+  %tmp17351 = getelementptr inbounds float* %tmp17350, i64 1
+  %tmp17352 = getelementptr inbounds float* %tmp17351, i64 1
+  %tmp17353 = getelementptr inbounds float* %tmp17352, i64 1
+  %tmp17354 = getelementptr inbounds float* %tmp17353, i64 1
+  %tmp17355 = getelementptr inbounds float* %tmp17354, i64 1
+  %tmp17356 = getelementptr inbounds float* %tmp17355, i64 1
+  %tmp17357 = getelementptr inbounds float* %tmp17356, i64 1
+  %tmp17358 = getelementptr inbounds float* %tmp17357, i64 1
+  %tmp17359 = getelementptr inbounds float* %tmp17358, i64 1
+  %tmp17360 = getelementptr inbounds float* %tmp17359, i64 1
+  %tmp17361 = getelementptr inbounds float* %tmp17360, i64 1
+  %tmp17362 = getelementptr inbounds float* %tmp17361, i64 1
+  %tmp17363 = getelementptr inbounds float* %tmp17362, i64 1
+  %tmp17364 = getelementptr inbounds float* %tmp17363, i64 1
+  %tmp17365 = getelementptr inbounds float* %tmp17364, i64 1
+  %tmp17366 = getelementptr inbounds float* %tmp17365, i64 1
+  %tmp17367 = getelementptr inbounds float* %tmp17366, i64 1
+  %tmp17368 = getelementptr inbounds float* %tmp17367, i64 1
+  %tmp17369 = getelementptr inbounds float* %tmp17368, i64 1
+  %tmp17370 = getelementptr inbounds float* %tmp17369, i64 1
+  %tmp17371 = getelementptr inbounds float* %tmp17370, i64 1
+  %tmp17372 = getelementptr inbounds float* %tmp17371, i64 1
+  %tmp17373 = getelementptr inbounds float* %tmp17372, i64 1
+  %tmp17374 = getelementptr inbounds float* %tmp17373, i64 1
+  %tmp17375 = getelementptr inbounds float* %tmp17374, i64 1
+  %tmp17376 = getelementptr inbounds float* %tmp17375, i64 1
+  %tmp17377 = getelementptr inbounds float* %tmp17376, i64 1
+  %tmp17378 = getelementptr inbounds float* %tmp17377, i64 1
+  %tmp17379 = getelementptr inbounds float* %tmp17378, i64 1
+  %tmp17380 = getelementptr inbounds float* %tmp17379, i64 1
+  %tmp17381 = getelementptr inbounds float* %tmp17380, i64 1
+  %tmp17382 = getelementptr inbounds float* %tmp17381, i64 1
+  %tmp17383 = getelementptr inbounds float* %tmp17382, i64 1
+  %tmp17384 = getelementptr inbounds float* %tmp17383, i64 1
+  %tmp17385 = getelementptr inbounds float* %tmp17384, i64 1
+  %tmp17386 = getelementptr inbounds float* %tmp17385, i64 1
+  %tmp17387 = getelementptr inbounds float* %tmp17386, i64 1
+  %tmp17388 = getelementptr inbounds float* %tmp17387, i64 1
+  %tmp17389 = getelementptr inbounds float* %tmp17388, i64 1
+  %tmp17390 = getelementptr inbounds float* %tmp17389, i64 1
+  %tmp17391 = getelementptr inbounds float* %tmp17390, i64 1
+  %tmp17392 = getelementptr inbounds float* %tmp17391, i64 1
+  %tmp17393 = getelementptr inbounds float* %tmp17392, i64 1
+  %tmp17394 = getelementptr inbounds float* %tmp17393, i64 1
+  %tmp17395 = getelementptr inbounds float* %tmp17394, i64 1
+  %tmp17396 = getelementptr inbounds float* %tmp17395, i64 1
+  %tmp17397 = getelementptr inbounds float* %tmp17396, i64 1
+  %tmp17398 = getelementptr inbounds float* %tmp17397, i64 1
+  %tmp17399 = getelementptr inbounds float* %tmp17398, i64 1
+  %tmp17400 = getelementptr inbounds float* %tmp17399, i64 1
+  %tmp17401 = getelementptr inbounds float* %tmp17400, i64 1
+  %tmp17402 = getelementptr inbounds float* %tmp17401, i64 1
+  %tmp17403 = getelementptr inbounds float* %tmp17402, i64 1
+  %tmp17404 = getelementptr inbounds float* %tmp17403, i64 1
+  %tmp17405 = getelementptr inbounds float* %tmp17404, i64 1
+  %tmp17406 = getelementptr inbounds float* %tmp17405, i64 1
+  %tmp17407 = getelementptr inbounds float* %tmp17406, i64 1
+  %tmp17408 = getelementptr inbounds float* %tmp17407, i64 1
+  %tmp17409 = getelementptr inbounds float* %tmp17408, i64 1
+  %tmp17410 = getelementptr inbounds float* %tmp17409, i64 1
+  %tmp17411 = getelementptr inbounds float* %tmp17410, i64 1
+  %tmp17412 = getelementptr inbounds float* %tmp17411, i64 1
+  %tmp17413 = getelementptr inbounds float* %tmp17412, i64 1
+  %tmp17414 = getelementptr inbounds float* %tmp17413, i64 1
+  %tmp17415 = getelementptr inbounds float* %tmp17414, i64 1
+  %tmp17416 = getelementptr inbounds float* %tmp17415, i64 1
+  %tmp17417 = getelementptr inbounds float* %tmp17416, i64 1
+  %tmp17418 = getelementptr inbounds float* %tmp17417, i64 1
+  %tmp17419 = getelementptr inbounds float* %tmp17418, i64 1
+  %tmp17420 = getelementptr inbounds float* %tmp17419, i64 1
+  %tmp17421 = getelementptr inbounds float* %tmp17420, i64 1
+  %tmp17422 = getelementptr inbounds float* %tmp17421, i64 1
+  %tmp17423 = getelementptr inbounds float* %tmp17422, i64 1
+  %tmp17424 = getelementptr inbounds float* %tmp17423, i64 1
+  %tmp17425 = getelementptr inbounds float* %tmp17424, i64 1
+  %tmp17426 = getelementptr inbounds float* %tmp17425, i64 1
+  %tmp17427 = getelementptr inbounds float* %tmp17426, i64 1
+  %tmp17428 = getelementptr inbounds float* %tmp17427, i64 1
+  %tmp17429 = getelementptr inbounds float* %tmp17428, i64 1
+  %tmp17430 = getelementptr inbounds float* %tmp17429, i64 1
+  %tmp17431 = getelementptr inbounds float* %tmp17430, i64 1
+  %tmp17432 = getelementptr inbounds float* %tmp17431, i64 1
+  %tmp17433 = getelementptr inbounds float* %tmp17432, i64 1
+  %tmp17434 = getelementptr inbounds float* %tmp17433, i64 1
+  %tmp17435 = getelementptr inbounds float* %tmp17434, i64 1
+  %tmp17436 = getelementptr inbounds float* %tmp17435, i64 1
+  %tmp17437 = getelementptr inbounds float* %tmp17436, i64 1
+  %tmp17438 = getelementptr inbounds float* %tmp17437, i64 1
+  %tmp17439 = getelementptr inbounds float* %tmp17438, i64 1
+  %tmp17440 = getelementptr inbounds float* %tmp17439, i64 1
+  %tmp17441 = getelementptr inbounds float* %tmp17440, i64 1
+  %tmp17442 = getelementptr inbounds float* %tmp17441, i64 1
+  %tmp17443 = getelementptr inbounds float* %tmp17442, i64 1
+  %tmp17444 = getelementptr inbounds float* %tmp17443, i64 1
+  %tmp17445 = getelementptr inbounds float* %tmp17444, i64 1
+  %tmp17446 = getelementptr inbounds float* %tmp17445, i64 1
+  %tmp17447 = getelementptr inbounds float* %tmp17446, i64 1
+  %tmp17448 = getelementptr inbounds float* %tmp17447, i64 1
+  %tmp17449 = getelementptr inbounds float* %tmp17448, i64 1
+  %tmp17450 = getelementptr inbounds float* %tmp17449, i64 1
+  %tmp17451 = getelementptr inbounds float* %tmp17450, i64 1
+  %tmp17452 = getelementptr inbounds float* %tmp17451, i64 1
+  %tmp17453 = getelementptr inbounds float* %tmp17452, i64 1
+  %tmp17454 = getelementptr inbounds float* %tmp17453, i64 1
+  %tmp17455 = getelementptr inbounds float* %tmp17454, i64 1
+  %tmp17456 = getelementptr inbounds float* %tmp17455, i64 1
+  %tmp17457 = getelementptr inbounds float* %tmp17456, i64 1
+  %tmp17458 = getelementptr inbounds float* %tmp17457, i64 1
+  %tmp17459 = getelementptr inbounds float* %tmp17458, i64 1
+  %tmp17460 = getelementptr inbounds float* %tmp17459, i64 1
+  %tmp17461 = getelementptr inbounds float* %tmp17460, i64 1
+  %tmp17462 = getelementptr inbounds float* %tmp17461, i64 1
+  %tmp17463 = getelementptr inbounds float* %tmp17462, i64 1
+  %tmp17464 = getelementptr inbounds float* %tmp17463, i64 1
+  %tmp17465 = getelementptr inbounds float* %tmp17464, i64 1
+  %tmp17466 = getelementptr inbounds float* %tmp17465, i64 1
+  %tmp17467 = getelementptr inbounds float* %tmp17466, i64 1
+  %tmp17468 = getelementptr inbounds float* %tmp17467, i64 1
+  %tmp17469 = getelementptr inbounds float* %tmp17468, i64 1
+  %tmp17470 = getelementptr inbounds float* %tmp17469, i64 1
+  %tmp17471 = getelementptr inbounds float* %tmp17470, i64 1
+  %tmp17472 = getelementptr inbounds float* %tmp17471, i64 1
+  %tmp17473 = getelementptr inbounds float* %tmp17472, i64 1
+  %tmp17474 = getelementptr inbounds float* %tmp17473, i64 1
+  %tmp17475 = getelementptr inbounds float* %tmp17474, i64 1
+  %tmp17476 = getelementptr inbounds float* %tmp17475, i64 1
+  %tmp17477 = getelementptr inbounds float* %tmp17476, i64 1
+  %tmp17478 = getelementptr inbounds float* %tmp17477, i64 1
+  %tmp17479 = getelementptr inbounds float* %tmp17478, i64 1
+  %tmp17480 = getelementptr inbounds float* %tmp17479, i64 1
+  %tmp17481 = getelementptr inbounds float* %tmp17480, i64 1
+  %tmp17482 = getelementptr inbounds float* %tmp17481, i64 1
+  %tmp17483 = getelementptr inbounds float* %tmp17482, i64 1
+  %tmp17484 = getelementptr inbounds float* %tmp17483, i64 1
+  %tmp17485 = getelementptr inbounds float* %tmp17484, i64 1
+  %tmp17486 = getelementptr inbounds float* %tmp17485, i64 1
+  %tmp17487 = getelementptr inbounds float* %tmp17486, i64 1
+  %tmp17488 = getelementptr inbounds float* %tmp17487, i64 1
+  %tmp17489 = getelementptr inbounds float* %tmp17488, i64 1
+  %tmp17490 = getelementptr inbounds float* %tmp17489, i64 1
+  %tmp17491 = getelementptr inbounds float* %tmp17490, i64 1
+  %tmp17492 = getelementptr inbounds float* %tmp17491, i64 1
+  %tmp17493 = getelementptr inbounds float* %tmp17492, i64 1
+  %tmp17494 = getelementptr inbounds float* %tmp17493, i64 1
+  %tmp17495 = getelementptr inbounds float* %tmp17494, i64 1
+  %tmp17496 = getelementptr inbounds float* %tmp17495, i64 1
+  %tmp17497 = getelementptr inbounds float* %tmp17496, i64 1
+  %tmp17498 = getelementptr inbounds float* %tmp17497, i64 1
+  %tmp17499 = getelementptr inbounds float* %tmp17498, i64 1
+  %tmp17500 = getelementptr inbounds float* %tmp17499, i64 1
+  %tmp17501 = getelementptr inbounds float* %tmp17500, i64 1
+  %tmp17502 = getelementptr inbounds float* %tmp17501, i64 1
+  %tmp17503 = getelementptr inbounds float* %tmp17502, i64 1
+  %tmp17504 = getelementptr inbounds float* %tmp17503, i64 1
+  %tmp17505 = getelementptr inbounds float* %tmp17504, i64 1
+  %tmp17506 = getelementptr inbounds float* %tmp17505, i64 1
+  %tmp17507 = getelementptr inbounds float* %tmp17506, i64 1
+  %tmp17508 = getelementptr inbounds float* %tmp17507, i64 1
+  %tmp17509 = getelementptr inbounds float* %tmp17508, i64 1
+  %tmp17510 = getelementptr inbounds float* %tmp17509, i64 1
+  %tmp17511 = getelementptr inbounds float* %tmp17510, i64 1
+  %tmp17512 = getelementptr inbounds float* %tmp17511, i64 1
+  %tmp17513 = getelementptr inbounds float* %tmp17512, i64 1
+  %tmp17514 = getelementptr inbounds float* %tmp17513, i64 1
+  %tmp17515 = getelementptr inbounds float* %tmp17514, i64 1
+  %tmp17516 = getelementptr inbounds float* %tmp17515, i64 1
+  %tmp17517 = getelementptr inbounds float* %tmp17516, i64 1
+  %tmp17518 = getelementptr inbounds float* %tmp17517, i64 1
+  %tmp17519 = getelementptr inbounds float* %tmp17518, i64 1
+  %tmp17520 = getelementptr inbounds float* %tmp17519, i64 1
+  %tmp17521 = getelementptr inbounds float* %tmp17520, i64 1
+  %tmp17522 = getelementptr inbounds float* %tmp17521, i64 1
+  %tmp17523 = getelementptr inbounds float* %tmp17522, i64 1
+  %tmp17524 = getelementptr inbounds float* %tmp17523, i64 1
+  %tmp17525 = getelementptr inbounds float* %tmp17524, i64 1
+  %tmp17526 = getelementptr inbounds float* %tmp17525, i64 1
+  %tmp17527 = getelementptr inbounds float* %tmp17526, i64 1
+  %tmp17528 = getelementptr inbounds float* %tmp17527, i64 1
+  %tmp17529 = getelementptr inbounds float* %tmp17528, i64 1
+  %tmp17530 = getelementptr inbounds float* %tmp17529, i64 1
+  %tmp17531 = getelementptr inbounds float* %tmp17530, i64 1
+  %tmp17532 = getelementptr inbounds float* %tmp17531, i64 1
+  %tmp17533 = getelementptr inbounds float* %tmp17532, i64 1
+  %tmp17534 = getelementptr inbounds float* %tmp17533, i64 1
+  %tmp17535 = getelementptr inbounds float* %tmp17534, i64 1
+  %tmp17536 = getelementptr inbounds float* %tmp17535, i64 1
+  %tmp17537 = getelementptr inbounds float* %tmp17536, i64 1
+  %tmp17538 = getelementptr inbounds float* %tmp17537, i64 1
+  %tmp17539 = getelementptr inbounds float* %tmp17538, i64 1
+  %tmp17540 = getelementptr inbounds float* %tmp17539, i64 1
+  %tmp17541 = getelementptr inbounds float* %tmp17540, i64 1
+  %tmp17542 = getelementptr inbounds float* %tmp17541, i64 1
+  %tmp17543 = getelementptr inbounds float* %tmp17542, i64 1
+  %tmp17544 = getelementptr inbounds float* %tmp17543, i64 1
+  %tmp17545 = getelementptr inbounds float* %tmp17544, i64 1
+  %tmp17546 = getelementptr inbounds float* %tmp17545, i64 1
+  %tmp17547 = getelementptr inbounds float* %tmp17546, i64 1
+  %tmp17548 = getelementptr inbounds float* %tmp17547, i64 1
+  %tmp17549 = getelementptr inbounds float* %tmp17548, i64 1
+  %tmp17550 = getelementptr inbounds float* %tmp17549, i64 1
+  %tmp17551 = getelementptr inbounds float* %tmp17550, i64 1
+  %tmp17552 = getelementptr inbounds float* %tmp17551, i64 1
+  %tmp17553 = getelementptr inbounds float* %tmp17552, i64 1
+  %tmp17554 = getelementptr inbounds float* %tmp17553, i64 1
+  %tmp17555 = getelementptr inbounds float* %tmp17554, i64 1
+  %tmp17556 = getelementptr inbounds float* %tmp17555, i64 1
+  %tmp17557 = getelementptr inbounds float* %tmp17556, i64 1
+  %tmp17558 = getelementptr inbounds float* %tmp17557, i64 1
+  %tmp17559 = getelementptr inbounds float* %tmp17558, i64 1
+  %tmp17560 = getelementptr inbounds float* %tmp17559, i64 1
+  %tmp17561 = getelementptr inbounds float* %tmp17560, i64 1
+  %tmp17562 = getelementptr inbounds float* %tmp17561, i64 1
+  %tmp17563 = getelementptr inbounds float* %tmp17562, i64 1
+  %tmp17564 = getelementptr inbounds float* %tmp17563, i64 1
+  %tmp17565 = getelementptr inbounds float* %tmp17564, i64 1
+  %tmp17566 = getelementptr inbounds float* %tmp17565, i64 1
+  %tmp17567 = getelementptr inbounds float* %tmp17566, i64 1
+  %tmp17568 = getelementptr inbounds float* %tmp17567, i64 1
+  %tmp17569 = getelementptr inbounds float* %tmp17568, i64 1
+  %tmp17570 = getelementptr inbounds float* %tmp17569, i64 1
+  %tmp17571 = getelementptr inbounds float* %tmp17570, i64 1
+  %tmp17572 = getelementptr inbounds float* %tmp17571, i64 1
+  %tmp17573 = getelementptr inbounds float* %tmp17572, i64 1
+  %tmp17574 = getelementptr inbounds float* %tmp17573, i64 1
+  %tmp17575 = getelementptr inbounds float* %tmp17574, i64 1
+  %tmp17576 = getelementptr inbounds float* %tmp17575, i64 1
+  %tmp17577 = getelementptr inbounds float* %tmp17576, i64 1
+  %tmp17578 = getelementptr inbounds float* %tmp17577, i64 1
+  %tmp17579 = getelementptr inbounds float* %tmp17578, i64 1
+  %tmp17580 = getelementptr inbounds float* %tmp17579, i64 1
+  %tmp17581 = getelementptr inbounds float* %tmp17580, i64 1
+  %tmp17582 = getelementptr inbounds float* %tmp17581, i64 1
+  %tmp17583 = getelementptr inbounds float* %tmp17582, i64 1
+  %tmp17584 = getelementptr inbounds float* %tmp17583, i64 1
+  %tmp17585 = getelementptr inbounds float* %tmp17584, i64 1
+  %tmp17586 = getelementptr inbounds float* %tmp17585, i64 1
+  %tmp17587 = getelementptr inbounds float* %tmp17586, i64 1
+  %tmp17588 = getelementptr inbounds float* %tmp17587, i64 1
+  %tmp17589 = getelementptr inbounds float* %tmp17588, i64 1
+  %tmp17590 = getelementptr inbounds float* %tmp17589, i64 1
+  %tmp17591 = getelementptr inbounds float* %tmp17590, i64 1
+  %tmp17592 = getelementptr inbounds float* %tmp17591, i64 1
+  %tmp17593 = getelementptr inbounds float* %tmp17592, i64 1
+  %tmp17594 = getelementptr inbounds float* %tmp17593, i64 1
+  %tmp17595 = getelementptr inbounds float* %tmp17594, i64 1
+  %tmp17596 = getelementptr inbounds float* %tmp17595, i64 1
+  %tmp17597 = getelementptr inbounds float* %tmp17596, i64 1
+  %tmp17598 = getelementptr inbounds float* %tmp17597, i64 1
+  %tmp17599 = getelementptr inbounds float* %tmp17598, i64 1
+  %tmp17600 = getelementptr inbounds float* %tmp17599, i64 1
+  %tmp17601 = getelementptr inbounds float* %tmp17600, i64 1
+  %tmp17602 = getelementptr inbounds float* %tmp17601, i64 1
+  %tmp17603 = getelementptr inbounds float* %tmp17602, i64 1
+  %tmp17604 = getelementptr inbounds float* %tmp17603, i64 1
+  %tmp17605 = getelementptr inbounds float* %tmp17604, i64 1
+  %tmp17606 = getelementptr inbounds float* %tmp17605, i64 1
+  %tmp17607 = getelementptr inbounds float* %tmp17606, i64 1
+  %tmp17608 = getelementptr inbounds float* %tmp17607, i64 1
+  %tmp17609 = getelementptr inbounds float* %tmp17608, i64 1
+  %tmp17610 = getelementptr inbounds float* %tmp17609, i64 1
+  %tmp17611 = getelementptr inbounds float* %tmp17610, i64 1
+  %tmp17612 = getelementptr inbounds float* %tmp17611, i64 1
+  %tmp17613 = getelementptr inbounds float* %tmp17612, i64 1
+  %tmp17614 = getelementptr inbounds float* %tmp17613, i64 1
+  %tmp17615 = getelementptr inbounds float* %tmp17614, i64 1
+  %tmp17616 = getelementptr inbounds float* %tmp17615, i64 1
+  %tmp17617 = getelementptr inbounds float* %tmp17616, i64 1
+  %tmp17618 = getelementptr inbounds float* %tmp17617, i64 1
+  %tmp17619 = getelementptr inbounds float* %tmp17618, i64 1
+  %tmp17620 = getelementptr inbounds float* %tmp17619, i64 1
+  %tmp17621 = getelementptr inbounds float* %tmp17620, i64 1
+  %tmp17622 = getelementptr inbounds float* %tmp17621, i64 1
+  %tmp17623 = getelementptr inbounds float* %tmp17622, i64 1
+  %tmp17624 = getelementptr inbounds float* %tmp17623, i64 1
+  %tmp17625 = getelementptr inbounds float* %tmp17624, i64 1
+  %tmp17626 = getelementptr inbounds float* %tmp17625, i64 1
+  %tmp17627 = getelementptr inbounds float* %tmp17626, i64 1
+  %tmp17628 = getelementptr inbounds float* %tmp17627, i64 1
+  %tmp17629 = getelementptr inbounds float* %tmp17628, i64 1
+  %tmp17630 = getelementptr inbounds float* %tmp17629, i64 1
+  %tmp17631 = getelementptr inbounds float* %tmp17630, i64 1
+  %tmp17632 = getelementptr inbounds float* %tmp17631, i64 1
+  %tmp17633 = getelementptr inbounds float* %tmp17632, i64 1
+  %tmp17634 = getelementptr inbounds float* %tmp17633, i64 1
+  %tmp17635 = getelementptr inbounds float* %tmp17634, i64 1
+  %tmp17636 = getelementptr inbounds float* %tmp17635, i64 1
+  %tmp17637 = getelementptr inbounds float* %tmp17636, i64 1
+  %tmp17638 = getelementptr inbounds float* %tmp17637, i64 1
+  %tmp17639 = getelementptr inbounds float* %tmp17638, i64 1
+  %tmp17640 = getelementptr inbounds float* %tmp17639, i64 1
+  %tmp17641 = getelementptr inbounds float* %tmp17640, i64 1
+  %tmp17642 = getelementptr inbounds float* %tmp17641, i64 1
+  %tmp17643 = getelementptr inbounds float* %tmp17642, i64 1
+  %tmp17644 = getelementptr inbounds float* %tmp17643, i64 1
+  %tmp17645 = getelementptr inbounds float* %tmp17644, i64 1
+  %tmp17646 = getelementptr inbounds float* %tmp17645, i64 1
+  %tmp17647 = getelementptr inbounds float* %tmp17646, i64 1
+  %tmp17648 = getelementptr inbounds float* %tmp17647, i64 1
+  %tmp17649 = getelementptr inbounds float* %tmp17648, i64 1
+  %tmp17650 = getelementptr inbounds float* %tmp17649, i64 1
+  %tmp17651 = getelementptr inbounds float* %tmp17650, i64 1
+  %tmp17652 = getelementptr inbounds float* %tmp17651, i64 1
+  %tmp17653 = getelementptr inbounds float* %tmp17652, i64 1
+  %tmp17654 = getelementptr inbounds float* %tmp17653, i64 1
+  %tmp17655 = getelementptr inbounds float* %tmp17654, i64 1
+  %tmp17656 = getelementptr inbounds float* %tmp17655, i64 1
+  %tmp17657 = getelementptr inbounds float* %tmp17656, i64 1
+  %tmp17658 = getelementptr inbounds float* %tmp17657, i64 1
+  %tmp17659 = getelementptr inbounds float* %tmp17658, i64 1
+  %tmp17660 = getelementptr inbounds float* %tmp17659, i64 1
+  %tmp17661 = getelementptr inbounds float* %tmp17660, i64 1
+  %tmp17662 = getelementptr inbounds float* %tmp17661, i64 1
+  %tmp17663 = getelementptr inbounds float* %tmp17662, i64 1
+  %tmp17664 = getelementptr inbounds float* %tmp17663, i64 1
+  %tmp17665 = getelementptr inbounds float* %tmp17664, i64 1
+  %tmp17666 = getelementptr inbounds float* %tmp17665, i64 1
+  %tmp17667 = getelementptr inbounds float* %tmp17666, i64 1
+  %tmp17668 = getelementptr inbounds float* %tmp17667, i64 1
+  %tmp17669 = getelementptr inbounds float* %tmp17668, i64 1
+  %tmp17670 = getelementptr inbounds float* %tmp17669, i64 1
+  %tmp17671 = getelementptr inbounds float* %tmp17670, i64 1
+  %tmp17672 = getelementptr inbounds float* %tmp17671, i64 1
+  %tmp17673 = getelementptr inbounds float* %tmp17672, i64 1
+  %tmp17674 = getelementptr inbounds float* %tmp17673, i64 1
+  %tmp17675 = getelementptr inbounds float* %tmp17674, i64 1
+  %tmp17676 = getelementptr inbounds float* %tmp17675, i64 1
+  %tmp17677 = getelementptr inbounds float* %tmp17676, i64 1
+  %tmp17678 = getelementptr inbounds float* %tmp17677, i64 1
+  %tmp17679 = getelementptr inbounds float* %tmp17678, i64 1
+  %tmp17680 = getelementptr inbounds float* %tmp17679, i64 1
+  %tmp17681 = getelementptr inbounds float* %tmp17680, i64 1
+  %tmp17682 = getelementptr inbounds float* %tmp17681, i64 1
+  %tmp17683 = getelementptr inbounds float* %tmp17682, i64 1
+  %tmp17684 = getelementptr inbounds float* %tmp17683, i64 1
+  %tmp17685 = getelementptr inbounds float* %tmp17684, i64 1
+  %tmp17686 = getelementptr inbounds float* %tmp17685, i64 1
+  %tmp17687 = getelementptr inbounds float* %tmp17686, i64 1
+  %tmp17688 = getelementptr inbounds float* %tmp17687, i64 1
+  %tmp17689 = getelementptr inbounds float* %tmp17688, i64 1
+  %tmp17690 = getelementptr inbounds float* %tmp17689, i64 1
+  %tmp17691 = getelementptr inbounds float* %tmp17690, i64 1
+  %tmp17692 = getelementptr inbounds float* %tmp17691, i64 1
+  %tmp17693 = getelementptr inbounds float* %tmp17692, i64 1
+  %tmp17694 = getelementptr inbounds float* %tmp17693, i64 1
+  %tmp17695 = getelementptr inbounds float* %tmp17694, i64 1
+  %tmp17696 = getelementptr inbounds float* %tmp17695, i64 1
+  %tmp17697 = getelementptr inbounds float* %tmp17696, i64 1
+  %tmp17698 = getelementptr inbounds float* %tmp17697, i64 1
+  %tmp17699 = getelementptr inbounds float* %tmp17698, i64 1
+  %tmp17700 = getelementptr inbounds float* %tmp17699, i64 1
+  %tmp17701 = getelementptr inbounds float* %tmp17700, i64 1
+  %tmp17702 = getelementptr inbounds float* %tmp17701, i64 1
+  %tmp17703 = getelementptr inbounds float* %tmp17702, i64 1
+  %tmp17704 = getelementptr inbounds float* %tmp17703, i64 1
+  %tmp17705 = getelementptr inbounds float* %tmp17704, i64 1
+  %tmp17706 = getelementptr inbounds float* %tmp17705, i64 1
+  %tmp17707 = getelementptr inbounds float* %tmp17706, i64 1
+  %tmp17708 = getelementptr inbounds float* %tmp17707, i64 1
+  %tmp17709 = getelementptr inbounds float* %tmp17708, i64 1
+  %tmp17710 = getelementptr inbounds float* %tmp17709, i64 1
+  %tmp17711 = getelementptr inbounds float* %tmp17710, i64 1
+  %tmp17712 = getelementptr inbounds float* %tmp17711, i64 1
+  %tmp17713 = getelementptr inbounds float* %tmp17712, i64 1
+  %tmp17714 = getelementptr inbounds float* %tmp17713, i64 1
+  %tmp17715 = getelementptr inbounds float* %tmp17714, i64 1
+  %tmp17716 = getelementptr inbounds float* %tmp17715, i64 1
+  %tmp17717 = getelementptr inbounds float* %tmp17716, i64 1
+  %tmp17718 = getelementptr inbounds float* %tmp17717, i64 1
+  %tmp17719 = getelementptr inbounds float* %tmp17718, i64 1
+  %tmp17720 = getelementptr inbounds float* %tmp17719, i64 1
+  %tmp17721 = getelementptr inbounds float* %tmp17720, i64 1
+  %tmp17722 = getelementptr inbounds float* %tmp17721, i64 1
+  %tmp17723 = getelementptr inbounds float* %tmp17722, i64 1
+  %tmp17724 = getelementptr inbounds float* %tmp17723, i64 1
+  %tmp17725 = getelementptr inbounds float* %tmp17724, i64 1
+  %tmp17726 = getelementptr inbounds float* %tmp17725, i64 1
+  %tmp17727 = getelementptr inbounds float* %tmp17726, i64 1
+  %tmp17728 = getelementptr inbounds float* %tmp17727, i64 1
+  %tmp17729 = getelementptr inbounds float* %tmp17728, i64 1
+  %tmp17730 = getelementptr inbounds float* %tmp17729, i64 1
+  %tmp17731 = getelementptr inbounds float* %tmp17730, i64 1
+  %tmp17732 = getelementptr inbounds float* %tmp17731, i64 1
+  %tmp17733 = getelementptr inbounds float* %tmp17732, i64 1
+  %tmp17734 = getelementptr inbounds float* %tmp17733, i64 1
+  %tmp17735 = getelementptr inbounds float* %tmp17734, i64 1
+  %tmp17736 = getelementptr inbounds float* %tmp17735, i64 1
+  %tmp17737 = getelementptr inbounds float* %tmp17736, i64 1
+  %tmp17738 = getelementptr inbounds float* %tmp17737, i64 1
+  %tmp17739 = getelementptr inbounds float* %tmp17738, i64 1
+  %tmp17740 = getelementptr inbounds float* %tmp17739, i64 1
+  %tmp17741 = getelementptr inbounds float* %tmp17740, i64 1
+  %tmp17742 = getelementptr inbounds float* %tmp17741, i64 1
+  %tmp17743 = getelementptr inbounds float* %tmp17742, i64 1
+  %tmp17744 = getelementptr inbounds float* %tmp17743, i64 1
+  %tmp17745 = getelementptr inbounds float* %tmp17744, i64 1
+  %tmp17746 = getelementptr inbounds float* %tmp17745, i64 1
+  %tmp17747 = getelementptr inbounds float* %tmp17746, i64 1
+  %tmp17748 = getelementptr inbounds float* %tmp17747, i64 1
+  %tmp17749 = getelementptr inbounds float* %tmp17748, i64 1
+  %tmp17750 = getelementptr inbounds float* %tmp17749, i64 1
+  %tmp17751 = getelementptr inbounds float* %tmp17750, i64 1
+  %tmp17752 = getelementptr inbounds float* %tmp17751, i64 1
+  %tmp17753 = getelementptr inbounds float* %tmp17752, i64 1
+  %tmp17754 = getelementptr inbounds float* %tmp17753, i64 1
+  %tmp17755 = getelementptr inbounds float* %tmp17754, i64 1
+  %tmp17756 = getelementptr inbounds float* %tmp17755, i64 1
+  %tmp17757 = getelementptr inbounds float* %tmp17756, i64 1
+  %tmp17758 = getelementptr inbounds float* %tmp17757, i64 1
+  %tmp17759 = getelementptr inbounds float* %tmp17758, i64 1
+  %tmp17760 = getelementptr inbounds float* %tmp17759, i64 1
+  %tmp17761 = getelementptr inbounds float* %tmp17760, i64 1
+  %tmp17762 = getelementptr inbounds float* %tmp17761, i64 1
+  %tmp17763 = getelementptr inbounds float* %tmp17762, i64 1
+  %tmp17764 = getelementptr inbounds float* %tmp17763, i64 1
+  %tmp17765 = getelementptr inbounds float* %tmp17764, i64 1
+  %tmp17766 = getelementptr inbounds float* %tmp17765, i64 1
+  %tmp17767 = getelementptr inbounds float* %tmp17766, i64 1
+  %tmp17768 = getelementptr inbounds float* %tmp17767, i64 1
+  %tmp17769 = getelementptr inbounds float* %tmp17768, i64 1
+  %tmp17770 = getelementptr inbounds float* %tmp17769, i64 1
+  %tmp17771 = getelementptr inbounds float* %tmp17770, i64 1
+  %tmp17772 = getelementptr inbounds float* %tmp17771, i64 1
+  %tmp17773 = getelementptr inbounds float* %tmp17772, i64 1
+  %tmp17774 = getelementptr inbounds float* %tmp17773, i64 1
+  %tmp17775 = getelementptr inbounds float* %tmp17774, i64 1
+  %tmp17776 = getelementptr inbounds float* %tmp17775, i64 1
+  %tmp17777 = getelementptr inbounds float* %tmp17776, i64 1
+  %tmp17778 = getelementptr inbounds float* %tmp17777, i64 1
+  %tmp17779 = getelementptr inbounds float* %tmp17778, i64 1
+  %tmp17780 = getelementptr inbounds float* %tmp17779, i64 1
+  %tmp17781 = getelementptr inbounds float* %tmp17780, i64 1
+  %tmp17782 = getelementptr inbounds float* %tmp17781, i64 1
+  %tmp17783 = getelementptr inbounds float* %tmp17782, i64 1
+  %tmp17784 = getelementptr inbounds float* %tmp17783, i64 1
+  %tmp17785 = getelementptr inbounds float* %tmp17784, i64 1
+  %tmp17786 = getelementptr inbounds float* %tmp17785, i64 1
+  %tmp17787 = getelementptr inbounds float* %tmp17786, i64 1
+  %tmp17788 = getelementptr inbounds float* %tmp17787, i64 1
+  %tmp17789 = getelementptr inbounds float* %tmp17788, i64 1
+  %tmp17790 = getelementptr inbounds float* %tmp17789, i64 1
+  %tmp17791 = getelementptr inbounds float* %tmp17790, i64 1
+  %tmp17792 = getelementptr inbounds float* %tmp17791, i64 1
+  %tmp17793 = getelementptr inbounds float* %tmp17792, i64 1
+  %tmp17794 = getelementptr inbounds float* %tmp17793, i64 1
+  %tmp17795 = getelementptr inbounds float* %tmp17794, i64 1
+  %tmp17796 = getelementptr inbounds float* %tmp17795, i64 1
+  %tmp17797 = getelementptr inbounds float* %tmp17796, i64 1
+  %tmp17798 = getelementptr inbounds float* %tmp17797, i64 1
+  %tmp17799 = getelementptr inbounds float* %tmp17798, i64 1
+  %tmp17800 = getelementptr inbounds float* %tmp17799, i64 1
+  %tmp17801 = getelementptr inbounds float* %tmp17800, i64 1
+  %tmp17802 = getelementptr inbounds float* %tmp17801, i64 1
+  %tmp17803 = getelementptr inbounds float* %tmp17802, i64 1
+  %tmp17804 = getelementptr inbounds float* %tmp17803, i64 1
+  %tmp17805 = getelementptr inbounds float* %tmp17804, i64 1
+  %tmp17806 = getelementptr inbounds float* %tmp17805, i64 1
+  %tmp17807 = getelementptr inbounds float* %tmp17806, i64 1
+  %tmp17808 = getelementptr inbounds float* %tmp17807, i64 1
+  %tmp17809 = getelementptr inbounds float* %tmp17808, i64 1
+  %tmp17810 = getelementptr inbounds float* %tmp17809, i64 1
+  %tmp17811 = getelementptr inbounds float* %tmp17810, i64 1
+  %tmp17812 = getelementptr inbounds float* %tmp17811, i64 1
+  %tmp17813 = getelementptr inbounds float* %tmp17812, i64 1
+  %tmp17814 = getelementptr inbounds float* %tmp17813, i64 1
+  %tmp17815 = getelementptr inbounds float* %tmp17814, i64 1
+  %tmp17816 = getelementptr inbounds float* %tmp17815, i64 1
+  %tmp17817 = getelementptr inbounds float* %tmp17816, i64 1
+  %tmp17818 = getelementptr inbounds float* %tmp17817, i64 1
+  %tmp17819 = getelementptr inbounds float* %tmp17818, i64 1
+  %tmp17820 = getelementptr inbounds float* %tmp17819, i64 1
+  %tmp17821 = getelementptr inbounds float* %tmp17820, i64 1
+  %tmp17822 = getelementptr inbounds float* %tmp17821, i64 1
+  %tmp17823 = getelementptr inbounds float* %tmp17822, i64 1
+  %tmp17824 = getelementptr inbounds float* %tmp17823, i64 1
+  %tmp17825 = getelementptr inbounds float* %tmp17824, i64 1
+  %tmp17826 = getelementptr inbounds float* %tmp17825, i64 1
+  %tmp17827 = getelementptr inbounds float* %tmp17826, i64 1
+  %tmp17828 = getelementptr inbounds float* %tmp17827, i64 1
+  %tmp17829 = getelementptr inbounds float* %tmp17828, i64 1
+  %tmp17830 = getelementptr inbounds float* %tmp17829, i64 1
+  %tmp17831 = getelementptr inbounds float* %tmp17830, i64 1
+  %tmp17832 = getelementptr inbounds float* %tmp17831, i64 1
+  %tmp17833 = getelementptr inbounds float* %tmp17832, i64 1
+  %tmp17834 = getelementptr inbounds float* %tmp17833, i64 1
+  %tmp17835 = getelementptr inbounds float* %tmp17834, i64 1
+  %tmp17836 = getelementptr inbounds float* %tmp17835, i64 1
+  %tmp17837 = getelementptr inbounds float* %tmp17836, i64 1
+  %tmp17838 = getelementptr inbounds float* %tmp17837, i64 1
+  %tmp17839 = getelementptr inbounds float* %tmp17838, i64 1
+  %tmp17840 = getelementptr inbounds float* %tmp17839, i64 1
+  %tmp17841 = getelementptr inbounds float* %tmp17840, i64 1
+  %tmp17842 = getelementptr inbounds float* %tmp17841, i64 1
+  %tmp17843 = getelementptr inbounds float* %tmp17842, i64 1
+  %tmp17844 = getelementptr inbounds float* %tmp17843, i64 1
+  %tmp17845 = getelementptr inbounds float* %tmp17844, i64 1
+  %tmp17846 = getelementptr inbounds float* %tmp17845, i64 1
+  %tmp17847 = getelementptr inbounds float* %tmp17846, i64 1
+  %tmp17848 = getelementptr inbounds float* %tmp17847, i64 1
+  %tmp17849 = getelementptr inbounds float* %tmp17848, i64 1
+  %tmp17850 = getelementptr inbounds float* %tmp17849, i64 1
+  %tmp17851 = getelementptr inbounds float* %tmp17850, i64 1
+  %tmp17852 = getelementptr inbounds float* %tmp17851, i64 1
+  %tmp17853 = getelementptr inbounds float* %tmp17852, i64 1
+  %tmp17854 = getelementptr inbounds float* %tmp17853, i64 1
+  %tmp17855 = getelementptr inbounds float* %tmp17854, i64 1
+  %tmp17856 = getelementptr inbounds float* %tmp17855, i64 1
+  %tmp17857 = getelementptr inbounds float* %tmp17856, i64 1
+  %tmp17858 = getelementptr inbounds float* %tmp17857, i64 1
+  %tmp17859 = getelementptr inbounds float* %tmp17858, i64 1
+  %tmp17860 = getelementptr inbounds float* %tmp17859, i64 1
+  %tmp17861 = getelementptr inbounds float* %tmp17860, i64 1
+  %tmp17862 = getelementptr inbounds float* %tmp17861, i64 1
+  %tmp17863 = getelementptr inbounds float* %tmp17862, i64 1
+  %tmp17864 = getelementptr inbounds float* %tmp17863, i64 1
+  %tmp17865 = getelementptr inbounds float* %tmp17864, i64 1
+  %tmp17866 = getelementptr inbounds float* %tmp17865, i64 1
+  %tmp17867 = getelementptr inbounds float* %tmp17866, i64 1
+  %tmp17868 = getelementptr inbounds float* %tmp17867, i64 1
+  %tmp17869 = getelementptr inbounds float* %tmp17868, i64 1
+  %tmp17870 = getelementptr inbounds float* %tmp17869, i64 1
+  %tmp17871 = getelementptr inbounds float* %tmp17870, i64 1
+  %tmp17872 = getelementptr inbounds float* %tmp17871, i64 1
+  %tmp17873 = getelementptr inbounds float* %tmp17872, i64 1
+  %tmp17874 = getelementptr inbounds float* %tmp17873, i64 1
+  %tmp17875 = getelementptr inbounds float* %tmp17874, i64 1
+  %tmp17876 = getelementptr inbounds float* %tmp17875, i64 1
+  %tmp17877 = getelementptr inbounds float* %tmp17876, i64 1
+  %tmp17878 = getelementptr inbounds float* %tmp17877, i64 1
+  %tmp17879 = getelementptr inbounds float* %tmp17878, i64 1
+  %tmp17880 = getelementptr inbounds float* %tmp17879, i64 1
+  %tmp17881 = getelementptr inbounds float* %tmp17880, i64 1
+  %tmp17882 = getelementptr inbounds float* %tmp17881, i64 1
+  %tmp17883 = getelementptr inbounds float* %tmp17882, i64 1
+  %tmp17884 = getelementptr inbounds float* %tmp17883, i64 1
+  %tmp17885 = getelementptr inbounds float* %tmp17884, i64 1
+  %tmp17886 = getelementptr inbounds float* %tmp17885, i64 1
+  %tmp17887 = getelementptr inbounds float* %tmp17886, i64 1
+  %tmp17888 = getelementptr inbounds float* %tmp17887, i64 1
+  %tmp17889 = getelementptr inbounds float* %tmp17888, i64 1
+  %tmp17890 = getelementptr inbounds float* %tmp17889, i64 1
+  %tmp17891 = getelementptr inbounds float* %tmp17890, i64 1
+  %tmp17892 = getelementptr inbounds float* %tmp17891, i64 1
+  %tmp17893 = getelementptr inbounds float* %tmp17892, i64 1
+  %tmp17894 = getelementptr inbounds float* %tmp17893, i64 1
+  %tmp17895 = getelementptr inbounds float* %tmp17894, i64 1
+  %tmp17896 = getelementptr inbounds float* %tmp17895, i64 1
+  %tmp17897 = getelementptr inbounds float* %tmp17896, i64 1
+  %tmp17898 = getelementptr inbounds float* %tmp17897, i64 1
+  %tmp17899 = getelementptr inbounds float* %tmp17898, i64 1
+  %tmp17900 = getelementptr inbounds float* %tmp17899, i64 1
+  %tmp17901 = getelementptr inbounds float* %tmp17900, i64 1
+  %tmp17902 = getelementptr inbounds float* %tmp17901, i64 1
+  %tmp17903 = getelementptr inbounds float* %tmp17902, i64 1
+  %tmp17904 = getelementptr inbounds float* %tmp17903, i64 1
+  %tmp17905 = getelementptr inbounds float* %tmp17904, i64 1
+  %tmp17906 = getelementptr inbounds float* %tmp17905, i64 1
+  %tmp17907 = getelementptr inbounds float* %tmp17906, i64 1
+  %tmp17908 = getelementptr inbounds float* %tmp17907, i64 1
+  %tmp17909 = getelementptr inbounds float* %tmp17908, i64 1
+  %tmp17910 = getelementptr inbounds float* %tmp17909, i64 1
+  %tmp17911 = getelementptr inbounds float* %tmp17910, i64 1
+  %tmp17912 = getelementptr inbounds float* %tmp17911, i64 1
+  %tmp17913 = getelementptr inbounds float* %tmp17912, i64 1
+  %tmp17914 = getelementptr inbounds float* %tmp17913, i64 1
+  %tmp17915 = getelementptr inbounds float* %tmp17914, i64 1
+  %tmp17916 = getelementptr inbounds float* %tmp17915, i64 1
+  %tmp17917 = getelementptr inbounds float* %tmp17916, i64 1
+  %tmp17918 = getelementptr inbounds float* %tmp17917, i64 1
+  %tmp17919 = getelementptr inbounds float* %tmp17918, i64 1
+  %tmp17920 = getelementptr inbounds float* %tmp17919, i64 1
+  %tmp17921 = getelementptr inbounds float* %tmp17920, i64 1
+  %tmp17922 = getelementptr inbounds float* %tmp17921, i64 1
+  %tmp17923 = getelementptr inbounds float* %tmp17922, i64 1
+  %tmp17924 = getelementptr inbounds float* %tmp17923, i64 1
+  %tmp17925 = getelementptr inbounds float* %tmp17924, i64 1
+  %tmp17926 = getelementptr inbounds float* %tmp17925, i64 1
+  %tmp17927 = getelementptr inbounds float* %tmp17926, i64 1
+  %tmp17928 = getelementptr inbounds float* %tmp17927, i64 1
+  %tmp17929 = getelementptr inbounds float* %tmp17928, i64 1
+  %tmp17930 = getelementptr inbounds float* %tmp17929, i64 1
+  %tmp17931 = getelementptr inbounds float* %tmp17930, i64 1
+  %tmp17932 = getelementptr inbounds float* %tmp17931, i64 1
+  %tmp17933 = getelementptr inbounds float* %tmp17932, i64 1
+  %tmp17934 = getelementptr inbounds float* %tmp17933, i64 1
+  %tmp17935 = getelementptr inbounds float* %tmp17934, i64 1
+  %tmp17936 = getelementptr inbounds float* %tmp17935, i64 1
+  %tmp17937 = getelementptr inbounds float* %tmp17936, i64 1
+  %tmp17938 = getelementptr inbounds float* %tmp17937, i64 1
+  %tmp17939 = getelementptr inbounds float* %tmp17938, i64 1
+  %tmp17940 = getelementptr inbounds float* %tmp17939, i64 1
+  %tmp17941 = getelementptr inbounds float* %tmp17940, i64 1
+  %tmp17942 = getelementptr inbounds float* %tmp17941, i64 1
+  %tmp17943 = getelementptr inbounds float* %tmp17942, i64 1
+  %tmp17944 = getelementptr inbounds float* %tmp17943, i64 1
+  %tmp17945 = getelementptr inbounds float* %tmp17944, i64 1
+  %tmp17946 = getelementptr inbounds float* %tmp17945, i64 1
+  %tmp17947 = getelementptr inbounds float* %tmp17946, i64 1
+  %tmp17948 = getelementptr inbounds float* %tmp17947, i64 1
+  %tmp17949 = getelementptr inbounds float* %tmp17948, i64 1
+  %tmp17950 = getelementptr inbounds float* %tmp17949, i64 1
+  %tmp17951 = getelementptr inbounds float* %tmp17950, i64 1
+  %tmp17952 = getelementptr inbounds float* %tmp17951, i64 1
+  %tmp17953 = getelementptr inbounds float* %tmp17952, i64 1
+  %tmp17954 = getelementptr inbounds float* %tmp17953, i64 1
+  %tmp17955 = getelementptr inbounds float* %tmp17954, i64 1
+  %tmp17956 = getelementptr inbounds float* %tmp17955, i64 1
+  %tmp17957 = getelementptr inbounds float* %tmp17956, i64 1
+  %tmp17958 = getelementptr inbounds float* %tmp17957, i64 1
+  %tmp17959 = getelementptr inbounds float* %tmp17958, i64 1
+  %tmp17960 = getelementptr inbounds float* %tmp17959, i64 1
+  %tmp17961 = getelementptr inbounds float* %tmp17960, i64 1
+  %tmp17962 = getelementptr inbounds float* %tmp17961, i64 1
+  %tmp17963 = getelementptr inbounds float* %tmp17962, i64 1
+  %tmp17964 = getelementptr inbounds float* %tmp17963, i64 1
+  %tmp17965 = getelementptr inbounds float* %tmp17964, i64 1
+  %tmp17966 = getelementptr inbounds float* %tmp17965, i64 1
+  %tmp17967 = getelementptr inbounds float* %tmp17966, i64 1
+  %tmp17968 = getelementptr inbounds float* %tmp17967, i64 1
+  %tmp17969 = getelementptr inbounds float* %tmp17968, i64 1
+  %tmp17970 = getelementptr inbounds float* %tmp17969, i64 1
+  %tmp17971 = getelementptr inbounds float* %tmp17970, i64 1
+  %tmp17972 = getelementptr inbounds float* %tmp17971, i64 1
+  %tmp17973 = getelementptr inbounds float* %tmp17972, i64 1
+  %tmp17974 = getelementptr inbounds float* %tmp17973, i64 1
+  %tmp17975 = getelementptr inbounds float* %tmp17974, i64 1
+  %tmp17976 = getelementptr inbounds float* %tmp17975, i64 1
+  %tmp17977 = getelementptr inbounds float* %tmp17976, i64 1
+  %tmp17978 = getelementptr inbounds float* %tmp17977, i64 1
+  %tmp17979 = getelementptr inbounds float* %tmp17978, i64 1
+  %tmp17980 = getelementptr inbounds float* %tmp17979, i64 1
+  %tmp17981 = getelementptr inbounds float* %tmp17980, i64 1
+  %tmp17982 = getelementptr inbounds float* %tmp17981, i64 1
+  %tmp17983 = getelementptr inbounds float* %tmp17982, i64 1
+  %tmp17984 = getelementptr inbounds float* %tmp17983, i64 1
+  %tmp17985 = getelementptr inbounds float* %tmp17984, i64 1
+  %tmp17986 = getelementptr inbounds float* %tmp17985, i64 1
+  %tmp17987 = getelementptr inbounds float* %tmp17986, i64 1
+  %tmp17988 = getelementptr inbounds float* %tmp17987, i64 1
+  %tmp17989 = getelementptr inbounds float* %tmp17988, i64 1
+  %tmp17990 = getelementptr inbounds float* %tmp17989, i64 1
+  %tmp17991 = getelementptr inbounds float* %tmp17990, i64 1
+  %tmp17992 = getelementptr inbounds float* %tmp17991, i64 1
+  %tmp17993 = getelementptr inbounds float* %tmp17992, i64 1
+  %tmp17994 = getelementptr inbounds float* %tmp17993, i64 1
+  %tmp17995 = getelementptr inbounds float* %tmp17994, i64 1
+  %tmp17996 = getelementptr inbounds float* %tmp17995, i64 1
+  %tmp17997 = getelementptr inbounds float* %tmp17996, i64 1
+  %tmp17998 = getelementptr inbounds float* %tmp17997, i64 1
+  %tmp17999 = getelementptr inbounds float* %tmp17998, i64 1
+  %tmp18000 = getelementptr inbounds float* %tmp17999, i64 1
+  %tmp18001 = getelementptr inbounds float* %tmp18000, i64 1
+  %tmp18002 = getelementptr inbounds float* %tmp18001, i64 1
+  %tmp18003 = getelementptr inbounds float* %tmp18002, i64 1
+  %tmp18004 = getelementptr inbounds float* %tmp18003, i64 1
+  %tmp18005 = getelementptr inbounds float* %tmp18004, i64 1
+  %tmp18006 = getelementptr inbounds float* %tmp18005, i64 1
+  %tmp18007 = getelementptr inbounds float* %tmp18006, i64 1
+  %tmp18008 = getelementptr inbounds float* %tmp18007, i64 1
+  %tmp18009 = getelementptr inbounds float* %tmp18008, i64 1
+  %tmp18010 = getelementptr inbounds float* %tmp18009, i64 1
+  %tmp18011 = getelementptr inbounds float* %tmp18010, i64 1
+  %tmp18012 = getelementptr inbounds float* %tmp18011, i64 1
+  %tmp18013 = getelementptr inbounds float* %tmp18012, i64 1
+  %tmp18014 = getelementptr inbounds float* %tmp18013, i64 1
+  %tmp18015 = getelementptr inbounds float* %tmp18014, i64 1
+  %tmp18016 = getelementptr inbounds float* %tmp18015, i64 1
+  %tmp18017 = getelementptr inbounds float* %tmp18016, i64 1
+  %tmp18018 = getelementptr inbounds float* %tmp18017, i64 1
+  %tmp18019 = getelementptr inbounds float* %tmp18018, i64 1
+  %tmp18020 = getelementptr inbounds float* %tmp18019, i64 1
+  %tmp18021 = getelementptr inbounds float* %tmp18020, i64 1
+  %tmp18022 = getelementptr inbounds float* %tmp18021, i64 1
+  %tmp18023 = getelementptr inbounds float* %tmp18022, i64 1
+  %tmp18024 = getelementptr inbounds float* %tmp18023, i64 1
+  %tmp18025 = getelementptr inbounds float* %tmp18024, i64 1
+  %tmp18026 = getelementptr inbounds float* %tmp18025, i64 1
+  %tmp18027 = getelementptr inbounds float* %tmp18026, i64 1
+  %tmp18028 = getelementptr inbounds float* %tmp18027, i64 1
+  %tmp18029 = getelementptr inbounds float* %tmp18028, i64 1
+  %tmp18030 = getelementptr inbounds float* %tmp18029, i64 1
+  %tmp18031 = getelementptr inbounds float* %tmp18030, i64 1
+  %tmp18032 = getelementptr inbounds float* %tmp18031, i64 1
+  %tmp18033 = getelementptr inbounds float* %tmp18032, i64 1
+  %tmp18034 = getelementptr inbounds float* %tmp18033, i64 1
+  %tmp18035 = getelementptr inbounds float* %tmp18034, i64 1
+  %tmp18036 = getelementptr inbounds float* %tmp18035, i64 1
+  %tmp18037 = getelementptr inbounds float* %tmp18036, i64 1
+  %tmp18038 = getelementptr inbounds float* %tmp18037, i64 1
+  %tmp18039 = getelementptr inbounds float* %tmp18038, i64 1
+  %tmp18040 = getelementptr inbounds float* %tmp18039, i64 1
+  %tmp18041 = getelementptr inbounds float* %tmp18040, i64 1
+  %tmp18042 = getelementptr inbounds float* %tmp18041, i64 1
+  %tmp18043 = getelementptr inbounds float* %tmp18042, i64 1
+  %tmp18044 = getelementptr inbounds float* %tmp18043, i64 1
+  %tmp18045 = getelementptr inbounds float* %tmp18044, i64 1
+  %tmp18046 = getelementptr inbounds float* %tmp18045, i64 1
+  %tmp18047 = getelementptr inbounds float* %tmp18046, i64 1
+  %tmp18048 = getelementptr inbounds float* %tmp18047, i64 1
+  %tmp18049 = getelementptr inbounds float* %tmp18048, i64 1
+  %tmp18050 = getelementptr inbounds float* %tmp18049, i64 1
+  %tmp18051 = getelementptr inbounds float* %tmp18050, i64 1
+  %tmp18052 = getelementptr inbounds float* %tmp18051, i64 1
+  %tmp18053 = getelementptr inbounds float* %tmp18052, i64 1
+  %tmp18054 = getelementptr inbounds float* %tmp18053, i64 1
+  %tmp18055 = getelementptr inbounds float* %tmp18054, i64 1
+  %tmp18056 = getelementptr inbounds float* %tmp18055, i64 1
+  %tmp18057 = getelementptr inbounds float* %tmp18056, i64 1
+  %tmp18058 = getelementptr inbounds float* %tmp18057, i64 1
+  %tmp18059 = getelementptr inbounds float* %tmp18058, i64 1
+  %tmp18060 = getelementptr inbounds float* %tmp18059, i64 1
+  %tmp18061 = getelementptr inbounds float* %tmp18060, i64 1
+  %tmp18062 = getelementptr inbounds float* %tmp18061, i64 1
+  %tmp18063 = getelementptr inbounds float* %tmp18062, i64 1
+  %tmp18064 = getelementptr inbounds float* %tmp18063, i64 1
+  %tmp18065 = getelementptr inbounds float* %tmp18064, i64 1
+  %tmp18066 = getelementptr inbounds float* %tmp18065, i64 1
+  %tmp18067 = getelementptr inbounds float* %tmp18066, i64 1
+  %tmp18068 = getelementptr inbounds float* %tmp18067, i64 1
+  %tmp18069 = getelementptr inbounds float* %tmp18068, i64 1
+  %tmp18070 = getelementptr inbounds float* %tmp18069, i64 1
+  %tmp18071 = getelementptr inbounds float* %tmp18070, i64 1
+  %tmp18072 = getelementptr inbounds float* %tmp18071, i64 1
+  %tmp18073 = getelementptr inbounds float* %tmp18072, i64 1
+  %tmp18074 = getelementptr inbounds float* %tmp18073, i64 1
+  %tmp18075 = getelementptr inbounds float* %tmp18074, i64 1
+  %tmp18076 = getelementptr inbounds float* %tmp18075, i64 1
+  %tmp18077 = getelementptr inbounds float* %tmp18076, i64 1
+  %tmp18078 = getelementptr inbounds float* %tmp18077, i64 1
+  %tmp18079 = getelementptr inbounds float* %tmp18078, i64 1
+  %tmp18080 = getelementptr inbounds float* %tmp18079, i64 1
+  %tmp18081 = getelementptr inbounds float* %tmp18080, i64 1
+  %tmp18082 = getelementptr inbounds float* %tmp18081, i64 1
+  %tmp18083 = getelementptr inbounds float* %tmp18082, i64 1
+  %tmp18084 = getelementptr inbounds float* %tmp18083, i64 1
+  %tmp18085 = getelementptr inbounds float* %tmp18084, i64 1
+  %tmp18086 = getelementptr inbounds float* %tmp18085, i64 1
+  %tmp18087 = getelementptr inbounds float* %tmp18086, i64 1
+  %tmp18088 = getelementptr inbounds float* %tmp18087, i64 1
+  %tmp18089 = getelementptr inbounds float* %tmp18088, i64 1
+  %tmp18090 = getelementptr inbounds float* %tmp18089, i64 1
+  %tmp18091 = getelementptr inbounds float* %tmp18090, i64 1
+  %tmp18092 = getelementptr inbounds float* %tmp18091, i64 1
+  %tmp18093 = getelementptr inbounds float* %tmp18092, i64 1
+  %tmp18094 = getelementptr inbounds float* %tmp18093, i64 1
+  %tmp18095 = getelementptr inbounds float* %tmp18094, i64 1
+  %tmp18096 = getelementptr inbounds float* %tmp18095, i64 1
+  %tmp18097 = getelementptr inbounds float* %tmp18096, i64 1
+  %tmp18098 = getelementptr inbounds float* %tmp18097, i64 1
+  %tmp18099 = getelementptr inbounds float* %tmp18098, i64 1
+  %tmp18100 = getelementptr inbounds float* %tmp18099, i64 1
+  %tmp18101 = getelementptr inbounds float* %tmp18100, i64 1
+  %tmp18102 = getelementptr inbounds float* %tmp18101, i64 1
+  %tmp18103 = getelementptr inbounds float* %tmp18102, i64 1
+  %tmp18104 = getelementptr inbounds float* %tmp18103, i64 1
+  %tmp18105 = getelementptr inbounds float* %tmp18104, i64 1
+  %tmp18106 = getelementptr inbounds float* %tmp18105, i64 1
+  %tmp18107 = getelementptr inbounds float* %tmp18106, i64 1
+  %tmp18108 = getelementptr inbounds float* %tmp18107, i64 1
+  %tmp18109 = getelementptr inbounds float* %tmp18108, i64 1
+  %tmp18110 = getelementptr inbounds float* %tmp18109, i64 1
+  %tmp18111 = getelementptr inbounds float* %tmp18110, i64 1
+  %tmp18112 = getelementptr inbounds float* %tmp18111, i64 1
+  %tmp18113 = getelementptr inbounds float* %tmp18112, i64 1
+  %tmp18114 = getelementptr inbounds float* %tmp18113, i64 1
+  %tmp18115 = getelementptr inbounds float* %tmp18114, i64 1
+  %tmp18116 = getelementptr inbounds float* %tmp18115, i64 1
+  %tmp18117 = getelementptr inbounds float* %tmp18116, i64 1
+  %tmp18118 = getelementptr inbounds float* %tmp18117, i64 1
+  %tmp18119 = getelementptr inbounds float* %tmp18118, i64 1
+  %tmp18120 = getelementptr inbounds float* %tmp18119, i64 1
+  %tmp18121 = getelementptr inbounds float* %tmp18120, i64 1
+  %tmp18122 = getelementptr inbounds float* %tmp18121, i64 1
+  %tmp18123 = getelementptr inbounds float* %tmp18122, i64 1
+  %tmp18124 = getelementptr inbounds float* %tmp18123, i64 1
+  %tmp18125 = getelementptr inbounds float* %tmp18124, i64 1
+  %tmp18126 = getelementptr inbounds float* %tmp18125, i64 1
+  %tmp18127 = getelementptr inbounds float* %tmp18126, i64 1
+  %tmp18128 = getelementptr inbounds float* %tmp18127, i64 1
+  %tmp18129 = getelementptr inbounds float* %tmp18128, i64 1
+  %tmp18130 = getelementptr inbounds float* %tmp18129, i64 1
+  %tmp18131 = getelementptr inbounds float* %tmp18130, i64 1
+  %tmp18132 = getelementptr inbounds float* %tmp18131, i64 1
+  %tmp18133 = getelementptr inbounds float* %tmp18132, i64 1
+  %tmp18134 = getelementptr inbounds float* %tmp18133, i64 1
+  %tmp18135 = getelementptr inbounds float* %tmp18134, i64 1
+  %tmp18136 = getelementptr inbounds float* %tmp18135, i64 1
+  %tmp18137 = getelementptr inbounds float* %tmp18136, i64 1
+  %tmp18138 = getelementptr inbounds float* %tmp18137, i64 1
+  %tmp18139 = getelementptr inbounds float* %tmp18138, i64 1
+  %tmp18140 = getelementptr inbounds float* %tmp18139, i64 1
+  %tmp18141 = getelementptr inbounds float* %tmp18140, i64 1
+  %tmp18142 = getelementptr inbounds float* %tmp18141, i64 1
+  %tmp18143 = getelementptr inbounds float* %tmp18142, i64 1
+  %tmp18144 = getelementptr inbounds float* %tmp18143, i64 1
+  %tmp18145 = getelementptr inbounds float* %tmp18144, i64 1
+  %tmp18146 = getelementptr inbounds float* %tmp18145, i64 1
+  %tmp18147 = getelementptr inbounds float* %tmp18146, i64 1
+  %tmp18148 = getelementptr inbounds float* %tmp18147, i64 1
+  %tmp18149 = getelementptr inbounds float* %tmp18148, i64 1
+  %tmp18150 = getelementptr inbounds float* %tmp18149, i64 1
+  %tmp18151 = getelementptr inbounds float* %tmp18150, i64 1
+  %tmp18152 = getelementptr inbounds float* %tmp18151, i64 1
+  %tmp18153 = getelementptr inbounds float* %tmp18152, i64 1
+  %tmp18154 = getelementptr inbounds float* %tmp18153, i64 1
+  %tmp18155 = getelementptr inbounds float* %tmp18154, i64 1
+  %tmp18156 = getelementptr inbounds float* %tmp18155, i64 1
+  %tmp18157 = getelementptr inbounds float* %tmp18156, i64 1
+  %tmp18158 = getelementptr inbounds float* %tmp18157, i64 1
+  %tmp18159 = getelementptr inbounds float* %tmp18158, i64 1
+  %tmp18160 = getelementptr inbounds float* %tmp18159, i64 1
+  %tmp18161 = getelementptr inbounds float* %tmp18160, i64 1
+  %tmp18162 = getelementptr inbounds float* %tmp18161, i64 1
+  %tmp18163 = getelementptr inbounds float* %tmp18162, i64 1
+  %tmp18164 = getelementptr inbounds float* %tmp18163, i64 1
+  %tmp18165 = getelementptr inbounds float* %tmp18164, i64 1
+  %tmp18166 = getelementptr inbounds float* %tmp18165, i64 1
+  %tmp18167 = getelementptr inbounds float* %tmp18166, i64 1
+  %tmp18168 = getelementptr inbounds float* %tmp18167, i64 1
+  %tmp18169 = getelementptr inbounds float* %tmp18168, i64 1
+  %tmp18170 = getelementptr inbounds float* %tmp18169, i64 1
+  %tmp18171 = getelementptr inbounds float* %tmp18170, i64 1
+  %tmp18172 = getelementptr inbounds float* %tmp18171, i64 1
+  %tmp18173 = getelementptr inbounds float* %tmp18172, i64 1
+  %tmp18174 = getelementptr inbounds float* %tmp18173, i64 1
+  %tmp18175 = getelementptr inbounds float* %tmp18174, i64 1
+  %tmp18176 = getelementptr inbounds float* %tmp18175, i64 1
+  %tmp18177 = getelementptr inbounds float* %tmp18176, i64 1
+  %tmp18178 = getelementptr inbounds float* %tmp18177, i64 1
+  %tmp18179 = getelementptr inbounds float* %tmp18178, i64 1
+  %tmp18180 = getelementptr inbounds float* %tmp18179, i64 1
+  %tmp18181 = getelementptr inbounds float* %tmp18180, i64 1
+  %tmp18182 = getelementptr inbounds float* %tmp18181, i64 1
+  %tmp18183 = getelementptr inbounds float* %tmp18182, i64 1
+  %tmp18184 = getelementptr inbounds float* %tmp18183, i64 1
+  %tmp18185 = getelementptr inbounds float* %tmp18184, i64 1
+  %tmp18186 = getelementptr inbounds float* %tmp18185, i64 1
+  %tmp18187 = getelementptr inbounds float* %tmp18186, i64 1
+  %tmp18188 = getelementptr inbounds float* %tmp18187, i64 1
+  %tmp18189 = getelementptr inbounds float* %tmp18188, i64 1
+  %tmp18190 = getelementptr inbounds float* %tmp18189, i64 1
+  %tmp18191 = getelementptr inbounds float* %tmp18190, i64 1
+  %tmp18192 = getelementptr inbounds float* %tmp18191, i64 1
+  %tmp18193 = getelementptr inbounds float* %tmp18192, i64 1
+  %tmp18194 = getelementptr inbounds float* %tmp18193, i64 1
+  %tmp18195 = getelementptr inbounds float* %tmp18194, i64 1
+  %tmp18196 = getelementptr inbounds float* %tmp18195, i64 1
+  %tmp18197 = getelementptr inbounds float* %tmp18196, i64 1
+  %tmp18198 = getelementptr inbounds float* %tmp18197, i64 1
+  %tmp18199 = getelementptr inbounds float* %tmp18198, i64 1
+  %tmp18200 = getelementptr inbounds float* %tmp18199, i64 1
+  %tmp18201 = getelementptr inbounds float* %tmp18200, i64 1
+  %tmp18202 = getelementptr inbounds float* %tmp18201, i64 1
+  %tmp18203 = getelementptr inbounds float* %tmp18202, i64 1
+  %tmp18204 = getelementptr inbounds float* %tmp18203, i64 1
+  %tmp18205 = getelementptr inbounds float* %tmp18204, i64 1
+  %tmp18206 = getelementptr inbounds float* %tmp18205, i64 1
+  %tmp18207 = getelementptr inbounds float* %tmp18206, i64 1
+  %tmp18208 = getelementptr inbounds float* %tmp18207, i64 1
+  %tmp18209 = getelementptr inbounds float* %tmp18208, i64 1
+  %tmp18210 = getelementptr inbounds float* %tmp18209, i64 1
+  %tmp18211 = getelementptr inbounds float* %tmp18210, i64 1
+  %tmp18212 = getelementptr inbounds float* %tmp18211, i64 1
+  %tmp18213 = getelementptr inbounds float* %tmp18212, i64 1
+  %tmp18214 = getelementptr inbounds float* %tmp18213, i64 1
+  %tmp18215 = getelementptr inbounds float* %tmp18214, i64 1
+  %tmp18216 = getelementptr inbounds float* %tmp18215, i64 1
+  %tmp18217 = getelementptr inbounds float* %tmp18216, i64 1
+  %tmp18218 = getelementptr inbounds float* %tmp18217, i64 1
+  %tmp18219 = getelementptr inbounds float* %tmp18218, i64 1
+  %tmp18220 = getelementptr inbounds float* %tmp18219, i64 1
+  %tmp18221 = getelementptr inbounds float* %tmp18220, i64 1
+  %tmp18222 = getelementptr inbounds float* %tmp18221, i64 1
+  %tmp18223 = getelementptr inbounds float* %tmp18222, i64 1
+  %tmp18224 = getelementptr inbounds float* %tmp18223, i64 1
+  %tmp18225 = getelementptr inbounds float* %tmp18224, i64 1
+  %tmp18226 = getelementptr inbounds float* %tmp18225, i64 1
+  %tmp18227 = getelementptr inbounds float* %tmp18226, i64 1
+  %tmp18228 = getelementptr inbounds float* %tmp18227, i64 1
+  %tmp18229 = getelementptr inbounds float* %tmp18228, i64 1
+  %tmp18230 = getelementptr inbounds float* %tmp18229, i64 1
+  %tmp18231 = getelementptr inbounds float* %tmp18230, i64 1
+  %tmp18232 = getelementptr inbounds float* %tmp18231, i64 1
+  %tmp18233 = getelementptr inbounds float* %tmp18232, i64 1
+  %tmp18234 = getelementptr inbounds float* %tmp18233, i64 1
+  %tmp18235 = getelementptr inbounds float* %tmp18234, i64 1
+  %tmp18236 = getelementptr inbounds float* %tmp18235, i64 1
+  %tmp18237 = getelementptr inbounds float* %tmp18236, i64 1
+  %tmp18238 = getelementptr inbounds float* %tmp18237, i64 1
+  %tmp18239 = getelementptr inbounds float* %tmp18238, i64 1
+  %tmp18240 = getelementptr inbounds float* %tmp18239, i64 1
+  %tmp18241 = getelementptr inbounds float* %tmp18240, i64 1
+  %tmp18242 = getelementptr inbounds float* %tmp18241, i64 1
+  %tmp18243 = getelementptr inbounds float* %tmp18242, i64 1
+  %tmp18244 = getelementptr inbounds float* %tmp18243, i64 1
+  %tmp18245 = getelementptr inbounds float* %tmp18244, i64 1
+  %tmp18246 = getelementptr inbounds float* %tmp18245, i64 1
+  %tmp18247 = getelementptr inbounds float* %tmp18246, i64 1
+  %tmp18248 = getelementptr inbounds float* %tmp18247, i64 1
+  %tmp18249 = getelementptr inbounds float* %tmp18248, i64 1
+  %tmp18250 = getelementptr inbounds float* %tmp18249, i64 1
+  %tmp18251 = getelementptr inbounds float* %tmp18250, i64 1
+  %tmp18252 = getelementptr inbounds float* %tmp18251, i64 1
+  %tmp18253 = getelementptr inbounds float* %tmp18252, i64 1
+  %tmp18254 = getelementptr inbounds float* %tmp18253, i64 1
+  %tmp18255 = getelementptr inbounds float* %tmp18254, i64 1
+  %tmp18256 = getelementptr inbounds float* %tmp18255, i64 1
+  %tmp18257 = getelementptr inbounds float* %tmp18256, i64 1
+  %tmp18258 = getelementptr inbounds float* %tmp18257, i64 1
+  %tmp18259 = getelementptr inbounds float* %tmp18258, i64 1
+  %tmp18260 = getelementptr inbounds float* %tmp18259, i64 1
+  %tmp18261 = getelementptr inbounds float* %tmp18260, i64 1
+  %tmp18262 = getelementptr inbounds float* %tmp18261, i64 1
+  %tmp18263 = getelementptr inbounds float* %tmp18262, i64 1
+  %tmp18264 = getelementptr inbounds float* %tmp18263, i64 1
+  %tmp18265 = getelementptr inbounds float* %tmp18264, i64 1
+  %tmp18266 = getelementptr inbounds float* %tmp18265, i64 1
+  %tmp18267 = getelementptr inbounds float* %tmp18266, i64 1
+  %tmp18268 = getelementptr inbounds float* %tmp18267, i64 1
+  %tmp18269 = getelementptr inbounds float* %tmp18268, i64 1
+  %tmp18270 = getelementptr inbounds float* %tmp18269, i64 1
+  %tmp18271 = getelementptr inbounds float* %tmp18270, i64 1
+  %tmp18272 = getelementptr inbounds float* %tmp18271, i64 1
+  %tmp18273 = getelementptr inbounds float* %tmp18272, i64 1
+  %tmp18274 = getelementptr inbounds float* %tmp18273, i64 1
+  %tmp18275 = getelementptr inbounds float* %tmp18274, i64 1
+  %tmp18276 = getelementptr inbounds float* %tmp18275, i64 1
+  %tmp18277 = getelementptr inbounds float* %tmp18276, i64 1
+  %tmp18278 = getelementptr inbounds float* %tmp18277, i64 1
+  %tmp18279 = getelementptr inbounds float* %tmp18278, i64 1
+  %tmp18280 = getelementptr inbounds float* %tmp18279, i64 1
+  %tmp18281 = getelementptr inbounds float* %tmp18280, i64 1
+  %tmp18282 = getelementptr inbounds float* %tmp18281, i64 1
+  %tmp18283 = getelementptr inbounds float* %tmp18282, i64 1
+  %tmp18284 = getelementptr inbounds float* %tmp18283, i64 1
+  %tmp18285 = getelementptr inbounds float* %tmp18284, i64 1
+  %tmp18286 = getelementptr inbounds float* %tmp18285, i64 1
+  %tmp18287 = getelementptr inbounds float* %tmp18286, i64 1
+  %tmp18288 = getelementptr inbounds float* %tmp18287, i64 1
+  %tmp18289 = getelementptr inbounds float* %tmp18288, i64 1
+  %tmp18290 = getelementptr inbounds float* %tmp18289, i64 1
+  %tmp18291 = getelementptr inbounds float* %tmp18290, i64 1
+  %tmp18292 = getelementptr inbounds float* %tmp18291, i64 1
+  %tmp18293 = getelementptr inbounds float* %tmp18292, i64 1
+  %tmp18294 = getelementptr inbounds float* %tmp18293, i64 1
+  %tmp18295 = getelementptr inbounds float* %tmp18294, i64 1
+  %tmp18296 = getelementptr inbounds float* %tmp18295, i64 1
+  %tmp18297 = getelementptr inbounds float* %tmp18296, i64 1
+  %tmp18298 = getelementptr inbounds float* %tmp18297, i64 1
+  %tmp18299 = getelementptr inbounds float* %tmp18298, i64 1
+  %tmp18300 = getelementptr inbounds float* %tmp18299, i64 1
+  %tmp18301 = getelementptr inbounds float* %tmp18300, i64 1
+  %tmp18302 = getelementptr inbounds float* %tmp18301, i64 1
+  %tmp18303 = getelementptr inbounds float* %tmp18302, i64 1
+  %tmp18304 = getelementptr inbounds float* %tmp18303, i64 1
+  %tmp18305 = getelementptr inbounds float* %tmp18304, i64 1
+  %tmp18306 = getelementptr inbounds float* %tmp18305, i64 1
+  %tmp18307 = getelementptr inbounds float* %tmp18306, i64 1
+  %tmp18308 = getelementptr inbounds float* %tmp18307, i64 1
+  %tmp18309 = getelementptr inbounds float* %tmp18308, i64 1
+  %tmp18310 = getelementptr inbounds float* %tmp18309, i64 1
+  %tmp18311 = getelementptr inbounds float* %tmp18310, i64 1
+  %tmp18312 = getelementptr inbounds float* %tmp18311, i64 1
+  %tmp18313 = getelementptr inbounds float* %tmp18312, i64 1
+  %tmp18314 = getelementptr inbounds float* %tmp18313, i64 1
+  %tmp18315 = getelementptr inbounds float* %tmp18314, i64 1
+  %tmp18316 = getelementptr inbounds float* %tmp18315, i64 1
+  %tmp18317 = getelementptr inbounds float* %tmp18316, i64 1
+  %tmp18318 = getelementptr inbounds float* %tmp18317, i64 1
+  %tmp18319 = getelementptr inbounds float* %tmp18318, i64 1
+  %tmp18320 = getelementptr inbounds float* %tmp18319, i64 1
+  %tmp18321 = getelementptr inbounds float* %tmp18320, i64 1
+  %tmp18322 = getelementptr inbounds float* %tmp18321, i64 1
+  %tmp18323 = getelementptr inbounds float* %tmp18322, i64 1
+  %tmp18324 = getelementptr inbounds float* %tmp18323, i64 1
+  %tmp18325 = getelementptr inbounds float* %tmp18324, i64 1
+  %tmp18326 = getelementptr inbounds float* %tmp18325, i64 1
+  %tmp18327 = getelementptr inbounds float* %tmp18326, i64 1
+  %tmp18328 = getelementptr inbounds float* %tmp18327, i64 1
+  %tmp18329 = getelementptr inbounds float* %tmp18328, i64 1
+  %tmp18330 = getelementptr inbounds float* %tmp18329, i64 1
+  %tmp18331 = getelementptr inbounds float* %tmp18330, i64 1
+  %tmp18332 = getelementptr inbounds float* %tmp18331, i64 1
+  %tmp18333 = getelementptr inbounds float* %tmp18332, i64 1
+  %tmp18334 = getelementptr inbounds float* %tmp18333, i64 1
+  %tmp18335 = getelementptr inbounds float* %tmp18334, i64 1
+  %tmp18336 = getelementptr inbounds float* %tmp18335, i64 1
+  %tmp18337 = getelementptr inbounds float* %tmp18336, i64 1
+  %tmp18338 = getelementptr inbounds float* %tmp18337, i64 1
+  %tmp18339 = getelementptr inbounds float* %tmp18338, i64 1
+  %tmp18340 = getelementptr inbounds float* %tmp18339, i64 1
+  %tmp18341 = getelementptr inbounds float* %tmp18340, i64 1
+  %tmp18342 = getelementptr inbounds float* %tmp18341, i64 1
+  %tmp18343 = getelementptr inbounds float* %tmp18342, i64 1
+  %tmp18344 = getelementptr inbounds float* %tmp18343, i64 1
+  %tmp18345 = getelementptr inbounds float* %tmp18344, i64 1
+  %tmp18346 = getelementptr inbounds float* %tmp18345, i64 1
+  %tmp18347 = getelementptr inbounds float* %tmp18346, i64 1
+  %tmp18348 = getelementptr inbounds float* %tmp18347, i64 1
+  %tmp18349 = getelementptr inbounds float* %tmp18348, i64 1
+  %tmp18350 = getelementptr inbounds float* %tmp18349, i64 1
+  %tmp18351 = getelementptr inbounds float* %tmp18350, i64 1
+  %tmp18352 = getelementptr inbounds float* %tmp18351, i64 1
+  %tmp18353 = getelementptr inbounds float* %tmp18352, i64 1
+  %tmp18354 = getelementptr inbounds float* %tmp18353, i64 1
+  %tmp18355 = getelementptr inbounds float* %tmp18354, i64 1
+  %tmp18356 = getelementptr inbounds float* %tmp18355, i64 1
+  %tmp18357 = getelementptr inbounds float* %tmp18356, i64 1
+  %tmp18358 = getelementptr inbounds float* %tmp18357, i64 1
+  %tmp18359 = getelementptr inbounds float* %tmp18358, i64 1
+  %tmp18360 = getelementptr inbounds float* %tmp18359, i64 1
+  %tmp18361 = getelementptr inbounds float* %tmp18360, i64 1
+  %tmp18362 = getelementptr inbounds float* %tmp18361, i64 1
+  %tmp18363 = getelementptr inbounds float* %tmp18362, i64 1
+  %tmp18364 = getelementptr inbounds float* %tmp18363, i64 1
+  %tmp18365 = getelementptr inbounds float* %tmp18364, i64 1
+  %tmp18366 = getelementptr inbounds float* %tmp18365, i64 1
+  %tmp18367 = getelementptr inbounds float* %tmp18366, i64 1
+  %tmp18368 = getelementptr inbounds float* %tmp18367, i64 1
+  %tmp18369 = getelementptr inbounds float* %tmp18368, i64 1
+  %tmp18370 = getelementptr inbounds float* %tmp18369, i64 1
+  %tmp18371 = getelementptr inbounds float* %tmp18370, i64 1
+  %tmp18372 = getelementptr inbounds float* %tmp18371, i64 1
+  %tmp18373 = getelementptr inbounds float* %tmp18372, i64 1
+  %tmp18374 = getelementptr inbounds float* %tmp18373, i64 1
+  %tmp18375 = getelementptr inbounds float* %tmp18374, i64 1
+  %tmp18376 = getelementptr inbounds float* %tmp18375, i64 1
+  %tmp18377 = getelementptr inbounds float* %tmp18376, i64 1
+  %tmp18378 = getelementptr inbounds float* %tmp18377, i64 1
+  %tmp18379 = getelementptr inbounds float* %tmp18378, i64 1
+  %tmp18380 = getelementptr inbounds float* %tmp18379, i64 1
+  %tmp18381 = getelementptr inbounds float* %tmp18380, i64 1
+  %tmp18382 = getelementptr inbounds float* %tmp18381, i64 1
+  %tmp18383 = getelementptr inbounds float* %tmp18382, i64 1
+  %tmp18384 = getelementptr inbounds float* %tmp18383, i64 1
+  %tmp18385 = getelementptr inbounds float* %tmp18384, i64 1
+  %tmp18386 = getelementptr inbounds float* %tmp18385, i64 1
+  %tmp18387 = getelementptr inbounds float* %tmp18386, i64 1
+  %tmp18388 = getelementptr inbounds float* %tmp18387, i64 1
+  %tmp18389 = getelementptr inbounds float* %tmp18388, i64 1
+  %tmp18390 = getelementptr inbounds float* %tmp18389, i64 1
+  %tmp18391 = getelementptr inbounds float* %tmp18390, i64 1
+  %tmp18392 = getelementptr inbounds float* %tmp18391, i64 1
+  %tmp18393 = getelementptr inbounds float* %tmp18392, i64 1
+  %tmp18394 = getelementptr inbounds float* %tmp18393, i64 1
+  %tmp18395 = getelementptr inbounds float* %tmp18394, i64 1
+  %tmp18396 = getelementptr inbounds float* %tmp18395, i64 1
+  %tmp18397 = getelementptr inbounds float* %tmp18396, i64 1
+  %tmp18398 = getelementptr inbounds float* %tmp18397, i64 1
+  %tmp18399 = getelementptr inbounds float* %tmp18398, i64 1
+  %tmp18400 = getelementptr inbounds float* %tmp18399, i64 1
+  %tmp18401 = getelementptr inbounds float* %tmp18400, i64 1
+  %tmp18402 = getelementptr inbounds float* %tmp18401, i64 1
+  %tmp18403 = getelementptr inbounds float* %tmp18402, i64 1
+  %tmp18404 = getelementptr inbounds float* %tmp18403, i64 1
+  %tmp18405 = getelementptr inbounds float* %tmp18404, i64 1
+  %tmp18406 = getelementptr inbounds float* %tmp18405, i64 1
+  %tmp18407 = getelementptr inbounds float* %tmp18406, i64 1
+  %tmp18408 = getelementptr inbounds float* %tmp18407, i64 1
+  %tmp18409 = getelementptr inbounds float* %tmp18408, i64 1
+  %tmp18410 = getelementptr inbounds float* %tmp18409, i64 1
+  %tmp18411 = getelementptr inbounds float* %tmp18410, i64 1
+  %tmp18412 = getelementptr inbounds float* %tmp18411, i64 1
+  %tmp18413 = getelementptr inbounds float* %tmp18412, i64 1
+  %tmp18414 = getelementptr inbounds float* %tmp18413, i64 1
+  %tmp18415 = getelementptr inbounds float* %tmp18414, i64 1
+  %tmp18416 = getelementptr inbounds float* %tmp18415, i64 1
+  %tmp18417 = getelementptr inbounds float* %tmp18416, i64 1
+  %tmp18418 = getelementptr inbounds float* %tmp18417, i64 1
+  %tmp18419 = getelementptr inbounds float* %tmp18418, i64 1
+  %tmp18420 = getelementptr inbounds float* %tmp18419, i64 1
+  %tmp18421 = getelementptr inbounds float* %tmp18420, i64 1
+  %tmp18422 = getelementptr inbounds float* %tmp18421, i64 1
+  %tmp18423 = getelementptr inbounds float* %tmp18422, i64 1
+  %tmp18424 = getelementptr inbounds float* %tmp18423, i64 1
+  %tmp18425 = getelementptr inbounds float* %tmp18424, i64 1
+  %tmp18426 = getelementptr inbounds float* %tmp18425, i64 1
+  %tmp18427 = getelementptr inbounds float* %tmp18426, i64 1
+  %tmp18428 = getelementptr inbounds float* %tmp18427, i64 1
+  %tmp18429 = getelementptr inbounds float* %tmp18428, i64 1
+  %tmp18430 = getelementptr inbounds float* %tmp18429, i64 1
+  %tmp18431 = getelementptr inbounds float* %tmp18430, i64 1
+  %tmp18432 = getelementptr inbounds float* %tmp18431, i64 1
+  %tmp18433 = getelementptr inbounds float* %tmp18432, i64 1
+  %tmp18434 = getelementptr inbounds float* %tmp18433, i64 1
+  %tmp18435 = getelementptr inbounds float* %tmp18434, i64 1
+  %tmp18436 = getelementptr inbounds float* %tmp18435, i64 1
+  %tmp18437 = getelementptr inbounds float* %tmp18436, i64 1
+  %tmp18438 = getelementptr inbounds float* %tmp18437, i64 1
+  %tmp18439 = getelementptr inbounds float* %tmp18438, i64 1
+  %tmp18440 = getelementptr inbounds float* %tmp18439, i64 1
+  %tmp18441 = getelementptr inbounds float* %tmp18440, i64 1
+  %tmp18442 = getelementptr inbounds float* %tmp18441, i64 1
+  %tmp18443 = getelementptr inbounds float* %tmp18442, i64 1
+  %tmp18444 = getelementptr inbounds float* %tmp18443, i64 1
+  %tmp18445 = getelementptr inbounds float* %tmp18444, i64 1
+  %tmp18446 = getelementptr inbounds float* %tmp18445, i64 1
+  %tmp18447 = getelementptr inbounds float* %tmp18446, i64 1
+  %tmp18448 = getelementptr inbounds float* %tmp18447, i64 1
+  %tmp18449 = getelementptr inbounds float* %tmp18448, i64 1
+  %tmp18450 = getelementptr inbounds float* %tmp18449, i64 1
+  %tmp18451 = getelementptr inbounds float* %tmp18450, i64 1
+  %tmp18452 = getelementptr inbounds float* %tmp18451, i64 1
+  %tmp18453 = getelementptr inbounds float* %tmp18452, i64 1
+  %tmp18454 = getelementptr inbounds float* %tmp18453, i64 1
+  %tmp18455 = getelementptr inbounds float* %tmp18454, i64 1
+  %tmp18456 = getelementptr inbounds float* %tmp18455, i64 1
+  %tmp18457 = getelementptr inbounds float* %tmp18456, i64 1
+  %tmp18458 = getelementptr inbounds float* %tmp18457, i64 1
+  %tmp18459 = getelementptr inbounds float* %tmp18458, i64 1
+  %tmp18460 = getelementptr inbounds float* %tmp18459, i64 1
+  %tmp18461 = getelementptr inbounds float* %tmp18460, i64 1
+  %tmp18462 = getelementptr inbounds float* %tmp18461, i64 1
+  %tmp18463 = getelementptr inbounds float* %tmp18462, i64 1
+  %tmp18464 = getelementptr inbounds float* %tmp18463, i64 1
+  %tmp18465 = getelementptr inbounds float* %tmp18464, i64 1
+  %tmp18466 = getelementptr inbounds float* %tmp18465, i64 1
+  %tmp18467 = getelementptr inbounds float* %tmp18466, i64 1
+  %tmp18468 = getelementptr inbounds float* %tmp18467, i64 1
+  %tmp18469 = getelementptr inbounds float* %tmp18468, i64 1
+  %tmp18470 = getelementptr inbounds float* %tmp18469, i64 1
+  %tmp18471 = getelementptr inbounds float* %tmp18470, i64 1
+  %tmp18472 = getelementptr inbounds float* %tmp18471, i64 1
+  %tmp18473 = getelementptr inbounds float* %tmp18472, i64 1
+  %tmp18474 = getelementptr inbounds float* %tmp18473, i64 1
+  %tmp18475 = getelementptr inbounds float* %tmp18474, i64 1
+  %tmp18476 = getelementptr inbounds float* %tmp18475, i64 1
+  %tmp18477 = getelementptr inbounds float* %tmp18476, i64 1
+  %tmp18478 = getelementptr inbounds float* %tmp18477, i64 1
+  %tmp18479 = getelementptr inbounds float* %tmp18478, i64 1
+  %tmp18480 = getelementptr inbounds float* %tmp18479, i64 1
+  %tmp18481 = getelementptr inbounds float* %tmp18480, i64 1
+  %tmp18482 = getelementptr inbounds float* %tmp18481, i64 1
+  %tmp18483 = getelementptr inbounds float* %tmp18482, i64 1
+  %tmp18484 = getelementptr inbounds float* %tmp18483, i64 1
+  %tmp18485 = getelementptr inbounds float* %tmp18484, i64 1
+  %tmp18486 = getelementptr inbounds float* %tmp18485, i64 1
+  %tmp18487 = getelementptr inbounds float* %tmp18486, i64 1
+  %tmp18488 = getelementptr inbounds float* %tmp18487, i64 1
+  %tmp18489 = getelementptr inbounds float* %tmp18488, i64 1
+  %tmp18490 = getelementptr inbounds float* %tmp18489, i64 1
+  %tmp18491 = getelementptr inbounds float* %tmp18490, i64 1
+  %tmp18492 = getelementptr inbounds float* %tmp18491, i64 1
+  %tmp18493 = getelementptr inbounds float* %tmp18492, i64 1
+  %tmp18494 = getelementptr inbounds float* %tmp18493, i64 1
+  %tmp18495 = getelementptr inbounds float* %tmp18494, i64 1
+  %tmp18496 = getelementptr inbounds float* %tmp18495, i64 1
+  %tmp18497 = getelementptr inbounds float* %tmp18496, i64 1
+  %tmp18498 = getelementptr inbounds float* %tmp18497, i64 1
+  %tmp18499 = getelementptr inbounds float* %tmp18498, i64 1
+  %tmp18500 = getelementptr inbounds float* %tmp18499, i64 1
+  %tmp18501 = getelementptr inbounds float* %tmp18500, i64 1
+  %tmp18502 = getelementptr inbounds float* %tmp18501, i64 1
+  %tmp18503 = getelementptr inbounds float* %tmp18502, i64 1
+  %tmp18504 = getelementptr inbounds float* %tmp18503, i64 1
+  %tmp18505 = getelementptr inbounds float* %tmp18504, i64 1
+  %tmp18506 = getelementptr inbounds float* %tmp18505, i64 1
+  %tmp18507 = getelementptr inbounds float* %tmp18506, i64 1
+  %tmp18508 = getelementptr inbounds float* %tmp18507, i64 1
+  %tmp18509 = getelementptr inbounds float* %tmp18508, i64 1
+  %tmp18510 = getelementptr inbounds float* %tmp18509, i64 1
+  %tmp18511 = getelementptr inbounds float* %tmp18510, i64 1
+  %tmp18512 = getelementptr inbounds float* %tmp18511, i64 1
+  %tmp18513 = getelementptr inbounds float* %tmp18512, i64 1
+  %tmp18514 = getelementptr inbounds float* %tmp18513, i64 1
+  %tmp18515 = getelementptr inbounds float* %tmp18514, i64 1
+  %tmp18516 = getelementptr inbounds float* %tmp18515, i64 1
+  %tmp18517 = getelementptr inbounds float* %tmp18516, i64 1
+  %tmp18518 = getelementptr inbounds float* %tmp18517, i64 1
+  %tmp18519 = getelementptr inbounds float* %tmp18518, i64 1
+  %tmp18520 = getelementptr inbounds float* %tmp18519, i64 1
+  %tmp18521 = getelementptr inbounds float* %tmp18520, i64 1
+  %tmp18522 = getelementptr inbounds float* %tmp18521, i64 1
+  %tmp18523 = getelementptr inbounds float* %tmp18522, i64 1
+  %tmp18524 = getelementptr inbounds float* %tmp18523, i64 1
+  %tmp18525 = getelementptr inbounds float* %tmp18524, i64 1
+  %tmp18526 = getelementptr inbounds float* %tmp18525, i64 1
+  %tmp18527 = getelementptr inbounds float* %tmp18526, i64 1
+  %tmp18528 = getelementptr inbounds float* %tmp18527, i64 1
+  %tmp18529 = getelementptr inbounds float* %tmp18528, i64 1
+  %tmp18530 = getelementptr inbounds float* %tmp18529, i64 1
+  %tmp18531 = getelementptr inbounds float* %tmp18530, i64 1
+  %tmp18532 = getelementptr inbounds float* %tmp18531, i64 1
+  %tmp18533 = getelementptr inbounds float* %tmp18532, i64 1
+  %tmp18534 = getelementptr inbounds float* %tmp18533, i64 1
+  %tmp18535 = getelementptr inbounds float* %tmp18534, i64 1
+  %tmp18536 = getelementptr inbounds float* %tmp18535, i64 1
+  %tmp18537 = getelementptr inbounds float* %tmp18536, i64 1
+  %tmp18538 = getelementptr inbounds float* %tmp18537, i64 1
+  %tmp18539 = getelementptr inbounds float* %tmp18538, i64 1
+  %tmp18540 = getelementptr inbounds float* %tmp18539, i64 1
+  %tmp18541 = getelementptr inbounds float* %tmp18540, i64 1
+  %tmp18542 = getelementptr inbounds float* %tmp18541, i64 1
+  %tmp18543 = getelementptr inbounds float* %tmp18542, i64 1
+  %tmp18544 = getelementptr inbounds float* %tmp18543, i64 1
+  %tmp18545 = getelementptr inbounds float* %tmp18544, i64 1
+  %tmp18546 = getelementptr inbounds float* %tmp18545, i64 1
+  %tmp18547 = getelementptr inbounds float* %tmp18546, i64 1
+  %tmp18548 = getelementptr inbounds float* %tmp18547, i64 1
+  %tmp18549 = getelementptr inbounds float* %tmp18548, i64 1
+  %tmp18550 = getelementptr inbounds float* %tmp18549, i64 1
+  %tmp18551 = getelementptr inbounds float* %tmp18550, i64 1
+  %tmp18552 = getelementptr inbounds float* %tmp18551, i64 1
+  %tmp18553 = getelementptr inbounds float* %tmp18552, i64 1
+  %tmp18554 = getelementptr inbounds float* %tmp18553, i64 1
+  %tmp18555 = getelementptr inbounds float* %tmp18554, i64 1
+  %tmp18556 = getelementptr inbounds float* %tmp18555, i64 1
+  %tmp18557 = getelementptr inbounds float* %tmp18556, i64 1
+  %tmp18558 = getelementptr inbounds float* %tmp18557, i64 1
+  %tmp18559 = getelementptr inbounds float* %tmp18558, i64 1
+  %tmp18560 = getelementptr inbounds float* %tmp18559, i64 1
+  %tmp18561 = getelementptr inbounds float* %tmp18560, i64 1
+  %tmp18562 = getelementptr inbounds float* %tmp18561, i64 1
+  %tmp18563 = getelementptr inbounds float* %tmp18562, i64 1
+  %tmp18564 = getelementptr inbounds float* %tmp18563, i64 1
+  %tmp18565 = getelementptr inbounds float* %tmp18564, i64 1
+  %tmp18566 = getelementptr inbounds float* %tmp18565, i64 1
+  %tmp18567 = getelementptr inbounds float* %tmp18566, i64 1
+  %tmp18568 = getelementptr inbounds float* %tmp18567, i64 1
+  %tmp18569 = getelementptr inbounds float* %tmp18568, i64 1
+  %tmp18570 = getelementptr inbounds float* %tmp18569, i64 1
+  %tmp18571 = getelementptr inbounds float* %tmp18570, i64 1
+  %tmp18572 = getelementptr inbounds float* %tmp18571, i64 1
+  %tmp18573 = getelementptr inbounds float* %tmp18572, i64 1
+  %tmp18574 = getelementptr inbounds float* %tmp18573, i64 1
+  %tmp18575 = getelementptr inbounds float* %tmp18574, i64 1
+  %tmp18576 = getelementptr inbounds float* %tmp18575, i64 1
+  %tmp18577 = getelementptr inbounds float* %tmp18576, i64 1
+  %tmp18578 = getelementptr inbounds float* %tmp18577, i64 1
+  %tmp18579 = getelementptr inbounds float* %tmp18578, i64 1
+  %tmp18580 = getelementptr inbounds float* %tmp18579, i64 1
+  %tmp18581 = getelementptr inbounds float* %tmp18580, i64 1
+  %tmp18582 = getelementptr inbounds float* %tmp18581, i64 1
+  %tmp18583 = getelementptr inbounds float* %tmp18582, i64 1
+  %tmp18584 = getelementptr inbounds float* %tmp18583, i64 1
+  %tmp18585 = getelementptr inbounds float* %tmp18584, i64 1
+  %tmp18586 = getelementptr inbounds float* %tmp18585, i64 1
+  %tmp18587 = getelementptr inbounds float* %tmp18586, i64 1
+  %tmp18588 = getelementptr inbounds float* %tmp18587, i64 1
+  %tmp18589 = getelementptr inbounds float* %tmp18588, i64 1
+  %tmp18590 = getelementptr inbounds float* %tmp18589, i64 1
+  %tmp18591 = getelementptr inbounds float* %tmp18590, i64 1
+  %tmp18592 = getelementptr inbounds float* %tmp18591, i64 1
+  %tmp18593 = getelementptr inbounds float* %tmp18592, i64 1
+  %tmp18594 = getelementptr inbounds float* %tmp18593, i64 1
+  %tmp18595 = getelementptr inbounds float* %tmp18594, i64 1
+  %tmp18596 = getelementptr inbounds float* %tmp18595, i64 1
+  %tmp18597 = getelementptr inbounds float* %tmp18596, i64 1
+  %tmp18598 = getelementptr inbounds float* %tmp18597, i64 1
+  %tmp18599 = getelementptr inbounds float* %tmp18598, i64 1
+  %tmp18600 = getelementptr inbounds float* %tmp18599, i64 1
+  %tmp18601 = getelementptr inbounds float* %tmp18600, i64 1
+  %tmp18602 = getelementptr inbounds float* %tmp18601, i64 1
+  %tmp18603 = getelementptr inbounds float* %tmp18602, i64 1
+  %tmp18604 = getelementptr inbounds float* %tmp18603, i64 1
+  %tmp18605 = getelementptr inbounds float* %tmp18604, i64 1
+  %tmp18606 = getelementptr inbounds float* %tmp18605, i64 1
+  %tmp18607 = getelementptr inbounds float* %tmp18606, i64 1
+  %tmp18608 = getelementptr inbounds float* %tmp18607, i64 1
+  %tmp18609 = getelementptr inbounds float* %tmp18608, i64 1
+  %tmp18610 = getelementptr inbounds float* %tmp18609, i64 1
+  %tmp18611 = getelementptr inbounds float* %tmp18610, i64 1
+  %tmp18612 = getelementptr inbounds float* %tmp18611, i64 1
+  %tmp18613 = getelementptr inbounds float* %tmp18612, i64 1
+  %tmp18614 = getelementptr inbounds float* %tmp18613, i64 1
+  %tmp18615 = getelementptr inbounds float* %tmp18614, i64 1
+  %tmp18616 = getelementptr inbounds float* %tmp18615, i64 1
+  %tmp18617 = getelementptr inbounds float* %tmp18616, i64 1
+  %tmp18618 = getelementptr inbounds float* %tmp18617, i64 1
+  %tmp18619 = getelementptr inbounds float* %tmp18618, i64 1
+  %tmp18620 = getelementptr inbounds float* %tmp18619, i64 1
+  %tmp18621 = getelementptr inbounds float* %tmp18620, i64 1
+  %tmp18622 = getelementptr inbounds float* %tmp18621, i64 1
+  %tmp18623 = getelementptr inbounds float* %tmp18622, i64 1
+  %tmp18624 = getelementptr inbounds float* %tmp18623, i64 1
+  %tmp18625 = getelementptr inbounds float* %tmp18624, i64 1
+  %tmp18626 = getelementptr inbounds float* %tmp18625, i64 1
+  %tmp18627 = getelementptr inbounds float* %tmp18626, i64 1
+  %tmp18628 = getelementptr inbounds float* %tmp18627, i64 1
+  %tmp18629 = getelementptr inbounds float* %tmp18628, i64 1
+  %tmp18630 = getelementptr inbounds float* %tmp18629, i64 1
+  %tmp18631 = getelementptr inbounds float* %tmp18630, i64 1
+  %tmp18632 = getelementptr inbounds float* %tmp18631, i64 1
+  %tmp18633 = getelementptr inbounds float* %tmp18632, i64 1
+  %tmp18634 = getelementptr inbounds float* %tmp18633, i64 1
+  %tmp18635 = getelementptr inbounds float* %tmp18634, i64 1
+  %tmp18636 = getelementptr inbounds float* %tmp18635, i64 1
+  %tmp18637 = getelementptr inbounds float* %tmp18636, i64 1
+  %tmp18638 = getelementptr inbounds float* %tmp18637, i64 1
+  %tmp18639 = getelementptr inbounds float* %tmp18638, i64 1
+  %tmp18640 = getelementptr inbounds float* %tmp18639, i64 1
+  %tmp18641 = getelementptr inbounds float* %tmp18640, i64 1
+  %tmp18642 = getelementptr inbounds float* %tmp18641, i64 1
+  %tmp18643 = getelementptr inbounds float* %tmp18642, i64 1
+  %tmp18644 = getelementptr inbounds float* %tmp18643, i64 1
+  %tmp18645 = getelementptr inbounds float* %tmp18644, i64 1
+  %tmp18646 = getelementptr inbounds float* %tmp18645, i64 1
+  %tmp18647 = getelementptr inbounds float* %tmp18646, i64 1
+  %tmp18648 = getelementptr inbounds float* %tmp18647, i64 1
+  %tmp18649 = getelementptr inbounds float* %tmp18648, i64 1
+  %tmp18650 = getelementptr inbounds float* %tmp18649, i64 1
+  %tmp18651 = getelementptr inbounds float* %tmp18650, i64 1
+  %tmp18652 = getelementptr inbounds float* %tmp18651, i64 1
+  %tmp18653 = getelementptr inbounds float* %tmp18652, i64 1
+  %tmp18654 = getelementptr inbounds float* %tmp18653, i64 1
+  %tmp18655 = getelementptr inbounds float* %tmp18654, i64 1
+  %tmp18656 = getelementptr inbounds float* %tmp18655, i64 1
+  %tmp18657 = getelementptr inbounds float* %tmp18656, i64 1
+  %tmp18658 = getelementptr inbounds float* %tmp18657, i64 1
+  %tmp18659 = getelementptr inbounds float* %tmp18658, i64 1
+  %tmp18660 = getelementptr inbounds float* %tmp18659, i64 1
+  %tmp18661 = getelementptr inbounds float* %tmp18660, i64 1
+  %tmp18662 = getelementptr inbounds float* %tmp18661, i64 1
+  %tmp18663 = getelementptr inbounds float* %tmp18662, i64 1
+  %tmp18664 = getelementptr inbounds float* %tmp18663, i64 1
+  %tmp18665 = getelementptr inbounds float* %tmp18664, i64 1
+  %tmp18666 = getelementptr inbounds float* %tmp18665, i64 1
+  %tmp18667 = getelementptr inbounds float* %tmp18666, i64 1
+  %tmp18668 = getelementptr inbounds float* %tmp18667, i64 1
+  %tmp18669 = getelementptr inbounds float* %tmp18668, i64 1
+  %tmp18670 = getelementptr inbounds float* %tmp18669, i64 1
+  %tmp18671 = getelementptr inbounds float* %tmp18670, i64 1
+  %tmp18672 = getelementptr inbounds float* %tmp18671, i64 1
+  %tmp18673 = getelementptr inbounds float* %tmp18672, i64 1
+  %tmp18674 = getelementptr inbounds float* %tmp18673, i64 1
+  %tmp18675 = getelementptr inbounds float* %tmp18674, i64 1
+  %tmp18676 = getelementptr inbounds float* %tmp18675, i64 1
+  %tmp18677 = getelementptr inbounds float* %tmp18676, i64 1
+  %tmp18678 = getelementptr inbounds float* %tmp18677, i64 1
+  %tmp18679 = getelementptr inbounds float* %tmp18678, i64 1
+  %tmp18680 = getelementptr inbounds float* %tmp18679, i64 1
+  %tmp18681 = getelementptr inbounds float* %tmp18680, i64 1
+  %tmp18682 = getelementptr inbounds float* %tmp18681, i64 1
+  %tmp18683 = getelementptr inbounds float* %tmp18682, i64 1
+  %tmp18684 = getelementptr inbounds float* %tmp18683, i64 1
+  %tmp18685 = getelementptr inbounds float* %tmp18684, i64 1
+  %tmp18686 = getelementptr inbounds float* %tmp18685, i64 1
+  %tmp18687 = getelementptr inbounds float* %tmp18686, i64 1
+  %tmp18688 = getelementptr inbounds float* %tmp18687, i64 1
+  %tmp18689 = getelementptr inbounds float* %tmp18688, i64 1
+  %tmp18690 = getelementptr inbounds float* %tmp18689, i64 1
+  %tmp18691 = getelementptr inbounds float* %tmp18690, i64 1
+  %tmp18692 = getelementptr inbounds float* %tmp18691, i64 1
+  %tmp18693 = getelementptr inbounds float* %tmp18692, i64 1
+  %tmp18694 = getelementptr inbounds float* %tmp18693, i64 1
+  %tmp18695 = getelementptr inbounds float* %tmp18694, i64 1
+  %tmp18696 = getelementptr inbounds float* %tmp18695, i64 1
+  %tmp18697 = getelementptr inbounds float* %tmp18696, i64 1
+  %tmp18698 = getelementptr inbounds float* %tmp18697, i64 1
+  %tmp18699 = getelementptr inbounds float* %tmp18698, i64 1
+  %tmp18700 = getelementptr inbounds float* %tmp18699, i64 1
+  %tmp18701 = getelementptr inbounds float* %tmp18700, i64 1
+  %tmp18702 = getelementptr inbounds float* %tmp18701, i64 1
+  %tmp18703 = getelementptr inbounds float* %tmp18702, i64 1
+  %tmp18704 = getelementptr inbounds float* %tmp18703, i64 1
+  %tmp18705 = getelementptr inbounds float* %tmp18704, i64 1
+  %tmp18706 = getelementptr inbounds float* %tmp18705, i64 1
+  %tmp18707 = getelementptr inbounds float* %tmp18706, i64 1
+  %tmp18708 = getelementptr inbounds float* %tmp18707, i64 1
+  %tmp18709 = getelementptr inbounds float* %tmp18708, i64 1
+  %tmp18710 = getelementptr inbounds float* %tmp18709, i64 1
+  %tmp18711 = getelementptr inbounds float* %tmp18710, i64 1
+  %tmp18712 = getelementptr inbounds float* %tmp18711, i64 1
+  %tmp18713 = getelementptr inbounds float* %tmp18712, i64 1
+  %tmp18714 = getelementptr inbounds float* %tmp18713, i64 1
+  %tmp18715 = getelementptr inbounds float* %tmp18714, i64 1
+  %tmp18716 = getelementptr inbounds float* %tmp18715, i64 1
+  %tmp18717 = getelementptr inbounds float* %tmp18716, i64 1
+  %tmp18718 = getelementptr inbounds float* %tmp18717, i64 1
+  %tmp18719 = getelementptr inbounds float* %tmp18718, i64 1
+  %tmp18720 = getelementptr inbounds float* %tmp18719, i64 1
+  %tmp18721 = getelementptr inbounds float* %tmp18720, i64 1
+  %tmp18722 = getelementptr inbounds float* %tmp18721, i64 1
+  %tmp18723 = getelementptr inbounds float* %tmp18722, i64 1
+  %tmp18724 = getelementptr inbounds float* %tmp18723, i64 1
+  %tmp18725 = getelementptr inbounds float* %tmp18724, i64 1
+  %tmp18726 = getelementptr inbounds float* %tmp18725, i64 1
+  %tmp18727 = getelementptr inbounds float* %tmp18726, i64 1
+  %tmp18728 = getelementptr inbounds float* %tmp18727, i64 1
+  %tmp18729 = getelementptr inbounds float* %tmp18728, i64 1
+  %tmp18730 = getelementptr inbounds float* %tmp18729, i64 1
+  %tmp18731 = getelementptr inbounds float* %tmp18730, i64 1
+  %tmp18732 = getelementptr inbounds float* %tmp18731, i64 1
+  %tmp18733 = getelementptr inbounds float* %tmp18732, i64 1
+  %tmp18734 = getelementptr inbounds float* %tmp18733, i64 1
+  %tmp18735 = getelementptr inbounds float* %tmp18734, i64 1
+  %tmp18736 = getelementptr inbounds float* %tmp18735, i64 1
+  %tmp18737 = getelementptr inbounds float* %tmp18736, i64 1
+  %tmp18738 = getelementptr inbounds float* %tmp18737, i64 1
+  %tmp18739 = getelementptr inbounds float* %tmp18738, i64 1
+  %tmp18740 = getelementptr inbounds float* %tmp18739, i64 1
+  %tmp18741 = getelementptr inbounds float* %tmp18740, i64 1
+  %tmp18742 = getelementptr inbounds float* %tmp18741, i64 1
+  %tmp18743 = getelementptr inbounds float* %tmp18742, i64 1
+  %tmp18744 = getelementptr inbounds float* %tmp18743, i64 1
+  %tmp18745 = getelementptr inbounds float* %tmp18744, i64 1
+  %tmp18746 = getelementptr inbounds float* %tmp18745, i64 1
+  %tmp18747 = getelementptr inbounds float* %tmp18746, i64 1
+  %tmp18748 = getelementptr inbounds float* %tmp18747, i64 1
+  %tmp18749 = getelementptr inbounds float* %tmp18748, i64 1
+  %tmp18750 = getelementptr inbounds float* %tmp18749, i64 1
+  %tmp18751 = getelementptr inbounds float* %tmp18750, i64 1
+  %tmp18752 = getelementptr inbounds float* %tmp18751, i64 1
+  %tmp18753 = getelementptr inbounds float* %tmp18752, i64 1
+  %tmp18754 = getelementptr inbounds float* %tmp18753, i64 1
+  %tmp18755 = getelementptr inbounds float* %tmp18754, i64 1
+  %tmp18756 = getelementptr inbounds float* %tmp18755, i64 1
+  %tmp18757 = getelementptr inbounds float* %tmp18756, i64 1
+  %tmp18758 = getelementptr inbounds float* %tmp18757, i64 1
+  %tmp18759 = getelementptr inbounds float* %tmp18758, i64 1
+  %tmp18760 = getelementptr inbounds float* %tmp18759, i64 1
+  %tmp18761 = getelementptr inbounds float* %tmp18760, i64 1
+  %tmp18762 = getelementptr inbounds float* %tmp18761, i64 1
+  %tmp18763 = getelementptr inbounds float* %tmp18762, i64 1
+  %tmp18764 = getelementptr inbounds float* %tmp18763, i64 1
+  %tmp18765 = getelementptr inbounds float* %tmp18764, i64 1
+  %tmp18766 = getelementptr inbounds float* %tmp18765, i64 1
+  %tmp18767 = getelementptr inbounds float* %tmp18766, i64 1
+  %tmp18768 = getelementptr inbounds float* %tmp18767, i64 1
+  %tmp18769 = getelementptr inbounds float* %tmp18768, i64 1
+  %tmp18770 = getelementptr inbounds float* %tmp18769, i64 1
+  %tmp18771 = getelementptr inbounds float* %tmp18770, i64 1
+  %tmp18772 = getelementptr inbounds float* %tmp18771, i64 1
+  %tmp18773 = getelementptr inbounds float* %tmp18772, i64 1
+  %tmp18774 = getelementptr inbounds float* %tmp18773, i64 1
+  %tmp18775 = getelementptr inbounds float* %tmp18774, i64 1
+  %tmp18776 = getelementptr inbounds float* %tmp18775, i64 1
+  %tmp18777 = getelementptr inbounds float* %tmp18776, i64 1
+  %tmp18778 = getelementptr inbounds float* %tmp18777, i64 1
+  %tmp18779 = getelementptr inbounds float* %tmp18778, i64 1
+  %tmp18780 = getelementptr inbounds float* %tmp18779, i64 1
+  %tmp18781 = getelementptr inbounds float* %tmp18780, i64 1
+  %tmp18782 = getelementptr inbounds float* %tmp18781, i64 1
+  %tmp18783 = getelementptr inbounds float* %tmp18782, i64 1
+  %tmp18784 = getelementptr inbounds float* %tmp18783, i64 1
+  %tmp18785 = getelementptr inbounds float* %tmp18784, i64 1
+  %tmp18786 = getelementptr inbounds float* %tmp18785, i64 1
+  %tmp18787 = getelementptr inbounds float* %tmp18786, i64 1
+  %tmp18788 = getelementptr inbounds float* %tmp18787, i64 1
+  %tmp18789 = getelementptr inbounds float* %tmp18788, i64 1
+  %tmp18790 = getelementptr inbounds float* %tmp18789, i64 1
+  %tmp18791 = getelementptr inbounds float* %tmp18790, i64 1
+  %tmp18792 = getelementptr inbounds float* %tmp18791, i64 1
+  %tmp18793 = getelementptr inbounds float* %tmp18792, i64 1
+  %tmp18794 = getelementptr inbounds float* %tmp18793, i64 1
+  %tmp18795 = getelementptr inbounds float* %tmp18794, i64 1
+  %tmp18796 = getelementptr inbounds float* %tmp18795, i64 1
+  %tmp18797 = getelementptr inbounds float* %tmp18796, i64 1
+  %tmp18798 = getelementptr inbounds float* %tmp18797, i64 1
+  %tmp18799 = getelementptr inbounds float* %tmp18798, i64 1
+  %tmp18800 = getelementptr inbounds float* %tmp18799, i64 1
+  %tmp18801 = getelementptr inbounds float* %tmp18800, i64 1
+  %tmp18802 = getelementptr inbounds float* %tmp18801, i64 1
+  %tmp18803 = getelementptr inbounds float* %tmp18802, i64 1
+  %tmp18804 = getelementptr inbounds float* %tmp18803, i64 1
+  %tmp18805 = getelementptr inbounds float* %tmp18804, i64 1
+  %tmp18806 = getelementptr inbounds float* %tmp18805, i64 1
+  %tmp18807 = getelementptr inbounds float* %tmp18806, i64 1
+  %tmp18808 = getelementptr inbounds float* %tmp18807, i64 1
+  %tmp18809 = getelementptr inbounds float* %tmp18808, i64 1
+  %tmp18810 = getelementptr inbounds float* %tmp18809, i64 1
+  %tmp18811 = getelementptr inbounds float* %tmp18810, i64 1
+  %tmp18812 = getelementptr inbounds float* %tmp18811, i64 1
+  %tmp18813 = getelementptr inbounds float* %tmp18812, i64 1
+  %tmp18814 = getelementptr inbounds float* %tmp18813, i64 1
+  %tmp18815 = getelementptr inbounds float* %tmp18814, i64 1
+  %tmp18816 = getelementptr inbounds float* %tmp18815, i64 1
+  %tmp18817 = getelementptr inbounds float* %tmp18816, i64 1
+  %tmp18818 = getelementptr inbounds float* %tmp18817, i64 1
+  %tmp18819 = getelementptr inbounds float* %tmp18818, i64 1
+  %tmp18820 = getelementptr inbounds float* %tmp18819, i64 1
+  %tmp18821 = getelementptr inbounds float* %tmp18820, i64 1
+  %tmp18822 = getelementptr inbounds float* %tmp18821, i64 1
+  %tmp18823 = getelementptr inbounds float* %tmp18822, i64 1
+  %tmp18824 = getelementptr inbounds float* %tmp18823, i64 1
+  %tmp18825 = getelementptr inbounds float* %tmp18824, i64 1
+  %tmp18826 = getelementptr inbounds float* %tmp18825, i64 1
+  %tmp18827 = getelementptr inbounds float* %tmp18826, i64 1
+  %tmp18828 = getelementptr inbounds float* %tmp18827, i64 1
+  %tmp18829 = getelementptr inbounds float* %tmp18828, i64 1
+  %tmp18830 = getelementptr inbounds float* %tmp18829, i64 1
+  %tmp18831 = getelementptr inbounds float* %tmp18830, i64 1
+  %tmp18832 = getelementptr inbounds float* %tmp18831, i64 1
+  %tmp18833 = getelementptr inbounds float* %tmp18832, i64 1
+  %tmp18834 = getelementptr inbounds float* %tmp18833, i64 1
+  %tmp18835 = getelementptr inbounds float* %tmp18834, i64 1
+  %tmp18836 = getelementptr inbounds float* %tmp18835, i64 1
+  %tmp18837 = getelementptr inbounds float* %tmp18836, i64 1
+  %tmp18838 = getelementptr inbounds float* %tmp18837, i64 1
+  %tmp18839 = getelementptr inbounds float* %tmp18838, i64 1
+  %tmp18840 = getelementptr inbounds float* %tmp18839, i64 1
+  %tmp18841 = getelementptr inbounds float* %tmp18840, i64 1
+  %tmp18842 = getelementptr inbounds float* %tmp18841, i64 1
+  %tmp18843 = getelementptr inbounds float* %tmp18842, i64 1
+  %tmp18844 = getelementptr inbounds float* %tmp18843, i64 1
+  %tmp18845 = getelementptr inbounds float* %tmp18844, i64 1
+  %tmp18846 = getelementptr inbounds float* %tmp18845, i64 1
+  %tmp18847 = getelementptr inbounds float* %tmp18846, i64 1
+  %tmp18848 = getelementptr inbounds float* %tmp18847, i64 1
+  %tmp18849 = getelementptr inbounds float* %tmp18848, i64 1
+  %tmp18850 = getelementptr inbounds float* %tmp18849, i64 1
+  %tmp18851 = getelementptr inbounds float* %tmp18850, i64 1
+  %tmp18852 = getelementptr inbounds float* %tmp18851, i64 1
+  %tmp18853 = getelementptr inbounds float* %tmp18852, i64 1
+  %tmp18854 = getelementptr inbounds float* %tmp18853, i64 1
+  %tmp18855 = getelementptr inbounds float* %tmp18854, i64 1
+  %tmp18856 = getelementptr inbounds float* %tmp18855, i64 1
+  %tmp18857 = getelementptr inbounds float* %tmp18856, i64 1
+  %tmp18858 = getelementptr inbounds float* %tmp18857, i64 1
+  %tmp18859 = getelementptr inbounds float* %tmp18858, i64 1
+  %tmp18860 = getelementptr inbounds float* %tmp18859, i64 1
+  %tmp18861 = getelementptr inbounds float* %tmp18860, i64 1
+  %tmp18862 = getelementptr inbounds float* %tmp18861, i64 1
+  %tmp18863 = getelementptr inbounds float* %tmp18862, i64 1
+  %tmp18864 = getelementptr inbounds float* %tmp18863, i64 1
+  %tmp18865 = getelementptr inbounds float* %tmp18864, i64 1
+  %tmp18866 = getelementptr inbounds float* %tmp18865, i64 1
+  %tmp18867 = getelementptr inbounds float* %tmp18866, i64 1
+  %tmp18868 = getelementptr inbounds float* %tmp18867, i64 1
+  %tmp18869 = getelementptr inbounds float* %tmp18868, i64 1
+  %tmp18870 = getelementptr inbounds float* %tmp18869, i64 1
+  %tmp18871 = getelementptr inbounds float* %tmp18870, i64 1
+  %tmp18872 = getelementptr inbounds float* %tmp18871, i64 1
+  %tmp18873 = getelementptr inbounds float* %tmp18872, i64 1
+  %tmp18874 = getelementptr inbounds float* %tmp18873, i64 1
+  %tmp18875 = getelementptr inbounds float* %tmp18874, i64 1
+  %tmp18876 = getelementptr inbounds float* %tmp18875, i64 1
+  %tmp18877 = getelementptr inbounds float* %tmp18876, i64 1
+  %tmp18878 = getelementptr inbounds float* %tmp18877, i64 1
+  %tmp18879 = getelementptr inbounds float* %tmp18878, i64 1
+  %tmp18880 = getelementptr inbounds float* %tmp18879, i64 1
+  %tmp18881 = getelementptr inbounds float* %tmp18880, i64 1
+  %tmp18882 = getelementptr inbounds float* %tmp18881, i64 1
+  %tmp18883 = getelementptr inbounds float* %tmp18882, i64 1
+  %tmp18884 = getelementptr inbounds float* %tmp18883, i64 1
+  %tmp18885 = getelementptr inbounds float* %tmp18884, i64 1
+  %tmp18886 = getelementptr inbounds float* %tmp18885, i64 1
+  %tmp18887 = getelementptr inbounds float* %tmp18886, i64 1
+  %tmp18888 = getelementptr inbounds float* %tmp18887, i64 1
+  %tmp18889 = getelementptr inbounds float* %tmp18888, i64 1
+  %tmp18890 = getelementptr inbounds float* %tmp18889, i64 1
+  %tmp18891 = getelementptr inbounds float* %tmp18890, i64 1
+  %tmp18892 = getelementptr inbounds float* %tmp18891, i64 1
+  %tmp18893 = getelementptr inbounds float* %tmp18892, i64 1
+  %tmp18894 = getelementptr inbounds float* %tmp18893, i64 1
+  %tmp18895 = getelementptr inbounds float* %tmp18894, i64 1
+  %tmp18896 = getelementptr inbounds float* %tmp18895, i64 1
+  %tmp18897 = getelementptr inbounds float* %tmp18896, i64 1
+  %tmp18898 = getelementptr inbounds float* %tmp18897, i64 1
+  %tmp18899 = getelementptr inbounds float* %tmp18898, i64 1
+  %tmp18900 = getelementptr inbounds float* %tmp18899, i64 1
+  %tmp18901 = getelementptr inbounds float* %tmp18900, i64 1
+  %tmp18902 = getelementptr inbounds float* %tmp18901, i64 1
+  %tmp18903 = getelementptr inbounds float* %tmp18902, i64 1
+  %tmp18904 = getelementptr inbounds float* %tmp18903, i64 1
+  %tmp18905 = getelementptr inbounds float* %tmp18904, i64 1
+  %tmp18906 = getelementptr inbounds float* %tmp18905, i64 1
+  %tmp18907 = getelementptr inbounds float* %tmp18906, i64 1
+  %tmp18908 = getelementptr inbounds float* %tmp18907, i64 1
+  %tmp18909 = getelementptr inbounds float* %tmp18908, i64 1
+  %tmp18910 = getelementptr inbounds float* %tmp18909, i64 1
+  %tmp18911 = getelementptr inbounds float* %tmp18910, i64 1
+  %tmp18912 = getelementptr inbounds float* %tmp18911, i64 1
+  %tmp18913 = getelementptr inbounds float* %tmp18912, i64 1
+  %tmp18914 = getelementptr inbounds float* %tmp18913, i64 1
+  %tmp18915 = getelementptr inbounds float* %tmp18914, i64 1
+  %tmp18916 = getelementptr inbounds float* %tmp18915, i64 1
+  %tmp18917 = getelementptr inbounds float* %tmp18916, i64 1
+  %tmp18918 = getelementptr inbounds float* %tmp18917, i64 1
+  %tmp18919 = getelementptr inbounds float* %tmp18918, i64 1
+  %tmp18920 = getelementptr inbounds float* %tmp18919, i64 1
+  %tmp18921 = getelementptr inbounds float* %tmp18920, i64 1
+  %tmp18922 = getelementptr inbounds float* %tmp18921, i64 1
+  %tmp18923 = getelementptr inbounds float* %tmp18922, i64 1
+  %tmp18924 = getelementptr inbounds float* %tmp18923, i64 1
+  %tmp18925 = getelementptr inbounds float* %tmp18924, i64 1
+  %tmp18926 = getelementptr inbounds float* %tmp18925, i64 1
+  %tmp18927 = getelementptr inbounds float* %tmp18926, i64 1
+  %tmp18928 = getelementptr inbounds float* %tmp18927, i64 1
+  %tmp18929 = getelementptr inbounds float* %tmp18928, i64 1
+  %tmp18930 = getelementptr inbounds float* %tmp18929, i64 1
+  %tmp18931 = getelementptr inbounds float* %tmp18930, i64 1
+  %tmp18932 = getelementptr inbounds float* %tmp18931, i64 1
+  %tmp18933 = getelementptr inbounds float* %tmp18932, i64 1
+  %tmp18934 = getelementptr inbounds float* %tmp18933, i64 1
+  %tmp18935 = getelementptr inbounds float* %tmp18934, i64 1
+  %tmp18936 = getelementptr inbounds float* %tmp18935, i64 1
+  %tmp18937 = getelementptr inbounds float* %tmp18936, i64 1
+  %tmp18938 = getelementptr inbounds float* %tmp18937, i64 1
+  %tmp18939 = getelementptr inbounds float* %tmp18938, i64 1
+  %tmp18940 = getelementptr inbounds float* %tmp18939, i64 1
+  %tmp18941 = getelementptr inbounds float* %tmp18940, i64 1
+  %tmp18942 = getelementptr inbounds float* %tmp18941, i64 1
+  %tmp18943 = getelementptr inbounds float* %tmp18942, i64 1
+  %tmp18944 = getelementptr inbounds float* %tmp18943, i64 1
+  %tmp18945 = getelementptr inbounds float* %tmp18944, i64 1
+  %tmp18946 = getelementptr inbounds float* %tmp18945, i64 1
+  %tmp18947 = getelementptr inbounds float* %tmp18946, i64 1
+  %tmp18948 = getelementptr inbounds float* %tmp18947, i64 1
+  %tmp18949 = getelementptr inbounds float* %tmp18948, i64 1
+  %tmp18950 = getelementptr inbounds float* %tmp18949, i64 1
+  %tmp18951 = getelementptr inbounds float* %tmp18950, i64 1
+  %tmp18952 = getelementptr inbounds float* %tmp18951, i64 1
+  %tmp18953 = getelementptr inbounds float* %tmp18952, i64 1
+  %tmp18954 = getelementptr inbounds float* %tmp18953, i64 1
+  %tmp18955 = getelementptr inbounds float* %tmp18954, i64 1
+  %tmp18956 = getelementptr inbounds float* %tmp18955, i64 1
+  %tmp18957 = getelementptr inbounds float* %tmp18956, i64 1
+  %tmp18958 = getelementptr inbounds float* %tmp18957, i64 1
+  %tmp18959 = getelementptr inbounds float* %tmp18958, i64 1
+  %tmp18960 = getelementptr inbounds float* %tmp18959, i64 1
+  %tmp18961 = getelementptr inbounds float* %tmp18960, i64 1
+  %tmp18962 = getelementptr inbounds float* %tmp18961, i64 1
+  %tmp18963 = getelementptr inbounds float* %tmp18962, i64 1
+  %tmp18964 = getelementptr inbounds float* %tmp18963, i64 1
+  %tmp18965 = getelementptr inbounds float* %tmp18964, i64 1
+  %tmp18966 = getelementptr inbounds float* %tmp18965, i64 1
+  %tmp18967 = getelementptr inbounds float* %tmp18966, i64 1
+  %tmp18968 = getelementptr inbounds float* %tmp18967, i64 1
+  %tmp18969 = getelementptr inbounds float* %tmp18968, i64 1
+  %tmp18970 = getelementptr inbounds float* %tmp18969, i64 1
+  %tmp18971 = getelementptr inbounds float* %tmp18970, i64 1
+  %tmp18972 = getelementptr inbounds float* %tmp18971, i64 1
+  %tmp18973 = getelementptr inbounds float* %tmp18972, i64 1
+  %tmp18974 = getelementptr inbounds float* %tmp18973, i64 1
+  %tmp18975 = getelementptr inbounds float* %tmp18974, i64 1
+  %tmp18976 = getelementptr inbounds float* %tmp18975, i64 1
+  %tmp18977 = getelementptr inbounds float* %tmp18976, i64 1
+  %tmp18978 = getelementptr inbounds float* %tmp18977, i64 1
+  %tmp18979 = getelementptr inbounds float* %tmp18978, i64 1
+  %tmp18980 = getelementptr inbounds float* %tmp18979, i64 1
+  %tmp18981 = getelementptr inbounds float* %tmp18980, i64 1
+  %tmp18982 = getelementptr inbounds float* %tmp18981, i64 1
+  %tmp18983 = getelementptr inbounds float* %tmp18982, i64 1
+  %tmp18984 = getelementptr inbounds float* %tmp18983, i64 1
+  %tmp18985 = getelementptr inbounds float* %tmp18984, i64 1
+  %tmp18986 = getelementptr inbounds float* %tmp18985, i64 1
+  %tmp18987 = getelementptr inbounds float* %tmp18986, i64 1
+  %tmp18988 = getelementptr inbounds float* %tmp18987, i64 1
+  %tmp18989 = getelementptr inbounds float* %tmp18988, i64 1
+  %tmp18990 = getelementptr inbounds float* %tmp18989, i64 1
+  %tmp18991 = getelementptr inbounds float* %tmp18990, i64 1
+  %tmp18992 = getelementptr inbounds float* %tmp18991, i64 1
+  %tmp18993 = getelementptr inbounds float* %tmp18992, i64 1
+  %tmp18994 = getelementptr inbounds float* %tmp18993, i64 1
+  %tmp18995 = getelementptr inbounds float* %tmp18994, i64 1
+  %tmp18996 = getelementptr inbounds float* %tmp18995, i64 1
+  %tmp18997 = getelementptr inbounds float* %tmp18996, i64 1
+  %tmp18998 = getelementptr inbounds float* %tmp18997, i64 1
+  %tmp18999 = getelementptr inbounds float* %tmp18998, i64 1
+  %tmp19000 = getelementptr inbounds float* %tmp18999, i64 1
+  %tmp19001 = getelementptr inbounds float* %tmp19000, i64 1
+  %tmp19002 = getelementptr inbounds float* %tmp19001, i64 1
+  %tmp19003 = getelementptr inbounds float* %tmp19002, i64 1
+  %tmp19004 = getelementptr inbounds float* %tmp19003, i64 1
+  %tmp19005 = getelementptr inbounds float* %tmp19004, i64 1
+  %tmp19006 = getelementptr inbounds float* %tmp19005, i64 1
+  %tmp19007 = getelementptr inbounds float* %tmp19006, i64 1
+  %tmp19008 = getelementptr inbounds float* %tmp19007, i64 1
+  %tmp19009 = getelementptr inbounds float* %tmp19008, i64 1
+  %tmp19010 = getelementptr inbounds float* %tmp19009, i64 1
+  %tmp19011 = getelementptr inbounds float* %tmp19010, i64 1
+  %tmp19012 = getelementptr inbounds float* %tmp19011, i64 1
+  %tmp19013 = getelementptr inbounds float* %tmp19012, i64 1
+  %tmp19014 = getelementptr inbounds float* %tmp19013, i64 1
+  %tmp19015 = getelementptr inbounds float* %tmp19014, i64 1
+  %tmp19016 = getelementptr inbounds float* %tmp19015, i64 1
+  %tmp19017 = getelementptr inbounds float* %tmp19016, i64 1
+  %tmp19018 = getelementptr inbounds float* %tmp19017, i64 1
+  %tmp19019 = getelementptr inbounds float* %tmp19018, i64 1
+  %tmp19020 = getelementptr inbounds float* %tmp19019, i64 1
+  %tmp19021 = getelementptr inbounds float* %tmp19020, i64 1
+  %tmp19022 = getelementptr inbounds float* %tmp19021, i64 1
+  %tmp19023 = getelementptr inbounds float* %tmp19022, i64 1
+  %tmp19024 = getelementptr inbounds float* %tmp19023, i64 1
+  %tmp19025 = getelementptr inbounds float* %tmp19024, i64 1
+  %tmp19026 = getelementptr inbounds float* %tmp19025, i64 1
+  %tmp19027 = getelementptr inbounds float* %tmp19026, i64 1
+  %tmp19028 = getelementptr inbounds float* %tmp19027, i64 1
+  %tmp19029 = getelementptr inbounds float* %tmp19028, i64 1
+  %tmp19030 = getelementptr inbounds float* %tmp19029, i64 1
+  %tmp19031 = getelementptr inbounds float* %tmp19030, i64 1
+  %tmp19032 = getelementptr inbounds float* %tmp19031, i64 1
+  %tmp19033 = getelementptr inbounds float* %tmp19032, i64 1
+  %tmp19034 = getelementptr inbounds float* %tmp19033, i64 1
+  %tmp19035 = getelementptr inbounds float* %tmp19034, i64 1
+  %tmp19036 = getelementptr inbounds float* %tmp19035, i64 1
+  %tmp19037 = getelementptr inbounds float* %tmp19036, i64 1
+  %tmp19038 = getelementptr inbounds float* %tmp19037, i64 1
+  %tmp19039 = getelementptr inbounds float* %tmp19038, i64 1
+  %tmp19040 = getelementptr inbounds float* %tmp19039, i64 1
+  %tmp19041 = getelementptr inbounds float* %tmp19040, i64 1
+  %tmp19042 = getelementptr inbounds float* %tmp19041, i64 1
+  %tmp19043 = getelementptr inbounds float* %tmp19042, i64 1
+  %tmp19044 = getelementptr inbounds float* %tmp19043, i64 1
+  %tmp19045 = getelementptr inbounds float* %tmp19044, i64 1
+  %tmp19046 = getelementptr inbounds float* %tmp19045, i64 1
+  %tmp19047 = getelementptr inbounds float* %tmp19046, i64 1
+  %tmp19048 = getelementptr inbounds float* %tmp19047, i64 1
+  %tmp19049 = getelementptr inbounds float* %tmp19048, i64 1
+  %tmp19050 = getelementptr inbounds float* %tmp19049, i64 1
+  %tmp19051 = getelementptr inbounds float* %tmp19050, i64 1
+  %tmp19052 = getelementptr inbounds float* %tmp19051, i64 1
+  %tmp19053 = getelementptr inbounds float* %tmp19052, i64 1
+  %tmp19054 = getelementptr inbounds float* %tmp19053, i64 1
+  %tmp19055 = getelementptr inbounds float* %tmp19054, i64 1
+  %tmp19056 = getelementptr inbounds float* %tmp19055, i64 1
+  %tmp19057 = getelementptr inbounds float* %tmp19056, i64 1
+  %tmp19058 = getelementptr inbounds float* %tmp19057, i64 1
+  %tmp19059 = getelementptr inbounds float* %tmp19058, i64 1
+  %tmp19060 = getelementptr inbounds float* %tmp19059, i64 1
+  %tmp19061 = getelementptr inbounds float* %tmp19060, i64 1
+  %tmp19062 = getelementptr inbounds float* %tmp19061, i64 1
+  %tmp19063 = getelementptr inbounds float* %tmp19062, i64 1
+  %tmp19064 = getelementptr inbounds float* %tmp19063, i64 1
+  %tmp19065 = getelementptr inbounds float* %tmp19064, i64 1
+  %tmp19066 = getelementptr inbounds float* %tmp19065, i64 1
+  %tmp19067 = getelementptr inbounds float* %tmp19066, i64 1
+  %tmp19068 = getelementptr inbounds float* %tmp19067, i64 1
+  %tmp19069 = getelementptr inbounds float* %tmp19068, i64 1
+  %tmp19070 = getelementptr inbounds float* %tmp19069, i64 1
+  %tmp19071 = getelementptr inbounds float* %tmp19070, i64 1
+  %tmp19072 = getelementptr inbounds float* %tmp19071, i64 1
+  %tmp19073 = getelementptr inbounds float* %tmp19072, i64 1
+  %tmp19074 = getelementptr inbounds float* %tmp19073, i64 1
+  %tmp19075 = getelementptr inbounds float* %tmp19074, i64 1
+  %tmp19076 = getelementptr inbounds float* %tmp19075, i64 1
+  %tmp19077 = getelementptr inbounds float* %tmp19076, i64 1
+  %tmp19078 = getelementptr inbounds float* %tmp19077, i64 1
+  %tmp19079 = getelementptr inbounds float* %tmp19078, i64 1
+  %tmp19080 = getelementptr inbounds float* %tmp19079, i64 1
+  %tmp19081 = getelementptr inbounds float* %tmp19080, i64 1
+  %tmp19082 = getelementptr inbounds float* %tmp19081, i64 1
+  %tmp19083 = getelementptr inbounds float* %tmp19082, i64 1
+  %tmp19084 = getelementptr inbounds float* %tmp19083, i64 1
+  %tmp19085 = getelementptr inbounds float* %tmp19084, i64 1
+  %tmp19086 = getelementptr inbounds float* %tmp19085, i64 1
+  %tmp19087 = getelementptr inbounds float* %tmp19086, i64 1
+  %tmp19088 = getelementptr inbounds float* %tmp19087, i64 1
+  %tmp19089 = getelementptr inbounds float* %tmp19088, i64 1
+  %tmp19090 = getelementptr inbounds float* %tmp19089, i64 1
+  %tmp19091 = getelementptr inbounds float* %tmp19090, i64 1
+  %tmp19092 = getelementptr inbounds float* %tmp19091, i64 1
+  %tmp19093 = getelementptr inbounds float* %tmp19092, i64 1
+  %tmp19094 = getelementptr inbounds float* %tmp19093, i64 1
+  %tmp19095 = getelementptr inbounds float* %tmp19094, i64 1
+  %tmp19096 = getelementptr inbounds float* %tmp19095, i64 1
+  %tmp19097 = getelementptr inbounds float* %tmp19096, i64 1
+  %tmp19098 = getelementptr inbounds float* %tmp19097, i64 1
+  %tmp19099 = getelementptr inbounds float* %tmp19098, i64 1
+  %tmp19100 = getelementptr inbounds float* %tmp19099, i64 1
+  %tmp19101 = getelementptr inbounds float* %tmp19100, i64 1
+  %tmp19102 = getelementptr inbounds float* %tmp19101, i64 1
+  %tmp19103 = getelementptr inbounds float* %tmp19102, i64 1
+  %tmp19104 = getelementptr inbounds float* %tmp19103, i64 1
+  %tmp19105 = getelementptr inbounds float* %tmp19104, i64 1
+  %tmp19106 = getelementptr inbounds float* %tmp19105, i64 1
+  %tmp19107 = getelementptr inbounds float* %tmp19106, i64 1
+  %tmp19108 = getelementptr inbounds float* %tmp19107, i64 1
+  %tmp19109 = getelementptr inbounds float* %tmp19108, i64 1
+  %tmp19110 = getelementptr inbounds float* %tmp19109, i64 1
+  %tmp19111 = getelementptr inbounds float* %tmp19110, i64 1
+  %tmp19112 = getelementptr inbounds float* %tmp19111, i64 1
+  %tmp19113 = getelementptr inbounds float* %tmp19112, i64 1
+  %tmp19114 = getelementptr inbounds float* %tmp19113, i64 1
+  %tmp19115 = getelementptr inbounds float* %tmp19114, i64 1
+  %tmp19116 = getelementptr inbounds float* %tmp19115, i64 1
+  %tmp19117 = getelementptr inbounds float* %tmp19116, i64 1
+  %tmp19118 = getelementptr inbounds float* %tmp19117, i64 1
+  %tmp19119 = getelementptr inbounds float* %tmp19118, i64 1
+  %tmp19120 = getelementptr inbounds float* %tmp19119, i64 1
+  %tmp19121 = getelementptr inbounds float* %tmp19120, i64 1
+  %tmp19122 = getelementptr inbounds float* %tmp19121, i64 1
+  %tmp19123 = getelementptr inbounds float* %tmp19122, i64 1
+  %tmp19124 = getelementptr inbounds float* %tmp19123, i64 1
+  %tmp19125 = getelementptr inbounds float* %tmp19124, i64 1
+  %tmp19126 = getelementptr inbounds float* %tmp19125, i64 1
+  %tmp19127 = getelementptr inbounds float* %tmp19126, i64 1
+  %tmp19128 = getelementptr inbounds float* %tmp19127, i64 1
+  %tmp19129 = getelementptr inbounds float* %tmp19128, i64 1
+  %tmp19130 = getelementptr inbounds float* %tmp19129, i64 1
+  %tmp19131 = getelementptr inbounds float* %tmp19130, i64 1
+  %tmp19132 = getelementptr inbounds float* %tmp19131, i64 1
+  %tmp19133 = getelementptr inbounds float* %tmp19132, i64 1
+  %tmp19134 = getelementptr inbounds float* %tmp19133, i64 1
+  %tmp19135 = getelementptr inbounds float* %tmp19134, i64 1
+  %tmp19136 = getelementptr inbounds float* %tmp19135, i64 1
+  %tmp19137 = getelementptr inbounds float* %tmp19136, i64 1
+  %tmp19138 = getelementptr inbounds float* %tmp19137, i64 1
+  %tmp19139 = getelementptr inbounds float* %tmp19138, i64 1
+  %tmp19140 = getelementptr inbounds float* %tmp19139, i64 1
+  %tmp19141 = getelementptr inbounds float* %tmp19140, i64 1
+  %tmp19142 = getelementptr inbounds float* %tmp19141, i64 1
+  %tmp19143 = getelementptr inbounds float* %tmp19142, i64 1
+  %tmp19144 = getelementptr inbounds float* %tmp19143, i64 1
+  %tmp19145 = getelementptr inbounds float* %tmp19144, i64 1
+  %tmp19146 = getelementptr inbounds float* %tmp19145, i64 1
+  %tmp19147 = getelementptr inbounds float* %tmp19146, i64 1
+  %tmp19148 = getelementptr inbounds float* %tmp19147, i64 1
+  %tmp19149 = getelementptr inbounds float* %tmp19148, i64 1
+  %tmp19150 = getelementptr inbounds float* %tmp19149, i64 1
+  %tmp19151 = getelementptr inbounds float* %tmp19150, i64 1
+  %tmp19152 = getelementptr inbounds float* %tmp19151, i64 1
+  %tmp19153 = getelementptr inbounds float* %tmp19152, i64 1
+  %tmp19154 = getelementptr inbounds float* %tmp19153, i64 1
+  %tmp19155 = getelementptr inbounds float* %tmp19154, i64 1
+  %tmp19156 = getelementptr inbounds float* %tmp19155, i64 1
+  %tmp19157 = getelementptr inbounds float* %tmp19156, i64 1
+  %tmp19158 = getelementptr inbounds float* %tmp19157, i64 1
+  %tmp19159 = getelementptr inbounds float* %tmp19158, i64 1
+  %tmp19160 = getelementptr inbounds float* %tmp19159, i64 1
+  %tmp19161 = getelementptr inbounds float* %tmp19160, i64 1
+  %tmp19162 = getelementptr inbounds float* %tmp19161, i64 1
+  %tmp19163 = getelementptr inbounds float* %tmp19162, i64 1
+  %tmp19164 = getelementptr inbounds float* %tmp19163, i64 1
+  %tmp19165 = getelementptr inbounds float* %tmp19164, i64 1
+  %tmp19166 = getelementptr inbounds float* %tmp19165, i64 1
+  %tmp19167 = getelementptr inbounds float* %tmp19166, i64 1
+  %tmp19168 = getelementptr inbounds float* %tmp19167, i64 1
+  %tmp19169 = getelementptr inbounds float* %tmp19168, i64 1
+  %tmp19170 = getelementptr inbounds float* %tmp19169, i64 1
+  %tmp19171 = getelementptr inbounds float* %tmp19170, i64 1
+  %tmp19172 = getelementptr inbounds float* %tmp19171, i64 1
+  %tmp19173 = getelementptr inbounds float* %tmp19172, i64 1
+  %tmp19174 = getelementptr inbounds float* %tmp19173, i64 1
+  %tmp19175 = getelementptr inbounds float* %tmp19174, i64 1
+  %tmp19176 = getelementptr inbounds float* %tmp19175, i64 1
+  %tmp19177 = getelementptr inbounds float* %tmp19176, i64 1
+  %tmp19178 = getelementptr inbounds float* %tmp19177, i64 1
+  %tmp19179 = getelementptr inbounds float* %tmp19178, i64 1
+  %tmp19180 = getelementptr inbounds float* %tmp19179, i64 1
+  %tmp19181 = getelementptr inbounds float* %tmp19180, i64 1
+  %tmp19182 = getelementptr inbounds float* %tmp19181, i64 1
+  %tmp19183 = getelementptr inbounds float* %tmp19182, i64 1
+  %tmp19184 = getelementptr inbounds float* %tmp19183, i64 1
+  %tmp19185 = getelementptr inbounds float* %tmp19184, i64 1
+  %tmp19186 = getelementptr inbounds float* %tmp19185, i64 1
+  %tmp19187 = getelementptr inbounds float* %tmp19186, i64 1
+  %tmp19188 = getelementptr inbounds float* %tmp19187, i64 1
+  %tmp19189 = getelementptr inbounds float* %tmp19188, i64 1
+  %tmp19190 = getelementptr inbounds float* %tmp19189, i64 1
+  %tmp19191 = getelementptr inbounds float* %tmp19190, i64 1
+  %tmp19192 = getelementptr inbounds float* %tmp19191, i64 1
+  %tmp19193 = getelementptr inbounds float* %tmp19192, i64 1
+  %tmp19194 = getelementptr inbounds float* %tmp19193, i64 1
+  %tmp19195 = getelementptr inbounds float* %tmp19194, i64 1
+  %tmp19196 = getelementptr inbounds float* %tmp19195, i64 1
+  %tmp19197 = getelementptr inbounds float* %tmp19196, i64 1
+  %tmp19198 = getelementptr inbounds float* %tmp19197, i64 1
+  %tmp19199 = getelementptr inbounds float* %tmp19198, i64 1
+  %tmp19200 = getelementptr inbounds float* %tmp19199, i64 1
+  %tmp19201 = getelementptr inbounds float* %tmp19200, i64 1
+  %tmp19202 = getelementptr inbounds float* %tmp19201, i64 1
+  %tmp19203 = getelementptr inbounds float* %tmp19202, i64 1
+  %tmp19204 = getelementptr inbounds float* %tmp19203, i64 1
+  %tmp19205 = getelementptr inbounds float* %tmp19204, i64 1
+  %tmp19206 = getelementptr inbounds float* %tmp19205, i64 1
+  %tmp19207 = getelementptr inbounds float* %tmp19206, i64 1
+  %tmp19208 = getelementptr inbounds float* %tmp19207, i64 1
+  %tmp19209 = getelementptr inbounds float* %tmp19208, i64 1
+  %tmp19210 = getelementptr inbounds float* %tmp19209, i64 1
+  %tmp19211 = getelementptr inbounds float* %tmp19210, i64 1
+  %tmp19212 = getelementptr inbounds float* %tmp19211, i64 1
+  %tmp19213 = getelementptr inbounds float* %tmp19212, i64 1
+  %tmp19214 = getelementptr inbounds float* %tmp19213, i64 1
+  %tmp19215 = getelementptr inbounds float* %tmp19214, i64 1
+  %tmp19216 = getelementptr inbounds float* %tmp19215, i64 1
+  %tmp19217 = getelementptr inbounds float* %tmp19216, i64 1
+  %tmp19218 = getelementptr inbounds float* %tmp19217, i64 1
+  %tmp19219 = getelementptr inbounds float* %tmp19218, i64 1
+  %tmp19220 = getelementptr inbounds float* %tmp19219, i64 1
+  %tmp19221 = getelementptr inbounds float* %tmp19220, i64 1
+  %tmp19222 = getelementptr inbounds float* %tmp19221, i64 1
+  %tmp19223 = getelementptr inbounds float* %tmp19222, i64 1
+  %tmp19224 = getelementptr inbounds float* %tmp19223, i64 1
+  %tmp19225 = getelementptr inbounds float* %tmp19224, i64 1
+  %tmp19226 = getelementptr inbounds float* %tmp19225, i64 1
+  %tmp19227 = getelementptr inbounds float* %tmp19226, i64 1
+  %tmp19228 = getelementptr inbounds float* %tmp19227, i64 1
+  %tmp19229 = getelementptr inbounds float* %tmp19228, i64 1
+  %tmp19230 = getelementptr inbounds float* %tmp19229, i64 1
+  %tmp19231 = getelementptr inbounds float* %tmp19230, i64 1
+  %tmp19232 = getelementptr inbounds float* %tmp19231, i64 1
+  %tmp19233 = getelementptr inbounds float* %tmp19232, i64 1
+  %tmp19234 = getelementptr inbounds float* %tmp19233, i64 1
+  %tmp19235 = getelementptr inbounds float* %tmp19234, i64 1
+  %tmp19236 = getelementptr inbounds float* %tmp19235, i64 1
+  %tmp19237 = getelementptr inbounds float* %tmp19236, i64 1
+  %tmp19238 = getelementptr inbounds float* %tmp19237, i64 1
+  %tmp19239 = getelementptr inbounds float* %tmp19238, i64 1
+  %tmp19240 = getelementptr inbounds float* %tmp19239, i64 1
+  %tmp19241 = getelementptr inbounds float* %tmp19240, i64 1
+  %tmp19242 = getelementptr inbounds float* %tmp19241, i64 1
+  %tmp19243 = getelementptr inbounds float* %tmp19242, i64 1
+  %tmp19244 = getelementptr inbounds float* %tmp19243, i64 1
+  %tmp19245 = getelementptr inbounds float* %tmp19244, i64 1
+  %tmp19246 = getelementptr inbounds float* %tmp19245, i64 1
+  %tmp19247 = getelementptr inbounds float* %tmp19246, i64 1
+  %tmp19248 = getelementptr inbounds float* %tmp19247, i64 1
+  %tmp19249 = getelementptr inbounds float* %tmp19248, i64 1
+  %tmp19250 = getelementptr inbounds float* %tmp19249, i64 1
+  %tmp19251 = getelementptr inbounds float* %tmp19250, i64 1
+  %tmp19252 = getelementptr inbounds float* %tmp19251, i64 1
+  %tmp19253 = getelementptr inbounds float* %tmp19252, i64 1
+  %tmp19254 = getelementptr inbounds float* %tmp19253, i64 1
+  %tmp19255 = getelementptr inbounds float* %tmp19254, i64 1
+  %tmp19256 = getelementptr inbounds float* %tmp19255, i64 1
+  %tmp19257 = getelementptr inbounds float* %tmp19256, i64 1
+  %tmp19258 = getelementptr inbounds float* %tmp19257, i64 1
+  %tmp19259 = getelementptr inbounds float* %tmp19258, i64 1
+  %tmp19260 = getelementptr inbounds float* %tmp19259, i64 1
+  %tmp19261 = getelementptr inbounds float* %tmp19260, i64 1
+  %tmp19262 = getelementptr inbounds float* %tmp19261, i64 1
+  %tmp19263 = getelementptr inbounds float* %tmp19262, i64 1
+  %tmp19264 = getelementptr inbounds float* %tmp19263, i64 1
+  %tmp19265 = getelementptr inbounds float* %tmp19264, i64 1
+  %tmp19266 = getelementptr inbounds float* %tmp19265, i64 1
+  %tmp19267 = getelementptr inbounds float* %tmp19266, i64 1
+  %tmp19268 = getelementptr inbounds float* %tmp19267, i64 1
+  %tmp19269 = getelementptr inbounds float* %tmp19268, i64 1
+  %tmp19270 = getelementptr inbounds float* %tmp19269, i64 1
+  %tmp19271 = getelementptr inbounds float* %tmp19270, i64 1
+  %tmp19272 = getelementptr inbounds float* %tmp19271, i64 1
+  %tmp19273 = getelementptr inbounds float* %tmp19272, i64 1
+  %tmp19274 = getelementptr inbounds float* %tmp19273, i64 1
+  %tmp19275 = getelementptr inbounds float* %tmp19274, i64 1
+  %tmp19276 = getelementptr inbounds float* %tmp19275, i64 1
+  %tmp19277 = getelementptr inbounds float* %tmp19276, i64 1
+  %tmp19278 = getelementptr inbounds float* %tmp19277, i64 1
+  %tmp19279 = getelementptr inbounds float* %tmp19278, i64 1
+  %tmp19280 = getelementptr inbounds float* %tmp19279, i64 1
+  %tmp19281 = getelementptr inbounds float* %tmp19280, i64 1
+  %tmp19282 = getelementptr inbounds float* %tmp19281, i64 1
+  %tmp19283 = getelementptr inbounds float* %tmp19282, i64 1
+  %tmp19284 = getelementptr inbounds float* %tmp19283, i64 1
+  %tmp19285 = getelementptr inbounds float* %tmp19284, i64 1
+  %tmp19286 = getelementptr inbounds float* %tmp19285, i64 1
+  %tmp19287 = getelementptr inbounds float* %tmp19286, i64 1
+  %tmp19288 = getelementptr inbounds float* %tmp19287, i64 1
+  %tmp19289 = getelementptr inbounds float* %tmp19288, i64 1
+  %tmp19290 = getelementptr inbounds float* %tmp19289, i64 1
+  %tmp19291 = getelementptr inbounds float* %tmp19290, i64 1
+  %tmp19292 = getelementptr inbounds float* %tmp19291, i64 1
+  %tmp19293 = getelementptr inbounds float* %tmp19292, i64 1
+  %tmp19294 = getelementptr inbounds float* %tmp19293, i64 1
+  %tmp19295 = getelementptr inbounds float* %tmp19294, i64 1
+  %tmp19296 = getelementptr inbounds float* %tmp19295, i64 1
+  %tmp19297 = getelementptr inbounds float* %tmp19296, i64 1
+  %tmp19298 = getelementptr inbounds float* %tmp19297, i64 1
+  %tmp19299 = getelementptr inbounds float* %tmp19298, i64 1
+  %tmp19300 = getelementptr inbounds float* %tmp19299, i64 1
+  %tmp19301 = getelementptr inbounds float* %tmp19300, i64 1
+  %tmp19302 = getelementptr inbounds float* %tmp19301, i64 1
+  %tmp19303 = getelementptr inbounds float* %tmp19302, i64 1
+  %tmp19304 = getelementptr inbounds float* %tmp19303, i64 1
+  %tmp19305 = getelementptr inbounds float* %tmp19304, i64 1
+  %tmp19306 = getelementptr inbounds float* %tmp19305, i64 1
+  %tmp19307 = getelementptr inbounds float* %tmp19306, i64 1
+  %tmp19308 = getelementptr inbounds float* %tmp19307, i64 1
+  %tmp19309 = getelementptr inbounds float* %tmp19308, i64 1
+  %tmp19310 = getelementptr inbounds float* %tmp19309, i64 1
+  %tmp19311 = getelementptr inbounds float* %tmp19310, i64 1
+  %tmp19312 = getelementptr inbounds float* %tmp19311, i64 1
+  %tmp19313 = getelementptr inbounds float* %tmp19312, i64 1
+  %tmp19314 = getelementptr inbounds float* %tmp19313, i64 1
+  %tmp19315 = getelementptr inbounds float* %tmp19314, i64 1
+  %tmp19316 = getelementptr inbounds float* %tmp19315, i64 1
+  %tmp19317 = getelementptr inbounds float* %tmp19316, i64 1
+  %tmp19318 = getelementptr inbounds float* %tmp19317, i64 1
+  %tmp19319 = getelementptr inbounds float* %tmp19318, i64 1
+  %tmp19320 = getelementptr inbounds float* %tmp19319, i64 1
+  %tmp19321 = getelementptr inbounds float* %tmp19320, i64 1
+  %tmp19322 = getelementptr inbounds float* %tmp19321, i64 1
+  %tmp19323 = getelementptr inbounds float* %tmp19322, i64 1
+  %tmp19324 = getelementptr inbounds float* %tmp19323, i64 1
+  %tmp19325 = getelementptr inbounds float* %tmp19324, i64 1
+  %tmp19326 = getelementptr inbounds float* %tmp19325, i64 1
+  %tmp19327 = getelementptr inbounds float* %tmp19326, i64 1
+  %tmp19328 = getelementptr inbounds float* %tmp19327, i64 1
+  %tmp19329 = getelementptr inbounds float* %tmp19328, i64 1
+  %tmp19330 = getelementptr inbounds float* %tmp19329, i64 1
+  %tmp19331 = getelementptr inbounds float* %tmp19330, i64 1
+  %tmp19332 = getelementptr inbounds float* %tmp19331, i64 1
+  %tmp19333 = getelementptr inbounds float* %tmp19332, i64 1
+  %tmp19334 = getelementptr inbounds float* %tmp19333, i64 1
+  %tmp19335 = getelementptr inbounds float* %tmp19334, i64 1
+  %tmp19336 = getelementptr inbounds float* %tmp19335, i64 1
+  %tmp19337 = getelementptr inbounds float* %tmp19336, i64 1
+  %tmp19338 = getelementptr inbounds float* %tmp19337, i64 1
+  %tmp19339 = getelementptr inbounds float* %tmp19338, i64 1
+  %tmp19340 = getelementptr inbounds float* %tmp19339, i64 1
+  %tmp19341 = getelementptr inbounds float* %tmp19340, i64 1
+  %tmp19342 = getelementptr inbounds float* %tmp19341, i64 1
+  %tmp19343 = getelementptr inbounds float* %tmp19342, i64 1
+  %tmp19344 = getelementptr inbounds float* %tmp19343, i64 1
+  %tmp19345 = getelementptr inbounds float* %tmp19344, i64 1
+  %tmp19346 = getelementptr inbounds float* %tmp19345, i64 1
+  %tmp19347 = getelementptr inbounds float* %tmp19346, i64 1
+  %tmp19348 = getelementptr inbounds float* %tmp19347, i64 1
+  %tmp19349 = getelementptr inbounds float* %tmp19348, i64 1
+  %tmp19350 = getelementptr inbounds float* %tmp19349, i64 1
+  %tmp19351 = getelementptr inbounds float* %tmp19350, i64 1
+  %tmp19352 = getelementptr inbounds float* %tmp19351, i64 1
+  %tmp19353 = getelementptr inbounds float* %tmp19352, i64 1
+  %tmp19354 = getelementptr inbounds float* %tmp19353, i64 1
+  %tmp19355 = getelementptr inbounds float* %tmp19354, i64 1
+  %tmp19356 = getelementptr inbounds float* %tmp19355, i64 1
+  %tmp19357 = getelementptr inbounds float* %tmp19356, i64 1
+  %tmp19358 = getelementptr inbounds float* %tmp19357, i64 1
+  %tmp19359 = getelementptr inbounds float* %tmp19358, i64 1
+  %tmp19360 = getelementptr inbounds float* %tmp19359, i64 1
+  %tmp19361 = getelementptr inbounds float* %tmp19360, i64 1
+  %tmp19362 = getelementptr inbounds float* %tmp19361, i64 1
+  %tmp19363 = getelementptr inbounds float* %tmp19362, i64 1
+  %tmp19364 = getelementptr inbounds float* %tmp19363, i64 1
+  %tmp19365 = getelementptr inbounds float* %tmp19364, i64 1
+  %tmp19366 = getelementptr inbounds float* %tmp19365, i64 1
+  %tmp19367 = getelementptr inbounds float* %tmp19366, i64 1
+  %tmp19368 = getelementptr inbounds float* %tmp19367, i64 1
+  %tmp19369 = getelementptr inbounds float* %tmp19368, i64 1
+  %tmp19370 = getelementptr inbounds float* %tmp19369, i64 1
+  %tmp19371 = getelementptr inbounds float* %tmp19370, i64 1
+  %tmp19372 = getelementptr inbounds float* %tmp19371, i64 1
+  %tmp19373 = getelementptr inbounds float* %tmp19372, i64 1
+  %tmp19374 = getelementptr inbounds float* %tmp19373, i64 1
+  %tmp19375 = getelementptr inbounds float* %tmp19374, i64 1
+  %tmp19376 = getelementptr inbounds float* %tmp19375, i64 1
+  %tmp19377 = getelementptr inbounds float* %tmp19376, i64 1
+  %tmp19378 = getelementptr inbounds float* %tmp19377, i64 1
+  %tmp19379 = getelementptr inbounds float* %tmp19378, i64 1
+  %tmp19380 = getelementptr inbounds float* %tmp19379, i64 1
+  %tmp19381 = getelementptr inbounds float* %tmp19380, i64 1
+  %tmp19382 = getelementptr inbounds float* %tmp19381, i64 1
+  %tmp19383 = getelementptr inbounds float* %tmp19382, i64 1
+  %tmp19384 = getelementptr inbounds float* %tmp19383, i64 1
+  %tmp19385 = getelementptr inbounds float* %tmp19384, i64 1
+  %tmp19386 = getelementptr inbounds float* %tmp19385, i64 1
+  %tmp19387 = getelementptr inbounds float* %tmp19386, i64 1
+  %tmp19388 = getelementptr inbounds float* %tmp19387, i64 1
+  %tmp19389 = getelementptr inbounds float* %tmp19388, i64 1
+  %tmp19390 = getelementptr inbounds float* %tmp19389, i64 1
+  %tmp19391 = getelementptr inbounds float* %tmp19390, i64 1
+  %tmp19392 = getelementptr inbounds float* %tmp19391, i64 1
+  %tmp19393 = getelementptr inbounds float* %tmp19392, i64 1
+  %tmp19394 = getelementptr inbounds float* %tmp19393, i64 1
+  %tmp19395 = getelementptr inbounds float* %tmp19394, i64 1
+  %tmp19396 = getelementptr inbounds float* %tmp19395, i64 1
+  %tmp19397 = getelementptr inbounds float* %tmp19396, i64 1
+  %tmp19398 = getelementptr inbounds float* %tmp19397, i64 1
+  %tmp19399 = getelementptr inbounds float* %tmp19398, i64 1
+  %tmp19400 = getelementptr inbounds float* %tmp19399, i64 1
+  %tmp19401 = getelementptr inbounds float* %tmp19400, i64 1
+  %tmp19402 = getelementptr inbounds float* %tmp19401, i64 1
+  %tmp19403 = getelementptr inbounds float* %tmp19402, i64 1
+  %tmp19404 = getelementptr inbounds float* %tmp19403, i64 1
+  %tmp19405 = getelementptr inbounds float* %tmp19404, i64 1
+  %tmp19406 = getelementptr inbounds float* %tmp19405, i64 1
+  %tmp19407 = getelementptr inbounds float* %tmp19406, i64 1
+  %tmp19408 = getelementptr inbounds float* %tmp19407, i64 1
+  %tmp19409 = getelementptr inbounds float* %tmp19408, i64 1
+  %tmp19410 = getelementptr inbounds float* %tmp19409, i64 1
+  %tmp19411 = getelementptr inbounds float* %tmp19410, i64 1
+  %tmp19412 = getelementptr inbounds float* %tmp19411, i64 1
+  %tmp19413 = getelementptr inbounds float* %tmp19412, i64 1
+  %tmp19414 = getelementptr inbounds float* %tmp19413, i64 1
+  %tmp19415 = getelementptr inbounds float* %tmp19414, i64 1
+  %tmp19416 = getelementptr inbounds float* %tmp19415, i64 1
+  %tmp19417 = getelementptr inbounds float* %tmp19416, i64 1
+  %tmp19418 = getelementptr inbounds float* %tmp19417, i64 1
+  %tmp19419 = getelementptr inbounds float* %tmp19418, i64 1
+  %tmp19420 = getelementptr inbounds float* %tmp19419, i64 1
+  %tmp19421 = getelementptr inbounds float* %tmp19420, i64 1
+  %tmp19422 = getelementptr inbounds float* %tmp19421, i64 1
+  %tmp19423 = getelementptr inbounds float* %tmp19422, i64 1
+  %tmp19424 = getelementptr inbounds float* %tmp19423, i64 1
+  %tmp19425 = getelementptr inbounds float* %tmp19424, i64 1
+  %tmp19426 = getelementptr inbounds float* %tmp19425, i64 1
+  %tmp19427 = getelementptr inbounds float* %tmp19426, i64 1
+  %tmp19428 = getelementptr inbounds float* %tmp19427, i64 1
+  %tmp19429 = getelementptr inbounds float* %tmp19428, i64 1
+  %tmp19430 = getelementptr inbounds float* %tmp19429, i64 1
+  %tmp19431 = getelementptr inbounds float* %tmp19430, i64 1
+  %tmp19432 = getelementptr inbounds float* %tmp19431, i64 1
+  %tmp19433 = getelementptr inbounds float* %tmp19432, i64 1
+  %tmp19434 = getelementptr inbounds float* %tmp19433, i64 1
+  %tmp19435 = getelementptr inbounds float* %tmp19434, i64 1
+  %tmp19436 = getelementptr inbounds float* %tmp19435, i64 1
+  %tmp19437 = getelementptr inbounds float* %tmp19436, i64 1
+  %tmp19438 = getelementptr inbounds float* %tmp19437, i64 1
+  %tmp19439 = getelementptr inbounds float* %tmp19438, i64 1
+  %tmp19440 = getelementptr inbounds float* %tmp19439, i64 1
+  %tmp19441 = getelementptr inbounds float* %tmp19440, i64 1
+  %tmp19442 = getelementptr inbounds float* %tmp19441, i64 1
+  %tmp19443 = getelementptr inbounds float* %tmp19442, i64 1
+  %tmp19444 = getelementptr inbounds float* %tmp19443, i64 1
+  %tmp19445 = getelementptr inbounds float* %tmp19444, i64 1
+  %tmp19446 = getelementptr inbounds float* %tmp19445, i64 1
+  %tmp19447 = getelementptr inbounds float* %tmp19446, i64 1
+  %tmp19448 = getelementptr inbounds float* %tmp19447, i64 1
+  %tmp19449 = getelementptr inbounds float* %tmp19448, i64 1
+  %tmp19450 = getelementptr inbounds float* %tmp19449, i64 1
+  %tmp19451 = getelementptr inbounds float* %tmp19450, i64 1
+  %tmp19452 = getelementptr inbounds float* %tmp19451, i64 1
+  %tmp19453 = getelementptr inbounds float* %tmp19452, i64 1
+  %tmp19454 = getelementptr inbounds float* %tmp19453, i64 1
+  %tmp19455 = getelementptr inbounds float* %tmp19454, i64 1
+  %tmp19456 = getelementptr inbounds float* %tmp19455, i64 1
+  %tmp19457 = getelementptr inbounds float* %tmp19456, i64 1
+  %tmp19458 = getelementptr inbounds float* %tmp19457, i64 1
+  %tmp19459 = getelementptr inbounds float* %tmp19458, i64 1
+  %tmp19460 = getelementptr inbounds float* %tmp19459, i64 1
+  %tmp19461 = getelementptr inbounds float* %tmp19460, i64 1
+  %tmp19462 = getelementptr inbounds float* %tmp19461, i64 1
+  %tmp19463 = getelementptr inbounds float* %tmp19462, i64 1
+  %tmp19464 = getelementptr inbounds float* %tmp19463, i64 1
+  %tmp19465 = getelementptr inbounds float* %tmp19464, i64 1
+  %tmp19466 = getelementptr inbounds float* %tmp19465, i64 1
+  %tmp19467 = getelementptr inbounds float* %tmp19466, i64 1
+  %tmp19468 = getelementptr inbounds float* %tmp19467, i64 1
+  %tmp19469 = getelementptr inbounds float* %tmp19468, i64 1
+  %tmp19470 = getelementptr inbounds float* %tmp19469, i64 1
+  %tmp19471 = getelementptr inbounds float* %tmp19470, i64 1
+  %tmp19472 = getelementptr inbounds float* %tmp19471, i64 1
+  %tmp19473 = getelementptr inbounds float* %tmp19472, i64 1
+  %tmp19474 = getelementptr inbounds float* %tmp19473, i64 1
+  %tmp19475 = getelementptr inbounds float* %tmp19474, i64 1
+  %tmp19476 = getelementptr inbounds float* %tmp19475, i64 1
+  %tmp19477 = getelementptr inbounds float* %tmp19476, i64 1
+  %tmp19478 = getelementptr inbounds float* %tmp19477, i64 1
+  %tmp19479 = getelementptr inbounds float* %tmp19478, i64 1
+  %tmp19480 = getelementptr inbounds float* %tmp19479, i64 1
+  %tmp19481 = getelementptr inbounds float* %tmp19480, i64 1
+  %tmp19482 = getelementptr inbounds float* %tmp19481, i64 1
+  %tmp19483 = getelementptr inbounds float* %tmp19482, i64 1
+  %tmp19484 = getelementptr inbounds float* %tmp19483, i64 1
+  %tmp19485 = getelementptr inbounds float* %tmp19484, i64 1
+  %tmp19486 = getelementptr inbounds float* %tmp19485, i64 1
+  %tmp19487 = getelementptr inbounds float* %tmp19486, i64 1
+  %tmp19488 = getelementptr inbounds float* %tmp19487, i64 1
+  %tmp19489 = getelementptr inbounds float* %tmp19488, i64 1
+  %tmp19490 = getelementptr inbounds float* %tmp19489, i64 1
+  %tmp19491 = getelementptr inbounds float* %tmp19490, i64 1
+  %tmp19492 = getelementptr inbounds float* %tmp19491, i64 1
+  %tmp19493 = getelementptr inbounds float* %tmp19492, i64 1
+  %tmp19494 = getelementptr inbounds float* %tmp19493, i64 1
+  %tmp19495 = getelementptr inbounds float* %tmp19494, i64 1
+  %tmp19496 = getelementptr inbounds float* %tmp19495, i64 1
+  %tmp19497 = getelementptr inbounds float* %tmp19496, i64 1
+  %tmp19498 = getelementptr inbounds float* %tmp19497, i64 1
+  %tmp19499 = getelementptr inbounds float* %tmp19498, i64 1
+  %tmp19500 = getelementptr inbounds float* %tmp19499, i64 1
+  %tmp19501 = getelementptr inbounds float* %tmp19500, i64 1
+  %tmp19502 = getelementptr inbounds float* %tmp19501, i64 1
+  %tmp19503 = getelementptr inbounds float* %tmp19502, i64 1
+  %tmp19504 = getelementptr inbounds float* %tmp19503, i64 1
+  %tmp19505 = getelementptr inbounds float* %tmp19504, i64 1
+  %tmp19506 = getelementptr inbounds float* %tmp19505, i64 1
+  %tmp19507 = getelementptr inbounds float* %tmp19506, i64 1
+  %tmp19508 = getelementptr inbounds float* %tmp19507, i64 1
+  %tmp19509 = getelementptr inbounds float* %tmp19508, i64 1
+  %tmp19510 = getelementptr inbounds float* %tmp19509, i64 1
+  %tmp19511 = getelementptr inbounds float* %tmp19510, i64 1
+  %tmp19512 = getelementptr inbounds float* %tmp19511, i64 1
+  %tmp19513 = getelementptr inbounds float* %tmp19512, i64 1
+  %tmp19514 = getelementptr inbounds float* %tmp19513, i64 1
+  %tmp19515 = getelementptr inbounds float* %tmp19514, i64 1
+  %tmp19516 = getelementptr inbounds float* %tmp19515, i64 1
+  %tmp19517 = getelementptr inbounds float* %tmp19516, i64 1
+  %tmp19518 = getelementptr inbounds float* %tmp19517, i64 1
+  %tmp19519 = getelementptr inbounds float* %tmp19518, i64 1
+  %tmp19520 = getelementptr inbounds float* %tmp19519, i64 1
+  %tmp19521 = getelementptr inbounds float* %tmp19520, i64 1
+  %tmp19522 = getelementptr inbounds float* %tmp19521, i64 1
+  %tmp19523 = getelementptr inbounds float* %tmp19522, i64 1
+  %tmp19524 = getelementptr inbounds float* %tmp19523, i64 1
+  %tmp19525 = getelementptr inbounds float* %tmp19524, i64 1
+  %tmp19526 = getelementptr inbounds float* %tmp19525, i64 1
+  %tmp19527 = getelementptr inbounds float* %tmp19526, i64 1
+  %tmp19528 = getelementptr inbounds float* %tmp19527, i64 1
+  %tmp19529 = getelementptr inbounds float* %tmp19528, i64 1
+  %tmp19530 = getelementptr inbounds float* %tmp19529, i64 1
+  %tmp19531 = getelementptr inbounds float* %tmp19530, i64 1
+  %tmp19532 = getelementptr inbounds float* %tmp19531, i64 1
+  %tmp19533 = getelementptr inbounds float* %tmp19532, i64 1
+  %tmp19534 = getelementptr inbounds float* %tmp19533, i64 1
+  %tmp19535 = getelementptr inbounds float* %tmp19534, i64 1
+  %tmp19536 = getelementptr inbounds float* %tmp19535, i64 1
+  %tmp19537 = getelementptr inbounds float* %tmp19536, i64 1
+  %tmp19538 = getelementptr inbounds float* %tmp19537, i64 1
+  %tmp19539 = getelementptr inbounds float* %tmp19538, i64 1
+  %tmp19540 = getelementptr inbounds float* %tmp19539, i64 1
+  %tmp19541 = getelementptr inbounds float* %tmp19540, i64 1
+  %tmp19542 = getelementptr inbounds float* %tmp19541, i64 1
+  %tmp19543 = getelementptr inbounds float* %tmp19542, i64 1
+  %tmp19544 = getelementptr inbounds float* %tmp19543, i64 1
+  %tmp19545 = getelementptr inbounds float* %tmp19544, i64 1
+  %tmp19546 = getelementptr inbounds float* %tmp19545, i64 1
+  %tmp19547 = getelementptr inbounds float* %tmp19546, i64 1
+  %tmp19548 = getelementptr inbounds float* %tmp19547, i64 1
+  %tmp19549 = getelementptr inbounds float* %tmp19548, i64 1
+  %tmp19550 = getelementptr inbounds float* %tmp19549, i64 1
+  %tmp19551 = getelementptr inbounds float* %tmp19550, i64 1
+  %tmp19552 = getelementptr inbounds float* %tmp19551, i64 1
+  %tmp19553 = getelementptr inbounds float* %tmp19552, i64 1
+  %tmp19554 = getelementptr inbounds float* %tmp19553, i64 1
+  %tmp19555 = getelementptr inbounds float* %tmp19554, i64 1
+  %tmp19556 = getelementptr inbounds float* %tmp19555, i64 1
+  %tmp19557 = getelementptr inbounds float* %tmp19556, i64 1
+  %tmp19558 = getelementptr inbounds float* %tmp19557, i64 1
+  %tmp19559 = getelementptr inbounds float* %tmp19558, i64 1
+  %tmp19560 = getelementptr inbounds float* %tmp19559, i64 1
+  %tmp19561 = getelementptr inbounds float* %tmp19560, i64 1
+  %tmp19562 = getelementptr inbounds float* %tmp19561, i64 1
+  %tmp19563 = getelementptr inbounds float* %tmp19562, i64 1
+  %tmp19564 = getelementptr inbounds float* %tmp19563, i64 1
+  %tmp19565 = getelementptr inbounds float* %tmp19564, i64 1
+  %tmp19566 = getelementptr inbounds float* %tmp19565, i64 1
+  %tmp19567 = getelementptr inbounds float* %tmp19566, i64 1
+  %tmp19568 = getelementptr inbounds float* %tmp19567, i64 1
+  %tmp19569 = getelementptr inbounds float* %tmp19568, i64 1
+  %tmp19570 = getelementptr inbounds float* %tmp19569, i64 1
+  %tmp19571 = getelementptr inbounds float* %tmp19570, i64 1
+  %tmp19572 = getelementptr inbounds float* %tmp19571, i64 1
+  %tmp19573 = getelementptr inbounds float* %tmp19572, i64 1
+  %tmp19574 = getelementptr inbounds float* %tmp19573, i64 1
+  %tmp19575 = getelementptr inbounds float* %tmp19574, i64 1
+  %tmp19576 = getelementptr inbounds float* %tmp19575, i64 1
+  %tmp19577 = getelementptr inbounds float* %tmp19576, i64 1
+  %tmp19578 = getelementptr inbounds float* %tmp19577, i64 1
+  %tmp19579 = getelementptr inbounds float* %tmp19578, i64 1
+  %tmp19580 = getelementptr inbounds float* %tmp19579, i64 1
+  %tmp19581 = getelementptr inbounds float* %tmp19580, i64 1
+  %tmp19582 = getelementptr inbounds float* %tmp19581, i64 1
+  %tmp19583 = getelementptr inbounds float* %tmp19582, i64 1
+  %tmp19584 = getelementptr inbounds float* %tmp19583, i64 1
+  %tmp19585 = getelementptr inbounds float* %tmp19584, i64 1
+  %tmp19586 = getelementptr inbounds float* %tmp19585, i64 1
+  %tmp19587 = getelementptr inbounds float* %tmp19586, i64 1
+  %tmp19588 = getelementptr inbounds float* %tmp19587, i64 1
+  %tmp19589 = getelementptr inbounds float* %tmp19588, i64 1
+  %tmp19590 = getelementptr inbounds float* %tmp19589, i64 1
+  %tmp19591 = getelementptr inbounds float* %tmp19590, i64 1
+  %tmp19592 = getelementptr inbounds float* %tmp19591, i64 1
+  %tmp19593 = getelementptr inbounds float* %tmp19592, i64 1
+  %tmp19594 = getelementptr inbounds float* %tmp19593, i64 1
+  %tmp19595 = getelementptr inbounds float* %tmp19594, i64 1
+  %tmp19596 = getelementptr inbounds float* %tmp19595, i64 1
+  %tmp19597 = getelementptr inbounds float* %tmp19596, i64 1
+  %tmp19598 = getelementptr inbounds float* %tmp19597, i64 1
+  %tmp19599 = getelementptr inbounds float* %tmp19598, i64 1
+  %tmp19600 = getelementptr inbounds float* %tmp19599, i64 1
+  %tmp19601 = getelementptr inbounds float* %tmp19600, i64 1
+  %tmp19602 = getelementptr inbounds float* %tmp19601, i64 1
+  %tmp19603 = getelementptr inbounds float* %tmp19602, i64 1
+  %tmp19604 = getelementptr inbounds float* %tmp19603, i64 1
+  %tmp19605 = getelementptr inbounds float* %tmp19604, i64 1
+  %tmp19606 = getelementptr inbounds float* %tmp19605, i64 1
+  %tmp19607 = getelementptr inbounds float* %tmp19606, i64 1
+  %tmp19608 = getelementptr inbounds float* %tmp19607, i64 1
+  %tmp19609 = getelementptr inbounds float* %tmp19608, i64 1
+  %tmp19610 = getelementptr inbounds float* %tmp19609, i64 1
+  %tmp19611 = getelementptr inbounds float* %tmp19610, i64 1
+  %tmp19612 = getelementptr inbounds float* %tmp19611, i64 1
+  %tmp19613 = getelementptr inbounds float* %tmp19612, i64 1
+  %tmp19614 = getelementptr inbounds float* %tmp19613, i64 1
+  %tmp19615 = getelementptr inbounds float* %tmp19614, i64 1
+  %tmp19616 = getelementptr inbounds float* %tmp19615, i64 1
+  %tmp19617 = getelementptr inbounds float* %tmp19616, i64 1
+  %tmp19618 = getelementptr inbounds float* %tmp19617, i64 1
+  %tmp19619 = getelementptr inbounds float* %tmp19618, i64 1
+  %tmp19620 = getelementptr inbounds float* %tmp19619, i64 1
+  %tmp19621 = getelementptr inbounds float* %tmp19620, i64 1
+  %tmp19622 = getelementptr inbounds float* %tmp19621, i64 1
+  %tmp19623 = getelementptr inbounds float* %tmp19622, i64 1
+  %tmp19624 = getelementptr inbounds float* %tmp19623, i64 1
+  %tmp19625 = getelementptr inbounds float* %tmp19624, i64 1
+  %tmp19626 = getelementptr inbounds float* %tmp19625, i64 1
+  %tmp19627 = getelementptr inbounds float* %tmp19626, i64 1
+  %tmp19628 = getelementptr inbounds float* %tmp19627, i64 1
+  %tmp19629 = getelementptr inbounds float* %tmp19628, i64 1
+  %tmp19630 = getelementptr inbounds float* %tmp19629, i64 1
+  %tmp19631 = getelementptr inbounds float* %tmp19630, i64 1
+  %tmp19632 = getelementptr inbounds float* %tmp19631, i64 1
+  %tmp19633 = getelementptr inbounds float* %tmp19632, i64 1
+  %tmp19634 = getelementptr inbounds float* %tmp19633, i64 1
+  %tmp19635 = getelementptr inbounds float* %tmp19634, i64 1
+  %tmp19636 = getelementptr inbounds float* %tmp19635, i64 1
+  %tmp19637 = getelementptr inbounds float* %tmp19636, i64 1
+  %tmp19638 = getelementptr inbounds float* %tmp19637, i64 1
+  %tmp19639 = getelementptr inbounds float* %tmp19638, i64 1
+  %tmp19640 = getelementptr inbounds float* %tmp19639, i64 1
+  %tmp19641 = getelementptr inbounds float* %tmp19640, i64 1
+  %tmp19642 = getelementptr inbounds float* %tmp19641, i64 1
+  %tmp19643 = getelementptr inbounds float* %tmp19642, i64 1
+  %tmp19644 = getelementptr inbounds float* %tmp19643, i64 1
+  %tmp19645 = getelementptr inbounds float* %tmp19644, i64 1
+  %tmp19646 = getelementptr inbounds float* %tmp19645, i64 1
+  %tmp19647 = getelementptr inbounds float* %tmp19646, i64 1
+  %tmp19648 = getelementptr inbounds float* %tmp19647, i64 1
+  %tmp19649 = getelementptr inbounds float* %tmp19648, i64 1
+  %tmp19650 = getelementptr inbounds float* %tmp19649, i64 1
+  %tmp19651 = getelementptr inbounds float* %tmp19650, i64 1
+  %tmp19652 = getelementptr inbounds float* %tmp19651, i64 1
+  %tmp19653 = getelementptr inbounds float* %tmp19652, i64 1
+  %tmp19654 = getelementptr inbounds float* %tmp19653, i64 1
+  %tmp19655 = getelementptr inbounds float* %tmp19654, i64 1
+  %tmp19656 = getelementptr inbounds float* %tmp19655, i64 1
+  %tmp19657 = getelementptr inbounds float* %tmp19656, i64 1
+  %tmp19658 = getelementptr inbounds float* %tmp19657, i64 1
+  %tmp19659 = getelementptr inbounds float* %tmp19658, i64 1
+  %tmp19660 = getelementptr inbounds float* %tmp19659, i64 1
+  %tmp19661 = getelementptr inbounds float* %tmp19660, i64 1
+  %tmp19662 = getelementptr inbounds float* %tmp19661, i64 1
+  %tmp19663 = getelementptr inbounds float* %tmp19662, i64 1
+  %tmp19664 = getelementptr inbounds float* %tmp19663, i64 1
+  %tmp19665 = getelementptr inbounds float* %tmp19664, i64 1
+  %tmp19666 = getelementptr inbounds float* %tmp19665, i64 1
+  %tmp19667 = getelementptr inbounds float* %tmp19666, i64 1
+  %tmp19668 = getelementptr inbounds float* %tmp19667, i64 1
+  %tmp19669 = getelementptr inbounds float* %tmp19668, i64 1
+  %tmp19670 = getelementptr inbounds float* %tmp19669, i64 1
+  %tmp19671 = getelementptr inbounds float* %tmp19670, i64 1
+  %tmp19672 = getelementptr inbounds float* %tmp19671, i64 1
+  %tmp19673 = getelementptr inbounds float* %tmp19672, i64 1
+  %tmp19674 = getelementptr inbounds float* %tmp19673, i64 1
+  %tmp19675 = getelementptr inbounds float* %tmp19674, i64 1
+  %tmp19676 = getelementptr inbounds float* %tmp19675, i64 1
+  %tmp19677 = getelementptr inbounds float* %tmp19676, i64 1
+  %tmp19678 = getelementptr inbounds float* %tmp19677, i64 1
+  %tmp19679 = getelementptr inbounds float* %tmp19678, i64 1
+  %tmp19680 = getelementptr inbounds float* %tmp19679, i64 1
+  %tmp19681 = getelementptr inbounds float* %tmp19680, i64 1
+  %tmp19682 = getelementptr inbounds float* %tmp19681, i64 1
+  %tmp19683 = getelementptr inbounds float* %tmp19682, i64 1
+  %tmp19684 = getelementptr inbounds float* %tmp19683, i64 1
+  %tmp19685 = getelementptr inbounds float* %tmp19684, i64 1
+  %tmp19686 = getelementptr inbounds float* %tmp19685, i64 1
+  %tmp19687 = getelementptr inbounds float* %tmp19686, i64 1
+  %tmp19688 = getelementptr inbounds float* %tmp19687, i64 1
+  %tmp19689 = getelementptr inbounds float* %tmp19688, i64 1
+  %tmp19690 = getelementptr inbounds float* %tmp19689, i64 1
+  %tmp19691 = getelementptr inbounds float* %tmp19690, i64 1
+  %tmp19692 = getelementptr inbounds float* %tmp19691, i64 1
+  %tmp19693 = getelementptr inbounds float* %tmp19692, i64 1
+  %tmp19694 = getelementptr inbounds float* %tmp19693, i64 1
+  %tmp19695 = getelementptr inbounds float* %tmp19694, i64 1
+  %tmp19696 = getelementptr inbounds float* %tmp19695, i64 1
+  %tmp19697 = getelementptr inbounds float* %tmp19696, i64 1
+  %tmp19698 = getelementptr inbounds float* %tmp19697, i64 1
+  %tmp19699 = getelementptr inbounds float* %tmp19698, i64 1
+  %tmp19700 = getelementptr inbounds float* %tmp19699, i64 1
+  %tmp19701 = getelementptr inbounds float* %tmp19700, i64 1
+  %tmp19702 = getelementptr inbounds float* %tmp19701, i64 1
+  %tmp19703 = getelementptr inbounds float* %tmp19702, i64 1
+  %tmp19704 = getelementptr inbounds float* %tmp19703, i64 1
+  %tmp19705 = getelementptr inbounds float* %tmp19704, i64 1
+  %tmp19706 = getelementptr inbounds float* %tmp19705, i64 1
+  %tmp19707 = getelementptr inbounds float* %tmp19706, i64 1
+  %tmp19708 = getelementptr inbounds float* %tmp19707, i64 1
+  %tmp19709 = getelementptr inbounds float* %tmp19708, i64 1
+  %tmp19710 = getelementptr inbounds float* %tmp19709, i64 1
+  %tmp19711 = getelementptr inbounds float* %tmp19710, i64 1
+  %tmp19712 = getelementptr inbounds float* %tmp19711, i64 1
+  %tmp19713 = getelementptr inbounds float* %tmp19712, i64 1
+  %tmp19714 = getelementptr inbounds float* %tmp19713, i64 1
+  %tmp19715 = getelementptr inbounds float* %tmp19714, i64 1
+  %tmp19716 = getelementptr inbounds float* %tmp19715, i64 1
+  %tmp19717 = getelementptr inbounds float* %tmp19716, i64 1
+  %tmp19718 = getelementptr inbounds float* %tmp19717, i64 1
+  %tmp19719 = getelementptr inbounds float* %tmp19718, i64 1
+  %tmp19720 = getelementptr inbounds float* %tmp19719, i64 1
+  %tmp19721 = getelementptr inbounds float* %tmp19720, i64 1
+  %tmp19722 = getelementptr inbounds float* %tmp19721, i64 1
+  %tmp19723 = getelementptr inbounds float* %tmp19722, i64 1
+  %tmp19724 = getelementptr inbounds float* %tmp19723, i64 1
+  %tmp19725 = getelementptr inbounds float* %tmp19724, i64 1
+  %tmp19726 = getelementptr inbounds float* %tmp19725, i64 1
+  %tmp19727 = getelementptr inbounds float* %tmp19726, i64 1
+  %tmp19728 = getelementptr inbounds float* %tmp19727, i64 1
+  %tmp19729 = getelementptr inbounds float* %tmp19728, i64 1
+  %tmp19730 = getelementptr inbounds float* %tmp19729, i64 1
+  %tmp19731 = getelementptr inbounds float* %tmp19730, i64 1
+  %tmp19732 = getelementptr inbounds float* %tmp19731, i64 1
+  %tmp19733 = getelementptr inbounds float* %tmp19732, i64 1
+  %tmp19734 = getelementptr inbounds float* %tmp19733, i64 1
+  %tmp19735 = getelementptr inbounds float* %tmp19734, i64 1
+  %tmp19736 = getelementptr inbounds float* %tmp19735, i64 1
+  %tmp19737 = getelementptr inbounds float* %tmp19736, i64 1
+  %tmp19738 = getelementptr inbounds float* %tmp19737, i64 1
+  %tmp19739 = getelementptr inbounds float* %tmp19738, i64 1
+  %tmp19740 = getelementptr inbounds float* %tmp19739, i64 1
+  %tmp19741 = getelementptr inbounds float* %tmp19740, i64 1
+  %tmp19742 = getelementptr inbounds float* %tmp19741, i64 1
+  %tmp19743 = getelementptr inbounds float* %tmp19742, i64 1
+  %tmp19744 = getelementptr inbounds float* %tmp19743, i64 1
+  %tmp19745 = getelementptr inbounds float* %tmp19744, i64 1
+  %tmp19746 = getelementptr inbounds float* %tmp19745, i64 1
+  %tmp19747 = getelementptr inbounds float* %tmp19746, i64 1
+  %tmp19748 = getelementptr inbounds float* %tmp19747, i64 1
+  %tmp19749 = getelementptr inbounds float* %tmp19748, i64 1
+  %tmp19750 = getelementptr inbounds float* %tmp19749, i64 1
+  %tmp19751 = getelementptr inbounds float* %tmp19750, i64 1
+  %tmp19752 = getelementptr inbounds float* %tmp19751, i64 1
+  %tmp19753 = getelementptr inbounds float* %tmp19752, i64 1
+  %tmp19754 = getelementptr inbounds float* %tmp19753, i64 1
+  %tmp19755 = getelementptr inbounds float* %tmp19754, i64 1
+  %tmp19756 = getelementptr inbounds float* %tmp19755, i64 1
+  %tmp19757 = getelementptr inbounds float* %tmp19756, i64 1
+  %tmp19758 = getelementptr inbounds float* %tmp19757, i64 1
+  %tmp19759 = getelementptr inbounds float* %tmp19758, i64 1
+  %tmp19760 = getelementptr inbounds float* %tmp19759, i64 1
+  %tmp19761 = getelementptr inbounds float* %tmp19760, i64 1
+  %tmp19762 = getelementptr inbounds float* %tmp19761, i64 1
+  %tmp19763 = getelementptr inbounds float* %tmp19762, i64 1
+  %tmp19764 = getelementptr inbounds float* %tmp19763, i64 1
+  %tmp19765 = getelementptr inbounds float* %tmp19764, i64 1
+  %tmp19766 = getelementptr inbounds float* %tmp19765, i64 1
+  %tmp19767 = getelementptr inbounds float* %tmp19766, i64 1
+  %tmp19768 = getelementptr inbounds float* %tmp19767, i64 1
+  %tmp19769 = getelementptr inbounds float* %tmp19768, i64 1
+  %tmp19770 = getelementptr inbounds float* %tmp19769, i64 1
+  %tmp19771 = getelementptr inbounds float* %tmp19770, i64 1
+  %tmp19772 = getelementptr inbounds float* %tmp19771, i64 1
+  %tmp19773 = getelementptr inbounds float* %tmp19772, i64 1
+  %tmp19774 = getelementptr inbounds float* %tmp19773, i64 1
+  %tmp19775 = getelementptr inbounds float* %tmp19774, i64 1
+  %tmp19776 = getelementptr inbounds float* %tmp19775, i64 1
+  %tmp19777 = getelementptr inbounds float* %tmp19776, i64 1
+  %tmp19778 = getelementptr inbounds float* %tmp19777, i64 1
+  %tmp19779 = getelementptr inbounds float* %tmp19778, i64 1
+  %tmp19780 = getelementptr inbounds float* %tmp19779, i64 1
+  %tmp19781 = getelementptr inbounds float* %tmp19780, i64 1
+  %tmp19782 = getelementptr inbounds float* %tmp19781, i64 1
+  %tmp19783 = getelementptr inbounds float* %tmp19782, i64 1
+  %tmp19784 = getelementptr inbounds float* %tmp19783, i64 1
+  %tmp19785 = getelementptr inbounds float* %tmp19784, i64 1
+  %tmp19786 = getelementptr inbounds float* %tmp19785, i64 1
+  %tmp19787 = getelementptr inbounds float* %tmp19786, i64 1
+  %tmp19788 = getelementptr inbounds float* %tmp19787, i64 1
+  %tmp19789 = getelementptr inbounds float* %tmp19788, i64 1
+  %tmp19790 = getelementptr inbounds float* %tmp19789, i64 1
+  %tmp19791 = getelementptr inbounds float* %tmp19790, i64 1
+  %tmp19792 = getelementptr inbounds float* %tmp19791, i64 1
+  %tmp19793 = getelementptr inbounds float* %tmp19792, i64 1
+  %tmp19794 = getelementptr inbounds float* %tmp19793, i64 1
+  %tmp19795 = getelementptr inbounds float* %tmp19794, i64 1
+  %tmp19796 = getelementptr inbounds float* %tmp19795, i64 1
+  %tmp19797 = getelementptr inbounds float* %tmp19796, i64 1
+  %tmp19798 = getelementptr inbounds float* %tmp19797, i64 1
+  %tmp19799 = getelementptr inbounds float* %tmp19798, i64 1
+  %tmp19800 = getelementptr inbounds float* %tmp19799, i64 1
+  %tmp19801 = getelementptr inbounds float* %tmp19800, i64 1
+  %tmp19802 = getelementptr inbounds float* %tmp19801, i64 1
+  %tmp19803 = getelementptr inbounds float* %tmp19802, i64 1
+  %tmp19804 = getelementptr inbounds float* %tmp19803, i64 1
+  %tmp19805 = getelementptr inbounds float* %tmp19804, i64 1
+  %tmp19806 = getelementptr inbounds float* %tmp19805, i64 1
+  %tmp19807 = getelementptr inbounds float* %tmp19806, i64 1
+  %tmp19808 = getelementptr inbounds float* %tmp19807, i64 1
+  %tmp19809 = getelementptr inbounds float* %tmp19808, i64 1
+  %tmp19810 = getelementptr inbounds float* %tmp19809, i64 1
+  %tmp19811 = getelementptr inbounds float* %tmp19810, i64 1
+  %tmp19812 = getelementptr inbounds float* %tmp19811, i64 1
+  %tmp19813 = getelementptr inbounds float* %tmp19812, i64 1
+  %tmp19814 = getelementptr inbounds float* %tmp19813, i64 1
+  %tmp19815 = getelementptr inbounds float* %tmp19814, i64 1
+  %tmp19816 = getelementptr inbounds float* %tmp19815, i64 1
+  %tmp19817 = getelementptr inbounds float* %tmp19816, i64 1
+  %tmp19818 = getelementptr inbounds float* %tmp19817, i64 1
+  %tmp19819 = getelementptr inbounds float* %tmp19818, i64 1
+  %tmp19820 = getelementptr inbounds float* %tmp19819, i64 1
+  %tmp19821 = getelementptr inbounds float* %tmp19820, i64 1
+  %tmp19822 = getelementptr inbounds float* %tmp19821, i64 1
+  %tmp19823 = getelementptr inbounds float* %tmp19822, i64 1
+  %tmp19824 = getelementptr inbounds float* %tmp19823, i64 1
+  %tmp19825 = getelementptr inbounds float* %tmp19824, i64 1
+  %tmp19826 = getelementptr inbounds float* %tmp19825, i64 1
+  %tmp19827 = getelementptr inbounds float* %tmp19826, i64 1
+  %tmp19828 = getelementptr inbounds float* %tmp19827, i64 1
+  %tmp19829 = getelementptr inbounds float* %tmp19828, i64 1
+  %tmp19830 = getelementptr inbounds float* %tmp19829, i64 1
+  %tmp19831 = getelementptr inbounds float* %tmp19830, i64 1
+  %tmp19832 = getelementptr inbounds float* %tmp19831, i64 1
+  %tmp19833 = getelementptr inbounds float* %tmp19832, i64 1
+  %tmp19834 = getelementptr inbounds float* %tmp19833, i64 1
+  %tmp19835 = getelementptr inbounds float* %tmp19834, i64 1
+  %tmp19836 = getelementptr inbounds float* %tmp19835, i64 1
+  %tmp19837 = getelementptr inbounds float* %tmp19836, i64 1
+  %tmp19838 = getelementptr inbounds float* %tmp19837, i64 1
+  %tmp19839 = getelementptr inbounds float* %tmp19838, i64 1
+  %tmp19840 = getelementptr inbounds float* %tmp19839, i64 1
+  %tmp19841 = getelementptr inbounds float* %tmp19840, i64 1
+  %tmp19842 = getelementptr inbounds float* %tmp19841, i64 1
+  %tmp19843 = getelementptr inbounds float* %tmp19842, i64 1
+  %tmp19844 = getelementptr inbounds float* %tmp19843, i64 1
+  %tmp19845 = getelementptr inbounds float* %tmp19844, i64 1
+  %tmp19846 = getelementptr inbounds float* %tmp19845, i64 1
+  %tmp19847 = getelementptr inbounds float* %tmp19846, i64 1
+  %tmp19848 = getelementptr inbounds float* %tmp19847, i64 1
+  %tmp19849 = getelementptr inbounds float* %tmp19848, i64 1
+  %tmp19850 = getelementptr inbounds float* %tmp19849, i64 1
+  %tmp19851 = getelementptr inbounds float* %tmp19850, i64 1
+  %tmp19852 = getelementptr inbounds float* %tmp19851, i64 1
+  %tmp19853 = getelementptr inbounds float* %tmp19852, i64 1
+  %tmp19854 = getelementptr inbounds float* %tmp19853, i64 1
+  %tmp19855 = getelementptr inbounds float* %tmp19854, i64 1
+  %tmp19856 = getelementptr inbounds float* %tmp19855, i64 1
+  %tmp19857 = getelementptr inbounds float* %tmp19856, i64 1
+  %tmp19858 = getelementptr inbounds float* %tmp19857, i64 1
+  %tmp19859 = getelementptr inbounds float* %tmp19858, i64 1
+  %tmp19860 = getelementptr inbounds float* %tmp19859, i64 1
+  %tmp19861 = getelementptr inbounds float* %tmp19860, i64 1
+  %tmp19862 = getelementptr inbounds float* %tmp19861, i64 1
+  %tmp19863 = getelementptr inbounds float* %tmp19862, i64 1
+  %tmp19864 = getelementptr inbounds float* %tmp19863, i64 1
+  %tmp19865 = getelementptr inbounds float* %tmp19864, i64 1
+  %tmp19866 = getelementptr inbounds float* %tmp19865, i64 1
+  %tmp19867 = getelementptr inbounds float* %tmp19866, i64 1
+  %tmp19868 = getelementptr inbounds float* %tmp19867, i64 1
+  %tmp19869 = getelementptr inbounds float* %tmp19868, i64 1
+  %tmp19870 = getelementptr inbounds float* %tmp19869, i64 1
+  %tmp19871 = getelementptr inbounds float* %tmp19870, i64 1
+  %tmp19872 = getelementptr inbounds float* %tmp19871, i64 1
+  %tmp19873 = getelementptr inbounds float* %tmp19872, i64 1
+  %tmp19874 = getelementptr inbounds float* %tmp19873, i64 1
+  %tmp19875 = getelementptr inbounds float* %tmp19874, i64 1
+  %tmp19876 = getelementptr inbounds float* %tmp19875, i64 1
+  %tmp19877 = getelementptr inbounds float* %tmp19876, i64 1
+  %tmp19878 = getelementptr inbounds float* %tmp19877, i64 1
+  %tmp19879 = getelementptr inbounds float* %tmp19878, i64 1
+  %tmp19880 = getelementptr inbounds float* %tmp19879, i64 1
+  %tmp19881 = getelementptr inbounds float* %tmp19880, i64 1
+  %tmp19882 = getelementptr inbounds float* %tmp19881, i64 1
+  %tmp19883 = getelementptr inbounds float* %tmp19882, i64 1
+  %tmp19884 = getelementptr inbounds float* %tmp19883, i64 1
+  %tmp19885 = getelementptr inbounds float* %tmp19884, i64 1
+  %tmp19886 = getelementptr inbounds float* %tmp19885, i64 1
+  %tmp19887 = getelementptr inbounds float* %tmp19886, i64 1
+  %tmp19888 = getelementptr inbounds float* %tmp19887, i64 1
+  %tmp19889 = getelementptr inbounds float* %tmp19888, i64 1
+  %tmp19890 = getelementptr inbounds float* %tmp19889, i64 1
+  %tmp19891 = getelementptr inbounds float* %tmp19890, i64 1
+  %tmp19892 = getelementptr inbounds float* %tmp19891, i64 1
+  %tmp19893 = getelementptr inbounds float* %tmp19892, i64 1
+  %tmp19894 = getelementptr inbounds float* %tmp19893, i64 1
+  %tmp19895 = getelementptr inbounds float* %tmp19894, i64 1
+  %tmp19896 = getelementptr inbounds float* %tmp19895, i64 1
+  %tmp19897 = getelementptr inbounds float* %tmp19896, i64 1
+  %tmp19898 = getelementptr inbounds float* %tmp19897, i64 1
+  %tmp19899 = getelementptr inbounds float* %tmp19898, i64 1
+  %tmp19900 = getelementptr inbounds float* %tmp19899, i64 1
+  %tmp19901 = getelementptr inbounds float* %tmp19900, i64 1
+  %tmp19902 = getelementptr inbounds float* %tmp19901, i64 1
+  %tmp19903 = getelementptr inbounds float* %tmp19902, i64 1
+  %tmp19904 = getelementptr inbounds float* %tmp19903, i64 1
+  %tmp19905 = getelementptr inbounds float* %tmp19904, i64 1
+  %tmp19906 = getelementptr inbounds float* %tmp19905, i64 1
+  %tmp19907 = getelementptr inbounds float* %tmp19906, i64 1
+  %tmp19908 = getelementptr inbounds float* %tmp19907, i64 1
+  %tmp19909 = getelementptr inbounds float* %tmp19908, i64 1
+  %tmp19910 = getelementptr inbounds float* %tmp19909, i64 1
+  %tmp19911 = getelementptr inbounds float* %tmp19910, i64 1
+  %tmp19912 = getelementptr inbounds float* %tmp19911, i64 1
+  %tmp19913 = getelementptr inbounds float* %tmp19912, i64 1
+  %tmp19914 = getelementptr inbounds float* %tmp19913, i64 1
+  %tmp19915 = getelementptr inbounds float* %tmp19914, i64 1
+  %tmp19916 = getelementptr inbounds float* %tmp19915, i64 1
+  %tmp19917 = getelementptr inbounds float* %tmp19916, i64 1
+  %tmp19918 = getelementptr inbounds float* %tmp19917, i64 1
+  %tmp19919 = getelementptr inbounds float* %tmp19918, i64 1
+  %tmp19920 = getelementptr inbounds float* %tmp19919, i64 1
+  %tmp19921 = getelementptr inbounds float* %tmp19920, i64 1
+  %tmp19922 = getelementptr inbounds float* %tmp19921, i64 1
+  %tmp19923 = getelementptr inbounds float* %tmp19922, i64 1
+  %tmp19924 = getelementptr inbounds float* %tmp19923, i64 1
+  %tmp19925 = getelementptr inbounds float* %tmp19924, i64 1
+  %tmp19926 = getelementptr inbounds float* %tmp19925, i64 1
+  %tmp19927 = getelementptr inbounds float* %tmp19926, i64 1
+  %tmp19928 = getelementptr inbounds float* %tmp19927, i64 1
+  %tmp19929 = getelementptr inbounds float* %tmp19928, i64 1
+  %tmp19930 = getelementptr inbounds float* %tmp19929, i64 1
+  %tmp19931 = getelementptr inbounds float* %tmp19930, i64 1
+  %tmp19932 = getelementptr inbounds float* %tmp19931, i64 1
+  %tmp19933 = getelementptr inbounds float* %tmp19932, i64 1
+  %tmp19934 = getelementptr inbounds float* %tmp19933, i64 1
+  %tmp19935 = getelementptr inbounds float* %tmp19934, i64 1
+  %tmp19936 = getelementptr inbounds float* %tmp19935, i64 1
+  %tmp19937 = getelementptr inbounds float* %tmp19936, i64 1
+  %tmp19938 = getelementptr inbounds float* %tmp19937, i64 1
+  %tmp19939 = getelementptr inbounds float* %tmp19938, i64 1
+  %tmp19940 = getelementptr inbounds float* %tmp19939, i64 1
+  %tmp19941 = getelementptr inbounds float* %tmp19940, i64 1
+  %tmp19942 = getelementptr inbounds float* %tmp19941, i64 1
+  %tmp19943 = getelementptr inbounds float* %tmp19942, i64 1
+  %tmp19944 = getelementptr inbounds float* %tmp19943, i64 1
+  %tmp19945 = getelementptr inbounds float* %tmp19944, i64 1
+  %tmp19946 = getelementptr inbounds float* %tmp19945, i64 1
+  %tmp19947 = getelementptr inbounds float* %tmp19946, i64 1
+  %tmp19948 = getelementptr inbounds float* %tmp19947, i64 1
+  %tmp19949 = getelementptr inbounds float* %tmp19948, i64 1
+  %tmp19950 = getelementptr inbounds float* %tmp19949, i64 1
+  %tmp19951 = getelementptr inbounds float* %tmp19950, i64 1
+  %tmp19952 = getelementptr inbounds float* %tmp19951, i64 1
+  %tmp19953 = getelementptr inbounds float* %tmp19952, i64 1
+  %tmp19954 = getelementptr inbounds float* %tmp19953, i64 1
+  %tmp19955 = getelementptr inbounds float* %tmp19954, i64 1
+  %tmp19956 = getelementptr inbounds float* %tmp19955, i64 1
+  %tmp19957 = getelementptr inbounds float* %tmp19956, i64 1
+  %tmp19958 = getelementptr inbounds float* %tmp19957, i64 1
+  %tmp19959 = getelementptr inbounds float* %tmp19958, i64 1
+  %tmp19960 = getelementptr inbounds float* %tmp19959, i64 1
+  %tmp19961 = getelementptr inbounds float* %tmp19960, i64 1
+  %tmp19962 = getelementptr inbounds float* %tmp19961, i64 1
+  %tmp19963 = getelementptr inbounds float* %tmp19962, i64 1
+  %tmp19964 = getelementptr inbounds float* %tmp19963, i64 1
+  %tmp19965 = getelementptr inbounds float* %tmp19964, i64 1
+  %tmp19966 = getelementptr inbounds float* %tmp19965, i64 1
+  %tmp19967 = getelementptr inbounds float* %tmp19966, i64 1
+  %tmp19968 = getelementptr inbounds float* %tmp19967, i64 1
+  %tmp19969 = getelementptr inbounds float* %tmp19968, i64 1
+  %tmp19970 = getelementptr inbounds float* %tmp19969, i64 1
+  %tmp19971 = getelementptr inbounds float* %tmp19970, i64 1
+  %tmp19972 = getelementptr inbounds float* %tmp19971, i64 1
+  %tmp19973 = getelementptr inbounds float* %tmp19972, i64 1
+  %tmp19974 = getelementptr inbounds float* %tmp19973, i64 1
+  %tmp19975 = getelementptr inbounds float* %tmp19974, i64 1
+  %tmp19976 = getelementptr inbounds float* %tmp19975, i64 1
+  %tmp19977 = getelementptr inbounds float* %tmp19976, i64 1
+  %tmp19978 = getelementptr inbounds float* %tmp19977, i64 1
+  %tmp19979 = getelementptr inbounds float* %tmp19978, i64 1
+  %tmp19980 = getelementptr inbounds float* %tmp19979, i64 1
+  %tmp19981 = getelementptr inbounds float* %tmp19980, i64 1
+  %tmp19982 = getelementptr inbounds float* %tmp19981, i64 1
+  %tmp19983 = getelementptr inbounds float* %tmp19982, i64 1
+  %tmp19984 = getelementptr inbounds float* %tmp19983, i64 1
+  %tmp19985 = getelementptr inbounds float* %tmp19984, i64 1
+  %tmp19986 = getelementptr inbounds float* %tmp19985, i64 1
+  %tmp19987 = getelementptr inbounds float* %tmp19986, i64 1
+  %tmp19988 = getelementptr inbounds float* %tmp19987, i64 1
+  %tmp19989 = getelementptr inbounds float* %tmp19988, i64 1
+  %tmp19990 = getelementptr inbounds float* %tmp19989, i64 1
+  %tmp19991 = getelementptr inbounds float* %tmp19990, i64 1
+  %tmp19992 = getelementptr inbounds float* %tmp19991, i64 1
+  %tmp19993 = getelementptr inbounds float* %tmp19992, i64 1
+  %tmp19994 = getelementptr inbounds float* %tmp19993, i64 1
+  %tmp19995 = getelementptr inbounds float* %tmp19994, i64 1
+  %tmp19996 = getelementptr inbounds float* %tmp19995, i64 1
+  %tmp19997 = getelementptr inbounds float* %tmp19996, i64 1
+  %tmp19998 = getelementptr inbounds float* %tmp19997, i64 1
+  %tmp19999 = getelementptr inbounds float* %tmp19998, i64 1
+  %tmp20000 = getelementptr inbounds float* %tmp19999, i64 1
+  %tmp20001 = getelementptr inbounds float* %tmp20000, i64 1
+  %tmp20002 = getelementptr inbounds float* %tmp20001, i64 1
+  %tmp20003 = getelementptr inbounds float* %tmp20002, i64 1
+  %tmp20004 = getelementptr inbounds float* %tmp20003, i64 1
+  %tmp20005 = getelementptr inbounds float* %tmp20004, i64 1
+  %tmp20006 = getelementptr inbounds float* %tmp20005, i64 1
+  %tmp20007 = getelementptr inbounds float* %tmp20006, i64 1
+  %tmp20008 = getelementptr inbounds float* %tmp20007, i64 1
+  %tmp20009 = getelementptr inbounds float* %tmp20008, i64 1
+  %tmp20010 = getelementptr inbounds float* %tmp20009, i64 1
+  %tmp20011 = getelementptr inbounds float* %tmp20010, i64 1
+  %tmp20012 = getelementptr inbounds float* %tmp20011, i64 1
+  %tmp20013 = getelementptr inbounds float* %tmp20012, i64 1
+  %tmp20014 = getelementptr inbounds float* %tmp20013, i64 1
+  %tmp20015 = getelementptr inbounds float* %tmp20014, i64 1
+  %tmp20016 = getelementptr inbounds float* %tmp20015, i64 1
+  %tmp20017 = getelementptr inbounds float* %tmp20016, i64 1
+  %tmp20018 = getelementptr inbounds float* %tmp20017, i64 1
+  %tmp20019 = getelementptr inbounds float* %tmp20018, i64 1
+  %tmp20020 = getelementptr inbounds float* %tmp20019, i64 1
+  %tmp20021 = getelementptr inbounds float* %tmp20020, i64 1
+  %tmp20022 = getelementptr inbounds float* %tmp20021, i64 1
+  %tmp20023 = getelementptr inbounds float* %tmp20022, i64 1
+  %tmp20024 = getelementptr inbounds float* %tmp20023, i64 1
+  %tmp20025 = getelementptr inbounds float* %tmp20024, i64 1
+  %tmp20026 = getelementptr inbounds float* %tmp20025, i64 1
+  %tmp20027 = getelementptr inbounds float* %tmp20026, i64 1
+  %tmp20028 = getelementptr inbounds float* %tmp20027, i64 1
+  %tmp20029 = getelementptr inbounds float* %tmp20028, i64 1
+  %tmp20030 = getelementptr inbounds float* %tmp20029, i64 1
+  %tmp20031 = getelementptr inbounds float* %tmp20030, i64 1
+  %tmp20032 = getelementptr inbounds float* %tmp20031, i64 1
+  %tmp20033 = getelementptr inbounds float* %tmp20032, i64 1
+  %tmp20034 = getelementptr inbounds float* %tmp20033, i64 1
+  %tmp20035 = getelementptr inbounds float* %tmp20034, i64 1
+  %tmp20036 = getelementptr inbounds float* %tmp20035, i64 1
+  %tmp20037 = getelementptr inbounds float* %tmp20036, i64 1
+  %tmp20038 = getelementptr inbounds float* %tmp20037, i64 1
+  %tmp20039 = getelementptr inbounds float* %tmp20038, i64 1
+  %tmp20040 = getelementptr inbounds float* %tmp20039, i64 1
+  %tmp20041 = getelementptr inbounds float* %tmp20040, i64 1
+  %tmp20042 = getelementptr inbounds float* %tmp20041, i64 1
+  %tmp20043 = getelementptr inbounds float* %tmp20042, i64 1
+  %tmp20044 = getelementptr inbounds float* %tmp20043, i64 1
+  %tmp20045 = getelementptr inbounds float* %tmp20044, i64 1
+  %tmp20046 = getelementptr inbounds float* %tmp20045, i64 1
+  %tmp20047 = getelementptr inbounds float* %tmp20046, i64 1
+  %tmp20048 = getelementptr inbounds float* %tmp20047, i64 1
+  %tmp20049 = getelementptr inbounds float* %tmp20048, i64 1
+  %tmp20050 = getelementptr inbounds float* %tmp20049, i64 1
+  %tmp20051 = getelementptr inbounds float* %tmp20050, i64 1
+  %tmp20052 = getelementptr inbounds float* %tmp20051, i64 1
+  %tmp20053 = getelementptr inbounds float* %tmp20052, i64 1
+  %tmp20054 = getelementptr inbounds float* %tmp20053, i64 1
+  %tmp20055 = getelementptr inbounds float* %tmp20054, i64 1
+  %tmp20056 = getelementptr inbounds float* %tmp20055, i64 1
+  %tmp20057 = getelementptr inbounds float* %tmp20056, i64 1
+  %tmp20058 = getelementptr inbounds float* %tmp20057, i64 1
+  %tmp20059 = getelementptr inbounds float* %tmp20058, i64 1
+  %tmp20060 = getelementptr inbounds float* %tmp20059, i64 1
+  %tmp20061 = getelementptr inbounds float* %tmp20060, i64 1
+  %tmp20062 = getelementptr inbounds float* %tmp20061, i64 1
+  %tmp20063 = getelementptr inbounds float* %tmp20062, i64 1
+  %tmp20064 = getelementptr inbounds float* %tmp20063, i64 1
+  %tmp20065 = getelementptr inbounds float* %tmp20064, i64 1
+  %tmp20066 = getelementptr inbounds float* %tmp20065, i64 1
+  %tmp20067 = getelementptr inbounds float* %tmp20066, i64 1
+  %tmp20068 = getelementptr inbounds float* %tmp20067, i64 1
+  %tmp20069 = getelementptr inbounds float* %tmp20068, i64 1
+  %tmp20070 = getelementptr inbounds float* %tmp20069, i64 1
+  %tmp20071 = getelementptr inbounds float* %tmp20070, i64 1
+  %tmp20072 = getelementptr inbounds float* %tmp20071, i64 1
+  %tmp20073 = getelementptr inbounds float* %tmp20072, i64 1
+  %tmp20074 = getelementptr inbounds float* %tmp20073, i64 1
+  %tmp20075 = getelementptr inbounds float* %tmp20074, i64 1
+  %tmp20076 = getelementptr inbounds float* %tmp20075, i64 1
+  %tmp20077 = getelementptr inbounds float* %tmp20076, i64 1
+  %tmp20078 = getelementptr inbounds float* %tmp20077, i64 1
+  %tmp20079 = getelementptr inbounds float* %tmp20078, i64 1
+  %tmp20080 = getelementptr inbounds float* %tmp20079, i64 1
+  %tmp20081 = getelementptr inbounds float* %tmp20080, i64 1
+  %tmp20082 = getelementptr inbounds float* %tmp20081, i64 1
+  %tmp20083 = getelementptr inbounds float* %tmp20082, i64 1
+  %tmp20084 = getelementptr inbounds float* %tmp20083, i64 1
+  %tmp20085 = getelementptr inbounds float* %tmp20084, i64 1
+  %tmp20086 = getelementptr inbounds float* %tmp20085, i64 1
+  %tmp20087 = getelementptr inbounds float* %tmp20086, i64 1
+  %tmp20088 = getelementptr inbounds float* %tmp20087, i64 1
+  %tmp20089 = getelementptr inbounds float* %tmp20088, i64 1
+  %tmp20090 = getelementptr inbounds float* %tmp20089, i64 1
+  %tmp20091 = getelementptr inbounds float* %tmp20090, i64 1
+  %tmp20092 = getelementptr inbounds float* %tmp20091, i64 1
+  %tmp20093 = getelementptr inbounds float* %tmp20092, i64 1
+  %tmp20094 = getelementptr inbounds float* %tmp20093, i64 1
+  %tmp20095 = getelementptr inbounds float* %tmp20094, i64 1
+  %tmp20096 = getelementptr inbounds float* %tmp20095, i64 1
+  %tmp20097 = getelementptr inbounds float* %tmp20096, i64 1
+  %tmp20098 = getelementptr inbounds float* %tmp20097, i64 1
+  %tmp20099 = getelementptr inbounds float* %tmp20098, i64 1
+  %tmp20100 = getelementptr inbounds float* %tmp20099, i64 1
+  %tmp20101 = getelementptr inbounds float* %tmp20100, i64 1
+  %tmp20102 = getelementptr inbounds float* %tmp20101, i64 1
+  %tmp20103 = getelementptr inbounds float* %tmp20102, i64 1
+  %tmp20104 = getelementptr inbounds float* %tmp20103, i64 1
+  %tmp20105 = getelementptr inbounds float* %tmp20104, i64 1
+  %tmp20106 = getelementptr inbounds float* %tmp20105, i64 1
+  %tmp20107 = getelementptr inbounds float* %tmp20106, i64 1
+  %tmp20108 = getelementptr inbounds float* %tmp20107, i64 1
+  %tmp20109 = getelementptr inbounds float* %tmp20108, i64 1
+  %tmp20110 = getelementptr inbounds float* %tmp20109, i64 1
+  %tmp20111 = getelementptr inbounds float* %tmp20110, i64 1
+  %tmp20112 = getelementptr inbounds float* %tmp20111, i64 1
+  %tmp20113 = getelementptr inbounds float* %tmp20112, i64 1
+  %tmp20114 = getelementptr inbounds float* %tmp20113, i64 1
+  %tmp20115 = getelementptr inbounds float* %tmp20114, i64 1
+  %tmp20116 = getelementptr inbounds float* %tmp20115, i64 1
+  %tmp20117 = getelementptr inbounds float* %tmp20116, i64 1
+  %tmp20118 = getelementptr inbounds float* %tmp20117, i64 1
+  %tmp20119 = getelementptr inbounds float* %tmp20118, i64 1
+  %tmp20120 = getelementptr inbounds float* %tmp20119, i64 1
+  %tmp20121 = getelementptr inbounds float* %tmp20120, i64 1
+  %tmp20122 = getelementptr inbounds float* %tmp20121, i64 1
+  %tmp20123 = getelementptr inbounds float* %tmp20122, i64 1
+  %tmp20124 = getelementptr inbounds float* %tmp20123, i64 1
+  %tmp20125 = getelementptr inbounds float* %tmp20124, i64 1
+  %tmp20126 = getelementptr inbounds float* %tmp20125, i64 1
+  %tmp20127 = getelementptr inbounds float* %tmp20126, i64 1
+  %tmp20128 = getelementptr inbounds float* %tmp20127, i64 1
+  %tmp20129 = getelementptr inbounds float* %tmp20128, i64 1
+  %tmp20130 = getelementptr inbounds float* %tmp20129, i64 1
+  %tmp20131 = getelementptr inbounds float* %tmp20130, i64 1
+  %tmp20132 = getelementptr inbounds float* %tmp20131, i64 1
+  %tmp20133 = getelementptr inbounds float* %tmp20132, i64 1
+  %tmp20134 = getelementptr inbounds float* %tmp20133, i64 1
+  %tmp20135 = getelementptr inbounds float* %tmp20134, i64 1
+  %tmp20136 = getelementptr inbounds float* %tmp20135, i64 1
+  %tmp20137 = getelementptr inbounds float* %tmp20136, i64 1
+  %tmp20138 = getelementptr inbounds float* %tmp20137, i64 1
+  %tmp20139 = getelementptr inbounds float* %tmp20138, i64 1
+  %tmp20140 = getelementptr inbounds float* %tmp20139, i64 1
+  %tmp20141 = getelementptr inbounds float* %tmp20140, i64 1
+  %tmp20142 = getelementptr inbounds float* %tmp20141, i64 1
+  %tmp20143 = getelementptr inbounds float* %tmp20142, i64 1
+  %tmp20144 = getelementptr inbounds float* %tmp20143, i64 1
+  %tmp20145 = getelementptr inbounds float* %tmp20144, i64 1
+  %tmp20146 = getelementptr inbounds float* %tmp20145, i64 1
+  %tmp20147 = getelementptr inbounds float* %tmp20146, i64 1
+  %tmp20148 = getelementptr inbounds float* %tmp20147, i64 1
+  %tmp20149 = getelementptr inbounds float* %tmp20148, i64 1
+  %tmp20150 = getelementptr inbounds float* %tmp20149, i64 1
+  %tmp20151 = getelementptr inbounds float* %tmp20150, i64 1
+  %tmp20152 = getelementptr inbounds float* %tmp20151, i64 1
+  %tmp20153 = getelementptr inbounds float* %tmp20152, i64 1
+  %tmp20154 = getelementptr inbounds float* %tmp20153, i64 1
+  %tmp20155 = getelementptr inbounds float* %tmp20154, i64 1
+  %tmp20156 = getelementptr inbounds float* %tmp20155, i64 1
+  %tmp20157 = getelementptr inbounds float* %tmp20156, i64 1
+  %tmp20158 = getelementptr inbounds float* %tmp20157, i64 1
+  %tmp20159 = getelementptr inbounds float* %tmp20158, i64 1
+  %tmp20160 = getelementptr inbounds float* %tmp20159, i64 1
+  %tmp20161 = getelementptr inbounds float* %tmp20160, i64 1
+  %tmp20162 = getelementptr inbounds float* %tmp20161, i64 1
+  %tmp20163 = getelementptr inbounds float* %tmp20162, i64 1
+  %tmp20164 = getelementptr inbounds float* %tmp20163, i64 1
+  %tmp20165 = getelementptr inbounds float* %tmp20164, i64 1
+  %tmp20166 = getelementptr inbounds float* %tmp20165, i64 1
+  %tmp20167 = getelementptr inbounds float* %tmp20166, i64 1
+  %tmp20168 = getelementptr inbounds float* %tmp20167, i64 1
+  %tmp20169 = getelementptr inbounds float* %tmp20168, i64 1
+  %tmp20170 = getelementptr inbounds float* %tmp20169, i64 1
+  %tmp20171 = getelementptr inbounds float* %tmp20170, i64 1
+  %tmp20172 = getelementptr inbounds float* %tmp20171, i64 1
+  %tmp20173 = getelementptr inbounds float* %tmp20172, i64 1
+  %tmp20174 = getelementptr inbounds float* %tmp20173, i64 1
+  %tmp20175 = getelementptr inbounds float* %tmp20174, i64 1
+  %tmp20176 = getelementptr inbounds float* %tmp20175, i64 1
+  %tmp20177 = getelementptr inbounds float* %tmp20176, i64 1
+  %tmp20178 = getelementptr inbounds float* %tmp20177, i64 1
+  %tmp20179 = getelementptr inbounds float* %tmp20178, i64 1
+  %tmp20180 = getelementptr inbounds float* %tmp20179, i64 1
+  %tmp20181 = getelementptr inbounds float* %tmp20180, i64 1
+  %tmp20182 = getelementptr inbounds float* %tmp20181, i64 1
+  %tmp20183 = getelementptr inbounds float* %tmp20182, i64 1
+  %tmp20184 = getelementptr inbounds float* %tmp20183, i64 1
+  %tmp20185 = getelementptr inbounds float* %tmp20184, i64 1
+  %tmp20186 = getelementptr inbounds float* %tmp20185, i64 1
+  %tmp20187 = getelementptr inbounds float* %tmp20186, i64 1
+  %tmp20188 = getelementptr inbounds float* %tmp20187, i64 1
+  %tmp20189 = getelementptr inbounds float* %tmp20188, i64 1
+  %tmp20190 = getelementptr inbounds float* %tmp20189, i64 1
+  %tmp20191 = getelementptr inbounds float* %tmp20190, i64 1
+  %tmp20192 = getelementptr inbounds float* %tmp20191, i64 1
+  %tmp20193 = getelementptr inbounds float* %tmp20192, i64 1
+  %tmp20194 = getelementptr inbounds float* %tmp20193, i64 1
+  %tmp20195 = getelementptr inbounds float* %tmp20194, i64 1
+  %tmp20196 = getelementptr inbounds float* %tmp20195, i64 1
+  %tmp20197 = getelementptr inbounds float* %tmp20196, i64 1
+  %tmp20198 = getelementptr inbounds float* %tmp20197, i64 1
+  %tmp20199 = getelementptr inbounds float* %tmp20198, i64 1
+  %tmp20200 = getelementptr inbounds float* %tmp20199, i64 1
+  %tmp20201 = getelementptr inbounds float* %tmp20200, i64 1
+  %tmp20202 = getelementptr inbounds float* %tmp20201, i64 1
+  %tmp20203 = getelementptr inbounds float* %tmp20202, i64 1
+  %tmp20204 = getelementptr inbounds float* %tmp20203, i64 1
+  %tmp20205 = getelementptr inbounds float* %tmp20204, i64 1
+  %tmp20206 = getelementptr inbounds float* %tmp20205, i64 1
+  %tmp20207 = getelementptr inbounds float* %tmp20206, i64 1
+  %tmp20208 = getelementptr inbounds float* %tmp20207, i64 1
+  %tmp20209 = getelementptr inbounds float* %tmp20208, i64 1
+  %tmp20210 = getelementptr inbounds float* %tmp20209, i64 1
+  %tmp20211 = getelementptr inbounds float* %tmp20210, i64 1
+  %tmp20212 = getelementptr inbounds float* %tmp20211, i64 1
+  %tmp20213 = getelementptr inbounds float* %tmp20212, i64 1
+  %tmp20214 = getelementptr inbounds float* %tmp20213, i64 1
+  %tmp20215 = getelementptr inbounds float* %tmp20214, i64 1
+  %tmp20216 = getelementptr inbounds float* %tmp20215, i64 1
+  %tmp20217 = getelementptr inbounds float* %tmp20216, i64 1
+  %tmp20218 = getelementptr inbounds float* %tmp20217, i64 1
+  %tmp20219 = getelementptr inbounds float* %tmp20218, i64 1
+  %tmp20220 = getelementptr inbounds float* %tmp20219, i64 1
+  %tmp20221 = getelementptr inbounds float* %tmp20220, i64 1
+  %tmp20222 = getelementptr inbounds float* %tmp20221, i64 1
+  %tmp20223 = getelementptr inbounds float* %tmp20222, i64 1
+  %tmp20224 = getelementptr inbounds float* %tmp20223, i64 1
+  %tmp20225 = getelementptr inbounds float* %tmp20224, i64 1
+  %tmp20226 = getelementptr inbounds float* %tmp20225, i64 1
+  %tmp20227 = getelementptr inbounds float* %tmp20226, i64 1
+  %tmp20228 = getelementptr inbounds float* %tmp20227, i64 1
+  %tmp20229 = getelementptr inbounds float* %tmp20228, i64 1
+  %tmp20230 = getelementptr inbounds float* %tmp20229, i64 1
+  %tmp20231 = getelementptr inbounds float* %tmp20230, i64 1
+  %tmp20232 = getelementptr inbounds float* %tmp20231, i64 1
+  %tmp20233 = getelementptr inbounds float* %tmp20232, i64 1
+  %tmp20234 = getelementptr inbounds float* %tmp20233, i64 1
+  %tmp20235 = getelementptr inbounds float* %tmp20234, i64 1
+  %tmp20236 = getelementptr inbounds float* %tmp20235, i64 1
+  %tmp20237 = getelementptr inbounds float* %tmp20236, i64 1
+  %tmp20238 = getelementptr inbounds float* %tmp20237, i64 1
+  %tmp20239 = getelementptr inbounds float* %tmp20238, i64 1
+  %tmp20240 = getelementptr inbounds float* %tmp20239, i64 1
+  %tmp20241 = getelementptr inbounds float* %tmp20240, i64 1
+  %tmp20242 = getelementptr inbounds float* %tmp20241, i64 1
+  %tmp20243 = getelementptr inbounds float* %tmp20242, i64 1
+  %tmp20244 = getelementptr inbounds float* %tmp20243, i64 1
+  %tmp20245 = getelementptr inbounds float* %tmp20244, i64 1
+  %tmp20246 = getelementptr inbounds float* %tmp20245, i64 1
+  %tmp20247 = getelementptr inbounds float* %tmp20246, i64 1
+  %tmp20248 = getelementptr inbounds float* %tmp20247, i64 1
+  %tmp20249 = getelementptr inbounds float* %tmp20248, i64 1
+  %tmp20250 = getelementptr inbounds float* %tmp20249, i64 1
+  %tmp20251 = getelementptr inbounds float* %tmp20250, i64 1
+  %tmp20252 = getelementptr inbounds float* %tmp20251, i64 1
+  %tmp20253 = getelementptr inbounds float* %tmp20252, i64 1
+  %tmp20254 = getelementptr inbounds float* %tmp20253, i64 1
+  %tmp20255 = getelementptr inbounds float* %tmp20254, i64 1
+  %tmp20256 = getelementptr inbounds float* %tmp20255, i64 1
+  %tmp20257 = getelementptr inbounds float* %tmp20256, i64 1
+  %tmp20258 = getelementptr inbounds float* %tmp20257, i64 1
+  %tmp20259 = getelementptr inbounds float* %tmp20258, i64 1
+  %tmp20260 = getelementptr inbounds float* %tmp20259, i64 1
+  %tmp20261 = getelementptr inbounds float* %tmp20260, i64 1
+  %tmp20262 = getelementptr inbounds float* %tmp20261, i64 1
+  %tmp20263 = getelementptr inbounds float* %tmp20262, i64 1
+  %tmp20264 = getelementptr inbounds float* %tmp20263, i64 1
+  %tmp20265 = getelementptr inbounds float* %tmp20264, i64 1
+  %tmp20266 = getelementptr inbounds float* %tmp20265, i64 1
+  %tmp20267 = getelementptr inbounds float* %tmp20266, i64 1
+  %tmp20268 = getelementptr inbounds float* %tmp20267, i64 1
+  %tmp20269 = getelementptr inbounds float* %tmp20268, i64 1
+  %tmp20270 = getelementptr inbounds float* %tmp20269, i64 1
+  %tmp20271 = getelementptr inbounds float* %tmp20270, i64 1
+  %tmp20272 = getelementptr inbounds float* %tmp20271, i64 1
+  %tmp20273 = getelementptr inbounds float* %tmp20272, i64 1
+  %tmp20274 = getelementptr inbounds float* %tmp20273, i64 1
+  %tmp20275 = getelementptr inbounds float* %tmp20274, i64 1
+  %tmp20276 = getelementptr inbounds float* %tmp20275, i64 1
+  %tmp20277 = getelementptr inbounds float* %tmp20276, i64 1
+  %tmp20278 = getelementptr inbounds float* %tmp20277, i64 1
+  %tmp20279 = getelementptr inbounds float* %tmp20278, i64 1
+  %tmp20280 = getelementptr inbounds float* %tmp20279, i64 1
+  %tmp20281 = getelementptr inbounds float* %tmp20280, i64 1
+  %tmp20282 = getelementptr inbounds float* %tmp20281, i64 1
+  %tmp20283 = getelementptr inbounds float* %tmp20282, i64 1
+  %tmp20284 = getelementptr inbounds float* %tmp20283, i64 1
+  %tmp20285 = getelementptr inbounds float* %tmp20284, i64 1
+  %tmp20286 = getelementptr inbounds float* %tmp20285, i64 1
+  %tmp20287 = getelementptr inbounds float* %tmp20286, i64 1
+  %tmp20288 = getelementptr inbounds float* %tmp20287, i64 1
+  %tmp20289 = getelementptr inbounds float* %tmp20288, i64 1
+  %tmp20290 = getelementptr inbounds float* %tmp20289, i64 1
+  %tmp20291 = getelementptr inbounds float* %tmp20290, i64 1
+  %tmp20292 = getelementptr inbounds float* %tmp20291, i64 1
+  %tmp20293 = getelementptr inbounds float* %tmp20292, i64 1
+  %tmp20294 = getelementptr inbounds float* %tmp20293, i64 1
+  %tmp20295 = getelementptr inbounds float* %tmp20294, i64 1
+  %tmp20296 = getelementptr inbounds float* %tmp20295, i64 1
+  %tmp20297 = getelementptr inbounds float* %tmp20296, i64 1
+  %tmp20298 = getelementptr inbounds float* %tmp20297, i64 1
+  %tmp20299 = getelementptr inbounds float* %tmp20298, i64 1
+  %tmp20300 = getelementptr inbounds float* %tmp20299, i64 1
+  %tmp20301 = getelementptr inbounds float* %tmp20300, i64 1
+  %tmp20302 = getelementptr inbounds float* %tmp20301, i64 1
+  %tmp20303 = getelementptr inbounds float* %tmp20302, i64 1
+  %tmp20304 = getelementptr inbounds float* %tmp20303, i64 1
+  %tmp20305 = getelementptr inbounds float* %tmp20304, i64 1
+  %tmp20306 = getelementptr inbounds float* %tmp20305, i64 1
+  %tmp20307 = getelementptr inbounds float* %tmp20306, i64 1
+  %tmp20308 = getelementptr inbounds float* %tmp20307, i64 1
+  %tmp20309 = getelementptr inbounds float* %tmp20308, i64 1
+  %tmp20310 = getelementptr inbounds float* %tmp20309, i64 1
+  %tmp20311 = getelementptr inbounds float* %tmp20310, i64 1
+  %tmp20312 = getelementptr inbounds float* %tmp20311, i64 1
+  %tmp20313 = getelementptr inbounds float* %tmp20312, i64 1
+  %tmp20314 = getelementptr inbounds float* %tmp20313, i64 1
+  %tmp20315 = getelementptr inbounds float* %tmp20314, i64 1
+  %tmp20316 = getelementptr inbounds float* %tmp20315, i64 1
+  %tmp20317 = getelementptr inbounds float* %tmp20316, i64 1
+  %tmp20318 = getelementptr inbounds float* %tmp20317, i64 1
+  %tmp20319 = getelementptr inbounds float* %tmp20318, i64 1
+  %tmp20320 = getelementptr inbounds float* %tmp20319, i64 1
+  %tmp20321 = getelementptr inbounds float* %tmp20320, i64 1
+  %tmp20322 = getelementptr inbounds float* %tmp20321, i64 1
+  %tmp20323 = getelementptr inbounds float* %tmp20322, i64 1
+  %tmp20324 = getelementptr inbounds float* %tmp20323, i64 1
+  %tmp20325 = getelementptr inbounds float* %tmp20324, i64 1
+  %tmp20326 = getelementptr inbounds float* %tmp20325, i64 1
+  %tmp20327 = getelementptr inbounds float* %tmp20326, i64 1
+  %tmp20328 = getelementptr inbounds float* %tmp20327, i64 1
+  %tmp20329 = getelementptr inbounds float* %tmp20328, i64 1
+  %tmp20330 = getelementptr inbounds float* %tmp20329, i64 1
+  %tmp20331 = getelementptr inbounds float* %tmp20330, i64 1
+  %tmp20332 = getelementptr inbounds float* %tmp20331, i64 1
+  %tmp20333 = getelementptr inbounds float* %tmp20332, i64 1
+  %tmp20334 = getelementptr inbounds float* %tmp20333, i64 1
+  %tmp20335 = getelementptr inbounds float* %tmp20334, i64 1
+  %tmp20336 = getelementptr inbounds float* %tmp20335, i64 1
+  %tmp20337 = getelementptr inbounds float* %tmp20336, i64 1
+  %tmp20338 = getelementptr inbounds float* %tmp20337, i64 1
+  %tmp20339 = getelementptr inbounds float* %tmp20338, i64 1
+  %tmp20340 = getelementptr inbounds float* %tmp20339, i64 1
+  %tmp20341 = getelementptr inbounds float* %tmp20340, i64 1
+  %tmp20342 = getelementptr inbounds float* %tmp20341, i64 1
+  %tmp20343 = getelementptr inbounds float* %tmp20342, i64 1
+  %tmp20344 = getelementptr inbounds float* %tmp20343, i64 1
+  %tmp20345 = getelementptr inbounds float* %tmp20344, i64 1
+  %tmp20346 = getelementptr inbounds float* %tmp20345, i64 1
+  %tmp20347 = getelementptr inbounds float* %tmp20346, i64 1
+  %tmp20348 = getelementptr inbounds float* %tmp20347, i64 1
+  %tmp20349 = getelementptr inbounds float* %tmp20348, i64 1
+  %tmp20350 = getelementptr inbounds float* %tmp20349, i64 1
+  %tmp20351 = getelementptr inbounds float* %tmp20350, i64 1
+  %tmp20352 = getelementptr inbounds float* %tmp20351, i64 1
+  %tmp20353 = getelementptr inbounds float* %tmp20352, i64 1
+  %tmp20354 = getelementptr inbounds float* %tmp20353, i64 1
+  %tmp20355 = getelementptr inbounds float* %tmp20354, i64 1
+  %tmp20356 = getelementptr inbounds float* %tmp20355, i64 1
+  %tmp20357 = getelementptr inbounds float* %tmp20356, i64 1
+  %tmp20358 = getelementptr inbounds float* %tmp20357, i64 1
+  %tmp20359 = getelementptr inbounds float* %tmp20358, i64 1
+  %tmp20360 = getelementptr inbounds float* %tmp20359, i64 1
+  %tmp20361 = getelementptr inbounds float* %tmp20360, i64 1
+  %tmp20362 = getelementptr inbounds float* %tmp20361, i64 1
+  %tmp20363 = getelementptr inbounds float* %tmp20362, i64 1
+  %tmp20364 = getelementptr inbounds float* %tmp20363, i64 1
+  %tmp20365 = getelementptr inbounds float* %tmp20364, i64 1
+  %tmp20366 = getelementptr inbounds float* %tmp20365, i64 1
+  %tmp20367 = getelementptr inbounds float* %tmp20366, i64 1
+  %tmp20368 = getelementptr inbounds float* %tmp20367, i64 1
+  %tmp20369 = getelementptr inbounds float* %tmp20368, i64 1
+  %tmp20370 = getelementptr inbounds float* %tmp20369, i64 1
+  %tmp20371 = getelementptr inbounds float* %tmp20370, i64 1
+  %tmp20372 = getelementptr inbounds float* %tmp20371, i64 1
+  %tmp20373 = getelementptr inbounds float* %tmp20372, i64 1
+  %tmp20374 = getelementptr inbounds float* %tmp20373, i64 1
+  %tmp20375 = getelementptr inbounds float* %tmp20374, i64 1
+  %tmp20376 = getelementptr inbounds float* %tmp20375, i64 1
+  %tmp20377 = getelementptr inbounds float* %tmp20376, i64 1
+  %tmp20378 = getelementptr inbounds float* %tmp20377, i64 1
+  %tmp20379 = getelementptr inbounds float* %tmp20378, i64 1
+  %tmp20380 = getelementptr inbounds float* %tmp20379, i64 1
+  %tmp20381 = getelementptr inbounds float* %tmp20380, i64 1
+  %tmp20382 = getelementptr inbounds float* %tmp20381, i64 1
+  %tmp20383 = getelementptr inbounds float* %tmp20382, i64 1
+  %tmp20384 = getelementptr inbounds float* %tmp20383, i64 1
+  %tmp20385 = getelementptr inbounds float* %tmp20384, i64 1
+  %tmp20386 = getelementptr inbounds float* %tmp20385, i64 1
+  %tmp20387 = getelementptr inbounds float* %tmp20386, i64 1
+  %tmp20388 = getelementptr inbounds float* %tmp20387, i64 1
+  %tmp20389 = getelementptr inbounds float* %tmp20388, i64 1
+  %tmp20390 = getelementptr inbounds float* %tmp20389, i64 1
+  %tmp20391 = getelementptr inbounds float* %tmp20390, i64 1
+  %tmp20392 = getelementptr inbounds float* %tmp20391, i64 1
+  %tmp20393 = getelementptr inbounds float* %tmp20392, i64 1
+  %tmp20394 = getelementptr inbounds float* %tmp20393, i64 1
+  %tmp20395 = getelementptr inbounds float* %tmp20394, i64 1
+  %tmp20396 = getelementptr inbounds float* %tmp20395, i64 1
+  %tmp20397 = getelementptr inbounds float* %tmp20396, i64 1
+  %tmp20398 = getelementptr inbounds float* %tmp20397, i64 1
+  %tmp20399 = getelementptr inbounds float* %tmp20398, i64 1
+  %tmp20400 = getelementptr inbounds float* %tmp20399, i64 1
+  %tmp20401 = getelementptr inbounds float* %tmp20400, i64 1
+  %tmp20402 = getelementptr inbounds float* %tmp20401, i64 1
+  %tmp20403 = getelementptr inbounds float* %tmp20402, i64 1
+  %tmp20404 = getelementptr inbounds float* %tmp20403, i64 1
+  %tmp20405 = getelementptr inbounds float* %tmp20404, i64 1
+  %tmp20406 = getelementptr inbounds float* %tmp20405, i64 1
+  %tmp20407 = getelementptr inbounds float* %tmp20406, i64 1
+  %tmp20408 = getelementptr inbounds float* %tmp20407, i64 1
+  %tmp20409 = getelementptr inbounds float* %tmp20408, i64 1
+  %tmp20410 = getelementptr inbounds float* %tmp20409, i64 1
+  %tmp20411 = getelementptr inbounds float* %tmp20410, i64 1
+  %tmp20412 = getelementptr inbounds float* %tmp20411, i64 1
+  %tmp20413 = getelementptr inbounds float* %tmp20412, i64 1
+  %tmp20414 = getelementptr inbounds float* %tmp20413, i64 1
+  %tmp20415 = getelementptr inbounds float* %tmp20414, i64 1
+  %tmp20416 = getelementptr inbounds float* %tmp20415, i64 1
+  %tmp20417 = getelementptr inbounds float* %tmp20416, i64 1
+  %tmp20418 = getelementptr inbounds float* %tmp20417, i64 1
+  %tmp20419 = getelementptr inbounds float* %tmp20418, i64 1
+  %tmp20420 = getelementptr inbounds float* %tmp20419, i64 1
+  %tmp20421 = getelementptr inbounds float* %tmp20420, i64 1
+  %tmp20422 = getelementptr inbounds float* %tmp20421, i64 1
+  %tmp20423 = getelementptr inbounds float* %tmp20422, i64 1
+  %tmp20424 = getelementptr inbounds float* %tmp20423, i64 1
+  %tmp20425 = getelementptr inbounds float* %tmp20424, i64 1
+  %tmp20426 = getelementptr inbounds float* %tmp20425, i64 1
+  %tmp20427 = getelementptr inbounds float* %tmp20426, i64 1
+  %tmp20428 = getelementptr inbounds float* %tmp20427, i64 1
+  %tmp20429 = getelementptr inbounds float* %tmp20428, i64 1
+  %tmp20430 = getelementptr inbounds float* %tmp20429, i64 1
+  %tmp20431 = getelementptr inbounds float* %tmp20430, i64 1
+  %tmp20432 = getelementptr inbounds float* %tmp20431, i64 1
+  %tmp20433 = getelementptr inbounds float* %tmp20432, i64 1
+  %tmp20434 = getelementptr inbounds float* %tmp20433, i64 1
+  %tmp20435 = getelementptr inbounds float* %tmp20434, i64 1
+  %tmp20436 = getelementptr inbounds float* %tmp20435, i64 1
+  %tmp20437 = getelementptr inbounds float* %tmp20436, i64 1
+  %tmp20438 = getelementptr inbounds float* %tmp20437, i64 1
+  %tmp20439 = getelementptr inbounds float* %tmp20438, i64 1
+  %tmp20440 = getelementptr inbounds float* %tmp20439, i64 1
+  %tmp20441 = getelementptr inbounds float* %tmp20440, i64 1
+  %tmp20442 = getelementptr inbounds float* %tmp20441, i64 1
+  %tmp20443 = getelementptr inbounds float* %tmp20442, i64 1
+  %tmp20444 = getelementptr inbounds float* %tmp20443, i64 1
+  %tmp20445 = getelementptr inbounds float* %tmp20444, i64 1
+  %tmp20446 = getelementptr inbounds float* %tmp20445, i64 1
+  %tmp20447 = getelementptr inbounds float* %tmp20446, i64 1
+  %tmp20448 = getelementptr inbounds float* %tmp20447, i64 1
+  %tmp20449 = getelementptr inbounds float* %tmp20448, i64 1
+  %tmp20450 = getelementptr inbounds float* %tmp20449, i64 1
+  %tmp20451 = getelementptr inbounds float* %tmp20450, i64 1
+  %tmp20452 = getelementptr inbounds float* %tmp20451, i64 1
+  %tmp20453 = getelementptr inbounds float* %tmp20452, i64 1
+  %tmp20454 = getelementptr inbounds float* %tmp20453, i64 1
+  %tmp20455 = getelementptr inbounds float* %tmp20454, i64 1
+  %tmp20456 = getelementptr inbounds float* %tmp20455, i64 1
+  %tmp20457 = getelementptr inbounds float* %tmp20456, i64 1
+  %tmp20458 = getelementptr inbounds float* %tmp20457, i64 1
+  %tmp20459 = getelementptr inbounds float* %tmp20458, i64 1
+  %tmp20460 = getelementptr inbounds float* %tmp20459, i64 1
+  %tmp20461 = getelementptr inbounds float* %tmp20460, i64 1
+  %tmp20462 = getelementptr inbounds float* %tmp20461, i64 1
+  %tmp20463 = getelementptr inbounds float* %tmp20462, i64 1
+  %tmp20464 = getelementptr inbounds float* %tmp20463, i64 1
+  %tmp20465 = getelementptr inbounds float* %tmp20464, i64 1
+  %tmp20466 = getelementptr inbounds float* %tmp20465, i64 1
+  %tmp20467 = getelementptr inbounds float* %tmp20466, i64 1
+  %tmp20468 = getelementptr inbounds float* %tmp20467, i64 1
+  %tmp20469 = getelementptr inbounds float* %tmp20468, i64 1
+  %tmp20470 = getelementptr inbounds float* %tmp20469, i64 1
+  %tmp20471 = getelementptr inbounds float* %tmp20470, i64 1
+  %tmp20472 = getelementptr inbounds float* %tmp20471, i64 1
+  %tmp20473 = getelementptr inbounds float* %tmp20472, i64 1
+  %tmp20474 = getelementptr inbounds float* %tmp20473, i64 1
+  %tmp20475 = getelementptr inbounds float* %tmp20474, i64 1
+  %tmp20476 = getelementptr inbounds float* %tmp20475, i64 1
+  %tmp20477 = getelementptr inbounds float* %tmp20476, i64 1
+  %tmp20478 = getelementptr inbounds float* %tmp20477, i64 1
+  %tmp20479 = getelementptr inbounds float* %tmp20478, i64 1
+  %tmp20480 = getelementptr inbounds float* %tmp20479, i64 1
+  %tmp20481 = getelementptr inbounds float* %tmp20480, i64 1
+  %tmp20482 = getelementptr inbounds float* %tmp20481, i64 1
+  %tmp20483 = getelementptr inbounds float* %tmp20482, i64 1
+  %tmp20484 = getelementptr inbounds float* %tmp20483, i64 1
+  %tmp20485 = getelementptr inbounds float* %tmp20484, i64 1
+  %tmp20486 = getelementptr inbounds float* %tmp20485, i64 1
+  %tmp20487 = getelementptr inbounds float* %tmp20486, i64 1
+  %tmp20488 = getelementptr inbounds float* %tmp20487, i64 1
+  %tmp20489 = getelementptr inbounds float* %tmp20488, i64 1
+  %tmp20490 = getelementptr inbounds float* %tmp20489, i64 1
+  %tmp20491 = getelementptr inbounds float* %tmp20490, i64 1
+  %tmp20492 = getelementptr inbounds float* %tmp20491, i64 1
+  %tmp20493 = getelementptr inbounds float* %tmp20492, i64 1
+  %tmp20494 = getelementptr inbounds float* %tmp20493, i64 1
+  %tmp20495 = getelementptr inbounds float* %tmp20494, i64 1
+  %tmp20496 = getelementptr inbounds float* %tmp20495, i64 1
+  %tmp20497 = getelementptr inbounds float* %tmp20496, i64 1
+  %tmp20498 = getelementptr inbounds float* %tmp20497, i64 1
+  %tmp20499 = getelementptr inbounds float* %tmp20498, i64 1
+  %tmp20500 = getelementptr inbounds float* %tmp20499, i64 1
+  %tmp20501 = getelementptr inbounds float* %tmp20500, i64 1
+  %tmp20502 = getelementptr inbounds float* %tmp20501, i64 1
+  %tmp20503 = getelementptr inbounds float* %tmp20502, i64 1
+  %tmp20504 = getelementptr inbounds float* %tmp20503, i64 1
+  %tmp20505 = getelementptr inbounds float* %tmp20504, i64 1
+  %tmp20506 = getelementptr inbounds float* %tmp20505, i64 1
+  %tmp20507 = getelementptr inbounds float* %tmp20506, i64 1
+  %tmp20508 = getelementptr inbounds float* %tmp20507, i64 1
+  %tmp20509 = getelementptr inbounds float* %tmp20508, i64 1
+  %tmp20510 = getelementptr inbounds float* %tmp20509, i64 1
+  %tmp20511 = getelementptr inbounds float* %tmp20510, i64 1
+  %tmp20512 = getelementptr inbounds float* %tmp20511, i64 1
+  %tmp20513 = getelementptr inbounds float* %tmp20512, i64 1
+  %tmp20514 = getelementptr inbounds float* %tmp20513, i64 1
+  %tmp20515 = getelementptr inbounds float* %tmp20514, i64 1
+  %tmp20516 = getelementptr inbounds float* %tmp20515, i64 1
+  %tmp20517 = getelementptr inbounds float* %tmp20516, i64 1
+  %tmp20518 = getelementptr inbounds float* %tmp20517, i64 1
+  %tmp20519 = getelementptr inbounds float* %tmp20518, i64 1
+  %tmp20520 = getelementptr inbounds float* %tmp20519, i64 1
+  %tmp20521 = getelementptr inbounds float* %tmp20520, i64 1
+  %tmp20522 = getelementptr inbounds float* %tmp20521, i64 1
+  %tmp20523 = getelementptr inbounds float* %tmp20522, i64 1
+  %tmp20524 = getelementptr inbounds float* %tmp20523, i64 1
+  %tmp20525 = getelementptr inbounds float* %tmp20524, i64 1
+  %tmp20526 = getelementptr inbounds float* %tmp20525, i64 1
+  %tmp20527 = getelementptr inbounds float* %tmp20526, i64 1
+  %tmp20528 = getelementptr inbounds float* %tmp20527, i64 1
+  %tmp20529 = getelementptr inbounds float* %tmp20528, i64 1
+  %tmp20530 = getelementptr inbounds float* %tmp20529, i64 1
+  %tmp20531 = getelementptr inbounds float* %tmp20530, i64 1
+  %tmp20532 = getelementptr inbounds float* %tmp20531, i64 1
+  %tmp20533 = getelementptr inbounds float* %tmp20532, i64 1
+  %tmp20534 = getelementptr inbounds float* %tmp20533, i64 1
+  %tmp20535 = getelementptr inbounds float* %tmp20534, i64 1
+  %tmp20536 = getelementptr inbounds float* %tmp20535, i64 1
+  %tmp20537 = getelementptr inbounds float* %tmp20536, i64 1
+  %tmp20538 = getelementptr inbounds float* %tmp20537, i64 1
+  %tmp20539 = getelementptr inbounds float* %tmp20538, i64 1
+  %tmp20540 = getelementptr inbounds float* %tmp20539, i64 1
+  %tmp20541 = getelementptr inbounds float* %tmp20540, i64 1
+  %tmp20542 = getelementptr inbounds float* %tmp20541, i64 1
+  %tmp20543 = getelementptr inbounds float* %tmp20542, i64 1
+  %tmp20544 = getelementptr inbounds float* %tmp20543, i64 1
+  %tmp20545 = getelementptr inbounds float* %tmp20544, i64 1
+  %tmp20546 = getelementptr inbounds float* %tmp20545, i64 1
+  %tmp20547 = getelementptr inbounds float* %tmp20546, i64 1
+  %tmp20548 = getelementptr inbounds float* %tmp20547, i64 1
+  %tmp20549 = getelementptr inbounds float* %tmp20548, i64 1
+  %tmp20550 = getelementptr inbounds float* %tmp20549, i64 1
+  %tmp20551 = getelementptr inbounds float* %tmp20550, i64 1
+  %tmp20552 = getelementptr inbounds float* %tmp20551, i64 1
+  %tmp20553 = getelementptr inbounds float* %tmp20552, i64 1
+  %tmp20554 = getelementptr inbounds float* %tmp20553, i64 1
+  %tmp20555 = getelementptr inbounds float* %tmp20554, i64 1
+  %tmp20556 = getelementptr inbounds float* %tmp20555, i64 1
+  %tmp20557 = getelementptr inbounds float* %tmp20556, i64 1
+  %tmp20558 = getelementptr inbounds float* %tmp20557, i64 1
+  %tmp20559 = getelementptr inbounds float* %tmp20558, i64 1
+  %tmp20560 = getelementptr inbounds float* %tmp20559, i64 1
+  %tmp20561 = getelementptr inbounds float* %tmp20560, i64 1
+  %tmp20562 = getelementptr inbounds float* %tmp20561, i64 1
+  %tmp20563 = getelementptr inbounds float* %tmp20562, i64 1
+  %tmp20564 = getelementptr inbounds float* %tmp20563, i64 1
+  %tmp20565 = getelementptr inbounds float* %tmp20564, i64 1
+  %tmp20566 = getelementptr inbounds float* %tmp20565, i64 1
+  %tmp20567 = getelementptr inbounds float* %tmp20566, i64 1
+  %tmp20568 = getelementptr inbounds float* %tmp20567, i64 1
+  %tmp20569 = getelementptr inbounds float* %tmp20568, i64 1
+  %tmp20570 = getelementptr inbounds float* %tmp20569, i64 1
+  %tmp20571 = getelementptr inbounds float* %tmp20570, i64 1
+  %tmp20572 = getelementptr inbounds float* %tmp20571, i64 1
+  %tmp20573 = getelementptr inbounds float* %tmp20572, i64 1
+  %tmp20574 = getelementptr inbounds float* %tmp20573, i64 1
+  %tmp20575 = getelementptr inbounds float* %tmp20574, i64 1
+  %tmp20576 = getelementptr inbounds float* %tmp20575, i64 1
+  %tmp20577 = getelementptr inbounds float* %tmp20576, i64 1
+  %tmp20578 = getelementptr inbounds float* %tmp20577, i64 1
+  %tmp20579 = getelementptr inbounds float* %tmp20578, i64 1
+  %tmp20580 = getelementptr inbounds float* %tmp20579, i64 1
+  %tmp20581 = getelementptr inbounds float* %tmp20580, i64 1
+  %tmp20582 = getelementptr inbounds float* %tmp20581, i64 1
+  %tmp20583 = getelementptr inbounds float* %tmp20582, i64 1
+  %tmp20584 = getelementptr inbounds float* %tmp20583, i64 1
+  %tmp20585 = getelementptr inbounds float* %tmp20584, i64 1
+  %tmp20586 = getelementptr inbounds float* %tmp20585, i64 1
+  %tmp20587 = getelementptr inbounds float* %tmp20586, i64 1
+  %tmp20588 = getelementptr inbounds float* %tmp20587, i64 1
+  %tmp20589 = getelementptr inbounds float* %tmp20588, i64 1
+  %tmp20590 = getelementptr inbounds float* %tmp20589, i64 1
+  %tmp20591 = getelementptr inbounds float* %tmp20590, i64 1
+  %tmp20592 = getelementptr inbounds float* %tmp20591, i64 1
+  %tmp20593 = getelementptr inbounds float* %tmp20592, i64 1
+  %tmp20594 = getelementptr inbounds float* %tmp20593, i64 1
+  %tmp20595 = getelementptr inbounds float* %tmp20594, i64 1
+  %tmp20596 = getelementptr inbounds float* %tmp20595, i64 1
+  %tmp20597 = getelementptr inbounds float* %tmp20596, i64 1
+  %tmp20598 = getelementptr inbounds float* %tmp20597, i64 1
+  %tmp20599 = getelementptr inbounds float* %tmp20598, i64 1
+  %tmp20600 = getelementptr inbounds float* %tmp20599, i64 1
+  %tmp20601 = getelementptr inbounds float* %tmp20600, i64 1
+  %tmp20602 = getelementptr inbounds float* %tmp20601, i64 1
+  %tmp20603 = getelementptr inbounds float* %tmp20602, i64 1
+  %tmp20604 = getelementptr inbounds float* %tmp20603, i64 1
+  %tmp20605 = getelementptr inbounds float* %tmp20604, i64 1
+  %tmp20606 = getelementptr inbounds float* %tmp20605, i64 1
+  %tmp20607 = getelementptr inbounds float* %tmp20606, i64 1
+  %tmp20608 = getelementptr inbounds float* %tmp20607, i64 1
+  %tmp20609 = getelementptr inbounds float* %tmp20608, i64 1
+  %tmp20610 = getelementptr inbounds float* %tmp20609, i64 1
+  %tmp20611 = getelementptr inbounds float* %tmp20610, i64 1
+  %tmp20612 = getelementptr inbounds float* %tmp20611, i64 1
+  %tmp20613 = getelementptr inbounds float* %tmp20612, i64 1
+  %tmp20614 = getelementptr inbounds float* %tmp20613, i64 1
+  %tmp20615 = getelementptr inbounds float* %tmp20614, i64 1
+  %tmp20616 = getelementptr inbounds float* %tmp20615, i64 1
+  %tmp20617 = getelementptr inbounds float* %tmp20616, i64 1
+  %tmp20618 = getelementptr inbounds float* %tmp20617, i64 1
+  %tmp20619 = getelementptr inbounds float* %tmp20618, i64 1
+  %tmp20620 = getelementptr inbounds float* %tmp20619, i64 1
+  %tmp20621 = getelementptr inbounds float* %tmp20620, i64 1
+  %tmp20622 = getelementptr inbounds float* %tmp20621, i64 1
+  %tmp20623 = getelementptr inbounds float* %tmp20622, i64 1
+  %tmp20624 = getelementptr inbounds float* %tmp20623, i64 1
+  %tmp20625 = getelementptr inbounds float* %tmp20624, i64 1
+  %tmp20626 = getelementptr inbounds float* %tmp20625, i64 1
+  %tmp20627 = getelementptr inbounds float* %tmp20626, i64 1
+  %tmp20628 = getelementptr inbounds float* %tmp20627, i64 1
+  %tmp20629 = getelementptr inbounds float* %tmp20628, i64 1
+  %tmp20630 = getelementptr inbounds float* %tmp20629, i64 1
+  %tmp20631 = getelementptr inbounds float* %tmp20630, i64 1
+  %tmp20632 = getelementptr inbounds float* %tmp20631, i64 1
+  %tmp20633 = getelementptr inbounds float* %tmp20632, i64 1
+  %tmp20634 = getelementptr inbounds float* %tmp20633, i64 1
+  %tmp20635 = getelementptr inbounds float* %tmp20634, i64 1
+  %tmp20636 = getelementptr inbounds float* %tmp20635, i64 1
+  %tmp20637 = getelementptr inbounds float* %tmp20636, i64 1
+  %tmp20638 = getelementptr inbounds float* %tmp20637, i64 1
+  %tmp20639 = getelementptr inbounds float* %tmp20638, i64 1
+  %tmp20640 = getelementptr inbounds float* %tmp20639, i64 1
+  %tmp20641 = getelementptr inbounds float* %tmp20640, i64 1
+  %tmp20642 = getelementptr inbounds float* %tmp20641, i64 1
+  %tmp20643 = getelementptr inbounds float* %tmp20642, i64 1
+  %tmp20644 = getelementptr inbounds float* %tmp20643, i64 1
+  %tmp20645 = getelementptr inbounds float* %tmp20644, i64 1
+  %tmp20646 = getelementptr inbounds float* %tmp20645, i64 1
+  %tmp20647 = getelementptr inbounds float* %tmp20646, i64 1
+  %tmp20648 = getelementptr inbounds float* %tmp20647, i64 1
+  %tmp20649 = getelementptr inbounds float* %tmp20648, i64 1
+  %tmp20650 = getelementptr inbounds float* %tmp20649, i64 1
+  %tmp20651 = getelementptr inbounds float* %tmp20650, i64 1
+  %tmp20652 = getelementptr inbounds float* %tmp20651, i64 1
+  %tmp20653 = getelementptr inbounds float* %tmp20652, i64 1
+  %tmp20654 = getelementptr inbounds float* %tmp20653, i64 1
+  %tmp20655 = getelementptr inbounds float* %tmp20654, i64 1
+  %tmp20656 = getelementptr inbounds float* %tmp20655, i64 1
+  %tmp20657 = getelementptr inbounds float* %tmp20656, i64 1
+  %tmp20658 = getelementptr inbounds float* %tmp20657, i64 1
+  %tmp20659 = getelementptr inbounds float* %tmp20658, i64 1
+  %tmp20660 = getelementptr inbounds float* %tmp20659, i64 1
+  %tmp20661 = getelementptr inbounds float* %tmp20660, i64 1
+  %tmp20662 = getelementptr inbounds float* %tmp20661, i64 1
+  %tmp20663 = getelementptr inbounds float* %tmp20662, i64 1
+  %tmp20664 = getelementptr inbounds float* %tmp20663, i64 1
+  %tmp20665 = getelementptr inbounds float* %tmp20664, i64 1
+  %tmp20666 = getelementptr inbounds float* %tmp20665, i64 1
+  %tmp20667 = getelementptr inbounds float* %tmp20666, i64 1
+  %tmp20668 = getelementptr inbounds float* %tmp20667, i64 1
+  %tmp20669 = getelementptr inbounds float* %tmp20668, i64 1
+  %tmp20670 = getelementptr inbounds float* %tmp20669, i64 1
+  %tmp20671 = getelementptr inbounds float* %tmp20670, i64 1
+  %tmp20672 = getelementptr inbounds float* %tmp20671, i64 1
+  %tmp20673 = getelementptr inbounds float* %tmp20672, i64 1
+  %tmp20674 = getelementptr inbounds float* %tmp20673, i64 1
+  %tmp20675 = getelementptr inbounds float* %tmp20674, i64 1
+  %tmp20676 = getelementptr inbounds float* %tmp20675, i64 1
+  %tmp20677 = getelementptr inbounds float* %tmp20676, i64 1
+  %tmp20678 = getelementptr inbounds float* %tmp20677, i64 1
+  %tmp20679 = getelementptr inbounds float* %tmp20678, i64 1
+  %tmp20680 = getelementptr inbounds float* %tmp20679, i64 1
+  %tmp20681 = getelementptr inbounds float* %tmp20680, i64 1
+  %tmp20682 = getelementptr inbounds float* %tmp20681, i64 1
+  %tmp20683 = getelementptr inbounds float* %tmp20682, i64 1
+  %tmp20684 = getelementptr inbounds float* %tmp20683, i64 1
+  %tmp20685 = getelementptr inbounds float* %tmp20684, i64 1
+  %tmp20686 = getelementptr inbounds float* %tmp20685, i64 1
+  %tmp20687 = getelementptr inbounds float* %tmp20686, i64 1
+  %tmp20688 = getelementptr inbounds float* %tmp20687, i64 1
+  %tmp20689 = getelementptr inbounds float* %tmp20688, i64 1
+  %tmp20690 = getelementptr inbounds float* %tmp20689, i64 1
+  %tmp20691 = getelementptr inbounds float* %tmp20690, i64 1
+  %tmp20692 = getelementptr inbounds float* %tmp20691, i64 1
+  %tmp20693 = getelementptr inbounds float* %tmp20692, i64 1
+  %tmp20694 = getelementptr inbounds float* %tmp20693, i64 1
+  %tmp20695 = getelementptr inbounds float* %tmp20694, i64 1
+  %tmp20696 = getelementptr inbounds float* %tmp20695, i64 1
+  %tmp20697 = getelementptr inbounds float* %tmp20696, i64 1
+  %tmp20698 = getelementptr inbounds float* %tmp20697, i64 1
+  %tmp20699 = getelementptr inbounds float* %tmp20698, i64 1
+  %tmp20700 = getelementptr inbounds float* %tmp20699, i64 1
+  %tmp20701 = getelementptr inbounds float* %tmp20700, i64 1
+  %tmp20702 = getelementptr inbounds float* %tmp20701, i64 1
+  %tmp20703 = getelementptr inbounds float* %tmp20702, i64 1
+  %tmp20704 = getelementptr inbounds float* %tmp20703, i64 1
+  %tmp20705 = getelementptr inbounds float* %tmp20704, i64 1
+  %tmp20706 = getelementptr inbounds float* %tmp20705, i64 1
+  %tmp20707 = getelementptr inbounds float* %tmp20706, i64 1
+  %tmp20708 = getelementptr inbounds float* %tmp20707, i64 1
+  %tmp20709 = getelementptr inbounds float* %tmp20708, i64 1
+  %tmp20710 = getelementptr inbounds float* %tmp20709, i64 1
+  %tmp20711 = getelementptr inbounds float* %tmp20710, i64 1
+  %tmp20712 = getelementptr inbounds float* %tmp20711, i64 1
+  %tmp20713 = getelementptr inbounds float* %tmp20712, i64 1
+  %tmp20714 = getelementptr inbounds float* %tmp20713, i64 1
+  %tmp20715 = getelementptr inbounds float* %tmp20714, i64 1
+  %tmp20716 = getelementptr inbounds float* %tmp20715, i64 1
+  %tmp20717 = getelementptr inbounds float* %tmp20716, i64 1
+  %tmp20718 = getelementptr inbounds float* %tmp20717, i64 1
+  %tmp20719 = getelementptr inbounds float* %tmp20718, i64 1
+  %tmp20720 = getelementptr inbounds float* %tmp20719, i64 1
+  %tmp20721 = getelementptr inbounds float* %tmp20720, i64 1
+  %tmp20722 = getelementptr inbounds float* %tmp20721, i64 1
+  %tmp20723 = getelementptr inbounds float* %tmp20722, i64 1
+  %tmp20724 = getelementptr inbounds float* %tmp20723, i64 1
+  %tmp20725 = getelementptr inbounds float* %tmp20724, i64 1
+  %tmp20726 = getelementptr inbounds float* %tmp20725, i64 1
+  %tmp20727 = getelementptr inbounds float* %tmp20726, i64 1
+  %tmp20728 = getelementptr inbounds float* %tmp20727, i64 1
+  %tmp20729 = getelementptr inbounds float* %tmp20728, i64 1
+  %tmp20730 = getelementptr inbounds float* %tmp20729, i64 1
+  %tmp20731 = getelementptr inbounds float* %tmp20730, i64 1
+  %tmp20732 = getelementptr inbounds float* %tmp20731, i64 1
+  %tmp20733 = getelementptr inbounds float* %tmp20732, i64 1
+  %tmp20734 = getelementptr inbounds float* %tmp20733, i64 1
+  %tmp20735 = getelementptr inbounds float* %tmp20734, i64 1
+  %tmp20736 = getelementptr inbounds float* %tmp20735, i64 1
+  %tmp20737 = getelementptr inbounds float* %tmp20736, i64 1
+  %tmp20738 = getelementptr inbounds float* %tmp20737, i64 1
+  %tmp20739 = getelementptr inbounds float* %tmp20738, i64 1
+  %tmp20740 = getelementptr inbounds float* %tmp20739, i64 1
+  %tmp20741 = getelementptr inbounds float* %tmp20740, i64 1
+  %tmp20742 = getelementptr inbounds float* %tmp20741, i64 1
+  %tmp20743 = getelementptr inbounds float* %tmp20742, i64 1
+  %tmp20744 = getelementptr inbounds float* %tmp20743, i64 1
+  %tmp20745 = getelementptr inbounds float* %tmp20744, i64 1
+  %tmp20746 = getelementptr inbounds float* %tmp20745, i64 1
+  %tmp20747 = getelementptr inbounds float* %tmp20746, i64 1
+  %tmp20748 = getelementptr inbounds float* %tmp20747, i64 1
+  %tmp20749 = getelementptr inbounds float* %tmp20748, i64 1
+  %tmp20750 = getelementptr inbounds float* %tmp20749, i64 1
+  %tmp20751 = getelementptr inbounds float* %tmp20750, i64 1
+  %tmp20752 = getelementptr inbounds float* %tmp20751, i64 1
+  %tmp20753 = getelementptr inbounds float* %tmp20752, i64 1
+  %tmp20754 = getelementptr inbounds float* %tmp20753, i64 1
+  %tmp20755 = getelementptr inbounds float* %tmp20754, i64 1
+  %tmp20756 = getelementptr inbounds float* %tmp20755, i64 1
+  %tmp20757 = getelementptr inbounds float* %tmp20756, i64 1
+  %tmp20758 = getelementptr inbounds float* %tmp20757, i64 1
+  %tmp20759 = getelementptr inbounds float* %tmp20758, i64 1
+  %tmp20760 = getelementptr inbounds float* %tmp20759, i64 1
+  %tmp20761 = getelementptr inbounds float* %tmp20760, i64 1
+  %tmp20762 = getelementptr inbounds float* %tmp20761, i64 1
+  %tmp20763 = getelementptr inbounds float* %tmp20762, i64 1
+  %tmp20764 = getelementptr inbounds float* %tmp20763, i64 1
+  %tmp20765 = getelementptr inbounds float* %tmp20764, i64 1
+  %tmp20766 = getelementptr inbounds float* %tmp20765, i64 1
+  %tmp20767 = getelementptr inbounds float* %tmp20766, i64 1
+  %tmp20768 = getelementptr inbounds float* %tmp20767, i64 1
+  %tmp20769 = getelementptr inbounds float* %tmp20768, i64 1
+  %tmp20770 = getelementptr inbounds float* %tmp20769, i64 1
+  %tmp20771 = getelementptr inbounds float* %tmp20770, i64 1
+  %tmp20772 = getelementptr inbounds float* %tmp20771, i64 1
+  %tmp20773 = getelementptr inbounds float* %tmp20772, i64 1
+  %tmp20774 = getelementptr inbounds float* %tmp20773, i64 1
+  %tmp20775 = getelementptr inbounds float* %tmp20774, i64 1
+  %tmp20776 = getelementptr inbounds float* %tmp20775, i64 1
+  %tmp20777 = getelementptr inbounds float* %tmp20776, i64 1
+  %tmp20778 = getelementptr inbounds float* %tmp20777, i64 1
+  %tmp20779 = getelementptr inbounds float* %tmp20778, i64 1
+  %tmp20780 = getelementptr inbounds float* %tmp20779, i64 1
+  %tmp20781 = getelementptr inbounds float* %tmp20780, i64 1
+  %tmp20782 = getelementptr inbounds float* %tmp20781, i64 1
+  %tmp20783 = getelementptr inbounds float* %tmp20782, i64 1
+  %tmp20784 = getelementptr inbounds float* %tmp20783, i64 1
+  %tmp20785 = getelementptr inbounds float* %tmp20784, i64 1
+  %tmp20786 = getelementptr inbounds float* %tmp20785, i64 1
+  %tmp20787 = getelementptr inbounds float* %tmp20786, i64 1
+  %tmp20788 = getelementptr inbounds float* %tmp20787, i64 1
+  %tmp20789 = getelementptr inbounds float* %tmp20788, i64 1
+  %tmp20790 = getelementptr inbounds float* %tmp20789, i64 1
+  %tmp20791 = getelementptr inbounds float* %tmp20790, i64 1
+  %tmp20792 = getelementptr inbounds float* %tmp20791, i64 1
+  %tmp20793 = getelementptr inbounds float* %tmp20792, i64 1
+  %tmp20794 = getelementptr inbounds float* %tmp20793, i64 1
+  %tmp20795 = getelementptr inbounds float* %tmp20794, i64 1
+  %tmp20796 = getelementptr inbounds float* %tmp20795, i64 1
+  %tmp20797 = getelementptr inbounds float* %tmp20796, i64 1
+  %tmp20798 = getelementptr inbounds float* %tmp20797, i64 1
+  %tmp20799 = getelementptr inbounds float* %tmp20798, i64 1
+  %tmp20800 = getelementptr inbounds float* %tmp20799, i64 1
+  %tmp20801 = getelementptr inbounds float* %tmp20800, i64 1
+  %tmp20802 = getelementptr inbounds float* %tmp20801, i64 1
+  %tmp20803 = getelementptr inbounds float* %tmp20802, i64 1
+  %tmp20804 = getelementptr inbounds float* %tmp20803, i64 1
+  %tmp20805 = getelementptr inbounds float* %tmp20804, i64 1
+  %tmp20806 = getelementptr inbounds float* %tmp20805, i64 1
+  %tmp20807 = getelementptr inbounds float* %tmp20806, i64 1
+  %tmp20808 = getelementptr inbounds float* %tmp20807, i64 1
+  %tmp20809 = getelementptr inbounds float* %tmp20808, i64 1
+  %tmp20810 = getelementptr inbounds float* %tmp20809, i64 1
+  %tmp20811 = getelementptr inbounds float* %tmp20810, i64 1
+  %tmp20812 = getelementptr inbounds float* %tmp20811, i64 1
+  %tmp20813 = getelementptr inbounds float* %tmp20812, i64 1
+  %tmp20814 = getelementptr inbounds float* %tmp20813, i64 1
+  %tmp20815 = getelementptr inbounds float* %tmp20814, i64 1
+  %tmp20816 = getelementptr inbounds float* %tmp20815, i64 1
+  %tmp20817 = getelementptr inbounds float* %tmp20816, i64 1
+  %tmp20818 = getelementptr inbounds float* %tmp20817, i64 1
+  %tmp20819 = getelementptr inbounds float* %tmp20818, i64 1
+  %tmp20820 = getelementptr inbounds float* %tmp20819, i64 1
+  %tmp20821 = getelementptr inbounds float* %tmp20820, i64 1
+  %tmp20822 = getelementptr inbounds float* %tmp20821, i64 1
+  %tmp20823 = getelementptr inbounds float* %tmp20822, i64 1
+  %tmp20824 = getelementptr inbounds float* %tmp20823, i64 1
+  %tmp20825 = getelementptr inbounds float* %tmp20824, i64 1
+  %tmp20826 = getelementptr inbounds float* %tmp20825, i64 1
+  %tmp20827 = getelementptr inbounds float* %tmp20826, i64 1
+  %tmp20828 = getelementptr inbounds float* %tmp20827, i64 1
+  %tmp20829 = getelementptr inbounds float* %tmp20828, i64 1
+  %tmp20830 = getelementptr inbounds float* %tmp20829, i64 1
+  %tmp20831 = getelementptr inbounds float* %tmp20830, i64 1
+  %tmp20832 = getelementptr inbounds float* %tmp20831, i64 1
+  %tmp20833 = getelementptr inbounds float* %tmp20832, i64 1
+  %tmp20834 = getelementptr inbounds float* %tmp20833, i64 1
+  %tmp20835 = getelementptr inbounds float* %tmp20834, i64 1
+  %tmp20836 = getelementptr inbounds float* %tmp20835, i64 1
+  %tmp20837 = getelementptr inbounds float* %tmp20836, i64 1
+  %tmp20838 = getelementptr inbounds float* %tmp20837, i64 1
+  %tmp20839 = getelementptr inbounds float* %tmp20838, i64 1
+  %tmp20840 = getelementptr inbounds float* %tmp20839, i64 1
+  %tmp20841 = getelementptr inbounds float* %tmp20840, i64 1
+  %tmp20842 = getelementptr inbounds float* %tmp20841, i64 1
+  %tmp20843 = getelementptr inbounds float* %tmp20842, i64 1
+  %tmp20844 = getelementptr inbounds float* %tmp20843, i64 1
+  %tmp20845 = getelementptr inbounds float* %tmp20844, i64 1
+  %tmp20846 = getelementptr inbounds float* %tmp20845, i64 1
+  %tmp20847 = getelementptr inbounds float* %tmp20846, i64 1
+  %tmp20848 = getelementptr inbounds float* %tmp20847, i64 1
+  %tmp20849 = getelementptr inbounds float* %tmp20848, i64 1
+  %tmp20850 = getelementptr inbounds float* %tmp20849, i64 1
+  %tmp20851 = getelementptr inbounds float* %tmp20850, i64 1
+  %tmp20852 = getelementptr inbounds float* %tmp20851, i64 1
+  %tmp20853 = getelementptr inbounds float* %tmp20852, i64 1
+  %tmp20854 = getelementptr inbounds float* %tmp20853, i64 1
+  %tmp20855 = getelementptr inbounds float* %tmp20854, i64 1
+  %tmp20856 = getelementptr inbounds float* %tmp20855, i64 1
+  %tmp20857 = getelementptr inbounds float* %tmp20856, i64 1
+  %tmp20858 = getelementptr inbounds float* %tmp20857, i64 1
+  %tmp20859 = getelementptr inbounds float* %tmp20858, i64 1
+  %tmp20860 = getelementptr inbounds float* %tmp20859, i64 1
+  %tmp20861 = getelementptr inbounds float* %tmp20860, i64 1
+  %tmp20862 = getelementptr inbounds float* %tmp20861, i64 1
+  %tmp20863 = getelementptr inbounds float* %tmp20862, i64 1
+  %tmp20864 = getelementptr inbounds float* %tmp20863, i64 1
+  %tmp20865 = getelementptr inbounds float* %tmp20864, i64 1
+  %tmp20866 = getelementptr inbounds float* %tmp20865, i64 1
+  %tmp20867 = getelementptr inbounds float* %tmp20866, i64 1
+  %tmp20868 = getelementptr inbounds float* %tmp20867, i64 1
+  %tmp20869 = getelementptr inbounds float* %tmp20868, i64 1
+  %tmp20870 = getelementptr inbounds float* %tmp20869, i64 1
+  %tmp20871 = getelementptr inbounds float* %tmp20870, i64 1
+  %tmp20872 = getelementptr inbounds float* %tmp20871, i64 1
+  %tmp20873 = getelementptr inbounds float* %tmp20872, i64 1
+  %tmp20874 = getelementptr inbounds float* %tmp20873, i64 1
+  %tmp20875 = getelementptr inbounds float* %tmp20874, i64 1
+  %tmp20876 = getelementptr inbounds float* %tmp20875, i64 1
+  %tmp20877 = getelementptr inbounds float* %tmp20876, i64 1
+  %tmp20878 = getelementptr inbounds float* %tmp20877, i64 1
+  %tmp20879 = getelementptr inbounds float* %tmp20878, i64 1
+  %tmp20880 = getelementptr inbounds float* %tmp20879, i64 1
+  %tmp20881 = getelementptr inbounds float* %tmp20880, i64 1
+  %tmp20882 = getelementptr inbounds float* %tmp20881, i64 1
+  %tmp20883 = getelementptr inbounds float* %tmp20882, i64 1
+  %tmp20884 = getelementptr inbounds float* %tmp20883, i64 1
+  %tmp20885 = getelementptr inbounds float* %tmp20884, i64 1
+  %tmp20886 = getelementptr inbounds float* %tmp20885, i64 1
+  %tmp20887 = getelementptr inbounds float* %tmp20886, i64 1
+  %tmp20888 = getelementptr inbounds float* %tmp20887, i64 1
+  %tmp20889 = getelementptr inbounds float* %tmp20888, i64 1
+  %tmp20890 = getelementptr inbounds float* %tmp20889, i64 1
+  %tmp20891 = getelementptr inbounds float* %tmp20890, i64 1
+  %tmp20892 = getelementptr inbounds float* %tmp20891, i64 1
+  %tmp20893 = getelementptr inbounds float* %tmp20892, i64 1
+  %tmp20894 = getelementptr inbounds float* %tmp20893, i64 1
+  %tmp20895 = getelementptr inbounds float* %tmp20894, i64 1
+  %tmp20896 = getelementptr inbounds float* %tmp20895, i64 1
+  %tmp20897 = getelementptr inbounds float* %tmp20896, i64 1
+  %tmp20898 = getelementptr inbounds float* %tmp20897, i64 1
+  %tmp20899 = getelementptr inbounds float* %tmp20898, i64 1
+  %tmp20900 = getelementptr inbounds float* %tmp20899, i64 1
+  %tmp20901 = getelementptr inbounds float* %tmp20900, i64 1
+  %tmp20902 = getelementptr inbounds float* %tmp20901, i64 1
+  %tmp20903 = getelementptr inbounds float* %tmp20902, i64 1
+  %tmp20904 = getelementptr inbounds float* %tmp20903, i64 1
+  %tmp20905 = getelementptr inbounds float* %tmp20904, i64 1
+  %tmp20906 = getelementptr inbounds float* %tmp20905, i64 1
+  %tmp20907 = getelementptr inbounds float* %tmp20906, i64 1
+  %tmp20908 = getelementptr inbounds float* %tmp20907, i64 1
+  %tmp20909 = getelementptr inbounds float* %tmp20908, i64 1
+  %tmp20910 = getelementptr inbounds float* %tmp20909, i64 1
+  %tmp20911 = getelementptr inbounds float* %tmp20910, i64 1
+  %tmp20912 = getelementptr inbounds float* %tmp20911, i64 1
+  %tmp20913 = getelementptr inbounds float* %tmp20912, i64 1
+  %tmp20914 = getelementptr inbounds float* %tmp20913, i64 1
+  %tmp20915 = getelementptr inbounds float* %tmp20914, i64 1
+  %tmp20916 = getelementptr inbounds float* %tmp20915, i64 1
+  %tmp20917 = getelementptr inbounds float* %tmp20916, i64 1
+  %tmp20918 = getelementptr inbounds float* %tmp20917, i64 1
+  %tmp20919 = getelementptr inbounds float* %tmp20918, i64 1
+  %tmp20920 = getelementptr inbounds float* %tmp20919, i64 1
+  %tmp20921 = getelementptr inbounds float* %tmp20920, i64 1
+  %tmp20922 = getelementptr inbounds float* %tmp20921, i64 1
+  %tmp20923 = getelementptr inbounds float* %tmp20922, i64 1
+  %tmp20924 = getelementptr inbounds float* %tmp20923, i64 1
+  %tmp20925 = getelementptr inbounds float* %tmp20924, i64 1
+  %tmp20926 = getelementptr inbounds float* %tmp20925, i64 1
+  %tmp20927 = getelementptr inbounds float* %tmp20926, i64 1
+  %tmp20928 = getelementptr inbounds float* %tmp20927, i64 1
+  %tmp20929 = getelementptr inbounds float* %tmp20928, i64 1
+  %tmp20930 = getelementptr inbounds float* %tmp20929, i64 1
+  %tmp20931 = getelementptr inbounds float* %tmp20930, i64 1
+  %tmp20932 = getelementptr inbounds float* %tmp20931, i64 1
+  %tmp20933 = getelementptr inbounds float* %tmp20932, i64 1
+  %tmp20934 = getelementptr inbounds float* %tmp20933, i64 1
+  %tmp20935 = getelementptr inbounds float* %tmp20934, i64 1
+  %tmp20936 = getelementptr inbounds float* %tmp20935, i64 1
+  %tmp20937 = getelementptr inbounds float* %tmp20936, i64 1
+  %tmp20938 = getelementptr inbounds float* %tmp20937, i64 1
+  %tmp20939 = getelementptr inbounds float* %tmp20938, i64 1
+  %tmp20940 = getelementptr inbounds float* %tmp20939, i64 1
+  %tmp20941 = getelementptr inbounds float* %tmp20940, i64 1
+  %tmp20942 = getelementptr inbounds float* %tmp20941, i64 1
+  %tmp20943 = getelementptr inbounds float* %tmp20942, i64 1
+  %tmp20944 = getelementptr inbounds float* %tmp20943, i64 1
+  %tmp20945 = getelementptr inbounds float* %tmp20944, i64 1
+  %tmp20946 = getelementptr inbounds float* %tmp20945, i64 1
+  %tmp20947 = getelementptr inbounds float* %tmp20946, i64 1
+  %tmp20948 = getelementptr inbounds float* %tmp20947, i64 1
+  %tmp20949 = getelementptr inbounds float* %tmp20948, i64 1
+  %tmp20950 = getelementptr inbounds float* %tmp20949, i64 1
+  %tmp20951 = getelementptr inbounds float* %tmp20950, i64 1
+  %tmp20952 = getelementptr inbounds float* %tmp20951, i64 1
+  %tmp20953 = getelementptr inbounds float* %tmp20952, i64 1
+  %tmp20954 = getelementptr inbounds float* %tmp20953, i64 1
+  %tmp20955 = getelementptr inbounds float* %tmp20954, i64 1
+  %tmp20956 = getelementptr inbounds float* %tmp20955, i64 1
+  %tmp20957 = getelementptr inbounds float* %tmp20956, i64 1
+  %tmp20958 = getelementptr inbounds float* %tmp20957, i64 1
+  %tmp20959 = getelementptr inbounds float* %tmp20958, i64 1
+  %tmp20960 = getelementptr inbounds float* %tmp20959, i64 1
+  %tmp20961 = getelementptr inbounds float* %tmp20960, i64 1
+  %tmp20962 = getelementptr inbounds float* %tmp20961, i64 1
+  %tmp20963 = getelementptr inbounds float* %tmp20962, i64 1
+  %tmp20964 = getelementptr inbounds float* %tmp20963, i64 1
+  %tmp20965 = getelementptr inbounds float* %tmp20964, i64 1
+  %tmp20966 = getelementptr inbounds float* %tmp20965, i64 1
+  %tmp20967 = getelementptr inbounds float* %tmp20966, i64 1
+  %tmp20968 = getelementptr inbounds float* %tmp20967, i64 1
+  %tmp20969 = getelementptr inbounds float* %tmp20968, i64 1
+  %tmp20970 = getelementptr inbounds float* %tmp20969, i64 1
+  %tmp20971 = getelementptr inbounds float* %tmp20970, i64 1
+  %tmp20972 = getelementptr inbounds float* %tmp20971, i64 1
+  %tmp20973 = getelementptr inbounds float* %tmp20972, i64 1
+  %tmp20974 = getelementptr inbounds float* %tmp20973, i64 1
+  %tmp20975 = getelementptr inbounds float* %tmp20974, i64 1
+  %tmp20976 = getelementptr inbounds float* %tmp20975, i64 1
+  %tmp20977 = getelementptr inbounds float* %tmp20976, i64 1
+  %tmp20978 = getelementptr inbounds float* %tmp20977, i64 1
+  %tmp20979 = getelementptr inbounds float* %tmp20978, i64 1
+  %tmp20980 = getelementptr inbounds float* %tmp20979, i64 1
+  %tmp20981 = getelementptr inbounds float* %tmp20980, i64 1
+  %tmp20982 = getelementptr inbounds float* %tmp20981, i64 1
+  %tmp20983 = getelementptr inbounds float* %tmp20982, i64 1
+  %tmp20984 = getelementptr inbounds float* %tmp20983, i64 1
+  %tmp20985 = getelementptr inbounds float* %tmp20984, i64 1
+  %tmp20986 = getelementptr inbounds float* %tmp20985, i64 1
+  %tmp20987 = getelementptr inbounds float* %tmp20986, i64 1
+  %tmp20988 = getelementptr inbounds float* %tmp20987, i64 1
+  %tmp20989 = getelementptr inbounds float* %tmp20988, i64 1
+  %tmp20990 = getelementptr inbounds float* %tmp20989, i64 1
+  %tmp20991 = getelementptr inbounds float* %tmp20990, i64 1
+  %tmp20992 = getelementptr inbounds float* %tmp20991, i64 1
+  %tmp20993 = getelementptr inbounds float* %tmp20992, i64 1
+  %tmp20994 = getelementptr inbounds float* %tmp20993, i64 1
+  %tmp20995 = getelementptr inbounds float* %tmp20994, i64 1
+  %tmp20996 = getelementptr inbounds float* %tmp20995, i64 1
+  %tmp20997 = getelementptr inbounds float* %tmp20996, i64 1
+  %tmp20998 = getelementptr inbounds float* %tmp20997, i64 1
+  %tmp20999 = getelementptr inbounds float* %tmp20998, i64 1
+  %tmp21000 = getelementptr inbounds float* %tmp20999, i64 1
+  %tmp21001 = getelementptr inbounds float* %tmp21000, i64 1
+  %tmp21002 = getelementptr inbounds float* %tmp21001, i64 1
+  %tmp21003 = getelementptr inbounds float* %tmp21002, i64 1
+  %tmp21004 = getelementptr inbounds float* %tmp21003, i64 1
+  %tmp21005 = getelementptr inbounds float* %tmp21004, i64 1
+  %tmp21006 = getelementptr inbounds float* %tmp21005, i64 1
+  %tmp21007 = getelementptr inbounds float* %tmp21006, i64 1
+  %tmp21008 = getelementptr inbounds float* %tmp21007, i64 1
+  %tmp21009 = getelementptr inbounds float* %tmp21008, i64 1
+  %tmp21010 = getelementptr inbounds float* %tmp21009, i64 1
+  %tmp21011 = getelementptr inbounds float* %tmp21010, i64 1
+  %tmp21012 = getelementptr inbounds float* %tmp21011, i64 1
+  %tmp21013 = getelementptr inbounds float* %tmp21012, i64 1
+  %tmp21014 = getelementptr inbounds float* %tmp21013, i64 1
+  %tmp21015 = getelementptr inbounds float* %tmp21014, i64 1
+  %tmp21016 = getelementptr inbounds float* %tmp21015, i64 1
+  %tmp21017 = getelementptr inbounds float* %tmp21016, i64 1
+  %tmp21018 = getelementptr inbounds float* %tmp21017, i64 1
+  %tmp21019 = getelementptr inbounds float* %tmp21018, i64 1
+  %tmp21020 = getelementptr inbounds float* %tmp21019, i64 1
+  %tmp21021 = getelementptr inbounds float* %tmp21020, i64 1
+  %tmp21022 = getelementptr inbounds float* %tmp21021, i64 1
+  %tmp21023 = getelementptr inbounds float* %tmp21022, i64 1
+  %tmp21024 = getelementptr inbounds float* %tmp21023, i64 1
+  %tmp21025 = getelementptr inbounds float* %tmp21024, i64 1
+  %tmp21026 = getelementptr inbounds float* %tmp21025, i64 1
+  %tmp21027 = getelementptr inbounds float* %tmp21026, i64 1
+  %tmp21028 = getelementptr inbounds float* %tmp21027, i64 1
+  %tmp21029 = getelementptr inbounds float* %tmp21028, i64 1
+  %tmp21030 = getelementptr inbounds float* %tmp21029, i64 1
+  %tmp21031 = getelementptr inbounds float* %tmp21030, i64 1
+  %tmp21032 = getelementptr inbounds float* %tmp21031, i64 1
+  %tmp21033 = getelementptr inbounds float* %tmp21032, i64 1
+  %tmp21034 = getelementptr inbounds float* %tmp21033, i64 1
+  %tmp21035 = getelementptr inbounds float* %tmp21034, i64 1
+  %tmp21036 = getelementptr inbounds float* %tmp21035, i64 1
+  %tmp21037 = getelementptr inbounds float* %tmp21036, i64 1
+  %tmp21038 = getelementptr inbounds float* %tmp21037, i64 1
+  %tmp21039 = getelementptr inbounds float* %tmp21038, i64 1
+  %tmp21040 = getelementptr inbounds float* %tmp21039, i64 1
+  %tmp21041 = getelementptr inbounds float* %tmp21040, i64 1
+  %tmp21042 = getelementptr inbounds float* %tmp21041, i64 1
+  %tmp21043 = getelementptr inbounds float* %tmp21042, i64 1
+  %tmp21044 = getelementptr inbounds float* %tmp21043, i64 1
+  %tmp21045 = getelementptr inbounds float* %tmp21044, i64 1
+  %tmp21046 = getelementptr inbounds float* %tmp21045, i64 1
+  %tmp21047 = getelementptr inbounds float* %tmp21046, i64 1
+  %tmp21048 = getelementptr inbounds float* %tmp21047, i64 1
+  %tmp21049 = getelementptr inbounds float* %tmp21048, i64 1
+  %tmp21050 = getelementptr inbounds float* %tmp21049, i64 1
+  %tmp21051 = getelementptr inbounds float* %tmp21050, i64 1
+  %tmp21052 = getelementptr inbounds float* %tmp21051, i64 1
+  %tmp21053 = getelementptr inbounds float* %tmp21052, i64 1
+  %tmp21054 = getelementptr inbounds float* %tmp21053, i64 1
+  %tmp21055 = getelementptr inbounds float* %tmp21054, i64 1
+  %tmp21056 = getelementptr inbounds float* %tmp21055, i64 1
+  %tmp21057 = getelementptr inbounds float* %tmp21056, i64 1
+  %tmp21058 = getelementptr inbounds float* %tmp21057, i64 1
+  %tmp21059 = getelementptr inbounds float* %tmp21058, i64 1
+  %tmp21060 = getelementptr inbounds float* %tmp21059, i64 1
+  %tmp21061 = getelementptr inbounds float* %tmp21060, i64 1
+  %tmp21062 = getelementptr inbounds float* %tmp21061, i64 1
+  %tmp21063 = getelementptr inbounds float* %tmp21062, i64 1
+  %tmp21064 = getelementptr inbounds float* %tmp21063, i64 1
+  %tmp21065 = getelementptr inbounds float* %tmp21064, i64 1
+  %tmp21066 = getelementptr inbounds float* %tmp21065, i64 1
+  %tmp21067 = getelementptr inbounds float* %tmp21066, i64 1
+  %tmp21068 = getelementptr inbounds float* %tmp21067, i64 1
+  %tmp21069 = getelementptr inbounds float* %tmp21068, i64 1
+  %tmp21070 = getelementptr inbounds float* %tmp21069, i64 1
+  %tmp21071 = getelementptr inbounds float* %tmp21070, i64 1
+  %tmp21072 = getelementptr inbounds float* %tmp21071, i64 1
+  %tmp21073 = getelementptr inbounds float* %tmp21072, i64 1
+  %tmp21074 = getelementptr inbounds float* %tmp21073, i64 1
+  %tmp21075 = getelementptr inbounds float* %tmp21074, i64 1
+  %tmp21076 = getelementptr inbounds float* %tmp21075, i64 1
+  %tmp21077 = getelementptr inbounds float* %tmp21076, i64 1
+  %tmp21078 = getelementptr inbounds float* %tmp21077, i64 1
+  %tmp21079 = getelementptr inbounds float* %tmp21078, i64 1
+  %tmp21080 = getelementptr inbounds float* %tmp21079, i64 1
+  %tmp21081 = getelementptr inbounds float* %tmp21080, i64 1
+  %tmp21082 = getelementptr inbounds float* %tmp21081, i64 1
+  %tmp21083 = getelementptr inbounds float* %tmp21082, i64 1
+  %tmp21084 = getelementptr inbounds float* %tmp21083, i64 1
+  %tmp21085 = getelementptr inbounds float* %tmp21084, i64 1
+  %tmp21086 = getelementptr inbounds float* %tmp21085, i64 1
+  %tmp21087 = getelementptr inbounds float* %tmp21086, i64 1
+  %tmp21088 = getelementptr inbounds float* %tmp21087, i64 1
+  %tmp21089 = getelementptr inbounds float* %tmp21088, i64 1
+  %tmp21090 = getelementptr inbounds float* %tmp21089, i64 1
+  %tmp21091 = getelementptr inbounds float* %tmp21090, i64 1
+  %tmp21092 = getelementptr inbounds float* %tmp21091, i64 1
+  %tmp21093 = getelementptr inbounds float* %tmp21092, i64 1
+  %tmp21094 = getelementptr inbounds float* %tmp21093, i64 1
+  %tmp21095 = getelementptr inbounds float* %tmp21094, i64 1
+  %tmp21096 = getelementptr inbounds float* %tmp21095, i64 1
+  %tmp21097 = getelementptr inbounds float* %tmp21096, i64 1
+  %tmp21098 = getelementptr inbounds float* %tmp21097, i64 1
+  %tmp21099 = getelementptr inbounds float* %tmp21098, i64 1
+  %tmp21100 = getelementptr inbounds float* %tmp21099, i64 1
+  %tmp21101 = getelementptr inbounds float* %tmp21100, i64 1
+  %tmp21102 = getelementptr inbounds float* %tmp21101, i64 1
+  %tmp21103 = getelementptr inbounds float* %tmp21102, i64 1
+  %tmp21104 = getelementptr inbounds float* %tmp21103, i64 1
+  %tmp21105 = getelementptr inbounds float* %tmp21104, i64 1
+  %tmp21106 = getelementptr inbounds float* %tmp21105, i64 1
+  %tmp21107 = getelementptr inbounds float* %tmp21106, i64 1
+  %tmp21108 = getelementptr inbounds float* %tmp21107, i64 1
+  %tmp21109 = getelementptr inbounds float* %tmp21108, i64 1
+  %tmp21110 = getelementptr inbounds float* %tmp21109, i64 1
+  %tmp21111 = getelementptr inbounds float* %tmp21110, i64 1
+  %tmp21112 = getelementptr inbounds float* %tmp21111, i64 1
+  %tmp21113 = getelementptr inbounds float* %tmp21112, i64 1
+  %tmp21114 = getelementptr inbounds float* %tmp21113, i64 1
+  %tmp21115 = getelementptr inbounds float* %tmp21114, i64 1
+  %tmp21116 = getelementptr inbounds float* %tmp21115, i64 1
+  %tmp21117 = getelementptr inbounds float* %tmp21116, i64 1
+  %tmp21118 = getelementptr inbounds float* %tmp21117, i64 1
+  %tmp21119 = getelementptr inbounds float* %tmp21118, i64 1
+  %tmp21120 = getelementptr inbounds float* %tmp21119, i64 1
+  %tmp21121 = getelementptr inbounds float* %tmp21120, i64 1
+  %tmp21122 = getelementptr inbounds float* %tmp21121, i64 1
+  %tmp21123 = getelementptr inbounds float* %tmp21122, i64 1
+  %tmp21124 = getelementptr inbounds float* %tmp21123, i64 1
+  %tmp21125 = getelementptr inbounds float* %tmp21124, i64 1
+  %tmp21126 = getelementptr inbounds float* %tmp21125, i64 1
+  %tmp21127 = getelementptr inbounds float* %tmp21126, i64 1
+  %tmp21128 = getelementptr inbounds float* %tmp21127, i64 1
+  %tmp21129 = getelementptr inbounds float* %tmp21128, i64 1
+  %tmp21130 = getelementptr inbounds float* %tmp21129, i64 1
+  %tmp21131 = getelementptr inbounds float* %tmp21130, i64 1
+  %tmp21132 = getelementptr inbounds float* %tmp21131, i64 1
+  %tmp21133 = getelementptr inbounds float* %tmp21132, i64 1
+  %tmp21134 = getelementptr inbounds float* %tmp21133, i64 1
+  %tmp21135 = getelementptr inbounds float* %tmp21134, i64 1
+  %tmp21136 = getelementptr inbounds float* %tmp21135, i64 1
+  %tmp21137 = getelementptr inbounds float* %tmp21136, i64 1
+  %tmp21138 = getelementptr inbounds float* %tmp21137, i64 1
+  %tmp21139 = getelementptr inbounds float* %tmp21138, i64 1
+  %tmp21140 = getelementptr inbounds float* %tmp21139, i64 1
+  %tmp21141 = getelementptr inbounds float* %tmp21140, i64 1
+  %tmp21142 = getelementptr inbounds float* %tmp21141, i64 1
+  %tmp21143 = getelementptr inbounds float* %tmp21142, i64 1
+  %tmp21144 = getelementptr inbounds float* %tmp21143, i64 1
+  %tmp21145 = getelementptr inbounds float* %tmp21144, i64 1
+  %tmp21146 = getelementptr inbounds float* %tmp21145, i64 1
+  %tmp21147 = getelementptr inbounds float* %tmp21146, i64 1
+  %tmp21148 = getelementptr inbounds float* %tmp21147, i64 1
+  %tmp21149 = getelementptr inbounds float* %tmp21148, i64 1
+  %tmp21150 = getelementptr inbounds float* %tmp21149, i64 1
+  %tmp21151 = getelementptr inbounds float* %tmp21150, i64 1
+  %tmp21152 = getelementptr inbounds float* %tmp21151, i64 1
+  %tmp21153 = getelementptr inbounds float* %tmp21152, i64 1
+  %tmp21154 = getelementptr inbounds float* %tmp21153, i64 1
+  %tmp21155 = getelementptr inbounds float* %tmp21154, i64 1
+  %tmp21156 = getelementptr inbounds float* %tmp21155, i64 1
+  %tmp21157 = getelementptr inbounds float* %tmp21156, i64 1
+  %tmp21158 = getelementptr inbounds float* %tmp21157, i64 1
+  %tmp21159 = getelementptr inbounds float* %tmp21158, i64 1
+  %tmp21160 = getelementptr inbounds float* %tmp21159, i64 1
+  %tmp21161 = getelementptr inbounds float* %tmp21160, i64 1
+  %tmp21162 = getelementptr inbounds float* %tmp21161, i64 1
+  %tmp21163 = getelementptr inbounds float* %tmp21162, i64 1
+  %tmp21164 = getelementptr inbounds float* %tmp21163, i64 1
+  %tmp21165 = getelementptr inbounds float* %tmp21164, i64 1
+  %tmp21166 = getelementptr inbounds float* %tmp21165, i64 1
+  %tmp21167 = getelementptr inbounds float* %tmp21166, i64 1
+  %tmp21168 = getelementptr inbounds float* %tmp21167, i64 1
+  %tmp21169 = getelementptr inbounds float* %tmp21168, i64 1
+  %tmp21170 = getelementptr inbounds float* %tmp21169, i64 1
+  %tmp21171 = getelementptr inbounds float* %tmp21170, i64 1
+  %tmp21172 = getelementptr inbounds float* %tmp21171, i64 1
+  %tmp21173 = getelementptr inbounds float* %tmp21172, i64 1
+  %tmp21174 = getelementptr inbounds float* %tmp21173, i64 1
+  %tmp21175 = getelementptr inbounds float* %tmp21174, i64 1
+  %tmp21176 = getelementptr inbounds float* %tmp21175, i64 1
+  %tmp21177 = getelementptr inbounds float* %tmp21176, i64 1
+  %tmp21178 = getelementptr inbounds float* %tmp21177, i64 1
+  %tmp21179 = getelementptr inbounds float* %tmp21178, i64 1
+  %tmp21180 = getelementptr inbounds float* %tmp21179, i64 1
+  %tmp21181 = getelementptr inbounds float* %tmp21180, i64 1
+  %tmp21182 = getelementptr inbounds float* %tmp21181, i64 1
+  %tmp21183 = getelementptr inbounds float* %tmp21182, i64 1
+  %tmp21184 = getelementptr inbounds float* %tmp21183, i64 1
+  %tmp21185 = getelementptr inbounds float* %tmp21184, i64 1
+  %tmp21186 = getelementptr inbounds float* %tmp21185, i64 1
+  %tmp21187 = getelementptr inbounds float* %tmp21186, i64 1
+  %tmp21188 = getelementptr inbounds float* %tmp21187, i64 1
+  %tmp21189 = getelementptr inbounds float* %tmp21188, i64 1
+  %tmp21190 = getelementptr inbounds float* %tmp21189, i64 1
+  %tmp21191 = getelementptr inbounds float* %tmp21190, i64 1
+  %tmp21192 = getelementptr inbounds float* %tmp21191, i64 1
+  %tmp21193 = getelementptr inbounds float* %tmp21192, i64 1
+  %tmp21194 = getelementptr inbounds float* %tmp21193, i64 1
+  %tmp21195 = getelementptr inbounds float* %tmp21194, i64 1
+  %tmp21196 = getelementptr inbounds float* %tmp21195, i64 1
+  %tmp21197 = getelementptr inbounds float* %tmp21196, i64 1
+  %tmp21198 = getelementptr inbounds float* %tmp21197, i64 1
+  %tmp21199 = getelementptr inbounds float* %tmp21198, i64 1
+  %tmp21200 = getelementptr inbounds float* %tmp21199, i64 1
+  %tmp21201 = getelementptr inbounds float* %tmp21200, i64 1
+  %tmp21202 = getelementptr inbounds float* %tmp21201, i64 1
+  %tmp21203 = getelementptr inbounds float* %tmp21202, i64 1
+  %tmp21204 = getelementptr inbounds float* %tmp21203, i64 1
+  %tmp21205 = getelementptr inbounds float* %tmp21204, i64 1
+  %tmp21206 = getelementptr inbounds float* %tmp21205, i64 1
+  %tmp21207 = getelementptr inbounds float* %tmp21206, i64 1
+  %tmp21208 = getelementptr inbounds float* %tmp21207, i64 1
+  %tmp21209 = getelementptr inbounds float* %tmp21208, i64 1
+  %tmp21210 = getelementptr inbounds float* %tmp21209, i64 1
+  %tmp21211 = getelementptr inbounds float* %tmp21210, i64 1
+  %tmp21212 = getelementptr inbounds float* %tmp21211, i64 1
+  %tmp21213 = getelementptr inbounds float* %tmp21212, i64 1
+  %tmp21214 = getelementptr inbounds float* %tmp21213, i64 1
+  %tmp21215 = getelementptr inbounds float* %tmp21214, i64 1
+  %tmp21216 = getelementptr inbounds float* %tmp21215, i64 1
+  %tmp21217 = getelementptr inbounds float* %tmp21216, i64 1
+  %tmp21218 = getelementptr inbounds float* %tmp21217, i64 1
+  %tmp21219 = getelementptr inbounds float* %tmp21218, i64 1
+  %tmp21220 = getelementptr inbounds float* %tmp21219, i64 1
+  %tmp21221 = getelementptr inbounds float* %tmp21220, i64 1
+  %tmp21222 = getelementptr inbounds float* %tmp21221, i64 1
+  %tmp21223 = getelementptr inbounds float* %tmp21222, i64 1
+  %tmp21224 = getelementptr inbounds float* %tmp21223, i64 1
+  %tmp21225 = getelementptr inbounds float* %tmp21224, i64 1
+  %tmp21226 = getelementptr inbounds float* %tmp21225, i64 1
+  %tmp21227 = getelementptr inbounds float* %tmp21226, i64 1
+  %tmp21228 = getelementptr inbounds float* %tmp21227, i64 1
+  %tmp21229 = getelementptr inbounds float* %tmp21228, i64 1
+  %tmp21230 = getelementptr inbounds float* %tmp21229, i64 1
+  %tmp21231 = getelementptr inbounds float* %tmp21230, i64 1
+  %tmp21232 = getelementptr inbounds float* %tmp21231, i64 1
+  %tmp21233 = getelementptr inbounds float* %tmp21232, i64 1
+  %tmp21234 = getelementptr inbounds float* %tmp21233, i64 1
+  %tmp21235 = getelementptr inbounds float* %tmp21234, i64 1
+  %tmp21236 = getelementptr inbounds float* %tmp21235, i64 1
+  %tmp21237 = getelementptr inbounds float* %tmp21236, i64 1
+  %tmp21238 = getelementptr inbounds float* %tmp21237, i64 1
+  %tmp21239 = getelementptr inbounds float* %tmp21238, i64 1
+  %tmp21240 = getelementptr inbounds float* %tmp21239, i64 1
+  %tmp21241 = getelementptr inbounds float* %tmp21240, i64 1
+  %tmp21242 = getelementptr inbounds float* %tmp21241, i64 1
+  %tmp21243 = getelementptr inbounds float* %tmp21242, i64 1
+  %tmp21244 = getelementptr inbounds float* %tmp21243, i64 1
+  %tmp21245 = getelementptr inbounds float* %tmp21244, i64 1
+  %tmp21246 = getelementptr inbounds float* %tmp21245, i64 1
+  %tmp21247 = getelementptr inbounds float* %tmp21246, i64 1
+  %tmp21248 = getelementptr inbounds float* %tmp21247, i64 1
+  %tmp21249 = getelementptr inbounds float* %tmp21248, i64 1
+  %tmp21250 = getelementptr inbounds float* %tmp21249, i64 1
+  %tmp21251 = getelementptr inbounds float* %tmp21250, i64 1
+  %tmp21252 = getelementptr inbounds float* %tmp21251, i64 1
+  %tmp21253 = getelementptr inbounds float* %tmp21252, i64 1
+  %tmp21254 = getelementptr inbounds float* %tmp21253, i64 1
+  %tmp21255 = getelementptr inbounds float* %tmp21254, i64 1
+  %tmp21256 = getelementptr inbounds float* %tmp21255, i64 1
+  %tmp21257 = getelementptr inbounds float* %tmp21256, i64 1
+  %tmp21258 = getelementptr inbounds float* %tmp21257, i64 1
+  %tmp21259 = getelementptr inbounds float* %tmp21258, i64 1
+  %tmp21260 = getelementptr inbounds float* %tmp21259, i64 1
+  %tmp21261 = getelementptr inbounds float* %tmp21260, i64 1
+  %tmp21262 = getelementptr inbounds float* %tmp21261, i64 1
+  %tmp21263 = getelementptr inbounds float* %tmp21262, i64 1
+  %tmp21264 = getelementptr inbounds float* %tmp21263, i64 1
+  %tmp21265 = getelementptr inbounds float* %tmp21264, i64 1
+  %tmp21266 = getelementptr inbounds float* %tmp21265, i64 1
+  %tmp21267 = getelementptr inbounds float* %tmp21266, i64 1
+  %tmp21268 = getelementptr inbounds float* %tmp21267, i64 1
+  %tmp21269 = getelementptr inbounds float* %tmp21268, i64 1
+  %tmp21270 = getelementptr inbounds float* %tmp21269, i64 1
+  %tmp21271 = getelementptr inbounds float* %tmp21270, i64 1
+  %tmp21272 = getelementptr inbounds float* %tmp21271, i64 1
+  %tmp21273 = getelementptr inbounds float* %tmp21272, i64 1
+  %tmp21274 = getelementptr inbounds float* %tmp21273, i64 1
+  %tmp21275 = getelementptr inbounds float* %tmp21274, i64 1
+  %tmp21276 = getelementptr inbounds float* %tmp21275, i64 1
+  %tmp21277 = getelementptr inbounds float* %tmp21276, i64 1
+  %tmp21278 = getelementptr inbounds float* %tmp21277, i64 1
+  %tmp21279 = getelementptr inbounds float* %tmp21278, i64 1
+  %tmp21280 = getelementptr inbounds float* %tmp21279, i64 1
+  %tmp21281 = getelementptr inbounds float* %tmp21280, i64 1
+  %tmp21282 = getelementptr inbounds float* %tmp21281, i64 1
+  %tmp21283 = getelementptr inbounds float* %tmp21282, i64 1
+  %tmp21284 = getelementptr inbounds float* %tmp21283, i64 1
+  %tmp21285 = getelementptr inbounds float* %tmp21284, i64 1
+  %tmp21286 = getelementptr inbounds float* %tmp21285, i64 1
+  %tmp21287 = getelementptr inbounds float* %tmp21286, i64 1
+  %tmp21288 = getelementptr inbounds float* %tmp21287, i64 1
+  %tmp21289 = getelementptr inbounds float* %tmp21288, i64 1
+  %tmp21290 = getelementptr inbounds float* %tmp21289, i64 1
+  %tmp21291 = getelementptr inbounds float* %tmp21290, i64 1
+  %tmp21292 = getelementptr inbounds float* %tmp21291, i64 1
+  %tmp21293 = getelementptr inbounds float* %tmp21292, i64 1
+  %tmp21294 = getelementptr inbounds float* %tmp21293, i64 1
+  %tmp21295 = getelementptr inbounds float* %tmp21294, i64 1
+  %tmp21296 = getelementptr inbounds float* %tmp21295, i64 1
+  %tmp21297 = getelementptr inbounds float* %tmp21296, i64 1
+  %tmp21298 = getelementptr inbounds float* %tmp21297, i64 1
+  %tmp21299 = getelementptr inbounds float* %tmp21298, i64 1
+  %tmp21300 = getelementptr inbounds float* %tmp21299, i64 1
+  %tmp21301 = getelementptr inbounds float* %tmp21300, i64 1
+  %tmp21302 = getelementptr inbounds float* %tmp21301, i64 1
+  %tmp21303 = getelementptr inbounds float* %tmp21302, i64 1
+  %tmp21304 = getelementptr inbounds float* %tmp21303, i64 1
+  %tmp21305 = getelementptr inbounds float* %tmp21304, i64 1
+  %tmp21306 = getelementptr inbounds float* %tmp21305, i64 1
+  %tmp21307 = getelementptr inbounds float* %tmp21306, i64 1
+  %tmp21308 = getelementptr inbounds float* %tmp21307, i64 1
+  %tmp21309 = getelementptr inbounds float* %tmp21308, i64 1
+  %tmp21310 = getelementptr inbounds float* %tmp21309, i64 1
+  %tmp21311 = getelementptr inbounds float* %tmp21310, i64 1
+  %tmp21312 = getelementptr inbounds float* %tmp21311, i64 1
+  %tmp21313 = getelementptr inbounds float* %tmp21312, i64 1
+  %tmp21314 = getelementptr inbounds float* %tmp21313, i64 1
+  %tmp21315 = getelementptr inbounds float* %tmp21314, i64 1
+  %tmp21316 = getelementptr inbounds float* %tmp21315, i64 1
+  %tmp21317 = getelementptr inbounds float* %tmp21316, i64 1
+  %tmp21318 = getelementptr inbounds float* %tmp21317, i64 1
+  %tmp21319 = getelementptr inbounds float* %tmp21318, i64 1
+  %tmp21320 = getelementptr inbounds float* %tmp21319, i64 1
+  %tmp21321 = getelementptr inbounds float* %tmp21320, i64 1
+  %tmp21322 = getelementptr inbounds float* %tmp21321, i64 1
+  %tmp21323 = getelementptr inbounds float* %tmp21322, i64 1
+  %tmp21324 = getelementptr inbounds float* %tmp21323, i64 1
+  %tmp21325 = getelementptr inbounds float* %tmp21324, i64 1
+  %tmp21326 = getelementptr inbounds float* %tmp21325, i64 1
+  %tmp21327 = getelementptr inbounds float* %tmp21326, i64 1
+  %tmp21328 = getelementptr inbounds float* %tmp21327, i64 1
+  %tmp21329 = getelementptr inbounds float* %tmp21328, i64 1
+  %tmp21330 = getelementptr inbounds float* %tmp21329, i64 1
+  %tmp21331 = getelementptr inbounds float* %tmp21330, i64 1
+  %tmp21332 = getelementptr inbounds float* %tmp21331, i64 1
+  %tmp21333 = getelementptr inbounds float* %tmp21332, i64 1
+  %tmp21334 = getelementptr inbounds float* %tmp21333, i64 1
+  %tmp21335 = getelementptr inbounds float* %tmp21334, i64 1
+  %tmp21336 = getelementptr inbounds float* %tmp21335, i64 1
+  %tmp21337 = getelementptr inbounds float* %tmp21336, i64 1
+  %tmp21338 = getelementptr inbounds float* %tmp21337, i64 1
+  %tmp21339 = getelementptr inbounds float* %tmp21338, i64 1
+  %tmp21340 = getelementptr inbounds float* %tmp21339, i64 1
+  %tmp21341 = getelementptr inbounds float* %tmp21340, i64 1
+  %tmp21342 = getelementptr inbounds float* %tmp21341, i64 1
+  %tmp21343 = getelementptr inbounds float* %tmp21342, i64 1
+  %tmp21344 = getelementptr inbounds float* %tmp21343, i64 1
+  %tmp21345 = getelementptr inbounds float* %tmp21344, i64 1
+  %tmp21346 = getelementptr inbounds float* %tmp21345, i64 1
+  %tmp21347 = getelementptr inbounds float* %tmp21346, i64 1
+  %tmp21348 = getelementptr inbounds float* %tmp21347, i64 1
+  %tmp21349 = getelementptr inbounds float* %tmp21348, i64 1
+  %tmp21350 = getelementptr inbounds float* %tmp21349, i64 1
+  %tmp21351 = getelementptr inbounds float* %tmp21350, i64 1
+  %tmp21352 = getelementptr inbounds float* %tmp21351, i64 1
+  %tmp21353 = getelementptr inbounds float* %tmp21352, i64 1
+  %tmp21354 = getelementptr inbounds float* %tmp21353, i64 1
+  %tmp21355 = getelementptr inbounds float* %tmp21354, i64 1
+  %tmp21356 = getelementptr inbounds float* %tmp21355, i64 1
+  %tmp21357 = getelementptr inbounds float* %tmp21356, i64 1
+  %tmp21358 = getelementptr inbounds float* %tmp21357, i64 1
+  %tmp21359 = getelementptr inbounds float* %tmp21358, i64 1
+  %tmp21360 = getelementptr inbounds float* %tmp21359, i64 1
+  %tmp21361 = getelementptr inbounds float* %tmp21360, i64 1
+  %tmp21362 = getelementptr inbounds float* %tmp21361, i64 1
+  %tmp21363 = getelementptr inbounds float* %tmp21362, i64 1
+  %tmp21364 = getelementptr inbounds float* %tmp21363, i64 1
+  %tmp21365 = getelementptr inbounds float* %tmp21364, i64 1
+  %tmp21366 = getelementptr inbounds float* %tmp21365, i64 1
+  %tmp21367 = getelementptr inbounds float* %tmp21366, i64 1
+  %tmp21368 = getelementptr inbounds float* %tmp21367, i64 1
+  %tmp21369 = getelementptr inbounds float* %tmp21368, i64 1
+  %tmp21370 = getelementptr inbounds float* %tmp21369, i64 1
+  %tmp21371 = getelementptr inbounds float* %tmp21370, i64 1
+  %tmp21372 = getelementptr inbounds float* %tmp21371, i64 1
+  %tmp21373 = getelementptr inbounds float* %tmp21372, i64 1
+  %tmp21374 = getelementptr inbounds float* %tmp21373, i64 1
+  %tmp21375 = getelementptr inbounds float* %tmp21374, i64 1
+  %tmp21376 = getelementptr inbounds float* %tmp21375, i64 1
+  %tmp21377 = getelementptr inbounds float* %tmp21376, i64 1
+  %tmp21378 = getelementptr inbounds float* %tmp21377, i64 1
+  %tmp21379 = getelementptr inbounds float* %tmp21378, i64 1
+  %tmp21380 = getelementptr inbounds float* %tmp21379, i64 1
+  %tmp21381 = getelementptr inbounds float* %tmp21380, i64 1
+  %tmp21382 = getelementptr inbounds float* %tmp21381, i64 1
+  %tmp21383 = getelementptr inbounds float* %tmp21382, i64 1
+  %tmp21384 = getelementptr inbounds float* %tmp21383, i64 1
+  %tmp21385 = getelementptr inbounds float* %tmp21384, i64 1
+  %tmp21386 = getelementptr inbounds float* %tmp21385, i64 1
+  %tmp21387 = getelementptr inbounds float* %tmp21386, i64 1
+  %tmp21388 = getelementptr inbounds float* %tmp21387, i64 1
+  %tmp21389 = getelementptr inbounds float* %tmp21388, i64 1
+  %tmp21390 = getelementptr inbounds float* %tmp21389, i64 1
+  %tmp21391 = getelementptr inbounds float* %tmp21390, i64 1
+  %tmp21392 = getelementptr inbounds float* %tmp21391, i64 1
+  %tmp21393 = getelementptr inbounds float* %tmp21392, i64 1
+  %tmp21394 = getelementptr inbounds float* %tmp21393, i64 1
+  %tmp21395 = getelementptr inbounds float* %tmp21394, i64 1
+  %tmp21396 = getelementptr inbounds float* %tmp21395, i64 1
+  %tmp21397 = getelementptr inbounds float* %tmp21396, i64 1
+  %tmp21398 = getelementptr inbounds float* %tmp21397, i64 1
+  %tmp21399 = getelementptr inbounds float* %tmp21398, i64 1
+  %tmp21400 = getelementptr inbounds float* %tmp21399, i64 1
+  %tmp21401 = getelementptr inbounds float* %tmp21400, i64 1
+  %tmp21402 = getelementptr inbounds float* %tmp21401, i64 1
+  %tmp21403 = getelementptr inbounds float* %tmp21402, i64 1
+  %tmp21404 = getelementptr inbounds float* %tmp21403, i64 1
+  %tmp21405 = getelementptr inbounds float* %tmp21404, i64 1
+  %tmp21406 = getelementptr inbounds float* %tmp21405, i64 1
+  %tmp21407 = getelementptr inbounds float* %tmp21406, i64 1
+  %tmp21408 = getelementptr inbounds float* %tmp21407, i64 1
+  %tmp21409 = getelementptr inbounds float* %tmp21408, i64 1
+  %tmp21410 = getelementptr inbounds float* %tmp21409, i64 1
+  %tmp21411 = getelementptr inbounds float* %tmp21410, i64 1
+  %tmp21412 = getelementptr inbounds float* %tmp21411, i64 1
+  %tmp21413 = getelementptr inbounds float* %tmp21412, i64 1
+  %tmp21414 = getelementptr inbounds float* %tmp21413, i64 1
+  %tmp21415 = getelementptr inbounds float* %tmp21414, i64 1
+  %tmp21416 = getelementptr inbounds float* %tmp21415, i64 1
+  %tmp21417 = getelementptr inbounds float* %tmp21416, i64 1
+  %tmp21418 = getelementptr inbounds float* %tmp21417, i64 1
+  %tmp21419 = getelementptr inbounds float* %tmp21418, i64 1
+  %tmp21420 = getelementptr inbounds float* %tmp21419, i64 1
+  %tmp21421 = getelementptr inbounds float* %tmp21420, i64 1
+  %tmp21422 = getelementptr inbounds float* %tmp21421, i64 1
+  %tmp21423 = getelementptr inbounds float* %tmp21422, i64 1
+  %tmp21424 = getelementptr inbounds float* %tmp21423, i64 1
+  %tmp21425 = getelementptr inbounds float* %tmp21424, i64 1
+  %tmp21426 = getelementptr inbounds float* %tmp21425, i64 1
+  %tmp21427 = getelementptr inbounds float* %tmp21426, i64 1
+  %tmp21428 = getelementptr inbounds float* %tmp21427, i64 1
+  %tmp21429 = getelementptr inbounds float* %tmp21428, i64 1
+  %tmp21430 = getelementptr inbounds float* %tmp21429, i64 1
+  %tmp21431 = getelementptr inbounds float* %tmp21430, i64 1
+  %tmp21432 = getelementptr inbounds float* %tmp21431, i64 1
+  %tmp21433 = getelementptr inbounds float* %tmp21432, i64 1
+  %tmp21434 = getelementptr inbounds float* %tmp21433, i64 1
+  %tmp21435 = getelementptr inbounds float* %tmp21434, i64 1
+  %tmp21436 = getelementptr inbounds float* %tmp21435, i64 1
+  %tmp21437 = getelementptr inbounds float* %tmp21436, i64 1
+  %tmp21438 = getelementptr inbounds float* %tmp21437, i64 1
+  %tmp21439 = getelementptr inbounds float* %tmp21438, i64 1
+  %tmp21440 = getelementptr inbounds float* %tmp21439, i64 1
+  %tmp21441 = getelementptr inbounds float* %tmp21440, i64 1
+  %tmp21442 = getelementptr inbounds float* %tmp21441, i64 1
+  %tmp21443 = getelementptr inbounds float* %tmp21442, i64 1
+  %tmp21444 = getelementptr inbounds float* %tmp21443, i64 1
+  %tmp21445 = getelementptr inbounds float* %tmp21444, i64 1
+  %tmp21446 = getelementptr inbounds float* %tmp21445, i64 1
+  %tmp21447 = getelementptr inbounds float* %tmp21446, i64 1
+  %tmp21448 = getelementptr inbounds float* %tmp21447, i64 1
+  %tmp21449 = getelementptr inbounds float* %tmp21448, i64 1
+  %tmp21450 = getelementptr inbounds float* %tmp21449, i64 1
+  %tmp21451 = getelementptr inbounds float* %tmp21450, i64 1
+  %tmp21452 = getelementptr inbounds float* %tmp21451, i64 1
+  %tmp21453 = getelementptr inbounds float* %tmp21452, i64 1
+  %tmp21454 = getelementptr inbounds float* %tmp21453, i64 1
+  %tmp21455 = getelementptr inbounds float* %tmp21454, i64 1
+  %tmp21456 = getelementptr inbounds float* %tmp21455, i64 1
+  %tmp21457 = getelementptr inbounds float* %tmp21456, i64 1
+  %tmp21458 = getelementptr inbounds float* %tmp21457, i64 1
+  %tmp21459 = getelementptr inbounds float* %tmp21458, i64 1
+  %tmp21460 = getelementptr inbounds float* %tmp21459, i64 1
+  %tmp21461 = getelementptr inbounds float* %tmp21460, i64 1
+  %tmp21462 = getelementptr inbounds float* %tmp21461, i64 1
+  %tmp21463 = getelementptr inbounds float* %tmp21462, i64 1
+  %tmp21464 = getelementptr inbounds float* %tmp21463, i64 1
+  %tmp21465 = getelementptr inbounds float* %tmp21464, i64 1
+  %tmp21466 = getelementptr inbounds float* %tmp21465, i64 1
+  %tmp21467 = getelementptr inbounds float* %tmp21466, i64 1
+  %tmp21468 = getelementptr inbounds float* %tmp21467, i64 1
+  %tmp21469 = getelementptr inbounds float* %tmp21468, i64 1
+  %tmp21470 = getelementptr inbounds float* %tmp21469, i64 1
+  %tmp21471 = getelementptr inbounds float* %tmp21470, i64 1
+  %tmp21472 = getelementptr inbounds float* %tmp21471, i64 1
+  %tmp21473 = getelementptr inbounds float* %tmp21472, i64 1
+  %tmp21474 = getelementptr inbounds float* %tmp21473, i64 1
+  %tmp21475 = getelementptr inbounds float* %tmp21474, i64 1
+  %tmp21476 = getelementptr inbounds float* %tmp21475, i64 1
+  %tmp21477 = getelementptr inbounds float* %tmp21476, i64 1
+  %tmp21478 = getelementptr inbounds float* %tmp21477, i64 1
+  %tmp21479 = getelementptr inbounds float* %tmp21478, i64 1
+  %tmp21480 = getelementptr inbounds float* %tmp21479, i64 1
+  %tmp21481 = getelementptr inbounds float* %tmp21480, i64 1
+  %tmp21482 = getelementptr inbounds float* %tmp21481, i64 1
+  %tmp21483 = getelementptr inbounds float* %tmp21482, i64 1
+  %tmp21484 = getelementptr inbounds float* %tmp21483, i64 1
+  %tmp21485 = getelementptr inbounds float* %tmp21484, i64 1
+  %tmp21486 = getelementptr inbounds float* %tmp21485, i64 1
+  %tmp21487 = getelementptr inbounds float* %tmp21486, i64 1
+  %tmp21488 = getelementptr inbounds float* %tmp21487, i64 1
+  %tmp21489 = getelementptr inbounds float* %tmp21488, i64 1
+  %tmp21490 = getelementptr inbounds float* %tmp21489, i64 1
+  %tmp21491 = getelementptr inbounds float* %tmp21490, i64 1
+  %tmp21492 = getelementptr inbounds float* %tmp21491, i64 1
+  %tmp21493 = getelementptr inbounds float* %tmp21492, i64 1
+  %tmp21494 = getelementptr inbounds float* %tmp21493, i64 1
+  %tmp21495 = getelementptr inbounds float* %tmp21494, i64 1
+  %tmp21496 = getelementptr inbounds float* %tmp21495, i64 1
+  %tmp21497 = getelementptr inbounds float* %tmp21496, i64 1
+  %tmp21498 = getelementptr inbounds float* %tmp21497, i64 1
+  %tmp21499 = getelementptr inbounds float* %tmp21498, i64 1
+  %tmp21500 = getelementptr inbounds float* %tmp21499, i64 1
+  %tmp21501 = getelementptr inbounds float* %tmp21500, i64 1
+  %tmp21502 = getelementptr inbounds float* %tmp21501, i64 1
+  %tmp21503 = getelementptr inbounds float* %tmp21502, i64 1
+  %tmp21504 = getelementptr inbounds float* %tmp21503, i64 1
+  %tmp21505 = getelementptr inbounds float* %tmp21504, i64 1
+  %tmp21506 = getelementptr inbounds float* %tmp21505, i64 1
+  %tmp21507 = getelementptr inbounds float* %tmp21506, i64 1
+  %tmp21508 = getelementptr inbounds float* %tmp21507, i64 1
+  %tmp21509 = getelementptr inbounds float* %tmp21508, i64 1
+  %tmp21510 = getelementptr inbounds float* %tmp21509, i64 1
+  %tmp21511 = getelementptr inbounds float* %tmp21510, i64 1
+  %tmp21512 = getelementptr inbounds float* %tmp21511, i64 1
+  %tmp21513 = getelementptr inbounds float* %tmp21512, i64 1
+  %tmp21514 = getelementptr inbounds float* %tmp21513, i64 1
+  %tmp21515 = getelementptr inbounds float* %tmp21514, i64 1
+  %tmp21516 = getelementptr inbounds float* %tmp21515, i64 1
+  %tmp21517 = getelementptr inbounds float* %tmp21516, i64 1
+  %tmp21518 = getelementptr inbounds float* %tmp21517, i64 1
+  %tmp21519 = getelementptr inbounds float* %tmp21518, i64 1
+  %tmp21520 = getelementptr inbounds float* %tmp21519, i64 1
+  %tmp21521 = getelementptr inbounds float* %tmp21520, i64 1
+  %tmp21522 = getelementptr inbounds float* %tmp21521, i64 1
+  %tmp21523 = getelementptr inbounds float* %tmp21522, i64 1
+  %tmp21524 = getelementptr inbounds float* %tmp21523, i64 1
+  %tmp21525 = getelementptr inbounds float* %tmp21524, i64 1
+  %tmp21526 = getelementptr inbounds float* %tmp21525, i64 1
+  %tmp21527 = getelementptr inbounds float* %tmp21526, i64 1
+  %tmp21528 = getelementptr inbounds float* %tmp21527, i64 1
+  %tmp21529 = getelementptr inbounds float* %tmp21528, i64 1
+  %tmp21530 = getelementptr inbounds float* %tmp21529, i64 1
+  %tmp21531 = getelementptr inbounds float* %tmp21530, i64 1
+  %tmp21532 = getelementptr inbounds float* %tmp21531, i64 1
+  %tmp21533 = getelementptr inbounds float* %tmp21532, i64 1
+  %tmp21534 = getelementptr inbounds float* %tmp21533, i64 1
+  %tmp21535 = getelementptr inbounds float* %tmp21534, i64 1
+  %tmp21536 = getelementptr inbounds float* %tmp21535, i64 1
+  %tmp21537 = getelementptr inbounds float* %tmp21536, i64 1
+  %tmp21538 = getelementptr inbounds float* %tmp21537, i64 1
+  %tmp21539 = getelementptr inbounds float* %tmp21538, i64 1
+  %tmp21540 = getelementptr inbounds float* %tmp21539, i64 1
+  %tmp21541 = getelementptr inbounds float* %tmp21540, i64 1
+  %tmp21542 = getelementptr inbounds float* %tmp21541, i64 1
+  %tmp21543 = getelementptr inbounds float* %tmp21542, i64 1
+  %tmp21544 = getelementptr inbounds float* %tmp21543, i64 1
+  %tmp21545 = getelementptr inbounds float* %tmp21544, i64 1
+  %tmp21546 = getelementptr inbounds float* %tmp21545, i64 1
+  %tmp21547 = getelementptr inbounds float* %tmp21546, i64 1
+  %tmp21548 = getelementptr inbounds float* %tmp21547, i64 1
+  %tmp21549 = getelementptr inbounds float* %tmp21548, i64 1
+  %tmp21550 = getelementptr inbounds float* %tmp21549, i64 1
+  %tmp21551 = getelementptr inbounds float* %tmp21550, i64 1
+  %tmp21552 = getelementptr inbounds float* %tmp21551, i64 1
+  %tmp21553 = getelementptr inbounds float* %tmp21552, i64 1
+  %tmp21554 = getelementptr inbounds float* %tmp21553, i64 1
+  %tmp21555 = getelementptr inbounds float* %tmp21554, i64 1
+  %tmp21556 = getelementptr inbounds float* %tmp21555, i64 1
+  %tmp21557 = getelementptr inbounds float* %tmp21556, i64 1
+  %tmp21558 = getelementptr inbounds float* %tmp21557, i64 1
+  %tmp21559 = getelementptr inbounds float* %tmp21558, i64 1
+  %tmp21560 = getelementptr inbounds float* %tmp21559, i64 1
+  %tmp21561 = getelementptr inbounds float* %tmp21560, i64 1
+  %tmp21562 = getelementptr inbounds float* %tmp21561, i64 1
+  %tmp21563 = getelementptr inbounds float* %tmp21562, i64 1
+  %tmp21564 = getelementptr inbounds float* %tmp21563, i64 1
+  %tmp21565 = getelementptr inbounds float* %tmp21564, i64 1
+  %tmp21566 = getelementptr inbounds float* %tmp21565, i64 1
+  %tmp21567 = getelementptr inbounds float* %tmp21566, i64 1
+  %tmp21568 = getelementptr inbounds float* %tmp21567, i64 1
+  %tmp21569 = getelementptr inbounds float* %tmp21568, i64 1
+  %tmp21570 = getelementptr inbounds float* %tmp21569, i64 1
+  %tmp21571 = getelementptr inbounds float* %tmp21570, i64 1
+  %tmp21572 = getelementptr inbounds float* %tmp21571, i64 1
+  %tmp21573 = getelementptr inbounds float* %tmp21572, i64 1
+  %tmp21574 = getelementptr inbounds float* %tmp21573, i64 1
+  %tmp21575 = getelementptr inbounds float* %tmp21574, i64 1
+  %tmp21576 = getelementptr inbounds float* %tmp21575, i64 1
+  %tmp21577 = getelementptr inbounds float* %tmp21576, i64 1
+  %tmp21578 = getelementptr inbounds float* %tmp21577, i64 1
+  %tmp21579 = getelementptr inbounds float* %tmp21578, i64 1
+  %tmp21580 = getelementptr inbounds float* %tmp21579, i64 1
+  %tmp21581 = getelementptr inbounds float* %tmp21580, i64 1
+  %tmp21582 = getelementptr inbounds float* %tmp21581, i64 1
+  %tmp21583 = getelementptr inbounds float* %tmp21582, i64 1
+  %tmp21584 = getelementptr inbounds float* %tmp21583, i64 1
+  %tmp21585 = getelementptr inbounds float* %tmp21584, i64 1
+  %tmp21586 = getelementptr inbounds float* %tmp21585, i64 1
+  %tmp21587 = getelementptr inbounds float* %tmp21586, i64 1
+  %tmp21588 = getelementptr inbounds float* %tmp21587, i64 1
+  %tmp21589 = getelementptr inbounds float* %tmp21588, i64 1
+  %tmp21590 = getelementptr inbounds float* %tmp21589, i64 1
+  %tmp21591 = getelementptr inbounds float* %tmp21590, i64 1
+  %tmp21592 = getelementptr inbounds float* %tmp21591, i64 1
+  %tmp21593 = getelementptr inbounds float* %tmp21592, i64 1
+  %tmp21594 = getelementptr inbounds float* %tmp21593, i64 1
+  %tmp21595 = getelementptr inbounds float* %tmp21594, i64 1
+  %tmp21596 = getelementptr inbounds float* %tmp21595, i64 1
+  %tmp21597 = getelementptr inbounds float* %tmp21596, i64 1
+  %tmp21598 = getelementptr inbounds float* %tmp21597, i64 1
+  %tmp21599 = getelementptr inbounds float* %tmp21598, i64 1
+  %tmp21600 = getelementptr inbounds float* %tmp21599, i64 1
+  %tmp21601 = getelementptr inbounds float* %tmp21600, i64 1
+  %tmp21602 = getelementptr inbounds float* %tmp21601, i64 1
+  %tmp21603 = getelementptr inbounds float* %tmp21602, i64 1
+  %tmp21604 = getelementptr inbounds float* %tmp21603, i64 1
+  %tmp21605 = getelementptr inbounds float* %tmp21604, i64 1
+  %tmp21606 = getelementptr inbounds float* %tmp21605, i64 1
+  %tmp21607 = getelementptr inbounds float* %tmp21606, i64 1
+  %tmp21608 = getelementptr inbounds float* %tmp21607, i64 1
+  %tmp21609 = getelementptr inbounds float* %tmp21608, i64 1
+  %tmp21610 = getelementptr inbounds float* %tmp21609, i64 1
+  %tmp21611 = getelementptr inbounds float* %tmp21610, i64 1
+  %tmp21612 = getelementptr inbounds float* %tmp21611, i64 1
+  %tmp21613 = getelementptr inbounds float* %tmp21612, i64 1
+  %tmp21614 = getelementptr inbounds float* %tmp21613, i64 1
+  %tmp21615 = getelementptr inbounds float* %tmp21614, i64 1
+  %tmp21616 = getelementptr inbounds float* %tmp21615, i64 1
+  %tmp21617 = getelementptr inbounds float* %tmp21616, i64 1
+  %tmp21618 = getelementptr inbounds float* %tmp21617, i64 1
+  %tmp21619 = getelementptr inbounds float* %tmp21618, i64 1
+  %tmp21620 = getelementptr inbounds float* %tmp21619, i64 1
+  %tmp21621 = getelementptr inbounds float* %tmp21620, i64 1
+  %tmp21622 = getelementptr inbounds float* %tmp21621, i64 1
+  %tmp21623 = getelementptr inbounds float* %tmp21622, i64 1
+  %tmp21624 = getelementptr inbounds float* %tmp21623, i64 1
+  %tmp21625 = getelementptr inbounds float* %tmp21624, i64 1
+  %tmp21626 = getelementptr inbounds float* %tmp21625, i64 1
+  %tmp21627 = getelementptr inbounds float* %tmp21626, i64 1
+  %tmp21628 = getelementptr inbounds float* %tmp21627, i64 1
+  %tmp21629 = getelementptr inbounds float* %tmp21628, i64 1
+  %tmp21630 = getelementptr inbounds float* %tmp21629, i64 1
+  %tmp21631 = getelementptr inbounds float* %tmp21630, i64 1
+  %tmp21632 = getelementptr inbounds float* %tmp21631, i64 1
+  %tmp21633 = getelementptr inbounds float* %tmp21632, i64 1
+  %tmp21634 = getelementptr inbounds float* %tmp21633, i64 1
+  %tmp21635 = getelementptr inbounds float* %tmp21634, i64 1
+  %tmp21636 = getelementptr inbounds float* %tmp21635, i64 1
+  %tmp21637 = getelementptr inbounds float* %tmp21636, i64 1
+  %tmp21638 = getelementptr inbounds float* %tmp21637, i64 1
+  %tmp21639 = getelementptr inbounds float* %tmp21638, i64 1
+  %tmp21640 = getelementptr inbounds float* %tmp21639, i64 1
+  %tmp21641 = getelementptr inbounds float* %tmp21640, i64 1
+  %tmp21642 = getelementptr inbounds float* %tmp21641, i64 1
+  %tmp21643 = getelementptr inbounds float* %tmp21642, i64 1
+  %tmp21644 = getelementptr inbounds float* %tmp21643, i64 1
+  %tmp21645 = getelementptr inbounds float* %tmp21644, i64 1
+  %tmp21646 = getelementptr inbounds float* %tmp21645, i64 1
+  %tmp21647 = getelementptr inbounds float* %tmp21646, i64 1
+  %tmp21648 = getelementptr inbounds float* %tmp21647, i64 1
+  %tmp21649 = getelementptr inbounds float* %tmp21648, i64 1
+  %tmp21650 = getelementptr inbounds float* %tmp21649, i64 1
+  %tmp21651 = getelementptr inbounds float* %tmp21650, i64 1
+  %tmp21652 = getelementptr inbounds float* %tmp21651, i64 1
+  %tmp21653 = getelementptr inbounds float* %tmp21652, i64 1
+  %tmp21654 = getelementptr inbounds float* %tmp21653, i64 1
+  %tmp21655 = getelementptr inbounds float* %tmp21654, i64 1
+  %tmp21656 = getelementptr inbounds float* %tmp21655, i64 1
+  %tmp21657 = getelementptr inbounds float* %tmp21656, i64 1
+  %tmp21658 = getelementptr inbounds float* %tmp21657, i64 1
+  %tmp21659 = getelementptr inbounds float* %tmp21658, i64 1
+  %tmp21660 = getelementptr inbounds float* %tmp21659, i64 1
+  %tmp21661 = getelementptr inbounds float* %tmp21660, i64 1
+  %tmp21662 = getelementptr inbounds float* %tmp21661, i64 1
+  %tmp21663 = getelementptr inbounds float* %tmp21662, i64 1
+  %tmp21664 = getelementptr inbounds float* %tmp21663, i64 1
+  %tmp21665 = getelementptr inbounds float* %tmp21664, i64 1
+  %tmp21666 = getelementptr inbounds float* %tmp21665, i64 1
+  %tmp21667 = getelementptr inbounds float* %tmp21666, i64 1
+  %tmp21668 = getelementptr inbounds float* %tmp21667, i64 1
+  %tmp21669 = getelementptr inbounds float* %tmp21668, i64 1
+  %tmp21670 = getelementptr inbounds float* %tmp21669, i64 1
+  %tmp21671 = getelementptr inbounds float* %tmp21670, i64 1
+  %tmp21672 = getelementptr inbounds float* %tmp21671, i64 1
+  %tmp21673 = getelementptr inbounds float* %tmp21672, i64 1
+  %tmp21674 = getelementptr inbounds float* %tmp21673, i64 1
+  %tmp21675 = getelementptr inbounds float* %tmp21674, i64 1
+  %tmp21676 = getelementptr inbounds float* %tmp21675, i64 1
+  %tmp21677 = getelementptr inbounds float* %tmp21676, i64 1
+  %tmp21678 = getelementptr inbounds float* %tmp21677, i64 1
+  %tmp21679 = getelementptr inbounds float* %tmp21678, i64 1
+  %tmp21680 = getelementptr inbounds float* %tmp21679, i64 1
+  %tmp21681 = getelementptr inbounds float* %tmp21680, i64 1
+  %tmp21682 = getelementptr inbounds float* %tmp21681, i64 1
+  %tmp21683 = getelementptr inbounds float* %tmp21682, i64 1
+  %tmp21684 = getelementptr inbounds float* %tmp21683, i64 1
+  %tmp21685 = getelementptr inbounds float* %tmp21684, i64 1
+  %tmp21686 = getelementptr inbounds float* %tmp21685, i64 1
+  %tmp21687 = getelementptr inbounds float* %tmp21686, i64 1
+  %tmp21688 = getelementptr inbounds float* %tmp21687, i64 1
+  %tmp21689 = getelementptr inbounds float* %tmp21688, i64 1
+  %tmp21690 = getelementptr inbounds float* %tmp21689, i64 1
+  %tmp21691 = getelementptr inbounds float* %tmp21690, i64 1
+  %tmp21692 = getelementptr inbounds float* %tmp21691, i64 1
+  %tmp21693 = getelementptr inbounds float* %tmp21692, i64 1
+  %tmp21694 = getelementptr inbounds float* %tmp21693, i64 1
+  %tmp21695 = getelementptr inbounds float* %tmp21694, i64 1
+  %tmp21696 = getelementptr inbounds float* %tmp21695, i64 1
+  %tmp21697 = getelementptr inbounds float* %tmp21696, i64 1
+  %tmp21698 = getelementptr inbounds float* %tmp21697, i64 1
+  %tmp21699 = getelementptr inbounds float* %tmp21698, i64 1
+  %tmp21700 = getelementptr inbounds float* %tmp21699, i64 1
+  %tmp21701 = getelementptr inbounds float* %tmp21700, i64 1
+  %tmp21702 = getelementptr inbounds float* %tmp21701, i64 1
+  %tmp21703 = getelementptr inbounds float* %tmp21702, i64 1
+  %tmp21704 = getelementptr inbounds float* %tmp21703, i64 1
+  %tmp21705 = getelementptr inbounds float* %tmp21704, i64 1
+  %tmp21706 = getelementptr inbounds float* %tmp21705, i64 1
+  %tmp21707 = getelementptr inbounds float* %tmp21706, i64 1
+  %tmp21708 = getelementptr inbounds float* %tmp21707, i64 1
+  %tmp21709 = getelementptr inbounds float* %tmp21708, i64 1
+  %tmp21710 = getelementptr inbounds float* %tmp21709, i64 1
+  %tmp21711 = getelementptr inbounds float* %tmp21710, i64 1
+  %tmp21712 = getelementptr inbounds float* %tmp21711, i64 1
+  %tmp21713 = getelementptr inbounds float* %tmp21712, i64 1
+  %tmp21714 = getelementptr inbounds float* %tmp21713, i64 1
+  %tmp21715 = getelementptr inbounds float* %tmp21714, i64 1
+  %tmp21716 = getelementptr inbounds float* %tmp21715, i64 1
+  %tmp21717 = getelementptr inbounds float* %tmp21716, i64 1
+  %tmp21718 = getelementptr inbounds float* %tmp21717, i64 1
+  %tmp21719 = getelementptr inbounds float* %tmp21718, i64 1
+  %tmp21720 = getelementptr inbounds float* %tmp21719, i64 1
+  %tmp21721 = getelementptr inbounds float* %tmp21720, i64 1
+  %tmp21722 = getelementptr inbounds float* %tmp21721, i64 1
+  %tmp21723 = getelementptr inbounds float* %tmp21722, i64 1
+  %tmp21724 = getelementptr inbounds float* %tmp21723, i64 1
+  %tmp21725 = getelementptr inbounds float* %tmp21724, i64 1
+  %tmp21726 = getelementptr inbounds float* %tmp21725, i64 1
+  %tmp21727 = getelementptr inbounds float* %tmp21726, i64 1
+  %tmp21728 = getelementptr inbounds float* %tmp21727, i64 1
+  %tmp21729 = getelementptr inbounds float* %tmp21728, i64 1
+  %tmp21730 = getelementptr inbounds float* %tmp21729, i64 1
+  %tmp21731 = getelementptr inbounds float* %tmp21730, i64 1
+  %tmp21732 = getelementptr inbounds float* %tmp21731, i64 1
+  %tmp21733 = getelementptr inbounds float* %tmp21732, i64 1
+  %tmp21734 = getelementptr inbounds float* %tmp21733, i64 1
+  %tmp21735 = getelementptr inbounds float* %tmp21734, i64 1
+  %tmp21736 = getelementptr inbounds float* %tmp21735, i64 1
+  %tmp21737 = getelementptr inbounds float* %tmp21736, i64 1
+  %tmp21738 = getelementptr inbounds float* %tmp21737, i64 1
+  %tmp21739 = getelementptr inbounds float* %tmp21738, i64 1
+  %tmp21740 = getelementptr inbounds float* %tmp21739, i64 1
+  %tmp21741 = getelementptr inbounds float* %tmp21740, i64 1
+  %tmp21742 = getelementptr inbounds float* %tmp21741, i64 1
+  %tmp21743 = getelementptr inbounds float* %tmp21742, i64 1
+  %tmp21744 = getelementptr inbounds float* %tmp21743, i64 1
+  %tmp21745 = getelementptr inbounds float* %tmp21744, i64 1
+  %tmp21746 = getelementptr inbounds float* %tmp21745, i64 1
+  %tmp21747 = getelementptr inbounds float* %tmp21746, i64 1
+  %tmp21748 = getelementptr inbounds float* %tmp21747, i64 1
+  %tmp21749 = getelementptr inbounds float* %tmp21748, i64 1
+  %tmp21750 = getelementptr inbounds float* %tmp21749, i64 1
+  %tmp21751 = getelementptr inbounds float* %tmp21750, i64 1
+  %tmp21752 = getelementptr inbounds float* %tmp21751, i64 1
+  %tmp21753 = getelementptr inbounds float* %tmp21752, i64 1
+  %tmp21754 = getelementptr inbounds float* %tmp21753, i64 1
+  %tmp21755 = getelementptr inbounds float* %tmp21754, i64 1
+  %tmp21756 = getelementptr inbounds float* %tmp21755, i64 1
+  %tmp21757 = getelementptr inbounds float* %tmp21756, i64 1
+  %tmp21758 = getelementptr inbounds float* %tmp21757, i64 1
+  %tmp21759 = getelementptr inbounds float* %tmp21758, i64 1
+  %tmp21760 = getelementptr inbounds float* %tmp21759, i64 1
+  %tmp21761 = getelementptr inbounds float* %tmp21760, i64 1
+  %tmp21762 = getelementptr inbounds float* %tmp21761, i64 1
+  %tmp21763 = getelementptr inbounds float* %tmp21762, i64 1
+  %tmp21764 = getelementptr inbounds float* %tmp21763, i64 1
+  %tmp21765 = getelementptr inbounds float* %tmp21764, i64 1
+  %tmp21766 = getelementptr inbounds float* %tmp21765, i64 1
+  %tmp21767 = getelementptr inbounds float* %tmp21766, i64 1
+  %tmp21768 = getelementptr inbounds float* %tmp21767, i64 1
+  %tmp21769 = getelementptr inbounds float* %tmp21768, i64 1
+  %tmp21770 = getelementptr inbounds float* %tmp21769, i64 1
+  %tmp21771 = getelementptr inbounds float* %tmp21770, i64 1
+  %tmp21772 = getelementptr inbounds float* %tmp21771, i64 1
+  %tmp21773 = getelementptr inbounds float* %tmp21772, i64 1
+  %tmp21774 = getelementptr inbounds float* %tmp21773, i64 1
+  %tmp21775 = getelementptr inbounds float* %tmp21774, i64 1
+  %tmp21776 = getelementptr inbounds float* %tmp21775, i64 1
+  %tmp21777 = getelementptr inbounds float* %tmp21776, i64 1
+  %tmp21778 = getelementptr inbounds float* %tmp21777, i64 1
+  %tmp21779 = getelementptr inbounds float* %tmp21778, i64 1
+  %tmp21780 = getelementptr inbounds float* %tmp21779, i64 1
+  %tmp21781 = getelementptr inbounds float* %tmp21780, i64 1
+  %tmp21782 = getelementptr inbounds float* %tmp21781, i64 1
+  %tmp21783 = getelementptr inbounds float* %tmp21782, i64 1
+  %tmp21784 = getelementptr inbounds float* %tmp21783, i64 1
+  %tmp21785 = getelementptr inbounds float* %tmp21784, i64 1
+  %tmp21786 = getelementptr inbounds float* %tmp21785, i64 1
+  %tmp21787 = getelementptr inbounds float* %tmp21786, i64 1
+  %tmp21788 = getelementptr inbounds float* %tmp21787, i64 1
+  %tmp21789 = getelementptr inbounds float* %tmp21788, i64 1
+  %tmp21790 = getelementptr inbounds float* %tmp21789, i64 1
+  %tmp21791 = getelementptr inbounds float* %tmp21790, i64 1
+  %tmp21792 = getelementptr inbounds float* %tmp21791, i64 1
+  %tmp21793 = getelementptr inbounds float* %tmp21792, i64 1
+  %tmp21794 = getelementptr inbounds float* %tmp21793, i64 1
+  %tmp21795 = getelementptr inbounds float* %tmp21794, i64 1
+  %tmp21796 = getelementptr inbounds float* %tmp21795, i64 1
+  %tmp21797 = getelementptr inbounds float* %tmp21796, i64 1
+  %tmp21798 = getelementptr inbounds float* %tmp21797, i64 1
+  %tmp21799 = getelementptr inbounds float* %tmp21798, i64 1
+  %tmp21800 = getelementptr inbounds float* %tmp21799, i64 1
+  %tmp21801 = getelementptr inbounds float* %tmp21800, i64 1
+  %tmp21802 = getelementptr inbounds float* %tmp21801, i64 1
+  %tmp21803 = getelementptr inbounds float* %tmp21802, i64 1
+  %tmp21804 = getelementptr inbounds float* %tmp21803, i64 1
+  %tmp21805 = getelementptr inbounds float* %tmp21804, i64 1
+  %tmp21806 = getelementptr inbounds float* %tmp21805, i64 1
+  %tmp21807 = getelementptr inbounds float* %tmp21806, i64 1
+  %tmp21808 = getelementptr inbounds float* %tmp21807, i64 1
+  %tmp21809 = getelementptr inbounds float* %tmp21808, i64 1
+  %tmp21810 = getelementptr inbounds float* %tmp21809, i64 1
+  %tmp21811 = getelementptr inbounds float* %tmp21810, i64 1
+  %tmp21812 = getelementptr inbounds float* %tmp21811, i64 1
+  %tmp21813 = getelementptr inbounds float* %tmp21812, i64 1
+  %tmp21814 = getelementptr inbounds float* %tmp21813, i64 1
+  %tmp21815 = getelementptr inbounds float* %tmp21814, i64 1
+  %tmp21816 = getelementptr inbounds float* %tmp21815, i64 1
+  %tmp21817 = getelementptr inbounds float* %tmp21816, i64 1
+  %tmp21818 = getelementptr inbounds float* %tmp21817, i64 1
+  %tmp21819 = getelementptr inbounds float* %tmp21818, i64 1
+  %tmp21820 = getelementptr inbounds float* %tmp21819, i64 1
+  %tmp21821 = getelementptr inbounds float* %tmp21820, i64 1
+  %tmp21822 = getelementptr inbounds float* %tmp21821, i64 1
+  %tmp21823 = getelementptr inbounds float* %tmp21822, i64 1
+  %tmp21824 = getelementptr inbounds float* %tmp21823, i64 1
+  %tmp21825 = getelementptr inbounds float* %tmp21824, i64 1
+  %tmp21826 = getelementptr inbounds float* %tmp21825, i64 1
+  %tmp21827 = getelementptr inbounds float* %tmp21826, i64 1
+  %tmp21828 = getelementptr inbounds float* %tmp21827, i64 1
+  %tmp21829 = getelementptr inbounds float* %tmp21828, i64 1
+  %tmp21830 = getelementptr inbounds float* %tmp21829, i64 1
+  %tmp21831 = getelementptr inbounds float* %tmp21830, i64 1
+  %tmp21832 = getelementptr inbounds float* %tmp21831, i64 1
+  %tmp21833 = getelementptr inbounds float* %tmp21832, i64 1
+  %tmp21834 = getelementptr inbounds float* %tmp21833, i64 1
+  %tmp21835 = getelementptr inbounds float* %tmp21834, i64 1
+  %tmp21836 = getelementptr inbounds float* %tmp21835, i64 1
+  %tmp21837 = getelementptr inbounds float* %tmp21836, i64 1
+  %tmp21838 = getelementptr inbounds float* %tmp21837, i64 1
+  %tmp21839 = getelementptr inbounds float* %tmp21838, i64 1
+  %tmp21840 = getelementptr inbounds float* %tmp21839, i64 1
+  %tmp21841 = getelementptr inbounds float* %tmp21840, i64 1
+  %tmp21842 = getelementptr inbounds float* %tmp21841, i64 1
+  %tmp21843 = getelementptr inbounds float* %tmp21842, i64 1
+  %tmp21844 = getelementptr inbounds float* %tmp21843, i64 1
+  %tmp21845 = getelementptr inbounds float* %tmp21844, i64 1
+  %tmp21846 = getelementptr inbounds float* %tmp21845, i64 1
+  %tmp21847 = getelementptr inbounds float* %tmp21846, i64 1
+  %tmp21848 = getelementptr inbounds float* %tmp21847, i64 1
+  %tmp21849 = getelementptr inbounds float* %tmp21848, i64 1
+  %tmp21850 = getelementptr inbounds float* %tmp21849, i64 1
+  %tmp21851 = getelementptr inbounds float* %tmp21850, i64 1
+  %tmp21852 = getelementptr inbounds float* %tmp21851, i64 1
+  %tmp21853 = getelementptr inbounds float* %tmp21852, i64 1
+  %tmp21854 = getelementptr inbounds float* %tmp21853, i64 1
+  %tmp21855 = getelementptr inbounds float* %tmp21854, i64 1
+  %tmp21856 = getelementptr inbounds float* %tmp21855, i64 1
+  %tmp21857 = getelementptr inbounds float* %tmp21856, i64 1
+  %tmp21858 = getelementptr inbounds float* %tmp21857, i64 1
+  %tmp21859 = getelementptr inbounds float* %tmp21858, i64 1
+  %tmp21860 = getelementptr inbounds float* %tmp21859, i64 1
+  %tmp21861 = getelementptr inbounds float* %tmp21860, i64 1
+  %tmp21862 = getelementptr inbounds float* %tmp21861, i64 1
+  %tmp21863 = getelementptr inbounds float* %tmp21862, i64 1
+  %tmp21864 = getelementptr inbounds float* %tmp21863, i64 1
+  %tmp21865 = getelementptr inbounds float* %tmp21864, i64 1
+  %tmp21866 = getelementptr inbounds float* %tmp21865, i64 1
+  %tmp21867 = getelementptr inbounds float* %tmp21866, i64 1
+  %tmp21868 = getelementptr inbounds float* %tmp21867, i64 1
+  %tmp21869 = getelementptr inbounds float* %tmp21868, i64 1
+  %tmp21870 = getelementptr inbounds float* %tmp21869, i64 1
+  %tmp21871 = getelementptr inbounds float* %tmp21870, i64 1
+  %tmp21872 = getelementptr inbounds float* %tmp21871, i64 1
+  %tmp21873 = getelementptr inbounds float* %tmp21872, i64 1
+  %tmp21874 = getelementptr inbounds float* %tmp21873, i64 1
+  %tmp21875 = getelementptr inbounds float* %tmp21874, i64 1
+  %tmp21876 = getelementptr inbounds float* %tmp21875, i64 1
+  %tmp21877 = getelementptr inbounds float* %tmp21876, i64 1
+  %tmp21878 = getelementptr inbounds float* %tmp21877, i64 1
+  %tmp21879 = getelementptr inbounds float* %tmp21878, i64 1
+  %tmp21880 = getelementptr inbounds float* %tmp21879, i64 1
+  %tmp21881 = getelementptr inbounds float* %tmp21880, i64 1
+  %tmp21882 = getelementptr inbounds float* %tmp21881, i64 1
+  %tmp21883 = getelementptr inbounds float* %tmp21882, i64 1
+  %tmp21884 = getelementptr inbounds float* %tmp21883, i64 1
+  %tmp21885 = getelementptr inbounds float* %tmp21884, i64 1
+  %tmp21886 = getelementptr inbounds float* %tmp21885, i64 1
+  %tmp21887 = getelementptr inbounds float* %tmp21886, i64 1
+  %tmp21888 = getelementptr inbounds float* %tmp21887, i64 1
+  %tmp21889 = getelementptr inbounds float* %tmp21888, i64 1
+  %tmp21890 = getelementptr inbounds float* %tmp21889, i64 1
+  %tmp21891 = getelementptr inbounds float* %tmp21890, i64 1
+  %tmp21892 = getelementptr inbounds float* %tmp21891, i64 1
+  %tmp21893 = getelementptr inbounds float* %tmp21892, i64 1
+  %tmp21894 = getelementptr inbounds float* %tmp21893, i64 1
+  %tmp21895 = getelementptr inbounds float* %tmp21894, i64 1
+  %tmp21896 = getelementptr inbounds float* %tmp21895, i64 1
+  %tmp21897 = getelementptr inbounds float* %tmp21896, i64 1
+  %tmp21898 = getelementptr inbounds float* %tmp21897, i64 1
+  %tmp21899 = getelementptr inbounds float* %tmp21898, i64 1
+  %tmp21900 = getelementptr inbounds float* %tmp21899, i64 1
+  %tmp21901 = getelementptr inbounds float* %tmp21900, i64 1
+  %tmp21902 = getelementptr inbounds float* %tmp21901, i64 1
+  %tmp21903 = getelementptr inbounds float* %tmp21902, i64 1
+  %tmp21904 = getelementptr inbounds float* %tmp21903, i64 1
+  %tmp21905 = getelementptr inbounds float* %tmp21904, i64 1
+  %tmp21906 = getelementptr inbounds float* %tmp21905, i64 1
+  %tmp21907 = getelementptr inbounds float* %tmp21906, i64 1
+  %tmp21908 = getelementptr inbounds float* %tmp21907, i64 1
+  %tmp21909 = getelementptr inbounds float* %tmp21908, i64 1
+  %tmp21910 = getelementptr inbounds float* %tmp21909, i64 1
+  %tmp21911 = getelementptr inbounds float* %tmp21910, i64 1
+  %tmp21912 = getelementptr inbounds float* %tmp21911, i64 1
+  %tmp21913 = getelementptr inbounds float* %tmp21912, i64 1
+  %tmp21914 = getelementptr inbounds float* %tmp21913, i64 1
+  %tmp21915 = getelementptr inbounds float* %tmp21914, i64 1
+  %tmp21916 = getelementptr inbounds float* %tmp21915, i64 1
+  %tmp21917 = getelementptr inbounds float* %tmp21916, i64 1
+  %tmp21918 = getelementptr inbounds float* %tmp21917, i64 1
+  %tmp21919 = getelementptr inbounds float* %tmp21918, i64 1
+  %tmp21920 = getelementptr inbounds float* %tmp21919, i64 1
+  %tmp21921 = getelementptr inbounds float* %tmp21920, i64 1
+  %tmp21922 = getelementptr inbounds float* %tmp21921, i64 1
+  %tmp21923 = getelementptr inbounds float* %tmp21922, i64 1
+  %tmp21924 = getelementptr inbounds float* %tmp21923, i64 1
+  %tmp21925 = getelementptr inbounds float* %tmp21924, i64 1
+  %tmp21926 = getelementptr inbounds float* %tmp21925, i64 1
+  %tmp21927 = getelementptr inbounds float* %tmp21926, i64 1
+  %tmp21928 = getelementptr inbounds float* %tmp21927, i64 1
+  %tmp21929 = getelementptr inbounds float* %tmp21928, i64 1
+  %tmp21930 = getelementptr inbounds float* %tmp21929, i64 1
+  %tmp21931 = getelementptr inbounds float* %tmp21930, i64 1
+  %tmp21932 = getelementptr inbounds float* %tmp21931, i64 1
+  %tmp21933 = getelementptr inbounds float* %tmp21932, i64 1
+  %tmp21934 = getelementptr inbounds float* %tmp21933, i64 1
+  %tmp21935 = getelementptr inbounds float* %tmp21934, i64 1
+  %tmp21936 = getelementptr inbounds float* %tmp21935, i64 1
+  %tmp21937 = getelementptr inbounds float* %tmp21936, i64 1
+  %tmp21938 = getelementptr inbounds float* %tmp21937, i64 1
+  %tmp21939 = getelementptr inbounds float* %tmp21938, i64 1
+  %tmp21940 = getelementptr inbounds float* %tmp21939, i64 1
+  %tmp21941 = getelementptr inbounds float* %tmp21940, i64 1
+  %tmp21942 = getelementptr inbounds float* %tmp21941, i64 1
+  %tmp21943 = getelementptr inbounds float* %tmp21942, i64 1
+  %tmp21944 = getelementptr inbounds float* %tmp21943, i64 1
+  %tmp21945 = getelementptr inbounds float* %tmp21944, i64 1
+  %tmp21946 = getelementptr inbounds float* %tmp21945, i64 1
+  %tmp21947 = getelementptr inbounds float* %tmp21946, i64 1
+  %tmp21948 = getelementptr inbounds float* %tmp21947, i64 1
+  %tmp21949 = getelementptr inbounds float* %tmp21948, i64 1
+  %tmp21950 = getelementptr inbounds float* %tmp21949, i64 1
+  %tmp21951 = getelementptr inbounds float* %tmp21950, i64 1
+  %tmp21952 = getelementptr inbounds float* %tmp21951, i64 1
+  %tmp21953 = getelementptr inbounds float* %tmp21952, i64 1
+  %tmp21954 = getelementptr inbounds float* %tmp21953, i64 1
+  %tmp21955 = getelementptr inbounds float* %tmp21954, i64 1
+  %tmp21956 = getelementptr inbounds float* %tmp21955, i64 1
+  %tmp21957 = getelementptr inbounds float* %tmp21956, i64 1
+  %tmp21958 = getelementptr inbounds float* %tmp21957, i64 1
+  %tmp21959 = getelementptr inbounds float* %tmp21958, i64 1
+  %tmp21960 = getelementptr inbounds float* %tmp21959, i64 1
+  %tmp21961 = getelementptr inbounds float* %tmp21960, i64 1
+  %tmp21962 = getelementptr inbounds float* %tmp21961, i64 1
+  %tmp21963 = getelementptr inbounds float* %tmp21962, i64 1
+  %tmp21964 = getelementptr inbounds float* %tmp21963, i64 1
+  %tmp21965 = getelementptr inbounds float* %tmp21964, i64 1
+  %tmp21966 = getelementptr inbounds float* %tmp21965, i64 1
+  %tmp21967 = getelementptr inbounds float* %tmp21966, i64 1
+  %tmp21968 = getelementptr inbounds float* %tmp21967, i64 1
+  %tmp21969 = getelementptr inbounds float* %tmp21968, i64 1
+  %tmp21970 = getelementptr inbounds float* %tmp21969, i64 1
+  %tmp21971 = getelementptr inbounds float* %tmp21970, i64 1
+  %tmp21972 = getelementptr inbounds float* %tmp21971, i64 1
+  %tmp21973 = getelementptr inbounds float* %tmp21972, i64 1
+  %tmp21974 = getelementptr inbounds float* %tmp21973, i64 1
+  %tmp21975 = getelementptr inbounds float* %tmp21974, i64 1
+  %tmp21976 = getelementptr inbounds float* %tmp21975, i64 1
+  %tmp21977 = getelementptr inbounds float* %tmp21976, i64 1
+  %tmp21978 = getelementptr inbounds float* %tmp21977, i64 1
+  %tmp21979 = getelementptr inbounds float* %tmp21978, i64 1
+  %tmp21980 = getelementptr inbounds float* %tmp21979, i64 1
+  %tmp21981 = getelementptr inbounds float* %tmp21980, i64 1
+  %tmp21982 = getelementptr inbounds float* %tmp21981, i64 1
+  %tmp21983 = getelementptr inbounds float* %tmp21982, i64 1
+  %tmp21984 = getelementptr inbounds float* %tmp21983, i64 1
+  %tmp21985 = getelementptr inbounds float* %tmp21984, i64 1
+  %tmp21986 = getelementptr inbounds float* %tmp21985, i64 1
+  %tmp21987 = getelementptr inbounds float* %tmp21986, i64 1
+  %tmp21988 = getelementptr inbounds float* %tmp21987, i64 1
+  %tmp21989 = getelementptr inbounds float* %tmp21988, i64 1
+  %tmp21990 = getelementptr inbounds float* %tmp21989, i64 1
+  %tmp21991 = getelementptr inbounds float* %tmp21990, i64 1
+  %tmp21992 = getelementptr inbounds float* %tmp21991, i64 1
+  %tmp21993 = getelementptr inbounds float* %tmp21992, i64 1
+  %tmp21994 = getelementptr inbounds float* %tmp21993, i64 1
+  %tmp21995 = getelementptr inbounds float* %tmp21994, i64 1
+  %tmp21996 = getelementptr inbounds float* %tmp21995, i64 1
+  %tmp21997 = getelementptr inbounds float* %tmp21996, i64 1
+  %tmp21998 = getelementptr inbounds float* %tmp21997, i64 1
+  %tmp21999 = getelementptr inbounds float* %tmp21998, i64 1
+  %tmp22000 = getelementptr inbounds float* %tmp21999, i64 1
+  %tmp22001 = getelementptr inbounds float* %tmp22000, i64 1
+  %tmp22002 = getelementptr inbounds float* %tmp22001, i64 1
+  %tmp22003 = getelementptr inbounds float* %tmp22002, i64 1
+  %tmp22004 = getelementptr inbounds float* %tmp22003, i64 1
+  %tmp22005 = getelementptr inbounds float* %tmp22004, i64 1
+  %tmp22006 = getelementptr inbounds float* %tmp22005, i64 1
+  %tmp22007 = getelementptr inbounds float* %tmp22006, i64 1
+  %tmp22008 = getelementptr inbounds float* %tmp22007, i64 1
+  %tmp22009 = getelementptr inbounds float* %tmp22008, i64 1
+  %tmp22010 = getelementptr inbounds float* %tmp22009, i64 1
+  %tmp22011 = getelementptr inbounds float* %tmp22010, i64 1
+  %tmp22012 = getelementptr inbounds float* %tmp22011, i64 1
+  %tmp22013 = getelementptr inbounds float* %tmp22012, i64 1
+  %tmp22014 = getelementptr inbounds float* %tmp22013, i64 1
+  %tmp22015 = getelementptr inbounds float* %tmp22014, i64 1
+  %tmp22016 = getelementptr inbounds float* %tmp22015, i64 1
+  %tmp22017 = getelementptr inbounds float* %tmp22016, i64 1
+  %tmp22018 = getelementptr inbounds float* %tmp22017, i64 1
+  %tmp22019 = getelementptr inbounds float* %tmp22018, i64 1
+  %tmp22020 = getelementptr inbounds float* %tmp22019, i64 1
+  %tmp22021 = getelementptr inbounds float* %tmp22020, i64 1
+  %tmp22022 = getelementptr inbounds float* %tmp22021, i64 1
+  %tmp22023 = getelementptr inbounds float* %tmp22022, i64 1
+  %tmp22024 = getelementptr inbounds float* %tmp22023, i64 1
+  %tmp22025 = getelementptr inbounds float* %tmp22024, i64 1
+  %tmp22026 = getelementptr inbounds float* %tmp22025, i64 1
+  %tmp22027 = getelementptr inbounds float* %tmp22026, i64 1
+  %tmp22028 = getelementptr inbounds float* %tmp22027, i64 1
+  %tmp22029 = getelementptr inbounds float* %tmp22028, i64 1
+  %tmp22030 = getelementptr inbounds float* %tmp22029, i64 1
+  %tmp22031 = getelementptr inbounds float* %tmp22030, i64 1
+  %tmp22032 = getelementptr inbounds float* %tmp22031, i64 1
+  %tmp22033 = getelementptr inbounds float* %tmp22032, i64 1
+  %tmp22034 = getelementptr inbounds float* %tmp22033, i64 1
+  %tmp22035 = getelementptr inbounds float* %tmp22034, i64 1
+  %tmp22036 = getelementptr inbounds float* %tmp22035, i64 1
+  %tmp22037 = getelementptr inbounds float* %tmp22036, i64 1
+  %tmp22038 = getelementptr inbounds float* %tmp22037, i64 1
+  %tmp22039 = getelementptr inbounds float* %tmp22038, i64 1
+  %tmp22040 = getelementptr inbounds float* %tmp22039, i64 1
+  %tmp22041 = getelementptr inbounds float* %tmp22040, i64 1
+  %tmp22042 = getelementptr inbounds float* %tmp22041, i64 1
+  %tmp22043 = getelementptr inbounds float* %tmp22042, i64 1
+  %tmp22044 = getelementptr inbounds float* %tmp22043, i64 1
+  %tmp22045 = getelementptr inbounds float* %tmp22044, i64 1
+  %tmp22046 = getelementptr inbounds float* %tmp22045, i64 1
+  %tmp22047 = getelementptr inbounds float* %tmp22046, i64 1
+  %tmp22048 = getelementptr inbounds float* %tmp22047, i64 1
+  %tmp22049 = getelementptr inbounds float* %tmp22048, i64 1
+  %tmp22050 = getelementptr inbounds float* %tmp22049, i64 1
+  %tmp22051 = getelementptr inbounds float* %tmp22050, i64 1
+  %tmp22052 = getelementptr inbounds float* %tmp22051, i64 1
+  %tmp22053 = getelementptr inbounds float* %tmp22052, i64 1
+  %tmp22054 = getelementptr inbounds float* %tmp22053, i64 1
+  %tmp22055 = getelementptr inbounds float* %tmp22054, i64 1
+  %tmp22056 = getelementptr inbounds float* %tmp22055, i64 1
+  %tmp22057 = getelementptr inbounds float* %tmp22056, i64 1
+  %tmp22058 = getelementptr inbounds float* %tmp22057, i64 1
+  %tmp22059 = getelementptr inbounds float* %tmp22058, i64 1
+  %tmp22060 = getelementptr inbounds float* %tmp22059, i64 1
+  %tmp22061 = getelementptr inbounds float* %tmp22060, i64 1
+  %tmp22062 = getelementptr inbounds float* %tmp22061, i64 1
+  %tmp22063 = getelementptr inbounds float* %tmp22062, i64 1
+  %tmp22064 = getelementptr inbounds float* %tmp22063, i64 1
+  %tmp22065 = getelementptr inbounds float* %tmp22064, i64 1
+  %tmp22066 = getelementptr inbounds float* %tmp22065, i64 1
+  %tmp22067 = getelementptr inbounds float* %tmp22066, i64 1
+  %tmp22068 = getelementptr inbounds float* %tmp22067, i64 1
+  %tmp22069 = getelementptr inbounds float* %tmp22068, i64 1
+  %tmp22070 = getelementptr inbounds float* %tmp22069, i64 1
+  %tmp22071 = getelementptr inbounds float* %tmp22070, i64 1
+  %tmp22072 = getelementptr inbounds float* %tmp22071, i64 1
+  %tmp22073 = getelementptr inbounds float* %tmp22072, i64 1
+  %tmp22074 = getelementptr inbounds float* %tmp22073, i64 1
+  %tmp22075 = getelementptr inbounds float* %tmp22074, i64 1
+  %tmp22076 = getelementptr inbounds float* %tmp22075, i64 1
+  %tmp22077 = getelementptr inbounds float* %tmp22076, i64 1
+  %tmp22078 = getelementptr inbounds float* %tmp22077, i64 1
+  %tmp22079 = getelementptr inbounds float* %tmp22078, i64 1
+  %tmp22080 = getelementptr inbounds float* %tmp22079, i64 1
+  %tmp22081 = getelementptr inbounds float* %tmp22080, i64 1
+  %tmp22082 = getelementptr inbounds float* %tmp22081, i64 1
+  %tmp22083 = getelementptr inbounds float* %tmp22082, i64 1
+  %tmp22084 = getelementptr inbounds float* %tmp22083, i64 1
+  %tmp22085 = getelementptr inbounds float* %tmp22084, i64 1
+  %tmp22086 = getelementptr inbounds float* %tmp22085, i64 1
+  %tmp22087 = getelementptr inbounds float* %tmp22086, i64 1
+  %tmp22088 = getelementptr inbounds float* %tmp22087, i64 1
+  %tmp22089 = getelementptr inbounds float* %tmp22088, i64 1
+  %tmp22090 = getelementptr inbounds float* %tmp22089, i64 1
+  %tmp22091 = getelementptr inbounds float* %tmp22090, i64 1
+  %tmp22092 = getelementptr inbounds float* %tmp22091, i64 1
+  %tmp22093 = getelementptr inbounds float* %tmp22092, i64 1
+  %tmp22094 = getelementptr inbounds float* %tmp22093, i64 1
+  %tmp22095 = getelementptr inbounds float* %tmp22094, i64 1
+  %tmp22096 = getelementptr inbounds float* %tmp22095, i64 1
+  %tmp22097 = getelementptr inbounds float* %tmp22096, i64 1
+  %tmp22098 = getelementptr inbounds float* %tmp22097, i64 1
+  %tmp22099 = getelementptr inbounds float* %tmp22098, i64 1
+  %tmp22100 = getelementptr inbounds float* %tmp22099, i64 1
+  %tmp22101 = getelementptr inbounds float* %tmp22100, i64 1
+  %tmp22102 = getelementptr inbounds float* %tmp22101, i64 1
+  %tmp22103 = getelementptr inbounds float* %tmp22102, i64 1
+  %tmp22104 = getelementptr inbounds float* %tmp22103, i64 1
+  %tmp22105 = getelementptr inbounds float* %tmp22104, i64 1
+  %tmp22106 = getelementptr inbounds float* %tmp22105, i64 1
+  %tmp22107 = getelementptr inbounds float* %tmp22106, i64 1
+  %tmp22108 = getelementptr inbounds float* %tmp22107, i64 1
+  %tmp22109 = getelementptr inbounds float* %tmp22108, i64 1
+  %tmp22110 = getelementptr inbounds float* %tmp22109, i64 1
+  %tmp22111 = getelementptr inbounds float* %tmp22110, i64 1
+  %tmp22112 = getelementptr inbounds float* %tmp22111, i64 1
+  %tmp22113 = getelementptr inbounds float* %tmp22112, i64 1
+  %tmp22114 = getelementptr inbounds float* %tmp22113, i64 1
+  %tmp22115 = getelementptr inbounds float* %tmp22114, i64 1
+  %tmp22116 = getelementptr inbounds float* %tmp22115, i64 1
+  %tmp22117 = getelementptr inbounds float* %tmp22116, i64 1
+  %tmp22118 = getelementptr inbounds float* %tmp22117, i64 1
+  %tmp22119 = getelementptr inbounds float* %tmp22118, i64 1
+  %tmp22120 = getelementptr inbounds float* %tmp22119, i64 1
+  %tmp22121 = getelementptr inbounds float* %tmp22120, i64 1
+  %tmp22122 = getelementptr inbounds float* %tmp22121, i64 1
+  %tmp22123 = getelementptr inbounds float* %tmp22122, i64 1
+  %tmp22124 = getelementptr inbounds float* %tmp22123, i64 1
+  %tmp22125 = getelementptr inbounds float* %tmp22124, i64 1
+  %tmp22126 = getelementptr inbounds float* %tmp22125, i64 1
+  %tmp22127 = getelementptr inbounds float* %tmp22126, i64 1
+  %tmp22128 = getelementptr inbounds float* %tmp22127, i64 1
+  %tmp22129 = getelementptr inbounds float* %tmp22128, i64 1
+  %tmp22130 = getelementptr inbounds float* %tmp22129, i64 1
+  %tmp22131 = getelementptr inbounds float* %tmp22130, i64 1
+  %tmp22132 = getelementptr inbounds float* %tmp22131, i64 1
+  %tmp22133 = getelementptr inbounds float* %tmp22132, i64 1
+  %tmp22134 = getelementptr inbounds float* %tmp22133, i64 1
+  %tmp22135 = getelementptr inbounds float* %tmp22134, i64 1
+  %tmp22136 = getelementptr inbounds float* %tmp22135, i64 1
+  %tmp22137 = getelementptr inbounds float* %tmp22136, i64 1
+  %tmp22138 = getelementptr inbounds float* %tmp22137, i64 1
+  %tmp22139 = getelementptr inbounds float* %tmp22138, i64 1
+  %tmp22140 = getelementptr inbounds float* %tmp22139, i64 1
+  %tmp22141 = getelementptr inbounds float* %tmp22140, i64 1
+  %tmp22142 = getelementptr inbounds float* %tmp22141, i64 1
+  %tmp22143 = getelementptr inbounds float* %tmp22142, i64 1
+  %tmp22144 = getelementptr inbounds float* %tmp22143, i64 1
+  %tmp22145 = getelementptr inbounds float* %tmp22144, i64 1
+  %tmp22146 = getelementptr inbounds float* %tmp22145, i64 1
+  %tmp22147 = getelementptr inbounds float* %tmp22146, i64 1
+  %tmp22148 = getelementptr inbounds float* %tmp22147, i64 1
+  %tmp22149 = getelementptr inbounds float* %tmp22148, i64 1
+  %tmp22150 = getelementptr inbounds float* %tmp22149, i64 1
+  %tmp22151 = getelementptr inbounds float* %tmp22150, i64 1
+  %tmp22152 = getelementptr inbounds float* %tmp22151, i64 1
+  %tmp22153 = getelementptr inbounds float* %tmp22152, i64 1
+  %tmp22154 = getelementptr inbounds float* %tmp22153, i64 1
+  %tmp22155 = getelementptr inbounds float* %tmp22154, i64 1
+  %tmp22156 = getelementptr inbounds float* %tmp22155, i64 1
+  %tmp22157 = getelementptr inbounds float* %tmp22156, i64 1
+  %tmp22158 = getelementptr inbounds float* %tmp22157, i64 1
+  %tmp22159 = getelementptr inbounds float* %tmp22158, i64 1
+  %tmp22160 = getelementptr inbounds float* %tmp22159, i64 1
+  %tmp22161 = getelementptr inbounds float* %tmp22160, i64 1
+  %tmp22162 = getelementptr inbounds float* %tmp22161, i64 1
+  %tmp22163 = getelementptr inbounds float* %tmp22162, i64 1
+  %tmp22164 = getelementptr inbounds float* %tmp22163, i64 1
+  %tmp22165 = getelementptr inbounds float* %tmp22164, i64 1
+  %tmp22166 = getelementptr inbounds float* %tmp22165, i64 1
+  %tmp22167 = getelementptr inbounds float* %tmp22166, i64 1
+  %tmp22168 = getelementptr inbounds float* %tmp22167, i64 1
+  %tmp22169 = getelementptr inbounds float* %tmp22168, i64 1
+  %tmp22170 = getelementptr inbounds float* %tmp22169, i64 1
+  %tmp22171 = getelementptr inbounds float* %tmp22170, i64 1
+  %tmp22172 = getelementptr inbounds float* %tmp22171, i64 1
+  %tmp22173 = getelementptr inbounds float* %tmp22172, i64 1
+  %tmp22174 = getelementptr inbounds float* %tmp22173, i64 1
+  %tmp22175 = getelementptr inbounds float* %tmp22174, i64 1
+  %tmp22176 = getelementptr inbounds float* %tmp22175, i64 1
+  %tmp22177 = getelementptr inbounds float* %tmp22176, i64 1
+  %tmp22178 = getelementptr inbounds float* %tmp22177, i64 1
+  %tmp22179 = getelementptr inbounds float* %tmp22178, i64 1
+  %tmp22180 = getelementptr inbounds float* %tmp22179, i64 1
+  %tmp22181 = getelementptr inbounds float* %tmp22180, i64 1
+  %tmp22182 = getelementptr inbounds float* %tmp22181, i64 1
+  %tmp22183 = getelementptr inbounds float* %tmp22182, i64 1
+  %tmp22184 = getelementptr inbounds float* %tmp22183, i64 1
+  %tmp22185 = getelementptr inbounds float* %tmp22184, i64 1
+  %tmp22186 = getelementptr inbounds float* %tmp22185, i64 1
+  %tmp22187 = getelementptr inbounds float* %tmp22186, i64 1
+  %tmp22188 = getelementptr inbounds float* %tmp22187, i64 1
+  %tmp22189 = getelementptr inbounds float* %tmp22188, i64 1
+  %tmp22190 = getelementptr inbounds float* %tmp22189, i64 1
+  %tmp22191 = getelementptr inbounds float* %tmp22190, i64 1
+  %tmp22192 = getelementptr inbounds float* %tmp22191, i64 1
+  %tmp22193 = getelementptr inbounds float* %tmp22192, i64 1
+  %tmp22194 = getelementptr inbounds float* %tmp22193, i64 1
+  %tmp22195 = getelementptr inbounds float* %tmp22194, i64 1
+  %tmp22196 = getelementptr inbounds float* %tmp22195, i64 1
+  %tmp22197 = getelementptr inbounds float* %tmp22196, i64 1
+  %tmp22198 = getelementptr inbounds float* %tmp22197, i64 1
+  %tmp22199 = getelementptr inbounds float* %tmp22198, i64 1
+  %tmp22200 = getelementptr inbounds float* %tmp22199, i64 1
+  %tmp22201 = getelementptr inbounds float* %tmp22200, i64 1
+  %tmp22202 = getelementptr inbounds float* %tmp22201, i64 1
+  %tmp22203 = getelementptr inbounds float* %tmp22202, i64 1
+  %tmp22204 = getelementptr inbounds float* %tmp22203, i64 1
+  %tmp22205 = getelementptr inbounds float* %tmp22204, i64 1
+  %tmp22206 = getelementptr inbounds float* %tmp22205, i64 1
+  %tmp22207 = getelementptr inbounds float* %tmp22206, i64 1
+  %tmp22208 = getelementptr inbounds float* %tmp22207, i64 1
+  %tmp22209 = getelementptr inbounds float* %tmp22208, i64 1
+  %tmp22210 = getelementptr inbounds float* %tmp22209, i64 1
+  %tmp22211 = getelementptr inbounds float* %tmp22210, i64 1
+  %tmp22212 = getelementptr inbounds float* %tmp22211, i64 1
+  %tmp22213 = getelementptr inbounds float* %tmp22212, i64 1
+  %tmp22214 = getelementptr inbounds float* %tmp22213, i64 1
+  %tmp22215 = getelementptr inbounds float* %tmp22214, i64 1
+  %tmp22216 = getelementptr inbounds float* %tmp22215, i64 1
+  %tmp22217 = getelementptr inbounds float* %tmp22216, i64 1
+  %tmp22218 = getelementptr inbounds float* %tmp22217, i64 1
+  %tmp22219 = getelementptr inbounds float* %tmp22218, i64 1
+  %tmp22220 = getelementptr inbounds float* %tmp22219, i64 1
+  %tmp22221 = getelementptr inbounds float* %tmp22220, i64 1
+  %tmp22222 = getelementptr inbounds float* %tmp22221, i64 1
+  %tmp22223 = getelementptr inbounds float* %tmp22222, i64 1
+  %tmp22224 = getelementptr inbounds float* %tmp22223, i64 1
+  %tmp22225 = getelementptr inbounds float* %tmp22224, i64 1
+  %tmp22226 = getelementptr inbounds float* %tmp22225, i64 1
+  %tmp22227 = getelementptr inbounds float* %tmp22226, i64 1
+  %tmp22228 = getelementptr inbounds float* %tmp22227, i64 1
+  %tmp22229 = getelementptr inbounds float* %tmp22228, i64 1
+  %tmp22230 = getelementptr inbounds float* %tmp22229, i64 1
+  %tmp22231 = getelementptr inbounds float* %tmp22230, i64 1
+  %tmp22232 = getelementptr inbounds float* %tmp22231, i64 1
+  %tmp22233 = getelementptr inbounds float* %tmp22232, i64 1
+  %tmp22234 = getelementptr inbounds float* %tmp22233, i64 1
+  %tmp22235 = getelementptr inbounds float* %tmp22234, i64 1
+  %tmp22236 = getelementptr inbounds float* %tmp22235, i64 1
+  %tmp22237 = getelementptr inbounds float* %tmp22236, i64 1
+  %tmp22238 = getelementptr inbounds float* %tmp22237, i64 1
+  %tmp22239 = getelementptr inbounds float* %tmp22238, i64 1
+  %tmp22240 = getelementptr inbounds float* %tmp22239, i64 1
+  %tmp22241 = getelementptr inbounds float* %tmp22240, i64 1
+  %tmp22242 = getelementptr inbounds float* %tmp22241, i64 1
+  %tmp22243 = getelementptr inbounds float* %tmp22242, i64 1
+  %tmp22244 = getelementptr inbounds float* %tmp22243, i64 1
+  %tmp22245 = getelementptr inbounds float* %tmp22244, i64 1
+  %tmp22246 = getelementptr inbounds float* %tmp22245, i64 1
+  %tmp22247 = getelementptr inbounds float* %tmp22246, i64 1
+  %tmp22248 = getelementptr inbounds float* %tmp22247, i64 1
+  %tmp22249 = getelementptr inbounds float* %tmp22248, i64 1
+  %tmp22250 = getelementptr inbounds float* %tmp22249, i64 1
+  %tmp22251 = getelementptr inbounds float* %tmp22250, i64 1
+  %tmp22252 = getelementptr inbounds float* %tmp22251, i64 1
+  %tmp22253 = getelementptr inbounds float* %tmp22252, i64 1
+  %tmp22254 = getelementptr inbounds float* %tmp22253, i64 1
+  %tmp22255 = getelementptr inbounds float* %tmp22254, i64 1
+  %tmp22256 = getelementptr inbounds float* %tmp22255, i64 1
+  %tmp22257 = getelementptr inbounds float* %tmp22256, i64 1
+  %tmp22258 = getelementptr inbounds float* %tmp22257, i64 1
+  %tmp22259 = getelementptr inbounds float* %tmp22258, i64 1
+  %tmp22260 = getelementptr inbounds float* %tmp22259, i64 1
+  %tmp22261 = getelementptr inbounds float* %tmp22260, i64 1
+  %tmp22262 = getelementptr inbounds float* %tmp22261, i64 1
+  %tmp22263 = getelementptr inbounds float* %tmp22262, i64 1
+  %tmp22264 = getelementptr inbounds float* %tmp22263, i64 1
+  %tmp22265 = getelementptr inbounds float* %tmp22264, i64 1
+  %tmp22266 = getelementptr inbounds float* %tmp22265, i64 1
+  %tmp22267 = getelementptr inbounds float* %tmp22266, i64 1
+  %tmp22268 = getelementptr inbounds float* %tmp22267, i64 1
+  %tmp22269 = getelementptr inbounds float* %tmp22268, i64 1
+  %tmp22270 = getelementptr inbounds float* %tmp22269, i64 1
+  %tmp22271 = getelementptr inbounds float* %tmp22270, i64 1
+  %tmp22272 = getelementptr inbounds float* %tmp22271, i64 1
+  %tmp22273 = getelementptr inbounds float* %tmp22272, i64 1
+  %tmp22274 = getelementptr inbounds float* %tmp22273, i64 1
+  %tmp22275 = getelementptr inbounds float* %tmp22274, i64 1
+  %tmp22276 = getelementptr inbounds float* %tmp22275, i64 1
+  %tmp22277 = getelementptr inbounds float* %tmp22276, i64 1
+  %tmp22278 = getelementptr inbounds float* %tmp22277, i64 1
+  %tmp22279 = getelementptr inbounds float* %tmp22278, i64 1
+  %tmp22280 = getelementptr inbounds float* %tmp22279, i64 1
+  %tmp22281 = getelementptr inbounds float* %tmp22280, i64 1
+  %tmp22282 = getelementptr inbounds float* %tmp22281, i64 1
+  %tmp22283 = getelementptr inbounds float* %tmp22282, i64 1
+  %tmp22284 = getelementptr inbounds float* %tmp22283, i64 1
+  %tmp22285 = getelementptr inbounds float* %tmp22284, i64 1
+  %tmp22286 = getelementptr inbounds float* %tmp22285, i64 1
+  %tmp22287 = getelementptr inbounds float* %tmp22286, i64 1
+  %tmp22288 = getelementptr inbounds float* %tmp22287, i64 1
+  %tmp22289 = getelementptr inbounds float* %tmp22288, i64 1
+  %tmp22290 = getelementptr inbounds float* %tmp22289, i64 1
+  %tmp22291 = getelementptr inbounds float* %tmp22290, i64 1
+  %tmp22292 = getelementptr inbounds float* %tmp22291, i64 1
+  %tmp22293 = getelementptr inbounds float* %tmp22292, i64 1
+  %tmp22294 = getelementptr inbounds float* %tmp22293, i64 1
+  %tmp22295 = getelementptr inbounds float* %tmp22294, i64 1
+  %tmp22296 = getelementptr inbounds float* %tmp22295, i64 1
+  %tmp22297 = getelementptr inbounds float* %tmp22296, i64 1
+  %tmp22298 = getelementptr inbounds float* %tmp22297, i64 1
+  %tmp22299 = getelementptr inbounds float* %tmp22298, i64 1
+  %tmp22300 = getelementptr inbounds float* %tmp22299, i64 1
+  %tmp22301 = getelementptr inbounds float* %tmp22300, i64 1
+  %tmp22302 = getelementptr inbounds float* %tmp22301, i64 1
+  %tmp22303 = getelementptr inbounds float* %tmp22302, i64 1
+  %tmp22304 = getelementptr inbounds float* %tmp22303, i64 1
+  %tmp22305 = getelementptr inbounds float* %tmp22304, i64 1
+  %tmp22306 = getelementptr inbounds float* %tmp22305, i64 1
+  %tmp22307 = getelementptr inbounds float* %tmp22306, i64 1
+  %tmp22308 = getelementptr inbounds float* %tmp22307, i64 1
+  %tmp22309 = getelementptr inbounds float* %tmp22308, i64 1
+  %tmp22310 = getelementptr inbounds float* %tmp22309, i64 1
+  %tmp22311 = getelementptr inbounds float* %tmp22310, i64 1
+  %tmp22312 = getelementptr inbounds float* %tmp22311, i64 1
+  %tmp22313 = getelementptr inbounds float* %tmp22312, i64 1
+  %tmp22314 = getelementptr inbounds float* %tmp22313, i64 1
+  %tmp22315 = getelementptr inbounds float* %tmp22314, i64 1
+  %tmp22316 = getelementptr inbounds float* %tmp22315, i64 1
+  %tmp22317 = getelementptr inbounds float* %tmp22316, i64 1
+  %tmp22318 = getelementptr inbounds float* %tmp22317, i64 1
+  %tmp22319 = getelementptr inbounds float* %tmp22318, i64 1
+  %tmp22320 = getelementptr inbounds float* %tmp22319, i64 1
+  %tmp22321 = getelementptr inbounds float* %tmp22320, i64 1
+  %tmp22322 = getelementptr inbounds float* %tmp22321, i64 1
+  %tmp22323 = getelementptr inbounds float* %tmp22322, i64 1
+  %tmp22324 = getelementptr inbounds float* %tmp22323, i64 1
+  %tmp22325 = getelementptr inbounds float* %tmp22324, i64 1
+  %tmp22326 = getelementptr inbounds float* %tmp22325, i64 1
+  %tmp22327 = getelementptr inbounds float* %tmp22326, i64 1
+  %tmp22328 = getelementptr inbounds float* %tmp22327, i64 1
+  %tmp22329 = getelementptr inbounds float* %tmp22328, i64 1
+  %tmp22330 = getelementptr inbounds float* %tmp22329, i64 1
+  %tmp22331 = getelementptr inbounds float* %tmp22330, i64 1
+  %tmp22332 = getelementptr inbounds float* %tmp22331, i64 1
+  %tmp22333 = getelementptr inbounds float* %tmp22332, i64 1
+  %tmp22334 = getelementptr inbounds float* %tmp22333, i64 1
+  %tmp22335 = getelementptr inbounds float* %tmp22334, i64 1
+  %tmp22336 = getelementptr inbounds float* %tmp22335, i64 1
+  %tmp22337 = getelementptr inbounds float* %tmp22336, i64 1
+  %tmp22338 = getelementptr inbounds float* %tmp22337, i64 1
+  %tmp22339 = getelementptr inbounds float* %tmp22338, i64 1
+  %tmp22340 = getelementptr inbounds float* %tmp22339, i64 1
+  %tmp22341 = getelementptr inbounds float* %tmp22340, i64 1
+  %tmp22342 = getelementptr inbounds float* %tmp22341, i64 1
+  %tmp22343 = getelementptr inbounds float* %tmp22342, i64 1
+  %tmp22344 = getelementptr inbounds float* %tmp22343, i64 1
+  %tmp22345 = getelementptr inbounds float* %tmp22344, i64 1
+  %tmp22346 = getelementptr inbounds float* %tmp22345, i64 1
+  %tmp22347 = getelementptr inbounds float* %tmp22346, i64 1
+  %tmp22348 = getelementptr inbounds float* %tmp22347, i64 1
+  %tmp22349 = getelementptr inbounds float* %tmp22348, i64 1
+  %tmp22350 = getelementptr inbounds float* %tmp22349, i64 1
+  %tmp22351 = getelementptr inbounds float* %tmp22350, i64 1
+  %tmp22352 = getelementptr inbounds float* %tmp22351, i64 1
+  %tmp22353 = getelementptr inbounds float* %tmp22352, i64 1
+  %tmp22354 = getelementptr inbounds float* %tmp22353, i64 1
+  %tmp22355 = getelementptr inbounds float* %tmp22354, i64 1
+  %tmp22356 = getelementptr inbounds float* %tmp22355, i64 1
+  %tmp22357 = getelementptr inbounds float* %tmp22356, i64 1
+  %tmp22358 = getelementptr inbounds float* %tmp22357, i64 1
+  %tmp22359 = getelementptr inbounds float* %tmp22358, i64 1
+  %tmp22360 = getelementptr inbounds float* %tmp22359, i64 1
+  %tmp22361 = getelementptr inbounds float* %tmp22360, i64 1
+  %tmp22362 = getelementptr inbounds float* %tmp22361, i64 1
+  %tmp22363 = getelementptr inbounds float* %tmp22362, i64 1
+  %tmp22364 = getelementptr inbounds float* %tmp22363, i64 1
+  %tmp22365 = getelementptr inbounds float* %tmp22364, i64 1
+  %tmp22366 = getelementptr inbounds float* %tmp22365, i64 1
+  %tmp22367 = getelementptr inbounds float* %tmp22366, i64 1
+  %tmp22368 = getelementptr inbounds float* %tmp22367, i64 1
+  %tmp22369 = getelementptr inbounds float* %tmp22368, i64 1
+  %tmp22370 = getelementptr inbounds float* %tmp22369, i64 1
+  %tmp22371 = getelementptr inbounds float* %tmp22370, i64 1
+  %tmp22372 = getelementptr inbounds float* %tmp22371, i64 1
+  %tmp22373 = getelementptr inbounds float* %tmp22372, i64 1
+  %tmp22374 = getelementptr inbounds float* %tmp22373, i64 1
+  %tmp22375 = getelementptr inbounds float* %tmp22374, i64 1
+  %tmp22376 = getelementptr inbounds float* %tmp22375, i64 1
+  %tmp22377 = getelementptr inbounds float* %tmp22376, i64 1
+  %tmp22378 = getelementptr inbounds float* %tmp22377, i64 1
+  %tmp22379 = getelementptr inbounds float* %tmp22378, i64 1
+  %tmp22380 = getelementptr inbounds float* %tmp22379, i64 1
+  %tmp22381 = getelementptr inbounds float* %tmp22380, i64 1
+  %tmp22382 = getelementptr inbounds float* %tmp22381, i64 1
+  %tmp22383 = getelementptr inbounds float* %tmp22382, i64 1
+  %tmp22384 = getelementptr inbounds float* %tmp22383, i64 1
+  %tmp22385 = getelementptr inbounds float* %tmp22384, i64 1
+  %tmp22386 = getelementptr inbounds float* %tmp22385, i64 1
+  %tmp22387 = getelementptr inbounds float* %tmp22386, i64 1
+  %tmp22388 = getelementptr inbounds float* %tmp22387, i64 1
+  %tmp22389 = getelementptr inbounds float* %tmp22388, i64 1
+  %tmp22390 = getelementptr inbounds float* %tmp22389, i64 1
+  %tmp22391 = getelementptr inbounds float* %tmp22390, i64 1
+  %tmp22392 = getelementptr inbounds float* %tmp22391, i64 1
+  %tmp22393 = getelementptr inbounds float* %tmp22392, i64 1
+  %tmp22394 = getelementptr inbounds float* %tmp22393, i64 1
+  %tmp22395 = getelementptr inbounds float* %tmp22394, i64 1
+  %tmp22396 = getelementptr inbounds float* %tmp22395, i64 1
+  %tmp22397 = getelementptr inbounds float* %tmp22396, i64 1
+  %tmp22398 = getelementptr inbounds float* %tmp22397, i64 1
+  %tmp22399 = getelementptr inbounds float* %tmp22398, i64 1
+  %tmp22400 = getelementptr inbounds float* %tmp22399, i64 1
+  %tmp22401 = getelementptr inbounds float* %tmp22400, i64 1
+  %tmp22402 = getelementptr inbounds float* %tmp22401, i64 1
+  %tmp22403 = getelementptr inbounds float* %tmp22402, i64 1
+  %tmp22404 = getelementptr inbounds float* %tmp22403, i64 1
+  %tmp22405 = getelementptr inbounds float* %tmp22404, i64 1
+  %tmp22406 = getelementptr inbounds float* %tmp22405, i64 1
+  %tmp22407 = getelementptr inbounds float* %tmp22406, i64 1
+  %tmp22408 = getelementptr inbounds float* %tmp22407, i64 1
+  %tmp22409 = getelementptr inbounds float* %tmp22408, i64 1
+  %tmp22410 = getelementptr inbounds float* %tmp22409, i64 1
+  %tmp22411 = getelementptr inbounds float* %tmp22410, i64 1
+  %tmp22412 = getelementptr inbounds float* %tmp22411, i64 1
+  %tmp22413 = getelementptr inbounds float* %tmp22412, i64 1
+  %tmp22414 = getelementptr inbounds float* %tmp22413, i64 1
+  %tmp22415 = getelementptr inbounds float* %tmp22414, i64 1
+  %tmp22416 = getelementptr inbounds float* %tmp22415, i64 1
+  %tmp22417 = getelementptr inbounds float* %tmp22416, i64 1
+  %tmp22418 = getelementptr inbounds float* %tmp22417, i64 1
+  %tmp22419 = getelementptr inbounds float* %tmp22418, i64 1
+  %tmp22420 = getelementptr inbounds float* %tmp22419, i64 1
+  %tmp22421 = getelementptr inbounds float* %tmp22420, i64 1
+  %tmp22422 = getelementptr inbounds float* %tmp22421, i64 1
+  %tmp22423 = getelementptr inbounds float* %tmp22422, i64 1
+  %tmp22424 = getelementptr inbounds float* %tmp22423, i64 1
+  %tmp22425 = getelementptr inbounds float* %tmp22424, i64 1
+  %tmp22426 = getelementptr inbounds float* %tmp22425, i64 1
+  %tmp22427 = getelementptr inbounds float* %tmp22426, i64 1
+  %tmp22428 = getelementptr inbounds float* %tmp22427, i64 1
+  %tmp22429 = getelementptr inbounds float* %tmp22428, i64 1
+  %tmp22430 = getelementptr inbounds float* %tmp22429, i64 1
+  %tmp22431 = getelementptr inbounds float* %tmp22430, i64 1
+  %tmp22432 = getelementptr inbounds float* %tmp22431, i64 1
+  %tmp22433 = getelementptr inbounds float* %tmp22432, i64 1
+  %tmp22434 = getelementptr inbounds float* %tmp22433, i64 1
+  %tmp22435 = getelementptr inbounds float* %tmp22434, i64 1
+  %tmp22436 = getelementptr inbounds float* %tmp22435, i64 1
+  %tmp22437 = getelementptr inbounds float* %tmp22436, i64 1
+  %tmp22438 = getelementptr inbounds float* %tmp22437, i64 1
+  %tmp22439 = getelementptr inbounds float* %tmp22438, i64 1
+  %tmp22440 = getelementptr inbounds float* %tmp22439, i64 1
+  %tmp22441 = getelementptr inbounds float* %tmp22440, i64 1
+  %tmp22442 = getelementptr inbounds float* %tmp22441, i64 1
+  %tmp22443 = getelementptr inbounds float* %tmp22442, i64 1
+  %tmp22444 = getelementptr inbounds float* %tmp22443, i64 1
+  %tmp22445 = getelementptr inbounds float* %tmp22444, i64 1
+  %tmp22446 = getelementptr inbounds float* %tmp22445, i64 1
+  %tmp22447 = getelementptr inbounds float* %tmp22446, i64 1
+  %tmp22448 = getelementptr inbounds float* %tmp22447, i64 1
+  %tmp22449 = getelementptr inbounds float* %tmp22448, i64 1
+  %tmp22450 = getelementptr inbounds float* %tmp22449, i64 1
+  %tmp22451 = getelementptr inbounds float* %tmp22450, i64 1
+  %tmp22452 = getelementptr inbounds float* %tmp22451, i64 1
+  %tmp22453 = getelementptr inbounds float* %tmp22452, i64 1
+  %tmp22454 = getelementptr inbounds float* %tmp22453, i64 1
+  %tmp22455 = getelementptr inbounds float* %tmp22454, i64 1
+  %tmp22456 = getelementptr inbounds float* %tmp22455, i64 1
+  %tmp22457 = getelementptr inbounds float* %tmp22456, i64 1
+  %tmp22458 = getelementptr inbounds float* %tmp22457, i64 1
+  %tmp22459 = getelementptr inbounds float* %tmp22458, i64 1
+  %tmp22460 = getelementptr inbounds float* %tmp22459, i64 1
+  %tmp22461 = getelementptr inbounds float* %tmp22460, i64 1
+  %tmp22462 = getelementptr inbounds float* %tmp22461, i64 1
+  %tmp22463 = getelementptr inbounds float* %tmp22462, i64 1
+  %tmp22464 = getelementptr inbounds float* %tmp22463, i64 1
+  %tmp22465 = getelementptr inbounds float* %tmp22464, i64 1
+  %tmp22466 = getelementptr inbounds float* %tmp22465, i64 1
+  %tmp22467 = getelementptr inbounds float* %tmp22466, i64 1
+  %tmp22468 = getelementptr inbounds float* %tmp22467, i64 1
+  %tmp22469 = getelementptr inbounds float* %tmp22468, i64 1
+  %tmp22470 = getelementptr inbounds float* %tmp22469, i64 1
+  %tmp22471 = getelementptr inbounds float* %tmp22470, i64 1
+  %tmp22472 = getelementptr inbounds float* %tmp22471, i64 1
+  %tmp22473 = getelementptr inbounds float* %tmp22472, i64 1
+  %tmp22474 = getelementptr inbounds float* %tmp22473, i64 1
+  %tmp22475 = getelementptr inbounds float* %tmp22474, i64 1
+  %tmp22476 = getelementptr inbounds float* %tmp22475, i64 1
+  %tmp22477 = getelementptr inbounds float* %tmp22476, i64 1
+  %tmp22478 = getelementptr inbounds float* %tmp22477, i64 1
+  %tmp22479 = getelementptr inbounds float* %tmp22478, i64 1
+  %tmp22480 = getelementptr inbounds float* %tmp22479, i64 1
+  %tmp22481 = getelementptr inbounds float* %tmp22480, i64 1
+  %tmp22482 = getelementptr inbounds float* %tmp22481, i64 1
+  %tmp22483 = getelementptr inbounds float* %tmp22482, i64 1
+  %tmp22484 = getelementptr inbounds float* %tmp22483, i64 1
+  %tmp22485 = getelementptr inbounds float* %tmp22484, i64 1
+  %tmp22486 = getelementptr inbounds float* %tmp22485, i64 1
+  %tmp22487 = getelementptr inbounds float* %tmp22486, i64 1
+  %tmp22488 = getelementptr inbounds float* %tmp22487, i64 1
+  %tmp22489 = getelementptr inbounds float* %tmp22488, i64 1
+  %tmp22490 = getelementptr inbounds float* %tmp22489, i64 1
+  %tmp22491 = getelementptr inbounds float* %tmp22490, i64 1
+  %tmp22492 = getelementptr inbounds float* %tmp22491, i64 1
+  %tmp22493 = getelementptr inbounds float* %tmp22492, i64 1
+  %tmp22494 = getelementptr inbounds float* %tmp22493, i64 1
+  %tmp22495 = getelementptr inbounds float* %tmp22494, i64 1
+  %tmp22496 = getelementptr inbounds float* %tmp22495, i64 1
+  %tmp22497 = getelementptr inbounds float* %tmp22496, i64 1
+  %tmp22498 = getelementptr inbounds float* %tmp22497, i64 1
+  %tmp22499 = getelementptr inbounds float* %tmp22498, i64 1
+  %tmp22500 = getelementptr inbounds float* %tmp22499, i64 1
+  %tmp22501 = getelementptr inbounds float* %tmp22500, i64 1
+  %tmp22502 = getelementptr inbounds float* %tmp22501, i64 1
+  %tmp22503 = getelementptr inbounds float* %tmp22502, i64 1
+  %tmp22504 = getelementptr inbounds float* %tmp22503, i64 1
+  %tmp22505 = getelementptr inbounds float* %tmp22504, i64 1
+  %tmp22506 = getelementptr inbounds float* %tmp22505, i64 1
+  %tmp22507 = getelementptr inbounds float* %tmp22506, i64 1
+  %tmp22508 = getelementptr inbounds float* %tmp22507, i64 1
+  %tmp22509 = getelementptr inbounds float* %tmp22508, i64 1
+  %tmp22510 = getelementptr inbounds float* %tmp22509, i64 1
+  %tmp22511 = getelementptr inbounds float* %tmp22510, i64 1
+  %tmp22512 = getelementptr inbounds float* %tmp22511, i64 1
+  %tmp22513 = getelementptr inbounds float* %tmp22512, i64 1
+  %tmp22514 = getelementptr inbounds float* %tmp22513, i64 1
+  %tmp22515 = getelementptr inbounds float* %tmp22514, i64 1
+  %tmp22516 = getelementptr inbounds float* %tmp22515, i64 1
+  %tmp22517 = getelementptr inbounds float* %tmp22516, i64 1
+  %tmp22518 = getelementptr inbounds float* %tmp22517, i64 1
+  %tmp22519 = getelementptr inbounds float* %tmp22518, i64 1
+  %tmp22520 = getelementptr inbounds float* %tmp22519, i64 1
+  %tmp22521 = getelementptr inbounds float* %tmp22520, i64 1
+  %tmp22522 = getelementptr inbounds float* %tmp22521, i64 1
+  %tmp22523 = getelementptr inbounds float* %tmp22522, i64 1
+  %tmp22524 = getelementptr inbounds float* %tmp22523, i64 1
+  %tmp22525 = getelementptr inbounds float* %tmp22524, i64 1
+  %tmp22526 = getelementptr inbounds float* %tmp22525, i64 1
+  %tmp22527 = getelementptr inbounds float* %tmp22526, i64 1
+  %tmp22528 = getelementptr inbounds float* %tmp22527, i64 1
+  %tmp22529 = getelementptr inbounds float* %tmp22528, i64 1
+  %tmp22530 = getelementptr inbounds float* %tmp22529, i64 1
+  %tmp22531 = getelementptr inbounds float* %tmp22530, i64 1
+  %tmp22532 = getelementptr inbounds float* %tmp22531, i64 1
+  %tmp22533 = getelementptr inbounds float* %tmp22532, i64 1
+  %tmp22534 = getelementptr inbounds float* %tmp22533, i64 1
+  %tmp22535 = getelementptr inbounds float* %tmp22534, i64 1
+  %tmp22536 = getelementptr inbounds float* %tmp22535, i64 1
+  %tmp22537 = getelementptr inbounds float* %tmp22536, i64 1
+  %tmp22538 = getelementptr inbounds float* %tmp22537, i64 1
+  %tmp22539 = getelementptr inbounds float* %tmp22538, i64 1
+  %tmp22540 = getelementptr inbounds float* %tmp22539, i64 1
+  %tmp22541 = getelementptr inbounds float* %tmp22540, i64 1
+  %tmp22542 = getelementptr inbounds float* %tmp22541, i64 1
+  %tmp22543 = getelementptr inbounds float* %tmp22542, i64 1
+  %tmp22544 = getelementptr inbounds float* %tmp22543, i64 1
+  %tmp22545 = getelementptr inbounds float* %tmp22544, i64 1
+  %tmp22546 = getelementptr inbounds float* %tmp22545, i64 1
+  %tmp22547 = getelementptr inbounds float* %tmp22546, i64 1
+  %tmp22548 = getelementptr inbounds float* %tmp22547, i64 1
+  %tmp22549 = getelementptr inbounds float* %tmp22548, i64 1
+  %tmp22550 = getelementptr inbounds float* %tmp22549, i64 1
+  %tmp22551 = getelementptr inbounds float* %tmp22550, i64 1
+  %tmp22552 = getelementptr inbounds float* %tmp22551, i64 1
+  %tmp22553 = getelementptr inbounds float* %tmp22552, i64 1
+  %tmp22554 = getelementptr inbounds float* %tmp22553, i64 1
+  %tmp22555 = getelementptr inbounds float* %tmp22554, i64 1
+  %tmp22556 = getelementptr inbounds float* %tmp22555, i64 1
+  %tmp22557 = getelementptr inbounds float* %tmp22556, i64 1
+  %tmp22558 = getelementptr inbounds float* %tmp22557, i64 1
+  %tmp22559 = getelementptr inbounds float* %tmp22558, i64 1
+  %tmp22560 = getelementptr inbounds float* %tmp22559, i64 1
+  %tmp22561 = getelementptr inbounds float* %tmp22560, i64 1
+  %tmp22562 = getelementptr inbounds float* %tmp22561, i64 1
+  %tmp22563 = getelementptr inbounds float* %tmp22562, i64 1
+  %tmp22564 = getelementptr inbounds float* %tmp22563, i64 1
+  %tmp22565 = getelementptr inbounds float* %tmp22564, i64 1
+  %tmp22566 = getelementptr inbounds float* %tmp22565, i64 1
+  %tmp22567 = getelementptr inbounds float* %tmp22566, i64 1
+  %tmp22568 = getelementptr inbounds float* %tmp22567, i64 1
+  %tmp22569 = getelementptr inbounds float* %tmp22568, i64 1
+  %tmp22570 = getelementptr inbounds float* %tmp22569, i64 1
+  %tmp22571 = getelementptr inbounds float* %tmp22570, i64 1
+  %tmp22572 = getelementptr inbounds float* %tmp22571, i64 1
+  %tmp22573 = getelementptr inbounds float* %tmp22572, i64 1
+  %tmp22574 = getelementptr inbounds float* %tmp22573, i64 1
+  %tmp22575 = getelementptr inbounds float* %tmp22574, i64 1
+  %tmp22576 = getelementptr inbounds float* %tmp22575, i64 1
+  %tmp22577 = getelementptr inbounds float* %tmp22576, i64 1
+  %tmp22578 = getelementptr inbounds float* %tmp22577, i64 1
+  %tmp22579 = getelementptr inbounds float* %tmp22578, i64 1
+  %tmp22580 = getelementptr inbounds float* %tmp22579, i64 1
+  %tmp22581 = getelementptr inbounds float* %tmp22580, i64 1
+  %tmp22582 = getelementptr inbounds float* %tmp22581, i64 1
+  %tmp22583 = getelementptr inbounds float* %tmp22582, i64 1
+  %tmp22584 = getelementptr inbounds float* %tmp22583, i64 1
+  %tmp22585 = getelementptr inbounds float* %tmp22584, i64 1
+  %tmp22586 = getelementptr inbounds float* %tmp22585, i64 1
+  %tmp22587 = getelementptr inbounds float* %tmp22586, i64 1
+  %tmp22588 = getelementptr inbounds float* %tmp22587, i64 1
+  %tmp22589 = getelementptr inbounds float* %tmp22588, i64 1
+  %tmp22590 = getelementptr inbounds float* %tmp22589, i64 1
+  %tmp22591 = getelementptr inbounds float* %tmp22590, i64 1
+  %tmp22592 = getelementptr inbounds float* %tmp22591, i64 1
+  %tmp22593 = getelementptr inbounds float* %tmp22592, i64 1
+  %tmp22594 = getelementptr inbounds float* %tmp22593, i64 1
+  %tmp22595 = getelementptr inbounds float* %tmp22594, i64 1
+  %tmp22596 = getelementptr inbounds float* %tmp22595, i64 1
+  %tmp22597 = getelementptr inbounds float* %tmp22596, i64 1
+  %tmp22598 = getelementptr inbounds float* %tmp22597, i64 1
+  %tmp22599 = getelementptr inbounds float* %tmp22598, i64 1
+  %tmp22600 = getelementptr inbounds float* %tmp22599, i64 1
+  %tmp22601 = getelementptr inbounds float* %tmp22600, i64 1
+  %tmp22602 = getelementptr inbounds float* %tmp22601, i64 1
+  %tmp22603 = getelementptr inbounds float* %tmp22602, i64 1
+  %tmp22604 = getelementptr inbounds float* %tmp22603, i64 1
+  %tmp22605 = getelementptr inbounds float* %tmp22604, i64 1
+  %tmp22606 = getelementptr inbounds float* %tmp22605, i64 1
+  %tmp22607 = getelementptr inbounds float* %tmp22606, i64 1
+  %tmp22608 = getelementptr inbounds float* %tmp22607, i64 1
+  %tmp22609 = getelementptr inbounds float* %tmp22608, i64 1
+  %tmp22610 = getelementptr inbounds float* %tmp22609, i64 1
+  %tmp22611 = getelementptr inbounds float* %tmp22610, i64 1
+  %tmp22612 = getelementptr inbounds float* %tmp22611, i64 1
+  %tmp22613 = getelementptr inbounds float* %tmp22612, i64 1
+  %tmp22614 = getelementptr inbounds float* %tmp22613, i64 1
+  %tmp22615 = getelementptr inbounds float* %tmp22614, i64 1
+  %tmp22616 = getelementptr inbounds float* %tmp22615, i64 1
+  %tmp22617 = getelementptr inbounds float* %tmp22616, i64 1
+  %tmp22618 = getelementptr inbounds float* %tmp22617, i64 1
+  %tmp22619 = getelementptr inbounds float* %tmp22618, i64 1
+  %tmp22620 = getelementptr inbounds float* %tmp22619, i64 1
+  %tmp22621 = getelementptr inbounds float* %tmp22620, i64 1
+  %tmp22622 = getelementptr inbounds float* %tmp22621, i64 1
+  %tmp22623 = getelementptr inbounds float* %tmp22622, i64 1
+  %tmp22624 = getelementptr inbounds float* %tmp22623, i64 1
+  %tmp22625 = getelementptr inbounds float* %tmp22624, i64 1
+  %tmp22626 = getelementptr inbounds float* %tmp22625, i64 1
+  %tmp22627 = getelementptr inbounds float* %tmp22626, i64 1
+  %tmp22628 = getelementptr inbounds float* %tmp22627, i64 1
+  %tmp22629 = getelementptr inbounds float* %tmp22628, i64 1
+  %tmp22630 = getelementptr inbounds float* %tmp22629, i64 1
+  %tmp22631 = getelementptr inbounds float* %tmp22630, i64 1
+  %tmp22632 = getelementptr inbounds float* %tmp22631, i64 1
+  %tmp22633 = getelementptr inbounds float* %tmp22632, i64 1
+  %tmp22634 = getelementptr inbounds float* %tmp22633, i64 1
+  %tmp22635 = getelementptr inbounds float* %tmp22634, i64 1
+  %tmp22636 = getelementptr inbounds float* %tmp22635, i64 1
+  %tmp22637 = getelementptr inbounds float* %tmp22636, i64 1
+  %tmp22638 = getelementptr inbounds float* %tmp22637, i64 1
+  %tmp22639 = getelementptr inbounds float* %tmp22638, i64 1
+  %tmp22640 = getelementptr inbounds float* %tmp22639, i64 1
+  %tmp22641 = getelementptr inbounds float* %tmp22640, i64 1
+  %tmp22642 = getelementptr inbounds float* %tmp22641, i64 1
+  %tmp22643 = getelementptr inbounds float* %tmp22642, i64 1
+  %tmp22644 = getelementptr inbounds float* %tmp22643, i64 1
+  %tmp22645 = getelementptr inbounds float* %tmp22644, i64 1
+  %tmp22646 = getelementptr inbounds float* %tmp22645, i64 1
+  %tmp22647 = getelementptr inbounds float* %tmp22646, i64 1
+  %tmp22648 = getelementptr inbounds float* %tmp22647, i64 1
+  %tmp22649 = getelementptr inbounds float* %tmp22648, i64 1
+  %tmp22650 = getelementptr inbounds float* %tmp22649, i64 1
+  %tmp22651 = getelementptr inbounds float* %tmp22650, i64 1
+  %tmp22652 = getelementptr inbounds float* %tmp22651, i64 1
+  %tmp22653 = getelementptr inbounds float* %tmp22652, i64 1
+  %tmp22654 = getelementptr inbounds float* %tmp22653, i64 1
+  %tmp22655 = getelementptr inbounds float* %tmp22654, i64 1
+  %tmp22656 = getelementptr inbounds float* %tmp22655, i64 1
+  %tmp22657 = getelementptr inbounds float* %tmp22656, i64 1
+  %tmp22658 = getelementptr inbounds float* %tmp22657, i64 1
+  %tmp22659 = getelementptr inbounds float* %tmp22658, i64 1
+  %tmp22660 = getelementptr inbounds float* %tmp22659, i64 1
+  %tmp22661 = getelementptr inbounds float* %tmp22660, i64 1
+  %tmp22662 = getelementptr inbounds float* %tmp22661, i64 1
+  %tmp22663 = getelementptr inbounds float* %tmp22662, i64 1
+  %tmp22664 = getelementptr inbounds float* %tmp22663, i64 1
+  %tmp22665 = getelementptr inbounds float* %tmp22664, i64 1
+  %tmp22666 = getelementptr inbounds float* %tmp22665, i64 1
+  %tmp22667 = getelementptr inbounds float* %tmp22666, i64 1
+  %tmp22668 = getelementptr inbounds float* %tmp22667, i64 1
+  %tmp22669 = getelementptr inbounds float* %tmp22668, i64 1
+  %tmp22670 = getelementptr inbounds float* %tmp22669, i64 1
+  %tmp22671 = getelementptr inbounds float* %tmp22670, i64 1
+  %tmp22672 = getelementptr inbounds float* %tmp22671, i64 1
+  %tmp22673 = getelementptr inbounds float* %tmp22672, i64 1
+  %tmp22674 = getelementptr inbounds float* %tmp22673, i64 1
+  %tmp22675 = getelementptr inbounds float* %tmp22674, i64 1
+  %tmp22676 = getelementptr inbounds float* %tmp22675, i64 1
+  %tmp22677 = getelementptr inbounds float* %tmp22676, i64 1
+  %tmp22678 = getelementptr inbounds float* %tmp22677, i64 1
+  %tmp22679 = getelementptr inbounds float* %tmp22678, i64 1
+  %tmp22680 = getelementptr inbounds float* %tmp22679, i64 1
+  %tmp22681 = getelementptr inbounds float* %tmp22680, i64 1
+  %tmp22682 = getelementptr inbounds float* %tmp22681, i64 1
+  %tmp22683 = getelementptr inbounds float* %tmp22682, i64 1
+  %tmp22684 = getelementptr inbounds float* %tmp22683, i64 1
+  %tmp22685 = getelementptr inbounds float* %tmp22684, i64 1
+  %tmp22686 = getelementptr inbounds float* %tmp22685, i64 1
+  %tmp22687 = getelementptr inbounds float* %tmp22686, i64 1
+  %tmp22688 = getelementptr inbounds float* %tmp22687, i64 1
+  %tmp22689 = getelementptr inbounds float* %tmp22688, i64 1
+  %tmp22690 = getelementptr inbounds float* %tmp22689, i64 1
+  %tmp22691 = getelementptr inbounds float* %tmp22690, i64 1
+  %tmp22692 = getelementptr inbounds float* %tmp22691, i64 1
+  %tmp22693 = getelementptr inbounds float* %tmp22692, i64 1
+  %tmp22694 = getelementptr inbounds float* %tmp22693, i64 1
+  %tmp22695 = getelementptr inbounds float* %tmp22694, i64 1
+  %tmp22696 = getelementptr inbounds float* %tmp22695, i64 1
+  %tmp22697 = getelementptr inbounds float* %tmp22696, i64 1
+  %tmp22698 = getelementptr inbounds float* %tmp22697, i64 1
+  %tmp22699 = getelementptr inbounds float* %tmp22698, i64 1
+  %tmp22700 = getelementptr inbounds float* %tmp22699, i64 1
+  %tmp22701 = getelementptr inbounds float* %tmp22700, i64 1
+  %tmp22702 = getelementptr inbounds float* %tmp22701, i64 1
+  %tmp22703 = getelementptr inbounds float* %tmp22702, i64 1
+  %tmp22704 = getelementptr inbounds float* %tmp22703, i64 1
+  %tmp22705 = getelementptr inbounds float* %tmp22704, i64 1
+  %tmp22706 = getelementptr inbounds float* %tmp22705, i64 1
+  %tmp22707 = getelementptr inbounds float* %tmp22706, i64 1
+  %tmp22708 = getelementptr inbounds float* %tmp22707, i64 1
+  %tmp22709 = getelementptr inbounds float* %tmp22708, i64 1
+  %tmp22710 = getelementptr inbounds float* %tmp22709, i64 1
+  %tmp22711 = getelementptr inbounds float* %tmp22710, i64 1
+  %tmp22712 = getelementptr inbounds float* %tmp22711, i64 1
+  %tmp22713 = getelementptr inbounds float* %tmp22712, i64 1
+  %tmp22714 = getelementptr inbounds float* %tmp22713, i64 1
+  %tmp22715 = getelementptr inbounds float* %tmp22714, i64 1
+  %tmp22716 = getelementptr inbounds float* %tmp22715, i64 1
+  %tmp22717 = getelementptr inbounds float* %tmp22716, i64 1
+  %tmp22718 = getelementptr inbounds float* %tmp22717, i64 1
+  %tmp22719 = getelementptr inbounds float* %tmp22718, i64 1
+  %tmp22720 = getelementptr inbounds float* %tmp22719, i64 1
+  %tmp22721 = getelementptr inbounds float* %tmp22720, i64 1
+  %tmp22722 = getelementptr inbounds float* %tmp22721, i64 1
+  %tmp22723 = getelementptr inbounds float* %tmp22722, i64 1
+  %tmp22724 = getelementptr inbounds float* %tmp22723, i64 1
+  %tmp22725 = getelementptr inbounds float* %tmp22724, i64 1
+  %tmp22726 = getelementptr inbounds float* %tmp22725, i64 1
+  %tmp22727 = getelementptr inbounds float* %tmp22726, i64 1
+  %tmp22728 = getelementptr inbounds float* %tmp22727, i64 1
+  %tmp22729 = getelementptr inbounds float* %tmp22728, i64 1
+  %tmp22730 = getelementptr inbounds float* %tmp22729, i64 1
+  %tmp22731 = getelementptr inbounds float* %tmp22730, i64 1
+  %tmp22732 = getelementptr inbounds float* %tmp22731, i64 1
+  %tmp22733 = getelementptr inbounds float* %tmp22732, i64 1
+  %tmp22734 = getelementptr inbounds float* %tmp22733, i64 1
+  %tmp22735 = getelementptr inbounds float* %tmp22734, i64 1
+  %tmp22736 = getelementptr inbounds float* %tmp22735, i64 1
+  %tmp22737 = getelementptr inbounds float* %tmp22736, i64 1
+  %tmp22738 = getelementptr inbounds float* %tmp22737, i64 1
+  %tmp22739 = getelementptr inbounds float* %tmp22738, i64 1
+  %tmp22740 = getelementptr inbounds float* %tmp22739, i64 1
+  %tmp22741 = getelementptr inbounds float* %tmp22740, i64 1
+  %tmp22742 = getelementptr inbounds float* %tmp22741, i64 1
+  %tmp22743 = getelementptr inbounds float* %tmp22742, i64 1
+  %tmp22744 = getelementptr inbounds float* %tmp22743, i64 1
+  %tmp22745 = getelementptr inbounds float* %tmp22744, i64 1
+  %tmp22746 = getelementptr inbounds float* %tmp22745, i64 1
+  %tmp22747 = getelementptr inbounds float* %tmp22746, i64 1
+  %tmp22748 = getelementptr inbounds float* %tmp22747, i64 1
+  %tmp22749 = getelementptr inbounds float* %tmp22748, i64 1
+  %tmp22750 = getelementptr inbounds float* %tmp22749, i64 1
+  %tmp22751 = getelementptr inbounds float* %tmp22750, i64 1
+  %tmp22752 = getelementptr inbounds float* %tmp22751, i64 1
+  %tmp22753 = getelementptr inbounds float* %tmp22752, i64 1
+  %tmp22754 = getelementptr inbounds float* %tmp22753, i64 1
+  %tmp22755 = getelementptr inbounds float* %tmp22754, i64 1
+  %tmp22756 = getelementptr inbounds float* %tmp22755, i64 1
+  %tmp22757 = getelementptr inbounds float* %tmp22756, i64 1
+  %tmp22758 = getelementptr inbounds float* %tmp22757, i64 1
+  %tmp22759 = getelementptr inbounds float* %tmp22758, i64 1
+  %tmp22760 = getelementptr inbounds float* %tmp22759, i64 1
+  %tmp22761 = getelementptr inbounds float* %tmp22760, i64 1
+  %tmp22762 = getelementptr inbounds float* %tmp22761, i64 1
+  %tmp22763 = getelementptr inbounds float* %tmp22762, i64 1
+  %tmp22764 = getelementptr inbounds float* %tmp22763, i64 1
+  %tmp22765 = getelementptr inbounds float* %tmp22764, i64 1
+  %tmp22766 = getelementptr inbounds float* %tmp22765, i64 1
+  %tmp22767 = getelementptr inbounds float* %tmp22766, i64 1
+  %tmp22768 = getelementptr inbounds float* %tmp22767, i64 1
+  %tmp22769 = getelementptr inbounds float* %tmp22768, i64 1
+  %tmp22770 = getelementptr inbounds float* %tmp22769, i64 1
+  %tmp22771 = getelementptr inbounds float* %tmp22770, i64 1
+  %tmp22772 = getelementptr inbounds float* %tmp22771, i64 1
+  %tmp22773 = getelementptr inbounds float* %tmp22772, i64 1
+  %tmp22774 = getelementptr inbounds float* %tmp22773, i64 1
+  %tmp22775 = getelementptr inbounds float* %tmp22774, i64 1
+  %tmp22776 = getelementptr inbounds float* %tmp22775, i64 1
+  %tmp22777 = getelementptr inbounds float* %tmp22776, i64 1
+  %tmp22778 = getelementptr inbounds float* %tmp22777, i64 1
+  %tmp22779 = getelementptr inbounds float* %tmp22778, i64 1
+  %tmp22780 = getelementptr inbounds float* %tmp22779, i64 1
+  %tmp22781 = getelementptr inbounds float* %tmp22780, i64 1
+  %tmp22782 = getelementptr inbounds float* %tmp22781, i64 1
+  %tmp22783 = getelementptr inbounds float* %tmp22782, i64 1
+  %tmp22784 = getelementptr inbounds float* %tmp22783, i64 1
+  %tmp22785 = getelementptr inbounds float* %tmp22784, i64 1
+  %tmp22786 = getelementptr inbounds float* %tmp22785, i64 1
+  %tmp22787 = getelementptr inbounds float* %tmp22786, i64 1
+  %tmp22788 = getelementptr inbounds float* %tmp22787, i64 1
+  %tmp22789 = getelementptr inbounds float* %tmp22788, i64 1
+  %tmp22790 = getelementptr inbounds float* %tmp22789, i64 1
+  %tmp22791 = getelementptr inbounds float* %tmp22790, i64 1
+  %tmp22792 = getelementptr inbounds float* %tmp22791, i64 1
+  %tmp22793 = getelementptr inbounds float* %tmp22792, i64 1
+  %tmp22794 = getelementptr inbounds float* %tmp22793, i64 1
+  %tmp22795 = getelementptr inbounds float* %tmp22794, i64 1
+  %tmp22796 = getelementptr inbounds float* %tmp22795, i64 1
+  %tmp22797 = getelementptr inbounds float* %tmp22796, i64 1
+  %tmp22798 = getelementptr inbounds float* %tmp22797, i64 1
+  %tmp22799 = getelementptr inbounds float* %tmp22798, i64 1
+  %tmp22800 = getelementptr inbounds float* %tmp22799, i64 1
+  %tmp22801 = getelementptr inbounds float* %tmp22800, i64 1
+  %tmp22802 = getelementptr inbounds float* %tmp22801, i64 1
+  %tmp22803 = getelementptr inbounds float* %tmp22802, i64 1
+  %tmp22804 = getelementptr inbounds float* %tmp22803, i64 1
+  %tmp22805 = getelementptr inbounds float* %tmp22804, i64 1
+  %tmp22806 = getelementptr inbounds float* %tmp22805, i64 1
+  %tmp22807 = getelementptr inbounds float* %tmp22806, i64 1
+  %tmp22808 = getelementptr inbounds float* %tmp22807, i64 1
+  %tmp22809 = getelementptr inbounds float* %tmp22808, i64 1
+  %tmp22810 = getelementptr inbounds float* %tmp22809, i64 1
+  %tmp22811 = getelementptr inbounds float* %tmp22810, i64 1
+  %tmp22812 = getelementptr inbounds float* %tmp22811, i64 1
+  %tmp22813 = getelementptr inbounds float* %tmp22812, i64 1
+  %tmp22814 = getelementptr inbounds float* %tmp22813, i64 1
+  %tmp22815 = getelementptr inbounds float* %tmp22814, i64 1
+  %tmp22816 = getelementptr inbounds float* %tmp22815, i64 1
+  %tmp22817 = getelementptr inbounds float* %tmp22816, i64 1
+  %tmp22818 = getelementptr inbounds float* %tmp22817, i64 1
+  %tmp22819 = getelementptr inbounds float* %tmp22818, i64 1
+  %tmp22820 = getelementptr inbounds float* %tmp22819, i64 1
+  %tmp22821 = getelementptr inbounds float* %tmp22820, i64 1
+  %tmp22822 = getelementptr inbounds float* %tmp22821, i64 1
+  %tmp22823 = getelementptr inbounds float* %tmp22822, i64 1
+  %tmp22824 = getelementptr inbounds float* %tmp22823, i64 1
+  %tmp22825 = getelementptr inbounds float* %tmp22824, i64 1
+  %tmp22826 = getelementptr inbounds float* %tmp22825, i64 1
+  %tmp22827 = getelementptr inbounds float* %tmp22826, i64 1
+  %tmp22828 = getelementptr inbounds float* %tmp22827, i64 1
+  %tmp22829 = getelementptr inbounds float* %tmp22828, i64 1
+  %tmp22830 = getelementptr inbounds float* %tmp22829, i64 1
+  %tmp22831 = getelementptr inbounds float* %tmp22830, i64 1
+  %tmp22832 = getelementptr inbounds float* %tmp22831, i64 1
+  %tmp22833 = getelementptr inbounds float* %tmp22832, i64 1
+  %tmp22834 = getelementptr inbounds float* %tmp22833, i64 1
+  %tmp22835 = getelementptr inbounds float* %tmp22834, i64 1
+  %tmp22836 = getelementptr inbounds float* %tmp22835, i64 1
+  %tmp22837 = getelementptr inbounds float* %tmp22836, i64 1
+  %tmp22838 = getelementptr inbounds float* %tmp22837, i64 1
+  %tmp22839 = getelementptr inbounds float* %tmp22838, i64 1
+  %tmp22840 = getelementptr inbounds float* %tmp22839, i64 1
+  %tmp22841 = getelementptr inbounds float* %tmp22840, i64 1
+  %tmp22842 = getelementptr inbounds float* %tmp22841, i64 1
+  %tmp22843 = getelementptr inbounds float* %tmp22842, i64 1
+  %tmp22844 = getelementptr inbounds float* %tmp22843, i64 1
+  %tmp22845 = getelementptr inbounds float* %tmp22844, i64 1
+  %tmp22846 = getelementptr inbounds float* %tmp22845, i64 1
+  %tmp22847 = getelementptr inbounds float* %tmp22846, i64 1
+  %tmp22848 = getelementptr inbounds float* %tmp22847, i64 1
+  %tmp22849 = getelementptr inbounds float* %tmp22848, i64 1
+  %tmp22850 = getelementptr inbounds float* %tmp22849, i64 1
+  %tmp22851 = getelementptr inbounds float* %tmp22850, i64 1
+  %tmp22852 = getelementptr inbounds float* %tmp22851, i64 1
+  %tmp22853 = getelementptr inbounds float* %tmp22852, i64 1
+  %tmp22854 = getelementptr inbounds float* %tmp22853, i64 1
+  %tmp22855 = getelementptr inbounds float* %tmp22854, i64 1
+  %tmp22856 = getelementptr inbounds float* %tmp22855, i64 1
+  %tmp22857 = getelementptr inbounds float* %tmp22856, i64 1
+  %tmp22858 = getelementptr inbounds float* %tmp22857, i64 1
+  %tmp22859 = getelementptr inbounds float* %tmp22858, i64 1
+  %tmp22860 = getelementptr inbounds float* %tmp22859, i64 1
+  %tmp22861 = getelementptr inbounds float* %tmp22860, i64 1
+  %tmp22862 = getelementptr inbounds float* %tmp22861, i64 1
+  %tmp22863 = getelementptr inbounds float* %tmp22862, i64 1
+  %tmp22864 = getelementptr inbounds float* %tmp22863, i64 1
+  %tmp22865 = getelementptr inbounds float* %tmp22864, i64 1
+  %tmp22866 = getelementptr inbounds float* %tmp22865, i64 1
+  %tmp22867 = getelementptr inbounds float* %tmp22866, i64 1
+  %tmp22868 = getelementptr inbounds float* %tmp22867, i64 1
+  %tmp22869 = getelementptr inbounds float* %tmp22868, i64 1
+  %tmp22870 = getelementptr inbounds float* %tmp22869, i64 1
+  %tmp22871 = getelementptr inbounds float* %tmp22870, i64 1
+  %tmp22872 = getelementptr inbounds float* %tmp22871, i64 1
+  %tmp22873 = getelementptr inbounds float* %tmp22872, i64 1
+  %tmp22874 = getelementptr inbounds float* %tmp22873, i64 1
+  %tmp22875 = getelementptr inbounds float* %tmp22874, i64 1
+  %tmp22876 = getelementptr inbounds float* %tmp22875, i64 1
+  %tmp22877 = getelementptr inbounds float* %tmp22876, i64 1
+  %tmp22878 = getelementptr inbounds float* %tmp22877, i64 1
+  %tmp22879 = getelementptr inbounds float* %tmp22878, i64 1
+  %tmp22880 = getelementptr inbounds float* %tmp22879, i64 1
+  %tmp22881 = getelementptr inbounds float* %tmp22880, i64 1
+  %tmp22882 = getelementptr inbounds float* %tmp22881, i64 1
+  %tmp22883 = getelementptr inbounds float* %tmp22882, i64 1
+  %tmp22884 = getelementptr inbounds float* %tmp22883, i64 1
+  %tmp22885 = getelementptr inbounds float* %tmp22884, i64 1
+  %tmp22886 = getelementptr inbounds float* %tmp22885, i64 1
+  %tmp22887 = getelementptr inbounds float* %tmp22886, i64 1
+  %tmp22888 = getelementptr inbounds float* %tmp22887, i64 1
+  %tmp22889 = getelementptr inbounds float* %tmp22888, i64 1
+  %tmp22890 = getelementptr inbounds float* %tmp22889, i64 1
+  %tmp22891 = getelementptr inbounds float* %tmp22890, i64 1
+  %tmp22892 = getelementptr inbounds float* %tmp22891, i64 1
+  %tmp22893 = getelementptr inbounds float* %tmp22892, i64 1
+  %tmp22894 = getelementptr inbounds float* %tmp22893, i64 1
+  %tmp22895 = getelementptr inbounds float* %tmp22894, i64 1
+  %tmp22896 = getelementptr inbounds float* %tmp22895, i64 1
+  %tmp22897 = getelementptr inbounds float* %tmp22896, i64 1
+  %tmp22898 = getelementptr inbounds float* %tmp22897, i64 1
+  %tmp22899 = getelementptr inbounds float* %tmp22898, i64 1
+  %tmp22900 = getelementptr inbounds float* %tmp22899, i64 1
+  %tmp22901 = getelementptr inbounds float* %tmp22900, i64 1
+  %tmp22902 = getelementptr inbounds float* %tmp22901, i64 1
+  %tmp22903 = getelementptr inbounds float* %tmp22902, i64 1
+  %tmp22904 = getelementptr inbounds float* %tmp22903, i64 1
+  %tmp22905 = getelementptr inbounds float* %tmp22904, i64 1
+  %tmp22906 = getelementptr inbounds float* %tmp22905, i64 1
+  %tmp22907 = getelementptr inbounds float* %tmp22906, i64 1
+  %tmp22908 = getelementptr inbounds float* %tmp22907, i64 1
+  %tmp22909 = getelementptr inbounds float* %tmp22908, i64 1
+  %tmp22910 = getelementptr inbounds float* %tmp22909, i64 1
+  %tmp22911 = getelementptr inbounds float* %tmp22910, i64 1
+  %tmp22912 = getelementptr inbounds float* %tmp22911, i64 1
+  %tmp22913 = getelementptr inbounds float* %tmp22912, i64 1
+  %tmp22914 = getelementptr inbounds float* %tmp22913, i64 1
+  %tmp22915 = getelementptr inbounds float* %tmp22914, i64 1
+  %tmp22916 = getelementptr inbounds float* %tmp22915, i64 1
+  %tmp22917 = getelementptr inbounds float* %tmp22916, i64 1
+  %tmp22918 = getelementptr inbounds float* %tmp22917, i64 1
+  %tmp22919 = getelementptr inbounds float* %tmp22918, i64 1
+  %tmp22920 = getelementptr inbounds float* %tmp22919, i64 1
+  %tmp22921 = getelementptr inbounds float* %tmp22920, i64 1
+  %tmp22922 = getelementptr inbounds float* %tmp22921, i64 1
+  %tmp22923 = getelementptr inbounds float* %tmp22922, i64 1
+  %tmp22924 = getelementptr inbounds float* %tmp22923, i64 1
+  %tmp22925 = getelementptr inbounds float* %tmp22924, i64 1
+  %tmp22926 = getelementptr inbounds float* %tmp22925, i64 1
+  %tmp22927 = getelementptr inbounds float* %tmp22926, i64 1
+  %tmp22928 = getelementptr inbounds float* %tmp22927, i64 1
+  %tmp22929 = getelementptr inbounds float* %tmp22928, i64 1
+  %tmp22930 = getelementptr inbounds float* %tmp22929, i64 1
+  %tmp22931 = getelementptr inbounds float* %tmp22930, i64 1
+  %tmp22932 = getelementptr inbounds float* %tmp22931, i64 1
+  %tmp22933 = getelementptr inbounds float* %tmp22932, i64 1
+  %tmp22934 = getelementptr inbounds float* %tmp22933, i64 1
+  %tmp22935 = getelementptr inbounds float* %tmp22934, i64 1
+  %tmp22936 = getelementptr inbounds float* %tmp22935, i64 1
+  %tmp22937 = getelementptr inbounds float* %tmp22936, i64 1
+  %tmp22938 = getelementptr inbounds float* %tmp22937, i64 1
+  %tmp22939 = getelementptr inbounds float* %tmp22938, i64 1
+  %tmp22940 = getelementptr inbounds float* %tmp22939, i64 1
+  %tmp22941 = getelementptr inbounds float* %tmp22940, i64 1
+  %tmp22942 = getelementptr inbounds float* %tmp22941, i64 1
+  %tmp22943 = getelementptr inbounds float* %tmp22942, i64 1
+  %tmp22944 = getelementptr inbounds float* %tmp22943, i64 1
+  %tmp22945 = getelementptr inbounds float* %tmp22944, i64 1
+  %tmp22946 = getelementptr inbounds float* %tmp22945, i64 1
+  %tmp22947 = getelementptr inbounds float* %tmp22946, i64 1
+  %tmp22948 = getelementptr inbounds float* %tmp22947, i64 1
+  %tmp22949 = getelementptr inbounds float* %tmp22948, i64 1
+  %tmp22950 = getelementptr inbounds float* %tmp22949, i64 1
+  %tmp22951 = getelementptr inbounds float* %tmp22950, i64 1
+  %tmp22952 = getelementptr inbounds float* %tmp22951, i64 1
+  %tmp22953 = getelementptr inbounds float* %tmp22952, i64 1
+  %tmp22954 = getelementptr inbounds float* %tmp22953, i64 1
+  %tmp22955 = getelementptr inbounds float* %tmp22954, i64 1
+  %tmp22956 = getelementptr inbounds float* %tmp22955, i64 1
+  %tmp22957 = getelementptr inbounds float* %tmp22956, i64 1
+  %tmp22958 = getelementptr inbounds float* %tmp22957, i64 1
+  %tmp22959 = getelementptr inbounds float* %tmp22958, i64 1
+  %tmp22960 = getelementptr inbounds float* %tmp22959, i64 1
+  %tmp22961 = getelementptr inbounds float* %tmp22960, i64 1
+  %tmp22962 = getelementptr inbounds float* %tmp22961, i64 1
+  %tmp22963 = getelementptr inbounds float* %tmp22962, i64 1
+  %tmp22964 = getelementptr inbounds float* %tmp22963, i64 1
+  %tmp22965 = getelementptr inbounds float* %tmp22964, i64 1
+  %tmp22966 = getelementptr inbounds float* %tmp22965, i64 1
+  %tmp22967 = getelementptr inbounds float* %tmp22966, i64 1
+  %tmp22968 = getelementptr inbounds float* %tmp22967, i64 1
+  %tmp22969 = getelementptr inbounds float* %tmp22968, i64 1
+  %tmp22970 = getelementptr inbounds float* %tmp22969, i64 1
+  %tmp22971 = getelementptr inbounds float* %tmp22970, i64 1
+  %tmp22972 = getelementptr inbounds float* %tmp22971, i64 1
+  %tmp22973 = getelementptr inbounds float* %tmp22972, i64 1
+  %tmp22974 = getelementptr inbounds float* %tmp22973, i64 1
+  %tmp22975 = getelementptr inbounds float* %tmp22974, i64 1
+  %tmp22976 = getelementptr inbounds float* %tmp22975, i64 1
+  %tmp22977 = getelementptr inbounds float* %tmp22976, i64 1
+  %tmp22978 = getelementptr inbounds float* %tmp22977, i64 1
+  %tmp22979 = getelementptr inbounds float* %tmp22978, i64 1
+  %tmp22980 = getelementptr inbounds float* %tmp22979, i64 1
+  %tmp22981 = getelementptr inbounds float* %tmp22980, i64 1
+  %tmp22982 = getelementptr inbounds float* %tmp22981, i64 1
+  %tmp22983 = getelementptr inbounds float* %tmp22982, i64 1
+  %tmp22984 = getelementptr inbounds float* %tmp22983, i64 1
+  %tmp22985 = getelementptr inbounds float* %tmp22984, i64 1
+  %tmp22986 = getelementptr inbounds float* %tmp22985, i64 1
+  %tmp22987 = getelementptr inbounds float* %tmp22986, i64 1
+  %tmp22988 = getelementptr inbounds float* %tmp22987, i64 1
+  %tmp22989 = getelementptr inbounds float* %tmp22988, i64 1
+  %tmp22990 = getelementptr inbounds float* %tmp22989, i64 1
+  %tmp22991 = getelementptr inbounds float* %tmp22990, i64 1
+  %tmp22992 = getelementptr inbounds float* %tmp22991, i64 1
+  %tmp22993 = getelementptr inbounds float* %tmp22992, i64 1
+  %tmp22994 = getelementptr inbounds float* %tmp22993, i64 1
+  %tmp22995 = getelementptr inbounds float* %tmp22994, i64 1
+  %tmp22996 = getelementptr inbounds float* %tmp22995, i64 1
+  %tmp22997 = getelementptr inbounds float* %tmp22996, i64 1
+  %tmp22998 = getelementptr inbounds float* %tmp22997, i64 1
+  %tmp22999 = getelementptr inbounds float* %tmp22998, i64 1
+  %tmp23000 = getelementptr inbounds float* %tmp22999, i64 1
+  %tmp23001 = getelementptr inbounds float* %tmp23000, i64 1
+  %tmp23002 = getelementptr inbounds float* %tmp23001, i64 1
+  %tmp23003 = getelementptr inbounds float* %tmp23002, i64 1
+  %tmp23004 = getelementptr inbounds float* %tmp23003, i64 1
+  %tmp23005 = getelementptr inbounds float* %tmp23004, i64 1
+  %tmp23006 = getelementptr inbounds float* %tmp23005, i64 1
+  %tmp23007 = getelementptr inbounds float* %tmp23006, i64 1
+  %tmp23008 = getelementptr inbounds float* %tmp23007, i64 1
+  %tmp23009 = getelementptr inbounds float* %tmp23008, i64 1
+  %tmp23010 = getelementptr inbounds float* %tmp23009, i64 1
+  %tmp23011 = getelementptr inbounds float* %tmp23010, i64 1
+  %tmp23012 = getelementptr inbounds float* %tmp23011, i64 1
+  %tmp23013 = getelementptr inbounds float* %tmp23012, i64 1
+  %tmp23014 = getelementptr inbounds float* %tmp23013, i64 1
+  %tmp23015 = getelementptr inbounds float* %tmp23014, i64 1
+  %tmp23016 = getelementptr inbounds float* %tmp23015, i64 1
+  %tmp23017 = getelementptr inbounds float* %tmp23016, i64 1
+  %tmp23018 = getelementptr inbounds float* %tmp23017, i64 1
+  %tmp23019 = getelementptr inbounds float* %tmp23018, i64 1
+  %tmp23020 = getelementptr inbounds float* %tmp23019, i64 1
+  %tmp23021 = getelementptr inbounds float* %tmp23020, i64 1
+  %tmp23022 = getelementptr inbounds float* %tmp23021, i64 1
+  %tmp23023 = getelementptr inbounds float* %tmp23022, i64 1
+  %tmp23024 = getelementptr inbounds float* %tmp23023, i64 1
+  %tmp23025 = getelementptr inbounds float* %tmp23024, i64 1
+  %tmp23026 = getelementptr inbounds float* %tmp23025, i64 1
+  %tmp23027 = getelementptr inbounds float* %tmp23026, i64 1
+  %tmp23028 = getelementptr inbounds float* %tmp23027, i64 1
+  %tmp23029 = getelementptr inbounds float* %tmp23028, i64 1
+  %tmp23030 = getelementptr inbounds float* %tmp23029, i64 1
+  %tmp23031 = getelementptr inbounds float* %tmp23030, i64 1
+  %tmp23032 = getelementptr inbounds float* %tmp23031, i64 1
+  %tmp23033 = getelementptr inbounds float* %tmp23032, i64 1
+  %tmp23034 = getelementptr inbounds float* %tmp23033, i64 1
+  %tmp23035 = getelementptr inbounds float* %tmp23034, i64 1
+  %tmp23036 = getelementptr inbounds float* %tmp23035, i64 1
+  %tmp23037 = getelementptr inbounds float* %tmp23036, i64 1
+  %tmp23038 = getelementptr inbounds float* %tmp23037, i64 1
+  %tmp23039 = getelementptr inbounds float* %tmp23038, i64 1
+  %tmp23040 = getelementptr inbounds float* %tmp23039, i64 1
+  %tmp23041 = getelementptr inbounds float* %tmp23040, i64 1
+  %tmp23042 = getelementptr inbounds float* %tmp23041, i64 1
+  %tmp23043 = getelementptr inbounds float* %tmp23042, i64 1
+  %tmp23044 = getelementptr inbounds float* %tmp23043, i64 1
+  %tmp23045 = getelementptr inbounds float* %tmp23044, i64 1
+  %tmp23046 = getelementptr inbounds float* %tmp23045, i64 1
+  %tmp23047 = getelementptr inbounds float* %tmp23046, i64 1
+  %tmp23048 = getelementptr inbounds float* %tmp23047, i64 1
+  %tmp23049 = getelementptr inbounds float* %tmp23048, i64 1
+  %tmp23050 = getelementptr inbounds float* %tmp23049, i64 1
+  %tmp23051 = getelementptr inbounds float* %tmp23050, i64 1
+  %tmp23052 = getelementptr inbounds float* %tmp23051, i64 1
+  %tmp23053 = getelementptr inbounds float* %tmp23052, i64 1
+  %tmp23054 = getelementptr inbounds float* %tmp23053, i64 1
+  %tmp23055 = getelementptr inbounds float* %tmp23054, i64 1
+  %tmp23056 = getelementptr inbounds float* %tmp23055, i64 1
+  %tmp23057 = getelementptr inbounds float* %tmp23056, i64 1
+  %tmp23058 = getelementptr inbounds float* %tmp23057, i64 1
+  %tmp23059 = getelementptr inbounds float* %tmp23058, i64 1
+  %tmp23060 = getelementptr inbounds float* %tmp23059, i64 1
+  %tmp23061 = getelementptr inbounds float* %tmp23060, i64 1
+  %tmp23062 = getelementptr inbounds float* %tmp23061, i64 1
+  %tmp23063 = getelementptr inbounds float* %tmp23062, i64 1
+  %tmp23064 = getelementptr inbounds float* %tmp23063, i64 1
+  %tmp23065 = getelementptr inbounds float* %tmp23064, i64 1
+  %tmp23066 = getelementptr inbounds float* %tmp23065, i64 1
+  %tmp23067 = getelementptr inbounds float* %tmp23066, i64 1
+  %tmp23068 = getelementptr inbounds float* %tmp23067, i64 1
+  %tmp23069 = getelementptr inbounds float* %tmp23068, i64 1
+  %tmp23070 = getelementptr inbounds float* %tmp23069, i64 1
+  %tmp23071 = getelementptr inbounds float* %tmp23070, i64 1
+  %tmp23072 = getelementptr inbounds float* %tmp23071, i64 1
+  %tmp23073 = getelementptr inbounds float* %tmp23072, i64 1
+  %tmp23074 = getelementptr inbounds float* %tmp23073, i64 1
+  %tmp23075 = getelementptr inbounds float* %tmp23074, i64 1
+  %tmp23076 = getelementptr inbounds float* %tmp23075, i64 1
+  %tmp23077 = getelementptr inbounds float* %tmp23076, i64 1
+  %tmp23078 = getelementptr inbounds float* %tmp23077, i64 1
+  %tmp23079 = getelementptr inbounds float* %tmp23078, i64 1
+  %tmp23080 = getelementptr inbounds float* %tmp23079, i64 1
+  %tmp23081 = getelementptr inbounds float* %tmp23080, i64 1
+  %tmp23082 = getelementptr inbounds float* %tmp23081, i64 1
+  %tmp23083 = getelementptr inbounds float* %tmp23082, i64 1
+  %tmp23084 = getelementptr inbounds float* %tmp23083, i64 1
+  %tmp23085 = getelementptr inbounds float* %tmp23084, i64 1
+  %tmp23086 = getelementptr inbounds float* %tmp23085, i64 1
+  %tmp23087 = getelementptr inbounds float* %tmp23086, i64 1
+  %tmp23088 = getelementptr inbounds float* %tmp23087, i64 1
+  %tmp23089 = getelementptr inbounds float* %tmp23088, i64 1
+  %tmp23090 = getelementptr inbounds float* %tmp23089, i64 1
+  %tmp23091 = getelementptr inbounds float* %tmp23090, i64 1
+  %tmp23092 = getelementptr inbounds float* %tmp23091, i64 1
+  %tmp23093 = getelementptr inbounds float* %tmp23092, i64 1
+  %tmp23094 = getelementptr inbounds float* %tmp23093, i64 1
+  %tmp23095 = getelementptr inbounds float* %tmp23094, i64 1
+  %tmp23096 = getelementptr inbounds float* %tmp23095, i64 1
+  %tmp23097 = getelementptr inbounds float* %tmp23096, i64 1
+  %tmp23098 = getelementptr inbounds float* %tmp23097, i64 1
+  %tmp23099 = getelementptr inbounds float* %tmp23098, i64 1
+  %tmp23100 = getelementptr inbounds float* %tmp23099, i64 1
+  %tmp23101 = getelementptr inbounds float* %tmp23100, i64 1
+  %tmp23102 = getelementptr inbounds float* %tmp23101, i64 1
+  %tmp23103 = getelementptr inbounds float* %tmp23102, i64 1
+  %tmp23104 = getelementptr inbounds float* %tmp23103, i64 1
+  %tmp23105 = getelementptr inbounds float* %tmp23104, i64 1
+  %tmp23106 = getelementptr inbounds float* %tmp23105, i64 1
+  %tmp23107 = getelementptr inbounds float* %tmp23106, i64 1
+  %tmp23108 = getelementptr inbounds float* %tmp23107, i64 1
+  %tmp23109 = getelementptr inbounds float* %tmp23108, i64 1
+  %tmp23110 = getelementptr inbounds float* %tmp23109, i64 1
+  %tmp23111 = getelementptr inbounds float* %tmp23110, i64 1
+  %tmp23112 = getelementptr inbounds float* %tmp23111, i64 1
+  %tmp23113 = getelementptr inbounds float* %tmp23112, i64 1
+  %tmp23114 = getelementptr inbounds float* %tmp23113, i64 1
+  %tmp23115 = getelementptr inbounds float* %tmp23114, i64 1
+  %tmp23116 = getelementptr inbounds float* %tmp23115, i64 1
+  %tmp23117 = getelementptr inbounds float* %tmp23116, i64 1
+  %tmp23118 = getelementptr inbounds float* %tmp23117, i64 1
+  %tmp23119 = getelementptr inbounds float* %tmp23118, i64 1
+  %tmp23120 = getelementptr inbounds float* %tmp23119, i64 1
+  %tmp23121 = getelementptr inbounds float* %tmp23120, i64 1
+  %tmp23122 = getelementptr inbounds float* %tmp23121, i64 1
+  %tmp23123 = getelementptr inbounds float* %tmp23122, i64 1
+  %tmp23124 = getelementptr inbounds float* %tmp23123, i64 1
+  %tmp23125 = getelementptr inbounds float* %tmp23124, i64 1
+  %tmp23126 = getelementptr inbounds float* %tmp23125, i64 1
+  %tmp23127 = getelementptr inbounds float* %tmp23126, i64 1
+  %tmp23128 = getelementptr inbounds float* %tmp23127, i64 1
+  %tmp23129 = getelementptr inbounds float* %tmp23128, i64 1
+  %tmp23130 = getelementptr inbounds float* %tmp23129, i64 1
+  %tmp23131 = getelementptr inbounds float* %tmp23130, i64 1
+  %tmp23132 = getelementptr inbounds float* %tmp23131, i64 1
+  %tmp23133 = getelementptr inbounds float* %tmp23132, i64 1
+  %tmp23134 = getelementptr inbounds float* %tmp23133, i64 1
+  %tmp23135 = getelementptr inbounds float* %tmp23134, i64 1
+  %tmp23136 = getelementptr inbounds float* %tmp23135, i64 1
+  %tmp23137 = getelementptr inbounds float* %tmp23136, i64 1
+  %tmp23138 = getelementptr inbounds float* %tmp23137, i64 1
+  %tmp23139 = getelementptr inbounds float* %tmp23138, i64 1
+  %tmp23140 = getelementptr inbounds float* %tmp23139, i64 1
+  %tmp23141 = getelementptr inbounds float* %tmp23140, i64 1
+  %tmp23142 = getelementptr inbounds float* %tmp23141, i64 1
+  %tmp23143 = getelementptr inbounds float* %tmp23142, i64 1
+  %tmp23144 = getelementptr inbounds float* %tmp23143, i64 1
+  %tmp23145 = getelementptr inbounds float* %tmp23144, i64 1
+  %tmp23146 = getelementptr inbounds float* %tmp23145, i64 1
+  %tmp23147 = getelementptr inbounds float* %tmp23146, i64 1
+  %tmp23148 = getelementptr inbounds float* %tmp23147, i64 1
+  %tmp23149 = getelementptr inbounds float* %tmp23148, i64 1
+  %tmp23150 = getelementptr inbounds float* %tmp23149, i64 1
+  %tmp23151 = getelementptr inbounds float* %tmp23150, i64 1
+  %tmp23152 = getelementptr inbounds float* %tmp23151, i64 1
+  %tmp23153 = getelementptr inbounds float* %tmp23152, i64 1
+  %tmp23154 = getelementptr inbounds float* %tmp23153, i64 1
+  %tmp23155 = getelementptr inbounds float* %tmp23154, i64 1
+  %tmp23156 = getelementptr inbounds float* %tmp23155, i64 1
+  %tmp23157 = getelementptr inbounds float* %tmp23156, i64 1
+  %tmp23158 = getelementptr inbounds float* %tmp23157, i64 1
+  %tmp23159 = getelementptr inbounds float* %tmp23158, i64 1
+  %tmp23160 = getelementptr inbounds float* %tmp23159, i64 1
+  %tmp23161 = getelementptr inbounds float* %tmp23160, i64 1
+  %tmp23162 = getelementptr inbounds float* %tmp23161, i64 1
+  %tmp23163 = getelementptr inbounds float* %tmp23162, i64 1
+  %tmp23164 = getelementptr inbounds float* %tmp23163, i64 1
+  %tmp23165 = getelementptr inbounds float* %tmp23164, i64 1
+  %tmp23166 = getelementptr inbounds float* %tmp23165, i64 1
+  %tmp23167 = getelementptr inbounds float* %tmp23166, i64 1
+  %tmp23168 = getelementptr inbounds float* %tmp23167, i64 1
+  %tmp23169 = getelementptr inbounds float* %tmp23168, i64 1
+  %tmp23170 = getelementptr inbounds float* %tmp23169, i64 1
+  %tmp23171 = getelementptr inbounds float* %tmp23170, i64 1
+  %tmp23172 = getelementptr inbounds float* %tmp23171, i64 1
+  %tmp23173 = getelementptr inbounds float* %tmp23172, i64 1
+  %tmp23174 = getelementptr inbounds float* %tmp23173, i64 1
+  %tmp23175 = getelementptr inbounds float* %tmp23174, i64 1
+  %tmp23176 = getelementptr inbounds float* %tmp23175, i64 1
+  %tmp23177 = getelementptr inbounds float* %tmp23176, i64 1
+  %tmp23178 = getelementptr inbounds float* %tmp23177, i64 1
+  %tmp23179 = getelementptr inbounds float* %tmp23178, i64 1
+  %tmp23180 = getelementptr inbounds float* %tmp23179, i64 1
+  %tmp23181 = getelementptr inbounds float* %tmp23180, i64 1
+  %tmp23182 = getelementptr inbounds float* %tmp23181, i64 1
+  %tmp23183 = getelementptr inbounds float* %tmp23182, i64 1
+  %tmp23184 = getelementptr inbounds float* %tmp23183, i64 1
+  %tmp23185 = getelementptr inbounds float* %tmp23184, i64 1
+  %tmp23186 = getelementptr inbounds float* %tmp23185, i64 1
+  %tmp23187 = getelementptr inbounds float* %tmp23186, i64 1
+  %tmp23188 = getelementptr inbounds float* %tmp23187, i64 1
+  %tmp23189 = getelementptr inbounds float* %tmp23188, i64 1
+  %tmp23190 = getelementptr inbounds float* %tmp23189, i64 1
+  %tmp23191 = getelementptr inbounds float* %tmp23190, i64 1
+  %tmp23192 = getelementptr inbounds float* %tmp23191, i64 1
+  %tmp23193 = getelementptr inbounds float* %tmp23192, i64 1
+  %tmp23194 = getelementptr inbounds float* %tmp23193, i64 1
+  %tmp23195 = getelementptr inbounds float* %tmp23194, i64 1
+  %tmp23196 = getelementptr inbounds float* %tmp23195, i64 1
+  %tmp23197 = getelementptr inbounds float* %tmp23196, i64 1
+  %tmp23198 = getelementptr inbounds float* %tmp23197, i64 1
+  %tmp23199 = getelementptr inbounds float* %tmp23198, i64 1
+  %tmp23200 = getelementptr inbounds float* %tmp23199, i64 1
+  %tmp23201 = getelementptr inbounds float* %tmp23200, i64 1
+  %tmp23202 = getelementptr inbounds float* %tmp23201, i64 1
+  %tmp23203 = getelementptr inbounds float* %tmp23202, i64 1
+  %tmp23204 = getelementptr inbounds float* %tmp23203, i64 1
+  %tmp23205 = getelementptr inbounds float* %tmp23204, i64 1
+  %tmp23206 = getelementptr inbounds float* %tmp23205, i64 1
+  %tmp23207 = getelementptr inbounds float* %tmp23206, i64 1
+  %tmp23208 = getelementptr inbounds float* %tmp23207, i64 1
+  %tmp23209 = getelementptr inbounds float* %tmp23208, i64 1
+  %tmp23210 = getelementptr inbounds float* %tmp23209, i64 1
+  %tmp23211 = getelementptr inbounds float* %tmp23210, i64 1
+  %tmp23212 = getelementptr inbounds float* %tmp23211, i64 1
+  %tmp23213 = getelementptr inbounds float* %tmp23212, i64 1
+  %tmp23214 = getelementptr inbounds float* %tmp23213, i64 1
+  %tmp23215 = getelementptr inbounds float* %tmp23214, i64 1
+  %tmp23216 = getelementptr inbounds float* %tmp23215, i64 1
+  %tmp23217 = getelementptr inbounds float* %tmp23216, i64 1
+  %tmp23218 = getelementptr inbounds float* %tmp23217, i64 1
+  %tmp23219 = getelementptr inbounds float* %tmp23218, i64 1
+  %tmp23220 = getelementptr inbounds float* %tmp23219, i64 1
+  %tmp23221 = getelementptr inbounds float* %tmp23220, i64 1
+  %tmp23222 = getelementptr inbounds float* %tmp23221, i64 1
+  %tmp23223 = getelementptr inbounds float* %tmp23222, i64 1
+  %tmp23224 = getelementptr inbounds float* %tmp23223, i64 1
+  %tmp23225 = getelementptr inbounds float* %tmp23224, i64 1
+  %tmp23226 = getelementptr inbounds float* %tmp23225, i64 1
+  %tmp23227 = getelementptr inbounds float* %tmp23226, i64 1
+  %tmp23228 = getelementptr inbounds float* %tmp23227, i64 1
+  %tmp23229 = getelementptr inbounds float* %tmp23228, i64 1
+  %tmp23230 = getelementptr inbounds float* %tmp23229, i64 1
+  %tmp23231 = getelementptr inbounds float* %tmp23230, i64 1
+  %tmp23232 = getelementptr inbounds float* %tmp23231, i64 1
+  %tmp23233 = getelementptr inbounds float* %tmp23232, i64 1
+  %tmp23234 = getelementptr inbounds float* %tmp23233, i64 1
+  %tmp23235 = getelementptr inbounds float* %tmp23234, i64 1
+  %tmp23236 = getelementptr inbounds float* %tmp23235, i64 1
+  %tmp23237 = getelementptr inbounds float* %tmp23236, i64 1
+  %tmp23238 = getelementptr inbounds float* %tmp23237, i64 1
+  %tmp23239 = getelementptr inbounds float* %tmp23238, i64 1
+  %tmp23240 = getelementptr inbounds float* %tmp23239, i64 1
+  %tmp23241 = getelementptr inbounds float* %tmp23240, i64 1
+  %tmp23242 = getelementptr inbounds float* %tmp23241, i64 1
+  %tmp23243 = getelementptr inbounds float* %tmp23242, i64 1
+  %tmp23244 = getelementptr inbounds float* %tmp23243, i64 1
+  %tmp23245 = getelementptr inbounds float* %tmp23244, i64 1
+  %tmp23246 = getelementptr inbounds float* %tmp23245, i64 1
+  %tmp23247 = getelementptr inbounds float* %tmp23246, i64 1
+  %tmp23248 = getelementptr inbounds float* %tmp23247, i64 1
+  %tmp23249 = getelementptr inbounds float* %tmp23248, i64 1
+  %tmp23250 = getelementptr inbounds float* %tmp23249, i64 1
+  %tmp23251 = getelementptr inbounds float* %tmp23250, i64 1
+  %tmp23252 = getelementptr inbounds float* %tmp23251, i64 1
+  %tmp23253 = getelementptr inbounds float* %tmp23252, i64 1
+  %tmp23254 = getelementptr inbounds float* %tmp23253, i64 1
+  %tmp23255 = getelementptr inbounds float* %tmp23254, i64 1
+  %tmp23256 = getelementptr inbounds float* %tmp23255, i64 1
+  %tmp23257 = getelementptr inbounds float* %tmp23256, i64 1
+  %tmp23258 = getelementptr inbounds float* %tmp23257, i64 1
+  %tmp23259 = getelementptr inbounds float* %tmp23258, i64 1
+  %tmp23260 = getelementptr inbounds float* %tmp23259, i64 1
+  %tmp23261 = getelementptr inbounds float* %tmp23260, i64 1
+  %tmp23262 = getelementptr inbounds float* %tmp23261, i64 1
+  %tmp23263 = getelementptr inbounds float* %tmp23262, i64 1
+  %tmp23264 = getelementptr inbounds float* %tmp23263, i64 1
+  %tmp23265 = getelementptr inbounds float* %tmp23264, i64 1
+  %tmp23266 = getelementptr inbounds float* %tmp23265, i64 1
+  %tmp23267 = getelementptr inbounds float* %tmp23266, i64 1
+  %tmp23268 = getelementptr inbounds float* %tmp23267, i64 1
+  %tmp23269 = getelementptr inbounds float* %tmp23268, i64 1
+  %tmp23270 = getelementptr inbounds float* %tmp23269, i64 1
+  %tmp23271 = getelementptr inbounds float* %tmp23270, i64 1
+  %tmp23272 = getelementptr inbounds float* %tmp23271, i64 1
+  %tmp23273 = getelementptr inbounds float* %tmp23272, i64 1
+  %tmp23274 = getelementptr inbounds float* %tmp23273, i64 1
+  %tmp23275 = getelementptr inbounds float* %tmp23274, i64 1
+  %tmp23276 = getelementptr inbounds float* %tmp23275, i64 1
+  %tmp23277 = getelementptr inbounds float* %tmp23276, i64 1
+  %tmp23278 = getelementptr inbounds float* %tmp23277, i64 1
+  %tmp23279 = getelementptr inbounds float* %tmp23278, i64 1
+  %tmp23280 = getelementptr inbounds float* %tmp23279, i64 1
+  %tmp23281 = getelementptr inbounds float* %tmp23280, i64 1
+  %tmp23282 = getelementptr inbounds float* %tmp23281, i64 1
+  %tmp23283 = getelementptr inbounds float* %tmp23282, i64 1
+  %tmp23284 = getelementptr inbounds float* %tmp23283, i64 1
+  %tmp23285 = getelementptr inbounds float* %tmp23284, i64 1
+  %tmp23286 = getelementptr inbounds float* %tmp23285, i64 1
+  %tmp23287 = getelementptr inbounds float* %tmp23286, i64 1
+  %tmp23288 = getelementptr inbounds float* %tmp23287, i64 1
+  %tmp23289 = getelementptr inbounds float* %tmp23288, i64 1
+  %tmp23290 = getelementptr inbounds float* %tmp23289, i64 1
+  %tmp23291 = getelementptr inbounds float* %tmp23290, i64 1
+  %tmp23292 = getelementptr inbounds float* %tmp23291, i64 1
+  %tmp23293 = getelementptr inbounds float* %tmp23292, i64 1
+  %tmp23294 = getelementptr inbounds float* %tmp23293, i64 1
+  %tmp23295 = getelementptr inbounds float* %tmp23294, i64 1
+  %tmp23296 = getelementptr inbounds float* %tmp23295, i64 1
+  %tmp23297 = getelementptr inbounds float* %tmp23296, i64 1
+  %tmp23298 = getelementptr inbounds float* %tmp23297, i64 1
+  %tmp23299 = getelementptr inbounds float* %tmp23298, i64 1
+  %tmp23300 = getelementptr inbounds float* %tmp23299, i64 1
+  %tmp23301 = getelementptr inbounds float* %tmp23300, i64 1
+  %tmp23302 = getelementptr inbounds float* %tmp23301, i64 1
+  %tmp23303 = getelementptr inbounds float* %tmp23302, i64 1
+  %tmp23304 = getelementptr inbounds float* %tmp23303, i64 1
+  %tmp23305 = getelementptr inbounds float* %tmp23304, i64 1
+  %tmp23306 = getelementptr inbounds float* %tmp23305, i64 1
+  %tmp23307 = getelementptr inbounds float* %tmp23306, i64 1
+  %tmp23308 = getelementptr inbounds float* %tmp23307, i64 1
+  %tmp23309 = getelementptr inbounds float* %tmp23308, i64 1
+  %tmp23310 = getelementptr inbounds float* %tmp23309, i64 1
+  %tmp23311 = getelementptr inbounds float* %tmp23310, i64 1
+  %tmp23312 = getelementptr inbounds float* %tmp23311, i64 1
+  %tmp23313 = getelementptr inbounds float* %tmp23312, i64 1
+  %tmp23314 = getelementptr inbounds float* %tmp23313, i64 1
+  %tmp23315 = getelementptr inbounds float* %tmp23314, i64 1
+  %tmp23316 = getelementptr inbounds float* %tmp23315, i64 1
+  %tmp23317 = getelementptr inbounds float* %tmp23316, i64 1
+  %tmp23318 = getelementptr inbounds float* %tmp23317, i64 1
+  %tmp23319 = getelementptr inbounds float* %tmp23318, i64 1
+  %tmp23320 = getelementptr inbounds float* %tmp23319, i64 1
+  %tmp23321 = getelementptr inbounds float* %tmp23320, i64 1
+  %tmp23322 = getelementptr inbounds float* %tmp23321, i64 1
+  %tmp23323 = getelementptr inbounds float* %tmp23322, i64 1
+  %tmp23324 = getelementptr inbounds float* %tmp23323, i64 1
+  %tmp23325 = getelementptr inbounds float* %tmp23324, i64 1
+  %tmp23326 = getelementptr inbounds float* %tmp23325, i64 1
+  %tmp23327 = getelementptr inbounds float* %tmp23326, i64 1
+  %tmp23328 = getelementptr inbounds float* %tmp23327, i64 1
+  %tmp23329 = getelementptr inbounds float* %tmp23328, i64 1
+  %tmp23330 = getelementptr inbounds float* %tmp23329, i64 1
+  %tmp23331 = getelementptr inbounds float* %tmp23330, i64 1
+  %tmp23332 = getelementptr inbounds float* %tmp23331, i64 1
+  %tmp23333 = getelementptr inbounds float* %tmp23332, i64 1
+  %tmp23334 = getelementptr inbounds float* %tmp23333, i64 1
+  %tmp23335 = getelementptr inbounds float* %tmp23334, i64 1
+  %tmp23336 = getelementptr inbounds float* %tmp23335, i64 1
+  %tmp23337 = getelementptr inbounds float* %tmp23336, i64 1
+  %tmp23338 = getelementptr inbounds float* %tmp23337, i64 1
+  %tmp23339 = getelementptr inbounds float* %tmp23338, i64 1
+  %tmp23340 = getelementptr inbounds float* %tmp23339, i64 1
+  %tmp23341 = getelementptr inbounds float* %tmp23340, i64 1
+  %tmp23342 = getelementptr inbounds float* %tmp23341, i64 1
+  %tmp23343 = getelementptr inbounds float* %tmp23342, i64 1
+  %tmp23344 = getelementptr inbounds float* %tmp23343, i64 1
+  %tmp23345 = getelementptr inbounds float* %tmp23344, i64 1
+  %tmp23346 = getelementptr inbounds float* %tmp23345, i64 1
+  %tmp23347 = getelementptr inbounds float* %tmp23346, i64 1
+  %tmp23348 = getelementptr inbounds float* %tmp23347, i64 1
+  %tmp23349 = getelementptr inbounds float* %tmp23348, i64 1
+  %tmp23350 = getelementptr inbounds float* %tmp23349, i64 1
+  %tmp23351 = getelementptr inbounds float* %tmp23350, i64 1
+  %tmp23352 = getelementptr inbounds float* %tmp23351, i64 1
+  %tmp23353 = getelementptr inbounds float* %tmp23352, i64 1
+  %tmp23354 = getelementptr inbounds float* %tmp23353, i64 1
+  %tmp23355 = getelementptr inbounds float* %tmp23354, i64 1
+  %tmp23356 = getelementptr inbounds float* %tmp23355, i64 1
+  %tmp23357 = getelementptr inbounds float* %tmp23356, i64 1
+  %tmp23358 = getelementptr inbounds float* %tmp23357, i64 1
+  %tmp23359 = getelementptr inbounds float* %tmp23358, i64 1
+  %tmp23360 = getelementptr inbounds float* %tmp23359, i64 1
+  %tmp23361 = getelementptr inbounds float* %tmp23360, i64 1
+  %tmp23362 = getelementptr inbounds float* %tmp23361, i64 1
+  %tmp23363 = getelementptr inbounds float* %tmp23362, i64 1
+  %tmp23364 = getelementptr inbounds float* %tmp23363, i64 1
+  %tmp23365 = getelementptr inbounds float* %tmp23364, i64 1
+  %tmp23366 = getelementptr inbounds float* %tmp23365, i64 1
+  %tmp23367 = getelementptr inbounds float* %tmp23366, i64 1
+  %tmp23368 = getelementptr inbounds float* %tmp23367, i64 1
+  %tmp23369 = getelementptr inbounds float* %tmp23368, i64 1
+  %tmp23370 = getelementptr inbounds float* %tmp23369, i64 1
+  %tmp23371 = getelementptr inbounds float* %tmp23370, i64 1
+  %tmp23372 = getelementptr inbounds float* %tmp23371, i64 1
+  %tmp23373 = getelementptr inbounds float* %tmp23372, i64 1
+  %tmp23374 = getelementptr inbounds float* %tmp23373, i64 1
+  %tmp23375 = getelementptr inbounds float* %tmp23374, i64 1
+  %tmp23376 = getelementptr inbounds float* %tmp23375, i64 1
+  %tmp23377 = getelementptr inbounds float* %tmp23376, i64 1
+  %tmp23378 = getelementptr inbounds float* %tmp23377, i64 1
+  %tmp23379 = getelementptr inbounds float* %tmp23378, i64 1
+  %tmp23380 = getelementptr inbounds float* %tmp23379, i64 1
+  %tmp23381 = getelementptr inbounds float* %tmp23380, i64 1
+  %tmp23382 = getelementptr inbounds float* %tmp23381, i64 1
+  %tmp23383 = getelementptr inbounds float* %tmp23382, i64 1
+  %tmp23384 = getelementptr inbounds float* %tmp23383, i64 1
+  %tmp23385 = getelementptr inbounds float* %tmp23384, i64 1
+  %tmp23386 = getelementptr inbounds float* %tmp23385, i64 1
+  %tmp23387 = getelementptr inbounds float* %tmp23386, i64 1
+  %tmp23388 = getelementptr inbounds float* %tmp23387, i64 1
+  %tmp23389 = getelementptr inbounds float* %tmp23388, i64 1
+  %tmp23390 = getelementptr inbounds float* %tmp23389, i64 1
+  %tmp23391 = getelementptr inbounds float* %tmp23390, i64 1
+  %tmp23392 = getelementptr inbounds float* %tmp23391, i64 1
+  %tmp23393 = getelementptr inbounds float* %tmp23392, i64 1
+  %tmp23394 = getelementptr inbounds float* %tmp23393, i64 1
+  %tmp23395 = getelementptr inbounds float* %tmp23394, i64 1
+  %tmp23396 = getelementptr inbounds float* %tmp23395, i64 1
+  %tmp23397 = getelementptr inbounds float* %tmp23396, i64 1
+  %tmp23398 = getelementptr inbounds float* %tmp23397, i64 1
+  %tmp23399 = getelementptr inbounds float* %tmp23398, i64 1
+  %tmp23400 = getelementptr inbounds float* %tmp23399, i64 1
+  %tmp23401 = getelementptr inbounds float* %tmp23400, i64 1
+  %tmp23402 = getelementptr inbounds float* %tmp23401, i64 1
+  %tmp23403 = getelementptr inbounds float* %tmp23402, i64 1
+  %tmp23404 = getelementptr inbounds float* %tmp23403, i64 1
+  %tmp23405 = getelementptr inbounds float* %tmp23404, i64 1
+  %tmp23406 = getelementptr inbounds float* %tmp23405, i64 1
+  %tmp23407 = getelementptr inbounds float* %tmp23406, i64 1
+  %tmp23408 = getelementptr inbounds float* %tmp23407, i64 1
+  %tmp23409 = getelementptr inbounds float* %tmp23408, i64 1
+  %tmp23410 = getelementptr inbounds float* %tmp23409, i64 1
+  %tmp23411 = getelementptr inbounds float* %tmp23410, i64 1
+  %tmp23412 = getelementptr inbounds float* %tmp23411, i64 1
+  %tmp23413 = getelementptr inbounds float* %tmp23412, i64 1
+  %tmp23414 = getelementptr inbounds float* %tmp23413, i64 1
+  %tmp23415 = getelementptr inbounds float* %tmp23414, i64 1
+  %tmp23416 = getelementptr inbounds float* %tmp23415, i64 1
+  %tmp23417 = getelementptr inbounds float* %tmp23416, i64 1
+  %tmp23418 = getelementptr inbounds float* %tmp23417, i64 1
+  %tmp23419 = getelementptr inbounds float* %tmp23418, i64 1
+  %tmp23420 = getelementptr inbounds float* %tmp23419, i64 1
+  %tmp23421 = getelementptr inbounds float* %tmp23420, i64 1
+  %tmp23422 = getelementptr inbounds float* %tmp23421, i64 1
+  %tmp23423 = getelementptr inbounds float* %tmp23422, i64 1
+  %tmp23424 = getelementptr inbounds float* %tmp23423, i64 1
+  %tmp23425 = getelementptr inbounds float* %tmp23424, i64 1
+  %tmp23426 = getelementptr inbounds float* %tmp23425, i64 1
+  %tmp23427 = getelementptr inbounds float* %tmp23426, i64 1
+  %tmp23428 = getelementptr inbounds float* %tmp23427, i64 1
+  %tmp23429 = getelementptr inbounds float* %tmp23428, i64 1
+  %tmp23430 = getelementptr inbounds float* %tmp23429, i64 1
+  %tmp23431 = getelementptr inbounds float* %tmp23430, i64 1
+  %tmp23432 = getelementptr inbounds float* %tmp23431, i64 1
+  %tmp23433 = getelementptr inbounds float* %tmp23432, i64 1
+  %tmp23434 = getelementptr inbounds float* %tmp23433, i64 1
+  %tmp23435 = getelementptr inbounds float* %tmp23434, i64 1
+  %tmp23436 = getelementptr inbounds float* %tmp23435, i64 1
+  %tmp23437 = getelementptr inbounds float* %tmp23436, i64 1
+  %tmp23438 = getelementptr inbounds float* %tmp23437, i64 1
+  %tmp23439 = getelementptr inbounds float* %tmp23438, i64 1
+  %tmp23440 = getelementptr inbounds float* %tmp23439, i64 1
+  %tmp23441 = getelementptr inbounds float* %tmp23440, i64 1
+  %tmp23442 = getelementptr inbounds float* %tmp23441, i64 1
+  %tmp23443 = getelementptr inbounds float* %tmp23442, i64 1
+  %tmp23444 = getelementptr inbounds float* %tmp23443, i64 1
+  %tmp23445 = getelementptr inbounds float* %tmp23444, i64 1
+  %tmp23446 = getelementptr inbounds float* %tmp23445, i64 1
+  %tmp23447 = getelementptr inbounds float* %tmp23446, i64 1
+  %tmp23448 = getelementptr inbounds float* %tmp23447, i64 1
+  %tmp23449 = getelementptr inbounds float* %tmp23448, i64 1
+  %tmp23450 = getelementptr inbounds float* %tmp23449, i64 1
+  %tmp23451 = getelementptr inbounds float* %tmp23450, i64 1
+  %tmp23452 = getelementptr inbounds float* %tmp23451, i64 1
+  %tmp23453 = getelementptr inbounds float* %tmp23452, i64 1
+  %tmp23454 = getelementptr inbounds float* %tmp23453, i64 1
+  %tmp23455 = getelementptr inbounds float* %tmp23454, i64 1
+  %tmp23456 = getelementptr inbounds float* %tmp23455, i64 1
+  %tmp23457 = getelementptr inbounds float* %tmp23456, i64 1
+  %tmp23458 = getelementptr inbounds float* %tmp23457, i64 1
+  %tmp23459 = getelementptr inbounds float* %tmp23458, i64 1
+  %tmp23460 = getelementptr inbounds float* %tmp23459, i64 1
+  %tmp23461 = getelementptr inbounds float* %tmp23460, i64 1
+  %tmp23462 = getelementptr inbounds float* %tmp23461, i64 1
+  %tmp23463 = getelementptr inbounds float* %tmp23462, i64 1
+  %tmp23464 = getelementptr inbounds float* %tmp23463, i64 1
+  %tmp23465 = getelementptr inbounds float* %tmp23464, i64 1
+  %tmp23466 = getelementptr inbounds float* %tmp23465, i64 1
+  %tmp23467 = getelementptr inbounds float* %tmp23466, i64 1
+  %tmp23468 = getelementptr inbounds float* %tmp23467, i64 1
+  %tmp23469 = getelementptr inbounds float* %tmp23468, i64 1
+  %tmp23470 = getelementptr inbounds float* %tmp23469, i64 1
+  %tmp23471 = getelementptr inbounds float* %tmp23470, i64 1
+  %tmp23472 = getelementptr inbounds float* %tmp23471, i64 1
+  %tmp23473 = getelementptr inbounds float* %tmp23472, i64 1
+  %tmp23474 = getelementptr inbounds float* %tmp23473, i64 1
+  %tmp23475 = getelementptr inbounds float* %tmp23474, i64 1
+  %tmp23476 = getelementptr inbounds float* %tmp23475, i64 1
+  %tmp23477 = getelementptr inbounds float* %tmp23476, i64 1
+  %tmp23478 = getelementptr inbounds float* %tmp23477, i64 1
+  %tmp23479 = getelementptr inbounds float* %tmp23478, i64 1
+  %tmp23480 = getelementptr inbounds float* %tmp23479, i64 1
+  %tmp23481 = getelementptr inbounds float* %tmp23480, i64 1
+  %tmp23482 = getelementptr inbounds float* %tmp23481, i64 1
+  %tmp23483 = getelementptr inbounds float* %tmp23482, i64 1
+  %tmp23484 = getelementptr inbounds float* %tmp23483, i64 1
+  %tmp23485 = getelementptr inbounds float* %tmp23484, i64 1
+  %tmp23486 = getelementptr inbounds float* %tmp23485, i64 1
+  %tmp23487 = getelementptr inbounds float* %tmp23486, i64 1
+  %tmp23488 = getelementptr inbounds float* %tmp23487, i64 1
+  %tmp23489 = getelementptr inbounds float* %tmp23488, i64 1
+  %tmp23490 = getelementptr inbounds float* %tmp23489, i64 1
+  %tmp23491 = getelementptr inbounds float* %tmp23490, i64 1
+  %tmp23492 = getelementptr inbounds float* %tmp23491, i64 1
+  %tmp23493 = getelementptr inbounds float* %tmp23492, i64 1
+  %tmp23494 = getelementptr inbounds float* %tmp23493, i64 1
+  %tmp23495 = getelementptr inbounds float* %tmp23494, i64 1
+  %tmp23496 = getelementptr inbounds float* %tmp23495, i64 1
+  %tmp23497 = getelementptr inbounds float* %tmp23496, i64 1
+  %tmp23498 = getelementptr inbounds float* %tmp23497, i64 1
+  %tmp23499 = getelementptr inbounds float* %tmp23498, i64 1
+  %tmp23500 = getelementptr inbounds float* %tmp23499, i64 1
+  %tmp23501 = getelementptr inbounds float* %tmp23500, i64 1
+  %tmp23502 = getelementptr inbounds float* %tmp23501, i64 1
+  %tmp23503 = getelementptr inbounds float* %tmp23502, i64 1
+  %tmp23504 = getelementptr inbounds float* %tmp23503, i64 1
+  %tmp23505 = getelementptr inbounds float* %tmp23504, i64 1
+  %tmp23506 = getelementptr inbounds float* %tmp23505, i64 1
+  %tmp23507 = getelementptr inbounds float* %tmp23506, i64 1
+  %tmp23508 = getelementptr inbounds float* %tmp23507, i64 1
+  %tmp23509 = getelementptr inbounds float* %tmp23508, i64 1
+  %tmp23510 = getelementptr inbounds float* %tmp23509, i64 1
+  %tmp23511 = getelementptr inbounds float* %tmp23510, i64 1
+  %tmp23512 = getelementptr inbounds float* %tmp23511, i64 1
+  %tmp23513 = getelementptr inbounds float* %tmp23512, i64 1
+  %tmp23514 = getelementptr inbounds float* %tmp23513, i64 1
+  %tmp23515 = getelementptr inbounds float* %tmp23514, i64 1
+  %tmp23516 = getelementptr inbounds float* %tmp23515, i64 1
+  %tmp23517 = getelementptr inbounds float* %tmp23516, i64 1
+  %tmp23518 = getelementptr inbounds float* %tmp23517, i64 1
+  %tmp23519 = getelementptr inbounds float* %tmp23518, i64 1
+  %tmp23520 = getelementptr inbounds float* %tmp23519, i64 1
+  %tmp23521 = getelementptr inbounds float* %tmp23520, i64 1
+  %tmp23522 = getelementptr inbounds float* %tmp23521, i64 1
+  %tmp23523 = getelementptr inbounds float* %tmp23522, i64 1
+  %tmp23524 = getelementptr inbounds float* %tmp23523, i64 1
+  %tmp23525 = getelementptr inbounds float* %tmp23524, i64 1
+  %tmp23526 = getelementptr inbounds float* %tmp23525, i64 1
+  %tmp23527 = getelementptr inbounds float* %tmp23526, i64 1
+  %tmp23528 = getelementptr inbounds float* %tmp23527, i64 1
+  %tmp23529 = getelementptr inbounds float* %tmp23528, i64 1
+  %tmp23530 = getelementptr inbounds float* %tmp23529, i64 1
+  %tmp23531 = getelementptr inbounds float* %tmp23530, i64 1
+  %tmp23532 = getelementptr inbounds float* %tmp23531, i64 1
+  %tmp23533 = getelementptr inbounds float* %tmp23532, i64 1
+  %tmp23534 = getelementptr inbounds float* %tmp23533, i64 1
+  %tmp23535 = getelementptr inbounds float* %tmp23534, i64 1
+  %tmp23536 = getelementptr inbounds float* %tmp23535, i64 1
+  %tmp23537 = getelementptr inbounds float* %tmp23536, i64 1
+  %tmp23538 = getelementptr inbounds float* %tmp23537, i64 1
+  %tmp23539 = getelementptr inbounds float* %tmp23538, i64 1
+  %tmp23540 = getelementptr inbounds float* %tmp23539, i64 1
+  %tmp23541 = getelementptr inbounds float* %tmp23540, i64 1
+  %tmp23542 = getelementptr inbounds float* %tmp23541, i64 1
+  %tmp23543 = getelementptr inbounds float* %tmp23542, i64 1
+  %tmp23544 = getelementptr inbounds float* %tmp23543, i64 1
+  %tmp23545 = getelementptr inbounds float* %tmp23544, i64 1
+  %tmp23546 = getelementptr inbounds float* %tmp23545, i64 1
+  %tmp23547 = getelementptr inbounds float* %tmp23546, i64 1
+  %tmp23548 = getelementptr inbounds float* %tmp23547, i64 1
+  %tmp23549 = getelementptr inbounds float* %tmp23548, i64 1
+  %tmp23550 = getelementptr inbounds float* %tmp23549, i64 1
+  %tmp23551 = getelementptr inbounds float* %tmp23550, i64 1
+  %tmp23552 = getelementptr inbounds float* %tmp23551, i64 1
+  %tmp23553 = getelementptr inbounds float* %tmp23552, i64 1
+  %tmp23554 = getelementptr inbounds float* %tmp23553, i64 1
+  %tmp23555 = getelementptr inbounds float* %tmp23554, i64 1
+  %tmp23556 = getelementptr inbounds float* %tmp23555, i64 1
+  %tmp23557 = getelementptr inbounds float* %tmp23556, i64 1
+  %tmp23558 = getelementptr inbounds float* %tmp23557, i64 1
+  %tmp23559 = getelementptr inbounds float* %tmp23558, i64 1
+  %tmp23560 = getelementptr inbounds float* %tmp23559, i64 1
+  %tmp23561 = getelementptr inbounds float* %tmp23560, i64 1
+  %tmp23562 = getelementptr inbounds float* %tmp23561, i64 1
+  %tmp23563 = getelementptr inbounds float* %tmp23562, i64 1
+  %tmp23564 = getelementptr inbounds float* %tmp23563, i64 1
+  %tmp23565 = getelementptr inbounds float* %tmp23564, i64 1
+  %tmp23566 = getelementptr inbounds float* %tmp23565, i64 1
+  %tmp23567 = getelementptr inbounds float* %tmp23566, i64 1
+  %tmp23568 = getelementptr inbounds float* %tmp23567, i64 1
+  %tmp23569 = getelementptr inbounds float* %tmp23568, i64 1
+  %tmp23570 = getelementptr inbounds float* %tmp23569, i64 1
+  %tmp23571 = getelementptr inbounds float* %tmp23570, i64 1
+  %tmp23572 = getelementptr inbounds float* %tmp23571, i64 1
+  %tmp23573 = getelementptr inbounds float* %tmp23572, i64 1
+  %tmp23574 = getelementptr inbounds float* %tmp23573, i64 1
+  %tmp23575 = getelementptr inbounds float* %tmp23574, i64 1
+  %tmp23576 = getelementptr inbounds float* %tmp23575, i64 1
+  %tmp23577 = getelementptr inbounds float* %tmp23576, i64 1
+  %tmp23578 = getelementptr inbounds float* %tmp23577, i64 1
+  %tmp23579 = getelementptr inbounds float* %tmp23578, i64 1
+  %tmp23580 = getelementptr inbounds float* %tmp23579, i64 1
+  %tmp23581 = getelementptr inbounds float* %tmp23580, i64 1
+  %tmp23582 = getelementptr inbounds float* %tmp23581, i64 1
+  %tmp23583 = getelementptr inbounds float* %tmp23582, i64 1
+  %tmp23584 = getelementptr inbounds float* %tmp23583, i64 1
+  %tmp23585 = getelementptr inbounds float* %tmp23584, i64 1
+  %tmp23586 = getelementptr inbounds float* %tmp23585, i64 1
+  %tmp23587 = getelementptr inbounds float* %tmp23586, i64 1
+  %tmp23588 = getelementptr inbounds float* %tmp23587, i64 1
+  %tmp23589 = getelementptr inbounds float* %tmp23588, i64 1
+  %tmp23590 = getelementptr inbounds float* %tmp23589, i64 1
+  %tmp23591 = getelementptr inbounds float* %tmp23590, i64 1
+  %tmp23592 = getelementptr inbounds float* %tmp23591, i64 1
+  %tmp23593 = getelementptr inbounds float* %tmp23592, i64 1
+  %tmp23594 = getelementptr inbounds float* %tmp23593, i64 1
+  %tmp23595 = getelementptr inbounds float* %tmp23594, i64 1
+  %tmp23596 = getelementptr inbounds float* %tmp23595, i64 1
+  %tmp23597 = getelementptr inbounds float* %tmp23596, i64 1
+  %tmp23598 = getelementptr inbounds float* %tmp23597, i64 1
+  %tmp23599 = getelementptr inbounds float* %tmp23598, i64 1
+  %tmp23600 = getelementptr inbounds float* %tmp23599, i64 1
+  %tmp23601 = getelementptr inbounds float* %tmp23600, i64 1
+  %tmp23602 = getelementptr inbounds float* %tmp23601, i64 1
+  %tmp23603 = getelementptr inbounds float* %tmp23602, i64 1
+  %tmp23604 = getelementptr inbounds float* %tmp23603, i64 1
+  %tmp23605 = getelementptr inbounds float* %tmp23604, i64 1
+  %tmp23606 = getelementptr inbounds float* %tmp23605, i64 1
+  %tmp23607 = getelementptr inbounds float* %tmp23606, i64 1
+  %tmp23608 = getelementptr inbounds float* %tmp23607, i64 1
+  %tmp23609 = getelementptr inbounds float* %tmp23608, i64 1
+  %tmp23610 = getelementptr inbounds float* %tmp23609, i64 1
+  %tmp23611 = getelementptr inbounds float* %tmp23610, i64 1
+  %tmp23612 = getelementptr inbounds float* %tmp23611, i64 1
+  %tmp23613 = getelementptr inbounds float* %tmp23612, i64 1
+  %tmp23614 = getelementptr inbounds float* %tmp23613, i64 1
+  %tmp23615 = getelementptr inbounds float* %tmp23614, i64 1
+  %tmp23616 = getelementptr inbounds float* %tmp23615, i64 1
+  %tmp23617 = getelementptr inbounds float* %tmp23616, i64 1
+  %tmp23618 = getelementptr inbounds float* %tmp23617, i64 1
+  %tmp23619 = getelementptr inbounds float* %tmp23618, i64 1
+  %tmp23620 = getelementptr inbounds float* %tmp23619, i64 1
+  %tmp23621 = getelementptr inbounds float* %tmp23620, i64 1
+  %tmp23622 = getelementptr inbounds float* %tmp23621, i64 1
+  %tmp23623 = getelementptr inbounds float* %tmp23622, i64 1
+  %tmp23624 = getelementptr inbounds float* %tmp23623, i64 1
+  %tmp23625 = getelementptr inbounds float* %tmp23624, i64 1
+  %tmp23626 = getelementptr inbounds float* %tmp23625, i64 1
+  %tmp23627 = getelementptr inbounds float* %tmp23626, i64 1
+  %tmp23628 = getelementptr inbounds float* %tmp23627, i64 1
+  %tmp23629 = getelementptr inbounds float* %tmp23628, i64 1
+  %tmp23630 = getelementptr inbounds float* %tmp23629, i64 1
+  %tmp23631 = getelementptr inbounds float* %tmp23630, i64 1
+  %tmp23632 = getelementptr inbounds float* %tmp23631, i64 1
+  %tmp23633 = getelementptr inbounds float* %tmp23632, i64 1
+  %tmp23634 = getelementptr inbounds float* %tmp23633, i64 1
+  %tmp23635 = getelementptr inbounds float* %tmp23634, i64 1
+  %tmp23636 = getelementptr inbounds float* %tmp23635, i64 1
+  %tmp23637 = getelementptr inbounds float* %tmp23636, i64 1
+  %tmp23638 = getelementptr inbounds float* %tmp23637, i64 1
+  %tmp23639 = getelementptr inbounds float* %tmp23638, i64 1
+  %tmp23640 = getelementptr inbounds float* %tmp23639, i64 1
+  %tmp23641 = getelementptr inbounds float* %tmp23640, i64 1
+  %tmp23642 = getelementptr inbounds float* %tmp23641, i64 1
+  %tmp23643 = getelementptr inbounds float* %tmp23642, i64 1
+  %tmp23644 = getelementptr inbounds float* %tmp23643, i64 1
+  %tmp23645 = getelementptr inbounds float* %tmp23644, i64 1
+  %tmp23646 = getelementptr inbounds float* %tmp23645, i64 1
+  %tmp23647 = getelementptr inbounds float* %tmp23646, i64 1
+  %tmp23648 = getelementptr inbounds float* %tmp23647, i64 1
+  %tmp23649 = getelementptr inbounds float* %tmp23648, i64 1
+  %tmp23650 = getelementptr inbounds float* %tmp23649, i64 1
+  %tmp23651 = getelementptr inbounds float* %tmp23650, i64 1
+  %tmp23652 = getelementptr inbounds float* %tmp23651, i64 1
+  %tmp23653 = getelementptr inbounds float* %tmp23652, i64 1
+  %tmp23654 = getelementptr inbounds float* %tmp23653, i64 1
+  %tmp23655 = getelementptr inbounds float* %tmp23654, i64 1
+  %tmp23656 = getelementptr inbounds float* %tmp23655, i64 1
+  %tmp23657 = getelementptr inbounds float* %tmp23656, i64 1
+  %tmp23658 = getelementptr inbounds float* %tmp23657, i64 1
+  %tmp23659 = getelementptr inbounds float* %tmp23658, i64 1
+  %tmp23660 = getelementptr inbounds float* %tmp23659, i64 1
+  %tmp23661 = getelementptr inbounds float* %tmp23660, i64 1
+  %tmp23662 = getelementptr inbounds float* %tmp23661, i64 1
+  %tmp23663 = getelementptr inbounds float* %tmp23662, i64 1
+  %tmp23664 = getelementptr inbounds float* %tmp23663, i64 1
+  %tmp23665 = getelementptr inbounds float* %tmp23664, i64 1
+  %tmp23666 = getelementptr inbounds float* %tmp23665, i64 1
+  %tmp23667 = getelementptr inbounds float* %tmp23666, i64 1
+  %tmp23668 = getelementptr inbounds float* %tmp23667, i64 1
+  %tmp23669 = getelementptr inbounds float* %tmp23668, i64 1
+  %tmp23670 = getelementptr inbounds float* %tmp23669, i64 1
+  %tmp23671 = getelementptr inbounds float* %tmp23670, i64 1
+  %tmp23672 = getelementptr inbounds float* %tmp23671, i64 1
+  %tmp23673 = getelementptr inbounds float* %tmp23672, i64 1
+  %tmp23674 = getelementptr inbounds float* %tmp23673, i64 1
+  %tmp23675 = getelementptr inbounds float* %tmp23674, i64 1
+  %tmp23676 = getelementptr inbounds float* %tmp23675, i64 1
+  %tmp23677 = getelementptr inbounds float* %tmp23676, i64 1
+  %tmp23678 = getelementptr inbounds float* %tmp23677, i64 1
+  %tmp23679 = getelementptr inbounds float* %tmp23678, i64 1
+  %tmp23680 = getelementptr inbounds float* %tmp23679, i64 1
+  %tmp23681 = getelementptr inbounds float* %tmp23680, i64 1
+  %tmp23682 = getelementptr inbounds float* %tmp23681, i64 1
+  %tmp23683 = getelementptr inbounds float* %tmp23682, i64 1
+  %tmp23684 = getelementptr inbounds float* %tmp23683, i64 1
+  %tmp23685 = getelementptr inbounds float* %tmp23684, i64 1
+  %tmp23686 = getelementptr inbounds float* %tmp23685, i64 1
+  %tmp23687 = getelementptr inbounds float* %tmp23686, i64 1
+  %tmp23688 = getelementptr inbounds float* %tmp23687, i64 1
+  %tmp23689 = getelementptr inbounds float* %tmp23688, i64 1
+  %tmp23690 = getelementptr inbounds float* %tmp23689, i64 1
+  %tmp23691 = getelementptr inbounds float* %tmp23690, i64 1
+  %tmp23692 = getelementptr inbounds float* %tmp23691, i64 1
+  %tmp23693 = getelementptr inbounds float* %tmp23692, i64 1
+  %tmp23694 = getelementptr inbounds float* %tmp23693, i64 1
+  %tmp23695 = getelementptr inbounds float* %tmp23694, i64 1
+  %tmp23696 = getelementptr inbounds float* %tmp23695, i64 1
+  %tmp23697 = getelementptr inbounds float* %tmp23696, i64 1
+  %tmp23698 = getelementptr inbounds float* %tmp23697, i64 1
+  %tmp23699 = getelementptr inbounds float* %tmp23698, i64 1
+  %tmp23700 = getelementptr inbounds float* %tmp23699, i64 1
+  %tmp23701 = getelementptr inbounds float* %tmp23700, i64 1
+  %tmp23702 = getelementptr inbounds float* %tmp23701, i64 1
+  %tmp23703 = getelementptr inbounds float* %tmp23702, i64 1
+  %tmp23704 = getelementptr inbounds float* %tmp23703, i64 1
+  %tmp23705 = getelementptr inbounds float* %tmp23704, i64 1
+  %tmp23706 = getelementptr inbounds float* %tmp23705, i64 1
+  %tmp23707 = getelementptr inbounds float* %tmp23706, i64 1
+  %tmp23708 = getelementptr inbounds float* %tmp23707, i64 1
+  %tmp23709 = getelementptr inbounds float* %tmp23708, i64 1
+  %tmp23710 = getelementptr inbounds float* %tmp23709, i64 1
+  %tmp23711 = getelementptr inbounds float* %tmp23710, i64 1
+  %tmp23712 = getelementptr inbounds float* %tmp23711, i64 1
+  %tmp23713 = getelementptr inbounds float* %tmp23712, i64 1
+  %tmp23714 = getelementptr inbounds float* %tmp23713, i64 1
+  %tmp23715 = getelementptr inbounds float* %tmp23714, i64 1
+  %tmp23716 = getelementptr inbounds float* %tmp23715, i64 1
+  %tmp23717 = getelementptr inbounds float* %tmp23716, i64 1
+  %tmp23718 = getelementptr inbounds float* %tmp23717, i64 1
+  %tmp23719 = getelementptr inbounds float* %tmp23718, i64 1
+  %tmp23720 = getelementptr inbounds float* %tmp23719, i64 1
+  %tmp23721 = getelementptr inbounds float* %tmp23720, i64 1
+  %tmp23722 = getelementptr inbounds float* %tmp23721, i64 1
+  %tmp23723 = getelementptr inbounds float* %tmp23722, i64 1
+  %tmp23724 = getelementptr inbounds float* %tmp23723, i64 1
+  %tmp23725 = getelementptr inbounds float* %tmp23724, i64 1
+  %tmp23726 = getelementptr inbounds float* %tmp23725, i64 1
+  %tmp23727 = getelementptr inbounds float* %tmp23726, i64 1
+  %tmp23728 = getelementptr inbounds float* %tmp23727, i64 1
+  %tmp23729 = getelementptr inbounds float* %tmp23728, i64 1
+  %tmp23730 = getelementptr inbounds float* %tmp23729, i64 1
+  %tmp23731 = getelementptr inbounds float* %tmp23730, i64 1
+  %tmp23732 = getelementptr inbounds float* %tmp23731, i64 1
+  %tmp23733 = getelementptr inbounds float* %tmp23732, i64 1
+  %tmp23734 = getelementptr inbounds float* %tmp23733, i64 1
+  %tmp23735 = getelementptr inbounds float* %tmp23734, i64 1
+  %tmp23736 = getelementptr inbounds float* %tmp23735, i64 1
+  %tmp23737 = getelementptr inbounds float* %tmp23736, i64 1
+  %tmp23738 = getelementptr inbounds float* %tmp23737, i64 1
+  %tmp23739 = getelementptr inbounds float* %tmp23738, i64 1
+  %tmp23740 = getelementptr inbounds float* %tmp23739, i64 1
+  %tmp23741 = getelementptr inbounds float* %tmp23740, i64 1
+  %tmp23742 = getelementptr inbounds float* %tmp23741, i64 1
+  %tmp23743 = getelementptr inbounds float* %tmp23742, i64 1
+  %tmp23744 = getelementptr inbounds float* %tmp23743, i64 1
+  %tmp23745 = getelementptr inbounds float* %tmp23744, i64 1
+  %tmp23746 = getelementptr inbounds float* %tmp23745, i64 1
+  %tmp23747 = getelementptr inbounds float* %tmp23746, i64 1
+  %tmp23748 = getelementptr inbounds float* %tmp23747, i64 1
+  %tmp23749 = getelementptr inbounds float* %tmp23748, i64 1
+  %tmp23750 = getelementptr inbounds float* %tmp23749, i64 1
+  %tmp23751 = getelementptr inbounds float* %tmp23750, i64 1
+  %tmp23752 = getelementptr inbounds float* %tmp23751, i64 1
+  %tmp23753 = getelementptr inbounds float* %tmp23752, i64 1
+  %tmp23754 = getelementptr inbounds float* %tmp23753, i64 1
+  %tmp23755 = getelementptr inbounds float* %tmp23754, i64 1
+  %tmp23756 = getelementptr inbounds float* %tmp23755, i64 1
+  %tmp23757 = getelementptr inbounds float* %tmp23756, i64 1
+  %tmp23758 = getelementptr inbounds float* %tmp23757, i64 1
+  %tmp23759 = getelementptr inbounds float* %tmp23758, i64 1
+  %tmp23760 = getelementptr inbounds float* %tmp23759, i64 1
+  %tmp23761 = getelementptr inbounds float* %tmp23760, i64 1
+  %tmp23762 = getelementptr inbounds float* %tmp23761, i64 1
+  %tmp23763 = getelementptr inbounds float* %tmp23762, i64 1
+  %tmp23764 = getelementptr inbounds float* %tmp23763, i64 1
+  %tmp23765 = getelementptr inbounds float* %tmp23764, i64 1
+  %tmp23766 = getelementptr inbounds float* %tmp23765, i64 1
+  %tmp23767 = getelementptr inbounds float* %tmp23766, i64 1
+  %tmp23768 = getelementptr inbounds float* %tmp23767, i64 1
+  %tmp23769 = getelementptr inbounds float* %tmp23768, i64 1
+  %tmp23770 = getelementptr inbounds float* %tmp23769, i64 1
+  %tmp23771 = getelementptr inbounds float* %tmp23770, i64 1
+  %tmp23772 = getelementptr inbounds float* %tmp23771, i64 1
+  %tmp23773 = getelementptr inbounds float* %tmp23772, i64 1
+  %tmp23774 = getelementptr inbounds float* %tmp23773, i64 1
+  %tmp23775 = getelementptr inbounds float* %tmp23774, i64 1
+  %tmp23776 = getelementptr inbounds float* %tmp23775, i64 1
+  %tmp23777 = getelementptr inbounds float* %tmp23776, i64 1
+  %tmp23778 = getelementptr inbounds float* %tmp23777, i64 1
+  %tmp23779 = getelementptr inbounds float* %tmp23778, i64 1
+  %tmp23780 = getelementptr inbounds float* %tmp23779, i64 1
+  %tmp23781 = getelementptr inbounds float* %tmp23780, i64 1
+  %tmp23782 = getelementptr inbounds float* %tmp23781, i64 1
+  %tmp23783 = getelementptr inbounds float* %tmp23782, i64 1
+  %tmp23784 = getelementptr inbounds float* %tmp23783, i64 1
+  %tmp23785 = getelementptr inbounds float* %tmp23784, i64 1
+  %tmp23786 = getelementptr inbounds float* %tmp23785, i64 1
+  %tmp23787 = getelementptr inbounds float* %tmp23786, i64 1
+  %tmp23788 = getelementptr inbounds float* %tmp23787, i64 1
+  %tmp23789 = getelementptr inbounds float* %tmp23788, i64 1
+  %tmp23790 = getelementptr inbounds float* %tmp23789, i64 1
+  %tmp23791 = getelementptr inbounds float* %tmp23790, i64 1
+  %tmp23792 = getelementptr inbounds float* %tmp23791, i64 1
+  %tmp23793 = getelementptr inbounds float* %tmp23792, i64 1
+  %tmp23794 = getelementptr inbounds float* %tmp23793, i64 1
+  %tmp23795 = getelementptr inbounds float* %tmp23794, i64 1
+  %tmp23796 = getelementptr inbounds float* %tmp23795, i64 1
+  %tmp23797 = getelementptr inbounds float* %tmp23796, i64 1
+  %tmp23798 = getelementptr inbounds float* %tmp23797, i64 1
+  %tmp23799 = getelementptr inbounds float* %tmp23798, i64 1
+  %tmp23800 = getelementptr inbounds float* %tmp23799, i64 1
+  %tmp23801 = getelementptr inbounds float* %tmp23800, i64 1
+  %tmp23802 = getelementptr inbounds float* %tmp23801, i64 1
+  %tmp23803 = getelementptr inbounds float* %tmp23802, i64 1
+  %tmp23804 = getelementptr inbounds float* %tmp23803, i64 1
+  %tmp23805 = getelementptr inbounds float* %tmp23804, i64 1
+  %tmp23806 = getelementptr inbounds float* %tmp23805, i64 1
+  %tmp23807 = getelementptr inbounds float* %tmp23806, i64 1
+  %tmp23808 = getelementptr inbounds float* %tmp23807, i64 1
+  %tmp23809 = getelementptr inbounds float* %tmp23808, i64 1
+  %tmp23810 = getelementptr inbounds float* %tmp23809, i64 1
+  %tmp23811 = getelementptr inbounds float* %tmp23810, i64 1
+  %tmp23812 = getelementptr inbounds float* %tmp23811, i64 1
+  %tmp23813 = getelementptr inbounds float* %tmp23812, i64 1
+  %tmp23814 = getelementptr inbounds float* %tmp23813, i64 1
+  %tmp23815 = getelementptr inbounds float* %tmp23814, i64 1
+  %tmp23816 = getelementptr inbounds float* %tmp23815, i64 1
+  %tmp23817 = getelementptr inbounds float* %tmp23816, i64 1
+  %tmp23818 = getelementptr inbounds float* %tmp23817, i64 1
+  %tmp23819 = getelementptr inbounds float* %tmp23818, i64 1
+  %tmp23820 = getelementptr inbounds float* %tmp23819, i64 1
+  %tmp23821 = getelementptr inbounds float* %tmp23820, i64 1
+  %tmp23822 = getelementptr inbounds float* %tmp23821, i64 1
+  %tmp23823 = getelementptr inbounds float* %tmp23822, i64 1
+  %tmp23824 = getelementptr inbounds float* %tmp23823, i64 1
+  %tmp23825 = getelementptr inbounds float* %tmp23824, i64 1
+  %tmp23826 = getelementptr inbounds float* %tmp23825, i64 1
+  %tmp23827 = getelementptr inbounds float* %tmp23826, i64 1
+  %tmp23828 = getelementptr inbounds float* %tmp23827, i64 1
+  %tmp23829 = getelementptr inbounds float* %tmp23828, i64 1
+  %tmp23830 = getelementptr inbounds float* %tmp23829, i64 1
+  %tmp23831 = getelementptr inbounds float* %tmp23830, i64 1
+  %tmp23832 = getelementptr inbounds float* %tmp23831, i64 1
+  %tmp23833 = getelementptr inbounds float* %tmp23832, i64 1
+  %tmp23834 = getelementptr inbounds float* %tmp23833, i64 1
+  %tmp23835 = getelementptr inbounds float* %tmp23834, i64 1
+  %tmp23836 = getelementptr inbounds float* %tmp23835, i64 1
+  %tmp23837 = getelementptr inbounds float* %tmp23836, i64 1
+  %tmp23838 = getelementptr inbounds float* %tmp23837, i64 1
+  %tmp23839 = getelementptr inbounds float* %tmp23838, i64 1
+  %tmp23840 = getelementptr inbounds float* %tmp23839, i64 1
+  %tmp23841 = getelementptr inbounds float* %tmp23840, i64 1
+  %tmp23842 = getelementptr inbounds float* %tmp23841, i64 1
+  %tmp23843 = getelementptr inbounds float* %tmp23842, i64 1
+  %tmp23844 = getelementptr inbounds float* %tmp23843, i64 1
+  %tmp23845 = getelementptr inbounds float* %tmp23844, i64 1
+  %tmp23846 = getelementptr inbounds float* %tmp23845, i64 1
+  %tmp23847 = getelementptr inbounds float* %tmp23846, i64 1
+  %tmp23848 = getelementptr inbounds float* %tmp23847, i64 1
+  %tmp23849 = getelementptr inbounds float* %tmp23848, i64 1
+  %tmp23850 = getelementptr inbounds float* %tmp23849, i64 1
+  %tmp23851 = getelementptr inbounds float* %tmp23850, i64 1
+  %tmp23852 = getelementptr inbounds float* %tmp23851, i64 1
+  %tmp23853 = getelementptr inbounds float* %tmp23852, i64 1
+  %tmp23854 = getelementptr inbounds float* %tmp23853, i64 1
+  %tmp23855 = getelementptr inbounds float* %tmp23854, i64 1
+  %tmp23856 = getelementptr inbounds float* %tmp23855, i64 1
+  %tmp23857 = getelementptr inbounds float* %tmp23856, i64 1
+  %tmp23858 = getelementptr inbounds float* %tmp23857, i64 1
+  %tmp23859 = getelementptr inbounds float* %tmp23858, i64 1
+  %tmp23860 = getelementptr inbounds float* %tmp23859, i64 1
+  %tmp23861 = getelementptr inbounds float* %tmp23860, i64 1
+  %tmp23862 = getelementptr inbounds float* %tmp23861, i64 1
+  %tmp23863 = getelementptr inbounds float* %tmp23862, i64 1
+  %tmp23864 = getelementptr inbounds float* %tmp23863, i64 1
+  %tmp23865 = getelementptr inbounds float* %tmp23864, i64 1
+  %tmp23866 = getelementptr inbounds float* %tmp23865, i64 1
+  %tmp23867 = getelementptr inbounds float* %tmp23866, i64 1
+  %tmp23868 = getelementptr inbounds float* %tmp23867, i64 1
+  %tmp23869 = getelementptr inbounds float* %tmp23868, i64 1
+  %tmp23870 = getelementptr inbounds float* %tmp23869, i64 1
+  %tmp23871 = getelementptr inbounds float* %tmp23870, i64 1
+  %tmp23872 = getelementptr inbounds float* %tmp23871, i64 1
+  %tmp23873 = getelementptr inbounds float* %tmp23872, i64 1
+  %tmp23874 = getelementptr inbounds float* %tmp23873, i64 1
+  %tmp23875 = getelementptr inbounds float* %tmp23874, i64 1
+  %tmp23876 = getelementptr inbounds float* %tmp23875, i64 1
+  %tmp23877 = getelementptr inbounds float* %tmp23876, i64 1
+  %tmp23878 = getelementptr inbounds float* %tmp23877, i64 1
+  %tmp23879 = getelementptr inbounds float* %tmp23878, i64 1
+  %tmp23880 = getelementptr inbounds float* %tmp23879, i64 1
+  %tmp23881 = getelementptr inbounds float* %tmp23880, i64 1
+  %tmp23882 = getelementptr inbounds float* %tmp23881, i64 1
+  %tmp23883 = getelementptr inbounds float* %tmp23882, i64 1
+  %tmp23884 = getelementptr inbounds float* %tmp23883, i64 1
+  %tmp23885 = getelementptr inbounds float* %tmp23884, i64 1
+  %tmp23886 = getelementptr inbounds float* %tmp23885, i64 1
+  %tmp23887 = getelementptr inbounds float* %tmp23886, i64 1
+  %tmp23888 = getelementptr inbounds float* %tmp23887, i64 1
+  %tmp23889 = getelementptr inbounds float* %tmp23888, i64 1
+  %tmp23890 = getelementptr inbounds float* %tmp23889, i64 1
+  %tmp23891 = getelementptr inbounds float* %tmp23890, i64 1
+  %tmp23892 = getelementptr inbounds float* %tmp23891, i64 1
+  %tmp23893 = getelementptr inbounds float* %tmp23892, i64 1
+  %tmp23894 = getelementptr inbounds float* %tmp23893, i64 1
+  %tmp23895 = getelementptr inbounds float* %tmp23894, i64 1
+  %tmp23896 = getelementptr inbounds float* %tmp23895, i64 1
+  %tmp23897 = getelementptr inbounds float* %tmp23896, i64 1
+  %tmp23898 = getelementptr inbounds float* %tmp23897, i64 1
+  %tmp23899 = getelementptr inbounds float* %tmp23898, i64 1
+  %tmp23900 = getelementptr inbounds float* %tmp23899, i64 1
+  %tmp23901 = getelementptr inbounds float* %tmp23900, i64 1
+  %tmp23902 = getelementptr inbounds float* %tmp23901, i64 1
+  %tmp23903 = getelementptr inbounds float* %tmp23902, i64 1
+  %tmp23904 = getelementptr inbounds float* %tmp23903, i64 1
+  %tmp23905 = getelementptr inbounds float* %tmp23904, i64 1
+  %tmp23906 = getelementptr inbounds float* %tmp23905, i64 1
+  %tmp23907 = getelementptr inbounds float* %tmp23906, i64 1
+  %tmp23908 = getelementptr inbounds float* %tmp23907, i64 1
+  %tmp23909 = getelementptr inbounds float* %tmp23908, i64 1
+  %tmp23910 = getelementptr inbounds float* %tmp23909, i64 1
+  %tmp23911 = getelementptr inbounds float* %tmp23910, i64 1
+  %tmp23912 = getelementptr inbounds float* %tmp23911, i64 1
+  %tmp23913 = getelementptr inbounds float* %tmp23912, i64 1
+  %tmp23914 = getelementptr inbounds float* %tmp23913, i64 1
+  %tmp23915 = getelementptr inbounds float* %tmp23914, i64 1
+  %tmp23916 = getelementptr inbounds float* %tmp23915, i64 1
+  %tmp23917 = getelementptr inbounds float* %tmp23916, i64 1
+  %tmp23918 = getelementptr inbounds float* %tmp23917, i64 1
+  %tmp23919 = getelementptr inbounds float* %tmp23918, i64 1
+  %tmp23920 = getelementptr inbounds float* %tmp23919, i64 1
+  %tmp23921 = getelementptr inbounds float* %tmp23920, i64 1
+  %tmp23922 = getelementptr inbounds float* %tmp23921, i64 1
+  %tmp23923 = getelementptr inbounds float* %tmp23922, i64 1
+  %tmp23924 = getelementptr inbounds float* %tmp23923, i64 1
+  %tmp23925 = getelementptr inbounds float* %tmp23924, i64 1
+  %tmp23926 = getelementptr inbounds float* %tmp23925, i64 1
+  %tmp23927 = getelementptr inbounds float* %tmp23926, i64 1
+  %tmp23928 = getelementptr inbounds float* %tmp23927, i64 1
+  %tmp23929 = getelementptr inbounds float* %tmp23928, i64 1
+  %tmp23930 = getelementptr inbounds float* %tmp23929, i64 1
+  %tmp23931 = getelementptr inbounds float* %tmp23930, i64 1
+  %tmp23932 = getelementptr inbounds float* %tmp23931, i64 1
+  %tmp23933 = getelementptr inbounds float* %tmp23932, i64 1
+  %tmp23934 = getelementptr inbounds float* %tmp23933, i64 1
+  %tmp23935 = getelementptr inbounds float* %tmp23934, i64 1
+  %tmp23936 = getelementptr inbounds float* %tmp23935, i64 1
+  %tmp23937 = getelementptr inbounds float* %tmp23936, i64 1
+  %tmp23938 = getelementptr inbounds float* %tmp23937, i64 1
+  %tmp23939 = getelementptr inbounds float* %tmp23938, i64 1
+  %tmp23940 = getelementptr inbounds float* %tmp23939, i64 1
+  %tmp23941 = getelementptr inbounds float* %tmp23940, i64 1
+  %tmp23942 = getelementptr inbounds float* %tmp23941, i64 1
+  %tmp23943 = getelementptr inbounds float* %tmp23942, i64 1
+  %tmp23944 = getelementptr inbounds float* %tmp23943, i64 1
+  %tmp23945 = getelementptr inbounds float* %tmp23944, i64 1
+  %tmp23946 = getelementptr inbounds float* %tmp23945, i64 1
+  %tmp23947 = getelementptr inbounds float* %tmp23946, i64 1
+  %tmp23948 = getelementptr inbounds float* %tmp23947, i64 1
+  %tmp23949 = getelementptr inbounds float* %tmp23948, i64 1
+  %tmp23950 = getelementptr inbounds float* %tmp23949, i64 1
+  %tmp23951 = getelementptr inbounds float* %tmp23950, i64 1
+  %tmp23952 = getelementptr inbounds float* %tmp23951, i64 1
+  %tmp23953 = getelementptr inbounds float* %tmp23952, i64 1
+  %tmp23954 = getelementptr inbounds float* %tmp23953, i64 1
+  %tmp23955 = getelementptr inbounds float* %tmp23954, i64 1
+  %tmp23956 = getelementptr inbounds float* %tmp23955, i64 1
+  %tmp23957 = getelementptr inbounds float* %tmp23956, i64 1
+  %tmp23958 = getelementptr inbounds float* %tmp23957, i64 1
+  %tmp23959 = getelementptr inbounds float* %tmp23958, i64 1
+  %tmp23960 = getelementptr inbounds float* %tmp23959, i64 1
+  %tmp23961 = getelementptr inbounds float* %tmp23960, i64 1
+  %tmp23962 = getelementptr inbounds float* %tmp23961, i64 1
+  %tmp23963 = getelementptr inbounds float* %tmp23962, i64 1
+  %tmp23964 = getelementptr inbounds float* %tmp23963, i64 1
+  %tmp23965 = getelementptr inbounds float* %tmp23964, i64 1
+  %tmp23966 = getelementptr inbounds float* %tmp23965, i64 1
+  %tmp23967 = getelementptr inbounds float* %tmp23966, i64 1
+  %tmp23968 = getelementptr inbounds float* %tmp23967, i64 1
+  %tmp23969 = getelementptr inbounds float* %tmp23968, i64 1
+  %tmp23970 = getelementptr inbounds float* %tmp23969, i64 1
+  %tmp23971 = getelementptr inbounds float* %tmp23970, i64 1
+  %tmp23972 = getelementptr inbounds float* %tmp23971, i64 1
+  %tmp23973 = getelementptr inbounds float* %tmp23972, i64 1
+  %tmp23974 = getelementptr inbounds float* %tmp23973, i64 1
+  %tmp23975 = getelementptr inbounds float* %tmp23974, i64 1
+  %tmp23976 = getelementptr inbounds float* %tmp23975, i64 1
+  %tmp23977 = getelementptr inbounds float* %tmp23976, i64 1
+  %tmp23978 = getelementptr inbounds float* %tmp23977, i64 1
+  %tmp23979 = getelementptr inbounds float* %tmp23978, i64 1
+  %tmp23980 = getelementptr inbounds float* %tmp23979, i64 1
+  %tmp23981 = getelementptr inbounds float* %tmp23980, i64 1
+  %tmp23982 = getelementptr inbounds float* %tmp23981, i64 1
+  %tmp23983 = getelementptr inbounds float* %tmp23982, i64 1
+  %tmp23984 = getelementptr inbounds float* %tmp23983, i64 1
+  %tmp23985 = getelementptr inbounds float* %tmp23984, i64 1
+  %tmp23986 = getelementptr inbounds float* %tmp23985, i64 1
+  %tmp23987 = getelementptr inbounds float* %tmp23986, i64 1
+  %tmp23988 = getelementptr inbounds float* %tmp23987, i64 1
+  %tmp23989 = getelementptr inbounds float* %tmp23988, i64 1
+  %tmp23990 = getelementptr inbounds float* %tmp23989, i64 1
+  %tmp23991 = getelementptr inbounds float* %tmp23990, i64 1
+  %tmp23992 = getelementptr inbounds float* %tmp23991, i64 1
+  %tmp23993 = getelementptr inbounds float* %tmp23992, i64 1
+  %tmp23994 = getelementptr inbounds float* %tmp23993, i64 1
+  %tmp23995 = getelementptr inbounds float* %tmp23994, i64 1
+  %tmp23996 = getelementptr inbounds float* %tmp23995, i64 1
+  %tmp23997 = getelementptr inbounds float* %tmp23996, i64 1
+  %tmp23998 = getelementptr inbounds float* %tmp23997, i64 1
+  %tmp23999 = getelementptr inbounds float* %tmp23998, i64 1
+  %tmp24000 = getelementptr inbounds float* %tmp23999, i64 1
+  %tmp24001 = getelementptr inbounds float* %tmp24000, i64 1
+  %tmp24002 = getelementptr inbounds float* %tmp24001, i64 1
+  %tmp24003 = getelementptr inbounds float* %tmp24002, i64 1
+  %tmp24004 = getelementptr inbounds float* %tmp24003, i64 1
+  %tmp24005 = getelementptr inbounds float* %tmp24004, i64 1
+  %tmp24006 = getelementptr inbounds float* %tmp24005, i64 1
+  %tmp24007 = getelementptr inbounds float* %tmp24006, i64 1
+  %tmp24008 = getelementptr inbounds float* %tmp24007, i64 1
+  %tmp24009 = getelementptr inbounds float* %tmp24008, i64 1
+  %tmp24010 = getelementptr inbounds float* %tmp24009, i64 1
+  %tmp24011 = getelementptr inbounds float* %tmp24010, i64 1
+  %tmp24012 = getelementptr inbounds float* %tmp24011, i64 1
+  %tmp24013 = getelementptr inbounds float* %tmp24012, i64 1
+  %tmp24014 = getelementptr inbounds float* %tmp24013, i64 1
+  %tmp24015 = getelementptr inbounds float* %tmp24014, i64 1
+  %tmp24016 = getelementptr inbounds float* %tmp24015, i64 1
+  %tmp24017 = getelementptr inbounds float* %tmp24016, i64 1
+  %tmp24018 = getelementptr inbounds float* %tmp24017, i64 1
+  %tmp24019 = getelementptr inbounds float* %tmp24018, i64 1
+  %tmp24020 = getelementptr inbounds float* %tmp24019, i64 1
+  %tmp24021 = getelementptr inbounds float* %tmp24020, i64 1
+  %tmp24022 = getelementptr inbounds float* %tmp24021, i64 1
+  %tmp24023 = getelementptr inbounds float* %tmp24022, i64 1
+  %tmp24024 = getelementptr inbounds float* %tmp24023, i64 1
+  %tmp24025 = getelementptr inbounds float* %tmp24024, i64 1
+  %tmp24026 = getelementptr inbounds float* %tmp24025, i64 1
+  %tmp24027 = getelementptr inbounds float* %tmp24026, i64 1
+  %tmp24028 = getelementptr inbounds float* %tmp24027, i64 1
+  %tmp24029 = getelementptr inbounds float* %tmp24028, i64 1
+  %tmp24030 = getelementptr inbounds float* %tmp24029, i64 1
+  %tmp24031 = getelementptr inbounds float* %tmp24030, i64 1
+  %tmp24032 = getelementptr inbounds float* %tmp24031, i64 1
+  %tmp24033 = getelementptr inbounds float* %tmp24032, i64 1
+  %tmp24034 = getelementptr inbounds float* %tmp24033, i64 1
+  %tmp24035 = getelementptr inbounds float* %tmp24034, i64 1
+  %tmp24036 = getelementptr inbounds float* %tmp24035, i64 1
+  %tmp24037 = getelementptr inbounds float* %tmp24036, i64 1
+  %tmp24038 = getelementptr inbounds float* %tmp24037, i64 1
+  %tmp24039 = getelementptr inbounds float* %tmp24038, i64 1
+  %tmp24040 = getelementptr inbounds float* %tmp24039, i64 1
+  %tmp24041 = getelementptr inbounds float* %tmp24040, i64 1
+  %tmp24042 = getelementptr inbounds float* %tmp24041, i64 1
+  %tmp24043 = getelementptr inbounds float* %tmp24042, i64 1
+  %tmp24044 = getelementptr inbounds float* %tmp24043, i64 1
+  %tmp24045 = getelementptr inbounds float* %tmp24044, i64 1
+  %tmp24046 = getelementptr inbounds float* %tmp24045, i64 1
+  %tmp24047 = getelementptr inbounds float* %tmp24046, i64 1
+  %tmp24048 = getelementptr inbounds float* %tmp24047, i64 1
+  %tmp24049 = getelementptr inbounds float* %tmp24048, i64 1
+  %tmp24050 = getelementptr inbounds float* %tmp24049, i64 1
+  %tmp24051 = getelementptr inbounds float* %tmp24050, i64 1
+  %tmp24052 = getelementptr inbounds float* %tmp24051, i64 1
+  %tmp24053 = getelementptr inbounds float* %tmp24052, i64 1
+  %tmp24054 = getelementptr inbounds float* %tmp24053, i64 1
+  %tmp24055 = getelementptr inbounds float* %tmp24054, i64 1
+  %tmp24056 = getelementptr inbounds float* %tmp24055, i64 1
+  %tmp24057 = getelementptr inbounds float* %tmp24056, i64 1
+  %tmp24058 = getelementptr inbounds float* %tmp24057, i64 1
+  %tmp24059 = getelementptr inbounds float* %tmp24058, i64 1
+  %tmp24060 = getelementptr inbounds float* %tmp24059, i64 1
+  %tmp24061 = getelementptr inbounds float* %tmp24060, i64 1
+  %tmp24062 = getelementptr inbounds float* %tmp24061, i64 1
+  %tmp24063 = getelementptr inbounds float* %tmp24062, i64 1
+  %tmp24064 = getelementptr inbounds float* %tmp24063, i64 1
+  %tmp24065 = getelementptr inbounds float* %tmp24064, i64 1
+  %tmp24066 = getelementptr inbounds float* %tmp24065, i64 1
+  %tmp24067 = getelementptr inbounds float* %tmp24066, i64 1
+  %tmp24068 = getelementptr inbounds float* %tmp24067, i64 1
+  %tmp24069 = getelementptr inbounds float* %tmp24068, i64 1
+  %tmp24070 = getelementptr inbounds float* %tmp24069, i64 1
+  %tmp24071 = getelementptr inbounds float* %tmp24070, i64 1
+  %tmp24072 = getelementptr inbounds float* %tmp24071, i64 1
+  %tmp24073 = getelementptr inbounds float* %tmp24072, i64 1
+  %tmp24074 = getelementptr inbounds float* %tmp24073, i64 1
+  %tmp24075 = getelementptr inbounds float* %tmp24074, i64 1
+  %tmp24076 = getelementptr inbounds float* %tmp24075, i64 1
+  %tmp24077 = getelementptr inbounds float* %tmp24076, i64 1
+  %tmp24078 = getelementptr inbounds float* %tmp24077, i64 1
+  %tmp24079 = getelementptr inbounds float* %tmp24078, i64 1
+  %tmp24080 = getelementptr inbounds float* %tmp24079, i64 1
+  %tmp24081 = getelementptr inbounds float* %tmp24080, i64 1
+  %tmp24082 = getelementptr inbounds float* %tmp24081, i64 1
+  %tmp24083 = getelementptr inbounds float* %tmp24082, i64 1
+  %tmp24084 = getelementptr inbounds float* %tmp24083, i64 1
+  %tmp24085 = getelementptr inbounds float* %tmp24084, i64 1
+  %tmp24086 = getelementptr inbounds float* %tmp24085, i64 1
+  %tmp24087 = getelementptr inbounds float* %tmp24086, i64 1
+  %tmp24088 = getelementptr inbounds float* %tmp24087, i64 1
+  %tmp24089 = getelementptr inbounds float* %tmp24088, i64 1
+  %tmp24090 = getelementptr inbounds float* %tmp24089, i64 1
+  %tmp24091 = getelementptr inbounds float* %tmp24090, i64 1
+  %tmp24092 = getelementptr inbounds float* %tmp24091, i64 1
+  %tmp24093 = getelementptr inbounds float* %tmp24092, i64 1
+  %tmp24094 = getelementptr inbounds float* %tmp24093, i64 1
+  %tmp24095 = getelementptr inbounds float* %tmp24094, i64 1
+  %tmp24096 = getelementptr inbounds float* %tmp24095, i64 1
+  %tmp24097 = getelementptr inbounds float* %tmp24096, i64 1
+  %tmp24098 = getelementptr inbounds float* %tmp24097, i64 1
+  %tmp24099 = getelementptr inbounds float* %tmp24098, i64 1
+  %tmp24100 = getelementptr inbounds float* %tmp24099, i64 1
+  %tmp24101 = getelementptr inbounds float* %tmp24100, i64 1
+  %tmp24102 = getelementptr inbounds float* %tmp24101, i64 1
+  %tmp24103 = getelementptr inbounds float* %tmp24102, i64 1
+  %tmp24104 = getelementptr inbounds float* %tmp24103, i64 1
+  %tmp24105 = getelementptr inbounds float* %tmp24104, i64 1
+  %tmp24106 = getelementptr inbounds float* %tmp24105, i64 1
+  %tmp24107 = getelementptr inbounds float* %tmp24106, i64 1
+  %tmp24108 = getelementptr inbounds float* %tmp24107, i64 1
+  %tmp24109 = getelementptr inbounds float* %tmp24108, i64 1
+  %tmp24110 = getelementptr inbounds float* %tmp24109, i64 1
+  %tmp24111 = getelementptr inbounds float* %tmp24110, i64 1
+  %tmp24112 = getelementptr inbounds float* %tmp24111, i64 1
+  %tmp24113 = getelementptr inbounds float* %tmp24112, i64 1
+  %tmp24114 = getelementptr inbounds float* %tmp24113, i64 1
+  %tmp24115 = getelementptr inbounds float* %tmp24114, i64 1
+  %tmp24116 = getelementptr inbounds float* %tmp24115, i64 1
+  %tmp24117 = getelementptr inbounds float* %tmp24116, i64 1
+  %tmp24118 = getelementptr inbounds float* %tmp24117, i64 1
+  %tmp24119 = getelementptr inbounds float* %tmp24118, i64 1
+  %tmp24120 = getelementptr inbounds float* %tmp24119, i64 1
+  %tmp24121 = getelementptr inbounds float* %tmp24120, i64 1
+  %tmp24122 = getelementptr inbounds float* %tmp24121, i64 1
+  %tmp24123 = getelementptr inbounds float* %tmp24122, i64 1
+  %tmp24124 = getelementptr inbounds float* %tmp24123, i64 1
+  %tmp24125 = getelementptr inbounds float* %tmp24124, i64 1
+  %tmp24126 = getelementptr inbounds float* %tmp24125, i64 1
+  %tmp24127 = getelementptr inbounds float* %tmp24126, i64 1
+  %tmp24128 = getelementptr inbounds float* %tmp24127, i64 1
+  %tmp24129 = getelementptr inbounds float* %tmp24128, i64 1
+  %tmp24130 = getelementptr inbounds float* %tmp24129, i64 1
+  %tmp24131 = getelementptr inbounds float* %tmp24130, i64 1
+  %tmp24132 = getelementptr inbounds float* %tmp24131, i64 1
+  %tmp24133 = getelementptr inbounds float* %tmp24132, i64 1
+  %tmp24134 = getelementptr inbounds float* %tmp24133, i64 1
+  %tmp24135 = getelementptr inbounds float* %tmp24134, i64 1
+  %tmp24136 = getelementptr inbounds float* %tmp24135, i64 1
+  %tmp24137 = getelementptr inbounds float* %tmp24136, i64 1
+  %tmp24138 = getelementptr inbounds float* %tmp24137, i64 1
+  %tmp24139 = getelementptr inbounds float* %tmp24138, i64 1
+  %tmp24140 = getelementptr inbounds float* %tmp24139, i64 1
+  %tmp24141 = getelementptr inbounds float* %tmp24140, i64 1
+  %tmp24142 = getelementptr inbounds float* %tmp24141, i64 1
+  %tmp24143 = getelementptr inbounds float* %tmp24142, i64 1
+  %tmp24144 = getelementptr inbounds float* %tmp24143, i64 1
+  %tmp24145 = getelementptr inbounds float* %tmp24144, i64 1
+  %tmp24146 = getelementptr inbounds float* %tmp24145, i64 1
+  %tmp24147 = getelementptr inbounds float* %tmp24146, i64 1
+  %tmp24148 = getelementptr inbounds float* %tmp24147, i64 1
+  %tmp24149 = getelementptr inbounds float* %tmp24148, i64 1
+  %tmp24150 = getelementptr inbounds float* %tmp24149, i64 1
+  %tmp24151 = getelementptr inbounds float* %tmp24150, i64 1
+  %tmp24152 = getelementptr inbounds float* %tmp24151, i64 1
+  %tmp24153 = getelementptr inbounds float* %tmp24152, i64 1
+  %tmp24154 = getelementptr inbounds float* %tmp24153, i64 1
+  %tmp24155 = getelementptr inbounds float* %tmp24154, i64 1
+  %tmp24156 = getelementptr inbounds float* %tmp24155, i64 1
+  %tmp24157 = getelementptr inbounds float* %tmp24156, i64 1
+  %tmp24158 = getelementptr inbounds float* %tmp24157, i64 1
+  %tmp24159 = getelementptr inbounds float* %tmp24158, i64 1
+  %tmp24160 = getelementptr inbounds float* %tmp24159, i64 1
+  %tmp24161 = getelementptr inbounds float* %tmp24160, i64 1
+  %tmp24162 = getelementptr inbounds float* %tmp24161, i64 1
+  %tmp24163 = getelementptr inbounds float* %tmp24162, i64 1
+  %tmp24164 = getelementptr inbounds float* %tmp24163, i64 1
+  %tmp24165 = getelementptr inbounds float* %tmp24164, i64 1
+  %tmp24166 = getelementptr inbounds float* %tmp24165, i64 1
+  %tmp24167 = getelementptr inbounds float* %tmp24166, i64 1
+  %tmp24168 = getelementptr inbounds float* %tmp24167, i64 1
+  %tmp24169 = getelementptr inbounds float* %tmp24168, i64 1
+  %tmp24170 = getelementptr inbounds float* %tmp24169, i64 1
+  %tmp24171 = getelementptr inbounds float* %tmp24170, i64 1
+  %tmp24172 = getelementptr inbounds float* %tmp24171, i64 1
+  %tmp24173 = getelementptr inbounds float* %tmp24172, i64 1
+  %tmp24174 = getelementptr inbounds float* %tmp24173, i64 1
+  %tmp24175 = getelementptr inbounds float* %tmp24174, i64 1
+  %tmp24176 = getelementptr inbounds float* %tmp24175, i64 1
+  %tmp24177 = getelementptr inbounds float* %tmp24176, i64 1
+  %tmp24178 = getelementptr inbounds float* %tmp24177, i64 1
+  %tmp24179 = getelementptr inbounds float* %tmp24178, i64 1
+  %tmp24180 = getelementptr inbounds float* %tmp24179, i64 1
+  %tmp24181 = getelementptr inbounds float* %tmp24180, i64 1
+  %tmp24182 = getelementptr inbounds float* %tmp24181, i64 1
+  %tmp24183 = getelementptr inbounds float* %tmp24182, i64 1
+  %tmp24184 = getelementptr inbounds float* %tmp24183, i64 1
+  %tmp24185 = getelementptr inbounds float* %tmp24184, i64 1
+  %tmp24186 = getelementptr inbounds float* %tmp24185, i64 1
+  %tmp24187 = getelementptr inbounds float* %tmp24186, i64 1
+  %tmp24188 = getelementptr inbounds float* %tmp24187, i64 1
+  %tmp24189 = getelementptr inbounds float* %tmp24188, i64 1
+  %tmp24190 = getelementptr inbounds float* %tmp24189, i64 1
+  %tmp24191 = getelementptr inbounds float* %tmp24190, i64 1
+  %tmp24192 = getelementptr inbounds float* %tmp24191, i64 1
+  %tmp24193 = getelementptr inbounds float* %tmp24192, i64 1
+  %tmp24194 = getelementptr inbounds float* %tmp24193, i64 1
+  %tmp24195 = getelementptr inbounds float* %tmp24194, i64 1
+  %tmp24196 = getelementptr inbounds float* %tmp24195, i64 1
+  %tmp24197 = getelementptr inbounds float* %tmp24196, i64 1
+  %tmp24198 = getelementptr inbounds float* %tmp24197, i64 1
+  %tmp24199 = getelementptr inbounds float* %tmp24198, i64 1
+  %tmp24200 = getelementptr inbounds float* %tmp24199, i64 1
+  %tmp24201 = getelementptr inbounds float* %tmp24200, i64 1
+  %tmp24202 = getelementptr inbounds float* %tmp24201, i64 1
+  %tmp24203 = getelementptr inbounds float* %tmp24202, i64 1
+  %tmp24204 = getelementptr inbounds float* %tmp24203, i64 1
+  %tmp24205 = getelementptr inbounds float* %tmp24204, i64 1
+  %tmp24206 = getelementptr inbounds float* %tmp24205, i64 1
+  %tmp24207 = getelementptr inbounds float* %tmp24206, i64 1
+  %tmp24208 = getelementptr inbounds float* %tmp24207, i64 1
+  %tmp24209 = getelementptr inbounds float* %tmp24208, i64 1
+  %tmp24210 = getelementptr inbounds float* %tmp24209, i64 1
+  %tmp24211 = getelementptr inbounds float* %tmp24210, i64 1
+  %tmp24212 = getelementptr inbounds float* %tmp24211, i64 1
+  %tmp24213 = getelementptr inbounds float* %tmp24212, i64 1
+  %tmp24214 = getelementptr inbounds float* %tmp24213, i64 1
+  %tmp24215 = getelementptr inbounds float* %tmp24214, i64 1
+  %tmp24216 = getelementptr inbounds float* %tmp24215, i64 1
+  %tmp24217 = getelementptr inbounds float* %tmp24216, i64 1
+  %tmp24218 = getelementptr inbounds float* %tmp24217, i64 1
+  %tmp24219 = getelementptr inbounds float* %tmp24218, i64 1
+  %tmp24220 = getelementptr inbounds float* %tmp24219, i64 1
+  %tmp24221 = getelementptr inbounds float* %tmp24220, i64 1
+  %tmp24222 = getelementptr inbounds float* %tmp24221, i64 1
+  %tmp24223 = getelementptr inbounds float* %tmp24222, i64 1
+  %tmp24224 = getelementptr inbounds float* %tmp24223, i64 1
+  %tmp24225 = getelementptr inbounds float* %tmp24224, i64 1
+  %tmp24226 = getelementptr inbounds float* %tmp24225, i64 1
+  %tmp24227 = getelementptr inbounds float* %tmp24226, i64 1
+  %tmp24228 = getelementptr inbounds float* %tmp24227, i64 1
+  %tmp24229 = getelementptr inbounds float* %tmp24228, i64 1
+  %tmp24230 = getelementptr inbounds float* %tmp24229, i64 1
+  %tmp24231 = getelementptr inbounds float* %tmp24230, i64 1
+  %tmp24232 = getelementptr inbounds float* %tmp24231, i64 1
+  %tmp24233 = getelementptr inbounds float* %tmp24232, i64 1
+  %tmp24234 = getelementptr inbounds float* %tmp24233, i64 1
+  %tmp24235 = getelementptr inbounds float* %tmp24234, i64 1
+  %tmp24236 = getelementptr inbounds float* %tmp24235, i64 1
+  %tmp24237 = getelementptr inbounds float* %tmp24236, i64 1
+  %tmp24238 = getelementptr inbounds float* %tmp24237, i64 1
+  %tmp24239 = getelementptr inbounds float* %tmp24238, i64 1
+  %tmp24240 = getelementptr inbounds float* %tmp24239, i64 1
+  %tmp24241 = getelementptr inbounds float* %tmp24240, i64 1
+  %tmp24242 = getelementptr inbounds float* %tmp24241, i64 1
+  %tmp24243 = getelementptr inbounds float* %tmp24242, i64 1
+  %tmp24244 = getelementptr inbounds float* %tmp24243, i64 1
+  %tmp24245 = getelementptr inbounds float* %tmp24244, i64 1
+  %tmp24246 = getelementptr inbounds float* %tmp24245, i64 1
+  %tmp24247 = getelementptr inbounds float* %tmp24246, i64 1
+  %tmp24248 = getelementptr inbounds float* %tmp24247, i64 1
+  %tmp24249 = getelementptr inbounds float* %tmp24248, i64 1
+  %tmp24250 = getelementptr inbounds float* %tmp24249, i64 1
+  %tmp24251 = getelementptr inbounds float* %tmp24250, i64 1
+  %tmp24252 = getelementptr inbounds float* %tmp24251, i64 1
+  %tmp24253 = getelementptr inbounds float* %tmp24252, i64 1
+  %tmp24254 = getelementptr inbounds float* %tmp24253, i64 1
+  %tmp24255 = getelementptr inbounds float* %tmp24254, i64 1
+  %tmp24256 = getelementptr inbounds float* %tmp24255, i64 1
+  %tmp24257 = getelementptr inbounds float* %tmp24256, i64 1
+  %tmp24258 = getelementptr inbounds float* %tmp24257, i64 1
+  %tmp24259 = getelementptr inbounds float* %tmp24258, i64 1
+  %tmp24260 = getelementptr inbounds float* %tmp24259, i64 1
+  %tmp24261 = getelementptr inbounds float* %tmp24260, i64 1
+  %tmp24262 = getelementptr inbounds float* %tmp24261, i64 1
+  %tmp24263 = getelementptr inbounds float* %tmp24262, i64 1
+  %tmp24264 = getelementptr inbounds float* %tmp24263, i64 1
+  %tmp24265 = getelementptr inbounds float* %tmp24264, i64 1
+  %tmp24266 = getelementptr inbounds float* %tmp24265, i64 1
+  %tmp24267 = getelementptr inbounds float* %tmp24266, i64 1
+  %tmp24268 = getelementptr inbounds float* %tmp24267, i64 1
+  %tmp24269 = getelementptr inbounds float* %tmp24268, i64 1
+  %tmp24270 = getelementptr inbounds float* %tmp24269, i64 1
+  %tmp24271 = getelementptr inbounds float* %tmp24270, i64 1
+  %tmp24272 = getelementptr inbounds float* %tmp24271, i64 1
+  %tmp24273 = getelementptr inbounds float* %tmp24272, i64 1
+  %tmp24274 = getelementptr inbounds float* %tmp24273, i64 1
+  %tmp24275 = getelementptr inbounds float* %tmp24274, i64 1
+  %tmp24276 = getelementptr inbounds float* %tmp24275, i64 1
+  %tmp24277 = getelementptr inbounds float* %tmp24276, i64 1
+  %tmp24278 = getelementptr inbounds float* %tmp24277, i64 1
+  %tmp24279 = getelementptr inbounds float* %tmp24278, i64 1
+  %tmp24280 = getelementptr inbounds float* %tmp24279, i64 1
+  %tmp24281 = getelementptr inbounds float* %tmp24280, i64 1
+  %tmp24282 = getelementptr inbounds float* %tmp24281, i64 1
+  %tmp24283 = getelementptr inbounds float* %tmp24282, i64 1
+  %tmp24284 = getelementptr inbounds float* %tmp24283, i64 1
+  %tmp24285 = getelementptr inbounds float* %tmp24284, i64 1
+  %tmp24286 = getelementptr inbounds float* %tmp24285, i64 1
+  %tmp24287 = getelementptr inbounds float* %tmp24286, i64 1
+  %tmp24288 = getelementptr inbounds float* %tmp24287, i64 1
+  %tmp24289 = getelementptr inbounds float* %tmp24288, i64 1
+  %tmp24290 = getelementptr inbounds float* %tmp24289, i64 1
+  %tmp24291 = getelementptr inbounds float* %tmp24290, i64 1
+  %tmp24292 = getelementptr inbounds float* %tmp24291, i64 1
+  %tmp24293 = getelementptr inbounds float* %tmp24292, i64 1
+  %tmp24294 = getelementptr inbounds float* %tmp24293, i64 1
+  %tmp24295 = getelementptr inbounds float* %tmp24294, i64 1
+  %tmp24296 = getelementptr inbounds float* %tmp24295, i64 1
+  %tmp24297 = getelementptr inbounds float* %tmp24296, i64 1
+  %tmp24298 = getelementptr inbounds float* %tmp24297, i64 1
+  %tmp24299 = getelementptr inbounds float* %tmp24298, i64 1
+  %tmp24300 = getelementptr inbounds float* %tmp24299, i64 1
+  %tmp24301 = getelementptr inbounds float* %tmp24300, i64 1
+  %tmp24302 = getelementptr inbounds float* %tmp24301, i64 1
+  %tmp24303 = getelementptr inbounds float* %tmp24302, i64 1
+  %tmp24304 = getelementptr inbounds float* %tmp24303, i64 1
+  %tmp24305 = getelementptr inbounds float* %tmp24304, i64 1
+  %tmp24306 = getelementptr inbounds float* %tmp24305, i64 1
+  %tmp24307 = getelementptr inbounds float* %tmp24306, i64 1
+  %tmp24308 = getelementptr inbounds float* %tmp24307, i64 1
+  %tmp24309 = getelementptr inbounds float* %tmp24308, i64 1
+  %tmp24310 = getelementptr inbounds float* %tmp24309, i64 1
+  %tmp24311 = getelementptr inbounds float* %tmp24310, i64 1
+  %tmp24312 = getelementptr inbounds float* %tmp24311, i64 1
+  %tmp24313 = getelementptr inbounds float* %tmp24312, i64 1
+  %tmp24314 = getelementptr inbounds float* %tmp24313, i64 1
+  %tmp24315 = getelementptr inbounds float* %tmp24314, i64 1
+  %tmp24316 = getelementptr inbounds float* %tmp24315, i64 1
+  %tmp24317 = getelementptr inbounds float* %tmp24316, i64 1
+  %tmp24318 = getelementptr inbounds float* %tmp24317, i64 1
+  %tmp24319 = getelementptr inbounds float* %tmp24318, i64 1
+  %tmp24320 = getelementptr inbounds float* %tmp24319, i64 1
+  %tmp24321 = getelementptr inbounds float* %tmp24320, i64 1
+  %tmp24322 = getelementptr inbounds float* %tmp24321, i64 1
+  %tmp24323 = getelementptr inbounds float* %tmp24322, i64 1
+  %tmp24324 = getelementptr inbounds float* %tmp24323, i64 1
+  %tmp24325 = getelementptr inbounds float* %tmp24324, i64 1
+  %tmp24326 = getelementptr inbounds float* %tmp24325, i64 1
+  %tmp24327 = getelementptr inbounds float* %tmp24326, i64 1
+  %tmp24328 = getelementptr inbounds float* %tmp24327, i64 1
+  %tmp24329 = getelementptr inbounds float* %tmp24328, i64 1
+  %tmp24330 = getelementptr inbounds float* %tmp24329, i64 1
+  %tmp24331 = getelementptr inbounds float* %tmp24330, i64 1
+  %tmp24332 = getelementptr inbounds float* %tmp24331, i64 1
+  %tmp24333 = getelementptr inbounds float* %tmp24332, i64 1
+  %tmp24334 = getelementptr inbounds float* %tmp24333, i64 1
+  %tmp24335 = getelementptr inbounds float* %tmp24334, i64 1
+  %tmp24336 = getelementptr inbounds float* %tmp24335, i64 1
+  %tmp24337 = getelementptr inbounds float* %tmp24336, i64 1
+  %tmp24338 = getelementptr inbounds float* %tmp24337, i64 1
+  %tmp24339 = getelementptr inbounds float* %tmp24338, i64 1
+  %tmp24340 = getelementptr inbounds float* %tmp24339, i64 1
+  %tmp24341 = getelementptr inbounds float* %tmp24340, i64 1
+  %tmp24342 = getelementptr inbounds float* %tmp24341, i64 1
+  %tmp24343 = getelementptr inbounds float* %tmp24342, i64 1
+  %tmp24344 = getelementptr inbounds float* %tmp24343, i64 1
+  %tmp24345 = getelementptr inbounds float* %tmp24344, i64 1
+  %tmp24346 = getelementptr inbounds float* %tmp24345, i64 1
+  %tmp24347 = getelementptr inbounds float* %tmp24346, i64 1
+  %tmp24348 = getelementptr inbounds float* %tmp24347, i64 1
+  %tmp24349 = getelementptr inbounds float* %tmp24348, i64 1
+  %tmp24350 = getelementptr inbounds float* %tmp24349, i64 1
+  %tmp24351 = getelementptr inbounds float* %tmp24350, i64 1
+  %tmp24352 = getelementptr inbounds float* %tmp24351, i64 1
+  %tmp24353 = getelementptr inbounds float* %tmp24352, i64 1
+  %tmp24354 = getelementptr inbounds float* %tmp24353, i64 1
+  %tmp24355 = getelementptr inbounds float* %tmp24354, i64 1
+  %tmp24356 = getelementptr inbounds float* %tmp24355, i64 1
+  %tmp24357 = getelementptr inbounds float* %tmp24356, i64 1
+  %tmp24358 = getelementptr inbounds float* %tmp24357, i64 1
+  %tmp24359 = getelementptr inbounds float* %tmp24358, i64 1
+  %tmp24360 = getelementptr inbounds float* %tmp24359, i64 1
+  %tmp24361 = getelementptr inbounds float* %tmp24360, i64 1
+  %tmp24362 = getelementptr inbounds float* %tmp24361, i64 1
+  %tmp24363 = getelementptr inbounds float* %tmp24362, i64 1
+  %tmp24364 = getelementptr inbounds float* %tmp24363, i64 1
+  %tmp24365 = getelementptr inbounds float* %tmp24364, i64 1
+  %tmp24366 = getelementptr inbounds float* %tmp24365, i64 1
+  %tmp24367 = getelementptr inbounds float* %tmp24366, i64 1
+  %tmp24368 = getelementptr inbounds float* %tmp24367, i64 1
+  %tmp24369 = getelementptr inbounds float* %tmp24368, i64 1
+  %tmp24370 = getelementptr inbounds float* %tmp24369, i64 1
+  %tmp24371 = getelementptr inbounds float* %tmp24370, i64 1
+  %tmp24372 = getelementptr inbounds float* %tmp24371, i64 1
+  %tmp24373 = getelementptr inbounds float* %tmp24372, i64 1
+  %tmp24374 = getelementptr inbounds float* %tmp24373, i64 1
+  %tmp24375 = getelementptr inbounds float* %tmp24374, i64 1
+  %tmp24376 = getelementptr inbounds float* %tmp24375, i64 1
+  %tmp24377 = getelementptr inbounds float* %tmp24376, i64 1
+  %tmp24378 = getelementptr inbounds float* %tmp24377, i64 1
+  %tmp24379 = getelementptr inbounds float* %tmp24378, i64 1
+  %tmp24380 = getelementptr inbounds float* %tmp24379, i64 1
+  %tmp24381 = getelementptr inbounds float* %tmp24380, i64 1
+  %tmp24382 = getelementptr inbounds float* %tmp24381, i64 1
+  %tmp24383 = getelementptr inbounds float* %tmp24382, i64 1
+  %tmp24384 = getelementptr inbounds float* %tmp24383, i64 1
+  %tmp24385 = getelementptr inbounds float* %tmp24384, i64 1
+  %tmp24386 = getelementptr inbounds float* %tmp24385, i64 1
+  %tmp24387 = getelementptr inbounds float* %tmp24386, i64 1
+  %tmp24388 = getelementptr inbounds float* %tmp24387, i64 1
+  %tmp24389 = getelementptr inbounds float* %tmp24388, i64 1
+  %tmp24390 = getelementptr inbounds float* %tmp24389, i64 1
+  %tmp24391 = getelementptr inbounds float* %tmp24390, i64 1
+  %tmp24392 = getelementptr inbounds float* %tmp24391, i64 1
+  %tmp24393 = getelementptr inbounds float* %tmp24392, i64 1
+  %tmp24394 = getelementptr inbounds float* %tmp24393, i64 1
+  %tmp24395 = getelementptr inbounds float* %tmp24394, i64 1
+  %tmp24396 = getelementptr inbounds float* %tmp24395, i64 1
+  %tmp24397 = getelementptr inbounds float* %tmp24396, i64 1
+  %tmp24398 = getelementptr inbounds float* %tmp24397, i64 1
+  %tmp24399 = getelementptr inbounds float* %tmp24398, i64 1
+  %tmp24400 = getelementptr inbounds float* %tmp24399, i64 1
+  %tmp24401 = getelementptr inbounds float* %tmp24400, i64 1
+  %tmp24402 = getelementptr inbounds float* %tmp24401, i64 1
+  %tmp24403 = getelementptr inbounds float* %tmp24402, i64 1
+  %tmp24404 = getelementptr inbounds float* %tmp24403, i64 1
+  %tmp24405 = getelementptr inbounds float* %tmp24404, i64 1
+  %tmp24406 = getelementptr inbounds float* %tmp24405, i64 1
+  %tmp24407 = getelementptr inbounds float* %tmp24406, i64 1
+  %tmp24408 = getelementptr inbounds float* %tmp24407, i64 1
+  %tmp24409 = getelementptr inbounds float* %tmp24408, i64 1
+  %tmp24410 = getelementptr inbounds float* %tmp24409, i64 1
+  %tmp24411 = getelementptr inbounds float* %tmp24410, i64 1
+  %tmp24412 = getelementptr inbounds float* %tmp24411, i64 1
+  %tmp24413 = getelementptr inbounds float* %tmp24412, i64 1
+  %tmp24414 = getelementptr inbounds float* %tmp24413, i64 1
+  %tmp24415 = getelementptr inbounds float* %tmp24414, i64 1
+  %tmp24416 = getelementptr inbounds float* %tmp24415, i64 1
+  %tmp24417 = getelementptr inbounds float* %tmp24416, i64 1
+  %tmp24418 = getelementptr inbounds float* %tmp24417, i64 1
+  %tmp24419 = getelementptr inbounds float* %tmp24418, i64 1
+  %tmp24420 = getelementptr inbounds float* %tmp24419, i64 1
+  %tmp24421 = getelementptr inbounds float* %tmp24420, i64 1
+  %tmp24422 = getelementptr inbounds float* %tmp24421, i64 1
+  %tmp24423 = getelementptr inbounds float* %tmp24422, i64 1
+  %tmp24424 = getelementptr inbounds float* %tmp24423, i64 1
+  %tmp24425 = getelementptr inbounds float* %tmp24424, i64 1
+  %tmp24426 = getelementptr inbounds float* %tmp24425, i64 1
+  %tmp24427 = getelementptr inbounds float* %tmp24426, i64 1
+  %tmp24428 = getelementptr inbounds float* %tmp24427, i64 1
+  %tmp24429 = getelementptr inbounds float* %tmp24428, i64 1
+  %tmp24430 = getelementptr inbounds float* %tmp24429, i64 1
+  %tmp24431 = getelementptr inbounds float* %tmp24430, i64 1
+  %tmp24432 = getelementptr inbounds float* %tmp24431, i64 1
+  %tmp24433 = getelementptr inbounds float* %tmp24432, i64 1
+  %tmp24434 = getelementptr inbounds float* %tmp24433, i64 1
+  %tmp24435 = getelementptr inbounds float* %tmp24434, i64 1
+  %tmp24436 = getelementptr inbounds float* %tmp24435, i64 1
+  %tmp24437 = getelementptr inbounds float* %tmp24436, i64 1
+  %tmp24438 = getelementptr inbounds float* %tmp24437, i64 1
+  %tmp24439 = getelementptr inbounds float* %tmp24438, i64 1
+  %tmp24440 = getelementptr inbounds float* %tmp24439, i64 1
+  %tmp24441 = getelementptr inbounds float* %tmp24440, i64 1
+  %tmp24442 = getelementptr inbounds float* %tmp24441, i64 1
+  %tmp24443 = getelementptr inbounds float* %tmp24442, i64 1
+  %tmp24444 = getelementptr inbounds float* %tmp24443, i64 1
+  %tmp24445 = getelementptr inbounds float* %tmp24444, i64 1
+  %tmp24446 = getelementptr inbounds float* %tmp24445, i64 1
+  %tmp24447 = getelementptr inbounds float* %tmp24446, i64 1
+  %tmp24448 = getelementptr inbounds float* %tmp24447, i64 1
+  %tmp24449 = getelementptr inbounds float* %tmp24448, i64 1
+  %tmp24450 = getelementptr inbounds float* %tmp24449, i64 1
+  %tmp24451 = getelementptr inbounds float* %tmp24450, i64 1
+  %tmp24452 = getelementptr inbounds float* %tmp24451, i64 1
+  %tmp24453 = getelementptr inbounds float* %tmp24452, i64 1
+  %tmp24454 = getelementptr inbounds float* %tmp24453, i64 1
+  %tmp24455 = getelementptr inbounds float* %tmp24454, i64 1
+  %tmp24456 = getelementptr inbounds float* %tmp24455, i64 1
+  %tmp24457 = getelementptr inbounds float* %tmp24456, i64 1
+  %tmp24458 = getelementptr inbounds float* %tmp24457, i64 1
+  %tmp24459 = getelementptr inbounds float* %tmp24458, i64 1
+  %tmp24460 = getelementptr inbounds float* %tmp24459, i64 1
+  %tmp24461 = getelementptr inbounds float* %tmp24460, i64 1
+  %tmp24462 = getelementptr inbounds float* %tmp24461, i64 1
+  %tmp24463 = getelementptr inbounds float* %tmp24462, i64 1
+  %tmp24464 = getelementptr inbounds float* %tmp24463, i64 1
+  %tmp24465 = getelementptr inbounds float* %tmp24464, i64 1
+  %tmp24466 = getelementptr inbounds float* %tmp24465, i64 1
+  %tmp24467 = getelementptr inbounds float* %tmp24466, i64 1
+  %tmp24468 = getelementptr inbounds float* %tmp24467, i64 1
+  %tmp24469 = getelementptr inbounds float* %tmp24468, i64 1
+  %tmp24470 = getelementptr inbounds float* %tmp24469, i64 1
+  %tmp24471 = getelementptr inbounds float* %tmp24470, i64 1
+  %tmp24472 = getelementptr inbounds float* %tmp24471, i64 1
+  %tmp24473 = getelementptr inbounds float* %tmp24472, i64 1
+  %tmp24474 = getelementptr inbounds float* %tmp24473, i64 1
+  %tmp24475 = getelementptr inbounds float* %tmp24474, i64 1
+  %tmp24476 = getelementptr inbounds float* %tmp24475, i64 1
+  %tmp24477 = getelementptr inbounds float* %tmp24476, i64 1
+  %tmp24478 = getelementptr inbounds float* %tmp24477, i64 1
+  %tmp24479 = getelementptr inbounds float* %tmp24478, i64 1
+  %tmp24480 = getelementptr inbounds float* %tmp24479, i64 1
+  %tmp24481 = getelementptr inbounds float* %tmp24480, i64 1
+  %tmp24482 = getelementptr inbounds float* %tmp24481, i64 1
+  %tmp24483 = getelementptr inbounds float* %tmp24482, i64 1
+  %tmp24484 = getelementptr inbounds float* %tmp24483, i64 1
+  %tmp24485 = getelementptr inbounds float* %tmp24484, i64 1
+  %tmp24486 = getelementptr inbounds float* %tmp24485, i64 1
+  %tmp24487 = getelementptr inbounds float* %tmp24486, i64 1
+  %tmp24488 = getelementptr inbounds float* %tmp24487, i64 1
+  %tmp24489 = getelementptr inbounds float* %tmp24488, i64 1
+  %tmp24490 = getelementptr inbounds float* %tmp24489, i64 1
+  %tmp24491 = getelementptr inbounds float* %tmp24490, i64 1
+  %tmp24492 = getelementptr inbounds float* %tmp24491, i64 1
+  %tmp24493 = getelementptr inbounds float* %tmp24492, i64 1
+  %tmp24494 = getelementptr inbounds float* %tmp24493, i64 1
+  %tmp24495 = getelementptr inbounds float* %tmp24494, i64 1
+  %tmp24496 = getelementptr inbounds float* %tmp24495, i64 1
+  %tmp24497 = getelementptr inbounds float* %tmp24496, i64 1
+  %tmp24498 = getelementptr inbounds float* %tmp24497, i64 1
+  %tmp24499 = getelementptr inbounds float* %tmp24498, i64 1
+  %tmp24500 = getelementptr inbounds float* %tmp24499, i64 1
+  %tmp24501 = getelementptr inbounds float* %tmp24500, i64 1
+  %tmp24502 = getelementptr inbounds float* %tmp24501, i64 1
+  %tmp24503 = getelementptr inbounds float* %tmp24502, i64 1
+  %tmp24504 = getelementptr inbounds float* %tmp24503, i64 1
+  %tmp24505 = getelementptr inbounds float* %tmp24504, i64 1
+  %tmp24506 = getelementptr inbounds float* %tmp24505, i64 1
+  %tmp24507 = getelementptr inbounds float* %tmp24506, i64 1
+  %tmp24508 = getelementptr inbounds float* %tmp24507, i64 1
+  %tmp24509 = getelementptr inbounds float* %tmp24508, i64 1
+  %tmp24510 = getelementptr inbounds float* %tmp24509, i64 1
+  %tmp24511 = getelementptr inbounds float* %tmp24510, i64 1
+  %tmp24512 = getelementptr inbounds float* %tmp24511, i64 1
+  %tmp24513 = getelementptr inbounds float* %tmp24512, i64 1
+  %tmp24514 = getelementptr inbounds float* %tmp24513, i64 1
+  %tmp24515 = getelementptr inbounds float* %tmp24514, i64 1
+  %tmp24516 = getelementptr inbounds float* %tmp24515, i64 1
+  %tmp24517 = getelementptr inbounds float* %tmp24516, i64 1
+  %tmp24518 = getelementptr inbounds float* %tmp24517, i64 1
+  %tmp24519 = getelementptr inbounds float* %tmp24518, i64 1
+  %tmp24520 = getelementptr inbounds float* %tmp24519, i64 1
+  %tmp24521 = getelementptr inbounds float* %tmp24520, i64 1
+  %tmp24522 = getelementptr inbounds float* %tmp24521, i64 1
+  %tmp24523 = getelementptr inbounds float* %tmp24522, i64 1
+  %tmp24524 = getelementptr inbounds float* %tmp24523, i64 1
+  %tmp24525 = getelementptr inbounds float* %tmp24524, i64 1
+  %tmp24526 = getelementptr inbounds float* %tmp24525, i64 1
+  %tmp24527 = getelementptr inbounds float* %tmp24526, i64 1
+  %tmp24528 = getelementptr inbounds float* %tmp24527, i64 1
+  %tmp24529 = getelementptr inbounds float* %tmp24528, i64 1
+  %tmp24530 = getelementptr inbounds float* %tmp24529, i64 1
+  %tmp24531 = getelementptr inbounds float* %tmp24530, i64 1
+  %tmp24532 = getelementptr inbounds float* %tmp24531, i64 1
+  %tmp24533 = getelementptr inbounds float* %tmp24532, i64 1
+  %tmp24534 = getelementptr inbounds float* %tmp24533, i64 1
+  %tmp24535 = getelementptr inbounds float* %tmp24534, i64 1
+  %tmp24536 = getelementptr inbounds float* %tmp24535, i64 1
+  %tmp24537 = getelementptr inbounds float* %tmp24536, i64 1
+  %tmp24538 = getelementptr inbounds float* %tmp24537, i64 1
+  %tmp24539 = getelementptr inbounds float* %tmp24538, i64 1
+  %tmp24540 = getelementptr inbounds float* %tmp24539, i64 1
+  %tmp24541 = getelementptr inbounds float* %tmp24540, i64 1
+  %tmp24542 = getelementptr inbounds float* %tmp24541, i64 1
+  %tmp24543 = getelementptr inbounds float* %tmp24542, i64 1
+  %tmp24544 = getelementptr inbounds float* %tmp24543, i64 1
+  %tmp24545 = getelementptr inbounds float* %tmp24544, i64 1
+  %tmp24546 = getelementptr inbounds float* %tmp24545, i64 1
+  %tmp24547 = getelementptr inbounds float* %tmp24546, i64 1
+  %tmp24548 = getelementptr inbounds float* %tmp24547, i64 1
+  %tmp24549 = getelementptr inbounds float* %tmp24548, i64 1
+  %tmp24550 = getelementptr inbounds float* %tmp24549, i64 1
+  %tmp24551 = getelementptr inbounds float* %tmp24550, i64 1
+  %tmp24552 = getelementptr inbounds float* %tmp24551, i64 1
+  %tmp24553 = getelementptr inbounds float* %tmp24552, i64 1
+  %tmp24554 = getelementptr inbounds float* %tmp24553, i64 1
+  %tmp24555 = getelementptr inbounds float* %tmp24554, i64 1
+  %tmp24556 = getelementptr inbounds float* %tmp24555, i64 1
+  %tmp24557 = getelementptr inbounds float* %tmp24556, i64 1
+  %tmp24558 = getelementptr inbounds float* %tmp24557, i64 1
+  %tmp24559 = getelementptr inbounds float* %tmp24558, i64 1
+  %tmp24560 = getelementptr inbounds float* %tmp24559, i64 1
+  %tmp24561 = getelementptr inbounds float* %tmp24560, i64 1
+  %tmp24562 = getelementptr inbounds float* %tmp24561, i64 1
+  %tmp24563 = getelementptr inbounds float* %tmp24562, i64 1
+  %tmp24564 = getelementptr inbounds float* %tmp24563, i64 1
+  %tmp24565 = getelementptr inbounds float* %tmp24564, i64 1
+  %tmp24566 = getelementptr inbounds float* %tmp24565, i64 1
+  %tmp24567 = getelementptr inbounds float* %tmp24566, i64 1
+  %tmp24568 = getelementptr inbounds float* %tmp24567, i64 1
+  %tmp24569 = getelementptr inbounds float* %tmp24568, i64 1
+  %tmp24570 = getelementptr inbounds float* %tmp24569, i64 1
+  %tmp24571 = getelementptr inbounds float* %tmp24570, i64 1
+  %tmp24572 = getelementptr inbounds float* %tmp24571, i64 1
+  %tmp24573 = getelementptr inbounds float* %tmp24572, i64 1
+  %tmp24574 = getelementptr inbounds float* %tmp24573, i64 1
+  %tmp24575 = getelementptr inbounds float* %tmp24574, i64 1
+  %tmp24576 = getelementptr inbounds float* %tmp24575, i64 1
+  %tmp24577 = getelementptr inbounds float* %tmp24576, i64 1
+  %tmp24578 = getelementptr inbounds float* %tmp24577, i64 1
+  %tmp24579 = getelementptr inbounds float* %tmp24578, i64 1
+  %tmp24580 = getelementptr inbounds float* %tmp24579, i64 1
+  %tmp24581 = getelementptr inbounds float* %tmp24580, i64 1
+  %tmp24582 = getelementptr inbounds float* %tmp24581, i64 1
+  %tmp24583 = getelementptr inbounds float* %tmp24582, i64 1
+  %tmp24584 = getelementptr inbounds float* %tmp24583, i64 1
+  %tmp24585 = getelementptr inbounds float* %tmp24584, i64 1
+  %tmp24586 = getelementptr inbounds float* %tmp24585, i64 1
+  %tmp24587 = getelementptr inbounds float* %tmp24586, i64 1
+  %tmp24588 = getelementptr inbounds float* %tmp24587, i64 1
+  %tmp24589 = getelementptr inbounds float* %tmp24588, i64 1
+  %tmp24590 = getelementptr inbounds float* %tmp24589, i64 1
+  %tmp24591 = getelementptr inbounds float* %tmp24590, i64 1
+  %tmp24592 = getelementptr inbounds float* %tmp24591, i64 1
+  %tmp24593 = getelementptr inbounds float* %tmp24592, i64 1
+  %tmp24594 = getelementptr inbounds float* %tmp24593, i64 1
+  %tmp24595 = getelementptr inbounds float* %tmp24594, i64 1
+  %tmp24596 = getelementptr inbounds float* %tmp24595, i64 1
+  %tmp24597 = getelementptr inbounds float* %tmp24596, i64 1
+  %tmp24598 = getelementptr inbounds float* %tmp24597, i64 1
+  %tmp24599 = getelementptr inbounds float* %tmp24598, i64 1
+  %tmp24600 = getelementptr inbounds float* %tmp24599, i64 1
+  %tmp24601 = getelementptr inbounds float* %tmp24600, i64 1
+  %tmp24602 = getelementptr inbounds float* %tmp24601, i64 1
+  %tmp24603 = getelementptr inbounds float* %tmp24602, i64 1
+  %tmp24604 = getelementptr inbounds float* %tmp24603, i64 1
+  %tmp24605 = getelementptr inbounds float* %tmp24604, i64 1
+  %tmp24606 = getelementptr inbounds float* %tmp24605, i64 1
+  %tmp24607 = getelementptr inbounds float* %tmp24606, i64 1
+  %tmp24608 = getelementptr inbounds float* %tmp24607, i64 1
+  %tmp24609 = getelementptr inbounds float* %tmp24608, i64 1
+  %tmp24610 = getelementptr inbounds float* %tmp24609, i64 1
+  %tmp24611 = getelementptr inbounds float* %tmp24610, i64 1
+  %tmp24612 = getelementptr inbounds float* %tmp24611, i64 1
+  %tmp24613 = getelementptr inbounds float* %tmp24612, i64 1
+  %tmp24614 = getelementptr inbounds float* %tmp24613, i64 1
+  %tmp24615 = getelementptr inbounds float* %tmp24614, i64 1
+  %tmp24616 = getelementptr inbounds float* %tmp24615, i64 1
+  %tmp24617 = getelementptr inbounds float* %tmp24616, i64 1
+  %tmp24618 = getelementptr inbounds float* %tmp24617, i64 1
+  %tmp24619 = getelementptr inbounds float* %tmp24618, i64 1
+  %tmp24620 = getelementptr inbounds float* %tmp24619, i64 1
+  %tmp24621 = getelementptr inbounds float* %tmp24620, i64 1
+  %tmp24622 = getelementptr inbounds float* %tmp24621, i64 1
+  %tmp24623 = getelementptr inbounds float* %tmp24622, i64 1
+  %tmp24624 = getelementptr inbounds float* %tmp24623, i64 1
+  %tmp24625 = getelementptr inbounds float* %tmp24624, i64 1
+  %tmp24626 = getelementptr inbounds float* %tmp24625, i64 1
+  %tmp24627 = getelementptr inbounds float* %tmp24626, i64 1
+  %tmp24628 = getelementptr inbounds float* %tmp24627, i64 1
+  %tmp24629 = getelementptr inbounds float* %tmp24628, i64 1
+  %tmp24630 = getelementptr inbounds float* %tmp24629, i64 1
+  %tmp24631 = getelementptr inbounds float* %tmp24630, i64 1
+  %tmp24632 = getelementptr inbounds float* %tmp24631, i64 1
+  %tmp24633 = getelementptr inbounds float* %tmp24632, i64 1
+  %tmp24634 = getelementptr inbounds float* %tmp24633, i64 1
+  %tmp24635 = getelementptr inbounds float* %tmp24634, i64 1
+  %tmp24636 = getelementptr inbounds float* %tmp24635, i64 1
+  %tmp24637 = getelementptr inbounds float* %tmp24636, i64 1
+  %tmp24638 = getelementptr inbounds float* %tmp24637, i64 1
+  %tmp24639 = getelementptr inbounds float* %tmp24638, i64 1
+  %tmp24640 = getelementptr inbounds float* %tmp24639, i64 1
+  %tmp24641 = getelementptr inbounds float* %tmp24640, i64 1
+  %tmp24642 = getelementptr inbounds float* %tmp24641, i64 1
+  %tmp24643 = getelementptr inbounds float* %tmp24642, i64 1
+  %tmp24644 = getelementptr inbounds float* %tmp24643, i64 1
+  %tmp24645 = getelementptr inbounds float* %tmp24644, i64 1
+  %tmp24646 = getelementptr inbounds float* %tmp24645, i64 1
+  %tmp24647 = getelementptr inbounds float* %tmp24646, i64 1
+  %tmp24648 = getelementptr inbounds float* %tmp24647, i64 1
+  %tmp24649 = getelementptr inbounds float* %tmp24648, i64 1
+  %tmp24650 = getelementptr inbounds float* %tmp24649, i64 1
+  %tmp24651 = getelementptr inbounds float* %tmp24650, i64 1
+  %tmp24652 = getelementptr inbounds float* %tmp24651, i64 1
+  %tmp24653 = getelementptr inbounds float* %tmp24652, i64 1
+  %tmp24654 = getelementptr inbounds float* %tmp24653, i64 1
+  %tmp24655 = getelementptr inbounds float* %tmp24654, i64 1
+  %tmp24656 = getelementptr inbounds float* %tmp24655, i64 1
+  %tmp24657 = getelementptr inbounds float* %tmp24656, i64 1
+  %tmp24658 = getelementptr inbounds float* %tmp24657, i64 1
+  %tmp24659 = getelementptr inbounds float* %tmp24658, i64 1
+  %tmp24660 = getelementptr inbounds float* %tmp24659, i64 1
+  %tmp24661 = getelementptr inbounds float* %tmp24660, i64 1
+  %tmp24662 = getelementptr inbounds float* %tmp24661, i64 1
+  %tmp24663 = getelementptr inbounds float* %tmp24662, i64 1
+  %tmp24664 = getelementptr inbounds float* %tmp24663, i64 1
+  %tmp24665 = getelementptr inbounds float* %tmp24664, i64 1
+  %tmp24666 = getelementptr inbounds float* %tmp24665, i64 1
+  %tmp24667 = getelementptr inbounds float* %tmp24666, i64 1
+  %tmp24668 = getelementptr inbounds float* %tmp24667, i64 1
+  %tmp24669 = getelementptr inbounds float* %tmp24668, i64 1
+  %tmp24670 = getelementptr inbounds float* %tmp24669, i64 1
+  %tmp24671 = getelementptr inbounds float* %tmp24670, i64 1
+  %tmp24672 = getelementptr inbounds float* %tmp24671, i64 1
+  %tmp24673 = getelementptr inbounds float* %tmp24672, i64 1
+  %tmp24674 = getelementptr inbounds float* %tmp24673, i64 1
+  %tmp24675 = getelementptr inbounds float* %tmp24674, i64 1
+  %tmp24676 = getelementptr inbounds float* %tmp24675, i64 1
+  %tmp24677 = getelementptr inbounds float* %tmp24676, i64 1
+  %tmp24678 = getelementptr inbounds float* %tmp24677, i64 1
+  %tmp24679 = getelementptr inbounds float* %tmp24678, i64 1
+  %tmp24680 = getelementptr inbounds float* %tmp24679, i64 1
+  %tmp24681 = getelementptr inbounds float* %tmp24680, i64 1
+  %tmp24682 = getelementptr inbounds float* %tmp24681, i64 1
+  %tmp24683 = getelementptr inbounds float* %tmp24682, i64 1
+  %tmp24684 = getelementptr inbounds float* %tmp24683, i64 1
+  %tmp24685 = getelementptr inbounds float* %tmp24684, i64 1
+  %tmp24686 = getelementptr inbounds float* %tmp24685, i64 1
+  %tmp24687 = getelementptr inbounds float* %tmp24686, i64 1
+  %tmp24688 = getelementptr inbounds float* %tmp24687, i64 1
+  %tmp24689 = getelementptr inbounds float* %tmp24688, i64 1
+  %tmp24690 = getelementptr inbounds float* %tmp24689, i64 1
+  %tmp24691 = getelementptr inbounds float* %tmp24690, i64 1
+  %tmp24692 = getelementptr inbounds float* %tmp24691, i64 1
+  %tmp24693 = getelementptr inbounds float* %tmp24692, i64 1
+  %tmp24694 = getelementptr inbounds float* %tmp24693, i64 1
+  %tmp24695 = getelementptr inbounds float* %tmp24694, i64 1
+  %tmp24696 = getelementptr inbounds float* %tmp24695, i64 1
+  %tmp24697 = getelementptr inbounds float* %tmp24696, i64 1
+  %tmp24698 = getelementptr inbounds float* %tmp24697, i64 1
+  %tmp24699 = getelementptr inbounds float* %tmp24698, i64 1
+  %tmp24700 = getelementptr inbounds float* %tmp24699, i64 1
+  %tmp24701 = getelementptr inbounds float* %tmp24700, i64 1
+  %tmp24702 = getelementptr inbounds float* %tmp24701, i64 1
+  %tmp24703 = getelementptr inbounds float* %tmp24702, i64 1
+  %tmp24704 = getelementptr inbounds float* %tmp24703, i64 1
+  %tmp24705 = getelementptr inbounds float* %tmp24704, i64 1
+  %tmp24706 = getelementptr inbounds float* %tmp24705, i64 1
+  %tmp24707 = getelementptr inbounds float* %tmp24706, i64 1
+  %tmp24708 = getelementptr inbounds float* %tmp24707, i64 1
+  %tmp24709 = getelementptr inbounds float* %tmp24708, i64 1
+  %tmp24710 = getelementptr inbounds float* %tmp24709, i64 1
+  %tmp24711 = getelementptr inbounds float* %tmp24710, i64 1
+  %tmp24712 = getelementptr inbounds float* %tmp24711, i64 1
+  %tmp24713 = getelementptr inbounds float* %tmp24712, i64 1
+  %tmp24714 = getelementptr inbounds float* %tmp24713, i64 1
+  %tmp24715 = getelementptr inbounds float* %tmp24714, i64 1
+  %tmp24716 = getelementptr inbounds float* %tmp24715, i64 1
+  %tmp24717 = getelementptr inbounds float* %tmp24716, i64 1
+  %tmp24718 = getelementptr inbounds float* %tmp24717, i64 1
+  %tmp24719 = getelementptr inbounds float* %tmp24718, i64 1
+  %tmp24720 = getelementptr inbounds float* %tmp24719, i64 1
+  %tmp24721 = getelementptr inbounds float* %tmp24720, i64 1
+  %tmp24722 = getelementptr inbounds float* %tmp24721, i64 1
+  %tmp24723 = getelementptr inbounds float* %tmp24722, i64 1
+  %tmp24724 = getelementptr inbounds float* %tmp24723, i64 1
+  %tmp24725 = getelementptr inbounds float* %tmp24724, i64 1
+  %tmp24726 = getelementptr inbounds float* %tmp24725, i64 1
+  %tmp24727 = getelementptr inbounds float* %tmp24726, i64 1
+  %tmp24728 = getelementptr inbounds float* %tmp24727, i64 1
+  %tmp24729 = getelementptr inbounds float* %tmp24728, i64 1
+  %tmp24730 = getelementptr inbounds float* %tmp24729, i64 1
+  %tmp24731 = getelementptr inbounds float* %tmp24730, i64 1
+  %tmp24732 = getelementptr inbounds float* %tmp24731, i64 1
+  %tmp24733 = getelementptr inbounds float* %tmp24732, i64 1
+  %tmp24734 = getelementptr inbounds float* %tmp24733, i64 1
+  %tmp24735 = getelementptr inbounds float* %tmp24734, i64 1
+  %tmp24736 = getelementptr inbounds float* %tmp24735, i64 1
+  %tmp24737 = getelementptr inbounds float* %tmp24736, i64 1
+  %tmp24738 = getelementptr inbounds float* %tmp24737, i64 1
+  %tmp24739 = getelementptr inbounds float* %tmp24738, i64 1
+  %tmp24740 = getelementptr inbounds float* %tmp24739, i64 1
+  %tmp24741 = getelementptr inbounds float* %tmp24740, i64 1
+  %tmp24742 = getelementptr inbounds float* %tmp24741, i64 1
+  %tmp24743 = getelementptr inbounds float* %tmp24742, i64 1
+  %tmp24744 = getelementptr inbounds float* %tmp24743, i64 1
+  %tmp24745 = getelementptr inbounds float* %tmp24744, i64 1
+  %tmp24746 = getelementptr inbounds float* %tmp24745, i64 1
+  %tmp24747 = getelementptr inbounds float* %tmp24746, i64 1
+  %tmp24748 = getelementptr inbounds float* %tmp24747, i64 1
+  %tmp24749 = getelementptr inbounds float* %tmp24748, i64 1
+  %tmp24750 = getelementptr inbounds float* %tmp24749, i64 1
+  %tmp24751 = getelementptr inbounds float* %tmp24750, i64 1
+  %tmp24752 = getelementptr inbounds float* %tmp24751, i64 1
+  %tmp24753 = getelementptr inbounds float* %tmp24752, i64 1
+  %tmp24754 = getelementptr inbounds float* %tmp24753, i64 1
+  %tmp24755 = getelementptr inbounds float* %tmp24754, i64 1
+  %tmp24756 = getelementptr inbounds float* %tmp24755, i64 1
+  %tmp24757 = getelementptr inbounds float* %tmp24756, i64 1
+  %tmp24758 = getelementptr inbounds float* %tmp24757, i64 1
+  %tmp24759 = getelementptr inbounds float* %tmp24758, i64 1
+  %tmp24760 = getelementptr inbounds float* %tmp24759, i64 1
+  %tmp24761 = getelementptr inbounds float* %tmp24760, i64 1
+  %tmp24762 = getelementptr inbounds float* %tmp24761, i64 1
+  %tmp24763 = getelementptr inbounds float* %tmp24762, i64 1
+  %tmp24764 = getelementptr inbounds float* %tmp24763, i64 1
+  %tmp24765 = getelementptr inbounds float* %tmp24764, i64 1
+  %tmp24766 = getelementptr inbounds float* %tmp24765, i64 1
+  %tmp24767 = getelementptr inbounds float* %tmp24766, i64 1
+  %tmp24768 = getelementptr inbounds float* %tmp24767, i64 1
+  %tmp24769 = getelementptr inbounds float* %tmp24768, i64 1
+  %tmp24770 = getelementptr inbounds float* %tmp24769, i64 1
+  %tmp24771 = getelementptr inbounds float* %tmp24770, i64 1
+  %tmp24772 = getelementptr inbounds float* %tmp24771, i64 1
+  %tmp24773 = getelementptr inbounds float* %tmp24772, i64 1
+  %tmp24774 = getelementptr inbounds float* %tmp24773, i64 1
+  %tmp24775 = getelementptr inbounds float* %tmp24774, i64 1
+  %tmp24776 = getelementptr inbounds float* %tmp24775, i64 1
+  %tmp24777 = getelementptr inbounds float* %tmp24776, i64 1
+  %tmp24778 = getelementptr inbounds float* %tmp24777, i64 1
+  %tmp24779 = getelementptr inbounds float* %tmp24778, i64 1
+  %tmp24780 = getelementptr inbounds float* %tmp24779, i64 1
+  %tmp24781 = getelementptr inbounds float* %tmp24780, i64 1
+  %tmp24782 = getelementptr inbounds float* %tmp24781, i64 1
+  %tmp24783 = getelementptr inbounds float* %tmp24782, i64 1
+  %tmp24784 = getelementptr inbounds float* %tmp24783, i64 1
+  %tmp24785 = getelementptr inbounds float* %tmp24784, i64 1
+  %tmp24786 = getelementptr inbounds float* %tmp24785, i64 1
+  %tmp24787 = getelementptr inbounds float* %tmp24786, i64 1
+  %tmp24788 = getelementptr inbounds float* %tmp24787, i64 1
+  %tmp24789 = getelementptr inbounds float* %tmp24788, i64 1
+  %tmp24790 = getelementptr inbounds float* %tmp24789, i64 1
+  %tmp24791 = getelementptr inbounds float* %tmp24790, i64 1
+  %tmp24792 = getelementptr inbounds float* %tmp24791, i64 1
+  %tmp24793 = getelementptr inbounds float* %tmp24792, i64 1
+  %tmp24794 = getelementptr inbounds float* %tmp24793, i64 1
+  %tmp24795 = getelementptr inbounds float* %tmp24794, i64 1
+  %tmp24796 = getelementptr inbounds float* %tmp24795, i64 1
+  %tmp24797 = getelementptr inbounds float* %tmp24796, i64 1
+  %tmp24798 = getelementptr inbounds float* %tmp24797, i64 1
+  %tmp24799 = getelementptr inbounds float* %tmp24798, i64 1
+  %tmp24800 = getelementptr inbounds float* %tmp24799, i64 1
+  %tmp24801 = getelementptr inbounds float* %tmp24800, i64 1
+  %tmp24802 = getelementptr inbounds float* %tmp24801, i64 1
+  %tmp24803 = getelementptr inbounds float* %tmp24802, i64 1
+  %tmp24804 = getelementptr inbounds float* %tmp24803, i64 1
+  %tmp24805 = getelementptr inbounds float* %tmp24804, i64 1
+  %tmp24806 = getelementptr inbounds float* %tmp24805, i64 1
+  %tmp24807 = getelementptr inbounds float* %tmp24806, i64 1
+  %tmp24808 = getelementptr inbounds float* %tmp24807, i64 1
+  %tmp24809 = getelementptr inbounds float* %tmp24808, i64 1
+  %tmp24810 = getelementptr inbounds float* %tmp24809, i64 1
+  %tmp24811 = getelementptr inbounds float* %tmp24810, i64 1
+  %tmp24812 = getelementptr inbounds float* %tmp24811, i64 1
+  %tmp24813 = getelementptr inbounds float* %tmp24812, i64 1
+  %tmp24814 = getelementptr inbounds float* %tmp24813, i64 1
+  %tmp24815 = getelementptr inbounds float* %tmp24814, i64 1
+  %tmp24816 = getelementptr inbounds float* %tmp24815, i64 1
+  %tmp24817 = getelementptr inbounds float* %tmp24816, i64 1
+  %tmp24818 = getelementptr inbounds float* %tmp24817, i64 1
+  %tmp24819 = getelementptr inbounds float* %tmp24818, i64 1
+  %tmp24820 = getelementptr inbounds float* %tmp24819, i64 1
+  %tmp24821 = getelementptr inbounds float* %tmp24820, i64 1
+  %tmp24822 = getelementptr inbounds float* %tmp24821, i64 1
+  %tmp24823 = getelementptr inbounds float* %tmp24822, i64 1
+  %tmp24824 = getelementptr inbounds float* %tmp24823, i64 1
+  %tmp24825 = getelementptr inbounds float* %tmp24824, i64 1
+  %tmp24826 = getelementptr inbounds float* %tmp24825, i64 1
+  %tmp24827 = getelementptr inbounds float* %tmp24826, i64 1
+  %tmp24828 = getelementptr inbounds float* %tmp24827, i64 1
+  %tmp24829 = getelementptr inbounds float* %tmp24828, i64 1
+  %tmp24830 = getelementptr inbounds float* %tmp24829, i64 1
+  %tmp24831 = getelementptr inbounds float* %tmp24830, i64 1
+  %tmp24832 = getelementptr inbounds float* %tmp24831, i64 1
+  %tmp24833 = getelementptr inbounds float* %tmp24832, i64 1
+  %tmp24834 = getelementptr inbounds float* %tmp24833, i64 1
+  %tmp24835 = getelementptr inbounds float* %tmp24834, i64 1
+  %tmp24836 = getelementptr inbounds float* %tmp24835, i64 1
+  %tmp24837 = getelementptr inbounds float* %tmp24836, i64 1
+  %tmp24838 = getelementptr inbounds float* %tmp24837, i64 1
+  %tmp24839 = getelementptr inbounds float* %tmp24838, i64 1
+  %tmp24840 = getelementptr inbounds float* %tmp24839, i64 1
+  %tmp24841 = getelementptr inbounds float* %tmp24840, i64 1
+  %tmp24842 = getelementptr inbounds float* %tmp24841, i64 1
+  %tmp24843 = getelementptr inbounds float* %tmp24842, i64 1
+  %tmp24844 = getelementptr inbounds float* %tmp24843, i64 1
+  %tmp24845 = getelementptr inbounds float* %tmp24844, i64 1
+  %tmp24846 = getelementptr inbounds float* %tmp24845, i64 1
+  %tmp24847 = getelementptr inbounds float* %tmp24846, i64 1
+  %tmp24848 = getelementptr inbounds float* %tmp24847, i64 1
+  %tmp24849 = getelementptr inbounds float* %tmp24848, i64 1
+  %tmp24850 = getelementptr inbounds float* %tmp24849, i64 1
+  %tmp24851 = getelementptr inbounds float* %tmp24850, i64 1
+  %tmp24852 = getelementptr inbounds float* %tmp24851, i64 1
+  %tmp24853 = getelementptr inbounds float* %tmp24852, i64 1
+  %tmp24854 = getelementptr inbounds float* %tmp24853, i64 1
+  %tmp24855 = getelementptr inbounds float* %tmp24854, i64 1
+  %tmp24856 = getelementptr inbounds float* %tmp24855, i64 1
+  %tmp24857 = getelementptr inbounds float* %tmp24856, i64 1
+  %tmp24858 = getelementptr inbounds float* %tmp24857, i64 1
+  %tmp24859 = getelementptr inbounds float* %tmp24858, i64 1
+  %tmp24860 = getelementptr inbounds float* %tmp24859, i64 1
+  %tmp24861 = getelementptr inbounds float* %tmp24860, i64 1
+  %tmp24862 = getelementptr inbounds float* %tmp24861, i64 1
+  %tmp24863 = getelementptr inbounds float* %tmp24862, i64 1
+  %tmp24864 = getelementptr inbounds float* %tmp24863, i64 1
+  %tmp24865 = getelementptr inbounds float* %tmp24864, i64 1
+  %tmp24866 = getelementptr inbounds float* %tmp24865, i64 1
+  %tmp24867 = getelementptr inbounds float* %tmp24866, i64 1
+  %tmp24868 = getelementptr inbounds float* %tmp24867, i64 1
+  %tmp24869 = getelementptr inbounds float* %tmp24868, i64 1
+  %tmp24870 = getelementptr inbounds float* %tmp24869, i64 1
+  %tmp24871 = getelementptr inbounds float* %tmp24870, i64 1
+  %tmp24872 = getelementptr inbounds float* %tmp24871, i64 1
+  %tmp24873 = getelementptr inbounds float* %tmp24872, i64 1
+  %tmp24874 = getelementptr inbounds float* %tmp24873, i64 1
+  %tmp24875 = getelementptr inbounds float* %tmp24874, i64 1
+  %tmp24876 = getelementptr inbounds float* %tmp24875, i64 1
+  %tmp24877 = getelementptr inbounds float* %tmp24876, i64 1
+  %tmp24878 = getelementptr inbounds float* %tmp24877, i64 1
+  %tmp24879 = getelementptr inbounds float* %tmp24878, i64 1
+  %tmp24880 = getelementptr inbounds float* %tmp24879, i64 1
+  %tmp24881 = getelementptr inbounds float* %tmp24880, i64 1
+  %tmp24882 = getelementptr inbounds float* %tmp24881, i64 1
+  %tmp24883 = getelementptr inbounds float* %tmp24882, i64 1
+  %tmp24884 = getelementptr inbounds float* %tmp24883, i64 1
+  %tmp24885 = getelementptr inbounds float* %tmp24884, i64 1
+  %tmp24886 = getelementptr inbounds float* %tmp24885, i64 1
+  %tmp24887 = getelementptr inbounds float* %tmp24886, i64 1
+  %tmp24888 = getelementptr inbounds float* %tmp24887, i64 1
+  %tmp24889 = getelementptr inbounds float* %tmp24888, i64 1
+  %tmp24890 = getelementptr inbounds float* %tmp24889, i64 1
+  %tmp24891 = getelementptr inbounds float* %tmp24890, i64 1
+  %tmp24892 = getelementptr inbounds float* %tmp24891, i64 1
+  %tmp24893 = getelementptr inbounds float* %tmp24892, i64 1
+  %tmp24894 = getelementptr inbounds float* %tmp24893, i64 1
+  %tmp24895 = getelementptr inbounds float* %tmp24894, i64 1
+  %tmp24896 = getelementptr inbounds float* %tmp24895, i64 1
+  %tmp24897 = getelementptr inbounds float* %tmp24896, i64 1
+  %tmp24898 = getelementptr inbounds float* %tmp24897, i64 1
+  %tmp24899 = getelementptr inbounds float* %tmp24898, i64 1
+  %tmp24900 = getelementptr inbounds float* %tmp24899, i64 1
+  %tmp24901 = getelementptr inbounds float* %tmp24900, i64 1
+  %tmp24902 = getelementptr inbounds float* %tmp24901, i64 1
+  %tmp24903 = getelementptr inbounds float* %tmp24902, i64 1
+  %tmp24904 = getelementptr inbounds float* %tmp24903, i64 1
+  %tmp24905 = getelementptr inbounds float* %tmp24904, i64 1
+  %tmp24906 = getelementptr inbounds float* %tmp24905, i64 1
+  %tmp24907 = getelementptr inbounds float* %tmp24906, i64 1
+  %tmp24908 = getelementptr inbounds float* %tmp24907, i64 1
+  %tmp24909 = getelementptr inbounds float* %tmp24908, i64 1
+  %tmp24910 = getelementptr inbounds float* %tmp24909, i64 1
+  %tmp24911 = getelementptr inbounds float* %tmp24910, i64 1
+  %tmp24912 = getelementptr inbounds float* %tmp24911, i64 1
+  %tmp24913 = getelementptr inbounds float* %tmp24912, i64 1
+  %tmp24914 = getelementptr inbounds float* %tmp24913, i64 1
+  %tmp24915 = getelementptr inbounds float* %tmp24914, i64 1
+  %tmp24916 = getelementptr inbounds float* %tmp24915, i64 1
+  %tmp24917 = getelementptr inbounds float* %tmp24916, i64 1
+  %tmp24918 = getelementptr inbounds float* %tmp24917, i64 1
+  %tmp24919 = getelementptr inbounds float* %tmp24918, i64 1
+  %tmp24920 = getelementptr inbounds float* %tmp24919, i64 1
+  %tmp24921 = getelementptr inbounds float* %tmp24920, i64 1
+  %tmp24922 = getelementptr inbounds float* %tmp24921, i64 1
+  %tmp24923 = getelementptr inbounds float* %tmp24922, i64 1
+  %tmp24924 = getelementptr inbounds float* %tmp24923, i64 1
+  %tmp24925 = getelementptr inbounds float* %tmp24924, i64 1
+  %tmp24926 = getelementptr inbounds float* %tmp24925, i64 1
+  %tmp24927 = getelementptr inbounds float* %tmp24926, i64 1
+  %tmp24928 = getelementptr inbounds float* %tmp24927, i64 1
+  %tmp24929 = getelementptr inbounds float* %tmp24928, i64 1
+  %tmp24930 = getelementptr inbounds float* %tmp24929, i64 1
+  %tmp24931 = getelementptr inbounds float* %tmp24930, i64 1
+  %tmp24932 = getelementptr inbounds float* %tmp24931, i64 1
+  %tmp24933 = getelementptr inbounds float* %tmp24932, i64 1
+  %tmp24934 = getelementptr inbounds float* %tmp24933, i64 1
+  %tmp24935 = getelementptr inbounds float* %tmp24934, i64 1
+  %tmp24936 = getelementptr inbounds float* %tmp24935, i64 1
+  %tmp24937 = getelementptr inbounds float* %tmp24936, i64 1
+  %tmp24938 = getelementptr inbounds float* %tmp24937, i64 1
+  %tmp24939 = getelementptr inbounds float* %tmp24938, i64 1
+  %tmp24940 = getelementptr inbounds float* %tmp24939, i64 1
+  %tmp24941 = getelementptr inbounds float* %tmp24940, i64 1
+  %tmp24942 = getelementptr inbounds float* %tmp24941, i64 1
+  %tmp24943 = getelementptr inbounds float* %tmp24942, i64 1
+  %tmp24944 = getelementptr inbounds float* %tmp24943, i64 1
+  %tmp24945 = getelementptr inbounds float* %tmp24944, i64 1
+  %tmp24946 = getelementptr inbounds float* %tmp24945, i64 1
+  store float 0x3F43FD0D00000000, float* %tmp24946
+  %tmp24947 = getelementptr inbounds float* undef, i64 1
+  %tmp24948 = getelementptr inbounds float* undef, i64 1
+  %tmp24949 = getelementptr inbounds float* undef, i64 1
+  %tmp24950 = getelementptr inbounds float* undef, i64 1
+  %tmp24951 = getelementptr inbounds float* %tmp24950, i64 1
+  %tmp24952 = getelementptr inbounds float* undef, i64 1
+  %tmp24953 = getelementptr inbounds float* undef, i64 1
+  %tmp24954 = getelementptr inbounds float* undef, i64 1
+  %tmp24955 = getelementptr inbounds float* undef, i64 1
+  %tmp24956 = getelementptr inbounds float* undef, i64 1
+  %tmp24957 = getelementptr inbounds float* undef, i64 1
+  %tmp24958 = getelementptr inbounds float* %tmp24957, i64 1
+  %tmp24959 = getelementptr inbounds float* undef, i64 1
+  %tmp24960 = getelementptr inbounds float* undef, i64 1
+  %tmp24961 = getelementptr inbounds float* undef, i64 1
+  %tmp24962 = getelementptr inbounds float* undef, i64 1
+  %tmp24963 = getelementptr inbounds float* undef, i64 1
+  %tmp24964 = getelementptr inbounds float* undef, i64 1
+  %tmp24965 = getelementptr inbounds float* undef, i64 1
+  %tmp24966 = getelementptr inbounds float* %tmp24965, i64 1
+  %tmp24967 = getelementptr inbounds float* undef, i64 1
+  %tmp24968 = getelementptr inbounds float* undef, i64 1
+  %tmp24969 = getelementptr inbounds float* undef, i64 1
+  %tmp24970 = getelementptr inbounds float* undef, i64 1
+  %tmp24971 = getelementptr inbounds float* %tmp24970, i64 1
+  %tmp24972 = getelementptr inbounds float* %tmp24971, i64 1
+  %tmp24973 = getelementptr inbounds float* %tmp24972, i64 1
+  %tmp24974 = getelementptr inbounds float* undef, i64 1
+  %tmp24975 = getelementptr inbounds float* undef, i64 1
+  %tmp24976 = getelementptr inbounds float* %tmp24975, i64 1
+  %tmp24977 = getelementptr inbounds float* undef, i64 1
+  %tmp24978 = getelementptr inbounds float* undef, i64 1
+  %tmp24979 = getelementptr inbounds float* undef, i64 1
+  %tmp24980 = getelementptr inbounds float* undef, i64 1
+  %tmp24981 = getelementptr inbounds float* undef, i64 1
+  %tmp24982 = getelementptr inbounds float* undef, i64 1
+  %tmp24983 = getelementptr inbounds float* %tmp24982, i64 1
+  %tmp24984 = getelementptr inbounds float* undef, i64 1
+  %tmp24985 = getelementptr inbounds float* %tmp24984, i64 1
+  %tmp24986 = getelementptr inbounds float* undef, i64 1
+  %tmp24987 = getelementptr inbounds float* %tmp24986, i64 1
+  %tmp24988 = getelementptr inbounds float* %tmp24987, i64 1
+  %tmp24989 = getelementptr inbounds float* undef, i64 1
+  %tmp24990 = getelementptr inbounds float* undef, i64 1
+  %tmp24991 = getelementptr inbounds float* %tmp24990, i64 1
+  %tmp24992 = getelementptr inbounds float* undef, i64 1
+  %tmp24993 = getelementptr inbounds float* %tmp24992, i64 1
+  %tmp24994 = getelementptr inbounds float* %tmp24993, i64 1
+  %tmp24995 = getelementptr inbounds float* undef, i64 1
+  %tmp24996 = getelementptr inbounds float* undef, i64 1
+  %tmp24997 = getelementptr inbounds float* undef, i64 1
+  %tmp24998 = getelementptr inbounds float* undef, i64 1
+  %tmp24999 = getelementptr inbounds float* undef, i64 1
+  %tmp25000 = getelementptr inbounds float* undef, i64 1
+  %tmp25001 = getelementptr inbounds float* undef, i64 1
+  %tmp25002 = getelementptr inbounds float* undef, i64 1
+  %tmp25003 = getelementptr inbounds float* undef, i64 1
+  %tmp25004 = getelementptr inbounds float* undef, i64 1
+  %tmp25005 = getelementptr inbounds float* undef, i64 1
+  %tmp25006 = getelementptr inbounds float* undef, i64 1
+  %tmp25007 = getelementptr inbounds float* undef, i64 1
+  %tmp25008 = getelementptr inbounds float* undef, i64 1
+  %tmp25009 = getelementptr inbounds float* undef, i64 1
+  %tmp25010 = getelementptr inbounds float* undef, i64 1
+  %tmp25011 = getelementptr inbounds float* undef, i64 1
+  %tmp25012 = getelementptr inbounds float* %tmp25011, i64 1
+  %tmp25013 = getelementptr inbounds float* undef, i64 1
+  %tmp25014 = getelementptr inbounds float* undef, i64 1
+  %tmp25015 = getelementptr inbounds float* undef, i64 1
+  %tmp25016 = getelementptr inbounds float* undef, i64 1
+  %tmp25017 = getelementptr inbounds float* %tmp25016, i64 1
+  %tmp25018 = getelementptr inbounds float* undef, i64 1
+  %tmp25019 = getelementptr inbounds float* undef, i64 1
+  %tmp25020 = getelementptr inbounds float* undef, i64 1
+  %tmp25021 = getelementptr inbounds float* undef, i64 1
+  %tmp25022 = getelementptr inbounds float* undef, i64 1
+  %tmp25023 = getelementptr inbounds float* %tmp25022, i64 1
+  %tmp25024 = getelementptr inbounds float* %tmp25023, i64 1
+  %tmp25025 = getelementptr inbounds float* undef, i64 1
+  %tmp25026 = getelementptr inbounds float* undef, i64 1
+  %tmp25027 = getelementptr inbounds float* undef, i64 1
+  %tmp25028 = getelementptr inbounds float* undef, i64 1
+  %tmp25029 = getelementptr inbounds float* undef, i64 1
+  %tmp25030 = getelementptr inbounds float* undef, i64 1
+  %tmp25031 = getelementptr inbounds float* undef, i64 1
+  %tmp25032 = getelementptr inbounds float* undef, i64 1
+  %tmp25033 = getelementptr inbounds float* undef, i64 1
+  %tmp25034 = getelementptr inbounds float* undef, i64 1
+  %tmp25035 = getelementptr inbounds float* %tmp25034, i64 1
+  %tmp25036 = getelementptr inbounds float* undef, i64 1
+  %tmp25037 = getelementptr inbounds float* undef, i64 1
+  %tmp25038 = getelementptr inbounds float* %tmp25037, i64 1
+  %tmp25039 = getelementptr inbounds float* undef, i64 1
+  %tmp25040 = getelementptr inbounds float* undef, i64 1
+  %tmp25041 = getelementptr inbounds float* undef, i64 1
+  %tmp25042 = getelementptr inbounds float* undef, i64 1
+  %tmp25043 = getelementptr inbounds float* undef, i64 1
+  %tmp25044 = getelementptr inbounds float* undef, i64 1
+  %tmp25045 = getelementptr inbounds float* %tmp25044, i64 1
+  %tmp25046 = getelementptr inbounds float* undef, i64 1
+  %tmp25047 = getelementptr inbounds float* %tmp25046, i64 1
+  %tmp25048 = getelementptr inbounds float* undef, i64 1
+  %tmp25049 = getelementptr inbounds float* %tmp25048, i64 1
+  %tmp25050 = getelementptr inbounds float* %tmp25049, i64 1
+  %tmp25051 = getelementptr inbounds float* undef, i64 1
+  %tmp25052 = getelementptr inbounds float* undef, i64 1
+  %tmp25053 = getelementptr inbounds float* undef, i64 1
+  %tmp25054 = getelementptr inbounds float* undef, i64 1
+  %tmp25055 = getelementptr inbounds float* undef, i64 1
+  %tmp25056 = getelementptr inbounds float* undef, i64 1
+  %tmp25057 = getelementptr inbounds float* undef, i64 1
+  %tmp25058 = getelementptr inbounds float* undef, i64 1
+  %tmp25059 = getelementptr inbounds float* undef, i64 1
+  %tmp25060 = getelementptr inbounds float* undef, i64 1
+  %tmp25061 = getelementptr inbounds float* undef, i64 1
+  %tmp25062 = getelementptr inbounds float* undef, i64 1
+  %tmp25063 = getelementptr inbounds float* undef, i64 1
+  %tmp25064 = getelementptr inbounds float* undef, i64 1
+  %tmp25065 = getelementptr inbounds float* undef, i64 1
+  %tmp25066 = getelementptr inbounds float* undef, i64 1
+  %tmp25067 = getelementptr inbounds float* %tmp25066, i64 1
+  %tmp25068 = getelementptr inbounds float* undef, i64 1
+  %tmp25069 = getelementptr inbounds float* %tmp25068, i64 1
+  %tmp25070 = getelementptr inbounds float* undef, i64 1
+  %tmp25071 = getelementptr inbounds float* undef, i64 1
+  %tmp25072 = getelementptr inbounds float* undef, i64 1
+  %tmp25073 = getelementptr inbounds float* undef, i64 1
+  %tmp25074 = getelementptr inbounds float* undef, i64 1
+  %tmp25075 = getelementptr inbounds float* %tmp25074, i64 1
+  %tmp25076 = getelementptr inbounds float* undef, i64 1
+  %tmp25077 = getelementptr inbounds float* undef, i64 1
+  %tmp25078 = getelementptr inbounds float* undef, i64 1
+  %tmp25079 = getelementptr inbounds float* undef, i64 1
+  %tmp25080 = getelementptr inbounds float* undef, i64 1
+  %tmp25081 = getelementptr inbounds float* undef, i64 1
+  %tmp25082 = getelementptr inbounds float* undef, i64 1
+  %tmp25083 = getelementptr inbounds float* undef, i64 1
+  %tmp25084 = getelementptr inbounds float* undef, i64 1
+  %tmp25085 = getelementptr inbounds float* undef, i64 1
+  %tmp25086 = getelementptr inbounds float* undef, i64 1
+  %tmp25087 = getelementptr inbounds float* undef, i64 1
+  %tmp25088 = getelementptr inbounds float* undef, i64 1
+  %tmp25089 = getelementptr inbounds float* undef, i64 1
+  %tmp25090 = getelementptr inbounds float* undef, i64 1
+  %tmp25091 = getelementptr inbounds float* undef, i64 1
+  %tmp25092 = getelementptr inbounds float* undef, i64 1
+  %tmp25093 = getelementptr inbounds float* undef, i64 1
+  %tmp25094 = getelementptr inbounds float* undef, i64 1
+  %tmp25095 = getelementptr inbounds float* %tmp25094, i64 1
+  %tmp25096 = getelementptr inbounds float* undef, i64 1
+  %tmp25097 = getelementptr inbounds float* %tmp25096, i64 1
+  %tmp25098 = getelementptr inbounds float* %tmp25097, i64 1
+  %tmp25099 = getelementptr inbounds float* undef, i64 1
+  %tmp25100 = getelementptr inbounds float* undef, i64 1
+  %tmp25101 = getelementptr inbounds float* undef, i64 1
+  %tmp25102 = getelementptr inbounds float* undef, i64 1
+  %tmp25103 = getelementptr inbounds float* undef, i64 1
+  %tmp25104 = getelementptr inbounds float* undef, i64 1
+  %tmp25105 = getelementptr inbounds float* undef, i64 1
+  %tmp25106 = getelementptr inbounds float* undef, i64 1
+  %tmp25107 = getelementptr inbounds float* %tmp25106, i64 1
+  %tmp25108 = getelementptr inbounds float* undef, i64 1
+  %tmp25109 = getelementptr inbounds float* undef, i64 1
+  %tmp25110 = getelementptr inbounds float* undef, i64 1
+  %tmp25111 = getelementptr inbounds float* undef, i64 1
+  %tmp25112 = getelementptr inbounds float* undef, i64 1
+  %tmp25113 = getelementptr inbounds float* undef, i64 1
+  %tmp25114 = getelementptr inbounds float* undef, i64 1
+  %tmp25115 = getelementptr inbounds float* undef, i64 1
+  %tmp25116 = getelementptr inbounds float* undef, i64 1
+  %tmp25117 = getelementptr inbounds float* undef, i64 1
+  %tmp25118 = getelementptr inbounds float* undef, i64 1
+  %tmp25119 = getelementptr inbounds float* undef, i64 1
+  %tmp25120 = getelementptr inbounds float* undef, i64 1
+  %tmp25121 = getelementptr inbounds float* undef, i64 1
+  %tmp25122 = getelementptr inbounds float* %tmp25121, i64 1
+  %tmp25123 = getelementptr inbounds float* undef, i64 1
+  %tmp25124 = getelementptr inbounds float* undef, i64 1
+  %tmp25125 = getelementptr inbounds float* undef, i64 1
+  %tmp25126 = getelementptr inbounds float* undef, i64 1
+  %tmp25127 = getelementptr inbounds float* undef, i64 1
+  %tmp25128 = getelementptr inbounds float* undef, i64 1
+  %tmp25129 = getelementptr inbounds float* undef, i64 1
+  %tmp25130 = getelementptr inbounds float* undef, i64 1
+  %tmp25131 = getelementptr inbounds float* undef, i64 1
+  %tmp25132 = getelementptr inbounds float* undef, i64 1
+  %tmp25133 = getelementptr inbounds float* undef, i64 1
+  %tmp25134 = getelementptr inbounds float* undef, i64 1
+  %tmp25135 = getelementptr inbounds float* undef, i64 1
+  %tmp25136 = getelementptr inbounds float* undef, i64 1
+  %tmp25137 = getelementptr inbounds float* undef, i64 1
+  %tmp25138 = getelementptr inbounds float* undef, i64 1
+  %tmp25139 = getelementptr inbounds float* undef, i64 1
+  %tmp25140 = getelementptr inbounds float* undef, i64 1
+  %tmp25141 = getelementptr inbounds float* undef, i64 1
+  %tmp25142 = getelementptr inbounds float* undef, i64 1
+  %tmp25143 = getelementptr inbounds float* undef, i64 1
+  %tmp25144 = getelementptr inbounds float* undef, i64 1
+  %tmp25145 = getelementptr inbounds float* undef, i64 1
+  %tmp25146 = getelementptr inbounds float* %tmp25145, i64 1
+  %tmp25147 = getelementptr inbounds float* undef, i64 1
+  %tmp25148 = getelementptr inbounds float* %tmp25147, i64 1
+  %tmp25149 = getelementptr inbounds float* undef, i64 1
+  %tmp25150 = getelementptr inbounds float* undef, i64 1
+  %tmp25151 = getelementptr inbounds float* undef, i64 1
+  %tmp25152 = getelementptr inbounds float* undef, i64 1
+  %tmp25153 = getelementptr inbounds float* %tmp25152, i64 1
+  %tmp25154 = getelementptr inbounds float* undef, i64 1
+  %tmp25155 = getelementptr inbounds float* undef, i64 1
+  %tmp25156 = getelementptr inbounds float* undef, i64 1
+  %tmp25157 = getelementptr inbounds float* undef, i64 1
+  %tmp25158 = getelementptr inbounds float* undef, i64 1
+  %tmp25159 = getelementptr inbounds float* undef, i64 1
+  %tmp25160 = getelementptr inbounds float* undef, i64 1
+  %tmp25161 = getelementptr inbounds float* undef, i64 1
+  %tmp25162 = getelementptr inbounds float* %tmp25161, i64 1
+  %tmp25163 = getelementptr inbounds float* undef, i64 1
+  %tmp25164 = getelementptr inbounds float* undef, i64 1
+  %tmp25165 = getelementptr inbounds float* undef, i64 1
+  %tmp25166 = getelementptr inbounds float* undef, i64 1
+  %tmp25167 = getelementptr inbounds float* undef, i64 1
+  %tmp25168 = getelementptr inbounds float* undef, i64 1
+  %tmp25169 = getelementptr inbounds float* undef, i64 1
+  %tmp25170 = getelementptr inbounds float* %tmp25169, i64 1
+  %tmp25171 = getelementptr inbounds float* undef, i64 1
+  %tmp25172 = getelementptr inbounds float* undef, i64 1
+  %tmp25173 = getelementptr inbounds float* undef, i64 1
+  %tmp25174 = getelementptr inbounds float* undef, i64 1
+  %tmp25175 = getelementptr inbounds float* %tmp25174, i64 1
+  %tmp25176 = getelementptr inbounds float* undef, i64 1
+  %tmp25177 = getelementptr inbounds float* undef, i64 1
+  %tmp25178 = getelementptr inbounds float* %tmp25177, i64 1
+  %tmp25179 = getelementptr inbounds float* undef, i64 1
+  %tmp25180 = getelementptr inbounds float* undef, i64 1
+  %tmp25181 = getelementptr inbounds float* undef, i64 1
+  %tmp25182 = getelementptr inbounds float* undef, i64 1
+  %tmp25183 = getelementptr inbounds float* undef, i64 1
+  %tmp25184 = getelementptr inbounds float* undef, i64 1
+  %tmp25185 = getelementptr inbounds float* undef, i64 1
+  %tmp25186 = getelementptr inbounds float* undef, i64 1
+  %tmp25187 = getelementptr inbounds float* %tmp25186, i64 1
+  %tmp25188 = getelementptr inbounds float* %tmp25187, i64 1
+  %tmp25189 = getelementptr inbounds float* undef, i64 1
+  %tmp25190 = getelementptr inbounds float* undef, i64 1
+  %tmp25191 = getelementptr inbounds float* undef, i64 1
+  %tmp25192 = getelementptr inbounds float* %tmp25191, i64 1
+  %tmp25193 = getelementptr inbounds float* undef, i64 1
+  %tmp25194 = getelementptr inbounds float* undef, i64 1
+  %tmp25195 = getelementptr inbounds float* undef, i64 1
+  %tmp25196 = getelementptr inbounds float* undef, i64 1
+  %tmp25197 = getelementptr inbounds float* undef, i64 1
+  %tmp25198 = getelementptr inbounds float* undef, i64 1
+  %tmp25199 = getelementptr inbounds float* undef, i64 1
+  %tmp25200 = getelementptr inbounds float* undef, i64 1
+  %tmp25201 = getelementptr inbounds float* %tmp25200, i64 1
+  %tmp25202 = getelementptr inbounds float* undef, i64 1
+  %tmp25203 = getelementptr inbounds float* undef, i64 1
+  %tmp25204 = getelementptr inbounds float* undef, i64 1
+  %tmp25205 = getelementptr inbounds float* undef, i64 1
+  %tmp25206 = getelementptr inbounds float* undef, i64 1
+  %tmp25207 = getelementptr inbounds float* undef, i64 1
+  %tmp25208 = getelementptr inbounds float* undef, i64 1
+  %tmp25209 = getelementptr inbounds float* undef, i64 1
+  %tmp25210 = getelementptr inbounds float* undef, i64 1
+  %tmp25211 = getelementptr inbounds float* undef, i64 1
+  %tmp25212 = getelementptr inbounds float* undef, i64 1
+  %tmp25213 = getelementptr inbounds float* undef, i64 1
+  %tmp25214 = getelementptr inbounds float* undef, i64 1
+  %tmp25215 = getelementptr inbounds float* undef, i64 1
+  %tmp25216 = getelementptr inbounds float* undef, i64 1
+  %tmp25217 = getelementptr inbounds float* undef, i64 1
+  %tmp25218 = getelementptr inbounds float* undef, i64 1
+  %tmp25219 = getelementptr inbounds float* undef, i64 1
+  %tmp25220 = getelementptr inbounds float* undef, i64 1
+  %tmp25221 = getelementptr inbounds float* undef, i64 1
+  %tmp25222 = getelementptr inbounds float* undef, i64 1
+  %tmp25223 = getelementptr inbounds float* undef, i64 1
+  %tmp25224 = getelementptr inbounds float* undef, i64 1
+  %tmp25225 = getelementptr inbounds float* undef, i64 1
+  %tmp25226 = getelementptr inbounds float* undef, i64 1
+  %tmp25227 = getelementptr inbounds float* undef, i64 1
+  %tmp25228 = getelementptr inbounds float* undef, i64 1
+  %tmp25229 = getelementptr inbounds float* undef, i64 1
+  %tmp25230 = getelementptr inbounds float* %tmp25229, i64 1
+  %tmp25231 = getelementptr inbounds float* undef, i64 1
+  %tmp25232 = getelementptr inbounds float* undef, i64 1
+  %tmp25233 = getelementptr inbounds float* undef, i64 1
+  %tmp25234 = getelementptr inbounds float* undef, i64 1
+  %tmp25235 = getelementptr inbounds float* %tmp25234, i64 1
+  %tmp25236 = getelementptr inbounds float* undef, i64 1
+  %tmp25237 = getelementptr inbounds float* %tmp25236, i64 1
+  %tmp25238 = getelementptr inbounds float* undef, i64 1
+  %tmp25239 = getelementptr inbounds float* undef, i64 1
+  %tmp25240 = getelementptr inbounds float* undef, i64 1
+  %tmp25241 = getelementptr inbounds float* undef, i64 1
+  %tmp25242 = getelementptr inbounds float* undef, i64 1
+  %tmp25243 = getelementptr inbounds float* undef, i64 1
+  %tmp25244 = getelementptr inbounds float* undef, i64 1
+  %tmp25245 = getelementptr inbounds float* undef, i64 1
+  %tmp25246 = getelementptr inbounds float* undef, i64 1
+  %tmp25247 = getelementptr inbounds float* undef, i64 1
+  %tmp25248 = getelementptr inbounds float* %tmp25247, i64 1
+  %tmp25249 = getelementptr inbounds float* undef, i64 1
+  %tmp25250 = getelementptr inbounds float* undef, i64 1
+  %tmp25251 = getelementptr inbounds float* undef, i64 1
+  %tmp25252 = getelementptr inbounds float* undef, i64 1
+  %tmp25253 = getelementptr inbounds float* undef, i64 1
+  %tmp25254 = getelementptr inbounds float* undef, i64 1
+  %tmp25255 = getelementptr inbounds float* undef, i64 1
+  %tmp25256 = getelementptr inbounds float* undef, i64 1
+  %tmp25257 = getelementptr inbounds float* undef, i64 1
+  %tmp25258 = getelementptr inbounds float* undef, i64 1
+  %tmp25259 = getelementptr inbounds float* undef, i64 1
+  %tmp25260 = getelementptr inbounds float* undef, i64 1
+  %tmp25261 = getelementptr inbounds float* undef, i64 1
+  %tmp25262 = getelementptr inbounds float* undef, i64 1
+  %tmp25263 = getelementptr inbounds float* undef, i64 1
+  %tmp25264 = getelementptr inbounds float* undef, i64 1
+  %tmp25265 = getelementptr inbounds float* undef, i64 1
+  %tmp25266 = getelementptr inbounds float* undef, i64 1
+  %tmp25267 = getelementptr inbounds float* undef, i64 1
+  %tmp25268 = getelementptr inbounds float* undef, i64 1
+  %tmp25269 = getelementptr inbounds float* undef, i64 1
+  br i1 undef, label %bb25270, label %bb25271
+
+bb25270:                                          ; preds = %bb2
+  br label %bb25362
+
+bb25271:                                          ; preds = %bb2
+  br label %bb25272
+
+bb25272:                                          ; preds = %bb25275, %bb25271
+  br i1 false, label %bb25273, label %bb25278
+
+bb25273:                                          ; preds = %bb25272
+  invoke void @foo()
+          to label %bb25274 unwind label %bb25276
+
+bb25274:                                          ; preds = %bb25273
+  invoke void @bar()
+          to label %bb25275 unwind label %bb25276
+
+bb25275:                                          ; preds = %bb25274
+  br label %bb25272
+
+bb25276:                                          ; preds = %bb25283, %bb25274, %bb25273
+  %tmp25277 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb25361
+
+bb25278:                                          ; preds = %bb25272
+  br label %bb25279
+
+bb25279:                                          ; preds = %bb25284, %bb25278
+  br i1 undef, label %bb25280, label %bb25285
+
+bb25280:                                          ; preds = %bb25279
+  br label %bb25281
+
+bb25281:                                          ; preds = %bb25282, %bb25280
+  br i1 undef, label %bb25282, label %bb25283
+
+bb25282:                                          ; preds = %bb25281
+  br label %bb25281
+
+bb25283:                                          ; preds = %bb25281
+  invoke void @bar()
+          to label %bb25284 unwind label %bb25276
+
+bb25284:                                          ; preds = %bb25283
+  br label %bb25279
+
+bb25285:                                          ; preds = %bb25279
+  br label %bb25286
+
+bb25286:                                          ; preds = %bb25303, %bb25285
+  br i1 undef, label %bb25287, label %bb25304
+
+bb25287:                                          ; preds = %bb25286
+  invoke void @bar()
+          to label %bb25288 unwind label %bb25298
+
+bb25288:                                          ; preds = %bb25287
+  br i1 undef, label %bb25289, label %bb25300
+
+bb25289:                                          ; preds = %bb25288
+  br i1 undef, label %bb25290, label %bb25300
+
+bb25290:                                          ; preds = %bb25289
+  invoke void @bar()
+          to label %bb25291 unwind label %bb25298
+
+bb25291:                                          ; preds = %bb25290
+  br i1 undef, label %bb25292, label %bb25295
+
+bb25292:                                          ; preds = %bb25291
+  br i1 undef, label %bb25294, label %bb25293
+
+bb25293:                                          ; preds = %bb25292
+  br label %bb25294
+
+bb25294:                                          ; preds = %bb25293, %bb25292
+  br label %bb25296
+
+bb25295:                                          ; preds = %bb25291
+  invoke void @quuuux()
+          to label %bb25296 unwind label %bb25298
+
+bb25296:                                          ; preds = %bb25295, %bb25294
+  invoke void @baz()
+          to label %bb25297 unwind label %bb25298
+
+bb25297:                                          ; preds = %bb25296
+  br label %bb25300
+
+bb25298:                                          ; preds = %bb25296, %bb25295, %bb25290, %bb25287
+  %tmp25299 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb25360
+
+bb25300:                                          ; preds = %bb25297, %bb25289, %bb25288
+  br i1 undef, label %bb25301, label %bb25302
+
+bb25301:                                          ; preds = %bb25300
+  br label %bb25303
+
+bb25302:                                          ; preds = %bb25300
+  br label %bb25303
+
+bb25303:                                          ; preds = %bb25302, %bb25301
+  br label %bb25286
+
+bb25304:                                          ; preds = %bb25286
+  br label %bb25305
+
+bb25305:                                          ; preds = %bb25331, %bb25304
+  br i1 undef, label %bb25306, label %bb25332
+
+bb25306:                                          ; preds = %bb25305
+  invoke void @quuux()
+          to label %bb25307 unwind label %bb25324
+
+bb25307:                                          ; preds = %bb25306
+  invoke void @quux()
+          to label %bb25308 unwind label %bb25324
+
+bb25308:                                          ; preds = %bb25307
+  br i1 undef, label %bb25309, label %bb25330
+
+bb25309:                                          ; preds = %bb25308
+  br i1 undef, label %bb25310, label %bb25330
+
+bb25310:                                          ; preds = %bb25309
+  br i1 undef, label %bb25311, label %bb25317
+
+bb25311:                                          ; preds = %bb25310
+  br label %bb25312
+
+bb25312:                                          ; preds = %bb25316, %bb25315, %bb25311
+  br i1 undef, label %bb25313, label %bb25317
+
+bb25313:                                          ; preds = %bb25312
+  %tmp25314 = invoke zeroext i1 undef(%0* undef, %0* undef)
+          to label %bb25315 unwind label %bb25324
+
+bb25315:                                          ; preds = %bb25313
+  br i1 %tmp25314, label %bb25316, label %bb25312
+
+bb25316:                                          ; preds = %bb25315
+  br label %bb25312
+
+bb25317:                                          ; preds = %bb25312, %bb25310
+  br i1 undef, label %bb25318, label %bb25326
+
+bb25318:                                          ; preds = %bb25317
+  br i1 undef, label %bb25319, label %bb25326
+
+bb25319:                                          ; preds = %bb25318
+  br i1 undef, label %bb25320, label %bb25323
+
+bb25320:                                          ; preds = %bb25319
+  br i1 undef, label %bb25322, label %bb25321
+
+bb25321:                                          ; preds = %bb25320
+  br label %bb25322
+
+bb25322:                                          ; preds = %bb25321, %bb25320
+  br label %bb25326
+
+bb25323:                                          ; preds = %bb25319
+  invoke void @qux()
+          to label %bb25326 unwind label %bb25324
+
+bb25324:                                          ; preds = %bb25357, %bb25344, %bb25343, %bb25342, %bb25337, %bb25334, %bb25333, %bb25323, %bb25313, %bb25307, %bb25306
+  %tmp25325 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb25359
+
+bb25326:                                          ; preds = %bb25323, %bb25322, %bb25318, %bb25317
+  br label %bb25327
+
+bb25327:                                          ; preds = %bb25328, %bb25326
+  br i1 undef, label %bb25328, label %bb25329
+
+bb25328:                                          ; preds = %bb25327
+  br label %bb25327
+
+bb25329:                                          ; preds = %bb25327
+  br label %bb25330
+
+bb25330:                                          ; preds = %bb25329, %bb25309, %bb25308
+  br i1 undef, label %bb25332, label %bb25331
+
+bb25331:                                          ; preds = %bb25330
+  br label %bb25305
+
+bb25332:                                          ; preds = %bb25330, %bb25305
+  br i1 undef, label %bb25333, label %bb25357
+
+bb25333:                                          ; preds = %bb25332
+  invoke void (...)* @printf()
+          to label %bb25334 unwind label %bb25324
+
+bb25334:                                          ; preds = %bb25333
+  invoke void (...)* @printf(i32 undef)
+          to label %bb25335 unwind label %bb25324
+
+bb25335:                                          ; preds = %bb25334
+  br label %bb25336
+
+bb25336:                                          ; preds = %bb25338, %bb25335
+  br i1 undef, label %bb25337, label %bb25339
+
+bb25337:                                          ; preds = %bb25336
+  invoke void (...)* @printf(i32 undef, double undef)
+          to label %bb25338 unwind label %bb25324
+
+bb25338:                                          ; preds = %bb25337
+  br label %bb25336
+
+bb25339:                                          ; preds = %bb25336
+  br label %bb25340
+
+bb25340:                                          ; preds = %bb25341, %bb25339
+  br i1 undef, label %bb25341, label %bb25342
+
+bb25341:                                          ; preds = %bb25340
+  br label %bb25340
+
+bb25342:                                          ; preds = %bb25340
+  invoke void (...)* @printf()
+          to label %bb25343 unwind label %bb25324
+
+bb25343:                                          ; preds = %bb25342
+  invoke void (...)* @printf(double undef, double undef)
+          to label %bb25344 unwind label %bb25324
+
+bb25344:                                          ; preds = %bb25343
+  invoke void @mux()
+          to label %bb25345 unwind label %bb25324
+
+bb25345:                                          ; preds = %bb25344
+  br label %bb25346
+
+bb25346:                                          ; preds = %bb25347, %bb25345
+  br i1 undef, label %bb25347, label %bb25348
+
+bb25347:                                          ; preds = %bb25346
+  br label %bb25346
+
+bb25348:                                          ; preds = %bb25346
+  br label %bb25349
+
+bb25349:                                          ; preds = %bb25350, %bb25348
+  br i1 undef, label %bb25350, label %bb25351
+
+bb25350:                                          ; preds = %bb25349
+  br label %bb25349
+
+bb25351:                                          ; preds = %bb25349
+  invoke void (...)* @printf()
+          to label %bb25352 unwind label %bb25355
+
+bb25352:                                          ; preds = %bb25351
+  invoke void (...)* @printf(double undef)
+          to label %bb25353 unwind label %bb25355
+
+bb25353:                                          ; preds = %bb25352
+  invoke void (...)* @printf()
+          to label %bb25354 unwind label %bb25355
+
+bb25354:                                          ; preds = %bb25353
+  br label %bb25358
+
+bb25355:                                          ; preds = %bb25353, %bb25352, %bb25351
+  %tmp25356 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb25359
+
+bb25357:                                          ; preds = %bb25332
+  invoke void (...)* @printf()
+          to label %bb25358 unwind label %bb25324
+
+bb25358:                                          ; preds = %bb25357, %bb25354
+  br label %bb25362
+
+bb25359:                                          ; preds = %bb25355, %bb25324
+  br label %bb25360
+
+bb25360:                                          ; preds = %bb25359, %bb25298
+  br label %bb25361
+
+bb25361:                                          ; preds = %bb25360, %bb25276
+  resume { i8*, i32 } undef
+
+bb25362:                                          ; preds = %bb25358, %bb25270, %bb1
+  ret void
+}
+
+declare void @foo()  ; external callee; reached via `invoke` in the function above
+
+declare i32 @__gxx_personality_v0(...)  ; named (bitcast to i8*) as the personality by the landingpads above
+
+declare void @bar() uwtable ssp align 2  ; external callee; invoked from several blocks of the function above
+
+declare hidden void @baz() uwtable ssp align 2  ; external callee; reached via `invoke` in the function above
+
+declare void @printf(...)  ; declared here as void and variadic; invoked above with differing argument lists
+
+declare void @mux() unnamed_addr uwtable ssp align 2  ; external callee; reached via `invoke` in the function above
+
+declare hidden void @qux() uwtable ssp align 2  ; external callee; reached via `invoke` in the function above
+
+declare void @quux() uwtable ssp  ; external callee; reached via `invoke` in the function above
+
+declare void @quuux() uwtable ssp  ; external callee; reached via `invoke` in the function above
+
+declare hidden void @quuuux() uwtable ssp align 2  ; external callee; reached via `invoke` in the function above
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index 21128096e6e7..82cefb728c6e 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -7,7 +7,7 @@ define i32 @test1(i32 %A, i32 %B) {
 ; The above computation of %tmp4 should match a single lea, without using
 ; actual add instructions.
 ; CHECK-NOT: add
-; CHECK: lea {{[A-Z]+}}, DWORD PTR [{{[A-Z]+}} + 4*{{[A-Z]+}} - 5]
+; CHECK: lea {{[a-z]+}}, dword ptr [{{[a-z]+}} + 4*{{[a-z]+}} - 5]
 
   ret i32 %tmp4
 }
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
index 3f32fd27c5c1..9480600312ce 100644
--- a/test/CodeGen/X86/lea-recursion.ll
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep lea | count 12
+; RUN: llc < %s -march=x86-64 | grep lea | count 13
 
 ; This testcase was written to demonstrate an instruction-selection problem,
 ; however it also happens to expose a limitation in the DAGCombiner's
@@ -44,4 +44,3 @@ entry:
 	store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
 	ret void
 }
-
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 542135529f1d..93cfe4611b44 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -5,8 +5,8 @@ define i32 @test1(i32 %x) nounwind {
         %tmp1 = shl i32 %x, 3
         %tmp2 = add i32 %tmp1, 7
         ret i32 %tmp2
-; CHECK: test1:
-; CHECK:    leal 7(,[[A0:%rdi|%rcx]],8), %eax
+; CHECK-LABEL: test1:
+; CHECK:    leal 7(,%r[[A0:di|cx]],8), %eax
 }
 
 
@@ -27,9 +27,9 @@ bb.nph:
 
 bb2:
 	ret i32 %x_offs
-; CHECK: test2:
-; CHECK:	leal	-5([[A0]]), %eax
+; CHECK-LABEL: test2:
+; CHECK:        leal    -5(%r[[A0:..]]), %eax
 ; CHECK:	andl	$-4, %eax
 ; CHECK:	negl	%eax
-; CHECK:	leal	-4([[A0]],%rax), %eax
+; CHECK:	leal	-4(%r[[A0]],%rax), %eax
 }
diff --git a/test/CodeGen/X86/leaf-fp-elim.ll b/test/CodeGen/X86/leaf-fp-elim.ll
index 607dc72e2fa3..1bb3c7519146 100644
--- a/test/CodeGen/X86/leaf-fp-elim.ll
+++ b/test/CodeGen/X86/leaf-fp-elim.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-non-leaf-fp-elim -relocation-model=pic -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -relocation-model=pic -mtriple=x86_64-apple-darwin | FileCheck %s
 ; <rdar://problem/8170192>
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0"
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin11.0"
 @msg = internal global i8* null                   ; <i8**> [#uses=1]
 @.str = private constant [2 x i8] c"x\00", align 1 ; <[2 x i8]*> [#uses=1]
 
-define void @test(i8* %p) nounwind optsize ssp {
+define void @test(i8* %p) "no-frame-pointer-elim-non-leaf" nounwind optsize ssp {
 
 ; No stack frame, please.
 ; CHECK:     _test
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
index 71ef2d3152f8..64460bb91186 100644
--- a/test/CodeGen/X86/legalize-shift-64.ll
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -1,12 +1,11 @@
-; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
-
+; RUN: llc -mcpu=generic -mtriple=i686-unknown-unknown < %s | FileCheck %s
 define i64 @test1(i32 %xx, i32 %test) nounwind {
   %conv = zext i32 %xx to i64
   %and = and i32 %test, 7
   %sh_prom = zext i32 %and to i64
   %shl = shl i64 %conv, %sh_prom
   ret i64 %shl
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: shll	%cl, %eax
 ; CHECK: shrl	%edx
 ; CHECK: xorb	$31
@@ -18,7 +17,7 @@ define i64 @test2(i64 %xx, i32 %test) nounwind {
   %sh_prom = zext i32 %and to i64
   %shl = shl i64 %xx, %sh_prom
   ret i64 %shl
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: shll	%cl, %esi
 ; CHECK: shrl	%edx
 ; CHECK: xorb	$31
@@ -32,7 +31,7 @@ define i64 @test3(i64 %xx, i32 %test) nounwind {
   %sh_prom = zext i32 %and to i64
   %shr = lshr i64 %xx, %sh_prom
   ret i64 %shr
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: shrl	%cl, %esi
 ; CHECK: leal	(%edx,%edx), %eax
 ; CHECK: xorb	$31, %cl
@@ -46,7 +45,7 @@ define i64 @test4(i64 %xx, i32 %test) nounwind {
   %sh_prom = zext i32 %and to i64
   %shr = ashr i64 %xx, %sh_prom
   ret i64 %shr
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: shrl	%cl, %esi
 ; CHECK: leal	(%edx,%edx), %eax
 ; CHECK: xorb	$31, %cl
@@ -65,3 +64,31 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) {
 ; CHECK: shl
 ; CHECK: shldl
 }
+
+; PR16108
+define i32 @test6() {
+  %x = alloca i32, align 4
+  %t = alloca i64, align 8
+  store i32 1, i32* %x, align 4
+  store i64 1, i64* %t, align 8  ;; DEAD
+  %load = load i32* %x, align 4
+  %shl = shl i32 %load, 8
+  %add = add i32 %shl, -224
+  %sh_prom = zext i32 %add to i64
+  %shl1 = shl i64 1, %sh_prom
+  %cmp = icmp ne i64 %shl1, 4294967296
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret i32 1
+
+if.end:                                           ; preds = %entry
+  ret i32 0
+
+; CHECK-LABEL: test6:
+; CHECK-NOT: andb $31
+; CHECK: sete
+; CHECK: movzbl
+; CHECK: xorl $1
+; CHECK: orl
+}
diff --git a/test/CodeGen/X86/licm-dominance.ll b/test/CodeGen/X86/licm-dominance.ll
index 019f8a32b6c0..7e3c6fdf9514 100644
--- a/test/CodeGen/X86/licm-dominance.ll
+++ b/test/CodeGen/X86/licm-dominance.ll
@@ -2,7 +2,7 @@
 
 ; MachineLICM should check dominance before hoisting instructions.
 ; CHECK: ## in Loop:
-; CHECK-NEXT:	xorb	%al, %al
+; CHECK-NEXT:	xorl	%eax, %eax
 ; CHECK-NEXT:	testb	%al, %al
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index 66074fb3682c..083ae0875e39 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3
+; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 4
 
 ; MachineLICM should be able to hoist the symbolic addresses out of
 ; the inner loops.
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index 9d285bf4e238..1637fa46536a 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -1,4 +1,10 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
+# FIXME: For now, override suffixes to exclude any .s tests, because some of the
+# buildbots have a stray misched-copy.s output file lying around that causes
+# failures. See misched-copy.s where we try and clean up that file.
+#
+# It should be possible to remove this override once all the bots have cycled
+# cleanly.
+config.suffixes = ['.ll', '.c', '.cpp', '.test', '.txt']
 
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
diff --git a/test/CodeGen/X86/load-slice.ll b/test/CodeGen/X86/load-slice.ll
new file mode 100644
index 000000000000..85fd7f03ef62
--- /dev/null
+++ b/test/CodeGen/X86/load-slice.ll
@@ -0,0 +1,139 @@
+; RUN: llc -mtriple x86_64-apple-macosx -mcpu=corei7-avx -combiner-stress-load-slicing < %s -o - | FileCheck %s --check-prefix=STRESS
+; RUN: llc -mtriple x86_64-apple-macosx -mcpu=corei7-avx < %s -o - | FileCheck %s --check-prefix=REGULAR
+;
+; <rdar://problem/14477220>
+
+%class.Complex = type { float, float }
+
+
+; Check that independent slices lead to independent loads when the slices lead to
+; different register files.
+;
+; The layout is:
+; LSB 0 1 2 3 | 4 5 6 7 MSB
+;       Low      High
+; The base address points to 0 and is 8-bytes aligned.
+; Low slice starts at 0 (base) and is 8-bytes aligned.
+; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned.
+;
+; STRESS-LABEL: t1:
+; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
+; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]]
+; Add low slice: out[out_start].real, this is base + 0.
+; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
+; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
+; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
+; Add high slice: out[out_start].imm, this is base + 4.
+; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
+; Swap Imm and Real.
+; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
+; Put the results back into out[out_start].
+; STRESS-NEXT: vmovq [[RES_Vec]], ([[BASE]])
+;
+; Same for REGULAR: we eliminate the register bank copy for each slice.
+; REGULAR-LABEL: t1:
+; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
+; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]]
+; Add low slice: out[out_start].real, this is base + 0.
+; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
+; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
+; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
+; Add high slice: out[out_start].imm, this is base + 4.
+; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
+; Swap Imm and Real.
+; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
+; Put the results back into out[out_start].
+; REGULAR-NEXT: vmovq [[RES_Vec]], ([[BASE]])
+define void @t1(%class.Complex* nocapture %out, i64 %out_start) {
+entry:
+  %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+  %tmp = bitcast %class.Complex* %arrayidx to i64*
+  %tmp1 = load i64* %tmp, align 8
+  %t0.sroa.0.0.extract.trunc = trunc i64 %tmp1 to i32
+  %tmp2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float
+  %t0.sroa.2.0.extract.shift = lshr i64 %tmp1, 32
+  %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32
+  %tmp3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float
+  %add = add i64 %out_start, 8
+  %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add
+  %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0
+  %tmp4 = load float* %i.i, align 4
+  %add.i = fadd float %tmp4, %tmp2
+  %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0
+  %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1
+  %tmp5 = load float* %r.i, align 4
+  %add5.i = fadd float %tmp5, %tmp3
+  %retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1
+  %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>*
+  store <2 x float> %retval.sroa.0.4.vec.insert.i, <2 x float>* %ref.tmp.sroa.0.0.cast, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+; Check that we do not read outside of the chunk of bits of the original loads.
+;
+; The 64-bit load should have been split into one 32-bit and one 16-bit slice.
+; The 16-bits should be zero extended to match the final type.
+;
+; The memory layout is:
+; LSB 0 1 2 3 | 4 5 | 6 7 MSB
+;      Low            High
+; The base address points to 0 and is 8-bytes aligned.
+; Low slice starts at 0 (base) and is 8-bytes aligned.
+; High slice starts at 6 (base + 6-bytes) and is 2-bytes aligned.
+;
+; STRESS-LABEL: t2:
+; STRESS: movzwl 6([[BASE:[^)]+]]), %eax
+; STRESS-NEXT: addl ([[BASE]]), %eax
+; STRESS-NEXT: ret
+;
+; For the REGULAR heuristic, it is not profitable to slice things that are not
+; next to each other in memory. Here we have a hole with bytes #4-5.
+; REGULAR-LABEL: t2:
+; REGULAR: shrq $48
+define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) {
+  %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+  %bitcast = bitcast %class.Complex* %arrayidx to i64*
+  %chunk64 = load i64* %bitcast, align 8
+  %slice32_low = trunc i64 %chunk64 to i32
+  %shift48 = lshr i64 %chunk64, 48
+  %slice32_high = trunc i64 %shift48 to i32
+  %res = add i32 %slice32_high, %slice32_low
+  ret i32 %res
+}
+
+; Check that we do not optimize overlapping slices.
+;
+; The 64-bit load should NOT have been split, as the slices are overlapping.
+; First slice uses bytes numbered 0 to 3.
+; Second slice uses bytes numbered 6 and 7.
+; Third slice uses bytes numbered 4 to 7.
+;
+; STRESS-LABEL: t3:
+; STRESS: shrq $48
+; STRESS: shrq $32
+;
+; REGULAR-LABEL: t3:
+; REGULAR: shrq $48
+; REGULAR: shrq $32
+define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) {
+  %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+  %bitcast = bitcast %class.Complex* %arrayidx to i64*
+  %chunk64 = load i64* %bitcast, align 8
+  %slice32_low = trunc i64 %chunk64 to i32
+  %shift48 = lshr i64 %chunk64, 48
+  %slice32_high = trunc i64 %shift48 to i32
+  %shift32 = lshr i64 %chunk64, 32
+  %slice32_lowhigh = trunc i64 %shift32 to i32
+  %tmpres = add i32 %slice32_high, %slice32_low
+  %res = add i32 %slice32_lowhigh, %tmpres
+  ret i32 %res
+}
diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll
index 9765faeecadc..5ce771f14ab2 100644
--- a/test/CodeGen/X86/lock-inst-encoding.ll
+++ b/test/CodeGen/X86/lock-inst-encoding.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: addq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x01,0x37]
 ; CHECK: ret
 define void @f1(i64* %a, i64 %b) nounwind {
@@ -11,7 +11,7 @@ define void @f1(i64* %a, i64 %b) nounwind {
   ret void
 }
 
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: subq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x29,0x37]
 ; CHECK: ret
 define void @f2(i64* %a, i64 %b) nounwind {
@@ -19,7 +19,7 @@ define void @f2(i64* %a, i64 %b) nounwind {
   ret void
 }
 
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: andq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x21,0x37]
 ; CHECK: ret
 define void @f3(i64* %a, i64 %b) nounwind {
@@ -27,7 +27,7 @@ define void @f3(i64* %a, i64 %b) nounwind {
   ret void
 }
 
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: orq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x09,0x37]
 ; CHECK: ret
 define void @f4(i64* %a, i64 %b) nounwind {
@@ -35,7 +35,7 @@ define void @f4(i64* %a, i64 %b) nounwind {
   ret void
 }
 
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: xorq %{{.*}}, ({{.*}}){{.*}}encoding: [0xf0,0x48,0x31,0x37]
 ; CHECK: ret
 define void @f5(i64* %a, i64 %b) nounwind {
diff --git a/test/CodeGen/X86/long-extend.ll b/test/CodeGen/X86/long-extend.ll
new file mode 100644
index 000000000000..5bbd41dad9d2
--- /dev/null
+++ b/test/CodeGen/X86/long-extend.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=core-avx-i -mtriple=x86_64-linux -asm-verbose=0| FileCheck %s
+define void @test_long_extend(<16 x i8> %a, <16 x i32>* %p) nounwind {
+; CHECK-LABEL: test_long_extend
+; CHECK: vpunpcklbw	%xmm1, %xmm0, [[REG1:%xmm[0-9]+]]
+; CHECK: vpunpckhwd	%xmm1, [[REG1]], [[REG2:%xmm[0-9]+]]
+; CHECK: vpunpcklwd	%xmm1, [[REG1]], %x[[REG3:mm[0-9]+]]
+; CHECK: vinsertf128	$1, [[REG2]], %y[[REG3]], [[REG_result0:%ymm[0-9]+]]
+; CHECK: vpunpckhbw	%xmm1, %xmm0, [[REG4:%xmm[0-9]+]]
+; CHECK: vpunpckhwd	%xmm1, [[REG4]], [[REG5:%xmm[0-9]+]]
+; CHECK: vpunpcklwd	%xmm1, [[REG4]], %x[[REG6:mm[0-9]+]]
+; CHECK: vinsertf128	$1, [[REG5]], %y[[REG6]], [[REG_result1:%ymm[0-9]+]]
+; CHECK: vmovaps	[[REG_result1]], 32(%rdi)
+; CHECK: vmovaps	[[REG_result0]], (%rdi)
+
+  %tmp = zext <16 x i8> %a to <16 x i32>
+  store <16 x i32> %tmp, <16 x i32>*%p
+  ret void
+}
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
index db91961e0410..73e10127c065 100644
--- a/test/CodeGen/X86/longlong-deadload.ll
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -2,7 +2,7 @@
 ; This should not load or store the top part of *P.
 
 define void @test(i64* %P) nounwind  {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: movl 4(%esp), %[[REGISTER:.*]]
 ; CHECK-NOT: 4(%[[REGISTER]])
 ; CHECK: ret
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index 4bd162b45294..a81ceb902ab4 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -6,7 +6,7 @@
 ; CodeGen should insert a branch into the middle of the loop in
 ; order to avoid a branch within the loop.
 
-; CHECK: simple:
+; CHECK-LABEL: simple:
 ;      CHECK:   jmp   .LBB0_1
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB0_2:
@@ -36,7 +36,7 @@ done:
 ; CodeGen should move block_a to the top of the loop so that it
 ; falls through into the loop, avoiding a branch within the loop.
 
-; CHECK: slightly_more_involved:
+; CHECK-LABEL: slightly_more_involved:
 ;      CHECK:   jmp .LBB1_1
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB1_4:
@@ -72,7 +72,7 @@ exit:
 ; fallthrough edges which should be preserved.
 ; "callq block_a_merge_func" is tail duped.
 
-; CHECK: yet_more_involved:
+; CHECK-LABEL: yet_more_involved:
 ;      CHECK:   jmp .LBB2_1
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB2_5:
@@ -132,7 +132,7 @@ exit:
 ; conveniently fit anywhere so that they are at least contiguous with the
 ; loop.
 
-; CHECK: cfg_islands:
+; CHECK-LABEL: cfg_islands:
 ;      CHECK:   jmp     .LBB3_1
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB3_7:
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
index d1de0510a046..d4a7ac7da12d 100644
--- a/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -5,7 +5,7 @@
 
 ; CHECK:      BB0_3:
 ; CHECK-NEXT:   movb    $0, flags(%rdx)
-; CHECK-NEXT:   addq    %rcx, %rdx
+; CHECK-NEXT:   addq    %rax, %rdx
 ; CHECK-NEXT:   cmpq    $8192, %rdx
 ; CHECK-NEXT:   jl
 
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 8a81f70a8a2a..e7d74a924075 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,13 +1,13 @@
 ; RUN: llc -mtriple=x86_64-darwin -mcpu=generic < %s | FileCheck %s
 ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
-; CHECK: t:
-; CHECK: decq
-; CHECK-NEXT: movl (%r9,%rax,4), %eax
+; CHECK-LABEL: t:
+; CHECK: movl (%r9,%rax,4), %e{{..}}
+; CHECK-NEXT: decq
 ; CHECK-NEXT: jne
 
-; ATOM: t:
-; ATOM: movl (%r9,%rax,4), %eax
+; ATOM-LABEL: t:
+; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}
 ; ATOM-NEXT: decq
 ; ATOM-NEXT: jne
 
@@ -148,14 +148,14 @@ bb2:		; preds = %bb
 ; is equal to the stride.
 ; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
 
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: %for.body
 ; CHECK: incl [[IV:%e..]]
 ; CHECK: cmpl $1, [[IV]]
 ; CHECK: jne
 ; CHECK: ret
 
-; ATOM: f:
+; ATOM-LABEL: f:
 ; ATOM: %for.body
 ; ATOM: incl [[IV:%e..]]
 ; ATOM: cmpl $1, [[IV]]
@@ -190,4 +190,3 @@ for.end:                                          ; preds = %for.body, %entry
   %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
   ret i32 %bi.0.lcssa
 }
-
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 1311a73fd32c..40c041ab6b09 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-unknown"
 ; Instruction selection should use the FLAGS value from the dec for
 ; the branch. Scheduling should push the adds upwards.
 
-; CHECK: full_me_0:
+; CHECK-LABEL: full_me_0:
 ; CHECK: movsd   (%rsi), %xmm0
 ; CHECK: mulsd   (%rdx), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi)
@@ -50,7 +50,7 @@ return:
 ; would be better on x86-64, since the start value would be 0 instead of
 ; 2048.
 
-; CHECK: mostly_full_me_0:
+; CHECK-LABEL: mostly_full_me_0:
 ; CHECK: movsd   -2048(%rsi), %xmm0
 ; CHECK: mulsd   -2048(%rdx), %xmm0
 ; CHECK: movsd   %xmm0, -2048(%rdi)
@@ -96,7 +96,7 @@ return:
 ; A minor variation on mostly_full_me_0.
 ; Prefer to start the indvar at 0.
 
-; CHECK: mostly_full_me_1:
+; CHECK-LABEL: mostly_full_me_1:
 ; CHECK: movsd   (%rsi), %xmm0
 ; CHECK: mulsd   (%rdx), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi)
@@ -141,7 +141,7 @@ return:
 
 ; A slightly less minor variation on mostly_full_me_0.
 
-; CHECK: mostly_full_me_2:
+; CHECK-LABEL: mostly_full_me_2:
 ; CHECK: movsd   (%rsi), %xmm0
 ; CHECK: mulsd   (%rdx), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi)
@@ -190,7 +190,7 @@ return:
 ; cases away, but it's useful here to verify that LSR's register pressure
 ; heuristics are working as expected.
 
-; CHECK: count_me_0:
+; CHECK-LABEL: count_me_0:
 ; CHECK: movsd   (%rsi,%rax,8), %xmm0
 ; CHECK: mulsd   (%rdx,%rax,8), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi,%rax,8)
@@ -225,7 +225,7 @@ return:
 ; would not reduce register pressure.
 ; (though it would reduce register pressure inside the loop...)
 
-; CHECK: count_me_1:
+; CHECK-LABEL: count_me_1:
 ; CHECK: movsd   (%rsi,%rax,8), %xmm0
 ; CHECK: mulsd   (%rdx,%rax,8), %xmm0
 ; CHECK: movsd   %xmm0, (%rdi,%rax,8)
@@ -259,7 +259,7 @@ return:
 ; Full strength reduction doesn't save any registers here because the
 ; loop tripcount is a constant.
 
-; CHECK: count_me_2:
+; CHECK-LABEL: count_me_2:
 ; CHECK: movl    $10, %eax
 ; CHECK: align
 ; CHECK: BB6_1:
@@ -305,7 +305,7 @@ return:
 
 ; This should be fully strength-reduced to reduce register pressure.
 
-; CHECK: full_me_1:
+; CHECK-LABEL: full_me_1:
 ; CHECK: align
 ; CHECK: BB7_1:
 ; CHECK: movsd   (%rdi), %xmm0
@@ -353,7 +353,7 @@ return:
 ; This is a variation on full_me_0 in which the 0,+,1 induction variable
 ; has a non-address use, pinning that value in a register.
 
-; CHECK: count_me_3:
+; CHECK-LABEL: count_me_3:
 ; CHECK: call
 ; CHECK: movsd   (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
 ; CHECK: mulsd   (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
@@ -390,7 +390,7 @@ return:
 ; LSR should use only one indvar for the inner loop.
 ; rdar://7657764
 
-; CHECK: asd:
+; CHECK-LABEL: asd:
 ; CHECK: BB9_4:
 ; CHECK-NEXT: addl  (%r{{[^,]*}},%rdi,4), %e
 ; CHECK-NEXT: incq  %rdi
@@ -447,7 +447,7 @@ bb5:                                              ; preds = %bb3, %entry
 ; we don't want to leave extra induction variables around, or use an
 ; lea to compute an exit condition inside the loop:
 
-; CHECK: test:
+; CHECK-LABEL: test:
 
 ; CHECK:      BB10_4:
 ; CHECK-NEXT:   movaps  %xmm{{.*}}, %xmm{{.*}}
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index b2aea90500c4..1bac790f57f9 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -10,8 +10,9 @@
 ; CHECK-NEXT: movsd
 ; CHECK-NEXT: incq %rax
 
-; ATOM: movsd .LCPI0_0(%rip), %xmm0
+
 ; ATOM: xorl  %eax, %eax
+; ATOM: movsd .LCPI0_0(%rip), %xmm0
 ; ATOM: align
 ; ATOM-NEXT: BB0_2:
 ; ATOM-NEXT: movsd A(,%rax,8)
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index 2faa24a9a544..ff83f8540946 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -8,55 +8,55 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 define i8 @t1(i8 %x) nounwind  {
 	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
 	ret i8 %tmp
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: lzcntl
 }
 
 define i16 @t2(i16 %x) nounwind  {
 	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
 	ret i16 %tmp
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: lzcntw
 }
 
 define i32 @t3(i32 %x) nounwind  {
 	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
 	ret i32 %tmp
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: lzcntl
 }
 
 define i64 @t4(i64 %x) nounwind  {
 	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
 	ret i64 %tmp
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK: lzcntq
 }
 
 define i8 @t5(i8 %x) nounwind  {
 	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
 	ret i8 %tmp
-; CHECK: t5:
+; CHECK-LABEL: t5:
 ; CHECK: lzcntl
 }
 
 define i16 @t6(i16 %x) nounwind  {
 	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
 	ret i16 %tmp
-; CHECK: t6:
+; CHECK-LABEL: t6:
 ; CHECK: lzcntw
 }
 
 define i32 @t7(i32 %x) nounwind  {
 	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
 	ret i32 %tmp
-; CHECK: t7:
+; CHECK-LABEL: t7:
 ; CHECK: lzcntl
 }
 
 define i64 @t8(i64 %x) nounwind  {
 	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
 	ret i64 %tmp
-; CHECK: t8:
+; CHECK-LABEL: t8:
 ; CHECK: lzcntq
 }
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
index 8e97b991d076..f04e111714ae 100644
--- a/test/CodeGen/X86/machine-cp.ll
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -4,7 +4,7 @@
 ; rdar://10640363
 define i32 @t1(i32 %a, i32 %b) nounwind  {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: je [[LABEL:.*BB.*]]
   %cmp1 = icmp eq i32 %b, 0
   br i1 %cmp1, label %while.end, label %while.body
@@ -29,7 +29,7 @@ while.end:                                        ; preds = %while.body, %entry
 ; rdar://10428165
 define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK-NOT: movdqa
   %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
   ret <8 x i16> %tmp8
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d171fd5f1d9f..409147b1d1f1 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -8,7 +8,7 @@
 
 define fastcc i8* @t(i32 %base) nounwind {
 entry:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: leaq (%rax,%rax,4)
   %0 = zext i32 %base to i64
   %1 = getelementptr inbounds %struct.s2* null, i64 %0
@@ -43,7 +43,7 @@ declare fastcc i8* @foo(%struct.s2*) nounwind
 declare void @printf(...) nounwind
 
 define void @commute(i32 %test_case, i32 %scale) nounwind ssp {
-; CHECK: commute:
+; CHECK-LABEL: commute:
 entry:
   switch i32 %test_case, label %sw.bb307 [
     i32 1, label %sw.bb
@@ -52,13 +52,14 @@ entry:
   ]
 
 sw.bb:                                            ; preds = %entry, %entry, %entry
+; CHECK: %sw.bb
+; CHECK: imull
   %mul = mul nsw i32 %test_case, 3
   %mul20 = mul nsw i32 %mul, %scale
   br i1 undef, label %if.end34, label %sw.bb307
 
 if.end34:                                         ; preds = %sw.bb
 ; CHECK: %if.end34
-; CHECK: imull
 ; CHECK: leal
 ; CHECK-NOT: imull
   tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind
@@ -82,7 +83,7 @@ sw.bb307:                                         ; preds = %sw.bb, %entry
 ; rdar://10660865
 define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: cross_mbb_phys_cse:
+; CHECK-LABEL: cross_mbb_phys_cse:
 ; CHECK: cmpl
 ; CHECK: ja
   %cmp = icmp ugt i32 %a, %b
@@ -152,7 +153,7 @@ a:
 b:
   ret i32 0
 
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: t2_global@GOTPCREL(%rip)
 ; CHECK-NOT: t2_global@GOTPCREL(%rip)
 }
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index a7b036e9b658..4a4d178f6e41 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,16 +1,13 @@
-; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
-; RUN: not grep and %t
-; RUN: not grep movz %t
-; RUN: not grep sar %t
-; RUN: not grep shl %t
-; RUN: grep add %t | count 1
-; RUN: grep inc %t | count 4
-; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 3
+; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
 
+; CHECK-LABEL: count_up
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: inc
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @count_up(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -39,6 +36,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: count_down
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: addq
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @count_down(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -67,6 +69,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: count_up_signed
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: inc
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @count_up_signed(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -97,6 +104,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: count_down_signed
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: addq
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @count_down_signed(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -127,6 +139,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: another_count_up
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: addq
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @another_count_up(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -155,6 +172,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: another_count_down
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: decq
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @another_count_down(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -183,6 +205,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: another_count_up_signed
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: addq
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @another_count_up_signed(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
@@ -213,6 +240,11 @@ return:
 	ret void
 }
 
+; CHECK-LABEL: another_count_down_signed
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: decq
+; CHECK-NOT: {{and|movz|sar|shl}}
+; CHECK: jne
 define void @another_count_down_signed(double* %d, i64 %n) nounwind {
 entry:
 	br label %loop
diff --git a/test/CodeGen/X86/maskmovdqu.ll b/test/CodeGen/X86/maskmovdqu.ll
index 7796f0e9a19e..0b3334d19f89 100644
--- a/test/CodeGen/X86/maskmovdqu.ll
+++ b/test/CodeGen/X86/maskmovdqu.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86    -mattr=+sse2 | grep -i EDI
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI
+; RUN: llc < %s -march=x86    -mattr=+sse2,-avx | grep -i EDI
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-avx | grep -i RDI
+; RUN: llc < %s -march=x86    -mattr=+avx | grep -i EDI
+; RUN: llc < %s -march=x86-64 -mattr=+avx | grep -i RDI
 ; rdar://6573467
 
 define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll
index 41f96e8856c9..db72e0871c8e 100644
--- a/test/CodeGen/X86/mcinst-avx-lowering.ll
+++ b/test/CodeGen/X86/mcinst-avx-lowering.ll
@@ -4,7 +4,7 @@ define i64 @t1(double %d_ivar) nounwind uwtable ssp {
 entry:
 ; CHECK: t1
   %0 = bitcast double %d_ivar to i64
-; CHECK: vmovd
+; CHECK: vmovq
 ; CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
   ret i64 %0
 }
@@ -13,7 +13,7 @@ define double @t2(i64 %d_ivar) nounwind uwtable ssp {
 entry:
 ; CHECK: t2
   %0 = bitcast i64 %d_ivar to double
-; CHECK: vmovd
+; CHECK: vmovq
 ; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
   ret double %0
 }
diff --git a/test/CodeGen/X86/mcinst-lowering.ll b/test/CodeGen/X86/mcinst-lowering.ll
index 1ef5a971bab3..a82cfc431ba4 100644
--- a/test/CodeGen/X86/mcinst-lowering.ll
+++ b/test/CodeGen/X86/mcinst-lowering.ll
@@ -24,3 +24,21 @@ if.end:                                           ; preds = %entry
 return:                                           ; preds = %entry
   ret i32 0
 }
+
+define i32 @f1() nounwind {
+  %ax = tail call i16 asm sideeffect "", "={ax},~{dirflag},~{fpsr},~{flags}"()
+  %conv = sext i16 %ax to i32
+  ret i32 %conv
+
+; CHECK-LABEL: f1:
+; CHECK: cwtl ## encoding: [0x98]
+}
+
+define i64 @f2() nounwind {
+  %eax = tail call i32 asm sideeffect "", "={ax},~{dirflag},~{fpsr},~{flags}"()
+  %conv = sext i32 %eax to i64
+  ret i64 %conv
+
+; CHECK-LABEL: f2:
+; CHECK: cltq ## encoding: [0x48,0x98]
+}
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index 723d1d89427e..cb0797d3eb33 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -21,10 +21,10 @@ bb:                                               ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: memcmp2:
+; CHECK-LABEL: memcmp2:
 ; CHECK: movw    ([[A0:%rdi|%rcx]]), %ax
 ; CHECK: cmpw    ([[A1:%rsi|%rdx]]), %ax
-; NOBUILTIN: memcmp2:
+; NOBUILTIN-LABEL: memcmp2:
 ; NOBUILTIN: callq
 }
 
@@ -40,7 +40,7 @@ bb:                                               ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: memcmp2a:
+; CHECK-LABEL: memcmp2a:
 ; CHECK: cmpw    $28527, ([[A0]])
 }
 
@@ -57,7 +57,7 @@ bb:                                               ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: memcmp4:
+; CHECK-LABEL: memcmp4:
 ; CHECK: movl    ([[A0]]), %eax
 ; CHECK: cmpl    ([[A1]]), %eax
 }
@@ -74,7 +74,7 @@ bb:                                               ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: memcmp4a:
+; CHECK-LABEL: memcmp4a:
 ; CHECK: cmpl $1869573999, ([[A0]])
 }
 
@@ -90,7 +90,7 @@ bb:                                               ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: memcmp8:
+; CHECK-LABEL: memcmp8:
 ; CHECK: movq    ([[A0]]), %rax
 ; CHECK: cmpq    ([[A1]]), %rax
 }
@@ -107,7 +107,7 @@ bb:                                               ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: memcmp8a:
+; CHECK-LABEL: memcmp8a:
 ; CHECK: movabsq $8029759185026510694, %rax
 ; CHECK: cmpq	%rax, ([[A0]])
 }
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 630c0ed1a33c..6ae7807810e9 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -9,28 +9,28 @@
 
 define void @t1(i32 %argc, i8** %argv) nounwind  {
 entry:
-; SSE2-Darwin: t1:
+; SSE2-Darwin-LABEL: t1:
 ; SSE2-Darwin: movsd _.str+16, %xmm0
 ; SSE2-Darwin: movsd %xmm0, 16(%esp)
 ; SSE2-Darwin: movaps _.str, %xmm0
 ; SSE2-Darwin: movaps %xmm0
 ; SSE2-Darwin: movb $0, 24(%esp)
 
-; SSE2-Mingw32: t1:
+; SSE2-Mingw32-LABEL: t1:
 ; SSE2-Mingw32: movsd _.str+16, %xmm0
 ; SSE2-Mingw32: movsd %xmm0, 16(%esp)
 ; SSE2-Mingw32: movaps _.str, %xmm0
 ; SSE2-Mingw32: movups %xmm0
 ; SSE2-Mingw32: movb $0, 24(%esp)
 
-; SSE1: t1:
+; SSE1-LABEL: t1:
 ; SSE1: movaps _.str, %xmm0
 ; SSE1: movaps %xmm0
 ; SSE1: movb $0, 24(%esp)
 ; SSE1: movl $0, 20(%esp)
 ; SSE1: movl $0, 16(%esp)
 
-; NOSSE: t1:
+; NOSSE-LABEL: t1:
 ; NOSSE: movb $0
 ; NOSSE: movl $0
 ; NOSSE: movl $0
@@ -39,7 +39,7 @@ entry:
 ; NOSSE: movl $101
 ; NOSSE: movl $1734438249
 
-; X86-64: t1:
+; X86-64-LABEL: t1:
 ; X86-64: movaps _.str(%rip), %xmm0
 ; X86-64: movaps %xmm0
 ; X86-64: movb $0
@@ -55,19 +55,19 @@ entry:
 
 define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
 entry:
-; SSE2-Darwin: t2:
-; SSE2-Darwin: movaps (%eax), %xmm0
+; SSE2-Darwin-LABEL: t2:
+; SSE2-Darwin: movaps (%ecx), %xmm0
 ; SSE2-Darwin: movaps %xmm0, (%eax)
 
-; SSE2-Mingw32: t2:
-; SSE2-Mingw32: movaps (%eax), %xmm0
+; SSE2-Mingw32-LABEL: t2:
+; SSE2-Mingw32: movaps (%ecx), %xmm0
 ; SSE2-Mingw32: movaps %xmm0, (%eax)
 
-; SSE1: t2:
-; SSE1: movaps (%eax), %xmm0
+; SSE1-LABEL: t2:
+; SSE1: movaps (%ecx), %xmm0
 ; SSE1: movaps %xmm0, (%eax)
 
-; NOSSE: t2:
+; NOSSE-LABEL: t2:
 ; NOSSE: movl
 ; NOSSE: movl
 ; NOSSE: movl
@@ -79,7 +79,7 @@ entry:
 ; NOSSE: movl
 ; NOSSE: movl
 
-; X86-64: t2:
+; X86-64-LABEL: t2:
 ; X86-64: movaps (%rsi), %xmm0
 ; X86-64: movaps %xmm0, (%rdi)
   %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
@@ -90,19 +90,19 @@ entry:
 
 define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
 entry:
-; SSE2-Darwin: t3:
-; SSE2-Darwin: movsd (%eax), %xmm0
-; SSE2-Darwin: movsd 8(%eax), %xmm1
+; SSE2-Darwin-LABEL: t3:
+; SSE2-Darwin: movsd (%ecx), %xmm0
+; SSE2-Darwin: movsd 8(%ecx), %xmm1
 ; SSE2-Darwin: movsd %xmm1, 8(%eax)
 ; SSE2-Darwin: movsd %xmm0, (%eax)
 
-; SSE2-Mingw32: t3:
-; SSE2-Mingw32: movsd (%eax), %xmm0
-; SSE2-Mingw32: movsd 8(%eax), %xmm1
+; SSE2-Mingw32-LABEL: t3:
+; SSE2-Mingw32: movsd (%ecx), %xmm0
+; SSE2-Mingw32: movsd 8(%ecx), %xmm1
 ; SSE2-Mingw32: movsd %xmm1, 8(%eax)
 ; SSE2-Mingw32: movsd %xmm0, (%eax)
 
-; SSE1: t3:
+; SSE1-LABEL: t3:
 ; SSE1: movl
 ; SSE1: movl
 ; SSE1: movl
@@ -114,7 +114,7 @@ entry:
 ; SSE1: movl
 ; SSE1: movl
 
-; NOSSE: t3:
+; NOSSE-LABEL: t3:
 ; NOSSE: movl
 ; NOSSE: movl
 ; NOSSE: movl
@@ -126,7 +126,7 @@ entry:
 ; NOSSE: movl
 ; NOSSE: movl
 
-; X86-64: t3:
+; X86-64-LABEL: t3:
 ; X86-64: movq (%rsi), %rax
 ; X86-64: movq 8(%rsi), %rcx
 ; X86-64: movq %rcx, 8(%rdi)
@@ -139,7 +139,7 @@ entry:
 
 define void @t4() nounwind {
 entry:
-; SSE2-Darwin: t4:
+; SSE2-Darwin-LABEL: t4:
 ; SSE2-Darwin: movw $120
 ; SSE2-Darwin: movl $2021161080
 ; SSE2-Darwin: movl $2021161080
@@ -149,7 +149,7 @@ entry:
 ; SSE2-Darwin: movl $2021161080
 ; SSE2-Darwin: movl $2021161080
 
-; SSE2-Mingw32: t4:
+; SSE2-Mingw32-LABEL: t4:
 ; SSE2-Mingw32: movw $120
 ; SSE2-Mingw32: movl $2021161080
 ; SSE2-Mingw32: movl $2021161080
@@ -159,7 +159,7 @@ entry:
 ; SSE2-Mingw32: movl $2021161080
 ; SSE2-Mingw32: movl $2021161080
 
-; SSE1: t4:
+; SSE1-LABEL: t4:
 ; SSE1: movw $120
 ; SSE1: movl $2021161080
 ; SSE1: movl $2021161080
@@ -169,7 +169,7 @@ entry:
 ; SSE1: movl $2021161080
 ; SSE1: movl $2021161080
 
-; NOSSE: t4:
+; NOSSE-LABEL: t4:
 ; NOSSE: movw $120
 ; NOSSE: movl $2021161080
 ; NOSSE: movl $2021161080
@@ -179,7 +179,7 @@ entry:
 ; NOSSE: movl $2021161080
 ; NOSSE: movl $2021161080
 
-; X86-64: t4:
+; X86-64-LABEL: t4:
 ; X86-64: movabsq $8680820740569200760, %rax
 ; X86-64: movq %rax
 ; X86-64: movq %rax
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 3372a4adc5ee..88b6cfd2295f 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -10,7 +10,7 @@ entry:
 	tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
 	ret i8* %a
         
-; LINUX: test1:
+; LINUX-LABEL: test1:
 ; LINUX: memcpy
 }
 
@@ -22,7 +22,7 @@ entry:
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
 	ret i8* %tmp14
         
-; LINUX: test2:
+; LINUX-LABEL: test2:
 ; LINUX: memcpy
 }
 
@@ -36,10 +36,10 @@ define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzon
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
-; LINUX: test3:
+; LINUX-LABEL: test3:
 ; LINUX: memcpy
 
-; DARWIN: test3:
+; DARWIN-LABEL: test3:
 ; DARWIN-NOT: memcpy
 ; DARWIN: movq
 ; DARWIN: movq
@@ -64,7 +64,7 @@ define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
-; LINUX: test4:
+; LINUX-LABEL: test4:
 ; LINUX: movq
 ; LINUX: movq
 ; LINUX: movq
@@ -87,7 +87,7 @@ entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
   ret void
 
-; DARWIN: test5:
+; DARWIN-LABEL: test5:
 ; DARWIN: movabsq	$7016996765293437281
 ; DARWIN: movabsq	$7016996765293437184
 }
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index b2bd72bb312b..d0a3c7a74bce 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -4,7 +4,7 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
 
 define fastcc void @t1() nounwind {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: calll _memset
   call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false)
   unreachable
@@ -12,7 +12,7 @@ entry:
 
 define fastcc void @t2(i8 signext %c) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: calll _memset
   call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false)
   unreachable
@@ -24,7 +24,7 @@ define void @t3(i8* nocapture %s, i8 %a) nounwind {
 entry:
   tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
   ret void
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: imull $16843009
 }
 
@@ -32,7 +32,7 @@ define void @t4(i8* nocapture %s, i8 %a) nounwind {
 entry:
   tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
   ret void
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK: imull $16843009
 ; CHECK-NOT: imul
 ; CHECK: ret
diff --git a/test/CodeGen/X86/memset-sse-stack-realignment.ll b/test/CodeGen/X86/memset-sse-stack-realignment.ll
index df9de5dfaf22..d77a7ed38165 100644
--- a/test/CodeGen/X86/memset-sse-stack-realignment.ll
+++ b/test/CodeGen/X86/memset-sse-stack-realignment.ll
@@ -14,26 +14,26 @@ define void @test1(i32 %t) nounwind {
   call void @dummy(i8* %x)
   ret void
 
-; NOSSE: test1:
+; NOSSE-LABEL: test1:
 ; NOSSE-NOT: and
 ; NOSSE: movl $0
 
-; SSE1: test1:
+; SSE1-LABEL: test1:
 ; SSE1: andl $-16
 ; SSE1: movl %esp, %esi
 ; SSE1: movaps
 
-; SSE2: test1:
+; SSE2-LABEL: test1:
 ; SSE2: andl $-16
 ; SSE2: movl %esp, %esi
 ; SSE2: movaps
 
-; AVX1: test1:
+; AVX1-LABEL: test1:
 ; AVX1: andl $-32
 ; AVX1: movl %esp, %esi
 ; AVX1: vmovaps %ymm
 
-; AVX2: test1:
+; AVX2-LABEL: test1:
 ; AVX2: andl $-32
 ; AVX2: movl %esp, %esi
 ; AVX2: vmovaps %ymm
@@ -47,26 +47,26 @@ define void @test2(i32 %t) nounwind {
   call void @dummy(i8* %x)
   ret void
 
-; NOSSE: test2:
+; NOSSE-LABEL: test2:
 ; NOSSE-NOT: and
 ; NOSSE: movl $0
 
-; SSE1: test2:
+; SSE1-LABEL: test2:
 ; SSE1: andl $-16
 ; SSE1: movl %esp, %esi
 ; SSE1: movaps
 
-; SSE2: test2:
+; SSE2-LABEL: test2:
 ; SSE2: andl $-16
 ; SSE2: movl %esp, %esi
 ; SSE2: movaps
 
-; AVX1: test2:
+; AVX1-LABEL: test2:
 ; AVX1: andl $-16
 ; AVX1: movl %esp, %esi
 ; AVX1: vmovaps %xmm
 
-; AVX2: test2:
+; AVX2-LABEL: test2:
 ; AVX2: andl $-16
 ; AVX2: movl %esp, %esi
 ; AVX2: vmovaps %xmm
diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll
new file mode 100644
index 000000000000..940688c6252f
--- /dev/null
+++ b/test/CodeGen/X86/merge_store.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
+
+define void @merge_store(i32* nocapture %a) {
+; CHECK-LABEL: merge_store:
+; CHECK: movq
+; CHECK: movq
+entry:
+  br label %for.body
+
+  for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; starts at 0 and advances by 4 each iteration
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 1, i32* %arrayidx, align 4 ; a[iv] = 1
+  %0 = or i64 %indvars.iv, 1 ; iv is a multiple of 4, so or-ing in 1 gives iv + 1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %0
+  store i32 1, i32* %arrayidx2, align 4 ; a[iv + 1] = 1
+  %1 = or i64 %indvars.iv, 2 ; iv + 2, for the same reason
+  %arrayidx5 = getelementptr inbounds i32* %a, i64 %1
+  store i32 1, i32* %arrayidx5, align 4 ; a[iv + 2] = 1
+  %2 = or i64 %indvars.iv, 3 ; iv + 3, for the same reason
+  %arrayidx8 = getelementptr inbounds i32* %a, i64 %2
+  store i32 1, i32* %arrayidx8, align 4 ; a[iv + 3] = 1; four adjacent i32 stores, two movq expected above
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+  %3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %3, 1000 ; keep looping while the next index is below 1000
+  br i1 %cmp, label %for.body, label %for.end
+
+  for.end:
+  ret void
+}
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index ded4b73d0931..72b69400ffa1 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -1,12 +1,14 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mtriple=i386-pc-mingw32      | FileCheck %s -check-prefix=COFF
+; RUN: llc < %s -mtriple=i386-pc-mingw32-elf  | FileCheck %s -check-prefix=ELF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i386-pc-mingw32"
 
 define void @foo1(i32 %N) nounwind {
 entry:
-; CHECK: _foo1:
-; CHECK: calll __alloca
+; COFF: _foo1:
+; COFF: calll __alloca
+; ELF: foo1:
+; ELF: calll _alloca
 	%tmp14 = alloca i32, i32 %N		; <i32*> [#uses=1]
 	call void @bar1( i32* %tmp14 )
 	ret void
@@ -16,11 +18,16 @@ declare void @bar1(i32*)
 
 define void @foo2(i32 inreg  %N) nounwind {
 entry:
-; CHECK: _foo2:
-; CHECK: andl $-16, %esp
-; CHECK: pushl %eax
-; CHECK: calll __alloca
-; CHECK: movl	8028(%esp), %eax
+; COFF: _foo2:
+; COFF: andl $-16, %esp
+; COFF: pushl %eax
+; COFF: calll __alloca
+; COFF: movl	8028(%esp), %eax
+; ELF: foo2:
+; ELF: andl $-16, %esp
+; ELF: pushl %eax
+; ELF: calll _alloca
+; ELF: movl	8028(%esp), %eax
 	%A2 = alloca [2000 x i32], align 16		; <[2000 x i32]*> [#uses=1]
 	%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0		; <i32*> [#uses=1]
 	call void @bar2( i32* %A2.sub, i32 %N )
diff --git a/test/CodeGen/X86/misched-balance.ll b/test/CodeGen/X86/misched-balance.ll
index 2184d9e96036..1900802ac9b3 100644
--- a/test/CodeGen/X86/misched-balance.ll
+++ b/test/CodeGen/X86/misched-balance.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
-; RUN:          -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
 ;
 ; Verify that misched resource/latency balancy heuristics are sane.
 
@@ -16,7 +15,7 @@ entry:
 ; Since mmult1 IR is already in good order, this effectively ensure
 ; the scheduler maintains source order.
 ;
-; CHECK: %for.body
+; CHECK-LABEL: %for.body
 ; CHECK-NOT: %rsp
 ; CHECK: imull 4
 ; CHECK-NOT: {{imull|rsp}}
@@ -46,7 +45,7 @@ entry:
 ; CHECK-NOT: {{imull|rsp}}
 ; CHECK: addl
 ; CHECK-NOT: {{imull|rsp}}
-; CHECK: %end
+; CHECK-LABEL: %end
 for.body:
   %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
   %tmp57 = load i32* %tmp56, align 4
@@ -121,7 +120,7 @@ end:
 ; Unlike the above loop, this IR starts out bad and must be
 ; rescheduled.
 ;
-; CHECK: %for.body
+; CHECK-LABEL: %for.body
 ; CHECK-NOT: %rsp
 ; CHECK: imull 4
 ; CHECK-NOT: {{imull|rsp}}
@@ -151,7 +150,7 @@ end:
 ; CHECK-NOT: {{imull|rsp}}
 ; CHECK: addl
 ; CHECK-NOT: {{imull|rsp}}
-; CHECK: %end
+; CHECK-LABEL: %end
 define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
   i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
   i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
@@ -228,3 +227,51 @@ for.body:
 end:
   ret void
 }
+
+; A mildly interesting little block extracted from a cipher.  The
+; balanced heuristics are interesting here because we have resource,
+; latency, and register limits all at once. For now, simply check that
+; we don't use any callee-saves.
+; CHECK-LABEL: @encpc1
+; CHECK-LABEL: %entry
+; CHECK-NOT: push
+; CHECK-NOT: pop
+; CHECK: ret
+@a = external global i32, align 4
+@b = external global i32, align 4
+@c = external global i32, align 4
+@d = external global i32, align 4
+define i32 @encpc1() nounwind {
+entry:
+  %l1 = load i32* @a, align 16 ; NOTE(review): align 16 here vs. align 4 on the @a declaration - confirm intended
+  %conv = shl i32 %l1, 8
+  %s5 = lshr i32 %l1, 8
+  %add = or i32 %conv, %s5 ; (l1 << 8) | (l1 >> 8)
+  store i32 %add, i32* @b
+  %l6 = load i32* @a
+  %l7 = load i32* @c
+  %add.i = add i32 %l7, %l6
+  %idxprom.i = zext i32 %l7 to i64
+  %arrayidx.i = getelementptr inbounds i32* @d, i64 %idxprom.i ; address of element %l7 counted from @d
+  %l8 = load i32* %arrayidx.i
+  store i32 346, i32* @c
+  store i32 20021, i32* @d
+  %l9 = load i32* @a
+  store i32 %l8, i32* @a
+  store i32 %l9, i32* @b
+  store i32 %add.i, i32* @c
+  store i32 %l9, i32* @d
+  %cmp.i = icmp eq i32 %add.i, 0
+  %s10 = lshr i32 %l1, 16
+  %s12 = lshr i32 %l1, 24
+  %s14 = lshr i32 %l1, 30
+  br i1 %cmp.i, label %if, label %return ; the shift sum is only needed when %add.i == 0
+if:
+  %sa = add i32 %s5, %s10
+  %sb = add i32 %sa, %s12
+  %sc = add i32 %sb, %s14 ; s5 + s10 + s12 + s14
+  br label %return
+return:
+  %result = phi i32 [0, %entry], [%sc, %if] ; 0 when coming straight from entry, the shift sum via %if
+  ret i32 %result
+}
diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll
index 0450cfb53908..4485b8a244a8 100644
--- a/test/CodeGen/X86/misched-copy.ll
+++ b/test/CodeGen/X86/misched-copy.ll
@@ -8,11 +8,11 @@
 ; MUL_HiLo PhysReg use copies should be just above the mul.
 ; MUL_HiLo PhysReg def copies should be just below the mul.
 ;
-; CHECK:      *** Final schedule for BB#1 ***
-; CHECK-NEXT: %EAX<def> = COPY
-; CHECK:      MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>;
-; CHECK-NEXT: COPY %E{{[AD]}}X;
-; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK: *** Final schedule for BB#1 ***
+; CHECK:      %EAX<def> = COPY
+; CHECK-NEXT: MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>;
+; CHECK-NEXT: COPY %E{{[AD]}}X
+; CHECK-NEXT: COPY %E{{[AD]}}X
 ; CHECK:      DIVSSrm
 define i64 @mulhoist(i32 %a, i32 %b) #0 {
 entry:
@@ -42,7 +42,7 @@ end:
   ret i64 %add
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !0 = metadata !{metadata !"float", metadata !1}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
diff --git a/test/CodeGen/X86/misched-fusion.ll b/test/CodeGen/X86/misched-fusion.ll
new file mode 100644
index 000000000000..859d92d6978b
--- /dev/null
+++ b/test/CodeGen/X86/misched-fusion.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -disable-lsr -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
+
+; Verify that TEST+JE are scheduled together.
+; CHECK: test_je
+; CHECK: %loop
+; CHECK: test
+; CHECK-NEXT: je
+define void @test_je() {
+entry:
+  br label %loop
+
+loop:                                            ; preds = %loop2, %loop1, %entry
+  %var = phi i32* [ null, %entry ], [ %next.load, %loop1 ], [ %var, %loop2 ]
+  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [ %gep, %loop2 ]
+  br label %loop1
+
+loop1:                                           ; preds = %loop
+  %cond = icmp eq i32* %var, null ; null test on %var; this compare feeds the conditional branch below
+  %next.load = load i32** %next.ptr
+  br i1 %cond, label %loop, label %loop2
+
+loop2:                                           ; preds = %loop1
+  %gep = getelementptr inbounds i32** %next.ptr, i32 1
+  store i32* %next.load, i32** undef
+  br label %loop
+}
+
+; Verify that DEC+JE are scheduled together.
+; CHECK: dec_je
+; CHECK: %loop1
+; CHECK: dec
+; CHECK-NEXT: je
+define void @dec_je() {
+entry:
+  br label %loop
+
+loop:                                            ; preds = %loop2, %loop1, %entry
+  %var = phi i32 [ 0, %entry ], [ %next.var, %loop1 ], [ %var2, %loop2 ]
+  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [ %gep, %loop2 ]
+  br label %loop1
+
+loop1:                                           ; preds = %loop
+  %var2 = sub i32 %var, 1 ; decrement by one
+  %cond = icmp eq i32 %var2, 0 ; equality-with-zero test on the decremented value
+  %next.load = load i32** %next.ptr
+  %next.var = load i32* %next.load
+  br i1 %cond, label %loop, label %loop2
+
+loop2:                                           ; preds = %loop1
+  %gep = getelementptr inbounds i32** %next.ptr, i32 1
+  store i32* %next.load, i32** undef
+  br label %loop
+}
+
+; DEC+JS should *not* be scheduled together.
+; CHECK: dec_js
+; CHECK: %loop1
+; CHECK: dec
+; CHECK: mov
+; CHECK: js
+define void @dec_js() {
+entry:
+  br label %loop2a
+
+loop2a:                                           ; preds = %loop2b, %loop1, %entry
+  %var = phi i32 [ 0, %entry ], [ %next.var, %loop1 ], [ %var2, %loop2b ]
+  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [ %gep, %loop2b ]
+  br label %loop1
+
+loop1:                                            ; preds = %loop2a
+  %var2 = sub i32 %var, 1
+  %cond = icmp slt i32 %var2, 0
+  %next.load = load i32** %next.ptr
+  %next.var = load i32* %next.load
+  br i1 %cond, label %loop2a, label %loop2b
+
+loop2b:                                           ; preds = %loop1
+  %gep = getelementptr inbounds i32** %next.ptr, i32 1
+  store i32* %next.load, i32** undef
+  br label %loop2a
+}
+
+; Verify that CMP+JB are scheduled together.
+; CHECK: cmp_jb
+; CHECK: %loop1
+; CHECK: cmp
+; CHECK-NEXT: jb
+define void @cmp_jb(i32 %n) {
+entry:
+  br label %loop2a
+
+loop2a:                                           ; preds = %loop2b, %loop1, %entry
+  %var = phi i32 [ 0, %entry ], [ %next.var, %loop1 ], [ %var2, %loop2b ]
+  %next.ptr = phi i32** [ null, %entry ], [ %next.ptr, %loop1 ], [ %gep, %loop2b ]
+  br label %loop1
+
+loop1:                                            ; preds = %loop2a
+  %var2 = sub i32 %var, 1
+  %cond = icmp ult i32 %var2, %n
+  %next.load = load i32** %next.ptr
+  %next.var = load i32* %next.load
+  br i1 %cond, label %loop2a, label %loop2b
+
+loop2b:                                           ; preds = %loop1
+  %gep = getelementptr inbounds i32** %next.ptr, i32 1
+  store i32* %next.load, i32** undef
+  br label %loop2a
+}
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
index 15e8a0ad6f4b..5454b7cf780a 100644
--- a/test/CodeGen/X86/misched-matmul.ll
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -3,11 +3,14 @@
 ;
 ; Verify that register pressure heuristics are working in MachineScheduler.
 ;
-; When we enable subtree scheduling heuristics on X86, we may need a
-; flag to disable it for this test case.
+; We can further reduce spills in this case with a global register
+; pressure heuristic, like sethi-ullman numbers or biasing toward
+; scheduled subtrees. However, these heuristics are marginally
+; beneficial on x86_64 and exacerbate register pressure in other
+; more complex cases.
 ;
 ; CHECK: @wrap_mul4
-; CHECK: 30 regalloc - Number of spills inserted
+; CHECK: 23 regalloc - Number of spills inserted
 
 define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
 entry:
@@ -221,4 +224,4 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
index 4dc95c5e9326..23b561f6e5db 100644
--- a/test/CodeGen/X86/misched-matrix.ll
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -15,19 +15,19 @@
 ; been reordered with the stores. This tests the scheduler's cheap
 ; alias analysis ability (that doesn't require any AliasAnalysis pass).
 ;
-; TOPDOWN: %for.body
+; TOPDOWN-LABEL: %for.body
 ; TOPDOWN: movl %{{.*}}, (
 ; TOPDOWN: imull {{[0-9]*}}(
 ; TOPDOWN: movl %{{.*}}, 4(
 ; TOPDOWN: imull {{[0-9]*}}(
 ; TOPDOWN: movl %{{.*}}, 8(
 ; TOPDOWN: movl %{{.*}}, 12(
-; TOPDOWN: %for.end
+; TOPDOWN-LABEL: %for.end
 ;
 ; For -misched=ilpmin, verify that each expression subtree is
 ; scheduled independently, and that the imull/adds are interleaved.
 ;
-; ILPMIN: %for.body
+; ILPMIN-LABEL: %for.body
 ; ILPMIN: movl %{{.*}}, (
 ; ILPMIN: imull
 ; ILPMIN: imull
@@ -53,12 +53,12 @@
 ; ILPMIN: imull
 ; ILPMIN: addl
 ; ILPMIN: movl %{{.*}}, 12(
-; ILPMIN: %for.end
+; ILPMIN-LABEL: %for.end
 ;
 ; For -misched=ilpmax, verify that each expression subtree is
 ; scheduled independently, and that the imull/adds are clustered.
 ;
-; ILPMAX: %for.body
+; ILPMAX-LABEL: %for.body
 ; ILPMAX: movl %{{.*}}, (
 ; ILPMAX: imull
 ; ILPMAX: imull
@@ -84,7 +84,7 @@
 ; ILPMAX: addl
 ; ILPMAX: addl
 ; ILPMAX: movl %{{.*}}, 12(
-; ILPMAX: %for.end
+; ILPMAX-LABEL: %for.end
 
 define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
 [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index b348512b5798..3a0fb95711e5 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,8 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 1
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | FileCheck %s -check-prefix=X86-32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-64
 ;
 ; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
 ; On Darwin x86-32, v1i64 values are passed in memory.  In this example, they
@@ -15,6 +12,13 @@
 define void @t1(x86_mmx %v1) nounwind  {
 	store x86_mmx %v1, x86_mmx* @u1, align 8
 	ret void
+
+; X86-32-LABEL: t1:
+; X86-32: movq %mm0
+
+; X86-64-LABEL: t1:
+; X86-64: movdq2q %xmm0
+; X86-64: movq %mm0
 }
 
 @u2 = external global x86_mmx
@@ -23,5 +27,12 @@ define void @t2(<1 x i64> %v1) nounwind  {
         %tmp = bitcast <1 x i64> %v1 to x86_mmx
 	store x86_mmx %tmp, x86_mmx* @u2, align 8
 	ret void
+
+; X86-32-LABEL: t2:
+; X86-32: movl 4(%esp)
+; X86-32: movl 8(%esp)
+
+; X86-64-LABEL: t2:
+; X86-64: movq %rdi
 }
 
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
index a8d33f43da01..aabdd53b09d6 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s
 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
 
 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
 
@@ -1337,3 +1339,11 @@ entry:
   %7 = extractelement <1 x i64> %6, i32 0
   ret i64 %7
 }
+
+define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
+; CHECK: cvtpi2ps
+  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) ; intrinsic taking a vector and an x86_mmx operand
+  ret <4 x float> %c
+}
+
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 206cb33494cf..9e8f5bf53363 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=x86_64-apple-darwin10 | FileCheck %s
 ; There are no MMX operations in bork; promoted to XMM.
 
 define void @bork(<1 x i64>* %x) {
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index bafc75444d91..c7c6e75a5071 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq | grep 32
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psllq | grep 32
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep psrad
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psrlw
+; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | FileCheck %s
 
 define i64 @t1(<1 x i64> %mm1) nounwind  {
 entry:
@@ -9,6 +7,9 @@ entry:
 	%tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 )		; <x86_mmx> [#uses=1]
         %retval1112 = bitcast x86_mmx %tmp6 to i64
 	ret i64 %retval1112
+
+; CHECK-LABEL: t1:
+; CHECK: psllq $32
 }
 
 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone 
@@ -18,6 +19,9 @@ entry:
 	%tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone 		; <x86_mmx> [#uses=1]
         %retval1112 = bitcast x86_mmx %tmp7 to i64
 	ret i64 %retval1112
+
+; CHECK-LABEL: t2:
+; CHECK: psrad
 }
 
 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone 
@@ -27,6 +31,9 @@ entry:
 	%tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone 		; <x86_mmx> [#uses=1]
         %retval1314 = bitcast x86_mmx %tmp8 to i64
 	ret i64 %retval1314
+
+; CHECK-LABEL: t3:
+; CHECK: psrlw
 }
 
 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone 
diff --git a/test/CodeGen/X86/movbe.ll b/test/CodeGen/X86/movbe.ll
index 3d3d8cf19b79..3f459be70d2a 100644
--- a/test/CodeGen/X86/movbe.ll
+++ b/test/CodeGen/X86/movbe.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 declare i64 @llvm.bswap.i64(i64) nounwind readnone
@@ -7,30 +8,38 @@ define void @test1(i32* nocapture %x, i32 %y) nounwind {
   %bswap = call i32 @llvm.bswap.i32(i32 %y)
   store i32 %bswap, i32* %x, align 4
   ret void
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: movbel	%esi, (%rdi)
+; SLM-LABEL: test1:
+; SLM: movbel	%esi, (%rdi)
 }
 
 define i32 @test2(i32* %x) nounwind {
   %load = load i32* %x, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %load)
   ret i32 %bswap
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movbel	(%rdi), %eax
+; SLM-LABEL: test2:
+; SLM: movbel	(%rdi), %eax
 }
 
 define void @test3(i64* %x, i64 %y) nounwind {
   %bswap = call i64 @llvm.bswap.i64(i64 %y)
   store i64 %bswap, i64* %x, align 8
   ret void
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: movbeq	%rsi, (%rdi)
+; SLM-LABEL: test3:
+; SLM: movbeq	%rsi, (%rdi)
 }
 
 define i64 @test4(i64* %x) nounwind {
   %load = load i64* %x, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %load)
   ret i64 %bswap
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movbeq	(%rdi), %rax
+; SLM-LABEL: test4:
+; SLM: movbeq	(%rdi), %rax
 }
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index bb42734833dd..71b0723c429e 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
 
 define i32 @test1() nounwind readonly {
 entry:
@@ -8,12 +8,12 @@ entry:
 	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
-; X32: test1:
+; X32-LABEL: test1:
 ; X32: 	movl	%gs:196, %eax
 ; X32: 	movl	(%eax), %eax
 ; X32: 	ret
 
-; X64: test1:
+; X64-LABEL: test1:
 ; X64: 	movq	%gs:320, %rax
 ; X64: 	movl	(%rax), %eax
 ; X64: 	ret
@@ -26,11 +26,11 @@ entry:
 }
 
 ; rdar://8453210
-; X32: test2:
+; X32-LABEL: test2:
 ; X32: movl	{{.*}}(%esp), %eax
 ; X32: calll	*%gs:(%eax)
 
-; X64: test2:
+; X64-LABEL: test2:
 ; X64: callq	*%gs:([[A0:%rdi|%rcx]])
 
 
@@ -45,12 +45,12 @@ entry:
   %3 = bitcast <4 x i32> %2 to <2 x i64>
   ret <2 x i64> %3
   
-; X32: pmovsxwd_1:
+; X32-LABEL: pmovsxwd_1:
 ; X32: 	movl	4(%esp), %eax
 ; X32: 	pmovsxwd	%gs:(%eax), %xmm0
 ; X32: 	ret
 
-; X64: pmovsxwd_1:
+; X64-LABEL: pmovsxwd_1:
 ; X64:	pmovsxwd	%gs:([[A0]]), %xmm0
 ; X64:	ret
 }
@@ -66,7 +66,7 @@ entry:
 	%tmp4 = add i32 %tmp1, %tmp3
 	ret i32 %tmp4
 }
-; X32: test_no_cse:
+; X32-LABEL: test_no_cse:
 ; X32: 	movl	%gs:196
 ; X32: 	movl	%fs:196
 ; X32: 	ret
diff --git a/test/CodeGen/X86/movmsk.ll b/test/CodeGen/X86/movmsk.ll
index 928ad037c1ce..25206621077b 100644
--- a/test/CodeGen/X86/movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -83,7 +83,7 @@ define void @float_call_signbit(double %n) {
 entry:
 ; FIXME: This should also use movmskps; we don't form the FGETSIGN node
 ; in this case, though.
-; CHECK: float_call_signbit:
+; CHECK-LABEL: float_call_signbit:
 ; CHECK: movd %xmm0, %rdi
 ; FIXME
   %t0 = bitcast double %n to i64
@@ -99,7 +99,7 @@ declare void @float_call_signbit_callee(i1 zeroext)
 
 define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: movmskps
 ; CHECK-NOT: movslq
   %0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
@@ -111,7 +111,7 @@ entry:
 
 define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: movmskpd
 ; CHECK-NOT: movslq
   %0 = bitcast <4 x float> %x to <2 x double>
diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll
index 5048a93ad302..5e7ba37b39c0 100644
--- a/test/CodeGen/X86/ms-inline-asm.ll
+++ b/test/CodeGen/X86/ms-inline-asm.ll
@@ -75,7 +75,7 @@ define void @t19() nounwind {
 entry:
   call void asm sideeffect inteldialect "call $0", "r,~{dirflag},~{fpsr},~{flags}"(void ()* @t19_helper) nounwind
   ret void
-; CHECK: t19:
+; CHECK-LABEL: t19:
 ; CHECK: movl %esp, %ebp
 ; CHECK: movl ${{_?}}t19_helper, %eax
 ; CHECK: {{## InlineAsm Start|#APP}}
@@ -94,7 +94,7 @@ entry:
   call void asm sideeffect inteldialect "mov dword ptr $0, edi", "=*m,~{dirflag},~{fpsr},~{flags}"(i32** %res) nounwind
   %0 = load i32** %res, align 4
   ret i32* %0
-; CHECK: t30:
+; CHECK-LABEL: t30:
 ; CHECK: movl %esp, %ebp
 ; CHECK: {{## InlineAsm Start|#APP}}
 ; CHECK: .intel_syntax
diff --git a/test/CodeGen/X86/narrow-shl-cst.ll b/test/CodeGen/X86/narrow-shl-cst.ll
index a404f34b9caa..40b976014a77 100644
--- a/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/test/CodeGen/X86/narrow-shl-cst.ll
@@ -5,7 +5,7 @@ define i32 @test1(i32 %x) nounwind {
   %and = shl i32 %x, 10
   %shl = and i32 %and, 31744
   ret i32 %shl
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: andl $31
 ; CHECK: shll $10
 }
@@ -14,7 +14,7 @@ define i32 @test2(i32 %x) nounwind {
   %or = shl i32 %x, 10
   %shl = or i32 %or, 31744
   ret i32 %shl
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: orl $31
 ; CHECK: shll $10
 }
@@ -23,7 +23,7 @@ define i32 @test3(i32 %x) nounwind {
   %xor = shl i32 %x, 10
   %shl = xor i32 %xor, 31744
   ret i32 %shl
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: xorl $31
 ; CHECK: shll $10
 }
@@ -32,7 +32,7 @@ define i64 @test4(i64 %x) nounwind {
   %and = shl i64 %x, 40
   %shl = and i64 %and, 264982302294016
   ret i64 %shl
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: andq $241
 ; CHECK: shlq $40
 }
@@ -41,7 +41,7 @@ define i64 @test5(i64 %x) nounwind {
   %and = shl i64 %x, 40
   %shl = and i64 %and, 34084860461056
   ret i64 %shl
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: andq $31
 ; CHECK: shlq $40
 }
@@ -50,7 +50,7 @@ define i64 @test6(i64 %x) nounwind {
   %and = shl i64 %x, 32
   %shl = and i64 %and, -281474976710656
   ret i64 %shl
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: andq $-65536
 ; CHECK: shlq $32
 }
@@ -59,7 +59,7 @@ define i64 @test7(i64 %x) nounwind {
   %or = shl i64 %x, 40
   %shl = or i64 %or, 264982302294016
   ret i64 %shl
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: orq $241
 ; CHECK: shlq $40
 }
@@ -68,7 +68,7 @@ define i64 @test8(i64 %x) nounwind {
   %or = shl i64 %x, 40
   %shl = or i64 %or, 34084860461056
   ret i64 %shl
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: orq $31
 ; CHECK: shlq $40
 }
@@ -77,7 +77,7 @@ define i64 @test9(i64 %x) nounwind {
   %xor = shl i64 %x, 40
   %shl = xor i64 %xor, 264982302294016
   ret i64 %shl
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: orq $241
 ; CHECK: shlq $40
 }
@@ -86,7 +86,7 @@ define i64 @test10(i64 %x) nounwind {
   %xor = shl i64 %x, 40
   %shl = xor i64 %xor, 34084860461056
   ret i64 %shl
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: xorq $31
 ; CHECK: shlq $40
 }
@@ -95,7 +95,7 @@ define i64 @test11(i64 %x) nounwind {
   %xor = shl i64 %x, 33
   %shl = xor i64 %xor, -562949953421312
   ret i64 %shl
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: xorq $-65536
 ; CHECK: shlq $33
 }
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 7822453add4f..30387925b34d 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -33,7 +33,7 @@ while.end:                                        ; preds = %while.cond
 
 ; DAGCombiner shouldn't fold the sdiv (ashr) away.
 ; rdar://8636812
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK:   sarl
 
 define i32 @test2() nounwind {
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
index 18f110821bd5..89ae3f1a3353 100644
--- a/test/CodeGen/X86/narrow_op-1.ll
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep orb | count 1
-; RUN: llc < %s -march=x86-64 | grep orb | grep 1
-; RUN: llc < %s -march=x86-64 | grep orl | count 1
-; RUN: llc < %s -march=x86-64 | grep orl | grep 16842752
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 	%struct.bf = type { i64, i16, i16, i32 }
 @bfi = common global %struct.bf zeroinitializer, align 16
@@ -12,6 +9,10 @@ entry:
 	%1 = or i32 %0, 65536
 	store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
 	ret void
+
+; CHECK-LABEL: t1:
+; CHECK: orb $1
+; CHECK-NEXT: ret
 }
 
 define void @t2() nounwind optsize ssp {
@@ -20,4 +21,8 @@ entry:
 	%1 = or i32 %0, 16842752
 	store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
 	ret void
+
+; CHECK-LABEL: t2:
+; CHECK: orl $16842752
+; CHECK-NEXT: ret
 }
diff --git a/test/CodeGen/X86/neg_cmp.ll b/test/CodeGen/X86/neg_cmp.ll
index 866514ed9a2f..79050720d8e7 100644
--- a/test/CodeGen/X86/neg_cmp.ll
+++ b/test/CodeGen/X86/neg_cmp.ll
@@ -4,7 +4,7 @@
 ; PR12545
 define void @f(i32 %x, i32 %y) nounwind uwtable ssp {
 entry:
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK-NOT: neg
 ; CHECK: add
   %sub = sub i32 0, %y
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
index 57164f2bcaf9..efb02f8832e6 100644
--- a/test/CodeGen/X86/neg_fp.ll
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
 ; RUN: grep xorps %t | count 1
 
 ; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization
diff --git a/test/CodeGen/X86/newline-and-quote.ll b/test/CodeGen/X86/newline-and-quote.ll
new file mode 100644
index 000000000000..9206e9f398eb
--- /dev/null
+++ b/test/CodeGen/X86/newline-and-quote.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s
+@"foo\22bar" = global i32 42
+; CHECK: .globl "foo\"bar"
+
+@"foo\0abar" = global i32 42
+; CHECK: .globl "foo\nbar"
diff --git a/test/CodeGen/X86/no-cmov.ll b/test/CodeGen/X86/no-cmov.ll
index 62d73b0732e7..e13edf26cad7 100644
--- a/test/CodeGen/X86/no-cmov.ll
+++ b/test/CodeGen/X86/no-cmov.ll
@@ -6,6 +6,6 @@ define i32 @test1(i32 %g, i32* %j) {
   %retval.0 = select i1 %tobool, i32 1, i32 %cmp
   ret i32 %retval.0
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: cmov
 }
diff --git a/test/CodeGen/X86/no-compact-unwind.ll b/test/CodeGen/X86/no-compact-unwind.ll
index 627f7da9f707..991cd4ed7363 100644
--- a/test/CodeGen/X86/no-compact-unwind.ll
+++ b/test/CodeGen/X86/no-compact-unwind.ll
@@ -1,4 +1,10 @@
-; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck %s
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -mcpu corei7 -filetype=obj -o - \
+; RUN:  | llvm-objdump -triple x86_64-apple-macosx10.8.0 -s - \
+; RUN:  | FileCheck -check-prefix=CU %s
+; RUN: llc < %s -mtriple x86_64-apple-darwin11 -mcpu corei7 \
+; RUN:  | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \
+; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -s - \
+; RUN:  | FileCheck -check-prefix=FROM-ASM %s
 
 %"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* }
 %struct.anon = type { %class.ImageLoader*, i64, i64 }
@@ -12,13 +18,15 @@ declare void @OSMemoryBarrier() optsize
 ; This compact unwind encoding indicates that we could not generate correct
 ; compact unwind encodings for this function. This then defaults to using the
 ; DWARF EH frame.
-;
-; CHECK: .section __LD,__compact_unwind,regular,debug
-; CHECK: .quad _func
-; CHECK: .long 67108864                ## Compact Unwind Encoding: 0x4000000
-; CHECK: .quad 0                       ## Personality Function
-; CHECK: .quad 0                       ## LSDA
-;
+
+; CU:      Contents of section __compact_unwind:
+; CU-NEXT: 0048 00000000 00000000 42000000 00000004
+; CU-NEXT: 0058 00000000 00000000 00000000 00000000
+
+; FROM-ASM:      Contents of section __compact_unwind:
+; FROM-ASM-NEXT: 0048 00000000 00000000 42000000 00000004
+; FROM-ASM-NEXT: 0058 00000000 00000000 00000000 00000000
+
 define void @func(%class.ImageLoader* %image) optsize ssp uwtable {
 entry:
   br label %for.cond1.preheader
diff --git a/test/CodeGen/X86/no-elf-compact-unwind.ll b/test/CodeGen/X86/no-elf-compact-unwind.ll
new file mode 100644
index 000000000000..8a15817bcfe9
--- /dev/null
+++ b/test/CodeGen/X86/no-elf-compact-unwind.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck -check-prefix=MACHO %s
+; RUN: llc < %s -mtriple x86_64-unknown-linux -disable-cfi | FileCheck -check-prefix=ELF %s
+
+; Make sure we don't generate a compact unwind for ELF.
+
+; MACHO-LABEL: _Z3barv:
+; MACHO:       __compact_unwind
+
+; ELF-LABEL:   _Z3barv:
+; ELF-NOT:     __compact_unwind
+
+@_ZTIi = external constant i8*
+
+define void @_Z3barv() uwtable {
+entry:
+  invoke void @_Z3foov()
+          to label %try.cont unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %1 = extractvalue { i8*, i32 } %0, 1
+  %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+  %matches = icmp eq i32 %1, %2
+  br i1 %matches, label %catch, label %eh.resume
+
+catch:                                            ; preds = %lpad
+  %3 = extractvalue { i8*, i32 } %0, 0
+  %4 = tail call i8* @__cxa_begin_catch(i8* %3)
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:                                         ; preds = %entry, %catch
+  ret void
+
+eh.resume:                                        ; preds = %lpad
+  resume { i8*, i32 } %0
+}
+
+declare void @_Z3foov()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
diff --git a/test/CodeGen/X86/nocx16.ll b/test/CodeGen/X86/nocx16.ll
new file mode 100644
index 000000000000..cceaac47122d
--- /dev/null
+++ b/test/CodeGen/X86/nocx16.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck %s
+define void @test(i128* %a) nounwind {
+entry:
+; CHECK: __sync_val_compare_and_swap_16
+  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst
+; CHECK: __sync_lock_test_and_set_16
+  %1 = atomicrmw xchg i128* %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_add_16
+  %2 = atomicrmw add i128* %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_sub_16
+  %3 = atomicrmw sub i128* %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_and_16
+  %4 = atomicrmw and i128* %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_nand_16
+  %5 = atomicrmw nand i128* %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_or_16
+  %6 = atomicrmw or i128* %a, i128 1 seq_cst
+; CHECK: __sync_fetch_and_xor_16
+  %7 = atomicrmw xor i128* %a, i128 1 seq_cst
+  ret void
+}
diff --git a/test/CodeGen/X86/non-lazy-bind.ll b/test/CodeGen/X86/non-lazy-bind.ll
index f72965877ddb..546a1365f26a 100644
--- a/test/CodeGen/X86/non-lazy-bind.ll
+++ b/test/CodeGen/X86/non-lazy-bind.ll
@@ -3,7 +3,7 @@
 declare void @lazy() nonlazybind
 declare void @not()
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK:  callq _not
 ; CHECK:  callq *_lazy@GOTPCREL(%rip)
 define void @foo() nounwind {
@@ -12,14 +12,14 @@ define void @foo() nounwind {
   ret void
 }
 
-; CHECK: tail_call_regular:
+; CHECK-LABEL: tail_call_regular:
 ; CHECK:   jmp _not
 define void @tail_call_regular() nounwind {
   tail call void @not()
   ret void
 }
 
-; CHECK: tail_call_eager:
+; CHECK-LABEL: tail_call_eager:
 ; CHECK:   jmpq *_lazy@GOTPCREL(%rip)
 define void @tail_call_eager() nounwind {
   tail call void @lazy()
diff --git a/test/CodeGen/X86/nonconst-static-ev.ll b/test/CodeGen/X86/nonconst-static-ev.ll
new file mode 100644
index 000000000000..f852caeeea21
--- /dev/null
+++ b/test/CodeGen/X86/nonconst-static-ev.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -march=x86 -mtriple=x86_64-linux-gnu < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+; REQUIRES: shell
+
+@0 = global i8 extractvalue ([1 x i8] select (i1 ptrtoint (i32* @1 to i1), [1 x i8] [ i8 1 ], [1 x i8] [ i8 2 ]), 0)
+@1 = external global i32
+
+; CHECK-ERRORS: Unsupported expression in static initializer: extractvalue
+
diff --git a/test/CodeGen/X86/nonconst-static-iv.ll b/test/CodeGen/X86/nonconst-static-iv.ll
new file mode 100644
index 000000000000..8fad39bcbf72
--- /dev/null
+++ b/test/CodeGen/X86/nonconst-static-iv.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -march=x86 -mtriple=x86_64-linux-gnu < %s 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+; REQUIRES: shell
+
+@0 = global i8 insertvalue( { i8 } select (i1 ptrtoint (i32* @1 to i1), { i8 } { i8 1 }, { i8 } { i8 2 }), i8 0, 0)
+@1 = external global i32
+
+; CHECK-ERRORS: Unsupported expression in static initializer: insertvalue
+
diff --git a/test/CodeGen/X86/nosse-error1.ll b/test/CodeGen/X86/nosse-error1.ll
index cddff3f2753b..291379eeaec9 100644
--- a/test/CodeGen/X86/nosse-error1.ll
+++ b/test/CodeGen/X86/nosse-error1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+; RUN: not llc < %s -march=x86-64 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
 
 ; NOSSE: {{SSE register return with SSE disabled}}
diff --git a/test/CodeGen/X86/nosse-error2.ll b/test/CodeGen/X86/nosse-error2.ll
index fc9ba010e19d..a7cee2dd8211 100644
--- a/test/CodeGen/X86/nosse-error2.ll
+++ b/test/CodeGen/X86/nosse-error2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+; RUN: not llc < %s -march=x86 -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
 ; RUN: llc < %s -march=x86 -mcpu=i686 -mattr=+sse | FileCheck %s
 
 ; NOSSE: {{SSE register return with SSE disabled}}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 8f1eabde7423..ec35d2981a16 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin10.0"
 define void @bar() nounwind ssp {
 entry:
   %tmp = load i8** @p                             ; <i8*> [#uses=1]
-  %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1]
+  %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp, i1 0) ; <i64> [#uses=1]
   %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
 ; X64: movabsq $-1, [[RAX:%r..]]
 ; X64: cmpq    $-1, [[RAX]]
@@ -19,7 +19,7 @@ entry:
 cond.true:                                        ; preds = %entry
   %tmp1 = load i8** @p                            ; <i8*> [#uses=1]
   %tmp2 = load i8** @p                            ; <i8*> [#uses=1]
-  %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1]
+  %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1]
   %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
   br label %cond.end
 
@@ -33,7 +33,7 @@ cond.end:                                         ; preds = %cond.false, %cond.t
   ret void
 }
 
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readonly
 
 declare i8* @__strcpy_chk(i8*, i8*, i64) ssp
 
@@ -47,7 +47,7 @@ entry:
   %tmp = load i8** %__dest.addr                   ; <i8*> [#uses=1]
   %tmp1 = load i8** %__src.addr                   ; <i8*> [#uses=1]
   %tmp2 = load i8** %__dest.addr                  ; <i8*> [#uses=1]
-  %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1]
+  %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1]
   %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1]
   store i8* %call, i8** %retval
   %1 = load i8** %retval                          ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/opt-shuff-tstore.ll b/test/CodeGen/X86/opt-shuff-tstore.ll
index 3e720844c437..fc43e81bf9b5 100644
--- a/test/CodeGen/X86/opt-shuff-tstore.ll
+++ b/test/CodeGen/X86/opt-shuff-tstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s  -mattr=+sse2,+sse41 | FileCheck %s
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s  -mattr=+sse2,+sse4.1 | FileCheck %s
 
 ; CHECK: func_4_8
 ; A single memory write
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index d092916ac6fc..1b653736ad3b 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -3,7 +3,7 @@
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: cmov
 ; CHECK: jle
 
@@ -37,7 +37,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; OptimizeMax should handle this case.
 ; PR7454
 
-;      CHECK: _Z18GenerateStatusPagei:
+;      CHECK-LABEL: _Z18GenerateStatusPagei:
 
 ;      CHECK:         jle
 ;  CHECK-NOT:         cmov
diff --git a/test/CodeGen/X86/or-address.ll b/test/CodeGen/X86/or-address.ll
index f866e419c304..6bea864027bd 100644
--- a/test/CodeGen/X86/or-address.ll
+++ b/test/CodeGen/X86/or-address.ll
@@ -46,7 +46,7 @@ return:                                           ; preds = %bb
   ret void
 }
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: 	movl	%{{.*}},   (%[[RDI:...]],%[[RCX:...]],4)
 ; CHECK:	movl	%{{.*}},  8(%[[RDI]],%[[RCX]],4)
 ; CHECK:	movl	%{{.*}},  4(%[[RDI]],%[[RCX]],4)
diff --git a/test/CodeGen/X86/palignr-2.ll b/test/CodeGen/X86/palignr-2.ll
index 116d4c71814a..4df9a2284cb7 100644
--- a/test/CodeGen/X86/palignr-2.ll
+++ b/test/CodeGen/X86/palignr-2.ll
@@ -7,7 +7,7 @@
 
 define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; palignr $3, %xmm1, %xmm0
   %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone
   store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
@@ -18,7 +18,7 @@ declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwin
 
 define void @t2() nounwind ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; palignr $4, _b, %xmm0
   %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
   %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll
index 6875fb339242..ec6564d7e2eb 100644
--- a/test/CodeGen/X86/palignr.ll
+++ b/test/CodeGen/X86/palignr.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=YONAH %s
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: pshufd
 ; CHECK-YONAH: pshufd
   %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
@@ -10,7 +10,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: palignr
 ; CHECK-YONAH: shufps
   %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
@@ -18,42 +18,42 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: palignr
   %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
 	ret <4 x i32> %C
 }
 
 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: palignr
   %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
 	ret <4 x i32> %C
 }
 
 define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: palignr
   %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
 	ret <4 x float> %C
 }
 
 define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: palignr
   %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
 	ret <8 x i16> %C
 }
 
 define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: palignr
   %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
 	ret <8 x i16> %C
 }
 
 define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: palignr
   %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
 	ret <16 x i8> %C
@@ -64,7 +64,7 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
 ; incorrectly.  In particular, one of the operands of the palignr node
 ; was an UNDEF.)
 define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK-NOT: palignr
 ; CHECK: pshufb
   %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
diff --git a/test/CodeGen/X86/pass-three.ll b/test/CodeGen/X86/pass-three.ll
index 23005c77c13d..39ff69a43f48 100644
--- a/test/CodeGen/X86/pass-three.ll
+++ b/test/CodeGen/X86/pass-three.ll
@@ -11,6 +11,6 @@ entry:
   ret { i8*, i64, i64* } %2
 }
 
-; CHECK: copy_3:
+; CHECK-LABEL: copy_3:
 ; CHECK-NOT: (%rdi)
 ; CHECK: ret
diff --git a/test/CodeGen/X86/patchpoint.ll b/test/CodeGen/X86/patchpoint.ll
new file mode 100644
index 000000000000..d534639953b3
--- /dev/null
+++ b/test/CodeGen/X86/patchpoint.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s
+
+; Trivial patchpoint codegen
+;
+define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: trivial_patchpoint_codegen:
+; CHECK:      movabsq $-559038736, %r11
+; CHECK-NEXT: callq *%r11
+; CHECK-NEXT: nop
+; CHECK:      movq %rax, %[[REG:r.+]]
+; CHECK:      callq *%r11
+; CHECK-NEXT: nop
+; CHECK:      movq %[[REG]], %rax
+; CHECK:      ret
+  %resolveCall2 = inttoptr i64 -559038736 to i8*
+  %result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  %resolveCall3 = inttoptr i64 -559038737 to i8*
+  tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+  ret i64 %result
+}
+
+; Caller frame metadata with stackmaps. This should not be optimized
+; as a leaf function.
+;
+; CHECK-LABEL: caller_meta_leaf
+; CHECK: subq $32, %rsp
+; CHECK: Ltmp
+; CHECK: addq $32, %rsp
+; CHECK: ret
+define void @caller_meta_leaf() {
+entry:
+  %metadata = alloca i64, i32 3, align 8
+  store i64 11, i64* %metadata
+  store i64 12, i64* %metadata
+  store i64 13, i64* %metadata
+  call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 4, i32 0, i64* %metadata)
+  ret void
+}
+
+; Test the webkit_jscc calling convention.
+; Two arguments will be pushed on the stack.
+; Return value in $rax.
+define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen:
+; CHECK:      Ltmp
+; CHECK:      movq %r{{.+}}, 8(%rsp)
+; CHECK:      movq %r{{.+}}, (%rsp)
+; CHECK:      Ltmp
+; CHECK-NEXT: movabsq $-559038736, %r11
+; CHECK-NEXT: callq *%r11
+; CHECK:      movq %rax, 8(%rsp)
+; CHECK:      callq
+  %resolveCall2 = inttoptr i64 -559038736 to i8*
+  %result = tail call webkit_jscc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveCall2, i32 2, i64 %p1, i64 %p2)
+  %resolveCall3 = inttoptr i64 -559038737 to i8*
+  tail call webkit_jscc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+  ret void
+}
+
+; Test patchpoints reusing the same TargetConstant.
+; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
+; There is no way to verify this, since it depends on memory allocation.
+; But I think it's useful to include as a working example.
+define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
+entry:
+  %tmp80 = add i64 %tmp79, -16
+  %tmp81 = inttoptr i64 %tmp80 to i64*
+  %tmp82 = load i64* %tmp81, align 8
+  tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+  tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+  %tmp83 = load i64* %tmp33, align 8
+  %tmp84 = add i64 %tmp83, -24
+  %tmp85 = inttoptr i64 %tmp84 to i64*
+  %tmp86 = load i64* %tmp85, align 8
+  tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86)
+  tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+  ret i64 10
+}
+
+; Test small patchpoints that don't emit calls.
+define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: small_patchpoint_codegen:
+; CHECK:      Ltmp
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: popq
+; CHECK-NEXT: ret
+  %result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i32, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
diff --git a/test/CodeGen/X86/peep-setb.ll b/test/CodeGen/X86/peep-setb.ll
index 0bab78907635..adae8acd0432 100644
--- a/test/CodeGen/X86/peep-setb.ll
+++ b/test/CodeGen/X86/peep-setb.ll
@@ -5,7 +5,7 @@ define i8 @test1(i8 %a, i8 %b) nounwind {
   %cond = zext i1 %cmp to i8
   %add = add i8 %cond, %b
   ret i8 %add
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: adcb $0
 }
 
@@ -14,7 +14,7 @@ define i32 @test2(i32 %a, i32 %b) nounwind {
   %cond = zext i1 %cmp to i32
   %add = add i32 %cond, %b
   ret i32 %add
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: adcl $0
 }
 
@@ -23,7 +23,7 @@ define i64 @test3(i64 %a, i64 %b) nounwind {
   %conv = zext i1 %cmp to i64
   %add = add i64 %conv, %b
   ret i64 %add
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: adcq $0
 }
 
@@ -32,7 +32,7 @@ define i8 @test4(i8 %a, i8 %b) nounwind {
   %cond = zext i1 %cmp to i8
   %sub = sub i8 %b, %cond
   ret i8 %sub
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: sbbb $0
 }
 
@@ -41,7 +41,7 @@ define i32 @test5(i32 %a, i32 %b) nounwind {
   %cond = zext i1 %cmp to i32
   %sub = sub i32 %b, %cond
   ret i32 %sub
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: sbbl $0
 }
 
@@ -50,7 +50,7 @@ define i64 @test6(i64 %a, i64 %b) nounwind {
   %conv = zext i1 %cmp to i64
   %sub = sub i64 %b, %conv
   ret i64 %sub
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: sbbq $0
 }
 
@@ -59,7 +59,7 @@ define i8 @test7(i8 %a, i8 %b) nounwind {
   %cond = sext i1 %cmp to i8
   %sub = sub i8 %b, %cond
   ret i8 %sub
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: adcb $0
 }
 
@@ -68,7 +68,7 @@ define i32 @test8(i32 %a, i32 %b) nounwind {
   %cond = sext i1 %cmp to i32
   %sub = sub i32 %b, %cond
   ret i32 %sub
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: adcl $0
 }
 
@@ -77,6 +77,6 @@ define i64 @test9(i64 %a, i64 %b) nounwind {
   %conv = sext i1 %cmp to i64
   %sub = sub i64 %b, %conv
   ret i64 %sub
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: adcq $0
 }
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index a3799807b384..b3d4f585f45d 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -3,7 +3,7 @@
 
 ; LLVM should omit the testl and use the flags result from the orl.
 
-; CHECK: or:
+; CHECK-LABEL: or:
 define void @or(float* %A, i32 %IA, i32 %N) nounwind {
 entry:
   %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
@@ -22,7 +22,7 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 }
-; CHECK: xor:
+; CHECK-LABEL: xor:
 define void @xor(float* %A, i32 %IA, i32 %N) nounwind {
 entry:
   %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
@@ -41,7 +41,7 @@ bb:                                               ; preds = %entry
 return:                                           ; preds = %entry
   ret void
 }
-; CHECK: and:
+; CHECK-LABEL: and:
 define void @and(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
 entry:
   store i8 0, i8* %p
@@ -67,7 +67,7 @@ return:                                           ; preds = %entry
 
 ; Just like @and, but without the trunc+store. This should use a testb
 ; instead of an andl.
-; CHECK: test:
+; CHECK-LABEL: test:
 define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
 entry:
   store i8 0, i8* %p
diff --git a/test/CodeGen/X86/peep-test-4.ll b/test/CodeGen/X86/peep-test-4.ll
new file mode 100644
index 000000000000..884ee7c2ba28
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-4.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt | FileCheck %s
+declare void @foo(i32)
+declare void @foo64(i64)
+
+; CHECK-LABEL: neg:
+; CHECK: negl %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @neg(i32 %x) nounwind {
+  %sub = sub i32 0, %x
+  %cmp = icmp eq i32 %sub, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %sub)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: sar:
+; CHECK: sarl %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @sar(i32 %x) nounwind {
+  %ashr = ashr i32 %x, 1
+  %cmp = icmp eq i32 %ashr, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %ashr)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: shr:
+; CHECK: shrl %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @shr(i32 %x) nounwind {
+  %ashr = lshr i32 %x, 1
+  %cmp = icmp eq i32 %ashr, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %ashr)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: shri:
+; CHECK: shrl $3, %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @shri(i32 %x) nounwind {
+  %ashr = lshr i32 %x, 3
+  %cmp = icmp eq i32 %ashr, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %ashr)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: shl:
+; CHECK: addl %edi, %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @shl(i32 %x) nounwind {
+  %shl = shl i32 %x, 1
+  %cmp = icmp eq i32 %shl, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %shl)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: shli:
+; CHECK: shll $4, %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @shli(i32 %x) nounwind {
+  %shl = shl i32 %x, 4
+  %cmp = icmp eq i32 %shl, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %shl)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: adc:
+; CHECK: movabsq $-9223372036854775808, %rax
+; CHECK-NEXT: addq  %rdi, %rax
+; CHECK-NEXT: adcq  $0, %rsi
+; CHECK-NEXT: sete  %al
+; CHECK: ret
+define zeroext i1 @adc(i128 %x) nounwind {
+  %add = add i128 %x, 9223372036854775808
+  %cmp = icmp ult i128 %add, 18446744073709551616
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: sbb:
+; CHECK: cmpq  %rdx, %rdi
+; CHECK-NEXT: sbbq  %rcx, %rsi
+; CHECK-NEXT: setns %al
+; CHECK: ret
+define zeroext i1 @sbb(i128 %x, i128 %y) nounwind {
+  %sub = sub i128 %x, %y
+  %cmp = icmp sge i128 %sub, 0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: andn:
+; CHECK: andnl   %esi, %edi, %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+define void @andn(i32 %x, i32 %y) nounwind {
+  %not = xor i32 %x, -1
+  %andn = and i32 %y, %not
+  %cmp = icmp eq i32 %andn, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %andn)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: bextr:
+; CHECK: bextrl   %esi, %edi, %edi
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
+define void @bextr(i32 %x, i32 %y) nounwind {
+  %bextr = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
+  %cmp = icmp eq i32 %bextr, 0
+  br i1 %cmp, label %return, label %bb
+
+bb:
+  tail call void @foo(i32 %bextr)
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK-LABEL: popcnt:
+; CHECK: popcntl
+; CHECK-NEXT: je
+; CHECK: jmp foo
+; CHECK: ret
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+define void @popcnt(i32 %x) nounwind {
+  %popcnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+  %cmp = icmp eq i32 %popcnt, 0
+  br i1 %cmp, label %return, label %bb
+;
+bb:
+  tail call void @foo(i32 %popcnt)
+  br label %return
+;
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
index 606a9be68bd4..f73ebb944dcd 100644
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse4.1 | FileCheck %s
 ; CHECK: pshufd $3, %xmm0, %xmm0
 
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse41 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse4.1 | FileCheck %s -check-prefix=WIN64
 ; %a is passed indirectly on Win64.
 ; WIN64: movss   12(%rcx), %xmm0
 
diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll
index 62d85f7ee7c7..17e7e1dfdcf7 100644
--- a/test/CodeGen/X86/phaddsub.ll
+++ b/test/CodeGen/X86/phaddsub.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+ssse3,-avx | FileCheck %s -check-prefix=SSSE3
 ; RUN: llc < %s -march=x86-64 -mattr=-ssse3,+avx | FileCheck %s -check-prefix=AVX
 
-; SSSE3: phaddw1:
+; SSSE3-LABEL: phaddw1:
 ; SSSE3-NOT: vphaddw
 ; SSSE3: phaddw
-; AVX: phaddw1:
+; AVX-LABEL: phaddw1:
 ; AVX: vphaddw
 define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
   %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -13,10 +13,10 @@ define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
   ret <8 x i16> %r
 }
 
-; SSSE3: phaddw2:
+; SSSE3-LABEL: phaddw2:
 ; SSSE3-NOT: vphaddw
 ; SSSE3: phaddw
-; AVX: phaddw2:
+; AVX-LABEL: phaddw2:
 ; AVX: vphaddw
 define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
   %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
@@ -25,10 +25,10 @@ define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
   ret <8 x i16> %r
 }
 
-; SSSE3: phaddd1:
+; SSSE3-LABEL: phaddd1:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd1:
+; AVX-LABEL: phaddd1:
 ; AVX: vphaddd
 define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
   %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -37,10 +37,10 @@ define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phaddd2:
+; SSSE3-LABEL: phaddd2:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd2:
+; AVX-LABEL: phaddd2:
 ; AVX: vphaddd
 define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
   %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
@@ -49,10 +49,10 @@ define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phaddd3:
+; SSSE3-LABEL: phaddd3:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd3:
+; AVX-LABEL: phaddd3:
 ; AVX: vphaddd
 define <4 x i32> @phaddd3(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
@@ -61,10 +61,10 @@ define <4 x i32> @phaddd3(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phaddd4:
+; SSSE3-LABEL: phaddd4:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd4:
+; AVX-LABEL: phaddd4:
 ; AVX: vphaddd
 define <4 x i32> @phaddd4(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -73,10 +73,10 @@ define <4 x i32> @phaddd4(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phaddd5:
+; SSSE3-LABEL: phaddd5:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd5:
+; AVX-LABEL: phaddd5:
 ; AVX: vphaddd
 define <4 x i32> @phaddd5(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
@@ -85,10 +85,10 @@ define <4 x i32> @phaddd5(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phaddd6:
+; SSSE3-LABEL: phaddd6:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd6:
+; AVX-LABEL: phaddd6:
 ; AVX: vphaddd
 define <4 x i32> @phaddd6(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@@ -97,10 +97,10 @@ define <4 x i32> @phaddd6(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phaddd7:
+; SSSE3-LABEL: phaddd7:
 ; SSSE3-NOT: vphaddd
 ; SSSE3: phaddd
-; AVX: phaddd7:
+; AVX-LABEL: phaddd7:
 ; AVX: vphaddd
 define <4 x i32> @phaddd7(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
@@ -109,10 +109,10 @@ define <4 x i32> @phaddd7(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phsubw1:
+; SSSE3-LABEL: phsubw1:
 ; SSSE3-NOT: vphsubw
 ; SSSE3: phsubw
-; AVX: phsubw1:
+; AVX-LABEL: phsubw1:
 ; AVX: vphsubw
 define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
   %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -121,10 +121,10 @@ define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
   ret <8 x i16> %r
 }
 
-; SSSE3: phsubd1:
+; SSSE3-LABEL: phsubd1:
 ; SSSE3-NOT: vphsubd
 ; SSSE3: phsubd
-; AVX: phsubd1:
+; AVX-LABEL: phsubd1:
 ; AVX: vphsubd
 define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
   %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -133,10 +133,10 @@ define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phsubd2:
+; SSSE3-LABEL: phsubd2:
 ; SSSE3-NOT: vphsubd
 ; SSSE3: phsubd
-; AVX: phsubd2:
+; AVX-LABEL: phsubd2:
 ; AVX: vphsubd
 define <4 x i32> @phsubd2(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
@@ -145,10 +145,10 @@ define <4 x i32> @phsubd2(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phsubd3:
+; SSSE3-LABEL: phsubd3:
 ; SSSE3-NOT: vphsubd
 ; SSSE3: phsubd
-; AVX: phsubd3:
+; AVX-LABEL: phsubd3:
 ; AVX: vphsubd
 define <4 x i32> @phsubd3(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -157,10 +157,10 @@ define <4 x i32> @phsubd3(<4 x i32> %x) {
   ret <4 x i32> %r
 }
 
-; SSSE3: phsubd4:
+; SSSE3-LABEL: phsubd4:
 ; SSSE3-NOT: vphsubd
 ; SSSE3: phsubd
-; AVX: phsubd4:
+; AVX-LABEL: phsubd4:
 ; AVX: vphsubd
 define <4 x i32> @phsubd4(<4 x i32> %x) {
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 2a20e7ad6f15..6eb97c3cd7ab 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -7,7 +7,7 @@
 ; 336L		%vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15
 
 define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 entry:
   %j.03 = add i32 %bbSize, -1                     ; <i32> [#uses=2]
   %0 = icmp sgt i32 %j.03, -1                     ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index fc0630991c9c..7bb127eae930 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -11,7 +11,7 @@ entry:
     store i32 %tmp.s, i32* @dst
     ret void
     
-; LINUX:    test0:
+; LINUX-LABEL:    test0:
 ; LINUX:	calll	.L0$pb
 ; LINUX-NEXT: .L0$pb:
 ; LINUX-NEXT:	popl
@@ -33,7 +33,7 @@ entry:
     store i32 %tmp.s, i32* @dst2
     ret void
     
-; LINUX: test1:
+; LINUX-LABEL: test1:
 ; LINUX:	calll	.L1$pb
 ; LINUX-NEXT: .L1$pb:
 ; LINUX-NEXT:	popl
@@ -51,7 +51,7 @@ define void @test2() nounwind {
 entry:
     %ptr = call i8* @malloc(i32 40)
     ret void
-; LINUX: test2:
+; LINUX-LABEL: test2:
 ; LINUX: 	pushl	%ebx
 ; LINUX-NEXT: 	subl	$8, %esp
 ; LINUX-NEXT: 	calll	.L2$pb
@@ -74,7 +74,7 @@ entry:
     %tmp1 = load void(...)** @pfoo
     call void(...)* %tmp1()
     ret void
-; LINUX: test3:
+; LINUX-LABEL: test3:
 ; LINUX: 	calll	.L3$pb
 ; LINUX-NEXT: .L3$pb:
 ; LINUX: 	popl
@@ -90,7 +90,7 @@ define void @test4() nounwind {
 entry:
     call void(...)* @foo()
     ret void
-; LINUX: test4:
+; LINUX-LABEL: test4:
 ; LINUX: calll	.L4$pb
 ; LINUX: popl	%ebx
 ; LINUX: addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx
@@ -111,7 +111,7 @@ entry:
     store i32 %tmp.s, i32* @dst6
     ret void
     
-; LINUX: test5:
+; LINUX-LABEL: test5:
 ; LINUX: 	calll	.L5$pb
 ; LINUX-NEXT: .L5$pb:
 ; LINUX-NEXT: 	popl	%eax
@@ -133,7 +133,7 @@ entry:
 
 ; LINUX: .LCPI6_0:
 
-; LINUX: test6:
+; LINUX-LABEL: test6:
 ; LINUX:    calll .L6$pb
 ; LINUX: .L6$pb:
 ; LINUX:    addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb), 
@@ -185,7 +185,7 @@ bb12:
     tail call void(...)* @foo6()
     ret void
     
-; LINUX: test7:
+; LINUX-LABEL: test7:
 ; LINUX:   calll	.L7$pb
 ; LINUX: .L7$pb:
 ; LINUX:   addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll
index 16e9c28fcdef..f0e468f53cb3 100644
--- a/test/CodeGen/X86/pmovext.ll
+++ b/test/CodeGen/X86/pmovext.ll
@@ -2,7 +2,7 @@
 
 ; rdar://11897677
 
-;CHECK: intrin_pmov
+;CHECK-LABEL: intrin_pmov:
 ;CHECK: pmovzxbw  (%{{.*}}), %xmm0
 ;CHECK-NEXT: movdqu
 ;CHECK-NEXT: ret
@@ -18,5 +18,28 @@ define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable
 }
 
 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
-
 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+; rdar://15245794
+
+define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
+; CHECK-LABEL: foo0
+; CHECK: pmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: ret
+  %tmp = bitcast double %v.coerce to <4 x i16>
+  %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
+  ret <4 x i32> %tmp2
+}
+
+define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
+; CHECK-LABEL: foo1
+; CHECK: pmovzxbw %xmm0, %xmm0
+; CHECK-NEXT: ret
+  %tmp = bitcast double %v.coerce to <8 x i8>
+  %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
+  ret <8 x i16> %tmp2
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll
index d8c27f25043a..07979f61ddd8 100644
--- a/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/test/CodeGen/X86/pmovsx-inreg.ll
@@ -12,13 +12,13 @@ define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
   store <2 x i64> %sext, <2 x i64>* %out, align 8
   ret void
 
-; SSE41: test1:
+; SSE41-LABEL: test1:
 ; SSE41: pmovsxbq
 
-; AVX1: test1:
+; AVX1-LABEL: test1:
 ; AVX1: vpmovsxbq
 
-; AVX2: test1:
+; AVX2-LABEL: test1:
 ; AVX2: vpmovsxbq
 }
 
@@ -29,7 +29,7 @@ define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
   store <4 x i64> %sext, <4 x i64>* %out, align 8
   ret void
 
-; AVX2: test2:
+; AVX2-LABEL: test2:
 ; AVX2: vpmovsxbq
 }
 
@@ -40,13 +40,13 @@ define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
   store <4 x i32> %sext, <4 x i32>* %out, align 8
   ret void
 
-; SSE41: test3:
+; SSE41-LABEL: test3:
 ; SSE41: pmovsxbd
 
-; AVX1: test3:
+; AVX1-LABEL: test3:
 ; AVX1: vpmovsxbd
 
-; AVX2: test3:
+; AVX2-LABEL: test3:
 ; AVX2: vpmovsxbd
 }
 
@@ -57,7 +57,7 @@ define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
   store <8 x i32> %sext, <8 x i32>* %out, align 8
   ret void
 
-; AVX2: test4:
+; AVX2-LABEL: test4:
 ; AVX2: vpmovsxbd
 }
 
@@ -68,13 +68,13 @@ define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
   store <8 x i16> %sext, <8 x i16>* %out, align 8
   ret void
 
-; SSE41: test5:
+; SSE41-LABEL: test5:
 ; SSE41: pmovsxbw
 
-; AVX1: test5:
+; AVX1-LABEL: test5:
 ; AVX1: vpmovsxbw
 
-; AVX2: test5:
+; AVX2-LABEL: test5:
 ; AVX2: vpmovsxbw
 }
 
@@ -85,9 +85,8 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
   store <16 x i16> %sext, <16 x i16>* %out, align 8
   ret void
 
-; AVX2: test6:
-; FIXME: v16i8 -> v16i16 is scalarized.
-; AVX2-NOT: pmovsx
+; AVX2-LABEL: test6:
+; AVX2: vpmovsxbw
 }
 
 define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
@@ -98,13 +97,13 @@ define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
   ret void
 
 
-; SSE41: test7:
+; SSE41-LABEL: test7:
 ; SSE41: pmovsxwq
 
-; AVX1: test7:
+; AVX1-LABEL: test7:
 ; AVX1: vpmovsxwq
 
-; AVX2: test7:
+; AVX2-LABEL: test7:
 ; AVX2: vpmovsxwq
 }
 
@@ -115,7 +114,7 @@ define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
   store <4 x i64> %sext, <4 x i64>* %out, align 8
   ret void
 
-; AVX2: test8:
+; AVX2-LABEL: test8:
 ; AVX2: vpmovsxwq
 }
 
@@ -126,13 +125,13 @@ define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
   store <4 x i32> %sext, <4 x i32>* %out, align 8
   ret void
 
-; SSE41: test9:
+; SSE41-LABEL: test9:
 ; SSE41: pmovsxwd
 
-; AVX1: test9:
+; AVX1-LABEL: test9:
 ; AVX1: vpmovsxwd
 
-; AVX2: test9:
+; AVX2-LABEL: test9:
 ; AVX2: vpmovsxwd
 }
 
@@ -143,7 +142,7 @@ define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
   store <8 x i32> %sext, <8 x i32>* %out, align 8
   ret void
 
-; AVX2: test10:
+; AVX2-LABEL: test10:
 ; AVX2: vpmovsxwd
 }
 
@@ -154,13 +153,13 @@ define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
   store <2 x i64> %sext, <2 x i64>* %out, align 8
   ret void
 
-; SSE41: test11:
+; SSE41-LABEL: test11:
 ; SSE41: pmovsxdq
 
-; AVX1: test11:
+; AVX1-LABEL: test11:
 ; AVX1: vpmovsxdq
 
-; AVX2: test11:
+; AVX2-LABEL: test11:
 ; AVX2: vpmovsxdq
 }
 
@@ -171,6 +170,6 @@ define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
   store <4 x i64> %sext, <4 x i64>* %out, align 8
   ret void
 
-; AVX2: test12:
+; AVX2-LABEL: test12:
 ; AVX2: vpmovsxdq
 }
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index da4af81959dc..7bf8a618fa77 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
+; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
 ; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 11
+; RUN: grep mov %t | count 14
 
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
         %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll
index be527aed9a98..3db0f73954d7 100644
--- a/test/CodeGen/X86/pmulld.ll
+++ b/test/CodeGen/X86/pmulld.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NEXT: pmulld
 
-; WIN64: test1:
+; WIN64-LABEL: test1:
 ; WIN64-NEXT: movdqa  (%rcx), %xmm0
 ; WIN64-NEXT: pmulld  (%rdx), %xmm0
   %C = mul <4 x i32> %A, %B
@@ -13,10 +13,10 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
-; CHECK: test1a:
+; CHECK-LABEL: test1a:
 ; CHECK-NEXT: pmulld
 
-; WIN64: test1a:
+; WIN64-LABEL: test1a:
 ; WIN64-NEXT: movdqa  (%rcx), %xmm0
 ; WIN64-NEXT: pmulld  (%rdx), %xmm0
 
diff --git a/test/CodeGen/X86/popcnt.ll b/test/CodeGen/X86/popcnt.ll
index 430214c73b13..e9350de101f6 100644
--- a/test/CodeGen/X86/popcnt.ll
+++ b/test/CodeGen/X86/popcnt.ll
@@ -3,7 +3,7 @@
 define i8 @cnt8(i8 %x) nounwind readnone {
   %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
   ret i8 %cnt
-; CHECK: cnt8:
+; CHECK-LABEL: cnt8:
 ; CHECK: popcntw
 ; CHECK: ret
 }
@@ -11,7 +11,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
 define i16 @cnt16(i16 %x) nounwind readnone {
   %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
   ret i16 %cnt
-; CHECK: cnt16:
+; CHECK-LABEL: cnt16:
 ; CHECK: popcntw
 ; CHECK: ret
 }
@@ -19,7 +19,7 @@ define i16 @cnt16(i16 %x) nounwind readnone {
 define i32 @cnt32(i32 %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
-; CHECK: cnt32:
+; CHECK-LABEL: cnt32:
 ; CHECK: popcntl
 ; CHECK: ret
 }
@@ -27,7 +27,7 @@ define i32 @cnt32(i32 %x) nounwind readnone {
 define i64 @cnt64(i64 %x) nounwind readnone {
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
-; CHECK: cnt64:
+; CHECK-LABEL: cnt64:
 ; CHECK: popcntq
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 01d6cbef1ee5..946b8362122d 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -16,7 +16,7 @@
 @.str24 = external constant [4 x i8], align 1     ; <[4 x i8]*> [#uses=1]
 
 define i32 @t1(i32 %c, i8** nocapture %v) nounwind ssp {
-; X86-32: t1:
+; X86-32-LABEL: t1:
 entry:
   br i1 undef, label %bb, label %bb3
 
@@ -146,7 +146,7 @@ declare i32 @strcmp(i8* nocapture, i8* nocapture) nounwind readonly
 @map_4_to_16 = external constant [16 x i16], align 32 ; <[16 x i16]*> [#uses=2]
 
 define void @t2(i8* nocapture %bufp, i8* nocapture %data, i32 %dsize) nounwind ssp {
-; X86-64: t2:
+; X86-64-LABEL: t2:
 entry:
   br i1 undef, label %return, label %bb.nph
 
diff --git a/test/CodeGen/X86/pr10523.ll b/test/CodeGen/X86/pr10523.ll
index 7191d6949c18..0ec22a08e440 100644
--- a/test/CodeGen/X86/pr10523.ll
+++ b/test/CodeGen/X86/pr10523.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
 
 ; No check in a crash test
 
diff --git a/test/CodeGen/X86/pr10524.ll b/test/CodeGen/X86/pr10524.ll
index ed3e7c528052..12bdba9fa595 100644
--- a/test/CodeGen/X86/pr10524.ll
+++ b/test/CodeGen/X86/pr10524.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
 
 ; No check in a crash test
 
diff --git a/test/CodeGen/X86/pr10525.ll b/test/CodeGen/X86/pr10525.ll
index 342c1d63e192..30ce2979e8e1 100644
--- a/test/CodeGen/X86/pr10525.ll
+++ b/test/CodeGen/X86/pr10525.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
 
 ; No check in a crash test
 
diff --git a/test/CodeGen/X86/pr10526.ll b/test/CodeGen/X86/pr10526.ll
index 6963fe515898..9fa83ce17b55 100644
--- a/test/CodeGen/X86/pr10526.ll
+++ b/test/CodeGen/X86/pr10526.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
 
 ; No check in a crash test
 
diff --git a/test/CodeGen/X86/pr12312.ll b/test/CodeGen/X86/pr12312.ll
index 087b8d7539ec..81aaf91f2688 100644
--- a/test/CodeGen/X86/pr12312.ll
+++ b/test/CodeGen/X86/pr12312.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix SSE41
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix AVX
 
 define i32 @veccond128(<4 x i32> %input) {
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
index f29e50e29a3b..8b30596cd8ac 100644
--- a/test/CodeGen/X86/pr12360.ll
+++ b/test/CodeGen/X86/pr12360.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
 define zeroext i1 @f1(i8* %x) {
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: movb	(%rdi), %al
 ; CHECK-NEXT: ret
 
@@ -12,7 +12,7 @@ entry:
 }
 
 define zeroext i1 @f2(i8* %x) {
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: movb	(%rdi), %al
 ; CHECK-NEXT: ret
 
@@ -27,7 +27,7 @@ entry:
 
 ; check that we don't build a "trunc" from i1 to i1, which would assert.
 define zeroext i1 @f3(i1 %x) {
-; CHECK: f3:
+; CHECK-LABEL: f3:
 
 entry:
   %tobool = icmp ne i1 %x, 0
@@ -36,7 +36,7 @@ entry:
 
 ; check that we don't build a trunc when other bits are needed
 define zeroext i1 @f4(i32 %x) {
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: and
 
 entry:
diff --git a/test/CodeGen/X86/pr13209.ll b/test/CodeGen/X86/pr13209.ll
index 1c9316365959..8e5eca2b2c21 100644
--- a/test/CodeGen/X86/pr13209.ll
+++ b/test/CodeGen/X86/pr13209.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
 
-; CHECK: pr13209:
+; CHECK-LABEL: pr13209:
 ; CHECK-NOT: mov
 ; CHECK: .size pr13209
 
diff --git a/test/CodeGen/X86/pr14088.ll b/test/CodeGen/X86/pr14088.ll
index 505e3b5cf262..16f20d0500a1 100644
--- a/test/CodeGen/X86/pr14088.ll
+++ b/test/CodeGen/X86/pr14088.ll
@@ -19,7 +19,14 @@ return:
   ret i32 %retval.0
 }
 
-; We were miscompiling this and using %ax instead of %cx in the movw.
-; CHECK: movswl	%cx, %ecx
-; CHECK: movw	%cx, (%rsi)
-; CHECK: movslq	%ecx, %rcx
+; We were miscompiling this and using %ax instead of %cx in the movw
+; in the following sequence:
+;	movswl	%cx, %ecx
+;	movw	%cx, (%rsi)
+;	movslq	%ecx, %rcx
+;
+; We can't produce the above sequence without special SD-level
+; heuristics. Now we produce this:
+; CHECK: movw	%ax, (%rsi)
+; CHECK: cwtl
+; CHECK: cltq
diff --git a/test/CodeGen/X86/pr14090.ll b/test/CodeGen/X86/pr14090.ll
index d76b912fd8e2..2f7c720386be 100644
--- a/test/CodeGen/X86/pr14090.ll
+++ b/test/CodeGen/X86/pr14090.ll
@@ -48,11 +48,11 @@ entry:
   %fifteen = bitcast i64* %retval.i.i to i32**
   %sixteen = bitcast i64* %retval.i.i to i8*
   call void @llvm.lifetime.start(i64 8, i8* %sixteen)
-  store i32* %.ph.i80, i32** %fifteen, align 8, !tbaa !0
+  store i32* %.ph.i80, i32** %fifteen, align 8
   %sunkaddr = ptrtoint i64* %retval.i.i to i32
   %sunkaddr86 = add i32 %sunkaddr, 4
   %sunkaddr87 = inttoptr i32 %sunkaddr86 to i32*
-  store i32 %fourteen, i32* %sunkaddr87, align 4, !tbaa !3
+  store i32 %fourteen, i32* %sunkaddr87, align 4
   %seventeen = load i64* %retval.i.i, align 8
   call void @llvm.lifetime.end(i64 8, i8* %sixteen)
   %eighteen = lshr i64 %seventeen, 32
@@ -68,9 +68,3 @@ entry:
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
-!4 = metadata !{metadata !"vtable pointer", metadata !2}
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index 9b0ef83ab042..c348fec54674 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -57,11 +57,10 @@ entry:
 	%tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
 	%tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
 ; reload:
-; CHECK: fld
-; CHECK: fstps
 ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
 	%tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
 	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
+; CHECK: fld
 ; CHECK: fstpl
 ; CHECK: ZNSolsEd
 	%tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
diff --git a/test/CodeGen/X86/pr16031.ll b/test/CodeGen/X86/pr16031.ll
new file mode 100644
index 000000000000..ecf6218aeb38
--- /dev/null
+++ b/test/CodeGen/X86/pr16031.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s
+
+; CHECK-LABEL: main:
+; CHECK: pushl %esi
+; CHECK-NEXT: movl  $-12, %eax
+; CHECK-NEXT: movl  $-1, %edx
+; CHECK-NEXT: testb $1, 8(%esp)
+; CHECK-NEXT: cmovel    %edx, %eax
+; CHECK-NEXT: xorl  %ecx, %ecx
+; CHECK-NEXT: movl  %eax, %esi
+; CHECK-NEXT: addl  $-1, %esi
+; CHECK-NEXT: movl  $-1, %esi
+; CHECK-NEXT: adcl  $-1, %esi
+; CHECK-NEXT: cmovsl    %ecx, %eax
+; CHECK-NEXT: cmovsl    %ecx, %edx
+; CHECK-NEXT: popl  %esi
+define i64 @main(i1 %tobool1) nounwind {
+entry:
+  %0 = zext i1 %tobool1 to i32
+  %. = xor i32 %0, 1
+  %.21 = select i1 %tobool1, i32 -12, i32 -1
+  %conv = sext i32 %.21 to i64
+  %1 = add i64 %conv, -1
+  %cmp10 = icmp slt i64 %1, 0
+  %sub17 = select i1 %cmp10, i64 0, i64 %conv
+  ret i64 %sub17
+}
diff --git a/test/CodeGen/X86/pr16360.ll b/test/CodeGen/X86/pr16360.ll
new file mode 100644
index 000000000000..1f73a4d43600
--- /dev/null
+++ b/test/CodeGen/X86/pr16360.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mcpu=pentium4 -mtriple=i686-pc-linux | FileCheck %s
+
+define i64 @foo(i32 %sum) {
+entry:
+  %conv = sext i32 %sum to i64
+  %shr = lshr i64 %conv, 2
+  %or = or i64 4611686018360279040, %shr
+  ret i64 %or
+}
+
+; CHECK: foo
+; CHECK: shrl $2
+; CHECK: orl $-67108864
+; CHECK-NOT: movl $-1
+; CHECK: movl $1073741823
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr16807.ll b/test/CodeGen/X86/pr16807.ll
new file mode 100644
index 000000000000..6d55d99a6ac4
--- /dev/null
+++ b/test/CodeGen/X86/pr16807.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core-avx-i | FileCheck %s
+
+define <16 x i16> @f_fu(<16 x i16> %bf) {
+allocas:
+  %avg.i.i = sdiv <16 x i16> %bf, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  ret <16 x i16> %avg.i.i
+}
+
+; CHECK: f_fu
+; CHECK: psraw
+; CHECK: psrlw
+; CHECK: paddw
+; CHECK: psraw
+; CHECK: psraw
+; CHECK: psrlw
+; CHECK: paddw
+; CHECK: psraw
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr17546.ll b/test/CodeGen/X86/pr17546.ll
new file mode 100644
index 000000000000..174fa5ca3fcf
--- /dev/null
+++ b/test/CodeGen/X86/pr17546.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core-avx2 | FileCheck %s
+
+define i32 @f_f___un_3C_unf_3E_un_3C_unf_3E_(<8 x i32> %__mask, i64 %BBBB) {
+  %QQQ = trunc i64 %BBBB to i32
+  %1 = extractelement <8 x i32> %__mask, i32 %QQQ
+  ret i32 %1
+}
+
+; CHECK: f_f___un_3C_unf_3E_un_3C_unf_3E_
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll
new file mode 100644
index 000000000000..98f951f1b10c
--- /dev/null
+++ b/test/CodeGen/X86/pr17631.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s
+
+%struct_type = type { [64 x <8 x float>], <8 x float> }
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
+
+; Function Attrs: nounwind
+define i32 @equal(<8 x i32> %A) {
+allocas:
+  %first_alloc  = alloca [64 x <8 x i32>]
+  %second_alloc = alloca %struct_type
+
+  %A1 = bitcast <8 x i32> %A to <8 x float>
+  %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
+  ret i32 %A2
+}
+
+; CHECK: equal
+; CHECK-NOT: vzeroupper
+; CHECK: _chkstk
+; CHECK: ret
+
+define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
+  %i = fptoui double %x to i64
+  store i64 %i, i64* %p
+  %ret = fadd <8 x float> %y, %y
+  ret <8 x float> %ret
+}
+
+; CHECK: foo
+; CHECK-NOT: vzeroupper
+; CHECK: _ftol2
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr17764.ll b/test/CodeGen/X86/pr17764.ll
new file mode 100644
index 000000000000..7a3fd6d1810b
--- /dev/null
+++ b/test/CodeGen/X86/pr17764.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s
+
+define <16 x i16> @foo(<16 x i1> %mask, <16 x i16> %x, <16 x i16> %y) {
+  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y
+  ret <16 x i16> %ret
+}
+
+; CHECK: foo
+; CHECK: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr18014.ll b/test/CodeGen/X86/pr18014.ll
new file mode 100644
index 000000000000..e3860b88bf4f
--- /dev/null
+++ b/test/CodeGen/X86/pr18014.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=penryn | FileCheck %s
+
+; Ensure PSRAD is generated, because the condition is consumed by both PADD and
+; BLENDVPS, and PADD requires all bits of the condition to be set properly.
+
+define <4 x i32> @foo(<4 x i32>* %p, <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+  %sext_cond = sext <4 x i1> %cond to <4 x i32>
+  %t1 = add <4 x i32> %v1, %sext_cond
+  %t2 = select <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2
+  store <4 x i32> %t2, <4 x i32>* %p
+  ret <4 x i32> %t1
+; CHECK: foo
+; CHECK: pslld
+; CHECK: psrad
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/pr18023.ll b/test/CodeGen/X86/pr18023.ll
new file mode 100644
index 000000000000..4c6f8cfce732
--- /dev/null
+++ b/test/CodeGen/X86/pr18023.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.9.0 | FileCheck %s
+; PR18023
+
+; CHECK: movabsq $4294967296, %rcx
+; CHECK: movq  %rcx, (%rax)
+; CHECK: movl  $1, 4(%rax)
+; CHECK: movl  $0, 4(%rax)
+; CHECK: movq  $1, 4(%rax)
+
+@c = common global i32 0, align 4
+@a = common global [3 x i32] zeroinitializer, align 4
+@b = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+define void @func() {
+  store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
+  store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 0), align 4
+  %1 = load volatile i32* @b, align 4
+  store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
+  store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
+  %2 = load volatile i32* @b, align 4
+  store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
+  store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 2), align 4
+  %3 = load volatile i32* @b, align 4
+  store i32 3, i32* @c, align 4
+  %4 = load i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %4)
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/X86/pr18054.ll b/test/CodeGen/X86/pr18054.ll
new file mode 100644
index 000000000000..b7af51618047
--- /dev/null
+++ b/test/CodeGen/X86/pr18054.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=penryn | FileCheck %s
+
+define void @foo(<16 x i32>* %p, <16 x i1> %x) {
+  %ret = sext <16 x i1> %x to <16 x i32>
+  store <16 x i32> %ret, <16 x i32>* %p
+  ret void
+; CHECK: foo
+; CHECK-NOT: pmovsxbd
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/pr18162.ll b/test/CodeGen/X86/pr18162.ll
new file mode 100644
index 000000000000..523e47db5eee
--- /dev/null
+++ b/test/CodeGen/X86/pr18162.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s
+
+; Make sure we are not crashing on this one.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+%"Iterator" = type { i32* }
+
+declare { i64, <2 x float> } @Call() 
+declare { i64, <2 x float> }* @CallPtr() 
+
+define { i64, <2 x float> } @Foo(%"Iterator"* %this) {
+entry:
+  %retval = alloca i32
+  %this.addr = alloca %"Iterator"*
+  %this1 = load %"Iterator"** %this.addr
+  %bundle_ = getelementptr inbounds %"Iterator"* %this1, i32 0, i32 0
+  %0 = load i32** %bundle_
+  %1 = call { i64, <2 x float> } @Call()
+  %2 = call { i64, <2 x float> }* @CallPtr()
+  %3 = getelementptr { i64, <2 x float> }* %2, i32 0, i32 1
+  %4 = extractvalue { i64, <2 x float> } %1, 1
+  store <2 x float> %4, <2 x float>* %3
+  %5 = load { i64, <2 x float> }* %2
+  ret { i64, <2 x float> } %5
+}
+
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index 02a36054d88c..94429b265d97 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -7,7 +7,7 @@ target triple = "i386-apple-darwin8"
 @x = weak global i32 0          ; <i32*> [#uses=8]
 
 define void @loop_2() nounwind  {
-; CHECK: loop_2:
+; CHECK-LABEL: loop_2:
 ; CHECK-NOT: ret
 ; CHECK: addl $3, (%{{.*}})
 ; CHECK-NEXT: addl $3, (%{{.*}})
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
index 63676d9d2ce1..a4a48210d358 100644
--- a/test/CodeGen/X86/pr3216.ll
+++ b/test/CodeGen/X86/pr3216.ll
@@ -3,7 +3,7 @@
 @foo = global i8 127
 
 define i32 @main() nounwind {
-; CHECK: main:
+; CHECK-LABEL: main:
 ; CHECK-NOT: ret
 ; CHECK: sar{{.}} $5
 ; CHECK: ret
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index f7af927d6136..7264bcd12c42 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | not grep fstpt
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | not grep fstpt
 ; PR3457
 ; rdar://6548010
 
diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll
index b792ffa09fb9..70135d43f49b 100644
--- a/test/CodeGen/X86/pre-ra-sched.ll
+++ b/test/CodeGen/X86/pre-ra-sched.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
-; RUN:     2>&1 | FileCheck %s
+; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
+; RUN-disabled:     2>&1 | FileCheck %s
+; RUN: true
 ; REQUIRES: asserts
 ;
 ; rdar:13279013: pre-RA-sched should not check all interferences and
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index efb51913c5c1..d6571acbbb7e 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,6 +1,9 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
 ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 ; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW
+; RUN: llc < %s -march=x86 -mcpu=slm | FileCheck %s -check-prefix=SLM
+; RUN: llc < %s -march=x86 -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW
+; RUN: llc < %s -march=x86 -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW
 
 ; rdar://10538297
 
@@ -11,6 +14,8 @@ entry:
 ; CHECK: prefetcht0
 ; CHECK: prefetchnta
 ; PRFCHW: prefetchw
+; NOPRFCHW-NOT: prefetchw
+; SLM: prefetchw
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
diff --git a/test/CodeGen/X86/prefixdata.ll b/test/CodeGen/X86/prefixdata.ll
new file mode 100644
index 000000000000..2ec1892dd183
--- /dev/null
+++ b/test/CodeGen/X86/prefixdata.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+@i = linkonce_odr global i32 1
+
+; CHECK: f:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .long	1
+define void @f() prefix i32 1 {
+  ret void
+}
+
+; CHECK: g:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .quad	i
+define void @g() prefix i32* @i {
+  ret void
+}
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index 484afc9b5af3..c02d19319a49 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -1,18 +1,22 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lfoo:
-; RUN: llc < %s -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
-; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
-; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
 
 define private void @foo() {
         ret void
-}
 
-@baz = private global i32 4
+; CHECK: .Lfoo:
+}
 
 define i32 @bar() {
         call void @foo()
 	%1 = load i32* @baz, align 4
         ret i32 %1
+
+; CHECK-LABEL: bar:
+; CHECK: callq .Lfoo
+; CHECK: movl	.Lbaz(%rip)
 }
+
+@baz = private global i32 4
+; CHECK: .Lbaz:
diff --git a/test/CodeGen/X86/promote-i16.ll b/test/CodeGen/X86/promote-i16.ll
index 3c91d740c86d..963bc1c2927a 100644
--- a/test/CodeGen/X86/promote-i16.ll
+++ b/test/CodeGen/X86/promote-i16.ll
@@ -2,7 +2,7 @@
 
 define signext i16 @foo(i16 signext %x) nounwind {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: movzwl
 ; CHECK: movswl 4(%esp), %eax
 ; CHECK: xorl $21998, %eax
@@ -12,7 +12,7 @@ entry:
 
 define signext i16 @bar(i16 signext %x) nounwind {
 entry:
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK-NOT: movzwl
 ; CHECK: movswl 4(%esp), %eax
 ; CHECK: xorl $-10770, %eax
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index 0bf601bc1c42..5089bd761a80 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -179,7 +179,7 @@ return:
 
 define void @test3() nounwind ssp {
 entry:
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: decq 16(%rax)
   %0 = load i64** @foo, align 8
   %arrayidx = getelementptr inbounds i64* %0, i64 2
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
index 98f407776381..48182d029eb2 100644
--- a/test/CodeGen/X86/rdrand.ll
+++ b/test/CodeGen/X86/rdrand.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdrand | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx-i -mattr=+rdrnd | FileCheck %s
 declare {i16, i32} @llvm.x86.rdrand.16()
 declare {i32, i32} @llvm.x86.rdrand.32()
 declare {i64, i32} @llvm.x86.rdrand.64()
@@ -9,12 +9,12 @@ define i32 @_rdrand16_step(i16* %random_val) {
   store i16 %randval, i16* %random_val
   %isvalid = extractvalue {i16, i32} %call, 1
   ret i32 %isvalid
-; CHECK: _rdrand16_step:
+; CHECK-LABEL: _rdrand16_step:
 ; CHECK: rdrandw	%ax
-; CHECK: movw	%ax, (%r[[A0:di|cx]])
 ; CHECK: movzwl	%ax, %ecx
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%ecx, %eax
+; CHECK: movw	%cx, (%r[[A0:di|cx]])
 ; CHECK: ret
 }
 
@@ -24,11 +24,11 @@ define i32 @_rdrand32_step(i32* %random_val) {
   store i32 %randval, i32* %random_val
   %isvalid = extractvalue {i32, i32} %call, 1
   ret i32 %isvalid
-; CHECK: _rdrand32_step:
+; CHECK-LABEL: _rdrand32_step:
 ; CHECK: rdrandl	%e[[T0:[a-z]+]]
-; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T0]], %eax
+; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: ret
 }
 
@@ -38,11 +38,11 @@ define i32 @_rdrand64_step(i64* %random_val) {
   store i64 %randval, i64* %random_val
   %isvalid = extractvalue {i64, i32} %call, 1
   ret i32 %isvalid
-; CHECK: _rdrand64_step:
+; CHECK-LABEL: _rdrand64_step:
 ; CHECK: rdrandq	%r[[T1:[a-z]+]]
-; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T1]], %eax
+; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: ret
 }
 
@@ -54,7 +54,7 @@ define i32 @CSE() nounwind {
  %v2 = extractvalue { i32, i32 } %rand2, 0
  %add = add i32 %v2, %v1
  ret i32 %add
-; CHECK: CSE:
+; CHECK-LABEL: CSE:
 ; CHECK: rdrandl
 ; CHECK: rdrandl
 }
@@ -78,7 +78,7 @@ while.body:                                       ; preds = %entry, %while.body
 
 while.end:                                        ; preds = %while.body, %entry
   ret void
-; CHECK: loop:
+; CHECK-LABEL: loop:
 ; CHECK-NOT: rdrandl
 ; CHECK: This Inner Loop Header: Depth=1
 ; CHECK: rdrandl
diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll
index 35de7ebf7430..c219b4ad27ec 100644
--- a/test/CodeGen/X86/rdseed.ll
+++ b/test/CodeGen/X86/rdseed.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdseed | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx-i -mattr=+rdseed | FileCheck %s
 
 declare {i16, i32} @llvm.x86.rdseed.16()
 declare {i32, i32} @llvm.x86.rdseed.32()
@@ -10,12 +10,12 @@ define i32 @_rdseed16_step(i16* %random_val) {
   store i16 %randval, i16* %random_val
   %isvalid = extractvalue {i16, i32} %call, 1
   ret i32 %isvalid
-; CHECK: _rdseed16_step:
+; CHECK-LABEL: _rdseed16_step:
 ; CHECK: rdseedw	%ax
-; CHECK: movw	%ax, (%r[[A0:di|cx]])
 ; CHECK: movzwl	%ax, %ecx
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%ecx, %eax
+; CHECK: movw	%cx, (%r[[A0:di|cx]])
 ; CHECK: ret
 }
 
@@ -25,11 +25,11 @@ define i32 @_rdseed32_step(i32* %random_val) {
   store i32 %randval, i32* %random_val
   %isvalid = extractvalue {i32, i32} %call, 1
   ret i32 %isvalid
-; CHECK: _rdseed32_step:
+; CHECK-LABEL: _rdseed32_step:
 ; CHECK: rdseedl	%e[[T0:[a-z]+]]
-; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T0]], %eax
+; CHECK: movl	%e[[T0]], (%r[[A0]])
 ; CHECK: ret
 }
 
@@ -39,10 +39,10 @@ define i32 @_rdseed64_step(i64* %random_val) {
   store i64 %randval, i64* %random_val
   %isvalid = extractvalue {i64, i32} %call, 1
   ret i32 %isvalid
-; CHECK: _rdseed64_step:
+; CHECK-LABEL: _rdseed64_step:
 ; CHECK: rdseedq	%r[[T1:[a-z]+]]
-; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T1]], %eax
+; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index d99a7a4bc4ad..cce71f5d4cfd 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 
 ; First without noredzone.
-; CHECK: f0:
+; CHECK-LABEL: f0:
 ; CHECK: -4(%rsp)
 ; CHECK: -4(%rsp)
 ; CHECK: ret
@@ -12,7 +12,7 @@ entry:
 }
 
 ; Then with noredzone.
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: subq $4, %rsp
 ; CHECK: (%rsp)
 ; CHECK: (%rsp)
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index 3e9c7909a366..c7e855b011b3 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
-; CHECK: f0:
+; CHECK-LABEL: f0:
 ; CHECK: subq
 ; CHECK: addq
 
diff --git a/test/CodeGen/X86/rem-2.ll b/test/CodeGen/X86/rem-2.ll
deleted file mode 100644
index 1b2af4b87a32..000000000000
--- a/test/CodeGen/X86/rem-2.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86 | not grep cltd
-
-define i32 @test(i32 %X) nounwind readnone {
-entry:
-	%0 = srem i32 41, %X
-	ret i32 %0
-}
diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll
index 394070ecdf23..733b7942a6d5 100644
--- a/test/CodeGen/X86/rem.ll
+++ b/test/CodeGen/X86/rem.ll
@@ -1,22 +1,37 @@
-; RUN: llc < %s -march=x86 | not grep div
+; RUN: llc < %s -march=x86 | FileCheck %s
 
+; CHECK-LABEL: test1:
+; CHECK-NOT: div
 define i32 @test1(i32 %X) {
         %tmp1 = srem i32 %X, 255                ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
+; CHECK-LABEL: test2:
+; CHECK-NOT: div
 define i32 @test2(i32 %X) {
         %tmp1 = srem i32 %X, 256                ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
+; CHECK-LABEL: test3:
+; CHECK-NOT: div
 define i32 @test3(i32 %X) {
         %tmp1 = urem i32 %X, 255                ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
+; CHECK-LABEL: test4:
+; CHECK-NOT: div
 define i32 @test4(i32 %X) {
         %tmp1 = urem i32 %X, 256                ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
+; CHECK-LABEL: test5:
+; CHECK-NOT: cltd
+define i32 @test5(i32 %X) nounwind readnone {
+entry:
+	%0 = srem i32 41, %X
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
index f89cd330803d..9e8d8f665031 100644
--- a/test/CodeGen/X86/remat-mov-0.ll
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -5,7 +5,7 @@
 
 declare void @foo(i64 %p)
 
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: xorl %e[[A0:di|cx]], %e
 ; CHECK: xorl %e[[A0]], %e[[A0]]
 define void @bar() nounwind {
@@ -14,7 +14,7 @@ define void @bar() nounwind {
   ret void
 }
 
-; CHECK: bat:
+; CHECK-LABEL: bat:
 ; CHECK: movq $-1, %r[[A0]]
 ; CHECK: movq $-1, %r[[A0]]
 define void @bat() nounwind {
@@ -23,7 +23,7 @@ define void @bat() nounwind {
   ret void
 }
 
-; CHECK: bau:
+; CHECK-LABEL: bau:
 ; CHECK: movl $1, %e[[A0]]
 ; CHECK: movl $1, %e[[A0]]
 define void @bau() nounwind {
diff --git a/test/CodeGen/X86/remat-phys-dead.ll b/test/CodeGen/X86/remat-phys-dead.ll
new file mode 100644
index 000000000000..4d7ee622a37e
--- /dev/null
+++ b/test/CodeGen/X86/remat-phys-dead.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=x86_64-apple-darwin -debug -o /dev/null < %s 2>&1 | FileCheck %s
+
+; We need to make sure that rematerialization into a physical register marks the
+; super- or sub-register as dead after this rematerialization since only the
+; original register is actually used later. Largely irrelevant for a trivial
+; example like this, since EAX is never used again, but easy to test.
+
+define i8 @test_remat() {
+  ret i8 0
+; CHECK: REGISTER COALESCING
+; CHECK: Remat: %EAX<def,dead> = MOV32r0 %EFLAGS<imp-def,dead>, %AL<imp-def>
+}
+
+; On the other hand, if it's already the correct width, we really shouldn't be
+; marking the definition register as dead.
+
+define i32 @test_remat32() {
+  ret i32 0
+; CHECK: REGISTER COALESCING
+; CHECK: Remat: %EAX<def> = MOV32r0 %EFLAGS<imp-def,dead>
+}
+
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 778e4722cd95..091fd5398496 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -8,7 +8,7 @@ entry:
 	%call = call <1 x i64> @return_v1di()		; <<1 x i64>> [#uses=0]
 	store <1 x i64> %call, <1 x i64>* @g_v1di
         ret void
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: callq
 ; CHECK-NEXT: movq	_g_v1di
 ; CHECK-NEXT: movq	%rax,
@@ -18,21 +18,21 @@ declare <1 x i64> @return_v1di()
 
 define <1 x i64> @t2() nounwind {
 	ret <1 x i64> <i64 1>
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: movl	$1
 ; CHECK-NEXT: ret
 }
 
 define <2 x i32> @t3() nounwind {
 	ret <2 x i32> <i32 1, i32 0>
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: movl $1
 ; CHECK: movd {{.*}}, %xmm0
 }
 
 define double @t4() nounwind {
 	ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
-; CHECK: t4:
+; CHECK-LABEL: t4:
 ; CHECK: movl $1
 ; CHECK: movd {{.*}}, %xmm0
 }
diff --git a/test/CodeGen/X86/returned-trunc-tail-calls.ll b/test/CodeGen/X86/returned-trunc-tail-calls.ll
new file mode 100644
index 000000000000..10bd3b673be7
--- /dev/null
+++ b/test/CodeGen/X86/returned-trunc-tail-calls.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+declare i32 @ret32(i32 returned)
+declare i64 @ret64(i64 returned)
+
+define i64 @test1(i64 %val) {
+; CHECK-LABEL: test1:
+; CHECK-NOT: jmp
+; CHECK: callq
+  %in = trunc i64 %val to i32
+  tail call i32 @ret32(i32 returned %in)
+  ret i64 %val
+}
+
+define i32 @test2(i64 %val) {
+; CHECK-LABEL: test2:
+; CHECK: jmp
+; CHECK-NOT: callq
+  %in = trunc i64 %val to i32
+  tail call i64 @ret64(i64 returned %val)
+  ret i32 %in
+}
+
+define i32 @test3(i64 %in) {
+; CHECK-LABEL: test3:
+; CHECK: jmp
+; CHECK-NOT: callq
+  %small = trunc i64 %in to i32
+  tail call i32 @ret32(i32 returned %small)
+  ret i32 %small
+}
+
+declare {i32, i8} @take_i32_i8({i32, i8} returned)
+define { i8, i8 } @test_nocommon_value({i32, i32} %in) {
+; CHECK-LABEL: test_nocommon_value
+; CHECK: jmp
+
+  %first = extractvalue {i32, i32} %in, 0
+  %first.trunc = trunc i32 %first to i8
+
+  %second = extractvalue {i32, i32} %in, 1
+  %second.trunc = trunc i32 %second to i8
+
+  %tmp = insertvalue {i32, i8} undef, i32 %first, 0
+  %callval = insertvalue {i32, i8} %tmp, i8 %second.trunc, 1
+  tail call {i32, i8} @take_i32_i8({i32, i8} returned %callval)
+
+  %restmp = insertvalue {i8, i8} undef, i8 %first.trunc, 0
+  %res = insertvalue {i8, i8} %restmp, i8 %second.trunc, 1
+  ret {i8, i8} %res
+}
+
+declare {i32, {i32, i32}} @give_i32_i32_i32()
+define {{i32, i32}, i32} @test_structs_different_shape() {
+; CHECK-LABEL: test_structs_different_shape
+; CHECK: jmp
+  %val = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %first = extractvalue {i32, {i32, i32}} %val, 0
+  %second = extractvalue {i32, {i32, i32}} %val, 1, 0
+  %third = extractvalue {i32, {i32, i32}} %val, 1, 1
+
+  %restmp = insertvalue {{i32, i32}, i32} undef, i32 %first, 0, 0
+  %reseventmper = insertvalue {{i32, i32}, i32} %restmp, i32 %second, 0, 1
+  %res = insertvalue {{i32, i32}, i32} %reseventmper, i32 %third, 1
+
+  ret {{i32, i32}, i32} %res
+}
+
+define i64 @test_undef_asymmetry() {
+; CHECK-LABEL: test_undef_asymmetry:
+; CHECK-NOT: jmp
+  tail call i64 @ret64(i64 returned undef)
+  ret i64 2 ; constant result, not the callee's 'returned' (undef) argument
+}
+
+define {{}, {{}, i32, {}}, [1 x i32]} @evil_empty_aggregates() {
+; CHECK-LABEL: evil_empty_aggregates
+; CHECK: jmp
+  %agg = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %first = extractvalue {i32, {i32, i32}} %agg, 0
+  %second = extractvalue {i32, {i32, i32}} %agg, 1, 0
+
+  %restmp = insertvalue {{}, {{}, i32, {}}, [1 x i32]} undef, i32 %first, 1, 1
+  %res = insertvalue {{}, {{}, i32, {}}, [1 x i32]} %restmp, i32 %second, 2, 0
+  ret {{}, {{}, i32, {}}, [1 x i32]} %res
+}
+
+define i32 @structure_is_unimportant() {
+; CHECK-LABEL: structure_is_unimportant
+; CHECK: jmp
+  %val = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %res = extractvalue {i32, {i32, i32}} %val, 0
+  ret i32 %res
+}
diff --git a/test/CodeGen/X86/reverse_branches.ll b/test/CodeGen/X86/reverse_branches.ll
index 97721250377e..ee6333e61e88 100644
--- a/test/CodeGen/X86/reverse_branches.ll
+++ b/test/CodeGen/X86/reverse_branches.ll
@@ -7,7 +7,7 @@
 ; Make sure at end of do.cond.i, we jump to do.body.i first to have a tighter
 ; inner loop.
 define i32 @test_branches_order() uwtable ssp {
-; CHECK: test_branches_order:
+; CHECK-LABEL: test_branches_order:
 ; CHECK: [[L0:LBB0_[0-9]+]]: ## %do.body.i
 ; CHECK: je
 ; CHECK: %do.cond.i
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index 9291200f0110..9228ea1f621f 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -1,13 +1,9 @@
-; RUN: llc < %s -relocation-model=static | grep rodata | count 3
-; RUN: llc < %s -relocation-model=static | grep -F "rodata.cst" | count 2
-; RUN: llc < %s -relocation-model=pic | grep rodata | count 2
-; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro" | count 2
-; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
-; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel" | count 4
-; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.local" | count 1
+; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
+
 @a = internal unnamed_addr constant [2 x i32] [i32 1, i32 2]
 @a1 = unnamed_addr constant [2 x i32] [i32 1, i32 2]
 @e = internal  unnamed_addr constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
@@ -21,3 +17,30 @@ target triple = "x86_64-unknown-linux-gnu"
 @p3 = internal global i8* bitcast([2 x i32]* @a to i8*)
 @t3 = internal global i8* bitcast([2 x [2 x i32]]* @e to i8*)
 
+; STATIC: .section .rodata.cst8,"aM",@progbits,8
+; STATIC: a:
+; STATIC: a1:
+; STATIC: .section .rodata.cst16,"aM",@progbits,16
+; STATIC: e:
+; STATIC: e1:
+; STATIC: .section .rodata,"a",@progbits
+; STATIC: p:
+
+; PIC: .section .rodata.cst8,"aM",@progbits,8
+; PIC: a:
+; PIC: a1:
+; PIC: .section .rodata.cst16,"aM",@progbits,16
+; PIC: e:
+; PIC: e1:
+; PIC: .section .data.rel.ro.local,"aw",@progbits
+; PIC: p:
+; PIC: t:
+; PIC: .section .data.rel.ro,"aw",@progbits
+; PIC: p1:
+; PIC: t1:
+; PIC: .section .data.rel,"aw",@progbits
+; PIC: p2:
+; PIC: t2:
+; PIC: .section .data.rel.local,"aw",@progbits
+; PIC: p3:
+; PIC: t3:
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
index de23dcb78f10..0293f4e21123 100644
--- a/test/CodeGen/X86/rot16.ll
+++ b/test/CodeGen/X86/rot16.ll
@@ -2,7 +2,7 @@
 
 define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: rolw %cl
 	%0 = shl i16 %x, %z
 	%1 = sub i16 16, %z
@@ -13,7 +13,7 @@ entry:
 
 define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: shldw %cl
 	%0 = shl i16 %y, %z
 	%1 = sub i16 16, %z
@@ -24,7 +24,7 @@ entry:
 
 define i16 @un(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: un:
+; CHECK-LABEL: un:
 ; CHECK: rorw %cl
 	%0 = lshr i16 %x, %z
 	%1 = sub i16 16, %z
@@ -35,7 +35,7 @@ entry:
 
 define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: bu:
+; CHECK-LABEL: bu:
 ; CHECK: shrdw
 	%0 = lshr i16 %y, %z
 	%1 = sub i16 16, %z
@@ -46,7 +46,7 @@ entry:
 
 define i16 @xfoo(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: xfoo:
+; CHECK-LABEL: xfoo:
 ; CHECK: rolw $5
 	%0 = lshr i16 %x, 11
 	%1 = shl i16 %x, 5
@@ -56,7 +56,7 @@ entry:
 
 define i16 @xbar(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: xbar:
+; CHECK-LABEL: xbar:
 ; CHECK: shldw $5
 	%0 = shl i16 %y, 5
 	%1 = lshr i16 %x, 11
@@ -66,7 +66,7 @@ entry:
 
 define i16 @xun(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: xun:
+; CHECK-LABEL: xun:
 ; CHECK: rolw $11
 	%0 = lshr i16 %x, 5
 	%1 = shl i16 %x, 11
@@ -76,7 +76,7 @@ entry:
 
 define i16 @xbu(i16 %x, i16 %y, i16 %z) nounwind readnone {
 entry:
-; CHECK: xbu:
+; CHECK-LABEL: xbu:
 ; CHECK: shldw $11
 	%0 = lshr i16 %y, 5
 	%1 = shl i16 %x, 11
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index e95a734e048d..7bdd606e9cbb 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -3,7 +3,7 @@
 
 define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: roll %cl
 	%0 = shl i32 %x, %z
 	%1 = sub i32 32, %z
@@ -14,7 +14,7 @@ entry:
 
 define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: shldl %cl
 	%0 = shl i32 %y, %z
 	%1 = sub i32 32, %z
@@ -25,7 +25,7 @@ entry:
 
 define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: un:
+; CHECK-LABEL: un:
 ; CHECK: rorl %cl
 	%0 = lshr i32 %x, %z
 	%1 = sub i32 32, %z
@@ -36,7 +36,7 @@ entry:
 
 define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: bu:
+; CHECK-LABEL: bu:
 ; CHECK: shrdl %cl
 	%0 = lshr i32 %y, %z
 	%1 = sub i32 32, %z
@@ -47,9 +47,9 @@ entry:
 
 define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: xfoo:
+; CHECK-LABEL: xfoo:
 ; CHECK: roll $7
-; BMI2: xfoo:
+; BMI2-LABEL: xfoo:
 ; BMI2: rorxl $25
 	%0 = lshr i32 %x, 25
 	%1 = shl i32 %x, 7
@@ -59,7 +59,7 @@ entry:
 
 define i32 @xfoop(i32* %p) nounwind readnone {
 entry:
-; BMI2: xfoop:
+; BMI2-LABEL: xfoop:
 ; BMI2: rorxl $25, ({{.+}}), %{{.+}}
 	%x = load i32* %p
 	%a = lshr i32 %x, 25
@@ -70,7 +70,7 @@ entry:
 
 define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: xbar:
+; CHECK-LABEL: xbar:
 ; CHECK: shldl $7
 	%0 = shl i32 %y, 7
 	%1 = lshr i32 %x, 25
@@ -80,9 +80,9 @@ entry:
 
 define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: xun:
+; CHECK-LABEL: xun:
 ; CHECK: roll $25
-; BMI2: xun:
+; BMI2-LABEL: xun:
 ; BMI2: rorxl $7
 	%0 = lshr i32 %x, 7
 	%1 = shl i32 %x, 25
@@ -92,7 +92,7 @@ entry:
 
 define i32 @xunp(i32* %p) nounwind readnone {
 entry:
-; BMI2: xunp:
+; BMI2-LABEL: xunp:
 ; BMI2: rorxl $7, ({{.+}}), %{{.+}}
 	%x = load i32* %p
 	%a = lshr i32 %x, 7
@@ -103,7 +103,7 @@ entry:
 
 define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone {
 entry:
-; CHECK: xbu:
+; CHECK-LABEL: xbu:
 ; CHECK: shldl
 	%0 = lshr i32 %y, 7
 	%1 = shl i32 %x, 25
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
index 7fa982d83b61..e19a35da1cd6 100644
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -43,7 +43,7 @@ entry:
 
 define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
 entry:
-; BMI2: xfoo:
+; BMI2-LABEL: xfoo:
 ; BMI2: rorxq $57
 	%0 = lshr i64 %x, 57
 	%1 = shl i64 %x, 7
@@ -53,7 +53,7 @@ entry:
 
 define i64 @xfoop(i64* %p) nounwind readnone {
 entry:
-; BMI2: xfoop:
+; BMI2-LABEL: xfoop:
 ; BMI2: rorxq $57, ({{.+}}), %{{.+}}
 	%x = load i64* %p
 	%a = lshr i64 %x, 57
@@ -72,7 +72,7 @@ entry:
 
 define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
 entry:
-; BMI2: xun:
+; BMI2-LABEL: xun:
 ; BMI2: rorxq $7
 	%0 = lshr i64 %x, 7
 	%1 = shl i64 %x, 57
@@ -82,7 +82,7 @@ entry:
 
 define i64 @xunp(i64* %p) nounwind readnone {
 entry:
-; BMI2: xunp:
+; BMI2-LABEL: xunp:
 ; BMI2: rorxq $7, ({{.+}}), %{{.+}}
 	%x = load i64* %p
 	%a = lshr i64 %x, 7
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
index 51fcf6418429..69f4bfb9f47d 100644
--- a/test/CodeGen/X86/rounding-ops.ll
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck -check-prefix=CHECK-SSE %s
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
 
 define float @test1(float %x) nounwind  {
   %call = tail call float @floorf(float %x) nounwind readnone
   ret float %call
 
-; CHECK-SSE: test1:
+; CHECK-SSE-LABEL: test1:
 ; CHECK-SSE: roundss $1
 
-; CHECK-AVX: test1:
+; CHECK-AVX-LABEL: test1:
 ; CHECK-AVX: vroundss $1
 }
 
@@ -18,10 +18,10 @@ define double @test2(double %x) nounwind  {
   %call = tail call double @floor(double %x) nounwind readnone
   ret double %call
 
-; CHECK-SSE: test2:
+; CHECK-SSE-LABEL: test2:
 ; CHECK-SSE: roundsd $1
 
-; CHECK-AVX: test2:
+; CHECK-AVX-LABEL: test2:
 ; CHECK-AVX: vroundsd $1
 }
 
@@ -31,10 +31,10 @@ define float @test3(float %x) nounwind  {
   %call = tail call float @nearbyintf(float %x) nounwind readnone
   ret float %call
 
-; CHECK-SSE: test3:
+; CHECK-SSE-LABEL: test3:
 ; CHECK-SSE: roundss $12
 
-; CHECK-AVX: test3:
+; CHECK-AVX-LABEL: test3:
 ; CHECK-AVX: vroundss $12
 }
 
@@ -44,10 +44,10 @@ define double @test4(double %x) nounwind  {
   %call = tail call double @nearbyint(double %x) nounwind readnone
   ret double %call
 
-; CHECK-SSE: test4:
+; CHECK-SSE-LABEL: test4:
 ; CHECK-SSE: roundsd $12
 
-; CHECK-AVX: test4:
+; CHECK-AVX-LABEL: test4:
 ; CHECK-AVX: vroundsd $12
 }
 
@@ -57,10 +57,10 @@ define float @test5(float %x) nounwind  {
   %call = tail call float @ceilf(float %x) nounwind readnone
   ret float %call
 
-; CHECK-SSE: test5:
+; CHECK-SSE-LABEL: test5:
 ; CHECK-SSE: roundss $2
 
-; CHECK-AVX: test5:
+; CHECK-AVX-LABEL: test5:
 ; CHECK-AVX: vroundss $2
 }
 
@@ -70,10 +70,10 @@ define double @test6(double %x) nounwind  {
   %call = tail call double @ceil(double %x) nounwind readnone
   ret double %call
 
-; CHECK-SSE: test6:
+; CHECK-SSE-LABEL: test6:
 ; CHECK-SSE: roundsd $2
 
-; CHECK-AVX: test6:
+; CHECK-AVX-LABEL: test6:
 ; CHECK-AVX: vroundsd $2
 }
 
@@ -83,10 +83,10 @@ define float @test7(float %x) nounwind  {
   %call = tail call float @rintf(float %x) nounwind readnone
   ret float %call
 
-; CHECK-SSE: test7:
+; CHECK-SSE-LABEL: test7:
 ; CHECK-SSE: roundss $4
 
-; CHECK-AVX: test7:
+; CHECK-AVX-LABEL: test7:
 ; CHECK-AVX: vroundss $4
 }
 
@@ -96,10 +96,10 @@ define double @test8(double %x) nounwind  {
   %call = tail call double @rint(double %x) nounwind readnone
   ret double %call
 
-; CHECK-SSE: test8:
+; CHECK-SSE-LABEL: test8:
 ; CHECK-SSE: roundsd $4
 
-; CHECK-AVX: test8:
+; CHECK-AVX-LABEL: test8:
 ; CHECK-AVX: vroundsd $4
 }
 
@@ -109,10 +109,10 @@ define float @test9(float %x) nounwind  {
   %call = tail call float @truncf(float %x) nounwind readnone
   ret float %call
 
-; CHECK-SSE: test9:
+; CHECK-SSE-LABEL: test9:
 ; CHECK-SSE: roundss $3
 
-; CHECK-AVX: test9:
+; CHECK-AVX-LABEL: test9:
 ; CHECK-AVX: vroundss $3
 }
 
@@ -122,10 +122,10 @@ define double @test10(double %x) nounwind  {
   %call = tail call double @trunc(double %x) nounwind readnone
   ret double %call
 
-; CHECK-SSE: test10:
+; CHECK-SSE-LABEL: test10:
 ; CHECK-SSE: roundsd $3
 
-; CHECK-AVX: test10:
+; CHECK-AVX-LABEL: test10:
 ; CHECK-AVX: vroundsd $3
 }
 
diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll
index 5a23cf136d85..b8c364e2961c 100644
--- a/test/CodeGen/X86/sandybridge-loads.ll
+++ b/test/CodeGen/X86/sandybridge-loads.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
 
-;CHECK: wideloads
+;CHECK-LABEL: wideloads:
 ;CHECK: vmovaps
 ;CHECK: vinsertf128
 ;CHECK: vmovaps
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index e99ea9356a64..5807d5babfff 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 |  FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 |  FileCheck %s
 
 ; Verify when widening a divide/remainder operation, we only generate a
 ; divide/rem per element since divide/remainder can trap.
diff --git a/test/CodeGen/X86/sdiv-exact.ll b/test/CodeGen/X86/sdiv-exact.ll
index 48bb8836e896..4f8d3f05351b 100644
--- a/test/CodeGen/X86/sdiv-exact.ll
+++ b/test/CodeGen/X86/sdiv-exact.ll
@@ -3,7 +3,7 @@
 define i32 @test1(i32 %x) {
   %div = sdiv exact i32 %x, 25
   ret i32 %div
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: imull	$-1030792151, 4(%esp)
 ; CHECK-NEXT: ret
 }
@@ -11,7 +11,7 @@ define i32 @test1(i32 %x) {
 define i32 @test2(i32 %x) {
   %div = sdiv exact i32 %x, 24
   ret i32 %div
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: sarl	$3
 ; CHECK-NEXT: imull	$-1431655765
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index d68b00b69a2b..e17076215d5e 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -20,7 +20,7 @@ false:
         %retvalue = call i32 @test_basic(i32 %newlen)
         ret i32 %retvalue
 
-; X32:      test_basic:
+; X32-LABEL:      test_basic:
 
 ; X32:      cmpl %gs:48, %esp
 ; X32-NEXT: ja      .LBB0_2
@@ -31,7 +31,7 @@ false:
 ; X32-NEXT: ret
 
 ; X32:      movl %esp, %eax
-; X32-NEXT: subl %ecx, %eax
+; X32:      subl %ecx, %eax
 ; X32-NEXT: cmpl %eax, %gs:48
 
 ; X32:      movl %eax, %esp
@@ -41,7 +41,7 @@ false:
 ; X32-NEXT: calll __morestack_allocate_stack_space
 ; X32-NEXT: addl $16, %esp
 
-; X64:      test_basic:
+; X64-LABEL:      test_basic:
 
 ; X64:      cmpq %fs:112, %rsp
 ; X64-NEXT: ja      .LBB0_2
@@ -52,7 +52,7 @@ false:
 ; X64-NEXT: ret
 
 ; X64:      movq %rsp, %[[RDI:rdi|rax]]
-; X64-NEXT: subq %{{.*}}, %[[RDI]]
+; X64:      subq %{{.*}}, %[[RDI]]
 ; X64-NEXT: cmpq %[[RDI]], %fs:112
 
 ; X64:      movq %[[RDI]], %rsp
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 5407b87418f3..08a98ef51ec4 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -32,7 +32,7 @@ define void @test_basic() {
         call void @dummy_use (i32* %mem, i32 10)
 	ret void
 
-; X32-Linux:       test_basic:
+; X32-Linux-LABEL:       test_basic:
 
 ; X32-Linux:       cmpl %gs:48, %esp
 ; X32-Linux-NEXT:  ja      .LBB0_2
@@ -42,7 +42,7 @@ define void @test_basic() {
 ; X32-Linux-NEXT:  calll __morestack
 ; X32-Linux-NEXT:  ret
 
-; X64-Linux:       test_basic:
+; X64-Linux-LABEL:       test_basic:
 
 ; X64-Linux:       cmpq %fs:112, %rsp
 ; X64-Linux-NEXT:  ja      .LBB0_2
@@ -52,7 +52,7 @@ define void @test_basic() {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
-; X32-Darwin:      test_basic:
+; X32-Darwin-LABEL:      test_basic:
 
 ; X32-Darwin:      movl $432, %ecx
 ; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
@@ -63,7 +63,7 @@ define void @test_basic() {
 ; X32-Darwin-NEXT: calll ___morestack
 ; X32-Darwin-NEXT: ret
 
-; X64-Darwin:      test_basic:
+; X64-Darwin-LABEL:      test_basic:
 
 ; X64-Darwin:      cmpq %gs:816, %rsp
 ; X64-Darwin-NEXT: ja      LBB0_2
@@ -73,7 +73,7 @@ define void @test_basic() {
 ; X64-Darwin-NEXT: callq ___morestack
 ; X64-Darwin-NEXT: ret
 
-; X32-MinGW:       test_basic:
+; X32-MinGW-LABEL:       test_basic:
 
 ; X32-MinGW:       cmpl %fs:20, %esp
 ; X32-MinGW-NEXT:  ja      LBB0_2
@@ -83,7 +83,7 @@ define void @test_basic() {
 ; X32-MinGW-NEXT:  calll ___morestack
 ; X32-MinGW-NEXT:  ret
 
-; X64-FreeBSD:       test_basic:
+; X64-FreeBSD-LABEL:       test_basic:
 
 ; X64-FreeBSD:       cmpq %fs:24, %rsp
 ; X64-FreeBSD-NEXT:  ja      .LBB0_2
@@ -224,7 +224,7 @@ define fastcc void @test_fastcc() {
         call void @dummy_use (i32* %mem, i32 10)
         ret void
 
-; X32-Linux:       test_fastcc:
+; X32-Linux-LABEL:       test_fastcc:
 
 ; X32-Linux:       cmpl %gs:48, %esp
 ; X32-Linux-NEXT:  ja      .LBB3_2
@@ -234,7 +234,7 @@ define fastcc void @test_fastcc() {
 ; X32-Linux-NEXT:  calll __morestack
 ; X32-Linux-NEXT:  ret
 
-; X64-Linux:       test_fastcc:
+; X64-Linux-LABEL:       test_fastcc:
 
 ; X64-Linux:       cmpq %fs:112, %rsp
 ; X64-Linux-NEXT:  ja      .LBB3_2
@@ -244,7 +244,7 @@ define fastcc void @test_fastcc() {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
-; X32-Darwin:      test_fastcc:
+; X32-Darwin-LABEL:      test_fastcc:
 
 ; X32-Darwin:      movl $432, %eax
 ; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
@@ -255,7 +255,7 @@ define fastcc void @test_fastcc() {
 ; X32-Darwin-NEXT: calll ___morestack
 ; X32-Darwin-NEXT: ret
 
-; X64-Darwin:      test_fastcc:
+; X64-Darwin-LABEL:      test_fastcc:
 
 ; X64-Darwin:      cmpq %gs:816, %rsp
 ; X64-Darwin-NEXT: ja      LBB3_2
@@ -265,7 +265,7 @@ define fastcc void @test_fastcc() {
 ; X64-Darwin-NEXT: callq ___morestack
 ; X64-Darwin-NEXT: ret
 
-; X32-MinGW:       test_fastcc:
+; X32-MinGW-LABEL:       test_fastcc:
 
 ; X32-MinGW:       cmpl %fs:20, %esp
 ; X32-MinGW-NEXT:  ja      LBB3_2
@@ -275,7 +275,7 @@ define fastcc void @test_fastcc() {
 ; X32-MinGW-NEXT:  calll ___morestack
 ; X32-MinGW-NEXT:  ret
 
-; X64-FreeBSD:       test_fastcc:
+; X64-FreeBSD-LABEL:       test_fastcc:
 
 ; X64-FreeBSD:       cmpq %fs:24, %rsp
 ; X64-FreeBSD-NEXT:  ja      .LBB3_2
@@ -292,7 +292,7 @@ define fastcc void @test_fastcc_large() {
         call void @dummy_use (i32* %mem, i32 0)
         ret void
 
-; X32-Linux:       test_fastcc_large:
+; X32-Linux-LABEL:       test_fastcc_large:
 
 ; X32-Linux:       leal -40012(%esp), %eax
 ; X32-Linux-NEXT:  cmpl %gs:48, %eax
@@ -303,7 +303,7 @@ define fastcc void @test_fastcc_large() {
 ; X32-Linux-NEXT:  calll __morestack
 ; X32-Linux-NEXT:  ret
 
-; X64-Linux:       test_fastcc_large:
+; X64-Linux-LABEL:       test_fastcc_large:
 
 ; X64-Linux:       leaq -40008(%rsp), %r11
 ; X64-Linux-NEXT:  cmpq %fs:112, %r11
@@ -314,7 +314,7 @@ define fastcc void @test_fastcc_large() {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
-; X32-Darwin:      test_fastcc_large:
+; X32-Darwin-LABEL:      test_fastcc_large:
 
 ; X32-Darwin:      leal -40012(%esp), %eax
 ; X32-Darwin-NEXT: movl $432, %ecx
@@ -326,7 +326,7 @@ define fastcc void @test_fastcc_large() {
 ; X32-Darwin-NEXT: calll ___morestack
 ; X32-Darwin-NEXT: ret
 
-; X64-Darwin:      test_fastcc_large:
+; X64-Darwin-LABEL:      test_fastcc_large:
 
 ; X64-Darwin:      leaq -40008(%rsp), %r11
 ; X64-Darwin-NEXT: cmpq %gs:816, %r11
@@ -337,7 +337,7 @@ define fastcc void @test_fastcc_large() {
 ; X64-Darwin-NEXT: callq ___morestack
 ; X64-Darwin-NEXT: ret
 
-; X32-MinGW:       test_fastcc_large:
+; X32-MinGW-LABEL:       test_fastcc_large:
 
 ; X32-MinGW:       leal -40008(%esp), %eax
 ; X32-MinGW-NEXT:  cmpl %fs:20, %eax
@@ -348,7 +348,7 @@ define fastcc void @test_fastcc_large() {
 ; X32-MinGW-NEXT:  calll ___morestack
 ; X32-MinGW-NEXT:  ret
 
-; X64-FreeBSD:       test_fastcc_large:
+; X64-FreeBSD-LABEL:       test_fastcc_large:
 
 ; X64-FreeBSD:       leaq -40008(%rsp), %r11
 ; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
@@ -368,7 +368,7 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
 
 ; This is testing that the Mac implementation preserves ecx
 
-; X32-Darwin:      test_fastcc_large_with_ecx_arg:
+; X32-Darwin-LABEL:      test_fastcc_large_with_ecx_arg:
 
 ; X32-Darwin:      leal -40012(%esp), %eax
 ; X32-Darwin-NEXT: pushl %ecx
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 09ca07b31a10..cdd258d92031 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -10,11 +10,11 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
   %t4 = select i1 %r, %0 %t0, %0 %t1
   %t5 = extractvalue %0 %t4, 1
   ret i32 %t5
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: cmovneq %rdi, %rsi
 ; CHECK: movl (%rsi), %eax
 
-; ATOM: test1:
+; ATOM-LABEL: test1:
 ; ATOM: cmovneq %rdi, %rsi
 ; ATOM: movl (%rsi), %eax
 }
@@ -33,13 +33,13 @@ bb90:		; preds = %bb84, %bb72
 	unreachable
 bb91:		; preds = %bb84
 	ret i32 0
-; CHECK: test2:
-; CHECK: movnew
-; CHECK: movswl
+; CHECK-LABEL: test2:
+; CHECK: cmovnew
+; CHECK: cwtl
 
-; ATOM: test2:
-; ATOM: movnew
-; ATOM: movswl
+; ATOM-LABEL: test2:
+; ATOM: cmovnew
+; ATOM: cwtl
 }
 
 declare i1 @return_false()
@@ -51,10 +51,10 @@ entry:
 	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
 	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01		; <float> [#uses=1]
 	ret float %iftmp.0.0
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: movss	{{.*}},4), %xmm0
 
-; ATOM: test3:
+; ATOM-LABEL: test3:
 ; ATOM: movss  {{.*}},4), %xmm0
 }
 
@@ -65,10 +65,10 @@ entry:
 	%1 = getelementptr i8* %P, i32 %iftmp.0.0		; <i8*> [#uses=1]
 	%2 = load i8* %1, align 1		; <i8> [#uses=1]
 	ret i8 %2
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movsbl	({{.*}},4), %eax
 
-; ATOM: test4:
+; ATOM-LABEL: test4:
 ; ATOM: movsbl ({{.*}},4), %eax
 }
 
@@ -76,9 +76,9 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
   %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
   store <2 x i16> %x, <2 x i16>* %p
   ret void
-; CHECK: test5:
+; CHECK-LABEL: test5:
 
-; ATOM: test5:
+; ATOM-LABEL: test5:
 }
 
 define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
@@ -91,13 +91,13 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
         ret void
 ; Verify that the fmul gets sunk into the one part of the diamond where it is
 ; needed.
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: je
 ; CHECK: ret
 ; CHECK: mulps
 ; CHECK: ret
 
-; ATOM: test6:
+; ATOM-LABEL: test6:
 ; ATOM: je
 ; ATOM: ret
 ; ATOM: mulps
@@ -109,11 +109,11 @@ define x86_fp80 @test7(i32 %tmp8) nounwind {
         %tmp9 = icmp sgt i32 %tmp8, -1          ; <i1> [#uses=1]
         %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000
         ret x86_fp80 %retval
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: leaq
 ; CHECK: fldt (%r{{.}}x,%r{{.}}x)
 
-; ATOM: test7:
+; ATOM-LABEL: test7:
 ; ATOM: leaq
 ; ATOM: fldt (%r{{.}}x,%r{{.}}x)
 }
@@ -125,9 +125,9 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
 	store <6 x i32> %val, <6 x i32>* %dst.addr
 	ret void
 
-; CHECK: test8:
+; CHECK-LABEL: test8:
 
-; ATOM: test8:
+; ATOM-LABEL: test8:
 }
 
 
@@ -137,13 +137,13 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   %cmp = icmp ne i64 %x, 0
   %cond = select i1 %cmp, i64 %y, i64 -1
   ret i64 %cond
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: cmpq	$1, %rdi
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
 
-; ATOM: test9:
+; ATOM-LABEL: test9:
 ; ATOM: cmpq   $1, %rdi
 ; ATOM: sbbq   %rax, %rax
 ; ATOM: orq    %rsi, %rax
@@ -155,13 +155,13 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   %cmp = icmp eq i64 %x, 0
   %cond = select i1 %cmp, i64 -1, i64 %y
   ret i64 %cond
-; CHECK: test9a:
+; CHECK-LABEL: test9a:
 ; CHECK: cmpq	$1, %rdi
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
 
-; ATOM: test9a:
+; ATOM-LABEL: test9a:
 ; ATOM: cmpq   $1, %rdi
 ; ATOM: sbbq   %rax, %rax
 ; ATOM: orq    %rsi, %rax
@@ -173,13 +173,13 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   %A = sext i1 %cmp to i64
   %cond = or i64 %y, %A
   ret i64 %cond
-; CHECK: test9b:
+; CHECK-LABEL: test9b:
 ; CHECK: cmpq	$1, %rdi
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
 
-; ATOM: test9b:
+; ATOM-LABEL: test9b:
 ; ATOM: cmpq   $1, %rdi
 ; ATOM: sbbq   %rax, %rax
 ; ATOM: orq    %rsi, %rax
@@ -191,13 +191,13 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   %cmp = icmp eq i64 %x, 0
   %cond = select i1 %cmp, i64 -1, i64 1
   ret i64 %cond
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: cmpq	$1, %rdi
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: orq	$1, %rax
 ; CHECK: ret
 
-; ATOM: test10:
+; ATOM-LABEL: test10:
 ; ATOM: cmpq   $1, %rdi
 ; ATOM: sbbq   %rax, %rax
 ; ATOM: orq    $1, %rax
@@ -210,14 +210,14 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   %cmp = icmp eq i64 %x, 0
   %cond = select i1 %cmp, i64 %y, i64 -1
   ret i64 %cond
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: cmpq	$1, %rdi
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: notq %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
 
-; ATOM: test11:
+; ATOM-LABEL: test11:
 ; ATOM: cmpq   $1, %rdi
 ; ATOM: sbbq   %rax, %rax
 ; ATOM: notq %rax
@@ -229,14 +229,14 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
   %cmp = icmp ne i64 %x, 0
   %cond = select i1 %cmp, i64 -1, i64 %y
   ret i64 %cond
-; CHECK: test11a:
+; CHECK-LABEL: test11a:
 ; CHECK: cmpq	$1, %rdi
 ; CHECK: sbbq	%rax, %rax
 ; CHECK: notq %rax
 ; CHECK: orq	%rsi, %rax
 ; CHECK: ret
 
-; ATOM: test11a:
+; ATOM-LABEL: test11a:
 ; ATOM: cmpq   $1, %rdi
 ; ATOM: sbbq   %rax, %rax
 ; ATOM: notq %rax
@@ -255,13 +255,13 @@ entry:
   %D = select i1 %B, i64 -1, i64 %C
   %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
   ret i8* %call
-; CHECK: test12:
-; CHECK: movq $-1, %rdi
+; CHECK-LABEL: test12:
 ; CHECK: mulq
-; CHECK: cmovnoq	%rax, %rdi
+; CHECK: movq $-1, %[[R:r..]]
+; CHECK: cmovnoq	%rax, %[[R]]
 ; CHECK: jmp	__Znam
 
-; ATOM: test12:
+; ATOM-LABEL: test12:
 ; ATOM: mulq
 ; ATOM: movq $-1, %rdi
 ; ATOM: cmovnoq        %rax, %rdi
@@ -274,12 +274,12 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
   %c = icmp ult i32 %a, %b
   %d = sext i1 %c to i32
   ret i32 %d
-; CHECK: test13:
+; CHECK-LABEL: test13:
 ; CHECK: cmpl
 ; CHECK-NEXT: sbbl
 ; CHECK-NEXT: ret
 
-; ATOM: test13:
+; ATOM-LABEL: test13:
 ; ATOM: cmpl
 ; ATOM-NEXT: sbbl
 ; ATOM: ret
@@ -289,13 +289,13 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
   %c = icmp uge i32 %a, %b
   %d = sext i1 %c to i32
   ret i32 %d
-; CHECK: test14:
+; CHECK-LABEL: test14:
 ; CHECK: cmpl
 ; CHECK-NEXT: sbbl
 ; CHECK-NEXT: notl
 ; CHECK-NEXT: ret
 
-; ATOM: test14:
+; ATOM-LABEL: test14:
 ; ATOM: cmpl
 ; ATOM-NEXT: sbbl
 ; ATOM-NEXT: notl
@@ -308,11 +308,11 @@ entry:
   %cmp = icmp ne i32 %x, 0
   %sub = sext i1 %cmp to i32
   ret i32 %sub
-; CHECK: test15:
+; CHECK-LABEL: test15:
 ; CHECK: negl
 ; CHECK: sbbl
 
-; ATOM: test15:
+; ATOM-LABEL: test15:
 ; ATOM: negl
 ; ATOM: sbbl
 }
@@ -322,11 +322,11 @@ entry:
   %cmp = icmp ne i64 %x, 0
   %conv1 = sext i1 %cmp to i64
   ret i64 %conv1
-; CHECK: test16:
+; CHECK-LABEL: test16:
 ; CHECK: negq
 ; CHECK: sbbq
 
-; ATOM: test16:
+; ATOM-LABEL: test16:
 ; ATOM: negq
 ; ATOM: sbbq
 }
@@ -336,11 +336,11 @@ entry:
   %cmp = icmp ne i16 %x, 0
   %sub = sext i1 %cmp to i16
   ret i16 %sub
-; CHECK: test17:
+; CHECK-LABEL: test17:
 ; CHECK: negw
 ; CHECK: sbbw
 
-; ATOM: test17:
+; ATOM-LABEL: test17:
 ; ATOM: negw
 ; ATOM: sbbw
 }
@@ -349,11 +349,11 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
   %cmp = icmp slt i32 %x, 15
   %sel = select i1 %cmp, i8 %a, i8 %b
   ret i8 %sel
-; CHECK: test18:
+; CHECK-LABEL: test18:
 ; CHECK: cmpl $15, %edi
 ; CHECK: cmovgel %edx
 
-; ATOM: test18:
+; ATOM-LABEL: test18:
 ; ATOM: cmpl $15, %edi
 ; ATOM: cmovgel %edx
 }
diff --git a/test/CodeGen/X86/select_const.ll b/test/CodeGen/X86/select_const.ll
index 5b2409d2396f..a6c2377e0366 100644
--- a/test/CodeGen/X86/select_const.ll
+++ b/test/CodeGen/X86/select_const.ll
@@ -7,7 +7,7 @@ entry:
   %retval.0 = select i1 %cmp, i64 2, i64 %add
   ret i64 %retval.0
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: leaq 1(%rdi), %rax
 ; CHECK: cmpq $2, %rdi
 ; CHECK: cmoveq %rdi, %rax
diff --git a/test/CodeGen/X86/setcc-narrowing.ll b/test/CodeGen/X86/setcc-narrowing.ll
new file mode 100644
index 000000000000..25cb2c822c5b
--- /dev/null
+++ b/test/CodeGen/X86/setcc-narrowing.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
+; PR17338
+
+@t1.global = internal global i64 -1, align 8
+
+define i32 @t1() nounwind ssp {
+entry:  ; the checks below expect the i64 compare narrowed to a single 32-bit cmpl on the global
+; CHECK-LABEL: t1:
+; CHECK: cmpl	$0, _t1.global
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: ret
+  %0 = load i64* @t1.global, align 8
+  %and = and i64 4294967295, %0  ; keep only the low 32 bits, so the i64 value can never be negative
+  %cmp = icmp sgt i64 %and, 0  ; for a non-negative value "sgt 0" equals "ne 0", hence the expected setne
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/X86/setcc-sentinals.ll b/test/CodeGen/X86/setcc-sentinals.ll
new file mode 100644
index 000000000000..d36e678c6048
--- /dev/null
+++ b/test/CodeGen/X86/setcc-sentinals.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mcpu=generic -march=x86-64 -asm-verbose=false | FileCheck %s
+
+define zeroext i1 @test0(i64 %x) nounwind {
+; CHECK-LABEL: test0:
+; CHECK-NEXT: incq %[[X:rdi|rcx]]
+; CHECK-NEXT: cmpq $1, %[[X]]
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: ret
+  %cmp1 = icmp ne i64 %x, -1  ; x is not the -1 sentinel
+  %not.cmp = icmp ne i64 %x, 0  ; x is not the 0 sentinel
+  %.cmp1 = and i1 %cmp1, %not.cmp  ; both at once is (x + 1) >u 1: the incq / cmpq $1 / seta sequence expected above
+  ret i1 %.cmp1
+}
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
index c37e15d24f34..2454af926aae 100644
--- a/test/CodeGen/X86/setcc.ll
+++ b/test/CodeGen/X86/setcc.ll
@@ -6,7 +6,7 @@
 
 define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: seta %al
 ; CHECK: movzbl %al, %eax
 ; CHECK: shll $5, %eax
@@ -17,7 +17,7 @@ entry:
 
 define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: sbbl %eax, %eax
 ; CHECK: andl $32, %eax
   %0 = icmp ult i16 %x, 26                        ; <i1> [#uses=1]
@@ -27,7 +27,7 @@ entry:
 
 define i64 @t3(i64 %x) nounwind readnone ssp {
 entry:
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK: sbbq %rax, %rax
 ; CHECK: andq $64, %rax
   %0 = icmp ult i64 %x, 18                        ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/sext-i1.ll b/test/CodeGen/X86/sext-i1.ll
index 574769b43084..64de0aee70d3 100644
--- a/test/CodeGen/X86/sext-i1.ll
+++ b/test/CodeGen/X86/sext-i1.ll
@@ -5,11 +5,11 @@
 
 define i32 @t1(i32 %x) nounwind readnone ssp {
 entry:
-; 32: t1:
+; 32-LABEL: t1:
 ; 32: cmpl $1
 ; 32: sbbl
 
-; 64: t1:
+; 64-LABEL: t1:
 ; 64: cmpl $1
 ; 64: sbbl
   %0 = icmp eq i32 %x, 0
@@ -19,11 +19,11 @@ entry:
 
 define i32 @t2(i32 %x) nounwind readnone ssp {
 entry:
-; 32: t2:
+; 32-LABEL: t2:
 ; 32: cmpl $1
 ; 32: sbbl
 
-; 64: t2:
+; 64-LABEL: t2:
 ; 64: cmpl $1
 ; 64: sbbl
   %0 = icmp eq i32 %x, 0
@@ -36,13 +36,13 @@ entry:
 
 define i32 @t3() nounwind readonly {
 entry:
-; 32: t3:
+; 32-LABEL: t3:
 ; 32: cmpl $1
 ; 32: sbbl
 ; 32: cmpl
 ; 32: xorl
 
-; 64: t3:
+; 64-LABEL: t3:
 ; 64: cmpl $1
 ; 64: sbbq
 ; 64: cmpq
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index 58c93229a2c0..2753e8766294 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -3,7 +3,7 @@
 ; When doing sign extension, use the sext-load lowering to take advantage of
 ; x86's sign extension during loads.
 ;
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK:      movsbl {{.*}}, %eax
 ; CHECK-NEXT: ret
 define i32 @test1(i32 %X) nounwind  {
@@ -16,7 +16,7 @@ entry:
 ; When using a sextload representation, ensure that the sign extension is
 ; preserved even when removing shifted-out low bits.
 ;
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK:      movswl {{.*}}, %eax
 ; CHECK-NEXT: ret
 define i32 @test2({i16, [6 x i8]}* %this) {
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
index a128af9950f0..e0c8ff9b5e08 100644
--- a/test/CodeGen/X86/sext-subreg.ll
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -2,7 +2,7 @@
 ; rdar://7529457
 
 define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: movslq %e{{.*}}, %rax
 ; CHECK: movq %rax
 ; CHECK: movl %eax
diff --git a/test/CodeGen/X86/sha.ll b/test/CodeGen/X86/sha.ll
new file mode 100644
index 000000000000..bf81e9938ec8
--- /dev/null
+++ b/test/CodeGen/X86/sha.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: not llc < %s -mtriple=x86_64-unknown-unknown
+
+declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+entry:  ; register-register form: both vector operands are passed by value
+  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)  ; the i8 3 immediate must appear as $3
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha1rnds4rr:
+  ; CHECK: sha1rnds4 $3, %xmm1, %xmm0
+}
+
+define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
+entry:  ; register-memory form: the second operand is loaded through the pointer argument
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)  ; the i8 3 immediate must appear as $3
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha1rnds4rm:
+  ; CHECK: sha1rnds4 $3, (%rdi), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+entry:  ; register-register form: both vector operands are passed by value
+  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha1nexterr:
+  ; CHECK: sha1nexte %xmm1, %xmm0
+}
+
+define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
+entry:  ; register-memory form: the second operand is loaded through the pointer argument
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha1nexterm:
+  ; CHECK: sha1nexte (%rdi), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+entry:  ; register-register form: both vector operands are passed by value
+  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha1msg1rr:
+  ; CHECK: sha1msg1 %xmm1, %xmm0
+}
+
+define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
+entry:  ; register-memory form: the second operand is loaded through the pointer argument
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha1msg1rm:
+  ; CHECK: sha1msg1 (%rdi), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+entry:  ; register-register form: both vector operands are passed by value
+  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha1msg2rr:
+  ; CHECK: sha1msg2 %xmm1, %xmm0
+}
+
+define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
+entry:  ; register-memory form: the second operand is loaded through the pointer argument
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha1msg2rm:
+  ; CHECK: sha1msg2 (%rdi), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
+entry:  ; register-register form; the checks expect %c (arriving in %xmm2) to be moved into %xmm0 first
+  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha256rnds2rr:
+  ; CHECK: movaps %xmm0, [[XMM_TMP1:%xmm[1-9][0-9]?]]
+  ; CHECK: movaps %xmm2, %xmm0
+  ; CHECK: sha256rnds2 %xmm1, [[XMM_TMP1]]
+}
+
+define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
+entry:  ; register-memory form; the checks expect %c (arriving in %xmm1) to be moved into %xmm0 first
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha256rnds2rm:
+  ; CHECK: movaps %xmm0, [[XMM_TMP2:%xmm[1-9][0-9]?]]
+  ; CHECK: movaps %xmm1, %xmm0
+  ; CHECK: sha256rnds2 (%rdi), [[XMM_TMP2]]
+}
+
+declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+entry:  ; register-register form: both vector operands are passed by value
+  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha256msg1rr:
+  ; CHECK: sha256msg1 %xmm1, %xmm0
+}
+
+define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
+entry:  ; register-memory form: the second operand is loaded through the pointer argument
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha256msg1rm:
+  ; CHECK: sha256msg1 (%rdi), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+entry:  ; register-register form: both vector operands are passed by value
+  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+  ; CHECK-LABEL: test_sha256msg2rr:
+  ; CHECK: sha256msg2 %xmm1, %xmm0
+}
+
+define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
+entry:  ; register-memory form: the second operand is loaded through the pointer argument
+  %0 = load <4 x i32>* %b  ; expected to be folded into the instruction as the (%rdi) operand
+  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+  ; CHECK-LABEL: test_sha256msg2rm:
+  ; CHECK: sha256msg2 (%rdi), %xmm0
+}
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index 1de915164f0c..d487368431b0 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -2,11 +2,11 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=X64
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
-; X32: t1:
+; X32-LABEL: t1:
 ; X32-NOT: andl
 ; X32: shll
 
-; X64: t1:
+; X64-LABEL: t1:
 ; X64-NOT: andl
 ; X64: shll
        %shamt = and i32 %t, 31
@@ -15,11 +15,11 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
 }
 
 define i32 @t2(i32 %t, i32 %val) nounwind {
-; X32: t2:
+; X32-LABEL: t2:
 ; X32-NOT: andl
 ; X32: shll
 
-; X64: t2:
+; X64-LABEL: t2:
 ; X64-NOT: andl
 ; X64: shll
        %shamt = and i32 %t, 63
@@ -30,11 +30,11 @@ define i32 @t2(i32 %t, i32 %val) nounwind {
 @X = internal global i16 0
 
 define void @t3(i16 %t) nounwind {
-; X32: t3:
+; X32-LABEL: t3:
 ; X32-NOT: andl
 ; X32: sarw
 
-; X64: t3:
+; X64-LABEL: t3:
 ; X64-NOT: andl
 ; X64: sarw
        %shamt = and i16 %t, 31
@@ -45,7 +45,7 @@ define void @t3(i16 %t) nounwind {
 }
 
 define i64 @t4(i64 %t, i64 %val) nounwind {
-; X64: t4:
+; X64-LABEL: t4:
 ; X64-NOT: and
 ; X64: shrq
        %shamt = and i64 %t, 63
@@ -54,7 +54,7 @@ define i64 @t4(i64 %t, i64 %val) nounwind {
 }
 
 define i64 @t5(i64 %t, i64 %val) nounwind {
-; X64: t5:
+; X64-LABEL: t5:
 ; X64-NOT: and
 ; X64: shrq
        %shamt = and i64 %t, 191
@@ -66,7 +66,7 @@ define i64 @t5(i64 %t, i64 %val) nounwind {
 ; rdar://11866926
 define i64 @t6(i64 %key, i64* nocapture %val) nounwind {
 entry:
-; X64: t6:
+; X64-LABEL: t6:
 ; X64-NOT: movabsq
 ; X64: decq
 ; X64: andq
diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll
index d1f321f17738..7615754a042a 100644
--- a/test/CodeGen/X86/shift-bmi2.ll
+++ b/test/CodeGen/X86/shift-bmi2.ll
@@ -30,10 +30,11 @@ entry:
   %x = load i32* %p
   %shl = shl i32 %x, %shamt
 ; BMI2: shl32p
-; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
+; Source-order scheduling prevents folding the load into the shift, rdar://14208996.
+; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI2: ret
 ; BMI264: shl32p
-; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -74,7 +75,7 @@ entry:
   %x = load i64* %p
   %shl = shl i64 %x, %shamt
 ; BMI264: shl64p
-; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
@@ -83,7 +84,7 @@ define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
 entry:
   %x = load i64* %p
   %shl = shl i64 %x, 7
-; BMI264: shl64p
+; BMI264: shl64pi
 ; BMI264-NOT: shlxq
 ; BMI264: ret
   ret i64 %shl
@@ -106,10 +107,11 @@ entry:
   %x = load i32* %p
   %shl = lshr i32 %x, %shamt
 ; BMI2: lshr32p
-; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
+; Source-order scheduling prevents folding the load into the shift, rdar://14208996.
+; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI2: ret
-; BMI264: lshr32
-; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: lshr32p
+; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -128,7 +130,7 @@ entry:
   %x = load i64* %p
   %shl = lshr i64 %x, %shamt
 ; BMI264: lshr64p
-; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
@@ -150,10 +152,11 @@ entry:
   %x = load i32* %p
   %shl = ashr i32 %x, %shamt
 ; BMI2: ashr32p
-; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
+; Source-order scheduling prevents folding the load into the shift, rdar://14208996.
+; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI2: ret
-; BMI264: ashr32
-; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ashr32p
+; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -172,7 +175,7 @@ entry:
   %x = load i64* %p
   %shl = ashr i64 %x, %shamt
 ; BMI264: ashr64p
-; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index 4f27e97fb390..5241042d0c55 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep "shld.*CL"
+; RUN:   grep "shld.*cl"
 ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN:   not grep "mov CL, BL"
+; RUN:   not grep "mov cl, bl"
 
 ; PR687
 
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
index 7d961e8a9c04..88b86100794a 100644
--- a/test/CodeGen/X86/shift-codegen.ll
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -8,7 +8,7 @@ target triple = "i686-apple-darwin8"
 
 
 define void @fn1() {
-; CHECK: fn1:
+; CHECK-LABEL: fn1:
 ; CHECK-NOT: ret
 ; CHECK-NOT: lea
 ; CHECK: shll $3
@@ -24,7 +24,7 @@ define void @fn1() {
 }
 
 define i32 @fn2(i32 %X, i32 %Y) {
-; CHECK: fn2:
+; CHECK-LABEL: fn2:
 ; CHECK-NOT: ret
 ; CHECK-NOT: lea
 ; CHECK: shll $3
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index 51f83036c23d..113dedb4a00c 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -3,7 +3,7 @@
 @array = weak global [4 x i32] zeroinitializer
 
 define i32 @test_lshr_and(i32 %x) {
-; CHECK: test_lshr_and:
+; CHECK-LABEL: test_lshr_and:
 ; CHECK-NOT: shrl
 ; CHECK: andl $12,
 ; CHECK: movl {{.*}}array{{.*}},
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index c518cdd3aa4e..ea9002c397b8 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -verify-coalescing | FileCheck %s
 
 define i32* @test1(i32* %P, i32 %X) {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: shrl
 ; CHECK-NOT: shll
 ; CHECK: ret
@@ -14,7 +14,7 @@ entry:
 }
 
 define i32* @test2(i32* %P, i32 %X) {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: shll $4
 ; CHECK-NOT: shll
 ; CHECK: ret
@@ -27,7 +27,7 @@ entry:
 }
 
 define i32* @test3(i32* %P, i32 %X) {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK-NOT: shrl
 ; CHECK-NOT: shll
 ; CHECK: ret
@@ -39,7 +39,7 @@ entry:
 }
 
 define fastcc i32 @test4(i32* %d) {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK-NOT: shrl
 ; CHECK: ret
 
@@ -52,7 +52,7 @@ entry:
 define i64 @test5(i16 %i, i32* %arr) {
 ; Ensure that we don't fold away shifts which have multiple uses, as they are
 ; just re-introduced for the second use.
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK-NOT: shrl
 ; CHECK: shrl $11
 ; CHECK-NOT: shrl
diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll
index 10d489b9a8a6..0a5d047d23d3 100644
--- a/test/CodeGen/X86/shl-anyext.ll
+++ b/test/CodeGen/X86/shl-anyext.ll
@@ -17,7 +17,7 @@ if.end523:                                        ; preds = %if.end453
   ret void
 }
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 
 declare void @bar(i64)
 
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
index 83e1eb5c39e7..4762b13b516f 100644
--- a/test/CodeGen/X86/shl_elim.ll
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=x86 | grep "movl	8(.esp), %eax"
-; RUN: llc < %s -march=x86 | grep "shrl	.eax"
-; RUN: llc < %s -march=x86 | grep "movswl	.ax, .eax"
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @test1(i64 %a) nounwind {
         %tmp29 = lshr i64 %a, 24                ; <i64> [#uses=1]
@@ -9,5 +7,10 @@ define i32 @test1(i64 %a) nounwind {
         %tmp45 = trunc i32 %tmp410 to i16               ; <i16> [#uses=1]
         %tmp456 = sext i16 %tmp45 to i32                ; <i32> [#uses=1]
         ret i32 %tmp456
+
+; CHECK-LABEL: test1:
+; CHECK: movl 8(%esp), %eax
+; CHECK: shrl %eax
+; CHECK: cwtl
 }
 
diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll
index 8d4b07f9d9b0..bb892011e2d6 100644
--- a/test/CodeGen/X86/shrink-compare.ll
+++ b/test/CodeGen/X86/shrink-compare.ll
@@ -15,7 +15,7 @@ if.then:
 
 if.end:
   ret void
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: cmpb $47, (%{{rdi|rcx}})
 }
 
@@ -31,6 +31,61 @@ if.then:
 
 if.end:
   ret void
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: cmpb $47, %{{dil|cl}}
 }
+
+define void @test3(i32 %X) nounwind {
+entry:
+  %and = and i32 %X, 255
+  %cmp = icmp eq i32 %and, 255
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+; CHECK-LABEL: test3:
+; CHECK: cmpb $-1, %{{dil|cl}}
+}
+
+; PR16083
+define i1 @test4(i64 %a, i32 %b) {
+entry:
+  %tobool = icmp ne i32 %b, 0
+  br i1 %tobool, label %lor.end, label %lor.rhs
+
+lor.rhs:                                          ; preds = %entry
+  %and = and i64 0, %a
+  %tobool1 = icmp ne i64 %and, 0
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %entry
+  %p = phi i1 [ true, %entry ], [ %tobool1, %lor.rhs ]
+  ret i1 %p
+}
+
+@x = global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 1 }, align 4
+
+; PR16551
+define void @test5(i32 %X) nounwind {
+entry:
+  %bf.load = load i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
+  %bf.lshr = lshr i56 %bf.load, 32
+  %bf.cast = trunc i56 %bf.lshr to i32
+  %cmp = icmp ne i32 %bf.cast, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+
+; CHECK-LABEL: test5:
+; CHECK-NOT: cmpl $1,{{.*}}x+4
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sibcall-2.ll b/test/CodeGen/X86/sibcall-2.ll
index f8a746563b51..1b9d2db47c37 100644
--- a/test/CodeGen/X86/sibcall-2.ll
+++ b/test/CodeGen/X86/sibcall-2.ll
@@ -5,10 +5,10 @@
 
 define void @t1(i8* nocapture %value) nounwind {
 entry:
-; 32: t1:
+; 32-LABEL: t1:
 ; 32: jmpl *4(%esp)
 
-; 64: t1:
+; 64-LABEL: t1:
 ; 64: jmpq *%rdi
   %0 = bitcast i8* %value to void ()*
   tail call void %0() nounwind
@@ -17,10 +17,10 @@ entry:
 
 define void @t2(i32 %a, i8* nocapture %value) nounwind {
 entry:
-; 32: t2:
+; 32-LABEL: t2:
 ; 32: jmpl *8(%esp)
 
-; 64: t2:
+; 64-LABEL: t2:
 ; 64: jmpq *%rsi
   %0 = bitcast i8* %value to void ()*
   tail call void %0() nounwind
@@ -29,10 +29,10 @@ entry:
 
 define void @t3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i8* nocapture %value) nounwind {
 entry:
-; 32: t3:
+; 32-LABEL: t3:
 ; 32: jmpl *28(%esp)
 
-; 64: t3:
+; 64-LABEL: t3:
 ; 64: jmpq *8(%rsp)
   %0 = bitcast i8* %value to void ()*
   tail call void %0() nounwind
@@ -41,10 +41,10 @@ entry:
 
 define void @t4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i8* nocapture %value) nounwind {
 entry:
-; 32: t4:
+; 32-LABEL: t4:
 ; 32: jmpl *32(%esp)
 
-; 64: t4:
+; 64-LABEL: t4:
 ; 64: jmpq *16(%rsp)
   %0 = bitcast i8* %value to void ()*
   tail call void %0() nounwind
diff --git a/test/CodeGen/X86/sibcall-3.ll b/test/CodeGen/X86/sibcall-3.ll
index f97abe002957..9fcb4603a9d1 100644
--- a/test/CodeGen/X86/sibcall-3.ll
+++ b/test/CodeGen/X86/sibcall-3.ll
@@ -2,14 +2,14 @@
 ; PR7193
 
 define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: calll 0
   tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind
   ret void
 }
 
 define void @t2(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: jmpl
   tail call void null(i8* inreg %dst, i8* inreg %src) nounwind
   ret void
diff --git a/test/CodeGen/X86/sibcall-4.ll b/test/CodeGen/X86/sibcall-4.ll
index 1499e6688024..980b0f797ee1 100644
--- a/test/CodeGen/X86/sibcall-4.ll
+++ b/test/CodeGen/X86/sibcall-4.ll
@@ -3,7 +3,7 @@
 
 define cc10 void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind {
 cm1:
-; CHECK: t:
+; CHECK-LABEL: t:
 ; CHECK: jmpl *%eax
   %nm3 = getelementptr i32* %Sp_Arg, i32 1
   %nm9 = load i32* %Sp_Arg
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index 937817e45647..c479030508a9 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -7,20 +7,20 @@
 
 define double @foo(double %a) nounwind readonly ssp {
 entry:
-; X32: foo:
+; X32-LABEL: foo:
 ; X32: jmp _sin$stub
 
-; X64: foo:
+; X64-LABEL: foo:
 ; X64: jmp _sin
   %0 = tail call double @sin(double %a) nounwind readonly
   ret double %0
 }
 
 define float @bar(float %a) nounwind readonly ssp {
-; X32: bar:
+; X32-LABEL: bar:
 ; X32: jmp _sinf$stub
 
-; X64: bar:
+; X64-LABEL: bar:
 ; X64: jmp _sinf
 entry:
   %0 = tail call float @sinf(float %a) nounwind readonly
diff --git a/test/CodeGen/X86/sibcall-6.ll b/test/CodeGen/X86/sibcall-6.ll
new file mode 100644
index 000000000000..c9dff6b73d2a
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-6.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -relocation-model=pic | FileCheck %s
+; PR15250
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+declare void @callee1(i32 inreg, i32 inreg, i32 inreg)
+define void @test1(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: calll callee1@PLT
+  tail call void @callee1(i32 inreg 0, i32 inreg 0, i32 inreg 0) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index ceb79ea927a1..589e9ec10524 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -3,10 +3,10 @@
 
 define void @t1(i32 %x) nounwind ssp {
 entry:
-; 32: t1:
+; 32-LABEL: t1:
 ; 32: jmp {{_?}}foo
 
-; 64: t1:
+; 64-LABEL: t1:
 ; 64: jmp {{_?}}foo
   tail call void @foo() nounwind
   ret void
@@ -16,10 +16,10 @@ declare void @foo()
 
 define void @t2() nounwind ssp {
 entry:
-; 32: t2:
+; 32-LABEL: t2:
 ; 32: jmp {{_?}}foo2
 
-; 64: t2:
+; 64-LABEL: t2:
 ; 64: jmp {{_?}}foo2
   %0 = tail call i32 @foo2() nounwind
   ret void
@@ -29,10 +29,10 @@ declare i32 @foo2()
 
 define void @t3() nounwind ssp {
 entry:
-; 32: t3:
+; 32-LABEL: t3:
 ; 32: jmp {{_?}}foo3
 
-; 64: t3:
+; 64-LABEL: t3:
 ; 64: jmp {{_?}}foo3
   %0 = tail call i32 @foo3() nounwind
   ret void
@@ -42,11 +42,11 @@ declare i32 @foo3()
 
 define void @t4(void (i32)* nocapture %x) nounwind ssp {
 entry:
-; 32: t4:
+; 32-LABEL: t4:
 ; 32: calll *
 ; FIXME: gcc can generate a tailcall for this. But it's tricky.
 
-; 64: t4:
+; 64-LABEL: t4:
 ; 64-NOT: call
 ; 64: jmpq *
   tail call void %x(i32 0) nounwind
@@ -55,11 +55,11 @@ entry:
 
 define void @t5(void ()* nocapture %x) nounwind ssp {
 entry:
-; 32: t5:
+; 32-LABEL: t5:
 ; 32-NOT: call
 ; 32: jmpl *4(%esp)
 
-; 64: t5:
+; 64-LABEL: t5:
 ; 64-NOT: call
 ; 64: jmpq *%rdi
   tail call void %x() nounwind
@@ -68,11 +68,11 @@ entry:
 
 define i32 @t6(i32 %x) nounwind ssp {
 entry:
-; 32: t6:
+; 32-LABEL: t6:
 ; 32: calll {{_?}}t6
 ; 32: jmp {{_?}}bar
 
-; 64: t6:
+; 64-LABEL: t6:
 ; 64: jmp {{_?}}t6
 ; 64: jmp {{_?}}bar
   %0 = icmp slt i32 %x, 10
@@ -92,10 +92,10 @@ declare i32 @bar(i32)
 
 define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind ssp {
 entry:
-; 32: t7:
+; 32-LABEL: t7:
 ; 32: jmp {{_?}}bar2
 
-; 64: t7:
+; 64-LABEL: t7:
 ; 64: jmp {{_?}}bar2
   %0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind
   ret i32 %0
@@ -105,11 +105,11 @@ declare i32 @bar2(i32, i32, i32)
 
 define signext i16 @t8() nounwind ssp {
 entry:
-; 32: t8:
-; 32: calll {{_?}}bar3
+; 32-LABEL: t8:
+; 32: jmp {{_?}}bar3
 
-; 64: t8:
-; 64: callq {{_?}}bar3
+; 64-LABEL: t8:
+; 64: jmp {{_?}}bar3
   %0 = tail call signext i16 @bar3() nounwind      ; <i16> [#uses=1]
   ret i16 %0
 }
@@ -118,11 +118,11 @@ declare signext i16 @bar3()
 
 define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
 entry:
-; 32: t9:
+; 32-LABEL: t9:
 ; 32: calll *
 
-; 64: t9:
-; 64: callq *
+; 64-LABEL: t9:
+; 64: jmpq *
   %0 = bitcast i32 (i32)* %x to i16 (i32)*
   %1 = tail call signext i16 %0(i32 0) nounwind
   ret i16 %1
@@ -130,10 +130,10 @@ entry:
 
 define void @t10() nounwind ssp {
 entry:
-; 32: t10:
+; 32-LABEL: t10:
 ; 32: calll
 
-; 64: t10:
+; 64-LABEL: t10:
 ; 64: callq
   %0 = tail call i32 @foo4() noreturn nounwind
   unreachable
@@ -145,14 +145,14 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
 ; In 32-bit mode, it's emitting a bunch of dead loads that are not being
 ; eliminated currently.
 
-; 32: t11:
+; 32-LABEL: t11:
 ; 32-NOT: subl ${{[0-9]+}}, %esp
 ; 32: je
 ; 32-NOT: movl
 ; 32-NOT: addl ${{[0-9]+}}, %esp
 ; 32: jmp {{_?}}foo5
 
-; 64: t11:
+; 64-LABEL: t11:
 ; 64-NOT: subq ${{[0-9]+}}, %esp
 ; 64-NOT: addq ${{[0-9]+}}, %esp
 ; 64: jmp {{_?}}foo5
@@ -173,12 +173,12 @@ declare i32 @foo5(i32, i32, i32, i32, i32)
 %struct.t = type { i32, i32, i32, i32, i32 }
 
 define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
-; 32: t12:
+; 32-LABEL: t12:
 ; 32-NOT: subl ${{[0-9]+}}, %esp
 ; 32-NOT: addl ${{[0-9]+}}, %esp
 ; 32: jmp {{_?}}foo6
 
-; 64: t12:
+; 64-LABEL: t12:
 ; 64-NOT: subq ${{[0-9]+}}, %esp
 ; 64-NOT: addq ${{[0-9]+}}, %esp
 ; 64: jmp {{_?}}foo6
@@ -201,12 +201,12 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
 %struct.cp = type { float, float, float, float, float }
 
 define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
-; 32: t13:
+; 32-LABEL: t13:
 ; 32-NOT: jmp
 ; 32: calll
 ; 32: ret
 
-; 64: t13:
+; 64-LABEL: t13:
 ; 64-NOT: jmp
 ; 64: callq
 ; 64: ret
@@ -226,7 +226,7 @@ declare fastcc %struct.ns* @foo7(%struct.cp* byval align 4, i8 signext) nounwind
 
 define void @t14(%struct.__block_literal_2* nocapture %.block_descriptor) nounwind ssp {
 entry:
-; 64: t14:
+; 64-LABEL: t14:
 ; 64: movq 32(%rdi)
 ; 64-NOT: movq 16(%rdi)
 ; 64: jmpq *16({{%rdi|%rax}})
@@ -245,11 +245,11 @@ entry:
 %struct.foo = type { [4 x i32] }
 
 define void @t15(%struct.foo* noalias sret %agg.result) nounwind  {
-; 32: t15:
+; 32-LABEL: t15:
 ; 32: calll {{_?}}f
 ; 32: ret $4
 
-; 64: t15:
+; 64-LABEL: t15:
 ; 64: callq {{_?}}f
 ; 64: ret
   tail call fastcc void @f(%struct.foo* noalias sret %agg.result) nounwind
@@ -260,11 +260,11 @@ declare void @f(%struct.foo* noalias sret) nounwind
 
 define void @t16() nounwind ssp {
 entry:
-; 32: t16:
+; 32-LABEL: t16:
 ; 32: calll {{_?}}bar4
 ; 32: fstp
 
-; 64: t16:
+; 64-LABEL: t16:
 ; 64: jmp {{_?}}bar4
   %0 = tail call double @bar4() nounwind
   ret void
@@ -275,11 +275,11 @@ declare double @bar4()
 ; rdar://6283267
 define void @t17() nounwind ssp {
 entry:
-; 32: t17:
+; 32-LABEL: t17:
 ; 32: jmp {{_?}}bar5
 
-; 64: t17:
-; 64: xorb %al, %al
+; 64-LABEL: t17:
+; 64: xorl %eax, %eax
 ; 64: jmp {{_?}}bar5
   tail call void (...)* @bar5() nounwind
   ret void
@@ -290,12 +290,12 @@ declare void @bar5(...)
 ; rdar://7774847
 define void @t18() nounwind ssp {
 entry:
-; 32: t18:
+; 32-LABEL: t18:
 ; 32: calll {{_?}}bar6
 ; 32: fstp %st(0)
 
-; 64: t18:
-; 64: xorb %al, %al
+; 64-LABEL: t18:
+; 64: xorl %eax, %eax
 ; 64: jmp {{_?}}bar6
   %0 = tail call double (...)* @bar6() nounwind
   ret void
@@ -305,7 +305,7 @@ declare double @bar6(...)
 
 define void @t19() alignstack(32) nounwind {
 entry:
-; CHECK: t19:
+; CHECK-LABEL: t19:
 ; CHECK: andl $-32
 ; CHECK: calll {{_?}}foo
   tail call void @foo() nounwind
@@ -318,11 +318,11 @@ entry:
 
 define double @t20(double %x) nounwind {
 entry:
-; 32: t20:
+; 32-LABEL: t20:
 ; 32: calll {{_?}}foo20
 ; 32: fldl (%esp)
 
-; 64: t20:
+; 64-LABEL: t20:
 ; 64: jmp {{_?}}foo20
   %0 = tail call fastcc double @foo20(double %x) nounwind
   ret double %0
diff --git a/test/CodeGen/X86/simple-zext.ll b/test/CodeGen/X86/simple-zext.ll
new file mode 100644
index 000000000000..ccd8292bcdb3
--- /dev/null
+++ b/test/CodeGen/X86/simple-zext.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s| FileCheck %s
+
+; A bug in DAGCombiner prevented it forming a zextload in this simple case
+; because it counted both the chain user and the real user against the
+; profitability total.
+
+define void @load_zext(i32* nocapture %p){
+entry:
+  %0 = load i32* %p, align 4
+  %and = and i32 %0, 255
+  tail call void @use(i32 %and)
+  ret void
+; CHECK: movzbl ({{%r[a-z]+}}), {{%e[a-z]+}}
+}
+
+declare void @use(i32)
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
index 333c4663eb0b..2dc8816f840f 100644
--- a/test/CodeGen/X86/sincos-opt.ll
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -8,19 +8,19 @@
 
 define float @test1(float %x) nounwind {
 entry:
-; GNU_SINCOS: test1:
+; GNU_SINCOS-LABEL: test1:
 ; GNU_SINCOS: callq sincosf
 ; GNU_SINCOS: movss 4(%rsp), %xmm0
 ; GNU_SINCOS: addss (%rsp), %xmm0
 
-; OSX_SINCOS: test1:
+; OSX_SINCOS-LABEL: test1:
 ; OSX_SINCOS: callq ___sincosf_stret
 ; OSX_SINCOS: pshufd $1, %xmm0, %xmm1
 ; OSX_SINCOS: addss %xmm0, %xmm1
 
 ; OSX_NOOPT: test1
-; OSX_NOOPT: callq _cosf
 ; OSX_NOOPT: callq _sinf
+; OSX_NOOPT: callq _cosf
   %call = tail call float @sinf(float %x) nounwind readnone
   %call1 = tail call float @cosf(float %x) nounwind readnone
   %add = fadd float %call, %call1
@@ -29,18 +29,18 @@ entry:
 
 define double @test2(double %x) nounwind {
 entry:
-; GNU_SINCOS: test2:
+; GNU_SINCOS-LABEL: test2:
 ; GNU_SINCOS: callq sincos
 ; GNU_SINCOS: movsd 16(%rsp), %xmm0
 ; GNU_SINCOS: addsd 8(%rsp), %xmm0
 
-; OSX_SINCOS: test2:
+; OSX_SINCOS-LABEL: test2:
 ; OSX_SINCOS: callq ___sincos_stret
 ; OSX_SINCOS: addsd %xmm1, %xmm0
 
 ; OSX_NOOPT: test2
-; OSX_NOOPT: callq _cos
 ; OSX_NOOPT: callq _sin
+; OSX_NOOPT: callq _cos
   %call = tail call double @sin(double %x) nounwind readnone
   %call1 = tail call double @cos(double %x) nounwind readnone
   %add = fadd double %call, %call1
@@ -49,7 +49,7 @@ entry:
 
 define x86_fp80 @test3(x86_fp80 %x) nounwind {
 entry:
-; GNU_SINCOS: test3:
+; GNU_SINCOS-LABEL: test3:
 ; GNU_SINCOS: callq sinl
 ; GNU_SINCOS: callq cosl
 ; GNU_SINCOS: ret
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
index 734f48ae329f..8f0e6f1edf66 100644
--- a/test/CodeGen/X86/sincos.ll
+++ b/test/CodeGen/X86/sincos.ll
@@ -9,7 +9,7 @@ declare double @sin(double) readonly
 
 declare x86_fp80 @sinl(x86_fp80) readonly
 
-; SIN: test1:
+; SIN-LABEL: test1:
 define float @test1(float %X) {
         %Y = call float @sinf(float %X) readonly
         ret float %Y
@@ -21,7 +21,7 @@ define float @test1(float %X) {
 ; SAFE: test1
 ; SAFE-NOT: fsin
 
-; SIN: test2:
+; SIN-LABEL: test2:
 define double @test2(double %X) {
         %Y = call double @sin(double %X) readonly
         ret double %Y
@@ -33,7 +33,7 @@ define double @test2(double %X) {
 ; SAFE: test2
 ; SAFE-NOT: fsin
 
-; SIN: test3:
+; SIN-LABEL: test3:
 define x86_fp80 @test3(x86_fp80 %X) {
         %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
         ret x86_fp80 %Y
@@ -49,8 +49,8 @@ declare double @cos(double) readonly
 declare x86_fp80 @cosl(x86_fp80) readonly
 
 
-; SIN: test4:
-; COS: test3:
+; SIN-LABEL: test4:
+; COS-LABEL: test3:
 define float @test4(float %X) {
         %Y = call float @cosf(float %X) readonly
         ret float %Y
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 2aca5b897d35..64f5311792db 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -5,7 +5,7 @@
 ; evaluated, however with MachineSink we can sink the other side so
 ; that it's conditionally evaluated.
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NEXT: testb $1, %dil
 ; CHECK-NEXT: jne
 ; CHECK-NEXT: divsd
@@ -24,13 +24,12 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
 ; the conditional branch.
 ; rdar://8454886
 
-; CHECK: split:
+; CHECK-LABEL: split:
 ; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne
-; CHECK-NEXT: movaps
-; CHECK-NEXT: ret
+; CHECK-NEXT: je
 ; CHECK:      divsd
-; CHECK-NEXT: ret
+; CHECK:      movaps
+; CHECK:      ret
 define double @split(double %x, double %y, i1 %c) nounwind {
   %a = fdiv double %x, 3.2
   %z = select i1 %c, double %a, double %y
@@ -40,7 +39,7 @@ define double @split(double %x, double %y, i1 %c) nounwind {
 
 ; Hoist floating-point constant-pool loads out of loops.
 
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: movsd
 ; CHECK: align
 define void @bar(double* nocapture %p, i64 %n) nounwind {
@@ -65,7 +64,7 @@ return:
 ; Sink instructions with dead EFLAGS defs.
 
 ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag.
-; 
+;
 ; See <rdar://problem/8030636>. This test isn't valid after we made machine
 ; sinking more conservative about sinking instructions that define a preg into a
 ; block when we don't know if the preg is killed within the current block.
@@ -87,7 +86,7 @@ return:
 
 ; Codegen should hoist and CSE these constants.
 
-; CHECK: vv:
+; CHECK-LABEL: vv:
 ; CHECK: LCPI3_0(%rip), %xmm0
 ; CHECK: LCPI3_1(%rip), %xmm1
 ; CHECK: LCPI3_2(%rip), %xmm2
@@ -151,7 +150,7 @@ declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
 ; CodeGen should use the correct register class when extracting
 ; a load from a zero-extending load for hoisting.
 
-; CHECK: default_get_pch_validity:
+; CHECK-LABEL: default_get_pch_validity:
 ; CHECK: movl cl_options_count(%rip), %ecx
 
 @cl_options_count = external constant i32         ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 2d0b2f7aa91d..cefbda64751b 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -17,7 +17,7 @@ normal:
 overflow:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: imull
 ; CHECK-NEXT: jno
 }
@@ -36,7 +36,7 @@ overflow:
 normal:
   %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
   ret i1 true
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: imull
 ; CHECK-NEXT: jno
 }
@@ -50,7 +50,7 @@ entry:
 	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
 	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
 	ret i32 %tmp2
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: addl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: ret
@@ -62,7 +62,7 @@ entry:
 	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
 	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
 	ret i32 %tmp2
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: addl
 ; CHECK: mull
 ; CHECK-NEXT: ret
@@ -78,6 +78,6 @@ entry:
   ret i1 %overflow
 ; Was returning false, should return true (not constant folded yet though).
 ; PR13991
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK-NOT: xorb
 }
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
index 980f18c8b911..4d59b9cc2f63 100644
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -3,7 +3,7 @@
 
 define <2 x i64> @t2() nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: pshufd	$85, (%esp), %xmm0
   %array = alloca [8 x float], align 4
   %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
new file mode 100644
index 000000000000..fc79e31e72ee
--- /dev/null
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mcpu=core2 | FileCheck %s
+
+; generated using "clang -S -O2 -ffast-math -emit-llvm sqrt.c" from
+; #include <math.h>
+; 
+; double fd(double d){
+;   return sqrt(d);
+; }
+; 
+; float ff(float f){
+;   return sqrtf(f);
+; }
+; 
+; long double fld(long double ld){
+;   return sqrtl(ld);
+; }
+;
+; Tests conversion of sqrt function calls into sqrt instructions when
+; -ffast-math is in effect.
+
+; ModuleID = 'sqrt.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone uwtable
+define double @fd(double %d) #0 {
+entry:
+; CHECK: sqrtsd
+  %call = tail call double @__sqrt_finite(double %d) #2
+  ret double %call
+}
+
+; Function Attrs: nounwind readnone
+declare double @__sqrt_finite(double) #1
+
+; Function Attrs: nounwind readnone uwtable
+define float @ff(float %f) #0 {
+entry:
+; CHECK: sqrtss
+  %call = tail call float @__sqrtf_finite(float %f) #2
+  ret float %call
+}
+
+; Function Attrs: nounwind readnone
+declare float @__sqrtf_finite(float) #1
+
+; Function Attrs: nounwind readnone uwtable
+define x86_fp80 @fld(x86_fp80 %ld) #0 {
+entry:
+; CHECK: fsqrt
+  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
+  ret x86_fp80 %call
+}
+
+; Function Attrs: nounwind readnone
+declare x86_fp80 @__sqrtl_finite(x86_fp80) #1
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 71a42f4db34a..2351fd6fa77b 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=nehalem | FileCheck %s
 
-; CHECK: a:
+; CHECK-LABEL: a:
 ; CHECK: movdqu
 ; CHECK: pshufd
 define <4 x float> @a(<4 x float>* %y) nounwind {
@@ -16,7 +16,7 @@ define <4 x float> @a(<4 x float>* %y) nounwind {
   ret <4 x float> %s
 }
 
-; CHECK: b:
+; CHECK-LABEL: b:
 ; CHECK: movups
 ; CHECK: unpckhps
 define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
@@ -32,7 +32,7 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
   ret <4 x float> %s
 }
 
-; CHECK: c:
+; CHECK-LABEL: c:
 ; CHECK: movupd
 ; CHECK: shufpd
 define <2 x double> @c(<2 x double>* %y) nounwind {
@@ -44,7 +44,7 @@ define <2 x double> @c(<2 x double>* %y) nounwind {
   ret <2 x double> %r
 }
 
-; CHECK: d:
+; CHECK-LABEL: d:
 ; CHECK: movupd
 ; CHECK: unpckhpd
 define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index 22cd7723068c..98e75b56e891 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -6,7 +6,7 @@ define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
   ret <4 x float> %z
 }
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: movups
 ; CHECK: ret
 
@@ -16,6 +16,6 @@ define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
   ret <2 x double> %z
 }
 
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: movupd
 ; CHECK: ret
diff --git a/test/CodeGen/X86/sse-commute.ll b/test/CodeGen/X86/sse-commute.ll
index 336bf06e557d..1800a6eea61b 100644
--- a/test/CodeGen/X86/sse-commute.ll
+++ b/test/CodeGen/X86/sse-commute.ll
@@ -3,7 +3,7 @@
 ; Commute the comparison to avoid a move.
 ; PR7500.
 
-; CHECK: a:
+; CHECK-LABEL: a:
 ; CHECK-NOT: mov
 ; CHECK:     pcmpeqd
 define <2 x double> @a(<2 x double>, <2 x double>) nounwind readnone {
diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll
new file mode 100644
index 000000000000..65d44bfb5ba8
--- /dev/null
+++ b/test/CodeGen/X86/sse-intrinsics-x86.ll
@@ -0,0 +1,308 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s
+
+define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: addss
+  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: cmpordps
+  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: cmpordss
+  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: comiss
+  ; CHECK: sete
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: comiss
+  ; CHECK: setae
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: comiss
+  ; CHECK: seta
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: comiss
+  ; CHECK: setbe
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: comiss
+  ; CHECK: sbb
+  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: comiss
+  ; CHECK: setne
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
+  ; CHECK: movl
+  ; CHECK: cvtsi2ss
+  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
+
+
+define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
+  ; CHECK: cvtss2si
+  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
+  ; CHECK: cvttss2si
+  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: divss
+  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_ldmxcsr(i8* %a0) {
+  ; CHECK: movl
+  ; CHECK: ldmxcsr
+  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
+  ret void
+}
+declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
+
+
+
+define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: maxps
+  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: maxss
+  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: minps
+  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: minss
+  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
+  ; CHECK: movmskps
+  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
+
+
+
+define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: mulss
+  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
+  ; CHECK: rcpps
+  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
+  ; CHECK: rcpss
+  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
+  ; CHECK: rsqrtps
+  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
+  ; CHECK: rsqrtss
+  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
+  ; CHECK: sqrtps
+  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
+  ; CHECK: sqrtss
+  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_stmxcsr(i8* %a0) {
+  ; CHECK: movl
+  ; CHECK: stmxcsr
+  call void @llvm.x86.sse.stmxcsr(i8* %a0)
+  ret void
+}
+declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
+
+
+define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
+  ; CHECK: movl
+  ; CHECK: movups
+  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
+  ret void
+}
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+
+define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: subss
+  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: ucomiss
+  ; CHECK: sete
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: ucomiss
+  ; CHECK: setae
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: ucomiss
+  ; CHECK: seta
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: ucomiss
+  ; CHECK: setbe
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: ucomiss
+  ; CHECK: sbbl
+  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: ucomiss
+  ; CHECK: setne
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 0ba02155a657..5122c44131a4 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -12,13 +12,13 @@
 ;  _y: use -0.0 instead of %y
 ; _inverse : swap the arms of the select.
 
-; CHECK:      ogt:
+; CHECK-LABEL:      ogt:
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ogt:
+; UNSAFE-LABEL:      ogt:
 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ogt:
+; FINITE-LABEL:      ogt:
 ; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ogt(double %x, double %y) nounwind {
@@ -27,13 +27,13 @@ define double @ogt(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      olt:
+; CHECK-LABEL:      olt:
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      olt:
+; UNSAFE-LABEL:      olt:
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      olt:
+; FINITE-LABEL:      olt:
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @olt(double %x, double %y) nounwind {
@@ -42,14 +42,14 @@ define double @olt(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ogt_inverse:
+; CHECK-LABEL:      ogt_inverse:
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ogt_inverse:
+; UNSAFE-LABEL:      ogt_inverse:
 ; UNSAFE-NEXT: minsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ogt_inverse:
+; FINITE-LABEL:      ogt_inverse:
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -59,14 +59,14 @@ define double @ogt_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      olt_inverse:
+; CHECK-LABEL:      olt_inverse:
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      olt_inverse:
+; UNSAFE-LABEL:      olt_inverse:
 ; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      olt_inverse:
+; FINITE-LABEL:      olt_inverse:
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -76,12 +76,12 @@ define double @olt_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      oge:
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
-; UNSAFE:      oge:
+; CHECK-LABEL:      oge:
+; CHECK: cmplesd %xmm0
+; UNSAFE-LABEL:      oge:
 ; UNSAFE-NEXT: maxsd	%xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      oge:
+; FINITE-LABEL:      oge:
 ; FINITE-NEXT: maxsd	%xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @oge(double %x, double %y) nounwind {
@@ -90,11 +90,11 @@ define double @oge(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ole:
-; CHECK-NEXT: ucomisd %xmm0, %xmm1
-; UNSAFE:      ole:
+; CHECK-LABEL:      ole:
+; CHECK: cmplesd %xmm1
+; UNSAFE-LABEL:      ole:
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; FINITE:      ole:
+; FINITE-LABEL:      ole:
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 define double @ole(double %x, double %y) nounwind {
   %c = fcmp ole double %x, %y
@@ -102,12 +102,12 @@ define double @ole(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      oge_inverse:
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
-; UNSAFE:      oge_inverse:
+; CHECK-LABEL:      oge_inverse:
+; CHECK: cmplesd %xmm0
+; UNSAFE-LABEL:      oge_inverse:
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      oge_inverse:
+; FINITE-LABEL:      oge_inverse:
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -117,12 +117,12 @@ define double @oge_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ole_inverse:
-; CHECK-NEXT: ucomisd %xmm0, %xmm1
-; UNSAFE:      ole_inverse:
+; CHECK-LABEL:      ole_inverse:
+; CHECK: cmplesd %xmm1
+; UNSAFE-LABEL:      ole_inverse:
 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ole_inverse:
+; FINITE-LABEL:      ole_inverse:
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -132,16 +132,16 @@ define double @ole_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ogt_x:
+; CHECK-LABEL:      ogt_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ogt_x:
+; UNSAFE-LABEL:      ogt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ogt_x:
+; FINITE-LABEL:      ogt_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -151,16 +151,16 @@ define double @ogt_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      olt_x:
+; CHECK-LABEL:      olt_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      olt_x:
+; UNSAFE-LABEL:      olt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      olt_x:
+; FINITE-LABEL:      olt_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -170,17 +170,17 @@ define double @olt_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ogt_inverse_x:
+; CHECK-LABEL:      ogt_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ogt_inverse_x:
+; UNSAFE-LABEL:      ogt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ogt_inverse_x:
+; FINITE-LABEL:      ogt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -191,17 +191,17 @@ define double @ogt_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      olt_inverse_x:
+; CHECK-LABEL:      olt_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      olt_inverse_x:
+; UNSAFE-LABEL:      olt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      olt_inverse_x:
+; FINITE-LABEL:      olt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -212,14 +212,15 @@ define double @olt_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      oge_x:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      oge_x:
+; CHECK-LABEL:      oge_x:
+; CHECK:      cmplesd %xmm
+; CHECK-NEXT: andpd
+; UNSAFE-LABEL:      oge_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      oge_x:
+; FINITE-LABEL:      oge_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -229,14 +230,15 @@ define double @oge_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ole_x:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      ole_x:
+; CHECK-LABEL:      ole_x:
+; CHECK:      cmplesd %xmm
+; CHECK-NEXT: andpd
+; UNSAFE-LABEL:      ole_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ole_x:
+; FINITE-LABEL:      ole_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -246,14 +248,15 @@ define double @ole_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      oge_inverse_x:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      oge_inverse_x:
+; CHECK-LABEL:      oge_inverse_x:
+; CHECK:      cmplesd %xmm
+; CHECK-NEXT: andnpd
+; UNSAFE-LABEL:      oge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      oge_inverse_x:
+; FINITE-LABEL:      oge_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -264,14 +267,14 @@ define double @oge_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ole_inverse_x:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      ole_inverse_x:
+; CHECK-LABEL:      ole_inverse_x:
+; CHECK:      cmplesd %xmm
+; UNSAFE-LABEL:      ole_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ole_inverse_x:
+; FINITE-LABEL:      ole_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -282,12 +285,12 @@ define double @ole_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ugt:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      ugt:
+; CHECK-LABEL:      ugt:
+; CHECK:      cmpnlesd %xmm1
+; UNSAFE-LABEL:      ugt:
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ugt:
+; FINITE-LABEL:      ugt:
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ugt(double %x, double %y) nounwind {
@@ -296,12 +299,12 @@ define double @ugt(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ult:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      ult:
+; CHECK-LABEL:      ult:
+; CHECK:      cmpnlesd %xmm0
+; UNSAFE-LABEL:      ult:
 ; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ult:
+; FINITE-LABEL:      ult:
 ; FINITE-NEXT: minsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ult(double %x, double %y) nounwind {
@@ -310,12 +313,12 @@ define double @ult(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ugt_inverse:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      ugt_inverse:
+; CHECK-LABEL:      ugt_inverse:
+; CHECK:      cmpnlesd %xmm1
+; UNSAFE-LABEL:      ugt_inverse:
 ; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ugt_inverse:
+; FINITE-LABEL:      ugt_inverse:
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -325,12 +328,12 @@ define double @ugt_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ult_inverse:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      ult_inverse:
+; CHECK-LABEL:      ult_inverse:
+; CHECK:      cmpnlesd %xmm0
+; UNSAFE-LABEL:      ult_inverse:
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ult_inverse:
+; FINITE-LABEL:      ult_inverse:
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -340,14 +343,14 @@ define double @ult_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      uge:
+; CHECK-LABEL:      uge:
 ; CHECK-NEXT: maxsd   %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      uge:
+; UNSAFE-LABEL:      uge:
 ; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      uge:
+; FINITE-LABEL:      uge:
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @uge(double %x, double %y) nounwind {
@@ -356,14 +359,14 @@ define double @uge(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ule:
+; CHECK-LABEL:      ule:
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ule:
+; UNSAFE-LABEL:      ule:
 ; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ule:
+; FINITE-LABEL:      ule:
 ; FINITE-NEXT: minsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ule(double %x, double %y) nounwind {
@@ -372,13 +375,13 @@ define double @ule(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      uge_inverse:
+; CHECK-LABEL:      uge_inverse:
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      uge_inverse:
+; UNSAFE-LABEL:      uge_inverse:
 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      uge_inverse:
+; FINITE-LABEL:      uge_inverse:
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -388,13 +391,13 @@ define double @uge_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ule_inverse:
+; CHECK-LABEL:      ule_inverse:
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ule_inverse:
+; UNSAFE-LABEL:      ule_inverse:
 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ule_inverse:
+; FINITE-LABEL:      ule_inverse:
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -404,14 +407,15 @@ define double @ule_inverse(double %x, double %y) nounwind {
   ret double %d
 }
 
-; CHECK:      ugt_x:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      ugt_x:
+; CHECK-LABEL:      ugt_x:
+; CHECK:      cmpnlesd %xmm
+; CHECK-NEXT: andpd
+; UNSAFE-LABEL:      ugt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ugt_x:
+; FINITE-LABEL:      ugt_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -421,14 +425,15 @@ define double @ugt_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ult_x:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      ult_x:
+; CHECK-LABEL:      ult_x:
+; CHECK:      cmpnlesd %xmm
+; CHECK-NEXT: andpd
+; UNSAFE-LABEL:      ult_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ult_x:
+; FINITE-LABEL:      ult_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -438,14 +443,15 @@ define double @ult_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ugt_inverse_x:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      ugt_inverse_x:
+; CHECK-LABEL:      ugt_inverse_x:
+; CHECK:      cmpnlesd %xmm
+; CHECK-NEXT: andnpd
+; UNSAFE-LABEL:      ugt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ugt_inverse_x:
+; FINITE-LABEL:      ugt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -456,14 +462,15 @@ define double @ugt_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ult_inverse_x:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      ult_inverse_x:
+; CHECK-LABEL:      ult_inverse_x:
+; CHECK:      cmpnlesd %xmm
+; CHECK-NEXT: andnpd
+; UNSAFE-LABEL:      ult_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ult_inverse_x:
+; FINITE-LABEL:      ult_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -474,17 +481,17 @@ define double @ult_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      uge_x:
+; CHECK-LABEL:      uge_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      uge_x:
+; UNSAFE-LABEL:      uge_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      uge_x:
+; FINITE-LABEL:      uge_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: maxsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -494,17 +501,17 @@ define double @uge_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ule_x:
+; CHECK-LABEL:      ule_x:
 ; CHECK-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ule_x:
+; UNSAFE-LABEL:      ule_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd  %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ule_x:
+; FINITE-LABEL:      ule_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
 ; FINITE-NEXT: minsd  %xmm1, %xmm0
 ; FINITE-NEXT: ret
@@ -514,16 +521,16 @@ define double @ule_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      uge_inverse_x:
+; CHECK-LABEL:      uge_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      uge_inverse_x:
+; UNSAFE-LABEL:      uge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      uge_inverse_x:
+; FINITE-LABEL:      uge_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -534,16 +541,16 @@ define double @uge_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ule_inverse_x:
+; CHECK-LABEL:      ule_inverse_x:
 ; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ule_inverse_x:
+; UNSAFE-LABEL:      ule_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
 ; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ule_inverse_x:
+; FINITE-LABEL:      ule_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -554,13 +561,13 @@ define double @ule_inverse_x(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ogt_y:
+; CHECK-LABEL:      ogt_y:
 ; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ogt_y:
+; UNSAFE-LABEL:      ogt_y:
 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ogt_y:
+; FINITE-LABEL:      ogt_y:
 ; FINITE-NEXT: maxsd {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @ogt_y(double %x) nounwind {
@@ -569,13 +576,13 @@ define double @ogt_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      olt_y:
+; CHECK-LABEL:      olt_y:
 ; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      olt_y:
+; UNSAFE-LABEL:      olt_y:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      olt_y:
+; FINITE-LABEL:      olt_y:
 ; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @olt_y(double %x) nounwind {
@@ -584,15 +591,15 @@ define double @olt_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ogt_inverse_y:
+; CHECK-LABEL:      ogt_inverse_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ogt_inverse_y:
+; UNSAFE-LABEL:      ogt_inverse_y:
 ; UNSAFE-NEXT: minsd  {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ogt_inverse_y:
+; FINITE-LABEL:      ogt_inverse_y:
 ; FINITE-NEXT: movsd  {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -603,15 +610,15 @@ define double @ogt_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      olt_inverse_y:
+; CHECK-LABEL:      olt_inverse_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      olt_inverse_y:
+; UNSAFE-LABEL:      olt_inverse_y:
 ; UNSAFE-NEXT: maxsd  {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      olt_inverse_y:
+; FINITE-LABEL:      olt_inverse_y:
 ; FINITE-NEXT: movsd  {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd  %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -622,12 +629,12 @@ define double @olt_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      oge_y:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      oge_y:
+; CHECK-LABEL:      oge_y:
+; CHECK:      cmplesd %xmm0
+; UNSAFE-LABEL:      oge_y:
 ; UNSAFE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      oge_y:
+; FINITE-LABEL:      oge_y:
 ; FINITE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @oge_y(double %x) nounwind {
@@ -636,12 +643,12 @@ define double @oge_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ole_y:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      ole_y:
+; CHECK-LABEL:      ole_y:
+; CHECK:      cmplesd %xmm
+; UNSAFE-LABEL:      ole_y:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ole_y:
+; FINITE-LABEL:      ole_y:
 ; FINITE-NEXT: minsd {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @ole_y(double %x) nounwind {
@@ -650,12 +657,12 @@ define double @ole_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      oge_inverse_y:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      oge_inverse_y:
+; CHECK-LABEL:      oge_inverse_y:
+; CHECK:      cmplesd %xmm0
+; UNSAFE-LABEL:      oge_inverse_y:
 ; UNSAFE-NEXT: minsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      oge_inverse_y:
+; FINITE-LABEL:      oge_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -666,12 +673,12 @@ define double @oge_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ole_inverse_y:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      ole_inverse_y:
+; CHECK-LABEL:      ole_inverse_y:
+; CHECK:      cmplesd %xmm
+; UNSAFE-LABEL:      ole_inverse_y:
 ; UNSAFE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ole_inverse_y:
+; FINITE-LABEL:      ole_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -682,12 +689,12 @@ define double @ole_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ugt_y:
-; CHECK:      ucomisd %xmm0, %xmm1
-; UNSAFE:      ugt_y:
+; CHECK-LABEL:      ugt_y:
+; CHECK:      cmpnlesd %xmm
+; UNSAFE-LABEL:      ugt_y:
 ; UNSAFE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ugt_y:
+; FINITE-LABEL:      ugt_y:
 ; FINITE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @ugt_y(double %x) nounwind {
@@ -696,12 +703,12 @@ define double @ugt_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ult_y:
-; CHECK:      ucomisd %xmm1, %xmm0
-; UNSAFE:      ult_y:
+; CHECK-LABEL:      ult_y:
+; CHECK:      cmpnlesd %xmm0
+; UNSAFE-LABEL:      ult_y:
 ; UNSAFE-NEXT: minsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ult_y:
+; FINITE-LABEL:      ult_y:
 ; FINITE-NEXT: minsd   {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @ult_y(double %x) nounwind {
@@ -710,12 +717,12 @@ define double @ult_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ugt_inverse_y:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      ugt_inverse_y:
+; CHECK-LABEL:      ugt_inverse_y:
+; CHECK:      cmpnlesd %xmm
+; UNSAFE-LABEL:      ugt_inverse_y:
 ; UNSAFE-NEXT: minsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ugt_inverse_y:
+; FINITE-LABEL:      ugt_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -726,12 +733,12 @@ define double @ugt_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ult_inverse_y:
-; CHECK:      ucomisd %xmm
-; UNSAFE:      ult_inverse_y:
+; CHECK-LABEL:      ult_inverse_y:
+; CHECK:      cmpnlesd %xmm
+; UNSAFE-LABEL:      ult_inverse_y:
 ; UNSAFE-NEXT: maxsd   {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ult_inverse_y:
+; FINITE-LABEL:      ult_inverse_y:
 ; FINITE-NEXT: movsd   {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd   %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
@@ -742,15 +749,15 @@ define double @ult_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      uge_y:
+; CHECK-LABEL:      uge_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: maxsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      uge_y:
+; UNSAFE-LABEL:      uge_y:
 ; UNSAFE-NEXT: maxsd  {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      uge_y:
+; FINITE-LABEL:      uge_y:
 ; FINITE-NEXT: maxsd  {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @uge_y(double %x) nounwind {
@@ -759,15 +766,15 @@ define double @uge_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ule_y:
+; CHECK-LABEL:      ule_y:
 ; CHECK-NEXT: movsd  {{[^,]*}}, %xmm1
 ; CHECK-NEXT: minsd  %xmm0, %xmm1
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ule_y:
+; UNSAFE-LABEL:      ule_y:
 ; UNSAFE-NEXT: minsd  {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ule_y:
+; FINITE-LABEL:      ule_y:
 ; FINITE-NEXT: minsd  {{[^,]*}}, %xmm0
 ; FINITE-NEXT: ret
 define double @ule_y(double %x) nounwind {
@@ -776,13 +783,13 @@ define double @ule_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      uge_inverse_y:
+; CHECK-LABEL:      uge_inverse_y:
 ; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      uge_inverse_y:
+; UNSAFE-LABEL:      uge_inverse_y:
 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      uge_inverse_y:
+; FINITE-LABEL:      uge_inverse_y:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: minsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -793,13 +800,13 @@ define double @uge_inverse_y(double %x) nounwind {
   ret double %d
 }
 
-; CHECK:      ule_inverse_y:
+; CHECK-LABEL:      ule_inverse_y:
 ; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
 ; CHECK-NEXT: ret
-; UNSAFE:      ule_inverse_y:
+; UNSAFE-LABEL:      ule_inverse_y:
 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
 ; UNSAFE-NEXT: ret
-; FINITE:      ule_inverse_y:
+; FINITE-LABEL:      ule_inverse_y:
 ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
 ; FINITE-NEXT: maxsd %xmm0, %xmm1
 ; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
@@ -811,11 +818,11 @@ define double @ule_inverse_y(double %x) nounwind {
 }
 ; Test a few more misc. cases.
 
-; CHECK: clampTo3k_a:
+; CHECK-LABEL: clampTo3k_a:
 ; CHECK: minsd
-; UNSAFE: clampTo3k_a:
+; UNSAFE-LABEL: clampTo3k_a:
 ; UNSAFE: minsd
-; FINITE: clampTo3k_a:
+; FINITE-LABEL: clampTo3k_a:
 ; FINITE: minsd
 define double @clampTo3k_a(double %x) nounwind readnone {
 entry:
@@ -824,11 +831,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_b:
+; CHECK-LABEL: clampTo3k_b:
 ; CHECK: minsd
-; UNSAFE: clampTo3k_b:
+; UNSAFE-LABEL: clampTo3k_b:
 ; UNSAFE: minsd
-; FINITE: clampTo3k_b:
+; FINITE-LABEL: clampTo3k_b:
 ; FINITE: minsd
 define double @clampTo3k_b(double %x) nounwind readnone {
 entry:
@@ -837,11 +844,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_c:
+; CHECK-LABEL: clampTo3k_c:
 ; CHECK: maxsd
-; UNSAFE: clampTo3k_c:
+; UNSAFE-LABEL: clampTo3k_c:
 ; UNSAFE: maxsd
-; FINITE: clampTo3k_c:
+; FINITE-LABEL: clampTo3k_c:
 ; FINITE: maxsd
 define double @clampTo3k_c(double %x) nounwind readnone {
 entry:
@@ -850,11 +857,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_d:
+; CHECK-LABEL: clampTo3k_d:
 ; CHECK: maxsd
-; UNSAFE: clampTo3k_d:
+; UNSAFE-LABEL: clampTo3k_d:
 ; UNSAFE: maxsd
-; FINITE: clampTo3k_d:
+; FINITE-LABEL: clampTo3k_d:
 ; FINITE: maxsd
 define double @clampTo3k_d(double %x) nounwind readnone {
 entry:
@@ -863,11 +870,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_e:
+; CHECK-LABEL: clampTo3k_e:
 ; CHECK: maxsd
-; UNSAFE: clampTo3k_e:
+; UNSAFE-LABEL: clampTo3k_e:
 ; UNSAFE: maxsd
-; FINITE: clampTo3k_e:
+; FINITE-LABEL: clampTo3k_e:
 ; FINITE: maxsd
 define double @clampTo3k_e(double %x) nounwind readnone {
 entry:
@@ -876,11 +883,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_f:
+; CHECK-LABEL: clampTo3k_f:
 ; CHECK: maxsd
-; UNSAFE: clampTo3k_f:
+; UNSAFE-LABEL: clampTo3k_f:
 ; UNSAFE: maxsd
-; FINITE: clampTo3k_f:
+; FINITE-LABEL: clampTo3k_f:
 ; FINITE: maxsd
 define double @clampTo3k_f(double %x) nounwind readnone {
 entry:
@@ -889,11 +896,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_g:
+; CHECK-LABEL: clampTo3k_g:
 ; CHECK: minsd
-; UNSAFE: clampTo3k_g:
+; UNSAFE-LABEL: clampTo3k_g:
 ; UNSAFE: minsd
-; FINITE: clampTo3k_g:
+; FINITE-LABEL: clampTo3k_g:
 ; FINITE: minsd
 define double @clampTo3k_g(double %x) nounwind readnone {
 entry:
@@ -902,11 +909,11 @@ entry:
   ret double %x_addr.0
 }
 
-; CHECK: clampTo3k_h:
+; CHECK-LABEL: clampTo3k_h:
 ; CHECK: minsd
-; UNSAFE: clampTo3k_h:
+; UNSAFE-LABEL: clampTo3k_h:
 ; UNSAFE: minsd
-; FINITE: clampTo3k_h:
+; FINITE-LABEL: clampTo3k_h:
 ; FINITE: minsd
 define double @clampTo3k_h(double %x) nounwind readnone {
 entry:
@@ -915,33 +922,33 @@ entry:
   ret double %x_addr.0
 }
 
-; UNSAFE: maxpd:
+; UNSAFE-LABEL: test_maxpd:
 ; UNSAFE: maxpd
-define <2 x double> @maxpd(<2 x double> %x, <2 x double> %y) {
+define <2 x double> @test_maxpd(<2 x double> %x, <2 x double> %y) {
   %max_is_x = fcmp oge <2 x double> %x, %y
   %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
   ret <2 x double> %max
 }
 
-; UNSAFE: minpd:
+; UNSAFE-LABEL: test_minpd:
 ; UNSAFE: minpd
-define <2 x double> @minpd(<2 x double> %x, <2 x double> %y) {
+define <2 x double> @test_minpd(<2 x double> %x, <2 x double> %y) {
   %min_is_x = fcmp ole <2 x double> %x, %y
   %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
   ret <2 x double> %min
 }
 
-; UNSAFE: maxps:
+; UNSAFE-LABEL: test_maxps:
 ; UNSAFE: maxps
-define <4 x float> @maxps(<4 x float> %x, <4 x float> %y) {
+define <4 x float> @test_maxps(<4 x float> %x, <4 x float> %y) {
   %max_is_x = fcmp oge <4 x float> %x, %y
   %max = select <4 x i1> %max_is_x, <4 x float> %x, <4 x float> %y
   ret <4 x float> %max
 }
 
-; UNSAFE: minps:
+; UNSAFE-LABEL: test_minps:
 ; UNSAFE: minps
-define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
+define <4 x float> @test_minps(<4 x float> %x, <4 x float> %y) {
   %min_is_x = fcmp ole <4 x float> %x, %y
   %min = select <4 x i1> %min_is_x, <4 x float> %x, <4 x float> %y
   ret <4 x float> %min
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index 9b2e05b5bedd..47c6429b1814 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -33,7 +33,7 @@ entry:
   %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
   %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
   ret <2 x float> %tmp9
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK-NOT: shufps	$16
 ; CHECK: shufps	$1, 
 ; CHECK-NOT: shufps	$16
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 30a0fbe7d6de..1ac983254eaf 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
 
 ; CHECK: vsel_float
 ; CHECK: pandn
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
new file mode 100644
index 000000000000..ff6c10bfe5a8
--- /dev/null
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -0,0 +1,712 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s
+
+define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: addsd
+  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: cmpordpd
+  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: cmpordsd
+  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: comisd
+  ; CHECK: sete
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: comisd
+  ; CHECK: setae
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: comisd
+  ; CHECK: seta
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: comisd
+  ; CHECK: setbe
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: comisd
+  ; CHECK: sbbl    %eax, %eax
+  ; CHECK: andl    $1, %eax
+  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: comisd
+  ; CHECK: setne
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+  ; CHECK: cvtdq2pd
+  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
+  ; CHECK: cvtdq2ps
+  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
+  ; CHECK: cvtpd2dq
+  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
+  ; CHECK: cvtpd2ps
+  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
+  ; CHECK: cvtps2dq
+  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+  ; CHECK: cvtps2pd
+  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
+  ; CHECK: cvtsd2si
+  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
+  ; CHECK: cvtsd2ss 
+  ; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} 
+  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
+  ; CHECK: movl
+  ; CHECK: cvtsi2sd
+  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
+  ; CHECK: cvtss2sd
+  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
+  ; CHECK: cvttpd2dq
+  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
+  ; CHECK: cvttps2dq
+  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
+  ; CHECK: cvttsd2si
+  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: divsd
+  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+
+define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: maxpd
+  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: maxsd
+  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: minpd
+  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: minsd
+  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
+  ; CHECK: movmskpd
+  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
+
+
+
+
+define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
+  ; CHECK: mulsd
+  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: packssdw
+  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: packsswb
+  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: packuswb
+  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: paddsb
+  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: paddsw
+  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: paddusb
+  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: paddusw
+  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: pavgb
+  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pavgw
+  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pmaddwd
+  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pmaxsw
+  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: pmaxub
+  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pminsw
+  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: pminub
+  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+  ; CHECK: pmovmskb
+  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pmulhw
+  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pmulhuw
+  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pmuludq
+  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: psadbw
+  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pslld
+  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+  ; CHECK: pslldq
+  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
+  ; CHECK: pslldq
+  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: psllq
+  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: psllw
+  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
+  ; CHECK: pslld
+  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
+  ; CHECK: psllq
+  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
+  ; CHECK: psllw
+  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: psrad
+  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: psraw
+  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
+  ; CHECK: psrad
+  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
+  ; CHECK: psraw
+  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: psrld
+  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+  ; CHECK: psrldq
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
+  ; CHECK: psrldq
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: psrlq
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: psrlw
+  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
+  ; CHECK: psrld
+  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
+  ; CHECK: psrlq
+  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
+  ; CHECK: psrlw
+  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: psubsb
+  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: psubsw
+  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: psubusb
+  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: psubusw
+  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
+  ; CHECK: sqrtpd
+  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
+  ; CHECK: sqrtsd
+  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+
+define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
+  ; CHECK: movl
+  ; CHECK: movq
+  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
+  ret void
+}
+declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
+
+
+define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
+  ; CHECK: movl
+  ; CHECK: movdqu
+  ; The integer add forces the integer execution domain, so movdqu is expected.
+  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
+  ret void
+}
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+
+define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
+  ; CHECK: movl
+  ; CHECK: movupd
+  ; The double fadd forces the packed-double execution domain, so movupd is expected.
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
+  ret void
+}
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+
+define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
+  ; CHECK: subsd
+  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: ucomisd
+  ; CHECK: sete
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: ucomisd
+  ; CHECK: setae
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: ucomisd
+  ; CHECK: seta
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: ucomisd
+  ; CHECK: setbe
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: ucomisd
+  ; CHECK: sbbl
+  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: ucomisd
+  ; CHECK: setne
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/sse2-mul.ll b/test/CodeGen/X86/sse2-mul.ll
index 0466d60ec301..e066368dc73e 100644
--- a/test/CodeGen/X86/sse2-mul.ll
+++ b/test/CodeGen/X86/sse2-mul.ll
@@ -3,7 +3,7 @@
 define <4 x i32> @test1(<4 x i32> %x, <4 x i32> %y) {
   %m = mul <4 x i32> %x, %y
   ret <4 x i32> %m
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: pshufd $49
 ; CHECK: pmuludq
 ; CHECK: pshufd $49
diff --git a/test/CodeGen/X86/sse2-vector-shifts.ll b/test/CodeGen/X86/sse2-vector-shifts.ll
new file mode 100644
index 000000000000..462def980a91
--- /dev/null
+++ b/test/CodeGen/X86/sse2-vector-shifts.ll
@@ -0,0 +1,247 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 -mcpu=corei7 | FileCheck %s
+
+; SSE2 Logical Shift Left
+
+define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
+entry:
+  %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_1:
+; CHECK: psllw   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
+entry:
+  %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_2:
+; CHECK: paddw   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
+entry:
+  %shl = shl <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sllw_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
+entry:
+  %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_1:
+; CHECK: pslld   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
+entry:
+  %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_2:
+; CHECK: paddd   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
+entry:
+  %shl = shl <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_slld_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
+entry:
+  %shl = shl <2 x i64> %InVec, <i64 0, i64 0>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_1:
+; CHECK: psllq   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
+entry:
+  %shl = shl <2 x i64> %InVec, <i64 1, i64 1>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_2:
+; CHECK: paddq   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
+entry:
+  %shl = shl <2 x i64> %InVec, <i64 64, i64 64>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_sllq_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+; SSE2 Arithmetic Shift Right
+
+define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
+entry:
+  %shl = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sraw_1:
+; CHECK: psraw   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
+entry:
+  %shl = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sraw_2:
+; CHECK: psraw   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
+entry:
+  %shl = ashr <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_sraw_3:
+; CHECK: psraw   $15, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
+entry:
+  %shl = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_srad_1:
+; CHECK: psrad   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
+entry:
+  %shl = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_srad_2:
+; CHECK: psrad   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
+entry:
+  %shl = ashr <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_srad_3:
+; CHECK: psrad   $31, %xmm0
+; CHECK-NEXT: ret
+
+; SSE2 Logical Shift Right
+
+define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
+entry:
+  %shl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_srlw_1:
+; CHECK: psrlw   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
+entry:
+  %shl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_srlw_2:
+; CHECK: psrlw   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
+entry:
+  %shl = lshr <8 x i16> %InVec, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+  ret <8 x i16> %shl
+}
+
+; CHECK-LABEL: test_srlw_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
+entry:
+  %shl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_srld_1:
+; CHECK: psrld   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
+entry:
+  %shl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_srld_2:
+; CHECK: psrld   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
+entry:
+  %shl = lshr <4 x i32> %InVec, <i32 32, i32 32, i32 32, i32 32>
+  ret <4 x i32> %shl
+}
+
+; CHECK-LABEL: test_srld_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
+entry:
+  %shl = lshr <2 x i64> %InVec, <i64 0, i64 0>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_srlq_1:
+; CHECK: psrlq   $0, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
+entry:
+  %shl = lshr <2 x i64> %InVec, <i64 1, i64 1>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_srlq_2:
+; CHECK: psrlq   $1, %xmm0
+; CHECK-NEXT: ret
+
+define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
+entry:
+  %shl = lshr <2 x i64> %InVec, <i64 64, i64 64>
+  ret <2 x i64> %shl
+}
+
+; CHECK-LABEL: test_srlq_3:
+; CHECK: xorps   %xmm0, %xmm0
+; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 36a0fd91bd87..9147c22dd375 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -7,8 +7,8 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
 	ret void
-        
-; CHECK: test1:
+
+; CHECK-LABEL: test1:
 ; CHECK: 	movl	8(%esp), %eax
 ; CHECK-NEXT: 	movapd	(%eax), %xmm0
 ; CHECK-NEXT: 	movlpd	12(%esp), %xmm0
@@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
 	ret void
-        
-; CHECK: test2:
-; CHECK: 	movl	8(%esp), %eax
-; CHECK-NEXT: 	movapd	(%eax), %xmm0
+
+; CHECK-LABEL: test2:
+; CHECK: 	movl	4(%esp), %eax
+; CHECK: 	movl	8(%esp), %ecx
+; CHECK-NEXT: 	movapd	(%ecx), %xmm0
 ; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
-; CHECK-NEXT: 	movl	4(%esp), %eax
 ; CHECK-NEXT: 	movapd	%xmm0, (%eax)
 ; CHECK-NEXT: 	ret
 }
@@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind
 	store <4 x float> %tmp13, <4 x float>* %res
 	ret void
 ; CHECK: @test3
-; CHECK: 	unpcklps	
+; CHECK: 	unpcklps
 }
 
 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
@@ -60,7 +60,7 @@ define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
 }
 
 define <4 x i32> @test5(i8** %ptr) nounwind {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: pxor
 ; CHECK: punpcklbw
 ; CHECK: punpcklwd
@@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp2, <4 x float>* %res
         ret void
-        
-; CHECK: test6:
-; CHECK: 	movaps	(%eax), %xmm0
+
+; CHECK-LABEL: test6:
+; CHECK: 	movaps	(%ecx), %xmm0
 ; CHECK:	movaps	%xmm0, (%eax)
 }
 
@@ -96,8 +96,8 @@ define void @test7() nounwind {
         shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
         store <4 x float> %2, <4 x float>* null
         ret void
-        
-; CHECK: test7:
+
+; CHECK-LABEL: test7:
 ; CHECK:	xorps	%xmm0, %xmm0
 ; CHECK:	movaps	%xmm0, 0
 }
@@ -115,7 +115,7 @@ define <2 x i64> @test8() nounwind {
 	%tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3		; <<4 x i32>> [#uses=1]
 	%tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %tmp16
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: movups	(%eax), %xmm0
 }
 
@@ -125,7 +125,7 @@ define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) no
 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
 	ret <4 x float> %tmp13
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: movups	8(%esp), %xmm0
 }
 
@@ -135,7 +135,7 @@ define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
 	ret <4 x float> %tmp13
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: movaps	4(%esp), %xmm0
 }
 
@@ -143,7 +143,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
 	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp7
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: movaps	4(%esp), %xmm0
 }
 
@@ -154,7 +154,7 @@ define void @test12() nounwind {
         %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp4, <4 x float>* null
         ret void
-; CHECK: test12:
+; CHECK-LABEL: test12:
 ; CHECK: movhlps
 ; CHECK: shufps
 }
@@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl
         store <4 x float> %tmp11, <4 x float>* %res
         ret void
 ; CHECK: test13
-; CHECK: shufps	$69, (%eax), %xmm0
+; CHECK: shufps	$69, (%ecx), %xmm0
 ; CHECK: pshufd	$-40, %xmm0, %xmm0
 }
 
@@ -177,9 +177,9 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
         %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
-; CHECK: test14:
-; CHECK: 	subps	[[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
-; CHECK: 	addps	[[X1]], [[X0:%xmm[0-9]+]]
+; CHECK-LABEL: test14:
+; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
+; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
 ; CHECK: 	movlhps	[[X2]], [[X0]]
 }
 
@@ -189,12 +189,12 @@ entry:
         %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
         %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp4
-; CHECK: test15:
+; CHECK-LABEL: test15:
 ; CHECK: 	movhlps	%xmm1, %xmm0
 }
 
 ; PR8900
-; CHECK: test16:
+; CHECK-LABEL: test16:
 ; CHECK: unpcklpd
 ; CHECK: ret
 
@@ -221,4 +221,3 @@ entry:
  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
  ret <4 x float> %double2float.i
 }
-
diff --git a/test/CodeGen/X86/sse3-intrinsics-x86.ll b/test/CodeGen/X86/sse3-intrinsics-x86.ll
new file mode 100644
index 000000000000..dbd14b805fb5
--- /dev/null
+++ b/test/CodeGen/X86/sse3-intrinsics-x86.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse3 | FileCheck %s
+
+define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: addsubpd
+  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: addsubps
+  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: haddpd
+  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: haddps
+  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: hsubpd
+  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: hsubps
+  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
+  ; CHECK: movl
+  ; CHECK: lddqu
+  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 48638b3b696c..6d5b19243e45 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -14,8 +14,8 @@ entry:
                 <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef  >
 	store <8 x i16> %tmp6, <8 x i16>* %dest
 	ret void
-        
-; X64: t0:
+
+; X64-LABEL: t0:
 ; X64:	movdqa	(%rsi), %xmm0
 ; X64:	pslldq	$2, %xmm0
 ; X64:	movdqa	%xmm0, (%rdi)
@@ -27,8 +27,8 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
 	ret <8 x i16> %tmp3
-        
-; X64: t1:
+
+; X64-LABEL: t1:
 ; X64: 	movdqa	(%rdi), %xmm0
 ; X64: 	pinsrw	$0, (%rsi), %xmm0
 ; X64: 	ret
@@ -37,7 +37,7 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
 	ret <8 x i16> %tmp
-; X64: t2:
+; X64-LABEL: t2:
 ; X64:	pextrw	$1, %xmm1, %eax
 ; X64:	pinsrw	$0, %eax, %xmm0
 ; X64:	pinsrw	$3, %eax, %xmm0
@@ -47,7 +47,7 @@ define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
 	ret <8 x i16> %tmp
-; X64: t3:
+; X64-LABEL: t3:
 ; X64: 	pextrw	$5, %xmm0, %eax
 ; X64: 	pshuflw	$44, %xmm0, %xmm0
 ; X64: 	pshufhw	$27, %xmm0, %xmm0
@@ -58,12 +58,12 @@ define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
 	ret <8 x i16> %tmp
-; X64: t4:
+; X64-LABEL: t4:
 ; X64: 	pextrw	$7, [[XMM0:%xmm[0-9]+]], %eax
 ; X64: 	pshufhw	$100, [[XMM0]], [[XMM1:%xmm[0-9]+]]
 ; X64: 	pinsrw	$1, %eax, [[XMM1]]
 ; X64: 	pextrw	$1, [[XMM0]], %eax
-; X64: 	pinsrw	$4, %eax, %xmm0
+; X64: 	pinsrw	$4, %eax, %xmm{{[0-9]}}
 ; X64: 	ret
 }
 
@@ -127,13 +127,13 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
 	%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
 	%tmp.upgrd.4 = load double* %tmp.upgrd.3
 	%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
-	%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1	
-	%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>	
-	%tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0	
-	%tmp7 = extractelement <4 x float> %tmp, i32 1		
-	%tmp8 = extractelement <4 x float> %tmp6, i32 0		
-	%tmp9 = extractelement <4 x float> %tmp6, i32 1		
-	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0	
+	%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
+	%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
+	%tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0
+	%tmp7 = extractelement <4 x float> %tmp, i32 1
+	%tmp8 = extractelement <4 x float> %tmp6, i32 0
+	%tmp9 = extractelement <4 x float> %tmp6, i32 1
+	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0
 	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
 	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
@@ -155,21 +155,21 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
 @g2 = external constant <4 x i16>
 
 define internal void @t10() nounwind {
-        load <4 x i32>* @g1, align 16 
+        load <4 x i32>* @g1, align 16
         bitcast <4 x i32> %1 to <8 x i16>
         shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
-        bitcast <8 x i16> %3 to <2 x i64>  
-        extractelement <2 x i64> %4, i32 0 
-        bitcast i64 %5 to <4 x i16>        
+        bitcast <8 x i16> %3 to <2 x i64>
+        extractelement <2 x i64> %4, i32 0
+        bitcast i64 %5 to <4 x i16>
         store <4 x i16> %6, <4 x i16>* @g2, align 8
         ret void
 ; X64: 	t10:
-; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %ecx
-; X64: 		pextrw	$6, [[X0]], %eax
+; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %e{{..}}
+; X64: 		pextrw	$6, [[X0]], %e{{..}}
 ; X64: 		movlhps [[X0]], [[X0]]
 ; X64: 		pshuflw	$8, [[X0]], [[X0]]
-; X64: 		pinsrw	$2, %ecx, [[X0]]
-; X64: 		pinsrw	$3, %eax, [[X0]]
+; X64: 		pinsrw	$2, %e{{..}}, [[X0]]
+; X64: 		pinsrw	$3, %e{{..}}, [[X0]]
 }
 
 
@@ -179,7 +179,7 @@ entry:
 	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
 	ret <8 x i16> %tmp7
 
-; X64: t11:
+; X64-LABEL: t11:
 ; X64:	movd	%xmm1, %eax
 ; X64:	movlhps	%xmm0, %xmm0
 ; X64:	pshuflw	$1, %xmm0, %xmm0
@@ -193,7 +193,7 @@ entry:
 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
 	ret <8 x i16> %tmp9
 
-; X64: t12:
+; X64-LABEL: t12:
 ; X64: 	pextrw	$3, %xmm1, %eax
 ; X64: 	movlhps	%xmm0, %xmm0
 ; X64: 	pshufhw	$3, %xmm0, %xmm0
@@ -206,7 +206,7 @@ define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
 	ret <8 x i16> %tmp9
-; X64: t13:
+; X64-LABEL: t13:
 ; X64: 	punpcklqdq	%xmm0, %xmm1
 ; X64: 	pextrw	$3, %xmm1, %eax
 ; X64: 	pshufd	$52, %xmm1, %xmm0
@@ -219,7 +219,7 @@ define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
 	ret <8 x i16> %tmp9
-; X64: t14:
+; X64-LABEL: t14:
 ; X64: 	punpcklqdq	%xmm0, %xmm1
 ; X64: 	pshufhw	$8, %xmm1, %xmm0
 ; X64: 	ret
@@ -259,7 +259,7 @@ entry:
 ; rdar://8520311
 define <4 x i32> @t17() nounwind {
 entry:
-; X64: t17:
+; X64-LABEL: t17:
 ; X64:          movddup (%rax), %xmm0
   %tmp1 = load <4 x float>* undef, align 16
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index a2a0debf9e95..a32f5de30a6c 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
 
-;CHECK: vsel_float
+;CHECK-LABEL: vsel_float:
 ;CHECK: blendvps
 ;CHECK: ret
 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -9,7 +9,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 }
 
 
-;CHECK: vsel_4xi8
+;CHECK-LABEL: vsel_4xi8:
 ;CHECK: blendvps
 ;CHECK: ret
 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
@@ -17,7 +17,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
   ret <4 x i8> %vsel
 }
 
-;CHECK: vsel_4xi16
+;CHECK-LABEL: vsel_4xi16:
 ;CHECK: blendvps
 ;CHECK: ret
 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
@@ -26,7 +26,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
 }
 
 
-;CHECK: vsel_i32
+;CHECK-LABEL: vsel_i32:
 ;CHECK: blendvps
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
@@ -35,7 +35,7 @@ define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
 }
 
 
-;CHECK: vsel_double
+;CHECK-LABEL: vsel_double:
 ;CHECK: blendvpd
 ;CHECK: ret
 define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
@@ -44,7 +44,7 @@ define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
 }
 
 
-;CHECK: vsel_i64
+;CHECK-LABEL: vsel_i64:
 ;CHECK: blendvpd
 ;CHECK: ret
 define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
@@ -53,7 +53,7 @@ define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
 }
 
 
-;CHECK: vsel_i8
+;CHECK-LABEL: vsel_i8:
 ;CHECK: pblendvb
 ;CHECK: ret
 define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll
new file mode 100644
index 000000000000..37eff43b28c4
--- /dev/null
+++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -0,0 +1,326 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: blendpd
+  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: blendps
+  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: blendvpd
+  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: blendvps
+  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: dppd
+  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: dpps
+  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: insertps
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: mpsadbw
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: packusdw
+  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+  ; CHECK: pblendvb
+  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pblendw
+  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
+  ; CHECK: phminposuw
+  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: pmaxsb
+  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pmaxsd
+  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pmaxud
+  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pmaxuw
+  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: pminsb
+  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pminsd
+  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pminud
+  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pminuw
+  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+  ; CHECK: pmovsxbd
+  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+  ; CHECK: pmovsxbq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+  ; CHECK: pmovsxbw
+  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+  ; CHECK: pmovsxdq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+  ; CHECK: pmovsxwd
+  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+  ; CHECK: pmovsxwq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
+  ; CHECK: pmovzxbd
+  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
+  ; CHECK: pmovzxbq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
+  ; CHECK: pmovzxbw
+  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
+  ; CHECK: pmovzxdq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
+  ; CHECK: pmovzxwd
+  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
+  ; CHECK: pmovzxwq
+  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: pmuldq
+  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: ptest
+  ; CHECK: sbbl
+  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] -- per the directives above, expected to lower to ptest followed by sbbl
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: ptest
+  ; CHECK: seta
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] -- per the directives above, expected to lower to ptest, seta, then movzbl
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: ptest
+  ; CHECK: sete
+  ; CHECK: movzbl
+  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] -- per the directives above, expected to lower to ptest, sete, then movzbl
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
+  ; CHECK: roundpd
+  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
+  ; CHECK: roundps
+  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: roundsd
+  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: roundss
+  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index c6f9f0c873af..c15e24ccc96b 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64
 
 @g16 = external global i16
 
 define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
         %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
         ret <4 x i32> %tmp1
-; X32: pinsrd_1:
+; X32-LABEL: pinsrd_1:
 ; X32:    pinsrd $1, 4(%esp), %xmm0
 
-; X64: pinsrd_1:
+; X64-LABEL: pinsrd_1:
 ; X64:    pinsrd $1, %edi, %xmm0
 }
 
 define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
         %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
         ret <16 x i8> %tmp1
-; X32: pinsrb_1:
+; X32-LABEL: pinsrb_1:
 ; X32:    pinsrb $1, 4(%esp), %xmm0
 
-; X64: pinsrb_1:
+; X64-LABEL: pinsrb_1:
 ; X64:    pinsrb $1, %edi, %xmm0
 }
 
@@ -237,12 +237,12 @@ entry:
   %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
   %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
   ret <2 x float> %tmp9
-; X32: buildvector:
+; X32-LABEL: buildvector:
 ; X32-NOT: insertps $0
 ; X32: insertps $16
 ; X32-NOT: insertps $0
 ; X32: ret
-; X64: buildvector:
+; X64-LABEL: buildvector:
 ; X64-NOT: insertps $0
 ; X64: insertps $16
 ; X64-NOT: insertps $0
diff --git a/test/CodeGen/X86/sse42-intrinsics-x86.ll b/test/CodeGen/X86/sse42-intrinsics-x86.ll
new file mode 100644
index 000000000000..5ca800982827
--- /dev/null
+++ b/test/CodeGen/X86/sse42-intrinsics-x86.ll
@@ -0,0 +1,205 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.2 | FileCheck %s
+
+; Each function calls one llvm.x86.sse42 string-compare intrinsic (pcmpestr* or
+; pcmpistr*) and checks the SSE4.2 instruction selected with AVX disabled. The
+; *_load variants feed the intrinsic vectors loaded through pointers. The
+; per-function label checks keep every match inside that function's output.
+
+define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestri128:
+  ; CHECK: movl $7
+  ; CHECK: movl $7
+  ; CHECK: pcmpestri $7
+  ; CHECK: movl
+  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
+  ; CHECK: movl $7
+  ; CHECK: movl $7
+  ; CHECK: pcmpestri $7, (
+  ; CHECK: movl
+  %1 = load <16 x i8>* %a0
+  %2 = load <16 x i8>* %a2
+  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+
+
+define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestria128:
+  ; CHECK: movl
+  ; CHECK: movl
+  ; CHECK: pcmpestri
+  ; CHECK: seta
+  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestric128:
+  ; CHECK: movl
+  ; CHECK: movl
+  ; CHECK: pcmpestri
+  ; CHECK: sbbl
+  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
+  ; CHECK: movl
+  ; CHECK: movl
+  ; CHECK: pcmpestri
+  ; CHECK: seto
+  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestris128:
+  ; CHECK: movl
+  ; CHECK: movl
+  ; CHECK: pcmpestri
+  ; CHECK: sets
+  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
+  ; CHECK: movl
+  ; CHECK: movl
+  ; CHECK: pcmpestri
+  ; CHECK: sete
+  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
+  ; CHECK: movl
+  ; CHECK: movl
+  ; CHECK: pcmpestrm
+  ; CHECK-NOT: vmov
+  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
+  ; CHECK: movl $7
+  ; CHECK: movl $7
+  ; CHECK: pcmpestrm $7,
+  ; CHECK-NOT: vmov
+  %1 = load <16 x i8>* %a2
+  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+
+
+define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistri128:
+  ; CHECK: pcmpistri $7
+  ; CHECK: movl
+  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
+  ; CHECK: pcmpistri $7, (
+  ; CHECK: movl
+  %1 = load <16 x i8>* %a0
+  %2 = load <16 x i8>* %a1
+  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+
+
+define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistria128:
+  ; CHECK: pcmpistri
+  ; CHECK: seta
+  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistric128:
+  ; CHECK: pcmpistri
+  ; CHECK: sbbl
+  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
+  ; CHECK: pcmpistri
+  ; CHECK: seto
+  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistris128:
+  ; CHECK: pcmpistri
+  ; CHECK: sets
+  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
+  ; CHECK: pcmpistri
+  ; CHECK: sete
+  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
+  ; CHECK: pcmpistrm $7
+  ; CHECK-NOT: vmov
+  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
+  ; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
+  ; CHECK: pcmpistrm $7, (
+  ; CHECK-NOT: vmov
+  %1 = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
index c7875238ec86..db51d9973688 100644
--- a/test/CodeGen/X86/sse42.ll
+++ b/test/CodeGen/X86/sse42.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.2 | FileCheck %s -check-prefix=X64
 
 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
diff --git a/test/CodeGen/X86/sse42_64.ll b/test/CodeGen/X86/sse42_64.ll
index 8b3a69bcaaf8..b39e76c78eb7 100644
--- a/test/CodeGen/X86/sse42_64.ll
+++ b/test/CodeGen/X86/sse42_64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.2 | FileCheck %s -check-prefix=X64
 
 declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
 declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
diff --git a/test/CodeGen/X86/sse4a.ll b/test/CodeGen/X86/sse4a.ll
index 076e21336492..165d47639d7a 100644
--- a/test/CodeGen/X86/sse4a.ll
+++ b/test/CodeGen/X86/sse4a.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s
 
 define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: movntss
   tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) nounwind
   ret void
@@ -10,7 +10,7 @@ define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp {
 declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
 
 define void @test2(i8* %p, <2 x double> %a) nounwind optsize ssp {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movntsd
   tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) nounwind
   ret void
@@ -19,7 +19,7 @@ define void @test2(i8* %p, <2 x double> %a) nounwind optsize ssp {
 declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
 
 define <2 x i64> @test3(<2 x i64> %x) nounwind uwtable ssp {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: extrq
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
   ret <2 x i64> %1
@@ -28,7 +28,7 @@ define <2 x i64> @test3(<2 x i64> %x) nounwind uwtable ssp {
 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
 
 define <2 x i64> @test4(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: extrq
   %1 = bitcast <2 x i64> %y to <16 x i8>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
@@ -38,7 +38,7 @@ define <2 x i64> @test4(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
 
 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: insertq
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
   ret <2 x i64> %1
@@ -47,7 +47,7 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
 
 define <2 x i64> @test6(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: insertq
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
   ret <2 x i64> %1
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll
index 655f75800cff..2c16a554aebb 100644
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -8,7 +8,7 @@
 ; destination of rsqrtss are the same.
 define void @t1(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: rsqrtss %xmm0, %xmm0
   %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
@@ -23,7 +23,7 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
 
 define void @t2(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: rcpss %xmm0, %xmm0
   %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
diff --git a/test/CodeGen/X86/ssse3-intrinsics-x86.ll b/test/CodeGen/X86/ssse3-intrinsics-x86.ll
new file mode 100644
index 000000000000..728cbc9b60dd
--- /dev/null
+++ b/test/CodeGen/X86/ssse3-intrinsics-x86.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+ssse3 | FileCheck %s
+
+; Each function calls one 128-bit llvm.x86.ssse3 intrinsic and checks that the
+; matching SSSE3 instruction is emitted with AVX disabled (-mattr=-avx,+ssse3).
+
+define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
+  ; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
+  ; CHECK: pabsb
+  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
+  ; CHECK: pabsd
+  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
+  ; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
+  ; CHECK: pabsw
+  %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
+  ; CHECK: phaddd
+  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
+  ; CHECK: phaddsw
+  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
+  ; CHECK: phaddw
+  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
+  ; CHECK: phsubd
+  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
+  ; CHECK: phsubsw
+  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
+  ; CHECK: phsubw
+  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
+  ; CHECK: pmaddubsw
+  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
+  ; CHECK: pmulhrsw
+  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
+  ; CHECK: pshufb
+  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_psign_b_128:
+  ; CHECK: psignb
+  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_psign_d_128:
+  ; CHECK: psignd
+  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK-LABEL: test_x86_ssse3_psign_w_128:
+  ; CHECK: psignw
+  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll
index 74945e5bb1bd..87bb85fad83e 100644
--- a/test/CodeGen/X86/stack-align-memcpy.ll
+++ b/test/CodeGen/X86/stack-align-memcpy.ll
@@ -9,7 +9,7 @@ define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind {
   call void @bar(i8* %dynalloc, %struct.foo* align 4 byval %x)
   ret void
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: andl $-16, %esp
 ; CHECK: movl %esp, %esi
 ; CHECK-NOT: rep;movsl
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 0ddb2378ef2f..eafb7c29fa0a 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static -realign-stack=1 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -relocation-model=static -mcpu=yonah | FileCheck %s
 
 ; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
 ; fold the load into the andpd.
@@ -45,7 +45,7 @@ entry:
   %0 = ptrtoint [2048 x i8]* %buffer to i32
   %and = and i32 %0, -16
   ret i32 %and
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK-NOT: and
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
new file mode 100644
index 000000000000..bd27ac347690
--- /dev/null
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -0,0 +1,97 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o -
+
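+; PR16954
+; (The RUN line has no FileCheck, so the test passes if llc exits successfully.)
+; Make sure that when we splice off the end of a machine basic block, we include
+; any DBG_VALUE machine instructions in the terminator sequence.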
+
+@a = external global { i64, [56 x i8] }, align 32
+
+; Function Attrs: nounwind sspreq
+define i32 @_Z18read_response_sizev() #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23), !dbg !39
+  %0 = load i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
+  tail call void @llvm.dbg.value(metadata !63, i64 0, metadata !64), !dbg !71
+  %1 = trunc i64 %0 to i32
+  ret i32 %1
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata)
+
+attributes #0 = { sspreq }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !72}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !5, metadata !8, metadata !20, metadata !5, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/matt/ryan_bug/<unknown>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<unknown>", metadata !"/Users/matt/ryan_bug"}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786436, metadata !1, metadata !4, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 19, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 19, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{i32 0}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786472, metadata !"max_frame_size", i64 0} ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"read_response_size", metadata !"read_response_size", metadata !"_Z18read_response_sizev", i32 27, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_Z18read_response_sizev, null, null, metadata !14, i32 27} ; [ DW_TAG_subprogram ] [line 27] [def] [read_response_size]
+!10 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/matt/ryan_bug/<unknown>]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = metadata !{metadata !15, metadata !19}
+!15 = metadata !{i32 786688, metadata !9, metadata !"b", metadata !10, i32 28, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 28]
+!16 = metadata !{i32 786451, metadata !1, null, metadata !"B", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null} ; [ DW_TAG_structure_type ] [B] [line 16, size 32, align 32, offset 0] [def] [from ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"end_of_file", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ] [end_of_file] [line 17, size 32, align 32, offset 0] [from int]
+!19 = metadata !{i32 786688, metadata !9, metadata !"c", metadata !10, i32 29, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [c] [line 29]
+!20 = metadata !{}
+!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!22 = metadata !{i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0)}
+!23 = metadata !{i32 786689, metadata !24, metadata !"p2", metadata !10, i32 33554444, metadata !32, i32 0, metadata !38} ; [ DW_TAG_arg_variable ] [p2] [line 12]
+!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"min<unsigned long long>", metadata !"min<unsigned long long>", metadata !"_ZN3__13minIyEERKT_S3_RS1_", i32 12, metadata !27, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, metadata !33, null, metadata !35, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [min<unsigned long long>]
+!25 = metadata !{i32 786489, metadata !26, null, metadata !"__1", i32 1} ; [ DW_TAG_namespace ] [__1] [line 1]
+!26 = metadata !{metadata !"main.cpp", metadata !"/Users/matt/ryan_bug"}
+!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = metadata !{metadata !29, metadata !29, metadata !32}
+!29 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !30} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!31 = metadata !{i32 786468, null, null, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!32 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!33 = metadata !{metadata !34}
+!34 = metadata !{i32 786479, null, metadata !"_Tp", metadata !31, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!35 = metadata !{metadata !36, metadata !37}
+!36 = metadata !{i32 786689, metadata !24, metadata !"p1", metadata !10, i32 16777228, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 12]
+!37 = metadata !{i32 786689, metadata !24, metadata !"p2", metadata !10, i32 33554444, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p2] [line 12]
+!38 = metadata !{i32 33, i32 0, metadata !9, null}
+!39 = metadata !{i32 12, i32 0, metadata !24, metadata !38}
+!40 = metadata !{i32 9, i32 0, metadata !41, metadata !59}
+!41 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"min<unsigned long long, __1::A>", metadata !"min<unsigned long long, __1::A>", metadata !"_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_", i32 7, metadata !42, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, metadata !53, null, metadata !55, i32 8} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 8] [min<unsigned long long, __1::A>]
+!42 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!43 = metadata !{metadata !29, metadata !29, metadata !32, metadata !44}
+!44 = metadata !{i32 786451, metadata !1, metadata !25, metadata !"A", i32 0, i64 8, i64 8, i32 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 0, size 8, align 8, offset 0] [def] [from ]
+!45 = metadata !{metadata !46}
+!46 = metadata !{i32 786478, metadata !1, metadata !44, metadata !"operator()", metadata !"operator()", metadata !"_ZN3__11AclERKiS2_", i32 1, metadata !47, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !52, i32 1} ; [ DW_TAG_subprogram ] [line 1] [operator()]
+!47 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !48, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!48 = metadata !{metadata !13, metadata !49, metadata !50, metadata !50}
+!49 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!50 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!51 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!52 = metadata !{i32 786468}
+!53 = metadata !{metadata !34, metadata !54}
+!54 = metadata !{i32 786479, null, metadata !"_Compare", metadata !44, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!55 = metadata !{metadata !56, metadata !57, metadata !58}
+!56 = metadata !{i32 786689, metadata !41, metadata !"p1", metadata !10, i32 16777223, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 7]
+!57 = metadata !{i32 786689, metadata !41, metadata !"p2", metadata !10, i32 33554439, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p2] [line 7]
+!58 = metadata !{i32 786689, metadata !41, metadata !"p3", metadata !10, i32 50331656, metadata !44, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p3] [line 8]
+!59 = metadata !{i32 13, i32 0, metadata !24, metadata !38}
+!63 = metadata !{i32 undef}
+!64 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, metadata !40} ; [ DW_TAG_arg_variable ] [p1] [line 1]
+!65 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"operator()", metadata !"operator()", metadata !"_ZN3__11AclERKiS2_", i32 1, metadata !47, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !46, metadata !66, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [operator()]
+!66 = metadata !{metadata !67, metadata !69, metadata !70}
+!67 = metadata !{i32 786689, metadata !65, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!68 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!69 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 1]
+!70 = metadata !{i32 786689, metadata !65, metadata !"", metadata !10, i32 50331650, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 2]
+!71 = metadata !{i32 1, i32 0, metadata !65, metadata !40}
+!72 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll b/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
new file mode 100644
index 000000000000..7d499f9abd74
--- /dev/null
+++ b/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple i386-unknown-freebsd10.0 -march=x86 --relocation-model=pic %s -o -
+
+; PR16979. The RUN line has no FileCheck, so this only checks that llc succeeds.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd10.0"
+
+@state = internal unnamed_addr global i32 0, align 4
+
+; Function Attrs: nounwind sspreq
+define void @set_state(i32 %s) #0 {
+entry:
+  store i32 %s, i32* @state, align 4
+  ret void
+}
+
+; Function Attrs: nounwind sspreq
+define void @zero_char(i8* nocapture %p) #0 {
+entry:
+  store i8 0, i8* %p, align 1
+  tail call void @g(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) #2
+  ret void
+}
+
+declare void @g(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind sspreq
+define void @do_something(i32 %i) #0 {
+entry:
+  %data = alloca [8 x i8], align 1
+  %0 = load i32* @state, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  tail call fastcc void @send_int(i32 0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call fastcc void @send_int(i32 %i)
+  %arrayidx = getelementptr inbounds [8 x i8]* %data, i32 0, i32 0
+  call void @zero_char(i8* %arrayidx)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; Function Attrs: nounwind sspreq
+define internal fastcc void @send_int(i32 %p) #0 {
+entry:
+  tail call void @f(i32 %p) #2
+  tail call void @g(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) #2
+  ret void
+}
+
+declare void @f(i32) #1
+
+attributes #0 = { nounwind sspreq "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll
index 1e9ca1d2c24d..265ec80682bf 100644
--- a/test/CodeGen/X86/stack-protector.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-X64 %s
 ; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-KERNEL-X64 %s
 ; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | FileCheck --check-prefix=DARWIN-X64 %s
+; RUN: llc -mtriple=amd64-pc-openbsd < %s -o - | FileCheck --check-prefix=OPENBSD-AMD64 %s
 
 %struct.foo = type { [16 x i8] }
 %struct.foo.0 = type { [4 x i8] }
@@ -23,19 +24,19 @@
 ; Requires no protector.
 define void @test1a(i8* %a) nounwind uwtable {
 entry:
-; LINUX-I386: test1a:
+; LINUX-I386-LABEL: test1a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test1a:
+; LINUX-X64-LABEL: test1a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test1a:
+; LINUX-KERNEL-X64-LABEL: test1a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test1a:
+; DARWIN-X64-LABEL: test1a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -54,21 +55,25 @@ entry:
 ; Requires protector.
 define void @test1b(i8* %a) nounwind uwtable ssp {
 entry:
-; LINUX-I386: test1b:
+; LINUX-I386-LABEL: test1b:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test1b:
+; LINUX-X64-LABEL: test1b:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test1b:
+; LINUX-KERNEL-X64-LABEL: test1b:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test1b:
+; DARWIN-X64-LABEL: test1b:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
+
+; OPENBSD-AMD64-LABEL: test1b:
+; OPENBSD-AMD64: movq __guard_local(%rip)
+; OPENBSD-AMD64: callq __stack_smash_handler
   %a.addr = alloca i8*, align 8
   %buf = alloca [16 x i8], align 16
   store i8* %a, i8** %a.addr, align 8
@@ -85,19 +90,19 @@ entry:
 ; Requires protector.
 define void @test1c(i8* %a) nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test1c:
+; LINUX-I386-LABEL: test1c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test1c:
+; LINUX-X64-LABEL: test1c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test1c:
+; LINUX-KERNEL-X64-LABEL: test1c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test1c:
+; DARWIN-X64-LABEL: test1c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -116,19 +121,19 @@ entry:
 ; Requires protector.
 define void @test1d(i8* %a) nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test1d:
+; LINUX-I386-LABEL: test1d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test1d:
+; LINUX-X64-LABEL: test1d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test1d:
+; LINUX-KERNEL-X64-LABEL: test1d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test1d:
+; DARWIN-X64-LABEL: test1d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -147,19 +152,19 @@ entry:
 ; Requires no protector.
 define void @test2a(i8* %a) nounwind uwtable {
 entry:
-; LINUX-I386: test2a:
+; LINUX-I386-LABEL: test2a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test2a:
+; LINUX-X64-LABEL: test2a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test2a:
+; LINUX-KERNEL-X64-LABEL: test2a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test2a:
+; DARWIN-X64-LABEL: test2a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -180,19 +185,19 @@ entry:
 ; Requires protector.
 define void @test2b(i8* %a) nounwind uwtable ssp {
 entry:
-; LINUX-I386: test2b:
+; LINUX-I386-LABEL: test2b:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test2b:
+; LINUX-X64-LABEL: test2b:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test2b:
+; LINUX-KERNEL-X64-LABEL: test2b:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test2b:
+; DARWIN-X64-LABEL: test2b:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -213,19 +218,19 @@ entry:
 ; Requires protector.
 define void @test2c(i8* %a) nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test2c:
+; LINUX-I386-LABEL: test2c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test2c:
+; LINUX-X64-LABEL: test2c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test2c:
+; LINUX-KERNEL-X64-LABEL: test2c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test2c:
+; DARWIN-X64-LABEL: test2c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -246,19 +251,19 @@ entry:
 ; Requires protector.
 define void @test2d(i8* %a) nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test2d:
+; LINUX-I386-LABEL: test2d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test2d:
+; LINUX-X64-LABEL: test2d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test2d:
+; LINUX-KERNEL-X64-LABEL: test2d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test2d:
+; DARWIN-X64-LABEL: test2d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -279,19 +284,19 @@ entry:
 ; Requires no protector.
 define void @test3a(i8* %a) nounwind uwtable {
 entry:
-; LINUX-I386: test3a:
+; LINUX-I386-LABEL: test3a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test3a:
+; LINUX-X64-LABEL: test3a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test3a:
+; LINUX-KERNEL-X64-LABEL: test3a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test3a:
+; DARWIN-X64-LABEL: test3a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -310,19 +315,19 @@ entry:
 ; Requires no protector.
 define void @test3b(i8* %a) nounwind uwtable ssp {
 entry:
-; LINUX-I386: test3b:
+; LINUX-I386-LABEL: test3b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test3b:
+; LINUX-X64-LABEL: test3b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test3b:
+; LINUX-KERNEL-X64-LABEL: test3b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test3b:
+; DARWIN-X64-LABEL: test3b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -341,19 +346,19 @@ entry:
 ; Requires protector.
 define void @test3c(i8* %a) nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test3c:
+; LINUX-I386-LABEL: test3c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test3c:
+; LINUX-X64-LABEL: test3c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test3c:
+; LINUX-KERNEL-X64-LABEL: test3c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test3c:
+; DARWIN-X64-LABEL: test3c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -372,19 +377,19 @@ entry:
 ; Requires protector.
 define void @test3d(i8* %a) nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test3d:
+; LINUX-I386-LABEL: test3d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test3d:
+; LINUX-X64-LABEL: test3d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test3d:
+; LINUX-KERNEL-X64-LABEL: test3d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test3d:
+; DARWIN-X64-LABEL: test3d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -403,19 +408,19 @@ entry:
 ; Requires no protector.
 define void @test4a(i8* %a) nounwind uwtable {
 entry:
-; LINUX-I386: test4a:
+; LINUX-I386-LABEL: test4a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test4a:
+; LINUX-X64-LABEL: test4a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test4a:
+; LINUX-KERNEL-X64-LABEL: test4a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test4a:
+; DARWIN-X64-LABEL: test4a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -436,19 +441,19 @@ entry:
 ; Requires no protector.
 define void @test4b(i8* %a) nounwind uwtable ssp {
 entry:
-; LINUX-I386: test4b:
+; LINUX-I386-LABEL: test4b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test4b:
+; LINUX-X64-LABEL: test4b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test4b:
+; LINUX-KERNEL-X64-LABEL: test4b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test4b:
+; DARWIN-X64-LABEL: test4b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -469,19 +474,19 @@ entry:
 ; Requires protector.
 define void @test4c(i8* %a) nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test4c:
+; LINUX-I386-LABEL: test4c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test4c:
+; LINUX-X64-LABEL: test4c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test4c:
+; LINUX-KERNEL-X64-LABEL: test4c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test4c:
+; DARWIN-X64-LABEL: test4c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -502,19 +507,19 @@ entry:
 ; Requires protector.
 define void @test4d(i8* %a) nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test4d:
+; LINUX-I386-LABEL: test4d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test4d:
+; LINUX-X64-LABEL: test4d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test4d:
+; LINUX-KERNEL-X64-LABEL: test4d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test4d:
+; DARWIN-X64-LABEL: test4d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -535,19 +540,19 @@ entry:
 ; Requires no protector.
 define void @test5a(i8* %a) nounwind uwtable {
 entry:
-; LINUX-I386: test5a:
+; LINUX-I386-LABEL: test5a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test5a:
+; LINUX-X64-LABEL: test5a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test5a:
+; LINUX-KERNEL-X64-LABEL: test5a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test5a:
+; DARWIN-X64-LABEL: test5a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -562,19 +567,19 @@ entry:
 ; Requires no protector.
 define void @test5b(i8* %a) nounwind uwtable ssp {
 entry:
-; LINUX-I386: test5b:
+; LINUX-I386-LABEL: test5b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test5b:
+; LINUX-X64-LABEL: test5b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test5b:
+; LINUX-KERNEL-X64-LABEL: test5b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test5b:
+; DARWIN-X64-LABEL: test5b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -589,19 +594,19 @@ entry:
 ; Requires no protector.
 define void @test5c(i8* %a) nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test5c:
+; LINUX-I386-LABEL: test5c:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test5c:
+; LINUX-X64-LABEL: test5c:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test5c:
+; LINUX-KERNEL-X64-LABEL: test5c:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test5c:
+; DARWIN-X64-LABEL: test5c:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
@@ -616,19 +621,19 @@ entry:
 ; Requires protector.
 define void @test5d(i8* %a) nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test5d:
+; LINUX-I386-LABEL: test5d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test5d:
+; LINUX-X64-LABEL: test5d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test5d:
+; LINUX-KERNEL-X64-LABEL: test5d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test5d:
+; DARWIN-X64-LABEL: test5d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
@@ -643,19 +648,19 @@ entry:
 ; Requires no protector.
 define void @test6a() nounwind uwtable {
 entry:
-; LINUX-I386: test6a:
+; LINUX-I386-LABEL: test6a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test6a:
+; LINUX-X64-LABEL: test6a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test6a:
+; LINUX-KERNEL-X64-LABEL: test6a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test6a:
+; DARWIN-X64-LABEL: test6a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %retval = alloca i32, align 4
@@ -674,19 +679,19 @@ entry:
 ; Requires no protector.
 define void @test6b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test6b:
+; LINUX-I386-LABEL: test6b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test6b:
+; LINUX-X64-LABEL: test6b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test6b:
+; LINUX-KERNEL-X64-LABEL: test6b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test6b:
+; DARWIN-X64-LABEL: test6b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %retval = alloca i32, align 4
@@ -705,19 +710,19 @@ entry:
 ; Requires protector.
 define void @test6c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test6c:
+; LINUX-I386-LABEL: test6c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test6c:
+; LINUX-X64-LABEL: test6c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test6c:
+; LINUX-KERNEL-X64-LABEL: test6c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test6c:
+; DARWIN-X64-LABEL: test6c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %retval = alloca i32, align 4
@@ -736,19 +741,19 @@ entry:
 ; Requires protector.
 define void @test6d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test6d:
+; LINUX-I386-LABEL: test6d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test6d:
+; LINUX-X64-LABEL: test6d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test6d:
+; LINUX-KERNEL-X64-LABEL: test6d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test6d:
+; DARWIN-X64-LABEL: test6d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %retval = alloca i32, align 4
@@ -767,19 +772,19 @@ entry:
 ; Requires no protector.
 define void @test7a() nounwind uwtable readnone {
 entry:
-; LINUX-I386: test7a:
+; LINUX-I386-LABEL: test7a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test7a:
+; LINUX-X64-LABEL: test7a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test7a:
+; LINUX-KERNEL-X64-LABEL: test7a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test7a:
+; DARWIN-X64-LABEL: test7a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -793,19 +798,19 @@ entry:
 ; Requires no protector.
 define void @test7b() nounwind uwtable readnone ssp {
 entry:
-; LINUX-I386: test7b:
+; LINUX-I386-LABEL: test7b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test7b:
+; LINUX-X64-LABEL: test7b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test7b:
+; LINUX-KERNEL-X64-LABEL: test7b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test7b:
+; DARWIN-X64-LABEL: test7b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -819,19 +824,19 @@ entry:
 ; Requires protector.
 define void @test7c() nounwind uwtable readnone sspstrong {
 entry:
-; LINUX-I386: test7c:
+; LINUX-I386-LABEL: test7c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test7c:
+; LINUX-X64-LABEL: test7c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test7c:
+; LINUX-KERNEL-X64-LABEL: test7c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test7c:
+; DARWIN-X64-LABEL: test7c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -845,19 +850,19 @@ entry:
 ; Requires protector.
 define void @test7d() nounwind uwtable readnone sspreq {
 entry:
-; LINUX-I386: test7d:
+; LINUX-I386-LABEL: test7d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test7d:
+; LINUX-X64-LABEL: test7d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test7d:
+; LINUX-KERNEL-X64-LABEL: test7d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test7d:
+; DARWIN-X64-LABEL: test7d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -871,19 +876,19 @@ entry:
 ; Requires no protector.
 define void @test8a() nounwind uwtable {
 entry:
-; LINUX-I386: test8a:
+; LINUX-I386-LABEL: test8a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test8a:
+; LINUX-X64-LABEL: test8a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test8a:
+; LINUX-KERNEL-X64-LABEL: test8a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test8a:
+; DARWIN-X64-LABEL: test8a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %b = alloca i32, align 4
@@ -896,19 +901,19 @@ entry:
 ; Requires no protector.
 define void @test8b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test8b:
+; LINUX-I386-LABEL: test8b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test8b:
+; LINUX-X64-LABEL: test8b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test8b:
+; LINUX-KERNEL-X64-LABEL: test8b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test8b:
+; DARWIN-X64-LABEL: test8b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %b = alloca i32, align 4
@@ -921,19 +926,19 @@ entry:
 ; Requires protector.
 define void @test8c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test8c:
+; LINUX-I386-LABEL: test8c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test8c:
+; LINUX-X64-LABEL: test8c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test8c:
+; LINUX-KERNEL-X64-LABEL: test8c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test8c:
+; DARWIN-X64-LABEL: test8c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %b = alloca i32, align 4
@@ -946,19 +951,19 @@ entry:
 ; Requires protector.
 define void @test8d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test8d:
+; LINUX-I386-LABEL: test8d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test8d:
+; LINUX-X64-LABEL: test8d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test8d:
+; LINUX-KERNEL-X64-LABEL: test8d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test8d:
+; DARWIN-X64-LABEL: test8d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %b = alloca i32, align 4
@@ -971,19 +976,19 @@ entry:
 ; Requires no protector.
 define void @test9a() nounwind uwtable {
 entry:
-; LINUX-I386: test9a:
+; LINUX-I386-LABEL: test9a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test9a:
+; LINUX-X64-LABEL: test9a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test9a:
+; LINUX-KERNEL-X64-LABEL: test9a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test9a:
+; DARWIN-X64-LABEL: test9a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %x = alloca double, align 8
@@ -1000,19 +1005,19 @@ entry:
 ; Requires no protector.
 define void @test9b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test9b:
+; LINUX-I386-LABEL: test9b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test9b:
+; LINUX-X64-LABEL: test9b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test9b:
+; LINUX-KERNEL-X64-LABEL: test9b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test9b:
+; DARWIN-X64-LABEL: test9b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %x = alloca double, align 8
@@ -1029,19 +1034,19 @@ entry:
 ; Requires protector.
 define void @test9c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test9c:
+; LINUX-I386-LABEL: test9c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test9c:
+; LINUX-X64-LABEL: test9c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test9c:
+; LINUX-KERNEL-X64-LABEL: test9c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test9c:
+; DARWIN-X64-LABEL: test9c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %x = alloca double, align 8
@@ -1058,19 +1063,19 @@ entry:
 ; Requires protector.
 define void @test9d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test9d:
+; LINUX-I386-LABEL: test9d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test9d:
+; LINUX-X64-LABEL: test9d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test9d:
+; LINUX-KERNEL-X64-LABEL: test9d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test9d:
+; DARWIN-X64-LABEL: test9d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %x = alloca double, align 8
@@ -1087,19 +1092,19 @@ entry:
 ; Requires no protector.
 define void @test10a() nounwind uwtable {
 entry:
-; LINUX-I386: test10a:
+; LINUX-I386-LABEL: test10a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test10a:
+; LINUX-X64-LABEL: test10a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test10a:
+; LINUX-KERNEL-X64-LABEL: test10a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test10a:
+; DARWIN-X64-LABEL: test10a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %x = alloca double, align 8
@@ -1131,19 +1136,19 @@ if.end4:                                          ; preds = %if.else, %if.then3,
 ; Requires no protector.
 define void @test10b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test10b:
+; LINUX-I386-LABEL: test10b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test10b:
+; LINUX-X64-LABEL: test10b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test10b:
+; LINUX-KERNEL-X64-LABEL: test10b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test10b:
+; DARWIN-X64-LABEL: test10b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %x = alloca double, align 8
@@ -1175,19 +1180,19 @@ if.end4:                                          ; preds = %if.else, %if.then3,
 ; Requires protector.
 define void @test10c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test10c:
+; LINUX-I386-LABEL: test10c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test10c:
+; LINUX-X64-LABEL: test10c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test10c:
+; LINUX-KERNEL-X64-LABEL: test10c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test10c:
+; DARWIN-X64-LABEL: test10c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %x = alloca double, align 8
@@ -1219,19 +1224,19 @@ if.end4:                                          ; preds = %if.else, %if.then3,
 ; Requires protector.
 define void @test10d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test10d:
+; LINUX-I386-LABEL: test10d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test10d:
+; LINUX-X64-LABEL: test10d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test10d:
+; LINUX-KERNEL-X64-LABEL: test10d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test10d:
+; DARWIN-X64-LABEL: test10d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %x = alloca double, align 8
@@ -1263,19 +1268,19 @@ if.end4:                                          ; preds = %if.else, %if.then3,
 ; Requires no protector.
 define void @test11a() nounwind uwtable {
 entry:
-; LINUX-I386: test11a:
+; LINUX-I386-LABEL: test11a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test11a:
+; LINUX-X64-LABEL: test11a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test11a:
+; LINUX-KERNEL-X64-LABEL: test11a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test11a:
+; DARWIN-X64-LABEL: test11a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -1292,19 +1297,19 @@ entry:
 ; Requires no protector.
 define void @test11b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test11b:
+; LINUX-I386-LABEL: test11b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test11b:
+; LINUX-X64-LABEL: test11b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test11b:
+; LINUX-KERNEL-X64-LABEL: test11b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test11b:
+; DARWIN-X64-LABEL: test11b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -1321,19 +1326,19 @@ entry:
 ; Requires protector.
 define void @test11c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test11c:
+; LINUX-I386-LABEL: test11c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test11c:
+; LINUX-X64-LABEL: test11c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test11c:
+; LINUX-KERNEL-X64-LABEL: test11c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test11c:
+; DARWIN-X64-LABEL: test11c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -1350,19 +1355,19 @@ entry:
 ; Requires protector.
 define void @test11d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test11d:
+; LINUX-I386-LABEL: test11d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test11d:
+; LINUX-X64-LABEL: test11d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test11d:
+; LINUX-KERNEL-X64-LABEL: test11d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test11d:
+; DARWIN-X64-LABEL: test11d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -1379,19 +1384,19 @@ entry:
 ; Requires no protector.
 define void @test12a() nounwind uwtable {
 entry:
-; LINUX-I386: test12a:
+; LINUX-I386-LABEL: test12a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test12a:
+; LINUX-X64-LABEL: test12a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test12a:
+; LINUX-KERNEL-X64-LABEL: test12a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test12a:
+; DARWIN-X64-LABEL: test12a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -1407,19 +1412,19 @@ entry:
 ; Requires no protector.
 define void @test12b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test12b:
+; LINUX-I386-LABEL: test12b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test12b:
+; LINUX-X64-LABEL: test12b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test12b:
+; LINUX-KERNEL-X64-LABEL: test12b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test12b:
+; DARWIN-X64-LABEL: test12b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -1435,19 +1440,19 @@ entry:
 ; Requires protector.
 define void @test12c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test12c:
+; LINUX-I386-LABEL: test12c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test12c:
+; LINUX-X64-LABEL: test12c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test12c:
+; LINUX-KERNEL-X64-LABEL: test12c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test12c:
+; DARWIN-X64-LABEL: test12c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -1463,19 +1468,19 @@ entry:
 ; Requires protector.
 define void @test12d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test12d:
+; LINUX-I386-LABEL: test12d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test12d:
+; LINUX-X64-LABEL: test12d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test12d:
+; LINUX-KERNEL-X64-LABEL: test12d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test12d:
+; DARWIN-X64-LABEL: test12d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -1491,19 +1496,19 @@ entry:
 ; Requires no protector.
 define void @test13a() nounwind uwtable {
 entry:
-; LINUX-I386: test13a:
+; LINUX-I386-LABEL: test13a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test13a:
+; LINUX-X64-LABEL: test13a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test13a:
+; LINUX-KERNEL-X64-LABEL: test13a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test13a:
+; DARWIN-X64-LABEL: test13a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -1517,19 +1522,19 @@ entry:
 ; Requires no protector.
 define void @test13b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test13b:
+; LINUX-I386-LABEL: test13b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test13b:
+; LINUX-X64-LABEL: test13b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test13b:
+; LINUX-KERNEL-X64-LABEL: test13b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test13b:
+; DARWIN-X64-LABEL: test13b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -1543,19 +1548,19 @@ entry:
 ; Requires protector.
 define void @test13c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test13c:
+; LINUX-I386-LABEL: test13c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test13c:
+; LINUX-X64-LABEL: test13c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test13c:
+; LINUX-KERNEL-X64-LABEL: test13c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test13c:
+; DARWIN-X64-LABEL: test13c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -1569,19 +1574,19 @@ entry:
 ; Requires protector.
 define void @test13d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test13d:
+; LINUX-I386-LABEL: test13d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test13d:
+; LINUX-X64-LABEL: test13d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test13d:
+; LINUX-KERNEL-X64-LABEL: test13d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test13d:
+; DARWIN-X64-LABEL: test13d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -1595,19 +1600,19 @@ entry:
 ; Requires no protector.
 define void @test14a() nounwind uwtable {
 entry:
-; LINUX-I386: test14a:
+; LINUX-I386-LABEL: test14a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test14a:
+; LINUX-X64-LABEL: test14a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test14a:
+; LINUX-KERNEL-X64-LABEL: test14a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test14a:
+; DARWIN-X64-LABEL: test14a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -1621,19 +1626,19 @@ entry:
 ; Requires no protector.
 define void @test14b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test14b:
+; LINUX-I386-LABEL: test14b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test14b:
+; LINUX-X64-LABEL: test14b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test14b:
+; LINUX-KERNEL-X64-LABEL: test14b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test14b:
+; DARWIN-X64-LABEL: test14b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -1647,19 +1652,19 @@ entry:
 ; Requires protector.
 define void @test14c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test14c:
+; LINUX-I386-LABEL: test14c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test14c:
+; LINUX-X64-LABEL: test14c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test14c:
+; LINUX-KERNEL-X64-LABEL: test14c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test14c:
+; DARWIN-X64-LABEL: test14c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -1673,19 +1678,19 @@ entry:
 ; Requires protector.
 define void @test14d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test14d:
+; LINUX-I386-LABEL: test14d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test14d:
+; LINUX-X64-LABEL: test14d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test14d:
+; LINUX-KERNEL-X64-LABEL: test14d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test14d:
+; DARWIN-X64-LABEL: test14d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -1700,19 +1705,19 @@ entry:
 ; Requires no protector.
 define void @test15a() nounwind uwtable {
 entry:
-; LINUX-I386: test15a:
+; LINUX-I386-LABEL: test15a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test15a:
+; LINUX-X64-LABEL: test15a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test15a:
+; LINUX-KERNEL-X64-LABEL: test15a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test15a:
+; DARWIN-X64-LABEL: test15a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -1731,19 +1736,19 @@ entry:
 ; Requires no protector.
 define void @test15b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test15b:
+; LINUX-I386-LABEL: test15b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test15b:
+; LINUX-X64-LABEL: test15b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test15b:
+; LINUX-KERNEL-X64-LABEL: test15b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test15b:
+; DARWIN-X64-LABEL: test15b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -1762,19 +1767,19 @@ entry:
 ; Requires protector.
 define void @test15c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test15c:
+; LINUX-I386-LABEL: test15c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test15c:
+; LINUX-X64-LABEL: test15c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test15c:
+; LINUX-KERNEL-X64-LABEL: test15c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test15c:
+; DARWIN-X64-LABEL: test15c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -1793,19 +1798,19 @@ entry:
 ; Requires protector.
 define void @test15d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test15d:
+; LINUX-I386-LABEL: test15d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test15d:
+; LINUX-X64-LABEL: test15d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test15d:
+; LINUX-KERNEL-X64-LABEL: test15d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test15d:
+; DARWIN-X64-LABEL: test15d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -1824,19 +1829,19 @@ entry:
 ; Requires no protector.
 define void @test16a() nounwind uwtable {
 entry:
-; LINUX-I386: test16a:
+; LINUX-I386-LABEL: test16a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test16a:
+; LINUX-X64-LABEL: test16a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test16a:
+; LINUX-KERNEL-X64-LABEL: test16a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test16a:
+; DARWIN-X64-LABEL: test16a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -1852,19 +1857,19 @@ entry:
 ; Requires no protector.
 define void @test16b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test16b:
+; LINUX-I386-LABEL: test16b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test16b:
+; LINUX-X64-LABEL: test16b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test16b:
+; LINUX-KERNEL-X64-LABEL: test16b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test16b:
+; DARWIN-X64-LABEL: test16b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -1880,19 +1885,19 @@ entry:
 ; Requires protector.
 define void @test16c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test16c:
+; LINUX-I386-LABEL: test16c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test16c:
+; LINUX-X64-LABEL: test16c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test16c:
+; LINUX-KERNEL-X64-LABEL: test16c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test16c:
+; DARWIN-X64-LABEL: test16c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -1908,19 +1913,19 @@ entry:
 ; Requires protector.
 define void @test16d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test16d:
+; LINUX-I386-LABEL: test16d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test16d:
+; LINUX-X64-LABEL: test16d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test16d:
+; LINUX-KERNEL-X64-LABEL: test16d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test16d:
+; DARWIN-X64-LABEL: test16d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -1935,19 +1940,19 @@ entry:
 ; Requires no protector.
 define void @test17a() nounwind uwtable {
 entry:
-; LINUX-I386: test17a:
+; LINUX-I386-LABEL: test17a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test17a:
+; LINUX-X64-LABEL: test17a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test17a:
+; LINUX-KERNEL-X64-LABEL: test17a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test17a:
+; DARWIN-X64-LABEL: test17a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.vec, align 16
@@ -1962,19 +1967,19 @@ entry:
 ; Requires no protector.
 define void @test17b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test17b:
+; LINUX-I386-LABEL: test17b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test17b:
+; LINUX-X64-LABEL: test17b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test17b:
+; LINUX-KERNEL-X64-LABEL: test17b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test17b:
+; DARWIN-X64-LABEL: test17b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.vec, align 16
@@ -1989,19 +1994,19 @@ entry:
 ; Requires protector.
 define void @test17c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test17c:
+; LINUX-I386-LABEL: test17c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test17c:
+; LINUX-X64-LABEL: test17c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test17c:
+; LINUX-KERNEL-X64-LABEL: test17c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test17c:
+; DARWIN-X64-LABEL: test17c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.vec, align 16
@@ -2016,19 +2021,19 @@ entry:
 ; Requires protector.
 define void @test17d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test17d:
+; LINUX-I386-LABEL: test17d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test17d:
+; LINUX-X64-LABEL: test17d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test17d:
+; LINUX-KERNEL-X64-LABEL: test17d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test17d:
+; DARWIN-X64-LABEL: test17d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.vec, align 16
@@ -2043,19 +2048,19 @@ entry:
 ; Requires no protector.
 define i32 @test18a() uwtable {
 entry:
-; LINUX-I386: test18a:
+; LINUX-I386-LABEL: test18a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test18a:
+; LINUX-X64-LABEL: test18a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test18a:
+; LINUX-KERNEL-X64-LABEL: test18a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test18a:
+; DARWIN-X64-LABEL: test18a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -2079,19 +2084,19 @@ lpad:
 ; Requires no protector.
 define i32 @test18b() uwtable ssp {
 entry:
-; LINUX-I386: test18b:
+; LINUX-I386-LABEL: test18b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test18b:
+; LINUX-X64-LABEL: test18b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test18b:
+; LINUX-KERNEL-X64-LABEL: test18b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test18b:
+; DARWIN-X64-LABEL: test18b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32, align 4
@@ -2115,19 +2120,19 @@ lpad:
 ; Requires protector.
 define i32 @test18c() uwtable sspstrong {
 entry:
-; LINUX-I386: test18c:
+; LINUX-I386-LABEL: test18c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test18c:
+; LINUX-X64-LABEL: test18c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test18c:
+; LINUX-KERNEL-X64-LABEL: test18c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test18c:
+; DARWIN-X64-LABEL: test18c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -2151,19 +2156,19 @@ lpad:
 ; Requires protector.
 define i32 @test18d() uwtable sspreq {
 entry:
-; LINUX-I386: test18d:
+; LINUX-I386-LABEL: test18d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test18d:
+; LINUX-X64-LABEL: test18d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test18d:
+; LINUX-KERNEL-X64-LABEL: test18d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test18d:
+; DARWIN-X64-LABEL: test18d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32, align 4
@@ -2188,19 +2193,19 @@ lpad:
 ; Requires no protector.
 define i32 @test19a() uwtable {
 entry:
-; LINUX-I386: test19a:
+; LINUX-I386-LABEL: test19a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test19a:
+; LINUX-X64-LABEL: test19a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test19a:
+; LINUX-KERNEL-X64-LABEL: test19a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test19a:
+; DARWIN-X64-LABEL: test19a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -2227,19 +2232,19 @@ lpad:
 ; Requires no protector.
 define i32 @test19b() uwtable ssp {
 entry:
-; LINUX-I386: test19b:
+; LINUX-I386-LABEL: test19b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test19b:
+; LINUX-X64-LABEL: test19b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test19b:
+; LINUX-KERNEL-X64-LABEL: test19b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test19b:
+; DARWIN-X64-LABEL: test19b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.pair, align 4
@@ -2266,19 +2271,19 @@ lpad:
 ; Requires protector.
 define i32 @test19c() uwtable sspstrong {
 entry:
-; LINUX-I386: test19c:
+; LINUX-I386-LABEL: test19c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test19c:
+; LINUX-X64-LABEL: test19c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test19c:
+; LINUX-KERNEL-X64-LABEL: test19c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test19c:
+; DARWIN-X64-LABEL: test19c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
@@ -2305,21 +2310,25 @@ lpad:
 ; Requires protector.
 define i32 @test19d() uwtable sspreq {
 entry:
-; LINUX-I386: test19d:
+; LINUX-I386-LABEL: test19d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
+; LINUX-I386-NOT: calll __stack_chk_fail
 
-; LINUX-X64: test19d:
+; LINUX-X64-LABEL: test19d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
+; LINUX-X64-NOT: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test19d:
+; LINUX-KERNEL-X64-LABEL: test19d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 
-; DARWIN-X64: test19d:
+; DARWIN-X64-LABEL: test19d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
+; DARWIN-X64-NOT: callq ___stack_chk_fail
   %c = alloca %struct.pair, align 4
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
@@ -2343,19 +2352,19 @@ lpad:
 ; Requires no protector.
 define void @test20a() nounwind uwtable {
 entry:
-; LINUX-I386: test20a:
+; LINUX-I386-LABEL: test20a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test20a:
+; LINUX-X64-LABEL: test20a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test20a:
+; LINUX-KERNEL-X64-LABEL: test20a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test20a:
+; DARWIN-X64-LABEL: test20a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32*, align 8
@@ -2373,19 +2382,19 @@ entry:
 ; Requires no protector.
 define void @test20b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test20b:
+; LINUX-I386-LABEL: test20b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test20b:
+; LINUX-X64-LABEL: test20b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test20b:
+; LINUX-KERNEL-X64-LABEL: test20b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test20b:
+; DARWIN-X64-LABEL: test20b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32*, align 8
@@ -2403,19 +2412,19 @@ entry:
 ; Requires protector.
 define void @test20c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test20c:
+; LINUX-I386-LABEL: test20c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test20c:
+; LINUX-X64-LABEL: test20c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test20c:
+; LINUX-KERNEL-X64-LABEL: test20c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test20c:
+; DARWIN-X64-LABEL: test20c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32*, align 8
@@ -2433,19 +2442,19 @@ entry:
 ; Requires protector.
 define void @test20d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test20d:
+; LINUX-I386-LABEL: test20d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test20d:
+; LINUX-X64-LABEL: test20d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test20d:
+; LINUX-KERNEL-X64-LABEL: test20d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test20d:
+; DARWIN-X64-LABEL: test20d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32*, align 8
@@ -2463,19 +2472,19 @@ entry:
 ; Requires no protector.
 define void @test21a() nounwind uwtable {
 entry:
-; LINUX-I386: test21a:
+; LINUX-I386-LABEL: test21a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test21a:
+; LINUX-X64-LABEL: test21a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test21a:
+; LINUX-KERNEL-X64-LABEL: test21a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test21a:
+; DARWIN-X64-LABEL: test21a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32*, align 8
@@ -2494,19 +2503,19 @@ entry:
 ; Requires no protector.
 define void @test21b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test21b:
+; LINUX-I386-LABEL: test21b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test21b:
+; LINUX-X64-LABEL: test21b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test21b:
+; LINUX-KERNEL-X64-LABEL: test21b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test21b:
+; DARWIN-X64-LABEL: test21b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca i32*, align 8
@@ -2525,19 +2534,19 @@ entry:
 ; Requires protector.
 define void @test21c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test21c:
+; LINUX-I386-LABEL: test21c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test21c:
+; LINUX-X64-LABEL: test21c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test21c:
+; LINUX-KERNEL-X64-LABEL: test21c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test21c:
+; DARWIN-X64-LABEL: test21c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32*, align 8
@@ -2556,19 +2565,19 @@ entry:
 ; Requires protector.
 define void @test21d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test21d:
+; LINUX-I386-LABEL: test21d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test21d:
+; LINUX-X64-LABEL: test21d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test21d:
+; LINUX-KERNEL-X64-LABEL: test21d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test21d:
+; DARWIN-X64-LABEL: test21d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca i32*, align 8
@@ -2587,19 +2596,19 @@ entry:
 ; Requires no protector.
 define signext i8 @test22a() nounwind uwtable {
 entry:
-; LINUX-I386: test22a:
+; LINUX-I386-LABEL: test22a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test22a:
+; LINUX-X64-LABEL: test22a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test22a:
+; LINUX-KERNEL-X64-LABEL: test22a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test22a:
+; DARWIN-X64-LABEL: test22a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca %class.A, align 1
@@ -2614,19 +2623,19 @@ entry:
 ; Requires no protector.
 define signext i8 @test22b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test22b:
+; LINUX-I386-LABEL: test22b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test22b:
+; LINUX-X64-LABEL: test22b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test22b:
+; LINUX-KERNEL-X64-LABEL: test22b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test22b:
+; DARWIN-X64-LABEL: test22b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca %class.A, align 1
@@ -2641,19 +2650,19 @@ entry:
 ; Requires protector.
 define signext i8 @test22c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test22c:
+; LINUX-I386-LABEL: test22c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test22c:
+; LINUX-X64-LABEL: test22c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test22c:
+; LINUX-KERNEL-X64-LABEL: test22c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test22c:
+; DARWIN-X64-LABEL: test22c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca %class.A, align 1
@@ -2668,19 +2677,19 @@ entry:
 ; Requires protector.
 define signext i8 @test22d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test22d:
+; LINUX-I386-LABEL: test22d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test22d:
+; LINUX-X64-LABEL: test22d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test22d:
+; LINUX-KERNEL-X64-LABEL: test22d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test22d:
+; DARWIN-X64-LABEL: test22d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca %class.A, align 1
@@ -2695,19 +2704,19 @@ entry:
 ; Requires no protector.
 define signext i8 @test23a() nounwind uwtable {
 entry:
-; LINUX-I386: test23a:
+; LINUX-I386-LABEL: test23a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test23a:
+; LINUX-X64-LABEL: test23a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test23a:
+; LINUX-KERNEL-X64-LABEL: test23a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test23a:
+; DARWIN-X64-LABEL: test23a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %x = alloca %struct.deep, align 1
@@ -2726,19 +2735,19 @@ entry:
 ; Requires no protector.
 define signext i8 @test23b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test23b:
+; LINUX-I386-LABEL: test23b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test23b:
+; LINUX-X64-LABEL: test23b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test23b:
+; LINUX-KERNEL-X64-LABEL: test23b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test23b:
+; DARWIN-X64-LABEL: test23b:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %x = alloca %struct.deep, align 1
@@ -2757,19 +2766,19 @@ entry:
 ; Requires protector.
 define signext i8 @test23c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test23c:
+; LINUX-I386-LABEL: test23c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test23c:
+; LINUX-X64-LABEL: test23c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test23c:
+; LINUX-KERNEL-X64-LABEL: test23c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test23c:
+; DARWIN-X64-LABEL: test23c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %x = alloca %struct.deep, align 1
@@ -2788,19 +2797,19 @@ entry:
 ; Requires protector.
 define signext i8 @test23d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test23d:
+; LINUX-I386-LABEL: test23d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test23d:
+; LINUX-X64-LABEL: test23d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test23d:
+; LINUX-KERNEL-X64-LABEL: test23d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test23d:
+; DARWIN-X64-LABEL: test23d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %x = alloca %struct.deep, align 1
@@ -2819,19 +2828,19 @@ entry:
 ; Requires no protector.
 define void @test24a(i32 %n) nounwind uwtable {
 entry:
-; LINUX-I386: test24a:
+; LINUX-I386-LABEL: test24a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test24a:
+; LINUX-X64-LABEL: test24a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test24a:
+; LINUX-KERNEL-X64-LABEL: test24a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test24a:
+; DARWIN-X64-LABEL: test24a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %n.addr = alloca i32, align 4
@@ -2850,19 +2859,19 @@ entry:
 ; Requires protector.
 define void @test24b(i32 %n) nounwind uwtable ssp {
 entry:
-; LINUX-I386: test24b:
+; LINUX-I386-LABEL: test24b:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test24b:
+; LINUX-X64-LABEL: test24b:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test24b:
+; LINUX-KERNEL-X64-LABEL: test24b:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test24b:
+; DARWIN-X64-LABEL: test24b:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %n.addr = alloca i32, align 4
@@ -2881,19 +2890,19 @@ entry:
 ; Requires protector.
 define void @test24c(i32 %n) nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test24c:
+; LINUX-I386-LABEL: test24c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test24c:
+; LINUX-X64-LABEL: test24c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test24c:
+; LINUX-KERNEL-X64-LABEL: test24c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test24c:
+; DARWIN-X64-LABEL: test24c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %n.addr = alloca i32, align 4
@@ -2912,19 +2921,19 @@ entry:
 ; Requires protector.
 define void @test24d(i32 %n) nounwind uwtable sspreq  {
 entry:
-; LINUX-I386: test24d:
+; LINUX-I386-LABEL: test24d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test24d:
+; LINUX-X64-LABEL: test24d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test24d:
+; LINUX-KERNEL-X64-LABEL: test24d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test24d:
+; DARWIN-X64-LABEL: test24d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %n.addr = alloca i32, align 4
@@ -2943,19 +2952,19 @@ entry:
 ; Requires no protector.
 define i32 @test25a() nounwind uwtable {
 entry:
-; LINUX-I386: test25a:
+; LINUX-I386-LABEL: test25a:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test25a:
+; LINUX-X64-LABEL: test25a:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test25a:
+; LINUX-KERNEL-X64-LABEL: test25a:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test25a:
+; DARWIN-X64-LABEL: test25a:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %a = alloca [4 x i32], align 16
@@ -2969,19 +2978,19 @@ entry:
 ; Requires no protector, except for Darwin which _does_ require a protector.
 define i32 @test25b() nounwind uwtable ssp {
 entry:
-; LINUX-I386: test25b:
+; LINUX-I386-LABEL: test25b:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test25b:
+; LINUX-X64-LABEL: test25b:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test25b:
+; LINUX-KERNEL-X64-LABEL: test25b:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test25b:
+; DARWIN-X64-LABEL: test25b:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca [4 x i32], align 16
@@ -2995,19 +3004,19 @@ entry:
 ; Requires protector.
 define i32 @test25c() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test25c:
+; LINUX-I386-LABEL: test25c:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test25c:
+; LINUX-X64-LABEL: test25c:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test25c:
+; LINUX-KERNEL-X64-LABEL: test25c:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test25c:
+; DARWIN-X64-LABEL: test25c:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca [4 x i32], align 16
@@ -3021,19 +3030,19 @@ entry:
 ; Requires protector.
 define i32 @test25d() nounwind uwtable sspreq {
 entry:
-; LINUX-I386: test25d:
+; LINUX-I386-LABEL: test25d:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test25d:
+; LINUX-X64-LABEL: test25d:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test25d:
+; LINUX-KERNEL-X64-LABEL: test25d:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test25d:
+; DARWIN-X64-LABEL: test25d:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca [4 x i32], align 16
@@ -3049,19 +3058,19 @@ entry:
 ; Requires no protector.
 define void @test26() nounwind uwtable sspstrong {
 entry:
-; LINUX-I386: test26:
+; LINUX-I386-LABEL: test26:
 ; LINUX-I386-NOT: calll __stack_chk_fail
 ; LINUX-I386: .cfi_endproc
 
-; LINUX-X64: test26:
+; LINUX-X64-LABEL: test26:
 ; LINUX-X64-NOT: callq __stack_chk_fail
 ; LINUX-X64: .cfi_endproc
 
-; LINUX-KERNEL-X64: test26:
+; LINUX-KERNEL-X64-LABEL: test26:
 ; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
 ; LINUX-KERNEL-X64: .cfi_endproc
 
-; DARWIN-X64: test26:
+; DARWIN-X64-LABEL: test26:
 ; DARWIN-X64-NOT: callq ___stack_chk_fail
 ; DARWIN-X64: .cfi_endproc
   %c = alloca %struct.nest, align 4
@@ -3080,19 +3089,19 @@ entry:
 ; Requires protector.
 define i32 @test27(i32 %arg) nounwind uwtable sspstrong {
 bb:
-; LINUX-I386: test27:
+; LINUX-I386-LABEL: test27:
 ; LINUX-I386: mov{{l|q}} %gs:
 ; LINUX-I386: calll __stack_chk_fail
 
-; LINUX-X64: test27:
+; LINUX-X64-LABEL: test27:
 ; LINUX-X64: mov{{l|q}} %fs:
 ; LINUX-X64: callq __stack_chk_fail
 
-; LINUX-KERNEL-X64: test27:
+; LINUX-KERNEL-X64-LABEL: test27:
 ; LINUX-KERNEL-X64: mov{{l|q}} %gs:
 ; LINUX-KERNEL-X64: callq __stack_chk_fail
 
-; DARWIN-X64: test27:
+; DARWIN-X64-LABEL: test27:
 ; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
 ; DARWIN-X64: callq ___stack_chk_fail
   %tmp = alloca %struct.small*, align 8
diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll
new file mode 100644
index 000000000000..ed9558302848
--- /dev/null
+++ b/test/CodeGen/X86/stackmap.ll
@@ -0,0 +1,292 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s
+;
+; Note: Print verbose stackmaps using -debug-only=stackmaps.
+
+; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; CHECK-NEXT:   .long   0
+; Num LargeConstants
+; CHECK-NEXT:   .long   1
+; CHECK-NEXT:   .quad   4294967296
+; Num Callsites
+; CHECK-NEXT:   .long   11
+
+; Constant arguments
+;
+; CHECK-NEXT:   .long   1
+; CHECK-NEXT:   .long   L{{.*}}-_constantargs
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  4
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65535
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65536
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; LargeConstant at index 0
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+
+define void @constantargs() {
+entry:
+  %0 = inttoptr i64 12345 to i8*
+  tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 1, i32 15, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+  ret void
+}
+
+; Inline OSR Exit
+;
+; CHECK-NEXT:   .long   3
+; CHECK-NEXT:   .long   L{{.*}}-_osrinline
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long  0
+define void @osrinline(i64 %a, i64 %b) {
+entry:
+  ; Runtime void->void call.
+  call void inttoptr (i64 -559038737 to void ()*)()
+  ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
+  call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 3, i32 12, i64 %a, i64 %b)
+  ret void
+}
+
+; Cold OSR Exit
+;
+; 2 live variables in registers.
+;
+; CHECK-NEXT:   .long  4
+; CHECK-NEXT:   .long   L{{.*}}-_osrcold
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long  0
+define void @osrcold(i64 %a, i64 %b) {
+entry:
+  %test = icmp slt i64 %a, %b
+  br i1 %test, label %ret, label %cold
+cold:
+  ; OSR patchpoint with a 15-byte patchable region and 2 live vars.
+  %thunk = inttoptr i64 -559038737 to i8*
+  call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b)
+  unreachable
+ret:
+  ret void
+}
+
+; Property Read
+; CHECK-NEXT:  .long  5
+; CHECK-NEXT:   .long   L{{.*}}-_propertyRead
+; CHECK-NEXT:  .short  0
+; CHECK-NEXT:  .short  0
+;
+; FIXME: There are currently no stackmap entries. After moving to
+; AnyRegCC, we will have entries for the object and return value.
+define i64 @propertyRead(i64* %obj) {
+entry:
+  %resolveRead = inttoptr i64 -559038737 to i8*
+  %result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveRead, i32 1, i64* %obj)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Property Write
+; CHECK-NEXT:  .long  6
+; CHECK-NEXT:   .long   L{{.*}}-_propertyWrite
+; CHECK-NEXT:  .short  0
+; CHECK-NEXT:  .short  0
+;
+; FIXME: There are currently no stackmap entries. After moving to
+; AnyRegCC, we will have entries for the object and return value.
+define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
+entry:
+  %resolveWrite = inttoptr i64 -559038737 to i8*
+  call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+  ret void
+}
+
+; Void JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK-NEXT:   .long  7
+; CHECK-NEXT:   .long   L{{.*}}-_jsVoidCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 -559038737 to i8*
+  call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  ret void
+}
+
+; i64 JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK:        .long  8
+; CHECK-NEXT:   .long   L{{.*}}-_jsIntCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 -559038737 to i8*
+  %result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Spilled stack map values.
+;
+; Verify 17 stack map entries.
+;
+; CHECK:      .long 11
+; CHECK-NEXT: .long L{{.*}}-_spilledValue
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 17
+;
+; Check that at least one is a spilled entry from RBP.
+; Location: Indirect RBP + ...
+; CHECK:      .byte 3
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .short 6
+define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
+entry:
+  call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+  ret void
+}
+
+; Spilled stack map values.
+;
+; Verify 17 stack map entries.
+;
+; CHECK:       .long 12
+; CHECK-LABEL: .long L{{.*}}-_spilledStackMapValue
+; CHECK-NEXT:  .short 0
+; CHECK-NEXT:  .short 17
+;
+; Check that at least one is a spilled entry from RBP.
+; Location: Indirect RBP + ...
+; CHECK:      .byte 3
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .short 6
+define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
+entry:
+  call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+  ret void
+}
+
+; Spill a subregister stackmap operand.
+;
+; CHECK:       .long 13
+; CHECK-LABEL: .long L{{.*}}-_spillSubReg
+; CHECK-NEXT:  .short 0
+; 1 location
+; CHECK-NEXT:  .short 1
+;
+; Check that the subregister operand is a 4-byte spill.
+; Location: Indirect, 4-byte, RBP + ...
+; CHECK:      .byte 3
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .short 6
+define void @spillSubReg(i64 %arg) #0 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  unreachable
+
+bb2:
+  %tmp = load i64* inttoptr (i64 140685446136880 to i64*)
+  br i1 undef, label %bb16, label %bb17
+
+bb16:
+  unreachable
+
+bb17:
+  %tmp32 = trunc i64 %tmp to i32
+  br i1 undef, label %bb60, label %bb61
+
+bb60:
+  tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 13, i32 5, i32 %tmp32)
+  unreachable
+
+bb61:
+  unreachable
+}
+
+; Map a single byte subregister. There is no DWARF register number, so
+; we expect the register to be encoded with the proper size and spill offset.
+;
+; CHECK:       .long 14
+; CHECK-LABEL: .long L{{.*}}-_subRegOffset
+; CHECK-NEXT:  .short 0
+; 2 locations
+; CHECK-NEXT:  .short 2
+;
+; Check that the subregister operands are 1-byte register locations.
+; Location 0: Register, 1-byte, AL
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .long 0
+;
+; Location 1: Register, 1-byte, BL
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 3
+; CHECK-NEXT: .long 0
+define void @subRegOffset(i16 %arg) {
+  %v = mul i16 %arg, 5
+  %a0 = trunc i16 %v to i8
+  tail call void asm sideeffect "nop", "~{bx}"() nounwind
+  %arghi = lshr i16 %v, 8
+  %a1 = trunc i16 %arghi to i8
+  tail call void asm sideeffect "nop", "~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
+  tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 14, i32 5, i8 %a0, i8 %a1)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i32, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
diff --git a/test/CodeGen/X86/stdcall-notailcall.ll b/test/CodeGen/X86/stdcall-notailcall.ll
index 8e33c30bf293..8f522cda284a 100644
--- a/test/CodeGen/X86/stdcall-notailcall.ll
+++ b/test/CodeGen/X86/stdcall-notailcall.ll
@@ -2,7 +2,7 @@
 
 %struct.I = type { i32 (...)** }
 define x86_stdcallcc void @bar(%struct.I* nocapture %this) ssp align 2 {
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK-NOT: jmp
 ; CHECK: ret $4
 entry:
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 0dd228eb145f..7557f255658d 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -12,11 +12,11 @@ entry:
   %D = or i32 %C, %B
   store i32 %D, i32* %a0, align 4
   ret void
-  
-; X64: test1:
+
+; X64-LABEL: test1:
 ; X64: movb	%sil, (%rdi)
 
-; X32: test1:
+; X32-LABEL: test1:
 ; X32: movb	8(%esp), %al
 ; X32: movb	%al, (%{{.*}})
 }
@@ -30,12 +30,12 @@ entry:
   %D = or i32 %B, %CS
   store i32 %D, i32* %a0, align 4
   ret void
-; X64: test2:
+; X64-LABEL: test2:
 ; X64: movb	%sil, 1(%rdi)
 
-; X32: test2:
-; X32: movb	8(%esp), %al
-; X32: movb	%al, 1(%{{.*}})
+; X32-LABEL: test2:
+; X32: movb	8(%esp), %[[REG:[abcd]l]]
+; X32: movb	%[[REG]], 1(%{{.*}})
 }
 
 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -46,10 +46,10 @@ entry:
   %D = or i32 %B, %C
   store i32 %D, i32* %a0, align 4
   ret void
-; X64: test3:
+; X64-LABEL: test3:
 ; X64: movw	%si, (%rdi)
 
-; X32: test3:
+; X32-LABEL: test3:
 ; X32: movw	8(%esp), %ax
 ; X32: movw	%ax, (%{{.*}})
 }
@@ -63,12 +63,12 @@ entry:
   %D = or i32 %B, %CS
   store i32 %D, i32* %a0, align 4
   ret void
-; X64: test4:
+; X64-LABEL: test4:
 ; X64: movw	%si, 2(%rdi)
 
-; X32: test4:
-; X32: movl	8(%esp), %eax
-; X32: movw	%ax, 2(%{{.*}})
+; X32-LABEL: test4:
+; X32: movl	8(%esp), %e[[REG:[abcd]x]]
+; X32: movw	%[[REG]], 2(%{{.*}})
 }
 
 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -80,12 +80,12 @@ entry:
   %D = or i64 %B, %CS
   store i64 %D, i64* %a0, align 4
   ret void
-; X64: test5:
+; X64-LABEL: test5:
 ; X64: movw	%si, 2(%rdi)
 
-; X32: test5:
-; X32: movzwl	8(%esp), %eax
-; X32: movw	%ax, 2(%{{.*}})
+; X32-LABEL: test5:
+; X32: movzwl	8(%esp), %e[[REG:[abcd]x]]
+; X32: movw	%[[REG]], 2(%{{.*}})
 }
 
 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
@@ -97,13 +97,13 @@ entry:
   %D = or i64 %B, %CS
   store i64 %D, i64* %a0, align 4
   ret void
-; X64: test6:
+; X64-LABEL: test6:
 ; X64: movb	%sil, 5(%rdi)
 
 
-; X32: test6:
-; X32: movb	8(%esp), %al
-; X32: movb	%al, 5(%{{.*}})
+; X32-LABEL: test6:
+; X32: movb	8(%esp), %[[REG:[abcd]l]]
+; X32: movb	%[[REG]], 5(%{{.*}})
 }
 
 define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
@@ -116,20 +116,20 @@ entry:
   %D = or i64 %B, %CS
   store i64 %D, i64* %a0, align 4
   ret i32 %OtherLoad
-; X64: test7:
+; X64-LABEL: test7:
 ; X64: movb	%sil, 5(%rdi)
 
 
-; X32: test7:
-; X32: movb	8(%esp), %cl
-; X32: movb	%cl, 5(%{{.*}})
+; X32-LABEL: test7:
+; X32: movb	8(%esp), %[[REG:[abcd]l]]
+; X32: movb	%[[REG]], 5(%{{.*}})
 }
 
 ; PR7833
 
 @g_16 = internal global i32 -1
 
-; X64: test8:
+; X64-LABEL: test8:
 ; X64-NEXT: movl _g_16(%rip), %eax
 ; X64-NEXT: movl $0, _g_16(%rip)
 ; X64-NEXT: orl  $1, %eax
@@ -143,7 +143,7 @@ define void @test8() nounwind {
   ret void
 }
 
-; X64: test9:
+; X64-LABEL: test9:
 ; X64-NEXT: orb $1, _g_16(%rip)
 ; X64-NEXT: ret
 define void @test9() nounwind {
@@ -154,7 +154,7 @@ define void @test9() nounwind {
 }
 
 ; rdar://8494845 + PR8244
-; X64: test10:
+; X64-LABEL: test10:
 ; X64-NEXT: movsbl	(%rdi), %eax
 ; X64-NEXT: shrl	$8, %eax
 ; X64-NEXT: ret
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
index 070cccdb87dd..bbeb7443c07f 100644
--- a/test/CodeGen/X86/store_op_load_fold.ll
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -5,7 +5,7 @@
 @X = internal global i16 0              ; <i16*> [#uses=2]
 
 define void @foo() nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK-NOT: mov
 ; CHECK: add
 ; CHECK-NEXT: ret
@@ -19,7 +19,7 @@ define void @foo() nounwind {
 %struct.S2 = type { i64, i16, [2 x i8], i8, [3 x i8], [7 x i8], i8, [8 x i8] }
 @s2 = external global %struct.S2, align 16
 define void @test2() nounwind uwtable ssp {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: mov
 ; CHECK-NEXT: and
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 6e4fe90053f1..705fdcdc13af 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -17,10 +17,10 @@ cond_true2732.preheader:                ; preds = %entry
         store i64 %tmp2676.us.us, i64* %tmp2666
         ret i32 0
 
-; INTEL: 	and	{{E..}}, DWORD PTR [360]
-; INTEL:	and	DWORD PTR [356], {{E..}}
-; FIXME:	mov	DWORD PTR [360], {{E..}}
-; The above line comes out as 'mov 360, EAX', but when the register is ECX it works?
+; INTEL: 	and	{{e..}}, dword ptr [360]
+; INTEL:	and	dword ptr [356], {{e..}}
+; FIXME:	mov	dword ptr [360], {{e..}}
+; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
 
 ; ATT: 	andl	360, %{{e..}}
 ; ATT:	andl	%{{e..}}, 356
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index db8313cecdce..baaee3541108 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -18,7 +18,7 @@ overflow:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
 
-; CHECK: func1:
+; CHECK-LABEL: func1:
 ; CHECK: subl 20(%esp)
 ; CHECK-NEXT: jno
 }
@@ -38,7 +38,7 @@ carry:
   %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
 
-; CHECK: func2:
+; CHECK-LABEL: func2:
 ; CHECK: subl 20(%esp)
 ; CHECK-NEXT: jae
 }
@@ -53,7 +53,7 @@ entry:
   %obit = extractvalue {i32, i1} %t, 1
   ret i1 %obit
 
-; CHECK: func3:
+; CHECK-LABEL: func3:
 ; CHECK: decl
 ; CHECK-NEXT: seto
 }
diff --git a/test/CodeGen/X86/sub.ll b/test/CodeGen/X86/sub.ll
index ee5ea1d0fb6e..3cf79a3deca2 100644
--- a/test/CodeGen/X86/sub.ll
+++ b/test/CodeGen/X86/sub.ll
@@ -4,7 +4,7 @@ define i32 @test1(i32 %x) {
   %xor = xor i32 %x, 31
   %sub = sub i32 32, %xor
   ret i32 %sub
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK:      xorl $-32
 ; CHECK-NEXT: addl $33
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
deleted file mode 100644
index 04d4a7199632..000000000000
--- a/test/CodeGen/X86/subtarget-feature-change.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-; This should not generate SSE instructions:
-;
-; CHECK: without.sse:
-; CHECK: flds
-; CHECK: fmuls
-; CHECK: fstps
-define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 {
-entry:
-  %cmp9 = icmp sgt i32 %n, 0
-  br i1 %cmp9, label %for.body, label %for.end
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
-  %mul = fmul float %0, %1
-  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-; This should generate SSE instructions:
-;
-; CHECK: with.sse
-; CHECK: movss
-; CHECK: mulss
-; CHECK: movss
-define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 {
-entry:
-  %cmp9 = icmp sgt i32 %n, 0
-  br i1 %cmp9, label %for.body, label %for.end
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
-  %mul = fmul float %0, %1
-  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
-attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index 58a5c0338547..a80002bc97cc 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -53,7 +53,7 @@ declare void @foo(i32)
 ; Don't zero extend the test operands to pointer type if it can be avoided.
 ; rdar://8781238
 define void @test2(i32 %x) nounwind ssp {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: cmpl $6
 ; CHECK: ja
 
@@ -81,7 +81,7 @@ if.end:                                           ; preds = %entry
 declare void @bar()
 
 define void @test3(i32 %x) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: cmpl $5
 ; CHECK: ja
 ; CHECK: cmpl $4
diff --git a/test/CodeGen/X86/switch-order-weight.ll b/test/CodeGen/X86/switch-order-weight.ll
index 0fdd56d4e1d3..207e0b3f707b 100644
--- a/test/CodeGen/X86/switch-order-weight.ll
+++ b/test/CodeGen/X86/switch-order-weight.ll
@@ -10,7 +10,7 @@ entry:
     i32 20, label %if.then5
   ]
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: unr
 ; CHECK: cmpl $10
 ; CHECK: bar
diff --git a/test/CodeGen/X86/tail-call-attrs.ll b/test/CodeGen/X86/tail-call-attrs.ll
new file mode 100644
index 000000000000..17ebe997c8c1
--- /dev/null
+++ b/test/CodeGen/X86/tail-call-attrs.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -o - %s | FileCheck %s
+
+; Simple case: completely identical returns, even with extensions, shouldn't be
+; a barrier to tail calls.
+declare zeroext i1 @give_bool()
+define zeroext i1 @test_bool() {
+; CHECK-LABEL: test_bool:
+; CHECK: jmp
+  %call = tail call zeroext i1 @give_bool()
+  ret i1 %call
+}
+
+; Here, there's more zero extension to be done between the call and the return,
+; so a tail call is impossible (well, according to current Clang practice
+; anyway. The AMD64 ABI isn't crystal clear on the matter).
+declare zeroext i32 @give_i32()
+define zeroext i8 @test_i32() {
+; CHECK-LABEL: test_i32:
+; CHECK: callq _give_i32
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+
+  %call = tail call zeroext i32 @give_i32()
+  %val = trunc i32 %call to i8
+  ret i8 %val
+}
+
+; Here, one function is zeroext and the other is signext. To the extent that
+; these both mean something they are incompatible so no tail call is possible.
+declare zeroext i16 @give_unsigned_i16()
+define signext i16 @test_incompatible_i16() {
+; CHECK-LABEL: test_incompatible_i16:
+; CHECK: callq _give_unsigned_i16
+; CHECK: cwtl
+; CHECK: ret
+
+  %call = tail call zeroext i16 @give_unsigned_i16()
+  ret i16 %call
+}
+
+declare inreg i32 @give_i32_inreg()
+define i32 @test_inreg_to_normal() {
+; CHECK-LABEL: test_inreg_to_normal:
+; CHECK: callq _give_i32_inreg
+; CHECK: ret
+  %val = tail call inreg i32 @give_i32_inreg()
+  ret i32 %val
+}
+
+define inreg i32 @test_normal_to_inreg() {
+; CHECK-LABEL: test_normal_to_inreg:
+; CHECK: callq _give_i32
+; CHECK: ret
+  %val = tail call i32 @give_i32()
+  ret i32 %val
+}
diff --git a/test/CodeGen/X86/tail-call-got.ll b/test/CodeGen/X86/tail-call-got.ll
index 1d7eb2e29876..84d561dcd8c3 100644
--- a/test/CodeGen/X86/tail-call-got.ll
+++ b/test/CodeGen/X86/tail-call-got.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-unknown-freebsd9.0"
 
 define double @test1(double %x) nounwind readnone {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: movl foo@GOT
 ; CHECK-NEXT: jmpl
   %1 = tail call double @foo(double %x) nounwind readnone
@@ -14,7 +14,7 @@ define double @test1(double %x) nounwind readnone {
 declare double @foo(double) readnone
 
 define double @test2(double %x) nounwind readnone {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: movl sin@GOT
 ; CHECK-NEXT: jmpl
   %1 = tail call double @sin(double %x) nounwind readnone
diff --git a/test/CodeGen/X86/tail-call-legality.ll b/test/CodeGen/X86/tail-call-legality.ll
new file mode 100644
index 000000000000..119610430b18
--- /dev/null
+++ b/test/CodeGen/X86/tail-call-legality.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=x86 -o - < %s | FileCheck %s
+
+; This used to be classified as a tail call because of a mismatch in the
+; arguments seen by Analysis.cpp and ISelLowering. As seen by ISelLowering, they
+; both return {i32, i32, i32} (since i64 is illegal) which is fine for a tail
+; call.
+
+; As seen by Analysis.cpp: i64 -> i32 is a valid trunc, second i32 passes
+; straight through and the third is undef, also OK for a tail call.
+
+; Analysis.cpp was wrong.
+
+; FIXME: in principle we *could* support some tail calls involving truncations
+; of illegal types: a single "trunc i64 %whatever to i32" is probably valid
+; because of how the extra registers are laid out.
+
+declare {i64, i32} @test()
+
+define {i32, i32, i32} @test_pair_notail(i64 %in) {
+; CHECK-LABEL: test_pair_notail
+; CHECK-NOT: jmp
+
+  %whole = tail call {i64, i32} @test()
+  %first = extractvalue {i64, i32} %whole, 0
+  %first.trunc = trunc i64 %first to i32
+
+  %second = extractvalue {i64, i32} %whole, 1
+
+  %tmp = insertvalue {i32, i32, i32} undef, i32 %first.trunc, 0
+  %res = insertvalue {i32, i32, i32} %tmp, i32 %second, 1
+  ret {i32, i32, i32} %res
+}
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 6e20af5866e4..73d93ff993c6 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -13,7 +13,7 @@ declare i1 @qux()
 ; BranchFolding should tail-merge the stores since they all precede
 ; direct branches to the same place.
 
-; CHECK: tail_merge_me:
+; CHECK-LABEL: tail_merge_me:
 ; CHECK-NOT:  GHJK
 ; CHECK:      movl $0, GHJK(%rip)
 ; CHECK-NEXT: movl $1, HABC(%rip)
@@ -60,7 +60,7 @@ declare i8* @choose(i8*, i8*)
 ; BranchFolding should tail-duplicate the indirect jump to avoid
 ; redundant branching.
 
-; CHECK: tail_duplicate_me:
+; CHECK-LABEL: tail_duplicate_me:
 ; CHECK:      movl $0, GHJK(%rip)
 ; CHECK-NEXT: jmpq *%r
 ; CHECK:      movl $0, GHJK(%rip)
@@ -107,7 +107,7 @@ altret:
 ; BranchFolding shouldn't try to merge the tails of two blocks
 ; with only a branch in common, regardless of the fallthrough situation.
 
-; CHECK: dont_merge_oddly:
+; CHECK-LABEL: dont_merge_oddly:
 ; CHECK-NOT:   ret
 ; CHECK:        ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   jbe .LBB2_3
@@ -118,7 +118,7 @@ altret:
 ; CHECK-NEXT:   ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   jbe .LBB2_2
 ; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT:   xorb %al, %al
+; CHECK-NEXT:   xorl %eax, %eax
 ; CHECK-NEXT:   ret
 ; CHECK-NEXT: .LBB2_2:
 ; CHECK-NEXT:   movb $1, %al
@@ -153,7 +153,7 @@ bb30:
 ; Do any-size tail-merging when two candidate blocks will both require
 ; an unconditional jump to complete a two-way conditional branch.
 
-; CHECK: c_expand_expr_stmt:
+; CHECK-LABEL: c_expand_expr_stmt:
 ;
 ; This test only works when register allocation happens to use %rax for both
 ; load addresses.
@@ -161,7 +161,7 @@ bb30:
 ; CHE:        jmp .LBB3_11
 ; CHE-NEXT: .LBB3_9:
 ; CHE-NEXT:   movq 8(%rax), %rax
-; CHE-NEXT:   xorb %dl, %dl
+; CHE-NEXT:   xorl %edx, %edx
 ; CHE-NEXT:   movb 16(%rax), %al
 ; CHE-NEXT:   cmpb $16, %al
 ; CHE-NEXT:   je .LBB3_11
@@ -275,7 +275,7 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
 ; instructions are involved. This function should have only
 ; one ret instruction.
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK:        callq func
 ; CHECK-NEXT: .LBB4_2:
 ; CHECK-NEXT:   popq
@@ -298,7 +298,7 @@ declare void @func()
 
 ; one - One instruction may be tail-duplicated even with optsize.
 
-; CHECK: one:
+; CHECK-LABEL: one:
 ; CHECK: movl $0, XYZ(%rip)
 ; CHECK: movl $0, XYZ(%rip)
 
@@ -335,7 +335,7 @@ return:
 ; tail instead of one. This is too much to be merged, given
 ; the optsize attribute.
 
-; CHECK: two:
+; CHECK-LABEL: two:
 ; CHECK-NOT: XYZ
 ; CHECK: ret
 ; CHECK: movl $0, XYZ(%rip)
@@ -374,7 +374,7 @@ return:
 ; two_nosize - Same as two, but without the optsize attribute.
 ; Now two instructions are enough to be tail-duplicated.
 
-; CHECK: two_nosize:
+; CHECK-LABEL: two_nosize:
 ; CHECK: movl $0, XYZ(%rip)
 ; CHECK: movl $1, XYZ(%rip)
 ; CHECK: movl $0, XYZ(%rip)
@@ -412,7 +412,7 @@ return:
 ; Tail-merging should merge the two ret instructions since one side
 ; can fall-through into the ret and the other side has to branch anyway.
 
-; CHECK: TESTE:
+; CHECK-LABEL: TESTE:
 ; CHECK: ret
 ; CHECK-NOT: ret
 ; CHECK: size TESTE
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
index 60fe77661797..deab1dcc7eb9 100644
--- a/test/CodeGen/X86/tailcall-64.ll
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -6,7 +6,7 @@ define i64 @test_trivial() {
  %A = tail call i64 @testi()
  ret i64 %A
 }
-; CHECK: test_trivial:
+; CHECK-LABEL: test_trivial:
 ; CHECK: jmp	_testi                  ## TAILCALL
 
 
@@ -15,7 +15,7 @@ define i64 @test_noop_bitcast() {
  %B = bitcast i64 %A to i64
  ret i64 %B
 }
-; CHECK: test_noop_bitcast:
+; CHECK-LABEL: test_noop_bitcast:
 ; CHECK: jmp	_testi                  ## TAILCALL
 
 
@@ -26,7 +26,7 @@ define i8* @test_inttoptr() {
   ret i8* %B
 }
 
-; CHECK: test_inttoptr:
+; CHECK-LABEL: test_inttoptr:
 ; CHECK: jmp	_testi                  ## TAILCALL
 
 
@@ -37,7 +37,7 @@ define <4 x i32> @test_vectorbitcast() {
   %B = bitcast <4 x float> %A to <4 x i32>
   ret <4 x i32> %B
 }
-; CHECK: test_vectorbitcast:
+; CHECK-LABEL: test_vectorbitcast:
 ; CHECK: jmp	_testv                  ## TAILCALL
 
 
@@ -47,7 +47,7 @@ define {i64, i64} @test_pair_trivial() {
   %A = tail call { i64, i64} @testp()
   ret { i64, i64} %A
 }
-; CHECK: test_pair_trivial:
+; CHECK-LABEL: test_pair_trivial:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
 define {i64, i64} @test_pair_notail() {
@@ -58,7 +58,7 @@ define {i64, i64} @test_pair_notail() {
 
   ret { i64, i64} %c
 }
-; CHECK: test_pair_notail:
+; CHECK-LABEL: test_pair_notail:
 ; CHECK-NOT: jmp	_testi
 
 define {i64, i64} @test_pair_extract_trivial() {
@@ -72,7 +72,7 @@ define {i64, i64} @test_pair_extract_trivial() {
   ret { i64, i64} %c
 }
 
-; CHECK: test_pair_extract_trivial:
+; CHECK-LABEL: test_pair_extract_trivial:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
 define {i64, i64} @test_pair_extract_notail() {
@@ -86,7 +86,7 @@ define {i64, i64} @test_pair_extract_notail() {
   ret { i64, i64} %c
 }
 
-; CHECK: test_pair_extract_notail:
+; CHECK-LABEL: test_pair_extract_notail:
 ; CHECK-NOT: jmp	_testp
 
 define {i8*, i64} @test_pair_extract_conv() {
@@ -102,7 +102,7 @@ define {i8*, i64} @test_pair_extract_conv() {
   ret { i8*, i64} %c
 }
 
-; CHECK: test_pair_extract_conv:
+; CHECK-LABEL: test_pair_extract_conv:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
 define {i64, i64} @test_pair_extract_multiple() {
@@ -122,7 +122,7 @@ define {i64, i64} @test_pair_extract_multiple() {
   ret { i64, i64} %e
 }
 
-; CHECK: test_pair_extract_multiple:
+; CHECK-LABEL: test_pair_extract_multiple:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
 define {i64, i64} @test_pair_extract_undef() {
@@ -134,7 +134,7 @@ define {i64, i64} @test_pair_extract_undef() {
   ret { i64, i64} %b
 }
 
-; CHECK: test_pair_extract_undef:
+; CHECK-LABEL: test_pair_extract_undef:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
 declare { i64, { i32, i32 } } @testn()
@@ -154,7 +154,7 @@ define {i64, {i32, i32}} @test_nest() {
   ret { i64, { i32, i32}} %c
 }
 
-; CHECK: test_nest:
+; CHECK-LABEL: test_nest:
 ; CHECK: jmp	_testn                  ## TAILCALL
 
 %struct.A = type { i32 }
@@ -169,7 +169,7 @@ entry:
   ret %struct.A* %x
 }
 
-; CHECK: test_upcast:
+; CHECK-LABEL: test_upcast:
 ; CHECK: jmp	_testu                  ## TAILCALL
 
 ; PR13006
@@ -206,7 +206,7 @@ entry:
 ;   return funcs[n](0, 0, 0, 0, 0, 0);
 ; }
 ;
-; CHECK: rdar12282281
+; CHECK-LABEL: rdar12282281
 ; CHECK: jmpq *%r11 # TAILCALL
 @funcs = external constant [0 x i32 (i8*, ...)*]
 
@@ -221,7 +221,7 @@ entry:
 
 define x86_fp80 @fp80_call(x86_fp80 %x) nounwind  {
 entry:
-; CHECK: fp80_call:
+; CHECK-LABEL: fp80_call:
 ; CHECK: jmp _fp80_callee
   %call = tail call x86_fp80 @fp80_callee(x86_fp80 %x) nounwind
   ret x86_fp80 %call
@@ -232,7 +232,7 @@ declare x86_fp80 @fp80_callee(x86_fp80)
 ; rdar://12229511
 define x86_fp80 @trunc_fp80(x86_fp80 %x) nounwind  {
 entry:
-; CHECK: trunc_fp80
+; CHECK-LABEL: trunc_fp80
 ; CHECK: callq _trunc
 ; CHECK-NOT: jmp _trunc
 ; CHECK: ret
diff --git a/test/CodeGen/X86/tailcall-calleesave.ll b/test/CodeGen/X86/tailcall-calleesave.ll
new file mode 100644
index 000000000000..c748bcef36dc
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-calleesave.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=x86 -tailcallopt -mcpu=core < %s | FileCheck %s
+
+target triple = "i686-apple-darwin"
+
+declare fastcc void @foo(i32, i32, i32, i32, i32, i32)
+declare i32* @bar(i32*)
+
+define fastcc void @hoge(i32 %b) nounwind {
+; Do not overwrite pushed callee-save registers
+; CHECK: pushl
+; CHECK: subl $[[SIZE:[0-9]+]], %esp
+; CHECK-NOT: [[SIZE]](%esp)
+  %a = alloca i32
+  store i32 0, i32* %a
+  %d = tail call i32* @bar(i32* %a) nounwind
+  store i32 %b, i32* %d
+  tail call fastcc void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll
index a80b90f9eee2..a51bc889924b 100644
--- a/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -4,7 +4,7 @@
 ; rdar://9147433
 
 define i32 @foo(i32 %x) nounwind ssp {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 entry:
   switch i32 %x, label %return [
     i32 1, label %sw.bb
@@ -69,7 +69,7 @@ declare i8* @bar(i8*) uwtable optsize noinline ssp
 
 define hidden %0* @thingWithValue(i8* %self) uwtable ssp {
 entry:
-; CHECK: thingWithValue:
+; CHECK-LABEL: thingWithValue:
 ; CHECK: jmp _bar
   br i1 undef, label %if.then.i, label %if.else.i
 
diff --git a/test/CodeGen/X86/tailcall-disable.ll b/test/CodeGen/X86/tailcall-disable.ll
index b628f5e537f0..1fd2d72dc570 100644
--- a/test/CodeGen/X86/tailcall-disable.ll
+++ b/test/CodeGen/X86/tailcall-disable.ll
@@ -15,12 +15,12 @@ entry:
   ret i32 %call
 }
 
-; CALL: test1:
+; CALL-LABEL: test1:
 ; CALL-NOT: ret
 ; CALL: callq helper
 ; CALL: ret
 
-; JMP: test1:
+; JMP-LABEL: test1:
 ; JMP-NOT: ret
 ; JMP: jmp helper # TAILCALL
 
@@ -30,11 +30,11 @@ entry:
   ret i32 %call
 }
 
-; CALL: test2:
+; CALL-LABEL: test2:
 ; CALL-NOT: ret
 ; CALL: callq test2
 ; CALL: ret
 
-; JMP: test2:
+; JMP-LABEL: test2:
 ; JMP-NOT: ret
 ; JMP: jmp test2 # TAILCALL
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index e9b8721e6608..f5662d97d13f 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s
 
 declare fastcc i32 @callee(i32 %arg)
 define fastcc i32 @directcall(i32 %arg) {
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 762160202c2d..75a6d874da37 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -3,7 +3,7 @@
 ; FIXME: Win64 does not support byval.
 
 ; Expect the entry point.
-; CHECK: tailcaller:
+; CHECK-LABEL: tailcaller:
 
 ; Expect 2 rep;movs because of tail call byval lowering.
 ; CHECK: rep;
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 04c4e95710c5..9ef0d27f7de6 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -3,7 +3,7 @@
 declare i32 @putchar(i32)
 
 define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
-; CHECK: checktail:
+; CHECK-LABEL: checktail:
         %tmp1 = icmp sgt i32 %x, 0
         br i1 %tmp1, label %if-then, label %if-else
 
diff --git a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
new file mode 100644
index 000000000000..1bc617541edb
--- /dev/null
+++ b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+tbm < %s | FileCheck %s
+
+define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 2814)
+  ret i32 %0
+}
+
+declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone
+
+define i32 @test_x86_tbm_bextri_u32_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %tmp1 = load i32* %a, align 4
+  %0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %tmp1, i32 2814)
+  ret i32 %0
+}
+
+define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 2814)
+  ret i64 %0
+}
+
+declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone
+
+define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %tmp1 = load i64* %a, align 8
+  %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i64 2814)
+  ret i64 %0
+}
diff --git a/test/CodeGen/X86/tbm_patterns.ll b/test/CodeGen/X86/tbm_patterns.ll
new file mode 100644
index 000000000000..79eea10af3ae
--- /dev/null
+++ b/test/CodeGen/X86/tbm_patterns.ll
@@ -0,0 +1,253 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+tbm < %s | FileCheck %s
+
+define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = lshr i32 %a, 4
+  %1 = and i32 %0, 4095
+  ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_m(i32* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = load i32* %a
+  %1 = lshr i32 %0, 4
+  %2 = and i32 %1, 4095
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = lshr i64 %a, 4
+  %1 = and i64 %0, 4095
+  ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind readonly {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
+  ; CHECK-NOT: mov
+  ; CHECK: bextr $
+  %0 = load i64* %a
+  %1 = lshr i64 %0, 4
+  %2 = and i64 %1, 4095
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_blcfill_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcfill_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill %
+  %0 = add i32 %a, 1
+  %1 = and i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blcfill_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcfill_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcfill %
+  %0 = add i64 %a, 1
+  %1 = and i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blci_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blci_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = add i32 1, %a
+  %1 = xor i32 %0, -1
+  %2 = or i32 %1, %a
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_blci_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blci_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = add i64 1, %a
+  %1 = xor i64 %0, -1
+  %2 = or i64 %1, %a
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_blci_u32_b(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blci_u32_b:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = sub i32 -2, %a
+  %1 = or i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blci_u64_b(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blci_u64_b:
+  ; CHECK-NOT: mov
+  ; CHECK: blci %
+  %0 = sub i64 -2, %a
+  %1 = or i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blcic_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcic_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, 1
+  %2 = and i32 %1, %0
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_blcic_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcic_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcic %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, 1
+  %2 = and i64 %1, %0
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_blcmsk_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcmsk_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk %
+  %0 = add i32 %a, 1
+  %1 = xor i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blcmsk_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcmsk_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcmsk %
+  %0 = add i64 %a, 1
+  %1 = xor i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blcs_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcs_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs %
+  %0 = add i32 %a, 1
+  %1 = or i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blcs_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blcs_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blcs %
+  %0 = add i64 %a, 1
+  %1 = or i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blsfill_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsfill_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill %
+  %0 = add i32 %a, -1
+  %1 = or i32 %0, %a
+  ret i32 %1
+}
+
+define i64 @test_x86_tbm_blsfill_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsfill_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blsfill %
+  %0 = add i64 %a, -1
+  %1 = or i64 %0, %a
+  ret i64 %1
+}
+
+define i32 @test_x86_tbm_blsic_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsic_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, -1
+  %2 = or i32 %0, %1
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_blsic_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_blsic_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: blsic %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, -1
+  %2 = or i64 %0, %1
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_t1mskc_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_t1mskc_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, 1
+  %2 = or i32 %0, %1
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_t1mskc_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_t1mskc_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: t1mskc %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, 1
+  %2 = or i64 %0, %1
+  ret i64 %2
+}
+
+define i32 @test_x86_tbm_tzmsk_u32(i32 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_tzmsk_u32:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk %
+  %0 = xor i32 %a, -1
+  %1 = add i32 %a, -1
+  %2 = and i32 %0, %1
+  ret i32 %2
+}
+
+define i64 @test_x86_tbm_tzmsk_u64(i64 %a) nounwind readnone {
+entry:
+  ; CHECK-LABEL: test_x86_tbm_tzmsk_u64:
+  ; CHECK-NOT: mov
+  ; CHECK: tzmsk %
+  %0 = xor i64 %a, -1
+  %1 = add i64 %a, -1
+  %2 = and i64 %0, %1
+  ret i64 %2
+}
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index 97db1b340e81..19fbaafc194f 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -2,10 +2,10 @@
 ; rdar://5752025
 
 ; We want:
-;      CHECK: movl	$42, %ecx
-; CHECK-NEXT: movl	4(%esp), %eax
-; CHECK-NEXT: andl	$15, %eax
-; CHECK-NEXT: cmovnel	%ecx, %eax
+;      CHECK: movl	4(%esp), %ecx
+; CHECK-NEXT: andl	$15, %ecx
+; CHECK-NEXT: movl	$42, %eax
+; CHECK-NEXT: cmovel	%ecx, %eax
 ; CHECK-NEXT: ret
 ;
 ; We don't want:
@@ -39,4 +39,3 @@ entry:
 	%retval = select i1 %tmp4, i32 %tmp2, i32 42		; <i32> [#uses=1]
 	ret i32 %retval
 }
-
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll
index 5bc28ecbc48c..c9b76c88c1a2 100644
--- a/test/CodeGen/X86/test-shrink.ll
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -2,10 +2,10 @@
 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=CHECK-64
 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
 
-; CHECK-64: g64xh:
+; CHECK-64-LABEL: g64xh:
 ; CHECK-64:   testb $8, {{%ah|%ch}}
 ; CHECK-64:   ret
-; CHECK-32: g64xh:
+; CHECK-32-LABEL: g64xh:
 ; CHECK-32:   testb $8, %ah
 ; CHECK-32:   ret
 define void @g64xh(i64 inreg %x) nounwind {
@@ -19,10 +19,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g64xl:
+; CHECK-64-LABEL: g64xl:
 ; CHECK-64:   testb $8, [[A0L:%dil|%cl]]
 ; CHECK-64:   ret
-; CHECK-32: g64xl:
+; CHECK-32-LABEL: g64xl:
 ; CHECK-32:   testb $8, %al
 ; CHECK-32:   ret
 define void @g64xl(i64 inreg %x) nounwind {
@@ -36,10 +36,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g32xh:
+; CHECK-64-LABEL: g32xh:
 ; CHECK-64:   testb $8, {{%ah|%ch}}
 ; CHECK-64:   ret
-; CHECK-32: g32xh:
+; CHECK-32-LABEL: g32xh:
 ; CHECK-32:   testb $8, %ah
 ; CHECK-32:   ret
 define void @g32xh(i32 inreg %x) nounwind {
@@ -53,10 +53,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g32xl:
+; CHECK-64-LABEL: g32xl:
 ; CHECK-64:   testb $8, [[A0L]]
 ; CHECK-64:   ret
-; CHECK-32: g32xl:
+; CHECK-32-LABEL: g32xl:
 ; CHECK-32:   testb $8, %al
 ; CHECK-32:   ret
 define void @g32xl(i32 inreg %x) nounwind {
@@ -70,10 +70,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g16xh:
+; CHECK-64-LABEL: g16xh:
 ; CHECK-64:   testb $8, {{%ah|%ch}}
 ; CHECK-64:   ret
-; CHECK-32: g16xh:
+; CHECK-32-LABEL: g16xh:
 ; CHECK-32:   testb $8, %ah
 ; CHECK-32:   ret
 define void @g16xh(i16 inreg %x) nounwind {
@@ -87,10 +87,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g16xl:
+; CHECK-64-LABEL: g16xl:
 ; CHECK-64:   testb $8, [[A0L]]
 ; CHECK-64:   ret
-; CHECK-32: g16xl:
+; CHECK-32-LABEL: g16xl:
 ; CHECK-32:   testb $8, %al
 ; CHECK-32:   ret
 define void @g16xl(i16 inreg %x) nounwind {
@@ -104,10 +104,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g64x16:
+; CHECK-64-LABEL: g64x16:
 ; CHECK-64:   testw $-32640, %[[A0W:di|cx]]
 ; CHECK-64:   ret
-; CHECK-32: g64x16:
+; CHECK-32-LABEL: g64x16:
 ; CHECK-32:   testw $-32640, %ax
 ; CHECK-32:   ret
 define void @g64x16(i64 inreg %x) nounwind {
@@ -121,10 +121,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g32x16:
+; CHECK-64-LABEL: g32x16:
 ; CHECK-64:   testw $-32640, %[[A0W]]
 ; CHECK-64:   ret
-; CHECK-32: g32x16:
+; CHECK-32-LABEL: g32x16:
 ; CHECK-32:   testw $-32640, %ax
 ; CHECK-32:   ret
 define void @g32x16(i32 inreg %x) nounwind {
@@ -138,10 +138,10 @@ yes:
 no:
   ret void
 }
-; CHECK-64: g64x32:
+; CHECK-64-LABEL: g64x32:
 ; CHECK-64:   testl $268468352, %e[[A0W]]
 ; CHECK-64:   ret
-; CHECK-32: g64x32:
+; CHECK-32-LABEL: g64x32:
 ; CHECK-32:   testl $268468352, %eax
 ; CHECK-32:   ret
 define void @g64x32(i64 inreg %x) nounwind {
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
index 0e6f6363cb89..bf6debf17542 100644
--- a/test/CodeGen/X86/testl-commute.ll
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin7"
 
 define i32 @test(i32* %P, i32* %G) nounwind {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: ret
 ; CHECK: testl (%{{.*}}), %{{.*}}
 ; CHECK: ret
@@ -28,7 +28,7 @@ bb1:		; preds = %entry
 }
 
 define i32 @test2(i32* %P, i32* %G) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK-NOT: ret
 ; CHECK: testl (%{{.*}}), %{{.*}}
 ; CHECK: ret
@@ -49,7 +49,7 @@ bb1:		; preds = %entry
 }
 
 define i32 @test3(i32* %P, i32* %G) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK-NOT: ret
 ; CHECK: testl (%{{.*}}), %{{.*}}
 ; CHECK: ret
diff --git a/test/CodeGen/X86/this-return-64.ll b/test/CodeGen/X86/this-return-64.ll
index 2b26a89e3c87..4e6be71238ab 100644
--- a/test/CodeGen/X86/this-return-64.ll
+++ b/test/CodeGen/X86/this-return-64.ll
@@ -14,7 +14,7 @@ declare %struct.B* @B_ctor_nothisret(%struct.B*, i32)
 
 define %struct.C* @C_ctor(%struct.C* %this, i32 %y) {
 entry:
-; CHECK: C_ctor:
+; CHECK-LABEL: C_ctor:
 ; CHECK: jmp     B_ctor                  # TAILCALL
   %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
   %call = tail call %struct.B* @B_ctor(%struct.B* %0, i32 %y)
@@ -23,7 +23,7 @@ entry:
 
 define %struct.C* @C_ctor_nothisret(%struct.C* %this, i32 %y) {
 entry:
-; CHECK: C_ctor_nothisret:
+; CHECK-LABEL: C_ctor_nothisret:
 ; CHECK-NOT: jmp     B_ctor_nothisret
   %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
   %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %0, i32 %y)
@@ -32,7 +32,7 @@ entry:
 
 define %struct.D* @D_ctor(%struct.D* %this, i32 %y) {
 entry:
-; CHECK: D_ctor:
+; CHECK-LABEL: D_ctor:
 ; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
 ; CHECK: callq   A_ctor
 ; CHECK: movq    [[SAVETHIS]], %rcx
@@ -48,7 +48,7 @@ entry:
 
 define %struct.D* @D_ctor_nothisret(%struct.D* %this, i32 %y) {
 entry:
-; CHECK: D_ctor_nothisret:
+; CHECK-LABEL: D_ctor_nothisret:
 ; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
 ; CHECK: callq   A_ctor_nothisret
 ; CHECK: movq    [[SAVETHIS]], %rcx
@@ -64,7 +64,7 @@ entry:
 
 define %struct.E* @E_ctor(%struct.E* %this, i32 %x) {
 entry:
-; CHECK: E_ctor:
+; CHECK-LABEL: E_ctor:
 ; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
 ; CHECK: callq   B_ctor
 ; CHECK: movq    [[SAVETHIS]], %rcx
@@ -77,7 +77,7 @@ entry:
 
 define %struct.E* @E_ctor_nothisret(%struct.E* %this, i32 %x) {
 entry:
-; CHECK: E_ctor_nothisret:
+; CHECK-LABEL: E_ctor_nothisret:
 ; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
 ; CHECK: callq   B_ctor_nothisret
 ; CHECK: movq    [[SAVETHIS]], %rcx
diff --git a/test/CodeGen/X86/tls-local-dynamic.ll b/test/CodeGen/X86/tls-local-dynamic.ll
index c5fd16bbec22..4841e52c5b0e 100644
--- a/test/CodeGen/X86/tls-local-dynamic.ll
+++ b/test/CodeGen/X86/tls-local-dynamic.ll
@@ -10,7 +10,7 @@ entry:
   ret i32* @x
 ; FIXME: This function uses a single thread-local variable,
 ; so we might want to fall back to general-dynamic here.
-; CHECK:       get_x:
+; CHECK-LABEL:       get_x:
 ; CHECK:       leaq x@TLSLD(%rip), %rdi
 ; CHECK-NEXT:  callq __tls_get_addr@PLT
 ; CHECK:       x@DTPOFF
@@ -26,7 +26,7 @@ entry:
   %cmp = icmp eq i32 %i, 1
   br i1 %cmp, label %return, label %if.else
 ; This bb does not access TLS, so should not call __tls_get_addr.
-; CHECK:       f:
+; CHECK-LABEL:       f:
 ; CHECK-NOT:   __tls_get_addr
 ; CHECK:       je
 
diff --git a/test/CodeGen/X86/tls-models.ll b/test/CodeGen/X86/tls-models.ll
index 7c527e210a90..8e3e95886ad8 100644
--- a/test/CodeGen/X86/tls-models.ll
+++ b/test/CodeGen/X86/tls-models.ll
@@ -25,15 +25,15 @@ entry:
   ret i32* @external_gd
 
   ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
-  ; X64:     f1:
+  ; X64-LABEL:     f1:
   ; X64:     external_gd@GOTTPOFF
-  ; X32:     f1:
+  ; X32-LABEL:     f1:
   ; X32:     external_gd@INDNTPOFF
-  ; X64_PIC: f1:
+  ; X64_PIC-LABEL: f1:
   ; X64_PIC: external_gd@TLSGD
-  ; X32_PIC: f1:
+  ; X32_PIC-LABEL: f1:
   ; X32_PIC: external_gd@TLSGD
-  ; DARWIN:  f1:
+  ; DARWIN-LABEL:  f1:
   ; DARWIN:  _external_gd@TLVP
 }
 
@@ -42,15 +42,15 @@ entry:
   ret i32* @internal_gd
 
   ; Non-PIC code can use local exec, PIC code can use local dynamic.
-  ; X64:     f2:
+  ; X64-LABEL:     f2:
   ; X64:     internal_gd@TPOFF
-  ; X32:     f2:
+  ; X32-LABEL:     f2:
   ; X32:     internal_gd@NTPOFF
-  ; X64_PIC: f2:
+  ; X64_PIC-LABEL: f2:
   ; X64_PIC: internal_gd@TLSLD
-  ; X32_PIC: f2:
+  ; X32_PIC-LABEL: f2:
   ; X32_PIC: internal_gd@TLSLDM
-  ; DARWIN:  f2:
+  ; DARWIN-LABEL:  f2:
   ; DARWIN:  _internal_gd@TLVP
 }
 
@@ -62,15 +62,15 @@ entry:
   ret i32* @external_ld
 
   ; Non-PIC code can use initial exec, PIC code use local dynamic as specified.
-  ; X64:     f3:
+  ; X64-LABEL:     f3:
   ; X64:     external_ld@GOTTPOFF
-  ; X32:     f3:
+  ; X32-LABEL:     f3:
   ; X32:     external_ld@INDNTPOFF
-  ; X64_PIC: f3:
+  ; X64_PIC-LABEL: f3:
   ; X64_PIC: external_ld@TLSLD
-  ; X32_PIC: f3:
+  ; X32_PIC-LABEL: f3:
   ; X32_PIC: external_ld@TLSLDM
-  ; DARWIN:  f3:
+  ; DARWIN-LABEL:  f3:
   ; DARWIN:  _external_ld@TLVP
 }
 
@@ -79,15 +79,15 @@ entry:
   ret i32* @internal_ld
 
   ; Non-PIC code can use local exec, PIC code can use local dynamic.
-  ; X64:     f4:
+  ; X64-LABEL:     f4:
   ; X64:     internal_ld@TPOFF
-  ; X32:     f4:
+  ; X32-LABEL:     f4:
   ; X32:     internal_ld@NTPOFF
-  ; X64_PIC: f4:
+  ; X64_PIC-LABEL: f4:
   ; X64_PIC: internal_ld@TLSLD
-  ; X32_PIC: f4:
+  ; X32_PIC-LABEL: f4:
   ; X32_PIC: internal_ld@TLSLDM
-  ; DARWIN:  f4:
+  ; DARWIN-LABEL:  f4:
   ; DARWIN:  _internal_ld@TLVP
 }
 
@@ -99,15 +99,15 @@ entry:
   ret i32* @external_ie
 
   ; Non-PIC and PIC code will use initial exec as specified.
-  ; X64:     f5:
+  ; X64-LABEL:     f5:
   ; X64:     external_ie@GOTTPOFF
-  ; X32:     f5:
+  ; X32-LABEL:     f5:
   ; X32:     external_ie@INDNTPOFF
-  ; X64_PIC: f5:
+  ; X64_PIC-LABEL: f5:
   ; X64_PIC: external_ie@GOTTPOFF
-  ; X32_PIC: f5:
+  ; X32_PIC-LABEL: f5:
   ; X32_PIC: external_ie@GOTNTPOFF
-  ; DARWIN:  f5:
+  ; DARWIN-LABEL:  f5:
   ; DARWIN:  _external_ie@TLVP
 }
 
@@ -116,15 +116,15 @@ entry:
   ret i32* @internal_ie
 
   ; Non-PIC code can use local exec, PIC code use initial exec as specified.
-  ; X64:     f6:
+  ; X64-LABEL:     f6:
   ; X64:     internal_ie@TPOFF
-  ; X32:     f6:
+  ; X32-LABEL:     f6:
   ; X32:     internal_ie@NTPOFF
-  ; X64_PIC: f6:
+  ; X64_PIC-LABEL: f6:
   ; X64_PIC: internal_ie@GOTTPOFF
-  ; X32_PIC: f6:
+  ; X32_PIC-LABEL: f6:
   ; X32_PIC: internal_ie@GOTNTPOFF
-  ; DARWIN:  f6:
+  ; DARWIN-LABEL:  f6:
   ; DARWIN:  _internal_ie@TLVP
 }
 
@@ -136,15 +136,15 @@ entry:
   ret i32* @external_le
 
   ; Non-PIC and PIC code will use local exec as specified.
-  ; X64:     f7:
+  ; X64-LABEL:     f7:
   ; X64:     external_le@TPOFF
-  ; X32:     f7:
+  ; X32-LABEL:     f7:
   ; X32:     external_le@NTPOFF
-  ; X64_PIC: f7:
+  ; X64_PIC-LABEL: f7:
   ; X64_PIC: external_le@TPOFF
-  ; X32_PIC: f7:
+  ; X32_PIC-LABEL: f7:
   ; X32_PIC: external_le@NTPOFF
-  ; DARWIN:  f7:
+  ; DARWIN-LABEL:  f7:
   ; DARWIN:  _external_le@TLVP
 }
 
@@ -153,14 +153,14 @@ entry:
   ret i32* @internal_le
 
   ; Non-PIC and PIC code will use local exec as specified.
-  ; X64:     f8:
+  ; X64-LABEL:     f8:
   ; X64:     internal_le@TPOFF
-  ; X32:     f8:
+  ; X32-LABEL:     f8:
   ; X32:     internal_le@NTPOFF
-  ; X64_PIC: f8:
+  ; X64_PIC-LABEL: f8:
   ; X64_PIC: internal_le@TPOFF
-  ; X32_PIC: f8:
+  ; X32_PIC-LABEL: f8:
   ; X32_PIC: internal_le@NTPOFF
-  ; DARWIN:  f8:
+  ; DARWIN-LABEL:  f8:
   ; DARWIN:  _internal_le@TLVP
 }
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index b823f0af2cdf..0c79da6667a1 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -11,11 +11,11 @@ entry:
 	ret i32 %tmp1
 }
 
-; X32: f1:
+; X32-LABEL: f1:
 ; X32:   leal i@TLSGD(,%ebx), %eax
 ; X32:   calll ___tls_get_addr@PLT
 
-; X64: f1:
+; X64-LABEL: f1:
 ; X64:   leaq i@TLSGD(%rip), %rdi
 ; X64:   callq __tls_get_addr@PLT
 
@@ -27,11 +27,11 @@ entry:
 	ret i32* @i
 }
 
-; X32: f2:
+; X32-LABEL: f2:
 ; X32:   leal i@TLSGD(,%ebx), %eax
 ; X32:   calll ___tls_get_addr@PLT
 
-; X64: f2:
+; X64-LABEL: f2:
 ; X64:   leaq i@TLSGD(%rip), %rdi
 ; X64:   callq __tls_get_addr@PLT
 
@@ -43,11 +43,11 @@ entry:
 	ret i32 %tmp1
 }
 
-; X32: f3:
+; X32-LABEL: f3:
 ; X32:   leal	i@TLSGD(,%ebx), %eax
 ; X32:   calll ___tls_get_addr@PLT
 
-; X64: f3:
+; X64-LABEL: f3:
 ; X64:   leaq i@TLSGD(%rip), %rdi
 ; X64:   callq __tls_get_addr@PLT
 
@@ -57,11 +57,11 @@ entry:
 	ret i32* @i
 }
 
-; X32: f4:
+; X32-LABEL: f4:
 ; X32:   leal	i@TLSGD(,%ebx), %eax
 ; X32:   calll ___tls_get_addr@PLT
 
-; X64: f4:
+; X64-LABEL: f4:
 ; X64:   leaq i@TLSGD(%rip), %rdi
 ; X64:   callq __tls_get_addr@PLT
 
@@ -74,13 +74,13 @@ entry:
 	ret i32 %add
 }
 
-; X32:    f5:
+; X32-LABEL:    f5:
 ; X32:      leal {{[jk]}}@TLSLDM(%ebx)
 ; X32: calll ___tls_get_addr@PLT
 ; X32: movl {{[jk]}}@DTPOFF(%e
 ; X32: addl {{[jk]}}@DTPOFF(%e
 
-; X64:    f5:
+; X64-LABEL:    f5:
 ; X64:      leaq {{[jk]}}@TLSLD(%rip), %rdi
 ; X64: callq	__tls_get_addr@PLT
 ; X64: movl {{[jk]}}@DTPOFF(%r
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
index 3fca9f5a3791..d1e09c2442f7 100644
--- a/test/CodeGen/X86/tls-pie.ll
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -7,10 +7,10 @@
 @i2 = external thread_local global i32
 
 define i32 @f1() {
-; X32: f1:
+; X32-LABEL: f1:
 ; X32:      movl %gs:i@NTPOFF, %eax
 ; X32-NEXT: ret
-; X64: f1:
+; X64-LABEL: f1:
 ; X64:      movl %fs:i@TPOFF, %eax
 ; X64-NEXT: ret
 
@@ -20,11 +20,11 @@ entry:
 }
 
 define i32* @f2() {
-; X32: f2:
+; X32-LABEL: f2:
 ; X32:      movl %gs:0, %eax
 ; X32-NEXT: leal i@NTPOFF(%eax), %eax
 ; X32-NEXT: ret
-; X64: f2:
+; X64-LABEL: f2:
 ; X64:      movq %fs:0, %rax
 ; X64-NEXT: leaq i@TPOFF(%rax), %rax
 ; X64-NEXT: ret
@@ -34,7 +34,7 @@ entry:
 }
 
 define i32 @f3() {
-; X32: f3:
+; X32-LABEL: f3:
 ; X32:      calll .L{{[0-9]+}}$pb
 ; X32-NEXT: .L{{[0-9]+}}$pb:
 ; X32-NEXT: popl %eax
@@ -43,7 +43,7 @@ define i32 @f3() {
 ; X32-NEXT: movl i2@GOTNTPOFF(%eax), %eax
 ; X32-NEXT: movl %gs:(%eax), %eax
 ; X32-NEXT: ret
-; X64: f3:
+; X64-LABEL: f3:
 ; X64:      movq i2@GOTTPOFF(%rip), %rax
 ; X64-NEXT: movl %fs:(%rax), %eax
 ; X64-NEXT: ret
@@ -54,7 +54,7 @@ entry:
 }
 
 define i32* @f4() {
-; X32: f4:
+; X32-LABEL: f4:
 ; X32:      calll .L{{[0-9]+}}$pb
 ; X32-NEXT: .L{{[0-9]+}}$pb:
 ; X32-NEXT: popl %ecx
@@ -63,7 +63,7 @@ define i32* @f4() {
 ; X32-NEXT: movl %gs:0, %eax
 ; X32-NEXT: addl i2@GOTNTPOFF(%ecx), %eax
 ; X32-NEXT: ret
-; X64: f4:
+; X64-LABEL: f4:
 ; X64:      movq %fs:0, %rax
 ; X64-NEXT: addq i2@GOTTPOFF(%rip), %rax
 ; X64-NEXT: ret
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
index 8cdecd81bff5..76a840260b9a 100644
--- a/test/CodeGen/X86/tls.ll
+++ b/test/CodeGen/X86/tls.ll
@@ -12,19 +12,19 @@
 @b1 = thread_local global i8 0
 
 define i32 @f1() {
-; X32_LINUX: f1:
+; X32_LINUX-LABEL: f1:
 ; X32_LINUX:      movl %gs:i1@NTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f1:
+; X64_LINUX-LABEL: f1:
 ; X64_LINUX:      movl %fs:i1@TPOFF, %eax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f1:
+; X32_WIN-LABEL: f1:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f1:
+; X64_WIN-LABEL: f1:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -37,21 +37,21 @@ entry:
 }
 
 define i32* @f2() {
-; X32_LINUX: f2:
+; X32_LINUX-LABEL: f2:
 ; X32_LINUX:      movl %gs:0, %eax
 ; X32_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f2:
+; X64_LINUX-LABEL: f2:
 ; X64_LINUX:      movq %fs:0, %rax
 ; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f2:
+; X32_WIN-LABEL: f2:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: leal _i1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f2:
+; X64_WIN-LABEL: f2:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -63,21 +63,21 @@ entry:
 }
 
 define i32 @f3() nounwind {
-; X32_LINUX: f3:
+; X32_LINUX-LABEL: f3:
 ; X32_LINUX:      movl i2@INDNTPOFF, %eax
 ; X32_LINUX-NEXT: movl %gs:(%eax), %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f3:
+; X64_LINUX-LABEL: f3:
 ; X64_LINUX:      movq i2@GOTTPOFF(%rip), %rax
 ; X64_LINUX-NEXT: movl %fs:(%rax), %eax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f3:
+; X32_WIN-LABEL: f3:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f3:
+; X64_WIN-LABEL: f3:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -90,21 +90,21 @@ entry:
 }
 
 define i32* @f4() {
-; X32_LINUX: f4:
+; X32_LINUX-LABEL: f4:
 ; X32_LINUX:      movl %gs:0, %eax
 ; X32_LINUX-NEXT: addl i2@INDNTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f4:
+; X64_LINUX-LABEL: f4:
 ; X64_LINUX:      movq %fs:0, %rax
 ; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f4:
+; X32_WIN-LABEL: f4:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: leal _i2@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f4:
+; X64_WIN-LABEL: f4:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -116,19 +116,19 @@ entry:
 }
 
 define i32 @f5() nounwind {
-; X32_LINUX: f5:
+; X32_LINUX-LABEL: f5:
 ; X32_LINUX:      movl %gs:i3@NTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f5:
+; X64_LINUX-LABEL: f5:
 ; X64_LINUX:      movl %fs:i3@TPOFF, %eax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f5:
+; X32_WIN-LABEL: f5:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movl _i3@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f5:
+; X64_WIN-LABEL: f5:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -141,21 +141,21 @@ entry:
 }
 
 define i32* @f6() {
-; X32_LINUX: f6:
+; X32_LINUX-LABEL: f6:
 ; X32_LINUX:      movl %gs:0, %eax
 ; X32_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f6:
+; X64_LINUX-LABEL: f6:
 ; X64_LINUX:      movq %fs:0, %rax
 ; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f6:
+; X32_WIN-LABEL: f6:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: leal _i3@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f6:
+; X64_WIN-LABEL: f6:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -167,10 +167,10 @@ entry:
 }
 
 define i32 @f7() {
-; X32_LINUX: f7:
+; X32_LINUX-LABEL: f7:
 ; X32_LINUX:      movl %gs:i4@NTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f7:
+; X64_LINUX-LABEL: f7:
 ; X64_LINUX:      movl %fs:i4@TPOFF, %eax
 ; X64_LINUX-NEXT: ret
 
@@ -180,11 +180,11 @@ entry:
 }
 
 define i32* @f8() {
-; X32_LINUX: f8:
+; X32_LINUX-LABEL: f8:
 ; X32_LINUX:      movl %gs:0, %eax
 ; X32_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f8:
+; X64_LINUX-LABEL: f8:
 ; X64_LINUX:      movq %fs:0, %rax
 ; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
 ; X64_LINUX-NEXT: ret
@@ -194,10 +194,10 @@ entry:
 }
 
 define i32 @f9() {
-; X32_LINUX: f9:
+; X32_LINUX-LABEL: f9:
 ; X32_LINUX:      movl %gs:i5@NTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f9:
+; X64_LINUX-LABEL: f9:
 ; X64_LINUX:      movl %fs:i5@TPOFF, %eax
 ; X64_LINUX-NEXT: ret
 
@@ -207,11 +207,11 @@ entry:
 }
 
 define i32* @f10() {
-; X32_LINUX: f10:
+; X32_LINUX-LABEL: f10:
 ; X32_LINUX:      movl %gs:0, %eax
 ; X32_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f10:
+; X64_LINUX-LABEL: f10:
 ; X64_LINUX:      movq %fs:0, %rax
 ; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
 ; X64_LINUX-NEXT: ret
@@ -221,29 +221,24 @@ entry:
 }
 
 define i16 @f11() {
-; X32_LINUX: f11:
+; X32_LINUX-LABEL: f11:
 ; X32_LINUX:      movzwl %gs:s1@NTPOFF, %eax
-; Why is this kill line here, but no where else?
-; X32_LINUX-NEXT: # kill
-; X32_LINUX-NEXT: ret
-; X64_LINUX: f11:
+; X32_LINUX:      ret
+; X64_LINUX-LABEL: f11:
 ; X64_LINUX:      movzwl %fs:s1@TPOFF, %eax
-; X64_LINUX-NEXT: # kill
-; X64_LINUX-NEXT: ret
-; X32_WIN: f11:
+; X64_LINUX:      ret
+; X32_WIN-LABEL: f11:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax
-; X32_WIN-NEXT: # kill
-; X32_WIN-NEXT: ret
-; X64_WIN: f11:
+; X32_WIN:      ret
+; X64_WIN-LABEL: f11:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
 ; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
-; X64_WIN-NEXT: # kill
-; X64_WIN-NEXT: ret
+; X64_WIN:      ret
 
 entry:
 	%tmp1 = load i16* @s1
@@ -251,19 +246,19 @@ entry:
 }
 
 define i32 @f12() {
-; X32_LINUX: f12:
+; X32_LINUX-LABEL: f12:
 ; X32_LINUX:      movswl %gs:s1@NTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f12:
+; X64_LINUX-LABEL: f12:
 ; X64_LINUX:      movswl %fs:s1@TPOFF, %eax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f12:
+; X32_WIN-LABEL: f12:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f12:
+; X64_WIN-LABEL: f12:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -277,19 +272,19 @@ entry:
 }
 
 define i8 @f13() {
-; X32_LINUX: f13:
+; X32_LINUX-LABEL: f13:
 ; X32_LINUX:      movb %gs:b1@NTPOFF, %al
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f13:
+; X64_LINUX-LABEL: f13:
 ; X64_LINUX:      movb %fs:b1@TPOFF, %al
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f13:
+; X32_WIN-LABEL: f13:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movb _b1@SECREL32(%eax), %al
 ; X32_WIN-NEXT: ret
-; X64_WIN: f13:
+; X64_WIN-LABEL: f13:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
@@ -302,19 +297,19 @@ entry:
 }
 
 define i32 @f14() {
-; X32_LINUX: f14:
+; X32_LINUX-LABEL: f14:
 ; X32_LINUX:      movsbl %gs:b1@NTPOFF, %eax
 ; X32_LINUX-NEXT: ret
-; X64_LINUX: f14:
+; X64_LINUX-LABEL: f14:
 ; X64_LINUX:      movsbl %fs:b1@TPOFF, %eax
 ; X64_LINUX-NEXT: ret
-; X32_WIN: f14:
+; X32_WIN-LABEL: f14:
 ; X32_WIN:      movl __tls_index, %eax
 ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
 ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
 ; X32_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax
 ; X32_WIN-NEXT: ret
-; X64_WIN: f14:
+; X64_WIN-LABEL: f14:
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
diff --git a/test/CodeGen/X86/tlv-1.ll b/test/CodeGen/X86/tlv-1.ll
index 92dac3096629..66e2f819ee24 100644
--- a/test/CodeGen/X86/tlv-1.ll
+++ b/test/CodeGen/X86/tlv-1.ll
@@ -5,7 +5,7 @@
 @c = external thread_local global %struct.A, align 4
 
 define void @main() nounwind ssp {
-; CHECK: main:
+; CHECK-LABEL: main:
 entry:
   call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false)
   unreachable  
@@ -18,7 +18,7 @@ entry:
 ; rdar://10291355
 define i32 @test() nounwind readonly ssp {
 entry:
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: movq _a@TLVP(%rip),
 ; CHECK: callq *
 ; CHECK: movl (%rax), [[REGISTER:%[a-z]+]]
diff --git a/test/CodeGen/X86/tlv-3.ll b/test/CodeGen/X86/tlv-3.ll
new file mode 100644
index 000000000000..4f793051836f
--- /dev/null
+++ b/test/CodeGen/X86/tlv-3.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+; PR17964
+
+; CHECK: __DATA,__thread_data,thread_local_regular
+; CHECK: _foo$tlv$init
+@foo = weak_odr thread_local global i8 1, align 4
+
+define i32 @main() {
+    ret i32 0
+}
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 3f44be0b500c..149c667c8cb7 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
 
-; CHECK: test0:
+; CHECK-LABEL: test0:
 ; CHECK: ud2
 define i32 @test0() noreturn nounwind  {
 entry:
@@ -8,7 +8,7 @@ entry:
 	unreachable
 }
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: int3
 define i32 @test1() noreturn nounwind  {
 entry:
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll
index 1d22a185def3..d230f1f7e2c6 100644
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
 
-;CHECK: load_2_i8
+;CHECK-LABEL: load_2_i8:
 ; A single 16-bit load
 ;CHECK: pmovzxbq
 ;CHECK: paddq
@@ -16,7 +16,7 @@ define void @load_2_i8(<2 x i8>* %A)  {
    ret void
 } 
 
-;CHECK: load_2_i16
+;CHECK-LABEL: load_2_i16:
 ; Read 32-bits
 ;CHECK: pmovzxwq
 ;CHECK: paddq
@@ -30,7 +30,7 @@ define void @load_2_i16(<2 x i16>* %A)  {
    ret void
 } 
 
-;CHECK: load_2_i32
+;CHECK-LABEL: load_2_i32:
 ;CHECK: pmovzxdq
 ;CHECK: paddq
 ;CHECK: pshufd
@@ -42,7 +42,7 @@ define void @load_2_i32(<2 x i32>* %A)  {
    ret void
 } 
 
-;CHECK: load_4_i8
+;CHECK-LABEL: load_4_i8:
 ;CHECK: pmovzxbd
 ;CHECK: paddd
 ;CHECK: pshufb
@@ -54,7 +54,7 @@ define void @load_4_i8(<4 x i8>* %A)  {
    ret void
 } 
 
-;CHECK: load_4_i16
+;CHECK-LABEL: load_4_i16:
 ;CHECK: pmovzxwd
 ;CHECK: paddd
 ;CHECK: pshufb
@@ -66,7 +66,7 @@ define void @load_4_i16(<4 x i16>* %A)  {
    ret void
 } 
 
-;CHECK: load_8_i8
+;CHECK-LABEL: load_8_i8:
 ;CHECK: pmovzxbw
 ;CHECK: paddw
 ;CHECK: pshufb
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 92b6859d1dc4..0ed634774ab3 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -7,7 +7,7 @@ define zeroext i1 @test1(i32 %X)  nounwind {
     %Y = trunc i32 %X to i1
     ret i1 %Y
 }
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: andl $1, %eax
 
 define i1 @test2(i32 %val, i32 %mask) nounwind {
@@ -21,8 +21,8 @@ ret_true:
 ret_false:
     ret i1 false
 }
-; CHECK: test2:
-; CHECK: btl %eax
+; CHECK-LABEL: test2:
+; CHECK: btl
 
 define i32 @test3(i8* %ptr) nounwind {
     %val = load i8* %ptr
@@ -33,7 +33,7 @@ cond_true:
 cond_false:
     ret i32 42
 }
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: testb $1, (%eax)
 
 define i32 @test4(i8* %ptr) nounwind {
@@ -44,7 +44,7 @@ cond_true:
 cond_false:
     ret i32 42
 }
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: testb $1, 4(%esp)
 
 define i32 @test5(double %d) nounwind {
@@ -55,5 +55,5 @@ cond_true:
 cond_false:
     ret i32 42
 }
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: testb $1
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index 9d58019b1a99..b5ca0275d8d6 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -10,7 +10,7 @@
 @G = external global i32
 
 define i32 @test1(i32 %X) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: mov
 ; CHECK: leal 1(%rdi)
         %Z = add i32 %X, 1
@@ -23,7 +23,7 @@ define i32 @test1(i32 %X) nounwind {
 ; commutted (which would require inserting a copy).
 define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
 entry:
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: leal
 ; CHECK-NOT: leal
 ; CHECK-NOT: mov
@@ -38,7 +38,7 @@ entry:
 ; rdar://9002648
 define i64 @test3(i64 %x) nounwind readnone ssp {
 entry:
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: leaq (%rdi,%rdi), %rax
 ; CHECK-NOT: addq
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index 7536fb8f52c4..c5a61c3779bf 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -4,7 +4,7 @@
 define float @test1(i32 %x) nounwind readnone {
 ; CHECK: test1
 ; CHECK: movd
-; CHECK: orpd
+; CHECK: orps
 ; CHECK: subsd
 ; CHECK: cvtsd2ss
 ; CHECK: movss
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index e5858de6ed71..ba5a790f4380 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -6,7 +6,7 @@ define zeroext i1 @a(i32 %x)  nounwind {
   %obil = extractvalue {i32, i1} %res, 1
   ret i1 %obil
   
-; CHECK: a:
+; CHECK-LABEL: a:
 ; CHECK: mull
 ; CHECK: seto %al
 ; CHECK: movzbl	%al, %eax
@@ -19,7 +19,7 @@ entry:
 	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
 	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
 	ret i32 %tmp2
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: addl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: ret
@@ -31,7 +31,7 @@ entry:
 	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
 	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
 	ret i32 %tmp2
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: addl
 ; CHECK: mull
 ; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/unaligned-spill-folding.ll b/test/CodeGen/X86/unaligned-spill-folding.ll
new file mode 100644
index 000000000000..154ce9e324d6
--- /dev/null
+++ b/test/CodeGen/X86/unaligned-spill-folding.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -relocation-model=pic < %s | FileCheck %s -check-prefix=UNALIGNED
+; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=16 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALIGNED
+; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -force-align-stack -relocation-model=pic < %s | FileCheck %s -check-prefix=FORCEALIGNED
+
+@arr = internal unnamed_addr global [32 x i32] zeroinitializer, align 16
+
+; PR12250
+define i32 @test1() {
+vector.ph:
+  br label %vector.body
+
+vector.body:
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds [32 x i32]* @arr, i32 0, i32 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>* %1, align 16
+  %2 = add nsw <4 x i32> %wide.load, <i32 10, i32 10, i32 10, i32 10>
+  %3 = xor <4 x i32> %2, <i32 123345, i32 123345, i32 123345, i32 123345>
+  %4 = add nsw <4 x i32> %3, <i32 112, i32 112, i32 112, i32 112>
+  %5 = xor <4 x i32> %4, <i32 543345, i32 543345, i32 543345, i32 543345>
+  %6 = add nsw <4 x i32> %5, <i32 73, i32 73, i32 73, i32 73>
+  %7 = xor <4 x i32> %6, <i32 345987, i32 345987, i32 345987, i32 345987>
+  %8 = add nsw <4 x i32> %7, <i32 48, i32 48, i32 48, i32 48>
+  %9 = xor <4 x i32> %8, <i32 123987, i32 123987, i32 123987, i32 123987>
+  store <4 x i32> %9, <4 x i32>* %1, align 16
+  %index.next = add i32 %index, 4
+  %10 = icmp eq i32 %index.next, 32
+  br i1 %10, label %middle.block, label %vector.body
+
+middle.block:
+  ret i32 0
+
+; We can't fold the spill into a padd unless the stack is aligned. Just spilling
+; doesn't force stack realignment, though.
+; UNALIGNED-LABEL: @test1
+; UNALIGNED-NOT: andl $-{{..}}, %esp
+; UNALIGNED: movdqu {{.*}} # 16-byte Folded Spill
+; UNALIGNED-NOT: paddd {{.*}} # 16-byte Folded Reload
+
+; ALIGNED-LABEL: @test1
+; ALIGNED-NOT: andl $-{{..}}, %esp
+; ALIGNED: movdqa {{.*}} # 16-byte Spill
+; ALIGNED: paddd {{.*}} # 16-byte Folded Reload
+
+; FORCEALIGNED-LABEL: @test1
+; FORCEALIGNED: andl $-{{..}}, %esp
+; FORCEALIGNED: movdqa {{.*}} # 16-byte Spill
+; FORCEALIGNED: paddd {{.*}} # 16-byte Folded Reload
+}
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index e02e3b54752b..d7ae46939035 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -19,15 +19,18 @@ entry:
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!12}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786443, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 786468, metadata !10, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 30, i32 0} ; [ DW_TAG_lexical_block ]
 !8 = metadata !{i32 4, i32 3, metadata !7, null}
 !9 = metadata !{metadata !1}
 !10 = metadata !{metadata !"test.c", metadata !"/dir"}
+!11 = metadata !{i32 0}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/X86/unwind-init.ll b/test/CodeGen/X86/unwind-init.ll
new file mode 100644
index 000000000000..d0915e244eeb
--- /dev/null
+++ b/test/CodeGen/X86/unwind-init.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-unknown-linux < %s | FileCheck -check-prefix X8664 %s
+; RUN: llc -mtriple=i686-unknown-linux < %s | FileCheck -check-prefix X8632 %s
+; Check that all callee-saved registers are saved and restored in functions
+; that call __builtin_unwind_init(). This is its undocumented behavior in gcc,
+; and it is used in compiling libgcc_eh.
+; See also PR8541
+
+declare void @llvm.eh.unwind.init()
+
+define void @calls_unwind_init() {
+  call void @llvm.eh.unwind.init()
+  ret void
+}
+
+; X8664-LABEL: calls_unwind_init:
+; X8664: pushq %rbp
+; X8664: pushq %r15
+; X8664: pushq %r14
+; X8664: pushq %r13
+; X8664: pushq %r12
+; X8664: pushq %rbx
+; X8664: popq %rbx
+; X8664: popq %r12
+; X8664: popq %r13
+; X8664: popq %r14
+; X8664: popq %r15
+
+; X8632-LABEL: calls_unwind_init:
+; X8632: pushl %ebp
+; X8632: pushl %ebx
+; X8632: pushl %edi
+; X8632: pushl %esi
+; X8632: popl %esi
+; X8632: popl %edi
+; X8632: popl %ebx
+; X8632: popl %ebp
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
index a0448ecee4fa..fd57f5ca8d2d 100644
--- a/test/CodeGen/X86/use-add-flags.ll
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -6,7 +6,7 @@
 
 ; Use the flags on the add.
 
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ;     CHECK: addl
 ; CHECK-NOT: test
 ;     CHECK: cmovnsl
@@ -25,7 +25,7 @@ declare void @foo(i32)
 ; Don't use the flags result of the and here, since the and has no
 ; other use. A simple test is better.
 
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: testb   $16, {{%dil|%cl}}
 
 define void @test2(i32 %x) nounwind {
@@ -41,7 +41,7 @@ false:
 
 ; Do use the flags result of the and here, since the and has another use.
 
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ;      CHECK: andl    $16, %e
 ; CHECK-NEXT: jne
 
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 8655c6c8ea54..fca4da66a85e 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
-; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
+; CHECK: divss
 
 %vec = type <9 x float>
 define %vec @vecdiv( %vec %p1, %vec %p2)
@@ -9,4 +9,3 @@ define %vec @vecdiv( %vec %p1, %vec %p2)
   %result = fdiv %vec %p1, %p2
   ret %vec %result
 }
-
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index 569586af4983..334211132f14 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
 
 %vec = type <6 x float>
+; CHECK: divps
 ; CHECK: divss
 ; CHECK: divss
-; CHECK: divps
 
 ; Scheduler causes a different instruction order to be produced on Intel Atom
 ; ATOM: divps
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index ba5483329169..f2bebf57d4dc 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -10,20 +10,20 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
 
   store float %c, float* %P2
   ret void
-; X64: test1:
+; X64-LABEL: test1:
 ; X64-NEXT: pshufd	$1, %xmm0, %xmm1
 ; X64-NEXT: addss	%xmm0, %xmm1
 ; X64-NEXT: movss	%xmm1, (%rdi)
 ; X64-NEXT: ret
 
-; W64: test1:
+; W64-LABEL: test1:
 ; W64-NEXT: movdqa  (%rcx), %xmm0
 ; W64-NEXT: pshufd  $1, %xmm0, %xmm1
 ; W64-NEXT: addss   %xmm0, %xmm1
 ; W64-NEXT: movss   %xmm1, (%rdx)
 ; W64-NEXT: ret
 
-; X32: test1:
+; X32-LABEL: test1:
 ; X32-NEXT: pshufd	$1, %xmm0, %xmm1
 ; X32-NEXT: addss	%xmm0, %xmm1
 ; X32-NEXT: movl	4(%esp), %eax
@@ -36,16 +36,16 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
   %Z = fadd <2 x float> %Q, %R
   ret <2 x float> %Z
   
-; X64: test2:
+; X64-LABEL: test2:
 ; X64-NEXT: addps	%xmm1, %xmm0
 ; X64-NEXT: ret
 
-; W64: test2:
+; W64-LABEL: test2:
 ; W64-NEXT: movaps  (%rcx), %xmm0
 ; W64-NEXT: addps   (%rdx), %xmm0
 ; W64-NEXT: ret
 
-; X32: test2:
+; X32-LABEL: test2:
 ; X32:      addps	%xmm1, %xmm0
 }
 
@@ -54,16 +54,16 @@ define <2 x float> @test3(<4 x float> %A) nounwind {
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
 	ret <2 x float> %C
-; X64: test3:
+; X64-LABEL: test3:
 ; X64-NEXT: addps	%xmm0, %xmm0
 ; X64-NEXT: ret
 
-; W64: test3:
+; W64-LABEL: test3:
 ; W64-NEXT: movaps  (%rcx), %xmm0
 ; W64-NEXT: addps   %xmm0, %xmm0
 ; W64-NEXT: ret
 
-; X32: test3:
+; X32-LABEL: test3:
 ; X32-NEXT: addps	%xmm0, %xmm0
 ; X32-NEXT: ret
 }
@@ -71,16 +71,16 @@ define <2 x float> @test3(<4 x float> %A) nounwind {
 define <2 x float> @test4(<2 x float> %A) nounwind {
 	%C = fadd <2 x float> %A, %A
 	ret <2 x float> %C
-; X64: test4:
+; X64-LABEL: test4:
 ; X64-NEXT: addps	%xmm0, %xmm0
 ; X64-NEXT: ret
 
-; W64: test4:
+; W64-LABEL: test4:
 ; W64-NEXT: movaps  (%rcx), %xmm0
 ; W64-NEXT: addps   %xmm0, %xmm0
 ; W64-NEXT: ret
 
-; X32: test4:
+; X32-LABEL: test4:
 ; X32-NEXT: addps	%xmm0, %xmm0
 ; X32-NEXT: ret
 }
@@ -95,18 +95,18 @@ BB:
 	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 	ret <4 x float> %E
         
-; X64: test5:
+; X64-LABEL: test5:
 ; X64-NEXT: addps	%xmm0, %xmm0
 ; X64-NEXT: addps	%xmm0, %xmm0
 ; X64-NEXT: ret
 
-; W64: test5:
+; W64-LABEL: test5:
 ; W64-NEXT: movaps  (%rcx), %xmm0
 ; W64-NEXT: addps   %xmm0, %xmm0
 ; W64-NEXT: addps   %xmm0, %xmm0
 ; W64-NEXT: ret
 
-; X32: test5:
+; X32-LABEL: test5:
 ; X32-NEXT: addps	%xmm0, %xmm0
 ; X32-NEXT: addps	%xmm0, %xmm0
 ; X32-NEXT: ret
diff --git a/test/CodeGen/X86/v4i32load-crash.ll b/test/CodeGen/X86/v4i32load-crash.ll
new file mode 100644
index 000000000000..052c4c3c61b8
--- /dev/null
+++ b/test/CodeGen/X86/v4i32load-crash.ll
@@ -0,0 +1,27 @@
+; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s
+
+;PR18045:
+;Instruction selection issue for 'v4i32 load'.
+;This instruction is not legal for X86 CPUs with sse < 'sse4.1'.
+;This node was generated by the static function EltsFromConsecutiveLoads
+;in X86ISelLowering.cpp after the legalize stage.
+
+@e = external global [4 x i32], align 4
+@f = external global [4 x i32], align 4
+
+; Function Attrs: nounwind
+define void @fn3(i32 %el) {
+entry:
+  %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
+  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
+  %4 = insertelement <4 x i32> undef, i32 %0, i32 0
+  %5 = insertelement <4 x i32> %4, i32 %1, i32 1
+  %6 = insertelement <4 x i32> %5, i32 %2, i32 2
+  %7 = insertelement <4 x i32> %6, i32 %3, i32 3
+  %8 = add <4 x i32> %6, %7
+  store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*)
+  ret void
+}
+
diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
index 8cbfb5d7243a..5da6e9636ebe 100644
--- a/test/CodeGen/X86/v8i1-masks.ll
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
 
-;CHECK: and_masks
+;CHECK-LABEL: and_masks:
 ;CHECK: vmovaps
 ;CHECK: vcmpltp
 ;CHECK: vcmpltp
diff --git a/test/CodeGen/X86/vec-sign.ll b/test/CodeGen/X86/vec-sign.ll
index 31b9c2eb4c77..b3d85fd6ec7b 100644
--- a/test/CodeGen/X86/vec-sign.ll
+++ b/test/CodeGen/X86/vec-sign.ll
@@ -2,7 +2,7 @@
 
 define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
 entry:
-; CHECK: signd:
+; CHECK-LABEL: signd:
 ; CHECK: psignd
 ; CHECK-NOT: sub
 ; CHECK: ret
@@ -17,7 +17,7 @@ entry:
 
 define <4 x i32> @blendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind {
 entry:
-; CHECK: blendvb:
+; CHECK-LABEL: blendvb:
 ; CHECK: pblendvb
 ; CHECK: ret
   %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 08eb16f6313b..5f6e7a853a33 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
-;CHECK: foo1_8
+;CHECK-LABEL: foo1_8:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
 define <8 x float> @foo1_8(<8 x i8> %src) {
@@ -8,7 +8,7 @@ define <8 x float> @foo1_8(<8 x i8> %src) {
   ret <8 x float> %res
 }
 
-;CHECK: foo1_4
+;CHECK-LABEL: foo1_4:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
 define <4 x float> @foo1_4(<4 x i8> %src) {
@@ -16,7 +16,7 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
   ret <4 x float> %res
 }
 
-;CHECK: foo2_8
+;CHECK-LABEL: foo2_8:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
 define <8 x float> @foo2_8(<8 x i8> %src) {
@@ -24,7 +24,7 @@ define <8 x float> @foo2_8(<8 x i8> %src) {
   ret <8 x float> %res
 }
 
-;CHECK: foo2_4
+;CHECK-LABEL: foo2_4:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
 define <4 x float> @foo2_4(<4 x i8> %src) {
@@ -32,14 +32,14 @@ define <4 x float> @foo2_4(<4 x i8> %src) {
   ret <4 x float> %res
 }
 
-;CHECK: foo3_8
+;CHECK-LABEL: foo3_8:
 ;CHECK: vcvttps2dq
 ;CHECK: ret
 define <8 x i8> @foo3_8(<8 x float> %src) {
   %res = fptosi <8 x float> %src to <8 x i8>
   ret <8 x i8> %res
 }
-;CHECK: foo3_4
+;CHECK-LABEL: foo3_4:
 ;CHECK: vcvttps2dq
 ;CHECK: ret
 define <4 x i8> @foo3_4(<4 x float> %src) {
diff --git a/test/CodeGen/X86/vec_compare-sse4.ll b/test/CodeGen/X86/vec_compare-sse4.ll
index b4a4a4cfa7af..084d61134206 100644
--- a/test/CodeGen/X86/vec_compare-sse4.ll
+++ b/test/CodeGen/X86/vec_compare-sse4.ll
@@ -1,15 +1,15 @@
 ; RUN: llc < %s -march=x86 -mattr=-sse3,+sse2 | FileCheck %s -check-prefix=SSE2
-; RUN: llc < %s -march=x86 -mattr=-sse42,+sse41 | FileCheck %s -check-prefix=SSE41
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -check-prefix=SSE42
+; RUN: llc < %s -march=x86 -mattr=-sse4.2,+sse4.1 | FileCheck %s -check-prefix=SSE41
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s -check-prefix=SSE42
 
 define <2 x i64> @test1(<2 x i64> %A, <2 x i64> %B) nounwind {
-; SSE42: test1:
+; SSE42-LABEL: test1:
 ; SSE42: pcmpgtq
 ; SSE42: ret
-; SSE41: test1:
+; SSE41-LABEL: test1:
 ; SSE41-NOT: pcmpgtq
 ; SSE41: ret
-; SSE2: test1:
+; SSE2-LABEL: test1:
 ; SSE2-NOT: pcmpgtq
 ; SSE2: ret
 
@@ -19,13 +19,13 @@ define <2 x i64> @test1(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test2(<2 x i64> %A, <2 x i64> %B) nounwind {
-; SSE42: test2:
+; SSE42-LABEL: test2:
 ; SSE42: pcmpeqq
 ; SSE42: ret
-; SSE41: test2:
+; SSE41-LABEL: test2:
 ; SSE41: pcmpeqq
 ; SSE41: ret
-; SSE2: test2:
+; SSE2-LABEL: test2:
 ; SSE2-NOT: pcmpeqq
 ; SSE2: ret
 
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index fd5c234bb160..365fe92220b5 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -2,7 +2,7 @@
 
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: pcmpgtd
 ; CHECK: ret
 
@@ -12,7 +12,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: pcmp
 ; CHECK: pcmp
 ; CHECK: pxor
@@ -23,7 +23,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: pcmpgtd
 ; CHECK: movdqa
 ; CHECK: ret
@@ -33,7 +33,7 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movdqa
 ; CHECK: pcmpgtd
 ; CHECK: ret
@@ -43,7 +43,7 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
 }
 
 define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: pcmpeqd
 ; CHECK: pshufd $-79
 ; CHECK: pand
@@ -54,7 +54,7 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test6:
+; CHECK-LABEL: test6:
 ; CHECK: pcmpeqd
 ; CHECK: pshufd $-79
 ; CHECK: pand
@@ -72,7 +72,7 @@ define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT: .long	0
 ; CHECK-NEXT: .long	2147483648
 ; CHECK-NEXT: .long	0
-; CHECK: test7:
+; CHECK-LABEL: test7:
 ; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]]
 ; CHECK: pxor [[CONSTREG]]
 ; CHECK: pxor [[CONSTREG]]
@@ -90,7 +90,7 @@ define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test8:
+; CHECK-LABEL: test8:
 ; CHECK: pxor
 ; CHECK: pxor
 ; CHECK: pcmpgtd %xmm0
@@ -107,7 +107,7 @@ define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test9:
+; CHECK-LABEL: test9:
 ; CHECK: pxor
 ; CHECK: pxor
 ; CHECK: pcmpgtd %xmm0
@@ -126,7 +126,7 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test10:
+; CHECK-LABEL: test10:
 ; CHECK: pxor
 ; CHECK: pxor
 ; CHECK: pcmpgtd %xmm1
@@ -150,7 +150,7 @@ define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT: .long	2147483648
 ; CHECK-NEXT: .long	2147483648
 ; CHECK-NEXT: .long	2147483648
-; CHECK: test11:
+; CHECK-LABEL: test11:
 ; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]]
 ; CHECK: pxor [[CONSTREG]]
 ; CHECK: pxor [[CONSTREG]]
@@ -168,7 +168,7 @@ define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test12:
+; CHECK-LABEL: test12:
 ; CHECK: pxor
 ; CHECK: pxor
 ; CHECK: pcmpgtd %xmm0
@@ -185,7 +185,7 @@ define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test13:
+; CHECK-LABEL: test13:
 ; CHECK: pxor
 ; CHECK: pxor
 ; CHECK: pcmpgtd %xmm0
@@ -204,7 +204,7 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
 }
 
 define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK: test14:
+; CHECK-LABEL: test14:
 ; CHECK: pxor
 ; CHECK: pxor
 ; CHECK: pcmpgtd %xmm1
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index 42d7f27f7d60..3cb519adf4f8 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 -o %t
 ; RUN: not grep extractps   %t
 ; RUN: not grep pextrd      %t
 ; RUN: not grep pshufd  %t
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index 2c8796bc4ff7..88f5a585b9fd 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse41 -o %t
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 -o %t
 ; RUN: grep movss    %t | count 4
 ; RUN: grep movhlps  %t | count 1
 ; RUN: not grep pshufd   %t 
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
index 863712ff48b3..7ec07ae0f959 100644
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.1,-avx | FileCheck %s
 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
 
 ; PR11674
diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll
index dee91fd01468..fe20a474f59a 100644
--- a/test/CodeGen/X86/vec_insert-2.ll
+++ b/test/CodeGen/X86/vec_insert-2.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X32 %s
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | FileCheck --check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | FileCheck --check-prefix=X64 %s
 
 define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
-; X32: t1:
+; X32-LABEL: t1:
 ; X32: shufps $36
 ; X32: ret
 
@@ -11,7 +11,7 @@ define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
 }
 
 define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) nounwind {
-; X32: t2:
+; X32-LABEL: t2:
 ; X32: shufps $36
 ; X32: ret
 
@@ -20,11 +20,11 @@ define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) nounwind {
 }
 
 define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
-; X32: t3:
+; X32-LABEL: t3:
 ; X32: movhpd
 ; X32: ret
 
-; X64: t3:
+; X64-LABEL: t3:
 ; X64: unpcklpd
 ; X64: ret
 
@@ -33,7 +33,7 @@ define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
 }
 
 define <8 x i16> @t4(i16 %s, <8 x i16> %tmp) nounwind {
-; X32: t4:
+; X32-LABEL: t4:
 ; X32: pinsrw
 ; X32: ret
 
diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll
index a18cd86489cc..a8713398e955 100644
--- a/test/CodeGen/X86/vec_insert-3.ll
+++ b/test/CodeGen/X86/vec_insert-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | grep punpcklqdq | count 1
 
 define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
         %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 471cc1611fce..5cb9f694bd61 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,8 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep shll %t | grep 12
-; RUN: grep pslldq %t | grep 12
-; RUN: grep psrldq %t | grep 8
-; RUN: grep psrldq %t | grep 12
+; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s
 ; There are no MMX operations in @t1
 
 define void  @t1(i32 %a, x86_mmx* %P) nounwind {
@@ -12,22 +8,60 @@ define void  @t1(i32 %a, x86_mmx* %P) nounwind {
        %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
        store x86_mmx %tmp23, x86_mmx* %P
        ret void
+
+; CHECK-LABEL: t1:
+; CHECK-NOT: %mm
+; CHECK: shll $12
+; CHECK-NOT: %mm
 }
 
 define <4 x float> @t2(<4 x float>* %P) nounwind {
         %tmp1 = load <4 x float>* %P
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
         ret <4 x float> %tmp2
+
+; CHECK-LABEL: t2:
+; CHECK: pslldq $12
 }
 
 define <4 x float> @t3(<4 x float>* %P) nounwind {
         %tmp1 = load <4 x float>* %P
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
         ret <4 x float> %tmp2
+
+; CHECK-LABEL: t3:
+; CHECK: psrldq $8
 }
 
 define <4 x float> @t4(<4 x float>* %P) nounwind {
         %tmp1 = load <4 x float>* %P
         %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
         ret <4 x float> %tmp2
+
+; CHECK-LABEL: t4:
+; CHECK: psrldq $12
+}
+
+define <16 x i8> @t5(<16 x i8> %x) nounwind {
+        %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
+        ret <16 x i8> %s
+
+; CHECK-LABEL: t5:
+; CHECK: psrldq $1
+}
+
+define <16 x i8> @t6(<16 x i8> %x) nounwind {
+        %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+        ret <16 x i8> %s
+
+; CHECK-LABEL: t6:
+; CHECK: palignr $1
+}
+
+define <16 x i8> @t7(<16 x i8> %x) nounwind {
+        %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
+        ret <16 x i8> %s
+
+; CHECK-LABEL: t7:
+; CHECK: pslldq $13
 }
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 268b5c4bf972..6d4f8287cab6 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=i686-apple-darwin9 | FileCheck %s
 ; MMX insertelement is not available; these are promoted to XMM.
 ; (Without SSE they are split to two ints, and the code is much better.)
 
diff --git a/test/CodeGen/X86/vec_insert-8.ll b/test/CodeGen/X86/vec_insert-8.ll
index 650951cc9e5e..917832c40adb 100644
--- a/test/CodeGen/X86/vec_insert-8.ll
+++ b/test/CodeGen/X86/vec_insert-8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
 
 ; tests variable insert and extract of a 4 x i32
 
diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll
index e5a7ccc5ef94..5f2e676ef1ae 100644
--- a/test/CodeGen/X86/vec_insert-9.ll
+++ b/test/CodeGen/X86/vec_insert-9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t
 ; RUN: grep pinsrd %t | count 1
 
 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind  {
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
index 4e5d445ff623..0ed8f1052366 100644
--- a/test/CodeGen/X86/vec_insert.ll
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | not grep pinsrw
 
 define void @test(<4 x float>* %F, i32 %I) nounwind {
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_round.ll b/test/CodeGen/X86/vec_round.ll
new file mode 100644
index 000000000000..baa2f58631d4
--- /dev/null
+++ b/test/CodeGen/X86/vec_round.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=nehalem -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @use(<2 x double>)
+
+; CHECK-LABEL: @test
+; CHECK: callq round
+
+; The <2 x double> llvm.round call below is expected to become calls to round.
+define void @test() {
+entry:
+  %tmp = call <2 x double> @llvm.round.v2f64(<2 x double> undef)
+  call void @use(<2 x double> %tmp)
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
+
+attributes #0 = { nounwind readonly }
+
diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll
index 349868a87f53..56855d3c44eb 100644
--- a/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -70,3 +70,11 @@ entry:
   %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
   ret <16 x i16> %a0
 }
+
+; CHECK: sdiv_non_splat
+; CHECK: idivl
+; CHECK: ret
+define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
+  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
+  ret <4 x i32> %y
+}
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index 66056d0add9c..41061ae7ac23 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=-avx | FileCheck %s
 ; CHECK-NOT: movsd
 ; CHECK: movd {{%rdi|%rcx}}, %xmm0
 ; CHECK-NOT: movsd
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 6979f6bb1c26..a73909097c11 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=-avx,-pad-short-functions | FileCheck %s
 
 ; CHECK: test3
 ; CHECK: movd
diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll
index 133f23b42d90..052da30a6bb8 100644
--- a/test/CodeGen/X86/vec_set-C.ll
+++ b/test/CodeGen/X86/vec_set-C.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 | grep movq
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 | grep mov | count 1
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2,-avx | grep movq
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2,-avx | grep mov | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux -mattr=+sse2,-avx | grep movd
 
 define <2 x i64> @t1(i64 %x) nounwind  {
 	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 7f5f8dd213a5..53d880b4bbdd 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep punpckl | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | grep punpckl | count 7
 
 define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
         %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0          ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_setcc.ll b/test/CodeGen/X86/vec_setcc.ll
new file mode 100644
index 000000000000..fc8a56de7917
--- /dev/null
+++ b/test/CodeGen/X86/vec_setcc.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse4.1 | FileCheck %s -check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX
+
+define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
+  %1 = icmp uge <16 x i8> %a, %b
+  %2 = sext <16 x i1> %1 to <16 x i8>
+  ret <16 x i8> %2
+; SSE2-LABEL: v16i8_icmp_uge:
+; SSE2: pmaxub  %xmm0, %xmm1
+; SSE2: pcmpeqb %xmm1, %xmm0
+
+; SSE41-LABEL: v16i8_icmp_uge:
+; SSE41: pmaxub  %xmm0, %xmm1
+; SSE41: pcmpeqb %xmm1, %xmm0
+
+; AVX-LABEL: v16i8_icmp_uge:
+; AVX: vpmaxub  %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
+}
+
+define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
+  %1 = icmp ule <16 x i8> %a, %b
+  %2 = sext <16 x i1> %1 to <16 x i8>
+  ret <16 x i8> %2
+; SSE2-LABEL: v16i8_icmp_ule:
+; SSE2: pminub  %xmm0, %xmm1
+; SSE2: pcmpeqb %xmm1, %xmm0
+
+; SSE41-LABEL: v16i8_icmp_ule:
+; SSE41: pminub  %xmm0, %xmm1
+; SSE41: pcmpeqb %xmm1, %xmm0
+
+; AVX-LABEL: v16i8_icmp_ule:
+; AVX: vpminub  %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
+}
+
+
+define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
+  %1 = icmp uge <8 x i16> %a, %b
+  %2 = sext <8 x i1> %1 to <8 x i16>
+  ret <8 x i16> %2
+; SSE2-LABEL: v8i16_icmp_uge:
+; SSE2: movdqa  {{.*}}(%rip), %xmm2
+; SSE2: pxor    %xmm2, %xmm0
+; SSE2: pxor    %xmm1, %xmm2
+; SSE2: pcmpgtw %xmm0, %xmm2
+; SSE2: pcmpeqd %xmm0, %xmm0
+; SSE2: pxor    %xmm2, %xmm0
+
+; SSE41-LABEL: v8i16_icmp_uge:
+; SSE41: pmaxuw  %xmm0, %xmm1
+; SSE41: pcmpeqw %xmm1, %xmm0
+
+; AVX-LABEL: v8i16_icmp_uge:
+; AVX: vpmaxuw  %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
+}
+
+define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
+  %1 = icmp ule <8 x i16> %a, %b
+  %2 = sext <8 x i1> %1 to <8 x i16>
+  ret <8 x i16> %2
+; SSE2-LABEL: v8i16_icmp_ule:
+; SSE2: movdqa  {{.*}}(%rip), %xmm2
+; SSE2: pxor    %xmm2, %xmm1
+; SSE2: pxor    %xmm2, %xmm0
+; SSE2: pcmpgtw %xmm1, %xmm0
+; SSE2: pcmpeqd %xmm1, %xmm1
+; SSE2: pxor    %xmm0, %xmm1
+; SSE2: movdqa  %xmm1, %xmm0
+
+; SSE41-LABEL: v8i16_icmp_ule:
+; SSE41: pminuw  %xmm0, %xmm1
+; SSE41: pcmpeqw %xmm1, %xmm0
+
+; AVX-LABEL: v8i16_icmp_ule:
+; AVX: vpminuw  %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
+}
+
+
+define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
+  %1 = icmp uge <4 x i32> %a, %b
+  %2 = sext <4 x i1> %1 to <4 x i32>
+  ret <4 x i32> %2
+; SSE2-LABEL: v4i32_icmp_uge:
+; SSE2: movdqa  {{.*}}(%rip), %xmm2
+; SSE2: pxor    %xmm2, %xmm0
+; SSE2: pxor    %xmm1, %xmm2
+; SSE2: pcmpgtd %xmm0, %xmm2
+; SSE2: pcmpeqd %xmm0, %xmm0
+; SSE2: pxor    %xmm2, %xmm0
+
+; SSE41-LABEL: v4i32_icmp_uge:
+; SSE41: pmaxud  %xmm0, %xmm1
+; SSE41: pcmpeqd %xmm1, %xmm0
+
+; AVX-LABEL: v4i32_icmp_uge:
+; AVX: vpmaxud  %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
+}
+
+define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
+  %1 = icmp ule <4 x i32> %a, %b
+  %2 = sext <4 x i1> %1 to <4 x i32>
+  ret <4 x i32> %2
+; SSE2-LABEL: v4i32_icmp_ule:
+; SSE2: movdqa  {{.*}}(%rip), %xmm2
+; SSE2: pxor    %xmm2, %xmm1
+; SSE2: pxor    %xmm2, %xmm0
+; SSE2: pcmpgtd %xmm1, %xmm0
+; SSE2: pcmpeqd %xmm1, %xmm1
+; SSE2: pxor    %xmm0, %xmm1
+; SSE2: movdqa  %xmm1, %xmm0
+
+; SSE41-LABEL: v4i32_icmp_ule:
+; SSE41: pminud  %xmm0, %xmm1
+; SSE41: pcmpeqd %xmm1, %xmm0
+
+; AVX-LABEL: v4i32_icmp_ule:
+; AVX: vpminud  %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
+}
+
+; At one point we were incorrectly constant-folding a setcc to 0x1 instead of
+; 0xff, leading to a constpool load. The instruction doesn't matter here, but it
+; should set all bits to 1.
+define <16 x i8> @test_setcc_constfold_vi8(<16 x i8> %l, <16 x i8> %r) {
+  %test1 = icmp eq <16 x i8> %l, %r
+  %mask1 = sext <16 x i1> %test1 to <16 x i8>
+
+  %test2 = icmp ne <16 x i8> %l, %r
+  %mask2 = sext <16 x i1> %test2 to <16 x i8>
+
+  %res = or <16 x i8> %mask1, %mask2
+  ret <16 x i8> %res
+; SSE2-LABEL: test_setcc_constfold_vi8:
+; SSE2: pcmpeqd %xmm0, %xmm0
+
+; SSE41-LABEL: test_setcc_constfold_vi8:
+; SSE41: pcmpeqd %xmm0, %xmm0
+
+; AVX-LABEL: test_setcc_constfold_vi8:
+; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
+}
+
+; Make sure sensible results come from doing extension afterwards
+define <16 x i8> @test_setcc_constfold_vi1(<16 x i8> %l, <16 x i8> %r) {
+  %test1 = icmp eq <16 x i8> %l, %r
+  %test2 = icmp ne <16 x i8> %l, %r
+
+  %res = or <16 x i1> %test1, %test2
+  %mask = sext <16 x i1> %res to <16 x i8>
+  ret <16 x i8> %mask
+; SSE2-LABEL: test_setcc_constfold_vi1:
+; SSE2: pcmpeqd %xmm0, %xmm0
+
+; SSE41-LABEL: test_setcc_constfold_vi1:
+; SSE41: pcmpeqd %xmm0, %xmm0
+
+; AVX-LABEL: test_setcc_constfold_vi1:
+; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
+}
+
+
+; 64-bit case is also particularly important, as the constant "-1" is probably
+; just 32-bits wide.
+define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) {
+  %test1 = icmp eq <2 x i64> %l, %r
+  %mask1 = sext <2 x i1> %test1 to <2 x i64>
+
+  %test2 = icmp ne <2 x i64> %l, %r
+  %mask2 = sext <2 x i1> %test2 to <2 x i64>
+
+  %res = or <2 x i64> %mask1, %mask2
+  ret <2 x i64> %res
+; SSE2-LABEL: test_setcc_constfold_vi64:
+; SSE2: pcmpeqd %xmm0, %xmm0
+
+; SSE41-LABEL: test_setcc_constfold_vi64:
+; SSE41: pcmpeqd %xmm0, %xmm0
+
+; AVX-LABEL: test_setcc_constfold_vi64:
+; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
+}
diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
index 9ef7fbdb0c50..e2fe45cf9724 100644
--- a/test/CodeGen/X86/vec_shift4.ll
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
 
 define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
index f0cfc44ab19a..8f2519728b77 100644
--- a/test/CodeGen/X86/vec_shuffle-14.ll
+++ b/test/CodeGen/X86/vec_shuffle-14.ll
@@ -1,14 +1,17 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s -check-prefix=X86-32
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-avx | FileCheck %s -check-prefix=X86-64
 
 define <4 x i32> @t1(i32 %a) nounwind  {
 entry:
         %tmp = insertelement <4 x i32> undef, i32 %a, i32 0
 	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> < i32 4, i32 1, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %tmp6
+
+; X86-32-LABEL: t1:
+; X86-32: movd	4(%esp), %xmm0
+
+; X86-64-LABEL: t1:
+; X86-64: movd	%e{{..}}, %xmm0
 }
 
 define <2 x i64> @t2(i64 %a) nounwind  {
@@ -16,6 +19,12 @@ entry:
         %tmp = insertelement <2 x i64> undef, i64 %a, i32 0
 	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %tmp, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
 	ret <2 x i64> %tmp6
+
+; X86-32-LABEL: t2:
+; X86-32: movq	4(%esp), %xmm0
+
+; X86-64-LABEL: t2:
+; X86-64: movd	%r{{..}}, %xmm0
 }
 
 define <2 x i64> @t3(<2 x i64>* %a) nounwind  {
@@ -25,6 +34,13 @@ entry:
 	%tmp7 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp6, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
 	%tmp8 = bitcast <4 x i32> %tmp7 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %tmp8
+
+; X86-32-LABEL: t3:
+; X86-32: movl	4(%esp)
+; X86-32: movq
+
+; X86-64-LABEL: t3:
+; X86-64: movq	({{.*}}), %xmm0
 }
 
 define <2 x i64> @t4(<2 x i64> %a) nounwind  {
@@ -33,10 +49,22 @@ entry:
 	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp5, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
 	%tmp7 = bitcast <4 x i32> %tmp6 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %tmp7
+
+; X86-32-LABEL: t4:
+; X86-32: movq %xmm0, %xmm0
+
+; X86-64-LABEL: t4:
+; X86-64: movq {{.*}}, %xmm0
 }
 
 define <2 x i64> @t5(<2 x i64> %a) nounwind  {
 entry:
 	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
 	ret <2 x i64> %tmp6
+
+; X86-32-LABEL: t5:
+; X86-32: movq %xmm0, %xmm0
+
+; X86-64-LABEL: t5:
+; X86-64: movq {{.*}}, %xmm0
 }
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
index 09d4c1a64a01..9aeb94289c87 100644
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
 ; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
 
-; sse:  t1:
-; sse2: t1:
+; sse-LABEL:  t1:
+; sse2-LABEL: t1:
 define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
 ; sse: shufps
 ; sse2: pshufd
@@ -11,8 +11,8 @@ define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
         ret <4 x float> %tmp1
 }
 
-; sse:  t2:
-; sse2: t2:
+; sse-LABEL:  t2:
+; sse2-LABEL: t2:
 define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
 ; sse: shufps
 ; sse2: pshufd
@@ -21,8 +21,8 @@ define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
 	ret <4 x float> %tmp
 }
 
-; sse:  t3:
-; sse2: t3:
+; sse-LABEL:  t3:
+; sse2-LABEL: t3:
 define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
 ; sse: shufps
 ; sse2: pshufd
@@ -31,8 +31,8 @@ define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
 	ret <4 x float> %tmp
 }
 
-; sse:  t4:
-; sse2: t4:
+; sse-LABEL:  t4:
+; sse2-LABEL: t4:
 define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
 
 ; sse: shufps
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index ebc8c5b34a90..f2f96ba94af1 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=-avx | FileCheck %s
 ; CHECK-NOT: xor
 ; CHECK: movd {{%rdi|%rcx}}, %xmm0
 ; CHECK-NOT: xor
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
index d9b2388809aa..3f42a132ef2b 100644
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: llc < %s -march=x86 -mattr=sse4.1 -o %t
 ; RUN: grep unpcklps %t | count 3
 ; RUN: grep unpckhps %t | count 1
  
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index 4c56f848dedb..00e8e73e184e 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse4.1 | FileCheck %s
 ; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
 
 ; Transpose example using the more generic vector shuffle. Return float8
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 0aff822850c0..c9b2fb51d78f 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
 
 ; ModuleID = 'vec_shuffle-27.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
@@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0"
 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
 entry:
 ; CHECK: subps
-; CHECK: mulps
-; CHECK: addps
 ; CHECK: subps
 ; CHECK: mulps
+; CHECK: mulps
+; CHECK: addps
 ; CHECK: addps
 	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x float>> [#uses=1]
 	%sub = fsub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
index 9a06015745ed..f1d0f939e60c 100644
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
 
 define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 ; CHECK: pshufb
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
index ee8d2d5e0b3e..8fd9a5cd023e 100644
--- a/test/CodeGen/X86/vec_shuffle-39.ll
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -3,7 +3,7 @@
 
 define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: movlps (%rdi), %xmm0
 ; CHECK: ret
   %p.val = load <1 x i64>* %p, align 1
@@ -15,7 +15,7 @@ entry:
 
 define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
 entry:
-; CHECK: t1a:
+; CHECK-LABEL: t1a:
 ; CHECK: movlps (%rdi), %xmm0
 ; CHECK: ret
   %0 = bitcast <1 x i64>* %p to double*
@@ -28,7 +28,7 @@ entry:
 
 define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: movlps %xmm0, (%rdi)
 ; CHECK: ret
   %cast.i = bitcast <4 x float> %a to <2 x i64>
@@ -40,7 +40,7 @@ entry:
 
 define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
 entry:
-; CHECK: t2a:
+; CHECK-LABEL: t2a:
 ; CHECK: movlps %xmm0, (%rdi)
 ; CHECK: ret
   %0 = bitcast <1 x i64>* %p to double*
@@ -53,9 +53,9 @@ entry:
 ; rdar://10436044
 define <2 x double> @t3() nounwind readonly {
 bb:
-; CHECK: t3:
-; CHECK: punpcklqdq %xmm1, %xmm0
+; CHECK-LABEL: t3:
 ; CHECK: movq (%rax), %xmm1
+; CHECK: punpcklqdq %xmm2, %xmm0
 ; CHECK: movsd %xmm1, %xmm0
   %tmp0 = load i128* null, align 1
   %tmp1 = load <2 x i32>* undef, align 8
@@ -71,10 +71,10 @@ bb:
 ; rdar://10450317
 define <2 x i64> @t4() nounwind readonly {
 bb:
-; CHECK: t4:
-; CHECK: punpcklqdq %xmm0, %xmm1
+; CHECK-LABEL: t4:
 ; CHECK: movq (%rax), %xmm0
-; CHECK: movsd %xmm1, %xmm0
+; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]]
+; CHECK: movsd %[[XMM]], %xmm0
   %tmp0 = load i128* null, align 1
   %tmp1 = load <2 x i32>* undef, align 8
   %tmp2 = bitcast i128 %tmp0 to <16 x i8>
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index 5c668b7e5a5b..9d82f97dca1c 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -24,7 +24,7 @@ define void @test(<2 x i64>* %P, i8 %x) nounwind {
 	store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P
 	ret void
 
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK-NOT: pshufd
 ; CHECK: punpcklbw
 ; CHECK: punpcklbw
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index cf0ecf40554d..754cbf41867d 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,11 +1,11 @@
-; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
+; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
 
 ; Splat test for v8i16
 define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_0:
+; CHECK-LABEL: shuf_8i16_0:
 ; CHECK: pshuflw $0
 }
 
@@ -13,7 +13,7 @@ define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_1:
+; CHECK-LABEL: shuf_8i16_1:
 ; CHECK: pshuflw $5
 }
 
@@ -21,7 +21,7 @@ define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_2:
+; CHECK-LABEL: shuf_8i16_2:
 ; CHECK: punpcklwd
 ; CHECK-NEXT: pshufd $-86
 }
@@ -30,7 +30,7 @@ define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_3:
+; CHECK-LABEL: shuf_8i16_3:
 ; CHECK: pshuflw $15
 }
 
@@ -38,7 +38,7 @@ define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_4:
+; CHECK-LABEL: shuf_8i16_4:
 ; CHECK: movhlps
 }
 
@@ -46,7 +46,7 @@ define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_5:
+; CHECK-LABEL: shuf_8i16_5:
 ; CHECK: punpckhwd
 ; CHECK-NEXT: pshufd $85
 }
@@ -55,7 +55,7 @@ define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_6:
+; CHECK-LABEL: shuf_8i16_6:
 ; CHECK: punpckhwd
 ; CHECK-NEXT: pshufd $-86
 }
@@ -64,7 +64,7 @@ define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 	ret <8 x i16> %tmp6
 
-; CHECK: shuf_8i16_7:
+; CHECK-LABEL: shuf_8i16_7:
 ; CHECK: punpckhwd
 ; CHECK-NEXT: pshufd $-1
 }
@@ -74,7 +74,7 @@ define <16 x i8> @shuf_16i8_8(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_8:
+; CHECK-LABEL: shuf_16i8_8:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $0
@@ -84,7 +84,7 @@ define <16 x i8> @shuf_16i8_9(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_9:
+; CHECK-LABEL: shuf_16i8_9:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $85
@@ -94,7 +94,7 @@ define <16 x i8> @shuf_16i8_10(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_10:
+; CHECK-LABEL: shuf_16i8_10:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $-86
@@ -104,7 +104,7 @@ define <16 x i8> @shuf_16i8_11(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_11:
+; CHECK-LABEL: shuf_16i8_11:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $-1
@@ -115,7 +115,7 @@ define <16 x i8> @shuf_16i8_12(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_12:
+; CHECK-LABEL: shuf_16i8_12:
 ; CHECK: pshufd $5
 }
 
@@ -123,7 +123,7 @@ define <16 x i8> @shuf_16i8_13(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_13:
+; CHECK-LABEL: shuf_16i8_13:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $85
@@ -133,7 +133,7 @@ define <16 x i8> @shuf_16i8_14(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_14:
+; CHECK-LABEL: shuf_16i8_14:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $-86
@@ -143,7 +143,7 @@ define <16 x i8> @shuf_16i8_15(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_15:
+; CHECK-LABEL: shuf_16i8_15:
 ; CHECK: punpcklbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $-1
@@ -153,7 +153,7 @@ define <16 x i8> @shuf_16i8_16(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_16:
+; CHECK-LABEL: shuf_16i8_16:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $0
@@ -163,7 +163,7 @@ define <16 x i8> @shuf_16i8_17(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_17:
+; CHECK-LABEL: shuf_16i8_17:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $85
@@ -173,7 +173,7 @@ define <16 x i8> @shuf_16i8_18(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_18:
+; CHECK-LABEL: shuf_16i8_18:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $-86
@@ -183,7 +183,7 @@ define <16 x i8> @shuf_16i8_19(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_19:
+; CHECK-LABEL: shuf_16i8_19:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpcklbw
 ; CHECK-NEXT: pshufd $-1
@@ -193,7 +193,7 @@ define <16 x i8> @shuf_16i8_20(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_20:
+; CHECK-LABEL: shuf_16i8_20:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $0
@@ -203,7 +203,7 @@ define <16 x i8> @shuf_16i8_21(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_21:
+; CHECK-LABEL: shuf_16i8_21:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $85
@@ -213,7 +213,7 @@ define <16 x i8> @shuf_16i8_22(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_22:
+; CHECK-LABEL: shuf_16i8_22:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $-86
@@ -223,7 +223,7 @@ define <16 x i8> @shuf_16i8_23(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
 	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
 	ret <16 x i8> %tmp6
 
-; CHECK: shuf_16i8_23:
+; CHECK-LABEL: shuf_16i8_23:
 ; CHECK: punpckhbw
 ; CHECK-NEXT: punpckhbw
 ; CHECK-NEXT: pshufd $-1
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index deedee801967..543c96ef3d45 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -11,10 +11,10 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
 	store <4 x float> %tmp10, <4 x float>* %P
 	ret void
 
-; SSE2: test_v4sf:
+; SSE2-LABEL: test_v4sf:
 ; SSE2: pshufd $0
 
-; SSE3: test_v4sf:
+; SSE3-LABEL: test_v4sf:
 ; SSE3: pshufd $0
 }
 
@@ -26,9 +26,9 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
 	store <2 x double> %tmp6, <2 x double>* %P
 	ret void
 
-; SSE2: test_v2sd:
+; SSE2-LABEL: test_v2sd:
 ; SSE2: shufpd $0
 
-; SSE3: test_v2sd:
+; SSE3-LABEL: test_v2sd:
 ; SSE3: movddup
 }
diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll
new file mode 100644
index 000000000000..f9e7c20ba4e2
--- /dev/null
+++ b/test/CodeGen/X86/vec_split.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
+; SSE4-LABEL: split16:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split16:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split16:
+; AVX2: vpminuw
+; AVX2: ret
+  %1 = icmp ult <16 x i16> %a, %b
+  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+  ret <16 x i16> %2
+}
+
+define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
+; SSE4-LABEL: split32:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split32:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split32:
+; AVX2: vpminuw
+; AVX2: vpminuw
+; AVX2: ret
+  %1 = icmp ult <32 x i16> %a, %b
+  %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
+  ret <32 x i16> %2
+}
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index c294df575a10..80f12a2dec2c 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8.7.2"
@@ -15,7 +15,7 @@ define i16 @test1(float %f) nounwind {
 	%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
 	%tmp69 = trunc i32 %tmp.upgrd.1 to i16		; <i16> [#uses=1]
 	ret i16 %tmp69
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK: subss	LCPI0_
 ; CHECK: mulss	LCPI0_
 ; CHECK: minss	LCPI0_
@@ -30,7 +30,7 @@ define i16 @test2(float %f) nounwind {
 	%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
 	%tmp69 = trunc i32 %tmp to i16		; <i16> [#uses=1]
 	ret i16 %tmp69
-; CHECK: test2:
+; CHECK-LABEL: test2:
 ; CHECK: addss	LCPI1_
 ; CHECK: mulss	LCPI1_
 ; CHECK: minss	LCPI1_
@@ -55,7 +55,7 @@ define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
   %B = insertelement <4 x float> undef, float %a, i32 0
   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
   ret <4 x float> %X
-; CHECK: test3:
+; CHECK-LABEL: test3:
 ; CHECK: roundss	$4, (%eax), %xmm0
 }
 
@@ -65,7 +65,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
   %q = call <4 x float> @f()
   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)
   ret <4 x float> %X
-; CHECK: test4:
+; CHECK-LABEL: test4:
 ; CHECK: movss	(%eax), %xmm
 ; CHECK: call
 ; CHECK: roundss $4, %xmm{{.*}}, %xmm0
@@ -77,7 +77,7 @@ entry:
   %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double
 4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
   ret <2 x double> %0
-; CHECK: test5:
+; CHECK-LABEL: test5:
 ; CHECK: mov
 ; CHECK: mov
 ; CHECK: cvtsi2sd
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll
index fe7fa2fe67d5..ee20f1fcbd04 100644
--- a/test/CodeGen/X86/vec_uint_to_fp.ll
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -2,7 +2,7 @@
 
 ; Test that we are not lowering uinttofp to scalars
 define <4 x float> @test1(<4 x i32> %A) nounwind {
-; CHECK: test1:
+; CHECK-LABEL: test1:
 ; CHECK-NOT: cvtsd2ss
 ; CHECK: ret
   %C = uitofp <4 x i32> %A to <4 x float>
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index ec93ce0761cc..b87d8447e543 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
 ; RUN: opt -instsimplify -disable-output < %s
 
-;CHECK: AGEP0:
+;CHECK-LABEL: AGEP0:
 define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
 entry:
   %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
@@ -16,7 +16,7 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP1:
+;CHECK-LABEL: AGEP1:
 define i32 @AGEP1(<4 x i32*> %param) nounwind {
 entry:
 ;CHECK: padd
@@ -27,7 +27,7 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP2:
+;CHECK-LABEL: AGEP2:
 define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
 entry:
 ;CHECK: pslld $2
@@ -39,7 +39,7 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP3:
+;CHECK-LABEL: AGEP3:
 define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
 entry:
 ;CHECK: pslld $2
@@ -51,7 +51,7 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP4:
+;CHECK-LABEL: AGEP4:
 define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
 entry:
 ; Multiply offset by two (add it to itself).
@@ -63,7 +63,7 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP5:
+;CHECK-LABEL: AGEP5:
 define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
 entry:
 ;CHECK: paddd
@@ -74,7 +74,7 @@ entry:
 
 
 ; The size of each element is 1 byte. No need to multiply by element size.
-;CHECK: AGEP6:
+;CHECK-LABEL: AGEP6:
 define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
 entry:
 ;CHECK-NOT: pslld
diff --git a/test/CodeGen/X86/vector-variable-idx2.ll b/test/CodeGen/X86/vector-variable-idx2.ll
index d47df90e7e64..6e8ae2e42c94 100644
--- a/test/CodeGen/X86/vector-variable-idx2.ll
+++ b/test/CodeGen/X86/vector-variable-idx2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0.0"
diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll
index f748a14836c8..0be00da83fdf 100644
--- a/test/CodeGen/X86/viabs.ll
+++ b/test/CodeGen/X86/viabs.ll
@@ -3,18 +3,18 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
 
 define <4 x i32> @test1(<4 x i32> %a) nounwind {
-; SSE2: test1:
+; SSE2-LABEL: test1:
 ; SSE2: movdqa
 ; SSE2: psrad $31
 ; SSE2-NEXT: padd
 ; SSE2-NEXT: pxor
 ; SSE2-NEXT: ret
 
-; SSSE3: test1:
+; SSSE3-LABEL: test1:
 ; SSSE3: pabsd
 ; SSSE3-NEXT: ret
 
-; AVX2: test1:
+; AVX2-LABEL: test1:
 ; AVX2: vpabsd
 ; AVX2-NEXT: ret
         %tmp1neg = sub <4 x i32> zeroinitializer, %a
@@ -24,18 +24,18 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind {
 }
 
 define <4 x i32> @test2(<4 x i32> %a) nounwind {
-; SSE2: test2:
+; SSE2-LABEL: test2:
 ; SSE2: movdqa
 ; SSE2: psrad $31
 ; SSE2-NEXT: padd
 ; SSE2-NEXT: pxor
 ; SSE2-NEXT: ret
 
-; SSSE3: test2:
+; SSSE3-LABEL: test2:
 ; SSSE3: pabsd
 ; SSSE3-NEXT: ret
 
-; AVX2: test2:
+; AVX2-LABEL: test2:
 ; AVX2: vpabsd
 ; AVX2-NEXT: ret
         %tmp1neg = sub <4 x i32> zeroinitializer, %a
@@ -45,18 +45,18 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind {
 }
 
 define <8 x i16> @test3(<8 x i16> %a) nounwind {
-; SSE2: test3:
+; SSE2-LABEL: test3:
 ; SSE2: movdqa
 ; SSE2: psraw $15
 ; SSE2-NEXT: padd
 ; SSE2-NEXT: pxor
 ; SSE2-NEXT: ret
 
-; SSSE3: test3:
+; SSSE3-LABEL: test3:
 ; SSSE3: pabsw
 ; SSSE3-NEXT: ret
 
-; AVX2: test3:
+; AVX2-LABEL: test3:
 ; AVX2: vpabsw
 ; AVX2-NEXT: ret
         %tmp1neg = sub <8 x i16> zeroinitializer, %a
@@ -66,18 +66,18 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind {
 }
 
 define <16 x i8> @test4(<16 x i8> %a) nounwind {
-; SSE2: test4:
+; SSE2-LABEL: test4:
 ; SSE2: pxor
 ; SSE2: pcmpgtb
 ; SSE2-NEXT: padd
 ; SSE2-NEXT: pxor
 ; SSE2-NEXT: ret
 
-; SSSE3: test4:
+; SSSE3-LABEL: test4:
 ; SSSE3: pabsb
 ; SSSE3-NEXT: ret
 
-; AVX2: test4:
+; AVX2-LABEL: test4:
 ; AVX2: vpabsb
 ; AVX2-NEXT: ret
         %tmp1neg = sub <16 x i8> zeroinitializer, %a
@@ -87,18 +87,18 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind {
 }
 
 define <4 x i32> @test5(<4 x i32> %a) nounwind {
-; SSE2: test5:
+; SSE2-LABEL: test5:
 ; SSE2: movdqa
 ; SSE2: psrad $31
 ; SSE2-NEXT: padd
 ; SSE2-NEXT: pxor
 ; SSE2-NEXT: ret
 
-; SSSE3: test5:
+; SSSE3-LABEL: test5:
 ; SSSE3: pabsd
 ; SSSE3-NEXT: ret
 
-; AVX2: test5:
+; AVX2-LABEL: test5:
 ; AVX2: vpabsd
 ; AVX2-NEXT: ret
         %tmp1neg = sub <4 x i32> zeroinitializer, %a
@@ -108,12 +108,12 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind {
 }
 
 define <8 x i32> @test6(<8 x i32> %a) nounwind {
-; SSSE3: test6:
+; SSSE3-LABEL: test6:
 ; SSSE3: pabsd
 ; SSSE3: pabsd
 ; SSSE3-NEXT: ret
 
-; AVX2: test6:
+; AVX2-LABEL: test6:
 ; AVX2: vpabsd {{.*}}%ymm
 ; AVX2-NEXT: ret
         %tmp1neg = sub <8 x i32> zeroinitializer, %a
@@ -123,12 +123,12 @@ define <8 x i32> @test6(<8 x i32> %a) nounwind {
 }
 
 define <8 x i32> @test7(<8 x i32> %a) nounwind {
-; SSSE3: test7:
+; SSSE3-LABEL: test7:
 ; SSSE3: pabsd
 ; SSSE3: pabsd
 ; SSSE3-NEXT: ret
 
-; AVX2: test7:
+; AVX2-LABEL: test7:
 ; AVX2: vpabsd {{.*}}%ymm
 ; AVX2-NEXT: ret
         %tmp1neg = sub <8 x i32> zeroinitializer, %a
@@ -138,12 +138,12 @@ define <8 x i32> @test7(<8 x i32> %a) nounwind {
 }
 
 define <16 x i16> @test8(<16 x i16> %a) nounwind {
-; SSSE3: test8:
+; SSSE3-LABEL: test8:
 ; SSSE3: pabsw
 ; SSSE3: pabsw
 ; SSSE3-NEXT: ret
 
-; AVX2: test8:
+; AVX2-LABEL: test8:
 ; AVX2: vpabsw {{.*}}%ymm
 ; AVX2-NEXT: ret
         %tmp1neg = sub <16 x i16> zeroinitializer, %a
@@ -153,12 +153,12 @@ define <16 x i16> @test8(<16 x i16> %a) nounwind {
 }
 
 define <32 x i8> @test9(<32 x i8> %a) nounwind {
-; SSSE3: test9:
+; SSSE3-LABEL: test9:
 ; SSSE3: pabsb
 ; SSSE3: pabsb
 ; SSSE3-NEXT: ret
 
-; AVX2: test9:
+; AVX2-LABEL: test9:
 ; AVX2: vpabsb {{.*}}%ymm
 ; AVX2-NEXT: ret
         %tmp1neg = sub <32 x i8> zeroinitializer, %a
@@ -168,12 +168,12 @@ define <32 x i8> @test9(<32 x i8> %a) nounwind {
 }
 
 define <8 x i32> @test10(<8 x i32> %a) nounwind {
-; SSSE3: test10:
+; SSSE3-LABEL: test10:
 ; SSSE3: pabsd
 ; SSSE3: pabsd
 ; SSSE3-NEXT: ret
 
-; AVX2: test10:
+; AVX2-LABEL: test10:
 ; AVX2: vpabsd {{.*}}%ymm
 ; AVX2-NEXT: ret
         %tmp1neg = sub <8 x i32> zeroinitializer, %a
diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll
index cf654b6f2059..25189f23e43a 100644
--- a/test/CodeGen/X86/vselect-minmax.ll
+++ b/test/CodeGen/X86/vselect-minmax.ll
@@ -25,13 +25,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test1:
+; SSE4-LABEL: test1:
 ; SSE4: pminsb
 
-; AVX1: test1:
+; AVX1-LABEL: test1:
 ; AVX1: vpminsb
 
-; AVX2: test1:
+; AVX2-LABEL: test1:
 ; AVX2: vpminsb
 }
 
@@ -57,13 +57,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test2:
+; SSE4-LABEL: test2:
 ; SSE4: pminsb
 
-; AVX1: test2:
+; AVX1-LABEL: test2:
 ; AVX1: vpminsb
 
-; AVX2: test2:
+; AVX2-LABEL: test2:
 ; AVX2: vpminsb
 }
 
@@ -89,13 +89,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test3:
+; SSE4-LABEL: test3:
 ; SSE4: pmaxsb
 
-; AVX1: test3:
+; AVX1-LABEL: test3:
 ; AVX1: vpmaxsb
 
-; AVX2: test3:
+; AVX2-LABEL: test3:
 ; AVX2: vpmaxsb
 }
 
@@ -121,13 +121,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test4:
+; SSE4-LABEL: test4:
 ; SSE4: pmaxsb
 
-; AVX1: test4:
+; AVX1-LABEL: test4:
 ; AVX1: vpmaxsb
 
-; AVX2: test4:
+; AVX2-LABEL: test4:
 ; AVX2: vpmaxsb
 }
 
@@ -153,13 +153,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test5:
+; SSE2-LABEL: test5:
 ; SSE2: pminub
 
-; AVX1: test5:
+; AVX1-LABEL: test5:
 ; AVX1: vpminub
 
-; AVX2: test5:
+; AVX2-LABEL: test5:
 ; AVX2: vpminub
 }
 
@@ -185,13 +185,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test6:
+; SSE2-LABEL: test6:
 ; SSE2: pminub
 
-; AVX1: test6:
+; AVX1-LABEL: test6:
 ; AVX1: vpminub
 
-; AVX2: test6:
+; AVX2-LABEL: test6:
 ; AVX2: vpminub
 }
 
@@ -217,13 +217,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test7:
+; SSE2-LABEL: test7:
 ; SSE2: pmaxub
 
-; AVX1: test7:
+; AVX1-LABEL: test7:
 ; AVX1: vpmaxub
 
-; AVX2: test7:
+; AVX2-LABEL: test7:
 ; AVX2: vpmaxub
 }
 
@@ -249,13 +249,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test8:
+; SSE2-LABEL: test8:
 ; SSE2: pmaxub
 
-; AVX1: test8:
+; AVX1-LABEL: test8:
 ; AVX1: vpmaxub
 
-; AVX2: test8:
+; AVX2-LABEL: test8:
 ; AVX2: vpmaxub
 }
 
@@ -281,13 +281,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test9:
+; SSE2-LABEL: test9:
 ; SSE2: pminsw
 
-; AVX1: test9:
+; AVX1-LABEL: test9:
 ; AVX1: vpminsw
 
-; AVX2: test9:
+; AVX2-LABEL: test9:
 ; AVX2: vpminsw
 }
 
@@ -313,13 +313,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test10:
+; SSE2-LABEL: test10:
 ; SSE2: pminsw
 
-; AVX1: test10:
+; AVX1-LABEL: test10:
 ; AVX1: vpminsw
 
-; AVX2: test10:
+; AVX2-LABEL: test10:
 ; AVX2: vpminsw
 }
 
@@ -345,13 +345,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test11:
+; SSE2-LABEL: test11:
 ; SSE2: pmaxsw
 
-; AVX1: test11:
+; AVX1-LABEL: test11:
 ; AVX1: vpmaxsw
 
-; AVX2: test11:
+; AVX2-LABEL: test11:
 ; AVX2: vpmaxsw
 }
 
@@ -377,13 +377,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test12:
+; SSE2-LABEL: test12:
 ; SSE2: pmaxsw
 
-; AVX1: test12:
+; AVX1-LABEL: test12:
 ; AVX1: vpmaxsw
 
-; AVX2: test12:
+; AVX2-LABEL: test12:
 ; AVX2: vpmaxsw
 }
 
@@ -409,13 +409,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test13:
+; SSE4-LABEL: test13:
 ; SSE4: pminuw
 
-; AVX1: test13:
+; AVX1-LABEL: test13:
 ; AVX1: vpminuw
 
-; AVX2: test13:
+; AVX2-LABEL: test13:
 ; AVX2: vpminuw
 }
 
@@ -441,13 +441,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test14:
+; SSE4-LABEL: test14:
 ; SSE4: pminuw
 
-; AVX1: test14:
+; AVX1-LABEL: test14:
 ; AVX1: vpminuw
 
-; AVX2: test14:
+; AVX2-LABEL: test14:
 ; AVX2: vpminuw
 }
 
@@ -473,13 +473,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test15:
+; SSE4-LABEL: test15:
 ; SSE4: pmaxuw
 
-; AVX1: test15:
+; AVX1-LABEL: test15:
 ; AVX1: vpmaxuw
 
-; AVX2: test15:
+; AVX2-LABEL: test15:
 ; AVX2: vpmaxuw
 }
 
@@ -505,13 +505,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test16:
+; SSE4-LABEL: test16:
 ; SSE4: pmaxuw
 
-; AVX1: test16:
+; AVX1-LABEL: test16:
 ; AVX1: vpmaxuw
 
-; AVX2: test16:
+; AVX2-LABEL: test16:
 ; AVX2: vpmaxuw
 }
 
@@ -537,13 +537,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test17:
+; SSE4-LABEL: test17:
 ; SSE4: pminsd
 
-; AVX1: test17:
+; AVX1-LABEL: test17:
 ; AVX1: vpminsd
 
-; AVX2: test17:
+; AVX2-LABEL: test17:
 ; AVX2: vpminsd
 }
 
@@ -569,13 +569,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test18:
+; SSE4-LABEL: test18:
 ; SSE4: pminsd
 
-; AVX1: test18:
+; AVX1-LABEL: test18:
 ; AVX1: vpminsd
 
-; AVX2: test18:
+; AVX2-LABEL: test18:
 ; AVX2: vpminsd
 }
 
@@ -601,13 +601,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test19:
+; SSE4-LABEL: test19:
 ; SSE4: pmaxsd
 
-; AVX1: test19:
+; AVX1-LABEL: test19:
 ; AVX1: vpmaxsd
 
-; AVX2: test19:
+; AVX2-LABEL: test19:
 ; AVX2: vpmaxsd
 }
 
@@ -633,13 +633,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test20:
+; SSE4-LABEL: test20:
 ; SSE4: pmaxsd
 
-; AVX1: test20:
+; AVX1-LABEL: test20:
 ; AVX1: vpmaxsd
 
-; AVX2: test20:
+; AVX2-LABEL: test20:
 ; AVX2: vpmaxsd
 }
 
@@ -665,13 +665,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test21:
+; SSE4-LABEL: test21:
 ; SSE4: pminud
 
-; AVX1: test21:
+; AVX1-LABEL: test21:
 ; AVX1: vpminud
 
-; AVX2: test21:
+; AVX2-LABEL: test21:
 ; AVX2: vpminud
 }
 
@@ -697,13 +697,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test22:
+; SSE4-LABEL: test22:
 ; SSE4: pminud
 
-; AVX1: test22:
+; AVX1-LABEL: test22:
 ; AVX1: vpminud
 
-; AVX2: test22:
+; AVX2-LABEL: test22:
 ; AVX2: vpminud
 }
 
@@ -729,13 +729,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test23:
+; SSE4-LABEL: test23:
 ; SSE4: pmaxud
 
-; AVX1: test23:
+; AVX1-LABEL: test23:
 ; AVX1: vpmaxud
 
-; AVX2: test23:
+; AVX2-LABEL: test23:
 ; AVX2: vpmaxud
 }
 
@@ -761,13 +761,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test24:
+; SSE4-LABEL: test24:
 ; SSE4: pmaxud
 
-; AVX1: test24:
+; AVX1-LABEL: test24:
 ; AVX1: vpmaxud
 
-; AVX2: test24:
+; AVX2-LABEL: test24:
 ; AVX2: vpmaxud
 }
 
@@ -793,7 +793,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test25:
+; AVX2-LABEL: test25:
 ; AVX2: vpminsb
 }
 
@@ -819,7 +819,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test26:
+; AVX2-LABEL: test26:
 ; AVX2: vpminsb
 }
 
@@ -845,7 +845,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test27:
+; AVX2-LABEL: test27:
 ; AVX2: vpmaxsb
 }
 
@@ -871,7 +871,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test28:
+; AVX2-LABEL: test28:
 ; AVX2: vpmaxsb
 }
 
@@ -897,7 +897,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test29:
+; AVX2-LABEL: test29:
 ; AVX2: vpminub
 }
 
@@ -923,7 +923,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test30:
+; AVX2-LABEL: test30:
 ; AVX2: vpminub
 }
 
@@ -949,7 +949,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test31:
+; AVX2-LABEL: test31:
 ; AVX2: vpmaxub
 }
 
@@ -975,7 +975,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test32:
+; AVX2-LABEL: test32:
 ; AVX2: vpmaxub
 }
 
@@ -1001,7 +1001,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test33:
+; AVX2-LABEL: test33:
 ; AVX2: vpminsw
 }
 
@@ -1027,7 +1027,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test34:
+; AVX2-LABEL: test34:
 ; AVX2: vpminsw
 }
 
@@ -1053,7 +1053,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test35:
+; AVX2-LABEL: test35:
 ; AVX2: vpmaxsw
 }
 
@@ -1079,7 +1079,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test36:
+; AVX2-LABEL: test36:
 ; AVX2: vpmaxsw
 }
 
@@ -1105,7 +1105,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test37:
+; AVX2-LABEL: test37:
 ; AVX2: vpminuw
 }
 
@@ -1131,7 +1131,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test38:
+; AVX2-LABEL: test38:
 ; AVX2: vpminuw
 }
 
@@ -1157,7 +1157,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test39:
+; AVX2-LABEL: test39:
 ; AVX2: vpmaxuw
 }
 
@@ -1183,7 +1183,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test40:
+; AVX2-LABEL: test40:
 ; AVX2: vpmaxuw
 }
 
@@ -1209,7 +1209,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test41:
+; AVX2-LABEL: test41:
 ; AVX2: vpminsd
 }
 
@@ -1235,7 +1235,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test42:
+; AVX2-LABEL: test42:
 ; AVX2: vpminsd
 }
 
@@ -1261,7 +1261,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test43:
+; AVX2-LABEL: test43:
 ; AVX2: vpmaxsd
 }
 
@@ -1287,7 +1287,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test44:
+; AVX2-LABEL: test44:
 ; AVX2: vpmaxsd
 }
 
@@ -1313,7 +1313,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test45:
+; AVX2-LABEL: test45:
 ; AVX2: vpminud
 }
 
@@ -1339,7 +1339,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test46:
+; AVX2-LABEL: test46:
 ; AVX2: vpminud
 }
 
@@ -1365,7 +1365,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test47:
+; AVX2-LABEL: test47:
 ; AVX2: vpmaxud
 }
 
@@ -1391,7 +1391,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test48:
+; AVX2-LABEL: test48:
 ; AVX2: vpmaxud
 }
 
@@ -1417,13 +1417,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test49:
+; SSE4-LABEL: test49:
 ; SSE4: pmaxsb
 
-; AVX1: test49:
+; AVX1-LABEL: test49:
 ; AVX1: vpmaxsb
 
-; AVX2: test49:
+; AVX2-LABEL: test49:
 ; AVX2: vpmaxsb
 }
 
@@ -1449,13 +1449,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test50:
+; SSE4-LABEL: test50:
 ; SSE4: pmaxsb
 
-; AVX1: test50:
+; AVX1-LABEL: test50:
 ; AVX1: vpmaxsb
 
-; AVX2: test50:
+; AVX2-LABEL: test50:
 ; AVX2: vpmaxsb
 }
 
@@ -1481,13 +1481,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test51:
+; SSE4-LABEL: test51:
 ; SSE4: pminsb
 
-; AVX1: test51:
+; AVX1-LABEL: test51:
 ; AVX1: vpminsb
 
-; AVX2: test51:
+; AVX2-LABEL: test51:
 ; AVX2: vpminsb
 }
 
@@ -1513,13 +1513,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test52:
+; SSE4-LABEL: test52:
 ; SSE4: pminsb
 
-; AVX1: test52:
+; AVX1-LABEL: test52:
 ; AVX1: vpminsb
 
-; AVX2: test52:
+; AVX2-LABEL: test52:
 ; AVX2: vpminsb
 }
 
@@ -1545,13 +1545,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test53:
+; SSE2-LABEL: test53:
 ; SSE2: pmaxub
 
-; AVX1: test53:
+; AVX1-LABEL: test53:
 ; AVX1: vpmaxub
 
-; AVX2: test53:
+; AVX2-LABEL: test53:
 ; AVX2: vpmaxub
 }
 
@@ -1577,13 +1577,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test54:
+; SSE2-LABEL: test54:
 ; SSE2: pmaxub
 
-; AVX1: test54:
+; AVX1-LABEL: test54:
 ; AVX1: vpmaxub
 
-; AVX2: test54:
+; AVX2-LABEL: test54:
 ; AVX2: vpmaxub
 }
 
@@ -1609,13 +1609,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test55:
+; SSE2-LABEL: test55:
 ; SSE2: pminub
 
-; AVX1: test55:
+; AVX1-LABEL: test55:
 ; AVX1: vpminub
 
-; AVX2: test55:
+; AVX2-LABEL: test55:
 ; AVX2: vpminub
 }
 
@@ -1641,13 +1641,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test56:
+; SSE2-LABEL: test56:
 ; SSE2: pminub
 
-; AVX1: test56:
+; AVX1-LABEL: test56:
 ; AVX1: vpminub
 
-; AVX2: test56:
+; AVX2-LABEL: test56:
 ; AVX2: vpminub
 }
 
@@ -1673,13 +1673,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test57:
+; SSE2-LABEL: test57:
 ; SSE2: pmaxsw
 
-; AVX1: test57:
+; AVX1-LABEL: test57:
 ; AVX1: vpmaxsw
 
-; AVX2: test57:
+; AVX2-LABEL: test57:
 ; AVX2: vpmaxsw
 }
 
@@ -1705,13 +1705,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test58:
+; SSE2-LABEL: test58:
 ; SSE2: pmaxsw
 
-; AVX1: test58:
+; AVX1-LABEL: test58:
 ; AVX1: vpmaxsw
 
-; AVX2: test58:
+; AVX2-LABEL: test58:
 ; AVX2: vpmaxsw
 }
 
@@ -1737,13 +1737,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test59:
+; SSE2-LABEL: test59:
 ; SSE2: pminsw
 
-; AVX1: test59:
+; AVX1-LABEL: test59:
 ; AVX1: vpminsw
 
-; AVX2: test59:
+; AVX2-LABEL: test59:
 ; AVX2: vpminsw
 }
 
@@ -1769,13 +1769,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE2: test60:
+; SSE2-LABEL: test60:
 ; SSE2: pminsw
 
-; AVX1: test60:
+; AVX1-LABEL: test60:
 ; AVX1: vpminsw
 
-; AVX2: test60:
+; AVX2-LABEL: test60:
 ; AVX2: vpminsw
 }
 
@@ -1801,13 +1801,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test61:
+; SSE4-LABEL: test61:
 ; SSE4: pmaxuw
 
-; AVX1: test61:
+; AVX1-LABEL: test61:
 ; AVX1: vpmaxuw
 
-; AVX2: test61:
+; AVX2-LABEL: test61:
 ; AVX2: vpmaxuw
 }
 
@@ -1833,13 +1833,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test62:
+; SSE4-LABEL: test62:
 ; SSE4: pmaxuw
 
-; AVX1: test62:
+; AVX1-LABEL: test62:
 ; AVX1: vpmaxuw
 
-; AVX2: test62:
+; AVX2-LABEL: test62:
 ; AVX2: vpmaxuw
 }
 
@@ -1865,13 +1865,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test63:
+; SSE4-LABEL: test63:
 ; SSE4: pminuw
 
-; AVX1: test63:
+; AVX1-LABEL: test63:
 ; AVX1: vpminuw
 
-; AVX2: test63:
+; AVX2-LABEL: test63:
 ; AVX2: vpminuw
 }
 
@@ -1897,13 +1897,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test64:
+; SSE4-LABEL: test64:
 ; SSE4: pminuw
 
-; AVX1: test64:
+; AVX1-LABEL: test64:
 ; AVX1: vpminuw
 
-; AVX2: test64:
+; AVX2-LABEL: test64:
 ; AVX2: vpminuw
 }
 
@@ -1929,13 +1929,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test65:
+; SSE4-LABEL: test65:
 ; SSE4: pmaxsd
 
-; AVX1: test65:
+; AVX1-LABEL: test65:
 ; AVX1: vpmaxsd
 
-; AVX2: test65:
+; AVX2-LABEL: test65:
 ; AVX2: vpmaxsd
 }
 
@@ -1961,13 +1961,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test66:
+; SSE4-LABEL: test66:
 ; SSE4: pmaxsd
 
-; AVX1: test66:
+; AVX1-LABEL: test66:
 ; AVX1: vpmaxsd
 
-; AVX2: test66:
+; AVX2-LABEL: test66:
 ; AVX2: vpmaxsd
 }
 
@@ -1993,13 +1993,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test67:
+; SSE4-LABEL: test67:
 ; SSE4: pminsd
 
-; AVX1: test67:
+; AVX1-LABEL: test67:
 ; AVX1: vpminsd
 
-; AVX2: test67:
+; AVX2-LABEL: test67:
 ; AVX2: vpminsd
 }
 
@@ -2025,13 +2025,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test68:
+; SSE4-LABEL: test68:
 ; SSE4: pminsd
 
-; AVX1: test68:
+; AVX1-LABEL: test68:
 ; AVX1: vpminsd
 
-; AVX2: test68:
+; AVX2-LABEL: test68:
 ; AVX2: vpminsd
 }
 
@@ -2057,13 +2057,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test69:
+; SSE4-LABEL: test69:
 ; SSE4: pmaxud
 
-; AVX1: test69:
+; AVX1-LABEL: test69:
 ; AVX1: vpmaxud
 
-; AVX2: test69:
+; AVX2-LABEL: test69:
 ; AVX2: vpmaxud
 }
 
@@ -2089,13 +2089,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test70:
+; SSE4-LABEL: test70:
 ; SSE4: pmaxud
 
-; AVX1: test70:
+; AVX1-LABEL: test70:
 ; AVX1: vpmaxud
 
-; AVX2: test70:
+; AVX2-LABEL: test70:
 ; AVX2: vpmaxud
 }
 
@@ -2121,13 +2121,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test71:
+; SSE4-LABEL: test71:
 ; SSE4: pminud
 
-; AVX1: test71:
+; AVX1-LABEL: test71:
 ; AVX1: vpminud
 
-; AVX2: test71:
+; AVX2-LABEL: test71:
 ; AVX2: vpminud
 }
 
@@ -2153,13 +2153,13 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; SSE4: test72:
+; SSE4-LABEL: test72:
 ; SSE4: pminud
 
-; AVX1: test72:
+; AVX1-LABEL: test72:
 ; AVX1: vpminud
 
-; AVX2: test72:
+; AVX2-LABEL: test72:
 ; AVX2: vpminud
 }
 
@@ -2185,7 +2185,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test73:
+; AVX2-LABEL: test73:
 ; AVX2: vpmaxsb
 }
 
@@ -2211,7 +2211,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test74:
+; AVX2-LABEL: test74:
 ; AVX2: vpmaxsb
 }
 
@@ -2237,7 +2237,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test75:
+; AVX2-LABEL: test75:
 ; AVX2: vpminsb
 }
 
@@ -2263,7 +2263,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test76:
+; AVX2-LABEL: test76:
 ; AVX2: vpminsb
 }
 
@@ -2289,7 +2289,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test77:
+; AVX2-LABEL: test77:
 ; AVX2: vpmaxub
 }
 
@@ -2315,7 +2315,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test78:
+; AVX2-LABEL: test78:
 ; AVX2: vpmaxub
 }
 
@@ -2341,7 +2341,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test79:
+; AVX2-LABEL: test79:
 ; AVX2: vpminub
 }
 
@@ -2367,7 +2367,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test80:
+; AVX2-LABEL: test80:
 ; AVX2: vpminub
 }
 
@@ -2393,7 +2393,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test81:
+; AVX2-LABEL: test81:
 ; AVX2: vpmaxsw
 }
 
@@ -2419,7 +2419,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test82:
+; AVX2-LABEL: test82:
 ; AVX2: vpmaxsw
 }
 
@@ -2445,7 +2445,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test83:
+; AVX2-LABEL: test83:
 ; AVX2: vpminsw
 }
 
@@ -2471,7 +2471,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test84:
+; AVX2-LABEL: test84:
 ; AVX2: vpminsw
 }
 
@@ -2497,7 +2497,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test85:
+; AVX2-LABEL: test85:
 ; AVX2: vpmaxuw
 }
 
@@ -2523,7 +2523,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test86:
+; AVX2-LABEL: test86:
 ; AVX2: vpmaxuw
 }
 
@@ -2549,7 +2549,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test87:
+; AVX2-LABEL: test87:
 ; AVX2: vpminuw
 }
 
@@ -2575,7 +2575,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test88:
+; AVX2-LABEL: test88:
 ; AVX2: vpminuw
 }
 
@@ -2601,7 +2601,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test89:
+; AVX2-LABEL: test89:
 ; AVX2: vpmaxsd
 }
 
@@ -2627,7 +2627,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test90:
+; AVX2-LABEL: test90:
 ; AVX2: vpmaxsd
 }
 
@@ -2653,7 +2653,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test91:
+; AVX2-LABEL: test91:
 ; AVX2: vpminsd
 }
 
@@ -2679,7 +2679,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test92:
+; AVX2-LABEL: test92:
 ; AVX2: vpminsd
 }
 
@@ -2705,7 +2705,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test93:
+; AVX2-LABEL: test93:
 ; AVX2: vpmaxud
 }
 
@@ -2731,7 +2731,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test94:
+; AVX2-LABEL: test94:
 ; AVX2: vpmaxud
 }
 
@@ -2757,7 +2757,7 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test95:
+; AVX2-LABEL: test95:
 ; AVX2: vpminud
 }
 
@@ -2783,6 +2783,6 @@ vector.body:                                      ; preds = %vector.body, %vecto
 for.end:                                          ; preds = %vector.body
   ret void
 
-; AVX2: test96:
+; AVX2-LABEL: test96:
 ; AVX2: vpminud
 }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index e775750bbea5..b8a676765868 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -5,7 +5,7 @@
 
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
 entry:
-; CHECK: shift1a:
+; CHECK-LABEL: shift1a:
 ; CHECK: psllq
   %shl = shl <2 x i64> %val, < i64 32, i64 32 >
   store <2 x i64> %shl, <2 x i64>* %dst
@@ -14,7 +14,7 @@ entry:
 
 define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
-; CHECK: shift1b:
+; CHECK-LABEL: shift1b:
 ; CHECK: movd
 ; CHECK: psllq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
@@ -27,7 +27,7 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
 entry:
-; CHECK: shift2a:
+; CHECK-LABEL: shift2a:
 ; CHECK: pslld
   %shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
   store <4 x i32> %shl, <4 x i32>* %dst
@@ -36,7 +36,7 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
-; CHECK: shift2b:
+; CHECK-LABEL: shift2b:
 ; CHECK: movd
 ; CHECK: pslld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
@@ -50,7 +50,7 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
 entry:
-; CHECK: shift3a:
+; CHECK-LABEL: shift3a:
 ; CHECK: psllw
   %shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
   store <8 x i16> %shl, <8 x i16>* %dst
@@ -60,18 +60,18 @@ entry:
 ; Make sure the shift amount is properly zero extended.
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
-; CHECK: shift3b:
+; CHECK-LABEL: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
 ; CHECK-NEXT: psllw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
-  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
-  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
-  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
-  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
-  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
-  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %2 = insertelement <8 x i16> %1, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %2, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %3, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %4, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %5, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %6, i16 %amt, i32 7
   %shl = shl <8 x i16> %val, %7
   store <8 x i16> %shl, <8 x i16>* %dst
   ret void
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 9496893bd1a7..156649a31443 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -5,7 +5,7 @@
 
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
 entry:
-; CHECK: shift1a:
+; CHECK-LABEL: shift1a:
 ; CHECK: psrlq
   %lshr = lshr <2 x i64> %val, < i64 32, i64 32 >
   store <2 x i64> %lshr, <2 x i64>* %dst
@@ -14,7 +14,7 @@ entry:
 
 define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 entry:
-; CHECK: shift1b:
+; CHECK-LABEL: shift1b:
 ; CHECK: movd
 ; CHECK: psrlq
   %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
@@ -26,7 +26,7 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
 entry:
-; CHECK: shift2a:
+; CHECK-LABEL: shift2a:
 ; CHECK: psrld
   %lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 >
   store <4 x i32> %lshr, <4 x i32>* %dst
@@ -35,7 +35,7 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
-; CHECK: shift2b:
+; CHECK-LABEL: shift2b:
 ; CHECK: movd
 ; CHECK: psrld
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
@@ -50,7 +50,7 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
 entry:
-; CHECK: shift3a:
+; CHECK-LABEL: shift3a:
 ; CHECK: psrlw
   %lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
   store <8 x i16> %lshr, <8 x i16>* %dst
@@ -60,18 +60,18 @@ entry:
 ; properly zero extend the shift amount
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
-; CHECK: shift3b:
+; CHECK-LABEL: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
 ; CHECK: psrlw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
-  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
-  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
-  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
-  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
-  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
-  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %2 = insertelement <8 x i16> %1, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %2, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %3, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %4, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %5, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %6, i16 %amt, i32 7
   %lshr = lshr <8 x i16> %val, %7
   store <8 x i16> %lshr, <8 x i16>* %dst
   ret void
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
index b2b48b9da935..0bdb32fcb86e 100644
--- a/test/CodeGen/X86/vshift-3.ll
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -8,7 +8,7 @@
 ; shift1a can't use a packed shift
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
 entry:
-; CHECK: shift1a:
+; CHECK-LABEL: shift1a:
 ; CHECK: sarl
   %ashr = ashr <2 x i64> %val, < i64 32, i64 32 >
   store <2 x i64> %ashr, <2 x i64>* %dst
@@ -17,7 +17,7 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
 entry:
-; CHECK: shift2a:
+; CHECK-LABEL: shift2a:
 ; CHECK: psrad	$5
   %ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
   store <4 x i32> %ashr, <4 x i32>* %dst
@@ -26,7 +26,7 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
-; CHECK: shift2b:
+; CHECK-LABEL: shift2b:
 ; CHECK: movd
 ; CHECK: psrad
   %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
@@ -40,7 +40,7 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
 entry:
-; CHECK: shift3a:
+; CHECK-LABEL: shift3a:
 ; CHECK: psraw	$5
   %ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
   store <8 x i16> %ashr, <8 x i16>* %dst
@@ -49,18 +49,18 @@ entry:
 
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
-; CHECK: shift3b:
+; CHECK-LABEL: shift3b:
 ; CHECK: movzwl
 ; CHECK: movd
 ; CHECK: psraw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
-  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
-  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
-  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
-  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
-  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
-  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %2 = insertelement <8 x i16> %1, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %2, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %3, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %4, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %5, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %6, i16 %amt, i32 7
   %ashr = ashr <8 x i16> %val, %7
   store <8 x i16> %ashr, <8 x i16>* %dst
   ret void
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index 8e24fda1835d..4363cd9399cf 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -5,7 +5,7 @@
 
 define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
 entry:
-; CHECK: shift1a:
+; CHECK-LABEL: shift1a:
 ; CHECK: psllq
   %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %shl = shl <2 x i64> %val, %shamt
@@ -16,7 +16,7 @@ entry:
 ; shift1b can't use a packed shift
 define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
 entry:
-; CHECK: shift1b:
+; CHECK-LABEL: shift1b:
 ; CHECK: shll
   %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
   %shl = shl <2 x i64> %val, %shamt
@@ -26,7 +26,7 @@ entry:
 
 define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
 entry:
-; CHECK: shift2a:
+; CHECK-LABEL: shift2a:
 ; CHECK: pslld
   %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %shl = shl <4 x i32> %val, %shamt
@@ -36,7 +36,7 @@ entry:
 
 define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
 entry:
-; CHECK: shift2b:
+; CHECK-LABEL: shift2b:
 ; CHECK: pslld
   %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 1, i32 1>
   %shl = shl <4 x i32> %val, %shamt
@@ -46,7 +46,7 @@ entry:
 
 define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
 entry:
-; CHECK: shift2c:
+; CHECK-LABEL: shift2c:
 ; CHECK: pslld
   %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %shl = shl <4 x i32> %val, %shamt
@@ -56,7 +56,7 @@ entry:
 
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
 entry:
-; CHECK: shift3a:
+; CHECK-LABEL: shift3a:
 ; CHECK: movzwl
 ; CHECK: psllw
   %shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
@@ -67,17 +67,17 @@ entry:
 
 define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
 entry:
-; CHECK: shift3b:
+; CHECK-LABEL: shift3b:
 ; CHECK: movzwl
 ; CHECK: psllw
   %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
   %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
-  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
-  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
-  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
-  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
-  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
-  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %2 = insertelement <8 x i16> %1, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %2, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %3, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %4, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %5, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %6, i16 %amt, i32 7
   %shl = shl <8 x i16> %val, %7
   store <8 x i16> %shl, <8 x i16>* %dst
   ret void
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index f6c311dee521..562e520c5528 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -4,7 +4,7 @@
 
 define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
 entry:
-; CHECK: shift5a:
+; CHECK-LABEL: shift5a:
 ; CHECK: movd
 ; CHECK: pslld
   %amt = load i32* %pamt 
@@ -18,7 +18,7 @@ entry:
 
 define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
 entry:
-; CHECK: shift5b:
+; CHECK-LABEL: shift5b:
 ; CHECK: movd
 ; CHECK: psrad
   %amt = load i32* %pamt 
@@ -32,7 +32,7 @@ entry:
 
 define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
-; CHECK: shift5c:
+; CHECK-LABEL: shift5c:
 ; CHECK: movd
 ; CHECK: pslld
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
@@ -45,7 +45,7 @@ entry:
 
 define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
 entry:
-; CHECK: shift5d:
+; CHECK-LABEL: shift5d:
 ; CHECK: movd
 ; CHECK: psrad
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index 3b7fdff84e3c..c16b29493589 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -14,7 +14,7 @@ define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind read
 
 define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
 ; CHECK: t2
-; CHECK-NOT: pand
+; CHECK: pand
 ; CHECK: ret
   %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
   %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
diff --git a/test/CodeGen/X86/warn-stack.ll b/test/CodeGen/X86/warn-stack.ll
new file mode 100644
index 000000000000..5979f45b07d8
--- /dev/null
+++ b/test/CodeGen/X86/warn-stack.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple x86_64-apple-macosx10.8.0 -warn-stack-size=80 < %s 2>&1 >/dev/null | FileCheck %s
+; Check the internal option that warns when the stack size exceeds the
+; given amount.
+; <rdar://13987214>
+
+; CHECK-NOT: nowarn
+define void @nowarn() nounwind ssp {
+entry:
+  %buffer = alloca [12 x i8], align 1
+  %arraydecay = getelementptr inbounds [12 x i8]* %buffer, i64 0, i64 0
+  call void @doit(i8* %arraydecay) nounwind
+  ret void
+}
+
+; CHECK: warning: Stack size limit exceeded (104) in warn.
+define void @warn() nounwind ssp {
+entry:
+  %buffer = alloca [80 x i8], align 1
+  %arraydecay = getelementptr inbounds [80 x i8]* %buffer, i64 0, i64 0
+  call void @doit(i8* %arraydecay) nounwind
+  ret void
+}
+
+declare void @doit(i8*)
diff --git a/test/CodeGen/X86/weak_def_can_be_hidden.ll b/test/CodeGen/X86/weak_def_can_be_hidden.ll
new file mode 100644
index 000000000000..f78f3571cec9
--- /dev/null
+++ b/test/CodeGen/X86/weak_def_can_be_hidden.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=x86_64-apple-darwin  -O0 < %s | FileCheck %s
+
+@v1 = linkonce_odr global i32 32
+; CHECK: .globl  _v1
+; CHECK: .weak_def_can_be_hidden _v1
+
+define i32 @f1() {
+  %x = load i32 * @v1
+  ret i32 %x
+}
+
+@v2 = linkonce_odr global i32 32
+; CHECK: .globl  _v2
+; CHECK: .weak_definition _v2
+
+@v3 = linkonce_odr unnamed_addr global i32 32
+; CHECK: .globl  _v3
+; CHECK: .weak_def_can_be_hidden _v3
+
+define i32* @f2() {
+  ret i32* @v2
+}
+
+define i32* @f3() {
+  ret i32* @v3
+}
diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll
index d93f33ba0e58..f51f917fbac9 100644
--- a/test/CodeGen/X86/wide-fma-contraction.ll
+++ b/test/CodeGen/X86/wide-fma-contraction.ll
@@ -1,7 +1,10 @@
-; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
 
-; CHECK: fmafunc
+; CHECK-LABEL: fmafunc
+; CHECK-NOFMA-LABEL: fmafunc
 define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+
 ; CHECK-NOT: vmulps
 ; CHECK-NOT: vaddps
 ; CHECK: vfmaddps
@@ -10,11 +13,17 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
 ; CHECK: vfmaddps
 ; CHECK-NOT: vmulps
 ; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
   %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
   ret <16 x float> %ret
 }
 
 declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
-
-
-
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 661cde8bda3b..6041356e6ac7 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 |  FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse4.2 |  FileCheck %s
 
 define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index d35abc308173..1b81e9f889aa 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: padd
 ; CHECK: pand
 
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index d86042a44806..d2b8e6ee9a75 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: incl
-; CHECK: incl
-; CHECK: incl
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse4.2 -post-RA-scheduler=true | FileCheck %s
+; CHECK: paddd
 
 ; Widen a v3i16 to v8i16 to do a vector add
 
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index 5931d639f19b..5207e1fa9d7b 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: psubw
 ; CHECK-NEXT: pmullw
 
@@ -33,7 +33,7 @@ forbody:		; preds = %forcond
 	%arrayidx6 = getelementptr <5 x i16>* %tmp5, i32 %tmp4		; <<5 x i16>*> [#uses=1]
 	%tmp7 = load <5 x i16>* %arrayidx6		; <<5 x i16>> [#uses=1]
 	%sub = sub <5 x i16> %tmp7, < i16 271, i16 271, i16 271, i16 271, i16 271 >		; <<5 x i16>> [#uses=1]
-	%mul = mul <5 x i16> %sub, < i16 2, i16 2, i16 2, i16 2, i16 2 >		; <<5 x i16>> [#uses=1]
+	%mul = mul <5 x i16> %sub, < i16 2, i16 4, i16 2, i16 2, i16 2 >		; <<5 x i16>> [#uses=1]
 	store <5 x i16> %mul, <5 x i16>* %arrayidx
 	br label %forinc
 
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index 7f2eff09f473..70b6a8a239ab 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse42  | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.2  | FileCheck %s
 ; CHECK: movdqa
-; CHECK: pmulld
+; CHECK: pslld $2
 ; CHECK: psubd
 
 ; widen a v3i32 to v4i32 to do a vector multiple and a subtraction
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index b983d141ddf6..329048ad77b7 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: mulps
 ; CHECK: addps
 
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 56c63644e02e..d115929f5aab 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s
 ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
-; CHECK: paddd
 ; CHECK: movl
+; CHECK: paddd
 ; CHECK: movlpd
 
 ; Scheduler causes produce a different instruction order
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 3979ce466d1e..40b42fbf1460 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s
 ; CHECK: pextrd
 ; CHECK: pextrd
 ; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
index 87486d96611b..40a8dc5b6aed 100644
--- a/test/CodeGen/X86/widen_cast-3.ll
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: paddd
 ; CHECK: pextrd
 ; CHECK: pextrd
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 5ea54267692a..1bc06a77cbf7 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: psraw
 ; CHECK: psraw
 
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index 9086d3a9cfd2..ccf0bd1d0b62 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: movl
 ; CHECK: movlpd
 
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
index 39032347c018..7c06ad8ca664 100644
--- a/test/CodeGen/X86/widen_cast-6.ll
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
 ; CHECK: movd
 
 ; Test bit convert that requires widening in the operand.
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index 51f1c887b00d..9f6778cff592 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: paddq
 
 ; truncate v2i64 to v2i32
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
index 969cb512beb3..906f7cdafb95 100644
--- a/test/CodeGen/X86/widen_conv-2.ll
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: movswl
-; CHECK: movswl
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
+; CHECK: {{cwtl|movswl}}
+; CHECK: {{cwtl|movswl}}
 
 ; sign extension v2i32 to v2i16
 
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index a25fae9e1bc8..a2f3d7b82b36 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 ; CHECK: cvtsi2ss
 
 ; sign to float v2i16 to v2f32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 1158e0455392..f633592f2ef8 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s
 ; CHECK-NOT: cvtsi2ss
 
 ; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
index 86727421ce03..6832de1dbb64 100644
--- a/test/CodeGen/X86/widen_extract-1.ll
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=x86-64 -mcpu=nehalem -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=nehalem -mattr=+sse4.2 | FileCheck %s
 ; widen extract subvector
 
 define void @convert(<2 x double>* %dst.addr, <3 x double> %src)  {
 entry:
-; CHECK: convert:
+; CHECK-LABEL: convert:
 ; CHECK: unpcklpd {{%xmm[0-7]}}, {{%xmm[0-7]}}
 ; CHECK-NEXT: movapd
   %val = shufflevector <3 x double> %src, <3 x double> undef, <2 x i32> < i32 0, i32 1>
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index dfaa3d6dc91a..c59cc58f40fd 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -9,8 +9,8 @@
 ; SSE: movaps  %xmm0, (%rsp)
 ; SSE: callq   killcommon
 
-; AVX: vmovapd    compl+128(%rip), %xmm0
-; AVX: vmovapd  %xmm0, (%rsp)
+; AVX: vmovaps    compl+128(%rip), %xmm0
+; AVX: vmovaps  %xmm0, (%rsp)
 ; AVX: callq   killcommon
 
 @compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 224898c1a3e5..26815a422ec8 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.2 | FileCheck %s
 
 ; Test based on pr5626 to load/store
 ;
@@ -73,10 +73,7 @@ define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
 ; CHECK: add3i16
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: add3i16
-; CHECK: addl
-; CHECK: addl
-; CHECK: addl
+; CHECK: paddd
 ; CHECK: ret
 	%a = load %i16vec3* %ap, align 16
 	%b = load %i16vec3* %bp, align 16
@@ -88,7 +85,6 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
 ; CHECK: add4i16
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: add4i16
 ; CHECK: paddd
 ; CHECK: movq
 	%a = load %i16vec4* %ap, align 16
@@ -137,9 +133,7 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
 ; CHECK: add3i8
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: addb
-; CHECK: addb
-; CHECK: addb
+; CHECK: paddd
 ; CHECK: ret
 	%a = load %i8vec3* %ap, align 16
 	%b = load %i8vec3* %bp, align 16
@@ -148,7 +142,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
 	ret void
 }
 
-; CHECK: add31i8:
+; CHECK-LABEL: add31i8:
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
 ; CHECK: movdqa
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 7bebb274f6ec..803402b1f1f4 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 
 ; widening shuffle v3float and then a add
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
-; CHECK: shuf:
+; CHECK-LABEL: shuf:
 ; CHECK: extractps
 ; CHECK: extractps
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
@@ -17,7 +17,7 @@ entry:
 ; widening shuffle v3float with a different mask and then a add
 define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
-; CHECK: shuf2:
+; CHECK-LABEL: shuf2:
 ; CHECK: extractps
 ; CHECK: extractps
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
@@ -32,7 +32,7 @@ entry:
 ; opA with opB, the DAG will produce new operations with opA.
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
-; CHECK: shuf3:
+; CHECK-LABEL: shuf3:
 ; CHECK: shufps
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
@@ -52,7 +52,7 @@ entry:
 
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
 define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
-; CHECK: shuf4:
+; CHECK-LABEL: shuf4:
 ; CHECK-NOT: punpckldq
   %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %vshuf
@@ -61,7 +61,7 @@ define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 
 ; PR11389: another CONCAT_VECTORS case
 define void @shuf5(<8 x i8>* %p) nounwind {
-; CHECK: shuf5:
+; CHECK-LABEL: shuf5:
   %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   store <8 x i8> %v, <8 x i8>* %p, align 8
   ret void
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index 2bfe5fb1007b..a24963a3f34e 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -115,8 +115,8 @@ entry:
 
 ; Load the address of the result and put it onto stack
 ; (through %ecx in the -O0 build).
-; WIN32:      leal {{[0-9]+}}(%esp), %eax
-; WIN32:      movl %eax, (%e{{[sc][px]}})
+; WIN32:      leal {{[0-9]+}}(%esp), %e{{[a-d]}}x
+; WIN32:      movl %e{{[a-d]}}x, (%e{{([a-d]x)|(sp)}})
 
 ; The this pointer goes to ECX.
 ; WIN32-NEXT: leal {{[0-9]+}}(%esp), %ecx
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index cc11e4c28e21..aff53057a954 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,10 +1,13 @@
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
-define i64 @foo(i64 %n, i64 %x) nounwind {
+define i64 @unaligned(i64 %n, i64 %x) nounwind {
+; M64-LABEL: unaligned:
+; W64-LABEL: unaligned:
+; EFI-LABEL: unaligned:
 entry:
 
   %buf0 = alloca i8, i64 4096, align 1
@@ -19,7 +22,7 @@ entry:
 ; W64: movq  %rsp, %rbp
 ; W64:       $4096, %rax
 ; W64: callq __chkstk
-; W64: subq  $4096, %rsp
+; W64: subq  %rax, %rsp
 
 ; Freestanding
 ; EFI: movq  %rsp, %rbp
@@ -49,18 +52,18 @@ entry:
   %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
 
 ; M64: subq  $48, %rsp
-; M64: leaq  -4096(%rbp), %r9
 ; M64: movq  %rax, 32(%rsp)
+; M64: leaq  -4096(%rbp), %r9
 ; M64: callq bar
 
 ; W64: subq  $48, %rsp
-; W64: leaq  -4096(%rbp), %r9
 ; W64: movq  %rax, 32(%rsp)
+; W64: leaq  -4096(%rbp), %r9
 ; W64: callq bar
 
 ; EFI: subq  $48, %rsp
-; EFI: leaq  -[[B0OFS]](%rbp), %r9
 ; EFI: movq  [[R64]], 32(%rsp)
+; EFI: leaq  -[[B0OFS]](%rbp), %r9
 ; EFI: callq _bar
 
   ret i64 %r
@@ -71,4 +74,51 @@ entry:
 
 }
 
+define i64 @aligned(i64 %n, i64 %x) nounwind {
+; M64-LABEL: aligned:
+; W64-LABEL: aligned:
+; EFI-LABEL: aligned:
+entry:
+
+  %buf1 = alloca i8, i64 %n, align 128
+
+; M64: leaq  15(%{{.*}}), %rax
+; M64: andq  $-16, %rax
+; M64: callq ___chkstk
+; M64: movq  %rsp, [[R2:%r.*]]
+; M64: andq  $-128, [[R2]]
+; M64: movq  [[R2]], %rsp
+
+; W64: leaq  15(%{{.*}}), %rax
+; W64: andq  $-16, %rax
+; W64: callq __chkstk
+; W64: subq  %rax, %rsp
+; W64: movq  %rsp, [[R2:%r.*]]
+; W64: andq  $-128, [[R2]]
+; W64: movq  [[R2]], %rsp
+
+; EFI: leaq  15(%{{.*}}), [[R1:%r.*]]
+; EFI: andq  $-16, [[R1]]
+; EFI: movq  %rsp, [[R64:%r.*]]
+; EFI: subq  [[R1]], [[R64]]
+; EFI: andq  $-128, [[R64]]
+; EFI: movq  [[R64]], %rsp
+
+  %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* undef, i8* %buf1) nounwind
+
+; M64: subq  $48, %rsp
+; M64: movq  [[R2]], 32(%rsp)
+; M64: callq bar
+
+; W64: subq  $48, %rsp
+; W64: movq  [[R2]], 32(%rsp)
+; W64: callq bar
+
+; EFI: subq  $48, %rsp
+; EFI: movq  [[R64]], 32(%rsp)
+; EFI: callq _bar
+
+  ret i64 %r
+}
+
 declare i64 @bar(i64, i64, i64, i8* nocapture, i8* nocapture) nounwind
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
index f9d4bf9c3094..9718c86300c2 100644
--- a/test/CodeGen/X86/win64_params.ll
+++ b/test/CodeGen/X86/win64_params.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX
 
 ; Verify that the 5th and 6th parameters are coming from the correct location
 ; on the stack.
@@ -6,6 +7,30 @@ define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind re
 entry:
 ; CHECK: movl    48(%rsp), %eax
 ; CHECK: addl    40(%rsp), %eax
+; LINUX: addl    %r9d, %r8d
+; LINUX: movl    %r8d, %eax
+  %add = add nsw i32 %p6, %p5
+  ret i32 %add
+}
+
+define x86_64_win64cc i32 @f7(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
+entry:
+; CHECK: movl    48(%rsp), %eax
+; CHECK: addl    40(%rsp), %eax
+; LINUX: movl    48(%rsp), %eax
+; LINUX: addl    40(%rsp), %eax
+  %add = add nsw i32 %p6, %p5
+  ret i32 %add
+}
+
+; Verify that even though we're compiling for Windows, parameters behave as
+; on other platforms here (note the x86_64_sysvcc calling convention).
+define x86_64_sysvcc i32 @f8(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
+entry:
+; CHECK: addl    %r9d, %r8d
+; CHECK: movl    %r8d, %eax
+; LINUX: addl    %r9d, %r8d
+; LINUX: movl    %r8d, %eax
   %add = add nsw i32 %p6, %p5
   ret i32 %add
 }
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index 52bc50922c26..1a51b2a64a76 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -18,8 +18,9 @@ entry:
 }
 
 declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
 
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: pushq
 ; CHECK: leaq 56(%rsp),
 define i8* @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind {
@@ -30,7 +31,7 @@ entry:
   ret i8* %ap1
 }
 
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: pushq
 ; CHECK: leaq 48(%rsp),
 define i8* @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
@@ -41,7 +42,7 @@ entry:
   ret i8* %ap1
 }
 
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: pushq
 ; CHECK: leaq 40(%rsp),
 define i8* @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind {
@@ -51,3 +52,62 @@ entry:
   call void @llvm.va_start(i8* %ap1)
   ret i8* %ap1
 }
+
+; WinX86_64 uses char* for va_list. Verify that the correct number of bytes
+; is copied using va_copy.
+
+; CHECK-LABEL: copy1:
+; CHECK: subq $16
+; CHECK: leaq 32(%rsp), [[REG_copy1:%[a-z]+]]
+; CHECK: movq [[REG_copy1]], 8(%rsp)
+; CHECK: movq [[REG_copy1]], (%rsp)
+; CHECK: addq $16
+; CHECK: ret
+define void @copy1(i64 %a0, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %cp = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  %cp1 = bitcast i8** %cp to i8*
+  call void @llvm.va_start(i8* %ap1)
+  call void @llvm.va_copy(i8* %cp1, i8* %ap1)
+  ret void
+}
+
+; CHECK-LABEL: copy4:
+; CHECK: subq $16
+; CHECK: leaq 56(%rsp), [[REG_copy4:%[a-z]+]]
+; CHECK: movq [[REG_copy4]], 8(%rsp)
+; CHECK: movq [[REG_copy4]], (%rsp)
+; CHECK: addq $16
+; CHECK: ret
+define void @copy4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %cp = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  %cp1 = bitcast i8** %cp to i8*
+  call void @llvm.va_start(i8* %ap1)
+  call void @llvm.va_copy(i8* %cp1, i8* %ap1)
+  ret void
+}
+
+; CHECK-LABEL: arg4:
+; CHECK: pushq
+; va_start:
+; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
+; CHECK: movq [[REG_arg4_1]], (%rsp)
+; va_arg:
+; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
+; CHECK: movq [[REG_arg4_2]], (%rsp)
+; CHECK: movl 48(%rsp), %eax
+; CHECK: popq
+; CHECK: ret
+define i32 @arg4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %tmp = va_arg i8** %ap, i32
+  ret i32 %tmp
+}
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
index e4e4483ff949..3f522ea5682c 100644
--- a/test/CodeGen/X86/win_chkstk.ll
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -45,3 +45,16 @@ entry:
   %array128 = alloca [128 x i8], align 16         ; <[128 x i8]*> [#uses=0]
   ret i32 0
 }
+
+; Make sure we don't call __chkstk or __alloca on non-Windows even if the
+; caller has the Win64 calling convention.
+define x86_64_win64cc i32 @main4k_win64() nounwind {
+entry:
+; WIN_X32:    calll __chkstk
+; WIN_X64:    callq __chkstk
+; MINGW_X32:  calll __alloca
+; MINGW_X64:  callq ___chkstk
+; LINUX-NOT:  call __chkstk
+  %array4096 = alloca [4096 x i8], align 16       ; <[4096 x i8]*> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 07ccb2337e75..bc6c612482b0 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mcpu=corei7 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
 
 ; This should be a single mov, not a load of immediate + andq.
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: movl %edi, %eax
 
 define i64 @test(i64 %x) nounwind {
@@ -14,7 +14,7 @@ entry:
 }
 
 ; This copy can't be coalesced away because it needs the implicit zero-extend.
-; CHECK: bbb:
+; CHECK-LABEL: bbb:
 ; CHECK: movl %edi, %edi
 
 define void @bbb(i64 %x) nounwind {
@@ -26,7 +26,7 @@ define void @bbb(i64 %x) nounwind {
 ; This should use a 32-bit and with implicit zero-extension, not a 64-bit and
 ; with a separate mov to materialize the mask.
 ; rdar://7527390
-; CHECK: ccc:
+; CHECK-LABEL: ccc:
 ; CHECK: andl $-1048593, %edi
 
 declare void @foo(i64 %x) nounwind
@@ -38,9 +38,9 @@ define void @ccc(i64 %x) nounwind {
 }
 
 ; This requires a mov and a 64-bit and.
-; CHECK: ddd:
+; CHECK-LABEL: ddd:
 ; CHECK: movabsq $4294967296, %r
-; CHECK: andq %rax, %rdi
+; CHECK: andq %r{{..}}, %r{{..}}
 
 define void @ddd(i64 %x) nounwind {
   %t = and i64 %x, 4294967296
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index 3ec172b2b656..da8082b92518 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -9,4 +9,6 @@ entry:
         ret void
 }
 
-declare extern_weak i32 @f()
+define weak i32 @f() {
+  ret i32 42
+}
diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll
new file mode 100644
index 000000000000..183ddf446f3d
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-psub.ll
@@ -0,0 +1,220 @@
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck %s
+
+; MMX packed sub opcodes were wrongly marked as commutative.
+; This test checks that the operands of packed sub instructions are
+; never interchanged by the "Two-Address instruction pass".
+
+declare { i64, double } @getFirstParam()
+declare { i64, double } @getSecondParam()
+
+define i64 @test_psubb() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
+  %3 = bitcast <8 x i8> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
+  %5 = bitcast <8 x i8> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <8 x i8>
+  %8 = bitcast <8 x i8> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubb:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubb [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubw() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
+  %3 = bitcast <4 x i16> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
+  %5 = bitcast <4 x i16> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <4 x i16>
+  %8 = bitcast <4 x i16> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubw:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubw [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+
+define i64 @test_psubd() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <2 x i32>
+  %3 = bitcast <2 x i32> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <2 x i32>
+  %5 = bitcast <2 x i32> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <2 x i32>
+  %8 = bitcast <2 x i32> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubd:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubd [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubsb() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
+  %3 = bitcast <8 x i8> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
+  %5 = bitcast <8 x i8> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <8 x i8>
+  %8 = bitcast <8 x i8> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubsb:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubsb [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubswv() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
+  %3 = bitcast <4 x i16> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
+  %5 = bitcast <4 x i16> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <4 x i16>
+  %8 = bitcast <4 x i16> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubswv:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubsw [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubusbv() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
+  %3 = bitcast <8 x i8> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
+  %5 = bitcast <8 x i8> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <8 x i8>
+  %8 = bitcast <8 x i8> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubusbv:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubusb [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubuswv() {
+entry:
+  %call = tail call { i64, double } @getFirstParam()
+  %0 = extractvalue { i64, double } %call, 0
+  %call2 = tail call { i64, double } @getSecondParam()
+  %1 = extractvalue { i64, double } %call2, 0
+  %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
+  %3 = bitcast <4 x i16> %2 to x86_mmx
+  %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
+  %5 = bitcast <4 x i16> %4 to x86_mmx
+  %6 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %3, x86_mmx %5) nounwind
+  %7 = bitcast x86_mmx %6 to <4 x i16>
+  %8 = bitcast <4 x i16> %7 to <1 x i64>
+  %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+  ret i64 %retval.0.extract.i15
+}
+
+; CHECK-LABEL: test_psubuswv:
+; CHECK:   callq getFirstParam
+; CHECK:   movq %rax, [[TEMP:%[a-z0-9]+]]
+; CHECK:   callq getSecondParam
+; CHECK:   movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
+; CHECK:   movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK:   psubusw [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index bc8a54346580..2d001142d7a1 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -4,11 +4,11 @@
 
 %struct.foo = type { [4 x i64] }
 
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: movq %rdi, %rax
 
 ; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
-; X32ABI: bar:
+; X32ABI-LABEL: bar:
 ; X32ABI: movl %edi, %eax
 
 define void @bar(%struct.foo* noalias sret  %agg.result, %struct.foo* %d) nounwind  {
@@ -60,11 +60,11 @@ return:		; preds = %entry
 	ret void
 }
 
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: movq %rdi, %rax
 
 ; For the x32 ABI, pointers are 32-bit so 32-bit instructions will be used
-; X32ABI: foo:
+; X32ABI-LABEL: foo:
 ; X32ABI: movl %edi, %eax
 
 define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll
index 8d3b300da3bf..641786f5a914 100644
--- a/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/test/CodeGen/X86/x86-64-tls-1.ll
@@ -1,6 +1,10 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 @tm_nest_level = internal thread_local global i32 0
 define i64 @z() nounwind {
-; CHECK: movabsq    $tm_nest_level@TPOFF, %rcx
+; FIXME: The codegen here is primitive at best and could be much better.
+; The add and the moves can be folded together.
+; CHECK-DAG: movq    $tm_nest_level@TPOFF, %rcx
+; CHECK-DAG: movq    %fs:0, %rax
+; CHECK: addl    %ecx, %eax
   ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
 }
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index 20bccab8ff78..2f3adb8db9a0 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,8 @@
 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
 entry:
 ; CHECK:      shl4
-; CHECK:      padd
 ; CHECK:      pslld
+; CHECK:      padd
 ; CHECK:      ret
   %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
   %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
@@ -67,8 +67,8 @@ entry:
 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
 entry:
 ; CHECK:      shl8
-; CHECK:      padd
 ; CHECK:      psllw
+; CHECK:      padd
 ; CHECK:      ret
   %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -156,7 +156,7 @@ entry:
 define <16 x i8> @shl9(<16 x i8> %A) nounwind {
   %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %B
-; CHECK: shl9:
+; CHECK-LABEL: shl9:
 ; CHECK: psllw $3
 ; CHECK: pand
 ; CHECK: ret
@@ -165,7 +165,7 @@ define <16 x i8> @shl9(<16 x i8> %A) nounwind {
 define <16 x i8> @shr9(<16 x i8> %A) nounwind {
   %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %B
-; CHECK: shr9:
+; CHECK-LABEL: shr9:
 ; CHECK: psrlw $3
 ; CHECK: pand
 ; CHECK: ret
@@ -174,7 +174,7 @@ define <16 x i8> @shr9(<16 x i8> %A) nounwind {
 define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
   %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   ret <16 x i8> %B
-; CHECK: sra_v16i8_7:
+; CHECK-LABEL: sra_v16i8_7:
 ; CHECK: pxor
 ; CHECK: pcmpgtb
 ; CHECK: ret
@@ -183,7 +183,7 @@ define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
 define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
   %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %B
-; CHECK: sra_v16i8:
+; CHECK-LABEL: sra_v16i8:
 ; CHECK: psrlw $3
 ; CHECK: pand
 ; CHECK: pxor
diff --git a/test/CodeGen/X86/xmulo.ll b/test/CodeGen/X86/xmulo.ll
index 486dafeb5a24..71efac4e99a1 100644
--- a/test/CodeGen/X86/xmulo.ll
+++ b/test/CodeGen/X86/xmulo.ll
@@ -8,7 +8,7 @@ declare i32 @printf(i8*, ...)
 @.str = private unnamed_addr constant [10 x i8] c"%llx, %d\0A\00", align 1
 
 define i32 @t1() nounwind {
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK:  movl $0, 12(%esp)
 ; CHECK:  movl $0, 8(%esp)
 ; CHECK:  movl $72, 4(%esp)
@@ -22,7 +22,7 @@ define i32 @t1() nounwind {
 }
 
 define i32 @t2() nounwind {
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK:  movl $0, 12(%esp)
 ; CHECK:  movl $0, 8(%esp)
 ; CHECK:  movl $0, 4(%esp)
@@ -36,7 +36,7 @@ define i32 @t2() nounwind {
 }
 
 define i32 @t3() nounwind {
-; CHECK: t3:
+; CHECK-LABEL: t3:
 ; CHECK:  movl $1, 12(%esp)
 ; CHECK:  movl $-1, 8(%esp)
 ; CHECK:  movl $-9, 4(%esp)
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll
index fd1b0064046a..dd1fcca48f61 100644
--- a/test/CodeGen/X86/xor-icmp.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -4,14 +4,14 @@
 
 define i32 @t(i32 %a, i32 %b) nounwind ssp {
 entry:
-; X32:     t:
+; X32-LABEL:     t:
 ; X32:     xorb
 ; X32-NOT: andb
 ; X32-NOT: shrb
 ; X32:     testb $64
 ; X32:     je
 
-; X64:     t:
+; X64-LABEL:     t:
 ; X64-NOT: setne
 ; X64:     xorl
 ; X64:     testb $64
@@ -37,7 +37,7 @@ declare i32 @foo(...)
 declare i32 @bar(...)
 
 define i32 @t2(i32 %x, i32 %y) nounwind ssp {
-; X32: t2:
+; X32-LABEL: t2:
 ; X32: cmpl
 ; X32: sete
 ; X32: cmpl
@@ -45,7 +45,7 @@ define i32 @t2(i32 %x, i32 %y) nounwind ssp {
 ; X32-NOT: xor
 ; X32: je
 
-; X64: t2:
+; X64-LABEL: t2:
 ; X64: testl
 ; X64: sete
 ; X64: testl
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 996bfc40ee56..fd8e1b4cebaa 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -7,7 +7,7 @@ define <4 x i32> @test1() nounwind {
 	%tmp = xor <4 x i32> undef, undef
 	ret <4 x i32> %tmp
         
-; X32: test1:
+; X32-LABEL: test1:
 ; X32:	xorps	%xmm0, %xmm0
 ; X32:	ret
 }
@@ -16,7 +16,7 @@ define <4 x i32> @test1() nounwind {
 define i32 @test2() nounwind{
 	%tmp = xor i32 undef, undef
 	ret i32 %tmp
-; X32: test2:
+; X32-LABEL: test2:
 ; X32:	xorl	%eax, %eax
 ; X32:	ret
 }
@@ -28,13 +28,13 @@ entry:
         %tmp4 = lshr i32 %tmp3, 1
         ret i32 %tmp4
         
-; X64: test3:
+; X64-LABEL: test3:
 ; X64:	notl
 ; X64:	andl
 ; X64:	shrl
 ; X64:	ret
 
-; X32: test3:
+; X32-LABEL: test3:
 ; X32: 	movl	8(%esp), %eax
 ; X32: 	notl	%eax
 ; X32: 	andl	4(%esp), %eax
@@ -57,10 +57,10 @@ bb:
 bb12:
 	ret i32 %tmp3
         
-; X64: test4:
+; X64-LABEL: test4:
 ; X64:    notl	[[REG:%[a-z]+]]
 ; X64:    andl	{{.*}}[[REG]]
-; X32: test4:
+; X32-LABEL: test4:
 ; X32:    notl	[[REG:%[a-z]+]]
 ; X32:    andl	{{.*}}[[REG]]
 }
@@ -79,10 +79,10 @@ bb:
 	br i1 %tmp10, label %bb12, label %bb
 bb12:
 	ret i16 %tmp3
-; X64: test5:
+; X64-LABEL: test5:
 ; X64:    notl	[[REG:%[a-z]+]]
 ; X64:    andl	{{.*}}[[REG]]
-; X32: test5:
+; X32-LABEL: test5:
 ; X32:    notl	[[REG:%[a-z]+]]
 ; X32:    andl	{{.*}}[[REG]]
 }
@@ -101,10 +101,10 @@ bb:
 	br i1 %tmp10, label %bb12, label %bb
 bb12:
 	ret i8 %tmp3
-; X64: test6:
+; X64-LABEL: test6:
 ; X64:    notb	[[REG:%[a-z]+]]
 ; X64:    andb	{{.*}}[[REG]]
-; X32: test6:
+; X32-LABEL: test6:
 ; X32:    notb	[[REG:%[a-z]+]]
 ; X32:    andb	{{.*}}[[REG]]
 }
@@ -123,10 +123,10 @@ bb:
 	br i1 %tmp10, label %bb12, label %bb
 bb12:
 	ret i32 %tmp3
-; X64: test7:
+; X64-LABEL: test7:
 ; X64:    xorl	$2147483646, [[REG:%[a-z]+]]
 ; X64:    andl	{{.*}}[[REG]]
-; X32: test7:
+; X32-LABEL: test7:
 ; X32:    xorl	$2147483646, [[REG:%[a-z]+]]
 ; X32:    andl	{{.*}}[[REG]]
 }
@@ -137,8 +137,47 @@ entry:
   %t1 = sub i32 0, %a
   %t2 = add i32 %t1, -1
   ret i32 %t2
-; X64: test8:
+; X64-LABEL: test8:
 ; X64:   notl {{%eax|%edi|%ecx}}
-; X32: test8:
+; X32-LABEL: test8:
 ; X32:   notl %eax
 }
+
+define i32 @test9(i32 %a) nounwind { ; (a & 4096) ^ 4096 is expected to become ~a & 4096
+  %1 = and i32 %a, 4096
+  %2 = xor i32 %1, 4096
+  ret i32 %2
+; X64-LABEL: test9:
+; X64:    notl	[[REG:%[a-z]+]]
+; X64:    andl	{{.*}}[[REG]]
+; X32-LABEL: test9:
+; X32:    notl	[[REG:%[a-z]+]]
+; X32:    andl	{{.*}}[[REG]]
+}
+
+; PR15948
+define <4 x i32> @test10(<4 x i32> %a) nounwind {
+  %1 = and <4 x i32> %a, <i32 4096, i32 4096, i32 4096, i32 4096>
+  %2 = xor <4 x i32> %1, <i32 4096, i32 4096, i32 4096, i32 4096>
+  ret <4 x i32> %2
+; X64-LABEL: test10:
+; X64:    andnps
+; X32-LABEL: test10:
+; X32:    andnps
+}
+
+define i32 @PR17487(i1 %tobool) {
+  %tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
+  %tmp1 = zext <2 x i1> %tmp to <2 x i64>
+  %tmp2 = xor <2 x i64> %tmp1, <i64 1, i64 1>
+  %tmp3 = extractelement <2 x i64> %tmp2, i32 1
+  %add = add nsw i64 0, %tmp3
+  %cmp6 = icmp ne i64 %add, 1
+  %conv7 = zext i1 %cmp6 to i32
+  ret i32 %conv7
+
+; X64-LABEL: PR17487:
+; X64: andn
+; X32-LABEL: PR17487:
+; X32: andn
+}
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 5d25a2d74971..e3c3c5e31901 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -11,12 +11,12 @@ define double @foo() nounwind {
   call void @bar(double 0.0)
   ret double 0.0
 
-;CHECK-32: foo:
+;CHECK-32-LABEL: foo:
 ;CHECK-32: call
 ;CHECK-32: fldz
 ;CHECK-32: ret
 
-;CHECK-64: foo:
+;CHECK-64-LABEL: foo:
 ;CHECK-64: xorps
 ;CHECK-64: call
 ;CHECK-64: xorps
@@ -28,12 +28,12 @@ define float @foof() nounwind {
   call void @barf(float 0.0)
   ret float 0.0
 
-;CHECK-32: foof:
+;CHECK-32-LABEL: foof:
 ;CHECK-32: call
 ;CHECK-32: fldz
 ;CHECK-32: ret
 
-;CHECK-64: foof:
+;CHECK-64-LABEL: foof:
 ;CHECK-64: xorps
 ;CHECK-64: call
 ;CHECK-64: xorps
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
index 168b898f12bd..43e79c77acc2 100644
--- a/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
 define void @t() nounwind ssp {
-; CHECK: t:
+; CHECK-LABEL: t:
 entry:
   br i1 undef, label %return, label %if.end.i
 
@@ -14,7 +14,7 @@ if.end:                                           ; preds = %if.end.i
 ; CHECK: movl (%{{.*}}), [[REG:%[a-z]+]]
 ; CHECK-NOT: movl [[REG]], [[REG]]
 ; CHECK-NEXT: testl [[REG]], [[REG]]
-; CHECK-NEXT: xorb
+; CHECK-NEXT: xorl
   %tmp138 = select i1 undef, i32 0, i32 %tmp7.i
   %tmp867 = zext i32 %tmp138 to i64
   br label %while.cond
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index ff93c68ff35a..a10923f7a80f 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -10,7 +10,7 @@ define i32 @test1(i8 %x) nounwind readnone {
 ; CHECK: movzbl
 ; CHECK-NEXT: andl {{.*}}224
 
-;; Multiple uses of %x but easily extensible. 
+;; Multiple uses of %x but easily extensible.
 define i32 @test2(i8 %x) nounwind readnone {
   %A = and i8 %x, -32
   %B = zext i8 %A to i32
@@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone {
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: orl $63
 ; CHECK: andl $224
+; CHECK: orl $63
 
 declare void @use(i32, i8)
 
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index 6432ae38ff3a..5b2713dc6fc1 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,8 +1,9 @@
-; XFAIL: *
-; ...should pass. See PR12324: misched bringup
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom | FileCheck %s
 ; <rdar://problem/8006248>
 
+; This randomly started passing after an unrelated change; if it fails again it
+; might be worth looking at PR12324: misched bringup.
+
 @llvm.used = appending global [1 x i8*] [i8* bitcast (void ([40 x i16]*, i32*, i16**, i64*)* @func to i8*)], section "llvm.metadata"
 
 define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
@@ -32,10 +33,12 @@ entry:
   %tmp11 = sext i32 %tmp4 to i64
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
-; CHECK:      addl	$2138875574, %e[[REGISTER_zext:[a-z]+]]
-; CHECK-NEXT: movslq	%e[[REGISTER_zext]], [[REGISTER_tmp:%[a-z]+]]
-; CHECK:      movq	[[REGISTER_tmp]], [[REGISTER_sext:%[a-z]+]]
-; CHECK-NEXT: subq	%r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK:      addl	$2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
+; CHECK:      movslq	%e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
+; CHECK:      cmpl	$-8608074, %e[[REGISTER_zext]]
+; CHECK-NOT:  [[REGISTER_zext]]
+; CHECK-DAG:  testl     %e[[REGISTER_zext]]
+; CHECK:      subq	%r[[REGISTER_zext]], [[REGISTER_sext]]
 
   %tmp13 = sub i64 %tmp12, 2138875574
   %tmp14 = zext i32 %tmp4 to i64
diff --git a/test/CodeGen/X86/zext-shl.ll b/test/CodeGen/X86/zext-shl.ll
index 928848e3f7a2..ac3ecc85f2d9 100644
--- a/test/CodeGen/X86/zext-shl.ll
+++ b/test/CodeGen/X86/zext-shl.ll
@@ -2,7 +2,7 @@
 
 define i32 @t1(i8 zeroext %x) nounwind readnone ssp {
 entry:
-; CHECK: t1:
+; CHECK-LABEL: t1:
 ; CHECK: shll
 ; CHECK-NOT: movzwl
 ; CHECK: ret
@@ -14,7 +14,7 @@ entry:
 
 define i32 @t2(i8 zeroext %x) nounwind readnone ssp {
 entry:
-; CHECK: t2:
+; CHECK-LABEL: t2:
 ; CHECK: shrl
 ; CHECK-NOT: movzwl
 ; CHECK: ret
diff --git a/test/CodeGen/X86/zext-trunc.ll b/test/CodeGen/X86/zext-trunc.ll
index b9ffbe87b21b..32afd6b96a8b 100644
--- a/test/CodeGen/X86/zext-trunc.ll
+++ b/test/CodeGen/X86/zext-trunc.ll
@@ -2,7 +2,7 @@
 ; rdar://7570931
 
 define i64 @foo(i64 %a, i64 %b) nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 ; CHECK: leal
 ; CHECK-NOT: movl
 ; CHECK: ret
diff --git a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
index 84e21e46348d..2a049639ce7e 100644
--- a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
+++ b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -13,7 +13,7 @@ allocas:
   call void @llvm.stackrestore(i8* %0)
   ret void
 }
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1]
 ; CHECK: set sp, [[REGISTER]]
 ; CHECK: extsp 1
diff --git a/test/CodeGen/XCore/2011-08-01-VarargsBug.ll b/test/CodeGen/XCore/2011-08-01-VarargsBug.ll
deleted file mode 100644
index 2076057441e8..000000000000
--- a/test/CodeGen/XCore/2011-08-01-VarargsBug.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=xcore | FileCheck %s
-define void @_Z1fz(...) {
-entry:
-; CHECK: _Z1fz:
-; CHECK: extsp 3
-; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
-; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
-; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
-; CHECK: stw r[[REG:[0-3]{1,1}]]
-; CHECK: , sp{{\[}}[[REG]]{{\]}}
-; CHECK: ldaw sp, sp[3]
-; CHECK: retsp 0
-  ret void
-}
diff --git a/test/CodeGen/XCore/addsub64.ll b/test/CodeGen/XCore/addsub64.ll
index d06248022e31..89271cea3338 100644
--- a/test/CodeGen/XCore/addsub64.ll
+++ b/test/CodeGen/XCore/addsub64.ll
@@ -27,7 +27,7 @@ entry:
 	%3 = add i64 %2, %a
 	ret i64 %3
 }
-; CHECK: maccu:
+; CHECK-LABEL: maccu:
 ; CHECK: maccu r1, r0, r3, r2
 ; CHECK-NEXT: retsp 0
 
@@ -39,7 +39,7 @@ entry:
 	%3 = add i64 %2, %a
 	ret i64 %3
 }
-; CHECK: maccs:
+; CHECK-LABEL: maccs:
 ; CHECK: maccs r1, r0, r3, r2
 ; CHECK-NEXT: retsp 0
 
@@ -54,6 +54,6 @@ entry:
 	%6 = add i64 %5, %3
 	ret i64 %6
 }
-; CHECK: lmul:
+; CHECK-LABEL: lmul:
 ; CHECK: lmul r1, r0, r1, r0, r2, r3
 ; CHECK-NEXT: retsp 0
diff --git a/test/CodeGen/XCore/aliases.ll b/test/CodeGen/XCore/aliases.ll
index d83b246a5527..b7ad416968f4 100644
--- a/test/CodeGen/XCore/aliases.ll
+++ b/test/CodeGen/XCore/aliases.ll
@@ -1,13 +1,15 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
-declare void @a_val() nounwind
-@b_val = external constant i32, section ".cp.rodata"
-@c_val = external global i32
+define void @a_val() nounwind {
+  ret void
+}
+@b_val = constant i32 42, section ".cp.rodata"
+@c_val = global i32 42
 
 @a = alias void ()* @a_val
 @b = alias i32* @b_val
 @c = alias i32* @c_val
 
-; CHECK: a_addr:
+; CHECK-LABEL: a_addr:
 ; CHECK: ldap r11, a
 ; CHECK: retsp
 define void ()* @a_addr() nounwind {
@@ -15,7 +17,7 @@ entry:
   ret void ()* @a
 }
 
-; CHECK: b_addr:
+; CHECK-LABEL: b_addr:
 ; CHECK: ldaw r11, cp[b]
 ; CHECK: retsp
 define i32 *@b_addr() nounwind {
@@ -23,7 +25,7 @@ entry:
   ret i32* @b
 }
 
-; CHECK: c_addr:
+; CHECK-LABEL: c_addr:
 ; CHECK: ldaw r0, dp[c]
 ; CHECK: retsp
 define i32 *@c_addr() nounwind {
diff --git a/test/CodeGen/XCore/alignment.ll b/test/CodeGen/XCore/alignment.ll
new file mode 100644
index 000000000000..28bdf3b74208
--- /dev/null
+++ b/test/CodeGen/XCore/alignment.ll
@@ -0,0 +1,9 @@
+; RUN: not llc < %s -march=xcore 2>&1 | FileCheck %s
+
+; CHECK: emitPrologue unsupported alignment: 8
+define void @f() nounwind {
+entry:
+  %BadAlignment = alloca i64, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
index 03b6b1f16950..78cb1440cc05 100644
--- a/test/CodeGen/XCore/ashr.ll
+++ b/test/CodeGen/XCore/ashr.ll
@@ -1,26 +1,26 @@
 ; RUN: llc < %s -march=xcore -asm-verbose=0 | FileCheck %s
-define i32 @ashr(i32 %a, i32 %b) {
+define i32 @ashr(i32 %a, i32 %b) nounwind {
 	%1 = ashr i32 %a, %b
 	ret i32 %1
 }
-; CHECK: ashr:
+; CHECK-LABEL: ashr:
 ; CHECK-NEXT: ashr r0, r0, r1
 
-define i32 @ashri1(i32 %a) {
+define i32 @ashri1(i32 %a) nounwind {
 	%1 = ashr i32 %a, 24
 	ret i32 %1
 }
-; CHECK: ashri1:
+; CHECK-LABEL: ashri1:
 ; CHECK-NEXT: ashr r0, r0, 24
 
-define i32 @ashri2(i32 %a) {
+define i32 @ashri2(i32 %a) nounwind {
 	%1 = ashr i32 %a, 31
 	ret i32 %1
 }
-; CHECK: ashri2:
+; CHECK-LABEL: ashri2:
 ; CHECK-NEXT: ashr r0, r0, 32
 
-define i32 @f1(i32 %a) {
+define i32 @f1(i32 %a) nounwind {
         %1 = icmp slt i32 %a, 0
 	br i1 %1, label %less, label %not_less
 less:
@@ -28,11 +28,11 @@ less:
 not_less:
 	ret i32 17
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK-NEXT: ashr r0, r0, 32
 ; CHECK-NEXT: bt r0
 
-define i32 @f2(i32 %a) {
+define i32 @f2(i32 %a) nounwind {
         %1 = icmp sge i32 %a, 0
 	br i1 %1, label %greater, label %not_greater
 greater:
@@ -40,37 +40,37 @@ greater:
 not_greater:
 	ret i32 17
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK-NEXT: ashr r0, r0, 32
 ; CHECK-NEXT: bt r0
 
-define i32 @f3(i32 %a) {
+define i32 @f3(i32 %a) nounwind {
         %1 = icmp slt i32 %a, 0
 	%2 = select i1 %1, i32 10, i32 17
 	ret i32 %2
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK-NEXT: ashr r0, r0, 32
 ; CHECK-NEXT: bt r0
 ; CHECK-NEXT: ldc r0, 17
 ; CHECK: ldc r0, 10
 
-define i32 @f4(i32 %a) {
+define i32 @f4(i32 %a) nounwind {
         %1 = icmp sge i32 %a, 0
 	%2 = select i1 %1, i32 10, i32 17
 	ret i32 %2
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK-NEXT: ashr r0, r0, 32
 ; CHECK-NEXT: bt r0
 ; CHECK-NEXT: ldc r0, 10
 ; CHECK: ldc r0, 17
 
-define i32 @f5(i32 %a) {
+define i32 @f5(i32 %a) nounwind {
         %1 = icmp sge i32 %a, 0
 	%2 = zext i1 %1 to i32
 	ret i32 %2
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK-NEXT: ashr r0, r0, 32
 ; CHECK-NEXT: eq r0, r0, 0
diff --git a/test/CodeGen/XCore/atomic.ll b/test/CodeGen/XCore/atomic.ll
new file mode 100644
index 000000000000..95fca9ac5b21
--- /dev/null
+++ b/test/CodeGen/XCore/atomic.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; CHECK-LABEL: atomic_fence
+; CHECK: #MEMBARRIER
+; CHECK: #MEMBARRIER
+; CHECK: #MEMBARRIER
+; CHECK: #MEMBARRIER
+; CHECK: retsp 0
+define void @atomic_fence() nounwind {
+entry:
+  fence acquire
+  fence release
+  fence acq_rel
+  fence seq_cst
+  ret void
+}
diff --git a/test/CodeGen/XCore/bigstructret.ll b/test/CodeGen/XCore/bigstructret.ll
index 56af930cc405..877c57140a1d 100644
--- a/test/CodeGen/XCore/bigstructret.ll
+++ b/test/CodeGen/XCore/bigstructret.ll
@@ -12,7 +12,7 @@ entry:
   %3 = insertvalue %0 %2, i32 24601, 3
   ret %0 %3
 }
-; CHECK: ReturnBigStruct:
+; CHECK-LABEL: ReturnBigStruct:
 ; CHECK: ldc r0, 12
 ; CHECK: ldc r1, 24
 ; CHECK: ldc r2, 48
@@ -29,7 +29,7 @@ entry:
   %4 = insertvalue %1 %3, i32 4321, 4
   ret %1 %4
 }
-; CHECK: ReturnBigStruct2:
+; CHECK-LABEL: ReturnBigStruct2:
 ; CHECK: ldc r1, 4321
 ; CHECK: stw r1, r0[4]
 ; CHECK: ldc r1, 24601
diff --git a/test/CodeGen/XCore/byVal.ll b/test/CodeGen/XCore/byVal.ll
new file mode 100644
index 000000000000..e9612fd6021a
--- /dev/null
+++ b/test/CodeGen/XCore/byVal.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; CHECK-LABEL: f0Test
+; CHECK: entsp 1
+; CHECK: bl f0
+; CHECK: retsp 1
+%struct.st0 = type { [0 x i32] }
+declare void @f0(%struct.st0*) nounwind
+define void @f0Test(%struct.st0* byval %s0) nounwind {
+entry:
+  call void @f0(%struct.st0* %s0) nounwind
+  ret void
+}
+
+; CHECK-LABEL: f1Test
+; CHECK: entsp 13
+; CHECK: stw r4, sp[12]
+; CHECK: stw r5, sp[11]
+; CHECK: mov r4, r0
+; CHECK: ldaw r5, sp[1]
+; CHECK: ldc r2, 40
+; CHECK: mov r0, r5
+; CHECK: bl memcpy
+; CHECK: mov r0, r5
+; CHECK: bl f1
+; CHECK: mov r0, r4
+; CHECK: ldw r5, sp[11]
+; CHECK: ldw r4, sp[12]
+; CHECK: retsp 13
+%struct.st1 = type { [10 x i32] }
+declare void @f1(%struct.st1*) nounwind
+define i32 @f1Test(i32 %i, %struct.st1* byval %s1) nounwind {
+entry:
+  call void @f1(%struct.st1* %s1) nounwind
+  ret i32 %i
+}
+
+; CHECK-LABEL: f2Test
+; CHECK: extsp 4
+; CHECK: stw lr, sp[1]
+; CHECK: stw r2, sp[3]
+; CHECK: stw r3, sp[4]
+; CHECK: ldw r0, r0[0]
+; CHECK: stw r0, sp[2]
+; CHECK: ldaw r2, sp[2]
+; CHECK: mov r0, r1
+; CHECK: mov r1, r2
+; CHECK: bl f2
+; CHECK: ldw lr, sp[1]
+; CHECK: ldaw sp, sp[4]
+; CHECK: retsp 0
+%struct.st2 = type { i32 }
+declare void @f2(i32, %struct.st2*) nounwind
+define void @f2Test(%struct.st2* byval %s2, i32 %i, ...) nounwind {
+entry:
+  call void @f2(i32 %i, %struct.st2* %s2)
+  ret void
+}
+
+; CHECK-LABEL: f3Test
+; CHECK: entsp 2
+; CHECK: ldc r1, 0
+; CHECK: ld8u r2, r0[r1]
+; CHECK: ldaw r0, sp[1]
+; CHECK: st8 r2, r0[r1]
+; CHECK: bl f3
+; CHECK: retsp 2
+declare void @f3(i8*) nounwind
+define void @f3Test(i8* byval %v) nounwind { ; i8 byval argument: checks expect an ld8u/st8 copy to sp[1] before the call
+entry:
+  call void @f3(i8* %v) nounwind
+  ret void
+}
diff --git a/test/CodeGen/XCore/constants.ll b/test/CodeGen/XCore/constants.ll
index cad1a2153f4f..c289bf94ce30 100644
--- a/test/CodeGen/XCore/constants.ll
+++ b/test/CodeGen/XCore/constants.ll
@@ -3,9 +3,17 @@
 ; CHECK: .section .cp.rodata.cst4,"aMc",@progbits,4
 ; CHECK: .LCPI0_0:
 ; CHECK: .long 12345678
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: ldw r0, cp[.LCPI0_0]
 define i32 @f() {
 entry:
 	ret i32 12345678
 }
+
+define i32 @g() {
+entry:
+; CHECK-LABEL: g:
+; CHECK: mkmsk r0, 1
+; CHECK: retsp 0
+  ret i32 1 ; the checks above expect this constant to be built with mkmsk
+}
diff --git a/test/CodeGen/XCore/epilogue_prologue.ll b/test/CodeGen/XCore/epilogue_prologue.ll
new file mode 100644
index 000000000000..185565f4e287
--- /dev/null
+++ b/test/CodeGen/XCore/epilogue_prologue.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; CHECK-LABEL: f1
+; CHECK: stw lr, sp[0]
+; CHECK: ldw lr, sp[0]
+; CHECK-NEXT: retsp 0
+define void @f1() nounwind {
+entry:
+  tail call void asm sideeffect "", "~{lr}"() nounwind
+  ret void
+}
+
+; CHECK-LABEL: f3
+; CHECK: entsp 2
+; CHECK: stw [[REG:r[4-9]+]], sp[1]
+; CHECK: mov [[REG]], r0
+; CHECK: bl f2
+; CHECK: mov r0, [[REG]]
+; CHECK: ldw [[REG]], sp[1]
+; CHECK: retsp 2
+declare void @f2()
+define i32 @f3(i32 %i) nounwind {
+entry:
+  call void @f2()
+  ret i32 %i
+}
diff --git a/test/CodeGen/XCore/events.ll b/test/CodeGen/XCore/events.ll
index 30a6ec35513e..672669be5602 100644
--- a/test/CodeGen/XCore/events.ll
+++ b/test/CodeGen/XCore/events.ll
@@ -6,7 +6,7 @@ declare i8* @llvm.xcore.checkevent(i8*)
 declare void @llvm.xcore.clre()
 
 define i32 @f(i8 addrspace(1)* %r) nounwind {
-; CHECK: f:
+; CHECK-LABEL: f:
 entry:
 ; CHECK: clre
   call void @llvm.xcore.clre()
@@ -25,7 +25,7 @@ ret:
 }
 
 define i32 @g(i8 addrspace(1)* %r) nounwind {
-; CHECK: g:
+; CHECK-LABEL: g:
 entry:
 ; CHECK: clre
   call void @llvm.xcore.clre()
diff --git a/test/CodeGen/XCore/exception.ll b/test/CodeGen/XCore/exception.ll
new file mode 100644
index 000000000000..8018cdcada7a
--- /dev/null
+++ b/test/CodeGen/XCore/exception.ll
@@ -0,0 +1,129 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+declare void @g()
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+declare i8* @__cxa_allocate_exception(i32)
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+@_ZTIi = external constant i8*
+@_ZTId = external constant i8*
+
+; CHECK-LABEL: fn_typeid:
+; CHECK: .cfi_startproc
+; CHECK: mkmsk r0, 1
+; CHECK: retsp 0
+; CHECK: .cfi_endproc
+define i32 @fn_typeid() {
+entry:
+  %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+  ret i32 %0
+}
+
+; CHECK-LABEL: fn_throw
+; CHECK: .cfi_startproc
+; CHECK: entsp 1
+; CHECK: .cfi_def_cfa_offset 4
+; CHECK: .cfi_offset 15, 0
+; CHECK: ldc r0, 4
+; CHECK: bl __cxa_allocate_exception
+; CHECK: ldaw r11, cp[_ZTIi]
+; CHECK: ldc r2, 0
+; CHECK: mov r1, r11
+; CHECK: bl __cxa_throw
+define void @fn_throw() {
+entry:
+  %0 = call i8* @__cxa_allocate_exception(i32 4) nounwind
+  call void @__cxa_throw(i8* %0, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
+  unreachable
+}
+
+; CHECK-LABEL: fn_catch
+; CHECK: .cfi_startproc
+; CHECK: .cfi_personality 0, __gxx_personality_v0
+; CHECK: [[START:.L[a-zA-Z0-9_]+]]
+; CHECK: .cfi_lsda 0, [[LSDA:.L[a-zA-Z0-9_]+]]
+; CHECK: entsp 4
+; CHECK: .cfi_def_cfa_offset 16
+; CHECK: .cfi_offset 15, 0
+define void @fn_catch() {
+entry:
+
+; N.B. we alloc no variables, hence force compiler to spill
+; CHECK: stw r4, sp[3]
+; CHECK: .cfi_offset 4, -4
+; CHECK: stw r5, sp[2]
+; CHECK: .cfi_offset 5, -8
+; CHECK: stw r6, sp[1]
+; CHECK: .cfi_offset 6, -12
+; CHECK: [[PRE_G:.L[a-zA-Z0-9_]+]]
+; CHECK: bl g
+; CHECK: [[POST_G:.L[a-zA-Z0-9_]+]]
+; CHECK: [[RETURN:.L[a-zA-Z0-9_]+]]
+; CHECK: ldw r6, sp[1]
+; CHECK: ldw r5, sp[2]
+; CHECK: ldw r4, sp[3]
+; CHECK: retsp 4
+  invoke void @g() to label %cont unwind label %lpad
+cont:
+  ret void
+
+; CHECK: {{.L[a-zA-Z0-9_]+}}
+; CHECK: [[LANDING:.L[a-zA-Z0-9_]+]]
+; CHECK: mov r5, r1
+; CHECK: mov r4, r0
+; CHECK: bl __cxa_begin_catch
+; CHECK: ldw r6, r0[0]
+; CHECK: bl __cxa_end_catch
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+          catch i8* bitcast (i8** @_ZTId to i8*)
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = extractvalue { i8*, i32 } %0, 1
+  %3 = call i8* @__cxa_begin_catch(i8* %1) nounwind
+  %4 = bitcast i8* %3 to i32*
+  %5 = load i32* %4
+  call void @__cxa_end_catch() nounwind
+
+; CHECK: eq r0, r6, r5
+; CHECK: bf r0, [[RETURN]]
+; CHECK: mov r0, r4
+; CHECK: bl _Unwind_Resume
+; CHECK: .cfi_endproc
+; CHECK: [[END:.L[a-zA-Z0-9_]+]]
+  %6 = icmp eq i32 %5, %2
+  br i1 %6, label %Resume, label %Exit
+Resume:
+  resume { i8*, i32 } %0
+Exit:
+  ret void
+}
+
+; CHECK: [[LSDA]]:
+; CHECK: .byte  255
+; CHECK: .byte  0
+; CHECK: .asciiz
+; CHECK: .byte  3
+; CHECK: .byte  26
+; CHECK: [[SET0:.L[a-zA-Z0-9_]+]] = [[PRE_G]]-[[START]]
+; CHECK: .long [[SET0]]
+; CHECK: [[SET1:.L[a-zA-Z0-9_]+]] = [[POST_G]]-[[PRE_G]]
+; CHECK: .long [[SET1]]
+; CHECK: [[SET2:.L[a-zA-Z0-9_]+]] = [[LANDING]]-[[START]]
+; CHECK: .long [[SET2]]
+; CHECK: .byte 3
+; CHECK: [[SET3:.L[a-zA-Z0-9_]+]] = [[POST_G]]-[[START]]
+; CHECK: .long [[SET3]]
+; CHECK: [[SET4:.L[a-zA-Z0-9_]+]] = [[END]]-[[POST_G]]
+; CHECK: .long [[SET4]]
+; CHECK: .long 0
+; CHECK: .byte 0
+; CHECK: .byte 1
+; CHECK: .byte 0
+; CHECK: .byte 2
+; CHECK: .byte 125
+; CHECK: .long _ZTIi
+; CHECK: .long _ZTId
diff --git a/test/CodeGen/XCore/float-intrinsics.ll b/test/CodeGen/XCore/float-intrinsics.ll
index 69a40f3c79bf..588203655ff8 100644
--- a/test/CodeGen/XCore/float-intrinsics.ll
+++ b/test/CodeGen/XCore/float-intrinsics.ll
@@ -11,7 +11,7 @@ declare double @llvm.sin.f64(double)
 declare double @llvm.sqrt.f64(double)
 
 define double @cos(double %F) {
-; CHECK: cos:
+; CHECK-LABEL: cos:
 ; CHECK: bl cos
         %result = call double @llvm.cos.f64(double %F)
 	ret double %result
@@ -19,7 +19,7 @@ define double @cos(double %F) {
 
 declare float @llvm.cos.f32(float)
 
-; CHECK: cosf:
+; CHECK-LABEL: cosf:
 ; CHECK: bl cosf
 define float @cosf(float %F) {
         %result = call float @llvm.cos.f32(float %F)
@@ -27,7 +27,7 @@ define float @cosf(float %F) {
 }
 
 define double @exp(double %F) {
-; CHECK: exp:
+; CHECK-LABEL: exp:
 ; CHECK: bl exp
         %result = call double @llvm.exp.f64(double %F)
 	ret double %result
@@ -36,14 +36,14 @@ define double @exp(double %F) {
 declare float @llvm.exp.f32(float)
 
 define float @expf(float %F) {
-; CHECK: expf:
+; CHECK-LABEL: expf:
 ; CHECK: bl expf
         %result = call float @llvm.exp.f32(float %F)
 	ret float %result
 }
 
 define double @exp2(double %F) {
-; CHECK: exp2:
+; CHECK-LABEL: exp2:
 ; CHECK: bl exp2
         %result = call double @llvm.exp2.f64(double %F)
 	ret double %result
@@ -52,14 +52,14 @@ define double @exp2(double %F) {
 declare float @llvm.exp2.f32(float)
 
 define float @exp2f(float %F) {
-; CHECK: exp2f:
+; CHECK-LABEL: exp2f:
 ; CHECK: bl exp2f
         %result = call float @llvm.exp2.f32(float %F)
 	ret float %result
 }
 
 define double @log(double %F) {
-; CHECK: log:
+; CHECK-LABEL: log:
 ; CHECK: bl log
         %result = call double @llvm.log.f64(double %F)
 	ret double %result
@@ -68,14 +68,14 @@ define double @log(double %F) {
 declare float @llvm.log.f32(float)
 
 define float @logf(float %F) {
-; CHECK: logf:
+; CHECK-LABEL: logf:
 ; CHECK: bl logf
         %result = call float @llvm.log.f32(float %F)
 	ret float %result
 }
 
 define double @log10(double %F) {
-; CHECK: log10:
+; CHECK-LABEL: log10:
 ; CHECK: bl log10
         %result = call double @llvm.log10.f64(double %F)
 	ret double %result
@@ -84,14 +84,14 @@ define double @log10(double %F) {
 declare float @llvm.log10.f32(float)
 
 define float @log10f(float %F) {
-; CHECK: log10f:
+; CHECK-LABEL: log10f:
 ; CHECK: bl log10f
         %result = call float @llvm.log10.f32(float %F)
 	ret float %result
 }
 
 define double @log2(double %F) {
-; CHECK: log2:
+; CHECK-LABEL: log2:
 ; CHECK: bl log2
         %result = call double @llvm.log2.f64(double %F)
 	ret double %result
@@ -100,14 +100,14 @@ define double @log2(double %F) {
 declare float @llvm.log2.f32(float)
 
 define float @log2f(float %F) {
-; CHECK: log2f:
+; CHECK-LABEL: log2f:
 ; CHECK: bl log2f
         %result = call float @llvm.log2.f32(float %F)
 	ret float %result
 }
 
 define double @pow(double %F, double %power) {
-; CHECK: pow:
+; CHECK-LABEL: pow:
 ; CHECK: bl pow
         %result = call double @llvm.pow.f64(double %F, double %power)
 	ret double %result
@@ -116,14 +116,14 @@ define double @pow(double %F, double %power) {
 declare float @llvm.pow.f32(float, float)
 
 define float @powf(float %F, float %power) {
-; CHECK: powf:
+; CHECK-LABEL: powf:
 ; CHECK: bl powf
         %result = call float @llvm.pow.f32(float %F, float %power)
 	ret float %result
 }
 
 define double @powi(double %F, i32 %power) {
-; CHECK: powi:
+; CHECK-LABEL: powi:
 ; CHECK: bl __powidf2
         %result = call double @llvm.powi.f64(double %F, i32 %power)
 	ret double %result
@@ -132,14 +132,14 @@ define double @powi(double %F, i32 %power) {
 declare float @llvm.powi.f32(float, i32)
 
 define float @powif(float %F, i32 %power) {
-; CHECK: powif:
+; CHECK-LABEL: powif:
 ; CHECK: bl __powisf2
         %result = call float @llvm.powi.f32(float %F, i32 %power)
 	ret float %result
 }
 
 define double @sin(double %F) {
-; CHECK: sin:
+; CHECK-LABEL: sin:
 ; CHECK: bl sin
         %result = call double @llvm.sin.f64(double %F)
 	ret double %result
@@ -148,14 +148,14 @@ define double @sin(double %F) {
 declare float @llvm.sin.f32(float)
 
 define float @sinf(float %F) {
-; CHECK: sinf:
+; CHECK-LABEL: sinf:
 ; CHECK: bl sinf
         %result = call float @llvm.sin.f32(float %F)
 	ret float %result
 }
 
 define double @sqrt(double %F) {
-; CHECK: sqrt:
+; CHECK-LABEL: sqrt:
 ; CHECK: bl sqrt
         %result = call double @llvm.sqrt.f64(double %F)
 	ret double %result
@@ -164,7 +164,7 @@ define double @sqrt(double %F) {
 declare float @llvm.sqrt.f32(float)
 
 define float @sqrtf(float %F) {
-; CHECK: sqrtf:
+; CHECK-LABEL: sqrtf:
 ; CHECK: bl sqrtf
         %result = call float @llvm.sqrt.f32(float %F)
 	ret float %result
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index d442a19712f3..67ab6195aad2 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
 define i1 @test(double %F) nounwind {
 entry:
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: xor
 	%0 = fsub double -0.000000e+00, %F
 	%1 = fcmp olt double 0.000000e+00, %0
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index ec46071b546c..da80e10a0a66 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -2,7 +2,7 @@
 declare i32 @llvm.xcore.getid()
 
 define i32 @test() {
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: get r11, id
 ; CHECK-NEXT: mov r0, r11
 	%result = call i32 @llvm.xcore.getid()
diff --git a/test/CodeGen/XCore/globals.ll b/test/CodeGen/XCore/globals.ll
index 7487561dec96..b3a872bb6892 100644
--- a/test/CodeGen/XCore/globals.ll
+++ b/test/CodeGen/XCore/globals.ll
@@ -2,21 +2,21 @@
 
 define i32 *@addr_G1() {
 entry:
-; CHECK: addr_G1:
+; CHECK-LABEL: addr_G1:
 ; CHECK: ldaw r0, dp[G1]
 	ret i32* @G1
 }
 
 define i32 *@addr_G2() {
 entry:
-; CHECK: addr_G2:
+; CHECK-LABEL: addr_G2:
 ; CHECK: ldaw r0, dp[G2]
 	ret i32* @G2
 }
 
 define i32 *@addr_G3() {
 entry:
-; CHECK: addr_G3:
+; CHECK-LABEL: addr_G3:
 ; CHECK: ldaw r11, cp[G3]
 ; CHECK: mov r0, r11
 	ret i32* @G3
@@ -24,14 +24,14 @@ entry:
 
 define i32 **@addr_G4() {
 entry:
-; CHECK: addr_G4:
+; CHECK-LABEL: addr_G4:
 ; CHECK: ldaw r0, dp[G4]
 	ret i32** @G4
 }
 
 define i32 **@addr_G5() {
 entry:
-; CHECK: addr_G5:
+; CHECK-LABEL: addr_G5:
 ; CHECK: ldaw r11, cp[G5]
 ; CHECK: mov r0, r11
 	ret i32** @G5
@@ -39,14 +39,14 @@ entry:
 
 define i32 **@addr_G6() {
 entry:
-; CHECK: addr_G6:
+; CHECK-LABEL: addr_G6:
 ; CHECK: ldaw r0, dp[G6]
 	ret i32** @G6
 }
 
 define i32 **@addr_G7() {
 entry:
-; CHECK: addr_G7:
+; CHECK-LABEL: addr_G7:
 ; CHECK: ldaw r11, cp[G7]
 ; CHECK: mov r0, r11
 	ret i32** @G7
@@ -54,7 +54,7 @@ entry:
 
 define i32 *@addr_G8() {
 entry:
-; CHECK: addr_G8:
+; CHECK-LABEL: addr_G8:
 ; CHECK: ldaw r0, dp[G8]
 	ret i32* @G8
 }
@@ -90,3 +90,7 @@ entry:
 @G8 = internal global i32 9312
 ; CHECK: .section .dp.data,"awd",@progbits
 ; CHECK: G8:
+
+@array = global [10 x i16] zeroinitializer, align 2
+; CHECK: .globl  array.globound
+; CHECK:  array.globound = 10
diff --git a/test/CodeGen/XCore/indirectbr.ll b/test/CodeGen/XCore/indirectbr.ll
index 92690029cd0e..d7758ea1d57c 100644
--- a/test/CodeGen/XCore/indirectbr.ll
+++ b/test/CodeGen/XCore/indirectbr.ll
@@ -4,7 +4,7 @@
 @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
 
 define internal i32 @foo(i32 %i) nounwind {
-; CHECK: foo:
+; CHECK-LABEL: foo:
 entry:
   %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
diff --git a/test/CodeGen/XCore/inline-asm.ll b/test/CodeGen/XCore/inline-asm.ll
new file mode 100644
index 000000000000..af3edd1544a2
--- /dev/null
+++ b/test/CodeGen/XCore/inline-asm.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+; CHECK-LABEL: f1:
+; CHECK: foo r0
+define i32 @f1() nounwind {
+entry:
+  %asmtmp = tail call i32 asm sideeffect "foo $0", "=r"() nounwind
+  ret i32 %asmtmp
+}
+
+; CHECK-LABEL: f2:
+; CHECK: foo 5
+define void @f2() nounwind {
+entry:
+  tail call void asm sideeffect "foo $0", "i"(i32 5) nounwind
+  ret void
+}
+
+; CHECK-LABEL: f3:
+; CHECK: foo 42
+define void @f3() nounwind {
+entry:
+  tail call void asm sideeffect "foo ${0:c}", "i"(i32 42) nounwind
+  ret void
+}
+
+; CHECK-LABEL: f4:
+; CHECK: foo -99
+define void @f4() nounwind {
+entry:
+  tail call void asm sideeffect "foo ${0:n}", "i"(i32 99) nounwind
+  ret void
+}
diff --git a/test/CodeGen/XCore/ladd_lsub_combine.ll b/test/CodeGen/XCore/ladd_lsub_combine.ll
index cd89966bcde7..b75e30db863d 100644
--- a/test/CodeGen/XCore/ladd_lsub_combine.ll
+++ b/test/CodeGen/XCore/ladd_lsub_combine.ll
@@ -8,7 +8,7 @@ entry:
 	%2 = add i64 %1, %0		; <i64> [#uses=1]
 	ret i64 %2
 }
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldc r2, 0
 ; CHECK-NEXT: ladd r1, r0, r1, r0, r2
 ; CHECK-NEXT: retsp 0
@@ -21,7 +21,7 @@ entry:
 	%2 = sub i64 %1, %0		; <i64> [#uses=1]
 	ret i64 %2
 }
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldc r2, 0
 ; CHECK-NEXT: lsub r1, r0, r1, r0, r2
 ; CHECK-NEXT: neg r1, r1
@@ -34,7 +34,7 @@ entry:
 	%1 = add i64 %x, %0		; <i64> [#uses=1]
 	ret i64 %1
 }
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ldc r3, 0
 ; CHECK-NEXT: ladd r2, r0, r0, r2, r3
 ; CHECK-NEXT: add r1, r1, r2
@@ -47,7 +47,7 @@ entry:
 	%1 = add i64 %0, %y		; <i64> [#uses=1]
 	ret i64 %1
 }
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldc r3, 0
 ; CHECK-NEXT: ladd r1, r0, r0, r1, r3
 ; CHECK-NEXT: add r1, r2, r1
@@ -60,7 +60,7 @@ entry:
 	%1 = sub i64 %x, %0		; <i64> [#uses=1]
 	ret i64 %1
 }
-; CHECK: f5:
+; CHECK-LABEL: f5:
 ; CHECK: ldc r3, 0
 ; CHECK-NEXT: lsub r2, r0, r0, r2, r3
 ; CHECK-NEXT: sub r1, r1, r2
diff --git a/test/CodeGen/XCore/licm-ldwcp.ll b/test/CodeGen/XCore/licm-ldwcp.ll
index 794c6bb64e39..f98c0eb56276 100644
--- a/test/CodeGen/XCore/licm-ldwcp.ll
+++ b/test/CodeGen/XCore/licm-ldwcp.ll
@@ -2,7 +2,7 @@
 
 ; MachineLICM should hoist the LDWCP out of the loop.
 
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK-NEXT: ldw [[REG:r[0-9]+]], cp[.LCPI0_0]
 ; CHECK-NEXT: .LBB0_1:
 ; CHECK-NEXT: stw [[REG]], r0[0]
diff --git a/test/CodeGen/XCore/linkage.ll b/test/CodeGen/XCore/linkage.ll
new file mode 100644
index 000000000000..7a1179b7ab6e
--- /dev/null
+++ b/test/CodeGen/XCore/linkage.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; CHECK: .weak fd
+define weak void @fd() {
+  call void @fr(i32* @gd, i32* @gr)
+  ret void
+}
+
+; CHECK-NOT: .hidden test_hidden
+declare hidden void @test_hidden_declaration()
+define hidden void @test_hidden() {
+  call void @test_hidden_declaration()
+  unreachable
+}
+
+; CHECK-NOT: .protected
+define protected void @test_protected() {
+  unreachable
+}
+
+; CHECK: .globl array.globound
+; CHECK: array.globound = 2
+; CHECK: .weak array.globound
+; CHECK: .globl array
+; CHECK: .weak array
+@array = weak global [2 x i32] zeroinitializer
+
+; CHECK: .weak gd
+@gd = weak global i32 0
+
+; CHECK-NOT: .hidden test_hidden_declaration
+
+; CHECK: .weak gr
+@gr = extern_weak global i32
+
+; CHECK: .weak fr
+declare extern_weak void @fr(i32*, i32*)
+
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
index 8756f37fe8a1..3e84c1befeab 100644
--- a/test/CodeGen/XCore/lit.local.cfg
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'XCore' in targets:
     config.unsupported = True
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
index faff03b1e70d..0622f1cd135e 100644
--- a/test/CodeGen/XCore/load.ll
+++ b/test/CodeGen/XCore/load.ll
@@ -2,7 +2,7 @@
 
 define i32 @load32(i32* %p, i32 %offset) nounwind {
 entry:
-; CHECK: load32:
+; CHECK-LABEL: load32:
 ; CHECK: ldw r0, r0[r1]
 	%0 = getelementptr i32* %p, i32 %offset
 	%1 = load i32* %0, align 4
@@ -11,7 +11,7 @@ entry:
 
 define i32 @load32_imm(i32* %p) nounwind {
 entry:
-; CHECK: load32_imm:
+; CHECK-LABEL: load32_imm:
 ; CHECK: ldw r0, r0[11]
 	%0 = getelementptr i32* %p, i32 11
 	%1 = load i32* %0, align 4
@@ -20,7 +20,7 @@ entry:
 
 define i32 @load16(i16* %p, i32 %offset) nounwind {
 entry:
-; CHECK: load16:
+; CHECK-LABEL: load16:
 ; CHECK: ld16s r0, r0[r1]
 ; CHECK-NOT: sext
 	%0 = getelementptr i16* %p, i32 %offset
@@ -31,7 +31,7 @@ entry:
 
 define i32 @load8(i8* %p, i32 %offset) nounwind {
 entry:
-; CHECK: load8:
+; CHECK-LABEL: load8:
 ; CHECK: ld8u r0, r0[r1]
 ; CHECK-NOT: zext
 	%0 = getelementptr i8* %p, i32 %offset
@@ -39,3 +39,12 @@ entry:
 	%2 = zext i8 %1 to i32
 	ret i32 %2
 }
+
+@GConst = external constant i32
+define i32 @load_cp() nounwind {
+entry:
+; CHECK-LABEL: load_cp:
+; CHECK: ldw r0, cp[GConst]
+  %0 = load i32* @GConst
+  ret i32 %0
+}
diff --git a/test/CodeGen/XCore/misc-intrinsics.ll b/test/CodeGen/XCore/misc-intrinsics.ll
index 6d39d77929a7..30d7493eb503 100644
--- a/test/CodeGen/XCore/misc-intrinsics.ll
+++ b/test/CodeGen/XCore/misc-intrinsics.ll
@@ -10,56 +10,56 @@ declare i32 @llvm.xcore.geted()
 declare i32 @llvm.xcore.getet()
 
 define i32 @bitrev(i32 %val) {
-; CHECK: bitrev:
+; CHECK-LABEL: bitrev:
 ; CHECK: bitrev r0, r0
 	%result = call i32 @llvm.xcore.bitrev(i32 %val)
 	ret i32 %result
 }
 
 define i32 @crc32(i32 %crc, i32 %data, i32 %poly) {
-; CHECK: crc32:
+; CHECK-LABEL: crc32:
 ; CHECK: crc32 r0, r1, r2
 	%result = call i32 @llvm.xcore.crc32(i32 %crc, i32 %data, i32 %poly)
 	ret i32 %result
 }
 
 define %0 @crc8(i32 %crc, i32 %data, i32 %poly) {
-; CHECK: crc8:
+; CHECK-LABEL: crc8:
 ; CHECK: crc8 r0, r1, r1, r2
 	%result = call %0 @llvm.xcore.crc8(i32 %crc, i32 %data, i32 %poly)
 	ret %0 %result
 }
 
 define i32 @zext(i32 %a, i32 %b) {
-; CHECK: zext:
+; CHECK-LABEL: zext:
 ; CHECK: zext r0, r1
 	%result = call i32 @llvm.xcore.zext(i32 %a, i32 %b)
 	ret i32 %result
 }
 
 define i32 @zexti(i32 %a) {
-; CHECK: zexti:
+; CHECK-LABEL: zexti:
 ; CHECK: zext r0, 4
 	%result = call i32 @llvm.xcore.zext(i32 %a, i32 4)
 	ret i32 %result
 }
 
 define i32 @sext(i32 %a, i32 %b) {
-; CHECK: sext:
+; CHECK-LABEL: sext:
 ; CHECK: sext r0, r1
 	%result = call i32 @llvm.xcore.sext(i32 %a, i32 %b)
 	ret i32 %result
 }
 
 define i32 @sexti(i32 %a) {
-; CHECK: sexti:
+; CHECK-LABEL: sexti:
 ; CHECK: sext r0, 4
 	%result = call i32 @llvm.xcore.sext(i32 %a, i32 4)
 	ret i32 %result
 }
 
 define i32 @geted() {
-; CHECK: geted:
+; CHECK-LABEL: geted:
 ; CHECK: get r11, ed
 ; CHECK-NEXT: mov r0, r11
 	%result = call i32 @llvm.xcore.geted()
@@ -67,7 +67,7 @@ define i32 @geted() {
 }
 
 define i32 @getet() {
-; CHECK: getet:
+; CHECK-LABEL: getet:
 ; CHECK: get r11, et
 ; CHECK-NEXT: mov r0, r11
 	%result = call i32 @llvm.xcore.getet()
diff --git a/test/CodeGen/XCore/mkmsk.ll b/test/CodeGen/XCore/mkmsk.ll
index 377612b7d215..bcec32d75522 100644
--- a/test/CodeGen/XCore/mkmsk.ll
+++ b/test/CodeGen/XCore/mkmsk.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
 
 define i32 @f(i32) nounwind {
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: mkmsk r0, r0
 ; CHECK-NEXT: retsp 0
 entry:
diff --git a/test/CodeGen/XCore/mul64.ll b/test/CodeGen/XCore/mul64.ll
index 3d373b194153..cfc98553d58b 100644
--- a/test/CodeGen/XCore/mul64.ll
+++ b/test/CodeGen/XCore/mul64.ll
@@ -7,7 +7,7 @@ entry:
 	%2 = mul i64 %1, %0
 	ret i64 %2
 }
-; CHECK: umul_lohi:
+; CHECK-LABEL: umul_lohi:
 ; CHECK: ldc [[REG:r[0-9]+]], 0
 ; CHECK-NEXT: lmul {{.*}}, [[REG]], [[REG]]
 ; CHECK-NEXT: retsp 0
@@ -19,7 +19,7 @@ entry:
 	%2 = mul i64 %1, %0
 	ret i64 %2
 }
-; CHECK: smul_lohi:
+; CHECK-LABEL: smul_lohi:
 ; CHECK: ldc
 ; CHECK-NEXT: mov
 ; CHECK-NEXT: maccs
@@ -30,7 +30,7 @@ entry:
 	%0 = mul i64 %a, %b
 	ret i64 %0
 }
-; CHECK: mul64:
+; CHECK-LABEL: mul64:
 ; CHECK: ldc
 ; CHECK-NEXT: lmul
 ; CHECK-NEXT: mul
@@ -42,7 +42,7 @@ entry:
 	%1 = mul i64 %a, %0
 	ret i64 %1
 }
-; CHECK: mul64_2:
+; CHECK-LABEL: mul64_2:
 ; CHECK: ldc
 ; CHECK-NEXT: lmul
 ; CHECK-NEXT: mul
diff --git a/test/CodeGen/XCore/offset_folding.ll b/test/CodeGen/XCore/offset_folding.ll
index 30edfe695c3f..8085a0fd28a7 100644
--- a/test/CodeGen/XCore/offset_folding.ll
+++ b/test/CodeGen/XCore/offset_folding.ll
@@ -5,7 +5,7 @@
 
 define i32 *@f1() nounwind {
 entry:
-; CHECK: f1:
+; CHECK-LABEL: f1:
 ; CHECK: ldaw r11, cp[a+4]
 ; CHECK: mov r0, r11
 	%0 = getelementptr [0 x i32]* @a, i32 0, i32 1
@@ -14,7 +14,7 @@ entry:
 
 define i32 *@f2() nounwind {
 entry:
-; CHECK: f2:
+; CHECK-LABEL: f2:
 ; CHECK: ldaw r0, dp[b+4]
 	%0 = getelementptr [0 x i32]* @b, i32 0, i32 1
 	ret i32* %0
@@ -25,7 +25,7 @@ entry:
 
 define i32 *@f3() nounwind {
 entry:
-; CHECK: f3:
+; CHECK-LABEL: f3:
 ; CHECK: ldaw r11, cp[a]
 ; CHECK: sub r0, r11, 4
 	%0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
@@ -34,7 +34,7 @@ entry:
 
 define i32 *@f4() nounwind {
 entry:
-; CHECK: f4:
+; CHECK-LABEL: f4:
 ; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
 ; CHECK: sub r0, [[REG]], 4
 	%0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 80b7db4ce3a2..474448a50888 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -10,7 +10,7 @@ define private void @foo() {
 @baz = private global i32 4
 
 define i32 @bar() {
-; CHECK: bar:
+; CHECK-LABEL: bar:
 ; CHECK: bl .Lfoo
 ; CHECK: ldw r0, dp[.Lbaz]
         call void @foo()
diff --git a/test/CodeGen/XCore/ps-intrinsics.ll b/test/CodeGen/XCore/ps-intrinsics.ll
index 92b26c75e0e4..02609ed8d678 100644
--- a/test/CodeGen/XCore/ps-intrinsics.ll
+++ b/test/CodeGen/XCore/ps-intrinsics.ll
@@ -3,7 +3,7 @@ declare i32 @llvm.xcore.getps(i32)
 declare void @llvm.xcore.setps(i32, i32)
 
 define i32 @getps(i32 %reg) nounwind {
-; CHECK: getps:
+; CHECK-LABEL: getps:
 ; CHECK: get r0, ps[r0]
 	%result = call i32 @llvm.xcore.getps(i32 %reg)
 	ret i32 %result
@@ -11,7 +11,7 @@ define i32 @getps(i32 %reg) nounwind {
 
 
 define void @setps(i32 %reg, i32 %value) nounwind {
-; CHECK: setps:
+; CHECK-LABEL: setps:
 ; CHECK: set ps[r0], r1
 	call void @llvm.xcore.setps(i32 %reg, i32 %value)
 	ret void
diff --git a/test/CodeGen/XCore/resources.ll b/test/CodeGen/XCore/resources.ll
index 8f00fed160b8..5385010e138b 100644
--- a/test/CodeGen/XCore/resources.ll
+++ b/test/CodeGen/XCore/resources.ll
@@ -29,147 +29,147 @@ declare i32 @llvm.xcore.peek.p1i8(i8 addrspace(1)* %r)
 declare i32 @llvm.xcore.endin.p1i8(i8 addrspace(1)* %r)
 
 define i8 addrspace(1)* @getr() {
-; CHECK: getr:
+; CHECK-LABEL: getr:
 ; CHECK: getr r0, 5
 	%result = call i8 addrspace(1)* @llvm.xcore.getr.p1i8(i32 5)
 	ret i8 addrspace(1)* %result
 }
 
 define void @freer(i8 addrspace(1)* %r) {
-; CHECK: freer:
+; CHECK-LABEL: freer:
 ; CHECK: freer res[r0]
 	call void @llvm.xcore.freer.p1i8(i8 addrspace(1)* %r)
 	ret void
 }
 
 define i32 @in(i8 addrspace(1)* %r) {
-; CHECK: in:
+; CHECK-LABEL: in:
 ; CHECK: in r0, res[r0]
 	%result = call i32 @llvm.xcore.in.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define i32 @int(i8 addrspace(1)* %r) {
-; CHECK: int:
+; CHECK-LABEL: int:
 ; CHECK: int r0, res[r0]
 	%result = call i32 @llvm.xcore.int.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define i32 @inct(i8 addrspace(1)* %r) {
-; CHECK: inct:
+; CHECK-LABEL: inct:
 ; CHECK: inct r0, res[r0]
 	%result = call i32 @llvm.xcore.inct.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define void @out(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: out:
+; CHECK-LABEL: out:
 ; CHECK: out res[r0], r1
 	call void @llvm.xcore.out.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @outt(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: outt:
+; CHECK-LABEL: outt:
 ; CHECK: outt res[r0], r1
 	call void @llvm.xcore.outt.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @outct(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: outct:
+; CHECK-LABEL: outct:
 ; CHECK: outct res[r0], r1
 	call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @outcti(i8 addrspace(1)* %r) {
-; CHECK: outcti:
+; CHECK-LABEL: outcti:
 ; CHECK: outct res[r0], 11
 	call void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 11)
 	ret void
 }
 
 define void @chkct(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: chkct:
+; CHECK-LABEL: chkct:
 ; CHECK: chkct res[r0], r1
 	call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @chkcti(i8 addrspace(1)* %r) {
-; CHECK: chkcti:
+; CHECK-LABEL: chkcti:
 ; CHECK: chkct res[r0], 11
 	call void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 11)
 	ret void
 }
 
 define void @setd(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: setd:
+; CHECK-LABEL: setd:
 ; CHECK: setd res[r0], r1
 	call void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @setc(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: setc:
+; CHECK-LABEL: setc:
 ; CHECK: setc res[r0], r1
 	call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @setci(i8 addrspace(1)* %r) {
-; CHECK: setci:
+; CHECK-LABEL: setci:
 ; CHECK: setc res[r0], 2
 	call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 2)
 	ret void
 }
 
 define i32 @inshr(i32 %value, i8 addrspace(1)* %r) {
-; CHECK: inshr:
+; CHECK-LABEL: inshr:
 ; CHECK: inshr r0, res[r1]
 	%result = call i32 @llvm.xcore.inshr.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret i32 %result
 }
 
 define i32 @outshr(i32 %value, i8 addrspace(1)* %r) {
-; CHECK: outshr:
+; CHECK-LABEL: outshr:
 ; CHECK: outshr res[r1], r0
 	%result = call i32 @llvm.xcore.outshr.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret i32 %result
 }
 
 define void @setpt(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: setpt:
+; CHECK-LABEL: setpt:
 ; CHECK: setpt res[r0], r1
 	call void @llvm.xcore.setpt.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define i32 @getts(i8 addrspace(1)* %r) {
-; CHECK: getts:
+; CHECK-LABEL: getts:
 ; CHECK: getts r0, res[r0]
 	%result = call i32 @llvm.xcore.getts.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define void @syncr(i8 addrspace(1)* %r) {
-; CHECK: syncr:
+; CHECK-LABEL: syncr:
 ; CHECK: syncr res[r0]
 	call void @llvm.xcore.syncr.p1i8(i8 addrspace(1)* %r)
 	ret void
 }
 
 define void @settw(i8 addrspace(1)* %r, i32 %value) {
-; CHECK: settw:
+; CHECK-LABEL: settw:
 ; CHECK: settw res[r0], r1
 	call void @llvm.xcore.settw.p1i8(i8 addrspace(1)* %r, i32 %value)
 	ret void
 }
 
 define void @setv(i8 addrspace(1)* %r, i8* %p) {
-; CHECK: setv:
+; CHECK-LABEL: setv:
 ; CHECK: mov r11, r1
 ; CHECK-NEXT: setv res[r0], r11
 	call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p)
@@ -177,7 +177,7 @@ define void @setv(i8 addrspace(1)* %r, i8* %p) {
 }
 
 define void @setev(i8 addrspace(1)* %r, i8* %p) {
-; CHECK: setev:
+; CHECK-LABEL: setev:
 ; CHECK: mov r11, r1
 ; CHECK-NEXT: setev res[r0], r11
 	call void @llvm.xcore.setev.p1i8(i8 addrspace(1)* %r, i8* %p)
@@ -185,7 +185,7 @@ define void @setev(i8 addrspace(1)* %r, i8* %p) {
 }
 
 define void @eeu(i8 addrspace(1)* %r) {
-; CHECK: eeu:
+; CHECK-LABEL: eeu:
 ; CHECK: eeu res[r0]
 	call void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r)
 	ret void
@@ -213,28 +213,28 @@ define void @setpsc(i8 addrspace(1)* %r, i32 %value) {
 }
 
 define i32 @peek(i8 addrspace(1)* %r) {
-; CHECK: peek:
+; CHECK-LABEL: peek:
 ; CHECK: peek r0, res[r0]
 	%result = call i32 @llvm.xcore.peek.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define i32 @endin(i8 addrspace(1)* %r) {
-; CHECK: endin:
+; CHECK-LABEL: endin:
 ; CHECK: endin r0, res[r0]
 	%result = call i32 @llvm.xcore.endin.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define i32 @testct(i8 addrspace(1)* %r) {
-; CHECK: testct:
+; CHECK-LABEL: testct:
 ; CHECK: testct r0, res[r0]
 	%result = call i32 @llvm.xcore.testct.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
 }
 
 define i32 @testwct(i8 addrspace(1)* %r) {
-; CHECK: testwct:
+; CHECK-LABEL: testwct:
 ; CHECK: testwct r0, res[r0]
 	%result = call i32 @llvm.xcore.testwct.p1i8(i8 addrspace(1)* %r)
 	ret i32 %result
diff --git a/test/CodeGen/XCore/sext.ll b/test/CodeGen/XCore/sext.ll
index 9cd4ad66a5cd..b3e66ec09426 100644
--- a/test/CodeGen/XCore/sext.ll
+++ b/test/CodeGen/XCore/sext.ll
@@ -4,7 +4,7 @@ define i32 @sext1(i32 %a) {
 	%2 = sext i1 %1 to i32
 	ret i32 %2
 }
-; CHECK: sext1:
+; CHECK-LABEL: sext1:
 ; CHECK: sext r0, 1
 
 define i32 @sext2(i32 %a) {
@@ -12,7 +12,7 @@ define i32 @sext2(i32 %a) {
 	%2 = sext i2 %1 to i32
 	ret i32 %2
 }
-; CHECK: sext2:
+; CHECK-LABEL: sext2:
 ; CHECK: sext r0, 2
 
 define i32 @sext8(i32 %a) {
@@ -20,7 +20,7 @@ define i32 @sext8(i32 %a) {
 	%2 = sext i8 %1 to i32
 	ret i32 %2
 }
-; CHECK: sext8:
+; CHECK-LABEL: sext8:
 ; CHECK: sext r0, 8
 
 define i32 @sext16(i32 %a) {
@@ -28,5 +28,5 @@ define i32 @sext16(i32 %a) {
 	%2 = sext i16 %1 to i32
 	ret i32 %2
 }
-; CHECK: sext16:
+; CHECK-LABEL: sext16:
 ; CHECK: sext r0, 16
diff --git a/test/CodeGen/XCore/shedulingPreference.ll b/test/CodeGen/XCore/shedulingPreference.ll
new file mode 100644
index 000000000000..6c2ac6dce487
--- /dev/null
+++ b/test/CodeGen/XCore/shedulingPreference.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=xcore
+
+define void @f( ) {
+entry:
+
+  switch i32 undef, label %default [
+    i32 0, label %start
+  ]
+
+start:
+  br label %end
+
+default:
+  %arg = fadd double undef, undef
+  %res = call double @f2(i32 undef, double %arg, double undef)
+  br label %end
+
+end:
+  %unused = phi double [ %res, %default ], [ undef, %start ]
+
+  unreachable
+}
+
+declare double @f2(i32, double, double)
+
diff --git a/test/CodeGen/XCore/sr-intrinsics.ll b/test/CodeGen/XCore/sr-intrinsics.ll
index e12ed0380309..2c4175d94a9c 100644
--- a/test/CodeGen/XCore/sr-intrinsics.ll
+++ b/test/CodeGen/XCore/sr-intrinsics.ll
@@ -3,7 +3,7 @@ declare void @llvm.xcore.setsr(i32)
 declare void @llvm.xcore.clrsr(i32)
 
 define void @setsr() nounwind {
-; CHECK: setsr:
+; CHECK-LABEL: setsr:
 ; CHECK: setsr 128
 	call void @llvm.xcore.setsr(i32 128)
 	ret void
@@ -11,7 +11,7 @@ define void @setsr() nounwind {
 
 
 define void @clrsr() nounwind {
-; CHECK: clrsr:
+; CHECK-LABEL: clrsr:
 ; CHECK: clrsr 128
 	call void @llvm.xcore.clrsr(i32 128)
 	ret void
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
index 836b1254d67a..87553d8da18a 100644
--- a/test/CodeGen/XCore/store.ll
+++ b/test/CodeGen/XCore/store.ll
@@ -2,7 +2,7 @@
 
 define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
 entry:
-; CHECK: store32:
+; CHECK-LABEL: store32:
 ; CHECK: stw r2, r0[r1]
 	%0 = getelementptr i32* %p, i32 %offset
 	store i32 %val, i32* %0, align 4
@@ -11,7 +11,7 @@ entry:
 
 define void @store32_imm(i32* %p, i32 %val) nounwind {
 entry:
-; CHECK: store32_imm:
+; CHECK-LABEL: store32_imm:
 ; CHECK: stw r1, r0[11]
 	%0 = getelementptr i32* %p, i32 11
 	store i32 %val, i32* %0, align 4
@@ -20,7 +20,7 @@ entry:
 
 define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
 entry:
-; CHECK: store16:
+; CHECK-LABEL: store16:
 ; CHECK: st16 r2, r0[r1]
 	%0 = getelementptr i16* %p, i32 %offset
 	store i16 %val, i16* %0, align 2
@@ -29,7 +29,7 @@ entry:
 
 define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
 entry:
-; CHECK: store8:
+; CHECK-LABEL: store8:
 ; CHECK: st8 r2, r0[r1]
 	%0 = getelementptr i8* %p, i32 %offset
 	store i8 %val, i8* %0, align 1
diff --git a/test/CodeGen/XCore/threads.ll b/test/CodeGen/XCore/threads.ll
index a0558e365cbb..c50da1d5934e 100644
--- a/test/CodeGen/XCore/threads.ll
+++ b/test/CodeGen/XCore/threads.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=xcore < %s | FileCheck %s
+; RUN: llc -march=xcore -O=0 < %s | FileCheck %s -check-prefix=PHINODE
 
 declare i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r)
 declare void @llvm.xcore.msync.p1i8(i8 addrspace(1)* %r)
@@ -10,58 +11,135 @@ declare void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %r, i8* %value)
 declare void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %r, i8* %value)
 declare void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %r, i8* %value)
 
-define i8 addrspace(1)* @getst(i8 addrspace(1)* %r) {
-; CHECK: getst:
+define i8 addrspace(1)* @test_getst(i8 addrspace(1)* %r) {
+; CHECK-LABEL: test_getst:
 ; CHECK: getst r0, res[r0]
-        %result = call i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r)
-        ret i8 addrspace(1)* %result
+  %result = call i8 addrspace(1)* @llvm.xcore.getst.p1i8.p1i8(i8 addrspace(1)* %r)
+  ret i8 addrspace(1)* %result
 }
 
-define void @ssync() {
-; CHECK: ssync:
+define void @test_ssync() {
+; CHECK-LABEL: test_ssync:
 ; CHECK: ssync
-	call void @llvm.xcore.ssync()
-	ret void
+  call void @llvm.xcore.ssync()
+  ret void
 }
 
-define void @mjoin(i8 addrspace(1)* %r) {
-; CHECK: mjoin:
+define void @test_mjoin(i8 addrspace(1)* %r) {
+; CHECK-LABEL: test_mjoin:
 ; CHECK: mjoin res[r0]
-	call void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r)
-	ret void
+  call void @llvm.xcore.mjoin.p1i8(i8 addrspace(1)* %r)
+  ret void
 }
 
-define void @initsp(i8 addrspace(1)* %t, i8* %src) {
-; CHECK: initsp:
+define void @test_initsp(i8 addrspace(1)* %t, i8* %src) {
+; CHECK-LABEL: test_initsp:
 ; CHECK: init t[r0]:sp, r1
-        call void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %t, i8* %src)
-        ret void
+  call void @llvm.xcore.initsp.p1i8(i8 addrspace(1)* %t, i8* %src)
+  ret void
 }
 
-define void @initpc(i8 addrspace(1)* %t, i8* %src) {
-; CHECK: initpc:
+define void @test_initpc(i8 addrspace(1)* %t, i8* %src) {
+; CHECK-LABEL: test_initpc:
 ; CHECK: init t[r0]:pc, r1
-        call void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %t, i8* %src)
-        ret void
+  call void @llvm.xcore.initpc.p1i8(i8 addrspace(1)* %t, i8* %src)
+  ret void
 }
 
-define void @initlr(i8 addrspace(1)* %t, i8* %src) {
-; CHECK: initlr:
+define void @test_initlr(i8 addrspace(1)* %t, i8* %src) {
+; CHECK-LABEL: test_initlr:
 ; CHECK: init t[r0]:lr, r1
-        call void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %t, i8* %src)
-        ret void
+  call void @llvm.xcore.initlr.p1i8(i8 addrspace(1)* %t, i8* %src)
+  ret void
 }
 
-define void @initcp(i8 addrspace(1)* %t, i8* %src) {
-; CHECK: initcp:
+define void @test_initcp(i8 addrspace(1)* %t, i8* %src) {
+; CHECK-LABEL: test_initcp:
 ; CHECK: init t[r0]:cp, r1
-        call void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %t, i8* %src)
-        ret void
+  call void @llvm.xcore.initcp.p1i8(i8 addrspace(1)* %t, i8* %src)
+  ret void
 }
 
-define void @initdp(i8 addrspace(1)* %t, i8* %src) {
-; CHECK: initdp:
+define void @test_initdp(i8 addrspace(1)* %t, i8* %src) {
+; CHECK-LABEL: test_initdp:
 ; CHECK: init t[r0]:dp, r1
-        call void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %t, i8* %src)
-        ret void
+  call void @llvm.xcore.initdp.p1i8(i8 addrspace(1)* %t, i8* %src)
+  ret void
 }
+
+@tl = thread_local global [3 x i32] zeroinitializer
+@tle = external thread_local global [2 x i32]
+
+define i32* @f_tl() {
+; CHECK-LABEL: f_tl:
+; CHECK: get r11, id
+; CHECK: ldaw [[R0:r[0-9]]], dp[tl]
+; CHECK: ldc [[R1:r[0-9]]], 8
+; CHECK: ldc [[R2:r[0-9]]], 12
+; r0 = id*12 + 8 + &tl
+; CHECK: lmul {{r[0-9]}}, r0, r11, [[R2]], [[R0]], [[R1]]
+  ret i32* getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 2)
+}
+
+define i32* @f_tle() {
+; CHECK-LABEL: f_tle:
+; CHECK: get r11, id
+; CHECK: shl [[R0:r[0-9]]], r11, 3
+; CHECK: ldaw [[R1:r[0-9]]], dp[tle]
+; r0 = &tle + id*8
+; CHECK: add r0, [[R1]], [[R0]]
+  ret i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0)
+}
+
+define i32 @f_tlExpr () {
+; CHECK-LABEL: f_tlExpr:
+; CHECK: get r11, id
+; CHECK: shl [[R0:r[0-9]]], r11, 3
+; CHECK: ldaw [[R1:r[0-9]]], dp[tle]
+; CHECK: add [[R2:r[0-9]]], [[R1]], [[R0]]
+; CHECK: add r0, [[R2]], [[R2]]
+  ret i32 add(
+      i32 ptrtoint( i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0) to i32),
+      i32 ptrtoint( i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0) to i32))
+}
+
+define void @phiNode1() {
+; N.B. lowering of duplicate constexpr in a PHI node requires -O=0
+; PHINODE-LABEL: phiNode1:
+; PHINODE: get r11, id
+; PHINODE-LABEL: .LBB11_1:
+; PHINODE: get r11, id
+; PHINODE: bu .LBB11_1
+entry:
+  br label %ConstantExpPhiNode
+ConstantExpPhiNode:
+  %ptr = phi i32* [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %entry ],
+                  [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ]
+  br label %ConstantExpPhiNode
+exit:
+  ret void
+}
+
+define void @phiNode2( i1 %bool) {
+; N.B. check an extra 'Node_crit_edge' (LBB12_1) is inserted
+; PHINODE-LABEL: phiNode2:
+; PHINODE: bf {{r[0-9]}}, .LBB12_3
+; PHINODE: bu .LBB12_1
+; PHINODE-LABEL: .LBB12_1:
+; PHINODE: get r11, id
+; PHINODE-LABEL: .LBB12_2:
+; PHINODE: get r11, id
+; PHINODE: bu .LBB12_2
+; PHINODE-LABEL: .LBB12_3:
+entry:
+  br i1 %bool, label %ConstantExpPhiNode, label %exit
+ConstantExpPhiNode:
+  %ptr = phi i32* [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %entry ],
+                  [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ]
+  br label %ConstantExpPhiNode
+exit:
+  ret void
+}
+
+; CHECK-LABEL: tl:
+; CHECK: .space  96
diff --git a/test/CodeGen/XCore/tls.ll b/test/CodeGen/XCore/tls.ll
index ed41afae0996..648d61199234 100644
--- a/test/CodeGen/XCore/tls.ll
+++ b/test/CodeGen/XCore/tls.ll
@@ -2,7 +2,7 @@
 
 define i32 *@addr_G() {
 entry:
-; CHECK: addr_G:
+; CHECK-LABEL: addr_G:
 ; CHECK: get r11, id
 	ret i32* @G
 }
diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll
index 6b42134997bf..7ca331a60673 100644
--- a/test/CodeGen/XCore/trampoline.ll
+++ b/test/CodeGen/XCore/trampoline.ll
@@ -4,7 +4,7 @@
 
 define void @f() nounwind {
 entry:
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: ldap r11, g.1101
 ; CHECK: stw r11, sp[7]
   %TRAMP.23 = alloca [20 x i8], align 2
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index eb71cb6acb6e..ef0dfd634009 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
 define i32 @test() noreturn nounwind  {
 entry:
-; CHECK: test:
+; CHECK-LABEL: test:
 ; CHECK: ldc
 ; CHECK: ecallf
 	tail call void @llvm.trap( )
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
index 772a847bd220..b8b88275538c 100644
--- a/test/CodeGen/XCore/unaligned_load.ll
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -10,7 +10,7 @@ entry:
 }
 
 ; Half word aligned load.
-; CHECK: align2:
+; CHECK-LABEL: align2:
 ; CHECK: ld16s
 ; CHECK: ld16s
 ; CHECK: or
@@ -23,7 +23,7 @@ entry:
 @a = global [5 x i8] zeroinitializer, align 4
 
 ; Constant offset from word aligned base.
-; CHECK: align3:
+; CHECK-LABEL: align3:
 ; CHECK: ldw {{r[0-9]+}}, dp
 ; CHECK: ldw {{r[0-9]+}}, dp
 ; CHECK: or
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
index 94e1852faea7..27b428086d5e 100644
--- a/test/CodeGen/XCore/unaligned_store.ll
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
 
 ; Byte aligned store.
-; CHECK: align1:
+; CHECK-LABEL: align1:
 ; CHECK: bl __misaligned_store
 define void @align1(i32* %p, i32 %val) nounwind {
 entry:
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
index c997b78ee6bd..d1f4e6c15cd5 100644
--- a/test/CodeGen/XCore/unaligned_store_combine.ll
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -4,7 +4,7 @@
 ; of size 8
 define void @f(i64* %dst, i64* %src) nounwind {
 entry:
-; CHECK: f:
+; CHECK-LABEL: f:
 ; CHECK: ldc r2, 8
 ; CHECK: bl memmove
 	%0 = load i64* %src, align 1
diff --git a/test/CodeGen/XCore/varargs.ll b/test/CodeGen/XCore/varargs.ll
new file mode 100644
index 000000000000..28c293390c59
--- /dev/null
+++ b/test/CodeGen/XCore/varargs.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+define void @_Z1fz(...) {
+entry:
+; CHECK-LABEL: _Z1fz:
+; CHECK: extsp 3
+; CHECK: stw r[[REG:[0-3]{1,1}]]
+; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: stw r[[REG:[0-3]{1,1}]]
+; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: stw r[[REG:[0-3]{1,1}]]
+; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: stw r[[REG:[0-3]{1,1}]]
+; CHECK: , sp{{\[}}[[REG]]{{\]}}
+; CHECK: ldaw sp, sp[3]
+; CHECK: retsp 0
+  ret void
+}
+
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+declare void @f(i32) nounwind
+define void @test_vararg(...) nounwind {
+entry:
+; CHECK-LABEL: test_vararg
+; CHECK: extsp 6
+; CHECK: stw lr, sp[1]
+; CHECK: stw r0, sp[3]
+; CHECK: stw r1, sp[4]
+; CHECK: stw r2, sp[5]
+; CHECK: stw r3, sp[6]
+; CHECK: ldaw r0, sp[3]
+; CHECK: stw r0, sp[2]
+  %list = alloca i8*, align 4
+  %list1 = bitcast i8** %list to i8*
+  call void @llvm.va_start(i8* %list1)
+  br label %for.cond
+
+; CHECK-LABEL: .LBB1_1
+; CHECK: ldw r0, sp[2]
+; CHECK: add r1, r0, 4
+; CHECK: stw r1, sp[2]
+; CHECK: ldw r0, r0[0]
+; CHECK: bl f
+; CHECK: bu .LBB1_1
+for.cond:
+  %0 = va_arg i8** %list, i32
+  call void @f(i32 %0)
+  br label %for.cond
+
+  call void @llvm.va_end(i8* %list1)
+  ret void
+}
+
diff --git a/test/CodeGen/XCore/zext.ll b/test/CodeGen/XCore/zext.ll
new file mode 100644
index 000000000000..32abfcaed10b
--- /dev/null
+++ b/test/CodeGen/XCore/zext.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=xcore < %s | FileCheck %s
+
+define i32 @f(i1 %a) {
+entry:
+; CHECK: f
+; CHECK: zext r0, 1
+; CHECK: retsp 0
+  %b= zext i1 %a to i32
+  ret i32 %b
+}
diff --git a/test/CodeGen/XCore/zextfree.ll b/test/CodeGen/XCore/zextfree.ll
new file mode 100644
index 000000000000..48dce8865328
--- /dev/null
+++ b/test/CodeGen/XCore/zextfree.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=xcore < %s | FileCheck %s
+
+; CHECK-LABEL: test:
+; CHECK-NOT: zext
+define void @test(i8* %s1) {
+entry:
+  %u8 = load i8* %s1, align 1
+  %bool = icmp eq i8 %u8, 0
+  br label %BB1
+BB1:
+  br i1 %bool, label %BB1, label %BB2
+BB2:
+  br i1 %bool, label %BB1, label %BB2
+}
+
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
index 5bfca21b3ecb..21a60b826655 100644
--- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll
+++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
@@ -1,18 +1,23 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 
-!dbg = !{!0}
-!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !1, i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!6}
+
+!0 = metadata !{i32 786478, metadata !4, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 41, metadata !4} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 21, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 21, metadata !4, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !3 = metadata !{null}
 !4 = metadata !{metadata !"/foo", metadata !"bar.cpp"}
+!5 = metadata !{i32 458769, metadata !4, i32 12, metadata !"", i1 true, metadata !"", i32 0, metadata !3, metadata !3, null, null, null, metadata !""}; [DW_TAG_compile_unit ]
 
 define <{i32, i32}> @f1() {
-; CHECK: !dbgx !1
+; CHECK: !dbgx ![[NUMBER:[0-9]+]]
   %r = insertvalue <{ i32, i32 }> zeroinitializer, i32 4, 1, !dbgx !1
-; CHECK: !dbgx !1
+; CHECK: !dbgx ![[NUMBER]]
   %e = extractvalue <{ i32, i32 }> %r, 0, !dbgx !1
   ret <{ i32, i32 }> %r
 }
 
 ; CHECK: [protected]
+!6 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index 13bd31039522..6fd788704bc2 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -8,19 +8,21 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"fb.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 720948, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 2, metadata !9, i32 1, i32 1, null} ; [ DW_TAG_variable ]
+!12 = metadata !{metadata !14}
+!14 = metadata !{i32 720948, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 2, metadata !9, i32 1, i32 1, null, null} ; [ DW_TAG_variable ]
 !15 = metadata !{i32 3, i32 3, metadata !16, null}
-!16 = metadata !{i32 720907, metadata !5, i32 1, i32 11, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 720907, metadata !17, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{metadata !"fb.c", metadata !"/private/tmp"}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
index 0b81a014ae5c..5a1045905306 100644
--- a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
@@ -2,12 +2,13 @@
 @0 = internal constant i32 1
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"g.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
+!0 = metadata !{i32 720913, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"", metadata !"", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i32* @0} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !"g.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i32* @0, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !"g.c", metadata !"/private/tmp"}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
index 83d6ac28223e..d154c4399860 100644
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -11,20 +11,21 @@ declare void @foo(...)
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"cf.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9, metadata !""} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [bar]
+!6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
-!9 = metadata !{metadata !10}
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 721153, metadata !5, metadata !"i", metadata !6, i32 16777219, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{metadata !11}
+!11 = metadata !{i32 721153, metadata !17, metadata !5, metadata !"i", i32 16777219, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !13 = metadata !{i32 3, i32 14, metadata !5, null}
 !14 = metadata !{i32 4, i32 3, metadata !15, null}
-!15 = metadata !{i32 720907, metadata !5, i32 3, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 720907, metadata !17, metadata !5, i32 3, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 5, i32 1, metadata !15, null}
-
+!17 = metadata !{metadata !"cf.c", metadata !"/private/tmp"}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll b/test/DebugInfo/2010-01-05-DbgScope.ll
index ad4c8d7d32ba..809cebf4eed4 100644
--- a/test/DebugInfo/2010-01-05-DbgScope.ll
+++ b/test/DebugInfo/2010-01-05-DbgScope.ll
@@ -8,11 +8,18 @@ entry:
 
 }
 
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!14}
+
 !0 = metadata !{i32 571, i32 3, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2, i32 1, i32 1}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 561, metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+!1 = metadata !{i32 458763, metadata !11, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 561, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, metadata !11, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, metadata !12, metadata !12, metadata !13, null, null, metadata !""}; [DW_TAG_compile_unit ]
+!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 458788, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458788, null, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 588, i32 1, metadata !2, null}
+!11 = metadata !{metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty"}
+!12 = metadata !{i32 0}
+!13 = metadata !{metadata !2}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-01-19-DbgScope.ll b/test/DebugInfo/2010-01-19-DbgScope.ll
index 7afb5a500fbf..1a7e378374cd 100644
--- a/test/DebugInfo/2010-01-19-DbgScope.ll
+++ b/test/DebugInfo/2010-01-19-DbgScope.ll
@@ -14,15 +14,22 @@ bb11:                                             ; preds = %entry
   ret i32 1, !dbg !11
 }
 
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!15}
+
 !0 = metadata !{i32 8647, i32 0, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2}          ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", metadata !3, i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 1, metadata !"c-parser.c", metadata !"llvmgcc", metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 458763, metadata !12, metadata !2, i32 0, i32 0, i32 0}          ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, null, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, metadata !12, i32 1, metadata !"LLVM build 00", i1 true, metadata !"", i32 0, metadata !13, metadata !13, metadata !14, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 458788, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 458788, null, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 8648, i32 0, metadata !8, null}
-!8 = metadata !{i32 458763, metadata !9}          ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 458763, metadata !10}         ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar2", metadata !"bar2", metadata !"bar2", metadata !3, i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 458763, metadata !12, metadata !9, i32 0, i32 0, i32 0}          ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 458763, metadata !12, metadata !10, i32 0, i32 0, i32 0}         ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 458798, null, metadata !3, metadata !"bar2", metadata !"bar2", metadata !"bar2", i32 8639, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 8652, i32 0, metadata !1, null}
+!12 = metadata !{metadata !"c-parser.c", metadata !"llvmgcc"}
+!13 = metadata !{i32 0}
+!14 = metadata !{metadata !2}
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-03-12-llc-crash.ll b/test/DebugInfo/2010-03-12-llc-crash.ll
index f6de23495853..241bb3734c98 100644
--- a/test/DebugInfo/2010-03-12-llc-crash.ll
+++ b/test/DebugInfo/2010-03-12-llc-crash.ll
@@ -10,11 +10,13 @@ entry:
 }
 
 !0 = metadata !{i32 524545, metadata !1, metadata !"sy", metadata !2, i32 890, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 892, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"qpainter.h", metadata !"QtGui", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"splineeditor.cpp", metadata !"editor", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !5, metadata !"", metadata !5, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!5 = metadata !{i32 524329, metadata !"splineeditor.cpp", metadata !"src", metadata !3} ; [ DW_TAG_file_type ]
+!1 = metadata !{i32 524334, metadata !8, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 892, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !8} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, metadata !9, i32 4, metadata !"clang 1.1", i1 true, metadata !"", i32 0, metadata !10, metadata !10, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !9, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 524329, metadata !9} ; [ DW_TAG_file_type ]
 !6 = metadata !{null}
-!7 = metadata !{i32 524324, metadata !5, metadata !"int", metadata !5, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-
+!7 = metadata !{i32 524324, metadata !9, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !"qpainter.h", metadata !"QtGui"}
+!9 = metadata !{metadata !"splineeditor.cpp", metadata !"src"}
+!10 = metadata !{i32 0}
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
index 9f52d1158a15..d1afade4ad9f 100644
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -8,6 +8,7 @@ entry:
   ret void
 }
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!5}
 !2 = metadata !{i32 786449, metadata !4, i32 32769, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [lang 0x8001]
 !3 = metadata !{}
 !0 = metadata !{i32 662302, i32 26, metadata !1, null}
@@ -15,3 +16,4 @@ entry:
 !4 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
index 15197f4461a6..1689fe6c91a8 100644
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -37,21 +37,22 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!28}
 
 !0 = metadata !{i32 786688, metadata !1, metadata !"s1", metadata !4, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 786443, metadata !2, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786443, metadata !3, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786443, metadata !25, metadata !2, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 786443, metadata !25, metadata !3, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786449, metadata !25, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !27, metadata !27, metadata !24, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8}
 !8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 2, size 8, align 8, offset 0] [def] [from ]
 !10 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !10, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786478, metadata !26, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786453, metadata !25, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !14 = metadata !{metadata !8, metadata !15}
 !15 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
 !16 = metadata !{i32 3, i32 0, metadata !1, null}
@@ -61,7 +62,9 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !20 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
 !21 = metadata !{i32 3, i32 0, metadata !12, null}
 !22 = metadata !{i32 3, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !12, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !26, metadata !12, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{metadata !3, metadata !12}
 !25 = metadata !{metadata !"one.cc", metadata !"/tmp/"}
 !26 = metadata !{metadata !"one.h", metadata !"/tmp/"}
+!27 = metadata !{i32 0}
+!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
index accdf8a86cb7..81285a9b2c90 100644
--- a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
+++ b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
@@ -8,23 +8,29 @@ entry:
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!22}
+
 !0 = metadata !{{ [0 x i8] }** undef}
 !1 = metadata !{i32 524544, metadata !2, metadata !"x", metadata !4, i32 11, metadata !9} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 524299, metadata !3, i32 8, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"baz", metadata !"baz", metadata !"baz", metadata !4, i32 8, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !"2007-12-VarArrayDebug.c", metadata !"/Users/sabre/llvm/test/FrontendC/", metadata !5} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"2007-12-VarArrayDebug.c", metadata !"/Users/sabre/llvm/test/FrontendC/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 524299, metadata !20, metadata !3, i32 8, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 524334, metadata !20, null, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 524329, metadata !20} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 524305, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 524309, metadata !20, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524307, metadata !3, metadata !"", metadata !4, i32 11, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{i32 524324, metadata !20, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524303, metadata !20, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 524307, metadata !20, metadata !3, metadata !"", i32 11, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 11, size 8, align 8, offset 0] [def] [from ]
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 524301, metadata !10, metadata !"b", metadata !4, i32 11, i64 8, i64 8, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ]
-!13 = metadata !{i32 524310, metadata !3, metadata !"A", metadata !4, i32 11, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
-!14 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null} ; [ DW_TAG_array_type ]
-!15 = metadata !{i32 524324, metadata !4, metadata !"char", metadata !4, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 524301, metadata !20, metadata !10, metadata !"b", i32 11, i64 8, i64 8, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ]
+!13 = metadata !{i32 524310, metadata !20, metadata !3, metadata !"A", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
+!14 = metadata !{i32 524289, metadata !20, metadata !4, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!15 = metadata !{i32 524324, metadata !20, metadata !4, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 524321, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
 !18 = metadata !{metadata !"llvm.mdnode.fwdref.19"}
 !19 = metadata !{metadata !"llvm.mdnode.fwdref.23"}
+!20 = metadata !{metadata !"2007-12-VarArrayDebug.c", metadata !"/Users/sabre/llvm/test/FrontendC/"}
+!21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index 7f8e418c9bd7..4d4d61665c07 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -51,35 +51,36 @@ entry:
 }
 
 !llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!40}
 !37 = metadata !{metadata !2, metadata !10, metadata !23}
 
 !0 = metadata !{i32 786688, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 786443, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786478, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 786473, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786443, metadata !38, metadata !2, i32 15, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 786478, metadata !38, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 786473, metadata !38} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 786449, metadata !38, i32 4, metadata !"clang 1.5", i1 false, metadata !"", i32 0, metadata !39, metadata !39, metadata !37, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, metadata !3, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786434, metadata !3, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ]
+!7 = metadata !{i32 786468, metadata !38, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786434, metadata !38, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_class_type ] [B] [line 2, size 8, align 8, offset 0] [def] [from ]
 !9 = metadata !{metadata !10}
-!10 = metadata !{i32 786478, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !38, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !7, metadata !13}
-!13 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786447, metadata !38, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
 !14 = metadata !{i32 16, i32 5, metadata !1, null}
 !15 = metadata !{i32 17, i32 3, metadata !1, null}
 !16 = metadata !{i32 18, i32 1, metadata !2, null}
 !17 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !3, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 4, i32 7, metadata !10, null}
 !19 = metadata !{i32 786688, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21, i32 0, null} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 786443, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{i32 786434, metadata !3, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ]
+!20 = metadata !{i32 786443, metadata !38, metadata !10, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 786434, metadata !38, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 5, size 8, align 8, offset 0] [def] [from ]
 !22 = metadata !{metadata !23}
-!23 = metadata !{i32 786478, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{i32 786478, metadata !38, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !25 = metadata !{metadata !7, metadata !26}
-!26 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
+!26 = metadata !{i32 786447, metadata !38, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
 !27 = metadata !{i32 9, i32 7, metadata !20, null}
 !28 = metadata !{i32 786688, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
 !29 = metadata !{i32 10, i32 9, metadata !20, null}
@@ -89,4 +90,7 @@ entry:
 !33 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !3, i32 7, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
 !34 = metadata !{i32 7, i32 11, metadata !23, null}
 !35 = metadata !{i32 7, i32 19, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !23, i32 7, i32 17} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !38, metadata !23, i32 7, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
+!38 = metadata !{metadata !"one.cc", metadata !"/tmp" }
+!39 = metadata !{i32 0}
+!40 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
index 88eebe6f3b6f..4af2fdcdfa8a 100644
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -1,6 +1,6 @@
-; RUN: llc -asm-verbose -O0 -o %t < %s 
+; RUN: llc -asm-verbose -O1 -o %t < %s 
 ; RUN: grep DW_AT_APPLE_omit_frame_ptr %t
-; RUN: llc -disable-fp-elim -asm-verbose -O0 -o %t < %s 
+; RUN: llc -disable-fp-elim -asm-verbose -O1 -o %t < %s 
 ; RUN: grep -v DW_AT_APPLE_omit_frame_ptr %t
 
 
@@ -20,14 +20,18 @@ return:                                           ; preds = %entry
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!12}
 !9 = metadata !{metadata !1}
 
 !0 = metadata !{i32 2, i32 0, metadata !1, null}
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786478, metadata !10, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, metadata !10, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !10, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 2, i32 0, metadata !8, null}
-!8 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786443, metadata !10, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{metadata !"a.c", metadata !"/tmp"}
+!11 = metadata !{i32 0}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
index 4061bdc834d7..ba8d0e581cd1 100644
--- a/test/DebugInfo/2010-05-03-DisableFramePtr.ll
+++ b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
@@ -1,8 +1,8 @@
-; RUN: llc  -o /dev/null -disable-non-leaf-fp-elim < %s
+; RUN: llc  -o /dev/null < %s
 ; Radar 7937664
 %struct.AppleEvent = type opaque
 
-define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) nounwind ssp {
+define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) "no-frame-pointer-elim-non-leaf" nounwind ssp {
 entry:
   %userUPP_addr = alloca void (%struct.AppleEvent*)* ; <void (%struct.AppleEvent*)**> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
@@ -16,19 +16,25 @@ return:                                           ; preds = %entry
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"userUPP", metadata !2, i32 7, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!19}
+!0 = metadata !{i32 524545, metadata !1, metadata !"userUPP", metadata !2, i32 7, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, metadata !16, null, metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !16} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, metadata !16, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 524310, metadata !2, metadata !"DMNotificationUPP", metadata !2, i32 6, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{i32 524310, metadata !16, metadata !2, metadata !"DMNotificationUPP", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 524303, metadata !16, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !9 = metadata !{null, metadata !10}
-!10 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 524310, metadata !2, metadata !"AppleEvent", metadata !2, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 524307, metadata !2, metadata !"AEDesc", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
+!10 = metadata !{i32 524303, metadata !16, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 524310, metadata !16, metadata !2, metadata !"AppleEvent", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 524307, metadata !16, metadata !2, metadata !"AEDesc", i32 1, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [AEDesc] [line 1, size 0, align 0, offset 0] [decl] [from ]
 !13 = metadata !{i32 7, i32 0, metadata !1, null}
 !14 = metadata !{i32 8, i32 0, metadata !15, null}
-!15 = metadata !{i32 524299, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 524299, metadata !16, metadata !1, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/"}
+!17 = metadata !{i32 0}
+!18 = metadata !{metadata !1}
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 1ade04504631..0c5d876bf05b 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -48,39 +48,47 @@ declare i64 @llvm.bswap.i64(i64) nounwind readnone
 
 declare void @uuid_LtoB(i8*, i8*)
 
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!41}
 !0 = metadata !{i32 808, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !2, i32 807, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !3, i32 807, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 524329, metadata !"G.c", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 524305, i32 0, i32 1, metadata !"G.c", metadata !"/tmp", metadata !"llvm-gcc", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 524299, metadata !39, metadata !2, i32 807, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524334, metadata !39, null, metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !"gpt2gpm", i32 807, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 524329, metadata !39} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 524305, metadata !39, i32 1, metadata !"llvm-gcc", i1 true, metadata !"", i32 0, metadata !18, metadata !18, metadata !40, null, null, i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 524309, metadata !39, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{null}
 !7 = metadata !{i32 810, i32 0, metadata !1, null}
 !8 = metadata !{i32 524545, metadata !9, metadata !"data", metadata !10, i32 201, metadata !11} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 524334, i32 0, metadata !3, metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", metadata !10, i32 202, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 524334, metadata !10, null, metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", i32 202, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !10 = metadata !{i32 524329, metadata !"OSByteOrder.h", metadata !"/usr/include/libkern/ppc", metadata !4} ; [ DW_TAG_file_type ]
-!11 = metadata !{i32 524310, metadata !3, metadata !"uint64_t", metadata !12, i32 59, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!11 = metadata !{i32 524310, metadata !36, metadata !3, metadata !"uint64_t", i32 59, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
 !12 = metadata !{i32 524329, metadata !"stdint.h", metadata !"/usr/4.2.1/include", metadata !4} ; [ DW_TAG_file_type ]
-!13 = metadata !{i32 524324, metadata !3, metadata !"long long unsigned int", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 202, i32 0, metadata !9, metadata !7}
 !15 = metadata !{i32 524545, metadata !16, metadata !"base", metadata !10, i32 92, metadata !17} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 524334, i32 0, metadata !3, metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", metadata !10, i32 95, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 524334, metadata !38, null, metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", i32 95, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524303, metadata !39, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !18 = metadata !{i32 0}
 !19 = metadata !{i32 524545, metadata !16, metadata !"byteOffset", metadata !10, i32 94, metadata !20} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 524310, metadata !3, metadata !"uintptr_t", metadata !21, i32 114, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 524310, metadata !37, metadata !3, metadata !"uintptr_t", i32 114, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
 !21 = metadata !{i32 524329, metadata !"types.h", metadata !"/usr/include/ppc", metadata !4} ; [ DW_TAG_file_type ]
-!22 = metadata !{i32 524324, metadata !3, metadata !"long unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!22 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !23 = metadata !{i32 524544, metadata !24, metadata !"u", metadata !10, i32 100, metadata !25} ; [ DW_TAG_auto_variable ]
-!24 = metadata !{i32 524299, metadata !16, i32 95, i32 0} ; [ DW_TAG_lexical_block ]
-!25 = metadata !{i32 524311, metadata !16, metadata !"", metadata !10, i32 97, i64 64, i64 64, i64 0, i32 0, null, metadata !26, i32 0, null} ; [ DW_TAG_union_type ]
+!24 = metadata !{i32 524299, metadata !38, metadata !16, i32 95, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 524311, metadata !38, metadata !16, metadata !"", i32 97, i64 64, i64 64, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_union_type ] [line 97, size 64, align 64, offset 0] [def] [from ]
 !26 = metadata !{metadata !27, metadata !28}
-!27 = metadata !{i32 524301, metadata !25, metadata !"u64", metadata !10, i32 98, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
-!28 = metadata !{i32 524301, metadata !25, metadata !"u32", metadata !10, i32 99, i64 64, i64 32, i64 0, i32 0, metadata !29} ; [ DW_TAG_member ]
-!29 = metadata !{i32 524289, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 32, i64 0, i32 0, metadata !30, metadata !32, i32 0, null} ; [ DW_TAG_array_type ]
-!30 = metadata !{i32 524310, metadata !3, metadata !"uint32_t", metadata !12, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ]
-!31 = metadata !{i32 524324, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!27 = metadata !{i32 524301, metadata !38, metadata !25, metadata !"u64", i32 98, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
+!28 = metadata !{i32 524301, metadata !38, metadata !25, metadata !"u32", i32 99, i64 64, i64 32, i64 0, i32 0, metadata !29} ; [ DW_TAG_member ]
+!29 = metadata !{i32 524289, metadata !39, metadata !3, metadata !"", i32 0, i64 64, i64 32, i64 0, i32 0, metadata !30, metadata !32, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from uint32_t]
+!30 = metadata !{i32 524310, metadata !36, metadata !3, metadata !"uint32_t", i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ]
+!31 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !32 = metadata !{metadata !33}
 !33 = metadata !{i32 524321, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
 !34 = metadata !{i32 524544, metadata !24, metadata !"addr", metadata !10, i32 96, metadata !35} ; [ DW_TAG_auto_variable ]
-!35 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!35 = metadata !{i32 524303, metadata !39, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!36 = metadata !{metadata !"stdint.h", metadata !"/usr/4.2.1/include"}
+!37 = metadata !{metadata !"types.h", metadata !"/usr/include/ppc"}
+!38 = metadata !{metadata !"OSByteOrder.h", metadata !"/usr/include/libkern/ppc"}
+!39 = metadata !{metadata !"G.c", metadata !"/tmp"}
+!40 = metadata !{metadata !2, metadata !9, metadata !16}
+!41 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
index 75e4389afef8..ad7c7d1614a5 100644
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -27,22 +27,27 @@ return:
 }
 
 !llvm.dbg.cu = !{!4, !12}
+!llvm.module.flags = !{!21}
 !16 = metadata !{metadata !2}
 !17 = metadata !{metadata !10}
 
 !0 = metadata !{i32 3, i32 0, metadata !1, null}
-!1 = metadata !{i32 786443, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786478, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, i32 1, metadata !3, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !16, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786443, metadata !18, metadata !2, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 786478, metadata !18, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786453, metadata !18, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, metadata !18, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 3, i32 0, metadata !9, null}
-!9 = metadata !{i32 786443, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 786478, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786473, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ]
-!12 = metadata !{i32 786449, i32 1, metadata !11, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!13 = metadata !{i32 786453, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786443, metadata !20, metadata !10, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786478, metadata !20, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!12 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!13 = metadata !{i32 786453, metadata !20, metadata !11, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !14 = metadata !{metadata !15}
-!15 = metadata !{i32 786468, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!15 = metadata !{i32 786468, metadata !20, metadata !11, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!18 = metadata !{metadata !"a.c", metadata !"/tmp/"}
+!19 = metadata !{i32 0}
+!20 = metadata !{metadata !"b.c", metadata !"/tmp/"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index f5ebb2d80c48..50a34222fe63 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -2,7 +2,7 @@
 ; Check struct X for dead variable xyz from inlined function foo.
 
 ; CHECK:	DW_TAG_structure_type
-; CHECK-NEXT:	DW_AT_name
+; CHECK-NEXT:	info_string
  
 
 @i = common global i32 0                          ; <i32*> [#uses=2]
@@ -22,24 +22,25 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!28}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !27, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26,  metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !27, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !20, metadata !20, metadata !25, metadata !26,  metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5, metadata !5}
 !5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{i32 786478, metadata !27, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !5}
 !9 = metadata !{i32 786689, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 786688, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12, i32 0, null} ; [ DW_TAG_auto_variable ]
 !11 = metadata !{i32 786443, metadata !1, metadata !0, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 10, size 64, align 32, offset 0] [def] [from ]
 !13 = metadata !{metadata !14, metadata !15}
 !14 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"a", i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
 !15 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"b", i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
-!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i, null} ; [ DW_TAG_variable ]
 !17 = metadata !{i32 15, i32 0, metadata !18, null}
 !18 = metadata !{i32 786443, metadata !1, metadata !6, i32 14, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
 !19 = metadata !{i32 9, i32 0, metadata !0, metadata !17}
@@ -51,3 +52,4 @@ entry:
 !25 = metadata !{metadata !0, metadata !6}
 !26 = metadata !{metadata !16}
 !27 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
+!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll
index 87a4a8955a3e..6b6e61ddc28a 100644
--- a/test/DebugInfo/2010-07-19-Crash.ll
+++ b/test/DebugInfo/2010-07-19-Crash.ll
@@ -7,18 +7,24 @@ entry:
   ret i32 42, !dbg !9
 }
 
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!15}
 !llvm.dbg.sp = !{!0, !6, !11}
 !llvm.dbg.lv.foo = !{!7}
 
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"one.c", metadata !".", metadata !"clang 2.8", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, metadata !12, i32 12, metadata !"clang 2.8", i1 true, metadata !"", i32 0, metadata !14, metadata !14, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 524324, metadata !12, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !7 = metadata !{i32 524544, metadata !8, metadata !"one", metadata !1, i32 8, metadata !5} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 524299, metadata !6, i32 7, i32 18} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 524299, metadata !12, metadata !6, i32 7, i32 18, i32 0} ; [ DW_TAG_lexical_block ]
 !9 = metadata !{i32 4, i32 3, metadata !10, null}
-!10 = metadata !{i32 524299, metadata !0, i32 3, i32 11} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 524299, metadata !12, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!12 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
+!13 = metadata !{metadata !0, metadata !6, metadata !11}
+!14 = metadata !{i32 0}
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
index c4161b49426d..f8dbb6eb3c97 100644
--- a/test/DebugInfo/2010-10-01-crash.ll
+++ b/test/DebugInfo/2010-10-01-crash.ll
@@ -13,10 +13,14 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !1, i32 54, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32*, i32*)* @CGRectStandardize} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop", metadata !2}
-!2 = metadata !{i32 589841, i32 0, i32 16, metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop", metadata !"clang version 2.9 (trunk 115292)", i1 true, i1 false, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 589846, metadata !1, metadata !"CGRect", metadata !1, i32 49, i64 0, i64 0, i64 0, i32 0, null}
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!27}
+!0 = metadata !{i32 589870, metadata !1, null, metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !"CGRectStandardize", i32 54, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32*, i32*)* @CGRectStandardize, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 54] [def] [scope 0] [CGRectStandardize]
+!1 = metadata !{i32 589865, metadata !25}
+!2 = metadata !{i32 589841, metadata !25, i32 16, metadata !"clang version 2.9 (trunk 115292)", i1 true, metadata !"", i32 1, metadata !26, metadata !26, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 589846, metadata !25, null, metadata !"CGRect", i32 49, i64 0, i64 0, i64 0, i32 0, null}
 !23 = metadata !{i32 590081, metadata !0, metadata !"rect", metadata !1, i32 53, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 53, i32 33, metadata !0, null}
-
+!25 = metadata !{metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop"}
+!26 = metadata !{i32 0}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
index bcdd4625b603..4c205077e6d8 100644
--- a/test/DebugInfo/AArch64/dwarfdump.ll
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -21,14 +21,16 @@ define i32 @main() nounwind {
 attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10}
 
 !0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3}
-!3 = metadata !{i32 786478, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!3 = metadata !{i32 786478, metadata !9, metadata !4, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
 !4 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !8 = metadata !{i32 2, i32 0, metadata !3, null}
 !9 = metadata !{metadata !"tmp.c", metadata !"/home/tim/llvm/build"}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg
index c5ce2411ed48..9a66a00189ea 100644
--- a/test/DebugInfo/AArch64/lit.local.cfg
+++ b/test/DebugInfo/AArch64/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'AArch64' in targets:
     config.unsupported = True
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
index ba9e13aa2520..f42cb746480b 100644
--- a/test/DebugInfo/AArch64/variable-loc.ll
+++ b/test/DebugInfo/AArch64/variable-loc.ll
@@ -16,24 +16,25 @@
 ;     return 0;
 ; }
 
-  ; First make sure main_arr is where we expect it: sp + 12 == x29 - 420:
+  ; First make sure main_arr is where we expect it: sp + 4 == x29 - 412:
 ; CHECK: main:
-; CHECK: sub sp, sp, #448
-; CHECK: stp x29, x30, [sp, #432]
-; CHECK: add x29, sp, #432
-; CHECK: add {{x[0-9]+}}, sp, #12
+; CHECK: sub sp, sp, #432
+; CHECK: stp x29, x30, [sp, #416]
+; CHECK: add x29, sp, #416
+; CHECK: add {{x[0-9]+}}, sp, #4
 
-  ; Now check the debugging information reflects this:
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: main_arr
+
+; Now check the debugging information reflects this:
 ; CHECK: DW_TAG_variable
 ; CHECK-NEXT: .word .Linfo_string7
 
-  ; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is LEB128 encoded -420.
+  ; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is LEB128 encoded -412.
 ; CHECK: DW_AT_location
 ; CHECK-NEXT: .byte 145
-; CHECK-NEXT: .ascii "\334|"
+; CHECK-NEXT: .ascii "\344|"
 
-; CHECK: .Linfo_string7:
-; CHECK-NEXT: main_arr
 
 
 target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128"
@@ -68,25 +69,26 @@ entry:
 declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!30}
 
 !0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !11, metadata !14}
-!5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
 !6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9, metadata !10}
 !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
 !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786478, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
-!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{metadata !10, metadata !9, metadata !10}
-!14 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
-!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{metadata !10}
 !17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
-!18 = metadata !{i32 786443, metadata !6, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
-!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
 !20 = metadata !{i32 786465, i64 0, i64 99}       ; [ DW_TAG_subrange_type ] [0, 99]
 !22 = metadata !{i32 19, i32 7, metadata !18, null}
 !23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
@@ -96,3 +98,4 @@ declare i32 @printf(i8*, ...)
 !27 = metadata !{i32 24, i32 3, metadata !18, null}
 !28 = metadata !{i32 26, i32 3, metadata !18, null}
 !29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/ARM/PR16736.ll b/test/DebugInfo/ARM/PR16736.ll
new file mode 100644
index 000000000000..d01fa22318db
--- /dev/null
+++ b/test/DebugInfo/ARM/PR16736.ll
@@ -0,0 +1,65 @@
+; RUN: llc -filetype=asm < %s | FileCheck %s
+; CHECK: @DEBUG_VALUE: h:x <- [R{{.*}}+{{.*}}]
+; generated from:
+; clang -cc1 -triple  thumbv7 -S -O1 arm.cpp  -g
+;
+; int f();
+; void g(float);
+; void h(int, int, int, int, float x) {
+;    g(x = f());
+; }
+;
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32-S64"
+target triple = "thumbv7-apple-ios"
+
+; Function Attrs: nounwind
+define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !12), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !13), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{i32 %2}, i64 0, metadata !14), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !15), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{float %x}, i64 0, metadata !16), !dbg !18
+  %call = tail call arm_aapcscc i32 @_Z1fv() #3, !dbg !19
+  %conv = sitofp i32 %call to float, !dbg !19
+  tail call void @llvm.dbg.value(metadata !{float %conv}, i64 0, metadata !16), !dbg !19
+  tail call arm_aapcscc void @_Z1gf(float %conv) #3, !dbg !19
+  ret void, !dbg !20
+}
+
+declare arm_aapcscc void @_Z1gf(float)
+
+declare arm_aapcscc i32 @_Z1fv()
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { nounwind  }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !21}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 190804) (llvm/trunk 190797)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [//<unknown>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"/<unknown>", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"h", metadata !"h", metadata !"_Z1hiiiif", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32, i32, i32, float)* @_Z1hiiiif, null, null, metadata !11, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [h]
+!5 = metadata !{metadata !"/arm.cpp", metadata !""}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [//arm.cpp]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !9, metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!11 = metadata !{metadata !12, metadata !13, metadata !14, metadata !15, metadata !16}
+!12 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
+!13 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 33554435, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
+!14 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 50331651, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
+!15 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 67108867, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
+!16 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 83886083, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 3]
+!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!18 = metadata !{i32 3, i32 0, metadata !4, null}
+!19 = metadata !{i32 4, i32 0, metadata !4, null}
+!20 = metadata !{i32 5, i32 0, metadata !4, null}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/ARM/lit.local.cfg b/test/DebugInfo/ARM/lit.local.cfg
new file mode 100644
index 000000000000..8a3ba96497e7
--- /dev/null
+++ b/test/DebugInfo/ARM/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
new file mode 100644
index 000000000000..0378c7514d84
--- /dev/null
+++ b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -0,0 +1,103 @@
+; RUN: opt  -instcombine %s -S | FileCheck %s
+;
+; Generate me from:
+; clang -cc1 -triple thumbv7-apple-ios7.0.0 -S -target-abi apcs-gnu -gdwarf-2 -Os test.c -o test.ll -emit-llvm
+; void run(float r)
+; {
+;   int count = r;
+;   float vla[count];
+;   vla[0] = r;
+;   for (int i = 0; i < count; i++)
+;     vla[i] /= r;
+; }
+; rdar://problem/15464571
+;
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios8.0.0"
+
+; Function Attrs: nounwind optsize readnone
+define void @run(float %r) #0 {
+entry:
+  tail call void @llvm.dbg.declare(metadata !{float %r}, metadata !11), !dbg !22
+  %conv = fptosi float %r to i32, !dbg !23
+  tail call void @llvm.dbg.declare(metadata !{i32 %conv}, metadata !12), !dbg !23
+  %vla = alloca float, i32 %conv, align 4, !dbg !24
+  tail call void @llvm.dbg.declare(metadata !{float* %vla}, metadata !14), !dbg !24
+; The VLA alloca should be described by a dbg.declare:
+; CHECK: call void @llvm.dbg.declare(metadata !{float* %vla}, metadata ![[VLA:.*]])
+; The VLA alloca and the following store into the array should not be lowered like this:
+; CHECK-NOT:  call void @llvm.dbg.value(metadata !{float %r}, i64 0, metadata ![[VLA]])
+; the backend interprets this as "vla has the location of %r".
+  store float %r, float* %vla, align 4, !dbg !25, !tbaa !26
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !18), !dbg !30
+  %cmp8 = icmp sgt i32 %conv, 0, !dbg !30
+  br i1 %cmp8, label %for.body, label %for.end, !dbg !30
+
+for.body:                                         ; preds = %entry, %for.body.for.body_crit_edge
+  %0 = phi float [ %.pre, %for.body.for.body_crit_edge ], [ %r, %entry ]
+  %i.09 = phi i32 [ %inc, %for.body.for.body_crit_edge ], [ 0, %entry ]
+  %arrayidx2 = getelementptr inbounds float* %vla, i32 %i.09, !dbg !31
+  %div = fdiv float %0, %r, !dbg !31
+  store float %div, float* %arrayidx2, align 4, !dbg !31, !tbaa !26
+  %inc = add nsw i32 %i.09, 1, !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !18), !dbg !30
+  %exitcond = icmp eq i32 %inc, %conv, !dbg !30
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !dbg !30
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %arrayidx2.phi.trans.insert = getelementptr inbounds float* %vla, i32 %inc
+  %.pre = load float* %arrayidx2.phi.trans.insert, align 4, !dbg !31, !tbaa !26
+  br label %for.body, !dbg !30
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20, !33}
+!llvm.ident = !{!21}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/radar/15464571/<unknown>] [DW_LANG_C99]
+!1 = metadata !{metadata !"<unknown>", metadata !"/Volumes/Data/radar/15464571"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (float)* @run, null, null, metadata !10, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [run]
+!5 = metadata !{metadata !"test.c", metadata !"/Volumes/Data/radar/15464571"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15464571/test.c]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!10 = metadata !{metadata !11, metadata !12, metadata !14, metadata !18}
+!11 = metadata !{i32 786689, metadata !4, metadata !"r", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 1]
+!12 = metadata !{i32 786688, metadata !4, metadata !"count", metadata !6, i32 3, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [count] [line 3]
+!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = metadata !{i32 786688, metadata !4, metadata !"vla", metadata !6, i32 4, metadata !15, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [vla] [line 4]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from float]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 786465, i64 0, i64 -1}       ; [ DW_TAG_subrange_type ] [unbounded]
+!18 = metadata !{i32 786688, metadata !19, metadata !"i", metadata !6, i32 6, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 6]
+!19 = metadata !{i32 786443, metadata !5, metadata !4, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/radar/15464571/test.c]
+!20 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!21 = metadata !{metadata !"clang version 3.4 "}
+!22 = metadata !{i32 1, i32 0, metadata !4, null}
+!23 = metadata !{i32 3, i32 0, metadata !4, null}
+!24 = metadata !{i32 4, i32 0, metadata !4, null}
+!25 = metadata !{i32 5, i32 0, metadata !4, null}
+!26 = metadata !{metadata !27, metadata !27, i64 0}
+!27 = metadata !{metadata !"float", metadata !28, i64 0}
+!28 = metadata !{metadata !"omnipotent char", metadata !29, i64 0}
+!29 = metadata !{metadata !"Simple C/C++ TBAA"}
+!30 = metadata !{i32 6, i32 0, metadata !19, null}
+!31 = metadata !{i32 7, i32 0, metadata !19, null}
+!32 = metadata !{i32 8, i32 0, metadata !4, null}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/ARM/selectiondag-deadcode.ll b/test/DebugInfo/ARM/selectiondag-deadcode.ll
new file mode 100644
index 000000000000..cc151e039faf
--- /dev/null
+++ b/test/DebugInfo/ARM/selectiondag-deadcode.ll
@@ -0,0 +1,27 @@
+; RUN: llc -filetype=asm < %s | FileCheck %s
+target triple = "thumbv7-apple-ios7.0.0"
+%class.Matrix3.0.6.10 = type { [9 x float] }
+define arm_aapcscc void @_Z9GetMatrixv(%class.Matrix3.0.6.10* noalias nocapture sret %agg.result) #0 {
+  br i1 fcmp oeq (float fadd (float fadd (float fmul (float undef, float undef), float fmul (float undef, float undef)), float fmul (float undef, float undef)), float 0.000000e+00), label %_ZN7Vector39NormalizeEv.exit, label %1
+  tail call arm_aapcscc void @_ZL4Sqrtd() #3
+  br label %_ZN7Vector39NormalizeEv.exit
+_ZN7Vector39NormalizeEv.exit:                     ; preds = %1, %0
+  ; rdar://problem/15094721.
+  ;
+  ; When this (partially) dead use gets eliminated (and thus the def
+  ; of the vreg holding %agg.result) the dbg_value becomes dangling
+  ; and SelectionDAGISel crashes.  It should definitely not
+  ; crash. Drop the dbg_value instead.
+  ; CHECK-NOT: "matrix"
+  tail call void @llvm.dbg.declare(metadata !{%class.Matrix3.0.6.10* %agg.result}, metadata !45)
+  %2 = getelementptr inbounds %class.Matrix3.0.6.10* %agg.result, i32 0, i32 0, i32 8
+  ret void
+}
+declare void @llvm.dbg.declare(metadata, metadata) #1
+declare arm_aapcscc void @_ZL4Sqrtd() #2
+!4 = metadata !{i32 786434, metadata !5, null, metadata !"Matrix3", i32 20, i64 288, i64 32, i32 0, i32 0, null, null, i32 0, null, null, metadata !"_ZTS7Matrix3"} ; [ DW_TAG_class_type ] [Matrix3] [line 20, size 288, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !"test.ii", metadata !"/Volumes/Data/radar/15094721"}
+!39 = metadata !{i32 786478, metadata !5, metadata !40, metadata !"GetMatrix", metadata !"GetMatrix", metadata !"_Z9GetMatrixv", i32 32, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.Matrix3.0.6.10*)* @_Z9GetMatrixv, null, null, null, i32 32} ; [ DW_TAG_subprogram ] [line 32] [def] [GetMatrix]
+!40 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15094721/test.ii]
+!41 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, null, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!45 = metadata !{i32 786688, metadata !39, metadata !"matrix", metadata !40, i32 35, metadata !4, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [matrix] [line 35]
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.cc b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
index 8ffbb528f2a9..edf956d32116 100644
--- a/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
@@ -13,3 +13,6 @@ int main() {
 // $ cp dwarfdump-inl-test.* /tmp/dbginfo
 // $ cd /tmp/dbginfo
 // $ clang++ -O2 -gline-tables-only -fsanitize=address -fPIC -shared dwarfdump-inl-test.cc -o <output>
+//
+// And similarly with gcc 4.8.2:
+// $ gcc dwarfdump-inl-test.cc -o dwarfdump-inl-test.high_pc.elf-x86-64 -g -O2 -fPIC -shared
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64
new file mode 100755
index 000000000000..f108861bec8f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-loc-list-32bit.elf.cpp b/test/DebugInfo/Inputs/dwarfdump-test-loc-list-32bit.elf.cpp
new file mode 100644
index 000000000000..04a0b20cc2ce
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-loc-list-32bit.elf.cpp
@@ -0,0 +1,13 @@
+// clang -c -g -o dwarfdump-test-loc-list-32bit.elf.o -m32 dwarfdump-test-loc-list-32bit.elf.cpp
+
+namespace pr14763 {
+struct foo {
+  foo(const foo&);
+};
+
+foo func(bool b, foo f, foo g) {
+  if (b)
+    return f;
+  return g;
+}
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-loc-list-32bit.elf.o b/test/DebugInfo/Inputs/dwarfdump-test-loc-list-32bit.elf.o
new file mode 100644
index 000000000000..25d10b980907
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-loc-list-32bit.elf.o
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64.debuglink b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64.debuglink
new file mode 100755
index 000000000000..8c08037ea371
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64.debuglink
diff --git a/test/DebugInfo/Inputs/dwarfdump-type-units.cc b/test/DebugInfo/Inputs/dwarfdump-type-units.cc
new file mode 100644
index 000000000000..06bc9a2102c7
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-type-units.cc
@@ -0,0 +1,15 @@
+struct foo {};
+struct bar {};
+void sink(void*);
+int main() {
+  foo f;
+  sink(&f);
+  bar b;
+  sink(&b);
+}
+
+// Built with GCC 4.8.1
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-type-units.cc /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ g++-4.8.1 -g -fdebug-types-section -c dwarfdump-type-units.cc -o dwarfdump-type-units.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-type-units.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-type-units.elf-x86-64
new file mode 100644
index 000000000000..064b4f06764e
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-type-units.elf-x86-64
diff --git a/test/DebugInfo/Inputs/lit.local.cfg b/test/DebugInfo/Inputs/lit.local.cfg
deleted file mode 100644
index e6f55eef7af5..000000000000
--- a/test/DebugInfo/Inputs/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = []
diff --git a/test/DebugInfo/Inputs/macho-universal b/test/DebugInfo/Inputs/macho-universal
new file mode 100755
index 000000000000..a161441802dd
--- /dev/null
+++ b/test/DebugInfo/Inputs/macho-universal
diff --git a/test/DebugInfo/Inputs/macho-universal.cc b/test/DebugInfo/Inputs/macho-universal.cc
new file mode 100644
index 000000000000..9f34fdb06d13
--- /dev/null
+++ b/test/DebugInfo/Inputs/macho-universal.cc
@@ -0,0 +1,10 @@
+// Built with Apple LLVM version 4.2 (clang-425.0.24) (based on LLVM 3.2svn)
+// clang++ -arch x86_64 -arch i386 macho-universal.cc
+
+int inc(int x) {
+  return x + 1;
+}
+
+int main(int argc, char *argv[]) {
+  return inc(argc);
+}
diff --git a/test/DebugInfo/PowerPC/lit.local.cfg b/test/DebugInfo/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..193ebebcd50e
--- /dev/null
+++ b/test/DebugInfo/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
diff --git a/test/DebugInfo/PowerPC/tls-fission.ll b/test/DebugInfo/PowerPC/tls-fission.ll
new file mode 100644
index 000000000000..4a744c722532
--- /dev/null
+++ b/test/DebugInfo/PowerPC/tls-fission.ll
@@ -0,0 +1,32 @@
+; RUN: llc -split-dwarf=Enable -mtriple=powerpc64-unknown-linux-gnu -O0 -filetype=asm < %s | FileCheck %s
+
+; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
+; that here instead of raw assembly printing
+
+; CHECK: debug_info.dwo
+; 3 bytes of data in this DW_FORM_block1 representation of the location of 'tls'
+; CHECK: .byte 3{{ *}}# DW_AT_location
+; DW_OP_GNU_const_index (0xfc == 252) to refer to the debug_addr table
+; CHECK-NEXT: .byte 252
+; an index of zero into the debug_addr table
+; CHECK-NEXT: .byte 0
+; DW_OP_GNU_push_tls_address
+; CHECK-NEXT: .byte 224
+; check that the expected TLS address description is the first thing in the debug_addr section
+; CHECK: debug_addr
+; CHECK-NEXT: .quad tls@dtprel+32768
+
+@tls = thread_local global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"tls.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
+!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/PowerPC/tls.ll b/test/DebugInfo/PowerPC/tls.ll
new file mode 100644
index 000000000000..6557f5ea47b9
--- /dev/null
+++ b/test/DebugInfo/PowerPC/tls.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O0 -filetype=asm < %s | FileCheck %s
+
+; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
+; that here instead of raw assembly printing
+
+; 10 bytes of data in this DW_FORM_block1 representation of the location of 'tls'
+; CHECK: .byte  10{{ *}}# DW_AT_location
+; DW_OP_const8u
+; CHECK: .byte  14
+; The debug relocation of the address of the tls variable
+; CHECK: .quad  tls@dtprel+32768
+; DW_OP_GNU_push_tls_address
+; CHECK: .byte  224
+
+@tls = thread_local global i32 7, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
+!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/SystemZ/lit.local.cfg b/test/DebugInfo/SystemZ/lit.local.cfg
index a70a6854381b..b12af09434be 100644
--- a/test/DebugInfo/SystemZ/lit.local.cfg
+++ b/test/DebugInfo/SystemZ/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'SystemZ' in targets:
     config.unsupported = True
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
index e6f4ff99dd4f..560b47747dd8 100644
--- a/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -8,10 +8,12 @@
 ;
 ; CHECK: main:
 ; CHECK: aghi    %r15, -568
-; CHECK: la      [[MAIN_ARR:%r[0-9]+]], 164(%r11)
-; CHECK: lgr     %r2, [[MAIN_ARR]]
+; CHECK: la      %r2, 164(%r11)
 ; CHECK: brasl   %r14, populate_array@PLT
 ;
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: main_arr
+;
 ; Now check that the debugging information reflects this:
 ; CHECK: DW_TAG_variable
 ; CHECK-NEXT: .long .Linfo_string7
@@ -22,8 +24,6 @@
 ; CHECK-NEXT: .byte 145
 ; CHECK-NEXT: .ascii "\244\001"
 ;
-; CHECK: .Linfo_string7:
-; CHECK-NEXT: main_arr
 
 
 @.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 2
@@ -55,25 +55,26 @@ entry:
 declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!30}
 
 !0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !11, metadata !14}
-!5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
 !6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9, metadata !10}
 !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
 !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786478, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
-!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{metadata !10, metadata !9, metadata !10}
-!14 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
-!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{metadata !10}
 !17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
-!18 = metadata !{i32 786443, metadata !6, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
-!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
 !20 = metadata !{i32 786465, i64 0, i64 99}       ; [ DW_TAG_subrange_type ] [0, 99]
 !22 = metadata !{i32 19, i32 7, metadata !18, null}
 !23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
@@ -83,3 +84,4 @@ declare i32 @printf(i8*, ...)
 !27 = metadata !{i32 24, i32 3, metadata !18, null}
 !28 = metadata !{i32 26, i32 3, metadata !18, null}
 !29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index 5bebeaaaf4ce..0440afce24c7 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -asm-verbose -mtriple=x86_64-macosx < %s | FileCheck %s
+; RUN: llc -O0 -asm-verbose -mtriple=x86_64-macosx -generate-dwarf-pub-sections=Enable < %s | FileCheck %s
 ; CHECK-NOT: .asciz "X" ## External Name
 ; CHECK: .asciz "Y" ## External Name
 ; Test to check type with no definition is listed in pubtypes section.
@@ -29,23 +29,26 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!20}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !7, metadata !9}
 !6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
+!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 3, size 0, align 0, offset 0] [decl] [from ]
 !9 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Y] [line 4, size 32, align 32, offset 0] [def] [from ]
 !11 = metadata !{metadata !12}
 !12 = metadata !{i32 786445, metadata !18, metadata !10, metadata !"x", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !13 = metadata !{i32 7, i32 0, metadata !1, null}
 !14 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 7, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786443, metadata !18, metadata !1, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !17 = metadata !{metadata !1}
 !18 = metadata !{metadata !"a.c", metadata !"/tmp/"}
+!19 = metadata !{i32 0}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
index 94eba6af5bac..d0a2dfaa1f31 100644
--- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
+++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
@@ -1,7 +1,7 @@
 ; RUN: llc  -mtriple=i686-linux -O0 -filetype=obj -o %t %s
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-; CHECK: DW_TAG_constant [4]
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002c] = "ro")
+; CHECK: DW_TAG_constant
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "ro")
 
 define void @foo() nounwind ssp {
 entry:
@@ -12,17 +12,19 @@ entry:
 declare void @bar(i32)
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786471, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ]
-!6 = metadata !{i32 786470, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
-!7 = metadata !{i32 786468, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786470, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
+!7 = metadata !{i32 786468, metadata !12, metadata !1, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 3, i32 14, metadata !9, null}
-!9 = metadata !{i32 786443, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786443, metadata !12, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !10 = metadata !{metadata !0}
 !11 = metadata !{metadata !5}
 !12 = metadata !{metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 7b8d91456a54..cdfd9527d005 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -17,13 +17,14 @@ define i32 @f() nounwind {
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
 !0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12,  metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @f, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [f]
 !6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
@@ -37,13 +38,14 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !19 = metadata !{i32 5, i32 5, metadata !16, null}
 !20 = metadata !{metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo"}
 
-; CHECK: DW_TAG_variable [3]
-; CHECK: DW_AT_name [DW_FORM_strp]       ( .debug_str[0x00000043] = "GLB")
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "GLB")
 ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
 ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
 
-; CHECK: DW_TAG_variable [6]
-; CHECK: DW_AT_name [DW_FORM_strp]   ( .debug_str[0x0000004d] = "LOC")
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]   ( .debug_str[0x{{[0-9a-f]*}}] = "LOC")
 ; CHECK: DW_AT_decl_file [DW_FORM_data1]     (0x01)
 ; CHECK: DW_AT_decl_line [DW_FORM_data1]     (0x04)
 
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 5464b87fd114..5e6a6014d696 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -87,51 +87,52 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!83}
 
-!0 = metadata !{i32 720913, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !82, i32 4, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !9}
-!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
+!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 128, align 64, offset 0] [def] [from ]
 !6 = metadata !{i32 720937, metadata !82} ; [ DW_TAG_file_type ]
 !7 = metadata !{metadata !8, metadata !19, metadata !21}
 !8 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_class_type ] [baz] [line 3, size 32, align 32, offset 0] [def] [from ]
 !10 = metadata !{metadata !11, metadata !13}
 !11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
 !12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!13 = metadata !{i32 720942, metadata !6, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
-!14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{i32 720942, metadata !82, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 0} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !15 = metadata !{null, metadata !16, metadata !12}
-!16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
 !20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!21 = metadata !{i32 720942, metadata !6, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
-!22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!21 = metadata !{i32 720942, metadata !82, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25, i32 0} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null, metadata !24, metadata !12}
-!24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
+!24 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
 !25 = metadata !{metadata !26}
 !26 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
-!29 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
-!30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!29 = metadata !{i32 720942, metadata !82, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 17] [def] [scope 0] [main]
+!30 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !31 = metadata !{metadata !12, metadata !12, metadata !32}
 !32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
 !33 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
 !34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !35 = metadata !{metadata !36}
 !36 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!37 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
 !38 = metadata !{metadata !39}
 !39 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!40 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
+!40 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
 !41 = metadata !{metadata !42}
 !42 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!43 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
+!43 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
 !44 = metadata !{metadata !45}
 !45 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!46 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
+!46 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
 !47 = metadata !{metadata !48}
 !48 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -139,7 +140,7 @@ entry:
 !51 = metadata !{i32 721153, metadata !29, metadata !"argv", metadata !6, i32 33554448, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !52 = metadata !{i32 16, i32 27, metadata !29, null}
 !53 = metadata !{i32 721152, metadata !54, metadata !"myBar", metadata !6, i32 18, metadata !5, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!54 = metadata !{i32 720907, metadata !29, i32 17, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!54 = metadata !{i32 720907, metadata !82, metadata !29, i32 17, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !55 = metadata !{i32 18, i32 9, metadata !54, null}
 !56 = metadata !{i32 18, i32 17, metadata !54, null}
 !57 = metadata !{i32 19, i32 5, metadata !54, null}
@@ -154,7 +155,7 @@ entry:
 !66 = metadata !{i32 13, i32 13, metadata !40, null}
 !67 = metadata !{i32 13, i32 33, metadata !40, null}
 !68 = metadata !{i32 13, i32 34, metadata !69, null}
-!69 = metadata !{i32 720907, metadata !40, i32 13, i32 33, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!69 = metadata !{i32 720907, metadata !82, metadata !40, i32 13, i32 33, i32 1} ; [ DW_TAG_lexical_block ]
 !70 = metadata !{i32 721153, metadata !43, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !71 = metadata !{i32 6, i32 5, metadata !43, null}
 !72 = metadata !{i32 721153, metadata !43, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -166,5 +167,6 @@ entry:
 !78 = metadata !{i32 6, i32 13, metadata !46, null}
 !79 = metadata !{i32 6, i32 23, metadata !46, null}
 !80 = metadata !{i32 6, i32 24, metadata !81, null}
-!81 = metadata !{i32 720907, metadata !46, i32 6, i32 23, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!81 = metadata !{i32 720907, metadata !82, metadata !46, i32 6, i32 23, i32 2} ; [ DW_TAG_lexical_block ]
 !82 = metadata !{metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref"}
+!83 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index dcacba1912a3..87e242a0bb13 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -5,6 +5,7 @@
 ; CHECK: DW_TAG_pointer_type
 ; CHECK-NEXT: DW_AT_type
 ; CHECK-NOT: DW_AT_byte_size
+; CHECK: DW_TAG
 ; CHECK: .debug_info contents
 
 %struct.A = type { i32 }
@@ -23,17 +24,18 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
 !14 = metadata !{metadata !15}
@@ -41,5 +43,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 3, i32 13, metadata !5, null}
 !18 = metadata !{i32 4, i32 3, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
 !20 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 6f1aa41fdde4..bdd0e044bf13 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -86,15 +86,16 @@ declare i32 @g(i32, i32)
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!24}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !23, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [f]
 !1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !21, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !23, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 786443, metadata !1, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786443, metadata !23, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 6, i32 3, metadata !6, null}
 !12 = metadata !{i32 1}
 !13 = metadata !{i32 7, i32 3, metadata !6, null}
@@ -108,3 +109,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !21 = metadata !{metadata !0}
 !22 = metadata !{metadata !5}
 !23 = metadata !{metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation"}
+!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 789f556cbca0..6e6c3a177ab3 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -2,10 +2,12 @@
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: DW_TAG_formal_parameter [
+; CHECK-NOT: ""
+; CHECK: DW_TAG
 ; CHECK: DW_TAG_class_type
-; CHECK: DW_AT_object_pointer [DW_FORM_ref4]     (cu + 0x00fd => {0x000000fd})
-; CHECK: 0x000000fd:     DW_TAG_formal_parameter [13]
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]     ( .debug_str[0x00000086] = "this")
+; CHECK: DW_AT_object_pointer [DW_FORM_ref4]     (cu + 0x{{[0-9a-f]*}} => {[[PARAM:0x[0-9a-f]*]]})
+; CHECK: [[PARAM]]:     DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]     ( .debug_str[0x{{[0-9a-f]*}}] = "this")
 
 %class.A = type { i32 }
 
@@ -46,20 +48,21 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!38}
 
 !0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10, metadata !20}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
 !6 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
-!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ]
+!13 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
 !15 = metadata !{metadata !16, metadata !17}
 !16 = metadata !{i32 786445, metadata !37, metadata !14, metadata !"m_a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
 !17 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
@@ -83,3 +86,4 @@ entry:
 !35 = metadata !{i32 7, i32 0, metadata !5, null}
 !36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7]
 !37 = metadata !{metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests"}
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index 93e1ecf38f62..4d7ef4fd36f6 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -3,8 +3,10 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: 0x0000003a: DW_TAG_subprogram [5] *
-; CHECK: 0x00000060: DW_AT_specification [DW_FORM_ref4]      (cu + 0x003a => {0x0000003a})
+; CHECK: DW_TAG_subprogram [{{[0-9]+}}] *
+; CHECK: DW_AT_specification [DW_FORM_ref4]      (cu + 0x[[OFFSET:[0-9a-f]*]] => {0x0000[[OFFSET]]})
+; CHECK: 0x0000[[OFFSET]]: DW_TAG_subprogram [{{[0-9]+}}] *
+; CHECK: DW_AT_name [DW_FORM_strp]	( .debug_str[0x{{[0-9a-f]*}}] = "bar")
 
 
 @_ZZN3foo3barEvE1x = constant i32 0, align 4
@@ -15,16 +17,17 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!28}
 
 !0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [bar]
 !6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
 !11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ]
 !12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
 !13 = metadata !{metadata !11}
@@ -39,3 +42,4 @@ entry:
 !25 = metadata !{i32 6, i32 1, metadata !26, null}
 !26 = metadata !{i32 786443, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"}
+!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
new file mode 100644
index 000000000000..0c08f23e87ad
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=i686-w64-mingw32 -o %t -filetype=obj %s
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+
+; CHECK:  	 DW_AT_stmt_list [DW_FORM_sec_offset]
+;
+; generated from:
+; clang -g -S -emit-llvm test.c -o test.ll
+; int main()
+; {
+; 	return 0;
+; }
+
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !10
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!10 = metadata !{i32 3, i32 0, metadata !4, null}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index 2e23222588e0..2da962752a63 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -3,10 +3,10 @@
 
 ; Check that the friend tag is there and is followed by a DW_AT_friend that has a reference back.
 
-; CHECK: 0x00000032:   DW_TAG_class_type [4]
-; CHECK: 0x00000077:   DW_TAG_class_type [4]
-; CHECK: 0x000000a0:     DW_TAG_friend [9]  
-; CHECK: 0x000000a1:       DW_AT_friend [DW_FORM_ref4]   (cu + 0x0032 => {0x00000032})
+; CHECK: [[BACK:0x[0-9a-f]*]]:   DW_TAG_class_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]       ( .debug_str[{{.*}}] = "A")
+; CHECK: DW_TAG_friend
+; CHECK-NEXT: DW_AT_friend [DW_FORM_ref4]   (cu + 0x{{[0-9a-f]*}} => {[[BACK]]})
 
 
 %class.A = type { i32 }
@@ -16,31 +16,33 @@
 @b = global %class.B zeroinitializer, align 4
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29}
 
 !0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !17}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
+!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
 !8 = metadata !{metadata !9, metadata !11}
 !9 = metadata !{i32 786445, metadata !28, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
 !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !11 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{null, metadata !14}
-!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
 !15 = metadata !{metadata !16}
 !16 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b, null} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
+!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_class_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
 !19 = metadata !{metadata !20, metadata !21, metadata !27}
 !20 = metadata !{i32 786445, metadata !28, metadata !18, metadata !"b", i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
 !21 = metadata !{i32 786478, metadata !6, metadata !18, metadata !"B", metadata !"B", metadata !"", i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ]
-!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null, metadata !24}
-!24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
+!24 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
 !25 = metadata !{metadata !26}
 !26 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ]
 !28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
+!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index b99de3c1ea70..d733dfda9465 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -25,16 +25,19 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !14, i32 4, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786478, metadata !14, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
 !9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786443, metadata !14, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 2, i32 7, metadata !10, null}
 !13 = metadata !{i32 3, i32 1, metadata !10, null}
+!14 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/debuginfo"}
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll
new file mode 100644
index 000000000000..1d51049a5f6b
--- /dev/null
+++ b/test/DebugInfo/X86/arguments.ll
@@ -0,0 +1,73 @@
+; REQUIRES: object-emission
+
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated from clang -g with the following source:
+; struct foo {
+;   foo(const foo&);
+;   int i;
+; };
+;
+; void func(foo f, foo g) {
+;   f.i++;
+; }
+
+; CHECK: debug_info contents
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_Z4func3fooS_"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name{{.*}}"f"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name{{.*}}"g"
+
+%struct.foo = type { i32 }
+
+; Function Attrs: nounwind uwtable
+define void @_Z4func3fooS_(%struct.foo* %f, %struct.foo* %g) #0 {
+entry:
+  call void @llvm.dbg.declare(metadata !{%struct.foo* %f}, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata !{%struct.foo* %g}, metadata !21), !dbg !20
+  %i = getelementptr inbounds %struct.foo* %f, i32 0, i32 0, !dbg !22
+  %0 = load i32* %i, align 4, !dbg !22
+  %inc = add nsw i32 %0, 1, !dbg !22
+  store i32 %inc, i32* %i, align 4, !dbg !22
+  ret void, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!24}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4func3fooS_", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*, %struct.foo*)* @_Z4func3fooS_, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8, metadata !8}
+!8 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ]
+!9 = metadata !{metadata !10, metadata !12}
+!10 = metadata !{i32 786445, metadata !1, metadata !8, metadata !"i", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 2} ; [ DW_TAG_subprogram ] [line 2] [foo]
+!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{null, metadata !15, metadata !16}
+!15 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo]
+!16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo]
+!18 = metadata !{i32 786468}
+!19 = metadata !{i32 786689, metadata !4, metadata !"f", metadata !5, i32 16777222, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [f] [line 6]
+!20 = metadata !{i32 6, i32 0, metadata !4, null}
+!21 = metadata !{i32 786689, metadata !4, metadata !"g", metadata !5, i32 33554438, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [g] [line 6]
+!22 = metadata !{i32 7, i32 0, metadata !4, null}
+!23 = metadata !{i32 8, i32 0, metadata !4, null} ; !dbg location for 'ret void': line 8, column 0, scope !4 (not a DW_TAG node)
+!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 004673061d8c..2f966a71d717 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -2,10 +2,10 @@
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Checks that we emit debug info for the block variable declare.
-; CHECK: 0x00000030:   DW_TAG_subprogram [3]
-; CHECK: 0x0000005b:     DW_TAG_variable [5]
-; CHECK: 0x0000005c:       DW_AT_name [DW_FORM_strp]     ( .debug_str[0x000000e6] = "block")
-; CHECK: 0x00000066:       DW_AT_location [DW_FORM_data4]        (0x00000023)
+; CHECK: DW_TAG_subprogram [3]
+; CHECK: DW_TAG_variable [5]
+; CHECK: DW_AT_name [DW_FORM_strp]     ( .debug_str[{{.*}}] = "block")
+; CHECK: DW_AT_location [DW_FORM_sec_offset]        ({{.*}})
 
 %struct.__block_descriptor = type { i64, i64 }
 %struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
@@ -60,18 +60,18 @@ declare void @objc_end_catch()
 declare i32 @__objc_personality_v0(...)
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!35, !36, !37, !38}
+!llvm.module.flags = !{!35, !36, !37, !38, !64}
 
 !0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
 !9 = metadata !{i32 786454, metadata !63, null, metadata !"dispatch_block_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
 !10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 5, size 256, align 0, offset 0] [def] [from ]
 !12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
 !13 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
 !14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
@@ -81,7 +81,7 @@ declare i32 @__objc_personality_v0(...)
 !18 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
 !19 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
 !20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 5, size 128, align 0, offset 0] [def] [from ]
 !22 = metadata !{metadata !23, metadata !25}
 !23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
 !24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
@@ -89,10 +89,10 @@ declare i32 @__objc_personality_v0(...)
 !26 = metadata !{metadata !27}
 !27 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ]
-!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !30 = metadata !{null, metadata !14}
 !31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
-!32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!32 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !33 = metadata !{null, metadata !14, metadata !14}
 !34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
 !35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
@@ -101,7 +101,7 @@ declare i32 @__objc_personality_v0(...)
 !38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
 !39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
 !40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
-!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 7, size 320, align 64, offset 0] [def] [from ]
 !42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
 !43 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
 !44 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
@@ -109,7 +109,7 @@ declare i32 @__objc_personality_v0(...)
 !46 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
 !47 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
 !48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
-!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
+!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 7, size 0, align 0, offset 0] [decl] [from ]
 !50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
 !51 = metadata !{i32 7, i32 18, metadata !28, null}
 !52 = metadata !{i32 7, i32 19, metadata !28, null}
@@ -124,3 +124,4 @@ declare i32 @__objc_personality_v0(...)
 !61 = metadata !{i32 10, i32 21, metadata !28, null}
 !62 = metadata !{i32 9, i32 20, metadata !56, null}
 !63 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
+!64 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/byvalstruct.ll b/test/DebugInfo/X86/byvalstruct.ll
new file mode 100644
index 000000000000..3dea8632a702
--- /dev/null
+++ b/test/DebugInfo/X86/byvalstruct.ll
@@ -0,0 +1,127 @@
+; RUN: llc  -mtriple=x86_64-apple-macosx10.8.0 -O0 -filetype=obj -o %t %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; Test that we generate debug info for by-value struct args that are not used.
+;
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name {{.*}} "info"
+;
+; generated from
+;
+; typedef unsigned long NSUInteger;
+; typedef struct
+; {
+;  NSUInteger width;
+;  NSUInteger height;
+;  double pixelAspect;
+; } ImageInfo;
+; @implementation Bitmap
+; - (id)initWithCopy:(Bitmap *)otherBitmap
+;            andInfo:(ImageInfo)info
+;        andLength:(NSUInteger)length
+; {
+; }
+; @end
+
+; ModuleID = 't.mm'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%0 = type opaque
+%struct._objc_cache = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct.ImageInfo = type { i64, i64, double }
+
+@_objc_empty_cache = external global %struct._objc_cache
+@_objc_empty_vtable = external global i8* (i8*, i8*)*
+@"OBJC_CLASS_$_Bitmap" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_Bitmap", %struct._class_t* null, %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_CLASS_RO_$_Bitmap" }, section "__DATA, __objc_data", align 8
+@"OBJC_METACLASS_$_Bitmap" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_Bitmap", %struct._class_t* @"OBJC_CLASS_$_Bitmap", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_METACLASS_RO_$_Bitmap" }, section "__DATA, __objc_data", align 8
+@"\01L_OBJC_CLASS_NAME_" = internal global [7 x i8] c"Bitmap\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@"\01l_OBJC_METACLASS_RO_$_Bitmap" = internal global %struct._class_ro_t { i32 3, i32 40, i32 40, i8* null, i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [32 x i8] c"initWithCopy:andInfo:andLength:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_METH_VAR_TYPE_" = internal global [23 x i8] c"@56@0:8@16{?=QQd}24Q48\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+@"\01l_OBJC_$_INSTANCE_METHODS_Bitmap" = internal global { i32, i32, [1 x %struct._objc_method] } { i32 24, i32 1, [1 x %struct._objc_method] [%struct._objc_method { i8* getelementptr inbounds ([32 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([23 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast (i8* (%0*, i8*, %0*, %struct.ImageInfo*, i64)* @"\01-[Bitmap initWithCopy:andInfo:andLength:]" to i8*) }] }, section "__DATA, __objc_const", align 8
+@"\01l_OBJC_CLASS_RO_$_Bitmap" = internal global %struct._class_ro_t { i32 2, i32 0, i32 0, i8* null, i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_Bitmap" to %struct.__method_list_t*), %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
+@"\01L_OBJC_LABEL_CLASS_$" = internal global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_Bitmap" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8
+@llvm.used = appending global [5 x i8*] [i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([23 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_Bitmap" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
+
+; Function Attrs: ssp uwtable
+define internal i8* @"\01-[Bitmap initWithCopy:andInfo:andLength:]"(%0* %self, i8* %_cmd, %0* %otherBitmap, %struct.ImageInfo* byval align 8 %info, i64 %length) #0 {
+entry:                                            ; IR for the empty-bodied method quoted in the header comment of this test
+  %retval = alloca i8*, align 8
+  %self.addr = alloca %0*, align 8
+  %_cmd.addr = alloca i8*, align 8
+  %otherBitmap.addr = alloca %0*, align 8
+  %length.addr = alloca i64, align 8              ; every argument except %info gets a stack slot
+  store %0* %self, %0** %self.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%0** %self.addr}, metadata !28), !dbg !29 ; !28 = "self"
+  store i8* %_cmd, i8** %_cmd.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i8** %_cmd.addr}, metadata !30), !dbg !29 ; !30 = "_cmd"
+  store %0* %otherBitmap, %0** %otherBitmap.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%0** %otherBitmap.addr}, metadata !32), !dbg !29 ; !32 = "otherBitmap"
+  call void @llvm.dbg.declare(metadata !{%struct.ImageInfo* %info}, metadata !33), !dbg !34 ; !33 = "info": declared directly on the byval argument, which has no other use in this function -- the case the CHECK lines target
+  store i64 %length, i64* %length.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i64* %length.addr}, metadata !35), !dbg !36 ; !35 = "length"
+  %0 = load i8** %retval, !dbg !37                ; %retval is never stored to in this function
+  ret i8* %0, !dbg !37
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!24, !25, !26, !27, !38}
+
+!0 = metadata !{i32 786449, metadata !1, i32 17, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !6, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/t.mm] [DW_LANG_ObjC_plus_plus]
+!1 = metadata !{metadata !"t.mm", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"Bitmap", i32 8, i64 8, i64 8, i32 0, i32 512, null, metadata !2, i32 17, null, null, null} ; [ DW_TAG_structure_type ] [Bitmap] [line 8, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/t.mm]
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"-[Bitmap initWithCopy:andInfo:andLength:]", metadata !"-[Bitmap initWithCopy:andInfo:andLength:]", metadata !"", i32 9, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*, %0*, %struct.ImageInfo*, i64)* @"\01-[Bitmap initWithCopy:andInfo:andLength:]", null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [-[Bitmap initWithCopy:andInfo:andLength:]]
+!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{metadata !4, metadata !10, metadata !11, metadata !14, metadata !15, metadata !19}
+!10 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Bitmap]
+!11 = metadata !{i32 786454, metadata !1, null, metadata !"SEL", i32 9, i64 0, i64 0, i64 0, i32 64, metadata !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [artificial] [from ]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
+!13 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Bitmap]
+!15 = metadata !{i32 786454, metadata !1, null, metadata !"ImageInfo", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [ImageInfo] [line 7, size 0, align 0, offset 0] [from ]
+!16 = metadata !{i32 786451, metadata !1, null, metadata !"", i32 2, i64 192, i64 64, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 2, size 192, align 64, offset 0] [def] [from ]
+!17 = metadata !{metadata !18, metadata !21, metadata !22}
+!18 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"width", i32 4, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [width] [line 4, size 64, align 64, offset 0] [from NSUInteger]
+!19 = metadata !{i32 786454, metadata !1, null, metadata !"NSUInteger", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] [NSUInteger] [line 1, size 0, align 0, offset 0] [from long unsigned int]
+!20 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!21 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"height", i32 5, i64 64, i64 64, i64 64, i32 0, metadata !19} ; [ DW_TAG_member ] [height] [line 5, size 64, align 64, offset 64] [from NSUInteger]
+!22 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"pixelAspect", i32 6, i64 64, i64 64, i64 128, i32 0, metadata !23} ; [ DW_TAG_member ] [pixelAspect] [line 6, size 64, align 64, offset 128] [from double]
+!23 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!24 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!25 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!26 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!27 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!28 = metadata !{i32 786689, metadata !7, metadata !"self", metadata !5, i32 16777225, metadata !14, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 9]
+!29 = metadata !{i32 9, i32 0, metadata !7, null}
+!30 = metadata !{i32 786689, metadata !7, metadata !"_cmd", metadata !5, i32 33554441, metadata !31, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 9]
+!31 = metadata !{i32 786454, metadata !1, null, metadata !"SEL", i32 9, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [from ]
+!32 = metadata !{i32 786689, metadata !7, metadata !"otherBitmap", metadata !5, i32 50331657, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [otherBitmap] [line 9]
+!33 = metadata !{i32 786689, metadata !7, metadata !"info", metadata !5, i32 67108874, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [info] [line 10]
+!34 = metadata !{i32 10, i32 0, metadata !7, null}
+!35 = metadata !{i32 786689, metadata !7, metadata !"length", metadata !5, i32 83886091, metadata !19, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [length] [line 11]
+!36 = metadata !{i32 11, i32 0, metadata !7, null}
+!37 = metadata !{i32 13, i32 0, metadata !7, null}
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/coff_relative_names.ll b/test/DebugInfo/X86/coff_relative_names.ll
new file mode 100644
index 000000000000..4cc38a63bebf
--- /dev/null
+++ b/test/DebugInfo/X86/coff_relative_names.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=i686-w64-mingw32 -filetype=asm -O0 < %s | FileCheck %s
+
+; CHECK:  	.secrel32 Linfo_string0
+; CHECK:  	.secrel32 Linfo_string1
+;
+; generated from:
+; clang -g -S -emit-llvm test.c -o test.ll
+; int main()
+; {
+; 	return 0;
+; }
+
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:                                            ; IR for the `int main() { return 0; }` source quoted in the header comment
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !10                             ; only instruction carrying a debug location; !10 = line 3, scope !4 (main)
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!10 = metadata !{i32 3, i32 0, metadata !4, null}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 3b9aefc38f56..4a152963a144 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -7,15 +7,15 @@
 ; first check that we have a TAG_subprogram at a given offset and it has
 ; AT_inline.
 
-; CHECK: 0x0000011e:   DW_TAG_subprogram [18]
+; CHECK: 0x0000011c:   DW_TAG_subprogram [17]
 ; CHECK-NEXT:     DW_AT_specification
 ; CHECK-NEXT:     DW_AT_inline
 
 
 ; and then that a TAG_subprogram refers to it with AT_abstract_origin.
 
-; CHECK: 0x0000015f:   DW_TAG_subprogram [20]
-; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x011e => {0x0000011e})
+; CHECK: 0x0000015d:   DW_TAG_subprogram [19]
+; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x011c => {0x0000011c})
 
 define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
 entry:
@@ -33,43 +33,44 @@ entry:
 declare void @_Z8moz_freePv(i8*)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!60}
 
 !0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
-!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release]
 !6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10}
 !9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 10, size 0, align 0, offset 0] [decl] [from ]
 !12 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
+!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, null, null, null} ; [ DW_TAG_class_type ] NOTE(review): 14 operands here, while the other DW_TAG_class_type node (!37) has 15 (an i32 0 before the three trailing nulls) -- confirm this is intended; the CHECK offsets in this test may depend on it
 !14 = metadata !{metadata !12, metadata !15}
 !15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !17 = metadata !{null, metadata !10}
 !18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !20 = metadata !{metadata !22}
 !22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
 !24 = metadata !{metadata !26}
 !26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ]
+!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
 !28 = metadata !{metadata !30}
 !30 = metadata !{i32 786689, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ]
-!32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [operator=]
+!32 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !33 = metadata !{metadata !9, metadata !34, metadata !9}
-!34 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
-!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!34 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
+!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 2, size 0, align 0, offset 0] [decl] [from ]
 !36 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
-!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
+!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null, null} ; [ DW_TAG_class_type ] [nsAutoRefCnt] [line 2, size 32, align 32, offset 0] [def] [from ]
 !38 = metadata !{metadata !39, metadata !40, metadata !36}
 !39 = metadata !{i32 786445, metadata !59, metadata !37, metadata !"mValue", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
 !40 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ]
-!41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!41 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !42 = metadata !{null, metadata !34}
 !43 = metadata !{metadata !45, metadata !46}
 !45 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
@@ -86,3 +87,4 @@ declare void @_Z8moz_freePv(i8*)
 !57 = metadata !{i32 19, i32 3, metadata !55, metadata !58}
 !58 = metadata !{i32 18, i32 41, metadata !23, null}
 !59 = metadata !{metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src"}
+!60 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/data_member_location.ll b/test/DebugInfo/X86/data_member_location.ll
new file mode 100644
index 000000000000..1adddb97be25
--- /dev/null
+++ b/test/DebugInfo/X86/data_member_location.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-linux -O0 -o - -filetype=obj < %s | llvm-dwarfdump -debug-dump=info -| FileCheck %s
+
+; Generated from Clang with the following source:
+;
+; struct foo {
+;   char c;
+;   int i;
+; };
+; 
+; foo f;
+
+; CHECK: DW_AT_name {{.*}} "c"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_data_member_location {{.*}} (0x00)
+
+; CHECK: DW_AT_name {{.*}} "i"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_data_member_location {{.*}} (0x04)
+
+%struct.foo = type { i8, i32 }
+
+@f = global %struct.foo zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !15}
+!llvm.ident = !{!14}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !10, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/data_member_location.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"data_member_location.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !8}
+!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS3foo", metadata !"c", i32 2, i64 8, i64 8, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [c] [line 2, size 8, align 8, offset 0] [from char]
+!7 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!8 = metadata !{i32 786445, metadata !1, metadata !"_ZTS3foo", metadata !"i", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !9} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 32] [from int]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !12, i32 6, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
+!12 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/data_member_location.cpp]
+!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!14 = metadata !{metadata !"clang version 3.4 "}
+
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-at-specficiation.ll b/test/DebugInfo/X86/dbg-at-specficiation.ll
new file mode 100644
index 000000000000..8003a0fc15b7
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-at-specficiation.ll
@@ -0,0 +1,21 @@
+; RUN: llc  < %s | FileCheck %s
+; Radar 10147769
+; Do not unnecessarily use AT_specification DIE.
+; CHECK-NOT: AT_specification
+
+@a = common global [10 x i32] zeroinitializer, align 16
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}
+
+!0 = metadata !{i32 720913, metadata !11, i32 12, metadata !"clang version 3.0 (trunk 140253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, i32 0} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, [10 x i32]* @a, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !11} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720897, null, null, null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int]
+!8 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
+!11 = metadata !{metadata !"x.c", metadata !"/private/tmp"}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-byval-parameter.ll b/test/DebugInfo/X86/dbg-byval-parameter.ll
new file mode 100644
index 000000000000..d66486d14ae5
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-byval-parameter.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=x86 -asm-verbose < %s | grep DW_TAG_formal_parameter
+
+
+%struct.Pt = type { double, double }
+%struct.Rect = type { %struct.Pt, %struct.Pt }
+
+define double @foo(%struct.Rect* byval %my_r0) nounwind ssp { ; returns the first double of the first %struct.Pt inside the byval argument %my_r0
+entry:
+  %retval = alloca double                         ; <double*> [#uses=2]
+  %0 = alloca double                              ; <double*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15 ; ties %my_r0 to !0, the DW_TAG_arg_variable "my_r0"
+  %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
+  %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
+  %3 = load double* %2, align 8, !dbg !16         ; <double> [#uses=1]
+  store double %3, double* %0, align 8, !dbg !16
+  %4 = load double* %0, align 8, !dbg !16         ; <double> [#uses=1]
+  store double %4, double* %retval, align 8, !dbg !16
+  br label %return, !dbg !16
+
+return:                                           ; preds = %entry
+  %retval1 = load double* %retval, !dbg !16       ; <double> [#uses=1]
+  ret double %retval1, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!21}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, metadata !19, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{metadata !6, metadata !7}
+!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!15 = metadata !{i32 11, i32 0, metadata !1, null}
+!16 = metadata !{i32 12, i32 0, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !19, metadata !1, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{metadata !1}
+!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
+!20 = metadata !{i32 0}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-const-int.ll b/test/DebugInfo/X86/dbg-const-int.ll
new file mode 100644
index 000000000000..f2f51c9b0f3d
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-const-int.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj %s -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+; Radar 9511391
+
+; CHECK: DW_TAG_variable
+; CHECK: "i"
+; CHECK: DW_AT_const_value [DW_FORM_sdata]   (42)
+
+define i32 @foo() nounwind uwtable readnone optsize ssp { ; returns 42; the only debug-described value is the constant below
+entry:
+  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9 ; !6 is the auto variable "i", !8 is the constant i32 42
+  ret i32 42, !dbg !10
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15}
+
+!0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, metadata !14, metadata !14, metadata !11, null,  null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !13, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !13, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{i32 786443, metadata !13, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 42}
+!9 = metadata !{i32 2, i32 12, metadata !7, null}
+!10 = metadata !{i32 3, i32 2, metadata !7, null}
+!11 = metadata !{metadata !1}
+!12 = metadata !{metadata !6}
+!13 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
+!14 = metadata !{i32 0}
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-const.ll b/test/DebugInfo/X86/dbg-const.ll
new file mode 100644
index 000000000000..12dc154c051b
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-const.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s - | FileCheck %s
+;
+; FIXME: A potentially more interesting test case would be:
+; %call = @bar()
+; dbg.value j=0
+; %call2 = @bar()
+; dbg.value j=%call
+;
+; We cannot currently handle the above sequence because codegenprepare
+; hoists the second dbg.value above %call2, which then appears to
+; conflict with j=0. It does this because SelectionDAG cannot handle
+; global debug values.
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+;CHECK:        ## DW_OP_constu
+;CHECK-NEXT:  .byte	42
+define i32 @foobar() nounwind readonly noinline ssp { ; returns the sum of two calls to @bar
+entry:
+  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9 ; variable "j" (!6) is first described by the constant i32 42 (!8)
+  %call = tail call i32 @bar(), !dbg !11
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !6), !dbg !11 ; "j" is then re-described as the result of the first call
+  %call2 = tail call i32 @bar(), !dbg !11
+  %add = add nsw i32 %call2, %call, !dbg !12
+  ret i32 %add, !dbg !10
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare i32 @bar() nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!17}
+
+!0 = metadata !{i32 786478, metadata !15, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, metadata !16, metadata !16, metadata !13, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, metadata !15, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null}
+!7 = metadata !{i32 786443, metadata !15, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 42}
+!9 = metadata !{i32 15, i32 12, metadata !7, null}
+!10 = metadata !{i32 23, i32 3, metadata !7, null}
+!11 = metadata !{i32 17, i32 3, metadata !7, null}
+!12 = metadata !{i32 18, i32 3, metadata !7, null}
+!13 = metadata !{metadata !0}
+!14 = metadata !{metadata !6}
+!15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"}
+!16 = metadata !{i32 0}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll
new file mode 100644
index 000000000000..7bf6f4fa5dfd
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -0,0 +1,127 @@
+; RUN: llc -O0 -fast-isel=false < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+;Radar 9321650
+
+;CHECK: ##DEBUG_VALUE: my_a 
+
+%class.A = type { i32, i32, i32, i32 }
+
+define void @_Z3fooi(%class.A* sret %agg.result, i32 %i) ssp { ; writes %j into field 0 of the sret result; %j is 0, or %i + 1 when %i == 42
+entry:
+  %i.addr = alloca i32, align 4
+  %j = alloca i32, align 4
+  %nrvo = alloca i1
+  %cleanup.dest.slot = alloca i32
+  store i32 %i, i32* %i.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !26), !dbg !27 ; !26 is the DW_TAG_arg_variable "i"
+  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !28), !dbg !30 ; !28 is the DW_TAG_auto_variable "j"
+  store i32 0, i32* %j, align 4, !dbg !31
+  %tmp = load i32* %i.addr, align 4, !dbg !32
+  %cmp = icmp eq i32 %tmp, 42, !dbg !32
+  br i1 %cmp, label %if.then, label %if.end, !dbg !32
+
+if.then:                                          ; preds = %entry
+  %tmp1 = load i32* %i.addr, align 4, !dbg !33
+  %add = add nsw i32 %tmp1, 1, !dbg !33
+  store i32 %add, i32* %j, align 4, !dbg !33
+  br label %if.end, !dbg !35
+
+if.end:                                           ; preds = %if.then, %entry
+  store i1 false, i1* %nrvo, !dbg !36
+  call void @llvm.dbg.declare(metadata !{%class.A* %agg.result}, metadata !37), !dbg !39 ; !37 is the auto variable "my_a", declared directly on the sret argument; this is the variable the test expects in the output
+  %tmp2 = load i32* %j, align 4, !dbg !40
+  %x = getelementptr inbounds %class.A* %agg.result, i32 0, i32 0, !dbg !40
+  store i32 %tmp2, i32* %x, align 4, !dbg !40
+  store i1 true, i1* %nrvo, !dbg !41
+  store i32 1, i32* %cleanup.dest.slot
+  %nrvo.val = load i1* %nrvo, !dbg !42
+  br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !42 ; the destructor call below is only reached when %nrvo is false
+
+nrvo.unused:                                      ; preds = %if.end
+  call void @_ZN1AD1Ev(%class.A* %agg.result), !dbg !42
+  br label %nrvo.skipdtor, !dbg !42
+
+nrvo.skipdtor:                                    ; preds = %nrvo.unused, %if.end
+  ret void, !dbg !42
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 { ; spills %this to the stack and forwards it to @_ZN1AD2Ev
+entry:
+  %this.addr = alloca %class.A*, align 8
+  store %class.A* %this, %class.A** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !43), !dbg !44 ; !43 is the DW_TAG_arg_variable "this" scoped to subprogram !22
+  %this1 = load %class.A** %this.addr
+  call void @_ZN1AD2Ev(%class.A* %this1)
+  ret void, !dbg !45
+}
+
+define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp align 2 { ; stores 1 into field 0 of the object pointed to by %this
+entry:
+  %this.addr = alloca %class.A*, align 8
+  store %class.A* %this, %class.A** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !46), !dbg !47 ; !46 is the DW_TAG_arg_variable "this" scoped to subprogram !25
+  %this1 = load %class.A** %this.addr
+  %x = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !48
+  store i32 1, i32* %x, align 4, !dbg !48
+  ret void, !dbg !48
+}
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!52}
+
+!0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"~A", metadata !"~A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589826, metadata !51, metadata !2, metadata !"A", i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 2, size 128, align 32, offset 0] [def] [from ]
+!2 = metadata !{i32 786449, metadata !51, i32 4, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, metadata !24, metadata !24, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
+!4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14}
+!5 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"x", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"y", i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
+!8 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"z", i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!9 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"o", i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{null, metadata !13}
+!13 = metadata !{i32 786447, metadata !2, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !13, metadata !17}
+!17 = metadata !{i32 589840, null, metadata !2, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ]
+!18 = metadata !{i32 786470, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
+!19 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo]
+!20 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = metadata !{metadata !1}
+!22 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A]
+!23 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = metadata !{null}
+!25 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A]
+!26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!27 = metadata !{i32 4, i32 11, metadata !19, null}
+!28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 786443, metadata !51, metadata !19, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 5, i32 7, metadata !29, null}
+!31 = metadata !{i32 5, i32 12, metadata !29, null}
+!32 = metadata !{i32 6, i32 3, metadata !29, null}
+!33 = metadata !{i32 7, i32 5, metadata !34, null}
+!34 = metadata !{i32 786443, metadata !51, metadata !29, i32 6, i32 16, i32 1} ; [ DW_TAG_lexical_block ]
+!35 = metadata !{i32 8, i32 3, metadata !34, null}
+!36 = metadata !{i32 9, i32 9, metadata !29, null}
+!37 = metadata !{i32 786688, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0, null} ; [ DW_TAG_auto_variable ]
+!38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!39 = metadata !{i32 9, i32 5, metadata !29, null}
+!40 = metadata !{i32 10, i32 3, metadata !29, null}
+!41 = metadata !{i32 11, i32 3, metadata !29, null}
+!42 = metadata !{i32 12, i32 1, metadata !29, null}
+!43 = metadata !{i32 786689, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
+!44 = metadata !{i32 2, i32 47, metadata !22, null}
+!45 = metadata !{i32 2, i32 61, metadata !22, null}
+!46 = metadata !{i32 786689, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
+!47 = metadata !{i32 2, i32 47, metadata !25, null}
+!48 = metadata !{i32 2, i32 54, metadata !49, null}
+!49 = metadata !{i32 786443, metadata !51, metadata !25, i32 2, i32 52, i32 2} ; [ DW_TAG_lexical_block ]
+!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25}
+!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"}
+!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-declare.ll b/test/DebugInfo/X86/dbg-declare.ll
new file mode 100644
index 000000000000..988d0bcb7713
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-declare.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -O0 -mtriple x86_64-apple-darwin
+; <rdar://problem/11134152>
+
+define i32 @foo(i32* %x) nounwind uwtable ssp { ; allocates a variable-length i8 array with *%x elements, restores the stack, and returns 0
+entry:
+  %x.addr = alloca i32*, align 8
+  %saved_stack = alloca i8*
+  %cleanup.dest.slot = alloca i32
+  store i32* %x, i32** %x.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15 ; !14 is the DW_TAG_arg_variable "x"
+  %0 = load i32** %x.addr, align 8, !dbg !16
+  %1 = load i32* %0, align 4, !dbg !16
+  %2 = zext i32 %1 to i64, !dbg !16
+  %3 = call i8* @llvm.stacksave(), !dbg !16
+  store i8* %3, i8** %saved_stack, !dbg !16
+  %vla = alloca i8, i64 %2, align 16, !dbg !16
+  call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23 ; !18 is the auto variable "a", a char array whose subrange node !22 holds (0, -1)
+  store i32 1, i32* %cleanup.dest.slot
+  %4 = load i8** %saved_stack, !dbg !24
+  call void @llvm.stackrestore(i8* %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!27}
+
+!0 = metadata !{i32 786449, metadata !26, i32 12, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !26, metadata !0, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 5, i32 21, metadata !5, null}
+!16 = metadata !{i32 7, i32 13, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !26, metadata !5, i32 6, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 786433, null, null, null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 8, offset 0] [from char]
+!20 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
+!23 = metadata !{i32 7, i32 8, metadata !17, null}
+!24 = metadata !{i32 9, i32 1, metadata !17, null}
+!25 = metadata !{i32 8, i32 3, metadata !17, null}
+!26 = metadata !{metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm"}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-file-name.ll b/test/DebugInfo/X86/dbg-file-name.ll
new file mode 100644
index 000000000000..e9c61c13168f
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-file-name.ll
@@ -0,0 +1,24 @@
+; RUN: llc -enable-dwarf-directory -mtriple x86_64-apple-darwin10.0.0  < %s | FileCheck %s
+
+; Radar 8884898
+; CHECK: file	1 "simple.c"
+
+declare i32 @printf(i8*, ...) nounwind
+
+define i32 @main() nounwind { ; trivial body; the test only inspects the file directive emitted for this module
+  ret i32 0
+}
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!12}
+
+!1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786468, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !10, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !5}
+!9 = metadata !{metadata !6}
+!10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"}
+!11 = metadata !{i32 0}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-i128-const.ll b/test/DebugInfo/X86/dbg-i128-const.ll
new file mode 100644
index 000000000000..01b105fb100b
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-i128-const.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+
+; CHECK: DW_AT_const_value
+; CHECK-NEXT: 42
+
+define i128 @__foo(i128 %a, i128 %b) nounwind { ; returns %a + %b; the debug intrinsic below carries a 128-bit constant
+entry:
+  tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11 ; !1 is the auto variable "MAX", !0 is the constant i128 42
+  %add = add i128 %a, %b, !dbg !11
+  ret i128 %add, !dbg !11
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!16}
+
+!0 = metadata !{i128 42 }
+!1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
+!2 = metadata !{i32 786443, metadata !13, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 786478, metadata !13, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786449, metadata !13, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !15, metadata !15, metadata !12, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8, metadata !8}
+!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
+!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 29, i32 0, metadata !2, null}
+!12 = metadata !{metadata !3}
+!13 = metadata !{metadata !"foo.c", metadata !"/tmp"}
+!14 = metadata !{metadata !"myint.h", metadata !"/tmp"}
+!15 = metadata !{i32 0}
+!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-large-unsigned-const.ll b/test/DebugInfo/X86/dbg-large-unsigned-const.ll
new file mode 100644
index 000000000000..a037f3c269ee
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-large-unsigned-const.ll
@@ -0,0 +1,62 @@
+; RUN: llc -filetype=obj %s -o /dev/null
+; Handle large unsigned constant values.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-macosx10.7.0"
+
+define zeroext i1 @_Z3iseRKxS0_(i64* nocapture %LHS, i64* nocapture %RHS) nounwind readonly optsize ssp { ; returns whether the two pointed-to i64 values are equal
+entry:
+  tail call void @llvm.dbg.value(metadata !{i64* %LHS}, i64 0, metadata !7), !dbg !13 ; !7 is the DW_TAG_arg_variable "LHS"
+  tail call void @llvm.dbg.value(metadata !{i64* %RHS}, i64 0, metadata !11), !dbg !14 ; !11 is the DW_TAG_arg_variable "RHS"
+  %tmp1 = load i64* %LHS, align 4, !dbg !15
+  %tmp3 = load i64* %RHS, align 4, !dbg !15
+  %cmp = icmp eq i64 %tmp1, %tmp3, !dbg !15
+  ret i1 %cmp, !dbg !15
+}
+
+define zeroext i1 @_Z2fnx(i64 %a) nounwind readnone optsize ssp { ; returns whether %a equals 9223372036854775807
+entry:
+  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20 ; !12 is the DW_TAG_arg_variable "a"
+  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20
+  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !21), !dbg !24 ; !21 is an "LHS" argument node whose last operand is location !22 (presumably the inlined-at site; confirm)
+  tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !26), !dbg !27 ; !26 is the matching "RHS" node, described by the constant i64 9223372036854775807 (!25)
+  %cmp.i = icmp eq i64 %a, 9223372036854775807, !dbg !28
+  ret i1 %cmp.i, !dbg !22
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!34}
+!29 = metadata !{metadata !1, metadata !6}
+!30 = metadata !{metadata !7, metadata !11}
+!31 = metadata !{metadata !12}
+
+!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, metadata !33, metadata !33, metadata !29, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !32, null, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !32, null, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [fn]
+!7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
+!9 = metadata !{i32 786470, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ]
+!10 = metadata !{i32 786468, null, metadata !0, metadata !"long long int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 2, i32 27, metadata !1, null}
+!14 = metadata !{i32 2, i32 49, metadata !1, null}
+!15 = metadata !{i32 3, i32 3, metadata !16, null}
+!16 = metadata !{i32 786443, metadata !32, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 19, metadata !6, null}
+!21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 7, i32 10, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !32, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 2, i32 27, metadata !1, metadata !22}
+!25 = metadata !{i64 9223372036854775807}         
+!26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
+!27 = metadata !{i32 2, i32 49, metadata !1, metadata !22}
+!28 = metadata !{i32 3, i32 3, metadata !16, metadata !22}
+!32 = metadata !{metadata !"lli.cc", metadata !"/private/tmp"}
+!33 = metadata !{i32 0}
+!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-merge-loc-entry.ll b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
new file mode 100644
index 000000000000..8b619ea8607d
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; RUN: llc < %s -o %t -filetype=obj -regalloc=basic
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin8"
+
+;CHECK: DW_AT_location{{.*}}(<0x01> 55 )
+
+%0 = type { i64, i1 }
+
+@__clz_tab = external constant [256 x i8]
+
+define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone { ; reduced body: both branch conditions and the return value are undef
+entry:
+  tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15 ; !14 = arg variable "u"
+  tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21 ; !17 = auto variable "c", described by constant node !16 (i64 0)
+  br i1 undef, label %bb2, label %bb4, !dbg !22
+
+bb2:                                              ; preds = %entry
+  br label %bb4, !dbg !23
+
+bb4:                                              ; preds = %bb2, %entry
+  br i1 undef, label %__udivmodti4.exit, label %bb82.i, !dbg !24 ; !24 is scoped in a __udivmodti4 block (!25) with inlined-at location !26
+
+bb82.i:                                           ; preds = %bb4
+  unreachable
+
+__udivmodti4.exit:                                ; preds = %bb4
+  ret i128 undef, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!32}
+
+!0 = metadata !{i32 786478, metadata !29, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !29, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !31, metadata !31, metadata !28, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
+!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786478, metadata !29, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{metadata !12, metadata !12, metadata !12}
+!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 1093, i32 0, metadata !9, null}
+!16 = metadata !{i64 0}
+!17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 786443, metadata !29, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 1095, i32 0, metadata !18, null}
+!22 = metadata !{i32 1103, i32 0, metadata !18, null}
+!23 = metadata !{i32 1104, i32 0, metadata !18, null}
+!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
+!25 = metadata !{i32 786443, metadata !29, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 1107, i32 0, metadata !18, null}
+!27 = metadata !{i32 1111, i32 0, metadata !18, null}
+!28 = metadata !{metadata !0, metadata !9}
+!29 = metadata !{metadata !"foobar.c", metadata !"/tmp"}
+!30 = metadata !{metadata !"foobar.h", metadata !"/tmp"}
+!31 = metadata !{i32 0}
+!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-prolog-end.ll b/test/DebugInfo/X86/dbg-prolog-end.ll
new file mode 100644
index 000000000000..a7c6cb5438ec
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-prolog-end.ll
@@ -0,0 +1,59 @@
+; RUN: llc -O0 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+
+;CHECK: .loc	1 2 11 prologue_end
+define i32 @foo(i32 %i) nounwind ssp { ; computes (2 + 1) + %i through the stack slot %j
+entry:
+  %i.addr = alloca i32, align 4
+  %j = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4 ; argument spill; carries no !dbg location
+  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !7), !dbg !8 ; !7 = arg variable "i"
+  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !9), !dbg !11 ; !9 = auto variable "j"
+  store i32 2, i32* %j, align 4, !dbg !12 ; !12 = line 2, col 11: same line/column as the .loc pattern above
+  %tmp = load i32* %j, align 4, !dbg !13
+  %inc = add nsw i32 %tmp, 1, !dbg !13
+  store i32 %inc, i32* %j, align 4, !dbg !13
+  %tmp1 = load i32* %j, align 4, !dbg !14
+  %tmp2 = load i32* %i.addr, align 4, !dbg !14
+  %add = add nsw i32 %tmp1, %tmp2, !dbg !14
+  store i32 %add, i32* %j, align 4, !dbg !14
+  %tmp3 = load i32* %j, align 4, !dbg !15
+  ret i32 %tmp3, !dbg !15
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main() nounwind ssp { ; returns the result of foo(21)
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval ; no !dbg location attached to this store
+  %call = call i32 @foo(i32 21), !dbg !16 ; !16 = line 8, col 2
+  ret i32 %call, !dbg !16
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
+!18 = metadata !{metadata !1, metadata !6}
+
+!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 1, i32 13, metadata !1, null}
+!9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 786443, metadata !19, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 2, i32 6, metadata !10, null}
+!12 = metadata !{i32 2, i32 11, metadata !10, null}
+!13 = metadata !{i32 3, i32 2, metadata !10, null}
+!14 = metadata !{i32 4, i32 2, metadata !10, null}
+!15 = metadata !{i32 5, i32 2, metadata !10, null}
+!16 = metadata !{i32 8, i32 2, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !19, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{metadata !"/tmp/a.c", metadata !"/private/tmp"}
+!20 = metadata !{i32 0}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-subrange.ll b/test/DebugInfo/X86/dbg-subrange.ll
new file mode 100644
index 000000000000..5bf330c9b9d7
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-subrange.ll
@@ -0,0 +1,37 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; Radar 10464995
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@s = common global [4294967296 x i8] zeroinitializer, align 16
+;CHECK: .long	4294967295
+
+define void @bar() nounwind uwtable ssp { ; writes one byte into the 4294967296-element (2^32) array @s
+entry:
+  store i8 97, i8* getelementptr inbounds ([4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18 ; stores 97 into element 0 of @s
+  ret void, !dbg !20
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22}
+
+!0 = metadata !{i32 786449, metadata !21, i32 12, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !21, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @bar, null, null, metadata !9, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
+!6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null}
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!11 = metadata !{metadata !13}
+!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 720897, null, null, null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 34359738368, align 8, offset 0] [from char]
+!15 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ]
+!18 = metadata !{i32 5, i32 3, metadata !19, null}
+!19 = metadata !{i32 786443, metadata !21, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 1, metadata !19, null}
+!21 = metadata !{metadata !"small.c", metadata !"/private/tmp"}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-value-dag-combine.ll b/test/DebugInfo/X86/dbg-value-dag-combine.ll
new file mode 100644
index 000000000000..12aa61ba125e
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-value-dag-combine.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+; PR 9817
+
+
+declare  <4 x i32> @__amdil_get_global_id_int()
+declare  void @llvm.dbg.value(metadata , i64 , metadata )
+define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind { ; loads *%ip and stores it to %ip[lane 0 of the global id]
+entry:
+  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata
+!7), !dbg !8 ; continuation of the call above; !7 = arg variable "ip"
+  %0 = call <4 x i32> @__amdil_get_global_id_int() nounwind
+  %1 = extractelement <4 x i32> %0, i32 0 ; lane 0 of the returned vector
+  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !9), !dbg !11 ; !9 = auto variable "gid"
+  call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14 ; !13 = auto variable "idx", first described by node !12 (i32 0)
+  %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
+  %tmp3 = add i32 0, %tmp2, !dbg !15
+; CHECK:  ##DEBUG_VALUE: idx <- E{{..$}}
+  call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
+!15 ; continuation of the call above; "idx" is now described by %tmp3, presumably the value the DEBUG_VALUE pattern matches
+  %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
+  store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
+  ret void, !dbg !17
+}
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!20}
+
+!0 = metadata !{i32 786478, metadata !19, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_test_kernel]
+!1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !19, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, metadata !12, metadata !12, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !19, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 1, i32 42, metadata !0, null}
+!9 = metadata !{i32 786688, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 786443, metadata !19, metadata !0, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 3, i32 41, metadata !10, null}
+!12 = metadata !{i32 0}
+!13 = metadata !{i32 786688, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 4, i32 20, metadata !10, null}
+!15 = metadata !{i32 5, i32 15, metadata !10, null}
+!16 = metadata !{i32 6, i32 18, metadata !10, null}
+!17 = metadata !{i32 7, i32 1, metadata !0, null}
+!18 = metadata !{metadata !0}
+!19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index da6423fa76a1..1a78772e2e32 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -3,14 +3,15 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic %s -filetype=obj -o %t
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
-;CHECK: DW_TAG_inlined_subroutine [12]
+;CHECK: DW_TAG_inlined_subroutine
 ;CHECK-NEXT: DW_AT_abstract_origin
 ;CHECK-NEXT: DW_AT_low_pc
 ;CHECK-NEXT: DW_AT_high_pc
 ;CHECK-NEXT: DW_AT_call_file
 ;CHECK-NEXT: DW_AT_call_line
 
-;CHECK: DW_TAG_formal_parameter [9]
+;CHECK: DW_TAG_formal_parameter
+;CHECK: DW_TAG_formal_parameter
 ;CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000055] = "sp")
 
 %struct.S1 = type { float*, i32 }
@@ -46,20 +47,21 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!43}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [foo]
 !1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40,  metadata !40, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !39, metadata !40,  metadata !40, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @foobar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15] [def] [scope 0] [foobar]
+!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
 !9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
 !11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S1] [line 1, size 128, align 64, offset 0] [def] [from ]
 !13 = metadata !{metadata !14, metadata !17}
 !14 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"m", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
 !15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
@@ -86,3 +88,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !40 = metadata !{metadata !19}
 !41 = metadata !{metadata !9, metadata !18}
 !42 = metadata !{metadata !"nm2.c", metadata !"/private/tmp"}
+!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-value-isel.ll b/test/DebugInfo/X86/dbg-value-isel.ll
new file mode 100644
index 000000000000..f899f48b1fdf
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-value-isel.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+; PR 9879
+
+; CHECK: ##DEBUG_VALUE: tid <-
+%0 = type { i8*, i8*, i8*, i8*, i32 }
+
+@sgv = internal addrspace(2) constant [1 x i8] zeroinitializer
+@fgv = internal addrspace(2) constant [1 x i8] zeroinitializer
+@lvgv = internal constant [0 x i8*] zeroinitializer
+@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* addrspacecast ([1 x i8] addrspace(2)* @sgv to i8*), i8* addrspacecast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata"
+
+define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind { ; stores local id + global id + local size (lane 0 of each) to *%ip
+entry:
+  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata !8), !dbg !9 ; !8 = arg variable "ip"
+  %0 = call <4 x i32> @__amdil_get_local_id_int() nounwind
+  %1 = extractelement <4 x i32> %0, i32 0
+  br label %2
+
+; <label>:2                                       ; preds = %entry
+  %3 = phi i32 [ %1, %entry ] ; single-predecessor phi: just forwards %1
+  br label %4
+
+; <label>:4                                       ; preds = %2
+  %5 = phi i32 [ %3, %2 ]
+  br label %get_local_id.exit
+
+get_local_id.exit:                                ; preds = %4
+  %6 = phi i32 [ %5, %4 ]
+  call void @llvm.dbg.value(metadata !{i32 %6}, i64 0, metadata !10), !dbg !12 ; !10 = auto variable "tid", the name in the DEBUG_VALUE pattern at the top of the file
+  %7 = call <4 x i32> @__amdil_get_global_id_int() nounwind, !dbg !12
+  %8 = extractelement <4 x i32> %7, i32 0, !dbg !12
+  br label %9
+
+; <label>:9                                       ; preds = %get_local_id.exit
+  %10 = phi i32 [ %8, %get_local_id.exit ]
+  br label %11
+
+; <label>:11                                      ; preds = %9
+  %12 = phi i32 [ %10, %9 ]
+  br label %get_global_id.exit
+
+get_global_id.exit:                               ; preds = %11
+  %13 = phi i32 [ %12, %11 ]
+  call void @llvm.dbg.value(metadata !{i32 %13}, i64 0, metadata !13), !dbg !14 ; !13 = auto variable "gid"
+  %14 = call <4 x i32> @__amdil_get_local_size_int() nounwind
+  %15 = extractelement <4 x i32> %14, i32 0
+  br label %16
+
+; <label>:16                                      ; preds = %get_global_id.exit
+  %17 = phi i32 [ %15, %get_global_id.exit ]
+  br label %18
+
+; <label>:18                                      ; preds = %16
+  %19 = phi i32 [ %17, %16 ]
+  br label %get_local_size.exit
+
+get_local_size.exit:                              ; preds = %18
+  %20 = phi i32 [ %19, %18 ]
+  call void @llvm.dbg.value(metadata !{i32 %20}, i64 0, metadata !15), !dbg !16 ; !15 = auto variable "lsz"
+  %tmp5 = add i32 %6, %13, !dbg !17
+  %tmp7 = add i32 %tmp5, %20, !dbg !17
+  store i32 %tmp7, i32 addrspace(1)* %ip, align 4, !dbg !17
+  br label %return, !dbg !17
+
+return:                                           ; preds = %get_local_size.exit
+  ret void, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare <4 x i32> @__amdil_get_local_size_int() nounwind
+
+declare <4 x i32> @__amdil_get_local_id_int() nounwind
+
+declare <4 x i32> @__amdil_get_global_id_int() nounwind
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!22}
+
+!0 = metadata !{i32 786478, metadata !20, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_nbt02_kernel]
+!1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, metadata !21, metadata !21, metadata !19, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 1, i32 32, metadata !0, null}
+!10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 5, i32 24, metadata !11, null}
+!13 = metadata !{i32 786688, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!14 = metadata !{i32 6, i32 25, metadata !11, null}
+!15 = metadata !{i32 786688, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 7, i32 26, metadata !11, null}
+!17 = metadata !{i32 9, i32 24, metadata !11, null}
+!18 = metadata !{i32 10, i32 1, metadata !0, null}
+!19 = metadata !{metadata !0}
+!20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"}
+!21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll
new file mode 100644
index 000000000000..1e21c6a00ae4
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-value-location.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -regalloc=basic | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+;Radar 8950491
+
+;CHECK: .long Lset5
+;CHECK-NEXT:        ## DW_AT_decl_file
+;CHECK-NEXT:        ## DW_AT_decl_line
+;CHECK-NEXT:        ## DW_AT_type
+;CHECK-NEXT:        ## DW_AT_location
+
+@dfm = external global i32, align 4
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp { ; reduced body: returns 0 when @dfm is 0, otherwise switches on %cmd
+entry:
+  call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13 ; !12 = arg variable named "var" in the metadata
+  %tmp.i = load i32* @dfm, align 4, !dbg !14 ; !14 is scoped in a foo_bar block (!15) with inlined-at location !17
+  %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
+  br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
+
+if.end.i:                                         ; preds = %entry
+  switch i64 %cmd, label %if.then [
+    i64 2147772420, label %bb.i
+    i64 536897538, label %bb116.i
+  ], !dbg !22
+
+bb.i:                                             ; preds = %if.end.i
+  unreachable
+
+bb116.i:                                          ; preds = %if.end.i
+  unreachable
+
+if.then:                                          ; preds = %if.end.i
+  ret i32 undef, !dbg !23
+
+if.else:                                          ; preds = %entry
+  ret i32 0
+}
+
+declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp
+declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
+declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!29}
+
+!0 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] [line 19510] [def] [foo]
+!1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, metadata !28, metadata !28, metadata !24, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @bar3, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 14827] [local] [def] [scope 0] [bar3]
+!7 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @bar2, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15397] [local] [def] [scope 0] [bar2]
+!8 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 12382] [local] [def] [scope 0] [bar]
+!9 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 19509, i32 20, metadata !0, null}
+!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
+!15 = metadata !{i32 786443, metadata !26, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 18086] [local] [def] [scope 0] [foo_bar]
+!17 = metadata !{i32 19514, i32 2, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !26, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
+!23 = metadata !{i32 19524, i32 1, metadata !18, null}
+!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8}
+!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
+!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"}
+!27 = metadata !{metadata !"f.i", metadata !"/tmp"}
+!28 = metadata !{i32 0}
+!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-value-range.ll b/test/DebugInfo/X86/dbg-value-range.ll
new file mode 100644
index 000000000000..d9e7a6382627
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-value-range.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=x86_64-apple-darwin10 < %s | FileCheck %s
+
+%struct.a = type { i32 }
+
+define i32 @bar(%struct.a* nocapture %b) nounwind ssp { ; passes field 0 of *%b to @foo, then returns that field + 1
+entry:
+  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13 ; !6 is the argument variable "b"
+  %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14 ; address of field 0 of *%b
+  %tmp2 = load i32* %tmp1, align 4, !dbg !14
+  tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14 ; !11 is the local variable "x"
+  %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18 ; source line 7: the checks at the end of this file look for a movl after ".loc 1 7 2"
+  %add = add nsw i32 %tmp2, 1, !dbg !19
+  ret i32 %add, !dbg !19
+}
+
+declare i32 @foo(...) 
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!24}
+
+!0 = metadata !{i32 786478, metadata !22, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [bar]
+!1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, metadata !23, metadata !23, metadata !20, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !22, metadata !2, metadata !"a", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 1, size 32, align 32, offset 0] [def] [from ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786445, metadata !22, metadata !1, metadata !"c", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 5, i32 19, metadata !0, null}
+!14 = metadata !{i32 6, i32 14, metadata !12, null}
+!18 = metadata !{i32 7, i32 2, metadata !12, null}
+!19 = metadata !{i32 8, i32 2, metadata !12, null}
+!20 = metadata !{metadata !0}
+!21 = metadata !{metadata !6, metadata !11}
+!22 = metadata !{metadata !"bar.c", metadata !"/private/tmp"}
+!23 = metadata !{i32 0}
+
+; Check that variable bar:b value range is appropriately truncated in debug info.
+; The variable is in %rdi which is clobbered by 'movl %ebx, %edi'.
+; The Ltmp label that follows that movl (captured below as CLOBBER) is the end of the location range.
+
+;CHECK: .loc	1 7 2
+;CHECK: movl
+;CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]]
+
+;CHECK:Ldebug_loc0:
+;CHECK-NEXT:	.quad
+;CHECK-NEXT:	.quad	[[CLOBBER]]
+;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}
+;CHECK-NEXT:    .short  Lset
+;CHECK-NEXT: Ltmp
+;CHECK-NEXT:	.byte	85 ## DW_OP_reg
+;CHECK-NEXT: Ltmp
+;CHECK-NEXT:	.quad	0
+;CHECK-NEXT:	.quad	0
+!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-value-terminator.ll b/test/DebugInfo/X86/dbg-value-terminator.ll
new file mode 100644
index 000000000000..f08f281f4f3f
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-value-terminator.ll
@@ -0,0 +1,133 @@
+; RUN: llc -mtriple=x86_64-apple-macosx < %s -verify-machineinstrs | FileCheck %s
+;
+; PR16143: MachineOperand::setIsKill(bool): Assertion
+;
+; verify-machineinstrs should ensure that DEBUG_VALUEs go before the
+; terminator.
+;
+; CHECK-LABEL: test:
+; CHECK: ##DEBUG_VALUE: i
+%a = type { i32, i32 }
+
+define hidden fastcc %a* @test() #1 {
+entry:
+  %0 = icmp eq %a* undef, null, !dbg !1
+  br i1 %0, label %"14", label %return, !dbg !1
+
+"14":                                             ; preds = %entry
+  br i1 undef, label %"25", label %"21", !dbg !1
+
+"21":                                             ; preds = %"14"
+  br i1 undef, label %may_unswitch_on.exit, label %"6.i", !dbg !1
+
+"6.i":                                            ; preds = %"21"
+  br i1 undef, label %"10.i", label %may_unswitch_on.exit, !dbg !1
+
+"10.i":                                           ; preds = %"6.i"
+  br i1 undef, label %may_unswitch_on.exit, label %"12.i", !dbg !1
+
+"12.i":                                           ; preds = %"10.i"
+  br i1 undef, label %"4.i.i", label %"3.i.i", !dbg !1
+
+"3.i.i":                                          ; preds = %"12.i"
+  br i1 undef, label %"4.i.i", label %VEC_edge_base_index.exit.i, !dbg !1
+
+"4.i.i":                                          ; preds = %"3.i.i", %"12.i"
+  unreachable, !dbg !1
+
+VEC_edge_base_index.exit.i:                       ; preds = %"3.i.i"
+  br i1 undef, label %may_unswitch_on.exit, label %"16.i", !dbg !1
+
+"16.i":                                           ; preds = %VEC_edge_base_index.exit.i
+  br i1 undef, label %"4.i6.i", label %"3.i5.i", !dbg !1
+
+"3.i5.i":                                         ; preds = %"16.i"
+  br i1 undef, label %VEC_edge_base_index.exit7.i, label %"4.i6.i", !dbg !1
+
+"4.i6.i":                                         ; preds = %"3.i5.i", %"16.i"
+  unreachable, !dbg !1
+
+VEC_edge_base_index.exit7.i:                      ; preds = %"3.i5.i"
+  br i1 undef, label %may_unswitch_on.exit, label %"21.i", !dbg !1
+
+"21.i":                                           ; preds = %VEC_edge_base_index.exit7.i
+  br i1 undef, label %may_unswitch_on.exit, label %"23.i", !dbg !1
+
+"23.i":                                           ; preds = %"21.i"
+  br i1 undef, label %may_unswitch_on.exit, label %"26.i", !dbg !1
+
+"26.i":                                           ; preds = %"34.i", %"23.i"
+  %1 = icmp eq i32 undef, 9, !dbg !1
+  br i1 %1, label %"34.i", label %"28.i", !dbg !1
+
+"28.i":                                           ; preds = %"26.i"
+  unreachable
+
+"34.i":                                           ; preds = %"26.i"
+  br i1 undef, label %"26.i", label %"36.i", !dbg !1
+
+"36.i":                                           ; preds = %"34.i"
+  br i1 undef, label %"37.i", label %"38.i", !dbg !1
+
+"37.i":                                           ; preds = %"36.i"
+  br label %"38.i", !dbg !1
+
+"38.i":                                           ; preds = %"37.i", %"36.i"
+  br i1 undef, label %"39.i", label %"45.i", !dbg !1
+
+"39.i":                                           ; preds = %"38.i"
+  br i1 undef, label %"41.i", label %may_unswitch_on.exit, !dbg !1
+
+"41.i":                                           ; preds = %"39.i"
+  br i1 undef, label %may_unswitch_on.exit, label %"42.i", !dbg !1
+
+"42.i":                                           ; preds = %"41.i"
+  br i1 undef, label %may_unswitch_on.exit, label %"44.i", !dbg !1
+
+"44.i":                                           ; preds = %"42.i"
+  %2 = load %a** undef, align 8, !dbg !1
+  %3 = bitcast %a* %2 to %a*, !dbg !1 ; same-type cast; %3 feeds both the dbg.value below and the phi in may_unswitch_on.exit
+  call void @llvm.dbg.value(metadata !{%a* %3}, i64 0, metadata !6), !dbg !12 ; !6 is the argument variable "i"; this is the last instruction before the block's terminator
+  br label %may_unswitch_on.exit, !dbg !1
+
+"45.i":                                           ; preds = %"38.i"
+  unreachable
+
+may_unswitch_on.exit:                             ; preds = %"44.i", %"42.i", %"41.i", %"39.i", %"23.i", %"21.i", %VEC_edge_base_index.exit7.i, %VEC_edge_base_index.exit.i, %"10.i", %"6.i", %"21"
+  %4 = phi %a* [ %3, %"44.i" ], [ null, %"6.i" ], [ null, %"10.i" ], [ null, %VEC_edge_base_index.exit7.i ], [ null, %VEC_edge_base_index.exit.i ], [ null, %"21.i" ], [ null, %"23.i" ], [ null, %"39.i" ], [ null, %"42.i" ], [ null, %"41.i" ], [ null, %"21" ]
+  br label %return
+
+"25":                                             ; preds = %"14"
+  unreachable
+
+"return":                                         ; preds = %may_unswitch_on.exit, %entry
+  %result = phi %a* [ null, %entry ], [ %4, %may_unswitch_on.exit ]
+  ret %a* %result, !dbg !1
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind uwtable }
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22}
+
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %a* ()* @test, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786468, null, metadata !0, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 2, i32 13, metadata !1, null}
+!18 = metadata !{metadata !1}
+!19 = metadata !{metadata !6, metadata !7, metadata !10}
+!20 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
+!21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll
new file mode 100644
index 000000000000..8a22cd7cca03
--- /dev/null
+++ b/test/DebugInfo/X86/dbg_value_direct.ll
@@ -0,0 +1,177 @@
+; RUN: llc -filetype=obj -O0 < %s
+; Test that we handle DBG_VALUEs in a register without crashing.
+;
+; Generated from clang with -fsanitize=address:
+; struct A {
+;   A();
+;   A(const A&);
+; };
+;
+; A func(int) {
+;   A a;
+;   return a;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.A = type { i8 }
+
+@__asan_mapping_offset = linkonce_odr constant i64 2147450880
+@__asan_mapping_scale = linkonce_odr constant i64 3
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 1, void ()* @asan.module_ctor }]
+@__asan_gen_ = private unnamed_addr constant [16 x i8] c"1 32 4 5 .addr \00", align 1
+
+; Function Attrs: sanitize_address uwtable
+define void @_Z4funci(%struct.A* noalias sret %agg.result, i32) #0 "stack-protector-buffer-size"="1" { ; func(int) from the header comment: result returned through %agg.result, the i32 parameter is unnamed (%0)
+entry:
+  %MyAlloca = alloca [96 x i8], align 32 ; the function's only alloca; everything below addresses it through integer offsets from %1
+  %1 = ptrtoint [96 x i8]* %MyAlloca to i64
+  %2 = add i64 %1, 32
+  %3 = inttoptr i64 %2 to i32* ; %3 = MyAlloca + 32, the slot that later receives the i32 argument
+  %4 = inttoptr i64 %1 to i64*
+  store i64 1102416563, i64* %4 ; first word of MyAlloca; overwritten with a different constant before the ret
+  %5 = add i64 %1, 8
+  %6 = inttoptr i64 %5 to i64*
+  store i64 ptrtoint ([16 x i8]* @__asan_gen_ to i64), i64* %6 ; MyAlloca + 8 holds the address of the @__asan_gen_ string
+  %7 = add i64 %1, 16
+  %8 = inttoptr i64 %7 to i64*
+  store i64 ptrtoint (void (%struct.A*, i32)* @_Z4funci to i64), i64* %8 ; MyAlloca + 16 holds this function's own address
+  %9 = lshr i64 %1, 3
+  %10 = add i64 %9, 2147450880 ; (MyAlloca >> 3) + 2147450880: same values as @__asan_mapping_scale / @__asan_mapping_offset; presumably the ASan shadow address -- TODO confirm
+  %11 = inttoptr i64 %10 to i32*
+  store i32 -235802127, i32* %11
+  %12 = add i64 %10, 4
+  %13 = inttoptr i64 %12 to i32*
+  store i32 -185273340, i32* %13
+  %14 = add i64 %10, 8
+  %15 = inttoptr i64 %14 to i32*
+  store i32 -202116109, i32* %15
+  %16 = ptrtoint i32* %3 to i64
+  %17 = lshr i64 %16, 3
+  %18 = add i64 %17, 2147450880 ; same shift-and-add as %10, applied to the address of the argument slot %3
+  %19 = inttoptr i64 %18 to i8*
+  %20 = load i8* %19
+  %21 = icmp ne i8 %20, 0
+  call void @llvm.dbg.declare(metadata !{i32* %3}, metadata !23) ; !23 is the unnamed argument variable; this call carries no !dbg attachment
+  br i1 %21, label %22, label %28 ; a zero byte at %19 skips straight to the store in block 28
+
+; <label>:22                                      ; preds = %entry
+  %23 = and i64 %16, 7
+  %24 = add i64 %23, 3
+  %25 = trunc i64 %24 to i8
+  %26 = icmp sge i8 %25, %20
+  br i1 %26, label %27, label %28
+
+; <label>:27                                      ; preds = %22
+  call void @__asan_report_store4(i64 %16) ; reports the address of the argument slot; followed by unreachable
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:28                                      ; preds = %22, %entry
+  store i32 %0, i32* %3, align 4 ; spill the unnamed i32 argument into MyAlloca + 32
+  call void @llvm.dbg.declare(metadata !{%struct.A* %agg.result}, metadata !24), !dbg !25 ; !24 is the local "a", declared directly on the sret pointer argument
+  call void @_ZN1AC1Ev(%struct.A* %agg.result), !dbg !25
+  store i64 1172321806, i64* %4, !dbg !26 ; replaces the constant stored at MyAlloca on entry
+  %29 = inttoptr i64 %10 to i32*, !dbg !26
+  store i32 0, i32* %29, !dbg !26 ; this and the next two stores zero the three i32 words written at %10, %10+4, %10+8 on entry
+  %30 = add i64 %10, 4, !dbg !26
+  %31 = inttoptr i64 %30 to i32*, !dbg !26
+  store i32 0, i32* %31, !dbg !26
+  %32 = add i64 %10, 8, !dbg !26
+  %33 = inttoptr i64 %32 to i32*, !dbg !26
+  store i32 0, i32* %33, !dbg !26
+  ret void, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @_ZN1AC1Ev(%struct.A*) #2
+
+define internal void @asan.module_ctor()  "stack-protector-buffer-size"="1" { ; the single entry listed in @llvm.global_ctors
+  call void @__asan_init_v3()
+  %1 = load volatile i64* @__asan_mapping_offset ; volatile load, result unused
+  %2 = load volatile i64* @__asan_mapping_scale ; volatile load, result unused
+  ret void
+}
+
+declare void @__asan_init_v3()
+
+declare void @__asan_report_load1(i64)
+
+declare void @__asan_report_load2(i64)
+
+declare void @__asan_report_load4(i64)
+
+declare void @__asan_report_load8(i64)
+
+declare void @__asan_report_load16(i64)
+
+declare void @__asan_report_store1(i64)
+
+declare void @__asan_report_store2(i64)
+
+declare void @__asan_report_store4(i64)
+
+declare void @__asan_report_store8(i64)
+
+declare void @__asan_report_store16(i64)
+
+declare void @__asan_report_load_n(i64, i64)
+
+declare void @__asan_report_store_n(i64, i64)
+
+declare void @__asan_handle_no_return()
+
+declare i64 @__asan_stack_malloc(i64, i64)
+
+declare void @__asan_stack_free(i64, i64, i64)
+
+declare void @__asan_poison_stack_memory(i64, i64)
+
+declare void @__asan_unpoison_stack_memory(i64, i64)
+
+declare void @__asan_before_dynamic_init(i64)
+
+declare void @__asan_after_dynamic_init()
+
+declare void @__asan_register_globals(i64, i64)
+
+declare void @__asan_unregister_globals(i64, i64)
+
+attributes #0 = { sanitize_address uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !27}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/crash.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"crash.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*, i32)* @_Z4funci, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/crash.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !21}
+!8 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
+!9 = metadata !{metadata !10, metadata !15}
+!10 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ] [line 2] [A]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{null, metadata !13}
+!13 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!14 = metadata !{i32 786468}
+!15 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !20, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
+!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{null, metadata !13, metadata !18}
+!18 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from A]
+!20 = metadata !{i32 786468}
+!21 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!22 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!23 = metadata !{i32 786689, metadata !4, metadata !"", metadata !5, i32 16777222, metadata !21, i32 0, i32 0, i64 2} ; [ DW_TAG_arg_variable ] [line 6]
+!24 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 7, metadata !8, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 7]
+!25 = metadata !{i32 7, i32 0, metadata !4, null}
+!26 = metadata !{i32 8, i32 0, metadata !4, null} ; debug location (same shape as !25): line 8, column 0, scope !4; only referenced through !dbg attachments
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
index 7e318f6443f8..6e4d2007a976 100644
--- a/test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -77,23 +77,24 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
 }
 
 !llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15,  metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
+!llvm.module.flags = !{!108}
+!0 = metadata !{i32 786449, metadata !107, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15,  metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
 !1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ]
 !2 = metadata !{metadata !3}
-!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ]
+!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
 !4 = metadata !{}
 !15 = metadata !{i32 0}
 !23 = metadata !{metadata !38, metadata !42}
 !27 = metadata !{i32 786454, metadata !107, null, metadata !"id", i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ]
 !28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
 !30 = metadata !{metadata !31}
 !31 = metadata !{i32 786445, metadata !107, metadata !29, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
 !32 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
-!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, i32 0, i32 0, i32 16} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [fwd] [from ]
+!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, null, i32 0, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [decl] [from ]
 !38 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke]
-!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !40 = metadata !{null, metadata !41, metadata !27}
 !41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
@@ -104,3 +105,4 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
 !105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 40]
 !106 = metadata !{i32 40, i32 0, metadata !42, null}
 !107 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", metadata !""}
+!108 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
index ae9503303812..c3bedf2b2643 100644
--- a/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -5,13 +5,15 @@
 ; rdar://problem/9279956
 ; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} )
 
+; CHECK: DW_TAG_subprogram
 ; CHECK: DW_AT_name{{.*}}_block_invoke
+
 ; CHECK-NOT: DW_TAG_subprogram
 ; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: .block_descriptor
+; CHECK-NEXT: DW_AT_name{{.*}}.block_descriptor
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_location
+
 ; CHECK-NOT: DW_TAG_subprogram
 ; CHECK: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name{{.*}}"self"
@@ -24,7 +26,7 @@
 ; 0x23 = DW_OP_uconst
 ; 0x91 = DW_OP_fbreg
 ; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} )
-; CHECK: DW_TAG_structure_type
+
 ; CHECK: [[A:.*]]:   DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_APPLE_objc_complete_type
 ; CHECK-NEXT: DW_AT_name{{.*}}"A"
@@ -258,52 +260,52 @@ attributes #2 = { nonlazybind }
 attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!56, !57, !58, !59}
+!llvm.module.flags = !{!56, !57, !58, !59, !110}
 
 !0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
 !1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [from ]
+!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [def] [from ]
 !5 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", metadata !"llvm/_build.ninja.Debug"}
 !6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
 !7 = metadata !{metadata !8, metadata !10}
 !8 = metadata !{i32 786460, null, metadata !4, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
-!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [from ]
+!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [def] [from ]
 !10 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"ivar", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int]
 !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{metadata !13, metadata !27, metadata !31, metadata !35, metadata !36, metadata !39}
 !13 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[A init]", metadata !"-[A init]", metadata !"", i32 46, metadata !14, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*)* @"\01-[A init]", null, null, metadata !2, i32 46} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]]
-!14 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !15 = metadata !{metadata !16, metadata !23, metadata !24}
 !16 = metadata !{i32 786454, metadata !5, null, metadata !"id", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ]
 !17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
 !19 = metadata !{metadata !20}
 !20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
 !21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
-!23 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
-!24 = metadata !{i32 786454, metadata !5, i32 0, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ]
+!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!23 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!24 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ]
 !25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
-!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
 !27 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__9-[A init]_block_invoke", metadata !"__9-[A init]_block_invoke", metadata !"", i32 49, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @"__9-[A init]_block_invoke", null, null, metadata !2, i32 49} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke]
-!28 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !29 = metadata !{null, metadata !30}
 !30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !31 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 52, metadata !33, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*, i8*)* @__copy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_]
 !32 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>]
-!33 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !34 = metadata !{null, metadata !30, metadata !30}
 !35 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 52, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*)* @__destroy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_]
 !36 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !37, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 60} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main]
-!37 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !38 = metadata !{metadata !11}
 !39 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 39, metadata !40, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (void ()*)* @run, null, null, metadata !2, i32 40} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run]
-!40 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !41 = metadata !{null, metadata !42}
 !42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic]
-!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [from ]
+!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [def] [from ]
 !44 = metadata !{metadata !45, metadata !46, metadata !47, metadata !48, metadata !49}
 !45 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
 !46 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int]
@@ -311,7 +313,7 @@ attributes #3 = { nounwind }
 !48 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ]
 !49 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 40, i64 64, i64 64, i64 192, i32 0, metadata !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ]
 !50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor]
-!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [from ]
+!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [def] [from ]
 !52 = metadata !{metadata !53, metadata !55}
 !53 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int]
 !54 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
@@ -338,7 +340,7 @@ attributes #3 = { nounwind }
 !75 = metadata !{i32 42, i32 0, metadata !39, null}
 !76 = metadata !{i32 786689, metadata !27, metadata !".block_descriptor", metadata !6, i32 16777265, metadata !77, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49]
 !77 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1]
-!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [from ]
+!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [def] [from ]
 !79 = metadata !{metadata !80, metadata !81, metadata !82, metadata !83, metadata !84, metadata !87}
 !80 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 49, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ]
 !81 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 49, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int]
@@ -346,7 +348,7 @@ attributes #3 = { nounwind }
 !83 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 49, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ]
 !84 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 49, i64 64, i64 64, i64 192, i32 0, metadata !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ]
 !85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose]
-!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [fwd] [from ]
+!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [decl] [from ]
 !87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ]
 !88 = metadata !{i32 49, i32 0, metadata !27, null}
 !89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, i64 2, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 52]
@@ -354,10 +356,10 @@ attributes #3 = { nounwind }
 !91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50]
 !92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
 !93 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary]
-!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [from ]
+!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [def] [from ]
 !95 = metadata !{metadata !96}
 !96 = metadata !{i32 786460, null, metadata !94, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary]
-!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [from ]
+!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [def] [from ]
 !98 = metadata !{metadata !99}
 !99 = metadata !{i32 786460, null, metadata !97, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
 !100 = metadata !{i32 50, i32 0, metadata !92, null}
@@ -370,3 +372,4 @@ attributes #3 = { nounwind }
 !107 = metadata !{i32 786688, metadata !36, metadata !"a", metadata !6, i32 61, metadata !61, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 61]
 !108 = metadata !{i32 61, i32 0, metadata !36, null}
 !109 = metadata !{i32 62, i32 0, metadata !36, null}
+!110 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
index 33485b680748..1792bb4783b5 100644
--- a/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -57,18 +57,19 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!34}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10,  metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !33, i32 4, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10,  metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
+!5 = metadata !{i32 786478, metadata !33, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
 !6 = metadata !{i32 786473, metadata !33} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !12, metadata !27, metadata !28}
 !12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def]
-!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ]
+!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [def] [from ]
 !14 = metadata !{metadata !15, metadata !16, metadata !19, metadata !20, metadata !23, metadata !24, metadata !26}
 !15 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"a", i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
 !16 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_a", i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
@@ -120,7 +121,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "const_c"
 ; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
-; PRESENT:      DW_AT_const_value {{.*}} (0x00000012)
+; PRESENT:      DW_AT_const_value {{.*}} (18)
 ; While we're here, a normal member has data_member_location and
 ; accessibility attributes.
 ; PRESENT:      DW_TAG_member
@@ -151,7 +152,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; DARWINP:      DW_AT_external
 ; DARWINP:      DW_AT_declaration
 ; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
-; DARWINP:      DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1aE"
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "const_a"
 ; DARWINP:      DW_AT_external
@@ -161,7 +161,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; DARWINP:      0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "b"
 ; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
-; DARWINP:      DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1bE"
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "const_b"
 ; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
@@ -169,11 +168,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; DARWINP:      0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "c"
 ; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
-; DARWINP:      DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1cE"
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "const_c"
 ; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
-; DARWINP:      DW_AT_const_value {{.*}} (0x00000012)
+; DARWINP:      DW_AT_const_value {{.*}} (18)
 ; While we're here, a normal member has data_member_location and
 ; accessibility attributes.
 ; DARWINP:      DW_TAG_member
@@ -255,3 +253,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; DARWINA-NOT:  DW_AT_const_value
 ; DARWINA-NOT:  DW_AT_location
 ; DARWINA:      NULL
+!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll
index 0e93427df010..67f2e5dffab0 100644
--- a/test/DebugInfo/X86/debug_frame.ll
+++ b/test/DebugInfo/X86/debug_frame.ll
@@ -10,10 +10,13 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7}
 !5 = metadata !{metadata !0}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !5, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!1 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !5, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
+!6 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build"}
+!7 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
new file mode 100644
index 000000000000..42a57bfed725
--- /dev/null
+++ b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: .short  2 # DWARF Arange version number
+; CHECK: # Segment Size
+; CHECK-NOT: debug_loc
+; CHECK: .quad global
+; CHECK-NOT: debug_loc
+; CHECK: # ARange terminator
+
+; --- Source code ---
+; Generated with "clang -g -O1 -S -emit-llvm"
+
+; int global = 2;
+; int foo(int bar) { return bar; }
+; int foo2(int bar2) { return bar2; }
+
+; int main() {
+;   return foo(2) + foo2(1) + global;
+; }
+
+
+; ModuleID = 'tmp/debug_ranges/a.cc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = global i32 2, align 4
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @_Z3fooi(i32 %bar) #0 { ; linkage name of foo (subprogram node !4); source: int foo(int bar) { return bar; }
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %bar}, i64 0, metadata !10), !dbg !20 ; associate %bar with arg-variable node !10 ("bar")
+  ret i32 %bar, !dbg !20 ; return the argument unchanged; !20 = line 2, scope !4
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @_Z4foo2i(i32 %bar2) #0 { ; linkage name of foo2 (subprogram node !11); source: int foo2(int bar2) { return bar2; }
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %bar2}, i64 0, metadata !13), !dbg !21 ; associate %bar2 with arg-variable node !13 ("bar2")
+  ret i32 %bar2, !dbg !21 ; return the argument unchanged; !21 = line 3, scope !11
+}
+
+; Function Attrs: nounwind readonly uwtable
+define i32 @main() #1 { ; source: return foo(2) + foo2(1) + global;
+entry:
+  %call = tail call i32 @_Z3fooi(i32 2), !dbg !22 ; foo(2)
+  %call1 = tail call i32 @_Z4foo2i(i32 1), !dbg !22 ; foo2(1)
+  %add = add nsw i32 %call1, %call, !dbg !22 ; sum of the two call results
+  %0 = load i32* @global, align 4, !dbg !22, !tbaa !23 ; read @global; !23 is the "int" TBAA node
+  %add2 = add nsw i32 %add, %0, !dbg !22 ; add the loaded global to the running sum
+  ret i32 %add2, !dbg !22 ; every instruction here carries !22 = line 6, scope !14 (main)
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19, !26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (191881)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !17, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/debug_ranges/a.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tmp/debug_ranges/a.cc", metadata !"/"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !11, metadata !14}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @_Z3fooi, null, null, metadata !9, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/debug_ranges/a.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786689, metadata !4, metadata !"bar", metadata !5, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bar] [line 2]
+!11 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo2", metadata !"foo2", metadata !"_Z4foo2i", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @_Z4foo2i, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [foo2]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786689, metadata !11, metadata !"bar2", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bar2] [line 3]
+!14 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !8}
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786484, i32 0, null, metadata !"global", metadata !"global", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @global, null} ; [ DW_TAG_variable ] [global] [line 1] [def]
+!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!20 = metadata !{i32 2, i32 0, metadata !4, null}
+!21 = metadata !{i32 3, i32 0, metadata !11, null}
+!22 = metadata !{i32 6, i32 0, metadata !14, null}
+!23 = metadata !{metadata !"int", metadata !24}
+!24 = metadata !{metadata !"omnipotent char", metadata !25}
+!25 = metadata !{metadata !"Simple C/C++ TBAA"}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dwarf-aranges.ll b/test/DebugInfo/X86/dwarf-aranges.ll
new file mode 100644
index 000000000000..203afc71d830
--- /dev/null
+++ b/test/DebugInfo/X86/dwarf-aranges.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s | FileCheck %s
+
+
+; -- header --
+; CHECK: .short 2 # DWARF Arange version number
+; CHECK-NEXT: .long .L.debug_info_begin0
+; CHECK-NEXT: .byte 8 # Address Size (in bytes)
+; CHECK-NEXT: .byte 0 # Segment Size (in bytes)
+; -- alignment --
+; CHECK-NEXT: .byte
+; CHECK-NEXT: .byte
+; CHECK-NEXT: .byte
+; CHECK-NEXT: .byte
+
+; <common symbols> - it should have made one span for each symbol.
+; CHECK-NEXT: .quad some_bss
+; CHECK-NEXT: .quad 4
+
+; <data section> - it should have made one span covering all vars in this CU.
+; CHECK-NEXT: .quad some_data
+; CHECK-NEXT: .Lset0 = .Ldebug_end1-some_data
+; CHECK-NEXT: .quad .Lset0
+
+; <text section> - it should have made one span covering all functions in this CU.
+; CHECK-NEXT: .quad .Lfunc_begin0
+; CHECK-NEXT: .Lset1 = .Ldebug_end2-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lset1
+
+; <other sections> - it should have made one span covering all vars in this CU.
+; CHECK-NEXT: .quad some_other
+; CHECK-NEXT: .Lset2 = .Ldebug_end3-some_other
+; CHECK-NEXT: .quad .Lset2
+
+; -- finish --
+; CHECK-NEXT: # ARange terminator
+
+
+
+; -- source code --
+; Generated from: "clang -c -g -emit-llvm"
+;
+; int some_data = 4;
+; int some_bss;
+; int some_other __attribute__ ((section ("strange+section"))) = 5;
+; 
+; void some_code()
+; {
+;    some_bss += some_data + some_other;
+; }
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@some_data = global i32 4, align 4
+@some_other = global i32 5, section "strange+section", align 4
+@some_bss = common global i32 0, align 4
+
+define void @some_code() { ; source: some_bss += some_data + some_other;
+entry:
+  %0 = load i32* @some_data, align 4, !dbg !14 ; read some_data
+  %1 = load i32* @some_other, align 4, !dbg !14 ; read some_other (global placed in section "strange+section")
+  %add = add nsw i32 %0, %1, !dbg !14 ; some_data + some_other
+  %2 = load i32* @some_bss, align 4, !dbg !14 ; read the current value of the common symbol some_bss
+  %add1 = add nsw i32 %2, %add, !dbg !14 ; some_bss + (some_data + some_other)
+  store i32 %add1, i32* @some_bss, align 4, !dbg !14 ; write the result back to some_bss; !14 = line 7, scope !4
+  ret void, !dbg !15 ; !15 = line 8, scope !4
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !16}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !8, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"/home/kayamon"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"some_code", metadata !"some_code", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @some_code, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [some_code]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/kayamon/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{metadata !9, metadata !11, metadata !12}
+!9 = metadata !{i32 786484, i32 0, null, metadata !"some_data", metadata !"some_data", metadata !"", metadata !5, i32 1, metadata !10, i32 0, i32 1, i32* @some_data, null} ; [ DW_TAG_variable ] [some_data] [line 1] [def]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786484, i32 0, null, metadata !"some_other", metadata !"some_other", metadata !"", metadata !5, i32 3, metadata !10, i32 0, i32 1, i32* @some_other, null} ; [ DW_TAG_variable ] [some_other] [line 3] [def]
+!12 = metadata !{i32 786484, i32 0, null, metadata !"some_bss", metadata !"some_bss", metadata !"", metadata !5, i32 2, metadata !10, i32 0, i32 1, i32* @some_bss, null} ; [ DW_TAG_variable ] [some_bss] [line 2] [def]
+!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!14 = metadata !{i32 7, i32 0, metadata !4, null}
+!15 = metadata !{i32 8, i32 0, metadata !4, null}
+!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll
new file mode 100644
index 000000000000..d66e5a0c1cec
--- /dev/null
+++ b/test/DebugInfo/X86/dwarf-public-names.ll
@@ -0,0 +1,132 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck --check-prefix=LINUX %s
+; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck --check-prefix=DARWIN %s
+; ModuleID = 'dwarf-public-names.cpp'
+;
+; Generated from:
+;
+; struct C {
+;   void member_function();
+;   static int static_member_function();
+;   static int static_member_variable;
+; };
+;
+; int C::static_member_variable = 0;
+;
+; void C::member_function() {
+;   static_member_variable = 0;
+; }
+;
+; int C::static_member_function() {
+;   return static_member_variable;
+; }
+;
+; C global_variable;
+;
+; int global_function() {
+;   return -1;
+; }
+;
+; namespace ns {
+;   void global_namespace_function() {
+;     global_variable.member_function();
+;   }
+;   int global_namespace_variable = 1;
+; }
+
+; Darwin shouldn't be generating the section by default, so the dump should show an empty unit (unit_size of 0).
+; DARWIN: debug_pubnames
+; DARWIN: unit_size = 0x00000000
+
+; Skip ahead in the output to the header of the pubnames section.
+; LINUX: debug_pubnames
+
+; Check for each name in the output.
+; LINUX: global_namespace_variable
+; LINUX: global_namespace_function
+; LINUX: static_member_function
+; LINUX: global_variable
+; LINUX: global_function
+; LINUX: member_function
+
+%struct.C = type { i8 }
+
+@_ZN1C22static_member_variableE = global i32 0, align 4
+@global_variable = global %struct.C zeroinitializer, align 1
+@_ZN2ns25global_namespace_variableE = global i32 1, align 4
+
+define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 {
+entry:
+  %this.addr = alloca %struct.C*, align 8
+  store %struct.C* %this, %struct.C** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !28), !dbg !30
+  %this1 = load %struct.C** %this.addr
+  store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
+  ret void, !dbg !32
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
+entry:
+  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
+  ret i32 %0, !dbg !33
+}
+
+define i32 @_Z15global_functionv() nounwind uwtable {
+entry:
+  ret i32 -1, !dbg !34
+}
+
+define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable {
+entry:
+  call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35
+  ret void, !dbg !36
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!38}
+
+!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24,  metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
+!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{null, metadata !7}
+!7 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
+!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!9 = metadata !{metadata !10, metadata !12, metadata !14}
+!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !11}
+!17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = metadata !{null}
+!24 = metadata !{metadata !25, metadata !26, metadata !27}
+!25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9]
+!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
+!30 = metadata !{i32 9, i32 0, metadata !3, null}
+!31 = metadata !{i32 10, i32 0, metadata !3, null}
+!32 = metadata !{i32 11, i32 0, metadata !3, null}
+!33 = metadata !{i32 14, i32 0, metadata !18, null}
+!34 = metadata !{i32 20, i32 0, metadata !19, null}
+!35 = metadata !{i32 25, i32 0, metadata !20, null}
+!36 = metadata !{i32 26, i32 0, metadata !20, null}
+!37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"}
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dwarf-pubnames-split.ll b/test/DebugInfo/X86/dwarf-pubnames-split.ll
new file mode 100644
index 000000000000..131e5aae51fa
--- /dev/null
+++ b/test/DebugInfo/X86/dwarf-pubnames-split.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -split-dwarf=Enable %s -o - | FileCheck %s
+; Derived from:
+
+; int main (void) {
+;    return 0;
+; }
+
+; Check that the compile-unit offset in the pubnames/pubtypes header is a symbol in the debug_info section when using split DWARF.
+
+; CHECK: .Lpubtypes_begin0:
+; CHECK-NEXT: .short    2                       # DWARF Version
+; CHECK-NEXT: .long     .L.debug_info_begin0    # Offset of Compilation Unit Info
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !10
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 189287) (llvm/trunk 189296)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!10 = metadata !{i32 2, i32 0, metadata !4, null}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll
index 5bd0c7e0bb74..b5dc01e68a93 100644
--- a/test/DebugInfo/X86/earlydup-crash.ll
+++ b/test/DebugInfo/X86/earlydup-crash.ll
@@ -42,44 +42,53 @@ bb33:                                             ; preds = %bb31, %bb22, %bb18,
 
 declare void @foobar(i32)
 
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!47}
 !0 = metadata !{i32 590080, metadata !1, metadata !"frname_len", metadata !3, i32 517, metadata !38, i32 0} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 589835, metadata !2, i32 515, i32 0, metadata !3, i32 19} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 589870, i32 0, metadata !3, metadata !"framework_construct_pathname", metadata !"framework_construct_pathname", metadata !"", metadata !3, i32 515, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 589865, metadata !"darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 589841, i32 0, i32 1, metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config/darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 589835, metadata !44, metadata !2, i32 515, i32 0, i32 19} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 589870, metadata !44, null, metadata !"framework_construct_pathname", metadata !"framework_construct_pathname", metadata !"", i32 515, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 589865, metadata !44}  ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 589841, metadata !44, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !46, metadata !46, metadata !45, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 589845, metadata !44, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7, metadata !9, metadata !11}
-!7 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 589860, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 589862, metadata !3, metadata !"", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 589846, metadata !13, metadata !"cpp_dir", metadata !13, i32 45, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
-!13 = metadata !{i32 589865, metadata !"cpplib.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/../libcpp/include", metadata !4} ; [ DW_TAG_file_type ]
-!14 = metadata !{i32 589843, metadata !3, metadata !"cpp_dir", metadata !13, i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 589862, metadata !44, metadata !3, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 589846, metadata !41, metadata !13, metadata !"cpp_dir", i32 45, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 589865, metadata !41} ; [ DW_TAG_file_type ]
+!14 = metadata !{i32 589843, metadata !41, metadata !3, metadata !"cpp_dir", i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [cpp_dir] [line 43, size 352, align 32, offset 0] [def] [from ]
 !15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !21, metadata !23, metadata !25, metadata !27, metadata !29, metadata !33, metadata !36}
-!16 = metadata !{i32 589837, metadata !14, metadata !"next", metadata !13, i32 572, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ]
-!17 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 589837, metadata !14, metadata !"name", metadata !13, i32 575, i64 32, i64 32, i64 32, i32 0, metadata !7} ; [ DW_TAG_member ]
-!19 = metadata !{i32 589837, metadata !14, metadata !"len", metadata !13, i32 576, i64 32, i64 32, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
-!20 = metadata !{i32 589860, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 589837, metadata !14, metadata !"sysp", metadata !13, i32 580, i64 8, i64 8, i64 96, i32 0, metadata !22} ; [ DW_TAG_member ]
-!22 = metadata !{i32 589860, metadata !3, metadata !"unsigned char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!23 = metadata !{i32 589837, metadata !14, metadata !"name_map", metadata !13, i32 584, i64 32, i64 32, i64 128, i32 0, metadata !24} ; [ DW_TAG_member ]
-!24 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!25 = metadata !{i32 589837, metadata !14, metadata !"header_map", metadata !13, i32 590, i64 32, i64 32, i64 160, i32 0, metadata !26} ; [ DW_TAG_member ]
-!26 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!27 = metadata !{i32 589837, metadata !14, metadata !"construct", metadata !13, i32 597, i64 32, i64 32, i64 192, i32 0, metadata !28} ; [ DW_TAG_member ]
-!28 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!29 = metadata !{i32 589837, metadata !14, metadata !"ino", metadata !13, i32 601, i64 64, i64 64, i64 224, i32 0, metadata !30} ; [ DW_TAG_member ]
-!30 = metadata !{i32 589846, metadata !31, metadata !"ino_t", metadata !31, i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]
-!31 = metadata !{i32 589865, metadata !"types.h", metadata !"/usr/include/sys", metadata !4} ; [ DW_TAG_file_type ]
-!32 = metadata !{i32 589860, metadata !3, metadata !"long long unsigned int", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!33 = metadata !{i32 589837, metadata !14, metadata !"dev", metadata !13, i32 602, i64 32, i64 32, i64 288, i32 0, metadata !34} ; [ DW_TAG_member ]
-!34 = metadata !{i32 589846, metadata !31, metadata !"dev_t", metadata !31, i32 107, i64 0, i64 0, i64 0, i32 0, metadata !35} ; [ DW_TAG_typedef ]
-!35 = metadata !{i32 589860, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!36 = metadata !{i32 589837, metadata !14, metadata !"user_supplied_p", metadata !13, i32 605, i64 8, i64 8, i64 320, i32 0, metadata !37} ; [ DW_TAG_member ]
-!37 = metadata !{i32 589860, metadata !3, metadata !"_Bool", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!38 = metadata !{i32 589846, metadata !39, metadata !"size_t", metadata !39, i32 326, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ]
-!39 = metadata !{i32 589865, metadata !"stddef.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include", metadata !4} ; [ DW_TAG_file_type ]
-!40 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"next", i32 572, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ]
+!17 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"name", i32 575, i64 32, i64 32, i64 32, i32 0, metadata !7} ; [ DW_TAG_member ]
+!19 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"len", i32 576, i64 32, i64 32, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"sysp", i32 580, i64 8, i64 8, i64 96, i32 0, metadata !22} ; [ DW_TAG_member ]
+!22 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"name_map", i32 584, i64 32, i64 32, i64 128, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"header_map", i32 590, i64 32, i64 32, i64 160, i32 0, metadata !26} ; [ DW_TAG_member ]
+!26 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!27 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"construct", i32 597, i64 32, i64 32, i64 192, i32 0, metadata !28} ; [ DW_TAG_member ]
+!28 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"ino", i32 601, i64 64, i64 64, i64 224, i32 0, metadata !30} ; [ DW_TAG_member ]
+!30 = metadata !{i32 589846, metadata !42, metadata !31, metadata !"ino_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]
+!31 = metadata !{i32 589865, metadata !42} ; [ DW_TAG_file_type ]
+!32 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!33 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"dev", i32 602, i64 32, i64 32, i64 288, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 589846, metadata !42, metadata !31, metadata !"dev_t", i32 107, i64 0, i64 0, i64 0, i32 0, metadata !35} ; [ DW_TAG_typedef ]
+!35 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!36 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"user_supplied_p", i32 605, i64 8, i64 8, i64 320, i32 0, metadata !37} ; [ DW_TAG_member ]
+!37 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!38 = metadata !{i32 589846, metadata !43, metadata !39, metadata !"size_t", i32 326, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ]
+!39 = metadata !{i32 589865, metadata !43} ; [ DW_TAG_file_type ]
+!40 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!41 = metadata !{metadata !"cpplib.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/../libcpp/include"}
+!42 = metadata !{metadata !"types.h", metadata !"/usr/include/sys"}
+!43 = metadata !{metadata !"stddef.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include"}
+!44 = metadata !{metadata !"darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config"}
+!45 = metadata !{metadata !2}
+!46 = metadata !{i32 0}
+!47 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/eh_symbol.ll b/test/DebugInfo/X86/eh_symbol.ll
index a87afedae268..172ca922302a 100644
--- a/test/DebugInfo/X86/eh_symbol.ll
+++ b/test/DebugInfo/X86/eh_symbol.ll
@@ -8,11 +8,17 @@ entry:
   ret i32 42
 }
 
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!9}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 ()* @f, null, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @f, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [f]
+!1 = metadata !{i32 589865, metadata !6} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !7, metadata !7, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build"}
+!7 = metadata !{i32 0}
+!8 = metadata !{metadata !0}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index 7bc532ebad06..7b38fde5d34e 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -3,12 +3,15 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck --check-prefix=CHECK-DIS %s
 
 ; CHECK: 0x0000000b: DW_TAG_compile_unit
-; CHECK: 0x00000012:   DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000035] = "foo.cpp")
-; CHECK: 0x0000003c:   DW_TAG_class_type
-; CHECK: 0x0000003d:     DW_AT_name [DW_FORM_strp]       ( .debug_str[0x0000006d] = "D")
-; CHECK: 0x00000044:     DW_TAG_member
-; CHECK: 0x00000045:       DW_AT_name [DW_FORM_strp]     ( .debug_str[0x0000005d] = "c1")
-; CHECK: 0x0000008d:       DW_AT_artificial [DW_FORM_flag_present]       (true)
+; CHECK:               DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000035] = "foo.cpp")
+; CHECK: 0x{{[0-9a-f]+}}:   DW_TAG_class_type
+; CHECK:                 DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]+}}] = "D")
+; CHECK: 0x{{[0-9a-f]+}}:     DW_TAG_member
+; CHECK:                   DW_AT_name [DW_FORM_strp]     ( .debug_str[0x{{[0-9a-f]+}}] = "c1")
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]     ( .debug_str[0x{{[0-9a-f]+}}] = "D")
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_artificial [DW_FORM_flag_present]       (true)
 
 ; CHECK-DIS: [artificial]
 
@@ -57,16 +60,17 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!54}
 
 !0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !31}
 !5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
 !6 = metadata !{i32 786473, metadata !53} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ]
+!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
+!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [def] [from ]
 !11 = metadata !{metadata !12, metadata !14, metadata !15, metadata !16, metadata !17, metadata !20}
 !12 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c1", i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
 !13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
@@ -77,7 +81,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !20 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D]
-!21 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !22 = metadata !{null, metadata !9, metadata !23}
 !23 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
 !24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
@@ -107,3 +111,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !51 = metadata !{i32 23, i32 0, metadata !48, null}
 !52 = metadata !{i32 24, i32 0, metadata !48, null}
 !53 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo"}
+!54 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index ce3035e4f98f..a3a08f0e3b91 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -28,61 +28,68 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; An empty array should not have an AT_upper_bound attribute. But an array of 1
 ; should.
 
-; CHECK:      0x00000074:   DW_TAG_base_type [5]  
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000043] = "int")
+; CHECK:      DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "int")
 ; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x05)
 ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x04)
 
+; int foo::b[1]:
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_AT_name{{.*}}"foo"
+; CHECK:      DW_TAG_member
+; CHECK:      DW_TAG_member
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "b")
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
+
 ; int[1]:
-; CHECK:      0x00000082:   DW_TAG_array_type [7] *
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]    (cu + 0x0074 => {0x00000074})
-; CHECK:      0x00000087:     DW_TAG_subrange_type [8]
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x007b => {0x0000007b})
+; CHECK:      DW_TAG_array_type [{{.*}}] *
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
+; CHECK:      DW_TAG_subrange_type [{{.*}}]
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
 ; CHECK-NEXT: DW_AT_upper_bound [DW_FORM_data1]  (0x00)
 
-; int foo::b[1]:
-; CHECK:      0x000000a5:     DW_TAG_member [10]
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000050] = "b")
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x0082 => {0x00000082})
+; int bar::b[0]:
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_AT_name{{.*}}"bar"
+; CHECK:      DW_TAG_member
+; CHECK:      DW_TAG_member
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "b")
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
 
 ; int[0]:
-; CHECK:      0x000000b5:   DW_TAG_array_type [7] *
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]    (cu + 0x0074 => {0x00000074})
-; CHECK:      0x000000ba:     DW_TAG_subrange_type [11]
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x007b => {0x0000007b})
+; CHECK:      DW_TAG_array_type [{{.*}}] *
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
+; CHECK:      DW_TAG_subrange_type [{{.*}}]
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
 ; CHECK-NOT:  DW_AT_upper_bound
 
-; int bar::b[0]:
-; CHECK:      0x000000d7:     DW_TAG_member [10]
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000050] = "b")
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x00b5 => {0x000000b5})
-
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33}
 
 !0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
 !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12]
 !11 = metadata !{i32 786443, metadata !6, metadata !5, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
-!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ]
+!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ]
 !13 = metadata !{metadata !14, metadata !15}
 !14 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
 !15 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
-!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
+!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
 !17 = metadata !{metadata !18}
 !18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1]
 !19 = metadata !{i32 12, i32 0, metadata !11, null}
 !20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13]
-!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ]
+!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [def] [from ]
 !22 = metadata !{metadata !23, metadata !24}
 !23 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"a", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
 !24 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"b", i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
-!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !26 = metadata !{metadata !27}
 !27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0]
 !28 = metadata !{i32 13, i32 0, metadata !11, null}
@@ -90,3 +97,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !30 = metadata !{i32 16, i32 0, metadata !11, null}
 !31 = metadata !{i32 17, i32 0, metadata !11, null}
 !32 = metadata !{metadata !"test.c", metadata !"/Volumes/Sandbox/llvm"}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index 1f462816337a..24364676f8ad 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -6,40 +6,44 @@
 
 @a = global %class.A zeroinitializer, align 4
 
-; CHECK:      0x0000002d:   DW_TAG_base_type [3]  
-; CHECK-NEXT: DW_AT_name
-; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x04)
-; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x05)
+; CHECK: DW_TAG_class_type
+; CHECK:      DW_TAG_member
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x{{[0-9a-f]*}}] = "x")
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x{{[0-9a-f]*}} => {[[ARRAY:0x[0-9a-f]*]]})
 
-; CHECK:      0x00000034:   DW_TAG_array_type [4] *
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]    (cu + 0x0026 => {0x00000026})
+; CHECK:      [[ARRAY]]: DW_TAG_array_type [{{.*}}] *
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]    (cu + 0x{{[0-9a-f]*}} => {[[BASETYPE:0x[0-9a-f]*]]})
 
-; CHECK:      0x00000039:     DW_TAG_subrange_type [5]
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x002d => {0x0000002d})
+; CHECK:      DW_TAG_subrange_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x{{[0-9a-f]*}} => {[[BASE2:0x[0-9a-f]*]]})
 ; CHECK-NOT:  DW_AT_upper_bound
 
-; CHECK:      DW_TAG_member [8]
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x0000003f] = "x")
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]  (cu + 0x0034 => {0x00000034})
+; CHECK: [[BASETYPE]]: DW_TAG_base_type
+; CHECK: [[BASE2]]: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name
+; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x04)
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x05)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
 !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ]
 !8 = metadata !{metadata !9, metadata !14}
 !9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
-!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound]
 !14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
-!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!17 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index b0156b8eac9d..ae17fd0c0ae8 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -27,15 +27,16 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20}
 
 !0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !19, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !19, metadata !6, metadata !"callee", metadata !"callee", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -46,3 +47,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !17 = metadata !{i32 8, i32 18, metadata !15, null}
 !18 = metadata !{i32 9, i32 5, metadata !15, null}
 !19 = metadata !{metadata !"ending-run.c", metadata !"/Users/echristo/tmp"}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index af6129cc28a9..a31e254c24c8 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -6,19 +6,20 @@
 @c = global i32 0, align 4
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23}
 
 !0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17,  metadata !17, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !3, metadata !8, metadata !12}
-!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from int]
 !4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
 !5 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{metadata !7}
 !7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ]
-!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [B] [line 2, size 64, align 64, offset 0] [def] [from long unsigned int]
 !9 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ]
-!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [C] [line 3, size 32, align 32, offset 0] [def] [from ]
 !13 = metadata !{metadata !14}
 !14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ]
 !15 = metadata !{i32 0}
@@ -28,16 +29,17 @@
 !21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ]
 !22 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
 
-; CHECK: DW_TAG_enumeration_type [3]
-; CHECK: DW_AT_type [DW_FORM_ref4]      (cu + 0x0026 => {0x00000026})
-; CHECK: DW_AT_enum_class [DW_FORM_flag]    (0x01)
+; CHECK: DW_TAG_enumeration_type [{{.*}}]
+; CHECK: DW_AT_type [DW_FORM_ref4]
+; CHECK: DW_AT_enum_class [DW_FORM_flag_present] (true)
 ; CHECK: DW_AT_name [DW_FORM_strp]      ( .debug_str[{{.*}}] = "A")
 
-; CHECK: DW_TAG_enumeration_type [3] *
-; CHECK: DW_AT_type [DW_FORM_ref4]      (cu + 0x0057 => {0x00000057})
-; CHECK: DW_AT_enum_class [DW_FORM_flag]    (0x01)
+; CHECK: DW_TAG_enumeration_type [{{.*}}] *
+; CHECK: DW_AT_type [DW_FORM_ref4]
+; CHECK: DW_AT_enum_class [DW_FORM_flag_present] (true)
 ; CHECK: DW_AT_name [DW_FORM_strp]          ( .debug_str[{{.*}}] = "B")
 
 ; CHECK: DW_TAG_enumeration_type [6]
 ; CHECK-NOT: DW_AT_enum_class
 ; CHECK: DW_AT_name [DW_FORM_strp]      ( .debug_str[{{.*}}] = "C")
+!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index f4ff8b4fe4d8..6bfb930cb6e7 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -4,15 +4,18 @@
 @e = global i16 0, align 2
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !8, i32 4, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786436, metadata !6, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ]
+!6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786436, metadata !8, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [decl] [from ]
+!8 = metadata !{metadata !"foo.cpp", metadata !"/tmp"}
 
 ; CHECK: DW_TAG_enumeration_type
 ; CHECK-NEXT: DW_AT_name
 ; CHECK-NEXT: DW_AT_byte_size
 ; CHECK-NEXT: DW_AT_declaration
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
index 8ad3c2d97a53..06408d708150 100644
--- a/test/DebugInfo/X86/fission-cu.ll
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -5,6 +5,7 @@
 @a = common global i32 0, align 4
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
 
 !0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
@@ -23,20 +24,20 @@
 ; CHECK: Abbrev table for offset: 0x00000000
 ; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_no
 ; CHECK: DW_AT_GNU_dwo_name      DW_FORM_strp
-; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
 ; CHECK: DW_AT_GNU_addr_base     DW_FORM_sec_offset
 ; CHECK: DW_AT_low_pc    DW_FORM_addr
 ; CHECK: DW_AT_stmt_list DW_FORM_sec_offset
 ; CHECK: DW_AT_comp_dir  DW_FORM_strp
+; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
 
 ; CHECK: .debug_info contents:
 ; CHECK: DW_TAG_compile_unit
 ; CHECK: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000000] = "baz.dwo")
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
 ; CHECK: DW_AT_GNU_addr_base [DW_FORM_sec_offset]                   (0x00000000)
 ; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
 ; CHECK: DW_AT_stmt_list [DW_FORM_sec_offset]   (0x00000000)
 ; CHECK: DW_AT_comp_dir [DW_FORM_strp]     ( .debug_str[0x00000008] = "/usr/local/google/home/echristo/tmp")
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
 
 ; CHECK: .debug_str contents:
 ; CHECK: 0x00000000: "baz.dwo"
@@ -54,12 +55,7 @@
 ; CHECK-NOT: DW_AT_comp_dir
 ; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
 
-; CHECK: [2] DW_TAG_base_type    DW_CHILDREN_no
-; CHECK: DW_AT_name      DW_FORM_GNU_str_index
-; CHECK: DW_AT_encoding  DW_FORM_data1
-; CHECK: DW_AT_byte_size DW_FORM_data1
-
-; CHECK: [3] DW_TAG_variable     DW_CHILDREN_no
+; CHECK: [2] DW_TAG_variable     DW_CHILDREN_no
 ; CHECK: DW_AT_name      DW_FORM_GNU_str_index
 ; CHECK: DW_AT_type      DW_FORM_ref4
 ; CHECK: DW_AT_external  DW_FORM_flag_present
@@ -67,6 +63,11 @@
 ; CHECK: DW_AT_decl_line DW_FORM_data1
 ; CHECK: DW_AT_location  DW_FORM_block1
 
+; CHECK: [3] DW_TAG_base_type    DW_CHILDREN_no
+; CHECK: DW_AT_name      DW_FORM_GNU_str_index
+; CHECK: DW_AT_encoding  DW_FORM_data1
+; CHECK: DW_AT_byte_size DW_FORM_data1
+
 ; Check that the rest of the compile units have information.
 ; CHECK: .debug_info.dwo contents:
 ; CHECK: DW_TAG_compile_unit
@@ -77,15 +78,15 @@
 ; CHECK-NOT: DW_AT_stmt_list
 ; CHECK-NOT: DW_AT_comp_dir
 ; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
-; CHECK: DW_TAG_base_type
-; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "int")
 ; CHECK: DW_TAG_variable
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000002) string = "a")
-; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x0018 => {0x00000018})
+; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x{{[0-9a-f]*}} => {[[TYPE:0x[0-9a-f]*]]})
 ; CHECK: DW_AT_external [DW_FORM_flag_present]   (true)
 ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
 ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
 ; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 00 )
+; CHECK: [[TYPE]]: DW_TAG_base_type
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "int")
 
 
 ; CHECK: .debug_str.dwo contents:
@@ -110,3 +111,4 @@
 ; OBJ-NEXT: R_X86_64_32 .debug_line
 ; OBJ-NEXT: R_X86_64_32 .debug_str
 ; OBJ-NEXT: }
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/fission-hash.ll b/test/DebugInfo/X86/fission-hash.ll
new file mode 100644
index 000000000000..d3e46a9c4ff1
--- /dev/null
+++ b/test/DebugInfo/X86/fission-hash.ll
@@ -0,0 +1,16 @@
+; RUN: llc -split-dwarf=Enable -generate-cu-hash -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+
+; The source is an empty file.
+
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0c1e629c9e5ada4f)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0c1e629c9e5ada4f)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 188230) (llvm/trunk 188234)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !"foo.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll
new file mode 100644
index 000000000000..0a100799bed1
--- /dev/null
+++ b/test/DebugInfo/X86/fission-ranges.ll
@@ -0,0 +1,151 @@
+; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+
+; From the code:
+
+; extern int c;
+; static void foo (int p)
+; {
+;   int a, b; 
+;   unsigned int d, e;
+
+;   for (a = 0; a < 30; a++)
+;     for (d = 0; d < 30; d++)
+;       for (b = 0; b < 30; b++)
+;         for (e = 0; e < 30; e++)
+;           {
+;             int *w = &c; 
+;             *w &= p; 
+;           }
+; }
+
+; void 
+; bar ()
+; {
+;   foo (1);
+; }
+
+; compiled with:
+
+; clang -g -S -gsplit-dwarf -O1 small.c
+
+; CHECK: DW_AT_GNU_ranges_base
+
+@c = external global i32
+
+; Function Attrs: nounwind uwtable
+define void @bar() #0 { ; IR for bar() from the C source above; its only work is the call to @foo
+entry:
+  tail call fastcc void @foo(), !dbg !27 ; source calls foo(1), but @foo takes no parameter here; the constant 1 appears in @foo's debug value for p
+  ret void, !dbg !28
+}
+
+; Function Attrs: nounwind uwtable
+define internal fastcc void @foo() #0 { ; the four nested loops of foo(); @c is loaded once in entry and stored once in for.end18
+entry:
+  tail call void @llvm.dbg.value(metadata !29, i64 0, metadata !13), !dbg !30 ; debug value for p (!13): the constant 1 (!29)
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !14), !dbg !31 ; debug value for a (!14): 0 (!2)
+  %c.promoted9 = load i32* @c, align 4, !dbg !32, !tbaa !33 ; read @c once, before any loop
+  br label %for.cond1.preheader, !dbg !31
+
+for.cond1.preheader:                              ; preds = %for.inc16, %entry
+  %and.lcssa.lcssa.lcssa10 = phi i32 [ %c.promoted9, %entry ], [ %and, %for.inc16 ]
+  %a.08 = phi i32 [ 0, %entry ], [ %inc17, %for.inc16 ] ; loop counter a (outermost loop)
+  br label %for.cond4.preheader, !dbg !37
+
+for.cond4.preheader:                              ; preds = %for.inc13, %for.cond1.preheader
+  %and.lcssa.lcssa7 = phi i32 [ %and.lcssa.lcssa.lcssa10, %for.cond1.preheader ], [ %and, %for.inc13 ]
+  %d.06 = phi i32 [ 0, %for.cond1.preheader ], [ %inc14, %for.inc13 ] ; loop counter d
+  br label %for.cond7.preheader, !dbg !38
+
+for.cond7.preheader:                              ; preds = %for.inc10, %for.cond4.preheader
+  %and.lcssa5 = phi i32 [ %and.lcssa.lcssa7, %for.cond4.preheader ], [ %and, %for.inc10 ]
+  %b.03 = phi i32 [ 0, %for.cond4.preheader ], [ %inc11, %for.inc10 ] ; loop counter b
+  br label %for.body9, !dbg !39
+
+for.body9:                                        ; preds = %for.body9, %for.cond7.preheader
+  %and2 = phi i32 [ %and.lcssa5, %for.cond7.preheader ], [ %and, %for.body9 ], !dbg !40 ; value of c as updated so far, threaded through all loop levels by the phis
+  %e.01 = phi i32 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] ; loop counter e (innermost loop)
+  tail call void @llvm.dbg.value(metadata !41, i64 0, metadata !19), !dbg !40 ; debug value for w (!19): the address of @c (!41)
+  %and = and i32 %and2, 1, !dbg !32 ; "*w &= p" from the source, with p replaced by the constant 1
+  %inc = add i32 %e.01, 1, !dbg !39
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !18), !dbg !39 ; debug value for e (!18)
+  %exitcond = icmp eq i32 %inc, 30, !dbg !39 ; leave the e loop once the incremented counter reaches 30
+  br i1 %exitcond, label %for.inc10, label %for.body9, !dbg !39
+
+for.inc10:                                        ; preds = %for.body9
+  %inc11 = add nsw i32 %b.03, 1, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{i32 %inc11}, i64 0, metadata !15), !dbg !38 ; debug value for b (!15)
+  %exitcond11 = icmp eq i32 %inc11, 30, !dbg !38
+  br i1 %exitcond11, label %for.inc13, label %for.cond7.preheader, !dbg !38
+
+for.inc13:                                        ; preds = %for.inc10
+  %inc14 = add i32 %d.06, 1, !dbg !37
+  tail call void @llvm.dbg.value(metadata !{i32 %inc14}, i64 0, metadata !16), !dbg !37 ; debug value for d (!16)
+  %exitcond12 = icmp eq i32 %inc14, 30, !dbg !37
+  br i1 %exitcond12, label %for.inc16, label %for.cond4.preheader, !dbg !37
+
+for.inc16:                                        ; preds = %for.inc13
+  %inc17 = add nsw i32 %a.08, 1, !dbg !31
+  tail call void @llvm.dbg.value(metadata !{i32 %inc17}, i64 0, metadata !14), !dbg !31 ; debug value for a (!14)
+  %exitcond13 = icmp eq i32 %inc17, 30, !dbg !31
+  br i1 %exitcond13, label %for.end18, label %for.cond1.preheader, !dbg !31
+
+for.end18:                                        ; preds = %for.inc16
+  store i32 %and, i32* @c, align 4, !dbg !32, !tbaa !33 ; write the final value back to @c after all loops finish
+  ret void, !dbg !42
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!26, !43}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 191700) (llvm/trunk 191710)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"small.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/small.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"small.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 18, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @bar, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 19] [bar]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/small.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !9, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [scope 3] [foo]
+!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{null, metadata !11}
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !18, metadata !19}
+!13 = metadata !{i32 786689, metadata !8, metadata !"p", metadata !5, i32 16777218, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 2]
+!14 = metadata !{i32 786688, metadata !8, metadata !"a", metadata !5, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 4]
+!15 = metadata !{i32 786688, metadata !8, metadata !"b", metadata !5, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 4]
+!16 = metadata !{i32 786688, metadata !8, metadata !"d", metadata !5, i32 5, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 5]
+!17 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!18 = metadata !{i32 786688, metadata !8, metadata !"e", metadata !5, i32 5, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 5]
+!19 = metadata !{i32 786688, metadata !20, metadata !"w", metadata !5, i32 12, metadata !25, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [w] [line 12]
+!20 = metadata !{i32 786443, metadata !1, metadata !21, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!21 = metadata !{i32 786443, metadata !1, metadata !22, i32 10, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!22 = metadata !{i32 786443, metadata !1, metadata !23, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!23 = metadata !{i32 786443, metadata !1, metadata !24, i32 8, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!24 = metadata !{i32 786443, metadata !1, metadata !8, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!26 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!27 = metadata !{i32 20, i32 0, metadata !4, null}
+!28 = metadata !{i32 21, i32 0, metadata !4, null}
+!29 = metadata !{i32 1}
+!30 = metadata !{i32 2, i32 0, metadata !8, null}
+!31 = metadata !{i32 7, i32 0, metadata !24, null}
+!32 = metadata !{i32 13, i32 0, metadata !20, null}
+!33 = metadata !{metadata !34, metadata !34, i64 0}
+!34 = metadata !{metadata !"int", metadata !35, i64 0}
+!35 = metadata !{metadata !"omnipotent char", metadata !36, i64 0}
+!36 = metadata !{metadata !"Simple C/C++ TBAA"}
+!37 = metadata !{i32 8, i32 0, metadata !23, null}
+!38 = metadata !{i32 9, i32 0, metadata !22, null}
+!39 = metadata !{i32 10, i32 0, metadata !21, null}
+!40 = metadata !{i32 12, i32 0, metadata !20, null}
+!41 = metadata !{i32* @c}
+!42 = metadata !{i32 15, i32 0, metadata !8, null}
+!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll
new file mode 100644
index 000000000000..4f9cc78fec9a
--- /dev/null
+++ b/test/DebugInfo/X86/generate-odr-hash.ll
@@ -0,0 +1,196 @@
+; REQUIRES: object-emission
+
+; RUN: llc %s -o %t -filetype=obj -O0 -generate-odr-hash -mtriple=x86_64-unknown-linux-gnu
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+;
+; Generated from:
+; struct bar {};
+
+; struct bar b;
+
+; void foo(void) {
+;   struct baz {};
+;   baz b;
+; }
+
+; namespace echidna {
+; namespace capybara {
+; namespace mongoose {
+; class fluffy {
+;   int a;
+;   int b;
+; };
+
+; fluffy animal;
+; }
+; }
+; }
+
+; namespace {
+; struct walrus {
+;   walrus() {}
+; };
+; }
+
+; walrus w;
+
+; struct wombat {
+;   struct {
+;     int a;
+;     int b;
+;   } a_b;
+; };
+
+; wombat wom;
+
+; Check that we generate a hash for bar and the value.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: debug_str{{.*}}"bar"
+; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x200520c0d5b90eff)
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: debug_str{{.*}}"echidna"
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: debug_str{{.*}}"capybara"
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: debug_str{{.*}}"mongoose"
+; CHECK: DW_TAG_class_type
+; CHECK-NEXT: debug_str{{.*}}"fluffy"
+; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8]   (0x9a0124d5a0c21c52)
+
+; We emit no hash for walrus since the type is contained in an anonymous
+; namespace and won't violate any ODR-ness.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: debug_str{{.*}}"walrus"
+; CHECK-NEXT: DW_AT_byte_size
+; CHECK-NEXT: DW_AT_decl_file
+; CHECK-NEXT: DW_AT_decl_line
+; CHECK-NOT: DW_AT_GNU_odr_signature
+; CHECK: DW_TAG_subprogram
+
+; Check that we generate a hash for wombat and the value, but not for the
+; anonymous type contained within.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: debug_str{{.*}}"wombat"
+; CHECK: DW_AT_GNU_odr_signature [DW_FORM_data8] (0x685bcc220141e9d7)
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_byte_size
+; CHECK-NEXT: DW_AT_decl_file
+; CHECK-NEXT: DW_AT_decl_line
+; CHECK: DW_TAG_member
+; CHECK-NEXT: debug_str{{.*}}"a"
+
+; Check that we don't generate a hash for baz.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: debug_str{{.*}}"baz"
+; CHECK-NOT: DW_AT_GNU_odr_signature
+
+%struct.bar = type { i8 }
+%"class.echidna::capybara::mongoose::fluffy" = type { i32, i32 }
+%"struct.<anonymous namespace>::walrus" = type { i8 }
+%struct.wombat = type { %struct.anon }
+%struct.anon = type { i32, i32 }
+%struct.baz = type { i8 }
+
+@b = global %struct.bar zeroinitializer, align 1
+@_ZN7echidna8capybara8mongoose6animalE = global %"class.echidna::capybara::mongoose::fluffy" zeroinitializer, align 4
+@w = internal global %"struct.<anonymous namespace>::walrus" zeroinitializer, align 1
+@wom = global %struct.wombat zeroinitializer, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+@_ZN12_GLOBAL__N_16walrusC1Ev = alias internal void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev
+
+; Function Attrs: nounwind uwtable
+define void @_Z3foov() #0 { ; foo() from the "Generated from" source: holds the function-local 'baz b'
+entry:
+  %b = alloca %struct.baz, align 1 ; stack slot for the local variable
+  call void @llvm.dbg.declare(metadata !{%struct.baz* %b}, metadata !44), !dbg !46 ; !44 is the DW_TAG_auto_variable "b" whose type !45 is baz
+  ret void, !dbg !47
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+define internal void @__cxx_global_var_init() section ".text.startup" { ; runs the walrus constructor on the global @w
+entry:
+  call void @_ZN12_GLOBAL__N_16walrusC1Ev(%"struct.<anonymous namespace>::walrus"* @w), !dbg !48 ; C1 is declared in this file as an alias of the C2 constructor
+  ret void, !dbg !48
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @_ZN12_GLOBAL__N_16walrusC2Ev(%"struct.<anonymous namespace>::walrus"* %this) unnamed_addr #0 align 2 { ; walrus() {} constructor; body only spills and reloads 'this'
+entry:
+  %this.addr = alloca %"struct.<anonymous namespace>::walrus"*, align 8 ; stack home for the incoming 'this' pointer
+  store %"struct.<anonymous namespace>::walrus"* %this, %"struct.<anonymous namespace>::walrus"** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%"struct.<anonymous namespace>::walrus"** %this.addr}, metadata !49), !dbg !51 ; !49 is the DW_TAG_arg_variable "this"
+  %this1 = load %"struct.<anonymous namespace>::walrus"** %this.addr ; reloaded value has no further use in this function
+  ret void, !dbg !52
+}
+
+define internal void @_GLOBAL__I_a() section ".text.startup" { ; the single entry listed in @llvm.global_ctors
+entry:
+  call void @__cxx_global_var_init(), !dbg !53 ; forwards to the initializer that constructs @w
+  ret void, !dbg !53
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!42, !54}
+!llvm.ident = !{!43}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !20, metadata !37, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"bar.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !5, metadata !13, metadata !16}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{i32 786434, metadata !1, metadata !6, metadata !"fluffy", i32 13, i64 64, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE"} ; [ DW_TAG_class_type ] [fluffy] [line 13, size 64, align 32, offset 0] [def] [from ]
+!6 = metadata !{i32 786489, metadata !1, metadata !7, metadata !"mongoose", i32 12} ; [ DW_TAG_namespace ] [mongoose] [line 12]
+!7 = metadata !{i32 786489, metadata !1, metadata !8, metadata !"capybara", i32 11} ; [ DW_TAG_namespace ] [capybara] [line 11]
+!8 = metadata !{i32 786489, metadata !1, null, metadata !"echidna", i32 10} ; [ DW_TAG_namespace ] [echidna] [line 10]
+!9 = metadata !{metadata !10, metadata !12}
+!10 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE", metadata !"a", i32 14, i64 32, i64 32, i64 0, i32 1, metadata !11} ; [ DW_TAG_member ] [a] [line 14, size 32, align 32, offset 0] [private] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE", metadata !"b", i32 15, i64 32, i64 32, i64 32, i32 1, metadata !11} ; [ DW_TAG_member ] [b] [line 15, size 32, align 32, offset 32] [private] [from int]
+!13 = metadata !{i32 786451, metadata !1, null, metadata !"wombat", i32 31, i64 64, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null, metadata !"_ZTS6wombat"} ; [ DW_TAG_structure_type ] [wombat] [line 31, size 64, align 32, offset 0] [def] [from ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786445, metadata !1, metadata !"_ZTS6wombat", metadata !"a_b", i32 35, i64 64, i64 32, i64 0, i32 0, metadata !"_ZTSN6wombatUt_E"} ; [ DW_TAG_member ] [a_b] [line 35, size 64, align 32, offset 0] [from _ZTSN6wombatUt_E]
+!16 = metadata !{i32 786451, metadata !1, metadata !"_ZTS6wombat", metadata !"", i32 32, i64 64, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null, metadata !"_ZTSN6wombatUt_E"} ; [ DW_TAG_structure_type ] [line 32, size 64, align 32, offset 0] [def] [from ]
+!17 = metadata !{metadata !18, metadata !19}
+!18 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN6wombatUt_E", metadata !"a", i32 33, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] [a] [line 33, size 32, align 32, offset 0] [from int]
+!19 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN6wombatUt_E", metadata !"b", i32 34, i64 32, i64 32, i64 32, i32 0, metadata !11} ; [ DW_TAG_member ] [b] [line 34, size 32, align 32, offset 32] [from int]
+!20 = metadata !{metadata !21, metadata !25, metadata !26, metadata !35}
+!21 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
+!22 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/bar.cpp]
+!23 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = metadata !{null}
+!25 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 29, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 29} ; [ DW_TAG_subprogram ] [line 29] [local] [def] [__cxx_global_var_init]
+!26 = metadata !{i32 786478, metadata !1, metadata !27, metadata !"walrus", metadata !"walrus", metadata !"_ZN12_GLOBAL__N_16walrusC2Ev", i32 25, metadata !31, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev, null, metadata !30, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [local] [def] [walrus]
+!27 = metadata !{i32 786451, metadata !1, metadata !28, metadata !"walrus", i32 24, i64 8, i64 8, i32 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [walrus] [line 24, size 8, align 8, offset 0] [def] [from ]
+!28 = metadata !{i32 786489, metadata !1, null, metadata !"", i32 23} ; [ DW_TAG_namespace ] [line 23]
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 786478, metadata !1, metadata !27, metadata !"walrus", metadata !"walrus", metadata !"", i32 25, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !34, i32 25} ; [ DW_TAG_subprogram ] [line 25] [walrus]
+!31 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!32 = metadata !{null, metadata !33}
+!33 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !27} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from walrus]
+!34 = metadata !{i32 786468}
+!35 = metadata !{i32 786478, metadata !1, metadata !22, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 25, metadata !36, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [local] [def]
+!36 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = metadata !{metadata !38, metadata !39, metadata !40, metadata !41}
+!38 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !22, i32 3, metadata !4, i32 0, i32 1, %struct.bar* @b, null} ; [ DW_TAG_variable ] [b] [line 3] [def]
+!39 = metadata !{i32 786484, i32 0, metadata !6, metadata !"animal", metadata !"animal", metadata !"_ZN7echidna8capybara8mongoose6animalE", metadata !22, i32 18, metadata !5, i32 0, i32 1, %"class.echidna::capybara::mongoose::fluffy"* @_ZN7echidna8capybara8mongoose6animalE, null} ; [ DW_TAG_variable ] [animal] [line 18] [def]
+!40 = metadata !{i32 786484, i32 0, null, metadata !"w", metadata !"w", metadata !"", metadata !22, i32 29, metadata !27, i32 1, i32 1, %"struct.<anonymous namespace>::walrus"* @w, null} ; [ DW_TAG_variable ] [w] [line 29] [local] [def]
+!41 = metadata !{i32 786484, i32 0, null, metadata !"wom", metadata !"wom", metadata !"", metadata !22, i32 38, metadata !13, i32 0, i32 1, %struct.wombat* @wom, null} ; [ DW_TAG_variable ] [wom] [line 38] [def]
+!42 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!43 = metadata !{metadata !"clang version 3.4 "}
+!44 = metadata !{i32 786688, metadata !21, metadata !"b", metadata !22, i32 7, metadata !45, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 7]
+!45 = metadata !{i32 786451, metadata !1, metadata !21, metadata !"baz", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [baz] [line 6, size 8, align 8, offset 0] [def] [from ]
+!46 = metadata !{i32 7, i32 0, metadata !21, null}
+!47 = metadata !{i32 8, i32 0, metadata !21, null} ; line 8, col 0 location used as !dbg on foo's ret (first operand is a line number, not DW_TAG 0x08)
+!48 = metadata !{i32 29, i32 0, metadata !25, null}
+!49 = metadata !{i32 786689, metadata !26, metadata !"this", null, i32 16777216, metadata !50, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !27} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from walrus]
+!51 = metadata !{i32 0, i32 0, metadata !26, null}
+!52 = metadata !{i32 25, i32 0, metadata !26, null}
+!53 = metadata !{i32 25, i32 0, metadata !35, null}
+!54 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/gnu-public-names-empty.ll b/test/DebugInfo/X86/gnu-public-names-empty.ll
new file mode 100644
index 000000000000..8b0309cc65be
--- /dev/null
+++ b/test/DebugInfo/X86/gnu-public-names-empty.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
+
+; Generated from:
+
+; static int a __attribute__((section("a")));
+
+; Check that the attributes in the compile unit both point to a correct
+; location, even when nothing is exported.
+; CHECK: DW_AT_GNU_pubnames [DW_FORM_sec_offset]   (0x00000000)
+; CHECK: DW_AT_GNU_pubtypes [DW_FORM_sec_offset]   (0x00000000)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 191846) (llvm/trunk 191866)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
new file mode 100644
index 000000000000..7ad503253733
--- /dev/null
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -0,0 +1,219 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections < %s | FileCheck -check-prefix=ASM %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -generate-gnu-dwarf-pub-sections -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
+; ModuleID = 'dwarf-public-names.cpp'
+;
+; Generated from:
+;
+; struct C {
+;   void member_function();
+;   static int static_member_function();
+;   static int static_member_variable;
+; };
+;
+; int C::static_member_variable = 0;
+;
+; void C::member_function() {
+;   static_member_variable = 0;
+; }
+;
+; int C::static_member_function() {
+;   return static_member_variable;
+; }
+;
+; C global_variable;
+;
+; int global_function() {
+;   return -1;
+; }
+;
+; namespace ns {
+;   void global_namespace_function() {
+;     global_variable.member_function();
+;   }
+;   int global_namespace_variable = 1;
+;   struct D {
+;     int A;
+;   } d;
+; }
+
+; ASM: .section        .debug_gnu_pubnames
+; ASM: .byte   32                      # Kind: VARIABLE, EXTERNAL
+; ASM-NEXT: .asciz  "global_variable"       # External Name
+
+; ASM: .section        .debug_gnu_pubtypes
+; ASM: .byte   16                      # Kind: TYPE, EXTERNAL
+; ASM-NEXT: .asciz  "C"                     # External Name
+
+; CHECK: .debug_info contents:
+; CHECK: DW_AT_GNU_pubnames [DW_FORM_sec_offset]   (0x00000000)
+; CHECK: DW_AT_GNU_pubtypes [DW_FORM_sec_offset]   (0x00000000)
+
+; CHECK: [[C:[0-9a-f]+]]: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} "C"
+
+; CHECK: [[STATIC_MEM_DECL:[0-9a-f]+]]: DW_TAG_member
+; CHECK-NEXT: DW_AT_name {{.*}} "static_member_variable"
+
+; CHECK: [[MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name {{.*}} "member_function"
+
+; CHECK: [[STATIC_MEM_FUNC_DECL:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name {{.*}} "static_member_function"
+
+; CHECK: [[INT:[0-9a-f]+]]: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name {{.*}} "int"
+
+; CHECK: [[STATIC_MEM_VAR:[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_DECL]]
+
+; CHECK: [[GLOB_VAR:[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name {{.*}} "global_variable"
+
+; CHECK: [[NS:[0-9a-f]+]]: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name {{.*}} "ns"
+
+; CHECK: [[GLOB_NS_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_variable"
+
+; CHECK: [[D_VAR_DECL:[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name {{.*}} "d"
+
+; CHECK: [[D:[0-9a-f]+]]: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} "D"
+
+; CHECK: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_function"
+
+; CHECK: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]]
+
+; CHECK: [[D_VAR:[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]]
+
+; CHECK: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
+
+; CHECK: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
+
+; CHECK: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name {{.*}} "global_function"
+
+; CHECK-LABEL: .debug_gnu_pubnames contents:
+; CHECK-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = 0x0000017b
+; CHECK-NEXT: Offset     Linkage  Kind     Name
+; CHECK-DAG:  [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function"
+; CHECK-DAG:  [[NS]] EXTERNAL TYPE     "ns"
+; CHECK-DAG:  [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function"
+; CHECK-DAG:  [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable"
+; CHECK-DAG:  [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable"
+; CHECK-DAG:  [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function"
+; CHECK-DAG:  [[D_VAR]] EXTERNAL VARIABLE "ns::d"
+; CHECK-DAG:  [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
+; CHECK-DAG:  [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
+
+
+; CHECK-LABEL: .debug_gnu_pubtypes contents:
+; CHECK: Offset     Linkage  Kind     Name
+; CHECK-DAG:  [[C]] EXTERNAL TYPE     "C"
+; CHECK-DAG:  [[D]] EXTERNAL TYPE     "ns::D"
+; CHECK-DAG:  [[INT]] STATIC   TYPE     "int"
+
+%struct.C = type { i8 }
+%"struct.ns::D" = type { i32 }
+
+@_ZN1C22static_member_variableE = global i32 0, align 4
+@global_variable = global %struct.C zeroinitializer, align 1
+@_ZN2ns25global_namespace_variableE = global i32 1, align 4
+@_ZN2ns1dE = global %"struct.ns::D" zeroinitializer, align 4
+
+; Function Attrs: nounwind uwtable
+define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 { ; C::member_function(): sets C::static_member_variable to 0
+entry:
+  %this.addr = alloca %struct.C*, align 8 ; stack home for the incoming 'this' pointer
+  store %struct.C* %this, %struct.C** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !36), !dbg !38 ; !36 is the DW_TAG_arg_variable "this"
+  %this1 = load %struct.C** %this.addr ; reloaded value has no further use in this function
+  store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !39 ; static_member_variable = 0
+  ret void, !dbg !39
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @_ZN1C22static_member_functionEv() #0 align 2 { ; C::static_member_function(): returns C::static_member_variable
+entry:
+  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !40 ; read the static data member
+  ret i32 %0, !dbg !40
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z15global_functionv() #0 { ; global_function(): returns the constant -1
+entry:
+  ret i32 -1, !dbg !41
+}
+
+; Function Attrs: nounwind uwtable
+define void @_ZN2ns25global_namespace_functionEv() #0 { ; ns::global_namespace_function(): calls member_function on @global_variable
+entry:
+  call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !42 ; global_variable.member_function()
+  ret void, !dbg !42
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!34, !43}
+!llvm.ident = !{!35}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192862) (llvm/trunk 192861)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !21, metadata !29, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/pubnames.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"pubnames.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !17}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !8, metadata !13}
+!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1C", metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !7, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{null, metadata !11}
+!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!12 = metadata !{i32 786468}
+!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !7}
+!16 = metadata !{i32 786468}
+!17 = metadata !{i32 786451, metadata !1, metadata !18, metadata !"D", i32 21, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, metadata !"_ZTSN2ns1DE"} ; [ DW_TAG_structure_type ] [D] [line 21, size 32, align 32, offset 0] [def] [from ]
+!18 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 17} ; [ DW_TAG_namespace ] [ns] [line 17]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN2ns1DE", metadata !"A", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [A] [line 22, size 32, align 32, offset 0] [from int]
+!21 = metadata !{metadata !22, metadata !23, metadata !24, metadata !26}
+!22 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !8, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!23 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !13, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [static_member_function]
+!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 15, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !2, i32 15} ; [ DW_TAG_subprogram ] [line 15] [def] [global_function]
+!25 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/pubnames.cpp]
+!26 = metadata !{i32 786478, metadata !1, metadata !18, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 18, metadata !27, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !2, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [global_namespace_function]
+!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = metadata !{null}
+!29 = metadata !{metadata !30, metadata !31, metadata !32, metadata !33}
+!30 = metadata !{i32 786484, i32 0, metadata !4, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !25, i32 7, metadata !7, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !6} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!31 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !25, i32 13, metadata !4, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 13] [def]
+!32 = metadata !{i32 786484, i32 0, metadata !18, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !25, i32 19, metadata !7, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 19] [def]
+!33 = metadata !{i32 786484, i32 0, metadata !18, metadata !"d", metadata !"d", metadata !"_ZN2ns1dE", metadata !25, i32 23, metadata !17, i32 0, i32 1, %"struct.ns::D"* @_ZN2ns1dE, null} ; [ DW_TAG_variable ] [d] [line 23] [def]
+!34 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!35 = metadata !{metadata !"clang version 3.4 (trunk 192862) (llvm/trunk 192861)"}
+!36 = metadata !{i32 786689, metadata !22, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!38 = metadata !{i32 0, i32 0, metadata !22, null}
+!39 = metadata !{i32 9, i32 0, metadata !22, null}
+!40 = metadata !{i32 11, i32 0, metadata !23, null}
+!41 = metadata !{i32 15, i32 0, metadata !24, null}
+!42 = metadata !{i32 18, i32 0, metadata !26, null}
+
+!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/instcombine-instrinsics.ll b/test/DebugInfo/X86/instcombine-instrinsics.ll
index 446682841552..41dd09f5a425 100644
--- a/test/DebugInfo/X86/instcombine-instrinsics.ll
+++ b/test/DebugInfo/X86/instcombine-instrinsics.ll
@@ -60,17 +60,18 @@ declare i32 @put(i64, i64*, i64, %struct.i24*) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!73}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !48, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !48, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !"i1", metadata !""}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4, metadata !21, metadata !33, metadata !47}
 !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i2", metadata !"i2", metadata !"", i32 31, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %struct.i3* (i64)* @barz, null, null, metadata !16, i32 32} ; [ DW_TAG_subprogram ] [line 31]  [scope 32]
 !5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ]
-!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{metadata !8, metadata !13}
 !8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i3]
-!9 = metadata !{i32 786451, metadata !1, null, metadata !"i3", i32 25, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_structure_type ]  [line 25, size 32, align 32, offset 0] [from ]
+!9 = metadata !{i32 786451, metadata !1, null, metadata !"i3", i32 25, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [i3] [line 25, size 32, align 32, offset 0] [def] [from ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"i4", i32 26, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]  [line 26, size 32, align 32, offset 0] [from i5]
 !12 = metadata !{i32 786468, null, null, metadata !"i5", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]  [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
@@ -79,12 +80,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !15 = metadata !{i32 786468, null, null, metadata !"i8", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]  [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
 !16 = metadata !{}
 !21 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i13", metadata !"i13", metadata !"", i32 42, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @init, null, null, metadata !24, i32 43} ; [ DW_TAG_subprogram ] [line 42]  [scope 43]
-!22 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null}
 !24 = metadata !{metadata !25}
 !25 = metadata !{i32 786688, metadata !21, metadata !"i14", metadata !5, i32 45, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ]  [line 45]
 !27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14]
-!28 = metadata !{i32 786451, metadata !1, null, metadata !"i14", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !29, i32 0, null, null} ; [ DW_TAG_structure_type ]  [line 16, size 32, align 32, offset 0] [from ]
+!28 = metadata !{i32 786451, metadata !1, null, metadata !"i14", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [i14] [line 16, size 32, align 32, offset 0] [def] [from ]
 !29 = metadata !{metadata !30}
 !30 = metadata !{i32 786445, metadata !1, metadata !28, metadata !"i16", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !31} ; [ DW_TAG_member ]  [line 17, size 32, align 32, offset 0] [from i17]
 !31 = metadata !{i32 786454, metadata !1, null, metadata !"i17", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]  [line 7, size 0, align 0, offset 0] [from int]
@@ -98,3 +99,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !52 = metadata !{i64 0}
 !55 = metadata !{%struct.i3* null}
 !72 = metadata !{%struct.i24* null}
+!73 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
index fd813b312162..46daccfe841d 100644
--- a/test/DebugInfo/X86/line-info.ll
+++ b/test/DebugInfo/X86/line-info.ll
@@ -36,6 +36,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19}
 
 !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
@@ -44,15 +45,16 @@ attributes #1 = { nounwind readnone }
 !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
 !5 = metadata !{metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
 !6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h]
-!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
 !11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
-!12 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{metadata !9}
 !14 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
 !15 = metadata !{i32 1, i32 0, metadata !4, null}
 !16 = metadata !{i32 2, i32 0, metadata !4, null}
 !17 = metadata !{i32 3, i32 0, metadata !18, null}
 !18 = metadata !{i32 786443, metadata !11, metadata !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index c9bd2cfb5e8e..3d116675454c 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-macosx -darwin-gdb-compat=Disable %s -o %t -filetype=obj
+; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; CHECK: DW_TAG_subprogram [9] *
@@ -25,19 +25,20 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10, metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
 !12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
@@ -52,3 +53,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !26 = metadata !{i32 6, i32 4, metadata !27, null}
 !27 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
 !28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
+!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg
index 60d66eae4953..19840aa7574c 100644
--- a/test/DebugInfo/X86/lit.local.cfg
+++ b/test/DebugInfo/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index 77f69b961753..922ae8dfed2f 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -13,17 +13,20 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !15, i32 4, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !12}
-!5 = metadata !{i32 786478, metadata !"_Z1qv", i32 0, metadata !6, metadata !"q", metadata !"q", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"q", metadata !"q", metadata !"_Z1qv", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [q]
+!6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, metadata !15, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786478, metadata !"", i32 0, metadata !6, metadata !"t", metadata !"t", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 786478, metadata !15, metadata !6, metadata !"t", metadata !"t", metadata !"", i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ]
 !13 = metadata !{i32 7, i32 1, metadata !14, null}
 !14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
+!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
index 4b78c8812e93..cfb06672d259 100644
--- a/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -88,10 +88,11 @@ attributes #0 = { nounwind optsize ssp uwtable }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!83}
 
-!0 = metadata !{i32 786449, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29,  metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !82, i32 12, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29,  metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
-!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ]
+!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [def] [from ]
 !3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ]
 !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9}
 !5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0]
@@ -101,13 +102,13 @@ attributes #1 = { nounwind readnone }
 !9 = metadata !{i32 786472, metadata !"Ident5", i64 10003} ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
 !10 = metadata !{i32 0}
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
-!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{i32 786478, metadata !82, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
+!13 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21}
 !15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
 !16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
+!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
 !19 = metadata !{metadata !20}
 !20 = metadata !{i32 786465, i64 0, i64 51}       ; [ DW_TAG_subrange_type ] [0, 50]
 !21 = metadata !{i32 786454, metadata !82, null, metadata !"OneToFifty", i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
@@ -120,7 +121,7 @@ attributes #1 = { nounwind readnone }
 !28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187]
 !29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63}
 !30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def]
-!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
 !32 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
 !33 = metadata !{metadata !34}
 !34 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
@@ -133,13 +134,13 @@ attributes #1 = { nounwind readnone }
 !41 = metadata !{i32 786454, metadata !82, null, metadata !"Array1Dim", i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
 !42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def]
 !43 = metadata !{i32 786454, metadata !82, null, metadata !"Array2Dim", i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
-!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
+!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
 !45 = metadata !{metadata !20, metadata !20}
 !46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def]
 !47 = metadata !{i32 786454, metadata !82, null, metadata !"RecordPtr", i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
 !48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
 !49 = metadata !{i32 786454, metadata !82, null, metadata !"RecordType", i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
-!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ]
+!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [def] [from ]
 !51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58}
 !52 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"PtrComp", i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
 !53 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
@@ -149,7 +150,7 @@ attributes #1 = { nounwind readnone }
 !57 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"IntComp", i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
 !58 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"StringComp", i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
 !59 = metadata !{i32 786454, metadata !82, null, metadata !"String30", i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
-!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
+!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
 !61 = metadata !{metadata !62}
 !62 = metadata !{i32 786465, i64 0, i64 31}       ; [ DW_TAG_subrange_type ] [0, 30]
 !63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def]
@@ -162,10 +163,11 @@ attributes #1 = { nounwind readnone }
 !73 = metadata !{i32 191, i32 0, metadata !12, null}
 !74 = metadata !{i32 192, i32 0, metadata !12, null}
 !75 = metadata !{i32 193, i32 0, metadata !76, null}
-!76 = metadata !{i32 786443, metadata !12, i32 193, i32 0, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c]
+!76 = metadata !{i32 786443, metadata !82, metadata !12, i32 193, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c]
 !77 = metadata !{i32 194, i32 0, metadata !76, null}
 !78 = metadata !{i32 195, i32 0, metadata !12, null}
 !79 = metadata !{i32 196, i32 0, metadata !12, null}
 !80 = metadata !{i32 197, i32 0, metadata !12, null}
 !81 = metadata !{i32 198, i32 0, metadata !12, null}
 !82 = metadata !{metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2"}
+!83 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/multiple-aranges.ll b/test/DebugInfo/X86/multiple-aranges.ll
new file mode 100644
index 000000000000..4c205d8e1697
--- /dev/null
+++ b/test/DebugInfo/X86/multiple-aranges.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s | FileCheck %s
+
+; First CU
+; CHECK:      .long   44                      # Length of ARange Set
+; CHECK-NEXT: .short  2                       # DWARF Arange version number
+; CHECK-NEXT: .long   .L.debug_info_begin0    # Offset Into Debug Info Section
+; CHECK-NEXT: .byte   8                       # Address Size (in bytes)
+; CHECK-NEXT: .byte   0                       # Segment Size (in bytes)
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .quad   kittens
+; CHECK-NEXT: .Lset0 = rainbows-kittens
+; CHECK-NEXT: .quad   .Lset0
+; CHECK-NEXT: .quad   0                       # ARange terminator
+; CHECK-NEXT: .quad   0
+
+; Second CU
+; CHECK-NEXT: .long   44                      # Length of ARange Set
+; CHECK-NEXT: .short  2                       # DWARF Arange version number
+; CHECK-NEXT: .long   .L.debug_info_begin1    # Offset Into Debug Info Section
+; CHECK-NEXT: .byte   8                       # Address Size (in bytes)
+; CHECK-NEXT: .byte   0                       # Segment Size (in bytes)
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .quad   rainbows
+; CHECK-NEXT: .Lset1 = .Ldebug_end0-rainbows
+; CHECK-NEXT: .quad   .Lset1
+; CHECK-NEXT: .quad   0                       # ARange terminator
+; CHECK-NEXT: .quad   0
+
+
+; Generated from: clang -c -g -emit-llvm
+;                 llvm-link test1.bc test2.bc -o test.bc
+; test1.c: int kittens = 4;
+; test2.c: int rainbows = 5;
+
+
+
+
+; ModuleID = 'test.bc'
+target triple = "x86_64-unknown-linux-gnu"
+
+@kittens = global i32 4, align 4
+@rainbows = global i32 5, align 4
+
+!llvm.dbg.cu = !{!0, !7}
+!llvm.module.flags = !{!12, !13}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test1.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test1.c", metadata !"/home/kayamon"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"kittens", metadata !"kittens", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @kittens, null} ; [ DW_TAG_variable ] [kittens] [line 1] [def]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/kayamon/test1.c]
+!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test2.c] [DW_LANG_C99]
+!8 = metadata !{metadata !"test2.c", metadata !"/home/kayamon"}
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, null, metadata !"rainbows", metadata !"rainbows", metadata !"", metadata !11, i32 1, metadata !6, i32 0, i32 1, i32* @rainbows, null} ; [ DW_TAG_variable ] [rainbows] [line 1] [def]
+!11 = metadata !{i32 786473, metadata !8}         ; [ DW_TAG_file_type ] [/home/kayamon/test2.c]
+!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
index 7779d1efe917..9a660614827f 100644
--- a/test/DebugInfo/X86/multiple-at-const-val.ll
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -8,7 +8,7 @@
 ; CHECK: DW_TAG_class_type
 ; CHECK: DW_TAG_member
 ; CHECK: badbit
-; CHECK: DW_AT_const_value [DW_FORM_data4]	(0x00000001)
+; CHECK: DW_AT_const_value [DW_FORM_sdata]      (1)
 ; CHECK-NOT: DW_AT_const_value
 ; CHECK: NULL
 
@@ -30,32 +30,34 @@ declare %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"*, i8*, i6
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!1803}
 
-!0 = metadata !{i32 786449, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786,  metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !1802, i32 4, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786,  metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !26}
 !4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ]
 !5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ]
 !25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ]
-!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [_Ios_Iostate] [line 146, size 32, align 32, offset 0] [def] [from ]
 !27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32}
 !28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0]
 !29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1]
 !30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2]
 !31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4]
 !32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536]
-!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ]
+!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null, null} ; [ DW_TAG_class_type ] [os_base] [line 200, size 1728, align 64, offset 0] [def] [from ]
 !50 = metadata !{metadata !77}
-!54 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!54 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !55 = metadata !{metadata !56}
 !56 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !77 = metadata !{i32 786445, metadata !1801, metadata !49, metadata !"badbit", i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ]
 !78 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ]
-!79 = metadata !{i32 786454, metadata !49, metadata !"ostate", metadata !5, i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ]
+!79 = metadata !{i32 786454, metadata !1801, metadata !49, metadata !"ostate", i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ]
 !955 = metadata !{i32 0}
 !956 = metadata !{metadata !960}
-!960 = metadata !{i32 786478, i32 0, metadata !961, metadata !"main", metadata !"main", metadata !"", metadata !961, i32 73, metadata !54, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !955, i32 73} ; [ DW_TAG_subprogram ]
+!960 = metadata !{i32 786478, metadata !1802, null, metadata !"main", metadata !"main", metadata !"", i32 73, metadata !54, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !955, i32 73} ; [ DW_TAG_subprogram ]
 !961 = metadata !{i32 786473, metadata !1802} ; [ DW_TAG_file_type ]
 !1786 = metadata !{metadata !1800}
 !1800 = metadata !{i32 786484, i32 0, metadata !5, metadata !"badbit", metadata !"badbit", metadata !"badbit", metadata !5, i32 331, metadata !78, i32 1, i32 1, i32 1, metadata !77} ; [ DW_TAG_variable ]
 !1801 = metadata !{metadata !"os_base.h", metadata !"/privite/tmp"}
 !1802 = metadata !{metadata !"student2.cpp", metadata !"/privite/tmp"}
+!1803 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index a5f786c26b26..91065a312b6b 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -8,41 +8,45 @@
 ; Check that we can handle non-default array bounds. In this case, the array
 ; goes from [-3, 38].
 
-; CHECK:      0x0000002d:   DW_TAG_base_type [3]
-; CHECK-NEXT: 0x0000002e:     DW_AT_name [DW_FORM_strp]       ( .debug_str[0x00000041] = "int")
-; CHECK-NEXT: 0x00000032:     DW_AT_byte_size [DW_FORM_data1] (0x04)
-; CHECK-NEXT: 0x00000033:     DW_AT_encoding [DW_FORM_data1]  (0x05)
+; CHECK: DW_TAG_class_type
+; CHECK: DW_TAG_member
+; CHECK-NEXT:                   DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "x")
+; CHECK-NEXT:                   DW_AT_type [DW_FORM_ref4]       (cu + 0x{{[0-9a-f]*}} => {[[ARRAY:0x[0-9a-f]*]]})
 
-; CHECK:      0x00000034:   DW_TAG_array_type [4] *
-; CHECK-NEXT: 0x00000035:     DW_AT_type [DW_FORM_ref4]    (cu + 0x0026 => {0x00000026})
+; CHECK: [[ARRAY]]: DW_TAG_array_type [{{.*}}] *
+; CHECK-NEXT:                 DW_AT_type [DW_FORM_ref4]    (cu + 0x{{[0-9a-f]*}} => {[[BASE:0x[0-9a-f]*]]})
 
-; CHECK:      0x00000039:     DW_TAG_subrange_type [5]
-; CHECK-NEXT: 0x0000003a:       DW_AT_type [DW_FORM_ref4]  (cu + 0x002d => {0x0000002d})
-; CHECK-NEXT: 0x0000003e:       DW_AT_lower_bound [DW_FORM_data8]       (0xfffffffffffffffd)
-; CHECK-NEXT: 0x00000046:       DW_AT_upper_bound [DW_FORM_data1]       (0x26)
+; CHECK: DW_TAG_subrange_type
+; CHECK-NEXT:                   DW_AT_type [DW_FORM_ref4]  (cu + 0x{{[0-9a-f]*}} => {[[BASE2:0x[0-9a-f]*]]})
+; CHECK-NEXT:                   DW_AT_lower_bound [DW_FORM_data8]       (0xfffffffffffffffd)
+; CHECK-NEXT:                   DW_AT_upper_bound [DW_FORM_data1]       (0x26)
 
-; CHECK:      0x00000055:     DW_TAG_member [8]
-; CHECK-NEXT: 0x00000056:       DW_AT_name [DW_FORM_strp]       ( .debug_str[0x0000003f] = "x")
-; CHECK-NEXT: 0x0000005a:       DW_AT_type [DW_FORM_ref4]       (cu + 0x0034 => {0x00000034})
+; CHECK: [[BASE]]: DW_TAG_base_type
+; CHECK: [[BASE2]]: DW_TAG_base_type
+; CHECK-NEXT:                 DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "int")
+; CHECK-NEXT:                 DW_AT_byte_size [DW_FORM_data1] (0x04)
+; CHECK-NEXT:                 DW_AT_encoding [DW_FORM_data1]  (0x05)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
 !0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ]
 !8 = metadata !{metadata !9, metadata !14}
 !9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
-!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{metadata !13}
 !13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 39]
 !14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
-!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!17 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 3070ff88b3b9..a5e9632d1178 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -1,16 +1,16 @@
 ; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
-; CHECK: 0x00000027:   DW_TAG_structure_type
-; CHECK: 0x0000002c:     DW_AT_declaration
-; CHECK: 0x0000002d:     DW_AT_APPLE_runtime_class
+; CHECK: DW_TAG_structure_type
+; CHECK:                 DW_AT_declaration
+; CHECK:                 DW_AT_APPLE_runtime_class
 
 %0 = type opaque
 
 @a = common global %0* null, align 8
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !10, !11, !12}
+!llvm.module.flags = !{!9, !10, !11, !12, !14}
 
 !0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
@@ -18,9 +18,10 @@
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
+!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [FooBarBaz] [line 1, size 0, align 0, offset 0] [decl] [from ]
 !9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
 !10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
 !11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
 !12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
 !13 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index c3580a790c17..300f13dc5fb2 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -1,10 +1,17 @@
 ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DW-CHECK
 
-; CHECK: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
+; DW-CHECK: DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000067] = "vla")
 ; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle
 ; DW_AT_location lists yet.
-; CHECK: DW_AT_location [DW_FORM_data4]                      (0x00000000)
+; DW-CHECK: DW_AT_location [DW_FORM_sec_offset]                      (0x00000000)
+
+; Unfortunately llvm-dwarfdump can't unparse a list of DW_AT_locations
+; right now, so we check the asm output:
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK
+; vla should have a register-indirect address at one point.
+; ASM-CHECK: DEBUG_VALUE: vla <- RCX
+; ASM-CHECK: DW_OP_breg2
 
 define void @testVLAwithSize(i32 %s) nounwind uwtable ssp {
 entry:
@@ -58,31 +65,33 @@ declare i8* @llvm.stacksave() nounwind
 declare void @llvm.stackrestore(i8*) nounwind
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29}
 
 !0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !28, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 1, i32 26, metadata !5, null}
 !12 = metadata !{i32 3, i32 13, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !6, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!13 = metadata !{i32 786443, metadata !28, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 8192, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
 !18 = metadata !{i32 3, i32 7, metadata !13, null}
 !19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
 !20 = metadata !{i32 4, i32 7, metadata !13, null}
 !21 = metadata !{i32 5, i32 8, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !6, metadata !13, i32 5, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !28, metadata !13, i32 5, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 6, i32 5, metadata !24, null}
-!24 = metadata !{i32 786443, metadata !6, metadata !22, i32 5, i32 27, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 786443, metadata !28, metadata !22, i32 5, i32 27, i32 2} ; [ DW_TAG_lexical_block ]
 !25 = metadata !{i32 7, i32 3, metadata !24, null}
 !26 = metadata !{i32 5, i32 22, metadata !22, null}
 !27 = metadata !{i32 8, i32 1, metadata !13, null}
 !28 = metadata !{metadata !"bar.c", metadata !"/Users/echristo/tmp"}
+!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll
new file mode 100644
index 000000000000..fa91bd27ae28
--- /dev/null
+++ b/test/DebugInfo/X86/parameters.ll
@@ -0,0 +1,116 @@
+; REQUIRES: object-emission
+;
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Test case derived from compiling the following source with clang -g:
+;
+; namespace pr14763 {
+; struct foo {
+;   foo(const foo&);
+; };
+;
+; foo func(foo f) {
+;   return f; // reference 'f' for now because otherwise we hit another bug
+; }
+;
+; void sink(void*);
+;
+; void func2(bool b, foo g) {
+;   if (b)
+;     sink(&g); // reference 'g' for now because otherwise we hit another bug
+; }
+; }
+
+; CHECK: debug_info contents
+; CHECK: DW_AT_name{{.*}} = "f"
+; 0x74 is DW_OP_breg4, showing that the parameter is accessed indirectly
+; (with a zero offset) from the register parameter
+; CHECK: DW_AT_location{{.*}}(<0x0{{.}}> 74 00
+
+; CHECK: DW_AT_name{{.*}} = "g"
+; CHECK: DW_AT_location{{.*}}([[G_LOC:0x[0-9a-f]*]])
+; CHECK: debug_loc contents
+; CHECK-NEXT: [[G_LOC]]: Beginning
+; CHECK-NEXT:               Ending
+; CHECK-NEXT: Location description: 74 00
+
+%"struct.pr14763::foo" = type { i8 }
+
+; Function Attrs: uwtable
+define void @_ZN7pr147634funcENS_3fooE(%"struct.pr14763::foo"* noalias sret %agg.result, %"struct.pr14763::foo"* %f) #0 {
+entry:
+  call void @llvm.dbg.declare(metadata !{%"struct.pr14763::foo"* %f}, metadata !22), !dbg !24
+  call void @_ZN7pr147633fooC1ERKS0_(%"struct.pr14763::foo"* %agg.result, %"struct.pr14763::foo"* %f), !dbg !25
+  ret void, !dbg !25
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @_ZN7pr147633fooC1ERKS0_(%"struct.pr14763::foo"*, %"struct.pr14763::foo"*) #2
+
+; Function Attrs: uwtable
+define void @_ZN7pr147635func2EbNS_3fooE(i1 zeroext %b, %"struct.pr14763::foo"* %g) #0 {
+entry:
+  %b.addr = alloca i8, align 1
+  %frombool = zext i1 %b to i8
+  store i8 %frombool, i8* %b.addr, align 1
+  call void @llvm.dbg.declare(metadata !{i8* %b.addr}, metadata !26), !dbg !27
+  call void @llvm.dbg.declare(metadata !{%"struct.pr14763::foo"* %g}, metadata !28), !dbg !27
+  %0 = load i8* %b.addr, align 1, !dbg !29
+  %tobool = trunc i8 %0 to i1, !dbg !29
+  br i1 %tobool, label %if.then, label %if.end, !dbg !29
+
+if.then:                                          ; preds = %entry
+  %1 = bitcast %"struct.pr14763::foo"* %g to i8*, !dbg !31
+  call void @_ZN7pr147634sinkEPv(i8* %1), !dbg !31
+  br label %if.end, !dbg !31
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void, !dbg !32
+}
+
+declare void @_ZN7pr147634sinkEPv(i8*) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !33}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/pass.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"pass.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !17}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_ZN7pr147634funcENS_3fooE", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.pr14763::foo"*, %"struct.pr14763::foo"*)* @_ZN7pr147634funcENS_3fooE, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!5 = metadata !{i32 786489, metadata !1, null, metadata !"pr14763", i32 1} ; [ DW_TAG_namespace ] [pr14763] [line 1]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"foo", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 2, size 8, align 8, offset 0] [def] [from ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 3} ; [ DW_TAG_subprogram ] [line 3] [foo]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{null, metadata !13, metadata !14}
+!13 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo]
+!14 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo]
+!16 = metadata !{i32 786468}
+!17 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func2", metadata !"func2", metadata !"_ZN7pr147635func2EbNS_3fooE", i32 12, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i1, %"struct.pr14763::foo"*)* @_ZN7pr147635func2EbNS_3fooE, null, null, metadata !2, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [func2]
+!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{null, metadata !20, metadata !8}
+!20 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!22 = metadata !{i32 786689, metadata !4, metadata !"f", metadata !23, i32 16777222, metadata !8, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [f] [line 6]
+!23 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/pass.cpp]
+!24 = metadata !{i32 6, i32 0, metadata !4, null}
+!25 = metadata !{i32 7, i32 0, metadata !4, null}
+!26 = metadata !{i32 786689, metadata !17, metadata !"b", metadata !23, i32 16777228, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 12]
+!27 = metadata !{i32 12, i32 0, metadata !17, null}
+!28 = metadata !{i32 786689, metadata !17, metadata !"g", metadata !23, i32 33554444, metadata !8, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [g] [line 12]
+!29 = metadata !{i32 13, i32 0, metadata !30, null}
+!30 = metadata !{i32 786443, metadata !1, metadata !17, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/pass.cpp]
+!31 = metadata !{i32 14, i32 0, metadata !30, null}
+!32 = metadata !{i32 15, i32 0, metadata !17, null}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index b06535325368..cf789b202343 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -9,16 +9,18 @@
 @crass = common global %struct.crass zeroinitializer, align 8
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 720937, metadata !13} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [crass] [line 1, size 64, align 64, offset 0] [def] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786445, metadata !13, metadata !7, metadata !"ptr", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
 !10 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
 !11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
 !12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !13 = metadata !{metadata !"foo.c", metadata !"/Users/echristo/tmp"}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 54e0c8b6270c..caa24eeb8f66 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -3,8 +3,11 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: 0x0000005c:     DW_TAG_subprogram [5]
-; CHECK: 0x0000007c:     DW_AT_specification [DW_FORM_ref4]      (cu + 0x005c => {0x0000005c})
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_name [DW_FORM_strp]	( .debug_str[0x{{[0-9a-f]*}}] = "zed")
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_specification [DW_FORM_ref4]      (cu + {{.*}} => {[[BACK:0x[0-9a-f]*]]})
+; CHECK: [[BACK]]:     DW_TAG_subprogram
 
 %struct.foo = type { i8 }
 
@@ -30,26 +33,27 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33}
 
 !0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !20}
-!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !21, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [zed]
 !6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
 !9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
 !11 = metadata !{metadata !12}
 !12 = metadata !{i32 720942, metadata !6, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !14 = metadata !{null, metadata !15}
-!15 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
 !16 = metadata !{metadata !17}
 !17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
 !21 = metadata !{metadata !22}
 !22 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
 !23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -62,3 +66,4 @@ entry:
 !30 = metadata !{i32 2, i32 15, metadata !31, null}
 !31 = metadata !{i32 786443, metadata !6, metadata !20, i32 2, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
 !32 = metadata !{metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build"}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index 295c018c5e13..6dea4a0cd982 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -76,48 +76,49 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!162}
 
-!0 = metadata !{i32 786449, i32 4, metadata !159, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !161, i32 4, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
-!5 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786434, null, metadata !"BPLFunctionWriter", metadata !6, i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786434, metadata !160, null, metadata !"BPLFunctionWriter", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [BPLFunctionWriter] [line 15, size 64, align 64, offset 0] [def] [from ]
 !11 = metadata !{metadata !12, metadata !103}
-!12 = metadata !{i32 786445, metadata !10, metadata !"MW", metadata !6, i32 16, i64 64, i64 64, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786434, null, metadata !"BPLModuleWriter", metadata !6, i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ]
+!12 = metadata !{i32 786445, metadata !160, metadata !10, metadata !"MW", i32 16, i64 64, i64 64, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786434, metadata !160, null, metadata !"BPLModuleWriter", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_class_type ] [BPLModuleWriter] [line 12, size 8, align 8, offset 0] [def] [from ]
 !15 = metadata !{metadata !16}
-!16 = metadata !{i32 786478, metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{null, metadata !19, metadata !20}
-!19 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 786434, null, metadata !"function<void ()>", metadata !6, i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97} ; [ DW_TAG_class_type ]
+!19 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 786434, metadata !160, null, metadata !"function<void ()>", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97, null} ; [ DW_TAG_class_type ] [function<void ()>] [line 6, size 8, align 8, offset 0] [def] [from ]
 !21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92}
-!22 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
-!23 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!22 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !24 = metadata !{null, metadata !25, metadata !26}
-!25 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
-!26 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null} ; [ DW_TAG_class_type ]
+!25 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
+!26 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_class_type ] [line 20, size 8, align 8, offset 0] [def] [from ]
 !27 = metadata !{metadata !28, metadata !35, metadata !41}
-!28 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
-!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!28 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !30 = metadata !{null, metadata !31}
-!31 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ]
+!31 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ]
 !33 = metadata !{metadata !34}
 !34 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"~", metadata !"~", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
-!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!35 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"~", metadata !"~", metadata !"", i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
+!36 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !37 = metadata !{null, metadata !38}
-!38 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ]
+!38 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ]
 !39 = metadata !{metadata !40}
 !40 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!41 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
-!42 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!41 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"", metadata !"", metadata !"", i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
+!42 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !43 = metadata !{null, metadata !38, metadata !44}
 !44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ]
 !45 = metadata !{metadata !46}
@@ -126,33 +127,33 @@ entry:
 !48 = metadata !{i32 786479, null, metadata !"_Functor", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
 !49 = metadata !{metadata !50}
 !50 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!51 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
-!52 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!51 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
+!52 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !53 = metadata !{null, metadata !25, metadata !20}
 !54 = metadata !{metadata !55}
 !55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
 !56 = metadata !{metadata !57}
 !57 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!58 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
-!59 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!58 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
+!59 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !60 = metadata !{null, metadata !25, metadata !61}
-!61 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null} ; [ DW_TAG_class_type ]
+!61 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null, null} ; [ DW_TAG_class_type ] [line 23, size 8, align 8, offset 0] [def] [from ]
 !62 = metadata !{metadata !63, metadata !70, metadata !76}
-!63 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
-!64 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!63 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
+!64 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !65 = metadata !{null, metadata !66}
-!66 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ]
-!67 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ]
+!66 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ]
+!67 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ]
 !68 = metadata !{metadata !69}
 !69 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!70 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"~", metadata !"~", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
-!71 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!70 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"~", metadata !"~", metadata !"", i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
+!71 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !72 = metadata !{null, metadata !73}
-!73 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ]
+!73 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ]
 !74 = metadata !{metadata !75}
 !75 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!76 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
-!77 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!76 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"", metadata !"", metadata !"", i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
+!77 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !78 = metadata !{null, metadata !73, metadata !79}
 !79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ]
 !80 = metadata !{metadata !81}
@@ -161,38 +162,38 @@ entry:
 !83 = metadata !{i32 786479, null, metadata !"_Functor", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
 !84 = metadata !{metadata !85}
 !85 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!86 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function", metadata !"function", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
-!87 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!86 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function", metadata !"function", metadata !"", i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
+!87 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !88 = metadata !{null, metadata !25, metadata !89}
 !89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ]
 !90 = metadata !{metadata !91}
 !91 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!92 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
-!93 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!92 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"~function", metadata !"~function", metadata !"", i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
+!93 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !94 = metadata !{null, metadata !25}
 !95 = metadata !{metadata !96}
 !96 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
 !97 = metadata !{metadata !98}
 !98 = metadata !{i32 786479, null, metadata !"T", metadata !99, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!99 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !100, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!99 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !100, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !100 = metadata !{null}
 !101 = metadata !{metadata !102}
 !102 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!103 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
+!103 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
 !104 = metadata !{metadata !105}
 !105 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!106 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!107 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
-!108 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!106 = metadata !{i32 786478, metadata !6, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!107 = metadata !{i32 786478, metadata !6, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!108 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !109 = metadata !{null, metadata !110}
 !110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ]
 !111 = metadata !{metadata !112}
 !112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!113 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
-!114 = metadata !{i32 786434, null, metadata !"_Base_manager", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null} ; [ DW_TAG_class_type ]
+!113 = metadata !{i32 786478, metadata !6, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
+!114 = metadata !{i32 786434, metadata !160, null, metadata !"_Base_manager", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null, null} ; [ DW_TAG_class_type ] [_Base_manager] [line 1, size 8, align 8, offset 0] [def] [from ]
 !115 = metadata !{metadata !116, metadata !113}
-!116 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
-!117 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!116 = metadata !{i32 786478, metadata !6, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
+!117 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !118 = metadata !{null, metadata !119}
 !119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ]
 !120 = metadata !{metadata !121}
@@ -201,21 +202,21 @@ entry:
 !123 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
 !124 = metadata !{metadata !125}
 !125 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!126 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!127 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!126 = metadata !{i32 786478, metadata !6, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!127 = metadata !{i32 786478, metadata !6, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
 !128 = metadata !{metadata !130}
-!130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true} ; [ DW_TAG_variable ]
-!131 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
-!132 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true, null} ; [ DW_TAG_variable ]
+!131 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
+!132 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
 !133 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777235, metadata !134, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!134 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!134 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
 !135 = metadata !{i32 19, i32 39, metadata !5, null}
 !136 = metadata !{i32 20, i32 17, metadata !137, null}
 !137 = metadata !{i32 786443, metadata !5, i32 19, i32 51, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
 !138 = metadata !{i32 23, i32 17, metadata !137, null}
 !139 = metadata !{i32 26, i32 15, metadata !137, null}
 !140 = metadata !{i32 786689, metadata !106, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!141 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
+!141 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
 !142 = metadata !{i32 8, i32 45, metadata !106, null}
 !143 = metadata !{i32 786689, metadata !106, metadata !"__f", metadata !6, i32 33554440, metadata !61, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !144 = metadata !{i32 8, i32 63, metadata !106, null}
@@ -233,4 +234,7 @@ entry:
 !156 = metadata !{i32 10, i32 13, metadata !155, null}
 !157 = metadata !{i32 4, i32 5, metadata !158, null}
 !158 = metadata !{i32 786443, metadata !127, i32 3, i32 105, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
-!159 = metadata !{i32 786473, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
+!159 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
+!160 = metadata !{metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta"}
+!161 = metadata !{metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta"}
+!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
index 63ddfa7a7455..473786216e29 100644
--- a/test/DebugInfo/X86/pr13303.ll
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -13,14 +13,17 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
-!6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 786478, metadata !12, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 1, i32 14, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
+!11 = metadata !{i32 786443, metadata !12, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
+!12 = metadata !{metadata !"PR13303.c", metadata !"/home/probinson"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/pr9951.ll b/test/DebugInfo/X86/pr9951.ll
index cb348e2c9adc..d933beb5536f 100644
--- a/test/DebugInfo/X86/pr9951.ll
+++ b/test/DebugInfo/X86/pr9951.ll
@@ -6,15 +6,17 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!9}
 !6 = metadata !{metadata !0}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !6, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !7, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!1 = metadata !{i32 786473, metadata !7} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !7, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !6, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !7, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2"}
+!8 = metadata !{i32 0}
 
 ; CHECK:      _f:                                     ## @f
 ; CHECK-NEXT: Ltmp0:
@@ -22,3 +24,4 @@ entry:
 ; CHECK:      Ltmp9 = (Ltmp3-Ltmp2)-0
 ; CHECK-NEXT:	.long	Ltmp9
 ; CHECK-NEXT:	.quad	Ltmp0
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index 00ee7a03f46b..b37e41ac8c9f 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -19,15 +19,18 @@ entry:
 declare i32 @callme(i32)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
-!6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 786478, metadata !13, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
+!6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 5, i32 3, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
+!11 = metadata !{i32 786443, metadata !13, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
 !12 = metadata !{i32 6, i32 3, metadata !11, null}
+!13 = metadata !{metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/ref_addr_relocation.ll b/test/DebugInfo/X86/ref_addr_relocation.ll
new file mode 100644
index 000000000000..fc5197d78ebe
--- /dev/null
+++ b/test/DebugInfo/X86/ref_addr_relocation.ll
@@ -0,0 +1,71 @@
+; RUN: llc -filetype=asm -O0 -mtriple=x86_64-linux-gnu < %s | FileCheck %s
+; RUN: llc -filetype=obj -O0 %s -mtriple=x86_64-linux-gnu -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s -check-prefix=CHECK-DWARF
+
+; RUN: llc -filetype=obj %s -mtriple=x86_64-apple-darwin -o %t2
+; RUN: llvm-dwarfdump %t2 | FileCheck %s -check-prefix=DARWIN-DWARF
+
+; Testing case generated from:
+; clang++ tu1.cpp tu2.cpp -g -emit-llvm -c
+; llvm-link tu1.bc tu2.bc -o tu12.ll -S
+; cat hdr.h
+; struct foo {
+; };
+; cat tu1.cpp
+; #include "hdr.h"
+; foo f;
+; cat tu2.cpp
+; #include "hdr.h"
+; foo g;
+
+; Make sure we use relocation for ref_addr on non-darwin platforms.
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_variable
+; CHECK: .long [[TYPE:.*]] # DW_AT_type
+; CHECK: DW_TAG_structure_type
+; CHECK: debug_info_end0
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG_structure_type
+; This variable's type is in the 1st CU.
+; CHECK: DW_TAG_variable
+; Make sure this is relocatable.
+; CHECK: .quad .Lsection_info+[[TYPE]] # DW_AT_type
+; CHECK-NOT: DW_TAG_structure_type
+; CHECK: debug_info_end1
+
+; CHECK-DWARF: DW_TAG_compile_unit
+; CHECK-DWARF: 0x[[ADDR:.*]]: DW_TAG_structure_type
+; CHECK-DWARF: DW_TAG_compile_unit
+; CHECK-DWARF: DW_TAG_variable
+; CHECK-DWARF: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ADDR]])
+
+; DARWIN-DWARF: DW_TAG_compile_unit
+; DARWIN-DWARF: 0x[[ADDR:.*]]: DW_TAG_structure_type
+; DARWIN-DWARF: DW_TAG_compile_unit
+; DARWIN-DWARF: DW_TAG_variable
+; DARWIN-DWARF: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ADDR]])
+
+%struct.foo = type { i8 }
+
+@f = global %struct.foo zeroinitializer, align 1
+@g = global %struct.foo zeroinitializer, align 1
+
+!llvm.dbg.cu = !{!0, !9}
+!llvm.module.flags = !{!14, !15}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 191799)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !6, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tu1.cpp", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./hdr.h", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !8, i32 2, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 2] [def]
+!8 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp]
+!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.4 (trunk 191799)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !11, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp] [DW_LANG_C_plus_plus]
+!10 = metadata !{metadata !"tu2.cpp", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786484, i32 0, null, metadata !"g", metadata !"g", metadata !"", metadata !13, i32 2, metadata !4, i32 0, i32 1, %struct.foo* @g, null} ; [ DW_TAG_variable ] [g] [line 2] [def]
+!13 = metadata !{i32 786473, metadata !10}        ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp]
+!14 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/reference-argument.ll b/test/DebugInfo/X86/reference-argument.ll
new file mode 100644
index 000000000000..be54386a4267
--- /dev/null
+++ b/test/DebugInfo/X86/reference-argument.ll
@@ -0,0 +1,103 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=asm %s -o - | FileCheck %s
+; ModuleID = 'aggregate-indirect-arg.cpp'
+; extracted from debuginfo-tests/aggregate-indirect-arg.cpp
+
+; v should not be a pointer.
+; CHECK: ##DEBUG_VALUE: foo:v <- RSI
+; rdar://problem/13658587
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%class.SVal = type { i8*, i32 }
+%class.A = type { i8 }
+
+declare void @_Z3barR4SVal(%class.SVal* %v)
+declare void @llvm.dbg.declare(metadata, metadata) #1
+declare i32 @main()
+; Function Attrs: nounwind ssp uwtable
+define linkonce_odr void @_ZN1A3fooE4SVal(%class.A* %this, %class.SVal* %v) nounwind ssp uwtable align 2 {
+entry:
+  %this.addr = alloca %class.A*, align 8
+  store %class.A* %this, %class.A** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !59), !dbg !61
+  call void @llvm.dbg.declare(metadata !{%class.SVal* %v}, metadata !62), !dbg !61
+  %this1 = load %class.A** %this.addr
+  call void @_Z3barR4SVal(%class.SVal* %v), !dbg !61
+  ret void, !dbg !61
+}
+declare void @_ZN4SValD1Ev(%class.SVal* %this)
+declare void @_ZN4SValD2Ev(%class.SVal* %this)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!47, !68}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [aggregate-indirect-arg.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"aggregate-indirect-arg.cpp", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !29, metadata !33, metadata !34, metadata !35}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3barR4SVal", i32 19, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_Z3barR4SVal, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [bar]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [aggregate-indirect-arg.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from SVal]
+!9 = metadata !{i32 786434, metadata !1, null, metadata !"SVal", i32 12, i64 128, i64 64, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_class_type ] [SVal] [line 12, size 128, align 64, offset 0] [def] [from ]
+!10 = metadata !{metadata !11, metadata !14, metadata !16, metadata !21, metadata !23}
+!11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"Data", i32 15, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [Data] [line 15, size 64, align 64, offset 0] [from ]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"Kind", i32 16, i64 32, i64 32, i64 64, i32 0, metadata !15} ; [ DW_TAG_member ] [Kind] [line 16, size 32, align 32, offset 64] [from unsigned int]
+!15 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!16 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 14, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [~SVal]
+!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{null, metadata !19}
+!19 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from SVal]
+!20 = metadata !{i32 786468}
+!21 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"SVal", metadata !"SVal", metadata !"", i32 12, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !22, i32 12} ; [ DW_TAG_subprogram ] [line 12] [SVal]
+!22 = metadata !{i32 786468}
+!23 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"SVal", metadata !"SVal", metadata !"", i32 12, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 12} ; [ DW_TAG_subprogram ] [line 12] [SVal]
+!24 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = metadata !{null, metadata !19, metadata !26}
+!26 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !27} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from SVal]
+!28 = metadata !{i32 786468}
+!29 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [def] [main]
+!30 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!33 = metadata !{i32 786478, metadata !1, null, metadata !"~SVal", metadata !"~SVal", metadata !"_ZN4SValD1Ev", i32 14, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_ZN4SValD1Ev, null, metadata !16, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal]
+!34 = metadata !{i32 786478, metadata !1, null, metadata !"~SVal", metadata !"~SVal", metadata !"_ZN4SValD2Ev", i32 14, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_ZN4SValD2Ev, null, metadata !16, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal]
+!35 = metadata !{i32 786478, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"_ZN1A3fooE4SVal", i32 22, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, %class.SVal*)* @_ZN1A3fooE4SVal, null, metadata !41, metadata !2, i32 22} ; [ DW_TAG_subprogram ] [line 22] [def] [foo]
+!36 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = metadata !{null, metadata !38, metadata !9}
+!38 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!39 = metadata !{i32 786434, metadata !1, null, metadata !"A", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 20, size 8, align 8, offset 0] [def] [from ]
+!40 = metadata !{metadata !41, metadata !43}
+!41 = metadata !{i32 786478, metadata !1, metadata !39, metadata !"foo", metadata !"foo", metadata !"_ZN1A3fooE4SVal", i32 22, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !42, i32 22} ; [ DW_TAG_subprogram ] [line 22] [foo]
+!42 = metadata !{i32 786468}
+!43 = metadata !{i32 786478, metadata !1, metadata !39, metadata !"A", metadata !"A", metadata !"", i32 20, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !46, i32 20} ; [ DW_TAG_subprogram ] [line 20] [A]
+!44 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!45 = metadata !{null, metadata !38}
+!46 = metadata !{i32 786468}
+!47 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!48 = metadata !{i32 786689, metadata !4, metadata !"v", metadata !5, i32 16777235, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [v] [line 19]
+!49 = metadata !{i32 19, i32 0, metadata !4, null}
+!50 = metadata !{i32 786688, metadata !29, metadata !"v", metadata !5, i32 26, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [v] [line 26]
+!51 = metadata !{i32 26, i32 0, metadata !29, null}
+!52 = metadata !{i32 27, i32 0, metadata !29, null}
+!53 = metadata !{i32 28, i32 0, metadata !29, null}
+!54 = metadata !{i32 786688, metadata !29, metadata !"a", metadata !5, i32 29, metadata !39, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 29]
+!55 = metadata !{i32 29, i32 0, metadata !29, null}
+!56 = metadata !{i32 30, i32 0, metadata !29, null}
+!57 = metadata !{i32 31, i32 0, metadata !29, null}
+!58 = metadata !{i32 32, i32 0, metadata !29, null}
+!59 = metadata !{i32 786689, metadata !35, metadata !"this", metadata !5, i32 16777238, metadata !60, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 22]
+!60 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!61 = metadata !{i32 22, i32 0, metadata !35, null}
+!62 = metadata !{i32 786689, metadata !35, metadata !"v", metadata !5, i32 33554454, metadata !9, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [v] [line 22]
+!63 = metadata !{i32 786689, metadata !33, metadata !"this", metadata !5, i32 16777230, metadata !64, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 14]
+!64 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from SVal]
+!65 = metadata !{i32 14, i32 0, metadata !33, null}
+!66 = metadata !{i32 786689, metadata !34, metadata !"this", metadata !5, i32 16777230, metadata !64, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 14]
+!67 = metadata !{i32 14, i32 0, metadata !34, null}
+!68 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index b5aa4f6432d8..e9ea42718c84 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -21,19 +21,21 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !16, i32 4, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9}
 !9 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_rvalue_reference_type ]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !11 = metadata !{i32 786689, metadata !5, metadata !"i", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !12 = metadata !{i32 4, i32 17, metadata !5, null}
 !13 = metadata !{i32 6, i32 3, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !16, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 7, i32 1, metadata !14, null}
 !16 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
index 620478a879a3..72eb62f3b436 100644
--- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -48,18 +48,18 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0, !10}
+!llvm.module.flags = !{!25}
 !0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
+!1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !23, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
+!5 = metadata !{i32 786478, metadata !23, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
 !6 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !11 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !24, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
+!13 = metadata !{i32 786478, metadata !24, metadata !14, metadata !"fn", metadata !"fn", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
 !14 = metadata !{i32 786473, metadata !24} ; [ DW_TAG_file_type ]
 !15 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777218, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 2]
 !16 = metadata !{i32 2, i32 0, metadata !5, null}
@@ -71,3 +71,4 @@ entry:
 !22 = metadata !{i32 786443, metadata !24, metadata !13, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{metadata !"simple.c", metadata !"/private/tmp"}
 !24 = metadata !{metadata !"simple2.c", metadata !"/private/tmp"}
+!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll
index 4c8521f5d805..6f846c1589a3 100644
--- a/test/DebugInfo/X86/stmt-list.ll
+++ b/test/DebugInfo/X86/stmt-list.ll
@@ -11,10 +11,13 @@ entry:
 }
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7}
 !5 = metadata !{metadata !0}
 
-!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !5, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!1 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !5, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
+!6 = metadata !{metadata !"test2.c", metadata !"/home/espindola/llvm"}
+!7 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index d9604de0f62f..fccac2618cfb 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -4,6 +4,7 @@
 @yyyy = common global i32 0, align 4
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
 
 !0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
@@ -13,9 +14,15 @@
 !7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !"z.c", metadata !"/home/nicholas"}
 
+; Verify that "yyyy" ended up in the stringpool.
+; LINUX: .section .debug_str,"MS",@progbits,1
+; LINUX: yyyy
+; DARWIN: .section __DWARF,__debug_str,regular,debug
+; DARWIN: yyyy
+
 ; Verify that we refer to 'yyyy' with a relocation.
 ; LINUX:      .long   .Linfo_string3          # DW_AT_name
-; LINUX-NEXT: .long   38                      # DW_AT_type
+; LINUX-NEXT: .long   {{[0-9]+}}              # DW_AT_type
 ; LINUX-NEXT:                                 # DW_AT_external
 ; LINUX-NEXT: .byte   1                       # DW_AT_decl_file
 ; LINUX-NEXT: .byte   1                       # DW_AT_decl_line
@@ -24,20 +31,13 @@
 ; LINUX-NEXT: .quad   yyyy
 
 ; Verify that we refer to 'yyyy' without a relocation.
-; DARWIN: Lset5 = Linfo_string3-Linfo_string          ## DW_AT_name
-; DARWIN-NEXT:        .long   Lset5
-; DARWIN-NEXT:        .long   39                      ## DW_AT_type
-; DARWIN-NEXT:        .byte   1                       ## DW_AT_external
+; DARWIN: Lset[[ID:[0-9]+]] = Linfo_string3-Linfo_string ## DW_AT_name
+; DARWIN-NEXT:        .long   Lset[[ID]]
+; DARWIN-NEXT:        .long   {{[0-9]+}}              ## DW_AT_type
+; DARWIN-NEXT:                                        ## DW_AT_external
 ; DARWIN-NEXT:        .byte   1                       ## DW_AT_decl_file
 ; DARWIN-NEXT:        .byte   1                       ## DW_AT_decl_line
 ; DARWIN-NEXT:        .byte   9                       ## DW_AT_location
 ; DARWIN-NEXT:        .byte   3
 ; DARWIN-NEXT:        .quad   _yyyy
-
-; Verify that "yyyy" ended up in the stringpool.
-; LINUX: .section .debug_str,"MS",@progbits,1
-; LINUX-NOT: .section
-; LINUX: yyyy
-; DARWIN: .section __DWARF,__debug_str,regular,debug
-; DARWIN-NOT: .section
-; DARWIN: yyyy
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index 76cb1f70634b..95bdd41fb063 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Make sure that structures have a decl file and decl line attached.
-; CHECK: DW_TAG_structure_type [3]
+; CHECK: DW_TAG_structure_type
 ; CHECK: DW_AT_decl_file
 ; CHECK: DW_AT_decl_line
 ; CHECK: DW_TAG_member
@@ -12,14 +12,16 @@
 @f = common global %struct.foo zeroinitializer, align 4
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ]
 !6 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786445, metadata !11, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
 !10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !11 = metadata !{metadata !"struct_bug.c", metadata !"/Users/echristo/tmp"}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
index da9589338439..05b147765b15 100644
--- a/test/DebugInfo/X86/subrange-type.ll
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -2,10 +2,10 @@
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
 ; Make sure that the base type from the subrange type has a name.
-; CHECK: 0x0000006b:   DW_TAG_base_type [6]
+; CHECK: DW_TAG_subrange_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]     (cu + 0x{{[0-9a-f]+}} => {[[SUBTYPE:0x[0-9a-f]*]]})
+; CHECK: [[SUBTYPE]]: DW_TAG_base_type
 ; CHECK-NEXT: DW_AT_name
-; CHECK: DW_TAG_subrange_type [8]
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]     (cu + 0x006b => {0x0000006b})
 
 define i32 @main() nounwind uwtable {
 entry:
@@ -19,20 +19,22 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
 
 !0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
 !6 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786688, metadata !11, metadata !"i", metadata !6, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 4]
 !11 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c]
-!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
+!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
 !13 = metadata !{metadata !14}
 !14 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
 !15 = metadata !{i32 4, i32 0, metadata !11, null}
 !16 = metadata !{i32 6, i32 0, metadata !11, null}
 !17 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index c7f86381204c..162c2d166d72 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -17,14 +17,17 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!11}
 !9 = metadata !{metadata !1}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !5, metadata !5, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{null}
-!6 = metadata !{i32 786468, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, null, metadata !3, metadata !"short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 4, i32 22, metadata !8, null}
-!8 = metadata !{i32 786443, metadata !2, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786443, metadata !10, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build"}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/template.ll b/test/DebugInfo/X86/template.ll
new file mode 100644
index 000000000000..64a8f7a87d15
--- /dev/null
+++ b/test/DebugInfo/X86/template.ll
@@ -0,0 +1,126 @@
+; REQUIRES: object-emission
+
+; RUN: llc -mtriple=x86_64-linux -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated with `clang++ -g -emit-llvm -S` from the following code:
+; template<int x, int*, template<typename> class y, int ...z>  int func() { return 3; }
+; template<typename> struct y_impl { struct nested { }; };
+; int glbl = func<3, &glbl, y_impl, 1, 2>();
+; y_impl<int>::nested n;
+
+; CHECK: [[INT:0x[0-9a-f]*]]:{{ *}}DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name{{.*}} = "int"
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name{{.*}}"y_impl<int>"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_template_type_parameter
+
+; CHECK: DW_AT_name{{.*}}"func<3, &glbl, y_impl, 1, 2>"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_template_value_parameter
+; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]}
+; CHECK-NEXT: DW_AT_name{{.*}}= "x"
+
+; This could be made shorter by encoding it as _sdata rather than data4, or
+; even as data1. DWARF strongly urges implementations to prefer 
+; _sdata/_udata rather than dataN
+
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(3)
+
+; CHECK: DW_TAG_template_value_parameter
+; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INTPTR:0x[0-9a-f]*]]}
+
+; The address of the global 'glbl', followed by DW_OP_stack_value (9f), to use
+; the value immediately, rather than indirecting through the address.
+
+; CHECK-NEXT: DW_AT_location [DW_FORM_block1]{{ *}}(<0x0a> 03 00 00 00 00 00 00 00 00 9f )
+; CHECK-NOT: NULL
+
+; CHECK: DW_TAG_GNU_template_template_param
+; CHECK-NEXT: DW_AT_name{{.*}}= "y"
+; CHECK-NEXT: DW_AT_GNU_template_name{{.*}}= "y_impl"
+; CHECK-NOT: NULL
+
+; CHECK: DW_TAG_GNU_template_parameter_pack
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_template_value_parameter
+; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]}
+; CHECK-NEXT: DW_AT_const_value  [DW_FORM_sdata]{{.*}}(1)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_template_value_parameter
+; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]}
+; CHECK-NEXT: DW_AT_const_value  [DW_FORM_sdata]{{.*}}(2)
+
+; CHECK: [[INTPTR]]:{{ *}}DW_TAG_pointer_type
+; CHECK-NEXT: DW_AT_type{{.*}} => {[[INT]]}
+
+%"struct.y_impl<int>::nested" = type { i8 }
+
+@glbl = global i32 0, align 4
+@n = global %"struct.y_impl<int>::nested" zeroinitializer, align 1
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define internal void @__cxx_global_var_init() section ".text.startup" {
+entry:
+  %call = call i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv(), !dbg !33
+  store i32 %call, i32* @glbl, align 4, !dbg !33
+  ret void, !dbg !33
+}
+
+; Function Attrs: nounwind uwtable
+define linkonce_odr i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv() #0 {
+entry:
+  ret i32 3, !dbg !34
+}
+
+define internal void @_GLOBAL__I_a() section ".text.startup" {
+entry:
+  call void @__cxx_global_var_init(), !dbg !35
+  ret void, !dbg !35
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!31, !36}
+!llvm.ident = !{!32}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192849) (llvm/trunk 192850)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !28, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"bar.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"y_impl<int>", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS6y_implIiE"} ; [ DW_TAG_structure_type ] [y_impl<int>] [line 2, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 786479, null, metadata !"", metadata !7, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{i32 786451, metadata !1, metadata !"_ZTS6y_implIiE", metadata !"nested", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTSN6y_implIiE6nestedE"} ; [ DW_TAG_structure_type ] [nested] [line 2, size 8, align 8, offset 0] [def] [from ]
+!9 = metadata !{metadata !10, metadata !14, metadata !26}
+!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 3, metadata !12, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [__cxx_global_var_init]
+!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/bar.cpp]
+!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{null}
+!14 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv", i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv, metadata !17, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func<3, &glbl, y_impl, 1, 2>]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !7}
+!17 = metadata !{metadata !18, metadata !19, metadata !21, metadata !22}
+!18 = metadata !{i32 786480, null, metadata !"x", metadata !7, i32 3, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!19 = metadata !{i32 786480, null, metadata !"", metadata !20, i32* @glbl, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!21 = metadata !{i32 803078, null, metadata !"y", null, metadata !"y_impl", null, i32 0, i32 0} ; [ DW_TAG_GNU_template_template_param ]
+!22 = metadata !{i32 803079, null, metadata !"z", null, metadata !23, null, i32 0, i32 0} ; [ DW_TAG_GNU_template_parameter_pack ]
+!23 = metadata !{metadata !24, metadata !25}
+!24 = metadata !{i32 786480, null, metadata !"", metadata !7, i32 1, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!25 = metadata !{i32 786480, null, metadata !"", metadata !7, i32 2, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!26 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 1, metadata !27, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [local] [def]
+!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = metadata !{metadata !29, metadata !30}
+!29 = metadata !{i32 786484, i32 0, null, metadata !"glbl", metadata !"glbl", metadata !"", metadata !11, i32 3, metadata !7, i32 0, i32 1, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 3] [def]
+!30 = metadata !{i32 786484, i32 0, null, metadata !"n", metadata !"n", metadata !"", metadata !11, i32 4, metadata !8, i32 0, i32 1, %"struct.y_impl<int>::nested"* @n, null} ; [ DW_TAG_variable ] [n] [line 4] [def]
+!31 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!32 = metadata !{metadata !"clang version 3.4 (trunk 192849) (llvm/trunk 192850)"}
+!33 = metadata !{i32 3, i32 0, metadata !10, null}
+!34 = metadata !{i32 1, i32 0, metadata !14, null}
+!35 = metadata !{i32 1, i32 0, metadata !26, null}
+!36 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/tls-fission.ll b/test/DebugInfo/X86/tls-fission.ll
new file mode 100644
index 000000000000..8a25aced8b7b
--- /dev/null
+++ b/test/DebugInfo/X86/tls-fission.ll
@@ -0,0 +1,32 @@
+; RUN: llc -split-dwarf=Enable -mtriple=x86_64-linux -O0 -filetype=asm < %s | FileCheck %s
+
+; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
+; that here instead of raw assembly printing
+
+; CHECK: debug_info.dwo
+; 3 bytes of data in this DW_FORM_block1 representation of the location of 'tls'
+; CHECK: .byte 3{{ *}}# DW_AT_location
+; DW_OP_GNU_const_index (0xfc == 252) to refer to the debug_addr table
+; CHECK-NEXT: .byte 252
+; an index of zero into the debug_addr table
+; CHECK-NEXT: .byte 0
+; DW_OP_GNU_push_tls_address (0xe0 == 224, the same value as DW_OP_lo_user), a GCC/GDB extension used to support TLS
+; CHECK-NEXT: .byte 224
+; check that the expected TLS address description is the first thing in the debug_addr section
+; CHECK: debug_addr
+; CHECK-NEXT: .quad	tls@DTPOFF
+
+@tls = thread_local global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"tls.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
+!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/tls.ll b/test/DebugInfo/X86/tls.ll
new file mode 100644
index 000000000000..745c2f40f67f
--- /dev/null
+++ b/test/DebugInfo/X86/tls.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=x86_64-linux -O0 -filetype=asm < %s | FileCheck %s
+; RUN: llc -mtriple=i386-linux -O0 -filetype=asm < %s | FileCheck --check-prefix=CHECK-32 %s
+
+; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
+; that here instead of raw assembly printing
+
+; 10 bytes of data in this DW_FORM_block1 representation of the location of 'tls'
+; CHECK: .byte	10{{ *}}# DW_AT_location
+; DW_OP_const8u (0x0e == 14) of address
+; CHECK: .byte	14
+; The debug relocation of the address of the tls variable
+; CHECK: .quad	tls@DTPOFF
+; DW_OP_GNU_push_tls_address (0xe0 == 224, the same value as DW_OP_lo_user), a GCC/GDB extension used to support TLS
+; CHECK: .byte	224
+
+; same again, except with a 32 bit address
+; CHECK-32: .byte	6{{ *}}# DW_AT_location
+; CHECK-32: .byte	12
+; CHECK-32: .long	tls@DTPOFF
+; CHECK-32: .byte	224
+
+@tls = thread_local global i32 7, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
+!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
index 8d23caec9686..c70ae0798c71 100644
--- a/test/DebugInfo/X86/union-template.ll
+++ b/test/DebugInfo/X86/union-template.ll
@@ -27,6 +27,7 @@ attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!28}
 
 !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9,  metadata !9, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo/tmp"}
@@ -34,20 +35,20 @@ attributes #1 = { nounwind readnone }
 !3 = metadata !{metadata !4}
 !4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"g", metadata !"g", metadata !"_ZN7PR156371gEf", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (float)* @_ZN7PR156371gEf, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [g]
 !5 = metadata !{i32 786489, metadata !1, null, metadata !"PR15637", i32 1} ; [ DW_TAG_namespace ] [PR15637] [line 1]
-!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null, metadata !8}
 !8 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
 !9 = metadata !{metadata !10}
 !10 = metadata !{i32 786484, i32 0, metadata !5, metadata !"f", metadata !"f", metadata !"_ZN7PR156371fE", metadata !11, i32 6, metadata !12, i32 0, i32 1, %"union.PR15637::Value"* @_ZN7PR156371fE, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
 !11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cc]
-!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [from ]
+!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21, null} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [def] [from ]
 !13 = metadata !{metadata !14, metadata !16}
 !14 = metadata !{i32 786445, metadata !1, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
 !15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !16 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"Value", metadata !"Value", metadata !"", i32 2, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !20, i32 2} ; [ DW_TAG_subprogram ] [line 2] [Value]
-!17 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{null, metadata !19}
-!19 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>]
+!19 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>]
 !20 = metadata !{i32 786468}
 !21 = metadata !{metadata !22}
 !22 = metadata !{i32 786479, null, metadata !"T", metadata !8, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
@@ -56,3 +57,4 @@ attributes #1 = { nounwind readnone }
 !25 = metadata !{i32 786688, metadata !4, metadata !"tempValue", metadata !11, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tempValue] [line 4]
 !26 = metadata !{i32 4, i32 0, metadata !4, null}
 !27 = metadata !{i32 5, i32 0, metadata !4, null}
+!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
index 658303a57fd6..6e14ed67fc30 100644
--- a/test/DebugInfo/X86/vector.ll
+++ b/test/DebugInfo/X86/vector.ll
@@ -10,14 +10,15 @@
 @a = common global <4 x i32> zeroinitializer, align 16
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def]
 !6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786454, metadata !12, null, metadata !"v4si", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
@@ -26,3 +27,4 @@
 ; Check that we get an array type with a vector attribute.
 ; CHECK: DW_TAG_array_type
 ; CHECK-NEXT: DW_AT_GNU_vector
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/vla.ll b/test/DebugInfo/X86/vla.ll
new file mode 100644
index 000000000000..512b22323c89
--- /dev/null
+++ b/test/DebugInfo/X86/vla.ll
@@ -0,0 +1,107 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=asm %s -o - | FileCheck %s
+; Ensure that we generate an indirect location for the variable length array a.
+; CHECK: ##DEBUG_VALUE: vla:a <- RDX
+; CHECK: DW_OP_breg1
+; rdar://problem/13658587
+;
+; generated from:
+;
+; int vla(int n) {
+;   int a[n];
+;   a[0] = 42;
+;   return a[n-1];
+; }
+;
+; int main(int argc, char** argv) {
+;    return vla(argc);
+; }
+
+; ModuleID = 'vla.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @vla(i32 %n) nounwind ssp uwtable { ; IR for `int vla(int n)` from the C source quoted at the top of this test
+entry:
+  %n.addr = alloca i32, align 4 ; stack slot for parameter n
+  %saved_stack = alloca i8* ; slot for the pointer returned by llvm.stacksave
+  %cleanup.dest.slot = alloca i32 ; only written (with 1) below; never read in this function
+  store i32 %n, i32* %n.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %n.addr}, metadata !15), !dbg !16 ; !15 = DW_TAG_arg_variable n
+  %0 = load i32* %n.addr, align 4, !dbg !17
+  %1 = zext i32 %0 to i64, !dbg !17 ; n zero-extended to i64: element count for the dynamic alloca
+  %2 = call i8* @llvm.stacksave(), !dbg !17
+  store i8* %2, i8** %saved_stack, !dbg !17 ; kept so it can be passed to llvm.stackrestore before returning
+  %vla = alloca i32, i64 %1, align 16, !dbg !17 ; storage for `int a[n]`
+  call void @llvm.dbg.declare(metadata !{i32* %vla}, metadata !18), !dbg !17 ; !18 = DW_TAG_auto_variable a, the variable whose location this test inspects
+  %arrayidx = getelementptr inbounds i32* %vla, i64 0, !dbg !22 ; &a[0]
+  store i32 42, i32* %arrayidx, align 4, !dbg !22 ; a[0] = 42
+  %3 = load i32* %n.addr, align 4, !dbg !23
+  %sub = sub nsw i32 %3, 1, !dbg !23 ; n - 1
+  %idxprom = sext i32 %sub to i64, !dbg !23 ; index sign-extended to i64 for the GEP
+  %arrayidx1 = getelementptr inbounds i32* %vla, i64 %idxprom, !dbg !23 ; &a[n-1]
+  %4 = load i32* %arrayidx1, align 4, !dbg !23 ; a[n-1], the value returned below
+  store i32 1, i32* %cleanup.dest.slot
+  %5 = load i8** %saved_stack, !dbg !24
+  call void @llvm.stackrestore(i8* %5), !dbg !24 ; restore the stack pointer saved before the dynamic alloca
+  ret i32 %4, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+; Function Attrs: nounwind
+declare i8* @llvm.stacksave() nounwind
+
+; Function Attrs: nounwind
+declare void @llvm.stackrestore(i8*) nounwind
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main(i32 %argc, i8** %argv) nounwind ssp uwtable { ; IR for `main` from the C source quoted at the top of this test: returns vla(argc)
+entry:
+  %retval = alloca i32, align 4 ; initialised to 0 below; not read again in this function
+  %argc.addr = alloca i32, align 4 ; stack slot for parameter argc
+  %argv.addr = alloca i8**, align 8 ; stack slot for parameter argv
+  store i32 0, i32* %retval
+  store i32 %argc, i32* %argc.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !25), !dbg !26 ; !25 = DW_TAG_arg_variable argc
+  store i8** %argv, i8*** %argv.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !27), !dbg !26 ; !27 = DW_TAG_arg_variable argv
+  %0 = load i32* %argc.addr, align 4, !dbg !28
+  %call = call i32 @vla(i32 %0), !dbg !28 ; vla(argc)
+  ret i32 %call, !dbg !28
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!29}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/vla.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"vla.c", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !9}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"vla", metadata !"vla", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @vla, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [vla]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/vla.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{metadata !8, metadata !8, metadata !12}
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!14 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!15 = metadata !{i32 786689, metadata !4, metadata !"n", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [n] [line 1]
+!16 = metadata !{i32 1, i32 0, metadata !4, null}
+!17 = metadata !{i32 2, i32 0, metadata !4, null}
+!18 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 2, metadata !19, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 2]
+!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !8, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!20 = metadata !{metadata !21}
+!21 = metadata !{i32 786465, i64 0, i64 -1}       ; [ DW_TAG_subrange_type ] [unbounded]
+!22 = metadata !{i32 3, i32 0, metadata !4, null}
+!23 = metadata !{i32 4, i32 0, metadata !4, null}
+!24 = metadata !{i32 5, i32 0, metadata !4, null}
+!25 = metadata !{i32 786689, metadata !9, metadata !"argc", metadata !5, i32 16777223, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 7]
+!26 = metadata !{i32 7, i32 0, metadata !9, null}
+!27 = metadata !{i32 786689, metadata !9, metadata !"argv", metadata !5, i32 33554439, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 7]
+!28 = metadata !{i32 8, i32 0, metadata !9, null}
+!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 7dd57d7db21d..e5e07ffe9942 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -13,16 +13,17 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!16}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
 !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!2 = metadata !{i32 786449, metadata !14, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, metadata !15, metadata !15, metadata !13, null,  null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786688, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!7 = metadata !{i32 786443, metadata !14, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786433, metadata !14, metadata !2, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
 !9 = metadata !{metadata !10}
 ;CHECK: DW_TAG_subrange_type
 ;CHECK-NEXT: DW_AT_type
@@ -34,3 +35,5 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !12 = metadata !{i32 5, i32 3, metadata !7, null}
 !13 = metadata !{metadata !0}
 !14 = metadata !{metadata !"array.c", metadata !"/private/tmp"}
+!15 = metadata !{i32 0}
+!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
index b17affed893c..458fb58f5bc0 100644
--- a/test/DebugInfo/bug_null_debuginfo.ll
+++ b/test/DebugInfo/bug_null_debuginfo.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
 
-!0 = metadata !{null, null, null}
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"", i1 false, metadata !"", i32 0, null, null, null,  null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"t", metadata !""}
+!2 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/debuginfofinder-multiple-cu.ll b/test/DebugInfo/debuginfofinder-multiple-cu.ll
new file mode 100644
index 000000000000..74965df591a6
--- /dev/null
+++ b/test/DebugInfo/debuginfofinder-multiple-cu.ll
@@ -0,0 +1,41 @@
+; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s
+
+; Produced from linking:
+;   /tmp/test1.c containing f()
+;   /tmp/test2.c containing g()
+
+; Verify that both compile units and both their contained functions are
+; listed by DebugInfoFinder:
+;CHECK: Compile Unit: [ DW_TAG_compile_unit ] [/tmp/test1.c] [DW_LANG_C99]
+;CHECK: Compile Unit: [ DW_TAG_compile_unit ] [/tmp/test2.c] [DW_LANG_C99]
+;CHECK: Subprogram: [ DW_TAG_subprogram ] [line 1] [def] [f]
+;CHECK: Subprogram: [ DW_TAG_subprogram ] [line 1] [def] [g]
+
+define void @f() { ; described by subprogram !4, which is listed under the first compile unit (!0, test1.c)
+  ret void, !dbg !14 ; !14 = line 1, scope !4
+}
+
+define void @g() { ; described by subprogram !11, which is listed under the second compile unit (!8, test2.c)
+  ret void, !dbg !15 ; !15 = line 1, scope !11
+}
+
+!llvm.dbg.cu = !{!0, !8}
+!llvm.module.flags = !{!13, !16}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (192092)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/test1.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test1.c", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/test1.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.4 (192092)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !10, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/test2.c] [DW_LANG_C99]
+!9 = metadata !{metadata !"test2.c", metadata !"/tmp"}
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786478, metadata !9, metadata !12, metadata !"g", metadata !"g", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @g, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [g]
+!12 = metadata !{i32 786473, metadata !9}         ; [ DW_TAG_file_type ] [/tmp/test2.c]
+!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!14 = metadata !{i32 1, i32 0, metadata !4, null}
+!15 = metadata !{i32 1, i32 0, metadata !11, null}
+!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
index 5d330486d88c..fc3363133e68 100644
--- a/test/DebugInfo/dwarf-public-names.ll
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: object-emission
 
-; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s
+; RUN: llc -generate-dwarf-pub-sections=Enable -filetype=obj -o %t.o < %s
 ; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
 ; ModuleID = 'dwarf-public-names.cpp'
 ;
@@ -37,6 +37,7 @@
 
 ; Skip the output to the header of the pubnames section.
 ; CHECK: debug_pubnames
+; CHECK: version = 0x0002
 
 ; Check for each name in the output.
 ; CHECK: global_namespace_variable
@@ -85,30 +86,31 @@ attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!38}
 
-!0 = metadata !{i32 786449, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24,  metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24,  metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
 !3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
 !4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{null, metadata !7}
-!7 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
-!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ]
+!7 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
+!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
 !9 = metadata !{metadata !10, metadata !12, metadata !14}
 !10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
 !11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
 !13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
-!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{metadata !11}
 !17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
 !19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
 !20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
 !21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
-!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !23 = metadata !{null}
 !24 = metadata !{metadata !25, metadata !26, metadata !27}
 !25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
@@ -124,3 +126,4 @@ attributes #1 = { nounwind readnone }
 !35 = metadata !{i32 25, i32 0, metadata !20, null}
 !36 = metadata !{i32 26, i32 0, metadata !20, null}
 !37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"}
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/dwarfdump-debug-loc-simple.test b/test/DebugInfo/dwarfdump-debug-loc-simple.test
new file mode 100644
index 000000000000..77dfa2558c0a
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-debug-loc-simple.test
@@ -0,0 +1,26 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-loc-list-32bit.elf.o | FileCheck %s
+Note: the input file was generated from Inputs/dwarfdump-test-loc-list-32bit.elf.cpp
+
+CHECK: .debug_info
+CHECK: DW_AT_name{{.*}}"f"
+CHECK: DW_AT_location{{.*}}([[F_LOC:0x[0-9a-f]*]])
+CHECK: DW_AT_name{{.*}}"g"
+CHECK: DW_AT_location{{.*}}([[G_LOC:0x[0-9a-f]*]])
+CHECK: .debug_loc contents:
+CHECK-NEXT: [[F_LOC]]: Beginning address offset: 0x0000000000000000
+CHECK-NEXT:               Ending address offset: 0x0000000000000023
+This is actually the wrong location due to PR14763, but that doesn't matter for
+the purposes of testing dwarfdump.
+CHECK-NEXT:                Location description: 51
+CHECK-NEXT: {{^$}}
+CHECK-NEXT:            Beginning address offset: 0x0000000000000023
+CHECK-NEXT:               Ending address offset: 0x000000000000005d
+CHECK-NEXT:                Location description: 75 70
+CHECK-NEXT: {{^$}}
+CHECK-NEXT: [[G_LOC]]: Beginning address offset: 0x0000000000000000
+CHECK-NEXT:               Ending address offset: 0x0000000000000020
+CHECK-NEXT:               Location description: 50
+CHECK-NEXT: {{^$}}
+CHECK-NEXT:            Beginning address offset: 0x0000000000000020
+CHECK-NEXT:               Ending address offset: 0x000000000000005d
+CHECK-NEXT:                Location description: 75 74
diff --git a/test/DebugInfo/dwarfdump-pubnames.test b/test/DebugInfo/dwarfdump-pubnames.test
index e1b16c2f2741..215953abe7ff 100644
--- a/test/DebugInfo/dwarfdump-pubnames.test
+++ b/test/DebugInfo/dwarfdump-pubnames.test
@@ -2,15 +2,13 @@ RUN: llvm-dwarfdump %p/Inputs/dwarfdump-pubnames.elf-x86-64 \
 RUN:   -debug-dump=pubnames | FileCheck %s
 
 CHECK: .debug_pubnames contents:
-CHECK: Length:                161
-CHECK: Version:               2
-CHECK: Offset in .debug_info: 0
-CHECK: Size:                  321
+CHECK: length = 0x000000a1 version = 0x0002 unit_offset = 0x00000000 unit_size = 0x00000141
+
+CHECK: Offset        Name
+CHECK: 0x00000098    "global_namespace_variable"
+CHECK: 0x000000a7    "global_namespace_function"
+CHECK: 0x000000ec    "static_member_function"
+CHECK: 0x0000007c    "global_variable"
+CHECK: 0x00000103    "global_function"
+CHECK: 0x000000c2    "member_function"
 
-CHECK:  Offset    Name
-CHECK:      98    global_namespace_variable
-CHECK:      a7    global_namespace_function
-CHECK:      ec    static_member_function
-CHECK:      7c    global_variable
-CHECK:     103    global_function
-CHECK:      c2    member_function
diff --git a/test/DebugInfo/dwarfdump-type-units.test b/test/DebugInfo/dwarfdump-type-units.test
new file mode 100644
index 000000000000..5fca81d70f21
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-type-units.test
@@ -0,0 +1,32 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-type-units.elf-x86-64 > %t
+RUN: cat %t | FileCheck -check-prefix=FOO %s
+RUN: cat %t | FileCheck -check-prefix=BAR %s
+RUN: llvm-dwarfdump -debug-dump=types %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=TYPES %s
+
+FOO: debug_info contents:
+FOO: DW_TAG_variable
+FOO-NEXT: DW_AT_name {{.*}}"f"
+FOO: DW_AT_type [DW_FORM_ref_sig8] ([[FOO_SIG:0x[0-9a-f]*]])
+
+FOO: debug_types contents:
+FOO: 0x00000000: Type Unit: {{.*}} type_signature = [[FOO_SIG]] type_offset = 0x[[FOO_OFF:[0-9a-f]*]] (next unit at
+FOO: DW_TAG_type_unit
+FOO-NOT: NULL
+FOO: 0x0000[[FOO_OFF]]: DW_TAG_structure_type
+FOO-NEXT: DW_AT_name {{.*}}"foo"
+
+BAR: debug_info contents:
+BAR: DW_TAG_variable
+BAR: DW_TAG_variable
+BAR-NEXT: DW_AT_name {{.*}}"b"
+BAR: DW_AT_type [DW_FORM_ref_sig8] ([[BAR_SIG:0x[0-9a-f]*]])
+
+BAR: debug_types contents:
+BAR: 0x00000000: Type Unit: {{.*}} type_signature = [[BAR_SIG]] type_offset = 0x[[BAR_OFF:[0-9a-f]*]] (next unit at
+BAR: DW_TAG_type_unit
+BAR-NOT: NULL
+BAR: 0x0000[[BAR_OFF]]: DW_TAG_structure_type
+BAR-NEXT: DW_AT_name {{.*}}"bar"
+
+TYPES-NOT: debug_info contents:
+TYPES: debug_types contents:
diff --git a/test/DebugInfo/enum.ll b/test/DebugInfo/enum.ll
new file mode 100644
index 000000000000..bc09846bb8e1
--- /dev/null
+++ b/test/DebugInfo/enum.ll
@@ -0,0 +1,80 @@
+; REQUIRES: object-emission
+
+; RUN: llc -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated from the following code compiled with clang -g:
+; enum e1 { I, J = 0xffffffffU, K = 0xf000000000000000ULL } a;
+; enum e2 { X };
+; void func() {
+;   int b = X;
+; }
+
+; These values were previously being truncated to -1 and 0 respectively.
+
+; CHECK: debug_info contents
+; CHECK: DW_TAG_enumeration_type
+; CHECK-NEXT: DW_AT_name{{.*}} = "e1"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NEXT: DW_AT_name{{.*}} = "J"
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]     (4294967295)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NEXT: DW_AT_name{{.*}} = "K"
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]     (-1152921504606846976)
+
+; Check that we retain enums that aren't referenced by any variables, etc
+; CHECK: DW_TAG_enumeration_type
+; CHECK-NEXT: DW_AT_name{{.*}} = "e2"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NEXT: DW_AT_name{{.*}} = "X"
+
+@a = global i64 0, align 8
+
+; Function Attrs: nounwind uwtable
+define void @_Z4funcv() #0 { ; IR for `void func()` from the C++ source quoted at the top of this test
+entry:
+  %b = alloca i32, align 4 ; stack slot for local `b`
+  call void @llvm.dbg.declare(metadata !{i32* %b}, metadata !20), !dbg !22 ; !20 = DW_TAG_auto_variable b
+  store i32 0, i32* %b, align 4, !dbg !22 ; `int b = X;` -- enumerator X has value 0 (see !10)
+  ret void, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19, !24}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !11, metadata !12, metadata !17, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/enum.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"enum.cpp", metadata !"/tmp"}
+!2 = metadata !{metadata !3, metadata !8}
+!3 = metadata !{i32 786436, metadata !1, null, metadata !"e1", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [e1] [line 1, size 64, align 64, offset 0] [def] [from ]
+!4 = metadata !{metadata !5, metadata !6, metadata !7}
+!5 = metadata !{i32 786472, metadata !"I", i64 0} ; [ DW_TAG_enumerator ] [I :: 0]
+!6 = metadata !{i32 786472, metadata !"J", i64 4294967295} ; [ DW_TAG_enumerator ] [J :: 4294967295]
+!7 = metadata !{i32 786472, metadata !"K", i64 -1152921504606846976} ; [ DW_TAG_enumerator ] [K :: 17293822569102704640]
+!8 = metadata !{i32 786436, metadata !1, null, metadata !"e2", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [e2] [line 2, size 32, align 32, offset 0] [def] [from ]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786472, metadata !"X", i64 0} ; [ DW_TAG_enumerator ] [X :: 0]
+!11 = metadata !{i32 0}
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"func", metadata !"func", metadata !"_Z4funcv", i32 3, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcv, null, null, metadata !11, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [func]
+!14 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/enum.cpp]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null}
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !14, i32 1, metadata !3, i32 0, i32 1, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!20 = metadata !{i32 786688, metadata !13, metadata !"b", metadata !14, i32 4, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 4]
+!21 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!22 = metadata !{i32 4, i32 0, metadata !13, null}
+!23 = metadata !{i32 5, i32 0, metadata !13, null}
+!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll
new file mode 100644
index 000000000000..9a0c32ad91a3
--- /dev/null
+++ b/test/DebugInfo/global.ll
@@ -0,0 +1,39 @@
+; REQUIRES: object-emission
+
+; RUN: llc -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; generated from the following source compiled to bitcode with clang -g -O1
+; static int i;
+; int main() {
+;   (void)&i;
+; }
+
+; CHECK: debug_info contents
+; CHECK: DW_TAG_variable
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @main() #0 { ; body never references `i`; the variable is described only by metadata !10, whose last two operands are null
+entry:
+  ret i32 0, !dbg !12 ; !12 = line 4, scope !4 (subprogram main)
+}
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !13}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/global.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"global.cpp", metadata !"/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/global.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"_ZL1i", metadata !5, i32 1, metadata !8, i32 1, i32 1, null, null}
+!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!12 = metadata !{i32 4, i32 0, metadata !4, null}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/inheritance.ll b/test/DebugInfo/inheritance.ll
index a689cb228181..6b3ae090e151 100644
--- a/test/DebugInfo/inheritance.ll
+++ b/test/DebugInfo/inheritance.ll
@@ -106,46 +106,49 @@ return:                                           ; preds = %bb2
 declare void @_ZdlPv(i8*) nounwind
 
 !0 = metadata !{i32 459008, metadata !1, metadata !"tst", metadata !4, i32 13, metadata !8} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 458763, metadata !2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 458763, metadata !3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 458798, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"main", metadata !4, i32 11, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 458769, i32 0, i32 4, metadata !"inheritance.cpp", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 458773, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 458763, metadata !44, metadata !2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 458763, metadata !44, metadata !3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 458798, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"main", i32 11, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 458769, metadata !44, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !45, metadata !45, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 458788, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 458771, metadata !4, metadata !"test1", metadata !4, i32 1, i64 64, i64 64, i64 0, i32 0, null, metadata !9, i32 0, metadata !8} ; [ DW_TAG_structure_type ]
+!7 = metadata !{i32 458788, null, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 458771, metadata !44, metadata !4, metadata !"test1", i32 1, i64 64, i64 64, i64 0, i32 0, null, metadata !9, i32 0, metadata !8, null, null} ; [ DW_TAG_structure_type ] [test1] [line 1, size 64, align 64, offset 0] [def] [from ]
 !9 = metadata !{metadata !10, metadata !14, metadata !18}
-!10 = metadata !{i32 458765, metadata !8, metadata !"_vptr$test1", metadata !4, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
-!11 = metadata !{i32 458767, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 458767, metadata !4, metadata !"__vtbl_ptr_type", metadata !13, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 458769, i32 0, i32 4, metadata !"<built-in>", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!14 = metadata !{i32 458798, i32 0, metadata !8, metadata !"test1", metadata !"test1", metadata !"", metadata !4, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i1 true} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 458773, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 458765, metadata !44, metadata !8, metadata !"_vptr$test1", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
+!11 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 458767, null, metadata !4, metadata !"__vtbl_ptr_type", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 458769, metadata !46, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !45, metadata !45, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!14 = metadata !{i32 458798, i32 0, metadata !8, metadata !"test1", metadata !"test1", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i1 true, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 458767, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"", metadata !4, i32 4, metadata !19, i1 false, i1 false, i32 1, i32 0, metadata !8, i1 false} ; [ DW_TAG_subprogram ]
-!19 = metadata !{i32 458773, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"", i32 4, metadata !19, i1 false, i1 false, i32 1, i32 0, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!19 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !20 = metadata !{null, metadata !17, metadata !7}
 !21 = metadata !{i32 11, i32 0, metadata !1, null}
 !22 = metadata !{i32 13, i32 0, metadata !1, null}
 !23 = metadata !{i32 14, i32 0, metadata !1, null}
 !24 = metadata !{i32 459009, metadata !25, metadata !"this", metadata !4, i32 13, metadata !26} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 458798, i32 0, metadata !4, metadata !"test1", metadata !"test1", metadata !"_ZN5test1C1Ev", metadata !4, i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 458790, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !27} ; [ DW_TAG_const_type ]
-!27 = metadata !{i32 458767, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{i32 458798, i32 0, metadata !4, metadata !"test1", metadata !"test1", metadata !"_ZN5test1C1Ev", i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!26 = metadata !{i32 458790, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !27} ; [ DW_TAG_const_type ]
+!27 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
 !28 = metadata !{i32 1, i32 0, metadata !25, null}
 !29 = metadata !{i32 1, i32 0, metadata !30, null}
-!30 = metadata !{i32 458763, metadata !31, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 458763, metadata !25, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 458763, metadata !44, metadata !31, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{i32 458763, metadata !44, metadata !25, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !32 = metadata !{i32 459009, metadata !33, metadata !"this", metadata !4, i32 4, metadata !26} ; [ DW_TAG_arg_variable ]
-!33 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"_ZN5test1D1Ev", metadata !4, i32 4, metadata !15, i1 false, i1 true, i32 1, i32 0, metadata !8, i1 false} ; [ DW_TAG_subprogram ]
+!33 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"_ZN5test1D1Ev", i32 4, metadata !15, i1 false, i1 true, i32 1, i32 0, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !34 = metadata !{i32 4, i32 0, metadata !33, null}
 !35 = metadata !{i32 5, i32 0, metadata !36, null}
-!36 = metadata !{i32 458763, metadata !33, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 458763, metadata !44, metadata !33, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !37 = metadata !{i32 6, i32 0, metadata !36, null}
 !38 = metadata !{i32 459009, metadata !39, metadata !"this", metadata !4, i32 4, metadata !26} ; [ DW_TAG_arg_variable ]
-!39 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"_ZN5test1D0Ev", metadata !4, i32 4, metadata !15, i1 false, i1 true, i32 1, i32 1, metadata !8, i1 false} ; [ DW_TAG_subprogram ]
+!39 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"_ZN5test1D0Ev", i32 4, metadata !15, i1 false, i1 true, i32 1, i32 1, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !40 = metadata !{i32 4, i32 0, metadata !39, null}
 !41 = metadata !{i32 5, i32 0, metadata !42, null}
-!42 = metadata !{i32 458763, metadata !39, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 458763, metadata !44, metadata !39, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !43 = metadata !{i32 6, i32 0, metadata !42, null}
+!44 = metadata !{metadata !"inheritance.cpp", metadata !"/tmp/"}
+!45 = metadata !{i32 0}
+!46 = metadata !{metadata !"<built-in>", metadata !"/tmp/"}
diff --git a/test/DebugInfo/inline-debug-info-multiret.ll b/test/DebugInfo/inline-debug-info-multiret.ll
index 108f212a255f..594512f2d8e3 100644
--- a/test/DebugInfo/inline-debug-info-multiret.ll
+++ b/test/DebugInfo/inline-debug-info-multiret.ll
@@ -120,6 +120,7 @@ attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!31}
 
 !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"<unknown>", metadata !""}
@@ -128,11 +129,11 @@ attributes #2 = { nounwind }
 !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
 !5 = metadata !{metadata !"test.cpp", metadata !""}
 !6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [test.cpp]
-!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
-!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !9}
 !13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4]
 !14 = metadata !{i32 4, i32 0, metadata !4, null}
@@ -152,3 +153,4 @@ attributes #2 = { nounwind }
 !28 = metadata !{i32 18, i32 0, metadata !27, null}
 !29 = metadata !{i32 19, i32 0, metadata !10, null}
 !30 = metadata !{i32 20, i32 0, metadata !10, null}
+!31 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/inline-debug-info.ll b/test/DebugInfo/inline-debug-info.ll
index 7c3267abcee3..b56ca95b60f5 100644
--- a/test/DebugInfo/inline-debug-info.ll
+++ b/test/DebugInfo/inline-debug-info.ll
@@ -138,6 +138,7 @@ attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!31}
 
 !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"<unknown>", metadata !""}
@@ -146,11 +147,11 @@ attributes #2 = { nounwind }
 !4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
 !5 = metadata !{metadata !"test.cpp", metadata !""}
 !6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [test.cpp]
-!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
-!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !9}
 !13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4]
 !14 = metadata !{i32 4, i32 0, metadata !4, null}
@@ -170,3 +171,4 @@ attributes #2 = { nounwind }
 !28 = metadata !{i32 18, i32 0, metadata !27, null}
 !29 = metadata !{i32 19, i32 0, metadata !10, null}
 !30 = metadata !{i32 20, i32 0, metadata !10, null}
+!31 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll
new file mode 100644
index 000000000000..1dd5b2c07f94
--- /dev/null
+++ b/test/DebugInfo/inlined-arguments.ll
@@ -0,0 +1,77 @@
+; REQUIRES: object-emission
+
+; RUN: llc -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated from clang -O -g with the following source
+;
+; void f1(int x, int y);
+; void f3(int line);
+; void f2() {
+;   f1(1, 2);
+; }
+; void f1(int x, int y) {
+;   f3(y);
+; }
+
+; CHECK: DW_AT_name{{.*}}"f1"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name{{.*}}"x"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name{{.*}}"y"
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 { ; f2(): the f1(1, 2) call from the source above appears here already inlined
+  tail call void @llvm.dbg.value(metadata !15, i64 0, metadata !16), !dbg !18 ; inlined f1 argument "x" (!16); its value !15 is undef
+  tail call void @llvm.dbg.value(metadata !19, i64 0, metadata !20), !dbg !18 ; inlined f1 argument "y" (!20); its value !19 is i32 2
+  tail call void @_Z2f3i(i32 2), !dbg !21 ; f3(y) from f1's body with y = 2; !21 is scoped to f1 (!8), inlined at !17
+  ret void, !dbg !22
+}
+
+; Function Attrs: uwtable
+define void @_Z2f1ii(i32 %x, i32 %y) #0 { ; out-of-line copy of f1(int x, int y)
+  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !13), !dbg !23 ; ties %x to argument variable "x" (!13)
+  tail call void @llvm.dbg.value(metadata !{i32 %y}, i64 0, metadata !14), !dbg !23 ; ties %y to argument variable "y" (!14)
+  tail call void @_Z2f3i(i32 %y), !dbg !24 ; f3(y)
+  ret void, !dbg !25
+}
+
+declare void @_Z2f3i(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"exp.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f2", metadata !"f2", metadata !"_Z2f2v", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z2f2v, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f2]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"_Z2f1ii", i32 6, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32)* @_Z2f1ii, null, null, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
+!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{null, metadata !11, metadata !11}
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13, metadata !14}
+!13 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !5, i32 16777222, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 6]
+!14 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 33554438, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [y] [line 6]
+!15 = metadata !{i32 undef}
+!16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !5, i32 16777222, metadata !11, i32 0, metadata !17} ; [ DW_TAG_arg_variable ] [x] [line 6]
+!17 = metadata !{i32 4, i32 0, metadata !4, null}
+!18 = metadata !{i32 6, i32 0, metadata !8, metadata !17}
+!19 = metadata !{i32 2}
+!20 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 33554438, metadata !11, i32 0, metadata !17} ; [ DW_TAG_arg_variable ] [y] [line 6]
+!21 = metadata !{i32 7, i32 0, metadata !8, metadata !17}
+!22 = metadata !{i32 5, i32 0, metadata !4, null}
+!23 = metadata !{i32 6, i32 0, metadata !8, null}
+!24 = metadata !{i32 7, i32 0, metadata !8, null}
+!25 = metadata !{i32 8, i32 0, metadata !8, null} ; !dbg location attached to the ret in @_Z2f1ii (scope !8 = f1); not a DW_TAG descriptor
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index 841daaa7f067..34c5101a1426 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -16,17 +16,18 @@ declare void @smth(i32)
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!27}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !26, i32 4, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786478, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !9, metadata !9}
 !13 = metadata !{metadata !14}
 !14 = metadata !{metadata !15, metadata !16}
@@ -52,3 +53,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !24 = metadata !{i32 5, i32 3, metadata !10, metadata !19}
 !25 = metadata !{i32 6, i32 3, metadata !10, metadata !19}
 !26 = metadata !{metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/lit.local.cfg b/test/DebugInfo/lit.local.cfg
deleted file mode 100644
index 00bd9b83b525..000000000000
--- a/test/DebugInfo/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.test']
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
index 163bd8ecb30b..a8799cfa5460 100644
--- a/test/DebugInfo/llvm-symbolizer.test
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -1,17 +1,28 @@
 RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400559" > %t.input
+RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64.debuglink 0x400559" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400436" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 0x568" >> %t.input
 RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input
+RUN: echo "%p/Inputs/macho-universal 0x1f84" >> %t.input
+RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input
+RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input
 
-RUN: llvm-symbolizer --functions --inlining --demangle=false < %t.input \
-RUN:    | FileCheck %s
+RUN: llvm-symbolizer --functions --inlining --demangle=false \
+RUN:    --default-arch=i386 < %t.input | FileCheck %s
 
-REQUIRES: shell
+CHECK:       main
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
 
 CHECK:       main
 CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
+
+CHECK:      _start
+
 CHECK:      _Z1cv
 CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
+
 CHECK:      inlined_h
 CHECK-NEXT: dwarfdump-inl-test.h:2
 CHECK-NEXT: inlined_g
@@ -21,5 +32,28 @@ CHECK-NEXT: dwarfdump-inl-test.cc:3
 CHECK-NEXT: main
 CHECK-NEXT: dwarfdump-inl-test.cc:
 
+CHECK:      inlined_h
+CHECK-NEXT: dwarfdump-inl-test.h:3
+CHECK-NEXT: inlined_g
+CHECK-NEXT: dwarfdump-inl-test.h:7
+CHECK-NEXT: inlined_f
+CHECK-NEXT: dwarfdump-inl-test.cc:3
+CHECK-NEXT: main
+CHECK-NEXT: dwarfdump-inl-test.cc:
+
 CHECK:       _Z3do1v
 CHECK-NEXT: dwarfdump-test3-decl.h:7
+
+CHECK:      main
+CHECK:      _Z3inci
+CHECK:      _Z3inci
+
+RUN: echo "unexisting-file 0x1234" > %t.input2
+RUN: llvm-symbolizer < %t.input2
+
+RUN: echo "%p/Inputs/macho-universal 0x1f84" > %t.input3
+RUN: llvm-symbolizer < %t.input3 | FileCheck %s --check-prefix=UNKNOWN-ARCH
+
+UNKNOWN-ARCH-NOT: main
+UNKNOWN-ARCH: ??
+UNKNOWN-ARCH-NOT: main
diff --git a/test/DebugInfo/member-order.ll b/test/DebugInfo/member-order.ll
new file mode 100644
index 000000000000..a0c283db5a16
--- /dev/null
+++ b/test/DebugInfo/member-order.ll
@@ -0,0 +1,66 @@
+; REQUIRES: object-emission
+
+; RUN: llc -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; generated by clang from:
+; struct foo {
+;   void f1();
+;   void f2();
+; };
+;
+; void foo::f1() {
+; }
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} "foo"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: NULL
+; CHECK: DW_AT_name {{.*}} "f1"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: NULL
+; CHECK: DW_AT_name {{.*}} "f2"
+
+
+%struct.foo = type { i8 }
+
+; Function Attrs: nounwind uwtable
+define void @_ZN3foo2f1Ev(%struct.foo* %this) #0 align 2 { ; definition of foo::f1() from the source above
+entry:
+  %this.addr = alloca %struct.foo*, align 8 ; stack slot holding the incoming %this pointer
+  store %struct.foo* %this, %struct.foo** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !16), !dbg !18 ; describes that slot as variable "this" (!16)
+  %this1 = load %struct.foo** %this.addr ; result is unused in this function
+  ret void, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !20}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !13, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/member-order.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"member-order.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !11}
+!6 = metadata !{i32 786478, metadata !1, metadata !4, metadata !"f1", metadata !"f1", metadata !"_ZN3foo2f1Ev", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !10, i32 2} ; [ DW_TAG_subprogram ] [line 2] [f1]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
+!10 = metadata !{i32 786468}
+!11 = metadata !{i32 786478, metadata !1, metadata !4, metadata !"f2", metadata !"f2", metadata !"_ZN3foo2f2Ev", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [f2]
+!12 = metadata !{i32 786468}
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 786478, metadata !1, null, metadata !"f1", metadata !"f1", metadata !"_ZN3foo2f1Ev", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo2f1Ev, null, metadata !6, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
+!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!16 = metadata !{i32 786689, metadata !14, metadata !"this", null, i32 16777216, metadata !17, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
+!18 = metadata !{i32 0, i32 0, metadata !14, null}
+!19 = metadata !{i32 7, i32 0, metadata !14, null}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
index 20f4e68e2aa8..0bc4ee67265b 100644
--- a/test/DebugInfo/member-pointers.ll
+++ b/test/DebugInfo/member-pointers.ll
@@ -4,12 +4,12 @@
 ; RUN: llc -filetype=obj -O0 < %s > %t
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; CHECK: DW_TAG_ptr_to_member_type
-; CHECK: [[TYPE:.*]]:   DW_TAG_subroutine_type
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]})
+; CHECK: [[TYPE]]:   DW_TAG_subroutine_type
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK-NEXT: DW_AT_type
 ; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag
-; CHECK: DW_TAG_ptr_to_member_type
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE]]})
 ; IR generated from clang -g with the following source:
 ; struct S {
 ; };
@@ -21,18 +21,20 @@
 @y = global { i64, i64 } zeroinitializer, align 8
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !15, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
 !6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
 !8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ]
+!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [def] [from ]
 !10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def]
 !11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !13 = metadata !{null, metadata !14, metadata !8}
-!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S]
+!14 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S]
 !15 = metadata !{metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch"}
+!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
index a7dcf7c6150e..9d0b25c74bae 100644
--- a/test/DebugInfo/namespace.ll
+++ b/test/DebugInfo/namespace.ll
@@ -13,16 +13,36 @@
 ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2:[0-9]]])
 ; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x01)
 ; CHECK-NOT: NULL
-; CHECK: DW_TAG_variable
+; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name{{.*}}= "i"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name{{.*}}= "f1"
+; CHECK: [[FUNC1:0x[0-9a-f]*]]:{{ *}}DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name{{.*}}= "f1"
 ; CHECK: NULL
 ; CHECK-NOT: NULL
+; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name{{.*}}= "foo"
+; CHECK-NEXT: DW_AT_declaration
+; CHECK-NOT: NULL
+; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name{{.*}}= "bar"
+; CHECK: NULL
+; CHECK: NULL
+; CHECK: NULL
+
+; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_module
 ; This is a bug, it should be in F2 but it inherits the file from its
 ; enclosing scope
 ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x04)
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x08)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
+; CHECK: NULL
+; CHECK-NOT: NULL
 
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name
@@ -30,101 +50,203 @@
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_module
 ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0e)
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x12)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x13)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FOO]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x14)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAR]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x15)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC1]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x16)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[I]]})
+; CHECK-NOT: NULL
+; CHECK: [[X:0x[0-9a-f]*]]:{{ *}}DW_TAG_imported_module
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x18)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NEXT: DW_AT_name{{.*}}"X"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x19)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[X]]})
+; CHECK-NEXT: DW_AT_name{{.*}}"Y"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_lexical_block
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_module
 ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b)
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0f)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
+; CHECK: NULL
+; CHECK: NULL
+; CHECK-NOT: NULL
+
+; CHECK: DW_TAG_imported_module
+; Same bug as above, this should be F2, not F1
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
 
 ; CHECK: file_names[  [[F1]]]{{.*}}debug-info-namespace.cpp
 ; CHECK: file_names[  [[F2]]]{{.*}}foo.cpp
 
 ; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths
-; changed to protect the guilty. The C++ source code is simply:
+; changed to protect the guilty. The C++ source code is:
 ; namespace A {
 ; #line 1 "foo.cpp"
 ; namespace B {
 ; int i;
+; void f1() { }
+; void f1(int) { }
+; struct foo;
+; struct bar { };
 ; }
 ; using namespace B;
 ; }
 ;
 ; using namespace A;
-; 
+;
 ; int func(bool b) {
 ;   if (b) {
 ;     using namespace A::B;
 ;     return i;
 ;   }
 ;   using namespace A;
-;   return B::i;
+;   using B::foo;
+;   using B::bar;
+;   using B::f1;
+;   using B::i;
+;   bar x;
+;   namespace X = A;
+;   namespace Y = X;
+;   return i + X::B::i + Y::B::i;
 ; }
 
+%"struct.A::B::bar" = type { i8 }
+
 @_ZN1A1B1iE = global i32 0, align 4
 
 ; Function Attrs: nounwind uwtable
+define void @_ZN1A1B2f1Ev() #0 { ; A::B::f1() -- the "void f1() { }" overload from the C++ source quoted above
+entry:
+  ret void, !dbg !41 ; !41 is the line-3 location scoped to this function's subprogram node (!4)
+}
+
+; Function Attrs: nounwind uwtable
+define void @_ZN1A1B2f1Ei(i32) #0 { ; A::B::f1(int) -- second overload; its parameter is unnamed, hence %0
+entry:
+  %.addr = alloca i32, align 4 ; stack slot that holds the unnamed argument
+  store i32 %0, i32* %.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %.addr}, metadata !42), !dbg !43 ; !42 is a DW_TAG_arg_variable with an empty name, line 4
+  ret void, !dbg !43
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind uwtable
 define i32 @_Z4funcb(i1 zeroext %b) #0 {
 entry:
   %retval = alloca i32, align 4
   %b.addr = alloca i8, align 1
+  %x = alloca %"struct.A::B::bar", align 1 ; storage for the new local "bar x" (described by !48 in if.end)
   %frombool = zext i1 %b to i8
   store i8 %frombool, i8* %b.addr, align 1
-  call void @llvm.dbg.declare(metadata !{i8* %b.addr}, metadata !21), !dbg !22
-  %0 = load i8* %b.addr, align 1, !dbg !23
-  %tobool = trunc i8 %0 to i1, !dbg !23
-  br i1 %tobool, label %if.then, label %if.end, !dbg !23
+  call void @llvm.dbg.declare(metadata !{i8* %b.addr}, metadata !44), !dbg !45 ; !44 = DW_TAG_arg_variable "b", line 13
+  %0 = load i8* %b.addr, align 1, !dbg !46
+  %tobool = trunc i8 %0 to i1, !dbg !46
+  br i1 %tobool, label %if.then, label %if.end, !dbg !46 ; !46 = line 14, the "if (b)" in the quoted source
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* @_ZN1A1B1iE, align 4, !dbg !24
-  store i32 %1, i32* %retval, !dbg !24
-  br label %return, !dbg !24
+  %1 = load i32* @_ZN1A1B1iE, align 4, !dbg !47 ; !47 = line 16, scoped to the lexical block !25
+  store i32 %1, i32* %retval, !dbg !47
+  br label %return, !dbg !47
 
 if.end:                                           ; preds = %entry
-  %2 = load i32* @_ZN1A1B1iE, align 4, !dbg !25
-  store i32 %2, i32* %retval, !dbg !25
-  br label %return, !dbg !25
+  call void @llvm.dbg.declare(metadata !{%"struct.A::B::bar"* %x}, metadata !48), !dbg !49 ; !48 = DW_TAG_auto_variable "x", line 23
+  %2 = load i32* @_ZN1A1B1iE, align 4, !dbg !50 ; three loads of the same global: one per operand of "i + X::B::i + Y::B::i"
+  %3 = load i32* @_ZN1A1B1iE, align 4, !dbg !50
+  %add = add nsw i32 %2, %3, !dbg !50
+  %4 = load i32* @_ZN1A1B1iE, align 4, !dbg !50
+  %add1 = add nsw i32 %add, %4, !dbg !50
+  store i32 %add1, i32* %retval, !dbg !50
+  br label %return, !dbg !50
 
 return:                                           ; preds = %if.end, %if.then
-  %3 = load i32* %retval, !dbg !26
-  ret i32 %3, !dbg !26
+  %5 = load i32* %retval, !dbg !51 ; !51 = line 27
+  ret i32 %5, !dbg !51
 }
 
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!52}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !11, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/llvm/src/tools/clang//usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang"}
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !19, metadata !21, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug//usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"func", metadata !"func", metadata !"_Z4funcb", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i1)* @_Z4funcb, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [func]
-!5 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
-!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !6, i32 2, metadata !9, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
-!13 = metadata !{i32 786489, metadata !5, metadata !14, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
-!14 = metadata !{i32 786489, metadata !1, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
-!15 = metadata !{metadata !16, metadata !17, metadata !18, metadata !20}
-!16 = metadata !{i32 786490, metadata !14, metadata !13, i32 4} ; [ DW_TAG_imported_module ]
-!17 = metadata !{i32 786490, metadata !0, metadata !14, i32 7} ; [ DW_TAG_imported_module ]
-!18 = metadata !{i32 786490, metadata !19, metadata !13, i32 11} ; [ DW_TAG_imported_module ]
-!19 = metadata !{i32 786443, metadata !5, metadata !4, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
-!20 = metadata !{i32 786490, metadata !4, metadata !14, i32 14} ; [ DW_TAG_imported_module ]
-!21 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !6, i32 16777225, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 9]
-!22 = metadata !{i32 9, i32 0, metadata !4, null}
-!23 = metadata !{i32 10, i32 0, metadata !4, null}
-!24 = metadata !{i32 12, i32 0, metadata !19, null}
-!25 = metadata !{i32 15, i32 0, metadata !4, null}
-!26 = metadata !{i32 16, i32 0, metadata !4, null}
+!3 = metadata !{metadata !4, metadata !10, metadata !14}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f1", metadata !"f1", metadata !"_ZN1A1B2f1Ev", i32 3, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN1A1B2f1Ev, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f1]
+!5 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"}
+!6 = metadata !{i32 786489, metadata !5, metadata !7, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
+!7 = metadata !{i32 786489, metadata !1, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
+!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{null}
+!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f1", metadata !"f1", metadata !"_ZN1A1B2f1Ei", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_ZN1A1B2f1Ei, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [f1]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{null, metadata !13}
+!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = metadata !{i32 786478, metadata !5, metadata !15, metadata !"func", metadata !"func", metadata !"_Z4funcb", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i1)* @_Z4funcb, null, null, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [func]
+!15 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
+!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{metadata !13, metadata !18}
+!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786484, i32 0, metadata !6, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !15, i32 2, metadata !13, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
+!21 = metadata !{metadata !22, metadata !23, metadata !24, metadata !26, metadata !27, metadata !29, metadata !37, metadata !38, metadata !39, metadata !40}
+!22 = metadata !{i32 786490, metadata !7, metadata !6, i32 8} ; [ DW_TAG_imported_module ]
+!23 = metadata !{i32 786490, metadata !0, metadata !7, i32 11} ; [ DW_TAG_imported_module ]
+!24 = metadata !{i32 786490, metadata !25, metadata !6, i32 15} ; [ DW_TAG_imported_module ]
+!25 = metadata !{i32 786443, metadata !5, metadata !14, i32 14, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
+!26 = metadata !{i32 786490, metadata !14, metadata !7, i32 18} ; [ DW_TAG_imported_module ]
+!27 = metadata !{i32 786440, metadata !14, metadata !28, i32 19} ; [ DW_TAG_imported_declaration ]
+!28 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"foo", i32 5, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 5, size 0, align 0, offset 0] [decl] [from ]
+!29 = metadata !{i32 786440, metadata !14, metadata !30, i32 20} ; [ DW_TAG_imported_declaration ]
+!30 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"bar", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 8, align 8, offset 0] [def] [from ]
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786478, metadata !5, metadata !30, metadata !"bar", metadata !"bar", metadata !"", i32 6, metadata !33, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !36, i32 6} ; [ DW_TAG_subprogram ] [line 6] [bar]
+!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = metadata !{null, metadata !35}
+!35 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar]
+!36 = metadata !{i32 786468}
+!37 = metadata !{i32 786440, metadata !14, metadata !10, i32 21} ; [ DW_TAG_imported_declaration ]
+!38 = metadata !{i32 786440, metadata !14, metadata !20, i32 22} ; [ DW_TAG_imported_declaration ]
+!39 = metadata !{i32 786490, metadata !14, metadata !7, i32 24, metadata !"X"} ; [ DW_TAG_imported_module ]
+!40 = metadata !{i32 786490, metadata !14, metadata !39, i32 25, metadata !"Y"} ; [ DW_TAG_imported_module ]
+!41 = metadata !{i32 3, i32 0, metadata !4, null}
+!42 = metadata !{i32 786689, metadata !10, metadata !"", metadata !15, i32 16777220, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 4]
+!43 = metadata !{i32 4, i32 0, metadata !10, null}
+!44 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !15, i32 16777229, metadata !18, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 13]
+!45 = metadata !{i32 13, i32 0, metadata !14, null}
+!46 = metadata !{i32 14, i32 0, metadata !14, null}
+!47 = metadata !{i32 16, i32 0, metadata !25, null}
+!48 = metadata !{i32 786688, metadata !14, metadata !"x", metadata !15, i32 23, metadata !30, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 23]
+!49 = metadata !{i32 23, i32 0, metadata !14, null}
+!50 = metadata !{i32 26, i32 0, metadata !14, null}
+!51 = metadata !{i32 27, i32 0, metadata !14, null}
+!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/template-recursive-void.ll b/test/DebugInfo/template-recursive-void.ll
new file mode 100644
index 000000000000..2ed57a6576f3
--- /dev/null
+++ b/test/DebugInfo/template-recursive-void.ll
@@ -0,0 +1,65 @@
+; REQUIRES: object-emission
+
+; RUN: llc -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; This was pulled from clang's debug-info-template-recursive.cpp test.
+; class base { };
+
+; template <class T> class foo : public base  {
+;   void operator=(const foo r) { }
+; };
+
+; class bar : public foo<void> { };
+; bar filters;
+
+; CHECK: DW_TAG_template_type_parameter [{{.*}}]
+; CHECK-NEXT: DW_AT_name{{.*}}"T"
+; CHECK-NOT: DW_AT_type
+; CHECK: NULL
+
+%class.bar = type { i8 }
+
+@filters = global %class.bar zeroinitializer, align 1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!36, !37}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 187958) (llvm/trunk 187964)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"debug-info-template-recursive.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"filters", metadata !"filters", metadata !"", metadata !5, i32 10, metadata !6, i32 0, i32 1, %class.bar* @filters, null} ; [ DW_TAG_variable ] [filters] [line 10] [def]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp]
+!6 = metadata !{i32 786434, metadata !1, null, metadata !"bar", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 8, align 8, offset 0] [def] [from ]
+!7 = metadata !{metadata !8, metadata !31}
+!8 = metadata !{i32 786460, null, metadata !6, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from foo<void>]
+!9 = metadata !{i32 786434, metadata !1, null, metadata !"foo<void>", i32 5, i64 8, i64 8, i32 0, i32 0, null, metadata !10, i32 0, null, metadata !29, null} ; [ DW_TAG_class_type ] [foo<void>] [line 5, size 8, align 8, offset 0] [def] [from ]
+!10 = metadata !{metadata !11, metadata !19, metadata !25}
+!11 = metadata !{i32 786460, null, metadata !9, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from base]
+!12 = metadata !{i32 786434, metadata !1, null, metadata !"base", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_class_type ] [base] [line 3, size 8, align 8, offset 0] [def] [from ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"base", metadata !"base", metadata !"", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [base]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !17}
+!17 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from base]
+!18 = metadata !{i32 786468}
+!19 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"operator=", metadata !"operator=", metadata !"_ZN3fooIvEaSES0_", i32 6, metadata !20, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !24, i32 6} ; [ DW_TAG_subprogram ] [line 6] [private] [operator=]
+!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = metadata !{null, metadata !22, metadata !23}
+!22 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo<void>]
+!23 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo<void>]
+!24 = metadata !{i32 786468}
+!25 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !26, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 5} ; [ DW_TAG_subprogram ] [line 5] [foo]
+!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = metadata !{null, metadata !22}
+!28 = metadata !{i32 786468}
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 786479, null, metadata !"T", null, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!31 = metadata !{i32 786478, metadata !1, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 9, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !35, i32 9} ; [ DW_TAG_subprogram ] [line 9] [bar]
+!32 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = metadata !{null, metadata !34}
+!34 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar]
+!35 = metadata !{i32 786468}
+!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!37 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/tu-composite.ll b/test/DebugInfo/tu-composite.ll
new file mode 100644
index 000000000000..f838eca72a67
--- /dev/null
+++ b/test/DebugInfo/tu-composite.ll
@@ -0,0 +1,185 @@
+; REQUIRES: object-emission
+
+; RUN: llc -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: [[TYPE:.*]]: DW_TAG_structure_type
+; Make sure we correctly handle containing type of a struct being a type identifier.
+; CHECK-NEXT: DW_AT_containing_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE]]})
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "C")
+
+; Make sure we correctly handle context of a subprogram being a type identifier.
+; CHECK: [[SP:.*]]: DW_TAG_subprogram
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "foo")
+; Make sure we correctly handle containing type of a subprogram being a type identifier.
+; CHECK: DW_AT_containing_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE]]})
+; CHECK: DW_TAG_formal_parameter
+; CHECK: NULL
+; CHECK: NULL
+
+; CHECK: [[TYPE2:.*]]: DW_TAG_structure_type
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "bar")
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "D")
+; CHECK: DW_TAG_member
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "a")
+; Make sure we correctly handle context of a struct being a type identifier.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested")
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested2")
+; CHECK-NEXT: DW_AT_declaration [DW_FORM_flag]      (0x01)
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "virt<bar>")
+; Make sure we correctly handle type of a template_type being a type identifier.
+; CHECK: DW_TAG_template_type_parameter
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "T")
+; Make sure we correctly handle derived-from of a typedef being a type identifier.
+; CHECK: DW_TAG_typedef
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz2")
+; Make sure we correctly handle derived-from of a pointer type being a type identifier.
+; CHECK: DW_TAG_pointer_type
+; CHECK: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
+; CHECK: DW_TAG_typedef
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz")
+; Make sure we correctly handle derived-from of an array type being a type identifier.
+; CHECK: DW_TAG_array_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; IR generated from clang -g with the following source:
+; struct C {
+;   virtual void foo();
+; };
+; void C::foo() {
+; }
+;
+; struct bar { };
+; typedef bar baz;
+; struct D {
+;   typedef bar baz2;
+;   static int a;
+;   struct Nested { };
+;   struct Nested2 { };
+;   template <typename T>
+;   struct virt {
+;     T* values;
+;   };
+; };
+; void test() {
+;   baz B;
+;   bar A[3];
+;   D::baz2 B2;
+;   D::Nested e;
+;   D::Nested2 *p;
+;   D::virt<bar> t;
+; }
+
+%struct.C = type { i32 (...)** }
+%struct.bar = type { i8 }
+%"struct.D::Nested" = type { i8 }
+%"struct.D::Nested2" = type { i8 }
+%"struct.D::virt" = type { %struct.bar* }
+
+@_ZTV1C = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1C to i8*), i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)]
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS1C = constant [3 x i8] c"1C\00"
+@_ZTI1C = unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1C, i32 0, i32 0) }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1C3fooEv(%struct.C* %this) unnamed_addr #0 align 2 { ; C::foo() -- the virtual member referenced from the @_ZTV1C vtable above
+entry:
+  %this.addr = alloca %struct.C*, align 8 ; stack slot for the incoming %this pointer
+  store %struct.C* %this, %struct.C** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !36), !dbg !38 ; attaches variable descriptor !36 to that slot
+  %this1 = load %struct.C** %this.addr ; reloaded value has no users in this function
+  ret void, !dbg !39
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4testv() #0 { ; test() from the quoted C++ source: one alloca plus one llvm.dbg.declare per local, no other work
+entry:
+  %B = alloca %struct.bar, align 1 ; "baz B" (baz is a typedef of bar)
+  %A = alloca [3 x %struct.bar], align 1 ; "bar A[3]"
+  %B2 = alloca %struct.bar, align 1 ; "D::baz2 B2"
+  %e = alloca %"struct.D::Nested", align 1 ; "D::Nested e"
+  %p = alloca %"struct.D::Nested2"*, align 8 ; "D::Nested2 *p"
+  %t = alloca %"struct.D::virt", align 8 ; "D::virt<bar> t"
+  call void @llvm.dbg.declare(metadata !{%struct.bar* %B}, metadata !40), !dbg !42
+  call void @llvm.dbg.declare(metadata !{[3 x %struct.bar]* %A}, metadata !43), !dbg !47
+  call void @llvm.dbg.declare(metadata !{%struct.bar* %B2}, metadata !48), !dbg !50
+  call void @llvm.dbg.declare(metadata !{%"struct.D::Nested"* %e}, metadata !51), !dbg !52
+  call void @llvm.dbg.declare(metadata !{%"struct.D::Nested2"** %p}, metadata !53), !dbg !55
+  call void @llvm.dbg.declare(metadata !{%"struct.D::virt"* %t}, metadata !56), !dbg !57
+  ret void, !dbg !58
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !59}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !30, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [tmp.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tmp.cpp", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !18, metadata !19, metadata !22, metadata !23, metadata !24}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !5, i32 0, metadata !"_ZTS1C", null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 64, align 64, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !13}
+!6 = metadata !{i32 786445, metadata !1, metadata !7, metadata !"_vptr$C", i32 0, i64 64, i64 0, i64 0, i32 64, metadata !8} ; [ DW_TAG_member ] [_vptr$C] [line 0, size 64, align 0, offset 0] [artificial] [from ]
+!7 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [tmp.cpp]
+!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
+!9 = metadata !{i32 786447, null, null, metadata !"__vtbl_ptr_type", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
+!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"foo", metadata !"foo", metadata !"_ZN1C3fooEv", i32 2, metadata !14, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1C", i32 256, i1 false, null, null, i32 0, metadata !17, i32 2} ; [ DW_TAG_subprogram ] [line 2] [foo]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{null, metadata !16}
+!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!17 = metadata !{i32 786468}
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 7, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 7, size 8, align 8, offset 0] [def] [from ]
+!19 = metadata !{i32 786451, metadata !1, null, metadata !"D", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !20, i32 0, null, null, metadata !"_ZTS1D"} ; [ DW_TAG_structure_type ] [D] [line 9, size 8, align 8, offset 0] [def] [from ]
+!20 = metadata !{metadata !21}
+!21 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1D", metadata !"a", i32 11, i64 0, i64 0, i64 0, i32 4096, metadata !12, null} ; [ DW_TAG_member ] [a] [line 11, size 0, align 0, offset 0] [static] [from int]
+!22 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"Nested", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTSN1D6NestedE"} ; [ DW_TAG_structure_type ] [Nested] [line 12, size 8, align 8, offset 0] [def] [from ]
+!23 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"Nested2", i32 13, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTSN1D7Nested2E"} ; [ DW_TAG_structure_type ] [Nested2] [line 13, size 0, align 0, offset 0] [decl] [from ]
+!24 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"virt<bar>", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !25, i32 0, null, metadata !28, metadata !"_ZTSN1D4virtI3barEE"} ; [ DW_TAG_structure_type ] [virt<bar>] [line 15, size 64, align 64, offset 0] [def] [from ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN1D4virtI3barEE", metadata !"values", i32 16, i64 64, i64 64, i64 0, i32 0, metadata !27} ; [ DW_TAG_member ] [values] [line 16, size 64, align 64, offset 0] [from ]
+!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3bar]
+!28 = metadata !{metadata !29}
+!29 = metadata !{i32 786479, null, metadata !"T", metadata !"_ZTS3bar", null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!30 = metadata !{metadata !31, metadata !32}
+!31 = metadata !{i32 786478, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"_ZN1C3fooEv", i32 4, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C3fooEv, null, metadata !13, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [foo]
+!32 = metadata !{i32 786478, metadata !1, metadata !7, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 20, metadata !33, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4testv, null, null, metadata !2, i32 20} ; [ DW_TAG_subprogram ] [line 20] [def] [test]
+!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = metadata !{null}
+!35 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!36 = metadata !{i32 786689, metadata !31, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!38 = metadata !{i32 0, i32 0, metadata !31, null}
+!39 = metadata !{i32 5, i32 0, metadata !31, null}
+!40 = metadata !{i32 786688, metadata !32, metadata !"B", metadata !7, i32 21, metadata !41, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [B] [line 21]
+!41 = metadata !{i32 786454, metadata !1, null, metadata !"baz", i32 8, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz] [line 8, size 0, align 0, offset 0] [from _ZTS3bar]
+!42 = metadata !{i32 21, i32 0, metadata !32, null}
+!43 = metadata !{i32 786688, metadata !32, metadata !"A", metadata !7, i32 22, metadata !44, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [A] [line 22]
+!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 24, i64 8, i32 0, i32 0, metadata !"_ZTS3bar", metadata !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 24, align 8, offset 0] [from _ZTS3bar]
+!45 = metadata !{metadata !46}
+!46 = metadata !{i32 786465, i64 0, i64 3}        ; [ DW_TAG_subrange_type ] [0, 2]
+!47 = metadata !{i32 22, i32 0, metadata !32, null}
+!48 = metadata !{i32 786688, metadata !32, metadata !"B2", metadata !7, i32 23, metadata !49, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [B2] [line 23]
+!49 = metadata !{i32 786454, metadata !1, metadata !"_ZTS1D", metadata !"baz2", i32 10, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz2] [line 10, size 0, align 0, offset 0] [from _ZTS3bar]
+!50 = metadata !{i32 23, i32 0, metadata !32, null}
+!51 = metadata !{i32 786688, metadata !32, metadata !"e", metadata !7, i32 24, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 24]
+!52 = metadata !{i32 24, i32 0, metadata !32, null}
+!53 = metadata !{i32 786688, metadata !32, metadata !"p", metadata !7, i32 25, metadata !54, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 25]
+!54 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTSN1D7Nested2E"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTSN1D7Nested2E]
+!55 = metadata !{i32 25, i32 0, metadata !32, null}
+!56 = metadata !{i32 786688, metadata !32, metadata !"t", metadata !7, i32 26, metadata !24, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 26]
+!57 = metadata !{i32 26, i32 0, metadata !32, null}
+!58 = metadata !{i32 27, i32 0, metadata !32, null}
+!59 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/tu-member-pointer.ll b/test/DebugInfo/tu-member-pointer.ll
new file mode 100644
index 000000000000..b746d3b6d4f1
--- /dev/null
+++ b/test/DebugInfo/tu-member-pointer.ll
@@ -0,0 +1,30 @@
+; REQUIRES: object-emission
+
+; RUN: llc -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]})
+; CHECK: [[TYPE]]:   DW_TAG_base_type
+; IR generated from clang -g with the following source:
+; struct Foo {
+;   int e;
+; };
+; int Foo::*x = 0;
+
+@x = global i64 -1, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !5, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cpp", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"Foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS3Foo"} ; [ DW_TAG_structure_type ] [Foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !7, i32 4, metadata !8, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!7 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [foo.cpp]
+!8 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9, metadata !"_ZTS3Foo"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
index 22cf4eb0fd37..8589840a6955 100644
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -33,34 +33,35 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0, !9}
+!llvm.module.flags = !{!33}
 
 !0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !32, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
 !9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !10 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786478, metadata !32, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !14 = metadata !{metadata !15, metadata !15, metadata !16}
-!15 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ]
-!17 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ]
+!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !19 = metadata !{metadata !20}
 !20 = metadata !{metadata !21, metadata !22}
 !21 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !6, i32 16777227, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !6, i32 33554443, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !23 = metadata !{i32 6, i32 3, metadata !24, null}
-!24 = metadata !{i32 786443, metadata !5, i32 5, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 786443, metadata !32, metadata !5, i32 5, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
 !25 = metadata !{i32 7, i32 1, metadata !24, null}
 !26 = metadata !{i32 11, i32 14, metadata !12, null}
 !27 = metadata !{i32 11, i32 26, metadata !12, null}
 !28 = metadata !{i32 12, i32 3, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !12, i32 11, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 786443, metadata !32, metadata !12, i32 11, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
 !30 = metadata !{i32 13, i32 3, metadata !29, null}
 !31 = metadata !{i32 14, i32 3, metadata !29, null}
 !32 = metadata !{metadata !"foo.c", metadata !"/tmp"}
@@ -70,3 +71,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: {{DW_TAG_compile_unit}}
 ; CHECK: {{foo\.c}}
 
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/version.ll b/test/DebugInfo/version.ll
new file mode 100644
index 000000000000..f4dde0a24988
--- /dev/null
+++ b/test/DebugInfo/version.ll
@@ -0,0 +1,32 @@
+; REQUIRES: object-emission
+
+; RUN: llc -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Make sure we generate DWARF version 3 when the module flag says so.
+; CHECK: Compile Unit: length = {{.*}} version = 0x0003
+
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !10
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185475)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"CodeGen/dwarf-version.c", metadata !"test"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!10 = metadata !{i32 7, i32 0, metadata !4, null}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/ExecutionEngine/MCJIT/Inputs/cross-module-b.ll b/test/ExecutionEngine/MCJIT/Inputs/cross-module-b.ll
new file mode 100644
index 000000000000..687011741103
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/Inputs/cross-module-b.ll
@@ -0,0 +1,7 @@
+declare i32 @FA()
+
+define i32 @FB() {
+  %r = call i32 @FA( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/Inputs/multi-module-b.ll b/test/ExecutionEngine/MCJIT/Inputs/multi-module-b.ll
new file mode 100644
index 000000000000..103b601e7f08
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/Inputs/multi-module-b.ll
@@ -0,0 +1,7 @@
+declare i32 @FC()
+
+define i32 @FB() {
+  %r = call i32 @FC( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/Inputs/multi-module-c.ll b/test/ExecutionEngine/MCJIT/Inputs/multi-module-c.ll
new file mode 100644
index 000000000000..b39306be9e3b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/Inputs/multi-module-c.ll
@@ -0,0 +1,4 @@
+define i32 @FC() {
+  ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/Inputs/multi-module-eh-b.ll b/test/ExecutionEngine/MCJIT/Inputs/multi-module-eh-b.ll
new file mode 100644
index 000000000000..d7dbb032b5d0
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/Inputs/multi-module-eh-b.ll
@@ -0,0 +1,30 @@
+declare i8* @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(i8*, i8*, i8*)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_end_catch()
+declare i8* @__cxa_begin_catch(i8*)
+
+@_ZTIi = external constant i8*
+
+define void @throwException_B() {
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}
+
+define i32 @FB() {
+entry:
+  invoke void @throwException_B()
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %e = extractvalue { i8*, i32 } %p, 0
+  call i8* @__cxa_begin_catch(i8* %e)
+  call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/cross-module-a.ll b/test/ExecutionEngine/MCJIT/cross-module-a.ll
new file mode 100644
index 000000000000..fe8d3864c9b6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/cross-module-a.ll
@@ -0,0 +1,13 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll %s > /dev/null
+
+declare i32 @FB()
+
+define i32 @FA() {
+  ret i32 0
+}
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
new file mode 100644
index 000000000000..ee26702dfae6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
@@ -0,0 +1,14 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386, arm
+
+declare i32 @FB()
+
+define i32 @FA() {
+  ret i32 0
+}
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
new file mode 100644
index 000000000000..7c0227d74ed6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
@@ -0,0 +1,32 @@
+; RUN: %lli_mcjit -relocation-model=pic -code-model=large %s
+; XFAIL: cygwin, win32, mingw, mips, powerpc64, i686, i386, aarch64, arm
+declare i8* @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(i8*, i8*, i8*)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_end_catch()
+declare i8* @__cxa_begin_catch(i8*)
+
+@_ZTIi = external constant i8*
+
+define void @throwException() {
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}
+
+define i32 @main() {
+entry:
+  invoke void @throwException()
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %e = extractvalue { i8*, i32 } %p, 0
+  call i8* @__cxa_begin_catch(i8* %e)
+  call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
new file mode 100644
index 000000000000..00c2bb056ff3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
@@ -0,0 +1,32 @@
+; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s
+; XFAIL: cygwin, win32, mingw, mips, i686, i386, darwin, aarch64, arm
+declare i8* @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(i8*, i8*, i8*)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_end_catch()
+declare i8* @__cxa_begin_catch(i8*)
+
+@_ZTIi = external constant i8*
+
+define void @throwException() {
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}
+
+define i32 @main() {
+entry:
+  invoke void @throwException()
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %e = extractvalue { i8*, i32 } %p, 0
+  call i8* @__cxa_begin_catch(i8* %e)
+  call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/hello-sm-pic.ll b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
new file mode 100644
index 000000000000..115846c78642
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
@@ -0,0 +1,12 @@
+; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386, darwin, aarch64, arm
+
+@.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
+
+declare i32 @puts(i8*)
+
+define i32 @main() {
+	%reg210 = call i32 @puts( i8* getelementptr ([12 x i8]* @.LC0, i64 0, i64 0) )		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg
index 30ed4e87e6c2..fdb36ee1d71c 100644
--- a/test/ExecutionEngine/MCJIT/lit.local.cfg
+++ b/test/ExecutionEngine/MCJIT/lit.local.cfg
@@ -1,12 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
+root = config.root
 targets = set(root.targets_to_build.split())
 if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \
    ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets):
@@ -14,12 +6,21 @@ if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \
 else:
     config.unsupported = True
 
+# FIXME: autoconf and cmake produce different arch names. We should normalize
+# them before getting here.
 if root.host_arch not in ['i386', 'x86', 'x86_64',
-                          'AArch64', 'ARM', 'Mips', 'PowerPC', 'SystemZ']:
+                          'AArch64', 'ARM', 'Mips', 'PowerPC', 'ppc64', 'SystemZ']:
     config.unsupported = True
 
-if 'i386-apple-darwin'  in root.target_triple:
+if 'armv7' in root.host_arch:
+    config.unsupported = False
+
+if 'i386-apple-darwin' in root.target_triple:
     config.unsupported = True
 
 if 'powerpc' in root.target_triple and not 'powerpc64' in root.target_triple:
     config.unsupported = True
+
+# ExecutionEngine tests are not expected to pass in a cross-compilation setup.
+if 'native' not in config.available_features:
+    config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/multi-module-a.ll b/test/ExecutionEngine/MCJIT/multi-module-a.ll
new file mode 100644
index 000000000000..8848ca6117f4
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/multi-module-a.ll
@@ -0,0 +1,9 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll %s > /dev/null
+
+declare i32 @FB()
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
new file mode 100644
index 000000000000..66fafc924acd
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
@@ -0,0 +1,35 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-eh-b.ll %s
+; XFAIL: arm, cygwin, win32, mingw
+declare i8* @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(i8*, i8*, i8*)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_end_catch()
+declare i8* @__cxa_begin_catch(i8*)
+
+@_ZTIi = external constant i8*
+
+declare i32 @FB()
+
+define void @throwException() {
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}
+
+define i32 @main() {
+entry:
+  invoke void @throwException()
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %e = extractvalue { i8*, i32 } %p, 0
+  call i8* @__cxa_begin_catch(i8* %e)
+  call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  %r = call i32 @FB( )
+  ret i32 %r
+}
diff --git a/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
new file mode 100644
index 000000000000..f2fa59f4821f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
@@ -0,0 +1,10 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386, arm
+
+declare i32 @FB()
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/Inputs/cross-module-b.ll b/test/ExecutionEngine/MCJIT/remote/Inputs/cross-module-b.ll
new file mode 100644
index 000000000000..687011741103
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/Inputs/cross-module-b.ll
@@ -0,0 +1,7 @@
+declare i32 @FA()
+
+define i32 @FB() {
+  %r = call i32 @FA( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-b.ll b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-b.ll
new file mode 100644
index 000000000000..103b601e7f08
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-b.ll
@@ -0,0 +1,7 @@
+declare i32 @FC()
+
+define i32 @FB() {
+  %r = call i32 @FC( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-c.ll b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-c.ll
new file mode 100644
index 000000000000..b39306be9e3b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/Inputs/multi-module-c.ll
@@ -0,0 +1,4 @@
+define i32 @FC() {
+  ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
new file mode 100644
index 000000000000..094d362262c5
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
@@ -0,0 +1,13 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null
+
+declare i32 @FB()
+
+define i32 @FA() {
+  ret i32 0
+}
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
new file mode 100644
index 000000000000..bdaa9a045c8f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
@@ -0,0 +1,14 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386, arm
+
+declare i32 @FB()
+
+define i32 @FA() {
+  ret i32 0
+}
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/lit.local.cfg b/test/ExecutionEngine/MCJIT/remote/lit.local.cfg
new file mode 100644
index 000000000000..6b192ae44be7
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/lit.local.cfg
@@ -0,0 +1,3 @@
+if 'armv4' in config.root.target_triple or \
+        'armv5' in config.root.target_triple:
+    config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
new file mode 100644
index 000000000000..91d0387376ca
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
@@ -0,0 +1,9 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null
+
+declare i32 @FB()
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
new file mode 100644
index 000000000000..73228e458f04
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
@@ -0,0 +1,10 @@
+; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386, arm
+
+declare i32 @FB()
+
+define i32 @main() {
+  %r = call i32 @FB( )   ; <i32> [#uses=1]
+  ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
new file mode 100644
index 000000000000..d10a4117a0dd
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
@@ -0,0 +1,10 @@
+; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null
+
+define i32 @bar() {
+	ret i32 0
+}
+
+define i32 @main() {
+	%r = call i32 @bar( )		; <i32> [#uses=1]
+	ret i32 %r
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
new file mode 100644
index 000000000000..97932bc389ac
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
@@ -0,0 +1,37 @@
+; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target %s
+; XFAIL: *
+; This test should fail until remote symbol resolution is supported.
+
+define i32 @main() nounwind {
+entry:
+	call void @lazily_compiled_address_is_consistent()
+	ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+	store i1 ()* @test, i1 ()** @funcPtr
+	%pass = tail call i1 @test()		; <i1> [#uses=1]
+	br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+	ret void
+fail_block:
+	call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+	call void @exit(i32 1)
+	unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+	%tmp = load i1 ()** @funcPtr
+	%eq = icmp eq i1 ()* %tmp, @test
+	ret i1 %eq
+}
+
+declare i32 @puts(i8*)		; puts returns normally, so it must not be marked noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
new file mode 100644
index 000000000000..88faf21adb1b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
@@ -0,0 +1,37 @@
+; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
+; XFAIL: *
+; This test should fail until remote symbol resolution is supported.
+
+define i32 @main() nounwind {
+entry:
+	call void @lazily_compiled_address_is_consistent()
+	ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+	store i1 ()* @test, i1 ()** @funcPtr
+	%pass = tail call i1 @test()		; <i1> [#uses=1]
+	br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+	ret void
+fail_block:
+	call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+	call void @exit(i32 1)
+	unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+	%tmp = load i1 ()** @funcPtr
+	%eq = icmp eq i1 ()* %tmp, @test
+	ret i1 %eq
+}
+
+declare i32 @puts(i8*)		; puts returns normally, so it must not be marked noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
new file mode 100644
index 000000000000..63280895a9a3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
@@ -0,0 +1,88 @@
+; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target %s
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+;
+; int main()
+; {
+;     zero_arr[zero_int + 5] = 40;
+;
+;     if (zero_double < 1.0)
+;         zero_arr[zero_int + 2] = 70;
+;
+;     for (int i = 1; i < 10; ++i) {
+;         zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+;     }
+;     return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @zero_int, align 4
+  %add = add nsw i32 %0, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+  store i32 40, i32* %arrayidx, align 4
+  %1 = load double* @zero_double, align 8
+  %cmp = fcmp olt double %1, 1.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* @zero_int, align 4
+  %add1 = add nsw i32 %2, 2
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+  store i32 70, i32* %arrayidx3, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if.end
+  %3 = load i32* %i, align 4
+  %cmp4 = icmp slt i32 %3, 10
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 %4, 1
+  %idxprom5 = sext i32 %sub to i64
+  %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+  %5 = load i32* %arrayidx6, align 4
+  %6 = load i32* %i, align 4
+  %idxprom7 = sext i32 %6 to i64
+  %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+  %7 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %5, %7
+  %8 = load i32* %i, align 4
+  %idxprom10 = sext i32 %8 to i64
+  %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+  store i32 %add9, i32* %arrayidx11, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %9 = load i32* %i, align 4
+  %inc = add nsw i32 %9, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %cmp12 = icmp eq i32 %10, 110
+  %cond = select i1 %cmp12, i32 0, i32 -1
+  ret i32 %cond
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
new file mode 100644
index 000000000000..6b2b97bc2d7e
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
@@ -0,0 +1,15 @@
+; RUN:  %lli_mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target %s
+
+; Check that a variable is always aligned as specified.
+
+@var = global i32 0, align 32
+define i32 @main() {
+  %addr = ptrtoint i32* @var to i64
+  %mask = and i64 %addr, 31
+  %tst = icmp eq i64 %mask, 0
+  br i1 %tst, label %good, label %bad
+good:
+  ret i32 0
+bad:
+  ret i32 1
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
new file mode 100644
index 000000000000..a8a93a8dc3bf
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
@@ -0,0 +1,20 @@
+; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=2]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Q = fadd double %Y, %Arg		; <double> [#uses=2]
+	%R = bitcast double %Q to double		; <double> [#uses=0]
+	store double %Q, double* %DP
+	ret double %Y
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
new file mode 100644
index 000000000000..4181fb08842c
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
@@ -0,0 +1,34 @@
+; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target %s > /dev/null
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
new file mode 100644
index 000000000000..29ab24b39336
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -0,0 +1,35 @@
+; RUN: %lli_mcjit -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, aarch64, arm, i686, i386
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
new file mode 100644
index 000000000000..8b562972b5d1
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
@@ -0,0 +1,15 @@
+; RUN: %lli_mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target %s
+
+@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
+@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
+@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1
+@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+  %0 = load i8** @ptr, align 4
+  %1 = load i8** @ptr2, align 4
+  %cmp = icmp eq i8* %0, %1
+  %. = zext i1 %cmp to i32
+  ret i32 %.
+}
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
new file mode 100644
index 000000000000..bad026fe7d4d
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -0,0 +1,17 @@
+; RUN: %lli_mcjit -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
+; XFAIL: mips, aarch64, arm, i686, i386
+
+@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
+@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
+@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1
+@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+  %0 = load i8** @ptr, align 4
+  %1 = load i8** @ptr2, align 4
+  %cmp = icmp eq i8* %0, %1
+  %. = zext i1 %cmp to i32
+  ret i32 %.
+}
+
diff --git a/test/ExecutionEngine/MCJIT/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
deleted file mode 100644
index 9ceaf545c5e6..000000000000
--- a/test/ExecutionEngine/MCJIT/simpletest-remote.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
-; XFAIL: arm, mips
-
-define i32 @bar() {
-	ret i32 0
-}
-
-define i32 @main() {
-	%r = call i32 @bar( )		; <i32> [#uses=1]
-	ret i32 %r
-}
-
diff --git a/test/ExecutionEngine/MCJIT/stubs-remote.ll b/test/ExecutionEngine/MCJIT/stubs-remote.ll
deleted file mode 100644
index 15cb5d037efc..000000000000
--- a/test/ExecutionEngine/MCJIT/stubs-remote.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false %s
-; XFAIL: arm, mips
-
-define i32 @main() nounwind {
-entry:
-	call void @lazily_compiled_address_is_consistent()
-	ret i32 0
-}
-
-; Test PR3043: @test should have the same address before and after
-; it's JIT-compiled.
-@funcPtr = common global i1 ()* null, align 4
-@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
-
-define void @lazily_compiled_address_is_consistent() nounwind {
-entry:
-	store i1 ()* @test, i1 ()** @funcPtr
-	%pass = tail call i1 @test()		; <i32> [#uses=1]
-	br i1 %pass, label %pass_block, label %fail_block
-pass_block:
-	ret void
-fail_block:
-	call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
-	call void @exit(i32 1)
-	unreachable
-}
-
-define i1 @test() nounwind {
-entry:
-	%tmp = load i1 ()** @funcPtr
-	%eq = icmp eq i1 ()* %tmp, @test
-	ret i1 %eq
-}
-
-declare i32 @puts(i8*) noreturn
-declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
new file mode 100644
index 000000000000..9e214f5d4753
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
@@ -0,0 +1,36 @@
+; RUN: %lli_mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
+; XFAIL: mips, i686, i386, aarch64, arm
+
+define i32 @main() nounwind {
+entry:
+	call void @lazily_compiled_address_is_consistent()
+	ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+	store i1 ()* @test, i1 ()** @funcPtr
+	%pass = tail call i1 @test()		; <i1> [#uses=1]
+	br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+	ret void
+fail_block:
+	call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+	call void @exit(i32 1)
+	unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+	%tmp = load i1 ()** @funcPtr
+	%eq = icmp eq i1 ()* %tmp, @test
+	ret i1 %eq
+}
+
+declare i32 @puts(i8*)		; puts returns normally, so it must not be marked noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
deleted file mode 100644
index 3b8ee9dd1cce..000000000000
--- a/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false %s
-; XFAIL: arm, mips
-
-; The intention of this test is to verify that symbols mapped to COMMON in ELF
-; work as expected.
-;
-; Compiled from this C code:
-;
-; int zero_int;
-; double zero_double;
-; int zero_arr[10];
-; 
-; int main()
-; {
-;     zero_arr[zero_int + 5] = 40;
-; 
-;     if (zero_double < 1.0)
-;         zero_arr[zero_int + 2] = 70;
-; 
-;     for (int i = 1; i < 10; ++i) {
-;         zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
-;     }
-;     return zero_arr[9] == 110 ? 0 : -1;
-; }
-
-@zero_int = common global i32 0, align 4
-@zero_arr = common global [10 x i32] zeroinitializer, align 16
-@zero_double = common global double 0.000000e+00, align 8
-
-define i32 @main() nounwind {
-entry:
-  %retval = alloca i32, align 4
-  %i = alloca i32, align 4
-  store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
-  %add = add nsw i32 %0, 5
-  %idxprom = sext i32 %add to i64
-  %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
-  store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
-  %cmp = fcmp olt double %1, 1.000000e+00
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
-  %add1 = add nsw i32 %2, 2
-  %idxprom2 = sext i32 %add1 to i64
-  %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
-  store i32 70, i32* %arrayidx3, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
-  store i32 1, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
-  %cmp4 = icmp slt i32 %3, 10
-  br i1 %cmp4, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
-  %sub = sub nsw i32 %4, 1
-  %idxprom5 = sext i32 %sub to i64
-  %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
-  %idxprom7 = sext i32 %6 to i64
-  %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
-  %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
-  %idxprom10 = sext i32 %8 to i64
-  %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
-  store i32 %add9, i32* %arrayidx11, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
-  %inc = add nsw i32 %9, 1
-  store i32 %inc, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
-  %cmp12 = icmp eq i32 %10, 110
-  %cond = select i1 %cmp12, i32 0, i32 -1
-  ret i32 %cond
-}
diff --git a/test/ExecutionEngine/MCJIT/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
deleted file mode 100644
index 9daf1684de81..000000000000
--- a/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN:  %lli_mcjit -remote-mcjit -O0 %s
-; XFAIL: armv7, mips
-
-; Check that a variable is always aligned as specified.
-
-@var = global i32 0, align 32
-define i32 @main() {
-  %addr = ptrtoint i32* @var to i64
-  %mask = and i64 %addr, 31
-  %tst = icmp eq i64 %mask, 0
-  br i1 %tst, label %good, label %bad
-good:
-  ret i32 0
-bad:
-  ret i32 1
-}
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
deleted file mode 100644
index 847d2253a0ed..000000000000
--- a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
-; XFAIL: arm, mips
-
-define double @test(double* %DP, double %Arg) {
-	%D = load double* %DP		; <double> [#uses=1]
-	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
-	%W = fsub double %V, %V		; <double> [#uses=3]
-	%X = fmul double %W, %W		; <double> [#uses=2]
-	%Y = fdiv double %X, %X		; <double> [#uses=2]
-	%Q = fadd double %Y, %Arg		; <double> [#uses=1]
-	%R = bitcast double %Q to double		; <double> [#uses=1]
-	store double %Q, double* %DP
-	ret double %Y
-}
-
-define i32 @main() {
-	%X = alloca double		; <double*> [#uses=2]
-	store double 0.000000e+00, double* %X
-	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
deleted file mode 100644
index b8d94b50cfe2..000000000000
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
-; XFAIL: arm, mips
-
-@count = global i32 1, align 4
-
-define i32 @main() nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %i = alloca i32, align 4
-  store i32 0, i32* %retval
-  store i32 0, i32* %i, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
-  %cmp = icmp slt i32 %0, 49
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
-  %inc = add nsw i32 %1, 1
-  store i32 %inc, i32* @count, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
-  %inc1 = add nsw i32 %2, 1
-  store i32 %inc1, i32* %i, align 4
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
-  %sub = sub nsw i32 %3, 50
-  ret i32 %sub
-}
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
new file mode 100644
index 000000000000..eb031f2e4f45
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
@@ -0,0 +1,35 @@
+; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, aarch64, arm, i686, i386
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
deleted file mode 100644
index f2c2cd6199f7..000000000000
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: %lli_mcjit -remote-mcjit -O0 %s
-; XFAIL: arm, mips
-
-@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
-@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
-@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1
-@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
-entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
-  %cmp = icmp eq i8* %0, %1
-  %. = zext i1 %cmp to i32
-  ret i32 %.
-}
-
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
new file mode 100644
index 000000000000..9e067422e312
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
@@ -0,0 +1,17 @@
+; RUN: %lli_mcjit -O0 -relocation-model=pic -code-model=small %s
+; XFAIL: mips, aarch64, arm, i686, i386
+
+@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
+@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
+@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1
+@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+  %0 = load i8** @ptr, align 4
+  %1 = load i8** @ptr2, align 4
+  %cmp = icmp eq i8* %0, %1
+  %. = zext i1 %cmp to i32
+  ret i32 %.
+}
+
diff --git a/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o b/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o
new file mode 100644
index 000000000000..5392266cf560
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/Inputs/arm_secdiff_reloc.o
diff --git a/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test b/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test
new file mode 100644
index 000000000000..92e4dd793ccb
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test
@@ -0,0 +1 @@
+RUN: llvm-rtdyld -printline %p/Inputs/arm_secdiff_reloc.o
diff --git a/test/ExecutionEngine/fma3-jit.ll b/test/ExecutionEngine/fma3-jit.ll
new file mode 100644
index 000000000000..25eaa65a538b
--- /dev/null
+++ b/test/ExecutionEngine/fma3-jit.ll
@@ -0,0 +1,18 @@
+; RUN: %lli %s | FileCheck %s
+; REQUIRES: fma3
+; CHECK: 12.000000
+
+@msg_double = internal global [4 x i8] c"%f\0A\00"
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+  %fma = tail call double @llvm.fma.f64(double 3.0, double 3.0, double 3.0) nounwind readnone
+
+  %ptr1 = getelementptr [4 x i8]* @msg_double, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr1, double %fma)
+
+  ret i32 0
+}
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index b6945adf015a..28c56ad9c5c2 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -1,14 +1,9 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
+if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']:
+    config.unsupported = True
 
-if root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']:
+if 'hexagon' in config.root.target_triple:
     config.unsupported = True
 
-if 'hexagon' in root.target_triple:
+# ExecutionEngine tests are not expected to pass in a cross-compilation setup.
+if 'native' not in config.available_features:
     config.unsupported = True
diff --git a/test/ExecutionEngine/mov64zext32.ll b/test/ExecutionEngine/mov64zext32.ll
new file mode 100644
index 000000000000..f38c21a5698f
--- /dev/null
+++ b/test/ExecutionEngine/mov64zext32.ll
@@ -0,0 +1,18 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+define i64 @foo() {
+  ret i64 42
+}
+
+define i32 @main() {
+  %val = call i64 @foo()
+  %is42 = icmp eq i64 %val, 42
+  br i1 %is42, label %good, label %bad
+
+good:
+  ret i32 0
+
+bad:
+  ret i32 1
+} 
diff --git a/test/ExecutionEngine/test-interp-vec-cast.ll b/test/ExecutionEngine/test-interp-vec-cast.ll
new file mode 100644
index 000000000000..3f9f66640fa1
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-cast.ll
@@ -0,0 +1,146 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+    zext <2 x i1> <i1 true,i1 true> to <2 x i8>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i8>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i16>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i16>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i32>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i32>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i64>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i64>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i16>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i16>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i32>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i32>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i64>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i64>
+    zext <3 x i16> <i16 4, i16 4, i16 4> to <3 x i32>
+    zext <2 x i16> <i16 -4, i16 -4> to <2 x i32>
+    zext <3 x i16> <i16 4, i16 4, i16 4> to <3 x i64>
+    zext <2 x i16> <i16 -4, i16 -4> to <2 x i64>
+    zext <3 x i32> <i32 4, i32 4, i32 4> to <3 x i64>
+    zext <2 x i32> <i32 -4, i32 -4> to <2 x i64>
+
+
+    sext <2 x i1> <i1 true,i1 true> to <2 x i8>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i8>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i16>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i16>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i32>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i32>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i64>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i64>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i16>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i16>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i32>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i32>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i64>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i64>
+    sext <3 x i16> <i16 -4, i16 0, i16 4> to <3 x i32>
+    sext <2 x i16> <i16 -4, i16 4> to <2 x i32>
+    sext <3 x i16> <i16 -4, i16 0, i16 4> to <3 x i64>
+    sext <2 x i16> <i16 -4, i16 4> to <2 x i64>
+    sext <3 x i32> <i32 -4, i32 0, i32 4> to <3 x i64>
+    sext <2 x i32> <i32 -4, i32 4> to <2 x i64>
+
+
+    uitofp <3 x i1> <i1 true,i1 false,i1 true> to <3 x float>
+    uitofp <2 x i1> <i1 true,i1 true> to <2 x double>
+    uitofp <3 x i8> <i8 -4,i8 0,i8 4> to <3 x float>
+    uitofp <2 x i8> <i8 -4,i8 4> to <2 x double>
+    uitofp <3 x i16> <i16 -4,i16 0,i16 4> to <3 x float>
+    uitofp <2 x i16> <i16 -4,i16 4> to <2 x double>
+    uitofp <3 x i32> <i32 -4,i32 0,i32 4> to <3 x float>
+    uitofp <2 x i32> <i32 -4,i32 4> to <2 x double>
+    uitofp <3 x i64> <i64 -4,i64 0,i64 4> to <3 x float>
+    uitofp <2 x i64> <i64 -4,i64 4> to <2 x double>
+
+
+    sitofp <3 x i1> <i1 true,i1 false,i1 true> to <3 x float>
+    sitofp <2 x i1> <i1 true,i1 true> to <2 x double>
+    sitofp <3 x i8> <i8 -4,i8 0,i8 4> to <3 x float>
+    sitofp <2 x i8> <i8 -4,i8 4> to <2 x double>
+    sitofp <3 x i16> <i16 -4,i16 0,i16 4> to <3 x float>
+    sitofp <2 x i16> <i16 -4,i16 4> to <2 x double>
+    sitofp <3 x i32> <i32 -4,i32 0,i32 4> to <3 x float>
+    sitofp <2 x i32> <i32 -4,i32 4> to <2 x double>
+    sitofp <3 x i64> <i64 -4,i64 0,i64 4> to <3 x float>
+    sitofp <2 x i64> <i64 -4,i64 4> to <2 x double>
+
+    trunc <2 x i16> <i16 -6, i16 6> to <2 x i8>
+    trunc <3 x i16> <i16 -6, i16 6, i16 0> to <3 x i8>
+    trunc <2 x i32> <i32 -6, i32 6> to <2 x i8>
+    trunc <3 x i32> <i32 -6, i32 6, i32 0> to <3 x i8>
+    trunc <2 x i32> <i32 -6, i32 6> to <2 x i16>
+    trunc <3 x i32> <i32 -6, i32 6, i32 0> to <3 x i16>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i8>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i8>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i16>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i16>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i32>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i32>
+
+
+    fpext <2 x float>  < float 0.000000e+00, float 1.0> to <2 x double>
+    fpext <3 x float>  < float 0.000000e+00, float -1.0, float 1.0> to <3 x double>
+
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i8>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i8>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i16>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i16>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i32>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i32>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i64>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i64>
+
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i8>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i8>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i16>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i16>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i32>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i32>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i64>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i64>
+
+    fptrunc <2 x double> < double 0.000000e+00, double 1.0> to <2 x float>
+    fptrunc <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x float>
+
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <4 x i16>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <2 x i32>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to i64
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <2 x float>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to double
+
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <8 x i8>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <2 x i32>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to i64
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <2 x float>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to double
+
+    bitcast <2 x i32> <i32 1, i32 -1> to <8 x i8>
+    bitcast <2 x i32> <i32 1, i32 -1> to <4 x i16>
+    bitcast <2 x i32> <i32 1, i32 -1> to i64
+    bitcast <2 x i32> <i32 1, i32 -1> to <2 x float>
+    bitcast <2 x i32> <i32 1, i32 -1> to double
+
+    bitcast i64 1 to <8 x i8>
+    bitcast i64 1 to <4 x i16>
+    bitcast i64 1 to <2 x i32>
+    bitcast i64 1 to <2 x float>
+    bitcast i64 1 to double
+
+    bitcast <2 x float> <float 1.0, float -1.0> to <8 x i8>
+    bitcast <2 x float> <float 1.0, float -1.0> to <4 x i16>
+    bitcast <2 x float> <float 1.0, float -1.0> to i64
+    bitcast <2 x float> <float 1.0, float -1.0> to <2 x i32>
+    bitcast <2 x float> <float 1.0, float -1.0> to double
+
+    bitcast double 1.0 to <8 x i8>
+    bitcast double 1.0 to <4 x i16>
+    bitcast double 1.0 to <2 x i32>
+    bitcast double 1.0 to <2 x float>
+    bitcast double 1.0 to i64
+
+    ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-insertelement.ll b/test/ExecutionEngine/test-interp-vec-insertelement.ll
new file mode 100644
index 000000000000..814b90533d0c
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-insertelement.ll
@@ -0,0 +1,41 @@
+ ; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {  ; every %vN below is unused; main only has to execute and return 0
+ %v0 = insertelement <2 x i8> zeroinitializer, i8 1, i32 1  ; i8 lanes; the 2/3/4-element cases write the last lane
+ %v1 = insertelement <3 x i8> zeroinitializer, i8 2, i32 2
+ %v2 = insertelement <4 x i8> zeroinitializer, i8 3, i32 3
+ %v3 = insertelement <8 x i8> zeroinitializer, i8 4, i32 4  ; the 8/16-element cases write an interior lane (4 and 7)
+ %v4 = insertelement <16 x i8> zeroinitializer, i8 5, i32 7
+
+ %v5 = insertelement <2 x i16> zeroinitializer, i16 1, i32 1  ; same five shapes and indices with i16 lanes
+ %v6 = insertelement <3 x i16> zeroinitializer, i16 2, i32 2
+ %v7 = insertelement <4 x i16> zeroinitializer, i16 3, i32 3
+ %v8 = insertelement <8 x i16> zeroinitializer, i16 4, i32 4
+ %v9 = insertelement <16 x i16> zeroinitializer, i16 5, i32 7
+
+ %v10 = insertelement <2 x i32> zeroinitializer, i32 1, i32 1  ; i32 lanes
+ %v11 = insertelement <3 x i32> zeroinitializer, i32 2, i32 2
+ %v12 = insertelement <4 x i32> zeroinitializer, i32 3, i32 3
+ %v13 = insertelement <8 x i32> zeroinitializer, i32 4, i32 4
+ %v14 = insertelement <16 x i32> zeroinitializer, i32 5, i32 7
+
+ %v15 = insertelement <2 x i64> zeroinitializer, i64 1, i32 1  ; i64 lanes (the index operand stays i32)
+ %v16 = insertelement <3 x i64> zeroinitializer, i64 2, i32 2
+ %v17 = insertelement <4 x i64> zeroinitializer, i64 3, i32 3
+ %v18 = insertelement <8 x i64> zeroinitializer, i64 4, i32 4
+ %v19 = insertelement <16 x i64> zeroinitializer, i64 5, i32 7
+
+ %v20 = insertelement <2 x float> zeroinitializer, float 1.0, i32 1  ; float lanes
+ %v21 = insertelement <3 x float> zeroinitializer, float 2.0, i32 2
+ %v22 = insertelement <4 x float> zeroinitializer, float 3.0, i32 3
+ %v23 = insertelement <8 x float> zeroinitializer, float 4.0, i32 4
+ %v24 = insertelement <16 x float> zeroinitializer, float 5.0, i32 7
+
+ %v25 = insertelement <2 x double> zeroinitializer, double 1.0, i32 1  ; double lanes
+ %v26 = insertelement <3 x double> zeroinitializer, double 2.0, i32 2
+ %v27 = insertelement <4 x double> zeroinitializer, double 3.0, i32 3
+ %v28 = insertelement <8 x double> zeroinitializer, double 4.0, i32 4
+ %v29 = insertelement <16 x double> zeroinitializer, double 5.0, i32 7
+
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-insertextractvalue.ll b/test/ExecutionEngine/test-interp-vec-insertextractvalue.ll
new file mode 100644
index 000000000000..09fbf6abc3fd
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-insertextractvalue.ll
@@ -0,0 +1,21 @@
+ ; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+
+    %s1 = insertvalue { i32, { float, double} } undef, i32 9, 0  ; build { 9, { 3.0, 5.0 } } one field at a time
+    %s2 = insertvalue { i32, { float, double} } %s1, float 3.0, 1, 0  ; indices 1, 0: first field of the nested struct
+    %s3 = insertvalue { i32, { float, double} } %s2, double 5.0, 1, 1  ; indices 1, 1: second field of the nested struct
+
+    %s4 = extractvalue { i32, { float, double} } %s3, 1  ; the whole nested { float, double } struct
+
+    %a1 = extractvalue { i32, { float, double} } %s3, 0  ; the outer i32 field
+
+    %a2 = extractvalue { i32, { float, double} } %s3, 1, 0  ; nested fields read directly via two indices
+    %a3 = extractvalue { i32, { float, double} } %s3, 1, 1
+    %a4 = extractvalue { float, double} %s4, 0  ; the same nested fields, read through the extracted %s4
+    %a5 = extractvalue { float, double} %s4, 1
+
+    %aa = fpext float %a4 to double  ; feeds an extracted field into an ordinary instruction; %aa itself is unused
+
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-loadstore.ll b/test/ExecutionEngine/test-interp-vec-loadstore.ll
index e5007114c070..665a135d6bc9 100644
--- a/test/ExecutionEngine/test-interp-vec-loadstore.ll
+++ b/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -1,22 +1,11 @@
 ; RUN: %lli -force-interpreter=true %s | FileCheck %s
-; XFAIL: mips
-; CHECK: 1
-; CHECK: 2
-; CHECK: 3
-; CHECK: 4
-; CHECK: 5.{{[0]+}}e+{{[0]+}}
-; CHECK: 6.{{[0]+}}e+{{[0]+}}
-; CHECK: 7.{{[0]+}}e+{{[0]+}}
-; CHECK: 8.{{[0]+}}e+{{[0]+}}
-; CHECK: 9.{{[0]+}}e+{{[0]+}}
-; CHECK: 1.{{[0]+}}e+{{[0]+}}1
-; CHECK: 1.1{{[0]+}}e+{{[0]+}}1
-; CHECK: 1.2{{[0]+}}e+{{[0]+}}1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-
-@format_i32 = internal global [4 x i8] c"%d\0A\00"
-@format_float = internal global [4 x i8] c"%e\0A\00"
+; CHECK: int test passed
+; CHECK: double test passed
+; CHECK: float test passed
+
+@msg_int = internal global [17 x i8] c"int test passed\0A\00"  ; passed to printf in the %Print_int block of @main
+@msg_double = internal global [20 x i8] c"double test passed\0A\00"  ; passed to printf in the %Print_double block
+@msg_float = internal global [19 x i8] c"float test passed\0A\00"  ; passed to printf in the %Print_float block
 
 declare i32 @printf(i8*, ...)
 
@@ -24,62 +13,157 @@ define i32 @main() {
   %a = alloca <4 x i32>, align 16
   %b = alloca <4 x double>, align 16
   %c = alloca <4 x float>, align 16
-  
+  %pint_0 = alloca i32  ; four i32 slots that receive the scalar copies of 1, 2, 3, 4
+  %pint_1 = alloca i32
+  %pint_2 = alloca i32
+  %pint_3 = alloca i32
+  %pdouble_0 = alloca double  ; four double slots that receive the scalar copies of 5.0 .. 8.0
+  %pdouble_1 = alloca double
+  %pdouble_2 = alloca double
+  %pdouble_3 = alloca double
+  %pfloat_0 = alloca float  ; four float slots that receive the scalar copies of 9.0 .. 12.0
+  %pfloat_1 = alloca float
+  %pfloat_2 = alloca float
+  %pfloat_3 = alloca float
+
+  ; store the constants 1,2,3,4 as a single vector
   store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 16
+  ; store constants 1,2,3,4 as scalars
+  store i32 1, i32* %pint_0
+  store i32 2, i32* %pint_1
+  store i32 3, i32* %pint_2
+  store i32 4, i32* %pint_3
+  
+  ; load stored scalars
+  %val_int0 = load i32* %pint_0
+  %val_int1 = load i32* %pint_1
+  %val_int2 = load i32* %pint_2
+  %val_int3 = load i32* %pint_3
 
+  ; load stored vector
   %val0 = load <4 x i32> *%a, align 16
 
+  ; extract integers from the loaded vector
   %res_i32_0 = extractelement <4 x i32> %val0, i32 0
   %res_i32_1 = extractelement <4 x i32> %val0, i32 1
   %res_i32_2 = extractelement <4 x i32> %val0, i32 2
   %res_i32_3 = extractelement <4 x i32> %val0, i32 3
-  
-  %ptr0 = getelementptr [4 x i8]* @format_i32, i32 0, i32 0
-  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_0)
-  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_1)
-  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_2)
-  call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_3)
 
+  ; compare each extracted lane i with the scalar that was stored for the same index i
+  %test_result_int_0 = icmp eq i32 %res_i32_0, %val_int0
+  %test_result_int_1 = icmp eq i32 %res_i32_1, %val_int1
+  %test_result_int_2 = icmp eq i32 %res_i32_2, %val_int2
+  %test_result_int_3 = icmp eq i32 %res_i32_3, %val_int3
+
+  %test_result_int_4 = icmp eq i32 %res_i32_0, %val_int3  ; lane i against the scalar for the mirrored index 3-i
+  %test_result_int_5 = icmp eq i32 %res_i32_1, %val_int2
+  %test_result_int_6 = icmp eq i32 %res_i32_2, %val_int1
+  %test_result_int_7 = icmp eq i32 %res_i32_3, %val_int0
+
+  ; a lane passes if it matches scalar i or scalar 3-i (presumably to accept either lane order - confirm); res_i is TRUE only if all four lanes pass
+  %A_i = or i1 %test_result_int_0, %test_result_int_4
+  %B_i = or i1 %test_result_int_1, %test_result_int_5
+  %C_i = or i1 %test_result_int_2, %test_result_int_6
+  %D_i = or i1 %test_result_int_3, %test_result_int_7
+  %E_i = and i1 %A_i, %B_i
+  %F_i = and i1 %C_i, %D_i
+  %res_i = and i1 %E_i, %F_i
+
+  ; print the message only if res_i is TRUE; otherwise branch straight to %Double without printing
+  br i1 %res_i, label %Print_int, label %Double
+Print_int:
+  %ptr0 = getelementptr [17 x i8]* @msg_int, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr0)
+  br label %Double
+Double:
   store <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, <4 x double>* %b, align 16
+  ; store constants as scalars
+  store double 5.0, double* %pdouble_0
+  store double 6.0, double* %pdouble_1
+  store double 7.0, double* %pdouble_2
+  store double 8.0, double* %pdouble_3
 
+  ; load stored vector
   %val1 = load <4 x double> *%b, align 16
+  ; load stored scalars
+  %val_double0 = load double* %pdouble_0
+  %val_double1 = load double* %pdouble_1
+  %val_double2 = load double* %pdouble_2
+  %val_double3 = load double* %pdouble_3
 
   %res_double_0 = extractelement <4 x double> %val1, i32 0
   %res_double_1 = extractelement <4 x double> %val1, i32 1
   %res_double_2 = extractelement <4 x double> %val1, i32 2
   %res_double_3 = extractelement <4 x double> %val1, i32 3
-  
-  %ptr1 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_0)
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_1)
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_2)
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_3)
 
+  %test_result_double_0 = fcmp oeq double %res_double_0, %val_double0  ; lane i against the scalar stored for index i
+  %test_result_double_1 = fcmp oeq double %res_double_1, %val_double1
+  %test_result_double_2 = fcmp oeq double %res_double_2, %val_double2
+  %test_result_double_3 = fcmp oeq double %res_double_3, %val_double3
+
+  %test_result_double_4 = fcmp oeq double %res_double_0, %val_double3  ; lane i against the scalar for the mirrored index 3-i
+  %test_result_double_5 = fcmp oeq double %res_double_1, %val_double2
+  %test_result_double_6 = fcmp oeq double %res_double_2, %val_double1
+  %test_result_double_7 = fcmp oeq double %res_double_3, %val_double0
+
+  %A_double = or i1 %test_result_double_0, %test_result_double_4  ; a lane passes on either match
+  %B_double = or i1 %test_result_double_1, %test_result_double_5
+  %C_double = or i1 %test_result_double_2, %test_result_double_6
+  %D_double = or i1 %test_result_double_3, %test_result_double_7
+  %E_double = and i1 %A_double, %B_double  ; res_double is true only if all four lanes pass
+  %F_double = and i1 %C_double, %D_double
+  %res_double = and i1 %E_double, %F_double
 
+  br i1 %res_double, label %Print_double, label %Float  ; when the check is false, go to %Float without printing
+Print_double:
+  %ptr1 = getelementptr [20 x i8]* @msg_double, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr1)  ; the message is the only argument; no variadic values are passed
+  br label %Float
+Float:
   store <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, <4 x float>* %c, align 16
-  
+
+  store float 9.0, float* %pfloat_0
+  store float 10.0, float* %pfloat_1
+  store float 11.0, float* %pfloat_2
+  store float 12.0, float* %pfloat_3
+
+  ; load stored vector
   %val2 = load <4 x float> *%c, align 16
-  
-  %ptr2 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+  ; load the four scalar floats that were stored separately from the vector
+  %val_float0 = load float* %pfloat_0
+  %val_float1 = load float* %pfloat_1
+  %val_float2 = load float* %pfloat_2
+  %val_float3 = load float* %pfloat_3
+
+  %res_float_0 = extractelement <4 x float> %val2, i32 0  ; lanes of the vector loaded back from %c
+  %res_float_1 = extractelement <4 x float> %val2, i32 1
+  %res_float_2 = extractelement <4 x float> %val2, i32 2
+  %res_float_3 = extractelement <4 x float> %val2, i32 3
+
+  %test_result_float_0 = fcmp oeq float %res_float_0, %val_float0  ; lane i against the scalar stored for index i
+  %test_result_float_1 = fcmp oeq float %res_float_1, %val_float1
+  %test_result_float_2 = fcmp oeq float %res_float_2, %val_float2
+  %test_result_float_3 = fcmp oeq float %res_float_3, %val_float3
+
+  %test_result_float_4 = fcmp oeq float %res_float_0, %val_float3  ; lane i against the scalar for the mirrored index 3-i
+  %test_result_float_5 = fcmp oeq float %res_float_1, %val_float2
+  %test_result_float_6 = fcmp oeq float %res_float_2, %val_float1
+  %test_result_float_7 = fcmp oeq float %res_float_3, %val_float0
+
+  %A_float = or i1 %test_result_float_0, %test_result_float_4  ; a lane passes on either match
+  %B_float = or i1 %test_result_float_1, %test_result_float_5
+  %C_float = or i1 %test_result_float_2, %test_result_float_6
+  %D_float = or i1 %test_result_float_3, %test_result_float_7
+  %E_float = and i1 %A_float, %B_float  ; res_float is true only if all four lanes pass
+  %F_float = and i1 %C_float, %D_float
+  %res_float = and i1 %E_float, %F_float
+
+  br i1 %res_float, label %Print_float, label %Exit  ; when the check is false, go to %Exit without printing
+Print_float:
+  %ptr2 = getelementptr [19 x i8]* @msg_float, i32 0, i32 0
+  call i32 (i8*,...)* @printf(i8* %ptr2)  ; the message is the only argument; no float values are passed to printf
+  br label %Exit
+Exit:
 
-  ; by some reason printf doesn't print float correctly, so
-  ; floats are casted to doubles and are printed as doubles
-  
-  %res_serv_0 = extractelement <4 x float> %val2, i32 0
-  %res_float_0 = fpext float %res_serv_0 to double
-  %res_serv_1 = extractelement <4 x float> %val2, i32 1
-  %res_float_1 = fpext float %res_serv_1 to double
-  %res_serv_2 = extractelement <4 x float> %val2, i32 2
-  %res_float_2 = fpext float %res_serv_2 to double
-  %res_serv_3 = extractelement <4 x float> %val2, i32 3
-  %res_float_3 = fpext float %res_serv_3 to double
-
- 
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_0)
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_1)
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_2)
-  call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_3)
- 
-  
   ret i32 0
 }
diff --git a/test/ExecutionEngine/test-interp-vec-select.ll b/test/ExecutionEngine/test-interp-vec-select.ll
new file mode 100644
index 000000000000..ce086e408dd4
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-select.ll
@@ -0,0 +1,118 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+
+  ; Operand vectors a<N>_<ty> = <0, 1, ..., N-1>; each is the result of an add/fadd with zero, not a literal constant
+  %a2_i8 = add <2 x i8> zeroinitializer, <i8 0, i8 1>
+  %a3_i8 = add <3 x i8> zeroinitializer, <i8 0, i8 1, i8 2>
+  %a4_i8 = add <4 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3>
+  %a8_i8 = add <8 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+  %a16_i8 = add <16 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+  %a2_i16 = add <2 x i16> zeroinitializer, <i16 0, i16 1>
+  %a3_i16 = add <3 x i16> zeroinitializer, <i16 0, i16 1, i16 2>
+  %a4_i16 = add <4 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3>
+  %a8_i16 = add <8 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  %a16_i16 = add <16 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+
+  %a2_i32 = add <2 x i32> zeroinitializer, <i32 0, i32 1>
+  %a3_i32 = add <3 x i32> zeroinitializer, <i32 0, i32 1, i32 2>
+  %a4_i32 = add <4 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3>
+  %a8_i32 = add <8 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %a16_i32 = add <16 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %a2_i64 = add <2 x i64> zeroinitializer, <i64 0, i64 1>
+  %a3_i64 = add <3 x i64> zeroinitializer, <i64 0, i64 1, i64 2>
+  %a4_i64 = add <4 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3>
+  %a8_i64 = add <8 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %a16_i64 = add <16 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+
+  %a2_float = fadd <2 x float> zeroinitializer, <float 0.0, float 1.0>
+  %a3_float = fadd <3 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0>
+  %a4_float = fadd <4 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0>
+  %a8_float = fadd <8 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>
+  %a16_float = fadd <16 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>
+
+  %a2_double = fadd <2 x double> zeroinitializer, <double 0.0, double 1.0>
+  %a3_double = fadd <3 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0>
+  %a4_double = fadd <4 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0>
+  %a8_double = fadd <8 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>
+  %a16_double = fadd <16 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>
+
+  %b2_i8  = sub <2 x i8> zeroinitializer, %a2_i8  ; b<N>_<ty> = 0 - a<N>_<ty>, lane by lane (sub for integers, fsub for floating point)
+  %b3_i8  = sub <3 x i8> zeroinitializer, %a3_i8
+  %b4_i8  = sub <4 x i8> zeroinitializer, %a4_i8
+  %b8_i8  = sub <8 x i8> zeroinitializer, %a8_i8
+  %b16_i8 = sub <16 x i8> zeroinitializer, %a16_i8
+
+  %b2_i16  = sub <2 x i16> zeroinitializer, %a2_i16
+  %b3_i16  = sub <3 x i16> zeroinitializer, %a3_i16
+  %b4_i16  = sub <4 x i16> zeroinitializer, %a4_i16
+  %b8_i16  = sub <8 x i16> zeroinitializer, %a8_i16
+  %b16_i16 = sub <16 x i16> zeroinitializer, %a16_i16
+
+  %b2_i32  = sub <2 x i32> zeroinitializer, %a2_i32
+  %b3_i32  = sub <3 x i32> zeroinitializer, %a3_i32
+  %b4_i32  = sub <4 x i32> zeroinitializer, %a4_i32
+  %b8_i32  = sub <8 x i32> zeroinitializer, %a8_i32
+  %b16_i32 = sub <16 x i32> zeroinitializer, %a16_i32
+
+  %b2_i64  = sub <2 x i64> zeroinitializer, %a2_i64
+  %b3_i64  = sub <3 x i64> zeroinitializer, %a3_i64
+  %b4_i64  = sub <4 x i64> zeroinitializer, %a4_i64
+  %b8_i64  = sub <8 x i64> zeroinitializer, %a8_i64
+  %b16_i64 = sub <16 x i64> zeroinitializer, %a16_i64
+
+  %b2_float  = fsub <2 x float> zeroinitializer, %a2_float
+  %b3_float  = fsub <3 x float> zeroinitializer, %a3_float
+  %b4_float  = fsub <4 x float> zeroinitializer, %a4_float
+  %b8_float  = fsub <8 x float> zeroinitializer, %a8_float
+  %b16_float = fsub <16 x float> zeroinitializer, %a16_float
+
+  %b2_double  = fsub <2 x double> zeroinitializer, %a2_double
+  %b3_double  = fsub <3 x double> zeroinitializer, %a3_double
+  %b4_double  = fsub <4 x double> zeroinitializer, %a4_double
+  %b8_double  = fsub <8 x double> zeroinitializer, %a8_double
+  %b16_double = fsub <16 x double> zeroinitializer, %a16_double
+
+  ; Lane-wise select with an alternating true/false mask (lane 0 is true):
+  ; lanes whose mask bit is true take the a-vector element, the others take the b-vector element.
+  %v0 = select <2 x i1> <i1 true, i1 false>, <2 x i8> %a2_i8, <2 x i8> %b2_i8
+  %v1 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i8> %a3_i8, <3 x i8> %b3_i8
+  %v2 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i8> %a4_i8, <4 x i8> %b4_i8
+  %v3 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i8> %a8_i8, <8 x i8> %b8_i8
+  %v4 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> %a16_i8, <16 x i8> %b16_i8
+
+  %v5 = select <2 x i1> <i1 true, i1 false>, <2 x i16> %a2_i16, <2 x i16> %b2_i16
+  %v6 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i16> %a3_i16, <3 x i16> %b3_i16
+  %v7 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i16> %a4_i16, <4 x i16> %b4_i16
+  %v8 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a8_i16, <8 x i16> %b8_i16
+  %v9 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i16> %a16_i16, <16 x i16> %b16_i16
+
+  %v10 = select <2 x i1> <i1 true, i1 false>, <2 x i32> %a2_i32, <2 x i32> %b2_i32
+  %v11 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i32> %a3_i32, <3 x i32> %b3_i32
+  %v12 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a4_i32, <4 x i32> %b4_i32
+  %v13 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a8_i32, <8 x i32> %b8_i32
+  %v14 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i32> %a16_i32, <16 x i32> %b16_i32
+
+  %v15 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a2_i64, <2 x i64> %b2_i64
+  %v16 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x i64> %a3_i64, <3 x i64> %b3_i64
+  %v17 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %a4_i64, <4 x i64> %b4_i64
+  %v18 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i64> %a8_i64, <8 x i64> %b8_i64
+  %v19 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i64> %a16_i64, <16 x i64> %b16_i64
+
+  %v20 = select <2 x i1> <i1 true, i1 false>, <2 x float> %a2_float, <2 x float> %b2_float
+  %v21 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x float> %a3_float, <3 x float> %b3_float
+  %v22 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a4_float, <4 x float> %b4_float
+  %v23 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x float> %a8_float, <8 x float> %b8_float
+  %v24 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x float> %a16_float, <16 x float> %b16_float
+
+  %v25 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a2_double, <2 x double> %b2_double
+  %v26 = select <3 x i1> <i1 true, i1 false, i1 true>, <3 x double> %a3_double, <3 x double> %b3_double
+  %v27 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a4_double, <4 x double> %b4_double
+  %v28 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x double> %a8_double, <8 x double> %b8_double
+  %v29 = select <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x double> %a16_double, <16 x double> %b16_double
+
+  ; None of %v0..%v29 is read afterwards; main simply returns 0.
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-shift.ll b/test/ExecutionEngine/test-interp-vec-shift.ll
new file mode 100644
index 000000000000..3aa4f4e54f39
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-shift.ll
@@ -0,0 +1,32 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+    %shamt = add <2 x i8> <i8 0, i8 0>, <i8 1, i8 2>  ; shift amounts <1, 2>, produced by an instruction rather than written as a constant
+    %shift.upgrd.1 = zext <2 x i8> %shamt to <2 x i32>  ; widened to the element type of the vector being shifted
+    %t1.s = shl <2 x i32> <i32 1, i32 2>, %shift.upgrd.1  ; shl by a non-constant amount
+    %t2.s = shl <2 x i32> <i32 1, i32 2>, <i32 3, i32 4>  ; shl by a constant amount
+    %shift.upgrd.2 = zext <2 x i8> %shamt to <2 x i32>
+    %t1 = shl <2 x i32> <i32 1, i32 2>, %shift.upgrd.2
+    %t2 = shl <2 x i32> <i32 1, i32 0>, <i32 5, i32 6>
+    %t2.s.upgrd.3 = shl <2 x i64> <i64 1, i64 2>, <i64 3, i64 4>  ; shl on 64-bit lanes
+    %t2.upgrd.4 = shl <2 x i64> <i64 1, i64 2>, <i64 6, i64 7>
+    %shift.upgrd.5 = zext <2 x i8> %shamt to <2 x i32>
+    %tr1.s = ashr <2 x i32> <i32 1, i32 2>, %shift.upgrd.5  ; arithmetic right shift, non-constant then constant amount
+    %tr2.s = ashr <2 x i32> <i32 1, i32 2>, <i32 4, i32 5>
+    %shift.upgrd.6 = zext <2 x i8> %shamt to <2 x i32>
+    %tr1 = lshr <2 x i32> <i32 1, i32 2>, %shift.upgrd.6  ; logical right shift, non-constant then constant amount
+    %tr2 = lshr <2 x i32> <i32 1, i32 2>, <i32 5, i32 6>
+    %tr1.l = ashr <2 x i64> <i64 1, i64 2>, <i64 4, i64 5>  ; from here on all shifts use 64-bit lanes
+    %shift.upgrd.7 = zext <2 x i8> %shamt to <2 x i64>
+    %tr2.l = ashr <2 x i64> <i64 1, i64 2>, %shift.upgrd.7
+    %tr3.l = shl <2 x i64> <i64 1, i64 2>, <i64 4, i64 5>
+    %shift.upgrd.8 = zext <2 x i8> %shamt to <2 x i64>
+    %tr4.l = shl <2 x i64> <i64 1, i64 2>, %shift.upgrd.8
+    %tr1.u = lshr <2 x i64> <i64 1, i64 2>, <i64 5, i64 6>
+    %shift.upgrd.9 = zext <2 x i8> %shamt to <2 x i64>
+    %tr2.u = lshr <2 x i64> <i64 1, i64 2>, %shift.upgrd.9
+    %tr3.u = shl <2 x i64> <i64 1, i64 2>, <i64 5, i64 6>
+    %shift.upgrd.10 = zext <2 x i8> %shamt to <2 x i64>
+    %tr4.u = shl <2 x i64> <i64 1, i64 2>, %shift.upgrd.10
+    ret i32 0  ; none of the shift results is read; main always returns 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-shuffle.ll b/test/ExecutionEngine/test-interp-vec-shuffle.ll
new file mode 100644
index 000000000000..e55fa99e3bb1
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-shuffle.ll
@@ -0,0 +1,81 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+
+  ; Operand vectors a<N>_<ty> = <0, 1, ..., N-1>; each is the result of an add/fadd with zero, not a literal constant
+  %a2_i8 = add <2 x i8> zeroinitializer, <i8 0, i8 1>
+  %a3_i8 = add <3 x i8> zeroinitializer, <i8 0, i8 1, i8 2>
+  %a4_i8 = add <4 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3>
+  %a8_i8 = add <8 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+  %a16_i8 = add <16 x i8> zeroinitializer, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+  %a2_i16 = add <2 x i16> zeroinitializer, <i16 0, i16 1>
+  %a3_i16 = add <3 x i16> zeroinitializer, <i16 0, i16 1, i16 2>
+  %a4_i16 = add <4 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3>
+  %a8_i16 = add <8 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  %a16_i16 = add <16 x i16> zeroinitializer, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+
+  %a2_i32 = add <2 x i32> zeroinitializer, <i32 0, i32 1>
+  %a3_i32 = add <3 x i32> zeroinitializer, <i32 0, i32 1, i32 2>
+  %a4_i32 = add <4 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3>
+  %a8_i32 = add <8 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %a16_i32 = add <16 x i32> zeroinitializer, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %a2_i64 = add <2 x i64> zeroinitializer, <i64 0, i64 1>
+  %a3_i64 = add <3 x i64> zeroinitializer, <i64 0, i64 1, i64 2>
+  %a4_i64 = add <4 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3>
+  %a8_i64 = add <8 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %a16_i64 = add <16 x i64> zeroinitializer, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+
+  %a2_float = fadd <2 x float> zeroinitializer, <float 0.0, float 1.0>
+  %a3_float = fadd <3 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0>
+  %a4_float = fadd <4 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0>
+  %a8_float = fadd <8 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>
+  %a16_float = fadd <16 x float> zeroinitializer, <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>
+
+  %a2_double = fadd <2 x double> zeroinitializer, <double 0.0, double 1.0>
+  %a3_double = fadd <3 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0>
+  %a4_double = fadd <4 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0>
+  %a8_double = fadd <8 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>
+  %a16_double = fadd <16 x double> zeroinitializer, <double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>
+
+  ; Reverse the lane order of every vector (mask N-1, ..., 0); all mask indices are below N, so the undef second operand is never selected.
+  %v0 = shufflevector <2 x i8> %a2_i8, <2 x i8>undef, <2 x i32> <i32 1, i32 0>
+  %v1 = shufflevector <3 x i8> %a3_i8, <3 x i8>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v2 = shufflevector <4 x i8> %a4_i8, <4 x i8>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v3 = shufflevector <8 x i8> %a8_i8, <8 x i8>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v4 = shufflevector <16 x i8> %a16_i8, <16 x i8>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v5 = shufflevector <2 x i16> %a2_i16, <2 x i16>undef, <2 x i32> <i32 1, i32 0>
+  %v6 = shufflevector <3 x i16> %a3_i16, <3 x i16>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v7 = shufflevector <4 x i16> %a4_i16, <4 x i16>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v8 = shufflevector <8 x i16> %a8_i16, <8 x i16>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v9 = shufflevector <16 x i16> %a16_i16, <16 x i16>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v10 = shufflevector <2 x i32> %a2_i32, <2 x i32>undef, <2 x i32> <i32 1, i32 0>
+  %v11 = shufflevector <3 x i32> %a3_i32, <3 x i32>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v12 = shufflevector <4 x i32> %a4_i32, <4 x i32>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v13 = shufflevector <8 x i32> %a8_i32, <8 x i32>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v14 = shufflevector <16 x i32> %a16_i32, <16 x i32>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v15 = shufflevector <2 x i64> %a2_i64, <2 x i64>undef, <2 x i32> <i32 1, i32 0>
+  %v16 = shufflevector <3 x i64> %a3_i64, <3 x i64>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v17 = shufflevector <4 x i64> %a4_i64, <4 x i64>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v18 = shufflevector <8 x i64> %a8_i64, <8 x i64>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v19 = shufflevector <16 x i64> %a16_i64, <16 x i64>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v20 = shufflevector <2 x float> %a2_float, <2 x float>undef, <2 x i32> <i32 1, i32 0>
+  %v21 = shufflevector <3 x float> %a3_float, <3 x float>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v22 = shufflevector <4 x float> %a4_float, <4 x float>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v23 = shufflevector <8 x float> %a8_float, <8 x float>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v24 = shufflevector <16 x float> %a16_float, <16 x float>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  %v25 = shufflevector <2 x double> %a2_double, <2 x double>undef, <2 x i32> <i32 1, i32 0>
+  %v26 = shufflevector <3 x double> %a3_double, <3 x double>undef, <3 x i32> <i32 2, i32 1, i32 0>
+  %v27 = shufflevector <4 x double> %a4_double, <4 x double>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %v28 = shufflevector <8 x double> %a8_double, <8 x double>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %v29 = shufflevector <16 x double> %a16_double, <16 x double>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  ret i32 0
+}
+
diff --git a/test/Feature/cold.ll b/test/Feature/cold.ll
new file mode 100644
index 000000000000..dcf79c5ba393
--- /dev/null
+++ b/test/Feature/cold.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; CHECK: @fun() #0
+define void @fun() #0 {  ; minimal function that carries attribute group #0
+  ret void
+}
+
+; CHECK: attributes #0 = { cold }
+attributes #0 = { cold }  ; the group that must survive the llvm-as | llvm-dis round trip in the RUN line
diff --git a/test/Feature/lit.local.cfg b/test/Feature/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Feature/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Feature/md_on_instruction.ll b/test/Feature/md_on_instruction.ll
index da9e49ebfb2f..955ace3da840 100644
--- a/test/Feature/md_on_instruction.ll
+++ b/test/Feature/md_on_instruction.ll
@@ -16,7 +16,12 @@ declare void @llvm.dbg.func.start(metadata) nounwind readnone
 
 declare void @llvm.dbg.region.end(metadata) nounwind readnone
 
+!llvm.module.flags = !{!6}
+
 !0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 1, metadata !2, i1 false, i1 true}
-!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang 1.0", i1 true, i1 false, metadata !"", i32 0}
-!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!1 = metadata !{i32 458769, metadata !4, i32 12, metadata !"clang 1.0", i1 true, metadata !"", i32 0, metadata !5, metadata !5, metadata !4, null, null, metadata !""}
+!2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
 !3 = metadata !{i32 1, i32 13, metadata !1, metadata !1}
+!4 = metadata !{metadata !"foo.c", metadata !"/tmp"}  ; file name / directory pair referenced by !1
+!5 = metadata !{i32 0}  ; single-element node referenced twice by !1
+!6 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}  ; the entry listed in !llvm.module.flags
diff --git a/test/Feature/newcasts.ll b/test/Feature/newcasts.ll
index 4cfc8bcf08e4..dd4748443ef9 100644
--- a/test/Feature/newcasts.ll
+++ b/test/Feature/newcasts.ll
@@ -20,6 +20,9 @@ define void @"NewCasts" (i16 %x) {
   %p = uitofp <4 x i32> %n to <4 x float>
   %q = fptosi <4 x float> %p to <4 x i32>
   %r = fptoui <4 x float> %p to <4 x i32>
+  %s = inttoptr <4 x i32> %n to <4 x i32*>
+  %t = addrspacecast <4 x i32*> %s to <4 x i32 addrspace(1)*>
+  %z = addrspacecast <4 x i32*> %s to <4 x float addrspace(2)*>
   ret void
 }
 
diff --git a/test/Feature/optnone.ll b/test/Feature/optnone.ll
new file mode 100644
index 000000000000..7d8afd4b773e
--- /dev/null
+++ b/test/Feature/optnone.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Check for the presence of attribute optnone in the disassembly.
+
+; CHECK: @foo() #0
+define void @foo() #0 {
+  ret void
+}
+
+; CHECK: attributes #0 = { noinline optnone }
+attributes #0 = { optnone noinline }
+
diff --git a/test/Feature/prefixdata.ll b/test/Feature/prefixdata.ll
new file mode 100644
index 000000000000..b53945c11ff7
--- /dev/null
+++ b/test/Feature/prefixdata.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: FileCheck %s < %t1.ll
+; RUN: llvm-as < %t1.ll | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
+; RUN: opt -O3 -S < %t1.ll | FileCheck %s
+
+; CHECK: @i
+@i = linkonce_odr global i32 1
+
+; CHECK: f(){{.*}}prefix i32 1
+define void @f() prefix i32 1 {
+  ret void
+}
+
+; CHECK: g(){{.*}}prefix i32* @i
+define void @g() prefix i32* @i {
+  ret void
+}
diff --git a/test/FileCheck/check-a-b-has-b.txt b/test/FileCheck/check-a-b-has-b.txt
new file mode 100644
index 000000000000..4d64d098ae2e
--- /dev/null
+++ b/test/FileCheck/check-a-b-has-b.txt
@@ -0,0 +1,5 @@
+; RUN: FileCheck -check-prefix=A -check-prefix=B -input-file %s %s
+
+this is the string to be matched
+
+; B-DAG: this is the string to be {{matched}}
diff --git a/test/FileCheck/check-b-a-has-b.txt b/test/FileCheck/check-b-a-has-b.txt
new file mode 100644
index 000000000000..ac149906c52f
--- /dev/null
+++ b/test/FileCheck/check-b-a-has-b.txt
@@ -0,0 +1,5 @@
+; RUN: FileCheck -check-prefix=B -check-prefix=A -input-file %s %s
+
+this is the string to be matched
+
+; B-DAG: this is the string to be {{matched}}
diff --git a/test/FileCheck/check-dag-multi-prefix-2.txt b/test/FileCheck/check-dag-multi-prefix-2.txt
new file mode 100644
index 000000000000..4add70da1609
--- /dev/null
+++ b/test/FileCheck/check-dag-multi-prefix-2.txt
@@ -0,0 +1,7 @@
+; RUN: FileCheck -check-prefix=A -input-file %s %s
+
+this should be matched
+
+; B-DAG: foo
+
+; A-DAG: {{this}} should be matched
diff --git a/test/FileCheck/check-dag-multi-prefix.txt b/test/FileCheck/check-dag-multi-prefix.txt
new file mode 100644
index 000000000000..95dfe5a5192c
--- /dev/null
+++ b/test/FileCheck/check-dag-multi-prefix.txt
@@ -0,0 +1,27 @@
+; RUN: FileCheck -check-prefix=A -check-prefix=B -input-file %s %s
+
+add r10, r1, r2
+add r11, r3, r4
+mul r5, r10, r11
+
+mul r11, r3, r4
+mul r10, r1, r2
+add r5, r10, r11
+
+add r11, r3, r4
+add r10, r1, r2
+mul r5, r10, r11
+
+; B-DAG: add [[REG1:r[0-9]+]], r1, r2
+; B-DAG: add [[REG2:r[0-9]+]], r3, r4
+; B: mul r5, [[REG1]], [[REG2]]
+
+; A-DAG: mul [[REG1:r[0-9]+]], r1, r2
+; A-DAG: mul [[REG2:r[0-9]+]], r3, r4
+; A: add r5, [[REG1]], [[REG2]]
+
+; B-DAG: add [[REG1:r[0-9]+]], r1, r2
+; B-DAG: add [[REG2:r[0-9]+]], r3, r4
+; B-NOT: xor
+; B-DAG: mul r5, [[REG1]], [[REG2]]
+
diff --git a/test/FileCheck/check-dag-substring-prefix.txt b/test/FileCheck/check-dag-substring-prefix.txt
new file mode 100644
index 000000000000..49d4b2b9ba91
--- /dev/null
+++ b/test/FileCheck/check-dag-substring-prefix.txt
@@ -0,0 +1,7 @@
+; RUN: not FileCheck -check-prefix=A -check-prefix=AA -input-file %s %s
+
+this is the string to be matched
+this should also be matched
+
+; BAA-DAG: this is the string to be {{matched}}
+; BAA-DAG: this should also be {{matched}}
diff --git a/test/FileCheck/check-dag-xfails.txt b/test/FileCheck/check-dag-xfails.txt
new file mode 100644
index 000000000000..3f4f98e348ef
--- /dev/null
+++ b/test/FileCheck/check-dag-xfails.txt
@@ -0,0 +1,85 @@
+; RUN: not FileCheck -check-prefix=X1 -input-file %s %s
+; RUN: not FileCheck -check-prefix=X2 -input-file %s %s
+; RUN: not FileCheck -check-prefix=X3 -input-file %s %s
+; RUN: not FileCheck -check-prefix=X4 -input-file %s %s
+; RUN: not FileCheck -check-prefix=X5 -input-file %s %s
+; RUN: not FileCheck -check-prefix=X6 -input-file %s %s
+
+__x1
+add r10, r1, r2
+add r11, r3, r4
+mul r5, r10, r12
+__x1
+
+; X1: __x1
+; X1-DAG: add [[REG1:r[0-9]+]], r1, r2
+; X1-DAG: add [[REG2:r[0-9]+]], r3, r4
+; X1: mul r5, [[REG1]], [[REG2]]
+; X1: __x1
+
+__x2
+mul r11, r3, r4
+mul r10, r1, r2
+add r5, r11, r11
+__x2
+
+; X2: __x2
+; X2-DAG: mul [[REG1:r[0-9]+]], r1, r2
+; X2-DAG: mul [[REG2:r[0-9]+]], r3, r4
+; X2: add r5, [[REG1]], [[REG2]]
+; X2: __x2
+
+__x3
+add r11, r3, r4
+add r12, r1, r2
+mul r5, r10, r11
+__x3
+
+; X3: __x3
+; X3-DAG: add [[REG1:r[0-9]+]], r1, r2
+; X3-DAG: add [[REG2:r[0-9]+]], r3, r4
+; X3-DAG: mul r5, [[REG1]], [[REG2]]
+; X3: __x3
+
+__x4
+add r11, r3, r4
+add r12, r1, r2
+not
+mul r5, r12, r11
+__x4
+
+; X4: __x4
+; X4-DAG: add [[REG1:r[0-9]+]], r1, r2
+; X4-DAG: add [[REG2:r[0-9]+]], r3, r4
+; X4-NOT: not
+; X4-DAG: mul r5, [[REG1]], [[REG2]]
+; X4: __x4
+
+__x5
+mul r5, r12, r11
+add r11, r3, r4
+add r12, r1, r2
+not
+__x5
+
+; X5: __x5
+; X5-DAG: add [[REG1:r[0-9]+]], r1, r2
+; X5-DAG: add [[REG2:r[0-9]+]], r3, r4
+; X5-NOT: not
+; X5-DAG: mul r5, [[REG1]], [[REG2]]
+; X5: __x5
+
+__x6
+add r11, r3, r4
+mul r6, r12, r11
+add r12, r1, r2
+mul r5, r12, r11
+__x6
+
+; X6: __x6
+; X6-DAG: add [[REG1:r[0-9]+]], r1, r2
+; X6-DAG: add [[REG2:r[0-9]+]], r3, r4
+; X6-NOT: not
+; X6-DAG: mul r5, [[REG1]], [[REG2]]
+; X6-DAG: mul r6, [[REG1]], [[REG2]]
+; X6: __x6
diff --git a/test/FileCheck/check-dag.txt b/test/FileCheck/check-dag.txt
new file mode 100644
index 000000000000..2b5a47551e83
--- /dev/null
+++ b/test/FileCheck/check-dag.txt
@@ -0,0 +1,26 @@
+; RUN: FileCheck -input-file %s %s
+
+add r10, r1, r2
+add r11, r3, r4
+mul r5, r10, r11
+
+mul r11, r3, r4
+mul r10, r1, r2
+add r5, r10, r11
+
+add r11, r3, r4
+add r10, r1, r2
+mul r5, r10, r11
+
+; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2
+; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
+; CHECK: mul r5, [[REG1]], [[REG2]]
+
+; CHECK-DAG: mul [[REG1:r[0-9]+]], r1, r2
+; CHECK-DAG: mul [[REG2:r[0-9]+]], r3, r4
+; CHECK: add r5, [[REG1]], [[REG2]]
+
+; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2
+; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
+; CHECK-NOT: xor
+; CHECK-DAG: mul r5, [[REG1]], [[REG2]]
diff --git a/test/FileCheck/check-label-dag-capture.txt b/test/FileCheck/check-label-dag-capture.txt
new file mode 100644
index 000000000000..d8f90f492574
--- /dev/null
+++ b/test/FileCheck/check-label-dag-capture.txt
@@ -0,0 +1,11 @@
+; RUN: FileCheck -input-file %s %s
+
+bar
+foo
+foo
+zed
+
+CHECK-LABEL: {{^}}bar
+CHECK: {{^}}[[FOO:foo]]
+CHECK-DAG: {{^}}[[FOO]]
+CHECK-LABEL: {{^}}zed
diff --git a/test/FileCheck/check-label-dag.txt b/test/FileCheck/check-label-dag.txt
new file mode 100644
index 000000000000..2f54c3ea94ec
--- /dev/null
+++ b/test/FileCheck/check-label-dag.txt
@@ -0,0 +1,11 @@
+; RUN: not FileCheck -input-file %s %s 2>&1 | FileCheck --check-prefix=ERROR %s
+
+bar
+zed
+
+CHECK-LABEL: {{^}}bar
+CHECK-DAG: {{^}}foo
+CHECK-LABEL: {{^}}zed
+
+ERROR: error: expected string not found in input
+ERROR-NEXT: CHECK-DAG: {{.....}}foo
diff --git a/test/FileCheck/check-label.txt b/test/FileCheck/check-label.txt
new file mode 100644
index 000000000000..27f0beeb2911
--- /dev/null
+++ b/test/FileCheck/check-label.txt
@@ -0,0 +1,51 @@
+; RUN: FileCheck -input-file %s %s -check-prefix=CHECKOK
+; RUN: not FileCheck -input-file %s %s -check-prefix=CHECKFAIL 2>&1 | FileCheck %s -check-prefix=CHECKERROR
+
+label0:
+a
+b
+
+label1:
+b
+c
+
+label2:
+a
+c
+
+; CHECKOK-LABEL: {{^}}label0:
+; CHECKOK: {{^}}a
+; CHECKOK: {{^}}b
+
+; CHECKOK-LABEL: {{^}}label1:
+; CHECKOK: {{^}}b
+; CHECKOK: {{^}}c
+
+; CHECKOK-LABEL: {{^}}label2:
+; CHECKOK: {{^}}a
+; CHECKOK: {{^}}c
+
+; CHECKFAIL-LABEL: {{^}}label0:
+; CHECKFAIL: {{^}}a
+; CHECKFAIL: {{^}}b
+; CHECKFAIL: {{^}}c
+
+; CHECKERROR: expected string not found in input
+; CHECKERROR-NEXT: CHECKFAIL: {{[{][{]\^[}][}]}}c
+
+; CHECKFAIL-LABEL: {{^}}label1:
+; CHECKFAIL: {{^}}a
+; CHECKFAIL: {{^}}b
+; CHECKFAIL: {{^}}c
+
+; CHECKERROR: expected string not found in input
+; CHECKERROR-NEXT: CHECKFAIL: {{[{][{]\^[}][}]}}a
+
+; CHECKFAIL-LABEL: {{^}}label2:
+; CHECKFAIL: {{^}}a
+; CHECKFAIL: {{^}}b
+; CHECKFAIL: {{^}}c
+
+; CHECKERROR: expected string not found in input
+; CHECKERROR-NEXT: CHECKFAIL: {{[{][{]\^[}][}]}}b
+
diff --git a/test/FileCheck/check-multi-prefix-label.txt b/test/FileCheck/check-multi-prefix-label.txt
new file mode 100644
index 000000000000..41fe64151d47
--- /dev/null
+++ b/test/FileCheck/check-multi-prefix-label.txt
@@ -0,0 +1,6 @@
+// RUN: FileCheck -check-prefix=ONE -check-prefix=TWO -input-file %s %s
+
+foo
+bar
+; ONE-LABEL: {{f}}oo
+; TWO-NEXT: {{b}}ar
diff --git a/test/FileCheck/check-multiple-prefixes-mixed.txt b/test/FileCheck/check-multiple-prefixes-mixed.txt
new file mode 100644
index 000000000000..cd3b70a425ec
--- /dev/null
+++ b/test/FileCheck/check-multiple-prefixes-mixed.txt
@@ -0,0 +1,10 @@
+// RUN: FileCheck -check-prefix=B -check-prefix=BOTH -input-file %s %s
+// RUN: FileCheck -check-prefix=A -check-prefix=BOTH -input-file %s %s
+
+; A: {{a}}aaaaa
+; B: {{b}}bbbb
+; BOTH: {{q}}qqqqq
+aaaaaa
+bbbbb
+qqqqqq
+ccccc
diff --git a/test/FileCheck/check-multiple-prefixes-nomatch.txt b/test/FileCheck/check-multiple-prefixes-nomatch.txt
new file mode 100644
index 000000000000..9d3835985f34
--- /dev/null
+++ b/test/FileCheck/check-multiple-prefixes-nomatch.txt
@@ -0,0 +1,10 @@
+; RUN: not FileCheck -input-file %s %s -check-prefix=FOO -check-prefix=BAR 2>&1 | FileCheck %s
+
+BAR
+bar
+foo
+; BAR: ba{{z}}
+; FOO: fo{{o}}
+
+; CHECK: {{error: expected string not found in input}}
+; CHECK-NEXT: {{B}}AR: ba{{[{][{]z[}][}]}}
diff --git a/test/FileCheck/check-multiple-prefixes-substr.txt b/test/FileCheck/check-multiple-prefixes-substr.txt
new file mode 100644
index 000000000000..76a2ca8e75c5
--- /dev/null
+++ b/test/FileCheck/check-multiple-prefixes-substr.txt
@@ -0,0 +1,5 @@
+// RUN: FileCheck -check-prefix=CHECKER -check-prefix=CHECK -input-file %s %s
+// RUN: FileCheck -check-prefix=CHECK -check-prefix=CHECKER -input-file %s %s
+
+foo
+; CHECKER: fo{{o}}
diff --git a/test/FileCheck/check-not-diaginfo.txt b/test/FileCheck/check-not-diaginfo.txt
index a4c3ca884010..44a46a3d25dd 100644
--- a/test/FileCheck/check-not-diaginfo.txt
+++ b/test/FileCheck/check-not-diaginfo.txt
@@ -1,4 +1,4 @@
-; RUN: FileCheck -input-file %s %s 2>&1 | FileCheck -check-prefix DIAG %s
+; RUN: not FileCheck -input-file %s %s 2>&1 | FileCheck -check-prefix DIAG %s
 
 CHECK-NOT: test
 
diff --git a/test/FileCheck/check-prefixes.txt b/test/FileCheck/check-prefixes.txt
new file mode 100644
index 000000000000..fc7a0435eb70
--- /dev/null
+++ b/test/FileCheck/check-prefixes.txt
@@ -0,0 +1,9 @@
+// RUN: FileCheck -check-prefix=ANOTHER-PREFIX -input-file %s %s
+// RUN: not FileCheck -check-prefix=PREFIX -input-file %s %s 2>&1 | FileCheck -check-prefix=CHECK-NONEXISTENT-PREFIX %s
+
+foobar
+; ANOTHER-PREFIX: foobar
+
+; We use regex to match the colon so that FileCheck won't think it is a check
+; prefix.
+; CHECK-NONEXISTENT-PREFIX: error: no check strings found with prefix 'PREFIX{{:}}'
diff --git a/test/FileCheck/check-substring-multi-prefix-2.txt b/test/FileCheck/check-substring-multi-prefix-2.txt
new file mode 100644
index 000000000000..618a2884d4ad
--- /dev/null
+++ b/test/FileCheck/check-substring-multi-prefix-2.txt
@@ -0,0 +1,11 @@
+; RUN: FileCheck -check-prefix=FOO -check-prefix=FOOBAR -check-prefix=BARFOO -input-file %s %s
+; RUN: FileCheck -check-prefix=FOOBAR -check-prefix=FOO -check-prefix=BARFOO -input-file %s %s
+; RUN: FileCheck -check-prefix=FOOBAR -check-prefix=BARFOO -check-prefix=FOO -input-file %s %s
+
+this is the match
+this is another
+
+FOO
+FOOBAR
+FOOBAR: this is the {{match}}
+BARFOO: this is {{another}}
diff --git a/test/FileCheck/check-substring-multi-prefix.txt b/test/FileCheck/check-substring-multi-prefix.txt
new file mode 100644
index 000000000000..b7edb8b530e5
--- /dev/null
+++ b/test/FileCheck/check-substring-multi-prefix.txt
@@ -0,0 +1,9 @@
+// RUN: FileCheck -check-prefix=AAAOVERLAP -check-prefix=OVERLAP -input-file %s %s
+
+foo
+bar
+buzz
+
+OVERLAP: foo
+AAAOVERLAP: bar
+OVERLAP: buzz
diff --git a/test/FileCheck/first-character-match.txt b/test/FileCheck/first-character-match.txt
new file mode 100644
index 000000000000..4b09c21df58d
--- /dev/null
+++ b/test/FileCheck/first-character-match.txt
@@ -0,0 +1,2 @@
+RUN: FileCheck -check-prefix=RUN -input-file %s %s
+// Prefix is at the first character in the file. The run line then matches itself.
diff --git a/test/FileCheck/line-count-2.txt b/test/FileCheck/line-count-2.txt
new file mode 100644
index 000000000000..a56ab6dcafd2
--- /dev/null
+++ b/test/FileCheck/line-count-2.txt
@@ -0,0 +1,11 @@
+// RUN: FileCheck -input-file %s %s
+
+something else
+CHECK: {{some}}thing else
+
+foobar
+
+CHECK: {{foo}}bar
+ALMOSTCHECK
+10 wowomg
+CHECK: [[@LINE-1]] {{wow}}omg
diff --git a/test/FileCheck/line-count.txt b/test/FileCheck/line-count.txt
new file mode 100644
index 000000000000..6f91c2050bf8
--- /dev/null
+++ b/test/FileCheck/line-count.txt
@@ -0,0 +1,15 @@
+; RUN: FileCheck  -input-file %s %s
+2
+3 aaa
+4 bbb
+5 ccc
+6 CHECK: [[@LINE-3]] {{a}}aa
+7 CHECK: [[@LINE-3]] {{b}}bb
+8 CHECK: [[@LINE-3]] {{c}}cc
+9 foobar
+10 CHECK: [[@LINE-1]] {{foo}}bar
+11
+12 arst CHECK: [[@LINE]] {{a}}rst
+13
+14
+
diff --git a/test/FileCheck/lit.local.cfg b/test/FileCheck/lit.local.cfg
deleted file mode 100644
index ee25f56231c5..000000000000
--- a/test/FileCheck/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.txt']
diff --git a/test/FileCheck/multiple-missing-prefixes.txt b/test/FileCheck/multiple-missing-prefixes.txt
new file mode 100644
index 000000000000..cb557d9f2605
--- /dev/null
+++ b/test/FileCheck/multiple-missing-prefixes.txt
@@ -0,0 +1,9 @@
+// RUN: FileCheck -check-prefix=ANOTHER-PREFIX -input-file %s %s
+// RUN: not FileCheck -check-prefix=PREFIX1 -check-prefix=PREFIX2 -input-file %s %s 2>&1 | FileCheck -strict-whitespace -check-prefix=CHECK-NONEXISTENT-PREFIX -check-prefix=ALSO-NONEXISTENT %s
+
+foobar
+; ANOTHER-PREFIX: foobar
+
+; We use regex to match the colon so that FileCheck won't think it is a check
+; prefix.
+; CHECK-NONEXISTENT-PREFIX: error: no check strings found with prefixes 'PREFIX1{{:}}', 'PREFIX2{{:}}'
diff --git a/test/FileCheck/separate-multi-prefix.txt b/test/FileCheck/separate-multi-prefix.txt
new file mode 100644
index 000000000000..5578d7f1b964
--- /dev/null
+++ b/test/FileCheck/separate-multi-prefix.txt
@@ -0,0 +1,7 @@
+// RUN: not FileCheck -check-prefix=SOMEPREFIX -input-file %s %s
+// RUN: FileCheck -check-prefix=ANOTHER -input-file %s %s
+
+asdf
+; SOMEPREFIX: {{t}}his_is_not_asdf
+; ANOTHER: {{a}}sdf
+
diff --git a/test/FileCheck/validate-check-prefix.txt b/test/FileCheck/validate-check-prefix.txt
new file mode 100644
index 000000000000..db3392d58190
--- /dev/null
+++ b/test/FileCheck/validate-check-prefix.txt
@@ -0,0 +1,9 @@
+// RUN: not FileCheck -check-prefix=A! -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s
+// RUN: FileCheck -check-prefix=A1a-B_c -input-file %s %s
+// RUN: not FileCheck -check-prefix=REPEAT -check-prefix=REPEAT -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s
+// RUN: not FileCheck -check-prefix=VALID -check-prefix=A! -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s
+foobar
+; A1a-B_c: foobar
+
+; BAD_PREFIX: Supplied check-prefix is invalid! Prefixes must be
+  unique and start with a letter and contain only alphanumeric characters, hyphens and underscores
diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
index 38168fc2d68d..2c4d82eb1ae7 100644
--- a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -64,8 +64,10 @@ entry:
   ret void
 }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
+!0 = metadata !{metadata !5, metadata !5, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
+!3 = metadata !{metadata !6, metadata !6, i64 0}
 !4 = metadata !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566}
+!5 = metadata !{metadata !"any pointer", metadata !1}
+!6 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
index b05ed3c77c16..ba763cf03ffc 100644
--- a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
+++ b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
@@ -1,13 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
 
diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
index da8f54137598..1087c9a58ff3 100644
--- a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
+++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 %struct_of_7_bytes_4_aligned = type { i32, i8, i8, i8}
 
-@f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4
+@f = external global %struct_of_7_bytes_4_aligned , align 4
 
 ; Accessing bytes 4 and 6, not ok to widen to i32 if sanitize_address is set.
 
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index fb32e704af86..6002b9e897d7 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -89,6 +89,25 @@ entry:
 ; CHECK-NOT: = alloca
 ; CHECK: ret void
 
+; Check that asan does not touch allocas with alignment > 32.
+define void @alloca_alignment_test() sanitize_address {
+entry:
+  %x = alloca [10 x i8], align 64
+  %y = alloca [10 x i8], align 128
+  %z = alloca [10 x i8], align 256
+  call void @alloca_test_use([10 x i8]* %x)
+  call void @alloca_test_use([10 x i8]* %y)
+  call void @alloca_test_use([10 x i8]* %z)
+  ret void
+}
+
+; CHECK: define void @alloca_alignment_test()
+; CHECK: = alloca{{.*}} align 64
+; CHECK: = alloca{{.*}} align 128
+; CHECK: = alloca{{.*}} align 256
+; CHECK: ret void
+
+
 define void @LongDoubleTest(x86_fp80* nocapture %a) nounwind uwtable sanitize_address {
 entry:
     store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a, align 16
diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll
new file mode 100644
index 000000000000..47a54c0ef85e
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/coverage.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -asan -asan-coverage=1 -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @foo(i32* %a) sanitize_address {
+entry:
+  ret i32 0
+}
+; CHECK: define i32 @foo(i32* %a) #0 {
+; CHECK: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1
+; CHECK: %1 = icmp eq i8 0, %0
+; CHECK: br i1 %1, label %2, label %3
+; CHECK: call void @__sanitizer_cov(i64 ptrtoint (i32 (i32*)* @foo to i64))
+; CHECK: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index ec51caeb5868..daf29571c4a6 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -31,17 +31,16 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo", metadata !"clang version 3.3 (trunk 169314)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
+!0 = metadata !{i32 786449, metadata !16, i32 4, metadata !"clang version 3.3 (trunk 169314)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !16, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
 !6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{i32 786689, metadata !5, metadata !"p", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
 !11 = metadata !{i32 1, i32 0, metadata !5, null}
 !12 = metadata !{i32 786688, metadata !13, metadata !"r", metadata !6, i32 2, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [r] [line 2]
@@ -55,7 +54,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ;   CHECK-NOT: DW_TAG_auto_variable
 
 
-!13 = metadata !{i32 786443, metadata !5, i32 1, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc]
+!13 = metadata !{i32 786443, metadata !16, metadata !5, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc]
 !14 = metadata !{i32 2, i32 0, metadata !13, null}
 !15 = metadata !{i32 3, i32 0, metadata !13, null}
 !16 = metadata !{metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo"}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
index 0928c494154e..d4fd93c16477 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
@@ -16,5 +16,5 @@ declare void @_Z3fooPi(i32*)
 ; We create one global string constant for the stack frame above.
 ; It should have unnamed_addr and align 1.
 ; Make sure we don't create any other global constants.
-; CHECK: = private unnamed_addr constant{{.*}}align 1
-; CHECK-NOT: = private unnamed_addr constant
+; CHECK: = internal unnamed_addr constant{{.*}}align 1
+; CHECK-NOT: = internal unnamed_addr constant
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 2c183f523feb..4717277b9afd 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -9,12 +9,73 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: llvm.global_ctors
 ; CHECK: llvm.global_dtors
 
-; CHECK: define internal void @asan.module_ctor
+; Test that we don't instrument global arrays with static initializer
+; indexed with constants in-bounds. But instrument all other cases.
+
+@GlobSt = global [10 x i32] zeroinitializer, align 16  ; static initializer
+@GlobDy = global [10 x i32] zeroinitializer, align 16  ; dynamic initializer
+@GlobEx = external global [10 x i32] , align 16        ; extern initializer
+
+; GlobSt is declared here, and has static initializer -- ok to optimize.
+define i32 @AccessGlobSt_0_2() sanitize_address {
+entry:
+    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 2), align 8
+    ret i32 %0
+; CHECK-LABEL: define i32 @AccessGlobSt_0_2
+; CHECK-NOT: __asan_report
+; CHECK: ret i32 %0
+}
+
+; GlobSt is accessed out of bounds -- can't optimize
+define i32 @AccessGlobSt_0_12() sanitize_address {
+entry:
+    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 12), align 8
+    ret i32 %0
+; CHECK-LABEL: define i32 @AccessGlobSt_0_12
+; CHECK: __asan_report
+; CHECK: ret i32
+}
+
+; GlobSt is accessed with Gep that has non-0 first index -- can't optimize.
+define i32 @AccessGlobSt_1_2() sanitize_address {
+entry:
+    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 1, i64 2), align 8
+    ret i32 %0
+; CHECK-LABEL: define i32 @AccessGlobSt_1_2
+; CHECK: __asan_report
+; CHECK: ret i32
+}
+
+; GlobDy is declared with dynamic initializer -- can't optimize.
+define i32 @AccessGlobDy_0_2() sanitize_address {
+entry:
+    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobDy, i64 0, i64 2), align 8
+    ret i32 %0
+; CHECK-LABEL: define i32 @AccessGlobDy_0_2
+; CHECK: __asan_report
+; CHECK: ret i32
+}
+
+; GlobEx is an external global -- can't optimize.
+define i32 @AccessGlobEx_0_2() sanitize_address {
+entry:
+    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobEx, i64 0, i64 2), align 8
+    ret i32 %0
+; CHECK-LABEL: define i32 @AccessGlobEx_0_2
+; CHECK: __asan_report
+; CHECK: ret i32
+}
+
+
+!llvm.asan.dynamically_initialized_globals = !{!0}
+!0 = metadata !{[10 x i32]* @GlobDy}
+
+; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
 ; CHECK: call void @__asan_register_globals
 ; CHECK: ret
 
-; CHECK: define internal void @asan.module_dtor
+; CHECK-LABEL: define internal void @asan.module_dtor
 ; CHECK-NOT: ret
 ; CHECK: call void @__asan_unregister_globals
 ; CHECK: ret
diff --git a/test/Instrumentation/AddressSanitizer/keep-instrumented_functions.ll b/test/Instrumentation/AddressSanitizer/keep-instrumented_functions.ll
new file mode 100644
index 000000000000..ff3bbb047fff
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/keep-instrumented_functions.ll
@@ -0,0 +1,23 @@
+; Test the -asan-keep-uninstrumented-functions flag: FOO should get cloned
+; RUN: opt < %s -asan -asan-keep-uninstrumented-functions -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global i32 0, align 4
+
+define i32 @main() sanitize_address {
+entry:
+  tail call void @FOO(i32* @a)
+  ret i32 0
+}
+
+define void @FOO(i32* nocapture %x) sanitize_address {
+entry:
+  store i32 1, i32* %x, align 4
+  ret void
+}
+
+; main should not be cloned since it is not being instrumented by asan.
+; CHECK-NOT: NOASAN_main
+; CHECK: define void @FOO{{.*}} section "ASAN"
+; CHECK: define void @NOASAN_FOO{{.*}} section "NOASAN"
diff --git a/test/Instrumentation/AddressSanitizer/lifetime-uar.ll b/test/Instrumentation/AddressSanitizer/lifetime-uar.ll
new file mode 100644
index 000000000000..21eaf7f15412
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/lifetime-uar.ll
@@ -0,0 +1,33 @@
+; Test handling of llvm.lifetime intrinsics in UAR mode.
+; RUN: opt < %s -asan -asan-use-after-return -asan-check-lifetime -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+define i32 @basic_test() sanitize_address {
+  ; CHECK-LABEL: define i32 @basic_test()
+
+entry:
+  %retval = alloca i32, align 4
+  %c = alloca i8, align 1
+
+  call void @llvm.lifetime.start(i64 1, i8* %c)
+  ; Memory is unpoisoned at llvm.lifetime.start
+  ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 1)
+
+  store i32 0, i32* %retval
+  store i8 0, i8* %c, align 1
+
+  call void @llvm.lifetime.end(i64 1, i8* %c)
+  ; Memory is poisoned at llvm.lifetime.end
+  ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 1)
+
+  ; No need to unpoison memory at function exit in UAR mode.
+  ; CHECK-NOT: @__asan_unpoison_stack_memory
+  ; CHECK: ret i32 0
+
+  ret i32 0
+}
+
diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll
index 334872865f1a..d80331e38723 100644
--- a/test/Instrumentation/AddressSanitizer/lifetime.ll
+++ b/test/Instrumentation/AddressSanitizer/lifetime.ll
@@ -15,7 +15,7 @@ entry:
   call void @llvm.lifetime.end(i64 -1, i8* %i.ptr)
 
 ; Check that lifetime with no size are ignored.
-; CHECK: @lifetime_no_size
+; CHECK-LABEL: define void @lifetime_no_size()
 ; CHECK-NOT: @__asan_poison_stack_memory
 ; CHECK-NOT: @__asan_unpoison_stack_memory
 ; CHECK: ret void
@@ -24,7 +24,7 @@ entry:
 
 ; Generic case of lifetime analysis.
 define void @lifetime() sanitize_address {
-  ; CHECK: @lifetime
+  ; CHECK-LABEL: define void @lifetime()
 
   ; Regular variable lifetime intrinsics.
   %i = alloca i32, align 4
@@ -62,7 +62,7 @@ define void @lifetime() sanitize_address {
 
 ; Check that arguments of lifetime may come from phi nodes.
 define void @phi_args(i1 %x) sanitize_address {
-  ; CHECK: @phi_args
+  ; CHECK-LABEL: define void @phi_args(i1 %x)
 
 entry:
   %i = alloca i64, align 4
diff --git a/test/Instrumentation/AddressSanitizer/lit.local.cfg b/test/Instrumentation/AddressSanitizer/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Instrumentation/AddressSanitizer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/BoundsChecking/lit.local.cfg b/test/Instrumentation/BoundsChecking/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Instrumentation/BoundsChecking/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/BoundsChecking/simple-32.ll b/test/Instrumentation/BoundsChecking/simple-32.ll
new file mode 100644
index 000000000000..38b210f7e236
--- /dev/null
+++ b/test/Instrumentation/BoundsChecking/simple-32.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -bounds-checking -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+
+%struct.s2_packed = type <{ i64, i32, i32, i32, i16, i8 }>
+
+; CHECK-LABEL: @f(
+; CHECK-NOT: trap
+define i16 @f() {
+entry:
+  %packed1 = alloca %struct.s2_packed, align 8
+  %gep = getelementptr inbounds %struct.s2_packed* %packed1, i32 0, i32 4
+  %ptr = bitcast i16* %gep to i32*
+  %val = load i32* %ptr, align 4
+  %valt = trunc i32 %val to i16
+  ret i16 %valt
+}
+
+; CHECK-LABEL: @f2(
+; CHECK: call void @llvm.trap()
+define i16 @f2() {
+entry:
+  %packed1 = alloca %struct.s2_packed, align 8
+  %gep = getelementptr inbounds %struct.s2_packed* %packed1, i32 0, i32 4
+  %ptr = bitcast i16* %gep to i48*
+  %val = load i48* %ptr, align 4
+  %valt = trunc i48 %val to i16
+  ret i16 %valt
+}
diff --git a/test/Instrumentation/BoundsChecking/simple.ll b/test/Instrumentation/BoundsChecking/simple.ll
index 16870c78a875..72b58f4b0a32 100644
--- a/test/Instrumentation/BoundsChecking/simple.ll
+++ b/test/Instrumentation/BoundsChecking/simple.ll
@@ -126,3 +126,20 @@ define i64 @f12(i64 %x, i64 %y) nounwind {
   %4 = load i64* %3, align 8
   ret i64 %4
 }
+
+; PR17402
+; CHECK-LABEL: @f13
+define void @f13() nounwind {
+entry:
+  br label %alive
+
+dead:
+  ; Self-referential GEPs can occur in dead code.
+  %incdec.ptr = getelementptr inbounds i32* %incdec.ptr, i64 1
+  ; CHECK: %incdec.ptr = getelementptr inbounds i32* %incdec.ptr
+  %l = load i32* %incdec.ptr
+  br label %alive
+
+alive:
+  ret void
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt b/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
new file mode 100644
index 000000000000..97ce5e60a544
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/Inputs/abilist.txt
@@ -0,0 +1,8 @@
+fun:discard*=uninstrumented
+fun:discard*=discard
+
+fun:functional=uninstrumented
+fun:functional=functional
+
+fun:custom*=uninstrumented
+fun:custom*=custom
diff --git a/test/Instrumentation/DataFlowSanitizer/abilist.ll b/test/Instrumentation/DataFlowSanitizer/abilist.ll
new file mode 100644
index 000000000000..66ddc140a082
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/abilist.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -dfsan -dfsan-args-abi -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK: i32 @discard(i32 %a, i32 %b)
+define i32 @discard(i32 %a, i32 %b) {
+  ret i32 0
+}
+
+; CHECK: i32 @functional(i32 %a, i32 %b)
+define i32 @functional(i32 %a, i32 %b) {
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+declare void @custom1(i32 %a, i32 %b)
+
+declare i32 @custom2(i32 %a, i32 %b)
+
+declare void @customcb(i32 (i32)* %cb)
+
+declare i32 @cb(i32)
+
+; CHECK: @"dfs$f"
+define void @f() {
+  ; CHECK: %[[LABELRETURN:.*]] = alloca i16
+
+  ; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 0, i16 0)
+  call void @custom1(i32 1, i32 2)
+
+  ; CHECK: call i32 @__dfsw_custom2(i32 1, i32 2, i16 0, i16 0, i16* %[[LABELRETURN]])
+  call i32 @custom2(i32 1, i32 2)
+
+  ; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 0)
+  call void @customcb(i32 (i32)* @cb)
+
+  ret void
+}
+
+; CHECK: define i32 (i32, i32)* @discardg(i32)
+; CHECK: %[[CALL:.*]] = call { i32 (i32, i32)*, i16 } @"dfs$g"(i32 %0, i16 0)
+; CHECK: %[[XVAL:.*]] = extractvalue { i32 (i32, i32)*, i16 } %[[CALL]], 0
+; CHECK: ret {{.*}} %[[XVAL]]
+@discardg = alias i32 (i32, i32)* (i32)* @g
+
+; CHECK: define linkonce_odr { i32, i16 } @"dfsw$custom2"(i32, i32, i16, i16)
+; CHECK: %[[LABELRETURN2:.*]] = alloca i16
+; CHECK: %[[RV:.*]] = call i32 @__dfsw_custom2
+; CHECK: %[[RVSHADOW:.*]] = load i16* %[[LABELRETURN2]]
+; CHECK: insertvalue {{.*}}[[RV]], 0
+; CHECK: insertvalue {{.*}}[[RVSHADOW]], 1
+; CHECK: ret { i32, i16 }
+
+; CHECK: @"dfs$g"
+define i32 (i32, i32)* @g(i32) {
+  ; CHECK: ret {{.*}} @"dfsw$custom2"
+  ret i32 (i32, i32)* @custom2
+}
+
+; CHECK: define { i32, i16 } @"dfs$adiscard"(i32, i32, i16, i16)
+; CHECK: %[[CALL:.*]] = call i32 @discard(i32 %0, i32 %1)
+; CHECK: %[[IVAL0:.*]] = insertvalue { i32, i16 } undef, i32 %[[CALL]], 0
+; CHECK: %[[IVAL1:.*]] = insertvalue { i32, i16 } %[[IVAL0]], i16 0, 1
+; CHECK: ret { i32, i16 } %[[IVAL1]]
+@adiscard = alias i32 (i32, i32)* @discard
+
+; CHECK: declare void @__dfsw_custom1(i32, i32, i16, i16)
+; CHECK: declare i32 @__dfsw_custom2(i32, i32, i16, i16, i16*)
+
+; CHECK-LABEL: define linkonce_odr i32 @"dfst0$customcb"(i32 (i32)*, i32, i16, i16*)
+; CHECK: %[[BC:.*]] = bitcast i32 (i32)* %0 to { i32, i16 } (i32, i16)*
+; CHECK: %[[CALL:.*]] = call { i32, i16 } %[[BC]](i32 %1, i16 %2)
+; CHECK: %[[XVAL0:.*]] = extractvalue { i32, i16 } %[[CALL]], 0
+; CHECK: %[[XVAL1:.*]] = extractvalue { i32, i16 } %[[CALL]], 1
+; CHECK: store i16 %[[XVAL1]], i16* %3
+; CHECK: ret i32 %[[XVAL0]]
diff --git a/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll b/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll
new file mode 100644
index 000000000000..a699f7523c1a
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -dfsan -verify -dfsan-args-abi -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @"dfs$unreachable_bb1"
+define i8 @unreachable_bb1() {
+  ; CHECK: ret { i8, i16 } { i8 1, i16 0 }
+  ; CHECK-NOT: bb2:
+  ; CHECK-NOT: bb3:
+  ; CHECK-NOT: bb4:
+  ret i8 1
+
+bb2:
+  ret i8 2
+
+bb3:
+  br label %bb4
+
+bb4:
+  br label %bb3
+}
+
+declare void @abort() noreturn
+
+; CHECK-LABEL: @"dfs$unreachable_bb2"
+define i8 @unreachable_bb2() {
+  call void @abort() noreturn
+  ; CHECK-NOT: i8 12
+  ; CHECK: unreachable
+  ret i8 12
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/arith.ll b/test/Instrumentation/DataFlowSanitizer/arith.ll
new file mode 100644
index 000000000000..dc618963e8bb
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/arith.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i8 @add(i8 %a, i8 %b) {
+  ; CHECK: @"dfs$add"
+  ; CHECK-DAG: %[[ALABEL:.*]] = load{{.*}}__dfsan_arg_tls, i64 0, i64 0
+  ; CHECK-DAG: %[[BLABEL:.*]] = load{{.*}}__dfsan_arg_tls, i64 0, i64 1
+  ; CHECK: %[[UNION:.*]] = call{{.*}}__dfsan_union(i16 zeroext %[[ALABEL]], i16 zeroext %[[BLABEL]])
+  ; CHECK: %[[ADDLABEL:.*]] = phi i16 [ %[[UNION]], {{.*}} ], [ %[[ALABEL]], {{.*}} ]
+  ; CHECK: add i8
+  ; CHECK: store i16 %[[ADDLABEL]], i16* @__dfsan_retval_tls
+  ; CHECK: ret i8
+  %c = add i8 %a, %b
+  ret i8 %c
+}
+
+define i8 @sub(i8 %a, i8 %b) {
+  ; CHECK: @"dfs$sub"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: call{{.*}}__dfsan_union
+  ; CHECK: sub i8
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i8
+  %c = sub i8 %a, %b
+  ret i8 %c
+}
+
+define i8 @mul(i8 %a, i8 %b) {
+  ; CHECK: @"dfs$mul"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: call{{.*}}__dfsan_union
+  ; CHECK: mul i8
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i8
+  %c = mul i8 %a, %b
+  ret i8 %c
+}
+
+define i8 @sdiv(i8 %a, i8 %b) {
+  ; CHECK: @"dfs$sdiv"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: call{{.*}}__dfsan_union
+  ; CHECK: sdiv i8
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i8
+  %c = sdiv i8 %a, %b
+  ret i8 %c
+}
+
+define i8 @udiv(i8 %a, i8 %b) {
+  ; CHECK: @"dfs$udiv"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: call{{.*}}__dfsan_union
+  ; CHECK: udiv i8
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i8
+  %c = udiv i8 %a, %b
+  ret i8 %c
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/call.ll b/test/Instrumentation/DataFlowSanitizer/call.ll
new file mode 100644
index 000000000000..813f4c1e76fa
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/call.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [64 x i16]
+; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global i16
+
+declare i32 @f(i32)
+declare float @llvm.sqrt.f32(float)
+
+; CHECK: @"dfs$call"
+define i32 @call() {
+  ; CHECK: store{{.*}}__dfsan_arg_tls
+  ; CHECK: call{{.*}}@"dfs$f"
+  ; CHECK: load{{.*}}__dfsan_retval_tls
+  %r = call i32 @f(i32 0)
+
+  ; CHECK-NOT: store{{.*}}__dfsan_arg_tls
+  %i = call float @llvm.sqrt.f32(float -1.0)
+
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i32
+  ret i32 %r
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll b/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
new file mode 100644
index 000000000000..6bcd5c5f0c18
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -dfsan -dfsan-args-abi -dfsan-debug-nonzero-labels -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare i32 @g()
+
+; CHECK: define { i32, i16 } @"dfs$f"(i32, i16)
+define i32 @f(i32) {
+  ; CHECK: [[LOCALLABELALLOCA:%.*]] = alloca i16
+  ; CHECK: [[ARGCMP:%.*]] = icmp ne i16 %1, 0
+  ; CHECK: br i1 [[ARGCMP]]
+  %i = alloca i32
+  store i32 %0, i32* %i
+  ; CHECK: [[CALL:%.*]] = call { i32, i16 } @"dfs$g"()
+  ; CHECK: [[CALLLABEL:%.*]] = extractvalue { i32, i16 } [[CALL]], 1
+  ; CHECK: [[CALLCMP:%.*]] = icmp ne i16 [[CALLLABEL]], 0
+  ; CHECK: br i1 [[CALLCMP]]
+  %call = call i32 @g()
+  ; CHECK: [[LOCALLABEL:%.*]] = load i16* [[LOCALLABELALLOCA]]
+  ; CHECK: [[LOCALCMP:%.*]] = icmp ne i16 [[LOCALLABEL]], 0
+  ; CHECK: br i1 [[LOCALCMP]]
+  %load = load i32* %i
+  ret i32 %load
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/load.ll b/test/Instrumentation/DataFlowSanitizer/load.ll
new file mode 100644
index 000000000000..6431213f8be5
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/load.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i8 @load8(i8* %p) {
+  ; CHECK: @"dfs$load8"
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: load
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i8
+  %a = load i8* %p
+  ret i8 %a
+}
+
+define i16 @load16(i16* %p) {
+  ; CHECK: @"dfs$load16"
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: load
+  ; CHECK: load
+  ; CHECK: icmp ne
+  ; CHECK: call{{.*}}__dfsan_union
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i16
+  %a = load i16* %p
+  ret i16 %a
+}
+
+define i32 @load32(i32* %p) {
+  ; CHECK: @"dfs$load32"
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: bitcast
+  ; CHECK: load
+  ; CHECK: trunc
+  ; CHECK: shl
+  ; CHECK: lshr
+  ; CHECK: or
+  ; CHECK: icmp eq
+
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i32
+
+  ; CHECK: call{{.*}}__dfsan_union_load
+
+  %a = load i32* %p
+  ret i32 %a
+}
+
+define i64 @load64(i64* %p) {
+  ; CHECK: @"dfs$load64"
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: bitcast
+  ; CHECK: load
+  ; CHECK: trunc
+  ; CHECK: shl
+  ; CHECK: lshr
+  ; CHECK: or
+  ; CHECK: icmp eq
+
+  ; CHECK: store{{.*}}__dfsan_retval_tls
+  ; CHECK: ret i64
+
+  ; CHECK: call{{.*}}__dfsan_union_load
+
+  ; CHECK: getelementptr
+  ; CHECK: load
+  ; CHECK: icmp eq
+
+  %a = load i64* %p
+  ret i64 %a
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/memset.ll b/test/Instrumentation/DataFlowSanitizer/memset.ll
new file mode 100644
index 000000000000..062ef1ac9f49
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/memset.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -dfsan -dfsan-args-abi -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define void @ms(i8* %p, i8 %v) {
+  ; CHECK-LABEL: @"dfs$ms"(i8*, i8, i16, i16)
+  ; CHECK: call void @__dfsan_set_label(i16 %3, i8* %0, i64 1)
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 %v, i64 1, i32 1, i1 1)
+  ret void
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
new file mode 100644
index 000000000000..1a5646074d21
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+; RUN: opt < %s -dfsan -dfsan-args-abi -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK: module asm ".symver dfs$f1,dfs$f@@version1"
+module asm ".symver f1,f@@version1"
+
+; CHECK: @"dfs$f2" = alias {{.*}} @"dfs$f1"
+@f2 = alias void ()* @f1
+
+; CHECK: define void @"dfs$f1"
+define void @f1() {
+  ret void
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/store.ll b/test/Instrumentation/DataFlowSanitizer/store.ll
new file mode 100644
index 000000000000..95091777a326
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @store8(i8 %v, i8* %p) {
+  ; CHECK: @"dfs$store8"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: store
+  store i8 %v, i8* %p
+  ret void
+}
+
+define void @store16(i16 %v, i16* %p) {
+  ; CHECK: @"dfs$store16"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: store
+  store i16 %v, i16* %p
+  ret void
+}
+
+define void @store32(i32 %v, i32* %p) {
+  ; CHECK: @"dfs$store32"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: store
+  store i32 %v, i32* %p
+  ret void
+}
+
+define void @store64(i64 %v, i64* %p) {
+  ; CHECK: @"dfs$store64"
+  ; CHECK: load{{.*}}__dfsan_arg_tls
+  ; CHECK: ptrtoint
+  ; CHECK: and
+  ; CHECK: mul
+  ; CHECK: inttoptr
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: insertelement
+  ; CHECK: bitcast
+  ; CHECK: getelementptr
+  ; CHECK: store
+  ; CHECK: store
+  store i64 %v, i64* %p
+  ret void
+}
diff --git a/test/Instrumentation/MemorySanitizer/X86/vararg.ll b/test/Instrumentation/MemorySanitizer/X86/vararg.ll
new file mode 100644
index 000000000000..518c3dbf41d3
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/X86/vararg.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S
+; Test that code using va_start can be compiled on i386.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+define void @VaStart(i8* %s, ...) {
+entry:
+  %vl = alloca i8*, align 4
+  %vl1 = bitcast i8** %vl to i8*
+  call void @llvm.va_start(i8* %vl1)
+  ret void
+}
+
+declare void @llvm.va_start(i8*)
diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll
new file mode 100644
index 000000000000..ff0245262cb3
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -0,0 +1,189 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; atomicrmw xchg: store clean shadow, return clean shadow
+
+define i32 @AtomicRmwXchg(i32* %p, i32 %x) sanitize_memory {
+entry:
+  %0 = atomicrmw xchg i32* %p, i32 %x seq_cst
+  ret i32 %0
+}
+
+; CHECK: @AtomicRmwXchg
+; CHECK: store i32 0,
+; CHECK: atomicrmw xchg {{.*}} seq_cst
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; atomicrmw max: exactly the same as above
+
+define i32 @AtomicRmwMax(i32* %p, i32 %x) sanitize_memory {
+entry:
+  %0 = atomicrmw max i32* %p, i32 %x seq_cst
+  ret i32 %0
+}
+
+; CHECK: @AtomicRmwMax
+; CHECK: store i32 0,
+; CHECK: atomicrmw max {{.*}} seq_cst
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; cmpxchg: the same as above, but also check %a shadow
+
+define i32 @Cmpxchg(i32* %p, i32 %a, i32 %b) sanitize_memory {
+entry:
+  %0 = cmpxchg i32* %p, i32 %a, i32 %b seq_cst
+  ret i32 %0
+}
+
+; CHECK: @Cmpxchg
+; CHECK: store i32 0,
+; CHECK: icmp
+; CHECK: br
+; CHECK: @__msan_warning
+; CHECK: cmpxchg {{.*}} seq_cst
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; relaxed cmpxchg: bump up to "release"
+
+define i32 @CmpxchgMonotonic(i32* %p, i32 %a, i32 %b) sanitize_memory {
+entry:
+  %0 = cmpxchg i32* %p, i32 %a, i32 %b monotonic
+  ret i32 %0
+}
+
+; CHECK: @CmpxchgMonotonic
+; CHECK: store i32 0,
+; CHECK: icmp
+; CHECK: br
+; CHECK: @__msan_warning
+; CHECK: cmpxchg {{.*}} release
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; atomic load: preserve alignment, load shadow value after app value
+
+define i32 @AtomicLoad(i32* %p) sanitize_memory {
+entry:
+  %0 = load atomic i32* %p seq_cst, align 16
+  ret i32 %0
+}
+
+; CHECK: @AtomicLoad
+; CHECK: load atomic i32* {{.*}} seq_cst, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; atomic load: preserve alignment, load shadow value after app value
+
+define i32 @AtomicLoadAcquire(i32* %p) sanitize_memory {
+entry:
+  %0 = load atomic i32* %p acquire, align 16
+  ret i32 %0
+}
+
+; CHECK: @AtomicLoadAcquire
+; CHECK: load atomic i32* {{.*}} acquire, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; atomic load monotonic: bump up to load acquire
+
+define i32 @AtomicLoadMonotonic(i32* %p) sanitize_memory {
+entry:
+  %0 = load atomic i32* %p monotonic, align 16
+  ret i32 %0
+}
+
+; CHECK: @AtomicLoadMonotonic
+; CHECK: load atomic i32* {{.*}} acquire, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; atomic load unordered: bump up to load acquire
+
+define i32 @AtomicLoadUnordered(i32* %p) sanitize_memory {
+entry:
+  %0 = load atomic i32* %p unordered, align 16
+  ret i32 %0
+}
+
+; CHECK: @AtomicLoadUnordered
+; CHECK: load atomic i32* {{.*}} acquire, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+
+; atomic store: preserve alignment, store clean shadow value before app value
+
+define void @AtomicStore(i32* %p, i32 %x) sanitize_memory {
+entry:
+  store atomic i32 %x, i32* %p seq_cst, align 16
+  ret void
+}
+
+; CHECK: @AtomicStore
+; CHECK-NOT: @__msan_param_tls
+; CHECK: store i32 0, i32* {{.*}}, align 16
+; CHECK: store atomic i32 %x, i32* %p seq_cst, align 16
+; CHECK: ret void
+
+
+; atomic store: preserve alignment, store clean shadow value before app value
+
+define void @AtomicStoreRelease(i32* %p, i32 %x) sanitize_memory {
+entry:
+  store atomic i32 %x, i32* %p release, align 16
+  ret void
+}
+
+; CHECK: @AtomicStoreRelease
+; CHECK-NOT: @__msan_param_tls
+; CHECK: store i32 0, i32* {{.*}}, align 16
+; CHECK: store atomic i32 %x, i32* %p release, align 16
+; CHECK: ret void
+
+
+; atomic store monotonic: bumped up to store release
+
+define void @AtomicStoreMonotonic(i32* %p, i32 %x) sanitize_memory {
+entry:
+  store atomic i32 %x, i32* %p monotonic, align 16
+  ret void
+}
+
+; CHECK: @AtomicStoreMonotonic
+; CHECK-NOT: @__msan_param_tls
+; CHECK: store i32 0, i32* {{.*}}, align 16
+; CHECK: store atomic i32 %x, i32* %p release, align 16
+; CHECK: ret void
+
+
+; atomic store unordered: bumped up to store release
+
+define void @AtomicStoreUnordered(i32* %p, i32 %x) sanitize_memory {
+entry:
+  store atomic i32 %x, i32* %p unordered, align 16
+  ret void
+}
+
+; CHECK: @AtomicStoreUnordered
+; CHECK-NOT: @__msan_param_tls
+; CHECK: store i32 0, i32* {{.*}}, align 16
+; CHECK: store atomic i32 %x, i32* %p release, align 16
+; CHECK: ret void
diff --git a/test/Instrumentation/MemorySanitizer/lit.local.cfg b/test/Instrumentation/MemorySanitizer/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Instrumentation/MemorySanitizer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 1e7a31793dea..72a992dd5901 100644
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -1,12 +1,25 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
 ; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK-ORIGINS %s
+; RUN: opt < %s -msan -msan-check-access-address=1 -S | FileCheck %s -check-prefix=CHECK-AA
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 ; Check the presence of __msan_init
 ; CHECK: @llvm.global_ctors {{.*}} @__msan_init
 
-; Check the presence and the linkage type of __msan_track_origins
-; CHECK: @__msan_track_origins = weak_odr constant i32 0
+; Check the presence and the linkage type of __msan_track_origins and
+; other interface symbols.
+; CHECK-NOT: @__msan_track_origins
+; CHECK-ORIGINS: @__msan_track_origins = weak_odr constant i32 1
+; CHECK-NOT: @__msan_keep_going = weak_odr constant i32 0
+; CHECK: @__msan_retval_tls = external thread_local(initialexec) global [{{.*}}]
+; CHECK: @__msan_retval_origin_tls = external thread_local(initialexec) global i32
+; CHECK: @__msan_param_tls = external thread_local(initialexec) global [{{.*}}]
+; CHECK: @__msan_param_origin_tls = external thread_local(initialexec) global [{{.*}}]
+; CHECK: @__msan_va_arg_tls = external thread_local(initialexec) global [{{.*}}]
+; CHECK: @__msan_va_arg_overflow_size_tls = external thread_local(initialexec) global i64
+; CHECK: @__msan_origin_tls = external thread_local(initialexec) global i32
 
 
 ; Check instrumentation of stores
@@ -247,6 +260,8 @@ entry:
 
 ; CHECK: @Select
 ; CHECK: select
+; CHECK-NEXT: sext i1 {{.*}} to i32
+; CHECK-NEXT: or i32
 ; CHECK-NEXT: select
 ; CHECK: ret i32
 
@@ -261,6 +276,13 @@ entry:
   ret <8 x i16> %cond
 }
 
+; CHECK: @SelectVector
+; CHECK: select <8 x i1>
+; CHECK-NEXT: sext <8 x i1> {{.*}} to <8 x i16>
+; CHECK-NEXT: or <8 x i16>
+; CHECK-NEXT: select <8 x i1>
+; CHECK: ret <8 x i16>
+
 ; CHECK-ORIGINS: @SelectVector
 ; CHECK-ORIGINS: bitcast <8 x i1> {{.*}} to i8
 ; CHECK-ORIGINS: icmp ne i8
@@ -268,6 +290,38 @@ entry:
 ; CHECK-ORIGINS: ret <8 x i16>
 
 
+; Check that we propagate origin for "select" with scalar condition and vector
+; arguments. Select condition shadow is sign-extended to the vector type and
+; mixed into the result shadow.
+
+define <8 x i16> @SelectVector2(<8 x i16> %a, <8 x i16> %b, i1 %c) nounwind uwtable readnone sanitize_memory {
+entry:
+  %cond = select i1 %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %cond
+}
+
+; CHECK: @SelectVector2
+; CHECK: select i1
+; CHECK: sext i1 {{.*}} to i128
+; CHECK: bitcast i128 {{.*}} to <8 x i16>
+; CHECK: or <8 x i16>
+; CHECK: select i1
+; CHECK: ret <8 x i16>
+
+
+define { i64, i64 } @SelectStruct(i1 zeroext %x, { i64, i64 } %a, { i64, i64 } %b) readnone sanitize_memory {
+entry:
+  %c = select i1 %x, { i64, i64 } %a, { i64, i64 } %b
+  ret { i64, i64 } %c
+}
+
+; CHECK: @SelectStruct
+; CHECK: select i1 {{.*}}, { i64, i64 }
+; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 }
+; CHECK-NEXT: select i1 {{.*}}, { i64, i64 }
+; CHECK: ret { i64, i64 }
+
+
 define i8* @IntToPtr(i64 %x) nounwind uwtable readnone sanitize_memory {
 entry:
   %0 = inttoptr i64 %x to i8*
@@ -407,8 +461,8 @@ define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory {
 }
 
 ; CHECK: @ShadowLoadAlignmentLarge
-; CHECK: load i32* {{.*}} align 64
 ; CHECK: load volatile i32* {{.*}} align 64
+; CHECK: load i32* {{.*}} align 64
 ; CHECK: ret i32
 
 define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory {
@@ -418,14 +472,14 @@ define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory {
 }
 
 ; CHECK: @ShadowLoadAlignmentSmall
-; CHECK: load i32* {{.*}} align 2
 ; CHECK: load volatile i32* {{.*}} align 2
+; CHECK: load i32* {{.*}} align 2
 ; CHECK: ret i32
 
 ; CHECK-ORIGINS: @ShadowLoadAlignmentSmall
+; CHECK-ORIGINS: load volatile i32* {{.*}} align 2
 ; CHECK-ORIGINS: load i32* {{.*}} align 2
 ; CHECK-ORIGINS: load i32* {{.*}} align 4
-; CHECK-ORIGINS: load volatile i32* {{.*}} align 2
 ; CHECK-ORIGINS: ret i32
 
 
@@ -565,8 +619,8 @@ define <8 x i8*> @VectorOfPointers(<8 x i8*>* %p) nounwind uwtable sanitize_memo
 }
 
 ; CHECK: @VectorOfPointers
-; CHECK: load <8 x i64>*
 ; CHECK: load <8 x i8*>*
+; CHECK: load <8 x i64>*
 ; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls
 ; CHECK: ret <8 x i8*>
 
@@ -584,6 +638,31 @@ define void @VACopy(i8* %p1, i8* %p2) nounwind uwtable sanitize_memory {
 ; CHECK: ret void
 
 
+; Test that va_start instrumentation does not use va_arg_tls*.
+; It should work with a local stack copy instead.
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+declare void @llvm.va_start(i8*) nounwind
+
+; Function Attrs: nounwind uwtable
+define void @VAStart(i32 %x, ...) {
+entry:
+  %x.addr = alloca i32, align 4
+  %va = alloca [1 x %struct.__va_list_tag], align 16
+  store i32 %x, i32* %x.addr, align 4
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag]* %va, i32 0, i32 0
+  %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  ret void
+}
+
+; CHECK: @VAStart
+; CHECK: call void @llvm.va_start
+; CHECK-NOT: @__msan_va_arg_tls
+; CHECK-NOT: @__msan_va_arg_overflow_size_tls
+; CHECK: ret void
+
+
 ; Test handling of volatile stores.
 ; Check that MemorySanitizer does not add a check of the value being stored.
 
@@ -623,3 +702,102 @@ declare void @bar()
 ; CHECK: store {{.*}} @__msan_retval_tls
 ; CHECK-NOT: @__msan_warning
 ; CHECK: ret i32
+
+
+; Test that stack allocations are unpoisoned in functions missing
+; sanitize_memory attribute
+
+define i32 @NoSanitizeMemoryAlloca() {
+entry:
+  %p = alloca i32, align 4
+  %x = call i32 @NoSanitizeMemoryAllocaHelper(i32* %p)
+  ret i32 %x
+}
+
+declare i32 @NoSanitizeMemoryAllocaHelper(i32* %p)
+
+; CHECK: @NoSanitizeMemoryAlloca
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 4, i32 4, i1 false)
+; CHECK: call i32 @NoSanitizeMemoryAllocaHelper(i32*
+; CHECK: ret i32
+
+
+; Test that undef is unpoisoned in functions missing
+; sanitize_memory attribute
+
+define i32 @NoSanitizeMemoryUndef() {
+entry:
+  %x = call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
+  ret i32 %x
+}
+
+declare i32 @NoSanitizeMemoryUndefHelper(i32 %x)
+
+; CHECK: @NoSanitizeMemoryUndef
+; CHECK: store i32 0, i32* {{.*}} @__msan_param_tls
+; CHECK: call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
+; CHECK: ret i32
+
+
+; Test argument shadow alignment
+
+define <2 x i64> @ArgumentShadowAlignment(i64 %a, <2 x i64> %b) sanitize_memory {
+entry:
+  ret <2 x i64> %b
+}
+
+; CHECK: @ArgumentShadowAlignment
+; CHECK: load <2 x i64>* {{.*}} @__msan_param_tls {{.*}}, align 8
+; CHECK: store <2 x i64> {{.*}} @__msan_retval_tls {{.*}}, align 8
+; CHECK: ret <2 x i64>
+
+
+; Test byval argument shadow alignment
+
+define <2 x i64> @ByValArgumentShadowLargeAlignment(<2 x i64>* byval %p) sanitize_memory {
+entry:
+  %x = load <2 x i64>* %p
+  ret <2 x i64> %x
+}
+
+; CHECK-AA: @ByValArgumentShadowLargeAlignment
+; CHECK-AA: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 16, i32 8, i1 false)
+; CHECK-AA: ret <2 x i64>
+
+
+define i16 @ByValArgumentShadowSmallAlignment(i16* byval %p) sanitize_memory {
+entry:
+  %x = load i16* %p
+  ret i16 %x
+}
+
+; CHECK-AA: @ByValArgumentShadowSmallAlignment
+; CHECK-AA: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 2, i32 2, i1 false)
+; CHECK-AA: ret i16
+
+
+; Test origin propagation for insertvalue
+
+define { i64, i32 } @make_pair_64_32(i64 %x, i32 %y) sanitize_memory {
+entry:
+  %a = insertvalue { i64, i32 } undef, i64 %x, 0
+  %b = insertvalue { i64, i32 } %a, i32 %y, 1
+  ret { i64, i32 } %b
+}
+
+; CHECK-ORIGINS: @make_pair_64_32
+; First element shadow
+; CHECK-ORIGINS: insertvalue { i64, i32 } { i64 -1, i32 -1 }, i64 {{.*}}, 0
+; First element origin
+; CHECK-ORIGINS: icmp ne i64
+; CHECK-ORIGINS: select i1
+; First element app value
+; CHECK-ORIGINS: insertvalue { i64, i32 } undef, i64 {{.*}}, 0
+; Second element shadow
+; CHECK-ORIGINS: insertvalue { i64, i32 } {{.*}}, i32 {{.*}}, 1
+; Second element origin
+; CHECK-ORIGINS: icmp ne i32
+; CHECK-ORIGINS: select i1
+; Second element app value
+; CHECK-ORIGINS: insertvalue { i64, i32 } {{.*}}, i32 {{.*}}, 1
+; CHECK-ORIGINS: ret { i64, i32 }
diff --git a/test/Instrumentation/MemorySanitizer/return_from_main.ll b/test/Instrumentation/MemorySanitizer/return_from_main.ll
new file mode 100644
index 000000000000..81dc88834db1
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/return_from_main.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() sanitize_memory {
+entry:
+  %call = tail call i32 @f()
+  ret i32 %call
+}
+
+declare i32 @f() sanitize_memory
+
+; CHECK: @main
+; CHECK: call i32 @f()
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: br i1
+; CHECK: call void @__msan_warning_noreturn()
+; CHECK: ret i32
diff --git a/test/Instrumentation/MemorySanitizer/vector_cvt.ll b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
new file mode 100644
index 000000000000..9425e25bde5b
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
+
+; Single argument vector conversion.
+
+define i32 @test_cvtsd2si(<2 x double> %value) sanitize_memory {
+entry:
+  %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %value)
+  ret i32 %0
+}
+
+; CHECK: @test_cvtsd2si
+; CHECK: [[S:%[_01-9a-z]+]] = extractelement <2 x i64> {{.*}}, i32 0
+; CHECK: icmp ne {{.*}}[[S]], 0
+; CHECK: br
+; CHECK: call void @__msan_warning_noreturn
+; CHECK: call i32 @llvm.x86.sse2.cvtsd2si
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32
+
+; Two-argument vector conversion.
+
+define <2 x double> @test_cvtsi2sd(i32 %a, double %b) sanitize_memory {
+entry:
+  %vec = insertelement <2 x double> undef, double %b, i32 1
+  %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %vec, i32 %a)
+  ret <2 x double> %0
+}
+
+; CHECK: @test_cvtsi2sd
+; CHECK: [[Sa:%[_01-9a-z]+]] = load i32* {{.*}} @__msan_param_tls
+; CHECK: [[Sout0:%[_01-9a-z]+]] = insertelement <2 x i64> <i64 -1, i64 -1>, i64 {{.*}}, i32 1
+; Clear low half of result shadow
+; CHECK: [[Sout:%[_01-9a-z]+]] = insertelement <2 x i64> {{.*}}[[Sout0]], i64 0, i32 0
+; Trap on %a shadow.
+; CHECK: icmp ne {{.*}}[[Sa]], 0
+; CHECK: br
+; CHECK: call void @__msan_warning_noreturn
+; CHECK: call <2 x double> @llvm.x86.sse2.cvtsi2sd
+; CHECK: store <2 x i64> {{.*}}[[Sout]], {{.*}} @__msan_retval_tls
+; CHECK: ret <2 x double>
+
+; x86_mmx packed vector conversion.
+
+define x86_mmx @test_cvtps2pi(<4 x float> %value) sanitize_memory {
+entry:
+  %0 = tail call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %value)
+  ret x86_mmx %0
+}
+
+; CHECK: @test_cvtps2pi
+; CHECK: extractelement <4 x i32> {{.*}}, i32 0
+; CHECK: extractelement <4 x i32> {{.*}}, i32 1
+; CHECK: [[S:%[_01-9a-z]+]] = or i32
+; CHECK: icmp ne {{.*}}[[S]], 0
+; CHECK: br
+; CHECK: call void @__msan_warning_noreturn
+; CHECK: call x86_mmx @llvm.x86.sse.cvtps2pi
+; CHECK: store i64 0, {{.*}} @__msan_retval_tls
+; CHECK: ret x86_mmx
diff --git a/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll b/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll
new file mode 100644
index 000000000000..555695d25845
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-wrap-indirect-calls=zzz -msan-wrap-indirect-calls-fast=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-wrap-indirect-calls=zzz -msan-wrap-indirect-calls-fast=1 -S | FileCheck -check-prefix=CHECK-FAST %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Test for -msan-wrap-indirect-calls functionality.
+; Replaces indirect call to %f with a call to whatever is returned from the
+; wrapper function.
+
+; This does not depend on the sanitize_memory attribute.
+define i32 @func(i32 (i32, i32)* nocapture %f, i32 %x, i32 %y) {
+entry:
+  %call = tail call i32 %f(i32 %x, i32 %y)
+  ret i32 %call
+}
+
+; CHECK: @func
+; CHECK: bitcast i32 (i32, i32)* %f to void ()*
+; CHECK: call void ()* (void ()*)* @zzz(void ()*
+; CHECK: [[A:%[01-9a-z_.]+]] = bitcast void ()* {{.*}} to i32 (i32, i32)*
+; CHECK: call i32 {{.*}}[[A]](i32 {{.*}}, i32 {{.*}})
+; CHECK: ret i32
+
+; CHECK-FAST: @func
+; CHECK-FAST: bitcast i32 (i32, i32)* %f to void ()*
+; CHECK-FAST-DAG: icmp ult void ()* {{.*}}, bitcast (i32* @__executable_start to void ()*)
+; CHECK-FAST-DAG: icmp uge void ()* {{.*}}, bitcast (i32* @_end to void ()*)
+; CHECK-FAST: or i1
+; CHECK-FAST: br i1
+; CHECK-FAST: call void ()* (void ()*)* @zzz(void ()*
+; CHECK-FAST: br label
+; CHECK-FAST: [[A:%[01-9a-z_.]+]] = phi i32 (i32, i32)* [ %f, %entry ], [ {{.*}} ]
+; CHECK-FAST: call i32 {{.*}}[[A]](i32 {{.*}}, i32 {{.*}})
+; CHECK-FAST: ret i32
diff --git a/test/Instrumentation/ThreadSanitizer/lit.local.cfg b/test/Instrumentation/ThreadSanitizer/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Instrumentation/ThreadSanitizer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll b/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll
new file mode 100644
index 000000000000..3949fd50a97d
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; no sanitize_thread attribute here
+define i32 @read_4_bytes(i32* %a) {
+entry:
+  %tmp1 = load i32* %a, align 4
+  ret i32 %tmp1
+}
+
+; CHECK: define i32 @read_4_bytes(i32* %a) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK: ret i32 %tmp1
+
+; no sanitize_thread attribute here
+define i32 @read_4_bytes_and_call(i32* %a) {
+entry:
+  call void @foo()
+  %tmp1 = load i32* %a, align 4
+  ret i32 %tmp1
+}
+
+; CHECK: define i32 @read_4_bytes_and_call(i32* %a) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = call i8* @llvm.returnaddress(i32 0)
+; CHECK-NEXT:   call void @__tsan_func_entry(i8* %0)
+; CHECK-NEXT:   call void @foo()
+; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK-NEXT:   call void @__tsan_func_exit()
+; CHECK-NEXT:   ret i32 %tmp1
+
+declare void @foo()
+
diff --git a/test/Instrumentation/ThreadSanitizer/read_before_write.ll b/test/Instrumentation/ThreadSanitizer/read_before_write.ll
index 482362aa7dce..cb6603bc1438 100644
--- a/test/Instrumentation/ThreadSanitizer/read_before_write.ll
+++ b/test/Instrumentation/ThreadSanitizer/read_before_write.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable {
+define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable sanitize_thread {
 entry:
   %0 = load i32* %ptr, align 4
   %inc = add nsw i32 %0, 1
@@ -14,7 +14,7 @@ entry:
 ; CHECK: __tsan_write
 ; CHECK: ret void
 
-define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable {
+define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable sanitize_thread {
 entry:
   %0 = load i32* %ptr, align 4
   %inc = add nsw i32 %0, 1
diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
index 7b6b94edf1b1..33614a32f9ca 100644
--- a/test/Instrumentation/ThreadSanitizer/read_from_global.ll
+++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 @const_global = external constant i32
-define i32 @read_from_const_global() nounwind uwtable readnone {
+define i32 @read_from_const_global() nounwind uwtable sanitize_thread readnone {
 entry:
   %0 = load i32* @const_global, align 4
   ret i32 %0
@@ -14,7 +14,7 @@ entry:
 ; CHECK: ret i32
 
 @non_const_global = global i32 0, align 4
-define i32 @read_from_non_const_global() nounwind uwtable readonly {
+define i32 @read_from_non_const_global() nounwind uwtable sanitize_thread readonly {
 entry:
   %0 = load i32* @non_const_global, align 4
   ret i32 %0
@@ -25,7 +25,7 @@ entry:
 ; CHECK: ret i32
 
 @const_global_array = external constant [10 x i32]
-define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable readnone {
+define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable sanitize_thread readnone {
 entry:
   %idxprom = sext i32 %idx to i64
   %arrayidx = getelementptr inbounds [10 x i32]* @const_global_array, i64 0, i64 %idxprom
@@ -38,10 +38,10 @@ entry:
 ; CHECK: ret i32
 
 %struct.Foo = type { i32 (...)** }
-define void @call_virtual_func(%struct.Foo* %f) uwtable {
+define void @call_virtual_func(%struct.Foo* %f) uwtable sanitize_thread {
 entry:
   %0 = bitcast %struct.Foo* %f to void (%struct.Foo*)***
-  %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !3
+  %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !2
   %1 = load void (%struct.Foo*)** %vtable, align 8
   call void %1(%struct.Foo* %f)
   ret void
@@ -54,8 +54,6 @@ entry:
 ; CHECK: = load
 ; CHECK: ret void
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"vtable pointer", metadata !2}
-
+!0 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!1 = metadata !{metadata !"vtable pointer", metadata !0}
+!2 = metadata !{metadata !1, metadata !1, i64 0}
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index 19dd45bda230..d449a97a62a1 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 
-define i32 @read_4_bytes(i32* %a) {
+define i32 @read_4_bytes(i32* %a) sanitize_thread {
 entry:
   %tmp1 = load i32* %a, align 4
   ret i32 %tmp1
@@ -11,7 +11,7 @@ entry:
 
 ; CHECK: @llvm.global_ctors = {{.*}}@__tsan_init
 
-; CHECK: define i32 @read_4_bytes(i32* %a) {
+; CHECK: define i32 @read_4_bytes(i32* %a)
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i32* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_read4(i8* %1)
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
index 404ca3ffe50f..811ad8d1cf57 100644
--- a/test/Instrumentation/ThreadSanitizer/vptr_read.ll
+++ b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
@@ -2,12 +2,12 @@
 ; Check that vptr reads are treated in a special way.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define i8 @Foo(i8* %a) nounwind uwtable {
+define i8 @Foo(i8* %a) nounwind uwtable sanitize_thread {
 entry:
 ; CHECK: call void @__tsan_vptr_read
   %0 = load i8* %a, align 8, !tbaa !0
   ret i8 %0
 }
-!0 = metadata !{metadata !"vtable pointer", metadata !1}
+!0 = metadata !{metadata !2, metadata !2, i64 0}
 !1 = metadata !{metadata !"Simple C/C++ TBAA", null}
-
+!2 = metadata !{metadata !"vtable pointer", metadata !1}
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_update.ll b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
index f31865901b57..95c7bb0e5915 100644
--- a/test/Instrumentation/ThreadSanitizer/vptr_update.ll
+++ b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
@@ -2,12 +2,12 @@
 ; Check that vtable pointer updates are treated in a special way.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-define void @Foo(i8** nocapture %a, i8* %b) nounwind uwtable {
+define void @Foo(i8** nocapture %a, i8* %b) nounwind uwtable sanitize_thread {
 entry:
 ; CHECK: call void @__tsan_vptr_update
   store i8* %b, i8** %a, align 8, !tbaa !0
   ret void
 }
-!0 = metadata !{metadata !"vtable pointer", metadata !1}
+!0 = metadata !{metadata !2, metadata !2, i64 0}
 !1 = metadata !{metadata !"Simple C/C++ TBAA", null}
-
+!2 = metadata !{metadata !"vtable pointer", metadata !1}
diff --git a/test/Integer/lit.local.cfg b/test/Integer/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Integer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/JitListener/lit.local.cfg b/test/JitListener/lit.local.cfg
index a5aa6de182c4..d995820bc3b6 100644
--- a/test/JitListener/lit.local.cfg
+++ b/test/JitListener/lit.local.cfg
@@ -1,11 +1,3 @@
-config.suffixes = ['.ll']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-if not root.llvm_use_intel_jitevents == "ON":
+if not config.root.llvm_use_intel_jitevents == "ON":
     config.unsupported = True
 
diff --git a/test/JitListener/test-common-symbols.ll b/test/JitListener/test-common-symbols.ll
index bc94bda9a410..a389bf7a6bc2 100644
--- a/test/JitListener/test-common-symbols.ll
+++ b/test/JitListener/test-common-symbols.ll
@@ -76,38 +76,38 @@ for.end:                                          ; preds = %for.cond
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build", metadata !"clang version 3.1 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 720913, metadata !34, i32 12, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !34, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !34} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{metadata !11}
 !11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{metadata !14, metadata !15, metadata !17}
-!14 = metadata !{i32 720948, i32 0, null, metadata !"zero_int", metadata !"zero_int", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @zero_int} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 720948, i32 0, null, metadata !"zero_double", metadata !"zero_double", metadata !"", metadata !6, i32 2, metadata !16, i32 0, i32 1, double* @zero_double} ; [ DW_TAG_variable ]
-!16 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!12 = metadata !{metadata !14, metadata !15, metadata !17}
+!14 = metadata !{i32 720948, i32 0, null, metadata !"zero_int", metadata !"zero_int", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @zero_int, null} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 720948, i32 0, null, metadata !"zero_double", metadata !"zero_double", metadata !"", metadata !6, i32 2, metadata !16, i32 0, i32 1, double* @zero_double, null} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 720932, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr, null} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int]
 !19 = metadata !{metadata !20}
 !20 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
 !21 = metadata !{i32 7, i32 5, metadata !22, null}
-!22 = metadata !{i32 720907, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 720907, metadata !34, metadata !5, i32 6, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 9, i32 5, metadata !22, null}
 !24 = metadata !{i32 10, i32 9, metadata !22, null}
 !25 = metadata !{i32 721152, metadata !26, metadata !"i", metadata !6, i32 12, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 720907, metadata !22, i32 12, i32 5, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 720907, metadata !34, metadata !22, i32 12, i32 5, i32 1} ; [ DW_TAG_lexical_block ]
 !27 = metadata !{i32 12, i32 14, metadata !26, null}
 !28 = metadata !{i32 12, i32 19, metadata !26, null}
 !29 = metadata !{i32 13, i32 9, metadata !30, null}
-!30 = metadata !{i32 720907, metadata !26, i32 12, i32 34, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 720907, metadata !34, metadata !26, i32 12, i32 34, i32 2} ; [ DW_TAG_lexical_block ]
 !31 = metadata !{i32 14, i32 5, metadata !30, null}
 !32 = metadata !{i32 12, i32 29, metadata !26, null}
 !33 = metadata !{i32 15, i32 5, metadata !22, null}
+!34 = metadata !{metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build"}
+!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll
index ca5d8d6484b8..0d365b1eaa98 100644
--- a/test/JitListener/test-inline.ll
+++ b/test/JitListener/test-inline.ll
@@ -132,53 +132,51 @@ entry:
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!78}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test-inline.cpp", metadata !"/home/akaylor/dev", metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-inline.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !35, metadata !40}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 32, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
-!6 = metadata !{i32 786473, metadata !"test-inline.cpp", metadata !"/home/akaylor/dev", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{i32 786449, metadata !77, i32 4, metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43, null, metadata !""} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-inline.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !35, metadata !40}
+!5 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", i32 32, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
+!6 = metadata !{i32 786473, metadata !77} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !10, metadata !12, metadata !16, metadata !29, metadata !32, metadata !33}
-!9 = metadata !{i32 786468, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!9 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
 !10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
-!11 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!11 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
 !12 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
+!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
 !16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
-!17 = metadata !{i32 786451, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ]
+!17 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [def] [from ]
 !18 = metadata !{metadata !19, metadata !21, metadata !23}
-!19 = metadata !{i32 786445, metadata !17, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !20} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
-!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!21 = metadata !{i32 786445, metadata !17, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !22} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
-!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !20, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
-!23 = metadata !{i32 786478, i32 0, metadata !17, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
-!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{i32 786445, metadata !77, metadata !17, metadata !"c", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !20} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
+!20 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!21 = metadata !{i32 786445, metadata !77, metadata !17, metadata !"c2", i32 24, i64 16, i64 8, i64 8, i32 0, metadata !22} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
+!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !20, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
+!23 = metadata !{i32 786478, metadata !77, metadata !17, metadata !"char_struct", metadata !"char_struct", metadata !"", i32 22, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
+!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !25 = metadata !{null, metadata !26}
 !26 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
 !27 = metadata !{metadata !28}
 !28 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !31} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!31 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!32 = metadata !{i32 786468, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!31 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!32 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
 !33 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !34} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
-!34 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!35 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 38, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
-!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!35 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 38, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
+!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !37 = metadata !{metadata !31, metadata !31, metadata !38}
 !38 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !39 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!40 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 27, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
-!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 27, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
+!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !42 = metadata !{metadata !31}
-!43 = metadata !{metadata !44}
-!44 = metadata !{metadata !45}
-!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !17, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
+!43 = metadata !{metadata !45}
+!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !17, i32 0, i32 1, %struct.char_struct* @compound_char, null} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
 !46 = metadata !{i32 786689, metadata !5, metadata !"pf", metadata !6, i32 16777248, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
 !47 = metadata !{i32 32, i32 0, metadata !5, null}
 !48 = metadata !{i32 786689, metadata !5, metadata !"ppd", metadata !6, i32 33554464, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
@@ -187,21 +185,21 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 !51 = metadata !{i32 786689, metadata !5, metadata !"us", metadata !6, i32 83886112, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
 !52 = metadata !{i32 786689, metadata !5, metadata !"l", metadata !6, i32 100663328, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
 !53 = metadata !{i32 786688, metadata !54, metadata !"result", metadata !6, i32 34, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
-!54 = metadata !{i32 786443, metadata !5, i32 33, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!54 = metadata !{i32 786443, metadata !77, metadata !5, i32 33, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
 !55 = metadata !{i32 34, i32 0, metadata !54, null}
 !56 = metadata !{i32 35, i32 0, metadata !54, null}
 !57 = metadata !{i32 29, i32 0, metadata !58, null}
-!58 = metadata !{i32 786443, metadata !40, i32 28, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!58 = metadata !{i32 786443, metadata !77, metadata !40, i32 28, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
 !59 = metadata !{i32 786689, metadata !35, metadata !"argc", metadata !6, i32 16777254, metadata !31, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
 !60 = metadata !{i32 38, i32 0, metadata !35, null}
 !61 = metadata !{i32 786689, metadata !35, metadata !"argv", metadata !6, i32 33554470, metadata !38, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
 !62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
-!63 = metadata !{i32 786443, metadata !35, i32 39, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!63 = metadata !{i32 786443, metadata !77, metadata !35, i32 39, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
 !64 = metadata !{i32 40, i32 0, metadata !63, null}
 !65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
 !66 = metadata !{i32 41, i32 0, metadata !63, null}
 !67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
-!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
+!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
 !69 = metadata !{metadata !15, metadata !15}
 !70 = metadata !{i32 42, i32 0, metadata !63, null}
 !71 = metadata !{i32 44, i32 0, metadata !63, null}
@@ -210,3 +208,5 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 !74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
 !75 = metadata !{i32 48, i32 0, metadata !63, null}
 !76 = metadata !{i32 49, i32 0, metadata !63, null}
+!77 = metadata !{metadata !"test-inline.cpp", metadata !"/home/akaylor/dev"}
+!78 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll
index 1e2a2b342aae..7feb6bb65a3f 100644
--- a/test/JitListener/test-parameters.ll
+++ b/test/JitListener/test-parameters.ll
@@ -131,55 +131,53 @@ entry:
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!78}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev", metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-parameters.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !10, metadata !38}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 27, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
-!6 = metadata !{i32 786473, metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = metadata !{i32 786449, metadata !77, i32 4, metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43, null, metadata !""} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-parameters.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10, metadata !38}
+!5 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 27, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
+!6 = metadata !{i32 786473, metadata !77} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 32, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
-!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", i32 32, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
+!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{metadata !13, metadata !14, metadata !16, metadata !20, metadata !33, metadata !35, metadata !36}
-!13 = metadata !{i32 786468, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!13 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
 !14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
-!15 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!15 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
 !16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
+!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
 !18 = metadata !{metadata !19}
 !19 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
 !20 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
-!21 = metadata !{i32 786451, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ]
+!21 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [def] [from ]
 !22 = metadata !{metadata !23, metadata !25, metadata !27}
-!23 = metadata !{i32 786445, metadata !21, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
-!24 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!25 = metadata !{i32 786445, metadata !21, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !26} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
-!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
-!27 = metadata !{i32 786478, i32 0, metadata !21, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !28, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !31, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
-!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = metadata !{i32 786445, metadata !77, metadata !21, metadata !"c", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
+!24 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786445, metadata !77, metadata !21, metadata !"c2", i32 24, i64 16, i64 8, i64 8, i32 0, metadata !26} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
+!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
+!27 = metadata !{i32 786478, metadata !77, metadata !21, metadata !"char_struct", metadata !"char_struct", metadata !"", i32 22, metadata !28, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !31, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !29 = metadata !{null, metadata !30}
 !30 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !21} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
 !31 = metadata !{metadata !32}
 !32 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
 !33 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !34 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!35 = metadata !{i32 786468, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!35 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
 !36 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
-!37 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!38 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 38, metadata !39, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
-!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!38 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 38, metadata !39, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
+!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !40 = metadata !{metadata !9, metadata !9, metadata !41}
 !41 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !42} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
 !42 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!43 = metadata !{metadata !44}
-!44 = metadata !{metadata !45}
-!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !21, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
+!43 = metadata !{metadata !45}
+!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !21, i32 0, i32 1, %struct.char_struct* @compound_char, null} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
 !46 = metadata !{i32 29, i32 0, metadata !47, null}
-!47 = metadata !{i32 786443, metadata !5, i32 28, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!47 = metadata !{i32 786443, metadata !77, metadata !5, i32 28, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
 !48 = metadata !{i32 786689, metadata !10, metadata !"pf", metadata !6, i32 16777248, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
 !49 = metadata !{i32 32, i32 0, metadata !10, null}
 !50 = metadata !{i32 786689, metadata !10, metadata !"ppd", metadata !6, i32 33554464, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
@@ -188,19 +186,19 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 !53 = metadata !{i32 786689, metadata !10, metadata !"us", metadata !6, i32 83886112, metadata !35, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
 !54 = metadata !{i32 786689, metadata !10, metadata !"l", metadata !6, i32 100663328, metadata !36, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
 !55 = metadata !{i32 786688, metadata !56, metadata !"result", metadata !6, i32 34, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
-!56 = metadata !{i32 786443, metadata !10, i32 33, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!56 = metadata !{i32 786443, metadata !77, metadata !10, i32 33, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
 !57 = metadata !{i32 34, i32 0, metadata !56, null}
 !58 = metadata !{i32 35, i32 0, metadata !56, null}
 !59 = metadata !{i32 786689, metadata !38, metadata !"argc", metadata !6, i32 16777254, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
 !60 = metadata !{i32 38, i32 0, metadata !38, null}
 !61 = metadata !{i32 786689, metadata !38, metadata !"argv", metadata !6, i32 33554470, metadata !41, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
 !62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
-!63 = metadata !{i32 786443, metadata !38, i32 39, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!63 = metadata !{i32 786443, metadata !77, metadata !38, i32 39, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
 !64 = metadata !{i32 40, i32 0, metadata !63, null}
 !65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
 !66 = metadata !{i32 41, i32 0, metadata !63, null}
 !67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
-!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
+!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
 !69 = metadata !{metadata !19, metadata !19}
 !70 = metadata !{i32 42, i32 0, metadata !63, null}
 !71 = metadata !{i32 44, i32 0, metadata !63, null}
@@ -209,3 +207,5 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 !74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
 !75 = metadata !{i32 48, i32 0, metadata !63, null}
 !76 = metadata !{i32 49, i32 0, metadata !63, null}
+!77 = metadata !{metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev"}
+!78 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/LTO/cfi_endproc.ll b/test/LTO/cfi_endproc.ll
new file mode 100644
index 000000000000..a5cc649fc863
--- /dev/null
+++ b/test/LTO/cfi_endproc.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s >%t1
+; RUN: llvm-lto -o %t2 %t1
+; RUN: llvm-nm %t2 | FileCheck %s -check-prefix=NOEXPORT
+; RUN: llvm-lto -o %t3 -exported-symbol=main %t1
+; RUN: llvm-nm %t3 | FileCheck %s -check-prefix=EXPORT
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".text"
+module asm ".align 16, 0x90"
+module asm ".type PR14512, @function"
+module asm "PR14512:.cfi_startproc"
+module asm "ret"
+module asm ".cfi_endproc"
+
+declare void @PR14512()
+
+; Without -exported-symbol, main should be eliminated by LTO.
+; With -exported-symbol=main, main should be preserved by LTO.
+define i32 @main(i32 %argc, i8** %argv) {
+; NOEXPORT-NOT: main
+; EXPORT: main
+  call void @PR14512()
+  ret i32 0
+}
+
+; RUN: llvm-lto -o %t -dso-symbol=zed1 -dso-symbol=zed2 %t1 -disable-opt
+; RUN: llvm-nm %t | FileCheck %s -check-prefix=ZED1_AND_ZED2
+; ZED1_AND_ZED2: V zed1
+@zed1 = linkonce_odr global i32 42
+define i32* @get_zed1() {
+  ret i32* @zed1
+}
+
+; ZED1_AND_ZED2: d zed2
+@zed2 = linkonce_odr unnamed_addr global i32 42
diff --git a/test/LTO/linkonce_odr_func.ll b/test/LTO/linkonce_odr_func.ll
new file mode 100644
index 000000000000..8a4932672f6d
--- /dev/null
+++ b/test/LTO/linkonce_odr_func.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s >%t1
+; RUN: llvm-lto -o %t2 -dso-symbol=foo1 -dso-symbol=foo2 -dso-symbol=foo3 \
+; RUN:     -dso-symbol=foo4  %t1 -disable-opt
+; RUN: llvm-nm %t2 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: t foo1
+define linkonce_odr void @foo1() noinline {
+  ret void
+}
+
+; CHECK: W foo2
+define linkonce_odr void @foo2() noinline {
+  ret void
+}
+
+; CHECK: t foo3
+define linkonce_odr void @foo3() noinline {
+  ret void
+}
+
+; CHECK: W foo4
+define linkonce_odr void @foo4() noinline {
+  ret void
+}
+
+declare void @f(void()*)
+
+declare void @p()
+
+define void @bar() {
+bb0:
+  call void @foo1()
+  call void @f(void()* @foo2)
+  invoke void @foo3() to label %bb1 unwind label %clean
+bb1:
+  invoke void @f(void()* @foo4) to label %bb2 unwind label %clean
+bb2:
+  ret void
+clean:
+  landingpad {i32, i32} personality void()* @p cleanup
+  ret void
+}
diff --git a/test/LTO/lit.local.cfg b/test/LTO/lit.local.cfg
new file mode 100644
index 000000000000..6df0e03ee648
--- /dev/null
+++ b/test/LTO/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+  config.unsupported = True
diff --git a/test/LTO/runtime-library.ll b/test/LTO/runtime-library.ll
new file mode 100644
index 000000000000..76fc6f0cc47b
--- /dev/null
+++ b/test/LTO/runtime-library.ll
@@ -0,0 +1,27 @@
+; runtime library implementations should be added to llvm.compiler.used
+; RUN: llvm-as <%s >%t1
+; RUN: llvm-lto -o %t2 %t1
+; RUN: llvm-nm -no-sort %t2 | FileCheck %s -check-prefix=KEEP -check-prefix=LOSE
+
+target triple = "x86_64-apple-darwin9"
+
+; KEEP-LABEL: _puts
+define void @puts() {
+  ret void
+}
+
+; KEEP-LABEL: ___divti3
+define void @__divti3() {
+  ret void
+}
+
+; KEEP-LABEL: _memset
+define void @memset() {
+  ret void
+}
+
+; LOSE-NOT: _myprintf
+define void @myprintf() {
+  ret void
+}
+
diff --git a/test/Linker/2011-08-04-DebugLoc.ll b/test/Linker/2011-08-04-DebugLoc.ll
index 699f0b535464..d26e8cd04c58 100644
--- a/test/Linker/2011-08-04-DebugLoc.ll
+++ b/test/Linker/2011-08-04-DebugLoc.ll
@@ -14,13 +14,18 @@ define i32 @foo() nounwind ssp {
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11}
 !llvm.dbg.sp = !{!1}
 
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!2 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 2, i32 13, metadata !7, null}
-!7 = metadata !{i32 589835, metadata !1, i32 2, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 589835, metadata !8, metadata !1, i32 2, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
+!9 = metadata !{i32 0}
+!10 = metadata !{metadata !1}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/2011-08-04-DebugLoc2.ll b/test/Linker/2011-08-04-DebugLoc2.ll
index f30e1805f40f..c20941d36858 100644
--- a/test/Linker/2011-08-04-DebugLoc2.ll
+++ b/test/Linker/2011-08-04-DebugLoc2.ll
@@ -11,13 +11,18 @@ define i32 @bar() nounwind ssp {
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11}
 !llvm.dbg.sp = !{!1}
 
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"b.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @bar, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"b.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar]
+!2 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 1, i32 13, metadata !7, null}
-!7 = metadata !{i32 589835, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 589835, metadata !8, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{metadata !"b.c", metadata !"/private/tmp"}
+!9 = metadata !{i32 0}
+!10 = metadata !{metadata !1}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/2011-08-04-Metadata.ll b/test/Linker/2011-08-04-Metadata.ll
index 952eccc50768..cdf4f6f14367 100644
--- a/test/Linker/2011-08-04-Metadata.ll
+++ b/test/Linker/2011-08-04-Metadata.ll
@@ -15,15 +15,19 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11}
 !llvm.dbg.sp = !{!1}
 !llvm.dbg.gv = !{!5}
 
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/one.c", metadata !"/Volumes/Lalgate/Slate/D", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0}
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @foo, null, null}
-!2 = metadata !{i32 589865, metadata !"/tmp/one.c", metadata !"/Volumes/Lalgate/Slate/D", metadata !0}
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0}
+!0 = metadata !{i32 589841, metadata !9, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, null, null, metadata !""}
+!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
+!2 = metadata !{i32 589865, metadata !9}
+!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x", metadata !"x", metadata !"", metadata !2, i32 2, metadata !6, i32 1, i32 1, i32* @x}
-!6 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!6 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
 !7 = metadata !{i32 3, i32 14, metadata !8, null}
-!8 = metadata !{i32 589835, metadata !1, i32 3, i32 12, metadata !2, i32 0}
+!8 = metadata !{i32 589835, metadata !9, metadata !1, i32 3, i32 12, i32 0}
+!9 = metadata !{metadata !"/tmp/one.c", metadata !"/Volumes/Lalgate/Slate/D"}
+!10 = metadata !{metadata !1}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/2011-08-04-Metadata2.ll b/test/Linker/2011-08-04-Metadata2.ll
index fa5e7c9a3f76..80884cc70dba 100644
--- a/test/Linker/2011-08-04-Metadata2.ll
+++ b/test/Linker/2011-08-04-Metadata2.ll
@@ -15,15 +15,19 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11}
 !llvm.dbg.sp = !{!1}
 !llvm.dbg.gv = !{!5}
 
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/two.c", metadata !"/Volumes/Lalgate/Slate/D", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0}
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @bar, null, null}
-!2 = metadata !{i32 589865, metadata !"/tmp/two.c", metadata !"/Volumes/Lalgate/Slate/D", metadata !0}
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0}
+!0 = metadata !{i32 589841, metadata !9, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [bar]
+!2 = metadata !{i32 589865, metadata !9} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x", metadata !"x", metadata !"", metadata !2, i32 1, metadata !6, i32 1, i32 1, i32* @x}
-!6 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!6 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
 !7 = metadata !{i32 2, i32 14, metadata !8, null}
-!8 = metadata !{i32 589835, metadata !1, i32 2, i32 12, metadata !2, i32 0}
+!8 = metadata !{i32 589835, metadata !9, metadata !1, i32 2, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{metadata !"/tmp/two.c", metadata !"/Volumes/Lalgate/Slate/D"} ; file name / directory pair referenced by !0, !1, !2, !3 and !8
+!10 = metadata !{metadata !1} ; one-element list holding subprogram !1; referenced from compile unit !0
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} ; "Debug Info Version" module flag, listed in !llvm.module.flags
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index 328e83bd07b9..b077f2357892 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -18,20 +18,23 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"n1.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 137954)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !16, i32 4, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooN2N11AE", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_Z3fooN2N11AE, null, null} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"n1.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !6, metadata !"", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3fooN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo]
+!6 = metadata !{i32 720937, metadata !16} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
 !9 = metadata !{i32 721153, metadata !5, metadata !"mya", metadata !6, i32 16777220, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 720898, metadata !11, metadata !"A", metadata !12, i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null} ; [ DW_TAG_class_type ]
-!11 = metadata !{i32 720953, null, metadata !"N1", metadata !12, i32 2} ; [ DW_TAG_namespace ]
-!12 = metadata !{i32 720937, metadata !"./n.h", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
+!11 = metadata !{i32 720953, metadata !17, null, metadata !"N1", i32 2} ; [ DW_TAG_namespace ]
+!12 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
 !13 = metadata !{i32 4, i32 12, metadata !5, null}
 !14 = metadata !{i32 4, i32 18, metadata !15, null}
-!15 = metadata !{i32 720907, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 720907, metadata !16, metadata !5, i32 4, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{metadata !"n1.c", metadata !"/private/tmp"}
+!17 = metadata !{metadata !"./n.h", metadata !"/private/tmp"}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/2011-08-18-unique-class-type2.ll b/test/Linker/2011-08-18-unique-class-type2.ll
index 95892a44a5ba..7bfcd919678a 100644
--- a/test/Linker/2011-08-18-unique-class-type2.ll
+++ b/test/Linker/2011-08-18-unique-class-type2.ll
@@ -16,20 +16,23 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"n2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 137954)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !16, i32 4, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"_Z3barN2N11AE", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_Z3barN2N11AE, null, null} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"n2.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !6, metadata !"", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !16, metadata !6, metadata !"bar", metadata !"bar", metadata !"_Z3barN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3barN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
+!6 = metadata !{i32 720937, metadata !16} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
 !9 = metadata !{i32 721153, metadata !5, metadata !"youra", metadata !6, i32 16777220, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 720898, metadata !11, metadata !"A", metadata !12, i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null} ; [ DW_TAG_class_type ]
-!11 = metadata !{i32 720953, null, metadata !"N1", metadata !12, i32 2} ; [ DW_TAG_namespace ]
-!12 = metadata !{i32 720937, metadata !"./n.h", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
+!11 = metadata !{i32 720953, metadata !17, null, metadata !"N1", i32 2} ; [ DW_TAG_namespace ]
+!12 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
 !13 = metadata !{i32 4, i32 12, metadata !5, null}
 !14 = metadata !{i32 4, i32 20, metadata !15, null}
-!15 = metadata !{i32 720907, metadata !5, i32 4, i32 19, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 720907, metadata !16, metadata !5, i32 4, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{metadata !"n2.c", metadata !"/private/tmp"}
+!17 = metadata !{metadata !"./n.h", metadata !"/private/tmp"}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index cc0df4d0165e..0e14f464bbbe 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -10,17 +10,18 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 137954)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !12, i32 12, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"one.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !6, metadata !"", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!6 = metadata !{i32 720937, metadata !12} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 1, i32 13, metadata !11, null}
-!11 = metadata !{i32 720907, metadata !5, i32 1, i32 11, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-
+!11 = metadata !{i32 720907, metadata !12, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/2011-08-18-unique-debug-type2.ll b/test/Linker/2011-08-18-unique-debug-type2.ll
index 986da5b2cffa..1185100b9380 100644
--- a/test/Linker/2011-08-18-unique-debug-type2.ll
+++ b/test/Linker/2011-08-18-unique-debug-type2.ll
@@ -10,16 +10,18 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"two.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 137954)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !12, i32 12, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @bar, null, null} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"two.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !6, metadata !"", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar]
+!6 = metadata !{i32 720937, metadata !12} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 1, i32 13, metadata !11, null}
-!11 = metadata !{i32 720907, metadata !5, i32 1, i32 11, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 720907, metadata !12, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !"two.c", metadata !"/private/tmp"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll
index 7f64f95c3a7f..4cca9d576d65 100644
--- a/test/Linker/DbgDeclare.ll
+++ b/test/Linker/DbgDeclare.ll
@@ -35,24 +35,26 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @test(i32, i8**)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"main.cpp", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 173515)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 173515)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"main.cpp", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !20, null, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 3, i32 0, metadata !5, null}
 !16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554435, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 5, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !5, i32 4, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !20, metadata !5, i32 4, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !19 = metadata !{i32 6, i32 0, metadata !18, null}
+!20 = metadata !{metadata !"main.cpp", metadata !"/private/tmp"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll
index e2e56b289338..2649fccbcab8 100644
--- a/test/Linker/DbgDeclare2.ll
+++ b/test/Linker/DbgDeclare2.ll
@@ -48,29 +48,32 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare i32 @puts(i8*)
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!27}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"main.cpp", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 173515)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang version 3.3 (trunk 173515)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"print_args", metadata !"print_args", metadata !"test", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i8**)* @test, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"test.cpp", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !26, null, metadata !"print_args", metadata !"print_args", metadata !"test", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i8**)* @test, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null, metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !15 = metadata !{i32 4, i32 0, metadata !5, null}
 !16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554436, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786688, metadata !18, metadata !"i", metadata !6, i32 6, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !19, i32 6, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786443, metadata !5, i32 5, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !26, metadata !19, i32 6, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !26, metadata !5, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !20 = metadata !{i32 6, i32 0, metadata !18, null}
 !21 = metadata !{i32 8, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !18, i32 7, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !26, metadata !18, i32 7, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 9, i32 0, metadata !22, null}
 !24 = metadata !{i32 10, i32 0, metadata !19, null}
+!25 = metadata !{metadata !"main.cpp", metadata !"/private/tmp"}
+!26 = metadata !{metadata !"test.cpp", metadata !"/private/tmp"}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/Inputs/type-unique-inheritance-a.ll b/test/Linker/Inputs/type-unique-inheritance-a.ll
new file mode 100644
index 000000000000..381210cd3cf2
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-inheritance-a.ll
@@ -0,0 +1,94 @@
+; CHECK: [ DW_TAG_class_type ] [A]
+; CHECK: [ DW_TAG_class_type ] [Base]
+; CHECK: [ DW_TAG_class_type ] [B]
+; CHECK-NOT: DW_TAG_class_type
+; Content of header files (a.hpp first, then b.hpp):
+; 
+; class Base;
+; class A : Base {
+;   int x;
+; };
+; 
+; class A;
+; class Base {
+;   int b;
+; };
+; 
+; class B {
+;   int bb;
+;   A *a;
+; };
+; Content of foo.cpp:
+; 
+; #include "b.hpp"
+; #include "a.hpp"
+; 
+; void f(int a) {
+;   A t;
+; }
+; Content of bar.cpp:
+; 
+; #include "b.hpp"
+; #include "a.hpp"
+; void g(int a) {
+;   B t;
+; }
+; 
+; void f(int);
+; int main() {
+;   A a;
+;   f(0);
+;   g(1);
+;   return 0;
+; }
+; ModuleID = 'foo.cpp'
+
+%class.A = type { %class.Base, i32 }
+%class.Base = type { i32 }
+
+; Function Attrs: nounwind ssp uwtable -- f(int a) from the foo.cpp listing above
+define void @_Z1fi(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4 ; stack slot for the parameter %a
+  %t = alloca %class.A, align 4 ; stack storage for the local of type %class.A
+  store i32 %a, i32* %a.addr, align 4 ; copy the incoming argument into its slot
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !20), !dbg !21 ; !20 = DW_TAG_arg_variable [a]
+  call void @llvm.dbg.declare(metadata !{%class.A* %t}, metadata !22), !dbg !23 ; !22 = DW_TAG_auto_variable [t], typed as class A (!4)
+  ret void, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19, !25}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
+!6 = metadata !{metadata !7, metadata !13}
+!7 = metadata !{i32 786460, null, metadata !"_ZTS1A", null, i32 0, i64 0, i64 0, i64 0, i32 1, metadata !8} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base]
+!8 = metadata !{i32 786434, metadata !9, null, metadata !"Base", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ]
+!9 = metadata !{metadata !"./b.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786445, metadata !9, metadata !"_ZTS4Base", metadata !"b", i32 4, i64 32, i64 32, i64 0, i32 1, metadata !12} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int]
+!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1A", metadata !"x", i32 4, i64 32, i64 32, i64 32, i32 1, metadata !12} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786478, metadata !1, metadata !16, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 5, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [f]
+!16 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp]
+!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{null, metadata !12}
+!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!20 = metadata !{i32 786689, metadata !15, metadata !"a", metadata !16, i32 16777221, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 5]
+!21 = metadata !{i32 5, i32 0, metadata !15, null}
+!22 = metadata !{i32 786688, metadata !15, metadata !"t", metadata !16, i32 6, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 6]
+!23 = metadata !{i32 6, i32 0, metadata !15, null}
+!24 = metadata !{i32 7, i32 0, metadata !15, null}
+!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/Inputs/type-unique-inheritance-b.ll b/test/Linker/Inputs/type-unique-inheritance-b.ll
new file mode 100644
index 000000000000..0cd43f6a9d44
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-inheritance-b.ll
@@ -0,0 +1,81 @@
+; ModuleID = 'bar.cpp'
+
+%class.B = type { i32, %class.A* }
+%class.A = type { %class.Base, i32 }
+%class.Base = type { i32 }
+
+; Function Attrs: nounwind ssp uwtable -- takes one i32, allocates a %class.B local, returns void
+define void @_Z1gi(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4 ; stack slot for the parameter %a
+  %t = alloca %class.B, align 8 ; stack storage for the local of type %class.B
+  store i32 %a, i32* %a.addr, align 4 ; copy the incoming argument into its slot
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !28), !dbg !29 ; attaches debug variable !28 to the parameter slot
+  call void @llvm.dbg.declare(metadata !{%class.B* %t}, metadata !30), !dbg !31 ; attaches debug variable !30 to the %class.B local
+  ret void, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: ssp uwtable -- allocates a %class.A local, calls @_Z1fi(0) and @_Z1gi(1), returns 0
+define i32 @main() #2 {
+entry:
+  %retval = alloca i32, align 4 ; slot initialised to 0 below; not read again in this function
+  %a = alloca %class.A, align 4 ; stack storage for the local of type %class.A
+  store i32 0, i32* %retval
+  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !33), !dbg !34 ; attaches debug variable !33 to the %class.A local
+  call void @_Z1fi(i32 0), !dbg !35 ; @_Z1fi is only declared in this module (no body here)
+  call void @_Z1gi(i32 1), !dbg !36 ; @_Z1gi is defined earlier in this module
+  ret i32 0, !dbg !37
+}
+
+declare void @_Z1fi(i32) #3
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!27, !38}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !19, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"bar.cpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !11, metadata !15}
+!4 = metadata !{i32 786434, metadata !5, null, metadata !"B", i32 7, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 7, size 128, align 64, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./b.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
+!6 = metadata !{metadata !7, metadata !9}
+!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"bb", i32 8, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [bb] [line 8, size 32, align 32, offset 0] [private] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"a", i32 9, i64 64, i64 64, i64 64, i32 1, metadata !10} ; [ DW_TAG_member ] [a] [line 9, size 64, align 64, offset 64] [private] [from ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!11 = metadata !{i32 786434, metadata !12, null, metadata !"A", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ]
+!12 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
+!13 = metadata !{metadata !14, metadata !18}
+!14 = metadata !{i32 786460, null, metadata !"_ZTS1A", null, i32 0, i64 0, i64 0, i64 0, i32 1, metadata !15} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base]
+!15 = metadata !{i32 786434, metadata !5, null, metadata !"Base", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !16, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 4, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int]
+!18 = metadata !{i32 786445, metadata !12, metadata !"_ZTS1A", metadata !"x", i32 4, i64 32, i64 32, i64 32, i32 1, metadata !8} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int]
+!19 = metadata !{metadata !20, metadata !24}
+!20 = metadata !{i32 786478, metadata !1, metadata !21, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
+!21 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp]
+!22 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = metadata !{null, metadata !8}
+!24 = metadata !{i32 786478, metadata !1, metadata !21, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !25, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [main]
+!25 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = metadata !{metadata !8}
+!27 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!28 = metadata !{i32 786689, metadata !20, metadata !"a", metadata !21, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!29 = metadata !{i32 4, i32 0, metadata !20, null}
+!30 = metadata !{i32 786688, metadata !20, metadata !"t", metadata !21, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5]
+!31 = metadata !{i32 5, i32 0, metadata !20, null}
+!32 = metadata !{i32 6, i32 0, metadata !20, null}
+!33 = metadata !{i32 786688, metadata !24, metadata !"a", metadata !21, i32 10, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 10]
+!34 = metadata !{i32 10, i32 0, metadata !24, null}
+!35 = metadata !{i32 11, i32 0, metadata !24, null}
+!36 = metadata !{i32 12, i32 0, metadata !24, null}
+!37 = metadata !{i32 13, i32 0, metadata !24, null}
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/Inputs/type-unique-simple2-a.ll b/test/Linker/Inputs/type-unique-simple2-a.ll
new file mode 100644
index 000000000000..63470f3f5e8b
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-simple2-a.ll
@@ -0,0 +1,88 @@
+; Make sure the backend generates a single DIE and uses ref_addr.
+; CHECK: 0x[[BASE:.*]]: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} = "Base"
+; CHECK-NOT: DW_TAG_structure_type
+; CHECK: 0x[[INT:.*]]: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name {{.*}} = "int"
+; CHECK-NOT: DW_TAG_base_type
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[INT]])
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[BASE]])
+
+; Make sure llvm-link only generates a single copy of the struct.
+; LINK: DW_TAG_structure_type
+; LINK-NOT: DW_TAG_structure_type
+
+; Content of header files:
+; struct Base {
+;   int a;
+;   Base *b;
+; };
+; Content of foo.cpp:
+; 
+; #include "a.hpp"
+; void f(int a) {
+;   Base t;
+; }
+; Content of bar.cpp:
+; 
+; #include "a.hpp"
+; void f(int);
+; void g(int a) {
+;   Base t;
+; }
+; int main() {
+;   f(0);
+;   g(1);
+;   return 0;
+; }
+; ModuleID = 'foo.cpp'
+
+%struct.Base = type { i32, %struct.Base* }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z1fi(i32 %a) #0 { ; f(int a) from foo.cpp (subprogram !12)
+entry:
+  %a.addr = alloca i32, align 4 ; stack slot for the parameter
+  %t = alloca %struct.Base, align 8 ; stack slot for the local 't'
+  store i32 %a, i32* %a.addr, align 4 ; copy the incoming argument into its slot
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !17), !dbg !18 ; !17 = arg variable 'a'
+  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !19), !dbg !20 ; !19 = auto variable 't', typed by !4 (Base)
+  ret void, !dbg !21
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !22}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cpp", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./a.hpp", metadata !"."}
+!6 = metadata !{metadata !7, metadata !9}
+!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4Base"}
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 3, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!13 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [foo.cpp]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{null, metadata !8}
+!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!17 = metadata !{i32 786689, metadata !12, metadata !"a", metadata !13, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!18 = metadata !{i32 3, i32 0, metadata !12, null}
+!19 = metadata !{i32 786688, metadata !12, metadata !"t", metadata !13, i32 4, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 4]
+!20 = metadata !{i32 4, i32 0, metadata !12, null}
+!21 = metadata !{i32 5, i32 0, metadata !12, null}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/Inputs/type-unique-simple2-b.ll b/test/Linker/Inputs/type-unique-simple2-b.ll
new file mode 100644
index 000000000000..f564d81f1bb1
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-simple2-b.ll
@@ -0,0 +1,67 @@
+; ModuleID = 'bar.cpp'
+
+%struct.Base = type { i32, %struct.Base* }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z1gi(i32 %a) #0 { ; g(int a) from bar.cpp (subprogram !12)
+entry:
+  %a.addr = alloca i32, align 4 ; stack slot for the parameter
+  %t = alloca %struct.Base, align 8 ; stack slot for the local 't'
+  store i32 %a, i32* %a.addr, align 4 ; copy the incoming argument into its slot
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !20), !dbg !21 ; !20 = arg variable 'a'
+  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !22), !dbg !23 ; !22 = auto variable 't', typed by !4 (Base)
+  ret void, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: ssp uwtable
+define i32 @main() #2 { ; main() from bar.cpp (subprogram !16)
+entry:
+  %retval = alloca i32, align 4 ; slot is written below but never read back
+  store i32 0, i32* %retval
+  call void @_Z1fi(i32 0), !dbg !25 ; f(0): @_Z1fi is only declared in this module
+  call void @_Z1gi(i32 1), !dbg !26 ; g(1): @_Z1gi is defined in this module
+  ret i32 0, !dbg !27 ; returns the constant 0 directly
+}
+
+declare void @_Z1fi(i32) #3
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19, !28}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [bar.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"bar.cpp", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./a.hpp", metadata !"."}
+!6 = metadata !{metadata !7, metadata !9}
+!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4Base"}
+!11 = metadata !{metadata !12, metadata !16}
+!12 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
+!13 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [bar.cpp]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{null, metadata !8}
+!16 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{metadata !8}
+!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!20 = metadata !{i32 786689, metadata !12, metadata !"a", metadata !13, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!21 = metadata !{i32 4, i32 0, metadata !12, null}
+!22 = metadata !{i32 786688, metadata !12, metadata !"t", metadata !13, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5]
+!23 = metadata !{i32 5, i32 0, metadata !12, null}
+!24 = metadata !{i32 6, i32 0, metadata !12, null}
+!25 = metadata !{i32 8, i32 0, metadata !16, null} ; debug location: line 8, scope !16 (main)
+!26 = metadata !{i32 9, i32 0, metadata !16, null}
+!27 = metadata !{i32 10, i32 0, metadata !16, null}
+!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/lit.local.cfg b/test/Linker/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Linker/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/prefixdata.ll b/test/Linker/prefixdata.ll
new file mode 100644
index 000000000000..1f11dc7083eb
--- /dev/null
+++ b/test/Linker/prefixdata.ll
@@ -0,0 +1,9 @@
+; RUN: echo > %t.ll
+; RUN: llvm-link %t.ll %s -S -o - | FileCheck %s
+
+@i = linkonce_odr global i32 1
+
+; CHECK: define void @f() prefix i32* @i
+define void @f() prefix i32* @i { ; prefix data refers to the linkonce_odr global @i
+  ret void
+}
diff --git a/test/Linker/transitive-lazy-link.ll b/test/Linker/transitive-lazy-link.ll
new file mode 100644
index 000000000000..c1cacae5a9a8
--- /dev/null
+++ b/test/Linker/transitive-lazy-link.ll
@@ -0,0 +1,20 @@
+; @f and @g are lazily linked. @f requires @g - ensure @g is correctly linked.
+
+; RUN: echo "declare i32 @f(i32)" > %t.1.ll
+; RUN: echo "define i32 @h(i32 %x) {" >> %t.1.ll
+; RUN: echo "  %1 = call i32 @f(i32 %x)" >> %t.1.ll
+; RUN: echo "  ret i32 %1" >> %t.1.ll
+; RUN: echo "}" >> %t.1.ll
+; RUN: llvm-as < %t.1.ll > %t.1.bc
+; RUN: llvm-as < %s > %t.2.bc
+; RUN: llvm-link %t.1.bc %t.2.bc
+
+define available_externally i32 @f(i32 %x) { ; available_externally body that depends on @g
+  %1 = call i32 @g(i32 %x) ; the reference that makes @f require @g
+  ret i32 %1 ; forwards @g's result unchanged
+}
+
+define available_externally i32 @g(i32 %x) { ; callee of @f; %x is unused
+  ret i32 5 ; always returns the constant 5
+}
+
diff --git a/test/Linker/type-unique-inheritance.ll b/test/Linker/type-unique-inheritance.ll
new file mode 100644
index 000000000000..1ba1b08fae65
--- /dev/null
+++ b/test/Linker/type-unique-inheritance.ll
@@ -0,0 +1 @@
+; RUN: llvm-link %S/Inputs/type-unique-inheritance-a.ll %S/Inputs/type-unique-inheritance-b.ll -S -o - | FileCheck %S/Inputs/type-unique-inheritance-a.ll
diff --git a/test/Linker/type-unique-simple-a.ll b/test/Linker/type-unique-simple-a.ll
new file mode 100644
index 000000000000..4bfdff977d10
--- /dev/null
+++ b/test/Linker/type-unique-simple-a.ll
@@ -0,0 +1,91 @@
+; REQUIRES: object-emission
+
+; RUN: llvm-link %s %p/type-unique-simple-b.ll -S -o %t
+; RUN: cat %t | FileCheck %s -check-prefix=LINK
+; RUN: llc -filetype=obj -O0 < %t > %t2
+; RUN: llvm-dwarfdump -debug-dump=info %t2 | FileCheck %s
+
+; Make sure the backend generates a single DIE and uses ref_addr.
+; CHECK: 0x[[BASE:.*]]: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} = "Base"
+; CHECK-NOT: DW_TAG_structure_type
+; CHECK: 0x[[INT:.*]]: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name {{.*}} = "int"
+; CHECK-NOT: DW_TAG_base_type
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[INT]])
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[BASE]])
+
+; Make sure llvm-link only generates a single copy of the struct.
+; LINK: DW_TAG_structure_type
+; LINK-NOT: DW_TAG_structure_type
+; Content of header files:
+; struct Base {
+;   int a;
+; };
+; Content of foo.cpp:
+; 
+; #include "a.hpp"
+; void f(int a) {
+;   Base t;
+; }
+; Content of bar.cpp:
+; 
+; #include "a.hpp"
+; void f(int);
+; void g(int a) {
+;   Base t;
+; }
+; int main() {
+;   f(0);
+;   g(1);
+;   return 0;
+; }
+; ModuleID = 'foo.cpp'
+
+%struct.Base = type { i32 }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z1fi(i32 %a) #0 { ; f(int a) from foo.cpp (subprogram !10)
+entry:
+  %a.addr = alloca i32, align 4 ; stack slot for the parameter
+  %t = alloca %struct.Base, align 4 ; stack slot for the local 't'
+  store i32 %a, i32* %a.addr, align 4 ; copy the incoming argument into its slot
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !15), !dbg !16 ; !15 = arg variable 'a'
+  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !17), !dbg !18 ; !17 = auto variable 't', typed by !4 (Base)
+  ret void, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!14, !20}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 3, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp]
+!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{null, metadata !8}
+!14 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!15 = metadata !{i32 786689, metadata !10, metadata !"a", metadata !11, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!16 = metadata !{i32 3, i32 0, metadata !10, null}
+!17 = metadata !{i32 786688, metadata !10, metadata !"t", metadata !11, i32 4, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 4]
+!18 = metadata !{i32 4, i32 0, metadata !10, null}
+!19 = metadata !{i32 5, i32 0, metadata !10, null}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/type-unique-simple-b.ll b/test/Linker/type-unique-simple-b.ll
new file mode 100644
index 000000000000..c46e67f4ff1a
--- /dev/null
+++ b/test/Linker/type-unique-simple-b.ll
@@ -0,0 +1,67 @@
+; RUN: true
+
+; ModuleID = 'bar.cpp'
+
+%struct.Base = type { i32 }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z1gi(i32 %a) #0 { ; g(int a) from bar.cpp (subprogram !10)
+entry:
+  %a.addr = alloca i32, align 4 ; stack slot for the parameter
+  %t = alloca %struct.Base, align 4 ; stack slot for the local 't'
+  store i32 %a, i32* %a.addr, align 4 ; copy the incoming argument into its slot
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !18), !dbg !19 ; !18 = arg variable 'a'
+  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !20), !dbg !21 ; !20 = auto variable 't', typed by !4 (Base)
+  ret void, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: ssp uwtable
+define i32 @main() #2 { ; main() from bar.cpp (subprogram !14)
+entry:
+  %retval = alloca i32, align 4 ; slot is written below but never read back
+  store i32 0, i32* %retval
+  call void @_Z1fi(i32 0), !dbg !23 ; f(0): @_Z1fi is only declared in this module
+  call void @_Z1gi(i32 1), !dbg !24 ; g(1): @_Z1gi is defined in this module
+  ret i32 0, !dbg !25 ; returns the constant 0 directly
+}
+
+declare void @_Z1fi(i32) #3
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"bar.cpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10, metadata !14}
+!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
+!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp]
+!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{null, metadata !8}
+!14 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !8}
+!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!18 = metadata !{i32 786689, metadata !10, metadata !"a", metadata !11, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!19 = metadata !{i32 4, i32 0, metadata !10, null}
+!20 = metadata !{i32 786688, metadata !10, metadata !"t", metadata !11, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5]
+!21 = metadata !{i32 5, i32 0, metadata !10, null}
+!22 = metadata !{i32 6, i32 0, metadata !10, null}
+!23 = metadata !{i32 8, i32 0, metadata !14, null} ; debug location: line 8, scope !14 (main)
+!24 = metadata !{i32 9, i32 0, metadata !14, null}
+!25 = metadata !{i32 10, i32 0, metadata !14, null}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Linker/type-unique-simple2.ll b/test/Linker/type-unique-simple2.ll
new file mode 100644
index 000000000000..ead91df6da15
--- /dev/null
+++ b/test/Linker/type-unique-simple2.ll
@@ -0,0 +1,6 @@
+; REQUIRES: object-emission
+
+; RUN: llvm-link %S/Inputs/type-unique-simple2-a.ll %S/Inputs/type-unique-simple2-b.ll -S -o %t
+; RUN: cat %t | FileCheck %S/Inputs/type-unique-simple2-a.ll -check-prefix=LINK
+; RUN: llc -filetype=obj -O0 < %t > %t2
+; RUN: llvm-dwarfdump -debug-dump=info %t2 | FileCheck %S/Inputs/type-unique-simple2-a.ll
diff --git a/test/Linker/unnamed-addr-err-a.ll b/test/Linker/unnamed-addr-err-a.ll
new file mode 100644
index 000000000000..4872098004b5
--- /dev/null
+++ b/test/Linker/unnamed-addr-err-a.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-link %s %p/unnamed-addr-err-b.ll -S -o - 2>&1 | FileCheck %s
+
+@foo = appending unnamed_addr global [1 x i32] [i32 42]
+; CHECK: Appending variables with different unnamed_addr need to be linked
diff --git a/test/Linker/unnamed-addr-err-b.ll b/test/Linker/unnamed-addr-err-b.ll
new file mode 100644
index 000000000000..5e5fed9bb62b
--- /dev/null
+++ b/test/Linker/unnamed-addr-err-b.ll
@@ -0,0 +1,4 @@
+; This file is for use with unnamed-addr-err-a.ll
+; RUN: true
+
+@foo = appending global [1 x i32] [i32 42]
diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll
index e9c03ee14247..adaa40024cfb 100644
--- a/test/Linker/unnamed-addr1-a.ll
+++ b/test/Linker/unnamed-addr1-a.ll
@@ -1,27 +1,46 @@
-; RUN: llvm-link %s %p/unnamed-addr1-b.ll -S -o - | sort | FileCheck %s
+; RUN: llvm-link %s %p/unnamed-addr1-b.ll -S -o - | FileCheck %s
 
 ; Only in this file
-@a = common global i32 0
-; CHECK: @a = common global i32 0
-@b = common unnamed_addr global i32 0
-; CHECK: @b = common unnamed_addr global i32 0
+@global-a = common global i32 0
+; CHECK-DAG: @global-a = common global i32 0
+@global-b = common unnamed_addr global i32 0
+; CHECK-DAG: @global-b = common unnamed_addr global i32 0
+
+define weak void @func-a() { ret void }
+; CHECK-DAG: define weak void @func-a() {
+define weak void @func-b() unnamed_addr { ret void }
+; CHECK-DAG: define weak void @func-b() unnamed_addr {
 
 ; Other file has unnamed_addr definition
-@c = common unnamed_addr global i32 0
-; CHECK: @c = common unnamed_addr global i32 0
-@d = external global i32
-; CHECK: @d = unnamed_addr global i32 42
-@e = external unnamed_addr global i32
-; CHECK: @e = unnamed_addr global i32 42
-@f = weak global i32 42
-; CHECK: @f = unnamed_addr global i32 42
+@global-c = common unnamed_addr global i32 0
+; CHECK-DAG: @global-c = common unnamed_addr global i32 0
+@global-d = external global i32
+; CHECK-DAG: @global-d = global i32 42
+@global-e = external unnamed_addr global i32
+; CHECK-DAG: @global-e = unnamed_addr global i32 42
+@global-f = weak global i32 42
+; CHECK-DAG: @global-f = global i32 42
+
+declare void @func-c()
+; CHECK-DAG: define weak void @func-c() {
+define weak void @func-d() { ret void }
+; CHECK-DAG: define weak void @func-d() {
+define weak void @func-e() unnamed_addr { ret void }
+; CHECK-DAG: define weak void @func-e() unnamed_addr {
 
 ; Other file has non-unnamed_addr definition
-@g = common unnamed_addr global i32 0
-; CHECK: @g = common unnamed_addr global i32 0
-@h = external global i32
-; CHECK: @h = global i32 42
-@i = external unnamed_addr global i32
-; CHECK: @i = global i32 42
-@j = weak global i32 42
-; CHECK: @j = global i32 42
+@global-g = common unnamed_addr global i32 0
+; CHECK-DAG: @global-g = common global i32 0
+@global-h = external global i32
+; CHECK-DAG: @global-h = global i32 42
+@global-i = external unnamed_addr global i32
+; CHECK-DAG: @global-i = global i32 42
+@global-j = weak global i32 42
+; CHECK-DAG: @global-j = global i32 42
+
+declare void @func-g()
+; CHECK-DAG: define weak void @func-g() {
+define weak void @func-h() { ret void }
+; CHECK-DAG: define weak void @func-h() {
+define weak void @func-i() unnamed_addr { ret void }
+; CHECK-DAG: define weak void @func-i() {
diff --git a/test/Linker/unnamed-addr1-b.ll b/test/Linker/unnamed-addr1-b.ll
index 7d94dc1928c4..aa1507b9c6b3 100644
--- a/test/Linker/unnamed-addr1-b.ll
+++ b/test/Linker/unnamed-addr1-b.ll
@@ -1,12 +1,20 @@
 ; This file is for use with unnamed-addr1-a.ll
 ; RUN: true
 
-@c = common unnamed_addr global i32 42
-@d = unnamed_addr global i32 42
-@e = unnamed_addr global i32 42
-@f = unnamed_addr global i32 42
-
-@g = common global i32 42
-@h = global i32 42
-@i = global i32 42
-@j = global i32 42
+@global-c = common unnamed_addr global i32 42
+@global-d = unnamed_addr global i32 42
+@global-e = unnamed_addr global i32 42
+@global-f = unnamed_addr global i32 42
+
+define weak void @func-c() unnamed_addr { ret void }
+define weak void @func-d() unnamed_addr { ret void }
+define weak void @func-e() unnamed_addr { ret void }
+
+@global-g = common global i32 42
+@global-h = global i32 42
+@global-i = global i32 42
+@global-j = global i32 42
+
+define weak void @func-g() { ret void }
+define weak void @func-h() { ret void }
+define weak void @func-i() { ret void }
diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s
new file mode 100644
index 000000000000..3bcef34e4f5d
--- /dev/null
+++ b/test/MC/AArch64/adrp-relocation.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s
+        .text
+// Each ADRP below refers to sym, which is defined later in this same section.
+// The important point is that LLVM shouldn't think it can deal with the
+// relocation on the ADRP itself (even though it knows everything about the
+// relative offsets of the instruction and sym) because its value depends on
+// where this object file's .text section gets relocated in memory.
+        adrp x0, sym
+        adrp x0, :got:sym
+        adrp x0, :gottprel:sym
+        adrp x0, :tlsdesc:sym
+
+        .global sym
+sym:
+// CHECK: R_AARCH64_ADR_PREL_PG_HI21 sym
+// CHECK: R_AARCH64_ADR_GOT_PAGE sym
+// CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
+// CHECK: R_AARCH64_TLSDESC_ADR_PAGE sym
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index 1e9024c5eede..2e6e0bbd387c 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple=aarch64 < %s 2> %t
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
 
 //------------------------------------------------------------------------------
@@ -2892,13 +2892,13 @@
         movi wzr, #0x44444444
         movi w3, #0xffff
         movi x9, #0x0000ffff00000000
-// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:         movi wzr, #0x44444444
 // CHECK-ERROR-NEXT:         ^
-// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:         movi w3, #0xffff
 // CHECK-ERROR-NEXT:         ^
-// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR: error: invalid operand for instruction
 // CHECK-ERROR-NEXT:         movi x9, #0x0000ffff00000000
 // CHECK-ERROR-NEXT:         ^
 
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
index ad3064e5e524..a50efb33109d 100644
--- a/test/MC/AArch64/basic-a64-instructions.s
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s
   .globl _func
 
 // Check that the assembler can handle the documented syntax from the ARM ARM.
diff --git a/test/MC/AArch64/basic-pic.s b/test/MC/AArch64/basic-pic.s
new file mode 100644
index 000000000000..a10874dcca09
--- /dev/null
+++ b/test/MC/AArch64/basic-pic.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s
+
+// CHECK: RELOCATION RECORDS FOR [.rela.text]
+
+	.file	"/home/espindola/llvm/llvm/test/CodeGen/AArch64/basic-pic.ll"
+	.text
+	.globl	get_globalvar
+	.type	get_globalvar,@function
+get_globalvar:                          // @get_globalvar
+	.cfi_startproc
+// BB#0:
+	adrp	x0, :got:var
+	ldr	x0, [x0, #:got_lo12:var]
+	ldr	 w0, [x0]
+	ret
+.Ltmp0:
+	.size	get_globalvar, .Ltmp0-get_globalvar
+	.cfi_endproc
+
+// CHECK: R_AARCH64_ADR_GOT_PAGE var
+// CHECK: R_AARCH64_LD64_GOT_LO12_NC var
+
+	.globl	get_globalvaraddr
+	.type	get_globalvaraddr,@function
+get_globalvaraddr:                      // @get_globalvaraddr
+	.cfi_startproc
+// BB#0:
+	adrp	x0, :got:var
+	ldr	x0, [x0, #:got_lo12:var]
+	ret
+.Ltmp1:
+	.size	get_globalvaraddr, .Ltmp1-get_globalvaraddr
+	.cfi_endproc
+// CHECK: R_AARCH64_ADR_GOT_PAGE var
+// CHECK: R_AARCH64_LD64_GOT_LO12_NC var
+
+	.globl	get_hiddenvar
+	.type	get_hiddenvar,@function
+get_hiddenvar:                          // @get_hiddenvar
+	.cfi_startproc
+// BB#0:
+	adrp	x0, hiddenvar
+	ldr	w0, [x0, #:lo12:hiddenvar]
+	ret
+.Ltmp2:
+	.size	get_hiddenvar, .Ltmp2-get_hiddenvar
+	.cfi_endproc
+// CHECK: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+// CHECK: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar
+
+	.globl	get_hiddenvaraddr
+	.type	get_hiddenvaraddr,@function
+get_hiddenvaraddr:                      // @get_hiddenvaraddr
+	.cfi_startproc
+// BB#0:
+	adrp	x0, hiddenvar
+	add	x0, x0, #:lo12:hiddenvar
+	ret
+.Ltmp3:
+	.size	get_hiddenvaraddr, .Ltmp3-get_hiddenvaraddr
+	.cfi_endproc
+// CHECK: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+// CHECK: R_AARCH64_ADD_ABS_LO12_NC hiddenvar
+
+	.globl	get_func
+	.type	get_func,@function
+get_func:                               // @get_func
+	.cfi_startproc
+// BB#0:
+	adrp	x0, :got:get_func
+	ldr	x0, [x0, #:got_lo12:get_func]
+	ret
+.Ltmp4:
+	.size	get_func, .Ltmp4-get_func
+	.cfi_endproc
+
+// It is particularly important that the ADRP gets a relocation: LLVM tends to
+// think it can relax it because it knows where get_func is. It can't!
+// CHECK: R_AARCH64_ADR_GOT_PAGE get_func
+// CHECK: R_AARCH64_LD64_GOT_LO12_NC get_func
+
+	.type	var,@object             // @var
+	.bss
+	.globl	var
+	.align	2
+var:
+	.word	0                       // 0x0
+	.size	var, 4
+
+	.hidden	hiddenvar               // @hiddenvar
+	.type	hiddenvar,@object
+	.globl	hiddenvar
+	.align	2
+hiddenvar:
+	.word	0                       // 0x0
+	.size	hiddenvar, 4
+
+
diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s
new file mode 100644
index 000000000000..dfa3fb002ed5
--- /dev/null
+++ b/test/MC/AArch64/elf-extern.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s
+
+// External symbols are a different concept to global variables but should still
+// get relocations and so on when used.
+
+	.file	"<stdin>"
+	.text
+	.globl	check_extern
+	.type	check_extern,@function
+check_extern:                           // @check_extern
+	.cfi_startproc
+// BB#0:
+	sub	sp, sp, #16
+.Ltmp2:
+	.cfi_def_cfa sp, 16
+	str	x30, [sp, #8]           // 8-byte Folded Spill
+.Ltmp3:
+	.cfi_offset x30, -8
+	bl	memcpy
+	mov	 x0, xzr
+	ldr	x30, [sp, #8]           // 8-byte Folded Reload
+	add	sp, sp, #16
+	ret
+.Ltmp4:
+	.size	check_extern, .Ltmp4-check_extern
+	.cfi_endproc
+
+
+// CHECK: Relocations [
+// CHECK:   Section (2) .rela.text {
+// CHECK:     0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
index 942920be4de2..bc43113fee03 100644
--- a/test/MC/AArch64/elf-globaladdress.ll
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -40,7 +40,7 @@ define void @address() {
 ; OBJ: }
 
 ; OBJ: Relocations [
-; OBJ:   Section (1) .text {
+; OBJ:   Section (2) .rela.text {
 ; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var8
 ; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC  var8
 ; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var16
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s
index 51d444a36def..3b3aa65819d8 100644
--- a/test/MC/AArch64/elf-objdump.s
+++ b/test/MC/AArch64/elf-objdump.s
@@ -1,5 +1,5 @@
 // 64 bit little endian
-// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
+// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d - | FileCheck %s
 
 // We just want to see if llvm-objdump works at all.
 // CHECK: .text
diff --git a/test/MC/AArch64/elf-reloc-addend.s b/test/MC/AArch64/elf-reloc-addend.s
new file mode 100644
index 000000000000..0e7e2cafb721
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-addend.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=aarch64-linux-gnu -r - | FileCheck %s
+
+	add x0, x4, #:lo12:sym
+// CHECK: 0 R_AARCH64_ADD_ABS_LO12_NC sym
+	add x3, x5, #:lo12:sym+1
+// CHECK: 4 R_AARCH64_ADD_ABS_LO12_NC sym+1
+	add x3, x5, #:lo12:sym-1
+// CHECK: 8 R_AARCH64_ADD_ABS_LO12_NC sym-1
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
index 0321dda332c2..e37991bfba1c 100644
--- a/test/MC/AArch64/elf-reloc-addsubimm.s
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -4,7 +4,7 @@
         add x2, x3, #:lo12:some_label
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0 R_AARCH64_ADD_ABS_LO12_NC some_label 0x0
 // OBJ-NEXT:   }
 // OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
index 684e75a33cb7..b70dfa70fb8e 100644
--- a/test/MC/AArch64/elf-reloc-condbr.s
+++ b/test/MC/AArch64/elf-reloc-condbr.s
@@ -4,7 +4,7 @@
         b.eq somewhere
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0 R_AARCH64_CONDBR19 somewhere 0x0
 // OBJ-NEXT:   }
 // OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
index de43c4feac98..d4c3a4eb50d0 100644
--- a/test/MC/AArch64/elf-reloc-ldrlit.s
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -7,7 +7,7 @@
         prfm pldl3keep, some_label
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0 R_AARCH64_LD_PREL_LO19 some_label 0x0
 // OBJ-NEXT:     0x4 R_AARCH64_LD_PREL_LO19 some_label 0x0
 // OBJ-NEXT:     0x8 R_AARCH64_LD_PREL_LO19 some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
index e1f841bd20f7..371e7e51f245 100644
--- a/test/MC/AArch64/elf-reloc-ldstunsimm.s
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \
 // RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         ldrb w0, [sp, #:lo12:some_label]
@@ -8,7 +8,7 @@
         str q0, [sp, #:lo12:some_label]
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0  R_AARCH64_LDST8_ABS_LO12_NC   some_label 0x0
 // OBJ-NEXT:     0x4  R_AARCH64_LDST16_ABS_LO12_NC  some_label 0x0
 // OBJ-NEXT:     0x8  R_AARCH64_LDST32_ABS_LO12_NC  some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
index 8a7e532cdd23..333159562c0f 100644
--- a/test/MC/AArch64/elf-reloc-movw.s
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -23,7 +23,7 @@
         movn x19, #:abs_g2_s:some_label
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0  R_AARCH64_MOVW_UABS_G0    some_label 0x0
 // OBJ-NEXT:     0x4  R_AARCH64_MOVW_UABS_G0_NC some_label 0x0
 // OBJ-NEXT:     0x8  R_AARCH64_MOVW_UABS_G1    some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
index b5f072712fd7..093891d931aa 100644
--- a/test/MC/AArch64/elf-reloc-pcreladdressing.s
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -8,7 +8,7 @@
         ldr x0, [x5, #:got_lo12:some_label]
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0 R_AARCH64_ADR_PREL_LO21    some_label 0x0
 // OBJ-NEXT:     0x4 R_AARCH64_ADR_PREL_PG_HI21 some_label 0x0
 // OBJ-NEXT:     0x8 R_AARCH64_ADR_GOT_PAGE     some_label 0x0
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
index 037e89632e82..25c98163b584 100644
--- a/test/MC/AArch64/elf-reloc-tstb.s
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -5,7 +5,7 @@
         tbnz w3, #15, somewhere
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0  R_AARCH64_TSTBR14 somewhere 0x0
 // OBJ-NEXT:     0x4  R_AARCH64_TSTBR14 somewhere 0x0
 // OBJ-NEXT:   }
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
index bead07c12d2c..9ac66bd876a7 100644
--- a/test/MC/AArch64/elf-reloc-uncondbrimm.s
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -5,7 +5,7 @@
         bl somewhere
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rela.text {
 // OBJ-NEXT:     0x0 R_AARCH64_JUMP26 somewhere 0x0
 // OBJ-NEXT:     0x4 R_AARCH64_CALL26 somewhere 0x0
 // OBJ-NEXT:   }
diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s
index e891adbbb375..bc005b1d5304 100644
--- a/test/MC/AArch64/gicv3-regs-diagnostics.s
+++ b/test/MC/AArch64/gicv3-regs-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
 
         // Write-only
         mrs x10, icc_eoir1_el1
diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s
new file mode 100644
index 000000000000..cf34a952e90c
--- /dev/null
+++ b/test/MC/AArch64/inline-asm-modifiers.s
@@ -0,0 +1,209 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s
+
+	.file	"<stdin>"
+	.text
+	.globl	test_inline_modifier_L
+	.type	test_inline_modifier_L,@function
+test_inline_modifier_L:                 // @test_inline_modifier_L
+// BB#0:
+	//APP
+	add x0, x0, #:lo12:var_simple
+	//NO_APP
+	//APP
+	ldr x0, [x0, #:got_lo12:var_got]
+	//NO_APP
+	//APP
+	add x0, x0, #:tlsdesc_lo12:var_tlsgd
+	//NO_APP
+	//APP
+	add x0, x0, #:dtprel_lo12:var_tlsld
+	//NO_APP
+	//APP
+	ldr x0, [x0, #:gottprel_lo12:var_tlsie]
+	//NO_APP
+	//APP
+	add x0, x0, #:tprel_lo12:var_tlsle
+	//NO_APP
+	ret
+.Ltmp0:
+	.size	test_inline_modifier_L, .Ltmp0-test_inline_modifier_L
+
+// CHECK: R_AARCH64_ADD_ABS_LO12_NC var_simple
+// CHECK: R_AARCH64_LD64_GOT_LO12_NC var_got
+// CHECK: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd
+// CHECK: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld
+// CHECK: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie
+// CHECK: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle
+
+	.globl	test_inline_modifier_G
+	.type	test_inline_modifier_G,@function
+test_inline_modifier_G:                 // @test_inline_modifier_G
+// BB#0:
+	//APP
+	add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
+	//NO_APP
+	//APP
+	add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
+	//NO_APP
+	ret
+.Ltmp1:
+	.size	test_inline_modifier_G, .Ltmp1-test_inline_modifier_G
+
+// CHECK: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld
+// CHECK: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle
+
+	.globl	test_inline_modifier_A
+	.type	test_inline_modifier_A,@function
+test_inline_modifier_A:                 // @test_inline_modifier_A
+// BB#0:
+	//APP
+	adrp x0, var_simple
+	//NO_APP
+	//APP
+	adrp x0, :got:var_got
+	//NO_APP
+	//APP
+	adrp x0, :tlsdesc:var_tlsgd
+	//NO_APP
+	//APP
+	adrp x0, :gottprel:var_tlsie
+	//NO_APP
+	ret
+.Ltmp2:
+	.size	test_inline_modifier_A, .Ltmp2-test_inline_modifier_A
+// CHECK: R_AARCH64_ADR_PREL_PG_HI21 var_simple
+// CHECK: R_AARCH64_ADR_GOT_PAGE var_got
+// CHECK: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd
+// CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie
+
+	.globl	test_inline_modifier_wx
+	.type	test_inline_modifier_wx,@function
+test_inline_modifier_wx:                // @test_inline_modifier_wx
+// BB#0:
+	mov	 w2, w0
+	//APP
+	add w2, w2, w2
+	//NO_APP
+	mov	 w2, w0
+	//APP
+	add w2, w2, w2
+	//NO_APP
+	//APP
+	add x0, x0, x0
+	//NO_APP
+	mov	 x0, x1
+	//APP
+	add x0, x0, x0
+	//NO_APP
+	mov	 x0, x1
+	//APP
+	add w0, w0, w0
+	//NO_APP
+	//APP
+	add x1, x1, x1
+	//NO_APP
+	//APP
+	add w0, wzr, wzr
+	//NO_APP
+	//APP
+	add x0, xzr, xzr
+	//NO_APP
+	ret
+.Ltmp3:
+	.size	test_inline_modifier_wx, .Ltmp3-test_inline_modifier_wx
+
+	.globl	test_inline_modifier_bhsdq
+	.type	test_inline_modifier_bhsdq,@function
+test_inline_modifier_bhsdq:             // @test_inline_modifier_bhsdq
+// BB#0:
+	//APP
+	ldr b0, [sp]
+	//NO_APP
+	//APP
+	ldr h0, [sp]
+	//NO_APP
+	//APP
+	ldr s0, [sp]
+	//NO_APP
+	//APP
+	ldr d0, [sp]
+	//NO_APP
+	//APP
+	ldr q0, [sp]
+	//NO_APP
+	//APP
+	ldr b0, [sp]
+	//NO_APP
+	//APP
+	ldr h0, [sp]
+	//NO_APP
+	//APP
+	ldr s0, [sp]
+	//NO_APP
+	//APP
+	ldr d0, [sp]
+	//NO_APP
+	//APP
+	ldr q0, [sp]
+	//NO_APP
+	ret
+.Ltmp4:
+	.size	test_inline_modifier_bhsdq, .Ltmp4-test_inline_modifier_bhsdq
+
+	.globl	test_inline_modifier_c
+	.type	test_inline_modifier_c,@function
+test_inline_modifier_c:                 // @test_inline_modifier_c
+// BB#0:
+	//APP
+	adr x0, 3
+	//NO_APP
+	ret
+.Ltmp5:
+	.size	test_inline_modifier_c, .Ltmp5-test_inline_modifier_c
+
+	.hidden	var_simple              // @var_simple
+	.type	var_simple,@object
+	.bss
+	.globl	var_simple
+	.align	2
+var_simple:
+	.word	0                       // 0x0
+	.size	var_simple, 4
+
+	.type	var_got,@object         // @var_got
+	.globl	var_got
+	.align	2
+var_got:
+	.word	0                       // 0x0
+	.size	var_got, 4
+
+	.type	var_tlsgd,@object       // @var_tlsgd
+	.section	.tbss,"awT",@nobits
+	.globl	var_tlsgd
+	.align	2
+var_tlsgd:
+	.word	0                       // 0x0
+	.size	var_tlsgd, 4
+
+	.type	var_tlsld,@object       // @var_tlsld
+	.globl	var_tlsld
+	.align	2
+var_tlsld:
+	.word	0                       // 0x0
+	.size	var_tlsld, 4
+
+	.type	var_tlsie,@object       // @var_tlsie
+	.globl	var_tlsie
+	.align	2
+var_tlsie:
+	.word	0                       // 0x0
+	.size	var_tlsie, 4
+
+	.type	var_tlsle,@object       // @var_tlsle
+	.globl	var_tlsle
+	.align	2
+var_tlsle:
+	.word	0                       // 0x0
+	.size	var_tlsle, 4
+
+
diff --git a/test/MC/AArch64/jump-table.s b/test/MC/AArch64/jump-table.s
new file mode 100644
index 000000000000..578ebf4e6608
--- /dev/null
+++ b/test/MC/AArch64/jump-table.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s
+
+	.file	"<stdin>"
+	.text
+	.globl	test_jumptable
+	.type	test_jumptable,@function
+test_jumptable:                         // @test_jumptable
+	.cfi_startproc
+// BB#0:
+	ubfx	w1, w0, #0, #32
+	cmp w0, #4
+	b.hi .LBB0_3
+// BB#1:
+	adrp	x0, .LJTI0_0
+	add	x0, x0, #:lo12:.LJTI0_0
+	ldr	x0, [x0, x1, lsl #3]
+	br	x0
+.LBB0_2:                                // %lbl1
+	movz	x0, #1
+	ret
+.LBB0_3:                                // %def
+	mov	 x0, xzr
+	ret
+.LBB0_4:                                // %lbl2
+	movz	x0, #2
+	ret
+.LBB0_5:                                // %lbl3
+	movz	x0, #4
+	ret
+.LBB0_6:                                // %lbl4
+	movz	x0, #8
+	ret
+.Ltmp0:
+	.size	test_jumptable, .Ltmp0-test_jumptable
+	.cfi_endproc
+	.section	.rodata,"a",@progbits
+	.align	3
+.LJTI0_0:
+	.xword	.LBB0_2
+	.xword	.LBB0_4
+	.xword	.LBB0_5
+	.xword	.LBB0_3
+	.xword	.LBB0_6
+
+
+
+// First make sure we get a page/lo12 pair in .text to pick up the jump-table
+
+// CHECK:      Relocations [
+// CHECK:        Section ({{[0-9]+}}) .rela.text {
+// CHECK-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 .rodata
+// CHECK-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC .rodata
+// CHECK:        }
+
+// Also check the targets in .rodata are relocated
+// CHECK:        Section ({{[0-9]+}}) .rela.rodata {
+// CHECK-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ABS64 .text
+// CHECK:        }
+// CHECK:      ]
diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg
index cc02173c8ed4..75dba81bc0b5 100644
--- a/test/MC/AArch64/lit.local.cfg
+++ b/test/MC/AArch64/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'AArch64' in targets:
     config.unsupported = True
 \ No newline at end of file
diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s
new file mode 100644
index 000000000000..cde792a2fb65
--- /dev/null
+++ b/test/MC/AArch64/neon-2velem.s
@@ -0,0 +1,271 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions with 2 vectors and an element
+//------------------------------------------------------------------------------
+
+        mla v0.2s, v1.2s, v2.s[2]
+        mla v0.2s, v1.2s, v22.s[2]
+        mla v3.4s, v8.4s, v2.s[1]
+        mla v3.4s, v8.4s, v22.s[3]
+
+// CHECK: mla	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x08,0x82,0x2f]
+// CHECK: mla	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x08,0x96,0x2f]
+// CHECK: mla	v3.4s, v8.4s, v2.s[1]   // encoding: [0x03,0x01,0xa2,0x6f]
+// CHECK: mla	v3.4s, v8.4s, v22.s[3]  // encoding: [0x03,0x09,0xb6,0x6f]
+
+        mla v0.4h, v1.4h, v2.h[2]
+        mla v0.4h, v1.4h, v15.h[2]
+        mla v0.8h, v1.8h, v2.h[7]
+        mla v0.8h, v1.8h, v14.h[6]
+
+// CHECK: mla	v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x00,0x62,0x2f]
+// CHECK: mla	v0.4h, v1.4h, v15.h[2]  // encoding: [0x20,0x00,0x6f,0x2f]
+// CHECK: mla	v0.8h, v1.8h, v2.h[7]   // encoding: [0x20,0x08,0x72,0x6f]
+// CHECK: mla	v0.8h, v1.8h, v14.h[6]  // encoding: [0x20,0x08,0x6e,0x6f]
+
+        mls v0.2s, v1.2s, v2.s[2]
+        mls v0.2s, v1.2s, v22.s[2]
+        mls v3.4s, v8.4s, v2.s[1]
+        mls v3.4s, v8.4s, v22.s[3]
+
+// CHECK: mls	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x48,0x82,0x2f]
+// CHECK: mls	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x48,0x96,0x2f]
+// CHECK: mls	v3.4s, v8.4s, v2.s[1]   // encoding: [0x03,0x41,0xa2,0x6f]
+// CHECK: mls	v3.4s, v8.4s, v22.s[3]  // encoding: [0x03,0x49,0xb6,0x6f]
+
+        mls v0.4h, v1.4h, v2.h[2]
+        mls v0.4h, v1.4h, v15.h[2]
+        mls v0.8h, v1.8h, v2.h[7]
+        mls v0.8h, v1.8h, v14.h[6]
+
+// CHECK: mls	v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x40,0x62,0x2f]
+// CHECK: mls	v0.4h, v1.4h, v15.h[2]  // encoding: [0x20,0x40,0x6f,0x2f]
+// CHECK: mls	v0.8h, v1.8h, v2.h[7]   // encoding: [0x20,0x48,0x72,0x6f]
+// CHECK: mls	v0.8h, v1.8h, v14.h[6]  // encoding: [0x20,0x48,0x6e,0x6f]
+
+        fmla v0.2s, v1.2s, v2.s[2]
+        fmla v0.2s, v1.2s, v22.s[2]
+        fmla v3.4s, v8.4s, v2.s[1]
+        fmla v3.4s, v8.4s, v22.s[3]
+        fmla v0.2d, v1.2d, v2.d[1]
+        fmla v0.2d, v1.2d, v22.d[1]
+
+// CHECK: fmla	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x18,0x82,0x0f]
+// CHECK: fmla	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x18,0x96,0x0f]
+// CHECK: fmla	v3.4s, v8.4s, v2.s[1]   // encoding: [0x03,0x11,0xa2,0x4f]
+// CHECK: fmla	v3.4s, v8.4s, v22.s[3]  // encoding: [0x03,0x19,0xb6,0x4f]
+// CHECK: fmla	v0.2d, v1.2d, v2.d[1]   // encoding: [0x20,0x18,0xc2,0x4f]
+// CHECK: fmla	v0.2d, v1.2d, v22.d[1]  // encoding: [0x20,0x18,0xd6,0x4f]
+
+        fmls v0.2s, v1.2s, v2.s[2]
+        fmls v0.2s, v1.2s, v22.s[2]
+        fmls v3.4s, v8.4s, v2.s[1]
+        fmls v3.4s, v8.4s, v22.s[3]
+        fmls v0.2d, v1.2d, v2.d[1]
+        fmls v0.2d, v1.2d, v22.d[1]
+
+// CHECK: fmls	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x58,0x82,0x0f]
+// CHECK: fmls	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x58,0x96,0x0f]
+// CHECK: fmls	v3.4s, v8.4s, v2.s[1]   // encoding: [0x03,0x51,0xa2,0x4f]
+// CHECK: fmls	v3.4s, v8.4s, v22.s[3]  // encoding: [0x03,0x59,0xb6,0x4f]
+// CHECK: fmls	v0.2d, v1.2d, v2.d[1]   // encoding: [0x20,0x58,0xc2,0x4f]
+// CHECK: fmls	v0.2d, v1.2d, v22.d[1]  // encoding: [0x20,0x58,0xd6,0x4f]
+
+        smlal v0.4s, v1.4h, v2.h[2]
+        smlal v0.2d, v1.2s, v2.s[2]
+        smlal v0.2d, v1.2s, v22.s[2]
+        smlal2 v0.4s, v1.8h, v1.h[2]
+        smlal2 v0.2d, v1.4s, v1.s[2]
+        smlal2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: smlal	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0x20,0x62,0x0f]
+// CHECK: smlal	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0x28,0x82,0x0f]
+// CHECK: smlal	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0x28,0x96,0x0f]
+// CHECK: smlal2	v0.4s, v1.8h, v1.h[2]   // encoding: [0x20,0x20,0x61,0x4f]
+// CHECK: smlal2	v0.2d, v1.4s, v1.s[2]   // encoding: [0x20,0x28,0x81,0x4f]
+// CHECK: smlal2	v0.2d, v1.4s, v22.s[2]  // encoding: [0x20,0x28,0x96,0x4f]
+
+        smlsl v0.4s, v1.4h, v2.h[2]
+        smlsl v0.2d, v1.2s, v2.s[2]
+        smlsl v0.2d, v1.2s, v22.s[2]
+        smlsl2 v0.4s, v1.8h, v1.h[2]
+        smlsl2 v0.2d, v1.4s, v1.s[2]
+        smlsl2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: smlsl	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0x60,0x62,0x0f]
+// CHECK: smlsl	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0x68,0x82,0x0f]
+// CHECK: smlsl	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0x68,0x96,0x0f]
+// CHECK: smlsl2	v0.4s, v1.8h, v1.h[2]   // encoding: [0x20,0x60,0x61,0x4f]
+// CHECK: smlsl2	v0.2d, v1.4s, v1.s[2]   // encoding: [0x20,0x68,0x81,0x4f]
+// CHECK: smlsl2	v0.2d, v1.4s, v22.s[2]  // encoding: [0x20,0x68,0x96,0x4f]
+
+        sqdmlal v0.4s, v1.4h, v2.h[2]
+        sqdmlal v0.2d, v1.2s, v2.s[2]
+        sqdmlal v0.2d, v1.2s, v22.s[2]
+        sqdmlal2 v0.4s, v1.8h, v1.h[2]
+        sqdmlal2 v0.2d, v1.4s, v1.s[2]
+        sqdmlal2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: sqdmlal	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0x30,0x62,0x0f]
+// CHECK: sqdmlal	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0x38,0x82,0x0f]
+// CHECK: sqdmlal	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0x38,0x96,0x0f]
+// CHECK: sqdmlal2	v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x30,0x61,0x4f]
+// CHECK: sqdmlal2	v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x38,0x81,0x4f]
+// CHECK: sqdmlal2	v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x38,0x96,0x4f]
+
+        umlal v0.4s, v1.4h, v2.h[2]
+        umlal v0.2d, v1.2s, v2.s[2]
+        umlal v0.2d, v1.2s, v22.s[2]
+        umlal2 v0.4s, v1.8h, v1.h[2]
+        umlal2 v0.2d, v1.4s, v1.s[2]
+        umlal2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: umlal	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0x20,0x62,0x2f]
+// CHECK: umlal	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0x28,0x82,0x2f]
+// CHECK: umlal	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0x28,0x96,0x2f]
+// CHECK: umlal2	v0.4s, v1.8h, v1.h[2]   // encoding: [0x20,0x20,0x61,0x6f]
+// CHECK: umlal2	v0.2d, v1.4s, v1.s[2]   // encoding: [0x20,0x28,0x81,0x6f]
+// CHECK: umlal2	v0.2d, v1.4s, v22.s[2]  // encoding: [0x20,0x28,0x96,0x6f]
+
+        umlsl v0.4s, v1.4h, v2.h[2]
+        umlsl v0.2d, v1.2s, v2.s[2]
+        umlsl v0.2d, v1.2s, v22.s[2]
+        umlsl2 v0.4s, v1.8h, v1.h[2]
+        umlsl2 v0.2d, v1.4s, v1.s[2]
+        umlsl2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: umlsl	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0x60,0x62,0x2f]
+// CHECK: umlsl	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0x68,0x82,0x2f]
+// CHECK: umlsl	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0x68,0x96,0x2f]
+// CHECK: umlsl2	v0.4s, v1.8h, v1.h[2]   // encoding: [0x20,0x60,0x61,0x6f]
+// CHECK: umlsl2	v0.2d, v1.4s, v1.s[2]   // encoding: [0x20,0x68,0x81,0x6f]
+// CHECK: umlsl2	v0.2d, v1.4s, v22.s[2]  // encoding: [0x20,0x68,0x96,0x6f]
+
+        sqdmlsl v0.4s, v1.4h, v2.h[2]
+        sqdmlsl v0.2d, v1.2s, v2.s[2]
+        sqdmlsl v0.2d, v1.2s, v22.s[2]
+        sqdmlsl2 v0.4s, v1.8h, v1.h[2]
+        sqdmlsl2 v0.2d, v1.4s, v1.s[2]
+        sqdmlsl2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: sqdmlsl	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0x70,0x62,0x0f]
+// CHECK: sqdmlsl	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0x78,0x82,0x0f]
+// CHECK: sqdmlsl	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0x78,0x96,0x0f]
+// CHECK: sqdmlsl2	v0.4s, v1.8h, v1.h[2] // encoding: [0x20,0x70,0x61,0x4f]
+// CHECK: sqdmlsl2	v0.2d, v1.4s, v1.s[2] // encoding: [0x20,0x78,0x81,0x4f]
+// CHECK: sqdmlsl2	v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0x78,0x96,0x4f]
+
+        mul v0.4h, v1.4h, v2.h[2]
+        mul v0.8h, v1.8h, v2.h[2]
+        mul v0.2s, v1.2s, v2.s[2]
+        mul v0.2s, v1.2s, v22.s[2]
+        mul v0.4s, v1.4s, v2.s[2]
+        mul v0.4s, v1.4s, v22.s[2]
+
+// CHECK: mul	v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0x80,0x62,0x0f]
+// CHECK: mul	v0.8h, v1.8h, v2.h[2]   // encoding: [0x20,0x80,0x62,0x4f]
+// CHECK: mul	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x88,0x82,0x0f]
+// CHECK: mul	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x88,0x96,0x0f]
+// CHECK: mul	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0x88,0x82,0x4f]
+// CHECK: mul	v0.4s, v1.4s, v22.s[2]  // encoding: [0x20,0x88,0x96,0x4f]
+
+        fmul v0.2s, v1.2s, v2.s[2]
+        fmul v0.2s, v1.2s, v22.s[2]
+        fmul v0.4s, v1.4s, v2.s[2]
+        fmul v0.4s, v1.4s, v22.s[2]
+        fmul v0.2d, v1.2d, v2.d[1]
+        fmul v0.2d, v1.2d, v22.d[1]
+
+// CHECK: fmul	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x0f]
+// CHECK: fmul	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x98,0x96,0x0f]
+// CHECK: fmul	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x4f]
+// CHECK: fmul	v0.4s, v1.4s, v22.s[2]  // encoding: [0x20,0x98,0x96,0x4f]
+// CHECK: fmul	v0.2d, v1.2d, v2.d[1]   // encoding: [0x20,0x98,0xc2,0x4f]
+// CHECK: fmul	v0.2d, v1.2d, v22.d[1]  // encoding: [0x20,0x98,0xd6,0x4f]
+
+        fmulx v0.2s, v1.2s, v2.s[2]
+        fmulx v0.2s, v1.2s, v22.s[2]
+        fmulx v0.4s, v1.4s, v2.s[2]
+        fmulx v0.4s, v1.4s, v22.s[2]
+        fmulx v0.2d, v1.2d, v2.d[1]
+        fmulx v0.2d, v1.2d, v22.d[1]
+
+// CHECK: fmulx	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x2f]
+// CHECK: fmulx	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0x98,0x96,0x2f]
+// CHECK: fmulx	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0x98,0x82,0x6f]
+// CHECK: fmulx	v0.4s, v1.4s, v22.s[2]  // encoding: [0x20,0x98,0x96,0x6f]
+// CHECK: fmulx	v0.2d, v1.2d, v2.d[1]   // encoding: [0x20,0x98,0xc2,0x6f]
+// CHECK: fmulx	v0.2d, v1.2d, v22.d[1]  // encoding: [0x20,0x98,0xd6,0x6f]
+
+        smull v0.4s, v1.4h, v2.h[2]
+        smull v0.2d, v1.2s, v2.s[2]
+        smull v0.2d, v1.2s, v22.s[2]
+        smull2 v0.4s, v1.8h, v2.h[2]
+        smull2 v0.2d, v1.4s, v2.s[2]
+        smull2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: smull	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0xa0,0x62,0x0f]
+// CHECK: smull	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0xa8,0x82,0x0f]
+// CHECK: smull	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0xa8,0x96,0x0f]
+// CHECK: smull2	v0.4s, v1.8h, v2.h[2]   // encoding: [0x20,0xa0,0x62,0x4f]
+// CHECK: smull2	v0.2d, v1.4s, v2.s[2]   // encoding: [0x20,0xa8,0x82,0x4f]
+// CHECK: smull2	v0.2d, v1.4s, v22.s[2]  // encoding: [0x20,0xa8,0x96,0x4f]
+
+        umull v0.4s, v1.4h, v2.h[2]
+        umull v0.2d, v1.2s, v2.s[2]
+        umull v0.2d, v1.2s, v22.s[2]
+        umull2 v0.4s, v1.8h, v2.h[2]
+        umull2 v0.2d, v1.4s, v2.s[2]
+        umull2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: umull	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0xa0,0x62,0x2f]
+// CHECK: umull	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0xa8,0x82,0x2f]
+// CHECK: umull	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0xa8,0x96,0x2f]
+// CHECK: umull2	v0.4s, v1.8h, v2.h[2]   // encoding: [0x20,0xa0,0x62,0x6f]
+// CHECK: umull2	v0.2d, v1.4s, v2.s[2]   // encoding: [0x20,0xa8,0x82,0x6f]
+// CHECK: umull2	v0.2d, v1.4s, v22.s[2]  // encoding: [0x20,0xa8,0x96,0x6f]
+
+        sqdmull v0.4s, v1.4h, v2.h[2]
+        sqdmull v0.2d, v1.2s, v2.s[2]
+        sqdmull v0.2d, v1.2s, v22.s[2]
+        sqdmull2 v0.4s, v1.8h, v2.h[2]
+        sqdmull2 v0.2d, v1.4s, v2.s[2]
+        sqdmull2 v0.2d, v1.4s, v22.s[2]
+
+// CHECK: sqdmull	v0.4s, v1.4h, v2.h[2]   // encoding: [0x20,0xb0,0x62,0x0f]
+// CHECK: sqdmull	v0.2d, v1.2s, v2.s[2]   // encoding: [0x20,0xb8,0x82,0x0f]
+// CHECK: sqdmull	v0.2d, v1.2s, v22.s[2]  // encoding: [0x20,0xb8,0x96,0x0f]
+// CHECK: sqdmull2	v0.4s, v1.8h, v2.h[2] // encoding: [0x20,0xb0,0x62,0x4f]
+// CHECK: sqdmull2	v0.2d, v1.4s, v2.s[2] // encoding: [0x20,0xb8,0x82,0x4f]
+// CHECK: sqdmull2	v0.2d, v1.4s, v22.s[2] // encoding: [0x20,0xb8,0x96,0x4f]
+
+        sqdmulh v0.4h, v1.4h, v2.h[2]
+        sqdmulh v0.8h, v1.8h, v2.h[2]
+        sqdmulh v0.2s, v1.2s, v2.s[2]
+        sqdmulh v0.2s, v1.2s, v22.s[2]
+        sqdmulh v0.4s, v1.4s, v2.s[2]
+        sqdmulh v0.4s, v1.4s, v22.s[2]
+
+// CHECK: sqdmulh	v0.4h, v1.4h, v2.h[2]   // encoding: [0x20,0xc0,0x62,0x0f]
+// CHECK: sqdmulh	v0.8h, v1.8h, v2.h[2]   // encoding: [0x20,0xc0,0x62,0x4f]
+// CHECK: sqdmulh	v0.2s, v1.2s, v2.s[2]   // encoding: [0x20,0xc8,0x82,0x0f]
+// CHECK: sqdmulh	v0.2s, v1.2s, v22.s[2]  // encoding: [0x20,0xc8,0x96,0x0f]
+// CHECK: sqdmulh	v0.4s, v1.4s, v2.s[2]   // encoding: [0x20,0xc8,0x82,0x4f]
+// CHECK: sqdmulh	v0.4s, v1.4s, v22.s[2]  // encoding: [0x20,0xc8,0x96,0x4f]
+
+        sqrdmulh v0.4h, v1.4h, v2.h[2]
+        sqrdmulh v0.8h, v1.8h, v2.h[2]
+        sqrdmulh v0.2s, v1.2s, v2.s[2]
+        sqrdmulh v0.2s, v1.2s, v22.s[2]
+        sqrdmulh v0.4s, v1.4s, v2.s[2]
+        sqrdmulh v0.4s, v1.4s, v22.s[2]
+
+// CHECK: sqrdmulh	v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0xd0,0x62,0x0f]
+// CHECK: sqrdmulh	v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0xd0,0x62,0x4f]
+// CHECK: sqrdmulh	v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0xd8,0x82,0x0f]
+// CHECK: sqrdmulh	v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0xd8,0x96,0x0f]
+// CHECK: sqrdmulh	v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0xd8,0x82,0x4f]
+// CHECK: sqrdmulh	v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0xd8,0x96,0x4f]
diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s
new file mode 100644
index 000000000000..3ff86bfd6a40
--- /dev/null
+++ b/test/MC/AArch64/neon-3vdiff.s
@@ -0,0 +1,415 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions with 3 different vector data types
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Long
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Long - Variant 1
+//------------------------------------------------------------------------------
+
+        saddl v0.8h, v1.8b, v2.8b
+        saddl v0.4s, v1.4h, v2.4h
+        saddl v0.2d, v1.2s, v2.2s
+
+// CHECK: saddl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x00,0x22,0x0e]
+// CHECK: saddl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x00,0x62,0x0e]
+// CHECK: saddl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x00,0xa2,0x0e]
+
+        saddl2 v0.4s, v1.8h, v2.8h
+        saddl2 v0.8h, v1.16b, v2.16b
+        saddl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: saddl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x00,0x62,0x4e]
+// CHECK: saddl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x00,0x22,0x4e]
+// CHECK: saddl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x00,0xa2,0x4e]
+
+        uaddl v0.8h, v1.8b, v2.8b
+        uaddl v0.4s, v1.4h, v2.4h
+        uaddl v0.2d, v1.2s, v2.2s
+
+// CHECK: uaddl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x00,0x22,0x2e]
+// CHECK: uaddl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x00,0x62,0x2e]
+// CHECK: uaddl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x00,0xa2,0x2e]
+
+        uaddl2 v0.8h, v1.16b, v2.16b
+        uaddl2 v0.4s, v1.8h, v2.8h
+        uaddl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: uaddl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x00,0x22,0x6e]
+// CHECK: uaddl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x00,0x62,0x6e]
+// CHECK: uaddl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x00,0xa2,0x6e]
+
+        ssubl v0.8h, v1.8b, v2.8b
+        ssubl v0.4s, v1.4h, v2.4h
+        ssubl v0.2d, v1.2s, v2.2s
+
+// CHECK: ssubl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x20,0x22,0x0e]
+// CHECK: ssubl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x20,0x62,0x0e]
+// CHECK: ssubl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x20,0xa2,0x0e]
+
+        ssubl2 v0.8h, v1.16b, v2.16b
+        ssubl2 v0.4s, v1.8h, v2.8h
+        ssubl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: ssubl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x20,0x22,0x4e]
+// CHECK: ssubl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x20,0x62,0x4e]
+// CHECK: ssubl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x20,0xa2,0x4e]
+
+        usubl v0.8h, v1.8b, v2.8b
+        usubl v0.4s, v1.4h, v2.4h
+        usubl v0.2d, v1.2s, v2.2s
+
+// CHECK: usubl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x20,0x22,0x2e]
+// CHECK: usubl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x20,0x62,0x2e]
+// CHECK: usubl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x20,0xa2,0x2e]
+
+        usubl2 v0.8h, v1.16b, v2.16b
+        usubl2 v0.4s, v1.8h, v2.8h
+        usubl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: usubl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x20,0x22,0x6e]
+// CHECK: usubl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x20,0x62,0x6e]
+// CHECK: usubl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x20,0xa2,0x6e]
+
+        sabal v0.8h, v1.8b, v2.8b
+        sabal v0.4s, v1.4h, v2.4h
+        sabal v0.2d, v1.2s, v2.2s
+
+// CHECK: sabal	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x50,0x22,0x0e]
+// CHECK: sabal	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x50,0x62,0x0e]
+// CHECK: sabal	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x50,0xa2,0x0e]
+
+        sabal2 v0.8h, v1.16b, v2.16b
+        sabal2 v0.4s, v1.8h, v2.8h
+        sabal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sabal2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x50,0x22,0x4e]
+// CHECK: sabal2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x50,0x62,0x4e]
+// CHECK: sabal2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x50,0xa2,0x4e]
+
+        uabal v0.8h, v1.8b, v2.8b
+        uabal v0.4s, v1.4h, v2.4h
+        uabal v0.2d, v1.2s, v2.2s
+
+// CHECK: uabal	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x50,0x22,0x2e]
+// CHECK: uabal	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x50,0x62,0x2e]
+// CHECK: uabal	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x50,0xa2,0x2e]
+
+        uabal2 v0.8h, v1.16b, v2.16b
+        uabal2 v0.4s, v1.8h, v2.8h
+        uabal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: uabal2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x50,0x22,0x6e]
+// CHECK: uabal2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x50,0x62,0x6e]
+// CHECK: uabal2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x50,0xa2,0x6e]
+
+        sabdl v0.8h, v1.8b, v2.8b
+        sabdl v0.4s, v1.4h, v2.4h
+        sabdl v0.2d, v1.2s, v2.2s
+
+// CHECK: sabdl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x70,0x22,0x0e]
+// CHECK: sabdl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x70,0x62,0x0e]
+// CHECK: sabdl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x70,0xa2,0x0e]
+
+        sabdl2 v0.8h, v1.16b, v2.16b
+        sabdl2 v0.4s, v1.8h, v2.8h
+        sabdl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sabdl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x70,0x22,0x4e]
+// CHECK: sabdl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x70,0x62,0x4e]
+// CHECK: sabdl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x70,0xa2,0x4e]
+
+        uabdl v0.8h, v1.8b, v2.8b
+        uabdl v0.4s, v1.4h, v2.4h
+        uabdl v0.2d, v1.2s, v2.2s
+
+// CHECK: uabdl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x70,0x22,0x2e]
+// CHECK: uabdl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x70,0x62,0x2e]
+// CHECK: uabdl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x70,0xa2,0x2e]
+
+        uabdl2 v0.8h, v1.16b, v2.16b
+        uabdl2 v0.4s, v1.8h, v2.8h
+        uabdl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: uabdl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x70,0x22,0x6e]
+// CHECK: uabdl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x70,0x62,0x6e]
+// CHECK: uabdl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x70,0xa2,0x6e]
+
+        smlal v0.8h, v1.8b, v2.8b
+        smlal v0.4s, v1.4h, v2.4h
+        smlal v0.2d, v1.2s, v2.2s
+
+// CHECK: smlal	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x80,0x22,0x0e]
+// CHECK: smlal	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x80,0x62,0x0e]
+// CHECK: smlal	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x80,0xa2,0x0e]
+
+        smlal2 v0.8h, v1.16b, v2.16b
+        smlal2 v0.4s, v1.8h, v2.8h
+        smlal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: smlal2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x80,0x22,0x4e]
+// CHECK: smlal2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x80,0x62,0x4e]
+// CHECK: smlal2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x80,0xa2,0x4e]
+
+        umlal v0.8h, v1.8b, v2.8b
+        umlal v0.4s, v1.4h, v2.4h
+        umlal v0.2d, v1.2s, v2.2s
+
+// CHECK: umlal	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0x80,0x22,0x2e]
+// CHECK: umlal	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x80,0x62,0x2e]
+// CHECK: umlal	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x80,0xa2,0x2e]
+
+        umlal2 v0.8h, v1.16b, v2.16b
+        umlal2 v0.4s, v1.8h, v2.8h
+        umlal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: umlal2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0x80,0x22,0x6e]
+// CHECK: umlal2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0x80,0x62,0x6e]
+// CHECK: umlal2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0x80,0xa2,0x6e]
+
+        smlsl v0.8h, v1.8b, v2.8b
+        smlsl v0.4s, v1.4h, v2.4h
+        smlsl v0.2d, v1.2s, v2.2s
+
+// CHECK: smlsl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0xa0,0x22,0x0e]
+// CHECK: smlsl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xa0,0x62,0x0e]
+// CHECK: smlsl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0xa0,0xa2,0x0e]
+
+        smlsl2 v0.8h, v1.16b, v2.16b
+        smlsl2 v0.4s, v1.8h, v2.8h
+        smlsl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: smlsl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0xa0,0x22,0x4e]
+// CHECK: smlsl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0xa0,0x62,0x4e]
+// CHECK: smlsl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0xa0,0xa2,0x4e]
+
+        umlsl v0.8h, v1.8b, v2.8b
+        umlsl v0.4s, v1.4h, v2.4h
+        umlsl v0.2d, v1.2s, v2.2s
+
+// CHECK: umlsl	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0xa0,0x22,0x2e]
+// CHECK: umlsl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xa0,0x62,0x2e]
+// CHECK: umlsl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0xa0,0xa2,0x2e]
+
+        umlsl2 v0.8h, v1.16b, v2.16b
+        umlsl2 v0.4s, v1.8h, v2.8h
+        umlsl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: umlsl2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0xa0,0x22,0x6e]
+// CHECK: umlsl2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0xa0,0x62,0x6e]
+// CHECK: umlsl2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0xa0,0xa2,0x6e]
+
+        smull v0.8h, v1.8b, v2.8b
+        smull v0.4s, v1.4h, v2.4h
+        smull v0.2d, v1.2s, v2.2s
+
+// CHECK: smull	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0xc0,0x22,0x0e]
+// CHECK: smull	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xc0,0x62,0x0e]
+// CHECK: smull	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0xc0,0xa2,0x0e]
+
+        smull2 v0.8h, v1.16b, v2.16b
+        smull2 v0.4s, v1.8h, v2.8h
+        smull2 v0.2d, v1.4s, v2.4s
+
+// CHECK: smull2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0xc0,0x22,0x4e]
+// CHECK: smull2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0xc0,0x62,0x4e]
+// CHECK: smull2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0xc0,0xa2,0x4e]
+
+        umull v0.8h, v1.8b, v2.8b
+        umull v0.4s, v1.4h, v2.4h
+        umull v0.2d, v1.2s, v2.2s
+
+// CHECK: umull	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0xc0,0x22,0x2e]
+// CHECK: umull	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xc0,0x62,0x2e]
+// CHECK: umull	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0xc0,0xa2,0x2e]
+
+        umull2 v0.8h, v1.16b, v2.16b
+        umull2 v0.4s, v1.8h, v2.8h
+        umull2 v0.2d, v1.4s, v2.4s
+
+// CHECK: umull2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0xc0,0x22,0x6e]
+// CHECK: umull2	v0.4s, v1.8h, v2.8h     // encoding: [0x20,0xc0,0x62,0x6e]
+// CHECK: umull2	v0.2d, v1.4s, v2.4s     // encoding: [0x20,0xc0,0xa2,0x6e]
+
+//------------------------------------------------------------------------------
+// Long - Variant 2
+//------------------------------------------------------------------------------
+
+        sqdmlal v0.4s, v1.4h, v2.4h
+        sqdmlal v0.2d, v1.2s, v2.2s
+
+// CHECK: sqdmlal	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0x90,0x62,0x0e]
+// CHECK: sqdmlal	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0x90,0xa2,0x0e]
+
+        sqdmlal2 v0.4s, v1.8h, v2.8h
+        sqdmlal2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sqdmlal2	v0.4s, v1.8h, v2.8h // encoding: [0x20,0x90,0x62,0x4e]
+// CHECK: sqdmlal2	v0.2d, v1.4s, v2.4s // encoding: [0x20,0x90,0xa2,0x4e]
+
+        sqdmlsl v0.4s, v1.4h, v2.4h
+        sqdmlsl v0.2d, v1.2s, v2.2s
+
+// CHECK: sqdmlsl	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xb0,0x62,0x0e]
+// CHECK: sqdmlsl	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0xb0,0xa2,0x0e]
+
+        sqdmlsl2 v0.4s, v1.8h, v2.8h
+        sqdmlsl2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sqdmlsl2	v0.4s, v1.8h, v2.8h // encoding: [0x20,0xb0,0x62,0x4e]
+// CHECK: sqdmlsl2	v0.2d, v1.4s, v2.4s // encoding: [0x20,0xb0,0xa2,0x4e]
+
+        sqdmull v0.4s, v1.4h, v2.4h
+        sqdmull v0.2d, v1.2s, v2.2s
+
+// CHECK: sqdmull	v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xd0,0x62,0x0e]
+// CHECK: sqdmull	v0.2d, v1.2s, v2.2s     // encoding: [0x20,0xd0,0xa2,0x0e]
+
+        sqdmull2 v0.4s, v1.8h, v2.8h
+        sqdmull2 v0.2d, v1.4s, v2.4s
+
+// CHECK: sqdmull2	v0.4s, v1.8h, v2.8h // encoding: [0x20,0xd0,0x62,0x4e]
+// CHECK: sqdmull2	v0.2d, v1.4s, v2.4s // encoding: [0x20,0xd0,0xa2,0x4e]
+
+//------------------------------------------------------------------------------
+// Long - Variant 3
+//------------------------------------------------------------------------------
+
+        pmull v0.8h, v1.8b, v2.8b
+        pmull v0.1q, v1.1d, v2.1d
+
+// CHECK: pmull	v0.8h, v1.8b, v2.8b     // encoding: [0x20,0xe0,0x22,0x0e]
+// CHECK: pmull	v0.1q, v1.1d, v2.1d     // encoding: [0x20,0xe0,0xe2,0x0e]
+
+        pmull2 v0.8h, v1.16b, v2.16b
+        pmull2 v0.1q, v1.2d, v2.2d
+
+// CHECK: pmull2	v0.8h, v1.16b, v2.16b   // encoding: [0x20,0xe0,0x22,0x4e]
+// CHECK: pmull2	v0.1q, v1.2d, v2.2d     // encoding: [0x20,0xe0,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Widen
+//------------------------------------------------------------------------------
+
+        saddw v0.8h, v1.8h, v2.8b
+        saddw v0.4s, v1.4s, v2.4h
+        saddw v0.2d, v1.2d, v2.2s
+
+// CHECK: saddw	v0.8h, v1.8h, v2.8b     // encoding: [0x20,0x10,0x22,0x0e]
+// CHECK: saddw	v0.4s, v1.4s, v2.4h     // encoding: [0x20,0x10,0x62,0x0e]
+// CHECK: saddw	v0.2d, v1.2d, v2.2s     // encoding: [0x20,0x10,0xa2,0x0e]
+
+        saddw2 v0.8h, v1.8h, v2.16b
+        saddw2 v0.4s, v1.4s, v2.8h
+        saddw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: saddw2	v0.8h, v1.8h, v2.16b    // encoding: [0x20,0x10,0x22,0x4e]
+// CHECK: saddw2	v0.4s, v1.4s, v2.8h     // encoding: [0x20,0x10,0x62,0x4e]
+// CHECK: saddw2	v0.2d, v1.2d, v2.4s     // encoding: [0x20,0x10,0xa2,0x4e]
+
+        uaddw v0.8h, v1.8h, v2.8b
+        uaddw v0.4s, v1.4s, v2.4h
+        uaddw v0.2d, v1.2d, v2.2s
+
+// CHECK: uaddw	v0.8h, v1.8h, v2.8b     // encoding: [0x20,0x10,0x22,0x2e]
+// CHECK: uaddw	v0.4s, v1.4s, v2.4h     // encoding: [0x20,0x10,0x62,0x2e]
+// CHECK: uaddw	v0.2d, v1.2d, v2.2s     // encoding: [0x20,0x10,0xa2,0x2e]
+
+        uaddw2 v0.8h, v1.8h, v2.16b
+        uaddw2 v0.4s, v1.4s, v2.8h
+        uaddw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: uaddw2	v0.8h, v1.8h, v2.16b    // encoding: [0x20,0x10,0x22,0x6e]
+// CHECK: uaddw2	v0.4s, v1.4s, v2.8h     // encoding: [0x20,0x10,0x62,0x6e]
+// CHECK: uaddw2	v0.2d, v1.2d, v2.4s     // encoding: [0x20,0x10,0xa2,0x6e]
+
+        ssubw v0.8h, v1.8h, v2.8b
+        ssubw v0.4s, v1.4s, v2.4h
+        ssubw v0.2d, v1.2d, v2.2s
+
+// CHECK: ssubw	v0.8h, v1.8h, v2.8b     // encoding: [0x20,0x30,0x22,0x0e]
+// CHECK: ssubw	v0.4s, v1.4s, v2.4h     // encoding: [0x20,0x30,0x62,0x0e]
+// CHECK: ssubw	v0.2d, v1.2d, v2.2s     // encoding: [0x20,0x30,0xa2,0x0e]
+
+        ssubw2 v0.8h, v1.8h, v2.16b
+        ssubw2 v0.4s, v1.4s, v2.8h
+        ssubw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: ssubw2	v0.8h, v1.8h, v2.16b    // encoding: [0x20,0x30,0x22,0x4e]
+// CHECK: ssubw2	v0.4s, v1.4s, v2.8h     // encoding: [0x20,0x30,0x62,0x4e]
+// CHECK: ssubw2	v0.2d, v1.2d, v2.4s     // encoding: [0x20,0x30,0xa2,0x4e]
+
+        usubw v0.8h, v1.8h, v2.8b
+        usubw v0.4s, v1.4s, v2.4h
+        usubw v0.2d, v1.2d, v2.2s
+
+// CHECK: usubw	v0.8h, v1.8h, v2.8b     // encoding: [0x20,0x30,0x22,0x2e]
+// CHECK: usubw	v0.4s, v1.4s, v2.4h     // encoding: [0x20,0x30,0x62,0x2e]
+// CHECK: usubw	v0.2d, v1.2d, v2.2s     // encoding: [0x20,0x30,0xa2,0x2e]
+
+        usubw2 v0.8h, v1.8h, v2.16b
+        usubw2 v0.4s, v1.4s, v2.8h
+        usubw2 v0.2d, v1.2d, v2.4s
+
+// CHECK: usubw2	v0.8h, v1.8h, v2.16b    // encoding: [0x20,0x30,0x22,0x6e]
+// CHECK: usubw2	v0.4s, v1.4s, v2.8h     // encoding: [0x20,0x30,0x62,0x6e]
+// CHECK: usubw2	v0.2d, v1.2d, v2.4s     // encoding: [0x20,0x30,0xa2,0x6e]
+
+//------------------------------------------------------------------------------
+// Narrow
+//------------------------------------------------------------------------------
+
+        addhn v0.8b, v1.8h, v2.8h
+        addhn v0.4h, v1.4s, v2.4s
+        addhn v0.2s, v1.2d, v2.2d
+
+// CHECK: addhn	v0.8b, v1.8h, v2.8h     // encoding: [0x20,0x40,0x22,0x0e]
+// CHECK: addhn	v0.4h, v1.4s, v2.4s     // encoding: [0x20,0x40,0x62,0x0e]
+// CHECK: addhn	v0.2s, v1.2d, v2.2d     // encoding: [0x20,0x40,0xa2,0x0e]
+
+        addhn2 v0.16b, v1.8h, v2.8h
+        addhn2 v0.8h, v1.4s, v2.4s
+        addhn2 v0.4s, v1.2d, v2.2d
+
+// CHECK: addhn2	v0.16b, v1.8h, v2.8h    // encoding: [0x20,0x40,0x22,0x4e]
+// CHECK: addhn2	v0.8h, v1.4s, v2.4s     // encoding: [0x20,0x40,0x62,0x4e]
+// CHECK: addhn2	v0.4s, v1.2d, v2.2d     // encoding: [0x20,0x40,0xa2,0x4e]
+
+        raddhn v0.8b, v1.8h, v2.8h
+        raddhn v0.4h, v1.4s, v2.4s
+        raddhn v0.2s, v1.2d, v2.2d
+
+// CHECK: raddhn	v0.8b, v1.8h, v2.8h     // encoding: [0x20,0x40,0x22,0x2e]
+// CHECK: raddhn	v0.4h, v1.4s, v2.4s     // encoding: [0x20,0x40,0x62,0x2e]
+// CHECK: raddhn	v0.2s, v1.2d, v2.2d     // encoding: [0x20,0x40,0xa2,0x2e]
+
+        raddhn2 v0.16b, v1.8h, v2.8h
+        raddhn2 v0.8h, v1.4s, v2.4s
+        raddhn2 v0.4s, v1.2d, v2.2d
+
+// CHECK: raddhn2	v0.16b, v1.8h, v2.8h    // encoding: [0x20,0x40,0x22,0x6e]
+// CHECK: raddhn2	v0.8h, v1.4s, v2.4s     // encoding: [0x20,0x40,0x62,0x6e]
+// CHECK: raddhn2	v0.4s, v1.2d, v2.2d     // encoding: [0x20,0x40,0xa2,0x6e]
+
+        rsubhn v0.8b, v1.8h, v2.8h
+        rsubhn v0.4h, v1.4s, v2.4s
+        rsubhn v0.2s, v1.2d, v2.2d
+
+// CHECK: rsubhn	v0.8b, v1.8h, v2.8h     // encoding: [0x20,0x60,0x22,0x2e]
+// CHECK: rsubhn	v0.4h, v1.4s, v2.4s     // encoding: [0x20,0x60,0x62,0x2e]
+// CHECK: rsubhn	v0.2s, v1.2d, v2.2d     // encoding: [0x20,0x60,0xa2,0x2e]
+
+        rsubhn2 v0.16b, v1.8h, v2.8h
+        rsubhn2 v0.8h, v1.4s, v2.4s
+        rsubhn2 v0.4s, v1.2d, v2.2d
+
+// CHECK: rsubhn2	v0.16b, v1.8h, v2.8h    // encoding: [0x20,0x60,0x22,0x6e]
+// CHECK: rsubhn2	v0.8h, v1.4s, v2.4s     // encoding: [0x20,0x60,0x62,0x6e]
+// CHECK: rsubhn2	v0.4s, v1.2d, v2.2d     // encoding: [0x20,0x60,0xa2,0x6e]
diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s
new file mode 100644
index 000000000000..178eb26f64c2
--- /dev/null
+++ b/test/MC/AArch64/neon-aba-abd.s
@@ -0,0 +1,78 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Absolute Difference and Accumulate (Signed, Unsigned)
+//----------------------------------------------------------------------
+         uaba v0.8b, v1.8b, v2.8b
+         uaba v0.16b, v1.16b, v2.16b
+         uaba v0.4h, v1.4h, v2.4h
+         uaba v0.8h, v1.8h, v2.8h
+         uaba v0.2s, v1.2s, v2.2s
+         uaba v0.4s, v1.4s, v2.4s
+
+// CHECK: uaba v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x7c,0x22,0x2e]
+// CHECK: uaba v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x7c,0x22,0x6e]
+// CHECK: uaba v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x7c,0x62,0x2e]
+// CHECK: uaba v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x7c,0x62,0x6e]
+// CHECK: uaba v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x7c,0xa2,0x2e]
+// CHECK: uaba v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x7c,0xa2,0x6e]
+
+
+         saba v0.8b, v1.8b, v2.8b
+         saba v0.16b, v1.16b, v2.16b
+         saba v0.4h, v1.4h, v2.4h
+         saba v0.8h, v1.8h, v2.8h
+         saba v0.2s, v1.2s, v2.2s
+         saba v0.4s, v1.4s, v2.4s
+
+// CHECK: saba v0.8b, v1.8b, v2.8b         // encoding: [0x20,0x7c,0x22,0x0e]
+// CHECK: saba v0.16b, v1.16b, v2.16b      // encoding: [0x20,0x7c,0x22,0x4e]
+// CHECK: saba v0.4h, v1.4h, v2.4h         // encoding: [0x20,0x7c,0x62,0x0e]
+// CHECK: saba v0.8h, v1.8h, v2.8h         // encoding: [0x20,0x7c,0x62,0x4e]
+// CHECK: saba v0.2s, v1.2s, v2.2s         // encoding: [0x20,0x7c,0xa2,0x0e]
+// CHECK: saba v0.4s, v1.4s, v2.4s         // encoding: [0x20,0x7c,0xa2,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Absolute Difference (Signed, Unsigned)
+//----------------------------------------------------------------------
+         uabd v0.8b, v1.8b, v2.8b
+         uabd v0.16b, v1.16b, v2.16b
+         uabd v0.4h, v1.4h, v2.4h
+         uabd v0.8h, v1.8h, v2.8h
+         uabd v0.2s, v1.2s, v2.2s
+         uabd v0.4s, v1.4s, v2.4s
+
+// CHECK: uabd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x74,0x22,0x2e]
+// CHECK: uabd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x74,0x22,0x6e]
+// CHECK: uabd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x74,0x62,0x2e]
+// CHECK: uabd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x74,0x62,0x6e]
+// CHECK: uabd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x74,0xa2,0x2e]
+// CHECK: uabd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x74,0xa2,0x6e]
+
+         sabd v0.8b, v1.8b, v2.8b
+         sabd v0.16b, v1.16b, v2.16b
+         sabd v0.4h, v1.4h, v2.4h
+         sabd v0.8h, v1.8h, v2.8h
+         sabd v0.2s, v1.2s, v2.2s
+         sabd v0.4s, v1.4s, v2.4s
+
+// CHECK: sabd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x74,0x22,0x0e]
+// CHECK: sabd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x74,0x22,0x4e]
+// CHECK: sabd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x74,0x62,0x0e]
+// CHECK: sabd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x74,0x62,0x4e]
+// CHECK: sabd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x74,0xa2,0x0e]
+// CHECK: sabd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x74,0xa2,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Absolute Difference (Floating Point)
+//----------------------------------------------------------------------
+         fabd v0.2s, v1.2s, v2.2s
+         fabd v31.4s, v15.4s, v16.4s
+         fabd v7.2d, v8.2d, v25.2d
+
+// CHECK: fabd v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xd4,0xa2,0x2e]
+// CHECK: fabd v31.4s, v15.4s, v16.4s // encoding: [0xff,0xd5,0xb0,0x6e]
+// CHECK: fabd v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xd5,0xf9,0x6e]
+
diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s
new file mode 100644
index 000000000000..8b1c2d421ba6
--- /dev/null
+++ b/test/MC/AArch64/neon-across.s
@@ -0,0 +1,101 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions across vector registers
+//------------------------------------------------------------------------------
+
+        saddlv h0, v1.8b
+        saddlv h0, v1.16b
+        saddlv s0, v1.4h
+        saddlv s0, v1.8h
+        saddlv d0, v1.4s
+
+// CHECK: saddlv	h0, v1.8b               // encoding: [0x20,0x38,0x30,0x0e]
+// CHECK: saddlv	h0, v1.16b              // encoding: [0x20,0x38,0x30,0x4e]
+// CHECK: saddlv	s0, v1.4h               // encoding: [0x20,0x38,0x70,0x0e]
+// CHECK: saddlv	s0, v1.8h               // encoding: [0x20,0x38,0x70,0x4e]
+// CHECK: saddlv	d0, v1.4s               // encoding: [0x20,0x38,0xb0,0x4e]
+
+        uaddlv h0, v1.8b
+        uaddlv h0, v1.16b
+        uaddlv s0, v1.4h
+        uaddlv s0, v1.8h
+        uaddlv d0, v1.4s
+
+// CHECK: uaddlv	h0, v1.8b               // encoding: [0x20,0x38,0x30,0x2e]
+// CHECK: uaddlv	h0, v1.16b              // encoding: [0x20,0x38,0x30,0x6e]
+// CHECK: uaddlv	s0, v1.4h               // encoding: [0x20,0x38,0x70,0x2e]
+// CHECK: uaddlv	s0, v1.8h               // encoding: [0x20,0x38,0x70,0x6e]
+// CHECK: uaddlv	d0, v1.4s               // encoding: [0x20,0x38,0xb0,0x6e]
+
+        smaxv b0, v1.8b
+        smaxv b0, v1.16b
+        smaxv h0, v1.4h
+        smaxv h0, v1.8h
+        smaxv s0, v1.4s
+
+// CHECK: smaxv	b0, v1.8b               // encoding: [0x20,0xa8,0x30,0x0e]
+// CHECK: smaxv	b0, v1.16b              // encoding: [0x20,0xa8,0x30,0x4e]
+// CHECK: smaxv	h0, v1.4h               // encoding: [0x20,0xa8,0x70,0x0e]
+// CHECK: smaxv	h0, v1.8h               // encoding: [0x20,0xa8,0x70,0x4e]
+// CHECK: smaxv	s0, v1.4s               // encoding: [0x20,0xa8,0xb0,0x4e]
+
+        sminv b0, v1.8b
+        sminv b0, v1.16b
+        sminv h0, v1.4h
+        sminv h0, v1.8h
+        sminv s0, v1.4s
+
+// CHECK: sminv	b0, v1.8b               // encoding: [0x20,0xa8,0x31,0x0e]
+// CHECK: sminv	b0, v1.16b              // encoding: [0x20,0xa8,0x31,0x4e]
+// CHECK: sminv	h0, v1.4h               // encoding: [0x20,0xa8,0x71,0x0e]
+// CHECK: sminv	h0, v1.8h               // encoding: [0x20,0xa8,0x71,0x4e]
+// CHECK: sminv	s0, v1.4s               // encoding: [0x20,0xa8,0xb1,0x4e]
+
+        umaxv b0, v1.8b
+        umaxv b0, v1.16b
+        umaxv h0, v1.4h
+        umaxv h0, v1.8h
+        umaxv s0, v1.4s
+
+// CHECK: umaxv	b0, v1.8b               // encoding: [0x20,0xa8,0x30,0x2e]
+// CHECK: umaxv	b0, v1.16b              // encoding: [0x20,0xa8,0x30,0x6e]
+// CHECK: umaxv	h0, v1.4h               // encoding: [0x20,0xa8,0x70,0x2e]
+// CHECK: umaxv	h0, v1.8h               // encoding: [0x20,0xa8,0x70,0x6e]
+// CHECK: umaxv	s0, v1.4s               // encoding: [0x20,0xa8,0xb0,0x6e]
+
+        uminv b0, v1.8b
+        uminv b0, v1.16b
+        uminv h0, v1.4h
+        uminv h0, v1.8h
+        uminv s0, v1.4s
+
+// CHECK: uminv	b0, v1.8b               // encoding: [0x20,0xa8,0x31,0x2e]
+// CHECK: uminv	b0, v1.16b              // encoding: [0x20,0xa8,0x31,0x6e]
+// CHECK: uminv	h0, v1.4h               // encoding: [0x20,0xa8,0x71,0x2e]
+// CHECK: uminv	h0, v1.8h               // encoding: [0x20,0xa8,0x71,0x6e]
+// CHECK: uminv	s0, v1.4s               // encoding: [0x20,0xa8,0xb1,0x6e]
+
+        addv b0, v1.8b
+        addv b0, v1.16b
+        addv h0, v1.4h
+        addv h0, v1.8h
+        addv s0, v1.4s
+
+// CHECK: addv	b0, v1.8b               // encoding: [0x20,0xb8,0x31,0x0e]
+// CHECK: addv	b0, v1.16b              // encoding: [0x20,0xb8,0x31,0x4e]
+// CHECK: addv	h0, v1.4h               // encoding: [0x20,0xb8,0x71,0x0e]
+// CHECK: addv	h0, v1.8h               // encoding: [0x20,0xb8,0x71,0x4e]
+// CHECK: addv	s0, v1.4s               // encoding: [0x20,0xb8,0xb1,0x4e]
+
+        fmaxnmv s0, v1.4s
+        fminnmv s0, v1.4s
+        fmaxv s0, v1.4s
+        fminv s0, v1.4s
+
+// CHECK: fmaxnmv	s0, v1.4s               // encoding: [0x20,0xc8,0x30,0x6e]
+// CHECK: fminnmv	s0, v1.4s               // encoding: [0x20,0xc8,0xb0,0x6e]
+// CHECK: fmaxv	s0, v1.4s               // encoding: [0x20,0xf8,0x30,0x6e]
+// CHECK: fminv	s0, v1.4s               // encoding: [0x20,0xf8,0xb0,0x6e]
diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s
new file mode 100644
index 000000000000..df9938b07e52
--- /dev/null
+++ b/test/MC/AArch64/neon-add-pairwise.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Add Pairwise (Integer)
+//------------------------------------------------------------------------------
+         addp v0.8b, v1.8b, v2.8b
+         addp v0.16b, v1.16b, v2.16b
+         addp v0.4h, v1.4h, v2.4h
+         addp v0.8h, v1.8h, v2.8h
+         addp v0.2s, v1.2s, v2.2s
+         addp v0.4s, v1.4s, v2.4s
+         addp v0.2d, v1.2d, v2.2d
+
+// CHECK: addp v0.8b, v1.8b, v2.8b        // encoding: [0x20,0xbc,0x22,0x0e]
+// CHECK: addp v0.16b, v1.16b, v2.16b     // encoding: [0x20,0xbc,0x22,0x4e]
+// CHECK: addp v0.4h, v1.4h, v2.4h        // encoding: [0x20,0xbc,0x62,0x0e]
+// CHECK: addp v0.8h, v1.8h, v2.8h        // encoding: [0x20,0xbc,0x62,0x4e]
+// CHECK: addp v0.2s, v1.2s, v2.2s        // encoding: [0x20,0xbc,0xa2,0x0e]
+// CHECK: addp v0.4s, v1.4s, v2.4s        // encoding: [0x20,0xbc,0xa2,0x4e]
+// CHECK: addp v0.2d, v1.2d, v2.2d        // encoding: [0x20,0xbc,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Add Pairwise (Floating Point)
+//------------------------------------------------------------------------------
+         faddp v0.2s, v1.2s, v2.2s
+         faddp v0.4s, v1.4s, v2.4s
+         faddp v0.2d, v1.2d, v2.2d
+
+// CHECK: faddp v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0x22,0x2e]
+// CHECK: faddp v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0x22,0x6e]
+// CHECK: faddp v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0x62,0x6e]
diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s
new file mode 100644
index 000000000000..68f169b3dd90
--- /dev/null
+++ b/test/MC/AArch64/neon-add-sub-instructions.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Add
+//------------------------------------------------------------------------------
+         add v0.8b, v1.8b, v2.8b
+         add v0.16b, v1.16b, v2.16b
+         add v0.4h, v1.4h, v2.4h
+         add v0.8h, v1.8h, v2.8h
+         add v0.2s, v1.2s, v2.2s
+         add v0.4s, v1.4s, v2.4s
+         add v0.2d, v1.2d, v2.2d
+
+// CHECK: add v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x84,0x22,0x0e]
+// CHECK: add v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x84,0x22,0x4e]
+// CHECK: add v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x84,0x62,0x0e]
+// CHECK: add v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x84,0x62,0x4e]
+// CHECK: add v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x84,0xa2,0x0e]
+// CHECK: add v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x84,0xa2,0x4e]
+// CHECK: add v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x84,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Sub
+//------------------------------------------------------------------------------
+         sub v0.8b, v1.8b, v2.8b
+         sub v0.16b, v1.16b, v2.16b
+         sub v0.4h, v1.4h, v2.4h
+         sub v0.8h, v1.8h, v2.8h
+         sub v0.2s, v1.2s, v2.2s
+         sub v0.4s, v1.4s, v2.4s
+         sub v0.2d, v1.2d, v2.2d
+
+// CHECK: sub v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x84,0x22,0x2e]
+// CHECK: sub v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x84,0x22,0x6e]
+// CHECK: sub v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x84,0x62,0x2e]
+// CHECK: sub v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x84,0x62,0x6e]
+// CHECK: sub v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x84,0xa2,0x2e]
+// CHECK: sub v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x84,0xa2,0x6e]
+// CHECK: sub v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x84,0xe2,0x6e]
+
+//------------------------------------------------------------------------------
+// Vector Floating-Point Add
+//------------------------------------------------------------------------------
+         fadd v0.2s, v1.2s, v2.2s
+         fadd v0.4s, v1.4s, v2.4s
+         fadd v0.2d, v1.2d, v2.2d
+
+// CHECK: fadd v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0x22,0x0e]
+// CHECK: fadd v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0x22,0x4e]
+// CHECK: fadd v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0x62,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Vector Floating-Point Sub
+//------------------------------------------------------------------------------
+         fsub v0.2s, v1.2s, v2.2s
+         fsub v0.4s, v1.4s, v2.4s
+         fsub v0.2d, v1.2d, v2.2d
+
+// CHECK: fsub v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xd4,0xa2,0x0e]
+// CHECK: fsub v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xd4,0xa2,0x4e]
+// CHECK: fsub v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xd4,0xe2,0x4e]
+
+
+
diff --git a/test/MC/AArch64/neon-bitwise-instructions.s b/test/MC/AArch64/neon-bitwise-instructions.s
new file mode 100644
index 000000000000..79d0a9b70b54
--- /dev/null
+++ b/test/MC/AArch64/neon-bitwise-instructions.s
@@ -0,0 +1,60 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler accepts the documented AArch64 syntax for the NEON bitwise instructions and emits the expected encodings
+
+//------------------------------------------------------------------------------
+// Vector And
+//------------------------------------------------------------------------------
+         and v0.8b, v1.8b, v2.8b
+         and v0.16b, v1.16b, v2.16b
+
+// CHECK: and v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0x22,0x0e]
+// CHECK: and v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0x22,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Vector Orr
+//------------------------------------------------------------------------------
+         orr v0.8b, v1.8b, v2.8b
+         orr v0.16b, v1.16b, v2.16b
+
+// CHECK: orr v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0xa2,0x0e]
+// CHECK: orr v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0xa2,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Vector Eor
+//------------------------------------------------------------------------------
+         eor v0.8b, v1.8b, v2.8b
+         eor v0.16b, v1.16b, v2.16b
+
+// CHECK: eor v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0x22,0x2e]
+// CHECK: eor v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0x22,0x6e]
+
+
+//----------------------------------------------------------------------
+// Vector Bitwise (BIT, BIF, BSL, ORN, BIC)
+//----------------------------------------------------------------------
+
+         bit v0.8b, v1.8b, v2.8b
+         bit v0.16b, v1.16b, v2.16b
+         bif v0.8b, v1.8b, v2.8b
+         bif v0.16b, v1.16b, v2.16b
+         bsl v0.8b, v1.8b, v2.8b
+         bsl v0.16b, v1.16b, v2.16b
+         orn v0.8b, v1.8b, v2.8b
+         orn v0.16b, v1.16b, v2.16b
+         bic v0.8b, v1.8b, v2.8b
+         bic v0.16b, v1.16b, v2.16b
+
+// CHECK: bit v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0xa2,0x2e]
+// CHECK: bit v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0xa2,0x6e]
+// CHECK: bif v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0xe2,0x2e]
+// CHECK: bif v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0xe2,0x6e]
+// CHECK: bsl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0x62,0x2e]
+// CHECK: bsl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0x62,0x6e]
+// CHECK: orn v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0xe2,0x0e]
+// CHECK: orn v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0xe2,0x4e]
+// CHECK: bic v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x1c,0x62,0x0e]
+// CHECK: bic v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x1c,0x62,0x4e]
+
diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s
new file mode 100644
index 000000000000..e4bc20258357
--- /dev/null
+++ b/test/MC/AArch64/neon-compare-instructions.s
@@ -0,0 +1,405 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler accepts the documented AArch64 syntax for the NEON compare instructions and emits the expected encodings
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal (Integer)
+//----------------------------------------------------------------------
+
+         cmeq v0.8b, v15.8b, v17.8b
+         cmeq v1.16b, v31.16b, v8.16b
+         cmeq v15.4h, v16.4h, v17.4h
+         cmeq v5.8h, v6.8h, v7.8h
+         cmeq v29.2s, v27.2s, v28.2s
+         cmeq v9.4s, v7.4s, v8.4s
+         cmeq v3.2d, v31.2d, v21.2d
+
+// CHECK: cmeq v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x8d,0x31,0x2e]
+// CHECK: cmeq v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x8f,0x28,0x6e]
+// CHECK: cmeq v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x8e,0x71,0x2e]
+// CHECK: cmeq v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x8c,0x67,0x6e]
+// CHECK: cmeq v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x8f,0xbc,0x2e]
+// CHECK: cmeq v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x8c,0xa8,0x6e]
+// CHECK: cmeq v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x8f,0xf5,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Higher or Same (Unsigned Integer)
+// Vector Compare Mask Less or Same (Unsigned Integer)
+// CMLS is an alias for CMHS with the two source operands swapped; it is printed back as CMHS.
+//----------------------------------------------------------------------
+
+         cmhs v0.8b, v15.8b, v17.8b
+         cmhs v1.16b, v31.16b, v8.16b
+         cmhs v15.4h, v16.4h, v17.4h
+         cmhs v5.8h, v6.8h, v7.8h
+         cmhs v29.2s, v27.2s, v28.2s
+         cmhs v9.4s, v7.4s, v8.4s
+         cmhs v3.2d, v31.2d, v21.2d
+
+         cmls v0.8b, v17.8b, v15.8b
+         cmls v1.16b, v8.16b, v31.16b
+         cmls v15.4h, v17.4h, v16.4h
+         cmls v5.8h, v7.8h, v6.8h
+         cmls v29.2s, v28.2s, v27.2s
+         cmls v9.4s, v8.4s, v7.4s
+         cmls v3.2d, v21.2d, v31.2d
+
+// CHECK: cmhs v0.8b, v15.8b, v17.8b   // encoding: [0xe0,0x3d,0x31,0x2e]
+// CHECK: cmhs v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x6e]
+// CHECK: cmhs v15.4h, v16.4h, v17.4h  // encoding: [0x0f,0x3e,0x71,0x2e]
+// CHECK: cmhs v5.8h, v6.8h, v7.8h     // encoding: [0xc5,0x3c,0x67,0x6e]
+// CHECK: cmhs v29.2s, v27.2s, v28.2s  // encoding: [0x7d,0x3f,0xbc,0x2e]
+// CHECK: cmhs v9.4s, v7.4s, v8.4s     // encoding: [0xe9,0x3c,0xa8,0x6e]
+// CHECK: cmhs v3.2d, v31.2d, v21.2d   // encoding: [0xe3,0x3f,0xf5,0x6e]
+// CHECK: cmhs v0.8b, v15.8b, v17.8b   // encoding: [0xe0,0x3d,0x31,0x2e]
+// CHECK: cmhs v1.16b, v31.16b, v8.16b // encoding: [0xe1,0x3f,0x28,0x6e]
+// CHECK: cmhs v15.4h, v16.4h, v17.4h  // encoding: [0x0f,0x3e,0x71,0x2e]
+// CHECK: cmhs v5.8h, v6.8h, v7.8h     // encoding: [0xc5,0x3c,0x67,0x6e]
+// CHECK: cmhs v29.2s, v27.2s, v28.2s  // encoding: [0x7d,0x3f,0xbc,0x2e]
+// CHECK: cmhs v9.4s, v7.4s, v8.4s     // encoding: [0xe9,0x3c,0xa8,0x6e]
+// CHECK: cmhs v3.2d, v31.2d, v21.2d   // encoding: [0xe3,0x3f,0xf5,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than or Equal (Integer)
+// Vector Compare Mask Less Than or Equal (Integer)
+// CMLE is an alias for CMGE with the two source operands swapped; it is printed back as CMGE.
+//----------------------------------------------------------------------
+
+         cmge v0.8b, v15.8b, v17.8b
+         cmge v1.16b, v31.16b, v8.16b
+         cmge v15.4h, v16.4h, v17.4h
+         cmge v5.8h, v6.8h, v7.8h
+         cmge v29.2s, v27.2s, v28.2s
+         cmge v9.4s, v7.4s, v8.4s
+         cmge v3.2d, v31.2d, v21.2d
+
+         cmle v0.8b, v17.8b, v15.8b
+         cmle v1.16b, v8.16b, v31.16b
+         cmle v15.4h, v17.4h, v16.4h
+         cmle v5.8h, v7.8h, v6.8h
+         cmle v29.2s, v28.2s, v27.2s
+         cmle v9.4s, v8.4s, v7.4s
+         cmle v3.2d, v21.2d, v31.2d
+
+// CHECK: cmge v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x3d,0x31,0x0e]
+// CHECK: cmge v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x3f,0x28,0x4e]
+// CHECK: cmge v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x3e,0x71,0x0e]
+// CHECK: cmge v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x3c,0x67,0x4e]
+// CHECK: cmge v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x3f,0xbc,0x0e]
+// CHECK: cmge v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x3c,0xa8,0x4e]
+// CHECK: cmge v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x3f,0xf5,0x4e]
+// CHECK: cmge v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x3d,0x31,0x0e]
+// CHECK: cmge v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x3f,0x28,0x4e]
+// CHECK: cmge v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x3e,0x71,0x0e]
+// CHECK: cmge v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x3c,0x67,0x4e]
+// CHECK: cmge v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x3f,0xbc,0x0e]
+// CHECK: cmge v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x3c,0xa8,0x4e]
+// CHECK: cmge v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x3f,0xf5,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Higher (Unsigned Integer)
+// Vector Compare Mask Lower (Unsigned Integer)
+// CMLO is an alias for CMHI with the two source operands swapped; it is printed back as CMHI.
+//----------------------------------------------------------------------
+
+         cmhi v0.8b, v15.8b, v17.8b
+         cmhi v1.16b, v31.16b, v8.16b
+         cmhi v15.4h, v16.4h, v17.4h
+         cmhi v5.8h, v6.8h, v7.8h
+         cmhi v29.2s, v27.2s, v28.2s
+         cmhi v9.4s, v7.4s, v8.4s
+         cmhi v3.2d, v31.2d, v21.2d
+
+         cmlo v0.8b, v17.8b, v15.8b
+         cmlo v1.16b, v8.16b, v31.16b
+         cmlo v15.4h, v17.4h, v16.4h
+         cmlo v5.8h, v7.8h, v6.8h
+         cmlo v29.2s, v28.2s, v27.2s
+         cmlo v9.4s, v8.4s, v7.4s
+         cmlo v3.2d, v21.2d, v31.2d
+
+// CHECK: cmhi v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x35,0x31,0x2e]
+// CHECK: cmhi v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x37,0x28,0x6e]
+// CHECK: cmhi v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x36,0x71,0x2e]
+// CHECK: cmhi v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x34,0x67,0x6e]
+// CHECK: cmhi v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x37,0xbc,0x2e]
+// CHECK: cmhi v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x34,0xa8,0x6e]
+// CHECK: cmhi v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x37,0xf5,0x6e]
+// CHECK: cmhi v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x35,0x31,0x2e]
+// CHECK: cmhi v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x37,0x28,0x6e]
+// CHECK: cmhi v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x36,0x71,0x2e]
+// CHECK: cmhi v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x34,0x67,0x6e]
+// CHECK: cmhi v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x37,0xbc,0x2e]
+// CHECK: cmhi v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x34,0xa8,0x6e]
+// CHECK: cmhi v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x37,0xf5,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than (Integer)
+// Vector Compare Mask Less Than (Integer)
+// CMLT is an alias for CMGT with the two source operands swapped; it is printed back as CMGT.
+//----------------------------------------------------------------------
+
+         cmgt v0.8b, v15.8b, v17.8b
+         cmgt v1.16b, v31.16b, v8.16b
+         cmgt v15.4h, v16.4h, v17.4h
+         cmgt v5.8h, v6.8h, v7.8h
+         cmgt v29.2s, v27.2s, v28.2s
+         cmgt v9.4s, v7.4s, v8.4s
+         cmgt v3.2d, v31.2d, v21.2d
+
+         cmlt v0.8b, v17.8b, v15.8b
+         cmlt v1.16b, v8.16b, v31.16b
+         cmlt v15.4h, v17.4h, v16.4h
+         cmlt v5.8h, v7.8h, v6.8h
+         cmlt v29.2s, v28.2s, v27.2s
+         cmlt v9.4s, v8.4s, v7.4s
+         cmlt v3.2d, v21.2d, v31.2d
+
+// CHECK: cmgt v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x35,0x31,0x0e]
+// CHECK: cmgt v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x37,0x28,0x4e]
+// CHECK: cmgt v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x36,0x71,0x0e]
+// CHECK: cmgt v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x34,0x67,0x4e]
+// CHECK: cmgt v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x37,0xbc,0x0e]
+// CHECK: cmgt v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x34,0xa8,0x4e]
+// CHECK: cmgt v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x37,0xf5,0x4e]
+// CHECK: cmgt v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x35,0x31,0x0e]
+// CHECK: cmgt v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x37,0x28,0x4e]
+// CHECK: cmgt v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x36,0x71,0x0e]
+// CHECK: cmgt v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x34,0x67,0x4e]
+// CHECK: cmgt v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x37,0xbc,0x0e]
+// CHECK: cmgt v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x34,0xa8,0x4e]
+// CHECK: cmgt v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x37,0xf5,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Bitwise Test (Integer)
+//----------------------------------------------------------------------
+
+         cmtst v0.8b, v15.8b, v17.8b
+         cmtst v1.16b, v31.16b, v8.16b
+         cmtst v15.4h, v16.4h, v17.4h
+         cmtst v5.8h, v6.8h, v7.8h
+         cmtst v29.2s, v27.2s, v28.2s
+         cmtst v9.4s, v7.4s, v8.4s
+         cmtst v3.2d, v31.2d, v21.2d
+
+// CHECK: cmtst v0.8b, v15.8b, v17.8b    // encoding: [0xe0,0x8d,0x31,0x0e]
+// CHECK: cmtst v1.16b, v31.16b, v8.16b  // encoding: [0xe1,0x8f,0x28,0x4e]
+// CHECK: cmtst v15.4h, v16.4h, v17.4h   // encoding: [0x0f,0x8e,0x71,0x0e]
+// CHECK: cmtst v5.8h, v6.8h, v7.8h      // encoding: [0xc5,0x8c,0x67,0x4e]
+// CHECK: cmtst v29.2s, v27.2s, v28.2s   // encoding: [0x7d,0x8f,0xbc,0x0e]
+// CHECK: cmtst v9.4s, v7.4s, v8.4s      // encoding: [0xe9,0x8c,0xa8,0x4e]
+// CHECK: cmtst v3.2d, v31.2d, v21.2d    // encoding: [0xe3,0x8f,0xf5,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal (Floating Point)
+//----------------------------------------------------------------------
+
+         fcmeq v0.2s, v31.2s, v16.2s
+         fcmeq v4.4s, v7.4s, v15.4s
+         fcmeq v29.2d, v2.2d, v5.2d
+
+// CHECK: fcmeq v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0x30,0x0e]
+// CHECK: fcmeq v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xe4,0x2f,0x4e]
+// CHECK: fcmeq v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xe4,0x65,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than Or Equal (Floating Point)
+// Vector Compare Mask Less Than Or Equal (Floating Point)
+// FCMLE is an alias for FCMGE with the two source operands swapped; it is printed back as FCMGE.
+//----------------------------------------------------------------------
+
+         fcmge v31.4s, v29.4s, v28.4s
+         fcmge v3.2s, v8.2s, v12.2s
+         fcmge v17.2d, v15.2d, v13.2d
+         fcmle v31.4s, v28.4s, v29.4s
+         fcmle v3.2s,  v12.2s, v8.2s
+         fcmle v17.2d, v13.2d, v15.2d
+
+// CHECK: fcmge v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xe7,0x3c,0x6e]
+// CHECK: fcmge v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xe5,0x2c,0x2e]
+// CHECK: fcmge v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xe5,0x6d,0x6e]
+// CHECK: fcmge v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xe7,0x3c,0x6e]
+// CHECK: fcmge v3.2s,  v8.2s, v12.2s   // encoding: [0x03,0xe5,0x2c,0x2e]
+// CHECK: fcmge v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xe5,0x6d,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than (Floating Point)
+// Vector Compare Mask Less Than (Floating Point)
+// FCMLT is an alias for FCMGT with the two source operands swapped; it is printed back as FCMGT.
+//----------------------------------------------------------------------
+
+         fcmgt v0.2s, v31.2s, v16.2s
+         fcmgt v4.4s, v7.4s, v15.4s
+         fcmgt v29.2d, v2.2d, v5.2d
+         fcmlt v0.2s, v16.2s, v31.2s
+         fcmlt v4.4s, v15.4s, v7.4s
+         fcmlt v29.2d, v5.2d, v2.2d
+
+// CHECK: fcmgt v0.2s, v31.2s, v16.2s  // encoding: [0xe0,0xe7,0xb0,0x2e]
+// CHECK: fcmgt v4.4s, v7.4s, v15.4s   // encoding: [0xe4,0xe4,0xaf,0x6e]
+// CHECK: fcmgt v29.2d, v2.2d, v5.2d   // encoding: [0x5d,0xe4,0xe5,0x6e]
+// CHECK: fcmgt v0.2s, v31.2s, v16.2s  // encoding: [0xe0,0xe7,0xb0,0x2e]
+// CHECK: fcmgt v4.4s, v7.4s, v15.4s   // encoding: [0xe4,0xe4,0xaf,0x6e]
+// CHECK: fcmgt v29.2d, v2.2d, v5.2d   // encoding: [0x5d,0xe4,0xe5,0x6e]
+
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal to Zero (Integer)
+//----------------------------------------------------------------------
+
+         cmeq v0.8b, v15.8b, #0
+         cmeq v1.16b, v31.16b, #0
+         cmeq v15.4h, v16.4h, #0
+         cmeq v5.8h, v6.8h, #0
+         cmeq v29.2s, v27.2s, #0
+         cmeq v9.4s, v7.4s, #0
+         cmeq v3.2d, v31.2d, #0
+
+// CHECK: cmeq v0.8b, v15.8b, #0x0    // encoding: [0xe0,0x99,0x20,0x0e]
+// CHECK: cmeq v1.16b, v31.16b, #0x0  // encoding: [0xe1,0x9b,0x20,0x4e]
+// CHECK: cmeq v15.4h, v16.4h, #0x0   // encoding: [0x0f,0x9a,0x60,0x0e]
+// CHECK: cmeq v5.8h, v6.8h, #0x0     // encoding: [0xc5,0x98,0x60,0x4e]
+// CHECK: cmeq v29.2s, v27.2s, #0x0   // encoding: [0x7d,0x9b,0xa0,0x0e]
+// CHECK: cmeq v9.4s, v7.4s, #0x0     // encoding: [0xe9,0x98,0xa0,0x4e]
+// CHECK: cmeq v3.2d, v31.2d, #0x0    // encoding: [0xe3,0x9b,0xe0,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
+//----------------------------------------------------------------------
+         cmge v0.8b, v15.8b, #0
+         cmge v1.16b, v31.16b, #0
+         cmge v15.4h, v16.4h, #0
+         cmge v5.8h, v6.8h, #0
+         cmge v29.2s, v27.2s, #0
+         cmge v17.4s, v20.4s, #0
+         cmge v3.2d, v31.2d, #0
+
+// CHECK: cmge v0.8b, v15.8b, #0x0    // encoding: [0xe0,0x89,0x20,0x2e]
+// CHECK: cmge v1.16b, v31.16b, #0x0  // encoding: [0xe1,0x8b,0x20,0x6e]
+// CHECK: cmge v15.4h, v16.4h, #0x0   // encoding: [0x0f,0x8a,0x60,0x2e]
+// CHECK: cmge v5.8h, v6.8h, #0x0     // encoding: [0xc5,0x88,0x60,0x6e]
+// CHECK: cmge v29.2s, v27.2s, #0x0   // encoding: [0x7d,0x8b,0xa0,0x2e]
+// CHECK: cmge v17.4s, v20.4s, #0x0   // encoding: [0x91,0x8a,0xa0,0x6e]
+// CHECK: cmge v3.2d, v31.2d, #0x0    // encoding: [0xe3,0x8b,0xe0,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than Zero (Signed Integer)
+//----------------------------------------------------------------------
+
+         cmgt v0.8b, v15.8b, #0
+         cmgt v1.16b, v31.16b, #0
+         cmgt v15.4h, v16.4h, #0
+         cmgt v5.8h, v6.8h, #0
+         cmgt v29.2s, v27.2s, #0
+         cmgt v9.4s, v7.4s, #0
+         cmgt v3.2d, v31.2d, #0
+
+// CHECK: cmgt v0.8b, v15.8b, #0x0    // encoding: [0xe0,0x89,0x20,0x0e]
+// CHECK: cmgt v1.16b, v31.16b, #0x0  // encoding: [0xe1,0x8b,0x20,0x4e]
+// CHECK: cmgt v15.4h, v16.4h, #0x0   // encoding: [0x0f,0x8a,0x60,0x0e]
+// CHECK: cmgt v5.8h, v6.8h, #0x0     // encoding: [0xc5,0x88,0x60,0x4e]
+// CHECK: cmgt v29.2s, v27.2s, #0x0   // encoding: [0x7d,0x8b,0xa0,0x0e]
+// CHECK: cmgt v9.4s, v7.4s, #0x0     // encoding: [0xe9,0x88,0xa0,0x4e]
+// CHECK: cmgt v3.2d, v31.2d, #0x0    // encoding: [0xe3,0x8b,0xe0,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
+//----------------------------------------------------------------------
+         cmle v0.8b, v15.8b, #0
+         cmle v1.16b, v31.16b, #0
+         cmle v15.4h, v16.4h, #0
+         cmle v5.8h, v6.8h, #0
+         cmle v29.2s, v27.2s, #0
+         cmle v9.4s, v7.4s, #0
+         cmle v3.2d, v31.2d, #0
+
+// CHECK: cmle v0.8b, v15.8b, #0x0    // encoding: [0xe0,0x99,0x20,0x2e]
+// CHECK: cmle v1.16b, v31.16b, #0x0  // encoding: [0xe1,0x9b,0x20,0x6e]
+// CHECK: cmle v15.4h, v16.4h, #0x0   // encoding: [0x0f,0x9a,0x60,0x2e]
+// CHECK: cmle v5.8h, v6.8h, #0x0     // encoding: [0xc5,0x98,0x60,0x6e]
+// CHECK: cmle v29.2s, v27.2s, #0x0   // encoding: [0x7d,0x9b,0xa0,0x2e]
+// CHECK: cmle v9.4s, v7.4s, #0x0     // encoding: [0xe9,0x98,0xa0,0x6e]
+// CHECK: cmle v3.2d, v31.2d, #0x0    // encoding: [0xe3,0x9b,0xe0,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than Zero (Signed Integer)
+//----------------------------------------------------------------------
+         cmlt v0.8b, v15.8b, #0
+         cmlt v1.16b, v31.16b, #0
+         cmlt v15.4h, v16.4h, #0
+         cmlt v5.8h, v6.8h, #0
+         cmlt v29.2s, v27.2s, #0
+         cmlt v9.4s, v7.4s, #0
+         cmlt v3.2d, v31.2d, #0
+
+// CHECK: cmlt v0.8b, v15.8b, #0x0    // encoding: [0xe0,0xa9,0x20,0x0e]
+// CHECK: cmlt v1.16b, v31.16b, #0x0  // encoding: [0xe1,0xab,0x20,0x4e]
+// CHECK: cmlt v15.4h, v16.4h, #0x0   // encoding: [0x0f,0xaa,0x60,0x0e]
+// CHECK: cmlt v5.8h, v6.8h, #0x0     // encoding: [0xc5,0xa8,0x60,0x4e]
+// CHECK: cmlt v29.2s, v27.2s, #0x0   // encoding: [0x7d,0xab,0xa0,0x0e]
+// CHECK: cmlt v9.4s, v7.4s, #0x0     // encoding: [0xe9,0xa8,0xa0,0x4e]
+// CHECK: cmlt v3.2d, v31.2d, #0x0    // encoding: [0xe3,0xab,0xe0,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal to Zero (Floating Point)
+//----------------------------------------------------------------------
+         fcmeq v0.2s, v31.2s, #0.0
+         fcmeq v4.4s, v7.4s, #0.0
+         fcmeq v29.2d, v2.2d, #0.0
+
+// CHECK: fcmeq v0.2s, v31.2s, #0.0  // encoding: [0xe0,0xdb,0xa0,0x0e]
+// CHECK: fcmeq v4.4s, v7.4s, #0.0   // encoding: [0xe4,0xd8,0xa0,0x4e]
+// CHECK: fcmeq v29.2d, v2.2d, #0.0  // encoding: [0x5d,0xd8,0xe0,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
+//----------------------------------------------------------------------
+         fcmge v31.4s, v29.4s, #0.0
+         fcmge v3.2s, v8.2s, #0.0
+         fcmge v17.2d, v15.2d, #0.0
+
+// CHECK: fcmge v31.4s, v29.4s, #0.0  // encoding: [0xbf,0xcb,0xa0,0x6e]
+// CHECK: fcmge v3.2s, v8.2s, #0.0    // encoding: [0x03,0xc9,0xa0,0x2e]
+// CHECK: fcmge v17.2d, v15.2d, #0.0   // encoding: [0xf1,0xc9,0xe0,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than Zero (Floating Point)
+//----------------------------------------------------------------------
+         fcmgt v0.2s, v31.2s, #0.0
+         fcmgt v4.4s, v7.4s, #0.0
+         fcmgt v29.2d, v2.2d, #0.0
+
+// CHECK: fcmgt v0.2s, v31.2s, #0.0   // encoding: [0xe0,0xcb,0xa0,0x0e]
+// CHECK: fcmgt v4.4s, v7.4s, #0.0    // encoding: [0xe4,0xc8,0xa0,0x4e]
+// CHECK: fcmgt v29.2d, v2.2d, #0.0   // encoding: [0x5d,0xc8,0xe0,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
+//----------------------------------------------------------------------
+         fcmle v1.4s, v8.4s, #0.0
+         fcmle v3.2s, v20.2s, #0.0
+         fcmle v7.2d, v13.2d, #0.0
+
+// CHECK: fcmle v1.4s, v8.4s, #0.0   // encoding: [0x01,0xd9,0xa0,0x6e]
+// CHECK: fcmle v3.2s, v20.2s, #0.0  // encoding: [0x83,0xda,0xa0,0x2e]
+// CHECK: fcmle v7.2d, v13.2d, #0.0  // encoding: [0xa7,0xd9,0xe0,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than Zero (Floating Point)
+//----------------------------------------------------------------------
+         fcmlt v16.2s, v2.2s, #0.0
+         fcmlt v15.4s, v4.4s, #0.0
+         fcmlt v5.2d, v29.2d, #0.0
+
+// CHECK: fcmlt v16.2s, v2.2s, #0.0   // encoding: [0x50,0xe8,0xa0,0x0e]
+// CHECK: fcmlt v15.4s, v4.4s, #0.0   // encoding: [0x8f,0xe8,0xa0,0x4e]
+// CHECK: fcmlt v5.2d, v29.2d, #0.0   // encoding: [0xa5,0xeb,0xe0,0x4e]
+
+
+
+
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-crypto.s b/test/MC/AArch64/neon-crypto.s
new file mode 100644
index 000000000000..2952dd5aac29
--- /dev/null
+++ b/test/MC/AArch64/neon-crypto.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO %s
+
+// Check that the assembler accepts the documented AArch64 crypto syntax with +crypto, and rejects it when only +neon is enabled
+
+//------------------------------------------------------------------------------
+// Crypto extension instructions (AES, SHA1, SHA256)
+//------------------------------------------------------------------------------
+
+        aese v0.16b, v1.16b
+        aesd v0.16b, v1.16b
+        aesmc v0.16b, v1.16b
+        aesimc v0.16b, v1.16b
+
+// CHECK-NO-CRYPTO: error: instruction requires a CPU feature not currently enabled
+// CHECK: aese	v0.16b, v1.16b          // encoding: [0x20,0x48,0x28,0x4e]
+// CHECK: aesd	v0.16b, v1.16b          // encoding: [0x20,0x58,0x28,0x4e]
+// CHECK: aesmc	v0.16b, v1.16b          // encoding: [0x20,0x68,0x28,0x4e]
+// CHECK: aesimc	v0.16b, v1.16b          // encoding: [0x20,0x78,0x28,0x4e]
+
+        sha1h s0, s1
+        sha1su1 v0.4s, v1.4s
+        sha256su0 v0.4s, v1.4s
+
+// CHECK: sha1h	s0, s1                  // encoding: [0x20,0x08,0x28,0x5e]
+// CHECK: sha1su1	v0.4s, v1.4s            // encoding: [0x20,0x18,0x28,0x5e]
+// CHECK: sha256su0	v0.4s, v1.4s    // encoding: [0x20,0x28,0x28,0x5e]
+
+        sha1c q0, s1, v2.4s
+        sha1p q0, s1, v2.4s
+        sha1m q0, s1, v2.4s
+        sha1su0 v0.4s, v1.4s, v2.4s
+        sha256h q0, q1, v2.4s
+        sha256h2 q0, q1, v2.4s
+        sha256su1 v0.4s, v1.4s, v2.4s
+
+// CHECK: sha1c	q0, s1, v2.4s           // encoding: [0x20,0x00,0x02,0x5e]
+// CHECK: sha1p	q0, s1, v2.4s           // encoding: [0x20,0x10,0x02,0x5e]
+// CHECK: sha1m	q0, s1, v2.4s           // encoding: [0x20,0x20,0x02,0x5e]
+// CHECK: sha1su0	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x30,0x02,0x5e]
+// CHECK: sha256h	q0, q1, v2.4s           // encoding: [0x20,0x40,0x02,0x5e]
+// CHECK: sha256h2	q0, q1, v2.4s   // encoding: [0x20,0x50,0x02,0x5e]
+// CHECK: sha256su1	v0.4s, v1.4s, v2.4s // encoding: [0x20,0x60,0x02,0x5e]
+
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
new file mode 100644
index 000000000000..be6c163741f9
--- /dev/null
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -0,0 +1,7318 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+//------------------------------------------------------------------------------
+// Vector Integer Add/sub
+//------------------------------------------------------------------------------
+
+        // Mismatched vector types
+        add v0.16b, v1.8b, v2.8b
+        sub v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         add v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sub v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                              ^
+
+//------------------------------------------------------------------------------
+// Vector Floating-Point Add/sub
+//------------------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        fadd v0.2d, v1.2s, v2.2s
+        fsub v0.4s, v1.2s, v2.4s
+        fsub v0.8b, v1.8b, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fadd v0.2d, v1.2s, v2.2s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsub v0.4s, v1.2s, v2.4s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsub v0.8b, v1.8b, v2.8b
+// CHECK-ERROR:                  ^
+
+//----------------------------------------------------------------------
+// Vector Integer Mul
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        mul v0.16b, v1.8b, v2.8b
+        mul v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mul v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mul v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Vector Floating-Point Mul/Div
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        fmul v0.16b, v1.8b, v2.8b
+        fdiv v0.2s, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fmul v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fdiv v0.2s, v1.2d, v2.2d
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Vector And Orr Eor Bsl Bit Bif, Orn, Bic,
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        and v0.8b, v1.16b, v2.8b
+        orr v0.4h, v1.4h, v2.4h
+        eor v0.2s, v1.2s, v2.2s
+        bsl v0.8b, v1.16b, v2.8b
+        bsl v0.2s, v1.2s, v2.2s
+        bit v0.2d, v1.2d, v2.2d
+        bif v0.4h, v1.4h, v2.4h
+        orn v0.8b, v1.16b, v2.16b
+        bic v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         and v0.8b, v1.16b, v2.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         orr v0.4h, v1.4h, v2.4h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         eor v0.2s, v1.2s, v2.2s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         bsl v0.8b, v1.16b, v2.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         bsl v0.2s, v1.2s, v2.2s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         bit v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         bif v0.4h, v1.4h, v2.4h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         orn v0.8b, v1.16b, v2.16b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         bic v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Vector Integer Multiply-accumulate and Multiply-subtract
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        mla v0.16b, v1.8b, v2.8b
+        mls v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mla v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mls v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Vector Floating-Point Multiply-accumulate and Multiply-subtract
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        fmla v0.2s, v1.2d, v2.2d
+        fmls v0.16b, v1.8b, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fmla v0.2s, v1.2d, v2.2d
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fmls v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                         ^
+
+
+//----------------------------------------------------------------------
+// Vector Move Immediate Shifted
+// Vector Move Inverted Immediate Shifted
+// Vector Bitwise Bit Clear (AND NOT) - immediate
+// Vector Bitwise OR - immediate
+//----------------------------------------------------------------------
+      // out of range immediate (0 to 0xff)
+      movi v0.2s, #-1
+      mvni v1.4s, #256
+      // out of range shift (0, 8, 16, 24 and 0, 8)
+      bic v15.4h, #1, lsl #7
+      orr v31.2s, #1, lsl #25
+      movi v5.4h, #10, lsl #16
+      // invalid vector type (2s, 4s, 4h, 8h)
+      movi v5.8b, #1, lsl #8
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          movi v0.2s, #-1
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mvni v1.4s, #256
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         bic v15.4h, #1, lsl #7
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         orr v31.2s, #1, lsl #25
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v5.4h, #10, lsl #16
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v5.8b, #1, lsl #8
+// CHECK-ERROR:                         ^
+//----------------------------------------------------------------------
+// Vector Move Immediate Masked
+// Vector Move Inverted Immediate Masked
+//----------------------------------------------------------------------
+      // out of range immediate (0 to 0xff)
+      movi v0.2s, #-1, msl #8
+      mvni v7.4s, #256, msl #16
+      // out of range shift (8, 16)
+      movi v3.2s, #1, msl #0
+      mvni v17.4s, #255, msl #32
+      // invalid vector type (2s, 4s)
+      movi v5.4h, #31, msl #8
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v0.2s, #-1, msl #8
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mvni v7.4s, #256, msl #16
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v3.2s, #1, msl #0
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mvni v17.4s, #255, msl #32
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v5.4h, #31, msl #8
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Vector Immediate - per byte
+//----------------------------------------------------------------------
+        // out of range immediate (0 to 0xff)
+        movi v0.8b, #-1
+        movi v1.16b, #256
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v0.8b, #-1
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v1.16b, #256
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Estimate
+//----------------------------------------------------------------------
+
+    frecpe s19, h14
+    frecpe d13, s13
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpe s19, h14
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpe d13, s13
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Exponent
+//----------------------------------------------------------------------
+
+    frecpx s18, h10
+    frecpx d16, s19
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpx s18, h10
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecpx d16, s19
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Square Root Estimate
+//----------------------------------------------------------------------
+
+    frsqrte s22, h13
+    frsqrte d21, s12
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrte s22, h13
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrte d21, s12
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Vector Move Immediate - bytemask, per doubleword
+//----------------------------------------------------------------------
+        // invalid bytemask (0x00 or 0xff)
+        movi v0.2d, #0x10ff00ff00ff00ff
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v0.2d, #0x10ff00ff00ff00ff
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Vector Move Immediate - bytemask, one doubleword
+//----------------------------------------------------------------------
+        // invalid bytemask (0x00 or 0xff)
+        movi v0.2d, #0xffff00ff001f00ff
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         movi v0.2d, #0xffff00ff001f00ff
+// CHECK-ERROR:                     ^
+//----------------------------------------------------------------------
+// Vector Floating Point Move Immediate
+//----------------------------------------------------------------------
+        // invalid vector type (2s, 4s, 2d)
+         fmov v0.4h, #1.0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fmov v0.4h, #1.0
+// CHECK-ERROR:              ^
+
+//----------------------------------------------------------------------
+// Vector Move -  register
+//----------------------------------------------------------------------
+      // invalid vector type (8b, 16b)
+      mov v0.2s, v31.8b
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         mov v0.2s, v31.8b
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Vector Absolute Difference and Accumulate (Signed, Unsigned)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types (2d)
+        saba v0.16b, v1.8b, v2.8b
+        uaba v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saba v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaba v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Vector Absolute Difference and Accumulate (Signed, Unsigned)
+// Vector Absolute Difference (Signed, Unsigned)
+
+        // Mismatched and invalid vector types (2d)
+        uaba v0.16b, v1.8b, v2.8b
+        saba v0.2d, v1.2d, v2.2d
+        uabd v0.4s, v1.2s, v2.2s
+        sabd v0.4h, v1.8h, v8.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaba v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saba v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uabd v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sabd v0.4h, v1.8h, v8.8h
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Vector Absolute Difference (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fabd v0.2s, v1.4s, v2.2d
+        fabd v0.4h, v1.4h, v2.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabd v0.2s, v1.4s, v2.2d
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabd v0.4h, v1.4h, v2.4h
+// CHECK-ERROR:                 ^
+//----------------------------------------------------------------------
+// Vector Multiply (Polynomial)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+         pmul v0.8b, v1.8b, v2.16b
+         pmul v0.2s, v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         pmul v0.8b, v1.8b, v2.16b
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         pmul v0.2s, v1.2s, v2.2s
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Scalar Integer Add and Sub
+//----------------------------------------------------------------------
+
+      // Mismatched registers
+         add d0, s1, d2
+         sub s1, d1, d2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         add d0, s1, d2
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sub s1, d1, d2
+// CHECK-ERROR:             ^
+
+//----------------------------------------------------------------------
+// Vector Reciprocal Step (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+         frecps v0.4s, v1.2d, v2.4s
+         frecps v0.8h, v1.8h, v2.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecps v0.4s, v1.2d, v2.4s
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecps v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                  ^
+
+//----------------------------------------------------------------------
+// Vector Reciprocal Square Root Step (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+         frsqrts v0.2d, v1.2d, v2.2s
+         frsqrts v0.4h, v1.4h, v2.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrts v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrts v0.4h, v1.4h, v2.4h
+// CHECK-ERROR:                   ^
+
+
+//----------------------------------------------------------------------
+// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        facge v0.2d, v1.2s, v2.2d
+        facge v0.4h, v1.4h, v2.4h
+        facle v0.8h, v1.4h, v2.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        facge v0.2d, v1.2s, v2.2d
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        facge v0.4h, v1.4h, v2.4h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        facle v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                 ^
+//----------------------------------------------------------------------
+// Vector Absolute Compare Mask Less Than (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        facgt v0.2d, v1.2d, v2.4s
+        facgt v0.8h, v1.8h, v2.8h
+        faclt v0.8b, v1.8b, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        facgt v0.2d, v1.2d, v2.4s
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        facgt v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        faclt v0.8b, v1.8b, v2.8b
+// CHECK-ERROR:                 ^
+
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal (Integer)
+//----------------------------------------------------------------------
+
+         // Mismatched vector types
+         cmeq c0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmeq c0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Higher or Same (Unsigned Integer)
+// Vector Compare Mask Less or Same (Unsigned Integer)
+// CMLS is alias for CMHS with operands reversed.
+//----------------------------------------------------------------------
+
+         // Mismatched vector types
+         cmhs c0.4h, v1.8b, v2.8b
+         cmls c0.16b, v1.16b, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmhs c0.4h, v1.8b, v2.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmls c0.16b, v1.16b, v2.2d
+// CHECK-ERROR:                                ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than or Equal (Integer)
+// Vector Compare Mask Less Than or Equal (Integer)
+// CMLE is alias for CMGE with operands reversed.
+//----------------------------------------------------------------------
+
+         // Mismatched vector types
+         cmge c0.8h, v1.8b, v2.8b
+         cmle c0.4h, v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmge c0.8h, v1.8b, v2.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmle c0.4h, v1.2s, v2.2s
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Higher (Unsigned Integer)
+// Vector Compare Mask Lower (Unsigned Integer)
+// CMLO is alias for CMHI with operands reversed.
+//----------------------------------------------------------------------
+
+         // Mismatched vector types
+         cmhi c0.4s, v1.4s, v2.16b
+         cmlo c0.8b, v1.8b, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmhi c0.4s, v1.4s, v2.16b
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmlo c0.8b, v1.8b, v2.2s
+// CHECK-ERROR:                               ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than (Integer)
+// Vector Compare Mask Less Than (Integer)
+// CMLT is alias for CMGT with operands reversed.
+//----------------------------------------------------------------------
+
+         // Mismatched vector types
+         cmgt c0.8b, v1.4s, v2.16b
+         cmlt c0.8h, v1.16b, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmgt c0.8b, v1.4s, v2.16b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmlt c0.8h, v1.16b, v2.4s
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Bitwise Test (Integer)
+//----------------------------------------------------------------------
+
+         // Mismatched vector types
+         cmtst c0.16b, v1.16b, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmtst c0.16b, v1.16b, v2.4s
+// CHECK-ERROR:                                  ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        fcmeq v0.2d, v1.2s, v2.2d
+        fcmeq v0.16b, v1.16b, v2.16b
+        fcmeq v0.8b, v1.4h, v2.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmeq v0.2d, v1.2s, v2.2d
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmeq v0.16b, v1.16b, v2.16b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmeq v0.8b, v1.4h, v2.4h
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than Or Equal (Floating Point)
+// Vector Compare Mask Less Than Or Equal (Floating Point)
+// FCMLE is alias for FCMGE with operands reversed.
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+         fcmge v31.4s, v29.2s, v28.4s
+         fcmge v3.8b, v8.2s, v12.2s
+         fcmle v17.8h, v15.2d, v13.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmge v31.4s, v29.2s, v28.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmge v3.8b, v8.2s, v12.2s
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmle v17.8h, v15.2d, v13.2d
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than (Floating Point)
+// Vector Compare Mask Less Than (Floating Point)
+// FCMLT is alias for FCMGT with operands reversed.
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+         fcmgt v0.2d, v31.2s, v16.2s
+         fcmgt v4.4s, v7.4s, v15.4h
+         fcmlt v29.2d, v5.2d, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmgt v0.2d, v31.2s, v16.2s
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+// CHECK-ERROR:        fcmgt v4.4s, v7.4s, v15.4h
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+// CHECK-ERROR:        fcmlt v29.2d, v5.2d, v2.16b
+// CHECK-ERROR:                                ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal to Zero (Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types and invalid imm
+         // Mismatched vector types
+         cmeq c0.2d, v1.2s, #0
+         cmeq c0.2d, v1.2d, #1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmeq c0.2d, v1.2s, #0
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmeq c0.2d, v1.2d, #1
+// CHECK-ERROR:                            ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types and invalid imm
+         cmge c0.8h, v1.8b, #0
+         cmge c0.4s, v1.4s, #-1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmge c0.8h, v1.8b, #0
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmge c0.4s, v1.4s, #-1
+// CHECK-ERROR:                             ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than Zero (Signed Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types and invalid imm
+         cmgt c0.8b, v1.4s, #0
+         cmgt c0.8b, v1.8b, #-255
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmgt c0.8b, v1.4s, #0
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmgt c0.8b, v1.8b, #-255
+// CHECK-ERROR:                             ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types and invalid imm
+         cmle c0.4h, v1.2s, #0
+         cmle c0.16b, v1.16b, #16
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        cmle c0.4h, v1.2s, #0
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmle c0.16b, v1.16b, #16
+// CHECK-ERROR:                               ^
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than Zero (Signed Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types and invalid imm
+         cmlt c0.8h, v1.16b, #0
+         cmlt c0.8h, v1.8h, #-15
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmlt c0.8h, v1.16b, #0
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cmlt c0.8h, v1.8h, #-15
+// CHECK-ERROR:                             ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Equal to Zero (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types, invalid imm
+        fcmeq v0.2d, v1.2s, #0.0
+        fcmeq v0.16b, v1.16b, #0.0
+        fcmeq v0.8b, v1.4h, #1.0
+        fcmeq v0.8b, v1.4h, #1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmeq v0.2d, v1.2s, #0.0
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmeq v0.16b, v1.16b, #0.0
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmeq v0.8b, v1.4h, #1.0
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error:  Expected floating-point immediate
+// CHECK-ERROR:        fcmeq v0.8b, v1.4h, #1
+// CHECK-ERROR:                             ^
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types, invalid imm
+         fcmge v31.4s, v29.2s, #0.0
+         fcmge v3.8b, v8.2s, #0.0
+         fcmle v17.8h, v15.2d, #-1.0
+         fcmle v17.8h, v15.2d, #0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmge v31.4s, v29.2s, #0.0
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmge v3.8b, v8.2s, #0.0
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmle v17.8h, v15.2d, #-1.0
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error:  Expected floating-point immediate
+// CHECK-ERROR:        fcmle v17.8h, v15.2d, #0
+// CHECK-ERROR:                               ^
+//----------------------------------------------------------------------
+// Vector Compare Mask Greater Than Zero (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types, invalid imm
+         fcmgt v0.2d, v31.2s, #0.0
+         fcmgt v4.4s, v7.4h, #0.0
+         fcmlt v29.2d, v5.2d, #255.0
+         fcmlt v29.2d, v5.2d, #255
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmgt v0.2d, v31.2s, #0.0
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmgt v4.4s, v7.4h, #0.0
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+// CHECK-ERROR:        fcmlt v29.2d, v5.2d, #255.0
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error:  Expected floating-point immediate
+// CHECK-ERROR:        fcmlt v29.2d, v5.2d, #255
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types, invalid imm
+         fcmge v31.4s, v29.2s, #0.0
+         fcmge v3.8b, v8.2s, #0.0
+         fcmle v17.2d, v15.2d, #15.0
+         fcmle v17.2d, v15.2d, #15
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmge v31.4s, v29.2s, #0.0
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmge v3.8b, v8.2s, #0.0
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+// CHECK-ERROR:        fcmle v17.2d, v15.2d, #15.0
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error:  Expected floating-point immediate
+// CHECK-ERROR:        fcmle v17.2d, v15.2d, #15
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Vector Compare Mask Less Than Zero (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types, invalid imm
+         fcmgt v0.2d, v31.2s, #0.0
+         fcmgt v4.4s, v7.4h, #0.0
+         fcmlt v29.2d, v5.2d, #16.0
+         fcmlt v29.2d, v5.2d, #2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmgt v0.2d, v31.2s, #0.0
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcmgt v4.4s, v7.4h, #0.0
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+// CHECK-ERROR:        fcmlt v29.2d, v5.2d, #16.0
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error:  Expected floating-point immediate
+// CHECK-ERROR:        fcmlt v29.2d, v5.2d, #2
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Vector Integer Halving Add (Signed)
+// Vector Integer Halving Add (Unsigned)
+// Vector Integer Halving Sub (Signed)
+// Vector Integer Halving Sub (Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types (2d)
+        shadd v0.2d, v1.2d, v2.2d
+        uhadd v4.2s, v5.2s, v5.4h
+        shsub v11.4h, v12.8h, v13.4h
+        uhsub v31.16b, v29.8b, v28.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        shadd v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uhadd v4.2s, v5.2s, v5.4h
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        shsub v11.4h, v12.8h, v13.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uhsub v31.16b, v29.8b, v28.8b
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Vector Integer Rounding Halving Add (Signed)
+// Vector Integer Rounding Halving Add (Unsigned)
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types (2d)
+        srhadd v0.2s, v1.2s, v2.2d
+        urhadd v0.16b, v1.16b, v2.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        srhadd v0.2s, v1.2s, v2.2d
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        urhadd v0.16b, v1.16b, v2.8h
+// CHECK-ERROR:                                  ^
+
+//----------------------------------------------------------------------
+// Vector Integer Saturating Add (Signed)
+// Vector Integer Saturating Add (Unsigned)
+// Vector Integer Saturating Sub (Signed)
+// Vector Integer Saturating Sub (Unsigned)
+//----------------------------------------------------------------------
+
+        // Mismatched vector types
+        sqadd v0.2s, v1.2s, v2.2d
+        uqadd v31.8h, v1.4h, v2.4h
+        sqsub v10.8h, v1.16b, v2.16b
+        uqsub v31.8b, v1.8b, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqadd v0.2s, v1.2s, v2.2d
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqadd v31.8h, v1.4h, v2.4h
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqsub v10.8h, v1.16b, v2.16b
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqsub v31.8b, v1.8b, v2.4s
+// CHECK-ERROR:                                ^
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Add (Signed)
+// Scalar Integer Saturating Add (Unsigned)
+// Scalar Integer Saturating Sub (Signed)
+// Scalar Integer Saturating Sub (Unsigned)
+//----------------------------------------------------------------------
+
+      // Mismatched registers
+         sqadd d0, s31, d2
+         uqadd s0, s1, d2
+         sqsub b0, b2, s18
+         uqsub h1, h2, d2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqadd d0, s31, d2
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqadd s0, s1, d2
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqsub b0, b2, s18
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqsub h1, h2, d2
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Doubling Multiply Half High (Signed)
+//----------------------------------------------------------------------
+
+    sqdmulh h10, s11, h12
+    sqdmulh s20, h21, s2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh h10, s11, h12
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh s20, h21, s2
+// CHECK-ERROR:                     ^
+
+//------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Doubling Multiply Half High (Signed)
+//------------------------------------------------------------------------
+
+    sqrdmulh h10, s11, h12
+    sqrdmulh s20, h21, s2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh h10, s11, h12
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh s20, h21, s2
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Vector Shift Left (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        sshl v0.4s, v15.2s, v16.2s
+        ushl v1.16b, v25.16b, v6.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sshl v0.4s, v15.2s, v16.2s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ushl v1.16b, v25.16b, v6.8h
+// CHECK-ERROR:                                 ^
+
+//----------------------------------------------------------------------
+// Vector Saturating Shift Left (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        sqshl v0.2s, v15.4s, v16.2d
+        uqshl v1.8b, v25.4h, v6.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqshl v0.2s, v15.4s, v16.2d 
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqshl v1.8b, v25.4h, v6.8h
+// CHECK-ERROR:                         ^
+
+//----------------------------------------------------------------------
+// Vector Rounding Shift Left (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        srshl v0.8h, v15.8h, v16.16b
+        urshl v1.2d, v25.2d, v6.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        srshl v0.8h, v15.8h, v16.16b
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        urshl v1.2d, v25.2d, v6.4s
+// CHECK-ERROR:                                ^
+
+//----------------------------------------------------------------------
+// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        sqrshl v0.2s, v15.8h, v16.16b
+        uqrshl v1.4h, v25.4h,  v6.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrshl v0.2s, v15.8h, v16.16b
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqrshl v1.4h, v25.4h,  v6.2d
+// CHECK-ERROR:                                  ^
+
+//----------------------------------------------------------------------
+// Scalar Integer Shift Left (Signed, Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid register types
+        sshl d0, d1, s2
+        ushl b2, b0, b1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sshl d0, d1, s2
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ushl b2, b0, b1
+// CHECK-ERROR:             ^
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+//----------------------------------------------------------------------
+
+        // Mismatched register types
+        sqshl b0, s1, b0
+        uqshl h0, b1, h0
+        sqshl s0, h1, s0
+        uqshl d0, b1, d0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqshl b0, s1, b0
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqshl h0, b1, h0
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqshl s0, h1, s0
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqshl d0, b1, d0
+// CHECK-ERROR:                  ^
+
+//----------------------------------------------------------------------
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid register types
+        srshl h0, h1, h2
+        urshl s0, s1, s2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        srshl h0, h1, h2
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        urshl s0, s1, s2
+// CHECK-ERROR:              ^
+
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+//----------------------------------------------------------------------
+
+        // Mismatched register types
+        sqrshl b0, b1, s0
+        uqrshl h0, h1, b0
+        sqrshl s0, s1, h0
+        uqrshl d0, d1, b0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrshl b0, b1, s0
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqrshl h0, h1, b0
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrshl s0, s1, h0
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqrshl d0, d1, b0
+// CHECK-ERROR:                       ^
+
+
+//----------------------------------------------------------------------
+// Vector Maximum (Signed, Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        smax v0.2d, v1.2d, v2.2d
+        umax v0.4h, v1.4h, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smax v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umax v0.4h, v1.4h, v2.2s
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Vector Minimum (Signed, Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        smin v0.2d, v1.2d, v2.2d
+        umin v0.2s, v1.2s, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smin v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umin v0.2s, v1.2s, v2.8b
+// CHECK-ERROR:                             ^
+
+
+//----------------------------------------------------------------------
+// Vector Maximum (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fmax v0.2s, v1.2s, v2.4s
+        fmax v0.8b, v1.8b, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmax v0.2s, v1.2s, v2.4s
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmax v0.8b, v1.8b, v2.8b
+// CHECK-ERROR:                ^
+//----------------------------------------------------------------------
+// Vector Minimum (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fmin v0.4s, v1.4s, v2.2d
+        fmin v0.8h, v1.8h, v2.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmin v0.4s, v1.4s, v2.2d
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmin v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Vector maxNum (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fmaxnm v0.2s, v1.2s, v2.2d
+        fmaxnm v0.4h, v1.8h, v2.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnm v0.2s, v1.2s, v2.2d
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnm v0.4h, v1.8h, v2.4h
+// CHECK-ERROR:                  ^
+
+//----------------------------------------------------------------------
+// Vector minNum (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fminnm v0.4s, v1.2s, v2.4s
+        fminnm v0.16b, v0.16b, v0.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnm v0.4s, v1.2s, v2.4s
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnm v0.16b, v0.16b, v0.16b
+// CHECK-ERROR:                  ^
+
+
+//----------------------------------------------------------------------
+// Vector Maximum Pairwise (Signed, Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        smaxp v0.2d, v1.2d, v2.2d
+        umaxp v0.4h, v1.4h, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smaxp v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umaxp v0.4h, v1.4h, v2.2s
+// CHECK-ERROR:                               ^
+
+//----------------------------------------------------------------------
+// Vector Minimum Pairwise (Signed, Unsigned)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        sminp v0.2d, v1.2d, v2.2d
+        uminp v0.2s, v1.2s, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sminp v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uminp v0.2s, v1.2s, v2.8b
+// CHECK-ERROR:                               ^
+
+
+//----------------------------------------------------------------------
+// Vector Maximum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fmaxp v0.2s, v1.2s, v2.4s
+        fmaxp v0.8b, v1.8b, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxp v0.2s, v1.2s, v2.4s
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxp v0.8b, v1.8b, v2.8b
+// CHECK-ERROR:                 ^
+//----------------------------------------------------------------------
+// Vector Minimum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fminp v0.4s, v1.4s, v2.2d
+        fminp v0.8h, v1.8h, v2.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminp v0.4s, v1.4s, v2.2d
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminp v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Vector maxNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fmaxnmp v0.2s, v1.2s, v2.2d
+        fmaxnmp v0.4h, v1.8h, v2.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnmp v0.2s, v1.2s, v2.2d
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnmp v0.4h, v1.8h, v2.4h
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Vector minNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        fminnmp v0.4s, v1.2s, v2.4s
+        fminnmp v0.16b, v0.16b, v0.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnmp v0.4s, v1.2s, v2.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnmp v0.16b, v0.16b, v0.16b
+// CHECK-ERROR:                   ^
+
+
+//----------------------------------------------------------------------
+// Vector Add Pairwise (Integer)
+//----------------------------------------------------------------------
+
+        // Mismatched vector types
+        addp v0.16b, v1.8b, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         addp v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                         ^
+
+//----------------------------------------------------------------------
+// Vector Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+        // Mismatched and invalid vector types
+        faddp v0.16b, v1.8b, v2.8b
+        faddp v0.2d, v1.2d, v2.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         faddp v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         faddp v0.2d, v1.2d, v2.8h
+// CHECK-ERROR:                                ^
+
+
+//----------------------------------------------------------------------
+// Vector Saturating Doubling Multiply High
+//----------------------------------------------------------------------
+         // Mismatched and invalid vector types
+         sqdmulh v2.4h, v25.8h, v3.4h
+         sqdmulh v12.2d, v5.2d, v13.2d
+         sqdmulh v3.8b, v1.8b, v30.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqdmulh v2.4h, v25.8h, v3.4h
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqdmulh v12.2d, v5.2d, v13.2d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqdmulh v3.8b, v1.8b, v30.8b
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Vector Saturating Rounding Doubling Multiply High
+//----------------------------------------------------------------------
+         // Mismatched and invalid vector types
+         sqrdmulh v2.2s, v25.4s, v3.4s
+         sqrdmulh v12.16b, v5.16b, v13.16b
+         sqrdmulh v3.4h, v1.4h, v30.2d
+
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrdmulh v2.2s, v25.4s, v3.4s
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrdmulh v12.16b, v5.16b, v13.16b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrdmulh v3.4h, v1.4h, v30.2d
+// CHECK-ERROR:                                    ^
+
+//----------------------------------------------------------------------
+// Vector Multiply Extended
+//----------------------------------------------------------------------
+         // Mismatched and invalid vector types
+      fmulx v21.2s, v5.2s, v13.2d
+      fmulx v1.4h, v25.4h, v3.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fmulx v21.2s, v5.2s, v13.2d
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fmulx v1.4h, v25.4h, v3.4h
+// CHECK-ERROR:                  ^
+
+//------------------------------------------------------------------------------
+// Vector Shift Left by Immediate
+//------------------------------------------------------------------------------
+         // Mismatched vector types and out of range
+         shl v0.4s, v15,2s, #3
+         shl v0.2d, v17.4s, #3
+         shl v0.8b, v31.8b, #-1
+         shl v0.8b, v31.8b, #8
+         shl v0.4s, v21.4s, #32
+         shl v0.2d, v1.2d, #64
+
+// CHECK-ERROR: error: expected comma before next operand
+// CHECK-ERROR:         shl v0.4s, v15,2s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shl v0.2d, v17.4s, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:         shl v0.8b, v31.8b, #-1
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:         shl v0.8b, v31.8b, #8
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:         shl v0.4s, v21.4s, #32
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:         shl v0.2d, v1.2d, #64
+// CHECK-ERROR:                           ^
+
+//----------------------------------------------------------------------
+// Vector Shift Left Long by Immediate
+//----------------------------------------------------------------------
+        // Mismatched vector types
+        sshll v0.4s, v15.2s, #3
+        ushll v1.16b, v25.16b, #6
+        sshll2 v0.2d, v3.8s, #15
+        ushll2 v1.4s, v25.4s, #7
+
+        // Out of range 
+        sshll v0.8h, v1.8b, #-1
+        sshll v0.8h, v1.8b, #9
+        ushll v0.4s, v1.4h, #17
+        ushll v0.2d, v1.2s, #33
+        sshll2 v0.8h, v1.16b, #9
+        sshll2 v0.4s, v1.8h, #17
+        ushll2 v0.2d, v1.4s, #33
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sshll v0.4s, v15.2s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ushll v1.16b, v25.16b, #6
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sshll2 v0.2d, v3.8s, #15
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ushll2 v1.4s, v25.4s, #7
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:        sshll v0.8h, v1.8b, #-1
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:        sshll v0.8h, v1.8b, #9
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:        ushll v0.4s, v1.4h, #17
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:        ushll v0.2d, v1.2s, #33
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:        sshll2 v0.8h, v1.16b, #9
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:        sshll2 v0.4s, v1.8h, #17
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:        ushll2 v0.2d, v1.4s, #33
+// CHECK-ERROR:                             ^
+
+
+//------------------------------------------------------------------------------
+// Vector shift right by immediate
+//------------------------------------------------------------------------------
+         sshr v0.8b, v1.8h, #3
+         sshr v0.4h, v1.4s, #3
+         sshr v0.2s, v1.2d, #3
+         sshr v0.16b, v1.16b, #9
+         sshr v0.8h, v1.8h, #17
+         sshr v0.4s, v1.4s, #33
+         sshr v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sshr v0.8b, v1.8h, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sshr v0.4h, v1.4s, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sshr v0.2s, v1.2d, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         sshr v0.16b, v1.16b, #9
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         sshr v0.8h, v1.8h, #17
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         sshr v0.4s, v1.4s, #33
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         sshr v0.2d, v1.2d, #65
+// CHECK-ERROR:                            ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned shift right by immediate
+//------------------------------------------------------------------------------
+         ushr v0.8b, v1.8h, #3
+         ushr v0.4h, v1.4s, #3
+         ushr v0.2s, v1.2d, #3
+         ushr v0.16b, v1.16b, #9
+         ushr v0.8h, v1.8h, #17
+         ushr v0.4s, v1.4s, #33
+         ushr v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ushr v0.8b, v1.8h, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ushr v0.4h, v1.4s, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ushr v0.2s, v1.2d, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         ushr v0.16b, v1.16b, #9
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         ushr v0.8h, v1.8h, #17
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         ushr v0.4s, v1.4s, #33
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         ushr v0.2d, v1.2d, #65
+// CHECK-ERROR:                            ^
+
+//------------------------------------------------------------------------------
+// Vector shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         ssra v0.8b, v1.8h, #3
+         ssra v0.4h, v1.4s, #3
+         ssra v0.2s, v1.2d, #3
+         ssra v0.16b, v1.16b, #9
+         ssra v0.8h, v1.8h, #17
+         ssra v0.4s, v1.4s, #33
+         ssra v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ssra v0.8b, v1.8h, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ssra v0.4h, v1.4s, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ssra v0.2s, v1.2d, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         ssra v0.16b, v1.16b, #9
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         ssra v0.8h, v1.8h, #17
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         ssra v0.4s, v1.4s, #33
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         ssra v0.2d, v1.2d, #65
+// CHECK-ERROR:                            ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         usra v0.8b, v1.8h, #3
+         usra v0.4h, v1.4s, #3
+         usra v0.2s, v1.2d, #3
+         usra v0.16b, v1.16b, #9
+         usra v0.8h, v1.8h, #17
+         usra v0.4s, v1.4s, #33
+         usra v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usra v0.8b, v1.8h, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usra v0.4h, v1.4s, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usra v0.2s, v1.2d, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         usra v0.16b, v1.16b, #9
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         usra v0.8h, v1.8h, #17
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         usra v0.4s, v1.4s, #33
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         usra v0.2d, v1.2d, #65
+// CHECK-ERROR:                            ^
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right by immediate
+//------------------------------------------------------------------------------
+         srshr v0.8b, v1.8h, #3
+         srshr v0.4h, v1.4s, #3
+         srshr v0.2s, v1.2d, #3
+         srshr v0.16b, v1.16b, #9
+         srshr v0.8h, v1.8h, #17
+         srshr v0.4s, v1.4s, #33
+         srshr v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         srshr v0.8b, v1.8h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         srshr v0.4h, v1.4s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         srshr v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         srshr v0.16b, v1.16b, #9
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         srshr v0.8h, v1.8h, #17
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         srshr v0.4s, v1.4s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         srshr v0.2d, v1.2d, #65
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned rounding shift right by immediate
+//------------------------------------------------------------------------------
+         urshr v0.8b, v1.8h, #3
+         urshr v0.4h, v1.4s, #3
+         urshr v0.2s, v1.2d, #3
+         urshr v0.16b, v1.16b, #9
+         urshr v0.8h, v1.8h, #17
+         urshr v0.4s, v1.4s, #33
+         urshr v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urshr v0.8b, v1.8h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urshr v0.4h, v1.4s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urshr v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         urshr v0.16b, v1.16b, #9
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         urshr v0.8h, v1.8h, #17
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         urshr v0.4s, v1.4s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         urshr v0.2d, v1.2d, #65
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         srsra v0.8b, v1.8h, #3
+         srsra v0.4h, v1.4s, #3
+         srsra v0.2s, v1.2d, #3
+         srsra v0.16b, v1.16b, #9
+         srsra v0.8h, v1.8h, #17
+         srsra v0.4s, v1.4s, #33
+         srsra v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         srsra v0.8b, v1.8h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         srsra v0.4h, v1.4s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         srsra v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         srsra v0.16b, v1.16b, #9
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         srsra v0.8h, v1.8h, #17
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         srsra v0.4s, v1.4s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         srsra v0.2d, v1.2d, #65
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned rounding shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         ursra v0.8b, v1.8h, #3
+         ursra v0.4h, v1.4s, #3
+         ursra v0.2s, v1.2d, #3
+         ursra v0.16b, v1.16b, #9
+         ursra v0.8h, v1.8h, #17
+         ursra v0.4s, v1.4s, #33
+         ursra v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursra v0.8b, v1.8h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursra v0.4h, v1.4s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursra v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         ursra v0.16b, v1.16b, #9
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         ursra v0.8h, v1.8h, #17
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         ursra v0.4s, v1.4s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         ursra v0.2d, v1.2d, #65
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector shift right and insert by immediate
+//------------------------------------------------------------------------------
+         sri v0.8b, v1.8h, #3
+         sri v0.4h, v1.4s, #3
+         sri v0.2s, v1.2d, #3
+         sri v0.16b, v1.16b, #9
+         sri v0.8h, v1.8h, #17
+         sri v0.4s, v1.4s, #33
+         sri v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sri v0.8b, v1.8h, #3
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sri v0.4h, v1.4s, #3
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sri v0.2s, v1.2d, #3
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         sri v0.16b, v1.16b, #9
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         sri v0.8h, v1.8h, #17
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         sri v0.4s, v1.4s, #33
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         sri v0.2d, v1.2d, #65
+// CHECK-ERROR:                           ^
+
+//------------------------------------------------------------------------------
+// Vector shift left and insert by immediate
+//------------------------------------------------------------------------------
+         sli v0.8b, v1.8h, #3
+         sli v0.4h, v1.4s, #3
+         sli v0.2s, v1.2d, #3
+         sli v0.16b, v1.16b, #8
+         sli v0.8h, v1.8h, #16
+         sli v0.4s, v1.4s, #32
+         sli v0.2d, v1.2d, #64
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sli v0.8b, v1.8h, #3
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sli v0.4h, v1.4s, #3
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sli v0.2s, v1.2d, #3
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:         sli v0.16b, v1.16b, #8
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:         sli v0.8h, v1.8h, #16
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:         sli v0.4s, v1.4s, #32
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:         sli v0.2d, v1.2d, #64
+// CHECK-ERROR:                           ^
+
+//------------------------------------------------------------------------------
+// Vector saturating shift left unsigned by immediate
+//------------------------------------------------------------------------------
+         sqshlu v0.8b, v1.8h, #3
+         sqshlu v0.4h, v1.4s, #3
+         sqshlu v0.2s, v1.2d, #3
+         sqshlu v0.16b, v1.16b, #8
+         sqshlu v0.8h, v1.8h, #16
+         sqshlu v0.4s, v1.4s, #32
+         sqshlu v0.2d, v1.2d, #64
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshlu v0.8b, v1.8h, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshlu v0.4h, v1.4s, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshlu v0.2s, v1.2d, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:         sqshlu v0.16b, v1.16b, #8
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:         sqshlu v0.8h, v1.8h, #16
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:         sqshlu v0.4s, v1.4s, #32
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:         sqshlu v0.2d, v1.2d, #64
+// CHECK-ERROR:                              ^
+
+//------------------------------------------------------------------------------
+// Vector saturating shift left by immediate
+//------------------------------------------------------------------------------
+         sqshl v0.8b, v1.8h, #3
+         sqshl v0.4h, v1.4s, #3
+         sqshl v0.2s, v1.2d, #3
+         sqshl v0.16b, v1.16b, #8
+         sqshl v0.8h, v1.8h, #16
+         sqshl v0.4s, v1.4s, #32
+         sqshl v0.2d, v1.2d, #64
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshl v0.8b, v1.8h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshl v0.4h, v1.4s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshl v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:         sqshl v0.16b, v1.16b, #8
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:         sqshl v0.8h, v1.8h, #16
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:         sqshl v0.4s, v1.4s, #32
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:         sqshl v0.2d, v1.2d, #64
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned saturating shift left by immediate
+//------------------------------------------------------------------------------
+         uqshl v0.8b, v1.8h, #3
+         uqshl v0.4h, v1.4s, #3
+         uqshl v0.2s, v1.2d, #3
+         uqshl v0.16b, v1.16b, #8
+         uqshl v0.8h, v1.8h, #16
+         uqshl v0.4s, v1.4s, #32
+         uqshl v0.2d, v1.2d, #64
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqshl v0.8b, v1.8h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqshl v0.4h, v1.4s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqshl v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:         uqshl v0.16b, v1.16b, #8
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:         uqshl v0.8h, v1.8h, #16
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:         uqshl v0.4s, v1.4s, #32
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:         uqshl v0.2d, v1.2d, #64
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector shift right narrow by immediate
+//------------------------------------------------------------------------------
+         shrn v0.8b, v1.8b, #3
+         shrn v0.4h, v1.4h, #3
+         shrn v0.2s, v1.2s, #3
+         shrn2 v0.16b, v1.8h, #17
+         shrn2 v0.8h, v1.4s, #33
+         shrn2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shrn v0.8b, v1.8b, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shrn v0.4h, v1.4h, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shrn v0.2s, v1.2s, #3
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         shrn2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         shrn2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         shrn2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Vector saturating shift right unsigned narrow by immediate
+//------------------------------------------------------------------------------
+         sqshrun v0.8b, v1.8b, #3
+         sqshrun v0.4h, v1.4h, #3
+         sqshrun v0.2s, v1.2s, #3
+         sqshrun2 v0.16b, v1.8h, #17
+         sqshrun2 v0.8h, v1.4s, #33
+         sqshrun2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshrun v0.8b, v1.8b, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshrun v0.4h, v1.4h, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshrun v0.2s, v1.2s, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         sqshrun2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         sqshrun2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         sqshrun2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                                ^
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right narrow by immediate
+//------------------------------------------------------------------------------
+         rshrn v0.8b, v1.8b, #3
+         rshrn v0.4h, v1.4h, #3
+         rshrn v0.2s, v1.2s, #3
+         rshrn2 v0.16b, v1.8h, #17
+         rshrn2 v0.8h, v1.4s, #33
+         rshrn2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rshrn v0.8b, v1.8b, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rshrn v0.4h, v1.4h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rshrn v0.2s, v1.2s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         rshrn2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         rshrn2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         rshrn2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                              ^
+
+//------------------------------------------------------------------------------
+// Vector saturating shift right rounded unsigned narrow by immediate
+//------------------------------------------------------------------------------
+         sqrshrun v0.8b, v1.8b, #3
+         sqrshrun v0.4h, v1.4h, #3
+         sqrshrun v0.2s, v1.2s, #3
+         sqrshrun2 v0.16b, v1.8h, #17
+         sqrshrun2 v0.8h, v1.4s, #33
+         sqrshrun2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrshrun v0.8b, v1.8b, #3
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrshrun v0.4h, v1.4h, #3
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrshrun v0.2s, v1.2s, #3
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         sqrshrun2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         sqrshrun2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         sqrshrun2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                                 ^
+
+//------------------------------------------------------------------------------
+// Vector saturating shift right narrow by immediate
+//------------------------------------------------------------------------------
+         sqshrn v0.8b, v1.8b, #3
+         sqshrn v0.4h, v1.4h, #3
+         sqshrn v0.2s, v1.2s, #3
+         sqshrn2 v0.16b, v1.8h, #17
+         sqshrn2 v0.8h, v1.4s, #33
+         sqshrn2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshrn v0.8b, v1.8b, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshrn v0.4h, v1.4h, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqshrn v0.2s, v1.2s, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         sqshrn2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         sqshrn2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         sqshrn2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                               ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned saturating shift right narrow by immediate
+//------------------------------------------------------------------------------
+         uqshrn v0.8b, v1.8b, #3
+         uqshrn v0.4h, v1.4h, #3
+         uqshrn v0.2s, v1.2s, #3
+         uqshrn2 v0.16b, v1.8h, #17
+         uqshrn2 v0.8h, v1.4s, #33
+         uqshrn2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqshrn v0.8b, v1.8b, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqshrn v0.4h, v1.4h, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqshrn v0.2s, v1.2s, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         uqshrn2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         uqshrn2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         uqshrn2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                               ^
+
+//------------------------------------------------------------------------------
+// Vector saturating shift right rounded narrow by immediate
+//------------------------------------------------------------------------------
+         sqrshrn v0.8b, v1.8b, #3
+         sqrshrn v0.4h, v1.4h, #3
+         sqrshrn v0.2s, v1.2s, #3
+         sqrshrn2 v0.16b, v1.8h, #17
+         sqrshrn2 v0.8h, v1.4s, #33
+         sqrshrn2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrshrn v0.8b, v1.8b, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrshrn v0.4h, v1.4h, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqrshrn v0.2s, v1.2s, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         sqrshrn2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         sqrshrn2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         sqrshrn2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                                ^
+
+//------------------------------------------------------------------------------
+// Vector unsigned saturating shift right rounded narrow by immediate
+//------------------------------------------------------------------------------
+         uqrshrn v0.8b, v1.8b, #3
+         uqrshrn v0.4h, v1.4h, #3
+         uqrshrn v0.2s, v1.2s, #3
+         uqrshrn2 v0.16b, v1.8h, #17
+         uqrshrn2 v0.8h, v1.4s, #33
+         uqrshrn2 v0.4s, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqrshrn v0.8b, v1.8b, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqrshrn v0.4h, v1.4h, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqrshrn v0.2s, v1.2s, #3
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:         uqrshrn2 v0.16b, v1.8h, #17
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:         uqrshrn2 v0.8h, v1.4s, #33
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         uqrshrn2 v0.4s, v1.2d, #65
+// CHECK-ERROR:                                ^
+
+//------------------------------------------------------------------------------
+// Fixed-point convert to floating-point
+//------------------------------------------------------------------------------
+         scvtf v0.2s, v1.2d, #3
+         scvtf v0.4s, v1.4h, #3
+         scvtf v0.2d, v1.2s, #3
+         ucvtf v0.2s, v1.2s, #33
+         ucvtf v0.4s, v1.4s, #33
+         ucvtf v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v0.2s, v1.2d, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v0.4s, v1.4h, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v0.2d, v1.2s, #3
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         ucvtf v0.2s, v1.2s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         ucvtf v0.4s, v1.4s, #33
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         ucvtf v0.2d, v1.2d, #65
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert to fixed-point
+//------------------------------------------------------------------------------
+         fcvtzs v0.2s, v1.2d, #3
+         fcvtzs v0.4s, v1.4h, #3
+         fcvtzs v0.2d, v1.2s, #3
+         fcvtzu v0.2s, v1.2s, #33
+         fcvtzu v0.4s, v1.4s, #33
+         fcvtzu v0.2d, v1.2d, #65
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v0.2s, v1.2d, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v0.4s, v1.4h, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v0.2d, v1.2s, #3
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         fcvtzu v0.2s, v1.2s, #33
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:         fcvtzu v0.4s, v1.4s, #33
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:         fcvtzu v0.2d, v1.2d, #65
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Vector operation on 3 operands with different types
+//----------------------------------------------------------------------
+
+        // Mismatched and invalid vector types
+        saddl v0.8h, v1.8h, v2.8b
+        saddl v0.4s, v1.4s, v2.4h
+        saddl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        saddl2 v0.4s, v1.8s, v2.8h
+        saddl2 v0.8h, v1.16h, v2.16b
+        saddl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        uaddl v0.8h, v1.8h, v2.8b
+        uaddl v0.4s, v1.4s, v2.4h
+        uaddl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        uaddl2 v0.8h, v1.16h, v2.16b
+        uaddl2 v0.4s, v1.8s, v2.8h
+        uaddl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        ssubl v0.8h, v1.8h, v2.8b
+        ssubl v0.4s, v1.4s, v2.4h
+        ssubl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        ssubl2 v0.8h, v1.16h, v2.16b
+        ssubl2 v0.4s, v1.8s, v2.8h
+        ssubl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        usubl v0.8h, v1.8h, v2.8b
+        usubl v0.4s, v1.4s, v2.4h
+        usubl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        usubl2 v0.8h, v1.16h, v2.16b
+        usubl2 v0.4s, v1.8s, v2.8h
+        usubl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        sabal v0.8h, v1.8h, v2.8b
+        sabal v0.4s, v1.4s, v2.4h
+        sabal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        sabal2 v0.8h, v1.16h, v2.16b
+        sabal2 v0.4s, v1.8s, v2.8h
+        sabal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        uabal v0.8h, v1.8h, v2.8b
+        uabal v0.4s, v1.4s, v2.4h
+        uabal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        uabal2 v0.8h, v1.16h, v2.16b
+        uabal2 v0.4s, v1.8s, v2.8h
+        uabal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        sabdl v0.8h, v1.8h, v2.8b
+        sabdl v0.4s, v1.4s, v2.4h
+        sabdl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabdl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabdl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabdl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        sabdl2 v0.8h, v1.16h, v2.16b
+        sabdl2 v0.4s, v1.8s, v2.8h
+        sabdl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabdl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabdl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sabdl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        uabdl v0.8h, v1.8h, v2.8b
+        uabdl v0.4s, v1.4s, v2.4h
+        uabdl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabdl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabdl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabdl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        uabdl2 v0.8h, v1.16h, v2.16b
+        uabdl2 v0.4s, v1.8s, v2.8h
+        uabdl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabdl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabdl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uabdl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        smlal v0.8h, v1.8h, v2.8b
+        smlal v0.4s, v1.4s, v2.4h
+        smlal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        smlal2 v0.8h, v1.16h, v2.16b
+        smlal2 v0.4s, v1.8s, v2.8h
+        smlal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        umlal v0.8h, v1.8h, v2.8b
+        umlal v0.4s, v1.4s, v2.4h
+        umlal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        umlal2 v0.8h, v1.16h, v2.16b
+        umlal2 v0.4s, v1.8s, v2.8h
+        umlal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        smlsl v0.8h, v1.8h, v2.8b
+        smlsl v0.4s, v1.4s, v2.4h
+        smlsl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        smlsl2 v0.8h, v1.16h, v2.16b
+        smlsl2 v0.4s, v1.8s, v2.8h
+        smlsl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        umlsl v0.8h, v1.8h, v2.8b
+        umlsl v0.4s, v1.4s, v2.4h
+        umlsl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        umlsl2 v0.8h, v1.16h, v2.16b
+        umlsl2 v0.4s, v1.8s, v2.8h
+        umlsl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        smull v0.8h, v1.8h, v2.8b
+        smull v0.4s, v1.4s, v2.4h
+        smull v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        smull2 v0.8h, v1.16h, v2.16b
+        smull2 v0.4s, v1.8s, v2.8h
+        smull2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+        umull v0.8h, v1.8h, v2.8b
+        umull v0.4s, v1.4s, v2.4h
+        umull v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                        ^
+
+        umull2 v0.8h, v1.16h, v2.16b
+        umull2 v0.4s, v1.8s, v2.8h
+        umull2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Long - Variant 2
+//------------------------------------------------------------------------------
+
+        sqdmlal v0.4s, v1.4s, v2.4h
+        sqdmlal v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                          ^
+
+        sqdmlal2 v0.4s, v1.8s, v2.8h
+        sqdmlal2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                        ^
+
+        // Mismatched vector types
+        sqdmlal v0.8h, v1.8b, v2.8b
+        sqdmlal2 v0.8h, v1.16b, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal v0.8h, v1.8b, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal2 v0.8h, v1.16b, v2.16b
+// CHECK-ERROR:                    ^
+
+        sqdmlsl v0.4s, v1.4s, v2.4h
+        sqdmlsl v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                          ^
+
+        sqdmlsl2 v0.4s, v1.8s, v2.8h
+        sqdmlsl2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                        ^
+
+        // Mismatched vector types
+        sqdmlsl v0.8h, v1.8b, v2.8b
+        sqdmlsl2 v0.8h, v1.16b, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl v0.8h, v1.8b, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl2 v0.8h, v1.16b, v2.16b
+// CHECK-ERROR:                    ^
+
+
+        sqdmull v0.4s, v1.4s, v2.4h
+        sqdmull v0.2d, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull v0.4s, v1.4s, v2.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull v0.2d, v1.2d, v2.2s
+// CHECK-ERROR:                          ^
+
+        sqdmull2 v0.4s, v1.8s, v2.8h
+        sqdmull2 v0.2d, v1.4d, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull2 v0.4s, v1.8s, v2.8h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull2 v0.2d, v1.4d, v2.4s
+// CHECK-ERROR:                        ^
+
+        // Mismatched vector types
+        sqdmull v0.8h, v1.8b, v2.8b
+        sqdmull2 v0.8h, v1.16b, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull v0.8h, v1.8b, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull2 v0.8h, v1.16b, v2.16b
+// CHECK-ERROR:                    ^
+
+
+//------------------------------------------------------------------------------
+// Long - Variant 3
+//------------------------------------------------------------------------------
+
+        pmull v0.8h, v1.8h, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull v0.8h, v1.8h, v2.8b
+// CHECK-ERROR:                        ^
+
+        pmull v0.1q, v1.2d, v2.2d
+        
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull v0.1q, v1.2d, v2.2d
+// CHECK-ERROR:                     ^
+
+        // Mismatched vector types
+        pmull v0.4s, v1.4h, v2.4h
+        pmull v0.2d, v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull v0.4s, v1.4h, v2.4h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull v0.2d, v1.2s, v2.2s
+// CHECK-ERROR:                 ^
+
+
+        pmull2 v0.8h, v1.16h, v2.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull2 v0.8h, v1.16h, v2.16b
+// CHECK-ERROR:                      ^
+
+        pmull2 v0.q, v1.2d, v2.2d
+        
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull2 v0.q, v1.2d, v2.2d
+// CHECK-ERROR:                  ^
+
+        // Mismatched vector types (operands are well-formed; the layouts are rejected)
+        pmull2 v0.4s, v1.8h, v2.8h
+        pmull2 v0.2d, v1.4s, v2.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull2 v0.4s, v1.8h, v2.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        pmull2 v0.2d, v1.4s, v2.4s
+// CHECK-ERROR:                  ^
+
+//------------------------------------------------------------------------------
+// Widen
+//------------------------------------------------------------------------------
+
+        saddw v0.8h, v1.8h, v2.8h
+        saddw v0.4s, v1.4s, v2.4s
+        saddw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                               ^
+
+        saddw2 v0.8h, v1.8h, v2.16h
+        saddw2 v0.4s, v1.4s, v2.8s
+        saddw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR:                             ^
+
+        uaddw v0.8h, v1.8h, v2.8h
+        uaddw v0.4s, v1.4s, v2.4s
+        uaddw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                               ^
+
+        uaddw2 v0.8h, v1.8h, v2.16h
+        uaddw2 v0.4s, v1.4s, v2.8s
+        uaddw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR:                             ^
+
+        ssubw v0.8h, v1.8h, v2.8h
+        ssubw v0.4s, v1.4s, v2.4s
+        ssubw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                               ^
+
+        ssubw2 v0.8h, v1.8h, v2.16h
+        ssubw2 v0.4s, v1.4s, v2.8s
+        ssubw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ssubw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR:                             ^
+
+        usubw v0.8h, v1.8h, v2.8h
+        usubw v0.4s, v1.4s, v2.4s
+        usubw v0.2d, v1.2d, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubw v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubw v0.4s, v1.4s, v2.4s
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubw v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                               ^
+
+        usubw2 v0.8h, v1.8h, v2.16h
+        usubw2 v0.4s, v1.4s, v2.8s
+        usubw2 v0.2d, v1.2d, v2.4d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubw2 v0.8h, v1.8h, v2.16h
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubw2 v0.4s, v1.4s, v2.8s
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usubw2 v0.2d, v1.2d, v2.4d
+// CHECK-ERROR:                             ^
+
+//------------------------------------------------------------------------------
+// Narrow
+//------------------------------------------------------------------------------
+
+        addhn v0.8b, v1.8h, v2.8d
+        addhn v0.4h, v1.4s, v2.4h
+        addhn v0.2s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addhn v0.8b, v1.8h, v2.8d
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addhn v0.4h, v1.4s, v2.4h
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addhn v0.2s, v1.2d, v2.2s
+// CHECK-ERROR:                               ^
+
+        addhn2 v0.16b, v1.8h, v2.8b
+        addhn2 v0.8h, v1.4s, v2.4h
+        addhn2 v0.4s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addhn2 v0.16b, v1.8h, v2.8b
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addhn2 v0.8h, v1.4s, v2.4h
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addhn2 v0.4s, v1.2d, v2.2s
+// CHECK-ERROR:                                ^
+
+        raddhn v0.8b, v1.8h, v2.8b
+        raddhn v0.4h, v1.4s, v2.4h
+        raddhn v0.2s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        raddhn v0.8b, v1.8h, v2.8b
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        raddhn v0.4h, v1.4s, v2.4h
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        raddhn v0.2s, v1.2d, v2.2s
+// CHECK-ERROR:                                ^
+
+        raddhn2 v0.16b, v1.8h, v2.8b
+        raddhn2 v0.8h, v1.4s, v2.4h
+        raddhn2 v0.4s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        raddhn2 v0.16b, v1.8h, v2.8b
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        raddhn2 v0.8h, v1.4s, v2.4h
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        raddhn2 v0.4s, v1.2d, v2.2s
+// CHECK-ERROR:                                 ^
+
+        rsubhn v0.8b, v1.8h, v2.8b
+        rsubhn v0.4h, v1.4s, v2.4h
+        rsubhn v0.2s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        rsubhn v0.8b, v1.8h, v2.8b
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        rsubhn v0.4h, v1.4s, v2.4h
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        rsubhn v0.2s, v1.2d, v2.2s
+// CHECK-ERROR:                                ^
+
+        rsubhn2 v0.16b, v1.8h, v2.8b
+        rsubhn2 v0.8h, v1.4s, v2.4h
+        rsubhn2 v0.4s, v1.2d, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        rsubhn2 v0.16b, v1.8h, v2.8b
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        rsubhn2 v0.8h, v1.4s, v2.4h
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        rsubhn2 v0.4s, v1.2d, v2.2s
+// CHECK-ERROR:                                 ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Integer)
+//----------------------------------------------------------------------
+         // invalid vector types
+      addp s0, d1.2d
+      addp d0, d1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          addp s0, d1.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          addp d0, d1.2s
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // invalid vector types
+      faddp s0, d1.2d
+      faddp d0, d1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          faddp s0, d1.2d
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          faddp d0, d1.2s
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce Maximum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fmaxp s0, v1.2d
+      fmaxp d31, v2.2s
+      fmaxp h3, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxp s0, v1.2d
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxp d31, v2.2s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxp h3, v2.2s
+// CHECK-ERROR:                ^
+
+
+//----------------------------------------------------------------------
+// Scalar Reduce Minimum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fminp s0, v1.4h
+      fminp d31, v2.8h
+      fminp b3, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminp s0, v1.4h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminp d31, v2.8h
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminp b3, v2.2s
+// CHECK-ERROR:                ^
+
+
+//----------------------------------------------------------------------
+// Scalar Reduce maxNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fmaxnmp s0, v1.8b
+      fmaxnmp d31, v2.16b
+      fmaxnmp v1.2s, v2.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxnmp s0, v1.8b
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmaxnmp d31, v2.16b
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR:          fmaxnmp v1.2s, v2.2s
+// CHECK-ERROR:          ^
+
+//----------------------------------------------------------------------
+// Scalar Reduce minNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         // mismatched and invalid vector types
+      fminnmp s0, v1.2d
+      fminnmp d31, v2.4s
+      fminnmp v1.4s, v2.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminnmp s0, v1.2d
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminnmp d31, v2.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fminnmp v1.4s, v2.2d
+// CHECK-ERROR:          ^
+
+      mla v0.2d, v1.2d, v16.d[1]
+      mla v0.2s, v1.2s, v2.s[4]
+      mla v0.4s, v1.4s, v2.s[4]
+      mla v0.2h, v1.2h, v2.h[1]
+      mla v0.4h, v1.4h, v2.h[8]
+      mla v0.8h, v1.8h, v2.h[8]
+      mla v0.4h, v1.4h, v16.h[2]
+      mla v0.8h, v1.8h, v16.h[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mla v0.2d, v1.2d, v16.d[1]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mla v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mla v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mla v0.2h, v1.2h, v2.h[1]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mla v0.4h, v1.4h, v2.h[8]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mla v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mla v0.4h, v1.4h, v16.h[2]
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mla v0.8h, v1.8h, v16.h[2]
+// CHECK-ERROR:                              ^
+
+      mls v0.2d, v1.2d, v16.d[1]
+      mls v0.2s, v1.2s, v2.s[4]
+      mls v0.4s, v1.4s, v2.s[4]
+      mls v0.2h, v1.2h, v2.h[1]
+      mls v0.4h, v1.4h, v2.h[8]
+      mls v0.8h, v1.8h, v2.h[8]
+      mls v0.4h, v1.4h, v16.h[2]
+      mls v0.8h, v1.8h, v16.h[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mls v0.2d, v1.2d, v16.d[1]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mls v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mls v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mls v0.2h, v1.2h, v2.h[1]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mls v0.4h, v1.4h, v2.h[8]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mls v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mls v0.4h, v1.4h, v16.h[2]
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mls v0.8h, v1.8h, v16.h[2]
+// CHECK-ERROR:                              ^
+
+      fmla v0.4h, v1.4h, v2.h[2]
+      fmla v0.8h, v1.8h, v2.h[2]
+      fmla v0.2s, v1.2s, v2.s[4]
+      fmla v0.2s, v1.2s, v22.s[4]
+      fmla v3.4s, v8.4s, v2.s[4]
+      fmla v3.4s, v8.4s, v22.s[4]
+      fmla v0.2d, v1.2d, v2.d[2]
+      fmla v0.2d, v1.2d, v22.d[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmla v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmla v0.8h, v1.8h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmla v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmla v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmla v3.4s, v8.4s, v2.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmla v3.4s, v8.4s, v22.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmla v0.2d, v1.2d, v2.d[2]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmla v0.2d, v1.2d, v22.d[2]
+// CHECK-ERROR:                                 ^
+
+      fmls v0.4h, v1.4h, v2.h[2]
+      fmls v0.8h, v1.8h, v2.h[2]
+      fmls v0.2s, v1.2s, v2.s[4]
+      fmls v0.2s, v1.2s, v22.s[4]
+      fmls v3.4s, v8.4s, v2.s[4]
+      fmls v3.4s, v8.4s, v22.s[4]
+      fmls v0.2d, v1.2d, v2.d[2]
+      fmls v0.2d, v1.2d, v22.d[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmls v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmls v0.8h, v1.8h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmls v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmls v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmls v3.4s, v8.4s, v2.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmls v3.4s, v8.4s, v22.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmls v0.2d, v1.2d, v2.d[2]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmls v0.2d, v1.2d, v22.d[2]
+// CHECK-ERROR:                                 ^
+
+      smlal v0.4h, v1.4h, v2.h[2]
+      smlal v0.4s, v1.4h, v2.h[8]
+      smlal v0.4s, v1.4h, v16.h[2]
+      smlal v0.2s, v1.2s, v2.s[4]
+      smlal v0.2d, v1.2s, v2.s[4]
+      smlal v0.2d, v1.2s, v22.s[4]
+      smlal2 v0.4h, v1.8h, v1.h[2]
+      smlal2 v0.4s, v1.8h, v1.h[8]
+      smlal2 v0.4s, v1.8h, v16.h[2]
+      smlal2 v0.2s, v1.4s, v1.s[2]
+      smlal2 v0.2d, v1.4s, v1.s[4]
+      smlal2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal v0.4s, v1.4h, v16.h[2]
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal2 v0.4h, v1.8h, v1.h[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal2 v0.4s, v1.8h, v1.h[8]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal2 v0.4s, v1.8h, v16.h[2]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlal2 v0.2s, v1.4s, v1.s[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal2 v0.2d, v1.4s, v1.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlal2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                   ^
+
+      smlsl v0.4h, v1.4h, v2.h[2]
+      smlsl v0.4s, v1.4h, v2.h[8]
+      smlsl v0.4s, v1.4h, v16.h[2]
+      smlsl v0.2s, v1.2s, v2.s[4]
+      smlsl v0.2d, v1.2s, v2.s[4]
+      smlsl v0.2d, v1.2s, v22.s[4]
+      smlsl2 v0.4h, v1.8h, v1.h[2]
+      smlsl2 v0.4s, v1.8h, v1.h[8]
+      smlsl2 v0.4s, v1.8h, v16.h[2]
+      smlsl2 v0.2s, v1.4s, v1.s[2]
+      smlsl2 v0.2d, v1.4s, v1.s[4]
+      smlsl2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl v0.4s, v1.4h, v16.h[2]
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl2 v0.4h, v1.8h, v1.h[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl2 v0.4s, v1.8h, v1.h[8]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl2 v0.4s, v1.8h, v16.h[2]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smlsl2 v0.2s, v1.4s, v1.s[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl2 v0.2d, v1.4s, v1.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smlsl2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                   ^
+
+      umlal v0.4h, v1.4h, v2.h[2]
+      umlal v0.4s, v1.4h, v2.h[8]
+      umlal v0.4s, v1.4h, v16.h[2]
+      umlal v0.2s, v1.2s, v2.s[4]
+      umlal v0.2d, v1.2s, v2.s[4]
+      umlal v0.2d, v1.2s, v22.s[4]
+      umlal2 v0.4h, v1.8h, v1.h[2]
+      umlal2 v0.4s, v1.8h, v1.h[8]
+      umlal2 v0.4s, v1.8h, v16.h[2]
+      umlal2 v0.2s, v1.4s, v1.s[2]
+      umlal2 v0.2d, v1.4s, v1.s[4]
+      umlal2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal v0.4s, v1.4h, v16.h[2]
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal2 v0.4h, v1.8h, v1.h[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal2 v0.4s, v1.8h, v1.h[8]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal2 v0.4s, v1.8h, v16.h[2]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlal2 v0.2s, v1.4s, v1.s[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal2 v0.2d, v1.4s, v1.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlal2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                   ^
+
+      umlsl v0.4h, v1.4h, v2.h[2]
+      umlsl v0.4s, v1.4h, v2.h[8]
+      umlsl v0.4s, v1.4h, v16.h[2]
+      umlsl v0.2s, v1.2s, v2.s[4]
+      umlsl v0.2d, v1.2s, v2.s[4]
+      umlsl v0.2d, v1.2s, v22.s[4]
+      umlsl2 v0.4h, v1.8h, v1.h[2]
+      umlsl2 v0.4s, v1.8h, v1.h[8]
+      umlsl2 v0.4s, v1.8h, v16.h[2]
+      umlsl2 v0.2s, v1.4s, v1.s[2]
+      umlsl2 v0.2d, v1.4s, v1.s[4]
+      umlsl2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl v0.4s, v1.4h, v16.h[2]
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl2 v0.4h, v1.8h, v1.h[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl2 v0.4s, v1.8h, v1.h[8]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl2 v0.4s, v1.8h, v16.h[2]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umlsl2 v0.2s, v1.4s, v1.s[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl2 v0.2d, v1.4s, v1.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umlsl2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                   ^
+
+      sqdmlal v0.4h, v1.4h, v2.h[2]
+      sqdmlal v0.4s, v1.4h, v2.h[8]
+      sqdmlal v0.4s, v1.4h, v16.h[2]
+      sqdmlal v0.2s, v1.2s, v2.s[4]
+      sqdmlal v0.2d, v1.2s, v2.s[4]
+      sqdmlal v0.2d, v1.2s, v22.s[4]
+      sqdmlal2 v0.4h, v1.8h, v1.h[2]
+      sqdmlal2 v0.4s, v1.8h, v1.h[8]
+      sqdmlal2 v0.4s, v1.8h, v16.h[2]
+      sqdmlal2 v0.2s, v1.4s, v1.s[2]
+      sqdmlal2 v0.2d, v1.4s, v1.s[4]
+      sqdmlal2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal v0.4s, v1.4h, v16.h[2]
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal2 v0.4h, v1.8h, v1.h[2]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal2 v0.4s, v1.8h, v1.h[8]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal2 v0.4s, v1.8h, v16.h[2]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal2 v0.2s, v1.4s, v1.s[2]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal2 v0.2d, v1.4s, v1.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlal2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                     ^
+
+      sqdmlsl v0.4h, v1.4h, v2.h[2]
+      sqdmlsl v0.4s, v1.4h, v2.h[8]
+      sqdmlsl v0.4s, v1.4h, v16.h[2]
+      sqdmlsl v0.2s, v1.2s, v2.s[4]
+      sqdmlsl v0.2d, v1.2s, v2.s[4]
+      sqdmlsl v0.2d, v1.2s, v22.s[4]
+      sqdmlsl2 v0.4h, v1.8h, v1.h[2]
+      sqdmlsl2 v0.4s, v1.8h, v1.h[8]
+      sqdmlsl2 v0.4s, v1.8h, v16.h[2]
+      sqdmlsl2 v0.2s, v1.4s, v1.s[2]
+      sqdmlsl2 v0.2d, v1.4s, v1.s[4]
+      sqdmlsl2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl v0.4s, v1.4h, v16.h[2]
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl2 v0.4h, v1.8h, v1.h[2]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl2 v0.4s, v1.8h, v1.h[8]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl2 v0.4s, v1.8h, v16.h[2]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl2 v0.2s, v1.4s, v1.s[2]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl2 v0.2d, v1.4s, v1.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmlsl2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                     ^
+
+      mul v0.4h, v1.4h, v2.h[8]
+      mul v0.4h, v1.4h, v16.h[8]
+      mul v0.8h, v1.8h, v2.h[8]
+      mul v0.8h, v1.8h, v16.h[8]
+      mul v0.2s, v1.2s, v2.s[4]
+      mul v0.2s, v1.2s, v22.s[4]
+      mul v0.4s, v1.4s, v2.s[4]
+      mul v0.4s, v1.4s, v22.s[4]
+      mul v0.2d, v1.2d, v2.d[1]
+
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.4h, v1.4h, v2.h[8]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.4h, v1.4h, v16.h[8]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.8h, v1.8h, v16.h[8]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        mul v0.4s, v1.4s, v22.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        mul v0.2d, v1.2d, v2.d[1]
+// CHECK-ERROR:               ^
+
+      fmul v0.4h, v1.4h, v2.h[4]
+      fmul v0.2s, v1.2s, v2.s[4]
+      fmul v0.2s, v1.2s, v22.s[4]
+      fmul v0.4s, v1.4s, v2.s[4]
+      fmul v0.4s, v1.4s, v22.s[4]
+      fmul v0.2d, v1.2d, v2.d[2]
+      fmul v0.2d, v1.2d, v22.d[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmul v0.4h, v1.4h, v2.h[4]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmul v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmul v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmul v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmul v0.4s, v1.4s, v22.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmul v0.2d, v1.2d, v2.d[2]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmul v0.2d, v1.2d, v22.d[2]
+// CHECK-ERROR:                                 ^
+
+      fmulx v0.4h, v1.4h, v2.h[4]
+      fmulx v0.2s, v1.2s, v2.s[4]
+      fmulx v0.2s, v1.2s, v22.s[4]
+      fmulx v0.4s, v1.4s, v2.s[4]
+      fmulx v0.4s, v1.4s, v22.s[4]
+      fmulx v0.2d, v1.2d, v2.d[2]
+      fmulx v0.2d, v1.2d, v22.d[2]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmulx v0.4h, v1.4h, v2.h[4]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmulx v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmulx v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmulx v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmulx v0.4s, v1.4s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmulx v0.2d, v1.2d, v2.d[2]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        fmulx v0.2d, v1.2d, v22.d[2]
+// CHECK-ERROR:                                  ^
+
+      smull v0.4h, v1.4h, v2.h[2]
+      smull v0.4s, v1.4h, v2.h[8]
+      smull v0.4s, v1.4h, v16.h[4]
+      smull v0.2s, v1.2s, v2.s[2]
+      smull v0.2d, v1.2s, v2.s[4]
+      smull v0.2d, v1.2s, v22.s[4]
+      smull2 v0.4h, v1.8h, v2.h[2]
+      smull2 v0.4s, v1.8h, v2.h[8]
+      smull2 v0.4s, v1.8h, v16.h[4]
+      smull2 v0.2s, v1.4s, v2.s[2]
+      smull2 v0.2d, v1.4s, v2.s[4]
+      smull2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smull v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull v0.4s, v1.4h, v16.h[4]
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull v0.2s, v1.2s, v2.s[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smull v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smull v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull2 v0.4h, v1.8h, v2.h[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smull2 v0.4s, v1.8h, v2.h[8]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull2 v0.4s, v1.8h, v16.h[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smull2 v0.2s, v1.4s, v2.s[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smull2 v0.2d, v1.4s, v2.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        smull2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                   ^
+
+      umull v0.4h, v1.4h, v2.h[2]
+      umull v0.4s, v1.4h, v2.h[8]
+      umull v0.4s, v1.4h, v16.h[4]
+      umull v0.2s, v1.2s, v2.s[2]
+      umull v0.2d, v1.2s, v2.s[4]
+      umull v0.2d, v1.2s, v22.s[4]
+      umull2 v0.4h, v1.8h, v2.h[2]
+      umull2 v0.4s, v1.8h, v2.h[8]
+      umull2 v0.4s, v1.8h, v16.h[4]
+      umull2 v0.2s, v1.4s, v2.s[2]
+      umull2 v0.2d, v1.4s, v2.s[4]
+      umull2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umull v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull v0.4s, v1.4h, v16.h[4]
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull v0.2s, v1.2s, v2.s[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umull v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umull v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull2 v0.4h, v1.8h, v2.h[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umull2 v0.4s, v1.8h, v2.h[8]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull2 v0.4s, v1.8h, v16.h[4]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umull2 v0.2s, v1.4s, v2.s[2]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umull2 v0.2d, v1.4s, v2.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        umull2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                   ^
+
+      sqdmull v0.4h, v1.4h, v2.h[2]
+      sqdmull v0.4s, v1.4h, v2.h[8]
+      sqdmull v0.4s, v1.4h, v16.h[4]
+      sqdmull v0.2s, v1.2s, v2.s[2]
+      sqdmull v0.2d, v1.2s, v2.s[4]
+      sqdmull v0.2d, v1.2s, v22.s[4]
+      sqdmull2 v0.4h, v1.8h, v2.h[2]
+      sqdmull2 v0.4s, v1.8h, v2.h[8]
+      sqdmull2 v0.4s, v1.8h, v16.h[4]
+      sqdmull2 v0.2s, v1.4s, v2.s[2]
+      sqdmull2 v0.2d, v1.4s, v2.s[4]
+      sqdmull2 v0.2d, v1.4s, v22.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull v0.4h, v1.4h, v2.h[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmull v0.4s, v1.4h, v2.h[8]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull v0.4s, v1.4h, v16.h[4]
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull v0.2s, v1.2s, v2.s[2]
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmull v0.2d, v1.2s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmull v0.2d, v1.2s, v22.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull2 v0.4h, v1.8h, v2.h[2]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmull2 v0.4s, v1.8h, v2.h[8]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull2 v0.4s, v1.8h, v16.h[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull2 v0.2s, v1.4s, v2.s[2]
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmull2 v0.2d, v1.4s, v2.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmull2 v0.2d, v1.4s, v22.s[4]
+// CHECK-ERROR:                                     ^
+
+      sqdmulh v0.4h, v1.4h, v2.h[8]
+      sqdmulh v0.4h, v1.4h, v16.h[2]
+      sqdmulh v0.8h, v1.8h, v2.h[8]
+      sqdmulh v0.8h, v1.8h, v16.h[2]
+      sqdmulh v0.2s, v1.2s, v2.s[4]
+      sqdmulh v0.2s, v1.2s, v22.s[4]
+      sqdmulh v0.4s, v1.4s, v2.s[4]
+      sqdmulh v0.4s, v1.4s, v22.s[4]
+      sqdmulh v0.2d, v1.2d, v22.d[1]
+
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmulh v0.4h, v1.4h, v2.h[8]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh v0.4h, v1.4h, v16.h[2]
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmulh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh v0.8h, v1.8h, v16.h[2]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmulh v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmulh v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmulh v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqdmulh v0.4s, v1.4s, v22.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh v0.2d, v1.2d, v22.d[1]
+// CHECK-ERROR:                   ^
+
+      sqrdmulh v0.4h, v1.4h, v2.h[8]
+      sqrdmulh v0.4h, v1.4h, v16.h[2]
+      sqrdmulh v0.8h, v1.8h, v2.h[8]
+      sqrdmulh v0.8h, v1.8h, v16.h[2]
+      sqrdmulh v0.2s, v1.2s, v2.s[4]
+      sqrdmulh v0.2s, v1.2s, v22.s[4]
+      sqrdmulh v0.4s, v1.4s, v2.s[4]
+      sqrdmulh v0.4s, v1.4s, v22.s[4]
+      sqrdmulh v0.2d, v1.2d, v22.d[1]
+
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqrdmulh v0.4h, v1.4h, v2.h[8]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh v0.4h, v1.4h, v16.h[2]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqrdmulh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh v0.8h, v1.8h, v16.h[2]
+// CHECK-ERROR:                                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqrdmulh v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqrdmulh v0.2s, v1.2s, v22.s[4]
+// CHECK-ERROR:                                     ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqrdmulh v0.4s, v1.4s, v2.s[4]
+// CHECK-ERROR:                                    ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:        sqrdmulh v0.4s, v1.4s, v22.s[4]
+// CHECK-ERROR:                                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh v0.2d, v1.2d, v22.d[1]
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Across vectors
+//----------------------------------------------------------------------
+
+        saddlv b0, v1.8b
+        saddlv b0, v1.16b
+        saddlv h0, v1.4h
+        saddlv h0, v1.8h
+        saddlv s0, v1.2s
+        saddlv s0, v1.4s
+        saddlv d0, v1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv b0, v1.8b
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv b0, v1.16b
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv h0, v1.4h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv h0, v1.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv s0, v1.2s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv s0, v1.4s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        saddlv d0, v1.2s
+// CHECK-ERROR:                   ^
+
+        uaddlv b0, v1.8b
+        uaddlv b0, v1.16b
+        uaddlv h0, v1.4h
+        uaddlv h0, v1.8h
+        uaddlv s0, v1.2s
+        uaddlv s0, v1.4s
+        uaddlv d0, v1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv b0, v1.8b
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv b0, v1.16b
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv h0, v1.4h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv h0, v1.8h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv s0, v1.2s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv s0, v1.4s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uaddlv d0, v1.2s
+// CHECK-ERROR:                   ^
+
+        smaxv s0, v1.2s
+        sminv s0, v1.2s
+        umaxv s0, v1.2s
+        uminv s0, v1.2s
+        addv s0, v1.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smaxv s0, v1.2s
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sminv s0, v1.2s
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umaxv s0, v1.2s
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uminv s0, v1.2s
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addv s0, v1.2s
+// CHECK-ERROR:                 ^
+
+        smaxv d0, v1.2d
+        sminv d0, v1.2d
+        umaxv d0, v1.2d
+        uminv d0, v1.2d
+        addv d0, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        smaxv d0, v1.2d
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sminv d0, v1.2d
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        umaxv d0, v1.2d
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uminv d0, v1.2d
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        addv d0, v1.2d
+// CHECK-ERROR:             ^
+
+        fmaxnmv b0, v1.16b
+        fminnmv b0, v1.16b
+        fmaxv b0, v1.16b
+        fminv b0, v1.16b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnmv b0, v1.16b
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnmv b0, v1.16b
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxv b0, v1.16b
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminv b0, v1.16b
+// CHECK-ERROR:              ^
+
+        fmaxnmv h0, v1.8h
+        fminnmv h0, v1.8h
+        fmaxv h0, v1.8h
+        fminv h0, v1.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnmv h0, v1.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnmv h0, v1.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxv h0, v1.8h
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminv h0, v1.8h
+// CHECK-ERROR:              ^
+
+        fmaxnmv d0, v1.2d
+        fminnmv d0, v1.2d
+        fmaxv d0, v1.2d
+        fminv d0, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxnmv d0, v1.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminnmv d0, v1.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmaxv d0, v1.2d
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fminv d0, v1.2d
+// CHECK-ERROR:              ^
+
+//----------------------------------------------------------------------
+// Floating-point Multiply Extended
+//----------------------------------------------------------------------
+
+    fmulx s20, h22, s15
+    fmulx d23, d11, s1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmulx s20, h22, s15
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmulx d23, d11, s1
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Step
+//----------------------------------------------------------------------
+
+    frecps s21, s16, h13
+    frecps d22, s30, d21
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          frecps s21, s16, h13
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          frecps d22, s30, d21
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Square Root Step
+//----------------------------------------------------------------------
+
+    frsqrts s21, h5, s12
+    frsqrts d8, s22, d18
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          frsqrts s21, h5, s12
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          frsqrts d8, s22, d18
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Vector load/store multiple N-element structure (class SIMD lselem)
+//----------------------------------------------------------------------
+         ld1 {x3}, [x2]
+         ld1 {v4}, [x0]
+         ld1 {v32.16b}, [x0]
+         ld1 {v15.8h}, [x32]
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        ld1 {x3}, [x2]
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        ld1 {v4}, [x0]
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        ld1 {v32.16b}, [x0]
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ld1 {v15.8h}, [x32]
+// CHECK-ERROR:                       ^
+
+         ld1 {v0.16b, v2.16b}, [x0]
+         ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
+         ld1 v0.8b, v1.8b}, [x0]
+         ld1 {v0.8h-v4.8h}, [x0]
+         ld1 {v1.8h-v1.8h}, [x0]
+         ld1 {v15.8h-v17.4h}, [x15]
+         ld1 {v0.8b-v2.8b, [x0]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld1 {v0.16b, v2.16b}, [x0]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
+// CHECK-ERROR:                                         ^
+// CHECK-ERROR: error: '{' expected
+// CHECK-ERROR:        ld1 v0.8b, v1.8b}, [x0]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        ld1 {v0.8h-v4.8h}, [x0]
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        ld1 {v1.8h-v1.8h}, [x0]
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        ld1 {v15.8h-v17.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: '}' expected
+// CHECK-ERROR:        ld1 {v0.8b-v2.8b, [x0]
+// CHECK-ERROR:                        ^
+
+         ld2 {v15.8h, v16.4h}, [x15]
+         ld2 {v0.8b, v2.8b}, [x0]
+         ld2 {v15.4h, v16.4h, v17.4h}, [x32]
+         ld2 {v15.8h-v16.4h}, [x15]
+         ld2 {v0.2d-v2.2d}, [x0]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld2 {v15.8h, v16.4h}, [x15]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld2 {v0.8b, v2.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ld2 {v15.4h, v16.4h, v17.4h}, [x32]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        ld2 {v15.8h-v16.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ld2 {v0.2d-v2.2d}, [x0]
+// CHECK-ERROR:            ^
+
+         ld3 {v15.8h, v16.8h, v17.4h}, [x15]
+         ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
+         ld3 {v0.8b, v2.8b, v3.8b}, [x0]
+         ld3 {v15.8h-v17.4h}, [x15]
+         ld3 {v31.4s-v2.4s}, [sp]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld3 {v15.8h, v16.8h, v17.4h}, [x15]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld3 {v0.8b, v2.8b, v3.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        ld3 {v15.8h-v17.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ld3 {v31.4s-v2.4s}, [sp]
+// CHECK-ERROR:            ^
+
+         ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
+         ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
+         ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
+         ld4 {v15.8h-v18.4h}, [x15]
+         ld4 {v31.2s-v1.2s}, [x31]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
+// CHECK-ERROR:                                             ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        ld4 {v15.8h-v18.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ld4 {v31.2s-v1.2s}, [x31]
+// CHECK-ERROR:            ^
+
+         st1 {x3}, [x2]
+         st1 {v4}, [x0]
+         st1 {v32.16b}, [x0]
+         st1 {v15.8h}, [x32]
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        st1 {x3}, [x2]
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        st1 {v4}, [x0]
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        st1 {v32.16b}, [x0]
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        st1 {v15.8h}, [x32]
+// CHECK-ERROR:                       ^
+
+         st1 {v0.16b, v2.16b}, [x0]
+         st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
+         st1 v0.8b, v1.8b}, [x0]
+         st1 {v0.8h-v4.8h}, [x0]
+         st1 {v1.8h-v1.8h}, [x0]
+         st1 {v15.8h-v17.4h}, [x15]
+         st1 {v0.8b-v2.8b, [x0]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st1 {v0.16b, v2.16b}, [x0]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
+// CHECK-ERROR:                                         ^
+// CHECK-ERROR: error: '{' expected
+// CHECK-ERROR:        st1 v0.8b, v1.8b}, [x0]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        st1 {v0.8h-v4.8h}, [x0]
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        st1 {v1.8h-v1.8h}, [x0]
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        st1 {v15.8h-v17.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: '}' expected
+// CHECK-ERROR:        st1 {v0.8b-v2.8b, [x0]
+// CHECK-ERROR:                        ^
+
+         st2 {v15.8h, v16.4h}, [x15]
+         st2 {v0.8b, v2.8b}, [x0]
+         st2 {v15.4h, v16.4h, v17.4h}, [x30]
+         st2 {v15.8h-v16.4h}, [x15]
+         st2 {v0.2d-v2.2d}, [x0]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st2 {v15.8h, v16.4h}, [x15]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st2 {v0.8b, v2.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        st2 {v15.4h, v16.4h, v17.4h}, [x30]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        st2 {v15.8h-v16.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        st2 {v0.2d-v2.2d}, [x0]
+// CHECK-ERROR:            ^
+
+         st3 {v15.8h, v16.8h, v17.4h}, [x15]
+         st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
+         st3 {v0.8b, v2.8b, v3.8b}, [x0]
+         st3 {v15.8h-v17.4h}, [x15]
+         st3 {v31.4s-v2.4s}, [sp]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st3 {v15.8h, v16.8h, v17.4h}, [x15]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR:        st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st3 {v0.8b, v2.8b, v3.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        st3 {v15.8h-v17.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        st3 {v31.4s-v2.4s}, [sp]
+// CHECK-ERROR:            ^
+
+         st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
+         st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
+         st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
+         st4 {v15.8h-v18.4h}, [x15]
+         st4 {v31.2s-v1.2s}, [x31]
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR:        st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
+// CHECK-ERROR:                                             ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR:        st4 {v15.8h-v18.4h}, [x15]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        st4 {v31.2s-v1.2s}, [x31]
+// CHECK-ERROR:            ^
+
+//----------------------------------------------------------------------
+// Vector post-index load/store multiple N-element structure
+// (class SIMD lselem-post)
+//----------------------------------------------------------------------
+         ld1 {v0.16b}, [x0], #8
+         ld1 {v0.8h, v1.16h}, [x0], x1
+         ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          ld1 {v0.16b}, [x0], #8
+// CHECK-ERROR:                              ^
+// CHECK-ERROR:  error: expected vector type register
+// CHECK-ERROR:          ld1 {v0.8h, v1.16h}, [x0], x1
+// CHECK-ERROR:                      ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24
+// CHECK-ERROR:                                                  ^
+
+         ld2 {v0.16b, v1.16b}, [x0], #16
+         ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48
+         ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          ld2 {v0.16b, v1.16b}, [x0], #16
+// CHECK-ERROR:                                      ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48
+// CHECK-ERROR:                                           ^
+// CHECK-ERROR:  error: invalid space between two vectors
+// CHECK-ERROR:          ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1
+// CHECK-ERROR:                                     ^
+
+         st1 {v0.16b}, [x0], #8
+         st1 {v0.8h, v1.16h}, [x0], x1
+         st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          st1 {v0.16b}, [x0], #8
+// CHECK-ERROR:                              ^
+// CHECK-ERROR:  error: expected vector type register
+// CHECK-ERROR:          st1 {v0.8h, v1.16h}, [x0], x1
+// CHECK-ERROR:                      ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], #24
+// CHECK-ERROR:                                                  ^
+
+         st2 {v0.16b, v1.16b}, [x0], #16
+         st3 {v5.2s, v6.2s, v7.2s}, [x1], #48
+         st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          st2 {v0.16b, v1.16b}, [x0], #16
+// CHECK-ERROR:                                      ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:          st3 {v5.2s, v6.2s, v7.2s}, [x1], #48
+// CHECK-ERROR:                                           ^
+// CHECK-ERROR:  error: invalid space between two vectors
+// CHECK-ERROR:          st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1
+// CHECK-ERROR:                                     ^
+
+//------------------------------------------------------------------------------
+// Load single N-element structure to all lanes of N consecutive
+// registers (N = 1,2,3,4)
+//------------------------------------------------------------------------------
+         ld1r {x1}, [x0]
+         ld2r {v31.4s, v0.2s}, [sp]
+         ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+         ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp]
+// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: ld1r {x1}, [x0]
+// CHECK-ERROR:       ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp]
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+// CHECK-ERROR:      ^
+// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp]
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Load/Store single N-element structure to/from one lane of N consecutive
+// registers (N = 1, 2,3,4)
+//------------------------------------------------------------------------------
+         ld1 {v0.b}[16], [x0]
+         ld2 {v15.h, v16.h}[8], [x15]
+         ld3 {v31.s, v0.s, v1.s}[-1], [sp]
+         ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: ld1 {v0.b}[16], [x0]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15]
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: expected lane number
+// CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp]
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
+// CHECK-ERROR:                              ^
+
+         st1 {v0.d}[16], [x0]
+         st2 {v31.s, v0.s}[3], [8]
+         st3 {v15.h, v16.h, v17.h}[-1], [x15]
+         st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: st1 {v0.d}[16], [x0]
+// CHECK-ERROR:            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: st2 {v31.s, v0.s}[3], [8]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected lane number
+// CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15]
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
+// CHECK-ERROR:                              ^
+
+//------------------------------------------------------------------------------
+// Post-index of load single N-element structure to all lanes of N consecutive
+// registers (N = 1,2,3,4)
+//------------------------------------------------------------------------------
+         ld1r {v15.8h}, [x15], #5
+         ld2r {v0.2d, v1.2d}, [x0], #7
+         ld3r {v15.4h, v16.4h, v17.4h}, [x15], #1
+         ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld1r {v15.8h}, [x15], #5
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld2r {v0.2d, v1.2d}, [x0], #7
+// CHECK-ERROR:                            ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #1
+// CHECK-ERROR:                                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], sp
+// CHECK-ERROR:                                           ^
+
+//------------------------------------------------------------------------------
+// Post-index of Load/Store single N-element structure to/from one lane of N
+// consecutive registers (N = 1, 2,3,4)
+//------------------------------------------------------------------------------
+         ld1 {v0.b}[0], [x0], #2
+         ld2 {v15.h, v16.h}[0], [x15], #3
+         ld3 {v31.s, v0.s, v1.d}[0], [sp], x9
+         ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #24
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld1 {v0.b}[0], [x0], #2
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #24
+// CHECK-ERROR:                                        ^
+
+         st1 {v0.d}[0], [x0], #7
+         st2 {v31.s, v0.s}[0], [sp], #6
+         st3 {v15.h, v16.h, v17.h}[0], [x15], #8
+         st4 {v0.b, v1.b, v2.b, v3.b}[1], [x0], #1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: st1 {v0.d}[0], [x0], #7
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: st2 {v31.s, v0.s}[0], [sp], #6
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[0], [x15], #8
+// CHECK-ERROR:                                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: st4 {v0.b, v1.b, v2.b, v3.b}[1], [x0], #1
+// CHECK-ERROR:                                        ^
+
+
+         ins v2.b[16], w1
+         ins v7.h[8], w14
+         ins v20.s[5], w30
+         ins v1.d[2], x7
+         ins v2.b[3], b1
+         ins v7.h[2], h14
+         ins v20.s[1], s30
+         ins v1.d[0], d7
+
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         ins v2.b[16], w1
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         ins v7.h[8], w14
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         ins v20.s[5], w30
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         ins v1.d[2], x7
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ins v2.b[3], b1
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ins v7.h[2], h14
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ins v20.s[1], s30
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ins v1.d[0], d7
+// CHECK-ERROR:                      ^
+
+         smov w1, v0.b[16]
+         smov w14, v6.h[8]
+         smov x1, v0.b[16]
+         smov x14, v6.h[8]
+         smov x20, v9.s[5]
+         smov w1, v0.d[0]
+         smov w14, v6.d[1]
+         smov x1, v0.d[0]
+         smov x14, v6.d[1]
+         smov x20, v9.d[0]
+
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         smov w1, v0.b[16]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         smov w14, v6.h[8]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         smov x1, v0.b[16]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         smov x14, v6.h[8]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         smov x20, v9.s[5]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         smov w1, v0.d[0]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         smov w14, v6.d[1]
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         smov x1, v0.d[0]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         smov x14, v6.d[1]
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         smov x20, v9.d[0]
+// CHECK-ERROR:                      ^
+
+         umov w1, v0.b[16]
+         umov w14, v6.h[8]
+         umov w20, v9.s[5]
+         umov x7, v18.d[3]
+         umov w1, v0.d[0]
+         umov s20, v9.s[2]
+         umov d7, v18.d[1]
+
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         umov w1, v0.b[16]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         umov w14, v6.h[8]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         umov w20, v9.s[5]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:         umov x7, v18.d[3]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         umov w1, v0.d[0]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         umov s20, v9.s[2]
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         umov d7, v18.d[1]
+// CHECK-ERROR:              ^
+
+         Ins v1.h[2], v3.b[6]
+         Ins v6.h[7], v7.s[2]
+         Ins v15.d[0], v22.s[2]
+         Ins v0.d[0], v4.b[1]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         Ins v1.h[2], v3.b[6]
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         Ins v6.h[7], v7.s[2]
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         Ins v15.d[0], v22.s[2]
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         Ins v0.d[0], v4.b[1]
+// CHECK-ERROR:                         ^
+
+         dup v1.8h, v2.b[2]
+         dup v11.4s, v7.h[7]
+         dup v17.2d, v20.s[0]
+         dup v1.16b, v2.h[2]
+         dup v11.8h, v7.s[3]
+         dup v17.4s, v20.d[0]
+         dup v5.2d, v1.b[1]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v1.8h, v2.b[2]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v11.4s, v7.h[7]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v17.2d, v20.s[0]
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v1.16b, v2.h[2]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v11.8h, v7.s[3]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v17.4s, v20.d[0]
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v5.2d, v1.b[1]
+// CHECK-ERROR:                       ^
+
+         dup v1.8b, b1
+         dup v11.4h, h14
+         dup v17.2s, s30
+         dup v1.16b, d2
+         dup v11.8s, w16
+         dup v17.4d, w28
+         dup v5.2d, w0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v1.8b, b1
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v11.4h, h14
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v17.2s, s30
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v1.16b, d2
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v11.8s, w16
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v17.4d, w28
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         dup v5.2d, w0
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Bitwise Equal
+//----------------------------------------------------------------------
+
+         cmeq b20, d21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmeq b20, d21, d22
+// CHECK-ERROR:               ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Bitwise Equal To Zero
+//----------------------------------------------------------------------
+
+         cmeq d20, b21, #0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmeq d20, b21, #0
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Unsigned Higher Or Same
+//----------------------------------------------------------------------
+
+         cmhs b20, d21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmhs b20, d21, d22
+// CHECK-ERROR:               ^
+
+        
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than Or Equal
+//----------------------------------------------------------------------
+
+         cmge b20, d21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmge b20, d21, d22
+// CHECK-ERROR:               ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         cmge d20, b21, #0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmge d20, b21, #0
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Unsigned Higher
+//----------------------------------------------------------------------
+
+         cmhi b20, d21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmhi b20, d21, d22
+// CHECK-ERROR:               ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than
+//----------------------------------------------------------------------
+
+         cmgt b20, d21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmgt b20, d21, d22
+// CHECK-ERROR:               ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than Zero
+//----------------------------------------------------------------------
+
+         cmgt d20, b21, #0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmgt d20, b21, #0
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Less Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         cmle d20, b21, #0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmle d20, b21, #0
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Less Than Zero
+//----------------------------------------------------------------------
+
+         cmlt d20, b21, #0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmlt d20, b21, #0
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Compare Bitwise Test Bits
+//----------------------------------------------------------------------
+
+         cmtst b20, d21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          cmtst b20, d21, d22
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Equal
+//----------------------------------------------------------------------
+
+         fcmeq s10, h11, s12
+         fcmeq d20, s21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmeq s10, h11, s12
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmeq d20, s21, d22
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Equal To Zero
+//----------------------------------------------------------------------
+
+         fcmeq h10, s11, #0.0
+         fcmeq d20, s21, #0.0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmeq h10, s11, #0.0
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmeq d20, s21, #0.0
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than Or Equal
+//----------------------------------------------------------------------
+
+         fcmge s10, h11, s12
+         fcmge d20, s21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmge s10, h11, s12
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmge d20, s21, d22
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         fcmge h10, s11, #0.0
+         fcmge d20, s21, #0.0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmge h10, s11, #0.0
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmge d20, s21, #0.0
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than
+//----------------------------------------------------------------------
+
+         fcmgt s10, h11, s12
+         fcmgt d20, s21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmgt s10, h11, s12
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmgt d20, s21, d22
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than Zero
+//----------------------------------------------------------------------
+
+         fcmgt h10, s11, #0.0
+         fcmgt d20, s21, #0.0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmgt h10, s11, #0.0
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmgt d20, s21, #0.0
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         fcmle h10, s11, #0.0
+         fcmle d20, s21, #0.0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmle h10, s11, #0.0
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmle d20, s21, #0.0
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Less Than Zero
+//----------------------------------------------------------------------
+
+         fcmlt h10, s11, #0.0
+         fcmlt d20, s21, #0.0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmlt h10, s11, #0.0
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fcmlt d20, s21, #0.0
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+//----------------------------------------------------------------------
+
+         facge s10, h11, s12
+         facge d20, s21, d22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          facge s10, h11, s12
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          facge d20, s21, d22
+// CHECK-ERROR:                     ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Absolute Compare Mask Greater Than
+//----------------------------------------------------------------------
+
+         facgt s10, h11, s12
+         facgt d20, d21, s22
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          facgt s10, h11, s12
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          facgt d20, d21, s22
+// CHECK-ERROR:                          ^
+        
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Accumulate of Unsigned Value
+//----------------------------------------------------------------------
+
+        suqadd b0, h1
+        suqadd h0, s1
+        suqadd s0, d1
+        suqadd d0, b0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        suqadd b0, h1
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        suqadd h0, s1
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        suqadd s0, d1
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        suqadd d0, b0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Saturating Accumulate of Signed Value
+//----------------------------------------------------------------------
+
+        usqadd b0, h1
+        usqadd h0, s1
+        usqadd s0, d1
+        usqadd d0, b1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usqadd b0, h1
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usqadd h0, s1
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usqadd s0, d1
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        usqadd d0, b1
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Absolute Value
+//----------------------------------------------------------------------
+
+    abs d29, s24
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        abs d29, s24
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Scalar Negate
+//----------------------------------------------------------------------
+
+    neg d29, s24
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        neg d29, s24
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Doubling Multiply-Add Long
+//----------------------------------------------------------------------
+
+    sqdmlal s17, h27, s12
+    sqdmlal d19, s24, d12
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlal s17, h27, s12
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR:        sqdmlal d19, s24, d12
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Doubling Multiply-Subtract Long
+//----------------------------------------------------------------------
+
+    sqdmlsl s14, h12, s25
+    sqdmlsl d12, s23, d13
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmlsl s14, h12, s25
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR:        sqdmlsl d12, s23, d13
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Doubling Multiply Long
+//----------------------------------------------------------------------
+
+    sqdmull s12, h22, s12
+    sqdmull d15, s22, d12
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmull s12, h22, s12
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR:        sqdmull d15, s22, d12
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Unsigned Narrow
+//----------------------------------------------------------------------
+
+    sqxtun b19, b14
+    sqxtun h21, h15
+    sqxtun s20, s12
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqxtun b19, b14
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqxtun h21, h15
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqxtun s20, s12
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Signed Narrow
+//----------------------------------------------------------------------
+
+    sqxtn b18, b18
+    sqxtn h20, h17
+    sqxtn s19, s14
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqxtn b18, b18
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqxtn h20, h17
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqxtn s19, s14
+// CHECK-ERROR:                   ^
+
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Saturating Extract Narrow
+//----------------------------------------------------------------------
+
+    uqxtn b18, b18
+    uqxtn h20, h17
+    uqxtn s19, s14
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqxtn b18, b18
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqxtn h20, h17
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        uqxtn s19, s14
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Shift Right (Immediate)
+//----------------------------------------------------------------------
+        sshr d15, d16, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        sshr d15, d16, #99
+// CHECK-ERROR:                       ^
+
+        sshr d15, s16, #31
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sshr d15, s16, #31
+// CHECK-ERROR:                  ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Shift Right (Immediate)
+//----------------------------------------------------------------------
+
+        ushr d10, d17, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        ushr d10, d17, #99
+// CHECK-ERROR:                       ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Rounding Shift Right (Immediate)
+//----------------------------------------------------------------------
+
+        srshr d19, d18, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        srshr d19, d18, #99
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Rounding Shift Right (Immediate)
+//----------------------------------------------------------------------
+
+        urshr d20, d23, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        urshr d20, d23, #99
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+
+        ssra d18, d12, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        ssra d18, d12, #99
+// CHECK-ERROR:                       ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+
+        usra d20, d13, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        usra d20, d13, #99
+// CHECK-ERROR:                       ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+
+        srsra d15, d11, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        srsra d15, d11, #99
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+
+        ursra d18, d10, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        ursra d18, d10, #99
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Scalar Shift Left (Immediate)
+//----------------------------------------------------------------------
+
+        shl d7, d10, #99
+
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:        shl d7, d10, #99
+// CHECK-ERROR:                     ^
+
+        shl d7, s16, #31
+        
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        shl d7, s16, #31
+// CHECK-ERROR:                ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Left (Immediate)
+//----------------------------------------------------------------------
+
+        sqshl b11, b19, #99
+        sqshl h13, h18, #99
+        sqshl s14, s17, #99
+        sqshl d15, d16, #99
+
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:        sqshl b11, b19, #99
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:        sqshl h13, h18, #99
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:        sqshl s14, s17, #99
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:        sqshl d15, d16, #99
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Shift Left (Immediate)
+//----------------------------------------------------------------------
+
+        uqshl b18, b15, #99
+        uqshl h11, h18, #99
+        uqshl s14, s19, #99
+        uqshl d15, d12, #99
+
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:        uqshl b18, b15, #99
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:        uqshl h11, h18, #99
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:        uqshl s14, s19, #99
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:        uqshl d15, d12, #99
+// CHECK-ERROR:                        ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Left Unsigned (Immediate)
+//----------------------------------------------------------------------
+
+        sqshlu b15, b18, #99
+        sqshlu h19, h17, #99
+        sqshlu s16, s14, #99
+        sqshlu d11, d13, #99
+
+// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR:        sqshlu  b15, b18, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR:        sqshlu  h19, h17, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR:        sqshlu  s16, s14, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:        sqshlu  d11, d13, #99
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Shift Right And Insert (Immediate)
+//----------------------------------------------------------------------
+
+        sri d10, d12, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        sri d10, d12, #99
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Shift Left And Insert (Immediate)
+//----------------------------------------------------------------------
+
+        sli d10, d14, #99
+
+// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR:        sli d10, d14, #99
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        sqshrn b10, h15, #99
+        sqshrn h17, s10, #99
+        sqshrn s18, d10, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:        sqshrn  b10, h15, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:        sqshrn  h17, s10, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        sqshrn  s18, d10, #99
+// CHECK-ERROR:                          ^
+        
+//----------------------------------------------------------------------
+// Unsigned Saturating Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        uqshrn b12, h10, #99
+        uqshrn h10, s14, #99
+        uqshrn s10, d12, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:        uqshrn  b12, h10, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:        uqshrn  h10, s14, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        uqshrn  s10, d12, #99
+// CHECK-ERROR:                          ^
+        
+//----------------------------------------------------------------------
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        sqrshrn b10, h13, #99
+        sqrshrn h15, s10, #99
+        sqrshrn s15, d12, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:        sqrshrn b10, h13, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:        sqrshrn h15, s10, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        sqrshrn s15, d12, #99
+// CHECK-ERROR:                          ^
+        
+//----------------------------------------------------------------------
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        uqrshrn b10, h12, #99
+        uqrshrn h12, s10, #99
+        uqrshrn s10, d10, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:        uqrshrn b10, h12, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:        uqrshrn h12, s10, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        uqrshrn s10, d10, #99
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        sqshrun b15, h10, #99
+        sqshrun h20, s14, #99
+        sqshrun s10, d15, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:        sqshrun b15, h10, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:        sqshrun h20, s14, #99
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        sqshrun s10, d15, #99
+// CHECK-ERROR:                          ^
+
+//----------------------------------------------------------------------
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        sqrshrun b17, h10, #99
+        sqrshrun h10, s13, #99
+        sqrshrun s22, d16, #99
+
+// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR:        sqrshrun b17, h10, #99
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR:        sqrshrun h10, s13, #99
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        sqrshrun s22, d16, #99
+// CHECK-ERROR:                           ^
+
+//----------------------------------------------------------------------
+// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
+//----------------------------------------------------------------------
+
+    scvtf s22, s13, #0
+    scvtf s22, s13, #33
+    scvtf d21, d12, #65
+    scvtf d21, s12, #31
+        
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        scvtf s22, s13, #0
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        scvtf s22, s13, #33
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        scvtf d21, d12, #65
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        scvtf d21, s12, #31
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
+//----------------------------------------------------------------------
+
+    ucvtf s22, s13, #34
+    ucvtf d21, d14, #65
+    ucvtf d21, s14, #64
+        
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        ucvtf s22, s13, #34
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        ucvtf d21, d14, #65
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        ucvtf d21, s14, #64
+// CHECK-ERROR:                   ^
+
+//------------------------------------------------------------------------------
+// Element reverse
+//------------------------------------------------------------------------------
+         rev64 v6.2d, v8.2d
+         rev32 v30.2s, v31.2s
+         rev32 v30.4s, v31.4s
+         rev32 v30.2d, v31.2d
+         rev16 v21.4h, v1.4h
+         rev16 v21.8h, v1.8h
+         rev16 v21.2s, v1.2s
+         rev16 v21.4s, v1.4s
+         rev16 v21.2d, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev64 v6.2d, v8.2d
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev32 v30.2s, v31.2s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev32 v30.4s, v31.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev32 v30.2d, v31.2d
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.4h, v1.4h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.8h, v1.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.2s, v1.2s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.4s, v1.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rev16 v21.2d, v1.2d
+// CHECK-ERROR:                   ^
+
+//------------------------------------------------------------------------------
+// Signed integer pairwise add long
+//------------------------------------------------------------------------------
+
+         saddlp v3.8h, v21.8h
+         saddlp v8.8b, v5.8b
+         saddlp v9.8h, v1.4s
+         saddlp v0.4s, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v3.8h, v21.8h
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v8.8b, v5.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v9.8h, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         saddlp v0.4s, v1.2d
+// CHECK-ERROR:                          ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer pairwise add long
+//------------------------------------------------------------------------------
+
+         uaddlp v3.8h, v21.8h
+         uaddlp v8.8b, v5.8b
+         uaddlp v9.8h, v1.4s
+         uaddlp v0.4s, v1.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v3.8h, v21.8h
+// CHECK-ERROR:                           ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v8.8b, v5.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v9.8h, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uaddlp v0.4s, v1.2d
+// CHECK-ERROR:                          ^
+
+//------------------------------------------------------------------------------
+// Signed integer pairwise add and accumulate long
+//------------------------------------------------------------------------------
+
+         sadalp v3.16b, v21.16b
+         sadalp v8.4h, v5.4h
+         sadalp v9.4s, v1.4s
+         sadalp v0.4h, v1.2s
+         sadalp v12.2d, v4.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v3.16b, v21.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v8.4h, v5.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v9.4s, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v0.4h, v1.2s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sadalp v12.2d, v4.8h
+// CHECK-ERROR:                           ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer pairwise add and accumulate long
+//------------------------------------------------------------------------------
+
+         uadalp v3.16b, v21.16b
+         uadalp v8.4h, v5.4h
+         uadalp v9.4s, v1.4s
+         uadalp v0.4h, v1.2s
+         uadalp v12.2d, v4.8h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v3.16b, v21.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v8.4h, v5.4h
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v9.4s, v1.4s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v0.4h, v1.2s
+// CHECK-ERROR:                          ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uadalp v12.2d, v4.8h
+// CHECK-ERROR:                           ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating accumulate of unsigned value
+//------------------------------------------------------------------------------
+
+         suqadd v0.16b, v31.8b
+         suqadd v1.8b, v9.8h
+         suqadd v13.4h, v21.4s
+         suqadd v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v0.16b, v31.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v1.8b, v9.8h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v13.4h, v21.4s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         suqadd v4.2s, v0.2d
+// CHECK-ERROR:                       ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer saturating accumulate of signed value
+//------------------------------------------------------------------------------
+
+         usqadd v0.16b, v31.8b
+         usqadd v2.8h, v4.4h
+         usqadd v13.4h, v21.4s
+         usqadd v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v0.16b, v31.8b
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v2.8h, v4.4h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v13.4h, v21.4s
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         usqadd v4.2s, v0.2d
+// CHECK-ERROR:                       ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating absolute
+//------------------------------------------------------------------------------
+
+         sqabs v0.16b, v31.8b
+         sqabs v2.8h, v4.4h
+         sqabs v6.4s, v8.2s
+         sqabs v6.2d, v8.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v0.16b, v31.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v2.8h, v4.4h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v6.4s, v8.2s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqabs v6.2d, v8.2s
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating negate
+//------------------------------------------------------------------------------
+
+         sqneg v0.16b, v31.8b
+         sqneg v2.8h, v4.4h
+         sqneg v6.4s, v8.2s
+         sqneg v6.2d, v8.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v0.16b, v31.8b
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v2.8h, v4.4h
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v6.4s, v8.2s
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqneg v6.2d, v8.2s
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Integer absolute
+//------------------------------------------------------------------------------
+
+         abs v0.16b, v31.8b
+         abs v2.8h, v4.4h
+         abs v6.4s, v8.2s
+         abs v6.2d, v8.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v0.16b, v31.8b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v2.8h, v4.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v6.4s, v8.2s
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         abs v6.2d, v8.2s
+// CHECK-ERROR:                    ^
+
+//------------------------------------------------------------------------------
+// Integer count leading sign bits
+//------------------------------------------------------------------------------
+
+         cls v0.2d, v31.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cls v0.2d, v31.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Integer count leading zeros
+//------------------------------------------------------------------------------
+
+         clz v0.2d, v31.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         clz v0.2d, v31.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Population count
+//------------------------------------------------------------------------------
+
+         cnt v2.8h, v4.8h
+         cnt v6.4s, v8.4s
+         cnt v6.2d, v8.2d
+         cnt v13.4h, v21.4h
+         cnt v4.2s, v0.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v2.8h, v4.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v6.4s, v8.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v6.2d, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v13.4h, v21.4h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         cnt v4.2s, v0.2s
+// CHECK-ERROR:                ^
+
+
+//------------------------------------------------------------------------------
+// Bitwise NOT
+//------------------------------------------------------------------------------
+
+         not v2.8h, v4.8h
+         not v6.4s, v8.4s
+         not v6.2d, v8.2d
+         not v13.4h, v21.4h
+         not v4.2s, v0.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v2.8h, v4.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v6.4s, v8.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v6.2d, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v13.4h, v21.4h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         not v4.2s, v0.2s
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Bitwise reverse
+//------------------------------------------------------------------------------
+
+         rbit v2.8h, v4.8h
+         rbit v6.4s, v8.4s
+         rbit v6.2d, v8.2d
+         rbit v13.4h, v21.4h
+         rbit v4.2s, v0.2s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v2.8h, v4.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v6.4s, v8.4s
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v6.2d, v8.2d
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v13.4h, v21.4h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         rbit v4.2s, v0.2s
+// CHECK-ERROR:                 ^
+
+//------------------------------------------------------------------------------
+// Floating-point absolute
+//------------------------------------------------------------------------------
+
+         fabs v0.16b, v31.16b
+         fabs v2.8h, v4.8h
+         fabs v1.8b, v9.8b
+         fabs v13.4h, v21.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v0.16b, v31.16b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v2.8h, v4.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v1.8b, v9.8b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fabs v13.4h, v21.4h
+// CHECK-ERROR:                  ^
+
+//------------------------------------------------------------------------------
+// Floating-point negate
+//------------------------------------------------------------------------------
+
+         fneg v0.16b, v31.16b
+         fneg v2.8h, v4.8h
+         fneg v1.8b, v9.8b
+         fneg v13.4h, v21.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v0.16b, v31.16b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v2.8h, v4.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v1.8b, v9.8b
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fneg v13.4h, v21.4h
+// CHECK-ERROR:                  ^
+
+//------------------------------------------------------------------------------
+// Integer extract and narrow
+//------------------------------------------------------------------------------
+
+         xtn v0.16b, v31.8h
+         xtn v2.8h, v4.4s
+         xtn v6.4s, v8.2d
+         xtn2 v1.8b, v9.8h
+         xtn2 v13.4h, v21.4s
+         xtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn v0.16b, v31.8h
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn v2.8h, v4.4s
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn v6.4s, v8.2d
+// CHECK-ERROR:             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn2 v1.8b, v9.8h
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn2 v13.4h, v21.4s
+// CHECK-ERROR:              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         xtn2 v4.2s, v0.2d
+// CHECK-ERROR:              ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating extract and unsigned narrow
+//------------------------------------------------------------------------------
+
+         sqxtun v0.16b, v31.8h
+         sqxtun v2.8h, v4.4s
+         sqxtun v6.4s, v8.2d
+         sqxtun2 v1.8b, v9.8h
+         sqxtun2 v13.4h, v21.4s
+         sqxtun2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun v0.16b, v31.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun v2.8h, v4.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun v6.4s, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun2 v1.8b, v9.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun2 v13.4h, v21.4s
+// CHECK-ERROR:                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtun2 v4.2s, v0.2d
+// CHECK-ERROR:                 ^
+
+//------------------------------------------------------------------------------
+// Signed integer saturating extract and narrow
+//------------------------------------------------------------------------------
+
+         sqxtn v0.16b, v31.8h
+         sqxtn v2.8h, v4.4s
+         sqxtn v6.4s, v8.2d
+         sqxtn2 v1.8b, v9.8h
+         sqxtn2 v13.4h, v21.4s
+         sqxtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn v0.16b, v31.8h
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn v2.8h, v4.4s
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn v6.4s, v8.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn2 v1.8b, v9.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn2 v13.4h, v21.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         sqxtn2 v4.2s, v0.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Unsigned integer saturating extract and narrow
+//------------------------------------------------------------------------------
+
+         uqxtn v0.16b, v31.8h
+         uqxtn v2.8h, v4.4s
+         uqxtn v6.4s, v8.2d
+         uqxtn2 v1.8b, v9.8h
+         uqxtn2 v13.4h, v21.4s
+         uqxtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn v0.16b, v31.8h
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn v2.8h, v4.4s
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn v6.4s, v8.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn2 v1.8b, v9.8h
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn2 v13.4h, v21.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uqxtn2 v4.2s, v0.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Integer shift left long (shll on .8b/.4h/.2s sources is valid; no error)
+//------------------------------------------------------------------------------
+
+         shll2 v2.8h, v4.16b, #7
+         shll2 v6.4s, v8.8h, #15
+         shll2 v6.2d, v8.4s, #31
+         shll v2.8h, v4.16b, #8
+         shll v6.4s, v8.8h, #16
+         shll v6.2d, v8.4s, #32
+         shll v2.8h, v4.8b, #8
+         shll v6.4s, v8.4h, #16
+         shll v6.2d, v8.2s, #32
+         shll2 v2.8h, v4.8b, #5
+         shll2 v6.4s, v8.4h, #14
+         shll2 v6.2d, v8.2s, #1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v2.8h, v4.16b, #7
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.4s, v8.8h, #15
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.2d, v8.4s, #31
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll v2.8h, v4.16b, #8
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll v6.4s, v8.8h, #16
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll v6.2d, v8.4s, #32
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v2.8h, v4.8b, #5
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.4s, v8.4h, #14
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         shll2 v6.2d, v8.2s, #1
+// CHECK-ERROR:                      ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert downsize
+//------------------------------------------------------------------------------
+
+         fcvtn v2.8h, v4.4s
+         fcvtn v6.4s, v8.2d
+         fcvtn2 v13.4h, v21.4s
+         fcvtn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn v2.8h, v4.4s
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn v6.4s, v8.2d
+// CHECK-ERROR:               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn2 v13.4h, v21.4s
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtn2 v4.2s, v0.2d
+// CHECK-ERROR:                ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert downsize with inexact
+//------------------------------------------------------------------------------
+
+         fcvtxn v6.4s, v8.2d
+         fcvtxn2 v4.2s, v0.2d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtxn v6.4s, v8.2d
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtxn2 v4.2s, v0.2d
+// CHECK-ERROR:                 ^
+
+//------------------------------------------------------------------------------
+// Floating-point convert upsize
+//------------------------------------------------------------------------------
+
+         fcvtl2 v9.4s, v1.4h
+         fcvtl2 v0.2d, v1.2s
+         fcvtl v12.4s, v4.8h
+         fcvtl v17.2d, v28.4s
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl2 v9.4s, v1.4h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl2 v0.2d, v1.2s
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl v12.4s, v4.8h
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtl v17.2d, v28.4s
+// CHECK-ERROR:                       ^
+
+//------------------------------------------------------------------------------
+// Round to integral, FP<->integer convert, reciprocal estimates, square root
+//------------------------------------------------------------------------------
+
+         frintn v0.16b, v31.16b
+         frintn v2.8h, v4.8h
+         frintn v1.8b, v9.8b
+         frintn v13.4h, v21.4h
+
+         frinta v0.16b, v31.16b
+         frinta v2.8h, v4.8h
+         frinta v1.8b, v9.8b
+         frinta v13.4h, v21.4h
+
+         frintp v0.16b, v31.16b
+         frintp v2.8h, v4.8h
+         frintp v1.8b, v9.8b
+         frintp v13.4h, v21.4h
+
+         frintm v0.16b, v31.16b
+         frintm v2.8h, v4.8h
+         frintm v1.8b, v9.8b
+         frintm v13.4h, v21.4h
+
+         frintx v0.16b, v31.16b
+         frintx v2.8h, v4.8h
+         frintx v1.8b, v9.8b
+         frintx v13.4h, v21.4h
+
+         frintz v0.16b, v31.16b
+         frintz v2.8h, v4.8h
+         frintz v1.8b, v9.8b
+         frintz v13.4h, v21.4h
+
+         frinti v0.16b, v31.16b
+         frinti v2.8h, v4.8h
+         frinti v1.8b, v9.8b
+         frinti v13.4h, v21.4h
+
+         fcvtns v0.16b, v31.16b
+         fcvtns v2.8h, v4.8h
+         fcvtns v1.8b, v9.8b
+         fcvtns v13.4h, v21.4h
+
+         fcvtnu v0.16b, v31.16b
+         fcvtnu v2.8h, v4.8h
+         fcvtnu v1.8b, v9.8b
+         fcvtnu v13.4h, v21.4h
+
+         fcvtps v0.16b, v31.16b
+         fcvtps v2.8h, v4.8h
+         fcvtps v1.8b, v9.8b
+         fcvtps v13.4h, v21.4h
+
+         fcvtpu v0.16b, v31.16b
+         fcvtpu v2.8h, v4.8h
+         fcvtpu v1.8b, v9.8b
+         fcvtpu v13.4h, v21.4h
+
+         fcvtms v0.16b, v31.16b
+         fcvtms v2.8h, v4.8h
+         fcvtms v1.8b, v9.8b
+         fcvtms v13.4h, v21.4h
+
+         fcvtmu v0.16b, v31.16b
+         fcvtmu v2.8h, v4.8h
+         fcvtmu v1.8b, v9.8b
+         fcvtmu v13.4h, v21.4h
+
+         fcvtzs v0.16b, v31.16b
+         fcvtzs v2.8h, v4.8h
+         fcvtzs v1.8b, v9.8b
+         fcvtzs v13.4h, v21.4h
+
+         fcvtzu v0.16b, v31.16b
+         fcvtzu v2.8h, v4.8h
+         fcvtzu v1.8b, v9.8b
+         fcvtzu v13.4h, v21.4h
+
+         fcvtas v0.16b, v31.16b
+         fcvtas v2.8h, v4.8h
+         fcvtas v1.8b, v9.8b
+         fcvtas v13.4h, v21.4h
+
+         fcvtau v0.16b, v31.16b
+         fcvtau v2.8h, v4.8h
+         fcvtau v1.8b, v9.8b
+         fcvtau v13.4h, v21.4h
+
+         urecpe v0.16b, v31.16b
+         urecpe v2.8h, v4.8h
+         urecpe v1.8b, v9.8b
+         urecpe v13.4h, v21.4h
+         urecpe v1.2d, v9.2d
+
+         ursqrte v0.16b, v31.16b
+         ursqrte v2.8h, v4.8h
+         ursqrte v1.8b, v9.8b
+         ursqrte v13.4h, v21.4h
+         ursqrte v1.2d, v9.2d
+
+         scvtf v0.16b, v31.16b
+         scvtf v2.8h, v4.8h
+         scvtf v1.8b, v9.8b
+         scvtf v13.4h, v21.4h
+
+         ucvtf v0.16b, v31.16b
+         ucvtf v2.8h, v4.8h
+         ucvtf v1.8b, v9.8b
+         ucvtf v13.4h, v21.4h
+
+         frecpe v0.16b, v31.16b
+         frecpe v2.8h, v4.8h
+         frecpe v1.8b, v9.8b
+         frecpe v13.4h, v21.4h
+
+         frsqrte v0.16b, v31.16b
+         frsqrte v2.8h, v4.8h
+         frsqrte v1.8b, v9.8b
+         frsqrte v13.4h, v21.4h
+
+         fsqrt v0.16b, v31.16b
+         fsqrt v2.8h, v4.8h
+         fsqrt v1.8b, v9.8b
+         fsqrt v13.4h, v21.4h
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintn v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinta v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintp v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintm v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintx v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frintz v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frinti v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtns v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtnu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtps v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtpu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtms v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtmu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzs v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtzu v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtas v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fcvtau v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         urecpe v1.2d, v9.2d
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v0.16b, v31.16b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v2.8h, v4.8h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v1.8b, v9.8b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v13.4h, v21.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ursqrte v1.2d, v9.2d
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v0.16b, v31.16b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v2.8h, v4.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v1.8b, v9.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf v13.4h, v21.4h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v0.16b, v31.16b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v2.8h, v4.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v1.8b, v9.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf v13.4h, v21.4h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v0.16b, v31.16b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v2.8h, v4.8h
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v1.8b, v9.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frecpe v13.4h, v21.4h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v0.16b, v31.16b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v2.8h, v4.8h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v1.8b, v9.8b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         frsqrte v13.4h, v21.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v0.16b, v31.16b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v2.8h, v4.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v1.8b, v9.8b
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         fsqrt v13.4h, v21.4h
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
+//----------------------------------------------------------------------
+// Negative cases: immediates outside [1, 32] (S) / [1, 64] (D), then mixed S/D registers.
+    fcvtzs s21, s12, #0
+    fcvtzs d21, d12, #65
+    fcvtzs s21, d12, #1
+
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        fcvtzs s21, s12, #0
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        fcvtzs d21, d12, #65
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtzs s21, d12, #1
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
+//----------------------------------------------------------------------
+// Negative cases: immediates outside [1, 32] (S) / [1, 64] (D), then mixed S/D registers.
+    fcvtzu s21, s12, #33
+    fcvtzu d21, d12, #0
+    fcvtzu s21, d12, #1
+
+// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR:        fcvtzu s21, s12, #33
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR:        fcvtzu d21, d12, #0
+// CHECK-ERROR:                         ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtzu s21, d12, #1
+// CHECK-ERROR:                    ^
+
+//----------------------------------------------------------------------
+// Crypto (AES and SHA): rejected register classes and vector arrangements
+//----------------------------------------------------------------------
+// NOTE(review): expectations now use the file's error-check prefix; confirm they match llvm-mc.
+        aese v0.8h, v1.8h
+        aese v0.4s, v1.4s
+        aese v0.2d, v1.2d
+        aesd v0.8h, v1.8h
+        aesmc v0.8h, v1.8h
+        aesimc v0.8h, v1.8h
+
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         aese v0.8h, v1.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         aese v0.4s, v1.4s
+// CHECK-ERROR:                 ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         aese v0.2d, v1.2d
+// CHECK-ERROR:                 ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         aesd v0.8h, v1.8h
+// CHECK-ERROR:                 ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         aesmc v0.8h, v1.8h
+// CHECK-ERROR:                  ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         aesimc v0.8h, v1.8h
+// CHECK-ERROR:                   ^
+
+        sha1h b0, b1
+        sha1h h0, h1
+        sha1h d0, d1
+        sha1h q0, q1
+        sha1su1 v0.16b, v1.16b
+        sha1su1 v0.8h, v1.8h
+        sha1su1 v0.2d, v1.2d
+        sha256su0 v0.16b, v1.16b
+
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1h b0, b1
+// CHECK-ERROR:               ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1h h0, h1
+// CHECK-ERROR:               ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1h d0, d1
+// CHECK-ERROR:               ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1h q0, q1
+// CHECK-ERROR:               ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1su1 v0.16b, v1.16b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1su1 v0.8h, v1.8h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1su1 v0.2d, v1.2d
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha256su0 v0.16b, v1.16b
+// CHECK-ERROR:                      ^
+
+        sha1c q0, q1, v2.4s
+        sha1p q0, q1, v2.4s
+        sha1m q0, q1, v2.4s
+        sha1su0 v0.16b, v1.16b, v2.16b
+        sha1su0 v0.8h, v1.8h, v2.8h
+        sha1su0 v0.2d, v1.2d, v2.2d
+        sha256h q0, q1, q2
+        sha256h v0.4s, v1.4s, v2.4s
+        sha256h2 q0, q1, q2
+        sha256su1 v0.16b, v1.16b, v2.16b
+
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1c q0, q1, v2.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1p q0, q1, v2.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1m q0, q1, v2.4s
+// CHECK-ERROR:                   ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1su0 v0.16b, v1.16b, v2.16b
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1su0 v0.8h, v1.8h, v2.8h
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha1su0 v0.2d, v1.2d, v2.2d
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: too few operands for instruction
+// CHECK-ERROR:         sha256h q0, q1, q2
+// CHECK-ERROR:         ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha256h v0.4s, v1.4s, v2.4s
+// CHECK-ERROR:                    ^
+// CHECK-ERROR:  error: too few operands for instruction
+// CHECK-ERROR:         sha256h2 q0, q1, q2
+// CHECK-ERROR:         ^
+// CHECK-ERROR:  error: invalid operand for instruction
+// CHECK-ERROR:         sha256su1 v0.16b, v1.16b, v2.16b
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Bitwise extract
+//----------------------------------------------------------------------
+// First group: a .4h operand among .8b ones, then all-.4h/.2s/.1d operand sets.
+        ext v0.8b, v1.8b, v2.4h, #0x3
+        ext v0.4h, v1.4h, v2.4h, #0x3
+        ext v0.2s, v1.2s, v2.2s, #0x1
+        ext v0.1d, v1.1d, v2.1d, #0x0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.8b, v1.8b, v2.4h, #0x3
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.4h, v1.4h, v2.4h, #0x3
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.2s, v1.2s, v2.2s, #0x1
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.1d, v1.1d, v2.1d, #0x0
+// CHECK-ERROR:                ^
+// Second group: a .8h operand among .16b ones, then all-.8h/.4s/.2d operand sets.
+        ext v0.16b, v1.16b, v2.8h, #0x3
+        ext v0.8h, v1.8h, v2.8h, #0x3
+        ext v0.4s, v1.4s, v2.4s, #0x1
+        ext v0.2d, v1.2d, v2.2d, #0x0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.16b, v1.16b, v2.8h, #0x3
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.8h, v1.8h, v2.8h, #0x3
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.4s, v1.4s, v2.4s, #0x1
+// CHECK-ERROR:                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ext v0.2d, v1.2d, v2.2d, #0x0
+// CHECK-ERROR:                ^
+
+
+//----------------------------------------------------------------------
+// Permutation with 3 vectors
+//----------------------------------------------------------------------
+// Per mnemonic: sources whose arrangement differs from the destination, then an all-.1d form.
+        uzp1 v0.16b, v1.8b, v2.8b
+        uzp1 v0.8b, v1.4b, v2.4b
+        uzp1 v0.8h, v1.4h, v2.4h
+        uzp1 v0.4h, v1.2h, v2.2h
+        uzp1 v0.4s, v1.2s, v2.2s
+        uzp1 v0.2s, v1.1s, v2.1s
+        uzp1 v0.2d, v1.1d, v2.1d
+        uzp1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        uzp2 v0.16b, v1.8b, v2.8b
+        uzp2 v0.8b, v1.4b, v2.4b
+        uzp2 v0.8h, v1.4h, v2.4h
+        uzp2 v0.4h, v1.2h, v2.2h
+        uzp2 v0.4s, v1.2s, v2.2s
+        uzp2 v0.2s, v1.1s, v2.1s
+        uzp2 v0.2d, v1.1d, v2.1d
+        uzp2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        zip1 v0.16b, v1.8b, v2.8b
+        zip1 v0.8b, v1.4b, v2.4b
+        zip1 v0.8h, v1.4h, v2.4h
+        zip1 v0.4h, v1.2h, v2.2h
+        zip1 v0.4s, v1.2s, v2.2s
+        zip1 v0.2s, v1.1s, v2.1s
+        zip1 v0.2d, v1.1d, v2.1d
+        zip1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        zip2 v0.16b, v1.8b, v2.8b
+        zip2 v0.8b, v1.4b, v2.4b
+        zip2 v0.8h, v1.4h, v2.4h
+        zip2 v0.4h, v1.2h, v2.2h
+        zip2 v0.4s, v1.2s, v2.2s
+        zip2 v0.2s, v1.1s, v2.1s
+        zip2 v0.2d, v1.1d, v2.1d
+        zip2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        trn1 v0.16b, v1.8b, v2.8b
+        trn1 v0.8b, v1.4b, v2.4b
+        trn1 v0.8h, v1.4h, v2.4h
+        trn1 v0.4h, v1.2h, v2.2h
+        trn1 v0.4s, v1.2s, v2.2s
+        trn1 v0.2s, v1.1s, v2.1s
+        trn1 v0.2d, v1.1d, v2.1d
+        trn1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        trn2 v0.16b, v1.8b, v2.8b
+        trn2 v0.8b, v1.4b, v2.4b
+        trn2 v0.8h, v1.4h, v2.4h
+        trn2 v0.4h, v1.2h, v2.2h
+        trn2 v0.4s, v1.2s, v2.2s
+        trn2 v0.2s, v1.1s, v2.1s
+        trn2 v0.2d, v1.1d, v2.1d
+        trn2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Permutation with 3 vectors
+//----------------------------------------------------------------------
+// NOTE(review): repeats the preceding permutation cases verbatim; consider keeping one copy.
+        uzp1 v0.16b, v1.8b, v2.8b
+        uzp1 v0.8b, v1.4b, v2.4b
+        uzp1 v0.8h, v1.4h, v2.4h
+        uzp1 v0.4h, v1.2h, v2.2h
+        uzp1 v0.4s, v1.2s, v2.2s
+        uzp1 v0.2s, v1.1s, v2.1s
+        uzp1 v0.2d, v1.1d, v2.1d
+        uzp1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        uzp2 v0.16b, v1.8b, v2.8b
+        uzp2 v0.8b, v1.4b, v2.4b
+        uzp2 v0.8h, v1.4h, v2.4h
+        uzp2 v0.4h, v1.2h, v2.2h
+        uzp2 v0.4s, v1.2s, v2.2s
+        uzp2 v0.2s, v1.1s, v2.1s
+        uzp2 v0.2d, v1.1d, v2.1d
+        uzp2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        zip1 v0.16b, v1.8b, v2.8b
+        zip1 v0.8b, v1.4b, v2.4b
+        zip1 v0.8h, v1.4h, v2.4h
+        zip1 v0.4h, v1.2h, v2.2h
+        zip1 v0.4s, v1.2s, v2.2s
+        zip1 v0.2s, v1.1s, v2.1s
+        zip1 v0.2d, v1.1d, v2.1d
+        zip1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        zip2 v0.16b, v1.8b, v2.8b
+        zip2 v0.8b, v1.4b, v2.4b
+        zip2 v0.8h, v1.4h, v2.4h
+        zip2 v0.4h, v1.2h, v2.2h
+        zip2 v0.4s, v1.2s, v2.2s
+        zip2 v0.2s, v1.1s, v2.1s
+        zip2 v0.2d, v1.1d, v2.1d
+        zip2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        trn1 v0.16b, v1.8b, v2.8b
+        trn1 v0.8b, v1.4b, v2.4b
+        trn1 v0.8h, v1.4h, v2.4h
+        trn1 v0.4h, v1.2h, v2.2h
+        trn1 v0.4s, v1.2s, v2.2s
+        trn1 v0.2s, v1.1s, v2.1s
+        trn1 v0.2d, v1.1d, v2.1d
+        trn1 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+        trn2 v0.16b, v1.8b, v2.8b
+        trn2 v0.8b, v1.4b, v2.4b
+        trn2 v0.8h, v1.4h, v2.4h
+        trn2 v0.4h, v1.2h, v2.2h
+        trn2 v0.4s, v1.2s, v2.2s
+        trn2 v0.2s, v1.1s, v2.1s
+        trn2 v0.2d, v1.1d, v2.1d
+        trn2 v0.1d, v1.1d, v2.1d
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR:                 ^
+
+//----------------------------------------------------------------------
+// Floating Point  multiply (scalar, by element)
+//----------------------------------------------------------------------
+      // mismatched and invalid vector types
+      fmul    s0, s1, v1.h[0]
+      fmul    h0, h1, v1.s[0]
+      // invalid lane
+      fmul    s2, s29, v10.s[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmul    s0, s1, v1.h[0]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmul    h0, h1, v1.s[0]
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error:  lane number incompatible with layout
+// CHECK-ERROR:          fmul    s2, s29, v10.s[4]
+// CHECK-ERROR:                                 ^
+
+//----------------------------------------------------------------------
+// Floating Point  multiply extended (scalar, by element)
+//----------------------------------------------------------------------
+      // mismatched and invalid vector types
+      fmulx    d0, d1, v1.b[0]
+      fmulx    h0, h1, v1.d[0]
+      // invalid lane
+      fmulx    d2, d29, v10.d[3]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmulx    d0, d1, v1.b[0]
+// CHECK-ERROR:                              ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmulx    h0, h1, v1.d[0]
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error:  lane number incompatible with layout
+// CHECK-ERROR:          fmulx    d2, d29, v10.d[3]
+// CHECK-ERROR:                                  ^
+
+//----------------------------------------------------------------------
+// Floating Point fused multiply-add (scalar, by element)
+//----------------------------------------------------------------------
+      // mismatched and invalid vector types
+      fmla    b0, b1, v1.b[0]
+      fmla    d30, s11, v1.d[1]
+      // invalid lane
+      fmla    s16, s22, v16.s[5]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmla    b0, b1, v1.b[0]
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmla    d30, s11, v1.d[1]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error:  lane number incompatible with layout
+// CHECK-ERROR:          fmla    s16, s22, v16.s[5]
+// CHECK-ERROR:                                  ^
+
+//----------------------------------------------------------------------
+// Floating Point fused multiply-subtract (scalar, by element)
+//----------------------------------------------------------------------
+    // mismatched and invalid vector types
+    fmls    s29, h10, v28.s[1]
+    fmls    h7, h17, v26.s[2]
+    // invalid lane
+    fmls    d16, d22, v16.d[-1]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmls    s29, h10, v28.s[1]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          fmls    h7, h17, v26.s[2]
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error:  expected lane number
+// CHECK-ERROR:          fmls    d16, d22, v16.d[-1]
+// CHECK-ERROR:                                  ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply-add long
+// (scalar, by element)
+//----------------------------------------------------------------------
+    // mismatched and invalid vector types
+    sqdmlal s0, h0, v0.s[0]
+    sqdmlal s8, s9, v14.s[1]
+    // invalid lane
+    sqdmlal s4, s5, v1.s[5]
+    // invalid vector index
+    sqdmlal s0, h0, v17.h[0]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmlal s0, h0, v0.s[0]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmlal s8, s9, v14.s[1]
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          sqdmlal s4, s5, v1.s[5]
+// CHECK-ERROR:                               ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmlal s0, h0, v17.h[0]
+// CHECK-ERROR:                           ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply-subtract long
+// (scalar, by element)
+//----------------------------------------------------------------------
+    // mismatched and invalid vector types
+    sqdmlsl s1, h1, v1.d[0]
+    sqdmlsl d1, h1, v13.s[0]
+    // invalid lane
+    sqdmlsl d1, s1, v13.s[4]
+    // invalid vector index
+    sqdmlsl s1, h1, v20.h[7]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmlsl s1, h1, v1.d[0]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmlsl d1, h1, v13.s[0]
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          sqdmlsl d1, s1, v13.s[4]
+// CHECK-ERROR:                                ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmlsl s1, h1, v20.h[7]
+// CHECK-ERROR:                           ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+//----------------------------------------------------------------------
+    // Three groups of rejected forms follow: mismatched/invalid vector
+    // types, an out-of-range lane, and an invalid vector index.
+    //
+    // mismatched and invalid vector types
+    sqdmull s1, h1, v1.s[1]
+    sqdmull s1, s1, v4.s[0]
+    // invalid lane
+    sqdmull s12, h17, v9.h[9]
+    // invalid vector index
+    sqdmull s1, h1, v16.h[5]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmull s1, h1, v1.s[1]
+// CHECK-ERROR:                             ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmull s1, s1, v4.s[0]
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          sqdmull s12, h17, v9.h[9]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmull s1, h1, v16.h[5]
+// CHECK-ERROR:                           ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+//----------------------------------------------------------------------
+    // mismatched and invalid vector types
+    sqdmulh h0, s1, v0.h[0]
+    sqdmulh s25, s26, v27.h[3]
+    // invalid lane
+    sqdmulh s25, s26, v27.s[4]
+    // invalid vector index
+    sqdmulh s0, h1, v30.h[0]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmulh h0, s1, v0.h[0]
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmulh s25, s26, v27.h[3]
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          sqdmulh s25, s26, v27.s[4]
+// CHECK-ERROR:                                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqdmulh s0, h1, v30.h[0]
+// CHECK-ERROR:                      ^
+
+//----------------------------------------------------------------------
+// Scalar Signed saturating rounding doubling multiply
+// returning high half (scalar, by element)
+//----------------------------------------------------------------------
+    // mismatched and invalid vector types
+    sqrdmulh h31, h30, v14.s[2]
+    sqrdmulh s5, h6, v7.s[2]
+    // invalid lane
+    sqrdmulh h31, h30, v14.h[9]
+    // invalid vector index
+    sqrdmulh h31, h30, v20.h[4]
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqrdmulh h31, h30, v14.s[2]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqrdmulh s5, h6, v7.s[2]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          sqrdmulh h31, h30, v14.h[9]
+// CHECK-ERROR:                                 ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          sqrdmulh h31, h30, v20.h[4]
+// CHECK-ERROR:                              ^
+
+//----------------------------------------------------------------------
+// Scalar Duplicate element (scalar)
+//----------------------------------------------------------------------
+      // mismatched and invalid vector types
+      dup b0, v1.d[0]
+      dup h0, v31.b[8]
+      dup s0, v2.h[4]
+      dup d0, v17.s[3]
+      // invalid  lane
+      dup d0, v17.d[4]
+      dup s0, v1.s[7]
+      dup h0, v31.h[16]
+      dup b1, v3.b[16]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          dup b0, v1.d[0]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          dup h0, v31.b[8]
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          dup s0, v2.h[4]
+// CHECK-ERROR:                     ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:          dup d0, v17.s[3]
+// CHECK-ERROR:                      ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          dup d0, v17.d[4]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          dup s0, v1.s[7]
+// CHECK-ERROR:                       ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          dup h0, v31.h[16]
+// CHECK-ERROR:                        ^
+// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR:          dup b1, v3.b[16]
+// CHECK-ERROR:                       ^
+
+//----------------------------------------------------------------------
+// Table look up
+//----------------------------------------------------------------------
+
+        tbl v0.8b, {v1.8b}, v2.8b
+        tbl v0.8b, {v1.8b, v2.8b}, v2.8b
+        tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
+        tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
+        tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbl v0.8b, {v1.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbl v0.8b, {v1.8b, v2.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
+// CHECK-ERROR:                                                    ^
+
+        tbx v0.8b, {v1.8b}, v2.8b
+        tbx v0.8b, {v1.8b, v2.8b}, v2.8b
+        tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
+        tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
+        tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbx v0.8b, {v1.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbx v0.8b, {v1.8b, v2.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid number of vectors
+// CHECK-ERROR:        tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
+// CHECK-ERROR:                                                    ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
+// Odd
+//----------------------------------------------------------------------
+
+    fcvtxn s0, s1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtxn s0, s1
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
+// With Ties To Away
+//----------------------------------------------------------------------
+
+    fcvtas s0, d0
+    fcvtas d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtas s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtas d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding To
+// Nearest With Ties To Away
+//----------------------------------------------------------------------
+
+    fcvtau s0, d0
+    fcvtau d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtau s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtau d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding Toward
+// Minus Infinity
+//----------------------------------------------------------------------
+
+    fcvtms s0, d0
+    fcvtms d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtms s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtms d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+// Minus Infinity
+//----------------------------------------------------------------------
+
+    fcvtmu s0, d0
+    fcvtmu d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtmu s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtmu d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
+// With Ties To Even
+//----------------------------------------------------------------------
+
+    fcvtns s0, d0
+    fcvtns d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtns s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtns d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding To
+// Nearest With Ties To Even
+//----------------------------------------------------------------------
+
+    fcvtnu s0, d0
+    fcvtnu d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtnu s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtnu d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding Toward
+// Positive Infinity
+//----------------------------------------------------------------------
+
+    fcvtps s0, d0
+    fcvtps d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtps s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtps d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+// Positive Infinity
+//----------------------------------------------------------------------
+
+    fcvtpu s0, d0
+    fcvtpu d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtpu s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtpu d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
+//----------------------------------------------------------------------
+
+    fcvtzs s0, d0
+    fcvtzs d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtzs s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtzs d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward 
+// Zero
+//----------------------------------------------------------------------
+
+    fcvtzu s0, d0
+    fcvtzu d0, s0
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtzu s0, d0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fcvtzu d0, s0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Absolute Difference
+//----------------------------------------------------------------------
+
+
+    fabd s29, d24, s20
+    fabd d29, s24, d20
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fabd s29, d24, s20
+// CHECK-ERROR:                  ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fabd d29, s24, d20
+// CHECK-ERROR:                  ^
diff --git a/test/MC/AArch64/neon-extract.s b/test/MC/AArch64/neon-extract.s
new file mode 100644
index 000000000000..2d58a75a4907
--- /dev/null
+++ b/test/MC/AArch64/neon-extract.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions for bitwise extract
+//------------------------------------------------------------------------------
+
+        ext v0.8b, v1.8b, v2.8b, #0x3
+        ext v0.16b, v1.16b, v2.16b, #0x3
+
+// CHECK: ext	v0.8b, v1.8b, v2.8b, #0x3  // encoding: [0x20,0x18,0x02,0x2e]
+// CHECK: ext	v0.16b, v1.16b, v2.16b, #0x3 // encoding: [0x20,0x18,0x02,0x6e]
diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s
new file mode 100644
index 000000000000..212eda2f2092
--- /dev/null
+++ b/test/MC/AArch64/neon-facge-facgt.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Absolute Compare Mask Greater/Less Than Or Equal (Floating Point)
+// FACLE is an alias for FACGE with operands reversed
+//----------------------------------------------------------------------
+         facge v0.2s, v31.2s, v16.2s
+         facge v4.4s, v7.4s, v15.4s
+         facge v29.2d, v2.2d, v5.2d
+         facle v0.2s, v16.2s, v31.2s
+         facle v4.4s, v15.4s, v7.4s
+         facle v29.2d, v5.2d, v2.2d
+
+// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
+// CHECK: facge v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xec,0x2f,0x6e]
+// CHECK: facge v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xec,0x65,0x6e]
+// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
+// CHECK: facge v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xec,0x2f,0x6e]
+// CHECK: facge v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xec,0x65,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Absolute Compare Mask Greater/Less Than (Floating Point)
+// FACLT is an alias for FACGT with operands reversed
+//----------------------------------------------------------------------
+         facgt v31.4s, v29.4s, v28.4s
+         facgt v3.2s, v8.2s, v12.2s
+         facgt v17.2d, v15.2d, v13.2d
+         faclt v31.4s, v28.4s, v29.4s
+         faclt v3.2s,  v12.2s, v8.2s
+         faclt v17.2d, v13.2d, v15.2d
+
+// CHECK: facgt v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xef,0xbc,0x6e]
+// CHECK: facgt v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xed,0xac,0x2e]
+// CHECK: facgt v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xed,0xed,0x6e]
+// CHECK: facgt v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xef,0xbc,0x6e]
+// CHECK: facgt v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xed,0xac,0x2e]
+// CHECK: facgt v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xed,0xed,0x6e]
+
+
diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s
new file mode 100644
index 000000000000..79fe5da5e76f
--- /dev/null
+++ b/test/MC/AArch64/neon-frsqrt-frecp.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Reciprocal Square Root Step (Floating Point)
+//----------------------------------------------------------------------
+         frsqrts v0.2s, v31.2s, v16.2s
+         frsqrts v4.4s, v7.4s, v15.4s
+         frsqrts v29.2d, v2.2d, v5.2d
+
+// CHECK: frsqrts v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xff,0xb0,0x0e]
+// CHECK: frsqrts v4.4s, v7.4s, v15.4s  // encoding: [0xe4,0xfc,0xaf,0x4e]
+// CHECK: frsqrts v29.2d, v2.2d, v5.2d  // encoding: [0x5d,0xfc,0xe5,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Reciprocal Step (Floating Point)
+//----------------------------------------------------------------------
+         frecps v31.4s, v29.4s, v28.4s
+         frecps v3.2s, v8.2s, v12.2s
+         frecps v17.2d, v15.2d, v13.2d
+
+// CHECK: frecps v31.4s, v29.4s, v28.4s  // encoding: [0xbf,0xff,0x3c,0x4e]
+// CHECK: frecps v3.2s, v8.2s, v12.2s    // encoding: [0x03,0xfd,0x2c,0x0e]
+// CHECK: frecps v17.2d, v15.2d, v13.2d  // encoding: [0xf1,0xfd,0x6d,0x4e]
+
+
diff --git a/test/MC/AArch64/neon-halving-add-sub.s b/test/MC/AArch64/neon-halving-add-sub.s
new file mode 100644
index 000000000000..555f1b83b4f3
--- /dev/null
+++ b/test/MC/AArch64/neon-halving-add-sub.s
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Halving Add (Signed)
+//------------------------------------------------------------------------------
+         shadd v0.8b, v1.8b, v2.8b
+         shadd v0.16b, v1.16b, v2.16b
+         shadd v0.4h, v1.4h, v2.4h
+         shadd v0.8h, v1.8h, v2.8h
+         shadd v0.2s, v1.2s, v2.2s
+         shadd v0.4s, v1.4s, v2.4s
+
+// CHECK: shadd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x04,0x22,0x0e]
+// CHECK: shadd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x04,0x22,0x4e]
+// CHECK: shadd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x04,0x62,0x0e]
+// CHECK: shadd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x04,0x62,0x4e]
+// CHECK: shadd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x04,0xa2,0x0e]
+// CHECK: shadd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x04,0xa2,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Halving Add (Unsigned)
+//------------------------------------------------------------------------------
+         uhadd v0.8b, v1.8b, v2.8b
+         uhadd v0.16b, v1.16b, v2.16b
+         uhadd v0.4h, v1.4h, v2.4h
+         uhadd v0.8h, v1.8h, v2.8h
+         uhadd v0.2s, v1.2s, v2.2s
+         uhadd v0.4s, v1.4s, v2.4s
+
+// CHECK: uhadd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x04,0x22,0x2e]
+// CHECK: uhadd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x04,0x22,0x6e]
+// CHECK: uhadd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x04,0x62,0x2e]
+// CHECK: uhadd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x04,0x62,0x6e]
+// CHECK: uhadd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x04,0xa2,0x2e]
+// CHECK: uhadd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x04,0xa2,0x6e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Halving Sub (Signed)
+//------------------------------------------------------------------------------
+         shsub v0.8b, v1.8b, v2.8b
+         shsub v0.16b, v1.16b, v2.16b
+         shsub v0.4h, v1.4h, v2.4h
+         shsub v0.8h, v1.8h, v2.8h
+         shsub v0.2s, v1.2s, v2.2s
+         shsub v0.4s, v1.4s, v2.4s
+
+// CHECK: shsub v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x24,0x22,0x0e]
+// CHECK: shsub v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x24,0x22,0x4e]
+// CHECK: shsub v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x24,0x62,0x0e]
+// CHECK: shsub v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x24,0x62,0x4e]
+// CHECK: shsub v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x24,0xa2,0x0e]
+// CHECK: shsub v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x24,0xa2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Halving Sub (Unsigned)
+//------------------------------------------------------------------------------
+         uhsub v0.8b, v1.8b, v2.8b
+         uhsub v0.16b, v1.16b, v2.16b
+         uhsub v0.4h, v1.4h, v2.4h
+         uhsub v0.8h, v1.8h, v2.8h
+         uhsub v0.2s, v1.2s, v2.2s
+         uhsub v0.4s, v1.4s, v2.4s
+
+// CHECK: uhsub v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x24,0x22,0x2e]
+// CHECK: uhsub v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x24,0x22,0x6e]
+// CHECK: uhsub v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x24,0x62,0x2e]
+// CHECK: uhsub v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x24,0x62,0x6e]
+// CHECK: uhsub v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x24,0xa2,0x2e]
+// CHECK: uhsub v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x24,0xa2,0x6e]
+
diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s
new file mode 100644
index 000000000000..8d2dadb1997f
--- /dev/null
+++ b/test/MC/AArch64/neon-max-min-pairwise.s
@@ -0,0 +1,110 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Maximum Pairwise (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+         smaxp v0.8b, v1.8b, v2.8b
+         smaxp v0.16b, v1.16b, v2.16b
+         smaxp v0.4h, v1.4h, v2.4h
+         smaxp v0.8h, v1.8h, v2.8h
+         smaxp v0.2s, v1.2s, v2.2s
+         smaxp v0.4s, v1.4s, v2.4s
+
+// CHECK: smaxp v0.8b, v1.8b, v2.8b        // encoding: [0x20,0xa4,0x22,0x0e]
+// CHECK: smaxp v0.16b, v1.16b, v2.16b     // encoding: [0x20,0xa4,0x22,0x4e]
+// CHECK: smaxp v0.4h, v1.4h, v2.4h        // encoding: [0x20,0xa4,0x62,0x0e]
+// CHECK: smaxp v0.8h, v1.8h, v2.8h        // encoding: [0x20,0xa4,0x62,0x4e]
+// CHECK: smaxp v0.2s, v1.2s, v2.2s        // encoding: [0x20,0xa4,0xa2,0x0e]
+// CHECK: smaxp v0.4s, v1.4s, v2.4s        // encoding: [0x20,0xa4,0xa2,0x4e]
+
+         umaxp v0.8b, v1.8b, v2.8b
+         umaxp v0.16b, v1.16b, v2.16b
+         umaxp v0.4h, v1.4h, v2.4h
+         umaxp v0.8h, v1.8h, v2.8h
+         umaxp v0.2s, v1.2s, v2.2s
+         umaxp v0.4s, v1.4s, v2.4s
+
+// CHECK: umaxp v0.8b, v1.8b, v2.8b         // encoding: [0x20,0xa4,0x22,0x2e]
+// CHECK: umaxp v0.16b, v1.16b, v2.16b      // encoding: [0x20,0xa4,0x22,0x6e]
+// CHECK: umaxp v0.4h, v1.4h, v2.4h         // encoding: [0x20,0xa4,0x62,0x2e]
+// CHECK: umaxp v0.8h, v1.8h, v2.8h         // encoding: [0x20,0xa4,0x62,0x6e]
+// CHECK: umaxp v0.2s, v1.2s, v2.2s         // encoding: [0x20,0xa4,0xa2,0x2e]
+// CHECK: umaxp v0.4s, v1.4s, v2.4s         // encoding: [0x20,0xa4,0xa2,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Minimum Pairwise (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+         sminp v0.8b, v1.8b, v2.8b
+         sminp v0.16b, v1.16b, v2.16b
+         sminp v0.4h, v1.4h, v2.4h
+         sminp v0.8h, v1.8h, v2.8h
+         sminp v0.2s, v1.2s, v2.2s
+         sminp v0.4s, v1.4s, v2.4s
+
+// CHECK: sminp v0.8b, v1.8b, v2.8b        // encoding: [0x20,0xac,0x22,0x0e]
+// CHECK: sminp v0.16b, v1.16b, v2.16b     // encoding: [0x20,0xac,0x22,0x4e]
+// CHECK: sminp v0.4h, v1.4h, v2.4h        // encoding: [0x20,0xac,0x62,0x0e]
+// CHECK: sminp v0.8h, v1.8h, v2.8h        // encoding: [0x20,0xac,0x62,0x4e]
+// CHECK: sminp v0.2s, v1.2s, v2.2s        // encoding: [0x20,0xac,0xa2,0x0e]
+// CHECK: sminp v0.4s, v1.4s, v2.4s        // encoding: [0x20,0xac,0xa2,0x4e]
+
+         uminp v0.8b, v1.8b, v2.8b
+         uminp v0.16b, v1.16b, v2.16b
+         uminp v0.4h, v1.4h, v2.4h
+         uminp v0.8h, v1.8h, v2.8h
+         uminp v0.2s, v1.2s, v2.2s
+         uminp v0.4s, v1.4s, v2.4s
+
+// CHECK: uminp v0.8b, v1.8b, v2.8b         // encoding: [0x20,0xac,0x22,0x2e]
+// CHECK: uminp v0.16b, v1.16b, v2.16b      // encoding: [0x20,0xac,0x22,0x6e]
+// CHECK: uminp v0.4h, v1.4h, v2.4h         // encoding: [0x20,0xac,0x62,0x2e]
+// CHECK: uminp v0.8h, v1.8h, v2.8h         // encoding: [0x20,0xac,0x62,0x6e]
+// CHECK: uminp v0.2s, v1.2s, v2.2s         // encoding: [0x20,0xac,0xa2,0x2e]
+// CHECK: uminp v0.4s, v1.4s, v2.4s         // encoding: [0x20,0xac,0xa2,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Maximum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         fmaxp v0.2s, v1.2s, v2.2s
+         fmaxp v31.4s, v15.4s, v16.4s
+         fmaxp v7.2d, v8.2d, v25.2d
+
+// CHECK: fmaxp v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xf4,0x22,0x2e]
+// CHECK: fmaxp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x6e]
+// CHECK: fmaxp v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xf5,0x79,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Minimum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         fminp v10.2s, v15.2s, v22.2s
+         fminp v3.4s, v5.4s, v6.4s
+         fminp v17.2d, v13.2d, v2.2d
+
+// CHECK: fminp v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xf5,0xb6,0x2e]
+// CHECK: fminp v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xf4,0xa6,0x6e]
+// CHECK: fminp v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xf5,0xe2,0x6e]
+
+//----------------------------------------------------------------------
+// Vector maxNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         fmaxnmp v0.2s, v1.2s, v2.2s
+         fmaxnmp v31.4s, v15.4s, v16.4s
+         fmaxnmp v7.2d, v8.2d, v25.2d
+
+// CHECK: fmaxnmp v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xc4,0x22,0x2e]
+// CHECK: fmaxnmp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x6e]
+// CHECK: fmaxnmp v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xc5,0x79,0x6e]
+
+//----------------------------------------------------------------------
+// Vector minNum Pairwise (Floating Point)
+//----------------------------------------------------------------------
+         fminnmp v10.2s, v15.2s, v22.2s
+         fminnmp v3.4s, v5.4s, v6.4s
+         fminnmp v17.2d, v13.2d, v2.2d
+
+// CHECK: fminnmp v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xc5,0xb6,0x2e]
+// CHECK: fminnmp v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xc4,0xa6,0x6e]
+// CHECK: fminnmp v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xc5,0xe2,0x6e]
+
diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s
new file mode 100644
index 000000000000..6d1efde5077f
--- /dev/null
+++ b/test/MC/AArch64/neon-max-min.s
@@ -0,0 +1,110 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Maximum (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+         smax v0.8b, v1.8b, v2.8b
+         smax v0.16b, v1.16b, v2.16b
+         smax v0.4h, v1.4h, v2.4h
+         smax v0.8h, v1.8h, v2.8h
+         smax v0.2s, v1.2s, v2.2s
+         smax v0.4s, v1.4s, v2.4s
+
+// CHECK: smax v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x64,0x22,0x0e]
+// CHECK: smax v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x64,0x22,0x4e]
+// CHECK: smax v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x64,0x62,0x0e]
+// CHECK: smax v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x64,0x62,0x4e]
+// CHECK: smax v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x64,0xa2,0x0e]
+// CHECK: smax v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x64,0xa2,0x4e]
+
+         umax v0.8b, v1.8b, v2.8b
+         umax v0.16b, v1.16b, v2.16b
+         umax v0.4h, v1.4h, v2.4h
+         umax v0.8h, v1.8h, v2.8h
+         umax v0.2s, v1.2s, v2.2s
+         umax v0.4s, v1.4s, v2.4s
+
+// CHECK: umax v0.8b, v1.8b, v2.8b         // encoding: [0x20,0x64,0x22,0x2e]
+// CHECK: umax v0.16b, v1.16b, v2.16b      // encoding: [0x20,0x64,0x22,0x6e]
+// CHECK: umax v0.4h, v1.4h, v2.4h         // encoding: [0x20,0x64,0x62,0x2e]
+// CHECK: umax v0.8h, v1.8h, v2.8h         // encoding: [0x20,0x64,0x62,0x6e]
+// CHECK: umax v0.2s, v1.2s, v2.2s         // encoding: [0x20,0x64,0xa2,0x2e]
+// CHECK: umax v0.4s, v1.4s, v2.4s         // encoding: [0x20,0x64,0xa2,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Minimum (Signed and Unsigned Integer)
+//----------------------------------------------------------------------
+         smin v0.8b, v1.8b, v2.8b
+         smin v0.16b, v1.16b, v2.16b
+         smin v0.4h, v1.4h, v2.4h
+         smin v0.8h, v1.8h, v2.8h
+         smin v0.2s, v1.2s, v2.2s
+         smin v0.4s, v1.4s, v2.4s
+
+// CHECK: smin v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x6c,0x22,0x0e]
+// CHECK: smin v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x6c,0x22,0x4e]
+// CHECK: smin v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x6c,0x62,0x0e]
+// CHECK: smin v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x6c,0x62,0x4e]
+// CHECK: smin v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x6c,0xa2,0x0e]
+// CHECK: smin v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x6c,0xa2,0x4e]
+
+         umin v0.8b, v1.8b, v2.8b
+         umin v0.16b, v1.16b, v2.16b
+         umin v0.4h, v1.4h, v2.4h
+         umin v0.8h, v1.8h, v2.8h
+         umin v0.2s, v1.2s, v2.2s
+         umin v0.4s, v1.4s, v2.4s
+
+// CHECK: umin v0.8b, v1.8b, v2.8b         // encoding: [0x20,0x6c,0x22,0x2e]
+// CHECK: umin v0.16b, v1.16b, v2.16b      // encoding: [0x20,0x6c,0x22,0x6e]
+// CHECK: umin v0.4h, v1.4h, v2.4h         // encoding: [0x20,0x6c,0x62,0x2e]
+// CHECK: umin v0.8h, v1.8h, v2.8h         // encoding: [0x20,0x6c,0x62,0x6e]
+// CHECK: umin v0.2s, v1.2s, v2.2s         // encoding: [0x20,0x6c,0xa2,0x2e]
+// CHECK: umin v0.4s, v1.4s, v2.4s         // encoding: [0x20,0x6c,0xa2,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Maximum (Floating Point)
+//----------------------------------------------------------------------
+         fmax v0.2s, v1.2s, v2.2s
+         fmax v31.4s, v15.4s, v16.4s
+         fmax v7.2d, v8.2d, v25.2d
+
+// CHECK: fmax v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xf4,0x22,0x0e]
+// CHECK: fmax v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x4e]
+// CHECK: fmax v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xf5,0x79,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Minimum (Floating Point)
+//----------------------------------------------------------------------
+         fmin v10.2s, v15.2s, v22.2s
+         fmin v3.4s, v5.4s, v6.4s
+         fmin v17.2d, v13.2d, v2.2d
+
+// CHECK: fmin v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xf5,0xb6,0x0e]
+// CHECK: fmin v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xf4,0xa6,0x4e]
+// CHECK: fmin v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xf5,0xe2,0x4e]
+
+//----------------------------------------------------------------------
+// Vector maxNum (Floating Point)
+//----------------------------------------------------------------------
+         fmaxnm v0.2s, v1.2s, v2.2s
+         fmaxnm v31.4s, v15.4s, v16.4s
+         fmaxnm v7.2d, v8.2d, v25.2d
+
+// CHECK: fmaxnm v0.2s, v1.2s, v2.2s    // encoding: [0x20,0xc4,0x22,0x0e]
+// CHECK: fmaxnm v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x4e]
+// CHECK: fmaxnm v7.2d, v8.2d, v25.2d   // encoding: [0x07,0xc5,0x79,0x4e]
+
+//----------------------------------------------------------------------
+// Vector minNum (Floating Point)
+//----------------------------------------------------------------------
+         fminnm v10.2s, v15.2s, v22.2s
+         fminnm v3.4s, v5.4s, v6.4s
+         fminnm v17.2d, v13.2d, v2.2d
+
+// CHECK: fminnm v10.2s, v15.2s, v22.2s  // encoding: [0xea,0xc5,0xb6,0x0e]
+// CHECK: fminnm v3.4s, v5.4s, v6.4s     // encoding: [0xa3,0xc4,0xa6,0x4e]
+// CHECK: fminnm v17.2d, v13.2d, v2.2d   // encoding: [0xb1,0xc5,0xe2,0x4e]
+
diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s
new file mode 100644
index 000000000000..3072e6f1200d
--- /dev/null
+++ b/test/MC/AArch64/neon-mla-mls-instructions.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Integer Multiply-accumulate
+//----------------------------------------------------------------------
+         mla v0.8b, v1.8b, v2.8b
+         mla v0.16b, v1.16b, v2.16b
+         mla v0.4h, v1.4h, v2.4h
+         mla v0.8h, v1.8h, v2.8h
+         mla v0.2s, v1.2s, v2.2s
+         mla v0.4s, v1.4s, v2.4s
+
+// CHECK: mla v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x94,0x22,0x0e]
+// CHECK: mla v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x94,0x22,0x4e]
+// CHECK: mla v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x94,0x62,0x0e]
+// CHECK: mla v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x94,0x62,0x4e]
+// CHECK: mla v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x94,0xa2,0x0e]
+// CHECK: mla v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x94,0xa2,0x4e]
+
+
+//----------------------------------------------------------------------
+// Vector Integer Multiply-subtract
+//----------------------------------------------------------------------
+         mls v0.8b, v1.8b, v2.8b
+         mls v0.16b, v1.16b, v2.16b
+         mls v0.4h, v1.4h, v2.4h
+         mls v0.8h, v1.8h, v2.8h
+         mls v0.2s, v1.2s, v2.2s
+         mls v0.4s, v1.4s, v2.4s
+
+// CHECK: mls v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x94,0x22,0x2e]
+// CHECK: mls v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x94,0x22,0x6e]
+// CHECK: mls v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x94,0x62,0x2e]
+// CHECK: mls v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x94,0x62,0x6e]
+// CHECK: mls v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x94,0xa2,0x2e]
+// CHECK: mls v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x94,0xa2,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Floating-Point Multiply-accumulate
+//----------------------------------------------------------------------
+         fmla v0.2s, v1.2s, v2.2s
+         fmla v0.4s, v1.4s, v2.4s
+         fmla v0.2d, v1.2d, v2.2d
+
+// CHECK: fmla v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xcc,0x22,0x0e]
+// CHECK: fmla v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xcc,0x22,0x4e]
+// CHECK: fmla v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xcc,0x62,0x4e]
+
+//----------------------------------------------------------------------
+// Vector Floating-Point Multiply-subtract
+//----------------------------------------------------------------------
+         fmls v0.2s, v1.2s, v2.2s
+         fmls v0.4s, v1.4s, v2.4s
+         fmls v0.2d, v1.2d, v2.2d
+
+// CHECK: fmls v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xcc,0xa2,0x0e]
+// CHECK: fmls v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xcc,0xa2,0x4e]
+// CHECK: fmls v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xcc,0xe2,0x4e]
+
diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s
new file mode 100644
index 000000000000..c2ca80322001
--- /dev/null
+++ b/test/MC/AArch64/neon-mov.s
@@ -0,0 +1,209 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//----------------------------------------------------------------------
+// Vector Move Immediate Shifted
+//----------------------------------------------------------------------
+         movi v0.2s, #1
+         movi v1.2s, #0
+         movi v15.2s, #1, lsl #8
+         movi v16.2s, #1, lsl #16
+         movi v31.2s, #1, lsl #24
+         movi v0.4s, #1
+         movi v0.4s, #1, lsl #8
+         movi v0.4s, #1, lsl #16
+         movi v0.4s, #1, lsl #24
+         movi v0.4h, #1
+         movi v0.4h, #1, lsl #8
+         movi v0.8h, #1
+         movi v0.8h, #1, lsl #8
+
+// CHECK:  movi v0.2s, #0x1           // encoding: [0x20,0x04,0x00,0x0f]
+// CHECK:  movi v1.2s, #0x0           // encoding: [0x01,0x04,0x00,0x0f]
+// CHECK:  movi v15.2s, #0x1, lsl #8  // encoding: [0x2f,0x24,0x00,0x0f]
+// CHECK:  movi v16.2s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x0f]
+// CHECK:  movi v31.2s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x0f]
+// CHECK:  movi v0.4s, #0x1           // encoding: [0x20,0x04,0x00,0x4f]
+// CHECK:  movi v0.4s, #0x1, lsl #8   // encoding: [0x20,0x24,0x00,0x4f]
+// CHECK:  movi v0.4s, #0x1, lsl #16  // encoding: [0x20,0x44,0x00,0x4f]
+// CHECK:  movi v0.4s, #0x1, lsl #24  // encoding: [0x20,0x64,0x00,0x4f]
+// CHECK:  movi v0.4h, #0x1           // encoding: [0x20,0x84,0x00,0x0f]
+// CHECK:  movi v0.4h, #0x1, lsl #8   // encoding: [0x20,0xa4,0x00,0x0f]
+// CHECK:  movi v0.8h, #0x1           // encoding: [0x20,0x84,0x00,0x4f]
+// CHECK:  movi v0.8h, #0x1, lsl #8   // encoding: [0x20,0xa4,0x00,0x4f]
+
+//----------------------------------------------------------------------
+// Vector Move Inverted Immediate Shifted
+//----------------------------------------------------------------------
+         mvni v0.2s, #1
+         mvni v1.2s, #0
+         mvni v0.2s, #1, lsl #8
+         mvni v0.2s, #1, lsl #16
+         mvni v0.2s, #1, lsl #24
+         mvni v0.4s, #1
+         mvni v15.4s, #1, lsl #8
+         mvni v16.4s, #1, lsl #16
+         mvni v31.4s, #1, lsl #24
+         mvni v0.4h, #1
+         mvni v0.4h, #1, lsl #8
+         mvni v0.8h, #1
+         mvni v0.8h, #1, lsl #8
+
+// CHECK:  mvni v0.2s, #0x1           // encoding: [0x20,0x04,0x00,0x2f]
+// CHECK:  mvni v1.2s, #0x0           // encoding: [0x01,0x04,0x00,0x2f]
+// CHECK:  mvni v0.2s, #0x1, lsl #8   // encoding: [0x20,0x24,0x00,0x2f]
+// CHECK:  mvni v0.2s, #0x1, lsl #16  // encoding: [0x20,0x44,0x00,0x2f]
+// CHECK:  mvni v0.2s, #0x1, lsl #24  // encoding: [0x20,0x64,0x00,0x2f]
+// CHECK:  mvni v0.4s, #0x1           // encoding: [0x20,0x04,0x00,0x6f]
+// CHECK:  mvni v15.4s, #0x1, lsl #8  // encoding: [0x2f,0x24,0x00,0x6f]
+// CHECK:  mvni v16.4s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x6f]
+// CHECK:  mvni v31.4s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x6f]
+// CHECK:  mvni v0.4h, #0x1           // encoding: [0x20,0x84,0x00,0x2f]
+// CHECK:  mvni v0.4h, #0x1, lsl #8   // encoding: [0x20,0xa4,0x00,0x2f]
+// CHECK:  mvni v0.8h, #0x1           // encoding: [0x20,0x84,0x00,0x6f]
+// CHECK:  mvni v0.8h, #0x1, lsl #8   // encoding: [0x20,0xa4,0x00,0x6f]
+
+//----------------------------------------------------------------------
+// Vector Bitwise Bit Clear (AND NOT) - immediate
+//----------------------------------------------------------------------
+         bic v0.2s, #1
+         bic v1.2s, #0
+         bic v0.2s, #1, lsl #8
+         bic v0.2s, #1, lsl #16
+         bic v0.2s, #1, lsl #24
+         bic v0.4s, #1
+         bic v0.4s, #1, lsl #8
+         bic v0.4s, #1, lsl #16
+         bic v0.4s, #1, lsl #24
+         bic v15.4h, #1
+         bic v16.4h, #1, lsl #8
+         bic v0.8h, #1
+         bic v31.8h, #1, lsl #8
+
+// CHECK:  bic v0.2s, #0x1           // encoding: [0x20,0x14,0x00,0x2f]
+// CHECK:  bic v1.2s, #0x0           // encoding: [0x01,0x14,0x00,0x2f]
+// CHECK:  bic v0.2s, #0x1, lsl #8   // encoding: [0x20,0x34,0x00,0x2f]
+// CHECK:  bic v0.2s, #0x1, lsl #16  // encoding: [0x20,0x54,0x00,0x2f]
+// CHECK:  bic v0.2s, #0x1, lsl #24  // encoding: [0x20,0x74,0x00,0x2f]
+// CHECK:  bic v0.4s, #0x1           // encoding: [0x20,0x14,0x00,0x6f]
+// CHECK:  bic v0.4s, #0x1, lsl #8   // encoding: [0x20,0x34,0x00,0x6f]
+// CHECK:  bic v0.4s, #0x1, lsl #16  // encoding: [0x20,0x54,0x00,0x6f]
+// CHECK:  bic v0.4s, #0x1, lsl #24  // encoding: [0x20,0x74,0x00,0x6f]
+// CHECK:  bic v15.4h, #0x1          // encoding: [0x2f,0x94,0x00,0x2f]
+// CHECK:  bic v16.4h, #0x1, lsl #8  // encoding: [0x30,0xb4,0x00,0x2f]
+// CHECK:  bic v0.8h, #0x1           // encoding: [0x20,0x94,0x00,0x6f]
+// CHECK:  bic v31.8h, #0x1, lsl #8  // encoding: [0x3f,0xb4,0x00,0x6f]
+
+//----------------------------------------------------------------------
+// Vector Bitwise OR - immediate
+//----------------------------------------------------------------------
+         orr v0.2s, #1
+         orr v1.2s, #0
+         orr v0.2s, #1, lsl #8
+         orr v0.2s, #1, lsl #16
+         orr v0.2s, #1, lsl #24
+         orr v0.4s, #1
+         orr v0.4s, #1, lsl #8
+         orr v0.4s, #1, lsl #16
+         orr v0.4s, #1, lsl #24
+         orr v31.4h, #1
+         orr v15.4h, #1, lsl #8
+         orr v0.8h, #1
+         orr v16.8h, #1, lsl #8
+
+// CHECK:  orr v0.2s, #0x1           // encoding: [0x20,0x14,0x00,0x0f]
+// CHECK:  orr v1.2s, #0x0           // encoding: [0x01,0x14,0x00,0x0f]
+// CHECK:  orr v0.2s, #0x1, lsl #8   // encoding: [0x20,0x34,0x00,0x0f]
+// CHECK:  orr v0.2s, #0x1, lsl #16  // encoding: [0x20,0x54,0x00,0x0f]
+// CHECK:  orr v0.2s, #0x1, lsl #24  // encoding: [0x20,0x74,0x00,0x0f]
+// CHECK:  orr v0.4s, #0x1           // encoding: [0x20,0x14,0x00,0x4f]
+// CHECK:  orr v0.4s, #0x1, lsl #8   // encoding: [0x20,0x34,0x00,0x4f]
+// CHECK:  orr v0.4s, #0x1, lsl #16  // encoding: [0x20,0x54,0x00,0x4f]
+// CHECK:  orr v0.4s, #0x1, lsl #24  // encoding: [0x20,0x74,0x00,0x4f]
+// CHECK:  orr v31.4h, #0x1          // encoding: [0x3f,0x94,0x00,0x0f]
+// CHECK:  orr v15.4h, #0x1, lsl #8  // encoding: [0x2f,0xb4,0x00,0x0f]
+// CHECK:  orr v0.8h, #0x1           // encoding: [0x20,0x94,0x00,0x4f]
+// CHECK:  orr v16.8h, #0x1, lsl #8  // encoding: [0x30,0xb4,0x00,0x4f]
+
+//----------------------------------------------------------------------
+// Vector Move Immediate Masked
+//----------------------------------------------------------------------
+         movi v0.2s, #1, msl #8
+         movi v1.2s, #1, msl #16
+         movi v0.4s, #1, msl #8
+         movi v31.4s, #1, msl #16
+
+// CHECK:  movi v0.2s, #0x1, msl #8   // encoding: [0x20,0xc4,0x00,0x0f]
+// CHECK:  movi v1.2s, #0x1, msl #16  // encoding: [0x21,0xd4,0x00,0x0f]
+// CHECK:  movi v0.4s, #0x1, msl #8   // encoding: [0x20,0xc4,0x00,0x4f]
+// CHECK:  movi v31.4s, #0x1, msl #16 // encoding: [0x3f,0xd4,0x00,0x4f]
+
+//----------------------------------------------------------------------
+// Vector Move Inverted Immediate Masked
+//----------------------------------------------------------------------
+         mvni v1.2s, #0x1, msl #8
+         mvni v0.2s, #0x1, msl #16
+         mvni v31.4s, #0x1, msl #8
+         mvni v0.4s, #0x1, msl #16
+
+// CHECK:   mvni v1.2s, #0x1, msl #8  // encoding: [0x21,0xc4,0x00,0x2f]
+// CHECK:   mvni v0.2s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x2f]
+// CHECK:   mvni v31.4s, #0x1, msl #8 // encoding: [0x3f,0xc4,0x00,0x6f]
+// CHECK:   mvni v0.4s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x6f]
+
+//----------------------------------------------------------------------
+// Vector Move Immediate - per byte
+//----------------------------------------------------------------------
+         movi v0.8b, #0
+         movi v31.8b, #0xff
+         movi v15.16b, #0xf
+         movi v31.16b, #0x1f
+
+// CHECK:   movi v0.8b, #0x0        // encoding: [0x00,0xe4,0x00,0x0f]
+// CHECK:   movi v31.8b, #0xff      // encoding: [0xff,0xe7,0x07,0x0f]
+// CHECK:   movi v15.16b, #0xf      // encoding: [0xef,0xe5,0x00,0x4f]
+// CHECK:   movi v31.16b, #0x1f     // encoding: [0xff,0xe7,0x00,0x4f]
+
+//----------------------------------------------------------------------
+// Vector Move Immediate - bytemask, per doubleword
+//----------------------------------------------------------------------
+         movi v0.2d, #0xff00ff00ff00ff00
+
+// CHECK: movi v0.2d, #0xff00ff00ff00ff00 // encoding: [0x40,0xe5,0x05,0x6f]
+
+//----------------------------------------------------------------------
+// Vector Move Immediate - bytemask, one doubleword
+//----------------------------------------------------------------------
+         movi d0, #0xff00ff00ff00ff00
+
+// CHECK: movi d0,  #0xff00ff00ff00ff00 // encoding: [0x40,0xe5,0x05,0x2f]
+
+//----------------------------------------------------------------------
+// Vector Floating Point Move Immediate
+//----------------------------------------------------------------------
+         fmov v1.2s, #1.0
+         fmov v15.4s, #1.0
+         fmov v31.2d, #1.0
+
+// CHECK:  fmov v1.2s, #1.00000000     // encoding: [0x01,0xf6,0x03,0x0f]
+// CHECK:  fmov v15.4s, #1.00000000    // encoding: [0x0f,0xf6,0x03,0x4f]
+// CHECK:  fmov v31.2d, #1.00000000    // encoding: [0x1f,0xf6,0x03,0x6f]
+
+
+//----------------------------------------------------------------------
+// Vector Move - register
+//----------------------------------------------------------------------
+
+      // FIXME: these should all print with the "mov" syntax.
+      mov v0.8b, v31.8b
+      mov v15.16b, v16.16b
+      orr v0.8b, v31.8b, v31.8b
+      orr v15.16b, v16.16b, v16.16b
+
+// CHECK:   orr v0.8b, v31.8b, v31.8b      // encoding: [0xe0,0x1f,0xbf,0x0e]
+// CHECK:   orr v15.16b, v16.16b, v16.16b  // encoding: [0x0f,0x1e,0xb0,0x4e]
+// CHECK:   orr v0.8b, v31.8b, v31.8b      // encoding: [0xe0,0x1f,0xbf,0x0e]
+// CHECK:   orr v15.16b, v16.16b, v16.16b  // encoding: [0x0f,0x1e,0xb0,0x4e]
+
diff --git a/test/MC/AArch64/neon-mul-div-instructions.s b/test/MC/AArch64/neon-mul-div-instructions.s
new file mode 100644
index 000000000000..1fe6d2b819ce
--- /dev/null
+++ b/test/MC/AArch64/neon-mul-div-instructions.s
@@ -0,0 +1,86 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Vector Integer Mul
+//----------------------------------------------------------------------
+         mul v0.8b, v1.8b, v2.8b
+         mul v0.16b, v1.16b, v2.16b
+         mul v0.4h, v1.4h, v2.4h
+         mul v0.8h, v1.8h, v2.8h
+         mul v0.2s, v1.2s, v2.2s
+         mul v0.4s, v1.4s, v2.4s
+
+// CHECK: mul v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x9c,0x22,0x0e]
+// CHECK: mul v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x9c,0x22,0x4e]
+// CHECK: mul v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x9c,0x62,0x0e]
+// CHECK: mul v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x9c,0x62,0x4e]
+// CHECK: mul v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x9c,0xa2,0x0e]
+// CHECK: mul v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x9c,0xa2,0x4e]
+
+
+//----------------------------------------------------------------------
+// Vector Floating-Point Mul
+//----------------------------------------------------------------------
+         fmul v0.2s, v1.2s, v2.2s
+         fmul v0.4s, v1.4s, v2.4s
+         fmul v0.2d, v1.2d, v2.2d
+
+// CHECK: fmul v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xdc,0x22,0x2e]
+// CHECK: fmul v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xdc,0x22,0x6e]
+// CHECK: fmul v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xdc,0x62,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Floating-Point Div
+//----------------------------------------------------------------------
+         fdiv v0.2s, v1.2s, v2.2s
+         fdiv v0.4s, v1.4s, v2.4s
+         fdiv v0.2d, v1.2d, v2.2d
+
+// CHECK: fdiv v0.2s, v1.2s, v2.2s       // encoding: [0x20,0xfc,0x22,0x2e]
+// CHECK: fdiv v0.4s, v1.4s, v2.4s       // encoding: [0x20,0xfc,0x22,0x6e]
+// CHECK: fdiv v0.2d, v1.2d, v2.2d       // encoding: [0x20,0xfc,0x62,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Multiply (Polynomial)
+//----------------------------------------------------------------------
+         pmul v17.8b, v31.8b, v16.8b
+         pmul v0.16b, v1.16b, v2.16b
+
+// CHECK: pmul v17.8b, v31.8b, v16.8b     // encoding: [0xf1,0x9f,0x30,0x2e]
+// CHECK: pmul v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x9c,0x22,0x6e]
+
+//----------------------------------------------------------------------
+// Vector Saturating Doubling Multiply High
+//----------------------------------------------------------------------
+         sqdmulh v2.4h, v25.4h, v3.4h
+         sqdmulh v12.8h, v5.8h, v13.8h
+         sqdmulh v3.2s, v1.2s, v30.2s
+
+// CHECK: sqdmulh v2.4h, v25.4h, v3.4h    // encoding: [0x22,0xb7,0x63,0x0e]
+// CHECK: sqdmulh v12.8h, v5.8h, v13.8h   // encoding: [0xac,0xb4,0x6d,0x4e]
+// CHECK: sqdmulh v3.2s, v1.2s, v30.2s    // encoding: [0x23,0xb4,0xbe,0x0e]
+
+//----------------------------------------------------------------------
+// Vector Saturating Rounding Doubling Multiply High
+//----------------------------------------------------------------------
+         sqrdmulh v2.4h, v25.4h, v3.4h
+         sqrdmulh v12.8h, v5.8h, v13.8h
+         sqrdmulh v3.2s, v1.2s, v30.2s
+
+// CHECK: sqrdmulh v2.4h, v25.4h, v3.4h    // encoding: [0x22,0xb7,0x63,0x2e]
+// CHECK: sqrdmulh v12.8h, v5.8h, v13.8h   // encoding: [0xac,0xb4,0x6d,0x6e]
+// CHECK: sqrdmulh v3.2s, v1.2s, v30.2s    // encoding: [0x23,0xb4,0xbe,0x2e]
+
+//----------------------------------------------------------------------
+// Vector Multiply Extended
+//----------------------------------------------------------------------
+      fmulx v21.2s, v5.2s, v13.2s
+      fmulx v1.4s, v25.4s, v3.4s
+      fmulx v31.2d, v22.2d, v2.2d
+
+// CHECK: fmulx v21.2s, v5.2s, v13.2s // encoding: [0xb5,0xdc,0x2d,0x0e]
+// CHECK: fmulx v1.4s, v25.4s, v3.4s // encoding: [0x21,0xdf,0x23,0x4e]
+// CHECK: fmulx v31.2d, v22.2d, v2.2d // encoding: [0xdf,0xde,0x62,0x4e]
+
diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s
new file mode 100644
index 000000000000..20a4acde37fa
--- /dev/null
+++ b/test/MC/AArch64/neon-perm.s
@@ -0,0 +1,103 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Instructions for permute
+//------------------------------------------------------------------------------
+
+        uzp1 v0.8b, v1.8b, v2.8b
+        uzp1 v0.16b, v1.16b, v2.16b
+        uzp1 v0.4h, v1.4h, v2.4h
+        uzp1 v0.8h, v1.8h, v2.8h
+        uzp1 v0.2s, v1.2s, v2.2s
+        uzp1 v0.4s, v1.4s, v2.4s
+        uzp1 v0.2d, v1.2d, v2.2d
+
+// CHECK: uzp1	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x18,0x02,0x0e]
+// CHECK: uzp1	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x18,0x02,0x4e]
+// CHECK: uzp1	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x18,0x42,0x0e]
+// CHECK: uzp1	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x18,0x42,0x4e]
+// CHECK: uzp1	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x18,0x82,0x0e]
+// CHECK: uzp1	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x18,0x82,0x4e]
+// CHECK: uzp1	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x18,0xc2,0x4e]
+
+        trn1 v0.8b, v1.8b, v2.8b
+        trn1 v0.16b, v1.16b, v2.16b
+        trn1 v0.4h, v1.4h, v2.4h
+        trn1 v0.8h, v1.8h, v2.8h
+        trn1 v0.2s, v1.2s, v2.2s
+        trn1 v0.4s, v1.4s, v2.4s
+        trn1 v0.2d, v1.2d, v2.2d
+
+// CHECK: trn1	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x28,0x02,0x0e]
+// CHECK: trn1	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x28,0x02,0x4e]
+// CHECK: trn1	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x28,0x42,0x0e]
+// CHECK: trn1	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x28,0x42,0x4e]
+// CHECK: trn1	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x28,0x82,0x0e]
+// CHECK: trn1	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x28,0x82,0x4e]
+// CHECK: trn1	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x28,0xc2,0x4e]
+
+        zip1 v0.8b, v1.8b, v2.8b
+        zip1 v0.16b, v1.16b, v2.16b
+        zip1 v0.4h, v1.4h, v2.4h
+        zip1 v0.8h, v1.8h, v2.8h
+        zip1 v0.2s, v1.2s, v2.2s
+        zip1 v0.4s, v1.4s, v2.4s
+        zip1 v0.2d, v1.2d, v2.2d
+
+// CHECK: zip1	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x38,0x02,0x0e]
+// CHECK: zip1	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x38,0x02,0x4e]
+// CHECK: zip1	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x38,0x42,0x0e]
+// CHECK: zip1	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x38,0x42,0x4e]
+// CHECK: zip1	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x38,0x82,0x0e]
+// CHECK: zip1	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x38,0x82,0x4e]
+// CHECK: zip1	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x38,0xc2,0x4e]
+
+        uzp2 v0.8b, v1.8b, v2.8b
+        uzp2 v0.16b, v1.16b, v2.16b
+        uzp2 v0.4h, v1.4h, v2.4h
+        uzp2 v0.8h, v1.8h, v2.8h
+        uzp2 v0.2s, v1.2s, v2.2s
+        uzp2 v0.4s, v1.4s, v2.4s
+        uzp2 v0.2d, v1.2d, v2.2d
+
+// CHECK: uzp2	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x58,0x02,0x0e]
+// CHECK: uzp2	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x58,0x02,0x4e]
+// CHECK: uzp2	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x58,0x42,0x0e]
+// CHECK: uzp2	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x58,0x42,0x4e]
+// CHECK: uzp2	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x58,0x82,0x0e]
+// CHECK: uzp2	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x58,0x82,0x4e]
+// CHECK: uzp2	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x58,0xc2,0x4e]
+
+        trn2 v0.8b, v1.8b, v2.8b
+        trn2 v0.16b, v1.16b, v2.16b
+        trn2 v0.4h, v1.4h, v2.4h
+        trn2 v0.8h, v1.8h, v2.8h
+        trn2 v0.2s, v1.2s, v2.2s
+        trn2 v0.4s, v1.4s, v2.4s
+        trn2 v0.2d, v1.2d, v2.2d
+
+// CHECK: trn2	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x68,0x02,0x0e]
+// CHECK: trn2	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x68,0x02,0x4e]
+// CHECK: trn2	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x68,0x42,0x0e]
+// CHECK: trn2	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x68,0x42,0x4e]
+// CHECK: trn2	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x68,0x82,0x0e]
+// CHECK: trn2	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x68,0x82,0x4e]
+// CHECK: trn2	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x68,0xc2,0x4e]
+
+        zip2 v0.8b, v1.8b, v2.8b
+        zip2 v0.16b, v1.16b, v2.16b
+        zip2 v0.4h, v1.4h, v2.4h
+        zip2 v0.8h, v1.8h, v2.8h
+        zip2 v0.2s, v1.2s, v2.2s
+        zip2 v0.4s, v1.4s, v2.4s
+        zip2 v0.2d, v1.2d, v2.2d
+
+// CHECK: zip2	v0.8b, v1.8b, v2.8b     // encoding: [0x20,0x78,0x02,0x0e]
+// CHECK: zip2	v0.16b, v1.16b, v2.16b  // encoding: [0x20,0x78,0x02,0x4e]
+// CHECK: zip2	v0.4h, v1.4h, v2.4h     // encoding: [0x20,0x78,0x42,0x0e]
+// CHECK: zip2	v0.8h, v1.8h, v2.8h     // encoding: [0x20,0x78,0x42,0x4e]
+// CHECK: zip2	v0.2s, v1.2s, v2.2s     // encoding: [0x20,0x78,0x82,0x0e]
+// CHECK: zip2	v0.4s, v1.4s, v2.4s     // encoding: [0x20,0x78,0x82,0x4e]
+// CHECK: zip2	v0.2d, v1.2d, v2.2d     // encoding: [0x20,0x78,0xc2,0x4e]
diff --git a/test/MC/AArch64/neon-rounding-halving-add.s b/test/MC/AArch64/neon-rounding-halving-add.s
new file mode 100644
index 000000000000..47ac21268020
--- /dev/null
+++ b/test/MC/AArch64/neon-rounding-halving-add.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Rounding Halving Add (Signed)
+//------------------------------------------------------------------------------
+         srhadd v0.8b, v1.8b, v2.8b
+         srhadd v0.16b, v1.16b, v2.16b
+         srhadd v0.4h, v1.4h, v2.4h
+         srhadd v0.8h, v1.8h, v2.8h
+         srhadd v0.2s, v1.2s, v2.2s
+         srhadd v0.4s, v1.4s, v2.4s
+
+// CHECK: srhadd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x14,0x22,0x0e]
+// CHECK: srhadd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x14,0x22,0x4e]
+// CHECK: srhadd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x14,0x62,0x0e]
+// CHECK: srhadd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x14,0x62,0x4e]
+// CHECK: srhadd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x14,0xa2,0x0e]
+// CHECK: srhadd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x14,0xa2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Rounding Halving Add (Unsigned)
+//------------------------------------------------------------------------------
+         urhadd v0.8b, v1.8b, v2.8b
+         urhadd v0.16b, v1.16b, v2.16b
+         urhadd v0.4h, v1.4h, v2.4h
+         urhadd v0.8h, v1.8h, v2.8h
+         urhadd v0.2s, v1.2s, v2.2s
+         urhadd v0.4s, v1.4s, v2.4s
+
+// CHECK: urhadd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x14,0x22,0x2e]
+// CHECK: urhadd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x14,0x22,0x6e]
+// CHECK: urhadd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x14,0x62,0x2e]
+// CHECK: urhadd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x14,0x62,0x6e]
+// CHECK: urhadd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x14,0xa2,0x2e]
+// CHECK: urhadd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x14,0xa2,0x6e]
+
diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s
new file mode 100644
index 000000000000..e70f766f2b62
--- /dev/null
+++ b/test/MC/AArch64/neon-rounding-shift.s
@@ -0,0 +1,45 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Rounding Shift Left (Signed)
+//------------------------------------------------------------------------------
+         srshl v0.8b, v1.8b, v2.8b
+         srshl v0.16b, v1.16b, v2.16b
+         srshl v0.4h, v1.4h, v2.4h
+         srshl v0.8h, v1.8h, v2.8h
+         srshl v0.2s, v1.2s, v2.2s
+         srshl v0.4s, v1.4s, v2.4s
+         srshl v0.2d, v1.2d, v2.2d
+
+// CHECK: srshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x54,0x22,0x0e]
+// CHECK: srshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x54,0x22,0x4e]
+// CHECK: srshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x54,0x62,0x0e]
+// CHECK: srshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x54,0x62,0x4e]
+// CHECK: srshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x54,0xa2,0x0e]
+// CHECK: srshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x54,0xa2,0x4e]
+// CHECK: srshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x54,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Rounding Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         urshl v0.8b, v1.8b, v2.8b
+         urshl v0.16b, v1.16b, v2.16b
+         urshl v0.4h, v1.4h, v2.4h
+         urshl v0.8h, v1.8h, v2.8h
+         urshl v0.2s, v1.2s, v2.2s
+         urshl v0.4s, v1.4s, v2.4s
+         urshl v0.2d, v1.2d, v2.2d
+
+// CHECK: urshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x54,0x22,0x2e]
+// CHECK: urshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x54,0x22,0x6e]
+// CHECK: urshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x54,0x62,0x2e]
+// CHECK: urshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x54,0x62,0x6e]
+// CHECK: urshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x54,0xa2,0x2e]
+// CHECK: urshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x54,0xa2,0x6e]
+// CHECK: urshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x54,0xe2,0x6e]
+
+
+
diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s
new file mode 100644
index 000000000000..4a7ed1094262
--- /dev/null
+++ b/test/MC/AArch64/neon-saturating-add-sub.s
@@ -0,0 +1,82 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Add (Signed)
+//------------------------------------------------------------------------------
+         sqadd v0.8b, v1.8b, v2.8b
+         sqadd v0.16b, v1.16b, v2.16b
+         sqadd v0.4h, v1.4h, v2.4h
+         sqadd v0.8h, v1.8h, v2.8h
+         sqadd v0.2s, v1.2s, v2.2s
+         sqadd v0.4s, v1.4s, v2.4s
+         sqadd v0.2d, v1.2d, v2.2d
+
+// CHECK: sqadd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x0c,0x22,0x0e]
+// CHECK: sqadd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x0c,0x22,0x4e]
+// CHECK: sqadd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x0c,0x62,0x0e]
+// CHECK: sqadd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x0c,0x62,0x4e]
+// CHECK: sqadd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x0c,0xa2,0x0e]
+// CHECK: sqadd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x0c,0xa2,0x4e]
+// CHECK: sqadd v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x0c,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Add (Unsigned)
+//------------------------------------------------------------------------------
+         uqadd v0.8b, v1.8b, v2.8b
+         uqadd v0.16b, v1.16b, v2.16b
+         uqadd v0.4h, v1.4h, v2.4h
+         uqadd v0.8h, v1.8h, v2.8h
+         uqadd v0.2s, v1.2s, v2.2s
+         uqadd v0.4s, v1.4s, v2.4s
+         uqadd v0.2d, v1.2d, v2.2d
+
+// CHECK: uqadd v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x0c,0x22,0x2e]
+// CHECK: uqadd v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x0c,0x22,0x6e]
+// CHECK: uqadd v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x0c,0x62,0x2e]
+// CHECK: uqadd v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x0c,0x62,0x6e]
+// CHECK: uqadd v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x0c,0xa2,0x2e]
+// CHECK: uqadd v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x0c,0xa2,0x6e]
+// CHECK: uqadd v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x0c,0xe2,0x6e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Sub (Signed)
+//------------------------------------------------------------------------------
+         sqsub v0.8b, v1.8b, v2.8b
+         sqsub v0.16b, v1.16b, v2.16b
+         sqsub v0.4h, v1.4h, v2.4h
+         sqsub v0.8h, v1.8h, v2.8h
+         sqsub v0.2s, v1.2s, v2.2s
+         sqsub v0.4s, v1.4s, v2.4s
+         sqsub v0.2d, v1.2d, v2.2d
+
+// CHECK: sqsub v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x2c,0x22,0x0e]
+// CHECK: sqsub v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x2c,0x22,0x4e]
+// CHECK: sqsub v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x2c,0x62,0x0e]
+// CHECK: sqsub v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x2c,0x62,0x4e]
+// CHECK: sqsub v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x2c,0xa2,0x0e]
+// CHECK: sqsub v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x2c,0xa2,0x4e]
+// CHECK: sqsub v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x2c,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Sub (Unsigned)
+//------------------------------------------------------------------------------
+         uqsub v0.8b, v1.8b, v2.8b
+         uqsub v0.16b, v1.16b, v2.16b
+         uqsub v0.4h, v1.4h, v2.4h
+         uqsub v0.8h, v1.8h, v2.8h
+         uqsub v0.2s, v1.2s, v2.2s
+         uqsub v0.4s, v1.4s, v2.4s
+         uqsub v0.2d, v1.2d, v2.2d
+
+// CHECK: uqsub v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x2c,0x22,0x2e]
+// CHECK: uqsub v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x2c,0x22,0x6e]
+// CHECK: uqsub v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x2c,0x62,0x2e]
+// CHECK: uqsub v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x2c,0x62,0x6e]
+// CHECK: uqsub v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x2c,0xa2,0x2e]
+// CHECK: uqsub v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x2c,0xa2,0x6e]
+// CHECK: uqsub v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x2c,0xe2,0x6e]
+
+
diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s
new file mode 100644
index 000000000000..9215c1cabefd
--- /dev/null
+++ b/test/MC/AArch64/neon-saturating-rounding-shift.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Rounding Shift Left (Signed)
+//------------------------------------------------------------------------------
+         sqrshl v0.8b, v1.8b, v2.8b
+         sqrshl v0.16b, v1.16b, v2.16b
+         sqrshl v0.4h, v1.4h, v2.4h
+         sqrshl v0.8h, v1.8h, v2.8h
+         sqrshl v0.2s, v1.2s, v2.2s
+         sqrshl v0.4s, v1.4s, v2.4s
+         sqrshl v0.2d, v1.2d, v2.2d
+
+// CHECK: sqrshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x5c,0x22,0x0e]
+// CHECK: sqrshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x5c,0x22,0x4e]
+// CHECK: sqrshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x5c,0x62,0x0e]
+// CHECK: sqrshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x5c,0x62,0x4e]
+// CHECK: sqrshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x5c,0xa2,0x0e]
+// CHECK: sqrshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x5c,0xa2,0x4e]
+// CHECK: sqrshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x5c,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Rounding Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         uqrshl v0.8b, v1.8b, v2.8b
+         uqrshl v0.16b, v1.16b, v2.16b
+         uqrshl v0.4h, v1.4h, v2.4h
+         uqrshl v0.8h, v1.8h, v2.8h
+         uqrshl v0.2s, v1.2s, v2.2s
+         uqrshl v0.4s, v1.4s, v2.4s
+         uqrshl v0.2d, v1.2d, v2.2d
+
+// CHECK: uqrshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x5c,0x22,0x2e]
+// CHECK: uqrshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x5c,0x22,0x6e]
+// CHECK: uqrshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x5c,0x62,0x2e]
+// CHECK: uqrshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x5c,0x62,0x6e]
+// CHECK: uqrshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x5c,0xa2,0x2e]
+// CHECK: uqrshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x5c,0xa2,0x6e]
+// CHECK: uqrshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x5c,0xe2,0x6e]
+
diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s
new file mode 100644
index 000000000000..9ae393a040b6
--- /dev/null
+++ b/test/MC/AArch64/neon-saturating-shift.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Shift Left (Signed)
+//------------------------------------------------------------------------------
+         sqshl v0.8b, v1.8b, v2.8b
+         sqshl v0.16b, v1.16b, v2.16b
+         sqshl v0.4h, v1.4h, v2.4h
+         sqshl v0.8h, v1.8h, v2.8h
+         sqshl v0.2s, v1.2s, v2.2s
+         sqshl v0.4s, v1.4s, v2.4s
+         sqshl v0.2d, v1.2d, v2.2d
+
+// CHECK: sqshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x4c,0x22,0x0e]
+// CHECK: sqshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x4c,0x22,0x4e]
+// CHECK: sqshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x4c,0x62,0x0e]
+// CHECK: sqshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x4c,0x62,0x4e]
+// CHECK: sqshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x4c,0xa2,0x0e]
+// CHECK: sqshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x4c,0xa2,0x4e]
+// CHECK: sqshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x4c,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Saturating Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         uqshl v0.8b, v1.8b, v2.8b
+         uqshl v0.16b, v1.16b, v2.16b
+         uqshl v0.4h, v1.4h, v2.4h
+         uqshl v0.8h, v1.8h, v2.8h
+         uqshl v0.2s, v1.2s, v2.2s
+         uqshl v0.4s, v1.4s, v2.4s
+         uqshl v0.2d, v1.2d, v2.2d
+
+// CHECK: uqshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x4c,0x22,0x2e]
+// CHECK: uqshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x4c,0x22,0x6e]
+// CHECK: uqshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x4c,0x62,0x2e]
+// CHECK: uqshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x4c,0x62,0x6e]
+// CHECK: uqshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x4c,0xa2,0x2e]
+// CHECK: uqshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x4c,0xa2,0x6e]
+// CHECK: uqshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x4c,0xe2,0x6e]
+
diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s
new file mode 100644
index 000000000000..d08756c0c10c
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-abs.s
@@ -0,0 +1,35 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Absolute Value
+//----------------------------------------------------------------------
+
+    abs d29, d24
+
+// CHECK: abs d29, d24    // encoding: [0x1d,0xbb,0xe0,0x5e]
+        
+//----------------------------------------------------------------------
+// Scalar Floating-point Absolute Difference
+//----------------------------------------------------------------------
+
+    fabd s29, s24, s20
+    fabd d29, d24, d20
+
+// CHECK: fabd s29, s24, s20  // encoding: [0x1d,0xd7,0xb4,0x7e]
+// CHECK: fabd d29, d24, d20  // encoding: [0x1d,0xd7,0xf4,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Absolute Value
+//----------------------------------------------------------------------
+
+    sqabs b19, b14
+    sqabs h21, h15
+    sqabs s20, s12
+    sqabs d18, d12
+
+// CHECK: sqabs b19, b14    // encoding: [0xd3,0x79,0x20,0x5e]
+// CHECK: sqabs h21, h15    // encoding: [0xf5,0x79,0x60,0x5e]
+// CHECK: sqabs s20, s12    // encoding: [0x94,0x79,0xa0,0x5e]
+// CHECK: sqabs d18, d12    // encoding: [0x92,0x79,0xe0,0x5e]
diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s
new file mode 100644
index 000000000000..0a3eba732122
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-add-sub.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Add
+//------------------------------------------------------------------------------
+         add d31, d0, d16
+
+// CHECK: add d31, d0, d16       // encoding: [0x1f,0x84,0xf0,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Sub
+//------------------------------------------------------------------------------
+         sub d1, d7, d8
+
+// CHECK: sub d1, d7, d8       // encoding: [0xe1,0x84,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s
new file mode 100644
index 000000000000..fec9d12d8b8d
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Floating Point fused multiply-add (scalar, by element)
+//------------------------------------------------------------------------------
+    fmla    s0, s1, v1.s[0]
+    fmla    s30, s11, v1.s[1]
+    fmla    s4, s5, v7.s[2]
+    fmla    s16, s22, v16.s[3]
+    fmla    d0, d1, v1.d[0]
+    fmla    d30, d11, v1.d[1]
+
+// CHECK: fmla    s0, s1, v1.s[0]         // encoding: [0x20,0x10,0x81,0x5f]
+// CHECK: fmla    s30, s11, v1.s[1]       // encoding: [0x7e,0x11,0xa1,0x5f]
+// CHECK: fmla    s4, s5, v7.s[2]         // encoding: [0xa4,0x18,0x87,0x5f]
+// CHECK: fmla    s16, s22, v16.s[3]      // encoding: [0xd0,0x1a,0xb0,0x5f]
+// CHECK: fmla    d0, d1, v1.d[0]         // encoding: [0x20,0x10,0xc1,0x5f]
+// CHECK: fmla    d30, d11, v1.d[1]       // encoding: [0x7e,0x19,0xc1,0x5f]
+ 
+//------------------------------------------------------------------------------
+// Floating Point fused multiply-subtract (scalar, by element)
+//------------------------------------------------------------------------------
+
+    fmls    s2, s3, v4.s[0]
+    fmls    s29, s10, v28.s[1]      
+    fmls    s5, s12, v23.s[2]       
+    fmls    s7, s17, v26.s[3]       
+    fmls    d0, d1, v1.d[0]         
+    fmls    d30, d11, v1.d[1]       
+
+// CHECK: fmls    s2, s3, v4.s[0]     // encoding: [0x62,0x50,0x84,0x5f]
+// CHECK: fmls    s29, s10, v28.s[1]  // encoding: [0x5d,0x51,0xbc,0x5f]
+// CHECK: fmls    s5, s12, v23.s[2]   // encoding: [0x85,0x59,0x97,0x5f]
+// CHECK: fmls    s7, s17, v26.s[3]   // encoding: [0x27,0x5a,0xba,0x5f]
+// CHECK: fmls    d0, d1, v1.d[0]     // encoding: [0x20,0x50,0xc1,0x5f]
+// CHECK: fmls    d30, d11, v1.d[1]   // encoding: [0x7e,0x59,0xc1,0x5f]
+
+        
+        
+        
+
+        
+        
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s
new file mode 100644
index 000000000000..8b8a3f57a9ca
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Floating Point multiply (scalar, by element)
+//------------------------------------------------------------------------------
+    fmul    s0, s1, v1.s[0]
+    fmul    s30, s11, v1.s[1]
+    fmul    s4, s5, v7.s[2]
+    fmul    s16, s22, v16.s[3]
+    fmul    d0, d1, v1.d[0]
+    fmul    d30, d11, v1.d[1]
+
+// CHECK: fmul    s0, s1, v1.s[0]      // encoding: [0x20,0x90,0x81,0x5f]
+// CHECK: fmul    s30, s11, v1.s[1]    // encoding: [0x7e,0x91,0xa1,0x5f]
+// CHECK: fmul    s4, s5, v7.s[2]      // encoding: [0xa4,0x98,0x87,0x5f]
+// CHECK: fmul    s16, s22, v16.s[3]   // encoding: [0xd0,0x9a,0xb0,0x5f]
+// CHECK: fmul    d0, d1, v1.d[0]      // encoding: [0x20,0x90,0xc1,0x5f]
+// CHECK: fmul    d30, d11, v1.d[1]    // encoding: [0x7e,0x99,0xc1,0x5f]
+
+
+//------------------------------------------------------------------------------
+// Floating Point multiply extended (scalar, by element)
+//------------------------------------------------------------------------------
+    fmulx   s6, s2, v8.s[0]
+    fmulx   s7, s3, v13.s[1]
+    fmulx   s9, s7, v9.s[2]
+    fmulx   s13, s21, v10.s[3]
+    fmulx   d15, d9, v7.d[0]
+    fmulx   d13, d12, v11.d[1]
+
+// CHECK: fmulx   s6, s2, v8.s[0]         // encoding: [0x46,0x90,0x88,0x7f]
+// CHECK: fmulx   s7, s3, v13.s[1]        // encoding: [0x67,0x90,0xad,0x7f]
+// CHECK: fmulx   s9, s7, v9.s[2]         // encoding: [0xe9,0x98,0x89,0x7f]
+// CHECK: fmulx   s13, s21, v10.s[3]      // encoding: [0xad,0x9a,0xaa,0x7f]
+// CHECK: fmulx   d15, d9, v7.d[0]        // encoding: [0x2f,0x91,0xc7,0x7f]
+// CHECK: fmulx   d13, d12, v11.d[1]      // encoding: [0x8d,0x99,0xcb,0x7f]
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s
new file mode 100644
index 000000000000..e3d7e0514f9f
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//-----------------------------------------------------------------------------
+// Signed saturating doubling multiply-add long (scalar, by element)
+//-----------------------------------------------------------------------------
+    sqdmlal s0, h0, v0.h[0]
+    sqdmlal s7, h1, v4.h[3]
+    sqdmlal s11, h16, v8.h[4]
+    sqdmlal s30, h30, v15.h[7]
+    sqdmlal d0, s0, v3.s[0]
+    sqdmlal d30, s30, v30.s[3]
+    sqdmlal d8, s9, v14.s[1]
+
+// CHECK: sqdmlal s0, h0, v0.h[0]       // encoding: [0x00,0x30,0x40,0x5f]
+// CHECK: sqdmlal s7, h1, v4.h[3]       // encoding: [0x27,0x30,0x74,0x5f]
+// CHECK: sqdmlal s11, h16, v8.h[4]     // encoding: [0x0b,0x3a,0x48,0x5f]
+// CHECK: sqdmlal s30, h30, v15.h[7]    // encoding: [0xde,0x3b,0x7f,0x5f]
+// CHECK: sqdmlal d0, s0, v3.s[0]       // encoding: [0x00,0x30,0x83,0x5f]
+// CHECK: sqdmlal d30, s30, v30.s[3]    // encoding: [0xde,0x3b,0xbe,0x5f]
+// CHECK: sqdmlal d8, s9, v14.s[1]      // encoding: [0x28,0x31,0xae,0x5f]
+ 
+//-----------------------------------------------------------------------------
+// Signed saturating doubling multiply-subtract long (scalar, by element)
+//-----------------------------------------------------------------------------
+    sqdmlsl s1, h1, v1.h[0]
+    sqdmlsl s8, h2, v5.h[1]
+    sqdmlsl s12, h13, v14.h[2]
+    sqdmlsl s29, h28, v11.h[7]
+    sqdmlsl d1, s1, v13.s[0]
+    sqdmlsl d31, s31, v31.s[2]
+    sqdmlsl d16, s18, v28.s[3]
+
+// CHECK: sqdmlsl s1, h1, v1.h[0]       // encoding: [0x21,0x70,0x41,0x5f]
+// CHECK: sqdmlsl s8, h2, v5.h[1]       // encoding: [0x48,0x70,0x55,0x5f]
+// CHECK: sqdmlsl s12, h13, v14.h[2]    // encoding: [0xac,0x71,0x6e,0x5f]
+// CHECK: sqdmlsl s29, h28, v11.h[7]    // encoding: [0x9d,0x7b,0x7b,0x5f]
+// CHECK: sqdmlsl d1, s1, v13.s[0]      // encoding: [0x21,0x70,0x8d,0x5f]
+// CHECK: sqdmlsl d31, s31, v31.s[2]    // encoding: [0xff,0x7b,0x9f,0x5f]
+// CHECK: sqdmlsl d16, s18, v28.s[3]    // encoding: [0x50,0x7a,0xbc,0x5f]
+
+
+        
+
+        
+        
+
diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s
new file mode 100644
index 000000000000..8a8405ef282e
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s
@@ -0,0 +1,58 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//-----------------------------------------------------------------------------
+// Signed saturating doubling multiply long (scalar, by element)
+//-----------------------------------------------------------------------------
+    sqdmull s1, h1, v1.h[1]
+    sqdmull s8, h2, v5.h[2]
+    sqdmull s12, h17, v9.h[3]
+    sqdmull s31, h31, v15.h[7]
+    sqdmull d1, s1, v4.s[0]
+    sqdmull d31, s31, v31.s[3]
+    sqdmull d9, s10, v15.s[0]
+
+
+// CHECK: sqdmull s1, h1, v1.h[1]       // encoding: [0x21,0xb0,0x51,0x5f]
+// CHECK: sqdmull s8, h2, v5.h[2]       // encoding: [0x48,0xb0,0x65,0x5f]
+// CHECK: sqdmull s12, h17, v9.h[3]     // encoding: [0x2c,0xb2,0x79,0x5f]
+// CHECK: sqdmull s31, h31, v15.h[7]    // encoding: [0xff,0xbb,0x7f,0x5f]
+// CHECK: sqdmull d1, s1, v4.s[0]       // encoding: [0x21,0xb0,0x84,0x5f]
+// CHECK: sqdmull d31, s31, v31.s[3]    // encoding: [0xff,0xbb,0xbf,0x5f]
+// CHECK: sqdmull d9, s10, v15.s[0]     // encoding: [0x49,0xb1,0x8f,0x5f]
+ 
+//-----------------------------------------------------------------------------
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+//-----------------------------------------------------------------------------
+    sqdmulh h0, h1, v0.h[0]
+    sqdmulh h10, h11, v10.h[4]
+    sqdmulh h20, h21, v15.h[7]
+    sqdmulh s25, s26, v27.s[3]
+    sqdmulh s2, s6, v7.s[0]
+
+// CHECK: sqdmulh h0, h1, v0.h[0]       // encoding: [0x20,0xc0,0x40,0x5f]
+// CHECK: sqdmulh h10, h11, v10.h[4]    // encoding: [0x6a,0xc9,0x4a,0x5f]
+// CHECK: sqdmulh h20, h21, v15.h[7]    // encoding: [0xb4,0xca,0x7f,0x5f]
+// CHECK: sqdmulh s25, s26, v27.s[3]    // encoding: [0x59,0xcb,0xbb,0x5f]
+// CHECK: sqdmulh s2, s6, v7.s[0]       // encoding: [0xc2,0xc0,0x87,0x5f]
+
+//-----------------------------------------------------------------------------
+// Signed saturating rounding doubling multiply returning
+// high half (scalar, by element)
+//-----------------------------------------------------------------------------
+    sqrdmulh h31, h30, v14.h[2]
+    sqrdmulh h1, h1, v1.h[4]
+    sqrdmulh h21, h22, v15.h[7]
+    sqrdmulh s5, s6, v7.s[2]
+    sqrdmulh s20, s26, v27.s[1]
+
+// CHECK: sqrdmulh h31, h30, v14.h[2]   // encoding: [0xdf,0xd3,0x6e,0x5f]
+// CHECK: sqrdmulh h1, h1, v1.h[4]      // encoding: [0x21,0xd8,0x41,0x5f]
+// CHECK: sqrdmulh h21, h22, v15.h[7]   // encoding: [0xd5,0xda,0x7f,0x5f]
+// CHECK: sqrdmulh s5, s6, v7.s[2]      // encoding: [0xc5,0xd8,0x87,0x5f]
+// CHECK: sqrdmulh s20, s26, v27.s[1]   // encoding: [0x54,0xd3,0xbb,0x5f]
+        
+
+        
+        
+
diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s
new file mode 100644
index 000000000000..55ade0efc258
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-compare.s
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Compare Bitwise Equal
+//----------------------------------------------------------------------
+
+         cmeq d20, d21, d22
+
+// CHECK: cmeq d20, d21, d22   // encoding: [0xb4,0x8e,0xf6,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Bitwise Equal To Zero
+//----------------------------------------------------------------------
+
+         cmeq d20, d21, #0x0
+
+// CHECK: cmeq d20, d21, #0x0   // encoding: [0xb4,0x9a,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Unsigned Higher Or Same
+//----------------------------------------------------------------------
+
+         cmhs d20, d21, d22
+
+// CHECK: cmhs d20, d21, d22   // encoding: [0xb4,0x3e,0xf6,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than Or Equal
+//----------------------------------------------------------------------
+
+         cmge d20, d21, d22
+
+// CHECK: cmge d20, d21, d22    // encoding: [0xb4,0x3e,0xf6,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         cmge d20, d21, #0x0
+
+// CHECK: cmge d20, d21, #0x0   // encoding: [0xb4,0x8a,0xe0,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Unsigned Higher
+//----------------------------------------------------------------------
+
+         cmhi d20, d21, d22
+
+// CHECK: cmhi d20, d21, d22   // encoding: [0xb4,0x36,0xf6,0x7e]
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than
+//----------------------------------------------------------------------
+
+         cmgt d20, d21, d22
+
+// CHECK: cmgt d20, d21, d22   // encoding: [0xb4,0x36,0xf6,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Greater Than Zero
+//----------------------------------------------------------------------
+
+         cmgt d20, d21, #0x0
+
+// CHECK: cmgt d20, d21, #0x0   // encoding: [0xb4,0x8a,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Signed Less Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         cmle d20, d21, #0x0
+
+// CHECK: cmle d20, d21, #0x0   // encoding: [0xb4,0x9a,0xe0,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Less Than Zero
+//----------------------------------------------------------------------
+
+         cmlt d20, d21, #0x0
+
+// CHECK: cmlt d20, d21, #0x0   // encoding: [0xb4,0xaa,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Compare Bitwise Test Bits
+//----------------------------------------------------------------------
+
+         cmtst d20, d21, d22
+
+// CHECK: cmtst d20, d21, d22   // encoding: [0xb4,0x8e,0xf6,0x5e]
diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s
new file mode 100644
index 000000000000..97416daf0801
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-cvt.s
@@ -0,0 +1,181 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Signed Integer Convert To Floating-point
+//----------------------------------------------------------------------
+
+    scvtf s22, s13
+    scvtf d21, d12
+
+// CHECK: scvtf s22, s13    // encoding: [0xb6,0xd9,0x21,0x5e]
+// CHECK: scvtf d21, d12    // encoding: [0x95,0xd9,0x61,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Integer Convert To Floating-point
+//----------------------------------------------------------------------
+
+    ucvtf s22, s13
+    ucvtf d21, d14
+
+// CHECK: ucvtf s22, s13    // encoding: [0xb6,0xd9,0x21,0x7e]
+// CHECK: ucvtf d21, d14    // encoding: [0xd5,0xd9,0x61,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
+//----------------------------------------------------------------------
+
+    scvtf s22, s13, #32
+    scvtf d21, d12, #64
+
+// CHECK: scvtf s22, s13, #32  // encoding: [0xb6,0xe5,0x20,0x5f]
+// CHECK: scvtf d21, d12, #64  // encoding: [0x95,0xe5,0x40,0x5f]    
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
+//----------------------------------------------------------------------
+
+    ucvtf s22, s13, #32
+    ucvtf d21, d14, #64
+
+// CHECK: ucvtf s22, s13, #32  // encoding: [0xb6,0xe5,0x20,0x7f]
+// CHECK: ucvtf d21, d14, #64  // encoding: [0xd5,0xe5,0x40,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
+//----------------------------------------------------------------------
+
+    fcvtzs s21, s12, #1
+    fcvtzs d21, d12, #1
+
+// CHECK: fcvtzs s21, s12, #1  // encoding: [0x95,0xfd,0x3f,0x5f]
+// CHECK: fcvtzs d21, d12, #1  // encoding: [0x95,0xfd,0x7f,0x5f]
+        
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
+//----------------------------------------------------------------------
+
+    fcvtzu s21, s12, #1
+    fcvtzu d21, d12, #1
+
+// CHECK: fcvtzu s21, s12, #1  // encoding: [0x95,0xfd,0x3f,0x7f]
+// CHECK: fcvtzu d21, d12, #1  // encoding: [0x95,0xfd,0x7f,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
+// Odd
+//----------------------------------------------------------------------
+
+    fcvtxn s22, d13
+
+// CHECK: fcvtxn s22, d13    // encoding: [0xb6,0x69,0x61,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
+// With Ties To Away
+//----------------------------------------------------------------------
+
+    fcvtas s12, s13
+    fcvtas d21, d14
+
+// CHECK: fcvtas s12, s13    // encoding: [0xac,0xc9,0x21,0x5e]
+// CHECK: fcvtas d21, d14    // encoding: [0xd5,0xc9,0x61,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding To
+// Nearest With Ties To Away
+//----------------------------------------------------------------------
+
+    fcvtau s12, s13
+    fcvtau d21, d14
+
+// CHECK: fcvtau s12, s13    // encoding: [0xac,0xc9,0x21,0x7e]
+// CHECK: fcvtau d21, d14    // encoding: [0xd5,0xc9,0x61,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding Toward
+// Minus Infinity
+//----------------------------------------------------------------------
+
+    fcvtms s22, s13
+    fcvtms d21, d14
+
+// CHECK: fcvtms s22, s13    // encoding: [0xb6,0xb9,0x21,0x5e]
+// CHECK: fcvtms d21, d14    // encoding: [0xd5,0xb9,0x61,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+// Minus Infinity
+//----------------------------------------------------------------------
+
+    fcvtmu s12, s13
+    fcvtmu d21, d14
+
+// CHECK: fcvtmu s12, s13    // encoding: [0xac,0xb9,0x21,0x7e]
+// CHECK: fcvtmu d21, d14    // encoding: [0xd5,0xb9,0x61,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
+// With Ties To Even
+//----------------------------------------------------------------------
+
+    fcvtns s22, s13
+    fcvtns d21, d14
+
+// CHECK: fcvtns s22, s13    // encoding: [0xb6,0xa9,0x21,0x5e]
+// CHECK: fcvtns d21, d14    // encoding: [0xd5,0xa9,0x61,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding To
+// Nearest With Ties To Even
+//----------------------------------------------------------------------
+
+    fcvtnu s12, s13
+    fcvtnu d21, d14
+
+// CHECK: fcvtnu s12, s13    // encoding: [0xac,0xa9,0x21,0x7e]
+// CHECK: fcvtnu d21, d14    // encoding: [0xd5,0xa9,0x61,0x7e]
+        
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding Toward
+// Positive Infinity
+//----------------------------------------------------------------------
+
+    fcvtps s22, s13
+    fcvtps d21, d14
+
+// CHECK: fcvtps s22, s13    // encoding: [0xb6,0xa9,0xa1,0x5e]
+// CHECK: fcvtps d21, d14    // encoding: [0xd5,0xa9,0xe1,0x5e]
+        
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+// Positive Infinity
+//----------------------------------------------------------------------
+
+    fcvtpu s12, s13
+    fcvtpu d21, d14
+
+// CHECK: fcvtpu s12, s13    // encoding: [0xac,0xa9,0xa1,0x7e]
+// CHECK: fcvtpu d21, d14    // encoding: [0xd5,0xa9,0xe1,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
+//----------------------------------------------------------------------
+
+    fcvtzs s12, s13
+    fcvtzs d21, d14
+
+// CHECK: fcvtzs s12, s13    // encoding: [0xac,0xb9,0xa1,0x5e]
+// CHECK: fcvtzs d21, d14    // encoding: [0xd5,0xb9,0xe1,0x5e]
+        
+//----------------------------------------------------------------------
+// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward 
+// Zero
+//----------------------------------------------------------------------
+
+    fcvtzu s12, s13
+    fcvtzu d21, d14
+
+// CHECK: fcvtzu s12, s13    // encoding: [0xac,0xb9,0xa1,0x7e]
+// CHECK: fcvtzu d21, d14    // encoding: [0xd5,0xb9,0xe1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s
new file mode 100644
index 000000000000..77c638df0952
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-dup.s
@@ -0,0 +1,55 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Duplicate element (scalar)
+//------------------------------------------------------------------------------
+         dup b0, v0.b[15]
+         dup b1, v0.b[7]
+         dup b17, v0.b[0]
+         dup h5, v31.h[7]
+         dup h9, v1.h[4]
+         dup h11, v17.h[0]
+         dup s2, v2.s[3]
+         dup s4, v21.s[0]
+         dup s31, v21.s[2]
+         dup d3, v5.d[0]
+         dup d6, v5.d[1]
+
+// CHECK: dup b0, v0.b[15]      // encoding: [0x00,0x04,0x1f,0x5e]
+// CHECK: dup b1, v0.b[7]       // encoding: [0x01,0x04,0x0f,0x5e]
+// CHECK: dup b17, v0.b[0]      // encoding: [0x11,0x04,0x01,0x5e]
+// CHECK: dup h5, v31.h[7]      // encoding: [0xe5,0x07,0x1e,0x5e]
+// CHECK: dup h9, v1.h[4]       // encoding: [0x29,0x04,0x12,0x5e]
+// CHECK: dup h11, v17.h[0]     // encoding: [0x2b,0x06,0x02,0x5e]
+// CHECK: dup s2, v2.s[3]       // encoding: [0x42,0x04,0x1c,0x5e]
+// CHECK: dup s4, v21.s[0]      // encoding: [0xa4,0x06,0x04,0x5e]
+// CHECK: dup s31, v21.s[2]     // encoding: [0xbf,0x06,0x14,0x5e]
+// CHECK: dup d3, v5.d[0]       // encoding: [0xa3,0x04,0x08,0x5e]
+// CHECK: dup d6, v5.d[1]       // encoding: [0xa6,0x04,0x18,0x5e]
+
+//------------------------------------------------------------------------------
+// Aliases for Duplicate element (scalar)
+//------------------------------------------------------------------------------
+         mov b0, v0.b[15]
+         mov b1, v0.b[7]
+         mov b17, v0.b[0]
+         mov h5, v31.h[7]
+         mov h9, v1.h[4]
+         mov h11, v17.h[0]
+         mov s2, v2.s[3]
+         mov s4, v21.s[0]
+         mov s31, v21.s[2]
+         mov d3, v5.d[0]
+         mov d6, v5.d[1]
+
+// CHECK: dup b0, v0.b[15]      // encoding: [0x00,0x04,0x1f,0x5e]
+// CHECK: dup b1, v0.b[7]       // encoding: [0x01,0x04,0x0f,0x5e]
+// CHECK: dup b17, v0.b[0]      // encoding: [0x11,0x04,0x01,0x5e]
+// CHECK: dup h5, v31.h[7]      // encoding: [0xe5,0x07,0x1e,0x5e]
+// CHECK: dup h9, v1.h[4]       // encoding: [0x29,0x04,0x12,0x5e]
+// CHECK: dup h11, v17.h[0]     // encoding: [0x2b,0x06,0x02,0x5e]
+// CHECK: dup s2, v2.s[3]       // encoding: [0x42,0x04,0x1c,0x5e]
+// CHECK: dup s4, v21.s[0]      // encoding: [0xa4,0x06,0x04,0x5e]
+// CHECK: dup s31, v21.s[2]     // encoding: [0xbf,0x06,0x14,0x5e]
+// CHECK: dup d3, v5.d[0]       // encoding: [0xa3,0x04,0x08,0x5e]
+// CHECK: dup d6, v5.d[1]       // encoding: [0xa6,0x04,0x18,0x5e]
diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s
new file mode 100644
index 000000000000..e25224e386f0
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-extract-narrow.s
@@ -0,0 +1,40 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Unsigned Narrow
+//----------------------------------------------------------------------
+
+    sqxtun b19, h14
+    sqxtun h21, s15
+    sqxtun s20, d12
+
+// CHECK: sqxtun b19, h14  // encoding: [0xd3,0x29,0x21,0x7e]
+// CHECK: sqxtun h21, s15  // encoding: [0xf5,0x29,0x61,0x7e]
+// CHECK: sqxtun s20, d12  // encoding: [0x94,0x29,0xa1,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Extract Signed Narrow
+//----------------------------------------------------------------------
+
+    sqxtn b18, h18
+    sqxtn h20, s17
+    sqxtn s19, d14
+
+// CHECK: sqxtn b18, h18  // encoding: [0x52,0x4a,0x21,0x5e]
+// CHECK: sqxtn h20, s17  // encoding: [0x34,0x4a,0x61,0x5e]
+// CHECK: sqxtn s19, d14  // encoding: [0xd3,0x49,0xa1,0x5e]
+
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Saturating Extract Narrow
+//----------------------------------------------------------------------
+
+    uqxtn b18, h18
+    uqxtn h20, s17
+    uqxtn s19, d14
+
+// CHECK: uqxtn b18, h18  // encoding: [0x52,0x4a,0x21,0x7e]
+// CHECK: uqxtn h20, s17  // encoding: [0x34,0x4a,0x61,0x7e]
+// CHECK: uqxtn s19, d14  // encoding: [0xd3,0x49,0xa1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s
new file mode 100644
index 000000000000..a59ec0d1d6ed
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-fp-compare.s
@@ -0,0 +1,103 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Equal
+//----------------------------------------------------------------------
+
+         fcmeq s10, s11, s12
+         fcmeq d20, d21, d22
+
+// CHECK: fcmeq s10, s11, s12   // encoding: [0x6a,0xe5,0x2c,0x5e]
+// CHECK: fcmeq d20, d21, d22   // encoding: [0xb4,0xe6,0x76,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Equal To Zero
+//----------------------------------------------------------------------
+
+         fcmeq s10, s11, #0.0
+         fcmeq d20, d21, #0.0
+
+// CHECK: fcmeq s10, s11, #0.0   // encoding: [0x6a,0xd9,0xa0,0x5e]
+// CHECK: fcmeq d20, d21, #0.0   // encoding: [0xb4,0xda,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than Or Equal
+//----------------------------------------------------------------------
+
+         fcmge s10, s11, s12
+         fcmge d20, d21, d22
+
+// CHECK: fcmge s10, s11, s12   // encoding: [0x6a,0xe5,0x2c,0x7e]
+// CHECK: fcmge d20, d21, d22   // encoding: [0xb4,0xe6,0x76,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         fcmge s10, s11, #0.0
+         fcmge d20, d21, #0.0
+
+// CHECK: fcmge s10, s11, #0.0   // encoding: [0x6a,0xc9,0xa0,0x7e]
+// CHECK: fcmge d20, d21, #0.0   // encoding: [0xb4,0xca,0xe0,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than
+//----------------------------------------------------------------------
+
+         fcmgt s10, s11, s12
+         fcmgt d20, d21, d22
+
+// CHECK: fcmgt s10, s11, s12   // encoding: [0x6a,0xe5,0xac,0x7e]
+// CHECK: fcmgt d20, d21, d22   // encoding: [0xb4,0xe6,0xf6,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Greater Than Zero
+//----------------------------------------------------------------------
+
+         fcmgt s10, s11, #0.0
+         fcmgt d20, d21, #0.0
+
+// CHECK: fcmgt s10, s11, #0.0   // encoding: [0x6a,0xc9,0xa0,0x5e]
+// CHECK: fcmgt d20, d21, #0.0   // encoding: [0xb4,0xca,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+//----------------------------------------------------------------------
+
+         fcmle s10, s11, #0.0
+         fcmle d20, d21, #0.0
+
+// CHECK: fcmle s10, s11, #0.0   // encoding: [0x6a,0xd9,0xa0,0x7e]
+// CHECK: fcmle d20, d21, #0.0   // encoding: [0xb4,0xda,0xe0,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Compare Mask Less Than Zero
+//----------------------------------------------------------------------
+
+         fcmlt s10, s11, #0.0
+         fcmlt d20, d21, #0.0
+
+// CHECK: fcmlt s10, s11, #0.0   // encoding: [0x6a,0xe9,0xa0,0x5e]
+// CHECK: fcmlt d20, d21, #0.0   // encoding: [0xb4,0xea,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+//----------------------------------------------------------------------
+
+         facge s10, s11, s12
+         facge d20, d21, d22
+
+// CHECK: facge s10, s11, s12    // encoding: [0x6a,0xed,0x2c,0x7e]
+// CHECK: facge d20, d21, d22    // encoding: [0xb4,0xee,0x76,0x7e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Absolute Compare Mask Greater Than
+//----------------------------------------------------------------------
+
+         facgt s10, s11, s12
+         facgt d20, d21, d22
+
+// CHECK: facgt s10, s11, s12   // encoding: [0x6a,0xed,0xac,0x7e]
+// CHECK: facgt d20, d21, d22   // encoding: [0xb4,0xee,0xf6,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s
new file mode 100644
index 000000000000..e33bdad91a94
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-mul.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Doubling Multiply Half High
+//----------------------------------------------------------------------
+
+    sqdmulh h10, h11, h12
+    sqdmulh s20, s21, s2
+        
+// CHECK: sqdmulh h10, h11, h12     // encoding: [0x6a,0xb5,0x6c,0x5e]
+// CHECK: sqdmulh s20, s21, s2      // encoding: [0xb4,0xb6,0xa2,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+//----------------------------------------------------------------------
+
+    sqrdmulh h10, h11, h12
+    sqrdmulh s20, s21, s2
+        
+// CHECK: sqrdmulh h10, h11, h12     // encoding: [0x6a,0xb5,0x6c,0x7e]
+// CHECK: sqrdmulh s20, s21, s2      // encoding: [0xb4,0xb6,0xa2,0x7e]
+
+//----------------------------------------------------------------------
+// Floating-point Multiply Extended
+//----------------------------------------------------------------------
+
+    fmulx s20, s22, s15
+    fmulx d23, d11, d1
+
+// CHECK: fmulx s20, s22, s15   // encoding: [0xd4,0xde,0x2f,0x5e]
+// CHECK: fmulx d23, d11, d1    // encoding: [0x77,0xdd,0x61,0x5e]
+
+//----------------------------------------------------------------------
+// Signed Saturating Doubling Multiply-Add Long
+//----------------------------------------------------------------------
+
+    sqdmlal s17, h27, h12
+    sqdmlal d19, s24, s12
+
+// CHECK: sqdmlal s17, h27, h12  // encoding: [0x71,0x93,0x6c,0x5e]
+// CHECK: sqdmlal d19, s24, s12  // encoding: [0x13,0x93,0xac,0x5e]
+
+//----------------------------------------------------------------------
+// Signed Saturating Doubling Multiply-Subtract Long
+//----------------------------------------------------------------------
+
+    sqdmlsl s14, h12, h25
+    sqdmlsl d12, s23, s13
+
+// CHECK: sqdmlsl s14, h12, h25  // encoding: [0x8e,0xb1,0x79,0x5e]
+// CHECK: sqdmlsl d12, s23, s13  // encoding: [0xec,0xb2,0xad,0x5e]
+
+//----------------------------------------------------------------------
+// Signed Saturating Doubling Multiply Long
+//----------------------------------------------------------------------
+
+    sqdmull s12, h22, h12
+    sqdmull d15, s22, s12
+
+// CHECK: sqdmull s12, h22, h12  // encoding: [0xcc,0xd2,0x6c,0x5e]
+// CHECK: sqdmull d15, s22, s12  // encoding: [0xcf,0xd2,0xac,0x5e]
diff --git a/test/MC/AArch64/neon-scalar-neg.s b/test/MC/AArch64/neon-scalar-neg.s
new file mode 100644
index 000000000000..8e5d61dd2459
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-neg.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Negate
+//----------------------------------------------------------------------
+
+    neg d29, d24
+
+// CHECK: neg d29, d24    // encoding: [0x1d,0xbb,0xe0,0x7e]
+        
+//----------------------------------------------------------------------
+// Scalar Signed Saturating Negate
+//----------------------------------------------------------------------
+
+    sqneg b19, b14
+    sqneg h21, h15
+    sqneg s20, s12
+    sqneg d18, d12
+
+// CHECK: sqneg b19, b14    // encoding: [0xd3,0x79,0x20,0x7e]
+// CHECK: sqneg h21, h15    // encoding: [0xf5,0x79,0x60,0x7e]
+// CHECK: sqneg s20, s12    // encoding: [0x94,0x79,0xa0,0x7e]
+// CHECK: sqneg d18, d12    // encoding: [0x92,0x79,0xe0,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s
new file mode 100644
index 000000000000..7a886f3b4a73
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-recip.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Step
+//----------------------------------------------------------------------
+
+    frecps s21, s16, s13
+    frecps d22, d30, d21
+
+// CHECK: frecps s21, s16, s13   // encoding: [0x15,0xfe,0x2d,0x5e]
+// CHECK: frecps d22, d30, d21   // encoding: [0xd6,0xff,0x75,0x5e]
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Square Root Step
+//----------------------------------------------------------------------
+
+    frsqrts s21, s5, s12
+    frsqrts d8, d22, d18
+
+// CHECK: frsqrts s21, s5, s12   // encoding: [0xb5,0xfc,0xac,0x5e]
+// CHECK: frsqrts d8, d22, d18   // encoding: [0xc8,0xfe,0xf2,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Estimate
+//----------------------------------------------------------------------
+
+    frecpe s19, s14
+    frecpe d13, d13
+
+// CHECK: frecpe s19, s14    // encoding: [0xd3,0xd9,0xa1,0x5e]
+// CHECK: frecpe d13, d13    // encoding: [0xad,0xd9,0xe1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Exponent
+//----------------------------------------------------------------------
+
+    frecpx s18, s10
+    frecpx d16, d19
+
+// CHECK: frecpx s18, s10    // encoding: [0x52,0xf9,0xa1,0x5e]
+// CHECK: frecpx d16, d19    // encoding: [0x70,0xfa,0xe1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Floating-point Reciprocal Square Root Estimate
+//----------------------------------------------------------------------
+
+    frsqrte s22, s13
+    frsqrte d21, d12
+
+// CHECK: frsqrte s22, s13    // encoding: [0xb6,0xd9,0xa1,0x7e]
+// CHECK: frsqrte d21, d12    // encoding: [0x95,0xd9,0xe1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
new file mode 100644
index 000000000000..403a940ec2f2
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Integer)
+//----------------------------------------------------------------------
+      addp d0, v1.2d
+
+// CHECK: addp d0, v1.2d     // encoding: [0x20,0xb8,0xf1,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Reduce Add Pairwise (Floating Point)
+//----------------------------------------------------------------------
+      faddp d20, v1.2d
+
+// CHECK: faddp d20, v1.2d     // encoding: [0x34,0xd8,0x70,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s
new file mode 100644
index 000000000000..6113e09af388
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-rounding-shift.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+
+//------------------------------------------------------------------------------
+// Scalar Integer Rounding Shift Left (Signed)
+//------------------------------------------------------------------------------
+         srshl d17, d31, d8
+
+// CHECK: srshl d17, d31, d8      // encoding: [0xf1,0x57,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Rounding Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         urshl d17, d31, d8
+
+// CHECK: urshl d17, d31, d8      // encoding: [0xf1,0x57,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s
new file mode 100644
index 000000000000..0bf243495999
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s
@@ -0,0 +1,81 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Add (Signed)
+//------------------------------------------------------------------------------
+         sqadd b0, b1, b2
+         sqadd h10, h11, h12
+         sqadd s20, s21, s2
+         sqadd d17, d31, d8
+
+// CHECK: sqadd b0, b1, b2        // encoding: [0x20,0x0c,0x22,0x5e]
+// CHECK: sqadd h10, h11, h12     // encoding: [0x6a,0x0d,0x6c,0x5e]
+// CHECK: sqadd s20, s21, s2      // encoding: [0xb4,0x0e,0xa2,0x5e]
+// CHECK: sqadd d17, d31, d8      // encoding: [0xf1,0x0f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Add (Unsigned)
+//------------------------------------------------------------------------------
+         uqadd b0, b1, b2
+         uqadd h10, h11, h12
+         uqadd s20, s21, s2
+         uqadd d17, d31, d8
+
+// CHECK: uqadd b0, b1, b2        // encoding: [0x20,0x0c,0x22,0x7e]
+// CHECK: uqadd h10, h11, h12     // encoding: [0x6a,0x0d,0x6c,0x7e]
+// CHECK: uqadd s20, s21, s2      // encoding: [0xb4,0x0e,0xa2,0x7e]
+// CHECK: uqadd d17, d31, d8      // encoding: [0xf1,0x0f,0xe8,0x7e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Sub (Signed)
+//------------------------------------------------------------------------------
+         sqsub b0, b1, b2
+         sqsub h10, h11, h12
+         sqsub s20, s21, s2
+         sqsub d17, d31, d8
+
+// CHECK: sqsub b0, b1, b2        // encoding: [0x20,0x2c,0x22,0x5e]
+// CHECK: sqsub h10, h11, h12     // encoding: [0x6a,0x2d,0x6c,0x5e]
+// CHECK: sqsub s20, s21, s2      // encoding: [0xb4,0x2e,0xa2,0x5e]
+// CHECK: sqsub d17, d31, d8      // encoding: [0xf1,0x2f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Sub (Unsigned)
+//------------------------------------------------------------------------------
+         uqsub b0, b1, b2
+         uqsub h10, h11, h12
+         uqsub s20, s21, s2
+         uqsub d17, d31, d8
+
+// CHECK: uqsub b0, b1, b2        // encoding: [0x20,0x2c,0x22,0x7e]
+// CHECK: uqsub h10, h11, h12     // encoding: [0x6a,0x2d,0x6c,0x7e]
+// CHECK: uqsub s20, s21, s2      // encoding: [0xb4,0x2e,0xa2,0x7e]
+// CHECK: uqsub d17, d31, d8      // encoding: [0xf1,0x2f,0xe8,0x7e]
+
+//----------------------------------------------------------------------
+// Signed Saturating Accumulate of Unsigned Value
+//----------------------------------------------------------------------
+
+    suqadd b19, b14
+    suqadd h20, h15
+    suqadd s21, s12
+    suqadd d18, d22
+
+// CHECK: suqadd b19, b14    // encoding: [0xd3,0x39,0x20,0x5e]
+// CHECK: suqadd h20, h15    // encoding: [0xf4,0x39,0x60,0x5e]
+// CHECK: suqadd s21, s12    // encoding: [0x95,0x39,0xa0,0x5e]
+// CHECK: suqadd d18, d22    // encoding: [0xd2,0x3a,0xe0,0x5e]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Accumulate of Signed Value
+//----------------------------------------------------------------------
+
+    usqadd b19, b14
+    usqadd h20, h15
+    usqadd s21, s12
+    usqadd d18, d22
+
+// CHECK: usqadd b19, b14    // encoding: [0xd3,0x39,0x20,0x7e]
+// CHECK: usqadd h20, h15    // encoding: [0xf4,0x39,0x60,0x7e]
+// CHECK: usqadd s21, s12    // encoding: [0x95,0x39,0xa0,0x7e]
+// CHECK: usqadd d18, d22    // encoding: [0xd2,0x3a,0xe0,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
new file mode 100644
index 000000000000..b09a58923445
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Left (Signed)
+//------------------------------------------------------------------------------
+         sqrshl b0, b1, b2
+         sqrshl h10, h11, h12
+         sqrshl s20, s21, s2
+         sqrshl d17, d31, d8
+
+// CHECK: sqrshl b0, b1, b2        // encoding: [0x20,0x5c,0x22,0x5e]
+// CHECK: sqrshl h10, h11, h12     // encoding: [0x6a,0x5d,0x6c,0x5e]
+// CHECK: sqrshl s20, s21, s2      // encoding: [0xb4,0x5e,0xa2,0x5e]
+// CHECK: sqrshl d17, d31, d8      // encoding: [0xf1,0x5f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         uqrshl b0, b1, b2
+         uqrshl h10, h11, h12
+         uqrshl s20, s21, s2
+         uqrshl d17, d31, d8
+
+// CHECK: uqrshl b0, b1, b2        // encoding: [0x20,0x5c,0x22,0x7e]
+// CHECK: uqrshl h10, h11, h12     // encoding: [0x6a,0x5d,0x6c,0x7e]
+// CHECK: uqrshl s20, s21, s2      // encoding: [0xb4,0x5e,0xa2,0x7e]
+// CHECK: uqrshl d17, d31, d8      // encoding: [0xf1,0x5f,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s
new file mode 100644
index 000000000000..b53c9f072f35
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-saturating-shift.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Shift Left (Signed)
+//------------------------------------------------------------------------------
+         sqshl b0, b1, b2
+         sqshl h10, h11, h12
+         sqshl s20, s21, s2
+         sqshl d17, d31, d8
+
+// CHECK: sqshl b0, b1, b2        // encoding: [0x20,0x4c,0x22,0x5e]
+// CHECK: sqshl h10, h11, h12     // encoding: [0x6a,0x4d,0x6c,0x5e]
+// CHECK: sqshl s20, s21, s2      // encoding: [0xb4,0x4e,0xa2,0x5e]
+// CHECK: sqshl d17, d31, d8      // encoding: [0xf1,0x4f,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Saturating Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         uqshl b0, b1, b2
+         uqshl h10, h11, h12
+         uqshl s20, s21, s2
+         uqshl d17, d31, d8
+
+// CHECK: uqshl b0, b1, b2        // encoding: [0x20,0x4c,0x22,0x7e]
+// CHECK: uqshl h10, h11, h12     // encoding: [0x6a,0x4d,0x6c,0x7e]
+// CHECK: uqshl s20, s21, s2      // encoding: [0xb4,0x4e,0xa2,0x7e]
+// CHECK: uqshl d17, d31, d8      // encoding: [0xf1,0x4f,0xe8,0x7e]
+
+
diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s
new file mode 100644
index 000000000000..96cb815eafa8
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-shift-imm.s
@@ -0,0 +1,186 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Signed Shift Right (Immediate)
+//----------------------------------------------------------------------
+        sshr d15, d16, #12
+
+// CHECK: sshr d15, d16, #12  // encoding: [0x0f,0x06,0x74,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Shift Right (Immediate)
+//----------------------------------------------------------------------
+        ushr d10, d17, #18
+
+// CHECK: ushr d10, d17, #18  // encoding: [0x2a,0x06,0x6e,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Signed Rounding Shift Right (Immediate)
+//----------------------------------------------------------------------
+        srshr d19, d18, #7
+
+// CHECK: srshr d19, d18, #7  // encoding: [0x53,0x26,0x79,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Rounding Shift Right (Immediate)
+//----------------------------------------------------------------------
+        urshr d20, d23, #31
+
+// CHECK: urshr d20, d23, #31  // encoding: [0xf4,0x26,0x61,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Signed Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+        ssra d18, d12, #21
+
+// CHECK: ssra d18, d12, #21  // encoding: [0x92,0x15,0x6b,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+        usra d20, d13, #61
+
+// CHECK: usra d20, d13, #61  // encoding: [0xb4,0x15,0x43,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+        srsra d15, d11, #19
+
+// CHECK: srsra d15, d11, #19  // encoding: [0x6f,0x35,0x6d,0x5f]
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+//----------------------------------------------------------------------
+        ursra d18, d10, #13
+
+// CHECK: ursra d18, d10, #13  // encoding: [0x52,0x35,0x73,0x7f]
+
+//----------------------------------------------------------------------
+// Scalar Shift Left (Immediate)
+//----------------------------------------------------------------------
+        shl d7, d10, #12
+
+// CHECK: shl d7, d10, #12  // encoding: [0x47,0x55,0x4c,0x5f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Left (Immediate)
+//----------------------------------------------------------------------
+        sqshl b11, b19, #7
+        sqshl h13, h18, #11
+        sqshl s14, s17, #22
+        sqshl d15, d16, #51
+
+// CHECK: sqshl b11, b19, #7   // encoding: [0x6b,0x76,0x0f,0x5f]
+// CHECK: sqshl h13, h18, #11  // encoding: [0x4d,0x76,0x1b,0x5f]
+// CHECK: sqshl s14, s17, #22  // encoding: [0x2e,0x76,0x36,0x5f]
+// CHECK: sqshl d15, d16, #51  // encoding: [0x0f,0x76,0x73,0x5f]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Shift Left (Immediate)
+//----------------------------------------------------------------------
+        uqshl b18, b15, #6
+        uqshl h11, h18, #7
+        uqshl s14, s19, #18
+        uqshl d15, d12, #19
+
+// CHECK: uqshl b18, b15, #6   // encoding: [0xf2,0x75,0x0e,0x7f]
+// CHECK: uqshl h11, h18, #7   // encoding: [0x4b,0x76,0x17,0x7f]
+// CHECK: uqshl s14, s19, #18  // encoding: [0x6e,0x76,0x32,0x7f]
+// CHECK: uqshl d15, d12, #19  // encoding: [0x8f,0x75,0x53,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Left Unsigned (Immediate)
+//----------------------------------------------------------------------
+        sqshlu b15, b18, #6
+        sqshlu h19, h17, #6
+        sqshlu s16, s14, #25
+        sqshlu d11, d13, #32
+
+// CHECK: sqshlu  b15, b18, #6   // encoding: [0x4f,0x66,0x0e,0x7f]
+// CHECK: sqshlu  h19, h17, #6   // encoding: [0x33,0x66,0x16,0x7f]
+// CHECK: sqshlu  s16, s14, #25  // encoding: [0xd0,0x65,0x39,0x7f]
+// CHECK: sqshlu  d11, d13, #32  // encoding: [0xab,0x65,0x60,0x7f]
+
+//----------------------------------------------------------------------
+// Shift Right And Insert (Immediate)
+//----------------------------------------------------------------------
+        sri d10, d12, #14
+
+// CHECK: sri d10, d12, #14  // encoding: [0x8a,0x45,0x72,0x7f]
+
+//----------------------------------------------------------------------
+// Shift Left And Insert (Immediate)
+//----------------------------------------------------------------------
+        sli d10, d14, #12
+
+// CHECK: sli d10, d14, #12  // encoding: [0xca,0x55,0x4c,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+        sqshrn b10, h15, #5
+        sqshrn h17, s10, #4
+        sqshrn s18, d10, #31
+
+// CHECK: sqshrn  b10, h15, #5   // encoding: [0xea,0x95,0x0b,0x5f]
+// CHECK: sqshrn  h17, s10, #4   // encoding: [0x51,0x95,0x1c,0x5f]
+// CHECK: sqshrn  s18, d10, #31  // encoding: [0x52,0x95,0x21,0x5f]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+        uqshrn b12, h10, #7
+        uqshrn h10, s14, #5
+        uqshrn s10, d12, #13
+
+// CHECK: uqshrn  b12, h10, #7   // encoding: [0x4c,0x95,0x09,0x7f]
+// CHECK: uqshrn  h10, s14, #5   // encoding: [0xca,0x95,0x1b,0x7f]
+// CHECK: uqshrn  s10, d12, #13  // encoding: [0x8a,0x95,0x33,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+        sqrshrn b10, h13, #2
+        sqrshrn h15, s10, #6
+        sqrshrn s15, d12, #9
+
+// CHECK: sqrshrn b10, h13, #2  // encoding: [0xaa,0x9d,0x0e,0x5f]
+// CHECK: sqrshrn h15, s10, #6  // encoding: [0x4f,0x9d,0x1a,0x5f]
+// CHECK: sqrshrn s15, d12, #9  // encoding: [0x8f,0x9d,0x37,0x5f]
+
+//----------------------------------------------------------------------
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+//----------------------------------------------------------------------
+        uqrshrn b10, h12, #5
+        uqrshrn h12, s10, #14
+        uqrshrn s10, d10, #25
+
+// CHECK: uqrshrn b10, h12, #5   // encoding: [0x8a,0x9d,0x0b,0x7f]
+// CHECK: uqrshrn h12, s10, #14  // encoding: [0x4c,0x9d,0x12,0x7f]
+// CHECK: uqrshrn s10, d10, #25  // encoding: [0x4a,0x9d,0x27,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+//----------------------------------------------------------------------
+        sqshrun b15, h10, #7
+        sqshrun h20, s14, #3
+        sqshrun s10, d15, #15
+
+// CHECK: sqshrun b15, h10, #7   // encoding: [0x4f,0x85,0x09,0x7f]
+// CHECK: sqshrun h20, s14, #3   // encoding: [0xd4,0x85,0x1d,0x7f]
+// CHECK: sqshrun s10, d15, #15  // encoding: [0xea,0x85,0x31,0x7f]
+
+//----------------------------------------------------------------------
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+//----------------------------------------------------------------------
+
+        sqrshrun b17, h10, #6
+        sqrshrun h10, s13, #15
+        sqrshrun s22, d16, #31
+
+// CHECK: sqrshrun b17, h10, #6   // encoding: [0x51,0x8d,0x0a,0x7f]
+// CHECK: sqrshrun h10, s13, #15  // encoding: [0xaa,0x8d,0x11,0x7f]
+// CHECK: sqrshrun s22, d16, #31  // encoding: [0x16,0x8e,0x21,0x7f]
diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s
new file mode 100644
index 000000000000..366840a93159
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-shift.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+//------------------------------------------------------------------------------
+// Scalar Integer Shift Left (Signed)
+//------------------------------------------------------------------------------
+         sshl d17, d31, d8
+
+// CHECK: sshl d17, d31, d8      // encoding: [0xf1,0x47,0xe8,0x5e]
+
+//------------------------------------------------------------------------------
+// Scalar Integer Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         ushl d17, d31, d8
+
+// CHECK: ushl d17, d31, d8      // encoding: [0xf1,0x47,0xe8,0x7e]
+
diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s
new file mode 100644
index 000000000000..97604587424e
--- /dev/null
+++ b/test/MC/AArch64/neon-shift-left-long.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Integer shift left long (Signed)
+//------------------------------------------------------------------------------
+         sshll v0.8h, v1.8b, #3
+         sshll v0.4s, v1.4h, #3
+         sshll v0.2d, v1.2s, #3
+         sshll2 v0.8h, v1.16b, #3
+         sshll2 v0.4s, v1.8h, #3
+         sshll2 v0.2d, v1.4s, #3
+
+// CHECK: sshll v0.8h, v1.8b, #3         // encoding: [0x20,0xa4,0x0b,0x0f]
+// CHECK: sshll v0.4s, v1.4h, #3         // encoding: [0x20,0xa4,0x13,0x0f]
+// CHECK: sshll v0.2d, v1.2s, #3         // encoding: [0x20,0xa4,0x23,0x0f]
+// CHECK: sshll2 v0.8h, v1.16b, #3       // encoding: [0x20,0xa4,0x0b,0x4f]
+// CHECK: sshll2 v0.4s, v1.8h, #3        // encoding: [0x20,0xa4,0x13,0x4f]
+// CHECK: sshll2 v0.2d, v1.4s, #3        // encoding: [0x20,0xa4,0x23,0x4f]
+
+//------------------------------------------------------------------------------
+// Integer shift left long (Unsigned)
+//------------------------------------------------------------------------------
+         ushll v0.8h, v1.8b, #3
+         ushll v0.4s, v1.4h, #3
+         ushll v0.2d, v1.2s, #3
+         ushll2 v0.8h, v1.16b, #3
+         ushll2 v0.4s, v1.8h, #3
+         ushll2 v0.2d, v1.4s, #3
+
+// CHECK: ushll v0.8h, v1.8b, #3         // encoding: [0x20,0xa4,0x0b,0x2f]
+// CHECK: ushll v0.4s, v1.4h, #3         // encoding: [0x20,0xa4,0x13,0x2f]
+// CHECK: ushll v0.2d, v1.2s, #3         // encoding: [0x20,0xa4,0x23,0x2f]
+// CHECK: ushll2 v0.8h, v1.16b, #3       // encoding: [0x20,0xa4,0x0b,0x6f]
+// CHECK: ushll2 v0.4s, v1.8h, #3        // encoding: [0x20,0xa4,0x13,0x6f]
+// CHECK: ushll2 v0.2d, v1.4s, #3        // encoding: [0x20,0xa4,0x23,0x6f]
diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s
new file mode 100644
index 000000000000..614e6de16222
--- /dev/null
+++ b/test/MC/AArch64/neon-shift.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Vector Integer Shift Left (Signed)
+//------------------------------------------------------------------------------
+         sshl v0.8b, v1.8b, v2.8b
+         sshl v0.16b, v1.16b, v2.16b
+         sshl v0.4h, v1.4h, v2.4h
+         sshl v0.8h, v1.8h, v2.8h
+         sshl v0.2s, v1.2s, v2.2s
+         sshl v0.4s, v1.4s, v2.4s
+         sshl v0.2d, v1.2d, v2.2d
+
+// CHECK: sshl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x44,0x22,0x0e]
+// CHECK: sshl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x44,0x22,0x4e]
+// CHECK: sshl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x44,0x62,0x0e]
+// CHECK: sshl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x44,0x62,0x4e]
+// CHECK: sshl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x44,0xa2,0x0e]
+// CHECK: sshl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x44,0xa2,0x4e]
+// CHECK: sshl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x44,0xe2,0x4e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Shift Left (Unsigned)
+//------------------------------------------------------------------------------
+         ushl v0.8b, v1.8b, v2.8b
+         ushl v0.16b, v1.16b, v2.16b
+         ushl v0.4h, v1.4h, v2.4h
+         ushl v0.8h, v1.8h, v2.8h
+         ushl v0.2s, v1.2s, v2.2s
+         ushl v0.4s, v1.4s, v2.4s
+         ushl v0.2d, v1.2d, v2.2d
+
+// CHECK: ushl v0.8b, v1.8b, v2.8b        // encoding: [0x20,0x44,0x22,0x2e]
+// CHECK: ushl v0.16b, v1.16b, v2.16b     // encoding: [0x20,0x44,0x22,0x6e]
+// CHECK: ushl v0.4h, v1.4h, v2.4h        // encoding: [0x20,0x44,0x62,0x2e]
+// CHECK: ushl v0.8h, v1.8h, v2.8h        // encoding: [0x20,0x44,0x62,0x6e]
+// CHECK: ushl v0.2s, v1.2s, v2.2s        // encoding: [0x20,0x44,0xa2,0x2e]
+// CHECK: ushl v0.4s, v1.4s, v2.4s        // encoding: [0x20,0x44,0xa2,0x6e]
+// CHECK: ushl v0.2d, v1.2d, v2.2d        // encoding: [0x20,0x44,0xe2,0x6e]
+
+//------------------------------------------------------------------------------
+// Vector Integer Shift Left by Immediate
+//------------------------------------------------------------------------------
+         shl v0.8b, v1.8b, #3
+         shl v0.4h, v1.4h, #3
+         shl v0.2s, v1.2s, #3
+         shl v0.16b, v1.16b, #3
+         shl v0.8h, v1.8h, #3
+         shl v0.4s, v1.4s, #3
+         shl v0.2d, v1.2d, #3
+
+// CHECK: shl v0.8b, v1.8b, #3        // encoding: [0x20,0x54,0x0b,0x0f]
+// CHECK: shl v0.4h, v1.4h, #3        // encoding: [0x20,0x54,0x13,0x0f]
+// CHECK: shl v0.2s, v1.2s, #3        // encoding: [0x20,0x54,0x23,0x0f]
+// CHECK: shl v0.16b, v1.16b, #3      // encoding: [0x20,0x54,0x0b,0x4f]
+// CHECK: shl v0.8h, v1.8h, #3        // encoding: [0x20,0x54,0x13,0x4f]
+// CHECK: shl v0.4s, v1.4s, #3        // encoding: [0x20,0x54,0x23,0x4f]
+// CHECK: shl v0.2d, v1.2d, #3        // encoding: [0x20,0x54,0x43,0x4f]
diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s
new file mode 100644
index 000000000000..f254d65b3b0c
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-copy.s
@@ -0,0 +1,135 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Insert element (vector, from main)
+//------------------------------------------------------------------------------
+         ins v2.b[2], w1
+         ins v7.h[7], w14
+         ins v20.s[0], w30
+         ins v1.d[1], x7
+         
+         mov v2.b[2], w1
+         mov v7.h[7], w14
+         mov v20.s[0], w30
+         mov v1.d[1], x7
+
+// CHECK: ins	v2.b[2], w1           // encoding: [0x22,0x1c,0x05,0x4e]
+// CHECK: ins	v7.h[7], w14          // encoding: [0xc7,0x1d,0x1e,0x4e]
+// CHECK: ins	v20.s[0], w30         // encoding: [0xd4,0x1f,0x04,0x4e]
+// CHECK: ins	v1.d[1], x7           // encoding: [0xe1,0x1c,0x18,0x4e]
+
+// CHECK: ins v2.b[2], w1           // encoding: [0x22,0x1c,0x05,0x4e]
+// CHECK: ins v7.h[7], w14          // encoding: [0xc7,0x1d,0x1e,0x4e]
+// CHECK: ins v20.s[0], w30         // encoding: [0xd4,0x1f,0x04,0x4e]
+// CHECK: ins v1.d[1], x7           // encoding: [0xe1,0x1c,0x18,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Signed integer move (main, from element)
+//------------------------------------------------------------------------------
+         smov w1, v0.b[15]
+         smov w14, v6.h[4]
+         smov x1, v0.b[15]
+         smov x14, v6.h[4]
+         smov x20, v9.s[2]
+
+// CHECK: smov	w1, v0.b[15]          // encoding: [0x01,0x2c,0x1f,0x0e]
+// CHECK: smov	w14, v6.h[4]          // encoding: [0xce,0x2c,0x12,0x0e]
+// CHECK: smov	x1, v0.b[15]          // encoding: [0x01,0x2c,0x1f,0x4e]
+// CHECK: smov	x14, v6.h[4]          // encoding: [0xce,0x2c,0x12,0x4e]
+// CHECK: smov	x20, v9.s[2]          // encoding: [0x34,0x2d,0x14,0x4e]
+
+
+//------------------------------------------------------------------------------
+// Unsigned integer move (main, from element)
+//------------------------------------------------------------------------------
+         umov w1, v0.b[15]
+         umov w14, v6.h[4]
+         umov w20, v9.s[2]
+         umov x7, v18.d[1]
+
+         mov w20, v9.s[2]
+         mov x7, v18.d[1]
+
+// CHECK: umov	w1, v0.b[15]          // encoding: [0x01,0x3c,0x1f,0x0e]
+// CHECK: umov	w14, v6.h[4]          // encoding: [0xce,0x3c,0x12,0x0e]
+// CHECK: umov	w20, v9.s[2]          // encoding: [0x34,0x3d,0x14,0x0e]
+// CHECK: umov	x7, v18.d[1]          // encoding: [0x47,0x3e,0x18,0x4e]
+
+// CHECK: umov  w20, v9.s[2]          // encoding: [0x34,0x3d,0x14,0x0e]
+// CHECK: umov  x7, v18.d[1]          // encoding: [0x47,0x3e,0x18,0x4e]
+
+//------------------------------------------------------------------------------
+// Insert element (vector, from element)
+//------------------------------------------------------------------------------
+
+         ins v1.b[14], v3.b[6]
+         ins v6.h[7], v7.h[5]
+         ins v15.s[3], v22.s[2]
+         ins v0.d[0], v4.d[1]
+
+         mov v1.b[14], v3.b[6]
+         mov v6.h[7], v7.h[5]
+         mov v15.s[3], v22.s[2]
+         mov v0.d[0], v4.d[1]
+
+// CHECK: ins	v1.b[14], v3.b[6]       // encoding: [0x61,0x34,0x1d,0x6e]
+// CHECK: ins	v6.h[7], v7.h[5]        // encoding: [0xe6,0x54,0x1e,0x6e]
+// CHECK: ins	v15.s[3], v22.s[2]      // encoding: [0xcf,0x46,0x1c,0x6e]
+// CHECK: ins	v0.d[0], v4.d[1]        // encoding: [0x80,0x44,0x08,0x6e]
+
+// CHECK: ins v1.b[14], v3.b[6]       // encoding: [0x61,0x34,0x1d,0x6e]
+// CHECK: ins v6.h[7], v7.h[5]        // encoding: [0xe6,0x54,0x1e,0x6e]
+// CHECK: ins v15.s[3], v22.s[2]      // encoding: [0xcf,0x46,0x1c,0x6e]
+// CHECK: ins v0.d[0], v4.d[1]        // encoding: [0x80,0x44,0x08,0x6e]
+
+//------------------------------------------------------------------------------
+// Duplicate to all lanes (vector, from element)
+//------------------------------------------------------------------------------
+         dup v1.8b, v2.b[2]
+         dup v11.4h, v7.h[7]
+         dup v17.2s, v20.s[0]
+         dup v1.16b, v2.b[2]
+         dup v11.8h, v7.h[7]
+         dup v17.4s, v20.s[0]
+         dup v5.2d, v1.d[1]         
+
+// CHECK: dup v1.8b, v2.b[2]        // encoding: [0x41,0x04,0x05,0x0e]
+// CHECK: dup v11.4h, v7.h[7]       // encoding: [0xeb,0x04,0x1e,0x0e]
+// CHECK: dup v17.2s, v20.s[0]      // encoding: [0x91,0x06,0x04,0x0e]
+// CHECK: dup v1.16b, v2.b[2]       // encoding: [0x41,0x04,0x05,0x4e]
+// CHECK: dup v11.8h, v7.h[7]       // encoding: [0xeb,0x04,0x1e,0x4e]
+// CHECK: dup v17.4s, v20.s[0]      // encoding: [0x91,0x06,0x04,0x4e]
+// CHECK: dup v5.2d, v1.d[1]        // encoding: [0x25,0x04,0x18,0x4e]
+
+//------------------------------------------------------------------------------
+// Duplicate to all lanes (vector, from main)
+//------------------------------------------------------------------------------
+         dup v1.8b, w1
+         dup v11.4h, w14
+         dup v17.2s, w30
+         dup v1.16b, w2
+         dup v11.8h, w16
+         dup v17.4s, w28
+         dup v5.2d, x0        
+
+// CHECK: dup	v1.8b, w1             // encoding: [0x21,0x0c,0x01,0x0e]
+// CHECK: dup	v11.4h, w14           // encoding: [0xcb,0x0d,0x02,0x0e]
+// CHECK: dup	v17.2s, w30           // encoding: [0xd1,0x0f,0x04,0x0e]
+// CHECK: dup	v1.16b, w2            // encoding: [0x41,0x0c,0x01,0x4e]
+// CHECK: dup	v11.8h, w16           // encoding: [0x0b,0x0e,0x02,0x4e]
+// CHECK: dup	v17.4s, w28           // encoding: [0x91,0x0f,0x04,0x4e]
+// CHECK: dup	v5.2d, x0             // encoding: [0x05,0x0c,0x08,0x4e]
+
+
+
+
+
+
+
+
+
+
diff --git a/test/MC/AArch64/neon-simd-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-ldst-multi-elem.s
new file mode 100644
index 000000000000..05fe4dac9138
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-ldst-multi-elem.s
@@ -0,0 +1,463 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from one register
+//------------------------------------------------------------------------------
+         st1 {v0.16b}, [x0]
+         st1 {v15.8h}, [x15]
+         st1 {v31.4s}, [sp]
+         st1 {v0.2d}, [x0]
+         st1 {v0.8b}, [x0]
+         st1 {v15.4h}, [x15]
+         st1 {v31.2s}, [sp]
+         st1 {v0.1d}, [x0]
+// CHECK:	st1	{v0.16b}, [x0]          // encoding: [0x00,0x70,0x00,0x4c]
+// CHECK:	st1	{v15.8h}, [x15]         // encoding: [0xef,0x75,0x00,0x4c]
+// CHECK:	st1	{v31.4s}, [sp]          // encoding: [0xff,0x7b,0x00,0x4c]
+// CHECK:	st1	{v0.2d}, [x0]           // encoding: [0x00,0x7c,0x00,0x4c]
+// CHECK:	st1	{v0.8b}, [x0]           // encoding: [0x00,0x70,0x00,0x0c]
+// CHECK:	st1	{v15.4h}, [x15]         // encoding: [0xef,0x75,0x00,0x0c]
+// CHECK:	st1	{v31.2s}, [sp]          // encoding: [0xff,0x7b,0x00,0x0c]
+// CHECK:	st1	{v0.1d}, [x0]           // encoding: [0x00,0x7c,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from two consecutive registers
+//------------------------------------------------------------------------------
+         st1 {v0.16b, v1.16b}, [x0]
+         st1 {v15.8h, v16.8h}, [x15]
+         st1 {v31.4s, v0.4s}, [sp]
+         st1 {v0.2d, v1.2d}, [x0]
+         st1 {v0.8b, v1.8b}, [x0]
+         st1 {v15.4h, v16.4h}, [x15]
+         st1 {v31.2s, v0.2s}, [sp]
+         st1 {v0.1d, v1.1d}, [x0]
+// CHECK:	st1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x00,0x4c]
+// CHECK:	st1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
+// CHECK:	st1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x00,0x4c]
+// CHECK:	st1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x00,0x4c]
+// CHECK:	st1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x00,0x0c]
+// CHECK:	st1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
+// CHECK:	st1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x00,0x0c]
+// CHECK:	st1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x00,0x0c]
+
+         st1 {v0.16b-v1.16b}, [x0]
+         st1 {v15.8h-v16.8h}, [x15]
+         st1 {v31.4s-v0.4s}, [sp]
+         st1 {v0.2d-v1.2d}, [x0]
+         st1 {v0.8b-v1.8b}, [x0]
+         st1 {v15.4h-v16.4h}, [x15]
+         st1 {v31.2s-v0.2s}, [sp]
+         st1 {v0.1d-v1.1d}, [x0]
+// CHECK:	st1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x00,0x4c]
+// CHECK:	st1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
+// CHECK:	st1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x00,0x4c]
+// CHECK:	st1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x00,0x4c]
+// CHECK:	st1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x00,0x0c]
+// CHECK:	st1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
+// CHECK:	st1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x00,0x0c]
+// CHECK:	st1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from three consecutive registers
+//------------------------------------------------------------------------------
+         st1 {v0.16b, v1.16b, v2.16b}, [x0]
+         st1 {v15.8h, v16.8h, v17.8h}, [x15]
+         st1 {v31.4s, v0.4s, v1.4s}, [sp]
+         st1 {v0.2d, v1.2d, v2.2d}, [x0]
+         st1 {v0.8b, v1.8b, v2.8b}, [x0]
+         st1 {v15.4h, v16.4h, v17.4h}, [x15]
+         st1 {v31.2s, v0.2s, v1.2s}, [sp]
+         st1 {v0.1d, v1.1d, v2.1d}, [x0]
+// CHECK:	st1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c]
+// CHECK:	st1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c]
+// CHECK:	st1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
+// CHECK:	st1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
+// CHECK:	st1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c]
+// CHECK:	st1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c]
+// CHECK:	st1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
+// CHECK:	st1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
+
+         st1 {v0.16b-v2.16b}, [x0]
+         st1 {v15.8h-v17.8h}, [x15]
+         st1 {v31.4s-v1.4s}, [sp]
+         st1 {v0.2d-v2.2d}, [x0]
+         st1 {v0.8b-v2.8b}, [x0]
+         st1 {v15.4h-v17.4h}, [x15]
+         st1 {v31.2s-v1.2s}, [sp]
+         st1 {v0.1d-v2.1d}, [x0]
+// CHECK:	st1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c]
+// CHECK:	st1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c]
+// CHECK:	st1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
+// CHECK:	st1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
+// CHECK:	st1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c]
+// CHECK:	st1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c]
+// CHECK:	st1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
+// CHECK:	st1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from four consecutive registers
+//------------------------------------------------------------------------------
+         st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+         st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
+         st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
+         st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+         st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+         st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
+         st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+         st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]
+// CHECK:	st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c]
+// CHECK:	st1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c]
+// CHECK:	st1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
+// CHECK:	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
+// CHECK:	st1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c]
+// CHECK:	st1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c]
+// CHECK:	st1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
+// CHECK:	st1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
+
+         st1 {v0.16b-v3.16b}, [x0]
+         st1 {v15.8h-v18.8h}, [x15]
+         st1 {v31.4s-v2.4s}, [sp]
+         st1 {v0.2d-v3.2d}, [x0]
+         st1 {v0.8b-v3.8b}, [x0]
+         st1 {v15.4h-v18.4h}, [x15]
+         st1 {v31.2s-v2.2s}, [sp]
+         st1 {v0.1d-v3.1d}, [x0]
+// CHECK:	st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c]
+// CHECK:	st1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c]
+// CHECK:	st1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
+// CHECK:	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
+// CHECK:	st1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c]
+// CHECK:	st1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c]
+// CHECK:	st1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
+// CHECK:	st1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 2-element structures from two consecutive registers
+//------------------------------------------------------------------------------
+         st2 {v0.16b, v1.16b}, [x0]
+         st2 {v15.8h, v16.8h}, [x15]
+         st2 {v31.4s, v0.4s}, [sp]
+         st2 {v0.2d, v1.2d}, [x0]
+         st2 {v0.8b, v1.8b}, [x0]
+         st2 {v15.4h, v16.4h}, [x15]
+         st2 {v31.2s, v0.2s}, [sp]
+// CHECK:	st2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x00,0x4c]
+// CHECK:	st2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c]
+// CHECK:	st2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x00,0x4c]
+// CHECK:	st2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x00,0x4c]
+// CHECK:	st2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x00,0x0c]
+// CHECK:	st2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c]
+// CHECK:	st2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x00,0x0c]
+
+         st2 {v0.16b-v1.16b}, [x0]
+         st2 {v15.8h-v16.8h}, [x15]
+         st2 {v31.4s-v0.4s}, [sp]
+         st2 {v0.2d-v1.2d}, [x0]
+         st2 {v0.8b-v1.8b}, [x0]
+         st2 {v15.4h-v16.4h}, [x15]
+         st2 {v31.2s-v0.2s}, [sp]
+// CHECK:	st2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x00,0x4c]
+// CHECK:	st2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c]
+// CHECK:	st2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x00,0x4c]
+// CHECK:	st2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x00,0x4c]
+// CHECK:	st2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x00,0x0c]
+// CHECK:	st2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c]
+// CHECK:	st2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 3-element structures from three consecutive registers
+//------------------------------------------------------------------------------
+         st3 {v0.16b, v1.16b, v2.16b}, [x0]
+         st3 {v15.8h, v16.8h, v17.8h}, [x15]
+         st3 {v31.4s, v0.4s, v1.4s}, [sp]
+         st3 {v0.2d, v1.2d, v2.2d}, [x0]
+         st3 {v0.8b, v1.8b, v2.8b}, [x0]
+         st3 {v15.4h, v16.4h, v17.4h}, [x15]
+         st3 {v31.2s, v0.2s, v1.2s}, [sp]
+// CHECK:	st3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c]
+// CHECK:	st3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c]
+// CHECK:	st3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
+// CHECK:	st3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
+// CHECK:	st3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c]
+// CHECK:	st3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c]
+// CHECK:	st3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
+
+         st3 {v0.16b-v2.16b}, [x0]
+         st3 {v15.8h-v17.8h}, [x15]
+         st3 {v31.4s-v1.4s}, [sp]
+         st3 {v0.2d-v2.2d}, [x0]
+         st3 {v0.8b-v2.8b}, [x0]
+         st3 {v15.4h-v17.4h}, [x15]
+         st3 {v31.2s-v1.2s}, [sp]
+// CHECK:	st3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c]
+// CHECK:	st3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c]
+// CHECK:	st3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
+// CHECK:	st3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
+// CHECK:	st3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c]
+// CHECK:	st3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c]
+// CHECK:	st3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 4-element structures from four consecutive registers
+//------------------------------------------------------------------------------
+         st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+         st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
+         st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
+         st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+         st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+         st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
+         st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+// CHECK:	st4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c]
+// CHECK:	st4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c]
+// CHECK:	st4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
+// CHECK:	st4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
+// CHECK:	st4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c]
+// CHECK:	st4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c]
+// CHECK:	st4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
+
+         st4 {v0.16b-v3.16b}, [x0]
+         st4 {v15.8h-v18.8h}, [x15]
+         st4 {v31.4s-v2.4s}, [sp]
+         st4 {v0.2d-v3.2d}, [x0]
+         st4 {v0.8b-v3.8b}, [x0]
+         st4 {v15.4h-v18.4h}, [x15]
+         st4 {v31.2s-v2.2s}, [sp]
+// CHECK:	st4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c]
+// CHECK:	st4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c]
+// CHECK:	st4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
+// CHECK:	st4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
+// CHECK:	st4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c]
+// CHECK:	st4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c]
+// CHECK:	st4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to one register
+//------------------------------------------------------------------------------
+         ld1 {v0.16b}, [x0]
+         ld1 {v15.8h}, [x15]
+         ld1 {v31.4s}, [sp]
+         ld1 {v0.2d}, [x0]
+         ld1 {v0.8b}, [x0]
+         ld1 {v15.4h}, [x15]
+         ld1 {v31.2s}, [sp]
+         ld1 {v0.1d}, [x0]
+// CHECK:	ld1	{v0.16b}, [x0]          // encoding: [0x00,0x70,0x40,0x4c]
+// CHECK:	ld1	{v15.8h}, [x15]         // encoding: [0xef,0x75,0x40,0x4c]
+// CHECK:	ld1	{v31.4s}, [sp]          // encoding: [0xff,0x7b,0x40,0x4c]
+// CHECK:	ld1	{v0.2d}, [x0]           // encoding: [0x00,0x7c,0x40,0x4c]
+// CHECK:	ld1	{v0.8b}, [x0]           // encoding: [0x00,0x70,0x40,0x0c]
+// CHECK:	ld1	{v15.4h}, [x15]         // encoding: [0xef,0x75,0x40,0x0c]
+// CHECK:	ld1	{v31.2s}, [sp]          // encoding: [0xff,0x7b,0x40,0x0c]
+// CHECK:	ld1	{v0.1d}, [x0]           // encoding: [0x00,0x7c,0x40,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to two consecutive registers
+//------------------------------------------------------------------------------
+         ld1 {v0.16b, v1.16b}, [x0]
+         ld1 {v15.8h, v16.8h}, [x15]
+         ld1 {v31.4s, v0.4s}, [sp]
+         ld1 {v0.2d, v1.2d}, [x0]
+         ld1 {v0.8b, v1.8b}, [x0]
+         ld1 {v15.4h, v16.4h}, [x15]
+         ld1 {v31.2s, v0.2s}, [sp]
+         ld1 {v0.1d, v1.1d}, [x0]
+// CHECK:	ld1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x40,0x4c]
+// CHECK:	ld1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
+// CHECK:	ld1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x40,0x4c]
+// CHECK:	ld1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x40,0x4c]
+// CHECK:	ld1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x40,0x0c]
+// CHECK:	ld1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
+// CHECK:	ld1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x40,0x0c]
+// CHECK:	ld1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x40,0x0c]
+
+         ld1 {v0.16b-v1.16b}, [x0]
+         ld1 {v15.8h-v16.8h}, [x15]
+         ld1 {v31.4s-v0.4s}, [sp]
+         ld1 {v0.2d-v1.2d}, [x0]
+         ld1 {v0.8b-v1.8b}, [x0]
+         ld1 {v15.4h-v16.4h}, [x15]
+         ld1 {v31.2s-v0.2s}, [sp]
+         ld1 {v0.1d-v1.1d}, [x0]
+// CHECK:	ld1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x40,0x4c]
+// CHECK:	ld1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
+// CHECK:	ld1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x40,0x4c]
+// CHECK:	ld1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x40,0x4c]
+// CHECK:	ld1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x40,0x0c]
+// CHECK:	ld1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
+// CHECK:	ld1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x40,0x0c]
+// CHECK:	ld1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x40,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to three consecutive registers
+//------------------------------------------------------------------------------
+         ld1 {v0.16b, v1.16b, v2.16b}, [x0]
+         ld1 {v15.8h, v16.8h, v17.8h}, [x15]
+         ld1 {v31.4s, v0.4s, v1.4s}, [sp]
+         ld1 {v0.2d, v1.2d, v2.2d}, [x0]
+         ld1 {v0.8b, v1.8b, v2.8b}, [x0]
+         ld1 {v15.4h, v16.4h, v17.4h}, [x15]
+         ld1 {v31.2s, v0.2s, v1.2s}, [sp]
+         ld1 {v0.1d, v1.1d, v2.1d}, [x0]
+// CHECK:	ld1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c]
+// CHECK:	ld1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c]
+// CHECK:	ld1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
+// CHECK:	ld1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
+// CHECK:	ld1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c]
+// CHECK:	ld1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c]
+// CHECK:	ld1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
+// CHECK:	ld1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
+
+         ld1 {v0.16b-v2.16b}, [x0]
+         ld1 {v15.8h-v17.8h}, [x15]
+         ld1 {v31.4s-v1.4s}, [sp]
+         ld1 {v0.2d-v2.2d}, [x0]
+         ld1 {v0.8b-v2.8b}, [x0]
+         ld1 {v15.4h-v17.4h}, [x15]
+         ld1 {v31.2s-v1.2s}, [sp]
+         ld1 {v0.1d-v2.1d}, [x0]
+// CHECK:	ld1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c]
+// CHECK:	ld1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c]
+// CHECK:	ld1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
+// CHECK:	ld1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
+// CHECK:	ld1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c]
+// CHECK:	ld1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c]
+// CHECK:	ld1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
+// CHECK:	ld1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to four consecutive registers
+//------------------------------------------------------------------------------
+         ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+         ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
+         ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
+         ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+         ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+         ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
+         ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+         ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]
+// CHECK:	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c]
+// CHECK:	ld1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c]
+// CHECK:	ld1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
+// CHECK:	ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
+// CHECK:	ld1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c]
+// CHECK:	ld1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c]
+// CHECK:	ld1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
+// CHECK:	ld1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
+
+         ld1 {v0.16b-v3.16b}, [x0]
+         ld1 {v15.8h-v18.8h}, [x15]
+         ld1 {v31.4s-v2.4s}, [sp]
+         ld1 {v0.2d-v3.2d}, [x0]
+         ld1 {v0.8b-v3.8b}, [x0]
+         ld1 {v15.4h-v18.4h}, [x15]
+         ld1 {v31.2s-v2.2s}, [sp]
+         ld1 {v0.1d-v3.1d}, [x0]
+// CHECK:	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c]
+// CHECK:	ld1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c]
+// CHECK:	ld1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
+// CHECK:	ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
+// CHECK:	ld1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c]
+// CHECK:	ld1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c]
+// CHECK:	ld1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
+// CHECK:	ld1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 2-element structures to two consecutive registers
+//------------------------------------------------------------------------------
+         ld2 {v0.16b, v1.16b}, [x0]
+         ld2 {v15.8h, v16.8h}, [x15]
+         ld2 {v31.4s, v0.4s}, [sp]
+         ld2 {v0.2d, v1.2d}, [x0]
+         ld2 {v0.8b, v1.8b}, [x0]
+         ld2 {v15.4h, v16.4h}, [x15]
+         ld2 {v31.2s, v0.2s}, [sp]
+// CHECK:	ld2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x40,0x4c]
+// CHECK:	ld2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c]
+// CHECK:	ld2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x40,0x4c]
+// CHECK:	ld2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x40,0x4c]
+// CHECK:	ld2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x40,0x0c]
+// CHECK:	ld2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c]
+// CHECK:	ld2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x40,0x0c]
+
+         ld2 {v0.16b-v1.16b}, [x0]
+         ld2 {v15.8h-v16.8h}, [x15]
+         ld2 {v31.4s-v0.4s}, [sp]
+         ld2 {v0.2d-v1.2d}, [x0]
+         ld2 {v0.8b-v1.8b}, [x0]
+         ld2 {v15.4h-v16.4h}, [x15]
+         ld2 {v31.2s-v0.2s}, [sp]
+// CHECK:	ld2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x40,0x4c]
+// CHECK:	ld2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c]
+// CHECK:	ld2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x40,0x4c]
+// CHECK:	ld2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x40,0x4c]
+// CHECK:	ld2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x40,0x0c]
+// CHECK:	ld2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c]
+// CHECK:	ld2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x40,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 3-element structures to three consecutive registers
+//------------------------------------------------------------------------------
+         ld3 {v0.16b, v1.16b, v2.16b}, [x0]
+         ld3 {v15.8h, v16.8h, v17.8h}, [x15]
+         ld3 {v31.4s, v0.4s, v1.4s}, [sp]
+         ld3 {v0.2d, v1.2d, v2.2d}, [x0]
+         ld3 {v0.8b, v1.8b, v2.8b}, [x0]
+         ld3 {v15.4h, v16.4h, v17.4h}, [x15]
+         ld3 {v31.2s, v0.2s, v1.2s}, [sp]
+// CHECK:	ld3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c]
+// CHECK:	ld3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c]
+// CHECK:	ld3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
+// CHECK:	ld3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
+// CHECK:	ld3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c]
+// CHECK:	ld3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c]
+// CHECK:	ld3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
+
+         ld3 {v0.16b-v2.16b}, [x0]
+         ld3 {v15.8h-v17.8h}, [x15]
+         ld3 {v31.4s-v1.4s}, [sp]
+         ld3 {v0.2d-v2.2d}, [x0]
+         ld3 {v0.8b-v2.8b}, [x0]
+         ld3 {v15.4h-v17.4h}, [x15]
+         ld3 {v31.2s-v1.2s}, [sp]
+// CHECK:	ld3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c]
+// CHECK:	ld3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c]
+// CHECK:	ld3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
+// CHECK:	ld3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
+// CHECK:	ld3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c]
+// CHECK:	ld3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c]
+// CHECK:	ld3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 4-element structures to four consecutive registers
+//------------------------------------------------------------------------------
+         ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+         ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
+         ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
+         ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+         ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+         ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
+         ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+// CHECK:	ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c]
+// CHECK:	ld4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c]
+// CHECK:	ld4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
+// CHECK:	ld4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
+// CHECK:	ld4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c]
+// CHECK:	ld4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c]
+// CHECK:	ld4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
+
+         ld4 {v0.16b-v3.16b}, [x0]
+         ld4 {v15.8h-v18.8h}, [x15]
+         ld4 {v31.4s-v2.4s}, [sp]
+         ld4 {v0.2d-v3.2d}, [x0]
+         ld4 {v0.8b-v3.8b}, [x0]
+         ld4 {v15.4h-v18.4h}, [x15]
+         ld4 {v31.2s-v2.2s}, [sp]
+// CHECK:	ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c]
+// CHECK:	ld4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c]
+// CHECK:	ld4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
+// CHECK:	ld4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
+// CHECK:	ld4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c]
+// CHECK:	ld4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c]
+// CHECK:	ld4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
diff --git a/test/MC/AArch64/neon-simd-ldst-one-elem.s b/test/MC/AArch64/neon-simd-ldst-one-elem.s
new file mode 100644
index 000000000000..140d7525fee6
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-ldst-one-elem.s
@@ -0,0 +1,325 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Load single 1-element structure to all lanes of 1 register
+//------------------------------------------------------------------------------
+         ld1r {v0.16b}, [x0]
+         ld1r {v15.8h}, [x15]
+         ld1r {v31.4s}, [sp]
+         ld1r {v0.2d}, [x0]
+         ld1r {v0.8b}, [x0]
+         ld1r {v15.4h}, [x15]
+         ld1r {v31.2s}, [sp]
+         ld1r {v0.1d}, [x0]
+// CHECK: ld1r {v0.16b}, [x0]          // encoding: [0x00,0xc0,0x40,0x4d]
+// CHECK: ld1r {v15.8h}, [x15]         // encoding: [0xef,0xc5,0x40,0x4d]
+// CHECK: ld1r {v31.4s}, [sp]          // encoding: [0xff,0xcb,0x40,0x4d]
+// CHECK: ld1r {v0.2d}, [x0]           // encoding: [0x00,0xcc,0x40,0x4d]
+// CHECK: ld1r {v0.8b}, [x0]           // encoding: [0x00,0xc0,0x40,0x0d]
+// CHECK: ld1r {v15.4h}, [x15]         // encoding: [0xef,0xc5,0x40,0x0d]
+// CHECK: ld1r {v31.2s}, [sp]          // encoding: [0xff,0xcb,0x40,0x0d]
+// CHECK: ld1r {v0.1d}, [x0]           // encoding: [0x00,0xcc,0x40,0x0d]
+
+//------------------------------------------------------------------------------
+// Load single N-element structure to all lanes of N consecutive
+// registers (N = 2,3,4)
+//------------------------------------------------------------------------------
+         ld2r {v0.16b, v1.16b}, [x0]
+         ld2r {v15.8h, v16.8h}, [x15]
+         ld2r {v31.4s, v0.4s}, [sp]
+         ld2r {v0.2d, v1.2d}, [x0]
+         ld2r {v0.8b, v1.8b}, [x0]
+         ld2r {v15.4h, v16.4h}, [x15]
+         ld2r {v31.2s, v0.2s}, [sp]
+         ld2r {v31.1d, v0.1d}, [sp]
+// CHECK: ld2r {v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xc0,0x60,0x4d]
+// CHECK: ld2r {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xc5,0x60,0x4d]
+// CHECK: ld2r {v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xcb,0x60,0x4d]
+// CHECK: ld2r {v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xcc,0x60,0x4d]
+// CHECK: ld2r {v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xc0,0x60,0x0d]
+// CHECK: ld2r {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xc5,0x60,0x0d]
+// CHECK: ld2r {v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xcb,0x60,0x0d]
+// CHECK: ld2r {v31.1d, v0.1d}, [sp]   // encoding: [0xff,0xcf,0x60,0x0d]
+
+         ld3r {v0.16b, v1.16b, v2.16b}, [x0]
+         ld3r {v15.8h, v16.8h, v17.8h}, [x15]
+         ld3r {v31.4s, v0.4s, v1.4s}, [sp]
+         ld3r {v0.2d, v1.2d, v2.2d}, [x0]
+         ld3r {v0.8b, v1.8b, v2.8b}, [x0]
+         ld3r {v15.4h, v16.4h, v17.4h}, [x15]
+         ld3r {v31.2s, v0.2s, v1.2s}, [sp]
+         ld3r {v31.1d, v0.1d, v1.1d}, [sp]
+// CHECK: ld3r {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0xe0,0x40,0x4d]
+// CHECK: ld3r {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0xe5,0x40,0x4d]
+// CHECK: ld3r {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0xeb,0x40,0x4d]
+// CHECK: ld3r {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0xec,0x40,0x4d]
+// CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0xe0,0x40,0x0d]
+// CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0xe5,0x40,0x0d]
+// CHECK: ld3r {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0xeb,0x40,0x0d]
+// CHECK: ld3r {v31.1d, v0.1d, v1.1d}, [sp] // encoding: [0xff,0xef,0x40,0x0d]
+
+         ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
+         ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
+         ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
+         ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+         ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
+         ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
+         ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+         ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp]
+// CHECK: ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0xe0,0x60,0x4d]
+// CHECK: ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0xe5,0x60,0x4d]
+// CHECK: ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0xeb,0x60,0x4d]
+// CHECK: ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0xec,0x60,0x4d]
+// CHECK: ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0xe0,0x60,0x0d]
+// CHECK: ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0xe5,0x60,0x0d]
+// CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0xeb,0x60,0x0d]
+// CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp] // encoding: [0xff,0xef,0x60,0x0d]
+
+//------------------------------------------------------------------------------
+// Load single 1-element structure to one lane of 1 register.
+//------------------------------------------------------------------------------
+         ld1 {v0.b}[9], [x0]
+         ld1 {v15.h}[7], [x15]
+         ld1 {v31.s}[3], [sp]
+         ld1 {v0.d}[1], [x0]
+// CHECK: ld1 {v0.b}[9], [x0]         // encoding: [0x00,0x04,0x40,0x4d]
+// CHECK: ld1 {v15.h}[7], [x15]       // encoding: [0xef,0x59,0x40,0x4d]
+// CHECK: ld1 {v31.s}[3], [sp]        // encoding: [0xff,0x93,0x40,0x4d]
+// CHECK: ld1 {v0.d}[1], [x0]         // encoding: [0x00,0x84,0x40,0x4d]
+
+//------------------------------------------------------------------------------
+// Load single N-element structure to one lane of N consecutive registers
+// (N = 2,3,4)
+//------------------------------------------------------------------------------
+         ld2 {v0.b, v1.b}[9], [x0]
+         ld2 {v15.h, v16.h}[7], [x15]
+         ld2 {v31.s, v0.s}[3], [sp]
+         ld2 {v0.d, v1.d}[1], [x0]
+// CHECK: ld2 {v0.b, v1.b}[9], [x0]   // encoding: [0x00,0x04,0x60,0x4d]
+// CHECK: ld2 {v15.h, v16.h}[7], [x15] // encoding: [0xef,0x59,0x60,0x4d]
+// CHECK: ld2 {v31.s, v0.s}[3], [sp]  // encoding: [0xff,0x93,0x60,0x4d]
+// CHECK: ld2 {v0.d, v1.d}[1], [x0]   // encoding: [0x00,0x84,0x60,0x4d]
+
+         ld3 {v0.b, v1.b, v2.b}[9], [x0]
+         ld3 {v15.h, v16.h, v17.h}[7], [x15]
+         ld3 {v31.s, v0.s, v1.s}[3], [sp]
+         ld3 {v0.d, v1.d, v2.d}[1], [x0]
+// CHECK: ld3 {v0.b, v1.b, v2.b}[9], [x0] // encoding: [0x00,0x24,0x40,0x4d]
+// CHECK: ld3 {v15.h, v16.h, v17.h}[7], [x15] // encoding: [0xef,0x79,0x40,0x4d]
+// CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp] // encoding: [0xff,0xb3,0x40,0x4d]
+// CHECK: ld3 {v0.d, v1.d, v2.d}[1], [x0] // encoding: [0x00,0xa4,0x40,0x4d]
+
+         ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0]
+         ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15]
+         ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp]
+         ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0]
+// CHECK: ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] // encoding: [0x00,0x24,0x60,0x4d]
+// CHECK: ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] // encoding: [0xef,0x79,0x60,0x4d]
+// CHECK: ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] // encoding: [0xff,0xb3,0x60,0x4d]
+// CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] // encoding: [0x00,0xa4,0x60,0x4d]
+
+//------------------------------------------------------------------------------
+// Store single 1-element structure from one lane of 1 register.
+//------------------------------------------------------------------------------
+         st1 {v0.b}[9], [x0]
+         st1 {v15.h}[7], [x15]
+         st1 {v31.s}[3], [sp]
+         st1 {v0.d}[1], [x0]
+// CHECK: st1 {v0.b}[9], [x0]         // encoding: [0x00,0x04,0x00,0x4d]
+// CHECK: st1 {v15.h}[7], [x15]       // encoding: [0xef,0x59,0x00,0x4d]
+// CHECK: st1 {v31.s}[3], [sp]        // encoding: [0xff,0x93,0x00,0x4d]
+// CHECK: st1 {v0.d}[1], [x0]         // encoding: [0x00,0x84,0x00,0x4d]
+
+//------------------------------------------------------------------------------
+// Store single N-element structure from one lane of N consecutive registers
+// (N = 2,3,4)
+//------------------------------------------------------------------------------
+         st2 {v0.b, v1.b}[9], [x0]
+         st2 {v15.h, v16.h}[7], [x15]
+         st2 {v31.s, v0.s}[3], [sp]
+         st2 {v0.d, v1.d}[1], [x0]
+// CHECK: st2 {v0.b, v1.b}[9], [x0]   // encoding: [0x00,0x04,0x20,0x4d]
+// CHECK: st2 {v15.h, v16.h}[7], [x15] // encoding: [0xef,0x59,0x20,0x4d]
+// CHECK: st2 {v31.s, v0.s}[3], [sp]  // encoding: [0xff,0x93,0x20,0x4d]
+// CHECK: st2 {v0.d, v1.d}[1], [x0]   // encoding: [0x00,0x84,0x20,0x4d]
+
+         st3 {v0.b, v1.b, v2.b}[9], [x0]
+         st3 {v15.h, v16.h, v17.h}[7], [x15]
+         st3 {v31.s, v0.s, v1.s}[3], [sp]
+         st3 {v0.d, v1.d, v2.d}[1], [x0]
+// CHECK: st3 {v0.b, v1.b, v2.b}[9], [x0] // encoding: [0x00,0x24,0x00,0x4d]
+// CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15] // encoding: [0xef,0x79,0x00,0x4d]
+// CHECK: st3 {v31.s, v0.s, v1.s}[3], [sp] // encoding: [0xff,0xb3,0x00,0x4d]
+// CHECK: st3 {v0.d, v1.d, v2.d}[1], [x0] // encoding: [0x00,0xa4,0x00,0x4d]
+
+         st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0]
+         st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15]
+         st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp]
+         st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0]
+// CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] // encoding: [0x00,0x24,0x20,0x4d]
+// CHECK: st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] // encoding: [0xef,0x79,0x20,0x4d]
+// CHECK: st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] // encoding: [0xff,0xb3,0x20,0x4d]
+// CHECK: st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] // encoding: [0x00,0xa4,0x20,0x4d]
+
+//------------------------------------------------------------------------------
+// Post-index load single 1-element structure to all lanes of 1 register
+//------------------------------------------------------------------------------
+         ld1r {v0.16b}, [x0], #1
+         ld1r {v15.8h}, [x15], #2
+         ld1r {v31.4s}, [sp], #4
+         ld1r {v0.2d}, [x0], #8
+         ld1r {v0.8b}, [x0], x0
+         ld1r {v15.4h}, [x15], x1
+         ld1r {v31.2s}, [sp], x2
+         ld1r {v0.1d}, [x0], x3
+// CHECK: ld1r {v0.16b}, [x0], #1      // encoding: [0x00,0xc0,0xdf,0x4d]
+// CHECK: ld1r {v15.8h}, [x15], #2     // encoding: [0xef,0xc5,0xdf,0x4d]
+// CHECK: ld1r {v31.4s}, [sp], #4      // encoding: [0xff,0xcb,0xdf,0x4d]
+// CHECK: ld1r {v0.2d}, [x0], #8       // encoding: [0x00,0xcc,0xdf,0x4d]
+// CHECK: ld1r {v0.8b}, [x0], x0       // encoding: [0x00,0xc0,0xc0,0x0d]
+// CHECK: ld1r {v15.4h}, [x15], x1     // encoding: [0xef,0xc5,0xc1,0x0d]
+// CHECK: ld1r {v31.2s}, [sp], x2      // encoding: [0xff,0xcb,0xc2,0x0d]
+// CHECK: ld1r {v0.1d}, [x0], x3       // encoding: [0x00,0xcc,0xc3,0x0d]
+
+//------------------------------------------------------------------------------
+// Post-index load single N-element structure to all lanes of N consecutive
+// registers (N = 2,3,4)
+//------------------------------------------------------------------------------
+         ld2r {v0.16b, v1.16b}, [x0], #2
+         ld2r {v15.8h, v16.8h}, [x15], #4
+         ld2r {v31.4s, v0.4s}, [sp], #8
+         ld2r {v0.2d, v1.2d}, [x0], #16
+         ld2r {v0.8b, v1.8b}, [x0], x6
+         ld2r {v15.4h, v16.4h}, [x15], x7
+         ld2r {v31.2s, v0.2s}, [sp], x9
+         ld2r {v31.1d, v0.1d}, [x0], x5
+// CHECK: ld2r {v0.16b, v1.16b}, [x0], #2 // encoding: [0x00,0xc0,0xff,0x4d]
+// CHECK: ld2r {v15.8h, v16.8h}, [x15], #4 // encoding: [0xef,0xc5,0xff,0x4d]
+// CHECK: ld2r {v31.4s, v0.4s}, [sp], #8 // encoding: [0xff,0xcb,0xff,0x4d]
+// CHECK: ld2r {v0.2d, v1.2d}, [x0], #16 // encoding: [0x00,0xcc,0xff,0x4d]
+// CHECK: ld2r {v0.8b, v1.8b}, [x0], x6 // encoding: [0x00,0xc0,0xe6,0x0d]
+// CHECK: ld2r {v15.4h, v16.4h}, [x15], x7 // encoding: [0xef,0xc5,0xe7,0x0d]
+// CHECK: ld2r {v31.2s, v0.2s}, [sp], x9 // encoding: [0xff,0xcb,0xe9,0x0d]
+// CHECK: ld2r {v31.1d, v0.1d}, [x0], x5 // encoding: [0x1f,0xcc,0xe5,0x0d]
+
+         ld3r {v0.16b, v1.16b, v2.16b}, [x0], x9
+         ld3r {v15.8h, v16.8h, v17.8h}, [x15], x6
+         ld3r {v31.4s, v0.4s, v1.4s}, [sp], x7
+         ld3r {v0.2d, v1.2d, v2.2d}, [x0], x5
+         ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3
+         ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6
+         ld3r {v31.2s, v0.2s, v1.2s}, [sp], #12
+         ld3r {v31.1d, v0.1d, v1.1d}, [sp], #24
+// CHECK: ld3r {v0.16b, v1.16b, v2.16b}, [x0], x9 // encoding: [0x00,0xe0,0xc9,0x4d]
+// CHECK: ld3r {v15.8h, v16.8h, v17.8h}, [x15], x6 // encoding: [0xef,0xe5,0xc6,0x4d]
+// CHECK: ld3r {v31.4s, v0.4s, v1.4s}, [sp], x7 // encoding: [0xff,0xeb,0xc7,0x4d]
+// CHECK: ld3r {v0.2d, v1.2d, v2.2d}, [x0], x5 // encoding: [0x00,0xec,0xc5,0x4d]
+// CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3 // encoding: [0x00,0xe0,0xdf,0x0d]
+// CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6 // encoding: [0xef,0xe5,0xdf,0x0d]
+// CHECK: ld3r {v31.2s, v0.2s, v1.2s}, [sp], #12 // encoding: [0xff,0xeb,0xdf,0x0d]
+// CHECK: ld3r {v31.1d, v0.1d, v1.1d}, [sp], #24 // encoding: [0xff,0xef,0xdf,0x0d]
+
+         ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #4
+         ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], #8
+         ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #16
+         ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #32
+         ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x5
+         ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x9
+         ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30
+         ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7
+// CHECK: ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #4 // encoding: [0x00,0xe0,0xff,0x4d]
+// CHECK: ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], #8 // encoding: [0xef,0xe5,0xff,0x4d]
+// CHECK: ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #16 // encoding: [0xff,0xeb,0xff,0x4d]
+// CHECK: ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #32 // encoding: [0x00,0xec,0xff,0x4d]
+// CHECK: ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x5 // encoding: [0x00,0xe0,0xe5,0x0d]
+// CHECK: ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x9 // encoding: [0xef,0xe5,0xe9,0x0d]
+// CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30 // encoding: [0xff,0xeb,0xfe,0x0d]
+// CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7 // encoding: [0xff,0xef,0xe7,0x0d]
+
+//------------------------------------------------------------------------------
+// Post-index load single 1-element structure to one lane of 1 register.
+//------------------------------------------------------------------------------
+         ld1 {v0.b}[9], [x0], #1
+         ld1 {v15.h}[7], [x15], x9
+         ld1 {v31.s}[3], [sp], x6
+         ld1 {v0.d}[1], [x0], #8
+// CHECK: ld1 {v0.b}[9], [x0], #1     // encoding: [0x00,0x04,0xdf,0x4d]
+// CHECK: ld1 {v15.h}[7], [x15], x9   // encoding: [0xef,0x59,0xc9,0x4d]
+// CHECK: ld1 {v31.s}[3], [sp], x6    // encoding: [0xff,0x93,0xc6,0x4d]
+// CHECK: ld1 {v0.d}[1], [x0], #8     // encoding: [0x00,0x84,0xdf,0x4d]
+
+//------------------------------------------------------------------------------
+// Post-index load single N-element structure to one lane of N consecutive
+// registers (N = 2,3,4)
+//------------------------------------------------------------------------------
+         ld2 {v0.b, v1.b}[9], [x0], x3
+         ld2 {v15.h, v16.h}[7], [x15], #4
+         ld2 {v31.s, v0.s}[3], [sp], #8
+         ld2 {v0.d, v1.d}[1], [x0], x0
+// CHECK: ld2 {v0.b, v1.b}[9], [x0], x3 // encoding: [0x00,0x04,0xe3,0x4d]
+// CHECK: ld2 {v15.h, v16.h}[7], [x15], #4 // encoding: [0xef,0x59,0xff,0x4d]
+// CHECK: ld2 {v31.s, v0.s}[3], [sp], #8 // encoding: [0xff,0x93,0xff,0x4d]
+// CHECK: ld2 {v0.d, v1.d}[1], [x0], x0 // encoding: [0x00,0x84,0xe0,0x4d]
+
+         ld3 {v0.b, v1.b, v2.b}[9], [x0], #3
+         ld3 {v15.h, v16.h, v17.h}[7], [x15], #6
+         ld3 {v31.s, v0.s, v1.s}[3], [sp], x3
+         ld3 {v0.d, v1.d, v2.d}[1], [x0], x6
+// CHECK: ld3 {v0.b, v1.b, v2.b}[9], [x0], #3 // encoding: [0x00,0x24,0xdf,0x4d]
+// CHECK: ld3 {v15.h, v16.h, v17.h}[7], [x15], #6 // encoding: [0xef,0x79,0xdf,0x4d]
+// CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3 // encoding: [0xff,0xb3,0xc3,0x4d]
+// CHECK: ld3 {v0.d, v1.d, v2.d}[1], [x0], x6 // encoding: [0x00,0xa4,0xc6,0x4d]
+
+         ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5
+         ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7
+         ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16
+         ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
+// CHECK: ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 // encoding: [0x00,0x24,0xe5,0x4d]
+// CHECK: ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 // encoding: [0xef,0x79,0xe7,0x4d]
+// CHECK: ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 // encoding: [0xff,0xb3,0xff,0x4d]
+// CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 // encoding: [0x00,0xa4,0xff,0x4d]
+
+//------------------------------------------------------------------------------
+// Post-index store single 1-element structure from one lane of 1 register.
+//------------------------------------------------------------------------------
+         st1 {v0.b}[9], [x0], #1
+         st1 {v15.h}[7], [x15], x9
+         st1 {v31.s}[3], [sp], x6
+         st1 {v0.d}[1], [x0], #8
+// CHECK: st1 {v0.b}[9], [x0], #1     // encoding: [0x00,0x04,0x9f,0x4d]
+// CHECK: st1 {v15.h}[7], [x15], x9   // encoding: [0xef,0x59,0x89,0x4d]
+// CHECK: st1 {v31.s}[3], [sp], x6    // encoding: [0xff,0x93,0x86,0x4d]
+// CHECK: st1 {v0.d}[1], [x0], #8     // encoding: [0x00,0x84,0x9f,0x4d]
+
+//------------------------------------------------------------------------------
+// Post-index store single N-element structure from one lane of N consecutive
+// registers (N = 2,3,4)
+//------------------------------------------------------------------------------
+         st2 {v0.b, v1.b}[9], [x0], x3
+         st2 {v15.h, v16.h}[7], [x15], #4
+         st2 {v31.s, v0.s}[3], [sp], #8
+         st2 {v0.d, v1.d}[1], [x0], x0
+// CHECK: st2 {v0.b, v1.b}[9], [x0], x3 // encoding: [0x00,0x04,0xa3,0x4d]
+// CHECK: st2 {v15.h, v16.h}[7], [x15], #4 // encoding: [0xef,0x59,0xbf,0x4d]
+// CHECK: st2 {v31.s, v0.s}[3], [sp], #8 // encoding: [0xff,0x93,0xbf,0x4d]
+// CHECK: st2 {v0.d, v1.d}[1], [x0], x0 // encoding: [0x00,0x84,0xa0,0x4d]
+
+         st3 {v0.b, v1.b, v2.b}[9], [x0], #3
+         st3 {v15.h, v16.h, v17.h}[7], [x15], #6
+         st3 {v31.s, v0.s, v1.s}[3], [sp], x3
+         st3 {v0.d, v1.d, v2.d}[1], [x0], x6
+// CHECK: st3 {v0.b, v1.b, v2.b}[9], [x0], #3 // encoding: [0x00,0x24,0x9f,0x4d]
+// CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6 // encoding: [0xef,0x79,0x9f,0x4d]
+// CHECK: st3 {v31.s, v0.s, v1.s}[3], [sp], x3 // encoding: [0xff,0xb3,0x83,0x4d]
+// CHECK: st3 {v0.d, v1.d, v2.d}[1], [x0], x6 // encoding: [0x00,0xa4,0x86,0x4d]
+
+         st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5
+         st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7
+         st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16
+         st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
+// CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 // encoding: [0x00,0x24,0xa5,0x4d]
+// CHECK: st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 // encoding: [0xef,0x79,0xa7,0x4d]
+// CHECK: st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 // encoding: [0xff,0xb3,0xbf,0x4d]
+// CHECK: st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 // encoding: [0x00,0xa4,0xbf,0x4d]
diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s
new file mode 100644
index 000000000000..9e0f9c5b4d95
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-misc.s
@@ -0,0 +1,646 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+
+//------------------------------------------------------------------------------
+// Element reverse
+//------------------------------------------------------------------------------
+         rev64 v0.16b, v31.16b
+         rev64 v2.8h, v4.8h
+         rev64 v6.4s, v8.4s
+         rev64 v1.8b, v9.8b
+         rev64 v13.4h, v21.4h
+         rev64 v4.2s, v0.2s
+
+// CHECK:	rev64	v0.16b, v31.16b         // encoding: [0xe0,0x0b,0x20,0x4e]
+// CHECK:	rev64	v2.8h, v4.8h            // encoding: [0x82,0x08,0x60,0x4e]
+// CHECK:	rev64	v6.4s, v8.4s            // encoding: [0x06,0x09,0xa0,0x4e]
+// CHECK:	rev64	v1.8b, v9.8b            // encoding: [0x21,0x09,0x20,0x0e]
+// CHECK:	rev64	v13.4h, v21.4h          // encoding: [0xad,0x0a,0x60,0x0e]
+// CHECK:	rev64	v4.2s, v0.2s            // encoding: [0x04,0x08,0xa0,0x0e]
+
+         rev32 v30.16b, v31.16b
+         rev32 v4.8h, v7.8h
+         rev32 v21.8b, v1.8b
+         rev32 v0.4h, v9.4h
+
+// CHECK:	rev32	v30.16b, v31.16b        // encoding: [0xfe,0x0b,0x20,0x6e]
+// CHECK:	rev32	v4.8h, v7.8h            // encoding: [0xe4,0x08,0x60,0x6e]
+// CHECK:	rev32	v21.8b, v1.8b           // encoding: [0x35,0x08,0x20,0x2e]
+// CHECK:	rev32	v0.4h, v9.4h            // encoding: [0x20,0x09,0x60,0x2e]
+
+         rev16 v30.16b, v31.16b
+         rev16 v21.8b, v1.8b
+
+// CHECK:	rev16	v30.16b, v31.16b        // encoding: [0xfe,0x1b,0x20,0x4e]
+// CHECK:	rev16	v21.8b, v1.8b           // encoding: [0x35,0x18,0x20,0x0e]
+
+//------------------------------------------------------------------------------
+// Signed integer pairwise add long
+//------------------------------------------------------------------------------
+
+         saddlp v3.8h, v21.16b
+         saddlp v8.4h, v5.8b
+         saddlp v9.4s, v1.8h
+         saddlp v0.2s, v1.4h
+         saddlp v12.2d, v4.4s
+         saddlp v17.1d, v28.2s
+
+// CHECK:	saddlp	v3.8h, v21.16b          // encoding: [0xa3,0x2a,0x20,0x4e]
+// CHECK:	saddlp	v8.4h, v5.8b            // encoding: [0xa8,0x28,0x20,0x0e]
+// CHECK:	saddlp	v9.4s, v1.8h            // encoding: [0x29,0x28,0x60,0x4e]
+// CHECK:	saddlp	v0.2s, v1.4h            // encoding: [0x20,0x28,0x60,0x0e]
+// CHECK:	saddlp	v12.2d, v4.4s           // encoding: [0x8c,0x28,0xa0,0x4e]
+// CHECK:	saddlp	v17.1d, v28.2s          // encoding: [0x91,0x2b,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Unsigned integer pairwise add long
+//------------------------------------------------------------------------------
+
+         uaddlp v3.8h, v21.16b
+         uaddlp v8.4h, v5.8b
+         uaddlp v9.4s, v1.8h
+         uaddlp v0.2s, v1.4h
+         uaddlp v12.2d, v4.4s
+         uaddlp v17.1d, v28.2s
+
+// CHECK:	uaddlp	v3.8h, v21.16b          // encoding: [0xa3,0x2a,0x20,0x6e]
+// CHECK:	uaddlp	v8.4h, v5.8b            // encoding: [0xa8,0x28,0x20,0x2e]
+// CHECK:	uaddlp	v9.4s, v1.8h            // encoding: [0x29,0x28,0x60,0x6e]
+// CHECK:	uaddlp	v0.2s, v1.4h            // encoding: [0x20,0x28,0x60,0x2e]
+// CHECK:	uaddlp	v12.2d, v4.4s           // encoding: [0x8c,0x28,0xa0,0x6e]
+// CHECK:	uaddlp	v17.1d, v28.2s          // encoding: [0x91,0x2b,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Signed integer pairwise add and accumulate long
+//------------------------------------------------------------------------------
+
+         sadalp v3.8h, v21.16b
+         sadalp v8.4h, v5.8b
+         sadalp v9.4s, v1.8h
+         sadalp v0.2s, v1.4h
+         sadalp v12.2d, v4.4s
+         sadalp v17.1d, v28.2s
+
+// CHECK:	sadalp	v3.8h, v21.16b          // encoding: [0xa3,0x6a,0x20,0x4e]
+// CHECK:	sadalp	v8.4h, v5.8b            // encoding: [0xa8,0x68,0x20,0x0e]
+// CHECK:	sadalp	v9.4s, v1.8h            // encoding: [0x29,0x68,0x60,0x4e]
+// CHECK:	sadalp	v0.2s, v1.4h            // encoding: [0x20,0x68,0x60,0x0e]
+// CHECK:	sadalp	v12.2d, v4.4s           // encoding: [0x8c,0x68,0xa0,0x4e]
+// CHECK:	sadalp	v17.1d, v28.2s          // encoding: [0x91,0x6b,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Unsigned integer pairwise add and accumulate long
+//------------------------------------------------------------------------------
+
+         uadalp v3.8h, v21.16b
+         uadalp v8.4h, v5.8b
+         uadalp v9.4s, v1.8h
+         uadalp v0.2s, v1.4h
+         uadalp v12.2d, v4.4s
+         uadalp v17.1d, v28.2s
+
+// CHECK:	uadalp	v3.8h, v21.16b          // encoding: [0xa3,0x6a,0x20,0x6e]
+// CHECK:	uadalp	v8.4h, v5.8b            // encoding: [0xa8,0x68,0x20,0x2e]
+// CHECK:	uadalp	v9.4s, v1.8h            // encoding: [0x29,0x68,0x60,0x6e]
+// CHECK:	uadalp	v0.2s, v1.4h            // encoding: [0x20,0x68,0x60,0x2e]
+// CHECK:	uadalp	v12.2d, v4.4s           // encoding: [0x8c,0x68,0xa0,0x6e]
+// CHECK:	uadalp	v17.1d, v28.2s          // encoding: [0x91,0x6b,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Signed integer saturating accumulate of unsigned value
+//------------------------------------------------------------------------------
+
+         suqadd v0.16b, v31.16b
+         suqadd v2.8h, v4.8h
+         suqadd v6.4s, v8.4s
+         suqadd v6.2d, v8.2d
+         suqadd v1.8b, v9.8b
+         suqadd v13.4h, v21.4h
+         suqadd v4.2s, v0.2s
+
+// CHECK:	suqadd	v0.16b, v31.16b         // encoding: [0xe0,0x3b,0x20,0x4e]
+// CHECK:	suqadd	v2.8h, v4.8h            // encoding: [0x82,0x38,0x60,0x4e]
+// CHECK:	suqadd	v6.4s, v8.4s            // encoding: [0x06,0x39,0xa0,0x4e]
+// CHECK:	suqadd	v6.2d, v8.2d            // encoding: [0x06,0x39,0xe0,0x4e]
+// CHECK:	suqadd	v1.8b, v9.8b            // encoding: [0x21,0x39,0x20,0x0e]
+// CHECK:	suqadd	v13.4h, v21.4h          // encoding: [0xad,0x3a,0x60,0x0e]
+// CHECK:	suqadd	v4.2s, v0.2s            // encoding: [0x04,0x38,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Unsigned integer saturating accumulate of signed value
+//------------------------------------------------------------------------------
+
+         usqadd v0.16b, v31.16b
+         usqadd v2.8h, v4.8h
+         usqadd v6.4s, v8.4s
+         usqadd v6.2d, v8.2d
+         usqadd v1.8b, v9.8b
+         usqadd v13.4h, v21.4h
+         usqadd v4.2s, v0.2s
+
+// CHECK:	usqadd	v0.16b, v31.16b         // encoding: [0xe0,0x3b,0x20,0x6e]
+// CHECK:	usqadd	v2.8h, v4.8h            // encoding: [0x82,0x38,0x60,0x6e]
+// CHECK:	usqadd	v6.4s, v8.4s            // encoding: [0x06,0x39,0xa0,0x6e]
+// CHECK:	usqadd	v6.2d, v8.2d            // encoding: [0x06,0x39,0xe0,0x6e]
+// CHECK:	usqadd	v1.8b, v9.8b            // encoding: [0x21,0x39,0x20,0x2e]
+// CHECK:	usqadd	v13.4h, v21.4h          // encoding: [0xad,0x3a,0x60,0x2e]
+// CHECK:	usqadd	v4.2s, v0.2s            // encoding: [0x04,0x38,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Signed integer saturating absolute
+//------------------------------------------------------------------------------
+
+         sqabs v0.16b, v31.16b
+         sqabs v2.8h, v4.8h
+         sqabs v6.4s, v8.4s
+         sqabs v6.2d, v8.2d
+         sqabs v1.8b, v9.8b
+         sqabs v13.4h, v21.4h
+         sqabs v4.2s, v0.2s
+
+// CHECK:	sqabs	v0.16b, v31.16b         // encoding: [0xe0,0x7b,0x20,0x4e]
+// CHECK:	sqabs	v2.8h, v4.8h            // encoding: [0x82,0x78,0x60,0x4e]
+// CHECK:	sqabs	v6.4s, v8.4s            // encoding: [0x06,0x79,0xa0,0x4e]
+// CHECK:	sqabs	v6.2d, v8.2d            // encoding: [0x06,0x79,0xe0,0x4e]
+// CHECK:	sqabs	v1.8b, v9.8b            // encoding: [0x21,0x79,0x20,0x0e]
+// CHECK:	sqabs	v13.4h, v21.4h          // encoding: [0xad,0x7a,0x60,0x0e]
+// CHECK:	sqabs	v4.2s, v0.2s            // encoding: [0x04,0x78,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Signed integer saturating negate
+//------------------------------------------------------------------------------
+
+         sqneg v0.16b, v31.16b
+         sqneg v2.8h, v4.8h
+         sqneg v6.4s, v8.4s
+         sqneg v6.2d, v8.2d
+         sqneg v1.8b, v9.8b
+         sqneg v13.4h, v21.4h
+         sqneg v4.2s, v0.2s
+
+// CHECK:	sqneg	v0.16b, v31.16b         // encoding: [0xe0,0x7b,0x20,0x6e]
+// CHECK:	sqneg	v2.8h, v4.8h            // encoding: [0x82,0x78,0x60,0x6e]
+// CHECK:	sqneg	v6.4s, v8.4s            // encoding: [0x06,0x79,0xa0,0x6e]
+// CHECK:	sqneg	v6.2d, v8.2d            // encoding: [0x06,0x79,0xe0,0x6e]
+// CHECK:	sqneg	v1.8b, v9.8b            // encoding: [0x21,0x79,0x20,0x2e]
+// CHECK:	sqneg	v13.4h, v21.4h          // encoding: [0xad,0x7a,0x60,0x2e]
+// CHECK:	sqneg	v4.2s, v0.2s            // encoding: [0x04,0x78,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Integer absolute
+//------------------------------------------------------------------------------
+
+         abs v0.16b, v31.16b
+         abs v2.8h, v4.8h
+         abs v6.4s, v8.4s
+         abs v6.2d, v8.2d
+         abs v1.8b, v9.8b
+         abs v13.4h, v21.4h
+         abs v4.2s, v0.2s
+
+// CHECK:	abs	v0.16b, v31.16b         // encoding: [0xe0,0xbb,0x20,0x4e]
+// CHECK:	abs	v2.8h, v4.8h            // encoding: [0x82,0xb8,0x60,0x4e]
+// CHECK:	abs	v6.4s, v8.4s            // encoding: [0x06,0xb9,0xa0,0x4e]
+// CHECK:	abs	v6.2d, v8.2d            // encoding: [0x06,0xb9,0xe0,0x4e]
+// CHECK:	abs	v1.8b, v9.8b            // encoding: [0x21,0xb9,0x20,0x0e]
+// CHECK:	abs	v13.4h, v21.4h          // encoding: [0xad,0xba,0x60,0x0e]
+// CHECK:	abs	v4.2s, v0.2s            // encoding: [0x04,0xb8,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Integer negate
+//------------------------------------------------------------------------------
+
+         neg v0.16b, v31.16b
+         neg v2.8h, v4.8h
+         neg v6.4s, v8.4s
+         neg v6.2d, v8.2d
+         neg v1.8b, v9.8b
+         neg v13.4h, v21.4h
+         neg v4.2s, v0.2s
+
+// CHECK:	neg	v0.16b, v31.16b         // encoding: [0xe0,0xbb,0x20,0x6e]
+// CHECK:	neg	v2.8h, v4.8h            // encoding: [0x82,0xb8,0x60,0x6e]
+// CHECK:	neg	v6.4s, v8.4s            // encoding: [0x06,0xb9,0xa0,0x6e]
+// CHECK:	neg	v6.2d, v8.2d            // encoding: [0x06,0xb9,0xe0,0x6e]
+// CHECK:	neg	v1.8b, v9.8b            // encoding: [0x21,0xb9,0x20,0x2e]
+// CHECK:	neg	v13.4h, v21.4h          // encoding: [0xad,0xba,0x60,0x2e]
+// CHECK:	neg	v4.2s, v0.2s            // encoding: [0x04,0xb8,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Integer count leading sign bits
+//------------------------------------------------------------------------------
+
+         cls v0.16b, v31.16b
+         cls v2.8h, v4.8h
+         cls v6.4s, v8.4s
+         cls v1.8b, v9.8b
+         cls v13.4h, v21.4h
+         cls v4.2s, v0.2s
+
+// CHECK:	cls	v0.16b, v31.16b         // encoding: [0xe0,0x4b,0x20,0x4e]
+// CHECK:	cls	v2.8h, v4.8h            // encoding: [0x82,0x48,0x60,0x4e]
+// CHECK:	cls	v6.4s, v8.4s            // encoding: [0x06,0x49,0xa0,0x4e]
+// CHECK:	cls	v1.8b, v9.8b            // encoding: [0x21,0x49,0x20,0x0e]
+// CHECK:	cls	v13.4h, v21.4h          // encoding: [0xad,0x4a,0x60,0x0e]
+// CHECK:	cls	v4.2s, v0.2s            // encoding: [0x04,0x48,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Integer count leading zeros
+//------------------------------------------------------------------------------
+
+         clz v0.16b, v31.16b
+         clz v2.8h, v4.8h
+         clz v6.4s, v8.4s
+         clz v1.8b, v9.8b
+         clz v13.4h, v21.4h
+         clz v4.2s, v0.2s
+
+// CHECK:	clz	v0.16b, v31.16b         // encoding: [0xe0,0x4b,0x20,0x6e]
+// CHECK:	clz	v2.8h, v4.8h            // encoding: [0x82,0x48,0x60,0x6e]
+// CHECK:	clz	v6.4s, v8.4s            // encoding: [0x06,0x49,0xa0,0x6e]
+// CHECK:	clz	v1.8b, v9.8b            // encoding: [0x21,0x49,0x20,0x2e]
+// CHECK:	clz	v13.4h, v21.4h          // encoding: [0xad,0x4a,0x60,0x2e]
+// CHECK:	clz	v4.2s, v0.2s            // encoding: [0x04,0x48,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Population count
+//------------------------------------------------------------------------------
+
+         cnt v0.16b, v31.16b
+         cnt v1.8b, v9.8b
+
+// CHECK:	cnt	v0.16b, v31.16b         // encoding: [0xe0,0x5b,0x20,0x4e]
+// CHECK:	cnt	v1.8b, v9.8b            // encoding: [0x21,0x59,0x20,0x0e]
+
+//------------------------------------------------------------------------------
+// Bitwise NOT
+//------------------------------------------------------------------------------
+
+         not v0.16b, v31.16b
+         not v1.8b, v9.8b
+
+// CHECK:	not	v0.16b, v31.16b         // encoding: [0xe0,0x5b,0x20,0x6e]
+// CHECK:	not	v1.8b, v9.8b            // encoding: [0x21,0x59,0x20,0x2e]
+
+//------------------------------------------------------------------------------
+// Bitwise reverse
+//------------------------------------------------------------------------------
+
+         rbit v0.16b, v31.16b
+         rbit v1.8b, v9.8b
+
+// CHECK:	rbit	v0.16b, v31.16b         // encoding: [0xe0,0x5b,0x60,0x6e]
+// CHECK:	rbit	v1.8b, v9.8b            // encoding: [0x21,0x59,0x60,0x2e]
+
+//------------------------------------------------------------------------------
+// Floating-point absolute
+//------------------------------------------------------------------------------
+
+         fabs v6.4s, v8.4s
+         fabs v6.2d, v8.2d
+         fabs v4.2s, v0.2s
+
+// CHECK:	fabs	v6.4s, v8.4s            // encoding: [0x06,0xf9,0xa0,0x4e]
+// CHECK:	fabs	v6.2d, v8.2d            // encoding: [0x06,0xf9,0xe0,0x4e]
+// CHECK:	fabs	v4.2s, v0.2s            // encoding: [0x04,0xf8,0xa0,0x0e]
+
+//------------------------------------------------------------------------------
+// Floating-point negate
+//------------------------------------------------------------------------------
+
+         fneg v6.4s, v8.4s
+         fneg v6.2d, v8.2d
+         fneg v4.2s, v0.2s
+
+// CHECK:	fneg	v6.4s, v8.4s            // encoding: [0x06,0xf9,0xa0,0x6e]
+// CHECK:	fneg	v6.2d, v8.2d            // encoding: [0x06,0xf9,0xe0,0x6e]
+// CHECK:	fneg	v4.2s, v0.2s            // encoding: [0x04,0xf8,0xa0,0x2e]
+
+//------------------------------------------------------------------------------
+// Integer extract and narrow
+//------------------------------------------------------------------------------
+
+         xtn2 v0.16b, v31.8h
+         xtn2 v2.8h, v4.4s
+         xtn2 v6.4s, v8.2d
+         xtn v1.8b, v9.8h
+         xtn v13.4h, v21.4s
+         xtn v4.2s, v0.2d
+
+// CHECK:	xtn2	v0.16b, v31.8h          // encoding: [0xe0,0x2b,0x21,0x4e]
+// CHECK:	xtn2	v2.8h, v4.4s            // encoding: [0x82,0x28,0x61,0x4e]
+// CHECK:	xtn2	v6.4s, v8.2d            // encoding: [0x06,0x29,0xa1,0x4e]
+// CHECK:	xtn	v1.8b, v9.8h            // encoding: [0x21,0x29,0x21,0x0e]
+// CHECK:	xtn	v13.4h, v21.4s          // encoding: [0xad,0x2a,0x61,0x0e]
+// CHECK:	xtn	v4.2s, v0.2d            // encoding: [0x04,0x28,0xa1,0x0e]
+
+//------------------------------------------------------------------------------
+// Signed integer saturating extract and unsigned narrow
+//------------------------------------------------------------------------------
+
+         sqxtun2 v0.16b, v31.8h
+         sqxtun2 v2.8h, v4.4s
+         sqxtun2 v6.4s, v8.2d
+         sqxtun v1.8b, v9.8h
+         sqxtun v13.4h, v21.4s
+         sqxtun v4.2s, v0.2d
+
+// CHECK:	sqxtun2	v0.16b, v31.8h          // encoding: [0xe0,0x2b,0x21,0x6e]
+// CHECK:	sqxtun2	v2.8h, v4.4s            // encoding: [0x82,0x28,0x61,0x6e]
+// CHECK:	sqxtun2	v6.4s, v8.2d            // encoding: [0x06,0x29,0xa1,0x6e]
+// CHECK:	sqxtun	v1.8b, v9.8h            // encoding: [0x21,0x29,0x21,0x2e]
+// CHECK:	sqxtun	v13.4h, v21.4s          // encoding: [0xad,0x2a,0x61,0x2e]
+// CHECK:	sqxtun	v4.2s, v0.2d            // encoding: [0x04,0x28,0xa1,0x2e]
+
+//------------------------------------------------------------------------------
+// Signed integer saturating extract and narrow
+//------------------------------------------------------------------------------
+
+         sqxtn2 v0.16b, v31.8h
+         sqxtn2 v2.8h, v4.4s
+         sqxtn2 v6.4s, v8.2d
+         sqxtn v1.8b, v9.8h
+         sqxtn v13.4h, v21.4s
+         sqxtn v4.2s, v0.2d
+
+// CHECK:	sqxtn2	v0.16b, v31.8h          // encoding: [0xe0,0x4b,0x21,0x4e]
+// CHECK:	sqxtn2	v2.8h, v4.4s            // encoding: [0x82,0x48,0x61,0x4e]
+// CHECK:	sqxtn2	v6.4s, v8.2d            // encoding: [0x06,0x49,0xa1,0x4e]
+// CHECK:	sqxtn	v1.8b, v9.8h            // encoding: [0x21,0x49,0x21,0x0e]
+// CHECK:	sqxtn	v13.4h, v21.4s          // encoding: [0xad,0x4a,0x61,0x0e]
+// CHECK:	sqxtn	v4.2s, v0.2d            // encoding: [0x04,0x48,0xa1,0x0e]
+
+//------------------------------------------------------------------------------
+// Unsigned integer saturating extract and narrow
+//------------------------------------------------------------------------------
+
+         uqxtn2 v0.16b, v31.8h
+         uqxtn2 v2.8h, v4.4s
+         uqxtn2 v6.4s, v8.2d
+         uqxtn v1.8b, v9.8h
+         uqxtn v13.4h, v21.4s
+         uqxtn v4.2s, v0.2d
+
+// CHECK:	uqxtn2	v0.16b, v31.8h          // encoding: [0xe0,0x4b,0x21,0x6e]
+// CHECK:	uqxtn2	v2.8h, v4.4s            // encoding: [0x82,0x48,0x61,0x6e]
+// CHECK:	uqxtn2	v6.4s, v8.2d            // encoding: [0x06,0x49,0xa1,0x6e]
+// CHECK:	uqxtn	v1.8b, v9.8h            // encoding: [0x21,0x49,0x21,0x2e]
+// CHECK:	uqxtn	v13.4h, v21.4s          // encoding: [0xad,0x4a,0x61,0x2e]
+// CHECK:	uqxtn	v4.2s, v0.2d            // encoding: [0x04,0x48,0xa1,0x2e]
+
+//------------------------------------------------------------------------------
+// Integer shift left long
+//------------------------------------------------------------------------------
+
+         shll2 v2.8h, v4.16b, #8
+         shll2 v6.4s, v8.8h, #16
+         shll2 v6.2d, v8.4s, #32
+         shll v2.8h, v4.8b, #8
+         shll v6.4s, v8.4h, #16
+         shll v6.2d, v8.2s, #32
+
+// CHECK:	shll2	v2.8h, v4.16b, #8      // encoding: [0x82,0x38,0x21,0x6e]
+// CHECK:	shll2	v6.4s, v8.8h, #16      // encoding: [0x06,0x39,0x61,0x6e]
+// CHECK:	shll2	v6.2d, v8.4s, #32      // encoding: [0x06,0x39,0xa1,0x6e]
+// CHECK:	shll	v2.8h, v4.8b, #8       // encoding: [0x82,0x38,0x21,0x2e]
+// CHECK:	shll	v6.4s, v8.4h, #16      // encoding: [0x06,0x39,0x61,0x2e]
+// CHECK:	shll	v6.2d, v8.2s, #32      // encoding: [0x06,0x39,0xa1,0x2e]
+
+//------------------------------------------------------------------------------
+// Floating-point convert downsize
+//------------------------------------------------------------------------------
+
+         fcvtn2 v2.8h, v4.4s
+         fcvtn2 v6.4s, v8.2d
+         fcvtn v13.4h, v21.4s
+         fcvtn v4.2s, v0.2d
+
+// CHECK:	fcvtn2	v2.8h, v4.4s            // encoding: [0x82,0x68,0x21,0x4e]
+// CHECK:	fcvtn2	v6.4s, v8.2d            // encoding: [0x06,0x69,0x61,0x4e]
+// CHECK:	fcvtn	v13.4h, v21.4s          // encoding: [0xad,0x6a,0x21,0x0e]
+// CHECK:	fcvtn	v4.2s, v0.2d            // encoding: [0x04,0x68,0x61,0x0e]
+
+//------------------------------------------------------------------------------
+// Floating-point convert downsize, rounding to odd
+//------------------------------------------------------------------------------
+
+         fcvtxn2 v6.4s, v8.2d
+         fcvtxn v4.2s, v0.2d
+
+// CHECK:	fcvtxn2	v6.4s, v8.2d            // encoding: [0x06,0x69,0x61,0x6e]
+// CHECK:	fcvtxn	v4.2s, v0.2d            // encoding: [0x04,0x68,0x61,0x2e]
+
+//------------------------------------------------------------------------------
+// Floating-point convert upsize
+//------------------------------------------------------------------------------
+
+         fcvtl v9.4s, v1.4h
+         fcvtl v0.2d, v1.2s
+         fcvtl2 v12.4s, v4.8h
+         fcvtl2 v17.2d, v28.4s
+
+// CHECK:	fcvtl	v9.4s, v1.4h            // encoding: [0x29,0x78,0x21,0x0e]
+// CHECK:	fcvtl	v0.2d, v1.2s            // encoding: [0x20,0x78,0x61,0x0e]
+// CHECK:	fcvtl2	v12.4s, v4.8h           // encoding: [0x8c,0x78,0x21,0x4e]
+// CHECK:	fcvtl2	v17.2d, v28.4s          // encoding: [0x91,0x7b,0x61,0x4e]
+
+//------------------------------------------------------------------------------
+// Floating-point round to integral
+//------------------------------------------------------------------------------
+
+         frintn v6.4s, v8.4s
+         frintn v6.2d, v8.2d
+         frintn v4.2s, v0.2s
+
+// CHECK:	frintn	v6.4s, v8.4s            // encoding: [0x06,0x89,0x21,0x4e]
+// CHECK:	frintn	v6.2d, v8.2d            // encoding: [0x06,0x89,0x61,0x4e]
+// CHECK:	frintn	v4.2s, v0.2s            // encoding: [0x04,0x88,0x21,0x0e]
+
+         frinta v6.4s, v8.4s
+         frinta v6.2d, v8.2d
+         frinta v4.2s, v0.2s
+
+// CHECK:	frinta	v6.4s, v8.4s            // encoding: [0x06,0x89,0x21,0x6e]
+// CHECK:	frinta	v6.2d, v8.2d            // encoding: [0x06,0x89,0x61,0x6e]
+// CHECK:	frinta	v4.2s, v0.2s            // encoding: [0x04,0x88,0x21,0x2e]
+
+         frintp v6.4s, v8.4s
+         frintp v6.2d, v8.2d
+         frintp v4.2s, v0.2s
+
+// CHECK:	frintp	v6.4s, v8.4s            // encoding: [0x06,0x89,0xa1,0x4e]
+// CHECK:	frintp	v6.2d, v8.2d            // encoding: [0x06,0x89,0xe1,0x4e]
+// CHECK:	frintp	v4.2s, v0.2s            // encoding: [0x04,0x88,0xa1,0x0e]
+
+         frintm v6.4s, v8.4s
+         frintm v6.2d, v8.2d
+         frintm v4.2s, v0.2s
+
+// CHECK:	frintm	v6.4s, v8.4s            // encoding: [0x06,0x99,0x21,0x4e]
+// CHECK:	frintm	v6.2d, v8.2d            // encoding: [0x06,0x99,0x61,0x4e]
+// CHECK:	frintm	v4.2s, v0.2s            // encoding: [0x04,0x98,0x21,0x0e]
+
+         frintx v6.4s, v8.4s
+         frintx v6.2d, v8.2d
+         frintx v4.2s, v0.2s
+
+// CHECK:	frintx	v6.4s, v8.4s            // encoding: [0x06,0x99,0x21,0x6e]
+// CHECK:	frintx	v6.2d, v8.2d            // encoding: [0x06,0x99,0x61,0x6e]
+// CHECK:	frintx	v4.2s, v0.2s            // encoding: [0x04,0x98,0x21,0x2e]
+
+         frintz v6.4s, v8.4s
+         frintz v6.2d, v8.2d
+         frintz v4.2s, v0.2s
+
+// CHECK:	frintz	v6.4s, v8.4s            // encoding: [0x06,0x99,0xa1,0x4e]
+// CHECK:	frintz	v6.2d, v8.2d            // encoding: [0x06,0x99,0xe1,0x4e]
+// CHECK:	frintz	v4.2s, v0.2s            // encoding: [0x04,0x98,0xa1,0x0e]
+
+         frinti v6.4s, v8.4s
+         frinti v6.2d, v8.2d
+         frinti v4.2s, v0.2s
+
+// CHECK:	frinti	v6.4s, v8.4s            // encoding: [0x06,0x99,0xa1,0x6e]
+// CHECK:	frinti	v6.2d, v8.2d            // encoding: [0x06,0x99,0xe1,0x6e]
+// CHECK:	frinti	v4.2s, v0.2s            // encoding: [0x04,0x98,0xa1,0x2e]
+
+//------------------------------------------------------------------------------
+// Floating-point convert to integer
+//------------------------------------------------------------------------------
+
+         fcvtns v6.4s, v8.4s
+         fcvtns v6.2d, v8.2d
+         fcvtns v4.2s, v0.2s
+
+// CHECK:	fcvtns	v6.4s, v8.4s            // encoding: [0x06,0xa9,0x21,0x4e]
+// CHECK:	fcvtns	v6.2d, v8.2d            // encoding: [0x06,0xa9,0x61,0x4e]
+// CHECK:	fcvtns	v4.2s, v0.2s            // encoding: [0x04,0xa8,0x21,0x0e]
+
+         fcvtnu v6.4s, v8.4s
+         fcvtnu v6.2d, v8.2d
+         fcvtnu v4.2s, v0.2s
+
+// CHECK:	fcvtnu	v6.4s, v8.4s            // encoding: [0x06,0xa9,0x21,0x6e]
+// CHECK:	fcvtnu	v6.2d, v8.2d            // encoding: [0x06,0xa9,0x61,0x6e]
+// CHECK:	fcvtnu	v4.2s, v0.2s            // encoding: [0x04,0xa8,0x21,0x2e]
+
+         fcvtps v6.4s, v8.4s
+         fcvtps v6.2d, v8.2d
+         fcvtps v4.2s, v0.2s
+
+// CHECK:	fcvtps	v6.4s, v8.4s            // encoding: [0x06,0xa9,0xa1,0x4e]
+// CHECK:	fcvtps	v6.2d, v8.2d            // encoding: [0x06,0xa9,0xe1,0x4e]
+// CHECK:	fcvtps	v4.2s, v0.2s            // encoding: [0x04,0xa8,0xa1,0x0e]
+
+         fcvtpu v6.4s, v8.4s
+         fcvtpu v6.2d, v8.2d
+         fcvtpu v4.2s, v0.2s
+
+// CHECK:	fcvtpu	v6.4s, v8.4s            // encoding: [0x06,0xa9,0xa1,0x6e]
+// CHECK:	fcvtpu	v6.2d, v8.2d            // encoding: [0x06,0xa9,0xe1,0x6e]
+// CHECK:	fcvtpu	v4.2s, v0.2s            // encoding: [0x04,0xa8,0xa1,0x2e]
+
+         fcvtms v6.4s, v8.4s
+         fcvtms v6.2d, v8.2d
+         fcvtms v4.2s, v0.2s
+
+// CHECK:	fcvtms	v6.4s, v8.4s            // encoding: [0x06,0xb9,0x21,0x4e]
+// CHECK:	fcvtms	v6.2d, v8.2d            // encoding: [0x06,0xb9,0x61,0x4e]
+// CHECK:	fcvtms	v4.2s, v0.2s            // encoding: [0x04,0xb8,0x21,0x0e]
+
+         fcvtmu v6.4s, v8.4s
+         fcvtmu v6.2d, v8.2d
+         fcvtmu v4.2s, v0.2s
+
+// CHECK:	fcvtmu	v6.4s, v8.4s            // encoding: [0x06,0xb9,0x21,0x6e]
+// CHECK:	fcvtmu	v6.2d, v8.2d            // encoding: [0x06,0xb9,0x61,0x6e]
+// CHECK:	fcvtmu	v4.2s, v0.2s            // encoding: [0x04,0xb8,0x21,0x2e]
+
+         fcvtzs v6.4s, v8.4s
+         fcvtzs v6.2d, v8.2d
+         fcvtzs v4.2s, v0.2s
+
+// CHECK:	fcvtzs	v6.4s, v8.4s            // encoding: [0x06,0xb9,0xa1,0x4e]
+// CHECK:	fcvtzs	v6.2d, v8.2d            // encoding: [0x06,0xb9,0xe1,0x4e]
+// CHECK:	fcvtzs	v4.2s, v0.2s            // encoding: [0x04,0xb8,0xa1,0x0e]
+
+
+         fcvtzu v6.4s, v8.4s
+         fcvtzu v6.2d, v8.2d
+         fcvtzu v4.2s, v0.2s
+
+// CHECK:	fcvtzu	v6.4s, v8.4s            // encoding: [0x06,0xb9,0xa1,0x6e]
+// CHECK:	fcvtzu	v6.2d, v8.2d            // encoding: [0x06,0xb9,0xe1,0x6e]
+// CHECK:	fcvtzu	v4.2s, v0.2s            // encoding: [0x04,0xb8,0xa1,0x2e]
+
+         fcvtas v6.4s, v8.4s
+         fcvtas v6.2d, v8.2d
+         fcvtas v4.2s, v0.2s
+
+// CHECK:	fcvtas	v6.4s, v8.4s            // encoding: [0x06,0xc9,0x21,0x4e]
+// CHECK:	fcvtas	v6.2d, v8.2d            // encoding: [0x06,0xc9,0x61,0x4e]
+// CHECK:	fcvtas	v4.2s, v0.2s            // encoding: [0x04,0xc8,0x21,0x0e]
+
+         fcvtau v6.4s, v8.4s
+         fcvtau v6.2d, v8.2d
+         fcvtau v4.2s, v0.2s
+
+// CHECK:	fcvtau	v6.4s, v8.4s            // encoding: [0x06,0xc9,0x21,0x6e]
+// CHECK:	fcvtau	v6.2d, v8.2d            // encoding: [0x06,0xc9,0x61,0x6e]
+// CHECK:	fcvtau	v4.2s, v0.2s            // encoding: [0x04,0xc8,0x21,0x2e]
+// The remaining tests cover estimates, integer-to-FP conversion and square root.
+         urecpe v6.4s, v8.4s
+         urecpe v4.2s, v0.2s
+
+// CHECK:	urecpe	v6.4s, v8.4s            // encoding: [0x06,0xc9,0xa1,0x4e]
+// CHECK:	urecpe	v4.2s, v0.2s            // encoding: [0x04,0xc8,0xa1,0x0e]
+
+         ursqrte v6.4s, v8.4s
+         ursqrte v4.2s, v0.2s
+
+// CHECK:	ursqrte	v6.4s, v8.4s            // encoding: [0x06,0xc9,0xa1,0x6e]
+// CHECK:	ursqrte	v4.2s, v0.2s            // encoding: [0x04,0xc8,0xa1,0x2e]
+
+         scvtf v6.4s, v8.4s
+         scvtf v6.2d, v8.2d
+         scvtf v4.2s, v0.2s
+
+// CHECK:	scvtf	v6.4s, v8.4s            // encoding: [0x06,0xd9,0x21,0x4e]
+// CHECK:	scvtf	v6.2d, v8.2d            // encoding: [0x06,0xd9,0x61,0x4e]
+// CHECK:	scvtf	v4.2s, v0.2s            // encoding: [0x04,0xd8,0x21,0x0e]
+
+         ucvtf v6.4s, v8.4s
+         ucvtf v6.2d, v8.2d
+         ucvtf v4.2s, v0.2s
+
+// CHECK:	ucvtf	v6.4s, v8.4s            // encoding: [0x06,0xd9,0x21,0x6e]
+// CHECK:	ucvtf	v6.2d, v8.2d            // encoding: [0x06,0xd9,0x61,0x6e]
+// CHECK:	ucvtf	v4.2s, v0.2s            // encoding: [0x04,0xd8,0x21,0x2e]
+
+         frecpe v6.4s, v8.4s
+         frecpe v6.2d, v8.2d
+         frecpe v4.2s, v0.2s
+
+// CHECK:	frecpe	v6.4s, v8.4s            // encoding: [0x06,0xd9,0xa1,0x4e]
+// CHECK:	frecpe	v6.2d, v8.2d            // encoding: [0x06,0xd9,0xe1,0x4e]
+// CHECK:	frecpe	v4.2s, v0.2s            // encoding: [0x04,0xd8,0xa1,0x0e]
+
+         frsqrte v6.4s, v8.4s
+         frsqrte v6.2d, v8.2d
+         frsqrte v4.2s, v0.2s
+
+// CHECK:	frsqrte	v6.4s, v8.4s            // encoding: [0x06,0xd9,0xa1,0x6e]
+// CHECK:	frsqrte	v6.2d, v8.2d            // encoding: [0x06,0xd9,0xe1,0x6e]
+// CHECK:	frsqrte	v4.2s, v0.2s            // encoding: [0x04,0xd8,0xa1,0x2e]
+
+         fsqrt v6.4s, v8.4s
+         fsqrt v6.2d, v8.2d
+         fsqrt v4.2s, v0.2s
+
+// CHECK:	fsqrt	v6.4s, v8.4s            // encoding: [0x06,0xf9,0xa1,0x6e]
+// CHECK:	fsqrt	v6.2d, v8.2d            // encoding: [0x06,0xf9,0xe1,0x6e]
+// CHECK:	fsqrt	v4.2s, v0.2s            // encoding: [0x04,0xf8,0xa1,0x2e]
+
+
diff --git a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
new file mode 100644
index 000000000000..8dc271e38d2c
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
@@ -0,0 +1,389 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to one register (post-index)
+//------------------------------------------------------------------------------
+         ld1 {v0.16b}, [x0], x1
+         ld1 {v15.8h}, [x15], x2
+         ld1 {v31.4s}, [sp], #16
+         ld1 {v0.2d}, [x0], #16
+         ld1 {v0.8b}, [x0], x2
+         ld1 {v15.4h}, [x15], x3
+         ld1 {v31.2s}, [sp], #8
+         ld1 {v0.1d}, [x0], #8
+// CHECK: ld1 {v0.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x70,0xc1,0x4c]
+// CHECK: ld1 {v15.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x75,0xc2,0x4c]
+// CHECK: ld1 {v31.4s}, [sp], #16
+// CHECK:     // encoding: [0xff,0x7b,0xdf,0x4c]
+// CHECK: ld1 {v0.2d}, [x0], #16
+// CHECK:     // encoding: [0x00,0x7c,0xdf,0x4c]
+// CHECK: ld1 {v0.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x70,0xc2,0x0c]
+// CHECK: ld1 {v15.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x75,0xc3,0x0c]
+// CHECK: ld1 {v31.2s}, [sp], #8
+// CHECK:     // encoding: [0xff,0x7b,0xdf,0x0c]
+// CHECK: ld1 {v0.1d}, [x0], #8
+// CHECK:     // encoding: [0x00,0x7c,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to two consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         ld1 {v0.16b, v1.16b}, [x0], x1
+         ld1 {v15.8h, v16.8h}, [x15], x2
+         ld1 {v31.4s, v0.4s}, [sp], #32
+         ld1 {v0.2d, v1.2d}, [x0], #32
+         ld1 {v0.8b, v1.8b}, [x0], x2
+         ld1 {v15.4h, v16.4h}, [x15], x3
+         ld1 {v31.2s, v0.2s}, [sp], #16
+         ld1 {v0.1d, v1.1d}, [x0], #16
+// CHECK: ld1 {v0.16b, v1.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0xa0,0xc1,0x4c]
+// CHECK: ld1 {v15.8h, v16.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0xa5,0xc2,0x4c]
+// CHECK: ld1 {v31.4s, v0.4s}, [sp], #32
+// CHECK:     // encoding: [0xff,0xab,0xdf,0x4c]
+// CHECK: ld1 {v0.2d, v1.2d}, [x0], #32
+// CHECK:     // encoding: [0x00,0xac,0xdf,0x4c]
+// CHECK: ld1 {v0.8b, v1.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0xa0,0xc2,0x0c]
+// CHECK: ld1 {v15.4h, v16.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0xa5,0xc3,0x0c]
+// CHECK: ld1 {v31.2s, v0.2s}, [sp], #16
+// CHECK:     // encoding: [0xff,0xab,0xdf,0x0c]
+// CHECK: ld1 {v0.1d, v1.1d}, [x0], #16
+// CHECK:     // encoding: [0x00,0xac,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to three consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         ld1 {v0.16b, v1.16b, v2.16b}, [x0], x1
+         ld1 {v15.8h, v16.8h, v17.8h}, [x15], x2
+         ld1 {v31.4s, v0.4s, v1.4s}, [sp], #48
+         ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+         ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2
+         ld1 {v15.4h, v16.4h, v17.4h}, [x15], x3
+         ld1 {v31.2s, v0.2s, v1.2s}, [sp], #24
+         ld1 {v0.1d, v1.1d, v2.1d}, [x0], #24
+// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x60,0xc1,0x4c]
+// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x65,0xc2,0x4c]
+// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK:     // encoding: [0xff,0x6b,0xdf,0x4c]
+// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK:     // encoding: [0x00,0x6c,0xdf,0x4c]
+// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x60,0xc2,0x0c]
+// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x65,0xc3,0x0c]
+// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK:     // encoding: [0xff,0x6b,0xdf,0x0c]
+// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0], #24
+// CHECK:     // encoding: [0x00,0x6c,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 1-element structures to four consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+         ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+         ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+         ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+         ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+         ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+         ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+         ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
+// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x20,0xc1,0x4c]
+// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x25,0xc2,0x4c]
+// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK:     // encoding: [0xff,0x2b,0xdf,0x4c]
+// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK:     // encoding: [0x00,0x2c,0xdf,0x4c]
+// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK:     // encoding: [0x00,0x20,0xc3,0x0c]
+// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK:     // encoding: [0xef,0x25,0xc4,0x0c]
+// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK:     // encoding: [0xff,0x2b,0xdf,0x0c]
+// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
+// CHECK:     // encoding: [0x00,0x2c,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 2-element structures to two consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         ld2 {v0.16b, v1.16b}, [x0], x1
+         ld2 {v15.8h, v16.8h}, [x15], x2
+         ld2 {v31.4s, v0.4s}, [sp], #32
+         ld2 {v0.2d, v1.2d}, [x0], #32
+         ld2 {v0.8b, v1.8b}, [x0], x2
+         ld2 {v15.4h, v16.4h}, [x15], x3
+         ld2 {v31.2s, v0.2s}, [sp], #16
+// CHECK: ld2 {v0.16b, v1.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x80,0xc1,0x4c]
+// CHECK: ld2 {v15.8h, v16.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x85,0xc2,0x4c]
+// CHECK: ld2 {v31.4s, v0.4s}, [sp], #32
+// CHECK:     // encoding: [0xff,0x8b,0xdf,0x4c]
+// CHECK: ld2 {v0.2d, v1.2d}, [x0], #32
+// CHECK:     // encoding: [0x00,0x8c,0xdf,0x4c]
+// CHECK: ld2 {v0.8b, v1.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x80,0xc2,0x0c]
+// CHECK: ld2 {v15.4h, v16.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x85,0xc3,0x0c]
+// CHECK: ld2 {v31.2s, v0.2s}, [sp], #16
+// CHECK:     // encoding: [0xff,0x8b,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 3-element structures to three consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         ld3 {v0.16b, v1.16b, v2.16b}, [x0], x1
+         ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+         ld3 {v31.4s, v0.4s, v1.4s}, [sp], #48
+         ld3 {v0.2d, v1.2d, v2.2d}, [x0], #48
+         ld3 {v0.8b, v1.8b, v2.8b}, [x0], x2
+         ld3 {v15.4h, v16.4h, v17.4h}, [x15], x3
+         ld3 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x40,0xc1,0x4c]
+// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x45,0xc2,0x4c]
+// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK:     // encoding: [0xff,0x4b,0xdf,0x4c]
+// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK:     // encoding: [0x00,0x4c,0xdf,0x4c]
+// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x40,0xc2,0x0c]
+// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x45,0xc3,0x0c]
+// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK:     // encoding: [0xff,0x4b,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Load multiple 4-element structures to four consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+         ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+         ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+         ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+         ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+         ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+         ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x00,0xc1,0x4c]
+// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x05,0xc2,0x4c]
+// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK:     // encoding: [0xff,0x0b,0xdf,0x4c]
+// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK:     // encoding: [0x00,0x0c,0xdf,0x4c]
+// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK:     // encoding: [0x00,0x00,0xc3,0x0c]
+// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK:     // encoding: [0xef,0x05,0xc4,0x0c]
+// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK:     // encoding: [0xff,0x0b,0xdf,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from one register (post-index)
+//------------------------------------------------------------------------------
+         st1 {v0.16b}, [x0], x1
+         st1 {v15.8h}, [x15], x2
+         st1 {v31.4s}, [sp], #16
+         st1 {v0.2d}, [x0], #16
+         st1 {v0.8b}, [x0], x2
+         st1 {v15.4h}, [x15], x3
+         st1 {v31.2s}, [sp], #8
+         st1 {v0.1d}, [x0], #8
+// CHECK: st1 {v0.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x70,0x81,0x4c]
+// CHECK: st1 {v15.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x75,0x82,0x4c]
+// CHECK: st1 {v31.4s}, [sp], #16
+// CHECK:     // encoding: [0xff,0x7b,0x9f,0x4c]
+// CHECK: st1 {v0.2d}, [x0], #16
+// CHECK:     // encoding: [0x00,0x7c,0x9f,0x4c]
+// CHECK: st1 {v0.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x70,0x82,0x0c]
+// CHECK: st1 {v15.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x75,0x83,0x0c]
+// CHECK: st1 {v31.2s}, [sp], #8
+// CHECK:     // encoding: [0xff,0x7b,0x9f,0x0c]
+// CHECK: st1 {v0.1d}, [x0], #8
+// CHECK:     // encoding: [0x00,0x7c,0x9f,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from two consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         st1 {v0.16b, v1.16b}, [x0], x1
+         st1 {v15.8h, v16.8h}, [x15], x2
+         st1 {v31.4s, v0.4s}, [sp], #32
+         st1 {v0.2d, v1.2d}, [x0], #32
+         st1 {v0.8b, v1.8b}, [x0], x2
+         st1 {v15.4h, v16.4h}, [x15], x3
+         st1 {v31.2s, v0.2s}, [sp], #16
+         st1 {v0.1d, v1.1d}, [x0], #16
+// CHECK: st1 {v0.16b, v1.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0xa0,0x81,0x4c]
+// CHECK: st1 {v15.8h, v16.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0xa5,0x82,0x4c]
+// CHECK: st1 {v31.4s, v0.4s}, [sp], #32
+// CHECK:     // encoding: [0xff,0xab,0x9f,0x4c]
+// CHECK: st1 {v0.2d, v1.2d}, [x0], #32
+// CHECK:     // encoding: [0x00,0xac,0x9f,0x4c]
+// CHECK: st1 {v0.8b, v1.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0xa0,0x82,0x0c]
+// CHECK: st1 {v15.4h, v16.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0xa5,0x83,0x0c]
+// CHECK: st1 {v31.2s, v0.2s}, [sp], #16
+// CHECK:     // encoding: [0xff,0xab,0x9f,0x0c]
+// CHECK: st1 {v0.1d, v1.1d}, [x0], #16
+// CHECK:     // encoding: [0x00,0xac,0x9f,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from three consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         st1 {v0.16b, v1.16b, v2.16b}, [x0], x1
+         st1 {v15.8h, v16.8h, v17.8h}, [x15], x2
+         st1 {v31.4s, v0.4s, v1.4s}, [sp], #48
+         st1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+         st1 {v0.8b, v1.8b, v2.8b}, [x0], x2
+         st1 {v15.4h, v16.4h, v17.4h}, [x15], x3
+         st1 {v31.2s, v0.2s, v1.2s}, [sp], #24
+         st1 {v0.1d, v1.1d, v2.1d}, [x0], #24
+// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x60,0x81,0x4c]
+// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x65,0x82,0x4c]
+// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK:     // encoding: [0xff,0x6b,0x9f,0x4c]
+// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK:     // encoding: [0x00,0x6c,0x9f,0x4c]
+// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x60,0x82,0x0c]
+// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x65,0x83,0x0c]
+// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK:     // encoding: [0xff,0x6b,0x9f,0x0c]
+// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0], #24
+// CHECK:     // encoding: [0x00,0x6c,0x9f,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 1-element structures from four consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+         st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+         st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+         st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+         st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+         st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+         st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+         st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
+// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x20,0x81,0x4c]
+// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x25,0x82,0x4c]
+// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK:     // encoding: [0xff,0x2b,0x9f,0x4c]
+// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK:     // encoding: [0x00,0x2c,0x9f,0x4c]
+// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK:     // encoding: [0x00,0x20,0x83,0x0c]
+// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK:     // encoding: [0xef,0x25,0x84,0x0c]
+// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK:     // encoding: [0xff,0x2b,0x9f,0x0c]
+// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
+// CHECK:     // encoding: [0x00,0x2c,0x9f,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 2-element structures from two consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         st2 {v0.16b, v1.16b}, [x0], x1
+         st2 {v15.8h, v16.8h}, [x15], x2
+         st2 {v31.4s, v0.4s}, [sp], #32
+         st2 {v0.2d, v1.2d}, [x0], #32
+         st2 {v0.8b, v1.8b}, [x0], x2
+         st2 {v15.4h, v16.4h}, [x15], x3
+         st2 {v31.2s, v0.2s}, [sp], #16
+// CHECK: st2 {v0.16b, v1.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x80,0x81,0x4c]
+// CHECK: st2 {v15.8h, v16.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x85,0x82,0x4c]
+// CHECK: st2 {v31.4s, v0.4s}, [sp], #32
+// CHECK:     // encoding: [0xff,0x8b,0x9f,0x4c]
+// CHECK: st2 {v0.2d, v1.2d}, [x0], #32
+// CHECK:     // encoding: [0x00,0x8c,0x9f,0x4c]
+// CHECK: st2 {v0.8b, v1.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x80,0x82,0x0c]
+// CHECK: st2 {v15.4h, v16.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x85,0x83,0x0c]
+// CHECK: st2 {v31.2s, v0.2s}, [sp], #16
+// CHECK:     // encoding: [0xff,0x8b,0x9f,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 3-element structures from three consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         st3 {v0.16b, v1.16b, v2.16b}, [x0], x1
+         st3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+         st3 {v31.4s, v0.4s, v1.4s}, [sp], #48
+         st3 {v0.2d, v1.2d, v2.2d}, [x0], #48
+         st3 {v0.8b, v1.8b, v2.8b}, [x0], x2
+         st3 {v15.4h, v16.4h, v17.4h}, [x15], x3
+         st3 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x40,0x81,0x4c]
+// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x45,0x82,0x4c]
+// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK:     // encoding: [0xff,0x4b,0x9f,0x4c]
+// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK:     // encoding: [0x00,0x4c,0x9f,0x4c]
+// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK:     // encoding: [0x00,0x40,0x82,0x0c]
+// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK:     // encoding: [0xef,0x45,0x83,0x0c]
+// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK:     // encoding: [0xff,0x4b,0x9f,0x0c]
+
+//------------------------------------------------------------------------------
+// Store multiple 4-element structures from four consecutive registers
+// (post-index)
+//------------------------------------------------------------------------------
+         st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+         st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+         st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+         st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+         st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+         st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+         st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+// CHECK:     // encoding: [0x00,0x00,0x81,0x4c]
+// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK:     // encoding: [0xef,0x05,0x82,0x4c]
+// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK:     // encoding: [0xff,0x0b,0x9f,0x4c]
+// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK:     // encoding: [0x00,0x0c,0x9f,0x4c]
+// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK:     // encoding: [0x00,0x00,0x83,0x0c]
+// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK:     // encoding: [0xef,0x05,0x84,0x0c]
+// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK:     // encoding: [0xff,0x0b,0x9f,0x0c]
diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s
new file mode 100644
index 000000000000..a16432324efc
--- /dev/null
+++ b/test/MC/AArch64/neon-simd-shift.s
@@ -0,0 +1,434 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Vector shift right by immediate
+//------------------------------------------------------------------------------
+         sshr v0.8b, v1.8b, #3
+         sshr v0.4h, v1.4h, #3
+         sshr v0.2s, v1.2s, #3
+         sshr v0.16b, v1.16b, #3
+         sshr v0.8h, v1.8h, #3
+         sshr v0.4s, v1.4s, #3
+         sshr v0.2d, v1.2d, #3
+// CHECK:	sshr	v0.8b, v1.8b, #3        // encoding: [0x20,0x04,0x0d,0x0f]
+// CHECK:	sshr	v0.4h, v1.4h, #3        // encoding: [0x20,0x04,0x1d,0x0f]
+// CHECK:	sshr	v0.2s, v1.2s, #3        // encoding: [0x20,0x04,0x3d,0x0f]
+// CHECK:	sshr	v0.16b, v1.16b, #3      // encoding: [0x20,0x04,0x0d,0x4f]
+// CHECK:	sshr	v0.8h, v1.8h, #3        // encoding: [0x20,0x04,0x1d,0x4f]
+// CHECK:	sshr	v0.4s, v1.4s, #3        // encoding: [0x20,0x04,0x3d,0x4f]
+// CHECK:	sshr	v0.2d, v1.2d, #3        // encoding: [0x20,0x04,0x7d,0x4f]
+
+//------------------------------------------------------------------------------
+// Vector unsigned shift right by immediate
+//------------------------------------------------------------------------------
+         ushr v0.8b, v1.8b, #3
+         ushr v0.4h, v1.4h, #3
+         ushr v0.2s, v1.2s, #3
+         ushr v0.16b, v1.16b, #3
+         ushr v0.8h, v1.8h, #3
+         ushr v0.4s, v1.4s, #3
+         ushr v0.2d, v1.2d, #3
+
+// CHECK: 	ushr	v0.8b, v1.8b, #3        // encoding: [0x20,0x04,0x0d,0x2f]
+// CHECK: 	ushr	v0.4h, v1.4h, #3        // encoding: [0x20,0x04,0x1d,0x2f]
+// CHECK:	ushr	v0.2s, v1.2s, #3        // encoding: [0x20,0x04,0x3d,0x2f]
+// CHECK:	ushr	v0.16b, v1.16b, #3      // encoding: [0x20,0x04,0x0d,0x6f]
+// CHECK:	ushr	v0.8h, v1.8h, #3        // encoding: [0x20,0x04,0x1d,0x6f]
+// CHECK:	ushr	v0.4s, v1.4s, #3        // encoding: [0x20,0x04,0x3d,0x6f]
+// CHECK:	ushr	v0.2d, v1.2d, #3        // encoding: [0x20,0x04,0x7d,0x6f]
+
+//------------------------------------------------------------------------------
+// Vector shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         ssra v0.8b, v1.8b, #3
+         ssra v0.4h, v1.4h, #3
+         ssra v0.2s, v1.2s, #3
+         ssra v0.16b, v1.16b, #3
+         ssra v0.8h, v1.8h, #3
+         ssra v0.4s, v1.4s, #3
+         ssra v0.2d, v1.2d, #3
+
+// CHECK:	ssra	v0.8b, v1.8b, #3        // encoding: [0x20,0x14,0x0d,0x0f]
+// CHECK:	ssra	v0.4h, v1.4h, #3        // encoding: [0x20,0x14,0x1d,0x0f]
+// CHECK:	ssra	v0.2s, v1.2s, #3        // encoding: [0x20,0x14,0x3d,0x0f]
+// CHECK:	ssra	v0.16b, v1.16b, #3      // encoding: [0x20,0x14,0x0d,0x4f]
+// CHECK:	ssra	v0.8h, v1.8h, #3        // encoding: [0x20,0x14,0x1d,0x4f]
+// CHECK:	ssra	v0.4s, v1.4s, #3        // encoding: [0x20,0x14,0x3d,0x4f]
+// CHECK:	ssra	v0.2d, v1.2d, #3        // encoding: [0x20,0x14,0x7d,0x4f]
+
+//------------------------------------------------------------------------------
+// Vector unsigned shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         usra v0.8b, v1.8b, #3
+         usra v0.4h, v1.4h, #3
+         usra v0.2s, v1.2s, #3
+         usra v0.16b, v1.16b, #3
+         usra v0.8h, v1.8h, #3
+         usra v0.4s, v1.4s, #3
+         usra v0.2d, v1.2d, #3
+
+// CHECK:	usra	v0.8b, v1.8b, #3        // encoding: [0x20,0x14,0x0d,0x2f]
+// CHECK:	usra	v0.4h, v1.4h, #3        // encoding: [0x20,0x14,0x1d,0x2f]
+// CHECK:	usra	v0.2s, v1.2s, #3        // encoding: [0x20,0x14,0x3d,0x2f]
+// CHECK:	usra	v0.16b, v1.16b, #3      // encoding: [0x20,0x14,0x0d,0x6f]
+// CHECK:	usra	v0.8h, v1.8h, #3        // encoding: [0x20,0x14,0x1d,0x6f]
+// CHECK:	usra	v0.4s, v1.4s, #3        // encoding: [0x20,0x14,0x3d,0x6f]
+// CHECK:	usra	v0.2d, v1.2d, #3        // encoding: [0x20,0x14,0x7d,0x6f]
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right by immediate
+//------------------------------------------------------------------------------
+         srshr v0.8b, v1.8b, #3
+         srshr v0.4h, v1.4h, #3
+         srshr v0.2s, v1.2s, #3
+         srshr v0.16b, v1.16b, #3
+         srshr v0.8h, v1.8h, #3
+         srshr v0.4s, v1.4s, #3
+         srshr v0.2d, v1.2d, #3
+
+// CHECK:	srshr	v0.8b, v1.8b, #3        // encoding: [0x20,0x24,0x0d,0x0f]
+// CHECK:	srshr	v0.4h, v1.4h, #3        // encoding: [0x20,0x24,0x1d,0x0f]
+// CHECK:	srshr	v0.2s, v1.2s, #3        // encoding: [0x20,0x24,0x3d,0x0f]
+// CHECK:	srshr	v0.16b, v1.16b, #3      // encoding: [0x20,0x24,0x0d,0x4f]
+// CHECK:	srshr	v0.8h, v1.8h, #3        // encoding: [0x20,0x24,0x1d,0x4f]
+// CHECK:	srshr	v0.4s, v1.4s, #3        // encoding: [0x20,0x24,0x3d,0x4f]
+// CHECK:	srshr	v0.2d, v1.2d, #3        // encoding: [0x20,0x24,0x7d,0x4f]
+
+
+//------------------------------------------------------------------------------
+// Vector unsigned rounding shift right by immediate
+//------------------------------------------------------------------------------
+         urshr v0.8b, v1.8b, #3
+         urshr v0.4h, v1.4h, #3
+         urshr v0.2s, v1.2s, #3
+         urshr v0.16b, v1.16b, #3
+         urshr v0.8h, v1.8h, #3
+         urshr v0.4s, v1.4s, #3
+         urshr v0.2d, v1.2d, #3
+
+// CHECK:	urshr	v0.8b, v1.8b, #3        // encoding: [0x20,0x24,0x0d,0x2f]
+// CHECK:	urshr	v0.4h, v1.4h, #3        // encoding: [0x20,0x24,0x1d,0x2f]
+// CHECK:	urshr	v0.2s, v1.2s, #3        // encoding: [0x20,0x24,0x3d,0x2f]
+// CHECK:	urshr	v0.16b, v1.16b, #3      // encoding: [0x20,0x24,0x0d,0x6f]
+// CHECK:	urshr	v0.8h, v1.8h, #3        // encoding: [0x20,0x24,0x1d,0x6f]
+// CHECK:	urshr	v0.4s, v1.4s, #3        // encoding: [0x20,0x24,0x3d,0x6f]
+// CHECK:	urshr	v0.2d, v1.2d, #3        // encoding: [0x20,0x24,0x7d,0x6f]
+
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         srsra v0.8b, v1.8b, #3
+         srsra v0.4h, v1.4h, #3
+         srsra v0.2s, v1.2s, #3
+         srsra v0.16b, v1.16b, #3
+         srsra v0.8h, v1.8h, #3
+         srsra v0.4s, v1.4s, #3
+         srsra v0.2d, v1.2d, #3
+
+// CHECK:	srsra	v0.8b, v1.8b, #3        // encoding: [0x20,0x34,0x0d,0x0f]
+// CHECK:	srsra	v0.4h, v1.4h, #3        // encoding: [0x20,0x34,0x1d,0x0f]
+// CHECK:	srsra	v0.2s, v1.2s, #3        // encoding: [0x20,0x34,0x3d,0x0f]
+// CHECK:	srsra	v0.16b, v1.16b, #3      // encoding: [0x20,0x34,0x0d,0x4f]
+// CHECK:	srsra	v0.8h, v1.8h, #3        // encoding: [0x20,0x34,0x1d,0x4f]
+// CHECK:	srsra	v0.4s, v1.4s, #3        // encoding: [0x20,0x34,0x3d,0x4f]
+// CHECK:	srsra	v0.2d, v1.2d, #3        // encoding: [0x20,0x34,0x7d,0x4f]
+
+
+//------------------------------------------------------------------------------
+// Vector unsigned rounding shift right and accumulate by immediate
+//------------------------------------------------------------------------------
+         ursra v0.8b, v1.8b, #3
+         ursra v0.4h, v1.4h, #3
+         ursra v0.2s, v1.2s, #3
+         ursra v0.16b, v1.16b, #3
+         ursra v0.8h, v1.8h, #3
+         ursra v0.4s, v1.4s, #3
+         ursra v0.2d, v1.2d, #3
+
+// CHECK:	ursra	v0.8b, v1.8b, #3        // encoding: [0x20,0x34,0x0d,0x2f]
+// CHECK:	ursra	v0.4h, v1.4h, #3        // encoding: [0x20,0x34,0x1d,0x2f]
+// CHECK:	ursra	v0.2s, v1.2s, #3        // encoding: [0x20,0x34,0x3d,0x2f]
+// CHECK:	ursra	v0.16b, v1.16b, #3      // encoding: [0x20,0x34,0x0d,0x6f]
+// CHECK:	ursra	v0.8h, v1.8h, #3        // encoding: [0x20,0x34,0x1d,0x6f]
+// CHECK:	ursra	v0.4s, v1.4s, #3        // encoding: [0x20,0x34,0x3d,0x6f]
+// CHECK:	ursra	v0.2d, v1.2d, #3        // encoding: [0x20,0x34,0x7d,0x6f]
+
+
+//------------------------------------------------------------------------------
+// Vector shift right and insert by immediate
+//------------------------------------------------------------------------------
+         sri v0.8b, v1.8b, #3
+         sri v0.4h, v1.4h, #3
+         sri v0.2s, v1.2s, #3
+         sri v0.16b, v1.16b, #3
+         sri v0.8h, v1.8h, #3
+         sri v0.4s, v1.4s, #3
+         sri v0.2d, v1.2d, #3
+
+// CHECK:	sri	v0.8b, v1.8b, #3        // encoding: [0x20,0x44,0x0d,0x2f]
+// CHECK:	sri	v0.4h, v1.4h, #3        // encoding: [0x20,0x44,0x1d,0x2f]
+// CHECK:	sri	v0.2s, v1.2s, #3        // encoding: [0x20,0x44,0x3d,0x2f]
+// CHECK:	sri	v0.16b, v1.16b, #3      // encoding: [0x20,0x44,0x0d,0x6f]
+// CHECK:	sri	v0.8h, v1.8h, #3        // encoding: [0x20,0x44,0x1d,0x6f]
+// CHECK:	sri	v0.4s, v1.4s, #3        // encoding: [0x20,0x44,0x3d,0x6f]
+// CHECK:	sri	v0.2d, v1.2d, #3        // encoding: [0x20,0x44,0x7d,0x6f]
+
+//------------------------------------------------------------------------------
+// Vector shift left and insert by immediate
+//------------------------------------------------------------------------------
+         sli v0.8b, v1.8b, #3
+         sli v0.4h, v1.4h, #3
+         sli v0.2s, v1.2s, #3
+         sli v0.16b, v1.16b, #3
+         sli v0.8h, v1.8h, #3
+         sli v0.4s, v1.4s, #3
+         sli v0.2d, v1.2d, #3
+
+// CHECK:	sli	v0.8b, v1.8b, #3        // encoding: [0x20,0x54,0x0b,0x2f]
+// CHECK:	sli	v0.4h, v1.4h, #3        // encoding: [0x20,0x54,0x13,0x2f]
+// CHECK:	sli	v0.2s, v1.2s, #3        // encoding: [0x20,0x54,0x23,0x2f]
+// CHECK:	sli	v0.16b, v1.16b, #3      // encoding: [0x20,0x54,0x0b,0x6f]
+// CHECK:	sli	v0.8h, v1.8h, #3        // encoding: [0x20,0x54,0x13,0x6f]
+// CHECK:	sli	v0.4s, v1.4s, #3        // encoding: [0x20,0x54,0x23,0x6f]
+// CHECK:	sli	v0.2d, v1.2d, #3        // encoding: [0x20,0x54,0x43,0x6f]
+
+//------------------------------------------------------------------------------
+// Vector saturating shift left unsigned by immediate
+//------------------------------------------------------------------------------
+         sqshlu v0.8b, v1.8b, #3
+         sqshlu v0.4h, v1.4h, #3
+         sqshlu v0.2s, v1.2s, #3
+         sqshlu v0.16b, v1.16b, #3
+         sqshlu v0.8h, v1.8h, #3
+         sqshlu v0.4s, v1.4s, #3
+         sqshlu v0.2d, v1.2d, #3
+
+// CHECK:	sqshlu	v0.8b, v1.8b, #3        // encoding: [0x20,0x64,0x0b,0x2f]
+// CHECK:	sqshlu	v0.4h, v1.4h, #3        // encoding: [0x20,0x64,0x13,0x2f]
+// CHECK:	sqshlu	v0.2s, v1.2s, #3        // encoding: [0x20,0x64,0x23,0x2f]
+// CHECK:	sqshlu	v0.16b, v1.16b, #3      // encoding: [0x20,0x64,0x0b,0x6f]
+// CHECK:	sqshlu	v0.8h, v1.8h, #3        // encoding: [0x20,0x64,0x13,0x6f]
+// CHECK:	sqshlu	v0.4s, v1.4s, #3        // encoding: [0x20,0x64,0x23,0x6f]
+// CHECK:	sqshlu	v0.2d, v1.2d, #3        // encoding: [0x20,0x64,0x43,0x6f]
+
+
+//------------------------------------------------------------------------------
+// Vector saturating shift left by immediate
+//------------------------------------------------------------------------------
+         sqshl v0.8b, v1.8b, #3
+         sqshl v0.4h, v1.4h, #3
+         sqshl v0.2s, v1.2s, #3
+         sqshl v0.16b, v1.16b, #3
+         sqshl v0.8h, v1.8h, #3
+         sqshl v0.4s, v1.4s, #3
+         sqshl v0.2d, v1.2d, #3
+
+// CHECK:	sqshl	v0.8b, v1.8b, #3        // encoding: [0x20,0x74,0x0b,0x0f]
+// CHECK:	sqshl	v0.4h, v1.4h, #3        // encoding: [0x20,0x74,0x13,0x0f]
+// CHECK:	sqshl	v0.2s, v1.2s, #3        // encoding: [0x20,0x74,0x23,0x0f]
+// CHECK:	sqshl	v0.16b, v1.16b, #3      // encoding: [0x20,0x74,0x0b,0x4f]
+// CHECK:	sqshl	v0.8h, v1.8h, #3        // encoding: [0x20,0x74,0x13,0x4f]
+// CHECK:	sqshl	v0.4s, v1.4s, #3        // encoding: [0x20,0x74,0x23,0x4f]
+// CHECK:	sqshl	v0.2d, v1.2d, #3        // encoding: [0x20,0x74,0x43,0x4f]
+
+
+
+//------------------------------------------------------------------------------
+// Vector unsigned saturating shift left by immediate
+//------------------------------------------------------------------------------
+         uqshl v0.8b, v1.8b, #3
+         uqshl v0.4h, v1.4h, #3
+         uqshl v0.2s, v1.2s, #3
+         uqshl v0.16b, v1.16b, #3
+         uqshl v0.8h, v1.8h, #3
+         uqshl v0.4s, v1.4s, #3
+         uqshl v0.2d, v1.2d, #3
+
+// CHECK:	uqshl	v0.8b, v1.8b, #3        // encoding: [0x20,0x74,0x0b,0x2f]
+// CHECK:	uqshl	v0.4h, v1.4h, #3        // encoding: [0x20,0x74,0x13,0x2f]
+// CHECK:	uqshl	v0.2s, v1.2s, #3        // encoding: [0x20,0x74,0x23,0x2f]
+// CHECK:	uqshl	v0.16b, v1.16b, #3      // encoding: [0x20,0x74,0x0b,0x6f]
+// CHECK:	uqshl	v0.8h, v1.8h, #3        // encoding: [0x20,0x74,0x13,0x6f]
+// CHECK:	uqshl	v0.4s, v1.4s, #3        // encoding: [0x20,0x74,0x23,0x6f]
+// CHECK:	uqshl	v0.2d, v1.2d, #3        // encoding: [0x20,0x74,0x43,0x6f]
+
+
+//------------------------------------------------------------------------------
+// Vector shift right narrow by immediate
+//------------------------------------------------------------------------------
+         shrn v0.8b, v1.8h, #3
+         shrn v0.4h, v1.4s, #3
+         shrn v0.2s, v1.2d, #3
+         shrn2 v0.16b, v1.8h, #3
+         shrn2 v0.8h, v1.4s, #3
+         shrn2 v0.4s, v1.2d, #3
+
+// CHECK:	shrn	v0.8b, v1.8h, #3        // encoding: [0x20,0x84,0x0d,0x0f]
+// CHECK:	shrn	v0.4h, v1.4s, #3        // encoding: [0x20,0x84,0x1d,0x0f]
+// CHECK:	shrn	v0.2s, v1.2d, #3        // encoding: [0x20,0x84,0x3d,0x0f]
+// CHECK:	shrn2	v0.16b, v1.8h, #3       // encoding: [0x20,0x84,0x0d,0x4f]
+// CHECK:	shrn2	v0.8h, v1.4s, #3        // encoding: [0x20,0x84,0x1d,0x4f]
+// CHECK:	shrn2	v0.4s, v1.2d, #3        // encoding: [0x20,0x84,0x3d,0x4f]
+
+//------------------------------------------------------------------------------
+// Vector saturating shift right unsigned narrow by immediate
+//------------------------------------------------------------------------------
+         sqshrun v0.8b, v1.8h, #3
+         sqshrun v0.4h, v1.4s, #3
+         sqshrun v0.2s, v1.2d, #3
+         sqshrun2 v0.16b, v1.8h, #3
+         sqshrun2 v0.8h, v1.4s, #3
+         sqshrun2 v0.4s, v1.2d, #3
+
+// CHECK:	sqshrun	v0.8b, v1.8h, #3        // encoding: [0x20,0x84,0x0d,0x2f]
+// CHECK:	sqshrun	v0.4h, v1.4s, #3        // encoding: [0x20,0x84,0x1d,0x2f]
+// CHECK:	sqshrun	v0.2s, v1.2d, #3        // encoding: [0x20,0x84,0x3d,0x2f]
+// CHECK:	sqshrun2	v0.16b, v1.8h, #3 	// encoding: [0x20,0x84,0x0d,0x6f]
+// CHECK:	sqshrun2	v0.8h, v1.4s, #3 	// encoding: [0x20,0x84,0x1d,0x6f]
+// CHECK:	sqshrun2	v0.4s, v1.2d, #3 	// encoding: [0x20,0x84,0x3d,0x6f]
+
+//------------------------------------------------------------------------------
+// Vector rounding shift right narrow by immediate
+//------------------------------------------------------------------------------
+         rshrn v0.8b, v1.8h, #3
+         rshrn v0.4h, v1.4s, #3
+         rshrn v0.2s, v1.2d, #3
+         rshrn2 v0.16b, v1.8h, #3
+         rshrn2 v0.8h, v1.4s, #3
+         rshrn2 v0.4s, v1.2d, #3
+
+// CHECK:	rshrn	v0.8b, v1.8h, #3        // encoding: [0x20,0x8c,0x0d,0x0f]
+// CHECK:	rshrn	v0.4h, v1.4s, #3        // encoding: [0x20,0x8c,0x1d,0x0f]
+// CHECK:	rshrn	v0.2s, v1.2d, #3        // encoding: [0x20,0x8c,0x3d,0x0f]
+// CHECK:	rshrn2	v0.16b, v1.8h, #3       // encoding: [0x20,0x8c,0x0d,0x4f]
+// CHECK:	rshrn2	v0.8h, v1.4s, #3        // encoding: [0x20,0x8c,0x1d,0x4f]
+// CHECK:	rshrn2	v0.4s, v1.2d, #3        // encoding: [0x20,0x8c,0x3d,0x4f]
+
+
+//------------------------------------------------------------------------------
+// Vector saturating shift right rounded unsigned narrow by immediate
+//------------------------------------------------------------------------------
+         sqrshrun v0.8b, v1.8h, #3
+         sqrshrun v0.4h, v1.4s, #3
+         sqrshrun v0.2s, v1.2d, #3
+         sqrshrun2 v0.16b, v1.8h, #3
+         sqrshrun2 v0.8h, v1.4s, #3
+         sqrshrun2 v0.4s, v1.2d, #3
+
+// CHECK:	sqrshrun	v0.8b, v1.8h, #3    // encoding: [0x20,0x8c,0x0d,0x2f]
+// CHECK:	sqrshrun	v0.4h, v1.4s, #3    // encoding: [0x20,0x8c,0x1d,0x2f]
+// CHECK:	sqrshrun	v0.2s, v1.2d, #3    // encoding: [0x20,0x8c,0x3d,0x2f]
+// CHECK:	sqrshrun2	v0.16b, v1.8h, #3   // encoding: [0x20,0x8c,0x0d,0x6f]
+// CHECK:	sqrshrun2	v0.8h, v1.4s, #3    // encoding: [0x20,0x8c,0x1d,0x6f]
+// CHECK:	sqrshrun2	v0.4s, v1.2d, #3    // encoding: [0x20,0x8c,0x3d,0x6f]
+
+
+//------------------------------------------------------------------------------
+// Vector signed saturating shift right narrow by immediate
+//------------------------------------------------------------------------------
+         sqshrn v0.8b, v1.8h, #3
+         sqshrn v0.4h, v1.4s, #3
+         sqshrn v0.2s, v1.2d, #3
+         sqshrn2 v0.16b, v1.8h, #3
+         sqshrn2 v0.8h, v1.4s, #3
+         sqshrn2 v0.4s, v1.2d, #3
+
+// CHECK:	sqshrn	v0.8b, v1.8h, #3        // encoding: [0x20,0x94,0x0d,0x0f]
+// CHECK:	sqshrn	v0.4h, v1.4s, #3        // encoding: [0x20,0x94,0x1d,0x0f]
+// CHECK:	sqshrn	v0.2s, v1.2d, #3        // encoding: [0x20,0x94,0x3d,0x0f]
+// CHECK:	sqshrn2	v0.16b, v1.8h, #3       // encoding: [0x20,0x94,0x0d,0x4f]
+// CHECK:	sqshrn2	v0.8h, v1.4s, #3        // encoding: [0x20,0x94,0x1d,0x4f]
+// CHECK:	sqshrn2	v0.4s, v1.2d, #3        // encoding: [0x20,0x94,0x3d,0x4f]
+
+
+//------------------------------------------------------------------------------
+// Vector unsigned saturating shift right narrow by immediate
+//------------------------------------------------------------------------------
+         uqshrn v0.8b, v1.8h, #3
+         uqshrn v0.4h, v1.4s, #3
+         uqshrn v0.2s, v1.2d, #3
+         uqshrn2 v0.16b, v1.8h, #3
+         uqshrn2 v0.8h, v1.4s, #3
+         uqshrn2 v0.4s, v1.2d, #3
+
+// CHECK:	uqshrn	v0.8b, v1.8h, #3        // encoding: [0x20,0x94,0x0d,0x2f]
+// CHECK:	uqshrn	v0.4h, v1.4s, #3        // encoding: [0x20,0x94,0x1d,0x2f]
+// CHECK:	uqshrn	v0.2s, v1.2d, #3        // encoding: [0x20,0x94,0x3d,0x2f]
+// CHECK:	uqshrn2	v0.16b, v1.8h, #3       // encoding: [0x20,0x94,0x0d,0x6f]
+// CHECK:	uqshrn2	v0.8h, v1.4s, #3        // encoding: [0x20,0x94,0x1d,0x6f]
+// CHECK:	uqshrn2	v0.4s, v1.2d, #3        // encoding: [0x20,0x94,0x3d,0x6f]
+
+//------------------------------------------------------------------------------
+// Vector signed saturating shift right rounded narrow by immediate
+//------------------------------------------------------------------------------
+         sqrshrn v0.8b, v1.8h, #3
+         sqrshrn v0.4h, v1.4s, #3
+         sqrshrn v0.2s, v1.2d, #3
+         sqrshrn2 v0.16b, v1.8h, #3
+         sqrshrn2 v0.8h, v1.4s, #3
+         sqrshrn2 v0.4s, v1.2d, #3
+
+// CHECK:	sqrshrn	v0.8b, v1.8h, #3        // encoding: [0x20,0x9c,0x0d,0x0f]
+// CHECK:	sqrshrn	v0.4h, v1.4s, #3        // encoding: [0x20,0x9c,0x1d,0x0f]
+// CHECK:	sqrshrn	v0.2s, v1.2d, #3        // encoding: [0x20,0x9c,0x3d,0x0f]
+// CHECK:	sqrshrn2	v0.16b, v1.8h, #3   // encoding: [0x20,0x9c,0x0d,0x4f]
+// CHECK:	sqrshrn2	v0.8h, v1.4s, #3    // encoding: [0x20,0x9c,0x1d,0x4f]
+// CHECK:	sqrshrn2	v0.4s, v1.2d, #3    // encoding: [0x20,0x9c,0x3d,0x4f]
+
+
+//------------------------------------------------------------------------------
+// Vector unsigned saturating shift right rounded narrow by immediate
+//------------------------------------------------------------------------------
+         uqrshrn v0.8b, v1.8h, #3
+         uqrshrn v0.4h, v1.4s, #3
+         uqrshrn v0.2s, v1.2d, #3
+         uqrshrn2 v0.16b, v1.8h, #3
+         uqrshrn2 v0.8h, v1.4s, #3
+         uqrshrn2 v0.4s, v1.2d, #3
+
+// CHECK:	uqrshrn	v0.8b, v1.8h, #3        // encoding: [0x20,0x9c,0x0d,0x2f]
+// CHECK:	uqrshrn	v0.4h, v1.4s, #3        // encoding: [0x20,0x9c,0x1d,0x2f]
+// CHECK:	uqrshrn	v0.2s, v1.2d, #3        // encoding: [0x20,0x9c,0x3d,0x2f]
+// CHECK:	uqrshrn2	v0.16b, v1.8h, #3   // encoding: [0x20,0x9c,0x0d,0x6f]
+// CHECK:	uqrshrn2	v0.8h, v1.4s, #3    // encoding: [0x20,0x9c,0x1d,0x6f]
+// CHECK:	uqrshrn2	v0.4s, v1.2d, #3    // encoding: [0x20,0x9c,0x3d,0x6f]
+
+
+//------------------------------------------------------------------------------
+// Fixed-point convert to floating-point
+//------------------------------------------------------------------------------
+         scvtf v0.2s, v1.2s, #3
+         scvtf v0.4s, v1.4s, #3
+         scvtf v0.2d, v1.2d, #3
+         ucvtf v0.2s, v1.2s, #3
+         ucvtf v0.4s, v1.4s, #3
+         ucvtf v0.2d, v1.2d, #3
+
+// CHECK:	scvtf	v0.2s, v1.2s, #3        // encoding: [0x20,0xe4,0x3d,0x0f]
+// CHECK:	scvtf	v0.4s, v1.4s, #3        // encoding: [0x20,0xe4,0x3d,0x4f]
+// CHECK:	scvtf	v0.2d, v1.2d, #3        // encoding: [0x20,0xe4,0x7d,0x4f]
+// CHECK:	ucvtf	v0.2s, v1.2s, #3        // encoding: [0x20,0xe4,0x3d,0x2f]
+// CHECK:	ucvtf	v0.4s, v1.4s, #3        // encoding: [0x20,0xe4,0x3d,0x6f]
+// CHECK:	ucvtf	v0.2d, v1.2d, #3        // encoding: [0x20,0xe4,0x7d,0x6f]
+
+//------------------------------------------------------------------------------
+// Floating-point convert to fixed-point
+//------------------------------------------------------------------------------
+         fcvtzs v0.2s, v1.2s, #3
+         fcvtzs v0.4s, v1.4s, #3
+         fcvtzs v0.2d, v1.2d, #3
+         fcvtzu v0.2s, v1.2s, #3
+         fcvtzu v0.4s, v1.4s, #3
+         fcvtzu v0.2d, v1.2d, #3
+
+
+// CHECK:	fcvtzs	v0.2s, v1.2s, #3        // encoding: [0x20,0xfc,0x3d,0x0f]
+// CHECK:	fcvtzs	v0.4s, v1.4s, #3        // encoding: [0x20,0xfc,0x3d,0x4f]
+// CHECK:	fcvtzs	v0.2d, v1.2d, #3        // encoding: [0x20,0xfc,0x7d,0x4f]
+// CHECK:	fcvtzu	v0.2s, v1.2s, #3        // encoding: [0x20,0xfc,0x3d,0x2f]
+// CHECK:	fcvtzu	v0.4s, v1.4s, #3        // encoding: [0x20,0xfc,0x3d,0x6f]
+// CHECK:	fcvtzu	v0.2d, v1.2d, #3        // encoding: [0x20,0xfc,0x7d,0x6f]
+
diff --git a/test/MC/AArch64/neon-tbl.s b/test/MC/AArch64/neon-tbl.s
new file mode 100644
index 000000000000..ff3e86b1c9b9
--- /dev/null
+++ b/test/MC/AArch64/neon-tbl.s
@@ -0,0 +1,56 @@
+// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//------------------------------------------------------------------------------
+// Table lookup instructions (TBL/TBX)
+//------------------------------------------------------------------------------
+
+        tbl v0.8b, {v1.16b}, v2.8b
+        tbl v0.8b, {v1.16b, v2.16b}, v2.8b
+        tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
+        tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
+        tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
+
+// CHECK: tbl	v0.8b, {v1.16b}, v2.8b  // encoding: [0x20,0x00,0x02,0x0e]
+// CHECK: tbl	v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x20,0x02,0x0e]
+// CHECK: tbl	v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x40,0x02,0x0e]
+// CHECK: tbl	v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x60,0x02,0x0e]
+// CHECK: tbl	v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x63,0x02,0x0e]
+
+        tbl v0.16b, {v1.16b}, v2.16b
+        tbl v0.16b, {v1.16b, v2.16b}, v2.16b
+        tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
+        tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
+        tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
+
+// CHECK: tbl	v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x00,0x02,0x4e]
+// CHECK: tbl	v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x20,0x02,0x4e]
+// CHECK: tbl	v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x40,0x02,0x4e]
+// CHECK: tbl	v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x60,0x02,0x4e]
+// CHECK: tbl	v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x63,0x02,0x4e]
+
+        tbx v0.8b, {v1.16b}, v2.8b
+        tbx v0.8b, {v1.16b, v2.16b}, v2.8b
+        tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
+        tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
+        tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
+
+// CHECK: tbx	v0.8b, {v1.16b}, v2.8b  // encoding: [0x20,0x10,0x02,0x0e]
+// CHECK: tbx	v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x30,0x02,0x0e]
+// CHECK: tbx	v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x50,0x02,0x0e]
+// CHECK: tbx	v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x70,0x02,0x0e]
+// CHECK: tbx	v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x73,0x02,0x0e]
+
+        tbx v0.16b, {v1.16b}, v2.16b
+        tbx v0.16b, {v1.16b, v2.16b}, v2.16b
+        tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
+        tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
+        tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
+
+// CHECK: tbx	v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x10,0x02,0x4e]
+// CHECK: tbx	v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x30,0x02,0x4e]
+// CHECK: tbx	v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x50,0x02,0x4e]
+// CHECK: tbx	v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x70,0x02,0x4e]
+// CHECK: tbx	v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x73,0x02,0x4e]
+
diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s
new file mode 100644
index 000000000000..ea786c0ba678
--- /dev/null
+++ b/test/MC/AArch64/noneon-diagnostics.s
@@ -0,0 +1,28 @@
+// RUN: not llvm-mc  -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+        fmla v3.4s, v12.4s, v17.4s
+        fmla v1.2d, v30.2d, v20.2d
+        fmla v9.2s, v9.2s, v0.2s
+// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT:    fmla v3.4s, v12.4s, v17.4s
+// CHECK-ERROR-NEXT:    ^
+// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT:    fmla v1.2d, v30.2d, v20.2d
+// CHECK-ERROR-NEXT:    ^
+// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT:    fmla v9.2s, v9.2s, v0.2s
+// CHECK-ERROR-NEXT:    ^
+
+        fmls v3.4s, v12.4s, v17.4s
+        fmls v1.2d, v30.2d, v20.2d
+        fmls v9.2s, v9.2s, v0.2s
+// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT:    fmls v3.4s, v12.4s, v17.4s
+// CHECK-ERROR-NEXT:    ^
+// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT:    fmls v1.2d, v30.2d, v20.2d
+// CHECK-ERROR-NEXT:    ^
+// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT:    fmls v9.2s, v9.2s, v0.2s
+// CHECK-ERROR-NEXT:    ^
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index d0e336ecaf23..f99cb41fe5e9 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -17,7 +17,7 @@
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
 
 // CHECK-ELF:      Relocations [
-// CHECK-ELF-NEXT:   Section (1) .text {
+// CHECK-ELF-NEXT:   Section (2) .rela.text {
 // CHECK-ELF-NEXT:     0x0 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM:[^ ]+]]
 // CHECK-ELF-NEXT:     0x4 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
 // CHECK-ELF-NEXT:     0x8 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
@@ -30,11 +30,11 @@
         movn w8, #:dtprel_g1:var
 // CHECK: movz    x5, #:dtprel_g1:var     // encoding: [0x05'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
-// CHECK-NEXT: movn    x6, #:dtprel_g1:var     // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK: movn    x6, #:dtprel_g1:var     // encoding: [0x06'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
-// CHECK-NEXT: movz    w7, #:dtprel_g1:var     // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK: movz    w7, #:dtprel_g1:var     // encoding: [0x07'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
-// CHECK-NEXT: movn    w8, #:dtprel_g1:var     // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK: movn    w8, #:dtprel_g1:var     // encoding: [0x08'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
 
 // CHECK-ELF-NEXT:     0x10 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s
index 82ec7c0c745d..41331e7703c8 100644
--- a/test/MC/AArch64/trace-regs-diagnostics.s
+++ b/test/MC/AArch64/trace-regs-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
         // Write-only
         mrs x12, trcoslar
         mrs x10, trclar
diff --git a/test/MC/ARM/2010-11-30-reloc-movt.s b/test/MC/ARM/2010-11-30-reloc-movt.s
new file mode 100644
index 000000000000..9de88f08b6d0
--- /dev/null
+++ b/test/MC/ARM/2010-11-30-reloc-movt.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc  %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
+// RUN:    llvm-readobj -s -sr -sd | FileCheck  %s
+
+	.syntax unified
+	.eabi_attribute	6, 10
+	.eabi_attribute	8, 1
+	.eabi_attribute	9, 2
+	.fpu	neon
+	.eabi_attribute	20, 1
+	.eabi_attribute	21, 1
+	.eabi_attribute	23, 3
+	.eabi_attribute	24, 1
+	.eabi_attribute	25, 1
+	.file	"/home/espindola/llvm/llvm/test/CodeGen/ARM/2010-11-30-reloc-movt.ll"
+	.text
+	.globl	barf
+	.align	2
+	.type	barf,%function
+barf:                                   @ @barf
+@ BB#0:                                 @ %entry
+	push	{r11, lr}
+	movw	r0, :lower16:a
+	movt	r0, :upper16:a
+	bl	foo
+	pop	{r11, pc}
+.Ltmp0:
+	.size	barf, .Ltmp0-barf
+
+
+
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: 00482DE9 000000E3 000040E3 FEFFFFEB
+// CHECK-NEXT:       0010: 0088BDE8
+// CHECK-NEXT:     )
+// CHECK:          Relocations [
+// CHECK-NEXT:       0x4 R_ARM_MOVW_ABS_NC a
+// CHECK-NEXT:       0x8 R_ARM_MOVT_ABS
+// CHECK-NEXT:       0xC R_ARM_CALL foo
+// CHECK-NEXT:     ]
diff --git a/test/MC/ARM/AlignedBundling/lit.local.cfg b/test/MC/ARM/AlignedBundling/lit.local.cfg
index 6c49f08b7496..ba763cf03ffc 100644
--- a/test/MC/ARM/AlignedBundling/lit.local.cfg
+++ b/test/MC/ARM/AlignedBundling/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/ARM/align_arm_2_thumb.s b/test/MC/ARM/align_arm_2_thumb.s
new file mode 100644
index 000000000000..120e96480b5c
--- /dev/null
+++ b/test/MC/ARM/align_arm_2_thumb.s
@@ -0,0 +1,15 @@
+@ RUN: llvm-mc -triple armv7-none-linux -filetype=obj -o %t.o %s
+@ RUN: llvm-objdump -triple thumbv7-none-linux -d %t.o | FileCheck --check-prefix=ARM_2_THUMB %s
+
+@ RUN: llvm-mc -triple armv7-apple-darwin -filetype=obj -o %t_darwin.o %s
+@ RUN: llvm-objdump -triple thumbv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=ARM_2_THUMB %s
+
+.syntax unified
+.code 16
+@ ARM_2_THUMB-LABEL: foo
+foo:
+  add r0, r0
+.align 3
+@ ARM_2_THUMB: 2: 00 bf     nop
+  add r0, r0
+
diff --git a/test/MC/ARM/align_thumb_2_arm.s b/test/MC/ARM/align_thumb_2_arm.s
new file mode 100644
index 000000000000..328bfabce65c
--- /dev/null
+++ b/test/MC/ARM/align_thumb_2_arm.s
@@ -0,0 +1,15 @@
+@ RUN: llvm-mc -triple thumbv7-none-linux -filetype=obj -o %t.o %s
+@ RUN: llvm-objdump -triple armv7-none-linux -d %t.o | FileCheck --check-prefix=THUMB_2_ARM %s
+
+@ RUN: llvm-mc -triple thumbv7-apple-darwin -filetype=obj -o %t_darwin.o %s
+@ RUN: llvm-objdump -triple armv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=THUMB_2_ARM %s
+
+.syntax unified
+.code 32
+@ THUMB_2_ARM-LABEL: foo
+foo:
+  add r0, r0
+.align 3
+@ THUMB_2_ARM: 4: 00 f0 20 e3    nop
+  add r0, r0
+
diff --git a/test/MC/ARM/arm-ldrd.s b/test/MC/ARM/arm-ldrd.s
new file mode 100644
index 000000000000..c26ee25aad7d
--- /dev/null
+++ b/test/MC/ARM/arm-ldrd.s
@@ -0,0 +1,57 @@
+// RUN: not llvm-mc -arch arm -mattr=+v5te %s 2>&1 | FileCheck %s
+//
+// rdar://14479793
+
+ldrd r1, r2, [pc, #0]
+ldrd r1, r2, [r3, #4]
+ldrd r1, r2, [r3], #4
+ldrd r1, r2, [r3, #4]!
+ldrd r1, r2, [r3, -r4]!
+ldrd r1, r2, [r3, r4]
+ldrd r1, r2, [r3], r4
+// CHECK: error: Rt must be even-numbered
+// CHECK: error: Rt must be even-numbered
+// CHECK: error: Rt must be even-numbered
+// CHECK: error: Rt must be even-numbered
+// CHECK: error: Rt must be even-numbered
+// CHECK: error: Rt must be even-numbered
+// CHECK: error: Rt must be even-numbered
+
+ldrd r0, r3, [pc, #0]
+ldrd r0, r3, [r4, #4]
+ldrd r0, r3, [r4], #4
+ldrd r0, r3, [r4, #4]!
+ldrd r0, r3, [r4, -r5]!
+ldrd r0, r3, [r4, r5]
+ldrd r0, r3, [r4], r5
+// CHECK: error: destination operands must be sequential
+// CHECK: error: destination operands must be sequential
+// CHECK: error: destination operands must be sequential
+// CHECK: error: destination operands must be sequential
+// CHECK: error: destination operands must be sequential
+// CHECK: error: destination operands must be sequential
+// CHECK: error: destination operands must be sequential
+
+ldrd lr, pc, [pc, #0]
+ldrd lr, pc, [r3, #4]
+ldrd lr, pc, [r3], #4
+ldrd lr, pc, [r3, #4]!
+ldrd lr, pc, [r3, -r4]!
+ldrd lr, pc, [r3, r4]
+ldrd lr, pc, [r3], r4
+// CHECK: error: Rt can't be R14
+// CHECK: error: Rt can't be R14
+// CHECK: error: Rt can't be R14
+// CHECK: error: Rt can't be R14
+// CHECK: error: Rt can't be R14
+// CHECK: error: Rt can't be R14
+// CHECK: error: Rt can't be R14
+
+ldrd r0, r1, [r0], #4
+ldrd r0, r1, [r1], #4
+ldrd r0, r1, [r0, #4]!
+ldrd r0, r1, [r1, #4]!
+// CHECK: error: base register needs to be different from destination registers
+// CHECK: error: base register needs to be different from destination registers
+// CHECK: error: base register needs to be different from destination registers
+// CHECK: error: base register needs to be different from destination registers
diff --git a/test/MC/ARM/arm-memory-instructions.s b/test/MC/ARM/arm-memory-instructions.s
index d8d9130b690b..ad35dd26a04a 100644
--- a/test/MC/ARM/arm-memory-instructions.s
+++ b/test/MC/ARM/arm-memory-instructions.s
@@ -16,12 +16,14 @@ _func:
         ldr r2, [r4, #4095]!
         ldr r1, [r2], #30
         ldr r3, [r1], #-30
+        ldr r9, [r2], #-0
 
 @ CHECK: ldr	r5, [r7]                @ encoding: [0x00,0x50,0x97,0xe5]
 @ CHECK: ldr	r6, [r3, #63]           @ encoding: [0x3f,0x60,0x93,0xe5]
 @ CHECK: ldr	r2, [r4, #4095]!        @ encoding: [0xff,0x2f,0xb4,0xe5]
 @ CHECK: ldr	r1, [r2], #30           @ encoding: [0x1e,0x10,0x92,0xe4]
 @ CHECK: ldr	r3, [r1], #-30          @ encoding: [0x1e,0x30,0x11,0xe4]
+@ CHECK: ldr	r9, [r2], #-0           @ encoding: [0x00,0x90,0x12,0xe4]
 
 @------------------------------------------------------------------------------
 @ FIXME: LDR (literal)
@@ -112,21 +114,21 @@ _func:
 @------------------------------------------------------------------------------
 @ LDRD (immediate)
 @------------------------------------------------------------------------------
-        ldrd r3, r4, [r5]
-        ldrd r7, r8, [r2, #15]
-        ldrd r1, r2, [r9, #32]!
+        ldrd r2, r3, [r5]
+        ldrd r6, r7, [r2, #15]
+        ldrd r0, r1, [r9, #32]!
         ldrd r6, r7, [r1], #8
-        ldrd r1, r2, [r8], #0
-        ldrd r1, r2, [r8], #+0
-        ldrd r1, r2, [r8], #-0
+        ldrd r0, r1, [r8], #0
+        ldrd r0, r1, [r8], #+0
+        ldrd r0, r1, [r8], #-0
 
-@ CHECK: ldrd	r3, r4, [r5]            @ encoding: [0xd0,0x30,0xc5,0xe1]
-@ CHECK: ldrd	r7, r8, [r2, #15]       @ encoding: [0xdf,0x70,0xc2,0xe1]
-@ CHECK: ldrd	r1, r2, [r9, #32]!      @ encoding: [0xd0,0x12,0xe9,0xe1]
-@ CHECK: ldrd	r6, r7, [r1], #8        @ encoding: [0xd8,0x60,0xc1,0xe0]
-@ CHECK: ldrd	r1, r2, [r8], #0        @ encoding: [0xd0,0x10,0xc8,0xe0]
-@ CHECK: ldrd	r1, r2, [r8], #0        @ encoding: [0xd0,0x10,0xc8,0xe0]
-@ CHECK: ldrd	r1, r2, [r8], #-0       @ encoding: [0xd0,0x10,0x48,0xe0]
+@ CHECK: ldrd r2, r3, [r5]            @ encoding: [0xd0,0x20,0xc5,0xe1]
+@ CHECK: ldrd r6, r7, [r2, #15]       @ encoding: [0xdf,0x60,0xc2,0xe1]
+@ CHECK: ldrd r0, r1, [r9, #32]!      @ encoding: [0xd0,0x02,0xe9,0xe1]
+@ CHECK: ldrd r6, r7, [r1], #8        @ encoding: [0xd8,0x60,0xc1,0xe0]
+@ CHECK: ldrd r0, r1, [r8], #0        @ encoding: [0xd0,0x00,0xc8,0xe0]
+@ CHECK: ldrd r0, r1, [r8], #0        @ encoding: [0xd0,0x00,0xc8,0xe0]
+@ CHECK: ldrd r0, r1, [r8], #-0       @ encoding: [0xd0,0x00,0x48,0xe0]
 
 
 @------------------------------------------------------------------------------
@@ -141,15 +143,15 @@ Lbaz: .quad 0
 @------------------------------------------------------------------------------
 @ LDRD (register)
 @------------------------------------------------------------------------------
-        ldrd r3, r4, [r1, r3]
+        ldrd r4, r5, [r1, r3]
         ldrd r4, r5, [r7, r2]!
-        ldrd r1, r2, [r8], r12
-        ldrd r1, r2, [r8], -r12
+        ldrd r0, r1, [r8], r12
+        ldrd r0, r1, [r8], -r12
 
-@ CHECK: ldrd	r3, r4, [r1, r3]        @ encoding: [0xd3,0x30,0x81,0xe1]
-@ CHECK: ldrd	r4, r5, [r7, r2]!       @ encoding: [0xd2,0x40,0xa7,0xe1]
-@ CHECK: ldrd	r1, r2, [r8], r12       @ encoding: [0xdc,0x10,0x88,0xe0]
-@ CHECK: ldrd	r1, r2, [r8], -r12      @ encoding: [0xdc,0x10,0x08,0xe0]
+@ CHECK: ldrd r4, r5, [r1, r3]        @ encoding: [0xd3,0x40,0x81,0xe1]
+@ CHECK: ldrd r4, r5, [r7, r2]!       @ encoding: [0xd2,0x40,0xa7,0xe1]
+@ CHECK: ldrd r0, r1, [r8], r12       @ encoding: [0xdc,0x00,0x88,0xe0]
+@ CHECK: ldrd r0, r1, [r8], -r12      @ encoding: [0xdc,0x00,0x08,0xe0]
 
 
 @------------------------------------------------------------------------------
@@ -308,13 +310,14 @@ Lbaz: .quad 0
         str r3, [r5, #40]!
         str r9, [sp], #4095
         str r1, [r7], #-128
+        str r1, [r0], #-0
 
 @ CHECK: str	r8, [r12]               @ encoding: [0x00,0x80,0x8c,0xe5]
 @ CHECK: str	r7, [r1, #12]           @ encoding: [0x0c,0x70,0x81,0xe5]
 @ CHECK: str	r3, [r5, #40]!          @ encoding: [0x28,0x30,0xa5,0xe5]
 @ CHECK: str	r9, [sp], #4095         @ encoding: [0xff,0x9f,0x8d,0xe4]
 @ CHECK: str	r1, [r7], #-128         @ encoding: [0x80,0x10,0x07,0xe4]
-
+@ CHECK: str	r1, [r0], #-0           @ encoding: [0x00,0x10,0x00,0xe4]
 
 @------------------------------------------------------------------------------
 @ FIXME: STR (literal)
diff --git a/test/MC/ARM/arm-thumb-cpus-default.s b/test/MC/ARM/arm-thumb-cpus-default.s
new file mode 100644
index 000000000000..636ee3c50571
--- /dev/null
+++ b/test/MC/ARM/arm-thumb-cpus-default.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -show-encoding -arch=arm < %s | FileCheck %s --check-prefix=CHECK-ARM-ONLY
+@ RUN: llvm-mc -show-encoding -triple=armv4t < %s | FileCheck %s --check-prefix=CHECK-ARM-THUMB
+@ RUN: llvm-mc -show-encoding -arch=arm -mcpu=cortex-a15 < %s| FileCheck %s --check-prefix=CHECK-ARM-THUMB
+@ RUN: llvm-mc -show-encoding -arch=arm -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+@ RUN: llvm-mc -show-encoding -triple=armv7m < %s | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+@ RUN: llvm-mc -show-encoding -triple=armv6m < %s | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+
+        @ Make sure the architecture chosen by LLVM defaults to a compatible
+        @ ARM/Thumb mode.
+        movs r0, r0
+@ CHECK-ARM-THUMB: movs r0, r0 @ encoding: [0x00,0x00,0xb0,0xe1]
+@ CHECK-ARM-ONLY: movs r0, r0 @ encoding: [0x00,0x00,0xb0,0xe1]
+@ CHECK-THUMB-ONLY: movs r0, r0 @ encoding: [0x00,0x00]
diff --git a/test/MC/ARM/arm-thumb-cpus.s b/test/MC/ARM/arm-thumb-cpus.s
new file mode 100644
index 000000000000..24be989db330
--- /dev/null
+++ b/test/MC/ARM/arm-thumb-cpus.s
@@ -0,0 +1,20 @@
+@ RUN: not llvm-mc -show-encoding -arch=arm < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARM-ONLY
+@ RUN: llvm-mc -show-encoding -triple=armv4t < %s 2>&1| FileCheck %s --check-prefix=CHECK-ARM-THUMB
+@ RUN: llvm-mc -show-encoding -arch=arm -mcpu=cortex-a15 < %s 2>&1| FileCheck %s --check-prefix=CHECK-ARM-THUMB
+@ RUN: not llvm-mc -show-encoding -arch=arm -mcpu=cortex-m3 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+@ RUN: not llvm-mc -show-encoding -triple=armv7m < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+@ RUN: not llvm-mc -show-encoding -triple=armv6m < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+
+        @ Make sure correct diagnostics are given for CPUs without support for
+        @ one or other of the execution states.
+        .thumb
+        .arm
+        .code 16
+        .code 32
+@ CHECK-ARM-THUMB-NOT: target does not support
+
+@ CHECK-ARM-ONLY: target does not support Thumb mode
+@ CHECK-ARM-ONLY: target does not support Thumb mode
+
+@ CHECK-THUMB-ONLY: target does not support ARM mode
+@ CHECK-THUMB-ONLY: target does not support ARM mode
diff --git a/test/MC/ARM/arm-thumb-trustzone.s b/test/MC/ARM/arm-thumb-trustzone.s
index a080b3efac88..7755a3c8e69b 100644
--- a/test/MC/ARM/arm-thumb-trustzone.s
+++ b/test/MC/ARM/arm-thumb-trustzone.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+@ RUN: not llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
 @ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
 
   .syntax unified
diff --git a/test/MC/ARM/arm-trustzone.s b/test/MC/ARM/arm-trustzone.s
index 69157f60dc0a..72bac48e84e4 100644
--- a/test/MC/ARM/arm-trustzone.s
+++ b/test/MC/ARM/arm-trustzone.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+@ RUN: not llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
 @ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
 
   .syntax unified
diff --git a/test/MC/ARM/basic-arm-instructions-v8.s b/test/MC/ARM/basic-arm-instructions-v8.s
new file mode 100644
index 000000000000..4ed83c1f025a
--- /dev/null
+++ b/test/MC/ARM/basic-arm-instructions-v8.s
@@ -0,0 +1,59 @@
+@ New ARMv8 A32 encodings
+
+@ RUN: llvm-mc -triple armv8 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-V8
+@ RUN: not llvm-mc -triple armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+
+@ HLT
+        hlt  #0
+        hlt  #65535
+@ CHECK-V8: hlt  #0                       @ encoding: [0x70,0x00,0x00,0xe1]
+@ CHECK-V8: hlt  #65535                   @ encoding: [0x7f,0xff,0x0f,0xe1]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+
+@ AL condition code allowable
+        hltal  #0
+@ CHECK-V8: hlt  #0                       @ encoding: [0x70,0x00,0x00,0xe1]
+@ CHECK-V7: error: instruction requires: armv8
+
+@------------------------------------------------------------------------------
+@ DMB (v8 barriers)
+@------------------------------------------------------------------------------
+        dmb ishld
+        dmb oshld
+        dmb nshld
+        dmb ld
+
+@ CHECK-V8: dmb ishld @ encoding: [0x59,0xf0,0x7f,0xf5]
+@ CHECK-V8: dmb oshld @ encoding: [0x51,0xf0,0x7f,0xf5]
+@ CHECK-V8: dmb nshld @ encoding: [0x55,0xf0,0x7f,0xf5]
+@ CHECK-V8: dmb ld @ encoding: [0x5d,0xf0,0x7f,0xf5]
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+
+@------------------------------------------------------------------------------
+@ DSB (v8 barriers)
+@------------------------------------------------------------------------------
+        dsb ishld
+        dsb oshld
+        dsb nshld
+        dsb ld
+
+@ CHECK-V8: dsb ishld @ encoding: [0x49,0xf0,0x7f,0xf5]
+@ CHECK-V8: dsb oshld @ encoding: [0x41,0xf0,0x7f,0xf5]
+@ CHECK-V8: dsb nshld @ encoding: [0x45,0xf0,0x7f,0xf5]
+@ CHECK-V8: dsb ld @ encoding: [0x4d,0xf0,0x7f,0xf5]
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+
+@------------------------------------------------------------------------------
+@ SEVL
+@------------------------------------------------------------------------------
+        sevl
+
+@ CHECK-V8: sevl @ encoding: [0x05,0xf0,0x20,0xe3]
+@ CHECK-V7: error: instruction requires: armv8
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 71b5b5da09be..29bc6c07cc6c 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -153,7 +153,6 @@ Lforward:
 @ CHECK: adr	r1, #301989888          @ encoding: [0x12,0x14,0x8f,0xe2]
 @ CHECK: adr	r1, #-2147483647        @ encoding: [0x06,0x11,0x8f,0xe2]
 
-
 @------------------------------------------------------------------------------
 @ ADD
 @------------------------------------------------------------------------------
@@ -187,6 +186,7 @@ Lforward:
 
 	add r0, #-4
 	add r4, r5, #-21
+        add r0, pc, #0xc0000000
 
 @ CHECK: add	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe2]
 @ CHECK: add	r4, r5, r6              @ encoding: [0x06,0x40,0x85,0xe0]
@@ -217,6 +217,7 @@ Lforward:
 
 @ CHECK: sub	r0, r0, #4              @ encoding: [0x04,0x00,0x40,0xe2]
 @ CHECK: sub	r4, r5, #21             @ encoding: [0x15,0x40,0x45,0xe2]
+@ CHECK: adr    r0, #-1073741824        @ encoding: [0x03,0x01,0x8f,0xe2]
 
     @ Test right shift by 32, which is encoded as 0
     add r3, r1, r2, lsr #32
@@ -459,10 +460,14 @@ Lforward:
 @------------------------------------------------------------------------------
         cdp  p7, #1, c1, c1, c1, #4
         cdp2  p7, #1, c1, c1, c1, #4
+        cdp2   p12, #0, c6, c12, c0, #7
 
 @ CHECK: cdp  p7, #1, c1, c1, c1, #4     @ encoding: [0x81,0x17,0x11,0xee]
 @ CHECK: cdp2  p7, #1, c1, c1, c1, #4    @ encoding: [0x81,0x17,0x11,0xfe]
+@ CHECK: cdp2  p12, #0, c6, c12, c0, #7   @ encoding: [0xe0,0x6c,0x0c,0xfe]
 
+        cdpne  p7, #1, c1, c1, c1, #4
+@ CHECK: cdpne  p7, #1, c1, c1, c1, #4     @ encoding: [0x81,0x17,0x11,0x1e]
 
 @------------------------------------------------------------------------------
 @ CLREX
@@ -778,9 +783,13 @@ Lforward:
 @------------------------------------------------------------------------------
         isb sy
         isb
+        isb #15
+        isb #1
 
 @ CHECK: isb sy                         @ encoding: [0x6f,0xf0,0x7f,0xf5]
 @ CHECK: isb sy                         @ encoding: [0x6f,0xf0,0x7f,0xf5]
+@ CHECK: isb sy                         @ encoding: [0x6f,0xf0,0x7f,0xf5]
+@ CHECK: isb #0x1                       @ encoding: [0x61,0xf0,0x7f,0xf5]
 
 
 @------------------------------------------------------------------------------
@@ -796,8 +805,8 @@ Lforward:
         ldc2l p7, c1, [r8]
         ldc2l p8, c0, [r9, #-224]
         ldc2l p9, c1, [r10, #-120]!
-        ldc2l p10, c2, [r11], #16
-        ldc2l p11, c3, [r12], #-72
+        ldc2l p0, c2, [r11], #16
+        ldc2l p1, c3, [r12], #-72
 
         ldc p12, c4, [r0, #4]
         ldc p13, c5, [r1]
@@ -837,8 +846,8 @@ Lforward:
 @ CHECK: ldc2l	p7, c1, [r8]            @ encoding: [0x00,0x17,0xd8,0xfd]
 @ CHECK: ldc2l	p8, c0, [r9, #-224]     @ encoding: [0x38,0x08,0x59,0xfd]
 @ CHECK: ldc2l	p9, c1, [r10, #-120]!   @ encoding: [0x1e,0x19,0x7a,0xfd]
-@ CHECK: ldc2l	p10, c2, [r11], #16     @ encoding: [0x04,0x2a,0xfb,0xfc]
-@ CHECK: ldc2l	p11, c3, [r12], #-72    @ encoding: [0x12,0x3b,0x7c,0xfc]
+@ CHECK: ldc2l	p0, c2, [r11], #16      @ encoding: [0x04,0x20,0xfb,0xfc]
+@ CHECK: ldc2l	p1, c3, [r12], #-72     @ encoding: [0x12,0x31,0x7c,0xfc]
 
 @ CHECK: ldc	p12, c4, [r0, #4]       @ encoding: [0x01,0x4c,0x90,0xed]
 @ CHECK: ldc	p13, c5, [r1]           @ encoding: [0x00,0x5d,0x91,0xed]
@@ -900,8 +909,8 @@ Lforward:
 @ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
 @ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
 @ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
-@ CHECK: ldm	r0, {lr, r0, r2} ^          @ encoding: [0x05,0x40,0xd0,0xe8]
-@ CHECK: ldm	sp!, {pc, r0, r1, r2, r3} ^ @ encoding: [0x0f,0x80,0xfd,0xe8]
+@ CHECK: ldm	r0, {r0, r2, lr} ^          @ encoding: [0x05,0x40,0xd0,0xe8]
+@ CHECK: ldm	sp!, {r0, r1, r2, r3, pc} ^ @ encoding: [0x0f,0x80,0xfd,0xe8]
 
 
 @------------------------------------------------------------------------------
@@ -963,6 +972,9 @@ Lforward:
 @ CHECK: mcr  p7, #1, r5, c1, c1, #4    @ encoding: [0x91,0x57,0x21,0xee]
 @ CHECK: mcr2  p7, #1, r5, c1, c1, #4   @ encoding: [0x91,0x57,0x21,0xfe]
 
+        mcrls  p7, #1, r5, c1, c1, #4
+@ CHECK: mcrls  p7, #1, r5, c1, c1, #4   @ encoding: [0x91,0x57,0x21,0x9e]
+
 @------------------------------------------------------------------------------
 @ MCRR/MCRR2
 @------------------------------------------------------------------------------
@@ -972,6 +984,8 @@ Lforward:
 @ CHECK: mcrr  p7, #15, r5, r4, c1      @ encoding: [0xf1,0x57,0x44,0xec]
 @ CHECK: mcrr2  p7, #15, r5, r4, c1     @ encoding: [0xf1,0x57,0x44,0xfc]
 
+        mcrrgt  p7, #15, r5, r4, c1
+@ CHECK: mcrrgt  p7, #15, r5, r4, c1     @ encoding: [0xf1,0x57,0x44,0xcc]
 
 @------------------------------------------------------------------------------
 @ MLA
@@ -1062,10 +1076,17 @@ Lforward:
 @ MRC/MRC2
 @------------------------------------------------------------------------------
         mrc  p14, #0, r1, c1, c2, #4
+        mrc  p15, #7, apsr_nzcv, c15, c6, #6
         mrc2  p14, #0, r1, c1, c2, #4
+        mrc2  p9, #7, apsr_nzcv, c15, c0, #1
+
+@ CHECK: mrc  p14, #0, r1, c1, c2, #4             @ encoding: [0x92,0x1e,0x11,0xee]
+@ CHECK: mrc  p15, #7, apsr_nzcv, c15, c6, #6     @ encoding: [0xd6,0xff,0xff,0xee]
+@ CHECK: mrc2  p14, #0, r1, c1, c2, #4            @ encoding: [0x92,0x1e,0x11,0xfe]
+@ CHECK: mrc2  p9, #7, apsr_nzcv, c15, c0, #1     @ encoding: [0x30,0xf9,0xff,0xfe]
 
-@ CHECK: mrc  p14, #0, r1, c1, c2, #4   @ encoding: [0x92,0x1e,0x11,0xee]
-@ CHECK: mrc2  p14, #0, r1, c1, c2, #4  @ encoding: [0x92,0x1e,0x11,0xfe]
+         mrceq  p15, #7, apsr_nzcv, c15, c6, #6
+@ CHECK: mrceq  p15, #7, apsr_nzcv, c15, c6, #6   @ encoding: [0xd6,0xff,0xff,0x0e]
 
 @------------------------------------------------------------------------------
 @ MRRC/MRRC2
@@ -1076,6 +1097,8 @@ Lforward:
 @ CHECK: mrrc  p7, #1, r5, r4, c1       @ encoding: [0x11,0x57,0x54,0xec]
 @ CHECK: mrrc2  p7, #1, r5, r4, c1      @ encoding: [0x11,0x57,0x54,0xfc]
 
+        mrrclo  p7, #1, r5, r4, c1
+@ CHECK: mrrclo  p7, #1, r5, r4, c1      @ encoding: [0x11,0x57,0x54,0x3c]
 
 @------------------------------------------------------------------------------
 @ MRS
@@ -1235,9 +1258,11 @@ Lforward:
 @ NOP
 @------------------------------------------------------------------------------
         nop
+        nop.w
         nopgt
 
 @ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3]
+@ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3]
 @ CHECK: nopgt @ encoding: [0x00,0xf0,0x20,0xc3]
 
 
@@ -1637,6 +1662,30 @@ Lforward:
 @ CHECK: rsc	r6, r6, r7, ror r9      @ encoding: [0x77,0x69,0xe6,0xe0]
 
 @------------------------------------------------------------------------------
+@ RRX/RRXS
+@------------------------------------------------------------------------------
+
+         rrx r0, r1
+	 rrx sp, pc
+	 rrx pc, lr
+	 rrx lr, sp
+
+@ CHECK: rrx	r0, r1                  @ encoding: [0x61,0x00,0xa0,0xe1]
+@ CHECK: rrx	sp, pc                  @ encoding: [0x6f,0xd0,0xa0,0xe1]
+@ CHECK: rrx	pc, lr                  @ encoding: [0x6e,0xf0,0xa0,0xe1]
+@ CHECK: rrx	lr, sp                  @ encoding: [0x6d,0xe0,0xa0,0xe1]
+
+         rrxs r0, r1
+	 rrxs sp, pc
+	 rrxs pc, lr
+	 rrxs lr, sp
+
+@ CHECK: rrxs	r0, r1                  @ encoding: [0x61,0x00,0xb0,0xe1]
+@ CHECK: rrxs	sp, pc                  @ encoding: [0x6f,0xd0,0xb0,0xe1]
+@ CHECK: rrxs	pc, lr                  @ encoding: [0x6e,0xf0,0xb0,0xe1]
+@ CHECK: rrxs	lr, sp                  @ encoding: [0x6d,0xe0,0xb0,0xe1]
+
+@------------------------------------------------------------------------------
 @ SADD16/SADD8
 @------------------------------------------------------------------------------
         sadd16 r1, r2, r3
@@ -1737,9 +1786,13 @@ Lforward:
 @ SETEND
 @------------------------------------------------------------------------------
         setend be
+        setend BE
         setend le
+        setend LE
 
 @ CHECK: setend	be                      @ encoding: [0x00,0x02,0x01,0xf1]
+@ CHECK: setend	be                      @ encoding: [0x00,0x02,0x01,0xf1]
+@ CHECK: setend	le                      @ encoding: [0x00,0x00,0x01,0xf1]
 @ CHECK: setend	le                      @ encoding: [0x00,0x00,0x01,0xf1]
 
 
@@ -2069,15 +2122,15 @@ Lforward:
 @ CHECK: srsia	sp!, #2                 @ encoding: [0x02,0x05,0xed,0xf8]
 @ CHECK: srsib	sp!, #14                @ encoding: [0x0e,0x05,0xed,0xf9]
 
-@ CHECK: srsda	sp, #11                 @ encoding: [0x0b,0x05,0x4d,0xf8]
-@ CHECK: srsdb	sp, #10                 @ encoding: [0x0a,0x05,0x4d,0xf9]
-@ CHECK: srsia	sp, #9                  @ encoding: [0x09,0x05,0xcd,0xf8]
-@ CHECK: srsib	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf9]
+@ CHECK: srsib	sp, #11                 @ encoding: [0x0b,0x05,0xcd,0xf9]
+@ CHECK: srsia	sp, #10                 @ encoding: [0x0a,0x05,0xcd,0xf8]
+@ CHECK: srsdb	sp, #9                  @ encoding: [0x09,0x05,0x4d,0xf9]
+@ CHECK: srsda	sp, #5                  @ encoding: [0x05,0x05,0x4d,0xf8]
 
-@ CHECK: srsda	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf8]
-@ CHECK: srsdb	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf9]
-@ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
 @ CHECK: srsib	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf9]
+@ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
+@ CHECK: srsdb	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf9]
+@ CHECK: srsda	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf8]
 
 @ CHECK: srsia	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf8]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
@@ -2114,14 +2167,14 @@ Lforward:
 @ CHECK: srsdb	sp!, #19                @ encoding: [0x13,0x05,0x6d,0xf9]
 @ CHECK: srsia	sp!, #2                 @ encoding: [0x02,0x05,0xed,0xf8]
 @ CHECK: srsib	sp!, #14                @ encoding: [0x0e,0x05,0xed,0xf9]
-@ CHECK: srsda	sp, #11                 @ encoding: [0x0b,0x05,0x4d,0xf8]
-@ CHECK: srsdb	sp, #10                 @ encoding: [0x0a,0x05,0x4d,0xf9]
-@ CHECK: srsia	sp, #9                  @ encoding: [0x09,0x05,0xcd,0xf8]
-@ CHECK: srsib	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf9]
-@ CHECK: srsda	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf8]
-@ CHECK: srsdb	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf9]
-@ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
+@ CHECK: srsib	sp, #11                 @ encoding: [0x0b,0x05,0xcd,0xf9]
+@ CHECK: srsia	sp, #10                 @ encoding: [0x0a,0x05,0xcd,0xf8]
+@ CHECK: srsdb	sp, #9                  @ encoding: [0x09,0x05,0x4d,0xf9]
+@ CHECK: srsda	sp, #5                  @ encoding: [0x05,0x05,0x4d,0xf8]
 @ CHECK: srsib	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf9]
+@ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
+@ CHECK: srsdb	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf9]
+@ CHECK: srsda	sp!, #5                 @ encoding: [0x05,0x05,0x6d,0xf8]
 @ CHECK: srsia	sp, #5                  @ encoding: [0x05,0x05,0xcd,0xf8]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0x05,0x05,0xed,0xf8]
 
@@ -2187,8 +2240,8 @@ Lforward:
         stc2l p7, c1, [r8]
         stc2l p8, c0, [r9, #-224]
         stc2l p9, c1, [r10, #-120]!
-        stc2l p10, c2, [r11], #16
-        stc2l p11, c3, [r12], #-72
+        stc2l p0, c2, [r11], #16
+        stc2l p1, c3, [r12], #-72
 
         stc p12, c4, [r0, #4]
         stc p13, c5, [r1]
@@ -2228,8 +2281,8 @@ Lforward:
 @ CHECK: stc2l	p7, c1, [r8]            @ encoding: [0x00,0x17,0xc8,0xfd]
 @ CHECK: stc2l	p8, c0, [r9, #-224]     @ encoding: [0x38,0x08,0x49,0xfd]
 @ CHECK: stc2l	p9, c1, [r10, #-120]!   @ encoding: [0x1e,0x19,0x6a,0xfd]
-@ CHECK: stc2l	p10, c2, [r11], #16     @ encoding: [0x04,0x2a,0xeb,0xfc]
-@ CHECK: stc2l	p11, c3, [r12], #-72    @ encoding: [0x12,0x3b,0x6c,0xfc]
+@ CHECK: stc2l	p0, c2, [r11], #16      @ encoding: [0x04,0x20,0xeb,0xfc]
+@ CHECK: stc2l	p1, c3, [r12], #-72     @ encoding: [0x12,0x31,0x6c,0xfc]
 
 @ CHECK: stc	p12, c4, [r0, #4]       @ encoding: [0x01,0x4c,0x80,0xed]
 @ CHECK: stc	p13, c5, [r1]           @ encoding: [0x00,0x5d,0x81,0xed]
@@ -2277,7 +2330,7 @@ Lforward:
         stmdb     r0!, {r1,r5,r7,sp}
 
 @ CHECK: stm	r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8]
-@ CHECK: stm	r3, {lr, r1, r3, r4, r5, r6} @ encoding: [0x7a,0x40,0x83,0xe8]
+@ CHECK: stm	r3, {r1, r3, r4, r5, r6, lr} @ encoding: [0x7a,0x40,0x83,0xe8]
 @ CHECK: stmib	r4, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x84,0xe9]
 @ CHECK: stmda	r5, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x05,0xe8]
 @ CHECK: stmdb	r6, {r1, r3, r4, r5, r6, r8} @ encoding: [0x7a,0x01,0x06,0xe9]
@@ -2875,6 +2928,7 @@ Lforward:
         hint #2
         hint #1
         hint #0
+        hintgt #239
 
 @ CHECK: wfe                            @ encoding: [0x02,0xf0,0x20,0xe3]
 @ CHECK: wfehi                          @ encoding: [0x02,0xf0,0x20,0x83]
@@ -2887,3 +2941,4 @@ Lforward:
 @ CHECK: wfe                            @ encoding: [0x02,0xf0,0x20,0xe3]
 @ CHECK: yield                          @ encoding: [0x01,0xf0,0x20,0xe3]
 @ CHECK: nop                            @ encoding: [0x00,0xf0,0x20,0xe3]
+@ CHECK: hintgt #239                    @ encoding: [0xef,0xf0,0x20,0xc3]
diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s
index 22e21da88e40..dec7f5b13334 100644
--- a/test/MC/ARM/basic-thumb-instructions.s
+++ b/test/MC/ARM/basic-thumb-instructions.s
@@ -85,11 +85,15 @@ _func:
 @ ADR
 @------------------------------------------------------------------------------
         adr r2, _baz
-        adr	r2, #3
+        adr r5, #0
+        adr r2, #4
+        adr r3, #1020
 
 @ CHECK: adr	r2, _baz                @ encoding: [A,0xa2]
             @   fixup A - offset: 0, value: _baz, kind: fixup_thumb_adr_pcrel_10
-@ CHECK: adr	r2, #3                  @ encoding: [0x03,0xa2]
+@ CHECK: adr	r5, #0                  @ encoding: [0x00,0xa5]
+@ CHECK: adr	r2, #4                  @ encoding: [0x01,0xa2]
+@ CHECK: adr	r3, #1020               @ encoding: [0xff,0xa3]
 
 @------------------------------------------------------------------------------
 @ ASR (immediate)
@@ -124,7 +128,7 @@ _func:
         beq _bar
         b       #1838
         b       #-420
-        beq     #336
+        beq     #-256
         beq     #160
 
 @ CHECK: b	_baz                    @ encoding: [A,0xe0'A']
@@ -133,7 +137,7 @@ _func:
              @   fixup A - offset: 0, value: _bar, kind: fixup_arm_thumb_bcc
 @ CHECK: b       #1838                   @ encoding: [0x97,0xe3]
 @ CHECK: b       #-420                   @ encoding: [0x2e,0xe7]
-@ CHECK: beq     #336                    @ encoding: [0xa8,0xd0]
+@ CHECK: beq     #-256                   @ encoding: [0x80,0xd0]
 @ CHECK: beq     #160                    @ encoding: [0x50,0xd0]
 
 @------------------------------------------------------------------------------
@@ -212,6 +216,16 @@ _func:
 @ CHECK: cmp	r8, r1                  @ encoding: [0x88,0x45]
 
 @------------------------------------------------------------------------------
+@ CPS
+@------------------------------------------------------------------------------
+
+        cpsie f
+        cpsid a
+
+@ CHECK: cpsie f                        @ encoding: [0x61,0xb6]
+@ CHECK: cpsid a                        @ encoding: [0x74,0xb6]
+
+@------------------------------------------------------------------------------
 @ EOR
 @------------------------------------------------------------------------------
         eors r4, r5
diff --git a/test/MC/ARM/basic-thumb2-instructions-v8.s b/test/MC/ARM/basic-thumb2-instructions-v8.s
new file mode 100644
index 000000000000..a7882aead01f
--- /dev/null
+++ b/test/MC/ARM/basic-thumb2-instructions-v8.s
@@ -0,0 +1,87 @@
+@ New ARMv8 T32 encodings
+
+@ RUN: llvm-mc -triple thumbv8 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-V8
+@ RUN: not llvm-mc -triple thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+
+@ HLT
+        hlt  #0
+        hlt  #63
+@ CHECK-V8: hlt  #0                       @ encoding: [0x80,0xba]
+@ CHECK-V8: hlt  #63                      @ encoding: [0xbf,0xba]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+
+@ In IT block
+        it pl
+        hlt #24
+
+@ CHECK-V8: it pl                         @ encoding: [0x58,0xbf]
+@ CHECK-V8: hlt #24                       @ encoding: [0x98,0xba]
+@ CHECK-V7: error: instruction requires: armv8
+
+@ Can accept AL condition code
+        hltal #24
+@ CHECK-V8: hlt #24                       @ encoding: [0x98,0xba]
+@ CHECK-V7: error: instruction requires: armv8
+
+@ DCPS{1,2,3}
+        dcps1
+        dcps2
+        dcps3
+@ CHECK-V8: dcps1                         @ encoding: [0x8f,0xf7,0x01,0x80]
+@ CHECK-V8: dcps2                         @ encoding: [0x8f,0xf7,0x02,0x80]
+@ CHECK-V8: dcps3                         @ encoding: [0x8f,0xf7,0x03,0x80]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+
+@------------------------------------------------------------------------------
+@ DMB (v8 barriers)
+@------------------------------------------------------------------------------
+        dmb ishld
+        dmb oshld
+        dmb nshld
+        dmb ld
+
+@ CHECK-V8: dmb ishld @ encoding: [0xbf,0xf3,0x59,0x8f]
+@ CHECK-V8: dmb oshld @ encoding: [0xbf,0xf3,0x51,0x8f]
+@ CHECK-V8: dmb nshld @ encoding: [0xbf,0xf3,0x55,0x8f]
+@ CHECK-V8: dmb ld @ encoding: [0xbf,0xf3,0x5d,0x8f]
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+
+@------------------------------------------------------------------------------
+@ DSB (v8 barriers)
+@------------------------------------------------------------------------------
+        dsb ishld
+        dsb oshld
+        dsb nshld
+        dsb ld
+
+@ CHECK-V8: dsb ishld @ encoding: [0xbf,0xf3,0x49,0x8f]
+@ CHECK-V8: dsb oshld @ encoding: [0xbf,0xf3,0x41,0x8f]
+@ CHECK-V8: dsb nshld @ encoding: [0xbf,0xf3,0x45,0x8f]
+@ CHECK-V8: dsb ld @ encoding: [0xbf,0xf3,0x4d,0x8f]
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+@ CHECK-V7: error: invalid operand for instruction
+
+@------------------------------------------------------------------------------
+@ SEVL
+@------------------------------------------------------------------------------
+        sevl
+        sevl.w
+        it ge
+        sevlge
+
+@ CHECK-V8: sevl @ encoding: [0x50,0xbf]
+@ CHECK-V8: sevl.w @ encoding: [0xaf,0xf3,0x05,0x80]
+@ CHECK-V8: it ge @ encoding: [0xa8,0xbf]
+@ CHECK-V8: sevlge @ encoding: [0x50,0xbf]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error:
+@ CHECK-V7: error: instruction requires: armv8
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 8127feba6d40..3a5f48832c15 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -79,6 +79,8 @@ _func:
         add r0, r0, #32
         adds r2, r2, #56
         adds r2, #56
+        add r1, r7, #0xcbcbcbcb
+        add sp, sp, #0x1fe0000
 
         adds.w r2, #-16
         adds.w r2, r2, #-16
@@ -101,6 +103,8 @@ _func:
 @ CHECK: add.w	r0, r0, #32             @ encoding: [0x00,0xf1,0x20,0x00]
 @ CHECK: adds	r2, #56                 @ encoding: [0x38,0x32]
 @ CHECK: adds	r2, #56                 @ encoding: [0x38,0x32]
+@ CHECK: add.w  r1, r7, #3419130827     @ encoding: [0x07,0xf1,0xcb,0x31]
+@ CHECK: add.w	sp, sp, #33423360       @ encoding: [0x0d,0xf1,0xff,0x7d]
 
 @ CHECK: subs.w	r2, r2, #16             @ encoding: [0xb2,0xf1,0x10,0x02]
 @ CHECK: subs.w	r2, r2, #16             @ encoding: [0xb2,0xf1,0x10,0x02]
@@ -134,12 +138,14 @@ _func:
 @------------------------------------------------------------------------------
 
         subw r11, pc, #3270
+        adr.w r2, #3
         adr.w r11, #-826
         adr.w r1, #-0x0
 
-@ CHECK: subw	r11, pc, #3270          @ encoding: [0xaf,0xf6,0xc6,0x4b]
-@ CHECK: adr.w	r11, #-826              @ encoding: [0xaf,0xf2,0x3a,0x3b]
-@ CHECK: adr.w	r1, #-0                 @ encoding: [0xaf,0xf2,0x00,0x01]
+@ CHECK: subw  r11, pc, #3270          @ encoding: [0xaf,0xf6,0xc6,0x4b]
+@ CHECK: adr.w r2, #3                  @ encoding: [0x0f,0xf2,0x03,0x02]
+@ CHECK: adr.w r11, #-826              @ encoding: [0xaf,0xf2,0x3a,0x3b]
+@ CHECK: adr.w r1, #-0                 @ encoding: [0xaf,0xf2,0x00,0x01]
 
 @------------------------------------------------------------------------------
 @ AND (immediate)
@@ -148,12 +154,15 @@ _func:
         ands r3, r12, #0xf
         and r1, #0xff
         and r1, r1, #0xff
+        and r5, r4, #0xffffffff
+        ands r1, r9, #0xffffffff
 
 @ CHECK: and	r2, r5, #1044480        @ encoding: [0x05,0xf4,0x7f,0x22]
 @ CHECK: ands	r3, r12, #15            @ encoding: [0x1c,0xf0,0x0f,0x03]
 @ CHECK: and	r1, r1, #255            @ encoding: [0x01,0xf0,0xff,0x01]
 @ CHECK: and	r1, r1, #255            @ encoding: [0x01,0xf0,0xff,0x01]
-
+@ CHECK: and	r5, r4, #4294967295     @ encoding: [0x04,0xf0,0xff,0x35]
+@ CHECK: ands	r1, r9, #4294967295     @ encoding: [0x19,0xf0,0xff,0x31]
 
 @------------------------------------------------------------------------------
 @ AND (register)
@@ -255,6 +264,8 @@ _func:
 @ BIC
 @------------------------------------------------------------------------------
         bic r10, r1, #0xf
+        bic r5, r2, #0xffffffff
+        bics r11, r10, #0xffffffff
         bic r12, r3, r6
         bic r11, r2, r6, lsl #12
         bic r8, r4, r1, lsr #11
@@ -272,6 +283,8 @@ _func:
         bic r12, r6, ror #29
 
 @ CHECK: bic	r10, r1, #15            @ encoding: [0x21,0xf0,0x0f,0x0a]
+@ CHECK: bic	r5, r2, #4294967295     @ encoding: [0x22,0xf0,0xff,0x35]
+@ CHECK: bics	r11, r10, #4294967295   @ encoding: [0x3a,0xf0,0xff,0x3b]
 @ CHECK: bic.w	r12, r3, r6             @ encoding: [0x23,0xea,0x06,0x0c]
 @ CHECK: bic.w	r11, r2, r6, lsl #12    @ encoding: [0x22,0xea,0x06,0x3b]
 @ CHECK: bic.w	r8, r4, r1, lsr #11     @ encoding: [0x24,0xea,0xd1,0x28]
@@ -401,6 +414,31 @@ _func:
 @ CHECK: cmn.w	r2, #2                  @ encoding: [0x12,0xf1,0x02,0x0f]
 @ CHECK: cmp.w	r9, #1                  @ encoding: [0xb9,0xf1,0x01,0x0f]
 
+@------------------------------------------------------------------------------
+@ CPS
+@------------------------------------------------------------------------------
+
+        cpsie f
+        cpsid a
+        cpsie.w f
+        cpsid.w a
+        cpsie i, #3
+        cpsie.w i, #3
+        cpsid f, #9
+        cpsid.w f, #9
+        cps #0
+        cps.w #0
+
+@ CHECK: cpsie f                        @ encoding: [0x61,0xb6]
+@ CHECK: cpsid a                        @ encoding: [0x74,0xb6]
+@ CHECK: cpsie.w f                      @ encoding: [0xaf,0xf3,0x20,0x84]
+@ CHECK: cpsid.w a                      @ encoding: [0xaf,0xf3,0x80,0x86]
+@ CHECK: cpsie i, #3                    @ encoding: [0xaf,0xf3,0x43,0x85]
+@ CHECK: cpsie i, #3                    @ encoding: [0xaf,0xf3,0x43,0x85]
+@ CHECK: cpsid f, #9                    @ encoding: [0xaf,0xf3,0x29,0x87]
+@ CHECK: cpsid f, #9                    @ encoding: [0xaf,0xf3,0x29,0x87]
+@ CHECK: cps   #0                       @ encoding: [0xaf,0xf3,0x00,0x81]
+@ CHECK: cps   #0                       @ encoding: [0xaf,0xf3,0x00,0x81]
 
 @------------------------------------------------------------------------------
 @ DBG
@@ -571,9 +609,13 @@ _func:
 @------------------------------------------------------------------------------
         isb sy
         isb
+        isb #15
+        isb #1
 
 @ CHECK: isb	sy                      @ encoding: [0xbf,0xf3,0x6f,0x8f]
 @ CHECK: isb	sy                      @ encoding: [0xbf,0xf3,0x6f,0x8f]
+@ CHECK: isb	sy                      @ encoding: [0xbf,0xf3,0x6f,0x8f]
+@ CHECK: isb	#0x1                    @ encoding: [0xbf,0xf3,0x61,0x8f]
 
 
 @------------------------------------------------------------------------------
@@ -620,8 +662,8 @@ _func:
         ldc2l p7, c1, [r8]
         ldc2l p8, c0, [r9, #-224]
         ldc2l p9, c1, [r10, #-120]!
-        ldc2l p10, c2, [r11], #16
-        ldc2l p11, c3, [r12], #-72
+        ldc2l p0, c2, [r11], #16
+        ldc2l p1, c3, [r12], #-72
 
         ldc p12, c4, [r0, #4]
         ldc p13, c5, [r1]
@@ -648,8 +690,8 @@ _func:
 @ CHECK: ldc2l	p7, c1, [r8]            @ encoding: [0xd8,0xfd,0x00,0x17]
 @ CHECK: ldc2l	p8, c0, [r9, #-224]     @ encoding: [0x59,0xfd,0x38,0x08]
 @ CHECK: ldc2l	p9, c1, [r10, #-120]!   @ encoding: [0x7a,0xfd,0x1e,0x19]
-@ CHECK: ldc2l	p10, c2, [r11], #16     @ encoding: [0xfb,0xfc,0x04,0x2a]
-@ CHECK: ldc2l	p11, c3, [r12], #-72    @ encoding: [0x7c,0xfc,0x12,0x3b]
+@ CHECK: ldc2l	p0, c2, [r11], #16      @ encoding: [0xfb,0xfc,0x04,0x20]
+@ CHECK: ldc2l	p1, c3, [r12], #-72     @ encoding: [0x7c,0xfc,0x12,0x31]
 
 @ CHECK: ldc	p12, c4, [r0, #4]       @ encoding: [0x90,0xed,0x01,0x4c]
 @ CHECK: ldc	p13, c5, [r1]           @ encoding: [0x91,0xed,0x00,0x5d]
@@ -704,7 +746,7 @@ _func:
 @ CHECK: ldm.w	r4, {r5, r6}            @ encoding: [0x94,0xe8,0x60,0x00]
 @ CHECK: ldm.w	r5!, {r3, r8}           @ encoding: [0xb5,0xe8,0x08,0x01]
 @ CHECK: ldm.w	r5!, {r3, r8}           @ encoding: [0xb5,0xe8,0x08,0x01]
-@ CHECK: pop.w	{pc, r4, r5, r6, r7, r8, r9, r10, r11} @ encoding: [0xbd,0xe8,0xf0,0x8f]
+@ CHECK: pop.w	{r4, r5, r6, r7, r8, r9, r10, r11, pc} @ encoding: [0xbd,0xe8,0xf0,0x8f]
 
 
 @------------------------------------------------------------------------------
@@ -764,6 +806,43 @@ _func:
 @ CHECK: ldr.w	lr, _strcmp-4           @ encoding: [0x5f'A',0xf8'A',A,0xe0'A']
 @ CHECK: @   fixup A - offset: 0, value: _strcmp-4, kind: fixup_t2_ldst_pcrel_12
 
+        ldr r7, [pc, #8]
+        ldr.n r7, [pc, #8]
+        ldr.w r7, [pc, #8]
+        ldr r4, [pc, #1020]
+        ldr r3, [pc, #-1020]
+        ldr r6, [pc, #1024]
+        ldr r0, [pc, #-1024]
+        ldr r2, [pc, #4095]
+        ldr r1, [pc, #-4095]
+        ldr r8, [pc, #132]
+        ldr pc, [pc, #256]
+        ldr pc, [pc, #-400]
+
+@ CHECK: ldr	r7, [pc, #8]            @ encoding: [0x02,0x4f]
+@ CHECK: ldr	r7, [pc, #8]            @ encoding: [0x02,0x4f]
+@ CHECK: ldr.w	r7, [pc, #8]            @ encoding: [0xdf,0xf8,0x08,0x70]
+@ CHECK: ldr	r4, [pc, #1020]       @ encoding: [0xff,0x4c]
+@ CHECK: ldr.w	r3, [pc, #-1020]        @ encoding: [0x5f,0xf8,0xfc,0x33]
+@ CHECK: ldr.w	r6, [pc, #1024]       @ encoding: [0xdf,0xf8,0x00,0x64]
+@ CHECK: ldr.w	r0, [pc, #-1024]      @ encoding: [0x5f,0xf8,0x00,0x04]
+@ CHECK: ldr.w	r2, [pc, #4095]       @ encoding: [0xdf,0xf8,0xff,0x2f]
+@ CHECK: ldr.w	r1, [pc, #-4095]      @ encoding: [0x5f,0xf8,0xff,0x1f]
+@ CHECK: ldr.w	r8, [pc, #132]        @ encoding: [0xdf,0xf8,0x84,0x80]
+@ CHECK: ldr.w	pc, [pc, #256]          @ encoding: [0xdf,0xf8,0x00,0xf1]
+@ CHECK: ldr.w	pc, [pc, #-400]         @ encoding: [0x5f,0xf8,0x90,0xf1]
+
+        ldrb  r9, [pc, #-0]
+        ldrsb r11, [pc, #-0]
+        ldrh  r10, [pc, #-0]
+        ldrsh r1, [pc, #-0]
+        ldr   r5, [pc, #-0]
+
+@ CHECK: ldrb.w	r9, [pc, #-0]           @ encoding: [0x1f,0xf8,0x00,0x90]
+@ CHECK: ldrsb.w	r11, [pc, #-0]        @ encoding: [0x1f,0xf9,0x00,0xb0]
+@ CHECK: ldrh.w	r10, [pc, #-0]          @ encoding: [0x3f,0xf8,0x00,0xa0]
+@ CHECK: ldrsh.w	r1, [pc, #-0]         @ encoding: [0x3f,0xf9,0x00,0x10]
+@ CHECK: ldr.w	r5, [pc, #-0]           @ encoding: [0x5f,0xf8,0x00,0x50]
 
 @------------------------------------------------------------------------------
 @ LDR(register)
@@ -1244,8 +1323,15 @@ _func:
         movlo r1, #-1
 
         @ alias for mvn
-	mov r3, #-3
+        mov r3, #-3
+        mov r11, #0xabcd
+        movs r0, #1
+        it ne
+        movne r3, #15
 
+        itt eq
+        moveq r0, #255
+        moveq r1, #256
 
 @ CHECK: movs	r1, #21                 @ encoding: [0x15,0x21]
 @ CHECK: movs.w	r1, #21                 @ encoding: [0x5f,0xf0,0x15,0x01]
@@ -1264,6 +1350,14 @@ _func:
 @ CHECK: it	lo                      @ encoding: [0x38,0xbf]
 @ CHECK: movlo.w	r1, #-1         @ encoding: [0x4f,0xf0,0xff,0x31]
 @ CHECK: mvn	r3, #2                  @ encoding: [0x6f,0xf0,0x02,0x03]
+@ CHECK: movw	r11, #43981             @ encoding: [0x4a,0xf6,0xcd,0x3b]
+@ CHECK: movs	r0, #1                  @ encoding: [0x01,0x20]
+@ CHECK: it	ne                      @ encoding: [0x18,0xbf]
+@ CHECK: movne	r3, #15                 @ encoding: [0x0f,0x23]
+
+@ CHECK: itt    eq                      @ encoding: [0x04,0xbf]
+@ CHECK: moveq  r0, #255                @ encoding: [0xff,0x20]
+@ CHECK: movweq r1, #256                @ encoding: [0x40,0xf2,0x00,0x11]
 
 @------------------------------------------------------------------------------
 @ MOV(shifted register)
@@ -1322,16 +1416,19 @@ _func:
 @ MRC/MRC2
 @------------------------------------------------------------------------------
         mrc  p14, #0, r1, c1, c2, #4
-        mrc2  p14, #0, r1, c1, c2, #4
-        mrc p11, #1, r1, c2, c2
+        mrc  p15, #7, apsr_nzcv, c15, c6, #6
+        mrc  p9, #1, r1, c2, c2
         mrc2 p12, #3, r3, c3, c4
-
-@ CHECK: mrc	p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
-@ CHECK: mrc2	p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xfe,0x92,0x1e]
-@ CHECK: mrc	p11, #1, r1, c2, c2, #0 @ encoding: [0x32,0xee,0x12,0x1b]
-@ CHECK: mrc2	p12, #3, r3, c3, c4, #0 @ encoding: [0x73,0xfe,0x14,0x3c]
-
-
+        mrc2 p14, #0, r1, c1, c2, #4
+        mrc2 p8, #7, apsr_nzcv, c15, c0, #1
+
+@ CHECK: mrc  p14, #0, r1, c1, c2, #4            @ encoding: [0x11,0xee,0x92,0x1e]
+@ CHECK: mrc  p15, #7, apsr_nzcv, c15, c6, #6    @ encoding: [0xff,0xee,0xd6,0xff]
+@ CHECK: mrc  p9, #1, r1, c2, c2, #0             @ encoding: [0x32,0xee,0x12,0x19]
+@ CHECK: mrc2 p12, #3, r3, c3, c4, #0            @ encoding: [0x73,0xfe,0x14,0x3c]
+@ CHECK: mrc2 p14, #0, r1, c1, c2, #4            @ encoding: [0x11,0xfe,0x92,0x1e]
+@ CHECK: mrc2 p8, #7, apsr_nzcv, c15, c0, #1     @ encoding: [0xff,0xfe,0x30,0xf8]
+
 @------------------------------------------------------------------------------
 @ MRRC/MRRC2
 @------------------------------------------------------------------------------
@@ -1565,6 +1662,9 @@ _func:
 @ FIXME: pld	_foo                    @ encoding: [0x9f'A',0xf8'A',A,0xf0'A']
             @   fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
 
+        pld [pc,#-4095]
+@ CHECK: pld [pc, #-4095]            @ encoding: [0x1f,0xf8,0xff,0xff]
+
 
 @------------------------------------------------------------------------------
 @ PLD(register)
@@ -1591,12 +1691,16 @@ _func:
         pli [r6, #33]
         pli [r6, #257]
         pli [r7, #257]
+        pli [pc, #+4095]
+        pli [pc, #-4095]
 
 @ CHECK: pli	[r5, #-4]               @ encoding: [0x15,0xf9,0x04,0xfc]
 @ CHECK: pli	[r6, #32]               @ encoding: [0x96,0xf9,0x20,0xf0]
 @ CHECK: pli	[r6, #33]               @ encoding: [0x96,0xf9,0x21,0xf0]
 @ CHECK: pli	[r6, #257]              @ encoding: [0x96,0xf9,0x01,0xf1]
 @ CHECK: pli	[r7, #257]              @ encoding: [0x97,0xf9,0x01,0xf1]
+@ CHECK: pli    [pc, #4095]             @ encoding: [0x9f,0xf9,0xff,0xff]
+@ CHECK: pli    [pc, #-4095]            @ encoding: [0x1f,0xf9,0xff,0xff]
 
 
 @------------------------------------------------------------------------------
@@ -2345,10 +2449,10 @@ _func:
 @ CHECK: srsia	sp, #0                  @ encoding: [0x8d,0xe9,0x00,0xc0]
 @ CHECK: srsdb	sp!, #19                @ encoding: [0x2d,0xe8,0x13,0xc0]
 @ CHECK: srsia	sp!, #2                 @ encoding: [0xad,0xe9,0x02,0xc0]
-@ CHECK: srsdb	sp, #10                 @ encoding: [0x0d,0xe8,0x0a,0xc0]
-@ CHECK: srsia	sp, #9                  @ encoding: [0x8d,0xe9,0x09,0xc0]
-@ CHECK: srsdb	sp!, #5                 @ encoding: [0x2d,0xe8,0x05,0xc0]
+@ CHECK: srsia	sp, #10                 @ encoding: [0x8d,0xe9,0x0a,0xc0]
+@ CHECK: srsdb	sp, #9                  @ encoding: [0x0d,0xe8,0x09,0xc0]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
+@ CHECK: srsdb	sp!, #5                 @ encoding: [0x2d,0xe8,0x05,0xc0]
 @ CHECK: srsia	sp, #5                  @ encoding: [0x8d,0xe9,0x05,0xc0]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
 
@@ -2371,10 +2475,10 @@ _func:
 @ CHECK: srsia	sp, #0                  @ encoding: [0x8d,0xe9,0x00,0xc0]
 @ CHECK: srsdb	sp!, #19                @ encoding: [0x2d,0xe8,0x13,0xc0]
 @ CHECK: srsia	sp!, #2                 @ encoding: [0xad,0xe9,0x02,0xc0]
-@ CHECK: srsdb	sp, #10                 @ encoding: [0x0d,0xe8,0x0a,0xc0]
-@ CHECK: srsia	sp, #9                  @ encoding: [0x8d,0xe9,0x09,0xc0]
-@ CHECK: srsdb	sp!, #5                 @ encoding: [0x2d,0xe8,0x05,0xc0]
+@ CHECK: srsia	sp, #10                 @ encoding: [0x8d,0xe9,0x0a,0xc0]
+@ CHECK: srsdb	sp, #9                  @ encoding: [0x0d,0xe8,0x09,0xc0]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
+@ CHECK: srsdb	sp!, #5                 @ encoding: [0x2d,0xe8,0x05,0xc0]
 @ CHECK: srsia	sp, #5                  @ encoding: [0x8d,0xe9,0x05,0xc0]
 @ CHECK: srsia	sp!, #5                 @ encoding: [0xad,0xe9,0x05,0xc0]
 
@@ -2450,8 +2554,8 @@ _func:
         stc2l p7, c1, [r8]
         stc2l p8, c0, [r9, #-224]
         stc2l p9, c1, [r10, #-120]!
-        stc2l p10, c2, [r11], #16
-        stc2l p11, c3, [r12], #-72
+        stc2l p0, c2, [r11], #16
+        stc2l p1, c3, [r12], #-72
 
         stc p12, c4, [r0, #4]
         stc p13, c5, [r1]
@@ -2478,8 +2582,8 @@ _func:
 @ CHECK: stc2l	p7, c1, [r8]            @ encoding: [0xc8,0xfd,0x00,0x17]
 @ CHECK: stc2l	p8, c0, [r9, #-224]     @ encoding: [0x49,0xfd,0x38,0x08]
 @ CHECK: stc2l	p9, c1, [r10, #-120]!   @ encoding: [0x6a,0xfd,0x1e,0x19]
-@ CHECK: stc2l	p10, c2, [r11], #16     @ encoding: [0xeb,0xfc,0x04,0x2a]
-@ CHECK: stc2l	p11, c3, [r12], #-72    @ encoding: [0x6c,0xfc,0x12,0x3b]
+@ CHECK: stc2l	p0, c2, [r11], #16      @ encoding: [0xeb,0xfc,0x04,0x20]
+@ CHECK: stc2l	p1, c3, [r12], #-72     @ encoding: [0x6c,0xfc,0x12,0x31]
 
 @ CHECK: stc	p12, c4, [r0, #4]       @ encoding: [0x80,0xed,0x01,0x4c]
 @ CHECK: stc	p13, c5, [r1]           @ encoding: [0x81,0xed,0x00,0x5d]
@@ -2612,6 +2716,7 @@ _func:
         strb r9, [r2], #4
         strb r3, [sp], #-4
         strb r4, [r8, #-0]!
+        strb r1, [r0], #-0
 
 @ CHECK: strb	r5, [r5, #-4]           @ encoding: [0x05,0xf8,0x04,0x5c]
 @ CHECK: strb.w	r5, [r6, #32]           @ encoding: [0x86,0xf8,0x20,0x50]
@@ -2625,6 +2730,7 @@ _func:
 @ CHECK: strb	r9, [r2], #4            @ encoding: [0x02,0xf8,0x04,0x9b]
 @ CHECK: strb	r3, [sp], #-4           @ encoding: [0x0d,0xf8,0x04,0x39]
 @ CHECK: strb	r4, [r8, #-0]!          @ encoding: [0x08,0xf8,0x00,0x4d]
+@ CHECK: strb	r1, [r0], #-0           @ encoding: [0x00,0xf8,0x00,0x19]
 
 
 @------------------------------------------------------------------------------
@@ -3487,11 +3593,21 @@ _func:
         wfige
         yieldlt
         hint.w #4
+        hint.w #3
+        hint.w #2
+        hint.w #1
+        hint.w #0
+        hint #4
         hint #3
         hint #2
         hint #1
         hint #0
 
+        itet lt
+        hintlt #15
+        hintge #16
+        hintlt #239
+
 @ CHECK: wfe                            @ encoding: [0x20,0xbf]
 @ CHECK: wfi                            @ encoding: [0x30,0xbf]
 @ CHECK: yield                          @ encoding: [0x10,0xbf]
@@ -3504,17 +3620,53 @@ _func:
 @ CHECK: wfe.w                          @ encoding: [0xaf,0xf3,0x02,0x80]
 @ CHECK: yield.w                        @ encoding: [0xaf,0xf3,0x01,0x80]
 @ CHECK: nop.w                          @ encoding: [0xaf,0xf3,0x00,0x80]
+@ CHECK: sev                            @ encoding: [0x40,0xbf]
+@ CHECK: wfi                            @ encoding: [0x30,0xbf]
+@ CHECK: wfe                            @ encoding: [0x20,0xbf]
+@ CHECK: yield                          @ encoding: [0x10,0xbf]
+@ CHECK: nop                            @ encoding: [0x00,0xbf]
 
+@ CHECK: itet	lt                      @ encoding: [0xb6,0xbf]
+@ CHECK: hintlt #15                     @ encoding: [0xf0,0xbf]
+@ CHECK: hintge.w #16                   @ encoding: [0xaf,0xf3,0x10,0x80]
+@ CHECK: hintlt.w #239                  @ encoding: [0xaf,0xf3,0xef,0x80]
+
+@------------------------------------------------------------------------------
+@ Unallocated wide/narrow hints
+@------------------------------------------------------------------------------
+        hint #7
+        hint.w #7
+@ CHECK: hint #7                        @ encoding: [0x70,0xbf]
+@ CHECK: hint.w #7                      @ encoding: [0xaf,0xf3,0x07,0x80]
 
 @------------------------------------------------------------------------------
 @ Alternate syntax for LDR*(literal) encodings
 @------------------------------------------------------------------------------
+        ldrb r11, [pc, #22]
+        ldrh r11, [pc, #22]
+        ldrsb r11, [pc, #22]
+        ldrsh r11, [pc, #22]
+        ldr.w r11, [pc, #22]
+        ldrb.w r11, [pc, #22]
+        ldrh.w r11, [pc, #22]
+        ldrsb.w r11, [pc, #22]
+        ldrsh.w r11, [pc, #22]
+
+@ CHECK: ldrb.w r11, [pc, #22]        @ encoding: [0x9f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w r11, [pc, #22]        @ encoding: [0xbf,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #22]       @ encoding: [0x9f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #22]       @ encoding: [0xbf,0xf9,0x16,0xb0]
+@ CHECK: ldr.w r11, [pc, #22]         @ encoding: [0xdf,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w r11, [pc, #22]        @ encoding: [0x9f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w r11, [pc, #22]        @ encoding: [0xbf,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #22]       @ encoding: [0x9f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #22]       @ encoding: [0xbf,0xf9,0x16,0xb0]
+
         ldr r11, [pc, #-22]
         ldrb r11, [pc, #-22]
         ldrh r11, [pc, #-22]
         ldrsb r11, [pc, #-22]
         ldrsh r11, [pc, #-22]
-
         ldr.w r11, [pc, #-22]
         ldrb.w r11, [pc, #-22]
         ldrh.w r11, [pc, #-22]
@@ -3533,5 +3685,9 @@ _func:
 @ CHECK: ldrsh.w r11, [pc, #-22]        @ encoding: [0x3f,0xf9,0x16,0xb0]
 
 @ rdar://12596361
-        ldr r1, [pc, #12]
-@ CHECK: ldr.n r1, [pc, #12]        @ encoding: [0x03,0x49]
+         ldr r1, [pc, #12]
+@ CHECK: ldr r1, [pc, #12]              @ encoding: [0x03,0x49]
+
+@ rdar://14214063
+         subs pc, lr, #4
+@ CHECK: subs pc, lr, #4                @ encoding: [0xde,0xf3,0x04,0x8f]
diff --git a/test/MC/ARM/crc32-thumb.s b/test/MC/ARM/crc32-thumb.s
new file mode 100644
index 000000000000..3a0e7a9229a9
--- /dev/null
+++ b/test/MC/ARM/crc32-thumb.s
@@ -0,0 +1,30 @@
+@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+@ RUN: not llvm-mc -triple=thumbv8 -mattr=-crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC
+        crc32b  r0, r1, r2
+        crc32h  r0, r1, r2
+        crc32w  r0, r1, r2
+
+@ CHECK:  crc32b    r0, r1, r2              @ encoding: [0xc1,0xfa,0x82,0xf0]
+@ CHECK:  crc32h    r0, r1, r2              @ encoding: [0xc1,0xfa,0x92,0xf0]
+@ CHECK:  crc32w    r0, r1, r2              @ encoding: [0xc1,0xfa,0xa2,0xf0]
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
+
+        crc32cb  r0, r1, r2
+        crc32ch  r0, r1, r2
+        crc32cw  r0, r1, r2
+
+@ CHECK:  crc32cb   r0, r1, r2              @ encoding: [0xd1,0xfa,0x82,0xf0]
+@ CHECK:  crc32ch   r0, r1, r2              @ encoding: [0xd1,0xfa,0x92,0xf0]
+@ CHECK:  crc32cw   r0, r1, r2              @ encoding: [0xd1,0xfa,0xa2,0xf0]
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
diff --git a/test/MC/ARM/crc32.s b/test/MC/ARM/crc32.s
new file mode 100644
index 000000000000..45a1f0ccadb6
--- /dev/null
+++ b/test/MC/ARM/crc32.s
@@ -0,0 +1,30 @@
+@ RUN: llvm-mc -triple=armv8 -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+@ RUN: not llvm-mc -triple=armv8 -mattr=-crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC
+        crc32b  r0, r1, r2
+        crc32h  r0, r1, r2
+        crc32w  r0, r1, r2
+
+@ CHECK:  crc32b    r0, r1, r2              @ encoding: [0x42,0x00,0x01,0xe1]
+@ CHECK:  crc32h    r0, r1, r2              @ encoding: [0x42,0x00,0x21,0xe1]
+@ CHECK:  crc32w    r0, r1, r2              @ encoding: [0x42,0x00,0x41,0xe1]
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
+
+        crc32cb  r0, r1, r2
+        crc32ch  r0, r1, r2
+        crc32cw  r0, r1, r2
+
+@ CHECK:  crc32cb   r0, r1, r2              @ encoding: [0x42,0x02,0x01,0xe1]
+@ CHECK:  crc32ch   r0, r1, r2              @ encoding: [0x42,0x02,0x21,0xe1]
+@ CHECK:  crc32cw   r0, r1, r2              @ encoding: [0x42,0x02,0x41,0xe1]
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
+@ CHECK-NOCRC: error: instruction requires: crc
diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll
index e3325b6bf6e6..9fccf2e9f880 100644
--- a/test/MC/ARM/data-in-code.ll
+++ b/test/MC/ARM/data-in-code.ll
@@ -1,7 +1,9 @@
-;; RUN: llc -O0 -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
+;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort \
+;; RUN:   -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
 ;; RUN:   llvm-readobj -t | FileCheck -check-prefix=ARM %s
 
-;; RUN: llc -O0 -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
+;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort \
+;; RUN:   -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
 ;; RUN:   llvm-readobj -t | FileCheck -check-prefix=TMB %s
 
 ;; Ensure that if a jump table is generated that it has Mapping Symbols
@@ -119,7 +121,7 @@ exit:
 
 ;; ARM:        Symbol {
 ;; ARM:          Name: $a
-;; ARM-NEXT:     Value: 0xAC
+;; ARM-NEXT:     Value: 0x{{[0-9A-F]+}}
 ;; ARM-NEXT:     Size: 0
 ;; ARM-NEXT:     Binding: Local
 ;; ARM-NEXT:     Type: None
@@ -135,7 +137,7 @@ exit:
 
 ;; ARM:        Symbol {
 ;; ARM:          Name: $d
-;; ARM-NEXT:     Value: 0x30
+;; ARM-NEXT:     Value: 0x{{[0-9A-F]+}}
 ;; ARM-NEXT:     Size: 0
 ;; ARM-NEXT:     Binding: Local
 ;; ARM-NEXT:     Type: None
@@ -146,7 +148,7 @@ exit:
 
 ;; TMB:        Symbol {
 ;; TMB:          Name: $d.2
-;; TMB-NEXT:     Value: 0x16
+;; TMB-NEXT:     Value: 0x{{[0-9A-F]+}}
 ;; TMB-NEXT:     Size: 0
 ;; TMB-NEXT:     Binding: Local
 ;; TMB-NEXT:     Type: None
@@ -164,7 +166,7 @@ exit:
 
 ;; TMB:        Symbol {
 ;; TMB:          Name: $t
-;; TMB-NEXT:     Value: 0x36
+;; TMB-NEXT:     Value: 0x{{[0-9A-F]+}}
 ;; TMB-NEXT:     Size: 0
 ;; TMB-NEXT:     Binding: Local
 ;; TMB-NEXT:     Type: None
diff --git a/test/MC/ARM/deprecated-v8.s b/test/MC/ARM/deprecated-v8.s
new file mode 100644
index 000000000000..aa72c2e7bd8e
--- /dev/null
+++ b/test/MC/ARM/deprecated-v8.s
@@ -0,0 +1,51 @@
+@ RUN: llvm-mc -triple armv8 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV8
+@ RUN: llvm-mc -triple thumbv8 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMBV8
+@ RUN: llvm-mc -triple armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV7
+@ RUN: llvm-mc -triple thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMBV7
+@ RUN: llvm-mc -triple armv6 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV6
+setend be
+@ CHECK-ARMV8: warning: deprecated
+@ CHECK-THUMBV8: warning: deprecated
+@ CHECK-ARMV7-NOT: warning: deprecated
+@ CHECK-THUMBV7-NOT: warning: deprecated
+mcr p15, #0, r5, c7, c5, #4
+@ CHECK-ARMV8: warning: deprecated since v7, use 'isb'
+@ CHECK-THUMBV8: warning: deprecated since v7, use 'isb'
+@ CHECK-ARMV7: warning: deprecated since v7, use 'isb'
+@ CHECK-THUMBV7: warning: deprecated since v7, use 'isb'
+@ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'isb'
+mcr p15, #0, r5, c7, c10, #4
+@ CHECK-ARMV8: warning: deprecated since v7, use 'dsb'
+@ CHECK-THUMBV8: warning: deprecated since v7, use 'dsb'
+@ CHECK-ARMV7: warning: deprecated since v7, use 'dsb'
+@ CHECK-THUMBV7: warning: deprecated since v7, use 'dsb'
+@ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'dsb'
+mcr p15, #0, r5, c7, c10, #5
+@ CHECK-ARMV8: warning: deprecated since v7, use 'dmb'
+@ CHECK-THUMBV8: warning: deprecated since v7, use 'dmb'
+@ CHECK-ARMV7: warning: deprecated since v7, use 'dmb'
+@ CHECK-THUMBV7: warning: deprecated since v7, use 'dmb'
+@ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'dmb'
+it ge
+movge r0, #4096
+@ CHECK-THUMBV8: warning: deprecated instruction in IT block
+@ CHECK-THUMBV7-NOT: warning
+ite ge
+addge r0, r1
+addlt r0, r2
+@ CHECK-ARMV8: warning: applying IT instruction to more than one subsequent instruction is deprecated
+@ CHECK-THUMBV8: warning: applying IT instruction to more than one subsequent instruction is deprecated
+@ CHECK-THUMBV7-NOT: warning
+it ge
+movge r0, pc // invalid operand
+@ CHECK-THUMBV8: warning: deprecated instruction in IT block
+@ CHECK-THUMBV7-NOT: warning
+it ge
+revge r0, r0 // invalid instruction
+@ CHECK-THUMBV8: warning: deprecated instruction in IT block
+@ CHECK-THUMBV7-NOT: warning
+it ge
+clzge r0, r0 // only has 32-bit form
+@ CHECK-THUMBV8: warning: deprecated instruction in IT block
+@ CHECK-THUMBV7-NOT: warning
+
diff --git a/test/MC/ARM/diagnostics-noneon.s b/test/MC/ARM/diagnostics-noneon.s
new file mode 100644
index 000000000000..310344ad710f
--- /dev/null
+++ b/test/MC/ARM/diagnostics-noneon.s
@@ -0,0 +1,7 @@
+@ RUN: not llvm-mc -triple=armv7-apple-darwin -mattr=-neon < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+        vmov d5, d10
+        vmov q4, q5
+@ CHECK-ERRORS: error: instruction requires: NEON
+@ CHECK-ERRORS: error: instruction requires: NEON
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index d65cfd7a67a5..3c26f6d645c8 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -1,5 +1,7 @@
 @ RUN: not llvm-mc -triple=armv7-apple-darwin < %s 2> %t
 @ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+@ RUN: not llvm-mc -triple=armv8 < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V8 < %t %s
 
 @ Check for various assembly diagnostic messages on invalid input.
 
@@ -93,6 +95,26 @@
 
 @ CHECK-ERRORS: error: invalid operand for instruction
 
+        @ Out of range immediates for v8 HLT instruction.
+        hlt #65536
+        hlt #-1
+@CHECK-ERRORS-V8: error: invalid operand for instruction
+@CHECK-ERRORS-V8:         hlt #65536
+@CHECK-ERRORS-V8:              ^
+@CHECK-ERRORS-V8: error: invalid operand for instruction
+@CHECK-ERRORS-V8:         hlt #-1
+@CHECK-ERRORS-V8:              ^
+
+        @ Illegal condition code for v8 HLT instruction.
+        hlteq #2
+        hltlt #23
+@CHECK-ERRORS-V8: error: instruction 'hlt' is not predicable, but condition code specified
+@CHECK-ERRORS-V8:        hlteq #2
+@CHECK-ERRORS-V8:        ^
+@CHECK-ERRORS-V8: error: instruction 'hlt' is not predicable, but condition code specified
+@CHECK-ERRORS-V8:        hltlt #23
+@CHECK-ERRORS-V8:        ^
+
         @ Out of range 4 and 3 bit immediates on CDP[2]
 
         @ Out of range immediates for CDP/CDP2
@@ -129,6 +151,11 @@
 @ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
 @ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
 
+        @ p10 and p11 are reserved for NEON
+        mcr p10, #2, r5, c1, c1, #4
+        mcrr p11, #8, r5, r4, c1
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
 
         @ Out of range immediate for MOV
         movw r9, 0x10000
@@ -371,3 +398,70 @@
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS:         msr foo, #0
 @ CHECK-ERRORS:             ^
+
+        isb #-1
+        isb #16
+@ CHECK-ERRORS: error: immediate value out of range
+@ CHECK-ERRORS: error: immediate value out of range
+
+        nop.n
+@ CHECK-ERRORS: error: instruction with .n (narrow) qualifier not allowed in arm mode
+
+	dmbeq #5
+	dsble #15
+	isblo #7
+@ CHECK-ERRORS: error: instruction 'dmb' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'dsb' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'isb' is not predicable, but condition code specified
+
+	dmblt
+	dsbne
+	isbeq
+@ CHECK-ERRORS: error: instruction 'dmb' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'dsb' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'isb' is not predicable, but condition code specified
+
+        mcr2le  p7, #1, r5, c1, c1, #4
+        mcrr2ne p7, #15, r5, r4, c1
+        mrc2lo  p14, #0, r1, c1, c2, #4
+        mrrc2lo  p7, #1, r5, r4, c1
+        cdp2hi   p10, #0, c6, c12, c0, #7
+@ CHECK-ERRORS: error: instruction 'mcr2' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'mcrr2' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'mrc2' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'mrrc2' is not predicable, but condition code specified
+@ CHECK-ERRORS: error: instruction 'cdp2' is not predicable, but condition code specified
+
+        bkpteq #7
+@ CHECK-ERRORS: error: instruction 'bkpt' is not predicable, but condition code specified
+
+        ldm r2!, {r2, r3}
+        ldmdb r2!, {r2, r3}
+        ldmda r2!, {r2, r3}
+        popeq {sp}
+@ CHECK-ERRORS: error: writeback register not allowed in register list
+@ CHECK-ERRORS: error: writeback register not allowed in register list
+@ CHECK-ERRORS: error: writeback register not allowed in register list
+@ CHECK-ERRORS: error: writeback register not allowed in register list
+
+        vrintz.f32.f32 s0, s1
+        vrintr.f32 s0, s1
+        vrintx.f64.f64 d2, d5
+        vrintz.f64 d10, d9
+        vrinta.f32.f32 s6, s7
+        vrintn.f32 s8, s9
+        vrintp.f64.f64 d10, d11
+        vrintm.f64 d12, d13
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS: error: instruction requires: FPARMv8
+
+        stm sp!, {r0, pc}^
+        ldm sp!, {r0}^
+@ CHECK-ERRORS: error: system STM cannot have writeback register
+@ CHECK-ERRORS: error: writeback register only allowed on system LDM if PC in register-list
diff --git a/test/MC/ARM/directive-cpu.s b/test/MC/ARM/directive-cpu.s
new file mode 100644
index 000000000000..952dd93f370c
--- /dev/null
+++ b/test/MC/ARM/directive-cpu.s
@@ -0,0 +1,26 @@
+@ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ CHECK: Name: .ARM.attribute
+@ CHECK: SectionData (
+
+@ <format-version>
+@ CHECK: 41
+
+@ <section-length>
+@ CHECK: 1A0000 00
+
+@ <vendor-name> "aeabi\0"
+@ CHECK: 616561 626900
+
+@ <file-tag>
+@ CHECK: 01
+
+@ <size>
+@ CHECK: 10000000
+
+	.cpu	cortex-a8
+@ CHECK: 05
+@ CHECK: 434F52 5445582D 413800
+
+@ CHECK: )
diff --git a/test/MC/ARM/directive-eabi_attribute.s b/test/MC/ARM/directive-eabi_attribute.s
new file mode 100644
index 000000000000..c060b809c843
--- /dev/null
+++ b/test/MC/ARM/directive-eabi_attribute.s
@@ -0,0 +1,56 @@
+@ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ CHECK: Name: .ARM.attribute
+@ CHECK: SectionData (
+
+@ <format-version>
+@ CHECK: 41
+
+@ <section-length>
+@ CHECK: 250000 00
+
+@ <vendor-name> "aeabi\0"
+@ CHECK: 616561 626900
+
+@ <file-tag>
+@ CHECK: 01
+
+@ <size>
+@ CHECK: 1B000000
+
+@ <attribute>*
+
+	.eabi_attribute 6, 10
+@ CHECK: 060A
+
+	.eabi_attribute 7, 65
+@ CHECK: 0741
+
+	.eabi_attribute 8, 1
+@ CHECK: 0801
+
+	.eabi_attribute 9, 2
+@ CHECK: 0902
+
+	.eabi_attribute 10, 3
+@ CHECK: 0A03
+
+	.eabi_attribute 12, 1
+@ CHECK: 0C01
+
+	.eabi_attribute 20, 1
+@ CHECK: 1401
+
+	.eabi_attribute 21, 1
+@ CHECK: 1501
+
+	.eabi_attribute 23, 3
+@ CHECK: 1703
+
+	.eabi_attribute 24, 1
+@ CHECK: 1801
+
+	.eabi_attribute 25, 1
+@ CHECK: 1901
+@ CHECK: )
diff --git a/test/MC/ARM/directive-fpu-multiple.s b/test/MC/ARM/directive-fpu-multiple.s
new file mode 100644
index 000000000000..6a93f246822f
--- /dev/null
+++ b/test/MC/ARM/directive-fpu-multiple.s
@@ -0,0 +1,26 @@
+@ Check multiple .fpu directives.
+
+@ The later .fpu directive should overwrite the earlier one.
+@ See also: directive-fpu-multiple2.s.
+
+@ RUN: llvm-mc < %s -triple arm-unknown-linux-gnueabi -filetype=obj \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+	.fpu neon
+	.fpu vfpv4
+
+@ CHECK:      Name: .ARM.attributes
+@ CHECK-NEXT: Type: SHT_ARM_ATTRIBUTES (0x70000003)
+@ CHECK-NEXT: Flags [ (0x0)
+@ CHECK-NEXT: ]
+@ CHECK-NEXT: Address: 0x0
+@ CHECK-NEXT: Offset: 0x34
+@ CHECK-NEXT: Size: 18
+@ CHECK-NEXT: Link: 0
+@ CHECK-NEXT: Info: 0
+@ CHECK-NEXT: AddressAlignment: 1
+@ CHECK-NEXT: EntrySize: 0
+@ CHECK-NEXT: SectionData (
+@ CHECK-NEXT:   0000: 41110000 00616561 62690001 07000000
+@ CHECK-NEXT:   0010: 0A05
+@ CHECK-NEXT: )
diff --git a/test/MC/ARM/directive-fpu.s b/test/MC/ARM/directive-fpu.s
new file mode 100644
index 000000000000..24e159c74fca
--- /dev/null
+++ b/test/MC/ARM/directive-fpu.s
@@ -0,0 +1,26 @@
+@ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ CHECK: Name: .ARM.attribute
+@ CHECK: SectionData (
+
+@ <format-version>
+@ CHECK: 41
+
+@ <section-length>
+@ CHECK: 130000 00
+
+@ <vendor-name> "aeabi\0"
+@ CHECK: 616561 626900
+
+@ <file-tag>
+@ CHECK: 01
+
+@ <size>
+@ CHECK: 09000000
+
+	.fpu	neon
+@ CHECK: 0A03
+@ CHECK: 0C01
+
+@ CHECK: )
diff --git a/test/MC/ARM/eh-compact-pr0.s b/test/MC/ARM/eh-compact-pr0.s
new file mode 100644
index 000000000000..1d825bf3ddc1
--- /dev/null
+++ b/test/MC/ARM/eh-compact-pr0.s
@@ -0,0 +1,104 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the compact pr0 model
+
+	.syntax unified
+
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+func1:
+	.fnstart
+	.save	{r11, lr}
+	push	{r11, lr}
+	.setfp	r11, sp
+	mov	r11, sp
+	pop	{r11, lr}
+	mov	pc, lr
+	.fnend
+
+	.section	.TEST2
+	.globl	func2
+	.align	2
+	.type	func2,%function
+func2:
+	.fnstart
+	.save	{r11, lr}
+	push	{r11, lr}
+	pop	{r11, pc}
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ Check .TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00482DE9 0DB0A0E1 0048BDE8 0EF0A0E1  |.H-......H......|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check .ARM.exidx.TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx.TEST1
+@-------------------------------------------------------------------------------
+@ 0x80   = Compact model 0, personality routine: __aeabi_unwind_cpp_pr0
+@ 0x9B   = $sp can be found in $r11
+@ 0x8480 = pop {r11, r14}
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 80849B80                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@-------------------------------------------------------------------------------
+@ The first word should be relocated to .TEST1 section.  Besides, there is
+@ another relocation entry for __aeabi_unwind_cpp_pr0, so that the linker
+@ will keep __aeabi_unwind_cpp_pr0.
+@-------------------------------------------------------------------------------
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:       0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check .TEST2 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .TEST2
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00482DE9 0088BDE8                    |.H-.....|
+@ CHECK:     )
+@ CHECK:   }
+@-------------------------------------------------------------------------------
+@ Check .ARM.exidx.TEST2 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx.TEST2
+@-------------------------------------------------------------------------------
+@ 0x80   = Compact model 0, personality routine: __aeabi_unwind_cpp_pr0
+@ 0x8480 = pop {r11, r14}
+@ 0xB0   = finish
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 B0808480                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+@-------------------------------------------------------------------------------
+@ The first word should be relocated to .TEST2 section.  Besides, there is
+@ another relocation entry for __aeabi_unwind_cpp_pr0, so that the linker
+@ will keep __aeabi_unwind_cpp_pr0.
+@-------------------------------------------------------------------------------
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .TEST2 0x0
+@ CHECK:       0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ CHECK:     ]
diff --git a/test/MC/ARM/eh-compact-pr1.s b/test/MC/ARM/eh-compact-pr1.s
new file mode 100644
index 000000000000..17d32f834e3e
--- /dev/null
+++ b/test/MC/ARM/eh-compact-pr1.s
@@ -0,0 +1,74 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the compact pr1 model
+
+	.syntax unified
+
+	.section .TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+func1:
+	.fnstart
+	.save	{r4, r5, r11, lr}
+	push	{r4, r5, r11, lr}
+	add	r0, r1, r0
+	.setfp	r11, sp, #8
+	add	r11, sp, #8
+	pop	{r4, r5, r11, pc}
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ Check .TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 30482DE9 000081E0 08B08DE2 3088BDE8  |0H-.........0...|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check .ARM.extab.TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.extab.TEST1
+@-------------------------------------------------------------------------------
+@ 0x81   = Compact model 1, personality routine: __aeabi_unwind_cpp_pr1
+@ 0x9B   = $sp can be found in $r11
+@ 0x41   = $sp = $sp - 8
+@ 0x8483 = pop {r4, r5, r11, r14}
+@ 0xB0   = finish
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 419B0181 B0B08384 00000000           |A...........|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check .ARM.exidx.TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx.TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 00000000                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+@-------------------------------------------------------------------------------
+@ The first word should be relocated to .TEST1 section, and the second word
+@ should be relocated to .ARM.extab.TEST1 section.  Besides, there is
+@ another relocation entry for __aeabi_unwind_cpp_pr1, so that the linker
+@ will keep __aeabi_unwind_cpp_pr1.
+@-------------------------------------------------------------------------------
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:       0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ CHECK:       0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ CHECK:     ]
diff --git a/test/MC/ARM/eh-directive-cantunwind-diagnostics.s b/test/MC/ARM/eh-directive-cantunwind-diagnostics.s
new file mode 100644
index 000000000000..640cc3005fcb
--- /dev/null
+++ b/test/MC/ARM/eh-directive-cantunwind-diagnostics.s
@@ -0,0 +1,106 @@
+@ RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2> %t
+@ RUN: FileCheck < %t %s
+
+@ Check the diagnostics for .cantunwind, .handlerdata, and .personality
+
+@ The .cantunwind directive can't be used with the .handlerdata directive or
+@ the .personality directive.  This test case checks the diagnostics for
+@ these conflicts.
+
+
+        .syntax unified
+        .text
+
+@-------------------------------------------------------------------------------
+@ TEST1: cantunwind + personality
+@-------------------------------------------------------------------------------
+        .globl  func1
+        .align  2
+        .type   func1,%function
+        .fnstart
+func1:
+        .cantunwind
+        .personality    __gxx_personality_v0
+@ CHECK: error: .personality can't be used with .cantunwind directive
+@ CHECK:        .personality __gxx_personality_v0
+@ CHECK:        ^
+@ CHECK: error: .cantunwind was specified here
+@ CHECK:        .cantunwind
+@ CHECK:        ^
+        .fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: cantunwind + handlerdata
+@-------------------------------------------------------------------------------
+        .globl  func2
+        .align  2
+        .type   func2,%function
+        .fnstart
+func2:
+        .cantunwind
+        .handlerdata
+@ CHECK: error: .handlerdata can't be used with .cantunwind directive
+@ CHECK:        .handlerdata
+@ CHECK:        ^
+@ CHECK: error: .cantunwind was specified here
+@ CHECK:        .cantunwind
+@ CHECK:        ^
+        .fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST3: personality + cantunwind
+@-------------------------------------------------------------------------------
+        .globl  func3
+        .align  2
+        .type   func3,%function
+        .fnstart
+func3:
+        .personality    __gxx_personality_v0
+        .cantunwind
+@ CHECK: error: .cantunwind can't be used with .personality directive
+@ CHECK:        .cantunwind
+@ CHECK:        ^
+@ CHECK: error: .personality was specified here
+@ CHECK:        .personality __gxx_personality_v0
+@ CHECK:        ^
+        .fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST4: handlerdata + cantunwind
+@-------------------------------------------------------------------------------
+        .globl  func4
+        .align  2
+        .type   func4,%function
+        .fnstart
+func4:
+        .handlerdata
+        .cantunwind
+@ CHECK: error: .cantunwind can't be used with .handlerdata directive
+@ CHECK:        .cantunwind
+@ CHECK:        ^
+@ CHECK: error: .handlerdata was specified here
+@ CHECK:        .handlerdata
+@ CHECK:        ^
+        .fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST5: cantunwind + fnstart
+@-------------------------------------------------------------------------------
+        .globl  func5
+        .align  2
+        .type   func5,%function
+        .cantunwind
+@ CHECK: error: .fnstart must precede .cantunwind directive
+@ CHECK:        .cantunwind
+@ CHECK:        ^
+        .fnstart
+func5:
+        .fnend
diff --git a/test/MC/ARM/eh-directive-cantunwind.s b/test/MC/ARM/eh-directive-cantunwind.s
new file mode 100644
index 000000000000..499c2c44dfc3
--- /dev/null
+++ b/test/MC/ARM/eh-directive-cantunwind.s
@@ -0,0 +1,51 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the .cantunwind directive
+
+@ When a function contains a .cantunwind directive, we should create an entry
+@ in corresponding .ARM.exidx, and its second word should be EXIDX_CANTUNWIND.
+
+	.syntax	unified
+
+	.text
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.cantunwind
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ Check .text section
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     SectionData (
+@ CHECK:       0000: 1EFF2FE1                             |../.|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check .ARM.exidx section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx
+@-------------------------------------------------------------------------------
+@ The first word should be the offset to .text.
+@ The second word should be EXIDX_CANTUNWIND (01000000).
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 01000000                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .text 0x0
+@ CHECK:     ]
diff --git a/test/MC/ARM/eh-directive-fnend-diagnostics.s b/test/MC/ARM/eh-directive-fnend-diagnostics.s
new file mode 100644
index 000000000000..99161eec41e6
--- /dev/null
+++ b/test/MC/ARM/eh-directive-fnend-diagnostics.s
@@ -0,0 +1,17 @@
+@ RUN: not llvm-mc %s -triple=armv7-unknown-linux-gnueabi \
+@ RUN:   -filetype=obj -o /dev/null 2>&1 | FileCheck %s
+
+@ Check the diagnostics for mismatched .fnend directive
+
+
+	.syntax unified
+	.text
+
+	.globl	func1
+	.align	2
+	.type	func1,%function
+func1:
+	.fnend
+@ CHECK: error: .fnstart must precede .fnend directive
+@ CHECK:        .fnend
+@ CHECK:        ^
diff --git a/test/MC/ARM/eh-directive-fnstart-diagnostics.s b/test/MC/ARM/eh-directive-fnstart-diagnostics.s
new file mode 100644
index 000000000000..75ddd9faebc1
--- /dev/null
+++ b/test/MC/ARM/eh-directive-fnstart-diagnostics.s
@@ -0,0 +1,31 @@
+@ RUN: not llvm-mc %s -triple=armv7-unknown-linux-gnueabi \
+@ RUN:   -filetype=obj -o /dev/null 2>&1 | FileCheck %s
+
+@ Check the diagnostics for the mismatched .fnstart directives.
+
+@ There should be some diagnostics when the previous .fnstart is not closed
+@ by the .fnend directive.
+
+
+	.syntax unified
+	.text
+
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	@ Intentionally miss the .fnend directive
+
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+@ CHECK: error: .fnstart starts before the end of previous one
+@ CHECK:        .fnstart
+@ CHECK:        ^
+@ CHECK: error: previous .fnstart starts here
+@ CHECK:        .fnstart
+@ CHECK:        ^
+func2:
+	.fnend
diff --git a/test/MC/ARM/eh-directive-handlerdata.s b/test/MC/ARM/eh-directive-handlerdata.s
new file mode 100644
index 000000000000..793d357bac76
--- /dev/null
+++ b/test/MC/ARM/eh-directive-handlerdata.s
@@ -0,0 +1,107 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the .handlerdata directive (without .personality directive)
+
+	.syntax unified
+
+@-------------------------------------------------------------------------------
+@ TEST1
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.handlerdata
+	.fnend
+
+
+@ CHECK:Section {
+@ CHECK:  Name: .TEST1
+@ CHECK:  SectionData (
+@ CHECK:    0000: 1EFF2FE1                             |../.|
+@ CHECK:  )
+@ CHECK:}
+
+@ CHECK:Section {
+@ CHECK:  Name: .ARM.extab.TEST1
+@ CHECK:  SectionData (
+@ CHECK:    0000: B0B0B080                             |....|
+@ CHECK:  )
+@ CHECK:}
+
+@ CHECK:Section {
+@ CHECK:  Name: .ARM.exidx.TEST1
+@ CHECK:  SectionData (
+@ CHECK:    0000: 00000000 00000000                    |........|
+@ CHECK:  )
+@ CHECK:}
+@-------------------------------------------------------------------------------
+@ We should see a relocation entry to __aeabi_unwind_cpp_pr0, so that the
+@ linker can keep __aeabi_unwind_cpp_pr0.
+@-------------------------------------------------------------------------------
+@ CHECK:  Relocations [
+@ CHECK:    0x0 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:    0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ CHECK:    0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ CHECK:  ]
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+@-------------------------------------------------------------------------------
+@ Use a lot of unwind opcodes to get __aeabi_unwind_cpp_pr1.
+@-------------------------------------------------------------------------------
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, r12}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, r12}
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, r12}
+	.pad	#0x240
+	sub	sp, sp, #0x240
+	add	sp, sp, #0x240
+	bx	lr
+	.handlerdata
+	.fnend
+
+
+
+@ CHECK:Section {
+@ CHECK:  Name: .TEST2
+@ CHECK:  SectionData (
+@ CHECK:    0000: F01F2DE9 F01FBDE8 09DD4DE2 09DD8DE2  |..-.......M.....|
+@ CHECK:    0010: 1EFF2FE1                             |../.|
+@ CHECK:  )
+@ CHECK:}
+
+@ CHECK:Section {
+@ CHECK:  Name: .ARM.extab.TEST2
+@ CHECK:  SectionData (
+@ CHECK:    0000: 0FB20181 B0B0FF81                    |........|
+@ CHECK:  )
+@ CHECK:}
+
+@ CHECK:Section {
+@ CHECK:  Name: .ARM.exidx.TEST2
+@ CHECK:  SectionData (
+@ CHECK:    0000: 00000000 00000000                    |........|
+@ CHECK:  )
+@ CHECK:}
+@-------------------------------------------------------------------------------
+@ We should see a relocation entry to __aeabi_unwind_cpp_pr1, so that the
+@ linker can keep __aeabi_unwind_cpp_pr1.
+@-------------------------------------------------------------------------------
+@ CHECK:  Relocations [
+@ CHECK:    0x0 R_ARM_PREL31 .TEST2 0x0
+@ CHECK:    0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ CHECK:    0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
+@ CHECK:  ]
diff --git a/test/MC/ARM/eh-directive-integrated-test.s b/test/MC/ARM/eh-directive-integrated-test.s
new file mode 100644
index 000000000000..df2b290fec92
--- /dev/null
+++ b/test/MC/ARM/eh-directive-integrated-test.s
@@ -0,0 +1,93 @@
+@ Integrated test for ARM unwind directive parser and assembler.
+
+@ This is a simplified real world test case generated from this C++ code
+@ (with and without -fomit-frame-pointer)
+@
+@   extern void print(int, int, int, int, int);
+@   extern void print(double, double, double, double, double);
+@
+@   void test(int a, int b, int c, int d, int e,
+@             double m, double n, double p, double q, double r) {
+@     try {
+@       print(a, b, c, d, e);
+@     } catch (...) {
+@       print(m, n, p, q, r);
+@     }
+@   }
+@
+@ This test case should check the unwind opcodes that adjust the stack pointer
+@ and restore the general-purpose and VFP registers.
+
+
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+
+@-------------------------------------------------------------------------------
+@ Assembly without frame pointer elimination
+@-------------------------------------------------------------------------------
+	.syntax unified
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+func1:
+	.fnstart
+	.save	{r4, r11, lr}
+	push	{r4, r11, lr}
+	.setfp	r11, sp, #4
+	add	r11, sp, #4
+	.vsave	{d8, d9, d10, d11, d12}
+	vpush	{d8, d9, d10, d11, d12}
+	.pad	#28
+	sub	sp, sp, #28
+	sub	sp, r11, #44
+	vpop	{d8, d9, d10, d11, d12}
+	pop	{r4, r11, pc}
+.Ltmp1:
+	.size	func1, .Ltmp1-func1
+	.globl	__gxx_personality_v0
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 C94A9B01 B0818484           |.....J......|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ Assembly with frame pointer elimination
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2
+	.align	2
+	.type	func2,%function
+func2:
+	.fnstart
+	.save	{r4, lr}
+	push	{r4, lr}
+	.vsave	{d8, d9, d10, d11, d12}
+	vpush	{d8, d9, d10, d11, d12}
+	.pad	#24
+	sub	sp, sp, #24
+	add	sp, sp, #24
+	vpop	{d8, d9, d10, d11, d12}
+	pop	{r4, pc}
+.Ltmp2:
+	.size	func2, .Ltmp2-func2
+	.globl	__gxx_personality_v0
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 84C90501 B0B0B0A8           |............|
+@ CHECK:   )
+@ CHECK: }
diff --git a/test/MC/ARM/eh-directive-multiple-offsets.s b/test/MC/ARM/eh-directive-multiple-offsets.s
new file mode 100644
index 000000000000..6e81f41665a9
--- /dev/null
+++ b/test/MC/ARM/eh-directive-multiple-offsets.s
@@ -0,0 +1,168 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ Check for different combination of .setfp, .pad, .save and .vsave.
+
+	.syntax	unified
+
+@-------------------------------------------------------------------------------
+@ TEST1: Check .pad before .setfp directive.
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1
+	.type	func1,%function
+	.align	2
+	.fnstart
+func1:
+	.pad	#12
+	sub	sp, sp, #12
+	.setfp	fp, sp, #8
+	add	fp, sp, #8
+	sub	sp, fp, #8
+	add	sp, sp, #12
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0009B00                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: Check .pad after .setfp directive.
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2
+	.type	func2,%function
+	.align	2
+	.fnstart
+func2:
+	.setfp	fp, sp, #8
+	add	fp, sp, #8
+	.pad	#12
+	sub	sp, sp, #12
+	add	sp, sp, #12
+	sub	sp, fp, #8
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0419B00                    |.....A..|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST3: Check .setfp, .pad, .setfp directive.
+@-------------------------------------------------------------------------------
+	.section	.TEST3
+	.globl	func3
+	.type	func3,%function
+	.align	2
+	.fnstart
+func3:
+	@ prologue:
+	.setfp	fp, sp, #4
+	add	fp, sp, #4
+	.pad	#8
+	sub	sp, sp, #8
+	.setfp	fp, sp, #4
+	add	fp, sp, #4
+
+	@ epilogue:
+	add	sp, fp, #4
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST3
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0009B00                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST4: Check ".setfp fp, sp" and ".setfp fp, fp" directive.
+@-------------------------------------------------------------------------------
+	.section	.TEST4
+	.globl	func4
+	.type	func4,%function
+	.align	2
+	.fnstart
+func4:
+	@ prologue:
+	.setfp	fp, sp, #8
+	add	fp, sp, #8
+	.setfp	fp, fp, #8
+	add	fp, fp, #8
+
+	@ epilogue:
+	sub	sp, fp, #16
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST4
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0439B00                    |.....C..|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST5: Check .setfp, .save, .setfp directive.
+@-------------------------------------------------------------------------------
+	.section	.TEST5
+	.globl	func5
+	.type	func5,%function
+	.align	2
+	.fnstart
+func5:
+	@ prologue:
+	.setfp	fp, sp, #16
+	add	fp, sp, #16
+	.save	{r4, r5, r6, r7, r8}
+	push	{r4, r5, r6, r7, r8}
+	.pad	#8
+	sub	sp, sp, #8		@ first 8-byte pad (was 'add', contradicting .pad #8)
+	.pad	#8
+	sub	sp, sp, #8		@ second 8-byte pad
+	.save	{r9, r10}
+	push	{r9, r10}
+	.setfp	fp, sp, #24
+	add	fp, sp, #24
+
+	@ epilogue:
+	sub	sp, fp, #24
+	pop	{r9, r10}
+	add	sp, sp, #16		@ undo both 8-byte pads
+	pop	{r4, r5, r6, r7, r8}
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST5
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 80459B01 B0A40360           |.....E.....`|
+@ CHECK:   )
+@ CHECK: }
diff --git a/test/MC/ARM/eh-directive-pad-diagnostics.s b/test/MC/ARM/eh-directive-pad-diagnostics.s
new file mode 100644
index 000000000000..707215962155
--- /dev/null
+++ b/test/MC/ARM/eh-directive-pad-diagnostics.s
@@ -0,0 +1,39 @@
+@ RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK < %t %s
+
+@ Check the diagnostics for .pad directive.
+
+
+	.syntax unified
+	.text
+
+@-------------------------------------------------------------------------------
+@ TEST1: .pad before .fnstart
+@-------------------------------------------------------------------------------
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.pad	#0
+@ CHECK: error: .fnstart must precede .pad directive
+@ CHECK:        .pad #0
+@ CHECK:        ^
+	.fnstart
+func1:
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: .pad after .handlerdata
+@-------------------------------------------------------------------------------
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	.handlerdata
+	.pad	#0
+@ CHECK: error: .pad must precede .handlerdata directive
+@ CHECK:        .pad #0
+@ CHECK:        ^
+	.fnend
diff --git a/test/MC/ARM/eh-directive-pad.s b/test/MC/ARM/eh-directive-pad.s
new file mode 100644
index 000000000000..f8263e6621f0
--- /dev/null
+++ b/test/MC/ARM/eh-directive-pad.s
@@ -0,0 +1,226 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ Check for different stack pointer offsets.
+
+@ The .pad directive will track the stack pointer offsets.  There are several
+@ ways to encode the stack offsets.  We have to test:
+@
+@              offset <  0x00
+@              offset == 0x00
+@     0x04  <= offset <= 0x100
+@     0x104 <= offset <= 0x200
+@     0x204 <= offset
+
+
+	.syntax unified
+
+@-------------------------------------------------------------------------------
+@ TEST1
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	.pad	#0
+	sub	sp, sp, #0
+	add	sp, sp, #0
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit nothing (will be filled up with finish opcode).
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B0B000                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2a
+	.align	2
+	.type	func2a,%function
+	.fnstart
+func2a:
+	.pad	#0x4
+	sub	sp, sp, #0x4
+	add	sp, sp, #0x4
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2b
+	.align	2
+	.type	func2b,%function
+	.fnstart
+func2b:
+	.pad	#0x100
+	sub	sp, sp, #0x100
+	add	sp, sp, #0x100
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit ((offset - 4) >> 2).
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B00000 00000000 B0B03F00  |..............?.|
+@ CHECK:   )
+@ CHECK: }
+
+
+@-------------------------------------------------------------------------------
+@ TEST3
+@-------------------------------------------------------------------------------
+	.section	.TEST3
+	.globl	func3a
+	.align	2
+	.type	func3a,%function
+	.fnstart
+func3a:
+	.pad	#0x104
+	sub	sp, sp, #0x104
+	add	sp, sp, #0x104
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func3b
+	.align	2
+	.type	func3b,%function
+	.fnstart
+func3b:
+	.pad	#0x200
+	sub	sp, sp, #0x200
+	add	sp, sp, #0x200
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x3F and ((offset - 0x104) >> 2).
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST3
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B03F0000 00000000 B03F3F00  |.....?.......??.|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST4
+@-------------------------------------------------------------------------------
+	.section	.TEST4
+	.globl	func4a
+	.align	2
+	.type	func4a,%function
+	.fnstart
+func4a:
+	.pad	#0x204
+	sub	sp, sp, #0x204
+	add	sp, sp, #0x204
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func4b
+	.align	2
+	.type	func4b,%function
+	.fnstart
+func4b:
+	.pad	#0x580
+	sub	sp, sp, #0x580
+	add	sp, sp, #0x580
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0xB2 and the ULEB128 encoding of
+@ ((offset - 0x204) >> 2).
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST4
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B000B200 00000000 01DFB200  |................|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST5
+@-------------------------------------------------------------------------------
+	.section	.TEST5
+	.globl	func5a
+	.align	2
+	.type	func5a,%function
+	.fnstart
+func5a:
+	.pad	#-0x4			@ negative pad, paired with the add below
+	add	sp, sp, #0x4
+	sub	sp, sp, #0x4		@ undo the add so sp is balanced
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func5b
+	.align	2
+	.type	func5b,%function
+	.fnstart
+func5b:
+	.pad	#-0x104			@ negative pad, paired with the add below
+	add	sp, sp, #0x104
+	sub	sp, sp, #0x104		@ undo the add so sp is balanced
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func5c
+	.align	2
+	.type	func5c,%function
+	.fnstart
+func5c:
+	.pad	#-0x204			@ negative pad, paired with the add below
+	add	sp, sp, #0x204
+	sub	sp, sp, #0x204		@ undo the add so sp is balanced
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x40 | ((-offset - 4) >> 2).  When the shifted
+@ value is greater than 0x3f, then multiple 0x7f should be emitted.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST5
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B04000 00000000 B07F4000  |......@.......@.|
+@ CHECK:     0010: 00000000 7F7F4000                    |......@.|
+@ CHECK:   )
+@ CHECK: }
diff --git a/test/MC/ARM/eh-directive-personality-diagnostics.s b/test/MC/ARM/eh-directive-personality-diagnostics.s
new file mode 100644
index 000000000000..83e9c25f0bd7
--- /dev/null
+++ b/test/MC/ARM/eh-directive-personality-diagnostics.s
@@ -0,0 +1,39 @@
+@ RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK < %t %s
+
+@ Check the diagnostics for .personality directive.
+
+
+	.syntax unified
+	.text
+
+@-------------------------------------------------------------------------------
+@ TEST1: .personality before .fnstart
+@-------------------------------------------------------------------------------
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.personality	__gxx_personality_v0
+@ CHECK: error: .fnstart must precede .personality directive
+@ CHECK:        .personality __gxx_personality_v0
+@ CHECK:        ^
+	.fnstart
+func1:
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: .personality after .handlerdata
+@-------------------------------------------------------------------------------
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	.handlerdata
+	.personality	__gxx_personality_v0
+@ CHECK: error: .personality must precede .handlerdata directive
+@ CHECK:        .personality __gxx_personality_v0
+@ CHECK:        ^
+	.fnend
diff --git a/test/MC/ARM/eh-directive-personality.s b/test/MC/ARM/eh-directive-personality.s
new file mode 100644
index 000000000000..f49372295748
--- /dev/null
+++ b/test/MC/ARM/eh-directive-personality.s
@@ -0,0 +1,89 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the .personality directive.
+
+	.syntax unified
+
+@-------------------------------------------------------------------------------
+@ TEST1
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+
+@ CHECK: Section {
+@ CHECK:   Name: .TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 1EFF2FE1                             |../.|
+@ CHECK:   )
+@ CHECK: }
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B0B000                    |........|
+@ CHECK:   )
+@ CHECK: }
+@ CHECK:   Relocations [
+@ CHECK:     0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ CHECK:   ]
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.exidx.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 00000000                    |........|
+@ CHECK:   )
+@ CHECK: }
+@ CHECK:   Relocations [
+@ CHECK:     0x0 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:     0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ CHECK:   ]
+
+
+@-------------------------------------------------------------------------------
+@ TEST2
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	bx	lr
+	.personality __gxx_personality_v0
+	@ The .handlerdata directive is intentionally omitted.  The .fnend directive should create the EXTAB entry and flush the unwind opcodes.
+	.fnend
+
+
+@ CHECK: Section {
+@ CHECK:   Name: .TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 1EFF2FE1                             |../.|
+@ CHECK:   )
+@ CHECK: }
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B0B000                    |........|
+@ CHECK:   )
+@ CHECK: }
+@ CHECK:   Relocations [
+@ CHECK:     0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ CHECK:   ]
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.exidx.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 00000000                    |........|
+@ CHECK:   )
+@ CHECK: }
+@ CHECK:   Relocations [
+@ CHECK:     0x0 R_ARM_PREL31 .TEST2 0x0
+@ CHECK:     0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
+@ CHECK:   ]
diff --git a/test/MC/ARM/eh-directive-save-diagnoatics.s b/test/MC/ARM/eh-directive-save-diagnoatics.s
new file mode 100644
index 000000000000..0e6d7404a3af
--- /dev/null
+++ b/test/MC/ARM/eh-directive-save-diagnoatics.s
@@ -0,0 +1,41 @@
+@ RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK < %t %s
+
+@ Check the diagnostics for .save directive
+
+@ .save directive should always come after .fnstart directive and
+@ before .handlerdata directive.
+
+	.syntax unified
+	.text
+
+@-------------------------------------------------------------------------------
+@ TEST1: .save before .fnstart
+@-------------------------------------------------------------------------------
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.save	{r4, r5, r6, r7}
+@ CHECK: error: .fnstart must precede .save or .vsave directives
+@ CHECK:        .save {r4, r5, r6, r7}
+@ CHECK:        ^
+	.fnstart
+func1:
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: .save after .handlerdata
+@-------------------------------------------------------------------------------
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	.handlerdata
+	.save	{r4, r5, r6, r7}
+@ CHECK: error: .save or .vsave must precede .handlerdata directive
+@ CHECK:        .save {r4, r5, r6, r7}
+@ CHECK:        ^
+	.fnend
diff --git a/test/MC/ARM/eh-directive-save.s b/test/MC/ARM/eh-directive-save.s
new file mode 100644
index 000000000000..652a7bb56c5c
--- /dev/null
+++ b/test/MC/ARM/eh-directive-save.s
@@ -0,0 +1,343 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ Check the .save directive
+
+@ The .save directive records the GPR registers which are pushed to the
+@ stack.  There are 4 different unwind opcodes:
+@
+@     0xB100: pop r[3:0]
+@     0xA0:   pop r[(4+x):4]		@ r[4+x]-r[4] must be consecutive.
+@     0xA8:   pop r14, r[(4+x):4]	@ r[4+x]-r[4] must be consecutive.
+@     0x8000: pop r[15:4]
+@
+@ If the register list specified by the .save directive can be encoded
+@ by 0xA0 or 0xA8, then the assembler should prefer them over 0x8000.
+
+
+	.syntax unified
+
+@-------------------------------------------------------------------------------
+@ TEST1
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1a
+	.align	2
+	.type	func1a,%function
+	.fnstart
+func1a:
+	.save	{r0}
+	push	{r0}
+	pop	{r0}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1b
+	.align	2
+	.type	func1b,%function
+	.fnstart
+func1b:
+	.save	{r0, r1}
+	push	{r0, r1}
+	pop	{r0, r1}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1c
+	.align	2
+	.type	func1c,%function
+	.fnstart
+func1c:
+	.save	{r0, r2}
+	push	{r0, r2}
+	pop	{r0, r2}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1d
+	.align	2
+	.type	func1d,%function
+	.fnstart
+func1d:
+	.save	{r1, r2}
+	push	{r1, r2}
+	pop	{r1, r2}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1e
+	.align	2
+	.type	func1e,%function
+	.fnstart
+func1e:
+	.save	{r0, r1, r2, r3}
+	push	{r0, r1, r2, r3}
+	pop	{r0, r1, r2, r3}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0xB100 unwind opcode.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B001B100 00000000 B003B100  |................|
+@ CHECK:     0010: 00000000 B005B100 00000000 B006B100  |................|
+@ CHECK:     0020: 00000000 B00FB100                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2a
+	.align	2
+	.type	func2a,%function
+	.fnstart
+func2a:
+	.save	{r4}
+	push	{r4}
+	pop	{r4}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2b
+	.align	2
+	.type	func2b,%function
+	.fnstart
+func2b:
+	.save	{r4, r5}
+	push	{r4, r5}
+	pop	{r4, r5}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2c
+	.align	2
+	.type	func2c,%function
+	.fnstart
+func2c:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11}
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0xA0 unwind opcode.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B0A000 00000000 B0B0A100  |................|
+@ CHECK:     0010: 00000000 B0B0A700                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST3
+@-------------------------------------------------------------------------------
+	.section	.TEST3
+	.globl	func3a
+	.align	2
+	.type	func3a,%function
+	.fnstart
+func3a:
+	.save	{r4, r14}
+	push	{r4, r14}
+	pop	{r4, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func3b
+	.align	2
+	.type	func3b,%function
+	.fnstart
+func3b:
+	.save	{r4, r5, r14}
+	push	{r4, r5, r14}
+	pop	{r4, r5, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func3c
+	.align	2
+	.type	func3c,%function
+	.fnstart
+func3c:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, r14}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, r14}
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0xA8 unwind opcode.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST3
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B0A800 00000000 B0B0A900  |................|
+@ CHECK:     0010: 00000000 B0B0AF00                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST4
+@-------------------------------------------------------------------------------
+	.section	.TEST4
+	.globl	func4a
+	.align	2
+	.type	func4a,%function
+	.fnstart
+func4a:
+	.save	{r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+	push	{r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func4b
+	.align	2
+	.type	func4b,%function
+	.fnstart
+func4b:
+	@ Note: r7 is missing intentionally.
+	.save	{r4, r5, r6, r8, r9, r10, r11}
+	push	{r4, r5, r6, r8, r9, r10, r11}
+	pop	{r4, r5, r6, r8, r9, r10, r11}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func4c
+	.align	2
+	.type	func4c,%function
+	.fnstart
+func4c:
+	@ Note: r7 is missing intentionally.
+	.save	{r4, r5, r6, r8, r9, r10, r11, r14}
+	push	{r4, r5, r6, r8, r9, r10, r11, r14}
+	pop	{r4, r5, r6, r8, r9, r10, r11, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func4d
+	.align	2
+	.type	func4d,%function
+	.fnstart
+func4d:
+	@ Note: The register list does not start with r4.
+	.save	{r5, r6, r7}
+	push	{r5, r6, r7}
+	pop	{r5, r6, r7}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func4e
+	.align	2
+	.type	func4e,%function
+	.fnstart
+func4e:
+	@ Note: The register list does not start with r4.
+	.save	{r5, r6, r7, r14}
+	push	{r5, r6, r7, r14}
+	pop	{r5, r6, r7, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x8000 unwind opcode.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST4
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0FF8500 00000000 B0F78000  |................|
+@ CHECK:     0010: 00000000 B0F78400 00000000 B00E8000  |................|
+@ CHECK:     0020: 00000000 B00E8400                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST5
+@-------------------------------------------------------------------------------
+	.section	.TEST5
+	.globl	func5a
+	.align	2
+	.type	func5a,%function
+	.fnstart
+func5a:
+	.save	{r0, r1, r2, r3, r4, r5, r6}
+	push	{r0, r1, r2, r3, r4, r5, r6}
+	pop	{r0, r1, r2, r3, r4, r5, r6}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func5b
+	.align	2
+	.type	func5b,%function
+	.fnstart
+func5b:
+	.save	{r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+	push	{r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+	pop	{r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r14}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ Check the order of unwind opcode to pop registers.
+@ 0xB10F "pop {r0-r3}" should be emitted before 0xA2 "pop {r4-r6}".
+@ 0xB10F "pop {r0-r3}" should be emitted before 0x85FF "pop {r4-r12, r14}".
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST5
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 A20FB100 00000000 850FB101  |................|
+@ CHECK:     0010: B0B0B0FF                             |....|
+@ CHECK:   )
+@ CHECK: }
diff --git a/test/MC/ARM/eh-directive-section-comdat.s b/test/MC/ARM/eh-directive-section-comdat.s
new file mode 100644
index 000000000000..296718f096ca
--- /dev/null
+++ b/test/MC/ARM/eh-directive-section-comdat.s
@@ -0,0 +1,126 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr -t | FileCheck %s
+
+@ Check the .group section for the function in comdat section.
+
+@ In C++, the instantiation of the template will come with linkonce (or
+@ linkonce_odr) linkage, so that the linker can remove the duplicated
+@ instantiation.  When exception handling is enabled on those functions,
+@ we have to group the corresponding .ARM.extab and .ARM.exidx with the
+@ text section together.
+@
+@ This test case will check the content of .group section.  The section index
+@ of the grouped sections should be recorded in .group section.
+
+	.syntax unified
+	.section	.TEST1,"axG",%progbits,func1,comdat
+	.weak	func1
+	.align	2
+	.type	func1,%function
+func1:
+	.fnstart
+	.save	{r4, lr}
+	push	{r4, lr}
+	.vsave	{d8, d9, d10, d11, d12}
+	vpush	{d8, d9, d10, d11, d12}
+	.pad	#24
+	sub	sp, sp, #24
+
+	add	sp, sp, #24
+	vpop	{d8, d9, d10, d11, d12}
+	pop	{r4, pc}
+
+	.globl	__gxx_personality_v0
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ Check the .group section
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Index: 1
+@ CHECK:     Name: .group
+@ CHECK:     Type: SHT_GROUP (0x11)
+@ CHECK:     Flags [ (0x0)
+@ CHECK:     ]
+@ CHECK:     SectionData (
+@-------------------------------------------------------------------------------
+@ The second, third, and fourth words should correspond to the section indices
+@ of .TEST1, .ARM.extab.TEST1, and .ARM.exidx.TEST1.
+@-------------------------------------------------------------------------------
+@ CHECK:       0000: 01000000 05000000 06000000 08000000  |................|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check the .TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Index: 5
+@ CHECK:     Name: .TEST1
+@ CHECK:     Type: SHT_PROGBITS (0x1)
+@-------------------------------------------------------------------------------
+@ The flags should contain SHF_GROUP.
+@-------------------------------------------------------------------------------
+@ CHECK:     Flags [ (0x206)
+@ CHECK:       SHF_ALLOC (0x2)
+@ CHECK:       SHF_EXECINSTR (0x4)
+@ CHECK:       SHF_GROUP (0x200)
+@ CHECK:     ]
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.extab.TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Index: 6
+@ CHECK:     Name: .ARM.extab.TEST1
+@ CHECK:     Type: SHT_PROGBITS (0x1)
+@-------------------------------------------------------------------------------
+@ The flags should contain SHF_GROUP.
+@-------------------------------------------------------------------------------
+@ CHECK:     Flags [ (0x202)
+@ CHECK:       SHF_ALLOC (0x2)
+@ CHECK:       SHF_GROUP (0x200)
+@ CHECK:     ]
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.exidx.TEST1 section
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Index: 8
+@ CHECK:     Name: .ARM.exidx.TEST1
+@ CHECK:     Type: SHT_ARM_EXIDX (0x70000001)
+@-------------------------------------------------------------------------------
+@ The flags should contain SHF_GROUP.
+@-------------------------------------------------------------------------------
+@ CHECK:     Flags [ (0x282)
+@ CHECK:       SHF_ALLOC (0x2)
+@ CHECK:       SHF_GROUP (0x200)
+@ CHECK:       SHF_LINK_ORDER (0x80)
+@ CHECK:     ]
+@ CHECK:     Link: 5
+@ CHECK:   }
+@ CHECK: ]
+
+
+
+@-------------------------------------------------------------------------------
+@ Check symbol func1.  It should be weak binding, and belong to .TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK: Symbols [
+@ CHECK:   Symbol {
+@ CHECK:     Name: func1
+@ CHECK:     Binding: Weak (0x2)
+@ CHECK:     Type: Function (0x2)
+@ CHECK:     Section: .TEST1 (0x5)
+@ CHECK:   }
+@ CHECK: ]
diff --git a/test/MC/ARM/eh-directive-section-multiple-func.s b/test/MC/ARM/eh-directive-section-multiple-func.s
new file mode 100644
index 000000000000..9f632b8b89a6
--- /dev/null
+++ b/test/MC/ARM/eh-directive-section-multiple-func.s
@@ -0,0 +1,128 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr -t | FileCheck %s
+
+@ Check whether the section is switched back properly.
+
+@ The assembler should switch the section back to the corresponding section
+@ after it has emitted the exception handling indices and tables.  In this
+@ test case, we are checking whether the section is correct when the .section
+@ directive is used.
+
+@ In this example, func1 and func2 should be defined in .TEST1 section.
+@ It is incorrect if the func2 is in .text, .ARM.extab.TEST1, or
+@ .ARM.exidx.TEST1 sections.
+
+	.syntax	unified
+
+	.section	.TEST1
+
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+
+@-------------------------------------------------------------------------------
+@ Check the .text section.  This should be empty.
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     SectionData (
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check the .TEST1 section.  There should be two "bx lr" instructions.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 1EFF2FE1 1EFF2FE1                    |../.../.|
+@ CHECK:     )
+@ CHECK:   }
+
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.extab.TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.extab.TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 B0B0B000 00000000 B0B0B000  |................|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ CHECK:       0x8 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.exidx.TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx.TEST1
+@ CHECK:     Link: 4
+@-------------------------------------------------------------------------------
+@ The first word should be the offset to .TEST1.
+@ The second word should be the offset to .ARM.extab.TEST1
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 00000000 04000000 08000000  |................|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+@-------------------------------------------------------------------------------
+@ The first word of each entry should be relocated to .TEST1 section.
+@ The second word of each entry should be relocated to
+@ .ARM.extab.TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:       0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ CHECK:       0x8 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:       0xC R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check the symbols "func1" and "func2".  They should belong to .TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK: Symbols [
+@ CHECK:   Symbol {
+@ CHECK:     Name: func1
+@ CHECK:     Value: 0x0
+@ CHECK:     Size: 0
+@ CHECK:     Binding: Global (0x1)
+@ CHECK:     Type: Function (0x2)
+@ CHECK:     Other: 0
+@ CHECK:     Section: .TEST1 (0x4)
+@ CHECK:   }
+@ CHECK:   Symbol {
+@ CHECK:     Name: func2
+@ CHECK:     Value: 0x4
+@ CHECK:     Size: 0
+@ CHECK:     Binding: Global (0x1)
+@ CHECK:     Type: Function (0x2)
+@ CHECK:     Other: 0
+@ CHECK:     Section: .TEST1 (0x4)
+@ CHECK:   }
+@ CHECK: ]
diff --git a/test/MC/ARM/eh-directive-section.s b/test/MC/ARM/eh-directive-section.s
new file mode 100644
index 000000000000..7c1f32ee39d4
--- /dev/null
+++ b/test/MC/ARM/eh-directive-section.s
@@ -0,0 +1,163 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr -t | FileCheck %s
+
+@ Check the combination of .section, .fnstart, and .fnend directives.
+
+@ For the functions in .text section, the exception handling index (EXIDX)
+@ should be generated in .ARM.exidx, and the exception handling table (EXTAB)
+@ should be generated in .ARM.extab.
+
+@ For the functions in custom section specified by .section directives,
+@ the EXIDX should be generated in ".ARM.exidx[[SECTION_NAME]]", and the EXTAB
+@ should be generated in ".ARM.extab[[SECTION_NAME]]".
+
+	.syntax	unified
+
+@-------------------------------------------------------------------------------
+@ .TEST1 section
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+
+@-------------------------------------------------------------------------------
+@ TEST2 section (without the dot in the beginning)
+@-------------------------------------------------------------------------------
+	.section	TEST2
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	bx	lr
+	.personality	__gxx_personality_v0
+	.handlerdata
+	.fnend
+
+
+@-------------------------------------------------------------------------------
+@ Check the .TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Index: 4
+@ CHECK:     Name: .TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 1EFF2FE1                             |../.|
+@ CHECK:     )
+@ CHECK:   }
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.extab.TEST1 section, the EXTAB of .TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.extab.TEST1
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 B0B0B000                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.exidx.TEST1 section, the EXIDX of .TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx.TEST1
+
+@-------------------------------------------------------------------------------
+@ This section should be linked with .TEST1 section.
+@-------------------------------------------------------------------------------
+@ CHECK:     Link: 4
+
+@-------------------------------------------------------------------------------
+@ The first word should be relocated to the code address in .TEST1 section.
+@ The second word should be relocated to the EXTAB entry in .ARM.extab.TEST1
+@ section.
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 00000000                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .TEST1 0x0
+@ CHECK:       0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check the TEST2 section (without the dot in the beginning)
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Index: 9
+@ CHECK:     Name: TEST2
+@ CHECK:     SectionData (
+@ CHECK:       0000: 1EFF2FE1                             |../.|
+@ CHECK:     )
+@ CHECK:   }
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.extabTEST2 section, the EXTAB of TEST2 section.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.extabTEST2
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 B0B0B000                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 __gxx_personality_v0 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.exidxTEST2 section, the EXIDX of TEST2 section.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidxTEST2
+
+@-------------------------------------------------------------------------------
+@ This section should be linked with TEST2 section.
+@-------------------------------------------------------------------------------
+@ CHECK:     Link: 9
+
+@-------------------------------------------------------------------------------
+@ The first word should be relocated to the code address in TEST2 section.
+@ The second word should be relocated to the EXTAB entry in .ARM.extabTEST2
+@ section.
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 00000000                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 TEST2 0x0
+@ CHECK:       0x4 R_ARM_PREL31 .ARM.extabTEST2 0x0
+@ CHECK:     ]
+
+
+
+@-------------------------------------------------------------------------------
+@ Check the symbols and the sections they belong to
+@-------------------------------------------------------------------------------
+@ CHECK: Symbols [
+@ CHECK:   Symbol {
+@ CHECK:     Name: func1
+@ CHECK:     Section: .TEST1 (0x4)
+@ CHECK:   }
+@ CHECK:   Symbol {
+@ CHECK:     Name: func2
+@ CHECK:     Section: TEST2 (0x9)
+@ CHECK:   }
+@ CHECK: ]
diff --git a/test/MC/ARM/eh-directive-setfp-diagnostics.s b/test/MC/ARM/eh-directive-setfp-diagnostics.s
new file mode 100644
index 000000000000..a5b8aa2386a5
--- /dev/null
+++ b/test/MC/ARM/eh-directive-setfp-diagnostics.s
@@ -0,0 +1,87 @@
+@ RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK < %t %s
+
+@ Check the diagnostics for .setfp directive.
+
+
+	.syntax unified
+	.text
+
+@-------------------------------------------------------------------------------
+@ TEST1: .setfp before .fnstart
+@-------------------------------------------------------------------------------
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.setfp	fp, sp, #0
+@ CHECK: error: .fnstart must precede .setfp directive
+@ CHECK:        .setfp fp, sp, #0
+@ CHECK:        ^
+	.fnstart
+func1:
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: .setfp after .handlerdata
+@-------------------------------------------------------------------------------
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	.handlerdata
+	.setfp	fp, sp, #0
+@ CHECK: error: .setfp must precede .handlerdata directive
+@ CHECK:        .setfp fp, sp, #0
+@ CHECK:        ^
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST3: .setfp with bad fp register
+@-------------------------------------------------------------------------------
+	.globl	func3
+	.align	2
+	.type	func3,%function
+	.fnstart
+func3:
+	.setfp	0, r0, #0
+@ CHECK: error: frame pointer register expected
+@ CHECK:        .setfp 0, r0, #0
+@ CHECK:               ^
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST4: .setfp with bad sp register
+@-------------------------------------------------------------------------------
+	.globl	func4
+	.align	2
+	.type	func4,%function
+	.fnstart
+func4:
+	.setfp	fp, 0, #0
+@ CHECK: error: stack pointer register expected
+@ CHECK:        .setfp fp, 0, #0
+@ CHECK:                   ^
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST5: .setfp with non-sp register as second operand
+@-------------------------------------------------------------------------------
+	.globl	func5
+	.align	2
+	.type	func5,%function
+	.fnstart
+func5:
+	.setfp	fp, r0, #0
+@ CHECK: error: register should be either $sp or the latest fp register
+@ CHECK:        .setfp fp, r0, #0
+@ CHECK:                   ^
+	.fnend
diff --git a/test/MC/ARM/eh-directive-setfp.s b/test/MC/ARM/eh-directive-setfp.s
new file mode 100644
index 000000000000..dfa79e622d2e
--- /dev/null
+++ b/test/MC/ARM/eh-directive-setfp.s
@@ -0,0 +1,239 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd | FileCheck %s
+
+@ Check for .setfp directive.
+
+@ The .setfp directive will track the offset between the frame pointer and
+@ the stack pointer.  This is required for the function that will change
+@ the stack pointer out of the function prologue.  If the exception is thrown,
+@ then libunwind will reconstruct the stack pointer from the frame pointer.
+@ The reconstruction code is implemented by two different unwind opcodes:
+@ (i) the unwind opcode to copy stack offset from the other register, and
+@ (ii) the unwind opcode to add or subtract the stack offset.
+@
+@ This file includes several cases separated by different range of -offset
+@
+@              (-offset) <  0x00
+@              (-offset) == 0x00
+@     0x04  <= (-offset) <= 0x100
+@     0x104 <= (-offset) <= 0x200
+@     0x204 <= (-offset)
+
+
+	.syntax unified
+
+@-------------------------------------------------------------------------------
+@ TEST1
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	.setfp	fp, sp, #0
+	add	fp, sp, #0
+	sub	sp, fp, #0
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x9B to copy stack pointer from r11.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0B09B00                    |........|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2a
+	.align	2
+	.type	func2a,%function
+	.fnstart
+func2a:
+	.setfp	fp, sp, #-4
+	add	fp, sp, #4
+	sub	sp, fp, #4
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2b
+	.align	2
+	.type	func2b,%function
+	.fnstart
+func2b:
+	.setfp	fp, sp, #-0x100
+	add	fp, sp, #0x100
+	sub	sp, fp, #0x100
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x9B to copy stack pointer from r11.
+@ The assembler should emit ((-offset - 4) >> 2) for offset.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0009B00 00000000 B03F9B00  |.............?..|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST3
+@-------------------------------------------------------------------------------
+	.section	.TEST3
+	.globl	func3a
+	.align	2
+	.type	func3a,%function
+	.fnstart
+func3a:
+	.setfp	fp, sp, #-0x104
+	sub	fp, sp, #0x104
+	add	sp, fp, #0x104
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func3b
+	.align	2
+	.type	func3b,%function
+	.fnstart
+func3b:
+	.setfp	fp, sp, #-0x200
+	sub	fp, sp, #0x200
+	add	sp, fp, #0x200
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x9B to copy stack pointer from r11.
+@ The assembler should emit 0x3F and ((-offset - 0x104) >> 2) for offset.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST3
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 3F009B00 00000000 3F3F9B00  |....?.......??..|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST4
+@-------------------------------------------------------------------------------
+	.section	.TEST4
+	.globl	func4a
+	.align	2
+	.type	func4a,%function
+	.fnstart
+func4a:
+	.setfp	fp, sp, #-0x204
+	sub	fp, sp, #0x204
+	add	sp, fp, #0x204
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func4b
+	.align	2
+	.type	func4b,%function
+	.fnstart
+func4b:
+	.setfp	fp, sp, #-0x580
+	sub	fp, sp, #0x580
+	add	sp, fp, #0x580
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x9B to copy stack pointer from r11.
+@ The assembler should emit 0xB2 and the ULEB128 encoding of
+@ ((-offset - 0x204) >> 2) for offset.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST4
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 00B29B00 00000000 DFB29B01  |................|
+@ CHECK:     0010: B0B0B001                             |....|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST5
+@-------------------------------------------------------------------------------
+	.section	.TEST5
+	.globl	func5a
+	.align	2
+	.type	func5a,%function
+	.fnstart
+func5a:
+	.setfp	fp, sp, #0x4
+	add	fp, sp, #0x4
+	sub	sp, fp, #0x4
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func5b
+	.align	2
+	.type	func5b,%function
+	.fnstart
+func5b:
+	.setfp	fp, sp, #0x104
+	add	fp, sp, #0x104
+	sub	sp, fp, #0x104
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func5c
+	.align	2
+	.type	func5c,%function
+	.fnstart
+func5c:
+	.setfp	fp, sp, #0x204
+	add	fp, sp, #0x204
+	sub	sp, fp, #0x204
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@-------------------------------------------------------------------------------
+@ The assembler should emit 0x9B to copy stack pointer from r11.
+@ The assembler should emit 0x40 | ((offset - 4) >> 2) for offset.
+@ If ((offset - 4) >> 2) exceeds 0x3f, additional 0x7f opcodes should be emitted.
+@-------------------------------------------------------------------------------
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST5
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B0409B00 00000000 7F409B00  |.....@.......@..|
+@ CHECK:     0010: 00000000 7F409B01 B0B0B07F           |.....@......|
+@ CHECK:   )
+@ CHECK: }
diff --git a/test/MC/ARM/eh-directive-text-section-multiple-func.s b/test/MC/ARM/eh-directive-text-section-multiple-func.s
new file mode 100644
index 000000000000..50e09f5f11b7
--- /dev/null
+++ b/test/MC/ARM/eh-directive-text-section-multiple-func.s
@@ -0,0 +1,81 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr -r -t | FileCheck %s
+
+@ Check whether the section is switched back or not.
+
+@ The assembler should emit the machine code of "func2" in .text section.
+@ It is incorrect if the machine code is emitted in .ARM.exidx or .ARM.extab.
+@ Besides, there should be two entries in .ARM.exidx section.
+
+	.syntax	unified
+
+	.text
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.fnend
+
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	bx	lr
+	.fnend
+
+
+@-------------------------------------------------------------------------------
+@ Check the .text section.  There should be two "bx lr" instructions.
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+@ CHECK:     Name: .text
+@ CHECK:     SectionData (
+@ CHECK:       0000: 1EFF2FE1 1EFF2FE1                    |../.../.|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check the .ARM.exidx section.
+@ There should be two entries (two words per entry.)
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx
+@ CHECK:     SectionData (
+@-------------------------------------------------------------------------------
+@ The first word should be the offset to .text.  The second word should be
+@ 0xB0B0B080, which means compact model 0 is used (0x80) and the rest of the
+@ word is filled with FINISH opcode (0xB0).
+@-------------------------------------------------------------------------------
+@ CHECK:       0000: 00000000 B0B0B080 04000000 B0B0B080 |................|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+
+@-------------------------------------------------------------------------------
+@ The first word of each entry should be relocated to .text section.
+@-------------------------------------------------------------------------------
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .text 0x0
+@ CHECK:       0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ CHECK:       0x8 R_ARM_PREL31 .text 0x0
+@ CHECK:     ]
+
+
+@-------------------------------------------------------------------------------
+@ Check the symbols "func1" and "func2".  They should belong to .text section.
+@-------------------------------------------------------------------------------
+@ CHECK: Symbols [
+@ CHECK:   Symbol {
+@ CHECK:     Name: func1
+@ CHECK:     Section: .text (0x1)
+@ CHECK:   }
+@ CHECK:   Symbol {
+@ CHECK:     Name: func2
+@ CHECK:     Section: .text (0x1)
+@ CHECK:   }
+@ CHECK: ]
diff --git a/test/MC/ARM/eh-directive-text-section.s b/test/MC/ARM/eh-directive-text-section.s
new file mode 100644
index 000000000000..4a073aa81151
--- /dev/null
+++ b/test/MC/ARM/eh-directive-text-section.s
@@ -0,0 +1,82 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the .fnstart directive and the .fnend directive.
+
+@ The .fnstart directive and .fnend directive should create an entry in
+@ exception handling table.  For example, if the function is defined in .text
+@ section, then there should be an entry in .ARM.exidx section.
+
+	.syntax	unified
+
+	.text
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.fnstart
+func1:
+	bx	lr
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ Check the .text section.
+@-------------------------------------------------------------------------------
+@ CHECK: Sections [
+@ CHECK:   Section {
+
+@-------------------------------------------------------------------------------
+@ Check the index of .text section.  This will be used in .ARM.exidx.
+@-------------------------------------------------------------------------------
+@ CHECK:     Index: 1
+@ CHECK:     Name: .text
+@ CHECK:     Type: SHT_PROGBITS (0x1)
+@ CHECK:     Flags [ (0x6)
+@ CHECK:       SHF_ALLOC (0x2)
+@ CHECK:       SHF_EXECINSTR (0x4)
+@ CHECK:     ]
+@ CHECK:     SectionData (
+@ CHECK:       0000: 1EFF2FE1                             |../.|
+@ CHECK:     )
+@ CHECK:   }
+
+
+@-------------------------------------------------------------------------------
+@ Check the name of the EXIDX section.  For the function in the .text section,
+@ this should be .ARM.exidx.  It is incorrect to see .ARM.exidx.text here.
+@-------------------------------------------------------------------------------
+@ CHECK:   Section {
+@ CHECK:     Name: .ARM.exidx
+@ CHECK:     Type: SHT_ARM_EXIDX (0x70000001)
+@ CHECK:     Flags [ (0x82)
+@ CHECK:       SHF_ALLOC (0x2)
+@ CHECK:       SHF_LINK_ORDER (0x80)
+@ CHECK:     ]
+
+@-------------------------------------------------------------------------------
+@ Check the linked section of the EXIDX section.  This should be the index
+@ of the .text section.
+@-------------------------------------------------------------------------------
+@ CHECK:     Link: 1
+
+@-------------------------------------------------------------------------------
+@ The first word should be the offset to .text.  The second word should be
+@ 0xB0B0B080, which means compact model 0 is used (0x80) and the rest of the
+@ word is filled with FINISH opcode (0xB0).
+@-------------------------------------------------------------------------------
+@ CHECK:     SectionData (
+@ CHECK:       0000: 00000000 B0B0B080                    |........|
+@ CHECK:     )
+@ CHECK:   }
+@ CHECK: ]
+
+@-------------------------------------------------------------------------------
+@ The first word should be relocated to the code address in .text section.
+@ Besides, since this function is using compact model 0, we have to
+@ add a relocation to __aeabi_unwind_cpp_pr0.
+@-------------------------------------------------------------------------------
+@ CHECK:     Relocations [
+@ CHECK:       0x0 R_ARM_PREL31 .text 0x0
+@ CHECK:       0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ CHECK:     ]
diff --git a/test/MC/ARM/eh-directive-vsave-diagnostics.s b/test/MC/ARM/eh-directive-vsave-diagnostics.s
new file mode 100644
index 000000000000..62787f37c27f
--- /dev/null
+++ b/test/MC/ARM/eh-directive-vsave-diagnostics.s
@@ -0,0 +1,41 @@
+@ RUN: not llvm-mc -triple=armv7-unknown-linux-gnueabi < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK < %t %s
+
+@ Check the diagnostics for .vsave directive
+
+@ .vsave directive should always come after .fnstart directive
+@ and before .handlerdata directive.
+
+	.syntax unified
+	.text
+
+@-------------------------------------------------------------------------------
+@ TEST1: .vsave before .fnstart
+@-------------------------------------------------------------------------------
+	.globl	func1
+	.align	2
+	.type	func1,%function
+	.vsave	{d0, d1, d2, d3}
+@ CHECK: error: .fnstart must precede .save or .vsave directives
+@ CHECK:        .vsave {d0, d1, d2, d3}
+@ CHECK:        ^
+	.fnstart
+func1:
+	.fnend
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2: .vsave after .handlerdata
+@-------------------------------------------------------------------------------
+	.globl	func2
+	.align	2
+	.type	func2,%function
+	.fnstart
+func2:
+	.handlerdata
+	.vsave	{d0, d1, d2, d3}
+@ CHECK: error: .save or .vsave must precede .handlerdata directive
+@ CHECK:        .vsave {d0, d1, d2, d3}
+@ CHECK:        ^
+	.fnend
diff --git a/test/MC/ARM/eh-directive-vsave.s b/test/MC/ARM/eh-directive-vsave.s
new file mode 100644
index 000000000000..c9b78d7e276b
--- /dev/null
+++ b/test/MC/ARM/eh-directive-vsave.s
@@ -0,0 +1,130 @@
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
+
+@ Check the .vsave directive
+
+@ The .vsave directive records the VFP registers which are pushed to the
+@ stack.  There are two different opcodes:
+@
+@     0xC800: pop d[(16+x+y):(16+x)]    @ d[16+x+y]-d[16+x] must be consecutive
+@     0xC900: pop d[(x+y):x]            @ d[x+y]-d[x] must be consecutive
+
+
+	.syntax unified
+
+@-------------------------------------------------------------------------------
+@ TEST1
+@-------------------------------------------------------------------------------
+	.section	.TEST1
+	.globl	func1a
+	.align	2
+	.type	func1a,%function
+	.fnstart
+func1a:
+	.vsave	{d0}
+	vpush	{d0}
+	vpop	{d0}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1b
+	.align	2
+	.type	func1b,%function
+	.fnstart
+func1b:
+	.vsave	{d0, d1, d2, d3}
+	vpush	{d0, d1, d2, d3}
+	vpop	{d0, d1, d2, d3}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1c
+	.align	2
+	.type	func1c,%function
+	.fnstart
+func1c:
+	.vsave	{d0, d1, d2, d3, d4, d5, d6, d7}
+	vpush	{d0, d1, d2, d3, d4, d5, d6, d7}
+	vpop	{d0, d1, d2, d3, d4, d5, d6, d7}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func1d
+	.align	2
+	.type	func1d,%function
+	.fnstart
+func1d:
+	.vsave	{d2, d3, d4, d5, d6, d7}
+	vpush	{d2, d3, d4, d5, d6, d7}
+	vpop	{d2, d3, d4, d5, d6, d7}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST1
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B000C900 00000000 B003C900  |................|
+@ CHECK:     0010: 00000000 B007C900 00000000 B025C900  |.............%..|
+@ CHECK:   )
+@ CHECK: }
+
+
+
+@-------------------------------------------------------------------------------
+@ TEST2
+@-------------------------------------------------------------------------------
+	.section	.TEST2
+	.globl	func2a
+	.align	2
+	.type	func2a,%function
+	.fnstart
+func2a:
+	.vsave	{d16}
+	vpush	{d16}
+	vpop	{d16}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2b
+	.align	2
+	.type	func2b,%function
+	.fnstart
+func2b:
+	.vsave	{d16, d17, d18, d19}
+	vpush	{d16, d17, d18, d19}
+	vpop	{d16, d17, d18, d19}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+	.globl	func2c
+	.align	2
+	.type	func2c,%function
+	.fnstart
+func2c:
+	.vsave	{d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+	vpush	{d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+	vpop	{d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+	bx	lr
+	.personality __gxx_personality_v0
+	.handlerdata
+	.fnend
+
+@ CHECK: Section {
+@ CHECK:   Name: .ARM.extab.TEST2
+@ CHECK:   SectionData (
+@ CHECK:     0000: 00000000 B000C800 00000000 B003C800  |................|
+@ CHECK:     0010: 00000000 B00FC800                    |........|
+@ CHECK:   )
+@ CHECK: }
diff --git a/test/MC/ARM/elf-eflags-eabi-cg.ll b/test/MC/ARM/elf-eflags-eabi-cg.ll
deleted file mode 100644
index 0b9de7f2a62a..000000000000
--- a/test/MC/ARM/elf-eflags-eabi-cg.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; Codegen version to check for ELF header flags.
-;
-; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | llvm-readobj -h | \
-; RUN: FileCheck %s
-
-define void @bar() nounwind {
-entry:
-  ret void
-}
-
-; For now the only e_flag set is EF_ARM_EABI_VER5
-; CHECK: ElfHeader {
-; CHECK:   Flags [ (0x5000000)
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
index 74b3c9f2f53b..0080db4ac8a0 100644
--- a/test/MC/ARM/elf-movt.s
+++ b/test/MC/ARM/elf-movt.s
@@ -30,9 +30,29 @@ barf:                                   @ @barf
 @ OBJ-NEXT:     AddressAlignment: 4
 @ OBJ-NEXT:     EntrySize: 0
 @ OBJ-NEXT:     Relocations [
-@ OBJ-NEXT:       0x0 R_ARM_MOVW_PREL_NC
-@ OBJ-NEXT:       0x4 R_ARM_MOVT_PREL
 @ OBJ-NEXT:     ]
 @ OBJ-NEXT:     SectionData (
 @ OBJ-NEXT:       0000: F00F0FE3 F40F4FE3
 @ OBJ-NEXT:     )
+@ OBJ-NEXT:   }
+@ OBJ-NEXT:   Section {
+@ OBJ-NEXT:     Index: 2
+@ OBJ-NEXT:     Name: .rel.text (1)
+@ OBJ-NEXT:     Type: SHT_REL (0x9)
+@ OBJ-NEXT:     Flags [ (0x0)
+@ OBJ-NEXT:     ]
+@ OBJ-NEXT:     Address: 0x0
+@ OBJ-NEXT:     Offset: 0x22C
+@ OBJ-NEXT:     Size: 16
+@ OBJ-NEXT:     Link: 6
+@ OBJ-NEXT:     Info: 1
+@ OBJ-NEXT:     AddressAlignment: 4
+@ OBJ-NEXT:     EntrySize: 8
+@ OBJ-NEXT:     Relocations [
+@ OBJ-NEXT:       0x0 R_ARM_MOVW_PREL_NC
+@ OBJ-NEXT:       0x4 R_ARM_MOVT_PREL
+@ OBJ-NEXT:   ]
+@ OBJ-NEXT:     SectionData (
+@ OBJ-NEXT:       0000: 00000000 2D060000 04000000 2E060000  |....-...........|
+@ OBJ-NEXT:     )
+@ OBJ-NEXT:   }
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index 9b5dbd9e99dd..28be85b7db12 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -61,7 +61,7 @@ bb3:                                              ; preds = %bb, %entry
 declare void @exit(i32) noreturn nounwind
 
 ; OBJ: Relocations [
-; OBJ:   Section (1) .text {
+; OBJ:   Section (2) .rel.text {
 ; OBJ:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC _MergedGlobals
 ; OBJ:   }
 ; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
index f0217644cad9..7eb49cc5510f 100644
--- a/test/MC/ARM/elf-reloc-02.ll
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -42,7 +42,7 @@ declare i32 @write(...)
 declare void @exit(i32) noreturn nounwind
 
 ;; OBJ:      Relocations [
-;; OBJ:        Section (1) .text {
+;; OBJ:        Section (2) .rel.text {
 ;; OBJ-NEXT:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC .L.str
 ;; OBJ:        }
 ;; OBJ:      ]
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
index ac46e697c18a..ee5e59eb340c 100644
--- a/test/MC/ARM/elf-reloc-03.ll
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -89,7 +89,7 @@ entry:
 declare void @exit(i32) noreturn nounwind
 
 ;; OBJ: Relocations [
-;; OBJ:   Section (1) .text {
+;; OBJ:   Section (2) .rel.text {
 ;; OBJ:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC vtable
 ;; OBJ:   }
 ;; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
index 612942f2c595..a0402bdc3814 100644
--- a/test/MC/ARM/elf-reloc-condcall.s
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -8,7 +8,7 @@
         b some_label
 
 // OBJ:      Relocations [
-// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:   Section (2) .rel.text {
 // OBJ-NEXT:     0x0  R_ARM_JUMP24 some_label 0x0
 // OBJ-NEXT:     0x4  R_ARM_CALL   some_label 0x0
 // OBJ-NEXT:     0x8  R_ARM_CALL   some_label 0x0
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.ll b/test/MC/ARM/elf-thumbfunc-reloc.ll
index e7d2c340d44d..9fd360e1a013 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.ll
+++ b/test/MC/ARM/elf-thumbfunc-reloc.ll
@@ -29,7 +29,7 @@ entry:
 ; CHECK: ]
 
 ; CHECK:      Relocations [
-; CHECK-NEXT:   Section (1) .text {
+; CHECK-NEXT:   Section (2) .rel.text {
 ; CHECK-NEXT:     0x8 R_ARM_THM_CALL foo 0x0
 ; CHECK-NEXT:   }
 ; CHECK-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
index 87a26d8df84e..614702012f0c 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.s
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -1,5 +1,5 @@
 @@ test st_value bit 0 of thumb function
-@ RUN: llvm-mc %s -triple=arm-freebsd-eabi -filetype=obj -o - | \
+@ RUN: llvm-mc %s -triple=armv4t-freebsd-eabi -filetype=obj -o - | \
 @ RUN: llvm-readobj -r  | FileCheck %s
 
 
@@ -18,7 +18,7 @@ f:
 
 @@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
 @CHECK:      Relocations [
-@CHECK-NEXT:   Section (1) .text {
+@CHECK-NEXT:   Section (2) .rel.text {
 @CHECK-NEXT:     0x4 R_ARM_THM_CALL g 0x0
 @CHECK-NEXT:   }
 @CHECK-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s
index 26f5f0b159af..0ea11821b96b 100644
--- a/test/MC/ARM/elf-thumbfunc.s
+++ b/test/MC/ARM/elf-thumbfunc.s
@@ -5,9 +5,9 @@
 	.text
 	.globl	foo
 	.align	2
-	.type	foo,%function
 	.code	16
 	.thumb_func
+	.type	foo,%function
 foo:
 	bx	lr
 
diff --git a/test/MC/ARM/fp-armv8.s b/test/MC/ARM/fp-armv8.s
new file mode 100644
index 000000000000..1ffd5902e5c7
--- /dev/null
+++ b/test/MC/ARM/fp-armv8.s
@@ -0,0 +1,129 @@
+@ RUN: llvm-mc -triple armv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s
+
+@ VCVT{B,T}
+
+  vcvtt.f64.f16 d3, s1
+@ CHECK: vcvtt.f64.f16 d3, s1      @ encoding: [0xe0,0x3b,0xb2,0xee]
+  vcvtt.f16.f64 s5, d12
+@ CHECK: vcvtt.f16.f64 s5, d12     @ encoding: [0xcc,0x2b,0xf3,0xee]
+
+  vcvtb.f64.f16 d3, s1
+@ CHECK: vcvtb.f64.f16 d3, s1     @ encoding: [0x60,0x3b,0xb2,0xee]
+  vcvtb.f16.f64 s4, d1
+@ CHECK: vcvtb.f16.f64 s4, d1     @ encoding: [0x41,0x2b,0xb3,0xee]
+
+  vcvttge.f64.f16 d3, s1
+@ CHECK: vcvttge.f64.f16 d3, s1      @ encoding: [0xe0,0x3b,0xb2,0xae]
+  vcvttgt.f16.f64 s5, d12
+@ CHECK: vcvttgt.f16.f64 s5, d12     @ encoding: [0xcc,0x2b,0xf3,0xce]
+
+  vcvtbeq.f64.f16 d3, s1
+@ CHECK: vcvtbeq.f64.f16 d3, s1     @ encoding: [0x60,0x3b,0xb2,0x0e]
+  vcvtblt.f16.f64 s4, d1
+@ CHECK: vcvtblt.f16.f64 s4, d1     @ encoding: [0x41,0x2b,0xb3,0xbe]
+
+
+@ VCVT{A,N,P,M}
+
+  vcvta.s32.f32 s2, s3
+@ CHECK: vcvta.s32.f32 s2, s3     @ encoding: [0xe1,0x1a,0xbc,0xfe]
+  vcvta.s32.f64 s2, d3
+@ CHECK: vcvta.s32.f64 s2, d3     @ encoding: [0xc3,0x1b,0xbc,0xfe]
+  vcvtn.s32.f32 s6, s23
+@ CHECK: vcvtn.s32.f32 s6, s23     @ encoding: [0xeb,0x3a,0xbd,0xfe]
+  vcvtn.s32.f64 s6, d23
+@ CHECK: vcvtn.s32.f64 s6, d23     @ encoding: [0xe7,0x3b,0xbd,0xfe]
+  vcvtp.s32.f32 s0, s4
+@ CHECK: vcvtp.s32.f32 s0, s4     @ encoding: [0xc2,0x0a,0xbe,0xfe]
+  vcvtp.s32.f64 s0, d4
+@ CHECK: vcvtp.s32.f64 s0, d4     @ encoding: [0xc4,0x0b,0xbe,0xfe]
+  vcvtm.s32.f32 s17, s8
+@ CHECK: vcvtm.s32.f32 s17, s8     @ encoding: [0xc4,0x8a,0xff,0xfe]
+  vcvtm.s32.f64 s17, d8
+@ CHECK: vcvtm.s32.f64 s17, d8     @ encoding: [0xc8,0x8b,0xff,0xfe]
+
+  vcvta.u32.f32 s2, s3
+@ CHECK: vcvta.u32.f32 s2, s3     @ encoding: [0x61,0x1a,0xbc,0xfe]
+  vcvta.u32.f64 s2, d3
+@ CHECK: vcvta.u32.f64 s2, d3     @ encoding: [0x43,0x1b,0xbc,0xfe]
+  vcvtn.u32.f32 s6, s23
+@ CHECK: vcvtn.u32.f32 s6, s23     @ encoding: [0x6b,0x3a,0xbd,0xfe]
+  vcvtn.u32.f64 s6, d23
+@ CHECK: vcvtn.u32.f64 s6, d23     @ encoding: [0x67,0x3b,0xbd,0xfe]
+  vcvtp.u32.f32 s0, s4
+@ CHECK: vcvtp.u32.f32 s0, s4     @ encoding: [0x42,0x0a,0xbe,0xfe]
+  vcvtp.u32.f64 s0, d4
+@ CHECK: vcvtp.u32.f64 s0, d4     @ encoding: [0x44,0x0b,0xbe,0xfe]
+  vcvtm.u32.f32 s17, s8
+@ CHECK: vcvtm.u32.f32 s17, s8     @ encoding: [0x44,0x8a,0xff,0xfe]
+  vcvtm.u32.f64 s17, d8
+@ CHECK: vcvtm.u32.f64 s17, d8     @ encoding: [0x48,0x8b,0xff,0xfe]
+
+
+@ VSEL
+  vselge.f32 s4, s1, s23
+@ CHECK: vselge.f32 s4, s1, s23    @ encoding: [0xab,0x2a,0x20,0xfe]
+  vselge.f64 d30, d31, d23
+@ CHECK: vselge.f64 d30, d31, d23  @ encoding: [0xa7,0xeb,0x6f,0xfe]
+  vselgt.f32 s0, s1, s0
+@ CHECK: vselgt.f32 s0, s1, s0    @ encoding: [0x80,0x0a,0x30,0xfe]
+  vselgt.f64 d5, d10, d20
+@ CHECK: vselgt.f64 d5, d10, d20  @ encoding: [0x24,0x5b,0x3a,0xfe]
+  vseleq.f32 s30, s28, s23
+@ CHECK: vseleq.f32 s30, s28, s23 @ encoding: [0x2b,0xfa,0x0e,0xfe]
+  vseleq.f64 d2, d4, d8
+@ CHECK: vseleq.f64 d2, d4, d8    @ encoding: [0x08,0x2b,0x04,0xfe]
+  vselvs.f32 s21, s16, s14
+@ CHECK: vselvs.f32 s21, s16, s14 @ encoding: [0x07,0xaa,0x58,0xfe]
+  vselvs.f64 d0, d1, d31
+@ CHECK: vselvs.f64 d0, d1, d31   @ encoding: [0x2f,0x0b,0x11,0xfe]
+
+
+@ VMAXNM / VMINNM
+  vmaxnm.f32 s5, s12, s0
+@ CHECK: vmaxnm.f32 s5, s12, s0    @ encoding: [0x00,0x2a,0xc6,0xfe]
+  vmaxnm.f64 d5, d22, d30
+@ CHECK: vmaxnm.f64 d5, d22, d30   @ encoding: [0xae,0x5b,0x86,0xfe]
+  vminnm.f32 s0, s0, s12
+@ CHECK: vminnm.f32 s0, s0, s12    @ encoding: [0x46,0x0a,0x80,0xfe]
+  vminnm.f64 d4, d6, d9
+@ CHECK: vminnm.f64 d4, d6, d9     @ encoding: [0x49,0x4b,0x86,0xfe]
+
+@ VRINT{Z,R,X}
+
+  vrintzge.f64 d3, d12
+@ CHECK: vrintzge.f64 d3, d12   @ encoding: [0xcc,0x3b,0xb6,0xae]
+  vrintz.f32 s3, s24
+@ CHECK: vrintz.f32 s3, s24     @ encoding: [0xcc,0x1a,0xf6,0xee]
+  vrintrlt.f64 d5, d0
+@ CHECK: vrintrlt.f64 d5, d0    @ encoding: [0x40,0x5b,0xb6,0xbe]
+  vrintr.f32 s0, s9
+@ CHECK: vrintr.f32 s0, s9      @ encoding: [0x64,0x0a,0xb6,0xee]
+  vrintxeq.f64 d28, d30
+@ CHECK: vrintxeq.f64 d28, d30  @ encoding: [0x6e,0xcb,0xf7,0x0e]
+  vrintxvs.f32 s10, s14
+@ CHECK: vrintxvs.f32 s10, s14  @ encoding: [0x47,0x5a,0xb7,0x6e]
+
+@ VRINT{A,N,P,M}
+
+  vrinta.f64 d3, d4
+@ CHECK: vrinta.f64 d3, d4     @ encoding: [0x44,0x3b,0xb8,0xfe]
+  vrinta.f32 s12, s1
+@ CHECK: vrinta.f32 s12, s1    @ encoding: [0x60,0x6a,0xb8,0xfe]
+  vrintn.f64 d3, d4
+@ CHECK: vrintn.f64 d3, d4     @ encoding: [0x44,0x3b,0xb9,0xfe]
+  vrintn.f32 s12, s1
+@ CHECK: vrintn.f32 s12, s1    @ encoding: [0x60,0x6a,0xb9,0xfe]
+  vrintp.f64 d3, d4
+@ CHECK: vrintp.f64 d3, d4     @ encoding: [0x44,0x3b,0xba,0xfe]
+  vrintp.f32 s12, s1
+@ CHECK: vrintp.f32 s12, s1    @ encoding: [0x60,0x6a,0xba,0xfe]
+  vrintm.f64 d3, d4
+@ CHECK: vrintm.f64 d3, d4     @ encoding: [0x44,0x3b,0xbb,0xfe]
+  vrintm.f32 s12, s1
+@ CHECK: vrintm.f32 s12, s1    @ encoding: [0x60,0x6a,0xbb,0xfe]
+
+@ MVFR2
+
+  vmrs sp, mvfr2
+@ CHECK: vmrs sp, mvfr2        @ encoding: [0x10,0xda,0xf5,0xee]
diff --git a/test/MC/ARM/idiv.s b/test/MC/ARM/idiv.s
new file mode 100644
index 000000000000..6238a0fe0662
--- /dev/null
+++ b/test/MC/ARM/idiv.s
@@ -0,0 +1,33 @@
+@ RUN: llvm-mc -triple=armv7 -mcpu=cortex-a15 -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-ARM %s
+@ RUN: llvm-mc -triple=thumbv7 -mcpu=cortex-a15 -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-THUMB %s
+
+@ RUN: llvm-mc -triple=armv7 -mcpu=cortex-a15 -mattr=-hwdiv -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-ARM-NOTHUMBHWDIV %s
+@ RUN: llvm-mc -triple=thumbv7 -mcpu=cortex-a15 -mattr=-hwdiv-arm -show-encoding < %s 2>&1 | FileCheck -check-prefix A15-THUMB-NOARMHWDIV %s
+
+@ RUN: llvm-mc -triple=armv8 -show-encoding < %s 2>&1 | FileCheck -check-prefix ARMV8 %s
+@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck -check-prefix THUMBV8 %s
+
+@ RUN: llvm-mc -triple=armv8 -mattr=-hwdiv -show-encoding < %s 2>&1 | FileCheck -check-prefix ARMV8-NOTHUMBHWDIV %s
+@ RUN: llvm-mc -triple=thumbv8 -mattr=-hwdiv-arm -show-encoding < %s 2>&1 | FileCheck -check-prefix THUMBV8-NOTHUMBHWDIV %s
+
+        sdiv  r1, r2, r3
+        udiv  r3, r4, r5
+@ A15-ARM:              sdiv   r1, r2, r3               @ encoding: [0x12,0xf3,0x11,0xe7]
+@ A15-ARM:              udiv   r3, r4, r5               @ encoding: [0x14,0xf5,0x33,0xe7]
+@ A15-THUMB:            sdiv   r1, r2, r3               @ encoding: [0x92,0xfb,0xf3,0xf1]
+@ A15-THUMB:            udiv   r3, r4, r5               @ encoding: [0xb4,0xfb,0xf5,0xf3]
+
+@ A15-ARM-NOTHUMBHWDIV: sdiv    r1, r2, r3              @ encoding: [0x12,0xf3,0x11,0xe7]
+@ A15-ARM-NOTHUMBHWDIV: udiv    r3, r4, r5              @ encoding: [0x14,0xf5,0x33,0xe7]
+@ A15-THUMB-NOARMHWDIV: sdiv    r1, r2, r3              @ encoding: [0x92,0xfb,0xf3,0xf1]
+@ A15-THUMB-NOARMHWDIV: udiv    r3, r4, r5              @ encoding: [0xb4,0xfb,0xf5,0xf3]
+
+@ ARMV8:                sdiv    r1, r2, r3              @ encoding: [0x12,0xf3,0x11,0xe7]
+@ ARMV8:                udiv    r3, r4, r5              @ encoding: [0x14,0xf5,0x33,0xe7]
+@ THUMBV8:              sdiv    r1, r2, r3              @ encoding: [0x92,0xfb,0xf3,0xf1]
+@ THUMBV8:              udiv    r3, r4, r5              @ encoding: [0xb4,0xfb,0xf5,0xf3]
+
+@ ARMV8-NOTHUMBHWDIV:   sdiv    r1, r2, r3              @ encoding: [0x12,0xf3,0x11,0xe7]
+@ ARMV8-NOTHUMBHWDIV:   udiv    r3, r4, r5              @ encoding: [0x14,0xf5,0x33,0xe7]
+@ THUMBV8-NOTHUMBHWDIV: sdiv    r1, r2, r3              @ encoding: [0x92,0xfb,0xf3,0xf1]
+@ THUMBV8-NOTHUMBHWDIV: udiv    r3, r4, r5              @ encoding: [0xb4,0xfb,0xf5,0xf3]
diff --git a/test/MC/ARM/invalid-barrier.s b/test/MC/ARM/invalid-barrier.s
new file mode 100644
index 000000000000..29fcd8e74623
--- /dev/null
+++ b/test/MC/ARM/invalid-barrier.s
@@ -0,0 +1,28 @@
+@ RUN: not llvm-mc -triple=armv7   -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s
+
+@------------------------------------------------------------------------------
+@ DMB
+@------------------------------------------------------------------------------
+        dmb #0x10
+        dmb imaginary_scope
+
+@ CHECK: error: immediate value out of range
+@ CHECK: error: invalid operand for instruction
+
+@------------------------------------------------------------------------------
+@ DSB
+@------------------------------------------------------------------------------
+        dsb #0x10
+        dsb imaginary_scope
+@ CHECK: error: immediate value out of range
+@ CHECK: error: invalid operand for instruction
+
+@------------------------------------------------------------------------------
+@ ISB
+@------------------------------------------------------------------------------
+        isb #0x1f
+        isb imaginary_domain
+
+@ CHECK: error: immediate value out of range
+@ CHECK: error: invalid operand for instruction
diff --git a/test/MC/ARM/invalid-crc32.s b/test/MC/ARM/invalid-crc32.s
new file mode 100644
index 000000000000..a541002acb17
--- /dev/null
+++ b/test/MC/ARM/invalid-crc32.s
@@ -0,0 +1,16 @@
+@ RUN: not llvm-mc -triple=armv8 -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck %s
+
+        crc32cbeq  r0, r1, r2
+        crc32bne   r0, r1, r2
+        crc32chcc  r0, r1, r2
+        crc32hpl   r0, r1, r2
+        crc32cwgt  r0, r1, r2
+        crc32wle   r0, r1, r2
+
+@ CHECK: error: instruction 'crc32cb' is not predicable, but condition code specified
+@ CHECK: error: instruction 'crc32b' is not predicable, but condition code specified
+@ CHECK: error: instruction 'crc32ch' is not predicable, but condition code specified
+@ CHECK: error: instruction 'crc32h' is not predicable, but condition code specified
+@ CHECK: error: instruction 'crc32cw' is not predicable, but condition code specified
+@ CHECK: error: instruction 'crc32w' is not predicable, but condition code specified
diff --git a/test/MC/ARM/invalid-fp-armv8.s b/test/MC/ARM/invalid-fp-armv8.s
new file mode 100644
index 000000000000..21adb7eb3e5e
--- /dev/null
+++ b/test/MC/ARM/invalid-fp-armv8.s
@@ -0,0 +1,89 @@
+@ RUN: not llvm-mc -triple armv8 -show-encoding -mattr=-neon < %s 2>&1 | FileCheck %s --check-prefix=V8
+
+@ VCVT{B,T}
+
+  vcvtt.f64.f16 d3, s1
+@ V7-NOT: vcvtt.f64.f16 d3, s1      @ encoding: [0xe0,0x3b,0xb2,0xee]
+  vcvtt.f16.f64 s5, d12
+@ V7-NOT: vcvtt.f16.f64 s5, d12     @ encoding: [0xcc,0x2b,0xf3,0xee]
+
+  vsel.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselne.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselmi.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselpl.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselvc.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselcs.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselcc.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselhs.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vsello.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselhi.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vsells.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vsellt.f32 s3, s4, s6
+@ V8: error: invalid instruction
+  vselle.f32 s3, s4, s6
+@ V8: error: invalid instruction
+
+vseleq.f32 s0, d2, d1
+@ V8: error: invalid operand for instruction
+vselgt.f64 s3, s2, s1
+@ V8: error: invalid operand for instruction
+vselgt.f32 s0, q3, q1
+@ V8: error: invalid operand for instruction
+vselgt.f64 q0, s3, q1
+@ V8: error: invalid operand for instruction
+
+vmaxnm.f32 s0, d2, d1
+@ V8: error: invalid operand for instruction
+vminnm.f64 s3, s2, s1
+@ V8: error: invalid operand for instruction
+vmaxnm.f32 s0, q3, q1
+@ V8: error: invalid operand for instruction
+vmaxnm.f64 q0, s3, q1
+@ V8: error: invalid operand for instruction
+vmaxnmgt.f64 q0, s3, q1
+@ V8: error: instruction 'vmaxnm' is not predicable, but condition code specified
+
+vcvta.s32.f64 d3, s2
+@ V8: error: invalid operand for instruction
+vcvtp.s32.f32 d3, s2
+@ V8: error: invalid operand for instruction
+vcvtn.u32.f64 d3, s2
+@ V8: error: invalid operand for instruction
+vcvtm.u32.f32 d3, s2
+@ V8: error: invalid operand for instruction
+vcvtnge.u32.f64 d3, s2
+@ V8: error: instruction 'vcvtn' is not predicable, but condition code specified
+
+vcvtbgt.f64.f16 q0, d3
+@ V8: error: invalid operand for instruction
+vcvttlt.f64.f16 s0, s3
+@ V8: error: invalid operand for instruction
+vcvttvs.f16.f64 s0, s3
+@ V8: error: invalid operand for instruction
+vcvtthi.f16.f64 q0, d3
+@ V8: error: invalid operand for instruction
+
+vrintrlo.f32.f32 d3, q0
+@ V8: error: invalid operand for instruction
+vrintxcs.f32.f32 d3, d0
+@ V8: error: instruction requires: NEON
+
+vrinta.f64.f64 s3, q0
+@ V8: error: invalid operand for instruction
+vrintn.f32.f32 d3, d0
+@ V8: error: instruction requires: NEON
+vrintp.f32 q3, q0
+@ V8: error: instruction requires: NEON
+vrintmlt.f32 q3, q0
+@ V8: error: instruction 'vrintm' is not predicable, but condition code specified
diff --git a/test/MC/ARM/invalid-hint-arm.s b/test/MC/ARM/invalid-hint-arm.s
index e0cd97a19028..49a2e5c7c59f 100644
--- a/test/MC/ARM/invalid-hint-arm.s
+++ b/test/MC/ARM/invalid-hint-arm.s
@@ -1,7 +1,8 @@
-@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s
 
-hint #5
-hint #100
+hint #240
+hint #1000
+
+@ CHECK: error: immediate operand must be in the range [0,239]
+@ CHECK: error: immediate operand must be in the range [0,239]
 
-@ CHECK: error: immediate operand must be in the range [0,4]
-@ CHECK: error: immediate operand must be in the range [0,4]
diff --git a/test/MC/ARM/invalid-hint-thumb.s b/test/MC/ARM/invalid-hint-thumb.s
index fd0a761da27b..d2b50c4d7dd3 100644
--- a/test/MC/ARM/invalid-hint-thumb.s
+++ b/test/MC/ARM/invalid-hint-thumb.s
@@ -1,9 +1,8 @@
-@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s
 
-hint #5
-hint.w #5
-hint #100
+hint #240
+hint #1000
+
+@ CHECK: error: immediate operand must be in the range [0,239]
+@ CHECK: error: immediate operand must be in the range [0,239]
 
-@ CHECK: error: immediate operand must be in the range [0,4]
-@ CHECK: error: immediate operand must be in the range [0,4]
-@ CHECK: error: immediate operand must be in the range [0,4]
diff --git a/test/MC/ARM/invalid-idiv.s b/test/MC/ARM/invalid-idiv.s
new file mode 100644
index 000000000000..a84e66a0ccfe
--- /dev/null
+++ b/test/MC/ARM/invalid-idiv.s
@@ -0,0 +1,28 @@
+@ RUN: not llvm-mc -triple=armv7 -mcpu=cortex-a15 -mattr=-hwdiv-arm < %s 2> %t
+@ RUN: FileCheck --check-prefix=ARM-A15 < %t %s
+@ RUN: not llvm-mc -triple=thumbv7 -mcpu=cortex-a15 -mattr=-hwdiv < %s 2> %t
+@ RUN: FileCheck --check-prefix=THUMB-A15 < %t %s
+@ RUN: not llvm-mc -triple=armv7 < %s 2> %t
+@ RUN: FileCheck --check-prefix=ARM < %t %s
+@ RUN: not llvm-mc -triple=thumbv7 < %s 2> %t
+@ RUN: FileCheck --check-prefix=THUMB < %t %s
+
+        sdiv  r1, r2, r3
+        udiv  r3, r4, r5
+@ ARM-A15: error: instruction requires: divide in ARM
+@ ARM-A15: sdiv r1, r2, r3
+@ ARM-A15: error: instruction requires: divide in ARM
+@ ARM-A15: udiv r3, r4, r5
+@ THUMB-A15: error: instruction requires: arm-mode
+@ THUMB-A15: sdiv r1, r2, r3
+@ THUMB-A15: error: instruction requires: arm-mode
+@ THUMB-A15: udiv r3, r4, r5
+
+@ ARM: error: instruction requires: divide in ARM
+@ ARM: sdiv r1, r2, r3
+@ ARM: error: instruction requires: divide in ARM
+@ ARM: udiv r3, r4, r5
+@ THUMB: error: instruction requires: divide in THUMB
+@ THUMB: sdiv r1, r2, r3
+@ THUMB: error: instruction requires: divide in THUMB
+@ THUMB: udiv r3, r4, r5
diff --git a/test/MC/ARM/invalid-neon-v8.s b/test/MC/ARM/invalid-neon-v8.s
new file mode 100644
index 000000000000..361946d4a074
--- /dev/null
+++ b/test/MC/ARM/invalid-neon-v8.s
@@ -0,0 +1,70 @@
+@ RUN: not llvm-mc -triple armv8 -mattr=-fp-armv8 -show-encoding < %s 2>&1 | FileCheck %s
+
+vmaxnm.f32 s4, d5, q1
+@ CHECK: error: invalid operand for instruction
+vmaxnm.f64.f64 s4, d5, q1
+@ CHECK: error: invalid operand for instruction
+vmaxnmge.f64.f64 s4, d5, q1
+@ CHECK: error: instruction 'vmaxnm' is not predicable, but condition code specified
+
+vcvta.s32.f32 s1, s2
+@ CHECK: error: instruction requires: FPARMv8
+vcvtp.u32.f32 s1, d2
+@ CHECK: error: invalid operand for instruction
+vcvtp.f32.u32 d1, q2
+@ CHECK: error: invalid operand for instruction
+vcvtplo.f32.u32 s1, s2
+@ CHECK: error: instruction 'vcvtp' is not predicable, but condition code specified
+
+vrinta.f64.f64 s3, d12
+@ CHECK: error: invalid operand for instruction
+vrintn.f32 d3, q12
+@ CHECK: error: invalid operand for instruction
+vrintz.f32 d3, q12
+@ CHECK: error: invalid operand for instruction
+vrintmge.f32.f32 d3, d4
+@ CHECK: error: instruction 'vrintm' is not predicable, but condition code specified
+
+aesd.8  q0, s1
+@ CHECK: error: invalid operand for instruction
+aese.8  s0, q1
+@ CHECK: error: invalid operand for instruction
+aesimc.8  s0, q1
+@ CHECK: error: invalid operand for instruction
+aesmc.8  q0, d1
+@ CHECK: error: invalid operand for instruction
+aesdge.8 q0, q1
+@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified
+
+sha1h.32  d0, q1
+@ CHECK: error: invalid operand for instruction
+sha1su1.32  q0, s1
+@ CHECK: error: invalid operand for instruction
+sha256su0.32  s0, q1
+@ CHECK: error: invalid operand for instruction
+sha1heq.32  q0, q1
+@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified
+
+sha1c.32  s0, d1, q2
+@ CHECK: error: invalid operand for instruction
+sha1m.32  q0, s1, q2
+@ CHECK: error: invalid operand for instruction
+sha1p.32  s0, q1, q2
+@ CHECK: error: invalid operand for instruction
+sha1su0.32  d0, q1, q2
+@ CHECK: error: invalid operand for instruction
+sha256h.32  q0, s1, q2
+@ CHECK: error: invalid operand for instruction
+sha256h2.32  q0, q1, s2
+@ CHECK: error: invalid operand for instruction
+sha256su1.32  s0, d1, q2
+@ CHECK: error: invalid operand for instruction
+sha256su1lt.32  q0, d1, q2
+@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified
+
+vmull.p64 q0, s1, s3
+@ CHECK: error: invalid operand for instruction
+vmull.p64 s1, d2, d3
+@ CHECK: error: invalid operand for instruction
+vmullge.p64 q0, d16, d17
+@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified
diff --git a/test/MC/ARM/lit.local.cfg b/test/MC/ARM/lit.local.cfg
index 57009139616f..8a3ba96497e7 100644
--- a/test/MC/ARM/lit.local.cfg
+++ b/test/MC/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/MC/ARM/load-store-acquire-release-v8-thumb.s b/test/MC/ARM/load-store-acquire-release-v8-thumb.s
new file mode 100644
index 000000000000..e34a26387357
--- /dev/null
+++ b/test/MC/ARM/load-store-acquire-release-v8-thumb.s
@@ -0,0 +1,48 @@
+@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+        ldaexb  r3, [r4]
+        ldaexh  r2, [r5]
+        ldaex  r1, [r7]
+        ldaexd  r6, r7, [r8]
+
+@ CHECK:  ldaexb	r3, [r4]                @ encoding: [0xd4,0xe8,0xcf,0x3f]
+@ CHECK:  ldaexh	r2, [r5]                @ encoding: [0xd5,0xe8,0xdf,0x2f]
+@ CHECK:  ldaex	r1, [r7]                @ encoding: [0xd7,0xe8,0xef,0x1f]
+@ CHECK:  ldaexd	r6, r7, [r8]            @ encoding: [0xd8,0xe8,0xff,0x67]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+
+        stlexb  r1, r3, [r4]
+        stlexh  r4, r2, [r5]
+        stlex  r2, r1, [r7]
+        stlexd  r6, r2, r3, [r8]
+@ CHECK: stlexb r1, r3, [r4]            @ encoding: [0xc4,0xe8,0xc1,0x3f]
+@ CHECK: stlexh r4, r2, [r5]            @ encoding: [0xc5,0xe8,0xd4,0x2f]
+@ CHECK: stlex r2, r1, [r7]            @ encoding: [0xc7,0xe8,0xe2,0x1f]
+@ CHECK: stlexd r6, r2, r3, [r8]        @ encoding: [0xc8,0xe8,0xf6,0x23]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+
+         lda r5, [r6]
+         ldab r5, [r6]
+         ldah r12, [r9]
+@ CHECK: lda r5, [r6]                   @ encoding: [0xd6,0xe8,0xaf,0x5f]
+@ CHECK: ldab r5, [r6]                  @ encoding: [0xd6,0xe8,0x8f,0x5f]
+@ CHECK: ldah r12, [r9]                 @ encoding: [0xd9,0xe8,0x9f,0xcf]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+
+         stl r3, [r0]
+         stlb r2, [r1]
+         stlh r2, [r3]
+@ CHECK: stl r3, [r0]                   @ encoding: [0xc0,0xe8,0xaf,0x3f]
+@ CHECK: stlb r2, [r1]                  @ encoding: [0xc1,0xe8,0x8f,0x2f]
+@ CHECK: stlh r2, [r3]                  @ encoding: [0xc3,0xe8,0x9f,0x2f]
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
+@ CHECK-V7: error: instruction requires: armv8
diff --git a/test/MC/ARM/load-store-acquire-release-v8.s b/test/MC/ARM/load-store-acquire-release-v8.s
new file mode 100644
index 000000000000..bc55364e684b
--- /dev/null
+++ b/test/MC/ARM/load-store-acquire-release-v8.s
@@ -0,0 +1,48 @@
+@ RUN: llvm-mc -triple=armv8 -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+        ldaexb  r3, [r4]
+        ldaexh  r2, [r5]
+        ldaex  r1, [r7]
+        ldaexd  r6, r7, [r8]
+
+@ CHECK: ldaexb r3, [r4]                @ encoding: [0x9f,0x3e,0xd4,0xe1]
+@ CHECK: ldaexh r2, [r5]                @ encoding: [0x9f,0x2e,0xf5,0xe1]
+@ CHECK: ldaex r1, [r7]                @ encoding: [0x9f,0x1e,0x97,0xe1]
+@ CHECK: ldaexd r6, r7, [r8]            @ encoding: [0x9f,0x6e,0xb8,0xe1]
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+
+        stlexb  r1, r3, [r4]
+        stlexh  r4, r2, [r5]
+        stlex  r2, r1, [r7]
+        stlexd  r6, r2, r3, [r8]
+@ CHECK: stlexb r1, r3, [r4]            @ encoding: [0x93,0x1e,0xc4,0xe1]
+@ CHECK: stlexh r4, r2, [r5]            @ encoding: [0x92,0x4e,0xe5,0xe1]
+@ CHECK: stlex r2, r1, [r7]            @ encoding: [0x91,0x2e,0x87,0xe1]
+@ CHECK: stlexd r6, r2, r3, [r8]        @ encoding: [0x92,0x6e,0xa8,0xe1]
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+
+         lda r5, [r6]
+         ldab r5, [r6]
+         ldah r12, [r9]
+@ CHECK: lda r5, [r6]                   @ encoding: [0x9f,0x5c,0x96,0xe1]
+@ CHECK: ldab r5, [r6]                  @ encoding: [0x9f,0x5c,0xd6,0xe1]
+@ CHECK: ldah r12, [r9]                 @ encoding: [0x9f,0xcc,0xf9,0xe1]
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+
+         stl r3, [r0]
+         stlb r2, [r1]
+         stlh r2, [r3]
+@ CHECK: stl r3, [r0]                   @ encoding: [0x93,0xfc,0x80,0xe1]
+@ CHECK: stlb r2, [r1]                  @ encoding: [0x92,0xfc,0xc1,0xe1]
+@ CHECK: stlh r2, [r3]                  @ encoding: [0x92,0xfc,0xe3,0xe1]
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
+@ CHECK-V7: instruction requires: armv8
diff --git a/test/MC/ARM/mapping-within-section.s b/test/MC/ARM/mapping-within-section.s
index 56dd6ef07e73..b1379d28a364 100644
--- a/test/MC/ARM/mapping-within-section.s
+++ b/test/MC/ARM/mapping-within-section.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=arm-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
 
     .text
 @ $a at 0x0000
diff --git a/test/MC/ARM/multi-section-mapping.s b/test/MC/ARM/multi-section-mapping.s
index f7c4e89a85ea..2b1b0efab53c 100644
--- a/test/MC/ARM/multi-section-mapping.s
+++ b/test/MC/ARM/multi-section-mapping.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=arm-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
 
         .text
         add r0, r0, r0
diff --git a/test/MC/ARM/neon-convert-encoding.s b/test/MC/ARM/neon-convert-encoding.s
index 1733c5222bee..20c789547145 100644
--- a/test/MC/ARM/neon-convert-encoding.s
+++ b/test/MC/ARM/neon-convert-encoding.s
@@ -18,20 +18,36 @@
 	vcvt.f32.u32	q8, q8
 @ CHECK: vcvt.s32.f32	d16, d16, #1    @ encoding: [0x30,0x0f,0xff,0xf2]
 	vcvt.s32.f32	d16, d16, #1
+@ CHECK: vcvt.s32.f32	d16, d16        @ encoding: [0x20,0x07,0xfb,0xf3]
+	vcvt.s32.f32	d16, d16, #0
 @ CHECK: vcvt.u32.f32	d16, d16, #1    @ encoding: [0x30,0x0f,0xff,0xf3]
 	vcvt.u32.f32	d16, d16, #1
+@ CHECK: vcvt.u32.f32	d16, d16        @ encoding: [0xa0,0x07,0xfb,0xf3]
+	vcvt.u32.f32	d16, d16, #0
 @ CHECK: vcvt.f32.s32	d16, d16, #1    @ encoding: [0x30,0x0e,0xff,0xf2]
 	vcvt.f32.s32	d16, d16, #1
+@ CHECK: vcvt.f32.s32	d16, d16        @ encoding: [0x20,0x06,0xfb,0xf3]
+	vcvt.f32.s32	d16, d16, #0
 @ CHECK: vcvt.f32.u32	d16, d16, #1    @ encoding: [0x30,0x0e,0xff,0xf3]
 	vcvt.f32.u32	d16, d16, #1
+@ CHECK: vcvt.f32.u32	d16, d16        @ encoding: [0xa0,0x06,0xfb,0xf3]
+	vcvt.f32.u32	d16, d16, #0
 @ CHECK: vcvt.s32.f32	q8, q8, #1      @ encoding: [0x70,0x0f,0xff,0xf2]
 	vcvt.s32.f32	q8, q8, #1
+@ CHECK: vcvt.s32.f32	q8, q8          @ encoding: [0x60,0x07,0xfb,0xf3]
+	vcvt.s32.f32	q8, q8, #0
 @ CHECK: vcvt.u32.f32	q8, q8, #1      @ encoding: [0x70,0x0f,0xff,0xf3]
 	vcvt.u32.f32	q8, q8, #1
+@ CHECK: vcvt.u32.f32	q8, q8          @ encoding: [0xe0,0x07,0xfb,0xf3]
+	vcvt.u32.f32	q8, q8, #0
 @ CHECK: vcvt.f32.s32	q8, q8, #1      @ encoding: [0x70,0x0e,0xff,0xf2]
 	vcvt.f32.s32	q8, q8, #1
+@ CHECK: vcvt.f32.s32	q8, q8          @ encoding: [0x60,0x06,0xfb,0xf3]
+	vcvt.f32.s32	q8, q8, #0
 @ CHECK: vcvt.f32.u32	q8, q8, #1      @ encoding: [0x70,0x0e,0xff,0xf3]
 	vcvt.f32.u32	q8, q8, #1
+@ CHECK: vcvt.f32.u32	q8, q8          @ encoding: [0xe0,0x06,0xfb,0xf3]
+	vcvt.f32.u32	q8, q8, #0
 @ CHECK: vcvt.f32.f16	q8, d16         @ encoding: [0x20,0x07,0xf6,0xf3]
 	vcvt.f32.f16	q8, d16
 @ CHECK: vcvt.f16.f32	d16, q8         @ encoding: [0x20,0x06,0xf6,0xf3]
diff --git a/test/MC/ARM/neon-crypto.s b/test/MC/ARM/neon-crypto.s
new file mode 100644
index 000000000000..92d24da6c651
--- /dev/null
+++ b/test/MC/ARM/neon-crypto.s
@@ -0,0 +1,51 @@
+@ RUN: llvm-mc -triple armv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+
+aesd.8  q0, q1
+aese.8  q0, q1
+aesimc.8  q0, q1
+aesmc.8  q0, q1
+@ CHECK: aesd.8 q0, q1          @ encoding: [0x42,0x03,0xb0,0xf3]
+@ CHECK: aese.8 q0, q1          @ encoding: [0x02,0x03,0xb0,0xf3]
+@ CHECK: aesimc.8 q0, q1        @ encoding: [0xc2,0x03,0xb0,0xf3]
+@ CHECK: aesmc.8 q0, q1         @ encoding: [0x82,0x03,0xb0,0xf3]
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+
+sha1h.32  q0, q1
+sha1su1.32  q0, q1
+sha256su0.32  q0, q1
+@ CHECK: sha1h.32  q0, q1       @ encoding: [0xc2,0x02,0xb9,0xf3]
+@ CHECK: sha1su1.32 q0, q1      @ encoding: [0x82,0x03,0xba,0xf3]
+@ CHECK: sha256su0.32 q0, q1    @ encoding: [0xc2,0x03,0xba,0xf3]
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+
+sha1c.32  q0, q1, q2
+sha1m.32  q0, q1, q2
+sha1p.32  q0, q1, q2
+sha1su0.32  q0, q1, q2
+sha256h.32  q0, q1, q2
+sha256h2.32  q0, q1, q2
+sha256su1.32  q0, q1, q2
+@ CHECK: sha1c.32  q0, q1, q2   @ encoding: [0x44,0x0c,0x02,0xf2]
+@ CHECK: sha1m.32  q0, q1, q2   @ encoding: [0x44,0x0c,0x22,0xf2]
+@ CHECK: sha1p.32 q0, q1, q2    @ encoding: [0x44,0x0c,0x12,0xf2]
+@ CHECK: sha1su0.32  q0, q1, q2      @ encoding: [0x44,0x0c,0x32,0xf2]
+@ CHECK: sha256h.32  q0, q1, q2      @ encoding: [0x44,0x0c,0x02,0xf3]
+@ CHECK: sha256h2.32 q0, q1, q2      @ encoding: [0x44,0x0c,0x12,0xf3]
+@ CHECK: sha256su1.32 q0, q1, q2     @ encoding: [0x44,0x0c,0x22,0xf3]
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+@ CHECK-V7: instruction requires: crypto armv8
+
+vmull.p64 q8, d16, d17
+@ CHECK: vmull.p64  q8, d16, d17    @ encoding: [0xa1,0x0e,0xe0,0xf2]
+@ CHECK-V7: instruction requires: crypto armv8
diff --git a/test/MC/ARM/neon-mov-encoding.s b/test/MC/ARM/neon-mov-encoding.s
index 6f26a13c3ea9..ba66d7e14be1 100644
--- a/test/MC/ARM/neon-mov-encoding.s
+++ b/test/MC/ARM/neon-mov-encoding.s
@@ -128,3 +128,42 @@
 @ CHECK: vmov.8	d18[1], r1              @ encoding: [0xb0,0x1b,0x42,0xee]
 @ CHECK: vmov.16	d18[1], r1      @ encoding: [0xf0,0x1b,0x02,0xee]
 @ CHECK: vmov.32	d18[1], r1      @ encoding: [0x90,0x1b,0x22,0xee]
+
+
+        vmvn.8 d1, d2
+        vmvn.16 d1, d2
+        vmvn.32 d1, d2
+        vmvn.64 d1, d2
+        vmvn.i8 d1, d2
+        vmvn.i16 d1, d2
+        vmvn.i32 d1, d2
+        vmvn.i64 d1, d2
+        vmvn.s8 d1, d2
+        vmvn.s16 d1, d2
+        vmvn.s32 d1, d2
+        vmvn.s64 d1, d2
+        vmvn.u8 d1, d2
+        vmvn.u16 d1, d2
+        vmvn.u32 d1, d2
+        vmvn.u64 d1, d2
+        vmvn.f32 d1, d2
+        vmvn.f64 d1, d2
+
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
+@ CHECK: vmvn	d1, d2                  @ encoding: [0x82,0x15,0xb0,0xf3]
diff --git a/test/MC/ARM/neon-v8.s b/test/MC/ARM/neon-v8.s
new file mode 100644
index 000000000000..429c8e3c0873
--- /dev/null
+++ b/test/MC/ARM/neon-v8.s
@@ -0,0 +1,83 @@
+@ RUN: llvm-mc -triple armv8 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+vmaxnm.f32 d4, d5, d1
+@ CHECK: vmaxnm.f32 d4, d5, d1 @ encoding: [0x11,0x4f,0x05,0xf3]
+vmaxnm.f32 q2, q4, q6
+@ CHECK: vmaxnm.f32 q2, q4, q6 @ encoding: [0x5c,0x4f,0x08,0xf3]
+vminnm.f32 d5, d4, d30
+@ CHECK: vminnm.f32 d5, d4, d30 @ encoding: [0x3e,0x5f,0x24,0xf3]
+vminnm.f32 q0, q13, q2
+@ CHECK: vminnm.f32 q0, q13, q2 @ encoding: [0xd4,0x0f,0x2a,0xf3]
+
+vcvta.s32.f32	d4, d6
+@ CHECK: vcvta.s32.f32	d4, d6 @ encoding: [0x06,0x40,0xbb,0xf3]
+vcvta.u32.f32	d12, d10
+@ CHECK: vcvta.u32.f32	d12, d10 @ encoding: [0x8a,0xc0,0xbb,0xf3]
+vcvta.s32.f32	q4, q6
+@ CHECK: vcvta.s32.f32	q4, q6 @ encoding: [0x4c,0x80,0xbb,0xf3]
+vcvta.u32.f32	q4, q10
+@ CHECK: vcvta.u32.f32	q4, q10 @ encoding: [0xe4,0x80,0xbb,0xf3]
+
+vcvtm.s32.f32	d1, d30
+@ CHECK: vcvtm.s32.f32	d1, d30 @ encoding: [0x2e,0x13,0xbb,0xf3]
+vcvtm.u32.f32	d12, d10
+@ CHECK: vcvtm.u32.f32	d12, d10 @ encoding: [0x8a,0xc3,0xbb,0xf3]
+vcvtm.s32.f32	q1, q10
+@ CHECK: vcvtm.s32.f32	q1, q10 @ encoding: [0x64,0x23,0xbb,0xf3]
+vcvtm.u32.f32	q13, q1
+@ CHECK: vcvtm.u32.f32	q13, q1 @ encoding: [0xc2,0xa3,0xfb,0xf3]
+
+vcvtn.s32.f32	d15, d17
+@ CHECK: vcvtn.s32.f32	d15, d17 @ encoding: [0x21,0xf1,0xbb,0xf3]
+vcvtn.u32.f32	d5, d3
+@ CHECK: vcvtn.u32.f32	d5, d3 @ encoding: [0x83,0x51,0xbb,0xf3]
+vcvtn.s32.f32	q3, q8
+@ CHECK: vcvtn.s32.f32	q3, q8 @ encoding: [0x60,0x61,0xbb,0xf3]
+vcvtn.u32.f32	q5, q3
+@ CHECK: vcvtn.u32.f32	q5, q3 @ encoding: [0xc6,0xa1,0xbb,0xf3]
+
+vcvtp.s32.f32	d11, d21
+@ CHECK: vcvtp.s32.f32	d11, d21 @ encoding: [0x25,0xb2,0xbb,0xf3]
+vcvtp.u32.f32	d14, d23
+@ CHECK: vcvtp.u32.f32	d14, d23 @ encoding: [0xa7,0xe2,0xbb,0xf3]
+vcvtp.s32.f32	q4, q15
+@ CHECK: vcvtp.s32.f32	q4, q15 @ encoding: [0x6e,0x82,0xbb,0xf3]
+vcvtp.u32.f32	q9, q8
+@ CHECK: vcvtp.u32.f32	q9, q8 @ encoding: [0xe0,0x22,0xfb,0xf3]
+
+vrintn.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0x00,0x34,0xba,0xf3]
+vrintn.f32 q1, q4
+@ CHECK: vrintn.f32 q1, q4 @ encoding: [0x48,0x24,0xba,0xf3]
+vrintx.f32 d5, d12
+@ CHECK: vrintx.f32 d5, d12 @ encoding: [0x8c,0x54,0xba,0xf3]
+vrintx.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xc6,0x04,0xba,0xf3]
+vrinta.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0x00,0x35,0xba,0xf3]
+vrinta.f32 q8, q2
+@ CHECK: vrinta.f32 q8, q2 @ encoding: [0x44,0x05,0xfa,0xf3]
+vrintz.f32 d12, d18
+@ CHECK: vrintz.f32 d12, d18 @ encoding: [0xa2,0xc5,0xba,0xf3]
+vrintz.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xc8,0x25,0xfa,0xf3]
+vrintm.f32 d3, d0
+@ CHECK: vrintm.f32 d3, d0 @ encoding: [0x80,0x36,0xba,0xf3]
+vrintm.f32 q1, q4
+@ CHECK: vrintm.f32 q1, q4 @ encoding: [0xc8,0x26,0xba,0xf3]
+vrintp.f32 d3, d0
+@ CHECK: vrintp.f32 d3, d0 @ encoding: [0x80,0x37,0xba,0xf3]
+vrintp.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xc8,0x27,0xba,0xf3]
+
+@ test the aliases of vrint
+vrintn.f32.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0x00,0x34,0xba,0xf3]
+vrintx.f32.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xc6,0x04,0xba,0xf3]
+vrinta.f32.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0x00,0x35,0xba,0xf3]
+vrintz.f32.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xc8,0x25,0xfa,0xf3]
+vrintp.f32.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xc8,0x27,0xba,0xf3]
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index ef9f037c536f..3c7e34e83ca6 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -282,4 +282,4 @@
         vst2.16	{d16, d17}, [r0, :128]
 
 @ CHECK: vst2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x40,0xf4]
-@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf4]
-\ No newline at end of file
+@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf4]
diff --git a/test/MC/ARM/neont2-vld-encoding.s b/test/MC/ARM/neont2-vld-encoding.s
index 7db855278116..d379d170bab5 100644
--- a/test/MC/ARM/neont2-vld-encoding.s
+++ b/test/MC/ARM/neont2-vld-encoding.s
@@ -1,112 +1,111 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
 .code 16
 
-@ CHECK: vld1.8	{d16}, [r0:64]        @ encoding: [0x1f,0x07,0x60,0xf9]
-	vld1.8	{d16}, [r0:64]
-@ CHECK: vld1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x60,0xf9]
+@ CHECK: vld1.8	{d16}, [r0:64]        @ encoding: [0x60,0xf9,0x1f,0x07]
+  vld1.8	{d16}, [r0:64]
+@ CHECK: vld1.16	{d16}, [r0]             @ encoding: [0x60,0xf9,0x4f,0x07]
   vld1.16	{d16}, [r0]
-@ CHECK: vld1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x60,0xf9]
+@ CHECK: vld1.32	{d16}, [r0]             @ encoding: [0x60,0xf9,0x8f,0x07]
   vld1.32	{d16}, [r0]
-@ CHECK: vld1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x60,0xf9]
+@ CHECK: vld1.64	{d16}, [r0]             @ encoding: [0x60,0xf9,0xcf,0x07]
   vld1.64	{d16}, [r0]
-@ CHECK: vld1.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x0a,0x60,0xf9]
+@ CHECK: vld1.8	{d16, d17}, [r0:64]   @ encoding: [0x60,0xf9,0x1f,0x0a]
   vld1.8	{d16, d17}, [r0:64]
-@ CHECK: vld1.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x0a,0x60,0xf9]
+@ CHECK: vld1.16	{d16, d17}, [r0:128]  @ encoding: [0x60,0xf9,0x6f,0x0a]
   vld1.16	{d16, d17}, [r0:128]
-@ CHECK: vld1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x60,0xf9]
+@ CHECK: vld1.32	{d16, d17}, [r0]        @ encoding: [0x60,0xf9,0x8f,0x0a]
   vld1.32	{d16, d17}, [r0]
-@ CHECK: vld1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x60,0xf9]
+@ CHECK: vld1.64	{d16, d17}, [r0]        @ encoding: [0x60,0xf9,0xcf,0x0a]
   vld1.64	{d16, d17}, [r0]
 
-@ CHECK: vld2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x60,0xf9]
+@ CHECK: vld2.8	{d16, d17}, [r0:64]   @ encoding: [0x60,0xf9,0x1f,0x08]
   vld2.8	{d16, d17}, [r0:64]
-@ CHECK: vld2.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x08,0x60,0xf9]
+@ CHECK: vld2.16	{d16, d17}, [r0:128]  @ encoding: [0x60,0xf9,0x6f,0x08]
   vld2.16	{d16, d17}, [r0:128]
-@ CHECK: vld2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x60,0xf9]
+@ CHECK: vld2.32	{d16, d17}, [r0]        @ encoding: [0x60,0xf9,0x8f,0x08]
   vld2.32	{d16, d17}, [r0]
-@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x60,0xf9]
+@ CHECK: vld2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x60,0xf9,0x1f,0x03]
   vld2.8	{d16, d17, d18, d19}, [r0:64]
-@ CHECK: vld2.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x60,0xf9]
+@ CHECK: vld2.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x60,0xf9,0x6f,0x03]
   vld2.16	{d16, d17, d18, d19}, [r0:128]
-@ CHECK: vld2.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x60,0xf9]
+@ CHECK: vld2.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0x60,0xf9,0xbf,0x03]
   vld2.32	{d16, d17, d18, d19}, [r0:256]
 
-@ CHECK: vld3.8	{d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x04,0x60,0xf9]
+@ CHECK: vld3.8	{d16, d17, d18}, [r0:64] @ encoding: [0x60,0xf9,0x1f,0x04]
   vld3.8	{d16, d17, d18}, [r0:64]
-@ CHECK: vld3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x60,0xf9]
+@ CHECK: vld3.16	{d16, d17, d18}, [r0]   @ encoding: [0x60,0xf9,0x4f,0x04]
   vld3.16	{d16, d17, d18}, [r0]
-@ CHECK: vld3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x60,0xf9]
+@ CHECK: vld3.32	{d16, d17, d18}, [r0]   @ encoding: [0x60,0xf9,0x8f,0x04]
   vld3.32	{d16, d17, d18}, [r0]
-@ CHECK: vld3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x60,0xf9]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x60,0xf9,0x1d,0x05]
   vld3.8	{d16, d18, d20}, [r0:64]!
-@ CHECK: vld3.8	{d17, d19, d21}, [r0:64]! @ encoding: [0x1d,0x15,0x60,0xf9]
+@ CHECK: vld3.8	{d17, d19, d21}, [r0:64]! @ encoding: [0x60,0xf9,0x1d,0x15]
   vld3.8	{d17, d19, d21}, [r0:64]!
-@ CHECK: vld3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x60,0xf9] 
+@ CHECK: vld3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x60,0xf9,0x4d,0x05] 
   vld3.16	{d16, d18, d20}, [r0]!
-@ CHECK: vld3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x60,0xf9]
+@ CHECK: vld3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x60,0xf9,0x4d,0x15]
   vld3.16	{d17, d19, d21}, [r0]!
-@ CHECK: vld3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x60,0xf9]
+@ CHECK: vld3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x60,0xf9,0x8d,0x05]
   vld3.32	{d16, d18, d20}, [r0]!
-@ CHECK: vld3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x60,0xf9]
+@ CHECK: vld3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x60,0xf9,0x8d,0x15]
   vld3.32	{d17, d19, d21}, [r0]!
 
-@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x00,0x60,0xf9]
+@ CHECK: vld4.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x60,0xf9,0x1f,0x00]
   vld4.8	{d16, d17, d18, d19}, [r0:64]
-@ CHECK: vld4.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x00,0x60,0xf9]
+@ CHECK: vld4.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x60,0xf9,0x6f,0x00]
   vld4.16	{d16, d17, d18, d19}, [r0:128]
-@ CHECK: vld4.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x00,0x60,0xf9]
+@ CHECK: vld4.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0x60,0xf9,0xbf,0x00]
   vld4.32	{d16, d17, d18, d19}, [r0:256]
-@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0:256]! @ encoding: [0x3d,0x01,0x60,0xf9]
+@ CHECK: vld4.8	{d16, d18, d20, d22}, [r0:256]! @ encoding: [0x60,0xf9,0x3d,0x01]
   vld4.8	{d16, d18, d20, d22}, [r0:256]!
-@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0:256]! @ encoding: [0x3d,0x11,0x60,0xf9]
+@ CHECK: vld4.8	{d17, d19, d21, d23}, [r0:256]! @ encoding: [0x60,0xf9,0x3d,0x11]
   vld4.8	{d17, d19, d21, d23}, [r0:256]!
-@ CHECK: vld4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf9]
+@ CHECK: vld4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x60,0xf9,0x4d,0x01]
   vld4.16	{d16, d18, d20, d22}, [r0]!
-@ CHECK: vld4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf9]
+@ CHECK: vld4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x60,0xf9,0x4d,0x11]
   vld4.16	{d17, d19, d21, d23}, [r0]!
-@ CHECK: vld4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf9]
+@ CHECK: vld4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x60,0xf9,0x8d,0x01]
   vld4.32	{d16, d18, d20, d22}, [r0]!
-@ CHECK: vld4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf9]
+@ CHECK: vld4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x60,0xf9,0x8d,0x11]
   vld4.32	{d17, d19, d21, d23}, [r0]!
 
-@ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf9]
+@ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0xe0,0xf9,0x6f,0x00]
   vld1.8	{d16[3]}, [r0]
-@ CHECK: vld1.16	{d16[2]}, [r0:16]     @ encoding: [0x9f,0x04,0xe0,0xf9]
+@ CHECK: vld1.16	{d16[2]}, [r0:16]     @ encoding: [0xe0,0xf9,0x9f,0x04]
   vld1.16	{d16[2]}, [r0:16]
-@ CHECK: vld1.32	{d16[1]}, [r0:32]     @ encoding: [0xbf,0x08,0xe0,0xf9]
+@ CHECK: vld1.32	{d16[1]}, [r0:32]     @ encoding: [0xe0,0xf9,0xbf,0x08]
   vld1.32	{d16[1]}, [r0:32]
 
-@ CHECK: vld2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xe0,0xf9]
+@ CHECK: vld2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0xe0,0xf9,0x3f,0x01]
   vld2.8	{d16[1], d17[1]}, [r0:16]
-@ CHECK: vld2.16	{d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xe0,0xf9]
+@ CHECK: vld2.16	{d16[1], d17[1]}, [r0:32] @ encoding: [0xe0,0xf9,0x5f,0x05]
   vld2.16	{d16[1], d17[1]}, [r0:32]
-@ CHECK: vld2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf9]
+@ CHECK: vld2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0xe0,0xf9,0x8f,0x09]
   vld2.32	{d16[1], d17[1]}, [r0]
-@ CHECK: vld2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf9]
+@ CHECK: vld2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0xe0,0xf9,0x6f,0x15]
   vld2.16	{d17[1], d19[1]}, [r0]
-@ CHECK: vld2.32	{d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xe0,0xf9]
+@ CHECK: vld2.32	{d17[0], d19[0]}, [r0:64] @ encoding: [0xe0,0xf9,0x5f,0x19]
   vld2.32	{d17[0], d19[0]}, [r0:64]
 
-@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf9]
+@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0xe0,0xf9,0x2f,0x02]
   vld3.8	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vld3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xe0,0xf9]
+@ CHECK: vld3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0xe0,0xf9,0x4f,0x06]
   vld3.16	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vld3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xe0,0xf9]
+@ CHECK: vld3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0xe0,0xf9,0x8f,0x0a]
   vld3.32	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vld3.16	{d16[1], d18[1], d20[1]}, [r0] @ encoding: [0x6f,0x06,0xe0,0xf9]
+@ CHECK: vld3.16	{d16[1], d18[1], d20[1]}, [r0] @ encoding: [0xe0,0xf9,0x6f,0x06]
   vld3.16	{d16[1], d18[1], d20[1]}, [r0]
-@ CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0] @ encoding: [0xcf,0x1a,0xe0,0xf9]
+@ CHECK: vld3.32	{d17[1], d19[1], d21[1]}, [r0] @ encoding: [0xe0,0xf9,0xcf,0x1a]
   vld3.32	{d17[1], d19[1], d21[1]}, [r0]
 
-@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0x3f,0x03,0xe0,0xf9]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0xe0,0xf9,0x3f,0x03]
   vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
-@ CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf9]
+@ CHECK: vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0xe0,0xf9,0x4f,0x07]
   vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xaf,0x0b,0xe0,0xf9]
+@ CHECK: vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xe0,0xf9,0xaf,0x0b]
   vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
-@ CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64] @ encoding: [0x7f,0x07,0xe0,0xf9]
+@ CHECK: vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64] @ encoding: [0xe0,0xf9,0x7f,0x07]
   vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0:64]
-@ CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf9]
+@ CHECK: vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0xe0,0xf9,0x4f,0x1b]
   vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
index 9adf7514f4b7..410d8f349f11 100644
--- a/test/MC/ARM/neont2-vst-encoding.s
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -1,105 +1,104 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
 
 .code 16
 
-@ CHECK: vst1.8	{d16}, [r0:64]        @ encoding: [0x1f,0x07,0x40,0xf9]
+@ CHECK: vst1.8	{d16}, [r0:64]        @ encoding: [0x40,0xf9,0x1f,0x07]
   vst1.8	{d16}, [r0:64]
-@ CHECK: vst1.16	{d16}, [r0]             @ encoding: [0x4f,0x07,0x40,0xf9]
+@ CHECK: vst1.16	{d16}, [r0]             @ encoding: [0x40,0xf9,0x4f,0x07]
   vst1.16	{d16}, [r0]
-@ CHECK: vst1.32	{d16}, [r0]             @ encoding: [0x8f,0x07,0x40,0xf9]
+@ CHECK: vst1.32	{d16}, [r0]             @ encoding: [0x40,0xf9,0x8f,0x07]
   vst1.32	{d16}, [r0]
-@ CHECK: vst1.64	{d16}, [r0]             @ encoding: [0xcf,0x07,0x40,0xf9]
+@ CHECK: vst1.64	{d16}, [r0]             @ encoding: [0x40,0xf9,0xcf,0x07]
   vst1.64	{d16}, [r0]
-@ CHECK: vst1.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x0a,0x40,0xf9]
+@ CHECK: vst1.8	{d16, d17}, [r0:64]   @ encoding: [0x40,0xf9,0x1f,0x0a]
   vst1.8	{d16, d17}, [r0:64]
-@ CHECK: vst1.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x0a,0x40,0xf9]
+@ CHECK: vst1.16	{d16, d17}, [r0:128]  @ encoding: [0x40,0xf9,0x6f,0x0a]
   vst1.16	{d16, d17}, [r0:128]
-@ CHECK: vst1.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x0a,0x40,0xf9]
+@ CHECK: vst1.32	{d16, d17}, [r0]        @ encoding: [0x40,0xf9,0x8f,0x0a]
   vst1.32	{d16, d17}, [r0]
-@ CHECK: vst1.64	{d16, d17}, [r0]        @ encoding: [0xcf,0x0a,0x40,0xf9]
+@ CHECK: vst1.64	{d16, d17}, [r0]        @ encoding: [0x40,0xf9,0xcf,0x0a]
   vst1.64	{d16, d17}, [r0]
 
-@ CHECK: vst2.8	{d16, d17}, [r0:64]   @ encoding: [0x1f,0x08,0x40,0xf9]
+@ CHECK: vst2.8	{d16, d17}, [r0:64]   @ encoding: [0x40,0xf9,0x1f,0x08]
   vst2.8	{d16, d17}, [r0:64]
-@ CHECK: vst2.16	{d16, d17}, [r0:128]  @ encoding: [0x6f,0x08,0x40,0xf9]
+@ CHECK: vst2.16	{d16, d17}, [r0:128]  @ encoding: [0x40,0xf9,0x6f,0x08]
   vst2.16	{d16, d17}, [r0:128]
-@ CHECK: vst2.32	{d16, d17}, [r0]        @ encoding: [0x8f,0x08,0x40,0xf9]
+@ CHECK: vst2.32	{d16, d17}, [r0]        @ encoding: [0x40,0xf9,0x8f,0x08]
   vst2.32	{d16, d17}, [r0]
-@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x40,0xf9]
+@ CHECK: vst2.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x40,0xf9,0x1f,0x03]
   vst2.8	{d16, d17, d18, d19}, [r0:64]
-@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x40,0xf9]
+@ CHECK: vst2.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x40,0xf9,0x6f,0x03]
   vst2.16	{d16, d17, d18, d19}, [r0:128]
-@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x40,0xf9]
+@ CHECK: vst2.32	{d16, d17, d18, d19}, [r0:256] @ encoding: [0x40,0xf9,0xbf,0x03]
   vst2.32	{d16, d17, d18, d19}, [r0:256]
 
-@ CHECK: vst3.8	{d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x04,0x40,0xf9]
+@ CHECK: vst3.8	{d16, d17, d18}, [r0:64] @ encoding: [0x40,0xf9,0x1f,0x04]
   vst3.8	{d16, d17, d18}, [r0:64]
-@ CHECK: vst3.16	{d16, d17, d18}, [r0]   @ encoding: [0x4f,0x04,0x40,0xf9]
+@ CHECK: vst3.16	{d16, d17, d18}, [r0]   @ encoding: [0x40,0xf9,0x4f,0x04]
   vst3.16	{d16, d17, d18}, [r0]
-@ CHECK: vst3.32	{d16, d17, d18}, [r0]   @ encoding: [0x8f,0x04,0x40,0xf9]
+@ CHECK: vst3.32	{d16, d17, d18}, [r0]   @ encoding: [0x40,0xf9,0x8f,0x04]
   vst3.32	{d16, d17, d18}, [r0]
-@ CHECK: vst3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x40,0xf9]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0:64]! @ encoding: [0x40,0xf9,0x1d,0x05]
   vst3.8	{d16, d18, d20}, [r0:64]!
-@ CHECK: vst3.8	{d17, d19, d21}, [r0:64]! @ encoding: [0x1d,0x15,0x40,0xf9]
+@ CHECK: vst3.8	{d17, d19, d21}, [r0:64]! @ encoding: [0x40,0xf9,0x1d,0x15]
   vst3.8	{d17, d19, d21}, [r0:64]!
-@ CHECK: vst3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x4d,0x05,0x40,0xf9]
+@ CHECK: vst3.16	{d16, d18, d20}, [r0]!  @ encoding: [0x40,0xf9,0x4d,0x05]
   vst3.16	{d16, d18, d20}, [r0]!
-@ CHECK: vst3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x4d,0x15,0x40,0xf9]
+@ CHECK: vst3.16	{d17, d19, d21}, [r0]!  @ encoding: [0x40,0xf9,0x4d,0x15]
   vst3.16	{d17, d19, d21}, [r0]!
-@ CHECK: vst3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x8d,0x05,0x40,0xf9]
+@ CHECK: vst3.32	{d16, d18, d20}, [r0]!  @ encoding: [0x40,0xf9,0x8d,0x05]
   vst3.32	{d16, d18, d20}, [r0]!
-@ CHECK: vst3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x8d,0x15,0x40,0xf9]
+@ CHECK: vst3.32	{d17, d19, d21}, [r0]!  @ encoding: [0x40,0xf9,0x8d,0x15]
   vst3.32	{d17, d19, d21}, [r0]!
 
-@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x00,0x40,0xf9]
+@ CHECK: vst4.8	{d16, d17, d18, d19}, [r0:64] @ encoding: [0x40,0xf9,0x1f,0x00]
   vst4.8	{d16, d17, d18, d19}, [r0:64]
-@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x00,0x40,0xf9]
+@ CHECK: vst4.16	{d16, d17, d18, d19}, [r0:128] @ encoding: [0x40,0xf9,0x6f,0x00]
   vst4.16	{d16, d17, d18, d19}, [r0:128]
-@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0:256]! @ encoding: [0x3d,0x01,0x40,0xf9]
+@ CHECK: vst4.8	{d16, d18, d20, d22}, [r0:256]! @ encoding: [0x40,0xf9,0x3d,0x01]
   vst4.8	{d16, d18, d20, d22}, [r0:256]!
-@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0:256]! @ encoding: [0x3d,0x11,0x40,0xf9]
+@ CHECK: vst4.8	{d17, d19, d21, d23}, [r0:256]! @ encoding: [0x40,0xf9,0x3d,0x11]
   vst4.8	{d17, d19, d21, d23}, [r0:256]!
-@ CHECK: vst4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf9]
+@ CHECK: vst4.16	{d16, d18, d20, d22}, [r0]! @ encoding: [0x40,0xf9,0x4d,0x01]
   vst4.16	{d16, d18, d20, d22}, [r0]!
-@ CHECK: vst4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf9]
+@ CHECK: vst4.16	{d17, d19, d21, d23}, [r0]! @ encoding: [0x40,0xf9,0x4d,0x11]
   vst4.16	{d17, d19, d21, d23}, [r0]!
-@ CHECK: vst4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf9]
+@ CHECK: vst4.32	{d16, d18, d20, d22}, [r0]! @ encoding: [0x40,0xf9,0x8d,0x01]
   vst4.32	{d16, d18, d20, d22}, [r0]!
-@ CHECK: vst4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf9]
+@ CHECK: vst4.32	{d17, d19, d21, d23}, [r0]! @ encoding: [0x40,0xf9,0x8d,0x11]
   vst4.32	{d17, d19, d21, d23}, [r0]!
 
-@ CHECK: vst2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xc0,0xf9]
+@ CHECK: vst2.8	{d16[1], d17[1]}, [r0:16] @ encoding: [0xc0,0xf9,0x3f,0x01]
   vst2.8	{d16[1], d17[1]}, [r0:16]
-@ CHECK: vst2.16	{d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xc0,0xf9]
+@ CHECK: vst2.16	{d16[1], d17[1]}, [r0:32] @ encoding: [0xc0,0xf9,0x5f,0x05]
   vst2.16	{d16[1], d17[1]}, [r0:32]
-@ CHECK: vst2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf9]
+@ CHECK: vst2.32	{d16[1], d17[1]}, [r0]  @ encoding: [0xc0,0xf9,0x8f,0x09]
   vst2.32	{d16[1], d17[1]}, [r0]
-@ CHECK: vst2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf9]
+@ CHECK: vst2.16	{d17[1], d19[1]}, [r0]  @ encoding: [0xc0,0xf9,0x6f,0x15]
   vst2.16	{d17[1], d19[1]}, [r0]
-@ CHECK: vst2.32	{d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf9]
+@ CHECK: vst2.32	{d17[0], d19[0]}, [r0:64] @ encoding: [0xc0,0xf9,0x5f,0x19]
   vst2.32	{d17[0], d19[0]}, [r0:64]
 
-@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf9]
+@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0xc0,0xf9,0x2f,0x02]
   vst3.8	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xc0,0xf9]
+@ CHECK: vst3.16	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0xc0,0xf9,0x4f,0x06]
   vst3.16	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xc0,0xf9]
+@ CHECK: vst3.32	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0xc0,0xf9,0x8f,0x0a]
   vst3.32	{d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.16	{d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xaf,0x16,0xc0,0xf9]
+@ CHECK: vst3.16	{d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xc0,0xf9,0xaf,0x16]
   vst3.16	{d17[2], d19[2], d21[2]}, [r0]
-@ CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf9]
+@ CHECK: vst3.32	{d16[0], d18[0], d20[0]}, [r0] @ encoding: [0xc0,0xf9,0x4f,0x0a]
   vst3.32	{d16[0], d18[0], d20[0]}, [r0]
 
-@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0x3f,0x03,0xc0,0xf9]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0xc0,0xf9,0x3f,0x03]
   vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0:32]
-@ CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf9]
+@ CHECK: vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0xc0,0xf9,0x4f,0x07]
   vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xaf,0x0b,0xc0,0xf9]
+@ CHECK: vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xc0,0xf9,0xaf,0x0b]
   vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0:128]
-@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64] @ encoding: [0xff,0x17,0xc0,0xf9]
+@ CHECK: vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64] @ encoding: [0xc0,0xf9,0xff,0x17]
   vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0:64]
-@ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
+@ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0xc0,0xf9,0x4f,0x1b]
   vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
 @ rdar://11082188
diff --git a/test/MC/ARM/obsolete-v8.s b/test/MC/ARM/obsolete-v8.s
new file mode 100644
index 000000000000..0d6176b3dd13
--- /dev/null
+++ b/test/MC/ARM/obsolete-v8.s
@@ -0,0 +1,7 @@
+@ RUN: not llvm-mc -triple=armv8 < %s 2>&1 | FileCheck %s
+
+swp r0, r1, [r2]
+@ CHECK: instruction requires: armv7 or earlier
+
+swpb r0, r1, [r2]
+@ CHECK: instruction requires: armv7 or earlier
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index 2a226205a086..d840e9cd798a 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -157,6 +157,10 @@
         vmrs  r0, fpexc
 @ CHECK: vmrs  r0, fpsid             @ encoding: [0x10,0x0a,0xf0,0xee]
         vmrs  r0, fpsid
+@ CHECK: vmrs	r1, fpinst           @ encoding: [0x10,0x1a,0xf9,0xee]
+        vmrs r1, fpinst
+@ CHECK: vmrs	r8, fpinst2          @ encoding: [0x10,0x8a,0xfa,0xee]
+        vmrs r8, fpinst2
 
 @ CHECK: vmsr fpscr, r0              @ encoding: [0x10,0x0a,0xe1,0xee]
         vmsr    fpscr, r0
@@ -164,6 +168,10 @@
         vmsr  fpexc, r0
 @ CHECK: vmsr  fpsid, r0             @ encoding: [0x10,0x0a,0xe0,0xee]
         vmsr  fpsid, r0
+@ CHECK: vmsr	fpinst, r3           @ encoding: [0x10,0x3a,0xe9,0xee]
+        vmsr fpinst, r3	
+@ CHECK: vmsr	fpinst2, r4          @ encoding: [0x10,0x4a,0xea,0xee]
+        vmsr fpinst2, r4
 
         vmov.f64        d16, #3.000000e+00
         vmov.f32        s0, #3.000000e+00
@@ -289,6 +297,20 @@
         vstmia  r1, {s2,s3-s6,s7}
         vstmdb sp!, {q4-q7}
 
+        fldmiax r5!, {d0-d2}
+        fldmiaxeq r0, {d4,d5}
+        fldmdbxne r5!, {d4,d5,d6}
+@ CHECK: fldmiax r5!, {d0, d1, d2}      @ encoding: [0x07,0x0b,0xb5,0xec]
+@ CHECK: fldmiaxeq r0, {d4, d5}         @ encoding: [0x05,0x4b,0x90,0x0c]
+@ CHECK: fldmdbxne r5!, {d4, d5, d6}    @ encoding: [0x07,0x4b,0x35,0x1d]
+
+        fstmiax r5!, {d0-d7}
+        fstmiaxeq r4, {d8,d9}
+        fstmdbxne r7!, {d2-d4}
+@ CHECK: fstmiax r5!, {d0, d1, d2, d3, d4, d5, d6, d7} @ encoding: [0x11,0x0b,0xa5,0xec]
+@ CHECK: fstmiaxeq r4, {d8, d9}         @ encoding: [0x05,0x8b,0x84,0x0c]
+@ CHECK: fstmdbxne r7!, {d2, d3, d4}    @ encoding: [0x07,0x2b,0x27,0x1d]
+
 @ CHECK: vcvtr.s32.f64  s0, d0 @ encoding: [0x40,0x0b,0xbd,0xee]
 @ CHECK: vcvtr.s32.f32  s0, s1 @ encoding: [0x60,0x0a,0xbd,0xee]
 @ CHECK: vcvtr.u32.f64  s0, d0 @ encoding: [0x40,0x0b,0xbc,0xee]
diff --git a/test/MC/ARM/single-precision-fp.s b/test/MC/ARM/single-precision-fp.s
new file mode 100644
index 000000000000..2ed0cfe233c7
--- /dev/null
+++ b/test/MC/ARM/single-precision-fp.s
@@ -0,0 +1,194 @@
+@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=+fp-only-sp,-neon 2> %t > %t2
+@ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
+@ RUN:     FileCheck %s < %t2
+
+        vadd.f64 d0, d1, d2
+        vsub.f64 d2, d3, d4
+        vdiv.f64 d4, d5, d6
+        vmul.f64 d6, d7, d8
+        vnmul.f64 d8, d9, d10
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vadd.f64 d0, d1, d2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vsub.f64 d2, d3, d4
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vdiv.f64 d4, d5, d6
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vmul.f64 d6, d7, d8
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vnmul.f64 d8, d9, d10
+
+        vmla.f64 d11, d10, d9
+        vmls.f64 d8, d7, d6
+        vnmla.f64 d5, d4, d3
+        vnmls.f64 d2, d1, d0
+        vfma.f64 d1, d2, d3
+        vfms.f64 d4, d5, d6
+        vfnma.f64 d7, d8, d9
+        vfnms.f64 d10, d11, d12
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vmla.f64 d11, d10, d9
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vmls.f64 d8, d7, d6
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vnmla.f64 d5, d4, d3
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vnmls.f64 d2, d1, d0
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vfma.f64 d1, d2, d3
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vfms.f64 d4, d5, d6
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vfnma.f64 d7, d8, d9
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vfnms.f64 d10, d11, d12
+
+        vneg.f64 d15, d14
+        vsqrt.f64 d13, d12
+        vsqrt d13, d14
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vneg.f64 d15, d14
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vsqrt.f64 d13, d12
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vsqrt d13, d14
+
+        vcmpe.f64 d0, d1
+        vcmp.f64 d2, d3
+        vabs.f64 d4, d5
+        vcmpe.f64 d5, #0
+        vcmp.f64 d6, #0
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcmpe.f64 d0, d1
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcmp.f64 d2, d3
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vabs.f64 d4, d5
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcmpe.f64 d5, #0
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcmp.f64 d6, #0
+
+        @ FIXME: overlapping aliases and a probable TableGen indeterminacy mean
+        @ that the actual reason can vary by platform.
+        vmov.f64 d11, d10
+@ CHECK-ERRORS: error: instruction requires:
+@ CHECK-ERRORS-NEXT: vmov.f64 d11, d10
+
+        vcvt.f64.s32 d9, s8
+        vcvt.f64.u32 d7, s6
+        vcvt.s32.f64 s5, d4
+        vcvt.u32.f64 s3, d2
+        vcvtr.s32.f64 s1, d0
+        vcvtr.u32.f64 s1, d2
+        vcvt.s16.f64 d3, d4, #1
+        vcvt.u16.f64 d5, d6, #2
+        vcvt.s32.f64 d7, d8, #3
+        vcvt.u32.f64 d9, d10, #4
+        vcvt.f64.s16 d11, d12, #3
+        vcvt.f64.u16 d13, d14, #2
+        vcvt.f64.s32 d15, d14, #1
+        vcvt.f64.u32 d13, d12, #1
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.f64.s32 d9, s8
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.f64.u32 d7, s6
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.s32.f64 s5, d4
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.u32.f64 s3, d2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvtr.s32.f64 s1, d0
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvtr.u32.f64 s1, d2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.s16.f64 d3, d4, #1
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.u16.f64 d5, d6, #2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.s32.f64 d7, d8, #3
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.u32.f64 d9, d10, #4
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.f64.s16 d11, d12, #3
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.f64.u16 d13, d14, #2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.f64.s32 d15, d14, #1
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvt.f64.u32 d13, d12, #1
+
+        @ v8 operations, also double precision so make sure they're rejected.
+        vselgt.f64 d0, d1, d2
+        vselge.f64 d3, d4, d5
+        vseleq.f64 d6, d7, d8
+        vselvs.f64 d9, d10, d11
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vselgt.f64 d0, d1, d2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vselge.f64 d3, d4, d5
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vseleq.f64 d6, d7, d8
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vselvs.f64 d9, d10, d11
+
+        vmaxnm.f64 d12, d13, d14
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vmaxnm.f64 d12, d13, d14
+
+        vcvtb.f64.f16 d7, s8
+        vcvtb.f16.f64 s9, d10
+        vcvtt.f64.f16 d11, s12
+        vcvtt.f16.f64 s13, d14
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvtb.f64.f16 d7, s8
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvtb.f16.f64 s9, d10
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvtt.f64.f16 d11, s12
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vcvtt.f16.f64 s13, d14
+
+        vrintz.f64 d15, d14
+        vrintr.f64.f64 d13, d12
+        vrintx.f64 d11, d10
+        vrinta.f64.f64 d9, d8
+        vrintn.f64 d7, d6
+        vrintp.f64.f64 d5, d4
+        vrintm.f64 d3, d2
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrintz.f64 d15, d14
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrintr.f64.f64 d13, d12
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrintx.f64 d11, d10
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrinta.f64.f64 d9, d8
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrintn.f64 d7, d6
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrintp.f64.f64 d5, d4
+@ CHECK-ERRORS: error: instruction requires: double precision VFP
+@ CHECK-ERRORS-NEXT: vrintm.f64 d3, d2
+
+        @ Loads, stores and moves of double-precision (D) registers that actually *are* allowed.
+        vldr d0, [sp]
+        vstr d3, [sp]
+        vldm r0, {d0, d1}
+        vstm r4, {d3, d4}
+        vpush {d6, d7}
+        vpop {d8, d9}
+        vmov r1, r0, d1
+        vmov d2, r3, r4
+        vmov.f64 r5, r6, d7
+        vmov.f64 d8, r9, r10
+@ CHECK: vldr d0, [sp]
+@ CHECK: vstr d3, [sp]
+@ CHECK: vldmia r0, {d0, d1}
+@ CHECK: vstmia r4, {d3, d4}
+@ CHECK: vpush {d6, d7}
+@ CHECK: vpop {d8, d9}
+@ CHECK: vmov r1, r0, d1
+@ CHECK: vmov d2, r3, r4
+@ CHECK: vmov r5, r6, d7
+@ CHECK: vmov d8, r9, r10
diff --git a/test/MC/ARM/thumb-diagnostics.s b/test/MC/ARM/thumb-diagnostics.s
index 6f822d1c8e8a..19d17c2deef6 100644
--- a/test/MC/ARM/thumb-diagnostics.s
+++ b/test/MC/ARM/thumb-diagnostics.s
@@ -2,6 +2,8 @@
 @ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 @ RUN: not llvm-mc -triple=thumbv5-apple-darwin < %s 2> %t
 @ RUN: FileCheck --check-prefix=CHECK-ERRORS-V5 < %t %s
+@ RUN: not llvm-mc -triple=thumbv8 < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V8 < %t %s
 
 @ Check for various assembly diagnostic messages on invalid input.
 
@@ -38,10 +40,25 @@ error: invalid operand for instruction
         bkpt #-1
              ^
 
+@ Out of range immediates for v8 HLT instruction.
+        hlt #64
+        hlt #-1
+@CHECK-ERRORS: error: instruction requires: armv8 arm-mode
+@CHECK-ERRORS:        hlt #64
+@CHECK-ERRORS:        ^
+@CHECK-ERRORS-V8: error: instruction requires: arm-mode
+@CHECK-ERRORS-V8:         hlt #64
+@CHECK-ERRORS-V8:              ^
+@CHECK-ERRORS: error: invalid operand for instruction
+@CHECK-ERRORS:         hlt #-1
+@CHECK-ERRORS:              ^
+
 @ Invalid writeback and register lists for LDM
         ldm r2!, {r5, r8}
         ldm r2, {r5, r7}
         ldm r2!, {r2, r3, r4}
+        ldm r2!, {r2, r3, r4, r10}
+        ldmdb r2!, {r2, r3, r4}
 @ CHECK-ERRORS: error: registers must be in range r0-r7
 @ CHECK-ERRORS:         ldm r2!, {r5, r8}
 @ CHECK-ERRORS:                  ^
@@ -51,7 +68,12 @@ error: invalid operand for instruction
 @ CHECK-ERRORS: error: writeback operator '!' not allowed when base register in register list
 @ CHECK-ERRORS:         ldm r2!, {r2, r3, r4}
 @ CHECK-ERRORS:               ^
-
+@ CHECK-ERRORS-V8: error: writeback operator '!' not allowed when base register in register list
+@ CHECK-ERRORS-V8:         ldm r2!, {r2, r3, r4, r10}
+@ CHECK-ERRORS-V8:               ^
+@ CHECK-ERRORS-V8: error: writeback register not allowed in register list
+@ CHECK-ERRORS-V8:         ldmdb r2!, {r2, r3, r4}
+@ CHECK-ERRORS-V8:                 ^
 
 @ Invalid writeback and register lists for PUSH/POP
         pop {r1, r2, r10}
@@ -67,12 +89,20 @@ error: invalid operand for instruction
 @ Invalid writeback and register lists for STM
         stm r1, {r2, r6}
         stm r1!, {r2, r9}
+        stm r2!, {r2, r9}
+        stmdb r2!, {r0, r2}
 @ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         stm r1, {r2, r6}
 @ CHECK-ERRORS:         ^
 @ CHECK-ERRORS: error: registers must be in range r0-r7
 @ CHECK-ERRORS:         stm r1!, {r2, r9}
 @ CHECK-ERRORS:                  ^
+@ CHECK-ERRORS-V8: error: writeback operator '!' not allowed when base register in register list
+@ CHECK-ERRORS-V8:         stm r2!, {r2, r9}
+@ CHECK-ERRORS-V8:                  ^
+@ CHECK-ERRORS-V8: error: writeback register not allowed in register list
+@ CHECK-ERRORS-V8:         stmdb r2!, {r0, r2}
+@ CHECK-ERRORS-V8:                  ^
 
 @ Out of range immediates for LSL instruction.
         lsls r4, r5, #-1
@@ -138,7 +168,26 @@ error: invalid operand for instruction
 @ CHECK-ERRORS: error: source register must be the same as destination
 @ CHECK-ERRORS:         add r2, sp, ip
 @ CHECK-ERRORS:                     ^
- 
+
+
+@------------------------------------------------------------------------------
+@ B/Bcc - out of range immediates for Thumb1 branches
+@------------------------------------------------------------------------------
+
+        beq    #-258
+        bne    #256
+        bgt    #13
+        b      #-1048578
+        b      #1048576
+        b      #10323
+
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+
 @------------------------------------------------------------------------------
 @ WFE/WFI/YIELD - are not supported pre v6T2
 @------------------------------------------------------------------------------
@@ -146,13 +195,26 @@ error: invalid operand for instruction
         wfi
         yield
 
-@ CHECK-ERRORS: error: instruction requires: thumb2
+@ CHECK-ERRORS: error: instruction requires: armv6m or armv6t2
 @ CHECK-ERRORS: wfe
 @ CHECK-ERRORS: ^
-@ CHECK-ERRORS: error: instruction requires: thumb2
+@ CHECK-ERRORS: error: instruction requires: armv6m or armv6t2
 @ CHECK-ERRORS: wfi
 @ CHECK-ERRORS: ^
-@ CHECK-ERRORS: error: instruction requires: thumb2
+@ CHECK-ERRORS: error: instruction requires: armv6m or armv6t2
 @ CHECK-ERRORS: yield
 @ CHECK-ERRORS: ^
 
+@------------------------------------------------------------------------------
+@ PLDW requires mp-extensions
+@------------------------------------------------------------------------------
+        pldw [r0, #4]
+@ CHECK-ERRORS: error: instruction requires: mp-extensions
+
+@------------------------------------------------------------------------------
+@ LDR(lit) - invalid offsets
+@------------------------------------------------------------------------------
+
+        ldr r4, [pc, #-12]
+@ CHECK-ERRORS: error: instruction requires: thumb2
+
diff --git a/test/MC/ARM/thumb-fp-armv8.s b/test/MC/ARM/thumb-fp-armv8.s
new file mode 100644
index 000000000000..a730fa2d8f43
--- /dev/null
+++ b/test/MC/ARM/thumb-fp-armv8.s
@@ -0,0 +1,130 @@
+@ RUN: llvm-mc -triple thumbv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s
+
+@ VCVT{B,T}
+
+  vcvtt.f64.f16 d3, s1
+@ CHECK: vcvtt.f64.f16 d3, s1      @ encoding: [0xb2,0xee,0xe0,0x3b]
+  vcvtt.f16.f64 s5, d12
+@ CHECK: vcvtt.f16.f64 s5, d12     @ encoding: [0xf3,0xee,0xcc,0x2b]
+
+  vcvtb.f64.f16 d3, s1
+@ CHECK: vcvtb.f64.f16 d3, s1     @ encoding: [0xb2,0xee,0x60,0x3b]
+  vcvtb.f16.f64 s4, d1
+@ CHECK: vcvtb.f16.f64 s4, d1     @ encoding: [0xb3,0xee,0x41,0x2b]
+
+  it ge
+  vcvttge.f64.f16 d3, s1
+@ CHECK: vcvttge.f64.f16 d3, s1      @ encoding: [0xb2,0xee,0xe0,0x3b]
+  it gt
+  vcvttgt.f16.f64 s5, d12
+@ CHECK: vcvttgt.f16.f64 s5, d12     @ encoding: [0xf3,0xee,0xcc,0x2b]
+  it eq
+  vcvtbeq.f64.f16 d3, s1
+@ CHECK: vcvtbeq.f64.f16 d3, s1     @ encoding: [0xb2,0xee,0x60,0x3b]
+  it lt
+  vcvtblt.f16.f64 s4, d1
+@ CHECK: vcvtblt.f16.f64 s4, d1     @ encoding: [0xb3,0xee,0x41,0x2b]
+
+
+@ VCVT{A,N,P,M}
+
+  vcvta.s32.f32 s2, s3
+@ CHECK: vcvta.s32.f32 s2, s3     @ encoding: [0xbc,0xfe,0xe1,0x1a]
+  vcvta.s32.f64 s2, d3
+@ CHECK: vcvta.s32.f64 s2, d3     @ encoding: [0xbc,0xfe,0xc3,0x1b]
+  vcvtn.s32.f32 s6, s23
+@ CHECK: vcvtn.s32.f32 s6, s23     @ encoding: [0xbd,0xfe,0xeb,0x3a]
+  vcvtn.s32.f64 s6, d23
+@ CHECK: vcvtn.s32.f64 s6, d23     @ encoding: [0xbd,0xfe,0xe7,0x3b]
+  vcvtp.s32.f32 s0, s4
+@ CHECK: vcvtp.s32.f32 s0, s4     @ encoding: [0xbe,0xfe,0xc2,0x0a]
+  vcvtp.s32.f64 s0, d4
+@ CHECK: vcvtp.s32.f64 s0, d4     @ encoding: [0xbe,0xfe,0xc4,0x0b]
+  vcvtm.s32.f32 s17, s8
+@ CHECK: vcvtm.s32.f32 s17, s8     @ encoding: [0xff,0xfe,0xc4,0x8a]
+  vcvtm.s32.f64 s17, d8
+@ CHECK: vcvtm.s32.f64 s17, d8     @ encoding: [0xff,0xfe,0xc8,0x8b]
+
+  vcvta.u32.f32 s2, s3
+@ CHECK: vcvta.u32.f32 s2, s3     @ encoding: [0xbc,0xfe,0x61,0x1a]
+  vcvta.u32.f64 s2, d3
+@ CHECK: vcvta.u32.f64 s2, d3     @ encoding: [0xbc,0xfe,0x43,0x1b]
+  vcvtn.u32.f32 s6, s23
+@ CHECK: vcvtn.u32.f32 s6, s23     @ encoding: [0xbd,0xfe,0x6b,0x3a]
+  vcvtn.u32.f64 s6, d23
+@ CHECK: vcvtn.u32.f64 s6, d23     @ encoding: [0xbd,0xfe,0x67,0x3b]
+  vcvtp.u32.f32 s0, s4
+@ CHECK: vcvtp.u32.f32 s0, s4     @ encoding: [0xbe,0xfe,0x42,0x0a]
+  vcvtp.u32.f64 s0, d4
+@ CHECK: vcvtp.u32.f64 s0, d4     @ encoding: [0xbe,0xfe,0x44,0x0b]
+  vcvtm.u32.f32 s17, s8
+@ CHECK: vcvtm.u32.f32 s17, s8     @ encoding: [0xff,0xfe,0x44,0x8a]
+  vcvtm.u32.f64 s17, d8
+@ CHECK: vcvtm.u32.f64 s17, d8     @ encoding: [0xff,0xfe,0x48,0x8b]
+
+
+@ VSEL
+  vselge.f32 s4, s1, s23
+@ CHECK: vselge.f32 s4, s1, s23    @ encoding: [0x20,0xfe,0xab,0x2a]
+  vselge.f64 d30, d31, d23
+@ CHECK: vselge.f64 d30, d31, d23  @ encoding: [0x6f,0xfe,0xa7,0xeb]
+  vselgt.f32 s0, s1, s0
+@ CHECK: vselgt.f32 s0, s1, s0    @ encoding: [0x30,0xfe,0x80,0x0a]
+  vselgt.f64 d5, d10, d20
+@ CHECK: vselgt.f64 d5, d10, d20  @ encoding: [0x3a,0xfe,0x24,0x5b]
+  vseleq.f32 s30, s28, s23
+@ CHECK: vseleq.f32 s30, s28, s23 @ encoding: [0x0e,0xfe,0x2b,0xfa]
+  vseleq.f64 d2, d4, d8
+@ CHECK: vseleq.f64 d2, d4, d8    @ encoding: [0x04,0xfe,0x08,0x2b]
+  vselvs.f32 s21, s16, s14
+@ CHECK: vselvs.f32 s21, s16, s14 @ encoding: [0x58,0xfe,0x07,0xaa]
+  vselvs.f64 d0, d1, d31
+@ CHECK: vselvs.f64 d0, d1, d31   @ encoding: [0x11,0xfe,0x2f,0x0b]
+
+
+@ VMAXNM / VMINNM
+  vmaxnm.f32 s5, s12, s0
+@ CHECK: vmaxnm.f32 s5, s12, s0    @ encoding: [0xc6,0xfe,0x00,0x2a]
+  vmaxnm.f64 d5, d22, d30
+@ CHECK: vmaxnm.f64 d5, d22, d30   @ encoding: [0x86,0xfe,0xae,0x5b]
+  vminnm.f32 s0, s0, s12
+@ CHECK: vminnm.f32 s0, s0, s12    @ encoding: [0x80,0xfe,0x46,0x0a]
+  vminnm.f64 d4, d6, d9
+@ CHECK: vminnm.f64 d4, d6, d9     @ encoding: [0x86,0xfe,0x49,0x4b]
+
+@ VRINT{Z,R,X}
+  it ge
+  vrintzge.f64 d3, d12
+@ CHECK: vrintzge.f64 d3, d12   @ encoding: [0xb6,0xee,0xcc,0x3b]
+  vrintz.f32 s3, s24
+@ CHECK: vrintz.f32 s3, s24     @ encoding: [0xf6,0xee,0xcc,0x1a]
+  it lt
+  vrintrlt.f64 d5, d0
+@ CHECK: vrintrlt.f64 d5, d0    @ encoding: [0xb6,0xee,0x40,0x5b]
+  vrintr.f32 s0, s9
+@ CHECK: vrintr.f32 s0, s9      @ encoding: [0xb6,0xee,0x64,0x0a]
+  it eq
+  vrintxeq.f64 d28, d30
+@ CHECK: vrintxeq.f64 d28, d30  @ encoding: [0xf7,0xee,0x6e,0xcb]
+  it vs
+  vrintxvs.f32 s10, s14
+@ CHECK: vrintxvs.f32 s10, s14  @ encoding: [0xb7,0xee,0x47,0x5a]
+
+@ VRINT{A,N,P,M}
+
+  vrinta.f64 d3, d4
+@ CHECK: vrinta.f64 d3, d4     @ encoding: [0xb8,0xfe,0x44,0x3b]
+  vrinta.f32 s12, s1
+@ CHECK: vrinta.f32 s12, s1    @ encoding: [0xb8,0xfe,0x60,0x6a]
+  vrintn.f64 d3, d4
+@ CHECK: vrintn.f64 d3, d4     @ encoding: [0xb9,0xfe,0x44,0x3b]
+  vrintn.f32 s12, s1
+@ CHECK: vrintn.f32 s12, s1    @ encoding: [0xb9,0xfe,0x60,0x6a]
+  vrintp.f64 d3, d4
+@ CHECK: vrintp.f64 d3, d4     @ encoding: [0xba,0xfe,0x44,0x3b]
+  vrintp.f32 s12, s1
+@ CHECK: vrintp.f32 s12, s1    @ encoding: [0xba,0xfe,0x60,0x6a]
+  vrintm.f64 d3, d4
+@ CHECK: vrintm.f64 d3, d4     @ encoding: [0xbb,0xfe,0x44,0x3b]
+  vrintm.f32 s12, s1
+@ CHECK: vrintm.f32 s12, s1    @ encoding: [0xbb,0xfe,0x60,0x6a]
diff --git a/test/MC/ARM/thumb-hints.s b/test/MC/ARM/thumb-hints.s
new file mode 100644
index 000000000000..b3c4cee40bbd
--- /dev/null
+++ b/test/MC/ARM/thumb-hints.s
@@ -0,0 +1,64 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv6-apple-darwin -mcpu=cortex-m0 -show-encoding < %s | FileCheck %s 
+@ RUN: not llvm-mc -triple=thumbv6-apple-darwin -show-encoding < %s > %t 2> %t2
+@ RUN: FileCheck %s --check-prefix=CHECK-EVIL-PRE-UAL < %t
+@ RUN: FileCheck %s --check-prefix CHECK-ERROR < %t2
+
+  .syntax unified
+
+        nop
+        yield
+        wfe
+        wfi
+        sev
+@ CHECK: nop                            @ encoding: [0x00,0xbf]
+@ CHECK: yield                          @ encoding: [0x10,0xbf]
+@ CHECK: wfe                            @ encoding: [0x20,0xbf]
+@ CHECK: wfi                            @ encoding: [0x30,0xbf]
+@ CHECK: sev                            @ encoding: [0x40,0xbf]
+
+@ CHECK-EVIL-PRE-UAL: mov r8, r8                     @ encoding: [0xc0,0x46]
+
+        dmb sy
+        dmb
+        dsb sy
+        dsb
+        isb sy
+        isb
+@ CHECK: dmb	sy                      @ encoding: [0xbf,0xf3,0x5f,0x8f]
+@ CHECK: dmb	sy                      @ encoding: [0xbf,0xf3,0x5f,0x8f]
+@ CHECK: dsb	sy                      @ encoding: [0xbf,0xf3,0x4f,0x8f]
+@ CHECK: dsb	sy                      @ encoding: [0xbf,0xf3,0x4f,0x8f]
+@ CHECK: isb	sy                      @ encoding: [0xbf,0xf3,0x6f,0x8f]
+@ CHECK: isb	sy                      @ encoding: [0xbf,0xf3,0x6f,0x8f]
+
+
+@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2
+@ CHECK-ERROR-NEXT: yield
+
+@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2
+@ CHECK-ERROR-NEXT: wfe
+
+@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2
+@ CHECK-ERROR-NEXT: wfi
+
+@ CHECK-ERROR: error: instruction requires: armv6m or armv6t2
+@ CHECK-ERROR-NEXT: sev
+
+@ CHECK-ERROR: error:
+@ CHECK-ERROR-NEXT: dmb sy
+
+@ CHECK-ERROR: error: instruction requires: data-barriers
+@ CHECK-ERROR-NEXT: dmb
+
+@ CHECK-ERROR: error:
+@ CHECK-ERROR-NEXT: dsb sy
+
+@ CHECK-ERROR: error: instruction requires: data-barriers
+@ CHECK-ERROR-NEXT: dsb
+
+@ CHECK-ERROR: error:
+@ CHECK-ERROR-NEXT: isb sy
+
+@ CHECK-ERROR: error: instruction requires: data-barriers
+@ CHECK-ERROR-NEXT: isb
diff --git a/test/MC/ARM/thumb-invalid-crypto.txt b/test/MC/ARM/thumb-invalid-crypto.txt
new file mode 100644
index 000000000000..a5f9a19690cd
--- /dev/null
+++ b/test/MC/ARM/thumb-invalid-crypto.txt
@@ -0,0 +1,42 @@
+@ RUN: not llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s 2>&1 | FileCheck %s
+
+iteee lo
+aesdlo.8  q0, q1
+@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified
+aesimchs.8  q0, q1
+@ CHECK: error: instruction 'aesimc' is not predicable, but condition code specified
+aesmchs.8  q0, q1
+@ CHECK: error: instruction 'aesmc' is not predicable, but condition code specified
+aesehs.8 q0, q1
+@ CHECK: error: instruction 'aese' is not predicable, but condition code specified
+
+itee hs
+sha1hhs.32  q0, q1
+@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified
+sha1su1lo.32  q0, q1
+@ CHECK: error: instruction 'sha1su1' is not predicable, but condition code specified
+sha256su0lo.32  q0, q1
+@ CHECK: error: instruction 'sha256su0' is not predicable, but condition code specified
+
+iteee lo
+sha1clo.32  s0, d1, q2
+@ CHECK: error: instruction 'sha1c' is not predicable, but condition code specified
+sha1mhs.32  q0, s1, q2
+@ CHECK: error: instruction 'sha1m' is not predicable, but condition code specified
+sha1phs.32  s0, q1, q2
+@ CHECK: error: instruction 'sha1p' is not predicable, but condition code specified
+sha1su0hs.32  d0, q1, q2
+@ CHECK: error: instruction 'sha1su0' is not predicable, but condition code specified
+itee hs
+sha256hhs.32  q0, s1, q2
+@ CHECK: error: instruction 'sha256h' is not predicable, but condition code specified
+sha256h2lo.32  q0, q1, s2
+@ CHECK: error: instruction 'sha256h2' is not predicable, but condition code specified
+sha256su1lo.32  s0, d1, q2
+@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified
+
+ite lo
+vmulllo.p64 q0, s1, s3
+@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified
+vmullhs.p64 q0, d16, d17
+@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified
diff --git a/test/MC/ARM/thumb-neon-crypto.s b/test/MC/ARM/thumb-neon-crypto.s
new file mode 100644
index 000000000000..096e9e81b10a
--- /dev/null
+++ b/test/MC/ARM/thumb-neon-crypto.s
@@ -0,0 +1,35 @@
+@ RUN: llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s
+
+aesd.8  q0, q1
+@ CHECK: aesd.8  q0, q1         @ encoding: [0xb0,0xff,0x42,0x03]
+aese.8  q0, q1
+@ CHECK: aese.8 q0, q1          @ encoding: [0xb0,0xff,0x02,0x03]
+aesimc.8  q0, q1
+@ CHECK: aesimc.8 q0, q1        @ encoding: [0xb0,0xff,0xc2,0x03]
+aesmc.8  q0, q1
+@ CHECK: aesmc.8 q0, q1         @ encoding: [0xb0,0xff,0x82,0x03]
+
+sha1h.32  q0, q1
+@ CHECK: sha1h.32  q0, q1       @ encoding: [0xb9,0xff,0xc2,0x02]
+sha1su1.32  q0, q1
+@ CHECK: sha1su1.32 q0, q1      @ encoding: [0xba,0xff,0x82,0x03]
+sha256su0.32  q0, q1
+@ CHECK: sha256su0.32 q0, q1    @ encoding: [0xba,0xff,0xc2,0x03]
+
+sha1c.32  q0, q1, q2
+@ CHECK: sha1c.32  q0, q1, q2   @ encoding: [0x02,0xef,0x44,0x0c]
+sha1m.32  q0, q1, q2
+@ CHECK: sha1m.32  q0, q1, q2   @ encoding: [0x22,0xef,0x44,0x0c]
+sha1p.32  q0, q1, q2
+@ CHECK: sha1p.32 q0, q1, q2    @ encoding: [0x12,0xef,0x44,0x0c]
+sha1su0.32  q0, q1, q2
+@ CHECK: sha1su0.32  q0, q1, q2      @ encoding: [0x32,0xef,0x44,0x0c]
+sha256h.32  q0, q1, q2
+@ CHECK: sha256h.32  q0, q1, q2      @ encoding: [0x02,0xff,0x44,0x0c]
+sha256h2.32  q0, q1, q2
+@ CHECK: sha256h2.32 q0, q1, q2      @ encoding: [0x12,0xff,0x44,0x0c]
+sha256su1.32  q0, q1, q2
+@ CHECK: sha256su1.32 q0, q1, q2     @ encoding: [0x22,0xff,0x44,0x0c]
+
+vmull.p64 q8, d16, d17
+@ CHECK: vmull.p64  q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0e]
diff --git a/test/MC/ARM/thumb-neon-v8.s b/test/MC/ARM/thumb-neon-v8.s
new file mode 100644
index 000000000000..5b327810f85a
--- /dev/null
+++ b/test/MC/ARM/thumb-neon-v8.s
@@ -0,0 +1,83 @@
+@ RUN: llvm-mc -triple thumbv8 -mattr=+neon -show-encoding < %s | FileCheck %s
+
+vmaxnm.f32 d4, d5, d1
+@ CHECK: vmaxnm.f32 d4, d5, d1 @ encoding: [0x05,0xff,0x11,0x4f]
+vmaxnm.f32 q2, q4, q6
+@ CHECK: vmaxnm.f32 q2, q4, q6 @ encoding: [0x08,0xff,0x5c,0x4f]
+vminnm.f32 d5, d4, d30
+@ CHECK: vminnm.f32 d5, d4, d30 @ encoding: [0x24,0xff,0x3e,0x5f]
+vminnm.f32 q0, q13, q2
+@ CHECK: vminnm.f32 q0, q13, q2 @ encoding: [0x2a,0xff,0xd4,0x0f]
+
+vcvta.s32.f32	d4, d6
+@ CHECK: vcvta.s32.f32	d4, d6 @ encoding: [0xbb,0xff,0x06,0x40]
+vcvta.u32.f32	d12, d10
+@ CHECK: vcvta.u32.f32	d12, d10 @ encoding: [0xbb,0xff,0x8a,0xc0]
+vcvta.s32.f32	q4, q6
+@ CHECK: vcvta.s32.f32	q4, q6 @ encoding: [0xbb,0xff,0x4c,0x80]
+vcvta.u32.f32	q4, q10
+@ CHECK: vcvta.u32.f32	q4, q10 @ encoding: [0xbb,0xff,0xe4,0x80]
+
+vcvtm.s32.f32	d1, d30
+@ CHECK: vcvtm.s32.f32	d1, d30 @ encoding: [0xbb,0xff,0x2e,0x13]
+vcvtm.u32.f32	d12, d10
+@ CHECK: vcvtm.u32.f32	d12, d10 @ encoding: [0xbb,0xff,0x8a,0xc3]
+vcvtm.s32.f32	q1, q10
+@ CHECK: vcvtm.s32.f32	q1, q10 @ encoding: [0xbb,0xff,0x64,0x23]
+vcvtm.u32.f32	q13, q1
+@ CHECK: vcvtm.u32.f32	q13, q1 @ encoding: [0xfb,0xff,0xc2,0xa3]
+
+vcvtn.s32.f32	d15, d17
+@ CHECK: vcvtn.s32.f32	d15, d17 @ encoding: [0xbb,0xff,0x21,0xf1]
+vcvtn.u32.f32	d5, d3
+@ CHECK: vcvtn.u32.f32	d5, d3 @ encoding: [0xbb,0xff,0x83,0x51]
+vcvtn.s32.f32	q3, q8
+@ CHECK: vcvtn.s32.f32	q3, q8 @ encoding: [0xbb,0xff,0x60,0x61]
+vcvtn.u32.f32	q5, q3
+@ CHECK: vcvtn.u32.f32	q5, q3 @ encoding: [0xbb,0xff,0xc6,0xa1]
+
+vcvtp.s32.f32	d11, d21
+@ CHECK: vcvtp.s32.f32	d11, d21 @ encoding: [0xbb,0xff,0x25,0xb2]
+vcvtp.u32.f32	d14, d23
+@ CHECK: vcvtp.u32.f32	d14, d23 @ encoding: [0xbb,0xff,0xa7,0xe2]
+vcvtp.s32.f32	q4, q15
+@ CHECK: vcvtp.s32.f32	q4, q15 @ encoding: [0xbb,0xff,0x6e,0x82]
+vcvtp.u32.f32	q9, q8
+@ CHECK: vcvtp.u32.f32	q9, q8 @ encoding: [0xfb,0xff,0xe0,0x22]
+
+vrintn.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x34]
+vrintn.f32 q1, q4
+@ CHECK: vrintn.f32 q1, q4 @ encoding: [0xba,0xff,0x48,0x24]
+vrintx.f32 d5, d12
+@ CHECK: vrintx.f32 d5, d12 @ encoding: [0xba,0xff,0x8c,0x54]
+vrintx.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xba,0xff,0xc6,0x04]
+vrinta.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x35]
+vrinta.f32 q8, q2
+@ CHECK: vrinta.f32 q8, q2 @ encoding: [0xfa,0xff,0x44,0x05]
+vrintz.f32 d12, d18
+@ CHECK: vrintz.f32 d12, d18 @ encoding: [0xba,0xff,0xa2,0xc5]
+vrintz.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xfa,0xff,0xc8,0x25]
+vrintm.f32 d3, d0
+@ CHECK: vrintm.f32 d3, d0 @ encoding: [0xba,0xff,0x80,0x36]
+vrintm.f32 q1, q4
+@ CHECK: vrintm.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x26]
+vrintp.f32 d3, d0
+@ CHECK: vrintp.f32 d3, d0 @ encoding: [0xba,0xff,0x80,0x37]
+vrintp.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x27]
+
+@ test the aliases of vrint
+vrintn.f32.f32 d3, d0
+@ CHECK: vrintn.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x34]
+vrintx.f32.f32 q0, q3
+@ CHECK: vrintx.f32 q0, q3 @ encoding: [0xba,0xff,0xc6,0x04]
+vrinta.f32.f32 d3, d0
+@ CHECK: vrinta.f32 d3, d0 @ encoding: [0xba,0xff,0x00,0x35]
+vrintz.f32.f32 q9, q4
+@ CHECK: vrintz.f32 q9, q4 @ encoding: [0xfa,0xff,0xc8,0x25]
+vrintp.f32.f32 q1, q4
+@ CHECK: vrintp.f32 q1, q4 @ encoding: [0xba,0xff,0xc8,0x27]
diff --git a/test/MC/ARM/thumb-nop.s b/test/MC/ARM/thumb-nop.s
deleted file mode 100644
index 0b580ea14a93..000000000000
--- a/test/MC/ARM/thumb-nop.s
+++ /dev/null
@@ -1,9 +0,0 @@
-@ RUN: llvm-mc -triple=thumbv6-apple-darwin -show-encoding < %s | FileCheck %s -check-prefix=CHECK-V6
-@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding < %s | FileCheck %s -check-prefix=CHECK-V7
-
-  .syntax unified
-
-        nop
-
-@ CHECK-V6: nop                            @ encoding: [0xc0,0x46]
-@ CHECK-V7: nop                            @ encoding: [0x00,0xbf]
diff --git a/test/MC/ARM/thumb-only-conditionals.s b/test/MC/ARM/thumb-only-conditionals.s
new file mode 100644
index 000000000000..8693c249c008
--- /dev/null
+++ b/test/MC/ARM/thumb-only-conditionals.s
@@ -0,0 +1,54 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-ios -o - %s | FileCheck %s
+
+        itte eq
+        dmbeq #11
+        dsbeq #7
+        isbne #15
+@ CHECK: itte eq
+@ CHECK-NEXT: dmbeq ish
+@ CHECK-NEXT: dsbeq nsh
+@ CHECK-NEXT: isbne sy
+
+        itet le
+        dmble
+        dsbgt
+        isble
+@ CHECK: itet le
+@ CHECK-NEXT: dmble sy
+@ CHECK-NEXT: dsbgt sy
+@ CHECK-NEXT: isble sy
+
+        itt gt
+        cdpgt  p7, #1, c1, c1, c1, #4
+        cdp2gt  p7, #1, c1, c1, c1, #4
+@ CHECK: itt gt
+@ CHECK-NEXT: cdpgt  p7, #1, c1, c1, c1, #4
+@ CHECK-NEXT: cdp2gt  p7, #1, c1, c1, c1, #4
+
+        itt ne
+        mcrne p0, #0, r0, c0, c0, #0
+        mcr2ne p0, #0, r0, c0, c0, #0
+@ CHECK: itt ne
+@ CHECK-NEXT: mcrne p0, #0, r0, c0, c0, #0
+@ CHECK-NEXT: mcr2ne p0, #0, r0, c0, c0, #0
+
+        ite le
+        mcrrle  p7, #15, r5, r4, c1
+        mcrr2gt  p7, #15, r5, r4, c1
+@ CHECK: ite le
+@ CHECK-NEXT: mcrrle  p7, #15, r5, r4, c1
+@ CHECK-NEXT: mcrr2gt  p7, #15, r5, r4, c1
+
+        ite eq
+        mrceq p9, #1, r1, c2, c2
+        mrc2ne p12, #3, r3, c3, c4
+@ CHECK: ite eq
+@ CHECK-NEXT: mrceq p9, #1, r1, c2, c2
+@ CHECK-NEXT: mrc2ne p12, #3, r3, c3, c4
+
+        itt lo
+        mrrclo  p7, #1, r5, r4, c1
+        mrrc2lo  p7, #1, r5, r4, c1
+@ CHECK: itt lo
+@ CHECK-NEXT: mrrclo  p7, #1, r5, r4, c1
+@ CHECK-NEXT: mrrc2lo  p7, #1, r5, r4, c1
diff --git a/test/MC/ARM/thumb.s b/test/MC/ARM/thumb.s
index 2223bdcd4673..9a725410c3d0 100644
--- a/test/MC/ARM/thumb.s
+++ b/test/MC/ARM/thumb.s
@@ -42,7 +42,7 @@
 @ CHECK: bkpt  #2                       @ encoding: [0x02,0xbe]
 
         nop
-@ CHECK: nop @ encoding: [0xc0,0x46]
+@ CHECK: mov r8, r8 @ encoding: [0xc0,0x46]
 
         cpsie aif
 @ CHECK: cpsie aif                      @ encoding: [0x67,0xb6]
diff --git a/test/MC/ARM/thumb2-b.w-encodingT4.s b/test/MC/ARM/thumb2-b.w-encodingT4.s
index be77b06267a2..aff02e1e1514 100644
--- a/test/MC/ARM/thumb2-b.w-encodingT4.s
+++ b/test/MC/ARM/thumb2-b.w-encodingT4.s
@@ -9,4 +9,4 @@ _foo:
 @------------------------------------------------------------------------------
         b.w   0x3680c
 
-@ CHECK: b.w	#223244                    @ encoding: [0x6d,0xf0,0x0c,0xb0]
+@ CHECK: b.w	#223244                    @ encoding: [0x36,0xf0,0x06,0xbc]
diff --git a/test/MC/ARM/thumb2-branches.s b/test/MC/ARM/thumb2-branches.s
new file mode 100644
index 000000000000..9148233a79c9
--- /dev/null
+++ b/test/MC/ARM/thumb2-branches.s
@@ -0,0 +1,286 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
+
+@------------------------------------------------------------------------------
+@ unconditional branches accept narrow suffix and encode to short encodings
+@------------------------------------------------------------------------------
+
+         b.n    #-2048
+         b.n    #2046
+
+@ CHECK: b	#-2048                  @ encoding: [0x00,0xe4]
+@ CHECK: b	#2046                   @ encoding: [0xff,0xe3]
+
+@------------------------------------------------------------------------------
+@ unconditional branches accept wide suffix and encode to wide encodings
+@------------------------------------------------------------------------------
+
+         b.w    #-2048
+         b.w    #2046
+         b.w    #-1677216
+         b.w    #1677214
+
+@ CHECK: b.w	#-2048                  @ encoding: [0xff,0xf7,0x00,0xbc]
+@ CHECK: b.w	#2046                   @ encoding: [0x00,0xf0,0xff,0xbb]
+@ CHECK: b.w	#-1677216               @ encoding: [0x66,0xf6,0x30,0xbc]
+@ CHECK: b.w	#1677214                @ encoding: [0x99,0xf1,0xcf,0xbb]
+
+@------------------------------------------------------------------------------
+@ unconditional branches without width suffix encode depending on offset size
+@------------------------------------------------------------------------------
+
+         b      #-2048
+         b      #2046
+         b      #-2050
+         b      #2048
+         b      #-1677216
+         b      #1677214
+
+@ CHECK: b	#-2048                  @ encoding: [0x00,0xe4]
+@ CHECK: b	#2046                   @ encoding: [0xff,0xe3]
+@ CHECK: b.w	#-2050                  @ encoding: [0xff,0xf7,0xff,0xbb]
+@ CHECK: b.w	#2048                   @ encoding: [0x00,0xf0,0x00,0xbc]
+@ CHECK: b.w	#-1677216               @ encoding: [0x66,0xf6,0x30,0xbc]
+@ CHECK: b.w	#1677214                @ encoding: [0x99,0xf1,0xcf,0xbb]
+
+@------------------------------------------------------------------------------
+@ unconditional branches with narrow suffix in IT block
+@------------------------------------------------------------------------------
+
+         it     eq
+         beq.n  #-2048
+         it     ne
+         bne.n  #-2046
+
+@ CHECK: it	eq                      @ encoding: [0x08,0xbf]
+@ CHECK: beq	#-2048                  @ encoding: [0x00,0xe4] 
+@ CHECK: it	ne                      @ encoding: [0x18,0xbf] 
+@ CHECK: bne	#-2046                  @ encoding: [0x01,0xe4]
+
+@------------------------------------------------------------------------------
+@ unconditional branches with wide suffix in IT block
+@------------------------------------------------------------------------------
+
+         it     gt
+         bgt.w  #-2048
+         it     le
+         ble.w  #2046
+         it     ge
+         bge.w  #-1677216
+         it     lt
+         blt.w  #1677214
+
+@ CHECK: it	gt                      @ encoding: [0xc8,0xbf]
+@ CHECK: bgt.w	#-2048                  @ encoding: [0xff,0xf7,0x00,0xbc]
+@ CHECK: it	le                      @ encoding: [0xd8,0xbf]
+@ CHECK: ble.w	#2046                   @ encoding: [0x00,0xf0,0xff,0xbb]
+@ CHECK: it	ge                      @ encoding: [0xa8,0xbf]
+@ CHECK: bge.w	#-1677216               @ encoding: [0x66,0xf6,0x30,0xbc]
+@ CHECK: it	lt                      @ encoding: [0xb8,0xbf]
+@ CHECK: blt.w	#1677214                @ encoding: [0x99,0xf1,0xcf,0xbb]
+
+@------------------------------------------------------------------------------
+@ conditional branches accept narrow suffix and encode to short encodings
+@------------------------------------------------------------------------------
+
+         beq.n    #-256
+         bne.n    #254
+
+@ CHECK: beq	#-256                   @ encoding: [0x80,0xd0]
+@ CHECK: bne	#254                    @ encoding: [0x7f,0xd1]
+
+@------------------------------------------------------------------------------
+@ conditional branches accept wide suffix and encode to wide encodings
+@------------------------------------------------------------------------------
+
+         bmi.w    #-256
+         bne.w    #254
+         blt.w    #-1048576
+         bge.w    #1048574
+
+@ CHECK: bmi.w	#-256                   @ encoding: [0x3f,0xf5,0x80,0xaf]
+@ CHECK: bne.w	#254                    @ encoding: [0x40,0xf0,0x7f,0x80]
+@ CHECK: blt.w	#-1048576               @ encoding: [0xc0,0xf6,0x00,0x80]
+@ CHECK: bge.w	#1048574                @ encoding: [0xbf,0xf2,0xff,0xaf]
+
+@------------------------------------------------------------------------------
+@ conditional branches without width suffix encode depending on offset size
+@------------------------------------------------------------------------------
+
+         bne     #-256
+         bgt     #254
+         bne     #-258
+         bgt     #256
+         bne     #-1048576
+         bgt     #1048574
+
+@ CHECK: bne	#-256                   @ encoding: [0x80,0xd1]
+@ CHECK: bgt	#254                    @ encoding: [0x7f,0xdc]
+@ CHECK: bne.w	#-258                   @ encoding: [0x7f,0xf4,0x7f,0xaf]
+@ CHECK: bgt.w	#256                    @ encoding: [0x00,0xf3,0x80,0x80]
+@ CHECK: bne.w	#-1048576               @ encoding: [0x40,0xf4,0x00,0x80]
+@ CHECK: bgt.w	#1048574                @ encoding: [0x3f,0xf3,0xff,0xaf]
+
+@------------------------------------------------------------------------------
+@ same branch instruction encoding to conditional or unconditional depending
+@ on whether it is in an IT block or not
+@------------------------------------------------------------------------------
+
+         it     eq
+         addeq  r0, r1
+         bne    #128
+
+@ CHECK: it	eq                      @ encoding: [0x08,0xbf]
+@ CHECK: addeq	r0, r1                  @ encoding: [0x08,0x44]
+@ CHECK: bne	#128                    @ encoding: [0x40,0xd1]
+
+         ite    eq
+         addeq  r0, r1
+         bne    #128
+
+@ CHECK: ite	eq                      @ encoding: [0x0c,0xbf]
+@ CHECK: addeq	r0, r1                  @ encoding: [0x08,0x44]
+@ CHECK: bne	#128                    @ encoding: [0x40,0xe0]
+
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
+
+@------------------------------------------------------------------------------
+@ unconditional branches accept narrow suffix and encode to short encodings
+@------------------------------------------------------------------------------
+
+         b.n    #-2048
+         b.n    #2046
+
+@ CHECK: b	#-2048                  @ encoding: [0x00,0xe4]
+@ CHECK: b	#2046                   @ encoding: [0xff,0xe3]
+
+@------------------------------------------------------------------------------
+@ unconditional branches accept wide suffix and encode to wide encodings
+@------------------------------------------------------------------------------
+
+         b.w    #-2048
+         b.w    #2046
+         b.w    #-1677216
+         b.w    #1677214
+
+@ CHECK: b.w	#-2048                  @ encoding: [0xff,0xf7,0x00,0xbc]
+@ CHECK: b.w	#2046                   @ encoding: [0x00,0xf0,0xff,0xbb]
+@ CHECK: b.w	#-1677216               @ encoding: [0x66,0xf6,0x30,0xbc]
+@ CHECK: b.w	#1677214                @ encoding: [0x99,0xf1,0xcf,0xbb]
+
+@------------------------------------------------------------------------------
+@ unconditional branches without width suffix encode depending on offset size
+@------------------------------------------------------------------------------
+
+         b      #-2048
+         b      #2046
+         b      #-2050
+         b      #2048
+         b      #-1677216
+         b      #1677214
+
+@ CHECK: b	#-2048                  @ encoding: [0x00,0xe4]
+@ CHECK: b	#2046                   @ encoding: [0xff,0xe3]
+@ CHECK: b.w	#-2050                  @ encoding: [0xff,0xf7,0xff,0xbb]
+@ CHECK: b.w	#2048                   @ encoding: [0x00,0xf0,0x00,0xbc]
+@ CHECK: b.w	#-1677216               @ encoding: [0x66,0xf6,0x30,0xbc]
+@ CHECK: b.w	#1677214                @ encoding: [0x99,0xf1,0xcf,0xbb]
+
+@------------------------------------------------------------------------------
+@ unconditional branches with narrow suffix in IT block
+@------------------------------------------------------------------------------
+
+         it     eq
+         beq.n  #-2048
+         it     ne
+         bne.n  #-2046
+
+@ CHECK: it	eq                      @ encoding: [0x08,0xbf]
+@ CHECK: beq	#-2048                  @ encoding: [0x00,0xe4] 
+@ CHECK: it	ne                      @ encoding: [0x18,0xbf] 
+@ CHECK: bne	#-2046                  @ encoding: [0x01,0xe4]
+
+@------------------------------------------------------------------------------
+@ unconditional branches with wide suffix in IT block
+@------------------------------------------------------------------------------
+
+         it     gt
+         bgt.w  #-2048
+         it     le
+         ble.w  #2046
+         it     ge
+         bge.w  #-1677216
+         it     lt
+         blt.w  #1677214
+
+@ CHECK: it	gt                      @ encoding: [0xc8,0xbf]
+@ CHECK: bgt.w	#-2048                  @ encoding: [0xff,0xf7,0x00,0xbc]
+@ CHECK: it	le                      @ encoding: [0xd8,0xbf]
+@ CHECK: ble.w	#2046                   @ encoding: [0x00,0xf0,0xff,0xbb]
+@ CHECK: it	ge                      @ encoding: [0xa8,0xbf]
+@ CHECK: bge.w	#-1677216               @ encoding: [0x66,0xf6,0x30,0xbc]
+@ CHECK: it	lt                      @ encoding: [0xb8,0xbf]
+@ CHECK: blt.w	#1677214                @ encoding: [0x99,0xf1,0xcf,0xbb]
+
+@------------------------------------------------------------------------------
+@ conditional branches accept narrow suffix and encode to short encodings
+@------------------------------------------------------------------------------
+
+         beq.n    #-256
+         bne.n    #254
+
+@ CHECK: beq	#-256                   @ encoding: [0x80,0xd0]
+@ CHECK: bne	#254                    @ encoding: [0x7f,0xd1]
+
+@------------------------------------------------------------------------------
+@ conditional branches accept wide suffix and encode to wide encodings
+@------------------------------------------------------------------------------
+
+         bmi.w    #-256
+         bne.w    #254
+         blt.w    #-1048576
+         bge.w    #1048574
+
+@ CHECK: bmi.w	#-256                   @ encoding: [0x3f,0xf5,0x80,0xaf]
+@ CHECK: bne.w	#254                    @ encoding: [0x40,0xf0,0x7f,0x80]
+@ CHECK: blt.w	#-1048576               @ encoding: [0xc0,0xf6,0x00,0x80]
+@ CHECK: bge.w	#1048574                @ encoding: [0xbf,0xf2,0xff,0xaf]
+
+@------------------------------------------------------------------------------
+@ conditional branches without width suffix encode depending on offset size
+@------------------------------------------------------------------------------
+
+         bne     #-256
+         bgt     #254
+         bne     #-258
+         bgt     #256
+         bne     #-1048576
+         bgt     #1048574
+
+@ CHECK: bne	#-256                   @ encoding: [0x80,0xd1]
+@ CHECK: bgt	#254                    @ encoding: [0x7f,0xdc]
+@ CHECK: bne.w	#-258                   @ encoding: [0x7f,0xf4,0x7f,0xaf]
+@ CHECK: bgt.w	#256                    @ encoding: [0x00,0xf3,0x80,0x80]
+@ CHECK: bne.w	#-1048576               @ encoding: [0x40,0xf4,0x00,0x80]
+@ CHECK: bgt.w	#1048574                @ encoding: [0x3f,0xf3,0xff,0xaf]
+
+@------------------------------------------------------------------------------
+@ same branch instruction encoding to conditional or unconditional depending
+@ on whether it is in an IT block or not
+@------------------------------------------------------------------------------
+
+         it     eq
+         addeq  r0, r1
+         bne    #128
+
+@ CHECK: it	eq                      @ encoding: [0x08,0xbf]
+@ CHECK: addeq	r0, r1                  @ encoding: [0x08,0x44]
+@ CHECK: bne	#128                    @ encoding: [0x40,0xd1]
+
+         ite    eq
+         addeq  r0, r1
+         bne    #128
+
+@ CHECK: ite	eq                      @ encoding: [0x0c,0xbf]
+@ CHECK: addeq	r0, r1                  @ encoding: [0x08,0x44]
+@ CHECK: bne	#128                    @ encoding: [0x40,0xe0]
+
diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s
index d94c686e2f83..6ac2db02cca7 100644
--- a/test/MC/ARM/thumb2-diagnostics.s
+++ b/test/MC/ARM/thumb2-diagnostics.s
@@ -42,3 +42,31 @@
 @ CHECK-ERRORS: error: invalid operand for instruction
 @ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
 @ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+
+        isb  #-1
+        isb  #16
+@ CHECK-ERRORS: error: immediate value out of range
+@ CHECK-ERRORS: error: immediate value out of range
+
+        itt eq
+        bkpteq #1
+@ CHECK-ERRORS: error: instruction 'bkpt' is not predicable, but condition code specified
+
+        nopeq
+        nopeq
+
+@ out of range operands for Thumb2 targets
+
+        beq.w  #-1048578
+        bne.w  #1048576
+        blt.w  #1013411
+        b.w    #-16777218
+        b.w    #16777216
+        b.w    #1592313
+
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
+@ CHECK-ERRORS: error: branch target out of range
diff --git a/test/MC/ARM/thumb2-ldrd.s b/test/MC/ARM/thumb2-ldrd.s
new file mode 100644
index 000000000000..4463c21fe8c0
--- /dev/null
+++ b/test/MC/ARM/thumb2-ldrd.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -arch thumb -mattr=+thumb2 \
+// RUN: < %s >/dev/null 2> %t
+// RUN: grep "error: destination operands can't be identical" %t | count 4
+// rdar://14479780
+
+ldrd r0, r0, [pc, #0]
+ldrd r0, r0, [r1, #4]
+ldrd r0, r0, [r1], #4
+ldrd r0, r0, [r1, #4]!
diff --git a/test/MC/ARM/thumb2-pldw.s b/test/MC/ARM/thumb2-pldw.s
new file mode 100644
index 000000000000..f0eeaf9297a1
--- /dev/null
+++ b/test/MC/ARM/thumb2-pldw.s
@@ -0,0 +1,7 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -mattr=+mp -show-encoding < %s | FileCheck %s
+
+@------------------------------------------------------------------------------
+@ PLDW(immediate)
+@------------------------------------------------------------------------------
+         pldw   [r0, #257]
+@ CHECK: pldw   [r0, #257]              @ encoding: [0xb0,0xf8,0x01,0xf1]
diff --git a/test/MC/ARM/v8_IT_manual.s b/test/MC/ARM/v8_IT_manual.s
new file mode 100644
index 000000000000..4b63aa82dd68
--- /dev/null
+++ b/test/MC/ARM/v8_IT_manual.s
@@ -0,0 +1,6739 @@
+@ RUN: llvm-mc -triple thumbv8 -show-encoding < %s 2>&1 | FileCheck %s
+
+@ ADD reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+addge r1, r2, r3
+@ ADD reg, encoding T2
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+addge r1, r2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge r1, pc
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge pc, r2
+@ ADD reg, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge r11, r2, r3
+@ ADD imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+addge r1, r2, #3
+@ ADD imm, encoding T2
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+addge r1, #3
+@ ADD imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge r11, r2, #3
+@ ADD imm, encoding T4 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge r11, r2, #333
+@ ADD SP+imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+addge r1, sp, #32
+@ ADD SP+imm, encoding T2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge sp, #32
+@ ADD SP+imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge r1, sp, #33
+@ ADD SP+imm, encoding T4 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge r1, sp, #333
+
+@ SUB reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+subge r4, r3, r2
+@ SUB reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge r14, r3, r2
+@ SUB imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+subge r4, r3, #2
+@ SUB imm, encoding T2
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+subge r4, #3
+@ SUB imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge r14, r3, #2
+@ SUB imm, encoding T4 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge r14, r3, #2222
+@ SUB SP-imm, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge sp, #32
+@ SUB SP-imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge r4, sp, #33
+@ SUB SP-imm, encoding T4 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge r4, sp, #3333
+
+@ MOV reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+movge r4, r5
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge r4, pc
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge pc, r5
+@ MOV reg, encoding T3 (32-bit) -- can only appear as MOVS or MOV.W
+@ MOV imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+movge r4, #5
+@ MOV imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge r14, #5
+@ MOV imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge r14, #555
+
+@ CMP reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+cmpge r3, r4
+@ CMP reg, encoding T2
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+cmpge r13, r4
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+cmpge r3, pc
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+cmpge pc, r4
+@ CMP reg, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+cmpge r3, r4, lsl #1 
+@ CMP imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+cmpge r3, #4
+@ CMP imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+cmpge r13, #4
+
+@ AND reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+andge r5, r6
+@ AND reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r9, r6
+
+@ EOR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+eorge r7, r6
+@ EOR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r7, r9
+
+@ LSL imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+lslge r7, r0, #1
+@ LSL imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+lslge r7, r10, #1
+@ LSL reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+lslge r7, r0
+@ LSL reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+lslge r7, r10
+
+@ LSR imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+lsrge r3, r2, #1
+@ LSR imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+lsrge r3, r12, #1
+@ LSR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+lsrge r3, r2
+@ LSR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+lsrge r3, r12
+
+@ ASR imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+asrge r2, r3, #4
+@ ASR imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+asrge r12, r3, #4
+@ ASR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+asrge r2, r3
+@ ASR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+asrge r12, r3
+
+@ ADC reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+adcge r5, r4
+@ ADC reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r5, r5, r14
+
+@ SBC reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+sbcge r5, r6
+@ SBC reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r9, r9, r6
+
+@ ROR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+rorge r7, r6
+@ ROR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rorge r7, r9
+
+@ TST reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+tstge r7, r0
+@ TST reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+tstge r7, r10
+
+@ RSB imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+rsbge r1, r0, #0
+@ RSB imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r11, r0, #0
+
+@ CMN reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+cmnge r1, r2
+@ CMN reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+cmnge r11, r2
+
+@ ORR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+orrge r3, r2
+@ ORR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r3, r12
+
+@ MUL reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+mulge r3, r4, r3
+@ MUL reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mulge r3, r4, r5
+
+@ BIC reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+bicge r5, r4
+@ BIC reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r5, r14
+
+@ MVN reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+mvnge r5, r6
+@ MVN reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnge r9, r6
+
+@ BX, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+bxge r6
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bxge pc
+
+@ BLX, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+blxge r7
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+blxge pc
+
+@ LDR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrge r0, [r1, r2]
+@ LDR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge r10, [r1, r2]
+@ LDR imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrge r0, [r1]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrge r0, [r1, #8]
+@ LDR imm, encoding T2
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrge r0, [sp]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrge r0, [sp, #8]
+@ LDR imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge r0, [r1, #2]
+@ LDR imm, encoding T4 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge r0, [r1, #-2]
+@ LDR lit, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge r0, [pc, #8]
+@ LDR lit, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge r10, [pc, #8]
+
+@ STR reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strge r1, [r2, r3]
+@ STR reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge r11, [r2, r3]
+@ STR imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strge r1, [r2]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strge r1, [r2, #4]
+@ STR imm, encoding T2
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strge r1, [sp]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strge r1, [sp, #4]
+@ STR imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge r1, [r2, #3]
+@ STR imm, encoding T4 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge r1, [r2, #-3]
+
+@ STRH reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strhge r4, [r3, r2]
+@ STRH reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge r14, [r3, r2]
+@ STRH imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strhge r4, [r3]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strhge r4, [r3, #2]
+@ STRH imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge r4, [r3, #1]
+@ STRH imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge r4, [r3, #-2]
+
+@ STRB reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strbge r3, [r4, r5]
+@ STRB reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge r3, [r14, r5]
+@ STRB imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strbge r3, [r4]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+strbge r3, [r4, #5]
+@ STRB imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge r3, [r14, #5]
+@ STRB imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge r3, [r4, #-5]
+
+@ LDRSB reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrsbge r6, [r5, r4]
+@ LDRSB reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge r9, [r5, r4]
+
+@ LDRH reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrhge r5, [r6, r7]
+@ LDRH reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge r5, [r9, r7]
+@ LDRH imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrhge r5, [r6]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrhge r5, [r6, #8]
+@ LDRH imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge r5, [r6, #7]
+@ LDRH imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge r5, [r6, #-8]
+
+@ LDRB reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrbge r0, [r7, r6]
+@ LDRB reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge r10, [r7, r6]
+@ LDRB imm, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrbge r0, [r7]
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrbge r0, [r7, #6]
+@ LDRB imm, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge r10, [r7, #6]
+@ LDRB imm, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge r0, [r7, #-6]
+
+@ LDRSH reg, encoding T1
+@ CHECK-NOT: [[@LINE+2]]:1: warning
+it ge
+ldrshge r7, [r0, r1]
+@ LDRSH reg, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge r7, [r0, r11]
+
+@ ADR, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adrge r1, #24
+@ ADR, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adrge r1, #-23
+@ ADR, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adrge r1, #23
+
+@ SXTH, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sxthge r4, r3
+@ SXTH, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sxthge r4, r9
+
+@ SXTB, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sxtbge r4, r5
+@ SXTB, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sxtbge r14, r5
+
+@ UXTH, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+uxthge r6, r5
+@ UXTH, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+uxthge r9, r5
+
+@ UXTB, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+uxtbge r6, r7
+@ UXTB, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+uxtbge r6, r9
+
+@ PUSH, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+pushge {r1, r3, r7}
+@ PUSH, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+pushge {r1, r13, r7}
+@ PUSH, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+pushge {r13}
+
+@ REV, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+revge r7, r6
+@ REV, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+revge r9, r6
+
+@ REV16, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rev16ge r7, r0
+@ REV16, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rev16ge r7, r10
+
+@ REVSH, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+revshge r1, r0
+@ REVSH, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+revshge r11, r0
+
+@ POP, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+popge {r1, r0, r5}
+@ POP, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+popge {r1, r5, r10}
+@ POP, encoding T3 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+popge {r10}
+
+@ NOP, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+nopge
+@ NOP, encoding T2 (32-bit) -- can only appear as NOP.W
+
+@ STM, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stmge r1!, {r2, r3}
+@ STM, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stmge r1, {r2, r3}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stmge r1!, {r2, r13}
+
+@ LDM, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldmge r4!, {r2, r3}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldmge r4, {r2, r3}
+@ LDM, encoding T2 (32-bit)
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldmge r14!, {r2, r3}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldmge r14, {r2, r3}
+
+@ SVC, encoding T1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+svcge #55
+
+@ B, encoding T2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bge #2014
+
+@ The following Thumb instructions only have 32-bit encodings.
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strexge r0, r0, [pc]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r0], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r1], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r2], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r3], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r4], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r5], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r6], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r7], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r8], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r9], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r10], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r11], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r12], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [sp], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [lr], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [pc], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r0], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r1], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r2], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r3], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r4], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r5], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r6], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r7], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r8], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r9], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r10], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r11], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r12], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [sp], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [lr], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [pc], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r0, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r1, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r2, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r3, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r4, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r5, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r6, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r7, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r8, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r9, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r10, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r11, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r12, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [sp, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [lr, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r0, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r1, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r2, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r3, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r4, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r5, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r6, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r7, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r8, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r9, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r10, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r11, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r12, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [sp, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [lr, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [pc, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [pc]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r0, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r1, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r2, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r3, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r4, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r5, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r6, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r7, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r8, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r9, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r10, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r11, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [r12, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [sp, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [lr, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strdge r0, r0, [pc, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge.w r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movsge.w r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnge.w r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnsge.w r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, sp, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, sp, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, sp, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, sp, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r0, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r1, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r2, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r3, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r4, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r5, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r6, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r7, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r8, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r9, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r10, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r11, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, r12, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, sp, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, lr, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mcrrge p0, #0, r0, pc, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r0, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r1, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r2, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r3, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r4, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r5, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r6, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r7, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r8, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r9, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r10, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r11, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, r12, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, sp, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, lr, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mrrcge p14, #0, r0, pc, c0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc], #-0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc], {0}
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc], #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc, #-0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r0, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r1, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r2, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r3, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r4, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r5, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r6, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r7, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r8, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r9, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r10, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r11, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [r12, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [sp, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [lr, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stcge p0, c0, [pc, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r0, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r1, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r2, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r3, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r4, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r5, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r6, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r7, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r8, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r9, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r10, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r11, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [r12, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [sp, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [lr, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldcge p0, c0, [pc, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r0, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r1, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r2, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r3, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r4, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r5, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r6, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r7, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r8, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r9, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r10, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r11, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [r12, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [sp, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [lr, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+stclge p0, c0, [pc, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r0, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r1, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r2, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r3, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r4, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r5, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r6, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r7, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r8, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r9, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r10, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r11, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [r12, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [sp, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [lr, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldclge p0, c0, [pc, #0]!
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge.w r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movsge.w r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnge r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnge r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, sp, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, sp, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, sp, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, sp, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, sp, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, pc, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #4096
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #8192
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #12288
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #16384
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #20480
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #24576
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #28672
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #32768
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #36864
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #40960
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #45056
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #49152
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #53248
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #57344
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #61440
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r1, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r2, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r3, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r4, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r5, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r6, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r7, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r8, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r9, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r10, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r11, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r12, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, sp, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, lr, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, pc, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #4096
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #8192
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #12288
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #16384
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #20480
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #24576
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #28672
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #32768
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #36864
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #40960
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #45056
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #49152
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #53248
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #57344
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #61440
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r3
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r4
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r5
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r6
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r7
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r8
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r9
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r10
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r11
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r12
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, lr
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r0, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r1, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r2, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r3, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r4, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r5, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r6, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r7, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r8, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r9, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r10, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r11, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r12, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, lr, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r0, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r1, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r2, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r3, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r4, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r5, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r6, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r7, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r8, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r9, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r10, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r11, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, r12, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfige r0, lr, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bfcge r0, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r3
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r4
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r5
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r6
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r7
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r8
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r9
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r10
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r11
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r12
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, lr
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r0, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r1, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r2, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r3, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r4, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r5, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r6, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r7, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r8, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r9, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r10, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r11, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r12, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, lr, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+andge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+bicge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movge.w r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+orrge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movsge.w r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnge r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ornge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mvnge r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+eorge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, sp, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addge.w r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, sp, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addsge.w r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+adcge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbcge r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, sp, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subge.w r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, sp, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subsge.w r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbge.w r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r0, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r1, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r2, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r3, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r4, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r5, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r6, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r7, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r8, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r9, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r10, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r11, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, r12, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+rsbsge.w r0, lr, #8388608
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r0, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r1, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r2, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r3, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r4, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r5, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r6, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r7, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r8, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r9, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r10, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r11, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, r12, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, sp, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, lr, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+addwge r0, pc, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #6144
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #10240
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #14336
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #18432
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #22528
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #26624
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #30720
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #34816
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #38912
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #43008
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #47104
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #51200
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #55296
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #59392
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movwge r0, #63488
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r0, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r1, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r2, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r3, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r4, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r5, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r6, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r7, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r8, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r9, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r10, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r11, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, r12, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, sp, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, lr, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+subwge r0, pc, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #2048
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #6144
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #10240
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #14336
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #18432
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #22528
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #26624
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #30720
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #34816
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #38912
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #43008
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #47104
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #51200
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #55296
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #59392
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+movtge r0, #63488
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r3
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r4
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r5
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r6
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r7
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r8
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r9
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r10
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r11
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, r12
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ssatge r0, #1, lr
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r0, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r1, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r2, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r3, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r4, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r5, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r6, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r7, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r8, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r9, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r10, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r11, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, r12, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+sbfxge r0, lr, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r2
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r3
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r4
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r5
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r6
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r7
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r8
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r9
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r10
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r11
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, r12
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+usatge r0, #0, lr
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r0, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r1, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r2, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r3, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r4, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r5, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r6, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r7, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r8, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r9, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r10, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r11, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, r12, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ubfxge r0, lr, #0, #1
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strbge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrbge.w r0, [pc, #0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strhge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrhge.w r0, [pc, #0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+strge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrge.w r0, [pc, #0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r0, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r1, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r2, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r3, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r4, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r5, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r6, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r7, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r8, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r9, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r10, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r11, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r12, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [sp, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [lr, r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [pc, #-0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrsbge.w r0, [pc, #0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r1]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r2]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r3]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r4]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r5]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r6]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r7]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r8]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r9]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r10]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r11]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [r12]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [sp]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [lr]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+ldrshge.w r0, [pc, #0]
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r1, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r2, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r3, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r4, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r5, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r6, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r7, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r8, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r9, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r10, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r11, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, r12, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+mlage r0, lr, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smullge r0, r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umullge r0, r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+smlalge r0, r0, lr, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r0, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r1, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r2, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r3, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r4, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r5, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r6, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r7, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r8, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r9, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r10, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r11, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, r12, r0
+@ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
+it ge
+umlalge r0, r0, lr, r0
diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s
index 0a1fe92abd87..8b1b0e0c538b 100644
--- a/test/MC/ARM/vfp4.s
+++ b/test/MC/ARM/vfp4.s
@@ -1,9 +1,13 @@
 @ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4   | FileCheck %s --check-prefix=ARM
 @ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
-@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 | FileCheck %s --check-prefix=THUMB_V7EM
+@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 > %t 2> %t2
+@ RUN:     FileCheck %s < %t --check-prefix=THUMB_V7EM
+@ RUN:     FileCheck %s < %t2 --check-prefix=THUMB_V7EM-ERRORS
 
 @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
 @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
+@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS-NEXT: vfma.f64 d16, d18, d17
 vfma.f64 d16, d18, d17
 
 @ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee]
@@ -13,14 +17,20 @@ vfma.f32 s2, s4, s0
 
 @ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
 @ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 d16, d18, d17
 vfma.f32 d16, d18, d17
 
 @ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2]
 @ THUMB: vfma.f32	q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfma.f32 q2, q4, q0
 vfma.f32 q2, q4, q0
 
 @ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee]
 @ THUMB: vfnma.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xe1,0x0b]
+@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS-NEXT: vfnma.f64 d16, d18, d17
 vfnma.f64 d16, d18, d17
 
 @ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee]
@@ -30,6 +40,8 @@ vfnma.f32 s2, s4, s0
 
 @ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee]
 @ THUMB: vfms.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xe1,0x0b]
+@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS-NEXT: vfms.f64 d16, d18, d17
 vfms.f64 d16, d18, d17
 
 @ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee]
@@ -39,16 +51,23 @@ vfms.f32 s2, s4, s0
 
 @ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
 @ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 d16, d18, d17
 vfms.f32 d16, d18, d17
 
 @ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2]
 @ THUMB: vfms.f32	q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c]
+@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS-NEXT: vfms.f32 q2, q4, q0
 vfms.f32 q2, q4, q0
 
 @ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee]
 @ THUMB: vfnms.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xa1,0x0b]
+@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS-NEXT: vfnms.f64 d16, d18, d17
 vfnms.f64 d16, d18, d17
 
 @ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee]
 @ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
+@ THUMB_V7EM: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
 vfnms.f32 s2, s4, s0
diff --git a/test/MC/ARM/xscale-attributes.ll b/test/MC/ARM/xscale-attributes.ll
index d1e9931e1448..718fd8fcc78b 100644
--- a/test/MC/ARM/xscale-attributes.ll
+++ b/test/MC/ARM/xscale-attributes.ll
@@ -26,13 +26,14 @@ entry:
 ; OBJ-NEXT:     ]
 ; OBJ-NEXT:     Address: 0x0
 ; OBJ-NEXT:     Offset: 0x38
-; OBJ-NEXT:     Size: 32
+; OBJ-NEXT:     Size: 40
 ; OBJ-NEXT:     Link: 0
 ; OBJ-NEXT:     Info: 0
 ; OBJ-NEXT:     AddressAlignment: 1
 ; OBJ-NEXT:     EntrySize: 0
 ; OBJ-NEXT:     SectionData (
-; OBJ-NEXT:       0000: 411F0000 00616561 62690001 15000000
-; OBJ-NEXT:       0010: 06050801 09011401 15011703 18011901
+; OBJ-NEXT:       0000: 41270000 00616561 62690001 1D000000
+; OBJ-NEXT:       0010: 05585343 414C4500 06050801 09011401
+; OBJ-NEXT:       0020: 15011703 18011901
 ; OBJ-NEXT:     )
 ; OBJ-NEXT:   }
diff --git a/test/MC/AsmParser/align_invalid.s b/test/MC/AsmParser/align_invalid.s
index 0d06d9423ced..7ffbed42635a 100644
--- a/test/MC/AsmParser/align_invalid.s
+++ b/test/MC/AsmParser/align_invalid.s
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -triple i386-linux-gnu < %s 2>&1 | FileCheck %s -check-prefix=ELF
-# RUN: llvm-mc -triple i386-apple-darwin < %s 2>&1 | FileCheck %s -check-prefix=DARWIN
+# RUN: not llvm-mc -triple i386-linux-gnu < %s 2>&1 | FileCheck %s -check-prefix=ELF
+# RUN: not llvm-mc -triple i386-apple-darwin < %s 2>&1 | FileCheck %s -check-prefix=DARWIN
 
 .align 3
 # ELF: error: alignment must be a power of 2
diff --git a/test/MC/AsmParser/cfi-window-save.s b/test/MC/AsmParser/cfi-window-save.s
new file mode 100644
index 000000000000..c309436c788f
--- /dev/null
+++ b/test/MC/AsmParser/cfi-window-save.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu <%s | FileCheck %s
+
+# Should use SPARC as the target to test this. However, SPARC does not support
+# asm parsing yet.
+
+# CHECK: .cfi_window_save
+
+
+f:
+        .cfi_startproc
+        nop
+        .cfi_window_save
+        nop
+        .cfi_endproc
+
diff --git a/test/MC/AsmParser/directive_align.s b/test/MC/AsmParser/directive_align.s
index 15eb430bdaf3..7ce28559d951 100644
--- a/test/MC/AsmParser/directive_align.s
+++ b/test/MC/AsmParser/directive_align.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple i386-apple-darwin9 %s | FileCheck %s
+# RUN: not llvm-mc -triple i386-apple-darwin9 %s | FileCheck %s
 
 # CHECK: TEST0:
 # CHECK: .align 1
diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s
index 121890e69a4a..9b99e0f24e99 100644
--- a/test/MC/AsmParser/directive_file.s
+++ b/test/MC/AsmParser/directive_file.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
 
         .file "hello"
-        .file 1 "world"
+        .file 1 "worl\144"   # "\144" is "d"
         .file 2 "directory" "file"
 
 # CHECK: .file "hello"
diff --git a/test/MC/AsmParser/directive_fill.s b/test/MC/AsmParser/directive_fill.s
index 60bd468cd348..bb3ced091c80 100644
--- a/test/MC/AsmParser/directive_fill.s
+++ b/test/MC/AsmParser/directive_fill.s
@@ -15,3 +15,19 @@ TEST1:
 # CHECK: .quad 4
 TEST2:  
         .fill 1, 8, 4
+
+# CHECK: TEST3
+# CHECK: .byte 0
+# CHECK: .byte 0
+# CHECK: .byte 0
+# CHECK: .byte 0
+TEST3:
+	.fill 4
+
+# CHECK: TEST4
+# CHECK: .short 0
+# CHECK: .short 0
+# CHECK: .short 0
+# CHECK: .short 0
+TEST4:
+	.fill 4, 2
diff --git a/test/MC/AsmParser/directive_incbin.s b/test/MC/AsmParser/directive_incbin.s
index 55f9c7951ffa..ed4e27a2c1ad 100644
--- a/test/MC/AsmParser/directive_incbin.s
+++ b/test/MC/AsmParser/directive_incbin.s
@@ -1,6 +1,6 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s
 
 .data
-.incbin "incbin_abcd"
+.incbin "incbin\137abcd"  # "\137" is underscore "_"
 
 # CHECK: .ascii	 "abcd\n"
diff --git a/test/MC/AsmParser/directive_include.s b/test/MC/AsmParser/directive_include.s
index fabd941d9999..f53bc671fc64 100644
--- a/test/MC/AsmParser/directive_include.s
+++ b/test/MC/AsmParser/directive_include.s
@@ -5,5 +5,5 @@
 # CHECK: a = 0
 # CHECK: TESTB:
 TESTA:  
-	.include       "directive_set.s"
+	.include       "directive\137set.s"   # "\137" is underscore "_"
 TESTB:
diff --git a/test/MC/AsmParser/directive_loc.s b/test/MC/AsmParser/directive_loc.s
index 164d42a3fa7c..700a32ca389f 100644
--- a/test/MC/AsmParser/directive_loc.s
+++ b/test/MC/AsmParser/directive_loc.s
@@ -6,3 +6,4 @@
         .loc 1 2
         .loc 1 2 3
         .loc 1 2 discriminator 1
+        .loc 1 0
diff --git a/test/MC/AsmParser/floating-literals.s b/test/MC/AsmParser/floating-literals.s
index d44bb9830c08..6578e32ce822 100644
--- a/test/MC/AsmParser/floating-literals.s
+++ b/test/MC/AsmParser/floating-literals.s
@@ -1,4 +1,5 @@
-# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2> /dev/null | FileCheck %s
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 > /dev/null| FileCheck %s --check-prefix=CHECK-ERROR
 
 # CHECK: .long	1067412619
 # CHECK: .long	1075000115
@@ -42,3 +43,40 @@
 // APFloat should reject these with an error, not crash:
 //.double -1.2e+
 //.double -1.2e
+
+# CHECK: .long 1310177520
+.float 0x12f7.1ep+17
+# CHECK: .long 1084227584
+.float 0x.ap+3
+# CHECK: .quad 4602678819172646912
+.double 0x2.p-2
+# CHECK: .long 1094713344
+.float 0x3p2
+# CHECK: .long 872284160
+.float 0x7fp-30
+# CHECK: .long 3212836864
+.float -0x1.0p0
+
+# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit
+# CHECK-ERROR: unexpected token in directive
+.float 0xa.apa
+
+# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit
+# CHECK-ERROR: unexpected token in directive
+.double -0x1.2p+
+
+# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one exponent digit
+# CHECK-ERROR: unexpected token in directive
+.double -0x1.2p
+
+# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one significand digit
+# CHECK-ERROR: unexpected token in directive
+.float 0xp2
+
+# CHECK-ERROR: invalid hexadecimal floating-point constant: expected at least one significand digit
+# CHECK-ERROR: unexpected token in directive
+.float 0x.p5
+
+# CHECK-ERROR: error: invalid hexadecimal floating-point constant: expected exponent part 'p'
+# CHECK-ERROR: unexpected token in directive
+.float 0x1.2
diff --git a/test/MC/AsmParser/lit.local.cfg b/test/MC/AsmParser/lit.local.cfg
index 6c49f08b7496..ba763cf03ffc 100644
--- a/test/MC/AsmParser/lit.local.cfg
+++ b/test/MC/AsmParser/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/AsmParser/macros-darwin.s b/test/MC/AsmParser/macros-darwin.s
index 31b9edb37818..95965d3fe1cb 100644
--- a/test/MC/AsmParser/macros-darwin.s
+++ b/test/MC/AsmParser/macros-darwin.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple i386-apple-darwin10 %s 2> %t.err | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin10 %s | FileCheck %s
 
 .macro test1
 .globl "$0 $1 $2 $$3 $n"
diff --git a/test/MC/AsmParser/secure_log_unique.s b/test/MC/AsmParser/secure_log_unique.s
new file mode 100644
index 000000000000..8145981a31cd
--- /dev/null
+++ b/test/MC/AsmParser/secure_log_unique.s
@@ -0,0 +1,9 @@
+// RUN: rm -f %t
+// RUN: env AS_SECURE_LOG_FILE=%t llvm-mc -triple x86_64-apple-darwin %s
+// RUN: env AS_SECURE_LOG_FILE=%t llvm-mc -triple x86_64-apple-darwin %s
+// RUN: FileCheck --input-file=%t %s
+.secure_log_unique "foobar"
+
+// CHECK: "foobar"
+// CHECK-NEXT: "foobar"
+
diff --git a/test/MC/COFF/alias.s b/test/MC/COFF/alias.s
new file mode 100644
index 000000000000..f6f6d46bf6a3
--- /dev/null
+++ b/test/MC/COFF/alias.s
@@ -0,0 +1,106 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -t -r | FileCheck %s
+
+local1:
+external_aliased_to_local = local1
+
+        .globl  global_aliased_to_external
+global_aliased_to_external = external1
+
+        .globl  global_aliased_to_local
+local2:
+global_aliased_to_local = local2
+
+        .weak   weak_aliased_to_external
+weak_aliased_to_external = external2
+
+// Generate relocs against the above aliases.
+        .long external_aliased_to_local
+        .long global_aliased_to_external
+        .long global_aliased_to_local
+        .long weak_aliased_to_external
+
+// CHECK:      Relocations [
+// CHECK:        0x0 IMAGE_REL_I386_DIR32 local1
+// CHECK:        0x4 IMAGE_REL_I386_DIR32 external1
+// CHECK:        0x8 IMAGE_REL_I386_DIR32 local2
+// CHECK:        0xC IMAGE_REL_I386_DIR32 external2
+// CHECK:      ]
+// CHECK:      Symbols [
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section: .text (1)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: Static (0x3)
+// CHECK-NEXT:     AuxSymbolCount: 1
+// CHECK:        }
+// CHECK:        Symbol {
+// CHECK-NEXT:     Name: local1
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section: .text (1)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: Static (0x3)
+// CHECK-NEXT:     AuxSymbolCount: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: global_aliased_to_external
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: External (0x2)
+// CHECK-NEXT:     AuxSymbolCount: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: external1
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: External (0x2)
+// CHECK-NEXT:     AuxSymbolCount: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: global_aliased_to_local
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section: .text (1)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: Static (0x3)
+// CHECK-NEXT:     AuxSymbolCount: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: local2
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section: .text (1)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: Static (0x3)
+// CHECK-NEXT:     AuxSymbolCount: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: weak_aliased_to_external
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: WeakExternal (0x69)
+// CHECK-NEXT:     AuxSymbolCount: 1
+// CHECK-NEXT:     AuxWeakExternal {
+// CHECK-NEXT:       Linked: external2 (9)
+// CHECK-NEXT:       Search: Library (0x2)
+// CHECK-NEXT:       Unused: (00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: external2
+// CHECK-NEXT:     Value: 0
+// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     BaseType: Null (0x0)
+// CHECK-NEXT:     ComplexType: Null (0x0)
+// CHECK-NEXT:     StorageClass: External (0x2)
+// CHECK-NEXT:     AuxSymbolCount: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/COFF/bss_section.ll b/test/MC/COFF/bss_section.ll
new file mode 100644
index 000000000000..60924f13064f
--- /dev/null
+++ b/test/MC/COFF/bss_section.ll
@@ -0,0 +1,6 @@
+; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck %s
+
+%struct.foo = type { i32, i32 }
+
+@"\01?thingy@@3Ufoo@@B" = global %struct.foo zeroinitializer, align 4
+; CHECK: .bss
diff --git a/test/MC/COFF/eh-frame.s b/test/MC/COFF/eh-frame.s
new file mode 100644
index 000000000000..e606b764d647
--- /dev/null
+++ b/test/MC/COFF/eh-frame.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -s | FileCheck %s
+
+	.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+_main:
+	.cfi_startproc
+	ret
+	.cfi_endproc
+
+// CHECK:    Name: .eh_frame
diff --git a/test/MC/COFF/feat00.s b/test/MC/COFF/feat00.s
new file mode 100644
index 000000000000..d08f407cef58
--- /dev/null
+++ b/test/MC/COFF/feat00.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -t | FileCheck %s
+
+"@feat.00" = 123
+.globl @feat.00
+
+// CHECK: Symbol {
+// CHECK:   Name: @feat.00
+// CHECK:   Value: 123
+// CHECK:   Section: (-1)
+// CHECK:   BaseType: Null (0x0)
+// CHECK:   ComplexType: Null (0x0)
+// CHECK:   StorageClass: External (0x2)
+// CHECK:   AuxSymbolCount: 0
+// CHECK: }
diff --git a/test/MC/COFF/linkonce-invalid.s b/test/MC/COFF/linkonce-invalid.s
new file mode 100644
index 000000000000..90ce4a7ad317
--- /dev/null
+++ b/test/MC/COFF/linkonce-invalid.s
@@ -0,0 +1,40 @@
+// Test invalid use of the .linkonce directive.
+//
+// RUN: not llvm-mc -triple i386-pc-win32 -filetype=obj %s 2>&1 | FileCheck %s
+
+.section non_comdat
+
+.section comdat
+.linkonce discard
+
+.section assoc
+.linkonce associative comdat
+
+
+.section invalid
+
+// CHECK: error: unrecognized COMDAT type 'unknown'
+.linkonce unknown
+
+// CHECK: error: unexpected token in directive
+.linkonce discard foo
+
+// CHECK: error: expected associated section name
+.linkonce associative
+
+// CHECK: error: cannot associate unknown section 'unknown'
+.linkonce associative unknown
+
+// CHECK: error: cannot associate a section with itself
+.linkonce associative invalid
+
+// CHECK: error: associated section must be a COMDAT section
+.linkonce associative non_comdat
+
+// CHECK: error: associated section cannot be itself associative
+.linkonce associative assoc
+
+// CHECK: error: section 'multi' is already linkonce
+.section multi
+.linkonce discard
+.linkonce same_size
diff --git a/test/MC/COFF/linkonce.s b/test/MC/COFF/linkonce.s
new file mode 100644
index 000000000000..e7b7f475a3c6
--- /dev/null
+++ b/test/MC/COFF/linkonce.s
@@ -0,0 +1,179 @@
+// Test section manipulation via .linkonce directive.
+//
+// RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
+
+.section s1
+.linkonce
+.long 1
+
+.section s2
+.linkonce one_only
+.long 1
+
+.section s3
+.linkonce discard
+.long 1
+
+.section s4
+.linkonce same_size
+.long 1
+
+.section s5
+.linkonce same_contents
+.long 1
+
+.section s6
+.linkonce associative s1
+.long 1
+
+.section s7
+.linkonce largest
+.long 1
+
+.section s8
+.linkonce newest
+.long 1
+
+.section .foo$bar
+.linkonce discard
+.long 1
+
+// Check that valid '.section' names can be associated.
+.section multi
+.linkonce associative .foo$bar
+.long 1
+
+
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Name: s1
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s2
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s3
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s4
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s5
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s6
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s7
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: s8
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: multi
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name: s1
+// CHECK:     Section: s1 (1)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 1
+// CHECK:       Selection: Any (0x2)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s2
+// CHECK:     Section: s2 (2)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 2
+// CHECK:       Selection: NoDuplicates (0x1)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s3
+// CHECK:     Section: s3 (3)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 3
+// CHECK:       Selection: Any (0x2)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s4
+// CHECK:     Section: s4 (4)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 4
+// CHECK:       Selection: SameSize (0x3)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s5
+// CHECK:     Section: s5 (5)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 5
+// CHECK:       Selection: ExactMatch (0x4)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s6
+// CHECK:     Section: s6 (6)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 1
+// CHECK:       Selection: Associative (0x5)
+// CHECK:       AssocSection: s1
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s7
+// CHECK:     Section: s7 (7)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 7
+// CHECK:       Selection: Largest (0x6)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: s8
+// CHECK:     Section: s8 (8)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 8
+// CHECK:       Selection: Newest (0x7)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: multi
+// CHECK:     Value: 0
+// CHECK:     Section: multi (10)
+// CHECK:     AuxSectionDef {
+// CHECK:       Number: 9
+// CHECK:       Selection: Associative (0x5)
+// CHECK:       AssocSection: .foo$bar
+// CHECK:     }
+// CHECK:   }
diff --git a/test/MC/COFF/lit.local.cfg b/test/MC/COFF/lit.local.cfg
index 41a8434f9993..ba763cf03ffc 100644
--- a/test/MC/COFF/lit.local.cfg
+++ b/test/MC/COFF/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.s', '.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/COFF/lset0.s b/test/MC/COFF/lset0.s
new file mode 100755
index 000000000000..f5020c83ef60
--- /dev/null
+++ b/test/MC/COFF/lset0.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-nm | FileCheck %s
+
+not_global = 123
+global = 456
+.globl global
+.Llocal = 789
+
+// CHECK-NOT: not_global
+// CHECK-NOT: Llocal
+// CHECK: global
+// CHECK-NOT: not_global
+// CHECK-NOT: Llocal
diff --git a/test/MC/COFF/rdata.ll b/test/MC/COFF/rdata.ll
new file mode 100644
index 000000000000..f0417811704c
--- /dev/null
+++ b/test/MC/COFF/rdata.ll
@@ -0,0 +1,6 @@
+; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck %s
+
+%struct.foo = type { i32, i32 }
+
+@"\01?thingy@@3Ufoo@@B" = constant %struct.foo zeroinitializer, align 4
+; CHECK: .section        .rdata
diff --git a/test/MC/COFF/section-comdat.s b/test/MC/COFF/section-comdat.s
new file mode 100644
index 000000000000..dd5be871b050
--- /dev/null
+++ b/test/MC/COFF/section-comdat.s
@@ -0,0 +1,188 @@
+// RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
+
+.section assocSec
+.linkonce
+.long 1
+
+.section secName, "dr", discard, "Symbol1"
+.globl Symbol1
+Symbol1:
+.long 1
+
+.section secName, "dr", one_only, "Symbol2"
+.globl Symbol2
+Symbol2:
+.long 1
+
+.section SecName, "dr", same_size, "Symbol3"
+.globl Symbol3
+Symbol3:
+.long 1
+
+.section SecName, "dr", same_contents, "Symbol4"
+.globl Symbol4
+Symbol4:
+.long 1
+
+.section SecName, "dr", associative assocSec, "Symbol5"
+.globl Symbol5
+Symbol5:
+.long 1
+
+.section SecName, "dr", largest, "Symbol6"
+.globl Symbol6
+Symbol6:
+.long 1
+
+.section SecName, "dr", newest, "Symbol7"
+.globl Symbol7
+Symbol7:
+.long 1
+
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number: 1
+// CHECK:     Name: assocSec
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 2
+// CHECK:     Name: secName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 3
+// CHECK:     Name: secName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 4
+// CHECK:     Name: SecName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 5
+// CHECK:     Name: SecName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 6
+// CHECK:     Name: SecName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 7
+// CHECK:     Name: SecName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 8
+// CHECK:     Name: SecName
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name: assocSec
+// CHECK:     Section: assocSec (1)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: Any
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: secName
+// CHECK:     Section: secName (2)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: Any
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: secName
+// CHECK:     Section: secName (3)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: NoDuplicates
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: SecName
+// CHECK:     Section: SecName (4)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: SameSize
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: SecName
+// CHECK:     Section: SecName (5)
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: ExactMatch
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: SecName
+// CHECK:     Section: SecName (6)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: Associative
+// CHECK:       AssocSection: assocSec (1)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: SecName
+// CHECK:     Section: SecName (7)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: Largest
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: SecName
+// CHECK:     Section: SecName (8)
+// CHECK:     AuxSectionDef {
+// CHECK:       Selection: Newest (0x7)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol1
+// CHECK:     Section: secName (2)
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol2
+// CHECK:     Section: secName (3)
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol3
+// CHECK:     Section: SecName (4)
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol4
+// CHECK:     Section: SecName (5)
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol5
+// CHECK:     Section: SecName (6)
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol6
+// CHECK:     Section: SecName (7)
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name: Symbol7
+// CHECK:     Section: SecName (8)
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/section-invalid-flags.s b/test/MC/COFF/section-invalid-flags.s
new file mode 100644
index 000000000000..17b1550a904e
--- /dev/null
+++ b/test/MC/COFF/section-invalid-flags.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -triple i386-pc-win32 -filetype=obj %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple x86_64-pc-win32 -filetype=obj %s 2>&1 | FileCheck %s
+
+// CHECK: error: conflicting section flags 'b' and 'd'
+.section s_db,"db"; .long 1
+
+// CHECK: error: conflicting section flags 'b' and 'd'
+.section s_bd,"bd"; .long 1
diff --git a/test/MC/COFF/section-name-encoding.s b/test/MC/COFF/section-name-encoding.s
new file mode 100644
index 000000000000..0f531f397565
--- /dev/null
+++ b/test/MC/COFF/section-name-encoding.s
@@ -0,0 +1,62 @@
+// Check that COFF section names are properly encoded.
+//
+// Encodings for different lengths:
+//   [0, 8]:               raw name
+//   (8, 9999999]:         base 10 string table index (/9999999)
+//
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s
+
+// Raw encoding
+
+// CHECK:   Section {
+// CHECK:     Number: 1
+// CHECK:     Name: s (73 00 00 00 00 00 00 00)
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: 2
+// CHECK:     Name: s1234567 (73 31 32 33 34 35 36 37)
+// CHECK:   }
+.section s;        .long 1
+.section s1234567; .long 1
+
+
+// Base 10 encoding
+
+// /4
+// CHECK:   Section {
+// CHECK:     Number: 3
+// CHECK:     Name: s12345678 (2F 34 00 00 00 00 00 00)
+// CHECK:   }
+.section s12345678; .long 1
+
+
+// Generate padding sections to increase the string table size to at least
+// 1,000,000 bytes.
+.macro pad_sections2 pad
+  // 10x \pad
+  .section p0\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad; .long 1
+  .section p1\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad; .long 1
+  .section p2\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad; .long 1
+  .section p3\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad; .long 1
+  .section p4\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad; .long 1
+.endm
+
+.macro pad_sections pad
+  // 20x \pad
+  pad_sections2 \pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad
+.endm
+
+// 1000x 'a'
+pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+
+// /1000029 == 4 + 10 + (5 * (2 + (20 * 10 * 1000) + 1))
+//             v   |     |    v    ~~~~~~~~~~~~~~    v
+//    table size   v     v   "p0"        pad         NUL separator
+//     "s12345678\0"     # of pad sections
+//
+// CHECK:   Section {
+// CHECK:     Number: 9
+// CHECK:     Name: seven_digit (2F 31 30 30 30 30 32 39)
+// CHECK:   }
+.section seven_digit; .long 1
diff --git a/test/MC/COFF/section.s b/test/MC/COFF/section.s
new file mode 100644
index 000000000000..d7547e626eb3
--- /dev/null
+++ b/test/MC/COFF/section.s
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s
+
+.section .foo$bar; .long 1
+.section .foo@bar; .long 1
+.section ABCDEFGHIJKLMNOPQRSTUVWXYZ; .long 1
+.section abcdefghijklmnopqrstuvwxyz; .long 1
+.section _0123456789; .long 1
+
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Name: .foo$bar
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: .foo@bar
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: ABCDEFGHIJKLMNOPQRSTUVWXYZ
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: abcdefghijklmnopqrstuvwxyz
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: _0123456789
+// CHECK:   }
+
+// Test that the defaults are used
+.section s      ; .long 1
+.section s_, "" ; .long 1
+.section s_a,"a"; .long 1
+.section s_b,"b"; .long 1
+.section s_d,"d"; .long 1
+.section s_n,"n"; .long 1
+.section s_r,"r"; .long 1
+.section s_s,"s"; .long 1
+.section s_w,"w"; .long 1
+.section s_x,"x"; .long 1
+.section s_y,"y"; .long 1
+
+// CHECK:        Section {
+// CHECK:          Name: s
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_a
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_b
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_UNINITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_d
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_n
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_LNK_REMOVE
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_r
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_s
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_SHARED
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_w
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_x
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_CODE
+// CHECK-NEXT:       IMAGE_SCN_MEM_EXECUTE
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:        }
+// CHECK:        Section {
+// CHECK:          Name: s_y
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:     ]
+// CHECK:        }
+
+// w makes read-only writable
+.section s_rw,"rw"; .long 1
+// CHECK:        Section {
+// CHECK:          Name: s_rw
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+// CHECK-NEXT:     ]
+// CHECK:        }
+
+// r cancels w
+.section s_wr,"wr"; .long 1
+// CHECK:        Section {
+// CHECK:          Name: s_wr
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:        }
+
+// y cancels both
+.section s_rwy,"rwy"; .long 1
+// CHECK:        Section {
+// CHECK:          Name: s_rwy
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_1BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:     ]
+// CHECK:        }
+
+// CHECK:      ]
diff --git a/test/MC/COFF/seh-align1.s b/test/MC/COFF/seh-align1.s
new file mode 100644
index 000000000000..aafc6ede0851
--- /dev/null
+++ b/test/MC/COFF/seh-align1.s
@@ -0,0 +1,65 @@
+// This test checks the alignment and padding of the unwind info.
+
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -sd -sr -u | FileCheck %s
+
+// CHECK:      Sections [
+// CHECK:        Section {
+// CHECK:          Name: .xdata
+// CHECK:          RawDataSize: 8
+// CHECK:          RelocationCount: 0
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:          Relocations [
+// CHECK-NEXT:     ]
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: 01000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .pdata
+// CHECK:          RawDataSize: 12
+// CHECK:          RelocationCount: 3
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:          Relocations [
+// CHECK-NEXT:       [[BeginDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB smallFunc
+// CHECK-NEXT:       [[EndDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB smallFunc
+// CHECK-NEXT:       [[UnwindDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:     ]
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: 00000000 01000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK:        UnwindInformation [
+// CHECK-NEXT:     RuntimeFunction {
+// CHECK-NEXT:     StartAddress: smallFunc {{(\+0x[A-F0-9]+ )?}}([[BeginDisp]])
+// CHECK-NEXT:     EndAddress: smallFunc {{(\+0x[A-F0-9]+ )?}}([[EndDisp]])
+// CHECK-NEXT:     UnwindInfoAddress: .xdata {{(\+0x[A-F0-9]+ )?}}([[UnwindDisp]])
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 0
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// Generate the minimal unwind info.
+// It contains only the version set to 1. All other bytes are 0.
+    .globl smallFunc
+    .def smallFunc; .scl 2; .type 32; .endef
+    .seh_proc smallFunc
+smallFunc:
+    ret
+    .seh_endproc
diff --git a/test/MC/COFF/seh-align2.s b/test/MC/COFF/seh-align2.s
new file mode 100644
index 000000000000..5e6c49a720ce
--- /dev/null
+++ b/test/MC/COFF/seh-align2.s
@@ -0,0 +1,78 @@
+// This test checks the alignment and padding of the unwind info.
+
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -sd -sr -u | FileCheck %s
+
+// CHECK:      Sections [
+// CHECK:        Section {
+// CHECK:          Name: .xdata
+// CHECK:          RawDataSize: 16
+// CHECK:          RelocationCount: 1
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:          Relocations [
+// CHECK-NEXT:       [[HandlerDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB __C_specific_handler
+// CHECK-NEXT:     ]
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: 09000100 04220000 00000000 BEBAFECA
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK:          Name: .pdata
+// CHECK:          RawDataSize: 12
+// CHECK:          RelocationCount: 3
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:          Relocations [
+// CHECK-NEXT:       [[BeginDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:       [[EndDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:       [[UnwindDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:     ]
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: FCFFFFFF 05000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+// CHECK:      UnwindInformation [
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: func {{(\+0x[A-F0-9]+ )?}}([[BeginDisp]])
+// CHECK-NEXT:     EndAddress: func {{(\+0x[A-F0-9]+ )?}}([[EndDisp]])
+// CHECK-NEXT:     UnwindInfoAddress: .xdata {{(\+0x[A-F0-9]+ )?}}([[UnwindDisp]])
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:         ExceptionHandler
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 1
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:         0x04: ALLOC_SMALL size=24
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       Handler: __C_specific_handler ([[HandlerDisp]])
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// Generates only one unwind code.
+// Requires padding of the unwind code array.
+    .globl func
+    .def func; .scl 2; .type 32; .endef
+    .seh_proc func
+    subq $24, %rsp
+    .seh_stackalloc 24
+    .seh_handler __C_specific_handler, @except
+    .seh_handlerdata
+    .long 0xcafebabe
+    .text
+    .seh_endprologue
+func:
+    addq $24, %rsp
+    ret
+    .seh_endproc
diff --git a/test/MC/COFF/seh-align3.s b/test/MC/COFF/seh-align3.s
new file mode 100644
index 000000000000..238b5de35645
--- /dev/null
+++ b/test/MC/COFF/seh-align3.s
@@ -0,0 +1,83 @@
+// This test checks the alignment and padding of the unwind info.
+
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -sd -sr -u | FileCheck %s
+
+// CHECK:      Sections [
+// CHECK:        Section {
+// CHECK:          Name: .xdata
+// CHECK:          RawDataSize: 16
+// CHECK:          RelocationCount: 1
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:          Relocations [
+// CHECK-NEXT:       [[HandlerDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB __C_specific_handler
+// CHECK-NEXT:     ]
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: 19000200 04D002C0 00000000 BEBAFECA
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK:          Name: .pdata
+// CHECK:          RawDataSize: 12
+// CHECK:          RelocationCount: 3
+// CHECK:          Characteristics [
+// CHECK-NEXT:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:       IMAGE_SCN_MEM_READ
+// CHECK-NEXT:     ]
+// CHECK:          Relocations [
+// CHECK-NEXT:       [[BeginDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:       [[EndDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:       [[UnwindDisp:0x[A-F0-9]+]] IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:     ]
+// CHECK:          SectionData (
+// CHECK-NEXT:       0000: FCFFFFFF 05000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+// CHECK:      UnwindInformation [
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: func {{(\+0x[A-F0-9]+ )?}}([[BeginDisp]])
+// CHECK-NEXT:     EndAddress: func {{(\+0x[A-F0-9]+ )?}}([[EndDisp]])
+// CHECK-NEXT:     UnwindInfoAddress: .xdata {{(\+0x[A-F0-9]+ )?}}([[UnwindDisp]])
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:         ExceptionHandler
+// CHECK-NEXT:         TerminateHandler
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 2
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:         0x04: PUSH_NONVOL reg=R13
+// CHECK-NEXT:         0x02: PUSH_NONVOL reg=R12
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       Handler: __C_specific_handler ([[HandlerDisp]])
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// Generates two unwind codes.
+// Requires no padding of the unwind code array.
+    .globl func
+    .def func; .scl 2; .type 32; .endef
+    .seh_proc func
+    push %r12
+    .seh_pushreg 12
+    push %r13
+    .seh_pushreg 13
+    .seh_handler __C_specific_handler, @except, @unwind
+    .seh_handlerdata
+    .long 0xcafebabe
+    .text
+    .seh_endprologue
+func:
+    pop %r13
+    pop %r12
+    ret
+    .seh_endproc
diff --git a/test/MC/COFF/seh-section.s b/test/MC/COFF/seh-section.s
index 7f05cc372e07..026c0d733484 100644
--- a/test/MC/COFF/seh-section.s
+++ b/test/MC/COFF/seh-section.s
@@ -1,7 +1,6 @@
 // This test ensures that, if the section containing a function has a suffix
 // (e.g. .text$foo), its unwind info section also has a suffix (.xdata$foo).
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s
-// XFAIL: *
 
 // CHECK:      Name: .xdata$foo
 // CHECK-NEXT: VirtualSize
@@ -16,7 +15,6 @@
 // CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
 // CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
 // CHECK-NEXT:   IMAGE_SCN_MEM_READ
-// CHECK-NEXT:   IMAGE_SCN_MEM_WRITE
 // CHECK-NEXT: ]
 // CHECK-NEXT: SectionData (
 // CHECK-NEXT:   0000: 01050200 05500402
diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s
index bef425efb437..72d42f4b2ea3 100644
--- a/test/MC/COFF/seh.s
+++ b/test/MC/COFF/seh.s
@@ -1,8 +1,6 @@
 // This test checks that the SEH directives emit the correct unwind data.
 
-// TODO: Expected fail because SET_FPREG has a wrong offset.
-// XFAIL: *
-// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -u | FileCheck %s
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -u -r | FileCheck %s
 
 // CHECK:      Sections [
 // CHECK:        Section {
@@ -36,6 +34,27 @@
 // CHECK-NEXT:   }
 // CHECK-NEXT: ]
 
+// CHECK-NEXT: Relocations [
+// CHECK-NEXT:   Section (2) .xdata {
+// CHECK-NEXT:     0x14 IMAGE_REL_AMD64_ADDR32NB __C_specific_handler
+// CHECK-NEXT:     0x20 IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:     0x24 IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:     0x28 IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section (3) .pdata {
+// CHECK-NEXT:     0x0 IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:     0x4 IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:     0x8 IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:     0xC IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:     0x10 IMAGE_REL_AMD64_ADDR32NB func
+// CHECK-NEXT:     0x14 IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:     0x18 IMAGE_REL_AMD64_ADDR32NB smallFunc
+// CHECK-NEXT:     0x1C IMAGE_REL_AMD64_ADDR32NB smallFunc
+// CHECK-NEXT:     0x20 IMAGE_REL_AMD64_ADDR32NB .xdata
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+
 // CHECK:      UnwindInformation [
 // CHECK-NEXT:   RuntimeFunction {
 // CHECK-NEXT:     StartAddress: [[CodeSect1:[^ ]+]] [[BeginDisp1:(\+0x[A-F0-9]+)?]]
diff --git a/test/MC/COFF/tricky-names.ll b/test/MC/COFF/tricky-names.ll
new file mode 100644
index 000000000000..6e041d38d345
--- /dev/null
+++ b/test/MC/COFF/tricky-names.ll
@@ -0,0 +1,38 @@
+; Check how tricky symbols are printed in the asm output.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s --check-prefix=ASM
+
+; Check that we can roundtrip these names through our assembler.
+; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -t | FileCheck %s --check-prefix=READOBJ
+
+
+@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0
+@"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4" = global i32 0
+@"\01@foo.bar" = global i32 0
+
+define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" {
+  %a = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ"
+  %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4"
+  %c = load i32* @"\01@foo.bar"
+  %x = add i32 %a, %b
+  %y = add i32 %x, %c
+  ret i32 %y
+}
+
+; Check that these symbols are not quoted. They occur in output that gets passed to GAS.
+; ASM: .globl __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4
+; ASM-NOT: .globl "__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4"
+; ASM: .globl @foo.bar
+; ASM-NOT: .globl "@foo.bar"
+
+; READOBJ: Symbol
+; READOBJ: Name: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
+; READOBJ: Symbol
+; READOBJ: Name: ??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ
+; READOBJ: Symbol
+; READOBJ: Name: __ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4
+; READOBJ: Symbol
+; READOBJ: Name: @foo.bar
diff --git a/test/MC/COFF/weak-symbol-section-specification.ll b/test/MC/COFF/weak-symbol-section-specification.ll
deleted file mode 100644
index 4772c929f29e..000000000000
--- a/test/MC/COFF/weak-symbol-section-specification.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; The purpose of this test is to verify that weak linkage type is not ignored by backend,
-; if section was specialized.
-
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
-
-@a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
-
-; CHECK:           Name:                      .data$a
-; CHECK-NEXT:      VirtualSize:               0
-; CHECK-NEXT:      VirtualAddress:            0
-; CHECK-NEXT:      RawDataSize:               {{[0-9]+}}
-; CHECK-NEXT:      PointerToRawData:          0x{{[0-9A-F]+}}
-; CHECK-NEXT:      PointerToRelocations:      0x0
-; CHECK-NEXT:      PointerToLineNumbers:      0x0
-; CHECK-NEXT:      RelocationCount:           0
-; CHECK-NEXT:      LineNumberCount:           0
-; CHECK-NEXT:      Characteristics [ (0x40401040)
-; CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
-; CHECK-NEXT:        IMAGE_SCN_CNT_INITIALIZED_DATA
-; CHECK-NEXT:        IMAGE_SCN_LNK_COMDAT
-; CHECK-NEXT:        IMAGE_SCN_MEM_READ
-; CHECK-NEXT:      ]
-; CHECK-NEXT:      SectionData (
-; CHECK-NEXT:        0000: 00000000 00000000 00000000
-; CHECK-NEXT:      )
diff --git a/test/MC/COFF/weak-symbol.ll b/test/MC/COFF/weak-symbol.ll
new file mode 100644
index 000000000000..7f2e87cb75a7
--- /dev/null
+++ b/test/MC/COFF/weak-symbol.ll
@@ -0,0 +1,44 @@
+; Test that weak functions and globals are placed into selectany COMDAT
+; sections with the mangled name as suffix. Ensure that the weak linkage
+; type is not ignored by the backend if the section was specialized.
+;
+; RUN: llc -mtriple=i686-pc-win32 %s     -o - | FileCheck %s --check-prefix=X86
+; RUN: llc -mtriple=i686-pc-mingw32 %s   -o - | FileCheck %s --check-prefix=X86
+; RUN: llc -mtriple=x86_64-pc-win32 %s   -o - | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=x86_64-pc-mingw32 %s -o - | FileCheck %s --check-prefix=X64
+
+; Mangled function
+; X86: .section .text$_Z3foo
+; X86: .linkonce discard
+; X86: .globl __Z3foo
+;
+; X64: .section .text$_Z3foo
+; X64: .linkonce discard
+; X64: .globl _Z3foo
+define weak void @_Z3foo() {
+  ret void
+}
+
+; Unmangled function
+; X86: .section .sect$f
+; X86: .linkonce discard
+; X86: .globl _f
+;
+; X64: .section .sect$f
+; X64: .linkonce discard
+; X64: .globl f
+define weak void @f() section ".sect" {
+  ret void
+}
+
+; Weak global
+; X86: .section .data$a
+; X86: .linkonce discard
+; X86: .globl _a
+; X86: .zero 12
+;
+; X64: .section .data$a
+; X64: .linkonce discard
+; X64: .globl a
+; X64: .zero 12
+@a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
diff --git a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
index 966530d36a33..799ecdfdcdf3 100644
--- a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
+++ b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=aarch64 -disassemble -show-encoding < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=fp-armv8 -disassemble -show-encoding < %s | FileCheck %s
 
 # The "Rm" bits are ignored, but the canonical representation has them filled
 # with 0s. This is what we should produce even if the input bit-pattern had
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
index 4fa2d5078b2f..40926b1fddc2 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=aarch64 -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s | FileCheck %s
 
 #------------------------------------------------------------------------------
 # Add/sub (immediate)
diff --git a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
index adb8f75ed990..53638638d58c 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
 
 #------------------------------------------------------------------------------
 # Load-store exclusive
diff --git a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
index 775660bba8a3..637ebdbdba8a 100644
--- a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
+++ b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
 
 # None of these instructions should be classified as unpredictable:
 
diff --git a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
index 48ea8170ba9e..f52d37ffda9b 100644
--- a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
+++ b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
 
 # None of these instructions should be classified as unpredictable:
 
diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg
index f9df30e4d3df..9a66a00189ea 100644
--- a/test/MC/Disassembler/AArch64/lit.local.cfg
+++ b/test/MC/Disassembler/AArch64/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.txt']
-
 targets = set(config.root.targets_to_build.split())
 if not 'AArch64' in targets:
     config.unsupported = True
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
new file mode 100644
index 000000000000..863730ac6be8
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -0,0 +1,2638 @@
+# RUN: llvm-mc  -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Vector Integer Add/Sub
+#------------------------------------------------------------------------------
+# CHECK: add v31.8b, v31.8b, v31.8b
+# CHECK: sub v0.2d, v0.2d, v0.2d
+0xff 0x87 0x3f 0x0e
+0x00 0x84 0xe0 0x6e
+
+#------------------------------------------------------------------------------
+# Vector Floating-Point Add/Sub
+#------------------------------------------------------------------------------
+
+# CHECK: fadd v0.4s, v0.4s, v0.4s
+# CHECK: fsub v31.2s, v31.2s, v31.2s
+0x00 0xd4 0x20 0x4e
+0xff 0xd7 0xbf 0x0e
+
+#------------------------------------------------------------------------------
+# Vector Integer Mul
+#------------------------------------------------------------------------------
+# CHECK: mul v0.8b, v1.8b, v2.8b
+0x20 0x9c 0x22 0x0e
+
+#------------------------------------------------------------------------------
+# Vector Floating-Point Mul/Div
+#------------------------------------------------------------------------------
+# CHECK: fmul v0.2s, v1.2s, v2.2s
+# CHECK: fdiv v31.2s, v31.2s, v31.2s
+0x20 0xdc 0x22 0x2e
+0xff 0xff 0x3f 0x2e
+
+#----------------------------------------------------------------------
+# Vector Polynomial Multiply
+#----------------------------------------------------------------------
+# CHECK: pmul v0.8b, v15.8b, v16.8b
+# CHECK: pmul v31.16b, v7.16b, v8.16b
+0xe0 0x9d 0x30 0x2e
+0xff 0x9c 0x28 0x6e
+
+#------------------------------------------------------------------------------
+# Vector And, Orr, Eor, Orn, Bic
+#------------------------------------------------------------------------------
+# CHECK: and v2.8b, v2.8b, v2.8b
+# CHECK: orr v31.16b, v31.16b, v30.16b
+# CHECK: eor v0.16b, v1.16b, v2.16b
+# CHECK: orn v9.16b, v10.16b, v11.16b
+# CHECK: bic v31.8b, v30.8b, v29.8b
+0x42 0x1c 0x22 0x0e
+0xff 0x1f 0xbe 0x4e
+0x20 0x1c 0x22 0x6e
+0x49 0x1d 0xeb 0x4e
+0xdf 0x1f 0x7d 0x0e
+
+#------------------------------------------------------------------------------
+# Vector Bsl, Bit, Bif
+#------------------------------------------------------------------------------
+# CHECK: bsl v0.8b, v1.8b, v2.8b
+# CHECK: bit v31.16b, v31.16b, v31.16b
+# CHECK: bif v0.16b, v1.16b, v2.16b
+0x20 0x1c 0x62 0x2e
+0xff 0x1f 0xbf 0x6e
+0x20 0x1c 0xe2 0x6e
+
+
+#------------------------------------------------------------------------------
+# Vector Integer Multiply-accumulate and Multiply-subtract
+#------------------------------------------------------------------------------
+# CHECK: mla v0.8b, v1.8b, v2.8b
+# CHECK: mls v31.4h, v31.4h, v31.4h
+0x20 0x94 0x22 0x0e
+0xff 0x97 0x7f 0x2e
+
+#------------------------------------------------------------------------------
+# Vector Floating-Point Multiply-accumulate and Multiply-subtract
+#------------------------------------------------------------------------------
+# CHECK: fmla v0.2s, v1.2s, v2.2s
+# CHECK: fmls v31.2s, v31.2s, v31.2s
+0x20 0xcc 0x22 0x0e
+0xff 0xcf 0xbf 0x0e
+
+#------------------------------------------------------------------------------
+# Vector Move Immediate Shifted
+# Vector Move Inverted Immediate Shifted
+# Vector Bitwise Bit Clear (AND NOT) - immediate
+# Vector Bitwise OR - immediate
+#------------------------------------------------------------------------------
+# CHECK: movi v31.4s, #0xff, lsl #24
+# CHECK: mvni v0.2s, #0x0
+# CHECK: bic v15.4h, #0xf, lsl #8
+# CHECK: orr v16.8h, #0x1f
+0xff 0x67 0x07 0x4f
+0x00 0x04 0x00 0x2f
+0xef 0xb5 0x00 0x2f
+0xf0 0x97 0x00 0x4f
+
+#------------------------------------------------------------------------------
+# Vector Move Immediate Masked
+# Vector Move Inverted Immediate Masked
+#------------------------------------------------------------------------------
+# CHECK: movi v8.2s, #0x8, msl #8
+# CHECK: mvni v16.4s, #0x10, msl #16
+0x08 0xc5 0x00 0x0f
+0x10 0xd6 0x00 0x6f
+
+#------------------------------------------------------------------------------
+# Vector Immediate - per byte
+# Vector Move Immediate - bytemask, per doubleword
+# Vector Move Immediate - bytemask, one doubleword
+#------------------------------------------------------------------------------
+# CHECK: movi v16.8b, #0xff
+# CHECK: movi v31.16b, #0x1f
+# CHECK: movi d15, #0xff00ff00ff00ff
+# CHECK: movi v31.2d, #0xff0000ff0000ffff
+0xf0 0xe7 0x07 0x0f
+0xff 0xe7 0x00 0x4f
+0xaf 0xe6 0x02 0x2f
+0x7f 0xe6 0x04 0x6f
+
+#------------------------------------------------------------------------------
+# Vector Floating Point Move Immediate
+#------------------------------------------------------------------------------
+# CHECK: fmov v0.2s, #13.0
+# CHECK: fmov v15.4s, #1.0
+# CHECK: fmov v31.2d, #-1.25
+0x40 0xf5 0x01 0x0f
+0x0f 0xf6 0x03 0x4f
+0x9f 0xf6 0x07 0x6f
+
+#------------------------------------------------------------------------------
+# Vector Move - register
+#------------------------------------------------------------------------------
+
+# FIXME: these should print as "mov", but TableGen can't handle it.
+
+# CHECK: orr v1.16b, v15.16b, v15.16b
+# CHECK: orr v25.8b, v4.8b, v4.8b
+0xe1 0x1d 0xaf 0x4e
+0x99 0x1c 0xa4 0x0e
+
+#----------------------------------------------------------------------
+# Vector Absolute Difference and Accumulate (Signed, Unsigned)
+# Vector Absolute Difference (Signed, Unsigned)
+# Vector Absolute Difference (Floating Point)
+#----------------------------------------------------------------------
+
+# CHECK: uaba v0.8b, v1.8b, v2.8b
+# CHECK: saba v31.16b, v30.16b, v29.16b
+# CHECK: uabd v15.4h, v16.4h, v17.4h
+# CHECK: sabd v5.4h, v4.4h, v6.4h
+# CHECK: fabd v1.4s, v31.4s, v16.4s
+0x20 0x7c 0x22 0x2e
+0xdf 0x7f 0x3d 0x4e
+0x0f 0x76 0x71 0x2e
+0x85 0x74 0x66 0x0e
+0xe1 0xd7 0xb0 0x6e
+
+#----------------------------------------------------------------------
+# Scalar Integer Add
+# Scalar Integer Sub
+#----------------------------------------------------------------------
+
+# CHECK: add d17, d31, d29
+# CHECK: sub d15, d5, d16
+0xf1 0x87 0xfd 0x5e
+0xaf 0x84 0xf0 0x7e
+
+#----------------------------------------------------------------------
+# Vector Reciprocal Square Root Step (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: frsqrts v31.2d, v15.2d, v8.2d
+0xff 0xfd 0xe8 0x4e
+
+#----------------------------------------------------------------------
+# Vector Reciprocal Step (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: frecps  v5.4s, v7.4s, v16.4s
+0xe5 0xfc 0x30 0x4e
+
+#----------------------------------------------------------------------
+# Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: facge v0.4s, v31.4s, v16.4s
+0xe0 0xef 0x30 0x6e
+
+#----------------------------------------------------------------------
+# Vector Absolute Compare Mask Greater Than (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: facgt v31.2d, v29.2d, v28.2d
+0xbf 0xef 0xfc 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Equal (Integer)
+#----------------------------------------------------------------------
+# CHECK: cmeq v5.16b, v15.16b, v31.16b
+0xe5 0x8d 0x3f 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Higher or Same (Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: cmhs v1.8b, v16.8b, v30.8b
+0x01 0x3e 0x3e 0x2e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than or Equal (Integer)
+#----------------------------------------------------------------------
+# CHECK: cmge v20.4h, v11.4h, v23.4h
+0x74 0x3d 0x77 0x0e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Higher (Unsigned Integer)
+# CHECK: cmhi v13.8h, v3.8h, v27.8h
+0x6d 0x34 0x7b 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than (Integer)
+#----------------------------------------------------------------------
+# CHECK: cmgt v9.4s, v4.4s, v28.4s
+0x89 0x34 0xbc 0x4e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Bitwise Test (Integer)
+#----------------------------------------------------------------------
+# CHECK: cmtst v21.2s, v19.2s, v18.2s
+0x75 0x8e 0xb2 0x0e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Equal (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmeq v0.2s, v15.2s, v16.2s
+0xe0 0xe5 0x30 0x0e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than Or Equal (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmge v31.4s, v7.4s, v29.4s
+0xff 0xe4 0x3d 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmgt v17.4s, v8.4s, v25.4s
+0x11 0xe5 0xb9 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Equal to Zero (Integer)
+#----------------------------------------------------------------------
+# CHECK: cmeq v31.16b, v15.16b, #0x0
+0xff 0x99 0x20 0x4e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
+#----------------------------------------------------------------------
+# CHECK: cmge v3.8b, v15.8b, #0x0
+0xe3 0x89 0x20 0x2e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than Zero (Signed Integer)
+#----------------------------------------------------------------------
+# CHECK: cmgt v22.2s, v9.2s, #0x0
+0x36 0x89 0xa0 0x0e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
+#----------------------------------------------------------------------
+# CHECK: cmle v5.2d, v14.2d, #0x0
+0xc5 0x99 0xe0 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Less Than Zero (Signed Integer)
+#----------------------------------------------------------------------
+# CHECK: cmlt v13.8h, v11.8h, #0x0
+0x6d 0xa9 0x60 0x4e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Equal to Zero (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmeq v15.2s, v21.2s, #0.0
+0xaf 0xda 0xa0 0x0e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmge v14.2d, v13.2d, #0.0
+0xae 0xc9 0xe0 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Greater Than Zero (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmgt v9.4s, v23.4s, #0.0
+0xe9 0xca 0xa0 0x4e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Less Than or Equal To Zero (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmle v11.2d, v6.2d, #0.0
+0xcb 0xd8 0xe0 0x6e
+
+#----------------------------------------------------------------------
+# Vector Compare Mask Less Than Zero (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fcmlt v12.4s, v25.4s, #0.0
+0x2c 0xeb 0xa0 0x4e
+
+
+#------------------------------------------------------------------------------
+# Vector Integer Halving Add (Signed)
+# Vector Integer Halving Add (Unsigned)
+# Vector Integer Halving Sub (Signed)
+# Vector Integer Halving Sub (Unsigned)
+#------------------------------------------------------------------------------
+# CHECK: shadd v0.8b, v31.8b, v29.8b
+# CHECK: uhadd v15.16b, v16.16b, v17.16b
+# CHECK: shsub v0.4h, v1.4h, v2.4h
+# CHECK: uhadd v5.8h, v7.8h, v8.8h
+# CHECK: shsub v9.2s, v11.2s, v21.2s
+# CHECK: uhsub v22.4s, v30.4s, v19.4s
+0xe0 0x07 0x3d 0x0e
+0x0f 0x06 0x31 0x6e
+0x20 0x24 0x62 0x0e
+0xe5 0x04 0x68 0x6e
+0x69 0x25 0xb5 0x0e
+0xd6 0x27 0xb3 0x6e
+
+#------------------------------------------------------------------------------
+# Vector Integer Rounding Halving Add (Signed)
+# Vector Integer Rounding Halving Add (Unsigned)
+#------------------------------------------------------------------------------
+# CHECK: srhadd v3.8b, v5.8b, v7.8b
+# CHECK: urhadd v7.16b, v17.16b, v27.16b
+# CHECK: srhadd v10.4h, v11.4h, v13.4h
+# CHECK: urhadd v1.8h, v2.8h, v3.8h
+# CHECK: srhadd v4.2s, v5.2s, v6.2s
+# CHECK: urhadd v7.4s, v7.4s, v7.4s
+0xa3 0x14 0x27 0x0e
+0x27 0x16 0x3b 0x6e
+0x6a 0x15 0x6d 0x0e
+0x41 0x14 0x63 0x6e
+0xa4 0x14 0xa6 0x0e
+0xe7 0x14 0xa7 0x6e
+
+#------------------------------------------------------------------------------
+# Vector Integer Saturating Add (Signed)
+# Vector Integer Saturating Add (Unsigned)
+# Vector Integer Saturating Sub (Signed)
+# Vector Integer Saturating Sub (Unsigned)
+#------------------------------------------------------------------------------
+# CHECK: sqsub v0.8b, v1.8b, v2.8b
+# CHECK: sqadd v0.16b, v1.16b, v2.16b
+# CHECK: uqsub v0.4h, v1.4h, v2.4h
+# CHECK: uqadd v0.8h, v1.8h, v2.8h
+# CHECK: sqadd v0.2s, v1.2s, v2.2s
+# CHECK: sqsub v0.4s, v1.4s, v2.4s
+# CHECK: sqsub v0.2d, v1.2d, v2.2d
+0x20 0x2c 0x22 0x0e
+0x20 0x0c 0x22 0x4e
+0x20 0x2c 0x62 0x2e
+0x20 0x0c 0x62 0x6e
+0x20 0x0c 0xa2 0x0e
+0x20 0x2c 0xa2 0x4e
+0x20 0x2c 0xe2 0x4e
+
+#------------------------------------------------------------------------------
+# Scalar Integer Saturating Add (Signed)
+# Scalar Integer Saturating Add (Unsigned)
+# Scalar Integer Saturating Sub (Signed)
+# Scalar Integer Saturating Sub (Unsigned)
+#------------------------------------------------------------------------------
+# CHECK: sqadd b20, b11, b15
+# CHECK: uqadd h0, h1, h5
+# CHECK: sqsub s20, s10, s7
+# CHECK: uqsub d16, d16, d16
+0x74 0x0d 0x2f 0x5e
+0x20 0x0c 0x65 0x7e
+0x54 0x2d 0xa7 0x5e
+0x10 0x2e 0xf0 0x7e
+
+
+#----------------------------------------------------------------------
+# Vector Shift Left (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: sshl v10.8b, v15.8b, v22.8b
+# CHECK: ushl v10.16b, v5.16b, v2.16b
+# CHECK: sshl v10.4h, v15.4h, v22.4h
+# CHECK: ushl v10.8h, v5.8h, v2.8h
+# CHECK: sshl v10.2s, v15.2s, v22.2s
+# CHECK: ushl v10.4s, v5.4s, v2.4s
+# CHECK: sshl v0.2d, v1.2d, v2.2d
+0xea 0x45 0x36 0x0e
+0xaa 0x44 0x22 0x6e
+0xea 0x45 0x76 0x0e
+0xaa 0x44 0x62 0x6e
+0xea 0x45 0xb6 0x0e
+0xaa 0x44 0xa2 0x6e
+0x20 0x44 0xe2 0x4e
+
+#----------------------------------------------------------------------
+# Vector Saturating Shift Left (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: sqshl v1.8b, v15.8b, v22.8b
+# CHECK: uqshl v2.16b, v14.16b, v23.16b
+# CHECK: sqshl v3.4h, v13.4h, v24.4h
+# CHECK: uqshl v4.8h, v12.8h, v25.8h
+# CHECK: sqshl v5.2s, v11.2s, v26.2s
+# CHECK: uqshl v6.4s, v10.4s, v27.4s
+# CHECK: uqshl v0.2d, v1.2d, v2.2d
+0xe1 0x4d 0x36 0x0e
+0xc2 0x4d 0x37 0x6e
+0xa3 0x4d 0x78 0x0e
+0x84 0x4d 0x79 0x6e
+0x65 0x4d 0xba 0x0e
+0x46 0x4d 0xbb 0x6e
+0x20 0x4c 0xe2 0x6e
+
+#----------------------------------------------------------------------
+# Vector Rounding Shift Left (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: srshl v10.8b, v5.8b, v22.8b
+# CHECK: urshl v10.16b, v5.16b, v2.16b
+# CHECK: srshl v1.4h, v5.4h, v31.4h
+# CHECK: urshl v1.8h, v5.8h, v2.8h
+# CHECK: srshl v10.2s, v15.2s, v2.2s
+# CHECK: urshl v1.4s, v5.4s, v2.4s
+# CHECK: urshl v0.2d, v1.2d, v2.2d
+0xaa 0x54 0x36 0x0e
+0xaa 0x54 0x22 0x6e
+0xa1 0x54 0x7f 0x0e
+0xa1 0x54 0x62 0x6e
+0xea 0x55 0xa2 0x0e
+0xa1 0x54 0xa2 0x6e
+0x20 0x54 0xe2 0x6e
+
+#----------------------------------------------------------------------
+# Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: sqrshl v1.8b, v15.8b, v22.8b
+# CHECK: uqrshl v2.16b, v14.16b, v23.16b
+# CHECK: sqrshl v3.4h, v13.4h, v24.4h
+# CHECK: uqrshl v4.8h, v12.8h, v25.8h
+# CHECK: sqrshl v5.2s, v11.2s, v26.2s
+# CHECK: uqrshl v6.4s, v10.4s, v27.4s
+# CHECK: uqrshl v6.4s, v10.4s, v27.4s
+0xe1 0x5d 0x36 0x0e
+0xc2 0x5d 0x37 0x6e
+0xa3 0x5d 0x78 0x0e
+0x84 0x5d 0x79 0x6e
+0x65 0x5d 0xba 0x0e
+0x46 0x5d 0xbb 0x6e
+0x46 0x5d 0xbb 0x6e
+
+#----------------------------------------------------------------------
+# Scalar Integer Shift Left (Signed, Unsigned)
+#----------------------------------------------------------------------
+# CHECK: sshl d31, d31, d31
+# CHECK: ushl d0, d0, d0
+0xff 0x47 0xff 0x5e
+0x00 0x44 0xe0 0x7e
+
+#----------------------------------------------------------------------
+# Scalar Integer Saturating Shift Left (Signed, Unsigned)
+#----------------------------------------------------------------------
+# CHECK: sqshl d31, d31, d31
+# CHECK: uqshl s23, s20, s16
+# CHECK: sqshl h3, h4, h15
+# CHECK: uqshl b11, b20, b30
+0xff 0x4f 0xff 0x5e
+0x97 0x4e 0xb0 0x7e
+0x83 0x4c 0x6f 0x5e
+0x8b 0x4e 0x3e 0x7e
+
+#----------------------------------------------------------------------
+# Scalar Integer Rounding Shift Left (Signed, Unsigned)
+#----------------------------------------------------------------------
+# CHECK: srshl d16, d16, d16
+# CHECK: urshl d8, d7, d4
+0x10 0x56 0xf0 0x5e
+0xe8 0x54 0xe4 0x7e
+
+#----------------------------------------------------------------------
+# Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+#----------------------------------------------------------------------
+# CHECK: sqrshl d31, d31, d31
+# CHECK: uqrshl s23, s20, s16
+# CHECK: sqrshl h3, h4, h15
+# CHECK: uqrshl b11, b20, b30
+0xff 0x5f 0xff 0x5e
+0x97 0x5e 0xb0 0x7e
+0x83 0x5c 0x6f 0x5e
+0x8b 0x5e 0x3e 0x7e
+
+#----------------------------------------------------------------------
+# Vector Maximum (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: smax v1.8b, v15.8b, v22.8b
+# CHECK: umax v2.16b, v14.16b, v23.16b
+# CHECK: smax v3.4h, v13.4h, v24.4h
+# CHECK: umax v4.8h, v12.8h, v25.8h
+# CHECK: smax v5.2s, v11.2s, v26.2s
+# CHECK: umax v6.4s, v10.4s, v27.4s
+0xe1 0x65 0x36 0x0e
+0xc2 0x65 0x37 0x6e
+0xa3 0x65 0x78 0x0e
+0x84 0x65 0x79 0x6e
+0x65 0x65 0xba 0x0e
+0x46 0x65 0xbb 0x6e
+
+#----------------------------------------------------------------------
+# Vector Minimum (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: umin v1.8b, v15.8b, v22.8b
+# CHECK: smin v2.16b, v14.16b, v23.16b
+# CHECK: umin v3.4h, v13.4h, v24.4h
+# CHECK: smin v4.8h, v12.8h, v25.8h
+# CHECK: umin v5.2s, v11.2s, v26.2s
+# CHECK: smin v6.4s, v10.4s, v27.4s
+0xe1 0x6d 0x36 0x2e
+0xc2 0x6d 0x37 0x4e
+0xa3 0x6d 0x78 0x2e
+0x84 0x6d 0x79 0x4e
+0x65 0x6d 0xba 0x2e
+0x46 0x6d 0xbb 0x4e
+
+#----------------------------------------------------------------------
+# Vector Maximum (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fmax v29.2s, v28.2s, v25.2s
+# CHECK: fmax v9.4s, v8.4s, v5.4s
+# CHECK: fmax v11.2d, v10.2d, v7.2d
+0x9d 0xf7 0x39 0x0e
+0x09 0xf5 0x25 0x4e
+0x4b 0xf5 0x67 0x4e
+
+#----------------------------------------------------------------------
+# Vector Minimum (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fmin v29.2s, v28.2s, v25.2s
+# CHECK: fmin v9.4s, v8.4s, v5.4s
+# CHECK: fmin v11.2d, v10.2d, v7.2d
+0x9d 0xf7 0xb9 0x0e
+0x09 0xf5 0xa5 0x4e
+0x4b 0xf5 0xe7 0x4e
+
+#----------------------------------------------------------------------
+# Vector maxNum (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fmaxnm v9.2s, v8.2s, v5.2s
+# CHECK: fmaxnm v9.4s, v8.4s, v5.4s
+# CHECK: fmaxnm v11.2d, v10.2d, v7.2d
+0x09 0xc5 0x25 0x0e
+0x09 0xc5 0x25 0x4e
+0x4b 0xc5 0x67 0x4e
+
+#----------------------------------------------------------------------
+# Vector minNum (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fminnm v2.2s, v8.2s, v25.2s
+# CHECK: fminnm v9.4s, v8.4s, v5.4s
+# CHECK: fminnm v11.2d, v10.2d, v7.2d
+0x02 0xc5 0xb9 0x0e
+0x09 0xc5 0xa5 0x4e
+0x4b 0xc5 0xe7 0x4e
+
+
+#----------------------------------------------------------------------
+# Vector Maximum Pairwise (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: smaxp v1.8b, v15.8b, v22.8b
+# CHECK: umaxp v2.16b, v14.16b, v23.16b
+# CHECK: smaxp v3.4h, v13.4h, v24.4h
+# CHECK: umaxp v4.8h, v12.8h, v25.8h
+# CHECK: smaxp v5.2s, v11.2s, v26.2s
+# CHECK: umaxp v6.4s, v10.4s, v27.4s
+0xe1 0xa5 0x36 0x0e
+0xc2 0xa5 0x37 0x6e
+0xa3 0xa5 0x78 0x0e
+0x84 0xa5 0x79 0x6e
+0x65 0xa5 0xba 0x0e
+0x46 0xa5 0xbb 0x6e
+
+#----------------------------------------------------------------------
+# Vector Minimum Pairwise (Signed and Unsigned Integer)
+#----------------------------------------------------------------------
+# CHECK: uminp v1.8b, v15.8b, v22.8b
+# CHECK: sminp v2.16b, v14.16b, v23.16b
+# CHECK: uminp v3.4h, v13.4h, v24.4h
+# CHECK: sminp v4.8h, v12.8h, v25.8h
+# CHECK: uminp v5.2s, v11.2s, v26.2s
+# CHECK: sminp v6.4s, v10.4s, v27.4s
+0xe1 0xad 0x36 0x2e
+0xc2 0xad 0x37 0x4e
+0xa3 0xad 0x78 0x2e
+0x84 0xad 0x79 0x4e
+0x65 0xad 0xba 0x2e
+0x46 0xad 0xbb 0x4e
+
+#----------------------------------------------------------------------
+# Vector Maximum Pairwise (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fmaxp v29.2s, v28.2s, v25.2s
+# CHECK: fmaxp v9.4s, v8.4s, v5.4s
+# CHECK: fmaxp v11.2d, v10.2d, v7.2d
+0x9d 0xf7 0x39 0x2e
+0x09 0xf5 0x25 0x6e
+0x4b 0xf5 0x67 0x6e
+
+#----------------------------------------------------------------------
+# Vector Minimum Pairwise (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fminp v29.2s, v28.2s, v25.2s
+# CHECK: fminp v9.4s, v8.4s, v5.4s
+# CHECK: fminp v11.2d, v10.2d, v7.2d
+0x9d 0xf7 0xb9 0x2e
+0x09 0xf5 0xa5 0x6e
+0x4b 0xf5 0xe7 0x6e
+
+#----------------------------------------------------------------------
+# Vector maxNum Pairwise (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fmaxnmp v9.2s, v8.2s, v5.2s
+# CHECK: fmaxnmp v9.4s, v8.4s, v5.4s
+# CHECK: fmaxnmp v11.2d, v10.2d, v7.2d
+0x09 0xc5 0x25 0x2e
+0x09 0xc5 0x25 0x6e
+0x4b 0xc5 0x67 0x6e
+
+#----------------------------------------------------------------------
+# Vector minNum Pairwise (Floating Point)
+#----------------------------------------------------------------------
+# CHECK: fminnmp v2.2s, v8.2s, v25.2s
+# CHECK: fminnmp v9.4s, v8.4s, v5.4s
+# CHECK: fminnmp v11.2d, v10.2d, v7.2d
+0x02 0xc5 0xb9 0x2e
+0x09 0xc5 0xa5 0x6e
+0x4b 0xc5 0xe7 0x6e
+
+#------------------------------------------------------------------------------
+# Vector Add Pairwise (Integer)
+#------------------------------------------------------------------------------
+# CHECK: addp v31.8b, v31.8b, v31.8b
+# CHECK: addp v0.2d, v0.2d, v0.2d
+0xff 0xbf 0x3f 0x0e
+0x00 0xbc 0xe0 0x4e
+
+#------------------------------------------------------------------------------
+# Vector Add Pairwise (Floating Point)
+#------------------------------------------------------------------------------
+# CHECK: faddp v0.4s, v0.4s, v0.4s
+# CHECK: faddp v31.2s, v31.2s, v31.2s
+0x00 0xd4 0x20 0x6e
+0xff 0xd7 0x3f 0x2e
+
+
+#------------------------------------------------------------------------------
+# Vector Saturating Doubling Multiply High
+# Vector Saturating Rounding Doubling Multiply High
+#------------------------------------------------------------------------------
+# CHECK: sqdmulh v31.2s, v31.2s, v31.2s
+# CHECK: sqdmulh v5.4s, v7.4s, v9.4s
+# CHECK: sqrdmulh v31.4h, v3.4h, v13.4h
+# CHECK: sqrdmulh v0.8h, v10.8h, v20.8h
+0xff 0xb7 0xbf 0x0e
+0xe5 0xb4 0xa9 0x4e
+0x7f 0xb4 0x6d 0x2e
+0x40 0xb5 0x74 0x6e
+
+#------------------------------------------------------------------------------
+# Vector Multiply Extended
+#------------------------------------------------------------------------------
+# CHECK: fmulx v1.2s, v22.2s, v2.2s
+# CHECK: fmulx v21.4s, v15.4s, v3.4s
+# CHECK: fmulx v11.2d, v5.2d, v23.2d
+0xc1 0xde 0x22 0x0e
+0xf5 0xdd 0x23 0x4e
+0xab 0xdc 0x77 0x4e
+
+#----------------------------------------------------------------------
+# Vector Shift Left Long
+#----------------------------------------------------------------------
+# CHECK: shll2	v2.8h, v4.16b, #8
+# CHECK: shll2	v6.4s, v8.8h, #16
+# CHECK: shll2	v6.2d, v8.4s, #32
+# CHECK: shll	v2.8h, v4.8b, #8
+# CHECK: shll	v6.4s, v8.4h, #16
+# CHECK: shll	v6.2d, v8.2s, #32
+
+0x82,0x38,0x21,0x6e
+0x06,0x39,0x61,0x6e
+0x06,0x39,0xa1,0x6e
+0x82,0x38,0x21,0x2e
+0x06,0x39,0x61,0x2e
+0x06,0x39,0xa1,0x2e
+
+#----------------------------------------------------------------------
+# Vector Shift Left by Immediate
+#----------------------------------------------------------------------
+# CHECK: shl v0.4h, v1.4h, #3
+# CHECK: shl v0.16b, v1.16b, #3
+# CHECK: shl v0.4s, v1.4s, #3
+# CHECK: shl v0.2d, v1.2d, #3
+0x20,0x54,0x13,0x0f
+0x20,0x54,0x0b,0x4f
+0x20,0x54,0x23,0x4f
+0x20,0x54,0x43,0x4f
+
+#----------------------------------------------------------------------
+# Vector Shift Left Long (Signed, Unsigned) by Immediate
+#----------------------------------------------------------------------
+# CHECK: sshll v0.2d, v1.2s, #3
+# CHECK: sshll2 v0.4s, v1.8h, #3
+# CHECK: ushll v0.4s, v1.4h, #3
+# CHECK: ushll2 v0.8h, v1.16b, #3
+0x20 0xa4 0x23 0x0f
+0x20 0xa4 0x13 0x4f
+0x20 0xa4 0x13 0x2f
+0x20 0xa4 0x0b 0x6f
+
+#-----------------------------------------------------------------------------
+#Integer shift right (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: sshr v0.8b, v1.8b, #3
+# CHECK: sshr v0.4h, v1.4h, #3
+# CHECK: sshr v0.2s, v1.2s, #3
+# CHECK: sshr v0.16b, v1.16b, #3
+# CHECK: sshr v0.8h, v1.8h, #3
+# CHECK: sshr v0.4s, v1.4s, #3
+# CHECK: sshr v0.2d, v1.2d, #3
+0x20,0x04,0x0d,0x0f
+0x20,0x04,0x1d,0x0f
+0x20,0x04,0x3d,0x0f
+0x20,0x04,0x0d,0x4f
+0x20,0x04,0x1d,0x4f
+0x20,0x04,0x3d,0x4f
+0x20,0x04,0x7d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer shift right (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: ushr v0.8b, v1.8b, #3
+# CHECK: ushr v0.4h, v1.4h, #3
+# CHECK: ushr v0.2s, v1.2s, #3
+# CHECK: ushr v0.16b, v1.16b, #3
+# CHECK: ushr v0.8h, v1.8h, #3
+# CHECK: ushr v0.4s, v1.4s, #3
+# CHECK: ushr v0.2d, v1.2d, #3
+0x20,0x04,0x0d,0x2f
+0x20,0x04,0x1d,0x2f
+0x20,0x04,0x3d,0x2f
+0x20,0x04,0x0d,0x6f
+0x20,0x04,0x1d,0x6f
+0x20,0x04,0x3d,0x6f
+0x20,0x04,0x7d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer shift right and accumulate (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: ssra v0.8b, v1.8b, #3
+# CHECK: ssra v0.4h, v1.4h, #3
+# CHECK: ssra v0.2s, v1.2s, #3
+# CHECK: ssra v0.16b, v1.16b, #3
+# CHECK: ssra v0.8h, v1.8h, #3
+# CHECK: ssra v0.4s, v1.4s, #3
+# CHECK: ssra v0.2d, v1.2d, #3
+0x20,0x14,0x0d,0x0f
+0x20,0x14,0x1d,0x0f
+0x20,0x14,0x3d,0x0f
+0x20,0x14,0x0d,0x4f
+0x20,0x14,0x1d,0x4f
+0x20,0x14,0x3d,0x4f
+0x20,0x14,0x7d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer shift right and accumulate (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: usra v0.8b, v1.8b, #3
+# CHECK: usra v0.4h, v1.4h, #3
+# CHECK: usra v0.2s, v1.2s, #3
+# CHECK: usra v0.16b, v1.16b, #3
+# CHECK: usra v0.8h, v1.8h, #3
+# CHECK: usra v0.4s, v1.4s, #3
+# CHECK: usra v0.2d, v1.2d, #3
+0x20,0x14,0x0d,0x2f
+0x20,0x14,0x1d,0x2f
+0x20,0x14,0x3d,0x2f
+0x20,0x14,0x0d,0x6f
+0x20,0x14,0x1d,0x6f
+0x20,0x14,0x3d,0x6f
+0x20,0x14,0x7d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer rounding shift right (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: srshr v0.8b, v1.8b, #3
+# CHECK: srshr v0.4h, v1.4h, #3
+# CHECK: srshr v0.2s, v1.2s, #3
+# CHECK: srshr v0.16b, v1.16b, #3
+# CHECK: srshr v0.8h, v1.8h, #3
+# CHECK: srshr v0.4s, v1.4s, #3
+# CHECK: srshr v0.2d, v1.2d, #3
+0x20,0x24,0x0d,0x0f
+0x20,0x24,0x1d,0x0f
+0x20,0x24,0x3d,0x0f
+0x20,0x24,0x0d,0x4f
+0x20,0x24,0x1d,0x4f
+0x20,0x24,0x3d,0x4f
+0x20,0x24,0x7d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer rounding shift right (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: urshr v0.8b, v1.8b, #3
+# CHECK: urshr v0.4h, v1.4h, #3
+# CHECK: urshr v0.2s, v1.2s, #3
+# CHECK: urshr v0.16b, v1.16b, #3
+# CHECK: urshr v0.8h, v1.8h, #3
+# CHECK: urshr v0.4s, v1.4s, #3
+# CHECK: urshr v0.2d, v1.2d, #3
+0x20,0x24,0x0d,0x2f
+0x20,0x24,0x1d,0x2f
+0x20,0x24,0x3d,0x2f
+0x20,0x24,0x0d,0x6f
+0x20,0x24,0x1d,0x6f
+0x20,0x24,0x3d,0x6f
+0x20,0x24,0x7d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer rounding shift right and accumulate (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: srsra v0.8b, v1.8b, #3
+# CHECK: srsra v0.4h, v1.4h, #3
+# CHECK: srsra v0.2s, v1.2s, #3
+# CHECK: srsra v0.16b, v1.16b, #3
+# CHECK: srsra v0.8h, v1.8h, #3
+# CHECK: srsra v0.4s, v1.4s, #3
+# CHECK: srsra v0.2d, v1.2d, #3
+0x20,0x34,0x0d,0x0f
+0x20,0x34,0x1d,0x0f
+0x20,0x34,0x3d,0x0f
+0x20,0x34,0x0d,0x4f
+0x20,0x34,0x1d,0x4f
+0x20,0x34,0x3d,0x4f
+0x20,0x34,0x7d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer rounding shift right and accumulate (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: ursra v0.8b, v1.8b, #3
+# CHECK: ursra v0.4h, v1.4h, #3
+# CHECK: ursra v0.2s, v1.2s, #3
+# CHECK: ursra v0.16b, v1.16b, #3
+# CHECK: ursra v0.8h, v1.8h, #3
+# CHECK: ursra v0.4s, v1.4s, #3
+# CHECK: ursra v0.2d, v1.2d, #3
+0x20,0x34,0x0d,0x2f
+0x20,0x34,0x1d,0x2f
+0x20,0x34,0x3d,0x2f
+0x20,0x34,0x0d,0x6f
+0x20,0x34,0x1d,0x6f
+0x20,0x34,0x3d,0x6f
+0x20,0x34,0x7d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer shift right and insert
+#-----------------------------------------------------------------------------
+# CHECK: sri v0.8b, v1.8b, #3
+# CHECK: sri v0.4h, v1.4h, #3
+# CHECK: sri v0.2s, v1.2s, #3
+# CHECK: sri v0.16b, v1.16b, #3
+# CHECK: sri v0.8h, v1.8h, #3
+# CHECK: sri v0.4s, v1.4s, #3
+# CHECK: sri v0.2d, v1.2d, #3
+0x20,0x44,0x0d,0x2f
+0x20,0x44,0x1d,0x2f
+0x20,0x44,0x3d,0x2f
+0x20,0x44,0x0d,0x6f
+0x20,0x44,0x1d,0x6f
+0x20,0x44,0x3d,0x6f
+0x20,0x44,0x7d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer shift left and insert
+#-----------------------------------------------------------------------------
+# CHECK: sli v0.8b, v1.8b, #3
+# CHECK: sli v0.4h, v1.4h, #3
+# CHECK: sli v0.2s, v1.2s, #3
+# CHECK: sli v0.16b, v1.16b, #3
+# CHECK: sli v0.8h, v1.8h, #3
+# CHECK: sli v0.4s, v1.4s, #3
+# CHECK: sli v0.2d, v1.2d, #3
+0x20,0x54,0x0b,0x2f
+0x20,0x54,0x13,0x2f
+0x20,0x54,0x23,0x2f
+0x20,0x54,0x0b,0x6f
+0x20,0x54,0x13,0x6f
+0x20,0x54,0x23,0x6f
+0x20,0x54,0x43,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift left unsigned
+#-----------------------------------------------------------------------------
+# CHECK: sqshlu v0.8b, v1.8b, #3
+# CHECK: sqshlu v0.4h, v1.4h, #3
+# CHECK: sqshlu v0.2s, v1.2s, #3
+# CHECK: sqshlu v0.16b, v1.16b, #3
+# CHECK: sqshlu v0.8h, v1.8h, #3
+# CHECK: sqshlu v0.4s, v1.4s, #3
+# CHECK: sqshlu v0.2d, v1.2d, #3
+0x20,0x64,0x0b,0x2f
+0x20,0x64,0x13,0x2f
+0x20,0x64,0x23,0x2f
+0x20,0x64,0x0b,0x6f
+0x20,0x64,0x13,0x6f
+0x20,0x64,0x23,0x6f
+0x20,0x64,0x43,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift left (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: sqshl v0.8b, v1.8b, #3
+# CHECK: sqshl v0.4h, v1.4h, #3
+# CHECK: sqshl v0.2s, v1.2s, #3
+# CHECK: sqshl v0.16b, v1.16b, #3
+# CHECK: sqshl v0.8h, v1.8h, #3
+# CHECK: sqshl v0.4s, v1.4s, #3
+# CHECK: sqshl v0.2d, v1.2d, #3
+0x20,0x74,0x0b,0x0f
+0x20,0x74,0x13,0x0f
+0x20,0x74,0x23,0x0f
+0x20,0x74,0x0b,0x4f
+0x20,0x74,0x13,0x4f
+0x20,0x74,0x23,0x4f
+0x20,0x74,0x43,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift left (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: uqshl v0.8b, v1.8b, #3
+# CHECK: uqshl v0.4h, v1.4h, #3
+# CHECK: uqshl v0.2s, v1.2s, #3
+# CHECK: uqshl v0.16b, v1.16b, #3
+# CHECK: uqshl v0.8h, v1.8h, #3
+# CHECK: uqshl v0.4s, v1.4s, #3
+# CHECK: uqshl v0.2d, v1.2d, #3
+0x20,0x74,0x0b,0x2f
+0x20,0x74,0x13,0x2f
+0x20,0x74,0x23,0x2f
+0x20,0x74,0x0b,0x6f
+0x20,0x74,0x13,0x6f
+0x20,0x74,0x23,0x6f
+0x20,0x74,0x43,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer shift right narrow
+#-----------------------------------------------------------------------------
+# CHECK: shrn v0.8b, v1.8h, #3
+# CHECK: shrn v0.4h, v1.4s, #3
+# CHECK: shrn v0.2s, v1.2d, #3
+# CHECK: shrn2 v0.16b, v1.8h, #3
+# CHECK: shrn2 v0.8h, v1.4s, #3
+# CHECK: shrn2 v0.4s, v1.2d, #3
+0x20,0x84,0x0d,0x0f
+0x20,0x84,0x1d,0x0f
+0x20,0x84,0x3d,0x0f
+0x20,0x84,0x0d,0x4f
+0x20,0x84,0x1d,0x4f
+0x20,0x84,0x3d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift right unsigned narrow (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: sqshrun v0.8b, v1.8h, #3
+# CHECK: sqshrun v0.4h, v1.4s, #3
+# CHECK: sqshrun v0.2s, v1.2d, #3
+# CHECK: sqshrun2 v0.16b, v1.8h, #3
+# CHECK: sqshrun2 v0.8h, v1.4s, #3
+# CHECK: sqshrun2 v0.4s, v1.2d, #3
+0x20,0x84,0x0d,0x2f
+0x20,0x84,0x1d,0x2f
+0x20,0x84,0x3d,0x2f
+0x20,0x84,0x0d,0x6f
+0x20,0x84,0x1d,0x6f
+0x20,0x84,0x3d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer rounding shift right narrow
+#-----------------------------------------------------------------------------
+# CHECK: rshrn v0.8b, v1.8h, #3
+# CHECK: rshrn v0.4h, v1.4s, #3
+# CHECK: rshrn v0.2s, v1.2d, #3
+# CHECK: rshrn2 v0.16b, v1.8h, #3
+# CHECK: rshrn2 v0.8h, v1.4s, #3
+# CHECK: rshrn2 v0.4s, v1.2d, #3
+0x20,0x8c,0x0d,0x0f
+0x20,0x8c,0x1d,0x0f
+0x20,0x8c,0x3d,0x0f
+0x20,0x8c,0x0d,0x4f
+0x20,0x8c,0x1d,0x4f
+0x20,0x8c,0x3d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift right rounded unsigned narrow (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: sqrshrun v0.8b, v1.8h, #3
+# CHECK: sqrshrun v0.4h, v1.4s, #3
+# CHECK: sqrshrun v0.2s, v1.2d, #3
+# CHECK: sqrshrun2 v0.16b, v1.8h, #3
+# CHECK: sqrshrun2 v0.8h, v1.4s, #3
+# CHECK: sqrshrun2 v0.4s, v1.2d, #3
+0x20,0x8c,0x0d,0x2f
+0x20,0x8c,0x1d,0x2f
+0x20,0x8c,0x3d,0x2f
+0x20,0x8c,0x0d,0x6f
+0x20,0x8c,0x1d,0x6f
+0x20,0x8c,0x3d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift right narrow (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: sqshrn v0.8b, v1.8h, #3
+# CHECK: sqshrn v0.4h, v1.4s, #3
+# CHECK: sqshrn v0.2s, v1.2d, #3
+# CHECK: sqshrn2 v0.16b, v1.8h, #3
+# CHECK: sqshrn2 v0.8h, v1.4s, #3
+# CHECK: sqshrn2 v0.4s, v1.2d, #3
+0x20,0x94,0x0d,0x0f
+0x20,0x94,0x1d,0x0f
+0x20,0x94,0x3d,0x0f
+0x20,0x94,0x0d,0x4f
+0x20,0x94,0x1d,0x4f
+0x20,0x94,0x3d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift right narrow (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: uqshrn v0.8b, v1.8h, #3
+# CHECK: uqshrn v0.4h, v1.4s, #3
+# CHECK: uqshrn v0.2s, v1.2d, #3
+# CHECK: uqshrn2 v0.16b, v1.8h, #3
+# CHECK: uqshrn2 v0.8h, v1.4s, #3
+# CHECK: uqshrn2 v0.4s, v1.2d, #3
+0x20,0x94,0x0d,0x2f
+0x20,0x94,0x1d,0x2f
+0x20,0x94,0x3d,0x2f
+0x20,0x94,0x0d,0x6f
+0x20,0x94,0x1d,0x6f
+0x20,0x94,0x3d,0x6f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift right rounded narrow (Signed)
+#-----------------------------------------------------------------------------
+# CHECK: sqrshrn v0.8b, v1.8h, #3
+# CHECK: sqrshrn v0.4h, v1.4s, #3
+# CHECK: sqrshrn v0.2s, v1.2d, #3
+# CHECK: sqrshrn2 v0.16b, v1.8h, #3
+# CHECK: sqrshrn2 v0.8h, v1.4s, #3
+# CHECK: sqrshrn2 v0.4s, v1.2d, #3
+0x20,0x9c,0x0d,0x0f
+0x20,0x9c,0x1d,0x0f
+0x20,0x9c,0x3d,0x0f
+0x20,0x9c,0x0d,0x4f
+0x20,0x9c,0x1d,0x4f
+0x20,0x9c,0x3d,0x4f
+
+#-----------------------------------------------------------------------------
+#Integer saturating shift right rounded narrow (Unsigned)
+#-----------------------------------------------------------------------------
+# CHECK: uqrshrn v0.8b, v1.8h, #3
+# CHECK: uqrshrn v0.4h, v1.4s, #3
+# CHECK: uqrshrn v0.2s, v1.2d, #3
+# CHECK: uqrshrn2 v0.16b, v1.8h, #3
+# CHECK: uqrshrn2 v0.8h, v1.4s, #3
+# CHECK: uqrshrn2 v0.4s, v1.2d, #3
+0x20,0x9c,0x0d,0x2f
+0x20,0x9c,0x1d,0x2f
+0x20,0x9c,0x3d,0x2f
+0x20,0x9c,0x0d,0x6f
+0x20,0x9c,0x1d,0x6f
+0x20,0x9c,0x3d,0x6f
+
+#-----------------------------------------------------------------------------
+#Fixed-point convert to floating-point
+#-----------------------------------------------------------------------------
+# CHECK: scvtf v0.2s, v1.2s, #3
+# CHECK: scvtf v0.4s, v1.4s, #3
+# CHECK: scvtf v0.2d, v1.2d, #3
+# CHECK: ucvtf v0.2s, v1.2s, #3
+# CHECK: ucvtf v0.4s, v1.4s, #3
+# CHECK: ucvtf v0.2d, v1.2d, #3
+
+0x20,0xe4,0x3d,0x0f
+0x20,0xe4,0x3d,0x4f
+0x20,0xe4,0x7d,0x4f
+0x20,0xe4,0x3d,0x2f
+0x20,0xe4,0x3d,0x6f
+0x20,0xe4,0x7d,0x6f
+
+#-----------------------------------------------------------------------------
+#Floating-point convert to fixed-point
+#-----------------------------------------------------------------------------
+# CHECK: fcvtzs v0.2s, v1.2s, #3
+# CHECK: fcvtzs v0.4s, v1.4s, #3
+# CHECK: fcvtzs v0.2d, v1.2d, #3
+# CHECK: fcvtzu v0.2s, v1.2s, #3
+# CHECK: fcvtzu v0.4s, v1.4s, #3
+# CHECK: fcvtzu v0.2d, v1.2d, #3
+0x20,0xfc,0x3d,0x0f
+0x20,0xfc,0x3d,0x4f
+0x20,0xfc,0x7d,0x4f
+0x20,0xfc,0x3d,0x2f
+0x20,0xfc,0x3d,0x6f
+0x20,0xfc,0x7d,0x6f
+
+
+#------------------------------------------------------------------------------
+# Vector with 3 operands having different data types
+#------------------------------------------------------------------------------
+
+#------------------------------------------------------------------------------
+# Long
+#------------------------------------------------------------------------------
+
+#------------------------------------------------------------------------------
+# Long - Variant 1
+#------------------------------------------------------------------------------
+
+# CHECK: saddl v0.8h, v1.8b, v2.8b
+# CHECK: saddl v0.4s, v1.4h, v2.4h
+# CHECK: saddl v0.2d, v1.2s, v2.2s
+0x20 0x00 0x22 0x0e
+0x20 0x00 0x62 0x0e
+0x20 0x00 0xa2 0x0e
+
+# CHECK: saddl2 v0.4s, v1.8h, v2.8h
+# CHECK: saddl2 v0.8h, v1.16b, v2.16b
+# CHECK: saddl2 v0.2d, v1.4s, v2.4s
+0x20 0x00 0x62 0x4e
+0x20 0x00 0x22 0x4e
+0x20 0x00 0xa2 0x4e
+
+# CHECK: uaddl v0.8h, v1.8b, v2.8b
+# CHECK: uaddl v0.4s, v1.4h, v2.4h
+# CHECK: uaddl v0.2d, v1.2s, v2.2s
+0x20 0x00 0x22 0x2e
+0x20 0x00 0x62 0x2e
+0x20 0x00 0xa2 0x2e
+
+# CHECK: uaddl2 v0.8h, v1.16b, v2.16b
+# CHECK: uaddl2 v0.4s, v1.8h, v2.8h
+# CHECK: uaddl2 v0.2d, v1.4s, v2.4s
+0x20 0x00 0x22 0x6e
+0x20 0x00 0x62 0x6e
+0x20 0x00 0xa2 0x6e
+
+# CHECK: ssubl v0.8h, v1.8b, v2.8b
+# CHECK: ssubl v0.4s, v1.4h, v2.4h
+# CHECK: ssubl v0.2d, v1.2s, v2.2s
+0x20 0x20 0x22 0x0e
+0x20 0x20 0x62 0x0e
+0x20 0x20 0xa2 0x0e
+
+# CHECK: ssubl2 v0.8h, v1.16b, v2.16b
+# CHECK: ssubl2 v0.4s, v1.8h, v2.8h
+# CHECK: ssubl2 v0.2d, v1.4s, v2.4s
+0x20 0x20 0x22 0x4e
+0x20 0x20 0x62 0x4e
+0x20 0x20 0xa2 0x4e
+
+# CHECK: usubl v0.8h, v1.8b, v2.8b
+# CHECK: usubl v0.4s, v1.4h, v2.4h
+# CHECK: usubl v0.2d, v1.2s, v2.2s
+0x20 0x20 0x22 0x2e
+0x20 0x20 0x62 0x2e
+0x20 0x20 0xa2 0x2e
+
+# CHECK: usubl2 v0.8h, v1.16b, v2.16b
+# CHECK: usubl2 v0.4s, v1.8h, v2.8h
+# CHECK: usubl2 v0.2d, v1.4s, v2.4s
+0x20 0x20 0x22 0x6e
+0x20 0x20 0x62 0x6e
+0x20 0x20 0xa2 0x6e
+
+# CHECK: sabal v0.8h, v1.8b, v2.8b
+# CHECK: sabal v0.4s, v1.4h, v2.4h
+# CHECK: sabal v0.2d, v1.2s, v2.2s
+0x20 0x50 0x22 0x0e
+0x20 0x50 0x62 0x0e
+0x20 0x50 0xa2 0x0e
+
+# CHECK: sabal2 v0.8h, v1.16b, v2.16b
+# CHECK: sabal2 v0.4s, v1.8h, v2.8h
+# CHECK: sabal2 v0.2d, v1.4s, v2.4s
+0x20 0x50 0x22 0x4e
+0x20 0x50 0x62 0x4e
+0x20 0x50 0xa2 0x4e
+
+# CHECK: uabal v0.8h, v1.8b, v2.8b
+# CHECK: uabal v0.4s, v1.4h, v2.4h
+# CHECK: uabal v0.2d, v1.2s, v2.2s
+0x20 0x50 0x22 0x2e
+0x20 0x50 0x62 0x2e
+0x20 0x50 0xa2 0x2e
+
+# CHECK: uabal2 v0.8h, v1.16b, v2.16b
+# CHECK: uabal2 v0.4s, v1.8h, v2.8h
+# CHECK: uabal2 v0.2d, v1.4s, v2.4s
+0x20 0x50 0x22 0x6e
+0x20 0x50 0x62 0x6e
+0x20 0x50 0xa2 0x6e
+
+# CHECK: sabdl v0.8h, v1.8b, v2.8b
+# CHECK: sabdl v0.4s, v1.4h, v2.4h
+# CHECK: sabdl v0.2d, v1.2s, v2.2s
+0x20 0x70 0x22 0x0e
+0x20 0x70 0x62 0x0e
+0x20 0x70 0xa2 0x0e
+
+# CHECK: sabdl2 v0.8h, v1.16b, v2.16b
+# CHECK: sabdl2 v0.4s, v1.8h, v2.8h
+# CHECK: sabdl2 v0.2d, v1.4s, v2.4s
+0x20 0x70 0x22 0x4e
+0x20 0x70 0x62 0x4e
+0x20 0x70 0xa2 0x4e
+
+# CHECK: uabdl v0.8h, v1.8b, v2.8b
+# CHECK: uabdl v0.4s, v1.4h, v2.4h
+# CHECK: uabdl v0.2d, v1.2s, v2.2s
+0x20 0x70 0x22 0x2e
+0x20 0x70 0x62 0x2e
+0x20 0x70 0xa2 0x2e
+
+# CHECK: uabdl2 v0.8h, v1.16b, v2.16b
+# CHECK: uabdl2 v0.4s, v1.8h, v2.8h
+# CHECK: uabdl2 v0.2d, v1.4s, v2.4s
+0x20 0x70 0x22 0x6e
+0x20 0x70 0x62 0x6e
+0x20 0x70 0xa2 0x6e
+
+# CHECK: smlal v0.8h, v1.8b, v2.8b
+# CHECK: smlal v0.4s, v1.4h, v2.4h
+# CHECK: smlal v0.2d, v1.2s, v2.2s
+0x20 0x80 0x22 0x0e
+0x20 0x80 0x62 0x0e
+0x20 0x80 0xa2 0x0e
+
+# CHECK: smlal2 v0.8h, v1.16b, v2.16b
+# CHECK: smlal2 v0.4s, v1.8h, v2.8h
+# CHECK: smlal2 v0.2d, v1.4s, v2.4s
+0x20 0x80 0x22 0x4e
+0x20 0x80 0x62 0x4e
+0x20 0x80 0xa2 0x4e
+
+# CHECK: umlal v0.8h, v1.8b, v2.8b
+# CHECK: umlal v0.4s, v1.4h, v2.4h
+# CHECK: umlal v0.2d, v1.2s, v2.2s
+
+0x20 0x80 0x22 0x2e
+0x20 0x80 0x62 0x2e
+0x20 0x80 0xa2 0x2e
+
+# CHECK: umlal2 v0.8h, v1.16b, v2.16b
+# CHECK: umlal2 v0.4s, v1.8h, v2.8h
+# CHECK: umlal2 v0.2d, v1.4s, v2.4s
+0x20 0x80 0x22 0x6e
+0x20 0x80 0x62 0x6e
+0x20 0x80 0xa2 0x6e
+
+# CHECK: smlsl v0.8h, v1.8b, v2.8b
+# CHECK: smlsl v0.4s, v1.4h, v2.4h
+# CHECK: smlsl v0.2d, v1.2s, v2.2s
+0x20 0xa0 0x22 0x0e
+0x20 0xa0 0x62 0x0e
+0x20 0xa0 0xa2 0x0e
+
+# CHECK: smlsl2 v0.8h, v1.16b, v2.16b
+# CHECK: smlsl2 v0.4s, v1.8h, v2.8h
+# CHECK: smlsl2 v0.2d, v1.4s, v2.4s
+0x20 0xa0 0x22 0x4e
+0x20 0xa0 0x62 0x4e
+0x20 0xa0 0xa2 0x4e
+
+# CHECK: umlsl v0.8h, v1.8b, v2.8b
+# CHECK: umlsl v0.4s, v1.4h, v2.4h
+# CHECK: umlsl v0.2d, v1.2s, v2.2s
+0x20 0xa0 0x22 0x2e
+0x20 0xa0 0x62 0x2e
+0x20 0xa0 0xa2 0x2e
+
+# CHECK: umlsl2 v0.8h, v1.16b, v2.16b
+# CHECK: umlsl2 v0.4s, v1.8h, v2.8h
+# CHECK: umlsl2 v0.2d, v1.4s, v2.4s
+0x20 0xa0 0x22 0x6e
+0x20 0xa0 0x62 0x6e
+0x20 0xa0 0xa2 0x6e
+
+# CHECK: smull v0.8h, v1.8b, v2.8b
+# CHECK: smull v0.4s, v1.4h, v2.4h
+# CHECK: smull v0.2d, v1.2s, v2.2s
+0x20 0xc0 0x22 0x0e
+0x20 0xc0 0x62 0x0e
+0x20 0xc0 0xa2 0x0e
+
+# CHECK: smull2 v0.8h, v1.16b, v2.16b
+# CHECK: smull2 v0.4s, v1.8h, v2.8h
+# CHECK: smull2 v0.2d, v1.4s, v2.4s
+0x20 0xc0 0x22 0x4e
+0x20 0xc0 0x62 0x4e
+0x20 0xc0 0xa2 0x4e
+
+# CHECK: umull v0.8h, v1.8b, v2.8b
+# CHECK: umull v0.4s, v1.4h, v2.4h
+# CHECK: umull v0.2d, v1.2s, v2.2s
+0x20 0xc0 0x22 0x2e
+0x20 0xc0 0x62 0x2e
+0x20 0xc0 0xa2 0x2e
+
+# CHECK: umull2 v0.8h, v1.16b, v2.16b
+# CHECK: umull2 v0.4s, v1.8h, v2.8h
+# CHECK: umull2 v0.2d, v1.4s, v2.4s
+0x20 0xc0 0x22 0x6e
+0x20 0xc0 0x62 0x6e
+0x20 0xc0 0xa2 0x6e
+
+#------------------------------------------------------------------------------
+# Long - Variant 2
+#------------------------------------------------------------------------------
+
+# CHECK: sqdmlal v0.4s, v1.4h, v2.4h
+# CHECK: sqdmlal v0.2d, v1.2s, v2.2s
+0x20 0x90 0x62 0x0e
+0x20 0x90 0xa2 0x0e
+
+# CHECK: sqdmlal2 v0.4s, v1.8h, v2.8h
+# CHECK: sqdmlal2 v0.2d, v1.4s, v2.4s
+0x20 0x90 0x62 0x4e
+0x20 0x90 0xa2 0x4e
+
+# CHECK: sqdmlsl v0.4s, v1.4h, v2.4h
+# CHECK: sqdmlsl v0.2d, v1.2s, v2.2s
+0x20 0xb0 0x62 0x0e
+0x20 0xb0 0xa2 0x0e
+
+# CHECK: sqdmlsl2 v0.4s, v1.8h, v2.8h
+# CHECK: sqdmlsl2 v0.2d, v1.4s, v2.4s
+0x20 0xb0 0x62 0x4e
+0x20 0xb0 0xa2 0x4e
+
+# CHECK: sqdmull v0.4s, v1.4h, v2.4h
+# CHECK: sqdmull v0.2d, v1.2s, v2.2s
+0x20 0xd0 0x62 0x0e
+0x20 0xd0 0xa2 0x0e
+
+# CHECK: sqdmull2 v0.4s, v1.8h, v2.8h
+# CHECK: sqdmull2 v0.2d, v1.4s, v2.4s
+0x20 0xd0 0x62 0x4e
+0x20 0xd0 0xa2 0x4e
+
+#------------------------------------------------------------------------------
+# Long - Variant 3
+#------------------------------------------------------------------------------
+
+# CHECK: pmull v0.8h, v1.8b, v2.8b
+0x20 0xe0 0x22 0x0e
+
+# CHECK: pmull2 v0.8h, v1.16b, v2.16b
+0x20 0xe0 0x22 0x4e
+
+#------------------------------------------------------------------------------
+# Widen
+#------------------------------------------------------------------------------
+
+# CHECK: saddw v0.8h, v1.8h, v2.8b
+# CHECK: saddw v0.4s, v1.4s, v2.4h
+# CHECK: saddw v0.2d, v1.2d, v2.2s
+0x20 0x10 0x22 0x0e
+0x20 0x10 0x62 0x0e
+0x20 0x10 0xa2 0x0e
+
+# CHECK: saddw2 v0.8h, v1.8h, v2.16b
+# CHECK: saddw2 v0.4s, v1.4s, v2.8h
+# CHECK: saddw2 v0.2d, v1.2d, v2.4s
+0x20 0x10 0x22 0x4e
+0x20 0x10 0x62 0x4e
+0x20 0x10 0xa2 0x4e
+
+# CHECK: uaddw v0.8h, v1.8h, v2.8b
+# CHECK: uaddw v0.4s, v1.4s, v2.4h
+# CHECK: uaddw v0.2d, v1.2d, v2.2s
+0x20 0x10 0x22 0x2e
+0x20 0x10 0x62 0x2e
+0x20 0x10 0xa2 0x2e
+
+# CHECK: uaddw2 v0.8h, v1.8h, v2.16b
+# CHECK: uaddw2 v0.4s, v1.4s, v2.8h
+# CHECK: uaddw2 v0.2d, v1.2d, v2.4s
+0x20 0x10 0x22 0x6e
+0x20 0x10 0x62 0x6e
+0x20 0x10 0xa2 0x6e
+
+# CHECK: ssubw v0.8h, v1.8h, v2.8b
+# CHECK: ssubw v0.4s, v1.4s, v2.4h
+# CHECK: ssubw v0.2d, v1.2d, v2.2s
+0x20 0x30 0x22 0x0e
+0x20 0x30 0x62 0x0e
+0x20 0x30 0xa2 0x0e
+
+# CHECK: ssubw2 v0.8h, v1.8h, v2.16b
+# CHECK: ssubw2 v0.4s, v1.4s, v2.8h
+# CHECK: ssubw2 v0.2d, v1.2d, v2.4s
+0x20 0x30 0x22 0x4e
+0x20 0x30 0x62 0x4e
+0x20 0x30 0xa2 0x4e
+
+# CHECK: usubw v0.8h, v1.8h, v2.8b
+# CHECK: usubw v0.4s, v1.4s, v2.4h
+# CHECK: usubw v0.2d, v1.2d, v2.2s
+0x20 0x30 0x22 0x2e
+0x20 0x30 0x62 0x2e
+0x20 0x30 0xa2 0x2e
+
+# CHECK: usubw2 v0.8h, v1.8h, v2.16b
+# CHECK: usubw2 v0.4s, v1.4s, v2.8h
+# CHECK: usubw2 v0.2d, v1.2d, v2.4s
+0x20 0x30 0x22 0x6e
+0x20 0x30 0x62 0x6e
+0x20 0x30 0xa2 0x6e
+
+#------------------------------------------------------------------------------
+# Narrow
+#------------------------------------------------------------------------------
+
+# CHECK: addhn v0.8b, v1.8h, v2.8h
+# CHECK: addhn v0.4h, v1.4s, v2.4s
+# CHECK: addhn v0.2s, v1.2d, v2.2d
+0x20 0x40 0x22 0x0e
+0x20 0x40 0x62 0x0e
+0x20 0x40 0xa2 0x0e
+
+# CHECK: addhn2 v0.16b, v1.8h, v2.8h
+# CHECK: addhn2 v0.8h, v1.4s, v2.4s
+# CHECK: addhn2 v0.4s, v1.2d, v2.2d
+0x20 0x40 0x22 0x4e
+0x20 0x40 0x62 0x4e
+0x20 0x40 0xa2 0x4e
+
+# CHECK: raddhn v0.8b, v1.8h, v2.8h
+# CHECK: raddhn v0.4h, v1.4s, v2.4s
+# CHECK: raddhn v0.2s, v1.2d, v2.2d
+0x20 0x40 0x22 0x2e
+0x20 0x40 0x62 0x2e
+0x20 0x40 0xa2 0x2e
+
+# CHECK: raddhn2 v0.16b, v1.8h, v2.8h
+# CHECK: raddhn2 v0.8h, v1.4s, v2.4s
+# CHECK: raddhn2 v0.4s, v1.2d, v2.2d
+0x20 0x40 0x22 0x6e
+0x20 0x40 0x62 0x6e
+0x20 0x40 0xa2 0x6e
+
+# CHECK: rsubhn v0.8b, v1.8h, v2.8h
+# CHECK: rsubhn v0.4h, v1.4s, v2.4s
+# CHECK: rsubhn v0.2s, v1.2d, v2.2d
+0x20 0x60 0x22 0x2e
+0x20 0x60 0x62 0x2e
+0x20 0x60 0xa2 0x2e
+
+# CHECK: rsubhn2 v0.16b, v1.8h, v2.8h
+# CHECK: rsubhn2 v0.8h, v1.4s, v2.4s
+# CHECK: rsubhn2 v0.4s, v1.2d, v2.2d
+0x20 0x60 0x22 0x6e
+0x20 0x60 0x62 0x6e
+0x20 0x60 0xa2 0x6e
+
+#----------------------------------------------------------------------
+# Scalar Integer Saturating Doubling Multiply Half High
+#----------------------------------------------------------------------
+# CHECK: sqdmulh h10, h11, h12
+# CHECK: sqdmulh s20, s21, s2
+0x6a,0xb5,0x6c,0x5e
+0xb4,0xb6,0xa2,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Integer Saturating Rounding Doubling Multiply Half High
+#----------------------------------------------------------------------
+# CHECK: sqrdmulh h10, h11, h12
+# CHECK: sqrdmulh s20, s21, s2
+0x6a,0xb5,0x6c,0x7e
+0xb4,0xb6,0xa2,0x7e
+
+#----------------------------------------------------------------------
+# Floating-point multiply extended
+#----------------------------------------------------------------------
+# CHECK: fmulx s20, s22, s15
+# CHECK: fmulx d23, d11, d1
+0xd4,0xde,0x2f,0x5e
+0x77,0xdd,0x61,0x5e
+
+#----------------------------------------------------------------------
+# Floating-point Reciprocal Step
+#----------------------------------------------------------------------
+# CHECK: frecps s21, s16, s13
+# CHECK: frecps d22, d30, d21
+0x15,0xfe,0x2d,0x5e
+0xd6,0xff,0x75,0x5e
+
+#----------------------------------------------------------------------
+# Floating-point Reciprocal Square Root Step
+#----------------------------------------------------------------------
+# CHECK: frsqrts s21, s5, s12
+# CHECK: frsqrts d8, d22, d18
+0xb5,0xfc,0xac,0x5e
+0xc8,0xfe,0xf2,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Signed Integer Convert To Floating-point
+#----------------------------------------------------------------------
+# CHECK: scvtf s22, s13
+# CHECK: scvtf d21, d12
+0xb6,0xd9,0x21,0x5e
+0x95,0xd9,0x61,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Integer Convert To Floating-point
+#----------------------------------------------------------------------
+# CHECK: ucvtf s22, s13
+# CHECK: ucvtf d21, d14
+0xb6,0xd9,0x21,0x7e
+0xd5,0xd9,0x61,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Reciprocal Estimate
+#----------------------------------------------------------------------
+# CHECK: frecpe s19, s14
+# CHECK: frecpe d13, d13
+0xd3,0xd9,0xa1,0x5e
+0xad,0xd9,0xe1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Reciprocal Exponent
+#----------------------------------------------------------------------
+# CHECK: frecpx s18, s10
+# CHECK: frecpx d16, d19
+0x52,0xf9,0xa1,0x5e
+0x70,0xfa,0xe1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Reciprocal Square Root Estimate
+#----------------------------------------------------------------------
+# CHECK: frsqrte s22, s13
+# CHECK: frsqrte d21, d12
+0xb6,0xd9,0xa1,0x7e
+0x95,0xd9,0xe1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Compare Bitwise Equal
+#----------------------------------------------------------------------
+# CHECK: cmeq d20, d21, d22
+0xb4,0x8e,0xf6,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Compare Bitwise Equal To Zero
+#----------------------------------------------------------------------
+# CHECK: cmeq d20, d21, #0x0
+0xb4,0x9a,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Compare Unsigned Higher Or Same
+#----------------------------------------------------------------------
+# CHECK: cmhs d20, d21, d22
+0xb4,0x3e,0xf6,0x7e
+
+        
+#----------------------------------------------------------------------
+# Scalar Compare Signed Greater Than Or Equal
+#----------------------------------------------------------------------
+# CHECK: cmge d20, d21, d22
+0xb4,0x3e,0xf6,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Compare Signed Greater Than Or Equal To Zero
+#----------------------------------------------------------------------
+# CHECK: cmge d20, d21, #0x0
+0xb4,0x8a,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Compare Unsigned Higher
+#----------------------------------------------------------------------
+# CHECK: cmhi d20, d21, d22
+0xb4,0x36,0xf6,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Compare Signed Greater Than
+#----------------------------------------------------------------------
+# CHECK: cmgt d20, d21, d22
+0xb4,0x36,0xf6,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Compare Signed Greater Than Zero
+#----------------------------------------------------------------------
+# CHECK: cmgt d20, d21, #0x0
+0xb4,0x8a,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Compare Signed Less Than Or Equal To Zero
+#----------------------------------------------------------------------
+# CHECK: cmle d20, d21, #0x0
+0xb4,0x9a,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Compare Less Than Zero
+#----------------------------------------------------------------------
+# CHECK: cmlt d20, d21, #0x0
+0xb4,0xaa,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Compare Bitwise Test Bits
+#----------------------------------------------------------------------
+# CHECK: cmtst d20, d21, d22
+0xb4,0x8e,0xf6,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Equal
+#----------------------------------------------------------------------
+# CHECK: fcmeq s10, s11, s12
+# CHECK: fcmeq d20, d21, d22
+0x6a,0xe5,0x2c,0x5e
+0xb4,0xe6,0x76,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Equal To Zero
+#----------------------------------------------------------------------
+# CHECK: fcmeq s10, s11, #0.0
+# CHECK: fcmeq d20, d21, #0.0
+0x6a,0xd9,0xa0,0x5e
+0xb4,0xda,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Greater Than Or Equal
+#----------------------------------------------------------------------
+# CHECK: fcmge s10, s11, s12
+# CHECK: fcmge d20, d21, d22
+0x6a,0xe5,0x2c,0x7e
+0xb4,0xe6,0x76,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+#----------------------------------------------------------------------
+# CHECK: fcmge s10, s11, #0.0
+# CHECK: fcmge d20, d21, #0.0
+0x6a,0xc9,0xa0,0x7e
+0xb4,0xca,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Greater Than
+#----------------------------------------------------------------------
+# CHECK: fcmgt s10, s11, s12
+# CHECK: fcmgt d20, d21, d22
+0x6a,0xe5,0xac,0x7e
+0xb4,0xe6,0xf6,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Greater Than Zero
+#----------------------------------------------------------------------
+# CHECK: fcmgt s10, s11, #0.0
+# CHECK: fcmgt d20, d21, #0.0
+0x6a,0xc9,0xa0,0x5e
+0xb4,0xca,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+#----------------------------------------------------------------------
+# CHECK: fcmle s10, s11, #0.0
+# CHECK: fcmle d20, d21, #0.0
+0x6a,0xd9,0xa0,0x7e
+0xb4,0xda,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Compare Mask Less Than
+#----------------------------------------------------------------------
+# CHECK: fcmlt s10, s11, #0.0
+# CHECK: fcmlt d20, d21, #0.0
+0x6a,0xe9,0xa0,0x5e
+0xb4,0xea,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+#----------------------------------------------------------------------
+# CHECK: facge s10, s11, s12
+# CHECK: facge d20, d21, d22
+0x6a,0xed,0x2c,0x7e
+0xb4,0xee,0x76,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Absolute Compare Mask Greater Than
+#----------------------------------------------------------------------
+# CHECK: facgt s10, s11, s12   
+# CHECK: facgt d20, d21, d22   
+0x6a,0xed,0xac,0x7e
+0xb4,0xee,0xf6,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Absolute Value
+#----------------------------------------------------------------------
+# CHECK: abs d29, d24
+0x1d,0xbb,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Signed Saturating Absolute Value
+#----------------------------------------------------------------------
+# CHECK: sqabs b19, b14
+# CHECK: sqabs h21, h15
+# CHECK: sqabs s20, s12
+# CHECK: sqabs d18, d12
+0xd3,0x79,0x20,0x5e
+0xf5,0x79,0x60,0x5e
+0x94,0x79,0xa0,0x5e
+0x92,0x79,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Negate
+#----------------------------------------------------------------------
+# CHECK: neg d29, d24
+0x1d,0xbb,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Signed Saturating Negate
+#----------------------------------------------------------------------
+# CHECK: sqneg b19, b14
+# CHECK: sqneg h21, h15
+# CHECK: sqneg s20, s12
+# CHECK: sqneg d18, d12
+0xd3,0x79,0x20,0x7e
+0xf5,0x79,0x60,0x7e
+0x94,0x79,0xa0,0x7e
+0x92,0x79,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Signed Saturating Accumulate of Unsigned Value
+#----------------------------------------------------------------------
+# CHECK: suqadd b19, b14
+# CHECK: suqadd h20, h15
+# CHECK: suqadd s21, s12
+# CHECK: suqadd d18, d22
+0xd3,0x39,0x20,0x5e
+0xf4,0x39,0x60,0x5e
+0x95,0x39,0xa0,0x5e
+0xd2,0x3a,0xe0,0x5e
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Accumulate of Signed Value
+#----------------------------------------------------------------------
+# CHECK: usqadd b19, b14
+# CHECK: usqadd h20, h15
+# CHECK: usqadd s21, s12
+# CHECK: usqadd d18, d22
+0xd3,0x39,0x20,0x7e
+0xf4,0x39,0x60,0x7e
+0x95,0x39,0xa0,0x7e
+0xd2,0x3a,0xe0,0x7e
+
+#----------------------------------------------------------------------
+# Signed Saturating Doubling Multiply-Add Long
+#----------------------------------------------------------------------
+# CHECK: sqdmlal s17, h27, h12
+# CHECK: sqdmlal d19, s24, s12
+0x71,0x93,0x6c,0x5e
+0x13,0x93,0xac,0x5e
+        
+#----------------------------------------------------------------------
+# Signed Saturating Doubling Multiply-Subtract Long
+#----------------------------------------------------------------------
+# CHECK: sqdmlsl s14, h12, h25
+# CHECK: sqdmlsl d12, s23, s13
+0x8e,0xb1,0x79,0x5e
+0xec,0xb2,0xad,0x5e
+        
+#----------------------------------------------------------------------
+# Signed Saturating Doubling Multiply Long
+#----------------------------------------------------------------------
+# CHECK: sqdmull s12, h22, h12
+# CHECK: sqdmull d15, s22, s12
+0xcc,0xd2,0x6c,0x5e
+0xcf,0xd2,0xac,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Signed Saturating Extract Unsigned Narrow
+#----------------------------------------------------------------------
+# CHECK: sqxtun b19, h14
+# CHECK: sqxtun h21, s15
+# CHECK: sqxtun s20, d12
+0xd3,0x29,0x21,0x7e
+0xf5,0x29,0x61,0x7e
+0x94,0x29,0xa1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Signed Saturating Extract Signed Narrow
+#----------------------------------------------------------------------
+# CHECK: sqxtn b18, h18
+# CHECK: sqxtn h20, s17
+# CHECK: sqxtn s19, d14
+0x52,0x4a,0x21,0x5e
+0x34,0x4a,0x61,0x5e
+0xd3,0x49,0xa1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Saturating Extract Narrow
+#----------------------------------------------------------------------
+# CHECK: uqxtn b18, h18
+# CHECK: uqxtn h20, s17
+# CHECK: uqxtn s19, d14
+0x52,0x4a,0x21,0x7e
+0x34,0x4a,0x61,0x7e
+0xd3,0x49,0xa1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Signed Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sshr d15, d16, #12
+0x0f,0x06,0x74,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ushr d10, d17, #18
+0x2a,0x06,0x6e,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Rounding Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: srshr d19, d18, #7
+0x53,0x26,0x79,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Rounding Shift Right (Immediate)
+#----------------------------------------------------------------------
+# CHECK: urshr d20, d23, #31
+0xf4,0x26,0x61,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ssra d18, d12, #21
+0x92,0x15,0x6b,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: usra d20, d13, #61
+0xb4,0x15,0x43,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: srsra d15, d11, #19
+0x6f,0x35,0x6d,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ursra d18, d10, #13
+0x52,0x35,0x73,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Shift Left (Immediate)
+#----------------------------------------------------------------------
+# CHECK: shl d7, d10, #12
+0x47,0x55,0x4c,0x5f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Left (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshl b11, b19, #7
+# CHECK: sqshl h13, h18, #11
+# CHECK: sqshl s14, s17, #22
+# CHECK: sqshl d15, d16, #51
+0x6b,0x76,0x0f,0x5f
+0x4d,0x76,0x1b,0x5f
+0x2e,0x76,0x36,0x5f
+0x0f,0x76,0x73,0x5f
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Shift Left (Immediate)
+#----------------------------------------------------------------------
+# CHECK: uqshl b18, b15, #6
+# CHECK: uqshl h11, h18, #7
+# CHECK: uqshl s14, s19, #18
+# CHECK: uqshl d15, d12, #19
+0xf2,0x75,0x0e,0x7f
+0x4b,0x76,0x17,0x7f
+0x6e,0x76,0x32,0x7f
+0x8f,0x75,0x53,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Left Unsigned (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshlu b15, b18, #6
+# CHECK: sqshlu h19, h17, #6
+# CHECK: sqshlu s16, s14, #25
+# CHECK: sqshlu d11, d13, #32
+0x4f,0x66,0x0e,0x7f
+0x33,0x66,0x16,0x7f
+0xd0,0x65,0x39,0x7f
+0xab,0x65,0x60,0x7f
+
+#----------------------------------------------------------------------
+# Shift Right And Insert (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sri d10, d12, #14
+0x8a,0x45,0x72,0x7f
+
+#----------------------------------------------------------------------
+# Shift Left And Insert (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sli d10, d14, #12
+0xca,0x55,0x4c,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshrn b10, h15, #5
+# CHECK: sqshrn h17, s10, #4
+# CHECK: sqshrn s18, d10, #31
+0xea,0x95,0x0b,0x5f
+0x51,0x95,0x1c,0x5f
+0x52,0x95,0x21,0x5f
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: uqshrn b12, h10, #7
+# CHECK: uqshrn h10, s14, #5
+# CHECK: uqshrn s10, d12, #13
+0x4c,0x95,0x09,0x7f
+0xca,0x95,0x1b,0x7f
+0x8a,0x95,0x33,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Rounded Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqrshrn b10, h13, #2
+# CHECK: sqrshrn h15, s10, #6
+# CHECK: sqrshrn s15, d12, #9
+0xaa,0x9d,0x0e,0x5f
+0x4f,0x9d,0x1a,0x5f
+0x8f,0x9d,0x37,0x5f
+
+#----------------------------------------------------------------------
+# Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: uqrshrn b10, h12, #5
+# CHECK: uqrshrn h12, s10, #14
+# CHECK: uqrshrn s10, d10, #25
+0x8a,0x9d,0x0b,0x7f
+0x4c,0x9d,0x12,0x7f
+0x4a,0x9d,0x27,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Shift Right Unsigned Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqshrun b15, h10, #7
+# CHECK: sqshrun h20, s14, #3
+# CHECK: sqshrun s10, d15, #15
+0x4f,0x85,0x09,0x7f
+0xd4,0x85,0x1d,0x7f
+0xea,0x85,0x31,0x7f
+
+#----------------------------------------------------------------------
+# Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+#----------------------------------------------------------------------
+# CHECK: sqrshrun b17, h10, #6
+# CHECK: sqrshrun h10, s13, #15
+# CHECK: sqrshrun s22, d16, #31
+0x51,0x8d,0x0a,0x7f
+0xaa,0x8d,0x11,0x7f
+0x16,0x8e,0x21,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
+#----------------------------------------------------------------------
+# CHECK: scvtf s22, s13, #32
+# CHECK: scvtf d21, d12, #64
+0xb6,0xe5,0x20,0x5f
+0x95,0xe5,0x40,0x5f
+
+#----------------------------------------------------------------------
+# Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
+#----------------------------------------------------------------------
+# CHECK: ucvtf s22, s13, #32
+# CHECK: ucvtf d21, d14, #64
+0xb6,0xe5,0x20,0x7f
+0xd5,0xe5,0x40,0x7f
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Signed Fixed-point (Immediate)
+#----------------------------------------------------------------------
+# CHECK: fcvtzs s21, s12, #1
+# CHECK: fcvtzs d21, d12, #1
+0x95,0xfd,0x3f,0x5f
+0x95,0xfd,0x7f,0x5f
+        
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
+#----------------------------------------------------------------------
+# CHECK: fcvtzu s21, s12, #1
+# CHECK: fcvtzu d21, d12, #1
+0x95,0xfd,0x3f,0x7f
+0x95,0xfd,0x7f,0x7f
+
+#----------------------------------------------------------------------
+# Vector load/store multiple N-element structure
+#----------------------------------------------------------------------
+# CHECK: ld1 {v0.16b}, [x0]
+# CHECK: ld1 {v15.8h, v16.8h}, [x15]
+# CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp]
+# CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+0x00,0x70,0x40,0x4c
+0xef,0xa5,0x40,0x4c
+0xff,0x6b,0x40,0x4c
+0x00,0x2c,0x40,0x4c
+
+# CHECK: ld2 {v0.8b, v1.8b}, [x0]
+# CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15]
+# CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+0x00,0x80,0x40,0x0c
+0xef,0x45,0x40,0x0c
+0xff,0x0b,0x40,0x0c
+
+# CHECK: st1 {v0.16b}, [x0]
+# CHECK: st1 {v15.8h, v16.8h}, [x15]
+# CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp]
+# CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+0x00,0x70,0x00,0x4c
+0xef,0xa5,0x00,0x4c
+0xff,0x6b,0x00,0x4c
+0x00,0x2c,0x00,0x4c
+
+# CHECK: st2 {v0.8b, v1.8b}, [x0]
+# CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15]
+# CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+0x00,0x80,0x00,0x0c
+0xef,0x45,0x00,0x0c
+0xff,0x0b,0x00,0x0c
+
+#----------------------------------------------------------------------
+# Vector load/store multiple N-element structure (post-index)
+#----------------------------------------------------------------------
+# CHECK: ld1 {v15.8h}, [x15], x2
+# CHECK: ld1 {v31.4s, v0.4s}, [sp], #32
+# CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+# CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+0xef,0x75,0xc2,0x4c
+0xff,0xab,0xdf,0x4c
+0x00,0x6c,0xdf,0x4c
+0x00,0x20,0xc3,0x0c
+
+# CHECK: ld2 {v0.16b, v1.16b}, [x0], x1
+# CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+# CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+0x00,0x80,0xc1,0x4c
+0xef,0x45,0xc2,0x4c
+0xff,0x0b,0xdf,0x4c
+
+
+# CHECK: st1 {v15.8h}, [x15], x2
+# CHECK: st1 {v31.4s, v0.4s}, [sp], #32
+# CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+# CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+0xef,0x75,0x82,0x4c
+0xff,0xab,0x9f,0x4c
+0x00,0x6c,0x9f,0x4c
+0x00,0x20,0x83,0x0c
+
+# CHECK: st2 {v0.16b, v1.16b}, [x0], x1
+# CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+# CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+0x00,0x80,0x81,0x4c
+0xef,0x45,0x82,0x4c
+0xff,0x0b,0x9f,0x4c
+
+#----------------------------------------------------------------------
+# Vector load single N-element structure to all lanes of N
+# consecutive registers (N = 1,2,3,4)
+#----------------------------------------------------------------------
+# CHECK: ld1r {v0.16b}, [x0]
+# CHECK: ld1r {v15.8h}, [x15]
+# CHECK: ld2r {v31.4s, v0.4s}, [sp]
+# CHECK: ld2r {v0.2d, v1.2d}, [x0]
+# CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0]
+# CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15]
+# CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+# CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp]
+0x00,0xc0,0x40,0x4d
+0xef,0xc5,0x40,0x4d
+0xff,0xcb,0x60,0x4d
+0x00,0xcc,0x60,0x4d
+0x00,0xe0,0x40,0x0d
+0xef,0xe5,0x40,0x0d
+0xff,0xeb,0x60,0x0d
+0xff,0xef,0x60,0x0d
+
+#----------------------------------------------------------------------
+# Vector load/store single N-element structure to/from one lane of N
+# consecutive registers (N = 1,2,3,4)
+#----------------------------------------------------------------------
+# CHECK: ld1 {v0.b}[9], [x0]
+# CHECK: ld2 {v15.h, v16.h}[7], [x15]
+# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp]
+# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0]
+# CHECK: st1 {v0.d}[1], [x0]
+# CHECK: st2 {v31.s, v0.s}[3], [sp]
+# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15]
+# CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0]
+0x00,0x04,0x40,0x4d
+0xef,0x59,0x60,0x4d
+0xff,0xb3,0x40,0x4d
+0x00,0xa4,0x60,0x4d
+0x00,0x84,0x00,0x4d
+0xff,0x93,0x20,0x4d
+0xef,0x79,0x00,0x4d
+0x00,0x24,0x20,0x4d
+
+#----------------------------------------------------------------------
+# Post-index of vector load single N-element structure to all lanes of N
+# consecutive registers (N = 1,2,3,4)
+#----------------------------------------------------------------------
+# CHECK: ld1r {v0.16b}, [x0], #1
+# CHECK: ld1r {v15.8h}, [x15], #2
+# CHECK: ld2r {v31.4s, v0.4s}, [sp], #8
+# CHECK: ld2r {v0.2d, v1.2d}, [x0], #16
+# CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3
+# CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6
+# CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30
+# CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7
+0x00,0xc0,0xdf,0x4d
+0xef,0xc5,0xdf,0x4d
+0xff,0xcb,0xff,0x4d
+0x00,0xcc,0xff,0x4d
+0x00,0xe0,0xdf,0x0d
+0xef,0xe5,0xdf,0x0d
+0xff,0xeb,0xfe,0x0d
+0xff,0xef,0xe7,0x0d
+
+#----------------------------------------------------------------------
+# Post-index of vector load/store single N-element structure to/from
+#  one lane of N consecutive registers (N = 1,2,3,4)
+#----------------------------------------------------------------------
+# CHECK: ld1 {v0.b}[9], [x0], #1
+# CHECK: ld2 {v15.h, v16.h}[7], [x15], #4
+# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3
+# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
+# CHECK: ld4 {v0.h, v1.h, v2.h, v3.h}[7], [x0], x0
+# CHECK: st1 {v0.d}[1], [x0], #8
+# CHECK: st2 {v31.s, v0.s}[3], [sp], #8
+# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6
+# CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5
+0x00,0x04,0xdf,0x4d
+0xef,0x59,0xff,0x4d
+0xff,0xb3,0xc3,0x4d
+0x00,0xa4,0xff,0x4d
+0x00,0x78,0xe0,0x4d
+0x00,0x84,0x9f,0x4d
+0xff,0x93,0xbf,0x4d
+0xef,0x79,0x9f,0x4d
+0x00,0x24,0xa5,0x4d
+
+#----------------------------------------------------------------------
+# Bitwise extract
+#----------------------------------------------------------------------
+0x20,0x18,0x02,0x2e
+0x20,0x18,0x02,0x6e
+# CHECK: ext v0.8b, v1.8b, v2.8b, #0x3
+# CHECK: ext v0.16b, v1.16b, v2.16b, #0x3
+
+#----------------------------------------------------------------------
+# unzip with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: uzp1	v1.8b, v1.8b, v2.8b
+# CHECK: uzp1	v2.16b, v1.16b, v2.16b
+# CHECK: uzp1	v3.4h, v1.4h, v2.4h
+# CHECK: uzp1	v4.8h, v1.8h, v2.8h
+# CHECK: uzp1	v5.2s, v1.2s, v2.2s
+# CHECK: uzp1	v6.4s, v1.4s, v2.4s
+# CHECK: uzp1	v7.2d, v1.2d, v2.2d
+0x21,0x18,0x02,0x0e
+0x22,0x18,0x02,0x4e
+0x23,0x18,0x42,0x0e
+0x24,0x18,0x42,0x4e
+0x25,0x18,0x82,0x0e
+0x26,0x18,0x82,0x4e
+0x27,0x18,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# transpose with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: trn1	v8.8b, v1.8b, v2.8b
+# CHECK: trn1	v9.16b, v1.16b, v2.16b
+# CHECK: trn1	v10.4h, v1.4h, v2.4h
+# CHECK: trn1	v27.8h, v7.8h, v2.8h
+# CHECK: trn1	v12.2s, v7.2s, v2.2s
+# CHECK: trn1	v29.4s, v6.4s, v2.4s
+# CHECK: trn1	v14.2d, v6.2d, v2.2d
+0x28,0x28,0x02,0x0e
+0x29,0x28,0x02,0x4e
+0x2a,0x28,0x42,0x0e
+0xfb,0x28,0x42,0x4e
+0xec,0x28,0x82,0x0e
+0xdd,0x28,0x82,0x4e
+0xce,0x28,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# zip with 3 same vectors to get primary result
+#----------------------------------------------------------------------
+# CHECK: zip1	v31.8b, v5.8b, v2.8b
+# CHECK: zip1	v0.16b, v5.16b, v2.16b
+# CHECK: zip1	v17.4h, v4.4h, v2.4h
+# CHECK: zip1	v2.8h, v4.8h, v2.8h
+# CHECK: zip1	v19.2s, v3.2s, v2.2s
+# CHECK: zip1	v4.4s, v3.4s, v2.4s
+# CHECK: zip1	v21.2d, v2.2d, v2.2d
+0xbf,0x38,0x02,0x0e
+0xa0,0x38,0x02,0x4e
+0x91,0x38,0x42,0x0e
+0x82,0x38,0x42,0x4e
+0x73,0x38,0x82,0x0e
+0x64,0x38,0x82,0x4e
+0x55,0x38,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# unzip with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: uzp2	v6.8b, v2.8b, v2.8b
+# CHECK: uzp2	v23.16b, v1.16b, v2.16b
+# CHECK: uzp2	v8.4h, v1.4h, v2.4h
+# CHECK: uzp2	v25.8h, v0.8h, v2.8h
+# CHECK: uzp2	v10.2s, v0.2s, v2.2s
+# CHECK: uzp2	v27.4s, v7.4s, v2.4s
+# CHECK: uzp2	v12.2d, v7.2d, v2.2d
+0x46,0x58,0x02,0x0e
+0x37,0x58,0x02,0x4e
+0x28,0x58,0x42,0x0e
+0x19,0x58,0x42,0x4e
+0x0a,0x58,0x82,0x0e
+0xfb,0x58,0x82,0x4e
+0xec,0x58,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# transpose with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: trn2	v29.8b, v6.8b, v2.8b
+# CHECK: trn2	v14.16b, v6.16b, v2.16b
+# CHECK: trn2	v31.4h, v5.4h, v2.4h
+# CHECK: trn2	v0.8h, v5.8h, v2.8h
+# CHECK: trn2	v17.2s, v4.2s, v2.2s
+# CHECK: trn2	v2.4s, v4.4s, v2.4s
+# CHECK: trn2	v19.2d, v3.2d, v2.2d
+0xdd,0x68,0x02,0x0e
+0xce,0x68,0x02,0x4e
+0xbf,0x68,0x42,0x0e
+0xa0,0x68,0x42,0x4e
+0x91,0x68,0x82,0x0e
+0x82,0x68,0x82,0x4e
+0x73,0x68,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# zip with 3 same vectors to get secondary result
+#----------------------------------------------------------------------
+# CHECK: zip2	v4.8b, v3.8b, v2.8b
+# CHECK: zip2	v21.16b, v2.16b, v2.16b
+# CHECK: zip2	v6.4h, v2.4h, v2.4h
+# CHECK: zip2	v23.8h, v1.8h, v2.8h
+# CHECK: zip2	v8.2s, v1.2s, v2.2s
+# CHECK: zip2	v25.4s, v0.4s, v2.4s
+# CHECK: zip2	v10.2d, v0.2d, v2.2d
+0x64,0x78,0x02,0x0e
+0x55,0x78,0x02,0x4e
+0x46,0x78,0x42,0x0e
+0x37,0x78,0x42,0x4e
+0x28,0x78,0x82,0x0e
+0x19,0x78,0x82,0x4e
+0x0a,0x78,0xc2,0x4e
+
+#----------------------------------------------------------------------
+# Scalar Floating Point  multiply (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmul s0, s1, v1.s[0]
+# CHECK: fmul s0, s1, v1.s[3]
+# CHECK: fmul d0, d1, v1.d[0]
+# CHECK: fmul d0, d1, v1.d[1]
+# CHECK: fmul d15, d15, v15.d[1]
+0x20 0x90 0x81 0x5f
+0x20 0x98 0xa1 0x5f
+0x20 0x90 0xc1 0x5f
+0x20 0x98 0xc1 0x5f
+0xef 0x99 0xcf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Floating Point  multiply extended (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmulx s3, s5, v7.s[0]
+# CHECK: fmulx s3, s5, v7.s[3]
+# CHECK: fmulx s3, s5, v15.s[3]
+# CHECK: fmulx d0, d4, v8.d[0]
+# CHECK: fmulx d0, d4, v8.d[1]
+0xa3 0x90 0x87 0x7f
+0xa3 0x98 0xa7 0x7f
+0xa3 0x98 0xaf 0x7f
+0x80 0x90 0xc8 0x7f
+0x80 0x98 0xc8 0x7f
+
+#----------------------------------------------------------------------
+# Scalar Floating Point fused multiply-add (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmla s0, s1, v1.s[0]
+# CHECK: fmla s0, s1, v1.s[3]
+# CHECK: fmla d0, d1, v1.d[0]
+# CHECK: fmla d0, d1, v1.d[1]
+# CHECK: fmla d15, d15, v15.d[1]
+0x20 0x10 0x81 0x5f
+0x20 0x18 0xa1 0x5f
+0x20 0x10 0xc1 0x5f
+0x20 0x18 0xc1 0x5f
+0xef 0x19 0xcf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Floating Point fused multiply-sub (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: fmls s3, s5, v7.s[0]
+# CHECK: fmls s3, s5, v7.s[3]
+# CHECK: fmls s3, s5, v15.s[3]
+# CHECK: fmls d0, d4, v8.d[0]
+# CHECK: fmls d0, d4, v8.d[1]
+0xa3 0x50 0x87 0x5f
+0xa3 0x58 0xa7 0x5f
+0xa3 0x58 0xaf 0x5f
+0x80 0x50 0xc8 0x5f
+0x80 0x58 0xc8 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling
+# multiply-add long (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmlal s0, h0, v0.h[0]
+# CHECK: sqdmlal s0, h0, v0.h[1]
+# CHECK: sqdmlal s0, h0, v0.h[2]
+# CHECK: sqdmlal s0, h0, v0.h[3]
+# CHECK: sqdmlal s0, h0, v0.h[4]
+# CHECK: sqdmlal s0, h0, v0.h[5]
+# CHECK: sqdmlal s0, h0, v0.h[6]
+# CHECK: sqdmlal s0, h0, v0.h[7]
+# CHECK: sqdmlal d8, s9, v15.s[0]
+# CHECK: sqdmlal d8, s9, v15.s[1]
+# CHECK: sqdmlal d8, s9, v15.s[2]
+# CHECK: sqdmlal d8, s9, v15.s[3]
+0x00 0x30 0x40 0x5f
+0x00 0x30 0x50 0x5f
+0x00 0x30 0x60 0x5f
+0x00 0x30 0x70 0x5f
+0x00 0x38 0x40 0x5f
+0x00 0x38 0x50 0x5f
+0x00 0x38 0x60 0x5f
+0x00 0x38 0x70 0x5f
+0x28 0x31 0x8f 0x5f
+0x28 0x31 0xaf 0x5f
+0x28 0x39 0x8f 0x5f
+0x28 0x39 0xaf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling
+# multiply-sub long (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmlsl s0, h0, v0.h[0]
+# CHECK: sqdmlsl s0, h0, v0.h[1]
+# CHECK: sqdmlsl s0, h0, v0.h[2]
+# CHECK: sqdmlsl s0, h0, v0.h[3]
+# CHECK: sqdmlsl s0, h0, v0.h[4]
+# CHECK: sqdmlsl s0, h0, v0.h[5]
+# CHECK: sqdmlsl s0, h0, v0.h[6]
+# CHECK: sqdmlsl s0, h0, v0.h[7]
+# CHECK: sqdmlsl d8, s9, v15.s[0]
+# CHECK: sqdmlsl d8, s9, v15.s[1]
+# CHECK: sqdmlsl d8, s9, v15.s[2]
+# CHECK: sqdmlsl d8, s9, v15.s[3]
+0x00 0x70 0x40 0x5f
+0x00 0x70 0x50 0x5f
+0x00 0x70 0x60 0x5f
+0x00 0x70 0x70 0x5f
+0x00 0x78 0x40 0x5f
+0x00 0x78 0x50 0x5f
+0x00 0x78 0x60 0x5f
+0x00 0x78 0x70 0x5f
+0x28 0x71 0x8f 0x5f
+0x28 0x71 0xaf 0x5f
+0x28 0x79 0x8f 0x5f
+0x28 0x79 0xaf 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling multiply long (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmull s1, h1, v1.h[0]
+# CHECK: sqdmull s1, h1, v1.h[1]
+# CHECK: sqdmull s1, h1, v1.h[2]
+# CHECK: sqdmull s1, h1, v1.h[3]
+# CHECK: sqdmull s1, h1, v1.h[4]
+# CHECK: sqdmull s1, h1, v1.h[5]
+# CHECK: sqdmull s1, h1, v1.h[6]
+# CHECK: sqdmull s1, h1, v1.h[7]
+# CHECK: sqdmull d1, s1, v4.s[0]
+# CHECK: sqdmull d1, s1, v4.s[1]
+# CHECK: sqdmull d1, s1, v4.s[2]
+# CHECK: sqdmull d1, s1, v4.s[3]
+0x21 0xb0 0x41 0x5f
+0x21 0xb0 0x51 0x5f
+0x21 0xb0 0x61 0x5f
+0x21 0xb0 0x71 0x5f
+0x21 0xb8 0x41 0x5f
+0x21 0xb8 0x51 0x5f
+0x21 0xb8 0x61 0x5f
+0x21 0xb8 0x71 0x5f
+0x21 0xb0 0x84 0x5f
+0x21 0xb0 0xa4 0x5f
+0x21 0xb8 0x84 0x5f
+0x21 0xb8 0xa4 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating doubling multiply returning
+# high half (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqdmulh h7, h1, v14.h[0]
+# CHECK: sqdmulh h7, h15, v8.h[1]
+# CHECK: sqdmulh h7, h15, v8.h[2]
+# CHECK: sqdmulh h7, h15, v8.h[3]
+# CHECK: sqdmulh h7, h15, v8.h[4]
+# CHECK: sqdmulh h7, h15, v8.h[5]
+# CHECK: sqdmulh h7, h15, v8.h[6]
+# CHECK: sqdmulh h7, h15, v8.h[7]
+# CHECK: sqdmulh s15, s3, v4.s[0]
+# CHECK: sqdmulh s15, s14, v16.s[1]
+# CHECK: sqdmulh s15, s15, v16.s[2]
+# CHECK: sqdmulh s15, s16, v17.s[3]
+0x27 0xc0 0x4e 0x5f
+0xe7 0xc1 0x58 0x5f
+0xe7 0xc1 0x68 0x5f
+0xe7 0xc1 0x78 0x5f
+0xe7 0xc9 0x48 0x5f
+0xe7 0xc9 0x58 0x5f
+0xe7 0xc9 0x68 0x5f
+0xe7 0xc9 0x78 0x5f
+0x6f 0xc0 0x84 0x5f
+0xcf 0xc1 0xb0 0x5f
+0xef 0xc9 0x90 0x5f
+0x0f 0xca 0xb1 0x5f
+
+#----------------------------------------------------------------------
+# Scalar Signed saturating rounding doubling multiply
+# returning high half (scalar, by element)
+#----------------------------------------------------------------------
+# CHECK: sqrdmulh h7, h1, v14.h[0]
+# CHECK: sqrdmulh h7, h15, v8.h[1]
+# CHECK: sqrdmulh h7, h15, v8.h[2]
+# CHECK: sqrdmulh h7, h15, v8.h[3]
+# CHECK: sqrdmulh h7, h15, v8.h[4]
+# CHECK: sqrdmulh h7, h15, v8.h[5]
+# CHECK: sqrdmulh h7, h15, v8.h[6]
+# CHECK: sqrdmulh h7, h15, v8.h[7]
+# CHECK: sqrdmulh s15, s3, v4.s[0]
+# CHECK: sqrdmulh s15, s14, v16.s[1]
+# CHECK: sqrdmulh s15, s15, v16.s[2]
+# CHECK: sqrdmulh s15, s16, v17.s[3]
+0x27 0xd0 0x4e 0x5f
+0xe7 0xd1 0x58 0x5f
+0xe7 0xd1 0x68 0x5f
+0xe7 0xd1 0x78 0x5f
+0xe7 0xd9 0x48 0x5f
+0xe7 0xd9 0x58 0x5f
+0xe7 0xd9 0x68 0x5f
+0xe7 0xd9 0x78 0x5f
+0x6f 0xd0 0x84 0x5f
+0xcf 0xd1 0xb0 0x5f
+0xef 0xd9 0x90 0x5f
+0x0f 0xda 0xb1 0x5f
+
+#----------------------------------------------------------------------
+# Duplicate element (scalar)
+#----------------------------------------------------------------------
+# CHECK: dup b0, v0.b[15]
+# CHECK: dup h2, v31.h[5]
+# CHECK: dup s17, v2.s[2]
+# CHECK: dup d6, v12.d[1]
+0x00 0x04 0x1f 0x5e
+0xe2 0x07 0x16 0x5e
+0x51 0x04 0x14 0x5e
+0x86 0x05 0x18 0x5e
+
+#----------------------------------------------------------------------
+# Table look up
+#----------------------------------------------------------------------
+0x20,0x00,0x02,0x0e
+0xf0,0x23,0x02,0x0e
+0x20,0x40,0x02,0x0e
+0xf0,0x62,0x02,0x0e
+# CHECK: tbl v0.8b, {v1.16b}, v2.8b
+# CHECK: tbl v16.8b, {v31.16b, v0.16b}, v2.8b
+# CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
+# CHECK: tbl v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
+
+0x20,0x00,0x02,0x4e
+0xf0,0x23,0x02,0x4e
+0x20,0x40,0x02,0x4e
+0xe0,0x63,0x02,0x4e
+# CHECK: tbl v0.16b, {v1.16b}, v2.16b
+# CHECK: tbl v16.16b, {v31.16b, v0.16b}, v2.16b
+# CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
+# CHECK: tbl v0.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
+
+0x20,0x10,0x02,0x0e
+0xf0,0x33,0x02,0x0e
+0x20,0x50,0x02,0x0e
+0xf0,0x72,0x02,0x0e
+# CHECK: tbx v0.8b, {v1.16b}, v2.8b
+# CHECK: tbx v16.8b, {v31.16b, v0.16b}, v2.8b
+# CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
+# CHECK: tbx v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
+
+0x20,0x10,0x02,0x4e
+0xf0,0x33,0x02,0x4e
+0x20,0x50,0x02,0x4e
+0xf0,0x73,0x02,0x4e
+# CHECK: tbx v0.16b, {v1.16b}, v2.16b
+# CHECK: tbx v16.16b, {v31.16b, v0.16b}, v2.16b
+# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
+# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
+# Odd
+#----------------------------------------------------------------------
+# CHECK: fcvtxn s22, d13
+0xb6,0x69,0x61,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
+# With Ties To Away
+#----------------------------------------------------------------------
+# CHECK: fcvtas s12, s13
+# CHECK: fcvtas d21, d14
+
+0xac,0xc9,0x21,0x5e
+0xd5,0xc9,0x61,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Unsigned Integer, Rounding To
+# Nearest With Ties To Away
+#----------------------------------------------------------------------
+# CHECK: fcvtau s12, s13
+# CHECK: fcvtau d21, d14
+0xac,0xc9,0x21,0x7e
+0xd5,0xc9,0x61,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Signed Integer, Rounding Toward
+# Minus Infinity
+#----------------------------------------------------------------------
+# CHECK: fcvtms s22, s13
+# CHECK: fcvtms d21, d14
+0xb6,0xb9,0x21,0x5e
+0xd5,0xb9,0x61,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+# Minus Infinity
+#----------------------------------------------------------------------
+# CHECK: fcvtmu s12, s13
+# CHECK: fcvtmu d21, d14
+0xac,0xb9,0x21,0x7e
+0xd5,0xb9,0x61,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
+# With Ties To Even
+#----------------------------------------------------------------------
+
+# CHECK: fcvtns s22, s13
+# CHECK: fcvtns d21, d14
+
+0xb6,0xa9,0x21,0x5e
+0xd5,0xa9,0x61,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Unsigned Integer, Rounding To
+# Nearest With Ties To Even
+#----------------------------------------------------------------------
+
+# CHECK: fcvtnu s12, s13
+# CHECK: fcvtnu d21, d14
+0xac,0xa9,0x21,0x7e
+0xd5,0xa9,0x61,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Signed Integer, Rounding Toward
+# Positive Infinity
+#----------------------------------------------------------------------
+# CHECK: fcvtps s22, s13
+# CHECK: fcvtps d21, d14
+0xb6,0xa9,0xa1,0x5e
+0xd5,0xa9,0xe1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+# Positive Infinity
+#----------------------------------------------------------------------
+# CHECK: fcvtpu s12, s13
+# CHECK: fcvtpu d21, d14
+0xac,0xa9,0xa1,0x7e
+0xd5,0xa9,0xe1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
+#----------------------------------------------------------------------
+# CHECK: fcvtzs s12, s13
+# CHECK: fcvtzs d21, d14
+0xac,0xb9,0xa1,0x5e
+0xd5,0xb9,0xe1,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
+# Zero
+#----------------------------------------------------------------------
+# CHECK: fcvtzu s12, s13
+# CHECK: fcvtzu d21, d14
+0xac,0xb9,0xa1,0x7e
+0xd5,0xb9,0xe1,0x7e
+
+#----------------------------------------------------------------------
+# Scalar Floating-point Absolute Difference
+#----------------------------------------------------------------------
+# CHECK: fabd s29, s24, s20
+# CHECK: fabd d29, d24, d20
+0x1d,0xd7,0xb4,0x7e
+0x1d,0xd7,0xf4,0x7e
diff --git a/test/MC/Disassembler/ARM/arm-LDREXD-reencoding.txt b/test/MC/Disassembler/ARM/arm-LDREXD-reencoding.txt
new file mode 100644
index 000000000000..e73d4ce88e59
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-LDREXD-reencoding.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc -triple armv7 -show-encoding -disassemble < %s | FileCheck %s
+
+0x9f 0x0f 0xb0 0xe1
+0x9f 0xcf 0xb1 0xe1
+0x9f 0xcf 0xb3 0xe1
+0x9f 0x8f 0xbd 0xe1
+0x9f 0xcf 0xbe 0xe1
+
+# CHECK: ldrexd	r0, r1, [r0]            @ encoding: [0x9f,0x0f,0xb0,0xe1]
+# CHECK: ldrexd	r12, sp, [r1]           @ encoding: [0x9f,0xcf,0xb1,0xe1]
+# CHECK: ldrexd	r12, sp, [r3]           @ encoding: [0x9f,0xcf,0xb3,0xe1]
+# CHECK: ldrexd	r8, r9, [sp]            @ encoding: [0x9f,0x8f,0xbd,0xe1]
+# CHECK: ldrexd	r12, sp, [lr]           @ encoding: [0x9f,0xcf,0xbe,0xe1]
+
diff --git a/test/MC/Disassembler/ARM/arm-STREXD-reencoding.txt b/test/MC/Disassembler/ARM/arm-STREXD-reencoding.txt
new file mode 100644
index 000000000000..27944ac8bc54
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-STREXD-reencoding.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc -triple armv7 -show-encoding -disassemble < %s | FileCheck %s
+
+0x92 0x1f 0xa0 0xe1
+0x90 0x4f 0xa3 0xe1
+0x92 0xdf 0xa4 0xe1
+0x90 0xaf 0xa6 0xe1
+0x9c 0x5f 0xa8 0xe1
+
+# CHECK: strexd	r1, r2, r3, [r0]        @ encoding: [0x92,0x1f,0xa0,0xe1]
+# CHECK: strexd	r4, r0, r1, [r3]        @ encoding: [0x90,0x4f,0xa3,0xe1]
+# CHECK: strexd	sp, r2, r3, [r4]        @ encoding: [0x92,0xdf,0xa4,0xe1]
+# CHECK: strexd	r10, r0, r1, [r6]       @ encoding: [0x90,0xaf,0xa6,0xe1]
+# CHECK: strexd	r5, r12, sp, [r8]       @ encoding: [0x9c,0x5f,0xa8,0xe1]
+
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 98daaa7649aa..acc2d9fec609 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -362,7 +362,3 @@
 
 # CHECK: ldmgt	sp!, {r9}
 0x00 0x02 0xbd 0xc8
-
-# CHECK: cdp2	p10, #0, c6, c12, c0, #7
-0xe0 0x6a 0x0c 0xfe
-
diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions-v8.txt b/test/MC/Disassembler/ARM/basic-arm-instructions-v8.txt
new file mode 100644
index 000000000000..d9286bffe0a5
--- /dev/null
+++ b/test/MC/Disassembler/ARM/basic-arm-instructions-v8.txt
@@ -0,0 +1,58 @@
+# RUN: llvm-mc -disassemble -triple armv8 -mattr=+db -show-encoding < %s | FileCheck %s
+
+# New v8 ARM instructions
+
+# HLT
+
+0x70 0x00 0x00 0xe1
+# CHECK: hlt #0
+
+0x7f 0xff 0x0f 0xe1
+# CHECK: hlt #65535
+
+0x59 0xf0 0x7f 0xf5
+0x51 0xf0 0x7f 0xf5
+0x55 0xf0 0x7f 0xf5
+0x5d 0xf0 0x7f 0xf5
+# CHECK: dmb ishld
+# CHECK: dmb oshld
+# CHECK: dmb nshld
+# CHECK: dmb ld
+
+0x05 0xf0 0x20 0xe3
+# CHECK: sevl
+
+
+# These are the only coprocessor instructions that remain defined in ARMv8
+# (The operations on p10/p11 disassemble into FP/NEON instructions)
+
+0x10 0x0e 0x00 0xee
+# CHECK: mcr p14
+
+0x10 0x0f 0x00 0xee
+# CHECK: mcr p15
+
+0x10 0x0e 0x10 0xee
+# CHECK: mrc p14
+
+0x10 0x0f 0x10 0xee
+# CHECK: mrc p15
+
+0x00 0x0e 0x40 0xec
+# CHECK: mcrr p14
+
+0x00 0x0f 0x40 0xec
+# CHECK: mcrr p15
+
+0x00 0x0e 0x50 0xec
+# CHECK: mrrc p14
+
+0x00 0x0f 0x50 0xec
+# CHECK: mrrc p15
+
+0x00 0x0e 0x80 0xec
+# CHECK: stc p14
+
+0x00 0x0e 0x90 0xec
+# CHECK: ldc p14
+
diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
index 9f63e1e914ff..8bcf4e6e3faa 100644
--- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt
+++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
@@ -454,10 +454,14 @@
 # CHECK: cpsie  aif
 # CHECK: cps  #15
 # CHECK: cpsid  if, #10
+# CHECK: cpsid  af, #17
+# CHECK: cpsie  f, #26
 
 0xc0 0x01 0x08 0xf1
 0x0f 0x00 0x02 0xf1
 0xca 0x00 0x0e 0xf1
+0x51 0x01 0x0e 0xf1
+0x5a 0x00 0x0a 0xf1
 
 
 #------------------------------------------------------------------------------
@@ -609,8 +613,10 @@
 # ISB
 #------------------------------------------------------------------------------
 # CHECK: isb sy
+# CHECK: isb #0xa
 
 0x6f 0xf0 0x7f 0xf5
+0x6a 0xf0 0x7f 0xf5
 
 
 
@@ -757,10 +763,14 @@
 # MRC/MRC2
 #------------------------------------------------------------------------------
 # CHECK: mrc  p14, #0, r1, c1, c2, #4
+# CHECK: mrc  p15, #7, apsr_nzcv, c15, c6, #6
 # CHECK: mrc2  p14, #0, r1, c1, c2, #4
+# CHECK: mrc2  p9, #7, apsr_nzcv, c15, c0, #1
 
 0x92 0x1e 0x11 0xee
+0xd6 0xff 0xff 0xee
 0x92 0x1e 0x11 0xfe
+0x30 0xf9 0xff 0xfe
 
 #------------------------------------------------------------------------------
 # MRRC/MRRC2
@@ -1297,6 +1307,29 @@
 0x77 0x69 0xe6 0xe0
 
 #------------------------------------------------------------------------------
+# RRX/RRXS
+#------------------------------------------------------------------------------
+# CHECK: rrx	r0, r1
+# CHECK: rrx	sp, pc
+# CHECK: rrx	pc, lr
+# CHECK: rrx	lr, sp
+
+0x61 0x00 0xa0 0xe1
+0x6f 0xd0 0xa0 0xe1
+0x6e 0xf0 0xa0 0xe1
+0x6d 0xe0 0xa0 0xe1
+
+# CHECK: rrxs	r0, r1
+# CHECK: rrxs	sp, pc
+# CHECK: rrxs	pc, lr
+# CHECK: rrxs	lr, sp
+
+0x61 0x00 0xb0 0xe1
+0x6f 0xd0 0xb0 0xe1
+0x6e 0xf0 0xb0 0xe1
+0x6d 0xe0 0xb0 0xe1
+
+#------------------------------------------------------------------------------
 # SADD16/SADD8
 #------------------------------------------------------------------------------
 # CHECK: sadd16 r1, r2, r3
@@ -2387,6 +2420,7 @@
 # CHECK: wfilt
 # CHECK: yield
 # CHECK: yieldne
+# CHECK: hint #5
 
 0x02 0xf0 0x20 0xe3
 0x02 0xf0 0x20 0x83
@@ -2394,3 +2428,4 @@
 0x03 0xf0 0x20 0xb3
 0x01 0xf0 0x20 0xe3
 0x01 0xf0 0x20 0x13
+0x05 0xf0 0x20 0xe3
diff --git a/test/MC/Disassembler/ARM/crc32-thumb.txt b/test/MC/Disassembler/ARM/crc32-thumb.txt
new file mode 100644
index 000000000000..2f83b58fd484
--- /dev/null
+++ b/test/MC/Disassembler/ARM/crc32-thumb.txt
@@ -0,0 +1,15 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv8 2>&1 | FileCheck %s
+
+# CHECK:  crc32b  r0, r1, r2
+# CHECK:  crc32h  r0, r1, r2
+# CHECK:  crc32w  r0, r1, r2
+# CHECK:  crc32cb r0, r1, r2
+# CHECK:  crc32ch r0, r1, r2
+# CHECK:  crc32cw r0, r1, r2
+
+0xc1 0xfa 0x82 0xf0
+0xc1 0xfa 0x92 0xf0
+0xc1 0xfa 0xa2 0xf0
+0xd1 0xfa 0x82 0xf0
+0xd1 0xfa 0x92 0xf0
+0xd1 0xfa 0xa2 0xf0
diff --git a/test/MC/Disassembler/ARM/crc32.txt b/test/MC/Disassembler/ARM/crc32.txt
new file mode 100644
index 000000000000..17bb03220ef0
--- /dev/null
+++ b/test/MC/Disassembler/ARM/crc32.txt
@@ -0,0 +1,15 @@
+# RUN: llvm-mc --disassemble %s -triple=armv8 2>&1 | FileCheck %s
+
+# CHECK:  crc32b  r0, r1, r2
+# CHECK:  crc32h  r0, r1, r2
+# CHECK:  crc32w  r0, r1, r2
+# CHECK:  crc32cb r0, r1, r2
+# CHECK:  crc32ch r0, r1, r2
+# CHECK:  crc32cw r0, r1, r2
+
+0x42 0x00 0x01 0xe1
+0x42 0x00 0x21 0xe1
+0x42 0x00 0x41 0xe1
+0x42 0x02 0x01 0xe1
+0x42 0x02 0x21 0xe1
+0x42 0x02 0x41 0xe1
diff --git a/test/MC/Disassembler/ARM/fp-armv8.txt b/test/MC/Disassembler/ARM/fp-armv8.txt
new file mode 100644
index 000000000000..46a26f5d6dc8
--- /dev/null
+++ b/test/MC/Disassembler/ARM/fp-armv8.txt
@@ -0,0 +1,160 @@
+# RUN: llvm-mc -disassemble -triple armv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s
+
+0xe0 0x3b 0xb2 0xee
+# CHECK: vcvtt.f64.f16 d3, s1
+
+0xcc 0x2b 0xf3 0xee
+# CHECK: vcvtt.f16.f64 s5, d12
+
+0x60 0x3b 0xb2 0xee
+# CHECK: vcvtb.f64.f16 d3, s1
+
+0x41 0x2b 0xb3 0xee
+# CHECK: vcvtb.f16.f64 s4, d1
+
+0xe0 0x3b 0xb2 0xae
+# CHECK: vcvttge.f64.f16 d3, s1
+
+0xcc 0x2b 0xf3 0xce
+# CHECK: vcvttgt.f16.f64 s5, d12
+
+0x60 0x3b 0xb2 0x0e
+# CHECK: vcvtbeq.f64.f16 d3, s1
+
+0x41 0x2b 0xb3 0xbe
+# CHECK: vcvtblt.f16.f64 s4, d1
+
+
+0xe1 0x1a 0xbc 0xfe
+# CHECK: vcvta.s32.f32 s2, s3
+
+0xc3 0x1b 0xbc 0xfe
+# CHECK: vcvta.s32.f64 s2, d3
+
+0xeb 0x3a 0xbd 0xfe
+# CHECK: vcvtn.s32.f32 s6, s23
+
+0xe7 0x3b 0xbd 0xfe
+# CHECK: vcvtn.s32.f64 s6, d23
+
+0xc2 0x0a 0xbe 0xfe
+# CHECK: vcvtp.s32.f32 s0, s4
+
+0xc4 0x0b 0xbe 0xfe
+# CHECK: vcvtp.s32.f64 s0, d4
+
+0xc4 0x8a 0xff 0xfe
+# CHECK: vcvtm.s32.f32 s17, s8
+
+0xc8 0x8b 0xff 0xfe
+# CHECK: vcvtm.s32.f64 s17, d8
+
+0x61 0x1a 0xbc 0xfe
+# CHECK: vcvta.u32.f32 s2, s3
+
+0x43 0x1b 0xbc 0xfe
+# CHECK: vcvta.u32.f64 s2, d3
+
+0x6b 0x3a 0xbd 0xfe
+# CHECK: vcvtn.u32.f32 s6, s23
+
+0x67 0x3b 0xbd 0xfe
+# CHECK: vcvtn.u32.f64 s6, d23
+
+0x42 0x0a 0xbe 0xfe
+# CHECK: vcvtp.u32.f32 s0, s4
+
+0x44 0x0b 0xbe 0xfe
+# CHECK: vcvtp.u32.f64 s0, d4
+
+0x44 0x8a 0xff 0xfe
+# CHECK: vcvtm.u32.f32 s17, s8
+
+0x48 0x8b 0xff 0xfe
+# CHECK: vcvtm.u32.f64 s17, d8
+
+
+0xab 0x2a 0x20 0xfe
+# CHECK: vselge.f32 s4, s1, s23
+
+0xa7 0xeb 0x6f 0xfe
+# CHECK: vselge.f64 d30, d31, d23
+
+0x80 0x0a 0x30 0xfe
+# CHECK: vselgt.f32 s0, s1, s0
+
+0x24 0x5b 0x3a 0xfe
+# CHECK: vselgt.f64 d5, d10, d20
+
+0x2b 0xfa 0x0e 0xfe
+# CHECK: vseleq.f32 s30, s28, s23
+
+0x08 0x2b 0x04 0xfe
+# CHECK: vseleq.f64 d2, d4, d8
+
+0x07 0xaa 0x58 0xfe
+# CHECK: vselvs.f32 s21, s16, s14
+
+0x2f 0x0b 0x11 0xfe
+# CHECK: vselvs.f64 d0, d1, d31
+
+
+0x00 0x2a 0xc6 0xfe
+# CHECK: vmaxnm.f32 s5, s12, s0
+
+0xae 0x5b 0x86 0xfe
+# CHECK: vmaxnm.f64 d5, d22, d30
+
+0x46 0x0a 0x80 0xfe
+# CHECK: vminnm.f32 s0, s0, s12
+
+0x49 0x4b 0x86 0xfe
+# CHECK: vminnm.f64 d4, d6, d9
+
+
+0xcc 0x3b 0xb6 0xae
+# CHECK: vrintzge.f64 d3, d12
+
+0xcc 0x1a 0xf6 0xee
+# CHECK: vrintz.f32 s3, s24
+
+0x40 0x5b 0xb6 0xbe
+# CHECK: vrintrlt.f64 d5, d0
+
+0x64 0x0a 0xb6 0xee
+# CHECK: vrintr.f32 s0, s9
+
+0x6e 0xcb 0xf7 0x0e
+# CHECK: vrintxeq.f64 d28, d30
+
+0x47 0x5a 0xb7 0x6e
+# CHECK: vrintxvs.f32 s10, s14
+
+0x44 0x3b 0xb8 0xfe
+# CHECK: vrinta.f64 d3, d4
+
+0x60 0x6a 0xb8 0xfe
+# CHECK: vrinta.f32 s12, s1
+
+0x44 0x3b 0xb9 0xfe
+# CHECK: vrintn.f64 d3, d4
+
+0x60 0x6a 0xb9 0xfe
+# CHECK: vrintn.f32 s12, s1
+
+0x44 0x3b 0xba 0xfe
+# CHECK: vrintp.f64 d3, d4
+
+0x60 0x6a 0xba 0xfe
+# CHECK: vrintp.f32 s12, s1
+
+0x44 0x3b 0xbb 0xfe
+# CHECK: vrintm.f64 d3, d4
+
+0x60 0x6a 0xbb 0xfe
+# CHECK: vrintm.f32 s12, s1
+
+
+0x10 0x0a 0xf5 0xee
+# CHECK: vmrs r0, mvfr2
+
diff --git a/test/MC/Disassembler/ARM/fp-encoding.txt b/test/MC/Disassembler/ARM/fp-encoding.txt
index 8dedf808c180..196bf44d4af3 100644
--- a/test/MC/Disassembler/ARM/fp-encoding.txt
+++ b/test/MC/Disassembler/ARM/fp-encoding.txt
@@ -117,18 +117,26 @@
 # CHECK: vmoveq  s0, r1
 
 0x10 0x0a 0xf1 0xee
-# CHECK: vmrs    r0, fpscr
+# CHECK: vmrs  r0, fpscr
 0x10 0x0a 0xf8 0xee
 # CHECK: vmrs  r0, fpexc
 0x10 0x0a 0xf0 0xee
 # CHECK: vmrs  r0, fpsid
+0x10 0x1a 0xf9 0xee
+# CHECK: vmrs  r1, fpinst
+0x10 0x8a 0xfa 0xee
+# CHECK: vmrs  r8, fpinst2
 
 0x10 0x0a 0xe1 0xee
-# CHECK: vmsr    fpscr, r0
+# CHECK: vmsr  fpscr, r0
 0x10 0x0a 0xe8 0xee
 # CHECK: vmsr  fpexc, r0
 0x10 0x0a 0xe0 0xee
 # CHECK: vmsr  fpsid, r0
+0x10 0x3a 0xe9 0xee
+# CHECK: vmsr  fpinst, r3
+0x10 0x4a 0xea 0xee
+# CHECK: vmsr  fpinst2, r4
 
 0x10 0x0a 0x00 0xee
 0x90 0x1a 0x00 0xee
@@ -221,6 +229,20 @@
 # CHECK: vldmdbeq r5!, {s21, s22, s23}
 # CHECK: vldmdbeq r7!, {d14, d15, d16, d17}
 
+0x0d 0x4b 0x96 0x0c
+0x0f 0x3b 0xb7 0x0c
+0x09 0x1b 0x38 0xed
+# CHECK: fldmiaxeq r6, {d4, d5, d6, d7, d8, d9}
+# CHECK: fldmiaxeq r7!, {d3, d4, d5, d6, d7, d8, d9}
+# CHECK: fldmdbx   r8!, {d1, d2, d3, d4}
+
+0x07 0x2b 0x83 0xec
+0x05 0x5b 0xa3 0x0c
+0x0f 0x3b 0x20 0x1d
+# CHECK: fstmiax   r3, {d2, d3, d4}
+# CHECK: fstmiaxeq r3!, {d5, d6}
+# CHECK: fstmdbxne r0!, {d3, d4, d5, d6, d7, d8, d9}
+
 0x04 0x7a 0xa6 0x0c
 0x0c 0xfb 0xa4 0x0c
 0x03 0xaa 0xf8 0x0c
diff --git a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
deleted file mode 100644
index f7acce9fc015..000000000000
--- a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=60 Name=BFI Format=ARM_FORMAT_DPFRM(4)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 1| 0: 1: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if d == 15 then UNPREDICTABLE;
-0x16 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
deleted file mode 100644
index 356c376fedea..000000000000
--- a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=2249 Name=tBcc Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if cond = '1110' then UNDEFINED
-0x6f 0xde
diff --git a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
deleted file mode 100644
index bc8b7e10b91d..000000000000
--- a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# invalid imod value (0b01)
-0xc0 0x67 0x4 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
deleted file mode 100644
index 842a52b72928..000000000000
--- a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
-
-# invalid (imod, M, iflags) combination
-0x93 0x00 0x02 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
deleted file mode 100644
index 83961569bd1a..000000000000
--- a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1908 Name=t2DMB Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 0: 1: 1| 1: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 1| 0: 0: 0: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# Inst{3-0} encodes the option: SY, ST, ISH, ISHST, NSH, NSHST, OSH, OSHST.
-# Reject invalid encodings.
-#
-# See also A8.6.42 DSB
-# All other encodings of option are reserved. It is IMPLEMENTATION DEFINED whether options
-# other than SY are implemented. All unsupported and reserved options must execute as a full
-# system DSB operation, but software must not rely on this behavior.
-0xbf 0xf3 0x51 0x8f
diff --git a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
deleted file mode 100644
index 2c6e6a7fa265..000000000000
--- a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=102 Name=DSB Format=ARM_FORMAT_MISCFRM(26)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 1: 0: 1| 0: 1: 1: 1| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 1: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# Inst{3-0} encodes the option: SY, ST, ISH, ISHST, NSH, NSHST, OSH, OSHST.
-# Reject invalid encodings.
-#
-# See also A8.6.42 DSB
-# All other encodings of option are reserved. It is IMPLEMENTATION DEFINED whether options
-# other than SY are implemented. All unsupported and reserved options must execute as a full
-# system DSB operation, but software must not rely on this behavior.
-0x40 0xf0 0x7f 0xf5
diff --git a/test/MC/Disassembler/ARM/invalid-FSTMX-arm.txt b/test/MC/Disassembler/ARM/invalid-FSTMX-arm.txt
new file mode 100644
index 000000000000..b81b4e8fb129
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-FSTMX-arm.txt
@@ -0,0 +1,8 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7 2>&1 | FileCheck %s -check-prefix=CHECK-WARN
+# RUN: llvm-mc --disassemble %s -triple=armv7 2>&1 | FileCheck %s
+
+# offset=1
+# CHECK-WARN: potentially undefined
+# CHECK-WARN: 0x01 0xdb 0x84 0xec
+# CHECK: fstmiax r4, {d13}
+0x01 0xdb 0x84 0xec
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt b/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt
deleted file mode 100644
index 4297c016e86b..000000000000
--- a/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
-
-# CBZ / CBNZ not allowed in IT block.
-
-0xdb 0xbf 0x42 0xbb
diff --git a/test/MC/Disassembler/ARM/invalid-IT-thumb.txt b/test/MC/Disassembler/ARM/invalid-IT-thumb.txt
deleted file mode 100644
index 1a8ff48cd99d..000000000000
--- a/test/MC/Disassembler/ARM/invalid-IT-thumb.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown 2>&1 | grep "potentially undefined instruction encoding"
-
-0xff 0xbf 0x6b 0x80 0x00 0x75
diff --git a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
deleted file mode 100644
index 6cff09e71960..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=0 Name=PHI Format=(42)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 0: 1| 1: 1: 0: 0| 0: 0: 0: 1| 1: 1: 1: 1| 1: 0: 1: 1| 0: 1: 0: 0| 1: 0: 0: 1| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# The bytes have 0b0000 for P,U,D,W; from A8.6.51, it is undefined.
-0x92 0xb4 0x1f 0xdc
-
diff --git a/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt
deleted file mode 100644
index 7d8c492649ac..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
-
-# Writeback is not allowed is Rn is in the target register list.
-
-0xb4 0xe8 0x34 0x04
diff --git a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
deleted file mode 100644
index 68d22debb0e8..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
-
-# Opcode=140 Name=LDRB_POST Format=ARM_FORMAT_LDFRM(6)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 0: 1| 0: 1: 1: 1| 0: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 0: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if wback && (n == 15 || n == t) then UNPREDICTABLE
-0x05 0x70 0xd7 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
deleted file mode 100644
index 4df5309b1366..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1930 Name=t2LDRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 1: 0: 0: 1| 1: 1: 1: 1| 1: 1: 1: 1| 1: 1: 1: 0| 1: 0: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-# 
-# A8.6.66 LDRD (immediate)
-# if Rn = '1111' then SEE LDRD (literal)
-# A8.6.67 LDRD (literal)
-# Inst{21} = 0
-0xff 0xe9 0x0 0xeb
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
deleted file mode 100644
index ecab5a5758e2..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# LDR_PRE/POST has encoding Inst{4} = 0.
-0xde 0x69 0x18 0x46
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
deleted file mode 100644
index 30cb727ece8f..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding"
-
-# Opcode=165 Name=LDR_PRE Format=ARM_FORMAT_LDFRM(6)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 0: 1: 1| 0: 1: 1: 1| 0: 1: 1: 0| 0: 0: 0: 0| 1: 0: 0: 0| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if m == 15 then UNPREDICTABLE
-0x8f 0x60 0xb7 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
deleted file mode 100644
index 7b7286af123f..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# LDR (register) has encoding Inst{4} = 0.
-0xba 0xae 0x9f 0x57
diff --git a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
deleted file mode 100644
index bb4b06c90a62..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=171 Name=MCR Format=ARM_FORMAT_BRFRM(2)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 1: 0| 1: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 0: 0: 0: 1| 1: 0: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C
-0x1b 0x1b 0xa0 0x2e
diff --git a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
deleted file mode 100644
index 528563acb3fc..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=185 Name=MOVTi16 Format=ARM_FORMAT_DPFRM(4)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 0: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if d == 15 then UNPREDICTABLE
-0x00 0xf0 0x41 0xe3
diff --git a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
deleted file mode 100644
index 41ec53f3c70b..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=0 Name=PHI Format=(42)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-# To qualify as a MOV (register) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100.
-# The instruction is UNPREDICTABLE, and is not a valid intruction.
-#
-# See also
-# A8.6.97 MOV (register)
-0x2 0xd0 0xbc 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
deleted file mode 100644
index e5f2a5ecbc15..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 0: 1| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 0: 1: 0: 0| 0: 0: 1: 0| 1: 0: 0: 1| 0: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-# A8.6.89 LSL (register): Inst{7-4} = 0b0001
-0x93 0x42 0xa0 0xd1
diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
deleted file mode 100644
index 3f4c1e5ff96b..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=0 Name=PHI Format=(42)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-# To qualify as an LSL (immediate) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100.
-# The instruction is UNPREDICTABLE, and is not a valid intruction.
-#
-# See also
-# A8.6.88 LSL (immediate)
-# A8.6.98 MOV (shifted register), and
-# I.1 Instruction encoding diagrams and pseudocode
-0x2 0xd1 0xbc 0xf1
-
-
diff --git a/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt b/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt
deleted file mode 100644
index c20ce542a6b2..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s
-
-# CHECK: invalid instruction encoding
-0x00 0x1a 0x50 0xfc
diff --git a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
deleted file mode 100644
index 901667ab9075..000000000000
--- a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=206 Name=MSRi Format=ARM_FORMAT_BRFRM(2)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 0: 0| 0: 0: 1: 1| 0: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 0| 0: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
-# The hints instructions have more specific encodings, so if mask == 0,
-# we should reject this as an invalid instruction.
-0xa7 0xf1 0x20 0x3
diff --git a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
deleted file mode 100644
index 499aa868f904..000000000000
--- a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=134 Name=LDMIA Format=ARM_FORMAT_LDSTMULFRM(10)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 0| 1: 0: 0: 1| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 0: 1| 0: 0: 1: 1| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# B6.1.8 RFE has Inst{15-0} as 0x0a00 ==> Not an RFE instruction
-# A8.6.53 LDM/LDMIA/LDMFD is predicated with Inst{31-28} as cond ==> Not an LDMIA instruction
-0x32 0xb1 0x99 0xf8
diff --git a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
deleted file mode 100644
index 7bc97d5260d4..000000000000
--- a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=271 Name=SBFX Format=ARM_FORMAT_DPFRM(4)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 0: 1: 0| 0: 1: 1: 1| 0: 1: 0: 1| 0: 1: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if d == 15 || n == 15 then UNPREDICTABLE;
-0x5f 0x54 0xa7 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
deleted file mode 100644
index fe4f43aadd7e..000000000000
--- a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=284 Name=SMLAD Format=ARM_FORMAT_MULFRM(1)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 0: 0: 1| 0: 1: 1: 1| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 1: 0| 1: 0: 0: 0| 0: 0: 0: 1| 1: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.167
-# if d == 15 || n == 15 | m == 15 then UNPREDICTABLE
-0x1b 0x68 0xf 0x97
diff --git a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
deleted file mode 100644
index eedd05cea624..000000000000
--- a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=0 Name=PHI Format=(42)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 0: 0| 0: 1: 0: 1| 0: 0: 0: 1| 1: 1: 0: 0| 1: 0: 0: 0| 0: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-# Unknown format
-#
-# B6.1.10 SRS
-# Inst{19-8} = 0xd05
-# Inst{7-5} = 0b000
-0x83 0x1c 0xc5 0xf8
diff --git a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
deleted file mode 100644
index 3d5235de55d7..000000000000
--- a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=2313 Name=tSTMIA_UPD Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 0| 0: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if BitCount(registers) < 1 then UNPREDICTABLE
-0x00 0xc7
diff --git a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
deleted file mode 100644
index f67f38e28e02..000000000000
--- a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=390 Name=SXTBr_rot Format=ARM_FORMAT_EXTFRM(14)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 1: 0: 0| 0: 1: 1: 1| 0: 1: 0: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.223 SXTB
-# if d == 15 || m == 15 then UNPREDICTABLE;
-0x75 0xf4 0xaf 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
deleted file mode 100644
index f57c48f0e22b..000000000000
--- a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=419 Name=UMAAL Format=ARM_FORMAT_MULFRM(1)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 0: 0: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.244 UMAAL
-# if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-0x98 0xbf 0x4f 0xf0
diff --git a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
deleted file mode 100644
index 00b85264686d..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | FileCheck %s
-
-# Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 1: 0: 0| 1: 0: 1: 0| 0: 0: 0: 0| 0: 0: 1: 1| 1: 1: 0: 0| 0: 0: 1: 1| 1: 1: 0: 1|
-# -------------------------------------------------------------------------------------------------
-# 
-# 'a' == 1 and data_size == 8 is invalid
-0x3d 0x3c 0xa0 0xf4
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt
deleted file mode 100644
index 9bb0995ecef8..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
-
-0xa0 0xf9 0x10 0x08
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
deleted file mode 100644
index 58def05f2783..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=871 Name=VLD3DUPd32_UPD Format=ARM_FORMAT_NLdSt(30)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 1: 0: 0| 1: 0: 1: 0| 0: 0: 1: 0| 0: 0: 1: 0| 1: 1: 1: 0| 1: 0: 0: 1| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.315 VLD3 (single 3-element structure to all lanes)
-# The a bit must be encoded as 0.
-0xa2 0xf9 0x92 0x2e
diff --git a/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt
deleted file mode 100644
index 84c98bfbcaf4..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
-
-0xa0 0xf9 0xc0 0x0f
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt
deleted file mode 100644
index 9024b09531cf..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
-
-0xa0 0xf9 0x30 0x0b
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
deleted file mode 100644
index 54fcadb2a4f7..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# core registers out of range
-0xa5 0xba 0x72 0xed
diff --git a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
deleted file mode 100644
index f961c64f7d99..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
-
-# Opcode=1225 Name=VQADDsv16i8 Format=ARM_FORMAT_N3Reg(37)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 0: 1: 0| 0: 1: 0: 0| 0: 0: 0: 0| 1: 1: 1: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# Qm -> bit[0] == 0, otherwise UNDEFINED
-0xdb 0xe0 0x40 0xf2
diff --git a/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt
deleted file mode 100644
index 9462812f26d1..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
-
-0x80 0xf9 0x10 0x08
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
deleted file mode 100644
index 99da8ce9d85a..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1839 Name=VST1d8Twb_register Format=ARM_FORMAT_NLdSt(30)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 1: 0| 0: 0: 1: 0| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.391 VST1 (multiple single elements)
-# This encoding looks like: vst1.8 {d0,d1,d2}, [r0:128]
-# But bits 5-4 for the alignment of 128 encoded as align = 0b10, is available only if <list>
-# contains two or four registers.  rdar://11220250
-0x00 0xf9 0x2f 0x06
diff --git a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
deleted file mode 100644
index 07a1c7aac699..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
-
-# Opcode=1641 Name=VST2b32_UPD Format=ARM_FORMAT_NLdSt(30)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 0| 0: 0: 1: 1| 0: 0: 0: 0| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.393 VST2 (multiple 2-element structures)
-# type == '1001' and align == '11' ==> UNDEFINED
-0xb3 0x9 0x3 0xf4
diff --git a/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt
deleted file mode 100644
index f6e71bcfd65b..000000000000
--- a/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
-
-0x80 0xf9 0x30 0x0b
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-armv7.txt b/test/MC/Disassembler/ARM/invalid-armv7.txt
new file mode 100644
index 000000000000..550173f7823d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-armv7.txt
@@ -0,0 +1,502 @@
+# RUN: not llvm-mc -disassemble %s -mcpu cortex-a8 -triple armv7 2>&1 | FileCheck %s
+
+# This file checks ARMv7 encodings that are globally invalid, usually because
+# the constraints of the instruction are not met (for example, an invalid
+# combination of registers).
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for bfi
+#------------------------------------------------------------------------------
+
+# Opcode=60 Name=BFI Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 1| 0: 1: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == 15 then UNPREDICTABLE;
+[0x16 0xf0 0xcf 0xe7]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x16 0xf0 0xcf 0xe7]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for cdp2
+#------------------------------------------------------------------------------
+
+[0xe0 0x6a 0x0c 0xfe]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xe0 0x6a 0x0c 0xfe]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for cps*
+#------------------------------------------------------------------------------
+
+# invalid imod value (0b01)
+[0xc0 0x67 0x4 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xc0 0x67 0x4 0xf1]
+
+# invalid (imod, M, iflags) combination
+[0x93 0x00 0x02 0xf1]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x93 0x00 0x02 0xf1]
+
+# CPS: various encodings that are ambiguous with other instructions
+[0x9f 0xff 0x4e 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x9f 0xff 0x4e 0xf1]
+
+[0x80 0x80 0x2c 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0x80 0x2c 0xf1]
+
+[0xce 0x3f 0x28 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xce 0x3f 0x28 0xf1]
+
+[0x80 0x00 0x20 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0x00 0x20 0xf1]
+
+[0xa0 0x00 0x00 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xa0 0x00 0x00 0xf1]
+
+
+#------------------------------------------------------------------------------
+# Undefined encoding space for hint instructions
+#------------------------------------------------------------------------------
+
+# FIXME: is it "dbg #14" or not????
+[0xfe 0xf0 0x20 0xe3]
+# CHCK: invalid instruction encoding
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for ldc
+#------------------------------------------------------------------------------
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 0: 1| 1: 1: 0: 0| 0: 0: 0: 1| 1: 1: 1: 1| 1: 0: 1: 1| 0: 1: 0: 0| 1: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# The bytes have 0b0000 for P,U,D,W; from A8.6.51, it is undefined.
+
+[0x92 0xb4 0x1f 0xdc]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x92 0xb4 0x1f 0xdc]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for ldm
+#------------------------------------------------------------------------------
+
+# Opcode=134 Name=LDMIA Format=ARM_FORMAT_LDSTMULFRM(10)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 1: 0: 0: 1| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 0: 1| 0: 0: 1: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# B6.1.8 RFE has Inst{15-0} as 0x0a00 ==> Not an RFE instruction
+# A8.6.53 LDM/LDMIA/LDMFD is predicated with Inst{31-28} as cond ==> Not an LDMIA instruction
+
+[0x32 0xb1 0x99 0xf8]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x32 0xb1 0x99 0xf8]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for ldr
+#------------------------------------------------------------------------------
+
+# Opcode=165 Name=LDR_PRE Format=ARM_FORMAT_LDFRM(6)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 0: 1: 1| 0: 1: 1: 1| 0: 1: 1: 0| 0: 0: 0: 0| 1: 0: 0: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if m == 15 then UNPREDICTABLE
+
+[0x8f 0x60 0xb7 0xe7]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x8f 0x60 0xb7 0xe7]
+
+# LDR (register) has encoding Inst{4} = 0.
+[0xba 0xae 0x9f 0x57]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xba 0xae 0x9f 0x57]
+
+# LDR_PRE/POST has encoding Inst{4} = 0.
+[0xde 0x69 0x18 0x46]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xde 0x69 0x18 0x46]
+
+# Opcode=140 Name=LDRB_POST Format=ARM_FORMAT_LDFRM(6)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 0: 1| 0: 1: 1: 1| 0: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 0: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if wback && (n == 15 || n == t) then UNPREDICTABLE
+[0x05 0x70 0xd7 0xe6]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x05 0x70 0xd7 0xe6]
+
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for mcr
+#------------------------------------------------------------------------------
+
+# Opcode=171 Name=MCR Format=ARM_FORMAT_BRFRM(2)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 1: 0| 1: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 0: 0: 0: 1| 1: 0: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C
+
+[0x1b 0x1b 0xa0 0x2e]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x1b 0x1b 0xa0 0x2e]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for mov/lsl
+#------------------------------------------------------------------------------
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# To qualify as an LSL (immediate) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100.
+# The instruction is UNPREDICTABLE, and is not a valid instruction.
+#
+# See also
+# A8.6.88 LSL (immediate)
+# A8.6.98 MOV (shifted register), and
+# I.1 Instruction encoding diagrams and pseudocode
+
+[0x2 0xd1 0xbc 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x2 0xd1 0xbc 0xf1]
+
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 1| 1: 1: 0: 0| 1: 1: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+# To qualify as a MOV (register) instruction, Inst{19-16} "should" be 0b0000, instead it is = 0b1100.
+# The instruction is UNPREDICTABLE, and is not a valid instruction.
+#
+# See also
+# A8.6.97 MOV (register)
+
+[0x2 0xd0 0xbc 0xf1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x2 0xd0 0xbc 0xf1]
+
+# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 0: 1| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 0: 1: 0: 0| 0: 0: 1: 0| 1: 0: 0: 1| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# A8.6.89 LSL (register): Inst{7-4} = 0b0001
+[0x93 0x42 0xa0 0xd1]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x93 0x42 0xa0 0xd1]
+
+# Opcode=185 Name=MOVTi16 Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 0: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == 15 then UNPREDICTABLE
+[0x00 0xf0 0x41 0xe3]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x00 0xf0 0x41 0xe3]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for mrrc2
+#------------------------------------------------------------------------------
+
+[0x00 0x1a 0x50 0xfc]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x1a 0x50 0xfc]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for msr (imm)
+#------------------------------------------------------------------------------
+
+# Opcode=206 Name=MSRi Format=ARM_FORMAT_BRFRM(2)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 1: 1| 0: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 1: 0: 1: 0| 0: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
+# The hints instructions have more specific encodings, so if mask == 0,
+# we should reject this as an invalid instruction.
+
+[0xa7 0xf1 0x20 0x3]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xa7 0xf1 0x20 0x3]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for sbfx
+#------------------------------------------------------------------------------
+
+# Opcode=271 Name=SBFX Format=ARM_FORMAT_DPFRM(4)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 0: 1: 0| 0: 1: 1: 1| 0: 1: 0: 1| 0: 1: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == 15 || n == 15 then UNPREDICTABLE;
+
+[0x5f 0x54 0xa7 0xe7]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x5f 0x54 0xa7 0xe7]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for smlad
+#------------------------------------------------------------------------------
+
+# Opcode=284 Name=SMLAD Format=ARM_FORMAT_MULFRM(1)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 0: 0: 1| 0: 1: 1: 1| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 1: 0| 1: 0: 0: 0| 0: 0: 0: 1| 1: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.167
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE
+
+[0x1b 0x68 0xf 0x97]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x1b 0x68 0xf 0x97]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for srs
+#------------------------------------------------------------------------------
+
+# Opcode=0 Name=PHI Format=(42)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 0: 0| 0: 1: 0: 1| 0: 0: 0: 1| 1: 1: 0: 0| 1: 0: 0: 0| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# Unknown format
+#
+# B6.1.10 SRS
+# Inst{19-8} = 0xd05
+# Inst{7-5} = 0b000
+
+[0x83 0x1c 0xc5 0xf8]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x83 0x1c 0xc5 0xf8]
+
+[0x00 0x00 0x20 0xf8]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x00 0x20 0xf8]
+
+[0xff 0xff 0xaf 0xf8]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xff 0xff 0xaf 0xf8]
+
+[0x13 0x00 0xa0 0xf8]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x13 0x00 0xa0 0xf8]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for sxtb
+#------------------------------------------------------------------------------
+
+# Opcode=390 Name=SXTBr_rot Format=ARM_FORMAT_EXTFRM(14)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 1: 0: 0| 0: 1: 1: 1| 0: 1: 0: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.223 SXTB
+# if d == 15 || m == 15 then UNPREDICTABLE;
+
+[0x75 0xf4 0xaf 0xe6]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x75 0xf4 0xaf 0xe6]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for umaal
+#------------------------------------------------------------------------------
+
+# Opcode=419 Name=UMAAL Format=ARM_FORMAT_MULFRM(1)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 0: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.244 UMAAL
+# if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+[0x98 0xbf 0x4f 0xf0]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x98 0xbf 0x4f 0xf0]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vcvt (float <-> fixed)
+#------------------------------------------------------------------------------
+
+# imm6=0b0xxxxx -> UNDEFINED
+[0x1e 0xcf 0x92 0xf3]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x1e 0xcf 0x92 0xf3]
+
+[0x3e 0xcf 0x92 0xf3]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x3e 0xcf 0x92 0xf3]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vext
+#------------------------------------------------------------------------------
+
+# invalid imm4 value (0b1xxx)
+# A8.8.316: if Q == '0' && imm4<3> == '1' then UNDEFINED;
+[0x8f 0xf9 0xf7 0xf2]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x8f 0xf9 0xf7 0xf2]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vldmsdb
+#------------------------------------------------------------------------------
+
+# core registers out of range
+[0xa5 0xba 0x72 0xed]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xa5 0xba 0x72 0xed]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vmov
+#------------------------------------------------------------------------------
+
+# VMOV cmode=0b1111 op=1 is UNDEFINED
+[0x70 0xef 0xc7 0xf3]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x70 0xef 0xc7 0xf3]
+
+# VMOV cmode=0b1111 op=1 is UNDEFINED
+[0x30 0x0f 0x80 0xf3]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x30 0x0f 0x80 0xf3]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vqadd
+#------------------------------------------------------------------------------
+
+# Opcode=1225 Name=VQADDsv16i8 Format=ARM_FORMAT_N3Reg(37)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 0: 1: 0| 0: 1: 0: 0| 0: 0: 0: 0| 1: 1: 1: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# Qm -> bit[0] == 0, otherwise UNDEFINED
+[0xdb 0xe0 0x40 0xf2]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xdb 0xe0 0x40 0xf2]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vld/vst
+#------------------------------------------------------------------------------
+
+# A8.6.393 VST2 (multiple 2-element structures)
+[0xb3 0x09 0x03 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xb3 0x09 0x03 0xf4]
+
+# size == '11' ==> UNDEFINED
+[0xc3 0x08 0x03 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xc3 0x08 0x03 0xf4]
+
+# type == '1000' and align == '11' ==> UNDEFINED
+[0xb3 0x08 0x03 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xb3 0x08 0x03 0xf4]
+
+# VST1 multi-element, type == 0b0111, align == 0b10 -> undefined
+[0xaf 0xb7 0x07 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xaf 0xb7 0x07 0xf4]
+
+# VST1 multi-element, type == 0b0111, align == 0b11 -> undefined
+[0xbf 0xb7 0x07 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xbf 0xb7 0x07 0xf4]
+
+# VST1 multi-element, type == 0b1010, align == 0b11 -> undefined
+[0xbf 0x8a 0x03 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xbf 0x8a 0x03 0xf4]
+
+# VST1 multi-element, type == 0b0110, align == 0b10 -> undefined
+[0xaf 0xb6 0x07 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xaf 0xb6 0x07 0xf4]
+
+# VST1 multi-element, type == 0b0110, align == 0b11 -> undefined
+[0xbf 0xb6 0x07 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xbf 0xb6 0x07 0xf4]
+
+# VST2 multi-element, type == 0b0100, align == 0b11 -> undefined
+[0x4f 0xa8 0x07 0xf7]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x4f 0xa8 0x07 0xf7]
+
+# VST2 multi-element, type == 0b0100, align == 0b11 -> undefined
+[0x4f 0xa9 0x07 0xf7]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x4f 0xa9 0x07 0xf7]
+
+# VST3 multi-element, size = 0b11 -> undefined
+[0xbf 0xa4 0x0b 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xbf 0xa4 0x0b 0xf4]
+
+# VST3 multi-element, align = 0b10 -> undefined
+[0x6f 0xa4 0x0b 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x6f 0xa4 0x0b 0xf4]
+
+# VST3 multi-element, align = 0b11 -> undefined
+[0x7f 0xa4 0x0b 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x7f 0xa4 0x0b 0xf4]
+
+# VST4 multi-element, size = 0b11 -> undefined
+[0xcf 0x50 0x03 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xcf 0x50 0x03 0xf4]
+
+
+# Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 0: 0| 1: 0: 1: 0| 0: 0: 0: 0| 0: 0: 1: 1| 1: 1: 0: 0| 0: 0: 1: 1| 1: 1: 0: 1|
+# -------------------------------------------------------------------------------------------------
+# 
+# 'a' == 1 and data_size == 8 is invalid
+[0x3d 0x3c 0xa0 0xf4]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x3d 0x3c 0xa0 0xf4]
diff --git a/test/MC/Disassembler/ARM/invalid-armv8.txt b/test/MC/Disassembler/ARM/invalid-armv8.txt
new file mode 100644
index 000000000000..772ff1ddca96
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-armv8.txt
@@ -0,0 +1,167 @@
+# RUN: not llvm-mc -triple armv8 -show-encoding -disassemble %s 2>&1 | FileCheck %s
+
+# Coprocessors other than CP10, CP11, CP14 and CP15 are undefined in ARMv8,
+# but in ARMv7 all of these instructions are valid.
+
+# RUN: llvm-mc -triple armv7 -show-encoding -disassemble %s | FileCheck %s --check-prefix=CHECK-V7
+
+[0x00 0x01 0x00 0xee]
+# CHECK-V7: cdp
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x00 0xee]
+
+[0x00 0x0e 0x00 0xee]
+# CHECK-V7: cdp
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0e 0x00 0xee]
+
+[0x00 0x0f 0x00 0xee]
+# CHECK-V7: cdp
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x00 0xee]
+
+[0x00 0x01 0x00 0xfe]
+# CHECK-V7: cdp2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x00 0xfe]
+
+[0x00 0x0e 0x00 0xfe]
+# CHECK-V7: cdp2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0e 0x00 0xfe]
+
+[0x00 0x0f 0x00 0xfe]
+# CHECK-V7: cdp2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x00 0xfe]
+
+[0x10 0x01 0x00 0xee]
+# CHECK-V7: mcr
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x01 0x00 0xee]
+
+[0x10 0x01 0x00 0xfe]
+# CHECK-V7: mcr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x01 0x00 0xfe]
+
+[0x10 0x0e 0x00 0xfe]
+# CHECK-V7: mcr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x0e 0x00 0xfe]
+
+[0x10 0x0f 0x00 0xfe]
+# CHECK-V7: mcr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x0f 0x00 0xfe]
+
+[0x10 0x01 0x10 0xee]
+# CHECK-V7: mrc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x01 0x10 0xee]
+
+[0x10 0x01 0x10 0xfe]
+# CHECK-V7: mrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x01 0x10 0xfe]
+
+[0x10 0x0e 0x10 0xfe]
+# CHECK-V7: mrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x0e 0x10 0xfe]
+
+[0x10 0x0f 0x10 0xfe]
+# CHECK-V7: mrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0x0f 0x10 0xfe]
+
+[0x00 0x01 0x40 0xec]
+# CHECK-V7: mcrr
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x40 0xec]
+
+[0x00 0x01 0x40 0xfc]
+# CHECK-V7: mcrr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x40 0xfc]
+
+[0x00 0x0e 0x40 0xfc]
+# CHECK-V7: mcrr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0e 0x40 0xfc]
+
+[0x00 0x0f 0x40 0xfc]
+# CHECK-V7: mcrr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x40 0xfc]
+
+[0x00 0x01 0x50 0xec]
+# CHECK-V7: mrrc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x50 0xec]
+
+[0x00 0x0e 0x50 0xfc]
+# CHECK-V7: mrrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0e 0x50 0xfc]
+
+[0x00 0x0f 0x50 0xfc]
+# CHECK-V7: mrrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x50 0xfc]
+
+[0x00 0x01 0x50 0xfc]
+# CHECK-V7: mrrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x50 0xfc]
+
+[0x00 0x01 0x80 0xec]
+# CHECK-V7: stc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x80 0xec]
+
+[0x00 0x0f 0x80 0xec]
+# CHECK-V7: stc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x80 0xec]
+
+[0x00 0x01 0x80 0xfc]
+# CHECK-V7: stc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x80 0xfc]
+
+[0x00 0x0e 0x80 0xfc]
+# CHECK-V7: stc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0e 0x80 0xfc]
+
+[0x00 0x0f 0x80 0xfc]
+# CHECK-V7: stc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x80 0xfc]
+
+[0x00 0x01 0x90 0xec]
+# CHECK-V7: ldc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x90 0xec]
+
+[0x00 0x0f 0x90 0xec]
+# CHECK-V7: ldc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x90 0xec]
+
+[0x00 0x01 0x90 0xfc]
+# CHECK-V7: ldc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x01 0x90 0xfc]
+
+[0x00 0x0e 0x90 0xfc]
+# CHECK-V7: ldc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0e 0x90 0xfc]
+
+[0x00 0x0f 0x90 0xfc]
+# CHECK-V7: ldc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0x0f 0x90 0xfc]
+
diff --git a/test/MC/Disassembler/ARM/invalid-because-armv7.txt b/test/MC/Disassembler/ARM/invalid-because-armv7.txt
new file mode 100644
index 000000000000..beed8e4eb018
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-because-armv7.txt
@@ -0,0 +1,26 @@
+# RUN: not llvm-mc -disassemble -triple armv7 -show-encoding < %s 2>&1 | FileCheck %s
+
+# This file is checking encodings that are valid on some triples, but not on the
+# ARMv7 triple, probably because the relevant instruction is v8, though there
+# could be other reasons.
+
+# Would be vcvtt.f64.f16 d3, s1
+[0xe0 0x3b 0xb2 0xee]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xe0 0x3b 0xb2 0xee]
+
+# Would be vcvtb.f16.f64 s4, d1
+[0x41 0x2b 0xb3 0xee]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x41 0x2b 0xb3 0xee]
+
+# Would be vcvtblt.f16.f64 s4, d1
+[0x41 0x2b 0xb3 0xbe]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x41 0x2b 0xb3 0xbe]
+
+# Would be vmrs r0, mvfr2
+[0x10 0xa 0xf5 0xee]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0xa 0xf5 0xee]
+
diff --git a/test/MC/Disassembler/ARM/invalid-hint-arm.txt b/test/MC/Disassembler/ARM/invalid-hint-arm.txt
deleted file mode 100644
index 7da96d8f15ce..000000000000
--- a/test/MC/Disassembler/ARM/invalid-hint-arm.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble < %s 2>&1 | FileCheck %s
-
-#------------------------------------------------------------------------------
-# Undefined encoding space for hint instructions
-#------------------------------------------------------------------------------
-
-0x05 0xf0 0x20 0xe3
-# CHECK: invalid instruction encoding
-0x41 0xf0 0x20 0xe3
-# CHECK: invalid instruction encoding
-0xfe 0xf0 0x20 0xe3
-# CHECK: invalid instruction encoding
-
diff --git a/test/MC/Disassembler/ARM/invalid-hint-thumb.txt b/test/MC/Disassembler/ARM/invalid-hint-thumb.txt
deleted file mode 100644
index 1e4133668b84..000000000000
--- a/test/MC/Disassembler/ARM/invalid-hint-thumb.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: llvm-mc -triple=thumbv7 -disassemble -show-encoding < %s 2>&1 | FileCheck %s
-
-#------------------------------------------------------------------------------
-# Undefined encoding space for hint instructions
-#------------------------------------------------------------------------------
-
-0xaf 0xf3 0x05 0x80
-# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
deleted file mode 100644
index c9f1cf12d197..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1894 Name=t2Bcc Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 0: 1: 1: 1| 1: 0: 1: 0| 1: 1: 1: 1| 1: 0: 0: 0| 1: 0: 1: 1| 0: 1: 0: 0| 0: 1: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.16 B
-# if cond<3:1> == '111' then SEE "Related Encodings"
-0xaf 0xf7 0x44 0x8b
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
deleted file mode 100644
index eb415f755ebf..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1922 Name=t2LDRBT Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 0| 0: 0: 0: 1| 0: 0: 0: 0| 1: 1: 1: 1| 1: 1: 1: 0| 0: 0: 0: 0| 0: 0: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# The unpriviledged Load/Store cannot have SP or PC as Rt.
-0x10 0xf8 0x3 0xfe
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
deleted file mode 100644
index 6c13560a3127..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
-
-# Opcode=1934 Name=t2LDREXD Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 1| 0: 0: 1: 0| 1: 0: 0: 0| 1: 0: 0: 0| 0: 1: 1: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-# 
-# if t == t2 then UNPREDICTABLE
-0xd2 0xe8 0x7f 0x88
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
deleted file mode 100644
index 7f84e0813096..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1953 Name=t2LDRSHi12 Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if Rt = '1111' then SEE "Unallocated memory hints"
-0xb3 0xf9 0xdf 0xf8
diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
deleted file mode 100644
index e44cf952cd36..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=1954 Name=t2LDRSHi8 Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 1: 1| 0: 1: 0: 1| 1: 1: 1: 1| 1: 1: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if Rt == '1111' and PUW == '100' then SEE "Unallocated memory hints"
-0x35 0xf9 0x00 0xfc
diff --git a/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt
deleted file mode 100644
index 8c0d48bd56c0..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# SP and PC are not allowed in the register list on STM instructions in Thumb2.
-
-0x2d 0xe9 0xf7 0xb6
diff --git a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
deleted file mode 100644
index 64ba3685cd9b..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
-
-# Opcode=2124 Name=t2STRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 1: 0: 0: 1| 1: 1: 1: 0| 0: 1: 0: 0| 0: 1: 0: 0| 0: 1: 1: 0| 0: 0: 0: 0| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-# 
-# if wback && (n == t || n == t2) then UNPREDICTABLE
-0xe4 0xe9 0x02 0x46
diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
deleted file mode 100644
index 243c11d56137..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
-
-# Opcode=2127 Name=t2STREXB Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 0| 0: 0: 1: 0| 1: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-# 
-# if d == n || d == t then UNPREDICTABLE
-0xc2 0xe8 0x42 0x8f
diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
deleted file mode 100644
index 7a7c4a5c54f5..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=2128 Name=t2STREXD Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 0| 0: 0: 1: 0| 0: 1: 1: 1| 1: 0: 0: 0| 0: 1: 1: 1| 1: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if d == n || d == t || d == t2 then UNPREDICTABLE
-mc-input.txt:1:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
deleted file mode 100644
index 2ad3e7df9ce3..000000000000
--- a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# Opcode=2137 Name=t2STR_POST Format=ARM_FORMAT_THUMBFRM(25)
-#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 0| 1: 0: 1: 1| 1: 1: 1: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if Rn == '1111' then UNDEFINED
-0x4f 0xf8 0xff 0xeb
diff --git a/test/MC/Disassembler/ARM/invalid-thumbv7-xfail.txt b/test/MC/Disassembler/ARM/invalid-thumbv7-xfail.txt
new file mode 100644
index 000000000000..ca5dd65e0a68
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-thumbv7-xfail.txt
@@ -0,0 +1,38 @@
+# RUN: llvm-mc -disassemble %s -triple thumbv7 2>&1 | FileCheck %s
+# XFAIL: *
+
+#------------------------------------------------------------------------------
+# Undefined encodings for ldrexd/strexd
+#------------------------------------------------------------------------------
+
+# FIXME: "ldrexd r8, r8, [r2]"
+# Rt == Rt2 is UNPREDICTABLE
+
+[0xd2 0xe8 0x7f 0x88]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xd2 0xe8 0x7f 0x88]
+
+# Opcode=2127 Name=t2STREXB Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 0| 0: 0: 1: 0| 1: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == n || d == t then UNPREDICTABLE
+
+[0xc2 0xe8 0x42 0x8f]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xc2 0xe8 0x42 0x8f]
+
+# Opcode=2128 Name=t2STREXD Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 0| 1: 1: 0: 0| 0: 0: 1: 0| 0: 1: 1: 1| 1: 0: 0: 0| 0: 1: 1: 1| 1: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if d == n || d == t || d == t2 then UNPREDICTABLE
+
+# FIXME: should be unpredictable since it's "strexd r8, r7, r8, [r2]"
+[0xc2 0xe8 0x78 0x78]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xc2 0xe8 0x78 0x78]
diff --git a/test/MC/Disassembler/ARM/invalid-thumbv7.txt b/test/MC/Disassembler/ARM/invalid-thumbv7.txt
new file mode 100644
index 000000000000..2c84b8a7aa57
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-thumbv7.txt
@@ -0,0 +1,407 @@
+# RUN: not llvm-mc -disassemble %s -mcpu cortex-a8 -triple thumbv7 2>&1 | FileCheck %s
+
+# This file is checking Thumbv7 encodings which are globally invalid, usually due
+# to the constraints of the instructions not being met. For example invalid
+# combinations of registers.
+
+#------------------------------------------------------------------------------
+# Undefined encoding for b.cc
+#------------------------------------------------------------------------------
+
+# Opcode=1894 Name=t2Bcc Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 1: 1| 1: 0: 1: 0| 1: 1: 1: 1| 1: 0: 0: 0| 1: 0: 1: 1| 0: 1: 0: 0| 0: 1: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.16 B
+# if cond<3:1> == '111' then SEE "Related Encodings"
+
+[0xaf 0xf7 0x44 0x8b]
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0xaf 0xf7 0x44 0x8b]
+
+# Opcode=2249 Name=tBcc Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if cond = '1110' then UNDEFINED
+[0x6f 0xde]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x6f 0xde]
+
+#------------------------------------------------------------------------------
+# Undefined encoding for it
+#------------------------------------------------------------------------------
+
+[0xff 0xbf 0x6b 0x80 0x00 0x75]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xff 0xbf 0x6b 0x80 0x00 0x75]
+
+[0x50 0xbf] # hint #5; legal as the third instruction for the iteee above
+
+# Two warnings from this block since there are two instructions in there
+[0xdb 0xbf 0x42 0xbb]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xdb 0xbf 0x42 0xbb]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xdb 0xbf 0x42 0xbb]
+
+#------------------------------------------------------------------------------
+# Undefined encoding for ldm
+#------------------------------------------------------------------------------
+
+# Writeback is not allowed if Rn is in the target register list.
+[0xb4 0xe8 0x34 0x04]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0xb4 0xe8 0x34 0x04]
+
+
+#------------------------------------------------------------------------------
+# Undefined encoding for ldrd
+#------------------------------------------------------------------------------
+
+# Opcode=1930 Name=t2LDRD_PRE Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 1: 0: 0: 1| 1: 1: 1: 1| 1: 1: 1: 1| 1: 1: 1: 0| 1: 0: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.66 LDRD (immediate)
+# if Rn = '1111' then SEE LDRD (literal)
+# A8.6.67 LDRD (literal)
+# Inst{21} = 0
+
+[0xff 0xe9 0x0 0xeb]
+# CHECK: potentially undefined
+# CHECK-NEXT: [0xff 0xe9 0x0 0xeb]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for ldrbt
+#------------------------------------------------------------------------------
+
+# Opcode=1922 Name=t2LDRBT Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 0: 0: 0: 1| 0: 0: 0: 0| 1: 1: 1: 1| 1: 1: 1: 0| 0: 0: 0: 0| 0: 0: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# The unprivileged Load/Store cannot have SP or PC as Rt.
+[0x10 0xf8 0x3 0xfe]
+# CHECK: potentially undefined instruction encoding
+# CHECK-NEXT: [0x10 0xf8 0x3 0xfe]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for ldrsh
+#------------------------------------------------------------------------------
+
+# invalid LDRSHs Rt=PC
+[0x30 0xf9 0x00 0xf0]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x30 0xf9 0x00 0xf0]
+
+# invalid LDRSHi8 Rt=PC
+[0x30 0xf9 0x00 0xfc]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x30 0xf9 0x00 0xfc]
+
+# invalid LDRSHi12 Rt=PC
+[0xb0 0xf9 0x00 0xf0]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xb0 0xf9 0x00 0xf0]
+
+# Opcode=1954 Name=t2LDRSHi8 Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 1: 1| 0: 1: 0: 1| 1: 1: 1: 1| 1: 1: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if Rt == '1111' and PUW == '100' then SEE "Unallocated memory hints"
+[0x35 0xf9 0x00 0xfc]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x35 0xf9 0x00 0xfc]
+
+# Opcode=1953 Name=t2LDRSHi12 Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 1: 0: 1: 1| 0: 0: 1: 1| 1: 1: 1: 1| 1: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if Rt = '1111' then SEE "Unallocated memory hints"
+[0xb3 0xf9 0xdf 0xf8]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xb3 0xf9 0xdf 0xf8]
+
+
+#------------------------------------------------------------------------------
+# Undefined encoding for push
+#------------------------------------------------------------------------------
+
+# SP and PC are not allowed in the register list on STM instructions in Thumb2.
+[0x2d 0xe9 0xf7 0xb6]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x2d 0xe9 0xf7 0xb6]
+
+
+#------------------------------------------------------------------------------
+# Undefined encoding for stmia
+#------------------------------------------------------------------------------
+
+# Opcode=2313 Name=tSTMIA_UPD Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 0| 0: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if BitCount(registers) < 1 then UNPREDICTABLE
+[0x00 0xc7]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xc7]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for str
+#------------------------------------------------------------------------------
+
+# invalid STRi12   Rn=PC
+[0xcf 0xf8 0x00 0x00]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xcf 0xf8 0x00 0x00]
+
+# invalid STRi8    Rn=PC
+[0x4f 0xf8 0x00 0x0c]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x4f 0xf8 0x00 0x0c]
+
+# invalid STRs     Rn=PC
+[0x4f 0xf8 0x00 0x00]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x4f 0xf8 0x00 0x00]
+
+# invalid STRBi12  Rn=PC
+[0x0f 0xf8 0x00 0x00]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x0f 0xf8 0x00 0x00]
+
+# invalid STRBi8   Rn=PC
+[0x0f 0xf8 0x00 0x0c]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x0f 0xf8 0x00 0x0c]
+
+# invalid STRBs    Rn=PC
+[0x0f 0xf8 0x00 0x00]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x0f 0xf8 0x00 0x00]
+
+# invalid STRHi12  Rn=PC
+[0xaf 0xf8 0x00 0x00]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xaf 0xf8 0x00 0x00]
+
+# invalid STRHi8   Rn=PC
+[0x2f 0xf8 0x00 0x0c]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x2f 0xf8 0x00 0x0c]
+
+# invalid STRHs    Rn=PC
+[0x2f 0xf8 0x00 0x00]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x2f 0xf8 0x00 0x00]
+
+# invalid STRBT    Rn=PC
+[0x0f 0xf8 0x00 0x0e]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x0f 0xf8 0x00 0x0e]
+
+# invalid STRHT    Rn=PC
+[0x2f 0xf8 0x00 0x0e]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x2f 0xf8 0x00 0x0e]
+
+# invalid STRT     Rn=PC
+[0x4f 0xf8 0x00 0x0e]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x4f 0xf8 0x00 0x0e]
+
+# Opcode=2137 Name=t2STR_POST Format=ARM_FORMAT_THUMBFRM(25)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 0| 1: 0: 1: 1| 1: 1: 1: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# if Rn == '1111' then UNDEFINED
+
+[0x4f 0xf8 0xff 0xeb]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x4f 0xf8 0xff 0xeb]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for strd
+#------------------------------------------------------------------------------
+
+# Rt == Rn is UNPREDICTABLE
+[0xe4 0xe9 0x02 0x46]
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: [0xe4 0xe9 0x02 0x46]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON/VFP instructions with invalid predicate bits
+#------------------------------------------------------------------------------
+
+# VABS
+[0x40 0xde 0x00 0x0a]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x40 0xde 0x00 0x0a]
+
+
+# VMLA
+[0xf0 0xde 0xe0 0x0b]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xf0 0xde 0xe0 0x0b]
+
+# VMOV/VDUP between scalar and core registers with invalid predicate bits (pred != 0b1110)
+
+# VMOV
+[0x00 0xde 0x10 0x0b]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xde 0x10 0x0b]
+
+# VDUP
+[0xff 0xde 0xf0 0xfb]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xff 0xde 0xf0 0xfb]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vld instructions
+#------------------------------------------------------------------------------
+
+# size == '10' and index_align == '0001' so UNDEFINED
+[0xa0 0xf9 0x10 0x08]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xa0 0xf9 0x10 0x08]
+
+
+# vld3
+
+# Opcode=871 Name=VLD3DUPd32_UPD Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 0: 1: 0: 0| 1: 0: 1: 0| 0: 0: 1: 0| 0: 0: 1: 0| 1: 1: 1: 0| 1: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.315 VLD3 (single 3-element structure to all lanes)
+# The a bit must be encoded as 0.
+
+[0xa2 0xf9 0x92 0x2e]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xa2 0xf9 0x92 0x2e]
+
+
+# Some vld4 ones
+# size == '11' and a == '0' so UNDEFINED
+[0xa0 0xf9 0xc0 0x0f]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xa0 0xf9 0xc0 0x0f]
+
+[0xa0 0xf9 0x30 0x0b]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0xa0 0xf9 0x30 0x0b]
+
+
+# VLD1 multi-element, type=0b1010 align=0b11
+[0x24 0xf9 0xbf 0x8a]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x24 0xf9 0xbf 0x8a]
+
+# VLD1 multi-element type=0b0111 align=0b1x
+[0x24 0xf9 0xbf 0x87]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x24 0xf9 0xbf 0x87]
+
+# VLD1 multi-element type=0b0110 align=0b1x
+[0x24 0xf9 0xbf 0x86]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x24 0xf9 0xbf 0x86]
+
+# VLD2 multi-element size=0b11
+[0x60 0xf9 0xcf 0x08]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x60 0xf9 0xcf 0x08]
+
+# VLD2 multi-element type=0b1000 align=0b11
+[0x60 0xf9 0xbf 0x08]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x60 0xf9 0xbf 0x08]
+
+# VLD2 multi-element type=0b1001 align=0b11
+[0x60 0xf9 0xbf 0x09]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x60 0xf9 0xbf 0x09]
+
+# VLD3 multi-element align=0b1x
+[0x60 0xf9 0x7f 0x04]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x60 0xf9 0x7f 0x04]
+
+# VLD3 multi-element size=0b11
+[0x60 0xf9 0xcf 0x04]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x60 0xf9 0xcf 0x04]
+
+# VLD4 multi-element size=0b11
+[0x60 0xf9 0xcd 0x11]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x60 0xf9 0xcd 0x11]
+
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vst1
+#------------------------------------------------------------------------------
+
+# size == '10' and index_align == '0001' so UNDEFINED
+[0x80 0xf9 0x10 0x08]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xf9 0x10 0x08]
+
+# Opcode=1839 Name=VST1d8Twb_register Format=ARM_FORMAT_NLdSt(30)
+#  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 1: 0| 0: 0: 1: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.391 VST1 (multiple single elements)
+# This encoding looks like: vst1.8 {d0,d1,d2}, [r0:128]
+# But bits 5-4 for the alignment of 128 encoded as align = 0b10, is available only if <list>
+# contains two or four registers.  rdar://11220250
+[0x00 0xf9 0x2f 0x06]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xf9 0x2f 0x06]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for NEON vst4
+#------------------------------------------------------------------------------
+
+[0x80 0xf9 0x30 0x0b]
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xf9 0x30 0x0b]
+
+
+#------------------------------------------------------------------------------
+# Unpredictable STMs
+#------------------------------------------------------------------------------
+
+# 32-bit Thumb STM instructions cannot have a writeback register which appears
+# in the list.
+
+[0xa1,0xe8,0x07,0x04]
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: [0xa1,0xe8,0x07,0x04]
+
+[0x21,0xe9,0x07,0x04]
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: [0x21,0xe9,0x07,0x04]
diff --git a/test/MC/Disassembler/ARM/invalid-thumbv8.txt b/test/MC/Disassembler/ARM/invalid-thumbv8.txt
new file mode 100644
index 000000000000..4c6b249c7906
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-thumbv8.txt
@@ -0,0 +1,167 @@
+# RUN: not llvm-mc -disassemble %s -show-encoding -triple thumbv8 2>&1 | FileCheck %s
+
+# Coprocessors other than CP10, CP11, CP14 and CP15 are undefined in ARMv8;
+# but in ARMv7, all these instructions are valid
+
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble %s | FileCheck %s --check-prefix=CHECK-V7
+
+[0x00 0xee 0x00 0x01]
+# CHECK-V7: cdp
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xee 0x00 0x01]
+
+[0x00 0xee 0x00 0x0e]
+# CHECK-V7: cdp
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xee 0x00 0x0e]
+
+[0x00 0xee 0x00 0x0f]
+# CHECK-V7: cdp
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xee 0x00 0x0f]
+
+[0x00 0xfe 0x00 0x01]
+# CHECK-V7: cdp2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xfe 0x00 0x01]
+
+[0x00 0xfe 0x00 0x0e]
+# CHECK-V7: cdp2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xfe 0x00 0x0e]
+
+[0x00 0xfe 0x00 0x0f]
+# CHECK-V7: cdp2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xfe 0x00 0x0f]
+
+[0x00 0xee 0x10 0x01]
+# CHECK-V7: mcr
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xee 0x10 0x01]
+
+[0x00 0xfe 0x10 0x01]
+# CHECK-V7: mcr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xfe 0x10 0x01]
+
+[0x00 0xfe 0x10 0x0e]
+# CHECK-V7: mcr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xfe 0x10 0x0e]
+
+[0x00 0xfe 0x10 0x0f]
+# CHECK-V7: mcr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x00 0xfe 0x10 0x0f]
+
+[0x10 0xee 0x10 0x01]
+# CHECK-V7: mrc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0xee 0x10 0x01]
+
+[0x10 0xfe 0x10 0x01]
+# CHECK-V7: mrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0xfe 0x10 0x01]
+
+[0x10 0xfe 0x10 0x0e]
+# CHECK-V7: mrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0xfe 0x10 0x0e]
+
+[0x10 0xfe 0x10 0x0f]
+# CHECK-V7: mrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x10 0xfe 0x10 0x0f]
+
+[0x40 0xec 0x00 0x01]
+# CHECK-V7: mcrr
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x40 0xec 0x00 0x01]
+
+[0x40 0xfc 0x00 0x01]
+# CHECK-V7: mcrr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x40 0xfc 0x00 0x01]
+
+[0x40 0xfc 0x00 0x0e]
+# CHECK-V7: mcrr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x40 0xfc 0x00 0x0e]
+
+[0x40 0xfc 0x00 0x0f]
+# CHECK-V7: mcrr2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x40 0xfc 0x00 0x0f]
+
+[0x50 0xec 0x00 0x01]
+# CHECK-V7: mrrc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x50 0xec 0x00 0x01]
+
+[0x50 0xfc 0x00 0x0e]
+# CHECK-V7: mrrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x50 0xfc 0x00 0x0e]
+
+[0x50 0xfc 0x00 0x0f]
+# CHECK-V7: mrrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x50 0xfc 0x00 0x0f]
+
+[0x50 0xfc 0x00 0x01]
+# CHECK-V7: mrrc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x50 0xfc 0x00 0x01]
+
+[0x80 0xec 0x00 0x01]
+# CHECK-V7: stc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xec 0x00 0x01]
+
+[0x80 0xec 0x00 0x0f]
+# CHECK-V7: stc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xec 0x00 0x0f]
+
+[0x80 0xfc 0x00 0x01]
+# CHECK-V7: stc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xfc 0x00 0x01]
+
+[0x80 0xfc 0x00 0x0e]
+# CHECK-V7: stc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xfc 0x00 0x0e]
+
+[0x80 0xfc 0x00 0x0f]
+# CHECK-V7: stc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xfc 0x00 0x0f]
+
+[0x90 0xec 0x00 0x01]
+# CHECK-V7: ldc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x90 0xec 0x00 0x01]
+
+[0x90 0xec 0x00 0x0f]
+# CHECK-V7: ldc
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x90 0xec 0x00 0x0f]
+
+[0x90 0xfc 0x00 0x01]
+# CHECK-V7: ldc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x90 0xfc 0x00 0x01]
+
+[0x90 0xfc 0x00 0x0e]
+# CHECK-V7: ldc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x90 0xfc 0x00 0x0e]
+
+[0x90 0xfc 0x00 0x0f]
+# CHECK-V7: ldc2
+# CHECK: invalid instruction encoding
+# CHECK-NEXT: [0x90 0xfc 0x00 0x0f]
+
diff --git a/test/MC/Disassembler/ARM/lit.local.cfg b/test/MC/Disassembler/ARM/lit.local.cfg
index 22a76e5cab2d..8a3ba96497e7 100644
--- a/test/MC/Disassembler/ARM/lit.local.cfg
+++ b/test/MC/Disassembler/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.txt']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/MC/Disassembler/ARM/load-store-acquire-release-v8-thumb.txt b/test/MC/Disassembler/ARM/load-store-acquire-release-v8-thumb.txt
new file mode 100644
index 000000000000..8a2ba74261a6
--- /dev/null
+++ b/test/MC/Disassembler/ARM/load-store-acquire-release-v8-thumb.txt
@@ -0,0 +1,33 @@
+# RUN: llvm-mc -triple=thumbv8 -disassemble -show-encoding < %s | FileCheck %s
+
+0xd4 0xe8 0xcf 0x3f
+0xd5 0xe8 0xdf 0x2f
+0xd7 0xe8 0xef 0x1f
+0xd8 0xe8 0xff 0x67
+# CHECK:  ldaexb	r3, [r4]                @ encoding: [0xd4,0xe8,0xcf,0x3f]
+# CHECK:  ldaexh	r2, [r5]                @ encoding: [0xd5,0xe8,0xdf,0x2f]
+# CHECK:  ldaex	r1, [r7]                @ encoding: [0xd7,0xe8,0xef,0x1f]
+# CHECK:  ldaexd	r6, r7, [r8]            @ encoding: [0xd8,0xe8,0xff,0x67]
+
+0xc4 0xe8 0xc1 0x3f
+0xc5 0xe8 0xd4 0x2f
+0xc7 0xe8 0xe2 0x1f
+0xc8 0xe8 0xf6 0x23
+# CHECK: stlexb r1, r3, [r4]            @ encoding: [0xc4,0xe8,0xc1,0x3f]
+# CHECK: stlexh r4, r2, [r5]            @ encoding: [0xc5,0xe8,0xd4,0x2f]
+# CHECK: stlex r2, r1, [r7]            @ encoding: [0xc7,0xe8,0xe2,0x1f]
+# CHECK: stlexd r6, r2, r3, [r8]        @ encoding: [0xc8,0xe8,0xf6,0x23]
+
+0xd6 0xe8 0xaf 0x5f
+0xd6 0xe8 0x8f 0x5f
+0xd9 0xe8 0x9f 0xcf
+# CHECK: lda r5, [r6]                   @ encoding: [0xd6,0xe8,0xaf,0x5f]
+# CHECK: ldab r5, [r6]                  @ encoding: [0xd6,0xe8,0x8f,0x5f]
+# CHECK: ldah r12, [r9]                 @ encoding: [0xd9,0xe8,0x9f,0xcf]
+
+0xc0 0xe8 0xaf 0x3f
+0xc1 0xe8 0x8f 0x2f
+0xc3 0xe8 0x9f 0x2f
+# CHECK: stl r3, [r0]                   @ encoding: [0xc0,0xe8,0xaf,0x3f]
+# CHECK: stlb r2, [r1]                  @ encoding: [0xc1,0xe8,0x8f,0x2f]
+# CHECK: stlh r2, [r3]                  @ encoding: [0xc3,0xe8,0x9f,0x2f]
diff --git a/test/MC/Disassembler/ARM/load-store-acquire-release-v8.txt b/test/MC/Disassembler/ARM/load-store-acquire-release-v8.txt
new file mode 100644
index 000000000000..058f9ccdec47
--- /dev/null
+++ b/test/MC/Disassembler/ARM/load-store-acquire-release-v8.txt
@@ -0,0 +1,32 @@
+# RUN: llvm-mc -triple=armv8 -disassemble -show-encoding < %s | FileCheck %s
+0x9f 0x0e 0xd8 0xe1
+0x9f 0x1e 0xfc 0xe1
+0x9f 0x1e 0x90 0xe1
+0x9f 0x8e 0xbd 0xe1
+# CHECK: ldaexb r0, [r8]       @ encoding: [0x9f,0x0e,0xd8,0xe1]
+# CHECK: ldaexh r1, [r12]      @ encoding: [0x9f,0x1e,0xfc,0xe1]
+# CHECK: ldaex  r1, [r0]       @ encoding: [0x9f,0x1e,0x90,0xe1]
+# CHECK: ldaexd r8, r9, [sp]   @ encoding: [0x9f,0x8e,0xbd,0xe1]
+
+0x93 0x1e 0xc4 0xe1
+0x92 0x4e 0xe5 0xe1
+0x91 0x2e 0x87 0xe1
+0x92 0x6e 0xa8 0xe1
+# CHECK: stlexb r1, r3, [r4]            @ encoding: [0x93,0x1e,0xc4,0xe1]
+# CHECK: stlexh r4, r2, [r5]            @ encoding: [0x92,0x4e,0xe5,0xe1]
+# CHECK: stlex r2, r1, [r7]            @ encoding: [0x91,0x2e,0x87,0xe1]
+# CHECK: stlexd r6, r2, r3, [r8]        @ encoding: [0x92,0x6e,0xa8,0xe1]
+
+0x9f 0x5c 0x96 0xe1
+0x9f 0x5c 0xd6 0xe1
+0x9f 0xcc 0xf9 0xe1
+# CHECK: lda r5, [r6]          @ encoding: [0x9f,0x5c,0x96,0xe1]
+# CHECK: ldab r5, [r6]         @ encoding: [0x9f,0x5c,0xd6,0xe1]
+# CHECK: ldah r12, [r9]        @ encoding: [0x9f,0xcc,0xf9,0xe1]
+
+0x93 0xfc 0x80 0xe1
+0x92 0xfc 0xc1 0xe1
+0x92 0xfc 0xe3 0xe1
+# CHECK: stl r3, [r0]                   @ encoding: [0x93,0xfc,0x80,0xe1]
+# CHECK: stlb r2, [r1]                  @ encoding: [0x92,0xfc,0xc1,0xe1]
+# CHECK: stlh r2, [r3]                  @ encoding: [0x92,0xfc,0xe3,0xe1]
diff --git a/test/MC/Disassembler/ARM/neon-crypto.txt b/test/MC/Disassembler/ARM/neon-crypto.txt
new file mode 100644
index 000000000000..086c781991d9
--- /dev/null
+++ b/test/MC/Disassembler/ARM/neon-crypto.txt
@@ -0,0 +1,35 @@
+# RUN: llvm-mc -triple armv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s
+
+0x42,0x03,0xb0,0xf3
+# CHECK: aesd.8 q0, q1
+0x02,0x03,0xb0,0xf3
+# CHECK: aese.8 q0, q1
+0xc2,0x03,0xb0,0xf3
+# CHECK: aesimc.8 q0, q1
+0x82,0x03,0xb0,0xf3
+# CHECK: aesmc.8 q0, q1
+
+0xc2,0x02,0xb9,0xf3
+# CHECK: sha1h.32  q0, q1
+0x82,0x03,0xba,0xf3
+# CHECK: sha1su1.32 q0, q1
+0xc2,0x03,0xba,0xf3
+# CHECK: sha256su0.32 q0, q1
+
+0x44,0x0c,0x02,0xf2
+# CHECK: sha1c.32  q0, q1, q2
+0x44,0x0c,0x22,0xf2
+# CHECK: sha1m.32  q0, q1, q2
+0x44,0x0c,0x12,0xf2
+# CHECK: sha1p.32 q0, q1, q2
+0x44,0x0c,0x32,0xf2
+# CHECK: sha1su0.32  q0, q1, q2
+0x44,0x0c,0x02,0xf3
+# CHECK: sha256h.32  q0, q1, q2
+0x44,0x0c,0x12,0xf3
+# CHECK: sha256h2.32 q0, q1, q2
+0x44,0x0c,0x22,0xf3
+# CHECK: sha256su1.32 q0, q1, q2
+
+0xa1,0x0e,0xe0,0xf2
+# CHECK: vmull.p64  q8, d16, d17
diff --git a/test/MC/Disassembler/ARM/neon-v8.txt b/test/MC/Disassembler/ARM/neon-v8.txt
new file mode 100644
index 000000000000..8c6e6898b941
--- /dev/null
+++ b/test/MC/Disassembler/ARM/neon-v8.txt
@@ -0,0 +1,71 @@
+# RUN: llvm-mc -triple armv8-unknown-unknown -mattr=+neon -disassemble < %s | FileCheck %s
+
+0x11 0x4f 0x05 0xf3
+# CHECK: vmaxnm.f32 d4, d5, d1
+0x5c 0x4f 0x08 0xf3
+# CHECK: vmaxnm.f32 q2, q4, q6
+0x3e 0x5f 0x24 0xf3
+# CHECK: vminnm.f32 d5, d4, d30
+0xd4 0x0f 0x2a 0xf3
+# CHECK: vminnm.f32 q0, q13, q2
+
+0x06 0x40 0xbb 0xf3
+# CHECK: vcvta.s32.f32	d4, d6
+0x8a 0xc0 0xbb 0xf3
+# CHECK: vcvta.u32.f32	d12, d10
+0x4c 0x80 0xbb 0xf3
+# CHECK: vcvta.s32.f32	q4, q6
+0xe4 0x80 0xbb 0xf3
+# CHECK: vcvta.u32.f32	q4, q10
+
+0x2e 0x13 0xbb 0xf3
+# CHECK: vcvtm.s32.f32	d1, d30
+0x8a 0xc3 0xbb 0xf3
+# CHECK: vcvtm.u32.f32	d12, d10
+0x64 0x23 0xbb 0xf3
+# CHECK: vcvtm.s32.f32	q1, q10
+0xc2 0xa3 0xfb 0xf3
+# CHECK: vcvtm.u32.f32	q13, q1
+
+0x21 0xf1 0xbb 0xf3
+# CHECK: vcvtn.s32.f32	d15, d17
+0x83 0x51 0xbb 0xf3
+# CHECK: vcvtn.u32.f32	d5, d3
+0x60 0x61 0xbb 0xf3
+# CHECK: vcvtn.s32.f32	q3, q8
+0xc6 0xa1 0xbb 0xf3
+# CHECK: vcvtn.u32.f32	q5, q3
+
+0x25 0xb2 0xbb 0xf3
+# CHECK: vcvtp.s32.f32	d11, d21
+0xa7 0xe2 0xbb 0xf3
+# CHECK: vcvtp.u32.f32	d14, d23
+0x6e 0x82 0xbb 0xf3
+# CHECK: vcvtp.s32.f32	q4, q15
+0xe0 0x22 0xfb 0xf3
+# CHECK: vcvtp.u32.f32	q9, q8
+
+0x00 0x34 0xba 0xf3
+# CHECK: vrintn.f32 d3, d0
+0x48 0x24 0xba 0xf3
+# CHECK: vrintn.f32 q1, q4
+0x8c 0x54 0xba 0xf3
+# CHECK: vrintx.f32 d5, d12
+0xc6 0x04 0xba 0xf3
+# CHECK: vrintx.f32 q0, q3
+0x00 0x35 0xba 0xf3
+# CHECK: vrinta.f32 d3, d0
+0x44 0x05 0xfa 0xf3
+# CHECK: vrinta.f32 q8, q2
+0xa2 0xc5 0xba 0xf3
+# CHECK: vrintz.f32 d12, d18
+0xc8 0x25 0xfa 0xf3
+# CHECK: vrintz.f32 q9, q4
+0x80 0x36 0xba 0xf3
+# CHECK: vrintm.f32 d3, d0
+0xc8 0x26 0xba 0xf3
+# CHECK: vrintm.f32 q1, q4
+0x80 0x37 0xba 0xf3
+# CHECK: vrintp.f32 d3, d0
+0xc8 0x27 0xba 0xf3
+# CHECK: vrintp.f32 q1, q4
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 337457847213..e493fbaefc53 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -1629,9 +1629,6 @@
 0xc0 0xf9 0x4f 0x1b
 # CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
 
-0x63 0xf9 0x37 0xc9
-# CHECK: vld2.8	{d28, d30}, [r3:256], r7
-
 # rdar://10798451
 0xe7 0xf9 0x32 0x1d
 # CHECK vld2.8	{d17[], d19[]}, [r7:16], r2
diff --git a/test/MC/Disassembler/ARM/thumb-fp-armv8.txt b/test/MC/Disassembler/ARM/thumb-fp-armv8.txt
new file mode 100644
index 000000000000..c90eed6fa1fb
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-fp-armv8.txt
@@ -0,0 +1,163 @@
+# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+fp-armv8 -show-encoding < %s | FileCheck %s
+
+0xb2 0xee 0xe0 0x3b
+# CHECK: vcvtt.f64.f16 d3, s1
+
+0xf3 0xee 0xcc 0x2b
+# CHECK: vcvtt.f16.f64 s5, d12
+
+0xb2 0xee 0x60 0x3b
+# CHECK: vcvtb.f64.f16 d3, s1
+
+0xb3 0xee 0x41 0x2b
+# CHECK: vcvtb.f16.f64 s4, d1
+
+0xa8 0xbf # IT block
+0xb2 0xee 0xe0 0x3b
+# CHECK: vcvttge.f64.f16 d3, s1
+
+0xc8 0xbf # IT block
+0xf3 0xee 0xcc 0x2b
+# CHECK: vcvttgt.f16.f64 s5, d12
+
+0x08 0xbf # IT block
+0xb2 0xee 0x60 0x3b
+# CHECK: vcvtbeq.f64.f16 d3, s1
+
+0xb8 0xbf # IT block
+0xb3 0xee 0x41 0x2b
+# CHECK: vcvtblt.f16.f64 s4, d1
+
+
+0xbc 0xfe 0xe1 0x1a
+# CHECK: vcvta.s32.f32 s2, s3
+
+0xbc 0xfe 0xc3 0x1b
+# CHECK: vcvta.s32.f64 s2, d3
+
+0xbd 0xfe 0xeb 0x3a
+# CHECK: vcvtn.s32.f32 s6, s23
+
+0xbd 0xfe 0xe7 0x3b
+# CHECK: vcvtn.s32.f64 s6, d23
+
+0xbe 0xfe 0xc2 0x0a
+# CHECK: vcvtp.s32.f32 s0, s4
+
+0xbe 0xfe 0xc4 0x0b
+# CHECK: vcvtp.s32.f64 s0, d4
+
+0xff 0xfe 0xc4 0x8a
+# CHECK: vcvtm.s32.f32 s17, s8
+
+0xff 0xfe 0xc8 0x8b
+# CHECK: vcvtm.s32.f64 s17, d8
+
+0xbc 0xfe 0x61 0x1a
+# CHECK: vcvta.u32.f32 s2, s3
+
+0xbc 0xfe 0x43 0x1b
+# CHECK: vcvta.u32.f64 s2, d3
+
+0xbd 0xfe 0x6b 0x3a
+# CHECK: vcvtn.u32.f32 s6, s23
+
+0xbd 0xfe 0x67 0x3b
+# CHECK: vcvtn.u32.f64 s6, d23
+
+0xbe 0xfe 0x42 0x0a
+# CHECK: vcvtp.u32.f32 s0, s4
+
+0xbe 0xfe 0x44 0x0b
+# CHECK: vcvtp.u32.f64 s0, d4
+
+0xff 0xfe 0x44 0x8a
+# CHECK: vcvtm.u32.f32 s17, s8
+
+0xff 0xfe 0x48 0x8b
+# CHECK: vcvtm.u32.f64 s17, d8
+
+
+0x20 0xfe 0xab 0x2a
+# CHECK: vselge.f32 s4, s1, s23
+
+0x6f 0xfe 0xa7 0xeb
+# CHECK: vselge.f64 d30, d31, d23
+
+0x30 0xfe 0x80 0x0a
+# CHECK: vselgt.f32 s0, s1, s0
+
+0x3a 0xfe 0x24 0x5b
+# CHECK: vselgt.f64 d5, d10, d20
+
+0x0e 0xfe 0x2b 0xfa
+# CHECK: vseleq.f32 s30, s28, s23
+
+0x04 0xfe 0x08 0x2b
+# CHECK: vseleq.f64 d2, d4, d8
+
+0x58 0xfe 0x07 0xaa
+# CHECK: vselvs.f32 s21, s16, s14
+
+0x11 0xfe 0x2f 0x0b
+# CHECK: vselvs.f64 d0, d1, d31
+
+
+0xc6 0xfe 0x00 0x2a
+# CHECK: vmaxnm.f32 s5, s12, s0
+
+0x86 0xfe 0xae 0x5b
+# CHECK: vmaxnm.f64 d5, d22, d30
+
+0x80 0xfe 0x46 0x0a
+# CHECK: vminnm.f32 s0, s0, s12
+
+0x86 0xfe 0x49 0x4b
+# CHECK: vminnm.f64 d4, d6, d9
+
+
+0xa8 0xbf # IT block
+0xb6 0xee 0xcc 0x3b
+# CHECK: vrintzge.f64 d3, d12
+
+0xf6 0xee 0xcc 0x1a
+# CHECK: vrintz.f32 s3, s24
+
+0xb8 0xbf # IT block
+0xb6 0xee 0x40 0x5b
+# CHECK: vrintrlt.f64 d5, d0
+
+0xb6 0xee 0x64 0x0a
+# CHECK: vrintr.f32 s0, s9
+
+0x08 0xbf # IT block
+0xf7 0xee 0x6e 0xcb
+# CHECK: vrintxeq.f64 d28, d30
+
+0x68 0xbf # IT block
+0xb7 0xee 0x47 0x5a
+# CHECK: vrintxvs.f32 s10, s14
+
+0xb8 0xfe 0x44 0x3b
+# CHECK: vrinta.f64 d3, d4
+
+0xb8 0xfe 0x60 0x6a
+# CHECK: vrinta.f32 s12, s1
+
+0xb9 0xfe 0x44 0x3b
+# CHECK: vrintn.f64 d3, d4
+
+0xb9 0xfe 0x60 0x6a
+# CHECK: vrintn.f32 s12, s1
+
+0xba 0xfe 0x44 0x3b
+# CHECK: vrintp.f64 d3, d4
+
+0xba 0xfe 0x60 0x6a
+# CHECK: vrintp.f32 s12, s1
+
+0xbb 0xfe 0x44 0x3b
+# CHECK: vrintm.f64 d3, d4
+
+0xbb 0xfe 0x60 0x6a
+# CHECK: vrintm.f32 s12, s1
diff --git a/test/MC/Disassembler/ARM/thumb-neon-crypto.txt b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt
new file mode 100644
index 000000000000..c725c7f5dc38
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt
@@ -0,0 +1,43 @@
+# RUN: llvm-mc -triple thumbv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s
+
+0xb0 0xff 0x42 0x03
+# CHECK: aesd.8  q0, q1
+0xb0 0xff 0x02 0x03
+# CHECK: aese.8 q0, q1
+0xb0 0xff 0xc2 0x03
+# CHECK: aesimc.8 q0, q1
+0xb0 0xff 0x82 0x03
+# CHECK: aesmc.8 q0, q1
+
+0xb9 0xff 0xc2 0x02
+# CHECK: sha1h.32  q0, q1
+0xba 0xff 0x82 0x03
+# CHECK: sha1su1.32 q0, q1
+0xba 0xff 0xc2 0x03
+# CHECK: sha256su0.32 q0, q1
+
+0x02 0xef 0x44 0x0c
+# CHECK: sha1c.32  q0, q1, q2
+0x22 0xef 0x44 0x0c
+# CHECK: sha1m.32  q0, q1, q2
+0x12 0xef 0x44 0x0c
+# CHECK: sha1p.32 q0, q1, q2
+0x32 0xef 0x44 0x0c
+# CHECK: sha1su0.32  q0, q1, q2
+0x02 0xff 0x44 0x0c
+# CHECK: sha256h.32  q0, q1, q2
+0x12 0xff 0x44 0x0c
+# CHECK: sha256h2.32 q0, q1, q2
+0x22 0xff 0x44 0x0c
+# CHECK: sha256su1.32 q0, q1, q2
+
+0xe0 0xef 0xa1 0x0e
+# CHECK: vmull.p64  q8, d16, d17
+
+# This used to be incorrectly decoded into an sha256h.32 [0x00,0xff,0x40,0x0c]
+# The other similar encodings are stc2 [0x00,0xfd,0x40,0x0c] and cdp2 [0x00,0xfe,0x40,0x0c]
+0x00 0xfc 0x40 0x0c
+# CHECK-NOT: sha256h.32
+# CHECK-NOT: stc2
+# CHECK-NOT: cdp2
+
diff --git a/test/MC/Disassembler/ARM/thumb-neon-v8.txt b/test/MC/Disassembler/ARM/thumb-neon-v8.txt
new file mode 100644
index 000000000000..27c09ea0f822
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-neon-v8.txt
@@ -0,0 +1,71 @@
+# RUN: llvm-mc -triple thumbv8-unknown-unknown -mattr=+neon -disassemble < %s | FileCheck %s
+
+0x5 0xff 0x11 0x4f
+# CHECK: vmaxnm.f32 d4, d5, d1
+0x08 0xff 0x5c 0x4f
+# CHECK: vmaxnm.f32 q2, q4, q6
+0x24 0xff 0x3e 0x5f
+# CHECK: vminnm.f32 d5, d4, d30
+0x2a 0xff 0xd4 0x0f
+# CHECK: vminnm.f32 q0, q13, q2
+
+0xbb 0xff 0x06 0x40
+# CHECK: vcvta.s32.f32	d4, d6
+0xbb 0xff 0x8a 0xc0
+# CHECK: vcvta.u32.f32	d12, d10
+0xbb 0xff 0x4c 0x80
+# CHECK: vcvta.s32.f32	q4, q6
+0xbb 0xff 0xe4 0x80
+# CHECK: vcvta.u32.f32	q4, q10
+
+0xbb 0xff 0x2e 0x13
+# CHECK: vcvtm.s32.f32	d1, d30
+0xbb 0xff 0x8a 0xc3
+# CHECK: vcvtm.u32.f32	d12, d10
+0xbb 0xff 0x64 0x23
+# CHECK: vcvtm.s32.f32	q1, q10
+0xfb 0xff 0xc2 0xa3
+# CHECK: vcvtm.u32.f32	q13, q1
+
+0xbb 0xff 0x21 0xf1
+# CHECK: vcvtn.s32.f32	d15, d17
+0xbb 0xff 0x83 0x51
+# CHECK: vcvtn.u32.f32	d5, d3
+0xbb 0xff 0x60 0x61
+# CHECK: vcvtn.s32.f32	q3, q8
+0xbb 0xff 0xc6 0xa1
+# CHECK: vcvtn.u32.f32	q5, q3
+
+0xbb 0xff 0x25 0xb2
+# CHECK: vcvtp.s32.f32	d11, d21
+0xbb 0xff 0xa7 0xe2
+# CHECK: vcvtp.u32.f32	d14, d23
+0xbb 0xff 0x6e 0x82
+# CHECK: vcvtp.s32.f32	q4, q15
+0xfb 0xff 0xe0 0x22
+# CHECK: vcvtp.u32.f32	q9, q8
+
+0xba 0xff 0x00 0x34
+# CHECK: vrintn.f32 d3, d0
+0xba 0xff 0x48 0x24
+# CHECK: vrintn.f32 q1, q4
+0xba 0xff 0x8c 0x54
+# CHECK: vrintx.f32 d5, d12
+0xba 0xff 0xc6 0x04
+# CHECK: vrintx.f32 q0, q3
+0xba 0xff 0x00 0x35
+# CHECK: vrinta.f32 d3, d0
+0xfa 0xff 0x44 0x05
+# CHECK: vrinta.f32 q8, q2
+0xba 0xff 0xa2 0xc5
+# CHECK: vrintz.f32 d12, d18
+0xfa 0xff 0xc8 0x25
+# CHECK: vrintz.f32 q9, q4
+0xba 0xff 0x80 0x36
+# CHECK: vrintm.f32 d3, d0
+0xba 0xff 0xc8 0x26
+# CHECK: vrintm.f32 q1, q4
+0xba 0xff 0x80 0x37
+# CHECK: vrintp.f32 d3, d0
+0xba 0xff 0xc8 0x27
+# CHECK: vrintp.f32 q1, q4
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index 757ce6e3977b..df2bac140cf7 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -125,7 +125,7 @@
 # CHECK: cps  #15
 0xaf 0xf3 0x0f 0x81
 
-# CHECK: cpsie.w  if, #10
+# CHECK: cpsie  if, #10
 0xaf 0xf3 0x6a 0x85
 
 # CHECK: cpsie aif
@@ -221,6 +221,9 @@
 # CHECK:	stc2	p12, c15, [r9], {137}
 0x89 0xfc 0x89 0xfc
 
+# CHECK:	stc2	p0, c0, [r0, #0]!
+0xa0 0xfd 0x00 0x00
+
 # CHECK:	vmov	r1, r0, d11
 0x50 0xec 0x1b 0x1b
 
diff --git a/test/MC/Disassembler/ARM/thumb-v8.txt b/test/MC/Disassembler/ARM/thumb-v8.txt
new file mode 100644
index 000000000000..eb5ffea7d667
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb-v8.txt
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+db -show-encoding < %s | FileCheck %s
+
+0x80 0xba
+# CHECK: hlt #0
+
+0xbf 0xba
+# CHECK: hlt #63
+
+# DCPS{1,2,3}
+
+0x8f 0xf7 0x01 0x80 
+# CHECK: dcps1
+
+0x8f 0xf7 0x02 0x80 
+# CHECK: dcps2
+
+0x8f 0xf7 0x03 0x80 
+# CHECK: dcps3
+
+0xbf 0xf3 0x59 0x8f
+0xbf 0xf3 0x51 0x8f
+0xbf 0xf3 0x55 0x8f
+0xbf 0xf3 0x5d 0x8f
+
+# CHECK: dmb ishld
+# CHECK: dmb oshld
+# CHECK: dmb nshld
+# CHECK: dmb ld
diff --git a/test/MC/Disassembler/ARM/thumb1.txt b/test/MC/Disassembler/ARM/thumb1.txt
index de9596aab732..a129abba70fd 100644
--- a/test/MC/Disassembler/ARM/thumb1.txt
+++ b/test/MC/Disassembler/ARM/thumb1.txt
@@ -54,8 +54,12 @@
 #------------------------------------------------------------------------------
 # ADR
 #------------------------------------------------------------------------------
-# CHECK: adr	r2, #3
+# CHECK: adr    r5, #0
+# CHECK: adr    r2, #12
+# CHECK: adr    r3, #1020
+0x00 0xa5
 0x03 0xa2
+0xff 0xa3
 
 #------------------------------------------------------------------------------
 # ASR (immediate)
@@ -279,9 +283,11 @@
 #------------------------------------------------------------------------------
 # CHECK: mov r3, r4
 # CHECK: movs r1, r3
+# CHECK: mov r8, r8
 
 0x23 0x46
 0x19 0x00
+0xc0 0x46
 
 
 #------------------------------------------------------------------------------
@@ -310,14 +316,6 @@
 
 
 #------------------------------------------------------------------------------
-# NOP
-#------------------------------------------------------------------------------
-# CHECK: nop
-
-0xc0 0x46
-
-
-#------------------------------------------------------------------------------
 # ORR
 #------------------------------------------------------------------------------
 # CHECK: orrs  r3, r4
diff --git a/test/MC/Disassembler/ARM/thumb2-v8.txt b/test/MC/Disassembler/ARM/thumb2-v8.txt
new file mode 100644
index 000000000000..1b2f09562e2c
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb2-v8.txt
@@ -0,0 +1,40 @@
+# RUN: llvm-mc -triple=thumbv8 -disassemble < %s | FileCheck %s
+# CHECK: sevl
+# CHECK: sevl.w
+0x50 0xbf
+0xaf 0xf3 0x05 0x80
+
+
+# These are the only coprocessor instructions that remain defined in ARMv8
+# (The operations on p10/p11 disassemble into FP/NEON instructions)
+
+0x00 0xee 0x10 0x0e
+# CHECK: mcr p14
+
+0x00 0xee 0x10 0x0f
+# CHECK: mcr p15
+
+0x10 0xee 0x10 0x0e
+# CHECK: mrc p14
+
+0x10 0xee 0x10 0x0f
+# CHECK: mrc p15
+
+0x40 0xec 0x00 0x0e
+# CHECK: mcrr p14
+
+0x40 0xec 0x00 0x0f
+# CHECK: mcrr p15
+
+0x50 0xec 0x00 0x0e
+# CHECK: mrrc p14
+
+0x50 0xec 0x00 0x0f
+# CHECK: mrrc p15
+
+0x80 0xec 0x00 0x0e
+# CHECK: stc p14
+
+0x90 0xec 0x00 0x0e
+# CHECK: ldc p14
+
diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt
index 31f75b39fa9c..c8b408031336 100644
--- a/test/MC/Disassembler/ARM/thumb2.txt
+++ b/test/MC/Disassembler/ARM/thumb2.txt
@@ -170,8 +170,10 @@
 0x13 0xf5 0xce 0xa9
 
 # CHECK: b.w   #208962
+# CHECK: b.w   #-16777216
 
 0x33 0xf0 0x21 0xb8 # rdar://12585795
+0x00 0xf4 0x00 0x90
 
 #------------------------------------------------------------------------------
 # BFC
@@ -447,8 +449,10 @@
 # ISB
 #------------------------------------------------------------------------------
 #CHECK: isb sy
+#CHECK: isb #0xa
 
 0xbf 0xf3 0x6f 0x8f
+0xbf 0xf3 0x6a 0x8f
 
 #------------------------------------------------------------------------------
 # IT
@@ -549,6 +553,17 @@
 
 
 #------------------------------------------------------------------------------
+# LDR(literal)
+#------------------------------------------------------------------------------
+# CHECK: ldr.w r4, [pc, #-0]
+# CHECK: ldr.w r2, [pc, #-40]
+# CHECK: ldr.w r1, [pc, #1024]
+0x5f 0xf8 0x00 0x40
+0x5f 0xf8 0x28 0x20
+0xdf 0xf8 0x00 0x14
+
+
+#------------------------------------------------------------------------------
 # LDR(register)
 #------------------------------------------------------------------------------
 # CHECK: ldr.w r1, [r8, r1]
@@ -561,6 +576,7 @@
 # CHECK: ldr r2, [r4, #255]!
 # CHECK: ldr r8, [sp, #4]!
 # CHECK: ldr lr, [sp, #-4]!
+# CHECK: ldr lr, [sp, #0]!
 # CHECK: ldr r2, [r4], #255
 # CHECK: ldr r8, [sp], #4
 # CHECK: ldr lr, [sp], #-4
@@ -575,6 +591,7 @@
 0x54 0xf8 0xff 0x2f
 0x5d 0xf8 0x04 0x8f
 0x5d 0xf8 0x04 0xed
+0x5d 0xf8 0x00 0xef
 0x54 0xf8 0xff 0x2b
 0x5d 0xf8 0x04 0x8b
 0x5d 0xf8 0x04 0xe9
@@ -608,6 +625,7 @@
 # CHECK: ldrb r5, [r8, #255]!
 # CHECK: ldrb r2, [r5, #4]!
 # CHECK: ldrb r1, [r4, #-4]!
+# CHECK: ldrb r1, [r4, #0]!
 # CHECK: ldrb lr, [r3], #255
 # CHECK: ldrb r9, [r2], #4
 # CHECK: ldrb r3, [sp], #-4
@@ -621,12 +639,24 @@
 0x18 0xf8 0xff 0x5f
 0x15 0xf8 0x04 0x2f
 0x14 0xf8 0x04 0x1d
+0x14 0xf8 0x00 0x1f
 0x13 0xf8 0xff 0xeb
 0x12 0xf8 0x04 0x9b
 0x1d 0xf8 0x04 0x39
 
 
 #------------------------------------------------------------------------------
+# LDRB(literal)
+#------------------------------------------------------------------------------
+# CHECK: ldrb.w r6, [pc, #-0]
+# CHECK: ldrb.w r10, [pc, #227]
+# CHECK: ldrb.w r5, [pc, #0]
+0x1f 0xf8 0x00 0x60
+0x9f 0xf8 0xe3 0xa0
+0x9f 0xf8 0x00 0x50
+
+
+#------------------------------------------------------------------------------
 # LDRBT
 #------------------------------------------------------------------------------
 # CHECK: ldrbt r1, [r2]
@@ -651,7 +681,9 @@
 # CHECK: ldrd r8, r1, [r3]
 # CHECK: ldrd r0, r1, [r2], #-0
 # CHECK: ldrd r0, r1, [r2, #-0]!
+# CHECK: ldrd r0, r1, [r2, #0]!
 # CHECK: ldrd r0, r1, [r2, #-0]
+# CHECK: ldrd r1, r1, [r0], #0
 
 0xd6 0xe9 0x06 0x35
 0xf6 0xe9 0x06 0x35
@@ -661,7 +693,9 @@
 0xd3 0xe9 0x00 0x81
 0x72 0xe8 0x00 0x01
 0x72 0xe9 0x00 0x01
+0xf2 0xe9 0x00 0x01
 0x52 0xe9 0x00 0x01
+0xf0 0xe8 0x00 0x11
 
 
 #------------------------------------------------------------------------------
@@ -695,14 +729,12 @@
 # CHECK: ldrh.w r5, [r6, #33]
 # CHECK: ldrh.w r5, [r6, #257]
 # CHECK: ldrh.w lr, [r7, #257]
-# CHECK: ldrh.w r0, [pc, #-21]
 
 0x35 0xf8 0x04 0x5c
 0x35 0x8c
 0xb6 0xf8 0x21 0x50
 0xb6 0xf8 0x01 0x51
 0xb7 0xf8 0x01 0xe1
-0x3f 0xf8 0x15 0x00
 
 
 #------------------------------------------------------------------------------
@@ -717,6 +749,7 @@
 # CHECK: ldrh r5, [r8, #255]!
 # CHECK: ldrh r2, [r5, #4]!
 # CHECK: ldrh r1, [r4, #-4]!
+# CHECK: ldrh r1, [r4, #0]!
 # CHECK: ldrh lr, [r3], #255
 # CHECK: ldrh r9, [r2], #4
 # CHECK: ldrh r3, [sp], #-4
@@ -730,12 +763,24 @@
 0x38 0xf8 0xff 0x5f
 0x35 0xf8 0x04 0x2f
 0x34 0xf8 0x04 0x1d
+0x34 0xf8 0x00 0x1f
 0x33 0xf8 0xff 0xeb
 0x32 0xf8 0x04 0x9b
 0x3d 0xf8 0x04 0x39
 
 
 #------------------------------------------------------------------------------
+# LDRH(literal)
+#------------------------------------------------------------------------------
+# CHECK: ldrh.w r7, [pc, #-0]
+# CHECK: ldrh.w r5, [pc, #121]
+# CHECK: ldrh.w r4, [pc, #0]
+0x3f 0xf8 0x00 0x70
+0xbf 0xf8 0x79 0x50
+0xbf 0xf8 0x00 0x40
+
+
+#------------------------------------------------------------------------------
 # LDRSB(immediate)
 #------------------------------------------------------------------------------
 # CHECK: ldrsb r5, [r5, #-4]
@@ -763,6 +808,7 @@
 # CHECK: ldrsb r5, [r8, #255]!
 # CHECK: ldrsb r2, [r5, #4]!
 # CHECK: ldrsb r1, [r4, #-4]!
+# CHECK: ldrsb r1, [r4, #0]!
 # CHECK: ldrsb lr, [r3], #255
 # CHECK: ldrsb r9, [r2], #4
 # CHECK: ldrsb r3, [sp], #-4
@@ -776,12 +822,24 @@
 0x18 0xf9 0xff 0x5f
 0x15 0xf9 0x04 0x2f
 0x14 0xf9 0x04 0x1d
+0x14 0xf9 0x00 0x1f
 0x13 0xf9 0xff 0xeb
 0x12 0xf9 0x04 0x9b
 0x1d 0xf9 0x04 0x39
 
 
 #------------------------------------------------------------------------------
+# LDRSB(literal)
+#------------------------------------------------------------------------------
+# CHECK: ldrsb.w r0, [pc, #-0]
+# CHECK: ldrsb.w r12, [pc, #80]
+# CHECK: ldrsb.w r3, [pc, #0]
+0x1f 0xf9 0x00 0x00
+0x9f 0xf9 0x50 0xc0
+0x9f 0xf9 0x00 0x30
+
+
+#------------------------------------------------------------------------------
 # LDRSBT
 #------------------------------------------------------------------------------
 # CHECK: ldrsbt r1, [r2]
@@ -824,6 +882,7 @@
 # CHECK: ldrsh r5, [r8, #255]!
 # CHECK: ldrsh r2, [r5, #4]!
 # CHECK: ldrsh r1, [r4, #-4]!
+# CHECK: ldrsh r1, [r4, #0]!
 # CHECK: ldrsh lr, [r3], #255
 # CHECK: ldrsh r9, [r2], #4
 # CHECK: ldrsh r3, [sp], #-4
@@ -837,12 +896,24 @@
 0x38 0xf9 0xff 0x5f
 0x35 0xf9 0x04 0x2f
 0x34 0xf9 0x04 0x1d
+0x34 0xf9 0x00 0x1f
 0x33 0xf9 0xff 0xeb
 0x32 0xf9 0x04 0x9b
 0x3d 0xf9 0x04 0x39
 
 
 #------------------------------------------------------------------------------
+# LDRSH(literal)
+#------------------------------------------------------------------------------
+# CHECK: ldrsh.w r0, [pc, #-0]
+# CHECK: ldrsh.w r10, [pc, #-231]
+# CHECK: ldrsh.w r6, [pc, #0]
+0x3f 0xf9 0x00 0x00
+0x3f 0xf9 0xe7 0xa0
+0xbf 0xf9 0x00 0x60
+
+
+#------------------------------------------------------------------------------
 # LDRSHT
 #------------------------------------------------------------------------------
 # CHECK: ldrsht r1, [r2]
@@ -1235,6 +1306,17 @@
 0x1d 0xf8 0x02 0xf0
 
 #------------------------------------------------------------------------------
+# PLD(literal)
+#------------------------------------------------------------------------------
+# CHECK: pld [pc, #-0]
+# CHECK: pld [pc, #455]
+# CHECK: pld [pc, #0]
+
+0x1f 0xf8 0x00 0xf0
+0x9f 0xf8 0xc7 0xf1
+0x9f 0xf8 0x00 0xf0
+
+#------------------------------------------------------------------------------
 # PLI(immediate)
 #------------------------------------------------------------------------------
 # CHECK: pli [r5, #-4]
@@ -1266,6 +1348,17 @@
 0x1d 0xf9 0x12 0xf0
 0x1d 0xf9 0x02 0xf0
 
+#------------------------------------------------------------------------------
+# PLI(literal)
+#------------------------------------------------------------------------------
+# CHECK: pli [pc, #-0]
+# CHECK: pli [pc, #-328]
+# CHECK: pli [pc, #0]
+
+0x1f 0xf9 0x00 0xf0
+0x1f 0xf9 0x48 0xf1
+0x9f 0xf9 0x00 0xf0
+
 
 #------------------------------------------------------------------------------
 # QADD/QADD16/QADD8
@@ -1835,16 +1928,20 @@
 #------------------------------------------------------------------------------
 # STRD (immediate)
 #------------------------------------------------------------------------------
+# CHECK: strd r1, r1, [r0], #0
 # CHECK: strd r6, r3, [r5], #-8
 # CHECK: strd r8, r5, [r5], #-0
 # CHECK: strd r7, r4, [r5], #-4
 # CHECK: strd r0, r1, [r2, #-0]!
+# CHECK: strd r0, r1, [r2, #0]!
 # CHECK: strd r0, r1, [r2, #-0]
 
+0xe0 0xe8 0x00 0x11
 0x65 0xe8 0x02 0x63
 0x65 0xe8 0x00 0x85
 0x65 0xe8 0x01 0x74
 0x62 0xe9 0x00 0x01
+0xe2 0xe9 0x00 0x01
 0x42 0xe9 0x00 0x01
 
 #------------------------------------------------------------------------------
@@ -1876,6 +1973,7 @@
 # CHECK: strh r5, [r8, #255]!
 # CHECK: strh r2, [r5, #4]!
 # CHECK: strh r1, [r4, #-4]!
+# CHECK: strh r1, [r4, #0]!
 # CHECK: strh lr, [r3], #255
 # CHECK: strh r9, [r2], #4
 # CHECK: strh r3, [sp], #-4
@@ -1888,6 +1986,7 @@
 0x28 0xf8 0xff 0x5f
 0x25 0xf8 0x04 0x2f
 0x24 0xf8 0x04 0x1d
+0x24 0xf8 0x00 0x1f
 0x23 0xf8 0xff 0xeb
 0x22 0xf8 0x04 0x9b
 0x2d 0xf8 0x04 0x39
@@ -1952,6 +2051,7 @@
 # CHECK: sub.w r12, r6, #256
 # CHECK: subw r12, r6, #256
 # CHECK: subs.w r1, r2, #496
+# CHECK: subs pc, lr, #4
 
 0x0a 0xbf
 0x11 0x1f
@@ -1963,6 +2063,7 @@
 0xa6 0xf5 0x80 0x7c
 0xa6 0xf2 0x00 0x1c
 0xb2 0xf5 0xf8 0x71
+0xde 0xf3 0x04 0x8f
 
 
 #------------------------------------------------------------------------------
@@ -2606,3 +2707,14 @@
 0x30 0xbf
 0x10 0xbf
 
+#------------------------------------------------------------------------------
+# Unallocated hints (They execute as NOPs, but software must not use them.)
+#------------------------------------------------------------------------------
+# CHECK: hint #6
+# CHECK: hint.w #6
+# CHECK: hint.w #102
+
+0x60 0xbf
+0xaf 0xf3 0x06 0x80
+0xaf 0xf3 0x66 0x80
+
diff --git a/test/MC/Disassembler/MBlaze/lit.local.cfg b/test/MC/Disassembler/MBlaze/lit.local.cfg
deleted file mode 100644
index 3955b4e167a5..000000000000
--- a/test/MC/Disassembler/MBlaze/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-config.suffixes = ['.txt']
-
-targets = set(config.root.targets_to_build.split())
-if not 'MBlaze' in targets:
-    config.unsupported = True
-
diff --git a/test/MC/Disassembler/MBlaze/mblaze_branch.txt b/test/MC/Disassembler/MBlaze/mblaze_branch.txt
deleted file mode 100644
index 5f4051712fa6..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_branch.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Branch instructions
-################################################################################
-
-# CHECK:    beq     r2, r3
-0x9c 0x02 0x18 0x00
-
-# CHECK:    bge     r2, r3
-0x9c 0xa2 0x18 0x00
-
-# CHECK:    bgt     r2, r3
-0x9c 0x82 0x18 0x00
-
-# CHECK:    ble     r2, r3
-0x9c 0x62 0x18 0x00
-
-# CHECK:    blt     r2, r3
-0x9c 0x42 0x18 0x00
-
-# CHECK:    bne     r2, r3
-0x9c 0x22 0x18 0x00
-
-# CHECK:    beqd    r2, r3
-0x9e 0x02 0x18 0x00
-
-# CHECK:    bged    r2, r3
-0x9e 0xa2 0x18 0x00
-
-# CHECK:    bgtd    r2, r3
-0x9e 0x82 0x18 0x00
-
-# CHECK:    bled    r2, r3
-0x9e 0x62 0x18 0x00
-
-# CHECK:    bltd    r2, r3
-0x9e 0x42 0x18 0x00
-
-# CHECK:    bned    r2, r3
-0x9e 0x22 0x18 0x00
-
-# CHECK:    br      r3
-0x98 0x00 0x18 0x00
-
-# CHECK:    bra     r3
-0x98 0x08 0x18 0x00
-
-# CHECK:    brd     r3
-0x98 0x10 0x18 0x00
-
-# CHECK:    brad    r3
-0x98 0x18 0x18 0x00
-
-# CHECK:    brld    r15, r3
-0x99 0xf4 0x18 0x00
-
-# CHECK:    brald   r15, r3
-0x99 0xfc 0x18 0x00
-
-# CHECK:    brk     r15, r3
-0x99 0xec 0x18 0x00
-
-# CHECK:    beqi    r2, 0
-0xbc 0x02 0x00 0x00
-
-# CHECK:    bgei    r2, 0
-0xbc 0xa2 0x00 0x00
-
-# CHECK:    bgti    r2, 0
-0xbc 0x82 0x00 0x00
-
- # CHECK:   blei    r2, 0
-0xbc 0x62 0x00 0x00
-
-# CHECK:    blti    r2, 0
-0xbc 0x42 0x00 0x00
-
-# CHECK:    bnei    r2, 0
-0xbc 0x22 0x00 0x00
-
-# CHECK:    beqid   r2, 0
-0xbe 0x02 0x00 0x00
-
-# CHECK:    bgeid   r2, 0
-0xbe 0xa2 0x00 0x00
-
-# CHECK:    bgtid   r2, 0
-0xbe 0x82 0x00 0x00
-
-# CHECK:    bleid   r2, 0
-0xbe 0x62 0x00 0x00
-
-# CHECK:    bltid   r2, 0
-0xbe 0x42 0x00 0x00
-
-# CHECK:    bneid   r2, 0
-0xbe 0x22 0x00 0x00
-
-# CHECK:    bri     0
-0xb8 0x00 0x00 0x00
-
-# CHECK:    brai    0
-0xb8 0x08 0x00 0x00
-
-# CHECK:    brid    0
-0xb8 0x10 0x00 0x00
-
-# CHECK:    braid   0
-0xb8 0x18 0x00 0x00
-
-# CHECK:    brlid   r15, 0
-0xb9 0xf4 0x00 0x00
-
-# CHECK:    bralid  r15, 0
-0xb9 0xfc 0x00 0x00
-
-# CHECK:    brki    r15, 0
-0xb9 0xec 0x00 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_fpu.txt b/test/MC/Disassembler/MBlaze/mblaze_fpu.txt
deleted file mode 100644
index 0fb7abcdea54..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_fpu.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# FPU instructions
-################################################################################
-
-# CHECK:    fadd         r0, r1, r2
-0x58 0x01 0x10 0x00
-
-# CHECK:    frsub        r0, r1, r2
-0x58 0x01 0x10 0x80
-
-# CHECK:    fmul         r0, r1, r2
-0x58 0x01 0x11 0x00
-
-# CHECK:    fdiv         r0, r1, r2
-0x58 0x01 0x11 0x80
-
-# CHECK:    fsqrt        r0, r1
-0x58 0x01 0x03 0x80
-
-# CHECK:    fint         r0, r1
-0x58 0x01 0x03 0x00
-
-# CHECK:    flt          r0, r1
-0x58 0x01 0x02 0x80
-
-# CHECK:    fcmp.un     r0, r1, r2
-0x58 0x01 0x12 0x00
-
-# CHECK:    fcmp.lt     r0, r1, r2
-0x58 0x01 0x12 0x10
-
-# CHECK:    fcmp.eq     r0, r1, r2
-0x58 0x01 0x12 0x20
-
-# CHECK:    fcmp.le     r0, r1, r2
-0x58 0x01 0x12 0x30
-
-# CHECK:    fcmp.gt     r0, r1, r2
-0x58 0x01 0x12 0x40
-
-# CHECK:    fcmp.ne     r0, r1, r2
-0x58 0x01 0x12 0x50
-
-# CHECK:    fcmp.ge     r0, r1, r2
-0x58 0x01 0x12 0x60
diff --git a/test/MC/Disassembler/MBlaze/mblaze_fsl.txt b/test/MC/Disassembler/MBlaze/mblaze_fsl.txt
deleted file mode 100644
index a12b3b486e60..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_fsl.txt
+++ /dev/null
@@ -1,338 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# FSL instructions
-################################################################################
-
-# CHECK:    get         r0, rfsl0
-0x6c 0x00 0x00 0x00
-
-# CHECK:    nget        r0, rfsl0
-0x6c 0x00 0x40 0x00
-
-# CHECK:    cget        r0, rfsl0
-0x6c 0x00 0x20 0x00
-
-# CHECK:    ncget       r0, rfsl0
-0x6c 0x00 0x60 0x00
-
-# CHECK:    tget        r0, rfsl0
-0x6c 0x00 0x10 0x00
-
-# CHECK:    tnget       r0, rfsl0
-0x6c 0x00 0x50 0x00
-
-# CHECK:    tcget       r0, rfsl0
-0x6c 0x00 0x30 0x00
-
-# CHECK:    tncget      r0, rfsl0
-0x6c 0x00 0x70 0x00
-
-# CHECK:    aget        r0, rfsl0
-0x6c 0x00 0x08 0x00
-
-# CHECK:    naget       r0, rfsl0
-0x6c 0x00 0x48 0x00
-
-# CHECK:    caget       r0, rfsl0
-0x6c 0x00 0x28 0x00
-
-# CHECK:    ncaget      r0, rfsl0
-0x6c 0x00 0x68 0x00
-
-# CHECK:    taget       r0, rfsl0
-0x6c 0x00 0x18 0x00
-
-# CHECK:    tnaget      r0, rfsl0
-0x6c 0x00 0x58 0x00
-
-# CHECK:    tcaget      r0, rfsl0
-0x6c 0x00 0x38 0x00
-
-# CHECK:    tncaget     r0, rfsl0
-0x6c 0x00 0x78 0x00
-
-# CHECK:    eget        r0, rfsl0
-0x6c 0x00 0x04 0x00
-
-# CHECK:    neget       r0, rfsl0
-0x6c 0x00 0x44 0x00
-
-# CHECK:    ecget       r0, rfsl0
-0x6c 0x00 0x24 0x00
-
-# CHECK:    necget      r0, rfsl0
-0x6c 0x00 0x64 0x00
-
-# CHECK:    teget       r0, rfsl0
-0x6c 0x00 0x14 0x00
-
-# CHECK:    tneget      r0, rfsl0
-0x6c 0x00 0x54 0x00
-
-# CHECK:    tecget      r0, rfsl0
-0x6c 0x00 0x34 0x00
-
-# CHECK:    tnecget     r0, rfsl0
-0x6c 0x00 0x74 0x00
-
-# CHECK:    eaget       r0, rfsl0
-0x6c 0x00 0x0c 0x00
-
-# CHECK:    neaget      r0, rfsl0
-0x6c 0x00 0x4c 0x00
-
-# CHECK:    ecaget      r0, rfsl0
-0x6c 0x00 0x2c 0x00
-
-# CHECK:    necaget     r0, rfsl0
-0x6c 0x00 0x6c 0x00
-
-# CHECK:    teaget      r0, rfsl0
-0x6c 0x00 0x1c 0x00
-
-# CHECK:    tneaget     r0, rfsl0
-0x6c 0x00 0x5c 0x00
-
-# CHECK:    tecaget     r0, rfsl0
-0x6c 0x00 0x3c 0x00
-
-# CHECK:    tnecaget    r0, rfsl0
-0x6c 0x00 0x7c 0x00
-
-# CHECK:    getd        r0, r1
-0x4c 0x00 0x08 0x00
-
-# CHECK:    ngetd       r0, r1
-0x4c 0x00 0x0a 0x00
-
-# CHECK:    cgetd       r0, r1
-0x4c 0x00 0x09 0x00
-
-# CHECK:    ncgetd      r0, r1
-0x4c 0x00 0x0b 0x00
-
-# CHECK:    tgetd       r0, r1
-0x4c 0x00 0x08 0x80
-
-# CHECK:    tngetd      r0, r1
-0x4c 0x00 0x0a 0x80
-
-# CHECK:    tcgetd      r0, r1
-0x4c 0x00 0x09 0x80
-
-# CHECK:    tncgetd     r0, r1
-0x4c 0x00 0x0b 0x80
-
-# CHECK:    agetd       r0, r1
-0x4c 0x00 0x08 0x40
-
-# CHECK:    nagetd      r0, r1
-0x4c 0x00 0x0a 0x40
-
-# CHECK:    cagetd     r0, r1
-0x4c 0x00 0x09 0x40
-
-# CHECK:    ncagetd     r0, r1
-0x4c 0x00 0x0b 0x40
-
-# CHECK:    tagetd      r0, r1
-0x4c 0x00 0x08 0xc0
-
-# CHECK:    tnagetd     r0, r1
-0x4c 0x00 0x0a 0xc0
-
-# CHECK:    tcagetd     r0, r1
-0x4c 0x00 0x09 0xc0
-
-# CHECK:    tncagetd    r0, r1
-0x4c 0x00 0x0b 0xc0
-
-# CHECK:    egetd       r0, r1
-0x4c 0x00 0x08 0x20
-
-# CHECK:    negetd      r0, r1
-0x4c 0x00 0x0a 0x20
-
-# CHECK:    ecgetd      r0, r1
-0x4c 0x00 0x09 0x20
-
-# CHECK:    necgetd     r0, r1
-0x4c 0x00 0x0b 0x20
-
-# CHECK:    tegetd      r0, r1
-0x4c 0x00 0x08 0xa0
-
-# CHECK:    tnegetd     r0, r1
-0x4c 0x00 0x0a 0xa0
-
-# CHECK:    tecgetd     r0, r1
-0x4c 0x00 0x09 0xa0
-
-# CHECK:    tnecgetd    r0, r1
-0x4c 0x00 0x0b 0xa0
-
-# CHECK:    eagetd      r0, r1
-0x4c 0x00 0x08 0x60
-
-# CHECK:    neagetd     r0, r1
-0x4c 0x00 0x0a 0x60
-
-# CHECK:    ecagetd     r0, r1
-0x4c 0x00 0x09 0x60
-
-# CHECK:    necagetd    r0, r1
-0x4c 0x00 0x0b 0x60
-
-# CHECK:    teagetd     r0, r1
-0x4c 0x00 0x08 0xe0
-
-# CHECK:    tneagetd    r0, r1
-0x4c 0x00 0x0a 0xe0
-
-# CHECK:    tecagetd    r0, r1
-0x4c 0x00 0x09 0xe0
-
-# CHECK:    tnecagetd   r0, r1
-0x4c 0x00 0x0b 0xe0
-
-# CHECK:    put         r0, rfsl0
-0x6c 0x00 0x80 0x00
-
-# CHECK:    aput        r0, rfsl0
-0x6c 0x00 0x88 0x00
-
-# CHECK:    cput        r0, rfsl0
-0x6c 0x00 0xa0 0x00
-
-# CHECK:    caput       r0, rfsl0
-0x6c 0x00 0xa8 0x00
-
-# CHECK:    nput        r0, rfsl0
-0x6c 0x00 0xc0 0x00
-
-# CHECK:    naput       r0, rfsl0
-0x6c 0x00 0xc8 0x00
-
-# CHECK:    ncput       r0, rfsl0
-0x6c 0x00 0xe0 0x00
-
-# CHECK:    ncaput      r0, rfsl0
-0x6c 0x00 0xe8 0x00
-
-# CHECK:    tput        rfsl0
-0x6c 0x00 0x90 0x00
-
-# CHECK:    taput       rfsl0
-0x6c 0x00 0x98 0x00
-
-# CHECK:    tcput       rfsl0
-0x6c 0x00 0xb0 0x00
-
-# CHECK:    tcaput      rfsl0
-0x6c 0x00 0xb8 0x00
-
-# CHECK:    tnput       rfsl0
-0x6c 0x00 0xd0 0x00
-
-# CHECK:    tnaput      rfsl0
-0x6c 0x00 0xd8 0x00
-
-# CHECK:    tncput      rfsl0
-0x6c 0x00 0xf0 0x00
-
-# CHECK:    tncaput     rfsl0
-0x6c 0x00 0xf8 0x00
-
-# CHECK:    putd        r0, r1
-0x4c 0x00 0x0c 0x00
-
-# CHECK:    aputd       r0, r1
-0x4c 0x00 0x0c 0x40
-
-# CHECK:    cputd       r0, r1
-0x4c 0x00 0x0d 0x00
-
-# CHECK:    caputd      r0, r1
-0x4c 0x00 0x0d 0x40
-
-# CHECK:    nputd       r0, r1
-0x4c 0x00 0x0e 0x00
-
-# CHECK:    naputd      r0, r1
-0x4c 0x00 0x0e 0x40
-
-# CHECK:    ncputd      r0, r1
-0x4c 0x00 0x0f 0x00
-
-# CHECK:    ncaputd     r0, r1
-0x4c 0x00 0x0f 0x40
-
-# CHECK:    tputd       r1
-0x4c 0x00 0x0c 0x80
-
-# CHECK:    taputd      r1
-0x4c 0x00 0x0c 0xc0
-
-# CHECK:    tcputd      r1
-0x4c 0x00 0x0d 0x80
-
-# CHECK:    tcaputd     r1
-0x4c 0x00 0x0d 0xc0
-
-# CHECK:    tnputd      r1
-0x4c 0x00 0x0e 0x80
-
-# CHECK:    tnaputd     r1
-0x4c 0x00 0x0e 0xc0
-
-# CHECK:    tncputd     r1
-0x4c 0x00 0x0f 0x80
-
-# CHECK:    tncaputd    r1
-0x4c 0x00 0x0f 0xc0
-
-# CHECK:    get     r0, rfsl1
-0x6c 0x00 0x00 0x01
-
-# CHECK:    get     r0, rfsl2
-0x6c 0x00 0x00 0x02
-
-# CHECK:    get     r0, rfsl3
-0x6c 0x00 0x00 0x03
-
-# CHECK:    get     r0, rfsl4
-0x6c 0x00 0x00 0x04
-
-# CHECK:    get     r0, rfsl5
-0x6c 0x00 0x00 0x05
-
-# CHECK:    get     r0, rfsl6
-0x6c 0x00 0x00 0x06
-
-# CHECK:    get     r0, rfsl7
-0x6c 0x00 0x00 0x07
-
-# CHECK:    get     r0, rfsl8
-0x6c 0x00 0x00 0x08
-
-# CHECK:    get     r0, rfsl9
-0x6c 0x00 0x00 0x09
-
-# CHECK:    get     r0, rfsl10
-0x6c 0x00 0x00 0x0a
-
-# CHECK:    get     r0, rfsl11
-0x6c 0x00 0x00 0x0b
-
-# CHECK:    get     r0, rfsl12
-0x6c 0x00 0x00 0x0c
-
-# CHECK:    get     r0, rfsl13
-0x6c 0x00 0x00 0x0d
-
-# CHECK:    get     r0, rfsl14
-0x6c 0x00 0x00 0x0e
-
-# CHECK:    get     r0, rfsl15
-0x6c 0x00 0x00 0x0f
diff --git a/test/MC/Disassembler/MBlaze/mblaze_imm.txt b/test/MC/Disassembler/MBlaze/mblaze_imm.txt
deleted file mode 100644
index 3833ea85d779..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_imm.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# IMM instruction processing
-################################################################################
-
-# CHECK:    addi    r0, r0, 0
-0x20 0x00 0x00 0x00
-
-# CHECK:    addi    r0, r0, 1
-0x20 0x00 0x00 0x01
-
-# CHECK:    addi    r0, r0, 2
-0x20 0x00 0x00 0x02
-
-# CHECK:    addi    r0, r0, 4
-0x20 0x00 0x00 0x04
-
-# CHECK:    addi    r0, r0, 8
-0x20 0x00 0x00 0x08
-
-# CHECK:    addi    r0, r0, 16
-0x20 0x00 0x00 0x10
-
-# CHECK:    addi    r0, r0, 32
-0x20 0x00 0x00 0x20
-
-# CHECK:    addi    r0, r0, 64
-0x20 0x00 0x00 0x40
-
-# CHECK:    addi    r0, r0, 128
-0x20 0x00 0x00 0x80
-
-# CHECK:    addi    r0, r0, 256
-0x20 0x00 0x01 0x00
-
-# CHECK:    addi    r0, r0, 512
-0x20 0x00 0x02 0x00
-
-# CHECK:    addi    r0, r0, 1024
-0x20 0x00 0x04 0x00
-
-# CHECK:    addi    r0, r0, 2048
-0x20 0x00 0x08 0x00
-
-# CHECK:    addi    r0, r0, 4096
-0x20 0x00 0x10 0x00
-
-# CHECK:    addi    r0, r0, 8192
-0x20 0x00 0x20 0x00
-
-# CHECK:    addi    r0, r0, 16384
-0x20 0x00 0x40 0x00
-
-# CHECK:    imm     0
-# CHECK:    addi    r0, r0, -32768
-0xb0 0x00 0x00 0x00 0x20 0x00 0x80 0x00
-
-# CHECK:    imm     1
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x01 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     2
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x02 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     4
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x04 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     8
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x08 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     16
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x10 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     32
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x20 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     64
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x40 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     128
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x00 0x80 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     256
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x01 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     512
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x02 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     1024
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x04 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     2048
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x08 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     4096
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x10 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     8192
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x20 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     16384
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x40 0x00 0x20 0x00 0x00 0x00
-
-# CHECK:    imm     -32768
-# CHECK:    addi    r0, r0, 0
-0xb0 0x00 0x80 0x00 0x20 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_mbar.txt b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
deleted file mode 100644
index 6beba86b15ba..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Memory Barrier instructions
-################################################################################
-
-# CHECK:    mbar        0
-0xB8 0x02 0x00 0x04
-
-# CHECK:    mbar        1
-0xB8 0x22 0x00 0x04
-
-# CHECK:    mbar        2
-0xB8 0x42 0x00 0x04
diff --git a/test/MC/Disassembler/MBlaze/mblaze_memory.txt b/test/MC/Disassembler/MBlaze/mblaze_memory.txt
deleted file mode 100644
index 584d61c47dcf..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_memory.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Memory instructions
-################################################################################
-
-# CHECK:    lbu     r1, r2, r3
-0xc0 0x22 0x18 0x00
-
-# CHECK:    lbur    r1, r2, r3
-0xc0 0x22 0x1a 0x00
-
-# CHECK:    lbui    r1, r2, 28
-0xe0 0x22 0x00 0x1c
-
-# CHECK:    lhu     r1, r2, r3
-0xc4 0x22 0x18 0x00
-
-# CHECK:    lhur    r1, r2, r3
-0xc4 0x22 0x1a 0x00
-
-# CHECK:    lhui    r1, r2, 28
-0xe4 0x22 0x00 0x1c
-
-# CHECK:    lw      r1, r2, r3
-0xc8 0x22 0x18 0x00
-
-# CHECK:    lwr    r1, r2, r3
-0xc8 0x22 0x1a 0x00
-
-# CHECK:    lwi     r1, r2, 28
-0xe8 0x22 0x00 0x1c
-
-# CHECK:    lwx      r1, r2, r3
-0xc8 0x22 0x1c 0x00
-
-# CHECK:    sb      r1, r2, r3
-0xd0 0x22 0x18 0x00
-
-# CHECK:    sbr     r1, r2, r3
-0xd0 0x22 0x1a 0x00
-
-# CHECK:    sbi     r1, r2, 28
-0xf0 0x22 0x00 0x1c
-
-# CHECK:    sh      r1, r2, r3
-0xd4 0x22 0x18 0x00
-
-# CHECK:    shr     r1, r2, r3
-0xd4 0x22 0x1a 0x00
-
-# CHECK:    shi     r1, r2, 28
-0xf4 0x22 0x00 0x1c
-
-# CHECK:    sw      r1, r2, r3
-0xd8 0x22 0x18 0x00
-
-# CHECK:    swr    r1, r2, r3
-0xd8 0x22 0x1a 0x00
-
-# CHECK:    swi     r1, r2, 28
-0xf8 0x22 0x00 0x1c
-
-# CHECK:    swx      r1, r2, r3
-0xd8 0x22 0x1c 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_operands.txt b/test/MC/Disassembler/MBlaze/mblaze_operands.txt
deleted file mode 100644
index f0304b12bd7b..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_operands.txt
+++ /dev/null
@@ -1,197 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Operands disassembly
-################################################################################
-
-# CHECK:    add     r0, r0, r0
-0x00 0x00 0x00 0x00
-
-# CHECK:    add     r1, r1, r1
-0x00 0x21 0x08 0x00
-
-# CHECK:    add     r2, r2, r2
-0x00 0x42 0x10 0x00
-
-# CHECK:    add     r3, r3, r3
-0x00 0x63 0x18 0x00
-
-# CHECK:    add     r4, r4, r4
-0x00 0x84 0x20 0x00
-
-# CHECK:    add     r5, r5, r5
-0x00 0xa5 0x28 0x00
-
-# CHECK:    add     r6, r6, r6
-0x00 0xc6 0x30 0x00
-
-# CHECK:    add     r7, r7, r7
-0x00 0xe7 0x38 0x00
-
-# CHECK:    add     r8, r8, r8
-0x01 0x08 0x40 0x00
-
-# CHECK:    add     r9, r9, r9
-0x01 0x29 0x48 0x00
-
-# CHECK:    add     r10, r10, r10
-0x01 0x4a 0x50 0x00
-
-# CHECK:    add     r11, r11, r11
-0x01 0x6b 0x58 0x00
-
-# CHECK:    add     r12, r12, r12
-0x01 0x8c 0x60 0x00
-
-# CHECK:    add     r13, r13, r13
-0x01 0xad 0x68 0x00
-
-# CHECK:    add     r14, r14, r14
-0x01 0xce 0x70 0x00
-
-# CHECK:    add     r15, r15, r15
-0x01 0xef 0x78 0x00
-
-# CHECK:    add     r16, r16, r16
-0x02 0x10 0x80 0x00
-
-# CHECK:    add     r17, r17, r17
-0x02 0x31 0x88 0x00
-
-# CHECK:    add     r18, r18, r18
-0x02 0x52 0x90 0x00
-
-# CHECK:    add     r19, r19, r19
-0x02 0x73 0x98 0x00
-
-# CHECK:    add     r20, r20, r20
-0x02 0x94 0xa0 0x00
-
-# CHECK:    add     r21, r21, r21
-0x02 0xb5 0xa8 0x00
-
-# CHECK:    add     r22, r22, r22
-0x02 0xd6 0xb0 0x00
-
-# CHECK:    add     r23, r23, r23
-0x02 0xf7 0xb8 0x00
-
-# CHECK:    add     r24, r24, r24
-0x03 0x18 0xc0 0x00
-
-# CHECK:    add     r25, r25, r25
-0x03 0x39 0xc8 0x00
-
-# CHECK:    add     r26, r26, r26
-0x03 0x5a 0xd0 0x00
-
-# CHECK:    add     r27, r27, r27
-0x03 0x7b 0xd8 0x00
-
-# CHECK:    add     r28, r28, r28
-0x03 0x9c 0xe0 0x00
-
-# CHECK:    add     r29, r29, r29
-0x03 0xbd 0xe8 0x00
-
-# CHECK:    add     r30, r30, r30
-0x03 0xde 0xf0 0x00
-
-# CHECK:    add     r31, r31, r31
-0x03 0xff 0xf8 0x00
-
-# CHECK:    addi    r0, r0, 0
-0x20 0x00 0x00 0x00
-
-# CHECK:    addi    r0, r0, 1
-0x20 0x00 0x00 0x01
-
-# CHECK:    addi    r0, r0, 2
-0x20 0x00 0x00 0x02
-
-# CHECK:    addi    r0, r0, 4
-0x20 0x00 0x00 0x04
-
-# CHECK:    addi    r0, r0, 8
-0x20 0x00 0x00 0x08
-
-# CHECK:    addi    r0, r0, 16
-0x20 0x00 0x00 0x10
-
-# CHECK:    addi    r0, r0, 32
-0x20 0x00 0x00 0x20
-
-# CHECK:    addi    r0, r0, 64
-0x20 0x00 0x00 0x40
-
-# CHECK:    addi    r0, r0, 128
-0x20 0x00 0x00 0x80
-
-# CHECK:    addi    r0, r0, 256
-0x20 0x00 0x01 0x00
-
-# CHECK:    addi    r0, r0, 512
-0x20 0x00 0x02 0x00
-
-# CHECK:    addi    r0, r0, 1024
-0x20 0x00 0x04 0x00
-
-# CHECK:    addi    r0, r0, 2048
-0x20 0x00 0x08 0x00
-
-# CHECK:    addi    r0, r0, 4096
-0x20 0x00 0x10 0x00
-
-# CHECK:    addi    r0, r0, 8192
-0x20 0x00 0x20 0x00
-
-# CHECK:    addi    r0, r0, 16384
-0x20 0x00 0x40 0x00
-
-# CHECK:    addi    r0, r0, -1
-0x20 0x00 0xff 0xff
-
-# CHECK:    addi    r0, r0, -2
-0x20 0x00 0xff 0xfe
-
-# CHECK:    addi    r0, r0, -4
-0x20 0x00 0xff 0xfc
-
-# CHECK:    addi    r0, r0, -8
-0x20 0x00 0xff 0xf8
-
-# CHECK:    addi    r0, r0, -16
-0x20 0x00 0xff 0xf0
-
-# CHECK:    addi    r0, r0, -32
-0x20 0x00 0xff 0xe0
-
-# CHECK:    addi    r0, r0, -64
-0x20 0x00 0xff 0xc0
-
-# CHECK:    addi    r0, r0, -128
-0x20 0x00 0xff 0x80
-
-# CHECK:    addi    r0, r0, -256
-0x20 0x00 0xff 0x00
-
-# CHECK:    addi    r0, r0, -512
-0x20 0x00 0xfe 0x00
-
-# CHECK:    addi    r0, r0, -1024
-0x20 0x00 0xfc 0x00
-
-# CHECK:    addi    r0, r0, -2048
-0x20 0x00 0xf8 0x00
-
-# CHECK:    addi    r0, r0, -4096
-0x20 0x00 0xf0 0x00
-
-# CHECK:    addi    r0, r0, -8192
-0x20 0x00 0xe0 0x00
-
-# CHECK:    addi    r0, r0, -16384
-0x20 0x00 0xc0 0x00
-
-# CHECK:    addi    r0, r0, -32768
-0x20 0x00 0x80 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
deleted file mode 100644
index cb19ee0427bd..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Pattern instructions
-################################################################################
-
-# CHECK:    pcmpbf      r0, r1, r2
-0x80 0x01 0x14 0x00
-
-# CHECK:    pcmpne      r0, r1, r2
-0x8c 0x01 0x14 0x00
-
-# CHECK:    pcmpeq      r0, r1, r2
-0x88 0x01 0x14 0x00
-
-# CHECK:    clz         r0, r1
-0x90 0x01 0x00 0xE0
diff --git a/test/MC/Disassembler/MBlaze/mblaze_shift.txt b/test/MC/Disassembler/MBlaze/mblaze_shift.txt
deleted file mode 100644
index 2783ffcb3e72..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_shift.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Shift instructions
-################################################################################
-
-# CHECK:    bsrl    r1, r2, r3
-0x44 0x22 0x18 0x00
-
-# CHECK:    bsra    r1, r2, r3
-0x44 0x22 0x1a 0x00
-
-# CHECK:    bsll    r1, r2, r3
-0x44 0x22 0x1c 0x00
-
-# CHECK:    bsrli   r1, r2, 0
-0x64 0x22 0x00 0x00
-
-# CHECK:    bsrai   r1, r2, 0
-0x64 0x22 0x02 0x00
-
-# CHECK:    bslli   r1, r2, 0
-0x64 0x22 0x04 0x00
-
-# CHECK:    sra     r1, r2
-0x90 0x22 0x00 0x01
-
-# CHECK:    srl     r1, r2
-0x90 0x22 0x00 0x41
diff --git a/test/MC/Disassembler/MBlaze/mblaze_special.txt b/test/MC/Disassembler/MBlaze/mblaze_special.txt
deleted file mode 100644
index a808cc9ccfb3..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_special.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# Special instructions
-################################################################################
-
-# CHECK:    mfs         r0, rpc
-0x94 0x00 0x80 0x00
-
-# CHECK:    msrclr      r0, 0
-0x94 0x11 0x00 0x00
-
-# CHECK:    msrset      r0, 0
-0x94 0x10 0x00 0x00
-
-# CHECK:    mts         rpc, r0
-0x94 0x00 0xc0 0x00
-
-# CHECK:    wdc         r0, r1
-0x90 0x00 0x08 0x64
-
-# CHECK:    wdc.clear   r0, r1
-0x90 0x00 0x08 0x66
-
-# CHECK:    wdc.flush   r0, r1
-0x90 0x00 0x08 0x74
-
-# CHECK:    wic         r0, r1
-0x90 0x00 0x08 0x68
-
-################################################################################
-# Special registers
-################################################################################
-
-# CHECK:    mfs         r1, rpc
-0x94 0x20 0x80 0x00
-
-# CHECK:    mfs         r1, rmsr
-0x94 0x20 0x80 0x01
-
-# CHECK:    mfs         r1, rear
-0x94 0x20 0x80 0x03
-
-# CHECK:    mfs         r1, resr
-0x94 0x20 0x80 0x05
-
-# CHECK:    mfs         r1, rfsr
-0x94 0x20 0x80 0x07
-
-# CHECK:    mfs         r1, rbtr
-0x94 0x20 0x80 0x0b
-
-# CHECK:    mfs         r1, redr
-0x94 0x20 0x80 0x0d
-
-# CHECK:    mfs         r1, rpid
-0x94 0x20 0x90 0x00
-
-# CHECK:    mfs         r1, rzpr
-0x94 0x20 0x90 0x01
-
-# CHECK:    mfs         r1, rtlbx
-0x94 0x20 0x90 0x02
-
-# CHECK:    mfs         r1, rtlbhi
-0x94 0x20 0x90 0x04
-
-# CHECK:    mfs         r1, rtlblo
-0x94 0x20 0x90 0x03
-
-# CHECK:    mfs         r1, rpvr0
-0x94 0x20 0xa0 0x00
-
-# CHECK:    mfs         r1, rpvr1
-0x94 0x20 0xa0 0x01
-
-# CHECK:    mfs         r1, rpvr2
-0x94 0x20 0xa0 0x02
-
-# CHECK:    mfs         r1, rpvr3
-0x94 0x20 0xa0 0x03
-
-# CHECK:    mfs         r1, rpvr4
-0x94 0x20 0xa0 0x04
-
-# CHECK:    mfs         r1, rpvr5
-0x94 0x20 0xa0 0x05
-
-# CHECK:    mfs         r1, rpvr6
-0x94 0x20 0xa0 0x06
-
-# CHECK:    mfs         r1, rpvr7
-0x94 0x20 0xa0 0x07
-
-# CHECK:    mfs         r1, rpvr8
-0x94 0x20 0xa0 0x08
-
-# CHECK:    mfs         r1, rpvr9
-0x94 0x20 0xa0 0x09
-
-# CHECK:    mfs         r1, rpvr10
-0x94 0x20 0xa0 0x0a
-
-# CHECK:    mfs         r1, rpvr11
-0x94 0x20 0xa0 0x0b
diff --git a/test/MC/Disassembler/MBlaze/mblaze_typea.txt b/test/MC/Disassembler/MBlaze/mblaze_typea.txt
deleted file mode 100644
index ce99950548bb..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_typea.txt
+++ /dev/null
@@ -1,74 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# TYPE A instructions
-################################################################################
-
-# CHECK:    add     r1, r2, r3
-0x00 0x22 0x18 0x00
-
-# CHECK:    addc    r1, r2, r3
-0x08 0x22 0x18 0x00
-
-# CHECK:    addk    r1, r2, r3
-0x10 0x22 0x18 0x00
-
-# CHECK:    addkc   r1, r2, r3
-0x18 0x22 0x18 0x00
-
-# CHECK:    and     r1, r2, r3
-0x84 0x22 0x18 0x00
-
-# CHECK:    andn    r1, r2, r3
-0x8c 0x22 0x18 0x00
-
-# CHECK:    cmp     r1, r2, r3
-0x14 0x22 0x18 0x01
-
-# CHECK:    cmpu    r1, r2, r3
-0x14 0x22 0x18 0x03
-
-# CHECK:    idiv    r1, r2, r3
-0x48 0x22 0x18 0x00
-
-# CHECK:    idivu   r1, r2, r3
-0x48 0x22 0x18 0x02
-
-# CHECK:    mul    r1, r2, r3
-0x40 0x22 0x18 0x00
-
-# CHECK:    mulh   r1, r2, r3
-0x40 0x22 0x18 0x01
-
-# CHECK:    mulhu  r1, r2, r3
-0x40 0x22 0x18 0x03
-
-# CHECK:    mulhsu r1, r2, r3
-0x40 0x22 0x18 0x02
-
-# CHECK:    or      r1, r2, r3
-0x80 0x22 0x18 0x00
-
-# CHECK:    rsub    r1, r2, r3
-0x04 0x22 0x18 0x00
-
-# CHECK:    rsubc   r1, r2, r3
-0x0c 0x22 0x18 0x00
-
-# CHECK:    rsubk   r1, r2, r3
-0x14 0x22 0x18 0x00
-
-# CHECK:    rsubkc  r1, r2, r3
-0x1c 0x22 0x18 0x00
-
-# CHECK:    sext16  r1, r2
-0x90 0x22 0x00 0x61
-
-# CHECK:    sext8   r1, r2
-0x90 0x22 0x00 0x60
-
-# CHECK:    xor     r1, r2, r3
-0x88 0x22 0x18 0x00
-
-# CHECK:    or      r0, r0, r0
-0x80 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/MBlaze/mblaze_typeb.txt b/test/MC/Disassembler/MBlaze/mblaze_typeb.txt
deleted file mode 100644
index 99782ac2c159..000000000000
--- a/test/MC/Disassembler/MBlaze/mblaze_typeb.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
-
-################################################################################
-# TYPE B instructions
-################################################################################
-
-# CHECK:    addi    r1, r2, 15
-0x20 0x22 0x00 0x0f
-
-# CHECK:    addic   r1, r2, 15
-0x28 0x22 0x00 0x0f
-
-# CHECK:    addik   r1, r2, 15
-0x30 0x22 0x00 0x0f
-
-# CHECK:    addikc  r1, r2, 15
-0x38 0x22 0x00 0x0f
-
-# CHECK:    andi    r1, r2, 15
-0xa4 0x22 0x00 0x0f
-
-# CHECK:    andni   r1, r2, 15
-0xac 0x22 0x00 0x0f
-
-# CHECK:    muli    r1, r2, 15
-0x60 0x22 0x00 0x0f
-
-# CHECK:    ori     r1, r2, 15
-0xa0 0x22 0x00 0x0f
-
-# CHECK:    rsubi   r1, r2, 15
-0x24 0x22 0x00 0x0f
-
-# CHECK:    rsubic  r1, r2, 15
-0x2c 0x22 0x00 0x0f
-
-# CHECK:    rsubik  r1, r2, 15
-0x34 0x22 0x00 0x0f
-
-# CHECK:    rsubikc r1, r2, 15
-0x3c 0x22 0x00 0x0f
-
-# CHECK:    rtbd r15, 15
-0xb6 0x4f 0x00 0x0f
-
-# CHECK:    rted r15, 15
-0xb6 0x8f 0x00 0x0f
-
-# CHECK:    rtid r15, 15
-0xb6 0x2f 0x00 0x0f
-
-# CHECK:    rtsd r15, 15
-0xb6 0x0f 0x00 0x0f
-
-# CHECK:    xori r1, r2, 15
-0xa8 0x22 0x00 0x0f
diff --git a/test/MC/Disassembler/Mips/lit.local.cfg b/test/MC/Disassembler/Mips/lit.local.cfg
index 9b698b2e6c41..1fa54b428cd9 100644
--- a/test/MC/Disassembler/Mips/lit.local.cfg
+++ b/test/MC/Disassembler/Mips/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.txt']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Mips' in targets:
     config.unsupported = True
diff --git a/test/MC/Disassembler/Mips/micromips.txt b/test/MC/Disassembler/Mips/micromips.txt
new file mode 100644
index 000000000000..b2d0cc02fc72
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips.txt
@@ -0,0 +1,287 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=micromips \
+# RUN: | FileCheck %s
+
+# CHECK: add $9, $6, $7
+0x00 0xe6 0x49 0x10
+
+# CHECK: addi $9, $6, 17767
+0x11 0x26 0x45 0x67
+
+# CHECK: addiu $9, $6, -15001
+0x31 0x26 0xc5 0x67
+
+# CHECK: addi $9, $6, 17767
+0x11 0x26 0x45 0x67
+
+# CHECK: addiu $9, $6, -15001
+0x31 0x26 0xc5 0x67
+
+# CHECK: addu $9, $6, $7
+0x00 0xe6 0x49 0x50
+
+# CHECK: sub $9, $6, $7
+0x00 0xe6 0x49 0x90
+
+# CHECK: subu $4, $3, $5
+0x00 0xa3 0x21 0xd0
+
+# CHECK: sub $6, $zero, $7
+0x00 0xe0 0x31 0x90
+
+# CHECK: subu $6, $zero, $7
+0x00 0xe0 0x31 0xd0
+
+# CHECK: addu $7, $8, $zero
+0x00 0x08 0x39 0x50
+
+# CHECK: slt $3, $3, $5
+0x00 0xa3 0x1b 0x50
+
+# CHECK: slti $3, $3, 103
+0x90 0x63 0x00 0x67
+
+# CHECK: slti $3, $3, 103
+0x90 0x63 0x00 0x67
+
+# CHECK: sltiu $3, $3, 103
+0xb0 0x63 0x00 0x67
+
+# CHECK: sltu $3, $3, $5
+0x00 0xa3 0x1b 0x90
+
+# CHECK: lui $9, 17767
+0x41 0xa9 0x45 0x67
+
+# CHECK: and $9, $6, $7
+0x00 0xe6 0x4a 0x50
+
+# CHECK: andi $9, $6, 17767
+0xd1 0x26 0x45 0x67
+
+# CHECK: andi $9, $6, 17767
+0xd1 0x26 0x45 0x67
+
+# CHECK: or $3, $4, $5
+0x00 0xa4 0x1a 0x90
+
+# CHECK: ori $9, $6, 17767
+0x51 0x26 0x45 0x67
+
+# CHECK: xor $3, $3, $5
+0x00 0xa3 0x1b 0x10
+
+# CHECK: xori $9, $6, 17767
+0x71 0x26 0x45 0x67
+
+# CHECK: xori $9, $6, 17767
+0x71 0x26 0x45 0x67
+
+# CHECK: nor $9, $6, $7
+0x00 0xe6 0x4a 0xd0
+
+# CHECK: not $7, $8
+0x00 0x08 0x3a 0xd0
+
+# CHECK: mul $9, $6, $7
+0x00 0xe6 0x4a 0x10
+
+# CHECK: mult $9, $7
+0x00 0xe9 0x8b 0x3c
+
+# CHECK: multu $9, $7
+0x00 0xe9 0x9b 0x3c
+
+# CHECK: div $zero, $9, $7
+0x00 0xe9 0xab 0x3c
+
+# CHECK: divu $zero, $9, $7
+0x00 0xe9 0xbb 0x3c
+
+# CHECK: sll $4, $3, 7
+0x00 0x83 0x38 0x00
+
+# CHECK: sllv $2, $3, $5
+0x00 0x65 0x10 0x10
+
+# CHECK: sra $4, $3, 7
+0x00 0x83 0x38 0x80
+
+# CHECK: srav $2, $3, $5
+0x00 0x65 0x10 0x90
+
+# CHECK: srl $4, $3, 7
+0x00 0x83 0x38 0x40
+
+# CHECK: srlv $2, $3, $5
+0x00 0x65 0x10 0x50
+
+# CHECK: rotr $9, $6, 7
+0x01 0x26 0x38 0xc0
+
+# CHECK: rotrv $9, $6, $7
+0x00 0xc7 0x48 0xd0
+
+# CHECK: lb $5, 8($4)
+0x1c 0xa4 0x00 0x08
+
+# CHECK: lbu $6, 8($4)
+0x14 0xc4 0x00 0x08
+
+# CHECK: lh $2, 8($4)
+0x3c 0x44 0x00 0x08
+
+# CHECK: lhu $4, 8($2)
+0x34 0x82 0x00 0x08
+
+# CHECK: lw  $6, 4($5)
+0xfc 0xc5 0x00 0x04
+
+# CHECK: sb $5, 8($4)
+0x18 0xa4 0x00 0x08
+
+# CHECK: sh  $2, 8($4)
+0x38 0x44 0x00 0x08
+
+# CHECK: sw  $5, 4($6)
+0xf8 0xa6 0x00 0x04
+
+# CHECK: lwl $4, 16($5)
+0x60 0x85 0x00 0x10
+
+# CHECK: lwr $4, 16($5)
+0x60 0x85 0x10 0x10
+
+# CHECK: swl $4, 16($5)
+0x60 0x85 0x80 0x10
+
+# CHECK: swr $4, 16($5)
+0x60 0x85 0x90 0x10
+
+# CHECK: movz $9, $6, $7
+0x00 0xe6 0x48 0x58
+
+# CHECK: movn $9, $6, $7
+0x00 0xe6 0x48 0x18
+
+# CHECK: movt $9, $6, $fcc0
+0x55 0x26 0x09 0x7b
+
+# CHECK: movf $9, $6, $fcc0
+0x55 0x26 0x01 0x7b
+
+# CHECK: mthi   $6
+0x00 0x06 0x2d 0x7c
+
+# CHECK: mfhi   $6
+0x00 0x06 0x0d 0x7c
+
+# CHECK: mtlo   $6
+0x00 0x06 0x3d 0x7c
+
+# CHECK: mflo   $6
+0x00 0x06 0x1d 0x7c
+
+# CHECK: madd   $4, $5
+0x00 0xa4 0xcb 0x3c
+
+# CHECK: maddu  $4, $5
+0x00 0xa4 0xdb 0x3c
+
+# CHECK: msub   $4, $5
+0x00 0xa4 0xeb 0x3c
+
+# CHECK: msubu  $4, $5
+0x00 0xa4 0xfb 0x3c
+
+# CHECK: clz $9, $6
+0x01 0x26 0x5b 0x3c
+
+# CHECK: clo $9, $6
+0x01 0x26 0x4b 0x3c
+
+# CHECK: seb $9, $6
+0x01 0x26 0x2b 0x3c
+
+# CHECK: seh $9, $6
+0x01 0x26 0x3b 0x3c
+
+# CHECK: wsbh $9, $6
+0x01 0x26 0x7b 0x3c
+
+# CHECK: ext $9, $6, 3, 7
+0x01 0x26 0x30 0xec
+
+# CHECK: ins $9, $6, 3, 7
+0x01 0x26 0x48 0xcc
+
+# CHECK: j 1328
+0xd4 0x00 0x02 0x98
+
+# CHECK: jal 1328
+0xf4 0x00 0x02 0x98
+
+# CHECK: jalr $ra, $6
+0x03 0xe6 0x0f 0x3c
+
+# CHECK: jr $7
+0x00 0x07 0x0f 0x3c
+
+# CHECK: beq $9, $6, 1332
+0x94 0xc9 0x02 0x9a
+
+# CHECK: bgez $6, 1332
+0x40 0x46 0x02 0x9a
+
+# CHECK: bgezal $6, 1332
+0x40 0x66 0x02 0x9a
+
+# CHECK: bltzal $6, 1332
+0x40 0x26 0x02 0x9a
+
+# CHECK: bgtz $6, 1332
+0x40 0xc6 0x02 0x9a
+
+# CHECK: blez $6, 1332
+0x40 0x86 0x02 0x9a
+
+# CHECK: bne $9, $6, 1332
+0xb4 0xc9 0x02 0x9a
+
+# CHECK: bltz $6, 1332
+0x40 0x06 0x02 0x9a
+
+# CHECK: teq $8, $9, 0
+0x01 0x28 0x00 0x3c
+
+# CHECK: tge $8, $9, 0
+0x01 0x28 0x02 0x3c
+
+# CHECK: tgeu $8, $9, 0
+0x01 0x28 0x04 0x3c
+
+# CHECK: tlt $8, $9, 0
+0x01 0x28 0x08 0x3c
+
+# CHECK: tltu $8, $9, 0
+0x01 0x28 0x0a 0x3c
+
+# CHECK: tne $8, $9, 0
+0x01 0x28 0x0c 0x3c
+
+# CHECK: teqi $9, 17767
+0x41,0xc9,0x45,0x67
+
+# CHECK: tgei $9, 17767
+0x41 0x29 0x45 0x67
+
+# CHECK: tgeiu $9, 17767
+0x41 0x69 0x45 0x67
+
+# CHECK: tlti $9, 17767
+0x41 0x09 0x45 0x67
+
+# CHECK: tltiu $9, 17767
+0x41 0x49 0x45 0x67
+
+# CHECK: tnei $9, 17767
+0x41 0x89 0x45 0x67
diff --git a/test/MC/Disassembler/Mips/micromips_le.txt b/test/MC/Disassembler/Mips/micromips_le.txt
new file mode 100644
index 000000000000..5b2fe30dd065
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips_le.txt
@@ -0,0 +1,287 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mattr=micromips \
+# RUN: | FileCheck %s
+
+# CHECK: add $9, $6, $7
+0xe6 0x00 0x10 0x49
+
+# CHECK: addi $9, $6, 17767
+0x26 0x11 0x67 0x45
+
+# CHECK: addiu $9, $6, -15001
+0x26 0x31 0x67 0xc5
+
+# CHECK: addi $9, $6, 17767
+0x26 0x11 0x67 0x45
+
+# CHECK: addiu $9, $6, -15001
+0x26 0x31 0x67 0xc5
+
+# CHECK: addu $9, $6, $7
+0xe6 0x00 0x50 0x49
+
+# CHECK: sub $9, $6, $7
+0xe6 0x00 0x90 0x49
+
+# CHECK: subu  $4, $3, $5
+0xa3 0x00 0xd0 0x21
+
+# CHECK: sub $6, $zero, $7
+0xe0 0x00 0x90 0x31
+
+# CHECK: subu $6, $zero, $7
+0xe0 0x00 0xd0 0x31
+
+# CHECK: addu $7, $8, $zero
+0x08 0x00 0x50 0x39
+
+# CHECK: slt $3, $3, $5
+0xa3 0x00 0x50 0x1b
+
+# CHECK: slti $3, $3, 103
+0x63 0x90 0x67 0x00
+
+# CHECK: slti $3, $3, 103
+0x63 0x90 0x67 0x00
+
+# CHECK: sltiu $3, $3, 103
+0x63 0xb0 0x67 0x00
+
+# CHECK: sltu $3, $3, $5
+0xa3 0x00 0x90 0x1b
+
+# CHECK: lui $9, 17767
+0xa9 0x41 0x67 0x45
+
+# CHECK: and $9, $6, $7
+0xe6 0x00 0x50 0x4a
+
+# CHECK: andi $9, $6, 17767
+0x26 0xd1 0x67 0x45
+
+# CHECK: andi $9, $6, 17767
+0x26 0xd1 0x67 0x45
+
+# CHECK: or $3, $4, $5
+0xa4 0x00 0x90 0x1a
+
+# CHECK: ori $9, $6, 17767
+0x26 0x51 0x67 0x45
+
+# CHECK: xor $3, $3, $5
+0xa3 0x00 0x10 0x1b
+
+# CHECK: xori $9, $6, 17767
+0x26 0x71 0x67 0x45
+
+# CHECK: xori $9, $6, 17767
+0x26 0x71 0x67 0x45
+
+# CHECK: nor $9, $6, $7
+0xe6 0x00 0xd0 0x4a
+
+# CHECK: not $7, $8
+0x08 0x00 0xd0 0x3a
+
+# CHECK: mul $9, $6, $7
+0xe6 0x00 0x10 0x4a
+
+# CHECK: mult $9, $7
+0xe9 0x00 0x3c 0x8b
+
+# CHECK: multu $9, $7
+0xe9 0x00 0x3c 0x9b
+
+# CHECK: div $zero, $9, $7
+0xe9 0x00 0x3c 0xab
+
+# CHECK: divu $zero, $9, $7
+0xe9 0x00 0x3c 0xbb
+
+# CHECK: sll $4, $3, 7
+0x83 0x00 0x00 0x38
+
+# CHECK: sllv $2, $3, $5
+0x65 0x00 0x10 0x10
+
+# CHECK: sra $4, $3, 7
+0x83 0x00 0x80 0x38
+
+# CHECK: srav $2, $3, $5
+0x65 0x00 0x90 0x10
+
+# CHECK: srl $4, $3, 7
+0x83 0x00 0x40 0x38
+
+# CHECK: srlv $2, $3, $5
+0x65 0x00 0x50 0x10
+
+# CHECK: rotr $9, $6, 7
+0x26 0x01 0xc0 0x38
+
+# CHECK: rotrv $9, $6, $7
+0xc7 0x00 0xd0 0x48
+
+# CHECK: lb $5, 8($4)
+0xa4 0x1c 0x08 0x00
+
+# CHECK: lbu $6, 8($4)
+0xc4 0x14 0x08 0x00
+
+# CHECK: lh $2, 8($4)
+0x44 0x3c 0x08 0x00
+
+# CHECK: lhu $4, 8($2)
+0x82 0x34 0x08 0x00
+
+# CHECK: lw $6, 4($5)
+0xc5 0xfc 0x04 0x00
+
+# CHECK: sb $5, 8($4)
+0xa4 0x18 0x08 0x00
+
+# CHECK: sh $2, 8($4)
+0x44 0x38 0x08 0x00
+
+# CHECK: sw $5, 4($6)
+0xa6 0xf8 0x04 0x00
+
+# CHECK: lwl $4, 16($5)
+0x85 0x60 0x10 0x00
+
+# CHECK: lwr $4, 16($5)
+0x85 0x60 0x10 0x10
+
+# CHECK: swl $4, 16($5)
+0x85 0x60 0x10 0x80
+
+# CHECK: swr $4, 16($5)
+0x85 0x60 0x10 0x90
+
+# CHECK: movz $9, $6, $7
+0xe6 0x00 0x58 0x48
+
+# CHECK: movn $9, $6, $7
+0xe6 0x00 0x18 0x48
+
+# CHECK: movt $9, $6, $fcc0
+0x26 0x55 0x7b 0x09
+
+# CHECK: movf $9, $6, $fcc0
+0x26 0x55 0x7b 0x01
+
+# CHECK: mthi $6
+0x06 0x00 0x7c 0x2d
+
+# CHECK: mfhi $6
+0x06 0x00 0x7c 0x0d
+
+# CHECK: mtlo $6
+0x06 0x00 0x7c 0x3d
+
+# CHECK: mflo $6
+0x06 0x00 0x7c 0x1d
+
+# CHECK: madd $4, $5
+0xa4 0x00 0x3c 0xcb
+
+# CHECK: maddu $4, $5
+0xa4 0x00 0x3c 0xdb
+
+# CHECK: msub $4, $5
+0xa4 0x00 0x3c 0xeb
+
+# CHECK: msubu $4, $5
+0xa4 0x00 0x3c 0xfb
+
+# CHECK: clz $9, $6
+0x26 0x01 0x3c 0x5b
+
+# CHECK: clo $9, $6
+0x26 0x01 0x3c 0x4b
+
+# CHECK: seb $9, $6
+0x26 0x01 0x3c 0x2b
+
+# CHECK: seh $9, $6
+0x26 0x01 0x3c 0x3b
+
+# CHECK: wsbh $9, $6
+0x26 0x01 0x3c 0x7b
+
+# CHECK: ext $9, $6, 3, 7
+0x26 0x01 0xec 0x30
+
+# CHECK: ins $9, $6, 3, 7
+0x26 0x01 0xcc 0x48
+
+# CHECK: j 1328
+0x00 0xd4 0x98 0x02
+
+# CHECK: jal 1328
+0x00 0xf4 0x98 0x02
+
+# CHECK: jalr $ra, $6
+0xe6 0x03 0x3c 0x0f
+
+# CHECK: jr $7
+0x07 0x00 0x3c 0x0f
+
+# CHECK: beq $9, $6, 1332
+0xc9 0x94 0x9a 0x02
+
+# CHECK: bgez $6, 1332
+0x46 0x40 0x9a 0x02
+
+# CHECK: bgezal $6, 1332
+0x66 0x40 0x9a 0x02
+
+# CHECK: bltzal $6, 1332
+0x26 0x40 0x9a 0x02
+
+# CHECK: bgtz $6, 1332
+0xc6 0x40 0x9a 0x02
+
+# CHECK: blez $6, 1332
+0x86 0x40 0x9a 0x02
+
+# CHECK: bne $9, $6, 1332
+0xc9 0xb4 0x9a 0x02
+
+# CHECK: bltz $6, 1332
+0x06 0x40 0x9a 0x02
+
+# CHECK: teq $8, $9, 0
+0x28 0x01 0x3c 0x00
+
+# CHECK: tge $8, $9, 0
+0x28 0x01 0x3c 0x02
+
+# CHECK: tgeu $8, $9, 0
+0x28 0x01 0x3c 0x04
+
+# CHECK: tlt $8, $9, 0
+0x28 0x01 0x3c 0x08
+
+# CHECK: tltu $8, $9, 0
+0x28 0x01 0x3c 0x0a
+
+# CHECK: tne $8, $9, 0
+0x28 0x01 0x3c 0x0c
+
+# CHECK: teqi $9, 17767
+0xc9 0x41 0x67 0x45
+
+# CHECK: tgei $9, 17767
+0x29 0x41 0x67 0x45
+
+# CHECK: tgeiu $9, 17767
+0x69 0x41 0x67 0x45
+
+# CHECK: tlti $9, 17767
+0x09 0x41 0x67 0x45
+
+# CHECK: tltiu $9, 17767
+0x49 0x41 0x67 0x45
+
+# CHECK: tnei $9, 17767
+0x89 0x41 0x67 0x45
diff --git a/test/MC/Disassembler/Mips/mips-dsp.txt b/test/MC/Disassembler/Mips/mips-dsp.txt
index d10e62cd23cd..3f60ae1f8e65 100644
--- a/test/MC/Disassembler/Mips/mips-dsp.txt
+++ b/test/MC/Disassembler/Mips/mips-dsp.txt
@@ -11,3 +11,12 @@
 
 # CHECK: mtlo $21, $ac3
 0x13 0x18 0xa0 0x02
+
+# CHECK: lbux $10, $20($26)
+0x8a 0x51 0x54 0x7f
+
+# CHECK: lhx  $11, $21($27)
+0x0a 0x59 0x75 0x7f
+
+# CHECK: lwx  $12, $22($gp)
+0x0a 0x60 0x96 0x7f
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index ef8bf71bd3a6..6d02925ff7b6 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -35,9 +35,15 @@
 # CHECK: bc1f 1332
 0x45 0x00 0x01 0x4c
 
+# CHECK: bc1f $fcc7, 1332
+0x45 0x1c 0x01 0x4c
+
 # CHECK: bc1t 1332
 0x45 0x01 0x01 0x4c
 
+# CHECK: bc1t $fcc7, 1332
+0x45 0x1d 0x01 0x4c
+
 # CHECK: beq $9, $6, 1332
 0x11 0x26 0x01 0x4c
 
@@ -260,6 +266,24 @@
 # CHECK: mov.s $f6, $f7
 0x46 0x00 0x39 0x86
 
+# CHECK: movf $3, $2, $fcc7
+0x00,0x5c,0x18,0x01
+
+# CHECK: movf.d $f4, $f2, $fcc7
+0x46,0x3c,0x11,0x11
+
+# CHECK: movf.s $f4, $f2, $fcc7
+0x46,0x1c,0x11,0x11
+
+# CHECK: movt $3, $2, $fcc7
+0x00,0x5d,0x18,0x01
+
+# CHECK: movt.d $f4, $f2, $fcc7
+0x46,0x3d,0x11,0x11
+
+# CHECK: movt.s $f4, $f2, $fcc7
+0x46,0x1d,0x11,0x11
+
 # CHECK: msub   $6,  $7
 0x70 0xc7 0x00 0x04
 
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index a0885a4bfe85..61e6fc868d04 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -35,9 +35,15 @@
 # CHECK: bc1f 1332
 0x4c 0x01 0x00 0x45
 
+# CHECK: bc1f $fcc7, 1332
+0x4c 0x01 0x1c 0x45
+
 # CHECK: bc1t 1332
 0x4c 0x01 0x01 0x45
 
+# CHECK: bc1t $fcc7, 1332
+0x4c 0x01 0x1d 0x45
+
 # CHECK: beq $9, $6, 1332
 0x4c 0x01 0x26 0x11
 
@@ -260,6 +266,30 @@
 # CHECK: mov.s $f6, $f7
 0x86 0x39 0x00 0x46
 
+# CHECK: move $7, $8
+0x21,0x38,0x00,0x01
+
+# CHECK: move $3, $2
+0x25,0x18,0x40,0x00
+
+# CHECK: movf $3, $2, $fcc7
+0x01,0x18,0x5c,0x00
+
+# CHECK: movf.d $f4, $f2, $fcc7
+0x11,0x11,0x3c,0x46
+
+# CHECK: movf.s $f4, $f2, $fcc7
+0x11,0x11,0x1c,0x46
+
+# CHECK: movt $3, $2, $fcc7
+0x01,0x18,0x5d,0x00
+
+# CHECK: movt.d $f4, $f2, $fcc7
+0x11,0x11,0x3d,0x46
+
+# CHECK: movt.s $f4, $f2, $fcc7
+0x11,0x11,0x1d,0x46
+
 # CHECK: msub   $6,  $7
 0x04 0x00 0xc7 0x70
 
diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt
index 991eaa6cc97f..11d9058221c2 100644
--- a/test/MC/Disassembler/Mips/mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2.txt
@@ -35,9 +35,15 @@
 # CHECK: bc1f 1332
 0x45 0x00 0x01 0x4c
 
+# CHECK: bc1f $fcc7, 1332
+0x45 0x1c 0x01 0x4c
+
 # CHECK: bc1t 1332
 0x45 0x01 0x01 0x4c
 
+# CHECK: bc1t $fcc7, 1332
+0x45 0x1d 0x01 0x4c
+
 # CHECK: beq $9, $6, 1332
 0x11 0x26 0x01 0x4c
 
@@ -236,6 +242,9 @@
 # CHECK: lui  $6, 17767
 0x3c 0x06 0x45 0x67
 
+# CHECK: luxc1 $f0, $6($5)
+0x4c 0xa6 0x00 0x05
+
 # CHECK: lw  $4, 24($5)
 0x8c 0xa4 0x00 0x18
 
@@ -248,6 +257,9 @@
 # CHECK: lwr   $3, 16($5)
 0x98 0xa3 0x00 0x10
 
+# CHECK: lwxc1 $f20, $12($14)
+0x4d 0xcc 0x05 0x00
+
 # CHECK: madd   $6,  $7
 0x70 0xc7 0x00 0x00
 
@@ -398,6 +410,9 @@
 # CHECK: subu  $4, $3, $5
 0x00 0x65 0x20 0x23
 
+# CHECK: suxc1 $f4, $24($5)
+0x4c 0xb8 0x20 0x0d
+
 # CHECK: sw  $4, 24($5)
 0xac 0xa4 0x00 0x18
 
@@ -410,6 +425,9 @@
 # CHECK: swr $6, 16($7)
 0xb8 0xe6 0x00 0x10
 
+# CHECK: swxc1 $f26, $18($22)
+0x4e 0xd2 0xd0 0x08
+
 # CHECK: sync  7
 0x00 0x00 0x01 0xcf
 
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
index 10c293821c9d..adafcf1258cc 100644
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips32r2_le.txt
@@ -35,9 +35,15 @@
 # CHECK: bc1f 1332
 0x4c 0x01 0x00 0x45
 
+# CHECK: bc1f $fcc7, 1332
+0x4c 0x01 0x1c 0x45
+
 # CHECK: bc1t 1332
 0x4c 0x01 0x01 0x45
 
+# CHECK: bc1t $fcc7, 1332
+0x4c 0x01 0x1d 0x45
+
 # CHECK: beq $9, $6, 1332
 0x4c 0x01 0x26 0x11
 
@@ -236,6 +242,9 @@
 # CHECK: lui  $6, 17767
 0x67 0x45 0x06 0x3c
 
+# CHECK: luxc1 $f0, $6($5)
+0x05 0x00 0xa6 0x4c
+
 # CHECK: lw  $4, 24($5)
 0x18 0x00 0xa4 0x8c
 
@@ -248,6 +257,9 @@
 # CHECK: lwr   $3, 16($5)
 0x10 0x00 0xa3 0x98
 
+# CHECK: lwxc1 $f20, $12($14)
+0x00 0x05 0xcc 0x4d
+
 # CHECK: madd   $6,  $7
 0x00 0x00 0xc7 0x70
 
@@ -398,6 +410,9 @@
 # CHECK: subu  $4, $3, $5
 0x23 0x20 0x65 0x00
 
+# CHECK: suxc1 $f4, $24($5)
+0x0d 0x20 0xb8 0x4c
+
 # CHECK: sw  $4, 24($5)
 0x18 0x00 0xa4 0xac
 
@@ -410,6 +425,9 @@
 # CHECK: swr $6, 16($7)
 0x10 0x00 0xe6 0xb8
 
+# CHECK: swxc1 $f26, $18($22)
+0x08 0xd0 0xd2 0x4e
+
 # CHECK: sync  7
 0xcf 0x01 0x00 0x00
 
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index b88747370b67..f3d2d100cae3 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -2,6 +2,9 @@
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
+# CHECK: daddiu $sp, $sp, -32
+0x67 0xbd 0xff 0xe0
+
 # CHECK: daddu $26, $1, $11
 0x00 0x2b 0xd0 0x2d
 
@@ -64,3 +67,21 @@
 
 # CHECK: sd $6, 17767($zero)
 0xfc 0x06 0x45 0x67
+
+# CHECK: luxc1 $f0, $6($5)
+0x4c 0xa6 0x00 0x05
+
+# CHECK: lwxc1 $f20, $12($14)
+0x4d 0xcc 0x05 0x00
+
+# CHECK: suxc1 $f4, $24($5)
+0x4c 0xb8 0x20 0x0d
+
+# CHECK: swxc1 $f26, $18($22)
+0x4e 0xd2 0xd0 0x08
+
+# CHECK: ldxc1 $f2, $2($10)
+0x4d 0x42 0x00 0x81
+
+# CHECK: sdxc1 $f8, $4($25)
+0x4f 0x24 0x40 0x09
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
index ddc3c2b60be4..0d3d2faf1312 100644
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ b/test/MC/Disassembler/Mips/mips64_le.txt
@@ -64,3 +64,21 @@
 
 # CHECK: sd $6, 17767($zero)
 0x67 0x45 0x06 0xfc
+
+# CHECK: luxc1 $f0, $6($5)
+0x05 0x00 0xa6 0x4c
+
+# CHECK: lwxc1 $f20, $12($14)
+0x00 0x05 0xcc 0x4d
+
+# CHECK: suxc1 $f4, $24($5)
+0x0d 0x20 0xb8 0x4c
+
+# CHECK: swxc1 $f26, $18($22)
+0x08 0xd0 0xd2 0x4e
+
+# CHECK: ldxc1 $f2, $2($10)
+0x81 0x00 0x42 0x4d
+
+# CHECK: sdxc1 $f8, $4($25)
+0x09 0x40 0x24 0x4f
diff --git a/test/MC/Disassembler/SystemZ/insns-pcrel.txt b/test/MC/Disassembler/SystemZ/insns-pcrel.txt
new file mode 100644
index 000000000000..b7edab65e703
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/insns-pcrel.txt
@@ -0,0 +1,1732 @@
+# Test instructions that have PC-relative operands.  There is no attempt
+# to keep the instructions in alphabetical order, since adding new instructions
+# in the middle would mean updating all later offsets.
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu | FileCheck %s
+
+# 0x00000000:
+# CHECK: brasl %r0, 0x0
+0xc0 0x05 0x00 0x00 0x00 0x00
+
+# 0x00000006:
+# CHECK: brasl %r14, 0x6
+0xc0 0xe5 0x00 0x00 0x00 0x00
+
+# 0x0000000c:
+# CHECK: brasl %r15, 0xc
+0xc0 0xf5 0x00 0x00 0x00 0x00
+
+# 0x00000012:
+# CHECK: brasl %r0, 0x10
+0xc0 0x05 0xff 0xff 0xff 0xff
+
+# 0x00000018:
+# CHECK: brasl %r14, 0xffffffff00000018
+0xc0 0xe5 0x80 0x00 0x00 0x00
+
+# 0x0000001e:
+# CHECK: brasl %r15, 0x10000001c
+0xc0 0xf5 0x7f 0xff 0xff 0xff
+
+# 0x00000024:
+# CHECK: bras %r0, 0x24
+0xa7 0x05 0x00 0x00
+
+# 0x00000028:
+# CHECK: bras %r14, 0x28
+0xa7 0xe5 0x00 0x00
+
+# 0x0000002c:
+# CHECK: bras %r15, 0x2c
+0xa7 0xf5 0x00 0x00
+
+# 0x00000030:
+# CHECK: bras %r0, 0x2e
+0xa7 0x05 0xff 0xff
+
+# 0x00000034:
+# CHECK: bras %r14, 0xffffffffffff0034
+0xa7 0xe5 0x80 0x00
+
+# 0x00000038:
+# CHECK: bras %r15, 0x10036
+0xa7 0xf5 0x7f 0xff
+
+# 0x0000003c:
+# CHECK: brcl 0, 0x3c
+0xc0 0x04 0x00 0x00 0x00 0x00
+
+# 0x00000042:
+# CHECK: jgo 0x42
+0xc0 0x14 0x00 0x00 0x00 0x00
+
+# 0x00000048:
+# CHECK: jgh 0x48
+0xc0 0x24 0x00 0x00 0x00 0x00
+
+# 0x0000004e:
+# CHECK: jgnle 0x4e
+0xc0 0x34 0x00 0x00 0x00 0x00
+
+# 0x00000054:
+# CHECK: jgl 0x54
+0xc0 0x44 0x00 0x00 0x00 0x00
+
+# 0x0000005a:
+# CHECK: jgnhe 0x5a
+0xc0 0x54 0x00 0x00 0x00 0x00
+
+# 0x00000060:
+# CHECK: jglh 0x60
+0xc0 0x64 0x00 0x00 0x00 0x00
+
+# 0x00000066:
+# CHECK: jgne 0x66
+0xc0 0x74 0x00 0x00 0x00 0x00
+
+# 0x0000006c:
+# CHECK: jge 0x6c
+0xc0 0x84 0x00 0x00 0x00 0x00
+
+# 0x00000072:
+# CHECK: jgnlh 0x72
+0xc0 0x94 0x00 0x00 0x00 0x00
+
+# 0x00000078:
+# CHECK: jghe 0x78
+0xc0 0xa4 0x00 0x00 0x00 0x00
+
+# 0x0000007e:
+# CHECK: jgnl 0x7e
+0xc0 0xb4 0x00 0x00 0x00 0x00
+
+# 0x00000084:
+# CHECK: jgle 0x84
+0xc0 0xc4 0x00 0x00 0x00 0x00
+
+# 0x0000008a:
+# CHECK: jgnh 0x8a
+0xc0 0xd4 0x00 0x00 0x00 0x00
+
+# 0x00000090:
+# CHECK: jgno 0x90
+0xc0 0xe4 0x00 0x00 0x00 0x00
+
+# 0x00000096:
+# CHECK: jg 0x96
+0xc0 0xf4 0x00 0x00 0x00 0x00
+
+# 0x0000009c:
+# CHECK: brcl 0, 0x9a
+0xc0 0x04 0xff 0xff 0xff 0xff
+
+# 0x000000a2:
+# CHECK: brcl 0, 0xffffffff000000a2
+0xc0 0x04 0x80 0x00 0x00 0x00
+
+# 0x000000a8:
+# CHECK: brcl 0, 0x1000000a6
+0xc0 0x04 0x7f 0xff 0xff 0xff
+
+# 0x000000ae:
+# CHECK: jg 0xac
+0xc0 0xf4 0xff 0xff 0xff 0xff
+
+# 0x000000b4:
+# CHECK: jg 0xffffffff000000b4
+0xc0 0xf4 0x80 0x00 0x00 0x00
+
+# 0x000000ba:
+# CHECK: jg 0x1000000b8
+0xc0 0xf4 0x7f 0xff 0xff 0xff
+
+# 0x000000c0:
+# CHECK: brc 0, 0xc0
+0xa7 0x04 0x00 0x00
+
+# 0x000000c4:
+# CHECK: jo 0xc4
+0xa7 0x14 0x00 0x00
+
+# 0x000000c8:
+# CHECK: jh 0xc8
+0xa7 0x24 0x00 0x00
+
+# 0x000000cc:
+# CHECK: jnle 0xcc
+0xa7 0x34 0x00 0x00
+
+# 0x000000d0:
+# CHECK: jl 0xd0
+0xa7 0x44 0x00 0x00
+
+# 0x000000d4:
+# CHECK: jnhe 0xd4
+0xa7 0x54 0x00 0x00
+
+# 0x000000d8:
+# CHECK: jlh 0xd8
+0xa7 0x64 0x00 0x00
+
+# 0x000000dc:
+# CHECK: jne 0xdc
+0xa7 0x74 0x00 0x00
+
+# 0x000000e0:
+# CHECK: je 0xe0
+0xa7 0x84 0x00 0x00
+
+# 0x000000e4:
+# CHECK: jnlh 0xe4
+0xa7 0x94 0x00 0x00
+
+# 0x000000e8:
+# CHECK: jhe 0xe8
+0xa7 0xa4 0x00 0x00
+
+# 0x000000ec:
+# CHECK: jnl 0xec
+0xa7 0xb4 0x00 0x00
+
+# 0x000000f0:
+# CHECK: jle 0xf0
+0xa7 0xc4 0x00 0x00
+
+# 0x000000f4:
+# CHECK: jnh 0xf4
+0xa7 0xd4 0x00 0x00
+
+# 0x000000f8:
+# CHECK: jno 0xf8
+0xa7 0xe4 0x00 0x00
+
+# 0x000000fc:
+# CHECK: j 0xfc
+0xa7 0xf4 0x00 0x00
+
+# 0x00000100:
+# CHECK: brc 0, 0xfe
+0xa7 0x04 0xff 0xff
+
+# 0x00000104:
+# CHECK: brc 0, 0xffffffffffff0104
+0xa7 0x04 0x80 0x00
+
+# 0x00000108:
+# CHECK: brc 0, 0x10106
+0xa7 0x04 0x7f 0xff
+
+# 0x0000010c:
+# CHECK: j 0x10a
+0xa7 0xf4 0xff 0xff
+
+# 0x00000110:
+# CHECK: j 0xffffffffffff0110
+0xa7 0xf4 0x80 0x00
+
+# 0x00000114:
+# CHECK: j 0x10112
+0xa7 0xf4 0x7f 0xff
+
+# 0x00000118:
+# CHECK: cgfrl %r0, 0x118
+0xc6 0x0c 0x00 0x00 0x00 0x00
+
+# 0x0000011e:
+# CHECK: cgfrl %r15, 0x11e
+0xc6 0xfc 0x00 0x00 0x00 0x00
+
+# 0x00000124:
+# CHECK: cgfrl %r0, 0x122
+0xc6 0x0c 0xff 0xff 0xff 0xff
+
+# 0x0000012a:
+# CHECK: cgfrl %r15, 0x128
+0xc6 0xfc 0xff 0xff 0xff 0xff
+
+# 0x00000130:
+# CHECK: cgfrl %r0, 0xffffffff00000130
+0xc6 0x0c 0x80 0x00 0x00 0x00
+
+# 0x00000136:
+# CHECK: cgfrl %r15, 0xffffffff00000136
+0xc6 0xfc 0x80 0x00 0x00 0x00
+
+# 0x0000013c:
+# CHECK: cgfrl %r0, 0x10000013a
+0xc6 0x0c 0x7f 0xff 0xff 0xff
+
+# 0x00000142:
+# CHECK: cgfrl %r15, 0x100000140
+0xc6 0xfc 0x7f 0xff 0xff 0xff
+
+# 0x00000148:
+# CHECK: cghrl %r0, 0x148
+0xc6 0x04 0x00 0x00 0x00 0x00
+
+# 0x0000014e:
+# CHECK: cghrl %r15, 0x14e
+0xc6 0xf4 0x00 0x00 0x00 0x00
+
+# 0x00000154:
+# CHECK: cghrl %r0, 0x152
+0xc6 0x04 0xff 0xff 0xff 0xff
+
+# 0x0000015a:
+# CHECK: cghrl %r15, 0x158
+0xc6 0xf4 0xff 0xff 0xff 0xff
+
+# 0x00000160:
+# CHECK: cghrl %r0, 0xffffffff00000160
+0xc6 0x04 0x80 0x00 0x00 0x00
+
+# 0x00000166:
+# CHECK: cghrl %r15, 0xffffffff00000166
+0xc6 0xf4 0x80 0x00 0x00 0x00
+
+# 0x0000016c:
+# CHECK: cghrl %r0, 0x10000016a
+0xc6 0x04 0x7f 0xff 0xff 0xff
+
+# 0x00000172:
+# CHECK: cghrl %r15, 0x100000170
+0xc6 0xf4 0x7f 0xff 0xff 0xff
+
+# 0x00000178:
+# CHECK: cgrl %r0, 0x178
+0xc6 0x08 0x00 0x00 0x00 0x00
+
+# 0x0000017e:
+# CHECK: cgrl %r15, 0x17e
+0xc6 0xf8 0x00 0x00 0x00 0x00
+
+# 0x00000184:
+# CHECK: cgrl %r0, 0x182
+0xc6 0x08 0xff 0xff 0xff 0xff
+
+# 0x0000018a:
+# CHECK: cgrl %r15, 0x188
+0xc6 0xf8 0xff 0xff 0xff 0xff
+
+# 0x00000190:
+# CHECK: cgrl %r0, 0xffffffff00000190
+0xc6 0x08 0x80 0x00 0x00 0x00
+
+# 0x00000196:
+# CHECK: cgrl %r15, 0xffffffff00000196
+0xc6 0xf8 0x80 0x00 0x00 0x00
+
+# 0x0000019c:
+# CHECK: cgrl %r0, 0x10000019a
+0xc6 0x08 0x7f 0xff 0xff 0xff
+
+# 0x000001a2:
+# CHECK: cgrl %r15, 0x1000001a0
+0xc6 0xf8 0x7f 0xff 0xff 0xff
+
+# 0x000001a8:
+# CHECK: chrl %r0, 0x1a8
+0xc6 0x05 0x00 0x00 0x00 0x00
+
+# 0x000001ae:
+# CHECK: chrl %r15, 0x1ae
+0xc6 0xf5 0x00 0x00 0x00 0x00
+
+# 0x000001b4:
+# CHECK: chrl %r0, 0x1b2
+0xc6 0x05 0xff 0xff 0xff 0xff
+
+# 0x000001ba:
+# CHECK: chrl %r15, 0x1b8
+0xc6 0xf5 0xff 0xff 0xff 0xff
+
+# 0x000001c0:
+# CHECK: chrl %r0, 0xffffffff000001c0
+0xc6 0x05 0x80 0x00 0x00 0x00
+
+# 0x000001c6:
+# CHECK: chrl %r15, 0xffffffff000001c6
+0xc6 0xf5 0x80 0x00 0x00 0x00
+
+# 0x000001cc:
+# CHECK: chrl %r0, 0x1000001ca
+0xc6 0x05 0x7f 0xff 0xff 0xff
+
+# 0x000001d2:
+# CHECK: chrl %r15, 0x1000001d0
+0xc6 0xf5 0x7f 0xff 0xff 0xff
+
+# 0x000001d8:
+# CHECK: clgfrl %r0, 0x1d8
+0xc6 0x0e 0x00 0x00 0x00 0x00
+
+# 0x000001de:
+# CHECK: clgfrl %r15, 0x1de
+0xc6 0xfe 0x00 0x00 0x00 0x00
+
+# 0x000001e4:
+# CHECK: clgfrl %r0, 0x1e2
+0xc6 0x0e 0xff 0xff 0xff 0xff
+
+# 0x000001ea:
+# CHECK: clgfrl %r15, 0x1e8
+0xc6 0xfe 0xff 0xff 0xff 0xff
+
+# 0x000001f0:
+# CHECK: clgfrl %r0, 0xffffffff000001f0
+0xc6 0x0e 0x80 0x00 0x00 0x00
+
+# 0x000001f6:
+# CHECK: clgfrl %r15, 0xffffffff000001f6
+0xc6 0xfe 0x80 0x00 0x00 0x00
+
+# 0x000001fc:
+# CHECK: clgfrl %r0, 0x1000001fa
+0xc6 0x0e 0x7f 0xff 0xff 0xff
+
+# 0x00000202:
+# CHECK: clgfrl %r15, 0x100000200
+0xc6 0xfe 0x7f 0xff 0xff 0xff
+
+# 0x00000208:
+# CHECK: clghrl %r0, 0x208
+0xc6 0x06 0x00 0x00 0x00 0x00
+
+# 0x0000020e:
+# CHECK: clghrl %r15, 0x20e
+0xc6 0xf6 0x00 0x00 0x00 0x00
+
+# 0x00000214:
+# CHECK: clghrl %r0, 0x212
+0xc6 0x06 0xff 0xff 0xff 0xff
+
+# 0x0000021a:
+# CHECK: clghrl %r15, 0x218
+0xc6 0xf6 0xff 0xff 0xff 0xff
+
+# 0x00000220:
+# CHECK: clghrl %r0, 0xffffffff00000220
+0xc6 0x06 0x80 0x00 0x00 0x00
+
+# 0x00000226:
+# CHECK: clghrl %r15, 0xffffffff00000226
+0xc6 0xf6 0x80 0x00 0x00 0x00
+
+# 0x0000022c:
+# CHECK: clghrl %r0, 0x10000022a
+0xc6 0x06 0x7f 0xff 0xff 0xff
+
+# 0x00000232:
+# CHECK: clghrl %r15, 0x100000230
+0xc6 0xf6 0x7f 0xff 0xff 0xff
+
+# 0x00000238:
+# CHECK: clgrl %r0, 0x238
+0xc6 0x0a 0x00 0x00 0x00 0x00
+
+# 0x0000023e:
+# CHECK: clgrl %r15, 0x23e
+0xc6 0xfa 0x00 0x00 0x00 0x00
+
+# 0x00000244:
+# CHECK: clgrl %r0, 0x242
+0xc6 0x0a 0xff 0xff 0xff 0xff
+
+# 0x0000024a:
+# CHECK: clgrl %r15, 0x248
+0xc6 0xfa 0xff 0xff 0xff 0xff
+
+# 0x00000250:
+# CHECK: clgrl %r0, 0xffffffff00000250
+0xc6 0x0a 0x80 0x00 0x00 0x00
+
+# 0x00000256:
+# CHECK: clgrl %r15, 0xffffffff00000256
+0xc6 0xfa 0x80 0x00 0x00 0x00
+
+# 0x0000025c:
+# CHECK: clgrl %r0, 0x10000025a
+0xc6 0x0a 0x7f 0xff 0xff 0xff
+
+# 0x00000262:
+# CHECK: clgrl %r15, 0x100000260
+0xc6 0xfa 0x7f 0xff 0xff 0xff
+
+# 0x00000268:
+# CHECK: clhrl %r0, 0x268
+0xc6 0x07 0x00 0x00 0x00 0x00
+
+# 0x0000026e:
+# CHECK: clhrl %r15, 0x26e
+0xc6 0xf7 0x00 0x00 0x00 0x00
+
+# 0x00000274:
+# CHECK: clhrl %r0, 0x272
+0xc6 0x07 0xff 0xff 0xff 0xff
+
+# 0x0000027a:
+# CHECK: clhrl %r15, 0x278
+0xc6 0xf7 0xff 0xff 0xff 0xff
+
+# 0x00000280:
+# CHECK: clhrl %r0, 0xffffffff00000280
+0xc6 0x07 0x80 0x00 0x00 0x00
+
+# 0x00000286:
+# CHECK: clhrl %r15, 0xffffffff00000286
+0xc6 0xf7 0x80 0x00 0x00 0x00
+
+# 0x0000028c:
+# CHECK: clhrl %r0, 0x10000028a
+0xc6 0x07 0x7f 0xff 0xff 0xff
+
+# 0x00000292:
+# CHECK: clhrl %r15, 0x100000290
+0xc6 0xf7 0x7f 0xff 0xff 0xff
+
+# 0x00000298:
+# CHECK: clrl %r0, 0x298
+0xc6 0x0f 0x00 0x00 0x00 0x00
+
+# 0x0000029e:
+# CHECK: clrl %r15, 0x29e
+0xc6 0xff 0x00 0x00 0x00 0x00
+
+# 0x000002a4:
+# CHECK: clrl %r0, 0x2a2
+0xc6 0x0f 0xff 0xff 0xff 0xff
+
+# 0x000002aa:
+# CHECK: clrl %r15, 0x2a8
+0xc6 0xff 0xff 0xff 0xff 0xff
+
+# 0x000002b0:
+# CHECK: clrl %r0, 0xffffffff000002b0
+0xc6 0x0f 0x80 0x00 0x00 0x00
+
+# 0x000002b6:
+# CHECK: clrl %r15, 0xffffffff000002b6
+0xc6 0xff 0x80 0x00 0x00 0x00
+
+# 0x000002bc:
+# CHECK: clrl %r0, 0x1000002ba
+0xc6 0x0f 0x7f 0xff 0xff 0xff
+
+# 0x000002c2:
+# CHECK: clrl %r15, 0x1000002c0
+0xc6 0xff 0x7f 0xff 0xff 0xff
+
+# 0x000002c8:
+# CHECK: crl %r0, 0x2c8
+0xc6 0x0d 0x00 0x00 0x00 0x00
+
+# 0x000002ce:
+# CHECK: crl %r15, 0x2ce
+0xc6 0xfd 0x00 0x00 0x00 0x00
+
+# 0x000002d4:
+# CHECK: crl %r0, 0x2d2
+0xc6 0x0d 0xff 0xff 0xff 0xff
+
+# 0x000002da:
+# CHECK: crl %r15, 0x2d8
+0xc6 0xfd 0xff 0xff 0xff 0xff
+
+# 0x000002e0:
+# CHECK: crl %r0, 0xffffffff000002e0
+0xc6 0x0d 0x80 0x00 0x00 0x00
+
+# 0x000002e6:
+# CHECK: crl %r15, 0xffffffff000002e6
+0xc6 0xfd 0x80 0x00 0x00 0x00
+
+# 0x000002ec:
+# CHECK: crl %r0, 0x1000002ea
+0xc6 0x0d 0x7f 0xff 0xff 0xff
+
+# 0x000002f2:
+# CHECK: crl %r15, 0x1000002f0
+0xc6 0xfd 0x7f 0xff 0xff 0xff
+
+# 0x000002f8:
+# CHECK: larl %r0, 0x2f8
+0xc0 0x00 0x00 0x00 0x00 0x00
+
+# 0x000002fe:
+# CHECK: larl %r15, 0x2fe
+0xc0 0xf0 0x00 0x00 0x00 0x00
+
+# 0x00000304:
+# CHECK: larl %r0, 0x302
+0xc0 0x00 0xff 0xff 0xff 0xff
+
+# 0x0000030a:
+# CHECK: larl %r15, 0x308
+0xc0 0xf0 0xff 0xff 0xff 0xff
+
+# 0x00000310:
+# CHECK: larl %r0, 0xffffffff00000310
+0xc0 0x00 0x80 0x00 0x00 0x00
+
+# 0x00000316:
+# CHECK: larl %r15, 0xffffffff00000316
+0xc0 0xf0 0x80 0x00 0x00 0x00
+
+# 0x0000031c:
+# CHECK: larl %r0, 0x10000031a
+0xc0 0x00 0x7f 0xff 0xff 0xff
+
+# 0x00000322:
+# CHECK: larl %r15, 0x100000320
+0xc0 0xf0 0x7f 0xff 0xff 0xff
+
+# 0x00000328:
+# CHECK: lgfrl %r0, 0x328
+0xc4 0x0c 0x00 0x00 0x00 0x00
+
+# 0x0000032e:
+# CHECK: lgfrl %r15, 0x32e
+0xc4 0xfc 0x00 0x00 0x00 0x00
+
+# 0x00000334:
+# CHECK: lgfrl %r0, 0x332
+0xc4 0x0c 0xff 0xff 0xff 0xff
+
+# 0x0000033a:
+# CHECK: lgfrl %r15, 0x338
+0xc4 0xfc 0xff 0xff 0xff 0xff
+
+# 0x00000340:
+# CHECK: lgfrl %r0, 0xffffffff00000340
+0xc4 0x0c 0x80 0x00 0x00 0x00
+
+# 0x00000346:
+# CHECK: lgfrl %r15, 0xffffffff00000346
+0xc4 0xfc 0x80 0x00 0x00 0x00
+
+# 0x0000034c:
+# CHECK: lgfrl %r0, 0x10000034a
+0xc4 0x0c 0x7f 0xff 0xff 0xff
+
+# 0x00000352:
+# CHECK: lgfrl %r15, 0x100000350
+0xc4 0xfc 0x7f 0xff 0xff 0xff
+
+# 0x00000358:
+# CHECK: lghrl %r0, 0x358
+0xc4 0x04 0x00 0x00 0x00 0x00
+
+# 0x0000035e:
+# CHECK: lghrl %r15, 0x35e
+0xc4 0xf4 0x00 0x00 0x00 0x00
+
+# 0x00000364:
+# CHECK: lghrl %r0, 0x362
+0xc4 0x04 0xff 0xff 0xff 0xff
+
+# 0x0000036a:
+# CHECK: lghrl %r15, 0x368
+0xc4 0xf4 0xff 0xff 0xff 0xff
+
+# 0x00000370:
+# CHECK: lghrl %r0, 0xffffffff00000370
+0xc4 0x04 0x80 0x00 0x00 0x00
+
+# 0x00000376:
+# CHECK: lghrl %r15, 0xffffffff00000376
+0xc4 0xf4 0x80 0x00 0x00 0x00
+
+# 0x0000037c:
+# CHECK: lghrl %r0, 0x10000037a
+0xc4 0x04 0x7f 0xff 0xff 0xff
+
+# 0x00000382:
+# CHECK: lghrl %r15, 0x100000380
+0xc4 0xf4 0x7f 0xff 0xff 0xff
+
+# 0x00000388:
+# CHECK: lgrl %r0, 0x388
+0xc4 0x08 0x00 0x00 0x00 0x00
+
+# 0x0000038e:
+# CHECK: lgrl %r15, 0x38e
+0xc4 0xf8 0x00 0x00 0x00 0x00
+
+# 0x00000394:
+# CHECK: lgrl %r0, 0x392
+0xc4 0x08 0xff 0xff 0xff 0xff
+
+# 0x0000039a:
+# CHECK: lgrl %r15, 0x398
+0xc4 0xf8 0xff 0xff 0xff 0xff
+
+# 0x000003a0:
+# CHECK: lgrl %r0, 0xffffffff000003a0
+0xc4 0x08 0x80 0x00 0x00 0x00
+
+# 0x000003a6:
+# CHECK: lgrl %r15, 0xffffffff000003a6
+0xc4 0xf8 0x80 0x00 0x00 0x00
+
+# 0x000003ac:
+# CHECK: lgrl %r0, 0x1000003aa
+0xc4 0x08 0x7f 0xff 0xff 0xff
+
+# 0x000003b2:
+# CHECK: lgrl %r15, 0x1000003b0
+0xc4 0xf8 0x7f 0xff 0xff 0xff
+
+# 0x000003b8:
+# CHECK: lhrl %r0, 0x3b8
+0xc4 0x05 0x00 0x00 0x00 0x00
+
+# 0x000003be:
+# CHECK: lhrl %r15, 0x3be
+0xc4 0xf5 0x00 0x00 0x00 0x00
+
+# 0x000003c4:
+# CHECK: lhrl %r0, 0x3c2
+0xc4 0x05 0xff 0xff 0xff 0xff
+
+# 0x000003ca:
+# CHECK: lhrl %r15, 0x3c8
+0xc4 0xf5 0xff 0xff 0xff 0xff
+
+# 0x000003d0:
+# CHECK: lhrl %r0, 0xffffffff000003d0
+0xc4 0x05 0x80 0x00 0x00 0x00
+
+# 0x000003d6:
+# CHECK: lhrl %r15, 0xffffffff000003d6
+0xc4 0xf5 0x80 0x00 0x00 0x00
+
+# 0x000003dc:
+# CHECK: lhrl %r0, 0x1000003da
+0xc4 0x05 0x7f 0xff 0xff 0xff
+
+# 0x000003e2:
+# CHECK: lhrl %r15, 0x1000003e0
+0xc4 0xf5 0x7f 0xff 0xff 0xff
+
+# 0x000003e8:
+# CHECK: llgfrl %r0, 0x3e8
+0xc4 0x0e 0x00 0x00 0x00 0x00
+
+# 0x000003ee:
+# CHECK: llgfrl %r15, 0x3ee
+0xc4 0xfe 0x00 0x00 0x00 0x00
+
+# 0x000003f4:
+# CHECK: llgfrl %r0, 0x3f2
+0xc4 0x0e 0xff 0xff 0xff 0xff
+
+# 0x000003fa:
+# CHECK: llgfrl %r15, 0x3f8
+0xc4 0xfe 0xff 0xff 0xff 0xff
+
+# 0x00000400:
+# CHECK: llgfrl %r0, 0xffffffff00000400
+0xc4 0x0e 0x80 0x00 0x00 0x00
+
+# 0x00000406:
+# CHECK: llgfrl %r15, 0xffffffff00000406
+0xc4 0xfe 0x80 0x00 0x00 0x00
+
+# 0x0000040c:
+# CHECK: llgfrl %r0, 0x10000040a
+0xc4 0x0e 0x7f 0xff 0xff 0xff
+
+# 0x00000412:
+# CHECK: llgfrl %r15, 0x100000410
+0xc4 0xfe 0x7f 0xff 0xff 0xff
+
+# 0x00000418:
+# CHECK: llghrl %r0, 0x418
+0xc4 0x06 0x00 0x00 0x00 0x00
+
+# 0x0000041e:
+# CHECK: llghrl %r15, 0x41e
+0xc4 0xf6 0x00 0x00 0x00 0x00
+
+# 0x00000424:
+# CHECK: llghrl %r0, 0x422
+0xc4 0x06 0xff 0xff 0xff 0xff
+
+# 0x0000042a:
+# CHECK: llghrl %r15, 0x428
+0xc4 0xf6 0xff 0xff 0xff 0xff
+
+# 0x00000430:
+# CHECK: llghrl %r0, 0xffffffff00000430
+0xc4 0x06 0x80 0x00 0x00 0x00
+
+# 0x00000436:
+# CHECK: llghrl %r15, 0xffffffff00000436
+0xc4 0xf6 0x80 0x00 0x00 0x00
+
+# 0x0000043c:
+# CHECK: llghrl %r0, 0x10000043a
+0xc4 0x06 0x7f 0xff 0xff 0xff
+
+# 0x00000442:
+# CHECK: llghrl %r15, 0x100000440
+0xc4 0xf6 0x7f 0xff 0xff 0xff
+
+# 0x00000448:
+# CHECK: llhrl %r0, 0x448
+0xc4 0x02 0x00 0x00 0x00 0x00
+
+# 0x0000044e:
+# CHECK: llhrl %r15, 0x44e
+0xc4 0xf2 0x00 0x00 0x00 0x00
+
+# 0x00000454:
+# CHECK: llhrl %r0, 0x452
+0xc4 0x02 0xff 0xff 0xff 0xff
+
+# 0x0000045a:
+# CHECK: llhrl %r15, 0x458
+0xc4 0xf2 0xff 0xff 0xff 0xff
+
+# 0x00000460:
+# CHECK: llhrl %r0, 0xffffffff00000460
+0xc4 0x02 0x80 0x00 0x00 0x00
+
+# 0x00000466:
+# CHECK: llhrl %r15, 0xffffffff00000466
+0xc4 0xf2 0x80 0x00 0x00 0x00
+
+# 0x0000046c:
+# CHECK: llhrl %r0, 0x10000046a
+0xc4 0x02 0x7f 0xff 0xff 0xff
+
+# 0x00000472:
+# CHECK: llhrl %r15, 0x100000470
+0xc4 0xf2 0x7f 0xff 0xff 0xff
+
+# 0x00000478:
+# CHECK: lrl %r0, 0x478
+0xc4 0x0d 0x00 0x00 0x00 0x00
+
+# 0x0000047e:
+# CHECK: lrl %r15, 0x47e
+0xc4 0xfd 0x00 0x00 0x00 0x00
+
+# 0x00000484:
+# CHECK: lrl %r0, 0x482
+0xc4 0x0d 0xff 0xff 0xff 0xff
+
+# 0x0000048a:
+# CHECK: lrl %r15, 0x488
+0xc4 0xfd 0xff 0xff 0xff 0xff
+
+# 0x00000490:
+# CHECK: lrl %r0, 0xffffffff00000490
+0xc4 0x0d 0x80 0x00 0x00 0x00
+
+# 0x00000496:
+# CHECK: lrl %r15, 0xffffffff00000496
+0xc4 0xfd 0x80 0x00 0x00 0x00
+
+# 0x0000049c:
+# CHECK: lrl %r0, 0x10000049a
+0xc4 0x0d 0x7f 0xff 0xff 0xff
+
+# 0x000004a2:
+# CHECK: lrl %r15, 0x1000004a0
+0xc4 0xfd 0x7f 0xff 0xff 0xff
+
+# 0x000004a8:
+# CHECK: stgrl %r0, 0x4a8
+0xc4 0x0b 0x00 0x00 0x00 0x00
+
+# 0x000004ae:
+# CHECK: stgrl %r15, 0x4ae
+0xc4 0xfb 0x00 0x00 0x00 0x00
+
+# 0x000004b4:
+# CHECK: stgrl %r0, 0x4b2
+0xc4 0x0b 0xff 0xff 0xff 0xff
+
+# 0x000004ba:
+# CHECK: stgrl %r15, 0x4b8
+0xc4 0xfb 0xff 0xff 0xff 0xff
+
+# 0x000004c0:
+# CHECK: stgrl %r0, 0xffffffff000004c0
+0xc4 0x0b 0x80 0x00 0x00 0x00
+
+# 0x000004c6:
+# CHECK: stgrl %r15, 0xffffffff000004c6
+0xc4 0xfb 0x80 0x00 0x00 0x00
+
+# 0x000004cc:
+# CHECK: stgrl %r0, 0x1000004ca
+0xc4 0x0b 0x7f 0xff 0xff 0xff
+
+# 0x000004d2:
+# CHECK: stgrl %r15, 0x1000004d0
+0xc4 0xfb 0x7f 0xff 0xff 0xff
+
+# 0x000004d8:
+# CHECK: sthrl %r0, 0x4d8
+0xc4 0x07 0x00 0x00 0x00 0x00
+
+# 0x000004de:
+# CHECK: sthrl %r15, 0x4de
+0xc4 0xf7 0x00 0x00 0x00 0x00
+
+# 0x000004e4:
+# CHECK: sthrl %r0, 0x4e2
+0xc4 0x07 0xff 0xff 0xff 0xff
+
+# 0x000004ea:
+# CHECK: sthrl %r15, 0x4e8
+0xc4 0xf7 0xff 0xff 0xff 0xff
+
+# 0x000004f0:
+# CHECK: sthrl %r0, 0xffffffff000004f0
+0xc4 0x07 0x80 0x00 0x00 0x00
+
+# 0x000004f6:
+# CHECK: sthrl %r15, 0xffffffff000004f6
+0xc4 0xf7 0x80 0x00 0x00 0x00
+
+# 0x000004fc:
+# CHECK: sthrl %r0, 0x1000004fa
+0xc4 0x07 0x7f 0xff 0xff 0xff
+
+# 0x00000502:
+# CHECK: sthrl %r15, 0x100000500
+0xc4 0xf7 0x7f 0xff 0xff 0xff
+
+# 0x00000508:
+# CHECK: strl %r0, 0x508
+0xc4 0x0f 0x00 0x00 0x00 0x00
+
+# 0x0000050e:
+# CHECK: strl %r15, 0x50e
+0xc4 0xff 0x00 0x00 0x00 0x00
+
+# 0x00000514:
+# CHECK: strl %r0, 0x512
+0xc4 0x0f 0xff 0xff 0xff 0xff
+
+# 0x0000051a:
+# CHECK: strl %r15, 0x518
+0xc4 0xff 0xff 0xff 0xff 0xff
+
+# 0x00000520:
+# CHECK: strl %r0, 0xffffffff00000520
+0xc4 0x0f 0x80 0x00 0x00 0x00
+
+# 0x00000526:
+# CHECK: strl %r15, 0xffffffff00000526
+0xc4 0xff 0x80 0x00 0x00 0x00
+
+# 0x0000052c:
+# CHECK: strl %r0, 0x10000052a
+0xc4 0x0f 0x7f 0xff 0xff 0xff
+
+# 0x00000532:
+# CHECK: strl %r15, 0x100000530
+0xc4 0xff 0x7f 0xff 0xff 0xff
+
+# 0x00000538:
+# CHECK: cgrj %r0, %r0, 0, 0x538
+0xec 0x00 0x00 0x00 0x00 0x64
+
+# 0x0000053e:
+# CHECK: cgrj %r0, %r15, 0, 0x53e
+0xec 0x0f 0x00 0x00 0x00 0x64
+
+# 0x00000544:
+# CHECK: cgrj %r15, %r0, 0, 0x544
+0xec 0xf0 0x00 0x00 0x00 0x64
+
+# 0x0000054a:
+# CHECK: cgrj %r7, %r8, 0, 0x54a
+0xec 0x78 0x00 0x00 0x00 0x64
+
+# 0x00000550:
+# CHECK: cgrj %r0, %r0, 0, 0x54e
+0xec 0x00 0xff 0xff 0x00 0x64
+
+# 0x00000556:
+# CHECK: cgrj %r0, %r0, 0, 0xffffffffffff0556
+0xec 0x00 0x80 0x00 0x00 0x64
+
+# 0x0000055c:
+# CHECK: cgrj %r0, %r0, 0, 0x1055a
+0xec 0x00 0x7f 0xff 0x00 0x64
+
+# 0x00000562:
+# CHECK: cgrj %r0, %r0, 1, 0x562
+0xec 0x00 0x00 0x00 0x10 0x64
+
+# 0x00000568:
+# CHECK: cgrjh %r0, %r0, 0x568
+0xec 0x00 0x00 0x00 0x20 0x64
+
+# 0x0000056e:
+# CHECK: cgrj %r0, %r0, 3, 0x56e
+0xec 0x00 0x00 0x00 0x30 0x64
+
+# 0x00000574:
+# CHECK: cgrjl %r0, %r0, 0x574
+0xec 0x00 0x00 0x00 0x40 0x64
+
+# 0x0000057a:
+# CHECK: cgrj %r0, %r0, 5, 0x57a
+0xec 0x00 0x00 0x00 0x50 0x64
+
+# 0x00000580:
+# CHECK: cgrjlh %r0, %r0, 0x580
+0xec 0x00 0x00 0x00 0x60 0x64
+
+# 0x00000586:
+# CHECK: cgrj %r0, %r0, 7, 0x586
+0xec 0x00 0x00 0x00 0x70 0x64
+
+# 0x0000058c:
+# CHECK: cgrje %r0, %r0, 0x58c
+0xec 0x00 0x00 0x00 0x80 0x64
+
+# 0x00000592:
+# CHECK: cgrj %r0, %r0, 9, 0x592
+0xec 0x00 0x00 0x00 0x90 0x64
+
+# 0x00000598:
+# CHECK: cgrjhe %r0, %r0, 0x598
+0xec 0x00 0x00 0x00 0xa0 0x64
+
+# 0x0000059e:
+# CHECK: cgrj %r0, %r0, 11, 0x59e
+0xec 0x00 0x00 0x00 0xb0 0x64
+
+# 0x000005a4:
+# CHECK: cgrjle %r0, %r0, 0x5a4
+0xec 0x00 0x00 0x00 0xc0 0x64
+
+# 0x000005aa:
+# CHECK: cgrj %r0, %r0, 13, 0x5aa
+0xec 0x00 0x00 0x00 0xd0 0x64
+
+# 0x000005b0:
+# CHECK: cgrj %r0, %r0, 14, 0x5b0
+0xec 0x00 0x00 0x00 0xe0 0x64
+
+# 0x000005b6:
+# CHECK: cgrj %r0, %r0, 15, 0x5b6
+0xec 0x00 0x00 0x00 0xf0 0x64
+
+# 0x000005bc:
+# CHECK: crj %r0, %r0, 0, 0x5bc
+0xec 0x00 0x00 0x00 0x00 0x76
+
+# 0x000005c2:
+# CHECK: crj %r0, %r15, 0, 0x5c2
+0xec 0x0f 0x00 0x00 0x00 0x76
+
+# 0x000005c8:
+# CHECK: crj %r15, %r0, 0, 0x5c8
+0xec 0xf0 0x00 0x00 0x00 0x76
+
+# 0x000005ce:
+# CHECK: crj %r7, %r8, 0, 0x5ce
+0xec 0x78 0x00 0x00 0x00 0x76
+
+# 0x000005d4:
+# CHECK: crj %r0, %r0, 0, 0x5d2
+0xec 0x00 0xff 0xff 0x00 0x76
+
+# 0x000005da:
+# CHECK: crj %r0, %r0, 0, 0xffffffffffff05da
+0xec 0x00 0x80 0x00 0x00 0x76
+
+# 0x000005e0:
+# CHECK: crj %r0, %r0, 0, 0x105de
+0xec 0x00 0x7f 0xff 0x00 0x76
+
+# 0x000005e6:
+# CHECK: crj %r0, %r0, 1, 0x5e6
+0xec 0x00 0x00 0x00 0x10 0x76
+
+# 0x000005ec:
+# CHECK: crjh %r0, %r0, 0x5ec
+0xec 0x00 0x00 0x00 0x20 0x76
+
+# 0x000005f2:
+# CHECK: crj %r0, %r0, 3, 0x5f2
+0xec 0x00 0x00 0x00 0x30 0x76
+
+# 0x000005f8:
+# CHECK: crjl %r0, %r0, 0x5f8
+0xec 0x00 0x00 0x00 0x40 0x76
+
+# 0x000005fe:
+# CHECK: crj %r0, %r0, 5, 0x5fe
+0xec 0x00 0x00 0x00 0x50 0x76
+
+# 0x00000604:
+# CHECK: crjlh %r0, %r0, 0x604
+0xec 0x00 0x00 0x00 0x60 0x76
+
+# 0x0000060a:
+# CHECK: crj %r0, %r0, 7, 0x60a
+0xec 0x00 0x00 0x00 0x70 0x76
+
+# 0x00000610:
+# CHECK: crje %r0, %r0, 0x610
+0xec 0x00 0x00 0x00 0x80 0x76
+
+# 0x00000616:
+# CHECK: crj %r0, %r0, 9, 0x616
+0xec 0x00 0x00 0x00 0x90 0x76
+
+# 0x0000061c:
+# CHECK: crjhe %r0, %r0, 0x61c
+0xec 0x00 0x00 0x00 0xa0 0x76
+
+# 0x00000622:
+# CHECK: crj %r0, %r0, 11, 0x622
+0xec 0x00 0x00 0x00 0xb0 0x76
+
+# 0x00000628:
+# CHECK: crjle %r0, %r0, 0x628
+0xec 0x00 0x00 0x00 0xc0 0x76
+
+# 0x0000062e:
+# CHECK: crj %r0, %r0, 13, 0x62e
+0xec 0x00 0x00 0x00 0xd0 0x76
+
+# 0x00000634:
+# CHECK: crj %r0, %r0, 14, 0x634
+0xec 0x00 0x00 0x00 0xe0 0x76
+
+# 0x0000063a:
+# CHECK: crj %r0, %r0, 15, 0x63a
+0xec 0x00 0x00 0x00 0xf0 0x76
+
+# 0x00000640:
+# CHECK: cgij %r0, 0, 0, 0x640
+0xec 0x00 0x00 0x00 0x00 0x7c
+
+# 0x00000646:
+# CHECK: cgij %r0, -128, 0, 0x646
+0xec 0x00 0x00 0x00 0x80 0x7c
+
+# 0x0000064c:
+# CHECK: cgij %r0, -1, 0, 0x64c
+0xec 0x00 0x00 0x00 0xff 0x7c
+
+# 0x00000652:
+# CHECK: cgij %r0, 127, 0, 0x652
+0xec 0x00 0x00 0x00 0x7f 0x7c
+
+# 0x00000658:
+# CHECK: cgij %r15, 0, 0, 0x658
+0xec 0xf0 0x00 0x00 0x00 0x7c
+
+# 0x0000065e:
+# CHECK: cgij %r7, 100, 0, 0x65e
+0xec 0x70 0x00 0x00 0x64 0x7c
+
+# 0x00000664:
+# CHECK: cgij %r0, 0, 0, 0x662
+0xec 0x00 0xff 0xff 0x00 0x7c
+
+# 0x0000066a:
+# CHECK: cgij %r0, 0, 0, 0xffffffffffff066a
+0xec 0x00 0x80 0x00 0x00 0x7c
+
+# 0x00000670:
+# CHECK: cgij %r0, 0, 0, 0x1066e
+0xec 0x00 0x7f 0xff 0x00 0x7c
+
+# 0x00000676:
+# CHECK: cgij %r0, 0, 1, 0x676
+0xec 0x01 0x00 0x00 0x00 0x7c
+
+# 0x0000067c:
+# CHECK: cgijh %r0, 0, 0x67c
+0xec 0x02 0x00 0x00 0x00 0x7c
+
+# 0x00000682:
+# CHECK: cgij %r0, 0, 3, 0x682
+0xec 0x03 0x00 0x00 0x00 0x7c
+
+# 0x00000688:
+# CHECK: cgijl %r0, 0, 0x688
+0xec 0x04 0x00 0x00 0x00 0x7c
+
+# 0x0000068e:
+# CHECK: cgij %r0, 0, 5, 0x68e
+0xec 0x05 0x00 0x00 0x00 0x7c
+
+# 0x00000694:
+# CHECK: cgijlh %r0, 0, 0x694
+0xec 0x06 0x00 0x00 0x00 0x7c
+
+# 0x0000069a:
+# CHECK: cgij %r0, 0, 7, 0x69a
+0xec 0x07 0x00 0x00 0x00 0x7c
+
+# 0x000006a0:
+# CHECK: cgije %r0, 0, 0x6a0
+0xec 0x08 0x00 0x00 0x00 0x7c
+
+# 0x000006a6:
+# CHECK: cgij %r0, 0, 9, 0x6a6
+0xec 0x09 0x00 0x00 0x00 0x7c
+
+# 0x000006ac:
+# CHECK: cgijhe %r0, 0, 0x6ac
+0xec 0x0a 0x00 0x00 0x00 0x7c
+
+# 0x000006b2:
+# CHECK: cgij %r0, 0, 11, 0x6b2
+0xec 0x0b 0x00 0x00 0x00 0x7c
+
+# 0x000006b8:
+# CHECK: cgijle %r0, 0, 0x6b8
+0xec 0x0c 0x00 0x00 0x00 0x7c
+
+# 0x000006be:
+# CHECK: cgij %r0, 0, 13, 0x6be
+0xec 0x0d 0x00 0x00 0x00 0x7c
+
+# 0x000006c4:
+# CHECK: cgij %r0, 0, 14, 0x6c4
+0xec 0x0e 0x00 0x00 0x00 0x7c
+
+# 0x000006ca:
+# CHECK: cgij %r0, 0, 15, 0x6ca
+0xec 0x0f 0x00 0x00 0x00 0x7c
+
+# 0x000006d0:
+# CHECK: cij %r0, 0, 0, 0x6d0
+0xec 0x00 0x00 0x00 0x00 0x7e
+
+# 0x000006d6:
+# CHECK: cij %r0, -128, 0, 0x6d6
+0xec 0x00 0x00 0x00 0x80 0x7e
+
+# 0x000006dc:
+# CHECK: cij %r0, -1, 0, 0x6dc
+0xec 0x00 0x00 0x00 0xff 0x7e
+
+# 0x000006e2:
+# CHECK: cij %r0, 127, 0, 0x6e2
+0xec 0x00 0x00 0x00 0x7f 0x7e
+
+# 0x000006e8:
+# CHECK: cij %r15, 0, 0, 0x6e8
+0xec 0xf0 0x00 0x00 0x00 0x7e
+
+# 0x000006ee:
+# CHECK: cij %r7, 100, 0, 0x6ee
+0xec 0x70 0x00 0x00 0x64 0x7e
+
+# 0x000006f4:
+# CHECK: cij %r0, 0, 0, 0x6f2
+0xec 0x00 0xff 0xff 0x00 0x7e
+
+# 0x000006fa:
+# CHECK: cij %r0, 0, 0, 0xffffffffffff06fa
+0xec 0x00 0x80 0x00 0x00 0x7e
+
+# 0x00000700:
+# CHECK: cij %r0, 0, 0, 0x106fe
+0xec 0x00 0x7f 0xff 0x00 0x7e
+
+# 0x00000706:
+# CHECK: cij %r0, 0, 1, 0x706
+0xec 0x01 0x00 0x00 0x00 0x7e
+
+# 0x0000070c:
+# CHECK: cijh %r0, 0, 0x70c
+0xec 0x02 0x00 0x00 0x00 0x7e
+
+# 0x00000712:
+# CHECK: cij %r0, 0, 3, 0x712
+0xec 0x03 0x00 0x00 0x00 0x7e
+
+# 0x00000718:
+# CHECK: cijl %r0, 0, 0x718
+0xec 0x04 0x00 0x00 0x00 0x7e
+
+# 0x0000071e:
+# CHECK: cij %r0, 0, 5, 0x71e
+0xec 0x05 0x00 0x00 0x00 0x7e
+
+# 0x00000724:
+# CHECK: cijlh %r0, 0, 0x724
+0xec 0x06 0x00 0x00 0x00 0x7e
+
+# 0x0000072a:
+# CHECK: cij %r0, 0, 7, 0x72a
+0xec 0x07 0x00 0x00 0x00 0x7e
+
+# 0x00000730:
+# CHECK: cije %r0, 0, 0x730
+0xec 0x08 0x00 0x00 0x00 0x7e
+
+# 0x00000736:
+# CHECK: cij %r0, 0, 9, 0x736
+0xec 0x09 0x00 0x00 0x00 0x7e
+
+# 0x0000073c:
+# CHECK: cijhe %r0, 0, 0x73c
+0xec 0x0a 0x00 0x00 0x00 0x7e
+
+# 0x00000742:
+# CHECK: cij %r0, 0, 11, 0x742
+0xec 0x0b 0x00 0x00 0x00 0x7e
+
+# 0x00000748:
+# CHECK: cijle %r0, 0, 0x748
+0xec 0x0c 0x00 0x00 0x00 0x7e
+
+# 0x0000074e:
+# CHECK: cij %r0, 0, 13, 0x74e
+0xec 0x0d 0x00 0x00 0x00 0x7e
+
+# 0x00000754:
+# CHECK: cij %r0, 0, 14, 0x754
+0xec 0x0e 0x00 0x00 0x00 0x7e
+
+# 0x0000075a:
+# CHECK: cij %r0, 0, 15, 0x75a
+0xec 0x0f 0x00 0x00 0x00 0x7e
+
+# 0x00000760:
+# CHECK: brct %r0, 0x760
+0xa7 0x06 0x00 0x00
+
+# 0x00000764:
+# CHECK: brct %r1, 0x762
+0xa7 0x16 0xff 0xff
+
+# 0x00000768:
+# CHECK: brct %r9, 0xffffffffffff0768
+0xa7 0x96 0x80 0x00
+
+# 0x0000076c:
+# CHECK: brct %r15, 0x1076a
+0xa7 0xf6 0x7f 0xff
+
+# 0x00000770:
+# CHECK: brctg %r0, 0x770
+0xa7 0x07 0x00 0x00
+
+# 0x00000774:
+# CHECK: brctg %r1, 0x772
+0xa7 0x17 0xff 0xff
+
+# 0x00000778:
+# CHECK: brctg %r9, 0xffffffffffff0778
+0xa7 0x97 0x80 0x00
+
+# 0x0000077c:
+# CHECK: brctg %r15, 0x1077a
+0xa7 0xf7 0x7f 0xff
+
+# 0x00000780:
+# CHECK: pfdrl 0, 0x780
+0xc6 0x02 0x00 0x00 0x00 0x00
+
+# 0x00000786:
+# CHECK: pfdrl 15, 0x786
+0xc6 0xf2 0x00 0x00 0x00 0x00
+
+# 0x0000078c:
+# CHECK: pfdrl 0, 0x78a
+0xc6 0x02 0xff 0xff 0xff 0xff
+
+# 0x00000792:
+# CHECK: pfdrl 15, 0x790
+0xc6 0xf2 0xff 0xff 0xff 0xff
+
+# 0x00000798:
+# CHECK: pfdrl 0, 0xffffffff00000798
+0xc6 0x02 0x80 0x00 0x00 0x00
+
+# 0x0000079e:
+# CHECK: pfdrl 15, 0xffffffff0000079e
+0xc6 0xf2 0x80 0x00 0x00 0x00
+
+# 0x000007a4:
+# CHECK: pfdrl 0, 0x1000007a2
+0xc6 0x02 0x7f 0xff 0xff 0xff
+
+# 0x000007aa:
+# CHECK: pfdrl 15, 0x1000007a8
+0xc6 0xf2 0x7f 0xff 0xff 0xff
+
+# 0x000007b0:
+# CHECK: clgrj %r0, %r0, 0, 0x7b0
+0xec 0x00 0x00 0x00 0x00 0x65
+
+# 0x000007b6:
+# CHECK: clgrj %r0, %r15, 0, 0x7b6
+0xec 0x0f 0x00 0x00 0x00 0x65
+
+# 0x000007bc:
+# CHECK: clgrj %r15, %r0, 0, 0x7bc
+0xec 0xf0 0x00 0x00 0x00 0x65
+
+# 0x000007c2:
+# CHECK: clgrj %r7, %r8, 0, 0x7c2
+0xec 0x78 0x00 0x00 0x00 0x65
+
+# 0x000007c8:
+# CHECK: clgrj %r0, %r0, 0, 0x7c6
+0xec 0x00 0xff 0xff 0x00 0x65
+
+# 0x000007ce:
+# CHECK: clgrj %r0, %r0, 0, 0xffffffffffff07ce
+0xec 0x00 0x80 0x00 0x00 0x65
+
+# 0x000007d4:
+# CHECK: clgrj %r0, %r0, 0, 0x107d2
+0xec 0x00 0x7f 0xff 0x00 0x65
+
+# 0x000007da:
+# CHECK: clgrj %r0, %r0, 1, 0x7da
+0xec 0x00 0x00 0x00 0x10 0x65
+
+# 0x000007e0:
+# CHECK: clgrjh %r0, %r0, 0x7e0
+0xec 0x00 0x00 0x00 0x20 0x65
+
+# 0x000007e6:
+# CHECK: clgrj %r0, %r0, 3, 0x7e6
+0xec 0x00 0x00 0x00 0x30 0x65
+
+# 0x000007ec:
+# CHECK: clgrjl %r0, %r0, 0x7ec
+0xec 0x00 0x00 0x00 0x40 0x65
+
+# 0x000007f2:
+# CHECK: clgrj %r0, %r0, 5, 0x7f2
+0xec 0x00 0x00 0x00 0x50 0x65
+
+# 0x000007f8:
+# CHECK: clgrjlh %r0, %r0, 0x7f8
+0xec 0x00 0x00 0x00 0x60 0x65
+
+# 0x000007fe:
+# CHECK: clgrj %r0, %r0, 7, 0x7fe
+0xec 0x00 0x00 0x00 0x70 0x65
+
+# 0x00000804:
+# CHECK: clgrje %r0, %r0, 0x804
+0xec 0x00 0x00 0x00 0x80 0x65
+
+# 0x0000080a:
+# CHECK: clgrj %r0, %r0, 9, 0x80a
+0xec 0x00 0x00 0x00 0x90 0x65
+
+# 0x00000810:
+# CHECK: clgrjhe %r0, %r0, 0x810
+0xec 0x00 0x00 0x00 0xa0 0x65
+
+# 0x00000816:
+# CHECK: clgrj %r0, %r0, 11, 0x816
+0xec 0x00 0x00 0x00 0xb0 0x65
+
+# 0x0000081c:
+# CHECK: clgrjle %r0, %r0, 0x81c
+0xec 0x00 0x00 0x00 0xc0 0x65
+
+# 0x00000822:
+# CHECK: clgrj %r0, %r0, 13, 0x822
+0xec 0x00 0x00 0x00 0xd0 0x65
+
+# 0x00000828:
+# CHECK: clgrj %r0, %r0, 14, 0x828
+0xec 0x00 0x00 0x00 0xe0 0x65
+
+# 0x0000082e:
+# CHECK: clgrj %r0, %r0, 15, 0x82e
+0xec 0x00 0x00 0x00 0xf0 0x65
+
+# 0x00000834:
+# CHECK: clrj %r0, %r0, 0, 0x834
+0xec 0x00 0x00 0x00 0x00 0x77
+
+# 0x0000083a:
+# CHECK: clrj %r0, %r15, 0, 0x83a
+0xec 0x0f 0x00 0x00 0x00 0x77
+
+# 0x00000840:
+# CHECK: clrj %r15, %r0, 0, 0x840
+0xec 0xf0 0x00 0x00 0x00 0x77
+
+# 0x00000846:
+# CHECK: clrj %r7, %r8, 0, 0x846
+0xec 0x78 0x00 0x00 0x00 0x77
+
+# 0x0000084c:
+# CHECK: clrj %r0, %r0, 0, 0x84a
+0xec 0x00 0xff 0xff 0x00 0x77
+
+# 0x00000852:
+# CHECK: clrj %r0, %r0, 0, 0xffffffffffff0852
+0xec 0x00 0x80 0x00 0x00 0x77
+
+# 0x00000858:
+# CHECK: clrj %r0, %r0, 0, 0x10856
+0xec 0x00 0x7f 0xff 0x00 0x77
+
+# 0x0000085e:
+# CHECK: clrj %r0, %r0, 1, 0x85e
+0xec 0x00 0x00 0x00 0x10 0x77
+
+# 0x00000864:
+# CHECK: clrjh %r0, %r0, 0x864
+0xec 0x00 0x00 0x00 0x20 0x77
+
+# 0x0000086a:
+# CHECK: clrj %r0, %r0, 3, 0x86a
+0xec 0x00 0x00 0x00 0x30 0x77
+
+# 0x00000870:
+# CHECK: clrjl %r0, %r0, 0x870
+0xec 0x00 0x00 0x00 0x40 0x77
+
+# 0x00000876:
+# CHECK: clrj %r0, %r0, 5, 0x876
+0xec 0x00 0x00 0x00 0x50 0x77
+
+# 0x0000087c:
+# CHECK: clrjlh %r0, %r0, 0x87c
+0xec 0x00 0x00 0x00 0x60 0x77
+
+# 0x00000882:
+# CHECK: clrj %r0, %r0, 7, 0x882
+0xec 0x00 0x00 0x00 0x70 0x77
+
+# 0x00000888:
+# CHECK: clrje %r0, %r0, 0x888
+0xec 0x00 0x00 0x00 0x80 0x77
+
+# 0x0000088e:
+# CHECK: clrj %r0, %r0, 9, 0x88e
+0xec 0x00 0x00 0x00 0x90 0x77
+
+# 0x00000894:
+# CHECK: clrjhe %r0, %r0, 0x894
+0xec 0x00 0x00 0x00 0xa0 0x77
+
+# 0x0000089a:
+# CHECK: clrj %r0, %r0, 11, 0x89a
+0xec 0x00 0x00 0x00 0xb0 0x77
+
+# 0x000008a0:
+# CHECK: clrjle %r0, %r0, 0x8a0
+0xec 0x00 0x00 0x00 0xc0 0x77
+
+# 0x000008a6:
+# CHECK: clrj %r0, %r0, 13, 0x8a6
+0xec 0x00 0x00 0x00 0xd0 0x77
+
+# 0x000008ac:
+# CHECK: clrj %r0, %r0, 14, 0x8ac
+0xec 0x00 0x00 0x00 0xe0 0x77
+
+# 0x000008b2:
+# CHECK: clrj %r0, %r0, 15, 0x8b2
+0xec 0x00 0x00 0x00 0xf0 0x77
+
+# 0x000008b8:
+# CHECK: clgij %r0, 0, 0, 0x8b8
+0xec 0x00 0x00 0x00 0x00 0x7d
+
+# 0x000008be:
+# CHECK: clgij %r0, 127, 0, 0x8be
+0xec 0x00 0x00 0x00 0x7f 0x7d
+
+# 0x000008c4:
+# CHECK: clgij %r0, 128, 0, 0x8c4
+0xec 0x00 0x00 0x00 0x80 0x7d
+
+# 0x000008ca:
+# CHECK: clgij %r0, 255, 0, 0x8ca
+0xec 0x00 0x00 0x00 0xff 0x7d
+
+# 0x000008d0:
+# CHECK: clgij %r15, 0, 0, 0x8d0
+0xec 0xf0 0x00 0x00 0x00 0x7d
+
+# 0x000008d6:
+# CHECK: clgij %r7, 100, 0, 0x8d6
+0xec 0x70 0x00 0x00 0x64 0x7d
+
+# 0x000008dc:
+# CHECK: clgij %r0, 0, 0, 0x8da
+0xec 0x00 0xff 0xff 0x00 0x7d
+
+# 0x000008e2:
+# CHECK: clgij %r0, 0, 0, 0xffffffffffff08e2
+0xec 0x00 0x80 0x00 0x00 0x7d
+
+# 0x000008e8:
+# CHECK: clgij %r0, 0, 0, 0x108e6
+0xec 0x00 0x7f 0xff 0x00 0x7d
+
+# 0x000008ee:
+# CHECK: clgij %r0, 0, 1, 0x8ee
+0xec 0x01 0x00 0x00 0x00 0x7d
+
+# 0x000008f4:
+# CHECK: clgijh %r0, 0, 0x8f4
+0xec 0x02 0x00 0x00 0x00 0x7d
+
+# 0x000008fa:
+# CHECK: clgij %r0, 0, 3, 0x8fa
+0xec 0x03 0x00 0x00 0x00 0x7d
+
+# 0x00000900:
+# CHECK: clgijl %r0, 0, 0x900
+0xec 0x04 0x00 0x00 0x00 0x7d
+
+# 0x00000906:
+# CHECK: clgij %r0, 0, 5, 0x906
+0xec 0x05 0x00 0x00 0x00 0x7d
+
+# 0x0000090c:
+# CHECK: clgijlh %r0, 0, 0x90c
+0xec 0x06 0x00 0x00 0x00 0x7d
+
+# 0x00000912:
+# CHECK: clgij %r0, 0, 7, 0x912
+0xec 0x07 0x00 0x00 0x00 0x7d
+
+# 0x00000918:
+# CHECK: clgije %r0, 0, 0x918
+0xec 0x08 0x00 0x00 0x00 0x7d
+
+# 0x0000091e:
+# CHECK: clgij %r0, 0, 9, 0x91e
+0xec 0x09 0x00 0x00 0x00 0x7d
+
+# 0x00000924:
+# CHECK: clgijhe %r0, 0, 0x924
+0xec 0x0a 0x00 0x00 0x00 0x7d
+
+# 0x0000092a:
+# CHECK: clgij %r0, 0, 11, 0x92a
+0xec 0x0b 0x00 0x00 0x00 0x7d
+
+# 0x00000930:
+# CHECK: clgijle %r0, 0, 0x930
+0xec 0x0c 0x00 0x00 0x00 0x7d
+
+# 0x00000936:
+# CHECK: clgij %r0, 0, 13, 0x936
+0xec 0x0d 0x00 0x00 0x00 0x7d
+
+# 0x0000093c:
+# CHECK: clgij %r0, 0, 14, 0x93c
+0xec 0x0e 0x00 0x00 0x00 0x7d
+
+# 0x00000942:
+# CHECK: clgij %r0, 0, 15, 0x942
+0xec 0x0f 0x00 0x00 0x00 0x7d
+
+# 0x00000948:
+# CHECK: clij %r0, 0, 0, 0x948
+0xec 0x00 0x00 0x00 0x00 0x7f
+
+# 0x0000094e:
+# CHECK: clij %r0, 127, 0, 0x94e
+0xec 0x00 0x00 0x00 0x7f 0x7f
+
+# 0x00000954:
+# CHECK: clij %r0, 128, 0, 0x954
+0xec 0x00 0x00 0x00 0x80 0x7f
+
+# 0x0000095a:
+# CHECK: clij %r0, 255, 0, 0x95a
+0xec 0x00 0x00 0x00 0xff 0x7f
+
+# 0x00000960:
+# CHECK: clij %r15, 0, 0, 0x960
+0xec 0xf0 0x00 0x00 0x00 0x7f
+
+# 0x00000966:
+# CHECK: clij %r7, 100, 0, 0x966
+0xec 0x70 0x00 0x00 0x64 0x7f
+
+# 0x0000096c:
+# CHECK: clij %r0, 0, 0, 0x96a
+0xec 0x00 0xff 0xff 0x00 0x7f
+
+# 0x00000972:
+# CHECK: clij %r0, 0, 0, 0xffffffffffff0972
+0xec 0x00 0x80 0x00 0x00 0x7f
+
+# 0x00000978:
+# CHECK: clij %r0, 0, 0, 0x10976
+0xec 0x00 0x7f 0xff 0x00 0x7f
+
+# 0x0000097e:
+# CHECK: clij %r0, 0, 1, 0x97e
+0xec 0x01 0x00 0x00 0x00 0x7f
+
+# 0x00000984:
+# CHECK: clijh %r0, 0, 0x984
+0xec 0x02 0x00 0x00 0x00 0x7f
+
+# 0x0000098a:
+# CHECK: clij %r0, 0, 3, 0x98a
+0xec 0x03 0x00 0x00 0x00 0x7f
+
+# 0x00000990:
+# CHECK: clijl %r0, 0, 0x990
+0xec 0x04 0x00 0x00 0x00 0x7f
+
+# 0x00000996:
+# CHECK: clij %r0, 0, 5, 0x996
+0xec 0x05 0x00 0x00 0x00 0x7f
+
+# 0x0000099c:
+# CHECK: clijlh %r0, 0, 0x99c
+0xec 0x06 0x00 0x00 0x00 0x7f
+
+# 0x000009a2:
+# CHECK: clij %r0, 0, 7, 0x9a2
+0xec 0x07 0x00 0x00 0x00 0x7f
+
+# 0x000009a8:
+# CHECK: clije %r0, 0, 0x9a8
+0xec 0x08 0x00 0x00 0x00 0x7f
+
+# 0x000009ae:
+# CHECK: clij %r0, 0, 9, 0x9ae
+0xec 0x09 0x00 0x00 0x00 0x7f
+
+# 0x000009b4:
+# CHECK: clijhe %r0, 0, 0x9b4
+0xec 0x0a 0x00 0x00 0x00 0x7f
+
+# 0x000009ba:
+# CHECK: clij %r0, 0, 11, 0x9ba
+0xec 0x0b 0x00 0x00 0x00 0x7f
+
+# 0x000009c0:
+# CHECK: clijle %r0, 0, 0x9c0
+0xec 0x0c 0x00 0x00 0x00 0x7f
+
+# 0x000009c6:
+# CHECK: clij %r0, 0, 13, 0x9c6
+0xec 0x0d 0x00 0x00 0x00 0x7f
+
+# 0x000009cc:
+# CHECK: clij %r0, 0, 14, 0x9cc
+0xec 0x0e 0x00 0x00 0x00 0x7f
+
+# 0x000009d2:
+# CHECK: clij %r0, 0, 15, 0x9d2
+0xec 0x0f 0x00 0x00 0x00 0x7f
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
new file mode 100644
index 000000000000..78d348d7c194
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -0,0 +1,7751 @@
+# Test instructions that don't have PC-relative operands.
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+# CHECK: adbr %f0, %f0
+0xb3 0x1a 0x00 0x00
+
+# CHECK: adbr %f0, %f15
+0xb3 0x1a 0x00 0x0f
+
+# CHECK: adbr %f7, %f8
+0xb3 0x1a 0x00 0x78
+
+# CHECK: adbr %f15, %f0
+0xb3 0x1a 0x00 0xf0
+
+# CHECK: adb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x1a
+
+# CHECK: adb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x1a
+
+# CHECK: adb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x1a
+
+# CHECK: adb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x1a
+
+# CHECK: adb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x1a
+
+# CHECK: adb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x1a
+
+# CHECK: adb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x1a
+
+# CHECK: aebr %f0, %f0
+0xb3 0x0a 0x00 0x00
+
+# CHECK: aebr %f0, %f15
+0xb3 0x0a 0x00 0x0f
+
+# CHECK: aebr %f7, %f8
+0xb3 0x0a 0x00 0x78
+
+# CHECK: aebr %f15, %f0
+0xb3 0x0a 0x00 0xf0
+
+# CHECK: aeb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x0a
+
+# CHECK: aeb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x0a
+
+# CHECK: aeb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x0a
+
+# CHECK: aeb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x0a
+
+# CHECK: aeb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x0a
+
+# CHECK: aeb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x0a
+
+# CHECK: aeb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x0a
+
+# CHECK: afi %r0, -2147483648
+0xc2 0x09 0x80 0x00 0x00 0x00
+
+# CHECK: afi %r0, -1
+0xc2 0x09 0xff 0xff 0xff 0xff
+
+# CHECK: afi %r0, 0
+0xc2 0x09 0x00 0x00 0x00 0x00
+
+# CHECK: afi %r0, 1
+0xc2 0x09 0x00 0x00 0x00 0x01
+
+# CHECK: afi %r0, 2147483647
+0xc2 0x09 0x7f 0xff 0xff 0xff
+
+# CHECK: afi %r15, 0
+0xc2 0xf9 0x00 0x00 0x00 0x00
+
+# CHECK: agfi %r0, -2147483648
+0xc2 0x08 0x80 0x00 0x00 0x00
+
+# CHECK: agfi %r0, -1
+0xc2 0x08 0xff 0xff 0xff 0xff
+
+# CHECK: agfi %r0, 0
+0xc2 0x08 0x00 0x00 0x00 0x00
+
+# CHECK: agfi %r0, 1
+0xc2 0x08 0x00 0x00 0x00 0x01
+
+# CHECK: agfi %r0, 2147483647
+0xc2 0x08 0x7f 0xff 0xff 0xff
+
+# CHECK: agfi %r15, 0
+0xc2 0xf8 0x00 0x00 0x00 0x00
+
+# CHECK: agfr %r0, %r0
+0xb9 0x18 0x00 0x00
+
+# CHECK: agfr %r0, %r15
+0xb9 0x18 0x00 0x0f
+
+# CHECK: agfr %r15, %r0
+0xb9 0x18 0x00 0xf0
+
+# CHECK: agfr %r7, %r8
+0xb9 0x18 0x00 0x78
+
+# CHECK: agf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x18
+
+# CHECK: agf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x18
+
+# CHECK: agf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x18
+
+# CHECK: agf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x18
+
+# CHECK: agf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x18
+
+# CHECK: agf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x18
+
+# CHECK: agf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x18
+
+# CHECK: agf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x18
+
+# CHECK: agf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x18
+
+# CHECK: agf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x18
+
+# CHECK: aghi %r0, -32768
+0xa7 0x0b 0x80 0x00
+
+# CHECK: aghi %r0, -1
+0xa7 0x0b 0xff 0xff
+
+# CHECK: aghi %r0, 0
+0xa7 0x0b 0x00 0x00
+
+# CHECK: aghi %r0, 1
+0xa7 0x0b 0x00 0x01
+
+# CHECK: aghi %r0, 32767
+0xa7 0x0b 0x7f 0xff
+
+# CHECK: aghi %r15, 0
+0xa7 0xfb 0x00 0x00
+
+# CHECK: aghik %r0, %r1, -32768
+0xec 0x01 0x80 0x00 0x00 0xd9
+
+# CHECK: aghik %r2, %r3, -1
+0xec 0x23 0xff 0xff 0x00 0xd9
+
+# CHECK: aghik %r4, %r5, 0
+0xec 0x45 0x00 0x00 0x00 0xd9
+
+# CHECK: aghik %r6, %r7, 1
+0xec 0x67 0x00 0x01 0x00 0xd9
+
+# CHECK: aghik %r8, %r15, 32767
+0xec 0x8f 0x7f 0xff 0x00 0xd9
+
+# CHECK: agr %r0, %r0
+0xb9 0x08 0x00 0x00
+
+# CHECK: agr %r0, %r15
+0xb9 0x08 0x00 0x0f
+
+# CHECK: agr %r15, %r0
+0xb9 0x08 0x00 0xf0
+
+# CHECK: agr %r7, %r8
+0xb9 0x08 0x00 0x78
+
+# CHECK: agrk %r0, %r0, %r0
+0xb9 0xe8 0x00 0x00
+
+# CHECK: agrk %r2, %r3, %r4
+0xb9 0xe8 0x40 0x23
+
+# CHECK: agsi -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x7a
+
+# CHECK: agsi -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x7a
+
+# CHECK: agsi 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x7a
+
+# CHECK: agsi 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x7a
+
+# CHECK: agsi 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x7a
+
+# CHECK: agsi 0, -128
+0xeb 0x80 0x00 0x00 0x00 0x7a
+
+# CHECK: agsi 0, -1
+0xeb 0xff 0x00 0x00 0x00 0x7a
+
+# CHECK: agsi 0, 1
+0xeb 0x01 0x00 0x00 0x00 0x7a
+
+# CHECK: agsi 0, 127
+0xeb 0x7f 0x00 0x00 0x00 0x7a
+
+# CHECK: agsi 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x7a
+
+# CHECK: agsi 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x7a
+
+# CHECK: agsi 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x7a
+
+# CHECK: agsi 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x7a
+
+# CHECK: ag %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x08
+
+# CHECK: ag %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x08
+
+# CHECK: ag %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x08
+
+# CHECK: ag %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x08
+
+# CHECK: ag %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x08
+
+# CHECK: ag %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x08
+
+# CHECK: ag %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x08
+
+# CHECK: ag %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x08
+
+# CHECK: ag %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x08
+
+# CHECK: ag %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x08
+
+# CHECK: ahi %r0, -32768
+0xa7 0x0a 0x80 0x00
+
+# CHECK: ahi %r0, -1
+0xa7 0x0a 0xff 0xff
+
+# CHECK: ahi %r0, 0
+0xa7 0x0a 0x00 0x00
+
+# CHECK: ahi %r0, 1
+0xa7 0x0a 0x00 0x01
+
+# CHECK: ahi %r0, 32767
+0xa7 0x0a 0x7f 0xff
+
+# CHECK: ahi %r15, 0
+0xa7 0xfa 0x00 0x00
+
+# CHECK: ahik %r0, %r1, -32768
+0xec 0x01 0x80 0x00 0x00 0xd8
+
+# CHECK: ahik %r2, %r3, -1
+0xec 0x23 0xff 0xff 0x00 0xd8
+
+# CHECK: ahik %r4, %r5, 0
+0xec 0x45 0x00 0x00 0x00 0xd8
+
+# CHECK: ahik %r6, %r7, 1
+0xec 0x67 0x00 0x01 0x00 0xd8
+
+# CHECK: ahik %r8, %r15, 32767
+0xec 0x8f 0x7f 0xff 0x00 0xd8
+
+# CHECK: ah %r0, 0
+0x4a 0x00 0x00 0x00
+
+# CHECK: ah %r0, 4095
+0x4a 0x00 0x0f 0xff
+
+# CHECK: ah %r0, 0(%r1)
+0x4a 0x00 0x10 0x00
+
+# CHECK: ah %r0, 0(%r15)
+0x4a 0x00 0xf0 0x00
+
+# CHECK: ah %r0, 4095(%r1,%r15)
+0x4a 0x01 0xff 0xff
+
+# CHECK: ah %r0, 4095(%r15,%r1)
+0x4a 0x0f 0x1f 0xff
+
+# CHECK: ah %r15, 0
+0x4a 0xf0 0x00 0x00
+
+# CHECK: ahy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x7a
+
+# CHECK: ahy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x7a
+
+# CHECK: ahy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x7a
+
+# CHECK: ahy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x7a
+
+# CHECK: ahy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x7a
+
+# CHECK: ahy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x7a
+
+# CHECK: ahy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x7a
+
+# CHECK: ahy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x7a
+
+# CHECK: ahy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x7a
+
+# CHECK: ahy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x7a
+
+# CHECK: aih %r0, -2147483648
+0xcc 0x08 0x80 0x00 0x00 0x00
+
+# CHECK: aih %r0, -1
+0xcc 0x08 0xff 0xff 0xff 0xff
+
+# CHECK: aih %r0, 0
+0xcc 0x08 0x00 0x00 0x00 0x00
+
+# CHECK: aih %r0, 1
+0xcc 0x08 0x00 0x00 0x00 0x01
+
+# CHECK: aih %r0, 2147483647
+0xcc 0x08 0x7f 0xff 0xff 0xff
+
+# CHECK: aih %r15, 0
+0xcc 0xf8 0x00 0x00 0x00 0x00
+
+# CHECK: alcgr %r0, %r0
+0xb9 0x88 0x00 0x00
+
+# CHECK: alcgr %r0, %r15
+0xb9 0x88 0x00 0x0f
+
+# CHECK: alcgr %r15, %r0
+0xb9 0x88 0x00 0xf0
+
+# CHECK: alcgr %r7, %r8
+0xb9 0x88 0x00 0x78
+
+# CHECK: alcg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x88
+
+# CHECK: alcg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x88
+
+# CHECK: alcg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x88
+
+# CHECK: alcg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x88
+
+# CHECK: alcg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x88
+
+# CHECK: alcg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x88
+
+# CHECK: alcg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x88
+
+# CHECK: alcg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x88
+
+# CHECK: alcg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x88
+
+# CHECK: alcg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x88
+
+# CHECK: alcr %r0, %r0
+0xb9 0x98 0x00 0x00
+
+# CHECK: alcr %r0, %r15
+0xb9 0x98 0x00 0x0f
+
+# CHECK: alcr %r15, %r0
+0xb9 0x98 0x00 0xf0
+
+# CHECK: alcr %r7, %r8
+0xb9 0x98 0x00 0x78
+
+# CHECK: alc %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x98
+
+# CHECK: alc %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x98
+
+# CHECK: alc %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x98
+
+# CHECK: alc %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x98
+
+# CHECK: alc %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x98
+
+# CHECK: alc %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x98
+
+# CHECK: alc %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x98
+
+# CHECK: alc %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x98
+
+# CHECK: alc %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x98
+
+# CHECK: alc %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x98
+
+# CHECK: alfi %r0, 0
+0xc2 0x0b 0x00 0x00 0x00 0x00
+
+# CHECK: alfi %r0, 4294967295
+0xc2 0x0b 0xff 0xff 0xff 0xff
+
+# CHECK: alfi %r15, 0
+0xc2 0xfb 0x00 0x00 0x00 0x00
+
+# CHECK: algfi %r0, 0
+0xc2 0x0a 0x00 0x00 0x00 0x00
+
+# CHECK: algfi %r0, 4294967295
+0xc2 0x0a 0xff 0xff 0xff 0xff
+
+# CHECK: algfi %r15, 0
+0xc2 0xfa 0x00 0x00 0x00 0x00
+
+# CHECK: algfr %r0, %r0
+0xb9 0x1a 0x00 0x00
+
+# CHECK: algfr %r0, %r15
+0xb9 0x1a 0x00 0x0f
+
+# CHECK: algfr %r15, %r0
+0xb9 0x1a 0x00 0xf0
+
+# CHECK: algfr %r7, %r8
+0xb9 0x1a 0x00 0x78
+
+# CHECK: algf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x1a
+
+# CHECK: algf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x1a
+
+# CHECK: algf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x1a
+
+# CHECK: algf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x1a
+
+# CHECK: algf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x1a
+
+# CHECK: algf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x1a
+
+# CHECK: algf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x1a
+
+# CHECK: algf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x1a
+
+# CHECK: algf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x1a
+
+# CHECK: algf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x1a
+
+# CHECK: algr %r0, %r0
+0xb9 0x0a 0x00 0x00
+
+# CHECK: algr %r0, %r15
+0xb9 0x0a 0x00 0x0f
+
+# CHECK: algr %r15, %r0
+0xb9 0x0a 0x00 0xf0
+
+# CHECK: algr %r7, %r8
+0xb9 0x0a 0x00 0x78
+
+# CHECK: algrk %r0, %r0, %r0
+0xb9 0xea 0x00 0x00
+
+# CHECK: algrk %r2, %r3, %r4
+0xb9 0xea 0x40 0x23
+
+# CHECK: alg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x0a
+
+# CHECK: alg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x0a
+
+# CHECK: alg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x0a
+
+# CHECK: alg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x0a
+
+# CHECK: alg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x0a
+
+# CHECK: alg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x0a
+
+# CHECK: alg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x0a
+
+# CHECK: alg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x0a
+
+# CHECK: alg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x0a
+
+# CHECK: alg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x0a
+
+# CHECK: alghsik %r0, %r1, -32768
+0xec 0x01 0x80 0x00 0x00 0xdb
+
+# CHECK: alghsik %r2, %r3, -1
+0xec 0x23 0xff 0xff 0x00 0xdb
+
+# CHECK: alghsik %r4, %r5, 0
+0xec 0x45 0x00 0x00 0x00 0xdb
+
+# CHECK: alghsik %r6, %r7, 1
+0xec 0x67 0x00 0x01 0x00 0xdb
+
+# CHECK: alghsik %r8, %r15, 32767
+0xec 0x8f 0x7f 0xff 0x00 0xdb
+
+# CHECK: alhsik %r0, %r1, -32768
+0xec 0x01 0x80 0x00 0x00 0xda
+
+# CHECK: alhsik %r2, %r3, -1
+0xec 0x23 0xff 0xff 0x00 0xda
+
+# CHECK: alhsik %r4, %r5, 0
+0xec 0x45 0x00 0x00 0x00 0xda
+
+# CHECK: alhsik %r6, %r7, 1
+0xec 0x67 0x00 0x01 0x00 0xda
+
+# CHECK: alhsik %r8, %r15, 32767
+0xec 0x8f 0x7f 0xff 0x00 0xda
+
+# CHECK: alr %r0, %r0
+0x1e 0x00
+
+# CHECK: alr %r0, %r15
+0x1e 0x0f
+
+# CHECK: alr %r15, %r0
+0x1e 0xf0
+
+# CHECK: alr %r7, %r8
+0x1e 0x78
+
+# CHECK: alrk %r0, %r0, %r0
+0xb9 0xfa 0x00 0x00
+
+# CHECK: alrk %r2, %r3, %r4
+0xb9 0xfa 0x40 0x23
+
+# CHECK: al %r0, 0
+0x5e 0x00 0x00 0x00
+
+# CHECK: al %r0, 4095
+0x5e 0x00 0x0f 0xff
+
+# CHECK: al %r0, 0(%r1)
+0x5e 0x00 0x10 0x00
+
+# CHECK: al %r0, 0(%r15)
+0x5e 0x00 0xf0 0x00
+
+# CHECK: al %r0, 4095(%r1,%r15)
+0x5e 0x01 0xff 0xff
+
+# CHECK: al %r0, 4095(%r15,%r1)
+0x5e 0x0f 0x1f 0xff
+
+# CHECK: al %r15, 0
+0x5e 0xf0 0x00 0x00
+
+# CHECK: aly %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x5e
+
+# CHECK: aly %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x5e
+
+# CHECK: aly %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x5e
+
+# CHECK: aly %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x5e
+
+# CHECK: aly %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x5e
+
+# CHECK: aly %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x5e
+
+# CHECK: aly %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x5e
+
+# CHECK: aly %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x5e
+
+# CHECK: aly %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x5e
+
+# CHECK: aly %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x5e
+
+# CHECK: ar %r0, %r0
+0x1a 0x00
+
+# CHECK: ar %r0, %r15
+0x1a 0x0f
+
+# CHECK: ar %r15, %r0
+0x1a 0xf0
+
+# CHECK: ar %r7, %r8
+0x1a 0x78
+
+# CHECK: ark %r0, %r0, %r0
+0xb9 0xf8 0x00 0x00
+
+# CHECK: ark %r2, %r3, %r4
+0xb9 0xf8 0x40 0x23
+
+# CHECK: asi -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x6a
+
+# CHECK: asi -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x6a
+
+# CHECK: asi 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x6a
+
+# CHECK: asi 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x6a
+
+# CHECK: asi 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x6a
+
+# CHECK: asi 0, -128
+0xeb 0x80 0x00 0x00 0x00 0x6a
+
+# CHECK: asi 0, -1
+0xeb 0xff 0x00 0x00 0x00 0x6a
+
+# CHECK: asi 0, 1
+0xeb 0x01 0x00 0x00 0x00 0x6a
+
+# CHECK: asi 0, 127
+0xeb 0x7f 0x00 0x00 0x00 0x6a
+
+# CHECK: asi 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x6a
+
+# CHECK: asi 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x6a
+
+# CHECK: asi 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x6a
+
+# CHECK: asi 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x6a
+
+# CHECK: a %r0, 0
+0x5a 0x00 0x00 0x00
+
+# CHECK: a %r0, 4095
+0x5a 0x00 0x0f 0xff
+
+# CHECK: a %r0, 0(%r1)
+0x5a 0x00 0x10 0x00
+
+# CHECK: a %r0, 0(%r15)
+0x5a 0x00 0xf0 0x00
+
+# CHECK: a %r0, 4095(%r1,%r15)
+0x5a 0x01 0xff 0xff
+
+# CHECK: a %r0, 4095(%r15,%r1)
+0x5a 0x0f 0x1f 0xff
+
+# CHECK: a %r15, 0
+0x5a 0xf0 0x00 0x00
+
+# CHECK: axbr %f0, %f0
+0xb3 0x4a 0x00 0x00
+
+# CHECK: axbr %f0, %f13
+0xb3 0x4a 0x00 0x0d
+
+# CHECK: axbr %f8, %f8
+0xb3 0x4a 0x00 0x88
+
+# CHECK: axbr %f13, %f0
+0xb3 0x4a 0x00 0xd0
+
+# CHECK: ay %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x5a
+
+# CHECK: ay %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x5a
+
+# CHECK: ay %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x5a
+
+# CHECK: ay %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x5a
+
+# CHECK: ay %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x5a
+
+# CHECK: ay %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x5a
+
+# CHECK: ay %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x5a
+
+# CHECK: ay %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x5a
+
+# CHECK: ay %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x5a
+
+# CHECK: ay %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x5a
+
+# CHECK: basr %r0, %r1
+0x0d 0x01
+
+# CHECK: basr %r0, %r15
+0x0d 0x0f
+
+# CHECK: basr %r14, %r9
+0x0d 0xe9
+
+# CHECK: basr %r15, %r1
+0x0d 0xf1
+
+# CHECK: bcr 0, %r14
+0x07 0x0e
+
+# CHECK: bor %r13
+0x07 0x1d
+
+# CHECK: bhr %r12
+0x07 0x2c
+
+# CHECK: bnler %r11
+0x07 0x3b
+
+# CHECK: blr %r10
+0x07 0x4a
+
+# CHECK: bnher %r9
+0x07 0x59
+
+# CHECK: blhr %r8
+0x07 0x68
+
+# CHECK: bner %r7
+0x07 0x77
+
+# CHECK: ber %r6
+0x07 0x86
+
+# CHECK: bnlhr %r5
+0x07 0x95
+
+# CHECK: bher %r4
+0x07 0xa4
+
+# CHECK: bnlr %r3
+0x07 0xb3
+
+# CHECK: bler %r2
+0x07 0xc2
+
+# CHECK: bnhr %r1
+0x07 0xd1
+
+# CHECK: bnor %r0
+0x07 0xe0
+
+# CHECK: br %r1
+0x07 0xf1
+
+# CHECK: br %r14
+0x07 0xfe
+
+# CHECK: br %r15
+0x07 0xff
+
+# CHECK: cdbr %f0, %f0
+0xb3 0x19 0x00 0x00
+
+# CHECK: cdbr %f0, %f15
+0xb3 0x19 0x00 0x0f
+
+# CHECK: cdbr %f7, %f8
+0xb3 0x19 0x00 0x78
+
+# CHECK: cdbr %f15, %f0
+0xb3 0x19 0x00 0xf0
+
+# CHECK: cdb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x19
+
+# CHECK: cdb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x19
+
+# CHECK: cdb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x19
+
+# CHECK: cdb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x19
+
+# CHECK: cdb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x19
+
+# CHECK: cdb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x19
+
+# CHECK: cdb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x19
+
+# CHECK: cdfbr %f0, %r0
+0xb3 0x95 0x00 0x00
+
+# CHECK: cdfbr %f0, %r15
+0xb3 0x95 0x00 0x0f
+
+# CHECK: cdfbr %f15, %r0
+0xb3 0x95 0x00 0xf0
+
+# CHECK: cdfbr %f7, %r8
+0xb3 0x95 0x00 0x78
+
+# CHECK: cdfbr %f15, %r15
+0xb3 0x95 0x00 0xff
+
+# CHECK: cdgbr %f0, %r0
+0xb3 0xa5 0x00 0x00
+
+# CHECK: cdgbr %f0, %r15
+0xb3 0xa5 0x00 0x0f
+
+# CHECK: cdgbr %f15, %r0
+0xb3 0xa5 0x00 0xf0
+
+# CHECK: cdgbr %f7, %r8
+0xb3 0xa5 0x00 0x78
+
+# CHECK: cdgbr %f15, %r15
+0xb3 0xa5 0x00 0xff
+
+# CHECK: cebr %f0, %f0
+0xb3 0x09 0x00 0x00
+
+# CHECK: cebr %f0, %f15
+0xb3 0x09 0x00 0x0f
+
+# CHECK: cebr %f7, %f8
+0xb3 0x09 0x00 0x78
+
+# CHECK: cebr %f15, %f0
+0xb3 0x09 0x00 0xf0
+
+# CHECK: ceb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x09
+
+# CHECK: ceb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x09
+
+# CHECK: ceb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x09
+
+# CHECK: ceb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x09
+
+# CHECK: ceb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x09
+
+# CHECK: ceb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x09
+
+# CHECK: ceb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x09
+
+# CHECK: cefbr %f0, %r0
+0xb3 0x94 0x00 0x00
+
+# CHECK: cefbr %f0, %r15
+0xb3 0x94 0x00 0x0f
+
+# CHECK: cefbr %f15, %r0
+0xb3 0x94 0x00 0xf0
+
+# CHECK: cefbr %f7, %r8
+0xb3 0x94 0x00 0x78
+
+# CHECK: cefbr %f15, %r15
+0xb3 0x94 0x00 0xff
+
+# CHECK: cegbr %f0, %r0
+0xb3 0xa4 0x00 0x00
+
+# CHECK: cegbr %f0, %r15
+0xb3 0xa4 0x00 0x0f
+
+# CHECK: cegbr %f15, %r0
+0xb3 0xa4 0x00 0xf0
+
+# CHECK: cegbr %f7, %r8
+0xb3 0xa4 0x00 0x78
+
+# CHECK: cegbr %f15, %r15
+0xb3 0xa4 0x00 0xff
+
+# CHECK: cfdbr %r0, 0, %f0
+0xb3 0x99 0x00 0x00
+
+# CHECK: cfdbr %r0, 0, %f15
+0xb3 0x99 0x00 0x0f
+
+# CHECK: cfdbr %r0, 15, %f0
+0xb3 0x99 0xf0 0x00
+
+# CHECK: cfdbr %r4, 5, %f6
+0xb3 0x99 0x50 0x46
+
+# CHECK: cfdbr %r15, 0, %f0
+0xb3 0x99 0x00 0xf0
+
+# CHECK: cfebr %r0, 0, %f0
+0xb3 0x98 0x00 0x00
+
+# CHECK: cfebr %r0, 0, %f15
+0xb3 0x98 0x00 0x0f
+
+# CHECK: cfebr %r0, 15, %f0
+0xb3 0x98 0xf0 0x00
+
+# CHECK: cfebr %r4, 5, %f6
+0xb3 0x98 0x50 0x46
+
+# CHECK: cfebr %r15, 0, %f0
+0xb3 0x98 0x00 0xf0
+
+# CHECK: cfi %r0, -2147483648
+0xc2 0x0d 0x80 0x00 0x00 0x00
+
+# CHECK: cfi %r0, -1
+0xc2 0x0d 0xff 0xff 0xff 0xff
+
+# CHECK: cfi %r0, 0
+0xc2 0x0d 0x00 0x00 0x00 0x00
+
+# CHECK: cfi %r0, 1
+0xc2 0x0d 0x00 0x00 0x00 0x01
+
+# CHECK: cfi %r0, 2147483647
+0xc2 0x0d 0x7f 0xff 0xff 0xff
+
+# CHECK: cfi %r15, 0
+0xc2 0xfd 0x00 0x00 0x00 0x00
+
+# CHECK: cfxbr %r0, 0, %f0
+0xb3 0x9a 0x00 0x00
+
+# CHECK: cfxbr %r0, 0, %f13
+0xb3 0x9a 0x00 0x0d
+
+# CHECK: cfxbr %r0, 15, %f0
+0xb3 0x9a 0xf0 0x00
+
+# CHECK: cfxbr %r4, 5, %f8
+0xb3 0x9a 0x50 0x48
+
+# CHECK: cfxbr %r15, 0, %f0
+0xb3 0x9a 0x00 0xf0
+
+# CHECK: cgdbr %r0, 0, %f0
+0xb3 0xa9 0x00 0x00
+
+# CHECK: cgdbr %r0, 0, %f15
+0xb3 0xa9 0x00 0x0f
+
+# CHECK: cgdbr %r0, 15, %f0
+0xb3 0xa9 0xf0 0x00
+
+# CHECK: cgdbr %r4, 5, %f6
+0xb3 0xa9 0x50 0x46
+
+# CHECK: cgdbr %r15, 0, %f0
+0xb3 0xa9 0x00 0xf0
+
+# CHECK: cgebr %r0, 0, %f0
+0xb3 0xa8 0x00 0x00
+
+# CHECK: cgebr %r0, 0, %f15
+0xb3 0xa8 0x00 0x0f
+
+# CHECK: cgebr %r0, 15, %f0
+0xb3 0xa8 0xf0 0x00
+
+# CHECK: cgebr %r4, 5, %f6
+0xb3 0xa8 0x50 0x46
+
+# CHECK: cgebr %r15, 0, %f0
+0xb3 0xa8 0x00 0xf0
+
+# CHECK: cgfi %r0, -2147483648
+0xc2 0x0c 0x80 0x00 0x00 0x00
+
+# CHECK: cgfi %r0, -1
+0xc2 0x0c 0xff 0xff 0xff 0xff
+
+# CHECK: cgfi %r0, 0
+0xc2 0x0c 0x00 0x00 0x00 0x00
+
+# CHECK: cgfi %r0, 1
+0xc2 0x0c 0x00 0x00 0x00 0x01
+
+# CHECK: cgfi %r0, 2147483647
+0xc2 0x0c 0x7f 0xff 0xff 0xff
+
+# CHECK: cgfi %r15, 0
+0xc2 0xfc 0x00 0x00 0x00 0x00
+
+# CHECK: cgfr %r0, %r0
+0xb9 0x30 0x00 0x00
+
+# CHECK: cgfr %r0, %r15
+0xb9 0x30 0x00 0x0f
+
+# CHECK: cgfr %r15, %r0
+0xb9 0x30 0x00 0xf0
+
+# CHECK: cgfr %r7, %r8
+0xb9 0x30 0x00 0x78
+
+# CHECK: cgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x30
+
+# CHECK: cgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x30
+
+# CHECK: cgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x30
+
+# CHECK: cgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x30
+
+# CHECK: cgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x30
+
+# CHECK: cgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x30
+
+# CHECK: cgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x30
+
+# CHECK: cgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x30
+
+# CHECK: cgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x30
+
+# CHECK: cgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x30
+
+# CHECK: cghi %r0, -32768
+0xa7 0x0f 0x80 0x00
+
+# CHECK: cghi %r0, -1
+0xa7 0x0f 0xff 0xff
+
+# CHECK: cghi %r0, 0
+0xa7 0x0f 0x00 0x00
+
+# CHECK: cghi %r0, 1
+0xa7 0x0f 0x00 0x01
+
+# CHECK: cghi %r0, 32767
+0xa7 0x0f 0x7f 0xff
+
+# CHECK: cghi %r15, 0
+0xa7 0xff 0x00 0x00
+
+# CHECK: cghsi 0, 0
+0xe5 0x58 0x00 0x00 0x00 0x00
+
+# CHECK: cghsi 4095, 0
+0xe5 0x58 0x0f 0xff 0x00 0x00
+
+# CHECK: cghsi 0, -32768
+0xe5 0x58 0x00 0x00 0x80 0x00
+
+# CHECK: cghsi 0, -1
+0xe5 0x58 0x00 0x00 0xff 0xff
+
+# CHECK: cghsi 0, 0
+0xe5 0x58 0x00 0x00 0x00 0x00
+
+# CHECK: cghsi 0, 1
+0xe5 0x58 0x00 0x00 0x00 0x01
+
+# CHECK: cghsi 0, 32767
+0xe5 0x58 0x00 0x00 0x7f 0xff
+
+# CHECK: cghsi 0(%r1), 42
+0xe5 0x58 0x10 0x00 0x00 0x2a
+
+# CHECK: cghsi 0(%r15), 42
+0xe5 0x58 0xf0 0x00 0x00 0x2a
+
+# CHECK: cghsi 4095(%r1), 42
+0xe5 0x58 0x1f 0xff 0x00 0x2a
+
+# CHECK: cghsi 4095(%r15), 42
+0xe5 0x58 0xff 0xff 0x00 0x2a
+
+# CHECK: cgh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x34
+
+# CHECK: cgh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x34
+
+# CHECK: cgh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x34
+
+# CHECK: cgh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x34
+
+# CHECK: cgh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x34
+
+# CHECK: cgh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x34
+
+# CHECK: cgh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x34
+
+# CHECK: cgh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x34
+
+# CHECK: cgh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x34
+
+# CHECK: cgh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x34
+
+# CHECK: cgr %r0, %r0
+0xb9 0x20 0x00 0x00
+
+# CHECK: cgr %r0, %r15
+0xb9 0x20 0x00 0x0f
+
+# CHECK: cgr %r15, %r0
+0xb9 0x20 0x00 0xf0
+
+# CHECK: cgr %r7, %r8
+0xb9 0x20 0x00 0x78
+
+# CHECK: cg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x20
+
+# CHECK: cg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x20
+
+# CHECK: cg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x20
+
+# CHECK: cg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x20
+
+# CHECK: cg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x20
+
+# CHECK: cg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x20
+
+# CHECK: cg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x20
+
+# CHECK: cg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x20
+
+# CHECK: cg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x20
+
+# CHECK: cg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x20
+
+# CHECK: cgxbr %r0, 0, %f0
+0xb3 0xaa 0x00 0x00
+
+# CHECK: cgxbr %r0, 0, %f13
+0xb3 0xaa 0x00 0x0d
+
+# CHECK: cgxbr %r0, 15, %f0
+0xb3 0xaa 0xf0 0x00
+
+# CHECK: cgxbr %r4, 5, %f8
+0xb3 0xaa 0x50 0x48
+
+# CHECK: cgxbr %r15, 0, %f0
+0xb3 0xaa 0x00 0xf0
+
+# CHECK: chf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xcd
+
+# CHECK: chf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xcd
+
+# CHECK: chf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xcd
+
+# CHECK: chf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xcd
+
+# CHECK: chf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xcd
+
+# CHECK: chf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xcd
+
+# CHECK: chf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xcd
+
+# CHECK: chf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xcd
+
+# CHECK: chf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xcd
+
+# CHECK: chf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xcd
+
+# CHECK: chhsi 0, 0
+0xe5 0x54 0x00 0x00 0x00 0x00
+
+# CHECK: chhsi 4095, 0
+0xe5 0x54 0x0f 0xff 0x00 0x00
+
+# CHECK: chhsi 0, -32768
+0xe5 0x54 0x00 0x00 0x80 0x00
+
+# CHECK: chhsi 0, -1
+0xe5 0x54 0x00 0x00 0xff 0xff
+
+# CHECK: chhsi 0, 0
+0xe5 0x54 0x00 0x00 0x00 0x00
+
+# CHECK: chhsi 0, 1
+0xe5 0x54 0x00 0x00 0x00 0x01
+
+# CHECK: chhsi 0, 32767
+0xe5 0x54 0x00 0x00 0x7f 0xff
+
+# CHECK: chhsi 0(%r1), 42
+0xe5 0x54 0x10 0x00 0x00 0x2a
+
+# CHECK: chhsi 0(%r15), 42
+0xe5 0x54 0xf0 0x00 0x00 0x2a
+
+# CHECK: chhsi 4095(%r1), 42
+0xe5 0x54 0x1f 0xff 0x00 0x2a
+
+# CHECK: chhsi 4095(%r15), 42
+0xe5 0x54 0xff 0xff 0x00 0x2a
+
+# CHECK: chi %r0, -32768
+0xa7 0x0e 0x80 0x00
+
+# CHECK: chi %r0, -1
+0xa7 0x0e 0xff 0xff
+
+# CHECK: chi %r0, 0
+0xa7 0x0e 0x00 0x00
+
+# CHECK: chi %r0, 1
+0xa7 0x0e 0x00 0x01
+
+# CHECK: chi %r0, 32767
+0xa7 0x0e 0x7f 0xff
+
+# CHECK: chi %r15, 0
+0xa7 0xfe 0x00 0x00
+
+# CHECK: chsi 0, 0
+0xe5 0x5c 0x00 0x00 0x00 0x00
+
+# CHECK: chsi 4095, 0
+0xe5 0x5c 0x0f 0xff 0x00 0x00
+
+# CHECK: chsi 0, -32768
+0xe5 0x5c 0x00 0x00 0x80 0x00
+
+# CHECK: chsi 0, -1
+0xe5 0x5c 0x00 0x00 0xff 0xff
+
+# CHECK: chsi 0, 0
+0xe5 0x5c 0x00 0x00 0x00 0x00
+
+# CHECK: chsi 0, 1
+0xe5 0x5c 0x00 0x00 0x00 0x01
+
+# CHECK: chsi 0, 32767
+0xe5 0x5c 0x00 0x00 0x7f 0xff
+
+# CHECK: chsi 0(%r1), 42
+0xe5 0x5c 0x10 0x00 0x00 0x2a
+
+# CHECK: chsi 0(%r15), 42
+0xe5 0x5c 0xf0 0x00 0x00 0x2a
+
+# CHECK: chsi 4095(%r1), 42
+0xe5 0x5c 0x1f 0xff 0x00 0x2a
+
+# CHECK: chsi 4095(%r15), 42
+0xe5 0x5c 0xff 0xff 0x00 0x2a
+
+# CHECK: ch %r0, 0
+0x49 0x00 0x00 0x00
+
+# CHECK: ch %r0, 4095
+0x49 0x00 0x0f 0xff
+
+# CHECK: ch %r0, 0(%r1)
+0x49 0x00 0x10 0x00
+
+# CHECK: ch %r0, 0(%r15)
+0x49 0x00 0xf0 0x00
+
+# CHECK: ch %r0, 4095(%r1,%r15)
+0x49 0x01 0xff 0xff
+
+# CHECK: ch %r0, 4095(%r15,%r1)
+0x49 0x0f 0x1f 0xff
+
+# CHECK: ch %r15, 0
+0x49 0xf0 0x00 0x00
+
+# CHECK: chy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x79
+
+# CHECK: chy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x79
+
+# CHECK: chy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x79
+
+# CHECK: chy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x79
+
+# CHECK: chy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x79
+
+# CHECK: chy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x79
+
+# CHECK: chy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x79
+
+# CHECK: chy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x79
+
+# CHECK: chy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x79
+
+# CHECK: chy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x79
+
+# CHECK: cih %r0, -2147483648
+0xcc 0x0d 0x80 0x00 0x00 0x00
+
+# CHECK: cih %r0, -1
+0xcc 0x0d 0xff 0xff 0xff 0xff
+
+# CHECK: cih %r0, 0
+0xcc 0x0d 0x00 0x00 0x00 0x00
+
+# CHECK: cih %r0, 1
+0xcc 0x0d 0x00 0x00 0x00 0x01
+
+# CHECK: cih %r0, 2147483647
+0xcc 0x0d 0x7f 0xff 0xff 0xff
+
+# CHECK: cih %r15, 0
+0xcc 0xfd 0x00 0x00 0x00 0x00
+
+# CHECK: clc 0(1), 0
+0xd5 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: clc 0(1), 0(%r1)
+0xd5 0x00 0x00 0x00 0x10 0x00
+
+# CHECK: clc 0(1), 0(%r15)
+0xd5 0x00 0x00 0x00 0xf0 0x00
+
+# CHECK: clc 0(1), 4095
+0xd5 0x00 0x00 0x00 0x0f 0xff
+
+# CHECK: clc 0(1), 4095(%r1)
+0xd5 0x00 0x00 0x00 0x1f 0xff
+
+# CHECK: clc 0(1), 4095(%r15)
+0xd5 0x00 0x00 0x00 0xff 0xff
+
+# CHECK: clc 0(1,%r1), 0
+0xd5 0x00 0x10 0x00 0x00 0x00
+
+# CHECK: clc 0(1,%r15), 0
+0xd5 0x00 0xf0 0x00 0x00 0x00
+
+# CHECK: clc 4095(1,%r1), 0
+0xd5 0x00 0x1f 0xff 0x00 0x00
+
+# CHECK: clc 4095(1,%r15), 0
+0xd5 0x00 0xff 0xff 0x00 0x00
+
+# CHECK: clc 0(256,%r1), 0
+0xd5 0xff 0x10 0x00 0x00 0x00
+
+# CHECK: clc 0(256,%r15), 0
+0xd5 0xff 0xf0 0x00 0x00 0x00
+
+# CHECK: clfhsi 0, 0
+0xe5 0x5d 0x00 0x00 0x00 0x00
+
+# CHECK: clfhsi 4095, 0
+0xe5 0x5d 0x0f 0xff 0x00 0x00
+
+# CHECK: clfhsi 0, 65535
+0xe5 0x5d 0x00 0x00 0xff 0xff
+
+# CHECK: clfhsi 0(%r1), 42
+0xe5 0x5d 0x10 0x00 0x00 0x2a
+
+# CHECK: clfhsi 0(%r15), 42
+0xe5 0x5d 0xf0 0x00 0x00 0x2a
+
+# CHECK: clfhsi 4095(%r1), 42
+0xe5 0x5d 0x1f 0xff 0x00 0x2a
+
+# CHECK: clfhsi 4095(%r15), 42
+0xe5 0x5d 0xff 0xff 0x00 0x2a
+
+# CHECK: clfi %r0, 0
+0xc2 0x0f 0x00 0x00 0x00 0x00
+
+# CHECK: clfi %r0, 4294967295
+0xc2 0x0f 0xff 0xff 0xff 0xff
+
+# CHECK: clfi %r15, 0
+0xc2 0xff 0x00 0x00 0x00 0x00
+
+# CHECK: clgfi %r0, 0
+0xc2 0x0e 0x00 0x00 0x00 0x00
+
+# CHECK: clgfi %r0, 4294967295
+0xc2 0x0e 0xff 0xff 0xff 0xff
+
+# CHECK: clgfi %r15, 0
+0xc2 0xfe 0x00 0x00 0x00 0x00
+
+# CHECK: clgfr %r0, %r0
+0xb9 0x31 0x00 0x00
+
+# CHECK: clgfr %r0, %r15
+0xb9 0x31 0x00 0x0f
+
+# CHECK: clgfr %r15, %r0
+0xb9 0x31 0x00 0xf0
+
+# CHECK: clgfr %r7, %r8
+0xb9 0x31 0x00 0x78
+
+# CHECK: clgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x31
+
+# CHECK: clgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x31
+
+# CHECK: clgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x31
+
+# CHECK: clgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x31
+
+# CHECK: clgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x31
+
+# CHECK: clgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x31
+
+# CHECK: clgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x31
+
+# CHECK: clgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x31
+
+# CHECK: clgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x31
+
+# CHECK: clgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x31
+
+# CHECK: clghsi 0, 0
+0xe5 0x59 0x00 0x00 0x00 0x00
+
+# CHECK: clghsi 4095, 0
+0xe5 0x59 0x0f 0xff 0x00 0x00
+
+# CHECK: clghsi 0, 65535
+0xe5 0x59 0x00 0x00 0xff 0xff
+
+# CHECK: clghsi 0(%r1), 42
+0xe5 0x59 0x10 0x00 0x00 0x2a
+
+# CHECK: clghsi 0(%r15), 42
+0xe5 0x59 0xf0 0x00 0x00 0x2a
+
+# CHECK: clghsi 4095(%r1), 42
+0xe5 0x59 0x1f 0xff 0x00 0x2a
+
+# CHECK: clghsi 4095(%r15), 42
+0xe5 0x59 0xff 0xff 0x00 0x2a
+
+# CHECK: clgr %r0, %r0
+0xb9 0x21 0x00 0x00
+
+# CHECK: clgr %r0, %r15
+0xb9 0x21 0x00 0x0f
+
+# CHECK: clgr %r15, %r0
+0xb9 0x21 0x00 0xf0
+
+# CHECK: clgr %r7, %r8
+0xb9 0x21 0x00 0x78
+
+# CHECK: clg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x21
+
+# CHECK: clg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x21
+
+# CHECK: clg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x21
+
+# CHECK: clg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x21
+
+# CHECK: clg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x21
+
+# CHECK: clg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x21
+
+# CHECK: clg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x21
+
+# CHECK: clg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x21
+
+# CHECK: clg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x21
+
+# CHECK: clg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x21
+
+# CHECK: clhf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xcf
+
+# CHECK: clhf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xcf
+
+# CHECK: clhf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xcf
+
+# CHECK: clhf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xcf
+
+# CHECK: clhf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xcf
+
+# CHECK: clhf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xcf
+
+# CHECK: clhf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xcf
+
+# CHECK: clhf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xcf
+
+# CHECK: clhf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xcf
+
+# CHECK: clhf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xcf
+
+# CHECK: clhhsi 0, 0
+0xe5 0x55 0x00 0x00 0x00 0x00
+
+# CHECK: clhhsi 4095, 0
+0xe5 0x55 0x0f 0xff 0x00 0x00
+
+# CHECK: clhhsi 0, 65535
+0xe5 0x55 0x00 0x00 0xff 0xff
+
+# CHECK: clhhsi 0(%r1), 42
+0xe5 0x55 0x10 0x00 0x00 0x2a
+
+# CHECK: clhhsi 0(%r15), 42
+0xe5 0x55 0xf0 0x00 0x00 0x2a
+
+# CHECK: clhhsi 4095(%r1), 42
+0xe5 0x55 0x1f 0xff 0x00 0x2a
+
+# CHECK: clhhsi 4095(%r15), 42
+0xe5 0x55 0xff 0xff 0x00 0x2a
+
+# CHECK: cli 0, 0
+0x95 0x00 0x00 0x00
+
+# CHECK: cli 4095, 0
+0x95 0x00 0x0f 0xff
+
+# CHECK: cli 0, 255
+0x95 0xff 0x00 0x00
+
+# CHECK: cli 0(%r1), 42
+0x95 0x2a 0x10 0x00
+
+# CHECK: cli 0(%r15), 42
+0x95 0x2a 0xf0 0x00
+
+# CHECK: cli 4095(%r1), 42
+0x95 0x2a 0x1f 0xff
+
+# CHECK: cli 4095(%r15), 42
+0x95 0x2a 0xff 0xff
+
+# CHECK: clih %r0, 0
+0xcc 0x0f 0x00 0x00 0x00 0x00
+
+# CHECK: clih %r0, 1
+0xcc 0x0f 0x00 0x00 0x00 0x01
+
+# CHECK: clih %r0, 4294967295
+0xcc 0x0f 0xff 0xff 0xff 0xff
+
+# CHECK: clih %r15, 0
+0xcc 0xff 0x00 0x00 0x00 0x00
+
+# CHECK: cliy -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x55
+
+# CHECK: cliy -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x55
+
+# CHECK: cliy 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x55
+
+# CHECK: cliy 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x55
+
+# CHECK: cliy 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x55
+
+# CHECK: cliy 0, 255
+0xeb 0xff 0x00 0x00 0x00 0x55
+
+# CHECK: cliy 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x55
+
+# CHECK: cliy 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x55
+
+# CHECK: cliy 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x55
+
+# CHECK: cliy 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x55
+
+# CHECK: clr %r0, %r0
+0x15 0x00
+
+# CHECK: clr %r0, %r15
+0x15 0x0f
+
+# CHECK: clr %r15, %r0
+0x15 0xf0
+
+# CHECK: clr %r7, %r8
+0x15 0x78
+
+# CHECK: clst %r0, %r0
+0xb2 0x5d 0x00 0x00
+
+# CHECK: clst %r0, %r15
+0xb2 0x5d 0x00 0x0f
+
+# CHECK: clst %r15, %r0
+0xb2 0x5d 0x00 0xf0
+
+# CHECK: clst %r7, %r8
+0xb2 0x5d 0x00 0x78
+
+# CHECK: cl %r0, 0
+0x55 0x00 0x00 0x00
+
+# CHECK: cl %r0, 4095
+0x55 0x00 0x0f 0xff
+
+# CHECK: cl %r0, 0(%r1)
+0x55 0x00 0x10 0x00
+
+# CHECK: cl %r0, 0(%r15)
+0x55 0x00 0xf0 0x00
+
+# CHECK: cl %r0, 4095(%r1,%r15)
+0x55 0x01 0xff 0xff
+
+# CHECK: cl %r0, 4095(%r15,%r1)
+0x55 0x0f 0x1f 0xff
+
+# CHECK: cl %r15, 0
+0x55 0xf0 0x00 0x00
+
+# CHECK: cly %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x55
+
+# CHECK: cly %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x55
+
+# CHECK: cly %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x55
+
+# CHECK: cly %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x55
+
+# CHECK: cly %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x55
+
+# CHECK: cly %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x55
+
+# CHECK: cly %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x55
+
+# CHECK: cly %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x55
+
+# CHECK: cly %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x55
+
+# CHECK: cly %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x55
+
+# CHECK: cpsdr %f0, %f0, %f0
+0xb3 0x72 0x00 0x00
+
+# CHECK: cpsdr %f0, %f0, %f15
+0xb3 0x72 0x00 0x0f
+
+# CHECK: cpsdr %f0, %f15, %f0
+0xb3 0x72 0xf0 0x00
+
+# CHECK: cpsdr %f15, %f0, %f0
+0xb3 0x72 0x00 0xf0
+
+# CHECK: cpsdr %f1, %f2, %f3
+0xb3 0x72 0x20 0x13
+
+# CHECK: cpsdr %f15, %f15, %f15
+0xb3 0x72 0xf0 0xff
+
+# CHECK: cr %r0, %r0
+0x19 0x00
+
+# CHECK: cr %r0, %r15
+0x19 0x0f
+
+# CHECK: cr %r15, %r0
+0x19 0xf0
+
+# CHECK: cr %r7, %r8
+0x19 0x78
+
+# CHECK: csg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x30
+
+# CHECK: csg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x30
+
+# CHECK: csg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x30
+
+# CHECK: csg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x30
+
+# CHECK: csg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x30
+
+# CHECK: csg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x30
+
+# CHECK: csg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x30
+
+# CHECK: csg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x30
+
+# CHECK: csg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x30
+
+# CHECK: csg %r0, %r15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x30
+
+# CHECK: csg %r15, %r0, 0
+0xeb 0xf0 0x00 0x00 0x00 0x30
+
+# CHECK: cs %r0, %r0, 0
+0xba 0x00 0x00 0x00
+
+# CHECK: cs %r0, %r0, 4095
+0xba 0x00 0x0f 0xff
+
+# CHECK: cs %r0, %r0, 0(%r1)
+0xba 0x00 0x10 0x00
+
+# CHECK: cs %r0, %r0, 0(%r15)
+0xba 0x00 0xf0 0x00
+
+# CHECK: cs %r0, %r0, 4095(%r1)
+0xba 0x00 0x1f 0xff
+
+# CHECK: cs %r0, %r0, 4095(%r15)
+0xba 0x00 0xff 0xff
+
+# CHECK: cs %r0, %r15, 0
+0xba 0x0f 0x00 0x00
+
+# CHECK: cs %r15, %r0, 0
+0xba 0xf0 0x00 0x00
+
+# CHECK: csy %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x14
+
+# CHECK: csy %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x14
+
+# CHECK: csy %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x14
+
+# CHECK: csy %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x14
+
+# CHECK: csy %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x14
+
+# CHECK: csy %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x14
+
+# CHECK: csy %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x14
+
+# CHECK: csy %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x14
+
+# CHECK: csy %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x14
+
+# CHECK: csy %r0, %r15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x14
+
+# CHECK: csy %r15, %r0, 0
+0xeb 0xf0 0x00 0x00 0x00 0x14
+
+# CHECK: c %r0, 0
+0x59 0x00 0x00 0x00
+
+# CHECK: c %r0, 4095
+0x59 0x00 0x0f 0xff
+
+# CHECK: c %r0, 0(%r1)
+0x59 0x00 0x10 0x00
+
+# CHECK: c %r0, 0(%r15)
+0x59 0x00 0xf0 0x00
+
+# CHECK: c %r0, 4095(%r1,%r15)
+0x59 0x01 0xff 0xff
+
+# CHECK: c %r0, 4095(%r15,%r1)
+0x59 0x0f 0x1f 0xff
+
+# CHECK: c %r15, 0
+0x59 0xf0 0x00 0x00
+
+# CHECK: cxbr %f0, %f0
+0xb3 0x49 0x00 0x00
+
+# CHECK: cxbr %f0, %f13
+0xb3 0x49 0x00 0x0d
+
+# CHECK: cxbr %f8, %f8
+0xb3 0x49 0x00 0x88
+
+# CHECK: cxbr %f13, %f0
+0xb3 0x49 0x00 0xd0
+
+# CHECK: cxfbr %f0, %r0
+0xb3 0x96 0x00 0x00
+
+# CHECK: cxfbr %f0, %r15
+0xb3 0x96 0x00 0x0f
+
+# CHECK: cxfbr %f13, %r0
+0xb3 0x96 0x00 0xd0
+
+# CHECK: cxfbr %f8, %r7
+0xb3 0x96 0x00 0x87
+
+# CHECK: cxfbr %f13, %r15
+0xb3 0x96 0x00 0xdf
+
+# CHECK: cxgbr %f0, %r0
+0xb3 0xa6 0x00 0x00
+
+# CHECK: cxgbr %f0, %r15
+0xb3 0xa6 0x00 0x0f
+
+# CHECK: cxgbr %f13, %r0
+0xb3 0xa6 0x00 0xd0
+
+# CHECK: cxgbr %f8, %r7
+0xb3 0xa6 0x00 0x87
+
+# CHECK: cxgbr %f13, %r15
+0xb3 0xa6 0x00 0xdf
+
+# CHECK: cy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x59
+
+# CHECK: cy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x59
+
+# CHECK: cy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x59
+
+# CHECK: cy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x59
+
+# CHECK: cy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x59
+
+# CHECK: cy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x59
+
+# CHECK: cy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x59
+
+# CHECK: cy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x59
+
+# CHECK: cy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x59
+
+# CHECK: cy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x59
+
+# CHECK: ddbr %f0, %f0
+0xb3 0x1d 0x00 0x00
+
+# CHECK: ddbr %f0, %f15
+0xb3 0x1d 0x00 0x0f
+
+# CHECK: ddbr %f7, %f8
+0xb3 0x1d 0x00 0x78
+
+# CHECK: ddbr %f15, %f0
+0xb3 0x1d 0x00 0xf0
+
+# CHECK: ddb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x1d
+
+# CHECK: ddb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x1d
+
+# CHECK: ddb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x1d
+
+# CHECK: ddb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x1d
+
+# CHECK: ddb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x1d
+
+# CHECK: ddb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x1d
+
+# CHECK: ddb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x1d
+
+# CHECK: debr %f0, %f0
+0xb3 0x0d 0x00 0x00
+
+# CHECK: debr %f0, %f15
+0xb3 0x0d 0x00 0x0f
+
+# CHECK: debr %f7, %f8
+0xb3 0x0d 0x00 0x78
+
+# CHECK: debr %f15, %f0
+0xb3 0x0d 0x00 0xf0
+
+# CHECK: deb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x0d
+
+# CHECK: deb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x0d
+
+# CHECK: deb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x0d
+
+# CHECK: deb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x0d
+
+# CHECK: deb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x0d
+
+# CHECK: deb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x0d
+
+# CHECK: deb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x0d
+
+# CHECK: dlgr %r0, %r0
+0xb9 0x87 0x00 0x00
+
+# CHECK: dlgr %r0, %r15
+0xb9 0x87 0x00 0x0f
+
+# CHECK: dlgr %r14, %r0
+0xb9 0x87 0x00 0xe0
+
+# CHECK: dlgr %r6, %r9
+0xb9 0x87 0x00 0x69
+
+# CHECK: dlg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x87
+
+# CHECK: dlg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x87
+
+# CHECK: dlg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x87
+
+# CHECK: dlg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x87
+
+# CHECK: dlg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x87
+
+# CHECK: dlg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x87
+
+# CHECK: dlg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x87
+
+# CHECK: dlg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x87
+
+# CHECK: dlg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x87
+
+# CHECK: dlg %r14, 0
+0xe3 0xe0 0x00 0x00 0x00 0x87
+
+# CHECK: dlr %r0, %r0
+0xb9 0x97 0x00 0x00
+
+# CHECK: dlr %r0, %r15
+0xb9 0x97 0x00 0x0f
+
+# CHECK: dlr %r14, %r0
+0xb9 0x97 0x00 0xe0
+
+# CHECK: dlr %r6, %r9
+0xb9 0x97 0x00 0x69
+
+# CHECK: dl %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x97
+
+# CHECK: dl %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x97
+
+# CHECK: dl %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x97
+
+# CHECK: dl %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x97
+
+# CHECK: dl %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x97
+
+# CHECK: dl %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x97
+
+# CHECK: dl %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x97
+
+# CHECK: dl %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x97
+
+# CHECK: dl %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x97
+
+# CHECK: dl %r14, 0
+0xe3 0xe0 0x00 0x00 0x00 0x97
+
+# CHECK: dsgfr %r0, %r0
+0xb9 0x1d 0x00 0x00
+
+# CHECK: dsgfr %r0, %r15
+0xb9 0x1d 0x00 0x0f
+
+# CHECK: dsgfr %r14, %r0
+0xb9 0x1d 0x00 0xe0
+
+# CHECK: dsgfr %r6, %r9
+0xb9 0x1d 0x00 0x69
+
+# CHECK: dsgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x1d
+
+# CHECK: dsgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x1d
+
+# CHECK: dsgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x1d
+
+# CHECK: dsgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x1d
+
+# CHECK: dsgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x1d
+
+# CHECK: dsgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x1d
+
+# CHECK: dsgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x1d
+
+# CHECK: dsgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x1d
+
+# CHECK: dsgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x1d
+
+# CHECK: dsgf %r14, 0
+0xe3 0xe0 0x00 0x00 0x00 0x1d
+
+# CHECK: dsgr %r0, %r0
+0xb9 0x0d 0x00 0x00
+
+# CHECK: dsgr %r0, %r15
+0xb9 0x0d 0x00 0x0f
+
+# CHECK: dsgr %r14, %r0
+0xb9 0x0d 0x00 0xe0
+
+# CHECK: dsgr %r6, %r9
+0xb9 0x0d 0x00 0x69
+
+# CHECK: dsg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x0d
+
+# CHECK: dsg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x0d
+
+# CHECK: dsg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x0d
+
+# CHECK: dsg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x0d
+
+# CHECK: dsg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x0d
+
+# CHECK: dsg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x0d
+
+# CHECK: dsg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x0d
+
+# CHECK: dsg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x0d
+
+# CHECK: dsg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x0d
+
+# CHECK: dsg %r14, 0
+0xe3 0xe0 0x00 0x00 0x00 0x0d
+
+# CHECK: dxbr %f0, %f0
+0xb3 0x4d 0x00 0x00
+
+# CHECK: dxbr %f0, %f13
+0xb3 0x4d 0x00 0x0d
+
+# CHECK: dxbr %f8, %f8
+0xb3 0x4d 0x00 0x88
+
+# CHECK: dxbr %f13, %f0
+0xb3 0x4d 0x00 0xd0
+
+# CHECK: ear %r0, %a0
+0xb2 0x4f 0x00 0x00
+
+# CHECK: ear %r0, %a15
+0xb2 0x4f 0x00 0x0f
+
+# CHECK: ear %r15, %a0
+0xb2 0x4f 0x00 0xf0
+
+# CHECK: ear %r7, %a8
+0xb2 0x4f 0x00 0x78
+
+# CHECK: ear %r15, %a15
+0xb2 0x4f 0x00 0xff
+
+# CHECK: fidbr %f0, 0, %f0
+0xb3 0x5f 0x00 0x00
+
+# CHECK: fidbr %f0, 0, %f15
+0xb3 0x5f 0x00 0x0f
+
+# CHECK: fidbr %f0, 15, %f0
+0xb3 0x5f 0xf0 0x00
+
+# CHECK: fidbr %f4, 5, %f6
+0xb3 0x5f 0x50 0x46
+
+# CHECK: fidbr %f15, 0, %f0
+0xb3 0x5f 0x00 0xf0
+
+# CHECK: fidbra	%f0, 0, %f0, 1
+0xb3 0x5f 0x01 0x00
+
+# CHECK: fidbra	%f0, 0, %f0, 15
+0xb3 0x5f 0x0f 0x00
+
+# CHECK: fidbra	%f0, 0, %f15, 1
+0xb3 0x5f 0x01 0x0f
+
+# CHECK: fidbra	%f0, 15, %f0, 1
+0xb3 0x5f 0xf1 0x00
+
+# CHECK: fidbra	%f4, 5, %f6, 7
+0xb3 0x5f 0x57 0x46
+
+# CHECK: fidbra	%f15, 0, %f0, 1
+0xb3 0x5f 0x01 0xf0
+
+# CHECK: fiebr %f0, 0, %f0
+0xb3 0x57 0x00 0x00
+
+# CHECK: fiebr %f0, 0, %f15
+0xb3 0x57 0x00 0x0f
+
+# CHECK: fiebr %f0, 15, %f0
+0xb3 0x57 0xf0 0x00
+
+# CHECK: fiebr %f4, 5, %f6
+0xb3 0x57 0x50 0x46
+
+# CHECK: fiebr %f15, 0, %f0
+0xb3 0x57 0x00 0xf0
+
+# CHECK: fiebra	%f0, 0, %f0, 1
+0xb3 0x57 0x01 0x00
+
+# CHECK: fiebra	%f0, 0, %f0, 15
+0xb3 0x57 0x0f 0x00
+
+# CHECK: fiebra	%f0, 0, %f15, 1
+0xb3 0x57 0x01 0x0f
+
+# CHECK: fiebra	%f0, 15, %f0, 1
+0xb3 0x57 0xf1 0x00
+
+# CHECK: fiebra	%f4, 5, %f6, 7
+0xb3 0x57 0x57 0x46
+
+# CHECK: fiebra	%f15, 0, %f0, 1
+0xb3 0x57 0x01 0xf0
+
+# CHECK: fixbr %f0, 0, %f0
+0xb3 0x47 0x00 0x00
+
+# CHECK: fixbr %f0, 0, %f13
+0xb3 0x47 0x00 0x0d
+
+# CHECK: fixbr %f0, 15, %f0
+0xb3 0x47 0xf0 0x00
+
+# CHECK: fixbr %f4, 5, %f8
+0xb3 0x47 0x50 0x48
+
+# CHECK: fixbr %f13, 0, %f0
+0xb3 0x47 0x00 0xd0
+
+# CHECK: fixbra	%f0, 0, %f0, 1
+0xb3 0x47 0x01 0x00
+
+# CHECK: fixbra	%f0, 0, %f0, 15
+0xb3 0x47 0x0f 0x00
+
+# CHECK: fixbra	%f0, 0, %f13, 1
+0xb3 0x47 0x01 0x0d
+
+# CHECK: fixbra	%f0, 15, %f0, 1
+0xb3 0x47 0xf1 0x00
+
+# CHECK: fixbra	%f4, 5, %f8, 9
+0xb3 0x47 0x59 0x48
+
+# CHECK: fixbra	%f13, 0, %f0, 1
+0xb3 0x47 0x01 0xd0
+
+# CHECK: flogr %r0, %r0
+0xb9 0x83 0x00 0x00
+
+# CHECK: flogr %r0, %r15
+0xb9 0x83 0x00 0x0f
+
+# CHECK: flogr %r10, %r9
+0xb9 0x83 0x00 0xa9
+
+# CHECK: flogr %r14, %r0
+0xb9 0x83 0x00 0xe0
+
+# CHECK: ic %r0, 0
+0x43 0x00 0x00 0x00
+
+# CHECK: ic %r0, 4095
+0x43 0x00 0x0f 0xff
+
+# CHECK: ic %r0, 0(%r1)
+0x43 0x00 0x10 0x00
+
+# CHECK: ic %r0, 0(%r15)
+0x43 0x00 0xf0 0x00
+
+# CHECK: ic %r0, 4095(%r1,%r15)
+0x43 0x01 0xff 0xff
+
+# CHECK: ic %r0, 4095(%r15,%r1)
+0x43 0x0f 0x1f 0xff
+
+# CHECK: ic %r15, 0
+0x43 0xf0 0x00 0x00
+
+# CHECK: icy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x73
+
+# CHECK: icy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x73
+
+# CHECK: icy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x73
+
+# CHECK: icy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x73
+
+# CHECK: icy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x73
+
+# CHECK: icy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x73
+
+# CHECK: icy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x73
+
+# CHECK: icy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x73
+
+# CHECK: icy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x73
+
+# CHECK: icy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x73
+
+# CHECK: iihf %r0, 0
+0xc0 0x08 0x00 0x00 0x00 0x00
+
+# CHECK: iihf %r0, 4294967295
+0xc0 0x08 0xff 0xff 0xff 0xff
+
+# CHECK: iihf %r15, 0
+0xc0 0xf8 0x00 0x00 0x00 0x00
+
+# CHECK: iihh %r0, 0
+0xa5 0x00 0x00 0x00
+
+# CHECK: iihh %r0, 32768
+0xa5 0x00 0x80 0x00
+
+# CHECK: iihh %r0, 65535
+0xa5 0x00 0xff 0xff
+
+# CHECK: iihh %r15, 0
+0xa5 0xf0 0x00 0x00
+
+# CHECK: iihl %r0, 0
+0xa5 0x01 0x00 0x00
+
+# CHECK: iihl %r0, 32768
+0xa5 0x01 0x80 0x00
+
+# CHECK: iihl %r0, 65535
+0xa5 0x01 0xff 0xff
+
+# CHECK: iihl %r15, 0
+0xa5 0xf1 0x00 0x00
+
+# CHECK: iilf %r0, 0
+0xc0 0x09 0x00 0x00 0x00 0x00
+
+# CHECK: iilf %r0, 4294967295
+0xc0 0x09 0xff 0xff 0xff 0xff
+
+# CHECK: iilf %r15, 0
+0xc0 0xf9 0x00 0x00 0x00 0x00
+
+# CHECK: iilh %r0, 0
+0xa5 0x02 0x00 0x00
+
+# CHECK: iilh %r0, 32768
+0xa5 0x02 0x80 0x00
+
+# CHECK: iilh %r0, 65535
+0xa5 0x02 0xff 0xff
+
+# CHECK: iilh %r15, 0
+0xa5 0xf2 0x00 0x00
+
+# CHECK: iill %r0, 0
+0xa5 0x03 0x00 0x00
+
+# CHECK: iill %r0, 32768
+0xa5 0x03 0x80 0x00
+
+# CHECK: iill %r0, 65535
+0xa5 0x03 0xff 0xff
+
+# CHECK: iill %r15, 0
+0xa5 0xf3 0x00 0x00
+
+# CHECK: ipm %r0
+0xb2 0x22 0x00 0x00
+
+# CHECK: ipm %r1
+0xb2 0x22 0x00 0x10
+
+# CHECK: ipm %r15
+0xb2 0x22 0x00 0xf0
+
+# CHECK: la %r0, 0
+0x41 0x00 0x00 0x00
+
+# CHECK: la %r0, 4095
+0x41 0x00 0x0f 0xff
+
+# CHECK: la %r0, 0(%r1)
+0x41 0x00 0x10 0x00
+
+# CHECK: la %r0, 0(%r15)
+0x41 0x00 0xf0 0x00
+
+# CHECK: la %r0, 4095(%r1,%r15)
+0x41 0x01 0xff 0xff
+
+# CHECK: la %r0, 4095(%r15,%r1)
+0x41 0x0f 0x1f 0xff
+
+# CHECK: la %r15, 0
+0x41 0xf0 0x00 0x00
+
+# CHECK: lay %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x71
+
+# CHECK: lay %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x71
+
+# CHECK: lay %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x71
+
+# CHECK: lay %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x71
+
+# CHECK: lay %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x71
+
+# CHECK: lay %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x71
+
+# CHECK: lay %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x71
+
+# CHECK: lay %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x71
+
+# CHECK: lay %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x71
+
+# CHECK: lay %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x71
+
+# CHECK: lbr %r0, %r15
+0xb9 0x26 0x00 0x0f
+
+# CHECK: lbr %r7, %r8
+0xb9 0x26 0x00 0x78
+
+# CHECK: lbr %r15, %r0
+0xb9 0x26 0x00 0xf0
+
+# CHECK: lb %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x76
+
+# CHECK: lb %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x76
+
+# CHECK: lb %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x76
+
+# CHECK: lb %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x76
+
+# CHECK: lb %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x76
+
+# CHECK: lb %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x76
+
+# CHECK: lb %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x76
+
+# CHECK: lb %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x76
+
+# CHECK: lb %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x76
+
+# CHECK: lb %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x76
+
+# CHECK: lbh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xc0
+
+# CHECK: lbh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xc0
+
+# CHECK: lbh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xc0
+
+# CHECK: lbh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xc0
+
+# CHECK: lbh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xc0
+
+# CHECK: lbh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xc0
+
+# CHECK: lbh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xc0
+
+# CHECK: lbh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xc0
+
+# CHECK: lbh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xc0
+
+# CHECK: lbh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xc0
+
+# CHECK: lcdbr %f0, %f9
+0xb3 0x13 0x00 0x09
+
+# CHECK: lcdbr %f0, %f15
+0xb3 0x13 0x00 0x0f
+
+# CHECK: lcdbr %f15, %f0
+0xb3 0x13 0x00 0xf0
+
+# CHECK: lcdbr %f15, %f9
+0xb3 0x13 0x00 0xf9
+
+# CHECK: lcebr %f0, %f9
+0xb3 0x03 0x00 0x09
+
+# CHECK: lcebr %f0, %f15
+0xb3 0x03 0x00 0x0f
+
+# CHECK: lcebr %f15, %f0
+0xb3 0x03 0x00 0xf0
+
+# CHECK: lcebr %f15, %f9
+0xb3 0x03 0x00 0xf9
+
+# CHECK: lcgfr %r0, %r0
+0xb9 0x13 0x00 0x00
+
+# CHECK: lcgfr %r0, %r15
+0xb9 0x13 0x00 0x0f
+
+# CHECK: lcgfr %r15, %r0
+0xb9 0x13 0x00 0xf0
+
+# CHECK: lcgfr %r7, %r8
+0xb9 0x13 0x00 0x78
+
+# CHECK: lcgr %r0, %r0
+0xb9 0x03 0x00 0x00
+
+# CHECK: lcgr %r0, %r15
+0xb9 0x03 0x00 0x0f
+
+# CHECK: lcgr %r15, %r0
+0xb9 0x03 0x00 0xf0
+
+# CHECK: lcgr %r7, %r8
+0xb9 0x03 0x00 0x78
+
+# CHECK: lcr %r0, %r0
+0x13 0x00
+
+# CHECK: lcr %r0, %r15
+0x13 0x0f
+
+# CHECK: lcr %r15, %r0
+0x13 0xf0
+
+# CHECK: lcr %r7, %r8
+0x13 0x78
+
+# CHECK: lcxbr %f0, %f8
+0xb3 0x43 0x00 0x08
+
+# CHECK: lcxbr %f0, %f13
+0xb3 0x43 0x00 0x0d
+
+# CHECK: lcxbr %f13, %f0
+0xb3 0x43 0x00 0xd0
+
+# CHECK: lcxbr %f13, %f9
+0xb3 0x43 0x00 0xd9
+
+# CHECK: ldebr %f0, %f15
+0xb3 0x04 0x00 0x0f
+
+# CHECK: ldebr %f7, %f8
+0xb3 0x04 0x00 0x78
+
+# CHECK: ldebr %f15, %f0
+0xb3 0x04 0x00 0xf0
+
+# CHECK: ldeb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x04
+
+# CHECK: ldeb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x04
+
+# CHECK: ldeb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x04
+
+# CHECK: ldeb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x04
+
+# CHECK: ldeb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x04
+
+# CHECK: ldeb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x04
+
+# CHECK: ldeb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x04
+
+# CHECK: ldgr %f0, %r0
+0xb3 0xc1 0x00 0x00
+
+# CHECK: ldgr %f0, %r15
+0xb3 0xc1 0x00 0x0f
+
+# CHECK: ldgr %f15, %r0
+0xb3 0xc1 0x00 0xf0
+
+# CHECK: ldgr %f7, %r9
+0xb3 0xc1 0x00 0x79
+
+# CHECK: ldgr %f15, %r15
+0xb3 0xc1 0x00 0xff
+
+# CHECK: ldr %f0, %f9
+0x28 0x09
+
+# CHECK: ldr %f0, %f15
+0x28 0x0f
+
+# CHECK: ldr %f15, %f0
+0x28 0xf0
+
+# CHECK: ldr %f15, %f9
+0x28 0xf9
+
+# CHECK: ld %f0, 0
+0x68 0x00 0x00 0x00
+
+# CHECK: ld %f0, 4095
+0x68 0x00 0x0f 0xff
+
+# CHECK: ld %f0, 0(%r1)
+0x68 0x00 0x10 0x00
+
+# CHECK: ld %f0, 0(%r15)
+0x68 0x00 0xf0 0x00
+
+# CHECK: ld %f0, 4095(%r1,%r15)
+0x68 0x01 0xff 0xff
+
+# CHECK: ld %f0, 4095(%r15,%r1)
+0x68 0x0f 0x1f 0xff
+
+# CHECK: ld %f15, 0
+0x68 0xf0 0x00 0x00
+
+# CHECK: ldxbr %f0, %f0
+0xb3 0x45 0x00 0x00
+
+# CHECK: ldxbr %f0, %f13
+0xb3 0x45 0x00 0x0d
+
+# CHECK: ldxbr %f8, %f12
+0xb3 0x45 0x00 0x8c
+
+# CHECK: ldxbr %f13, %f0
+0xb3 0x45 0x00 0xd0
+
+# CHECK: ldxbr %f13, %f13
+0xb3 0x45 0x00 0xdd
+
+# CHECK: ldy %f0, -524288
+0xed 0x00 0x00 0x00 0x80 0x65
+
+# CHECK: ldy %f0, -1
+0xed 0x00 0x0f 0xff 0xff 0x65
+
+# CHECK: ldy %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x65
+
+# CHECK: ldy %f0, 1
+0xed 0x00 0x00 0x01 0x00 0x65
+
+# CHECK: ldy %f0, 524287
+0xed 0x00 0x0f 0xff 0x7f 0x65
+
+# CHECK: ldy %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x65
+
+# CHECK: ldy %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x65
+
+# CHECK: ldy %f0, 524287(%r1,%r15)
+0xed 0x01 0xff 0xff 0x7f 0x65
+
+# CHECK: ldy %f0, 524287(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x7f 0x65
+
+# CHECK: ldy %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x65
+
+# CHECK: ledbr %f0, %f0
+0xb3 0x44 0x00 0x00
+
+# CHECK: ledbr %f0, %f15
+0xb3 0x44 0x00 0x0f
+
+# CHECK: ledbr %f7, %f8
+0xb3 0x44 0x00 0x78
+
+# CHECK: ledbr %f15, %f0
+0xb3 0x44 0x00 0xf0
+
+# CHECK: ledbr %f15, %f15
+0xb3 0x44 0x00 0xff
+
+# CHECK: ler %f0, %f9
+0x38 0x09
+
+# CHECK: ler %f0, %f15
+0x38 0x0f
+
+# CHECK: ler %f15, %f0
+0x38 0xf0
+
+# CHECK: ler %f15, %f9
+0x38 0xf9
+
+# CHECK: le %f0, 0
+0x78 0x00 0x00 0x00
+
+# CHECK: le %f0, 4095
+0x78 0x00 0x0f 0xff
+
+# CHECK: le %f0, 0(%r1)
+0x78 0x00 0x10 0x00
+
+# CHECK: le %f0, 0(%r15)
+0x78 0x00 0xf0 0x00
+
+# CHECK: le %f0, 4095(%r1,%r15)
+0x78 0x01 0xff 0xff
+
+# CHECK: le %f0, 4095(%r15,%r1)
+0x78 0x0f 0x1f 0xff
+
+# CHECK: le %f15, 0
+0x78 0xf0 0x00 0x00
+
+# CHECK: lexbr %f0, %f0
+0xb3 0x46 0x00 0x00
+
+# CHECK: lexbr %f0, %f13
+0xb3 0x46 0x00 0x0d
+
+# CHECK: lexbr %f8, %f12
+0xb3 0x46 0x00 0x8c
+
+# CHECK: lexbr %f13, %f0
+0xb3 0x46 0x00 0xd0
+
+# CHECK: lexbr %f13, %f13
+0xb3 0x46 0x00 0xdd
+
+# CHECK: ley %f0, -524288
+0xed 0x00 0x00 0x00 0x80 0x64
+
+# CHECK: ley %f0, -1
+0xed 0x00 0x0f 0xff 0xff 0x64
+
+# CHECK: ley %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x64
+
+# CHECK: ley %f0, 1
+0xed 0x00 0x00 0x01 0x00 0x64
+
+# CHECK: ley %f0, 524287
+0xed 0x00 0x0f 0xff 0x7f 0x64
+
+# CHECK: ley %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x64
+
+# CHECK: ley %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x64
+
+# CHECK: ley %f0, 524287(%r1,%r15)
+0xed 0x01 0xff 0xff 0x7f 0x64
+
+# CHECK: ley %f0, 524287(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x7f 0x64
+
+# CHECK: ley %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x64
+
+# CHECK: lfh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xca
+
+# CHECK: lfh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xca
+
+# CHECK: lfh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xca
+
+# CHECK: lfh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xca
+
+# CHECK: lfh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xca
+
+# CHECK: lfh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xca
+
+# CHECK: lfh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xca
+
+# CHECK: lfh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xca
+
+# CHECK: lfh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xca
+
+# CHECK: lfh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xca
+
+# CHECK: lgbr %r0, %r15
+0xb9 0x06 0x00 0x0f
+
+# CHECK: lgbr %r7, %r8
+0xb9 0x06 0x00 0x78
+
+# CHECK: lgbr %r15, %r0
+0xb9 0x06 0x00 0xf0
+
+# CHECK: lgb %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x77
+
+# CHECK: lgb %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x77
+
+# CHECK: lgb %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x77
+
+# CHECK: lgb %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x77
+
+# CHECK: lgb %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x77
+
+# CHECK: lgb %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x77
+
+# CHECK: lgb %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x77
+
+# CHECK: lgb %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x77
+
+# CHECK: lgb %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x77
+
+# CHECK: lgb %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x77
+
+# CHECK: lgdr %r0, %f0
+0xb3 0xcd 0x00 0x00
+
+# CHECK: lgdr %r0, %f15
+0xb3 0xcd 0x00 0x0f
+
+# CHECK: lgdr %r15, %f0
+0xb3 0xcd 0x00 0xf0
+
+# CHECK: lgdr %r8, %f8
+0xb3 0xcd 0x00 0x88
+
+# CHECK: lgdr %r15, %f15
+0xb3 0xcd 0x00 0xff
+
+# CHECK: lgfi %r0, -2147483648
+0xc0 0x01 0x80 0x00 0x00 0x00
+
+# CHECK: lgfi %r0, -1
+0xc0 0x01 0xff 0xff 0xff 0xff
+
+# CHECK: lgfi %r0, 0
+0xc0 0x01 0x00 0x00 0x00 0x00
+
+# CHECK: lgfi %r0, 1
+0xc0 0x01 0x00 0x00 0x00 0x01
+
+# CHECK: lgfi %r0, 2147483647
+0xc0 0x01 0x7f 0xff 0xff 0xff
+
+# CHECK: lgfi %r15, 0
+0xc0 0xf1 0x00 0x00 0x00 0x00
+
+# CHECK: lgfr %r0, %r15
+0xb9 0x14 0x00 0x0f
+
+# CHECK: lgfr %r7, %r8
+0xb9 0x14 0x00 0x78
+
+# CHECK: lgfr %r15, %r0
+0xb9 0x14 0x00 0xf0
+
+# CHECK: lgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x14
+
+# CHECK: lgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x14
+
+# CHECK: lgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x14
+
+# CHECK: lgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x14
+
+# CHECK: lgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x14
+
+# CHECK: lgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x14
+
+# CHECK: lgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x14
+
+# CHECK: lgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x14
+
+# CHECK: lgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x14
+
+# CHECK: lgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x14
+
+# CHECK: lghi %r0, -32768
+0xa7 0x09 0x80 0x00
+
+# CHECK: lghi %r0, -1
+0xa7 0x09 0xff 0xff
+
+# CHECK: lghi %r0, 0
+0xa7 0x09 0x00 0x00
+
+# CHECK: lghi %r0, 1
+0xa7 0x09 0x00 0x01
+
+# CHECK: lghi %r0, 32767
+0xa7 0x09 0x7f 0xff
+
+# CHECK: lghi %r15, 0
+0xa7 0xf9 0x00 0x00
+
+# CHECK: lghr %r0, %r15
+0xb9 0x07 0x00 0x0f
+
+# CHECK: lghr %r7, %r8
+0xb9 0x07 0x00 0x78
+
+# CHECK: lghr %r15, %r0
+0xb9 0x07 0x00 0xf0
+
+# CHECK: lgh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x15
+
+# CHECK: lgh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x15
+
+# CHECK: lgh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x15
+
+# CHECK: lgh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x15
+
+# CHECK: lgh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x15
+
+# CHECK: lgh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x15
+
+# CHECK: lgh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x15
+
+# CHECK: lgh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x15
+
+# CHECK: lgh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x15
+
+# CHECK: lgh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x15
+
+# CHECK: lgr %r0, %r9
+0xb9 0x04 0x00 0x09
+
+# CHECK: lgr %r0, %r15
+0xb9 0x04 0x00 0x0f
+
+# CHECK: lgr %r15, %r0
+0xb9 0x04 0x00 0xf0
+
+# CHECK: lgr %r15, %r9
+0xb9 0x04 0x00 0xf9
+
+# CHECK: lg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x04
+
+# CHECK: lg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x04
+
+# CHECK: lg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x04
+
+# CHECK: lg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x04
+
+# CHECK: lg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x04
+
+# CHECK: lg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x04
+
+# CHECK: lg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x04
+
+# CHECK: lg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x04
+
+# CHECK: lg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x04
+
+# CHECK: lg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x04
+
+# CHECK: lhi %r0, -32768
+0xa7 0x08 0x80 0x00
+
+# CHECK: lhi %r0, -1
+0xa7 0x08 0xff 0xff
+
+# CHECK: lhi %r0, 0
+0xa7 0x08 0x00 0x00
+
+# CHECK: lhi %r0, 1
+0xa7 0x08 0x00 0x01
+
+# CHECK: lhi %r0, 32767
+0xa7 0x08 0x7f 0xff
+
+# CHECK: lhi %r15, 0
+0xa7 0xf8 0x00 0x00
+
+# CHECK: lhh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xc4
+
+# CHECK: lhh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xc4
+
+# CHECK: lhh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xc4
+
+# CHECK: lhh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xc4
+
+# CHECK: lhh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xc4
+
+# CHECK: lhh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xc4
+
+# CHECK: lhh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xc4
+
+# CHECK: lhh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xc4
+
+# CHECK: lhh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xc4
+
+# CHECK: lhh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xc4
+
+# CHECK: lhr %r0, %r15
+0xb9 0x27 0x00 0x0f
+
+# CHECK: lhr %r7, %r8
+0xb9 0x27 0x00 0x78
+
+# CHECK: lhr %r15, %r0
+0xb9 0x27 0x00 0xf0
+
+# CHECK: lh %r0, 0
+0x48 0x00 0x00 0x00
+
+# CHECK: lh %r0, 4095
+0x48 0x00 0x0f 0xff
+
+# CHECK: lh %r0, 0(%r1)
+0x48 0x00 0x10 0x00
+
+# CHECK: lh %r0, 0(%r15)
+0x48 0x00 0xf0 0x00
+
+# CHECK: lh %r0, 4095(%r1,%r15)
+0x48 0x01 0xff 0xff
+
+# CHECK: lh %r0, 4095(%r15,%r1)
+0x48 0x0f 0x1f 0xff
+
+# CHECK: lh %r15, 0
+0x48 0xf0 0x00 0x00
+
+# CHECK: lhy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x78
+
+# CHECK: lhy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x78
+
+# CHECK: lhy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x78
+
+# CHECK: lhy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x78
+
+# CHECK: lhy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x78
+
+# CHECK: lhy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x78
+
+# CHECK: lhy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x78
+
+# CHECK: lhy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x78
+
+# CHECK: lhy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x78
+
+# CHECK: lhy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x78
+
+# CHECK: llcr %r0, %r15
+0xb9 0x94 0x00 0x0f
+
+# CHECK: llcr %r7, %r8
+0xb9 0x94 0x00 0x78
+
+# CHECK: llcr %r15, %r0
+0xb9 0x94 0x00 0xf0
+
+# CHECK: llc %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x94
+
+# CHECK: llc %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x94
+
+# CHECK: llc %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x94
+
+# CHECK: llc %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x94
+
+# CHECK: llc %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x94
+
+# CHECK: llc %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x94
+
+# CHECK: llc %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x94
+
+# CHECK: llc %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x94
+
+# CHECK: llc %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x94
+
+# CHECK: llc %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x94
+
+# CHECK: llch %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xc2
+
+# CHECK: llch %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xc2
+
+# CHECK: llch %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xc2
+
+# CHECK: llch %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xc2
+
+# CHECK: llch %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xc2
+
+# CHECK: llch %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xc2
+
+# CHECK: llch %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xc2
+
+# CHECK: llch %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xc2
+
+# CHECK: llch %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xc2
+
+# CHECK: llch %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xc2
+
+# CHECK: llgcr %r0, %r15
+0xb9 0x84 0x00 0x0f
+
+# CHECK: llgcr %r7, %r8
+0xb9 0x84 0x00 0x78
+
+# CHECK: llgcr %r15, %r0
+0xb9 0x84 0x00 0xf0
+
+# CHECK: llgc %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x90
+
+# CHECK: llgc %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x90
+
+# CHECK: llgc %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x90
+
+# CHECK: llgc %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x90
+
+# CHECK: llgc %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x90
+
+# CHECK: llgc %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x90
+
+# CHECK: llgc %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x90
+
+# CHECK: llgc %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x90
+
+# CHECK: llgc %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x90
+
+# CHECK: llgc %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x90
+
+# CHECK: llgfr %r0, %r15
+0xb9 0x16 0x00 0x0f
+
+# CHECK: llgfr %r7, %r8
+0xb9 0x16 0x00 0x78
+
+# CHECK: llgfr %r15, %r0
+0xb9 0x16 0x00 0xf0
+
+# CHECK: llgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x16
+
+# CHECK: llgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x16
+
+# CHECK: llgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x16
+
+# CHECK: llgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x16
+
+# CHECK: llgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x16
+
+# CHECK: llgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x16
+
+# CHECK: llgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x16
+
+# CHECK: llgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x16
+
+# CHECK: llgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x16
+
+# CHECK: llgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x16
+
+# CHECK: llghr %r0, %r15
+0xb9 0x85 0x00 0x0f
+
+# CHECK: llghr %r7, %r8
+0xb9 0x85 0x00 0x78
+
+# CHECK: llghr %r15, %r0
+0xb9 0x85 0x00 0xf0
+
+# CHECK: llgh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x91
+
+# CHECK: llgh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x91
+
+# CHECK: llgh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x91
+
+# CHECK: llgh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x91
+
+# CHECK: llgh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x91
+
+# CHECK: llgh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x91
+
+# CHECK: llgh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x91
+
+# CHECK: llgh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x91
+
+# CHECK: llgh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x91
+
+# CHECK: llgh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x91
+
+# CHECK: llhr %r0, %r15
+0xb9 0x95 0x00 0x0f
+
+# CHECK: llhr %r7, %r8
+0xb9 0x95 0x00 0x78
+
+# CHECK: llhr %r15, %r0
+0xb9 0x95 0x00 0xf0
+
+# CHECK: llh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x95
+
+# CHECK: llh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x95
+
+# CHECK: llh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x95
+
+# CHECK: llh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x95
+
+# CHECK: llh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x95
+
+# CHECK: llh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x95
+
+# CHECK: llh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x95
+
+# CHECK: llh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x95
+
+# CHECK: llh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x95
+
+# CHECK: llh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x95
+
+# CHECK: llhh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xc6
+
+# CHECK: llhh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xc6
+
+# CHECK: llhh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xc6
+
+# CHECK: llhh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xc6
+
+# CHECK: llhh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xc6
+
+# CHECK: llhh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xc6
+
+# CHECK: llhh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xc6
+
+# CHECK: llhh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xc6
+
+# CHECK: llhh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xc6
+
+# CHECK: llhh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xc6
+
+# CHECK: llihf %r0, 0
+0xc0 0x0e 0x00 0x00 0x00 0x00
+
+# CHECK: llihf %r0, 4294967295
+0xc0 0x0e 0xff 0xff 0xff 0xff
+
+# CHECK: llihf %r15, 0
+0xc0 0xfe 0x00 0x00 0x00 0x00
+
+# CHECK: llihh %r0, 0
+0xa5 0x0c 0x00 0x00
+
+# CHECK: llihh %r0, 32768
+0xa5 0x0c 0x80 0x00
+
+# CHECK: llihh %r0, 65535
+0xa5 0x0c 0xff 0xff
+
+# CHECK: llihh %r15, 0
+0xa5 0xfc 0x00 0x00
+
+# CHECK: llihl %r0, 0
+0xa5 0x0d 0x00 0x00
+
+# CHECK: llihl %r0, 32768
+0xa5 0x0d 0x80 0x00
+
+# CHECK: llihl %r0, 65535
+0xa5 0x0d 0xff 0xff
+
+# CHECK: llihl %r15, 0
+0xa5 0xfd 0x00 0x00
+
+# CHECK: llilf %r0, 0
+0xc0 0x0f 0x00 0x00 0x00 0x00
+
+# CHECK: llilf %r0, 4294967295
+0xc0 0x0f 0xff 0xff 0xff 0xff
+
+# CHECK: llilf %r15, 0
+0xc0 0xff 0x00 0x00 0x00 0x00
+
+# CHECK: llilh %r0, 0
+0xa5 0x0e 0x00 0x00
+
+# CHECK: llilh %r0, 32768
+0xa5 0x0e 0x80 0x00
+
+# CHECK: llilh %r0, 65535
+0xa5 0x0e 0xff 0xff
+
+# CHECK: llilh %r15, 0
+0xa5 0xfe 0x00 0x00
+
+# CHECK: llill %r0, 0
+0xa5 0x0f 0x00 0x00
+
+# CHECK: llill %r0, 32768
+0xa5 0x0f 0x80 0x00
+
+# CHECK: llill %r0, 65535
+0xa5 0x0f 0xff 0xff
+
+# CHECK: llill %r15, 0
+0xa5 0xff 0x00 0x00
+
+# CHECK: lmg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x04
+
+# CHECK: lmg %r0, %r15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x04
+
+# CHECK: lmg %r14, %r15, 0
+0xeb 0xef 0x00 0x00 0x00 0x04
+
+# CHECK: lmg %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x04
+
+# CHECK: lmg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x04
+
+# CHECK: lmg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x04
+
+# CHECK: lmg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x04
+
+# CHECK: lmg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x04
+
+# CHECK: lmg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x04
+
+# CHECK: lmg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x04
+
+# CHECK: lmg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x04
+
+# CHECK: lmg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x04
+
+# CHECK: lmg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x04
+
+# CHECK: lndbr %f0, %f9
+0xb3 0x11 0x00 0x09
+
+# CHECK: lndbr %f0, %f15
+0xb3 0x11 0x00 0x0f
+
+# CHECK: lndbr %f15, %f0
+0xb3 0x11 0x00 0xf0
+
+# CHECK: lndbr %f15, %f9
+0xb3 0x11 0x00 0xf9
+
+# CHECK: lnebr %f0, %f9
+0xb3 0x01 0x00 0x09
+
+# CHECK: lnebr %f0, %f15
+0xb3 0x01 0x00 0x0f
+
+# CHECK: lnebr %f15, %f0
+0xb3 0x01 0x00 0xf0
+
+# CHECK: lnebr %f15, %f9
+0xb3 0x01 0x00 0xf9
+
+# CHECK: lngfr %r0, %r0
+0xb9 0x11 0x00 0x00
+
+# CHECK: lngfr %r0, %r15
+0xb9 0x11 0x00 0x0f
+
+# CHECK: lngfr %r15, %r0
+0xb9 0x11 0x00 0xf0
+
+# CHECK: lngfr %r7, %r8
+0xb9 0x11 0x00 0x78
+
+# CHECK: lngr %r0, %r0
+0xb9 0x01 0x00 0x00
+
+# CHECK: lngr %r0, %r15
+0xb9 0x01 0x00 0x0f
+
+# CHECK: lngr %r15, %r0
+0xb9 0x01 0x00 0xf0
+
+# CHECK: lngr %r7, %r8
+0xb9 0x01 0x00 0x78
+
+# CHECK: lnr %r0, %r0
+0x11 0x00
+
+# CHECK: lnr %r0, %r15
+0x11 0x0f
+
+# CHECK: lnr %r15, %r0
+0x11 0xf0
+
+# CHECK: lnr %r7, %r8
+0x11 0x78
+
+# CHECK: lnxbr %f0, %f8
+0xb3 0x41 0x00 0x08
+
+# CHECK: lnxbr %f0, %f13
+0xb3 0x41 0x00 0x0d
+
+# CHECK: lnxbr %f13, %f0
+0xb3 0x41 0x00 0xd0
+
+# CHECK: lnxbr %f13, %f9
+0xb3 0x41 0x00 0xd9
+
+# CHECK: loc %r7, 6399(%r8), 0
+0xeb 0x70 0x88 0xff 0x01 0xf2
+
+# CHECK: loco %r7, 6399(%r8)
+0xeb 0x71 0x88 0xff 0x01 0xf2
+
+# CHECK: loch %r7, 6399(%r8)
+0xeb 0x72 0x88 0xff 0x01 0xf2
+
+# CHECK: locnle %r7, 6399(%r8)
+0xeb 0x73 0x88 0xff 0x01 0xf2
+
+# CHECK: locl %r7, 6399(%r8)
+0xeb 0x74 0x88 0xff 0x01 0xf2
+
+# CHECK: locnhe %r7, 6399(%r8)
+0xeb 0x75 0x88 0xff 0x01 0xf2
+
+# CHECK: loclh %r7, 6399(%r8)
+0xeb 0x76 0x88 0xff 0x01 0xf2
+
+# CHECK: locne %r7, 6399(%r8)
+0xeb 0x77 0x88 0xff 0x01 0xf2
+
+# CHECK: loce %r7, 6399(%r8)
+0xeb 0x78 0x88 0xff 0x01 0xf2
+
+# CHECK: locnlh %r7, 6399(%r8)
+0xeb 0x79 0x88 0xff 0x01 0xf2
+
+# CHECK: loche %r7, 6399(%r8)
+0xeb 0x7a 0x88 0xff 0x01 0xf2
+
+# CHECK: locnl %r7, 6399(%r8)
+0xeb 0x7b 0x88 0xff 0x01 0xf2
+
+# CHECK: locle %r7, 6399(%r8)
+0xeb 0x7c 0x88 0xff 0x01 0xf2
+
+# CHECK: locnh %r7, 6399(%r8)
+0xeb 0x7d 0x88 0xff 0x01 0xf2
+
+# CHECK: locno %r7, 6399(%r8)
+0xeb 0x7e 0x88 0xff 0x01 0xf2
+
+# CHECK: loc %r7, 6399(%r8), 15
+0xeb 0x7f 0x88 0xff 0x01 0xf2
+
+# CHECK: locg %r7, 6399(%r8), 0
+0xeb 0x70 0x88 0xff 0x01 0xe2
+
+# CHECK: locgo %r7, 6399(%r8)
+0xeb 0x71 0x88 0xff 0x01 0xe2
+
+# CHECK: locgh %r7, 6399(%r8)
+0xeb 0x72 0x88 0xff 0x01 0xe2
+
+# CHECK: locgnle %r7, 6399(%r8)
+0xeb 0x73 0x88 0xff 0x01 0xe2
+
+# CHECK: locgl %r7, 6399(%r8)
+0xeb 0x74 0x88 0xff 0x01 0xe2
+
+# CHECK: locgnhe %r7, 6399(%r8)
+0xeb 0x75 0x88 0xff 0x01 0xe2
+
+# CHECK: locglh %r7, 6399(%r8)
+0xeb 0x76 0x88 0xff 0x01 0xe2
+
+# CHECK: locgne %r7, 6399(%r8)
+0xeb 0x77 0x88 0xff 0x01 0xe2
+
+# CHECK: locge %r7, 6399(%r8)
+0xeb 0x78 0x88 0xff 0x01 0xe2
+
+# CHECK: locgnlh %r7, 6399(%r8)
+0xeb 0x79 0x88 0xff 0x01 0xe2
+
+# CHECK: locghe %r7, 6399(%r8)
+0xeb 0x7a 0x88 0xff 0x01 0xe2
+
+# CHECK: locgnl %r7, 6399(%r8)
+0xeb 0x7b 0x88 0xff 0x01 0xe2
+
+# CHECK: locgle %r7, 6399(%r8)
+0xeb 0x7c 0x88 0xff 0x01 0xe2
+
+# CHECK: locgnh %r7, 6399(%r8)
+0xeb 0x7d 0x88 0xff 0x01 0xe2
+
+# CHECK: locgno %r7, 6399(%r8)
+0xeb 0x7e 0x88 0xff 0x01 0xe2
+
+# CHECK: locg %r7, 6399(%r8), 15
+0xeb 0x7f 0x88 0xff 0x01 0xe2
+
+# CHECK: locr %r11, %r3, 0
+0xb9 0xf2 0x00 0xb3
+
+# CHECK: locro %r11, %r3
+0xb9 0xf2 0x10 0xb3
+
+# CHECK: locrh %r11, %r3
+0xb9 0xf2 0x20 0xb3
+
+# CHECK: locrnle %r11, %r3
+0xb9 0xf2 0x30 0xb3
+
+# CHECK: locrl %r11, %r3
+0xb9 0xf2 0x40 0xb3
+
+# CHECK: locrnhe %r11, %r3
+0xb9 0xf2 0x50 0xb3
+
+# CHECK: locrlh %r11, %r3
+0xb9 0xf2 0x60 0xb3
+
+# CHECK: locrne %r11, %r3
+0xb9 0xf2 0x70 0xb3
+
+# CHECK: locre %r11, %r3
+0xb9 0xf2 0x80 0xb3
+
+# CHECK: locrnlh %r11, %r3
+0xb9 0xf2 0x90 0xb3
+
+# CHECK: locrhe %r11, %r3
+0xb9 0xf2 0xa0 0xb3
+
+# CHECK: locrnl %r11, %r3
+0xb9 0xf2 0xb0 0xb3
+
+# CHECK: locrle %r11, %r3
+0xb9 0xf2 0xc0 0xb3
+
+# CHECK: locrnh %r11, %r3
+0xb9 0xf2 0xd0 0xb3
+
+# CHECK: locrno %r11, %r3
+0xb9 0xf2 0xe0 0xb3
+
+# CHECK: locr %r11, %r3, 15
+0xb9 0xf2 0xf0 0xb3
+
+# CHECK: locgr %r11, %r3, 0
+0xb9 0xe2 0x00 0xb3
+
+# CHECK: locgro %r11, %r3
+0xb9 0xe2 0x10 0xb3
+
+# CHECK: locgrh %r11, %r3
+0xb9 0xe2 0x20 0xb3
+
+# CHECK: locgrnle %r11, %r3
+0xb9 0xe2 0x30 0xb3
+
+# CHECK: locgrl %r11, %r3
+0xb9 0xe2 0x40 0xb3
+
+# CHECK: locgrnhe %r11, %r3
+0xb9 0xe2 0x50 0xb3
+
+# CHECK: locgrlh %r11, %r3
+0xb9 0xe2 0x60 0xb3
+
+# CHECK: locgrne %r11, %r3
+0xb9 0xe2 0x70 0xb3
+
+# CHECK: locgre %r11, %r3
+0xb9 0xe2 0x80 0xb3
+
+# CHECK: locgrnlh %r11, %r3
+0xb9 0xe2 0x90 0xb3
+
+# CHECK: locgrhe %r11, %r3
+0xb9 0xe2 0xa0 0xb3
+
+# CHECK: locgrnl %r11, %r3
+0xb9 0xe2 0xb0 0xb3
+
+# CHECK: locgrle %r11, %r3
+0xb9 0xe2 0xc0 0xb3
+
+# CHECK: locgrnh %r11, %r3
+0xb9 0xe2 0xd0 0xb3
+
+# CHECK: locgrno %r11, %r3
+0xb9 0xe2 0xe0 0xb3
+
+# CHECK: locgr %r11, %r3, 15
+0xb9 0xe2 0xf0 0xb3
+
+# CHECK: lpdbr %f0, %f9
+0xb3 0x10 0x00 0x09
+
+# CHECK: lpdbr %f0, %f15
+0xb3 0x10 0x00 0x0f
+
+# CHECK: lpdbr %f15, %f0
+0xb3 0x10 0x00 0xf0
+
+# CHECK: lpdbr %f15, %f9
+0xb3 0x10 0x00 0xf9
+
+# CHECK: lpebr %f0, %f9
+0xb3 0x00 0x00 0x09
+
+# CHECK: lpebr %f0, %f15
+0xb3 0x00 0x00 0x0f
+
+# CHECK: lpebr %f15, %f0
+0xb3 0x00 0x00 0xf0
+
+# CHECK: lpebr %f15, %f9
+0xb3 0x00 0x00 0xf9
+
+# CHECK: lpgfr %r0, %r0
+0xb9 0x10 0x00 0x00
+
+# CHECK: lpgfr %r0, %r15
+0xb9 0x10 0x00 0x0f
+
+# CHECK: lpgfr %r15, %r0
+0xb9 0x10 0x00 0xf0
+
+# CHECK: lpgfr %r7, %r8
+0xb9 0x10 0x00 0x78
+
+# CHECK: lpgr %r0, %r0
+0xb9 0x00 0x00 0x00
+
+# CHECK: lpgr %r0, %r15
+0xb9 0x00 0x00 0x0f
+
+# CHECK: lpgr %r15, %r0
+0xb9 0x00 0x00 0xf0
+
+# CHECK: lpgr %r7, %r8
+0xb9 0x00 0x00 0x78
+
+# CHECK: lpr %r0, %r0
+0x10 0x00
+
+# CHECK: lpr %r0, %r15
+0x10 0x0f
+
+# CHECK: lpr %r15, %r0
+0x10 0xf0
+
+# CHECK: lpr %r7, %r8
+0x10 0x78
+
+# CHECK: lpxbr %f0, %f8
+0xb3 0x40 0x00 0x08
+
+# CHECK: lpxbr %f0, %f13
+0xb3 0x40 0x00 0x0d
+
+# CHECK: lpxbr %f13, %f0
+0xb3 0x40 0x00 0xd0
+
+# CHECK: lpxbr %f13, %f9
+0xb3 0x40 0x00 0xd9
+
+# CHECK: lr %r0, %r9
+0x18 0x09
+
+# CHECK: lr %r0, %r15
+0x18 0x0f
+
+# CHECK: lr %r15, %r0
+0x18 0xf0
+
+# CHECK: lr %r15, %r9
+0x18 0xf9
+
+# CHECK: lrvgr %r0, %r0
+0xb9 0x0f 0x00 0x00
+
+# CHECK: lrvgr %r0, %r15
+0xb9 0x0f 0x00 0x0f
+
+# CHECK: lrvgr %r15, %r0
+0xb9 0x0f 0x00 0xf0
+
+# CHECK: lrvgr %r7, %r8
+0xb9 0x0f 0x00 0x78
+
+# CHECK: lrvgr %r15, %r15
+0xb9 0x0f 0x00 0xff
+
+# CHECK: lrvg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x0f
+
+# CHECK: lrvg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x0f
+
+# CHECK: lrvg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x0f
+
+# CHECK: lrvg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x0f
+
+# CHECK: lrvg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x0f
+
+# CHECK: lrvg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x0f
+
+# CHECK: lrvg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x0f
+
+# CHECK: lrvg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x0f
+
+# CHECK: lrvg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x0f
+
+# CHECK: lrvg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x0f
+
+# CHECK: lrvr %r0, %r0
+0xb9 0x1f 0x00 0x00
+
+# CHECK: lrvr %r0, %r15
+0xb9 0x1f 0x00 0x0f
+
+# CHECK: lrvr %r15, %r0
+0xb9 0x1f 0x00 0xf0
+
+# CHECK: lrvr %r7, %r8
+0xb9 0x1f 0x00 0x78
+
+# CHECK: lrvr %r15, %r15
+0xb9 0x1f 0x00 0xff
+
+# CHECK: lrv %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x1e
+
+# CHECK: lrv %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x1e
+
+# CHECK: lrv %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x1e
+
+# CHECK: lrv %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x1e
+
+# CHECK: lrv %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x1e
+
+# CHECK: lrv %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x1e
+
+# CHECK: lrv %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x1e
+
+# CHECK: lrv %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x1e
+
+# CHECK: lrv %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x1e
+
+# CHECK: lrv %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x1e
+
+# CHECK: l %r0, 0
+0x58 0x00 0x00 0x00
+
+# CHECK: l %r0, 4095
+0x58 0x00 0x0f 0xff
+
+# CHECK: l %r0, 0(%r1)
+0x58 0x00 0x10 0x00
+
+# CHECK: l %r0, 0(%r15)
+0x58 0x00 0xf0 0x00
+
+# CHECK: l %r0, 4095(%r1,%r15)
+0x58 0x01 0xff 0xff
+
+# CHECK: l %r0, 4095(%r15,%r1)
+0x58 0x0f 0x1f 0xff
+
+# CHECK: l %r15, 0
+0x58 0xf0 0x00 0x00
+
+# CHECK: lt %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x12
+
+# CHECK: lt %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x12
+
+# CHECK: lt %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x12
+
+# CHECK: lt %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x12
+
+# CHECK: lt %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x12
+
+# CHECK: lt %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x12
+
+# CHECK: lt %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x12
+
+# CHECK: lt %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x12
+
+# CHECK: lt %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x12
+
+# CHECK: lt %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x12
+
+# CHECK: ltdbr %f0, %f9
+0xb3 0x12 0x00 0x09
+
+# CHECK: ltdbr %f0, %f15
+0xb3 0x12 0x00 0x0f
+
+# CHECK: ltdbr %f15, %f0
+0xb3 0x12 0x00 0xf0
+
+# CHECK: ltdbr %f15, %f9
+0xb3 0x12 0x00 0xf9
+
+# CHECK: ltebr %f0, %f9
+0xb3 0x02 0x00 0x09
+
+# CHECK: ltebr %f0, %f15
+0xb3 0x02 0x00 0x0f
+
+# CHECK: ltebr %f15, %f0
+0xb3 0x02 0x00 0xf0
+
+# CHECK: ltebr %f15, %f9
+0xb3 0x02 0x00 0xf9
+
+# CHECK: ltg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x02
+
+# CHECK: ltg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x02
+
+# CHECK: ltg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x02
+
+# CHECK: ltg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x02
+
+# CHECK: ltg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x02
+
+# CHECK: ltg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x02
+
+# CHECK: ltg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x02
+
+# CHECK: ltg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x02
+
+# CHECK: ltg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x02
+
+# CHECK: ltg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x02
+
+# CHECK: ltgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x32
+
+# CHECK: ltgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x32
+
+# CHECK: ltgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x32
+
+# CHECK: ltgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x32
+
+# CHECK: ltgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x32
+
+# CHECK: ltgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x32
+
+# CHECK: ltgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x32
+
+# CHECK: ltgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x32
+
+# CHECK: ltgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x32
+
+# CHECK: ltgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x32
+
+# CHECK: ltgfr %r0, %r9
+0xb9 0x12 0x00 0x09
+
+# CHECK: ltgfr %r0, %r15
+0xb9 0x12 0x00 0x0f
+
+# CHECK: ltgfr %r15, %r0
+0xb9 0x12 0x00 0xf0
+
+# CHECK: ltgfr %r15, %r9
+0xb9 0x12 0x00 0xf9
+
+# CHECK: ltgr %r0, %r9
+0xb9 0x02 0x00 0x09
+
+# CHECK: ltgr %r0, %r15
+0xb9 0x02 0x00 0x0f
+
+# CHECK: ltgr %r15, %r0
+0xb9 0x02 0x00 0xf0
+
+# CHECK: ltgr %r15, %r9
+0xb9 0x02 0x00 0xf9
+
+# CHECK: ltr %r0, %r9
+0x12 0x09
+
+# CHECK: ltr %r0, %r15
+0x12 0x0f
+
+# CHECK: ltr %r15, %r0
+0x12 0xf0
+
+# CHECK: ltr %r15, %r9
+0x12 0xf9
+
+# CHECK: ltxbr %f0, %f9
+0xb3 0x42 0x00 0x09
+
+# CHECK: ltxbr %f0, %f13
+0xb3 0x42 0x00 0x0d
+
+# CHECK: ltxbr %f13, %f0
+0xb3 0x42 0x00 0xd0
+
+# CHECK: ltxbr %f13, %f9
+0xb3 0x42 0x00 0xd9
+
+# CHECK: lxr %f0, %f8
+0xb3 0x65 0x00 0x08
+
+# CHECK: lxr %f0, %f13
+0xb3 0x65 0x00 0x0d
+
+# CHECK: lxr %f13, %f0
+0xb3 0x65 0x00 0xd0
+
+# CHECK: lxr %f13, %f9
+0xb3 0x65 0x00 0xd9
+
+# CHECK: ly %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x58
+
+# CHECK: ly %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x58
+
+# CHECK: ly %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x58
+
+# CHECK: ly %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x58
+
+# CHECK: ly %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x58
+
+# CHECK: ly %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x58
+
+# CHECK: ly %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x58
+
+# CHECK: ly %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x58
+
+# CHECK: ly %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x58
+
+# CHECK: ly %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x58
+
+# CHECK: lzdr %f0
+0xb3 0x75 0x00 0x00
+
+# CHECK: lzdr %f7
+0xb3 0x75 0x00 0x70
+
+# CHECK: lzdr %f15
+0xb3 0x75 0x00 0xf0
+
+# CHECK: lzer %f0
+0xb3 0x74 0x00 0x00
+
+# CHECK: lzer %f7
+0xb3 0x74 0x00 0x70
+
+# CHECK: lzer %f15
+0xb3 0x74 0x00 0xf0
+
+# CHECK: lzxr %f0
+0xb3 0x76 0x00 0x00
+
+# CHECK: lzxr %f8
+0xb3 0x76 0x00 0x80
+
+# CHECK: lzxr %f13
+0xb3 0x76 0x00 0xd0
+
+# CHECK: madbr %f0, %f0, %f0
+0xb3 0x1e 0x00 0x00
+
+# CHECK: madbr %f0, %f0, %f15
+0xb3 0x1e 0x00 0x0f
+
+# CHECK: madbr %f0, %f15, %f0
+0xb3 0x1e 0x00 0xf0
+
+# CHECK: madbr %f15, %f0, %f0
+0xb3 0x1e 0xf0 0x00
+
+# CHECK: madbr %f7, %f8, %f9
+0xb3 0x1e 0x70 0x89
+
+# CHECK: madbr %f15, %f15, %f15
+0xb3 0x1e 0xf0 0xff
+
+# CHECK: madb %f0, %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x1e
+
+# CHECK: madb %f0, %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x1e
+
+# CHECK: madb %f0, %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x1e
+
+# CHECK: madb %f0, %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x1e
+
+# CHECK: madb %f0, %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x1e
+
+# CHECK: madb %f0, %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x1e
+
+# CHECK: madb %f0, %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x1e
+
+# CHECK: madb %f15, %f0, 0
+0xed 0x00 0x00 0x00 0xf0 0x1e
+
+# CHECK: madb %f15, %f15, 0
+0xed 0xf0 0x00 0x00 0xf0 0x1e
+
+# CHECK: maebr %f0, %f0, %f0
+0xb3 0x0e 0x00 0x00
+
+# CHECK: maebr %f0, %f0, %f15
+0xb3 0x0e 0x00 0x0f
+
+# CHECK: maebr %f0, %f15, %f0
+0xb3 0x0e 0x00 0xf0
+
+# CHECK: maebr %f15, %f0, %f0
+0xb3 0x0e 0xf0 0x00
+
+# CHECK: maebr %f7, %f8, %f9
+0xb3 0x0e 0x70 0x89
+
+# CHECK: maebr %f15, %f15, %f15
+0xb3 0x0e 0xf0 0xff
+
+# CHECK: maeb %f0, %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x0e
+
+# CHECK: maeb %f0, %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x0e
+
+# CHECK: maeb %f0, %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x0e
+
+# CHECK: maeb %f0, %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x0e
+
+# CHECK: maeb %f0, %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x0e
+
+# CHECK: maeb %f0, %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x0e
+
+# CHECK: maeb %f0, %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x0e
+
+# CHECK: maeb %f15, %f0, 0
+0xed 0x00 0x00 0x00 0xf0 0x0e
+
+# CHECK: maeb %f15, %f15, 0
+0xed 0xf0 0x00 0x00 0xf0 0x0e
+
+# CHECK: mdbr %f0, %f0
+0xb3 0x1c 0x00 0x00
+
+# CHECK: mdbr %f0, %f15
+0xb3 0x1c 0x00 0x0f
+
+# CHECK: mdbr %f7, %f8
+0xb3 0x1c 0x00 0x78
+
+# CHECK: mdbr %f15, %f0
+0xb3 0x1c 0x00 0xf0
+
+# CHECK: mdb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x1c
+
+# CHECK: mdb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x1c
+
+# CHECK: mdb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x1c
+
+# CHECK: mdb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x1c
+
+# CHECK: mdb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x1c
+
+# CHECK: mdb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x1c
+
+# CHECK: mdb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x1c
+
+# CHECK: mdebr %f0, %f0
+0xb3 0x0c 0x00 0x00
+
+# CHECK: mdebr %f0, %f15
+0xb3 0x0c 0x00 0x0f
+
+# CHECK: mdebr %f7, %f8
+0xb3 0x0c 0x00 0x78
+
+# CHECK: mdebr %f15, %f0
+0xb3 0x0c 0x00 0xf0
+
+# CHECK: mdeb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x0c
+
+# CHECK: mdeb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x0c
+
+# CHECK: mdeb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x0c
+
+# CHECK: mdeb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x0c
+
+# CHECK: mdeb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x0c
+
+# CHECK: mdeb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x0c
+
+# CHECK: mdeb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x0c
+
+# CHECK: meebr %f0, %f0
+0xb3 0x17 0x00 0x00
+
+# CHECK: meebr %f0, %f15
+0xb3 0x17 0x00 0x0f
+
+# CHECK: meebr %f7, %f8
+0xb3 0x17 0x00 0x78
+
+# CHECK: meebr %f15, %f0
+0xb3 0x17 0x00 0xf0
+
+# CHECK: meeb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x17
+
+# CHECK: meeb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x17
+
+# CHECK: meeb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x17
+
+# CHECK: meeb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x17
+
+# CHECK: meeb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x17
+
+# CHECK: meeb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x17
+
+# CHECK: meeb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x17
+
+# CHECK: mghi %r0, -32768
+0xa7 0x0d 0x80 0x00
+
+# CHECK: mghi %r0, -1
+0xa7 0x0d 0xff 0xff
+
+# CHECK: mghi %r0, 0
+0xa7 0x0d 0x00 0x00
+
+# CHECK: mghi %r0, 1
+0xa7 0x0d 0x00 0x01
+
+# CHECK: mghi %r0, 32767
+0xa7 0x0d 0x7f 0xff
+
+# CHECK: mghi %r15, 0
+0xa7 0xfd 0x00 0x00
+
+# CHECK: mhi %r0, -32768
+0xa7 0x0c 0x80 0x00
+
+# CHECK: mhi %r0, -1
+0xa7 0x0c 0xff 0xff
+
+# CHECK: mhi %r0, 0
+0xa7 0x0c 0x00 0x00
+
+# CHECK: mhi %r0, 1
+0xa7 0x0c 0x00 0x01
+
+# CHECK: mhi %r0, 32767
+0xa7 0x0c 0x7f 0xff
+
+# CHECK: mhi %r15, 0
+0xa7 0xfc 0x00 0x00
+
+# CHECK: mh %r0, 0
+0x4c 0x00 0x00 0x00
+
+# CHECK: mh %r0, 4095
+0x4c 0x00 0x0f 0xff
+
+# CHECK: mh %r0, 0(%r1)
+0x4c 0x00 0x10 0x00
+
+# CHECK: mh %r0, 0(%r15)
+0x4c 0x00 0xf0 0x00
+
+# CHECK: mh %r0, 4095(%r1,%r15)
+0x4c 0x01 0xff 0xff
+
+# CHECK: mh %r0, 4095(%r15,%r1)
+0x4c 0x0f 0x1f 0xff
+
+# CHECK: mh %r15, 0
+0x4c 0xf0 0x00 0x00
+
+# CHECK: mhy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x7c
+
+# CHECK: mhy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x7c
+
+# CHECK: mhy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x7c
+
+# CHECK: mhy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x7c
+
+# CHECK: mhy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x7c
+
+# CHECK: mhy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x7c
+
+# CHECK: mhy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x7c
+
+# CHECK: mhy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x7c
+
+# CHECK: mhy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x7c
+
+# CHECK: mhy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x7c
+
+# CHECK: mlgr %r0, %r0
+0xb9 0x86 0x00 0x00
+
+# CHECK: mlgr %r0, %r15
+0xb9 0x86 0x00 0x0f
+
+# CHECK: mlgr %r14, %r0
+0xb9 0x86 0x00 0xe0
+
+# CHECK: mlgr %r6, %r9
+0xb9 0x86 0x00 0x69
+
+# CHECK: mlg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x86
+
+# CHECK: mlg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x86
+
+# CHECK: mlg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x86
+
+# CHECK: mlg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x86
+
+# CHECK: mlg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x86
+
+# CHECK: mlg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x86
+
+# CHECK: mlg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x86
+
+# CHECK: mlg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x86
+
+# CHECK: mlg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x86
+
+# CHECK: mlg %r14, 0
+0xe3 0xe0 0x00 0x00 0x00 0x86
+
+# CHECK: msdbr %f0, %f0, %f0
+0xb3 0x1f 0x00 0x00
+
+# CHECK: msdbr %f0, %f0, %f15
+0xb3 0x1f 0x00 0x0f
+
+# CHECK: msdbr %f0, %f15, %f0
+0xb3 0x1f 0x00 0xf0
+
+# CHECK: msdbr %f15, %f0, %f0
+0xb3 0x1f 0xf0 0x00
+
+# CHECK: msdbr %f7, %f8, %f9
+0xb3 0x1f 0x70 0x89
+
+# CHECK: msdbr %f15, %f15, %f15
+0xb3 0x1f 0xf0 0xff
+
+# CHECK: msdb %f0, %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x1f
+
+# CHECK: msdb %f0, %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x1f
+
+# CHECK: msdb %f0, %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x1f
+
+# CHECK: msdb %f0, %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x1f
+
+# CHECK: msdb %f0, %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x1f
+
+# CHECK: msdb %f0, %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x1f
+
+# CHECK: msdb %f0, %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x1f
+
+# CHECK: msdb %f15, %f0, 0
+0xed 0x00 0x00 0x00 0xf0 0x1f
+
+# CHECK: msdb %f15, %f15, 0
+0xed 0xf0 0x00 0x00 0xf0 0x1f
+
+# CHECK: msebr %f0, %f0, %f0
+0xb3 0x0f 0x00 0x00
+
+# CHECK: msebr %f0, %f0, %f15
+0xb3 0x0f 0x00 0x0f
+
+# CHECK: msebr %f0, %f15, %f0
+0xb3 0x0f 0x00 0xf0
+
+# CHECK: msebr %f15, %f0, %f0
+0xb3 0x0f 0xf0 0x00
+
+# CHECK: msebr %f7, %f8, %f9
+0xb3 0x0f 0x70 0x89
+
+# CHECK: msebr %f15, %f15, %f15
+0xb3 0x0f 0xf0 0xff
+
+# CHECK: mseb %f0, %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x0f
+
+# CHECK: mseb %f0, %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x0f
+
+# CHECK: mseb %f0, %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x0f
+
+# CHECK: mseb %f0, %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x0f
+
+# CHECK: mseb %f0, %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x0f
+
+# CHECK: mseb %f0, %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x0f
+
+# CHECK: mseb %f0, %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x0f
+
+# CHECK: mseb %f15, %f0, 0
+0xed 0x00 0x00 0x00 0xf0 0x0f
+
+# CHECK: mseb %f15, %f15, 0
+0xed 0xf0 0x00 0x00 0xf0 0x0f
+
+# CHECK: msfi %r0, -2147483648
+0xc2 0x01 0x80 0x00 0x00 0x00
+
+# CHECK: msfi %r0, -1
+0xc2 0x01 0xff 0xff 0xff 0xff
+
+# CHECK: msfi %r0, 0
+0xc2 0x01 0x00 0x00 0x00 0x00
+
+# CHECK: msfi %r0, 1
+0xc2 0x01 0x00 0x00 0x00 0x01
+
+# CHECK: msfi %r0, 2147483647
+0xc2 0x01 0x7f 0xff 0xff 0xff
+
+# CHECK: msfi %r15, 0
+0xc2 0xf1 0x00 0x00 0x00 0x00
+
+# CHECK: msgfi %r0, -2147483648
+0xc2 0x00 0x80 0x00 0x00 0x00
+
+# CHECK: msgfi %r0, -1
+0xc2 0x00 0xff 0xff 0xff 0xff
+
+# CHECK: msgfi %r0, 0
+0xc2 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: msgfi %r0, 1
+0xc2 0x00 0x00 0x00 0x00 0x01
+
+# CHECK: msgfi %r0, 2147483647
+0xc2 0x00 0x7f 0xff 0xff 0xff
+
+# CHECK: msgfi %r15, 0
+0xc2 0xf0 0x00 0x00 0x00 0x00
+
+# CHECK: msgfr %r0, %r0
+0xb9 0x1c 0x00 0x00
+
+# CHECK: msgfr %r0, %r15
+0xb9 0x1c 0x00 0x0f
+
+# CHECK: msgfr %r15, %r0
+0xb9 0x1c 0x00 0xf0
+
+# CHECK: msgfr %r7, %r8
+0xb9 0x1c 0x00 0x78
+
+# CHECK: msgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x1c
+
+# CHECK: msgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x1c
+
+# CHECK: msgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x1c
+
+# CHECK: msgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x1c
+
+# CHECK: msgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x1c
+
+# CHECK: msgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x1c
+
+# CHECK: msgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x1c
+
+# CHECK: msgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x1c
+
+# CHECK: msgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x1c
+
+# CHECK: msgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x1c
+
+# CHECK: msgr %r0, %r0
+0xb9 0x0c 0x00 0x00
+
+# CHECK: msgr %r0, %r15
+0xb9 0x0c 0x00 0x0f
+
+# CHECK: msgr %r15, %r0
+0xb9 0x0c 0x00 0xf0
+
+# CHECK: msgr %r7, %r8
+0xb9 0x0c 0x00 0x78
+
+# CHECK: msg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x0c
+
+# CHECK: msg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x0c
+
+# CHECK: msg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x0c
+
+# CHECK: msg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x0c
+
+# CHECK: msg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x0c
+
+# CHECK: msg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x0c
+
+# CHECK: msg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x0c
+
+# CHECK: msg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x0c
+
+# CHECK: msg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x0c
+
+# CHECK: msg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x0c
+
+# CHECK: msr %r0, %r0
+0xb2 0x52 0x00 0x00
+
+# CHECK: msr %r0, %r15
+0xb2 0x52 0x00 0x0f
+
+# CHECK: msr %r15, %r0
+0xb2 0x52 0x00 0xf0
+
+# CHECK: msr %r7, %r8
+0xb2 0x52 0x00 0x78
+
+# CHECK: ms %r0, 0
+0x71 0x00 0x00 0x00
+
+# CHECK: ms %r0, 4095
+0x71 0x00 0x0f 0xff
+
+# CHECK: ms %r0, 0(%r1)
+0x71 0x00 0x10 0x00
+
+# CHECK: ms %r0, 0(%r15)
+0x71 0x00 0xf0 0x00
+
+# CHECK: ms %r0, 4095(%r1,%r15)
+0x71 0x01 0xff 0xff
+
+# CHECK: ms %r0, 4095(%r15,%r1)
+0x71 0x0f 0x1f 0xff
+
+# CHECK: ms %r15, 0
+0x71 0xf0 0x00 0x00
+
+# CHECK: msy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x51
+
+# CHECK: msy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x51
+
+# CHECK: msy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x51
+
+# CHECK: msy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x51
+
+# CHECK: msy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x51
+
+# CHECK: msy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x51
+
+# CHECK: msy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x51
+
+# CHECK: msy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x51
+
+# CHECK: msy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x51
+
+# CHECK: msy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x51
+
+# CHECK: mvc 0(1), 0
+0xd2 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: mvc 0(1), 0(%r1)
+0xd2 0x00 0x00 0x00 0x10 0x00
+
+# CHECK: mvc 0(1), 0(%r15)
+0xd2 0x00 0x00 0x00 0xf0 0x00
+
+# CHECK: mvc 0(1), 4095
+0xd2 0x00 0x00 0x00 0x0f 0xff
+
+# CHECK: mvc 0(1), 4095(%r1)
+0xd2 0x00 0x00 0x00 0x1f 0xff
+
+# CHECK: mvc 0(1), 4095(%r15)
+0xd2 0x00 0x00 0x00 0xff 0xff
+
+# CHECK: mvc 0(1,%r1), 0
+0xd2 0x00 0x10 0x00 0x00 0x00
+
+# CHECK: mvc 0(1,%r15), 0
+0xd2 0x00 0xf0 0x00 0x00 0x00
+
+# CHECK: mvc 4095(1,%r1), 0
+0xd2 0x00 0x1f 0xff 0x00 0x00
+
+# CHECK: mvc 4095(1,%r15), 0
+0xd2 0x00 0xff 0xff 0x00 0x00
+
+# CHECK: mvc 0(256,%r1), 0
+0xd2 0xff 0x10 0x00 0x00 0x00
+
+# CHECK: mvc 0(256,%r15), 0
+0xd2 0xff 0xf0 0x00 0x00 0x00
+
+# CHECK: mvghi 0, 0
+0xe5 0x48 0x00 0x00 0x00 0x00
+
+# CHECK: mvghi 4095, 0
+0xe5 0x48 0x0f 0xff 0x00 0x00
+
+# CHECK: mvghi 0, -32768
+0xe5 0x48 0x00 0x00 0x80 0x00
+
+# CHECK: mvghi 0, -1
+0xe5 0x48 0x00 0x00 0xff 0xff
+
+# CHECK: mvghi 0, 0
+0xe5 0x48 0x00 0x00 0x00 0x00
+
+# CHECK: mvghi 0, 1
+0xe5 0x48 0x00 0x00 0x00 0x01
+
+# CHECK: mvghi 0, 32767
+0xe5 0x48 0x00 0x00 0x7f 0xff
+
+# CHECK: mvghi 0(%r1), 42
+0xe5 0x48 0x10 0x00 0x00 0x2a
+
+# CHECK: mvghi 0(%r15), 42
+0xe5 0x48 0xf0 0x00 0x00 0x2a
+
+# CHECK: mvghi 4095(%r1), 42
+0xe5 0x48 0x1f 0xff 0x00 0x2a
+
+# CHECK: mvghi 4095(%r15), 42
+0xe5 0x48 0xff 0xff 0x00 0x2a
+
+# CHECK: mvhhi 0, 0
+0xe5 0x44 0x00 0x00 0x00 0x00
+
+# CHECK: mvhhi 4095, 0
+0xe5 0x44 0x0f 0xff 0x00 0x00
+
+# CHECK: mvhhi 0, -32768
+0xe5 0x44 0x00 0x00 0x80 0x00
+
+# CHECK: mvhhi 0, -1
+0xe5 0x44 0x00 0x00 0xff 0xff
+
+# CHECK: mvhhi 0, 0
+0xe5 0x44 0x00 0x00 0x00 0x00
+
+# CHECK: mvhhi 0, 1
+0xe5 0x44 0x00 0x00 0x00 0x01
+
+# CHECK: mvhhi 0, 32767
+0xe5 0x44 0x00 0x00 0x7f 0xff
+
+# CHECK: mvhhi 0(%r1), 42
+0xe5 0x44 0x10 0x00 0x00 0x2a
+
+# CHECK: mvhhi 0(%r15), 42
+0xe5 0x44 0xf0 0x00 0x00 0x2a
+
+# CHECK: mvhhi 4095(%r1), 42
+0xe5 0x44 0x1f 0xff 0x00 0x2a
+
+# CHECK: mvhhi 4095(%r15), 42
+0xe5 0x44 0xff 0xff 0x00 0x2a
+
+# CHECK: mvhi 0, 0
+0xe5 0x4c 0x00 0x00 0x00 0x00
+
+# CHECK: mvhi 4095, 0
+0xe5 0x4c 0x0f 0xff 0x00 0x00
+
+# CHECK: mvhi 0, -32768
+0xe5 0x4c 0x00 0x00 0x80 0x00
+
+# CHECK: mvhi 0, -1
+0xe5 0x4c 0x00 0x00 0xff 0xff
+
+# CHECK: mvhi 0, 0
+0xe5 0x4c 0x00 0x00 0x00 0x00
+
+# CHECK: mvhi 0, 1
+0xe5 0x4c 0x00 0x00 0x00 0x01
+
+# CHECK: mvhi 0, 32767
+0xe5 0x4c 0x00 0x00 0x7f 0xff
+
+# CHECK: mvhi 0(%r1), 42
+0xe5 0x4c 0x10 0x00 0x00 0x2a
+
+# CHECK: mvhi 0(%r15), 42
+0xe5 0x4c 0xf0 0x00 0x00 0x2a
+
+# CHECK: mvhi 4095(%r1), 42
+0xe5 0x4c 0x1f 0xff 0x00 0x2a
+
+# CHECK: mvhi 4095(%r15), 42
+0xe5 0x4c 0xff 0xff 0x00 0x2a
+
+# CHECK: mvi 0, 0
+0x92 0x00 0x00 0x00
+
+# CHECK: mvi 4095, 0
+0x92 0x00 0x0f 0xff
+
+# CHECK: mvi 0, 255
+0x92 0xff 0x00 0x00
+
+# CHECK: mvi 0(%r1), 42
+0x92 0x2a 0x10 0x00
+
+# CHECK: mvi 0(%r15), 42
+0x92 0x2a 0xf0 0x00
+
+# CHECK: mvi 4095(%r1), 42
+0x92 0x2a 0x1f 0xff
+
+# CHECK: mvi 4095(%r15), 42
+0x92 0x2a 0xff 0xff
+
+# CHECK: mviy -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x52
+
+# CHECK: mviy -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x52
+
+# CHECK: mviy 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x52
+
+# CHECK: mviy 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x52
+
+# CHECK: mviy 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x52
+
+# CHECK: mviy 0, 255
+0xeb 0xff 0x00 0x00 0x00 0x52
+
+# CHECK: mviy 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x52
+
+# CHECK: mviy 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x52
+
+# CHECK: mviy 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x52
+
+# CHECK: mviy 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x52
+
+# CHECK: mvst %r0, %r0
+0xb2 0x55 0x00 0x00
+
+# CHECK: mvst %r0, %r15
+0xb2 0x55 0x00 0x0f
+
+# CHECK: mvst %r15, %r0
+0xb2 0x55 0x00 0xf0
+
+# CHECK: mvst %r7, %r8
+0xb2 0x55 0x00 0x78
+
+# CHECK: mxbr %f0, %f0
+0xb3 0x4c 0x00 0x00
+
+# CHECK: mxbr %f0, %f13
+0xb3 0x4c 0x00 0x0d
+
+# CHECK: mxbr %f8, %f5
+0xb3 0x4c 0x00 0x85
+
+# CHECK: mxbr %f13, %f13
+0xb3 0x4c 0x00 0xdd
+
+# CHECK: mxdbr %f0, %f0
+0xb3 0x07 0x00 0x00
+
+# CHECK: mxdbr %f0, %f15
+0xb3 0x07 0x00 0x0f
+
+# CHECK: mxdbr %f8, %f8
+0xb3 0x07 0x00 0x88
+
+# CHECK: mxdbr %f13, %f0
+0xb3 0x07 0x00 0xd0
+
+# CHECK: mxdb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x07
+
+# CHECK: mxdb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x07
+
+# CHECK: mxdb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x07
+
+# CHECK: mxdb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x07
+
+# CHECK: mxdb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x07
+
+# CHECK: mxdb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x07
+
+# CHECK: mxdb %f13, 0
+0xed 0xd0 0x00 0x00 0x00 0x07
+
+# CHECK: nc 0(1), 0
+0xd4 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: nc 0(1), 0(%r1)
+0xd4 0x00 0x00 0x00 0x10 0x00
+
+# CHECK: nc 0(1), 0(%r15)
+0xd4 0x00 0x00 0x00 0xf0 0x00
+
+# CHECK: nc 0(1), 4095
+0xd4 0x00 0x00 0x00 0x0f 0xff
+
+# CHECK: nc 0(1), 4095(%r1)
+0xd4 0x00 0x00 0x00 0x1f 0xff
+
+# CHECK: nc 0(1), 4095(%r15)
+0xd4 0x00 0x00 0x00 0xff 0xff
+
+# CHECK: nc 0(1,%r1), 0
+0xd4 0x00 0x10 0x00 0x00 0x00
+
+# CHECK: nc 0(1,%r15), 0
+0xd4 0x00 0xf0 0x00 0x00 0x00
+
+# CHECK: nc 4095(1,%r1), 0
+0xd4 0x00 0x1f 0xff 0x00 0x00
+
+# CHECK: nc 4095(1,%r15), 0
+0xd4 0x00 0xff 0xff 0x00 0x00
+
+# CHECK: nc 0(256,%r1), 0
+0xd4 0xff 0x10 0x00 0x00 0x00
+
+# CHECK: nc 0(256,%r15), 0
+0xd4 0xff 0xf0 0x00 0x00 0x00
+
+# CHECK: ngr %r0, %r0
+0xb9 0x80 0x00 0x00
+
+# CHECK: ngr %r0, %r15
+0xb9 0x80 0x00 0x0f
+
+# CHECK: ngr %r15, %r0
+0xb9 0x80 0x00 0xf0
+
+# CHECK: ngr %r7, %r8
+0xb9 0x80 0x00 0x78
+
+# CHECK: ng %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x80
+
+# CHECK: ngrk %r0, %r0, %r0
+0xb9 0xe4 0x00 0x00
+
+# CHECK: ngrk %r2, %r3, %r4
+0xb9 0xe4 0x40 0x23
+
+# CHECK: ng %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x80
+
+# CHECK: ng %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x80
+
+# CHECK: ng %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x80
+
+# CHECK: ng %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x80
+
+# CHECK: ng %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x80
+
+# CHECK: ng %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x80
+
+# CHECK: ng %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x80
+
+# CHECK: ng %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x80
+
+# CHECK: ng %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x80
+
+# CHECK: nihf %r0, 0
+0xc0 0x0a 0x00 0x00 0x00 0x00
+
+# CHECK: nihf %r0, 4294967295
+0xc0 0x0a 0xff 0xff 0xff 0xff
+
+# CHECK: nihf %r15, 0
+0xc0 0xfa 0x00 0x00 0x00 0x00
+
+# CHECK: nihh %r0, 0
+0xa5 0x04 0x00 0x00
+
+# CHECK: nihh %r0, 32768
+0xa5 0x04 0x80 0x00
+
+# CHECK: nihh %r0, 65535
+0xa5 0x04 0xff 0xff
+
+# CHECK: nihh %r15, 0
+0xa5 0xf4 0x00 0x00
+
+# CHECK: nihl %r0, 0
+0xa5 0x05 0x00 0x00
+
+# CHECK: nihl %r0, 32768
+0xa5 0x05 0x80 0x00
+
+# CHECK: nihl %r0, 65535
+0xa5 0x05 0xff 0xff
+
+# CHECK: nihl %r15, 0
+0xa5 0xf5 0x00 0x00
+
+# CHECK: nilf %r0, 0
+0xc0 0x0b 0x00 0x00 0x00 0x00
+
+# CHECK: nilf %r0, 4294967295
+0xc0 0x0b 0xff 0xff 0xff 0xff
+
+# CHECK: nilf %r15, 0
+0xc0 0xfb 0x00 0x00 0x00 0x00
+
+# CHECK: nilh %r0, 0
+0xa5 0x06 0x00 0x00
+
+# CHECK: nilh %r0, 32768
+0xa5 0x06 0x80 0x00
+
+# CHECK: nilh %r0, 65535
+0xa5 0x06 0xff 0xff
+
+# CHECK: nilh %r15, 0
+0xa5 0xf6 0x00 0x00
+
+# CHECK: nill %r0, 0
+0xa5 0x07 0x00 0x00
+
+# CHECK: nill %r0, 32768
+0xa5 0x07 0x80 0x00
+
+# CHECK: nill %r0, 65535
+0xa5 0x07 0xff 0xff
+
+# CHECK: nill %r15, 0
+0xa5 0xf7 0x00 0x00
+
+# CHECK: ni 0, 0
+0x94 0x00 0x00 0x00
+
+# CHECK: ni 4095, 0
+0x94 0x00 0x0f 0xff
+
+# CHECK: ni 0, 255
+0x94 0xff 0x00 0x00
+
+# CHECK: ni 0(%r1), 42
+0x94 0x2a 0x10 0x00
+
+# CHECK: ni 0(%r15), 42
+0x94 0x2a 0xf0 0x00
+
+# CHECK: ni 4095(%r1), 42
+0x94 0x2a 0x1f 0xff
+
+# CHECK: ni 4095(%r15), 42
+0x94 0x2a 0xff 0xff
+
+# CHECK: niy -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x54
+
+# CHECK: niy -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x54
+
+# CHECK: niy 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x54
+
+# CHECK: niy 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x54
+
+# CHECK: niy 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x54
+
+# CHECK: niy 0, 255
+0xeb 0xff 0x00 0x00 0x00 0x54
+
+# CHECK: niy 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x54
+
+# CHECK: niy 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x54
+
+# CHECK: niy 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x54
+
+# CHECK: niy 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x54
+
+# CHECK: nr %r0, %r0
+0x14 0x00
+
+# CHECK: nr %r0, %r15
+0x14 0x0f
+
+# CHECK: nr %r15, %r0
+0x14 0xf0
+
+# CHECK: nr %r7, %r8
+0x14 0x78
+
+# CHECK: nrk %r0, %r0, %r0
+0xb9 0xf4 0x00 0x00
+
+# CHECK: nrk %r2, %r3, %r4
+0xb9 0xf4 0x40 0x23
+
+# CHECK: n %r0, 0
+0x54 0x00 0x00 0x00
+
+# CHECK: n %r0, 4095
+0x54 0x00 0x0f 0xff
+
+# CHECK: n %r0, 0(%r1)
+0x54 0x00 0x10 0x00
+
+# CHECK: n %r0, 0(%r15)
+0x54 0x00 0xf0 0x00
+
+# CHECK: n %r0, 4095(%r1,%r15)
+0x54 0x01 0xff 0xff
+
+# CHECK: n %r0, 4095(%r15,%r1)
+0x54 0x0f 0x1f 0xff
+
+# CHECK: n %r15, 0
+0x54 0xf0 0x00 0x00
+
+# CHECK: ny %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x54
+
+# CHECK: ny %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x54
+
+# CHECK: ny %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x54
+
+# CHECK: ny %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x54
+
+# CHECK: ny %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x54
+
+# CHECK: ny %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x54
+
+# CHECK: ny %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x54
+
+# CHECK: ny %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x54
+
+# CHECK: ny %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x54
+
+# CHECK: ny %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x54
+
+# CHECK: oc 0(1), 0
+0xd6 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: oc 0(1), 0(%r1)
+0xd6 0x00 0x00 0x00 0x10 0x00
+
+# CHECK: oc 0(1), 0(%r15)
+0xd6 0x00 0x00 0x00 0xf0 0x00
+
+# CHECK: oc 0(1), 4095
+0xd6 0x00 0x00 0x00 0x0f 0xff
+
+# CHECK: oc 0(1), 4095(%r1)
+0xd6 0x00 0x00 0x00 0x1f 0xff
+
+# CHECK: oc 0(1), 4095(%r15)
+0xd6 0x00 0x00 0x00 0xff 0xff
+
+# CHECK: oc 0(1,%r1), 0
+0xd6 0x00 0x10 0x00 0x00 0x00
+
+# CHECK: oc 0(1,%r15), 0
+0xd6 0x00 0xf0 0x00 0x00 0x00
+
+# CHECK: oc 4095(1,%r1), 0
+0xd6 0x00 0x1f 0xff 0x00 0x00
+
+# CHECK: oc 4095(1,%r15), 0
+0xd6 0x00 0xff 0xff 0x00 0x00
+
+# CHECK: oc 0(256,%r1), 0
+0xd6 0xff 0x10 0x00 0x00 0x00
+
+# CHECK: oc 0(256,%r15), 0
+0xd6 0xff 0xf0 0x00 0x00 0x00
+
+# CHECK: ogr %r0, %r0
+0xb9 0x81 0x00 0x00
+
+# CHECK: ogr %r0, %r15
+0xb9 0x81 0x00 0x0f
+
+# CHECK: ogr %r15, %r0
+0xb9 0x81 0x00 0xf0
+
+# CHECK: ogr %r7, %r8
+0xb9 0x81 0x00 0x78
+
+# CHECK: ogrk %r0, %r0, %r0
+0xb9 0xe6 0x00 0x00
+
+# CHECK: ogrk %r2, %r3, %r4
+0xb9 0xe6 0x40 0x23
+
+# CHECK: og %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x81
+
+# CHECK: og %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x81
+
+# CHECK: og %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x81
+
+# CHECK: og %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x81
+
+# CHECK: og %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x81
+
+# CHECK: og %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x81
+
+# CHECK: og %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x81
+
+# CHECK: og %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x81
+
+# CHECK: og %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x81
+
+# CHECK: og %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x81
+
+# CHECK: oihf %r0, 0
+0xc0 0x0c 0x00 0x00 0x00 0x00
+
+# CHECK: oihf %r0, 4294967295
+0xc0 0x0c 0xff 0xff 0xff 0xff
+
+# CHECK: oihf %r15, 0
+0xc0 0xfc 0x00 0x00 0x00 0x00
+
+# CHECK: oihh %r0, 0
+0xa5 0x08 0x00 0x00
+
+# CHECK: oihh %r0, 32768
+0xa5 0x08 0x80 0x00
+
+# CHECK: oihh %r0, 65535
+0xa5 0x08 0xff 0xff
+
+# CHECK: oihh %r15, 0
+0xa5 0xf8 0x00 0x00
+
+# CHECK: oihl %r0, 0
+0xa5 0x09 0x00 0x00
+
+# CHECK: oihl %r0, 32768
+0xa5 0x09 0x80 0x00
+
+# CHECK: oihl %r0, 65535
+0xa5 0x09 0xff 0xff
+
+# CHECK: oihl %r15, 0
+0xa5 0xf9 0x00 0x00
+
+# CHECK: oilf %r0, 0
+0xc0 0x0d 0x00 0x00 0x00 0x00
+
+# CHECK: oilf %r0, 4294967295
+0xc0 0x0d 0xff 0xff 0xff 0xff
+
+# CHECK: oilf %r15, 0
+0xc0 0xfd 0x00 0x00 0x00 0x00
+
+# CHECK: oilh %r0, 0
+0xa5 0x0a 0x00 0x00
+
+# CHECK: oilh %r0, 32768
+0xa5 0x0a 0x80 0x00
+
+# CHECK: oilh %r0, 65535
+0xa5 0x0a 0xff 0xff
+
+# CHECK: oilh %r15, 0
+0xa5 0xfa 0x00 0x00
+
+# CHECK: oill %r0, 0
+0xa5 0x0b 0x00 0x00
+
+# CHECK: oill %r0, 32768
+0xa5 0x0b 0x80 0x00
+
+# CHECK: oill %r0, 65535
+0xa5 0x0b 0xff 0xff
+
+# CHECK: oill %r15, 0
+0xa5 0xfb 0x00 0x00
+
+# CHECK: oi 0, 0
+0x96 0x00 0x00 0x00
+
+# CHECK: oi 4095, 0
+0x96 0x00 0x0f 0xff
+
+# CHECK: oi 0, 255
+0x96 0xff 0x00 0x00
+
+# CHECK: oi 0(%r1), 42
+0x96 0x2a 0x10 0x00
+
+# CHECK: oi 0(%r15), 42
+0x96 0x2a 0xf0 0x00
+
+# CHECK: oi 4095(%r1), 42
+0x96 0x2a 0x1f 0xff
+
+# CHECK: oi 4095(%r15), 42
+0x96 0x2a 0xff 0xff
+
+# CHECK: oiy -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x56
+
+# CHECK: oiy -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x56
+
+# CHECK: oiy 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x56
+
+# CHECK: oiy 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x56
+
+# CHECK: oiy 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x56
+
+# CHECK: oiy 0, 255
+0xeb 0xff 0x00 0x00 0x00 0x56
+
+# CHECK: oiy 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x56
+
+# CHECK: oiy 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x56
+
+# CHECK: oiy 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x56
+
+# CHECK: oiy 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x56
+
+# CHECK: or %r0, %r0
+0x16 0x00
+
+# CHECK: or %r0, %r15
+0x16 0x0f
+
+# CHECK: or %r15, %r0
+0x16 0xf0
+
+# CHECK: or %r7, %r8
+0x16 0x78
+
+# CHECK: ork %r0, %r0, %r0
+0xb9 0xf6 0x00 0x00
+
+# CHECK: ork %r2, %r3, %r4
+0xb9 0xf6 0x40 0x23
+
+# CHECK: o %r0, 0
+0x56 0x00 0x00 0x00
+
+# CHECK: o %r0, 4095
+0x56 0x00 0x0f 0xff
+
+# CHECK: o %r0, 0(%r1)
+0x56 0x00 0x10 0x00
+
+# CHECK: o %r0, 0(%r15)
+0x56 0x00 0xf0 0x00
+
+# CHECK: o %r0, 4095(%r1,%r15)
+0x56 0x01 0xff 0xff
+
+# CHECK: o %r0, 4095(%r15,%r1)
+0x56 0x0f 0x1f 0xff
+
+# CHECK: o %r15, 0
+0x56 0xf0 0x00 0x00
+
+# CHECK: oy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x56
+
+# CHECK: oy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x56
+
+# CHECK: oy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x56
+
+# CHECK: oy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x56
+
+# CHECK: oy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x56
+
+# CHECK: oy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x56
+
+# CHECK: oy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x56
+
+# CHECK: oy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x56
+
+# CHECK: oy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x56
+
+# CHECK: oy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x56
+
+# CHECK: pfd 0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x36
+
+# CHECK: pfd 0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x36
+
+# CHECK: pfd 0, 0
+0xe3 0x00 0x00 0x00 0x00 0x36
+
+# CHECK: pfd 0, 1
+0xe3 0x00 0x00 0x01 0x00 0x36
+
+# CHECK: pfd 0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x36
+
+# CHECK: pfd 0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x36
+
+# CHECK: pfd 0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x36
+
+# CHECK: pfd 0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x36
+
+# CHECK: pfd 0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x36
+
+# CHECK: pfd 15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x36
+
+# CHECK: risbg %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x55
+
+# CHECK: risbg %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x55
+
+# CHECK: risbg %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x55
+
+# CHECK: risbg %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x55
+
+# CHECK: risbg %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x55
+
+# CHECK: risbg %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x55
+
+# CHECK: risbg %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x55
+
+# CHECK: risbhg %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x5d
+
+# CHECK: risbhg %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x5d
+
+# CHECK: risbhg %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x5d
+
+# CHECK: risbhg %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x5d
+
+# CHECK: risbhg %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x5d
+
+# CHECK: risbhg %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x5d
+
+# CHECK: risbhg %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x5d
+
+# CHECK: risblg %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x51
+
+# CHECK: risblg %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x51
+
+# CHECK: risblg %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x51
+
+# CHECK: risblg %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x51
+
+# CHECK: risblg %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x51
+
+# CHECK: risblg %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x51
+
+# CHECK: risblg %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x51
+
+# CHECK: rnsbg %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x54
+
+# CHECK: rnsbg %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x54
+
+# CHECK: rnsbg %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x54
+
+# CHECK: rnsbg %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x54
+
+# CHECK: rnsbg %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x54
+
+# CHECK: rnsbg %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x54
+
+# CHECK: rnsbg %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x54
+
+# CHECK: rosbg %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x56
+
+# CHECK: rosbg %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x56
+
+# CHECK: rosbg %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x56
+
+# CHECK: rosbg %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x56
+
+# CHECK: rosbg %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x56
+
+# CHECK: rosbg %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x56
+
+# CHECK: rosbg %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x56
+
+# CHECK: rxsbg %r0, %r0, 0, 0, 0
+0xec 0x00 0x00 0x00 0x00 0x57
+
+# CHECK: rxsbg %r0, %r0, 0, 0, 63
+0xec 0x00 0x00 0x00 0x3f 0x57
+
+# CHECK: rxsbg %r0, %r0, 0, 255, 0
+0xec 0x00 0x00 0xff 0x00 0x57
+
+# CHECK: rxsbg %r0, %r0, 255, 0, 0
+0xec 0x00 0xff 0x00 0x00 0x57
+
+# CHECK: rxsbg %r0, %r15, 0, 0, 0
+0xec 0x0f 0x00 0x00 0x00 0x57
+
+# CHECK: rxsbg %r15, %r0, 0, 0, 0
+0xec 0xf0 0x00 0x00 0x00 0x57
+
+# CHECK: rxsbg %r4, %r5, 6, 7, 8
+0xec 0x45 0x06 0x07 0x08 0x57
+
+# CHECK: rllg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x1c
+
+# CHECK: rllg %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0x1c
+
+# CHECK: rllg %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0x1c
+
+# CHECK: rllg %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x1c
+
+# CHECK: rllg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x1c
+
+# CHECK: rllg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x1c
+
+# CHECK: rllg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x1c
+
+# CHECK: rllg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x1c
+
+# CHECK: rllg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x1c
+
+# CHECK: rllg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x1c
+
+# CHECK: rllg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x1c
+
+# CHECK: rllg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x1c
+
+# CHECK: rll %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x1d
+
+# CHECK: rll %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0x1d
+
+# CHECK: rll %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0x1d
+
+# CHECK: rll %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x1d
+
+# CHECK: rll %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x1d
+
+# CHECK: rll %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x1d
+
+# CHECK: rll %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x1d
+
+# CHECK: rll %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x1d
+
+# CHECK: rll %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x1d
+
+# CHECK: rll %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x1d
+
+# CHECK: rll %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x1d
+
+# CHECK: rll %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x1d
+
+# CHECK: sdbr %f0, %f0
+0xb3 0x1b 0x00 0x00
+
+# CHECK: sdbr %f0, %f15
+0xb3 0x1b 0x00 0x0f
+
+# CHECK: sdbr %f7, %f8
+0xb3 0x1b 0x00 0x78
+
+# CHECK: sdbr %f15, %f0
+0xb3 0x1b 0x00 0xf0
+
+# CHECK: sdb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x1b
+
+# CHECK: sdb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x1b
+
+# CHECK: sdb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x1b
+
+# CHECK: sdb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x1b
+
+# CHECK: sdb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x1b
+
+# CHECK: sdb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x1b
+
+# CHECK: sdb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x1b
+
+# CHECK: sebr %f0, %f0
+0xb3 0x0b 0x00 0x00
+
+# CHECK: sebr %f0, %f15
+0xb3 0x0b 0x00 0x0f
+
+# CHECK: sebr %f7, %f8
+0xb3 0x0b 0x00 0x78
+
+# CHECK: sebr %f15, %f0
+0xb3 0x0b 0x00 0xf0
+
+# CHECK: seb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x0b
+
+# CHECK: seb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x0b
+
+# CHECK: seb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x0b
+
+# CHECK: seb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x0b
+
+# CHECK: seb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x0b
+
+# CHECK: seb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x0b
+
+# CHECK: seb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x0b
+
+# CHECK: sgfr %r0, %r0
+0xb9 0x19 0x00 0x00
+
+# CHECK: sgfr %r0, %r15
+0xb9 0x19 0x00 0x0f
+
+# CHECK: sgfr %r15, %r0
+0xb9 0x19 0x00 0xf0
+
+# CHECK: sgfr %r7, %r8
+0xb9 0x19 0x00 0x78
+
+# CHECK: sgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x19
+
+# CHECK: sgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x19
+
+# CHECK: sgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x19
+
+# CHECK: sgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x19
+
+# CHECK: sgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x19
+
+# CHECK: sgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x19
+
+# CHECK: sgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x19
+
+# CHECK: sgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x19
+
+# CHECK: sgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x19
+
+# CHECK: sgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x19
+
+# CHECK: sgr %r0, %r0
+0xb9 0x09 0x00 0x00
+
+# CHECK: sgr %r0, %r15
+0xb9 0x09 0x00 0x0f
+
+# CHECK: sgr %r15, %r0
+0xb9 0x09 0x00 0xf0
+
+# CHECK: sgr %r7, %r8
+0xb9 0x09 0x00 0x78
+
+# CHECK: sgrk %r0, %r0, %r0
+0xb9 0xe9 0x00 0x00
+
+# CHECK: sgrk %r2, %r3, %r4
+0xb9 0xe9 0x40 0x23
+
+# CHECK: sg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x09
+
+# CHECK: sg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x09
+
+# CHECK: sg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x09
+
+# CHECK: sg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x09
+
+# CHECK: sg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x09
+
+# CHECK: sg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x09
+
+# CHECK: sg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x09
+
+# CHECK: sg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x09
+
+# CHECK: sg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x09
+
+# CHECK: sg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x09
+
+# CHECK: sh %r0, 0
+0x4b 0x00 0x00 0x00
+
+# CHECK: sh %r0, 4095
+0x4b 0x00 0x0f 0xff
+
+# CHECK: sh %r0, 0(%r1)
+0x4b 0x00 0x10 0x00
+
+# CHECK: sh %r0, 0(%r15)
+0x4b 0x00 0xf0 0x00
+
+# CHECK: sh %r0, 4095(%r1,%r15)
+0x4b 0x01 0xff 0xff
+
+# CHECK: sh %r0, 4095(%r15,%r1)
+0x4b 0x0f 0x1f 0xff
+
+# CHECK: sh %r15, 0
+0x4b 0xf0 0x00 0x00
+
+# CHECK: shy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x7b
+
+# CHECK: shy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x7b
+
+# CHECK: shy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x7b
+
+# CHECK: shy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x7b
+
+# CHECK: shy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x7b
+
+# CHECK: shy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x7b
+
+# CHECK: shy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x7b
+
+# CHECK: shy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x7b
+
+# CHECK: shy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x7b
+
+# CHECK: shy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x7b
+
+# CHECK: slbgr %r0, %r0
+0xb9 0x89 0x00 0x00
+
+# CHECK: slbgr %r0, %r15
+0xb9 0x89 0x00 0x0f
+
+# CHECK: slbgr %r15, %r0
+0xb9 0x89 0x00 0xf0
+
+# CHECK: slbgr %r7, %r8
+0xb9 0x89 0x00 0x78
+
+# CHECK: slbg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x89
+
+# CHECK: slbg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x89
+
+# CHECK: slbg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x89
+
+# CHECK: slbg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x89
+
+# CHECK: slbg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x89
+
+# CHECK: slbg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x89
+
+# CHECK: slbg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x89
+
+# CHECK: slbg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x89
+
+# CHECK: slbg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x89
+
+# CHECK: slbg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x89
+
+# CHECK: slbr %r0, %r0
+0xb9 0x99 0x00 0x00
+
+# CHECK: slbr %r0, %r15
+0xb9 0x99 0x00 0x0f
+
+# CHECK: slbr %r15, %r0
+0xb9 0x99 0x00 0xf0
+
+# CHECK: slbr %r7, %r8
+0xb9 0x99 0x00 0x78
+
+# CHECK: slb %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x99
+
+# CHECK: slb %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x99
+
+# CHECK: slb %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x99
+
+# CHECK: slb %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x99
+
+# CHECK: slb %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x99
+
+# CHECK: slb %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x99
+
+# CHECK: slb %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x99
+
+# CHECK: slb %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x99
+
+# CHECK: slb %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x99
+
+# CHECK: slb %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x99
+
+# CHECK: slfi %r0, 0
+0xc2 0x05 0x00 0x00 0x00 0x00
+
+# CHECK: slfi %r0, 4294967295
+0xc2 0x05 0xff 0xff 0xff 0xff
+
+# CHECK: slfi %r15, 0
+0xc2 0xf5 0x00 0x00 0x00 0x00
+
+# CHECK: slgfi %r0, 0
+0xc2 0x04 0x00 0x00 0x00 0x00
+
+# CHECK: slgfi %r0, 4294967295
+0xc2 0x04 0xff 0xff 0xff 0xff
+
+# CHECK: slgfi %r15, 0
+0xc2 0xf4 0x00 0x00 0x00 0x00
+
+# CHECK: slgfr %r0, %r0
+0xb9 0x1b 0x00 0x00
+
+# CHECK: slgfr %r0, %r15
+0xb9 0x1b 0x00 0x0f
+
+# CHECK: slgfr %r15, %r0
+0xb9 0x1b 0x00 0xf0
+
+# CHECK: slgfr %r7, %r8
+0xb9 0x1b 0x00 0x78
+
+# CHECK: slgf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x1b
+
+# CHECK: slgf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x1b
+
+# CHECK: slgf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x1b
+
+# CHECK: slgf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x1b
+
+# CHECK: slgf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x1b
+
+# CHECK: slgf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x1b
+
+# CHECK: slgf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x1b
+
+# CHECK: slgf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x1b
+
+# CHECK: slgf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x1b
+
+# CHECK: slgf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x1b
+
+# CHECK: slgr %r0, %r0
+0xb9 0x0b 0x00 0x00
+
+# CHECK: slgr %r0, %r15
+0xb9 0x0b 0x00 0x0f
+
+# CHECK: slgr %r15, %r0
+0xb9 0x0b 0x00 0xf0
+
+# CHECK: slgr %r7, %r8
+0xb9 0x0b 0x00 0x78
+
+# CHECK: slgrk %r0, %r0, %r0
+0xb9 0xeb 0x00 0x00
+
+# CHECK: slgrk %r2, %r3, %r4
+0xb9 0xeb 0x40 0x23
+
+# CHECK: slg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x0b
+
+# CHECK: slg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x0b
+
+# CHECK: slg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x0b
+
+# CHECK: slg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x0b
+
+# CHECK: slg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x0b
+
+# CHECK: slg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x0b
+
+# CHECK: slg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x0b
+
+# CHECK: slg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x0b
+
+# CHECK: slg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x0b
+
+# CHECK: slg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x0b
+
+# CHECK: sllg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x0d
+
+# CHECK: sllg %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0x0d
+
+# CHECK: sllg %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0x0d
+
+# CHECK: sllg %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x0d
+
+# CHECK: sllg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x0d
+
+# CHECK: sllg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x0d
+
+# CHECK: sllg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x0d
+
+# CHECK: sllg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x0d
+
+# CHECK: sllg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x0d
+
+# CHECK: sllg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x0d
+
+# CHECK: sllg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x0d
+
+# CHECK: sllg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x0d
+
+# CHECK: sllk %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0xdf
+
+# CHECK: sllk %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0xdf
+
+# CHECK: sllk %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0xdf
+
+# CHECK: sllk %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0xdf
+
+# CHECK: sllk %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0xdf
+
+# CHECK: sllk %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0xdf
+
+# CHECK: sllk %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0xdf
+
+# CHECK: sllk %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0xdf
+
+# CHECK: sllk %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0xdf
+
+# CHECK: sllk %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0xdf
+
+# CHECK: sllk %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0xdf
+
+# CHECK: sllk %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0xdf
+
+# CHECK: sll %r0, 0
+0x89 0x00 0x00 0x00
+
+# CHECK: sll %r7, 0
+0x89 0x70 0x00 0x00
+
+# CHECK: sll %r15, 0
+0x89 0xf0 0x00 0x00
+
+# CHECK: sll %r0, 4095
+0x89 0x00 0x0f 0xff
+
+# CHECK: sll %r0, 0(%r1)
+0x89 0x00 0x10 0x00
+
+# CHECK: sll %r0, 0(%r15)
+0x89 0x00 0xf0 0x00
+
+# CHECK: sll %r0, 4095(%r1)
+0x89 0x00 0x1f 0xff
+
+# CHECK: sll %r0, 4095(%r15)
+0x89 0x00 0xff 0xff
+
+# CHECK: slr %r0, %r0
+0x1f 0x00
+
+# CHECK: slr %r0, %r15
+0x1f 0x0f
+
+# CHECK: slr %r15, %r0
+0x1f 0xf0
+
+# CHECK: slr %r7, %r8
+0x1f 0x78
+
+# CHECK: slrk %r0, %r0, %r0
+0xb9 0xfb 0x00 0x00
+
+# CHECK: slrk %r2, %r3, %r4
+0xb9 0xfb 0x40 0x23
+
+# CHECK: sl %r0, 0
+0x5f 0x00 0x00 0x00
+
+# CHECK: sl %r0, 4095
+0x5f 0x00 0x0f 0xff
+
+# CHECK: sl %r0, 0(%r1)
+0x5f 0x00 0x10 0x00
+
+# CHECK: sl %r0, 0(%r15)
+0x5f 0x00 0xf0 0x00
+
+# CHECK: sl %r0, 4095(%r1,%r15)
+0x5f 0x01 0xff 0xff
+
+# CHECK: sl %r0, 4095(%r15,%r1)
+0x5f 0x0f 0x1f 0xff
+
+# CHECK: sl %r15, 0
+0x5f 0xf0 0x00 0x00
+
+# CHECK: sly %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x5f
+
+# CHECK: sly %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x5f
+
+# CHECK: sly %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x5f
+
+# CHECK: sly %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x5f
+
+# CHECK: sly %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x5f
+
+# CHECK: sly %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x5f
+
+# CHECK: sly %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x5f
+
+# CHECK: sly %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x5f
+
+# CHECK: sly %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x5f
+
+# CHECK: sly %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x5f
+
+# CHECK: sqdbr %f0, %f0
+0xb3 0x15 0x00 0x00
+
+# CHECK: sqdbr %f0, %f15
+0xb3 0x15 0x00 0x0f
+
+# CHECK: sqdbr %f7, %f8
+0xb3 0x15 0x00 0x78
+
+# CHECK: sqdbr %f15, %f0
+0xb3 0x15 0x00 0xf0
+
+# CHECK: sqdb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x15
+
+# CHECK: sqdb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x15
+
+# CHECK: sqdb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x15
+
+# CHECK: sqdb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x15
+
+# CHECK: sqdb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x15
+
+# CHECK: sqdb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x15
+
+# CHECK: sqdb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x15
+
+# CHECK: sqebr %f0, %f0
+0xb3 0x14 0x00 0x00
+
+# CHECK: sqebr %f0, %f15
+0xb3 0x14 0x00 0x0f
+
+# CHECK: sqebr %f7, %f8
+0xb3 0x14 0x00 0x78
+
+# CHECK: sqebr %f15, %f0
+0xb3 0x14 0x00 0xf0
+
+# CHECK: sqeb %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x14
+
+# CHECK: sqeb %f0, 4095
+0xed 0x00 0x0f 0xff 0x00 0x14
+
+# CHECK: sqeb %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x14
+
+# CHECK: sqeb %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x14
+
+# CHECK: sqeb %f0, 4095(%r1,%r15)
+0xed 0x01 0xff 0xff 0x00 0x14
+
+# CHECK: sqeb %f0, 4095(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x00 0x14
+
+# CHECK: sqeb %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x14
+
+# CHECK: sqxbr %f0, %f0
+0xb3 0x16 0x00 0x00
+
+# CHECK: sqxbr %f0, %f13
+0xb3 0x16 0x00 0x0d
+
+# CHECK: sqxbr %f8, %f8
+0xb3 0x16 0x00 0x88
+
+# CHECK: sqxbr %f13, %f0
+0xb3 0x16 0x00 0xd0
+
+# CHECK: srag %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x0a
+
+# CHECK: srag %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0x0a
+
+# CHECK: srag %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0x0a
+
+# CHECK: srag %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x0a
+
+# CHECK: srag %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x0a
+
+# CHECK: srag %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x0a
+
+# CHECK: srag %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x0a
+
+# CHECK: srag %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x0a
+
+# CHECK: srag %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x0a
+
+# CHECK: srag %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x0a
+
+# CHECK: srag %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x0a
+
+# CHECK: srag %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x0a
+
+# CHECK: srak %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0xdc
+
+# CHECK: srak %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0xdc
+
+# CHECK: srak %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0xdc
+
+# CHECK: srak %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0xdc
+
+# CHECK: srak %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0xdc
+
+# CHECK: srak %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0xdc
+
+# CHECK: srak %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0xdc
+
+# CHECK: srak %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0xdc
+
+# CHECK: srak %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0xdc
+
+# CHECK: srak %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0xdc
+
+# CHECK: srak %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0xdc
+
+# CHECK: srak %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0xdc
+
+# CHECK: sra %r0, 0
+0x8a 0x00 0x00 0x00
+
+# CHECK: sra %r7, 0
+0x8a 0x70 0x00 0x00
+
+# CHECK: sra %r15, 0
+0x8a 0xf0 0x00 0x00
+
+# CHECK: sra %r0, 4095
+0x8a 0x00 0x0f 0xff
+
+# CHECK: sra %r0, 0(%r1)
+0x8a 0x00 0x10 0x00
+
+# CHECK: sra %r0, 0(%r15)
+0x8a 0x00 0xf0 0x00
+
+# CHECK: sra %r0, 4095(%r1)
+0x8a 0x00 0x1f 0xff
+
+# CHECK: sra %r0, 4095(%r15)
+0x8a 0x00 0xff 0xff
+
+# CHECK: srlg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x0c
+
+# CHECK: srlg %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0x0c
+
+# CHECK: srlg %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0x0c
+
+# CHECK: srlg %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x0c
+
+# CHECK: srlg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x0c
+
+# CHECK: srlg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x0c
+
+# CHECK: srlg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x0c
+
+# CHECK: srlg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x0c
+
+# CHECK: srlg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x0c
+
+# CHECK: srlg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x0c
+
+# CHECK: srlg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x0c
+
+# CHECK: srlg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x0c
+
+# CHECK: srlk %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0xde
+
+# CHECK: srlk %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0xde
+
+# CHECK: srlk %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0xde
+
+# CHECK: srlk %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0xde
+
+# CHECK: srlk %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0xde
+
+# CHECK: srlk %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0xde
+
+# CHECK: srlk %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0xde
+
+# CHECK: srlk %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0xde
+
+# CHECK: srlk %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0xde
+
+# CHECK: srlk %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0xde
+
+# CHECK: srlk %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0xde
+
+# CHECK: srlk %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0xde
+
+# CHECK: srl %r0, 0
+0x88 0x00 0x00 0x00
+
+# CHECK: srl %r7, 0
+0x88 0x70 0x00 0x00
+
+# CHECK: srl %r15, 0
+0x88 0xf0 0x00 0x00
+
+# CHECK: srl %r0, 4095
+0x88 0x00 0x0f 0xff
+
+# CHECK: srl %r0, 0(%r1)
+0x88 0x00 0x10 0x00
+
+# CHECK: srl %r0, 0(%r15)
+0x88 0x00 0xf0 0x00
+
+# CHECK: srl %r0, 4095(%r1)
+0x88 0x00 0x1f 0xff
+
+# CHECK: srl %r0, 4095(%r15)
+0x88 0x00 0xff 0xff
+
+# CHECK: sr %r0, %r0
+0x1b 0x00
+
+# CHECK: sr %r0, %r15
+0x1b 0x0f
+
+# CHECK: sr %r15, %r0
+0x1b 0xf0
+
+# CHECK: sr %r7, %r8
+0x1b 0x78
+
+# CHECK: srk %r0, %r0, %r0
+0xb9 0xf9 0x00 0x00
+
+# CHECK: srk %r2, %r3, %r4
+0xb9 0xf9 0x40 0x23
+
+# CHECK: srst %r0, %r0
+0xb2 0x5e 0x00 0x00
+
+# CHECK: srst %r0, %r15
+0xb2 0x5e 0x00 0x0f
+
+# CHECK: srst %r15, %r0
+0xb2 0x5e 0x00 0xf0
+
+# CHECK: srst %r7, %r8
+0xb2 0x5e 0x00 0x78
+
+# CHECK: stc %r0, 0
+0x42 0x00 0x00 0x00
+
+# CHECK: stc %r0, 4095
+0x42 0x00 0x0f 0xff
+
+# CHECK: stc %r0, 0(%r1)
+0x42 0x00 0x10 0x00
+
+# CHECK: stc %r0, 0(%r15)
+0x42 0x00 0xf0 0x00
+
+# CHECK: stc %r0, 4095(%r1,%r15)
+0x42 0x01 0xff 0xff
+
+# CHECK: stc %r0, 4095(%r15,%r1)
+0x42 0x0f 0x1f 0xff
+
+# CHECK: stc %r15, 0
+0x42 0xf0 0x00 0x00
+
+# CHECK: stch %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xc3
+
+# CHECK: stch %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xc3
+
+# CHECK: stch %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xc3
+
+# CHECK: stch %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xc3
+
+# CHECK: stch %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xc3
+
+# CHECK: stch %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xc3
+
+# CHECK: stch %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xc3
+
+# CHECK: stch %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xc3
+
+# CHECK: stch %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xc3
+
+# CHECK: stch %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xc3
+
+# CHECK: stcy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x72
+
+# CHECK: stcy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x72
+
+# CHECK: stcy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x72
+
+# CHECK: stcy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x72
+
+# CHECK: stcy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x72
+
+# CHECK: stcy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x72
+
+# CHECK: stcy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x72
+
+# CHECK: stcy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x72
+
+# CHECK: stcy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x72
+
+# CHECK: stcy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x72
+
+# CHECK: std %f0, 0
+0x60 0x00 0x00 0x00
+
+# CHECK: std %f0, 4095
+0x60 0x00 0x0f 0xff
+
+# CHECK: std %f0, 0(%r1)
+0x60 0x00 0x10 0x00
+
+# CHECK: std %f0, 0(%r15)
+0x60 0x00 0xf0 0x00
+
+# CHECK: std %f0, 4095(%r1,%r15)
+0x60 0x01 0xff 0xff
+
+# CHECK: std %f0, 4095(%r15,%r1)
+0x60 0x0f 0x1f 0xff
+
+# CHECK: std %f15, 0
+0x60 0xf0 0x00 0x00
+
+# CHECK: stdy %f0, -524288
+0xed 0x00 0x00 0x00 0x80 0x67
+
+# CHECK: stdy %f0, -1
+0xed 0x00 0x0f 0xff 0xff 0x67
+
+# CHECK: stdy %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x67
+
+# CHECK: stdy %f0, 1
+0xed 0x00 0x00 0x01 0x00 0x67
+
+# CHECK: stdy %f0, 524287
+0xed 0x00 0x0f 0xff 0x7f 0x67
+
+# CHECK: stdy %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x67
+
+# CHECK: stdy %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x67
+
+# CHECK: stdy %f0, 524287(%r1,%r15)
+0xed 0x01 0xff 0xff 0x7f 0x67
+
+# CHECK: stdy %f0, 524287(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x7f 0x67
+
+# CHECK: stdy %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x67
+
+# CHECK: ste %f0, 0
+0x70 0x00 0x00 0x00
+
+# CHECK: ste %f0, 4095
+0x70 0x00 0x0f 0xff
+
+# CHECK: ste %f0, 0(%r1)
+0x70 0x00 0x10 0x00
+
+# CHECK: ste %f0, 0(%r15)
+0x70 0x00 0xf0 0x00
+
+# CHECK: ste %f0, 4095(%r1,%r15)
+0x70 0x01 0xff 0xff
+
+# CHECK: ste %f0, 4095(%r15,%r1)
+0x70 0x0f 0x1f 0xff
+
+# CHECK: ste %f15, 0
+0x70 0xf0 0x00 0x00
+
+# CHECK: stey %f0, -524288
+0xed 0x00 0x00 0x00 0x80 0x66
+
+# CHECK: stey %f0, -1
+0xed 0x00 0x0f 0xff 0xff 0x66
+
+# CHECK: stey %f0, 0
+0xed 0x00 0x00 0x00 0x00 0x66
+
+# CHECK: stey %f0, 1
+0xed 0x00 0x00 0x01 0x00 0x66
+
+# CHECK: stey %f0, 524287
+0xed 0x00 0x0f 0xff 0x7f 0x66
+
+# CHECK: stey %f0, 0(%r1)
+0xed 0x00 0x10 0x00 0x00 0x66
+
+# CHECK: stey %f0, 0(%r15)
+0xed 0x00 0xf0 0x00 0x00 0x66
+
+# CHECK: stey %f0, 524287(%r1,%r15)
+0xed 0x01 0xff 0xff 0x7f 0x66
+
+# CHECK: stey %f0, 524287(%r15,%r1)
+0xed 0x0f 0x1f 0xff 0x7f 0x66
+
+# CHECK: stey %f15, 0
+0xed 0xf0 0x00 0x00 0x00 0x66
+
+# CHECK: stg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x24
+
+# CHECK: stg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x24
+
+# CHECK: stg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x24
+
+# CHECK: stg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x24
+
+# CHECK: stg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x24
+
+# CHECK: stg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x24
+
+# CHECK: stg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x24
+
+# CHECK: stg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x24
+
+# CHECK: stg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x24
+
+# CHECK: stg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x24
+
+# CHECK: sth %r0, 0
+0x40 0x00 0x00 0x00
+
+# CHECK: sth %r0, 4095
+0x40 0x00 0x0f 0xff
+
+# CHECK: sth %r0, 0(%r1)
+0x40 0x00 0x10 0x00
+
+# CHECK: sth %r0, 0(%r15)
+0x40 0x00 0xf0 0x00
+
+# CHECK: sth %r0, 4095(%r1,%r15)
+0x40 0x01 0xff 0xff
+
+# CHECK: sth %r0, 4095(%r15,%r1)
+0x40 0x0f 0x1f 0xff
+
+# CHECK: sth %r15, 0
+0x40 0xf0 0x00 0x00
+
+# CHECK: sthh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xc7
+
+# CHECK: sthh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xc7
+
+# CHECK: sthh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xc7
+
+# CHECK: sthh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xc7
+
+# CHECK: sthh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xc7
+
+# CHECK: sthh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xc7
+
+# CHECK: sthh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xc7
+
+# CHECK: sthh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xc7
+
+# CHECK: sthh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xc7
+
+# CHECK: sthh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xc7
+
+# CHECK: stfh %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0xcb
+
+# CHECK: stfh %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0xcb
+
+# CHECK: stfh %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0xcb
+
+# CHECK: stfh %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0xcb
+
+# CHECK: stfh %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0xcb
+
+# CHECK: stfh %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0xcb
+
+# CHECK: stfh %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0xcb
+
+# CHECK: stfh %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0xcb
+
+# CHECK: stfh %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0xcb
+
+# CHECK: stfh %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0xcb
+
+# CHECK: sthy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x70
+
+# CHECK: sthy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x70
+
+# CHECK: sthy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x70
+
+# CHECK: sthy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x70
+
+# CHECK: sthy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x70
+
+# CHECK: sthy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x70
+
+# CHECK: sthy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x70
+
+# CHECK: sthy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x70
+
+# CHECK: sthy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x70
+
+# CHECK: sthy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x70
+
+# CHECK: stmg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x24
+
+# CHECK: stmg %r0, %r15, 0
+0xeb 0x0f 0x00 0x00 0x00 0x24
+
+# CHECK: stmg %r14, %r15, 0
+0xeb 0xef 0x00 0x00 0x00 0x24
+
+# CHECK: stmg %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x24
+
+# CHECK: stmg %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x24
+
+# CHECK: stmg %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x24
+
+# CHECK: stmg %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x24
+
+# CHECK: stmg %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x24
+
+# CHECK: stmg %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x24
+
+# CHECK: stmg %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x24
+
+# CHECK: stmg %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x24
+
+# CHECK: stmg %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x24
+
+# CHECK: stmg %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x24
+
+# CHECK: strvg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x2f
+
+# CHECK: strvg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x2f
+
+# CHECK: strvg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x2f
+
+# CHECK: strvg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x2f
+
+# CHECK: strvg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x2f
+
+# CHECK: strvg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x2f
+
+# CHECK: strvg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x2f
+
+# CHECK: strvg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x2f
+
+# CHECK: strvg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x2f
+
+# CHECK: strvg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x2f
+
+# CHECK: strv %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x3e
+
+# CHECK: strv %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x3e
+
+# CHECK: strv %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x3e
+
+# CHECK: strv %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x3e
+
+# CHECK: strv %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x3e
+
+# CHECK: strv %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x3e
+
+# CHECK: strv %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x3e
+
+# CHECK: strv %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x3e
+
+# CHECK: strv %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x3e
+
+# CHECK: strv %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x3e
+
+# CHECK: st %r0, 0
+0x50 0x00 0x00 0x00
+
+# CHECK: st %r0, 4095
+0x50 0x00 0x0f 0xff
+
+# CHECK: st %r0, 0(%r1)
+0x50 0x00 0x10 0x00
+
+# CHECK: st %r0, 0(%r15)
+0x50 0x00 0xf0 0x00
+
+# CHECK: st %r0, 4095(%r1,%r15)
+0x50 0x01 0xff 0xff
+
+# CHECK: st %r0, 4095(%r15,%r1)
+0x50 0x0f 0x1f 0xff
+
+# CHECK: st %r15, 0
+0x50 0xf0 0x00 0x00
+
+# CHECK: stoc %r1, 2(%r3), 0
+0xeb 0x10 0x30 0x02 0x00 0xf3
+
+# CHECK: stoco %r1, 2(%r3)
+0xeb 0x11 0x30 0x02 0x00 0xf3
+
+# CHECK: stoch %r1, 2(%r3)
+0xeb 0x12 0x30 0x02 0x00 0xf3
+
+# CHECK: stocnle %r1, 2(%r3)
+0xeb 0x13 0x30 0x02 0x00 0xf3
+
+# CHECK: stocl %r1, 2(%r3)
+0xeb 0x14 0x30 0x02 0x00 0xf3
+
+# CHECK: stocnhe %r1, 2(%r3)
+0xeb 0x15 0x30 0x02 0x00 0xf3
+
+# CHECK: stoclh %r1, 2(%r3)
+0xeb 0x16 0x30 0x02 0x00 0xf3
+
+# CHECK: stocne %r1, 2(%r3)
+0xeb 0x17 0x30 0x02 0x00 0xf3
+
+# CHECK: stoce %r1, 2(%r3)
+0xeb 0x18 0x30 0x02 0x00 0xf3
+
+# CHECK: stocnlh %r1, 2(%r3)
+0xeb 0x19 0x30 0x02 0x00 0xf3
+
+# CHECK: stoche %r1, 2(%r3)
+0xeb 0x1a 0x30 0x02 0x00 0xf3
+
+# CHECK: stocnl %r1, 2(%r3)
+0xeb 0x1b 0x30 0x02 0x00 0xf3
+
+# CHECK: stocle %r1, 2(%r3)
+0xeb 0x1c 0x30 0x02 0x00 0xf3
+
+# CHECK: stocnh %r1, 2(%r3)
+0xeb 0x1d 0x30 0x02 0x00 0xf3
+
+# CHECK: stocno %r1, 2(%r3)
+0xeb 0x1e 0x30 0x02 0x00 0xf3
+
+# CHECK: stoc %r1, 2(%r3), 15
+0xeb 0x1f 0x30 0x02 0x00 0xf3
+
+# CHECK: stocg %r1, 2(%r3), 0
+0xeb 0x10 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgo %r1, 2(%r3)
+0xeb 0x11 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgh %r1, 2(%r3)
+0xeb 0x12 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgnle %r1, 2(%r3)
+0xeb 0x13 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgl %r1, 2(%r3)
+0xeb 0x14 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgnhe %r1, 2(%r3)
+0xeb 0x15 0x30 0x02 0x00 0xe3
+
+# CHECK: stocglh %r1, 2(%r3)
+0xeb 0x16 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgne %r1, 2(%r3)
+0xeb 0x17 0x30 0x02 0x00 0xe3
+
+# CHECK: stocge %r1, 2(%r3)
+0xeb 0x18 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgnlh %r1, 2(%r3)
+0xeb 0x19 0x30 0x02 0x00 0xe3
+
+# CHECK: stocghe %r1, 2(%r3)
+0xeb 0x1a 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgnl %r1, 2(%r3)
+0xeb 0x1b 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgle %r1, 2(%r3)
+0xeb 0x1c 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgnh %r1, 2(%r3)
+0xeb 0x1d 0x30 0x02 0x00 0xe3
+
+# CHECK: stocgno %r1, 2(%r3)
+0xeb 0x1e 0x30 0x02 0x00 0xe3
+
+# CHECK: stocg %r1, 2(%r3), 15
+0xeb 0x1f 0x30 0x02 0x00 0xe3
+
+# CHECK: s %r0, 0
+0x5b 0x00 0x00 0x00
+
+# CHECK: s %r0, 4095
+0x5b 0x00 0x0f 0xff
+
+# CHECK: s %r0, 0(%r1)
+0x5b 0x00 0x10 0x00
+
+# CHECK: s %r0, 0(%r15)
+0x5b 0x00 0xf0 0x00
+
+# CHECK: s %r0, 4095(%r1,%r15)
+0x5b 0x01 0xff 0xff
+
+# CHECK: s %r0, 4095(%r15,%r1)
+0x5b 0x0f 0x1f 0xff
+
+# CHECK: s %r15, 0
+0x5b 0xf0 0x00 0x00
+
+# CHECK: sty %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x50
+
+# CHECK: sty %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x50
+
+# CHECK: sty %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x50
+
+# CHECK: sty %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x50
+
+# CHECK: sty %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x50
+
+# CHECK: sty %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x50
+
+# CHECK: sty %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x50
+
+# CHECK: sty %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x50
+
+# CHECK: sty %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x50
+
+# CHECK: sty %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x50
+
+# CHECK: sxbr %f0, %f0
+0xb3 0x4b 0x00 0x00
+
+# CHECK: sxbr %f0, %f13
+0xb3 0x4b 0x00 0x0d
+
+# CHECK: sxbr %f8, %f8
+0xb3 0x4b 0x00 0x88
+
+# CHECK: sxbr %f13, %f0
+0xb3 0x4b 0x00 0xd0
+
+# CHECK: sy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x5b
+
+# CHECK: sy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x5b
+
+# CHECK: sy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x5b
+
+# CHECK: sy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x5b
+
+# CHECK: sy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x5b
+
+# CHECK: sy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x5b
+
+# CHECK: sy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x5b
+
+# CHECK: sy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x5b
+
+# CHECK: sy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x5b
+
+# CHECK: sy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x5b
+
+# CHECK: tm 0, 0
+0x91 0x00 0x00 0x00
+
+# CHECK: tm 4095, 0
+0x91 0x00 0x0f 0xff
+
+# CHECK: tm 0, 255
+0x91 0xff 0x00 0x00
+
+# CHECK: tm 0(%r1), 42
+0x91 0x2a 0x10 0x00
+
+# CHECK: tm 0(%r15), 42
+0x91 0x2a 0xf0 0x00
+
+# CHECK: tm 4095(%r1), 42
+0x91 0x2a 0x1f 0xff
+
+# CHECK: tm 4095(%r15), 42
+0x91 0x2a 0xff 0xff
+
+# CHECK: tmhh %r0, 0
+0xa7 0x02 0x00 0x00
+
+# CHECK: tmhh %r0, 32768
+0xa7 0x02 0x80 0x00
+
+# CHECK: tmhh %r0, 65535
+0xa7 0x02 0xff 0xff
+
+# CHECK: tmhh %r15, 0
+0xa7 0xf2 0x00 0x00
+
+# CHECK: tmhl %r0, 0
+0xa7 0x03 0x00 0x00
+
+# CHECK: tmhl %r0, 32768
+0xa7 0x03 0x80 0x00
+
+# CHECK: tmhl %r0, 65535
+0xa7 0x03 0xff 0xff
+
+# CHECK: tmhl %r15, 0
+0xa7 0xf3 0x00 0x00
+
+# CHECK: tmlh %r0, 0
+0xa7 0x00 0x00 0x00
+
+# CHECK: tmlh %r0, 32768
+0xa7 0x00 0x80 0x00
+
+# CHECK: tmlh %r0, 65535
+0xa7 0x00 0xff 0xff
+
+# CHECK: tmlh %r15, 0
+0xa7 0xf0 0x00 0x00
+
+# CHECK: tmll %r0, 0
+0xa7 0x01 0x00 0x00
+
+# CHECK: tmll %r0, 32768
+0xa7 0x01 0x80 0x00
+
+# CHECK: tmll %r0, 65535
+0xa7 0x01 0xff 0xff
+
+# CHECK: tmll %r15, 0
+0xa7 0xf1 0x00 0x00
+
+# CHECK: tmy -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x51
+
+# CHECK: tmy -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x51
+
+# CHECK: tmy 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x51
+
+# CHECK: tmy 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x51
+
+# CHECK: tmy 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x51
+
+# CHECK: tmy 0, 255
+0xeb 0xff 0x00 0x00 0x00 0x51
+
+# CHECK: tmy 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x51
+
+# CHECK: tmy 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x51
+
+# CHECK: tmy 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x51
+
+# CHECK: tmy 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x51
+
+# CHECK: xc 0(1), 0
+0xd7 0x00 0x00 0x00 0x00 0x00
+
+# CHECK: xc 0(1), 0(%r1)
+0xd7 0x00 0x00 0x00 0x10 0x00
+
+# CHECK: xc 0(1), 0(%r15)
+0xd7 0x00 0x00 0x00 0xf0 0x00
+
+# CHECK: xc 0(1), 4095
+0xd7 0x00 0x00 0x00 0x0f 0xff
+
+# CHECK: xc 0(1), 4095(%r1)
+0xd7 0x00 0x00 0x00 0x1f 0xff
+
+# CHECK: xc 0(1), 4095(%r15)
+0xd7 0x00 0x00 0x00 0xff 0xff
+
+# CHECK: xc 0(1,%r1), 0
+0xd7 0x00 0x10 0x00 0x00 0x00
+
+# CHECK: xc 0(1,%r15), 0
+0xd7 0x00 0xf0 0x00 0x00 0x00
+
+# CHECK: xc 4095(1,%r1), 0
+0xd7 0x00 0x1f 0xff 0x00 0x00
+
+# CHECK: xc 4095(1,%r15), 0
+0xd7 0x00 0xff 0xff 0x00 0x00
+
+# CHECK: xc 0(256,%r1), 0
+0xd7 0xff 0x10 0x00 0x00 0x00
+
+# CHECK: xc 0(256,%r15), 0
+0xd7 0xff 0xf0 0x00 0x00 0x00
+
+# CHECK: xgr %r0, %r0
+0xb9 0x82 0x00 0x00
+
+# CHECK: xgr %r0, %r15
+0xb9 0x82 0x00 0x0f
+
+# CHECK: xgr %r15, %r0
+0xb9 0x82 0x00 0xf0
+
+# CHECK: xgr %r7, %r8
+0xb9 0x82 0x00 0x78
+
+# CHECK: xgrk %r0, %r0, %r0
+0xb9 0xe7 0x00 0x00
+
+# CHECK: xgrk %r2, %r3, %r4
+0xb9 0xe7 0x40 0x23
+
+# CHECK: xg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x82
+
+# CHECK: xg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x82
+
+# CHECK: xg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x82
+
+# CHECK: xg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x82
+
+# CHECK: xg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x82
+
+# CHECK: xg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x82
+
+# CHECK: xg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x82
+
+# CHECK: xg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x82
+
+# CHECK: xg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x82
+
+# CHECK: xg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x82
+
+# CHECK: xihf %r0, 0
+0xc0 0x06 0x00 0x00 0x00 0x00
+
+# CHECK: xihf %r0, 4294967295
+0xc0 0x06 0xff 0xff 0xff 0xff
+
+# CHECK: xihf %r15, 0
+0xc0 0xf6 0x00 0x00 0x00 0x00
+
+# CHECK: xilf %r0, 0
+0xc0 0x07 0x00 0x00 0x00 0x00
+
+# CHECK: xilf %r0, 4294967295
+0xc0 0x07 0xff 0xff 0xff 0xff
+
+# CHECK: xilf %r15, 0
+0xc0 0xf7 0x00 0x00 0x00 0x00
+
+# CHECK: xi 0, 0
+0x97 0x00 0x00 0x00
+
+# CHECK: xi 4095, 0
+0x97 0x00 0x0f 0xff
+
+# CHECK: xi 0, 255
+0x97 0xff 0x00 0x00
+
+# CHECK: xi 0(%r1), 42
+0x97 0x2a 0x10 0x00
+
+# CHECK: xi 0(%r15), 42
+0x97 0x2a 0xf0 0x00
+
+# CHECK: xi 4095(%r1), 42
+0x97 0x2a 0x1f 0xff
+
+# CHECK: xi 4095(%r15), 42
+0x97 0x2a 0xff 0xff
+
+# CHECK: xiy -524288, 0
+0xeb 0x00 0x00 0x00 0x80 0x57
+
+# CHECK: xiy -1, 0
+0xeb 0x00 0x0f 0xff 0xff 0x57
+
+# CHECK: xiy 0, 0
+0xeb 0x00 0x00 0x00 0x00 0x57
+
+# CHECK: xiy 1, 0
+0xeb 0x00 0x00 0x01 0x00 0x57
+
+# CHECK: xiy 524287, 0
+0xeb 0x00 0x0f 0xff 0x7f 0x57
+
+# CHECK: xiy 0, 255
+0xeb 0xff 0x00 0x00 0x00 0x57
+
+# CHECK: xiy 0(%r1), 42
+0xeb 0x2a 0x10 0x00 0x00 0x57
+
+# CHECK: xiy 0(%r15), 42
+0xeb 0x2a 0xf0 0x00 0x00 0x57
+
+# CHECK: xiy 524287(%r1), 42
+0xeb 0x2a 0x1f 0xff 0x7f 0x57
+
+# CHECK: xiy 524287(%r15), 42
+0xeb 0x2a 0xff 0xff 0x7f 0x57
+
+# CHECK: xr %r0, %r0
+0x17 0x00
+
+# CHECK: xr %r0, %r15
+0x17 0x0f
+
+# CHECK: xr %r15, %r0
+0x17 0xf0
+
+# CHECK: xr %r7, %r8
+0x17 0x78
+
+# CHECK: xrk %r0, %r0, %r0
+0xb9 0xf7 0x00 0x00
+
+# CHECK: xrk %r2, %r3, %r4
+0xb9 0xf7 0x40 0x23
+
+# CHECK: x %r0, 0
+0x57 0x00 0x00 0x00
+
+# CHECK: x %r0, 4095
+0x57 0x00 0x0f 0xff
+
+# CHECK: x %r0, 0(%r1)
+0x57 0x00 0x10 0x00
+
+# CHECK: x %r0, 0(%r15)
+0x57 0x00 0xf0 0x00
+
+# CHECK: x %r0, 4095(%r1,%r15)
+0x57 0x01 0xff 0xff
+
+# CHECK: x %r0, 4095(%r15,%r1)
+0x57 0x0f 0x1f 0xff
+
+# CHECK: x %r15, 0
+0x57 0xf0 0x00 0x00
+
+# CHECK: xy %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x57
+
+# CHECK: xy %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x57
+
+# CHECK: xy %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x57
+
+# CHECK: xy %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x57
+
+# CHECK: xy %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x57
+
+# CHECK: xy %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x57
+
+# CHECK: xy %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x57
+
+# CHECK: xy %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x57
+
+# CHECK: xy %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x57
+
+# CHECK: xy %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x57
diff --git a/test/MC/Disassembler/SystemZ/invalid-regs.txt b/test/MC/Disassembler/SystemZ/invalid-regs.txt
new file mode 100644
index 000000000000..12440677ba34
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/invalid-regs.txt
@@ -0,0 +1,22 @@
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu < %s 2>&1 | FileCheck %s
+
+# This would be "axbr %f14, %f0", but %f14 is invalid.
+#
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: 0xb3 0x4a 0x00 0xe0
+# CHECK-NEXT: ^
+0xb3 0x4a 0x00 0xe0
+
+# This would be "axbr %f0, %f2", but %f2 is invalid.
+#
+# CHECK-NEXT: warning: invalid instruction encoding
+# CHECK-NEXT: 0xb3 0x4a 0x00 0x02
+# CHECK-NEXT: ^
+0xb3 0x4a 0x00 0x02
+
+# This would be "dlr %r1, %r8", but %r1 is invalid.
+#
+# CHECK-NEXT: warning: invalid instruction encoding
+# CHECK-NEXT: 0xb9 0x97 0x00 0x18
+# CHECK-NEXT: ^
+0xb9 0x97 0x00 0x18
diff --git a/test/MC/Disassembler/SystemZ/lit.local.cfg b/test/MC/Disassembler/SystemZ/lit.local.cfg
new file mode 100644
index 000000000000..b12af09434be
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'SystemZ' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/SystemZ/trunc-01.txt b/test/MC/Disassembler/SystemZ/trunc-01.txt
new file mode 100644
index 000000000000..336142ddea3a
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/trunc-01.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu 2>&1 | FileCheck %s
+# Every instruction must be at least two bytes long.
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: 0xc4
+0xc4
diff --git a/test/MC/Disassembler/SystemZ/trunc-02.txt b/test/MC/Disassembler/SystemZ/trunc-02.txt
new file mode 100644
index 000000000000..e1e20516e83a
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/trunc-02.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu 2>&1 | FileCheck %s
+# If the top bits are 0b10, the instruction must be 4 bytes long.
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: 0xb9 0x08 0x00
+0xb9 0x08 0x00
diff --git a/test/MC/Disassembler/SystemZ/trunc-03.txt b/test/MC/Disassembler/SystemZ/trunc-03.txt
new file mode 100644
index 000000000000..94d0c3789683
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/trunc-03.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu 2>&1 | FileCheck %s
+# If the top bits are 0b11, the instruction must be 6 bytes long.
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: 0xed 0x00 0x00 0x00 0x00
+0xed 0x00 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/SystemZ/unmapped.txt b/test/MC/Disassembler/SystemZ/unmapped.txt
new file mode 100644
index 000000000000..9379ff63195c
--- /dev/null
+++ b/test/MC/Disassembler/SystemZ/unmapped.txt
@@ -0,0 +1,32 @@
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu 2>&1 | FileCheck %s
+
+# An unmapped 2-byte instruction
+#
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: 0x01
+# CHECK-NEXT: ^
+0x01
+0x10
+
+# An unmapped 4-byte instruction
+#
+# CHECK-NEXT: warning: invalid instruction encoding
+# CHECK-NEXT: 0x53
+# CHECK-NEXT: ^
+0x53
+0x00 0x00 0x00
+
+# An unmapped 6-byte instruction
+#
+# CHECK-NEXT: warning: invalid instruction encoding
+# CHECK-NEXT: 0xff
+# CHECK-NEXT: ^
+0xff
+0x00 0x00 0x00 0x00 0x00
+
+# A trailing zero (also unmapped)
+#
+# CHECK-NEXT: warning: invalid instruction encoding
+# CHECK-NEXT: 0x00
+# CHECK-NEXT: ^
+0x00
diff --git a/test/MC/Disassembler/X86/intel-syntax-32.txt b/test/MC/Disassembler/X86/intel-syntax-32.txt
index 08bae6ec6753..2298823604aa 100644
--- a/test/MC/Disassembler/X86/intel-syntax-32.txt
+++ b/test/MC/Disassembler/X86/intel-syntax-32.txt
@@ -1,13 +1,31 @@
 # RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s
 
-# CHECK: sgdt
+# CHECK: sgdt opaque ptr [eax]
 0x0f 0x01 0x00
 
-# CHECK: sidt
+# CHECK: sidt opaque ptr [eax]
 0x0f 0x01 0x08
 
-# CHECK: lgdt
+# CHECK: lgdt opaque ptr [eax]
 0x0f 0x01 0x10
 
-# CHECK: lidt
+# CHECK: lidt opaque ptr [eax]
 0x0f 0x01 0x18
+
+# CHECK: mov al, byte ptr [878082192]
+0xa0 0x90 0x78 0x56 0x34
+
+# CHECK: mov ax, word ptr [878082192]
+0x66 0xa1 0x90 0x78 0x56 0x34
+
+# CHECK: mov eax, dword ptr [878082192]
+0xa1 0x90 0x78 0x56 0x34
+
+# CHECK: mov byte ptr [878082192], al
+0xa2 0x90 0x78 0x56 0x34
+
+# CHECK: mov word ptr [878082192], ax
+0x66 0xa3 0x90 0x78 0x56 0x34
+
+# CHECK: mov dword ptr [878082192], eax
+0xa3 0x90 0x78 0x56 0x34
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 57e602f1500e..3689525d92fa 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -12,70 +12,70 @@
 # CHECK: movsq
 0x48 0xa5
 
-# CHECK: pop FS
+# CHECK: pop fs
 0x0f 0xa1
 
-# CHECK: pop GS
+# CHECK: pop gs
 0x0f 0xa9
 
-# CHECK: in AL, DX
+# CHECK: in al, dx
 0xec
 
 # CHECK: nop
 0x90
 
-# CHECK: xchg EAX, R8D
+# CHECK: xchg eax, r8d
 0x41 0x90
 
-# CHECK: xchg RAX, R8
+# CHECK: xchg rax, r8
 0x49 0x90
 
-# CHECK: add AL, 0
+# CHECK: add al, 0
 0x04 0x00
 
-# CHECK: add AX, 0
+# CHECK: add ax, 0
 0x66 0x05 0x00 0x00
 
-# CHECK: add EAX, 0
+# CHECK: add eax, 0
 0x05 0x00 0x00 0x00 0x00
 
-# CHECK: add RAX, 0
+# CHECK: add rax, 0
 0x48 0x05 0x00 0x00 0x00 0x00
 
-# CHECK: adc AL, 0
+# CHECK: adc al, 0
 0x14 0x00
 
-# CHECK: adc AX, 0
+# CHECK: adc ax, 0
 0x66 0x15 0x00 0x00
 
-# CHECK: adc EAX, 0
+# CHECK: adc eax, 0
 0x15 0x00 0x00 0x00 0x00
 
-# CHECK: adc RAX, 0
+# CHECK: adc rax, 0
 0x48 0x15 0x00 0x00 0x00 0x00
 
-# CHECK: cmp AL, 0
+# CHECK: cmp al, 0
 0x3c 0x00
 
-# CHECK: cmp AX, 0
+# CHECK: cmp ax, 0
 0x66 0x3d 0x00 0x00
 
-# CHECK: cmp EAX, 0
+# CHECK: cmp eax, 0
 0x3d 0x00 0x00 0x00 0x00
 
-# CHECK: cmp RAX, 0
+# CHECK: cmp rax, 0
 0x48 0x3d 0x00 0x00 0x00 0x00
 
-# CHECK: test AL, 0
+# CHECK: test al, 0
 0xa8 0x00
 
-# CHECK: test AX, 0
+# CHECK: test ax, 0
 0x66 0xa9 0x00 0x00
 
-# CHECK: test EAX, 0
+# CHECK: test eax, 0
 0xa9 0x00 0x00 0x00 0x00
 
-# CHECK: test RAX, 0
+# CHECK: test rax, 0
 0x48 0xa9 0x00 0x00 0x00 0x00
 
 # CHECK: sysret
@@ -105,17 +105,50 @@
 # CHECK: retf
 0x66 0xcb
 
-# CHECK: vpgatherqq YMM2, QWORD PTR [RDI + 2*YMM1], YMM0
+# CHECK: vshufpd xmm0, xmm1, xmm2, 1
+0xc5 0xf1 0xc6 0xc2 0x01
+
+# CHECK: vpgatherqq ymm2, qword ptr [rdi + 2*ymm1], ymm0
 0xc4 0xe2 0xfd 0x91 0x14 0x4f
 
-# CHECK: vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
+# CHECK: vpgatherdd xmm10, dword ptr [r15 + 2*xmm9], xmm8
 0xc4 0x02 0x39 0x90 0x14 0x4f
 
-# CHECK: xsave64 OPAQUE PTR [RAX]
+# CHECK: xsave64 opaque ptr [rax]
 0x48 0x0f 0xae 0x20
 
-# CHECK: xrstor64 OPAQUE PTR [RAX]
+# CHECK: xrstor64 opaque ptr [rax]
 0x48 0x0f 0xae 0x28
 
-# CHECK: xsaveopt64 OPAQUE PTR [RAX]
+# CHECK: xsaveopt64 opaque ptr [rax]
 0x48 0x0f 0xae 0x30
+
+# CHECK: movabs al, byte ptr [-6066930261531658096]
+0xa0 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs al, byte ptr [-6066930261531658096]
+0x48 0xa0 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs ax, word ptr [-6066930261531658096]
+0x66 0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs eax, dword ptr [-6066930261531658096]
+0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs rax, qword ptr [-6066930261531658096]
+0x48 0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs byte ptr [-6066930261531658096], al
+0xa2 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs byte ptr [-6066930261531658096], al
+0x48 0xa2 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs word ptr [-6066930261531658096], ax
+0x66 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs dword ptr [-6066930261531658096], eax
+0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabs qword ptr [-6066930261531658096], rax
+0x48 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
diff --git a/test/MC/Disassembler/X86/lit.local.cfg b/test/MC/Disassembler/X86/lit.local.cfg
index 6211b3e53853..ba763cf03ffc 100644
--- a/test/MC/Disassembler/X86/lit.local.cfg
+++ b/test/MC/Disassembler/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.txt']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/Disassembler/X86/prefixes.txt b/test/MC/Disassembler/X86/prefixes.txt
new file mode 100644
index 000000000000..56596e387511
--- /dev/null
+++ b/test/MC/Disassembler/X86/prefixes.txt
@@ -0,0 +1,59 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
+
+# CHECK: lock
+# CHECK-NEXT:	orl	$16, %fs:776
+0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10
+
+# CHECK: movq	%fs:768, %rdi
+0x64 0x48 0x8b 0x3c 0x25 0x00 0x03 0x00 0x00
+
+# CHECK: rep
+# CHECK-NEXT:		stosq
+0xf3 0x48 0xab
+
+# CHECK: rep
+# CHECK-NEXT:		stosl
+0xf3 0x67 0x48 0xab
+
+# CHECK: movl 32(%rbp), %eax
+0x8b 0x45 0x20
+
+# CHECK: movl %es:32(%rbp), %eax
+0x26 0x8b 0x45 0x20
+
+# CHECK: movl %es:32(%rbp), %eax
+0x2e 0x26 0x8b 0x45 0x20
+
+# Test that multiple prefixes stack.
+#    (TODO: the correct disassembly is actually more like "es movl %cs:32(%rbp), %eax",
+#    but we don't support that.)
+# CHECK: movl %cs:32(%rbp), %eax
+0x26 0x2e 0x8b 0x45 0x20
+
+# Test that 0xf3 as part of the opcode works.
+# CHECK: cvtdq2pd	(%rax), %xmm0
+0xf3 0x0f 0xe6 0x00
+
+# CHECK: pause
+0xf3 0x90
+
+# CHECK: nop
+0x90
+
+# CHECK: 		lock
+# CHECK-NEXT:	nop
+0xf0 0x90
+
+# Test that multiple redundant prefixes work (redundant, but valid x86).
+# CHECK: rep
+# CHECK-NEXT: rep
+# CHECK-NEXT: stosq
+0xf3 0xf3 0x48 0xab
+
+# Test that a prefix on its own works. It's debatable whether this should be
+# considered valid, but because LLVM's own disassembler has decided to
+# disassemble prefixes as separate opcodes, it should therefore be capable of
+# re-consuming its own output.
+# CHECK: rep
+0xf3
+# ***IMPORTANT ^-- this must be at the end of the file to be a valid test *** 
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 9827a1809f1b..7ca087438972 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -129,6 +129,9 @@
 # CHECK: vcvtsd2si %xmm0, %rax
 0xc4 0xe1 0xfb 0x2d 0xc0
 
+# CHECK: vcvtsd2si %xmm0, %rax
+0xc4 0xe1 0xff 0x2d 0xc0
+
 # CHECK: vmaskmovpd %xmm0, %xmm1, (%rax)
 0xc4 0xe2 0x71 0x2f 0x00
 
@@ -260,6 +263,9 @@
 # CHECK: vmovups %ymm0, %ymm1
 0xc5 0xfc 0x11 0xc1
 
+# CHECK: vmovups %ymm0, %ymm1
+0xc4 0xe1 0xfc 0x11 0xc1
+
 # CHECK: vmovaps %ymm1, %ymm0
 0xc5 0xfc 0x28 0xc1
 
@@ -722,9 +728,66 @@
 # CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
 0xc4 0xe3 0x79 0x6a 0x01 0x10
 
+# CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xfd 0x6a 0x01 0x10
+
+# CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
+0xc4 0xe3 0x7d 0x6a 0x01 0x10
+
+# CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xf9 0x6a 0xc2 0x10
+
+# CHECK: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
+0xc4 0xe3 0x79 0x6a 0xc2 0x10
+
+# CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xfd 0x6a 0xc2 0x10
+
+# CHECK: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
+0xc4 0xe3 0x7d 0x6a 0xc2 0x10
+
+# CHECK: vfmaddps  (%rcx), %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xf9 0x68 0x01 0x10
+
+# CHECK: vfmaddps   %xmm1, (%rcx), %xmm0, %xmm0
+0xc4 0xe3 0x79 0x68 0x01 0x10
+
+# CHECK: vfmaddps   %xmm1, %xmm2, %xmm0, %xmm0
+0xc4 0xe3 0x79 0x68 0xc2 0x10
+
+# CHECK: vfmaddps   %xmm2, %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xf9 0x68 0xc2 0x10
+
+# CHECK: vfmaddps  (%rcx), %ymm1, %ymm0, %ymm0
+0xc4 0xe3 0xfd 0x68 0x01 0x10
+
+# CHECK: vfmaddps   %ymm1, (%rcx), %ymm0, %ymm0
+0xc4 0xe3 0x7d 0x68 0x01 0x10
+
+# CHECK: vfmaddps   %ymm1, %ymm2, %ymm0, %ymm0
+0xc4 0xe3 0x7d 0x68 0xc2 0x10
+
+# CHECK: vfmaddps   %ymm2, %ymm1, %ymm0, %ymm0
+0xc4 0xe3 0xfd 0x68 0xc2 0x10
+
+# CHECK: vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+0xc4 0xe3 0x69 0x48 0xcb 0x40
+
 # CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
 0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
 
+# CHECK: vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+0xc4 0xe3 0xd5 0x48 0x30 0x12
+
+# CHECK: vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+0xc4 0xe3 0x61 0x48 0x20 0x13
+
+# CHECK: vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+0xc4 0xe3 0x6d 0x48 0xd4 0x40
+
+# CHECK: vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+0xc4 0xe3 0x75 0x49 0x40 0x04 0x11
+
 # CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
 0xc4 0xe2 0xf9 0x92 0x14 0x4f
 
@@ -756,7 +819,7 @@
 
 # rdar://13493622 lldb doesn't print the x86 rep/repne prefix when disassembling
 # CHECK: repne
-# CHECK-NEXT: movsd
+# CHECK-NEXT: movsl
 0xf2 0xa5
 # CHECK: repne
 # CHECK-NEXT: movsq
@@ -764,7 +827,92 @@
 # CHECK: repne
 # CHECK-NEXT: movb  $0, (%rax)
 0xf2 0xc6 0x0 0x0
-# CHECK: rep
+
+# rdar://11019859 Support 2013 Haswell RTM instructions and HLE prefixes
+# CHECK: xrelease
 # CHECK-NEXT: lock
 # CHECK-NEXT: incl   (%rax)
 0xf3 0xf0 0xff 0x00
+
+# CHECK: xrelease
+# CHECK-NEXT: xchgl %ebx, %eax
+0xf3 0x93
+# CHECK: xrelease
+# CHECK-NEXT: xchgl %ebx, (%rax)
+0xf3 0x87 0x18
+# CHECK: xrelease
+# CHECK-NEXT: movb %al, (%rbx)
+0xf3 0x88 0x03
+# CHECK: xrelease
+# CHECK-NEXT: movl %eax, (%rbx)
+0xf3 0x89 0x03
+# CHECK: xrelease
+# CHECK-NEXT: movb $1, (%rbx)
+0xf3 0xc6 0x03 0x01
+# CHECK: xrelease
+# CHECK-NEXT: movl $1, (%rbx)
+0xf3 0xc7 0x03 0x01 0x00 0x00 0x00
+
+# CHECK: xacquire
+# CHECK-NEXT: xchgl %ebx, %eax
+0xf2 0x93
+# CHECK: xacquire
+# CHECK-NEXT: xchgl %ebx, (%rax)
+0xf2 0x87 0x18
+
+# CHECK: bextr $2814, %edi, %eax
+0x8f 0xea 0x78 0x10 0xc7 0xfe 0x0a 0x00 0x00
+
+# CHECK: blci %rdi, %rax
+0x8f 0xe9 0xf8 0x02 0xf7
+
+# CHECK: vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+0x8f 0xe8 0x60 0xa2 0xe2 0x10
+
+# CHECK: vpcmov (%rax), %xmm2, %xmm3, %xmm4
+0x8f 0xe8 0xe0 0xa2 0x20 0x20
+
+# CHECK: vpcmov %xmm1, (%rax), %xmm3, %xmm4
+0x8f 0xe8 0x60 0xa2 0x20 0x10
+
+# CHECK: vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+0x8f 0xe8 0x64 0xa2 0xe2 0x10
+
+# CHECK: vpcmov (%rax), %ymm2, %ymm3, %ymm4
+0x8f 0xe8 0xe4 0xa2 0x20 0x20
+
+# CHECK: vpcmov %ymm1, (%rax), %ymm3, %ymm4
+0x8f 0xe8 0x64 0xa2 0x20 0x10
+
+# CHECK: vpcomb $55, %xmm6, %xmm4, %xmm2
+0x8f 0xe8 0x58 0xcc 0xd6 0x37
+
+# CHECK: vpcomb $56, 8(%rax), %xmm3, %xmm2
+0x8f 0xe8 0x60 0xcc 0x50 0x08 0x38
+
+# CHECK: vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+0x8f 0xe8 0x58 0x9e 0xd6 0x40
+# CHECK: vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3
+0x8f 0xe8 0x58 0x9e 0x1c 0x08 0x40
+
+# CHECK: vprotd (%rax), %xmm0, %xmm3
+0x8f 0xe9 0xf8 0x92 0x18
+# CHECK: vprotd %xmm2, (%rax,%rcx), %xmm4
+0x8f 0xe9 0x68 0x92 0x24 0x08
+# CHECK: vprotd %xmm5, %xmm3, %xmm2
+0x8f 0xe9 0x50 0x92 0xd3
+# CHECK: vprotd $43, (%rcx), %xmm6
+0x8f 0xe8 0x78 0xc2 0x31 0x2b
+# CHECK: vprotd $44, (%rax,%rcx), %xmm7
+0x8f 0xe8 0x78 0xc2 0x3c 0x08 0x2c
+# CHECK: vprotd $45, %xmm4, %xmm4
+0x8f 0xe8 0x78 0xc2 0xe4 0x2d
+
+# CHECK: vfrczps 4(%rax), %xmm3
+0x8f 0xe9 0x78 0x80 0x58 0x04
+# CHECK: vfrczps %xmm6, %xmm5
+0x8f 0xe9 0x78 0x80 0xee
+# CHECK: vfrczps (%rcx), %xmm1
+0x8f 0xe9 0x78 0x80 0x09
+# CHECK: vfrczps %ymm2, %ymm4
+0x8f 0xe9 0x7c 0x80 0xe2
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 76d67d352ccf..b6a62c4f6975 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -648,3 +648,51 @@
 
 # CHECK: adoxl (%eax), %eax
 0xf3 0x0f 0x38 0xf6 0x00
+
+# CHECK: movb 878082192, %al
+0xa0 0x90 0x78 0x56 0x34
+
+# CHECK: movw 878082192, %ax
+0x66 0xa1 0x90 0x78 0x56 0x34
+
+# CHECK: movl 878082192, %eax
+0xa1 0x90 0x78 0x56 0x34
+
+# CHECK: movb %al, 878082192
+0xa2 0x90 0x78 0x56 0x34
+
+# CHECK: movw %ax, 878082192
+0x66 0xa3 0x90 0x78 0x56 0x34
+
+# CHECK: movl %eax, 878082192
+0xa3 0x90 0x78 0x56 0x34
+
+# CHECK: incl %ecx
+0xff 0xc1
+
+# CHECK: decl %ecx
+0xff 0xc9
+
+# CHECK: incw %cx
+0x66 0xff 0xc1
+
+# CHECK: decw %cx
+0x66 0xff 0xc9
+
+# CHECK: incb %cl
+0xfe 0xc1
+
+# CHECK: decb %cl
+0xfe 0xc9
+
+# CHECK: incl %ecx
+0x41
+
+# CHECK: decl %ecx
+0x49
+
+# CHECK: movq %xmm0, %xmm0
+0xf3 0x0f 0x7e 0xc0
+
+# CHECK: vmovq %xmm0, %xmm0
+0xc5 0xfa 0x7e 0xc0
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index c285af72b358..8c6bc0e2964c 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -127,3 +127,117 @@
 
 # CHECK: stac
 0x0f 0x01 0xcb
+
+# CHECK: movabsb -6066930261531658096, %al
+0xa0 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsb -6066930261531658096, %al
+0x48 0xa0 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsw -6066930261531658096, %ax
+0x66 0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsl -6066930261531658096, %eax
+0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsq -6066930261531658096, %rax
+0x48 0xa1 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsb %al, -6066930261531658096
+0xa2 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsb %al, -6066930261531658096
+0x48 0xa2 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsw %ax, -6066930261531658096
+0x66 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsl %eax, -6066930261531658096
+0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: movabsq %rax, -6066930261531658096
+0x48 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: sha1rnds4 $1, %xmm1, %xmm2
+0x0f 0x3a 0xcc 0xd1 0x01
+
+# CHECK: sha1rnds4 $1, (%rax), %xmm2
+0x0f 0x3a 0xcc 0x10 0x01
+
+# CHECK: sha1nexte %xmm1, %xmm2
+0x0f 0x38 0xc8 0xd1
+
+# CHECK: sha1nexte (%rax), %xmm2
+0x0f 0x38 0xc8 0x10
+
+# CHECK: sha1msg1 %xmm1, %xmm2
+0x0f 0x38 0xc9 0xd1
+
+# CHECK: sha1msg1 (%rax), %xmm2
+0x0f 0x38 0xc9 0x10
+
+# CHECK: sha1msg2 %xmm1, %xmm2
+0x0f 0x38 0xca 0xd1
+
+# CHECK: sha1msg2 (%rax), %xmm2
+0x0f 0x38 0xca 0x10
+
+# CHECK: sha256rnds2 (%rax), %xmm2
+0x0f 0x38 0xcb 0x10
+
+# CHECK: sha256rnds2 %xmm1, %xmm2
+0x0f 0x38 0xcb 0xd1
+
+# CHECK: sha256msg1 %xmm1, %xmm2
+0x0f 0x38 0xcc 0xd1
+
+# CHECK: sha256msg1 (%rax), %xmm2
+0x0f 0x38 0xcc 0x10
+
+# CHECK: sha256msg2 %xmm1, %xmm2
+0x0f 0x38 0xcd 0xd1
+
+# CHECK: sha256msg2 (%rax), %xmm2
+0x0f 0x38 0xcd 0x10
+
+# CHECK: incl %ecx
+0xff 0xc1
+
+# CHECK: decl %ecx
+0xff 0xc9
+
+# CHECK: incw %cx
+0x66 0xff 0xc1
+
+# CHECK: decw %cx
+0x66 0xff 0xc9
+
+# CHECK: incb %cl
+0xfe 0xc1
+
+# CHECK: decb %cl
+0xfe 0xc9
+
+# CHECK: incq %rcx
+0x48 0xff 0xc1
+
+# CHECK: decq %rcx
+0x48 0xff 0xc9
+
+# CHECK: movq %xmm0, %xmm0
+0xf3 0x0f 0x7e 0xc0
+
+# CHECK: vmovq %xmm0, %xmm0
+0xc5 0xfa 0x7e 0xc0
+
+# CHECK: vmovq %xmm0, %rax
+0xc4 0xe1 0xf9 0x7e 0xc0
+
+# CHECK: movd %xmm0, %rax
+0x66 0x48 0x0f 0x7e 0xc0
+
+# CHECK: pextrw $3, %xmm3, %ecx
+0x66 0x0f 0x3a 0x15 0xd9 0x03
+
+# CHECK: pextrw $3, %xmm3, (%rax)
+0x66 0x0f 0x3a 0x15 0x18 0x03
diff --git a/test/MC/Disassembler/XCore/lit.local.cfg b/test/MC/Disassembler/XCore/lit.local.cfg
index 15b65836e717..4d17d4642045 100644
--- a/test/MC/Disassembler/XCore/lit.local.cfg
+++ b/test/MC/Disassembler/XCore/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.txt']
-
 targets = set(config.root.targets_to_build.split())
 if not 'XCore' in targets:
     config.unsupported = True
diff --git a/test/MC/ELF/alias-reloc.s b/test/MC/ELF/alias-reloc.s
index c25c25932f09..0ee73e9ca24a 100644
--- a/test/MC/ELF/alias-reloc.s
+++ b/test/MC/ELF/alias-reloc.s
@@ -18,14 +18,23 @@ foo2:
     .quad    bar2
 
 // CHECK:      Relocations [
-// CHECK-NEXT:   Section ({{[0-9]+}}) zed {
+// CHECK-NEXT:   Section ({{[0-9]+}}) .relazed {
 // CHECK-NEXT:     0x1 R_X86_64_PLT32 bar 0xFFFFFFFFFFFFFFFC
 // CHECK-NEXT:     0x5 R_X86_64_64 bar2 0x0
 // CHECK-NEXT:   }
 // CHECK-NEXT: ]
 
 // CHECK:      Symbols [
-// CHECK:        Symbol {
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name:  (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local (0x0)
+// CHECK-NEXT:     Type: None (0x0)
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:  (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: bar
 // CHECK-NEXT:     Value: 0x0
 // CHECK-NEXT:     Size: 0
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
index 0575f41fc851..8da75f7ef2de 100644
--- a/test/MC/ELF/alias.s
+++ b/test/MC/ELF/alias.s
@@ -19,6 +19,15 @@ bar4 = foo4
 
 // CHECK:      Symbols [
 // CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name:  (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local (0x0)
+// CHECK-NEXT:     Type: None (0x0)
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:  (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: bar
 // CHECK-NEXT:     Value: 0x0
 // CHECK-NEXT:     Size: 0
diff --git a/test/MC/ELF/bad-relocation.s b/test/MC/ELF/bad-relocation.s
new file mode 100644
index 000000000000..1a66744ec863
--- /dev/null
+++ b/test/MC/ELF/bad-relocation.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o /dev/null 2>&1 | FileCheck  %s
+
+// CHECK: error: invalid variant 'BADRELOC'
+
+        .text
+foo:
+	leal	.Lfoo@BADRELOC(%ebx), %eax
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
index 3ddb53981a9f..1f618e1bb6c6 100644
--- a/test/MC/ELF/basic-elf-32.s
+++ b/test/MC/ELF/basic-elf-32.s
@@ -45,7 +45,7 @@ main:                                   # @main
 // CHECK:     Name: .rel.text
 
 // CHECK: Relocations [
-// CHECK:   Section (1) .text {
+// CHECK:   Section (2) .rel.text {
 // CHECK:     0x6  R_386_32   .rodata.str1.1
 // CHECK:     0xB  R_386_PC32 puts
 // CHECK:     0x12 R_386_32   .rodata.str1.1
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
index f98623ad1e3c..a77f3e63e093 100644
--- a/test/MC/ELF/basic-elf-64.s
+++ b/test/MC/ELF/basic-elf-64.s
@@ -45,7 +45,7 @@ main:                                   # @main
 // CHECK:     Name: .rela.text
 
 // CHECK: Relocations [
-// CHECK:   Section (1) .text {
+// CHECK:   Section (2) .rela.text {
 // CHECK:     0x5  R_X86_64_32   .rodata.str1.1 0x0
 // CHECK:     0xA  R_X86_64_PC32 puts           0xFFFFFFFFFFFFFFFC
 // CHECK:     0xF  R_X86_64_32   .rodata.str1.1 0x6
diff --git a/test/MC/ELF/bss-large.ll b/test/MC/ELF/bss-large.ll
new file mode 100644
index 000000000000..e2a7a23a452d
--- /dev/null
+++ b/test/MC/ELF/bss-large.ll
@@ -0,0 +1,13 @@
+; RUN: llc -filetype=obj %s -o %t
+
+; PR16338 - ICE when compiling very large two-dimensional array
+; Check if a huge object can be put into bss section
+; C++ code is:
+;   int a[60666][60666];
+
+; ModuleID = 'test.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a0 = addrspace(1) global [4 x [4 x i32]] zeroinitializer, align 16
+@a = global [60666 x [60666 x i32]] zeroinitializer, align 16
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
index 137b8b6f8490..b3768cb9834c 100644
--- a/test/MC/ELF/cfi-adjust-cfa-offset.s
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -26,7 +26,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -49,5 +48,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index 1cad32507cde..d7a53c462b70 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -24,7 +24,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -46,5 +45,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index f1a54a810e0a..eac2c731fa93 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -25,7 +25,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -48,5 +47,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index b1e74ea098c2..00d8b99af9d6 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -21,7 +21,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -43,5 +42,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index abde0de4fad0..36e147f5a4da 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -21,7 +21,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -43,5 +42,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
index a910faba64cc..839d6717debc 100644
--- a/test/MC/ELF/cfi-escape.s
+++ b/test/MC/ELF/cfi-escape.s
@@ -22,7 +22,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -44,5 +43,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index f7f95fbc5f2d..951a6001e519 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -21,7 +21,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -43,5 +42,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
index f7a07e45d8b5..4abbb53b8fc9 100644
--- a/test/MC/ELF/cfi-register.s
+++ b/test/MC/ELF/cfi-register.s
@@ -22,7 +22,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -44,5 +43,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
index 35a73efb0f4a..34254c862a46 100644
--- a/test/MC/ELF/cfi-rel-offset.s
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -29,7 +29,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -52,5 +51,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
index 5817d1f4af7f..3de769f39fa0 100644
--- a/test/MC/ELF/cfi-rel-offset2.s
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -21,7 +21,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -43,5 +42,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index 932a1828eb29..98c759d4fffc 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -24,7 +24,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -46,5 +45,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
index 6c25d5b47195..d25b5ff2e93f 100644
--- a/test/MC/ELF/cfi-restore.s
+++ b/test/MC/ELF/cfi-restore.s
@@ -22,7 +22,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -44,5 +43,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
index 075c6b9b8435..9f5ae4be9ed4 100644
--- a/test/MC/ELF/cfi-same-value.s
+++ b/test/MC/ELF/cfi-same-value.s
@@ -22,7 +22,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -44,5 +43,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
index c83b47c142b4..9773a36a3b03 100644
--- a/test/MC/ELF/cfi-undefined.s
+++ b/test/MC/ELF/cfi-undefined.s
@@ -22,7 +22,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -44,5 +43,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s
new file mode 100644
index 000000000000..c7d438a19260
--- /dev/null
+++ b/test/MC/ELF/cfi-window-save.s
@@ -0,0 +1,51 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
+
+# Should use SPARC as the target to test this. However, SPARC does not
+# use MC yet.
+
+f:
+        .cfi_startproc
+        nop
+        .cfi_window_save
+        nop
+        .cfi_endproc
+
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00412D00 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK:        }
+
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 4ac0e34f10c3..05cb0ae35bd2 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -28,7 +28,6 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
@@ -51,4 +50,5 @@ f:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
 // CHECK-NEXT:     ]
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index 98f4fa9c62e6..b8b6e6b52af1 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -227,78 +227,6 @@ f36:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
-// CHECK-NEXT:       0x20  R_X86_64_PC32 .text 0x0
-// CHECK-NEXT:       0x29  R_X86_64_32   bar   0x0
-// CHECK-NEXT:       0x43  R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x5C  R_X86_64_PC32 .text 0x1
-// CHECK-NEXT:       0x65  R_X86_64_32   bar   0x0
-// CHECK-NEXT:       0x74  R_X86_64_PC32 .text 0x2
-// CHECK-NEXT:       0x7D  R_X86_64_32   bar   0x0
-// CHECK-NEXT:       0x97  R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0xB0  R_X86_64_PC32 .text 0x3
-// CHECK-NEXT:       0xB9  R_X86_64_16   bar   0x0
-// CHECK-NEXT:       0xCE  R_X86_64_16   foo   0x0
-// CHECK-NEXT:       0xE0  R_X86_64_PC32 .text 0x4
-// CHECK-NEXT:       0xFE  R_X86_64_32   foo   0x0
-// CHECK-NEXT:       0x110 R_X86_64_PC32 .text 0x5
-// CHECK-NEXT:       0x12E R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x144 R_X86_64_PC32 .text 0x6
-// CHECK-NEXT:       0x162 R_X86_64_16   foo   0x0
-// CHECK-NEXT:       0x174 R_X86_64_PC32 .text 0x7
-// CHECK-NEXT:       0x192 R_X86_64_32   foo   0x0
-// CHECK-NEXT:       0x1A4 R_X86_64_PC32 .text 0x8
-// CHECK-NEXT:       0x1C2 R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x1D8 R_X86_64_PC32 .text 0x9
-// CHECK-NEXT:       0x1F6 R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x20C R_X86_64_PC32 .text 0xA
-// CHECK-NEXT:       0x22A R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x240 R_X86_64_PC32 .text 0xB
-// CHECK-NEXT:       0x25E R_X86_64_PC16 foo   0x0
-// CHECK-NEXT:       0x270 R_X86_64_PC32 .text 0xC
-// CHECK-NEXT:       0x28E R_X86_64_PC32 foo   0x0
-// CHECK-NEXT:       0x2A0 R_X86_64_PC32 .text 0xD
-// CHECK-NEXT:       0x2BE R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x2D4 R_X86_64_PC32 .text 0xE
-// CHECK-NEXT:       0x2F2 R_X86_64_PC16 foo   0x0
-// CHECK-NEXT:       0x304 R_X86_64_PC32 .text 0xF
-// CHECK-NEXT:       0x322 R_X86_64_PC32 foo   0x0
-// CHECK-NEXT:       0x334 R_X86_64_PC32 .text 0x10
-// CHECK-NEXT:       0x352 R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x368 R_X86_64_PC32 .text 0x11
-// CHECK-NEXT:       0x386 R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x39C R_X86_64_PC32 .text 0x12
-// CHECK-NEXT:       0x3BA R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x3D0 R_X86_64_PC32 .text 0x13
-// CHECK-NEXT:       0x3EE R_X86_64_16   foo   0x0
-// CHECK-NEXT:       0x400 R_X86_64_PC32 .text 0x14
-// CHECK-NEXT:       0x41E R_X86_64_32   foo   0x0
-// CHECK-NEXT:       0x430 R_X86_64_PC32 .text 0x15
-// CHECK-NEXT:       0x44E R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x464 R_X86_64_PC32 .text 0x16
-// CHECK-NEXT:       0x482 R_X86_64_16   foo   0x0
-// CHECK-NEXT:       0x494 R_X86_64_PC32 .text 0x17
-// CHECK-NEXT:       0x4B2 R_X86_64_32   foo   0x0
-// CHECK-NEXT:       0x4C4 R_X86_64_PC32 .text 0x18
-// CHECK-NEXT:       0x4E2 R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x4F8 R_X86_64_PC32 .text 0x19
-// CHECK-NEXT:       0x516 R_X86_64_64   foo   0x0
-// CHECK-NEXT:       0x52C R_X86_64_PC32 .text 0x1A
-// CHECK-NEXT:       0x54A R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x560 R_X86_64_PC32 .text 0x1B
-// CHECK-NEXT:       0x57E R_X86_64_PC16 foo   0x0
-// CHECK-NEXT:       0x590 R_X86_64_PC32 .text 0x1C
-// CHECK-NEXT:       0x5AE R_X86_64_PC32 foo   0x0
-// CHECK-NEXT:       0x5C0 R_X86_64_PC32 .text 0x1D
-// CHECK-NEXT:       0x5DE R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x5F4 R_X86_64_PC32 .text 0x1E
-// CHECK-NEXT:       0x612 R_X86_64_PC16 foo   0x0
-// CHECK-NEXT:       0x624 R_X86_64_PC32 .text 0x1F
-// CHECK-NEXT:       0x642 R_X86_64_PC32 foo   0x0
-// CHECK-NEXT:       0x654 R_X86_64_PC32 .text 0x20
-// CHECK-NEXT:       0x672 R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x688 R_X86_64_PC32 .text 0x21
-// CHECK-NEXT:       0x6A6 R_X86_64_PC64 foo   0x0
-// CHECK-NEXT:       0x6BC R_X86_64_PC32 .text 0x22
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 14000000 00000000 017A4C52 00017810
@@ -427,5 +355,77 @@ f36:
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20  R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:       0x29  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x43  R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x5C  R_X86_64_PC32 .text 0x1
+// CHECK-NEXT:       0x65  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x74  R_X86_64_PC32 .text 0x2
+// CHECK-NEXT:       0x7D  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x97  R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0xB0  R_X86_64_PC32 .text 0x3
+// CHECK-NEXT:       0xB9  R_X86_64_16   bar   0x0
+// CHECK-NEXT:       0xCE  R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0xE0  R_X86_64_PC32 .text 0x4
+// CHECK-NEXT:       0xFE  R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x110 R_X86_64_PC32 .text 0x5
+// CHECK-NEXT:       0x12E R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x144 R_X86_64_PC32 .text 0x6
+// CHECK-NEXT:       0x162 R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x174 R_X86_64_PC32 .text 0x7
+// CHECK-NEXT:       0x192 R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x1A4 R_X86_64_PC32 .text 0x8
+// CHECK-NEXT:       0x1C2 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x1D8 R_X86_64_PC32 .text 0x9
+// CHECK-NEXT:       0x1F6 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x20C R_X86_64_PC32 .text 0xA
+// CHECK-NEXT:       0x22A R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x240 R_X86_64_PC32 .text 0xB
+// CHECK-NEXT:       0x25E R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x270 R_X86_64_PC32 .text 0xC
+// CHECK-NEXT:       0x28E R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x2A0 R_X86_64_PC32 .text 0xD
+// CHECK-NEXT:       0x2BE R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x2D4 R_X86_64_PC32 .text 0xE
+// CHECK-NEXT:       0x2F2 R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x304 R_X86_64_PC32 .text 0xF
+// CHECK-NEXT:       0x322 R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x334 R_X86_64_PC32 .text 0x10
+// CHECK-NEXT:       0x352 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x368 R_X86_64_PC32 .text 0x11
+// CHECK-NEXT:       0x386 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x39C R_X86_64_PC32 .text 0x12
+// CHECK-NEXT:       0x3BA R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x3D0 R_X86_64_PC32 .text 0x13
+// CHECK-NEXT:       0x3EE R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x400 R_X86_64_PC32 .text 0x14
+// CHECK-NEXT:       0x41E R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x430 R_X86_64_PC32 .text 0x15
+// CHECK-NEXT:       0x44E R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x464 R_X86_64_PC32 .text 0x16
+// CHECK-NEXT:       0x482 R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x494 R_X86_64_PC32 .text 0x17
+// CHECK-NEXT:       0x4B2 R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x4C4 R_X86_64_PC32 .text 0x18
+// CHECK-NEXT:       0x4E2 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x4F8 R_X86_64_PC32 .text 0x19
+// CHECK-NEXT:       0x516 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x52C R_X86_64_PC32 .text 0x1A
+// CHECK-NEXT:       0x54A R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x560 R_X86_64_PC32 .text 0x1B
+// CHECK-NEXT:       0x57E R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x590 R_X86_64_PC32 .text 0x1C
+// CHECK-NEXT:       0x5AE R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x5C0 R_X86_64_PC32 .text 0x1D
+// CHECK-NEXT:       0x5DE R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x5F4 R_X86_64_PC32 .text 0x1E
+// CHECK-NEXT:       0x612 R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x624 R_X86_64_PC32 .text 0x1F
+// CHECK-NEXT:       0x642 R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x654 R_X86_64_PC32 .text 0x20
+// CHECK-NEXT:       0x672 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x688 R_X86_64_PC32 .text 0x21
+// CHECK-NEXT:       0x6A6 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x6BC R_X86_64_PC32 .text 0x22
 // CHECK-NEXT:     ]
 // CHECK:        }
diff --git a/test/MC/ELF/comdat-dup-group-name.s b/test/MC/ELF/comdat-dup-group-name.s
new file mode 100644
index 000000000000..1181e2eab02a
--- /dev/null
+++ b/test/MC/ELF/comdat-dup-group-name.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
+
+// Test that we produce two foo sections, each in separate groups
+
+// CHECK: Index: 1
+// CHECK-NEXT: Name: .group
+
+// CHECK: Index: 2
+// CHECK-NEXT: Name: .group
+
+// CHECK: Index: 6
+// CHECK-NEXT: Name: .foo
+
+// CHECK: Index: 7
+// CHECK-NEXT: Name: .foo
+
+// CHECK: Symbols [
+
+// CHECK: Name: f1
+// CHECK-NOT: }
+// CHECK: Section: .group (0x1)
+
+// CHECK: Name: f2
+// CHECK-NOT: }
+// CHECK: Section: .group (0x2)
+
+// CHECK: Name: .foo
+// CHECK-NOT: }
+// CHECK: Section: .foo (0x6)
+
+// CHECK: Name: .foo
+// CHECK-NOT: }
+// CHECK: Section: .foo (0x7)
+
+
+	.section	.foo,"axG",@progbits,f1,comdat
+        nop
+
+	.section	.foo,"axG",@progbits,f2,comdat
+        nop
+
diff --git a/test/MC/ELF/comdat-reloc.s b/test/MC/ELF/comdat-reloc.s
new file mode 100644
index 000000000000..d893a7b149ef
--- /dev/null
+++ b/test/MC/ELF/comdat-reloc.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
+
+  .text
+  .globl  hello
+  .type  hello,@function
+hello:
+  call  world
+  ret
+
+  .section  .text.world,"axG",@progbits,world,comdat
+  .type  world,@function
+world:
+  call  doctor
+  ret
+
+// CHECK:  Name: .group
+// CHECK-NOT: SectionData
+// CHECK: SectionData
+// CHECK-NEXT: 0000: 01000000 06000000 07000000
+
+// CHECK: Index: 6
+// CHECK-NEXT: Name: .text.world
+// CHECK-NOT: Section {
+// CHECK: SHF_GROUP
+
+// CHECK: Index: 7
+// CHECK-NEXT: Name: .rela.text.world
+// CHECK-NOT: Section {
+// CHECK: SHF_GROUP
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index f9469dfae273..3e4a001f05ad 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -39,7 +39,7 @@
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     Address: 0x0
 // CHECK-NEXT:     Offset: 0x54
-// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Size: 12
 // CHECK-NEXT:     Link: 13
 // CHECK-NEXT:     Info: 13
 // CHECK-NEXT:     AddressAlignment: 4
@@ -82,7 +82,7 @@
 g1:
         nop
 
-        .section	.bar,"axG",@progbits,g1,comdat
+        .section	.bar,"ax?",@progbits
         nop
 
         .section	.zed,"axG",@progbits,g2,comdat
diff --git a/test/MC/ELF/comp-dir.s b/test/MC/ELF/comp-dir.s
index 59e3d7ded261..1b91f64a502c 100644
--- a/test/MC/ELF/comp-dir.s
+++ b/test/MC/ELF/comp-dir.s
@@ -1,7 +1,17 @@
+// REQUIRES: shell
+// XFAIL: mingw
 // RUN: llvm-mc -triple=x86_64-linux-unknown -g -fdebug-compilation-dir=/test/comp/dir %s -filetype=obj -o %t.o
 // RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
 
 // CHECK: DW_AT_comp_dir [DW_FORM_string] ("{{([A-Za-z]:.*)?}}/test/comp/dir")
 
+// RUN: mkdir -p %t.foo
+// RUN: ln -sf %t.foo %t.bar
+// RUN: cd %t.foo
+// RUN: env PWD=%t.bar llvm-mc -triple=x86_64-linux-unknown -g %s -filetype=obj -o %t.o
+// RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck --check-prefix=PWD %s
+// PWD: DW_AT_comp_dir [DW_FORM_string] ("{{.*}}.bar")
+
+
 f:
   nop
diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s
index 75e050e9da1c..38ef8284a0df 100644
--- a/test/MC/ELF/debug-line.s
+++ b/test/MC/ELF/debug-line.s
@@ -1,6 +1,15 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
-// Test that .debug_line is populated.
+// Test that .debug_line is populated.  TODO: This test should really be using
+// llvm-dwarfdump, but it cannot parse this particular object file.  The content
+// of .debug_line was checked using GNU binutils:
+
+// $ objdump --dwarf=decodedline debug-line.o
+// [...]
+// File name                            Line number    Starting address
+// foo.c                                          4                   0
+// foo.c                                          5                 0x4
+// foo.c                                          6                 0x5
 
 // CHECK:        Section {
 // CHECK:          Name: .debug_line
@@ -8,17 +17,18 @@
 // CHECK-NEXT:     Flags [
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     Address: 0x0
-// CHECK-NEXT:     Offset: 0x44
-// CHECK-NEXT:     Size: 55
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 57
 // CHECK-NEXT:     Link: 0
 // CHECK-NEXT:     Info: 0
 // CHECK-NEXT:     AddressAlignment: 1
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     SectionData (
-// CHECK-NEXT:       0000: 33000000 02001C00 00000101 FB0E0D00
+
+// CHECK-NEXT:       0000: 35000000 02001C00 00000101 FB0E0D00
 // CHECK-NEXT:       0010: 01010101 00000001 00000100 666F6F2E
 // CHECK-NEXT:       0020: 63000000 00000009 02000000 00000000
-// CHECK-NEXT:       0030: 00150204 000101
+// CHECK-NEXT:       0030: 00154B21 02080001 01
 // CHECK-NEXT:     )
 // CHECK-NEXT:   }
 
@@ -28,3 +38,12 @@
 	.file 1 "foo.c"
 	.loc 1 4 0
 	subq	$8, %rsp
+
+// Test that .loc works with values, not just instructions.
+
+	.loc 1 5 0
+	.byte 0xc3
+
+	.loc 1 6 0
+l:
+	.quad l
diff --git a/test/MC/ELF/debug-line2.s b/test/MC/ELF/debug-line2.s
new file mode 100644
index 000000000000..71b0b1647886
--- /dev/null
+++ b/test/MC/ELF/debug-line2.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
+
+// Test that two subsequent .loc directives generate two
+// distinct line table entries.
+
+// CHECK:        Section {
+// CHECK:          Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 56
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 34000000 02001C00 00000101 FB0E0D00
+// CHECK-NEXT:       0010: 01010101 00000001 00000100 666F6F2E
+// CHECK-NEXT:       0020: 63000000 00000009 02000000 00000000
+// CHECK-NEXT:       0030: 00011302 01000101
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+
+	.section	.debug_line,"",@progbits
+	.text
+
+	.file 1 "foo.c"
+	.loc 1 1 0
+	.loc 1 2 0
+	nop
diff --git a/test/MC/ELF/file-double.s b/test/MC/ELF/file-double.s
new file mode 100644
index 000000000000..b0731e67ed0d
--- /dev/null
+++ b/test/MC/ELF/file-double.s
@@ -0,0 +1,47 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
+
+// Test that a STT_FILE symbol and a symbol of the same name can coexist.
+
+.file "foo.c"
+.file "bar.c"
+	.globl foo.c
+foo.c:
+
+	.globl bar.c
+bar.c:
+
+// CHECK:        Symbol {
+// CHECK:          Name: foo.c (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: File
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
+// CHECK:          Name: bar.c (7)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: File
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:        Name: bar.c (7)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:        Name: foo.c (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
index 907bf424f470..a702bc8610c8 100644
--- a/test/MC/ELF/gen-dwarf.s
+++ b/test/MC/ELF/gen-dwarf.s
@@ -15,11 +15,11 @@ foo:
     .size foo, .-foo
 
 // CHECK:      Relocations [
-// CHECK:        Section ({{[^ ]+}}) .debug_info {
+// CHECK:        Section ({{[^ ]+}}) .rel.debug_info {
 // CHECK-NEXT:     0x6 R_386_32 .debug_abbrev 0x0
 // CHECK-NEXT:     0xC R_386_32 .debug_line 0x0
 // CHECK:        }
-// CHECK-NEXT:   Section ({{[^ ]+}}) .debug_aranges {
+// CHECK-NEXT:   Section ({{[^ ]+}}) .rel.debug_aranges {
 // CHECK-NEXT:     0x6 R_386_32 .debug_info 0x0
 // CHECK-NEXT:     0x10 R_386_32 .text 0x0
 // CHECK-NEXT:   }
diff --git a/test/MC/ELF/got.s b/test/MC/ELF/got.s
index 60dea6d3b172..30114b75d84d 100644
--- a/test/MC/ELF/got.s
+++ b/test/MC/ELF/got.s
@@ -7,7 +7,7 @@
         movl	foo@GOTPCREL(%rip), %eax
 
 // CHECK:      Relocations [
-// CHECK:        Section ({{[^ ]+}}) .text {
+// CHECK:        Section ({{[^ ]+}}) .rela.text {
 // CHECK-NEXT:       0x{{[^ ]+}} R_X86_64_GOT32 foo 0x{{[^ ]+}}
 // CHECK-NEXT:       0x{{[^ ]+}} R_X86_64_GOTPCREL foo 0x{{[^ ]+}}
 // CHECK-NEXT:   }
diff --git a/test/MC/ELF/lit.local.cfg b/test/MC/ELF/lit.local.cfg
index 56bf00859572..ba763cf03ffc 100644
--- a/test/MC/ELF/lit.local.cfg
+++ b/test/MC/ELF/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/ELF/local-reloc.s b/test/MC/ELF/local-reloc.s
index 4241ba5af410..0c745197c814 100644
--- a/test/MC/ELF/local-reloc.s
+++ b/test/MC/ELF/local-reloc.s
@@ -7,7 +7,7 @@
 foo:
 
 // CHECKT:     Relocations [
-// CHECK:        Section (1) .text {
+// CHECK:        Section (2) .rela.text {
 // CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32S .text 0x{{[^ ]+}}
 // CHECK-NEXT:   }
 // CHECK-NEXT: ]
diff --git a/test/MC/ELF/merge.s b/test/MC/ELF/merge.s
index d34635a6710a..0e92583192d4 100644
--- a/test/MC/ELF/merge.s
+++ b/test/MC/ELF/merge.s
@@ -23,7 +23,7 @@ zed:
 foo:
 
 // CHECK:      Relocations [
-// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:   Section (2) .rela.text {
 // CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PC32    .Lfoo 0x{{[^ ]+}}
 // CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32      .sec1 0x{{[^ ]+}}
 // CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32      .Lfoo 0x{{[^ ]+}}
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 24d0172e724e..9bc831056ae3 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -4,7 +4,7 @@
 // correctly point to the section or the symbol.
 
 // CHECK:      Relocations [
-// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:   Section (2) .rel.text {
 // CHECK-NEXT:     0x2          R_386_GOTOFF     .Lfoo 0x0
 // CHECK-NEXT:     0x{{[^ ]+}}  R_386_PLT32      bar2 0x0
 // CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOTPC      _GLOBAL_OFFSET_TABLE_ 0x0
diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s
index 551f5ff378ef..fc7420c39898 100644
--- a/test/MC/ELF/relocation-pc.s
+++ b/test/MC/ELF/relocation-pc.s
@@ -9,8 +9,6 @@
 // CHECK:          Index: 1
 // CHECK-NEXT:     Name: .text
 // CHECK:          Relocations [
-// CHECK-NEXT:       0x1 R_X86_64_PC8 - 0x0
-// CHECK-NEXT:       0x3 R_X86_64_PC32 - 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:   }
 
@@ -28,5 +26,7 @@
 // CHECK-NEXT:     AddressAlignment: 8
 // CHECK-NEXT:     EntrySize: 24
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x1 R_X86_64_PC8 - 0x0
+// CHECK-NEXT:       0x3 R_X86_64_PC32 - 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:   }
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index 19bcc18d8f88..682307501d66 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -14,14 +14,17 @@ bar:
         leaq	foo@TPOFF(%rax), %rax    # R_X86_64_TPOFF32
         leaq	foo@TLSLD(%rip), %rdi    # R_X86_64_TLSLD
         leaq	foo@dtpoff(%rax), %rcx   # R_X86_64_DTPOFF32
+        movabs  foo@GOT, %rax		 # R_X86_64_GOT64
+        movabs  foo@GOTOFF, %rax	 # R_X86_64_GOTOFF64
         pushq    $bar
         movq	foo(%rip), %rdx
         leaq    foo-bar(%r14),%r14
         addq	$bar,%rax         # R_X86_64_32S
-
+	.quad	foo@DTPOFF
+        movabsq	$baz@TPOFF, %rax
 
 // CHECK:        Section {
-// CHECK:          Name: .text
+// CHECK:          Name: .rela.text
 // CHECK:          Relocations [
 // CHECK-NEXT:       0x1 R_X86_64_32        .text
 // CHECK-NEXT:       0x8 R_X86_64_32S       .text
@@ -34,10 +37,14 @@ bar:
 // CHECK-NEXT:       0x3B R_X86_64_TPOFF32  foo 0x0
 // CHECK-NEXT:       0x42 R_X86_64_TLSLD    foo 0xFFFFFFFFFFFFFFFC
 // CHECK-NEXT:       0x49 R_X86_64_DTPOFF32 foo 0x0
-// CHECK-NEXT:       0x4E R_X86_64_32S      .text 0x0
-// CHECK-NEXT:       0x55 R_X86_64_PC32     foo 0xFFFFFFFFFFFFFFFC
-// CHECK-NEXT:       0x5C R_X86_64_PC32     foo 0x5C
-// CHECK-NEXT:       0x63 R_X86_64_32S      .text 0x0
+// CHECK-NEXT:       0x4F R_X86_64_GOT64 foo 0x0
+// CHECK-NEXT:       0x59 R_X86_64_GOTOFF64 foo 0x0
+// CHECK-NEXT:       0x62 R_X86_64_32S .text 0x0
+// CHECK-NEXT:       0x69 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x70 R_X86_64_PC32 foo 0x70
+// CHECK-NEXT:       0x77 R_X86_64_32S .text 0x0
+// CHECK-NEXT:       0x7B R_X86_64_DTPOFF64 foo 0x0
+// CHECK-NEXT:       0x85 R_X86_64_TPOFF64 baz 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:   }
 
diff --git a/test/MC/ELF/rename.s b/test/MC/ELF/rename.s
index c50910b41ec0..5364dde9f460 100644
--- a/test/MC/ELF/rename.s
+++ b/test/MC/ELF/rename.s
@@ -32,10 +32,27 @@ defined3:
 // CHECK-NEXT:     AddressAlignment: 4
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 2
+// CHECK-NEXT:     Name: .rela.text (1)
+// CHECK-NEXT:     Type: SHT_RELA (0x4)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x320
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 6
+// CHECK-NEXT:     Info: 1
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
 // CHECK-NEXT:       0x0 R_X86_64_32 .text 0x0
 // CHECK-NEXT:     ]
 // CHECK-NEXT:   }
 
+
 // Symbol 2 is section 1
 // CHECK:        Symbol {
 // CHECK:          Name: .text (0)
diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s
index a6794034c94d..7dc23c2e596a 100644
--- a/test/MC/ELF/section.s
+++ b/test/MC/ELF/section.s
@@ -5,12 +5,12 @@
 .section	.note.GNU-stack,"",@progbits
 .section	.note.GNU-stack2,"",%progbits
 .section	.note.GNU-,"",@progbits
-.section	-.note.GNU,"",@progbits
+.section	-.note.GNU,"","progbits"
 
 // CHECK: Name: .note.GNU-stack (56)
-// CHECK: Name: .note.GNU-stack2 (143)
-// CHECK: Name: .note.GNU- (160)
-// CHECK: Name: -.note.GNU (132)
+// CHECK: Name: .note.GNU-stack2 (153)
+// CHECK: Name: .note.GNU- (170)
+// CHECK: Name: -.note.GNU (142)
 
 // Test that the defaults are used
 
@@ -120,11 +120,28 @@ bar:
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:   }
 
+.section .excluded,"e",@progbits
+
+// CHECK:      Section {
+// CHECK:        Name: .excluded (92)
+// CHECK-NEXT:   Type: SHT_PROGBITS (0x1)
+// CHECK-NEXT:   Flags [ (0x80000000)
+// CHECK-NEXT:     SHF_EXCLUDE (0x80000000)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Address: 0x0
+// CHECK-NEXT:   Offset: 0x50
+// CHECK-NEXT:   Size: 0
+// CHECK-NEXT:   Link: 0
+// CHECK-NEXT:   Info: 0
+// CHECK-NEXT:   AddressAlignment: 1
+// CHECK-NEXT:   EntrySize: 0
+// CHECK-NEXT: }
+
 // Test that we handle the strings like gas
 .section bar-"foo"
 .section "foo"
 
 // CHECK:        Section {
-// CHECK:          Name: bar-"foo" (171)
+// CHECK:          Name: bar-"foo" (181)
 // CHECK:        Section {
 // CHECK:          Name: foo (52)
diff --git a/test/MC/ELF/symbol-names.s b/test/MC/ELF/symbol-names.s
new file mode 100644
index 000000000000..6459ac93ccf0
--- /dev/null
+++ b/test/MC/ELF/symbol-names.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj -t | FileCheck %s
+
+// MC allows ?'s in symbol names as an extension.
+
+.text
+.globl foo?bar
+.type foo?bar, @function
+foo?bar:
+ret
+
+// CHECK: Symbol
+// CHECK: Name: foo?bar
diff --git a/test/MC/ELF/symref.s b/test/MC/ELF/symref.s
index 9a71a81930ee..c8015b96a316 100644
--- a/test/MC/ELF/symref.s
+++ b/test/MC/ELF/symref.s
@@ -22,7 +22,7 @@ defined3:
 global1:
 
 // CHECK:      Relocations [
-// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:   Section (2) .rela.text {
 // CHECK-NEXT:     0x0 R_X86_64_32 .text 0x0
 // CHECK-NEXT:     0x4 R_X86_64_32 bar2@zed 0x0
 // CHECK-NEXT:     0x8 R_X86_64_32 .text 0x0
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index a5b98129c94a..c2f3631ec432 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -31,6 +31,16 @@ tls:
         .type tls,@tls_object
         .type tls,@gnu_indirect_function
 
+// Test that "<type>" is accepted.
+tls_quoted:
+        .global tls_quoted
+        .type tls_quoted,"tls_object"
+
+// Test that the upper-case STT_<TYPE> form is accepted.
+tls_upper_case:
+        .global tls_upper_case
+        .type tls_upper_case,STT_TLS
+
 // CHECK:        Symbol {
 // CHECK:          Name: bar
 // CHECK-NEXT:     Value: 0x0
@@ -85,3 +95,21 @@ tls:
 // CHECK-NEXT:     Other: 0
 // CHECK-NEXT:     Section: .text (0x1)
 // CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: tls_quoted
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: tls_upper_case
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weak-relocation.s b/test/MC/ELF/weak-relocation.s
index 0f5bba2383d7..19153aa50daa 100644
--- a/test/MC/ELF/weak-relocation.s
+++ b/test/MC/ELF/weak-relocation.s
@@ -8,7 +8,7 @@ bar:
         call    foo
 
 // CHECK:      Relocations [
-// CHECK-NEXT:   Section ({{[0-9]+}}) .text {
+// CHECK-NEXT:   Section ({{[0-9]+}}) .rela.text {
 // CHECK-NEXT:     0x1 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC
 // CHECK-NEXT:   }
 // CHECK-NEXT: ]
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index 87173641b56f..6c2d33397c85 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -71,6 +71,15 @@ bar15:
 
 // CHECK:      Symbols [
 // CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name:  (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local (0x0)
+// CHECK-NEXT:     Type: None (0x0)
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:  (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: bar6 (21)
 // CHECK-NEXT:     Value: 0x18
 // CHECK-NEXT:     Size: 0
diff --git a/test/MC/ELF/x86_64-reloc-sizetest.s b/test/MC/ELF/x86_64-reloc-sizetest.s
index bd67ee0f9de8..10c13123e650 100644
--- a/test/MC/ELF/x86_64-reloc-sizetest.s
+++ b/test/MC/ELF/x86_64-reloc-sizetest.s
@@ -7,7 +7,7 @@ L: movq $(L + 2147483648),%rax
 
 
 // CHECK:      Relocations [
-// CHECK-NEXT:   Section ({{[0-9]+}}) .text {
+// CHECK-NEXT:   Section ({{[0-9]+}}) .rela.text {
 // CHECK-NEXT:     0x3 R_X86_64_32S {{[^ ]+}} 0x80000000
 // CHECK-NEXT:   }
 // CHECK-NEXT: ]
diff --git a/test/MC/MBlaze/lit.local.cfg b/test/MC/MBlaze/lit.local.cfg
deleted file mode 100644
index b0e1d850ff96..000000000000
--- a/test/MC/MBlaze/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
-targets = set(config.root.targets_to_build.split())
-if not 'MBlaze' in targets:
-    config.unsupported = True
-
diff --git a/test/MC/MBlaze/mblaze_branch.s b/test/MC/MBlaze/mblaze_branch.s
deleted file mode 100644
index 2ec431926604..000000000000
--- a/test/MC/MBlaze/mblaze_branch.s
+++ /dev/null
@@ -1,197 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to make sure that all of the TYPE-A instructions supported by
-# the Microblaze can be parsed by the assembly parser.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   000000 00000 00000 00000 00000000000
-
-# CHECK:    beq
-# BINARY:   100111 00000 00010 00011 00000000000
-# CHECK:    encoding: [0x9c,0x02,0x18,0x00]
-            beq     r2, r3
-
-# CHECK:    bge
-# BINARY:   100111 00101 00010 00011 00000000000
-# CHECK:    encoding: [0x9c,0xa2,0x18,0x00]
-            bge     r2, r3
-
-# CHECK:    bgt
-# BINARY:   100111 00100 00010 00011 00000000000
-# CHECK:    encoding: [0x9c,0x82,0x18,0x00]
-            bgt     r2, r3
-
-# CHECK:    ble
-# BINARY:   100111 00011 00010 00011 00000000000
-# CHECK:    encoding: [0x9c,0x62,0x18,0x00]
-            ble     r2, r3
-
-# CHECK:    blt
-# BINARY:   100111 00010 00010 00011 00000000000
-# CHECK:    encoding: [0x9c,0x42,0x18,0x00]
-            blt     r2, r3
-
-# CHECK:    bne
-# BINARY:   100111 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x9c,0x22,0x18,0x00]
-            bne     r2, r3
-
-# CHECK:    beqd
-# BINARY:   100111 10000 00010 00011 00000000000
-# CHECK:    encoding: [0x9e,0x02,0x18,0x00]
-            beqd    r2, r3
-
-# CHECK:    bged
-# BINARY:   100111 10101 00010 00011 00000000000
-# CHECK:    encoding: [0x9e,0xa2,0x18,0x00]
-            bged    r2, r3
-
-# CHECK:    bgtd
-# BINARY:   100111 10100 00010 00011 00000000000
-# CHECK:    encoding: [0x9e,0x82,0x18,0x00]
-            bgtd    r2, r3
-
-# CHECK:    bled
-# BINARY:   100111 10011 00010 00011 00000000000
-# CHECK:    encoding: [0x9e,0x62,0x18,0x00]
-            bled    r2, r3
-
-# CHECK:    bltd
-# BINARY:   100111 10010 00010 00011 00000000000
-# CHECK:    encoding: [0x9e,0x42,0x18,0x00]
-            bltd    r2, r3
-
-# CHECK:    bned
-# BINARY:   100111 10001 00010 00011 00000000000
-# CHECK:    encoding: [0x9e,0x22,0x18,0x00]
-            bned    r2, r3
-
-# CHECK:    br
-# BINARY:   100110 00000 00000 00011 00000000000
-# CHECK:    encoding: [0x98,0x00,0x18,0x00]
-            br      r3
-
-# CHECK:    bra
-# BINARY:   100110 00000 01000 00011 00000000000
-# CHECK:    encoding: [0x98,0x08,0x18,0x00]
-            bra     r3
-
-# CHECK:    brd
-# BINARY:   100110 00000 10000 00011 00000000000
-# CHECK:    encoding: [0x98,0x10,0x18,0x00]
-            brd     r3
-
-# CHECK:    brad
-# BINARY:   100110 00000 11000 00011 00000000000
-# CHECK:    encoding: [0x98,0x18,0x18,0x00]
-            brad    r3
-
-# CHECK:    brld
-# BINARY:   100110 01111 10100 00011 00000000000
-# CHECK:    encoding: [0x99,0xf4,0x18,0x00]
-            brld    r15, r3
-
-# CHECK:    brald
-# BINARY:   100110 01111 11100 00011 00000000000
-# CHECK:    encoding: [0x99,0xfc,0x18,0x00]
-            brald   r15, r3
-
-# CHECK:    brk
-# BINARY:   100110 01111 01100 00011 00000000000
-# CHECK:    encoding: [0x99,0xec,0x18,0x00]
-            brk     r15, r3
-
-# CHECK:    beqi
-# BINARY:   101111 00000 00010 0000000000000000
-# CHECK:    encoding: [0xbc,0x02,0x00,0x00]
-            beqi    r2, 0
-
-# CHECK:    bgei
-# BINARY:   101111 00101 00010 0000000000000000
-# CHECK:    encoding: [0xbc,0xa2,0x00,0x00]
-            bgei    r2, 0
-
-# CHECK:    bgti
-# BINARY:   101111 00100 00010 0000000000000000
-# CHECK:    encoding: [0xbc,0x82,0x00,0x00]
-            bgti    r2, 0
-
-# CHECK:    blei
-# BINARY:   101111 00011 00010 0000000000000000
-# CHECK:    encoding: [0xbc,0x62,0x00,0x00]
-            blei    r2, 0
-
-# CHECK:    blti
-# BINARY:   101111 00010 00010 0000000000000000
-# CHECK:    encoding: [0xbc,0x42,0x00,0x00]
-            blti    r2, 0
-
-# CHECK:    bnei
-# BINARY:   101111 00001 00010 0000000000000000
-# CHECK:    encoding: [0xbc,0x22,0x00,0x00]
-            bnei    r2, 0
-
-# CHECK:    beqid
-# BINARY:   101111 10000 00010 0000000000000000
-# CHECK:    encoding: [0xbe,0x02,0x00,0x00]
-            beqid   r2, 0
-
-# CHECK:    bgeid
-# BINARY:   101111 10101 00010 0000000000000000
-# CHECK:    encoding: [0xbe,0xa2,0x00,0x00]
-            bgeid   r2, 0
-
-# CHECK:    bgtid
-# BINARY:   101111 10100 00010 0000000000000000
-# CHECK:    encoding: [0xbe,0x82,0x00,0x00]
-            bgtid   r2, 0
-
-# CHECK:    bleid
-# BINARY:   101111 10011 00010 0000000000000000
-# CHECK:    encoding: [0xbe,0x62,0x00,0x00]
-            bleid   r2, 0
-
-# CHECK:    bltid
-# BINARY:   101111 10010 00010 0000000000000000
-# CHECK:    encoding: [0xbe,0x42,0x00,0x00]
-            bltid   r2, 0
-
-# CHECK:    bneid
-# BINARY:   101111 10001 00010 0000000000000000
-# CHECK:    encoding: [0xbe,0x22,0x00,0x00]
-            bneid   r2, 0
-
-# CHECK:    bri
-# BINARY:   101110 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb8,0x00,0x00,0x00]
-            bri     0
-
-# CHECK:    brai
-# BINARY:   101110 00000 01000 0000000000000000
-# CHECK:    encoding: [0xb8,0x08,0x00,0x00]
-            brai    0
-
-# CHECK:    brid
-# BINARY:   101110 00000 10000 0000000000000000
-# CHECK:    encoding: [0xb8,0x10,0x00,0x00]
-            brid    0
-
-# CHECK:    braid
-# BINARY:   101110 00000 11000 0000000000000000
-# CHECK:    encoding: [0xb8,0x18,0x00,0x00]
-            braid   0
-
-# CHECK:    brlid
-# BINARY:   101110 01111 10100 0000000000000000
-# CHECK:    encoding: [0xb9,0xf4,0x00,0x00]
-            brlid   r15, 0
-
-# CHECK:    bralid
-# BINARY:   101110 01111 11100 0000000000000000
-# CHECK:    encoding: [0xb9,0xfc,0x00,0x00]
-            bralid  r15, 0
-
-# CHECK:    brki
-# BINARY:   101110 01111 01100 0000000000000000
-# CHECK:    encoding: [0xb9,0xec,0x00,0x00]
-            brki    r15, 0
diff --git a/test/MC/MBlaze/mblaze_fpu.s b/test/MC/MBlaze/mblaze_fpu.s
deleted file mode 100644
index a3b683848513..000000000000
--- a/test/MC/MBlaze/mblaze_fpu.s
+++ /dev/null
@@ -1,77 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to ensure that all FPU instructions can be parsed by the
-# assembly parser correctly.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   011011 00000 00000 00000 00000000000
-
-# CHECK:    fadd
-# BINARY:   010110 00000 00001 00010 00000000000
-# CHECK:    encoding: [0x58,0x01,0x10,0x00]
-            fadd         r0, r1, r2
-
-# CHECK:    frsub
-# BINARY:   010110 00000 00001 00010 00010000000
-# CHECK:    encoding: [0x58,0x01,0x10,0x80]
-            frsub        r0, r1, r2
-
-# CHECK:    fmul
-# BINARY:   010110 00000 00001 00010 00100000000
-# CHECK:    encoding: [0x58,0x01,0x11,0x00]
-            fmul         r0, r1, r2
-
-# CHECK:    fdiv
-# BINARY:   010110 00000 00001 00010 00110000000
-# CHECK:    encoding: [0x58,0x01,0x11,0x80]
-            fdiv         r0, r1, r2
-
-# CHECK:    fsqrt
-# BINARY:   010110 00000 00001 00000 01110000000
-# CHECK:    encoding: [0x58,0x01,0x03,0x80]
-            fsqrt        r0, r1
-
-# CHECK:    fint
-# BINARY:   010110 00000 00001 00000 01100000000
-# CHECK:    encoding: [0x58,0x01,0x03,0x00]
-            fint         r0, r1
-
-# CHECK:    flt
-# BINARY:   010110 00000 00001 00000 01010000000
-# CHECK:    encoding: [0x58,0x01,0x02,0x80]
-            flt          r0, r1
-
-# CHECK:    fcmp.un
-# BINARY:   010110 00000 00001 00010 01000000000
-# CHECK:    encoding: [0x58,0x01,0x12,0x00]
-            fcmp.un     r0, r1, r2
-
-# CHECK:    fcmp.lt
-# BINARY:   010110 00000 00001 00010 01000010000
-# CHECK:    encoding: [0x58,0x01,0x12,0x10]
-            fcmp.lt     r0, r1, r2
-
-# CHECK:    fcmp.eq
-# BINARY:   010110 00000 00001 00010 01000100000
-# CHECK:    encoding: [0x58,0x01,0x12,0x20]
-            fcmp.eq     r0, r1, r2
-
-# CHECK:    fcmp.le
-# BINARY:   010110 00000 00001 00010 01000110000
-# CHECK:    encoding: [0x58,0x01,0x12,0x30]
-            fcmp.le     r0, r1, r2
-
-# CHECK:    fcmp.gt
-# BINARY:   010110 00000 00001 00010 01001000000
-# CHECK:    encoding: [0x58,0x01,0x12,0x40]
-            fcmp.gt     r0, r1, r2
-
-# CHECK:    fcmp.ne
-# BINARY:   010110 00000 00001 00010 01001010000
-# CHECK:    encoding: [0x58,0x01,0x12,0x50]
-            fcmp.ne     r0, r1, r2
-
-# CHECK:    fcmp.ge
-# BINARY:   010110 00000 00001 00010 01001100000
-# CHECK:    encoding: [0x58,0x01,0x12,0x60]
-            fcmp.ge     r0, r1, r2
diff --git a/test/MC/MBlaze/mblaze_fsl.s b/test/MC/MBlaze/mblaze_fsl.s
deleted file mode 100644
index d0a42b34991f..000000000000
--- a/test/MC/MBlaze/mblaze_fsl.s
+++ /dev/null
@@ -1,568 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to ensure that all FSL immediate operands and FSL instructions
-# can be parsed by the assembly parser correctly.
-
-# TYPE F:   OPCODE RD           NCTAE        FSL
-# BINARY:   011011 00000 000000 00000 000000 0000
-
-# TYPE FD:  OPCODE RD          RB      NCTAE
-# BINARY:   011011 00000 00000 00000 0 00000 00000
-
-# TYPE FP:  OPCODE       RA      NCTA         FSL
-#           000000 00000 00000 1 0000 0000000 0000
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x00,0x00]
-            get         r0, rfsl0
-
-# CHECK:    nget
-# BINARY:   011011 00000 000000 10000 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x40,0x00]
-            nget        r0, rfsl0
-
-# CHECK:    cget
-# BINARY:   011011 00000 000000 01000 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x20,0x00]
-            cget        r0, rfsl0
-
-# CHECK:    ncget
-# BINARY:   011011 00000 000000 11000 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x60,0x00]
-            ncget       r0, rfsl0
-
-# CHECK:    tget
-# BINARY:   011011 00000 000000 00100 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x10,0x00]
-            tget        r0, rfsl0
-
-# CHECK:    tnget
-# BINARY:   011011 00000 000000 10100 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x50,0x00]
-            tnget       r0, rfsl0
-
-# CHECK:    tcget
-# BINARY:   011011 00000 000000 01100 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x30,0x00]
-            tcget       r0, rfsl0
-
-# CHECK:    tncget
-# BINARY:   011011 00000 000000 11100 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x70,0x00]
-            tncget      r0, rfsl0
-
-# CHECK:    aget
-# BINARY:   011011 00000 000000 00010 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x08,0x00]
-            aget        r0, rfsl0
-
-# CHECK:    naget
-# BINARY:   011011 00000 000000 10010 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x48,0x00]
-            naget       r0, rfsl0
-
-# CHECK:    caget
-# BINARY:   011011 00000 000000 01010 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x28,0x00]
-            caget       r0, rfsl0
-
-# CHECK:    ncaget
-# BINARY:   011011 00000 000000 11010 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x68,0x00]
-            ncaget      r0, rfsl0
-
-# CHECK:    taget
-# BINARY:   011011 00000 000000 00110 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x18,0x00]
-            taget       r0, rfsl0
-
-# CHECK:    tnaget
-# BINARY:   011011 00000 000000 10110 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x58,0x00]
-            tnaget      r0, rfsl0
-
-# CHECK:    tcaget
-# BINARY:   011011 00000 000000 01110 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x38,0x00]
-            tcaget      r0, rfsl0
-
-# CHECK:    tncaget
-# BINARY:   011011 00000 000000 11110 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x78,0x00]
-            tncaget     r0, rfsl0
-
-# CHECK:    eget
-# BINARY:   011011 00000 000000 00001 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x04,0x00]
-            eget        r0, rfsl0
-
-# CHECK:    neget
-# BINARY:   011011 00000 000000 10001 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x44,0x00]
-            neget       r0, rfsl0
-
-# CHECK:    ecget
-# BINARY:   011011 00000 000000 01001 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x24,0x00]
-            ecget       r0, rfsl0
-
-# CHECK:    necget
-# BINARY:   011011 00000 000000 11001 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x64,0x00]
-            necget      r0, rfsl0
-
-# CHECK:    teget
-# BINARY:   011011 00000 000000 00101 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x14,0x00]
-            teget       r0, rfsl0
-
-# CHECK:    tneget
-# BINARY:   011011 00000 000000 10101 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x54,0x00]
-            tneget      r0, rfsl0
-
-# CHECK:    tecget
-# BINARY:   011011 00000 000000 01101 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x34,0x00]
-            tecget      r0, rfsl0
-
-# CHECK:    tnecget
-# BINARY:   011011 00000 000000 11101 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x74,0x00]
-            tnecget     r0, rfsl0
-
-# CHECK:    eaget
-# BINARY:   011011 00000 000000 00011 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x0c,0x00]
-            eaget       r0, rfsl0
-
-# CHECK:    neaget
-# BINARY:   011011 00000 000000 10011 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x4c,0x00]
-            neaget      r0, rfsl0
-
-# CHECK:    ecaget
-# BINARY:   011011 00000 000000 01011 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x2c,0x00]
-            ecaget      r0, rfsl0
-
-# CHECK:    necaget
-# BINARY:   011011 00000 000000 11011 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x6c,0x00]
-            necaget     r0, rfsl0
-
-# CHECK:    teaget
-# BINARY:   011011 00000 000000 00111 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x1c,0x00]
-            teaget      r0, rfsl0
-
-# CHECK:    tneaget
-# BINARY:   011011 00000 000000 10111 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x5c,0x00]
-            tneaget     r0, rfsl0
-
-# CHECK:    tecaget
-# BINARY:   011011 00000 000000 01111 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x3c,0x00]
-            tecaget     r0, rfsl0
-
-# CHECK:    tnecaget
-# BINARY:   011011 00000 000000 11111 000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x7c,0x00]
-            tnecaget    r0, rfsl0
-
-# CHECK:    getd
-# BINARY:   010011 00000 00000 00001 0 00000 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0x00]
-            getd        r0, r1
-
-# CHECK:    ngetd
-# BINARY:   010011 00000 00000 00001 0 10000 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0x00]
-            ngetd       r0, r1
-
-# CHECK:    cgetd
-# BINARY:   010011 00000 00000 00001 0 01000 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0x00]
-            cgetd       r0, r1
-
-# CHECK:    ncgetd
-# BINARY:   010011 00000 00000 00001 0 11000 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0x00]
-            ncgetd      r0, r1
-
-# CHECK:    tgetd
-# BINARY:   010011 00000 00000 00001 0 00100 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0x80]
-            tgetd       r0, r1
-
-# CHECK:    tngetd
-# BINARY:   010011 00000 00000 00001 0 10100 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0x80]
-            tngetd      r0, r1
-
-# CHECK:    tcgetd
-# BINARY:   010011 00000 00000 00001 0 01100 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0x80]
-            tcgetd      r0, r1
-
-# CHECK:    tncgetd
-# BINARY:   010011 00000 00000 00001 0 11100 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0x80]
-            tncgetd     r0, r1
-
-# CHECK:    agetd
-# BINARY:   010011 00000 00000 00001 0 00010 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0x40]
-            agetd       r0, r1
-
-# CHECK:    nagetd
-# BINARY:   010011 00000 00000 00001 0 10010 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0x40]
-            nagetd      r0, r1
-
-# CHECK:    cagetd
-# BINARY:   010011 00000 00000 00001 0 01010 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0x40]
-            cagetd     r0, r1
-
-# CHECK:    ncagetd
-# BINARY:   010011 00000 00000 00001 0 11010 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0x40]
-            ncagetd     r0, r1
-
-# CHECK:    tagetd
-# BINARY:   010011 00000 00000 00001 0 00110 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0xc0]
-            tagetd      r0, r1
-
-# CHECK:    tnagetd
-# BINARY:   010011 00000 00000 00001 0 10110 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0xc0]
-            tnagetd     r0, r1
-
-# CHECK:    tcagetd
-# BINARY:   010011 00000 00000 00001 0 01110 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0xc0]
-            tcagetd     r0, r1
-
-# CHECK:    tncagetd
-# BINARY:   010011 00000 00000 00001 0 11110 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0xc0]
-            tncagetd    r0, r1
-
-# CHECK:    egetd
-# BINARY:   010011 00000 00000 00001 0 00001 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0x20]
-            egetd       r0, r1
-
-# CHECK:    negetd
-# BINARY:   010011 00000 00000 00001 0 10001 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0x20]
-            negetd      r0, r1
-
-# CHECK:    ecgetd
-# BINARY:   010011 00000 00000 00001 0 01001 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0x20]
-            ecgetd      r0, r1
-
-# CHECK:    necgetd
-# BINARY:   010011 00000 00000 00001 0 11001 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0x20]
-            necgetd     r0, r1
-
-# CHECK:    tegetd
-# BINARY:   010011 00000 00000 00001 0 00101 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0xa0]
-            tegetd      r0, r1
-
-# CHECK:    tnegetd
-# BINARY:   010011 00000 00000 00001 0 10101 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0xa0]
-            tnegetd     r0, r1
-
-# CHECK:    tecgetd
-# BINARY:   010011 00000 00000 00001 0 01101 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0xa0]
-            tecgetd     r0, r1
-
-# CHECK:    tnecgetd
-# BINARY:   010011 00000 00000 00001 0 11101 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0xa0]
-            tnecgetd    r0, r1
-
-# CHECK:    eagetd
-# BINARY:   010011 00000 00000 00001 0 00011 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0x60]
-            eagetd      r0, r1
-
-# CHECK:    neagetd
-# BINARY:   010011 00000 00000 00001 0 10011 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0x60]
-            neagetd     r0, r1
-
-# CHECK:    ecagetd
-# BINARY:   010011 00000 00000 00001 0 01011 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0x60]
-            ecagetd     r0, r1
-
-# CHECK:    necagetd
-# BINARY:   010011 00000 00000 00001 0 11011 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0x60]
-            necagetd    r0, r1
-
-# CHECK:    teagetd
-# BINARY:   010011 00000 00000 00001 0 00111 00000
-# CHECK:    encoding: [0x4c,0x00,0x08,0xe0]
-            teagetd     r0, r1
-
-# CHECK:    tneagetd
-# BINARY:   010011 00000 00000 00001 0 10111 00000
-# CHECK:    encoding: [0x4c,0x00,0x0a,0xe0]
-            tneagetd    r0, r1
-
-# CHECK:    tecagetd
-# BINARY:   010011 00000 00000 00001 0 01111 00000
-# CHECK:    encoding: [0x4c,0x00,0x09,0xe0]
-            tecagetd    r0, r1
-
-# CHECK:    tnecagetd
-# BINARY:   010011 00000 00000 00001 0 11111 00000
-# CHECK:    encoding: [0x4c,0x00,0x0b,0xe0]
-            tnecagetd   r0, r1
-
-# CHECK:    put
-# BINARY:   011011 00000 00000 1 0000 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x80,0x00]
-            put         r0, rfsl0
-
-# CHECK:    aput
-# BINARY:   011011 00000 00000 1 0001 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x88,0x00]
-            aput        r0, rfsl0
-
-# CHECK:    cput
-# BINARY:   011011 00000 00000 1 0100 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xa0,0x00]
-            cput        r0, rfsl0
-
-# CHECK:    caput
-# BINARY:   011011 00000 00000 1 0101 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xa8,0x00]
-            caput       r0, rfsl0
-
-# CHECK:    nput
-# BINARY:   011011 00000 00000 1 1000 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xc0,0x00]
-            nput        r0, rfsl0
-
-# CHECK:    naput
-# BINARY:   011011 00000 00000 1 1001 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xc8,0x00]
-            naput       r0, rfsl0
-
-# CHECK:    ncput
-# BINARY:   011011 00000 00000 1 1100 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xe0,0x00]
-            ncput       r0, rfsl0
-
-# CHECK:    ncaput
-# BINARY:   011011 00000 00000 1 1101 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xe8,0x00]
-            ncaput      r0, rfsl0
-
-# CHECK:    tput
-# BINARY:   011011 00000 00000 1 0010 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x90,0x00]
-            tput        rfsl0
-
-# CHECK:    taput
-# BINARY:   011011 00000 00000 1 0011 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0x98,0x00]
-            taput       rfsl0
-
-# CHECK:    tcput
-# BINARY:   011011 00000 00000 1 0110 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xb0,0x00]
-            tcput       rfsl0
-
-# CHECK:    tcaput
-# BINARY:   011011 00000 00000 1 0111 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xb8,0x00]
-            tcaput      rfsl0
-
-# CHECK:    tnput
-# BINARY:   011011 00000 00000 1 1010 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xd0,0x00]
-            tnput       rfsl0
-
-# CHECK:    tnaput
-# BINARY:   011011 00000 00000 1 1011 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xd8,0x00]
-            tnaput      rfsl0
-
-# CHECK:    tncput
-# BINARY:   011011 00000 00000 1 1110 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xf0,0x00]
-            tncput      rfsl0
-
-# CHECK:    tncaput
-# BINARY:   011011 00000 00000 1 1111 0000000 0000
-# CHECK:    encoding: [0x6c,0x00,0xf8,0x00]
-            tncaput     rfsl0
-
-# CHECK:    putd
-# BINARY:   010011 00000 00000 00001 1 0000 000000
-# CHECK:    encoding: [0x4c,0x00,0x0c,0x00]
-            putd        r0, r1
-
-# CHECK:    aputd
-# BINARY:   010011 00000 00000 00001 1 0001 000000
-# CHECK:    encoding: [0x4c,0x00,0x0c,0x40]
-            aputd       r0, r1
-
-# CHECK:    cputd
-# BINARY:   010011 00000 00000 00001 1 0100 000000
-# CHECK:    encoding: [0x4c,0x00,0x0d,0x00]
-            cputd       r0, r1
-
-# CHECK:    caputd
-# BINARY:   010011 00000 00000 00001 1 0101 000000
-# CHECK:    encoding: [0x4c,0x00,0x0d,0x40]
-            caputd      r0, r1
-
-# CHECK:    nputd
-# BINARY:   010011 00000 00000 00001 1 1000 000000
-# CHECK:    encoding: [0x4c,0x00,0x0e,0x00]
-            nputd       r0, r1
-
-# CHECK:    naputd
-# BINARY:   010011 00000 00000 00001 1 1001 000000
-# CHECK:    encoding: [0x4c,0x00,0x0e,0x40]
-            naputd      r0, r1
-
-# CHECK:    ncputd
-# BINARY:   010011 00000 00000 00001 1 1100 000000
-# CHECK:    encoding: [0x4c,0x00,0x0f,0x00]
-            ncputd      r0, r1
-
-# CHECK:    ncaputd
-# BINARY:   010011 00000 00000 00001 1 1101 000000
-# CHECK:    encoding: [0x4c,0x00,0x0f,0x40]
-            ncaputd     r0, r1
-
-# CHECK:    tputd
-# BINARY:   010011 00000 00000 00001 1 0010 000000
-# CHECK:    encoding: [0x4c,0x00,0x0c,0x80]
-            tputd       r1
-
-# CHECK:    taputd
-# BINARY:   010011 00000 00000 00001 1 0011 000000
-# CHECK:    encoding: [0x4c,0x00,0x0c,0xc0]
-            taputd      r1
-
-# CHECK:    tcputd
-# BINARY:   010011 00000 00000 00001 1 0110 000000
-# CHECK:    encoding: [0x4c,0x00,0x0d,0x80]
-            tcputd      r1
-
-# CHECK:    tcaputd
-# BINARY:   010011 00000 00000 00001 1 0111 000000
-# CHECK:    encoding: [0x4c,0x00,0x0d,0xc0]
-            tcaputd     r1
-
-# CHECK:    tnputd
-# BINARY:   010011 00000 00000 00001 1 1010 000000
-# CHECK:    encoding: [0x4c,0x00,0x0e,0x80]
-            tnputd      r1
-
-# CHECK:    tnaputd
-# BINARY:   010011 00000 00000 00001 1 1011 000000
-# CHECK:    encoding: [0x4c,0x00,0x0e,0xc0]
-            tnaputd     r1
-
-# CHECK:    tncputd
-# BINARY:   010011 00000 00000 00001 1 1110 000000
-# CHECK:    encoding: [0x4c,0x00,0x0f,0x80]
-            tncputd     r1
-
-# CHECK:    tncaputd
-# BINARY:   010011 00000 00000 00001 1 1111 000000
-# CHECK:    encoding: [0x4c,0x00,0x0f,0xc0]
-            tncaputd    r1
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0001
-# CHECK:    encoding: [0x6c,0x00,0x00,0x01]
-            get     r0, rfsl1
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0010
-# CHECK:    encoding: [0x6c,0x00,0x00,0x02]
-            get     r0, rfsl2
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0011
-# CHECK:    encoding: [0x6c,0x00,0x00,0x03]
-            get     r0, rfsl3
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0100
-# CHECK:    encoding: [0x6c,0x00,0x00,0x04]
-            get     r0, rfsl4
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0101
-# CHECK:    encoding: [0x6c,0x00,0x00,0x05]
-            get     r0, rfsl5
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0110
-# CHECK:    encoding: [0x6c,0x00,0x00,0x06]
-            get     r0, rfsl6
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 0111
-# CHECK:    encoding: [0x6c,0x00,0x00,0x07]
-            get     r0, rfsl7
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1000
-# CHECK:    encoding: [0x6c,0x00,0x00,0x08]
-            get     r0, rfsl8
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1001
-# CHECK:    encoding: [0x6c,0x00,0x00,0x09]
-            get     r0, rfsl9
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1010
-# CHECK:    encoding: [0x6c,0x00,0x00,0x0a]
-            get     r0, rfsl10
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1011
-# CHECK:    encoding: [0x6c,0x00,0x00,0x0b]
-            get     r0, rfsl11
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1100
-# CHECK:    encoding: [0x6c,0x00,0x00,0x0c]
-            get     r0, rfsl12
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1101
-# CHECK:    encoding: [0x6c,0x00,0x00,0x0d]
-            get     r0, rfsl13
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1110
-# CHECK:    encoding: [0x6c,0x00,0x00,0x0e]
-            get     r0, rfsl14
-
-# CHECK:    get
-# BINARY:   011011 00000 000000 00000 000000 1111
-# CHECK:    encoding: [0x6c,0x00,0x00,0x0f]
-            get     r0, rfsl15
diff --git a/test/MC/MBlaze/mblaze_imm.s b/test/MC/MBlaze/mblaze_imm.s
deleted file mode 100644
index 08b8a0f26818..000000000000
--- a/test/MC/MBlaze/mblaze_imm.s
+++ /dev/null
@@ -1,194 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# In the microblaze instruction set, any TYPE-B instruction with a
-# signed immediate value requiring more than 16-bits must be prefixed
-# with an IMM instruction that contains the high 16-bits. The higher
-# 16-bits are then combined with the lower 16-bits in the original
-# instruction to form a 32-bit immediate value.
-#
-# The generation of IMM instructions is handled automatically by the
-# code emitter. Test to ensure that IMM instructions are generated
-# when they are suppose to and are not generated when they are not
-# needed.
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00000000
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000001
-# CHECK:    encoding: [0x20,0x00,0x00,0x01]
-            addi    r0, r0, 0x00000001
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000010
-# CHECK:    encoding: [0x20,0x00,0x00,0x02]
-            addi    r0, r0, 0x00000002
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000100
-# CHECK:    encoding: [0x20,0x00,0x00,0x04]
-            addi    r0, r0, 0x00000004
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000001000
-# CHECK:    encoding: [0x20,0x00,0x00,0x08]
-            addi    r0, r0, 0x00000008
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000010000
-# CHECK:    encoding: [0x20,0x00,0x00,0x10]
-            addi    r0, r0, 0x00000010
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000100000
-# CHECK:    encoding: [0x20,0x00,0x00,0x20]
-            addi    r0, r0, 0x00000020
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000001000000
-# CHECK:    encoding: [0x20,0x00,0x00,0x40]
-            addi    r0, r0, 0x00000040
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000010000000
-# CHECK:    encoding: [0x20,0x00,0x00,0x80]
-            addi    r0, r0, 0x00000080
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000100000000
-# CHECK:    encoding: [0x20,0x00,0x01,0x00]
-            addi    r0, r0, 0x00000100
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000001000000000
-# CHECK:    encoding: [0x20,0x00,0x02,0x00]
-            addi    r0, r0, 0x00000200
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000010000000000
-# CHECK:    encoding: [0x20,0x00,0x04,0x00]
-            addi    r0, r0, 0x00000400
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000100000000000
-# CHECK:    encoding: [0x20,0x00,0x08,0x00]
-            addi    r0, r0, 0x00000800
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0001000000000000
-# CHECK:    encoding: [0x20,0x00,0x10,0x00]
-            addi    r0, r0, 0x00001000
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0010000000000000
-# CHECK:    encoding: [0x20,0x00,0x20,0x00]
-            addi    r0, r0, 0x00002000
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0100000000000000
-# CHECK:    encoding: [0x20,0x00,0x40,0x00]
-            addi    r0, r0, 0x00004000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000000000
-# BINARY:   001000 00000 00000 1000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x00,0x20,0x00,0x80,0x00]
-            addi    r0, r0, 0x00008000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000000001
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x01,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00010000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000000010
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x02,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00020000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000000100
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x04,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00040000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000001000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x08,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00080000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000010000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x10,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00100000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000000100000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x20,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00200000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000001000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x40,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00400000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000010000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x00,0x80,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x00800000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000000100000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x01,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x01000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000001000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x02,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x02000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000010000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x04,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x04000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0000100000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x08,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x08000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0001000000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x10,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x10000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0010000000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x20,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x20000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 0100000000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x40,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x40000000
-
-# CHECK:    addi
-# BINARY:   101100 00000 00000 1000000000000000
-#           001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0xb0,0x00,0x80,0x00,0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0x80000000
diff --git a/test/MC/MBlaze/mblaze_memory.s b/test/MC/MBlaze/mblaze_memory.s
deleted file mode 100644
index fe744753ee4d..000000000000
--- a/test/MC/MBlaze/mblaze_memory.s
+++ /dev/null
@@ -1,107 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to make sure that all of the TYPE-A instructions supported by
-# the Microblaze can be parsed by the assembly parser.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   000000 00000 00000 00000 00000000000
-
-# CHECK:    lbu
-# BINARY:   110000 00001 00010 00011 00000000000
-# CHECK:    encoding: [0xc0,0x22,0x18,0x00]
-            lbu     r1, r2, r3
-
-# CHECK:    lbur
-# BINARY:   110000 00001 00010 00011 01000000000
-# CHECK:    encoding: [0xc0,0x22,0x1a,0x00]
-            lbur    r1, r2, r3
-
-# CHECK:    lbui
-# BINARY:   111000 00001 00010 0000000000011100
-# CHECK:    encoding: [0xe0,0x22,0x00,0x1c]
-            lbui    r1, r2, 28
-
-# CHECK:    lhu
-# BINARY:   110001 00001 00010 00011 00000000000
-# CHECK:    encoding: [0xc4,0x22,0x18,0x00]
-            lhu     r1, r2, r3
-
-# CHECK:    lhur
-# BINARY:   110001 00001 00010 00011 01000000000
-# CHECK:    encoding: [0xc4,0x22,0x1a,0x00]
-            lhur    r1, r2, r3
-
-# CHECK:    lhui
-# BINARY:   111001 00001 00010 0000000000011100
-# CHECK:    encoding: [0xe4,0x22,0x00,0x1c]
-            lhui    r1, r2, 28
-
-# CHECK:    lw
-# BINARY:   110010 00001 00010 00011 00000000000
-# CHECK:    encoding: [0xc8,0x22,0x18,0x00]
-            lw      r1, r2, r3
-
-# CHECK:    lwr
-# BINARY:   110010 00001 00010 00011 01000000000
-# CHECK:    encoding: [0xc8,0x22,0x1a,0x00]
-            lwr    r1, r2, r3
-
-# CHECK:    lwi
-# BINARY:   111010 00001 00010 0000000000011100
-# CHECK:    encoding: [0xe8,0x22,0x00,0x1c]
-            lwi     r1, r2, 28
-
-# CHECK:    lwx
-# BINARY:   110010 00001 00010 00011 10000000000
-# CHECK:    encoding: [0xc8,0x22,0x1c,0x00]
-            lwx      r1, r2, r3
-
-# CHECK:    sb
-# BINARY:   110100 00001 00010 00011 00000000000
-# CHECK:    encoding: [0xd0,0x22,0x18,0x00]
-            sb      r1, r2, r3
-
-# CHECK:    sbr
-# BINARY:   110100 00001 00010 00011 01000000000
-# CHECK:    encoding: [0xd0,0x22,0x1a,0x00]
-            sbr     r1, r2, r3
-
-# CHECK:    sbi
-# BINARY:   111100 00001 00010 0000000000011100
-# CHECK:    encoding: [0xf0,0x22,0x00,0x1c]
-            sbi     r1, r2, 28
-
-# CHECK:    sh
-# BINARY:   110101 00001 00010 00011 00000000000
-# CHECK:    encoding: [0xd4,0x22,0x18,0x00]
-            sh      r1, r2, r3
-
-# CHECK:    shr
-# BINARY:   110101 00001 00010 00011 01000000000
-# CHECK:    encoding: [0xd4,0x22,0x1a,0x00]
-            shr     r1, r2, r3
-
-# CHECK:    shi
-# BINARY:   111101 00001 00010 0000000000011100
-# CHECK:    encoding: [0xf4,0x22,0x00,0x1c]
-            shi     r1, r2, 28
-
-# CHECK:    sw
-# BINARY:   110110 00001 00010 00011 00000000000
-# CHECK:    encoding: [0xd8,0x22,0x18,0x00]
-            sw      r1, r2, r3
-
-# CHECK:    swr
-# BINARY:   110110 00001 00010 00011 01000000000
-# CHECK:    encoding: [0xd8,0x22,0x1a,0x00]
-            swr    r1, r2, r3
-
-# CHECK:    swi
-# BINARY:   111110 00001 00010 0000000000011100
-# CHECK:    encoding: [0xf8,0x22,0x00,0x1c]
-            swi     r1, r2, 28
-
-# CHECK:    swx
-# BINARY:   110110 00001 00010 00011 10000000000
-# CHECK:    encoding: [0xd8,0x22,0x1c,0x00]
-            swx      r1, r2, r3
diff --git a/test/MC/MBlaze/mblaze_operands.s b/test/MC/MBlaze/mblaze_operands.s
deleted file mode 100644
index d5f1d8059f37..000000000000
--- a/test/MC/MBlaze/mblaze_operands.s
+++ /dev/null
@@ -1,328 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to ensure that all register and immediate operands can be parsed by
-# the assembly parser correctly. Testing the parsing of FSL immediate
-# values is done in a different test.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   000000 00000 00000 00000 00000000000
-
-# CHECK:    add
-# BINARY:   000000 00000 00000 00000 00000000000
-# CHECK:    encoding: [0x00,0x00,0x00,0x00]
-            add     r0, r0, r0
-
-# CHECK:    add
-# BINARY:   000000 00001 00001 00001 00000000000
-# CHECK:    encoding: [0x00,0x21,0x08,0x00]
-            add     r1, r1, r1
-
-# CHECK:    add
-# BINARY:   000000 00010 00010 00010 00000000000
-# CHECK:    encoding: [0x00,0x42,0x10,0x00]
-            add     r2, r2, r2
-
-# CHECK:    add
-# BINARY:   000000 00011 00011 00011 00000000000
-# CHECK:    encoding: [0x00,0x63,0x18,0x00]
-            add     r3, r3, r3
-
-# CHECK:    add
-# BINARY:   000000 00100 00100 00100 00000000000
-# CHECK:    encoding: [0x00,0x84,0x20,0x00]
-            add     r4, r4, r4
-
-# CHECK:    add
-# BINARY:   000000 00101 00101 00101 00000000000
-# CHECK:    encoding: [0x00,0xa5,0x28,0x00]
-            add     r5, r5, r5
-
-# CHECK:    add
-# BINARY:   000000 00110 00110 00110 00000000000
-# CHECK:    encoding: [0x00,0xc6,0x30,0x00]
-            add     r6, r6, r6
-
-# CHECK:    add
-# BINARY:   000000 00111 00111 00111 00000000000
-# CHECK:    encoding: [0x00,0xe7,0x38,0x00]
-            add     r7, r7, r7
-
-# CHECK:    add
-# BINARY:   000000 01000 01000 01000 00000000000
-# CHECK:    encoding: [0x01,0x08,0x40,0x00]
-            add     r8, r8, r8
-
-# CHECK:    add
-# BINARY:   000000 01001 01001 01001 00000000000
-# CHECK:    encoding: [0x01,0x29,0x48,0x00]
-            add     r9, r9, r9
-
-# CHECK:    add
-# BINARY:   000000 01010 01010 01010 00000000000
-# CHECK:    encoding: [0x01,0x4a,0x50,0x00]
-            add     r10, r10, r10
-
-# CHECK:    add
-# BINARY:   000000 01011 01011 01011 00000000000
-# CHECK:    encoding: [0x01,0x6b,0x58,0x00]
-            add     r11, r11, r11
-
-# CHECK:    add
-# BINARY:   000000 01100 01100 01100 00000000000
-# CHECK:    encoding: [0x01,0x8c,0x60,0x00]
-            add     r12, r12, r12
-
-# CHECK:    add
-# BINARY:   000000 01101 01101 01101 00000000000
-# CHECK:    encoding: [0x01,0xad,0x68,0x00]
-            add     r13, r13, r13
-
-# CHECK:    add
-# BINARY:   000000 01110 01110 01110 00000000000
-# CHECK:    encoding: [0x01,0xce,0x70,0x00]
-            add     r14, r14, r14
-
-# CHECK:    add
-# BINARY:   000000 01111 01111 01111 00000000000
-# CHECK:    encoding: [0x01,0xef,0x78,0x00]
-            add     r15, r15, r15
-
-# CHECK:    add
-# BINARY:   000000 10000 10000 10000 00000000000
-# CHECK:    encoding: [0x02,0x10,0x80,0x00]
-            add     r16, r16, r16
-
-# CHECK:    add
-# BINARY:   000000 10001 10001 10001 00000000000
-# CHECK:    encoding: [0x02,0x31,0x88,0x00]
-            add     r17, r17, r17
-
-# CHECK:    add
-# BINARY:   000000 10010 10010 10010 00000000000
-# CHECK:    encoding: [0x02,0x52,0x90,0x00]
-            add     r18, r18, r18
-
-# CHECK:    add
-# BINARY:   000000 10011 10011 10011 00000000000
-# CHECK:    encoding: [0x02,0x73,0x98,0x00]
-            add     r19, r19, r19
-
-# CHECK:    add
-# BINARY:   000000 10100 10100 10100 00000000000
-# CHECK:    encoding: [0x02,0x94,0xa0,0x00]
-            add     r20, r20, r20
-
-# CHECK:    add
-# BINARY:   000000 10101 10101 10101 00000000000
-# CHECK:    encoding: [0x02,0xb5,0xa8,0x00]
-            add     r21, r21, r21
-
-# CHECK:    add
-# BINARY:   000000 10110 10110 10110 00000000000
-# CHECK:    encoding: [0x02,0xd6,0xb0,0x00]
-            add     r22, r22, r22
-
-# CHECK:    add
-# BINARY:   000000 10111 10111 10111 00000000000
-# CHECK:    encoding: [0x02,0xf7,0xb8,0x00]
-            add     r23, r23, r23
-
-# CHECK:    add
-# BINARY:   000000 11000 11000 11000 00000000000
-# CHECK:    encoding: [0x03,0x18,0xc0,0x00]
-            add     r24, r24, r24
-
-# CHECK:    add
-# BINARY:   000000 11001 11001 11001 00000000000
-# CHECK:    encoding: [0x03,0x39,0xc8,0x00]
-            add     r25, r25, r25
-
-# CHECK:    add
-# BINARY:   000000 11010 11010 11010 00000000000
-# CHECK:    encoding: [0x03,0x5a,0xd0,0x00]
-            add     r26, r26, r26
-
-# CHECK:    add
-# BINARY:   000000 11011 11011 11011 00000000000
-# CHECK:    encoding: [0x03,0x7b,0xd8,0x00]
-            add     r27, r27, r27
-
-# CHECK:    add
-# BINARY:   000000 11100 11100 11100 00000000000
-# CHECK:    encoding: [0x03,0x9c,0xe0,0x00]
-            add     r28, r28, r28
-
-# CHECK:    add
-# BINARY:   000000 11101 11101 11101 00000000000
-# CHECK:    encoding: [0x03,0xbd,0xe8,0x00]
-            add     r29, r29, r29
-
-# CHECK:    add
-# BINARY:   000000 11110 11110 11110 00000000000
-# CHECK:    encoding: [0x03,0xde,0xf0,0x00]
-            add     r30, r30, r30
-
-# CHECK:    add
-# BINARY:   000000 11111 11111 11111 00000000000
-# CHECK:    encoding: [0x03,0xff,0xf8,0x00]
-            add     r31, r31, r31
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000000
-# CHECK:    encoding: [0x20,0x00,0x00,0x00]
-            addi    r0, r0, 0
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000001
-# CHECK:    encoding: [0x20,0x00,0x00,0x01]
-            addi    r0, r0, 1
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000010
-# CHECK:    encoding: [0x20,0x00,0x00,0x02]
-            addi    r0, r0, 2
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000000100
-# CHECK:    encoding: [0x20,0x00,0x00,0x04]
-            addi    r0, r0, 4
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000001000
-# CHECK:    encoding: [0x20,0x00,0x00,0x08]
-            addi    r0, r0, 8
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000010000
-# CHECK:    encoding: [0x20,0x00,0x00,0x10]
-            addi    r0, r0, 16
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000000100000
-# CHECK:    encoding: [0x20,0x00,0x00,0x20]
-            addi    r0, r0, 32
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000001000000
-# CHECK:    encoding: [0x20,0x00,0x00,0x40]
-            addi    r0, r0, 64
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000010000000
-# CHECK:    encoding: [0x20,0x00,0x00,0x80]
-            addi    r0, r0, 128
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000000100000000
-# CHECK:    encoding: [0x20,0x00,0x01,0x00]
-            addi    r0, r0, 256
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000001000000000
-# CHECK:    encoding: [0x20,0x00,0x02,0x00]
-            addi    r0, r0, 512
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000010000000000
-# CHECK:    encoding: [0x20,0x00,0x04,0x00]
-            addi    r0, r0, 1024
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0000100000000000
-# CHECK:    encoding: [0x20,0x00,0x08,0x00]
-            addi    r0, r0, 2048
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0001000000000000
-# CHECK:    encoding: [0x20,0x00,0x10,0x00]
-            addi    r0, r0, 4096
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0010000000000000
-# CHECK:    encoding: [0x20,0x00,0x20,0x00]
-            addi    r0, r0, 8192
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 0100000000000000
-# CHECK:    encoding: [0x20,0x00,0x40,0x00]
-            addi    r0, r0, 16384
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111111111
-# CHECK:    encoding: [0x20,0x00,0xff,0xff]
-            addi    r0, r0, -1
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111111110
-# CHECK:    encoding: [0x20,0x00,0xff,0xfe]
-            addi    r0, r0, -2
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111111100
-# CHECK:    encoding: [0x20,0x00,0xff,0xfc]
-            addi    r0, r0, -4
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111111000
-# CHECK:    encoding: [0x20,0x00,0xff,0xf8]
-            addi    r0, r0, -8
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111110000
-# CHECK:    encoding: [0x20,0x00,0xff,0xf0]
-            addi    r0, r0, -16
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111100000
-# CHECK:    encoding: [0x20,0x00,0xff,0xe0]
-            addi    r0, r0, -32
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111111000000
-# CHECK:    encoding: [0x20,0x00,0xff,0xc0]
-            addi    r0, r0, -64
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111110000000
-# CHECK:    encoding: [0x20,0x00,0xff,0x80]
-            addi    r0, r0, -128
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111100000000
-# CHECK:    encoding: [0x20,0x00,0xff,0x00]
-            addi    r0, r0, -256
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111111000000000
-# CHECK:    encoding: [0x20,0x00,0xfe,0x00]
-            addi    r0, r0, -512
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111110000000000
-# CHECK:    encoding: [0x20,0x00,0xfc,0x00]
-            addi    r0, r0, -1024
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111100000000000
-# CHECK:    encoding: [0x20,0x00,0xf8,0x00]
-            addi    r0, r0, -2048
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1111000000000000
-# CHECK:    encoding: [0x20,0x00,0xf0,0x00]
-            addi    r0, r0, -4096
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1110000000000000
-# CHECK:    encoding: [0x20,0x00,0xe0,0x00]
-            addi    r0, r0, -8192
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1100000000000000
-# CHECK:    encoding: [0x20,0x00,0xc0,0x00]
-            addi    r0, r0, -16384
-
-# CHECK:    addi
-# BINARY:   001000 00000 00000 1000000000000000
-# CHECK:    encoding: [0x20,0x00,0x80,0x00]
-            addi    r0, r0, -32768
diff --git a/test/MC/MBlaze/mblaze_pattern.s b/test/MC/MBlaze/mblaze_pattern.s
deleted file mode 100644
index 6bbc234e3d43..000000000000
--- a/test/MC/MBlaze/mblaze_pattern.s
+++ /dev/null
@@ -1,22 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to ensure that all FPU instructions can be parsed by the
-# assembly parser correctly.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   011011 00000 00000 00000 00000000000
-
-# CHECK:    pcmpbf
-# BINARY:   100000 00000 00001 00010 10000000000
-# CHECK:    encoding: [0x80,0x01,0x14,0x00]
-            pcmpbf      r0, r1, r2
-
-# CHECK:    pcmpne
-# BINARY:   100011 00000 00001 00010 10000000000
-# CHECK:    encoding: [0x8c,0x01,0x14,0x00]
-            pcmpne      r0, r1, r2
-
-# CHECK:    pcmpeq
-# BINARY:   100010 00000 00001 00010 10000000000
-# CHECK:    encoding: [0x88,0x01,0x14,0x00]
-            pcmpeq      r0, r1, r2
diff --git a/test/MC/MBlaze/mblaze_shift.s b/test/MC/MBlaze/mblaze_shift.s
deleted file mode 100644
index a25502b35094..000000000000
--- a/test/MC/MBlaze/mblaze_shift.s
+++ /dev/null
@@ -1,47 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to make sure that all of the TYPE-A instructions supported by
-# the Microblaze can be parsed by the assembly parser.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   000000 00000 00000 00000 00000000000
-
-# CHECK:    bsrl
-# BINARY:   010001 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x44,0x22,0x18,0x00]
-            bsrl    r1, r2, r3
-
-# CHECK:    bsra
-# BINARY:   010001 00001 00010 00011 01000000000
-# CHECK:    encoding: [0x44,0x22,0x1a,0x00]
-            bsra    r1, r2, r3
-
-# CHECK:    bsll
-# BINARY:   010001 00001 00010 00011 10000000000
-# CHECK:    encoding: [0x44,0x22,0x1c,0x00]
-            bsll    r1, r2, r3
-
-# CHECK:    bsrli
-# BINARY:   011001 00001 00010 0000000000000000
-# CHECK:    encoding: [0x64,0x22,0x00,0x00]
-            bsrli   r1, r2, 0
-
-# CHECK:    bsrai
-# BINARY:   011001 00001 00010 0000001000000000
-# CHECK:    encoding: [0x64,0x22,0x02,0x00]
-            bsrai   r1, r2, 0
-
-# CHECK:    bslli
-# BINARY:   011001 00001 00010 0000010000000000
-# CHECK:    encoding: [0x64,0x22,0x04,0x00]
-            bslli   r1, r2, 0
-
-# CHECK:    sra
-# BINARY:   100100 00001 00010 00000 00000000001
-# CHECK:    encoding: [0x90,0x22,0x00,0x01]
-            sra     r1, r2
-
-# CHECK:    srl
-# BINARY:   100100 00001 00010 00000 00001000001
-# CHECK:    encoding: [0x90,0x22,0x00,0x41]
-            srl     r1, r2
diff --git a/test/MC/MBlaze/mblaze_special.s b/test/MC/MBlaze/mblaze_special.s
deleted file mode 100644
index c55ec277c18a..000000000000
--- a/test/MC/MBlaze/mblaze_special.s
+++ /dev/null
@@ -1,167 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to ensure that all special instructions and special registers can be
-# parsed by the assembly parser correctly.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   011011 00000 00000 00000 00000000000
-
-# CHECK:    mfs
-# BINARY:   100101 00000 00000 10000 00000000000
-# CHECK:    encoding: [0x94,0x00,0x80,0x00]
-            mfs         r0, rpc
-
-# CHECK:    msrclr
-# BINARY:   100101 00000 100010 000000000000000
-# CHECK:    encoding: [0x94,0x11,0x00,0x00]
-            msrclr      r0, 0x0
-
-# CHECK:    msrset
-# BINARY:   100101 00000 100000 000000000000000
-# CHECK:    encoding: [0x94,0x10,0x00,0x00]
-            msrset      r0, 0x0
-
-# CHECK:    mts
-# BINARY:   100101 00000 00000 11 00000000000000
-# CHECK:    encoding: [0x94,0x00,0xc0,0x00]
-            mts         rpc, r0
-
-# CHECK:    wdc
-# BINARY:   100100 00000 00000 00001 00001100100
-# CHECK:    encoding: [0x90,0x00,0x08,0x64]
-            wdc         r0, r1
-
-# CHECK:    wdc.clear
-# BINARY:   100100 00000 00000 00001 00001100110
-# CHECK:    encoding: [0x90,0x00,0x08,0x66]
-            wdc.clear   r0, r1
-
-# CHECK:    wdc.flush
-# BINARY:   100100 00000 00000 00001 00001110100
-# CHECK:    encoding: [0x90,0x00,0x08,0x74]
-            wdc.flush   r0, r1
-
-# CHECK:    wic
-# BINARY:   100100 00000 00000 00001 00001101000
-# CHECK:    encoding: [0x90,0x00,0x08,0x68]
-            wic         r0, r1
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000000000
-# CHECK:    encoding: [0x94,0x20,0x80,0x00]
-            mfs         r1, rpc
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000000001
-# CHECK:    encoding: [0x94,0x20,0x80,0x01]
-            mfs         r1, rmsr
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000000011
-# CHECK:    encoding: [0x94,0x20,0x80,0x03]
-            mfs         r1, rear
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000000101
-# CHECK:    encoding: [0x94,0x20,0x80,0x05]
-            mfs         r1, resr
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000000111
-# CHECK:    encoding: [0x94,0x20,0x80,0x07]
-            mfs         r1, rfsr
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000001011
-# CHECK:    encoding: [0x94,0x20,0x80,0x0b]
-            mfs         r1, rbtr
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10000 00000001101
-# CHECK:    encoding: [0x94,0x20,0x80,0x0d]
-            mfs         r1, redr
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10010 00000000000
-# CHECK:    encoding: [0x94,0x20,0x90,0x00]
-            mfs         r1, rpid
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10010 00000000001
-# CHECK:    encoding: [0x94,0x20,0x90,0x01]
-            mfs         r1, rzpr
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10010 00000000010
-# CHECK:    encoding: [0x94,0x20,0x90,0x02]
-            mfs         r1, rtlbx
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10010 00000000100
-# CHECK:    encoding: [0x94,0x20,0x90,0x04]
-            mfs         r1, rtlbhi
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10010 00000000011
-# CHECK:    encoding: [0x94,0x20,0x90,0x03]
-            mfs         r1, rtlblo
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000000
-# CHECK:    encoding: [0x94,0x20,0xa0,0x00]
-            mfs         r1, rpvr0
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000001
-# CHECK:    encoding: [0x94,0x20,0xa0,0x01]
-            mfs         r1, rpvr1
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000010
-# CHECK:    encoding: [0x94,0x20,0xa0,0x02]
-            mfs         r1, rpvr2
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000011
-# CHECK:    encoding: [0x94,0x20,0xa0,0x03]
-            mfs         r1, rpvr3
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000100
-# CHECK:    encoding: [0x94,0x20,0xa0,0x04]
-            mfs         r1, rpvr4
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000101
-# CHECK:    encoding: [0x94,0x20,0xa0,0x05]
-            mfs         r1, rpvr5
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000110
-# CHECK:    encoding: [0x94,0x20,0xa0,0x06]
-            mfs         r1, rpvr6
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000000111
-# CHECK:    encoding: [0x94,0x20,0xa0,0x07]
-            mfs         r1, rpvr7
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000001000
-# CHECK:    encoding: [0x94,0x20,0xa0,0x08]
-            mfs         r1, rpvr8
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000001001
-# CHECK:    encoding: [0x94,0x20,0xa0,0x09]
-            mfs         r1, rpvr9
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000001010
-# CHECK:    encoding: [0x94,0x20,0xa0,0x0a]
-            mfs         r1, rpvr10
-
-# CHECK:    mfs
-# BINARY:   100101 00001 00000 10100 00000001011
-# CHECK:    encoding: [0x94,0x20,0xa0,0x0b]
-            mfs         r1, rpvr11
diff --git a/test/MC/MBlaze/mblaze_typea.s b/test/MC/MBlaze/mblaze_typea.s
deleted file mode 100644
index a0735e482cbb..000000000000
--- a/test/MC/MBlaze/mblaze_typea.s
+++ /dev/null
@@ -1,122 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to make sure that all of the TYPE-A instructions supported by
-# the Microblaze can be parsed by the assembly parser.
-
-# TYPE A:   OPCODE RD    RA    RB    FLAGS
-# BINARY:   000000 00000 00000 00000 00000000000
-
-# CHECK:    add
-# BINARY:   000000 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x00,0x22,0x18,0x00]
-            add     r1, r2, r3
-
-# CHECK:    addc
-# BINARY:   000010 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x08,0x22,0x18,0x00]
-            addc    r1, r2, r3
-
-# CHECK:    addk
-# BINARY:   000100 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x10,0x22,0x18,0x00]
-            addk    r1, r2, r3
-
-# CHECK:    addkc
-# BINARY:   000110 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x18,0x22,0x18,0x00]
-            addkc   r1, r2, r3
-
-# CHECK:    and
-# BINARY:   100001 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x84,0x22,0x18,0x00]
-            and     r1, r2, r3
-
-# CHECK:    andn
-# BINARY:   100011 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x8c,0x22,0x18,0x00]
-            andn    r1, r2, r3
-
-# CHECK:    cmp
-# BINARY:   000101 00001 00010 00011 00000000001
-# CHECK:    encoding: [0x14,0x22,0x18,0x01]
-            cmp     r1, r2, r3
-
-# CHECK:    cmpu
-# BINARY:   000101 00001 00010 00011 00000000011
-# CHECK:    encoding: [0x14,0x22,0x18,0x03]
-            cmpu    r1, r2, r3
-
-# CHECK:    idiv
-# BINARY:   010010 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x48,0x22,0x18,0x00]
-            idiv    r1, r2, r3
-
-# CHECK:    idivu
-# BINARY:   010010 00001 00010 00011 00000000010
-# CHECK:    encoding: [0x48,0x22,0x18,0x02]
-            idivu   r1, r2, r3
-
-# CHECK:    mul
-# BINARY:   010000 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x40,0x22,0x18,0x00]
-            mul    r1, r2, r3
-
-# CHECK:    mulh
-# BINARY:   010000 00001 00010 00011 00000000001
-# CHECK:    encoding: [0x40,0x22,0x18,0x01]
-            mulh   r1, r2, r3
-
-# CHECK:    mulhu
-# BINARY:   010000 00001 00010 00011 00000000011
-# CHECK:    encoding: [0x40,0x22,0x18,0x03]
-            mulhu  r1, r2, r3
-
-# CHECK:    mulhsu
-# BINARY:   010000 00001 00010 00011 00000000010
-# CHECK:    encoding: [0x40,0x22,0x18,0x02]
-            mulhsu r1, r2, r3
-
-# CHECK:    or
-# BINARY:   100000 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x80,0x22,0x18,0x00]
-            or      r1, r2, r3
-
-# CHECK:    rsub
-# BINARY:   000001 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x04,0x22,0x18,0x00]
-            rsub    r1, r2, r3
-
-# CHECK:    rsubc
-# BINARY:   000011 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x0c,0x22,0x18,0x00]
-            rsubc   r1, r2, r3
-
-# CHECK:    rsubk
-# BINARY:   000101 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x14,0x22,0x18,0x00]
-            rsubk   r1, r2, r3
-
-# CHECK:    rsubkc
-# BINARY:   000111 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x1c,0x22,0x18,0x00]
-            rsubkc  r1, r2, r3
-
-# CHECK:    sext16
-# BINARY:   100100 00001 00010 00000 00001100001
-# CHECK:    encoding: [0x90,0x22,0x00,0x61]
-            sext16  r1, r2
-
-# CHECK:    sext8
-# BINARY:   100100 00001 00010 00000 00001100000
-# CHECK:    encoding: [0x90,0x22,0x00,0x60]
-            sext8   r1, r2
-
-# CHECK:    xor
-# BINARY:   100010 00001 00010 00011 00000000000
-# CHECK:    encoding: [0x88,0x22,0x18,0x00]
-            xor     r1, r2, r3
-
-# CHECK:    nop
-# BINARY:   100000 00000 00000 00000 00000000000
-# CHECK:    encoding: [0x80,0x00,0x00,0x00]
-        nop
diff --git a/test/MC/MBlaze/mblaze_typeb.s b/test/MC/MBlaze/mblaze_typeb.s
deleted file mode 100644
index ac4f1e2932a7..000000000000
--- a/test/MC/MBlaze/mblaze_typeb.s
+++ /dev/null
@@ -1,92 +0,0 @@
-# RUN: llvm-mc -triple mblaze-unknown-unknown -show-encoding %s | FileCheck %s
-
-# Test to make sure that all of the TYPE-B instructions supported by
-# the Microblaze can be parsed by the assembly parser.
-
-# TYPE B:   OPCODE RD    RA    IMMEDIATE
-#           000000 00000 00000 0000000000000000
-
-# CHECK:    addi
-# BINARY:   001000 00001 00010 0000000000001111
-# CHECK:    encoding: [0x20,0x22,0x00,0x0f]
-            addi    r1, r2, 0x000F
-
-# CHECK:    addic
-# BINARY:   001010 00001 00010 0000000000001111
-# CHECK:    encoding: [0x28,0x22,0x00,0x0f]
-            addic   r1, r2, 0x000F
-
-# CHECK:    addik
-# BINARY:   001100 00001 00010 0000000000001111
-# CHECK:    encoding: [0x30,0x22,0x00,0x0f]
-            addik   r1, r2, 0x000F
-
-# CHECK:    addikc
-# BINARY:   001110 00001 00010 0000000000001111
-# CHECK:    encoding: [0x38,0x22,0x00,0x0f]
-            addikc  r1, r2, 0x000F
-
-# CHECK:    andi
-# BINARY:   101001 00001 00010 0000000000001111
-# CHECK:    encoding: [0xa4,0x22,0x00,0x0f]
-            andi    r1, r2, 0x000F
-
-# CHECK:    andni
-# BINARY:   101011 00001 00010 0000000000001111
-# CHECK:    encoding: [0xac,0x22,0x00,0x0f]
-            andni   r1, r2, 0x000F
-
-# CHECK:    muli
-# BINARY:   011000 00001 00010 0000000000001111
-# CHECK:    encoding: [0x60,0x22,0x00,0x0f]
-            muli    r1, r2, 0x000F
-
-# CHECK:    ori
-# BINARY:   101000 00001 00010 0000000000001111
-# CHECK:    encoding: [0xa0,0x22,0x00,0x0f]
-            ori     r1, r2, 0x000F
-
-# CHECK:    rsubi
-# BINARY:   001001 00001 00010 0000000000001111
-# CHECK:    encoding: [0x24,0x22,0x00,0x0f]
-            rsubi   r1, r2, 0x000F
-
-# CHECK:    rsubic
-# BINARY:   001011 00001 00010 0000000000001111
-# CHECK:    encoding: [0x2c,0x22,0x00,0x0f]
-            rsubic  r1, r2, 0x000F
-
-# CHECK:    rsubik
-# BINARY:   001101 00001 00010 0000000000001111
-# CHECK:    encoding: [0x34,0x22,0x00,0x0f]
-            rsubik  r1, r2, 0x000F
-
-# CHECK:    rsubikc
-# BINARY:   001111 00001 00010 0000000000001111
-# CHECK:    encoding: [0x3c,0x22,0x00,0x0f]
-            rsubikc r1, r2, 0x000F
-
-# CHECK:    rtbd
-# BINARY:   101101 10010 01111 0000000000001111
-# CHECK:    encoding: [0xb6,0x4f,0x00,0x0f]
-            rtbd r15, 0x000F
-
-# CHECK:    rted
-# BINARY:   101101 10001 01111 0000000000001111
-# CHECK:    encoding: [0xb6,0x8f,0x00,0x0f]
-            rted r15, 0x000F
-
-# CHECK:    rtid
-# BINARY:   101101 10001 01111 0000000000001111
-# CHECK:    encoding: [0xb6,0x2f,0x00,0x0f]
-            rtid r15, 0x000F
-
-# CHECK:    rtsd
-# BINARY:   101101 10000 01111 0000000000001111
-# CHECK:    encoding: [0xb6,0x0f,0x00,0x0f]
-            rtsd r15, 0x000F
-
-# CHECK:    xori
-# BINARY:   101010 00001 00010 0000000000001111
-# CHECK:    encoding: [0xa8,0x22,0x00,0x0f]
-            xori r1, r2, 0x000F
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
index 9f0d39d9217b..8a3ba96497e7 100644
--- a/test/MC/MachO/ARM/lit.local.cfg
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.s', '.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/MC/MachO/bad-darwin-x86_64-32-bit-abs-addr.s b/test/MC/MachO/bad-darwin-x86_64-32-bit-abs-addr.s
new file mode 100644
index 000000000000..5fcd31626025
--- /dev/null
+++ b/test/MC/MachO/bad-darwin-x86_64-32-bit-abs-addr.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+mov $_f, %rsi
+// CHECK-ERROR: 32-bit absolute addressing is not supported in 64-bit mode
diff --git a/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s b/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s
new file mode 100644
index 000000000000..1ccebc5124c3
--- /dev/null
+++ b/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+.quad _foo - _bar
+// CHECK-ERROR: unsupported relocation with subtraction expression
diff --git a/test/MC/MachO/bad-indirect-symbols.s b/test/MC/MachO/bad-indirect-symbols.s
new file mode 100644
index 000000000000..7c16e90522c3
--- /dev/null
+++ b/test/MC/MachO/bad-indirect-symbols.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+x: .indirect_symbol _y
+// CHECK-ERROR: 4:4: error: indirect symbol not in a symbol pointer or stub section
diff --git a/test/MC/MachO/bss.s b/test/MC/MachO/bss.s
new file mode 100644
index 000000000000..15d490ad5eba
--- /dev/null
+++ b/test/MC/MachO/bss.s
@@ -0,0 +1,17 @@
+// The purpose of this test is to verify that bss sections are emitted correctly.
+
+// RUN: llvm-mc -filetype=obj -triple i686-apple-darwin9 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s | llvm-readobj -s | FileCheck %s
+
+    .bss
+    .globl _g0
+    .align 4
+_g0:
+    .long 0
+
+// CHECK:		Name: __bss (5F 5F 62 73 73 00 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT:	Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT:	Address: 0x0
+// CHECK-NEXT:	Size: 0x4
+// CHECK-NEXT:	Offset: 0
+// CHECK-NEXT:	Alignment: 4
diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s
new file mode 100644
index 000000000000..5d548790a7a9
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+// Test case for rdar://9356266
+
+// This tests that this expression does not cause a crash and produces these
+// four relocation entries:
+// Relocation information (__DATA,__data) 4 entries
+// address  pcrel length extern type    scattered symbolnum/value
+// 00000004 False long   False  SUB     False     2 (__DATA,__data)
+// 00000004 False long   False  UNSIGND False     2 (__DATA,__data)
+// 00000000 False long   False  SUB     False     2 (__DATA,__data)
+// 00000000 False long   False  UNSIGND False     2 (__DATA,__data)
+
+	.data
+L_var1:
+L_var2:
+// This was working fine
+	.long L_var2 - L_var1
+	
+	.set L_var3, .
+	.set L_var4, .
+// But this was causing a crash
+	.long L_var4 - L_var3
+
+// CHECK:  ('_relocations', [
+// CHECK:    # Relocation 0
+// CHECK:    (('word-0', 0x4),
+// CHECK:     ('word-1', 0x54000002)),
+// CHECK:    # Relocation 1
+// CHECK:    (('word-0', 0x4),
+// CHECK:     ('word-1', 0x4000002)),
+// CHECK:    # Relocation 2
+// CHECK:    (('word-0', 0x0),
+// CHECK:     ('word-1', 0x54000002)),
+// CHECK:    # Relocation 3
+// CHECK:    (('word-0', 0x0),
+// CHECK:     ('word-1', 0x4000002)),
+// CHECK:  ])
diff --git a/test/MC/MachO/lit.local.cfg b/test/MC/MachO/lit.local.cfg
index 41a8434f9993..ba763cf03ffc 100644
--- a/test/MC/MachO/lit.local.cfg
+++ b/test/MC/MachO/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.s', '.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/MachO/tlv-bss.ll b/test/MC/MachO/tlv-bss.ll
new file mode 100644
index 000000000000..af620f9e3048
--- /dev/null
+++ b/test/MC/MachO/tlv-bss.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin12 -filetype=obj -o - %s | macho-dump | FileCheck %s
+; Test that we emit weak_odr thread_locals correctly into the thread_bss section
+; PR15972
+
+; CHECK: __thread_bss
+; CHECK: 'size', 8
+; CHECK: 'alignment', 3
+; CHECK: __thread_vars
+
+; Generated from this C++ source
+; template<class T>
+; struct Tls {
+;   static __thread void* val;
+; };
+
+; template<class T> __thread void* Tls<T>::val;
+
+; void* f(int x) {
+;         return Tls<long>::val;
+; }
+
+@_ZN3TlsIlE3valE = weak_odr thread_local global i8* null, align 8
+
+; Function Attrs: nounwind ssp uwtable
+define i8* @_Z1fi(i32 %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %0 = load i8** @_ZN3TlsIlE3valE, align 8
+  ret i8* %0
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/MC/Mips/abicalls.ll b/test/MC/Mips/abicalls.ll
new file mode 100644
index 000000000000..7b98b02d05ad
--- /dev/null
+++ b/test/MC/Mips/abicalls.ll
@@ -0,0 +1,15 @@
+; 
+; When the assembler is ready, a .s file for it will
+; be created.
+
+; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
+; TODO need to support -mno-abicalls
+
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-STATIC %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+
+; CHECK-STATIC: .abicalls
+; CHECK-STATIC-NEXT: pic0
+; CHECK-PIC: .abicalls
+; CHECK-PIC-NOT: pic0
diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s
index 93ff0b8bd277..167159885d72 100644
--- a/test/MC/Mips/eh-frame.s
+++ b/test/MC/Mips/eh-frame.s
@@ -19,7 +19,7 @@ func:
 	.cfi_startproc
 	.cfi_endproc
 
-// MIPS32: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS32: RELOCATION RECORDS FOR [.rel.eh_frame]:
 // MIPS32-NEXT: R_MIPS_32
 // MIPS32: Contents of section .eh_frame:
 // MIPS32-NEXT: 0000
@@ -55,7 +55,7 @@ func:
 // MIPS32: 0b
 // FIXME: The instructions are different from the ones produces by gas.
 
-// MIPS32EL: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS32EL: RELOCATION RECORDS FOR [.rel.eh_frame]:
 // MIPS32EL-NEXT: R_MIPS_32
 // MIPS32EL: Contents of section .eh_frame:
 // MIPS32EL-NEXT: 0000
@@ -91,7 +91,7 @@ func:
 // MIPS32EL: 0b
 // FIXME: The instructions are different from the ones produces by gas.
 
-// MIPS64: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS64: RELOCATION RECORDS FOR [.rela.eh_frame]:
 // MIPS64-NEXT: R_MIPS_64
 // MIPS64: Contents of section .eh_frame:
 // MIPS64-NEXT: 0000
@@ -129,7 +129,7 @@ func:
 // FIXME: The instructions are different from the ones produces by gas.
 
 
-// MIPS64EL: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS64EL: RELOCATION RECORDS FOR [.rela.eh_frame]:
 // MIPS64EL-NEXT: R_MIPS_64
 // MIPS64EL: Contents of section .eh_frame:
 // MIPS64EL-NEXT: 0000
diff --git a/test/MC/Mips/elf-gprel-32-64.ll b/test/MC/Mips/elf-gprel-32-64.ll
index 47003fa1e752..4057eb823a5a 100644
--- a/test/MC/Mips/elf-gprel-32-64.ll
+++ b/test/MC/Mips/elf-gprel-32-64.ll
@@ -31,7 +31,7 @@ return:
 
 ; R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
 ; CHECK:      Relocations [
-; CHECK:        Section ({{[a-z0-9]+}}) .rodata {
+; CHECK:        Section ({{[a-z0-9]+}}) .rela.rodata {
 ; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
 ; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
 ; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
diff --git a/test/MC/Mips/elf-tls.ll b/test/MC/Mips/elf-tls.ll
index 9f604e00a0a8..bcce3d515e7d 100644
--- a/test/MC/Mips/elf-tls.ll
+++ b/test/MC/Mips/elf-tls.ll
@@ -3,7 +3,7 @@
 ; Check that the appropriate relocations were created.
 
 ; CHECK: Relocations [
-; CHECK:   Section (1) .text {
+; CHECK:   Section (2) .rel.text {
 ; CHECK:     R_MIPS_TLS_LDM
 ; CHECK:     R_MIPS_TLS_DTPREL_HI16
 ; CHECK:     R_MIPS_TLS_DTPREL_LO16
diff --git a/test/MC/Mips/elf_eflags.ll b/test/MC/Mips/elf_eflags.ll
index 6d16a42aca2f..9432dcf59c32 100644
--- a/test/MC/Mips/elf_eflags.ll
+++ b/test/MC/Mips/elf_eflags.ll
@@ -6,60 +6,63 @@
 
 ; EF_MIPS_NOREORDER (0x00000001) is always on by default currently
 ; EF_MIPS_PIC (0x00000002)
-; EF_MIPS_CPIC (0x00000004) - not tested yet
+; EF_MIPS_CPIC (0x00000004) - See note below
 ; EF_MIPS_ABI2 (0x00000020) - n32 not tested yet
 ; EF_MIPS_ARCH_32 (0x50000000)
 ; EF_MIPS_ARCH_64 (0x60000000)
 ; EF_MIPS_ARCH_32R2 (0x70000000)
 ; EF_MIPS_ARCH_64R2 (0x80000000)
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
+; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
+; TODO need to support -mno-abicalls
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE32 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32 -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE32R2 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE64 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 %s -print-hack-directives -o - | FileCheck -check-prefix=CHECK-BE64_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE64R2 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic -print-hack-directives %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
  
 ; 32(R1) bit with NO_REORDER and static
-; CHECK-BE32: Flags [ (0x50001001)
+; CHECK-BE32: .mips_hack_elf_flags 0x50001005
 ;
 ; 32(R1) bit with NO_REORDER and PIC
-; CHECK-BE32_PIC: Flags [ (0x50001003)
+; CHECK-BE32_PIC: .mips_hack_elf_flags 0x50001007
 ;
 ; 32R2 bit with NO_REORDER and static
-; CHECK-BE32R2: Flags [ (0x70001001)
+; CHECK-BE32R2: .mips_hack_elf_flags 0x70001005
 ;
 ; 32R2 bit with NO_REORDER and PIC
-; CHECK-BE32R2_PIC: Flags [ (0x70001003)
+; CHECK-BE32R2_PIC: .mips_hack_elf_flags 0x70001007
 ;
 ; 32R2 bit MICROMIPS with NO_REORDER and static
-; CHECK-BE32R2-MICROMIPS: Flags [ (0x72001001)
+; CHECK-BE32R2-MICROMIPS: .mips_hack_elf_flags 0x72001005
 ;
 ; 32R2 bit MICROMIPS with NO_REORDER and PIC
-;CHECK-BE32R2-MICROMIPS_PIC: Flags [ (0x72001003)
+; CHECK-BE32R2-MICROMIPS_PIC: .mips_hack_elf_flags 0x72001007
 ;
 ; 64(R1) bit with NO_REORDER and static
-; CHECK-BE64: Flags [ (0x60000001)
+; CHECK-BE64: .mips_hack_elf_flags 0x60000005
 ;
 ; 64(R1) bit with NO_REORDER and PIC
-; CHECK-BE64_PIC: Flags [ (0x60000003)
+; CHECK-BE64_PIC: .mips_hack_elf_flags 0x60000007
 ;
 ; 64R2 bit with NO_REORDER and static
-; CHECK-BE64R2: Flags [ (0x80000001)
+; CHECK-BE64R2: .mips_hack_elf_flags 0x80000005
 ;
 ; 64R2 bit with NO_REORDER and PIC
-; CHECK-BE64R2_PIC: Flags [ (0x80000003)
+; CHECK-BE64R2_PIC: .mips_hack_elf_flags 0x80000007
 ;
 ; 32R2 bit MIPS16 with PIC
-; CHECK-LE32R2-MIPS16: Flags [ (0x74001002)
- 
+; CHECK-LE32R2-MIPS16: .mips_hack_elf_flags 0x74001006
+
 define i32 @main() nounwind {
 entry:
   ret i32 0
diff --git a/test/MC/Mips/elf_eflags.s b/test/MC/Mips/elf_eflags.s
new file mode 100644
index 000000000000..c56596444aea
--- /dev/null
+++ b/test/MC/Mips/elf_eflags.s
@@ -0,0 +1,5 @@
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o -| llvm-readobj -h | FileCheck %s
+
+        .mips_hack_elf_flags 0x50001005
+
+// CHECK: Flags [ (0x50001005)
diff --git a/test/MC/Mips/elf_st_other.ll b/test/MC/Mips/elf_st_other.ll
index bc56c0033e58..31294c88f87d 100644
--- a/test/MC/Mips/elf_st_other.ll
+++ b/test/MC/Mips/elf_st_other.ll
@@ -1,12 +1,11 @@
 ; This tests value of ELF st_other field for function symbol table entries.
 ; For microMIPS value should be equal to STO_MIPS_MICROMIPS.
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -t | FileCheck %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -print-hack-directives %s -o - | FileCheck %s
 
 define i32 @main() nounwind {
 entry:
   ret i32 0
 }
 
-; CHECK:     Name: main
-; CHECK:     Other: 128
+; CHECK:     .mips_hack_stocg main, 128
diff --git a/test/MC/Mips/elf_st_other.s b/test/MC/Mips/elf_st_other.s
new file mode 100644
index 000000000000..2d632887799a
--- /dev/null
+++ b/test/MC/Mips/elf_st_other.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o -| llvm-readobj -t | FileCheck %s
+
+        .text
+        .globl  main
+        .align  2
+        .type   main,@function
+        .set    nomips16                # @main
+        .ent    main
+        .mips_hack_stocg main, 128
+main:
+
+// CHECK:     Name: main
+// CHECK:     Other: 128
diff --git a/test/MC/Mips/lit.local.cfg b/test/MC/Mips/lit.local.cfg
index d2e3b28dbd86..1fa54b428cd9 100644
--- a/test/MC/Mips/lit.local.cfg
+++ b/test/MC/Mips/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Mips' in targets:
     config.unsupported = True
diff --git a/test/MC/Mips/micromips-alu-instructions.s b/test/MC/Mips/micromips-alu-instructions.s
index c541e1ace554..276a83e82c03 100644
--- a/test/MC/Mips/micromips-alu-instructions.s
+++ b/test/MC/Mips/micromips-alu-instructions.s
@@ -1,38 +1,79 @@
-# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck %s
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s
 # Check that the assembler can handle the documented syntax
 # for arithmetic and logical instructions.
 #------------------------------------------------------------------------------
 # Arithmetic and Logical Instructions
 #------------------------------------------------------------------------------
-# CHECK: add   $9, $6, $7      # encoding: [0x10,0x49,0xe6,0x00]
-# CHECK: addi  $9, $6, 17767   # encoding: [0x67,0x45,0x26,0x11]
-# CHECK: addiu $9, $6, -15001  # encoding: [0x67,0xc5,0x26,0x31]
-# CHECK: addi  $9, $6, 17767   # encoding: [0x67,0x45,0x26,0x11]
-# CHECK: addiu $9, $6, -15001  # encoding: [0x67,0xc5,0x26,0x31]
-# CHECK: addu  $9, $6, $7      # encoding: [0x50,0x49,0xe6,0x00]
-# CHECK: sub   $9, $6, $7      # encoding: [0x90,0x49,0xe6,0x00]
-# CHECK: subu  $4, $3, $5      # encoding: [0xd0,0x21,0xa3,0x00]
-# CHECK: neg   $6, $7          # encoding: [0x90,0x31,0xe0,0x00]
-# CHECK: negu  $6, $7          # encoding: [0xd0,0x31,0xe0,0x00]
-# CHECK: move  $7, $8          # encoding: [0x50,0x39,0x08,0x00]
-# CHECK: slt    $3, $3, $5     # encoding: [0x50,0x1b,0xa3,0x00]
-# CHECK: slti   $3, $3, 103    # encoding: [0x67,0x00,0x63,0x90]
-# CHECK: slti   $3, $3, 103    # encoding: [0x67,0x00,0x63,0x90]
-# CHECK: sltiu  $3, $3, 103    # encoding: [0x67,0x00,0x63,0xb0]
-# CHECK: sltu   $3, $3, $5     # encoding: [0x90,0x1b,0xa3,0x00]
-# CHECK: and    $9, $6, $7     # encoding: [0x50,0x4a,0xe6,0x00]
-# CHECK: andi   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0xd1]
-# CHECK: andi   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0xd1]
-# CHECK: or     $3, $4, $5     # encoding: [0x90,0x1a,0xa4,0x00]
-# CHECK: ori    $9, $6, 17767  # encoding: [0x67,0x45,0x26,0x51]
-# CHECK: xor    $3, $3, $5     # encoding: [0x10,0x1b,0xa3,0x00]
-# CHECK: xori   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0x71]
-# CHECK: xori   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0x71]
-# CHECK: nor    $9, $6, $7     # encoding: [0xd0,0x4a,0xe6,0x00]
-# CHECK: not    $7, $8         # encoding: [0xd0,0x3a,0x08,0x00]
-# CHECK: mul    $9, $6, $7     # encoding: [0x10,0x4a,0xe6,0x00]
-# CHECK: mult   $9, $7         # encoding: [0x3c,0x8b,0xe9,0x00]
-# CHECK: multu  $9, $7         # encoding: [0x3c,0x9b,0xe9,0x00]
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: add   $9, $6, $7      # encoding: [0xe6,0x00,0x10,0x49]
+# CHECK-EL: addi  $9, $6, 17767   # encoding: [0x26,0x11,0x67,0x45]
+# CHECK-EL: addiu $9, $6, -15001  # encoding: [0x26,0x31,0x67,0xc5]
+# CHECK-EL: addi  $9, $6, 17767   # encoding: [0x26,0x11,0x67,0x45]
+# CHECK-EL: addiu $9, $6, -15001  # encoding: [0x26,0x31,0x67,0xc5]
+# CHECK-EL: addu  $9, $6, $7      # encoding: [0xe6,0x00,0x50,0x49]
+# CHECK-EL: sub   $9, $6, $7      # encoding: [0xe6,0x00,0x90,0x49]
+# CHECK-EL: subu  $4, $3, $5      # encoding: [0xa3,0x00,0xd0,0x21]
+# CHECK-EL: neg   $6, $7          # encoding: [0xe0,0x00,0x90,0x31]
+# CHECK-EL: negu  $6, $7          # encoding: [0xe0,0x00,0xd0,0x31]
+# CHECK-EL: move  $7, $8          # encoding: [0x08,0x00,0x50,0x39]
+# CHECK-EL: slt    $3, $3, $5     # encoding: [0xa3,0x00,0x50,0x1b]
+# CHECK-EL: slti   $3, $3, 103    # encoding: [0x63,0x90,0x67,0x00]
+# CHECK-EL: slti   $3, $3, 103    # encoding: [0x63,0x90,0x67,0x00]
+# CHECK-EL: sltiu  $3, $3, 103    # encoding: [0x63,0xb0,0x67,0x00]
+# CHECK-EL: sltu   $3, $3, $5     # encoding: [0xa3,0x00,0x90,0x1b]
+# CHECK-EL: lui    $9, 17767      # encoding: [0xa9,0x41,0x67,0x45]
+# CHECK-EL: and    $9, $6, $7     # encoding: [0xe6,0x00,0x50,0x4a]
+# CHECK-EL: andi   $9, $6, 17767  # encoding: [0x26,0xd1,0x67,0x45]
+# CHECK-EL: andi   $9, $6, 17767  # encoding: [0x26,0xd1,0x67,0x45]
+# CHECK-EL: or     $3, $4, $5     # encoding: [0xa4,0x00,0x90,0x1a]
+# CHECK-EL: ori    $9, $6, 17767  # encoding: [0x26,0x51,0x67,0x45]
+# CHECK-EL: xor    $3, $3, $5     # encoding: [0xa3,0x00,0x10,0x1b]
+# CHECK-EL: xori   $9, $6, 17767  # encoding: [0x26,0x71,0x67,0x45]
+# CHECK-EL: xori   $9, $6, 17767  # encoding: [0x26,0x71,0x67,0x45]
+# CHECK-EL: nor    $9, $6, $7     # encoding: [0xe6,0x00,0xd0,0x4a]
+# CHECK-EL: not    $7, $8         # encoding: [0x08,0x00,0xd0,0x3a]
+# CHECK-EL: mul    $9, $6, $7     # encoding: [0xe6,0x00,0x10,0x4a]
+# CHECK-EL: mult   $9, $7         # encoding: [0xe9,0x00,0x3c,0x8b]
+# CHECK-EL: multu  $9, $7         # encoding: [0xe9,0x00,0x3c,0x9b]
+# CHECK-EL: div    $zero, $9, $7  # encoding: [0xe9,0x00,0x3c,0xab]
+# CHECK-EL: divu   $zero, $9, $7  # encoding: [0xe9,0x00,0x3c,0xbb]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: add $9, $6, $7        # encoding: [0x00,0xe6,0x49,0x10]
+# CHECK-EB: addi  $9, $6, 17767   # encoding: [0x11,0x26,0x45,0x67]
+# CHECK-EB: addiu $9, $6, -15001  # encoding: [0x31,0x26,0xc5,0x67]
+# CHECK-EB: addi  $9, $6, 17767   # encoding: [0x11,0x26,0x45,0x67]
+# CHECK-EB: addiu $9, $6, -15001  # encoding: [0x31,0x26,0xc5,0x67]
+# CHECK-EB: addu  $9, $6, $7      # encoding: [0x00,0xe6,0x49,0x50]
+# CHECK-EB: sub $9, $6, $7        # encoding: [0x00,0xe6,0x49,0x90]
+# CHECK-EB: subu  $4, $3, $5      # encoding: [0x00,0xa3,0x21,0xd0]
+# CHECK-EB: neg $6, $7            # encoding: [0x00,0xe0,0x31,0x90]
+# CHECK-EB: negu  $6, $7          # encoding: [0x00,0xe0,0x31,0xd0]
+# CHECK-EB: move  $7, $8          # encoding: [0x00,0x08,0x39,0x50]
+# CHECK-EB: slt $3, $3, $5        # encoding: [0x00,0xa3,0x1b,0x50]
+# CHECK-EB: slti  $3, $3, 103     # encoding: [0x90,0x63,0x00,0x67]
+# CHECK-EB: slti  $3, $3, 103     # encoding: [0x90,0x63,0x00,0x67]
+# CHECK-EB: sltiu $3, $3, 103     # encoding: [0xb0,0x63,0x00,0x67]
+# CHECK-EB: sltu  $3, $3, $5      # encoding: [0x00,0xa3,0x1b,0x90]
+# CHECK-EB: lui $9, 17767         # encoding: [0x41,0xa9,0x45,0x67]
+# CHECK-EB: and $9, $6, $7        # encoding: [0x00,0xe6,0x4a,0x50]
+# CHECK-EB:  andi  $9, $6, 17767  # encoding: [0xd1,0x26,0x45,0x67]
+# CHECK-EB:  andi  $9, $6, 17767  # encoding: [0xd1,0x26,0x45,0x67]
+# CHECK-EB:  or  $3, $4, $5       # encoding: [0x00,0xa4,0x1a,0x90]
+# CHECK-EB:  ori $9, $6, 17767    # encoding: [0x51,0x26,0x45,0x67]
+# CHECK-EB:  xor $3, $3, $5       # encoding: [0x00,0xa3,0x1b,0x10]
+# CHECK-EB:  xori  $9, $6, 17767  # encoding: [0x71,0x26,0x45,0x67]
+# CHECK-EB:  xori  $9, $6, 17767  # encoding: [0x71,0x26,0x45,0x67]
+# CHECK-EB:  nor $9, $6, $7       # encoding: [0x00,0xe6,0x4a,0xd0]
+# CHECK-EB:  not $7, $8           # encoding: [0x00,0x08,0x3a,0xd0]
+# CHECK-EB:  mul $9, $6, $7       # encoding: [0x00,0xe6,0x4a,0x10]
+# CHECK-EB:  mult  $9, $7         # encoding: [0x00,0xe9,0x8b,0x3c]
+# CHECK-EB:  multu $9, $7         # encoding: [0x00,0xe9,0x9b,0x3c]
+# CHECK-EB: div  $zero, $9, $7    # encoding: [0x00,0xe9,0xab,0x3c]
+# CHECK-EB: divu $zero, $9, $7    # encoding: [0x00,0xe9,0xbb,0x3c]
     add    $9, $6, $7
     add    $9, $6, 17767
     addu   $9, $6, -15001
@@ -49,6 +90,7 @@
     slti   $3, $3, 103
     sltiu  $3, $3, 103
     sltu   $3, $3, $5
+    lui    $9, 17767
     and    $9, $6, $7
     and    $9, $6, 17767
     andi   $9, $6, 17767
@@ -62,3 +104,5 @@
     mul    $9, $6, $7
     mult   $9, $7
     multu  $9, $7
+    div    $0, $9, $7
+    divu   $0, $9, $7
diff --git a/test/MC/Mips/micromips-branch-instructions.s b/test/MC/Mips/micromips-branch-instructions.s
new file mode 100644
index 000000000000..84df2a17c83c
--- /dev/null
+++ b/test/MC/Mips/micromips-branch-instructions.s
@@ -0,0 +1,65 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \
+# RUN: | FileCheck %s -check-prefix=CHECK-EL
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \
+# RUN: | FileCheck %s -check-prefix=CHECK-EB
+# Check that the assembler can handle the documented syntax
+# for branch instructions.
+#------------------------------------------------------------------------------
+# Branch Instructions
+#------------------------------------------------------------------------------
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: b 1332               # encoding: [0x00,0x94,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: beq $9, $6, 1332     # encoding: [0xc9,0x94,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bgez $6, 1332        # encoding: [0x46,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bgezal $6, 1332      # encoding: [0x66,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bltzal $6, 1332      # encoding: [0x26,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bgtz $6, 1332        # encoding: [0xc6,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: blez $6, 1332        # encoding: [0x86,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bne $9, $6, 1332     # encoding: [0xc9,0xb4,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bal 1332             # encoding: [0x60,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bltz $6, 1332        # encoding: [0x06,0x40,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: b 1332               # encoding: [0x94,0x00,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: beq $9, $6, 1332     # encoding: [0x94,0xc9,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bgez $6, 1332        # encoding: [0x40,0x46,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bgezal $6, 1332      # encoding: [0x40,0x66,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bltzal $6, 1332      # encoding: [0x40,0x26,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bgtz $6, 1332        # encoding: [0x40,0xc6,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: blez $6, 1332        # encoding: [0x40,0x86,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bne $9, $6, 1332     # encoding: [0xb4,0xc9,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bal 1332             # encoding: [0x40,0x60,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bltz $6, 1332        # encoding: [0x40,0x06,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+
+     b      1332
+     beq    $9,$6,1332
+     bgez   $6,1332
+     bgezal $6,1332
+     bltzal $6,1332
+     bgtz   $6,1332
+     blez   $6,1332
+     bne    $9,$6,1332
+     bal    1332
+     bltz   $6,1332
diff --git a/test/MC/Mips/micromips-branch16.s b/test/MC/Mips/micromips-branch16.s
new file mode 100644
index 000000000000..321ee8640f70
--- /dev/null
+++ b/test/MC/Mips/micromips-branch16.s
@@ -0,0 +1,69 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \
+# RUN: -mattr=micromips | llvm-readobj -r \
+# RUN: | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that the assembler can handle the documented syntax
+# for relocations.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: b           bar # encoding: [A,0x94'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: beq $3, $4, bar # encoding: [0x83'A',0x94'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bne $3, $4, bar # encoding: [0x83'A',0xb4'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bgez    $4, bar # encoding: [0x44'A',0x40'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bgtz    $4, bar # encoding: [0xc4'A',0x40'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: blez    $4, bar # encoding: [0x84'A',0x40'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bltz    $4, bar # encoding: [0x04'A',0x40'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bgezal  $4, bar # encoding: [0x64'A',0x40'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bltzal  $4, bar # encoding: [0x24'A',0x40'A',0x00,0x00]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC16_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC16_S1
+# CHECK-ELF: ]
+
+  b       bar
+  beq     $3, $4, bar
+  bne     $3, $4, bar
+  bgez    $4, bar
+  bgtz    $4, bar
+  blez    $4, bar
+  bltz    $4, bar
+  bgezal  $4, bar
+  bltzal  $4, bar
diff --git a/test/MC/Mips/micromips-expansions.s b/test/MC/Mips/micromips-expansions.s
new file mode 100644
index 000000000000..af4d3b579003
--- /dev/null
+++ b/test/MC/Mips/micromips-expansions.s
@@ -0,0 +1,57 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mcpu=mips32r2 -mattr=micromips | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for macro instructions
+#------------------------------------------------------------------------------
+# Load immediate instructions
+#------------------------------------------------------------------------------
+# CHECK: ori   $5, $zero, 123        # encoding: [0xa0,0x50,0x7b,0x00]
+# CHECK: addiu $6, $zero, -2345      # encoding: [0xc0,0x30,0xd7,0xf6]
+# CHECK: lui   $7, 1                 # encoding: [0xa7,0x41,0x01,0x00]
+# CHECK: ori   $7, $7, 2             # encoding: [0xe7,0x50,0x02,0x00]
+# CHECK: addiu $4, $zero, 20         # encoding: [0x80,0x30,0x14,0x00]
+# CHECK: lui   $7, 1                 # encoding: [0xa7,0x41,0x01,0x00]
+# CHECK: ori   $7, $7, 2             # encoding: [0xe7,0x50,0x02,0x00]
+# CHECK: addiu $4, $5, 20            # encoding: [0x85,0x30,0x14,0x00]
+# CHECK: lui   $7, 1                 # encoding: [0xa7,0x41,0x01,0x00]
+# CHECK: ori   $7, $7, 2             # encoding: [0xe7,0x50,0x02,0x00]
+# CHECK: addu  $7, $7, $8            # encoding: [0x07,0x01,0x50,0x39]
+# CHECK: lui   $10, %hi(symbol)      # encoding: [0xaa'A',0x41'A',0x00,0x00]
+# CHECK:                             # fixup A - offset: 0,
+# CHECK:                               value: symbol@ABS_HI,
+# CHECK:                               kind: fixup_MICROMIPS_HI16
+# CHECK: addu  $10, $10, $4          # encoding: [0x8a,0x00,0x50,0x51]
+# CHECK: lw    $10, %lo(symbol)($10) # encoding: [0x4a'A',0xfd'A',0x00,0x00]
+# CHECK:                             # fixup A - offset: 0,
+# CHECK:                               value: symbol@ABS_LO,
+# CHECK:                               kind: fixup_MICROMIPS_LO16
+# CHECK: lui   $1, %hi(symbol)       # encoding: [0xa1'A',0x41'A',0x00,0x00]
+# CHECK:                             # fixup A - offset: 0,
+# CHECK:                               value: symbol@ABS_HI,
+# CHECK:                               kind: fixup_MICROMIPS_HI16
+# CHECK: addu  $1, $1, $9            # encoding: [0x21,0x01,0x50,0x09]
+# CHECK: sw    $10, %lo(symbol)($1)  # encoding: [0x41'A',0xf9'A',0x00,0x00]
+# CHECK:                             # fixup A - offset: 0,
+# CHECK:                               value: symbol@ABS_LO,
+# CHECK:                               kind: fixup_MICROMIPS_LO16
+# CHECK: lui   $10, 10               # encoding: [0xaa,0x41,0x0a,0x00]
+# CHECK: addu  $10, $10, $4          # encoding: [0x8a,0x00,0x50,0x51]
+# CHECK: lw    $10, 123($10)         # encoding: [0x4a,0xfd,0x7b,0x00]
+# CHECK: lui   $1, 2                 # encoding: [0xa1,0x41,0x02,0x00]
+# CHECK: addu  $1, $1, $9            # encoding: [0x21,0x01,0x50,0x09]
+# CHECK: sw    $10, 57920($1)        # encoding: [0x41,0xf9,0x40,0xe2]
+
+    li $5,123
+    li $6,-2345
+    li $7,65538
+
+    la $a0, 20
+    la $7,65538
+    la $a0, 20($a1)
+    la $7,65538($8)
+
+    lw  $t2, symbol($a0)
+    sw  $t2, symbol($t1)
+
+    lw  $t2, 655483($a0)
+    sw  $t2, 123456($t1)
diff --git a/test/MC/Mips/micromips-expressions.s b/test/MC/Mips/micromips-expressions.s
new file mode 100644
index 000000000000..509e98072bb4
--- /dev/null
+++ b/test/MC/Mips/micromips-expressions.s
@@ -0,0 +1,35 @@
+# RUN: llvm-mc  %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mcpu=mips32r2 -mattr=micromips | FileCheck %s
+# Check that the assembler can handle the expressions as operands.
+# CHECK:  .text
+# CHECK:  .globl  foo
+# CHECK:  foo:
+# CHECK:  lw   $4, %lo(foo)($4)    # encoding: [0x84'A',0xfc'A',0x00,0x00]
+# CHECK:                           # fixup A - offset: 0,
+# CHECK:                             value: foo@ABS_LO,
+# CHECK:                             kind: fixup_MICROMIPS_LO16
+# CHECK:  lw   $4, 56($4)          # encoding: [0x84,0xfc,0x38,0x00]
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x84'A',0xfc'A',0x08,0x00]
+# CHECK:                           # fixup A - offset: 0,
+# CHECK:                             value: foo@ABS_LO,
+# CHECK:                             kind: fixup_MICROMIPS_LO16
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x84'A',0xfc'A',0x08,0x00]
+# CHECK:                           # fixup A - offset: 0,
+# CHECK:                             value: foo@ABS_LO,
+# CHECK:                             kind: fixup_MICROMIPS_LO16
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x84'A',0xfc'A',0x08,0x00]
+# CHECK:                           # fixup A - offset: 0,
+# CHECK:                             value: foo@ABS_LO,
+# CHECK:                             kind: fixup_MICROMIPS_LO16
+# CHECK:  .space  64
+
+  .globl  foo
+  .ent  foo
+foo:
+  lw  $4,%lo(foo)($4)
+  lw  $4,((10 + 4) * 4)($4)
+  lw  $4,%lo (2 * 4) + foo($4)
+  lw  $4,%lo((2 * 4) + foo)($4)
+  lw  $4,(((%lo ((2 * 4) + foo))))($4)
+  .space  64
+  .end  foo
diff --git a/test/MC/Mips/micromips-jump-instructions.s b/test/MC/Mips/micromips-jump-instructions.s
new file mode 100644
index 000000000000..6f571b687911
--- /dev/null
+++ b/test/MC/Mips/micromips-jump-instructions.s
@@ -0,0 +1,40 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \
+# RUN: | FileCheck %s -check-prefix=CHECK-EL
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \
+# RUN: | FileCheck %s -check-prefix=CHECK-EB
+# Check that the assembler can handle the documented syntax
+# for jump instructions.
+#------------------------------------------------------------------------------
+# Jump instructions
+#------------------------------------------------------------------------------
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: j 1328      # encoding: [0x00,0xd4,0x98,0x02]
+# CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: jal 1328    # encoding: [0x00,0xf4,0x98,0x02]
+# CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: jalr $6     # encoding: [0xe6,0x03,0x3c,0x0f]
+# CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: jr $7       # encoding: [0x07,0x00,0x3c,0x0f]
+# CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: jr $7       # encoding: [0x07,0x00,0x3c,0x0f]
+# CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: j 1328      # encoding: [0xd4,0x00,0x02,0x98]
+# CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: jal 1328    # encoding: [0xf4,0x00,0x02,0x98]
+# CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: jalr $6     # encoding: [0x03,0xe6,0x0f,0x3c]
+# CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: jr $7       # encoding: [0x00,0x07,0x0f,0x3c]
+# CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: jr $7       # encoding: [0x00,0x07,0x0f,0x3c]
+# CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
+
+     j 1328
+     jal 1328
+     jalr $6
+     jr $7
+     j $7
diff --git a/test/MC/Mips/micromips-jump26.s b/test/MC/Mips/micromips-jump26.s
new file mode 100644
index 000000000000..936a9984ab0a
--- /dev/null
+++ b/test/MC/Mips/micromips-jump26.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \
+# RUN: -mattr=micromips | llvm-readobj -r \
+# RUN: | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that the assembler can handle the documented syntax
+# for relocations.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: foo:
+# CHECK-FIXUP:   jal bar # encoding: [A,0xf4'A',A,0b000000AA]
+# CHECK-FIXUP:           #   fixup A - offset: 0,
+# CHECK-FIXUP:               value: bar, kind: fixup_MICROMIPS_26_S1
+# CHECK-FIXUP:   nop     # encoding: [0x00,0x00,0x00,0x00]
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_26_S1
+# CHECK-ELF: ]
+
+foo:
+  jal bar
diff --git a/test/MC/Mips/micromips-loadstore-instructions.s b/test/MC/Mips/micromips-loadstore-instructions.s
index 623e2acdcbf6..cc7514b3231a 100644
--- a/test/MC/Mips/micromips-loadstore-instructions.s
+++ b/test/MC/Mips/micromips-loadstore-instructions.s
@@ -1,17 +1,31 @@
-# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck %s
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s
 # Check that the assembler can handle the documented syntax
 # for load and store instructions.
 #------------------------------------------------------------------------------
 # Load and Store Instructions
 #------------------------------------------------------------------------------
-# CHECK: lb     $5, 8($4)      # encoding: [0x08,0x00,0xa4,0x1c]
-# CHECK: lbu    $6, 8($4)      # encoding: [0x08,0x00,0xc4,0x14]
-# CHECK: lh     $2, 8($4)      # encoding: [0x08,0x00,0x44,0x3c]
-# CHECK: lhu    $4, 8($2)      # encoding: [0x08,0x00,0x82,0x34]
-# CHECK: lw     $6, 4($5)      # encoding: [0x04,0x00,0xc5,0xfc]
-# CHECK: sb     $5, 8($4)      # encoding: [0x08,0x00,0xa4,0x18]
-# CHECK: sh     $2, 8($4)      # encoding: [0x08,0x00,0x44,0x38]
-# CHECK: sw     $5, 4($6)      # encoding: [0x04,0x00,0xa6,0xf8]
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: lb     $5, 8($4)      # encoding: [0xa4,0x1c,0x08,0x00]
+# CHECK-EL: lbu    $6, 8($4)      # encoding: [0xc4,0x14,0x08,0x00]
+# CHECK-EL: lh     $2, 8($4)      # encoding: [0x44,0x3c,0x08,0x00]
+# CHECK-EL: lhu    $4, 8($2)      # encoding: [0x82,0x34,0x08,0x00]
+# CHECK-EL: lw     $6, 4($5)      # encoding: [0xc5,0xfc,0x04,0x00]
+# CHECK-EL: sb     $5, 8($4)      # encoding: [0xa4,0x18,0x08,0x00]
+# CHECK-EL: sh     $2, 8($4)      # encoding: [0x44,0x38,0x08,0x00]
+# CHECK-EL: sw     $5, 4($6)      # encoding: [0xa6,0xf8,0x04,0x00]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: lb     $5, 8($4)      # encoding: [0x1c,0xa4,0x00,0x08]
+# CHECK-EB: lbu    $6, 8($4)      # encoding: [0x14,0xc4,0x00,0x08]
+# CHECK-EB: lh     $2, 8($4)      # encoding: [0x3c,0x44,0x00,0x08]
+# CHECK-EB: lhu    $4, 8($2)      # encoding: [0x34,0x82,0x00,0x08]
+# CHECK-EB: lw     $6, 4($5)      # encoding: [0xfc,0xc5,0x00,0x04]
+# CHECK-EB: sb     $5, 8($4)      # encoding: [0x18,0xa4,0x00,0x08]
+# CHECK-EB: sh     $2, 8($4)      # encoding: [0x38,0x44,0x00,0x08]
+# CHECK-EB: sw     $5, 4($6)      # encoding: [0xf8,0xa6,0x00,0x04]
      lb     $5, 8($4)
      lbu    $6, 8($4)
      lh     $2, 8($4)
diff --git a/test/MC/Mips/micromips-loadstore-unaligned.s b/test/MC/Mips/micromips-loadstore-unaligned.s
new file mode 100644
index 000000000000..ab1d8b935a10
--- /dev/null
+++ b/test/MC/Mips/micromips-loadstore-unaligned.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s
+# Check that the assembler can handle the documented syntax
+# for loads and stores.
+#------------------------------------------------------------------------------
+# Load and Store unaligned instructions
+#------------------------------------------------------------------------------
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: lwl $4, 16($5)   # encoding: [0x85,0x60,0x10,0x00]
+# CHECK-EL: lwr $4, 16($5)   # encoding: [0x85,0x60,0x10,0x10]
+# CHECK-EL: swl $4, 16($5)   # encoding: [0x85,0x60,0x10,0x80]
+# CHECK-EL: swr $4, 16($5)   # encoding: [0x85,0x60,0x10,0x90]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: lwl $4, 16($5)   # encoding: [0x60,0x85,0x00,0x10]
+# CHECK-EB: lwr $4, 16($5)   # encoding: [0x60,0x85,0x10,0x10]
+# CHECK-EB: swl $4, 16($5)   # encoding: [0x60,0x85,0x80,0x10]
+# CHECK-EB: swr $4, 16($5)   # encoding: [0x60,0x85,0x90,0x10]
+     lwl  $4, 16($5)
+     lwr  $4, 16($5)
+     swl  $4, 16($5)
+     swr  $4, 16($5)
diff --git a/test/MC/Mips/micromips-long-branch.ll b/test/MC/Mips/micromips-long-branch.ll
new file mode 100644
index 000000000000..3267f4a729ac
--- /dev/null
+++ b/test/MC/Mips/micromips-long-branch.ll
@@ -0,0 +1,16437 @@
+; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \
+; RUN: -relocation-model=pic -O3 -o - | FileCheck %s
+
+@a = common global [10 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK:  addiu $sp, $sp, -8
+; CHECK:  sw  $ra, 0($sp)
+; CHECK:  lui $[[REG1:[0-9]+]], 2
+; CHECK:  addiu $[[REG1]], $[[REG1]], 0
+; CHECK:  addu  $[[REG1]], $ra, $[[REG1]]
+; CHECK:  lw  $ra, 0($sp)
+; CHECK:  jr  $[[REG1]]
+; CHECK:  addiu $sp, $sp, 8
+
+for.body:
+  %1 = load i32* %i, align 4
+  %2 = load i32* %i, align 4
+  %idxprom = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [10 x i32]* @a, i32 0, i64 %idxprom
+  store i32 %1, i32* %arrayidx, align 4  %nop0 = alloca i1, i1 0
+  %nop1 = alloca i1, i1 0
+  %nop2 = alloca i1, i1 0
+  %nop3 = alloca i1, i1 0
+  %nop4 = alloca i1, i1 0
+  %nop5 = alloca i1, i1 0
+  %nop6 = alloca i1, i1 0
+  %nop7 = alloca i1, i1 0
+  %nop8 = alloca i1, i1 0
+  %nop9 = alloca i1, i1 0
+  %nop10 = alloca i1, i1 0
+  %nop11 = alloca i1, i1 0
+  %nop12 = alloca i1, i1 0
+  %nop13 = alloca i1, i1 0
+  %nop14 = alloca i1, i1 0
+  %nop15 = alloca i1, i1 0
+  %nop16 = alloca i1, i1 0
+  %nop17 = alloca i1, i1 0
+  %nop18 = alloca i1, i1 0
+  %nop19 = alloca i1, i1 0
+  %nop20 = alloca i1, i1 0
+  %nop21 = alloca i1, i1 0
+  %nop22 = alloca i1, i1 0
+  %nop23 = alloca i1, i1 0
+  %nop24 = alloca i1, i1 0
+  %nop25 = alloca i1, i1 0
+  %nop26 = alloca i1, i1 0
+  %nop27 = alloca i1, i1 0
+  %nop28 = alloca i1, i1 0
+  %nop29 = alloca i1, i1 0
+  %nop30 = alloca i1, i1 0
+  %nop31 = alloca i1, i1 0
+  %nop32 = alloca i1, i1 0
+  %nop33 = alloca i1, i1 0
+  %nop34 = alloca i1, i1 0
+  %nop35 = alloca i1, i1 0
+  %nop36 = alloca i1, i1 0
+  %nop37 = alloca i1, i1 0
+  %nop38 = alloca i1, i1 0
+  %nop39 = alloca i1, i1 0
+  %nop40 = alloca i1, i1 0
+  %nop41 = alloca i1, i1 0
+  %nop42 = alloca i1, i1 0
+  %nop43 = alloca i1, i1 0
+  %nop44 = alloca i1, i1 0
+  %nop45 = alloca i1, i1 0
+  %nop46 = alloca i1, i1 0
+  %nop47 = alloca i1, i1 0
+  %nop48 = alloca i1, i1 0
+  %nop49 = alloca i1, i1 0
+  %nop50 = alloca i1, i1 0
+  %nop51 = alloca i1, i1 0
+  %nop52 = alloca i1, i1 0
+  %nop53 = alloca i1, i1 0
+  %nop54 = alloca i1, i1 0
+  %nop55 = alloca i1, i1 0
+  %nop56 = alloca i1, i1 0
+  %nop57 = alloca i1, i1 0
+  %nop58 = alloca i1, i1 0
+  %nop59 = alloca i1, i1 0
+  %nop60 = alloca i1, i1 0
+  %nop61 = alloca i1, i1 0
+  %nop62 = alloca i1, i1 0
+  %nop63 = alloca i1, i1 0
+  %nop64 = alloca i1, i1 0
+  %nop65 = alloca i1, i1 0
+  %nop66 = alloca i1, i1 0
+  %nop67 = alloca i1, i1 0
+  %nop68 = alloca i1, i1 0
+  %nop69 = alloca i1, i1 0
+  %nop70 = alloca i1, i1 0
+  %nop71 = alloca i1, i1 0
+  %nop72 = alloca i1, i1 0
+  %nop73 = alloca i1, i1 0
+  %nop74 = alloca i1, i1 0
+  %nop75 = alloca i1, i1 0
+  %nop76 = alloca i1, i1 0
+  %nop77 = alloca i1, i1 0
+  %nop78 = alloca i1, i1 0
+  %nop79 = alloca i1, i1 0
+  %nop80 = alloca i1, i1 0
+  %nop81 = alloca i1, i1 0
+  %nop82 = alloca i1, i1 0
+  %nop83 = alloca i1, i1 0
+  %nop84 = alloca i1, i1 0
+  %nop85 = alloca i1, i1 0
+  %nop86 = alloca i1, i1 0
+  %nop87 = alloca i1, i1 0
+  %nop88 = alloca i1, i1 0
+  %nop89 = alloca i1, i1 0
+  %nop90 = alloca i1, i1 0
+  %nop91 = alloca i1, i1 0
+  %nop92 = alloca i1, i1 0
+  %nop93 = alloca i1, i1 0
+  %nop94 = alloca i1, i1 0
+  %nop95 = alloca i1, i1 0
+  %nop96 = alloca i1, i1 0
+  %nop97 = alloca i1, i1 0
+  %nop98 = alloca i1, i1 0
+  %nop99 = alloca i1, i1 0
+  %nop100 = alloca i1, i1 0
+  %nop101 = alloca i1, i1 0
+  %nop102 = alloca i1, i1 0
+  %nop103 = alloca i1, i1 0
+  %nop104 = alloca i1, i1 0
+  %nop105 = alloca i1, i1 0
+  %nop106 = alloca i1, i1 0
+  %nop107 = alloca i1, i1 0
+  %nop108 = alloca i1, i1 0
+  %nop109 = alloca i1, i1 0
+  %nop110 = alloca i1, i1 0
+  %nop111 = alloca i1, i1 0
+  %nop112 = alloca i1, i1 0
+  %nop113 = alloca i1, i1 0
+  %nop114 = alloca i1, i1 0
+  %nop115 = alloca i1, i1 0
+  %nop116 = alloca i1, i1 0
+  %nop117 = alloca i1, i1 0
+  %nop118 = alloca i1, i1 0
+  %nop119 = alloca i1, i1 0
+  %nop120 = alloca i1, i1 0
+  %nop121 = alloca i1, i1 0
+  %nop122 = alloca i1, i1 0
+  %nop123 = alloca i1, i1 0
+  %nop124 = alloca i1, i1 0
+  %nop125 = alloca i1, i1 0
+  %nop126 = alloca i1, i1 0
+  %nop127 = alloca i1, i1 0
+  %nop128 = alloca i1, i1 0
+  %nop129 = alloca i1, i1 0
+  %nop130 = alloca i1, i1 0
+  %nop131 = alloca i1, i1 0
+  %nop132 = alloca i1, i1 0
+  %nop133 = alloca i1, i1 0
+  %nop134 = alloca i1, i1 0
+  %nop135 = alloca i1, i1 0
+  %nop136 = alloca i1, i1 0
+  %nop137 = alloca i1, i1 0
+  %nop138 = alloca i1, i1 0
+  %nop139 = alloca i1, i1 0
+  %nop140 = alloca i1, i1 0
+  %nop141 = alloca i1, i1 0
+  %nop142 = alloca i1, i1 0
+  %nop143 = alloca i1, i1 0
+  %nop144 = alloca i1, i1 0
+  %nop145 = alloca i1, i1 0
+  %nop146 = alloca i1, i1 0
+  %nop147 = alloca i1, i1 0
+  %nop148 = alloca i1, i1 0
+  %nop149 = alloca i1, i1 0
+  %nop150 = alloca i1, i1 0
+  %nop151 = alloca i1, i1 0
+  %nop152 = alloca i1, i1 0
+  %nop153 = alloca i1, i1 0
+  %nop154 = alloca i1, i1 0
+  %nop155 = alloca i1, i1 0
+  %nop156 = alloca i1, i1 0
+  %nop157 = alloca i1, i1 0
+  %nop158 = alloca i1, i1 0
+  %nop159 = alloca i1, i1 0
+  %nop160 = alloca i1, i1 0
+  %nop161 = alloca i1, i1 0
+  %nop162 = alloca i1, i1 0
+  %nop163 = alloca i1, i1 0
+  %nop164 = alloca i1, i1 0
+  %nop165 = alloca i1, i1 0
+  %nop166 = alloca i1, i1 0
+  %nop167 = alloca i1, i1 0
+  %nop168 = alloca i1, i1 0
+  %nop169 = alloca i1, i1 0
+  %nop170 = alloca i1, i1 0
+  %nop171 = alloca i1, i1 0
+  %nop172 = alloca i1, i1 0
+  %nop173 = alloca i1, i1 0
+  %nop174 = alloca i1, i1 0
+  %nop175 = alloca i1, i1 0
+  %nop176 = alloca i1, i1 0
+  %nop177 = alloca i1, i1 0
+  %nop178 = alloca i1, i1 0
+  %nop179 = alloca i1, i1 0
+  %nop180 = alloca i1, i1 0
+  %nop181 = alloca i1, i1 0
+  %nop182 = alloca i1, i1 0
+  %nop183 = alloca i1, i1 0
+  %nop184 = alloca i1, i1 0
+  %nop185 = alloca i1, i1 0
+  %nop186 = alloca i1, i1 0
+  %nop187 = alloca i1, i1 0
+  %nop188 = alloca i1, i1 0
+  %nop189 = alloca i1, i1 0
+  %nop190 = alloca i1, i1 0
+  %nop191 = alloca i1, i1 0
+  %nop192 = alloca i1, i1 0
+  %nop193 = alloca i1, i1 0
+  %nop194 = alloca i1, i1 0
+  %nop195 = alloca i1, i1 0
+  %nop196 = alloca i1, i1 0
+  %nop197 = alloca i1, i1 0
+  %nop198 = alloca i1, i1 0
+  %nop199 = alloca i1, i1 0
+  %nop200 = alloca i1, i1 0
+  %nop201 = alloca i1, i1 0
+  %nop202 = alloca i1, i1 0
+  %nop203 = alloca i1, i1 0
+  %nop204 = alloca i1, i1 0
+  %nop205 = alloca i1, i1 0
+  %nop206 = alloca i1, i1 0
+  %nop207 = alloca i1, i1 0
+  %nop208 = alloca i1, i1 0
+  %nop209 = alloca i1, i1 0
+  %nop210 = alloca i1, i1 0
+  %nop211 = alloca i1, i1 0
+  %nop212 = alloca i1, i1 0
+  %nop213 = alloca i1, i1 0
+  %nop214 = alloca i1, i1 0
+  %nop215 = alloca i1, i1 0
+  %nop216 = alloca i1, i1 0
+  %nop217 = alloca i1, i1 0
+  %nop218 = alloca i1, i1 0
+  %nop219 = alloca i1, i1 0
+  %nop220 = alloca i1, i1 0
+  %nop221 = alloca i1, i1 0
+  %nop222 = alloca i1, i1 0
+  %nop223 = alloca i1, i1 0
+  %nop224 = alloca i1, i1 0
+  %nop225 = alloca i1, i1 0
+  %nop226 = alloca i1, i1 0
+  %nop227 = alloca i1, i1 0
+  %nop228 = alloca i1, i1 0
+  %nop229 = alloca i1, i1 0
+  %nop230 = alloca i1, i1 0
+  %nop231 = alloca i1, i1 0
+  %nop232 = alloca i1, i1 0
+  %nop233 = alloca i1, i1 0
+  %nop234 = alloca i1, i1 0
+  %nop235 = alloca i1, i1 0
+  %nop236 = alloca i1, i1 0
+  %nop237 = alloca i1, i1 0
+  %nop238 = alloca i1, i1 0
+  %nop239 = alloca i1, i1 0
+  %nop240 = alloca i1, i1 0
+  %nop241 = alloca i1, i1 0
+  %nop242 = alloca i1, i1 0
+  %nop243 = alloca i1, i1 0
+  %nop244 = alloca i1, i1 0
+  %nop245 = alloca i1, i1 0
+  %nop246 = alloca i1, i1 0
+  %nop247 = alloca i1, i1 0
+  %nop248 = alloca i1, i1 0
+  %nop249 = alloca i1, i1 0
+  %nop250 = alloca i1, i1 0
+  %nop251 = alloca i1, i1 0
+  %nop252 = alloca i1, i1 0
+  %nop253 = alloca i1, i1 0
+  %nop254 = alloca i1, i1 0
+  %nop255 = alloca i1, i1 0
+  %nop256 = alloca i1, i1 0
+  %nop257 = alloca i1, i1 0
+  %nop258 = alloca i1, i1 0
+  %nop259 = alloca i1, i1 0
+  %nop260 = alloca i1, i1 0
+  %nop261 = alloca i1, i1 0
+  %nop262 = alloca i1, i1 0
+  %nop263 = alloca i1, i1 0
+  %nop264 = alloca i1, i1 0
+  %nop265 = alloca i1, i1 0
+  %nop266 = alloca i1, i1 0
+  %nop267 = alloca i1, i1 0
+  %nop268 = alloca i1, i1 0
+  %nop269 = alloca i1, i1 0
+  %nop270 = alloca i1, i1 0
+  %nop271 = alloca i1, i1 0
+  %nop272 = alloca i1, i1 0
+  %nop273 = alloca i1, i1 0
+  %nop274 = alloca i1, i1 0
+  %nop275 = alloca i1, i1 0
+  %nop276 = alloca i1, i1 0
+  %nop277 = alloca i1, i1 0
+  %nop278 = alloca i1, i1 0
+  %nop279 = alloca i1, i1 0
+  %nop280 = alloca i1, i1 0
+  %nop281 = alloca i1, i1 0
+  %nop282 = alloca i1, i1 0
+  %nop283 = alloca i1, i1 0
+  %nop284 = alloca i1, i1 0
+  %nop285 = alloca i1, i1 0
+  %nop286 = alloca i1, i1 0
+  %nop287 = alloca i1, i1 0
+  %nop288 = alloca i1, i1 0
+  %nop289 = alloca i1, i1 0
+  %nop290 = alloca i1, i1 0
+  %nop291 = alloca i1, i1 0
+  %nop292 = alloca i1, i1 0
+  %nop293 = alloca i1, i1 0
+  %nop294 = alloca i1, i1 0
+  %nop295 = alloca i1, i1 0
+  %nop296 = alloca i1, i1 0
+  %nop297 = alloca i1, i1 0
+  %nop298 = alloca i1, i1 0
+  %nop299 = alloca i1, i1 0
+  %nop300 = alloca i1, i1 0
+  %nop301 = alloca i1, i1 0
+  %nop302 = alloca i1, i1 0
+  %nop303 = alloca i1, i1 0
+  %nop304 = alloca i1, i1 0
+  %nop305 = alloca i1, i1 0
+  %nop306 = alloca i1, i1 0
+  %nop307 = alloca i1, i1 0
+  %nop308 = alloca i1, i1 0
+  %nop309 = alloca i1, i1 0
+  %nop310 = alloca i1, i1 0
+  %nop311 = alloca i1, i1 0
+  %nop312 = alloca i1, i1 0
+  %nop313 = alloca i1, i1 0
+  %nop314 = alloca i1, i1 0
+  %nop315 = alloca i1, i1 0
+  %nop316 = alloca i1, i1 0
+  %nop317 = alloca i1, i1 0
+  %nop318 = alloca i1, i1 0
+  %nop319 = alloca i1, i1 0
+  %nop320 = alloca i1, i1 0
+  %nop321 = alloca i1, i1 0
+  %nop322 = alloca i1, i1 0
+  %nop323 = alloca i1, i1 0
+  %nop324 = alloca i1, i1 0
+  %nop325 = alloca i1, i1 0
+  %nop326 = alloca i1, i1 0
+  %nop327 = alloca i1, i1 0
+  %nop328 = alloca i1, i1 0
+  %nop329 = alloca i1, i1 0
+  %nop330 = alloca i1, i1 0
+  %nop331 = alloca i1, i1 0
+  %nop332 = alloca i1, i1 0
+  %nop333 = alloca i1, i1 0
+  %nop334 = alloca i1, i1 0
+  %nop335 = alloca i1, i1 0
+  %nop336 = alloca i1, i1 0
+  %nop337 = alloca i1, i1 0
+  %nop338 = alloca i1, i1 0
+  %nop339 = alloca i1, i1 0
+  %nop340 = alloca i1, i1 0
+  %nop341 = alloca i1, i1 0
+  %nop342 = alloca i1, i1 0
+  %nop343 = alloca i1, i1 0
+  %nop344 = alloca i1, i1 0
+  %nop345 = alloca i1, i1 0
+  %nop346 = alloca i1, i1 0
+  %nop347 = alloca i1, i1 0
+  %nop348 = alloca i1, i1 0
+  %nop349 = alloca i1, i1 0
+  %nop350 = alloca i1, i1 0
+  %nop351 = alloca i1, i1 0
+  %nop352 = alloca i1, i1 0
+  %nop353 = alloca i1, i1 0
+  %nop354 = alloca i1, i1 0
+  %nop355 = alloca i1, i1 0
+  %nop356 = alloca i1, i1 0
+  %nop357 = alloca i1, i1 0
+  %nop358 = alloca i1, i1 0
+  %nop359 = alloca i1, i1 0
+  %nop360 = alloca i1, i1 0
+  %nop361 = alloca i1, i1 0
+  %nop362 = alloca i1, i1 0
+  %nop363 = alloca i1, i1 0
+  %nop364 = alloca i1, i1 0
+  %nop365 = alloca i1, i1 0
+  %nop366 = alloca i1, i1 0
+  %nop367 = alloca i1, i1 0
+  %nop368 = alloca i1, i1 0
+  %nop369 = alloca i1, i1 0
+  %nop370 = alloca i1, i1 0
+  %nop371 = alloca i1, i1 0
+  %nop372 = alloca i1, i1 0
+  %nop373 = alloca i1, i1 0
+  %nop374 = alloca i1, i1 0
+  %nop375 = alloca i1, i1 0
+  %nop376 = alloca i1, i1 0
+  %nop377 = alloca i1, i1 0
+  %nop378 = alloca i1, i1 0
+  %nop379 = alloca i1, i1 0
+  %nop380 = alloca i1, i1 0
+  %nop381 = alloca i1, i1 0
+  %nop382 = alloca i1, i1 0
+  %nop383 = alloca i1, i1 0
+  %nop384 = alloca i1, i1 0
+  %nop385 = alloca i1, i1 0
+  %nop386 = alloca i1, i1 0
+  %nop387 = alloca i1, i1 0
+  %nop388 = alloca i1, i1 0
+  %nop389 = alloca i1, i1 0
+  %nop390 = alloca i1, i1 0
+  %nop391 = alloca i1, i1 0
+  %nop392 = alloca i1, i1 0
+  %nop393 = alloca i1, i1 0
+  %nop394 = alloca i1, i1 0
+  %nop395 = alloca i1, i1 0
+  %nop396 = alloca i1, i1 0
+  %nop397 = alloca i1, i1 0
+  %nop398 = alloca i1, i1 0
+  %nop399 = alloca i1, i1 0
+  %nop400 = alloca i1, i1 0
+  %nop401 = alloca i1, i1 0
+  %nop402 = alloca i1, i1 0
+  %nop403 = alloca i1, i1 0
+  %nop404 = alloca i1, i1 0
+  %nop405 = alloca i1, i1 0
+  %nop406 = alloca i1, i1 0
+  %nop407 = alloca i1, i1 0
+  %nop408 = alloca i1, i1 0
+  %nop409 = alloca i1, i1 0
+  %nop410 = alloca i1, i1 0
+  %nop411 = alloca i1, i1 0
+  %nop412 = alloca i1, i1 0
+  %nop413 = alloca i1, i1 0
+  %nop414 = alloca i1, i1 0
+  %nop415 = alloca i1, i1 0
+  %nop416 = alloca i1, i1 0
+  %nop417 = alloca i1, i1 0
+  %nop418 = alloca i1, i1 0
+  %nop419 = alloca i1, i1 0
+  %nop420 = alloca i1, i1 0
+  %nop421 = alloca i1, i1 0
+  %nop422 = alloca i1, i1 0
+  %nop423 = alloca i1, i1 0
+  %nop424 = alloca i1, i1 0
+  %nop425 = alloca i1, i1 0
+  %nop426 = alloca i1, i1 0
+  %nop427 = alloca i1, i1 0
+  %nop428 = alloca i1, i1 0
+  %nop429 = alloca i1, i1 0
+  %nop430 = alloca i1, i1 0
+  %nop431 = alloca i1, i1 0
+  %nop432 = alloca i1, i1 0
+  %nop433 = alloca i1, i1 0
+  %nop434 = alloca i1, i1 0
+  %nop435 = alloca i1, i1 0
+  %nop436 = alloca i1, i1 0
+  %nop437 = alloca i1, i1 0
+  %nop438 = alloca i1, i1 0
+  %nop439 = alloca i1, i1 0
+  %nop440 = alloca i1, i1 0
+  %nop441 = alloca i1, i1 0
+  %nop442 = alloca i1, i1 0
+  %nop443 = alloca i1, i1 0
+  %nop444 = alloca i1, i1 0
+  %nop445 = alloca i1, i1 0
+  %nop446 = alloca i1, i1 0
+  %nop447 = alloca i1, i1 0
+  %nop448 = alloca i1, i1 0
+  %nop449 = alloca i1, i1 0
+  %nop450 = alloca i1, i1 0
+  %nop451 = alloca i1, i1 0
+  %nop452 = alloca i1, i1 0
+  %nop453 = alloca i1, i1 0
+  %nop454 = alloca i1, i1 0
+  %nop455 = alloca i1, i1 0
+  %nop456 = alloca i1, i1 0
+  %nop457 = alloca i1, i1 0
+  %nop458 = alloca i1, i1 0
+  %nop459 = alloca i1, i1 0
+  %nop460 = alloca i1, i1 0
+  %nop461 = alloca i1, i1 0
+  %nop462 = alloca i1, i1 0
+  %nop463 = alloca i1, i1 0
+  %nop464 = alloca i1, i1 0
+  %nop465 = alloca i1, i1 0
+  %nop466 = alloca i1, i1 0
+  %nop467 = alloca i1, i1 0
+  %nop468 = alloca i1, i1 0
+  %nop469 = alloca i1, i1 0
+  %nop470 = alloca i1, i1 0
+  %nop471 = alloca i1, i1 0
+  %nop472 = alloca i1, i1 0
+  %nop473 = alloca i1, i1 0
+  %nop474 = alloca i1, i1 0
+  %nop475 = alloca i1, i1 0
+  %nop476 = alloca i1, i1 0
+  %nop477 = alloca i1, i1 0
+  %nop478 = alloca i1, i1 0
+  %nop479 = alloca i1, i1 0
+  %nop480 = alloca i1, i1 0
+  %nop481 = alloca i1, i1 0
+  %nop482 = alloca i1, i1 0
+  %nop483 = alloca i1, i1 0
+  %nop484 = alloca i1, i1 0
+  %nop485 = alloca i1, i1 0
+  %nop486 = alloca i1, i1 0
+  %nop487 = alloca i1, i1 0
+  %nop488 = alloca i1, i1 0
+  %nop489 = alloca i1, i1 0
+  %nop490 = alloca i1, i1 0
+  %nop491 = alloca i1, i1 0
+  %nop492 = alloca i1, i1 0
+  %nop493 = alloca i1, i1 0
+  %nop494 = alloca i1, i1 0
+  %nop495 = alloca i1, i1 0
+  %nop496 = alloca i1, i1 0
+  %nop497 = alloca i1, i1 0
+  %nop498 = alloca i1, i1 0
+  %nop499 = alloca i1, i1 0
+  %nop500 = alloca i1, i1 0
+  %nop501 = alloca i1, i1 0
+  %nop502 = alloca i1, i1 0
+  %nop503 = alloca i1, i1 0
+  %nop504 = alloca i1, i1 0
+  %nop505 = alloca i1, i1 0
+  %nop506 = alloca i1, i1 0
+  %nop507 = alloca i1, i1 0
+  %nop508 = alloca i1, i1 0
+  %nop509 = alloca i1, i1 0
+  %nop510 = alloca i1, i1 0
+  %nop511 = alloca i1, i1 0
+  %nop512 = alloca i1, i1 0
+  %nop513 = alloca i1, i1 0
+  %nop514 = alloca i1, i1 0
+  %nop515 = alloca i1, i1 0
+  %nop516 = alloca i1, i1 0
+  %nop517 = alloca i1, i1 0
+  %nop518 = alloca i1, i1 0
+  %nop519 = alloca i1, i1 0
+  %nop520 = alloca i1, i1 0
+  %nop521 = alloca i1, i1 0
+  %nop522 = alloca i1, i1 0
+  %nop523 = alloca i1, i1 0
+  %nop524 = alloca i1, i1 0
+  %nop525 = alloca i1, i1 0
+  %nop526 = alloca i1, i1 0
+  %nop527 = alloca i1, i1 0
+  %nop528 = alloca i1, i1 0
+  %nop529 = alloca i1, i1 0
+  %nop530 = alloca i1, i1 0
+  %nop531 = alloca i1, i1 0
+  %nop532 = alloca i1, i1 0
+  %nop533 = alloca i1, i1 0
+  %nop534 = alloca i1, i1 0
+  %nop535 = alloca i1, i1 0
+  %nop536 = alloca i1, i1 0
+  %nop537 = alloca i1, i1 0
+  %nop538 = alloca i1, i1 0
+  %nop539 = alloca i1, i1 0
+  %nop540 = alloca i1, i1 0
+  %nop541 = alloca i1, i1 0
+  %nop542 = alloca i1, i1 0
+  %nop543 = alloca i1, i1 0
+  %nop544 = alloca i1, i1 0
+  %nop545 = alloca i1, i1 0
+  %nop546 = alloca i1, i1 0
+  %nop547 = alloca i1, i1 0
+  %nop548 = alloca i1, i1 0
+  %nop549 = alloca i1, i1 0
+  %nop550 = alloca i1, i1 0
+  %nop551 = alloca i1, i1 0
+  %nop552 = alloca i1, i1 0
+  %nop553 = alloca i1, i1 0
+  %nop554 = alloca i1, i1 0
+  %nop555 = alloca i1, i1 0
+  %nop556 = alloca i1, i1 0
+  %nop557 = alloca i1, i1 0
+  %nop558 = alloca i1, i1 0
+  %nop559 = alloca i1, i1 0
+  %nop560 = alloca i1, i1 0
+  %nop561 = alloca i1, i1 0
+  %nop562 = alloca i1, i1 0
+  %nop563 = alloca i1, i1 0
+  %nop564 = alloca i1, i1 0
+  %nop565 = alloca i1, i1 0
+  %nop566 = alloca i1, i1 0
+  %nop567 = alloca i1, i1 0
+  %nop568 = alloca i1, i1 0
+  %nop569 = alloca i1, i1 0
+  %nop570 = alloca i1, i1 0
+  %nop571 = alloca i1, i1 0
+  %nop572 = alloca i1, i1 0
+  %nop573 = alloca i1, i1 0
+  %nop574 = alloca i1, i1 0
+  %nop575 = alloca i1, i1 0
+  %nop576 = alloca i1, i1 0
+  %nop577 = alloca i1, i1 0
+  %nop578 = alloca i1, i1 0
+  %nop579 = alloca i1, i1 0
+  %nop580 = alloca i1, i1 0
+  %nop581 = alloca i1, i1 0
+  %nop582 = alloca i1, i1 0
+  %nop583 = alloca i1, i1 0
+  %nop584 = alloca i1, i1 0
+  %nop585 = alloca i1, i1 0
+  %nop586 = alloca i1, i1 0
+  %nop587 = alloca i1, i1 0
+  %nop588 = alloca i1, i1 0
+  %nop589 = alloca i1, i1 0
+  %nop590 = alloca i1, i1 0
+  %nop591 = alloca i1, i1 0
+  %nop592 = alloca i1, i1 0
+  %nop593 = alloca i1, i1 0
+  %nop594 = alloca i1, i1 0
+  %nop595 = alloca i1, i1 0
+  %nop596 = alloca i1, i1 0
+  %nop597 = alloca i1, i1 0
+  %nop598 = alloca i1, i1 0
+  %nop599 = alloca i1, i1 0
+  %nop600 = alloca i1, i1 0
+  %nop601 = alloca i1, i1 0
+  %nop602 = alloca i1, i1 0
+  %nop603 = alloca i1, i1 0
+  %nop604 = alloca i1, i1 0
+  %nop605 = alloca i1, i1 0
+  %nop606 = alloca i1, i1 0
+  %nop607 = alloca i1, i1 0
+  %nop608 = alloca i1, i1 0
+  %nop609 = alloca i1, i1 0
+  %nop610 = alloca i1, i1 0
+  %nop611 = alloca i1, i1 0
+  %nop612 = alloca i1, i1 0
+  %nop613 = alloca i1, i1 0
+  %nop614 = alloca i1, i1 0
+  %nop615 = alloca i1, i1 0
+  %nop616 = alloca i1, i1 0
+  %nop617 = alloca i1, i1 0
+  %nop618 = alloca i1, i1 0
+  %nop619 = alloca i1, i1 0
+  %nop620 = alloca i1, i1 0
+  %nop621 = alloca i1, i1 0
+  %nop622 = alloca i1, i1 0
+  %nop623 = alloca i1, i1 0
+  %nop624 = alloca i1, i1 0
+  %nop625 = alloca i1, i1 0
+  %nop626 = alloca i1, i1 0
+  %nop627 = alloca i1, i1 0
+  %nop628 = alloca i1, i1 0
+  %nop629 = alloca i1, i1 0
+  %nop630 = alloca i1, i1 0
+  %nop631 = alloca i1, i1 0
+  %nop632 = alloca i1, i1 0
+  %nop633 = alloca i1, i1 0
+  %nop634 = alloca i1, i1 0
+  %nop635 = alloca i1, i1 0
+  %nop636 = alloca i1, i1 0
+  %nop637 = alloca i1, i1 0
+  %nop638 = alloca i1, i1 0
+  %nop639 = alloca i1, i1 0
+  %nop640 = alloca i1, i1 0
+  %nop641 = alloca i1, i1 0
+  %nop642 = alloca i1, i1 0
+  %nop643 = alloca i1, i1 0
+  %nop644 = alloca i1, i1 0
+  %nop645 = alloca i1, i1 0
+  %nop646 = alloca i1, i1 0
+  %nop647 = alloca i1, i1 0
+  %nop648 = alloca i1, i1 0
+  %nop649 = alloca i1, i1 0
+  %nop650 = alloca i1, i1 0
+  %nop651 = alloca i1, i1 0
+  %nop652 = alloca i1, i1 0
+  %nop653 = alloca i1, i1 0
+  %nop654 = alloca i1, i1 0
+  %nop655 = alloca i1, i1 0
+  %nop656 = alloca i1, i1 0
+  %nop657 = alloca i1, i1 0
+  %nop658 = alloca i1, i1 0
+  %nop659 = alloca i1, i1 0
+  %nop660 = alloca i1, i1 0
+  %nop661 = alloca i1, i1 0
+  %nop662 = alloca i1, i1 0
+  %nop663 = alloca i1, i1 0
+  %nop664 = alloca i1, i1 0
+  %nop665 = alloca i1, i1 0
+  %nop666 = alloca i1, i1 0
+  %nop667 = alloca i1, i1 0
+  %nop668 = alloca i1, i1 0
+  %nop669 = alloca i1, i1 0
+  %nop670 = alloca i1, i1 0
+  %nop671 = alloca i1, i1 0
+  %nop672 = alloca i1, i1 0
+  %nop673 = alloca i1, i1 0
+  %nop674 = alloca i1, i1 0
+  %nop675 = alloca i1, i1 0
+  %nop676 = alloca i1, i1 0
+  %nop677 = alloca i1, i1 0
+  %nop678 = alloca i1, i1 0
+  %nop679 = alloca i1, i1 0
+  %nop680 = alloca i1, i1 0
+  %nop681 = alloca i1, i1 0
+  %nop682 = alloca i1, i1 0
+  %nop683 = alloca i1, i1 0
+  %nop684 = alloca i1, i1 0
+  %nop685 = alloca i1, i1 0
+  %nop686 = alloca i1, i1 0
+  %nop687 = alloca i1, i1 0
+  %nop688 = alloca i1, i1 0
+  %nop689 = alloca i1, i1 0
+  %nop690 = alloca i1, i1 0
+  %nop691 = alloca i1, i1 0
+  %nop692 = alloca i1, i1 0
+  %nop693 = alloca i1, i1 0
+  %nop694 = alloca i1, i1 0
+  %nop695 = alloca i1, i1 0
+  %nop696 = alloca i1, i1 0
+  %nop697 = alloca i1, i1 0
+  %nop698 = alloca i1, i1 0
+  %nop699 = alloca i1, i1 0
+  %nop700 = alloca i1, i1 0
+  %nop701 = alloca i1, i1 0
+  %nop702 = alloca i1, i1 0
+  %nop703 = alloca i1, i1 0
+  %nop704 = alloca i1, i1 0
+  %nop705 = alloca i1, i1 0
+  %nop706 = alloca i1, i1 0
+  %nop707 = alloca i1, i1 0
+  %nop708 = alloca i1, i1 0
+  %nop709 = alloca i1, i1 0
+  %nop710 = alloca i1, i1 0
+  %nop711 = alloca i1, i1 0
+  %nop712 = alloca i1, i1 0
+  %nop713 = alloca i1, i1 0
+  %nop714 = alloca i1, i1 0
+  %nop715 = alloca i1, i1 0
+  %nop716 = alloca i1, i1 0
+  %nop717 = alloca i1, i1 0
+  %nop718 = alloca i1, i1 0
+  %nop719 = alloca i1, i1 0
+  %nop720 = alloca i1, i1 0
+  %nop721 = alloca i1, i1 0
+  %nop722 = alloca i1, i1 0
+  %nop723 = alloca i1, i1 0
+  %nop724 = alloca i1, i1 0
+  %nop725 = alloca i1, i1 0
+  %nop726 = alloca i1, i1 0
+  %nop727 = alloca i1, i1 0
+  %nop728 = alloca i1, i1 0
+  %nop729 = alloca i1, i1 0
+  %nop730 = alloca i1, i1 0
+  %nop731 = alloca i1, i1 0
+  %nop732 = alloca i1, i1 0
+  %nop733 = alloca i1, i1 0
+  %nop734 = alloca i1, i1 0
+  %nop735 = alloca i1, i1 0
+  %nop736 = alloca i1, i1 0
+  %nop737 = alloca i1, i1 0
+  %nop738 = alloca i1, i1 0
+  %nop739 = alloca i1, i1 0
+  %nop740 = alloca i1, i1 0
+  %nop741 = alloca i1, i1 0
+  %nop742 = alloca i1, i1 0
+  %nop743 = alloca i1, i1 0
+  %nop744 = alloca i1, i1 0
+  %nop745 = alloca i1, i1 0
+  %nop746 = alloca i1, i1 0
+  %nop747 = alloca i1, i1 0
+  %nop748 = alloca i1, i1 0
+  %nop749 = alloca i1, i1 0
+  %nop750 = alloca i1, i1 0
+  %nop751 = alloca i1, i1 0
+  %nop752 = alloca i1, i1 0
+  %nop753 = alloca i1, i1 0
+  %nop754 = alloca i1, i1 0
+  %nop755 = alloca i1, i1 0
+  %nop756 = alloca i1, i1 0
+  %nop757 = alloca i1, i1 0
+  %nop758 = alloca i1, i1 0
+  %nop759 = alloca i1, i1 0
+  %nop760 = alloca i1, i1 0
+  %nop761 = alloca i1, i1 0
+  %nop762 = alloca i1, i1 0
+  %nop763 = alloca i1, i1 0
+  %nop764 = alloca i1, i1 0
+  %nop765 = alloca i1, i1 0
+  %nop766 = alloca i1, i1 0
+  %nop767 = alloca i1, i1 0
+  %nop768 = alloca i1, i1 0
+  %nop769 = alloca i1, i1 0
+  %nop770 = alloca i1, i1 0
+  %nop771 = alloca i1, i1 0
+  %nop772 = alloca i1, i1 0
+  %nop773 = alloca i1, i1 0
+  %nop774 = alloca i1, i1 0
+  %nop775 = alloca i1, i1 0
+  %nop776 = alloca i1, i1 0
+  %nop777 = alloca i1, i1 0
+  %nop778 = alloca i1, i1 0
+  %nop779 = alloca i1, i1 0
+  %nop780 = alloca i1, i1 0
+  %nop781 = alloca i1, i1 0
+  %nop782 = alloca i1, i1 0
+  %nop783 = alloca i1, i1 0
+  %nop784 = alloca i1, i1 0
+  %nop785 = alloca i1, i1 0
+  %nop786 = alloca i1, i1 0
+  %nop787 = alloca i1, i1 0
+  %nop788 = alloca i1, i1 0
+  %nop789 = alloca i1, i1 0
+  %nop790 = alloca i1, i1 0
+  %nop791 = alloca i1, i1 0
+  %nop792 = alloca i1, i1 0
+  %nop793 = alloca i1, i1 0
+  %nop794 = alloca i1, i1 0
+  %nop795 = alloca i1, i1 0
+  %nop796 = alloca i1, i1 0
+  %nop797 = alloca i1, i1 0
+  %nop798 = alloca i1, i1 0
+  %nop799 = alloca i1, i1 0
+  %nop800 = alloca i1, i1 0
+  %nop801 = alloca i1, i1 0
+  %nop802 = alloca i1, i1 0
+  %nop803 = alloca i1, i1 0
+  %nop804 = alloca i1, i1 0
+  %nop805 = alloca i1, i1 0
+  %nop806 = alloca i1, i1 0
+  %nop807 = alloca i1, i1 0
+  %nop808 = alloca i1, i1 0
+  %nop809 = alloca i1, i1 0
+  %nop810 = alloca i1, i1 0
+  %nop811 = alloca i1, i1 0
+  %nop812 = alloca i1, i1 0
+  %nop813 = alloca i1, i1 0
+  %nop814 = alloca i1, i1 0
+  %nop815 = alloca i1, i1 0
+  %nop816 = alloca i1, i1 0
+  %nop817 = alloca i1, i1 0
+  %nop818 = alloca i1, i1 0
+  %nop819 = alloca i1, i1 0
+  %nop820 = alloca i1, i1 0
+  %nop821 = alloca i1, i1 0
+  %nop822 = alloca i1, i1 0
+  %nop823 = alloca i1, i1 0
+  %nop824 = alloca i1, i1 0
+  %nop825 = alloca i1, i1 0
+  %nop826 = alloca i1, i1 0
+  %nop827 = alloca i1, i1 0
+  %nop828 = alloca i1, i1 0
+  %nop829 = alloca i1, i1 0
+  %nop830 = alloca i1, i1 0
+  %nop831 = alloca i1, i1 0
+  %nop832 = alloca i1, i1 0
+  %nop833 = alloca i1, i1 0
+  %nop834 = alloca i1, i1 0
+  %nop835 = alloca i1, i1 0
+  %nop836 = alloca i1, i1 0
+  %nop837 = alloca i1, i1 0
+  %nop838 = alloca i1, i1 0
+  %nop839 = alloca i1, i1 0
+  %nop840 = alloca i1, i1 0
+  %nop841 = alloca i1, i1 0
+  %nop842 = alloca i1, i1 0
+  %nop843 = alloca i1, i1 0
+  %nop844 = alloca i1, i1 0
+  %nop845 = alloca i1, i1 0
+  %nop846 = alloca i1, i1 0
+  %nop847 = alloca i1, i1 0
+  %nop848 = alloca i1, i1 0
+  %nop849 = alloca i1, i1 0
+  %nop850 = alloca i1, i1 0
+  %nop851 = alloca i1, i1 0
+  %nop852 = alloca i1, i1 0
+  %nop853 = alloca i1, i1 0
+  %nop854 = alloca i1, i1 0
+  %nop855 = alloca i1, i1 0
+  %nop856 = alloca i1, i1 0
+  %nop857 = alloca i1, i1 0
+  %nop858 = alloca i1, i1 0
+  %nop859 = alloca i1, i1 0
+  %nop860 = alloca i1, i1 0
+  %nop861 = alloca i1, i1 0
+  %nop862 = alloca i1, i1 0
+  %nop863 = alloca i1, i1 0
+  %nop864 = alloca i1, i1 0
+  %nop865 = alloca i1, i1 0
+  %nop866 = alloca i1, i1 0
+  %nop867 = alloca i1, i1 0
+  %nop868 = alloca i1, i1 0
+  %nop869 = alloca i1, i1 0
+  %nop870 = alloca i1, i1 0
+  %nop871 = alloca i1, i1 0
+  %nop872 = alloca i1, i1 0
+  %nop873 = alloca i1, i1 0
+  %nop874 = alloca i1, i1 0
+  %nop875 = alloca i1, i1 0
+  %nop876 = alloca i1, i1 0
+  %nop877 = alloca i1, i1 0
+  %nop878 = alloca i1, i1 0
+  %nop879 = alloca i1, i1 0
+  %nop880 = alloca i1, i1 0
+  %nop881 = alloca i1, i1 0
+  %nop882 = alloca i1, i1 0
+  %nop883 = alloca i1, i1 0
+  %nop884 = alloca i1, i1 0
+  %nop885 = alloca i1, i1 0
+  %nop886 = alloca i1, i1 0
+  %nop887 = alloca i1, i1 0
+  %nop888 = alloca i1, i1 0
+  %nop889 = alloca i1, i1 0
+  %nop890 = alloca i1, i1 0
+  %nop891 = alloca i1, i1 0
+  %nop892 = alloca i1, i1 0
+  %nop893 = alloca i1, i1 0
+  %nop894 = alloca i1, i1 0
+  %nop895 = alloca i1, i1 0
+  %nop896 = alloca i1, i1 0
+  %nop897 = alloca i1, i1 0
+  %nop898 = alloca i1, i1 0
+  %nop899 = alloca i1, i1 0
+  %nop900 = alloca i1, i1 0
+  %nop901 = alloca i1, i1 0
+  %nop902 = alloca i1, i1 0
+  %nop903 = alloca i1, i1 0
+  %nop904 = alloca i1, i1 0
+  %nop905 = alloca i1, i1 0
+  %nop906 = alloca i1, i1 0
+  %nop907 = alloca i1, i1 0
+  %nop908 = alloca i1, i1 0
+  %nop909 = alloca i1, i1 0
+  %nop910 = alloca i1, i1 0
+  %nop911 = alloca i1, i1 0
+  %nop912 = alloca i1, i1 0
+  %nop913 = alloca i1, i1 0
+  %nop914 = alloca i1, i1 0
+  %nop915 = alloca i1, i1 0
+  %nop916 = alloca i1, i1 0
+  %nop917 = alloca i1, i1 0
+  %nop918 = alloca i1, i1 0
+  %nop919 = alloca i1, i1 0
+  %nop920 = alloca i1, i1 0
+  %nop921 = alloca i1, i1 0
+  %nop922 = alloca i1, i1 0
+  %nop923 = alloca i1, i1 0
+  %nop924 = alloca i1, i1 0
+  %nop925 = alloca i1, i1 0
+  %nop926 = alloca i1, i1 0
+  %nop927 = alloca i1, i1 0
+  %nop928 = alloca i1, i1 0
+  %nop929 = alloca i1, i1 0
+  %nop930 = alloca i1, i1 0
+  %nop931 = alloca i1, i1 0
+  %nop932 = alloca i1, i1 0
+  %nop933 = alloca i1, i1 0
+  %nop934 = alloca i1, i1 0
+  %nop935 = alloca i1, i1 0
+  %nop936 = alloca i1, i1 0
+  %nop937 = alloca i1, i1 0
+  %nop938 = alloca i1, i1 0
+  %nop939 = alloca i1, i1 0
+  %nop940 = alloca i1, i1 0
+  %nop941 = alloca i1, i1 0
+  %nop942 = alloca i1, i1 0
+  %nop943 = alloca i1, i1 0
+  %nop944 = alloca i1, i1 0
+  %nop945 = alloca i1, i1 0
+  %nop946 = alloca i1, i1 0
+  %nop947 = alloca i1, i1 0
+  %nop948 = alloca i1, i1 0
+  %nop949 = alloca i1, i1 0
+  %nop950 = alloca i1, i1 0
+  %nop951 = alloca i1, i1 0
+  %nop952 = alloca i1, i1 0
+  %nop953 = alloca i1, i1 0
+  %nop954 = alloca i1, i1 0
+  %nop955 = alloca i1, i1 0
+  %nop956 = alloca i1, i1 0
+  %nop957 = alloca i1, i1 0
+  %nop958 = alloca i1, i1 0
+  %nop959 = alloca i1, i1 0
+  %nop960 = alloca i1, i1 0
+  %nop961 = alloca i1, i1 0
+  %nop962 = alloca i1, i1 0
+  %nop963 = alloca i1, i1 0
+  %nop964 = alloca i1, i1 0
+  %nop965 = alloca i1, i1 0
+  %nop966 = alloca i1, i1 0
+  %nop967 = alloca i1, i1 0
+  %nop968 = alloca i1, i1 0
+  %nop969 = alloca i1, i1 0
+  %nop970 = alloca i1, i1 0
+  %nop971 = alloca i1, i1 0
+  %nop972 = alloca i1, i1 0
+  %nop973 = alloca i1, i1 0
+  %nop974 = alloca i1, i1 0
+  %nop975 = alloca i1, i1 0
+  %nop976 = alloca i1, i1 0
+  %nop977 = alloca i1, i1 0
+  %nop978 = alloca i1, i1 0
+  %nop979 = alloca i1, i1 0
+  %nop980 = alloca i1, i1 0
+  %nop981 = alloca i1, i1 0
+  %nop982 = alloca i1, i1 0
+  %nop983 = alloca i1, i1 0
+  %nop984 = alloca i1, i1 0
+  %nop985 = alloca i1, i1 0
+  %nop986 = alloca i1, i1 0
+  %nop987 = alloca i1, i1 0
+  %nop988 = alloca i1, i1 0
+  %nop989 = alloca i1, i1 0
+  %nop990 = alloca i1, i1 0
+  %nop991 = alloca i1, i1 0
+  %nop992 = alloca i1, i1 0
+  %nop993 = alloca i1, i1 0
+  %nop994 = alloca i1, i1 0
+  %nop995 = alloca i1, i1 0
+  %nop996 = alloca i1, i1 0
+  %nop997 = alloca i1, i1 0
+  %nop998 = alloca i1, i1 0
+  %nop999 = alloca i1, i1 0
+  %nop1000 = alloca i1, i1 0
+  %nop1001 = alloca i1, i1 0
+  %nop1002 = alloca i1, i1 0
+  %nop1003 = alloca i1, i1 0
+  %nop1004 = alloca i1, i1 0
+  %nop1005 = alloca i1, i1 0
+  %nop1006 = alloca i1, i1 0
+  %nop1007 = alloca i1, i1 0
+  %nop1008 = alloca i1, i1 0
+  %nop1009 = alloca i1, i1 0
+  %nop1010 = alloca i1, i1 0
+  %nop1011 = alloca i1, i1 0
+  %nop1012 = alloca i1, i1 0
+  %nop1013 = alloca i1, i1 0
+  %nop1014 = alloca i1, i1 0
+  %nop1015 = alloca i1, i1 0
+  %nop1016 = alloca i1, i1 0
+  %nop1017 = alloca i1, i1 0
+  %nop1018 = alloca i1, i1 0
+  %nop1019 = alloca i1, i1 0
+  %nop1020 = alloca i1, i1 0
+  %nop1021 = alloca i1, i1 0
+  %nop1022 = alloca i1, i1 0
+  %nop1023 = alloca i1, i1 0
+  %nop1024 = alloca i1, i1 0
+  %nop1025 = alloca i1, i1 0
+  %nop1026 = alloca i1, i1 0
+  %nop1027 = alloca i1, i1 0
+  %nop1028 = alloca i1, i1 0
+  %nop1029 = alloca i1, i1 0
+  %nop1030 = alloca i1, i1 0
+  %nop1031 = alloca i1, i1 0
+  %nop1032 = alloca i1, i1 0
+  %nop1033 = alloca i1, i1 0
+  %nop1034 = alloca i1, i1 0
+  %nop1035 = alloca i1, i1 0
+  %nop1036 = alloca i1, i1 0
+  %nop1037 = alloca i1, i1 0
+  %nop1038 = alloca i1, i1 0
+  %nop1039 = alloca i1, i1 0
+  %nop1040 = alloca i1, i1 0
+  %nop1041 = alloca i1, i1 0
+  %nop1042 = alloca i1, i1 0
+  %nop1043 = alloca i1, i1 0
+  %nop1044 = alloca i1, i1 0
+  %nop1045 = alloca i1, i1 0
+  %nop1046 = alloca i1, i1 0
+  %nop1047 = alloca i1, i1 0
+  %nop1048 = alloca i1, i1 0
+  %nop1049 = alloca i1, i1 0
+  %nop1050 = alloca i1, i1 0
+  %nop1051 = alloca i1, i1 0
+  %nop1052 = alloca i1, i1 0
+  %nop1053 = alloca i1, i1 0
+  %nop1054 = alloca i1, i1 0
+  %nop1055 = alloca i1, i1 0
+  %nop1056 = alloca i1, i1 0
+  %nop1057 = alloca i1, i1 0
+  %nop1058 = alloca i1, i1 0
+  %nop1059 = alloca i1, i1 0
+  %nop1060 = alloca i1, i1 0
+  %nop1061 = alloca i1, i1 0
+  %nop1062 = alloca i1, i1 0
+  %nop1063 = alloca i1, i1 0
+  %nop1064 = alloca i1, i1 0
+  %nop1065 = alloca i1, i1 0
+  %nop1066 = alloca i1, i1 0
+  %nop1067 = alloca i1, i1 0
+  %nop1068 = alloca i1, i1 0
+  %nop1069 = alloca i1, i1 0
+  %nop1070 = alloca i1, i1 0
+  %nop1071 = alloca i1, i1 0
+  %nop1072 = alloca i1, i1 0
+  %nop1073 = alloca i1, i1 0
+  %nop1074 = alloca i1, i1 0
+  %nop1075 = alloca i1, i1 0
+  %nop1076 = alloca i1, i1 0
+  %nop1077 = alloca i1, i1 0
+  %nop1078 = alloca i1, i1 0
+  %nop1079 = alloca i1, i1 0
+  %nop1080 = alloca i1, i1 0
+  %nop1081 = alloca i1, i1 0
+  %nop1082 = alloca i1, i1 0
+  %nop1083 = alloca i1, i1 0
+  %nop1084 = alloca i1, i1 0
+  %nop1085 = alloca i1, i1 0
+  %nop1086 = alloca i1, i1 0
+  %nop1087 = alloca i1, i1 0
+  %nop1088 = alloca i1, i1 0
+  %nop1089 = alloca i1, i1 0
+  %nop1090 = alloca i1, i1 0
+  %nop1091 = alloca i1, i1 0
+  %nop1092 = alloca i1, i1 0
+  %nop1093 = alloca i1, i1 0
+  %nop1094 = alloca i1, i1 0
+  %nop1095 = alloca i1, i1 0
+  %nop1096 = alloca i1, i1 0
+  %nop1097 = alloca i1, i1 0
+  %nop1098 = alloca i1, i1 0
+  %nop1099 = alloca i1, i1 0
+  %nop1100 = alloca i1, i1 0
+  %nop1101 = alloca i1, i1 0
+  %nop1102 = alloca i1, i1 0
+  %nop1103 = alloca i1, i1 0
+  %nop1104 = alloca i1, i1 0
+  %nop1105 = alloca i1, i1 0
+  %nop1106 = alloca i1, i1 0
+  %nop1107 = alloca i1, i1 0
+  %nop1108 = alloca i1, i1 0
+  %nop1109 = alloca i1, i1 0
+  %nop1110 = alloca i1, i1 0
+  %nop1111 = alloca i1, i1 0
+  %nop1112 = alloca i1, i1 0
+  %nop1113 = alloca i1, i1 0
+  %nop1114 = alloca i1, i1 0
+  %nop1115 = alloca i1, i1 0
+  %nop1116 = alloca i1, i1 0
+  %nop1117 = alloca i1, i1 0
+  %nop1118 = alloca i1, i1 0
+  %nop1119 = alloca i1, i1 0
+  %nop1120 = alloca i1, i1 0
+  %nop1121 = alloca i1, i1 0
+  %nop1122 = alloca i1, i1 0
+  %nop1123 = alloca i1, i1 0
+  %nop1124 = alloca i1, i1 0
+  %nop1125 = alloca i1, i1 0
+  %nop1126 = alloca i1, i1 0
+  %nop1127 = alloca i1, i1 0
+  %nop1128 = alloca i1, i1 0
+  %nop1129 = alloca i1, i1 0
+  %nop1130 = alloca i1, i1 0
+  %nop1131 = alloca i1, i1 0
+  %nop1132 = alloca i1, i1 0
+  %nop1133 = alloca i1, i1 0
+  %nop1134 = alloca i1, i1 0
+  %nop1135 = alloca i1, i1 0
+  %nop1136 = alloca i1, i1 0
+  %nop1137 = alloca i1, i1 0
+  %nop1138 = alloca i1, i1 0
+  %nop1139 = alloca i1, i1 0
+  %nop1140 = alloca i1, i1 0
+  %nop1141 = alloca i1, i1 0
+  %nop1142 = alloca i1, i1 0
+  %nop1143 = alloca i1, i1 0
+  %nop1144 = alloca i1, i1 0
+  %nop1145 = alloca i1, i1 0
+  %nop1146 = alloca i1, i1 0
+  %nop1147 = alloca i1, i1 0
+  %nop1148 = alloca i1, i1 0
+  %nop1149 = alloca i1, i1 0
+  %nop1150 = alloca i1, i1 0
+  %nop1151 = alloca i1, i1 0
+  %nop1152 = alloca i1, i1 0
+  %nop1153 = alloca i1, i1 0
+  %nop1154 = alloca i1, i1 0
+  %nop1155 = alloca i1, i1 0
+  %nop1156 = alloca i1, i1 0
+  %nop1157 = alloca i1, i1 0
+  %nop1158 = alloca i1, i1 0
+  %nop1159 = alloca i1, i1 0
+  %nop1160 = alloca i1, i1 0
+  %nop1161 = alloca i1, i1 0
+  %nop1162 = alloca i1, i1 0
+  %nop1163 = alloca i1, i1 0
+  %nop1164 = alloca i1, i1 0
+  %nop1165 = alloca i1, i1 0
+  %nop1166 = alloca i1, i1 0
+  %nop1167 = alloca i1, i1 0
+  %nop1168 = alloca i1, i1 0
+  %nop1169 = alloca i1, i1 0
+  %nop1170 = alloca i1, i1 0
+  %nop1171 = alloca i1, i1 0
+  %nop1172 = alloca i1, i1 0
+  %nop1173 = alloca i1, i1 0
+  %nop1174 = alloca i1, i1 0
+  %nop1175 = alloca i1, i1 0
+  %nop1176 = alloca i1, i1 0
+  %nop1177 = alloca i1, i1 0
+  %nop1178 = alloca i1, i1 0
+  %nop1179 = alloca i1, i1 0
+  %nop1180 = alloca i1, i1 0
+  %nop1181 = alloca i1, i1 0
+  %nop1182 = alloca i1, i1 0
+  %nop1183 = alloca i1, i1 0
+  %nop1184 = alloca i1, i1 0
+  %nop1185 = alloca i1, i1 0
+  %nop1186 = alloca i1, i1 0
+  %nop1187 = alloca i1, i1 0
+  %nop1188 = alloca i1, i1 0
+  %nop1189 = alloca i1, i1 0
+  %nop1190 = alloca i1, i1 0
+  %nop1191 = alloca i1, i1 0
+  %nop1192 = alloca i1, i1 0
+  %nop1193 = alloca i1, i1 0
+  %nop1194 = alloca i1, i1 0
+  %nop1195 = alloca i1, i1 0
+  %nop1196 = alloca i1, i1 0
+  %nop1197 = alloca i1, i1 0
+  %nop1198 = alloca i1, i1 0
+  %nop1199 = alloca i1, i1 0
+  %nop1200 = alloca i1, i1 0
+  %nop1201 = alloca i1, i1 0
+  %nop1202 = alloca i1, i1 0
+  %nop1203 = alloca i1, i1 0
+  %nop1204 = alloca i1, i1 0
+  %nop1205 = alloca i1, i1 0
+  %nop1206 = alloca i1, i1 0
+  %nop1207 = alloca i1, i1 0
+  %nop1208 = alloca i1, i1 0
+  %nop1209 = alloca i1, i1 0
+  %nop1210 = alloca i1, i1 0
+  %nop1211 = alloca i1, i1 0
+  %nop1212 = alloca i1, i1 0
+  %nop1213 = alloca i1, i1 0
+  %nop1214 = alloca i1, i1 0
+  %nop1215 = alloca i1, i1 0
+  %nop1216 = alloca i1, i1 0
+  %nop1217 = alloca i1, i1 0
+  %nop1218 = alloca i1, i1 0
+  %nop1219 = alloca i1, i1 0
+  %nop1220 = alloca i1, i1 0
+  %nop1221 = alloca i1, i1 0
+  %nop1222 = alloca i1, i1 0
+  %nop1223 = alloca i1, i1 0
+  %nop1224 = alloca i1, i1 0
+  %nop1225 = alloca i1, i1 0
+  %nop1226 = alloca i1, i1 0
+  %nop1227 = alloca i1, i1 0
+  %nop1228 = alloca i1, i1 0
+  %nop1229 = alloca i1, i1 0
+  %nop1230 = alloca i1, i1 0
+  %nop1231 = alloca i1, i1 0
+  %nop1232 = alloca i1, i1 0
+  %nop1233 = alloca i1, i1 0
+  %nop1234 = alloca i1, i1 0
+  %nop1235 = alloca i1, i1 0
+  %nop1236 = alloca i1, i1 0
+  %nop1237 = alloca i1, i1 0
+  %nop1238 = alloca i1, i1 0
+  %nop1239 = alloca i1, i1 0
+  %nop1240 = alloca i1, i1 0
+  %nop1241 = alloca i1, i1 0
+  %nop1242 = alloca i1, i1 0
+  %nop1243 = alloca i1, i1 0
+  %nop1244 = alloca i1, i1 0
+  %nop1245 = alloca i1, i1 0
+  %nop1246 = alloca i1, i1 0
+  %nop1247 = alloca i1, i1 0
+  %nop1248 = alloca i1, i1 0
+  %nop1249 = alloca i1, i1 0
+  %nop1250 = alloca i1, i1 0
+  %nop1251 = alloca i1, i1 0
+  %nop1252 = alloca i1, i1 0
+  %nop1253 = alloca i1, i1 0
+  %nop1254 = alloca i1, i1 0
+  %nop1255 = alloca i1, i1 0
+  %nop1256 = alloca i1, i1 0
+  %nop1257 = alloca i1, i1 0
+  %nop1258 = alloca i1, i1 0
+  %nop1259 = alloca i1, i1 0
+  %nop1260 = alloca i1, i1 0
+  %nop1261 = alloca i1, i1 0
+  %nop1262 = alloca i1, i1 0
+  %nop1263 = alloca i1, i1 0
+  %nop1264 = alloca i1, i1 0
+  %nop1265 = alloca i1, i1 0
+  %nop1266 = alloca i1, i1 0
+  %nop1267 = alloca i1, i1 0
+  %nop1268 = alloca i1, i1 0
+  %nop1269 = alloca i1, i1 0
+  %nop1270 = alloca i1, i1 0
+  %nop1271 = alloca i1, i1 0
+  %nop1272 = alloca i1, i1 0
+  %nop1273 = alloca i1, i1 0
+  %nop1274 = alloca i1, i1 0
+  %nop1275 = alloca i1, i1 0
+  %nop1276 = alloca i1, i1 0
+  %nop1277 = alloca i1, i1 0
+  %nop1278 = alloca i1, i1 0
+  %nop1279 = alloca i1, i1 0
+  %nop1280 = alloca i1, i1 0
+  %nop1281 = alloca i1, i1 0
+  %nop1282 = alloca i1, i1 0
+  %nop1283 = alloca i1, i1 0
+  %nop1284 = alloca i1, i1 0
+  %nop1285 = alloca i1, i1 0
+  %nop1286 = alloca i1, i1 0
+  %nop1287 = alloca i1, i1 0
+  %nop1288 = alloca i1, i1 0
+  %nop1289 = alloca i1, i1 0
+  %nop1290 = alloca i1, i1 0
+  %nop1291 = alloca i1, i1 0
+  %nop1292 = alloca i1, i1 0
+  %nop1293 = alloca i1, i1 0
+  %nop1294 = alloca i1, i1 0
+  %nop1295 = alloca i1, i1 0
+  %nop1296 = alloca i1, i1 0
+  %nop1297 = alloca i1, i1 0
+  %nop1298 = alloca i1, i1 0
+  %nop1299 = alloca i1, i1 0
+  %nop1300 = alloca i1, i1 0
+  %nop1301 = alloca i1, i1 0
+  %nop1302 = alloca i1, i1 0
+  %nop1303 = alloca i1, i1 0
+  %nop1304 = alloca i1, i1 0
+  %nop1305 = alloca i1, i1 0
+  %nop1306 = alloca i1, i1 0
+  %nop1307 = alloca i1, i1 0
+  %nop1308 = alloca i1, i1 0
+  %nop1309 = alloca i1, i1 0
+  %nop1310 = alloca i1, i1 0
+  %nop1311 = alloca i1, i1 0
+  %nop1312 = alloca i1, i1 0
+  %nop1313 = alloca i1, i1 0
+  %nop1314 = alloca i1, i1 0
+  %nop1315 = alloca i1, i1 0
+  %nop1316 = alloca i1, i1 0
+  %nop1317 = alloca i1, i1 0
+  %nop1318 = alloca i1, i1 0
+  %nop1319 = alloca i1, i1 0
+  %nop1320 = alloca i1, i1 0
+  %nop1321 = alloca i1, i1 0
+  %nop1322 = alloca i1, i1 0
+  %nop1323 = alloca i1, i1 0
+  %nop1324 = alloca i1, i1 0
+  %nop1325 = alloca i1, i1 0
+  %nop1326 = alloca i1, i1 0
+  %nop1327 = alloca i1, i1 0
+  %nop1328 = alloca i1, i1 0
+  %nop1329 = alloca i1, i1 0
+  %nop1330 = alloca i1, i1 0
+  %nop1331 = alloca i1, i1 0
+  %nop1332 = alloca i1, i1 0
+  %nop1333 = alloca i1, i1 0
+  %nop1334 = alloca i1, i1 0
+  %nop1335 = alloca i1, i1 0
+  %nop1336 = alloca i1, i1 0
+  %nop1337 = alloca i1, i1 0
+  %nop1338 = alloca i1, i1 0
+  %nop1339 = alloca i1, i1 0
+  %nop1340 = alloca i1, i1 0
+  %nop1341 = alloca i1, i1 0
+  %nop1342 = alloca i1, i1 0
+  %nop1343 = alloca i1, i1 0
+  %nop1344 = alloca i1, i1 0
+  %nop1345 = alloca i1, i1 0
+  %nop1346 = alloca i1, i1 0
+  %nop1347 = alloca i1, i1 0
+  %nop1348 = alloca i1, i1 0
+  %nop1349 = alloca i1, i1 0
+  %nop1350 = alloca i1, i1 0
+  %nop1351 = alloca i1, i1 0
+  %nop1352 = alloca i1, i1 0
+  %nop1353 = alloca i1, i1 0
+  %nop1354 = alloca i1, i1 0
+  %nop1355 = alloca i1, i1 0
+  %nop1356 = alloca i1, i1 0
+  %nop1357 = alloca i1, i1 0
+  %nop1358 = alloca i1, i1 0
+  %nop1359 = alloca i1, i1 0
+  %nop1360 = alloca i1, i1 0
+  %nop1361 = alloca i1, i1 0
+  %nop1362 = alloca i1, i1 0
+  %nop1363 = alloca i1, i1 0
+  %nop1364 = alloca i1, i1 0
+  %nop1365 = alloca i1, i1 0
+  %nop1366 = alloca i1, i1 0
+  %nop1367 = alloca i1, i1 0
+  %nop1368 = alloca i1, i1 0
+  %nop1369 = alloca i1, i1 0
+  %nop1370 = alloca i1, i1 0
+  %nop1371 = alloca i1, i1 0
+  %nop1372 = alloca i1, i1 0
+  %nop1373 = alloca i1, i1 0
+  %nop1374 = alloca i1, i1 0
+  %nop1375 = alloca i1, i1 0
+  %nop1376 = alloca i1, i1 0
+  %nop1377 = alloca i1, i1 0
+  %nop1378 = alloca i1, i1 0
+  %nop1379 = alloca i1, i1 0
+  %nop1380 = alloca i1, i1 0
+  %nop1381 = alloca i1, i1 0
+  %nop1382 = alloca i1, i1 0
+  %nop1383 = alloca i1, i1 0
+  %nop1384 = alloca i1, i1 0
+  %nop1385 = alloca i1, i1 0
+  %nop1386 = alloca i1, i1 0
+  %nop1387 = alloca i1, i1 0
+  %nop1388 = alloca i1, i1 0
+  %nop1389 = alloca i1, i1 0
+  %nop1390 = alloca i1, i1 0
+  %nop1391 = alloca i1, i1 0
+  %nop1392 = alloca i1, i1 0
+  %nop1393 = alloca i1, i1 0
+  %nop1394 = alloca i1, i1 0
+  %nop1395 = alloca i1, i1 0
+  %nop1396 = alloca i1, i1 0
+  %nop1397 = alloca i1, i1 0
+  %nop1398 = alloca i1, i1 0
+  %nop1399 = alloca i1, i1 0
+  %nop1400 = alloca i1, i1 0
+  %nop1401 = alloca i1, i1 0
+  %nop1402 = alloca i1, i1 0
+  %nop1403 = alloca i1, i1 0
+  %nop1404 = alloca i1, i1 0
+  %nop1405 = alloca i1, i1 0
+  %nop1406 = alloca i1, i1 0
+  %nop1407 = alloca i1, i1 0
+  %nop1408 = alloca i1, i1 0
+  %nop1409 = alloca i1, i1 0
+  %nop1410 = alloca i1, i1 0
+  %nop1411 = alloca i1, i1 0
+  %nop1412 = alloca i1, i1 0
+  %nop1413 = alloca i1, i1 0
+  %nop1414 = alloca i1, i1 0
+  %nop1415 = alloca i1, i1 0
+  %nop1416 = alloca i1, i1 0
+  %nop1417 = alloca i1, i1 0
+  %nop1418 = alloca i1, i1 0
+  %nop1419 = alloca i1, i1 0
+  %nop1420 = alloca i1, i1 0
+  %nop1421 = alloca i1, i1 0
+  %nop1422 = alloca i1, i1 0
+  %nop1423 = alloca i1, i1 0
+  %nop1424 = alloca i1, i1 0
+  %nop1425 = alloca i1, i1 0
+  %nop1426 = alloca i1, i1 0
+  %nop1427 = alloca i1, i1 0
+  %nop1428 = alloca i1, i1 0
+  %nop1429 = alloca i1, i1 0
+  %nop1430 = alloca i1, i1 0
+  %nop1431 = alloca i1, i1 0
+  %nop1432 = alloca i1, i1 0
+  %nop1433 = alloca i1, i1 0
+  %nop1434 = alloca i1, i1 0
+  %nop1435 = alloca i1, i1 0
+  %nop1436 = alloca i1, i1 0
+  %nop1437 = alloca i1, i1 0
+  %nop1438 = alloca i1, i1 0
+  %nop1439 = alloca i1, i1 0
+  %nop1440 = alloca i1, i1 0
+  %nop1441 = alloca i1, i1 0
+  %nop1442 = alloca i1, i1 0
+  %nop1443 = alloca i1, i1 0
+  %nop1444 = alloca i1, i1 0
+  %nop1445 = alloca i1, i1 0
+  %nop1446 = alloca i1, i1 0
+  %nop1447 = alloca i1, i1 0
+  %nop1448 = alloca i1, i1 0
+  %nop1449 = alloca i1, i1 0
+  %nop1450 = alloca i1, i1 0
+  %nop1451 = alloca i1, i1 0
+  %nop1452 = alloca i1, i1 0
+  %nop1453 = alloca i1, i1 0
+  %nop1454 = alloca i1, i1 0
+  %nop1455 = alloca i1, i1 0
+  %nop1456 = alloca i1, i1 0
+  %nop1457 = alloca i1, i1 0
+  %nop1458 = alloca i1, i1 0
+  %nop1459 = alloca i1, i1 0
+  %nop1460 = alloca i1, i1 0
+  %nop1461 = alloca i1, i1 0
+  %nop1462 = alloca i1, i1 0
+  %nop1463 = alloca i1, i1 0
+  %nop1464 = alloca i1, i1 0
+  %nop1465 = alloca i1, i1 0
+  %nop1466 = alloca i1, i1 0
+  %nop1467 = alloca i1, i1 0
+  %nop1468 = alloca i1, i1 0
+  %nop1469 = alloca i1, i1 0
+  %nop1470 = alloca i1, i1 0
+  %nop1471 = alloca i1, i1 0
+  %nop1472 = alloca i1, i1 0
+  %nop1473 = alloca i1, i1 0
+  %nop1474 = alloca i1, i1 0
+  %nop1475 = alloca i1, i1 0
+  %nop1476 = alloca i1, i1 0
+  %nop1477 = alloca i1, i1 0
+  %nop1478 = alloca i1, i1 0
+  %nop1479 = alloca i1, i1 0
+  %nop1480 = alloca i1, i1 0
+  %nop1481 = alloca i1, i1 0
+  %nop1482 = alloca i1, i1 0
+  %nop1483 = alloca i1, i1 0
+  %nop1484 = alloca i1, i1 0
+  %nop1485 = alloca i1, i1 0
+  %nop1486 = alloca i1, i1 0
+  %nop1487 = alloca i1, i1 0
+  %nop1488 = alloca i1, i1 0
+  %nop1489 = alloca i1, i1 0
+  %nop1490 = alloca i1, i1 0
+  %nop1491 = alloca i1, i1 0
+  %nop1492 = alloca i1, i1 0
+  %nop1493 = alloca i1, i1 0
+  %nop1494 = alloca i1, i1 0
+  %nop1495 = alloca i1, i1 0
+  %nop1496 = alloca i1, i1 0
+  %nop1497 = alloca i1, i1 0
+  %nop1498 = alloca i1, i1 0
+  %nop1499 = alloca i1, i1 0
+  %nop1500 = alloca i1, i1 0
+  %nop1501 = alloca i1, i1 0
+  %nop1502 = alloca i1, i1 0
+  %nop1503 = alloca i1, i1 0
+  %nop1504 = alloca i1, i1 0
+  %nop1505 = alloca i1, i1 0
+  %nop1506 = alloca i1, i1 0
+  %nop1507 = alloca i1, i1 0
+  %nop1508 = alloca i1, i1 0
+  %nop1509 = alloca i1, i1 0
+  %nop1510 = alloca i1, i1 0
+  %nop1511 = alloca i1, i1 0
+  %nop1512 = alloca i1, i1 0
+  %nop1513 = alloca i1, i1 0
+  %nop1514 = alloca i1, i1 0
+  %nop1515 = alloca i1, i1 0
+  %nop1516 = alloca i1, i1 0
+  %nop1517 = alloca i1, i1 0
+  %nop1518 = alloca i1, i1 0
+  %nop1519 = alloca i1, i1 0
+  %nop1520 = alloca i1, i1 0
+  %nop1521 = alloca i1, i1 0
+  %nop1522 = alloca i1, i1 0
+  %nop1523 = alloca i1, i1 0
+  %nop1524 = alloca i1, i1 0
+  %nop1525 = alloca i1, i1 0
+  %nop1526 = alloca i1, i1 0
+  %nop1527 = alloca i1, i1 0
+  %nop1528 = alloca i1, i1 0
+  %nop1529 = alloca i1, i1 0
+  %nop1530 = alloca i1, i1 0
+  %nop1531 = alloca i1, i1 0
+  %nop1532 = alloca i1, i1 0
+  %nop1533 = alloca i1, i1 0
+  %nop1534 = alloca i1, i1 0
+  %nop1535 = alloca i1, i1 0
+  %nop1536 = alloca i1, i1 0
+  %nop1537 = alloca i1, i1 0
+  %nop1538 = alloca i1, i1 0
+  %nop1539 = alloca i1, i1 0
+  %nop1540 = alloca i1, i1 0
+  %nop1541 = alloca i1, i1 0
+  %nop1542 = alloca i1, i1 0
+  %nop1543 = alloca i1, i1 0
+  %nop1544 = alloca i1, i1 0
+  %nop1545 = alloca i1, i1 0
+  %nop1546 = alloca i1, i1 0
+  %nop1547 = alloca i1, i1 0
+  %nop1548 = alloca i1, i1 0
+  %nop1549 = alloca i1, i1 0
+  %nop1550 = alloca i1, i1 0
+  %nop1551 = alloca i1, i1 0
+  %nop1552 = alloca i1, i1 0
+  %nop1553 = alloca i1, i1 0
+  %nop1554 = alloca i1, i1 0
+  %nop1555 = alloca i1, i1 0
+  %nop1556 = alloca i1, i1 0
+  %nop1557 = alloca i1, i1 0
+  %nop1558 = alloca i1, i1 0
+  %nop1559 = alloca i1, i1 0
+  %nop1560 = alloca i1, i1 0
+  %nop1561 = alloca i1, i1 0
+  %nop1562 = alloca i1, i1 0
+  %nop1563 = alloca i1, i1 0
+  %nop1564 = alloca i1, i1 0
+  %nop1565 = alloca i1, i1 0
+  %nop1566 = alloca i1, i1 0
+  %nop1567 = alloca i1, i1 0
+  %nop1568 = alloca i1, i1 0
+  %nop1569 = alloca i1, i1 0
+  %nop1570 = alloca i1, i1 0
+  %nop1571 = alloca i1, i1 0
+  %nop1572 = alloca i1, i1 0
+  %nop1573 = alloca i1, i1 0
+  %nop1574 = alloca i1, i1 0
+  %nop1575 = alloca i1, i1 0
+  %nop1576 = alloca i1, i1 0
+  %nop1577 = alloca i1, i1 0
+  %nop1578 = alloca i1, i1 0
+  %nop1579 = alloca i1, i1 0
+  %nop1580 = alloca i1, i1 0
+  %nop1581 = alloca i1, i1 0
+  %nop1582 = alloca i1, i1 0
+  %nop1583 = alloca i1, i1 0
+  %nop1584 = alloca i1, i1 0
+  %nop1585 = alloca i1, i1 0
+  %nop1586 = alloca i1, i1 0
+  %nop1587 = alloca i1, i1 0
+  %nop1588 = alloca i1, i1 0
+  %nop1589 = alloca i1, i1 0
+  %nop1590 = alloca i1, i1 0
+  %nop1591 = alloca i1, i1 0
+  %nop1592 = alloca i1, i1 0
+  %nop1593 = alloca i1, i1 0
+  %nop1594 = alloca i1, i1 0
+  %nop1595 = alloca i1, i1 0
+  %nop1596 = alloca i1, i1 0
+  %nop1597 = alloca i1, i1 0
+  %nop1598 = alloca i1, i1 0
+  %nop1599 = alloca i1, i1 0
+  %nop1600 = alloca i1, i1 0
+  %nop1601 = alloca i1, i1 0
+  %nop1602 = alloca i1, i1 0
+  %nop1603 = alloca i1, i1 0
+  %nop1604 = alloca i1, i1 0
+  %nop1605 = alloca i1, i1 0
+  %nop1606 = alloca i1, i1 0
+  %nop1607 = alloca i1, i1 0
+  %nop1608 = alloca i1, i1 0
+  %nop1609 = alloca i1, i1 0
+  %nop1610 = alloca i1, i1 0
+  %nop1611 = alloca i1, i1 0
+  %nop1612 = alloca i1, i1 0
+  %nop1613 = alloca i1, i1 0
+  %nop1614 = alloca i1, i1 0
+  %nop1615 = alloca i1, i1 0
+  %nop1616 = alloca i1, i1 0
+  %nop1617 = alloca i1, i1 0
+  %nop1618 = alloca i1, i1 0
+  %nop1619 = alloca i1, i1 0
+  %nop1620 = alloca i1, i1 0
+  %nop1621 = alloca i1, i1 0
+  %nop1622 = alloca i1, i1 0
+  %nop1623 = alloca i1, i1 0
+  %nop1624 = alloca i1, i1 0
+  %nop1625 = alloca i1, i1 0
+  %nop1626 = alloca i1, i1 0
+  %nop1627 = alloca i1, i1 0
+  %nop1628 = alloca i1, i1 0
+  %nop1629 = alloca i1, i1 0
+  %nop1630 = alloca i1, i1 0
+  %nop1631 = alloca i1, i1 0
+  %nop1632 = alloca i1, i1 0
+  %nop1633 = alloca i1, i1 0
+  %nop1634 = alloca i1, i1 0
+  %nop1635 = alloca i1, i1 0
+  %nop1636 = alloca i1, i1 0
+  %nop1637 = alloca i1, i1 0
+  %nop1638 = alloca i1, i1 0
+  %nop1639 = alloca i1, i1 0
+  %nop1640 = alloca i1, i1 0
+  %nop1641 = alloca i1, i1 0
+  %nop1642 = alloca i1, i1 0
+  %nop1643 = alloca i1, i1 0
+  %nop1644 = alloca i1, i1 0
+  %nop1645 = alloca i1, i1 0
+  %nop1646 = alloca i1, i1 0
+  %nop1647 = alloca i1, i1 0
+  %nop1648 = alloca i1, i1 0
+  %nop1649 = alloca i1, i1 0
+  %nop1650 = alloca i1, i1 0
+  %nop1651 = alloca i1, i1 0
+  %nop1652 = alloca i1, i1 0
+  %nop1653 = alloca i1, i1 0
+  %nop1654 = alloca i1, i1 0
+  %nop1655 = alloca i1, i1 0
+  %nop1656 = alloca i1, i1 0
+  %nop1657 = alloca i1, i1 0
+  %nop1658 = alloca i1, i1 0
+  %nop1659 = alloca i1, i1 0
+  %nop1660 = alloca i1, i1 0
+  %nop1661 = alloca i1, i1 0
+  %nop1662 = alloca i1, i1 0
+  %nop1663 = alloca i1, i1 0
+  %nop1664 = alloca i1, i1 0
+  %nop1665 = alloca i1, i1 0
+  %nop1666 = alloca i1, i1 0
+  %nop1667 = alloca i1, i1 0
+  %nop1668 = alloca i1, i1 0
+  %nop1669 = alloca i1, i1 0
+  %nop1670 = alloca i1, i1 0
+  %nop1671 = alloca i1, i1 0
+  %nop1672 = alloca i1, i1 0
+  %nop1673 = alloca i1, i1 0
+  %nop1674 = alloca i1, i1 0
+  %nop1675 = alloca i1, i1 0
+  %nop1676 = alloca i1, i1 0
+  %nop1677 = alloca i1, i1 0
+  %nop1678 = alloca i1, i1 0
+  %nop1679 = alloca i1, i1 0
+  %nop1680 = alloca i1, i1 0
+  %nop1681 = alloca i1, i1 0
+  %nop1682 = alloca i1, i1 0
+  %nop1683 = alloca i1, i1 0
+  %nop1684 = alloca i1, i1 0
+  %nop1685 = alloca i1, i1 0
+  %nop1686 = alloca i1, i1 0
+  %nop1687 = alloca i1, i1 0
+  %nop1688 = alloca i1, i1 0
+  %nop1689 = alloca i1, i1 0
+  %nop1690 = alloca i1, i1 0
+  %nop1691 = alloca i1, i1 0
+  %nop1692 = alloca i1, i1 0
+  %nop1693 = alloca i1, i1 0
+  %nop1694 = alloca i1, i1 0
+  %nop1695 = alloca i1, i1 0
+  %nop1696 = alloca i1, i1 0
+  %nop1697 = alloca i1, i1 0
+  %nop1698 = alloca i1, i1 0
+  %nop1699 = alloca i1, i1 0
+  %nop1700 = alloca i1, i1 0
+  %nop1701 = alloca i1, i1 0
+  %nop1702 = alloca i1, i1 0
+  %nop1703 = alloca i1, i1 0
+  %nop1704 = alloca i1, i1 0
+  %nop1705 = alloca i1, i1 0
+  %nop1706 = alloca i1, i1 0
+  %nop1707 = alloca i1, i1 0
+  %nop1708 = alloca i1, i1 0
+  %nop1709 = alloca i1, i1 0
+  %nop1710 = alloca i1, i1 0
+  %nop1711 = alloca i1, i1 0
+  %nop1712 = alloca i1, i1 0
+  %nop1713 = alloca i1, i1 0
+  %nop1714 = alloca i1, i1 0
+  %nop1715 = alloca i1, i1 0
+  %nop1716 = alloca i1, i1 0
+  %nop1717 = alloca i1, i1 0
+  %nop1718 = alloca i1, i1 0
+  %nop1719 = alloca i1, i1 0
+  %nop1720 = alloca i1, i1 0
+  %nop1721 = alloca i1, i1 0
+  %nop1722 = alloca i1, i1 0
+  %nop1723 = alloca i1, i1 0
+  %nop1724 = alloca i1, i1 0
+  %nop1725 = alloca i1, i1 0
+  %nop1726 = alloca i1, i1 0
+  %nop1727 = alloca i1, i1 0
+  %nop1728 = alloca i1, i1 0
+  %nop1729 = alloca i1, i1 0
+  %nop1730 = alloca i1, i1 0
+  %nop1731 = alloca i1, i1 0
+  %nop1732 = alloca i1, i1 0
+  %nop1733 = alloca i1, i1 0
+  %nop1734 = alloca i1, i1 0
+  %nop1735 = alloca i1, i1 0
+  %nop1736 = alloca i1, i1 0
+  %nop1737 = alloca i1, i1 0
+  %nop1738 = alloca i1, i1 0
+  %nop1739 = alloca i1, i1 0
+  %nop1740 = alloca i1, i1 0
+  %nop1741 = alloca i1, i1 0
+  %nop1742 = alloca i1, i1 0
+  %nop1743 = alloca i1, i1 0
+  %nop1744 = alloca i1, i1 0
+  %nop1745 = alloca i1, i1 0
+  %nop1746 = alloca i1, i1 0
+  %nop1747 = alloca i1, i1 0
+  %nop1748 = alloca i1, i1 0
+  %nop1749 = alloca i1, i1 0
+  %nop1750 = alloca i1, i1 0
+  %nop1751 = alloca i1, i1 0
+  %nop1752 = alloca i1, i1 0
+  %nop1753 = alloca i1, i1 0
+  %nop1754 = alloca i1, i1 0
+  %nop1755 = alloca i1, i1 0
+  %nop1756 = alloca i1, i1 0
+  %nop1757 = alloca i1, i1 0
+  %nop1758 = alloca i1, i1 0
+  %nop1759 = alloca i1, i1 0
+  %nop1760 = alloca i1, i1 0
+  %nop1761 = alloca i1, i1 0
+  %nop1762 = alloca i1, i1 0
+  %nop1763 = alloca i1, i1 0
+  %nop1764 = alloca i1, i1 0
+  %nop1765 = alloca i1, i1 0
+  %nop1766 = alloca i1, i1 0
+  %nop1767 = alloca i1, i1 0
+  %nop1768 = alloca i1, i1 0
+  %nop1769 = alloca i1, i1 0
+  %nop1770 = alloca i1, i1 0
+  %nop1771 = alloca i1, i1 0
+  %nop1772 = alloca i1, i1 0
+  %nop1773 = alloca i1, i1 0
+  %nop1774 = alloca i1, i1 0
+  %nop1775 = alloca i1, i1 0
+  %nop1776 = alloca i1, i1 0
+  %nop1777 = alloca i1, i1 0
+  %nop1778 = alloca i1, i1 0
+  %nop1779 = alloca i1, i1 0
+  %nop1780 = alloca i1, i1 0
+  %nop1781 = alloca i1, i1 0
+  %nop1782 = alloca i1, i1 0
+  %nop1783 = alloca i1, i1 0
+  %nop1784 = alloca i1, i1 0
+  %nop1785 = alloca i1, i1 0
+  %nop1786 = alloca i1, i1 0
+  %nop1787 = alloca i1, i1 0
+  %nop1788 = alloca i1, i1 0
+  %nop1789 = alloca i1, i1 0
+  %nop1790 = alloca i1, i1 0
+  %nop1791 = alloca i1, i1 0
+  %nop1792 = alloca i1, i1 0
+  %nop1793 = alloca i1, i1 0
+  %nop1794 = alloca i1, i1 0
+  %nop1795 = alloca i1, i1 0
+  %nop1796 = alloca i1, i1 0
+  %nop1797 = alloca i1, i1 0
+  %nop1798 = alloca i1, i1 0
+  %nop1799 = alloca i1, i1 0
+  %nop1800 = alloca i1, i1 0
+  %nop1801 = alloca i1, i1 0
+  %nop1802 = alloca i1, i1 0
+  %nop1803 = alloca i1, i1 0
+  %nop1804 = alloca i1, i1 0
+  %nop1805 = alloca i1, i1 0
+  %nop1806 = alloca i1, i1 0
+  %nop1807 = alloca i1, i1 0
+  %nop1808 = alloca i1, i1 0
+  %nop1809 = alloca i1, i1 0
+  %nop1810 = alloca i1, i1 0
+  %nop1811 = alloca i1, i1 0
+  %nop1812 = alloca i1, i1 0
+  %nop1813 = alloca i1, i1 0
+  %nop1814 = alloca i1, i1 0
+  %nop1815 = alloca i1, i1 0
+  %nop1816 = alloca i1, i1 0
+  %nop1817 = alloca i1, i1 0
+  %nop1818 = alloca i1, i1 0
+  %nop1819 = alloca i1, i1 0
+  %nop1820 = alloca i1, i1 0
+  %nop1821 = alloca i1, i1 0
+  %nop1822 = alloca i1, i1 0
+  %nop1823 = alloca i1, i1 0
+  %nop1824 = alloca i1, i1 0
+  %nop1825 = alloca i1, i1 0
+  %nop1826 = alloca i1, i1 0
+  %nop1827 = alloca i1, i1 0
+  %nop1828 = alloca i1, i1 0
+  %nop1829 = alloca i1, i1 0
+  %nop1830 = alloca i1, i1 0
+  %nop1831 = alloca i1, i1 0
+  %nop1832 = alloca i1, i1 0
+  %nop1833 = alloca i1, i1 0
+  %nop1834 = alloca i1, i1 0
+  %nop1835 = alloca i1, i1 0
+  %nop1836 = alloca i1, i1 0
+  %nop1837 = alloca i1, i1 0
+  %nop1838 = alloca i1, i1 0
+  %nop1839 = alloca i1, i1 0
+  %nop1840 = alloca i1, i1 0
+  %nop1841 = alloca i1, i1 0
+  %nop1842 = alloca i1, i1 0
+  %nop1843 = alloca i1, i1 0
+  %nop1844 = alloca i1, i1 0
+  %nop1845 = alloca i1, i1 0
+  %nop1846 = alloca i1, i1 0
+  %nop1847 = alloca i1, i1 0
+  %nop1848 = alloca i1, i1 0
+  %nop1849 = alloca i1, i1 0
+  %nop1850 = alloca i1, i1 0
+  %nop1851 = alloca i1, i1 0
+  %nop1852 = alloca i1, i1 0
+  %nop1853 = alloca i1, i1 0
+  %nop1854 = alloca i1, i1 0
+  %nop1855 = alloca i1, i1 0
+  %nop1856 = alloca i1, i1 0
+  %nop1857 = alloca i1, i1 0
+  %nop1858 = alloca i1, i1 0
+  %nop1859 = alloca i1, i1 0
+  %nop1860 = alloca i1, i1 0
+  %nop1861 = alloca i1, i1 0
+  %nop1862 = alloca i1, i1 0
+  %nop1863 = alloca i1, i1 0
+  %nop1864 = alloca i1, i1 0
+  %nop1865 = alloca i1, i1 0
+  %nop1866 = alloca i1, i1 0
+  %nop1867 = alloca i1, i1 0
+  %nop1868 = alloca i1, i1 0
+  %nop1869 = alloca i1, i1 0
+  %nop1870 = alloca i1, i1 0
+  %nop1871 = alloca i1, i1 0
+  %nop1872 = alloca i1, i1 0
+  %nop1873 = alloca i1, i1 0
+  %nop1874 = alloca i1, i1 0
+  %nop1875 = alloca i1, i1 0
+  %nop1876 = alloca i1, i1 0
+  %nop1877 = alloca i1, i1 0
+  %nop1878 = alloca i1, i1 0
+  %nop1879 = alloca i1, i1 0
+  %nop1880 = alloca i1, i1 0
+  %nop1881 = alloca i1, i1 0
+  %nop1882 = alloca i1, i1 0
+  %nop1883 = alloca i1, i1 0
+  %nop1884 = alloca i1, i1 0
+  %nop1885 = alloca i1, i1 0
+  %nop1886 = alloca i1, i1 0
+  %nop1887 = alloca i1, i1 0
+  %nop1888 = alloca i1, i1 0
+  %nop1889 = alloca i1, i1 0
+  %nop1890 = alloca i1, i1 0
+  %nop1891 = alloca i1, i1 0
+  %nop1892 = alloca i1, i1 0
+  %nop1893 = alloca i1, i1 0
+  %nop1894 = alloca i1, i1 0
+  %nop1895 = alloca i1, i1 0
+  %nop1896 = alloca i1, i1 0
+  %nop1897 = alloca i1, i1 0
+  %nop1898 = alloca i1, i1 0
+  %nop1899 = alloca i1, i1 0
+  %nop1900 = alloca i1, i1 0
+  %nop1901 = alloca i1, i1 0
+  %nop1902 = alloca i1, i1 0
+  %nop1903 = alloca i1, i1 0
+  %nop1904 = alloca i1, i1 0
+  %nop1905 = alloca i1, i1 0
+  %nop1906 = alloca i1, i1 0
+  %nop1907 = alloca i1, i1 0
+  %nop1908 = alloca i1, i1 0
+  %nop1909 = alloca i1, i1 0
+  %nop1910 = alloca i1, i1 0
+  %nop1911 = alloca i1, i1 0
+  %nop1912 = alloca i1, i1 0
+  %nop1913 = alloca i1, i1 0
+  %nop1914 = alloca i1, i1 0
+  %nop1915 = alloca i1, i1 0
+  %nop1916 = alloca i1, i1 0
+  %nop1917 = alloca i1, i1 0
+  %nop1918 = alloca i1, i1 0
+  %nop1919 = alloca i1, i1 0
+  %nop1920 = alloca i1, i1 0
+  %nop1921 = alloca i1, i1 0
+  %nop1922 = alloca i1, i1 0
+  %nop1923 = alloca i1, i1 0
+  %nop1924 = alloca i1, i1 0
+  %nop1925 = alloca i1, i1 0
+  %nop1926 = alloca i1, i1 0
+  %nop1927 = alloca i1, i1 0
+  %nop1928 = alloca i1, i1 0
+  %nop1929 = alloca i1, i1 0
+  %nop1930 = alloca i1, i1 0
+  %nop1931 = alloca i1, i1 0
+  %nop1932 = alloca i1, i1 0
+  %nop1933 = alloca i1, i1 0
+  %nop1934 = alloca i1, i1 0
+  %nop1935 = alloca i1, i1 0
+  %nop1936 = alloca i1, i1 0
+  %nop1937 = alloca i1, i1 0
+  %nop1938 = alloca i1, i1 0
+  %nop1939 = alloca i1, i1 0
+  %nop1940 = alloca i1, i1 0
+  %nop1941 = alloca i1, i1 0
+  %nop1942 = alloca i1, i1 0
+  %nop1943 = alloca i1, i1 0
+  %nop1944 = alloca i1, i1 0
+  %nop1945 = alloca i1, i1 0
+  %nop1946 = alloca i1, i1 0
+  %nop1947 = alloca i1, i1 0
+  %nop1948 = alloca i1, i1 0
+  %nop1949 = alloca i1, i1 0
+  %nop1950 = alloca i1, i1 0
+  %nop1951 = alloca i1, i1 0
+  %nop1952 = alloca i1, i1 0
+  %nop1953 = alloca i1, i1 0
+  %nop1954 = alloca i1, i1 0
+  %nop1955 = alloca i1, i1 0
+  %nop1956 = alloca i1, i1 0
+  %nop1957 = alloca i1, i1 0
+  %nop1958 = alloca i1, i1 0
+  %nop1959 = alloca i1, i1 0
+  %nop1960 = alloca i1, i1 0
+  %nop1961 = alloca i1, i1 0
+  %nop1962 = alloca i1, i1 0
+  %nop1963 = alloca i1, i1 0
+  %nop1964 = alloca i1, i1 0
+  %nop1965 = alloca i1, i1 0
+  %nop1966 = alloca i1, i1 0
+  %nop1967 = alloca i1, i1 0
+  %nop1968 = alloca i1, i1 0
+  %nop1969 = alloca i1, i1 0
+  %nop1970 = alloca i1, i1 0
+  %nop1971 = alloca i1, i1 0
+  %nop1972 = alloca i1, i1 0
+  %nop1973 = alloca i1, i1 0
+  %nop1974 = alloca i1, i1 0
+  %nop1975 = alloca i1, i1 0
+  %nop1976 = alloca i1, i1 0
+  %nop1977 = alloca i1, i1 0
+  %nop1978 = alloca i1, i1 0
+  %nop1979 = alloca i1, i1 0
+  %nop1980 = alloca i1, i1 0
+  %nop1981 = alloca i1, i1 0
+  %nop1982 = alloca i1, i1 0
+  %nop1983 = alloca i1, i1 0
+  %nop1984 = alloca i1, i1 0
+  %nop1985 = alloca i1, i1 0
+  %nop1986 = alloca i1, i1 0
+  %nop1987 = alloca i1, i1 0
+  %nop1988 = alloca i1, i1 0
+  %nop1989 = alloca i1, i1 0
+  %nop1990 = alloca i1, i1 0
+  %nop1991 = alloca i1, i1 0
+  %nop1992 = alloca i1, i1 0
+  %nop1993 = alloca i1, i1 0
+  %nop1994 = alloca i1, i1 0
+  %nop1995 = alloca i1, i1 0
+  %nop1996 = alloca i1, i1 0
+  %nop1997 = alloca i1, i1 0
+  %nop1998 = alloca i1, i1 0
+  %nop1999 = alloca i1, i1 0
+  %nop2000 = alloca i1, i1 0
+  %nop2001 = alloca i1, i1 0
+  %nop2002 = alloca i1, i1 0
+  %nop2003 = alloca i1, i1 0
+  %nop2004 = alloca i1, i1 0
+  %nop2005 = alloca i1, i1 0
+  %nop2006 = alloca i1, i1 0
+  %nop2007 = alloca i1, i1 0
+  %nop2008 = alloca i1, i1 0
+  %nop2009 = alloca i1, i1 0
+  %nop2010 = alloca i1, i1 0
+  %nop2011 = alloca i1, i1 0
+  %nop2012 = alloca i1, i1 0
+  %nop2013 = alloca i1, i1 0
+  %nop2014 = alloca i1, i1 0
+  %nop2015 = alloca i1, i1 0
+  %nop2016 = alloca i1, i1 0
+  %nop2017 = alloca i1, i1 0
+  %nop2018 = alloca i1, i1 0
+  %nop2019 = alloca i1, i1 0
+  %nop2020 = alloca i1, i1 0
+  %nop2021 = alloca i1, i1 0
+  %nop2022 = alloca i1, i1 0
+  %nop2023 = alloca i1, i1 0
+  %nop2024 = alloca i1, i1 0
+  %nop2025 = alloca i1, i1 0
+  %nop2026 = alloca i1, i1 0
+  %nop2027 = alloca i1, i1 0
+  %nop2028 = alloca i1, i1 0
+  %nop2029 = alloca i1, i1 0
+  %nop2030 = alloca i1, i1 0
+  %nop2031 = alloca i1, i1 0
+  %nop2032 = alloca i1, i1 0
+  %nop2033 = alloca i1, i1 0
+  %nop2034 = alloca i1, i1 0
+  %nop2035 = alloca i1, i1 0
+  %nop2036 = alloca i1, i1 0
+  %nop2037 = alloca i1, i1 0
+  %nop2038 = alloca i1, i1 0
+  %nop2039 = alloca i1, i1 0
+  %nop2040 = alloca i1, i1 0
+  %nop2041 = alloca i1, i1 0
+  %nop2042 = alloca i1, i1 0
+  %nop2043 = alloca i1, i1 0
+  %nop2044 = alloca i1, i1 0
+  %nop2045 = alloca i1, i1 0
+  %nop2046 = alloca i1, i1 0
+  %nop2047 = alloca i1, i1 0
+  %nop2048 = alloca i1, i1 0
+  %nop2049 = alloca i1, i1 0
+  %nop2050 = alloca i1, i1 0
+  %nop2051 = alloca i1, i1 0
+  %nop2052 = alloca i1, i1 0
+  %nop2053 = alloca i1, i1 0
+  %nop2054 = alloca i1, i1 0
+  %nop2055 = alloca i1, i1 0
+  %nop2056 = alloca i1, i1 0
+  %nop2057 = alloca i1, i1 0
+  %nop2058 = alloca i1, i1 0
+  %nop2059 = alloca i1, i1 0
+  %nop2060 = alloca i1, i1 0
+  %nop2061 = alloca i1, i1 0
+  %nop2062 = alloca i1, i1 0
+  %nop2063 = alloca i1, i1 0
+  %nop2064 = alloca i1, i1 0
+  %nop2065 = alloca i1, i1 0
+  %nop2066 = alloca i1, i1 0
+  %nop2067 = alloca i1, i1 0
+  %nop2068 = alloca i1, i1 0
+  %nop2069 = alloca i1, i1 0
+  %nop2070 = alloca i1, i1 0
+  %nop2071 = alloca i1, i1 0
+  %nop2072 = alloca i1, i1 0
+  %nop2073 = alloca i1, i1 0
+  %nop2074 = alloca i1, i1 0
+  %nop2075 = alloca i1, i1 0
+  %nop2076 = alloca i1, i1 0
+  %nop2077 = alloca i1, i1 0
+  %nop2078 = alloca i1, i1 0
+  %nop2079 = alloca i1, i1 0
+  %nop2080 = alloca i1, i1 0
+  %nop2081 = alloca i1, i1 0
+  %nop2082 = alloca i1, i1 0
+  %nop2083 = alloca i1, i1 0
+  %nop2084 = alloca i1, i1 0
+  %nop2085 = alloca i1, i1 0
+  %nop2086 = alloca i1, i1 0
+  %nop2087 = alloca i1, i1 0
+  %nop2088 = alloca i1, i1 0
+  %nop2089 = alloca i1, i1 0
+  %nop2090 = alloca i1, i1 0
+  %nop2091 = alloca i1, i1 0
+  %nop2092 = alloca i1, i1 0
+  %nop2093 = alloca i1, i1 0
+  %nop2094 = alloca i1, i1 0
+  %nop2095 = alloca i1, i1 0
+  %nop2096 = alloca i1, i1 0
+  %nop2097 = alloca i1, i1 0
+  %nop2098 = alloca i1, i1 0
+  %nop2099 = alloca i1, i1 0
+  %nop2100 = alloca i1, i1 0
+  %nop2101 = alloca i1, i1 0
+  %nop2102 = alloca i1, i1 0
+  %nop2103 = alloca i1, i1 0
+  %nop2104 = alloca i1, i1 0
+  %nop2105 = alloca i1, i1 0
+  %nop2106 = alloca i1, i1 0
+  %nop2107 = alloca i1, i1 0
+  %nop2108 = alloca i1, i1 0
+  %nop2109 = alloca i1, i1 0
+  %nop2110 = alloca i1, i1 0
+  %nop2111 = alloca i1, i1 0
+  %nop2112 = alloca i1, i1 0
+  %nop2113 = alloca i1, i1 0
+  %nop2114 = alloca i1, i1 0
+  %nop2115 = alloca i1, i1 0
+  %nop2116 = alloca i1, i1 0
+  %nop2117 = alloca i1, i1 0
+  %nop2118 = alloca i1, i1 0
+  %nop2119 = alloca i1, i1 0
+  %nop2120 = alloca i1, i1 0
+  %nop2121 = alloca i1, i1 0
+  %nop2122 = alloca i1, i1 0
+  %nop2123 = alloca i1, i1 0
+  %nop2124 = alloca i1, i1 0
+  %nop2125 = alloca i1, i1 0
+  %nop2126 = alloca i1, i1 0
+  %nop2127 = alloca i1, i1 0
+  %nop2128 = alloca i1, i1 0
+  %nop2129 = alloca i1, i1 0
+  %nop2130 = alloca i1, i1 0
+  %nop2131 = alloca i1, i1 0
+  %nop2132 = alloca i1, i1 0
+  %nop2133 = alloca i1, i1 0
+  %nop2134 = alloca i1, i1 0
+  %nop2135 = alloca i1, i1 0
+  %nop2136 = alloca i1, i1 0
+  %nop2137 = alloca i1, i1 0
+  %nop2138 = alloca i1, i1 0
+  %nop2139 = alloca i1, i1 0
+  %nop2140 = alloca i1, i1 0
+  %nop2141 = alloca i1, i1 0
+  %nop2142 = alloca i1, i1 0
+  %nop2143 = alloca i1, i1 0
+  %nop2144 = alloca i1, i1 0
+  %nop2145 = alloca i1, i1 0
+  %nop2146 = alloca i1, i1 0
+  %nop2147 = alloca i1, i1 0
+  %nop2148 = alloca i1, i1 0
+  %nop2149 = alloca i1, i1 0
+  %nop2150 = alloca i1, i1 0
+  %nop2151 = alloca i1, i1 0
+  %nop2152 = alloca i1, i1 0
+  %nop2153 = alloca i1, i1 0
+  %nop2154 = alloca i1, i1 0
+  %nop2155 = alloca i1, i1 0
+  %nop2156 = alloca i1, i1 0
+  %nop2157 = alloca i1, i1 0
+  %nop2158 = alloca i1, i1 0
+  %nop2159 = alloca i1, i1 0
+  %nop2160 = alloca i1, i1 0
+  %nop2161 = alloca i1, i1 0
+  %nop2162 = alloca i1, i1 0
+  %nop2163 = alloca i1, i1 0
+  %nop2164 = alloca i1, i1 0
+  %nop2165 = alloca i1, i1 0
+  %nop2166 = alloca i1, i1 0
+  %nop2167 = alloca i1, i1 0
+  %nop2168 = alloca i1, i1 0
+  %nop2169 = alloca i1, i1 0
+  %nop2170 = alloca i1, i1 0
+  %nop2171 = alloca i1, i1 0
+  %nop2172 = alloca i1, i1 0
+  %nop2173 = alloca i1, i1 0
+  %nop2174 = alloca i1, i1 0
+  %nop2175 = alloca i1, i1 0
+  %nop2176 = alloca i1, i1 0
+  %nop2177 = alloca i1, i1 0
+  %nop2178 = alloca i1, i1 0
+  %nop2179 = alloca i1, i1 0
+  %nop2180 = alloca i1, i1 0
+  %nop2181 = alloca i1, i1 0
+  %nop2182 = alloca i1, i1 0
+  %nop2183 = alloca i1, i1 0
+  %nop2184 = alloca i1, i1 0
+  %nop2185 = alloca i1, i1 0
+  %nop2186 = alloca i1, i1 0
+  %nop2187 = alloca i1, i1 0
+  %nop2188 = alloca i1, i1 0
+  %nop2189 = alloca i1, i1 0
+  %nop2190 = alloca i1, i1 0
+  %nop2191 = alloca i1, i1 0
+  %nop2192 = alloca i1, i1 0
+  %nop2193 = alloca i1, i1 0
+  %nop2194 = alloca i1, i1 0
+  %nop2195 = alloca i1, i1 0
+  %nop2196 = alloca i1, i1 0
+  %nop2197 = alloca i1, i1 0
+  %nop2198 = alloca i1, i1 0
+  %nop2199 = alloca i1, i1 0
+  %nop2200 = alloca i1, i1 0
+  %nop2201 = alloca i1, i1 0
+  %nop2202 = alloca i1, i1 0
+  %nop2203 = alloca i1, i1 0
+  %nop2204 = alloca i1, i1 0
+  %nop2205 = alloca i1, i1 0
+  %nop2206 = alloca i1, i1 0
+  %nop2207 = alloca i1, i1 0
+  %nop2208 = alloca i1, i1 0
+  %nop2209 = alloca i1, i1 0
+  %nop2210 = alloca i1, i1 0
+  %nop2211 = alloca i1, i1 0
+  %nop2212 = alloca i1, i1 0
+  %nop2213 = alloca i1, i1 0
+  %nop2214 = alloca i1, i1 0
+  %nop2215 = alloca i1, i1 0
+  %nop2216 = alloca i1, i1 0
+  %nop2217 = alloca i1, i1 0
+  %nop2218 = alloca i1, i1 0
+  %nop2219 = alloca i1, i1 0
+  %nop2220 = alloca i1, i1 0
+  %nop2221 = alloca i1, i1 0
+  %nop2222 = alloca i1, i1 0
+  %nop2223 = alloca i1, i1 0
+  %nop2224 = alloca i1, i1 0
+  %nop2225 = alloca i1, i1 0
+  %nop2226 = alloca i1, i1 0
+  %nop2227 = alloca i1, i1 0
+  %nop2228 = alloca i1, i1 0
+  %nop2229 = alloca i1, i1 0
+  %nop2230 = alloca i1, i1 0
+  %nop2231 = alloca i1, i1 0
+  %nop2232 = alloca i1, i1 0
+  %nop2233 = alloca i1, i1 0
+  %nop2234 = alloca i1, i1 0
+  %nop2235 = alloca i1, i1 0
+  %nop2236 = alloca i1, i1 0
+  %nop2237 = alloca i1, i1 0
+  %nop2238 = alloca i1, i1 0
+  %nop2239 = alloca i1, i1 0
+  %nop2240 = alloca i1, i1 0
+  %nop2241 = alloca i1, i1 0
+  %nop2242 = alloca i1, i1 0
+  %nop2243 = alloca i1, i1 0
+  %nop2244 = alloca i1, i1 0
+  %nop2245 = alloca i1, i1 0
+  %nop2246 = alloca i1, i1 0
+  %nop2247 = alloca i1, i1 0
+  %nop2248 = alloca i1, i1 0
+  %nop2249 = alloca i1, i1 0
+  %nop2250 = alloca i1, i1 0
+  %nop2251 = alloca i1, i1 0
+  %nop2252 = alloca i1, i1 0
+  %nop2253 = alloca i1, i1 0
+  %nop2254 = alloca i1, i1 0
+  %nop2255 = alloca i1, i1 0
+  %nop2256 = alloca i1, i1 0
+  %nop2257 = alloca i1, i1 0
+  %nop2258 = alloca i1, i1 0
+  %nop2259 = alloca i1, i1 0
+  %nop2260 = alloca i1, i1 0
+  %nop2261 = alloca i1, i1 0
+  %nop2262 = alloca i1, i1 0
+  %nop2263 = alloca i1, i1 0
+  %nop2264 = alloca i1, i1 0
+  %nop2265 = alloca i1, i1 0
+  %nop2266 = alloca i1, i1 0
+  %nop2267 = alloca i1, i1 0
+  %nop2268 = alloca i1, i1 0
+  %nop2269 = alloca i1, i1 0
+  %nop2270 = alloca i1, i1 0
+  %nop2271 = alloca i1, i1 0
+  %nop2272 = alloca i1, i1 0
+  %nop2273 = alloca i1, i1 0
+  %nop2274 = alloca i1, i1 0
+  %nop2275 = alloca i1, i1 0
+  %nop2276 = alloca i1, i1 0
+  %nop2277 = alloca i1, i1 0
+  %nop2278 = alloca i1, i1 0
+  %nop2279 = alloca i1, i1 0
+  %nop2280 = alloca i1, i1 0
+  %nop2281 = alloca i1, i1 0
+  %nop2282 = alloca i1, i1 0
+  %nop2283 = alloca i1, i1 0
+  %nop2284 = alloca i1, i1 0
+  %nop2285 = alloca i1, i1 0
+  %nop2286 = alloca i1, i1 0
+  %nop2287 = alloca i1, i1 0
+  %nop2288 = alloca i1, i1 0
+  %nop2289 = alloca i1, i1 0
+  %nop2290 = alloca i1, i1 0
+  %nop2291 = alloca i1, i1 0
+  %nop2292 = alloca i1, i1 0
+  %nop2293 = alloca i1, i1 0
+  %nop2294 = alloca i1, i1 0
+  %nop2295 = alloca i1, i1 0
+  %nop2296 = alloca i1, i1 0
+  %nop2297 = alloca i1, i1 0
+  %nop2298 = alloca i1, i1 0
+  %nop2299 = alloca i1, i1 0
+  %nop2300 = alloca i1, i1 0
+  %nop2301 = alloca i1, i1 0
+  %nop2302 = alloca i1, i1 0
+  %nop2303 = alloca i1, i1 0
+  %nop2304 = alloca i1, i1 0
+  %nop2305 = alloca i1, i1 0
+  %nop2306 = alloca i1, i1 0
+  %nop2307 = alloca i1, i1 0
+  %nop2308 = alloca i1, i1 0
+  %nop2309 = alloca i1, i1 0
+  %nop2310 = alloca i1, i1 0
+  %nop2311 = alloca i1, i1 0
+  %nop2312 = alloca i1, i1 0
+  %nop2313 = alloca i1, i1 0
+  %nop2314 = alloca i1, i1 0
+  %nop2315 = alloca i1, i1 0
+  %nop2316 = alloca i1, i1 0
+  %nop2317 = alloca i1, i1 0
+  %nop2318 = alloca i1, i1 0
+  %nop2319 = alloca i1, i1 0
+  %nop2320 = alloca i1, i1 0
+  %nop2321 = alloca i1, i1 0
+  %nop2322 = alloca i1, i1 0
+  %nop2323 = alloca i1, i1 0
+  %nop2324 = alloca i1, i1 0
+  %nop2325 = alloca i1, i1 0
+  %nop2326 = alloca i1, i1 0
+  %nop2327 = alloca i1, i1 0
+  %nop2328 = alloca i1, i1 0
+  %nop2329 = alloca i1, i1 0
+  %nop2330 = alloca i1, i1 0
+  %nop2331 = alloca i1, i1 0
+  %nop2332 = alloca i1, i1 0
+  %nop2333 = alloca i1, i1 0
+  %nop2334 = alloca i1, i1 0
+  %nop2335 = alloca i1, i1 0
+  %nop2336 = alloca i1, i1 0
+  %nop2337 = alloca i1, i1 0
+  %nop2338 = alloca i1, i1 0
+  %nop2339 = alloca i1, i1 0
+  %nop2340 = alloca i1, i1 0
+  %nop2341 = alloca i1, i1 0
+  %nop2342 = alloca i1, i1 0
+  %nop2343 = alloca i1, i1 0
+  %nop2344 = alloca i1, i1 0
+  %nop2345 = alloca i1, i1 0
+  %nop2346 = alloca i1, i1 0
+  %nop2347 = alloca i1, i1 0
+  %nop2348 = alloca i1, i1 0
+  %nop2349 = alloca i1, i1 0
+  %nop2350 = alloca i1, i1 0
+  %nop2351 = alloca i1, i1 0
+  %nop2352 = alloca i1, i1 0
+  %nop2353 = alloca i1, i1 0
+  %nop2354 = alloca i1, i1 0
+  %nop2355 = alloca i1, i1 0
+  %nop2356 = alloca i1, i1 0
+  %nop2357 = alloca i1, i1 0
+  %nop2358 = alloca i1, i1 0
+  %nop2359 = alloca i1, i1 0
+  %nop2360 = alloca i1, i1 0
+  %nop2361 = alloca i1, i1 0
+  %nop2362 = alloca i1, i1 0
+  %nop2363 = alloca i1, i1 0
+  %nop2364 = alloca i1, i1 0
+  %nop2365 = alloca i1, i1 0
+  %nop2366 = alloca i1, i1 0
+  %nop2367 = alloca i1, i1 0
+  %nop2368 = alloca i1, i1 0
+  %nop2369 = alloca i1, i1 0
+  %nop2370 = alloca i1, i1 0
+  %nop2371 = alloca i1, i1 0
+  %nop2372 = alloca i1, i1 0
+  %nop2373 = alloca i1, i1 0
+  %nop2374 = alloca i1, i1 0
+  %nop2375 = alloca i1, i1 0
+  %nop2376 = alloca i1, i1 0
+  %nop2377 = alloca i1, i1 0
+  %nop2378 = alloca i1, i1 0
+  %nop2379 = alloca i1, i1 0
+  %nop2380 = alloca i1, i1 0
+  %nop2381 = alloca i1, i1 0
+  %nop2382 = alloca i1, i1 0
+  %nop2383 = alloca i1, i1 0
+  %nop2384 = alloca i1, i1 0
+  %nop2385 = alloca i1, i1 0
+  %nop2386 = alloca i1, i1 0
+  %nop2387 = alloca i1, i1 0
+  %nop2388 = alloca i1, i1 0
+  %nop2389 = alloca i1, i1 0
+  %nop2390 = alloca i1, i1 0
+  %nop2391 = alloca i1, i1 0
+  %nop2392 = alloca i1, i1 0
+  %nop2393 = alloca i1, i1 0
+  %nop2394 = alloca i1, i1 0
+  %nop2395 = alloca i1, i1 0
+  %nop2396 = alloca i1, i1 0
+  %nop2397 = alloca i1, i1 0
+  %nop2398 = alloca i1, i1 0
+  %nop2399 = alloca i1, i1 0
+  %nop2400 = alloca i1, i1 0
+  %nop2401 = alloca i1, i1 0
+  %nop2402 = alloca i1, i1 0
+  %nop2403 = alloca i1, i1 0
+  %nop2404 = alloca i1, i1 0
+  %nop2405 = alloca i1, i1 0
+  %nop2406 = alloca i1, i1 0
+  %nop2407 = alloca i1, i1 0
+  %nop2408 = alloca i1, i1 0
+  %nop2409 = alloca i1, i1 0
+  %nop2410 = alloca i1, i1 0
+  %nop2411 = alloca i1, i1 0
+  %nop2412 = alloca i1, i1 0
+  %nop2413 = alloca i1, i1 0
+  %nop2414 = alloca i1, i1 0
+  %nop2415 = alloca i1, i1 0
+  %nop2416 = alloca i1, i1 0
+  %nop2417 = alloca i1, i1 0
+  %nop2418 = alloca i1, i1 0
+  %nop2419 = alloca i1, i1 0
+  %nop2420 = alloca i1, i1 0
+  %nop2421 = alloca i1, i1 0
+  %nop2422 = alloca i1, i1 0
+  %nop2423 = alloca i1, i1 0
+  %nop2424 = alloca i1, i1 0
+  %nop2425 = alloca i1, i1 0
+  %nop2426 = alloca i1, i1 0
+  %nop2427 = alloca i1, i1 0
+  %nop2428 = alloca i1, i1 0
+  %nop2429 = alloca i1, i1 0
+  %nop2430 = alloca i1, i1 0
+  %nop2431 = alloca i1, i1 0
+  %nop2432 = alloca i1, i1 0
+  %nop2433 = alloca i1, i1 0
+  %nop2434 = alloca i1, i1 0
+  %nop2435 = alloca i1, i1 0
+  %nop2436 = alloca i1, i1 0
+  %nop2437 = alloca i1, i1 0
+  %nop2438 = alloca i1, i1 0
+  %nop2439 = alloca i1, i1 0
+  %nop2440 = alloca i1, i1 0
+  %nop2441 = alloca i1, i1 0
+  %nop2442 = alloca i1, i1 0
+  %nop2443 = alloca i1, i1 0
+  %nop2444 = alloca i1, i1 0
+  %nop2445 = alloca i1, i1 0
+  %nop2446 = alloca i1, i1 0
+  %nop2447 = alloca i1, i1 0
+  %nop2448 = alloca i1, i1 0
+  %nop2449 = alloca i1, i1 0
+  %nop2450 = alloca i1, i1 0
+  %nop2451 = alloca i1, i1 0
+  %nop2452 = alloca i1, i1 0
+  %nop2453 = alloca i1, i1 0
+  %nop2454 = alloca i1, i1 0
+  %nop2455 = alloca i1, i1 0
+  %nop2456 = alloca i1, i1 0
+  %nop2457 = alloca i1, i1 0
+  %nop2458 = alloca i1, i1 0
+  %nop2459 = alloca i1, i1 0
+  %nop2460 = alloca i1, i1 0
+  %nop2461 = alloca i1, i1 0
+  %nop2462 = alloca i1, i1 0
+  %nop2463 = alloca i1, i1 0
+  %nop2464 = alloca i1, i1 0
+  %nop2465 = alloca i1, i1 0
+  %nop2466 = alloca i1, i1 0
+  %nop2467 = alloca i1, i1 0
+  %nop2468 = alloca i1, i1 0
+  %nop2469 = alloca i1, i1 0
+  %nop2470 = alloca i1, i1 0
+  %nop2471 = alloca i1, i1 0
+  %nop2472 = alloca i1, i1 0
+  %nop2473 = alloca i1, i1 0
+  %nop2474 = alloca i1, i1 0
+  %nop2475 = alloca i1, i1 0
+  %nop2476 = alloca i1, i1 0
+  %nop2477 = alloca i1, i1 0
+  %nop2478 = alloca i1, i1 0
+  %nop2479 = alloca i1, i1 0
+  %nop2480 = alloca i1, i1 0
+  %nop2481 = alloca i1, i1 0
+  %nop2482 = alloca i1, i1 0
+  %nop2483 = alloca i1, i1 0
+  %nop2484 = alloca i1, i1 0
+  %nop2485 = alloca i1, i1 0
+  %nop2486 = alloca i1, i1 0
+  %nop2487 = alloca i1, i1 0
+  %nop2488 = alloca i1, i1 0
+  %nop2489 = alloca i1, i1 0
+  %nop2490 = alloca i1, i1 0
+  %nop2491 = alloca i1, i1 0
+  %nop2492 = alloca i1, i1 0
+  %nop2493 = alloca i1, i1 0
+  %nop2494 = alloca i1, i1 0
+  %nop2495 = alloca i1, i1 0
+  %nop2496 = alloca i1, i1 0
+  %nop2497 = alloca i1, i1 0
+  %nop2498 = alloca i1, i1 0
+  %nop2499 = alloca i1, i1 0
+  %nop2500 = alloca i1, i1 0
+  %nop2501 = alloca i1, i1 0
+  %nop2502 = alloca i1, i1 0
+  %nop2503 = alloca i1, i1 0
+  %nop2504 = alloca i1, i1 0
+  %nop2505 = alloca i1, i1 0
+  %nop2506 = alloca i1, i1 0
+  %nop2507 = alloca i1, i1 0
+  %nop2508 = alloca i1, i1 0
+  %nop2509 = alloca i1, i1 0
+  %nop2510 = alloca i1, i1 0
+  %nop2511 = alloca i1, i1 0
+  %nop2512 = alloca i1, i1 0
+  %nop2513 = alloca i1, i1 0
+  %nop2514 = alloca i1, i1 0
+  %nop2515 = alloca i1, i1 0
+  %nop2516 = alloca i1, i1 0
+  %nop2517 = alloca i1, i1 0
+  %nop2518 = alloca i1, i1 0
+  %nop2519 = alloca i1, i1 0
+  %nop2520 = alloca i1, i1 0
+  %nop2521 = alloca i1, i1 0
+  %nop2522 = alloca i1, i1 0
+  %nop2523 = alloca i1, i1 0
+  %nop2524 = alloca i1, i1 0
+  %nop2525 = alloca i1, i1 0
+  %nop2526 = alloca i1, i1 0
+  %nop2527 = alloca i1, i1 0
+  %nop2528 = alloca i1, i1 0
+  %nop2529 = alloca i1, i1 0
+  %nop2530 = alloca i1, i1 0
+  %nop2531 = alloca i1, i1 0
+  %nop2532 = alloca i1, i1 0
+  %nop2533 = alloca i1, i1 0
+  %nop2534 = alloca i1, i1 0
+  %nop2535 = alloca i1, i1 0
+  %nop2536 = alloca i1, i1 0
+  %nop2537 = alloca i1, i1 0
+  %nop2538 = alloca i1, i1 0
+  %nop2539 = alloca i1, i1 0
+  %nop2540 = alloca i1, i1 0
+  %nop2541 = alloca i1, i1 0
+  %nop2542 = alloca i1, i1 0
+  %nop2543 = alloca i1, i1 0
+  %nop2544 = alloca i1, i1 0
+  %nop2545 = alloca i1, i1 0
+  %nop2546 = alloca i1, i1 0
+  %nop2547 = alloca i1, i1 0
+  %nop2548 = alloca i1, i1 0
+  %nop2549 = alloca i1, i1 0
+  %nop2550 = alloca i1, i1 0
+  %nop2551 = alloca i1, i1 0
+  %nop2552 = alloca i1, i1 0
+  %nop2553 = alloca i1, i1 0
+  %nop2554 = alloca i1, i1 0
+  %nop2555 = alloca i1, i1 0
+  %nop2556 = alloca i1, i1 0
+  %nop2557 = alloca i1, i1 0
+  %nop2558 = alloca i1, i1 0
+  %nop2559 = alloca i1, i1 0
+  %nop2560 = alloca i1, i1 0
+  %nop2561 = alloca i1, i1 0
+  %nop2562 = alloca i1, i1 0
+  %nop2563 = alloca i1, i1 0
+  %nop2564 = alloca i1, i1 0
+  %nop2565 = alloca i1, i1 0
+  %nop2566 = alloca i1, i1 0
+  %nop2567 = alloca i1, i1 0
+  %nop2568 = alloca i1, i1 0
+  %nop2569 = alloca i1, i1 0
+  %nop2570 = alloca i1, i1 0
+  %nop2571 = alloca i1, i1 0
+  %nop2572 = alloca i1, i1 0
+  %nop2573 = alloca i1, i1 0
+  %nop2574 = alloca i1, i1 0
+  %nop2575 = alloca i1, i1 0
+  %nop2576 = alloca i1, i1 0
+  %nop2577 = alloca i1, i1 0
+  %nop2578 = alloca i1, i1 0
+  %nop2579 = alloca i1, i1 0
+  %nop2580 = alloca i1, i1 0
+  %nop2581 = alloca i1, i1 0
+  %nop2582 = alloca i1, i1 0
+  %nop2583 = alloca i1, i1 0
+  %nop2584 = alloca i1, i1 0
+  %nop2585 = alloca i1, i1 0
+  %nop2586 = alloca i1, i1 0
+  %nop2587 = alloca i1, i1 0
+  %nop2588 = alloca i1, i1 0
+  %nop2589 = alloca i1, i1 0
+  %nop2590 = alloca i1, i1 0
+  %nop2591 = alloca i1, i1 0
+  %nop2592 = alloca i1, i1 0
+  %nop2593 = alloca i1, i1 0
+  %nop2594 = alloca i1, i1 0
+  %nop2595 = alloca i1, i1 0
+  %nop2596 = alloca i1, i1 0
+  %nop2597 = alloca i1, i1 0
+  %nop2598 = alloca i1, i1 0
+  %nop2599 = alloca i1, i1 0
+  %nop2600 = alloca i1, i1 0
+  %nop2601 = alloca i1, i1 0
+  %nop2602 = alloca i1, i1 0
+  %nop2603 = alloca i1, i1 0
+  %nop2604 = alloca i1, i1 0
+  %nop2605 = alloca i1, i1 0
+  %nop2606 = alloca i1, i1 0
+  %nop2607 = alloca i1, i1 0
+  %nop2608 = alloca i1, i1 0
+  %nop2609 = alloca i1, i1 0
+  %nop2610 = alloca i1, i1 0
+  %nop2611 = alloca i1, i1 0
+  %nop2612 = alloca i1, i1 0
+  %nop2613 = alloca i1, i1 0
+  %nop2614 = alloca i1, i1 0
+  %nop2615 = alloca i1, i1 0
+  %nop2616 = alloca i1, i1 0
+  %nop2617 = alloca i1, i1 0
+  %nop2618 = alloca i1, i1 0
+  %nop2619 = alloca i1, i1 0
+  %nop2620 = alloca i1, i1 0
+  %nop2621 = alloca i1, i1 0
+  %nop2622 = alloca i1, i1 0
+  %nop2623 = alloca i1, i1 0
+  %nop2624 = alloca i1, i1 0
+  %nop2625 = alloca i1, i1 0
+  %nop2626 = alloca i1, i1 0
+  %nop2627 = alloca i1, i1 0
+  %nop2628 = alloca i1, i1 0
+  %nop2629 = alloca i1, i1 0
+  %nop2630 = alloca i1, i1 0
+  %nop2631 = alloca i1, i1 0
+  %nop2632 = alloca i1, i1 0
+  %nop2633 = alloca i1, i1 0
+  %nop2634 = alloca i1, i1 0
+  %nop2635 = alloca i1, i1 0
+  %nop2636 = alloca i1, i1 0
+  %nop2637 = alloca i1, i1 0
+  %nop2638 = alloca i1, i1 0
+  %nop2639 = alloca i1, i1 0
+  %nop2640 = alloca i1, i1 0
+  %nop2641 = alloca i1, i1 0
+  %nop2642 = alloca i1, i1 0
+  %nop2643 = alloca i1, i1 0
+  %nop2644 = alloca i1, i1 0
+  %nop2645 = alloca i1, i1 0
+  %nop2646 = alloca i1, i1 0
+  %nop2647 = alloca i1, i1 0
+  %nop2648 = alloca i1, i1 0
+  %nop2649 = alloca i1, i1 0
+  %nop2650 = alloca i1, i1 0
+  %nop2651 = alloca i1, i1 0
+  %nop2652 = alloca i1, i1 0
+  %nop2653 = alloca i1, i1 0
+  %nop2654 = alloca i1, i1 0
+  %nop2655 = alloca i1, i1 0
+  %nop2656 = alloca i1, i1 0
+  %nop2657 = alloca i1, i1 0
+  %nop2658 = alloca i1, i1 0
+  %nop2659 = alloca i1, i1 0
+  %nop2660 = alloca i1, i1 0
+  %nop2661 = alloca i1, i1 0
+  %nop2662 = alloca i1, i1 0
+  %nop2663 = alloca i1, i1 0
+  %nop2664 = alloca i1, i1 0
+  %nop2665 = alloca i1, i1 0
+  %nop2666 = alloca i1, i1 0
+  %nop2667 = alloca i1, i1 0
+  %nop2668 = alloca i1, i1 0
+  %nop2669 = alloca i1, i1 0
+  %nop2670 = alloca i1, i1 0
+  %nop2671 = alloca i1, i1 0
+  %nop2672 = alloca i1, i1 0
+  %nop2673 = alloca i1, i1 0
+  %nop2674 = alloca i1, i1 0
+  %nop2675 = alloca i1, i1 0
+  %nop2676 = alloca i1, i1 0
+  %nop2677 = alloca i1, i1 0
+  %nop2678 = alloca i1, i1 0
+  %nop2679 = alloca i1, i1 0
+  %nop2680 = alloca i1, i1 0
+  %nop2681 = alloca i1, i1 0
+  %nop2682 = alloca i1, i1 0
+  %nop2683 = alloca i1, i1 0
+  %nop2684 = alloca i1, i1 0
+  %nop2685 = alloca i1, i1 0
+  %nop2686 = alloca i1, i1 0
+  %nop2687 = alloca i1, i1 0
+  %nop2688 = alloca i1, i1 0
+  %nop2689 = alloca i1, i1 0
+  %nop2690 = alloca i1, i1 0
+  %nop2691 = alloca i1, i1 0
+  %nop2692 = alloca i1, i1 0
+  %nop2693 = alloca i1, i1 0
+  %nop2694 = alloca i1, i1 0
+  %nop2695 = alloca i1, i1 0
+  %nop2696 = alloca i1, i1 0
+  %nop2697 = alloca i1, i1 0
+  %nop2698 = alloca i1, i1 0
+  %nop2699 = alloca i1, i1 0
+  %nop2700 = alloca i1, i1 0
+  %nop2701 = alloca i1, i1 0
+  %nop2702 = alloca i1, i1 0
+  %nop2703 = alloca i1, i1 0
+  %nop2704 = alloca i1, i1 0
+  %nop2705 = alloca i1, i1 0
+  %nop2706 = alloca i1, i1 0
+  %nop2707 = alloca i1, i1 0
+  %nop2708 = alloca i1, i1 0
+  %nop2709 = alloca i1, i1 0
+  %nop2710 = alloca i1, i1 0
+  %nop2711 = alloca i1, i1 0
+  %nop2712 = alloca i1, i1 0
+  %nop2713 = alloca i1, i1 0
+  %nop2714 = alloca i1, i1 0
+  %nop2715 = alloca i1, i1 0
+  %nop2716 = alloca i1, i1 0
+  %nop2717 = alloca i1, i1 0
+  %nop2718 = alloca i1, i1 0
+  %nop2719 = alloca i1, i1 0
+  %nop2720 = alloca i1, i1 0
+  %nop2721 = alloca i1, i1 0
+  %nop2722 = alloca i1, i1 0
+  %nop2723 = alloca i1, i1 0
+  %nop2724 = alloca i1, i1 0
+  %nop2725 = alloca i1, i1 0
+  %nop2726 = alloca i1, i1 0
+  %nop2727 = alloca i1, i1 0
+  %nop2728 = alloca i1, i1 0
+  %nop2729 = alloca i1, i1 0
+  %nop2730 = alloca i1, i1 0
+  %nop2731 = alloca i1, i1 0
+  %nop2732 = alloca i1, i1 0
+  %nop2733 = alloca i1, i1 0
+  %nop2734 = alloca i1, i1 0
+  %nop2735 = alloca i1, i1 0
+  %nop2736 = alloca i1, i1 0
+  %nop2737 = alloca i1, i1 0
+  %nop2738 = alloca i1, i1 0
+  %nop2739 = alloca i1, i1 0
+  %nop2740 = alloca i1, i1 0
+  %nop2741 = alloca i1, i1 0
+  %nop2742 = alloca i1, i1 0
+  %nop2743 = alloca i1, i1 0
+  %nop2744 = alloca i1, i1 0
+  %nop2745 = alloca i1, i1 0
+  %nop2746 = alloca i1, i1 0
+  %nop2747 = alloca i1, i1 0
+  %nop2748 = alloca i1, i1 0
+  %nop2749 = alloca i1, i1 0
+  %nop2750 = alloca i1, i1 0
+  %nop2751 = alloca i1, i1 0
+  %nop2752 = alloca i1, i1 0
+  %nop2753 = alloca i1, i1 0
+  %nop2754 = alloca i1, i1 0
+  %nop2755 = alloca i1, i1 0
+  %nop2756 = alloca i1, i1 0
+  %nop2757 = alloca i1, i1 0
+  %nop2758 = alloca i1, i1 0
+  %nop2759 = alloca i1, i1 0
+  %nop2760 = alloca i1, i1 0
+  %nop2761 = alloca i1, i1 0
+  %nop2762 = alloca i1, i1 0
+  %nop2763 = alloca i1, i1 0
+  %nop2764 = alloca i1, i1 0
+  %nop2765 = alloca i1, i1 0
+  %nop2766 = alloca i1, i1 0
+  %nop2767 = alloca i1, i1 0
+  %nop2768 = alloca i1, i1 0
+  %nop2769 = alloca i1, i1 0
+  %nop2770 = alloca i1, i1 0
+  %nop2771 = alloca i1, i1 0
+  %nop2772 = alloca i1, i1 0
+  %nop2773 = alloca i1, i1 0
+  %nop2774 = alloca i1, i1 0
+  %nop2775 = alloca i1, i1 0
+  %nop2776 = alloca i1, i1 0
+  %nop2777 = alloca i1, i1 0
+  %nop2778 = alloca i1, i1 0
+  %nop2779 = alloca i1, i1 0
+  %nop2780 = alloca i1, i1 0
+  %nop2781 = alloca i1, i1 0
+  %nop2782 = alloca i1, i1 0
+  %nop2783 = alloca i1, i1 0
+  %nop2784 = alloca i1, i1 0
+  %nop2785 = alloca i1, i1 0
+  %nop2786 = alloca i1, i1 0
+  %nop2787 = alloca i1, i1 0
+  %nop2788 = alloca i1, i1 0
+  %nop2789 = alloca i1, i1 0
+  %nop2790 = alloca i1, i1 0
+  %nop2791 = alloca i1, i1 0
+  %nop2792 = alloca i1, i1 0
+  %nop2793 = alloca i1, i1 0
+  %nop2794 = alloca i1, i1 0
+  %nop2795 = alloca i1, i1 0
+  %nop2796 = alloca i1, i1 0
+  %nop2797 = alloca i1, i1 0
+  %nop2798 = alloca i1, i1 0
+  %nop2799 = alloca i1, i1 0
+  %nop2800 = alloca i1, i1 0
+  %nop2801 = alloca i1, i1 0
+  %nop2802 = alloca i1, i1 0
+  %nop2803 = alloca i1, i1 0
+  %nop2804 = alloca i1, i1 0
+  %nop2805 = alloca i1, i1 0
+  %nop2806 = alloca i1, i1 0
+  %nop2807 = alloca i1, i1 0
+  %nop2808 = alloca i1, i1 0
+  %nop2809 = alloca i1, i1 0
+  %nop2810 = alloca i1, i1 0
+  %nop2811 = alloca i1, i1 0
+  %nop2812 = alloca i1, i1 0
+  %nop2813 = alloca i1, i1 0
+  %nop2814 = alloca i1, i1 0
+  %nop2815 = alloca i1, i1 0
+  %nop2816 = alloca i1, i1 0
+  %nop2817 = alloca i1, i1 0
+  %nop2818 = alloca i1, i1 0
+  %nop2819 = alloca i1, i1 0
+  %nop2820 = alloca i1, i1 0
+  %nop2821 = alloca i1, i1 0
+  %nop2822 = alloca i1, i1 0
+  %nop2823 = alloca i1, i1 0
+  %nop2824 = alloca i1, i1 0
+  %nop2825 = alloca i1, i1 0
+  %nop2826 = alloca i1, i1 0
+  %nop2827 = alloca i1, i1 0
+  %nop2828 = alloca i1, i1 0
+  %nop2829 = alloca i1, i1 0
+  %nop2830 = alloca i1, i1 0
+  %nop2831 = alloca i1, i1 0
+  %nop2832 = alloca i1, i1 0
+  %nop2833 = alloca i1, i1 0
+  %nop2834 = alloca i1, i1 0
+  %nop2835 = alloca i1, i1 0
+  %nop2836 = alloca i1, i1 0
+  %nop2837 = alloca i1, i1 0
+  %nop2838 = alloca i1, i1 0
+  %nop2839 = alloca i1, i1 0
+  %nop2840 = alloca i1, i1 0
+  %nop2841 = alloca i1, i1 0
+  %nop2842 = alloca i1, i1 0
+  %nop2843 = alloca i1, i1 0
+  %nop2844 = alloca i1, i1 0
+  %nop2845 = alloca i1, i1 0
+  %nop2846 = alloca i1, i1 0
+  %nop2847 = alloca i1, i1 0
+  %nop2848 = alloca i1, i1 0
+  %nop2849 = alloca i1, i1 0
+  %nop2850 = alloca i1, i1 0
+  %nop2851 = alloca i1, i1 0
+  %nop2852 = alloca i1, i1 0
+  %nop2853 = alloca i1, i1 0
+  %nop2854 = alloca i1, i1 0
+  %nop2855 = alloca i1, i1 0
+  %nop2856 = alloca i1, i1 0
+  %nop2857 = alloca i1, i1 0
+  %nop2858 = alloca i1, i1 0
+  %nop2859 = alloca i1, i1 0
+  %nop2860 = alloca i1, i1 0
+  %nop2861 = alloca i1, i1 0
+  %nop2862 = alloca i1, i1 0
+  %nop2863 = alloca i1, i1 0
+  %nop2864 = alloca i1, i1 0
+  %nop2865 = alloca i1, i1 0
+  %nop2866 = alloca i1, i1 0
+  %nop2867 = alloca i1, i1 0
+  %nop2868 = alloca i1, i1 0
+  %nop2869 = alloca i1, i1 0
+  %nop2870 = alloca i1, i1 0
+  %nop2871 = alloca i1, i1 0
+  %nop2872 = alloca i1, i1 0
+  %nop2873 = alloca i1, i1 0
+  %nop2874 = alloca i1, i1 0
+  %nop2875 = alloca i1, i1 0
+  %nop2876 = alloca i1, i1 0
+  %nop2877 = alloca i1, i1 0
+  %nop2878 = alloca i1, i1 0
+  %nop2879 = alloca i1, i1 0
+  %nop2880 = alloca i1, i1 0
+  %nop2881 = alloca i1, i1 0
+  %nop2882 = alloca i1, i1 0
+  %nop2883 = alloca i1, i1 0
+  %nop2884 = alloca i1, i1 0
+  %nop2885 = alloca i1, i1 0
+  %nop2886 = alloca i1, i1 0
+  %nop2887 = alloca i1, i1 0
+  %nop2888 = alloca i1, i1 0
+  %nop2889 = alloca i1, i1 0
+  %nop2890 = alloca i1, i1 0
+  %nop2891 = alloca i1, i1 0
+  %nop2892 = alloca i1, i1 0
+  %nop2893 = alloca i1, i1 0
+  %nop2894 = alloca i1, i1 0
+  %nop2895 = alloca i1, i1 0
+  %nop2896 = alloca i1, i1 0
+  %nop2897 = alloca i1, i1 0
+  %nop2898 = alloca i1, i1 0
+  %nop2899 = alloca i1, i1 0
+  %nop2900 = alloca i1, i1 0
+  %nop2901 = alloca i1, i1 0
+  %nop2902 = alloca i1, i1 0
+  %nop2903 = alloca i1, i1 0
+  %nop2904 = alloca i1, i1 0
+  %nop2905 = alloca i1, i1 0
+  %nop2906 = alloca i1, i1 0
+  %nop2907 = alloca i1, i1 0
+  %nop2908 = alloca i1, i1 0
+  %nop2909 = alloca i1, i1 0
+  %nop2910 = alloca i1, i1 0
+  %nop2911 = alloca i1, i1 0
+  %nop2912 = alloca i1, i1 0
+  %nop2913 = alloca i1, i1 0
+  %nop2914 = alloca i1, i1 0
+  %nop2915 = alloca i1, i1 0
+  %nop2916 = alloca i1, i1 0
+  %nop2917 = alloca i1, i1 0
+  %nop2918 = alloca i1, i1 0
+  %nop2919 = alloca i1, i1 0
+  %nop2920 = alloca i1, i1 0
+  %nop2921 = alloca i1, i1 0
+  %nop2922 = alloca i1, i1 0
+  %nop2923 = alloca i1, i1 0
+  %nop2924 = alloca i1, i1 0
+  %nop2925 = alloca i1, i1 0
+  %nop2926 = alloca i1, i1 0
+  %nop2927 = alloca i1, i1 0
+  %nop2928 = alloca i1, i1 0
+  %nop2929 = alloca i1, i1 0
+  %nop2930 = alloca i1, i1 0
+  %nop2931 = alloca i1, i1 0
+  %nop2932 = alloca i1, i1 0
+  %nop2933 = alloca i1, i1 0
+  %nop2934 = alloca i1, i1 0
+  %nop2935 = alloca i1, i1 0
+  %nop2936 = alloca i1, i1 0
+  %nop2937 = alloca i1, i1 0
+  %nop2938 = alloca i1, i1 0
+  %nop2939 = alloca i1, i1 0
+  %nop2940 = alloca i1, i1 0
+  %nop2941 = alloca i1, i1 0
+  %nop2942 = alloca i1, i1 0
+  %nop2943 = alloca i1, i1 0
+  %nop2944 = alloca i1, i1 0
+  %nop2945 = alloca i1, i1 0
+  %nop2946 = alloca i1, i1 0
+  %nop2947 = alloca i1, i1 0
+  %nop2948 = alloca i1, i1 0
+  %nop2949 = alloca i1, i1 0
+  %nop2950 = alloca i1, i1 0
+  %nop2951 = alloca i1, i1 0
+  %nop2952 = alloca i1, i1 0
+  %nop2953 = alloca i1, i1 0
+  %nop2954 = alloca i1, i1 0
+  %nop2955 = alloca i1, i1 0
+  %nop2956 = alloca i1, i1 0
+  %nop2957 = alloca i1, i1 0
+  %nop2958 = alloca i1, i1 0
+  %nop2959 = alloca i1, i1 0
+  %nop2960 = alloca i1, i1 0
+  %nop2961 = alloca i1, i1 0
+  %nop2962 = alloca i1, i1 0
+  %nop2963 = alloca i1, i1 0
+  %nop2964 = alloca i1, i1 0
+  %nop2965 = alloca i1, i1 0
+  %nop2966 = alloca i1, i1 0
+  %nop2967 = alloca i1, i1 0
+  %nop2968 = alloca i1, i1 0
+  %nop2969 = alloca i1, i1 0
+  %nop2970 = alloca i1, i1 0
+  %nop2971 = alloca i1, i1 0
+  %nop2972 = alloca i1, i1 0
+  %nop2973 = alloca i1, i1 0
+  %nop2974 = alloca i1, i1 0
+  %nop2975 = alloca i1, i1 0
+  %nop2976 = alloca i1, i1 0
+  %nop2977 = alloca i1, i1 0
+  %nop2978 = alloca i1, i1 0
+  %nop2979 = alloca i1, i1 0
+  %nop2980 = alloca i1, i1 0
+  %nop2981 = alloca i1, i1 0
+  %nop2982 = alloca i1, i1 0
+  %nop2983 = alloca i1, i1 0
+  %nop2984 = alloca i1, i1 0
+  %nop2985 = alloca i1, i1 0
+  %nop2986 = alloca i1, i1 0
+  %nop2987 = alloca i1, i1 0
+  %nop2988 = alloca i1, i1 0
+  %nop2989 = alloca i1, i1 0
+  %nop2990 = alloca i1, i1 0
+  %nop2991 = alloca i1, i1 0
+  %nop2992 = alloca i1, i1 0
+  %nop2993 = alloca i1, i1 0
+  %nop2994 = alloca i1, i1 0
+  %nop2995 = alloca i1, i1 0
+  %nop2996 = alloca i1, i1 0
+  %nop2997 = alloca i1, i1 0
+  %nop2998 = alloca i1, i1 0
+  %nop2999 = alloca i1, i1 0
+  %nop3000 = alloca i1, i1 0
+  %nop3001 = alloca i1, i1 0
+  %nop3002 = alloca i1, i1 0
+  %nop3003 = alloca i1, i1 0
+  %nop3004 = alloca i1, i1 0
+  %nop3005 = alloca i1, i1 0
+  %nop3006 = alloca i1, i1 0
+  %nop3007 = alloca i1, i1 0
+  %nop3008 = alloca i1, i1 0
+  %nop3009 = alloca i1, i1 0
+  %nop3010 = alloca i1, i1 0
+  %nop3011 = alloca i1, i1 0
+  %nop3012 = alloca i1, i1 0
+  %nop3013 = alloca i1, i1 0
+  %nop3014 = alloca i1, i1 0
+  %nop3015 = alloca i1, i1 0
+  %nop3016 = alloca i1, i1 0
+  %nop3017 = alloca i1, i1 0
+  %nop3018 = alloca i1, i1 0
+  %nop3019 = alloca i1, i1 0
+  %nop3020 = alloca i1, i1 0
+  %nop3021 = alloca i1, i1 0
+  %nop3022 = alloca i1, i1 0
+  %nop3023 = alloca i1, i1 0
+  %nop3024 = alloca i1, i1 0
+  %nop3025 = alloca i1, i1 0
+  %nop3026 = alloca i1, i1 0
+  %nop3027 = alloca i1, i1 0
+  %nop3028 = alloca i1, i1 0
+  %nop3029 = alloca i1, i1 0
+  %nop3030 = alloca i1, i1 0
+  %nop3031 = alloca i1, i1 0
+  %nop3032 = alloca i1, i1 0
+  %nop3033 = alloca i1, i1 0
+  %nop3034 = alloca i1, i1 0
+  %nop3035 = alloca i1, i1 0
+  %nop3036 = alloca i1, i1 0
+  %nop3037 = alloca i1, i1 0
+  %nop3038 = alloca i1, i1 0
+  %nop3039 = alloca i1, i1 0
+  %nop3040 = alloca i1, i1 0
+  %nop3041 = alloca i1, i1 0
+  %nop3042 = alloca i1, i1 0
+  %nop3043 = alloca i1, i1 0
+  %nop3044 = alloca i1, i1 0
+  %nop3045 = alloca i1, i1 0
+  %nop3046 = alloca i1, i1 0
+  %nop3047 = alloca i1, i1 0
+  %nop3048 = alloca i1, i1 0
+  %nop3049 = alloca i1, i1 0
+  %nop3050 = alloca i1, i1 0
+  %nop3051 = alloca i1, i1 0
+  %nop3052 = alloca i1, i1 0
+  %nop3053 = alloca i1, i1 0
+  %nop3054 = alloca i1, i1 0
+  %nop3055 = alloca i1, i1 0
+  %nop3056 = alloca i1, i1 0
+  %nop3057 = alloca i1, i1 0
+  %nop3058 = alloca i1, i1 0
+  %nop3059 = alloca i1, i1 0
+  %nop3060 = alloca i1, i1 0
+  %nop3061 = alloca i1, i1 0
+  %nop3062 = alloca i1, i1 0
+  %nop3063 = alloca i1, i1 0
+  %nop3064 = alloca i1, i1 0
+  %nop3065 = alloca i1, i1 0
+  %nop3066 = alloca i1, i1 0
+  %nop3067 = alloca i1, i1 0
+  %nop3068 = alloca i1, i1 0
+  %nop3069 = alloca i1, i1 0
+  %nop3070 = alloca i1, i1 0
+  %nop3071 = alloca i1, i1 0
+  %nop3072 = alloca i1, i1 0
+  %nop3073 = alloca i1, i1 0
+  %nop3074 = alloca i1, i1 0
+  %nop3075 = alloca i1, i1 0
+  %nop3076 = alloca i1, i1 0
+  %nop3077 = alloca i1, i1 0
+  %nop3078 = alloca i1, i1 0
+  %nop3079 = alloca i1, i1 0
+  %nop3080 = alloca i1, i1 0
+  %nop3081 = alloca i1, i1 0
+  %nop3082 = alloca i1, i1 0
+  %nop3083 = alloca i1, i1 0
+  %nop3084 = alloca i1, i1 0
+  %nop3085 = alloca i1, i1 0
+  %nop3086 = alloca i1, i1 0
+  %nop3087 = alloca i1, i1 0
+  %nop3088 = alloca i1, i1 0
+  %nop3089 = alloca i1, i1 0
+  %nop3090 = alloca i1, i1 0
+  %nop3091 = alloca i1, i1 0
+  %nop3092 = alloca i1, i1 0
+  %nop3093 = alloca i1, i1 0
+  %nop3094 = alloca i1, i1 0
+  %nop3095 = alloca i1, i1 0
+  %nop3096 = alloca i1, i1 0
+  %nop3097 = alloca i1, i1 0
+  %nop3098 = alloca i1, i1 0
+  %nop3099 = alloca i1, i1 0
+  %nop3100 = alloca i1, i1 0
+  %nop3101 = alloca i1, i1 0
+  %nop3102 = alloca i1, i1 0
+  %nop3103 = alloca i1, i1 0
+  %nop3104 = alloca i1, i1 0
+  %nop3105 = alloca i1, i1 0
+  %nop3106 = alloca i1, i1 0
+  %nop3107 = alloca i1, i1 0
+  %nop3108 = alloca i1, i1 0
+  %nop3109 = alloca i1, i1 0
+  %nop3110 = alloca i1, i1 0
+  %nop3111 = alloca i1, i1 0
+  %nop3112 = alloca i1, i1 0
+  %nop3113 = alloca i1, i1 0
+  %nop3114 = alloca i1, i1 0
+  %nop3115 = alloca i1, i1 0
+  %nop3116 = alloca i1, i1 0
+  %nop3117 = alloca i1, i1 0
+  %nop3118 = alloca i1, i1 0
+  %nop3119 = alloca i1, i1 0
+  %nop3120 = alloca i1, i1 0
+  %nop3121 = alloca i1, i1 0
+  %nop3122 = alloca i1, i1 0
+  %nop3123 = alloca i1, i1 0
+  %nop3124 = alloca i1, i1 0
+  %nop3125 = alloca i1, i1 0
+  %nop3126 = alloca i1, i1 0
+  %nop3127 = alloca i1, i1 0
+  %nop3128 = alloca i1, i1 0
+  %nop3129 = alloca i1, i1 0
+  %nop3130 = alloca i1, i1 0
+  %nop3131 = alloca i1, i1 0
+  %nop3132 = alloca i1, i1 0
+  %nop3133 = alloca i1, i1 0
+  %nop3134 = alloca i1, i1 0
+  %nop3135 = alloca i1, i1 0
+  %nop3136 = alloca i1, i1 0
+  %nop3137 = alloca i1, i1 0
+  %nop3138 = alloca i1, i1 0
+  %nop3139 = alloca i1, i1 0
+  %nop3140 = alloca i1, i1 0
+  %nop3141 = alloca i1, i1 0
+  %nop3142 = alloca i1, i1 0
+  %nop3143 = alloca i1, i1 0
+  %nop3144 = alloca i1, i1 0
+  %nop3145 = alloca i1, i1 0
+  %nop3146 = alloca i1, i1 0
+  %nop3147 = alloca i1, i1 0
+  %nop3148 = alloca i1, i1 0
+  %nop3149 = alloca i1, i1 0
+  %nop3150 = alloca i1, i1 0
+  %nop3151 = alloca i1, i1 0
+  %nop3152 = alloca i1, i1 0
+  %nop3153 = alloca i1, i1 0
+  %nop3154 = alloca i1, i1 0
+  %nop3155 = alloca i1, i1 0
+  %nop3156 = alloca i1, i1 0
+  %nop3157 = alloca i1, i1 0
+  %nop3158 = alloca i1, i1 0
+  %nop3159 = alloca i1, i1 0
+  %nop3160 = alloca i1, i1 0
+  %nop3161 = alloca i1, i1 0
+  %nop3162 = alloca i1, i1 0
+  %nop3163 = alloca i1, i1 0
+  %nop3164 = alloca i1, i1 0
+  %nop3165 = alloca i1, i1 0
+  %nop3166 = alloca i1, i1 0
+  %nop3167 = alloca i1, i1 0
+  %nop3168 = alloca i1, i1 0
+  %nop3169 = alloca i1, i1 0
+  %nop3170 = alloca i1, i1 0
+  %nop3171 = alloca i1, i1 0
+  %nop3172 = alloca i1, i1 0
+  %nop3173 = alloca i1, i1 0
+  %nop3174 = alloca i1, i1 0
+  %nop3175 = alloca i1, i1 0
+  %nop3176 = alloca i1, i1 0
+  %nop3177 = alloca i1, i1 0
+  %nop3178 = alloca i1, i1 0
+  %nop3179 = alloca i1, i1 0
+  %nop3180 = alloca i1, i1 0
+  %nop3181 = alloca i1, i1 0
+  %nop3182 = alloca i1, i1 0
+  %nop3183 = alloca i1, i1 0
+  %nop3184 = alloca i1, i1 0
+  %nop3185 = alloca i1, i1 0
+  %nop3186 = alloca i1, i1 0
+  %nop3187 = alloca i1, i1 0
+  %nop3188 = alloca i1, i1 0
+  %nop3189 = alloca i1, i1 0
+  %nop3190 = alloca i1, i1 0
+  %nop3191 = alloca i1, i1 0
+  %nop3192 = alloca i1, i1 0
+  %nop3193 = alloca i1, i1 0
+  %nop3194 = alloca i1, i1 0
+  %nop3195 = alloca i1, i1 0
+  %nop3196 = alloca i1, i1 0
+  %nop3197 = alloca i1, i1 0
+  %nop3198 = alloca i1, i1 0
+  %nop3199 = alloca i1, i1 0
+  %nop3200 = alloca i1, i1 0
+  %nop3201 = alloca i1, i1 0
+  %nop3202 = alloca i1, i1 0
+  %nop3203 = alloca i1, i1 0
+  %nop3204 = alloca i1, i1 0
+  %nop3205 = alloca i1, i1 0
+  %nop3206 = alloca i1, i1 0
+  %nop3207 = alloca i1, i1 0
+  %nop3208 = alloca i1, i1 0
+  %nop3209 = alloca i1, i1 0
+  %nop3210 = alloca i1, i1 0
+  %nop3211 = alloca i1, i1 0
+  %nop3212 = alloca i1, i1 0
+  %nop3213 = alloca i1, i1 0
+  %nop3214 = alloca i1, i1 0
+  %nop3215 = alloca i1, i1 0
+  %nop3216 = alloca i1, i1 0
+  %nop3217 = alloca i1, i1 0
+  %nop3218 = alloca i1, i1 0
+  %nop3219 = alloca i1, i1 0
+  %nop3220 = alloca i1, i1 0
+  %nop3221 = alloca i1, i1 0
+  %nop3222 = alloca i1, i1 0
+  %nop3223 = alloca i1, i1 0
+  %nop3224 = alloca i1, i1 0
+  %nop3225 = alloca i1, i1 0
+  %nop3226 = alloca i1, i1 0
+  %nop3227 = alloca i1, i1 0
+  %nop3228 = alloca i1, i1 0
+  %nop3229 = alloca i1, i1 0
+  %nop3230 = alloca i1, i1 0
+  %nop3231 = alloca i1, i1 0
+  %nop3232 = alloca i1, i1 0
+  %nop3233 = alloca i1, i1 0
+  %nop3234 = alloca i1, i1 0
+  %nop3235 = alloca i1, i1 0
+  %nop3236 = alloca i1, i1 0
+  %nop3237 = alloca i1, i1 0
+  %nop3238 = alloca i1, i1 0
+  %nop3239 = alloca i1, i1 0
+  %nop3240 = alloca i1, i1 0
+  %nop3241 = alloca i1, i1 0
+  %nop3242 = alloca i1, i1 0
+  %nop3243 = alloca i1, i1 0
+  %nop3244 = alloca i1, i1 0
+  %nop3245 = alloca i1, i1 0
+  %nop3246 = alloca i1, i1 0
+  %nop3247 = alloca i1, i1 0
+  %nop3248 = alloca i1, i1 0
+  %nop3249 = alloca i1, i1 0
+  %nop3250 = alloca i1, i1 0
+  %nop3251 = alloca i1, i1 0
+  %nop3252 = alloca i1, i1 0
+  %nop3253 = alloca i1, i1 0
+  %nop3254 = alloca i1, i1 0
+  %nop3255 = alloca i1, i1 0
+  %nop3256 = alloca i1, i1 0
+  %nop3257 = alloca i1, i1 0
+  %nop3258 = alloca i1, i1 0
+  %nop3259 = alloca i1, i1 0
+  %nop3260 = alloca i1, i1 0
+  %nop3261 = alloca i1, i1 0
+  %nop3262 = alloca i1, i1 0
+  %nop3263 = alloca i1, i1 0
+  %nop3264 = alloca i1, i1 0
+  %nop3265 = alloca i1, i1 0
+  %nop3266 = alloca i1, i1 0
+  %nop3267 = alloca i1, i1 0
+  %nop3268 = alloca i1, i1 0
+  %nop3269 = alloca i1, i1 0
+  %nop3270 = alloca i1, i1 0
+  %nop3271 = alloca i1, i1 0
+  %nop3272 = alloca i1, i1 0
+  %nop3273 = alloca i1, i1 0
+  %nop3274 = alloca i1, i1 0
+  %nop3275 = alloca i1, i1 0
+  %nop3276 = alloca i1, i1 0
+  %nop3277 = alloca i1, i1 0
+  %nop3278 = alloca i1, i1 0
+  %nop3279 = alloca i1, i1 0
+  %nop3280 = alloca i1, i1 0
+  %nop3281 = alloca i1, i1 0
+  %nop3282 = alloca i1, i1 0
+  %nop3283 = alloca i1, i1 0
+  %nop3284 = alloca i1, i1 0
+  %nop3285 = alloca i1, i1 0
+  %nop3286 = alloca i1, i1 0
+  %nop3287 = alloca i1, i1 0
+  %nop3288 = alloca i1, i1 0
+  %nop3289 = alloca i1, i1 0
+  %nop3290 = alloca i1, i1 0
+  %nop3291 = alloca i1, i1 0
+  %nop3292 = alloca i1, i1 0
+  %nop3293 = alloca i1, i1 0
+  %nop3294 = alloca i1, i1 0
+  %nop3295 = alloca i1, i1 0
+  %nop3296 = alloca i1, i1 0
+  %nop3297 = alloca i1, i1 0
+  %nop3298 = alloca i1, i1 0
+  %nop3299 = alloca i1, i1 0
+  %nop3300 = alloca i1, i1 0
+  %nop3301 = alloca i1, i1 0
+  %nop3302 = alloca i1, i1 0
+  %nop3303 = alloca i1, i1 0
+  %nop3304 = alloca i1, i1 0
+  %nop3305 = alloca i1, i1 0
+  %nop3306 = alloca i1, i1 0
+  %nop3307 = alloca i1, i1 0
+  %nop3308 = alloca i1, i1 0
+  %nop3309 = alloca i1, i1 0
+  %nop3310 = alloca i1, i1 0
+  %nop3311 = alloca i1, i1 0
+  %nop3312 = alloca i1, i1 0
+  %nop3313 = alloca i1, i1 0
+  %nop3314 = alloca i1, i1 0
+  %nop3315 = alloca i1, i1 0
+  %nop3316 = alloca i1, i1 0
+  %nop3317 = alloca i1, i1 0
+  %nop3318 = alloca i1, i1 0
+  %nop3319 = alloca i1, i1 0
+  %nop3320 = alloca i1, i1 0
+  %nop3321 = alloca i1, i1 0
+  %nop3322 = alloca i1, i1 0
+  %nop3323 = alloca i1, i1 0
+  %nop3324 = alloca i1, i1 0
+  %nop3325 = alloca i1, i1 0
+  %nop3326 = alloca i1, i1 0
+  %nop3327 = alloca i1, i1 0
+  %nop3328 = alloca i1, i1 0
+  %nop3329 = alloca i1, i1 0
+  %nop3330 = alloca i1, i1 0
+  %nop3331 = alloca i1, i1 0
+  %nop3332 = alloca i1, i1 0
+  %nop3333 = alloca i1, i1 0
+  %nop3334 = alloca i1, i1 0
+  %nop3335 = alloca i1, i1 0
+  %nop3336 = alloca i1, i1 0
+  %nop3337 = alloca i1, i1 0
+  %nop3338 = alloca i1, i1 0
+  %nop3339 = alloca i1, i1 0
+  %nop3340 = alloca i1, i1 0
+  %nop3341 = alloca i1, i1 0
+  %nop3342 = alloca i1, i1 0
+  %nop3343 = alloca i1, i1 0
+  %nop3344 = alloca i1, i1 0
+  %nop3345 = alloca i1, i1 0
+  %nop3346 = alloca i1, i1 0
+  %nop3347 = alloca i1, i1 0
+  %nop3348 = alloca i1, i1 0
+  %nop3349 = alloca i1, i1 0
+  %nop3350 = alloca i1, i1 0
+  %nop3351 = alloca i1, i1 0
+  %nop3352 = alloca i1, i1 0
+  %nop3353 = alloca i1, i1 0
+  %nop3354 = alloca i1, i1 0
+  %nop3355 = alloca i1, i1 0
+  %nop3356 = alloca i1, i1 0
+  %nop3357 = alloca i1, i1 0
+  %nop3358 = alloca i1, i1 0
+  %nop3359 = alloca i1, i1 0
+  %nop3360 = alloca i1, i1 0
+  %nop3361 = alloca i1, i1 0
+  %nop3362 = alloca i1, i1 0
+  %nop3363 = alloca i1, i1 0
+  %nop3364 = alloca i1, i1 0
+  %nop3365 = alloca i1, i1 0
+  %nop3366 = alloca i1, i1 0
+  %nop3367 = alloca i1, i1 0
+  %nop3368 = alloca i1, i1 0
+  %nop3369 = alloca i1, i1 0
+  %nop3370 = alloca i1, i1 0
+  %nop3371 = alloca i1, i1 0
+  %nop3372 = alloca i1, i1 0
+  %nop3373 = alloca i1, i1 0
+  %nop3374 = alloca i1, i1 0
+  %nop3375 = alloca i1, i1 0
+  %nop3376 = alloca i1, i1 0
+  %nop3377 = alloca i1, i1 0
+  %nop3378 = alloca i1, i1 0
+  %nop3379 = alloca i1, i1 0
+  %nop3380 = alloca i1, i1 0
+  %nop3381 = alloca i1, i1 0
+  %nop3382 = alloca i1, i1 0
+  %nop3383 = alloca i1, i1 0
+  %nop3384 = alloca i1, i1 0
+  %nop3385 = alloca i1, i1 0
+  %nop3386 = alloca i1, i1 0
+  %nop3387 = alloca i1, i1 0
+  %nop3388 = alloca i1, i1 0
+  %nop3389 = alloca i1, i1 0
+  %nop3390 = alloca i1, i1 0
+  %nop3391 = alloca i1, i1 0
+  %nop3392 = alloca i1, i1 0
+  %nop3393 = alloca i1, i1 0
+  %nop3394 = alloca i1, i1 0
+  %nop3395 = alloca i1, i1 0
+  %nop3396 = alloca i1, i1 0
+  %nop3397 = alloca i1, i1 0
+  %nop3398 = alloca i1, i1 0
+  %nop3399 = alloca i1, i1 0
+  %nop3400 = alloca i1, i1 0
+  %nop3401 = alloca i1, i1 0
+  %nop3402 = alloca i1, i1 0
+  %nop3403 = alloca i1, i1 0
+  %nop3404 = alloca i1, i1 0
+  %nop3405 = alloca i1, i1 0
+  %nop3406 = alloca i1, i1 0
+  %nop3407 = alloca i1, i1 0
+  %nop3408 = alloca i1, i1 0
+  %nop3409 = alloca i1, i1 0
+  %nop3410 = alloca i1, i1 0
+  %nop3411 = alloca i1, i1 0
+  %nop3412 = alloca i1, i1 0
+  %nop3413 = alloca i1, i1 0
+  %nop3414 = alloca i1, i1 0
+  %nop3415 = alloca i1, i1 0
+  %nop3416 = alloca i1, i1 0
+  %nop3417 = alloca i1, i1 0
+  %nop3418 = alloca i1, i1 0
+  %nop3419 = alloca i1, i1 0
+  %nop3420 = alloca i1, i1 0
+  %nop3421 = alloca i1, i1 0
+  %nop3422 = alloca i1, i1 0
+  %nop3423 = alloca i1, i1 0
+  %nop3424 = alloca i1, i1 0
+  %nop3425 = alloca i1, i1 0
+  %nop3426 = alloca i1, i1 0
+  %nop3427 = alloca i1, i1 0
+  %nop3428 = alloca i1, i1 0
+  %nop3429 = alloca i1, i1 0
+  %nop3430 = alloca i1, i1 0
+  %nop3431 = alloca i1, i1 0
+  %nop3432 = alloca i1, i1 0
+  %nop3433 = alloca i1, i1 0
+  %nop3434 = alloca i1, i1 0
+  %nop3435 = alloca i1, i1 0
+  %nop3436 = alloca i1, i1 0
+  %nop3437 = alloca i1, i1 0
+  %nop3438 = alloca i1, i1 0
+  %nop3439 = alloca i1, i1 0
+  %nop3440 = alloca i1, i1 0
+  %nop3441 = alloca i1, i1 0
+  %nop3442 = alloca i1, i1 0
+  %nop3443 = alloca i1, i1 0
+  %nop3444 = alloca i1, i1 0
+  %nop3445 = alloca i1, i1 0
+  %nop3446 = alloca i1, i1 0
+  %nop3447 = alloca i1, i1 0
+  %nop3448 = alloca i1, i1 0
+  %nop3449 = alloca i1, i1 0
+  %nop3450 = alloca i1, i1 0
+  %nop3451 = alloca i1, i1 0
+  %nop3452 = alloca i1, i1 0
+  %nop3453 = alloca i1, i1 0
+  %nop3454 = alloca i1, i1 0
+  %nop3455 = alloca i1, i1 0
+  %nop3456 = alloca i1, i1 0
+  %nop3457 = alloca i1, i1 0
+  %nop3458 = alloca i1, i1 0
+  %nop3459 = alloca i1, i1 0
+  %nop3460 = alloca i1, i1 0
+  %nop3461 = alloca i1, i1 0
+  %nop3462 = alloca i1, i1 0
+  %nop3463 = alloca i1, i1 0
+  %nop3464 = alloca i1, i1 0
+  %nop3465 = alloca i1, i1 0
+  %nop3466 = alloca i1, i1 0
+  %nop3467 = alloca i1, i1 0
+  %nop3468 = alloca i1, i1 0
+  %nop3469 = alloca i1, i1 0
+  %nop3470 = alloca i1, i1 0
+  %nop3471 = alloca i1, i1 0
+  %nop3472 = alloca i1, i1 0
+  %nop3473 = alloca i1, i1 0
+  %nop3474 = alloca i1, i1 0
+  %nop3475 = alloca i1, i1 0
+  %nop3476 = alloca i1, i1 0
+  %nop3477 = alloca i1, i1 0
+  %nop3478 = alloca i1, i1 0
+  %nop3479 = alloca i1, i1 0
+  %nop3480 = alloca i1, i1 0
+  %nop3481 = alloca i1, i1 0
+  %nop3482 = alloca i1, i1 0
+  %nop3483 = alloca i1, i1 0
+  %nop3484 = alloca i1, i1 0
+  %nop3485 = alloca i1, i1 0
+  %nop3486 = alloca i1, i1 0
+  %nop3487 = alloca i1, i1 0
+  %nop3488 = alloca i1, i1 0
+  %nop3489 = alloca i1, i1 0
+  %nop3490 = alloca i1, i1 0
+  %nop3491 = alloca i1, i1 0
+  %nop3492 = alloca i1, i1 0
+  %nop3493 = alloca i1, i1 0
+  %nop3494 = alloca i1, i1 0
+  %nop3495 = alloca i1, i1 0
+  %nop3496 = alloca i1, i1 0
+  %nop3497 = alloca i1, i1 0
+  %nop3498 = alloca i1, i1 0
+  %nop3499 = alloca i1, i1 0
+  %nop3500 = alloca i1, i1 0
+  %nop3501 = alloca i1, i1 0
+  %nop3502 = alloca i1, i1 0
+  %nop3503 = alloca i1, i1 0
+  %nop3504 = alloca i1, i1 0
+  %nop3505 = alloca i1, i1 0
+  %nop3506 = alloca i1, i1 0
+  %nop3507 = alloca i1, i1 0
+  %nop3508 = alloca i1, i1 0
+  %nop3509 = alloca i1, i1 0
+  %nop3510 = alloca i1, i1 0
+  %nop3511 = alloca i1, i1 0
+  %nop3512 = alloca i1, i1 0
+  %nop3513 = alloca i1, i1 0
+  %nop3514 = alloca i1, i1 0
+  %nop3515 = alloca i1, i1 0
+  %nop3516 = alloca i1, i1 0
+  %nop3517 = alloca i1, i1 0
+  %nop3518 = alloca i1, i1 0
+  %nop3519 = alloca i1, i1 0
+  %nop3520 = alloca i1, i1 0
+  %nop3521 = alloca i1, i1 0
+  %nop3522 = alloca i1, i1 0
+  %nop3523 = alloca i1, i1 0
+  %nop3524 = alloca i1, i1 0
+  %nop3525 = alloca i1, i1 0
+  %nop3526 = alloca i1, i1 0
+  %nop3527 = alloca i1, i1 0
+  %nop3528 = alloca i1, i1 0
+  %nop3529 = alloca i1, i1 0
+  %nop3530 = alloca i1, i1 0
+  %nop3531 = alloca i1, i1 0
+  %nop3532 = alloca i1, i1 0
+  %nop3533 = alloca i1, i1 0
+  %nop3534 = alloca i1, i1 0
+  %nop3535 = alloca i1, i1 0
+  %nop3536 = alloca i1, i1 0
+  %nop3537 = alloca i1, i1 0
+  %nop3538 = alloca i1, i1 0
+  %nop3539 = alloca i1, i1 0
+  %nop3540 = alloca i1, i1 0
+  %nop3541 = alloca i1, i1 0
+  %nop3542 = alloca i1, i1 0
+  %nop3543 = alloca i1, i1 0
+  %nop3544 = alloca i1, i1 0
+  %nop3545 = alloca i1, i1 0
+  %nop3546 = alloca i1, i1 0
+  %nop3547 = alloca i1, i1 0
+  %nop3548 = alloca i1, i1 0
+  %nop3549 = alloca i1, i1 0
+  %nop3550 = alloca i1, i1 0
+  %nop3551 = alloca i1, i1 0
+  %nop3552 = alloca i1, i1 0
+  %nop3553 = alloca i1, i1 0
+  %nop3554 = alloca i1, i1 0
+  %nop3555 = alloca i1, i1 0
+  %nop3556 = alloca i1, i1 0
+  %nop3557 = alloca i1, i1 0
+  %nop3558 = alloca i1, i1 0
+  %nop3559 = alloca i1, i1 0
+  %nop3560 = alloca i1, i1 0
+  %nop3561 = alloca i1, i1 0
+  %nop3562 = alloca i1, i1 0
+  %nop3563 = alloca i1, i1 0
+  %nop3564 = alloca i1, i1 0
+  %nop3565 = alloca i1, i1 0
+  %nop3566 = alloca i1, i1 0
+  %nop3567 = alloca i1, i1 0
+  %nop3568 = alloca i1, i1 0
+  %nop3569 = alloca i1, i1 0
+  %nop3570 = alloca i1, i1 0
+  %nop3571 = alloca i1, i1 0
+  %nop3572 = alloca i1, i1 0
+  %nop3573 = alloca i1, i1 0
+  %nop3574 = alloca i1, i1 0
+  %nop3575 = alloca i1, i1 0
+  %nop3576 = alloca i1, i1 0
+  %nop3577 = alloca i1, i1 0
+  %nop3578 = alloca i1, i1 0
+  %nop3579 = alloca i1, i1 0
+  %nop3580 = alloca i1, i1 0
+  %nop3581 = alloca i1, i1 0
+  %nop3582 = alloca i1, i1 0
+  %nop3583 = alloca i1, i1 0
+  %nop3584 = alloca i1, i1 0
+  %nop3585 = alloca i1, i1 0
+  %nop3586 = alloca i1, i1 0
+  %nop3587 = alloca i1, i1 0
+  %nop3588 = alloca i1, i1 0
+  %nop3589 = alloca i1, i1 0
+  %nop3590 = alloca i1, i1 0
+  %nop3591 = alloca i1, i1 0
+  %nop3592 = alloca i1, i1 0
+  %nop3593 = alloca i1, i1 0
+  %nop3594 = alloca i1, i1 0
+  %nop3595 = alloca i1, i1 0
+  %nop3596 = alloca i1, i1 0
+  %nop3597 = alloca i1, i1 0
+  %nop3598 = alloca i1, i1 0
+  %nop3599 = alloca i1, i1 0
+  %nop3600 = alloca i1, i1 0
+  %nop3601 = alloca i1, i1 0
+  %nop3602 = alloca i1, i1 0
+  %nop3603 = alloca i1, i1 0
+  %nop3604 = alloca i1, i1 0
+  %nop3605 = alloca i1, i1 0
+  %nop3606 = alloca i1, i1 0
+  %nop3607 = alloca i1, i1 0
+  %nop3608 = alloca i1, i1 0
+  %nop3609 = alloca i1, i1 0
+  %nop3610 = alloca i1, i1 0
+  %nop3611 = alloca i1, i1 0
+  %nop3612 = alloca i1, i1 0
+  %nop3613 = alloca i1, i1 0
+  %nop3614 = alloca i1, i1 0
+  %nop3615 = alloca i1, i1 0
+  %nop3616 = alloca i1, i1 0
+  %nop3617 = alloca i1, i1 0
+  %nop3618 = alloca i1, i1 0
+  %nop3619 = alloca i1, i1 0
+  %nop3620 = alloca i1, i1 0
+  %nop3621 = alloca i1, i1 0
+  %nop3622 = alloca i1, i1 0
+  %nop3623 = alloca i1, i1 0
+  %nop3624 = alloca i1, i1 0
+  %nop3625 = alloca i1, i1 0
+  %nop3626 = alloca i1, i1 0
+  %nop3627 = alloca i1, i1 0
+  %nop3628 = alloca i1, i1 0
+  %nop3629 = alloca i1, i1 0
+  %nop3630 = alloca i1, i1 0
+  %nop3631 = alloca i1, i1 0
+  %nop3632 = alloca i1, i1 0
+  %nop3633 = alloca i1, i1 0
+  %nop3634 = alloca i1, i1 0
+  %nop3635 = alloca i1, i1 0
+  %nop3636 = alloca i1, i1 0
+  %nop3637 = alloca i1, i1 0
+  %nop3638 = alloca i1, i1 0
+  %nop3639 = alloca i1, i1 0
+  %nop3640 = alloca i1, i1 0
+  %nop3641 = alloca i1, i1 0
+  %nop3642 = alloca i1, i1 0
+  %nop3643 = alloca i1, i1 0
+  %nop3644 = alloca i1, i1 0
+  %nop3645 = alloca i1, i1 0
+  %nop3646 = alloca i1, i1 0
+  %nop3647 = alloca i1, i1 0
+  %nop3648 = alloca i1, i1 0
+  %nop3649 = alloca i1, i1 0
+  %nop3650 = alloca i1, i1 0
+  %nop3651 = alloca i1, i1 0
+  %nop3652 = alloca i1, i1 0
+  %nop3653 = alloca i1, i1 0
+  %nop3654 = alloca i1, i1 0
+  %nop3655 = alloca i1, i1 0
+  %nop3656 = alloca i1, i1 0
+  %nop3657 = alloca i1, i1 0
+  %nop3658 = alloca i1, i1 0
+  %nop3659 = alloca i1, i1 0
+  %nop3660 = alloca i1, i1 0
+  %nop3661 = alloca i1, i1 0
+  %nop3662 = alloca i1, i1 0
+  %nop3663 = alloca i1, i1 0
+  %nop3664 = alloca i1, i1 0
+  %nop3665 = alloca i1, i1 0
+  %nop3666 = alloca i1, i1 0
+  %nop3667 = alloca i1, i1 0
+  %nop3668 = alloca i1, i1 0
+  %nop3669 = alloca i1, i1 0
+  %nop3670 = alloca i1, i1 0
+  %nop3671 = alloca i1, i1 0
+  %nop3672 = alloca i1, i1 0
+  %nop3673 = alloca i1, i1 0
+  %nop3674 = alloca i1, i1 0
+  %nop3675 = alloca i1, i1 0
+  %nop3676 = alloca i1, i1 0
+  %nop3677 = alloca i1, i1 0
+  %nop3678 = alloca i1, i1 0
+  %nop3679 = alloca i1, i1 0
+  %nop3680 = alloca i1, i1 0
+  %nop3681 = alloca i1, i1 0
+  %nop3682 = alloca i1, i1 0
+  %nop3683 = alloca i1, i1 0
+  %nop3684 = alloca i1, i1 0
+  %nop3685 = alloca i1, i1 0
+  %nop3686 = alloca i1, i1 0
+  %nop3687 = alloca i1, i1 0
+  %nop3688 = alloca i1, i1 0
+  %nop3689 = alloca i1, i1 0
+  %nop3690 = alloca i1, i1 0
+  %nop3691 = alloca i1, i1 0
+  %nop3692 = alloca i1, i1 0
+  %nop3693 = alloca i1, i1 0
+  %nop3694 = alloca i1, i1 0
+  %nop3695 = alloca i1, i1 0
+  %nop3696 = alloca i1, i1 0
+  %nop3697 = alloca i1, i1 0
+  %nop3698 = alloca i1, i1 0
+  %nop3699 = alloca i1, i1 0
+  %nop3700 = alloca i1, i1 0
+  %nop3701 = alloca i1, i1 0
+  %nop3702 = alloca i1, i1 0
+  %nop3703 = alloca i1, i1 0
+  %nop3704 = alloca i1, i1 0
+  %nop3705 = alloca i1, i1 0
+  %nop3706 = alloca i1, i1 0
+  %nop3707 = alloca i1, i1 0
+  %nop3708 = alloca i1, i1 0
+  %nop3709 = alloca i1, i1 0
+  %nop3710 = alloca i1, i1 0
+  %nop3711 = alloca i1, i1 0
+  %nop3712 = alloca i1, i1 0
+  %nop3713 = alloca i1, i1 0
+  %nop3714 = alloca i1, i1 0
+  %nop3715 = alloca i1, i1 0
+  %nop3716 = alloca i1, i1 0
+  %nop3717 = alloca i1, i1 0
+  %nop3718 = alloca i1, i1 0
+  %nop3719 = alloca i1, i1 0
+  %nop3720 = alloca i1, i1 0
+  %nop3721 = alloca i1, i1 0
+  %nop3722 = alloca i1, i1 0
+  %nop3723 = alloca i1, i1 0
+  %nop3724 = alloca i1, i1 0
+  %nop3725 = alloca i1, i1 0
+  %nop3726 = alloca i1, i1 0
+  %nop3727 = alloca i1, i1 0
+  %nop3728 = alloca i1, i1 0
+  %nop3729 = alloca i1, i1 0
+  %nop3730 = alloca i1, i1 0
+  %nop3731 = alloca i1, i1 0
+  %nop3732 = alloca i1, i1 0
+  %nop3733 = alloca i1, i1 0
+  %nop3734 = alloca i1, i1 0
+  %nop3735 = alloca i1, i1 0
+  %nop3736 = alloca i1, i1 0
+  %nop3737 = alloca i1, i1 0
+  %nop3738 = alloca i1, i1 0
+  %nop3739 = alloca i1, i1 0
+  %nop3740 = alloca i1, i1 0
+  %nop3741 = alloca i1, i1 0
+  %nop3742 = alloca i1, i1 0
+  %nop3743 = alloca i1, i1 0
+  %nop3744 = alloca i1, i1 0
+  %nop3745 = alloca i1, i1 0
+  %nop3746 = alloca i1, i1 0
+  %nop3747 = alloca i1, i1 0
+  %nop3748 = alloca i1, i1 0
+  %nop3749 = alloca i1, i1 0
+  %nop3750 = alloca i1, i1 0
+  %nop3751 = alloca i1, i1 0
+  %nop3752 = alloca i1, i1 0
+  %nop3753 = alloca i1, i1 0
+  %nop3754 = alloca i1, i1 0
+  %nop3755 = alloca i1, i1 0
+  %nop3756 = alloca i1, i1 0
+  %nop3757 = alloca i1, i1 0
+  %nop3758 = alloca i1, i1 0
+  %nop3759 = alloca i1, i1 0
+  %nop3760 = alloca i1, i1 0
+  %nop3761 = alloca i1, i1 0
+  %nop3762 = alloca i1, i1 0
+  %nop3763 = alloca i1, i1 0
+  %nop3764 = alloca i1, i1 0
+  %nop3765 = alloca i1, i1 0
+  %nop3766 = alloca i1, i1 0
+  %nop3767 = alloca i1, i1 0
+  %nop3768 = alloca i1, i1 0
+  %nop3769 = alloca i1, i1 0
+  %nop3770 = alloca i1, i1 0
+  %nop3771 = alloca i1, i1 0
+  %nop3772 = alloca i1, i1 0
+  %nop3773 = alloca i1, i1 0
+  %nop3774 = alloca i1, i1 0
+  %nop3775 = alloca i1, i1 0
+  %nop3776 = alloca i1, i1 0
+  %nop3777 = alloca i1, i1 0
+  %nop3778 = alloca i1, i1 0
+  %nop3779 = alloca i1, i1 0
+  %nop3780 = alloca i1, i1 0
+  %nop3781 = alloca i1, i1 0
+  %nop3782 = alloca i1, i1 0
+  %nop3783 = alloca i1, i1 0
+  %nop3784 = alloca i1, i1 0
+  %nop3785 = alloca i1, i1 0
+  %nop3786 = alloca i1, i1 0
+  %nop3787 = alloca i1, i1 0
+  %nop3788 = alloca i1, i1 0
+  %nop3789 = alloca i1, i1 0
+  %nop3790 = alloca i1, i1 0
+  %nop3791 = alloca i1, i1 0
+  %nop3792 = alloca i1, i1 0
+  %nop3793 = alloca i1, i1 0
+  %nop3794 = alloca i1, i1 0
+  %nop3795 = alloca i1, i1 0
+  %nop3796 = alloca i1, i1 0
+  %nop3797 = alloca i1, i1 0
+  %nop3798 = alloca i1, i1 0
+  %nop3799 = alloca i1, i1 0
+  %nop3800 = alloca i1, i1 0
+  %nop3801 = alloca i1, i1 0
+  %nop3802 = alloca i1, i1 0
+  %nop3803 = alloca i1, i1 0
+  %nop3804 = alloca i1, i1 0
+  %nop3805 = alloca i1, i1 0
+  %nop3806 = alloca i1, i1 0
+  %nop3807 = alloca i1, i1 0
+  %nop3808 = alloca i1, i1 0
+  %nop3809 = alloca i1, i1 0
+  %nop3810 = alloca i1, i1 0
+  %nop3811 = alloca i1, i1 0
+  %nop3812 = alloca i1, i1 0
+  %nop3813 = alloca i1, i1 0
+  %nop3814 = alloca i1, i1 0
+  %nop3815 = alloca i1, i1 0
+  %nop3816 = alloca i1, i1 0
+  %nop3817 = alloca i1, i1 0
+  %nop3818 = alloca i1, i1 0
+  %nop3819 = alloca i1, i1 0
+  %nop3820 = alloca i1, i1 0
+  %nop3821 = alloca i1, i1 0
+  %nop3822 = alloca i1, i1 0
+  %nop3823 = alloca i1, i1 0
+  %nop3824 = alloca i1, i1 0
+  %nop3825 = alloca i1, i1 0
+  %nop3826 = alloca i1, i1 0
+  %nop3827 = alloca i1, i1 0
+  %nop3828 = alloca i1, i1 0
+  %nop3829 = alloca i1, i1 0
+  %nop3830 = alloca i1, i1 0
+  %nop3831 = alloca i1, i1 0
+  %nop3832 = alloca i1, i1 0
+  %nop3833 = alloca i1, i1 0
+  %nop3834 = alloca i1, i1 0
+  %nop3835 = alloca i1, i1 0
+  %nop3836 = alloca i1, i1 0
+  %nop3837 = alloca i1, i1 0
+  %nop3838 = alloca i1, i1 0
+  %nop3839 = alloca i1, i1 0
+  %nop3840 = alloca i1, i1 0
+  %nop3841 = alloca i1, i1 0
+  %nop3842 = alloca i1, i1 0
+  %nop3843 = alloca i1, i1 0
+  %nop3844 = alloca i1, i1 0
+  %nop3845 = alloca i1, i1 0
+  %nop3846 = alloca i1, i1 0
+  %nop3847 = alloca i1, i1 0
+  %nop3848 = alloca i1, i1 0
+  %nop3849 = alloca i1, i1 0
+  %nop3850 = alloca i1, i1 0
+  %nop3851 = alloca i1, i1 0
+  %nop3852 = alloca i1, i1 0
+  %nop3853 = alloca i1, i1 0
+  %nop3854 = alloca i1, i1 0
+  %nop3855 = alloca i1, i1 0
+  %nop3856 = alloca i1, i1 0
+  %nop3857 = alloca i1, i1 0
+  %nop3858 = alloca i1, i1 0
+  %nop3859 = alloca i1, i1 0
+  %nop3860 = alloca i1, i1 0
+  %nop3861 = alloca i1, i1 0
+  %nop3862 = alloca i1, i1 0
+  %nop3863 = alloca i1, i1 0
+  %nop3864 = alloca i1, i1 0
+  %nop3865 = alloca i1, i1 0
+  %nop3866 = alloca i1, i1 0
+  %nop3867 = alloca i1, i1 0
+  %nop3868 = alloca i1, i1 0
+  %nop3869 = alloca i1, i1 0
+  %nop3870 = alloca i1, i1 0
+  %nop3871 = alloca i1, i1 0
+  %nop3872 = alloca i1, i1 0
+  %nop3873 = alloca i1, i1 0
+  %nop3874 = alloca i1, i1 0
+  %nop3875 = alloca i1, i1 0
+  %nop3876 = alloca i1, i1 0
+  %nop3877 = alloca i1, i1 0
+  %nop3878 = alloca i1, i1 0
+  %nop3879 = alloca i1, i1 0
+  %nop3880 = alloca i1, i1 0
+  %nop3881 = alloca i1, i1 0
+  %nop3882 = alloca i1, i1 0
+  %nop3883 = alloca i1, i1 0
+  %nop3884 = alloca i1, i1 0
+  %nop3885 = alloca i1, i1 0
+  %nop3886 = alloca i1, i1 0
+  %nop3887 = alloca i1, i1 0
+  %nop3888 = alloca i1, i1 0
+  %nop3889 = alloca i1, i1 0
+  %nop3890 = alloca i1, i1 0
+  %nop3891 = alloca i1, i1 0
+  %nop3892 = alloca i1, i1 0
+  %nop3893 = alloca i1, i1 0
+  %nop3894 = alloca i1, i1 0
+  %nop3895 = alloca i1, i1 0
+  %nop3896 = alloca i1, i1 0
+  %nop3897 = alloca i1, i1 0
+  %nop3898 = alloca i1, i1 0
+  %nop3899 = alloca i1, i1 0
+  %nop3900 = alloca i1, i1 0
+  %nop3901 = alloca i1, i1 0
+  %nop3902 = alloca i1, i1 0
+  %nop3903 = alloca i1, i1 0
+  %nop3904 = alloca i1, i1 0
+  %nop3905 = alloca i1, i1 0
+  %nop3906 = alloca i1, i1 0
+  %nop3907 = alloca i1, i1 0
+  %nop3908 = alloca i1, i1 0
+  %nop3909 = alloca i1, i1 0
+  %nop3910 = alloca i1, i1 0
+  %nop3911 = alloca i1, i1 0
+  %nop3912 = alloca i1, i1 0
+  %nop3913 = alloca i1, i1 0
+  %nop3914 = alloca i1, i1 0
+  %nop3915 = alloca i1, i1 0
+  %nop3916 = alloca i1, i1 0
+  %nop3917 = alloca i1, i1 0
+  %nop3918 = alloca i1, i1 0
+  %nop3919 = alloca i1, i1 0
+  %nop3920 = alloca i1, i1 0
+  %nop3921 = alloca i1, i1 0
+  %nop3922 = alloca i1, i1 0
+  %nop3923 = alloca i1, i1 0
+  %nop3924 = alloca i1, i1 0
+  %nop3925 = alloca i1, i1 0
+  %nop3926 = alloca i1, i1 0
+  %nop3927 = alloca i1, i1 0
+  %nop3928 = alloca i1, i1 0
+  %nop3929 = alloca i1, i1 0
+  %nop3930 = alloca i1, i1 0
+  %nop3931 = alloca i1, i1 0
+  %nop3932 = alloca i1, i1 0
+  %nop3933 = alloca i1, i1 0
+  %nop3934 = alloca i1, i1 0
+  %nop3935 = alloca i1, i1 0
+  %nop3936 = alloca i1, i1 0
+  %nop3937 = alloca i1, i1 0
+  %nop3938 = alloca i1, i1 0
+  %nop3939 = alloca i1, i1 0
+  %nop3940 = alloca i1, i1 0
+  %nop3941 = alloca i1, i1 0
+  %nop3942 = alloca i1, i1 0
+  %nop3943 = alloca i1, i1 0
+  %nop3944 = alloca i1, i1 0
+  %nop3945 = alloca i1, i1 0
+  %nop3946 = alloca i1, i1 0
+  %nop3947 = alloca i1, i1 0
+  %nop3948 = alloca i1, i1 0
+  %nop3949 = alloca i1, i1 0
+  %nop3950 = alloca i1, i1 0
+  %nop3951 = alloca i1, i1 0
+  %nop3952 = alloca i1, i1 0
+  %nop3953 = alloca i1, i1 0
+  %nop3954 = alloca i1, i1 0
+  %nop3955 = alloca i1, i1 0
+  %nop3956 = alloca i1, i1 0
+  %nop3957 = alloca i1, i1 0
+  %nop3958 = alloca i1, i1 0
+  %nop3959 = alloca i1, i1 0
+  %nop3960 = alloca i1, i1 0
+  %nop3961 = alloca i1, i1 0
+  %nop3962 = alloca i1, i1 0
+  %nop3963 = alloca i1, i1 0
+  %nop3964 = alloca i1, i1 0
+  %nop3965 = alloca i1, i1 0
+  %nop3966 = alloca i1, i1 0
+  %nop3967 = alloca i1, i1 0
+  %nop3968 = alloca i1, i1 0
+  %nop3969 = alloca i1, i1 0
+  %nop3970 = alloca i1, i1 0
+  %nop3971 = alloca i1, i1 0
+  %nop3972 = alloca i1, i1 0
+  %nop3973 = alloca i1, i1 0
+  %nop3974 = alloca i1, i1 0
+  %nop3975 = alloca i1, i1 0
+  %nop3976 = alloca i1, i1 0
+  %nop3977 = alloca i1, i1 0
+  %nop3978 = alloca i1, i1 0
+  %nop3979 = alloca i1, i1 0
+  %nop3980 = alloca i1, i1 0
+  %nop3981 = alloca i1, i1 0
+  %nop3982 = alloca i1, i1 0
+  %nop3983 = alloca i1, i1 0
+  %nop3984 = alloca i1, i1 0
+  %nop3985 = alloca i1, i1 0
+  %nop3986 = alloca i1, i1 0
+  %nop3987 = alloca i1, i1 0
+  %nop3988 = alloca i1, i1 0
+  %nop3989 = alloca i1, i1 0
+  %nop3990 = alloca i1, i1 0
+  %nop3991 = alloca i1, i1 0
+  %nop3992 = alloca i1, i1 0
+  %nop3993 = alloca i1, i1 0
+  %nop3994 = alloca i1, i1 0
+  %nop3995 = alloca i1, i1 0
+  %nop3996 = alloca i1, i1 0
+  %nop3997 = alloca i1, i1 0
+  %nop3998 = alloca i1, i1 0
+  %nop3999 = alloca i1, i1 0
+  %nop4000 = alloca i1, i1 0
+  %nop4001 = alloca i1, i1 0
+  %nop4002 = alloca i1, i1 0
+  %nop4003 = alloca i1, i1 0
+  %nop4004 = alloca i1, i1 0
+  %nop4005 = alloca i1, i1 0
+  %nop4006 = alloca i1, i1 0
+  %nop4007 = alloca i1, i1 0
+  %nop4008 = alloca i1, i1 0
+  %nop4009 = alloca i1, i1 0
+  %nop4010 = alloca i1, i1 0
+  %nop4011 = alloca i1, i1 0
+  %nop4012 = alloca i1, i1 0
+  %nop4013 = alloca i1, i1 0
+  %nop4014 = alloca i1, i1 0
+  %nop4015 = alloca i1, i1 0
+  %nop4016 = alloca i1, i1 0
+  %nop4017 = alloca i1, i1 0
+  %nop4018 = alloca i1, i1 0
+  %nop4019 = alloca i1, i1 0
+  %nop4020 = alloca i1, i1 0
+  %nop4021 = alloca i1, i1 0
+  %nop4022 = alloca i1, i1 0
+  %nop4023 = alloca i1, i1 0
+  %nop4024 = alloca i1, i1 0
+  %nop4025 = alloca i1, i1 0
+  %nop4026 = alloca i1, i1 0
+  %nop4027 = alloca i1, i1 0
+  %nop4028 = alloca i1, i1 0
+  %nop4029 = alloca i1, i1 0
+  %nop4030 = alloca i1, i1 0
+  %nop4031 = alloca i1, i1 0
+  %nop4032 = alloca i1, i1 0
+  %nop4033 = alloca i1, i1 0
+  %nop4034 = alloca i1, i1 0
+  %nop4035 = alloca i1, i1 0
+  %nop4036 = alloca i1, i1 0
+  %nop4037 = alloca i1, i1 0
+  %nop4038 = alloca i1, i1 0
+  %nop4039 = alloca i1, i1 0
+  %nop4040 = alloca i1, i1 0
+  %nop4041 = alloca i1, i1 0
+  %nop4042 = alloca i1, i1 0
+  %nop4043 = alloca i1, i1 0
+  %nop4044 = alloca i1, i1 0
+  %nop4045 = alloca i1, i1 0
+  %nop4046 = alloca i1, i1 0
+  %nop4047 = alloca i1, i1 0
+  %nop4048 = alloca i1, i1 0
+  %nop4049 = alloca i1, i1 0
+  %nop4050 = alloca i1, i1 0
+  %nop4051 = alloca i1, i1 0
+  %nop4052 = alloca i1, i1 0
+  %nop4053 = alloca i1, i1 0
+  %nop4054 = alloca i1, i1 0
+  %nop4055 = alloca i1, i1 0
+  %nop4056 = alloca i1, i1 0
+  %nop4057 = alloca i1, i1 0
+  %nop4058 = alloca i1, i1 0
+  %nop4059 = alloca i1, i1 0
+  %nop4060 = alloca i1, i1 0
+  %nop4061 = alloca i1, i1 0
+  %nop4062 = alloca i1, i1 0
+  %nop4063 = alloca i1, i1 0
+  %nop4064 = alloca i1, i1 0
+  %nop4065 = alloca i1, i1 0
+  %nop4066 = alloca i1, i1 0
+  %nop4067 = alloca i1, i1 0
+  %nop4068 = alloca i1, i1 0
+  %nop4069 = alloca i1, i1 0
+  %nop4070 = alloca i1, i1 0
+  %nop4071 = alloca i1, i1 0
+  %nop4072 = alloca i1, i1 0
+  %nop4073 = alloca i1, i1 0
+  %nop4074 = alloca i1, i1 0
+  %nop4075 = alloca i1, i1 0
+  %nop4076 = alloca i1, i1 0
+  %nop4077 = alloca i1, i1 0
+  %nop4078 = alloca i1, i1 0
+  %nop4079 = alloca i1, i1 0
+  %nop4080 = alloca i1, i1 0
+  %nop4081 = alloca i1, i1 0
+  %nop4082 = alloca i1, i1 0
+  %nop4083 = alloca i1, i1 0
+  %nop4084 = alloca i1, i1 0
+  %nop4085 = alloca i1, i1 0
+  %nop4086 = alloca i1, i1 0
+  %nop4087 = alloca i1, i1 0
+  %nop4088 = alloca i1, i1 0
+  %nop4089 = alloca i1, i1 0
+  %nop4090 = alloca i1, i1 0
+  %nop4091 = alloca i1, i1 0
+  %nop4092 = alloca i1, i1 0
+  %nop4093 = alloca i1, i1 0
+  %nop4094 = alloca i1, i1 0
+  %nop4095 = alloca i1, i1 0
+  %nop4096 = alloca i1, i1 0
+  %nop4097 = alloca i1, i1 0
+  %nop4098 = alloca i1, i1 0
+  %nop4099 = alloca i1, i1 0
+  %nop4100 = alloca i1, i1 0
+  %nop4101 = alloca i1, i1 0
+  %nop4102 = alloca i1, i1 0
+  %nop4103 = alloca i1, i1 0
+  %nop4104 = alloca i1, i1 0
+  %nop4105 = alloca i1, i1 0
+  %nop4106 = alloca i1, i1 0
+  %nop4107 = alloca i1, i1 0
+  %nop4108 = alloca i1, i1 0
+  %nop4109 = alloca i1, i1 0
+  %nop4110 = alloca i1, i1 0
+  %nop4111 = alloca i1, i1 0
+  %nop4112 = alloca i1, i1 0
+  %nop4113 = alloca i1, i1 0
+  %nop4114 = alloca i1, i1 0
+  %nop4115 = alloca i1, i1 0
+  %nop4116 = alloca i1, i1 0
+  %nop4117 = alloca i1, i1 0
+  %nop4118 = alloca i1, i1 0
+  %nop4119 = alloca i1, i1 0
+  %nop4120 = alloca i1, i1 0
+  %nop4121 = alloca i1, i1 0
+  %nop4122 = alloca i1, i1 0
+  %nop4123 = alloca i1, i1 0
+  %nop4124 = alloca i1, i1 0
+  %nop4125 = alloca i1, i1 0
+  %nop4126 = alloca i1, i1 0
+  %nop4127 = alloca i1, i1 0
+  %nop4128 = alloca i1, i1 0
+  %nop4129 = alloca i1, i1 0
+  %nop4130 = alloca i1, i1 0
+  %nop4131 = alloca i1, i1 0
+  %nop4132 = alloca i1, i1 0
+  %nop4133 = alloca i1, i1 0
+  %nop4134 = alloca i1, i1 0
+  %nop4135 = alloca i1, i1 0
+  %nop4136 = alloca i1, i1 0
+  %nop4137 = alloca i1, i1 0
+  %nop4138 = alloca i1, i1 0
+  %nop4139 = alloca i1, i1 0
+  %nop4140 = alloca i1, i1 0
+  %nop4141 = alloca i1, i1 0
+  %nop4142 = alloca i1, i1 0
+  %nop4143 = alloca i1, i1 0
+  %nop4144 = alloca i1, i1 0
+  %nop4145 = alloca i1, i1 0
+  %nop4146 = alloca i1, i1 0
+  %nop4147 = alloca i1, i1 0
+  %nop4148 = alloca i1, i1 0
+  %nop4149 = alloca i1, i1 0
+  %nop4150 = alloca i1, i1 0
+  %nop4151 = alloca i1, i1 0
+  %nop4152 = alloca i1, i1 0
+  %nop4153 = alloca i1, i1 0
+  %nop4154 = alloca i1, i1 0
+  %nop4155 = alloca i1, i1 0
+  %nop4156 = alloca i1, i1 0
+  %nop4157 = alloca i1, i1 0
+  %nop4158 = alloca i1, i1 0
+  %nop4159 = alloca i1, i1 0
+  %nop4160 = alloca i1, i1 0
+  %nop4161 = alloca i1, i1 0
+  %nop4162 = alloca i1, i1 0
+  %nop4163 = alloca i1, i1 0
+  %nop4164 = alloca i1, i1 0
+  %nop4165 = alloca i1, i1 0
+  %nop4166 = alloca i1, i1 0
+  %nop4167 = alloca i1, i1 0
+  %nop4168 = alloca i1, i1 0
+  %nop4169 = alloca i1, i1 0
+  %nop4170 = alloca i1, i1 0
+  %nop4171 = alloca i1, i1 0
+  %nop4172 = alloca i1, i1 0
+  %nop4173 = alloca i1, i1 0
+  %nop4174 = alloca i1, i1 0
+  %nop4175 = alloca i1, i1 0
+  %nop4176 = alloca i1, i1 0
+  %nop4177 = alloca i1, i1 0
+  %nop4178 = alloca i1, i1 0
+  %nop4179 = alloca i1, i1 0
+  %nop4180 = alloca i1, i1 0
+  %nop4181 = alloca i1, i1 0
+  %nop4182 = alloca i1, i1 0
+  %nop4183 = alloca i1, i1 0
+  %nop4184 = alloca i1, i1 0
+  %nop4185 = alloca i1, i1 0
+  %nop4186 = alloca i1, i1 0
+  %nop4187 = alloca i1, i1 0
+  %nop4188 = alloca i1, i1 0
+  %nop4189 = alloca i1, i1 0
+  %nop4190 = alloca i1, i1 0
+  %nop4191 = alloca i1, i1 0
+  %nop4192 = alloca i1, i1 0
+  %nop4193 = alloca i1, i1 0
+  %nop4194 = alloca i1, i1 0
+  %nop4195 = alloca i1, i1 0
+  %nop4196 = alloca i1, i1 0
+  %nop4197 = alloca i1, i1 0
+  %nop4198 = alloca i1, i1 0
+  %nop4199 = alloca i1, i1 0
+  %nop4200 = alloca i1, i1 0
+  %nop4201 = alloca i1, i1 0
+  %nop4202 = alloca i1, i1 0
+  %nop4203 = alloca i1, i1 0
+  %nop4204 = alloca i1, i1 0
+  %nop4205 = alloca i1, i1 0
+  %nop4206 = alloca i1, i1 0
+  %nop4207 = alloca i1, i1 0
+  %nop4208 = alloca i1, i1 0
+  %nop4209 = alloca i1, i1 0
+  %nop4210 = alloca i1, i1 0
+  %nop4211 = alloca i1, i1 0
+  %nop4212 = alloca i1, i1 0
+  %nop4213 = alloca i1, i1 0
+  %nop4214 = alloca i1, i1 0
+  %nop4215 = alloca i1, i1 0
+  %nop4216 = alloca i1, i1 0
+  %nop4217 = alloca i1, i1 0
+  %nop4218 = alloca i1, i1 0
+  %nop4219 = alloca i1, i1 0
+  %nop4220 = alloca i1, i1 0
+  %nop4221 = alloca i1, i1 0
+  %nop4222 = alloca i1, i1 0
+  %nop4223 = alloca i1, i1 0
+  %nop4224 = alloca i1, i1 0
+  %nop4225 = alloca i1, i1 0
+  %nop4226 = alloca i1, i1 0
+  %nop4227 = alloca i1, i1 0
+  %nop4228 = alloca i1, i1 0
+  %nop4229 = alloca i1, i1 0
+  %nop4230 = alloca i1, i1 0
+  %nop4231 = alloca i1, i1 0
+  %nop4232 = alloca i1, i1 0
+  %nop4233 = alloca i1, i1 0
+  %nop4234 = alloca i1, i1 0
+  %nop4235 = alloca i1, i1 0
+  %nop4236 = alloca i1, i1 0
+  %nop4237 = alloca i1, i1 0
+  %nop4238 = alloca i1, i1 0
+  %nop4239 = alloca i1, i1 0
+  %nop4240 = alloca i1, i1 0
+  %nop4241 = alloca i1, i1 0
+  %nop4242 = alloca i1, i1 0
+  %nop4243 = alloca i1, i1 0
+  %nop4244 = alloca i1, i1 0
+  %nop4245 = alloca i1, i1 0
+  %nop4246 = alloca i1, i1 0
+  %nop4247 = alloca i1, i1 0
+  %nop4248 = alloca i1, i1 0
+  %nop4249 = alloca i1, i1 0
+  %nop4250 = alloca i1, i1 0
+  %nop4251 = alloca i1, i1 0
+  %nop4252 = alloca i1, i1 0
+  %nop4253 = alloca i1, i1 0
+  %nop4254 = alloca i1, i1 0
+  %nop4255 = alloca i1, i1 0
+  %nop4256 = alloca i1, i1 0
+  %nop4257 = alloca i1, i1 0
+  %nop4258 = alloca i1, i1 0
+  %nop4259 = alloca i1, i1 0
+  %nop4260 = alloca i1, i1 0
+  %nop4261 = alloca i1, i1 0
+  %nop4262 = alloca i1, i1 0
+  %nop4263 = alloca i1, i1 0
+  %nop4264 = alloca i1, i1 0
+  %nop4265 = alloca i1, i1 0
+  %nop4266 = alloca i1, i1 0
+  %nop4267 = alloca i1, i1 0
+  %nop4268 = alloca i1, i1 0
+  %nop4269 = alloca i1, i1 0
+  %nop4270 = alloca i1, i1 0
+  %nop4271 = alloca i1, i1 0
+  %nop4272 = alloca i1, i1 0
+  %nop4273 = alloca i1, i1 0
+  %nop4274 = alloca i1, i1 0
+  %nop4275 = alloca i1, i1 0
+  %nop4276 = alloca i1, i1 0
+  %nop4277 = alloca i1, i1 0
+  %nop4278 = alloca i1, i1 0
+  %nop4279 = alloca i1, i1 0
+  %nop4280 = alloca i1, i1 0
+  %nop4281 = alloca i1, i1 0
+  %nop4282 = alloca i1, i1 0
+  %nop4283 = alloca i1, i1 0
+  %nop4284 = alloca i1, i1 0
+  %nop4285 = alloca i1, i1 0
+  %nop4286 = alloca i1, i1 0
+  %nop4287 = alloca i1, i1 0
+  %nop4288 = alloca i1, i1 0
+  %nop4289 = alloca i1, i1 0
+  %nop4290 = alloca i1, i1 0
+  %nop4291 = alloca i1, i1 0
+  %nop4292 = alloca i1, i1 0
+  %nop4293 = alloca i1, i1 0
+  %nop4294 = alloca i1, i1 0
+  %nop4295 = alloca i1, i1 0
+  %nop4296 = alloca i1, i1 0
+  %nop4297 = alloca i1, i1 0
+  %nop4298 = alloca i1, i1 0
+  %nop4299 = alloca i1, i1 0
+  %nop4300 = alloca i1, i1 0
+  %nop4301 = alloca i1, i1 0
+  %nop4302 = alloca i1, i1 0
+  %nop4303 = alloca i1, i1 0
+  %nop4304 = alloca i1, i1 0
+  %nop4305 = alloca i1, i1 0
+  %nop4306 = alloca i1, i1 0
+  %nop4307 = alloca i1, i1 0
+  %nop4308 = alloca i1, i1 0
+  %nop4309 = alloca i1, i1 0
+  %nop4310 = alloca i1, i1 0
+  %nop4311 = alloca i1, i1 0
+  %nop4312 = alloca i1, i1 0
+  %nop4313 = alloca i1, i1 0
+  %nop4314 = alloca i1, i1 0
+  %nop4315 = alloca i1, i1 0
+  %nop4316 = alloca i1, i1 0
+  %nop4317 = alloca i1, i1 0
+  %nop4318 = alloca i1, i1 0
+  %nop4319 = alloca i1, i1 0
+  %nop4320 = alloca i1, i1 0
+  %nop4321 = alloca i1, i1 0
+  %nop4322 = alloca i1, i1 0
+  %nop4323 = alloca i1, i1 0
+  %nop4324 = alloca i1, i1 0
+  %nop4325 = alloca i1, i1 0
+  %nop4326 = alloca i1, i1 0
+  %nop4327 = alloca i1, i1 0
+  %nop4328 = alloca i1, i1 0
+  %nop4329 = alloca i1, i1 0
+  %nop4330 = alloca i1, i1 0
+  %nop4331 = alloca i1, i1 0
+  %nop4332 = alloca i1, i1 0
+  %nop4333 = alloca i1, i1 0
+  %nop4334 = alloca i1, i1 0
+  %nop4335 = alloca i1, i1 0
+  %nop4336 = alloca i1, i1 0
+  %nop4337 = alloca i1, i1 0
+  %nop4338 = alloca i1, i1 0
+  %nop4339 = alloca i1, i1 0
+  %nop4340 = alloca i1, i1 0
+  %nop4341 = alloca i1, i1 0
+  %nop4342 = alloca i1, i1 0
+  %nop4343 = alloca i1, i1 0
+  %nop4344 = alloca i1, i1 0
+  %nop4345 = alloca i1, i1 0
+  %nop4346 = alloca i1, i1 0
+  %nop4347 = alloca i1, i1 0
+  %nop4348 = alloca i1, i1 0
+  %nop4349 = alloca i1, i1 0
+  %nop4350 = alloca i1, i1 0
+  %nop4351 = alloca i1, i1 0
+  %nop4352 = alloca i1, i1 0
+  %nop4353 = alloca i1, i1 0
+  %nop4354 = alloca i1, i1 0
+  %nop4355 = alloca i1, i1 0
+  %nop4356 = alloca i1, i1 0
+  %nop4357 = alloca i1, i1 0
+  %nop4358 = alloca i1, i1 0
+  %nop4359 = alloca i1, i1 0
+  %nop4360 = alloca i1, i1 0
+  %nop4361 = alloca i1, i1 0
+  %nop4362 = alloca i1, i1 0
+  %nop4363 = alloca i1, i1 0
+  %nop4364 = alloca i1, i1 0
+  %nop4365 = alloca i1, i1 0
+  %nop4366 = alloca i1, i1 0
+  %nop4367 = alloca i1, i1 0
+  %nop4368 = alloca i1, i1 0
+  %nop4369 = alloca i1, i1 0
+  %nop4370 = alloca i1, i1 0
+  %nop4371 = alloca i1, i1 0
+  %nop4372 = alloca i1, i1 0
+  %nop4373 = alloca i1, i1 0
+  %nop4374 = alloca i1, i1 0
+  %nop4375 = alloca i1, i1 0
+  %nop4376 = alloca i1, i1 0
+  %nop4377 = alloca i1, i1 0
+  %nop4378 = alloca i1, i1 0
+  %nop4379 = alloca i1, i1 0
+  %nop4380 = alloca i1, i1 0
+  %nop4381 = alloca i1, i1 0
+  %nop4382 = alloca i1, i1 0
+  %nop4383 = alloca i1, i1 0
+  %nop4384 = alloca i1, i1 0
+  %nop4385 = alloca i1, i1 0
+  %nop4386 = alloca i1, i1 0
+  %nop4387 = alloca i1, i1 0
+  %nop4388 = alloca i1, i1 0
+  %nop4389 = alloca i1, i1 0
+  %nop4390 = alloca i1, i1 0
+  %nop4391 = alloca i1, i1 0
+  %nop4392 = alloca i1, i1 0
+  %nop4393 = alloca i1, i1 0
+  %nop4394 = alloca i1, i1 0
+  %nop4395 = alloca i1, i1 0
+  %nop4396 = alloca i1, i1 0
+  %nop4397 = alloca i1, i1 0
+  %nop4398 = alloca i1, i1 0
+  %nop4399 = alloca i1, i1 0
+  %nop4400 = alloca i1, i1 0
+  %nop4401 = alloca i1, i1 0
+  %nop4402 = alloca i1, i1 0
+  %nop4403 = alloca i1, i1 0
+  %nop4404 = alloca i1, i1 0
+  %nop4405 = alloca i1, i1 0
+  %nop4406 = alloca i1, i1 0
+  %nop4407 = alloca i1, i1 0
+  %nop4408 = alloca i1, i1 0
+  %nop4409 = alloca i1, i1 0
+  %nop4410 = alloca i1, i1 0
+  %nop4411 = alloca i1, i1 0
+  %nop4412 = alloca i1, i1 0
+  %nop4413 = alloca i1, i1 0
+  %nop4414 = alloca i1, i1 0
+  %nop4415 = alloca i1, i1 0
+  %nop4416 = alloca i1, i1 0
+  %nop4417 = alloca i1, i1 0
+  %nop4418 = alloca i1, i1 0
+  %nop4419 = alloca i1, i1 0
+  %nop4420 = alloca i1, i1 0
+  %nop4421 = alloca i1, i1 0
+  %nop4422 = alloca i1, i1 0
+  %nop4423 = alloca i1, i1 0
+  %nop4424 = alloca i1, i1 0
+  %nop4425 = alloca i1, i1 0
+  %nop4426 = alloca i1, i1 0
+  %nop4427 = alloca i1, i1 0
+  %nop4428 = alloca i1, i1 0
+  %nop4429 = alloca i1, i1 0
+  %nop4430 = alloca i1, i1 0
+  %nop4431 = alloca i1, i1 0
+  %nop4432 = alloca i1, i1 0
+  %nop4433 = alloca i1, i1 0
+  %nop4434 = alloca i1, i1 0
+  %nop4435 = alloca i1, i1 0
+  %nop4436 = alloca i1, i1 0
+  %nop4437 = alloca i1, i1 0
+  %nop4438 = alloca i1, i1 0
+  %nop4439 = alloca i1, i1 0
+  %nop4440 = alloca i1, i1 0
+  %nop4441 = alloca i1, i1 0
+  %nop4442 = alloca i1, i1 0
+  %nop4443 = alloca i1, i1 0
+  %nop4444 = alloca i1, i1 0
+  %nop4445 = alloca i1, i1 0
+  %nop4446 = alloca i1, i1 0
+  %nop4447 = alloca i1, i1 0
+  %nop4448 = alloca i1, i1 0
+  %nop4449 = alloca i1, i1 0
+  %nop4450 = alloca i1, i1 0
+  %nop4451 = alloca i1, i1 0
+  %nop4452 = alloca i1, i1 0
+  %nop4453 = alloca i1, i1 0
+  %nop4454 = alloca i1, i1 0
+  %nop4455 = alloca i1, i1 0
+  %nop4456 = alloca i1, i1 0
+  %nop4457 = alloca i1, i1 0
+  %nop4458 = alloca i1, i1 0
+  %nop4459 = alloca i1, i1 0
+  %nop4460 = alloca i1, i1 0
+  %nop4461 = alloca i1, i1 0
+  %nop4462 = alloca i1, i1 0
+  %nop4463 = alloca i1, i1 0
+  %nop4464 = alloca i1, i1 0
+  %nop4465 = alloca i1, i1 0
+  %nop4466 = alloca i1, i1 0
+  %nop4467 = alloca i1, i1 0
+  %nop4468 = alloca i1, i1 0
+  %nop4469 = alloca i1, i1 0
+  %nop4470 = alloca i1, i1 0
+  %nop4471 = alloca i1, i1 0
+  %nop4472 = alloca i1, i1 0
+  %nop4473 = alloca i1, i1 0
+  %nop4474 = alloca i1, i1 0
+  %nop4475 = alloca i1, i1 0
+  %nop4476 = alloca i1, i1 0
+  %nop4477 = alloca i1, i1 0
+  %nop4478 = alloca i1, i1 0
+  %nop4479 = alloca i1, i1 0
+  %nop4480 = alloca i1, i1 0
+  %nop4481 = alloca i1, i1 0
+  %nop4482 = alloca i1, i1 0
+  %nop4483 = alloca i1, i1 0
+  %nop4484 = alloca i1, i1 0
+  %nop4485 = alloca i1, i1 0
+  %nop4486 = alloca i1, i1 0
+  %nop4487 = alloca i1, i1 0
+  %nop4488 = alloca i1, i1 0
+  %nop4489 = alloca i1, i1 0
+  %nop4490 = alloca i1, i1 0
+  %nop4491 = alloca i1, i1 0
+  %nop4492 = alloca i1, i1 0
+  %nop4493 = alloca i1, i1 0
+  %nop4494 = alloca i1, i1 0
+  %nop4495 = alloca i1, i1 0
+  %nop4496 = alloca i1, i1 0
+  %nop4497 = alloca i1, i1 0
+  %nop4498 = alloca i1, i1 0
+  %nop4499 = alloca i1, i1 0
+  %nop4500 = alloca i1, i1 0
+  %nop4501 = alloca i1, i1 0
+  %nop4502 = alloca i1, i1 0
+  %nop4503 = alloca i1, i1 0
+  %nop4504 = alloca i1, i1 0
+  %nop4505 = alloca i1, i1 0
+  %nop4506 = alloca i1, i1 0
+  %nop4507 = alloca i1, i1 0
+  %nop4508 = alloca i1, i1 0
+  %nop4509 = alloca i1, i1 0
+  %nop4510 = alloca i1, i1 0
+  %nop4511 = alloca i1, i1 0
+  %nop4512 = alloca i1, i1 0
+  %nop4513 = alloca i1, i1 0
+  %nop4514 = alloca i1, i1 0
+  %nop4515 = alloca i1, i1 0
+  %nop4516 = alloca i1, i1 0
+  %nop4517 = alloca i1, i1 0
+  %nop4518 = alloca i1, i1 0
+  %nop4519 = alloca i1, i1 0
+  %nop4520 = alloca i1, i1 0
+  %nop4521 = alloca i1, i1 0
+  %nop4522 = alloca i1, i1 0
+  %nop4523 = alloca i1, i1 0
+  %nop4524 = alloca i1, i1 0
+  %nop4525 = alloca i1, i1 0
+  %nop4526 = alloca i1, i1 0
+  %nop4527 = alloca i1, i1 0
+  %nop4528 = alloca i1, i1 0
+  %nop4529 = alloca i1, i1 0
+  %nop4530 = alloca i1, i1 0
+  %nop4531 = alloca i1, i1 0
+  %nop4532 = alloca i1, i1 0
+  %nop4533 = alloca i1, i1 0
+  %nop4534 = alloca i1, i1 0
+  %nop4535 = alloca i1, i1 0
+  %nop4536 = alloca i1, i1 0
+  %nop4537 = alloca i1, i1 0
+  %nop4538 = alloca i1, i1 0
+  %nop4539 = alloca i1, i1 0
+  %nop4540 = alloca i1, i1 0
+  %nop4541 = alloca i1, i1 0
+  %nop4542 = alloca i1, i1 0
+  %nop4543 = alloca i1, i1 0
+  %nop4544 = alloca i1, i1 0
+  %nop4545 = alloca i1, i1 0
+  %nop4546 = alloca i1, i1 0
+  %nop4547 = alloca i1, i1 0
+  %nop4548 = alloca i1, i1 0
+  %nop4549 = alloca i1, i1 0
+  %nop4550 = alloca i1, i1 0
+  %nop4551 = alloca i1, i1 0
+  %nop4552 = alloca i1, i1 0
+  %nop4553 = alloca i1, i1 0
+  %nop4554 = alloca i1, i1 0
+  %nop4555 = alloca i1, i1 0
+  %nop4556 = alloca i1, i1 0
+  %nop4557 = alloca i1, i1 0
+  %nop4558 = alloca i1, i1 0
+  %nop4559 = alloca i1, i1 0
+  %nop4560 = alloca i1, i1 0
+  %nop4561 = alloca i1, i1 0
+  %nop4562 = alloca i1, i1 0
+  %nop4563 = alloca i1, i1 0
+  %nop4564 = alloca i1, i1 0
+  %nop4565 = alloca i1, i1 0
+  %nop4566 = alloca i1, i1 0
+  %nop4567 = alloca i1, i1 0
+  %nop4568 = alloca i1, i1 0
+  %nop4569 = alloca i1, i1 0
+  %nop4570 = alloca i1, i1 0
+  %nop4571 = alloca i1, i1 0
+  %nop4572 = alloca i1, i1 0
+  %nop4573 = alloca i1, i1 0
+  %nop4574 = alloca i1, i1 0
+  %nop4575 = alloca i1, i1 0
+  %nop4576 = alloca i1, i1 0
+  %nop4577 = alloca i1, i1 0
+  %nop4578 = alloca i1, i1 0
+  %nop4579 = alloca i1, i1 0
+  %nop4580 = alloca i1, i1 0
+  %nop4581 = alloca i1, i1 0
+  %nop4582 = alloca i1, i1 0
+  %nop4583 = alloca i1, i1 0
+  %nop4584 = alloca i1, i1 0
+  %nop4585 = alloca i1, i1 0
+  %nop4586 = alloca i1, i1 0
+  %nop4587 = alloca i1, i1 0
+  %nop4588 = alloca i1, i1 0
+  %nop4589 = alloca i1, i1 0
+  %nop4590 = alloca i1, i1 0
+  %nop4591 = alloca i1, i1 0
+  %nop4592 = alloca i1, i1 0
+  %nop4593 = alloca i1, i1 0
+  %nop4594 = alloca i1, i1 0
+  %nop4595 = alloca i1, i1 0
+  %nop4596 = alloca i1, i1 0
+  %nop4597 = alloca i1, i1 0
+  %nop4598 = alloca i1, i1 0
+  %nop4599 = alloca i1, i1 0
+  %nop4600 = alloca i1, i1 0
+  %nop4601 = alloca i1, i1 0
+  %nop4602 = alloca i1, i1 0
+  %nop4603 = alloca i1, i1 0
+  %nop4604 = alloca i1, i1 0
+  %nop4605 = alloca i1, i1 0
+  %nop4606 = alloca i1, i1 0
+  %nop4607 = alloca i1, i1 0
+  %nop4608 = alloca i1, i1 0
+  %nop4609 = alloca i1, i1 0
+  %nop4610 = alloca i1, i1 0
+  %nop4611 = alloca i1, i1 0
+  %nop4612 = alloca i1, i1 0
+  %nop4613 = alloca i1, i1 0
+  %nop4614 = alloca i1, i1 0
+  %nop4615 = alloca i1, i1 0
+  %nop4616 = alloca i1, i1 0
+  %nop4617 = alloca i1, i1 0
+  %nop4618 = alloca i1, i1 0
+  %nop4619 = alloca i1, i1 0
+  %nop4620 = alloca i1, i1 0
+  %nop4621 = alloca i1, i1 0
+  %nop4622 = alloca i1, i1 0
+  %nop4623 = alloca i1, i1 0
+  %nop4624 = alloca i1, i1 0
+  %nop4625 = alloca i1, i1 0
+  %nop4626 = alloca i1, i1 0
+  %nop4627 = alloca i1, i1 0
+  %nop4628 = alloca i1, i1 0
+  %nop4629 = alloca i1, i1 0
+  %nop4630 = alloca i1, i1 0
+  %nop4631 = alloca i1, i1 0
+  %nop4632 = alloca i1, i1 0
+  %nop4633 = alloca i1, i1 0
+  %nop4634 = alloca i1, i1 0
+  %nop4635 = alloca i1, i1 0
+  %nop4636 = alloca i1, i1 0
+  %nop4637 = alloca i1, i1 0
+  %nop4638 = alloca i1, i1 0
+  %nop4639 = alloca i1, i1 0
+  %nop4640 = alloca i1, i1 0
+  %nop4641 = alloca i1, i1 0
+  %nop4642 = alloca i1, i1 0
+  %nop4643 = alloca i1, i1 0
+  %nop4644 = alloca i1, i1 0
+  %nop4645 = alloca i1, i1 0
+  %nop4646 = alloca i1, i1 0
+  %nop4647 = alloca i1, i1 0
+  %nop4648 = alloca i1, i1 0
+  %nop4649 = alloca i1, i1 0
+  %nop4650 = alloca i1, i1 0
+  %nop4651 = alloca i1, i1 0
+  %nop4652 = alloca i1, i1 0
+  %nop4653 = alloca i1, i1 0
+  %nop4654 = alloca i1, i1 0
+  %nop4655 = alloca i1, i1 0
+  %nop4656 = alloca i1, i1 0
+  %nop4657 = alloca i1, i1 0
+  %nop4658 = alloca i1, i1 0
+  %nop4659 = alloca i1, i1 0
+  %nop4660 = alloca i1, i1 0
+  %nop4661 = alloca i1, i1 0
+  %nop4662 = alloca i1, i1 0
+  %nop4663 = alloca i1, i1 0
+  %nop4664 = alloca i1, i1 0
+  %nop4665 = alloca i1, i1 0
+  %nop4666 = alloca i1, i1 0
+  %nop4667 = alloca i1, i1 0
+  %nop4668 = alloca i1, i1 0
+  %nop4669 = alloca i1, i1 0
+  %nop4670 = alloca i1, i1 0
+  %nop4671 = alloca i1, i1 0
+  %nop4672 = alloca i1, i1 0
+  %nop4673 = alloca i1, i1 0
+  %nop4674 = alloca i1, i1 0
+  %nop4675 = alloca i1, i1 0
+  %nop4676 = alloca i1, i1 0
+  %nop4677 = alloca i1, i1 0
+  %nop4678 = alloca i1, i1 0
+  %nop4679 = alloca i1, i1 0
+  %nop4680 = alloca i1, i1 0
+  %nop4681 = alloca i1, i1 0
+  %nop4682 = alloca i1, i1 0
+  %nop4683 = alloca i1, i1 0
+  %nop4684 = alloca i1, i1 0
+  %nop4685 = alloca i1, i1 0
+  %nop4686 = alloca i1, i1 0
+  %nop4687 = alloca i1, i1 0
+  %nop4688 = alloca i1, i1 0
+  %nop4689 = alloca i1, i1 0
+  %nop4690 = alloca i1, i1 0
+  %nop4691 = alloca i1, i1 0
+  %nop4692 = alloca i1, i1 0
+  %nop4693 = alloca i1, i1 0
+  %nop4694 = alloca i1, i1 0
+  %nop4695 = alloca i1, i1 0
+  %nop4696 = alloca i1, i1 0
+  %nop4697 = alloca i1, i1 0
+  %nop4698 = alloca i1, i1 0
+  %nop4699 = alloca i1, i1 0
+  %nop4700 = alloca i1, i1 0
+  %nop4701 = alloca i1, i1 0
+  %nop4702 = alloca i1, i1 0
+  %nop4703 = alloca i1, i1 0
+  %nop4704 = alloca i1, i1 0
+  %nop4705 = alloca i1, i1 0
+  %nop4706 = alloca i1, i1 0
+  %nop4707 = alloca i1, i1 0
+  %nop4708 = alloca i1, i1 0
+  %nop4709 = alloca i1, i1 0
+  %nop4710 = alloca i1, i1 0
+  %nop4711 = alloca i1, i1 0
+  %nop4712 = alloca i1, i1 0
+  %nop4713 = alloca i1, i1 0
+  %nop4714 = alloca i1, i1 0
+  %nop4715 = alloca i1, i1 0
+  %nop4716 = alloca i1, i1 0
+  %nop4717 = alloca i1, i1 0
+  %nop4718 = alloca i1, i1 0
+  %nop4719 = alloca i1, i1 0
+  %nop4720 = alloca i1, i1 0
+  %nop4721 = alloca i1, i1 0
+  %nop4722 = alloca i1, i1 0
+  %nop4723 = alloca i1, i1 0
+  %nop4724 = alloca i1, i1 0
+  %nop4725 = alloca i1, i1 0
+  %nop4726 = alloca i1, i1 0
+  %nop4727 = alloca i1, i1 0
+  %nop4728 = alloca i1, i1 0
+  %nop4729 = alloca i1, i1 0
+  %nop4730 = alloca i1, i1 0
+  %nop4731 = alloca i1, i1 0
+  %nop4732 = alloca i1, i1 0
+  %nop4733 = alloca i1, i1 0
+  %nop4734 = alloca i1, i1 0
+  %nop4735 = alloca i1, i1 0
+  %nop4736 = alloca i1, i1 0
+  %nop4737 = alloca i1, i1 0
+  %nop4738 = alloca i1, i1 0
+  %nop4739 = alloca i1, i1 0
+  %nop4740 = alloca i1, i1 0
+  %nop4741 = alloca i1, i1 0
+  %nop4742 = alloca i1, i1 0
+  %nop4743 = alloca i1, i1 0
+  %nop4744 = alloca i1, i1 0
+  %nop4745 = alloca i1, i1 0
+  %nop4746 = alloca i1, i1 0
+  %nop4747 = alloca i1, i1 0
+  %nop4748 = alloca i1, i1 0
+  %nop4749 = alloca i1, i1 0
+  %nop4750 = alloca i1, i1 0
+  %nop4751 = alloca i1, i1 0
+  %nop4752 = alloca i1, i1 0
+  %nop4753 = alloca i1, i1 0
+  %nop4754 = alloca i1, i1 0
+  %nop4755 = alloca i1, i1 0
+  %nop4756 = alloca i1, i1 0
+  %nop4757 = alloca i1, i1 0
+  %nop4758 = alloca i1, i1 0
+  %nop4759 = alloca i1, i1 0
+  %nop4760 = alloca i1, i1 0
+  %nop4761 = alloca i1, i1 0
+  %nop4762 = alloca i1, i1 0
+  %nop4763 = alloca i1, i1 0
+  %nop4764 = alloca i1, i1 0
+  %nop4765 = alloca i1, i1 0
+  %nop4766 = alloca i1, i1 0
+  %nop4767 = alloca i1, i1 0
+  %nop4768 = alloca i1, i1 0
+  %nop4769 = alloca i1, i1 0
+  %nop4770 = alloca i1, i1 0
+  %nop4771 = alloca i1, i1 0
+  %nop4772 = alloca i1, i1 0
+  %nop4773 = alloca i1, i1 0
+  %nop4774 = alloca i1, i1 0
+  %nop4775 = alloca i1, i1 0
+  %nop4776 = alloca i1, i1 0
+  %nop4777 = alloca i1, i1 0
+  %nop4778 = alloca i1, i1 0
+  %nop4779 = alloca i1, i1 0
+  %nop4780 = alloca i1, i1 0
+  %nop4781 = alloca i1, i1 0
+  %nop4782 = alloca i1, i1 0
+  %nop4783 = alloca i1, i1 0
+  %nop4784 = alloca i1, i1 0
+  %nop4785 = alloca i1, i1 0
+  %nop4786 = alloca i1, i1 0
+  %nop4787 = alloca i1, i1 0
+  %nop4788 = alloca i1, i1 0
+  %nop4789 = alloca i1, i1 0
+  %nop4790 = alloca i1, i1 0
+  %nop4791 = alloca i1, i1 0
+  %nop4792 = alloca i1, i1 0
+  %nop4793 = alloca i1, i1 0
+  %nop4794 = alloca i1, i1 0
+  %nop4795 = alloca i1, i1 0
+  %nop4796 = alloca i1, i1 0
+  %nop4797 = alloca i1, i1 0
+  %nop4798 = alloca i1, i1 0
+  %nop4799 = alloca i1, i1 0
+  %nop4800 = alloca i1, i1 0
+  %nop4801 = alloca i1, i1 0
+  %nop4802 = alloca i1, i1 0
+  %nop4803 = alloca i1, i1 0
+  %nop4804 = alloca i1, i1 0
+  %nop4805 = alloca i1, i1 0
+  %nop4806 = alloca i1, i1 0
+  %nop4807 = alloca i1, i1 0
+  %nop4808 = alloca i1, i1 0
+  %nop4809 = alloca i1, i1 0
+  %nop4810 = alloca i1, i1 0
+  %nop4811 = alloca i1, i1 0
+  %nop4812 = alloca i1, i1 0
+  %nop4813 = alloca i1, i1 0
+  %nop4814 = alloca i1, i1 0
+  %nop4815 = alloca i1, i1 0
+  %nop4816 = alloca i1, i1 0
+  %nop4817 = alloca i1, i1 0
+  %nop4818 = alloca i1, i1 0
+  %nop4819 = alloca i1, i1 0
+  %nop4820 = alloca i1, i1 0
+  %nop4821 = alloca i1, i1 0
+  %nop4822 = alloca i1, i1 0
+  %nop4823 = alloca i1, i1 0
+  %nop4824 = alloca i1, i1 0
+  %nop4825 = alloca i1, i1 0
+  %nop4826 = alloca i1, i1 0
+  %nop4827 = alloca i1, i1 0
+  %nop4828 = alloca i1, i1 0
+  %nop4829 = alloca i1, i1 0
+  %nop4830 = alloca i1, i1 0
+  %nop4831 = alloca i1, i1 0
+  %nop4832 = alloca i1, i1 0
+  %nop4833 = alloca i1, i1 0
+  %nop4834 = alloca i1, i1 0
+  %nop4835 = alloca i1, i1 0
+  %nop4836 = alloca i1, i1 0
+  %nop4837 = alloca i1, i1 0
+  %nop4838 = alloca i1, i1 0
+  %nop4839 = alloca i1, i1 0
+  %nop4840 = alloca i1, i1 0
+  %nop4841 = alloca i1, i1 0
+  %nop4842 = alloca i1, i1 0
+  %nop4843 = alloca i1, i1 0
+  %nop4844 = alloca i1, i1 0
+  %nop4845 = alloca i1, i1 0
+  %nop4846 = alloca i1, i1 0
+  %nop4847 = alloca i1, i1 0
+  %nop4848 = alloca i1, i1 0
+  %nop4849 = alloca i1, i1 0
+  %nop4850 = alloca i1, i1 0
+  %nop4851 = alloca i1, i1 0
+  %nop4852 = alloca i1, i1 0
+  %nop4853 = alloca i1, i1 0
+  %nop4854 = alloca i1, i1 0
+  %nop4855 = alloca i1, i1 0
+  %nop4856 = alloca i1, i1 0
+  %nop4857 = alloca i1, i1 0
+  %nop4858 = alloca i1, i1 0
+  %nop4859 = alloca i1, i1 0
+  %nop4860 = alloca i1, i1 0
+  %nop4861 = alloca i1, i1 0
+  %nop4862 = alloca i1, i1 0
+  %nop4863 = alloca i1, i1 0
+  %nop4864 = alloca i1, i1 0
+  %nop4865 = alloca i1, i1 0
+  %nop4866 = alloca i1, i1 0
+  %nop4867 = alloca i1, i1 0
+  %nop4868 = alloca i1, i1 0
+  %nop4869 = alloca i1, i1 0
+  %nop4870 = alloca i1, i1 0
+  %nop4871 = alloca i1, i1 0
+  %nop4872 = alloca i1, i1 0
+  %nop4873 = alloca i1, i1 0
+  %nop4874 = alloca i1, i1 0
+  %nop4875 = alloca i1, i1 0
+  %nop4876 = alloca i1, i1 0
+  %nop4877 = alloca i1, i1 0
+  %nop4878 = alloca i1, i1 0
+  %nop4879 = alloca i1, i1 0
+  %nop4880 = alloca i1, i1 0
+  %nop4881 = alloca i1, i1 0
+  %nop4882 = alloca i1, i1 0
+  %nop4883 = alloca i1, i1 0
+  %nop4884 = alloca i1, i1 0
+  %nop4885 = alloca i1, i1 0
+  %nop4886 = alloca i1, i1 0
+  %nop4887 = alloca i1, i1 0
+  %nop4888 = alloca i1, i1 0
+  %nop4889 = alloca i1, i1 0
+  %nop4890 = alloca i1, i1 0
+  %nop4891 = alloca i1, i1 0
+  %nop4892 = alloca i1, i1 0
+  %nop4893 = alloca i1, i1 0
+  %nop4894 = alloca i1, i1 0
+  %nop4895 = alloca i1, i1 0
+  %nop4896 = alloca i1, i1 0
+  %nop4897 = alloca i1, i1 0
+  %nop4898 = alloca i1, i1 0
+  %nop4899 = alloca i1, i1 0
+  %nop4900 = alloca i1, i1 0
+  %nop4901 = alloca i1, i1 0
+  %nop4902 = alloca i1, i1 0
+  %nop4903 = alloca i1, i1 0
+  %nop4904 = alloca i1, i1 0
+  %nop4905 = alloca i1, i1 0
+  %nop4906 = alloca i1, i1 0
+  %nop4907 = alloca i1, i1 0
+  %nop4908 = alloca i1, i1 0
+  %nop4909 = alloca i1, i1 0
+  %nop4910 = alloca i1, i1 0
+  %nop4911 = alloca i1, i1 0
+  %nop4912 = alloca i1, i1 0
+  %nop4913 = alloca i1, i1 0
+  %nop4914 = alloca i1, i1 0
+  %nop4915 = alloca i1, i1 0
+  %nop4916 = alloca i1, i1 0
+  %nop4917 = alloca i1, i1 0
+  %nop4918 = alloca i1, i1 0
+  %nop4919 = alloca i1, i1 0
+  %nop4920 = alloca i1, i1 0
+  %nop4921 = alloca i1, i1 0
+  %nop4922 = alloca i1, i1 0
+  %nop4923 = alloca i1, i1 0
+  %nop4924 = alloca i1, i1 0
+  %nop4925 = alloca i1, i1 0
+  %nop4926 = alloca i1, i1 0
+  %nop4927 = alloca i1, i1 0
+  %nop4928 = alloca i1, i1 0
+  %nop4929 = alloca i1, i1 0
+  %nop4930 = alloca i1, i1 0
+  %nop4931 = alloca i1, i1 0
+  %nop4932 = alloca i1, i1 0
+  %nop4933 = alloca i1, i1 0
+  %nop4934 = alloca i1, i1 0
+  %nop4935 = alloca i1, i1 0
+  %nop4936 = alloca i1, i1 0
+  %nop4937 = alloca i1, i1 0
+  %nop4938 = alloca i1, i1 0
+  %nop4939 = alloca i1, i1 0
+  %nop4940 = alloca i1, i1 0
+  %nop4941 = alloca i1, i1 0
+  %nop4942 = alloca i1, i1 0
+  %nop4943 = alloca i1, i1 0
+  %nop4944 = alloca i1, i1 0
+  %nop4945 = alloca i1, i1 0
+  %nop4946 = alloca i1, i1 0
+  %nop4947 = alloca i1, i1 0
+  %nop4948 = alloca i1, i1 0
+  %nop4949 = alloca i1, i1 0
+  %nop4950 = alloca i1, i1 0
+  %nop4951 = alloca i1, i1 0
+  %nop4952 = alloca i1, i1 0
+  %nop4953 = alloca i1, i1 0
+  %nop4954 = alloca i1, i1 0
+  %nop4955 = alloca i1, i1 0
+  %nop4956 = alloca i1, i1 0
+  %nop4957 = alloca i1, i1 0
+  %nop4958 = alloca i1, i1 0
+  %nop4959 = alloca i1, i1 0
+  %nop4960 = alloca i1, i1 0
+  %nop4961 = alloca i1, i1 0
+  %nop4962 = alloca i1, i1 0
+  %nop4963 = alloca i1, i1 0
+  %nop4964 = alloca i1, i1 0
+  %nop4965 = alloca i1, i1 0
+  %nop4966 = alloca i1, i1 0
+  %nop4967 = alloca i1, i1 0
+  %nop4968 = alloca i1, i1 0
+  %nop4969 = alloca i1, i1 0
+  %nop4970 = alloca i1, i1 0
+  %nop4971 = alloca i1, i1 0
+  %nop4972 = alloca i1, i1 0
+  %nop4973 = alloca i1, i1 0
+  %nop4974 = alloca i1, i1 0
+  %nop4975 = alloca i1, i1 0
+  %nop4976 = alloca i1, i1 0
+  %nop4977 = alloca i1, i1 0
+  %nop4978 = alloca i1, i1 0
+  %nop4979 = alloca i1, i1 0
+  %nop4980 = alloca i1, i1 0
+  %nop4981 = alloca i1, i1 0
+  %nop4982 = alloca i1, i1 0
+  %nop4983 = alloca i1, i1 0
+  %nop4984 = alloca i1, i1 0
+  %nop4985 = alloca i1, i1 0
+  %nop4986 = alloca i1, i1 0
+  %nop4987 = alloca i1, i1 0
+  %nop4988 = alloca i1, i1 0
+  %nop4989 = alloca i1, i1 0
+  %nop4990 = alloca i1, i1 0
+  %nop4991 = alloca i1, i1 0
+  %nop4992 = alloca i1, i1 0
+  %nop4993 = alloca i1, i1 0
+  %nop4994 = alloca i1, i1 0
+  %nop4995 = alloca i1, i1 0
+  %nop4996 = alloca i1, i1 0
+  %nop4997 = alloca i1, i1 0
+  %nop4998 = alloca i1, i1 0
+  %nop4999 = alloca i1, i1 0
+  %nop5000 = alloca i1, i1 0
+  %nop5001 = alloca i1, i1 0
+  %nop5002 = alloca i1, i1 0
+  %nop5003 = alloca i1, i1 0
+  %nop5004 = alloca i1, i1 0
+  %nop5005 = alloca i1, i1 0
+  %nop5006 = alloca i1, i1 0
+  %nop5007 = alloca i1, i1 0
+  %nop5008 = alloca i1, i1 0
+  %nop5009 = alloca i1, i1 0
+  %nop5010 = alloca i1, i1 0
+  %nop5011 = alloca i1, i1 0
+  %nop5012 = alloca i1, i1 0
+  %nop5013 = alloca i1, i1 0
+  %nop5014 = alloca i1, i1 0
+  %nop5015 = alloca i1, i1 0
+  %nop5016 = alloca i1, i1 0
+  %nop5017 = alloca i1, i1 0
+  %nop5018 = alloca i1, i1 0
+  %nop5019 = alloca i1, i1 0
+  %nop5020 = alloca i1, i1 0
+  %nop5021 = alloca i1, i1 0
+  %nop5022 = alloca i1, i1 0
+  %nop5023 = alloca i1, i1 0
+  %nop5024 = alloca i1, i1 0
+  %nop5025 = alloca i1, i1 0
+  %nop5026 = alloca i1, i1 0
+  %nop5027 = alloca i1, i1 0
+  %nop5028 = alloca i1, i1 0
+  %nop5029 = alloca i1, i1 0
+  %nop5030 = alloca i1, i1 0
+  %nop5031 = alloca i1, i1 0
+  %nop5032 = alloca i1, i1 0
+  %nop5033 = alloca i1, i1 0
+  %nop5034 = alloca i1, i1 0
+  %nop5035 = alloca i1, i1 0
+  %nop5036 = alloca i1, i1 0
+  %nop5037 = alloca i1, i1 0
+  %nop5038 = alloca i1, i1 0
+  %nop5039 = alloca i1, i1 0
+  %nop5040 = alloca i1, i1 0
+  %nop5041 = alloca i1, i1 0
+  %nop5042 = alloca i1, i1 0
+  %nop5043 = alloca i1, i1 0
+  %nop5044 = alloca i1, i1 0
+  %nop5045 = alloca i1, i1 0
+  %nop5046 = alloca i1, i1 0
+  %nop5047 = alloca i1, i1 0
+  %nop5048 = alloca i1, i1 0
+  %nop5049 = alloca i1, i1 0
+  %nop5050 = alloca i1, i1 0
+  %nop5051 = alloca i1, i1 0
+  %nop5052 = alloca i1, i1 0
+  %nop5053 = alloca i1, i1 0
+  %nop5054 = alloca i1, i1 0
+  %nop5055 = alloca i1, i1 0
+  %nop5056 = alloca i1, i1 0
+  %nop5057 = alloca i1, i1 0
+  %nop5058 = alloca i1, i1 0
+  %nop5059 = alloca i1, i1 0
+  %nop5060 = alloca i1, i1 0
+  %nop5061 = alloca i1, i1 0
+  %nop5062 = alloca i1, i1 0
+  %nop5063 = alloca i1, i1 0
+  %nop5064 = alloca i1, i1 0
+  %nop5065 = alloca i1, i1 0
+  %nop5066 = alloca i1, i1 0
+  %nop5067 = alloca i1, i1 0
+  %nop5068 = alloca i1, i1 0
+  %nop5069 = alloca i1, i1 0
+  %nop5070 = alloca i1, i1 0
+  %nop5071 = alloca i1, i1 0
+  %nop5072 = alloca i1, i1 0
+  %nop5073 = alloca i1, i1 0
+  %nop5074 = alloca i1, i1 0
+  %nop5075 = alloca i1, i1 0
+  %nop5076 = alloca i1, i1 0
+  %nop5077 = alloca i1, i1 0
+  %nop5078 = alloca i1, i1 0
+  %nop5079 = alloca i1, i1 0
+  %nop5080 = alloca i1, i1 0
+  %nop5081 = alloca i1, i1 0
+  %nop5082 = alloca i1, i1 0
+  %nop5083 = alloca i1, i1 0
+  %nop5084 = alloca i1, i1 0
+  %nop5085 = alloca i1, i1 0
+  %nop5086 = alloca i1, i1 0
+  %nop5087 = alloca i1, i1 0
+  %nop5088 = alloca i1, i1 0
+  %nop5089 = alloca i1, i1 0
+  %nop5090 = alloca i1, i1 0
+  %nop5091 = alloca i1, i1 0
+  %nop5092 = alloca i1, i1 0
+  %nop5093 = alloca i1, i1 0
+  %nop5094 = alloca i1, i1 0
+  %nop5095 = alloca i1, i1 0
+  %nop5096 = alloca i1, i1 0
+  %nop5097 = alloca i1, i1 0
+  %nop5098 = alloca i1, i1 0
+  %nop5099 = alloca i1, i1 0
+  %nop5100 = alloca i1, i1 0
+  %nop5101 = alloca i1, i1 0
+  %nop5102 = alloca i1, i1 0
+  %nop5103 = alloca i1, i1 0
+  %nop5104 = alloca i1, i1 0
+  %nop5105 = alloca i1, i1 0
+  %nop5106 = alloca i1, i1 0
+  %nop5107 = alloca i1, i1 0
+  %nop5108 = alloca i1, i1 0
+  %nop5109 = alloca i1, i1 0
+  %nop5110 = alloca i1, i1 0
+  %nop5111 = alloca i1, i1 0
+  %nop5112 = alloca i1, i1 0
+  %nop5113 = alloca i1, i1 0
+  %nop5114 = alloca i1, i1 0
+  %nop5115 = alloca i1, i1 0
+  %nop5116 = alloca i1, i1 0
+  %nop5117 = alloca i1, i1 0
+  %nop5118 = alloca i1, i1 0
+  %nop5119 = alloca i1, i1 0
+  %nop5120 = alloca i1, i1 0
+  %nop5121 = alloca i1, i1 0
+  %nop5122 = alloca i1, i1 0
+  %nop5123 = alloca i1, i1 0
+  %nop5124 = alloca i1, i1 0
+  %nop5125 = alloca i1, i1 0
+  %nop5126 = alloca i1, i1 0
+  %nop5127 = alloca i1, i1 0
+  %nop5128 = alloca i1, i1 0
+  %nop5129 = alloca i1, i1 0
+  %nop5130 = alloca i1, i1 0
+  %nop5131 = alloca i1, i1 0
+  %nop5132 = alloca i1, i1 0
+  %nop5133 = alloca i1, i1 0
+  %nop5134 = alloca i1, i1 0
+  %nop5135 = alloca i1, i1 0
+  %nop5136 = alloca i1, i1 0
+  %nop5137 = alloca i1, i1 0
+  %nop5138 = alloca i1, i1 0
+  %nop5139 = alloca i1, i1 0
+  %nop5140 = alloca i1, i1 0
+  %nop5141 = alloca i1, i1 0
+  %nop5142 = alloca i1, i1 0
+  %nop5143 = alloca i1, i1 0
+  %nop5144 = alloca i1, i1 0
+  %nop5145 = alloca i1, i1 0
+  %nop5146 = alloca i1, i1 0
+  %nop5147 = alloca i1, i1 0
+  %nop5148 = alloca i1, i1 0
+  %nop5149 = alloca i1, i1 0
+  %nop5150 = alloca i1, i1 0
+  %nop5151 = alloca i1, i1 0
+  %nop5152 = alloca i1, i1 0
+  %nop5153 = alloca i1, i1 0
+  %nop5154 = alloca i1, i1 0
+  %nop5155 = alloca i1, i1 0
+  %nop5156 = alloca i1, i1 0
+  %nop5157 = alloca i1, i1 0
+  %nop5158 = alloca i1, i1 0
+  %nop5159 = alloca i1, i1 0
+  %nop5160 = alloca i1, i1 0
+  %nop5161 = alloca i1, i1 0
+  %nop5162 = alloca i1, i1 0
+  %nop5163 = alloca i1, i1 0
+  %nop5164 = alloca i1, i1 0
+  %nop5165 = alloca i1, i1 0
+  %nop5166 = alloca i1, i1 0
+  %nop5167 = alloca i1, i1 0
+  %nop5168 = alloca i1, i1 0
+  %nop5169 = alloca i1, i1 0
+  %nop5170 = alloca i1, i1 0
+  %nop5171 = alloca i1, i1 0
+  %nop5172 = alloca i1, i1 0
+  %nop5173 = alloca i1, i1 0
+  %nop5174 = alloca i1, i1 0
+  %nop5175 = alloca i1, i1 0
+  %nop5176 = alloca i1, i1 0
+  %nop5177 = alloca i1, i1 0
+  %nop5178 = alloca i1, i1 0
+  %nop5179 = alloca i1, i1 0
+  %nop5180 = alloca i1, i1 0
+  %nop5181 = alloca i1, i1 0
+  %nop5182 = alloca i1, i1 0
+  %nop5183 = alloca i1, i1 0
+  %nop5184 = alloca i1, i1 0
+  %nop5185 = alloca i1, i1 0
+  %nop5186 = alloca i1, i1 0
+  %nop5187 = alloca i1, i1 0
+  %nop5188 = alloca i1, i1 0
+  %nop5189 = alloca i1, i1 0
+  %nop5190 = alloca i1, i1 0
+  %nop5191 = alloca i1, i1 0
+  %nop5192 = alloca i1, i1 0
+  %nop5193 = alloca i1, i1 0
+  %nop5194 = alloca i1, i1 0
+  %nop5195 = alloca i1, i1 0
+  %nop5196 = alloca i1, i1 0
+  %nop5197 = alloca i1, i1 0
+  %nop5198 = alloca i1, i1 0
+  %nop5199 = alloca i1, i1 0
+  %nop5200 = alloca i1, i1 0
+  %nop5201 = alloca i1, i1 0
+  %nop5202 = alloca i1, i1 0
+  %nop5203 = alloca i1, i1 0
+  %nop5204 = alloca i1, i1 0
+  %nop5205 = alloca i1, i1 0
+  %nop5206 = alloca i1, i1 0
+  %nop5207 = alloca i1, i1 0
+  %nop5208 = alloca i1, i1 0
+  %nop5209 = alloca i1, i1 0
+  %nop5210 = alloca i1, i1 0
+  %nop5211 = alloca i1, i1 0
+  %nop5212 = alloca i1, i1 0
+  %nop5213 = alloca i1, i1 0
+  %nop5214 = alloca i1, i1 0
+  %nop5215 = alloca i1, i1 0
+  %nop5216 = alloca i1, i1 0
+  %nop5217 = alloca i1, i1 0
+  %nop5218 = alloca i1, i1 0
+  %nop5219 = alloca i1, i1 0
+  %nop5220 = alloca i1, i1 0
+  %nop5221 = alloca i1, i1 0
+  %nop5222 = alloca i1, i1 0
+  %nop5223 = alloca i1, i1 0
+  %nop5224 = alloca i1, i1 0
+  %nop5225 = alloca i1, i1 0
+  %nop5226 = alloca i1, i1 0
+  %nop5227 = alloca i1, i1 0
+  %nop5228 = alloca i1, i1 0
+  %nop5229 = alloca i1, i1 0
+  %nop5230 = alloca i1, i1 0
+  %nop5231 = alloca i1, i1 0
+  %nop5232 = alloca i1, i1 0
+  %nop5233 = alloca i1, i1 0
+  %nop5234 = alloca i1, i1 0
+  %nop5235 = alloca i1, i1 0
+  %nop5236 = alloca i1, i1 0
+  %nop5237 = alloca i1, i1 0
+  %nop5238 = alloca i1, i1 0
+  %nop5239 = alloca i1, i1 0
+  %nop5240 = alloca i1, i1 0
+  %nop5241 = alloca i1, i1 0
+  %nop5242 = alloca i1, i1 0
+  %nop5243 = alloca i1, i1 0
+  %nop5244 = alloca i1, i1 0
+  %nop5245 = alloca i1, i1 0
+  %nop5246 = alloca i1, i1 0
+  %nop5247 = alloca i1, i1 0
+  %nop5248 = alloca i1, i1 0
+  %nop5249 = alloca i1, i1 0
+  %nop5250 = alloca i1, i1 0
+  %nop5251 = alloca i1, i1 0
+  %nop5252 = alloca i1, i1 0
+  %nop5253 = alloca i1, i1 0
+  %nop5254 = alloca i1, i1 0
+  %nop5255 = alloca i1, i1 0
+  %nop5256 = alloca i1, i1 0
+  %nop5257 = alloca i1, i1 0
+  %nop5258 = alloca i1, i1 0
+  %nop5259 = alloca i1, i1 0
+  %nop5260 = alloca i1, i1 0
+  %nop5261 = alloca i1, i1 0
+  %nop5262 = alloca i1, i1 0
+  %nop5263 = alloca i1, i1 0
+  %nop5264 = alloca i1, i1 0
+  %nop5265 = alloca i1, i1 0
+  %nop5266 = alloca i1, i1 0
+  %nop5267 = alloca i1, i1 0
+  %nop5268 = alloca i1, i1 0
+  %nop5269 = alloca i1, i1 0
+  %nop5270 = alloca i1, i1 0
+  %nop5271 = alloca i1, i1 0
+  %nop5272 = alloca i1, i1 0
+  %nop5273 = alloca i1, i1 0
+  %nop5274 = alloca i1, i1 0
+  %nop5275 = alloca i1, i1 0
+  %nop5276 = alloca i1, i1 0
+  %nop5277 = alloca i1, i1 0
+  %nop5278 = alloca i1, i1 0
+  %nop5279 = alloca i1, i1 0
+  %nop5280 = alloca i1, i1 0
+  %nop5281 = alloca i1, i1 0
+  %nop5282 = alloca i1, i1 0
+  %nop5283 = alloca i1, i1 0
+  %nop5284 = alloca i1, i1 0
+  %nop5285 = alloca i1, i1 0
+  %nop5286 = alloca i1, i1 0
+  %nop5287 = alloca i1, i1 0
+  %nop5288 = alloca i1, i1 0
+  %nop5289 = alloca i1, i1 0
+  %nop5290 = alloca i1, i1 0
+  %nop5291 = alloca i1, i1 0
+  %nop5292 = alloca i1, i1 0
+  %nop5293 = alloca i1, i1 0
+  %nop5294 = alloca i1, i1 0
+  %nop5295 = alloca i1, i1 0
+  %nop5296 = alloca i1, i1 0
+  %nop5297 = alloca i1, i1 0
+  %nop5298 = alloca i1, i1 0
+  %nop5299 = alloca i1, i1 0
+  %nop5300 = alloca i1, i1 0
+  %nop5301 = alloca i1, i1 0
+  %nop5302 = alloca i1, i1 0
+  %nop5303 = alloca i1, i1 0
+  %nop5304 = alloca i1, i1 0
+  %nop5305 = alloca i1, i1 0
+  %nop5306 = alloca i1, i1 0
+  %nop5307 = alloca i1, i1 0
+  %nop5308 = alloca i1, i1 0
+  %nop5309 = alloca i1, i1 0
+  %nop5310 = alloca i1, i1 0
+  %nop5311 = alloca i1, i1 0
+  %nop5312 = alloca i1, i1 0
+  %nop5313 = alloca i1, i1 0
+  %nop5314 = alloca i1, i1 0
+  %nop5315 = alloca i1, i1 0
+  %nop5316 = alloca i1, i1 0
+  %nop5317 = alloca i1, i1 0
+  %nop5318 = alloca i1, i1 0
+  %nop5319 = alloca i1, i1 0
+  %nop5320 = alloca i1, i1 0
+  %nop5321 = alloca i1, i1 0
+  %nop5322 = alloca i1, i1 0
+  %nop5323 = alloca i1, i1 0
+  %nop5324 = alloca i1, i1 0
+  %nop5325 = alloca i1, i1 0
+  %nop5326 = alloca i1, i1 0
+  %nop5327 = alloca i1, i1 0
+  %nop5328 = alloca i1, i1 0
+  %nop5329 = alloca i1, i1 0
+  %nop5330 = alloca i1, i1 0
+  %nop5331 = alloca i1, i1 0
+  %nop5332 = alloca i1, i1 0
+  %nop5333 = alloca i1, i1 0
+  %nop5334 = alloca i1, i1 0
+  %nop5335 = alloca i1, i1 0
+  %nop5336 = alloca i1, i1 0
+  %nop5337 = alloca i1, i1 0
+  %nop5338 = alloca i1, i1 0
+  %nop5339 = alloca i1, i1 0
+  %nop5340 = alloca i1, i1 0
+  %nop5341 = alloca i1, i1 0
+  %nop5342 = alloca i1, i1 0
+  %nop5343 = alloca i1, i1 0
+  %nop5344 = alloca i1, i1 0
+  %nop5345 = alloca i1, i1 0
+  %nop5346 = alloca i1, i1 0
+  %nop5347 = alloca i1, i1 0
+  %nop5348 = alloca i1, i1 0
+  %nop5349 = alloca i1, i1 0
+  %nop5350 = alloca i1, i1 0
+  %nop5351 = alloca i1, i1 0
+  %nop5352 = alloca i1, i1 0
+  %nop5353 = alloca i1, i1 0
+  %nop5354 = alloca i1, i1 0
+  %nop5355 = alloca i1, i1 0
+  %nop5356 = alloca i1, i1 0
+  %nop5357 = alloca i1, i1 0
+  %nop5358 = alloca i1, i1 0
+  %nop5359 = alloca i1, i1 0
+  %nop5360 = alloca i1, i1 0
+  %nop5361 = alloca i1, i1 0
+  %nop5362 = alloca i1, i1 0
+  %nop5363 = alloca i1, i1 0
+  %nop5364 = alloca i1, i1 0
+  %nop5365 = alloca i1, i1 0
+  %nop5366 = alloca i1, i1 0
+  %nop5367 = alloca i1, i1 0
+  %nop5368 = alloca i1, i1 0
+  %nop5369 = alloca i1, i1 0
+  %nop5370 = alloca i1, i1 0
+  %nop5371 = alloca i1, i1 0
+  %nop5372 = alloca i1, i1 0
+  %nop5373 = alloca i1, i1 0
+  %nop5374 = alloca i1, i1 0
+  %nop5375 = alloca i1, i1 0
+  %nop5376 = alloca i1, i1 0
+  %nop5377 = alloca i1, i1 0
+  %nop5378 = alloca i1, i1 0
+  %nop5379 = alloca i1, i1 0
+  %nop5380 = alloca i1, i1 0
+  %nop5381 = alloca i1, i1 0
+  %nop5382 = alloca i1, i1 0
+  %nop5383 = alloca i1, i1 0
+  %nop5384 = alloca i1, i1 0
+  %nop5385 = alloca i1, i1 0
+  %nop5386 = alloca i1, i1 0
+  %nop5387 = alloca i1, i1 0
+  %nop5388 = alloca i1, i1 0
+  %nop5389 = alloca i1, i1 0
+  %nop5390 = alloca i1, i1 0
+  %nop5391 = alloca i1, i1 0
+  %nop5392 = alloca i1, i1 0
+  %nop5393 = alloca i1, i1 0
+  %nop5394 = alloca i1, i1 0
+  %nop5395 = alloca i1, i1 0
+  %nop5396 = alloca i1, i1 0
+  %nop5397 = alloca i1, i1 0
+  %nop5398 = alloca i1, i1 0
+  %nop5399 = alloca i1, i1 0
+  %nop5400 = alloca i1, i1 0
+  %nop5401 = alloca i1, i1 0
+  %nop5402 = alloca i1, i1 0
+  %nop5403 = alloca i1, i1 0
+  %nop5404 = alloca i1, i1 0
+  %nop5405 = alloca i1, i1 0
+  %nop5406 = alloca i1, i1 0
+  %nop5407 = alloca i1, i1 0
+  %nop5408 = alloca i1, i1 0
+  %nop5409 = alloca i1, i1 0
+  %nop5410 = alloca i1, i1 0
+  %nop5411 = alloca i1, i1 0
+  %nop5412 = alloca i1, i1 0
+  %nop5413 = alloca i1, i1 0
+  %nop5414 = alloca i1, i1 0
+  %nop5415 = alloca i1, i1 0
+  %nop5416 = alloca i1, i1 0
+  %nop5417 = alloca i1, i1 0
+  %nop5418 = alloca i1, i1 0
+  %nop5419 = alloca i1, i1 0
+  %nop5420 = alloca i1, i1 0
+  %nop5421 = alloca i1, i1 0
+  %nop5422 = alloca i1, i1 0
+  %nop5423 = alloca i1, i1 0
+  %nop5424 = alloca i1, i1 0
+  %nop5425 = alloca i1, i1 0
+  %nop5426 = alloca i1, i1 0
+  %nop5427 = alloca i1, i1 0
+  %nop5428 = alloca i1, i1 0
+  %nop5429 = alloca i1, i1 0
+  %nop5430 = alloca i1, i1 0
+  %nop5431 = alloca i1, i1 0
+  %nop5432 = alloca i1, i1 0
+  %nop5433 = alloca i1, i1 0
+  %nop5434 = alloca i1, i1 0
+  %nop5435 = alloca i1, i1 0
+  %nop5436 = alloca i1, i1 0
+  %nop5437 = alloca i1, i1 0
+  %nop5438 = alloca i1, i1 0
+  %nop5439 = alloca i1, i1 0
+  %nop5440 = alloca i1, i1 0
+  %nop5441 = alloca i1, i1 0
+  %nop5442 = alloca i1, i1 0
+  %nop5443 = alloca i1, i1 0
+  %nop5444 = alloca i1, i1 0
+  %nop5445 = alloca i1, i1 0
+  %nop5446 = alloca i1, i1 0
+  %nop5447 = alloca i1, i1 0
+  %nop5448 = alloca i1, i1 0
+  %nop5449 = alloca i1, i1 0
+  %nop5450 = alloca i1, i1 0
+  %nop5451 = alloca i1, i1 0
+  %nop5452 = alloca i1, i1 0
+  %nop5453 = alloca i1, i1 0
+  %nop5454 = alloca i1, i1 0
+  %nop5455 = alloca i1, i1 0
+  %nop5456 = alloca i1, i1 0
+  %nop5457 = alloca i1, i1 0
+  %nop5458 = alloca i1, i1 0
+  %nop5459 = alloca i1, i1 0
+  %nop5460 = alloca i1, i1 0
+  %nop5461 = alloca i1, i1 0
+  %nop5462 = alloca i1, i1 0
+  %nop5463 = alloca i1, i1 0
+  %nop5464 = alloca i1, i1 0
+  %nop5465 = alloca i1, i1 0
+  %nop5466 = alloca i1, i1 0
+  %nop5467 = alloca i1, i1 0
+  %nop5468 = alloca i1, i1 0
+  %nop5469 = alloca i1, i1 0
+  %nop5470 = alloca i1, i1 0
+  %nop5471 = alloca i1, i1 0
+  %nop5472 = alloca i1, i1 0
+  %nop5473 = alloca i1, i1 0
+  %nop5474 = alloca i1, i1 0
+  %nop5475 = alloca i1, i1 0
+  %nop5476 = alloca i1, i1 0
+  %nop5477 = alloca i1, i1 0
+  %nop5478 = alloca i1, i1 0
+  %nop5479 = alloca i1, i1 0
+  %nop5480 = alloca i1, i1 0
+  %nop5481 = alloca i1, i1 0
+  %nop5482 = alloca i1, i1 0
+  %nop5483 = alloca i1, i1 0
+  %nop5484 = alloca i1, i1 0
+  %nop5485 = alloca i1, i1 0
+  %nop5486 = alloca i1, i1 0
+  %nop5487 = alloca i1, i1 0
+  %nop5488 = alloca i1, i1 0
+  %nop5489 = alloca i1, i1 0
+  %nop5490 = alloca i1, i1 0
+  %nop5491 = alloca i1, i1 0
+  %nop5492 = alloca i1, i1 0
+  %nop5493 = alloca i1, i1 0
+  %nop5494 = alloca i1, i1 0
+  %nop5495 = alloca i1, i1 0
+  %nop5496 = alloca i1, i1 0
+  %nop5497 = alloca i1, i1 0
+  %nop5498 = alloca i1, i1 0
+  %nop5499 = alloca i1, i1 0
+  %nop5500 = alloca i1, i1 0
+  %nop5501 = alloca i1, i1 0
+  %nop5502 = alloca i1, i1 0
+  %nop5503 = alloca i1, i1 0
+  %nop5504 = alloca i1, i1 0
+  %nop5505 = alloca i1, i1 0
+  %nop5506 = alloca i1, i1 0
+  %nop5507 = alloca i1, i1 0
+  %nop5508 = alloca i1, i1 0
+  %nop5509 = alloca i1, i1 0
+  %nop5510 = alloca i1, i1 0
+  %nop5511 = alloca i1, i1 0
+  %nop5512 = alloca i1, i1 0
+  %nop5513 = alloca i1, i1 0
+  %nop5514 = alloca i1, i1 0
+  %nop5515 = alloca i1, i1 0
+  %nop5516 = alloca i1, i1 0
+  %nop5517 = alloca i1, i1 0
+  %nop5518 = alloca i1, i1 0
+  %nop5519 = alloca i1, i1 0
+  %nop5520 = alloca i1, i1 0
+  %nop5521 = alloca i1, i1 0
+  %nop5522 = alloca i1, i1 0
+  %nop5523 = alloca i1, i1 0
+  %nop5524 = alloca i1, i1 0
+  %nop5525 = alloca i1, i1 0
+  %nop5526 = alloca i1, i1 0
+  %nop5527 = alloca i1, i1 0
+  %nop5528 = alloca i1, i1 0
+  %nop5529 = alloca i1, i1 0
+  %nop5530 = alloca i1, i1 0
+  %nop5531 = alloca i1, i1 0
+  %nop5532 = alloca i1, i1 0
+  %nop5533 = alloca i1, i1 0
+  %nop5534 = alloca i1, i1 0
+  %nop5535 = alloca i1, i1 0
+  %nop5536 = alloca i1, i1 0
+  %nop5537 = alloca i1, i1 0
+  %nop5538 = alloca i1, i1 0
+  %nop5539 = alloca i1, i1 0
+  %nop5540 = alloca i1, i1 0
+  %nop5541 = alloca i1, i1 0
+  %nop5542 = alloca i1, i1 0
+  %nop5543 = alloca i1, i1 0
+  %nop5544 = alloca i1, i1 0
+  %nop5545 = alloca i1, i1 0
+  %nop5546 = alloca i1, i1 0
+  %nop5547 = alloca i1, i1 0
+  %nop5548 = alloca i1, i1 0
+  %nop5549 = alloca i1, i1 0
+  %nop5550 = alloca i1, i1 0
+  %nop5551 = alloca i1, i1 0
+  %nop5552 = alloca i1, i1 0
+  %nop5553 = alloca i1, i1 0
+  %nop5554 = alloca i1, i1 0
+  %nop5555 = alloca i1, i1 0
+  %nop5556 = alloca i1, i1 0
+  %nop5557 = alloca i1, i1 0
+  %nop5558 = alloca i1, i1 0
+  %nop5559 = alloca i1, i1 0
+  %nop5560 = alloca i1, i1 0
+  %nop5561 = alloca i1, i1 0
+  %nop5562 = alloca i1, i1 0
+  %nop5563 = alloca i1, i1 0
+  %nop5564 = alloca i1, i1 0
+  %nop5565 = alloca i1, i1 0
+  %nop5566 = alloca i1, i1 0
+  %nop5567 = alloca i1, i1 0
+  %nop5568 = alloca i1, i1 0
+  %nop5569 = alloca i1, i1 0
+  %nop5570 = alloca i1, i1 0
+  %nop5571 = alloca i1, i1 0
+  %nop5572 = alloca i1, i1 0
+  %nop5573 = alloca i1, i1 0
+  %nop5574 = alloca i1, i1 0
+  %nop5575 = alloca i1, i1 0
+  %nop5576 = alloca i1, i1 0
+  %nop5577 = alloca i1, i1 0
+  %nop5578 = alloca i1, i1 0
+  %nop5579 = alloca i1, i1 0
+  %nop5580 = alloca i1, i1 0
+  %nop5581 = alloca i1, i1 0
+  %nop5582 = alloca i1, i1 0
+  %nop5583 = alloca i1, i1 0
+  %nop5584 = alloca i1, i1 0
+  %nop5585 = alloca i1, i1 0
+  %nop5586 = alloca i1, i1 0
+  %nop5587 = alloca i1, i1 0
+  %nop5588 = alloca i1, i1 0
+  %nop5589 = alloca i1, i1 0
+  %nop5590 = alloca i1, i1 0
+  %nop5591 = alloca i1, i1 0
+  %nop5592 = alloca i1, i1 0
+  %nop5593 = alloca i1, i1 0
+  %nop5594 = alloca i1, i1 0
+  %nop5595 = alloca i1, i1 0
+  %nop5596 = alloca i1, i1 0
+  %nop5597 = alloca i1, i1 0
+  %nop5598 = alloca i1, i1 0
+  %nop5599 = alloca i1, i1 0
+  %nop5600 = alloca i1, i1 0
+  %nop5601 = alloca i1, i1 0
+  %nop5602 = alloca i1, i1 0
+  %nop5603 = alloca i1, i1 0
+  %nop5604 = alloca i1, i1 0
+  %nop5605 = alloca i1, i1 0
+  %nop5606 = alloca i1, i1 0
+  %nop5607 = alloca i1, i1 0
+  %nop5608 = alloca i1, i1 0
+  %nop5609 = alloca i1, i1 0
+  %nop5610 = alloca i1, i1 0
+  %nop5611 = alloca i1, i1 0
+  %nop5612 = alloca i1, i1 0
+  %nop5613 = alloca i1, i1 0
+  %nop5614 = alloca i1, i1 0
+  %nop5615 = alloca i1, i1 0
+  %nop5616 = alloca i1, i1 0
+  %nop5617 = alloca i1, i1 0
+  %nop5618 = alloca i1, i1 0
+  %nop5619 = alloca i1, i1 0
+  %nop5620 = alloca i1, i1 0
+  %nop5621 = alloca i1, i1 0
+  %nop5622 = alloca i1, i1 0
+  %nop5623 = alloca i1, i1 0
+  %nop5624 = alloca i1, i1 0
+  %nop5625 = alloca i1, i1 0
+  %nop5626 = alloca i1, i1 0
+  %nop5627 = alloca i1, i1 0
+  %nop5628 = alloca i1, i1 0
+  %nop5629 = alloca i1, i1 0
+  %nop5630 = alloca i1, i1 0
+  %nop5631 = alloca i1, i1 0
+  %nop5632 = alloca i1, i1 0
+  %nop5633 = alloca i1, i1 0
+  %nop5634 = alloca i1, i1 0
+  %nop5635 = alloca i1, i1 0
+  %nop5636 = alloca i1, i1 0
+  %nop5637 = alloca i1, i1 0
+  %nop5638 = alloca i1, i1 0
+  %nop5639 = alloca i1, i1 0
+  %nop5640 = alloca i1, i1 0
+  %nop5641 = alloca i1, i1 0
+  %nop5642 = alloca i1, i1 0
+  %nop5643 = alloca i1, i1 0
+  %nop5644 = alloca i1, i1 0
+  %nop5645 = alloca i1, i1 0
+  %nop5646 = alloca i1, i1 0
+  %nop5647 = alloca i1, i1 0
+  %nop5648 = alloca i1, i1 0
+  %nop5649 = alloca i1, i1 0
+  %nop5650 = alloca i1, i1 0
+  %nop5651 = alloca i1, i1 0
+  %nop5652 = alloca i1, i1 0
+  %nop5653 = alloca i1, i1 0
+  %nop5654 = alloca i1, i1 0
+  %nop5655 = alloca i1, i1 0
+  %nop5656 = alloca i1, i1 0
+  %nop5657 = alloca i1, i1 0
+  %nop5658 = alloca i1, i1 0
+  %nop5659 = alloca i1, i1 0
+  %nop5660 = alloca i1, i1 0
+  %nop5661 = alloca i1, i1 0
+  %nop5662 = alloca i1, i1 0
+  %nop5663 = alloca i1, i1 0
+  %nop5664 = alloca i1, i1 0
+  %nop5665 = alloca i1, i1 0
+  %nop5666 = alloca i1, i1 0
+  %nop5667 = alloca i1, i1 0
+  %nop5668 = alloca i1, i1 0
+  %nop5669 = alloca i1, i1 0
+  %nop5670 = alloca i1, i1 0
+  %nop5671 = alloca i1, i1 0
+  %nop5672 = alloca i1, i1 0
+  %nop5673 = alloca i1, i1 0
+  %nop5674 = alloca i1, i1 0
+  %nop5675 = alloca i1, i1 0
+  %nop5676 = alloca i1, i1 0
+  %nop5677 = alloca i1, i1 0
+  %nop5678 = alloca i1, i1 0
+  %nop5679 = alloca i1, i1 0
+  %nop5680 = alloca i1, i1 0
+  %nop5681 = alloca i1, i1 0
+  %nop5682 = alloca i1, i1 0
+  %nop5683 = alloca i1, i1 0
+  %nop5684 = alloca i1, i1 0
+  %nop5685 = alloca i1, i1 0
+  %nop5686 = alloca i1, i1 0
+  %nop5687 = alloca i1, i1 0
+  %nop5688 = alloca i1, i1 0
+  %nop5689 = alloca i1, i1 0
+  %nop5690 = alloca i1, i1 0
+  %nop5691 = alloca i1, i1 0
+  %nop5692 = alloca i1, i1 0
+  %nop5693 = alloca i1, i1 0
+  %nop5694 = alloca i1, i1 0
+  %nop5695 = alloca i1, i1 0
+  %nop5696 = alloca i1, i1 0
+  %nop5697 = alloca i1, i1 0
+  %nop5698 = alloca i1, i1 0
+  %nop5699 = alloca i1, i1 0
+  %nop5700 = alloca i1, i1 0
+  %nop5701 = alloca i1, i1 0
+  %nop5702 = alloca i1, i1 0
+  %nop5703 = alloca i1, i1 0
+  %nop5704 = alloca i1, i1 0
+  %nop5705 = alloca i1, i1 0
+  %nop5706 = alloca i1, i1 0
+  %nop5707 = alloca i1, i1 0
+  %nop5708 = alloca i1, i1 0
+  %nop5709 = alloca i1, i1 0
+  %nop5710 = alloca i1, i1 0
+  %nop5711 = alloca i1, i1 0
+  %nop5712 = alloca i1, i1 0
+  %nop5713 = alloca i1, i1 0
+  %nop5714 = alloca i1, i1 0
+  %nop5715 = alloca i1, i1 0
+  %nop5716 = alloca i1, i1 0
+  %nop5717 = alloca i1, i1 0
+  %nop5718 = alloca i1, i1 0
+  %nop5719 = alloca i1, i1 0
+  %nop5720 = alloca i1, i1 0
+  %nop5721 = alloca i1, i1 0
+  %nop5722 = alloca i1, i1 0
+  %nop5723 = alloca i1, i1 0
+  %nop5724 = alloca i1, i1 0
+  %nop5725 = alloca i1, i1 0
+  %nop5726 = alloca i1, i1 0
+  %nop5727 = alloca i1, i1 0
+  %nop5728 = alloca i1, i1 0
+  %nop5729 = alloca i1, i1 0
+  %nop5730 = alloca i1, i1 0
+  %nop5731 = alloca i1, i1 0
+  %nop5732 = alloca i1, i1 0
+  %nop5733 = alloca i1, i1 0
+  %nop5734 = alloca i1, i1 0
+  %nop5735 = alloca i1, i1 0
+  %nop5736 = alloca i1, i1 0
+  %nop5737 = alloca i1, i1 0
+  %nop5738 = alloca i1, i1 0
+  %nop5739 = alloca i1, i1 0
+  %nop5740 = alloca i1, i1 0
+  %nop5741 = alloca i1, i1 0
+  %nop5742 = alloca i1, i1 0
+  %nop5743 = alloca i1, i1 0
+  %nop5744 = alloca i1, i1 0
+  %nop5745 = alloca i1, i1 0
+  %nop5746 = alloca i1, i1 0
+  %nop5747 = alloca i1, i1 0
+  %nop5748 = alloca i1, i1 0
+  %nop5749 = alloca i1, i1 0
+  %nop5750 = alloca i1, i1 0
+  %nop5751 = alloca i1, i1 0
+  %nop5752 = alloca i1, i1 0
+  %nop5753 = alloca i1, i1 0
+  %nop5754 = alloca i1, i1 0
+  %nop5755 = alloca i1, i1 0
+  %nop5756 = alloca i1, i1 0
+  %nop5757 = alloca i1, i1 0
+  %nop5758 = alloca i1, i1 0
+  %nop5759 = alloca i1, i1 0
+  %nop5760 = alloca i1, i1 0
+  %nop5761 = alloca i1, i1 0
+  %nop5762 = alloca i1, i1 0
+  %nop5763 = alloca i1, i1 0
+  %nop5764 = alloca i1, i1 0
+  %nop5765 = alloca i1, i1 0
+  %nop5766 = alloca i1, i1 0
+  %nop5767 = alloca i1, i1 0
+  %nop5768 = alloca i1, i1 0
+  %nop5769 = alloca i1, i1 0
+  %nop5770 = alloca i1, i1 0
+  %nop5771 = alloca i1, i1 0
+  %nop5772 = alloca i1, i1 0
+  %nop5773 = alloca i1, i1 0
+  %nop5774 = alloca i1, i1 0
+  %nop5775 = alloca i1, i1 0
+  %nop5776 = alloca i1, i1 0
+  %nop5777 = alloca i1, i1 0
+  %nop5778 = alloca i1, i1 0
+  %nop5779 = alloca i1, i1 0
+  %nop5780 = alloca i1, i1 0
+  %nop5781 = alloca i1, i1 0
+  %nop5782 = alloca i1, i1 0
+  %nop5783 = alloca i1, i1 0
+  %nop5784 = alloca i1, i1 0
+  %nop5785 = alloca i1, i1 0
+  %nop5786 = alloca i1, i1 0
+  %nop5787 = alloca i1, i1 0
+  %nop5788 = alloca i1, i1 0
+  %nop5789 = alloca i1, i1 0
+  %nop5790 = alloca i1, i1 0
+  %nop5791 = alloca i1, i1 0
+  %nop5792 = alloca i1, i1 0
+  %nop5793 = alloca i1, i1 0
+  %nop5794 = alloca i1, i1 0
+  %nop5795 = alloca i1, i1 0
+  %nop5796 = alloca i1, i1 0
+  %nop5797 = alloca i1, i1 0
+  %nop5798 = alloca i1, i1 0
+  %nop5799 = alloca i1, i1 0
+  %nop5800 = alloca i1, i1 0
+  %nop5801 = alloca i1, i1 0
+  %nop5802 = alloca i1, i1 0
+  %nop5803 = alloca i1, i1 0
+  %nop5804 = alloca i1, i1 0
+  %nop5805 = alloca i1, i1 0
+  %nop5806 = alloca i1, i1 0
+  %nop5807 = alloca i1, i1 0
+  %nop5808 = alloca i1, i1 0
+  %nop5809 = alloca i1, i1 0
+  %nop5810 = alloca i1, i1 0
+  %nop5811 = alloca i1, i1 0
+  %nop5812 = alloca i1, i1 0
+  %nop5813 = alloca i1, i1 0
+  %nop5814 = alloca i1, i1 0
+  %nop5815 = alloca i1, i1 0
+  %nop5816 = alloca i1, i1 0
+  %nop5817 = alloca i1, i1 0
+  %nop5818 = alloca i1, i1 0
+  %nop5819 = alloca i1, i1 0
+  %nop5820 = alloca i1, i1 0
+  %nop5821 = alloca i1, i1 0
+  %nop5822 = alloca i1, i1 0
+  %nop5823 = alloca i1, i1 0
+  %nop5824 = alloca i1, i1 0
+  %nop5825 = alloca i1, i1 0
+  %nop5826 = alloca i1, i1 0
+  %nop5827 = alloca i1, i1 0
+  %nop5828 = alloca i1, i1 0
+  %nop5829 = alloca i1, i1 0
+  %nop5830 = alloca i1, i1 0
+  %nop5831 = alloca i1, i1 0
+  %nop5832 = alloca i1, i1 0
+  %nop5833 = alloca i1, i1 0
+  %nop5834 = alloca i1, i1 0
+  %nop5835 = alloca i1, i1 0
+  %nop5836 = alloca i1, i1 0
+  %nop5837 = alloca i1, i1 0
+  %nop5838 = alloca i1, i1 0
+  %nop5839 = alloca i1, i1 0
+  %nop5840 = alloca i1, i1 0
+  %nop5841 = alloca i1, i1 0
+  %nop5842 = alloca i1, i1 0
+  %nop5843 = alloca i1, i1 0
+  %nop5844 = alloca i1, i1 0
+  %nop5845 = alloca i1, i1 0
+  %nop5846 = alloca i1, i1 0
+  %nop5847 = alloca i1, i1 0
+  %nop5848 = alloca i1, i1 0
+  %nop5849 = alloca i1, i1 0
+  %nop5850 = alloca i1, i1 0
+  %nop5851 = alloca i1, i1 0
+  %nop5852 = alloca i1, i1 0
+  %nop5853 = alloca i1, i1 0
+  %nop5854 = alloca i1, i1 0
+  %nop5855 = alloca i1, i1 0
+  %nop5856 = alloca i1, i1 0
+  %nop5857 = alloca i1, i1 0
+  %nop5858 = alloca i1, i1 0
+  %nop5859 = alloca i1, i1 0
+  %nop5860 = alloca i1, i1 0
+  %nop5861 = alloca i1, i1 0
+  %nop5862 = alloca i1, i1 0
+  %nop5863 = alloca i1, i1 0
+  %nop5864 = alloca i1, i1 0
+  %nop5865 = alloca i1, i1 0
+  %nop5866 = alloca i1, i1 0
+  %nop5867 = alloca i1, i1 0
+  %nop5868 = alloca i1, i1 0
+  %nop5869 = alloca i1, i1 0
+  %nop5870 = alloca i1, i1 0
+  %nop5871 = alloca i1, i1 0
+  %nop5872 = alloca i1, i1 0
+  %nop5873 = alloca i1, i1 0
+  %nop5874 = alloca i1, i1 0
+  %nop5875 = alloca i1, i1 0
+  %nop5876 = alloca i1, i1 0
+  %nop5877 = alloca i1, i1 0
+  %nop5878 = alloca i1, i1 0
+  %nop5879 = alloca i1, i1 0
+  %nop5880 = alloca i1, i1 0
+  %nop5881 = alloca i1, i1 0
+  %nop5882 = alloca i1, i1 0
+  %nop5883 = alloca i1, i1 0
+  %nop5884 = alloca i1, i1 0
+  %nop5885 = alloca i1, i1 0
+  %nop5886 = alloca i1, i1 0
+  %nop5887 = alloca i1, i1 0
+  %nop5888 = alloca i1, i1 0
+  %nop5889 = alloca i1, i1 0
+  %nop5890 = alloca i1, i1 0
+  %nop5891 = alloca i1, i1 0
+  %nop5892 = alloca i1, i1 0
+  %nop5893 = alloca i1, i1 0
+  %nop5894 = alloca i1, i1 0
+  %nop5895 = alloca i1, i1 0
+  %nop5896 = alloca i1, i1 0
+  %nop5897 = alloca i1, i1 0
+  %nop5898 = alloca i1, i1 0
+  %nop5899 = alloca i1, i1 0
+  %nop5900 = alloca i1, i1 0
+  %nop5901 = alloca i1, i1 0
+  %nop5902 = alloca i1, i1 0
+  %nop5903 = alloca i1, i1 0
+  %nop5904 = alloca i1, i1 0
+  %nop5905 = alloca i1, i1 0
+  %nop5906 = alloca i1, i1 0
+  %nop5907 = alloca i1, i1 0
+  %nop5908 = alloca i1, i1 0
+  %nop5909 = alloca i1, i1 0
+  %nop5910 = alloca i1, i1 0
+  %nop5911 = alloca i1, i1 0
+  %nop5912 = alloca i1, i1 0
+  %nop5913 = alloca i1, i1 0
+  %nop5914 = alloca i1, i1 0
+  %nop5915 = alloca i1, i1 0
+  %nop5916 = alloca i1, i1 0
+  %nop5917 = alloca i1, i1 0
+  %nop5918 = alloca i1, i1 0
+  %nop5919 = alloca i1, i1 0
+  %nop5920 = alloca i1, i1 0
+  %nop5921 = alloca i1, i1 0
+  %nop5922 = alloca i1, i1 0
+  %nop5923 = alloca i1, i1 0
+  %nop5924 = alloca i1, i1 0
+  %nop5925 = alloca i1, i1 0
+  %nop5926 = alloca i1, i1 0
+  %nop5927 = alloca i1, i1 0
+  %nop5928 = alloca i1, i1 0
+  %nop5929 = alloca i1, i1 0
+  %nop5930 = alloca i1, i1 0
+  %nop5931 = alloca i1, i1 0
+  %nop5932 = alloca i1, i1 0
+  %nop5933 = alloca i1, i1 0
+  %nop5934 = alloca i1, i1 0
+  %nop5935 = alloca i1, i1 0
+  %nop5936 = alloca i1, i1 0
+  %nop5937 = alloca i1, i1 0
+  %nop5938 = alloca i1, i1 0
+  %nop5939 = alloca i1, i1 0
+  %nop5940 = alloca i1, i1 0
+  %nop5941 = alloca i1, i1 0
+  %nop5942 = alloca i1, i1 0
+  %nop5943 = alloca i1, i1 0
+  %nop5944 = alloca i1, i1 0
+  %nop5945 = alloca i1, i1 0
+  %nop5946 = alloca i1, i1 0
+  %nop5947 = alloca i1, i1 0
+  %nop5948 = alloca i1, i1 0
+  %nop5949 = alloca i1, i1 0
+  %nop5950 = alloca i1, i1 0
+  %nop5951 = alloca i1, i1 0
+  %nop5952 = alloca i1, i1 0
+  %nop5953 = alloca i1, i1 0
+  %nop5954 = alloca i1, i1 0
+  %nop5955 = alloca i1, i1 0
+  %nop5956 = alloca i1, i1 0
+  %nop5957 = alloca i1, i1 0
+  %nop5958 = alloca i1, i1 0
+  %nop5959 = alloca i1, i1 0
+  %nop5960 = alloca i1, i1 0
+  %nop5961 = alloca i1, i1 0
+  %nop5962 = alloca i1, i1 0
+  %nop5963 = alloca i1, i1 0
+  %nop5964 = alloca i1, i1 0
+  %nop5965 = alloca i1, i1 0
+  %nop5966 = alloca i1, i1 0
+  %nop5967 = alloca i1, i1 0
+  %nop5968 = alloca i1, i1 0
+  %nop5969 = alloca i1, i1 0
+  %nop5970 = alloca i1, i1 0
+  %nop5971 = alloca i1, i1 0
+  %nop5972 = alloca i1, i1 0
+  %nop5973 = alloca i1, i1 0
+  %nop5974 = alloca i1, i1 0
+  %nop5975 = alloca i1, i1 0
+  %nop5976 = alloca i1, i1 0
+  %nop5977 = alloca i1, i1 0
+  %nop5978 = alloca i1, i1 0
+  %nop5979 = alloca i1, i1 0
+  %nop5980 = alloca i1, i1 0
+  %nop5981 = alloca i1, i1 0
+  %nop5982 = alloca i1, i1 0
+  %nop5983 = alloca i1, i1 0
+  %nop5984 = alloca i1, i1 0
+  %nop5985 = alloca i1, i1 0
+  %nop5986 = alloca i1, i1 0
+  %nop5987 = alloca i1, i1 0
+  %nop5988 = alloca i1, i1 0
+  %nop5989 = alloca i1, i1 0
+  %nop5990 = alloca i1, i1 0
+  %nop5991 = alloca i1, i1 0
+  %nop5992 = alloca i1, i1 0
+  %nop5993 = alloca i1, i1 0
+  %nop5994 = alloca i1, i1 0
+  %nop5995 = alloca i1, i1 0
+  %nop5996 = alloca i1, i1 0
+  %nop5997 = alloca i1, i1 0
+  %nop5998 = alloca i1, i1 0
+  %nop5999 = alloca i1, i1 0
+  %nop6000 = alloca i1, i1 0
+  %nop6001 = alloca i1, i1 0
+  %nop6002 = alloca i1, i1 0
+  %nop6003 = alloca i1, i1 0
+  %nop6004 = alloca i1, i1 0
+  %nop6005 = alloca i1, i1 0
+  %nop6006 = alloca i1, i1 0
+  %nop6007 = alloca i1, i1 0
+  %nop6008 = alloca i1, i1 0
+  %nop6009 = alloca i1, i1 0
+  %nop6010 = alloca i1, i1 0
+  %nop6011 = alloca i1, i1 0
+  %nop6012 = alloca i1, i1 0
+  %nop6013 = alloca i1, i1 0
+  %nop6014 = alloca i1, i1 0
+  %nop6015 = alloca i1, i1 0
+  %nop6016 = alloca i1, i1 0
+  %nop6017 = alloca i1, i1 0
+  %nop6018 = alloca i1, i1 0
+  %nop6019 = alloca i1, i1 0
+  %nop6020 = alloca i1, i1 0
+  %nop6021 = alloca i1, i1 0
+  %nop6022 = alloca i1, i1 0
+  %nop6023 = alloca i1, i1 0
+  %nop6024 = alloca i1, i1 0
+  %nop6025 = alloca i1, i1 0
+  %nop6026 = alloca i1, i1 0
+  %nop6027 = alloca i1, i1 0
+  %nop6028 = alloca i1, i1 0
+  %nop6029 = alloca i1, i1 0
+  %nop6030 = alloca i1, i1 0
+  %nop6031 = alloca i1, i1 0
+  %nop6032 = alloca i1, i1 0
+  %nop6033 = alloca i1, i1 0
+  %nop6034 = alloca i1, i1 0
+  %nop6035 = alloca i1, i1 0
+  %nop6036 = alloca i1, i1 0
+  %nop6037 = alloca i1, i1 0
+  %nop6038 = alloca i1, i1 0
+  %nop6039 = alloca i1, i1 0
+  %nop6040 = alloca i1, i1 0
+  %nop6041 = alloca i1, i1 0
+  %nop6042 = alloca i1, i1 0
+  %nop6043 = alloca i1, i1 0
+  %nop6044 = alloca i1, i1 0
+  %nop6045 = alloca i1, i1 0
+  %nop6046 = alloca i1, i1 0
+  %nop6047 = alloca i1, i1 0
+  %nop6048 = alloca i1, i1 0
+  %nop6049 = alloca i1, i1 0
+  %nop6050 = alloca i1, i1 0
+  %nop6051 = alloca i1, i1 0
+  %nop6052 = alloca i1, i1 0
+  %nop6053 = alloca i1, i1 0
+  %nop6054 = alloca i1, i1 0
+  %nop6055 = alloca i1, i1 0
+  %nop6056 = alloca i1, i1 0
+  %nop6057 = alloca i1, i1 0
+  %nop6058 = alloca i1, i1 0
+  %nop6059 = alloca i1, i1 0
+  %nop6060 = alloca i1, i1 0
+  %nop6061 = alloca i1, i1 0
+  %nop6062 = alloca i1, i1 0
+  %nop6063 = alloca i1, i1 0
+  %nop6064 = alloca i1, i1 0
+  %nop6065 = alloca i1, i1 0
+  %nop6066 = alloca i1, i1 0
+  %nop6067 = alloca i1, i1 0
+  %nop6068 = alloca i1, i1 0
+  %nop6069 = alloca i1, i1 0
+  %nop6070 = alloca i1, i1 0
+  %nop6071 = alloca i1, i1 0
+  %nop6072 = alloca i1, i1 0
+  %nop6073 = alloca i1, i1 0
+  %nop6074 = alloca i1, i1 0
+  %nop6075 = alloca i1, i1 0
+  %nop6076 = alloca i1, i1 0
+  %nop6077 = alloca i1, i1 0
+  %nop6078 = alloca i1, i1 0
+  %nop6079 = alloca i1, i1 0
+  %nop6080 = alloca i1, i1 0
+  %nop6081 = alloca i1, i1 0
+  %nop6082 = alloca i1, i1 0
+  %nop6083 = alloca i1, i1 0
+  %nop6084 = alloca i1, i1 0
+  %nop6085 = alloca i1, i1 0
+  %nop6086 = alloca i1, i1 0
+  %nop6087 = alloca i1, i1 0
+  %nop6088 = alloca i1, i1 0
+  %nop6089 = alloca i1, i1 0
+  %nop6090 = alloca i1, i1 0
+  %nop6091 = alloca i1, i1 0
+  %nop6092 = alloca i1, i1 0
+  %nop6093 = alloca i1, i1 0
+  %nop6094 = alloca i1, i1 0
+  %nop6095 = alloca i1, i1 0
+  %nop6096 = alloca i1, i1 0
+  %nop6097 = alloca i1, i1 0
+  %nop6098 = alloca i1, i1 0
+  %nop6099 = alloca i1, i1 0
+  %nop6100 = alloca i1, i1 0
+  %nop6101 = alloca i1, i1 0
+  %nop6102 = alloca i1, i1 0
+  %nop6103 = alloca i1, i1 0
+  %nop6104 = alloca i1, i1 0
+  %nop6105 = alloca i1, i1 0
+  %nop6106 = alloca i1, i1 0
+  %nop6107 = alloca i1, i1 0
+  %nop6108 = alloca i1, i1 0
+  %nop6109 = alloca i1, i1 0
+  %nop6110 = alloca i1, i1 0
+  %nop6111 = alloca i1, i1 0
+  %nop6112 = alloca i1, i1 0
+  %nop6113 = alloca i1, i1 0
+  %nop6114 = alloca i1, i1 0
+  %nop6115 = alloca i1, i1 0
+  %nop6116 = alloca i1, i1 0
+  %nop6117 = alloca i1, i1 0
+  %nop6118 = alloca i1, i1 0
+  %nop6119 = alloca i1, i1 0
+  %nop6120 = alloca i1, i1 0
+  %nop6121 = alloca i1, i1 0
+  %nop6122 = alloca i1, i1 0
+  %nop6123 = alloca i1, i1 0
+  %nop6124 = alloca i1, i1 0
+  %nop6125 = alloca i1, i1 0
+  %nop6126 = alloca i1, i1 0
+  %nop6127 = alloca i1, i1 0
+  %nop6128 = alloca i1, i1 0
+  %nop6129 = alloca i1, i1 0
+  %nop6130 = alloca i1, i1 0
+  %nop6131 = alloca i1, i1 0
+  %nop6132 = alloca i1, i1 0
+  %nop6133 = alloca i1, i1 0
+  %nop6134 = alloca i1, i1 0
+  %nop6135 = alloca i1, i1 0
+  %nop6136 = alloca i1, i1 0
+  %nop6137 = alloca i1, i1 0
+  %nop6138 = alloca i1, i1 0
+  %nop6139 = alloca i1, i1 0
+  %nop6140 = alloca i1, i1 0
+  %nop6141 = alloca i1, i1 0
+  %nop6142 = alloca i1, i1 0
+  %nop6143 = alloca i1, i1 0
+  %nop6144 = alloca i1, i1 0
+  %nop6145 = alloca i1, i1 0
+  %nop6146 = alloca i1, i1 0
+  %nop6147 = alloca i1, i1 0
+  %nop6148 = alloca i1, i1 0
+  %nop6149 = alloca i1, i1 0
+  %nop6150 = alloca i1, i1 0
+  %nop6151 = alloca i1, i1 0
+  %nop6152 = alloca i1, i1 0
+  %nop6153 = alloca i1, i1 0
+  %nop6154 = alloca i1, i1 0
+  %nop6155 = alloca i1, i1 0
+  %nop6156 = alloca i1, i1 0
+  %nop6157 = alloca i1, i1 0
+  %nop6158 = alloca i1, i1 0
+  %nop6159 = alloca i1, i1 0
+  %nop6160 = alloca i1, i1 0
+  %nop6161 = alloca i1, i1 0
+  %nop6162 = alloca i1, i1 0
+  %nop6163 = alloca i1, i1 0
+  %nop6164 = alloca i1, i1 0
+  %nop6165 = alloca i1, i1 0
+  %nop6166 = alloca i1, i1 0
+  %nop6167 = alloca i1, i1 0
+  %nop6168 = alloca i1, i1 0
+  %nop6169 = alloca i1, i1 0
+  %nop6170 = alloca i1, i1 0
+  %nop6171 = alloca i1, i1 0
+  %nop6172 = alloca i1, i1 0
+  %nop6173 = alloca i1, i1 0
+  %nop6174 = alloca i1, i1 0
+  %nop6175 = alloca i1, i1 0
+  %nop6176 = alloca i1, i1 0
+  %nop6177 = alloca i1, i1 0
+  %nop6178 = alloca i1, i1 0
+  %nop6179 = alloca i1, i1 0
+  %nop6180 = alloca i1, i1 0
+  %nop6181 = alloca i1, i1 0
+  %nop6182 = alloca i1, i1 0
+  %nop6183 = alloca i1, i1 0
+  %nop6184 = alloca i1, i1 0
+  %nop6185 = alloca i1, i1 0
+  %nop6186 = alloca i1, i1 0
+  %nop6187 = alloca i1, i1 0
+  %nop6188 = alloca i1, i1 0
+  %nop6189 = alloca i1, i1 0
+  %nop6190 = alloca i1, i1 0
+  %nop6191 = alloca i1, i1 0
+  %nop6192 = alloca i1, i1 0
+  %nop6193 = alloca i1, i1 0
+  %nop6194 = alloca i1, i1 0
+  %nop6195 = alloca i1, i1 0
+  %nop6196 = alloca i1, i1 0
+  %nop6197 = alloca i1, i1 0
+  %nop6198 = alloca i1, i1 0
+  %nop6199 = alloca i1, i1 0
+  %nop6200 = alloca i1, i1 0
+  %nop6201 = alloca i1, i1 0
+  %nop6202 = alloca i1, i1 0
+  %nop6203 = alloca i1, i1 0
+  %nop6204 = alloca i1, i1 0
+  %nop6205 = alloca i1, i1 0
+  %nop6206 = alloca i1, i1 0
+  %nop6207 = alloca i1, i1 0
+  %nop6208 = alloca i1, i1 0
+  %nop6209 = alloca i1, i1 0
+  %nop6210 = alloca i1, i1 0
+  %nop6211 = alloca i1, i1 0
+  %nop6212 = alloca i1, i1 0
+  %nop6213 = alloca i1, i1 0
+  %nop6214 = alloca i1, i1 0
+  %nop6215 = alloca i1, i1 0
+  %nop6216 = alloca i1, i1 0
+  %nop6217 = alloca i1, i1 0
+  %nop6218 = alloca i1, i1 0
+  %nop6219 = alloca i1, i1 0
+  %nop6220 = alloca i1, i1 0
+  %nop6221 = alloca i1, i1 0
+  %nop6222 = alloca i1, i1 0
+  %nop6223 = alloca i1, i1 0
+  %nop6224 = alloca i1, i1 0
+  %nop6225 = alloca i1, i1 0
+  %nop6226 = alloca i1, i1 0
+  %nop6227 = alloca i1, i1 0
+  %nop6228 = alloca i1, i1 0
+  %nop6229 = alloca i1, i1 0
+  %nop6230 = alloca i1, i1 0
+  %nop6231 = alloca i1, i1 0
+  %nop6232 = alloca i1, i1 0
+  %nop6233 = alloca i1, i1 0
+  %nop6234 = alloca i1, i1 0
+  %nop6235 = alloca i1, i1 0
+  %nop6236 = alloca i1, i1 0
+  %nop6237 = alloca i1, i1 0
+  %nop6238 = alloca i1, i1 0
+  %nop6239 = alloca i1, i1 0
+  %nop6240 = alloca i1, i1 0
+  %nop6241 = alloca i1, i1 0
+  %nop6242 = alloca i1, i1 0
+  %nop6243 = alloca i1, i1 0
+  %nop6244 = alloca i1, i1 0
+  %nop6245 = alloca i1, i1 0
+  %nop6246 = alloca i1, i1 0
+  %nop6247 = alloca i1, i1 0
+  %nop6248 = alloca i1, i1 0
+  %nop6249 = alloca i1, i1 0
+  %nop6250 = alloca i1, i1 0
+  %nop6251 = alloca i1, i1 0
+  %nop6252 = alloca i1, i1 0
+  %nop6253 = alloca i1, i1 0
+  %nop6254 = alloca i1, i1 0
+  %nop6255 = alloca i1, i1 0
+  %nop6256 = alloca i1, i1 0
+  %nop6257 = alloca i1, i1 0
+  %nop6258 = alloca i1, i1 0
+  %nop6259 = alloca i1, i1 0
+  %nop6260 = alloca i1, i1 0
+  %nop6261 = alloca i1, i1 0
+  %nop6262 = alloca i1, i1 0
+  %nop6263 = alloca i1, i1 0
+  %nop6264 = alloca i1, i1 0
+  %nop6265 = alloca i1, i1 0
+  %nop6266 = alloca i1, i1 0
+  %nop6267 = alloca i1, i1 0
+  %nop6268 = alloca i1, i1 0
+  %nop6269 = alloca i1, i1 0
+  %nop6270 = alloca i1, i1 0
+  %nop6271 = alloca i1, i1 0
+  %nop6272 = alloca i1, i1 0
+  %nop6273 = alloca i1, i1 0
+  %nop6274 = alloca i1, i1 0
+  %nop6275 = alloca i1, i1 0
+  %nop6276 = alloca i1, i1 0
+  %nop6277 = alloca i1, i1 0
+  %nop6278 = alloca i1, i1 0
+  %nop6279 = alloca i1, i1 0
+  %nop6280 = alloca i1, i1 0
+  %nop6281 = alloca i1, i1 0
+  %nop6282 = alloca i1, i1 0
+  %nop6283 = alloca i1, i1 0
+  %nop6284 = alloca i1, i1 0
+  %nop6285 = alloca i1, i1 0
+  %nop6286 = alloca i1, i1 0
+  %nop6287 = alloca i1, i1 0
+  %nop6288 = alloca i1, i1 0
+  %nop6289 = alloca i1, i1 0
+  %nop6290 = alloca i1, i1 0
+  %nop6291 = alloca i1, i1 0
+  %nop6292 = alloca i1, i1 0
+  %nop6293 = alloca i1, i1 0
+  %nop6294 = alloca i1, i1 0
+  %nop6295 = alloca i1, i1 0
+  %nop6296 = alloca i1, i1 0
+  %nop6297 = alloca i1, i1 0
+  %nop6298 = alloca i1, i1 0
+  %nop6299 = alloca i1, i1 0
+  %nop6300 = alloca i1, i1 0
+  %nop6301 = alloca i1, i1 0
+  %nop6302 = alloca i1, i1 0
+  %nop6303 = alloca i1, i1 0
+  %nop6304 = alloca i1, i1 0
+  %nop6305 = alloca i1, i1 0
+  %nop6306 = alloca i1, i1 0
+  %nop6307 = alloca i1, i1 0
+  %nop6308 = alloca i1, i1 0
+  %nop6309 = alloca i1, i1 0
+  %nop6310 = alloca i1, i1 0
+  %nop6311 = alloca i1, i1 0
+  %nop6312 = alloca i1, i1 0
+  %nop6313 = alloca i1, i1 0
+  %nop6314 = alloca i1, i1 0
+  %nop6315 = alloca i1, i1 0
+  %nop6316 = alloca i1, i1 0
+  %nop6317 = alloca i1, i1 0
+  %nop6318 = alloca i1, i1 0
+  %nop6319 = alloca i1, i1 0
+  %nop6320 = alloca i1, i1 0
+  %nop6321 = alloca i1, i1 0
+  %nop6322 = alloca i1, i1 0
+  %nop6323 = alloca i1, i1 0
+  %nop6324 = alloca i1, i1 0
+  %nop6325 = alloca i1, i1 0
+  %nop6326 = alloca i1, i1 0
+  %nop6327 = alloca i1, i1 0
+  %nop6328 = alloca i1, i1 0
+  %nop6329 = alloca i1, i1 0
+  %nop6330 = alloca i1, i1 0
+  %nop6331 = alloca i1, i1 0
+  %nop6332 = alloca i1, i1 0
+  %nop6333 = alloca i1, i1 0
+  %nop6334 = alloca i1, i1 0
+  %nop6335 = alloca i1, i1 0
+  %nop6336 = alloca i1, i1 0
+  %nop6337 = alloca i1, i1 0
+  %nop6338 = alloca i1, i1 0
+  %nop6339 = alloca i1, i1 0
+  %nop6340 = alloca i1, i1 0
+  %nop6341 = alloca i1, i1 0
+  %nop6342 = alloca i1, i1 0
+  %nop6343 = alloca i1, i1 0
+  %nop6344 = alloca i1, i1 0
+  %nop6345 = alloca i1, i1 0
+  %nop6346 = alloca i1, i1 0
+  %nop6347 = alloca i1, i1 0
+  %nop6348 = alloca i1, i1 0
+  %nop6349 = alloca i1, i1 0
+  %nop6350 = alloca i1, i1 0
+  %nop6351 = alloca i1, i1 0
+  %nop6352 = alloca i1, i1 0
+  %nop6353 = alloca i1, i1 0
+  %nop6354 = alloca i1, i1 0
+  %nop6355 = alloca i1, i1 0
+  %nop6356 = alloca i1, i1 0
+  %nop6357 = alloca i1, i1 0
+  %nop6358 = alloca i1, i1 0
+  %nop6359 = alloca i1, i1 0
+  %nop6360 = alloca i1, i1 0
+  %nop6361 = alloca i1, i1 0
+  %nop6362 = alloca i1, i1 0
+  %nop6363 = alloca i1, i1 0
+  %nop6364 = alloca i1, i1 0
+  %nop6365 = alloca i1, i1 0
+  %nop6366 = alloca i1, i1 0
+  %nop6367 = alloca i1, i1 0
+  %nop6368 = alloca i1, i1 0
+  %nop6369 = alloca i1, i1 0
+  %nop6370 = alloca i1, i1 0
+  %nop6371 = alloca i1, i1 0
+  %nop6372 = alloca i1, i1 0
+  %nop6373 = alloca i1, i1 0
+  %nop6374 = alloca i1, i1 0
+  %nop6375 = alloca i1, i1 0
+  %nop6376 = alloca i1, i1 0
+  %nop6377 = alloca i1, i1 0
+  %nop6378 = alloca i1, i1 0
+  %nop6379 = alloca i1, i1 0
+  %nop6380 = alloca i1, i1 0
+  %nop6381 = alloca i1, i1 0
+  %nop6382 = alloca i1, i1 0
+  %nop6383 = alloca i1, i1 0
+  %nop6384 = alloca i1, i1 0
+  %nop6385 = alloca i1, i1 0
+  %nop6386 = alloca i1, i1 0
+  %nop6387 = alloca i1, i1 0
+  %nop6388 = alloca i1, i1 0
+  %nop6389 = alloca i1, i1 0
+  %nop6390 = alloca i1, i1 0
+  %nop6391 = alloca i1, i1 0
+  %nop6392 = alloca i1, i1 0
+  %nop6393 = alloca i1, i1 0
+  %nop6394 = alloca i1, i1 0
+  %nop6395 = alloca i1, i1 0
+  %nop6396 = alloca i1, i1 0
+  %nop6397 = alloca i1, i1 0
+  %nop6398 = alloca i1, i1 0
+  %nop6399 = alloca i1, i1 0
+  %nop6400 = alloca i1, i1 0
+  %nop6401 = alloca i1, i1 0
+  %nop6402 = alloca i1, i1 0
+  %nop6403 = alloca i1, i1 0
+  %nop6404 = alloca i1, i1 0
+  %nop6405 = alloca i1, i1 0
+  %nop6406 = alloca i1, i1 0
+  %nop6407 = alloca i1, i1 0
+  %nop6408 = alloca i1, i1 0
+  %nop6409 = alloca i1, i1 0
+  %nop6410 = alloca i1, i1 0
+  %nop6411 = alloca i1, i1 0
+  %nop6412 = alloca i1, i1 0
+  %nop6413 = alloca i1, i1 0
+  %nop6414 = alloca i1, i1 0
+  %nop6415 = alloca i1, i1 0
+  %nop6416 = alloca i1, i1 0
+  %nop6417 = alloca i1, i1 0
+  %nop6418 = alloca i1, i1 0
+  %nop6419 = alloca i1, i1 0
+  %nop6420 = alloca i1, i1 0
+  %nop6421 = alloca i1, i1 0
+  %nop6422 = alloca i1, i1 0
+  %nop6423 = alloca i1, i1 0
+  %nop6424 = alloca i1, i1 0
+  %nop6425 = alloca i1, i1 0
+  %nop6426 = alloca i1, i1 0
+  %nop6427 = alloca i1, i1 0
+  %nop6428 = alloca i1, i1 0
+  %nop6429 = alloca i1, i1 0
+  %nop6430 = alloca i1, i1 0
+  %nop6431 = alloca i1, i1 0
+  %nop6432 = alloca i1, i1 0
+  %nop6433 = alloca i1, i1 0
+  %nop6434 = alloca i1, i1 0
+  %nop6435 = alloca i1, i1 0
+  %nop6436 = alloca i1, i1 0
+  %nop6437 = alloca i1, i1 0
+  %nop6438 = alloca i1, i1 0
+  %nop6439 = alloca i1, i1 0
+  %nop6440 = alloca i1, i1 0
+  %nop6441 = alloca i1, i1 0
+  %nop6442 = alloca i1, i1 0
+  %nop6443 = alloca i1, i1 0
+  %nop6444 = alloca i1, i1 0
+  %nop6445 = alloca i1, i1 0
+  %nop6446 = alloca i1, i1 0
+  %nop6447 = alloca i1, i1 0
+  %nop6448 = alloca i1, i1 0
+  %nop6449 = alloca i1, i1 0
+  %nop6450 = alloca i1, i1 0
+  %nop6451 = alloca i1, i1 0
+  %nop6452 = alloca i1, i1 0
+  %nop6453 = alloca i1, i1 0
+  %nop6454 = alloca i1, i1 0
+  %nop6455 = alloca i1, i1 0
+  %nop6456 = alloca i1, i1 0
+  %nop6457 = alloca i1, i1 0
+  %nop6458 = alloca i1, i1 0
+  %nop6459 = alloca i1, i1 0
+  %nop6460 = alloca i1, i1 0
+  %nop6461 = alloca i1, i1 0
+  %nop6462 = alloca i1, i1 0
+  %nop6463 = alloca i1, i1 0
+  %nop6464 = alloca i1, i1 0
+  %nop6465 = alloca i1, i1 0
+  %nop6466 = alloca i1, i1 0
+  %nop6467 = alloca i1, i1 0
+  %nop6468 = alloca i1, i1 0
+  %nop6469 = alloca i1, i1 0
+  %nop6470 = alloca i1, i1 0
+  %nop6471 = alloca i1, i1 0
+  %nop6472 = alloca i1, i1 0
+  %nop6473 = alloca i1, i1 0
+  %nop6474 = alloca i1, i1 0
+  %nop6475 = alloca i1, i1 0
+  %nop6476 = alloca i1, i1 0
+  %nop6477 = alloca i1, i1 0
+  %nop6478 = alloca i1, i1 0
+  %nop6479 = alloca i1, i1 0
+  %nop6480 = alloca i1, i1 0
+  %nop6481 = alloca i1, i1 0
+  %nop6482 = alloca i1, i1 0
+  %nop6483 = alloca i1, i1 0
+  %nop6484 = alloca i1, i1 0
+  %nop6485 = alloca i1, i1 0
+  %nop6486 = alloca i1, i1 0
+  %nop6487 = alloca i1, i1 0
+  %nop6488 = alloca i1, i1 0
+  %nop6489 = alloca i1, i1 0
+  %nop6490 = alloca i1, i1 0
+  %nop6491 = alloca i1, i1 0
+  %nop6492 = alloca i1, i1 0
+  %nop6493 = alloca i1, i1 0
+  %nop6494 = alloca i1, i1 0
+  %nop6495 = alloca i1, i1 0
+  %nop6496 = alloca i1, i1 0
+  %nop6497 = alloca i1, i1 0
+  %nop6498 = alloca i1, i1 0
+  %nop6499 = alloca i1, i1 0
+  %nop6500 = alloca i1, i1 0
+  %nop6501 = alloca i1, i1 0
+  %nop6502 = alloca i1, i1 0
+  %nop6503 = alloca i1, i1 0
+  %nop6504 = alloca i1, i1 0
+  %nop6505 = alloca i1, i1 0
+  %nop6506 = alloca i1, i1 0
+  %nop6507 = alloca i1, i1 0
+  %nop6508 = alloca i1, i1 0
+  %nop6509 = alloca i1, i1 0
+  %nop6510 = alloca i1, i1 0
+  %nop6511 = alloca i1, i1 0
+  %nop6512 = alloca i1, i1 0
+  %nop6513 = alloca i1, i1 0
+  %nop6514 = alloca i1, i1 0
+  %nop6515 = alloca i1, i1 0
+  %nop6516 = alloca i1, i1 0
+  %nop6517 = alloca i1, i1 0
+  %nop6518 = alloca i1, i1 0
+  %nop6519 = alloca i1, i1 0
+  %nop6520 = alloca i1, i1 0
+  %nop6521 = alloca i1, i1 0
+  %nop6522 = alloca i1, i1 0
+  %nop6523 = alloca i1, i1 0
+  %nop6524 = alloca i1, i1 0
+  %nop6525 = alloca i1, i1 0
+  %nop6526 = alloca i1, i1 0
+  %nop6527 = alloca i1, i1 0
+  %nop6528 = alloca i1, i1 0
+  %nop6529 = alloca i1, i1 0
+  %nop6530 = alloca i1, i1 0
+  %nop6531 = alloca i1, i1 0
+  %nop6532 = alloca i1, i1 0
+  %nop6533 = alloca i1, i1 0
+  %nop6534 = alloca i1, i1 0
+  %nop6535 = alloca i1, i1 0
+  %nop6536 = alloca i1, i1 0
+  %nop6537 = alloca i1, i1 0
+  %nop6538 = alloca i1, i1 0
+  %nop6539 = alloca i1, i1 0
+  %nop6540 = alloca i1, i1 0
+  %nop6541 = alloca i1, i1 0
+  %nop6542 = alloca i1, i1 0
+  %nop6543 = alloca i1, i1 0
+  %nop6544 = alloca i1, i1 0
+  %nop6545 = alloca i1, i1 0
+  %nop6546 = alloca i1, i1 0
+  %nop6547 = alloca i1, i1 0
+  %nop6548 = alloca i1, i1 0
+  %nop6549 = alloca i1, i1 0
+  %nop6550 = alloca i1, i1 0
+  %nop6551 = alloca i1, i1 0
+  %nop6552 = alloca i1, i1 0
+  %nop6553 = alloca i1, i1 0
+  %nop6554 = alloca i1, i1 0
+  %nop6555 = alloca i1, i1 0
+  %nop6556 = alloca i1, i1 0
+  %nop6557 = alloca i1, i1 0
+  %nop6558 = alloca i1, i1 0
+  %nop6559 = alloca i1, i1 0
+  %nop6560 = alloca i1, i1 0
+  %nop6561 = alloca i1, i1 0
+  %nop6562 = alloca i1, i1 0
+  %nop6563 = alloca i1, i1 0
+  %nop6564 = alloca i1, i1 0
+  %nop6565 = alloca i1, i1 0
+  %nop6566 = alloca i1, i1 0
+  %nop6567 = alloca i1, i1 0
+  %nop6568 = alloca i1, i1 0
+  %nop6569 = alloca i1, i1 0
+  %nop6570 = alloca i1, i1 0
+  %nop6571 = alloca i1, i1 0
+  %nop6572 = alloca i1, i1 0
+  %nop6573 = alloca i1, i1 0
+  %nop6574 = alloca i1, i1 0
+  %nop6575 = alloca i1, i1 0
+  %nop6576 = alloca i1, i1 0
+  %nop6577 = alloca i1, i1 0
+  %nop6578 = alloca i1, i1 0
+  %nop6579 = alloca i1, i1 0
+  %nop6580 = alloca i1, i1 0
+  %nop6581 = alloca i1, i1 0
+  %nop6582 = alloca i1, i1 0
+  %nop6583 = alloca i1, i1 0
+  %nop6584 = alloca i1, i1 0
+  %nop6585 = alloca i1, i1 0
+  %nop6586 = alloca i1, i1 0
+  %nop6587 = alloca i1, i1 0
+  %nop6588 = alloca i1, i1 0
+  %nop6589 = alloca i1, i1 0
+  %nop6590 = alloca i1, i1 0
+  %nop6591 = alloca i1, i1 0
+  %nop6592 = alloca i1, i1 0
+  %nop6593 = alloca i1, i1 0
+  %nop6594 = alloca i1, i1 0
+  %nop6595 = alloca i1, i1 0
+  %nop6596 = alloca i1, i1 0
+  %nop6597 = alloca i1, i1 0
+  %nop6598 = alloca i1, i1 0
+  %nop6599 = alloca i1, i1 0
+  %nop6600 = alloca i1, i1 0
+  %nop6601 = alloca i1, i1 0
+  %nop6602 = alloca i1, i1 0
+  %nop6603 = alloca i1, i1 0
+  %nop6604 = alloca i1, i1 0
+  %nop6605 = alloca i1, i1 0
+  %nop6606 = alloca i1, i1 0
+  %nop6607 = alloca i1, i1 0
+  %nop6608 = alloca i1, i1 0
+  %nop6609 = alloca i1, i1 0
+  %nop6610 = alloca i1, i1 0
+  %nop6611 = alloca i1, i1 0
+  %nop6612 = alloca i1, i1 0
+  %nop6613 = alloca i1, i1 0
+  %nop6614 = alloca i1, i1 0
+  %nop6615 = alloca i1, i1 0
+  %nop6616 = alloca i1, i1 0
+  %nop6617 = alloca i1, i1 0
+  %nop6618 = alloca i1, i1 0
+  %nop6619 = alloca i1, i1 0
+  %nop6620 = alloca i1, i1 0
+  %nop6621 = alloca i1, i1 0
+  %nop6622 = alloca i1, i1 0
+  %nop6623 = alloca i1, i1 0
+  %nop6624 = alloca i1, i1 0
+  %nop6625 = alloca i1, i1 0
+  %nop6626 = alloca i1, i1 0
+  %nop6627 = alloca i1, i1 0
+  %nop6628 = alloca i1, i1 0
+  %nop6629 = alloca i1, i1 0
+  %nop6630 = alloca i1, i1 0
+  %nop6631 = alloca i1, i1 0
+  %nop6632 = alloca i1, i1 0
+  %nop6633 = alloca i1, i1 0
+  %nop6634 = alloca i1, i1 0
+  %nop6635 = alloca i1, i1 0
+  %nop6636 = alloca i1, i1 0
+  %nop6637 = alloca i1, i1 0
+  %nop6638 = alloca i1, i1 0
+  %nop6639 = alloca i1, i1 0
+  %nop6640 = alloca i1, i1 0
+  %nop6641 = alloca i1, i1 0
+  %nop6642 = alloca i1, i1 0
+  %nop6643 = alloca i1, i1 0
+  %nop6644 = alloca i1, i1 0
+  %nop6645 = alloca i1, i1 0
+  %nop6646 = alloca i1, i1 0
+  %nop6647 = alloca i1, i1 0
+  %nop6648 = alloca i1, i1 0
+  %nop6649 = alloca i1, i1 0
+  %nop6650 = alloca i1, i1 0
+  %nop6651 = alloca i1, i1 0
+  %nop6652 = alloca i1, i1 0
+  %nop6653 = alloca i1, i1 0
+  %nop6654 = alloca i1, i1 0
+  %nop6655 = alloca i1, i1 0
+  %nop6656 = alloca i1, i1 0
+  %nop6657 = alloca i1, i1 0
+  %nop6658 = alloca i1, i1 0
+  %nop6659 = alloca i1, i1 0
+  %nop6660 = alloca i1, i1 0
+  %nop6661 = alloca i1, i1 0
+  %nop6662 = alloca i1, i1 0
+  %nop6663 = alloca i1, i1 0
+  %nop6664 = alloca i1, i1 0
+  %nop6665 = alloca i1, i1 0
+  %nop6666 = alloca i1, i1 0
+  %nop6667 = alloca i1, i1 0
+  %nop6668 = alloca i1, i1 0
+  %nop6669 = alloca i1, i1 0
+  %nop6670 = alloca i1, i1 0
+  %nop6671 = alloca i1, i1 0
+  %nop6672 = alloca i1, i1 0
+  %nop6673 = alloca i1, i1 0
+  %nop6674 = alloca i1, i1 0
+  %nop6675 = alloca i1, i1 0
+  %nop6676 = alloca i1, i1 0
+  %nop6677 = alloca i1, i1 0
+  %nop6678 = alloca i1, i1 0
+  %nop6679 = alloca i1, i1 0
+  %nop6680 = alloca i1, i1 0
+  %nop6681 = alloca i1, i1 0
+  %nop6682 = alloca i1, i1 0
+  %nop6683 = alloca i1, i1 0
+  %nop6684 = alloca i1, i1 0
+  %nop6685 = alloca i1, i1 0
+  %nop6686 = alloca i1, i1 0
+  %nop6687 = alloca i1, i1 0
+  %nop6688 = alloca i1, i1 0
+  %nop6689 = alloca i1, i1 0
+  %nop6690 = alloca i1, i1 0
+  %nop6691 = alloca i1, i1 0
+  %nop6692 = alloca i1, i1 0
+  %nop6693 = alloca i1, i1 0
+  %nop6694 = alloca i1, i1 0
+  %nop6695 = alloca i1, i1 0
+  %nop6696 = alloca i1, i1 0
+  %nop6697 = alloca i1, i1 0
+  %nop6698 = alloca i1, i1 0
+  %nop6699 = alloca i1, i1 0
+  %nop6700 = alloca i1, i1 0
+  %nop6701 = alloca i1, i1 0
+  %nop6702 = alloca i1, i1 0
+  %nop6703 = alloca i1, i1 0
+  %nop6704 = alloca i1, i1 0
+  %nop6705 = alloca i1, i1 0
+  %nop6706 = alloca i1, i1 0
+  %nop6707 = alloca i1, i1 0
+  %nop6708 = alloca i1, i1 0
+  %nop6709 = alloca i1, i1 0
+  %nop6710 = alloca i1, i1 0
+  %nop6711 = alloca i1, i1 0
+  %nop6712 = alloca i1, i1 0
+  %nop6713 = alloca i1, i1 0
+  %nop6714 = alloca i1, i1 0
+  %nop6715 = alloca i1, i1 0
+  %nop6716 = alloca i1, i1 0
+  %nop6717 = alloca i1, i1 0
+  %nop6718 = alloca i1, i1 0
+  %nop6719 = alloca i1, i1 0
+  %nop6720 = alloca i1, i1 0
+  %nop6721 = alloca i1, i1 0
+  %nop6722 = alloca i1, i1 0
+  %nop6723 = alloca i1, i1 0
+  %nop6724 = alloca i1, i1 0
+  %nop6725 = alloca i1, i1 0
+  %nop6726 = alloca i1, i1 0
+  %nop6727 = alloca i1, i1 0
+  %nop6728 = alloca i1, i1 0
+  %nop6729 = alloca i1, i1 0
+  %nop6730 = alloca i1, i1 0
+  %nop6731 = alloca i1, i1 0
+  %nop6732 = alloca i1, i1 0
+  %nop6733 = alloca i1, i1 0
+  %nop6734 = alloca i1, i1 0
+  %nop6735 = alloca i1, i1 0
+  %nop6736 = alloca i1, i1 0
+  %nop6737 = alloca i1, i1 0
+  %nop6738 = alloca i1, i1 0
+  %nop6739 = alloca i1, i1 0
+  %nop6740 = alloca i1, i1 0
+  %nop6741 = alloca i1, i1 0
+  %nop6742 = alloca i1, i1 0
+  %nop6743 = alloca i1, i1 0
+  %nop6744 = alloca i1, i1 0
+  %nop6745 = alloca i1, i1 0
+  %nop6746 = alloca i1, i1 0
+  %nop6747 = alloca i1, i1 0
+  %nop6748 = alloca i1, i1 0
+  %nop6749 = alloca i1, i1 0
+  %nop6750 = alloca i1, i1 0
+  %nop6751 = alloca i1, i1 0
+  %nop6752 = alloca i1, i1 0
+  %nop6753 = alloca i1, i1 0
+  %nop6754 = alloca i1, i1 0
+  %nop6755 = alloca i1, i1 0
+  %nop6756 = alloca i1, i1 0
+  %nop6757 = alloca i1, i1 0
+  %nop6758 = alloca i1, i1 0
+  %nop6759 = alloca i1, i1 0
+  %nop6760 = alloca i1, i1 0
+  %nop6761 = alloca i1, i1 0
+  %nop6762 = alloca i1, i1 0
+  %nop6763 = alloca i1, i1 0
+  %nop6764 = alloca i1, i1 0
+  %nop6765 = alloca i1, i1 0
+  %nop6766 = alloca i1, i1 0
+  %nop6767 = alloca i1, i1 0
+  %nop6768 = alloca i1, i1 0
+  %nop6769 = alloca i1, i1 0
+  %nop6770 = alloca i1, i1 0
+  %nop6771 = alloca i1, i1 0
+  %nop6772 = alloca i1, i1 0
+  %nop6773 = alloca i1, i1 0
+  %nop6774 = alloca i1, i1 0
+  %nop6775 = alloca i1, i1 0
+  %nop6776 = alloca i1, i1 0
+  %nop6777 = alloca i1, i1 0
+  %nop6778 = alloca i1, i1 0
+  %nop6779 = alloca i1, i1 0
+  %nop6780 = alloca i1, i1 0
+  %nop6781 = alloca i1, i1 0
+  %nop6782 = alloca i1, i1 0
+  %nop6783 = alloca i1, i1 0
+  %nop6784 = alloca i1, i1 0
+  %nop6785 = alloca i1, i1 0
+  %nop6786 = alloca i1, i1 0
+  %nop6787 = alloca i1, i1 0
+  %nop6788 = alloca i1, i1 0
+  %nop6789 = alloca i1, i1 0
+  %nop6790 = alloca i1, i1 0
+  %nop6791 = alloca i1, i1 0
+  %nop6792 = alloca i1, i1 0
+  %nop6793 = alloca i1, i1 0
+  %nop6794 = alloca i1, i1 0
+  %nop6795 = alloca i1, i1 0
+  %nop6796 = alloca i1, i1 0
+  %nop6797 = alloca i1, i1 0
+  %nop6798 = alloca i1, i1 0
+  %nop6799 = alloca i1, i1 0
+  %nop6800 = alloca i1, i1 0
+  %nop6801 = alloca i1, i1 0
+  %nop6802 = alloca i1, i1 0
+  %nop6803 = alloca i1, i1 0
+  %nop6804 = alloca i1, i1 0
+  %nop6805 = alloca i1, i1 0
+  %nop6806 = alloca i1, i1 0
+  %nop6807 = alloca i1, i1 0
+  %nop6808 = alloca i1, i1 0
+  %nop6809 = alloca i1, i1 0
+  %nop6810 = alloca i1, i1 0
+  %nop6811 = alloca i1, i1 0
+  %nop6812 = alloca i1, i1 0
+  %nop6813 = alloca i1, i1 0
+  %nop6814 = alloca i1, i1 0
+  %nop6815 = alloca i1, i1 0
+  %nop6816 = alloca i1, i1 0
+  %nop6817 = alloca i1, i1 0
+  %nop6818 = alloca i1, i1 0
+  %nop6819 = alloca i1, i1 0
+  %nop6820 = alloca i1, i1 0
+  %nop6821 = alloca i1, i1 0
+  %nop6822 = alloca i1, i1 0
+  %nop6823 = alloca i1, i1 0
+  %nop6824 = alloca i1, i1 0
+  %nop6825 = alloca i1, i1 0
+  %nop6826 = alloca i1, i1 0
+  %nop6827 = alloca i1, i1 0
+  %nop6828 = alloca i1, i1 0
+  %nop6829 = alloca i1, i1 0
+  %nop6830 = alloca i1, i1 0
+  %nop6831 = alloca i1, i1 0
+  %nop6832 = alloca i1, i1 0
+  %nop6833 = alloca i1, i1 0
+  %nop6834 = alloca i1, i1 0
+  %nop6835 = alloca i1, i1 0
+  %nop6836 = alloca i1, i1 0
+  %nop6837 = alloca i1, i1 0
+  %nop6838 = alloca i1, i1 0
+  %nop6839 = alloca i1, i1 0
+  %nop6840 = alloca i1, i1 0
+  %nop6841 = alloca i1, i1 0
+  %nop6842 = alloca i1, i1 0
+  %nop6843 = alloca i1, i1 0
+  %nop6844 = alloca i1, i1 0
+  %nop6845 = alloca i1, i1 0
+  %nop6846 = alloca i1, i1 0
+  %nop6847 = alloca i1, i1 0
+  %nop6848 = alloca i1, i1 0
+  %nop6849 = alloca i1, i1 0
+  %nop6850 = alloca i1, i1 0
+  %nop6851 = alloca i1, i1 0
+  %nop6852 = alloca i1, i1 0
+  %nop6853 = alloca i1, i1 0
+  %nop6854 = alloca i1, i1 0
+  %nop6855 = alloca i1, i1 0
+  %nop6856 = alloca i1, i1 0
+  %nop6857 = alloca i1, i1 0
+  %nop6858 = alloca i1, i1 0
+  %nop6859 = alloca i1, i1 0
+  %nop6860 = alloca i1, i1 0
+  %nop6861 = alloca i1, i1 0
+  %nop6862 = alloca i1, i1 0
+  %nop6863 = alloca i1, i1 0
+  %nop6864 = alloca i1, i1 0
+  %nop6865 = alloca i1, i1 0
+  %nop6866 = alloca i1, i1 0
+  %nop6867 = alloca i1, i1 0
+  %nop6868 = alloca i1, i1 0
+  %nop6869 = alloca i1, i1 0
+  %nop6870 = alloca i1, i1 0
+  %nop6871 = alloca i1, i1 0
+  %nop6872 = alloca i1, i1 0
+  %nop6873 = alloca i1, i1 0
+  %nop6874 = alloca i1, i1 0
+  %nop6875 = alloca i1, i1 0
+  %nop6876 = alloca i1, i1 0
+  %nop6877 = alloca i1, i1 0
+  %nop6878 = alloca i1, i1 0
+  %nop6879 = alloca i1, i1 0
+  %nop6880 = alloca i1, i1 0
+  %nop6881 = alloca i1, i1 0
+  %nop6882 = alloca i1, i1 0
+  %nop6883 = alloca i1, i1 0
+  %nop6884 = alloca i1, i1 0
+  %nop6885 = alloca i1, i1 0
+  %nop6886 = alloca i1, i1 0
+  %nop6887 = alloca i1, i1 0
+  %nop6888 = alloca i1, i1 0
+  %nop6889 = alloca i1, i1 0
+  %nop6890 = alloca i1, i1 0
+  %nop6891 = alloca i1, i1 0
+  %nop6892 = alloca i1, i1 0
+  %nop6893 = alloca i1, i1 0
+  %nop6894 = alloca i1, i1 0
+  %nop6895 = alloca i1, i1 0
+  %nop6896 = alloca i1, i1 0
+  %nop6897 = alloca i1, i1 0
+  %nop6898 = alloca i1, i1 0
+  %nop6899 = alloca i1, i1 0
+  %nop6900 = alloca i1, i1 0
+  %nop6901 = alloca i1, i1 0
+  %nop6902 = alloca i1, i1 0
+  %nop6903 = alloca i1, i1 0
+  %nop6904 = alloca i1, i1 0
+  %nop6905 = alloca i1, i1 0
+  %nop6906 = alloca i1, i1 0
+  %nop6907 = alloca i1, i1 0
+  %nop6908 = alloca i1, i1 0
+  %nop6909 = alloca i1, i1 0
+  %nop6910 = alloca i1, i1 0
+  %nop6911 = alloca i1, i1 0
+  %nop6912 = alloca i1, i1 0
+  %nop6913 = alloca i1, i1 0
+  %nop6914 = alloca i1, i1 0
+  %nop6915 = alloca i1, i1 0
+  %nop6916 = alloca i1, i1 0
+  %nop6917 = alloca i1, i1 0
+  %nop6918 = alloca i1, i1 0
+  %nop6919 = alloca i1, i1 0
+  %nop6920 = alloca i1, i1 0
+  %nop6921 = alloca i1, i1 0
+  %nop6922 = alloca i1, i1 0
+  %nop6923 = alloca i1, i1 0
+  %nop6924 = alloca i1, i1 0
+  %nop6925 = alloca i1, i1 0
+  %nop6926 = alloca i1, i1 0
+  %nop6927 = alloca i1, i1 0
+  %nop6928 = alloca i1, i1 0
+  %nop6929 = alloca i1, i1 0
+  %nop6930 = alloca i1, i1 0
+  %nop6931 = alloca i1, i1 0
+  %nop6932 = alloca i1, i1 0
+  %nop6933 = alloca i1, i1 0
+  %nop6934 = alloca i1, i1 0
+  %nop6935 = alloca i1, i1 0
+  %nop6936 = alloca i1, i1 0
+  %nop6937 = alloca i1, i1 0
+  %nop6938 = alloca i1, i1 0
+  %nop6939 = alloca i1, i1 0
+  %nop6940 = alloca i1, i1 0
+  %nop6941 = alloca i1, i1 0
+  %nop6942 = alloca i1, i1 0
+  %nop6943 = alloca i1, i1 0
+  %nop6944 = alloca i1, i1 0
+  %nop6945 = alloca i1, i1 0
+  %nop6946 = alloca i1, i1 0
+  %nop6947 = alloca i1, i1 0
+  %nop6948 = alloca i1, i1 0
+  %nop6949 = alloca i1, i1 0
+  %nop6950 = alloca i1, i1 0
+  %nop6951 = alloca i1, i1 0
+  %nop6952 = alloca i1, i1 0
+  %nop6953 = alloca i1, i1 0
+  %nop6954 = alloca i1, i1 0
+  %nop6955 = alloca i1, i1 0
+  %nop6956 = alloca i1, i1 0
+  %nop6957 = alloca i1, i1 0
+  %nop6958 = alloca i1, i1 0
+  %nop6959 = alloca i1, i1 0
+  %nop6960 = alloca i1, i1 0
+  %nop6961 = alloca i1, i1 0
+  %nop6962 = alloca i1, i1 0
+  %nop6963 = alloca i1, i1 0
+  %nop6964 = alloca i1, i1 0
+  %nop6965 = alloca i1, i1 0
+  %nop6966 = alloca i1, i1 0
+  %nop6967 = alloca i1, i1 0
+  %nop6968 = alloca i1, i1 0
+  %nop6969 = alloca i1, i1 0
+  %nop6970 = alloca i1, i1 0
+  %nop6971 = alloca i1, i1 0
+  %nop6972 = alloca i1, i1 0
+  %nop6973 = alloca i1, i1 0
+  %nop6974 = alloca i1, i1 0
+  %nop6975 = alloca i1, i1 0
+  %nop6976 = alloca i1, i1 0
+  %nop6977 = alloca i1, i1 0
+  %nop6978 = alloca i1, i1 0
+  %nop6979 = alloca i1, i1 0
+  %nop6980 = alloca i1, i1 0
+  %nop6981 = alloca i1, i1 0
+  %nop6982 = alloca i1, i1 0
+  %nop6983 = alloca i1, i1 0
+  %nop6984 = alloca i1, i1 0
+  %nop6985 = alloca i1, i1 0
+  %nop6986 = alloca i1, i1 0
+  %nop6987 = alloca i1, i1 0
+  %nop6988 = alloca i1, i1 0
+  %nop6989 = alloca i1, i1 0
+  %nop6990 = alloca i1, i1 0
+  %nop6991 = alloca i1, i1 0
+  %nop6992 = alloca i1, i1 0
+  %nop6993 = alloca i1, i1 0
+  %nop6994 = alloca i1, i1 0
+  %nop6995 = alloca i1, i1 0
+  %nop6996 = alloca i1, i1 0
+  %nop6997 = alloca i1, i1 0
+  %nop6998 = alloca i1, i1 0
+  %nop6999 = alloca i1, i1 0
+  %nop7000 = alloca i1, i1 0
+  %nop7001 = alloca i1, i1 0
+  %nop7002 = alloca i1, i1 0
+  %nop7003 = alloca i1, i1 0
+  %nop7004 = alloca i1, i1 0
+  %nop7005 = alloca i1, i1 0
+  %nop7006 = alloca i1, i1 0
+  %nop7007 = alloca i1, i1 0
+  %nop7008 = alloca i1, i1 0
+  %nop7009 = alloca i1, i1 0
+  %nop7010 = alloca i1, i1 0
+  %nop7011 = alloca i1, i1 0
+  %nop7012 = alloca i1, i1 0
+  %nop7013 = alloca i1, i1 0
+  %nop7014 = alloca i1, i1 0
+  %nop7015 = alloca i1, i1 0
+  %nop7016 = alloca i1, i1 0
+  %nop7017 = alloca i1, i1 0
+  %nop7018 = alloca i1, i1 0
+  %nop7019 = alloca i1, i1 0
+  %nop7020 = alloca i1, i1 0
+  %nop7021 = alloca i1, i1 0
+  %nop7022 = alloca i1, i1 0
+  %nop7023 = alloca i1, i1 0
+  %nop7024 = alloca i1, i1 0
+  %nop7025 = alloca i1, i1 0
+  %nop7026 = alloca i1, i1 0
+  %nop7027 = alloca i1, i1 0
+  %nop7028 = alloca i1, i1 0
+  %nop7029 = alloca i1, i1 0
+  %nop7030 = alloca i1, i1 0
+  %nop7031 = alloca i1, i1 0
+  %nop7032 = alloca i1, i1 0
+  %nop7033 = alloca i1, i1 0
+  %nop7034 = alloca i1, i1 0
+  %nop7035 = alloca i1, i1 0
+  %nop7036 = alloca i1, i1 0
+  %nop7037 = alloca i1, i1 0
+  %nop7038 = alloca i1, i1 0
+  %nop7039 = alloca i1, i1 0
+  %nop7040 = alloca i1, i1 0
+  %nop7041 = alloca i1, i1 0
+  %nop7042 = alloca i1, i1 0
+  %nop7043 = alloca i1, i1 0
+  %nop7044 = alloca i1, i1 0
+  %nop7045 = alloca i1, i1 0
+  %nop7046 = alloca i1, i1 0
+  %nop7047 = alloca i1, i1 0
+  %nop7048 = alloca i1, i1 0
+  %nop7049 = alloca i1, i1 0
+  %nop7050 = alloca i1, i1 0
+  %nop7051 = alloca i1, i1 0
+  %nop7052 = alloca i1, i1 0
+  %nop7053 = alloca i1, i1 0
+  %nop7054 = alloca i1, i1 0
+  %nop7055 = alloca i1, i1 0
+  %nop7056 = alloca i1, i1 0
+  %nop7057 = alloca i1, i1 0
+  %nop7058 = alloca i1, i1 0
+  %nop7059 = alloca i1, i1 0
+  %nop7060 = alloca i1, i1 0
+  %nop7061 = alloca i1, i1 0
+  %nop7062 = alloca i1, i1 0
+  %nop7063 = alloca i1, i1 0
+  %nop7064 = alloca i1, i1 0
+  %nop7065 = alloca i1, i1 0
+  %nop7066 = alloca i1, i1 0
+  %nop7067 = alloca i1, i1 0
+  %nop7068 = alloca i1, i1 0
+  %nop7069 = alloca i1, i1 0
+  %nop7070 = alloca i1, i1 0
+  %nop7071 = alloca i1, i1 0
+  %nop7072 = alloca i1, i1 0
+  %nop7073 = alloca i1, i1 0
+  %nop7074 = alloca i1, i1 0
+  %nop7075 = alloca i1, i1 0
+  %nop7076 = alloca i1, i1 0
+  %nop7077 = alloca i1, i1 0
+  %nop7078 = alloca i1, i1 0
+  %nop7079 = alloca i1, i1 0
+  %nop7080 = alloca i1, i1 0
+  %nop7081 = alloca i1, i1 0
+  %nop7082 = alloca i1, i1 0
+  %nop7083 = alloca i1, i1 0
+  %nop7084 = alloca i1, i1 0
+  %nop7085 = alloca i1, i1 0
+  %nop7086 = alloca i1, i1 0
+  %nop7087 = alloca i1, i1 0
+  %nop7088 = alloca i1, i1 0
+  %nop7089 = alloca i1, i1 0
+  %nop7090 = alloca i1, i1 0
+  %nop7091 = alloca i1, i1 0
+  %nop7092 = alloca i1, i1 0
+  %nop7093 = alloca i1, i1 0
+  %nop7094 = alloca i1, i1 0
+  %nop7095 = alloca i1, i1 0
+  %nop7096 = alloca i1, i1 0
+  %nop7097 = alloca i1, i1 0
+  %nop7098 = alloca i1, i1 0
+  %nop7099 = alloca i1, i1 0
+  %nop7100 = alloca i1, i1 0
+  %nop7101 = alloca i1, i1 0
+  %nop7102 = alloca i1, i1 0
+  %nop7103 = alloca i1, i1 0
+  %nop7104 = alloca i1, i1 0
+  %nop7105 = alloca i1, i1 0
+  %nop7106 = alloca i1, i1 0
+  %nop7107 = alloca i1, i1 0
+  %nop7108 = alloca i1, i1 0
+  %nop7109 = alloca i1, i1 0
+  %nop7110 = alloca i1, i1 0
+  %nop7111 = alloca i1, i1 0
+  %nop7112 = alloca i1, i1 0
+  %nop7113 = alloca i1, i1 0
+  %nop7114 = alloca i1, i1 0
+  %nop7115 = alloca i1, i1 0
+  %nop7116 = alloca i1, i1 0
+  %nop7117 = alloca i1, i1 0
+  %nop7118 = alloca i1, i1 0
+  %nop7119 = alloca i1, i1 0
+  %nop7120 = alloca i1, i1 0
+  %nop7121 = alloca i1, i1 0
+  %nop7122 = alloca i1, i1 0
+  %nop7123 = alloca i1, i1 0
+  %nop7124 = alloca i1, i1 0
+  %nop7125 = alloca i1, i1 0
+  %nop7126 = alloca i1, i1 0
+  %nop7127 = alloca i1, i1 0
+  %nop7128 = alloca i1, i1 0
+  %nop7129 = alloca i1, i1 0
+  %nop7130 = alloca i1, i1 0
+  %nop7131 = alloca i1, i1 0
+  %nop7132 = alloca i1, i1 0
+  %nop7133 = alloca i1, i1 0
+  %nop7134 = alloca i1, i1 0
+  %nop7135 = alloca i1, i1 0
+  %nop7136 = alloca i1, i1 0
+  %nop7137 = alloca i1, i1 0
+  %nop7138 = alloca i1, i1 0
+  %nop7139 = alloca i1, i1 0
+  %nop7140 = alloca i1, i1 0
+  %nop7141 = alloca i1, i1 0
+  %nop7142 = alloca i1, i1 0
+  %nop7143 = alloca i1, i1 0
+  %nop7144 = alloca i1, i1 0
+  %nop7145 = alloca i1, i1 0
+  %nop7146 = alloca i1, i1 0
+  %nop7147 = alloca i1, i1 0
+  %nop7148 = alloca i1, i1 0
+  %nop7149 = alloca i1, i1 0
+  %nop7150 = alloca i1, i1 0
+  %nop7151 = alloca i1, i1 0
+  %nop7152 = alloca i1, i1 0
+  %nop7153 = alloca i1, i1 0
+  %nop7154 = alloca i1, i1 0
+  %nop7155 = alloca i1, i1 0
+  %nop7156 = alloca i1, i1 0
+  %nop7157 = alloca i1, i1 0
+  %nop7158 = alloca i1, i1 0
+  %nop7159 = alloca i1, i1 0
+  %nop7160 = alloca i1, i1 0
+  %nop7161 = alloca i1, i1 0
+  %nop7162 = alloca i1, i1 0
+  %nop7163 = alloca i1, i1 0
+  %nop7164 = alloca i1, i1 0
+  %nop7165 = alloca i1, i1 0
+  %nop7166 = alloca i1, i1 0
+  %nop7167 = alloca i1, i1 0
+  %nop7168 = alloca i1, i1 0
+  %nop7169 = alloca i1, i1 0
+  %nop7170 = alloca i1, i1 0
+  %nop7171 = alloca i1, i1 0
+  %nop7172 = alloca i1, i1 0
+  %nop7173 = alloca i1, i1 0
+  %nop7174 = alloca i1, i1 0
+  %nop7175 = alloca i1, i1 0
+  %nop7176 = alloca i1, i1 0
+  %nop7177 = alloca i1, i1 0
+  %nop7178 = alloca i1, i1 0
+  %nop7179 = alloca i1, i1 0
+  %nop7180 = alloca i1, i1 0
+  %nop7181 = alloca i1, i1 0
+  %nop7182 = alloca i1, i1 0
+  %nop7183 = alloca i1, i1 0
+  %nop7184 = alloca i1, i1 0
+  %nop7185 = alloca i1, i1 0
+  %nop7186 = alloca i1, i1 0
+  %nop7187 = alloca i1, i1 0
+  %nop7188 = alloca i1, i1 0
+  %nop7189 = alloca i1, i1 0
+  %nop7190 = alloca i1, i1 0
+  %nop7191 = alloca i1, i1 0
+  %nop7192 = alloca i1, i1 0
+  %nop7193 = alloca i1, i1 0
+  %nop7194 = alloca i1, i1 0
+  %nop7195 = alloca i1, i1 0
+  %nop7196 = alloca i1, i1 0
+  %nop7197 = alloca i1, i1 0
+  %nop7198 = alloca i1, i1 0
+  %nop7199 = alloca i1, i1 0
+  %nop7200 = alloca i1, i1 0
+  %nop7201 = alloca i1, i1 0
+  %nop7202 = alloca i1, i1 0
+  %nop7203 = alloca i1, i1 0
+  %nop7204 = alloca i1, i1 0
+  %nop7205 = alloca i1, i1 0
+  %nop7206 = alloca i1, i1 0
+  %nop7207 = alloca i1, i1 0
+  %nop7208 = alloca i1, i1 0
+  %nop7209 = alloca i1, i1 0
+  %nop7210 = alloca i1, i1 0
+  %nop7211 = alloca i1, i1 0
+  %nop7212 = alloca i1, i1 0
+  %nop7213 = alloca i1, i1 0
+  %nop7214 = alloca i1, i1 0
+  %nop7215 = alloca i1, i1 0
+  %nop7216 = alloca i1, i1 0
+  %nop7217 = alloca i1, i1 0
+  %nop7218 = alloca i1, i1 0
+  %nop7219 = alloca i1, i1 0
+  %nop7220 = alloca i1, i1 0
+  %nop7221 = alloca i1, i1 0
+  %nop7222 = alloca i1, i1 0
+  %nop7223 = alloca i1, i1 0
+  %nop7224 = alloca i1, i1 0
+  %nop7225 = alloca i1, i1 0
+  %nop7226 = alloca i1, i1 0
+  %nop7227 = alloca i1, i1 0
+  %nop7228 = alloca i1, i1 0
+  %nop7229 = alloca i1, i1 0
+  %nop7230 = alloca i1, i1 0
+  %nop7231 = alloca i1, i1 0
+  %nop7232 = alloca i1, i1 0
+  %nop7233 = alloca i1, i1 0
+  %nop7234 = alloca i1, i1 0
+  %nop7235 = alloca i1, i1 0
+  %nop7236 = alloca i1, i1 0
+  %nop7237 = alloca i1, i1 0
+  %nop7238 = alloca i1, i1 0
+  %nop7239 = alloca i1, i1 0
+  %nop7240 = alloca i1, i1 0
+  %nop7241 = alloca i1, i1 0
+  %nop7242 = alloca i1, i1 0
+  %nop7243 = alloca i1, i1 0
+  %nop7244 = alloca i1, i1 0
+  %nop7245 = alloca i1, i1 0
+  %nop7246 = alloca i1, i1 0
+  %nop7247 = alloca i1, i1 0
+  %nop7248 = alloca i1, i1 0
+  %nop7249 = alloca i1, i1 0
+  %nop7250 = alloca i1, i1 0
+  %nop7251 = alloca i1, i1 0
+  %nop7252 = alloca i1, i1 0
+  %nop7253 = alloca i1, i1 0
+  %nop7254 = alloca i1, i1 0
+  %nop7255 = alloca i1, i1 0
+  %nop7256 = alloca i1, i1 0
+  %nop7257 = alloca i1, i1 0
+  %nop7258 = alloca i1, i1 0
+  %nop7259 = alloca i1, i1 0
+  %nop7260 = alloca i1, i1 0
+  %nop7261 = alloca i1, i1 0
+  %nop7262 = alloca i1, i1 0
+  %nop7263 = alloca i1, i1 0
+  %nop7264 = alloca i1, i1 0
+  %nop7265 = alloca i1, i1 0
+  %nop7266 = alloca i1, i1 0
+  %nop7267 = alloca i1, i1 0
+  %nop7268 = alloca i1, i1 0
+  %nop7269 = alloca i1, i1 0
+  %nop7270 = alloca i1, i1 0
+  %nop7271 = alloca i1, i1 0
+  %nop7272 = alloca i1, i1 0
+  %nop7273 = alloca i1, i1 0
+  %nop7274 = alloca i1, i1 0
+  %nop7275 = alloca i1, i1 0
+  %nop7276 = alloca i1, i1 0
+  %nop7277 = alloca i1, i1 0
+  %nop7278 = alloca i1, i1 0
+  %nop7279 = alloca i1, i1 0
+  %nop7280 = alloca i1, i1 0
+  %nop7281 = alloca i1, i1 0
+  %nop7282 = alloca i1, i1 0
+  %nop7283 = alloca i1, i1 0
+  %nop7284 = alloca i1, i1 0
+  %nop7285 = alloca i1, i1 0
+  %nop7286 = alloca i1, i1 0
+  %nop7287 = alloca i1, i1 0
+  %nop7288 = alloca i1, i1 0
+  %nop7289 = alloca i1, i1 0
+  %nop7290 = alloca i1, i1 0
+  %nop7291 = alloca i1, i1 0
+  %nop7292 = alloca i1, i1 0
+  %nop7293 = alloca i1, i1 0
+  %nop7294 = alloca i1, i1 0
+  %nop7295 = alloca i1, i1 0
+  %nop7296 = alloca i1, i1 0
+  %nop7297 = alloca i1, i1 0
+  %nop7298 = alloca i1, i1 0
+  %nop7299 = alloca i1, i1 0
+  %nop7300 = alloca i1, i1 0
+  %nop7301 = alloca i1, i1 0
+  %nop7302 = alloca i1, i1 0
+  %nop7303 = alloca i1, i1 0
+  %nop7304 = alloca i1, i1 0
+  %nop7305 = alloca i1, i1 0
+  %nop7306 = alloca i1, i1 0
+  %nop7307 = alloca i1, i1 0
+  %nop7308 = alloca i1, i1 0
+  %nop7309 = alloca i1, i1 0
+  %nop7310 = alloca i1, i1 0
+  %nop7311 = alloca i1, i1 0
+  %nop7312 = alloca i1, i1 0
+  %nop7313 = alloca i1, i1 0
+  %nop7314 = alloca i1, i1 0
+  %nop7315 = alloca i1, i1 0
+  %nop7316 = alloca i1, i1 0
+  %nop7317 = alloca i1, i1 0
+  %nop7318 = alloca i1, i1 0
+  %nop7319 = alloca i1, i1 0
+  %nop7320 = alloca i1, i1 0
+  %nop7321 = alloca i1, i1 0
+  %nop7322 = alloca i1, i1 0
+  %nop7323 = alloca i1, i1 0
+  %nop7324 = alloca i1, i1 0
+  %nop7325 = alloca i1, i1 0
+  %nop7326 = alloca i1, i1 0
+  %nop7327 = alloca i1, i1 0
+  %nop7328 = alloca i1, i1 0
+  %nop7329 = alloca i1, i1 0
+  %nop7330 = alloca i1, i1 0
+  %nop7331 = alloca i1, i1 0
+  %nop7332 = alloca i1, i1 0
+  %nop7333 = alloca i1, i1 0
+  %nop7334 = alloca i1, i1 0
+  %nop7335 = alloca i1, i1 0
+  %nop7336 = alloca i1, i1 0
+  %nop7337 = alloca i1, i1 0
+  %nop7338 = alloca i1, i1 0
+  %nop7339 = alloca i1, i1 0
+  %nop7340 = alloca i1, i1 0
+  %nop7341 = alloca i1, i1 0
+  %nop7342 = alloca i1, i1 0
+  %nop7343 = alloca i1, i1 0
+  %nop7344 = alloca i1, i1 0
+  %nop7345 = alloca i1, i1 0
+  %nop7346 = alloca i1, i1 0
+  %nop7347 = alloca i1, i1 0
+  %nop7348 = alloca i1, i1 0
+  %nop7349 = alloca i1, i1 0
+  %nop7350 = alloca i1, i1 0
+  %nop7351 = alloca i1, i1 0
+  %nop7352 = alloca i1, i1 0
+  %nop7353 = alloca i1, i1 0
+  %nop7354 = alloca i1, i1 0
+  %nop7355 = alloca i1, i1 0
+  %nop7356 = alloca i1, i1 0
+  %nop7357 = alloca i1, i1 0
+  %nop7358 = alloca i1, i1 0
+  %nop7359 = alloca i1, i1 0
+  %nop7360 = alloca i1, i1 0
+  %nop7361 = alloca i1, i1 0
+  %nop7362 = alloca i1, i1 0
+  %nop7363 = alloca i1, i1 0
+  %nop7364 = alloca i1, i1 0
+  %nop7365 = alloca i1, i1 0
+  %nop7366 = alloca i1, i1 0
+  %nop7367 = alloca i1, i1 0
+  %nop7368 = alloca i1, i1 0
+  %nop7369 = alloca i1, i1 0
+  %nop7370 = alloca i1, i1 0
+  %nop7371 = alloca i1, i1 0
+  %nop7372 = alloca i1, i1 0
+  %nop7373 = alloca i1, i1 0
+  %nop7374 = alloca i1, i1 0
+  %nop7375 = alloca i1, i1 0
+  %nop7376 = alloca i1, i1 0
+  %nop7377 = alloca i1, i1 0
+  %nop7378 = alloca i1, i1 0
+  %nop7379 = alloca i1, i1 0
+  %nop7380 = alloca i1, i1 0
+  %nop7381 = alloca i1, i1 0
+  %nop7382 = alloca i1, i1 0
+  %nop7383 = alloca i1, i1 0
+  %nop7384 = alloca i1, i1 0
+  %nop7385 = alloca i1, i1 0
+  %nop7386 = alloca i1, i1 0
+  %nop7387 = alloca i1, i1 0
+  %nop7388 = alloca i1, i1 0
+  %nop7389 = alloca i1, i1 0
+  %nop7390 = alloca i1, i1 0
+  %nop7391 = alloca i1, i1 0
+  %nop7392 = alloca i1, i1 0
+  %nop7393 = alloca i1, i1 0
+  %nop7394 = alloca i1, i1 0
+  %nop7395 = alloca i1, i1 0
+  %nop7396 = alloca i1, i1 0
+  %nop7397 = alloca i1, i1 0
+  %nop7398 = alloca i1, i1 0
+  %nop7399 = alloca i1, i1 0
+  %nop7400 = alloca i1, i1 0
+  %nop7401 = alloca i1, i1 0
+  %nop7402 = alloca i1, i1 0
+  %nop7403 = alloca i1, i1 0
+  %nop7404 = alloca i1, i1 0
+  %nop7405 = alloca i1, i1 0
+  %nop7406 = alloca i1, i1 0
+  %nop7407 = alloca i1, i1 0
+  %nop7408 = alloca i1, i1 0
+  %nop7409 = alloca i1, i1 0
+  %nop7410 = alloca i1, i1 0
+  %nop7411 = alloca i1, i1 0
+  %nop7412 = alloca i1, i1 0
+  %nop7413 = alloca i1, i1 0
+  %nop7414 = alloca i1, i1 0
+  %nop7415 = alloca i1, i1 0
+  %nop7416 = alloca i1, i1 0
+  %nop7417 = alloca i1, i1 0
+  %nop7418 = alloca i1, i1 0
+  %nop7419 = alloca i1, i1 0
+  %nop7420 = alloca i1, i1 0
+  %nop7421 = alloca i1, i1 0
+  %nop7422 = alloca i1, i1 0
+  %nop7423 = alloca i1, i1 0
+  %nop7424 = alloca i1, i1 0
+  %nop7425 = alloca i1, i1 0
+  %nop7426 = alloca i1, i1 0
+  %nop7427 = alloca i1, i1 0
+  %nop7428 = alloca i1, i1 0
+  %nop7429 = alloca i1, i1 0
+  %nop7430 = alloca i1, i1 0
+  %nop7431 = alloca i1, i1 0
+  %nop7432 = alloca i1, i1 0
+  %nop7433 = alloca i1, i1 0
+  %nop7434 = alloca i1, i1 0
+  %nop7435 = alloca i1, i1 0
+  %nop7436 = alloca i1, i1 0
+  %nop7437 = alloca i1, i1 0
+  %nop7438 = alloca i1, i1 0
+  %nop7439 = alloca i1, i1 0
+  %nop7440 = alloca i1, i1 0
+  %nop7441 = alloca i1, i1 0
+  %nop7442 = alloca i1, i1 0
+  %nop7443 = alloca i1, i1 0
+  %nop7444 = alloca i1, i1 0
+  %nop7445 = alloca i1, i1 0
+  %nop7446 = alloca i1, i1 0
+  %nop7447 = alloca i1, i1 0
+  %nop7448 = alloca i1, i1 0
+  %nop7449 = alloca i1, i1 0
+  %nop7450 = alloca i1, i1 0
+  %nop7451 = alloca i1, i1 0
+  %nop7452 = alloca i1, i1 0
+  %nop7453 = alloca i1, i1 0
+  %nop7454 = alloca i1, i1 0
+  %nop7455 = alloca i1, i1 0
+  %nop7456 = alloca i1, i1 0
+  %nop7457 = alloca i1, i1 0
+  %nop7458 = alloca i1, i1 0
+  %nop7459 = alloca i1, i1 0
+  %nop7460 = alloca i1, i1 0
+  %nop7461 = alloca i1, i1 0
+  %nop7462 = alloca i1, i1 0
+  %nop7463 = alloca i1, i1 0
+  %nop7464 = alloca i1, i1 0
+  %nop7465 = alloca i1, i1 0
+  %nop7466 = alloca i1, i1 0
+  %nop7467 = alloca i1, i1 0
+  %nop7468 = alloca i1, i1 0
+  %nop7469 = alloca i1, i1 0
+  %nop7470 = alloca i1, i1 0
+  %nop7471 = alloca i1, i1 0
+  %nop7472 = alloca i1, i1 0
+  %nop7473 = alloca i1, i1 0
+  %nop7474 = alloca i1, i1 0
+  %nop7475 = alloca i1, i1 0
+  %nop7476 = alloca i1, i1 0
+  %nop7477 = alloca i1, i1 0
+  %nop7478 = alloca i1, i1 0
+  %nop7479 = alloca i1, i1 0
+  %nop7480 = alloca i1, i1 0
+  %nop7481 = alloca i1, i1 0
+  %nop7482 = alloca i1, i1 0
+  %nop7483 = alloca i1, i1 0
+  %nop7484 = alloca i1, i1 0
+  %nop7485 = alloca i1, i1 0
+  %nop7486 = alloca i1, i1 0
+  %nop7487 = alloca i1, i1 0
+  %nop7488 = alloca i1, i1 0
+  %nop7489 = alloca i1, i1 0
+  %nop7490 = alloca i1, i1 0
+  %nop7491 = alloca i1, i1 0
+  %nop7492 = alloca i1, i1 0
+  %nop7493 = alloca i1, i1 0
+  %nop7494 = alloca i1, i1 0
+  %nop7495 = alloca i1, i1 0
+  %nop7496 = alloca i1, i1 0
+  %nop7497 = alloca i1, i1 0
+  %nop7498 = alloca i1, i1 0
+  %nop7499 = alloca i1, i1 0
+  %nop7500 = alloca i1, i1 0
+  %nop7501 = alloca i1, i1 0
+  %nop7502 = alloca i1, i1 0
+  %nop7503 = alloca i1, i1 0
+  %nop7504 = alloca i1, i1 0
+  %nop7505 = alloca i1, i1 0
+  %nop7506 = alloca i1, i1 0
+  %nop7507 = alloca i1, i1 0
+  %nop7508 = alloca i1, i1 0
+  %nop7509 = alloca i1, i1 0
+  %nop7510 = alloca i1, i1 0
+  %nop7511 = alloca i1, i1 0
+  %nop7512 = alloca i1, i1 0
+  %nop7513 = alloca i1, i1 0
+  %nop7514 = alloca i1, i1 0
+  %nop7515 = alloca i1, i1 0
+  %nop7516 = alloca i1, i1 0
+  %nop7517 = alloca i1, i1 0
+  %nop7518 = alloca i1, i1 0
+  %nop7519 = alloca i1, i1 0
+  %nop7520 = alloca i1, i1 0
+  %nop7521 = alloca i1, i1 0
+  %nop7522 = alloca i1, i1 0
+  %nop7523 = alloca i1, i1 0
+  %nop7524 = alloca i1, i1 0
+  %nop7525 = alloca i1, i1 0
+  %nop7526 = alloca i1, i1 0
+  %nop7527 = alloca i1, i1 0
+  %nop7528 = alloca i1, i1 0
+  %nop7529 = alloca i1, i1 0
+  %nop7530 = alloca i1, i1 0
+  %nop7531 = alloca i1, i1 0
+  %nop7532 = alloca i1, i1 0
+  %nop7533 = alloca i1, i1 0
+  %nop7534 = alloca i1, i1 0
+  %nop7535 = alloca i1, i1 0
+  %nop7536 = alloca i1, i1 0
+  %nop7537 = alloca i1, i1 0
+  %nop7538 = alloca i1, i1 0
+  %nop7539 = alloca i1, i1 0
+  %nop7540 = alloca i1, i1 0
+  %nop7541 = alloca i1, i1 0
+  %nop7542 = alloca i1, i1 0
+  %nop7543 = alloca i1, i1 0
+  %nop7544 = alloca i1, i1 0
+  %nop7545 = alloca i1, i1 0
+  %nop7546 = alloca i1, i1 0
+  %nop7547 = alloca i1, i1 0
+  %nop7548 = alloca i1, i1 0
+  %nop7549 = alloca i1, i1 0
+  %nop7550 = alloca i1, i1 0
+  %nop7551 = alloca i1, i1 0
+  %nop7552 = alloca i1, i1 0
+  %nop7553 = alloca i1, i1 0
+  %nop7554 = alloca i1, i1 0
+  %nop7555 = alloca i1, i1 0
+  %nop7556 = alloca i1, i1 0
+  %nop7557 = alloca i1, i1 0
+  %nop7558 = alloca i1, i1 0
+  %nop7559 = alloca i1, i1 0
+  %nop7560 = alloca i1, i1 0
+  %nop7561 = alloca i1, i1 0
+  %nop7562 = alloca i1, i1 0
+  %nop7563 = alloca i1, i1 0
+  %nop7564 = alloca i1, i1 0
+  %nop7565 = alloca i1, i1 0
+  %nop7566 = alloca i1, i1 0
+  %nop7567 = alloca i1, i1 0
+  %nop7568 = alloca i1, i1 0
+  %nop7569 = alloca i1, i1 0
+  %nop7570 = alloca i1, i1 0
+  %nop7571 = alloca i1, i1 0
+  %nop7572 = alloca i1, i1 0
+  %nop7573 = alloca i1, i1 0
+  %nop7574 = alloca i1, i1 0
+  %nop7575 = alloca i1, i1 0
+  %nop7576 = alloca i1, i1 0
+  %nop7577 = alloca i1, i1 0
+  %nop7578 = alloca i1, i1 0
+  %nop7579 = alloca i1, i1 0
+  %nop7580 = alloca i1, i1 0
+  %nop7581 = alloca i1, i1 0
+  %nop7582 = alloca i1, i1 0
+  %nop7583 = alloca i1, i1 0
+  %nop7584 = alloca i1, i1 0
+  %nop7585 = alloca i1, i1 0
+  %nop7586 = alloca i1, i1 0
+  %nop7587 = alloca i1, i1 0
+  %nop7588 = alloca i1, i1 0
+  %nop7589 = alloca i1, i1 0
+  %nop7590 = alloca i1, i1 0
+  %nop7591 = alloca i1, i1 0
+  %nop7592 = alloca i1, i1 0
+  %nop7593 = alloca i1, i1 0
+  %nop7594 = alloca i1, i1 0
+  %nop7595 = alloca i1, i1 0
+  %nop7596 = alloca i1, i1 0
+  %nop7597 = alloca i1, i1 0
+  %nop7598 = alloca i1, i1 0
+  %nop7599 = alloca i1, i1 0
+  %nop7600 = alloca i1, i1 0
+  %nop7601 = alloca i1, i1 0
+  %nop7602 = alloca i1, i1 0
+  %nop7603 = alloca i1, i1 0
+  %nop7604 = alloca i1, i1 0
+  %nop7605 = alloca i1, i1 0
+  %nop7606 = alloca i1, i1 0
+  %nop7607 = alloca i1, i1 0
+  %nop7608 = alloca i1, i1 0
+  %nop7609 = alloca i1, i1 0
+  %nop7610 = alloca i1, i1 0
+  %nop7611 = alloca i1, i1 0
+  %nop7612 = alloca i1, i1 0
+  %nop7613 = alloca i1, i1 0
+  %nop7614 = alloca i1, i1 0
+  %nop7615 = alloca i1, i1 0
+  %nop7616 = alloca i1, i1 0
+  %nop7617 = alloca i1, i1 0
+  %nop7618 = alloca i1, i1 0
+  %nop7619 = alloca i1, i1 0
+  %nop7620 = alloca i1, i1 0
+  %nop7621 = alloca i1, i1 0
+  %nop7622 = alloca i1, i1 0
+  %nop7623 = alloca i1, i1 0
+  %nop7624 = alloca i1, i1 0
+  %nop7625 = alloca i1, i1 0
+  %nop7626 = alloca i1, i1 0
+  %nop7627 = alloca i1, i1 0
+  %nop7628 = alloca i1, i1 0
+  %nop7629 = alloca i1, i1 0
+  %nop7630 = alloca i1, i1 0
+  %nop7631 = alloca i1, i1 0
+  %nop7632 = alloca i1, i1 0
+  %nop7633 = alloca i1, i1 0
+  %nop7634 = alloca i1, i1 0
+  %nop7635 = alloca i1, i1 0
+  %nop7636 = alloca i1, i1 0
+  %nop7637 = alloca i1, i1 0
+  %nop7638 = alloca i1, i1 0
+  %nop7639 = alloca i1, i1 0
+  %nop7640 = alloca i1, i1 0
+  %nop7641 = alloca i1, i1 0
+  %nop7642 = alloca i1, i1 0
+  %nop7643 = alloca i1, i1 0
+  %nop7644 = alloca i1, i1 0
+  %nop7645 = alloca i1, i1 0
+  %nop7646 = alloca i1, i1 0
+  %nop7647 = alloca i1, i1 0
+  %nop7648 = alloca i1, i1 0
+  %nop7649 = alloca i1, i1 0
+  %nop7650 = alloca i1, i1 0
+  %nop7651 = alloca i1, i1 0
+  %nop7652 = alloca i1, i1 0
+  %nop7653 = alloca i1, i1 0
+  %nop7654 = alloca i1, i1 0
+  %nop7655 = alloca i1, i1 0
+  %nop7656 = alloca i1, i1 0
+  %nop7657 = alloca i1, i1 0
+  %nop7658 = alloca i1, i1 0
+  %nop7659 = alloca i1, i1 0
+  %nop7660 = alloca i1, i1 0
+  %nop7661 = alloca i1, i1 0
+  %nop7662 = alloca i1, i1 0
+  %nop7663 = alloca i1, i1 0
+  %nop7664 = alloca i1, i1 0
+  %nop7665 = alloca i1, i1 0
+  %nop7666 = alloca i1, i1 0
+  %nop7667 = alloca i1, i1 0
+  %nop7668 = alloca i1, i1 0
+  %nop7669 = alloca i1, i1 0
+  %nop7670 = alloca i1, i1 0
+  %nop7671 = alloca i1, i1 0
+  %nop7672 = alloca i1, i1 0
+  %nop7673 = alloca i1, i1 0
+  %nop7674 = alloca i1, i1 0
+  %nop7675 = alloca i1, i1 0
+  %nop7676 = alloca i1, i1 0
+  %nop7677 = alloca i1, i1 0
+  %nop7678 = alloca i1, i1 0
+  %nop7679 = alloca i1, i1 0
+  %nop7680 = alloca i1, i1 0
+  %nop7681 = alloca i1, i1 0
+  %nop7682 = alloca i1, i1 0
+  %nop7683 = alloca i1, i1 0
+  %nop7684 = alloca i1, i1 0
+  %nop7685 = alloca i1, i1 0
+  %nop7686 = alloca i1, i1 0
+  %nop7687 = alloca i1, i1 0
+  %nop7688 = alloca i1, i1 0
+  %nop7689 = alloca i1, i1 0
+  %nop7690 = alloca i1, i1 0
+  %nop7691 = alloca i1, i1 0
+  %nop7692 = alloca i1, i1 0
+  %nop7693 = alloca i1, i1 0
+  %nop7694 = alloca i1, i1 0
+  %nop7695 = alloca i1, i1 0
+  %nop7696 = alloca i1, i1 0
+  %nop7697 = alloca i1, i1 0
+  %nop7698 = alloca i1, i1 0
+  %nop7699 = alloca i1, i1 0
+  %nop7700 = alloca i1, i1 0
+  %nop7701 = alloca i1, i1 0
+  %nop7702 = alloca i1, i1 0
+  %nop7703 = alloca i1, i1 0
+  %nop7704 = alloca i1, i1 0
+  %nop7705 = alloca i1, i1 0
+  %nop7706 = alloca i1, i1 0
+  %nop7707 = alloca i1, i1 0
+  %nop7708 = alloca i1, i1 0
+  %nop7709 = alloca i1, i1 0
+  %nop7710 = alloca i1, i1 0
+  %nop7711 = alloca i1, i1 0
+  %nop7712 = alloca i1, i1 0
+  %nop7713 = alloca i1, i1 0
+  %nop7714 = alloca i1, i1 0
+  %nop7715 = alloca i1, i1 0
+  %nop7716 = alloca i1, i1 0
+  %nop7717 = alloca i1, i1 0
+  %nop7718 = alloca i1, i1 0
+  %nop7719 = alloca i1, i1 0
+  %nop7720 = alloca i1, i1 0
+  %nop7721 = alloca i1, i1 0
+  %nop7722 = alloca i1, i1 0
+  %nop7723 = alloca i1, i1 0
+  %nop7724 = alloca i1, i1 0
+  %nop7725 = alloca i1, i1 0
+  %nop7726 = alloca i1, i1 0
+  %nop7727 = alloca i1, i1 0
+  %nop7728 = alloca i1, i1 0
+  %nop7729 = alloca i1, i1 0
+  %nop7730 = alloca i1, i1 0
+  %nop7731 = alloca i1, i1 0
+  %nop7732 = alloca i1, i1 0
+  %nop7733 = alloca i1, i1 0
+  %nop7734 = alloca i1, i1 0
+  %nop7735 = alloca i1, i1 0
+  %nop7736 = alloca i1, i1 0
+  %nop7737 = alloca i1, i1 0
+  %nop7738 = alloca i1, i1 0
+  %nop7739 = alloca i1, i1 0
+  %nop7740 = alloca i1, i1 0
+  %nop7741 = alloca i1, i1 0
+  %nop7742 = alloca i1, i1 0
+  %nop7743 = alloca i1, i1 0
+  %nop7744 = alloca i1, i1 0
+  %nop7745 = alloca i1, i1 0
+  %nop7746 = alloca i1, i1 0
+  %nop7747 = alloca i1, i1 0
+  %nop7748 = alloca i1, i1 0
+  %nop7749 = alloca i1, i1 0
+  %nop7750 = alloca i1, i1 0
+  %nop7751 = alloca i1, i1 0
+  %nop7752 = alloca i1, i1 0
+  %nop7753 = alloca i1, i1 0
+  %nop7754 = alloca i1, i1 0
+  %nop7755 = alloca i1, i1 0
+  %nop7756 = alloca i1, i1 0
+  %nop7757 = alloca i1, i1 0
+  %nop7758 = alloca i1, i1 0
+  %nop7759 = alloca i1, i1 0
+  %nop7760 = alloca i1, i1 0
+  %nop7761 = alloca i1, i1 0
+  %nop7762 = alloca i1, i1 0
+  %nop7763 = alloca i1, i1 0
+  %nop7764 = alloca i1, i1 0
+  %nop7765 = alloca i1, i1 0
+  %nop7766 = alloca i1, i1 0
+  %nop7767 = alloca i1, i1 0
+  %nop7768 = alloca i1, i1 0
+  %nop7769 = alloca i1, i1 0
+  %nop7770 = alloca i1, i1 0
+  %nop7771 = alloca i1, i1 0
+  %nop7772 = alloca i1, i1 0
+  %nop7773 = alloca i1, i1 0
+  %nop7774 = alloca i1, i1 0
+  %nop7775 = alloca i1, i1 0
+  %nop7776 = alloca i1, i1 0
+  %nop7777 = alloca i1, i1 0
+  %nop7778 = alloca i1, i1 0
+  %nop7779 = alloca i1, i1 0
+  %nop7780 = alloca i1, i1 0
+  %nop7781 = alloca i1, i1 0
+  %nop7782 = alloca i1, i1 0
+  %nop7783 = alloca i1, i1 0
+  %nop7784 = alloca i1, i1 0
+  %nop7785 = alloca i1, i1 0
+  %nop7786 = alloca i1, i1 0
+  %nop7787 = alloca i1, i1 0
+  %nop7788 = alloca i1, i1 0
+  %nop7789 = alloca i1, i1 0
+  %nop7790 = alloca i1, i1 0
+  %nop7791 = alloca i1, i1 0
+  %nop7792 = alloca i1, i1 0
+  %nop7793 = alloca i1, i1 0
+  %nop7794 = alloca i1, i1 0
+  %nop7795 = alloca i1, i1 0
+  %nop7796 = alloca i1, i1 0
+  %nop7797 = alloca i1, i1 0
+  %nop7798 = alloca i1, i1 0
+  %nop7799 = alloca i1, i1 0
+  %nop7800 = alloca i1, i1 0
+  %nop7801 = alloca i1, i1 0
+  %nop7802 = alloca i1, i1 0
+  %nop7803 = alloca i1, i1 0
+  %nop7804 = alloca i1, i1 0
+  %nop7805 = alloca i1, i1 0
+  %nop7806 = alloca i1, i1 0
+  %nop7807 = alloca i1, i1 0
+  %nop7808 = alloca i1, i1 0
+  %nop7809 = alloca i1, i1 0
+  %nop7810 = alloca i1, i1 0
+  %nop7811 = alloca i1, i1 0
+  %nop7812 = alloca i1, i1 0
+  %nop7813 = alloca i1, i1 0
+  %nop7814 = alloca i1, i1 0
+  %nop7815 = alloca i1, i1 0
+  %nop7816 = alloca i1, i1 0
+  %nop7817 = alloca i1, i1 0
+  %nop7818 = alloca i1, i1 0
+  %nop7819 = alloca i1, i1 0
+  %nop7820 = alloca i1, i1 0
+  %nop7821 = alloca i1, i1 0
+  %nop7822 = alloca i1, i1 0
+  %nop7823 = alloca i1, i1 0
+  %nop7824 = alloca i1, i1 0
+  %nop7825 = alloca i1, i1 0
+  %nop7826 = alloca i1, i1 0
+  %nop7827 = alloca i1, i1 0
+  %nop7828 = alloca i1, i1 0
+  %nop7829 = alloca i1, i1 0
+  %nop7830 = alloca i1, i1 0
+  %nop7831 = alloca i1, i1 0
+  %nop7832 = alloca i1, i1 0
+  %nop7833 = alloca i1, i1 0
+  %nop7834 = alloca i1, i1 0
+  %nop7835 = alloca i1, i1 0
+  %nop7836 = alloca i1, i1 0
+  %nop7837 = alloca i1, i1 0
+  %nop7838 = alloca i1, i1 0
+  %nop7839 = alloca i1, i1 0
+  %nop7840 = alloca i1, i1 0
+  %nop7841 = alloca i1, i1 0
+  %nop7842 = alloca i1, i1 0
+  %nop7843 = alloca i1, i1 0
+  %nop7844 = alloca i1, i1 0
+  %nop7845 = alloca i1, i1 0
+  %nop7846 = alloca i1, i1 0
+  %nop7847 = alloca i1, i1 0
+  %nop7848 = alloca i1, i1 0
+  %nop7849 = alloca i1, i1 0
+  %nop7850 = alloca i1, i1 0
+  %nop7851 = alloca i1, i1 0
+  %nop7852 = alloca i1, i1 0
+  %nop7853 = alloca i1, i1 0
+  %nop7854 = alloca i1, i1 0
+  %nop7855 = alloca i1, i1 0
+  %nop7856 = alloca i1, i1 0
+  %nop7857 = alloca i1, i1 0
+  %nop7858 = alloca i1, i1 0
+  %nop7859 = alloca i1, i1 0
+  %nop7860 = alloca i1, i1 0
+  %nop7861 = alloca i1, i1 0
+  %nop7862 = alloca i1, i1 0
+  %nop7863 = alloca i1, i1 0
+  %nop7864 = alloca i1, i1 0
+  %nop7865 = alloca i1, i1 0
+  %nop7866 = alloca i1, i1 0
+  %nop7867 = alloca i1, i1 0
+  %nop7868 = alloca i1, i1 0
+  %nop7869 = alloca i1, i1 0
+  %nop7870 = alloca i1, i1 0
+  %nop7871 = alloca i1, i1 0
+  %nop7872 = alloca i1, i1 0
+  %nop7873 = alloca i1, i1 0
+  %nop7874 = alloca i1, i1 0
+  %nop7875 = alloca i1, i1 0
+  %nop7876 = alloca i1, i1 0
+  %nop7877 = alloca i1, i1 0
+  %nop7878 = alloca i1, i1 0
+  %nop7879 = alloca i1, i1 0
+  %nop7880 = alloca i1, i1 0
+  %nop7881 = alloca i1, i1 0
+  %nop7882 = alloca i1, i1 0
+  %nop7883 = alloca i1, i1 0
+  %nop7884 = alloca i1, i1 0
+  %nop7885 = alloca i1, i1 0
+  %nop7886 = alloca i1, i1 0
+  %nop7887 = alloca i1, i1 0
+  %nop7888 = alloca i1, i1 0
+  %nop7889 = alloca i1, i1 0
+  %nop7890 = alloca i1, i1 0
+  %nop7891 = alloca i1, i1 0
+  %nop7892 = alloca i1, i1 0
+  %nop7893 = alloca i1, i1 0
+  %nop7894 = alloca i1, i1 0
+  %nop7895 = alloca i1, i1 0
+  %nop7896 = alloca i1, i1 0
+  %nop7897 = alloca i1, i1 0
+  %nop7898 = alloca i1, i1 0
+  %nop7899 = alloca i1, i1 0
+  %nop7900 = alloca i1, i1 0
+  %nop7901 = alloca i1, i1 0
+  %nop7902 = alloca i1, i1 0
+  %nop7903 = alloca i1, i1 0
+  %nop7904 = alloca i1, i1 0
+  %nop7905 = alloca i1, i1 0
+  %nop7906 = alloca i1, i1 0
+  %nop7907 = alloca i1, i1 0
+  %nop7908 = alloca i1, i1 0
+  %nop7909 = alloca i1, i1 0
+  %nop7910 = alloca i1, i1 0
+  %nop7911 = alloca i1, i1 0
+  %nop7912 = alloca i1, i1 0
+  %nop7913 = alloca i1, i1 0
+  %nop7914 = alloca i1, i1 0
+  %nop7915 = alloca i1, i1 0
+  %nop7916 = alloca i1, i1 0
+  %nop7917 = alloca i1, i1 0
+  %nop7918 = alloca i1, i1 0
+  %nop7919 = alloca i1, i1 0
+  %nop7920 = alloca i1, i1 0
+  %nop7921 = alloca i1, i1 0
+  %nop7922 = alloca i1, i1 0
+  %nop7923 = alloca i1, i1 0
+  %nop7924 = alloca i1, i1 0
+  %nop7925 = alloca i1, i1 0
+  %nop7926 = alloca i1, i1 0
+  %nop7927 = alloca i1, i1 0
+  %nop7928 = alloca i1, i1 0
+  %nop7929 = alloca i1, i1 0
+  %nop7930 = alloca i1, i1 0
+  %nop7931 = alloca i1, i1 0
+  %nop7932 = alloca i1, i1 0
+  %nop7933 = alloca i1, i1 0
+  %nop7934 = alloca i1, i1 0
+  %nop7935 = alloca i1, i1 0
+  %nop7936 = alloca i1, i1 0
+  %nop7937 = alloca i1, i1 0
+  %nop7938 = alloca i1, i1 0
+  %nop7939 = alloca i1, i1 0
+  %nop7940 = alloca i1, i1 0
+  %nop7941 = alloca i1, i1 0
+  %nop7942 = alloca i1, i1 0
+  %nop7943 = alloca i1, i1 0
+  %nop7944 = alloca i1, i1 0
+  %nop7945 = alloca i1, i1 0
+  %nop7946 = alloca i1, i1 0
+  %nop7947 = alloca i1, i1 0
+  %nop7948 = alloca i1, i1 0
+  %nop7949 = alloca i1, i1 0
+  %nop7950 = alloca i1, i1 0
+  %nop7951 = alloca i1, i1 0
+  %nop7952 = alloca i1, i1 0
+  %nop7953 = alloca i1, i1 0
+  %nop7954 = alloca i1, i1 0
+  %nop7955 = alloca i1, i1 0
+  %nop7956 = alloca i1, i1 0
+  %nop7957 = alloca i1, i1 0
+  %nop7958 = alloca i1, i1 0
+  %nop7959 = alloca i1, i1 0
+  %nop7960 = alloca i1, i1 0
+  %nop7961 = alloca i1, i1 0
+  %nop7962 = alloca i1, i1 0
+  %nop7963 = alloca i1, i1 0
+  %nop7964 = alloca i1, i1 0
+  %nop7965 = alloca i1, i1 0
+  %nop7966 = alloca i1, i1 0
+  %nop7967 = alloca i1, i1 0
+  %nop7968 = alloca i1, i1 0
+  %nop7969 = alloca i1, i1 0
+  %nop7970 = alloca i1, i1 0
+  %nop7971 = alloca i1, i1 0
+  %nop7972 = alloca i1, i1 0
+  %nop7973 = alloca i1, i1 0
+  %nop7974 = alloca i1, i1 0
+  %nop7975 = alloca i1, i1 0
+  %nop7976 = alloca i1, i1 0
+  %nop7977 = alloca i1, i1 0
+  %nop7978 = alloca i1, i1 0
+  %nop7979 = alloca i1, i1 0
+  %nop7980 = alloca i1, i1 0
+  %nop7981 = alloca i1, i1 0
+  %nop7982 = alloca i1, i1 0
+  %nop7983 = alloca i1, i1 0
+  %nop7984 = alloca i1, i1 0
+  %nop7985 = alloca i1, i1 0
+  %nop7986 = alloca i1, i1 0
+  %nop7987 = alloca i1, i1 0
+  %nop7988 = alloca i1, i1 0
+  %nop7989 = alloca i1, i1 0
+  %nop7990 = alloca i1, i1 0
+  %nop7991 = alloca i1, i1 0
+  %nop7992 = alloca i1, i1 0
+  %nop7993 = alloca i1, i1 0
+  %nop7994 = alloca i1, i1 0
+  %nop7995 = alloca i1, i1 0
+  %nop7996 = alloca i1, i1 0
+  %nop7997 = alloca i1, i1 0
+  %nop7998 = alloca i1, i1 0
+  %nop7999 = alloca i1, i1 0
+  %nop8000 = alloca i1, i1 0
+  %nop8001 = alloca i1, i1 0
+  %nop8002 = alloca i1, i1 0
+  %nop8003 = alloca i1, i1 0
+  %nop8004 = alloca i1, i1 0
+  %nop8005 = alloca i1, i1 0
+  %nop8006 = alloca i1, i1 0
+  %nop8007 = alloca i1, i1 0
+  %nop8008 = alloca i1, i1 0
+  %nop8009 = alloca i1, i1 0
+  %nop8010 = alloca i1, i1 0
+  %nop8011 = alloca i1, i1 0
+  %nop8012 = alloca i1, i1 0
+  %nop8013 = alloca i1, i1 0
+  %nop8014 = alloca i1, i1 0
+  %nop8015 = alloca i1, i1 0
+  %nop8016 = alloca i1, i1 0
+  %nop8017 = alloca i1, i1 0
+  %nop8018 = alloca i1, i1 0
+  %nop8019 = alloca i1, i1 0
+  %nop8020 = alloca i1, i1 0
+  %nop8021 = alloca i1, i1 0
+  %nop8022 = alloca i1, i1 0
+  %nop8023 = alloca i1, i1 0
+  %nop8024 = alloca i1, i1 0
+  %nop8025 = alloca i1, i1 0
+  %nop8026 = alloca i1, i1 0
+  %nop8027 = alloca i1, i1 0
+  %nop8028 = alloca i1, i1 0
+  %nop8029 = alloca i1, i1 0
+  %nop8030 = alloca i1, i1 0
+  %nop8031 = alloca i1, i1 0
+  %nop8032 = alloca i1, i1 0
+  %nop8033 = alloca i1, i1 0
+  %nop8034 = alloca i1, i1 0
+  %nop8035 = alloca i1, i1 0
+  %nop8036 = alloca i1, i1 0
+  %nop8037 = alloca i1, i1 0
+  %nop8038 = alloca i1, i1 0
+  %nop8039 = alloca i1, i1 0
+  %nop8040 = alloca i1, i1 0
+  %nop8041 = alloca i1, i1 0
+  %nop8042 = alloca i1, i1 0
+  %nop8043 = alloca i1, i1 0
+  %nop8044 = alloca i1, i1 0
+  %nop8045 = alloca i1, i1 0
+  %nop8046 = alloca i1, i1 0
+  %nop8047 = alloca i1, i1 0
+  %nop8048 = alloca i1, i1 0
+  %nop8049 = alloca i1, i1 0
+  %nop8050 = alloca i1, i1 0
+  %nop8051 = alloca i1, i1 0
+  %nop8052 = alloca i1, i1 0
+  %nop8053 = alloca i1, i1 0
+  %nop8054 = alloca i1, i1 0
+  %nop8055 = alloca i1, i1 0
+  %nop8056 = alloca i1, i1 0
+  %nop8057 = alloca i1, i1 0
+  %nop8058 = alloca i1, i1 0
+  %nop8059 = alloca i1, i1 0
+  %nop8060 = alloca i1, i1 0
+  %nop8061 = alloca i1, i1 0
+  %nop8062 = alloca i1, i1 0
+  %nop8063 = alloca i1, i1 0
+  %nop8064 = alloca i1, i1 0
+  %nop8065 = alloca i1, i1 0
+  %nop8066 = alloca i1, i1 0
+  %nop8067 = alloca i1, i1 0
+  %nop8068 = alloca i1, i1 0
+  %nop8069 = alloca i1, i1 0
+  %nop8070 = alloca i1, i1 0
+  %nop8071 = alloca i1, i1 0
+  %nop8072 = alloca i1, i1 0
+  %nop8073 = alloca i1, i1 0
+  %nop8074 = alloca i1, i1 0
+  %nop8075 = alloca i1, i1 0
+  %nop8076 = alloca i1, i1 0
+  %nop8077 = alloca i1, i1 0
+  %nop8078 = alloca i1, i1 0
+  %nop8079 = alloca i1, i1 0
+  %nop8080 = alloca i1, i1 0
+  %nop8081 = alloca i1, i1 0
+  %nop8082 = alloca i1, i1 0
+  %nop8083 = alloca i1, i1 0
+  %nop8084 = alloca i1, i1 0
+  %nop8085 = alloca i1, i1 0
+  %nop8086 = alloca i1, i1 0
+  %nop8087 = alloca i1, i1 0
+  %nop8088 = alloca i1, i1 0
+  %nop8089 = alloca i1, i1 0
+  %nop8090 = alloca i1, i1 0
+  %nop8091 = alloca i1, i1 0
+  %nop8092 = alloca i1, i1 0
+  %nop8093 = alloca i1, i1 0
+  %nop8094 = alloca i1, i1 0
+  %nop8095 = alloca i1, i1 0
+  %nop8096 = alloca i1, i1 0
+  %nop8097 = alloca i1, i1 0
+  %nop8098 = alloca i1, i1 0
+  %nop8099 = alloca i1, i1 0
+  %nop8100 = alloca i1, i1 0
+  %nop8101 = alloca i1, i1 0
+  %nop8102 = alloca i1, i1 0
+  %nop8103 = alloca i1, i1 0
+  %nop8104 = alloca i1, i1 0
+  %nop8105 = alloca i1, i1 0
+  %nop8106 = alloca i1, i1 0
+  %nop8107 = alloca i1, i1 0
+  %nop8108 = alloca i1, i1 0
+  %nop8109 = alloca i1, i1 0
+  %nop8110 = alloca i1, i1 0
+  %nop8111 = alloca i1, i1 0
+  %nop8112 = alloca i1, i1 0
+  %nop8113 = alloca i1, i1 0
+  %nop8114 = alloca i1, i1 0
+  %nop8115 = alloca i1, i1 0
+  %nop8116 = alloca i1, i1 0
+  %nop8117 = alloca i1, i1 0
+  %nop8118 = alloca i1, i1 0
+  %nop8119 = alloca i1, i1 0
+  %nop8120 = alloca i1, i1 0
+  %nop8121 = alloca i1, i1 0
+  %nop8122 = alloca i1, i1 0
+  %nop8123 = alloca i1, i1 0
+  %nop8124 = alloca i1, i1 0
+  %nop8125 = alloca i1, i1 0
+  %nop8126 = alloca i1, i1 0
+  %nop8127 = alloca i1, i1 0
+  %nop8128 = alloca i1, i1 0
+  %nop8129 = alloca i1, i1 0
+  %nop8130 = alloca i1, i1 0
+  %nop8131 = alloca i1, i1 0
+  %nop8132 = alloca i1, i1 0
+  %nop8133 = alloca i1, i1 0
+  %nop8134 = alloca i1, i1 0
+  %nop8135 = alloca i1, i1 0
+  %nop8136 = alloca i1, i1 0
+  %nop8137 = alloca i1, i1 0
+  %nop8138 = alloca i1, i1 0
+  %nop8139 = alloca i1, i1 0
+  %nop8140 = alloca i1, i1 0
+  %nop8141 = alloca i1, i1 0
+  %nop8142 = alloca i1, i1 0
+  %nop8143 = alloca i1, i1 0
+  %nop8144 = alloca i1, i1 0
+  %nop8145 = alloca i1, i1 0
+  %nop8146 = alloca i1, i1 0
+  %nop8147 = alloca i1, i1 0
+  %nop8148 = alloca i1, i1 0
+  %nop8149 = alloca i1, i1 0
+  %nop8150 = alloca i1, i1 0
+  %nop8151 = alloca i1, i1 0
+  %nop8152 = alloca i1, i1 0
+  %nop8153 = alloca i1, i1 0
+  %nop8154 = alloca i1, i1 0
+  %nop8155 = alloca i1, i1 0
+  %nop8156 = alloca i1, i1 0
+  %nop8157 = alloca i1, i1 0
+  %nop8158 = alloca i1, i1 0
+  %nop8159 = alloca i1, i1 0
+  %nop8160 = alloca i1, i1 0
+  %nop8161 = alloca i1, i1 0
+  %nop8162 = alloca i1, i1 0
+  %nop8163 = alloca i1, i1 0
+  %nop8164 = alloca i1, i1 0
+  %nop8165 = alloca i1, i1 0
+  %nop8166 = alloca i1, i1 0
+  %nop8167 = alloca i1, i1 0
+  %nop8168 = alloca i1, i1 0
+  %nop8169 = alloca i1, i1 0
+  %nop8170 = alloca i1, i1 0
+  %nop8171 = alloca i1, i1 0
+  %nop8172 = alloca i1, i1 0
+  %nop8173 = alloca i1, i1 0
+  %nop8174 = alloca i1, i1 0
+  %nop8175 = alloca i1, i1 0
+  %nop8176 = alloca i1, i1 0
+  %nop8177 = alloca i1, i1 0
+  %nop8178 = alloca i1, i1 0
+  %nop8179 = alloca i1, i1 0
+  %nop8180 = alloca i1, i1 0
+  %nop8181 = alloca i1, i1 0
+  %nop8182 = alloca i1, i1 0
+  %nop8183 = alloca i1, i1 0
+  %nop8184 = alloca i1, i1 0
+  %nop8185 = alloca i1, i1 0
+  %nop8186 = alloca i1, i1 0
+  %nop8187 = alloca i1, i1 0
+  %nop8188 = alloca i1, i1 0
+  %nop8189 = alloca i1, i1 0
+  %nop8190 = alloca i1, i1 0
+  %nop8191 = alloca i1, i1 0
+  %nop8192 = alloca i1, i1 0
+  %nop8193 = alloca i1, i1 0
+  %nop8194 = alloca i1, i1 0
+  %nop8195 = alloca i1, i1 0
+  %nop8196 = alloca i1, i1 0
+  %nop8197 = alloca i1, i1 0
+  %nop8198 = alloca i1, i1 0
+  %nop8199 = alloca i1, i1 0
+  %nop8200 = alloca i1, i1 0
+  %nop8201 = alloca i1, i1 0
+  %nop8202 = alloca i1, i1 0
+  %nop8203 = alloca i1, i1 0
+  %nop8204 = alloca i1, i1 0
+  %nop8205 = alloca i1, i1 0
+  %nop8206 = alloca i1, i1 0
+  %nop8207 = alloca i1, i1 0
+  %nop8208 = alloca i1, i1 0
+  %nop8209 = alloca i1, i1 0
+  %nop8210 = alloca i1, i1 0
+  %nop8211 = alloca i1, i1 0
+  %nop8212 = alloca i1, i1 0
+  %nop8213 = alloca i1, i1 0
+  %nop8214 = alloca i1, i1 0
+  %nop8215 = alloca i1, i1 0
+  %nop8216 = alloca i1, i1 0
+  %nop8217 = alloca i1, i1 0
+  %nop8218 = alloca i1, i1 0
+  %nop8219 = alloca i1, i1 0
+  %nop8220 = alloca i1, i1 0
+  %nop8221 = alloca i1, i1 0
+  %nop8222 = alloca i1, i1 0
+  %nop8223 = alloca i1, i1 0
+  %nop8224 = alloca i1, i1 0
+  %nop8225 = alloca i1, i1 0
+  %nop8226 = alloca i1, i1 0
+  %nop8227 = alloca i1, i1 0
+  %nop8228 = alloca i1, i1 0
+  %nop8229 = alloca i1, i1 0
+  %nop8230 = alloca i1, i1 0
+  %nop8231 = alloca i1, i1 0
+  %nop8232 = alloca i1, i1 0
+  %nop8233 = alloca i1, i1 0
+  %nop8234 = alloca i1, i1 0
+  %nop8235 = alloca i1, i1 0
+  %nop8236 = alloca i1, i1 0
+  %nop8237 = alloca i1, i1 0
+  %nop8238 = alloca i1, i1 0
+  %nop8239 = alloca i1, i1 0
+  %nop8240 = alloca i1, i1 0
+  %nop8241 = alloca i1, i1 0
+  %nop8242 = alloca i1, i1 0
+  %nop8243 = alloca i1, i1 0
+  %nop8244 = alloca i1, i1 0
+  %nop8245 = alloca i1, i1 0
+  %nop8246 = alloca i1, i1 0
+  %nop8247 = alloca i1, i1 0
+  %nop8248 = alloca i1, i1 0
+  %nop8249 = alloca i1, i1 0
+  %nop8250 = alloca i1, i1 0
+  %nop8251 = alloca i1, i1 0
+  %nop8252 = alloca i1, i1 0
+  %nop8253 = alloca i1, i1 0
+  %nop8254 = alloca i1, i1 0
+  %nop8255 = alloca i1, i1 0
+  %nop8256 = alloca i1, i1 0
+  %nop8257 = alloca i1, i1 0
+  %nop8258 = alloca i1, i1 0
+  %nop8259 = alloca i1, i1 0
+  %nop8260 = alloca i1, i1 0
+  %nop8261 = alloca i1, i1 0
+  %nop8262 = alloca i1, i1 0
+  %nop8263 = alloca i1, i1 0
+  %nop8264 = alloca i1, i1 0
+  %nop8265 = alloca i1, i1 0
+  %nop8266 = alloca i1, i1 0
+  %nop8267 = alloca i1, i1 0
+  %nop8268 = alloca i1, i1 0
+  %nop8269 = alloca i1, i1 0
+  %nop8270 = alloca i1, i1 0
+  %nop8271 = alloca i1, i1 0
+  %nop8272 = alloca i1, i1 0
+  %nop8273 = alloca i1, i1 0
+  %nop8274 = alloca i1, i1 0
+  %nop8275 = alloca i1, i1 0
+  %nop8276 = alloca i1, i1 0
+  %nop8277 = alloca i1, i1 0
+  %nop8278 = alloca i1, i1 0
+  %nop8279 = alloca i1, i1 0
+  %nop8280 = alloca i1, i1 0
+  %nop8281 = alloca i1, i1 0
+  %nop8282 = alloca i1, i1 0
+  %nop8283 = alloca i1, i1 0
+  %nop8284 = alloca i1, i1 0
+  %nop8285 = alloca i1, i1 0
+  %nop8286 = alloca i1, i1 0
+  %nop8287 = alloca i1, i1 0
+  %nop8288 = alloca i1, i1 0
+  %nop8289 = alloca i1, i1 0
+  %nop8290 = alloca i1, i1 0
+  %nop8291 = alloca i1, i1 0
+  %nop8292 = alloca i1, i1 0
+  %nop8293 = alloca i1, i1 0
+  %nop8294 = alloca i1, i1 0
+  %nop8295 = alloca i1, i1 0
+  %nop8296 = alloca i1, i1 0
+  %nop8297 = alloca i1, i1 0
+  %nop8298 = alloca i1, i1 0
+  %nop8299 = alloca i1, i1 0
+  %nop8300 = alloca i1, i1 0
+  %nop8301 = alloca i1, i1 0
+  %nop8302 = alloca i1, i1 0
+  %nop8303 = alloca i1, i1 0
+  %nop8304 = alloca i1, i1 0
+  %nop8305 = alloca i1, i1 0
+  %nop8306 = alloca i1, i1 0
+  %nop8307 = alloca i1, i1 0
+  %nop8308 = alloca i1, i1 0
+  %nop8309 = alloca i1, i1 0
+  %nop8310 = alloca i1, i1 0
+  %nop8311 = alloca i1, i1 0
+  %nop8312 = alloca i1, i1 0
+  %nop8313 = alloca i1, i1 0
+  %nop8314 = alloca i1, i1 0
+  %nop8315 = alloca i1, i1 0
+  %nop8316 = alloca i1, i1 0
+  %nop8317 = alloca i1, i1 0
+  %nop8318 = alloca i1, i1 0
+  %nop8319 = alloca i1, i1 0
+  %nop8320 = alloca i1, i1 0
+  %nop8321 = alloca i1, i1 0
+  %nop8322 = alloca i1, i1 0
+  %nop8323 = alloca i1, i1 0
+  %nop8324 = alloca i1, i1 0
+  %nop8325 = alloca i1, i1 0
+  %nop8326 = alloca i1, i1 0
+  %nop8327 = alloca i1, i1 0
+  %nop8328 = alloca i1, i1 0
+  %nop8329 = alloca i1, i1 0
+  %nop8330 = alloca i1, i1 0
+  %nop8331 = alloca i1, i1 0
+  %nop8332 = alloca i1, i1 0
+  %nop8333 = alloca i1, i1 0
+  %nop8334 = alloca i1, i1 0
+  %nop8335 = alloca i1, i1 0
+  %nop8336 = alloca i1, i1 0
+  %nop8337 = alloca i1, i1 0
+  %nop8338 = alloca i1, i1 0
+  %nop8339 = alloca i1, i1 0
+  %nop8340 = alloca i1, i1 0
+  %nop8341 = alloca i1, i1 0
+  %nop8342 = alloca i1, i1 0
+  %nop8343 = alloca i1, i1 0
+  %nop8344 = alloca i1, i1 0
+  %nop8345 = alloca i1, i1 0
+  %nop8346 = alloca i1, i1 0
+  %nop8347 = alloca i1, i1 0
+  %nop8348 = alloca i1, i1 0
+  %nop8349 = alloca i1, i1 0
+  %nop8350 = alloca i1, i1 0
+  %nop8351 = alloca i1, i1 0
+  %nop8352 = alloca i1, i1 0
+  %nop8353 = alloca i1, i1 0
+  %nop8354 = alloca i1, i1 0
+  %nop8355 = alloca i1, i1 0
+  %nop8356 = alloca i1, i1 0
+  %nop8357 = alloca i1, i1 0
+  %nop8358 = alloca i1, i1 0
+  %nop8359 = alloca i1, i1 0
+  %nop8360 = alloca i1, i1 0
+  %nop8361 = alloca i1, i1 0
+  %nop8362 = alloca i1, i1 0
+  %nop8363 = alloca i1, i1 0
+  %nop8364 = alloca i1, i1 0
+  %nop8365 = alloca i1, i1 0
+  %nop8366 = alloca i1, i1 0
+  %nop8367 = alloca i1, i1 0
+  %nop8368 = alloca i1, i1 0
+  %nop8369 = alloca i1, i1 0
+  %nop8370 = alloca i1, i1 0
+  %nop8371 = alloca i1, i1 0
+  %nop8372 = alloca i1, i1 0
+  %nop8373 = alloca i1, i1 0
+  %nop8374 = alloca i1, i1 0
+  %nop8375 = alloca i1, i1 0
+  %nop8376 = alloca i1, i1 0
+  %nop8377 = alloca i1, i1 0
+  %nop8378 = alloca i1, i1 0
+  %nop8379 = alloca i1, i1 0
+  %nop8380 = alloca i1, i1 0
+  %nop8381 = alloca i1, i1 0
+  %nop8382 = alloca i1, i1 0
+  %nop8383 = alloca i1, i1 0
+  %nop8384 = alloca i1, i1 0
+  %nop8385 = alloca i1, i1 0
+  %nop8386 = alloca i1, i1 0
+  %nop8387 = alloca i1, i1 0
+  %nop8388 = alloca i1, i1 0
+  %nop8389 = alloca i1, i1 0
+  %nop8390 = alloca i1, i1 0
+  %nop8391 = alloca i1, i1 0
+  %nop8392 = alloca i1, i1 0
+  %nop8393 = alloca i1, i1 0
+  %nop8394 = alloca i1, i1 0
+  %nop8395 = alloca i1, i1 0
+  %nop8396 = alloca i1, i1 0
+  %nop8397 = alloca i1, i1 0
+  %nop8398 = alloca i1, i1 0
+  %nop8399 = alloca i1, i1 0
+  %nop8400 = alloca i1, i1 0
+  %nop8401 = alloca i1, i1 0
+  %nop8402 = alloca i1, i1 0
+  %nop8403 = alloca i1, i1 0
+  %nop8404 = alloca i1, i1 0
+  %nop8405 = alloca i1, i1 0
+  %nop8406 = alloca i1, i1 0
+  %nop8407 = alloca i1, i1 0
+  %nop8408 = alloca i1, i1 0
+  %nop8409 = alloca i1, i1 0
+  %nop8410 = alloca i1, i1 0
+  %nop8411 = alloca i1, i1 0
+  %nop8412 = alloca i1, i1 0
+  %nop8413 = alloca i1, i1 0
+  %nop8414 = alloca i1, i1 0
+  %nop8415 = alloca i1, i1 0
+  %nop8416 = alloca i1, i1 0
+  %nop8417 = alloca i1, i1 0
+  %nop8418 = alloca i1, i1 0
+  %nop8419 = alloca i1, i1 0
+  %nop8420 = alloca i1, i1 0
+  %nop8421 = alloca i1, i1 0
+  %nop8422 = alloca i1, i1 0
+  %nop8423 = alloca i1, i1 0
+  %nop8424 = alloca i1, i1 0
+  %nop8425 = alloca i1, i1 0
+  %nop8426 = alloca i1, i1 0
+  %nop8427 = alloca i1, i1 0
+  %nop8428 = alloca i1, i1 0
+  %nop8429 = alloca i1, i1 0
+  %nop8430 = alloca i1, i1 0
+  %nop8431 = alloca i1, i1 0
+  %nop8432 = alloca i1, i1 0
+  %nop8433 = alloca i1, i1 0
+  %nop8434 = alloca i1, i1 0
+  %nop8435 = alloca i1, i1 0
+  %nop8436 = alloca i1, i1 0
+  %nop8437 = alloca i1, i1 0
+  %nop8438 = alloca i1, i1 0
+  %nop8439 = alloca i1, i1 0
+  %nop8440 = alloca i1, i1 0
+  %nop8441 = alloca i1, i1 0
+  %nop8442 = alloca i1, i1 0
+  %nop8443 = alloca i1, i1 0
+  %nop8444 = alloca i1, i1 0
+  %nop8445 = alloca i1, i1 0
+  %nop8446 = alloca i1, i1 0
+  %nop8447 = alloca i1, i1 0
+  %nop8448 = alloca i1, i1 0
+  %nop8449 = alloca i1, i1 0
+  %nop8450 = alloca i1, i1 0
+  %nop8451 = alloca i1, i1 0
+  %nop8452 = alloca i1, i1 0
+  %nop8453 = alloca i1, i1 0
+  %nop8454 = alloca i1, i1 0
+  %nop8455 = alloca i1, i1 0
+  %nop8456 = alloca i1, i1 0
+  %nop8457 = alloca i1, i1 0
+  %nop8458 = alloca i1, i1 0
+  %nop8459 = alloca i1, i1 0
+  %nop8460 = alloca i1, i1 0
+  %nop8461 = alloca i1, i1 0
+  %nop8462 = alloca i1, i1 0
+  %nop8463 = alloca i1, i1 0
+  %nop8464 = alloca i1, i1 0
+  %nop8465 = alloca i1, i1 0
+  %nop8466 = alloca i1, i1 0
+  %nop8467 = alloca i1, i1 0
+  %nop8468 = alloca i1, i1 0
+  %nop8469 = alloca i1, i1 0
+  %nop8470 = alloca i1, i1 0
+  %nop8471 = alloca i1, i1 0
+  %nop8472 = alloca i1, i1 0
+  %nop8473 = alloca i1, i1 0
+  %nop8474 = alloca i1, i1 0
+  %nop8475 = alloca i1, i1 0
+  %nop8476 = alloca i1, i1 0
+  %nop8477 = alloca i1, i1 0
+  %nop8478 = alloca i1, i1 0
+  %nop8479 = alloca i1, i1 0
+  %nop8480 = alloca i1, i1 0
+  %nop8481 = alloca i1, i1 0
+  %nop8482 = alloca i1, i1 0
+  %nop8483 = alloca i1, i1 0
+  %nop8484 = alloca i1, i1 0
+  %nop8485 = alloca i1, i1 0
+  %nop8486 = alloca i1, i1 0
+  %nop8487 = alloca i1, i1 0
+  %nop8488 = alloca i1, i1 0
+  %nop8489 = alloca i1, i1 0
+  %nop8490 = alloca i1, i1 0
+  %nop8491 = alloca i1, i1 0
+  %nop8492 = alloca i1, i1 0
+  %nop8493 = alloca i1, i1 0
+  %nop8494 = alloca i1, i1 0
+  %nop8495 = alloca i1, i1 0
+  %nop8496 = alloca i1, i1 0
+  %nop8497 = alloca i1, i1 0
+  %nop8498 = alloca i1, i1 0
+  %nop8499 = alloca i1, i1 0
+  %nop8500 = alloca i1, i1 0
+  %nop8501 = alloca i1, i1 0
+  %nop8502 = alloca i1, i1 0
+  %nop8503 = alloca i1, i1 0
+  %nop8504 = alloca i1, i1 0
+  %nop8505 = alloca i1, i1 0
+  %nop8506 = alloca i1, i1 0
+  %nop8507 = alloca i1, i1 0
+  %nop8508 = alloca i1, i1 0
+  %nop8509 = alloca i1, i1 0
+  %nop8510 = alloca i1, i1 0
+  %nop8511 = alloca i1, i1 0
+  %nop8512 = alloca i1, i1 0
+  %nop8513 = alloca i1, i1 0
+  %nop8514 = alloca i1, i1 0
+  %nop8515 = alloca i1, i1 0
+  %nop8516 = alloca i1, i1 0
+  %nop8517 = alloca i1, i1 0
+  %nop8518 = alloca i1, i1 0
+  %nop8519 = alloca i1, i1 0
+  %nop8520 = alloca i1, i1 0
+  %nop8521 = alloca i1, i1 0
+  %nop8522 = alloca i1, i1 0
+  %nop8523 = alloca i1, i1 0
+  %nop8524 = alloca i1, i1 0
+  %nop8525 = alloca i1, i1 0
+  %nop8526 = alloca i1, i1 0
+  %nop8527 = alloca i1, i1 0
+  %nop8528 = alloca i1, i1 0
+  %nop8529 = alloca i1, i1 0
+  %nop8530 = alloca i1, i1 0
+  %nop8531 = alloca i1, i1 0
+  %nop8532 = alloca i1, i1 0
+  %nop8533 = alloca i1, i1 0
+  %nop8534 = alloca i1, i1 0
+  %nop8535 = alloca i1, i1 0
+  %nop8536 = alloca i1, i1 0
+  %nop8537 = alloca i1, i1 0
+  %nop8538 = alloca i1, i1 0
+  %nop8539 = alloca i1, i1 0
+  %nop8540 = alloca i1, i1 0
+  %nop8541 = alloca i1, i1 0
+  %nop8542 = alloca i1, i1 0
+  %nop8543 = alloca i1, i1 0
+  %nop8544 = alloca i1, i1 0
+  %nop8545 = alloca i1, i1 0
+  %nop8546 = alloca i1, i1 0
+  %nop8547 = alloca i1, i1 0
+  %nop8548 = alloca i1, i1 0
+  %nop8549 = alloca i1, i1 0
+  %nop8550 = alloca i1, i1 0
+  %nop8551 = alloca i1, i1 0
+  %nop8552 = alloca i1, i1 0
+  %nop8553 = alloca i1, i1 0
+  %nop8554 = alloca i1, i1 0
+  %nop8555 = alloca i1, i1 0
+  %nop8556 = alloca i1, i1 0
+  %nop8557 = alloca i1, i1 0
+  %nop8558 = alloca i1, i1 0
+  %nop8559 = alloca i1, i1 0
+  %nop8560 = alloca i1, i1 0
+  %nop8561 = alloca i1, i1 0
+  %nop8562 = alloca i1, i1 0
+  %nop8563 = alloca i1, i1 0
+  %nop8564 = alloca i1, i1 0
+  %nop8565 = alloca i1, i1 0
+  %nop8566 = alloca i1, i1 0
+  %nop8567 = alloca i1, i1 0
+  %nop8568 = alloca i1, i1 0
+  %nop8569 = alloca i1, i1 0
+  %nop8570 = alloca i1, i1 0
+  %nop8571 = alloca i1, i1 0
+  %nop8572 = alloca i1, i1 0
+  %nop8573 = alloca i1, i1 0
+  %nop8574 = alloca i1, i1 0
+  %nop8575 = alloca i1, i1 0
+  %nop8576 = alloca i1, i1 0
+  %nop8577 = alloca i1, i1 0
+  %nop8578 = alloca i1, i1 0
+  %nop8579 = alloca i1, i1 0
+  %nop8580 = alloca i1, i1 0
+  %nop8581 = alloca i1, i1 0
+  %nop8582 = alloca i1, i1 0
+  %nop8583 = alloca i1, i1 0
+  %nop8584 = alloca i1, i1 0
+  %nop8585 = alloca i1, i1 0
+  %nop8586 = alloca i1, i1 0
+  %nop8587 = alloca i1, i1 0
+  %nop8588 = alloca i1, i1 0
+  %nop8589 = alloca i1, i1 0
+  %nop8590 = alloca i1, i1 0
+  %nop8591 = alloca i1, i1 0
+  %nop8592 = alloca i1, i1 0
+  %nop8593 = alloca i1, i1 0
+  %nop8594 = alloca i1, i1 0
+  %nop8595 = alloca i1, i1 0
+  %nop8596 = alloca i1, i1 0
+  %nop8597 = alloca i1, i1 0
+  %nop8598 = alloca i1, i1 0
+  %nop8599 = alloca i1, i1 0
+  %nop8600 = alloca i1, i1 0
+  %nop8601 = alloca i1, i1 0
+  %nop8602 = alloca i1, i1 0
+  %nop8603 = alloca i1, i1 0
+  %nop8604 = alloca i1, i1 0
+  %nop8605 = alloca i1, i1 0
+  %nop8606 = alloca i1, i1 0
+  %nop8607 = alloca i1, i1 0
+  %nop8608 = alloca i1, i1 0
+  %nop8609 = alloca i1, i1 0
+  %nop8610 = alloca i1, i1 0
+  %nop8611 = alloca i1, i1 0
+  %nop8612 = alloca i1, i1 0
+  %nop8613 = alloca i1, i1 0
+  %nop8614 = alloca i1, i1 0
+  %nop8615 = alloca i1, i1 0
+  %nop8616 = alloca i1, i1 0
+  %nop8617 = alloca i1, i1 0
+  %nop8618 = alloca i1, i1 0
+  %nop8619 = alloca i1, i1 0
+  %nop8620 = alloca i1, i1 0
+  %nop8621 = alloca i1, i1 0
+  %nop8622 = alloca i1, i1 0
+  %nop8623 = alloca i1, i1 0
+  %nop8624 = alloca i1, i1 0
+  %nop8625 = alloca i1, i1 0
+  %nop8626 = alloca i1, i1 0
+  %nop8627 = alloca i1, i1 0
+  %nop8628 = alloca i1, i1 0
+  %nop8629 = alloca i1, i1 0
+  %nop8630 = alloca i1, i1 0
+  %nop8631 = alloca i1, i1 0
+  %nop8632 = alloca i1, i1 0
+  %nop8633 = alloca i1, i1 0
+  %nop8634 = alloca i1, i1 0
+  %nop8635 = alloca i1, i1 0
+  %nop8636 = alloca i1, i1 0
+  %nop8637 = alloca i1, i1 0
+  %nop8638 = alloca i1, i1 0
+  %nop8639 = alloca i1, i1 0
+  %nop8640 = alloca i1, i1 0
+  %nop8641 = alloca i1, i1 0
+  %nop8642 = alloca i1, i1 0
+  %nop8643 = alloca i1, i1 0
+  %nop8644 = alloca i1, i1 0
+  %nop8645 = alloca i1, i1 0
+  %nop8646 = alloca i1, i1 0
+  %nop8647 = alloca i1, i1 0
+  %nop8648 = alloca i1, i1 0
+  %nop8649 = alloca i1, i1 0
+  %nop8650 = alloca i1, i1 0
+  %nop8651 = alloca i1, i1 0
+  %nop8652 = alloca i1, i1 0
+  %nop8653 = alloca i1, i1 0
+  %nop8654 = alloca i1, i1 0
+  %nop8655 = alloca i1, i1 0
+  %nop8656 = alloca i1, i1 0
+  %nop8657 = alloca i1, i1 0
+  %nop8658 = alloca i1, i1 0
+  %nop8659 = alloca i1, i1 0
+  %nop8660 = alloca i1, i1 0
+  %nop8661 = alloca i1, i1 0
+  %nop8662 = alloca i1, i1 0
+  %nop8663 = alloca i1, i1 0
+  %nop8664 = alloca i1, i1 0
+  %nop8665 = alloca i1, i1 0
+  %nop8666 = alloca i1, i1 0
+  %nop8667 = alloca i1, i1 0
+  %nop8668 = alloca i1, i1 0
+  %nop8669 = alloca i1, i1 0
+  %nop8670 = alloca i1, i1 0
+  %nop8671 = alloca i1, i1 0
+  %nop8672 = alloca i1, i1 0
+  %nop8673 = alloca i1, i1 0
+  %nop8674 = alloca i1, i1 0
+  %nop8675 = alloca i1, i1 0
+  %nop8676 = alloca i1, i1 0
+  %nop8677 = alloca i1, i1 0
+  %nop8678 = alloca i1, i1 0
+  %nop8679 = alloca i1, i1 0
+  %nop8680 = alloca i1, i1 0
+  %nop8681 = alloca i1, i1 0
+  %nop8682 = alloca i1, i1 0
+  %nop8683 = alloca i1, i1 0
+  %nop8684 = alloca i1, i1 0
+  %nop8685 = alloca i1, i1 0
+  %nop8686 = alloca i1, i1 0
+  %nop8687 = alloca i1, i1 0
+  %nop8688 = alloca i1, i1 0
+  %nop8689 = alloca i1, i1 0
+  %nop8690 = alloca i1, i1 0
+  %nop8691 = alloca i1, i1 0
+  %nop8692 = alloca i1, i1 0
+  %nop8693 = alloca i1, i1 0
+  %nop8694 = alloca i1, i1 0
+  %nop8695 = alloca i1, i1 0
+  %nop8696 = alloca i1, i1 0
+  %nop8697 = alloca i1, i1 0
+  %nop8698 = alloca i1, i1 0
+  %nop8699 = alloca i1, i1 0
+  %nop8700 = alloca i1, i1 0
+  %nop8701 = alloca i1, i1 0
+  %nop8702 = alloca i1, i1 0
+  %nop8703 = alloca i1, i1 0
+  %nop8704 = alloca i1, i1 0
+  %nop8705 = alloca i1, i1 0
+  %nop8706 = alloca i1, i1 0
+  %nop8707 = alloca i1, i1 0
+  %nop8708 = alloca i1, i1 0
+  %nop8709 = alloca i1, i1 0
+  %nop8710 = alloca i1, i1 0
+  %nop8711 = alloca i1, i1 0
+  %nop8712 = alloca i1, i1 0
+  %nop8713 = alloca i1, i1 0
+  %nop8714 = alloca i1, i1 0
+  %nop8715 = alloca i1, i1 0
+  %nop8716 = alloca i1, i1 0
+  %nop8717 = alloca i1, i1 0
+  %nop8718 = alloca i1, i1 0
+  %nop8719 = alloca i1, i1 0
+  %nop8720 = alloca i1, i1 0
+  %nop8721 = alloca i1, i1 0
+  %nop8722 = alloca i1, i1 0
+  %nop8723 = alloca i1, i1 0
+  %nop8724 = alloca i1, i1 0
+  %nop8725 = alloca i1, i1 0
+  %nop8726 = alloca i1, i1 0
+  %nop8727 = alloca i1, i1 0
+  %nop8728 = alloca i1, i1 0
+  %nop8729 = alloca i1, i1 0
+  %nop8730 = alloca i1, i1 0
+  %nop8731 = alloca i1, i1 0
+  %nop8732 = alloca i1, i1 0
+  %nop8733 = alloca i1, i1 0
+  %nop8734 = alloca i1, i1 0
+  %nop8735 = alloca i1, i1 0
+  %nop8736 = alloca i1, i1 0
+  %nop8737 = alloca i1, i1 0
+  %nop8738 = alloca i1, i1 0
+  %nop8739 = alloca i1, i1 0
+  %nop8740 = alloca i1, i1 0
+  %nop8741 = alloca i1, i1 0
+  %nop8742 = alloca i1, i1 0
+  %nop8743 = alloca i1, i1 0
+  %nop8744 = alloca i1, i1 0
+  %nop8745 = alloca i1, i1 0
+  %nop8746 = alloca i1, i1 0
+  %nop8747 = alloca i1, i1 0
+  %nop8748 = alloca i1, i1 0
+  %nop8749 = alloca i1, i1 0
+  %nop8750 = alloca i1, i1 0
+  %nop8751 = alloca i1, i1 0
+  %nop8752 = alloca i1, i1 0
+  %nop8753 = alloca i1, i1 0
+  %nop8754 = alloca i1, i1 0
+  %nop8755 = alloca i1, i1 0
+  %nop8756 = alloca i1, i1 0
+  %nop8757 = alloca i1, i1 0
+  %nop8758 = alloca i1, i1 0
+  %nop8759 = alloca i1, i1 0
+  %nop8760 = alloca i1, i1 0
+  %nop8761 = alloca i1, i1 0
+  %nop8762 = alloca i1, i1 0
+  %nop8763 = alloca i1, i1 0
+  %nop8764 = alloca i1, i1 0
+  %nop8765 = alloca i1, i1 0
+  %nop8766 = alloca i1, i1 0
+  %nop8767 = alloca i1, i1 0
+  %nop8768 = alloca i1, i1 0
+  %nop8769 = alloca i1, i1 0
+  %nop8770 = alloca i1, i1 0
+  %nop8771 = alloca i1, i1 0
+  %nop8772 = alloca i1, i1 0
+  %nop8773 = alloca i1, i1 0
+  %nop8774 = alloca i1, i1 0
+  %nop8775 = alloca i1, i1 0
+  %nop8776 = alloca i1, i1 0
+  %nop8777 = alloca i1, i1 0
+  %nop8778 = alloca i1, i1 0
+  %nop8779 = alloca i1, i1 0
+  %nop8780 = alloca i1, i1 0
+  %nop8781 = alloca i1, i1 0
+  %nop8782 = alloca i1, i1 0
+  %nop8783 = alloca i1, i1 0
+  %nop8784 = alloca i1, i1 0
+  %nop8785 = alloca i1, i1 0
+  %nop8786 = alloca i1, i1 0
+  %nop8787 = alloca i1, i1 0
+  %nop8788 = alloca i1, i1 0
+  %nop8789 = alloca i1, i1 0
+  %nop8790 = alloca i1, i1 0
+  %nop8791 = alloca i1, i1 0
+  %nop8792 = alloca i1, i1 0
+  %nop8793 = alloca i1, i1 0
+  %nop8794 = alloca i1, i1 0
+  %nop8795 = alloca i1, i1 0
+  %nop8796 = alloca i1, i1 0
+  %nop8797 = alloca i1, i1 0
+  %nop8798 = alloca i1, i1 0
+  %nop8799 = alloca i1, i1 0
+  %nop8800 = alloca i1, i1 0
+  %nop8801 = alloca i1, i1 0
+  %nop8802 = alloca i1, i1 0
+  %nop8803 = alloca i1, i1 0
+  %nop8804 = alloca i1, i1 0
+  %nop8805 = alloca i1, i1 0
+  %nop8806 = alloca i1, i1 0
+  %nop8807 = alloca i1, i1 0
+  %nop8808 = alloca i1, i1 0
+  %nop8809 = alloca i1, i1 0
+  %nop8810 = alloca i1, i1 0
+  %nop8811 = alloca i1, i1 0
+  %nop8812 = alloca i1, i1 0
+  %nop8813 = alloca i1, i1 0
+  %nop8814 = alloca i1, i1 0
+  %nop8815 = alloca i1, i1 0
+  %nop8816 = alloca i1, i1 0
+  %nop8817 = alloca i1, i1 0
+  %nop8818 = alloca i1, i1 0
+  %nop8819 = alloca i1, i1 0
+  %nop8820 = alloca i1, i1 0
+  %nop8821 = alloca i1, i1 0
+  %nop8822 = alloca i1, i1 0
+  %nop8823 = alloca i1, i1 0
+  %nop8824 = alloca i1, i1 0
+  %nop8825 = alloca i1, i1 0
+  %nop8826 = alloca i1, i1 0
+  %nop8827 = alloca i1, i1 0
+  %nop8828 = alloca i1, i1 0
+  %nop8829 = alloca i1, i1 0
+  %nop8830 = alloca i1, i1 0
+  %nop8831 = alloca i1, i1 0
+  %nop8832 = alloca i1, i1 0
+  %nop8833 = alloca i1, i1 0
+  %nop8834 = alloca i1, i1 0
+  %nop8835 = alloca i1, i1 0
+  %nop8836 = alloca i1, i1 0
+  %nop8837 = alloca i1, i1 0
+  %nop8838 = alloca i1, i1 0
+  %nop8839 = alloca i1, i1 0
+  %nop8840 = alloca i1, i1 0
+  %nop8841 = alloca i1, i1 0
+  %nop8842 = alloca i1, i1 0
+  %nop8843 = alloca i1, i1 0
+  %nop8844 = alloca i1, i1 0
+  %nop8845 = alloca i1, i1 0
+  %nop8846 = alloca i1, i1 0
+  %nop8847 = alloca i1, i1 0
+  %nop8848 = alloca i1, i1 0
+  %nop8849 = alloca i1, i1 0
+  %nop8850 = alloca i1, i1 0
+  %nop8851 = alloca i1, i1 0
+  %nop8852 = alloca i1, i1 0
+  %nop8853 = alloca i1, i1 0
+  %nop8854 = alloca i1, i1 0
+  %nop8855 = alloca i1, i1 0
+  %nop8856 = alloca i1, i1 0
+  %nop8857 = alloca i1, i1 0
+  %nop8858 = alloca i1, i1 0
+  %nop8859 = alloca i1, i1 0
+  %nop8860 = alloca i1, i1 0
+  %nop8861 = alloca i1, i1 0
+  %nop8862 = alloca i1, i1 0
+  %nop8863 = alloca i1, i1 0
+  %nop8864 = alloca i1, i1 0
+  %nop8865 = alloca i1, i1 0
+  %nop8866 = alloca i1, i1 0
+  %nop8867 = alloca i1, i1 0
+  %nop8868 = alloca i1, i1 0
+  %nop8869 = alloca i1, i1 0
+  %nop8870 = alloca i1, i1 0
+  %nop8871 = alloca i1, i1 0
+  %nop8872 = alloca i1, i1 0
+  %nop8873 = alloca i1, i1 0
+  %nop8874 = alloca i1, i1 0
+  %nop8875 = alloca i1, i1 0
+  %nop8876 = alloca i1, i1 0
+  %nop8877 = alloca i1, i1 0
+  %nop8878 = alloca i1, i1 0
+  %nop8879 = alloca i1, i1 0
+  %nop8880 = alloca i1, i1 0
+  %nop8881 = alloca i1, i1 0
+  %nop8882 = alloca i1, i1 0
+  %nop8883 = alloca i1, i1 0
+  %nop8884 = alloca i1, i1 0
+  %nop8885 = alloca i1, i1 0
+  %nop8886 = alloca i1, i1 0
+  %nop8887 = alloca i1, i1 0
+  %nop8888 = alloca i1, i1 0
+  %nop8889 = alloca i1, i1 0
+  %nop8890 = alloca i1, i1 0
+  %nop8891 = alloca i1, i1 0
+  %nop8892 = alloca i1, i1 0
+  %nop8893 = alloca i1, i1 0
+  %nop8894 = alloca i1, i1 0
+  %nop8895 = alloca i1, i1 0
+  %nop8896 = alloca i1, i1 0
+  %nop8897 = alloca i1, i1 0
+  %nop8898 = alloca i1, i1 0
+  %nop8899 = alloca i1, i1 0
+  %nop8900 = alloca i1, i1 0
+  %nop8901 = alloca i1, i1 0
+  %nop8902 = alloca i1, i1 0
+  %nop8903 = alloca i1, i1 0
+  %nop8904 = alloca i1, i1 0
+  %nop8905 = alloca i1, i1 0
+  %nop8906 = alloca i1, i1 0
+  %nop8907 = alloca i1, i1 0
+  %nop8908 = alloca i1, i1 0
+  %nop8909 = alloca i1, i1 0
+  %nop8910 = alloca i1, i1 0
+  %nop8911 = alloca i1, i1 0
+  %nop8912 = alloca i1, i1 0
+  %nop8913 = alloca i1, i1 0
+  %nop8914 = alloca i1, i1 0
+  %nop8915 = alloca i1, i1 0
+  %nop8916 = alloca i1, i1 0
+  %nop8917 = alloca i1, i1 0
+  %nop8918 = alloca i1, i1 0
+  %nop8919 = alloca i1, i1 0
+  %nop8920 = alloca i1, i1 0
+  %nop8921 = alloca i1, i1 0
+  %nop8922 = alloca i1, i1 0
+  %nop8923 = alloca i1, i1 0
+  %nop8924 = alloca i1, i1 0
+  %nop8925 = alloca i1, i1 0
+  %nop8926 = alloca i1, i1 0
+  %nop8927 = alloca i1, i1 0
+  %nop8928 = alloca i1, i1 0
+  %nop8929 = alloca i1, i1 0
+  %nop8930 = alloca i1, i1 0
+  %nop8931 = alloca i1, i1 0
+  %nop8932 = alloca i1, i1 0
+  %nop8933 = alloca i1, i1 0
+  %nop8934 = alloca i1, i1 0
+  %nop8935 = alloca i1, i1 0
+  %nop8936 = alloca i1, i1 0
+  %nop8937 = alloca i1, i1 0
+  %nop8938 = alloca i1, i1 0
+  %nop8939 = alloca i1, i1 0
+  %nop8940 = alloca i1, i1 0
+  %nop8941 = alloca i1, i1 0
+  %nop8942 = alloca i1, i1 0
+  %nop8943 = alloca i1, i1 0
+  %nop8944 = alloca i1, i1 0
+  %nop8945 = alloca i1, i1 0
+  %nop8946 = alloca i1, i1 0
+  %nop8947 = alloca i1, i1 0
+  %nop8948 = alloca i1, i1 0
+  %nop8949 = alloca i1, i1 0
+  %nop8950 = alloca i1, i1 0
+  %nop8951 = alloca i1, i1 0
+  %nop8952 = alloca i1, i1 0
+  %nop8953 = alloca i1, i1 0
+  %nop8954 = alloca i1, i1 0
+  %nop8955 = alloca i1, i1 0
+  %nop8956 = alloca i1, i1 0
+  %nop8957 = alloca i1, i1 0
+  %nop8958 = alloca i1, i1 0
+  %nop8959 = alloca i1, i1 0
+  %nop8960 = alloca i1, i1 0
+  %nop8961 = alloca i1, i1 0
+  %nop8962 = alloca i1, i1 0
+  %nop8963 = alloca i1, i1 0
+  %nop8964 = alloca i1, i1 0
+  %nop8965 = alloca i1, i1 0
+  %nop8966 = alloca i1, i1 0
+  %nop8967 = alloca i1, i1 0
+  %nop8968 = alloca i1, i1 0
+  %nop8969 = alloca i1, i1 0
+  %nop8970 = alloca i1, i1 0
+  %nop8971 = alloca i1, i1 0
+  %nop8972 = alloca i1, i1 0
+  %nop8973 = alloca i1, i1 0
+  %nop8974 = alloca i1, i1 0
+  %nop8975 = alloca i1, i1 0
+  %nop8976 = alloca i1, i1 0
+  %nop8977 = alloca i1, i1 0
+  %nop8978 = alloca i1, i1 0
+  %nop8979 = alloca i1, i1 0
+  %nop8980 = alloca i1, i1 0
+  %nop8981 = alloca i1, i1 0
+  %nop8982 = alloca i1, i1 0
+  %nop8983 = alloca i1, i1 0
+  %nop8984 = alloca i1, i1 0
+  %nop8985 = alloca i1, i1 0
+  %nop8986 = alloca i1, i1 0
+  %nop8987 = alloca i1, i1 0
+  %nop8988 = alloca i1, i1 0
+  %nop8989 = alloca i1, i1 0
+  %nop8990 = alloca i1, i1 0
+  %nop8991 = alloca i1, i1 0
+  %nop8992 = alloca i1, i1 0
+  %nop8993 = alloca i1, i1 0
+  %nop8994 = alloca i1, i1 0
+  %nop8995 = alloca i1, i1 0
+  %nop8996 = alloca i1, i1 0
+  %nop8997 = alloca i1, i1 0
+  %nop8998 = alloca i1, i1 0
+  %nop8999 = alloca i1, i1 0
+  %nop9000 = alloca i1, i1 0
+  %nop9001 = alloca i1, i1 0
+  %nop9002 = alloca i1, i1 0
+  %nop9003 = alloca i1, i1 0
+  %nop9004 = alloca i1, i1 0
+  %nop9005 = alloca i1, i1 0
+  %nop9006 = alloca i1, i1 0
+  %nop9007 = alloca i1, i1 0
+  %nop9008 = alloca i1, i1 0
+  %nop9009 = alloca i1, i1 0
+  %nop9010 = alloca i1, i1 0
+  %nop9011 = alloca i1, i1 0
+  %nop9012 = alloca i1, i1 0
+  %nop9013 = alloca i1, i1 0
+  %nop9014 = alloca i1, i1 0
+  %nop9015 = alloca i1, i1 0
+  %nop9016 = alloca i1, i1 0
+  %nop9017 = alloca i1, i1 0
+  %nop9018 = alloca i1, i1 0
+  %nop9019 = alloca i1, i1 0
+  %nop9020 = alloca i1, i1 0
+  %nop9021 = alloca i1, i1 0
+  %nop9022 = alloca i1, i1 0
+  %nop9023 = alloca i1, i1 0
+  %nop9024 = alloca i1, i1 0
+  %nop9025 = alloca i1, i1 0
+  %nop9026 = alloca i1, i1 0
+  %nop9027 = alloca i1, i1 0
+  %nop9028 = alloca i1, i1 0
+  %nop9029 = alloca i1, i1 0
+  %nop9030 = alloca i1, i1 0
+  %nop9031 = alloca i1, i1 0
+  %nop9032 = alloca i1, i1 0
+  %nop9033 = alloca i1, i1 0
+  %nop9034 = alloca i1, i1 0
+  %nop9035 = alloca i1, i1 0
+  %nop9036 = alloca i1, i1 0
+  %nop9037 = alloca i1, i1 0
+  %nop9038 = alloca i1, i1 0
+  %nop9039 = alloca i1, i1 0
+  %nop9040 = alloca i1, i1 0
+  %nop9041 = alloca i1, i1 0
+  %nop9042 = alloca i1, i1 0
+  %nop9043 = alloca i1, i1 0
+  %nop9044 = alloca i1, i1 0
+  %nop9045 = alloca i1, i1 0
+  %nop9046 = alloca i1, i1 0
+  %nop9047 = alloca i1, i1 0
+  %nop9048 = alloca i1, i1 0
+  %nop9049 = alloca i1, i1 0
+  %nop9050 = alloca i1, i1 0
+  %nop9051 = alloca i1, i1 0
+  %nop9052 = alloca i1, i1 0
+  %nop9053 = alloca i1, i1 0
+  %nop9054 = alloca i1, i1 0
+  %nop9055 = alloca i1, i1 0
+  %nop9056 = alloca i1, i1 0
+  %nop9057 = alloca i1, i1 0
+  %nop9058 = alloca i1, i1 0
+  %nop9059 = alloca i1, i1 0
+  %nop9060 = alloca i1, i1 0
+  %nop9061 = alloca i1, i1 0
+  %nop9062 = alloca i1, i1 0
+  %nop9063 = alloca i1, i1 0
+  %nop9064 = alloca i1, i1 0
+  %nop9065 = alloca i1, i1 0
+  %nop9066 = alloca i1, i1 0
+  %nop9067 = alloca i1, i1 0
+  %nop9068 = alloca i1, i1 0
+  %nop9069 = alloca i1, i1 0
+  %nop9070 = alloca i1, i1 0
+  %nop9071 = alloca i1, i1 0
+  %nop9072 = alloca i1, i1 0
+  %nop9073 = alloca i1, i1 0
+  %nop9074 = alloca i1, i1 0
+  %nop9075 = alloca i1, i1 0
+  %nop9076 = alloca i1, i1 0
+  %nop9077 = alloca i1, i1 0
+  %nop9078 = alloca i1, i1 0
+  %nop9079 = alloca i1, i1 0
+  %nop9080 = alloca i1, i1 0
+  %nop9081 = alloca i1, i1 0
+  %nop9082 = alloca i1, i1 0
+  %nop9083 = alloca i1, i1 0
+  %nop9084 = alloca i1, i1 0
+  %nop9085 = alloca i1, i1 0
+  %nop9086 = alloca i1, i1 0
+  %nop9087 = alloca i1, i1 0
+  %nop9088 = alloca i1, i1 0
+  %nop9089 = alloca i1, i1 0
+  %nop9090 = alloca i1, i1 0
+  %nop9091 = alloca i1, i1 0
+  %nop9092 = alloca i1, i1 0
+  %nop9093 = alloca i1, i1 0
+  %nop9094 = alloca i1, i1 0
+  %nop9095 = alloca i1, i1 0
+  %nop9096 = alloca i1, i1 0
+  %nop9097 = alloca i1, i1 0
+  %nop9098 = alloca i1, i1 0
+  %nop9099 = alloca i1, i1 0
+  %nop9100 = alloca i1, i1 0
+  %nop9101 = alloca i1, i1 0
+  %nop9102 = alloca i1, i1 0
+  %nop9103 = alloca i1, i1 0
+  %nop9104 = alloca i1, i1 0
+  %nop9105 = alloca i1, i1 0
+  %nop9106 = alloca i1, i1 0
+  %nop9107 = alloca i1, i1 0
+  %nop9108 = alloca i1, i1 0
+  %nop9109 = alloca i1, i1 0
+  %nop9110 = alloca i1, i1 0
+  %nop9111 = alloca i1, i1 0
+  %nop9112 = alloca i1, i1 0
+  %nop9113 = alloca i1, i1 0
+  %nop9114 = alloca i1, i1 0
+  %nop9115 = alloca i1, i1 0
+  %nop9116 = alloca i1, i1 0
+  %nop9117 = alloca i1, i1 0
+  %nop9118 = alloca i1, i1 0
+  %nop9119 = alloca i1, i1 0
+  %nop9120 = alloca i1, i1 0
+  %nop9121 = alloca i1, i1 0
+  %nop9122 = alloca i1, i1 0
+  %nop9123 = alloca i1, i1 0
+  %nop9124 = alloca i1, i1 0
+  %nop9125 = alloca i1, i1 0
+  %nop9126 = alloca i1, i1 0
+  %nop9127 = alloca i1, i1 0
+  %nop9128 = alloca i1, i1 0
+  %nop9129 = alloca i1, i1 0
+  %nop9130 = alloca i1, i1 0
+  %nop9131 = alloca i1, i1 0
+  %nop9132 = alloca i1, i1 0
+  %nop9133 = alloca i1, i1 0
+  %nop9134 = alloca i1, i1 0
+  %nop9135 = alloca i1, i1 0
+  %nop9136 = alloca i1, i1 0
+  %nop9137 = alloca i1, i1 0
+  %nop9138 = alloca i1, i1 0
+  %nop9139 = alloca i1, i1 0
+  %nop9140 = alloca i1, i1 0
+  %nop9141 = alloca i1, i1 0
+  %nop9142 = alloca i1, i1 0
+  %nop9143 = alloca i1, i1 0
+  %nop9144 = alloca i1, i1 0
+  %nop9145 = alloca i1, i1 0
+  %nop9146 = alloca i1, i1 0
+  %nop9147 = alloca i1, i1 0
+  %nop9148 = alloca i1, i1 0
+  %nop9149 = alloca i1, i1 0
+  %nop9150 = alloca i1, i1 0
+  %nop9151 = alloca i1, i1 0
+  %nop9152 = alloca i1, i1 0
+  %nop9153 = alloca i1, i1 0
+  %nop9154 = alloca i1, i1 0
+  %nop9155 = alloca i1, i1 0
+  %nop9156 = alloca i1, i1 0
+  %nop9157 = alloca i1, i1 0
+  %nop9158 = alloca i1, i1 0
+  %nop9159 = alloca i1, i1 0
+  %nop9160 = alloca i1, i1 0
+  %nop9161 = alloca i1, i1 0
+  %nop9162 = alloca i1, i1 0
+  %nop9163 = alloca i1, i1 0
+  %nop9164 = alloca i1, i1 0
+  %nop9165 = alloca i1, i1 0
+  %nop9166 = alloca i1, i1 0
+  %nop9167 = alloca i1, i1 0
+  %nop9168 = alloca i1, i1 0
+  %nop9169 = alloca i1, i1 0
+  %nop9170 = alloca i1, i1 0
+  %nop9171 = alloca i1, i1 0
+  %nop9172 = alloca i1, i1 0
+  %nop9173 = alloca i1, i1 0
+  %nop9174 = alloca i1, i1 0
+  %nop9175 = alloca i1, i1 0
+  %nop9176 = alloca i1, i1 0
+  %nop9177 = alloca i1, i1 0
+  %nop9178 = alloca i1, i1 0
+  %nop9179 = alloca i1, i1 0
+  %nop9180 = alloca i1, i1 0
+  %nop9181 = alloca i1, i1 0
+  %nop9182 = alloca i1, i1 0
+  %nop9183 = alloca i1, i1 0
+  %nop9184 = alloca i1, i1 0
+  %nop9185 = alloca i1, i1 0
+  %nop9186 = alloca i1, i1 0
+  %nop9187 = alloca i1, i1 0
+  %nop9188 = alloca i1, i1 0
+  %nop9189 = alloca i1, i1 0
+  %nop9190 = alloca i1, i1 0
+  %nop9191 = alloca i1, i1 0
+  %nop9192 = alloca i1, i1 0
+  %nop9193 = alloca i1, i1 0
+  %nop9194 = alloca i1, i1 0
+  %nop9195 = alloca i1, i1 0
+  %nop9196 = alloca i1, i1 0
+  %nop9197 = alloca i1, i1 0
+  %nop9198 = alloca i1, i1 0
+  %nop9199 = alloca i1, i1 0
+  %nop9200 = alloca i1, i1 0
+  %nop9201 = alloca i1, i1 0
+  %nop9202 = alloca i1, i1 0
+  %nop9203 = alloca i1, i1 0
+  %nop9204 = alloca i1, i1 0
+  %nop9205 = alloca i1, i1 0
+  %nop9206 = alloca i1, i1 0
+  %nop9207 = alloca i1, i1 0
+  %nop9208 = alloca i1, i1 0
+  %nop9209 = alloca i1, i1 0
+  %nop9210 = alloca i1, i1 0
+  %nop9211 = alloca i1, i1 0
+  %nop9212 = alloca i1, i1 0
+  %nop9213 = alloca i1, i1 0
+  %nop9214 = alloca i1, i1 0
+  %nop9215 = alloca i1, i1 0
+  %nop9216 = alloca i1, i1 0
+  %nop9217 = alloca i1, i1 0
+  %nop9218 = alloca i1, i1 0
+  %nop9219 = alloca i1, i1 0
+  %nop9220 = alloca i1, i1 0
+  %nop9221 = alloca i1, i1 0
+  %nop9222 = alloca i1, i1 0
+  %nop9223 = alloca i1, i1 0
+  %nop9224 = alloca i1, i1 0
+  %nop9225 = alloca i1, i1 0
+  %nop9226 = alloca i1, i1 0
+  %nop9227 = alloca i1, i1 0
+  %nop9228 = alloca i1, i1 0
+  %nop9229 = alloca i1, i1 0
+  %nop9230 = alloca i1, i1 0
+  %nop9231 = alloca i1, i1 0
+  %nop9232 = alloca i1, i1 0
+  %nop9233 = alloca i1, i1 0
+  %nop9234 = alloca i1, i1 0
+  %nop9235 = alloca i1, i1 0
+  %nop9236 = alloca i1, i1 0
+  %nop9237 = alloca i1, i1 0
+  %nop9238 = alloca i1, i1 0
+  %nop9239 = alloca i1, i1 0
+  %nop9240 = alloca i1, i1 0
+  %nop9241 = alloca i1, i1 0
+  %nop9242 = alloca i1, i1 0
+  %nop9243 = alloca i1, i1 0
+  %nop9244 = alloca i1, i1 0
+  %nop9245 = alloca i1, i1 0
+  %nop9246 = alloca i1, i1 0
+  %nop9247 = alloca i1, i1 0
+  %nop9248 = alloca i1, i1 0
+  %nop9249 = alloca i1, i1 0
+  %nop9250 = alloca i1, i1 0
+  %nop9251 = alloca i1, i1 0
+  %nop9252 = alloca i1, i1 0
+  %nop9253 = alloca i1, i1 0
+  %nop9254 = alloca i1, i1 0
+  %nop9255 = alloca i1, i1 0
+  %nop9256 = alloca i1, i1 0
+  %nop9257 = alloca i1, i1 0
+  %nop9258 = alloca i1, i1 0
+  %nop9259 = alloca i1, i1 0
+  %nop9260 = alloca i1, i1 0
+  %nop9261 = alloca i1, i1 0
+  %nop9262 = alloca i1, i1 0
+  %nop9263 = alloca i1, i1 0
+  %nop9264 = alloca i1, i1 0
+  %nop9265 = alloca i1, i1 0
+  %nop9266 = alloca i1, i1 0
+  %nop9267 = alloca i1, i1 0
+  %nop9268 = alloca i1, i1 0
+  %nop9269 = alloca i1, i1 0
+  %nop9270 = alloca i1, i1 0
+  %nop9271 = alloca i1, i1 0
+  %nop9272 = alloca i1, i1 0
+  %nop9273 = alloca i1, i1 0
+  %nop9274 = alloca i1, i1 0
+  %nop9275 = alloca i1, i1 0
+  %nop9276 = alloca i1, i1 0
+  %nop9277 = alloca i1, i1 0
+  %nop9278 = alloca i1, i1 0
+  %nop9279 = alloca i1, i1 0
+  %nop9280 = alloca i1, i1 0
+  %nop9281 = alloca i1, i1 0
+  %nop9282 = alloca i1, i1 0
+  %nop9283 = alloca i1, i1 0
+  %nop9284 = alloca i1, i1 0
+  %nop9285 = alloca i1, i1 0
+  %nop9286 = alloca i1, i1 0
+  %nop9287 = alloca i1, i1 0
+  %nop9288 = alloca i1, i1 0
+  %nop9289 = alloca i1, i1 0
+  %nop9290 = alloca i1, i1 0
+  %nop9291 = alloca i1, i1 0
+  %nop9292 = alloca i1, i1 0
+  %nop9293 = alloca i1, i1 0
+  %nop9294 = alloca i1, i1 0
+  %nop9295 = alloca i1, i1 0
+  %nop9296 = alloca i1, i1 0
+  %nop9297 = alloca i1, i1 0
+  %nop9298 = alloca i1, i1 0
+  %nop9299 = alloca i1, i1 0
+  %nop9300 = alloca i1, i1 0
+  %nop9301 = alloca i1, i1 0
+  %nop9302 = alloca i1, i1 0
+  %nop9303 = alloca i1, i1 0
+  %nop9304 = alloca i1, i1 0
+  %nop9305 = alloca i1, i1 0
+  %nop9306 = alloca i1, i1 0
+  %nop9307 = alloca i1, i1 0
+  %nop9308 = alloca i1, i1 0
+  %nop9309 = alloca i1, i1 0
+  %nop9310 = alloca i1, i1 0
+  %nop9311 = alloca i1, i1 0
+  %nop9312 = alloca i1, i1 0
+  %nop9313 = alloca i1, i1 0
+  %nop9314 = alloca i1, i1 0
+  %nop9315 = alloca i1, i1 0
+  %nop9316 = alloca i1, i1 0
+  %nop9317 = alloca i1, i1 0
+  %nop9318 = alloca i1, i1 0
+  %nop9319 = alloca i1, i1 0
+  %nop9320 = alloca i1, i1 0
+  %nop9321 = alloca i1, i1 0
+  %nop9322 = alloca i1, i1 0
+  %nop9323 = alloca i1, i1 0
+  %nop9324 = alloca i1, i1 0
+  %nop9325 = alloca i1, i1 0
+  %nop9326 = alloca i1, i1 0
+  %nop9327 = alloca i1, i1 0
+  %nop9328 = alloca i1, i1 0
+  %nop9329 = alloca i1, i1 0
+  %nop9330 = alloca i1, i1 0
+  %nop9331 = alloca i1, i1 0
+  %nop9332 = alloca i1, i1 0
+  %nop9333 = alloca i1, i1 0
+  %nop9334 = alloca i1, i1 0
+  %nop9335 = alloca i1, i1 0
+  %nop9336 = alloca i1, i1 0
+  %nop9337 = alloca i1, i1 0
+  %nop9338 = alloca i1, i1 0
+  %nop9339 = alloca i1, i1 0
+  %nop9340 = alloca i1, i1 0
+  %nop9341 = alloca i1, i1 0
+  %nop9342 = alloca i1, i1 0
+  %nop9343 = alloca i1, i1 0
+  %nop9344 = alloca i1, i1 0
+  %nop9345 = alloca i1, i1 0
+  %nop9346 = alloca i1, i1 0
+  %nop9347 = alloca i1, i1 0
+  %nop9348 = alloca i1, i1 0
+  %nop9349 = alloca i1, i1 0
+  %nop9350 = alloca i1, i1 0
+  %nop9351 = alloca i1, i1 0
+  %nop9352 = alloca i1, i1 0
+  %nop9353 = alloca i1, i1 0
+  %nop9354 = alloca i1, i1 0
+  %nop9355 = alloca i1, i1 0
+  %nop9356 = alloca i1, i1 0
+  %nop9357 = alloca i1, i1 0
+  %nop9358 = alloca i1, i1 0
+  %nop9359 = alloca i1, i1 0
+  %nop9360 = alloca i1, i1 0
+  %nop9361 = alloca i1, i1 0
+  %nop9362 = alloca i1, i1 0
+  %nop9363 = alloca i1, i1 0
+  %nop9364 = alloca i1, i1 0
+  %nop9365 = alloca i1, i1 0
+  %nop9366 = alloca i1, i1 0
+  %nop9367 = alloca i1, i1 0
+  %nop9368 = alloca i1, i1 0
+  %nop9369 = alloca i1, i1 0
+  %nop9370 = alloca i1, i1 0
+  %nop9371 = alloca i1, i1 0
+  %nop9372 = alloca i1, i1 0
+  %nop9373 = alloca i1, i1 0
+  %nop9374 = alloca i1, i1 0
+  %nop9375 = alloca i1, i1 0
+  %nop9376 = alloca i1, i1 0
+  %nop9377 = alloca i1, i1 0
+  %nop9378 = alloca i1, i1 0
+  %nop9379 = alloca i1, i1 0
+  %nop9380 = alloca i1, i1 0
+  %nop9381 = alloca i1, i1 0
+  %nop9382 = alloca i1, i1 0
+  %nop9383 = alloca i1, i1 0
+  %nop9384 = alloca i1, i1 0
+  %nop9385 = alloca i1, i1 0
+  %nop9386 = alloca i1, i1 0
+  %nop9387 = alloca i1, i1 0
+  %nop9388 = alloca i1, i1 0
+  %nop9389 = alloca i1, i1 0
+  %nop9390 = alloca i1, i1 0
+  %nop9391 = alloca i1, i1 0
+  %nop9392 = alloca i1, i1 0
+  %nop9393 = alloca i1, i1 0
+  %nop9394 = alloca i1, i1 0
+  %nop9395 = alloca i1, i1 0
+  %nop9396 = alloca i1, i1 0
+  %nop9397 = alloca i1, i1 0
+  %nop9398 = alloca i1, i1 0
+  %nop9399 = alloca i1, i1 0
+  %nop9400 = alloca i1, i1 0
+  %nop9401 = alloca i1, i1 0
+  %nop9402 = alloca i1, i1 0
+  %nop9403 = alloca i1, i1 0
+  %nop9404 = alloca i1, i1 0
+  %nop9405 = alloca i1, i1 0
+  %nop9406 = alloca i1, i1 0
+  %nop9407 = alloca i1, i1 0
+  %nop9408 = alloca i1, i1 0
+  %nop9409 = alloca i1, i1 0
+  %nop9410 = alloca i1, i1 0
+  %nop9411 = alloca i1, i1 0
+  %nop9412 = alloca i1, i1 0
+  %nop9413 = alloca i1, i1 0
+  %nop9414 = alloca i1, i1 0
+  %nop9415 = alloca i1, i1 0
+  %nop9416 = alloca i1, i1 0
+  %nop9417 = alloca i1, i1 0
+  %nop9418 = alloca i1, i1 0
+  %nop9419 = alloca i1, i1 0
+  %nop9420 = alloca i1, i1 0
+  %nop9421 = alloca i1, i1 0
+  %nop9422 = alloca i1, i1 0
+  %nop9423 = alloca i1, i1 0
+  %nop9424 = alloca i1, i1 0
+  %nop9425 = alloca i1, i1 0
+  %nop9426 = alloca i1, i1 0
+  %nop9427 = alloca i1, i1 0
+  %nop9428 = alloca i1, i1 0
+  %nop9429 = alloca i1, i1 0
+  %nop9430 = alloca i1, i1 0
+  %nop9431 = alloca i1, i1 0
+  %nop9432 = alloca i1, i1 0
+  %nop9433 = alloca i1, i1 0
+  %nop9434 = alloca i1, i1 0
+  %nop9435 = alloca i1, i1 0
+  %nop9436 = alloca i1, i1 0
+  %nop9437 = alloca i1, i1 0
+  %nop9438 = alloca i1, i1 0
+  %nop9439 = alloca i1, i1 0
+  %nop9440 = alloca i1, i1 0
+  %nop9441 = alloca i1, i1 0
+  %nop9442 = alloca i1, i1 0
+  %nop9443 = alloca i1, i1 0
+  %nop9444 = alloca i1, i1 0
+  %nop9445 = alloca i1, i1 0
+  %nop9446 = alloca i1, i1 0
+  %nop9447 = alloca i1, i1 0
+  %nop9448 = alloca i1, i1 0
+  %nop9449 = alloca i1, i1 0
+  %nop9450 = alloca i1, i1 0
+  %nop9451 = alloca i1, i1 0
+  %nop9452 = alloca i1, i1 0
+  %nop9453 = alloca i1, i1 0
+  %nop9454 = alloca i1, i1 0
+  %nop9455 = alloca i1, i1 0
+  %nop9456 = alloca i1, i1 0
+  %nop9457 = alloca i1, i1 0
+  %nop9458 = alloca i1, i1 0
+  %nop9459 = alloca i1, i1 0
+  %nop9460 = alloca i1, i1 0
+  %nop9461 = alloca i1, i1 0
+  %nop9462 = alloca i1, i1 0
+  %nop9463 = alloca i1, i1 0
+  %nop9464 = alloca i1, i1 0
+  %nop9465 = alloca i1, i1 0
+  %nop9466 = alloca i1, i1 0
+  %nop9467 = alloca i1, i1 0
+  %nop9468 = alloca i1, i1 0
+  %nop9469 = alloca i1, i1 0
+  %nop9470 = alloca i1, i1 0
+  %nop9471 = alloca i1, i1 0
+  %nop9472 = alloca i1, i1 0
+  %nop9473 = alloca i1, i1 0
+  %nop9474 = alloca i1, i1 0
+  %nop9475 = alloca i1, i1 0
+  %nop9476 = alloca i1, i1 0
+  %nop9477 = alloca i1, i1 0
+  %nop9478 = alloca i1, i1 0
+  %nop9479 = alloca i1, i1 0
+  %nop9480 = alloca i1, i1 0
+  %nop9481 = alloca i1, i1 0
+  %nop9482 = alloca i1, i1 0
+  %nop9483 = alloca i1, i1 0
+  %nop9484 = alloca i1, i1 0
+  %nop9485 = alloca i1, i1 0
+  %nop9486 = alloca i1, i1 0
+  %nop9487 = alloca i1, i1 0
+  %nop9488 = alloca i1, i1 0
+  %nop9489 = alloca i1, i1 0
+  %nop9490 = alloca i1, i1 0
+  %nop9491 = alloca i1, i1 0
+  %nop9492 = alloca i1, i1 0
+  %nop9493 = alloca i1, i1 0
+  %nop9494 = alloca i1, i1 0
+  %nop9495 = alloca i1, i1 0
+  %nop9496 = alloca i1, i1 0
+  %nop9497 = alloca i1, i1 0
+  %nop9498 = alloca i1, i1 0
+  %nop9499 = alloca i1, i1 0
+  %nop9500 = alloca i1, i1 0
+  %nop9501 = alloca i1, i1 0
+  %nop9502 = alloca i1, i1 0
+  %nop9503 = alloca i1, i1 0
+  %nop9504 = alloca i1, i1 0
+  %nop9505 = alloca i1, i1 0
+  %nop9506 = alloca i1, i1 0
+  %nop9507 = alloca i1, i1 0
+  %nop9508 = alloca i1, i1 0
+  %nop9509 = alloca i1, i1 0
+  %nop9510 = alloca i1, i1 0
+  %nop9511 = alloca i1, i1 0
+  %nop9512 = alloca i1, i1 0
+  %nop9513 = alloca i1, i1 0
+  %nop9514 = alloca i1, i1 0
+  %nop9515 = alloca i1, i1 0
+  %nop9516 = alloca i1, i1 0
+  %nop9517 = alloca i1, i1 0
+  %nop9518 = alloca i1, i1 0
+  %nop9519 = alloca i1, i1 0
+  %nop9520 = alloca i1, i1 0
+  %nop9521 = alloca i1, i1 0
+  %nop9522 = alloca i1, i1 0
+  %nop9523 = alloca i1, i1 0
+  %nop9524 = alloca i1, i1 0
+  %nop9525 = alloca i1, i1 0
+  %nop9526 = alloca i1, i1 0
+  %nop9527 = alloca i1, i1 0
+  %nop9528 = alloca i1, i1 0
+  %nop9529 = alloca i1, i1 0
+  %nop9530 = alloca i1, i1 0
+  %nop9531 = alloca i1, i1 0
+  %nop9532 = alloca i1, i1 0
+  %nop9533 = alloca i1, i1 0
+  %nop9534 = alloca i1, i1 0
+  %nop9535 = alloca i1, i1 0
+  %nop9536 = alloca i1, i1 0
+  %nop9537 = alloca i1, i1 0
+  %nop9538 = alloca i1, i1 0
+  %nop9539 = alloca i1, i1 0
+  %nop9540 = alloca i1, i1 0
+  %nop9541 = alloca i1, i1 0
+  %nop9542 = alloca i1, i1 0
+  %nop9543 = alloca i1, i1 0
+  %nop9544 = alloca i1, i1 0
+  %nop9545 = alloca i1, i1 0
+  %nop9546 = alloca i1, i1 0
+  %nop9547 = alloca i1, i1 0
+  %nop9548 = alloca i1, i1 0
+  %nop9549 = alloca i1, i1 0
+  %nop9550 = alloca i1, i1 0
+  %nop9551 = alloca i1, i1 0
+  %nop9552 = alloca i1, i1 0
+  %nop9553 = alloca i1, i1 0
+  %nop9554 = alloca i1, i1 0
+  %nop9555 = alloca i1, i1 0
+  %nop9556 = alloca i1, i1 0
+  %nop9557 = alloca i1, i1 0
+  %nop9558 = alloca i1, i1 0
+  %nop9559 = alloca i1, i1 0
+  %nop9560 = alloca i1, i1 0
+  %nop9561 = alloca i1, i1 0
+  %nop9562 = alloca i1, i1 0
+  %nop9563 = alloca i1, i1 0
+  %nop9564 = alloca i1, i1 0
+  %nop9565 = alloca i1, i1 0
+  %nop9566 = alloca i1, i1 0
+  %nop9567 = alloca i1, i1 0
+  %nop9568 = alloca i1, i1 0
+  %nop9569 = alloca i1, i1 0
+  %nop9570 = alloca i1, i1 0
+  %nop9571 = alloca i1, i1 0
+  %nop9572 = alloca i1, i1 0
+  %nop9573 = alloca i1, i1 0
+  %nop9574 = alloca i1, i1 0
+  %nop9575 = alloca i1, i1 0
+  %nop9576 = alloca i1, i1 0
+  %nop9577 = alloca i1, i1 0
+  %nop9578 = alloca i1, i1 0
+  %nop9579 = alloca i1, i1 0
+  %nop9580 = alloca i1, i1 0
+  %nop9581 = alloca i1, i1 0
+  %nop9582 = alloca i1, i1 0
+  %nop9583 = alloca i1, i1 0
+  %nop9584 = alloca i1, i1 0
+  %nop9585 = alloca i1, i1 0
+  %nop9586 = alloca i1, i1 0
+  %nop9587 = alloca i1, i1 0
+  %nop9588 = alloca i1, i1 0
+  %nop9589 = alloca i1, i1 0
+  %nop9590 = alloca i1, i1 0
+  %nop9591 = alloca i1, i1 0
+  %nop9592 = alloca i1, i1 0
+  %nop9593 = alloca i1, i1 0
+  %nop9594 = alloca i1, i1 0
+  %nop9595 = alloca i1, i1 0
+  %nop9596 = alloca i1, i1 0
+  %nop9597 = alloca i1, i1 0
+  %nop9598 = alloca i1, i1 0
+  %nop9599 = alloca i1, i1 0
+  %nop9600 = alloca i1, i1 0
+  %nop9601 = alloca i1, i1 0
+  %nop9602 = alloca i1, i1 0
+  %nop9603 = alloca i1, i1 0
+  %nop9604 = alloca i1, i1 0
+  %nop9605 = alloca i1, i1 0
+  %nop9606 = alloca i1, i1 0
+  %nop9607 = alloca i1, i1 0
+  %nop9608 = alloca i1, i1 0
+  %nop9609 = alloca i1, i1 0
+  %nop9610 = alloca i1, i1 0
+  %nop9611 = alloca i1, i1 0
+  %nop9612 = alloca i1, i1 0
+  %nop9613 = alloca i1, i1 0
+  %nop9614 = alloca i1, i1 0
+  %nop9615 = alloca i1, i1 0
+  %nop9616 = alloca i1, i1 0
+  %nop9617 = alloca i1, i1 0
+  %nop9618 = alloca i1, i1 0
+  %nop9619 = alloca i1, i1 0
+  %nop9620 = alloca i1, i1 0
+  %nop9621 = alloca i1, i1 0
+  %nop9622 = alloca i1, i1 0
+  %nop9623 = alloca i1, i1 0
+  %nop9624 = alloca i1, i1 0
+  %nop9625 = alloca i1, i1 0
+  %nop9626 = alloca i1, i1 0
+  %nop9627 = alloca i1, i1 0
+  %nop9628 = alloca i1, i1 0
+  %nop9629 = alloca i1, i1 0
+  %nop9630 = alloca i1, i1 0
+  %nop9631 = alloca i1, i1 0
+  %nop9632 = alloca i1, i1 0
+  %nop9633 = alloca i1, i1 0
+  %nop9634 = alloca i1, i1 0
+  %nop9635 = alloca i1, i1 0
+  %nop9636 = alloca i1, i1 0
+  %nop9637 = alloca i1, i1 0
+  %nop9638 = alloca i1, i1 0
+  %nop9639 = alloca i1, i1 0
+  %nop9640 = alloca i1, i1 0
+  %nop9641 = alloca i1, i1 0
+  %nop9642 = alloca i1, i1 0
+  %nop9643 = alloca i1, i1 0
+  %nop9644 = alloca i1, i1 0
+  %nop9645 = alloca i1, i1 0
+  %nop9646 = alloca i1, i1 0
+  %nop9647 = alloca i1, i1 0
+  %nop9648 = alloca i1, i1 0
+  %nop9649 = alloca i1, i1 0
+  %nop9650 = alloca i1, i1 0
+  %nop9651 = alloca i1, i1 0
+  %nop9652 = alloca i1, i1 0
+  %nop9653 = alloca i1, i1 0
+  %nop9654 = alloca i1, i1 0
+  %nop9655 = alloca i1, i1 0
+  %nop9656 = alloca i1, i1 0
+  %nop9657 = alloca i1, i1 0
+  %nop9658 = alloca i1, i1 0
+  %nop9659 = alloca i1, i1 0
+  %nop9660 = alloca i1, i1 0
+  %nop9661 = alloca i1, i1 0
+  %nop9662 = alloca i1, i1 0
+  %nop9663 = alloca i1, i1 0
+  %nop9664 = alloca i1, i1 0
+  %nop9665 = alloca i1, i1 0
+  %nop9666 = alloca i1, i1 0
+  %nop9667 = alloca i1, i1 0
+  %nop9668 = alloca i1, i1 0
+  %nop9669 = alloca i1, i1 0
+  %nop9670 = alloca i1, i1 0
+  %nop9671 = alloca i1, i1 0
+  %nop9672 = alloca i1, i1 0
+  %nop9673 = alloca i1, i1 0
+  %nop9674 = alloca i1, i1 0
+  %nop9675 = alloca i1, i1 0
+  %nop9676 = alloca i1, i1 0
+  %nop9677 = alloca i1, i1 0
+  %nop9678 = alloca i1, i1 0
+  %nop9679 = alloca i1, i1 0
+  %nop9680 = alloca i1, i1 0
+  %nop9681 = alloca i1, i1 0
+  %nop9682 = alloca i1, i1 0
+  %nop9683 = alloca i1, i1 0
+  %nop9684 = alloca i1, i1 0
+  %nop9685 = alloca i1, i1 0
+  %nop9686 = alloca i1, i1 0
+  %nop9687 = alloca i1, i1 0
+  %nop9688 = alloca i1, i1 0
+  %nop9689 = alloca i1, i1 0
+  %nop9690 = alloca i1, i1 0
+  %nop9691 = alloca i1, i1 0
+  %nop9692 = alloca i1, i1 0
+  %nop9693 = alloca i1, i1 0
+  %nop9694 = alloca i1, i1 0
+  %nop9695 = alloca i1, i1 0
+  %nop9696 = alloca i1, i1 0
+  %nop9697 = alloca i1, i1 0
+  %nop9698 = alloca i1, i1 0
+  %nop9699 = alloca i1, i1 0
+  %nop9700 = alloca i1, i1 0
+  %nop9701 = alloca i1, i1 0
+  %nop9702 = alloca i1, i1 0
+  %nop9703 = alloca i1, i1 0
+  %nop9704 = alloca i1, i1 0
+  %nop9705 = alloca i1, i1 0
+  %nop9706 = alloca i1, i1 0
+  %nop9707 = alloca i1, i1 0
+  %nop9708 = alloca i1, i1 0
+  %nop9709 = alloca i1, i1 0
+  %nop9710 = alloca i1, i1 0
+  %nop9711 = alloca i1, i1 0
+  %nop9712 = alloca i1, i1 0
+  %nop9713 = alloca i1, i1 0
+  %nop9714 = alloca i1, i1 0
+  %nop9715 = alloca i1, i1 0
+  %nop9716 = alloca i1, i1 0
+  %nop9717 = alloca i1, i1 0
+  %nop9718 = alloca i1, i1 0
+  %nop9719 = alloca i1, i1 0
+  %nop9720 = alloca i1, i1 0
+  %nop9721 = alloca i1, i1 0
+  %nop9722 = alloca i1, i1 0
+  %nop9723 = alloca i1, i1 0
+  %nop9724 = alloca i1, i1 0
+  %nop9725 = alloca i1, i1 0
+  %nop9726 = alloca i1, i1 0
+  %nop9727 = alloca i1, i1 0
+  %nop9728 = alloca i1, i1 0
+  %nop9729 = alloca i1, i1 0
+  %nop9730 = alloca i1, i1 0
+  %nop9731 = alloca i1, i1 0
+  %nop9732 = alloca i1, i1 0
+  %nop9733 = alloca i1, i1 0
+  %nop9734 = alloca i1, i1 0
+  %nop9735 = alloca i1, i1 0
+  %nop9736 = alloca i1, i1 0
+  %nop9737 = alloca i1, i1 0
+  %nop9738 = alloca i1, i1 0
+  %nop9739 = alloca i1, i1 0
+  %nop9740 = alloca i1, i1 0
+  %nop9741 = alloca i1, i1 0
+  %nop9742 = alloca i1, i1 0
+  %nop9743 = alloca i1, i1 0
+  %nop9744 = alloca i1, i1 0
+  %nop9745 = alloca i1, i1 0
+  %nop9746 = alloca i1, i1 0
+  %nop9747 = alloca i1, i1 0
+  %nop9748 = alloca i1, i1 0
+  %nop9749 = alloca i1, i1 0
+  %nop9750 = alloca i1, i1 0
+  %nop9751 = alloca i1, i1 0
+  %nop9752 = alloca i1, i1 0
+  %nop9753 = alloca i1, i1 0
+  %nop9754 = alloca i1, i1 0
+  %nop9755 = alloca i1, i1 0
+  %nop9756 = alloca i1, i1 0
+  %nop9757 = alloca i1, i1 0
+  %nop9758 = alloca i1, i1 0
+  %nop9759 = alloca i1, i1 0
+  %nop9760 = alloca i1, i1 0
+  %nop9761 = alloca i1, i1 0
+  %nop9762 = alloca i1, i1 0
+  %nop9763 = alloca i1, i1 0
+  %nop9764 = alloca i1, i1 0
+  %nop9765 = alloca i1, i1 0
+  %nop9766 = alloca i1, i1 0
+  %nop9767 = alloca i1, i1 0
+  %nop9768 = alloca i1, i1 0
+  %nop9769 = alloca i1, i1 0
+  %nop9770 = alloca i1, i1 0
+  %nop9771 = alloca i1, i1 0
+  %nop9772 = alloca i1, i1 0
+  %nop9773 = alloca i1, i1 0
+  %nop9774 = alloca i1, i1 0
+  %nop9775 = alloca i1, i1 0
+  %nop9776 = alloca i1, i1 0
+  %nop9777 = alloca i1, i1 0
+  %nop9778 = alloca i1, i1 0
+  %nop9779 = alloca i1, i1 0
+  %nop9780 = alloca i1, i1 0
+  %nop9781 = alloca i1, i1 0
+  %nop9782 = alloca i1, i1 0
+  %nop9783 = alloca i1, i1 0
+  %nop9784 = alloca i1, i1 0
+  %nop9785 = alloca i1, i1 0
+  %nop9786 = alloca i1, i1 0
+  %nop9787 = alloca i1, i1 0
+  %nop9788 = alloca i1, i1 0
+  %nop9789 = alloca i1, i1 0
+  %nop9790 = alloca i1, i1 0
+  %nop9791 = alloca i1, i1 0
+  %nop9792 = alloca i1, i1 0
+  %nop9793 = alloca i1, i1 0
+  %nop9794 = alloca i1, i1 0
+  %nop9795 = alloca i1, i1 0
+  %nop9796 = alloca i1, i1 0
+  %nop9797 = alloca i1, i1 0
+  %nop9798 = alloca i1, i1 0
+  %nop9799 = alloca i1, i1 0
+  %nop9800 = alloca i1, i1 0
+  %nop9801 = alloca i1, i1 0
+  %nop9802 = alloca i1, i1 0
+  %nop9803 = alloca i1, i1 0
+  %nop9804 = alloca i1, i1 0
+  %nop9805 = alloca i1, i1 0
+  %nop9806 = alloca i1, i1 0
+  %nop9807 = alloca i1, i1 0
+  %nop9808 = alloca i1, i1 0
+  %nop9809 = alloca i1, i1 0
+  %nop9810 = alloca i1, i1 0
+  %nop9811 = alloca i1, i1 0
+  %nop9812 = alloca i1, i1 0
+  %nop9813 = alloca i1, i1 0
+  %nop9814 = alloca i1, i1 0
+  %nop9815 = alloca i1, i1 0
+  %nop9816 = alloca i1, i1 0
+  %nop9817 = alloca i1, i1 0
+  %nop9818 = alloca i1, i1 0
+  %nop9819 = alloca i1, i1 0
+  %nop9820 = alloca i1, i1 0
+  %nop9821 = alloca i1, i1 0
+  %nop9822 = alloca i1, i1 0
+  %nop9823 = alloca i1, i1 0
+  %nop9824 = alloca i1, i1 0
+  %nop9825 = alloca i1, i1 0
+  %nop9826 = alloca i1, i1 0
+  %nop9827 = alloca i1, i1 0
+  %nop9828 = alloca i1, i1 0
+  %nop9829 = alloca i1, i1 0
+  %nop9830 = alloca i1, i1 0
+  %nop9831 = alloca i1, i1 0
+  %nop9832 = alloca i1, i1 0
+  %nop9833 = alloca i1, i1 0
+  %nop9834 = alloca i1, i1 0
+  %nop9835 = alloca i1, i1 0
+  %nop9836 = alloca i1, i1 0
+  %nop9837 = alloca i1, i1 0
+  %nop9838 = alloca i1, i1 0
+  %nop9839 = alloca i1, i1 0
+  %nop9840 = alloca i1, i1 0
+  %nop9841 = alloca i1, i1 0
+  %nop9842 = alloca i1, i1 0
+  %nop9843 = alloca i1, i1 0
+  %nop9844 = alloca i1, i1 0
+  %nop9845 = alloca i1, i1 0
+  %nop9846 = alloca i1, i1 0
+  %nop9847 = alloca i1, i1 0
+  %nop9848 = alloca i1, i1 0
+  %nop9849 = alloca i1, i1 0
+  %nop9850 = alloca i1, i1 0
+  %nop9851 = alloca i1, i1 0
+  %nop9852 = alloca i1, i1 0
+  %nop9853 = alloca i1, i1 0
+  %nop9854 = alloca i1, i1 0
+  %nop9855 = alloca i1, i1 0
+  %nop9856 = alloca i1, i1 0
+  %nop9857 = alloca i1, i1 0
+  %nop9858 = alloca i1, i1 0
+  %nop9859 = alloca i1, i1 0
+  %nop9860 = alloca i1, i1 0
+  %nop9861 = alloca i1, i1 0
+  %nop9862 = alloca i1, i1 0
+  %nop9863 = alloca i1, i1 0
+  %nop9864 = alloca i1, i1 0
+  %nop9865 = alloca i1, i1 0
+  %nop9866 = alloca i1, i1 0
+  %nop9867 = alloca i1, i1 0
+  %nop9868 = alloca i1, i1 0
+  %nop9869 = alloca i1, i1 0
+  %nop9870 = alloca i1, i1 0
+  %nop9871 = alloca i1, i1 0
+  %nop9872 = alloca i1, i1 0
+  %nop9873 = alloca i1, i1 0
+  %nop9874 = alloca i1, i1 0
+  %nop9875 = alloca i1, i1 0
+  %nop9876 = alloca i1, i1 0
+  %nop9877 = alloca i1, i1 0
+  %nop9878 = alloca i1, i1 0
+  %nop9879 = alloca i1, i1 0
+  %nop9880 = alloca i1, i1 0
+  %nop9881 = alloca i1, i1 0
+  %nop9882 = alloca i1, i1 0
+  %nop9883 = alloca i1, i1 0
+  %nop9884 = alloca i1, i1 0
+  %nop9885 = alloca i1, i1 0
+  %nop9886 = alloca i1, i1 0
+  %nop9887 = alloca i1, i1 0
+  %nop9888 = alloca i1, i1 0
+  %nop9889 = alloca i1, i1 0
+  %nop9890 = alloca i1, i1 0
+  %nop9891 = alloca i1, i1 0
+  %nop9892 = alloca i1, i1 0
+  %nop9893 = alloca i1, i1 0
+  %nop9894 = alloca i1, i1 0
+  %nop9895 = alloca i1, i1 0
+  %nop9896 = alloca i1, i1 0
+  %nop9897 = alloca i1, i1 0
+  %nop9898 = alloca i1, i1 0
+  %nop9899 = alloca i1, i1 0
+  %nop9900 = alloca i1, i1 0
+  %nop9901 = alloca i1, i1 0
+  %nop9902 = alloca i1, i1 0
+  %nop9903 = alloca i1, i1 0
+  %nop9904 = alloca i1, i1 0
+  %nop9905 = alloca i1, i1 0
+  %nop9906 = alloca i1, i1 0
+  %nop9907 = alloca i1, i1 0
+  %nop9908 = alloca i1, i1 0
+  %nop9909 = alloca i1, i1 0
+  %nop9910 = alloca i1, i1 0
+  %nop9911 = alloca i1, i1 0
+  %nop9912 = alloca i1, i1 0
+  %nop9913 = alloca i1, i1 0
+  %nop9914 = alloca i1, i1 0
+  %nop9915 = alloca i1, i1 0
+  %nop9916 = alloca i1, i1 0
+  %nop9917 = alloca i1, i1 0
+  %nop9918 = alloca i1, i1 0
+  %nop9919 = alloca i1, i1 0
+  %nop9920 = alloca i1, i1 0
+  %nop9921 = alloca i1, i1 0
+  %nop9922 = alloca i1, i1 0
+  %nop9923 = alloca i1, i1 0
+  %nop9924 = alloca i1, i1 0
+  %nop9925 = alloca i1, i1 0
+  %nop9926 = alloca i1, i1 0
+  %nop9927 = alloca i1, i1 0
+  %nop9928 = alloca i1, i1 0
+  %nop9929 = alloca i1, i1 0
+  %nop9930 = alloca i1, i1 0
+  %nop9931 = alloca i1, i1 0
+  %nop9932 = alloca i1, i1 0
+  %nop9933 = alloca i1, i1 0
+  %nop9934 = alloca i1, i1 0
+  %nop9935 = alloca i1, i1 0
+  %nop9936 = alloca i1, i1 0
+  %nop9937 = alloca i1, i1 0
+  %nop9938 = alloca i1, i1 0
+  %nop9939 = alloca i1, i1 0
+  %nop9940 = alloca i1, i1 0
+  %nop9941 = alloca i1, i1 0
+  %nop9942 = alloca i1, i1 0
+  %nop9943 = alloca i1, i1 0
+  %nop9944 = alloca i1, i1 0
+  %nop9945 = alloca i1, i1 0
+  %nop9946 = alloca i1, i1 0
+  %nop9947 = alloca i1, i1 0
+  %nop9948 = alloca i1, i1 0
+  %nop9949 = alloca i1, i1 0
+  %nop9950 = alloca i1, i1 0
+  %nop9951 = alloca i1, i1 0
+  %nop9952 = alloca i1, i1 0
+  %nop9953 = alloca i1, i1 0
+  %nop9954 = alloca i1, i1 0
+  %nop9955 = alloca i1, i1 0
+  %nop9956 = alloca i1, i1 0
+  %nop9957 = alloca i1, i1 0
+  %nop9958 = alloca i1, i1 0
+  %nop9959 = alloca i1, i1 0
+  %nop9960 = alloca i1, i1 0
+  %nop9961 = alloca i1, i1 0
+  %nop9962 = alloca i1, i1 0
+  %nop9963 = alloca i1, i1 0
+  %nop9964 = alloca i1, i1 0
+  %nop9965 = alloca i1, i1 0
+  %nop9966 = alloca i1, i1 0
+  %nop9967 = alloca i1, i1 0
+  %nop9968 = alloca i1, i1 0
+  %nop9969 = alloca i1, i1 0
+  %nop9970 = alloca i1, i1 0
+  %nop9971 = alloca i1, i1 0
+  %nop9972 = alloca i1, i1 0
+  %nop9973 = alloca i1, i1 0
+  %nop9974 = alloca i1, i1 0
+  %nop9975 = alloca i1, i1 0
+  %nop9976 = alloca i1, i1 0
+  %nop9977 = alloca i1, i1 0
+  %nop9978 = alloca i1, i1 0
+  %nop9979 = alloca i1, i1 0
+  %nop9980 = alloca i1, i1 0
+  %nop9981 = alloca i1, i1 0
+  %nop9982 = alloca i1, i1 0
+  %nop9983 = alloca i1, i1 0
+  %nop9984 = alloca i1, i1 0
+  %nop9985 = alloca i1, i1 0
+  %nop9986 = alloca i1, i1 0
+  %nop9987 = alloca i1, i1 0
+  %nop9988 = alloca i1, i1 0
+  %nop9989 = alloca i1, i1 0
+  %nop9990 = alloca i1, i1 0
+  %nop9991 = alloca i1, i1 0
+  %nop9992 = alloca i1, i1 0
+  %nop9993 = alloca i1, i1 0
+  %nop9994 = alloca i1, i1 0
+  %nop9995 = alloca i1, i1 0
+  %nop9996 = alloca i1, i1 0
+  %nop9997 = alloca i1, i1 0
+  %nop9998 = alloca i1, i1 0
+  %nop9999 = alloca i1, i1 0
+  %nop10000 = alloca i1, i1 0
+  %nop10001 = alloca i1, i1 0
+  %nop10002 = alloca i1, i1 0
+  %nop10003 = alloca i1, i1 0
+  %nop10004 = alloca i1, i1 0
+  %nop10005 = alloca i1, i1 0
+  %nop10006 = alloca i1, i1 0
+  %nop10007 = alloca i1, i1 0
+  %nop10008 = alloca i1, i1 0
+  %nop10009 = alloca i1, i1 0
+  %nop10010 = alloca i1, i1 0
+  %nop10011 = alloca i1, i1 0
+  %nop10012 = alloca i1, i1 0
+  %nop10013 = alloca i1, i1 0
+  %nop10014 = alloca i1, i1 0
+  %nop10015 = alloca i1, i1 0
+  %nop10016 = alloca i1, i1 0
+  %nop10017 = alloca i1, i1 0
+  %nop10018 = alloca i1, i1 0
+  %nop10019 = alloca i1, i1 0
+  %nop10020 = alloca i1, i1 0
+  %nop10021 = alloca i1, i1 0
+  %nop10022 = alloca i1, i1 0
+  %nop10023 = alloca i1, i1 0
+  %nop10024 = alloca i1, i1 0
+  %nop10025 = alloca i1, i1 0
+  %nop10026 = alloca i1, i1 0
+  %nop10027 = alloca i1, i1 0
+  %nop10028 = alloca i1, i1 0
+  %nop10029 = alloca i1, i1 0
+  %nop10030 = alloca i1, i1 0
+  %nop10031 = alloca i1, i1 0
+  %nop10032 = alloca i1, i1 0
+  %nop10033 = alloca i1, i1 0
+  %nop10034 = alloca i1, i1 0
+  %nop10035 = alloca i1, i1 0
+  %nop10036 = alloca i1, i1 0
+  %nop10037 = alloca i1, i1 0
+  %nop10038 = alloca i1, i1 0
+  %nop10039 = alloca i1, i1 0
+  %nop10040 = alloca i1, i1 0
+  %nop10041 = alloca i1, i1 0
+  %nop10042 = alloca i1, i1 0
+  %nop10043 = alloca i1, i1 0
+  %nop10044 = alloca i1, i1 0
+  %nop10045 = alloca i1, i1 0
+  %nop10046 = alloca i1, i1 0
+  %nop10047 = alloca i1, i1 0
+  %nop10048 = alloca i1, i1 0
+  %nop10049 = alloca i1, i1 0
+  %nop10050 = alloca i1, i1 0
+  %nop10051 = alloca i1, i1 0
+  %nop10052 = alloca i1, i1 0
+  %nop10053 = alloca i1, i1 0
+  %nop10054 = alloca i1, i1 0
+  %nop10055 = alloca i1, i1 0
+  %nop10056 = alloca i1, i1 0
+  %nop10057 = alloca i1, i1 0
+  %nop10058 = alloca i1, i1 0
+  %nop10059 = alloca i1, i1 0
+  %nop10060 = alloca i1, i1 0
+  %nop10061 = alloca i1, i1 0
+  %nop10062 = alloca i1, i1 0
+  %nop10063 = alloca i1, i1 0
+  %nop10064 = alloca i1, i1 0
+  %nop10065 = alloca i1, i1 0
+  %nop10066 = alloca i1, i1 0
+  %nop10067 = alloca i1, i1 0
+  %nop10068 = alloca i1, i1 0
+  %nop10069 = alloca i1, i1 0
+  %nop10070 = alloca i1, i1 0
+  %nop10071 = alloca i1, i1 0
+  %nop10072 = alloca i1, i1 0
+  %nop10073 = alloca i1, i1 0
+  %nop10074 = alloca i1, i1 0
+  %nop10075 = alloca i1, i1 0
+  %nop10076 = alloca i1, i1 0
+  %nop10077 = alloca i1, i1 0
+  %nop10078 = alloca i1, i1 0
+  %nop10079 = alloca i1, i1 0
+  %nop10080 = alloca i1, i1 0
+  %nop10081 = alloca i1, i1 0
+  %nop10082 = alloca i1, i1 0
+  %nop10083 = alloca i1, i1 0
+  %nop10084 = alloca i1, i1 0
+  %nop10085 = alloca i1, i1 0
+  %nop10086 = alloca i1, i1 0
+  %nop10087 = alloca i1, i1 0
+  %nop10088 = alloca i1, i1 0
+  %nop10089 = alloca i1, i1 0
+  %nop10090 = alloca i1, i1 0
+  %nop10091 = alloca i1, i1 0
+  %nop10092 = alloca i1, i1 0
+  %nop10093 = alloca i1, i1 0
+  %nop10094 = alloca i1, i1 0
+  %nop10095 = alloca i1, i1 0
+  %nop10096 = alloca i1, i1 0
+  %nop10097 = alloca i1, i1 0
+  %nop10098 = alloca i1, i1 0
+  %nop10099 = alloca i1, i1 0
+  %nop10100 = alloca i1, i1 0
+  %nop10101 = alloca i1, i1 0
+  %nop10102 = alloca i1, i1 0
+  %nop10103 = alloca i1, i1 0
+  %nop10104 = alloca i1, i1 0
+  %nop10105 = alloca i1, i1 0
+  %nop10106 = alloca i1, i1 0
+  %nop10107 = alloca i1, i1 0
+  %nop10108 = alloca i1, i1 0
+  %nop10109 = alloca i1, i1 0
+  %nop10110 = alloca i1, i1 0
+  %nop10111 = alloca i1, i1 0
+  %nop10112 = alloca i1, i1 0
+  %nop10113 = alloca i1, i1 0
+  %nop10114 = alloca i1, i1 0
+  %nop10115 = alloca i1, i1 0
+  %nop10116 = alloca i1, i1 0
+  %nop10117 = alloca i1, i1 0
+  %nop10118 = alloca i1, i1 0
+  %nop10119 = alloca i1, i1 0
+  %nop10120 = alloca i1, i1 0
+  %nop10121 = alloca i1, i1 0
+  %nop10122 = alloca i1, i1 0
+  %nop10123 = alloca i1, i1 0
+  %nop10124 = alloca i1, i1 0
+  %nop10125 = alloca i1, i1 0
+  %nop10126 = alloca i1, i1 0
+  %nop10127 = alloca i1, i1 0
+  %nop10128 = alloca i1, i1 0
+  %nop10129 = alloca i1, i1 0
+  %nop10130 = alloca i1, i1 0
+  %nop10131 = alloca i1, i1 0
+  %nop10132 = alloca i1, i1 0
+  %nop10133 = alloca i1, i1 0
+  %nop10134 = alloca i1, i1 0
+  %nop10135 = alloca i1, i1 0
+  %nop10136 = alloca i1, i1 0
+  %nop10137 = alloca i1, i1 0
+  %nop10138 = alloca i1, i1 0
+  %nop10139 = alloca i1, i1 0
+  %nop10140 = alloca i1, i1 0
+  %nop10141 = alloca i1, i1 0
+  %nop10142 = alloca i1, i1 0
+  %nop10143 = alloca i1, i1 0
+  %nop10144 = alloca i1, i1 0
+  %nop10145 = alloca i1, i1 0
+  %nop10146 = alloca i1, i1 0
+  %nop10147 = alloca i1, i1 0
+  %nop10148 = alloca i1, i1 0
+  %nop10149 = alloca i1, i1 0
+  %nop10150 = alloca i1, i1 0
+  %nop10151 = alloca i1, i1 0
+  %nop10152 = alloca i1, i1 0
+  %nop10153 = alloca i1, i1 0
+  %nop10154 = alloca i1, i1 0
+  %nop10155 = alloca i1, i1 0
+  %nop10156 = alloca i1, i1 0
+  %nop10157 = alloca i1, i1 0
+  %nop10158 = alloca i1, i1 0
+  %nop10159 = alloca i1, i1 0
+  %nop10160 = alloca i1, i1 0
+  %nop10161 = alloca i1, i1 0
+  %nop10162 = alloca i1, i1 0
+  %nop10163 = alloca i1, i1 0
+  %nop10164 = alloca i1, i1 0
+  %nop10165 = alloca i1, i1 0
+  %nop10166 = alloca i1, i1 0
+  %nop10167 = alloca i1, i1 0
+  %nop10168 = alloca i1, i1 0
+  %nop10169 = alloca i1, i1 0
+  %nop10170 = alloca i1, i1 0
+  %nop10171 = alloca i1, i1 0
+  %nop10172 = alloca i1, i1 0
+  %nop10173 = alloca i1, i1 0
+  %nop10174 = alloca i1, i1 0
+  %nop10175 = alloca i1, i1 0
+  %nop10176 = alloca i1, i1 0
+  %nop10177 = alloca i1, i1 0
+  %nop10178 = alloca i1, i1 0
+  %nop10179 = alloca i1, i1 0
+  %nop10180 = alloca i1, i1 0
+  %nop10181 = alloca i1, i1 0
+  %nop10182 = alloca i1, i1 0
+  %nop10183 = alloca i1, i1 0
+  %nop10184 = alloca i1, i1 0
+  %nop10185 = alloca i1, i1 0
+  %nop10186 = alloca i1, i1 0
+  %nop10187 = alloca i1, i1 0
+  %nop10188 = alloca i1, i1 0
+  %nop10189 = alloca i1, i1 0
+  %nop10190 = alloca i1, i1 0
+  %nop10191 = alloca i1, i1 0
+  %nop10192 = alloca i1, i1 0
+  %nop10193 = alloca i1, i1 0
+  %nop10194 = alloca i1, i1 0
+  %nop10195 = alloca i1, i1 0
+  %nop10196 = alloca i1, i1 0
+  %nop10197 = alloca i1, i1 0
+  %nop10198 = alloca i1, i1 0
+  %nop10199 = alloca i1, i1 0
+  %nop10200 = alloca i1, i1 0
+  %nop10201 = alloca i1, i1 0
+  %nop10202 = alloca i1, i1 0
+  %nop10203 = alloca i1, i1 0
+  %nop10204 = alloca i1, i1 0
+  %nop10205 = alloca i1, i1 0
+  %nop10206 = alloca i1, i1 0
+  %nop10207 = alloca i1, i1 0
+  %nop10208 = alloca i1, i1 0
+  %nop10209 = alloca i1, i1 0
+  %nop10210 = alloca i1, i1 0
+  %nop10211 = alloca i1, i1 0
+  %nop10212 = alloca i1, i1 0
+  %nop10213 = alloca i1, i1 0
+  %nop10214 = alloca i1, i1 0
+  %nop10215 = alloca i1, i1 0
+  %nop10216 = alloca i1, i1 0
+  %nop10217 = alloca i1, i1 0
+  %nop10218 = alloca i1, i1 0
+  %nop10219 = alloca i1, i1 0
+  %nop10220 = alloca i1, i1 0
+  %nop10221 = alloca i1, i1 0
+  %nop10222 = alloca i1, i1 0
+  %nop10223 = alloca i1, i1 0
+  %nop10224 = alloca i1, i1 0
+  %nop10225 = alloca i1, i1 0
+  %nop10226 = alloca i1, i1 0
+  %nop10227 = alloca i1, i1 0
+  %nop10228 = alloca i1, i1 0
+  %nop10229 = alloca i1, i1 0
+  %nop10230 = alloca i1, i1 0
+  %nop10231 = alloca i1, i1 0
+  %nop10232 = alloca i1, i1 0
+  %nop10233 = alloca i1, i1 0
+  %nop10234 = alloca i1, i1 0
+  %nop10235 = alloca i1, i1 0
+  %nop10236 = alloca i1, i1 0
+  %nop10237 = alloca i1, i1 0
+  %nop10238 = alloca i1, i1 0
+  %nop10239 = alloca i1, i1 0
+  %nop10240 = alloca i1, i1 0
+  %nop10241 = alloca i1, i1 0
+  %nop10242 = alloca i1, i1 0
+  %nop10243 = alloca i1, i1 0
+  %nop10244 = alloca i1, i1 0
+  %nop10245 = alloca i1, i1 0
+  %nop10246 = alloca i1, i1 0
+  %nop10247 = alloca i1, i1 0
+  %nop10248 = alloca i1, i1 0
+  %nop10249 = alloca i1, i1 0
+  %nop10250 = alloca i1, i1 0
+  %nop10251 = alloca i1, i1 0
+  %nop10252 = alloca i1, i1 0
+  %nop10253 = alloca i1, i1 0
+  %nop10254 = alloca i1, i1 0
+  %nop10255 = alloca i1, i1 0
+  %nop10256 = alloca i1, i1 0
+  %nop10257 = alloca i1, i1 0
+  %nop10258 = alloca i1, i1 0
+  %nop10259 = alloca i1, i1 0
+  %nop10260 = alloca i1, i1 0
+  %nop10261 = alloca i1, i1 0
+  %nop10262 = alloca i1, i1 0
+  %nop10263 = alloca i1, i1 0
+  %nop10264 = alloca i1, i1 0
+  %nop10265 = alloca i1, i1 0
+  %nop10266 = alloca i1, i1 0
+  %nop10267 = alloca i1, i1 0
+  %nop10268 = alloca i1, i1 0
+  %nop10269 = alloca i1, i1 0
+  %nop10270 = alloca i1, i1 0
+  %nop10271 = alloca i1, i1 0
+  %nop10272 = alloca i1, i1 0
+  %nop10273 = alloca i1, i1 0
+  %nop10274 = alloca i1, i1 0
+  %nop10275 = alloca i1, i1 0
+  %nop10276 = alloca i1, i1 0
+  %nop10277 = alloca i1, i1 0
+  %nop10278 = alloca i1, i1 0
+  %nop10279 = alloca i1, i1 0
+  %nop10280 = alloca i1, i1 0
+  %nop10281 = alloca i1, i1 0
+  %nop10282 = alloca i1, i1 0
+  %nop10283 = alloca i1, i1 0
+  %nop10284 = alloca i1, i1 0
+  %nop10285 = alloca i1, i1 0
+  %nop10286 = alloca i1, i1 0
+  %nop10287 = alloca i1, i1 0
+  %nop10288 = alloca i1, i1 0
+  %nop10289 = alloca i1, i1 0
+  %nop10290 = alloca i1, i1 0
+  %nop10291 = alloca i1, i1 0
+  %nop10292 = alloca i1, i1 0
+  %nop10293 = alloca i1, i1 0
+  %nop10294 = alloca i1, i1 0
+  %nop10295 = alloca i1, i1 0
+  %nop10296 = alloca i1, i1 0
+  %nop10297 = alloca i1, i1 0
+  %nop10298 = alloca i1, i1 0
+  %nop10299 = alloca i1, i1 0
+  %nop10300 = alloca i1, i1 0
+  %nop10301 = alloca i1, i1 0
+  %nop10302 = alloca i1, i1 0
+  %nop10303 = alloca i1, i1 0
+  %nop10304 = alloca i1, i1 0
+  %nop10305 = alloca i1, i1 0
+  %nop10306 = alloca i1, i1 0
+  %nop10307 = alloca i1, i1 0
+  %nop10308 = alloca i1, i1 0
+  %nop10309 = alloca i1, i1 0
+  %nop10310 = alloca i1, i1 0
+  %nop10311 = alloca i1, i1 0
+  %nop10312 = alloca i1, i1 0
+  %nop10313 = alloca i1, i1 0
+  %nop10314 = alloca i1, i1 0
+  %nop10315 = alloca i1, i1 0
+  %nop10316 = alloca i1, i1 0
+  %nop10317 = alloca i1, i1 0
+  %nop10318 = alloca i1, i1 0
+  %nop10319 = alloca i1, i1 0
+  %nop10320 = alloca i1, i1 0
+  %nop10321 = alloca i1, i1 0
+  %nop10322 = alloca i1, i1 0
+  %nop10323 = alloca i1, i1 0
+  %nop10324 = alloca i1, i1 0
+  %nop10325 = alloca i1, i1 0
+  %nop10326 = alloca i1, i1 0
+  %nop10327 = alloca i1, i1 0
+  %nop10328 = alloca i1, i1 0
+  %nop10329 = alloca i1, i1 0
+  %nop10330 = alloca i1, i1 0
+  %nop10331 = alloca i1, i1 0
+  %nop10332 = alloca i1, i1 0
+  %nop10333 = alloca i1, i1 0
+  %nop10334 = alloca i1, i1 0
+  %nop10335 = alloca i1, i1 0
+  %nop10336 = alloca i1, i1 0
+  %nop10337 = alloca i1, i1 0
+  %nop10338 = alloca i1, i1 0
+  %nop10339 = alloca i1, i1 0
+  %nop10340 = alloca i1, i1 0
+  %nop10341 = alloca i1, i1 0
+  %nop10342 = alloca i1, i1 0
+  %nop10343 = alloca i1, i1 0
+  %nop10344 = alloca i1, i1 0
+  %nop10345 = alloca i1, i1 0
+  %nop10346 = alloca i1, i1 0
+  %nop10347 = alloca i1, i1 0
+  %nop10348 = alloca i1, i1 0
+  %nop10349 = alloca i1, i1 0
+  %nop10350 = alloca i1, i1 0
+  %nop10351 = alloca i1, i1 0
+  %nop10352 = alloca i1, i1 0
+  %nop10353 = alloca i1, i1 0
+  %nop10354 = alloca i1, i1 0
+  %nop10355 = alloca i1, i1 0
+  %nop10356 = alloca i1, i1 0
+  %nop10357 = alloca i1, i1 0
+  %nop10358 = alloca i1, i1 0
+  %nop10359 = alloca i1, i1 0
+  %nop10360 = alloca i1, i1 0
+  %nop10361 = alloca i1, i1 0
+  %nop10362 = alloca i1, i1 0
+  %nop10363 = alloca i1, i1 0
+  %nop10364 = alloca i1, i1 0
+  %nop10365 = alloca i1, i1 0
+  %nop10366 = alloca i1, i1 0
+  %nop10367 = alloca i1, i1 0
+  %nop10368 = alloca i1, i1 0
+  %nop10369 = alloca i1, i1 0
+  %nop10370 = alloca i1, i1 0
+  %nop10371 = alloca i1, i1 0
+  %nop10372 = alloca i1, i1 0
+  %nop10373 = alloca i1, i1 0
+  %nop10374 = alloca i1, i1 0
+  %nop10375 = alloca i1, i1 0
+  %nop10376 = alloca i1, i1 0
+  %nop10377 = alloca i1, i1 0
+  %nop10378 = alloca i1, i1 0
+  %nop10379 = alloca i1, i1 0
+  %nop10380 = alloca i1, i1 0
+  %nop10381 = alloca i1, i1 0
+  %nop10382 = alloca i1, i1 0
+  %nop10383 = alloca i1, i1 0
+  %nop10384 = alloca i1, i1 0
+  %nop10385 = alloca i1, i1 0
+  %nop10386 = alloca i1, i1 0
+  %nop10387 = alloca i1, i1 0
+  %nop10388 = alloca i1, i1 0
+  %nop10389 = alloca i1, i1 0
+  %nop10390 = alloca i1, i1 0
+  %nop10391 = alloca i1, i1 0
+  %nop10392 = alloca i1, i1 0
+  %nop10393 = alloca i1, i1 0
+  %nop10394 = alloca i1, i1 0
+  %nop10395 = alloca i1, i1 0
+  %nop10396 = alloca i1, i1 0
+  %nop10397 = alloca i1, i1 0
+  %nop10398 = alloca i1, i1 0
+  %nop10399 = alloca i1, i1 0
+  %nop10400 = alloca i1, i1 0
+  %nop10401 = alloca i1, i1 0
+  %nop10402 = alloca i1, i1 0
+  %nop10403 = alloca i1, i1 0
+  %nop10404 = alloca i1, i1 0
+  %nop10405 = alloca i1, i1 0
+  %nop10406 = alloca i1, i1 0
+  %nop10407 = alloca i1, i1 0
+  %nop10408 = alloca i1, i1 0
+  %nop10409 = alloca i1, i1 0
+  %nop10410 = alloca i1, i1 0
+  %nop10411 = alloca i1, i1 0
+  %nop10412 = alloca i1, i1 0
+  %nop10413 = alloca i1, i1 0
+  %nop10414 = alloca i1, i1 0
+  %nop10415 = alloca i1, i1 0
+  %nop10416 = alloca i1, i1 0
+  %nop10417 = alloca i1, i1 0
+  %nop10418 = alloca i1, i1 0
+  %nop10419 = alloca i1, i1 0
+  %nop10420 = alloca i1, i1 0
+  %nop10421 = alloca i1, i1 0
+  %nop10422 = alloca i1, i1 0
+  %nop10423 = alloca i1, i1 0
+  %nop10424 = alloca i1, i1 0
+  %nop10425 = alloca i1, i1 0
+  %nop10426 = alloca i1, i1 0
+  %nop10427 = alloca i1, i1 0
+  %nop10428 = alloca i1, i1 0
+  %nop10429 = alloca i1, i1 0
+  %nop10430 = alloca i1, i1 0
+  %nop10431 = alloca i1, i1 0
+  %nop10432 = alloca i1, i1 0
+  %nop10433 = alloca i1, i1 0
+  %nop10434 = alloca i1, i1 0
+  %nop10435 = alloca i1, i1 0
+  %nop10436 = alloca i1, i1 0
+  %nop10437 = alloca i1, i1 0
+  %nop10438 = alloca i1, i1 0
+  %nop10439 = alloca i1, i1 0
+  %nop10440 = alloca i1, i1 0
+  %nop10441 = alloca i1, i1 0
+  %nop10442 = alloca i1, i1 0
+  %nop10443 = alloca i1, i1 0
+  %nop10444 = alloca i1, i1 0
+  %nop10445 = alloca i1, i1 0
+  %nop10446 = alloca i1, i1 0
+  %nop10447 = alloca i1, i1 0
+  %nop10448 = alloca i1, i1 0
+  %nop10449 = alloca i1, i1 0
+  %nop10450 = alloca i1, i1 0
+  %nop10451 = alloca i1, i1 0
+  %nop10452 = alloca i1, i1 0
+  %nop10453 = alloca i1, i1 0
+  %nop10454 = alloca i1, i1 0
+  %nop10455 = alloca i1, i1 0
+  %nop10456 = alloca i1, i1 0
+  %nop10457 = alloca i1, i1 0
+  %nop10458 = alloca i1, i1 0
+  %nop10459 = alloca i1, i1 0
+  %nop10460 = alloca i1, i1 0
+  %nop10461 = alloca i1, i1 0
+  %nop10462 = alloca i1, i1 0
+  %nop10463 = alloca i1, i1 0
+  %nop10464 = alloca i1, i1 0
+  %nop10465 = alloca i1, i1 0
+  %nop10466 = alloca i1, i1 0
+  %nop10467 = alloca i1, i1 0
+  %nop10468 = alloca i1, i1 0
+  %nop10469 = alloca i1, i1 0
+  %nop10470 = alloca i1, i1 0
+  %nop10471 = alloca i1, i1 0
+  %nop10472 = alloca i1, i1 0
+  %nop10473 = alloca i1, i1 0
+  %nop10474 = alloca i1, i1 0
+  %nop10475 = alloca i1, i1 0
+  %nop10476 = alloca i1, i1 0
+  %nop10477 = alloca i1, i1 0
+  %nop10478 = alloca i1, i1 0
+  %nop10479 = alloca i1, i1 0
+  %nop10480 = alloca i1, i1 0
+  %nop10481 = alloca i1, i1 0
+  %nop10482 = alloca i1, i1 0
+  %nop10483 = alloca i1, i1 0
+  %nop10484 = alloca i1, i1 0
+  %nop10485 = alloca i1, i1 0
+  %nop10486 = alloca i1, i1 0
+  %nop10487 = alloca i1, i1 0
+  %nop10488 = alloca i1, i1 0
+  %nop10489 = alloca i1, i1 0
+  %nop10490 = alloca i1, i1 0
+  %nop10491 = alloca i1, i1 0
+  %nop10492 = alloca i1, i1 0
+  %nop10493 = alloca i1, i1 0
+  %nop10494 = alloca i1, i1 0
+  %nop10495 = alloca i1, i1 0
+  %nop10496 = alloca i1, i1 0
+  %nop10497 = alloca i1, i1 0
+  %nop10498 = alloca i1, i1 0
+  %nop10499 = alloca i1, i1 0
+  %nop10500 = alloca i1, i1 0
+  %nop10501 = alloca i1, i1 0
+  %nop10502 = alloca i1, i1 0
+  %nop10503 = alloca i1, i1 0
+  %nop10504 = alloca i1, i1 0
+  %nop10505 = alloca i1, i1 0
+  %nop10506 = alloca i1, i1 0
+  %nop10507 = alloca i1, i1 0
+  %nop10508 = alloca i1, i1 0
+  %nop10509 = alloca i1, i1 0
+  %nop10510 = alloca i1, i1 0
+  %nop10511 = alloca i1, i1 0
+  %nop10512 = alloca i1, i1 0
+  %nop10513 = alloca i1, i1 0
+  %nop10514 = alloca i1, i1 0
+  %nop10515 = alloca i1, i1 0
+  %nop10516 = alloca i1, i1 0
+  %nop10517 = alloca i1, i1 0
+  %nop10518 = alloca i1, i1 0
+  %nop10519 = alloca i1, i1 0
+  %nop10520 = alloca i1, i1 0
+  %nop10521 = alloca i1, i1 0
+  %nop10522 = alloca i1, i1 0
+  %nop10523 = alloca i1, i1 0
+  %nop10524 = alloca i1, i1 0
+  %nop10525 = alloca i1, i1 0
+  %nop10526 = alloca i1, i1 0
+  %nop10527 = alloca i1, i1 0
+  %nop10528 = alloca i1, i1 0
+  %nop10529 = alloca i1, i1 0
+  %nop10530 = alloca i1, i1 0
+  %nop10531 = alloca i1, i1 0
+  %nop10532 = alloca i1, i1 0
+  %nop10533 = alloca i1, i1 0
+  %nop10534 = alloca i1, i1 0
+  %nop10535 = alloca i1, i1 0
+  %nop10536 = alloca i1, i1 0
+  %nop10537 = alloca i1, i1 0
+  %nop10538 = alloca i1, i1 0
+  %nop10539 = alloca i1, i1 0
+  %nop10540 = alloca i1, i1 0
+  %nop10541 = alloca i1, i1 0
+  %nop10542 = alloca i1, i1 0
+  %nop10543 = alloca i1, i1 0
+  %nop10544 = alloca i1, i1 0
+  %nop10545 = alloca i1, i1 0
+  %nop10546 = alloca i1, i1 0
+  %nop10547 = alloca i1, i1 0
+  %nop10548 = alloca i1, i1 0
+  %nop10549 = alloca i1, i1 0
+  %nop10550 = alloca i1, i1 0
+  %nop10551 = alloca i1, i1 0
+  %nop10552 = alloca i1, i1 0
+  %nop10553 = alloca i1, i1 0
+  %nop10554 = alloca i1, i1 0
+  %nop10555 = alloca i1, i1 0
+  %nop10556 = alloca i1, i1 0
+  %nop10557 = alloca i1, i1 0
+  %nop10558 = alloca i1, i1 0
+  %nop10559 = alloca i1, i1 0
+  %nop10560 = alloca i1, i1 0
+  %nop10561 = alloca i1, i1 0
+  %nop10562 = alloca i1, i1 0
+  %nop10563 = alloca i1, i1 0
+  %nop10564 = alloca i1, i1 0
+  %nop10565 = alloca i1, i1 0
+  %nop10566 = alloca i1, i1 0
+  %nop10567 = alloca i1, i1 0
+  %nop10568 = alloca i1, i1 0
+  %nop10569 = alloca i1, i1 0
+  %nop10570 = alloca i1, i1 0
+  %nop10571 = alloca i1, i1 0
+  %nop10572 = alloca i1, i1 0
+  %nop10573 = alloca i1, i1 0
+  %nop10574 = alloca i1, i1 0
+  %nop10575 = alloca i1, i1 0
+  %nop10576 = alloca i1, i1 0
+  %nop10577 = alloca i1, i1 0
+  %nop10578 = alloca i1, i1 0
+  %nop10579 = alloca i1, i1 0
+  %nop10580 = alloca i1, i1 0
+  %nop10581 = alloca i1, i1 0
+  %nop10582 = alloca i1, i1 0
+  %nop10583 = alloca i1, i1 0
+  %nop10584 = alloca i1, i1 0
+  %nop10585 = alloca i1, i1 0
+  %nop10586 = alloca i1, i1 0
+  %nop10587 = alloca i1, i1 0
+  %nop10588 = alloca i1, i1 0
+  %nop10589 = alloca i1, i1 0
+  %nop10590 = alloca i1, i1 0
+  %nop10591 = alloca i1, i1 0
+  %nop10592 = alloca i1, i1 0
+  %nop10593 = alloca i1, i1 0
+  %nop10594 = alloca i1, i1 0
+  %nop10595 = alloca i1, i1 0
+  %nop10596 = alloca i1, i1 0
+  %nop10597 = alloca i1, i1 0
+  %nop10598 = alloca i1, i1 0
+  %nop10599 = alloca i1, i1 0
+  %nop10600 = alloca i1, i1 0
+  %nop10601 = alloca i1, i1 0
+  %nop10602 = alloca i1, i1 0
+  %nop10603 = alloca i1, i1 0
+  %nop10604 = alloca i1, i1 0
+  %nop10605 = alloca i1, i1 0
+  %nop10606 = alloca i1, i1 0
+  %nop10607 = alloca i1, i1 0
+  %nop10608 = alloca i1, i1 0
+  %nop10609 = alloca i1, i1 0
+  %nop10610 = alloca i1, i1 0
+  %nop10611 = alloca i1, i1 0
+  %nop10612 = alloca i1, i1 0
+  %nop10613 = alloca i1, i1 0
+  %nop10614 = alloca i1, i1 0
+  %nop10615 = alloca i1, i1 0
+  %nop10616 = alloca i1, i1 0
+  %nop10617 = alloca i1, i1 0
+  %nop10618 = alloca i1, i1 0
+  %nop10619 = alloca i1, i1 0
+  %nop10620 = alloca i1, i1 0
+  %nop10621 = alloca i1, i1 0
+  %nop10622 = alloca i1, i1 0
+  %nop10623 = alloca i1, i1 0
+  %nop10624 = alloca i1, i1 0
+  %nop10625 = alloca i1, i1 0
+  %nop10626 = alloca i1, i1 0
+  %nop10627 = alloca i1, i1 0
+  %nop10628 = alloca i1, i1 0
+  %nop10629 = alloca i1, i1 0
+  %nop10630 = alloca i1, i1 0
+  %nop10631 = alloca i1, i1 0
+  %nop10632 = alloca i1, i1 0
+  %nop10633 = alloca i1, i1 0
+  %nop10634 = alloca i1, i1 0
+  %nop10635 = alloca i1, i1 0
+  %nop10636 = alloca i1, i1 0
+  %nop10637 = alloca i1, i1 0
+  %nop10638 = alloca i1, i1 0
+  %nop10639 = alloca i1, i1 0
+  %nop10640 = alloca i1, i1 0
+  %nop10641 = alloca i1, i1 0
+  %nop10642 = alloca i1, i1 0
+  %nop10643 = alloca i1, i1 0
+  %nop10644 = alloca i1, i1 0
+  %nop10645 = alloca i1, i1 0
+  %nop10646 = alloca i1, i1 0
+  %nop10647 = alloca i1, i1 0
+  %nop10648 = alloca i1, i1 0
+  %nop10649 = alloca i1, i1 0
+  %nop10650 = alloca i1, i1 0
+  %nop10651 = alloca i1, i1 0
+  %nop10652 = alloca i1, i1 0
+  %nop10653 = alloca i1, i1 0
+  %nop10654 = alloca i1, i1 0
+  %nop10655 = alloca i1, i1 0
+  %nop10656 = alloca i1, i1 0
+  %nop10657 = alloca i1, i1 0
+  %nop10658 = alloca i1, i1 0
+  %nop10659 = alloca i1, i1 0
+  %nop10660 = alloca i1, i1 0
+  %nop10661 = alloca i1, i1 0
+  %nop10662 = alloca i1, i1 0
+  %nop10663 = alloca i1, i1 0
+  %nop10664 = alloca i1, i1 0
+  %nop10665 = alloca i1, i1 0
+  %nop10666 = alloca i1, i1 0
+  %nop10667 = alloca i1, i1 0
+  %nop10668 = alloca i1, i1 0
+  %nop10669 = alloca i1, i1 0
+  %nop10670 = alloca i1, i1 0
+  %nop10671 = alloca i1, i1 0
+  %nop10672 = alloca i1, i1 0
+  %nop10673 = alloca i1, i1 0
+  %nop10674 = alloca i1, i1 0
+  %nop10675 = alloca i1, i1 0
+  %nop10676 = alloca i1, i1 0
+  %nop10677 = alloca i1, i1 0
+  %nop10678 = alloca i1, i1 0
+  %nop10679 = alloca i1, i1 0
+  %nop10680 = alloca i1, i1 0
+  %nop10681 = alloca i1, i1 0
+  %nop10682 = alloca i1, i1 0
+  %nop10683 = alloca i1, i1 0
+  %nop10684 = alloca i1, i1 0
+  %nop10685 = alloca i1, i1 0
+  %nop10686 = alloca i1, i1 0
+  %nop10687 = alloca i1, i1 0
+  %nop10688 = alloca i1, i1 0
+  %nop10689 = alloca i1, i1 0
+  %nop10690 = alloca i1, i1 0
+  %nop10691 = alloca i1, i1 0
+  %nop10692 = alloca i1, i1 0
+  %nop10693 = alloca i1, i1 0
+  %nop10694 = alloca i1, i1 0
+  %nop10695 = alloca i1, i1 0
+  %nop10696 = alloca i1, i1 0
+  %nop10697 = alloca i1, i1 0
+  %nop10698 = alloca i1, i1 0
+  %nop10699 = alloca i1, i1 0
+  %nop10700 = alloca i1, i1 0
+  %nop10701 = alloca i1, i1 0
+  %nop10702 = alloca i1, i1 0
+  %nop10703 = alloca i1, i1 0
+  %nop10704 = alloca i1, i1 0
+  %nop10705 = alloca i1, i1 0
+  %nop10706 = alloca i1, i1 0
+  %nop10707 = alloca i1, i1 0
+  %nop10708 = alloca i1, i1 0
+  %nop10709 = alloca i1, i1 0
+  %nop10710 = alloca i1, i1 0
+  %nop10711 = alloca i1, i1 0
+  %nop10712 = alloca i1, i1 0
+  %nop10713 = alloca i1, i1 0
+  %nop10714 = alloca i1, i1 0
+  %nop10715 = alloca i1, i1 0
+  %nop10716 = alloca i1, i1 0
+  %nop10717 = alloca i1, i1 0
+  %nop10718 = alloca i1, i1 0
+  %nop10719 = alloca i1, i1 0
+  %nop10720 = alloca i1, i1 0
+  %nop10721 = alloca i1, i1 0
+  %nop10722 = alloca i1, i1 0
+  %nop10723 = alloca i1, i1 0
+  %nop10724 = alloca i1, i1 0
+  %nop10725 = alloca i1, i1 0
+  %nop10726 = alloca i1, i1 0
+  %nop10727 = alloca i1, i1 0
+  %nop10728 = alloca i1, i1 0
+  %nop10729 = alloca i1, i1 0
+  %nop10730 = alloca i1, i1 0
+  %nop10731 = alloca i1, i1 0
+  %nop10732 = alloca i1, i1 0
+  %nop10733 = alloca i1, i1 0
+  %nop10734 = alloca i1, i1 0
+  %nop10735 = alloca i1, i1 0
+  %nop10736 = alloca i1, i1 0
+  %nop10737 = alloca i1, i1 0
+  %nop10738 = alloca i1, i1 0
+  %nop10739 = alloca i1, i1 0
+  %nop10740 = alloca i1, i1 0
+  %nop10741 = alloca i1, i1 0
+  %nop10742 = alloca i1, i1 0
+  %nop10743 = alloca i1, i1 0
+  %nop10744 = alloca i1, i1 0
+  %nop10745 = alloca i1, i1 0
+  %nop10746 = alloca i1, i1 0
+  %nop10747 = alloca i1, i1 0
+  %nop10748 = alloca i1, i1 0
+  %nop10749 = alloca i1, i1 0
+  %nop10750 = alloca i1, i1 0
+  %nop10751 = alloca i1, i1 0
+  %nop10752 = alloca i1, i1 0
+  %nop10753 = alloca i1, i1 0
+  %nop10754 = alloca i1, i1 0
+  %nop10755 = alloca i1, i1 0
+  %nop10756 = alloca i1, i1 0
+  %nop10757 = alloca i1, i1 0
+  %nop10758 = alloca i1, i1 0
+  %nop10759 = alloca i1, i1 0
+  %nop10760 = alloca i1, i1 0
+  %nop10761 = alloca i1, i1 0
+  %nop10762 = alloca i1, i1 0
+  %nop10763 = alloca i1, i1 0
+  %nop10764 = alloca i1, i1 0
+  %nop10765 = alloca i1, i1 0
+  %nop10766 = alloca i1, i1 0
+  %nop10767 = alloca i1, i1 0
+  %nop10768 = alloca i1, i1 0
+  %nop10769 = alloca i1, i1 0
+  %nop10770 = alloca i1, i1 0
+  %nop10771 = alloca i1, i1 0
+  %nop10772 = alloca i1, i1 0
+  %nop10773 = alloca i1, i1 0
+  %nop10774 = alloca i1, i1 0
+  %nop10775 = alloca i1, i1 0
+  %nop10776 = alloca i1, i1 0
+  %nop10777 = alloca i1, i1 0
+  %nop10778 = alloca i1, i1 0
+  %nop10779 = alloca i1, i1 0
+  %nop10780 = alloca i1, i1 0
+  %nop10781 = alloca i1, i1 0
+  %nop10782 = alloca i1, i1 0
+  %nop10783 = alloca i1, i1 0
+  %nop10784 = alloca i1, i1 0
+  %nop10785 = alloca i1, i1 0
+  %nop10786 = alloca i1, i1 0
+  %nop10787 = alloca i1, i1 0
+  %nop10788 = alloca i1, i1 0
+  %nop10789 = alloca i1, i1 0
+  %nop10790 = alloca i1, i1 0
+  %nop10791 = alloca i1, i1 0
+  %nop10792 = alloca i1, i1 0
+  %nop10793 = alloca i1, i1 0
+  %nop10794 = alloca i1, i1 0
+  %nop10795 = alloca i1, i1 0
+  %nop10796 = alloca i1, i1 0
+  %nop10797 = alloca i1, i1 0
+  %nop10798 = alloca i1, i1 0
+  %nop10799 = alloca i1, i1 0
+  %nop10800 = alloca i1, i1 0
+  %nop10801 = alloca i1, i1 0
+  %nop10802 = alloca i1, i1 0
+  %nop10803 = alloca i1, i1 0
+  %nop10804 = alloca i1, i1 0
+  %nop10805 = alloca i1, i1 0
+  %nop10806 = alloca i1, i1 0
+  %nop10807 = alloca i1, i1 0
+  %nop10808 = alloca i1, i1 0
+  %nop10809 = alloca i1, i1 0
+  %nop10810 = alloca i1, i1 0
+  %nop10811 = alloca i1, i1 0
+  %nop10812 = alloca i1, i1 0
+  %nop10813 = alloca i1, i1 0
+  %nop10814 = alloca i1, i1 0
+  %nop10815 = alloca i1, i1 0
+  %nop10816 = alloca i1, i1 0
+  %nop10817 = alloca i1, i1 0
+  %nop10818 = alloca i1, i1 0
+  %nop10819 = alloca i1, i1 0
+  %nop10820 = alloca i1, i1 0
+  %nop10821 = alloca i1, i1 0
+  %nop10822 = alloca i1, i1 0
+  %nop10823 = alloca i1, i1 0
+  %nop10824 = alloca i1, i1 0
+  %nop10825 = alloca i1, i1 0
+  %nop10826 = alloca i1, i1 0
+  %nop10827 = alloca i1, i1 0
+  %nop10828 = alloca i1, i1 0
+  %nop10829 = alloca i1, i1 0
+  %nop10830 = alloca i1, i1 0
+  %nop10831 = alloca i1, i1 0
+  %nop10832 = alloca i1, i1 0
+  %nop10833 = alloca i1, i1 0
+  %nop10834 = alloca i1, i1 0
+  %nop10835 = alloca i1, i1 0
+  %nop10836 = alloca i1, i1 0
+  %nop10837 = alloca i1, i1 0
+  %nop10838 = alloca i1, i1 0
+  %nop10839 = alloca i1, i1 0
+  %nop10840 = alloca i1, i1 0
+  %nop10841 = alloca i1, i1 0
+  %nop10842 = alloca i1, i1 0
+  %nop10843 = alloca i1, i1 0
+  %nop10844 = alloca i1, i1 0
+  %nop10845 = alloca i1, i1 0
+  %nop10846 = alloca i1, i1 0
+  %nop10847 = alloca i1, i1 0
+  %nop10848 = alloca i1, i1 0
+  %nop10849 = alloca i1, i1 0
+  %nop10850 = alloca i1, i1 0
+  %nop10851 = alloca i1, i1 0
+  %nop10852 = alloca i1, i1 0
+  %nop10853 = alloca i1, i1 0
+  %nop10854 = alloca i1, i1 0
+  %nop10855 = alloca i1, i1 0
+  %nop10856 = alloca i1, i1 0
+  %nop10857 = alloca i1, i1 0
+  %nop10858 = alloca i1, i1 0
+  %nop10859 = alloca i1, i1 0
+  %nop10860 = alloca i1, i1 0
+  %nop10861 = alloca i1, i1 0
+  %nop10862 = alloca i1, i1 0
+  %nop10863 = alloca i1, i1 0
+  %nop10864 = alloca i1, i1 0
+  %nop10865 = alloca i1, i1 0
+  %nop10866 = alloca i1, i1 0
+  %nop10867 = alloca i1, i1 0
+  %nop10868 = alloca i1, i1 0
+  %nop10869 = alloca i1, i1 0
+  %nop10870 = alloca i1, i1 0
+  %nop10871 = alloca i1, i1 0
+  %nop10872 = alloca i1, i1 0
+  %nop10873 = alloca i1, i1 0
+  %nop10874 = alloca i1, i1 0
+  %nop10875 = alloca i1, i1 0
+  %nop10876 = alloca i1, i1 0
+  %nop10877 = alloca i1, i1 0
+  %nop10878 = alloca i1, i1 0
+  %nop10879 = alloca i1, i1 0
+  %nop10880 = alloca i1, i1 0
+  %nop10881 = alloca i1, i1 0
+  %nop10882 = alloca i1, i1 0
+  %nop10883 = alloca i1, i1 0
+  %nop10884 = alloca i1, i1 0
+  %nop10885 = alloca i1, i1 0
+  %nop10886 = alloca i1, i1 0
+  %nop10887 = alloca i1, i1 0
+  %nop10888 = alloca i1, i1 0
+  %nop10889 = alloca i1, i1 0
+  %nop10890 = alloca i1, i1 0
+  %nop10891 = alloca i1, i1 0
+  %nop10892 = alloca i1, i1 0
+  %nop10893 = alloca i1, i1 0
+  %nop10894 = alloca i1, i1 0
+  %nop10895 = alloca i1, i1 0
+  %nop10896 = alloca i1, i1 0
+  %nop10897 = alloca i1, i1 0
+  %nop10898 = alloca i1, i1 0
+  %nop10899 = alloca i1, i1 0
+  %nop10900 = alloca i1, i1 0
+  %nop10901 = alloca i1, i1 0
+  %nop10902 = alloca i1, i1 0
+  %nop10903 = alloca i1, i1 0
+  %nop10904 = alloca i1, i1 0
+  %nop10905 = alloca i1, i1 0
+  %nop10906 = alloca i1, i1 0
+  %nop10907 = alloca i1, i1 0
+  %nop10908 = alloca i1, i1 0
+  %nop10909 = alloca i1, i1 0
+  %nop10910 = alloca i1, i1 0
+  %nop10911 = alloca i1, i1 0
+  %nop10912 = alloca i1, i1 0
+  %nop10913 = alloca i1, i1 0
+  %nop10914 = alloca i1, i1 0
+  %nop10915 = alloca i1, i1 0
+  %nop10916 = alloca i1, i1 0
+  %nop10917 = alloca i1, i1 0
+  %nop10918 = alloca i1, i1 0
+  %nop10919 = alloca i1, i1 0
+  %nop10920 = alloca i1, i1 0
+  %nop10921 = alloca i1, i1 0
+  %nop10922 = alloca i1, i1 0
+  %nop10923 = alloca i1, i1 0
+  %nop10924 = alloca i1, i1 0
+  %nop10925 = alloca i1, i1 0
+  %nop10926 = alloca i1, i1 0
+  %nop10927 = alloca i1, i1 0
+  %nop10928 = alloca i1, i1 0
+  %nop10929 = alloca i1, i1 0
+  %nop10930 = alloca i1, i1 0
+  %nop10931 = alloca i1, i1 0
+  %nop10932 = alloca i1, i1 0
+  %nop10933 = alloca i1, i1 0
+  %nop10934 = alloca i1, i1 0
+  %nop10935 = alloca i1, i1 0
+  %nop10936 = alloca i1, i1 0
+  %nop10937 = alloca i1, i1 0
+  %nop10938 = alloca i1, i1 0
+  %nop10939 = alloca i1, i1 0
+  %nop10940 = alloca i1, i1 0
+  %nop10941 = alloca i1, i1 0
+  %nop10942 = alloca i1, i1 0
+  %nop10943 = alloca i1, i1 0
+  %nop10944 = alloca i1, i1 0
+  %nop10945 = alloca i1, i1 0
+  %nop10946 = alloca i1, i1 0
+  %nop10947 = alloca i1, i1 0
+  %nop10948 = alloca i1, i1 0
+  %nop10949 = alloca i1, i1 0
+  %nop10950 = alloca i1, i1 0
+  %nop10951 = alloca i1, i1 0
+  %nop10952 = alloca i1, i1 0
+  %nop10953 = alloca i1, i1 0
+  %nop10954 = alloca i1, i1 0
+  %nop10955 = alloca i1, i1 0
+  %nop10956 = alloca i1, i1 0
+  %nop10957 = alloca i1, i1 0
+  %nop10958 = alloca i1, i1 0
+  %nop10959 = alloca i1, i1 0
+  %nop10960 = alloca i1, i1 0
+  %nop10961 = alloca i1, i1 0
+  %nop10962 = alloca i1, i1 0
+  %nop10963 = alloca i1, i1 0
+  %nop10964 = alloca i1, i1 0
+  %nop10965 = alloca i1, i1 0
+  %nop10966 = alloca i1, i1 0
+  %nop10967 = alloca i1, i1 0
+  %nop10968 = alloca i1, i1 0
+  %nop10969 = alloca i1, i1 0
+  %nop10970 = alloca i1, i1 0
+  %nop10971 = alloca i1, i1 0
+  %nop10972 = alloca i1, i1 0
+  %nop10973 = alloca i1, i1 0
+  %nop10974 = alloca i1, i1 0
+  %nop10975 = alloca i1, i1 0
+  %nop10976 = alloca i1, i1 0
+  %nop10977 = alloca i1, i1 0
+  %nop10978 = alloca i1, i1 0
+  %nop10979 = alloca i1, i1 0
+  %nop10980 = alloca i1, i1 0
+  %nop10981 = alloca i1, i1 0
+  %nop10982 = alloca i1, i1 0
+  %nop10983 = alloca i1, i1 0
+  %nop10984 = alloca i1, i1 0
+  %nop10985 = alloca i1, i1 0
+  %nop10986 = alloca i1, i1 0
+  %nop10987 = alloca i1, i1 0
+  %nop10988 = alloca i1, i1 0
+  %nop10989 = alloca i1, i1 0
+  %nop10990 = alloca i1, i1 0
+  %nop10991 = alloca i1, i1 0
+  %nop10992 = alloca i1, i1 0
+  %nop10993 = alloca i1, i1 0
+  %nop10994 = alloca i1, i1 0
+  %nop10995 = alloca i1, i1 0
+  %nop10996 = alloca i1, i1 0
+  %nop10997 = alloca i1, i1 0
+  %nop10998 = alloca i1, i1 0
+  %nop10999 = alloca i1, i1 0
+  %nop11000 = alloca i1, i1 0
+  %nop11001 = alloca i1, i1 0
+  %nop11002 = alloca i1, i1 0
+  %nop11003 = alloca i1, i1 0
+  %nop11004 = alloca i1, i1 0
+  %nop11005 = alloca i1, i1 0
+  %nop11006 = alloca i1, i1 0
+  %nop11007 = alloca i1, i1 0
+  %nop11008 = alloca i1, i1 0
+  %nop11009 = alloca i1, i1 0
+  %nop11010 = alloca i1, i1 0
+  %nop11011 = alloca i1, i1 0
+  %nop11012 = alloca i1, i1 0
+  %nop11013 = alloca i1, i1 0
+  %nop11014 = alloca i1, i1 0
+  %nop11015 = alloca i1, i1 0
+  %nop11016 = alloca i1, i1 0
+  %nop11017 = alloca i1, i1 0
+  %nop11018 = alloca i1, i1 0
+  %nop11019 = alloca i1, i1 0
+  %nop11020 = alloca i1, i1 0
+  %nop11021 = alloca i1, i1 0
+  %nop11022 = alloca i1, i1 0
+  %nop11023 = alloca i1, i1 0
+  %nop11024 = alloca i1, i1 0
+  %nop11025 = alloca i1, i1 0
+  %nop11026 = alloca i1, i1 0
+  %nop11027 = alloca i1, i1 0
+  %nop11028 = alloca i1, i1 0
+  %nop11029 = alloca i1, i1 0
+  %nop11030 = alloca i1, i1 0
+  %nop11031 = alloca i1, i1 0
+  %nop11032 = alloca i1, i1 0
+  %nop11033 = alloca i1, i1 0
+  %nop11034 = alloca i1, i1 0
+  %nop11035 = alloca i1, i1 0
+  %nop11036 = alloca i1, i1 0
+  %nop11037 = alloca i1, i1 0
+  %nop11038 = alloca i1, i1 0
+  %nop11039 = alloca i1, i1 0
+  %nop11040 = alloca i1, i1 0
+  %nop11041 = alloca i1, i1 0
+  %nop11042 = alloca i1, i1 0
+  %nop11043 = alloca i1, i1 0
+  %nop11044 = alloca i1, i1 0
+  %nop11045 = alloca i1, i1 0
+  %nop11046 = alloca i1, i1 0
+  %nop11047 = alloca i1, i1 0
+  %nop11048 = alloca i1, i1 0
+  %nop11049 = alloca i1, i1 0
+  %nop11050 = alloca i1, i1 0
+  %nop11051 = alloca i1, i1 0
+  %nop11052 = alloca i1, i1 0
+  %nop11053 = alloca i1, i1 0
+  %nop11054 = alloca i1, i1 0
+  %nop11055 = alloca i1, i1 0
+  %nop11056 = alloca i1, i1 0
+  %nop11057 = alloca i1, i1 0
+  %nop11058 = alloca i1, i1 0
+  %nop11059 = alloca i1, i1 0
+  %nop11060 = alloca i1, i1 0
+  %nop11061 = alloca i1, i1 0
+  %nop11062 = alloca i1, i1 0
+  %nop11063 = alloca i1, i1 0
+  %nop11064 = alloca i1, i1 0
+  %nop11065 = alloca i1, i1 0
+  %nop11066 = alloca i1, i1 0
+  %nop11067 = alloca i1, i1 0
+  %nop11068 = alloca i1, i1 0
+  %nop11069 = alloca i1, i1 0
+  %nop11070 = alloca i1, i1 0
+  %nop11071 = alloca i1, i1 0
+  %nop11072 = alloca i1, i1 0
+  %nop11073 = alloca i1, i1 0
+  %nop11074 = alloca i1, i1 0
+  %nop11075 = alloca i1, i1 0
+  %nop11076 = alloca i1, i1 0
+  %nop11077 = alloca i1, i1 0
+  %nop11078 = alloca i1, i1 0
+  %nop11079 = alloca i1, i1 0
+  %nop11080 = alloca i1, i1 0
+  %nop11081 = alloca i1, i1 0
+  %nop11082 = alloca i1, i1 0
+  %nop11083 = alloca i1, i1 0
+  %nop11084 = alloca i1, i1 0
+  %nop11085 = alloca i1, i1 0
+  %nop11086 = alloca i1, i1 0
+  %nop11087 = alloca i1, i1 0
+  %nop11088 = alloca i1, i1 0
+  %nop11089 = alloca i1, i1 0
+  %nop11090 = alloca i1, i1 0
+  %nop11091 = alloca i1, i1 0
+  %nop11092 = alloca i1, i1 0
+  %nop11093 = alloca i1, i1 0
+  %nop11094 = alloca i1, i1 0
+  %nop11095 = alloca i1, i1 0
+  %nop11096 = alloca i1, i1 0
+  %nop11097 = alloca i1, i1 0
+  %nop11098 = alloca i1, i1 0
+  %nop11099 = alloca i1, i1 0
+  %nop11100 = alloca i1, i1 0
+  %nop11101 = alloca i1, i1 0
+  %nop11102 = alloca i1, i1 0
+  %nop11103 = alloca i1, i1 0
+  %nop11104 = alloca i1, i1 0
+  %nop11105 = alloca i1, i1 0
+  %nop11106 = alloca i1, i1 0
+  %nop11107 = alloca i1, i1 0
+  %nop11108 = alloca i1, i1 0
+  %nop11109 = alloca i1, i1 0
+  %nop11110 = alloca i1, i1 0
+  %nop11111 = alloca i1, i1 0
+  %nop11112 = alloca i1, i1 0
+  %nop11113 = alloca i1, i1 0
+  %nop11114 = alloca i1, i1 0
+  %nop11115 = alloca i1, i1 0
+  %nop11116 = alloca i1, i1 0
+  %nop11117 = alloca i1, i1 0
+  %nop11118 = alloca i1, i1 0
+  %nop11119 = alloca i1, i1 0
+  %nop11120 = alloca i1, i1 0
+  %nop11121 = alloca i1, i1 0
+  %nop11122 = alloca i1, i1 0
+  %nop11123 = alloca i1, i1 0
+  %nop11124 = alloca i1, i1 0
+  %nop11125 = alloca i1, i1 0
+  %nop11126 = alloca i1, i1 0
+  %nop11127 = alloca i1, i1 0
+  %nop11128 = alloca i1, i1 0
+  %nop11129 = alloca i1, i1 0
+  %nop11130 = alloca i1, i1 0
+  %nop11131 = alloca i1, i1 0
+  %nop11132 = alloca i1, i1 0
+  %nop11133 = alloca i1, i1 0
+  %nop11134 = alloca i1, i1 0
+  %nop11135 = alloca i1, i1 0
+  %nop11136 = alloca i1, i1 0
+  %nop11137 = alloca i1, i1 0
+  %nop11138 = alloca i1, i1 0
+  %nop11139 = alloca i1, i1 0
+  %nop11140 = alloca i1, i1 0
+  %nop11141 = alloca i1, i1 0
+  %nop11142 = alloca i1, i1 0
+  %nop11143 = alloca i1, i1 0
+  %nop11144 = alloca i1, i1 0
+  %nop11145 = alloca i1, i1 0
+  %nop11146 = alloca i1, i1 0
+  %nop11147 = alloca i1, i1 0
+  %nop11148 = alloca i1, i1 0
+  %nop11149 = alloca i1, i1 0
+  %nop11150 = alloca i1, i1 0
+  %nop11151 = alloca i1, i1 0
+  %nop11152 = alloca i1, i1 0
+  %nop11153 = alloca i1, i1 0
+  %nop11154 = alloca i1, i1 0
+  %nop11155 = alloca i1, i1 0
+  %nop11156 = alloca i1, i1 0
+  %nop11157 = alloca i1, i1 0
+  %nop11158 = alloca i1, i1 0
+  %nop11159 = alloca i1, i1 0
+  %nop11160 = alloca i1, i1 0
+  %nop11161 = alloca i1, i1 0
+  %nop11162 = alloca i1, i1 0
+  %nop11163 = alloca i1, i1 0
+  %nop11164 = alloca i1, i1 0
+  %nop11165 = alloca i1, i1 0
+  %nop11166 = alloca i1, i1 0
+  %nop11167 = alloca i1, i1 0
+  %nop11168 = alloca i1, i1 0
+  %nop11169 = alloca i1, i1 0
+  %nop11170 = alloca i1, i1 0
+  %nop11171 = alloca i1, i1 0
+  %nop11172 = alloca i1, i1 0
+  %nop11173 = alloca i1, i1 0
+  %nop11174 = alloca i1, i1 0
+  %nop11175 = alloca i1, i1 0
+  %nop11176 = alloca i1, i1 0
+  %nop11177 = alloca i1, i1 0
+  %nop11178 = alloca i1, i1 0
+  %nop11179 = alloca i1, i1 0
+  %nop11180 = alloca i1, i1 0
+  %nop11181 = alloca i1, i1 0
+  %nop11182 = alloca i1, i1 0
+  %nop11183 = alloca i1, i1 0
+  %nop11184 = alloca i1, i1 0
+  %nop11185 = alloca i1, i1 0
+  %nop11186 = alloca i1, i1 0
+  %nop11187 = alloca i1, i1 0
+  %nop11188 = alloca i1, i1 0
+  %nop11189 = alloca i1, i1 0
+  %nop11190 = alloca i1, i1 0
+  %nop11191 = alloca i1, i1 0
+  %nop11192 = alloca i1, i1 0
+  %nop11193 = alloca i1, i1 0
+  %nop11194 = alloca i1, i1 0
+  %nop11195 = alloca i1, i1 0
+  %nop11196 = alloca i1, i1 0
+  %nop11197 = alloca i1, i1 0
+  %nop11198 = alloca i1, i1 0
+  %nop11199 = alloca i1, i1 0
+  %nop11200 = alloca i1, i1 0
+  %nop11201 = alloca i1, i1 0
+  %nop11202 = alloca i1, i1 0
+  %nop11203 = alloca i1, i1 0
+  %nop11204 = alloca i1, i1 0
+  %nop11205 = alloca i1, i1 0
+  %nop11206 = alloca i1, i1 0
+  %nop11207 = alloca i1, i1 0
+  %nop11208 = alloca i1, i1 0
+  %nop11209 = alloca i1, i1 0
+  %nop11210 = alloca i1, i1 0
+  %nop11211 = alloca i1, i1 0
+  %nop11212 = alloca i1, i1 0
+  %nop11213 = alloca i1, i1 0
+  %nop11214 = alloca i1, i1 0
+  %nop11215 = alloca i1, i1 0
+  %nop11216 = alloca i1, i1 0
+  %nop11217 = alloca i1, i1 0
+  %nop11218 = alloca i1, i1 0
+  %nop11219 = alloca i1, i1 0
+  %nop11220 = alloca i1, i1 0
+  %nop11221 = alloca i1, i1 0
+  %nop11222 = alloca i1, i1 0
+  %nop11223 = alloca i1, i1 0
+  %nop11224 = alloca i1, i1 0
+  %nop11225 = alloca i1, i1 0
+  %nop11226 = alloca i1, i1 0
+  %nop11227 = alloca i1, i1 0
+  %nop11228 = alloca i1, i1 0
+  %nop11229 = alloca i1, i1 0
+  %nop11230 = alloca i1, i1 0
+  %nop11231 = alloca i1, i1 0
+  %nop11232 = alloca i1, i1 0
+  %nop11233 = alloca i1, i1 0
+  %nop11234 = alloca i1, i1 0
+  %nop11235 = alloca i1, i1 0
+  %nop11236 = alloca i1, i1 0
+  %nop11237 = alloca i1, i1 0
+  %nop11238 = alloca i1, i1 0
+  %nop11239 = alloca i1, i1 0
+  %nop11240 = alloca i1, i1 0
+  %nop11241 = alloca i1, i1 0
+  %nop11242 = alloca i1, i1 0
+  %nop11243 = alloca i1, i1 0
+  %nop11244 = alloca i1, i1 0
+  %nop11245 = alloca i1, i1 0
+  %nop11246 = alloca i1, i1 0
+  %nop11247 = alloca i1, i1 0
+  %nop11248 = alloca i1, i1 0
+  %nop11249 = alloca i1, i1 0
+  %nop11250 = alloca i1, i1 0
+  %nop11251 = alloca i1, i1 0
+  %nop11252 = alloca i1, i1 0
+  %nop11253 = alloca i1, i1 0
+  %nop11254 = alloca i1, i1 0
+  %nop11255 = alloca i1, i1 0
+  %nop11256 = alloca i1, i1 0
+  %nop11257 = alloca i1, i1 0
+  %nop11258 = alloca i1, i1 0
+  %nop11259 = alloca i1, i1 0
+  %nop11260 = alloca i1, i1 0
+  %nop11261 = alloca i1, i1 0
+  %nop11262 = alloca i1, i1 0
+  %nop11263 = alloca i1, i1 0
+  %nop11264 = alloca i1, i1 0
+  %nop11265 = alloca i1, i1 0
+  %nop11266 = alloca i1, i1 0
+  %nop11267 = alloca i1, i1 0
+  %nop11268 = alloca i1, i1 0
+  %nop11269 = alloca i1, i1 0
+  %nop11270 = alloca i1, i1 0
+  %nop11271 = alloca i1, i1 0
+  %nop11272 = alloca i1, i1 0
+  %nop11273 = alloca i1, i1 0
+  %nop11274 = alloca i1, i1 0
+  %nop11275 = alloca i1, i1 0
+  %nop11276 = alloca i1, i1 0
+  %nop11277 = alloca i1, i1 0
+  %nop11278 = alloca i1, i1 0
+  %nop11279 = alloca i1, i1 0
+  %nop11280 = alloca i1, i1 0
+  %nop11281 = alloca i1, i1 0
+  %nop11282 = alloca i1, i1 0
+  %nop11283 = alloca i1, i1 0
+  %nop11284 = alloca i1, i1 0
+  %nop11285 = alloca i1, i1 0
+  %nop11286 = alloca i1, i1 0
+  %nop11287 = alloca i1, i1 0
+  %nop11288 = alloca i1, i1 0
+  %nop11289 = alloca i1, i1 0
+  %nop11290 = alloca i1, i1 0
+  %nop11291 = alloca i1, i1 0
+  %nop11292 = alloca i1, i1 0
+  %nop11293 = alloca i1, i1 0
+  %nop11294 = alloca i1, i1 0
+  %nop11295 = alloca i1, i1 0
+  %nop11296 = alloca i1, i1 0
+  %nop11297 = alloca i1, i1 0
+  %nop11298 = alloca i1, i1 0
+  %nop11299 = alloca i1, i1 0
+  %nop11300 = alloca i1, i1 0
+  %nop11301 = alloca i1, i1 0
+  %nop11302 = alloca i1, i1 0
+  %nop11303 = alloca i1, i1 0
+  %nop11304 = alloca i1, i1 0
+  %nop11305 = alloca i1, i1 0
+  %nop11306 = alloca i1, i1 0
+  %nop11307 = alloca i1, i1 0
+  %nop11308 = alloca i1, i1 0
+  %nop11309 = alloca i1, i1 0
+  %nop11310 = alloca i1, i1 0
+  %nop11311 = alloca i1, i1 0
+  %nop11312 = alloca i1, i1 0
+  %nop11313 = alloca i1, i1 0
+  %nop11314 = alloca i1, i1 0
+  %nop11315 = alloca i1, i1 0
+  %nop11316 = alloca i1, i1 0
+  %nop11317 = alloca i1, i1 0
+  %nop11318 = alloca i1, i1 0
+  %nop11319 = alloca i1, i1 0
+  %nop11320 = alloca i1, i1 0
+  %nop11321 = alloca i1, i1 0
+  %nop11322 = alloca i1, i1 0
+  %nop11323 = alloca i1, i1 0
+  %nop11324 = alloca i1, i1 0
+  %nop11325 = alloca i1, i1 0
+  %nop11326 = alloca i1, i1 0
+  %nop11327 = alloca i1, i1 0
+  %nop11328 = alloca i1, i1 0
+  %nop11329 = alloca i1, i1 0
+  %nop11330 = alloca i1, i1 0
+  %nop11331 = alloca i1, i1 0
+  %nop11332 = alloca i1, i1 0
+  %nop11333 = alloca i1, i1 0
+  %nop11334 = alloca i1, i1 0
+  %nop11335 = alloca i1, i1 0
+  %nop11336 = alloca i1, i1 0
+  %nop11337 = alloca i1, i1 0
+  %nop11338 = alloca i1, i1 0
+  %nop11339 = alloca i1, i1 0
+  %nop11340 = alloca i1, i1 0
+  %nop11341 = alloca i1, i1 0
+  %nop11342 = alloca i1, i1 0
+  %nop11343 = alloca i1, i1 0
+  %nop11344 = alloca i1, i1 0
+  %nop11345 = alloca i1, i1 0
+  %nop11346 = alloca i1, i1 0
+  %nop11347 = alloca i1, i1 0
+  %nop11348 = alloca i1, i1 0
+  %nop11349 = alloca i1, i1 0
+  %nop11350 = alloca i1, i1 0
+  %nop11351 = alloca i1, i1 0
+  %nop11352 = alloca i1, i1 0
+  %nop11353 = alloca i1, i1 0
+  %nop11354 = alloca i1, i1 0
+  %nop11355 = alloca i1, i1 0
+  %nop11356 = alloca i1, i1 0
+  %nop11357 = alloca i1, i1 0
+  %nop11358 = alloca i1, i1 0
+  %nop11359 = alloca i1, i1 0
+  %nop11360 = alloca i1, i1 0
+  %nop11361 = alloca i1, i1 0
+  %nop11362 = alloca i1, i1 0
+  %nop11363 = alloca i1, i1 0
+  %nop11364 = alloca i1, i1 0
+  %nop11365 = alloca i1, i1 0
+  %nop11366 = alloca i1, i1 0
+  %nop11367 = alloca i1, i1 0
+  %nop11368 = alloca i1, i1 0
+  %nop11369 = alloca i1, i1 0
+  %nop11370 = alloca i1, i1 0
+  %nop11371 = alloca i1, i1 0
+  %nop11372 = alloca i1, i1 0
+  %nop11373 = alloca i1, i1 0
+  %nop11374 = alloca i1, i1 0
+  %nop11375 = alloca i1, i1 0
+  %nop11376 = alloca i1, i1 0
+  %nop11377 = alloca i1, i1 0
+  %nop11378 = alloca i1, i1 0
+  %nop11379 = alloca i1, i1 0
+  %nop11380 = alloca i1, i1 0
+  %nop11381 = alloca i1, i1 0
+  %nop11382 = alloca i1, i1 0
+  %nop11383 = alloca i1, i1 0
+  %nop11384 = alloca i1, i1 0
+  %nop11385 = alloca i1, i1 0
+  %nop11386 = alloca i1, i1 0
+  %nop11387 = alloca i1, i1 0
+  %nop11388 = alloca i1, i1 0
+  %nop11389 = alloca i1, i1 0
+  %nop11390 = alloca i1, i1 0
+  %nop11391 = alloca i1, i1 0
+  %nop11392 = alloca i1, i1 0
+  %nop11393 = alloca i1, i1 0
+  %nop11394 = alloca i1, i1 0
+  %nop11395 = alloca i1, i1 0
+  %nop11396 = alloca i1, i1 0
+  %nop11397 = alloca i1, i1 0
+  %nop11398 = alloca i1, i1 0
+  %nop11399 = alloca i1, i1 0
+  %nop11400 = alloca i1, i1 0
+  %nop11401 = alloca i1, i1 0
+  %nop11402 = alloca i1, i1 0
+  %nop11403 = alloca i1, i1 0
+  %nop11404 = alloca i1, i1 0
+  %nop11405 = alloca i1, i1 0
+  %nop11406 = alloca i1, i1 0
+  %nop11407 = alloca i1, i1 0
+  %nop11408 = alloca i1, i1 0
+  %nop11409 = alloca i1, i1 0
+  %nop11410 = alloca i1, i1 0
+  %nop11411 = alloca i1, i1 0
+  %nop11412 = alloca i1, i1 0
+  %nop11413 = alloca i1, i1 0
+  %nop11414 = alloca i1, i1 0
+  %nop11415 = alloca i1, i1 0
+  %nop11416 = alloca i1, i1 0
+  %nop11417 = alloca i1, i1 0
+  %nop11418 = alloca i1, i1 0
+  %nop11419 = alloca i1, i1 0
+  %nop11420 = alloca i1, i1 0
+  %nop11421 = alloca i1, i1 0
+  %nop11422 = alloca i1, i1 0
+  %nop11423 = alloca i1, i1 0
+  %nop11424 = alloca i1, i1 0
+  %nop11425 = alloca i1, i1 0
+  %nop11426 = alloca i1, i1 0
+  %nop11427 = alloca i1, i1 0
+  %nop11428 = alloca i1, i1 0
+  %nop11429 = alloca i1, i1 0
+  %nop11430 = alloca i1, i1 0
+  %nop11431 = alloca i1, i1 0
+  %nop11432 = alloca i1, i1 0
+  %nop11433 = alloca i1, i1 0
+  %nop11434 = alloca i1, i1 0
+  %nop11435 = alloca i1, i1 0
+  %nop11436 = alloca i1, i1 0
+  %nop11437 = alloca i1, i1 0
+  %nop11438 = alloca i1, i1 0
+  %nop11439 = alloca i1, i1 0
+  %nop11440 = alloca i1, i1 0
+  %nop11441 = alloca i1, i1 0
+  %nop11442 = alloca i1, i1 0
+  %nop11443 = alloca i1, i1 0
+  %nop11444 = alloca i1, i1 0
+  %nop11445 = alloca i1, i1 0
+  %nop11446 = alloca i1, i1 0
+  %nop11447 = alloca i1, i1 0
+  %nop11448 = alloca i1, i1 0
+  %nop11449 = alloca i1, i1 0
+  %nop11450 = alloca i1, i1 0
+  %nop11451 = alloca i1, i1 0
+  %nop11452 = alloca i1, i1 0
+  %nop11453 = alloca i1, i1 0
+  %nop11454 = alloca i1, i1 0
+  %nop11455 = alloca i1, i1 0
+  %nop11456 = alloca i1, i1 0
+  %nop11457 = alloca i1, i1 0
+  %nop11458 = alloca i1, i1 0
+  %nop11459 = alloca i1, i1 0
+  %nop11460 = alloca i1, i1 0
+  %nop11461 = alloca i1, i1 0
+  %nop11462 = alloca i1, i1 0
+  %nop11463 = alloca i1, i1 0
+  %nop11464 = alloca i1, i1 0
+  %nop11465 = alloca i1, i1 0
+  %nop11466 = alloca i1, i1 0
+  %nop11467 = alloca i1, i1 0
+  %nop11468 = alloca i1, i1 0
+  %nop11469 = alloca i1, i1 0
+  %nop11470 = alloca i1, i1 0
+  %nop11471 = alloca i1, i1 0
+  %nop11472 = alloca i1, i1 0
+  %nop11473 = alloca i1, i1 0
+  %nop11474 = alloca i1, i1 0
+  %nop11475 = alloca i1, i1 0
+  %nop11476 = alloca i1, i1 0
+  %nop11477 = alloca i1, i1 0
+  %nop11478 = alloca i1, i1 0
+  %nop11479 = alloca i1, i1 0
+  %nop11480 = alloca i1, i1 0
+  %nop11481 = alloca i1, i1 0
+  %nop11482 = alloca i1, i1 0
+  %nop11483 = alloca i1, i1 0
+  %nop11484 = alloca i1, i1 0
+  %nop11485 = alloca i1, i1 0
+  %nop11486 = alloca i1, i1 0
+  %nop11487 = alloca i1, i1 0
+  %nop11488 = alloca i1, i1 0
+  %nop11489 = alloca i1, i1 0
+  %nop11490 = alloca i1, i1 0
+  %nop11491 = alloca i1, i1 0
+  %nop11492 = alloca i1, i1 0
+  %nop11493 = alloca i1, i1 0
+  %nop11494 = alloca i1, i1 0
+  %nop11495 = alloca i1, i1 0
+  %nop11496 = alloca i1, i1 0
+  %nop11497 = alloca i1, i1 0
+  %nop11498 = alloca i1, i1 0
+  %nop11499 = alloca i1, i1 0
+  %nop11500 = alloca i1, i1 0
+  %nop11501 = alloca i1, i1 0
+  %nop11502 = alloca i1, i1 0
+  %nop11503 = alloca i1, i1 0
+  %nop11504 = alloca i1, i1 0
+  %nop11505 = alloca i1, i1 0
+  %nop11506 = alloca i1, i1 0
+  %nop11507 = alloca i1, i1 0
+  %nop11508 = alloca i1, i1 0
+  %nop11509 = alloca i1, i1 0
+  %nop11510 = alloca i1, i1 0
+  %nop11511 = alloca i1, i1 0
+  %nop11512 = alloca i1, i1 0
+  %nop11513 = alloca i1, i1 0
+  %nop11514 = alloca i1, i1 0
+  %nop11515 = alloca i1, i1 0
+  %nop11516 = alloca i1, i1 0
+  %nop11517 = alloca i1, i1 0
+  %nop11518 = alloca i1, i1 0
+  %nop11519 = alloca i1, i1 0
+  %nop11520 = alloca i1, i1 0
+  %nop11521 = alloca i1, i1 0
+  %nop11522 = alloca i1, i1 0
+  %nop11523 = alloca i1, i1 0
+  %nop11524 = alloca i1, i1 0
+  %nop11525 = alloca i1, i1 0
+  %nop11526 = alloca i1, i1 0
+  %nop11527 = alloca i1, i1 0
+  %nop11528 = alloca i1, i1 0
+  %nop11529 = alloca i1, i1 0
+  %nop11530 = alloca i1, i1 0
+  %nop11531 = alloca i1, i1 0
+  %nop11532 = alloca i1, i1 0
+  %nop11533 = alloca i1, i1 0
+  %nop11534 = alloca i1, i1 0
+  %nop11535 = alloca i1, i1 0
+  %nop11536 = alloca i1, i1 0
+  %nop11537 = alloca i1, i1 0
+  %nop11538 = alloca i1, i1 0
+  %nop11539 = alloca i1, i1 0
+  %nop11540 = alloca i1, i1 0
+  %nop11541 = alloca i1, i1 0
+  %nop11542 = alloca i1, i1 0
+  %nop11543 = alloca i1, i1 0
+  %nop11544 = alloca i1, i1 0
+  %nop11545 = alloca i1, i1 0
+  %nop11546 = alloca i1, i1 0
+  %nop11547 = alloca i1, i1 0
+  %nop11548 = alloca i1, i1 0
+  %nop11549 = alloca i1, i1 0
+  %nop11550 = alloca i1, i1 0
+  %nop11551 = alloca i1, i1 0
+  %nop11552 = alloca i1, i1 0
+  %nop11553 = alloca i1, i1 0
+  %nop11554 = alloca i1, i1 0
+  %nop11555 = alloca i1, i1 0
+  %nop11556 = alloca i1, i1 0
+  %nop11557 = alloca i1, i1 0
+  %nop11558 = alloca i1, i1 0
+  %nop11559 = alloca i1, i1 0
+  %nop11560 = alloca i1, i1 0
+  %nop11561 = alloca i1, i1 0
+  %nop11562 = alloca i1, i1 0
+  %nop11563 = alloca i1, i1 0
+  %nop11564 = alloca i1, i1 0
+  %nop11565 = alloca i1, i1 0
+  %nop11566 = alloca i1, i1 0
+  %nop11567 = alloca i1, i1 0
+  %nop11568 = alloca i1, i1 0
+  %nop11569 = alloca i1, i1 0
+  %nop11570 = alloca i1, i1 0
+  %nop11571 = alloca i1, i1 0
+  %nop11572 = alloca i1, i1 0
+  %nop11573 = alloca i1, i1 0
+  %nop11574 = alloca i1, i1 0
+  %nop11575 = alloca i1, i1 0
+  %nop11576 = alloca i1, i1 0
+  %nop11577 = alloca i1, i1 0
+  %nop11578 = alloca i1, i1 0
+  %nop11579 = alloca i1, i1 0
+  %nop11580 = alloca i1, i1 0
+  %nop11581 = alloca i1, i1 0
+  %nop11582 = alloca i1, i1 0
+  %nop11583 = alloca i1, i1 0
+  %nop11584 = alloca i1, i1 0
+  %nop11585 = alloca i1, i1 0
+  %nop11586 = alloca i1, i1 0
+  %nop11587 = alloca i1, i1 0
+  %nop11588 = alloca i1, i1 0
+  %nop11589 = alloca i1, i1 0
+  %nop11590 = alloca i1, i1 0
+  %nop11591 = alloca i1, i1 0
+  %nop11592 = alloca i1, i1 0
+  %nop11593 = alloca i1, i1 0
+  %nop11594 = alloca i1, i1 0
+  %nop11595 = alloca i1, i1 0
+  %nop11596 = alloca i1, i1 0
+  %nop11597 = alloca i1, i1 0
+  %nop11598 = alloca i1, i1 0
+  %nop11599 = alloca i1, i1 0
+  %nop11600 = alloca i1, i1 0
+  %nop11601 = alloca i1, i1 0
+  %nop11602 = alloca i1, i1 0
+  %nop11603 = alloca i1, i1 0
+  %nop11604 = alloca i1, i1 0
+  %nop11605 = alloca i1, i1 0
+  %nop11606 = alloca i1, i1 0
+  %nop11607 = alloca i1, i1 0
+  %nop11608 = alloca i1, i1 0
+  %nop11609 = alloca i1, i1 0
+  %nop11610 = alloca i1, i1 0
+  %nop11611 = alloca i1, i1 0
+  %nop11612 = alloca i1, i1 0
+  %nop11613 = alloca i1, i1 0
+  %nop11614 = alloca i1, i1 0
+  %nop11615 = alloca i1, i1 0
+  %nop11616 = alloca i1, i1 0
+  %nop11617 = alloca i1, i1 0
+  %nop11618 = alloca i1, i1 0
+  %nop11619 = alloca i1, i1 0
+  %nop11620 = alloca i1, i1 0
+  %nop11621 = alloca i1, i1 0
+  %nop11622 = alloca i1, i1 0
+  %nop11623 = alloca i1, i1 0
+  %nop11624 = alloca i1, i1 0
+  %nop11625 = alloca i1, i1 0
+  %nop11626 = alloca i1, i1 0
+  %nop11627 = alloca i1, i1 0
+  %nop11628 = alloca i1, i1 0
+  %nop11629 = alloca i1, i1 0
+  %nop11630 = alloca i1, i1 0
+  %nop11631 = alloca i1, i1 0
+  %nop11632 = alloca i1, i1 0
+  %nop11633 = alloca i1, i1 0
+  %nop11634 = alloca i1, i1 0
+  %nop11635 = alloca i1, i1 0
+  %nop11636 = alloca i1, i1 0
+  %nop11637 = alloca i1, i1 0
+  %nop11638 = alloca i1, i1 0
+  %nop11639 = alloca i1, i1 0
+  %nop11640 = alloca i1, i1 0
+  %nop11641 = alloca i1, i1 0
+  %nop11642 = alloca i1, i1 0
+  %nop11643 = alloca i1, i1 0
+  %nop11644 = alloca i1, i1 0
+  %nop11645 = alloca i1, i1 0
+  %nop11646 = alloca i1, i1 0
+  %nop11647 = alloca i1, i1 0
+  %nop11648 = alloca i1, i1 0
+  %nop11649 = alloca i1, i1 0
+  %nop11650 = alloca i1, i1 0
+  %nop11651 = alloca i1, i1 0
+  %nop11652 = alloca i1, i1 0
+  %nop11653 = alloca i1, i1 0
+  %nop11654 = alloca i1, i1 0
+  %nop11655 = alloca i1, i1 0
+  %nop11656 = alloca i1, i1 0
+  %nop11657 = alloca i1, i1 0
+  %nop11658 = alloca i1, i1 0
+  %nop11659 = alloca i1, i1 0
+  %nop11660 = alloca i1, i1 0
+  %nop11661 = alloca i1, i1 0
+  %nop11662 = alloca i1, i1 0
+  %nop11663 = alloca i1, i1 0
+  %nop11664 = alloca i1, i1 0
+  %nop11665 = alloca i1, i1 0
+  %nop11666 = alloca i1, i1 0
+  %nop11667 = alloca i1, i1 0
+  %nop11668 = alloca i1, i1 0
+  %nop11669 = alloca i1, i1 0
+  %nop11670 = alloca i1, i1 0
+  %nop11671 = alloca i1, i1 0
+  %nop11672 = alloca i1, i1 0
+  %nop11673 = alloca i1, i1 0
+  %nop11674 = alloca i1, i1 0
+  %nop11675 = alloca i1, i1 0
+  %nop11676 = alloca i1, i1 0
+  %nop11677 = alloca i1, i1 0
+  %nop11678 = alloca i1, i1 0
+  %nop11679 = alloca i1, i1 0
+  %nop11680 = alloca i1, i1 0
+  %nop11681 = alloca i1, i1 0
+  %nop11682 = alloca i1, i1 0
+  %nop11683 = alloca i1, i1 0
+  %nop11684 = alloca i1, i1 0
+  %nop11685 = alloca i1, i1 0
+  %nop11686 = alloca i1, i1 0
+  %nop11687 = alloca i1, i1 0
+  %nop11688 = alloca i1, i1 0
+  %nop11689 = alloca i1, i1 0
+  %nop11690 = alloca i1, i1 0
+  %nop11691 = alloca i1, i1 0
+  %nop11692 = alloca i1, i1 0
+  %nop11693 = alloca i1, i1 0
+  %nop11694 = alloca i1, i1 0
+  %nop11695 = alloca i1, i1 0
+  %nop11696 = alloca i1, i1 0
+  %nop11697 = alloca i1, i1 0
+  %nop11698 = alloca i1, i1 0
+  %nop11699 = alloca i1, i1 0
+  %nop11700 = alloca i1, i1 0
+  %nop11701 = alloca i1, i1 0
+  %nop11702 = alloca i1, i1 0
+  %nop11703 = alloca i1, i1 0
+  %nop11704 = alloca i1, i1 0
+  %nop11705 = alloca i1, i1 0
+  %nop11706 = alloca i1, i1 0
+  %nop11707 = alloca i1, i1 0
+  %nop11708 = alloca i1, i1 0
+  %nop11709 = alloca i1, i1 0
+  %nop11710 = alloca i1, i1 0
+  %nop11711 = alloca i1, i1 0
+  %nop11712 = alloca i1, i1 0
+  %nop11713 = alloca i1, i1 0
+  %nop11714 = alloca i1, i1 0
+  %nop11715 = alloca i1, i1 0
+  %nop11716 = alloca i1, i1 0
+  %nop11717 = alloca i1, i1 0
+  %nop11718 = alloca i1, i1 0
+  %nop11719 = alloca i1, i1 0
+  %nop11720 = alloca i1, i1 0
+  %nop11721 = alloca i1, i1 0
+  %nop11722 = alloca i1, i1 0
+  %nop11723 = alloca i1, i1 0
+  %nop11724 = alloca i1, i1 0
+  %nop11725 = alloca i1, i1 0
+  %nop11726 = alloca i1, i1 0
+  %nop11727 = alloca i1, i1 0
+  %nop11728 = alloca i1, i1 0
+  %nop11729 = alloca i1, i1 0
+  %nop11730 = alloca i1, i1 0
+  %nop11731 = alloca i1, i1 0
+  %nop11732 = alloca i1, i1 0
+  %nop11733 = alloca i1, i1 0
+  %nop11734 = alloca i1, i1 0
+  %nop11735 = alloca i1, i1 0
+  %nop11736 = alloca i1, i1 0
+  %nop11737 = alloca i1, i1 0
+  %nop11738 = alloca i1, i1 0
+  %nop11739 = alloca i1, i1 0
+  %nop11740 = alloca i1, i1 0
+  %nop11741 = alloca i1, i1 0
+  %nop11742 = alloca i1, i1 0
+  %nop11743 = alloca i1, i1 0
+  %nop11744 = alloca i1, i1 0
+  %nop11745 = alloca i1, i1 0
+  %nop11746 = alloca i1, i1 0
+  %nop11747 = alloca i1, i1 0
+  %nop11748 = alloca i1, i1 0
+  %nop11749 = alloca i1, i1 0
+  %nop11750 = alloca i1, i1 0
+  %nop11751 = alloca i1, i1 0
+  %nop11752 = alloca i1, i1 0
+  %nop11753 = alloca i1, i1 0
+  %nop11754 = alloca i1, i1 0
+  %nop11755 = alloca i1, i1 0
+  %nop11756 = alloca i1, i1 0
+  %nop11757 = alloca i1, i1 0
+  %nop11758 = alloca i1, i1 0
+  %nop11759 = alloca i1, i1 0
+  %nop11760 = alloca i1, i1 0
+  %nop11761 = alloca i1, i1 0
+  %nop11762 = alloca i1, i1 0
+  %nop11763 = alloca i1, i1 0
+  %nop11764 = alloca i1, i1 0
+  %nop11765 = alloca i1, i1 0
+  %nop11766 = alloca i1, i1 0
+  %nop11767 = alloca i1, i1 0
+  %nop11768 = alloca i1, i1 0
+  %nop11769 = alloca i1, i1 0
+  %nop11770 = alloca i1, i1 0
+  %nop11771 = alloca i1, i1 0
+  %nop11772 = alloca i1, i1 0
+  %nop11773 = alloca i1, i1 0
+  %nop11774 = alloca i1, i1 0
+  %nop11775 = alloca i1, i1 0
+  %nop11776 = alloca i1, i1 0
+  %nop11777 = alloca i1, i1 0
+  %nop11778 = alloca i1, i1 0
+  %nop11779 = alloca i1, i1 0
+  %nop11780 = alloca i1, i1 0
+  %nop11781 = alloca i1, i1 0
+  %nop11782 = alloca i1, i1 0
+  %nop11783 = alloca i1, i1 0
+  %nop11784 = alloca i1, i1 0
+  %nop11785 = alloca i1, i1 0
+  %nop11786 = alloca i1, i1 0
+  %nop11787 = alloca i1, i1 0
+  %nop11788 = alloca i1, i1 0
+  %nop11789 = alloca i1, i1 0
+  %nop11790 = alloca i1, i1 0
+  %nop11791 = alloca i1, i1 0
+  %nop11792 = alloca i1, i1 0
+  %nop11793 = alloca i1, i1 0
+  %nop11794 = alloca i1, i1 0
+  %nop11795 = alloca i1, i1 0
+  %nop11796 = alloca i1, i1 0
+  %nop11797 = alloca i1, i1 0
+  %nop11798 = alloca i1, i1 0
+  %nop11799 = alloca i1, i1 0
+  %nop11800 = alloca i1, i1 0
+  %nop11801 = alloca i1, i1 0
+  %nop11802 = alloca i1, i1 0
+  %nop11803 = alloca i1, i1 0
+  %nop11804 = alloca i1, i1 0
+  %nop11805 = alloca i1, i1 0
+  %nop11806 = alloca i1, i1 0
+  %nop11807 = alloca i1, i1 0
+  %nop11808 = alloca i1, i1 0
+  %nop11809 = alloca i1, i1 0
+  %nop11810 = alloca i1, i1 0
+  %nop11811 = alloca i1, i1 0
+  %nop11812 = alloca i1, i1 0
+  %nop11813 = alloca i1, i1 0
+  %nop11814 = alloca i1, i1 0
+  %nop11815 = alloca i1, i1 0
+  %nop11816 = alloca i1, i1 0
+  %nop11817 = alloca i1, i1 0
+  %nop11818 = alloca i1, i1 0
+  %nop11819 = alloca i1, i1 0
+  %nop11820 = alloca i1, i1 0
+  %nop11821 = alloca i1, i1 0
+  %nop11822 = alloca i1, i1 0
+  %nop11823 = alloca i1, i1 0
+  %nop11824 = alloca i1, i1 0
+  %nop11825 = alloca i1, i1 0
+  %nop11826 = alloca i1, i1 0
+  %nop11827 = alloca i1, i1 0
+  %nop11828 = alloca i1, i1 0
+  %nop11829 = alloca i1, i1 0
+  %nop11830 = alloca i1, i1 0
+  %nop11831 = alloca i1, i1 0
+  %nop11832 = alloca i1, i1 0
+  %nop11833 = alloca i1, i1 0
+  %nop11834 = alloca i1, i1 0
+  %nop11835 = alloca i1, i1 0
+  %nop11836 = alloca i1, i1 0
+  %nop11837 = alloca i1, i1 0
+  %nop11838 = alloca i1, i1 0
+  %nop11839 = alloca i1, i1 0
+  %nop11840 = alloca i1, i1 0
+  %nop11841 = alloca i1, i1 0
+  %nop11842 = alloca i1, i1 0
+  %nop11843 = alloca i1, i1 0
+  %nop11844 = alloca i1, i1 0
+  %nop11845 = alloca i1, i1 0
+  %nop11846 = alloca i1, i1 0
+  %nop11847 = alloca i1, i1 0
+  %nop11848 = alloca i1, i1 0
+  %nop11849 = alloca i1, i1 0
+  %nop11850 = alloca i1, i1 0
+  %nop11851 = alloca i1, i1 0
+  %nop11852 = alloca i1, i1 0
+  %nop11853 = alloca i1, i1 0
+  %nop11854 = alloca i1, i1 0
+  %nop11855 = alloca i1, i1 0
+  %nop11856 = alloca i1, i1 0
+  %nop11857 = alloca i1, i1 0
+  %nop11858 = alloca i1, i1 0
+  %nop11859 = alloca i1, i1 0
+  %nop11860 = alloca i1, i1 0
+  %nop11861 = alloca i1, i1 0
+  %nop11862 = alloca i1, i1 0
+  %nop11863 = alloca i1, i1 0
+  %nop11864 = alloca i1, i1 0
+  %nop11865 = alloca i1, i1 0
+  %nop11866 = alloca i1, i1 0
+  %nop11867 = alloca i1, i1 0
+  %nop11868 = alloca i1, i1 0
+  %nop11869 = alloca i1, i1 0
+  %nop11870 = alloca i1, i1 0
+  %nop11871 = alloca i1, i1 0
+  %nop11872 = alloca i1, i1 0
+  %nop11873 = alloca i1, i1 0
+  %nop11874 = alloca i1, i1 0
+  %nop11875 = alloca i1, i1 0
+  %nop11876 = alloca i1, i1 0
+  %nop11877 = alloca i1, i1 0
+  %nop11878 = alloca i1, i1 0
+  %nop11879 = alloca i1, i1 0
+  %nop11880 = alloca i1, i1 0
+  %nop11881 = alloca i1, i1 0
+  %nop11882 = alloca i1, i1 0
+  %nop11883 = alloca i1, i1 0
+  %nop11884 = alloca i1, i1 0
+  %nop11885 = alloca i1, i1 0
+  %nop11886 = alloca i1, i1 0
+  %nop11887 = alloca i1, i1 0
+  %nop11888 = alloca i1, i1 0
+  %nop11889 = alloca i1, i1 0
+  %nop11890 = alloca i1, i1 0
+  %nop11891 = alloca i1, i1 0
+  %nop11892 = alloca i1, i1 0
+  %nop11893 = alloca i1, i1 0
+  %nop11894 = alloca i1, i1 0
+  %nop11895 = alloca i1, i1 0
+  %nop11896 = alloca i1, i1 0
+  %nop11897 = alloca i1, i1 0
+  %nop11898 = alloca i1, i1 0
+  %nop11899 = alloca i1, i1 0
+  %nop11900 = alloca i1, i1 0
+  %nop11901 = alloca i1, i1 0
+  %nop11902 = alloca i1, i1 0
+  %nop11903 = alloca i1, i1 0
+  %nop11904 = alloca i1, i1 0
+  %nop11905 = alloca i1, i1 0
+  %nop11906 = alloca i1, i1 0
+  %nop11907 = alloca i1, i1 0
+  %nop11908 = alloca i1, i1 0
+  %nop11909 = alloca i1, i1 0
+  %nop11910 = alloca i1, i1 0
+  %nop11911 = alloca i1, i1 0
+  %nop11912 = alloca i1, i1 0
+  %nop11913 = alloca i1, i1 0
+  %nop11914 = alloca i1, i1 0
+  %nop11915 = alloca i1, i1 0
+  %nop11916 = alloca i1, i1 0
+  %nop11917 = alloca i1, i1 0
+  %nop11918 = alloca i1, i1 0
+  %nop11919 = alloca i1, i1 0
+  %nop11920 = alloca i1, i1 0
+  %nop11921 = alloca i1, i1 0
+  %nop11922 = alloca i1, i1 0
+  %nop11923 = alloca i1, i1 0
+  %nop11924 = alloca i1, i1 0
+  %nop11925 = alloca i1, i1 0
+  %nop11926 = alloca i1, i1 0
+  %nop11927 = alloca i1, i1 0
+  %nop11928 = alloca i1, i1 0
+  %nop11929 = alloca i1, i1 0
+  %nop11930 = alloca i1, i1 0
+  %nop11931 = alloca i1, i1 0
+  %nop11932 = alloca i1, i1 0
+  %nop11933 = alloca i1, i1 0
+  %nop11934 = alloca i1, i1 0
+  %nop11935 = alloca i1, i1 0
+  %nop11936 = alloca i1, i1 0
+  %nop11937 = alloca i1, i1 0
+  %nop11938 = alloca i1, i1 0
+  %nop11939 = alloca i1, i1 0
+  %nop11940 = alloca i1, i1 0
+  %nop11941 = alloca i1, i1 0
+  %nop11942 = alloca i1, i1 0
+  %nop11943 = alloca i1, i1 0
+  %nop11944 = alloca i1, i1 0
+  %nop11945 = alloca i1, i1 0
+  %nop11946 = alloca i1, i1 0
+  %nop11947 = alloca i1, i1 0
+  %nop11948 = alloca i1, i1 0
+  %nop11949 = alloca i1, i1 0
+  %nop11950 = alloca i1, i1 0
+  %nop11951 = alloca i1, i1 0
+  %nop11952 = alloca i1, i1 0
+  %nop11953 = alloca i1, i1 0
+  %nop11954 = alloca i1, i1 0
+  %nop11955 = alloca i1, i1 0
+  %nop11956 = alloca i1, i1 0
+  %nop11957 = alloca i1, i1 0
+  %nop11958 = alloca i1, i1 0
+  %nop11959 = alloca i1, i1 0
+  %nop11960 = alloca i1, i1 0
+  %nop11961 = alloca i1, i1 0
+  %nop11962 = alloca i1, i1 0
+  %nop11963 = alloca i1, i1 0
+  %nop11964 = alloca i1, i1 0
+  %nop11965 = alloca i1, i1 0
+  %nop11966 = alloca i1, i1 0
+  %nop11967 = alloca i1, i1 0
+  %nop11968 = alloca i1, i1 0
+  %nop11969 = alloca i1, i1 0
+  %nop11970 = alloca i1, i1 0
+  %nop11971 = alloca i1, i1 0
+  %nop11972 = alloca i1, i1 0
+  %nop11973 = alloca i1, i1 0
+  %nop11974 = alloca i1, i1 0
+  %nop11975 = alloca i1, i1 0
+  %nop11976 = alloca i1, i1 0
+  %nop11977 = alloca i1, i1 0
+  %nop11978 = alloca i1, i1 0
+  %nop11979 = alloca i1, i1 0
+  %nop11980 = alloca i1, i1 0
+  %nop11981 = alloca i1, i1 0
+  %nop11982 = alloca i1, i1 0
+  %nop11983 = alloca i1, i1 0
+  %nop11984 = alloca i1, i1 0
+  %nop11985 = alloca i1, i1 0
+  %nop11986 = alloca i1, i1 0
+  %nop11987 = alloca i1, i1 0
+  %nop11988 = alloca i1, i1 0
+  %nop11989 = alloca i1, i1 0
+  %nop11990 = alloca i1, i1 0
+  %nop11991 = alloca i1, i1 0
+  %nop11992 = alloca i1, i1 0
+  %nop11993 = alloca i1, i1 0
+  %nop11994 = alloca i1, i1 0
+  %nop11995 = alloca i1, i1 0
+  %nop11996 = alloca i1, i1 0
+  %nop11997 = alloca i1, i1 0
+  %nop11998 = alloca i1, i1 0
+  %nop11999 = alloca i1, i1 0
+  %nop12000 = alloca i1, i1 0
+  %nop12001 = alloca i1, i1 0
+  %nop12002 = alloca i1, i1 0
+  %nop12003 = alloca i1, i1 0
+  %nop12004 = alloca i1, i1 0
+  %nop12005 = alloca i1, i1 0
+  %nop12006 = alloca i1, i1 0
+  %nop12007 = alloca i1, i1 0
+  %nop12008 = alloca i1, i1 0
+  %nop12009 = alloca i1, i1 0
+  %nop12010 = alloca i1, i1 0
+  %nop12011 = alloca i1, i1 0
+  %nop12012 = alloca i1, i1 0
+  %nop12013 = alloca i1, i1 0
+  %nop12014 = alloca i1, i1 0
+  %nop12015 = alloca i1, i1 0
+  %nop12016 = alloca i1, i1 0
+  %nop12017 = alloca i1, i1 0
+  %nop12018 = alloca i1, i1 0
+  %nop12019 = alloca i1, i1 0
+  %nop12020 = alloca i1, i1 0
+  %nop12021 = alloca i1, i1 0
+  %nop12022 = alloca i1, i1 0
+  %nop12023 = alloca i1, i1 0
+  %nop12024 = alloca i1, i1 0
+  %nop12025 = alloca i1, i1 0
+  %nop12026 = alloca i1, i1 0
+  %nop12027 = alloca i1, i1 0
+  %nop12028 = alloca i1, i1 0
+  %nop12029 = alloca i1, i1 0
+  %nop12030 = alloca i1, i1 0
+  %nop12031 = alloca i1, i1 0
+  %nop12032 = alloca i1, i1 0
+  %nop12033 = alloca i1, i1 0
+  %nop12034 = alloca i1, i1 0
+  %nop12035 = alloca i1, i1 0
+  %nop12036 = alloca i1, i1 0
+  %nop12037 = alloca i1, i1 0
+  %nop12038 = alloca i1, i1 0
+  %nop12039 = alloca i1, i1 0
+  %nop12040 = alloca i1, i1 0
+  %nop12041 = alloca i1, i1 0
+  %nop12042 = alloca i1, i1 0
+  %nop12043 = alloca i1, i1 0
+  %nop12044 = alloca i1, i1 0
+  %nop12045 = alloca i1, i1 0
+  %nop12046 = alloca i1, i1 0
+  %nop12047 = alloca i1, i1 0
+  %nop12048 = alloca i1, i1 0
+  %nop12049 = alloca i1, i1 0
+  %nop12050 = alloca i1, i1 0
+  %nop12051 = alloca i1, i1 0
+  %nop12052 = alloca i1, i1 0
+  %nop12053 = alloca i1, i1 0
+  %nop12054 = alloca i1, i1 0
+  %nop12055 = alloca i1, i1 0
+  %nop12056 = alloca i1, i1 0
+  %nop12057 = alloca i1, i1 0
+  %nop12058 = alloca i1, i1 0
+  %nop12059 = alloca i1, i1 0
+  %nop12060 = alloca i1, i1 0
+  %nop12061 = alloca i1, i1 0
+  %nop12062 = alloca i1, i1 0
+  %nop12063 = alloca i1, i1 0
+  %nop12064 = alloca i1, i1 0
+  %nop12065 = alloca i1, i1 0
+  %nop12066 = alloca i1, i1 0
+  %nop12067 = alloca i1, i1 0
+  %nop12068 = alloca i1, i1 0
+  %nop12069 = alloca i1, i1 0
+  %nop12070 = alloca i1, i1 0
+  %nop12071 = alloca i1, i1 0
+  %nop12072 = alloca i1, i1 0
+  %nop12073 = alloca i1, i1 0
+  %nop12074 = alloca i1, i1 0
+  %nop12075 = alloca i1, i1 0
+  %nop12076 = alloca i1, i1 0
+  %nop12077 = alloca i1, i1 0
+  %nop12078 = alloca i1, i1 0
+  %nop12079 = alloca i1, i1 0
+  %nop12080 = alloca i1, i1 0
+  %nop12081 = alloca i1, i1 0
+  %nop12082 = alloca i1, i1 0
+  %nop12083 = alloca i1, i1 0
+  %nop12084 = alloca i1, i1 0
+  %nop12085 = alloca i1, i1 0
+  %nop12086 = alloca i1, i1 0
+  %nop12087 = alloca i1, i1 0
+  %nop12088 = alloca i1, i1 0
+  %nop12089 = alloca i1, i1 0
+  %nop12090 = alloca i1, i1 0
+  %nop12091 = alloca i1, i1 0
+  %nop12092 = alloca i1, i1 0
+  %nop12093 = alloca i1, i1 0
+  %nop12094 = alloca i1, i1 0
+  %nop12095 = alloca i1, i1 0
+  %nop12096 = alloca i1, i1 0
+  %nop12097 = alloca i1, i1 0
+  %nop12098 = alloca i1, i1 0
+  %nop12099 = alloca i1, i1 0
+  %nop12100 = alloca i1, i1 0
+  %nop12101 = alloca i1, i1 0
+  %nop12102 = alloca i1, i1 0
+  %nop12103 = alloca i1, i1 0
+  %nop12104 = alloca i1, i1 0
+  %nop12105 = alloca i1, i1 0
+  %nop12106 = alloca i1, i1 0
+  %nop12107 = alloca i1, i1 0
+  %nop12108 = alloca i1, i1 0
+  %nop12109 = alloca i1, i1 0
+  %nop12110 = alloca i1, i1 0
+  %nop12111 = alloca i1, i1 0
+  %nop12112 = alloca i1, i1 0
+  %nop12113 = alloca i1, i1 0
+  %nop12114 = alloca i1, i1 0
+  %nop12115 = alloca i1, i1 0
+  %nop12116 = alloca i1, i1 0
+  %nop12117 = alloca i1, i1 0
+  %nop12118 = alloca i1, i1 0
+  %nop12119 = alloca i1, i1 0
+  %nop12120 = alloca i1, i1 0
+  %nop12121 = alloca i1, i1 0
+  %nop12122 = alloca i1, i1 0
+  %nop12123 = alloca i1, i1 0
+  %nop12124 = alloca i1, i1 0
+  %nop12125 = alloca i1, i1 0
+  %nop12126 = alloca i1, i1 0
+  %nop12127 = alloca i1, i1 0
+  %nop12128 = alloca i1, i1 0
+  %nop12129 = alloca i1, i1 0
+  %nop12130 = alloca i1, i1 0
+  %nop12131 = alloca i1, i1 0
+  %nop12132 = alloca i1, i1 0
+  %nop12133 = alloca i1, i1 0
+  %nop12134 = alloca i1, i1 0
+  %nop12135 = alloca i1, i1 0
+  %nop12136 = alloca i1, i1 0
+  %nop12137 = alloca i1, i1 0
+  %nop12138 = alloca i1, i1 0
+  %nop12139 = alloca i1, i1 0
+  %nop12140 = alloca i1, i1 0
+  %nop12141 = alloca i1, i1 0
+  %nop12142 = alloca i1, i1 0
+  %nop12143 = alloca i1, i1 0
+  %nop12144 = alloca i1, i1 0
+  %nop12145 = alloca i1, i1 0
+  %nop12146 = alloca i1, i1 0
+  %nop12147 = alloca i1, i1 0
+  %nop12148 = alloca i1, i1 0
+  %nop12149 = alloca i1, i1 0
+  %nop12150 = alloca i1, i1 0
+  %nop12151 = alloca i1, i1 0
+  %nop12152 = alloca i1, i1 0
+  %nop12153 = alloca i1, i1 0
+  %nop12154 = alloca i1, i1 0
+  %nop12155 = alloca i1, i1 0
+  %nop12156 = alloca i1, i1 0
+  %nop12157 = alloca i1, i1 0
+  %nop12158 = alloca i1, i1 0
+  %nop12159 = alloca i1, i1 0
+  %nop12160 = alloca i1, i1 0
+  %nop12161 = alloca i1, i1 0
+  %nop12162 = alloca i1, i1 0
+  %nop12163 = alloca i1, i1 0
+  %nop12164 = alloca i1, i1 0
+  %nop12165 = alloca i1, i1 0
+  %nop12166 = alloca i1, i1 0
+  %nop12167 = alloca i1, i1 0
+  %nop12168 = alloca i1, i1 0
+  %nop12169 = alloca i1, i1 0
+  %nop12170 = alloca i1, i1 0
+  %nop12171 = alloca i1, i1 0
+  %nop12172 = alloca i1, i1 0
+  %nop12173 = alloca i1, i1 0
+  %nop12174 = alloca i1, i1 0
+  %nop12175 = alloca i1, i1 0
+  %nop12176 = alloca i1, i1 0
+  %nop12177 = alloca i1, i1 0
+  %nop12178 = alloca i1, i1 0
+  %nop12179 = alloca i1, i1 0
+  %nop12180 = alloca i1, i1 0
+  %nop12181 = alloca i1, i1 0
+  %nop12182 = alloca i1, i1 0
+  %nop12183 = alloca i1, i1 0
+  %nop12184 = alloca i1, i1 0
+  %nop12185 = alloca i1, i1 0
+  %nop12186 = alloca i1, i1 0
+  %nop12187 = alloca i1, i1 0
+  %nop12188 = alloca i1, i1 0
+  %nop12189 = alloca i1, i1 0
+  %nop12190 = alloca i1, i1 0
+  %nop12191 = alloca i1, i1 0
+  %nop12192 = alloca i1, i1 0
+  %nop12193 = alloca i1, i1 0
+  %nop12194 = alloca i1, i1 0
+  %nop12195 = alloca i1, i1 0
+  %nop12196 = alloca i1, i1 0
+  %nop12197 = alloca i1, i1 0
+  %nop12198 = alloca i1, i1 0
+  %nop12199 = alloca i1, i1 0
+  %nop12200 = alloca i1, i1 0
+  %nop12201 = alloca i1, i1 0
+  %nop12202 = alloca i1, i1 0
+  %nop12203 = alloca i1, i1 0
+  %nop12204 = alloca i1, i1 0
+  %nop12205 = alloca i1, i1 0
+  %nop12206 = alloca i1, i1 0
+  %nop12207 = alloca i1, i1 0
+  %nop12208 = alloca i1, i1 0
+  %nop12209 = alloca i1, i1 0
+  %nop12210 = alloca i1, i1 0
+  %nop12211 = alloca i1, i1 0
+  %nop12212 = alloca i1, i1 0
+  %nop12213 = alloca i1, i1 0
+  %nop12214 = alloca i1, i1 0
+  %nop12215 = alloca i1, i1 0
+  %nop12216 = alloca i1, i1 0
+  %nop12217 = alloca i1, i1 0
+  %nop12218 = alloca i1, i1 0
+  %nop12219 = alloca i1, i1 0
+  %nop12220 = alloca i1, i1 0
+  %nop12221 = alloca i1, i1 0
+  %nop12222 = alloca i1, i1 0
+  %nop12223 = alloca i1, i1 0
+  %nop12224 = alloca i1, i1 0
+  %nop12225 = alloca i1, i1 0
+  %nop12226 = alloca i1, i1 0
+  %nop12227 = alloca i1, i1 0
+  %nop12228 = alloca i1, i1 0
+  %nop12229 = alloca i1, i1 0
+  %nop12230 = alloca i1, i1 0
+  %nop12231 = alloca i1, i1 0
+  %nop12232 = alloca i1, i1 0
+  %nop12233 = alloca i1, i1 0
+  %nop12234 = alloca i1, i1 0
+  %nop12235 = alloca i1, i1 0
+  %nop12236 = alloca i1, i1 0
+  %nop12237 = alloca i1, i1 0
+  %nop12238 = alloca i1, i1 0
+  %nop12239 = alloca i1, i1 0
+  %nop12240 = alloca i1, i1 0
+  %nop12241 = alloca i1, i1 0
+  %nop12242 = alloca i1, i1 0
+  %nop12243 = alloca i1, i1 0
+  %nop12244 = alloca i1, i1 0
+  %nop12245 = alloca i1, i1 0
+  %nop12246 = alloca i1, i1 0
+  %nop12247 = alloca i1, i1 0
+  %nop12248 = alloca i1, i1 0
+  %nop12249 = alloca i1, i1 0
+  %nop12250 = alloca i1, i1 0
+  %nop12251 = alloca i1, i1 0
+  %nop12252 = alloca i1, i1 0
+  %nop12253 = alloca i1, i1 0
+  %nop12254 = alloca i1, i1 0
+  %nop12255 = alloca i1, i1 0
+  %nop12256 = alloca i1, i1 0
+  %nop12257 = alloca i1, i1 0
+  %nop12258 = alloca i1, i1 0
+  %nop12259 = alloca i1, i1 0
+  %nop12260 = alloca i1, i1 0
+  %nop12261 = alloca i1, i1 0
+  %nop12262 = alloca i1, i1 0
+  %nop12263 = alloca i1, i1 0
+  %nop12264 = alloca i1, i1 0
+  %nop12265 = alloca i1, i1 0
+  %nop12266 = alloca i1, i1 0
+  %nop12267 = alloca i1, i1 0
+  %nop12268 = alloca i1, i1 0
+  %nop12269 = alloca i1, i1 0
+  %nop12270 = alloca i1, i1 0
+  %nop12271 = alloca i1, i1 0
+  %nop12272 = alloca i1, i1 0
+  %nop12273 = alloca i1, i1 0
+  %nop12274 = alloca i1, i1 0
+  %nop12275 = alloca i1, i1 0
+  %nop12276 = alloca i1, i1 0
+  %nop12277 = alloca i1, i1 0
+  %nop12278 = alloca i1, i1 0
+  %nop12279 = alloca i1, i1 0
+  %nop12280 = alloca i1, i1 0
+  %nop12281 = alloca i1, i1 0
+  %nop12282 = alloca i1, i1 0
+  %nop12283 = alloca i1, i1 0
+  %nop12284 = alloca i1, i1 0
+  %nop12285 = alloca i1, i1 0
+  %nop12286 = alloca i1, i1 0
+  %nop12287 = alloca i1, i1 0
+  %nop12288 = alloca i1, i1 0
+  %nop12289 = alloca i1, i1 0
+  %nop12290 = alloca i1, i1 0
+  %nop12291 = alloca i1, i1 0
+  %nop12292 = alloca i1, i1 0
+  %nop12293 = alloca i1, i1 0
+  %nop12294 = alloca i1, i1 0
+  %nop12295 = alloca i1, i1 0
+  %nop12296 = alloca i1, i1 0
+  %nop12297 = alloca i1, i1 0
+  %nop12298 = alloca i1, i1 0
+  %nop12299 = alloca i1, i1 0
+  %nop12300 = alloca i1, i1 0
+  %nop12301 = alloca i1, i1 0
+  %nop12302 = alloca i1, i1 0
+  %nop12303 = alloca i1, i1 0
+  %nop12304 = alloca i1, i1 0
+  %nop12305 = alloca i1, i1 0
+  %nop12306 = alloca i1, i1 0
+  %nop12307 = alloca i1, i1 0
+  %nop12308 = alloca i1, i1 0
+  %nop12309 = alloca i1, i1 0
+  %nop12310 = alloca i1, i1 0
+  %nop12311 = alloca i1, i1 0
+  %nop12312 = alloca i1, i1 0
+  %nop12313 = alloca i1, i1 0
+  %nop12314 = alloca i1, i1 0
+  %nop12315 = alloca i1, i1 0
+  %nop12316 = alloca i1, i1 0
+  %nop12317 = alloca i1, i1 0
+  %nop12318 = alloca i1, i1 0
+  %nop12319 = alloca i1, i1 0
+  %nop12320 = alloca i1, i1 0
+  %nop12321 = alloca i1, i1 0
+  %nop12322 = alloca i1, i1 0
+  %nop12323 = alloca i1, i1 0
+  %nop12324 = alloca i1, i1 0
+  %nop12325 = alloca i1, i1 0
+  %nop12326 = alloca i1, i1 0
+  %nop12327 = alloca i1, i1 0
+  %nop12328 = alloca i1, i1 0
+  %nop12329 = alloca i1, i1 0
+  %nop12330 = alloca i1, i1 0
+  %nop12331 = alloca i1, i1 0
+  %nop12332 = alloca i1, i1 0
+  %nop12333 = alloca i1, i1 0
+  %nop12334 = alloca i1, i1 0
+  %nop12335 = alloca i1, i1 0
+  %nop12336 = alloca i1, i1 0
+  %nop12337 = alloca i1, i1 0
+  %nop12338 = alloca i1, i1 0
+  %nop12339 = alloca i1, i1 0
+  %nop12340 = alloca i1, i1 0
+  %nop12341 = alloca i1, i1 0
+  %nop12342 = alloca i1, i1 0
+  %nop12343 = alloca i1, i1 0
+  %nop12344 = alloca i1, i1 0
+  %nop12345 = alloca i1, i1 0
+  %nop12346 = alloca i1, i1 0
+  %nop12347 = alloca i1, i1 0
+  %nop12348 = alloca i1, i1 0
+  %nop12349 = alloca i1, i1 0
+  %nop12350 = alloca i1, i1 0
+  %nop12351 = alloca i1, i1 0
+  %nop12352 = alloca i1, i1 0
+  %nop12353 = alloca i1, i1 0
+  %nop12354 = alloca i1, i1 0
+  %nop12355 = alloca i1, i1 0
+  %nop12356 = alloca i1, i1 0
+  %nop12357 = alloca i1, i1 0
+  %nop12358 = alloca i1, i1 0
+  %nop12359 = alloca i1, i1 0
+  %nop12360 = alloca i1, i1 0
+  %nop12361 = alloca i1, i1 0
+  %nop12362 = alloca i1, i1 0
+  %nop12363 = alloca i1, i1 0
+  %nop12364 = alloca i1, i1 0
+  %nop12365 = alloca i1, i1 0
+  %nop12366 = alloca i1, i1 0
+  %nop12367 = alloca i1, i1 0
+  %nop12368 = alloca i1, i1 0
+  %nop12369 = alloca i1, i1 0
+  %nop12370 = alloca i1, i1 0
+  %nop12371 = alloca i1, i1 0
+  %nop12372 = alloca i1, i1 0
+  %nop12373 = alloca i1, i1 0
+  %nop12374 = alloca i1, i1 0
+  %nop12375 = alloca i1, i1 0
+  %nop12376 = alloca i1, i1 0
+  %nop12377 = alloca i1, i1 0
+  %nop12378 = alloca i1, i1 0
+  %nop12379 = alloca i1, i1 0
+  %nop12380 = alloca i1, i1 0
+  %nop12381 = alloca i1, i1 0
+  %nop12382 = alloca i1, i1 0
+  %nop12383 = alloca i1, i1 0
+  %nop12384 = alloca i1, i1 0
+  %nop12385 = alloca i1, i1 0
+  %nop12386 = alloca i1, i1 0
+  %nop12387 = alloca i1, i1 0
+  %nop12388 = alloca i1, i1 0
+  %nop12389 = alloca i1, i1 0
+  %nop12390 = alloca i1, i1 0
+  %nop12391 = alloca i1, i1 0
+  %nop12392 = alloca i1, i1 0
+  %nop12393 = alloca i1, i1 0
+  %nop12394 = alloca i1, i1 0
+  %nop12395 = alloca i1, i1 0
+  %nop12396 = alloca i1, i1 0
+  %nop12397 = alloca i1, i1 0
+  %nop12398 = alloca i1, i1 0
+  %nop12399 = alloca i1, i1 0
+  %nop12400 = alloca i1, i1 0
+  %nop12401 = alloca i1, i1 0
+  %nop12402 = alloca i1, i1 0
+  %nop12403 = alloca i1, i1 0
+  %nop12404 = alloca i1, i1 0
+  %nop12405 = alloca i1, i1 0
+  %nop12406 = alloca i1, i1 0
+  %nop12407 = alloca i1, i1 0
+  %nop12408 = alloca i1, i1 0
+  %nop12409 = alloca i1, i1 0
+  %nop12410 = alloca i1, i1 0
+  %nop12411 = alloca i1, i1 0
+  %nop12412 = alloca i1, i1 0
+  %nop12413 = alloca i1, i1 0
+  %nop12414 = alloca i1, i1 0
+  %nop12415 = alloca i1, i1 0
+  %nop12416 = alloca i1, i1 0
+  %nop12417 = alloca i1, i1 0
+  %nop12418 = alloca i1, i1 0
+  %nop12419 = alloca i1, i1 0
+  %nop12420 = alloca i1, i1 0
+  %nop12421 = alloca i1, i1 0
+  %nop12422 = alloca i1, i1 0
+  %nop12423 = alloca i1, i1 0
+  %nop12424 = alloca i1, i1 0
+  %nop12425 = alloca i1, i1 0
+  %nop12426 = alloca i1, i1 0
+  %nop12427 = alloca i1, i1 0
+  %nop12428 = alloca i1, i1 0
+  %nop12429 = alloca i1, i1 0
+  %nop12430 = alloca i1, i1 0
+  %nop12431 = alloca i1, i1 0
+  %nop12432 = alloca i1, i1 0
+  %nop12433 = alloca i1, i1 0
+  %nop12434 = alloca i1, i1 0
+  %nop12435 = alloca i1, i1 0
+  %nop12436 = alloca i1, i1 0
+  %nop12437 = alloca i1, i1 0
+  %nop12438 = alloca i1, i1 0
+  %nop12439 = alloca i1, i1 0
+  %nop12440 = alloca i1, i1 0
+  %nop12441 = alloca i1, i1 0
+  %nop12442 = alloca i1, i1 0
+  %nop12443 = alloca i1, i1 0
+  %nop12444 = alloca i1, i1 0
+  %nop12445 = alloca i1, i1 0
+  %nop12446 = alloca i1, i1 0
+  %nop12447 = alloca i1, i1 0
+  %nop12448 = alloca i1, i1 0
+  %nop12449 = alloca i1, i1 0
+  %nop12450 = alloca i1, i1 0
+  %nop12451 = alloca i1, i1 0
+  %nop12452 = alloca i1, i1 0
+  %nop12453 = alloca i1, i1 0
+  %nop12454 = alloca i1, i1 0
+  %nop12455 = alloca i1, i1 0
+  %nop12456 = alloca i1, i1 0
+  %nop12457 = alloca i1, i1 0
+  %nop12458 = alloca i1, i1 0
+  %nop12459 = alloca i1, i1 0
+  %nop12460 = alloca i1, i1 0
+  %nop12461 = alloca i1, i1 0
+  %nop12462 = alloca i1, i1 0
+  %nop12463 = alloca i1, i1 0
+  %nop12464 = alloca i1, i1 0
+  %nop12465 = alloca i1, i1 0
+  %nop12466 = alloca i1, i1 0
+  %nop12467 = alloca i1, i1 0
+  %nop12468 = alloca i1, i1 0
+  %nop12469 = alloca i1, i1 0
+  %nop12470 = alloca i1, i1 0
+  %nop12471 = alloca i1, i1 0
+  %nop12472 = alloca i1, i1 0
+  %nop12473 = alloca i1, i1 0
+  %nop12474 = alloca i1, i1 0
+  %nop12475 = alloca i1, i1 0
+  %nop12476 = alloca i1, i1 0
+  %nop12477 = alloca i1, i1 0
+  %nop12478 = alloca i1, i1 0
+  %nop12479 = alloca i1, i1 0
+  %nop12480 = alloca i1, i1 0
+  %nop12481 = alloca i1, i1 0
+  %nop12482 = alloca i1, i1 0
+  %nop12483 = alloca i1, i1 0
+  %nop12484 = alloca i1, i1 0
+  %nop12485 = alloca i1, i1 0
+  %nop12486 = alloca i1, i1 0
+  %nop12487 = alloca i1, i1 0
+  %nop12488 = alloca i1, i1 0
+  %nop12489 = alloca i1, i1 0
+  %nop12490 = alloca i1, i1 0
+  %nop12491 = alloca i1, i1 0
+  %nop12492 = alloca i1, i1 0
+  %nop12493 = alloca i1, i1 0
+  %nop12494 = alloca i1, i1 0
+  %nop12495 = alloca i1, i1 0
+  %nop12496 = alloca i1, i1 0
+  %nop12497 = alloca i1, i1 0
+  %nop12498 = alloca i1, i1 0
+  %nop12499 = alloca i1, i1 0
+  %nop12500 = alloca i1, i1 0
+  %nop12501 = alloca i1, i1 0
+  %nop12502 = alloca i1, i1 0
+  %nop12503 = alloca i1, i1 0
+  %nop12504 = alloca i1, i1 0
+  %nop12505 = alloca i1, i1 0
+  %nop12506 = alloca i1, i1 0
+  %nop12507 = alloca i1, i1 0
+  %nop12508 = alloca i1, i1 0
+  %nop12509 = alloca i1, i1 0
+  %nop12510 = alloca i1, i1 0
+  %nop12511 = alloca i1, i1 0
+  %nop12512 = alloca i1, i1 0
+  %nop12513 = alloca i1, i1 0
+  %nop12514 = alloca i1, i1 0
+  %nop12515 = alloca i1, i1 0
+  %nop12516 = alloca i1, i1 0
+  %nop12517 = alloca i1, i1 0
+  %nop12518 = alloca i1, i1 0
+  %nop12519 = alloca i1, i1 0
+  %nop12520 = alloca i1, i1 0
+  %nop12521 = alloca i1, i1 0
+  %nop12522 = alloca i1, i1 0
+  %nop12523 = alloca i1, i1 0
+  %nop12524 = alloca i1, i1 0
+  %nop12525 = alloca i1, i1 0
+  %nop12526 = alloca i1, i1 0
+  %nop12527 = alloca i1, i1 0
+  %nop12528 = alloca i1, i1 0
+  %nop12529 = alloca i1, i1 0
+  %nop12530 = alloca i1, i1 0
+  %nop12531 = alloca i1, i1 0
+  %nop12532 = alloca i1, i1 0
+  %nop12533 = alloca i1, i1 0
+  %nop12534 = alloca i1, i1 0
+  %nop12535 = alloca i1, i1 0
+  %nop12536 = alloca i1, i1 0
+  %nop12537 = alloca i1, i1 0
+  %nop12538 = alloca i1, i1 0
+  %nop12539 = alloca i1, i1 0
+  %nop12540 = alloca i1, i1 0
+  %nop12541 = alloca i1, i1 0
+  %nop12542 = alloca i1, i1 0
+  %nop12543 = alloca i1, i1 0
+  %nop12544 = alloca i1, i1 0
+  %nop12545 = alloca i1, i1 0
+  %nop12546 = alloca i1, i1 0
+  %nop12547 = alloca i1, i1 0
+  %nop12548 = alloca i1, i1 0
+  %nop12549 = alloca i1, i1 0
+  %nop12550 = alloca i1, i1 0
+  %nop12551 = alloca i1, i1 0
+  %nop12552 = alloca i1, i1 0
+  %nop12553 = alloca i1, i1 0
+  %nop12554 = alloca i1, i1 0
+  %nop12555 = alloca i1, i1 0
+  %nop12556 = alloca i1, i1 0
+  %nop12557 = alloca i1, i1 0
+  %nop12558 = alloca i1, i1 0
+  %nop12559 = alloca i1, i1 0
+  %nop12560 = alloca i1, i1 0
+  %nop12561 = alloca i1, i1 0
+  %nop12562 = alloca i1, i1 0
+  %nop12563 = alloca i1, i1 0
+  %nop12564 = alloca i1, i1 0
+  %nop12565 = alloca i1, i1 0
+  %nop12566 = alloca i1, i1 0
+  %nop12567 = alloca i1, i1 0
+  %nop12568 = alloca i1, i1 0
+  %nop12569 = alloca i1, i1 0
+  %nop12570 = alloca i1, i1 0
+  %nop12571 = alloca i1, i1 0
+  %nop12572 = alloca i1, i1 0
+  %nop12573 = alloca i1, i1 0
+  %nop12574 = alloca i1, i1 0
+  %nop12575 = alloca i1, i1 0
+  %nop12576 = alloca i1, i1 0
+  %nop12577 = alloca i1, i1 0
+  %nop12578 = alloca i1, i1 0
+  %nop12579 = alloca i1, i1 0
+  %nop12580 = alloca i1, i1 0
+  %nop12581 = alloca i1, i1 0
+  %nop12582 = alloca i1, i1 0
+  %nop12583 = alloca i1, i1 0
+  %nop12584 = alloca i1, i1 0
+  %nop12585 = alloca i1, i1 0
+  %nop12586 = alloca i1, i1 0
+  %nop12587 = alloca i1, i1 0
+  %nop12588 = alloca i1, i1 0
+  %nop12589 = alloca i1, i1 0
+  %nop12590 = alloca i1, i1 0
+  %nop12591 = alloca i1, i1 0
+  %nop12592 = alloca i1, i1 0
+  %nop12593 = alloca i1, i1 0
+  %nop12594 = alloca i1, i1 0
+  %nop12595 = alloca i1, i1 0
+  %nop12596 = alloca i1, i1 0
+  %nop12597 = alloca i1, i1 0
+  %nop12598 = alloca i1, i1 0
+  %nop12599 = alloca i1, i1 0
+  %nop12600 = alloca i1, i1 0
+  %nop12601 = alloca i1, i1 0
+  %nop12602 = alloca i1, i1 0
+  %nop12603 = alloca i1, i1 0
+  %nop12604 = alloca i1, i1 0
+  %nop12605 = alloca i1, i1 0
+  %nop12606 = alloca i1, i1 0
+  %nop12607 = alloca i1, i1 0
+  %nop12608 = alloca i1, i1 0
+  %nop12609 = alloca i1, i1 0
+  %nop12610 = alloca i1, i1 0
+  %nop12611 = alloca i1, i1 0
+  %nop12612 = alloca i1, i1 0
+  %nop12613 = alloca i1, i1 0
+  %nop12614 = alloca i1, i1 0
+  %nop12615 = alloca i1, i1 0
+  %nop12616 = alloca i1, i1 0
+  %nop12617 = alloca i1, i1 0
+  %nop12618 = alloca i1, i1 0
+  %nop12619 = alloca i1, i1 0
+  %nop12620 = alloca i1, i1 0
+  %nop12621 = alloca i1, i1 0
+  %nop12622 = alloca i1, i1 0
+  %nop12623 = alloca i1, i1 0
+  %nop12624 = alloca i1, i1 0
+  %nop12625 = alloca i1, i1 0
+  %nop12626 = alloca i1, i1 0
+  %nop12627 = alloca i1, i1 0
+  %nop12628 = alloca i1, i1 0
+  %nop12629 = alloca i1, i1 0
+  %nop12630 = alloca i1, i1 0
+  %nop12631 = alloca i1, i1 0
+  %nop12632 = alloca i1, i1 0
+  %nop12633 = alloca i1, i1 0
+  %nop12634 = alloca i1, i1 0
+  %nop12635 = alloca i1, i1 0
+  %nop12636 = alloca i1, i1 0
+  %nop12637 = alloca i1, i1 0
+  %nop12638 = alloca i1, i1 0
+  %nop12639 = alloca i1, i1 0
+  %nop12640 = alloca i1, i1 0
+  %nop12641 = alloca i1, i1 0
+  %nop12642 = alloca i1, i1 0
+  %nop12643 = alloca i1, i1 0
+  %nop12644 = alloca i1, i1 0
+  %nop12645 = alloca i1, i1 0
+  %nop12646 = alloca i1, i1 0
+  %nop12647 = alloca i1, i1 0
+  %nop12648 = alloca i1, i1 0
+  %nop12649 = alloca i1, i1 0
+  %nop12650 = alloca i1, i1 0
+  %nop12651 = alloca i1, i1 0
+  %nop12652 = alloca i1, i1 0
+  %nop12653 = alloca i1, i1 0
+  %nop12654 = alloca i1, i1 0
+  %nop12655 = alloca i1, i1 0
+  %nop12656 = alloca i1, i1 0
+  %nop12657 = alloca i1, i1 0
+  %nop12658 = alloca i1, i1 0
+  %nop12659 = alloca i1, i1 0
+  %nop12660 = alloca i1, i1 0
+  %nop12661 = alloca i1, i1 0
+  %nop12662 = alloca i1, i1 0
+  %nop12663 = alloca i1, i1 0
+  %nop12664 = alloca i1, i1 0
+  %nop12665 = alloca i1, i1 0
+  %nop12666 = alloca i1, i1 0
+  %nop12667 = alloca i1, i1 0
+  %nop12668 = alloca i1, i1 0
+  %nop12669 = alloca i1, i1 0
+  %nop12670 = alloca i1, i1 0
+  %nop12671 = alloca i1, i1 0
+  %nop12672 = alloca i1, i1 0
+  %nop12673 = alloca i1, i1 0
+  %nop12674 = alloca i1, i1 0
+  %nop12675 = alloca i1, i1 0
+  %nop12676 = alloca i1, i1 0
+  %nop12677 = alloca i1, i1 0
+  %nop12678 = alloca i1, i1 0
+  %nop12679 = alloca i1, i1 0
+  %nop12680 = alloca i1, i1 0
+  %nop12681 = alloca i1, i1 0
+  %nop12682 = alloca i1, i1 0
+  %nop12683 = alloca i1, i1 0
+  %nop12684 = alloca i1, i1 0
+  %nop12685 = alloca i1, i1 0
+  %nop12686 = alloca i1, i1 0
+  %nop12687 = alloca i1, i1 0
+  %nop12688 = alloca i1, i1 0
+  %nop12689 = alloca i1, i1 0
+  %nop12690 = alloca i1, i1 0
+  %nop12691 = alloca i1, i1 0
+  %nop12692 = alloca i1, i1 0
+  %nop12693 = alloca i1, i1 0
+  %nop12694 = alloca i1, i1 0
+  %nop12695 = alloca i1, i1 0
+  %nop12696 = alloca i1, i1 0
+  %nop12697 = alloca i1, i1 0
+  %nop12698 = alloca i1, i1 0
+  %nop12699 = alloca i1, i1 0
+  %nop12700 = alloca i1, i1 0
+  %nop12701 = alloca i1, i1 0
+  %nop12702 = alloca i1, i1 0
+  %nop12703 = alloca i1, i1 0
+  %nop12704 = alloca i1, i1 0
+  %nop12705 = alloca i1, i1 0
+  %nop12706 = alloca i1, i1 0
+  %nop12707 = alloca i1, i1 0
+  %nop12708 = alloca i1, i1 0
+  %nop12709 = alloca i1, i1 0
+  %nop12710 = alloca i1, i1 0
+  %nop12711 = alloca i1, i1 0
+  %nop12712 = alloca i1, i1 0
+  %nop12713 = alloca i1, i1 0
+  %nop12714 = alloca i1, i1 0
+  %nop12715 = alloca i1, i1 0
+  %nop12716 = alloca i1, i1 0
+  %nop12717 = alloca i1, i1 0
+  %nop12718 = alloca i1, i1 0
+  %nop12719 = alloca i1, i1 0
+  %nop12720 = alloca i1, i1 0
+  %nop12721 = alloca i1, i1 0
+  %nop12722 = alloca i1, i1 0
+  %nop12723 = alloca i1, i1 0
+  %nop12724 = alloca i1, i1 0
+  %nop12725 = alloca i1, i1 0
+  %nop12726 = alloca i1, i1 0
+  %nop12727 = alloca i1, i1 0
+  %nop12728 = alloca i1, i1 0
+  %nop12729 = alloca i1, i1 0
+  %nop12730 = alloca i1, i1 0
+  %nop12731 = alloca i1, i1 0
+  %nop12732 = alloca i1, i1 0
+  %nop12733 = alloca i1, i1 0
+  %nop12734 = alloca i1, i1 0
+  %nop12735 = alloca i1, i1 0
+  %nop12736 = alloca i1, i1 0
+  %nop12737 = alloca i1, i1 0
+  %nop12738 = alloca i1, i1 0
+  %nop12739 = alloca i1, i1 0
+  %nop12740 = alloca i1, i1 0
+  %nop12741 = alloca i1, i1 0
+  %nop12742 = alloca i1, i1 0
+  %nop12743 = alloca i1, i1 0
+  %nop12744 = alloca i1, i1 0
+  %nop12745 = alloca i1, i1 0
+  %nop12746 = alloca i1, i1 0
+  %nop12747 = alloca i1, i1 0
+  %nop12748 = alloca i1, i1 0
+  %nop12749 = alloca i1, i1 0
+  %nop12750 = alloca i1, i1 0
+  %nop12751 = alloca i1, i1 0
+  %nop12752 = alloca i1, i1 0
+  %nop12753 = alloca i1, i1 0
+  %nop12754 = alloca i1, i1 0
+  %nop12755 = alloca i1, i1 0
+  %nop12756 = alloca i1, i1 0
+  %nop12757 = alloca i1, i1 0
+  %nop12758 = alloca i1, i1 0
+  %nop12759 = alloca i1, i1 0
+  %nop12760 = alloca i1, i1 0
+  %nop12761 = alloca i1, i1 0
+  %nop12762 = alloca i1, i1 0
+  %nop12763 = alloca i1, i1 0
+  %nop12764 = alloca i1, i1 0
+  %nop12765 = alloca i1, i1 0
+  %nop12766 = alloca i1, i1 0
+  %nop12767 = alloca i1, i1 0
+  %nop12768 = alloca i1, i1 0
+  %nop12769 = alloca i1, i1 0
+  %nop12770 = alloca i1, i1 0
+  %nop12771 = alloca i1, i1 0
+  %nop12772 = alloca i1, i1 0
+  %nop12773 = alloca i1, i1 0
+  %nop12774 = alloca i1, i1 0
+  %nop12775 = alloca i1, i1 0
+  %nop12776 = alloca i1, i1 0
+  %nop12777 = alloca i1, i1 0
+  %nop12778 = alloca i1, i1 0
+  %nop12779 = alloca i1, i1 0
+  %nop12780 = alloca i1, i1 0
+  %nop12781 = alloca i1, i1 0
+  %nop12782 = alloca i1, i1 0
+  %nop12783 = alloca i1, i1 0
+  %nop12784 = alloca i1, i1 0
+  %nop12785 = alloca i1, i1 0
+  %nop12786 = alloca i1, i1 0
+  %nop12787 = alloca i1, i1 0
+  %nop12788 = alloca i1, i1 0
+  %nop12789 = alloca i1, i1 0
+  %nop12790 = alloca i1, i1 0
+  %nop12791 = alloca i1, i1 0
+  %nop12792 = alloca i1, i1 0
+  %nop12793 = alloca i1, i1 0
+  %nop12794 = alloca i1, i1 0
+  %nop12795 = alloca i1, i1 0
+  %nop12796 = alloca i1, i1 0
+  %nop12797 = alloca i1, i1 0
+  %nop12798 = alloca i1, i1 0
+  %nop12799 = alloca i1, i1 0
+  %nop12800 = alloca i1, i1 0
+  %nop12801 = alloca i1, i1 0
+  %nop12802 = alloca i1, i1 0
+  %nop12803 = alloca i1, i1 0
+  %nop12804 = alloca i1, i1 0
+  %nop12805 = alloca i1, i1 0
+  %nop12806 = alloca i1, i1 0
+  %nop12807 = alloca i1, i1 0
+  %nop12808 = alloca i1, i1 0
+  %nop12809 = alloca i1, i1 0
+  %nop12810 = alloca i1, i1 0
+  %nop12811 = alloca i1, i1 0
+  %nop12812 = alloca i1, i1 0
+  %nop12813 = alloca i1, i1 0
+  %nop12814 = alloca i1, i1 0
+  %nop12815 = alloca i1, i1 0
+  %nop12816 = alloca i1, i1 0
+  %nop12817 = alloca i1, i1 0
+  %nop12818 = alloca i1, i1 0
+  %nop12819 = alloca i1, i1 0
+  %nop12820 = alloca i1, i1 0
+  %nop12821 = alloca i1, i1 0
+  %nop12822 = alloca i1, i1 0
+  %nop12823 = alloca i1, i1 0
+  %nop12824 = alloca i1, i1 0
+  %nop12825 = alloca i1, i1 0
+  %nop12826 = alloca i1, i1 0
+  %nop12827 = alloca i1, i1 0
+  %nop12828 = alloca i1, i1 0
+  %nop12829 = alloca i1, i1 0
+  %nop12830 = alloca i1, i1 0
+  %nop12831 = alloca i1, i1 0
+  %nop12832 = alloca i1, i1 0
+  %nop12833 = alloca i1, i1 0
+  %nop12834 = alloca i1, i1 0
+  %nop12835 = alloca i1, i1 0
+  %nop12836 = alloca i1, i1 0
+  %nop12837 = alloca i1, i1 0
+  %nop12838 = alloca i1, i1 0
+  %nop12839 = alloca i1, i1 0
+  %nop12840 = alloca i1, i1 0
+  %nop12841 = alloca i1, i1 0
+  %nop12842 = alloca i1, i1 0
+  %nop12843 = alloca i1, i1 0
+  %nop12844 = alloca i1, i1 0
+  %nop12845 = alloca i1, i1 0
+  %nop12846 = alloca i1, i1 0
+  %nop12847 = alloca i1, i1 0
+  %nop12848 = alloca i1, i1 0
+  %nop12849 = alloca i1, i1 0
+  %nop12850 = alloca i1, i1 0
+  %nop12851 = alloca i1, i1 0
+  %nop12852 = alloca i1, i1 0
+  %nop12853 = alloca i1, i1 0
+  %nop12854 = alloca i1, i1 0
+  %nop12855 = alloca i1, i1 0
+  %nop12856 = alloca i1, i1 0
+  %nop12857 = alloca i1, i1 0
+  %nop12858 = alloca i1, i1 0
+  %nop12859 = alloca i1, i1 0
+  %nop12860 = alloca i1, i1 0
+  %nop12861 = alloca i1, i1 0
+  %nop12862 = alloca i1, i1 0
+  %nop12863 = alloca i1, i1 0
+  %nop12864 = alloca i1, i1 0
+  %nop12865 = alloca i1, i1 0
+  %nop12866 = alloca i1, i1 0
+  %nop12867 = alloca i1, i1 0
+  %nop12868 = alloca i1, i1 0
+  %nop12869 = alloca i1, i1 0
+  %nop12870 = alloca i1, i1 0
+  %nop12871 = alloca i1, i1 0
+  %nop12872 = alloca i1, i1 0
+  %nop12873 = alloca i1, i1 0
+  %nop12874 = alloca i1, i1 0
+  %nop12875 = alloca i1, i1 0
+  %nop12876 = alloca i1, i1 0
+  %nop12877 = alloca i1, i1 0
+  %nop12878 = alloca i1, i1 0
+  %nop12879 = alloca i1, i1 0
+  %nop12880 = alloca i1, i1 0
+  %nop12881 = alloca i1, i1 0
+  %nop12882 = alloca i1, i1 0
+  %nop12883 = alloca i1, i1 0
+  %nop12884 = alloca i1, i1 0
+  %nop12885 = alloca i1, i1 0
+  %nop12886 = alloca i1, i1 0
+  %nop12887 = alloca i1, i1 0
+  %nop12888 = alloca i1, i1 0
+  %nop12889 = alloca i1, i1 0
+  %nop12890 = alloca i1, i1 0
+  %nop12891 = alloca i1, i1 0
+  %nop12892 = alloca i1, i1 0
+  %nop12893 = alloca i1, i1 0
+  %nop12894 = alloca i1, i1 0
+  %nop12895 = alloca i1, i1 0
+  %nop12896 = alloca i1, i1 0
+  %nop12897 = alloca i1, i1 0
+  %nop12898 = alloca i1, i1 0
+  %nop12899 = alloca i1, i1 0
+  %nop12900 = alloca i1, i1 0
+  %nop12901 = alloca i1, i1 0
+  %nop12902 = alloca i1, i1 0
+  %nop12903 = alloca i1, i1 0
+  %nop12904 = alloca i1, i1 0
+  %nop12905 = alloca i1, i1 0
+  %nop12906 = alloca i1, i1 0
+  %nop12907 = alloca i1, i1 0
+  %nop12908 = alloca i1, i1 0
+  %nop12909 = alloca i1, i1 0
+  %nop12910 = alloca i1, i1 0
+  %nop12911 = alloca i1, i1 0
+  %nop12912 = alloca i1, i1 0
+  %nop12913 = alloca i1, i1 0
+  %nop12914 = alloca i1, i1 0
+  %nop12915 = alloca i1, i1 0
+  %nop12916 = alloca i1, i1 0
+  %nop12917 = alloca i1, i1 0
+  %nop12918 = alloca i1, i1 0
+  %nop12919 = alloca i1, i1 0
+  %nop12920 = alloca i1, i1 0
+  %nop12921 = alloca i1, i1 0
+  %nop12922 = alloca i1, i1 0
+  %nop12923 = alloca i1, i1 0
+  %nop12924 = alloca i1, i1 0
+  %nop12925 = alloca i1, i1 0
+  %nop12926 = alloca i1, i1 0
+  %nop12927 = alloca i1, i1 0
+  %nop12928 = alloca i1, i1 0
+  %nop12929 = alloca i1, i1 0
+  %nop12930 = alloca i1, i1 0
+  %nop12931 = alloca i1, i1 0
+  %nop12932 = alloca i1, i1 0
+  %nop12933 = alloca i1, i1 0
+  %nop12934 = alloca i1, i1 0
+  %nop12935 = alloca i1, i1 0
+  %nop12936 = alloca i1, i1 0
+  %nop12937 = alloca i1, i1 0
+  %nop12938 = alloca i1, i1 0
+  %nop12939 = alloca i1, i1 0
+  %nop12940 = alloca i1, i1 0
+  %nop12941 = alloca i1, i1 0
+  %nop12942 = alloca i1, i1 0
+  %nop12943 = alloca i1, i1 0
+  %nop12944 = alloca i1, i1 0
+  %nop12945 = alloca i1, i1 0
+  %nop12946 = alloca i1, i1 0
+  %nop12947 = alloca i1, i1 0
+  %nop12948 = alloca i1, i1 0
+  %nop12949 = alloca i1, i1 0
+  %nop12950 = alloca i1, i1 0
+  %nop12951 = alloca i1, i1 0
+  %nop12952 = alloca i1, i1 0
+  %nop12953 = alloca i1, i1 0
+  %nop12954 = alloca i1, i1 0
+  %nop12955 = alloca i1, i1 0
+  %nop12956 = alloca i1, i1 0
+  %nop12957 = alloca i1, i1 0
+  %nop12958 = alloca i1, i1 0
+  %nop12959 = alloca i1, i1 0
+  %nop12960 = alloca i1, i1 0
+  %nop12961 = alloca i1, i1 0
+  %nop12962 = alloca i1, i1 0
+  %nop12963 = alloca i1, i1 0
+  %nop12964 = alloca i1, i1 0
+  %nop12965 = alloca i1, i1 0
+  %nop12966 = alloca i1, i1 0
+  %nop12967 = alloca i1, i1 0
+  %nop12968 = alloca i1, i1 0
+  %nop12969 = alloca i1, i1 0
+  %nop12970 = alloca i1, i1 0
+  %nop12971 = alloca i1, i1 0
+  %nop12972 = alloca i1, i1 0
+  %nop12973 = alloca i1, i1 0
+  %nop12974 = alloca i1, i1 0
+  %nop12975 = alloca i1, i1 0
+  %nop12976 = alloca i1, i1 0
+  %nop12977 = alloca i1, i1 0
+  %nop12978 = alloca i1, i1 0
+  %nop12979 = alloca i1, i1 0
+  %nop12980 = alloca i1, i1 0
+  %nop12981 = alloca i1, i1 0
+  %nop12982 = alloca i1, i1 0
+  %nop12983 = alloca i1, i1 0
+  %nop12984 = alloca i1, i1 0
+  %nop12985 = alloca i1, i1 0
+  %nop12986 = alloca i1, i1 0
+  %nop12987 = alloca i1, i1 0
+  %nop12988 = alloca i1, i1 0
+  %nop12989 = alloca i1, i1 0
+  %nop12990 = alloca i1, i1 0
+  %nop12991 = alloca i1, i1 0
+  %nop12992 = alloca i1, i1 0
+  %nop12993 = alloca i1, i1 0
+  %nop12994 = alloca i1, i1 0
+  %nop12995 = alloca i1, i1 0
+  %nop12996 = alloca i1, i1 0
+  %nop12997 = alloca i1, i1 0
+  %nop12998 = alloca i1, i1 0
+  %nop12999 = alloca i1, i1 0
+  %nop13000 = alloca i1, i1 0
+  %nop13001 = alloca i1, i1 0
+  %nop13002 = alloca i1, i1 0
+  %nop13003 = alloca i1, i1 0
+  %nop13004 = alloca i1, i1 0
+  %nop13005 = alloca i1, i1 0
+  %nop13006 = alloca i1, i1 0
+  %nop13007 = alloca i1, i1 0
+  %nop13008 = alloca i1, i1 0
+  %nop13009 = alloca i1, i1 0
+  %nop13010 = alloca i1, i1 0
+  %nop13011 = alloca i1, i1 0
+  %nop13012 = alloca i1, i1 0
+  %nop13013 = alloca i1, i1 0
+  %nop13014 = alloca i1, i1 0
+  %nop13015 = alloca i1, i1 0
+  %nop13016 = alloca i1, i1 0
+  %nop13017 = alloca i1, i1 0
+  %nop13018 = alloca i1, i1 0
+  %nop13019 = alloca i1, i1 0
+  %nop13020 = alloca i1, i1 0
+  %nop13021 = alloca i1, i1 0
+  %nop13022 = alloca i1, i1 0
+  %nop13023 = alloca i1, i1 0
+  %nop13024 = alloca i1, i1 0
+  %nop13025 = alloca i1, i1 0
+  %nop13026 = alloca i1, i1 0
+  %nop13027 = alloca i1, i1 0
+  %nop13028 = alloca i1, i1 0
+  %nop13029 = alloca i1, i1 0
+  %nop13030 = alloca i1, i1 0
+  %nop13031 = alloca i1, i1 0
+  %nop13032 = alloca i1, i1 0
+  %nop13033 = alloca i1, i1 0
+  %nop13034 = alloca i1, i1 0
+  %nop13035 = alloca i1, i1 0
+  %nop13036 = alloca i1, i1 0
+  %nop13037 = alloca i1, i1 0
+  %nop13038 = alloca i1, i1 0
+  %nop13039 = alloca i1, i1 0
+  %nop13040 = alloca i1, i1 0
+  %nop13041 = alloca i1, i1 0
+  %nop13042 = alloca i1, i1 0
+  %nop13043 = alloca i1, i1 0
+  %nop13044 = alloca i1, i1 0
+  %nop13045 = alloca i1, i1 0
+  %nop13046 = alloca i1, i1 0
+  %nop13047 = alloca i1, i1 0
+  %nop13048 = alloca i1, i1 0
+  %nop13049 = alloca i1, i1 0
+  %nop13050 = alloca i1, i1 0
+  %nop13051 = alloca i1, i1 0
+  %nop13052 = alloca i1, i1 0
+  %nop13053 = alloca i1, i1 0
+  %nop13054 = alloca i1, i1 0
+  %nop13055 = alloca i1, i1 0
+  %nop13056 = alloca i1, i1 0
+  %nop13057 = alloca i1, i1 0
+  %nop13058 = alloca i1, i1 0
+  %nop13059 = alloca i1, i1 0
+  %nop13060 = alloca i1, i1 0
+  %nop13061 = alloca i1, i1 0
+  %nop13062 = alloca i1, i1 0
+  %nop13063 = alloca i1, i1 0
+  %nop13064 = alloca i1, i1 0
+  %nop13065 = alloca i1, i1 0
+  %nop13066 = alloca i1, i1 0
+  %nop13067 = alloca i1, i1 0
+  %nop13068 = alloca i1, i1 0
+  %nop13069 = alloca i1, i1 0
+  %nop13070 = alloca i1, i1 0
+  %nop13071 = alloca i1, i1 0
+  %nop13072 = alloca i1, i1 0
+  %nop13073 = alloca i1, i1 0
+  %nop13074 = alloca i1, i1 0
+  %nop13075 = alloca i1, i1 0
+  %nop13076 = alloca i1, i1 0
+  %nop13077 = alloca i1, i1 0
+  %nop13078 = alloca i1, i1 0
+  %nop13079 = alloca i1, i1 0
+  %nop13080 = alloca i1, i1 0
+  %nop13081 = alloca i1, i1 0
+  %nop13082 = alloca i1, i1 0
+  %nop13083 = alloca i1, i1 0
+  %nop13084 = alloca i1, i1 0
+  %nop13085 = alloca i1, i1 0
+  %nop13086 = alloca i1, i1 0
+  %nop13087 = alloca i1, i1 0
+  %nop13088 = alloca i1, i1 0
+  %nop13089 = alloca i1, i1 0
+  %nop13090 = alloca i1, i1 0
+  %nop13091 = alloca i1, i1 0
+  %nop13092 = alloca i1, i1 0
+  %nop13093 = alloca i1, i1 0
+  %nop13094 = alloca i1, i1 0
+  %nop13095 = alloca i1, i1 0
+  %nop13096 = alloca i1, i1 0
+  %nop13097 = alloca i1, i1 0
+  %nop13098 = alloca i1, i1 0
+  %nop13099 = alloca i1, i1 0
+  %nop13100 = alloca i1, i1 0
+  %nop13101 = alloca i1, i1 0
+  %nop13102 = alloca i1, i1 0
+  %nop13103 = alloca i1, i1 0
+  %nop13104 = alloca i1, i1 0
+  %nop13105 = alloca i1, i1 0
+  %nop13106 = alloca i1, i1 0
+  %nop13107 = alloca i1, i1 0
+  %nop13108 = alloca i1, i1 0
+  %nop13109 = alloca i1, i1 0
+  %nop13110 = alloca i1, i1 0
+  %nop13111 = alloca i1, i1 0
+  %nop13112 = alloca i1, i1 0
+  %nop13113 = alloca i1, i1 0
+  %nop13114 = alloca i1, i1 0
+  %nop13115 = alloca i1, i1 0
+  %nop13116 = alloca i1, i1 0
+  %nop13117 = alloca i1, i1 0
+  %nop13118 = alloca i1, i1 0
+  %nop13119 = alloca i1, i1 0
+  %nop13120 = alloca i1, i1 0
+  %nop13121 = alloca i1, i1 0
+  %nop13122 = alloca i1, i1 0
+  %nop13123 = alloca i1, i1 0
+  %nop13124 = alloca i1, i1 0
+  %nop13125 = alloca i1, i1 0
+  %nop13126 = alloca i1, i1 0
+  %nop13127 = alloca i1, i1 0
+  %nop13128 = alloca i1, i1 0
+  %nop13129 = alloca i1, i1 0
+  %nop13130 = alloca i1, i1 0
+  %nop13131 = alloca i1, i1 0
+  %nop13132 = alloca i1, i1 0
+  %nop13133 = alloca i1, i1 0
+  %nop13134 = alloca i1, i1 0
+  %nop13135 = alloca i1, i1 0
+  %nop13136 = alloca i1, i1 0
+  %nop13137 = alloca i1, i1 0
+  %nop13138 = alloca i1, i1 0
+  %nop13139 = alloca i1, i1 0
+  %nop13140 = alloca i1, i1 0
+  %nop13141 = alloca i1, i1 0
+  %nop13142 = alloca i1, i1 0
+  %nop13143 = alloca i1, i1 0
+  %nop13144 = alloca i1, i1 0
+  %nop13145 = alloca i1, i1 0
+  %nop13146 = alloca i1, i1 0
+  %nop13147 = alloca i1, i1 0
+  %nop13148 = alloca i1, i1 0
+  %nop13149 = alloca i1, i1 0
+  %nop13150 = alloca i1, i1 0
+  %nop13151 = alloca i1, i1 0
+  %nop13152 = alloca i1, i1 0
+  %nop13153 = alloca i1, i1 0
+  %nop13154 = alloca i1, i1 0
+  %nop13155 = alloca i1, i1 0
+  %nop13156 = alloca i1, i1 0
+  %nop13157 = alloca i1, i1 0
+  %nop13158 = alloca i1, i1 0
+  %nop13159 = alloca i1, i1 0
+  %nop13160 = alloca i1, i1 0
+  %nop13161 = alloca i1, i1 0
+  %nop13162 = alloca i1, i1 0
+  %nop13163 = alloca i1, i1 0
+  %nop13164 = alloca i1, i1 0
+  %nop13165 = alloca i1, i1 0
+  %nop13166 = alloca i1, i1 0
+  %nop13167 = alloca i1, i1 0
+  %nop13168 = alloca i1, i1 0
+  %nop13169 = alloca i1, i1 0
+  %nop13170 = alloca i1, i1 0
+  %nop13171 = alloca i1, i1 0
+  %nop13172 = alloca i1, i1 0
+  %nop13173 = alloca i1, i1 0
+  %nop13174 = alloca i1, i1 0
+  %nop13175 = alloca i1, i1 0
+  %nop13176 = alloca i1, i1 0
+  %nop13177 = alloca i1, i1 0
+  %nop13178 = alloca i1, i1 0
+  %nop13179 = alloca i1, i1 0
+  %nop13180 = alloca i1, i1 0
+  %nop13181 = alloca i1, i1 0
+  %nop13182 = alloca i1, i1 0
+  %nop13183 = alloca i1, i1 0
+  %nop13184 = alloca i1, i1 0
+  %nop13185 = alloca i1, i1 0
+  %nop13186 = alloca i1, i1 0
+  %nop13187 = alloca i1, i1 0
+  %nop13188 = alloca i1, i1 0
+  %nop13189 = alloca i1, i1 0
+  %nop13190 = alloca i1, i1 0
+  %nop13191 = alloca i1, i1 0
+  %nop13192 = alloca i1, i1 0
+  %nop13193 = alloca i1, i1 0
+  %nop13194 = alloca i1, i1 0
+  %nop13195 = alloca i1, i1 0
+  %nop13196 = alloca i1, i1 0
+  %nop13197 = alloca i1, i1 0
+  %nop13198 = alloca i1, i1 0
+  %nop13199 = alloca i1, i1 0
+  %nop13200 = alloca i1, i1 0
+  %nop13201 = alloca i1, i1 0
+  %nop13202 = alloca i1, i1 0
+  %nop13203 = alloca i1, i1 0
+  %nop13204 = alloca i1, i1 0
+  %nop13205 = alloca i1, i1 0
+  %nop13206 = alloca i1, i1 0
+  %nop13207 = alloca i1, i1 0
+  %nop13208 = alloca i1, i1 0
+  %nop13209 = alloca i1, i1 0
+  %nop13210 = alloca i1, i1 0
+  %nop13211 = alloca i1, i1 0
+  %nop13212 = alloca i1, i1 0
+  %nop13213 = alloca i1, i1 0
+  %nop13214 = alloca i1, i1 0
+  %nop13215 = alloca i1, i1 0
+  %nop13216 = alloca i1, i1 0
+  %nop13217 = alloca i1, i1 0
+  %nop13218 = alloca i1, i1 0
+  %nop13219 = alloca i1, i1 0
+  %nop13220 = alloca i1, i1 0
+  %nop13221 = alloca i1, i1 0
+  %nop13222 = alloca i1, i1 0
+  %nop13223 = alloca i1, i1 0
+  %nop13224 = alloca i1, i1 0
+  %nop13225 = alloca i1, i1 0
+  %nop13226 = alloca i1, i1 0
+  %nop13227 = alloca i1, i1 0
+  %nop13228 = alloca i1, i1 0
+  %nop13229 = alloca i1, i1 0
+  %nop13230 = alloca i1, i1 0
+  %nop13231 = alloca i1, i1 0
+  %nop13232 = alloca i1, i1 0
+  %nop13233 = alloca i1, i1 0
+  %nop13234 = alloca i1, i1 0
+  %nop13235 = alloca i1, i1 0
+  %nop13236 = alloca i1, i1 0
+  %nop13237 = alloca i1, i1 0
+  %nop13238 = alloca i1, i1 0
+  %nop13239 = alloca i1, i1 0
+  %nop13240 = alloca i1, i1 0
+  %nop13241 = alloca i1, i1 0
+  %nop13242 = alloca i1, i1 0
+  %nop13243 = alloca i1, i1 0
+  %nop13244 = alloca i1, i1 0
+  %nop13245 = alloca i1, i1 0
+  %nop13246 = alloca i1, i1 0
+  %nop13247 = alloca i1, i1 0
+  %nop13248 = alloca i1, i1 0
+  %nop13249 = alloca i1, i1 0
+  %nop13250 = alloca i1, i1 0
+  %nop13251 = alloca i1, i1 0
+  %nop13252 = alloca i1, i1 0
+  %nop13253 = alloca i1, i1 0
+  %nop13254 = alloca i1, i1 0
+  %nop13255 = alloca i1, i1 0
+  %nop13256 = alloca i1, i1 0
+  %nop13257 = alloca i1, i1 0
+  %nop13258 = alloca i1, i1 0
+  %nop13259 = alloca i1, i1 0
+  %nop13260 = alloca i1, i1 0
+  %nop13261 = alloca i1, i1 0
+  %nop13262 = alloca i1, i1 0
+  %nop13263 = alloca i1, i1 0
+  %nop13264 = alloca i1, i1 0
+  %nop13265 = alloca i1, i1 0
+  %nop13266 = alloca i1, i1 0
+  %nop13267 = alloca i1, i1 0
+  %nop13268 = alloca i1, i1 0
+  %nop13269 = alloca i1, i1 0
+  %nop13270 = alloca i1, i1 0
+  %nop13271 = alloca i1, i1 0
+  %nop13272 = alloca i1, i1 0
+  %nop13273 = alloca i1, i1 0
+  %nop13274 = alloca i1, i1 0
+  %nop13275 = alloca i1, i1 0
+  %nop13276 = alloca i1, i1 0
+  %nop13277 = alloca i1, i1 0
+  %nop13278 = alloca i1, i1 0
+  %nop13279 = alloca i1, i1 0
+  %nop13280 = alloca i1, i1 0
+  %nop13281 = alloca i1, i1 0
+  %nop13282 = alloca i1, i1 0
+  %nop13283 = alloca i1, i1 0
+  %nop13284 = alloca i1, i1 0
+  %nop13285 = alloca i1, i1 0
+  %nop13286 = alloca i1, i1 0
+  %nop13287 = alloca i1, i1 0
+  %nop13288 = alloca i1, i1 0
+  %nop13289 = alloca i1, i1 0
+  %nop13290 = alloca i1, i1 0
+  %nop13291 = alloca i1, i1 0
+  %nop13292 = alloca i1, i1 0
+  %nop13293 = alloca i1, i1 0
+  %nop13294 = alloca i1, i1 0
+  %nop13295 = alloca i1, i1 0
+  %nop13296 = alloca i1, i1 0
+  %nop13297 = alloca i1, i1 0
+  %nop13298 = alloca i1, i1 0
+  %nop13299 = alloca i1, i1 0
+  %nop13300 = alloca i1, i1 0
+  %nop13301 = alloca i1, i1 0
+  %nop13302 = alloca i1, i1 0
+  %nop13303 = alloca i1, i1 0
+  %nop13304 = alloca i1, i1 0
+  %nop13305 = alloca i1, i1 0
+  %nop13306 = alloca i1, i1 0
+  %nop13307 = alloca i1, i1 0
+  %nop13308 = alloca i1, i1 0
+  %nop13309 = alloca i1, i1 0
+  %nop13310 = alloca i1, i1 0
+  %nop13311 = alloca i1, i1 0
+  %nop13312 = alloca i1, i1 0
+  %nop13313 = alloca i1, i1 0
+  %nop13314 = alloca i1, i1 0
+  %nop13315 = alloca i1, i1 0
+  %nop13316 = alloca i1, i1 0
+  %nop13317 = alloca i1, i1 0
+  %nop13318 = alloca i1, i1 0
+  %nop13319 = alloca i1, i1 0
+  %nop13320 = alloca i1, i1 0
+  %nop13321 = alloca i1, i1 0
+  %nop13322 = alloca i1, i1 0
+  %nop13323 = alloca i1, i1 0
+  %nop13324 = alloca i1, i1 0
+  %nop13325 = alloca i1, i1 0
+  %nop13326 = alloca i1, i1 0
+  %nop13327 = alloca i1, i1 0
+  %nop13328 = alloca i1, i1 0
+  %nop13329 = alloca i1, i1 0
+  %nop13330 = alloca i1, i1 0
+  %nop13331 = alloca i1, i1 0
+  %nop13332 = alloca i1, i1 0
+  %nop13333 = alloca i1, i1 0
+  %nop13334 = alloca i1, i1 0
+  %nop13335 = alloca i1, i1 0
+  %nop13336 = alloca i1, i1 0
+  %nop13337 = alloca i1, i1 0
+  %nop13338 = alloca i1, i1 0
+  %nop13339 = alloca i1, i1 0
+  %nop13340 = alloca i1, i1 0
+  %nop13341 = alloca i1, i1 0
+  %nop13342 = alloca i1, i1 0
+  %nop13343 = alloca i1, i1 0
+  %nop13344 = alloca i1, i1 0
+  %nop13345 = alloca i1, i1 0
+  %nop13346 = alloca i1, i1 0
+  %nop13347 = alloca i1, i1 0
+  %nop13348 = alloca i1, i1 0
+  %nop13349 = alloca i1, i1 0
+  %nop13350 = alloca i1, i1 0
+  %nop13351 = alloca i1, i1 0
+  %nop13352 = alloca i1, i1 0
+  %nop13353 = alloca i1, i1 0
+  %nop13354 = alloca i1, i1 0
+  %nop13355 = alloca i1, i1 0
+  %nop13356 = alloca i1, i1 0
+  %nop13357 = alloca i1, i1 0
+  %nop13358 = alloca i1, i1 0
+  %nop13359 = alloca i1, i1 0
+  %nop13360 = alloca i1, i1 0
+  %nop13361 = alloca i1, i1 0
+  %nop13362 = alloca i1, i1 0
+  %nop13363 = alloca i1, i1 0
+  %nop13364 = alloca i1, i1 0
+  %nop13365 = alloca i1, i1 0
+  %nop13366 = alloca i1, i1 0
+  %nop13367 = alloca i1, i1 0
+  %nop13368 = alloca i1, i1 0
+  %nop13369 = alloca i1, i1 0
+  %nop13370 = alloca i1, i1 0
+  %nop13371 = alloca i1, i1 0
+  %nop13372 = alloca i1, i1 0
+  %nop13373 = alloca i1, i1 0
+  %nop13374 = alloca i1, i1 0
+  %nop13375 = alloca i1, i1 0
+  %nop13376 = alloca i1, i1 0
+  %nop13377 = alloca i1, i1 0
+  %nop13378 = alloca i1, i1 0
+  %nop13379 = alloca i1, i1 0
+  %nop13380 = alloca i1, i1 0
+  %nop13381 = alloca i1, i1 0
+  %nop13382 = alloca i1, i1 0
+  %nop13383 = alloca i1, i1 0
+  %nop13384 = alloca i1, i1 0
+  %nop13385 = alloca i1, i1 0
+  %nop13386 = alloca i1, i1 0
+  %nop13387 = alloca i1, i1 0
+  %nop13388 = alloca i1, i1 0
+  %nop13389 = alloca i1, i1 0
+  %nop13390 = alloca i1, i1 0
+  %nop13391 = alloca i1, i1 0
+  %nop13392 = alloca i1, i1 0
+  %nop13393 = alloca i1, i1 0
+  %nop13394 = alloca i1, i1 0
+  %nop13395 = alloca i1, i1 0
+  %nop13396 = alloca i1, i1 0
+  %nop13397 = alloca i1, i1 0
+  %nop13398 = alloca i1, i1 0
+  %nop13399 = alloca i1, i1 0
+  %nop13400 = alloca i1, i1 0
+  %nop13401 = alloca i1, i1 0
+  %nop13402 = alloca i1, i1 0
+  %nop13403 = alloca i1, i1 0
+  %nop13404 = alloca i1, i1 0
+  %nop13405 = alloca i1, i1 0
+  %nop13406 = alloca i1, i1 0
+  %nop13407 = alloca i1, i1 0
+  %nop13408 = alloca i1, i1 0
+  %nop13409 = alloca i1, i1 0
+  %nop13410 = alloca i1, i1 0
+  %nop13411 = alloca i1, i1 0
+  %nop13412 = alloca i1, i1 0
+  %nop13413 = alloca i1, i1 0
+  %nop13414 = alloca i1, i1 0
+  %nop13415 = alloca i1, i1 0
+  %nop13416 = alloca i1, i1 0
+  %nop13417 = alloca i1, i1 0
+  %nop13418 = alloca i1, i1 0
+  %nop13419 = alloca i1, i1 0
+  %nop13420 = alloca i1, i1 0
+  %nop13421 = alloca i1, i1 0
+  %nop13422 = alloca i1, i1 0
+  %nop13423 = alloca i1, i1 0
+  %nop13424 = alloca i1, i1 0
+  %nop13425 = alloca i1, i1 0
+  %nop13426 = alloca i1, i1 0
+  %nop13427 = alloca i1, i1 0
+  %nop13428 = alloca i1, i1 0
+  %nop13429 = alloca i1, i1 0
+  %nop13430 = alloca i1, i1 0
+  %nop13431 = alloca i1, i1 0
+  %nop13432 = alloca i1, i1 0
+  %nop13433 = alloca i1, i1 0
+  %nop13434 = alloca i1, i1 0
+  %nop13435 = alloca i1, i1 0
+  %nop13436 = alloca i1, i1 0
+  %nop13437 = alloca i1, i1 0
+  %nop13438 = alloca i1, i1 0
+  %nop13439 = alloca i1, i1 0
+  %nop13440 = alloca i1, i1 0
+  %nop13441 = alloca i1, i1 0
+  %nop13442 = alloca i1, i1 0
+  %nop13443 = alloca i1, i1 0
+  %nop13444 = alloca i1, i1 0
+  %nop13445 = alloca i1, i1 0
+  %nop13446 = alloca i1, i1 0
+  %nop13447 = alloca i1, i1 0
+  %nop13448 = alloca i1, i1 0
+  %nop13449 = alloca i1, i1 0
+  %nop13450 = alloca i1, i1 0
+  %nop13451 = alloca i1, i1 0
+  %nop13452 = alloca i1, i1 0
+  %nop13453 = alloca i1, i1 0
+  %nop13454 = alloca i1, i1 0
+  %nop13455 = alloca i1, i1 0
+  %nop13456 = alloca i1, i1 0
+  %nop13457 = alloca i1, i1 0
+  %nop13458 = alloca i1, i1 0
+  %nop13459 = alloca i1, i1 0
+  %nop13460 = alloca i1, i1 0
+  %nop13461 = alloca i1, i1 0
+  %nop13462 = alloca i1, i1 0
+  %nop13463 = alloca i1, i1 0
+  %nop13464 = alloca i1, i1 0
+  %nop13465 = alloca i1, i1 0
+  %nop13466 = alloca i1, i1 0
+  %nop13467 = alloca i1, i1 0
+  %nop13468 = alloca i1, i1 0
+  %nop13469 = alloca i1, i1 0
+  %nop13470 = alloca i1, i1 0
+  %nop13471 = alloca i1, i1 0
+  %nop13472 = alloca i1, i1 0
+  %nop13473 = alloca i1, i1 0
+  %nop13474 = alloca i1, i1 0
+  %nop13475 = alloca i1, i1 0
+  %nop13476 = alloca i1, i1 0
+  %nop13477 = alloca i1, i1 0
+  %nop13478 = alloca i1, i1 0
+  %nop13479 = alloca i1, i1 0
+  %nop13480 = alloca i1, i1 0
+  %nop13481 = alloca i1, i1 0
+  %nop13482 = alloca i1, i1 0
+  %nop13483 = alloca i1, i1 0
+  %nop13484 = alloca i1, i1 0
+  %nop13485 = alloca i1, i1 0
+  %nop13486 = alloca i1, i1 0
+  %nop13487 = alloca i1, i1 0
+  %nop13488 = alloca i1, i1 0
+  %nop13489 = alloca i1, i1 0
+  %nop13490 = alloca i1, i1 0
+  %nop13491 = alloca i1, i1 0
+  %nop13492 = alloca i1, i1 0
+  %nop13493 = alloca i1, i1 0
+  %nop13494 = alloca i1, i1 0
+  %nop13495 = alloca i1, i1 0
+  %nop13496 = alloca i1, i1 0
+  %nop13497 = alloca i1, i1 0
+  %nop13498 = alloca i1, i1 0
+  %nop13499 = alloca i1, i1 0
+  %nop13500 = alloca i1, i1 0
+  %nop13501 = alloca i1, i1 0
+  %nop13502 = alloca i1, i1 0
+  %nop13503 = alloca i1, i1 0
+  %nop13504 = alloca i1, i1 0
+  %nop13505 = alloca i1, i1 0
+  %nop13506 = alloca i1, i1 0
+  %nop13507 = alloca i1, i1 0
+  %nop13508 = alloca i1, i1 0
+  %nop13509 = alloca i1, i1 0
+  %nop13510 = alloca i1, i1 0
+  %nop13511 = alloca i1, i1 0
+  %nop13512 = alloca i1, i1 0
+  %nop13513 = alloca i1, i1 0
+  %nop13514 = alloca i1, i1 0
+  %nop13515 = alloca i1, i1 0
+  %nop13516 = alloca i1, i1 0
+  %nop13517 = alloca i1, i1 0
+  %nop13518 = alloca i1, i1 0
+  %nop13519 = alloca i1, i1 0
+  %nop13520 = alloca i1, i1 0
+  %nop13521 = alloca i1, i1 0
+  %nop13522 = alloca i1, i1 0
+  %nop13523 = alloca i1, i1 0
+  %nop13524 = alloca i1, i1 0
+  %nop13525 = alloca i1, i1 0
+  %nop13526 = alloca i1, i1 0
+  %nop13527 = alloca i1, i1 0
+  %nop13528 = alloca i1, i1 0
+  %nop13529 = alloca i1, i1 0
+  %nop13530 = alloca i1, i1 0
+  %nop13531 = alloca i1, i1 0
+  %nop13532 = alloca i1, i1 0
+  %nop13533 = alloca i1, i1 0
+  %nop13534 = alloca i1, i1 0
+  %nop13535 = alloca i1, i1 0
+  %nop13536 = alloca i1, i1 0
+  %nop13537 = alloca i1, i1 0
+  %nop13538 = alloca i1, i1 0
+  %nop13539 = alloca i1, i1 0
+  %nop13540 = alloca i1, i1 0
+  %nop13541 = alloca i1, i1 0
+  %nop13542 = alloca i1, i1 0
+  %nop13543 = alloca i1, i1 0
+  %nop13544 = alloca i1, i1 0
+  %nop13545 = alloca i1, i1 0
+  %nop13546 = alloca i1, i1 0
+  %nop13547 = alloca i1, i1 0
+  %nop13548 = alloca i1, i1 0
+  %nop13549 = alloca i1, i1 0
+  %nop13550 = alloca i1, i1 0
+  %nop13551 = alloca i1, i1 0
+  %nop13552 = alloca i1, i1 0
+  %nop13553 = alloca i1, i1 0
+  %nop13554 = alloca i1, i1 0
+  %nop13555 = alloca i1, i1 0
+  %nop13556 = alloca i1, i1 0
+  %nop13557 = alloca i1, i1 0
+  %nop13558 = alloca i1, i1 0
+  %nop13559 = alloca i1, i1 0
+  %nop13560 = alloca i1, i1 0
+  %nop13561 = alloca i1, i1 0
+  %nop13562 = alloca i1, i1 0
+  %nop13563 = alloca i1, i1 0
+  %nop13564 = alloca i1, i1 0
+  %nop13565 = alloca i1, i1 0
+  %nop13566 = alloca i1, i1 0
+  %nop13567 = alloca i1, i1 0
+  %nop13568 = alloca i1, i1 0
+  %nop13569 = alloca i1, i1 0
+  %nop13570 = alloca i1, i1 0
+  %nop13571 = alloca i1, i1 0
+  %nop13572 = alloca i1, i1 0
+  %nop13573 = alloca i1, i1 0
+  %nop13574 = alloca i1, i1 0
+  %nop13575 = alloca i1, i1 0
+  %nop13576 = alloca i1, i1 0
+  %nop13577 = alloca i1, i1 0
+  %nop13578 = alloca i1, i1 0
+  %nop13579 = alloca i1, i1 0
+  %nop13580 = alloca i1, i1 0
+  %nop13581 = alloca i1, i1 0
+  %nop13582 = alloca i1, i1 0
+  %nop13583 = alloca i1, i1 0
+  %nop13584 = alloca i1, i1 0
+  %nop13585 = alloca i1, i1 0
+  %nop13586 = alloca i1, i1 0
+  %nop13587 = alloca i1, i1 0
+  %nop13588 = alloca i1, i1 0
+  %nop13589 = alloca i1, i1 0
+  %nop13590 = alloca i1, i1 0
+  %nop13591 = alloca i1, i1 0
+  %nop13592 = alloca i1, i1 0
+  %nop13593 = alloca i1, i1 0
+  %nop13594 = alloca i1, i1 0
+  %nop13595 = alloca i1, i1 0
+  %nop13596 = alloca i1, i1 0
+  %nop13597 = alloca i1, i1 0
+  %nop13598 = alloca i1, i1 0
+  %nop13599 = alloca i1, i1 0
+  %nop13600 = alloca i1, i1 0
+  %nop13601 = alloca i1, i1 0
+  %nop13602 = alloca i1, i1 0
+  %nop13603 = alloca i1, i1 0
+  %nop13604 = alloca i1, i1 0
+  %nop13605 = alloca i1, i1 0
+  %nop13606 = alloca i1, i1 0
+  %nop13607 = alloca i1, i1 0
+  %nop13608 = alloca i1, i1 0
+  %nop13609 = alloca i1, i1 0
+  %nop13610 = alloca i1, i1 0
+  %nop13611 = alloca i1, i1 0
+  %nop13612 = alloca i1, i1 0
+  %nop13613 = alloca i1, i1 0
+  %nop13614 = alloca i1, i1 0
+  %nop13615 = alloca i1, i1 0
+  %nop13616 = alloca i1, i1 0
+  %nop13617 = alloca i1, i1 0
+  %nop13618 = alloca i1, i1 0
+  %nop13619 = alloca i1, i1 0
+  %nop13620 = alloca i1, i1 0
+  %nop13621 = alloca i1, i1 0
+  %nop13622 = alloca i1, i1 0
+  %nop13623 = alloca i1, i1 0
+  %nop13624 = alloca i1, i1 0
+  %nop13625 = alloca i1, i1 0
+  %nop13626 = alloca i1, i1 0
+  %nop13627 = alloca i1, i1 0
+  %nop13628 = alloca i1, i1 0
+  %nop13629 = alloca i1, i1 0
+  %nop13630 = alloca i1, i1 0
+  %nop13631 = alloca i1, i1 0
+  %nop13632 = alloca i1, i1 0
+  %nop13633 = alloca i1, i1 0
+  %nop13634 = alloca i1, i1 0
+  %nop13635 = alloca i1, i1 0
+  %nop13636 = alloca i1, i1 0
+  %nop13637 = alloca i1, i1 0
+  %nop13638 = alloca i1, i1 0
+  %nop13639 = alloca i1, i1 0
+  %nop13640 = alloca i1, i1 0
+  %nop13641 = alloca i1, i1 0
+  %nop13642 = alloca i1, i1 0
+  %nop13643 = alloca i1, i1 0
+  %nop13644 = alloca i1, i1 0
+  %nop13645 = alloca i1, i1 0
+  %nop13646 = alloca i1, i1 0
+  %nop13647 = alloca i1, i1 0
+  %nop13648 = alloca i1, i1 0
+  %nop13649 = alloca i1, i1 0
+  %nop13650 = alloca i1, i1 0
+  %nop13651 = alloca i1, i1 0
+  %nop13652 = alloca i1, i1 0
+  %nop13653 = alloca i1, i1 0
+  %nop13654 = alloca i1, i1 0
+  %nop13655 = alloca i1, i1 0
+  %nop13656 = alloca i1, i1 0
+  %nop13657 = alloca i1, i1 0
+  %nop13658 = alloca i1, i1 0
+  %nop13659 = alloca i1, i1 0
+  %nop13660 = alloca i1, i1 0
+  %nop13661 = alloca i1, i1 0
+  %nop13662 = alloca i1, i1 0
+  %nop13663 = alloca i1, i1 0
+  %nop13664 = alloca i1, i1 0
+  %nop13665 = alloca i1, i1 0
+  %nop13666 = alloca i1, i1 0
+  %nop13667 = alloca i1, i1 0
+  %nop13668 = alloca i1, i1 0
+  %nop13669 = alloca i1, i1 0
+  %nop13670 = alloca i1, i1 0
+  %nop13671 = alloca i1, i1 0
+  %nop13672 = alloca i1, i1 0
+  %nop13673 = alloca i1, i1 0
+  %nop13674 = alloca i1, i1 0
+  %nop13675 = alloca i1, i1 0
+  %nop13676 = alloca i1, i1 0
+  %nop13677 = alloca i1, i1 0
+  %nop13678 = alloca i1, i1 0
+  %nop13679 = alloca i1, i1 0
+  %nop13680 = alloca i1, i1 0
+  %nop13681 = alloca i1, i1 0
+  %nop13682 = alloca i1, i1 0
+  %nop13683 = alloca i1, i1 0
+  %nop13684 = alloca i1, i1 0
+  %nop13685 = alloca i1, i1 0
+  %nop13686 = alloca i1, i1 0
+  %nop13687 = alloca i1, i1 0
+  %nop13688 = alloca i1, i1 0
+  %nop13689 = alloca i1, i1 0
+  %nop13690 = alloca i1, i1 0
+  %nop13691 = alloca i1, i1 0
+  %nop13692 = alloca i1, i1 0
+  %nop13693 = alloca i1, i1 0
+  %nop13694 = alloca i1, i1 0
+  %nop13695 = alloca i1, i1 0
+  %nop13696 = alloca i1, i1 0
+  %nop13697 = alloca i1, i1 0
+  %nop13698 = alloca i1, i1 0
+  %nop13699 = alloca i1, i1 0
+  %nop13700 = alloca i1, i1 0
+  %nop13701 = alloca i1, i1 0
+  %nop13702 = alloca i1, i1 0
+  %nop13703 = alloca i1, i1 0
+  %nop13704 = alloca i1, i1 0
+  %nop13705 = alloca i1, i1 0
+  %nop13706 = alloca i1, i1 0
+  %nop13707 = alloca i1, i1 0
+  %nop13708 = alloca i1, i1 0
+  %nop13709 = alloca i1, i1 0
+  %nop13710 = alloca i1, i1 0
+  %nop13711 = alloca i1, i1 0
+  %nop13712 = alloca i1, i1 0
+  %nop13713 = alloca i1, i1 0
+  %nop13714 = alloca i1, i1 0
+  %nop13715 = alloca i1, i1 0
+  %nop13716 = alloca i1, i1 0
+  %nop13717 = alloca i1, i1 0
+  %nop13718 = alloca i1, i1 0
+  %nop13719 = alloca i1, i1 0
+  %nop13720 = alloca i1, i1 0
+  %nop13721 = alloca i1, i1 0
+  %nop13722 = alloca i1, i1 0
+  %nop13723 = alloca i1, i1 0
+  %nop13724 = alloca i1, i1 0
+  %nop13725 = alloca i1, i1 0
+  %nop13726 = alloca i1, i1 0
+  %nop13727 = alloca i1, i1 0
+  %nop13728 = alloca i1, i1 0
+  %nop13729 = alloca i1, i1 0
+  %nop13730 = alloca i1, i1 0
+  %nop13731 = alloca i1, i1 0
+  %nop13732 = alloca i1, i1 0
+  %nop13733 = alloca i1, i1 0
+  %nop13734 = alloca i1, i1 0
+  %nop13735 = alloca i1, i1 0
+  %nop13736 = alloca i1, i1 0
+  %nop13737 = alloca i1, i1 0
+  %nop13738 = alloca i1, i1 0
+  %nop13739 = alloca i1, i1 0
+  %nop13740 = alloca i1, i1 0
+  %nop13741 = alloca i1, i1 0
+  %nop13742 = alloca i1, i1 0
+  %nop13743 = alloca i1, i1 0
+  %nop13744 = alloca i1, i1 0
+  %nop13745 = alloca i1, i1 0
+  %nop13746 = alloca i1, i1 0
+  %nop13747 = alloca i1, i1 0
+  %nop13748 = alloca i1, i1 0
+  %nop13749 = alloca i1, i1 0
+  %nop13750 = alloca i1, i1 0
+  %nop13751 = alloca i1, i1 0
+  %nop13752 = alloca i1, i1 0
+  %nop13753 = alloca i1, i1 0
+  %nop13754 = alloca i1, i1 0
+  %nop13755 = alloca i1, i1 0
+  %nop13756 = alloca i1, i1 0
+  %nop13757 = alloca i1, i1 0
+  %nop13758 = alloca i1, i1 0
+  %nop13759 = alloca i1, i1 0
+  %nop13760 = alloca i1, i1 0
+  %nop13761 = alloca i1, i1 0
+  %nop13762 = alloca i1, i1 0
+  %nop13763 = alloca i1, i1 0
+  %nop13764 = alloca i1, i1 0
+  %nop13765 = alloca i1, i1 0
+  %nop13766 = alloca i1, i1 0
+  %nop13767 = alloca i1, i1 0
+  %nop13768 = alloca i1, i1 0
+  %nop13769 = alloca i1, i1 0
+  %nop13770 = alloca i1, i1 0
+  %nop13771 = alloca i1, i1 0
+  %nop13772 = alloca i1, i1 0
+  %nop13773 = alloca i1, i1 0
+  %nop13774 = alloca i1, i1 0
+  %nop13775 = alloca i1, i1 0
+  %nop13776 = alloca i1, i1 0
+  %nop13777 = alloca i1, i1 0
+  %nop13778 = alloca i1, i1 0
+  %nop13779 = alloca i1, i1 0
+  %nop13780 = alloca i1, i1 0
+  %nop13781 = alloca i1, i1 0
+  %nop13782 = alloca i1, i1 0
+  %nop13783 = alloca i1, i1 0
+  %nop13784 = alloca i1, i1 0
+  %nop13785 = alloca i1, i1 0
+  %nop13786 = alloca i1, i1 0
+  %nop13787 = alloca i1, i1 0
+  %nop13788 = alloca i1, i1 0
+  %nop13789 = alloca i1, i1 0
+  %nop13790 = alloca i1, i1 0
+  %nop13791 = alloca i1, i1 0
+  %nop13792 = alloca i1, i1 0
+  %nop13793 = alloca i1, i1 0
+  %nop13794 = alloca i1, i1 0
+  %nop13795 = alloca i1, i1 0
+  %nop13796 = alloca i1, i1 0
+  %nop13797 = alloca i1, i1 0
+  %nop13798 = alloca i1, i1 0
+  %nop13799 = alloca i1, i1 0
+  %nop13800 = alloca i1, i1 0
+  %nop13801 = alloca i1, i1 0
+  %nop13802 = alloca i1, i1 0
+  %nop13803 = alloca i1, i1 0
+  %nop13804 = alloca i1, i1 0
+  %nop13805 = alloca i1, i1 0
+  %nop13806 = alloca i1, i1 0
+  %nop13807 = alloca i1, i1 0
+  %nop13808 = alloca i1, i1 0
+  %nop13809 = alloca i1, i1 0
+  %nop13810 = alloca i1, i1 0
+  %nop13811 = alloca i1, i1 0
+  %nop13812 = alloca i1, i1 0
+  %nop13813 = alloca i1, i1 0
+  %nop13814 = alloca i1, i1 0
+  %nop13815 = alloca i1, i1 0
+  %nop13816 = alloca i1, i1 0
+  %nop13817 = alloca i1, i1 0
+  %nop13818 = alloca i1, i1 0
+  %nop13819 = alloca i1, i1 0
+  %nop13820 = alloca i1, i1 0
+  %nop13821 = alloca i1, i1 0
+  %nop13822 = alloca i1, i1 0
+  %nop13823 = alloca i1, i1 0
+  %nop13824 = alloca i1, i1 0
+  %nop13825 = alloca i1, i1 0
+  %nop13826 = alloca i1, i1 0
+  %nop13827 = alloca i1, i1 0
+  %nop13828 = alloca i1, i1 0
+  %nop13829 = alloca i1, i1 0
+  %nop13830 = alloca i1, i1 0
+  %nop13831 = alloca i1, i1 0
+  %nop13832 = alloca i1, i1 0
+  %nop13833 = alloca i1, i1 0
+  %nop13834 = alloca i1, i1 0
+  %nop13835 = alloca i1, i1 0
+  %nop13836 = alloca i1, i1 0
+  %nop13837 = alloca i1, i1 0
+  %nop13838 = alloca i1, i1 0
+  %nop13839 = alloca i1, i1 0
+  %nop13840 = alloca i1, i1 0
+  %nop13841 = alloca i1, i1 0
+  %nop13842 = alloca i1, i1 0
+  %nop13843 = alloca i1, i1 0
+  %nop13844 = alloca i1, i1 0
+  %nop13845 = alloca i1, i1 0
+  %nop13846 = alloca i1, i1 0
+  %nop13847 = alloca i1, i1 0
+  %nop13848 = alloca i1, i1 0
+  %nop13849 = alloca i1, i1 0
+  %nop13850 = alloca i1, i1 0
+  %nop13851 = alloca i1, i1 0
+  %nop13852 = alloca i1, i1 0
+  %nop13853 = alloca i1, i1 0
+  %nop13854 = alloca i1, i1 0
+  %nop13855 = alloca i1, i1 0
+  %nop13856 = alloca i1, i1 0
+  %nop13857 = alloca i1, i1 0
+  %nop13858 = alloca i1, i1 0
+  %nop13859 = alloca i1, i1 0
+  %nop13860 = alloca i1, i1 0
+  %nop13861 = alloca i1, i1 0
+  %nop13862 = alloca i1, i1 0
+  %nop13863 = alloca i1, i1 0
+  %nop13864 = alloca i1, i1 0
+  %nop13865 = alloca i1, i1 0
+  %nop13866 = alloca i1, i1 0
+  %nop13867 = alloca i1, i1 0
+  %nop13868 = alloca i1, i1 0
+  %nop13869 = alloca i1, i1 0
+  %nop13870 = alloca i1, i1 0
+  %nop13871 = alloca i1, i1 0
+  %nop13872 = alloca i1, i1 0
+  %nop13873 = alloca i1, i1 0
+  %nop13874 = alloca i1, i1 0
+  %nop13875 = alloca i1, i1 0
+  %nop13876 = alloca i1, i1 0
+  %nop13877 = alloca i1, i1 0
+  %nop13878 = alloca i1, i1 0
+  %nop13879 = alloca i1, i1 0
+  %nop13880 = alloca i1, i1 0
+  %nop13881 = alloca i1, i1 0
+  %nop13882 = alloca i1, i1 0
+  %nop13883 = alloca i1, i1 0
+  %nop13884 = alloca i1, i1 0
+  %nop13885 = alloca i1, i1 0
+  %nop13886 = alloca i1, i1 0
+  %nop13887 = alloca i1, i1 0
+  %nop13888 = alloca i1, i1 0
+  %nop13889 = alloca i1, i1 0
+  %nop13890 = alloca i1, i1 0
+  %nop13891 = alloca i1, i1 0
+  %nop13892 = alloca i1, i1 0
+  %nop13893 = alloca i1, i1 0
+  %nop13894 = alloca i1, i1 0
+  %nop13895 = alloca i1, i1 0
+  %nop13896 = alloca i1, i1 0
+  %nop13897 = alloca i1, i1 0
+  %nop13898 = alloca i1, i1 0
+  %nop13899 = alloca i1, i1 0
+  %nop13900 = alloca i1, i1 0
+  %nop13901 = alloca i1, i1 0
+  %nop13902 = alloca i1, i1 0
+  %nop13903 = alloca i1, i1 0
+  %nop13904 = alloca i1, i1 0
+  %nop13905 = alloca i1, i1 0
+  %nop13906 = alloca i1, i1 0
+  %nop13907 = alloca i1, i1 0
+  %nop13908 = alloca i1, i1 0
+  %nop13909 = alloca i1, i1 0
+  %nop13910 = alloca i1, i1 0
+  %nop13911 = alloca i1, i1 0
+  %nop13912 = alloca i1, i1 0
+  %nop13913 = alloca i1, i1 0
+  %nop13914 = alloca i1, i1 0
+  %nop13915 = alloca i1, i1 0
+  %nop13916 = alloca i1, i1 0
+  %nop13917 = alloca i1, i1 0
+  %nop13918 = alloca i1, i1 0
+  %nop13919 = alloca i1, i1 0
+  %nop13920 = alloca i1, i1 0
+  %nop13921 = alloca i1, i1 0
+  %nop13922 = alloca i1, i1 0
+  %nop13923 = alloca i1, i1 0
+  %nop13924 = alloca i1, i1 0
+  %nop13925 = alloca i1, i1 0
+  %nop13926 = alloca i1, i1 0
+  %nop13927 = alloca i1, i1 0
+  %nop13928 = alloca i1, i1 0
+  %nop13929 = alloca i1, i1 0
+  %nop13930 = alloca i1, i1 0
+  %nop13931 = alloca i1, i1 0
+  %nop13932 = alloca i1, i1 0
+  %nop13933 = alloca i1, i1 0
+  %nop13934 = alloca i1, i1 0
+  %nop13935 = alloca i1, i1 0
+  %nop13936 = alloca i1, i1 0
+  %nop13937 = alloca i1, i1 0
+  %nop13938 = alloca i1, i1 0
+  %nop13939 = alloca i1, i1 0
+  %nop13940 = alloca i1, i1 0
+  %nop13941 = alloca i1, i1 0
+  %nop13942 = alloca i1, i1 0
+  %nop13943 = alloca i1, i1 0
+  %nop13944 = alloca i1, i1 0
+  %nop13945 = alloca i1, i1 0
+  %nop13946 = alloca i1, i1 0
+  %nop13947 = alloca i1, i1 0
+  %nop13948 = alloca i1, i1 0
+  %nop13949 = alloca i1, i1 0
+  %nop13950 = alloca i1, i1 0
+  %nop13951 = alloca i1, i1 0
+  %nop13952 = alloca i1, i1 0
+  %nop13953 = alloca i1, i1 0
+  %nop13954 = alloca i1, i1 0
+  %nop13955 = alloca i1, i1 0
+  %nop13956 = alloca i1, i1 0
+  %nop13957 = alloca i1, i1 0
+  %nop13958 = alloca i1, i1 0
+  %nop13959 = alloca i1, i1 0
+  %nop13960 = alloca i1, i1 0
+  %nop13961 = alloca i1, i1 0
+  %nop13962 = alloca i1, i1 0
+  %nop13963 = alloca i1, i1 0
+  %nop13964 = alloca i1, i1 0
+  %nop13965 = alloca i1, i1 0
+  %nop13966 = alloca i1, i1 0
+  %nop13967 = alloca i1, i1 0
+  %nop13968 = alloca i1, i1 0
+  %nop13969 = alloca i1, i1 0
+  %nop13970 = alloca i1, i1 0
+  %nop13971 = alloca i1, i1 0
+  %nop13972 = alloca i1, i1 0
+  %nop13973 = alloca i1, i1 0
+  %nop13974 = alloca i1, i1 0
+  %nop13975 = alloca i1, i1 0
+  %nop13976 = alloca i1, i1 0
+  %nop13977 = alloca i1, i1 0
+  %nop13978 = alloca i1, i1 0
+  %nop13979 = alloca i1, i1 0
+  %nop13980 = alloca i1, i1 0
+  %nop13981 = alloca i1, i1 0
+  %nop13982 = alloca i1, i1 0
+  %nop13983 = alloca i1, i1 0
+  %nop13984 = alloca i1, i1 0
+  %nop13985 = alloca i1, i1 0
+  %nop13986 = alloca i1, i1 0
+  %nop13987 = alloca i1, i1 0
+  %nop13988 = alloca i1, i1 0
+  %nop13989 = alloca i1, i1 0
+  %nop13990 = alloca i1, i1 0
+  %nop13991 = alloca i1, i1 0
+  %nop13992 = alloca i1, i1 0
+  %nop13993 = alloca i1, i1 0
+  %nop13994 = alloca i1, i1 0
+  %nop13995 = alloca i1, i1 0
+  %nop13996 = alloca i1, i1 0
+  %nop13997 = alloca i1, i1 0
+  %nop13998 = alloca i1, i1 0
+  %nop13999 = alloca i1, i1 0
+  %nop14000 = alloca i1, i1 0
+  %nop14001 = alloca i1, i1 0
+  %nop14002 = alloca i1, i1 0
+  %nop14003 = alloca i1, i1 0
+  %nop14004 = alloca i1, i1 0
+  %nop14005 = alloca i1, i1 0
+  %nop14006 = alloca i1, i1 0
+  %nop14007 = alloca i1, i1 0
+  %nop14008 = alloca i1, i1 0
+  %nop14009 = alloca i1, i1 0
+  %nop14010 = alloca i1, i1 0
+  %nop14011 = alloca i1, i1 0
+  %nop14012 = alloca i1, i1 0
+  %nop14013 = alloca i1, i1 0
+  %nop14014 = alloca i1, i1 0
+  %nop14015 = alloca i1, i1 0
+  %nop14016 = alloca i1, i1 0
+  %nop14017 = alloca i1, i1 0
+  %nop14018 = alloca i1, i1 0
+  %nop14019 = alloca i1, i1 0
+  %nop14020 = alloca i1, i1 0
+  %nop14021 = alloca i1, i1 0
+  %nop14022 = alloca i1, i1 0
+  %nop14023 = alloca i1, i1 0
+  %nop14024 = alloca i1, i1 0
+  %nop14025 = alloca i1, i1 0
+  %nop14026 = alloca i1, i1 0
+  %nop14027 = alloca i1, i1 0
+  %nop14028 = alloca i1, i1 0
+  %nop14029 = alloca i1, i1 0
+  %nop14030 = alloca i1, i1 0
+  %nop14031 = alloca i1, i1 0
+  %nop14032 = alloca i1, i1 0
+  %nop14033 = alloca i1, i1 0
+  %nop14034 = alloca i1, i1 0
+  %nop14035 = alloca i1, i1 0
+  %nop14036 = alloca i1, i1 0
+  %nop14037 = alloca i1, i1 0
+  %nop14038 = alloca i1, i1 0
+  %nop14039 = alloca i1, i1 0
+  %nop14040 = alloca i1, i1 0
+  %nop14041 = alloca i1, i1 0
+  %nop14042 = alloca i1, i1 0
+  %nop14043 = alloca i1, i1 0
+  %nop14044 = alloca i1, i1 0
+  %nop14045 = alloca i1, i1 0
+  %nop14046 = alloca i1, i1 0
+  %nop14047 = alloca i1, i1 0
+  %nop14048 = alloca i1, i1 0
+  %nop14049 = alloca i1, i1 0
+  %nop14050 = alloca i1, i1 0
+  %nop14051 = alloca i1, i1 0
+  %nop14052 = alloca i1, i1 0
+  %nop14053 = alloca i1, i1 0
+  %nop14054 = alloca i1, i1 0
+  %nop14055 = alloca i1, i1 0
+  %nop14056 = alloca i1, i1 0
+  %nop14057 = alloca i1, i1 0
+  %nop14058 = alloca i1, i1 0
+  %nop14059 = alloca i1, i1 0
+  %nop14060 = alloca i1, i1 0
+  %nop14061 = alloca i1, i1 0
+  %nop14062 = alloca i1, i1 0
+  %nop14063 = alloca i1, i1 0
+  %nop14064 = alloca i1, i1 0
+  %nop14065 = alloca i1, i1 0
+  %nop14066 = alloca i1, i1 0
+  %nop14067 = alloca i1, i1 0
+  %nop14068 = alloca i1, i1 0
+  %nop14069 = alloca i1, i1 0
+  %nop14070 = alloca i1, i1 0
+  %nop14071 = alloca i1, i1 0
+  %nop14072 = alloca i1, i1 0
+  %nop14073 = alloca i1, i1 0
+  %nop14074 = alloca i1, i1 0
+  %nop14075 = alloca i1, i1 0
+  %nop14076 = alloca i1, i1 0
+  %nop14077 = alloca i1, i1 0
+  %nop14078 = alloca i1, i1 0
+  %nop14079 = alloca i1, i1 0
+  %nop14080 = alloca i1, i1 0
+  %nop14081 = alloca i1, i1 0
+  %nop14082 = alloca i1, i1 0
+  %nop14083 = alloca i1, i1 0
+  %nop14084 = alloca i1, i1 0
+  %nop14085 = alloca i1, i1 0
+  %nop14086 = alloca i1, i1 0
+  %nop14087 = alloca i1, i1 0
+  %nop14088 = alloca i1, i1 0
+  %nop14089 = alloca i1, i1 0
+  %nop14090 = alloca i1, i1 0
+  %nop14091 = alloca i1, i1 0
+  %nop14092 = alloca i1, i1 0
+  %nop14093 = alloca i1, i1 0
+  %nop14094 = alloca i1, i1 0
+  %nop14095 = alloca i1, i1 0
+  %nop14096 = alloca i1, i1 0
+  %nop14097 = alloca i1, i1 0
+  %nop14098 = alloca i1, i1 0
+  %nop14099 = alloca i1, i1 0
+  %nop14100 = alloca i1, i1 0
+  %nop14101 = alloca i1, i1 0
+  %nop14102 = alloca i1, i1 0
+  %nop14103 = alloca i1, i1 0
+  %nop14104 = alloca i1, i1 0
+  %nop14105 = alloca i1, i1 0
+  %nop14106 = alloca i1, i1 0
+  %nop14107 = alloca i1, i1 0
+  %nop14108 = alloca i1, i1 0
+  %nop14109 = alloca i1, i1 0
+  %nop14110 = alloca i1, i1 0
+  %nop14111 = alloca i1, i1 0
+  %nop14112 = alloca i1, i1 0
+  %nop14113 = alloca i1, i1 0
+  %nop14114 = alloca i1, i1 0
+  %nop14115 = alloca i1, i1 0
+  %nop14116 = alloca i1, i1 0
+  %nop14117 = alloca i1, i1 0
+  %nop14118 = alloca i1, i1 0
+  %nop14119 = alloca i1, i1 0
+  %nop14120 = alloca i1, i1 0
+  %nop14121 = alloca i1, i1 0
+  %nop14122 = alloca i1, i1 0
+  %nop14123 = alloca i1, i1 0
+  %nop14124 = alloca i1, i1 0
+  %nop14125 = alloca i1, i1 0
+  %nop14126 = alloca i1, i1 0
+  %nop14127 = alloca i1, i1 0
+  %nop14128 = alloca i1, i1 0
+  %nop14129 = alloca i1, i1 0
+  %nop14130 = alloca i1, i1 0
+  %nop14131 = alloca i1, i1 0
+  %nop14132 = alloca i1, i1 0
+  %nop14133 = alloca i1, i1 0
+  %nop14134 = alloca i1, i1 0
+  %nop14135 = alloca i1, i1 0
+  %nop14136 = alloca i1, i1 0
+  %nop14137 = alloca i1, i1 0
+  %nop14138 = alloca i1, i1 0
+  %nop14139 = alloca i1, i1 0
+  %nop14140 = alloca i1, i1 0
+  %nop14141 = alloca i1, i1 0
+  %nop14142 = alloca i1, i1 0
+  %nop14143 = alloca i1, i1 0
+  %nop14144 = alloca i1, i1 0
+  %nop14145 = alloca i1, i1 0
+  %nop14146 = alloca i1, i1 0
+  %nop14147 = alloca i1, i1 0
+  %nop14148 = alloca i1, i1 0
+  %nop14149 = alloca i1, i1 0
+  %nop14150 = alloca i1, i1 0
+  %nop14151 = alloca i1, i1 0
+  %nop14152 = alloca i1, i1 0
+  %nop14153 = alloca i1, i1 0
+  %nop14154 = alloca i1, i1 0
+  %nop14155 = alloca i1, i1 0
+  %nop14156 = alloca i1, i1 0
+  %nop14157 = alloca i1, i1 0
+  %nop14158 = alloca i1, i1 0
+  %nop14159 = alloca i1, i1 0
+  %nop14160 = alloca i1, i1 0
+  %nop14161 = alloca i1, i1 0
+  %nop14162 = alloca i1, i1 0
+  %nop14163 = alloca i1, i1 0
+  %nop14164 = alloca i1, i1 0
+  %nop14165 = alloca i1, i1 0
+  %nop14166 = alloca i1, i1 0
+  %nop14167 = alloca i1, i1 0
+  %nop14168 = alloca i1, i1 0
+  %nop14169 = alloca i1, i1 0
+  %nop14170 = alloca i1, i1 0
+  %nop14171 = alloca i1, i1 0
+  %nop14172 = alloca i1, i1 0
+  %nop14173 = alloca i1, i1 0
+  %nop14174 = alloca i1, i1 0
+  %nop14175 = alloca i1, i1 0
+  %nop14176 = alloca i1, i1 0
+  %nop14177 = alloca i1, i1 0
+  %nop14178 = alloca i1, i1 0
+  %nop14179 = alloca i1, i1 0
+  %nop14180 = alloca i1, i1 0
+  %nop14181 = alloca i1, i1 0
+  %nop14182 = alloca i1, i1 0
+  %nop14183 = alloca i1, i1 0
+  %nop14184 = alloca i1, i1 0
+  %nop14185 = alloca i1, i1 0
+  %nop14186 = alloca i1, i1 0
+  %nop14187 = alloca i1, i1 0
+  %nop14188 = alloca i1, i1 0
+  %nop14189 = alloca i1, i1 0
+  %nop14190 = alloca i1, i1 0
+  %nop14191 = alloca i1, i1 0
+  %nop14192 = alloca i1, i1 0
+  %nop14193 = alloca i1, i1 0
+  %nop14194 = alloca i1, i1 0
+  %nop14195 = alloca i1, i1 0
+  %nop14196 = alloca i1, i1 0
+  %nop14197 = alloca i1, i1 0
+  %nop14198 = alloca i1, i1 0
+  %nop14199 = alloca i1, i1 0
+  %nop14200 = alloca i1, i1 0
+  %nop14201 = alloca i1, i1 0
+  %nop14202 = alloca i1, i1 0
+  %nop14203 = alloca i1, i1 0
+  %nop14204 = alloca i1, i1 0
+  %nop14205 = alloca i1, i1 0
+  %nop14206 = alloca i1, i1 0
+  %nop14207 = alloca i1, i1 0
+  %nop14208 = alloca i1, i1 0
+  %nop14209 = alloca i1, i1 0
+  %nop14210 = alloca i1, i1 0
+  %nop14211 = alloca i1, i1 0
+  %nop14212 = alloca i1, i1 0
+  %nop14213 = alloca i1, i1 0
+  %nop14214 = alloca i1, i1 0
+  %nop14215 = alloca i1, i1 0
+  %nop14216 = alloca i1, i1 0
+  %nop14217 = alloca i1, i1 0
+  %nop14218 = alloca i1, i1 0
+  %nop14219 = alloca i1, i1 0
+  %nop14220 = alloca i1, i1 0
+  %nop14221 = alloca i1, i1 0
+  %nop14222 = alloca i1, i1 0
+  %nop14223 = alloca i1, i1 0
+  %nop14224 = alloca i1, i1 0
+  %nop14225 = alloca i1, i1 0
+  %nop14226 = alloca i1, i1 0
+  %nop14227 = alloca i1, i1 0
+  %nop14228 = alloca i1, i1 0
+  %nop14229 = alloca i1, i1 0
+  %nop14230 = alloca i1, i1 0
+  %nop14231 = alloca i1, i1 0
+  %nop14232 = alloca i1, i1 0
+  %nop14233 = alloca i1, i1 0
+  %nop14234 = alloca i1, i1 0
+  %nop14235 = alloca i1, i1 0
+  %nop14236 = alloca i1, i1 0
+  %nop14237 = alloca i1, i1 0
+  %nop14238 = alloca i1, i1 0
+  %nop14239 = alloca i1, i1 0
+  %nop14240 = alloca i1, i1 0
+  %nop14241 = alloca i1, i1 0
+  %nop14242 = alloca i1, i1 0
+  %nop14243 = alloca i1, i1 0
+  %nop14244 = alloca i1, i1 0
+  %nop14245 = alloca i1, i1 0
+  %nop14246 = alloca i1, i1 0
+  %nop14247 = alloca i1, i1 0
+  %nop14248 = alloca i1, i1 0
+  %nop14249 = alloca i1, i1 0
+  %nop14250 = alloca i1, i1 0
+  %nop14251 = alloca i1, i1 0
+  %nop14252 = alloca i1, i1 0
+  %nop14253 = alloca i1, i1 0
+  %nop14254 = alloca i1, i1 0
+  %nop14255 = alloca i1, i1 0
+  %nop14256 = alloca i1, i1 0
+  %nop14257 = alloca i1, i1 0
+  %nop14258 = alloca i1, i1 0
+  %nop14259 = alloca i1, i1 0
+  %nop14260 = alloca i1, i1 0
+  %nop14261 = alloca i1, i1 0
+  %nop14262 = alloca i1, i1 0
+  %nop14263 = alloca i1, i1 0
+  %nop14264 = alloca i1, i1 0
+  %nop14265 = alloca i1, i1 0
+  %nop14266 = alloca i1, i1 0
+  %nop14267 = alloca i1, i1 0
+  %nop14268 = alloca i1, i1 0
+  %nop14269 = alloca i1, i1 0
+  %nop14270 = alloca i1, i1 0
+  %nop14271 = alloca i1, i1 0
+  %nop14272 = alloca i1, i1 0
+  %nop14273 = alloca i1, i1 0
+  %nop14274 = alloca i1, i1 0
+  %nop14275 = alloca i1, i1 0
+  %nop14276 = alloca i1, i1 0
+  %nop14277 = alloca i1, i1 0
+  %nop14278 = alloca i1, i1 0
+  %nop14279 = alloca i1, i1 0
+  %nop14280 = alloca i1, i1 0
+  %nop14281 = alloca i1, i1 0
+  %nop14282 = alloca i1, i1 0
+  %nop14283 = alloca i1, i1 0
+  %nop14284 = alloca i1, i1 0
+  %nop14285 = alloca i1, i1 0
+  %nop14286 = alloca i1, i1 0
+  %nop14287 = alloca i1, i1 0
+  %nop14288 = alloca i1, i1 0
+  %nop14289 = alloca i1, i1 0
+  %nop14290 = alloca i1, i1 0
+  %nop14291 = alloca i1, i1 0
+  %nop14292 = alloca i1, i1 0
+  %nop14293 = alloca i1, i1 0
+  %nop14294 = alloca i1, i1 0
+  %nop14295 = alloca i1, i1 0
+  %nop14296 = alloca i1, i1 0
+  %nop14297 = alloca i1, i1 0
+  %nop14298 = alloca i1, i1 0
+  %nop14299 = alloca i1, i1 0
+  %nop14300 = alloca i1, i1 0
+  %nop14301 = alloca i1, i1 0
+  %nop14302 = alloca i1, i1 0
+  %nop14303 = alloca i1, i1 0
+  %nop14304 = alloca i1, i1 0
+  %nop14305 = alloca i1, i1 0
+  %nop14306 = alloca i1, i1 0
+  %nop14307 = alloca i1, i1 0
+  %nop14308 = alloca i1, i1 0
+  %nop14309 = alloca i1, i1 0
+  %nop14310 = alloca i1, i1 0
+  %nop14311 = alloca i1, i1 0
+  %nop14312 = alloca i1, i1 0
+  %nop14313 = alloca i1, i1 0
+  %nop14314 = alloca i1, i1 0
+  %nop14315 = alloca i1, i1 0
+  %nop14316 = alloca i1, i1 0
+  %nop14317 = alloca i1, i1 0
+  %nop14318 = alloca i1, i1 0
+  %nop14319 = alloca i1, i1 0
+  %nop14320 = alloca i1, i1 0
+  %nop14321 = alloca i1, i1 0
+  %nop14322 = alloca i1, i1 0
+  %nop14323 = alloca i1, i1 0
+  %nop14324 = alloca i1, i1 0
+  %nop14325 = alloca i1, i1 0
+  %nop14326 = alloca i1, i1 0
+  %nop14327 = alloca i1, i1 0
+  %nop14328 = alloca i1, i1 0
+  %nop14329 = alloca i1, i1 0
+  %nop14330 = alloca i1, i1 0
+  %nop14331 = alloca i1, i1 0
+  %nop14332 = alloca i1, i1 0
+  %nop14333 = alloca i1, i1 0
+  %nop14334 = alloca i1, i1 0
+  %nop14335 = alloca i1, i1 0
+  %nop14336 = alloca i1, i1 0
+  %nop14337 = alloca i1, i1 0
+  %nop14338 = alloca i1, i1 0
+  %nop14339 = alloca i1, i1 0
+  %nop14340 = alloca i1, i1 0
+  %nop14341 = alloca i1, i1 0
+  %nop14342 = alloca i1, i1 0
+  %nop14343 = alloca i1, i1 0
+  %nop14344 = alloca i1, i1 0
+  %nop14345 = alloca i1, i1 0
+  %nop14346 = alloca i1, i1 0
+  %nop14347 = alloca i1, i1 0
+  %nop14348 = alloca i1, i1 0
+  %nop14349 = alloca i1, i1 0
+  %nop14350 = alloca i1, i1 0
+  %nop14351 = alloca i1, i1 0
+  %nop14352 = alloca i1, i1 0
+  %nop14353 = alloca i1, i1 0
+  %nop14354 = alloca i1, i1 0
+  %nop14355 = alloca i1, i1 0
+  %nop14356 = alloca i1, i1 0
+  %nop14357 = alloca i1, i1 0
+  %nop14358 = alloca i1, i1 0
+  %nop14359 = alloca i1, i1 0
+  %nop14360 = alloca i1, i1 0
+  %nop14361 = alloca i1, i1 0
+  %nop14362 = alloca i1, i1 0
+  %nop14363 = alloca i1, i1 0
+  %nop14364 = alloca i1, i1 0
+  %nop14365 = alloca i1, i1 0
+  %nop14366 = alloca i1, i1 0
+  %nop14367 = alloca i1, i1 0
+  %nop14368 = alloca i1, i1 0
+  %nop14369 = alloca i1, i1 0
+  %nop14370 = alloca i1, i1 0
+  %nop14371 = alloca i1, i1 0
+  %nop14372 = alloca i1, i1 0
+  %nop14373 = alloca i1, i1 0
+  %nop14374 = alloca i1, i1 0
+  %nop14375 = alloca i1, i1 0
+  %nop14376 = alloca i1, i1 0
+  %nop14377 = alloca i1, i1 0
+  %nop14378 = alloca i1, i1 0
+  %nop14379 = alloca i1, i1 0
+  %nop14380 = alloca i1, i1 0
+  %nop14381 = alloca i1, i1 0
+  %nop14382 = alloca i1, i1 0
+  %nop14383 = alloca i1, i1 0
+  %nop14384 = alloca i1, i1 0
+  %nop14385 = alloca i1, i1 0
+  %nop14386 = alloca i1, i1 0
+  %nop14387 = alloca i1, i1 0
+  %nop14388 = alloca i1, i1 0
+  %nop14389 = alloca i1, i1 0
+  %nop14390 = alloca i1, i1 0
+  %nop14391 = alloca i1, i1 0
+  %nop14392 = alloca i1, i1 0
+  %nop14393 = alloca i1, i1 0
+  %nop14394 = alloca i1, i1 0
+  %nop14395 = alloca i1, i1 0
+  %nop14396 = alloca i1, i1 0
+  %nop14397 = alloca i1, i1 0
+  %nop14398 = alloca i1, i1 0
+  %nop14399 = alloca i1, i1 0
+  %nop14400 = alloca i1, i1 0
+  %nop14401 = alloca i1, i1 0
+  %nop14402 = alloca i1, i1 0
+  %nop14403 = alloca i1, i1 0
+  %nop14404 = alloca i1, i1 0
+  %nop14405 = alloca i1, i1 0
+  %nop14406 = alloca i1, i1 0
+  %nop14407 = alloca i1, i1 0
+  %nop14408 = alloca i1, i1 0
+  %nop14409 = alloca i1, i1 0
+  %nop14410 = alloca i1, i1 0
+  %nop14411 = alloca i1, i1 0
+  %nop14412 = alloca i1, i1 0
+  %nop14413 = alloca i1, i1 0
+  %nop14414 = alloca i1, i1 0
+  %nop14415 = alloca i1, i1 0
+  %nop14416 = alloca i1, i1 0
+  %nop14417 = alloca i1, i1 0
+  %nop14418 = alloca i1, i1 0
+  %nop14419 = alloca i1, i1 0
+  %nop14420 = alloca i1, i1 0
+  %nop14421 = alloca i1, i1 0
+  %nop14422 = alloca i1, i1 0
+  %nop14423 = alloca i1, i1 0
+  %nop14424 = alloca i1, i1 0
+  %nop14425 = alloca i1, i1 0
+  %nop14426 = alloca i1, i1 0
+  %nop14427 = alloca i1, i1 0
+  %nop14428 = alloca i1, i1 0
+  %nop14429 = alloca i1, i1 0
+  %nop14430 = alloca i1, i1 0
+  %nop14431 = alloca i1, i1 0
+  %nop14432 = alloca i1, i1 0
+  %nop14433 = alloca i1, i1 0
+  %nop14434 = alloca i1, i1 0
+  %nop14435 = alloca i1, i1 0
+  %nop14436 = alloca i1, i1 0
+  %nop14437 = alloca i1, i1 0
+  %nop14438 = alloca i1, i1 0
+  %nop14439 = alloca i1, i1 0
+  %nop14440 = alloca i1, i1 0
+  %nop14441 = alloca i1, i1 0
+  %nop14442 = alloca i1, i1 0
+  %nop14443 = alloca i1, i1 0
+  %nop14444 = alloca i1, i1 0
+  %nop14445 = alloca i1, i1 0
+  %nop14446 = alloca i1, i1 0
+  %nop14447 = alloca i1, i1 0
+  %nop14448 = alloca i1, i1 0
+  %nop14449 = alloca i1, i1 0
+  %nop14450 = alloca i1, i1 0
+  %nop14451 = alloca i1, i1 0
+  %nop14452 = alloca i1, i1 0
+  %nop14453 = alloca i1, i1 0
+  %nop14454 = alloca i1, i1 0
+  %nop14455 = alloca i1, i1 0
+  %nop14456 = alloca i1, i1 0
+  %nop14457 = alloca i1, i1 0
+  %nop14458 = alloca i1, i1 0
+  %nop14459 = alloca i1, i1 0
+  %nop14460 = alloca i1, i1 0
+  %nop14461 = alloca i1, i1 0
+  %nop14462 = alloca i1, i1 0
+  %nop14463 = alloca i1, i1 0
+  %nop14464 = alloca i1, i1 0
+  %nop14465 = alloca i1, i1 0
+  %nop14466 = alloca i1, i1 0
+  %nop14467 = alloca i1, i1 0
+  %nop14468 = alloca i1, i1 0
+  %nop14469 = alloca i1, i1 0
+  %nop14470 = alloca i1, i1 0
+  %nop14471 = alloca i1, i1 0
+  %nop14472 = alloca i1, i1 0
+  %nop14473 = alloca i1, i1 0
+  %nop14474 = alloca i1, i1 0
+  %nop14475 = alloca i1, i1 0
+  %nop14476 = alloca i1, i1 0
+  %nop14477 = alloca i1, i1 0
+  %nop14478 = alloca i1, i1 0
+  %nop14479 = alloca i1, i1 0
+  %nop14480 = alloca i1, i1 0
+  %nop14481 = alloca i1, i1 0
+  %nop14482 = alloca i1, i1 0
+  %nop14483 = alloca i1, i1 0
+  %nop14484 = alloca i1, i1 0
+  %nop14485 = alloca i1, i1 0
+  %nop14486 = alloca i1, i1 0
+  %nop14487 = alloca i1, i1 0
+  %nop14488 = alloca i1, i1 0
+  %nop14489 = alloca i1, i1 0
+  %nop14490 = alloca i1, i1 0
+  %nop14491 = alloca i1, i1 0
+  %nop14492 = alloca i1, i1 0
+  %nop14493 = alloca i1, i1 0
+  %nop14494 = alloca i1, i1 0
+  %nop14495 = alloca i1, i1 0
+  %nop14496 = alloca i1, i1 0
+  %nop14497 = alloca i1, i1 0
+  %nop14498 = alloca i1, i1 0
+  %nop14499 = alloca i1, i1 0
+  %nop14500 = alloca i1, i1 0
+  %nop14501 = alloca i1, i1 0
+  %nop14502 = alloca i1, i1 0
+  %nop14503 = alloca i1, i1 0
+  %nop14504 = alloca i1, i1 0
+  %nop14505 = alloca i1, i1 0
+  %nop14506 = alloca i1, i1 0
+  %nop14507 = alloca i1, i1 0
+  %nop14508 = alloca i1, i1 0
+  %nop14509 = alloca i1, i1 0
+  %nop14510 = alloca i1, i1 0
+  %nop14511 = alloca i1, i1 0
+  %nop14512 = alloca i1, i1 0
+  %nop14513 = alloca i1, i1 0
+  %nop14514 = alloca i1, i1 0
+  %nop14515 = alloca i1, i1 0
+  %nop14516 = alloca i1, i1 0
+  %nop14517 = alloca i1, i1 0
+  %nop14518 = alloca i1, i1 0
+  %nop14519 = alloca i1, i1 0
+  %nop14520 = alloca i1, i1 0
+  %nop14521 = alloca i1, i1 0
+  %nop14522 = alloca i1, i1 0
+  %nop14523 = alloca i1, i1 0
+  %nop14524 = alloca i1, i1 0
+  %nop14525 = alloca i1, i1 0
+  %nop14526 = alloca i1, i1 0
+  %nop14527 = alloca i1, i1 0
+  %nop14528 = alloca i1, i1 0
+  %nop14529 = alloca i1, i1 0
+  %nop14530 = alloca i1, i1 0
+  %nop14531 = alloca i1, i1 0
+  %nop14532 = alloca i1, i1 0
+  %nop14533 = alloca i1, i1 0
+  %nop14534 = alloca i1, i1 0
+  %nop14535 = alloca i1, i1 0
+  %nop14536 = alloca i1, i1 0
+  %nop14537 = alloca i1, i1 0
+  %nop14538 = alloca i1, i1 0
+  %nop14539 = alloca i1, i1 0
+  %nop14540 = alloca i1, i1 0
+  %nop14541 = alloca i1, i1 0
+  %nop14542 = alloca i1, i1 0
+  %nop14543 = alloca i1, i1 0
+  %nop14544 = alloca i1, i1 0
+  %nop14545 = alloca i1, i1 0
+  %nop14546 = alloca i1, i1 0
+  %nop14547 = alloca i1, i1 0
+  %nop14548 = alloca i1, i1 0
+  %nop14549 = alloca i1, i1 0
+  %nop14550 = alloca i1, i1 0
+  %nop14551 = alloca i1, i1 0
+  %nop14552 = alloca i1, i1 0
+  %nop14553 = alloca i1, i1 0
+  %nop14554 = alloca i1, i1 0
+  %nop14555 = alloca i1, i1 0
+  %nop14556 = alloca i1, i1 0
+  %nop14557 = alloca i1, i1 0
+  %nop14558 = alloca i1, i1 0
+  %nop14559 = alloca i1, i1 0
+  %nop14560 = alloca i1, i1 0
+  %nop14561 = alloca i1, i1 0
+  %nop14562 = alloca i1, i1 0
+  %nop14563 = alloca i1, i1 0
+  %nop14564 = alloca i1, i1 0
+  %nop14565 = alloca i1, i1 0
+  %nop14566 = alloca i1, i1 0
+  %nop14567 = alloca i1, i1 0
+  %nop14568 = alloca i1, i1 0
+  %nop14569 = alloca i1, i1 0
+  %nop14570 = alloca i1, i1 0
+  %nop14571 = alloca i1, i1 0
+  %nop14572 = alloca i1, i1 0
+  %nop14573 = alloca i1, i1 0
+  %nop14574 = alloca i1, i1 0
+  %nop14575 = alloca i1, i1 0
+  %nop14576 = alloca i1, i1 0
+  %nop14577 = alloca i1, i1 0
+  %nop14578 = alloca i1, i1 0
+  %nop14579 = alloca i1, i1 0
+  %nop14580 = alloca i1, i1 0
+  %nop14581 = alloca i1, i1 0
+  %nop14582 = alloca i1, i1 0
+  %nop14583 = alloca i1, i1 0
+  %nop14584 = alloca i1, i1 0
+  %nop14585 = alloca i1, i1 0
+  %nop14586 = alloca i1, i1 0
+  %nop14587 = alloca i1, i1 0
+  %nop14588 = alloca i1, i1 0
+  %nop14589 = alloca i1, i1 0
+  %nop14590 = alloca i1, i1 0
+  %nop14591 = alloca i1, i1 0
+  %nop14592 = alloca i1, i1 0
+  %nop14593 = alloca i1, i1 0
+  %nop14594 = alloca i1, i1 0
+  %nop14595 = alloca i1, i1 0
+  %nop14596 = alloca i1, i1 0
+  %nop14597 = alloca i1, i1 0
+  %nop14598 = alloca i1, i1 0
+  %nop14599 = alloca i1, i1 0
+  %nop14600 = alloca i1, i1 0
+  %nop14601 = alloca i1, i1 0
+  %nop14602 = alloca i1, i1 0
+  %nop14603 = alloca i1, i1 0
+  %nop14604 = alloca i1, i1 0
+  %nop14605 = alloca i1, i1 0
+  %nop14606 = alloca i1, i1 0
+  %nop14607 = alloca i1, i1 0
+  %nop14608 = alloca i1, i1 0
+  %nop14609 = alloca i1, i1 0
+  %nop14610 = alloca i1, i1 0
+  %nop14611 = alloca i1, i1 0
+  %nop14612 = alloca i1, i1 0
+  %nop14613 = alloca i1, i1 0
+  %nop14614 = alloca i1, i1 0
+  %nop14615 = alloca i1, i1 0
+  %nop14616 = alloca i1, i1 0
+  %nop14617 = alloca i1, i1 0
+  %nop14618 = alloca i1, i1 0
+  %nop14619 = alloca i1, i1 0
+  %nop14620 = alloca i1, i1 0
+  %nop14621 = alloca i1, i1 0
+  %nop14622 = alloca i1, i1 0
+  %nop14623 = alloca i1, i1 0
+  %nop14624 = alloca i1, i1 0
+  %nop14625 = alloca i1, i1 0
+  %nop14626 = alloca i1, i1 0
+  %nop14627 = alloca i1, i1 0
+  %nop14628 = alloca i1, i1 0
+  %nop14629 = alloca i1, i1 0
+  %nop14630 = alloca i1, i1 0
+  %nop14631 = alloca i1, i1 0
+  %nop14632 = alloca i1, i1 0
+  %nop14633 = alloca i1, i1 0
+  %nop14634 = alloca i1, i1 0
+  %nop14635 = alloca i1, i1 0
+  %nop14636 = alloca i1, i1 0
+  %nop14637 = alloca i1, i1 0
+  %nop14638 = alloca i1, i1 0
+  %nop14639 = alloca i1, i1 0
+  %nop14640 = alloca i1, i1 0
+  %nop14641 = alloca i1, i1 0
+  %nop14642 = alloca i1, i1 0
+  %nop14643 = alloca i1, i1 0
+  %nop14644 = alloca i1, i1 0
+  %nop14645 = alloca i1, i1 0
+  %nop14646 = alloca i1, i1 0
+  %nop14647 = alloca i1, i1 0
+  %nop14648 = alloca i1, i1 0
+  %nop14649 = alloca i1, i1 0
+  %nop14650 = alloca i1, i1 0
+  %nop14651 = alloca i1, i1 0
+  %nop14652 = alloca i1, i1 0
+  %nop14653 = alloca i1, i1 0
+  %nop14654 = alloca i1, i1 0
+  %nop14655 = alloca i1, i1 0
+  %nop14656 = alloca i1, i1 0
+  %nop14657 = alloca i1, i1 0
+  %nop14658 = alloca i1, i1 0
+  %nop14659 = alloca i1, i1 0
+  %nop14660 = alloca i1, i1 0
+  %nop14661 = alloca i1, i1 0
+  %nop14662 = alloca i1, i1 0
+  %nop14663 = alloca i1, i1 0
+  %nop14664 = alloca i1, i1 0
+  %nop14665 = alloca i1, i1 0
+  %nop14666 = alloca i1, i1 0
+  %nop14667 = alloca i1, i1 0
+  %nop14668 = alloca i1, i1 0
+  %nop14669 = alloca i1, i1 0
+  %nop14670 = alloca i1, i1 0
+  %nop14671 = alloca i1, i1 0
+  %nop14672 = alloca i1, i1 0
+  %nop14673 = alloca i1, i1 0
+  %nop14674 = alloca i1, i1 0
+  %nop14675 = alloca i1, i1 0
+  %nop14676 = alloca i1, i1 0
+  %nop14677 = alloca i1, i1 0
+  %nop14678 = alloca i1, i1 0
+  %nop14679 = alloca i1, i1 0
+  %nop14680 = alloca i1, i1 0
+  %nop14681 = alloca i1, i1 0
+  %nop14682 = alloca i1, i1 0
+  %nop14683 = alloca i1, i1 0
+  %nop14684 = alloca i1, i1 0
+  %nop14685 = alloca i1, i1 0
+  %nop14686 = alloca i1, i1 0
+  %nop14687 = alloca i1, i1 0
+  %nop14688 = alloca i1, i1 0
+  %nop14689 = alloca i1, i1 0
+  %nop14690 = alloca i1, i1 0
+  %nop14691 = alloca i1, i1 0
+  %nop14692 = alloca i1, i1 0
+  %nop14693 = alloca i1, i1 0
+  %nop14694 = alloca i1, i1 0
+  %nop14695 = alloca i1, i1 0
+  %nop14696 = alloca i1, i1 0
+  %nop14697 = alloca i1, i1 0
+  %nop14698 = alloca i1, i1 0
+  %nop14699 = alloca i1, i1 0
+  %nop14700 = alloca i1, i1 0
+  %nop14701 = alloca i1, i1 0
+  %nop14702 = alloca i1, i1 0
+  %nop14703 = alloca i1, i1 0
+  %nop14704 = alloca i1, i1 0
+  %nop14705 = alloca i1, i1 0
+  %nop14706 = alloca i1, i1 0
+  %nop14707 = alloca i1, i1 0
+  %nop14708 = alloca i1, i1 0
+  %nop14709 = alloca i1, i1 0
+  %nop14710 = alloca i1, i1 0
+  %nop14711 = alloca i1, i1 0
+  %nop14712 = alloca i1, i1 0
+  %nop14713 = alloca i1, i1 0
+  %nop14714 = alloca i1, i1 0
+  %nop14715 = alloca i1, i1 0
+  %nop14716 = alloca i1, i1 0
+  %nop14717 = alloca i1, i1 0
+  %nop14718 = alloca i1, i1 0
+  %nop14719 = alloca i1, i1 0
+  %nop14720 = alloca i1, i1 0
+  %nop14721 = alloca i1, i1 0
+  %nop14722 = alloca i1, i1 0
+  %nop14723 = alloca i1, i1 0
+  %nop14724 = alloca i1, i1 0
+  %nop14725 = alloca i1, i1 0
+  %nop14726 = alloca i1, i1 0
+  %nop14727 = alloca i1, i1 0
+  %nop14728 = alloca i1, i1 0
+  %nop14729 = alloca i1, i1 0
+  %nop14730 = alloca i1, i1 0
+  %nop14731 = alloca i1, i1 0
+  %nop14732 = alloca i1, i1 0
+  %nop14733 = alloca i1, i1 0
+  %nop14734 = alloca i1, i1 0
+  %nop14735 = alloca i1, i1 0
+  %nop14736 = alloca i1, i1 0
+  %nop14737 = alloca i1, i1 0
+  %nop14738 = alloca i1, i1 0
+  %nop14739 = alloca i1, i1 0
+  %nop14740 = alloca i1, i1 0
+  %nop14741 = alloca i1, i1 0
+  %nop14742 = alloca i1, i1 0
+  %nop14743 = alloca i1, i1 0
+  %nop14744 = alloca i1, i1 0
+  %nop14745 = alloca i1, i1 0
+  %nop14746 = alloca i1, i1 0
+  %nop14747 = alloca i1, i1 0
+  %nop14748 = alloca i1, i1 0
+  %nop14749 = alloca i1, i1 0
+  %nop14750 = alloca i1, i1 0
+  %nop14751 = alloca i1, i1 0
+  %nop14752 = alloca i1, i1 0
+  %nop14753 = alloca i1, i1 0
+  %nop14754 = alloca i1, i1 0
+  %nop14755 = alloca i1, i1 0
+  %nop14756 = alloca i1, i1 0
+  %nop14757 = alloca i1, i1 0
+  %nop14758 = alloca i1, i1 0
+  %nop14759 = alloca i1, i1 0
+  %nop14760 = alloca i1, i1 0
+  %nop14761 = alloca i1, i1 0
+  %nop14762 = alloca i1, i1 0
+  %nop14763 = alloca i1, i1 0
+  %nop14764 = alloca i1, i1 0
+  %nop14765 = alloca i1, i1 0
+  %nop14766 = alloca i1, i1 0
+  %nop14767 = alloca i1, i1 0
+  %nop14768 = alloca i1, i1 0
+  %nop14769 = alloca i1, i1 0
+  %nop14770 = alloca i1, i1 0
+  %nop14771 = alloca i1, i1 0
+  %nop14772 = alloca i1, i1 0
+  %nop14773 = alloca i1, i1 0
+  %nop14774 = alloca i1, i1 0
+  %nop14775 = alloca i1, i1 0
+  %nop14776 = alloca i1, i1 0
+  %nop14777 = alloca i1, i1 0
+  %nop14778 = alloca i1, i1 0
+  %nop14779 = alloca i1, i1 0
+  %nop14780 = alloca i1, i1 0
+  %nop14781 = alloca i1, i1 0
+  %nop14782 = alloca i1, i1 0
+  %nop14783 = alloca i1, i1 0
+  %nop14784 = alloca i1, i1 0
+  %nop14785 = alloca i1, i1 0
+  %nop14786 = alloca i1, i1 0
+  %nop14787 = alloca i1, i1 0
+  %nop14788 = alloca i1, i1 0
+  %nop14789 = alloca i1, i1 0
+  %nop14790 = alloca i1, i1 0
+  %nop14791 = alloca i1, i1 0
+  %nop14792 = alloca i1, i1 0
+  %nop14793 = alloca i1, i1 0
+  %nop14794 = alloca i1, i1 0
+  %nop14795 = alloca i1, i1 0
+  %nop14796 = alloca i1, i1 0
+  %nop14797 = alloca i1, i1 0
+  %nop14798 = alloca i1, i1 0
+  %nop14799 = alloca i1, i1 0
+  %nop14800 = alloca i1, i1 0
+  %nop14801 = alloca i1, i1 0
+  %nop14802 = alloca i1, i1 0
+  %nop14803 = alloca i1, i1 0
+  %nop14804 = alloca i1, i1 0
+  %nop14805 = alloca i1, i1 0
+  %nop14806 = alloca i1, i1 0
+  %nop14807 = alloca i1, i1 0
+  %nop14808 = alloca i1, i1 0
+  %nop14809 = alloca i1, i1 0
+  %nop14810 = alloca i1, i1 0
+  %nop14811 = alloca i1, i1 0
+  %nop14812 = alloca i1, i1 0
+  %nop14813 = alloca i1, i1 0
+  %nop14814 = alloca i1, i1 0
+  %nop14815 = alloca i1, i1 0
+  %nop14816 = alloca i1, i1 0
+  %nop14817 = alloca i1, i1 0
+  %nop14818 = alloca i1, i1 0
+  %nop14819 = alloca i1, i1 0
+  %nop14820 = alloca i1, i1 0
+  %nop14821 = alloca i1, i1 0
+  %nop14822 = alloca i1, i1 0
+  %nop14823 = alloca i1, i1 0
+  %nop14824 = alloca i1, i1 0
+  %nop14825 = alloca i1, i1 0
+  %nop14826 = alloca i1, i1 0
+  %nop14827 = alloca i1, i1 0
+  %nop14828 = alloca i1, i1 0
+  %nop14829 = alloca i1, i1 0
+  %nop14830 = alloca i1, i1 0
+  %nop14831 = alloca i1, i1 0
+  %nop14832 = alloca i1, i1 0
+  %nop14833 = alloca i1, i1 0
+  %nop14834 = alloca i1, i1 0
+  %nop14835 = alloca i1, i1 0
+  %nop14836 = alloca i1, i1 0
+  %nop14837 = alloca i1, i1 0
+  %nop14838 = alloca i1, i1 0
+  %nop14839 = alloca i1, i1 0
+  %nop14840 = alloca i1, i1 0
+  %nop14841 = alloca i1, i1 0
+  %nop14842 = alloca i1, i1 0
+  %nop14843 = alloca i1, i1 0
+  %nop14844 = alloca i1, i1 0
+  %nop14845 = alloca i1, i1 0
+  %nop14846 = alloca i1, i1 0
+  %nop14847 = alloca i1, i1 0
+  %nop14848 = alloca i1, i1 0
+  %nop14849 = alloca i1, i1 0
+  %nop14850 = alloca i1, i1 0
+  %nop14851 = alloca i1, i1 0
+  %nop14852 = alloca i1, i1 0
+  %nop14853 = alloca i1, i1 0
+  %nop14854 = alloca i1, i1 0
+  %nop14855 = alloca i1, i1 0
+  %nop14856 = alloca i1, i1 0
+  %nop14857 = alloca i1, i1 0
+  %nop14858 = alloca i1, i1 0
+  %nop14859 = alloca i1, i1 0
+  %nop14860 = alloca i1, i1 0
+  %nop14861 = alloca i1, i1 0
+  %nop14862 = alloca i1, i1 0
+  %nop14863 = alloca i1, i1 0
+  %nop14864 = alloca i1, i1 0
+  %nop14865 = alloca i1, i1 0
+  %nop14866 = alloca i1, i1 0
+  %nop14867 = alloca i1, i1 0
+  %nop14868 = alloca i1, i1 0
+  %nop14869 = alloca i1, i1 0
+  %nop14870 = alloca i1, i1 0
+  %nop14871 = alloca i1, i1 0
+  %nop14872 = alloca i1, i1 0
+  %nop14873 = alloca i1, i1 0
+  %nop14874 = alloca i1, i1 0
+  %nop14875 = alloca i1, i1 0
+  %nop14876 = alloca i1, i1 0
+  %nop14877 = alloca i1, i1 0
+  %nop14878 = alloca i1, i1 0
+  %nop14879 = alloca i1, i1 0
+  %nop14880 = alloca i1, i1 0
+  %nop14881 = alloca i1, i1 0
+  %nop14882 = alloca i1, i1 0
+  %nop14883 = alloca i1, i1 0
+  %nop14884 = alloca i1, i1 0
+  %nop14885 = alloca i1, i1 0
+  %nop14886 = alloca i1, i1 0
+  %nop14887 = alloca i1, i1 0
+  %nop14888 = alloca i1, i1 0
+  %nop14889 = alloca i1, i1 0
+  %nop14890 = alloca i1, i1 0
+  %nop14891 = alloca i1, i1 0
+  %nop14892 = alloca i1, i1 0
+  %nop14893 = alloca i1, i1 0
+  %nop14894 = alloca i1, i1 0
+  %nop14895 = alloca i1, i1 0
+  %nop14896 = alloca i1, i1 0
+  %nop14897 = alloca i1, i1 0
+  %nop14898 = alloca i1, i1 0
+  %nop14899 = alloca i1, i1 0
+  %nop14900 = alloca i1, i1 0
+  %nop14901 = alloca i1, i1 0
+  %nop14902 = alloca i1, i1 0
+  %nop14903 = alloca i1, i1 0
+  %nop14904 = alloca i1, i1 0
+  %nop14905 = alloca i1, i1 0
+  %nop14906 = alloca i1, i1 0
+  %nop14907 = alloca i1, i1 0
+  %nop14908 = alloca i1, i1 0
+  %nop14909 = alloca i1, i1 0
+  %nop14910 = alloca i1, i1 0
+  %nop14911 = alloca i1, i1 0
+  %nop14912 = alloca i1, i1 0
+  %nop14913 = alloca i1, i1 0
+  %nop14914 = alloca i1, i1 0
+  %nop14915 = alloca i1, i1 0
+  %nop14916 = alloca i1, i1 0
+  %nop14917 = alloca i1, i1 0
+  %nop14918 = alloca i1, i1 0
+  %nop14919 = alloca i1, i1 0
+  %nop14920 = alloca i1, i1 0
+  %nop14921 = alloca i1, i1 0
+  %nop14922 = alloca i1, i1 0
+  %nop14923 = alloca i1, i1 0
+  %nop14924 = alloca i1, i1 0
+  %nop14925 = alloca i1, i1 0
+  %nop14926 = alloca i1, i1 0
+  %nop14927 = alloca i1, i1 0
+  %nop14928 = alloca i1, i1 0
+  %nop14929 = alloca i1, i1 0
+  %nop14930 = alloca i1, i1 0
+  %nop14931 = alloca i1, i1 0
+  %nop14932 = alloca i1, i1 0
+  %nop14933 = alloca i1, i1 0
+  %nop14934 = alloca i1, i1 0
+  %nop14935 = alloca i1, i1 0
+  %nop14936 = alloca i1, i1 0
+  %nop14937 = alloca i1, i1 0
+  %nop14938 = alloca i1, i1 0
+  %nop14939 = alloca i1, i1 0
+  %nop14940 = alloca i1, i1 0
+  %nop14941 = alloca i1, i1 0
+  %nop14942 = alloca i1, i1 0
+  %nop14943 = alloca i1, i1 0
+  %nop14944 = alloca i1, i1 0
+  %nop14945 = alloca i1, i1 0
+  %nop14946 = alloca i1, i1 0
+  %nop14947 = alloca i1, i1 0
+  %nop14948 = alloca i1, i1 0
+  %nop14949 = alloca i1, i1 0
+  %nop14950 = alloca i1, i1 0
+  %nop14951 = alloca i1, i1 0
+  %nop14952 = alloca i1, i1 0
+  %nop14953 = alloca i1, i1 0
+  %nop14954 = alloca i1, i1 0
+  %nop14955 = alloca i1, i1 0
+  %nop14956 = alloca i1, i1 0
+  %nop14957 = alloca i1, i1 0
+  %nop14958 = alloca i1, i1 0
+  %nop14959 = alloca i1, i1 0
+  %nop14960 = alloca i1, i1 0
+  %nop14961 = alloca i1, i1 0
+  %nop14962 = alloca i1, i1 0
+  %nop14963 = alloca i1, i1 0
+  %nop14964 = alloca i1, i1 0
+  %nop14965 = alloca i1, i1 0
+  %nop14966 = alloca i1, i1 0
+  %nop14967 = alloca i1, i1 0
+  %nop14968 = alloca i1, i1 0
+  %nop14969 = alloca i1, i1 0
+  %nop14970 = alloca i1, i1 0
+  %nop14971 = alloca i1, i1 0
+  %nop14972 = alloca i1, i1 0
+  %nop14973 = alloca i1, i1 0
+  %nop14974 = alloca i1, i1 0
+  %nop14975 = alloca i1, i1 0
+  %nop14976 = alloca i1, i1 0
+  %nop14977 = alloca i1, i1 0
+  %nop14978 = alloca i1, i1 0
+  %nop14979 = alloca i1, i1 0
+  %nop14980 = alloca i1, i1 0
+  %nop14981 = alloca i1, i1 0
+  %nop14982 = alloca i1, i1 0
+  %nop14983 = alloca i1, i1 0
+  %nop14984 = alloca i1, i1 0
+  %nop14985 = alloca i1, i1 0
+  %nop14986 = alloca i1, i1 0
+  %nop14987 = alloca i1, i1 0
+  %nop14988 = alloca i1, i1 0
+  %nop14989 = alloca i1, i1 0
+  %nop14990 = alloca i1, i1 0
+  %nop14991 = alloca i1, i1 0
+  %nop14992 = alloca i1, i1 0
+  %nop14993 = alloca i1, i1 0
+  %nop14994 = alloca i1, i1 0
+  %nop14995 = alloca i1, i1 0
+  %nop14996 = alloca i1, i1 0
+  %nop14997 = alloca i1, i1 0
+  %nop14998 = alloca i1, i1 0
+  %nop14999 = alloca i1, i1 0
+  %nop15000 = alloca i1, i1 0
+  %nop15001 = alloca i1, i1 0
+  %nop15002 = alloca i1, i1 0
+  %nop15003 = alloca i1, i1 0
+  %nop15004 = alloca i1, i1 0
+  %nop15005 = alloca i1, i1 0
+  %nop15006 = alloca i1, i1 0
+  %nop15007 = alloca i1, i1 0
+  %nop15008 = alloca i1, i1 0
+  %nop15009 = alloca i1, i1 0
+  %nop15010 = alloca i1, i1 0
+  %nop15011 = alloca i1, i1 0
+  %nop15012 = alloca i1, i1 0
+  %nop15013 = alloca i1, i1 0
+  %nop15014 = alloca i1, i1 0
+  %nop15015 = alloca i1, i1 0
+  %nop15016 = alloca i1, i1 0
+  %nop15017 = alloca i1, i1 0
+  %nop15018 = alloca i1, i1 0
+  %nop15019 = alloca i1, i1 0
+  %nop15020 = alloca i1, i1 0
+  %nop15021 = alloca i1, i1 0
+  %nop15022 = alloca i1, i1 0
+  %nop15023 = alloca i1, i1 0
+  %nop15024 = alloca i1, i1 0
+  %nop15025 = alloca i1, i1 0
+  %nop15026 = alloca i1, i1 0
+  %nop15027 = alloca i1, i1 0
+  %nop15028 = alloca i1, i1 0
+  %nop15029 = alloca i1, i1 0
+  %nop15030 = alloca i1, i1 0
+  %nop15031 = alloca i1, i1 0
+  %nop15032 = alloca i1, i1 0
+  %nop15033 = alloca i1, i1 0
+  %nop15034 = alloca i1, i1 0
+  %nop15035 = alloca i1, i1 0
+  %nop15036 = alloca i1, i1 0
+  %nop15037 = alloca i1, i1 0
+  %nop15038 = alloca i1, i1 0
+  %nop15039 = alloca i1, i1 0
+  %nop15040 = alloca i1, i1 0
+  %nop15041 = alloca i1, i1 0
+  %nop15042 = alloca i1, i1 0
+  %nop15043 = alloca i1, i1 0
+  %nop15044 = alloca i1, i1 0
+  %nop15045 = alloca i1, i1 0
+  %nop15046 = alloca i1, i1 0
+  %nop15047 = alloca i1, i1 0
+  %nop15048 = alloca i1, i1 0
+  %nop15049 = alloca i1, i1 0
+  %nop15050 = alloca i1, i1 0
+  %nop15051 = alloca i1, i1 0
+  %nop15052 = alloca i1, i1 0
+  %nop15053 = alloca i1, i1 0
+  %nop15054 = alloca i1, i1 0
+  %nop15055 = alloca i1, i1 0
+  %nop15056 = alloca i1, i1 0
+  %nop15057 = alloca i1, i1 0
+  %nop15058 = alloca i1, i1 0
+  %nop15059 = alloca i1, i1 0
+  %nop15060 = alloca i1, i1 0
+  %nop15061 = alloca i1, i1 0
+  %nop15062 = alloca i1, i1 0
+  %nop15063 = alloca i1, i1 0
+  %nop15064 = alloca i1, i1 0
+  %nop15065 = alloca i1, i1 0
+  %nop15066 = alloca i1, i1 0
+  %nop15067 = alloca i1, i1 0
+  %nop15068 = alloca i1, i1 0
+  %nop15069 = alloca i1, i1 0
+  %nop15070 = alloca i1, i1 0
+  %nop15071 = alloca i1, i1 0
+  %nop15072 = alloca i1, i1 0
+  %nop15073 = alloca i1, i1 0
+  %nop15074 = alloca i1, i1 0
+  %nop15075 = alloca i1, i1 0
+  %nop15076 = alloca i1, i1 0
+  %nop15077 = alloca i1, i1 0
+  %nop15078 = alloca i1, i1 0
+  %nop15079 = alloca i1, i1 0
+  %nop15080 = alloca i1, i1 0
+  %nop15081 = alloca i1, i1 0
+  %nop15082 = alloca i1, i1 0
+  %nop15083 = alloca i1, i1 0
+  %nop15084 = alloca i1, i1 0
+  %nop15085 = alloca i1, i1 0
+  %nop15086 = alloca i1, i1 0
+  %nop15087 = alloca i1, i1 0
+  %nop15088 = alloca i1, i1 0
+  %nop15089 = alloca i1, i1 0
+  %nop15090 = alloca i1, i1 0
+  %nop15091 = alloca i1, i1 0
+  %nop15092 = alloca i1, i1 0
+  %nop15093 = alloca i1, i1 0
+  %nop15094 = alloca i1, i1 0
+  %nop15095 = alloca i1, i1 0
+  %nop15096 = alloca i1, i1 0
+  %nop15097 = alloca i1, i1 0
+  %nop15098 = alloca i1, i1 0
+  %nop15099 = alloca i1, i1 0
+  %nop15100 = alloca i1, i1 0
+  %nop15101 = alloca i1, i1 0
+  %nop15102 = alloca i1, i1 0
+  %nop15103 = alloca i1, i1 0
+  %nop15104 = alloca i1, i1 0
+  %nop15105 = alloca i1, i1 0
+  %nop15106 = alloca i1, i1 0
+  %nop15107 = alloca i1, i1 0
+  %nop15108 = alloca i1, i1 0
+  %nop15109 = alloca i1, i1 0
+  %nop15110 = alloca i1, i1 0
+  %nop15111 = alloca i1, i1 0
+  %nop15112 = alloca i1, i1 0
+  %nop15113 = alloca i1, i1 0
+  %nop15114 = alloca i1, i1 0
+  %nop15115 = alloca i1, i1 0
+  %nop15116 = alloca i1, i1 0
+  %nop15117 = alloca i1, i1 0
+  %nop15118 = alloca i1, i1 0
+  %nop15119 = alloca i1, i1 0
+  %nop15120 = alloca i1, i1 0
+  %nop15121 = alloca i1, i1 0
+  %nop15122 = alloca i1, i1 0
+  %nop15123 = alloca i1, i1 0
+  %nop15124 = alloca i1, i1 0
+  %nop15125 = alloca i1, i1 0
+  %nop15126 = alloca i1, i1 0
+  %nop15127 = alloca i1, i1 0
+  %nop15128 = alloca i1, i1 0
+  %nop15129 = alloca i1, i1 0
+  %nop15130 = alloca i1, i1 0
+  %nop15131 = alloca i1, i1 0
+  %nop15132 = alloca i1, i1 0
+  %nop15133 = alloca i1, i1 0
+  %nop15134 = alloca i1, i1 0
+  %nop15135 = alloca i1, i1 0
+  %nop15136 = alloca i1, i1 0
+  %nop15137 = alloca i1, i1 0
+  %nop15138 = alloca i1, i1 0
+  %nop15139 = alloca i1, i1 0
+  %nop15140 = alloca i1, i1 0
+  %nop15141 = alloca i1, i1 0
+  %nop15142 = alloca i1, i1 0
+  %nop15143 = alloca i1, i1 0
+  %nop15144 = alloca i1, i1 0
+  %nop15145 = alloca i1, i1 0
+  %nop15146 = alloca i1, i1 0
+  %nop15147 = alloca i1, i1 0
+  %nop15148 = alloca i1, i1 0
+  %nop15149 = alloca i1, i1 0
+  %nop15150 = alloca i1, i1 0
+  %nop15151 = alloca i1, i1 0
+  %nop15152 = alloca i1, i1 0
+  %nop15153 = alloca i1, i1 0
+  %nop15154 = alloca i1, i1 0
+  %nop15155 = alloca i1, i1 0
+  %nop15156 = alloca i1, i1 0
+  %nop15157 = alloca i1, i1 0
+  %nop15158 = alloca i1, i1 0
+  %nop15159 = alloca i1, i1 0
+  %nop15160 = alloca i1, i1 0
+  %nop15161 = alloca i1, i1 0
+  %nop15162 = alloca i1, i1 0
+  %nop15163 = alloca i1, i1 0
+  %nop15164 = alloca i1, i1 0
+  %nop15165 = alloca i1, i1 0
+  %nop15166 = alloca i1, i1 0
+  %nop15167 = alloca i1, i1 0
+  %nop15168 = alloca i1, i1 0
+  %nop15169 = alloca i1, i1 0
+  %nop15170 = alloca i1, i1 0
+  %nop15171 = alloca i1, i1 0
+  %nop15172 = alloca i1, i1 0
+  %nop15173 = alloca i1, i1 0
+  %nop15174 = alloca i1, i1 0
+  %nop15175 = alloca i1, i1 0
+  %nop15176 = alloca i1, i1 0
+  %nop15177 = alloca i1, i1 0
+  %nop15178 = alloca i1, i1 0
+  %nop15179 = alloca i1, i1 0
+  %nop15180 = alloca i1, i1 0
+  %nop15181 = alloca i1, i1 0
+  %nop15182 = alloca i1, i1 0
+  %nop15183 = alloca i1, i1 0
+  %nop15184 = alloca i1, i1 0
+  %nop15185 = alloca i1, i1 0
+  %nop15186 = alloca i1, i1 0
+  %nop15187 = alloca i1, i1 0
+  %nop15188 = alloca i1, i1 0
+  %nop15189 = alloca i1, i1 0
+  %nop15190 = alloca i1, i1 0
+  %nop15191 = alloca i1, i1 0
+  %nop15192 = alloca i1, i1 0
+  %nop15193 = alloca i1, i1 0
+  %nop15194 = alloca i1, i1 0
+  %nop15195 = alloca i1, i1 0
+  %nop15196 = alloca i1, i1 0
+  %nop15197 = alloca i1, i1 0
+  %nop15198 = alloca i1, i1 0
+  %nop15199 = alloca i1, i1 0
+  %nop15200 = alloca i1, i1 0
+  %nop15201 = alloca i1, i1 0
+  %nop15202 = alloca i1, i1 0
+  %nop15203 = alloca i1, i1 0
+  %nop15204 = alloca i1, i1 0
+  %nop15205 = alloca i1, i1 0
+  %nop15206 = alloca i1, i1 0
+  %nop15207 = alloca i1, i1 0
+  %nop15208 = alloca i1, i1 0
+  %nop15209 = alloca i1, i1 0
+  %nop15210 = alloca i1, i1 0
+  %nop15211 = alloca i1, i1 0
+  %nop15212 = alloca i1, i1 0
+  %nop15213 = alloca i1, i1 0
+  %nop15214 = alloca i1, i1 0
+  %nop15215 = alloca i1, i1 0
+  %nop15216 = alloca i1, i1 0
+  %nop15217 = alloca i1, i1 0
+  %nop15218 = alloca i1, i1 0
+  %nop15219 = alloca i1, i1 0
+  %nop15220 = alloca i1, i1 0
+  %nop15221 = alloca i1, i1 0
+  %nop15222 = alloca i1, i1 0
+  %nop15223 = alloca i1, i1 0
+  %nop15224 = alloca i1, i1 0
+  %nop15225 = alloca i1, i1 0
+  %nop15226 = alloca i1, i1 0
+  %nop15227 = alloca i1, i1 0
+  %nop15228 = alloca i1, i1 0
+  %nop15229 = alloca i1, i1 0
+  %nop15230 = alloca i1, i1 0
+  %nop15231 = alloca i1, i1 0
+  %nop15232 = alloca i1, i1 0
+  %nop15233 = alloca i1, i1 0
+  %nop15234 = alloca i1, i1 0
+  %nop15235 = alloca i1, i1 0
+  %nop15236 = alloca i1, i1 0
+  %nop15237 = alloca i1, i1 0
+  %nop15238 = alloca i1, i1 0
+  %nop15239 = alloca i1, i1 0
+  %nop15240 = alloca i1, i1 0
+  %nop15241 = alloca i1, i1 0
+  %nop15242 = alloca i1, i1 0
+  %nop15243 = alloca i1, i1 0
+  %nop15244 = alloca i1, i1 0
+  %nop15245 = alloca i1, i1 0
+  %nop15246 = alloca i1, i1 0
+  %nop15247 = alloca i1, i1 0
+  %nop15248 = alloca i1, i1 0
+  %nop15249 = alloca i1, i1 0
+  %nop15250 = alloca i1, i1 0
+  %nop15251 = alloca i1, i1 0
+  %nop15252 = alloca i1, i1 0
+  %nop15253 = alloca i1, i1 0
+  %nop15254 = alloca i1, i1 0
+  %nop15255 = alloca i1, i1 0
+  %nop15256 = alloca i1, i1 0
+  %nop15257 = alloca i1, i1 0
+  %nop15258 = alloca i1, i1 0
+  %nop15259 = alloca i1, i1 0
+  %nop15260 = alloca i1, i1 0
+  %nop15261 = alloca i1, i1 0
+  %nop15262 = alloca i1, i1 0
+  %nop15263 = alloca i1, i1 0
+  %nop15264 = alloca i1, i1 0
+  %nop15265 = alloca i1, i1 0
+  %nop15266 = alloca i1, i1 0
+  %nop15267 = alloca i1, i1 0
+  %nop15268 = alloca i1, i1 0
+  %nop15269 = alloca i1, i1 0
+  %nop15270 = alloca i1, i1 0
+  %nop15271 = alloca i1, i1 0
+  %nop15272 = alloca i1, i1 0
+  %nop15273 = alloca i1, i1 0
+  %nop15274 = alloca i1, i1 0
+  %nop15275 = alloca i1, i1 0
+  %nop15276 = alloca i1, i1 0
+  %nop15277 = alloca i1, i1 0
+  %nop15278 = alloca i1, i1 0
+  %nop15279 = alloca i1, i1 0
+  %nop15280 = alloca i1, i1 0
+  %nop15281 = alloca i1, i1 0
+  %nop15282 = alloca i1, i1 0
+  %nop15283 = alloca i1, i1 0
+  %nop15284 = alloca i1, i1 0
+  %nop15285 = alloca i1, i1 0
+  %nop15286 = alloca i1, i1 0
+  %nop15287 = alloca i1, i1 0
+  %nop15288 = alloca i1, i1 0
+  %nop15289 = alloca i1, i1 0
+  %nop15290 = alloca i1, i1 0
+  %nop15291 = alloca i1, i1 0
+  %nop15292 = alloca i1, i1 0
+  %nop15293 = alloca i1, i1 0
+  %nop15294 = alloca i1, i1 0
+  %nop15295 = alloca i1, i1 0
+  %nop15296 = alloca i1, i1 0
+  %nop15297 = alloca i1, i1 0
+  %nop15298 = alloca i1, i1 0
+  %nop15299 = alloca i1, i1 0
+  %nop15300 = alloca i1, i1 0
+  %nop15301 = alloca i1, i1 0
+  %nop15302 = alloca i1, i1 0
+  %nop15303 = alloca i1, i1 0
+  %nop15304 = alloca i1, i1 0
+  %nop15305 = alloca i1, i1 0
+  %nop15306 = alloca i1, i1 0
+  %nop15307 = alloca i1, i1 0
+  %nop15308 = alloca i1, i1 0
+  %nop15309 = alloca i1, i1 0
+  %nop15310 = alloca i1, i1 0
+  %nop15311 = alloca i1, i1 0
+  %nop15312 = alloca i1, i1 0
+  %nop15313 = alloca i1, i1 0
+  %nop15314 = alloca i1, i1 0
+  %nop15315 = alloca i1, i1 0
+  %nop15316 = alloca i1, i1 0
+  %nop15317 = alloca i1, i1 0
+  %nop15318 = alloca i1, i1 0
+  %nop15319 = alloca i1, i1 0
+  %nop15320 = alloca i1, i1 0
+  %nop15321 = alloca i1, i1 0
+  %nop15322 = alloca i1, i1 0
+  %nop15323 = alloca i1, i1 0
+  %nop15324 = alloca i1, i1 0
+  %nop15325 = alloca i1, i1 0
+  %nop15326 = alloca i1, i1 0
+  %nop15327 = alloca i1, i1 0
+  %nop15328 = alloca i1, i1 0
+  %nop15329 = alloca i1, i1 0
+  %nop15330 = alloca i1, i1 0
+  %nop15331 = alloca i1, i1 0
+  %nop15332 = alloca i1, i1 0
+  %nop15333 = alloca i1, i1 0
+  %nop15334 = alloca i1, i1 0
+  %nop15335 = alloca i1, i1 0
+  %nop15336 = alloca i1, i1 0
+  %nop15337 = alloca i1, i1 0
+  %nop15338 = alloca i1, i1 0
+  %nop15339 = alloca i1, i1 0
+  %nop15340 = alloca i1, i1 0
+  %nop15341 = alloca i1, i1 0
+  %nop15342 = alloca i1, i1 0
+  %nop15343 = alloca i1, i1 0
+  %nop15344 = alloca i1, i1 0
+  %nop15345 = alloca i1, i1 0
+  %nop15346 = alloca i1, i1 0
+  %nop15347 = alloca i1, i1 0
+  %nop15348 = alloca i1, i1 0
+  %nop15349 = alloca i1, i1 0
+  %nop15350 = alloca i1, i1 0
+  %nop15351 = alloca i1, i1 0
+  %nop15352 = alloca i1, i1 0
+  %nop15353 = alloca i1, i1 0
+  %nop15354 = alloca i1, i1 0
+  %nop15355 = alloca i1, i1 0
+  %nop15356 = alloca i1, i1 0
+  %nop15357 = alloca i1, i1 0
+  %nop15358 = alloca i1, i1 0
+  %nop15359 = alloca i1, i1 0
+  %nop15360 = alloca i1, i1 0
+  %nop15361 = alloca i1, i1 0
+  %nop15362 = alloca i1, i1 0
+  %nop15363 = alloca i1, i1 0
+  %nop15364 = alloca i1, i1 0
+  %nop15365 = alloca i1, i1 0
+  %nop15366 = alloca i1, i1 0
+  %nop15367 = alloca i1, i1 0
+  %nop15368 = alloca i1, i1 0
+  %nop15369 = alloca i1, i1 0
+  %nop15370 = alloca i1, i1 0
+  %nop15371 = alloca i1, i1 0
+  %nop15372 = alloca i1, i1 0
+  %nop15373 = alloca i1, i1 0
+  %nop15374 = alloca i1, i1 0
+  %nop15375 = alloca i1, i1 0
+  %nop15376 = alloca i1, i1 0
+  %nop15377 = alloca i1, i1 0
+  %nop15378 = alloca i1, i1 0
+  %nop15379 = alloca i1, i1 0
+  %nop15380 = alloca i1, i1 0
+  %nop15381 = alloca i1, i1 0
+  %nop15382 = alloca i1, i1 0
+  %nop15383 = alloca i1, i1 0
+  %nop15384 = alloca i1, i1 0
+  %nop15385 = alloca i1, i1 0
+  %nop15386 = alloca i1, i1 0
+  %nop15387 = alloca i1, i1 0
+  %nop15388 = alloca i1, i1 0
+  %nop15389 = alloca i1, i1 0
+  %nop15390 = alloca i1, i1 0
+  %nop15391 = alloca i1, i1 0
+  %nop15392 = alloca i1, i1 0
+  %nop15393 = alloca i1, i1 0
+  %nop15394 = alloca i1, i1 0
+  %nop15395 = alloca i1, i1 0
+  %nop15396 = alloca i1, i1 0
+  %nop15397 = alloca i1, i1 0
+  %nop15398 = alloca i1, i1 0
+  %nop15399 = alloca i1, i1 0
+  %nop15400 = alloca i1, i1 0
+  %nop15401 = alloca i1, i1 0
+  %nop15402 = alloca i1, i1 0
+  %nop15403 = alloca i1, i1 0
+  %nop15404 = alloca i1, i1 0
+  %nop15405 = alloca i1, i1 0
+  %nop15406 = alloca i1, i1 0
+  %nop15407 = alloca i1, i1 0
+  %nop15408 = alloca i1, i1 0
+  %nop15409 = alloca i1, i1 0
+  %nop15410 = alloca i1, i1 0
+  %nop15411 = alloca i1, i1 0
+  %nop15412 = alloca i1, i1 0
+  %nop15413 = alloca i1, i1 0
+  %nop15414 = alloca i1, i1 0
+  %nop15415 = alloca i1, i1 0
+  %nop15416 = alloca i1, i1 0
+  %nop15417 = alloca i1, i1 0
+  %nop15418 = alloca i1, i1 0
+  %nop15419 = alloca i1, i1 0
+  %nop15420 = alloca i1, i1 0
+  %nop15421 = alloca i1, i1 0
+  %nop15422 = alloca i1, i1 0
+  %nop15423 = alloca i1, i1 0
+  %nop15424 = alloca i1, i1 0
+  %nop15425 = alloca i1, i1 0
+  %nop15426 = alloca i1, i1 0
+  %nop15427 = alloca i1, i1 0
+  %nop15428 = alloca i1, i1 0
+  %nop15429 = alloca i1, i1 0
+  %nop15430 = alloca i1, i1 0
+  %nop15431 = alloca i1, i1 0
+  %nop15432 = alloca i1, i1 0
+  %nop15433 = alloca i1, i1 0
+  %nop15434 = alloca i1, i1 0
+  %nop15435 = alloca i1, i1 0
+  %nop15436 = alloca i1, i1 0
+  %nop15437 = alloca i1, i1 0
+  %nop15438 = alloca i1, i1 0
+  %nop15439 = alloca i1, i1 0
+  %nop15440 = alloca i1, i1 0
+  %nop15441 = alloca i1, i1 0
+  %nop15442 = alloca i1, i1 0
+  %nop15443 = alloca i1, i1 0
+  %nop15444 = alloca i1, i1 0
+  %nop15445 = alloca i1, i1 0
+  %nop15446 = alloca i1, i1 0
+  %nop15447 = alloca i1, i1 0
+  %nop15448 = alloca i1, i1 0
+  %nop15449 = alloca i1, i1 0
+  %nop15450 = alloca i1, i1 0
+  %nop15451 = alloca i1, i1 0
+  %nop15452 = alloca i1, i1 0
+  %nop15453 = alloca i1, i1 0
+  %nop15454 = alloca i1, i1 0
+  %nop15455 = alloca i1, i1 0
+  %nop15456 = alloca i1, i1 0
+  %nop15457 = alloca i1, i1 0
+  %nop15458 = alloca i1, i1 0
+  %nop15459 = alloca i1, i1 0
+  %nop15460 = alloca i1, i1 0
+  %nop15461 = alloca i1, i1 0
+  %nop15462 = alloca i1, i1 0
+  %nop15463 = alloca i1, i1 0
+  %nop15464 = alloca i1, i1 0
+  %nop15465 = alloca i1, i1 0
+  %nop15466 = alloca i1, i1 0
+  %nop15467 = alloca i1, i1 0
+  %nop15468 = alloca i1, i1 0
+  %nop15469 = alloca i1, i1 0
+  %nop15470 = alloca i1, i1 0
+  %nop15471 = alloca i1, i1 0
+  %nop15472 = alloca i1, i1 0
+  %nop15473 = alloca i1, i1 0
+  %nop15474 = alloca i1, i1 0
+  %nop15475 = alloca i1, i1 0
+  %nop15476 = alloca i1, i1 0
+  %nop15477 = alloca i1, i1 0
+  %nop15478 = alloca i1, i1 0
+  %nop15479 = alloca i1, i1 0
+  %nop15480 = alloca i1, i1 0
+  %nop15481 = alloca i1, i1 0
+  %nop15482 = alloca i1, i1 0
+  %nop15483 = alloca i1, i1 0
+  %nop15484 = alloca i1, i1 0
+  %nop15485 = alloca i1, i1 0
+  %nop15486 = alloca i1, i1 0
+  %nop15487 = alloca i1, i1 0
+  %nop15488 = alloca i1, i1 0
+  %nop15489 = alloca i1, i1 0
+  %nop15490 = alloca i1, i1 0
+  %nop15491 = alloca i1, i1 0
+  %nop15492 = alloca i1, i1 0
+  %nop15493 = alloca i1, i1 0
+  %nop15494 = alloca i1, i1 0
+  %nop15495 = alloca i1, i1 0
+  %nop15496 = alloca i1, i1 0
+  %nop15497 = alloca i1, i1 0
+  %nop15498 = alloca i1, i1 0
+  %nop15499 = alloca i1, i1 0
+  %nop15500 = alloca i1, i1 0
+  %nop15501 = alloca i1, i1 0
+  %nop15502 = alloca i1, i1 0
+  %nop15503 = alloca i1, i1 0
+  %nop15504 = alloca i1, i1 0
+  %nop15505 = alloca i1, i1 0
+  %nop15506 = alloca i1, i1 0
+  %nop15507 = alloca i1, i1 0
+  %nop15508 = alloca i1, i1 0
+  %nop15509 = alloca i1, i1 0
+  %nop15510 = alloca i1, i1 0
+  %nop15511 = alloca i1, i1 0
+  %nop15512 = alloca i1, i1 0
+  %nop15513 = alloca i1, i1 0
+  %nop15514 = alloca i1, i1 0
+  %nop15515 = alloca i1, i1 0
+  %nop15516 = alloca i1, i1 0
+  %nop15517 = alloca i1, i1 0
+  %nop15518 = alloca i1, i1 0
+  %nop15519 = alloca i1, i1 0
+  %nop15520 = alloca i1, i1 0
+  %nop15521 = alloca i1, i1 0
+  %nop15522 = alloca i1, i1 0
+  %nop15523 = alloca i1, i1 0
+  %nop15524 = alloca i1, i1 0
+  %nop15525 = alloca i1, i1 0
+  %nop15526 = alloca i1, i1 0
+  %nop15527 = alloca i1, i1 0
+  %nop15528 = alloca i1, i1 0
+  %nop15529 = alloca i1, i1 0
+  %nop15530 = alloca i1, i1 0
+  %nop15531 = alloca i1, i1 0
+  %nop15532 = alloca i1, i1 0
+  %nop15533 = alloca i1, i1 0
+  %nop15534 = alloca i1, i1 0
+  %nop15535 = alloca i1, i1 0
+  %nop15536 = alloca i1, i1 0
+  %nop15537 = alloca i1, i1 0
+  %nop15538 = alloca i1, i1 0
+  %nop15539 = alloca i1, i1 0
+  %nop15540 = alloca i1, i1 0
+  %nop15541 = alloca i1, i1 0
+  %nop15542 = alloca i1, i1 0
+  %nop15543 = alloca i1, i1 0
+  %nop15544 = alloca i1, i1 0
+  %nop15545 = alloca i1, i1 0
+  %nop15546 = alloca i1, i1 0
+  %nop15547 = alloca i1, i1 0
+  %nop15548 = alloca i1, i1 0
+  %nop15549 = alloca i1, i1 0
+  %nop15550 = alloca i1, i1 0
+  %nop15551 = alloca i1, i1 0
+  %nop15552 = alloca i1, i1 0
+  %nop15553 = alloca i1, i1 0
+  %nop15554 = alloca i1, i1 0
+  %nop15555 = alloca i1, i1 0
+  %nop15556 = alloca i1, i1 0
+  %nop15557 = alloca i1, i1 0
+  %nop15558 = alloca i1, i1 0
+  %nop15559 = alloca i1, i1 0
+  %nop15560 = alloca i1, i1 0
+  %nop15561 = alloca i1, i1 0
+  %nop15562 = alloca i1, i1 0
+  %nop15563 = alloca i1, i1 0
+  %nop15564 = alloca i1, i1 0
+  %nop15565 = alloca i1, i1 0
+  %nop15566 = alloca i1, i1 0
+  %nop15567 = alloca i1, i1 0
+  %nop15568 = alloca i1, i1 0
+  %nop15569 = alloca i1, i1 0
+  %nop15570 = alloca i1, i1 0
+  %nop15571 = alloca i1, i1 0
+  %nop15572 = alloca i1, i1 0
+  %nop15573 = alloca i1, i1 0
+  %nop15574 = alloca i1, i1 0
+  %nop15575 = alloca i1, i1 0
+  %nop15576 = alloca i1, i1 0
+  %nop15577 = alloca i1, i1 0
+  %nop15578 = alloca i1, i1 0
+  %nop15579 = alloca i1, i1 0
+  %nop15580 = alloca i1, i1 0
+  %nop15581 = alloca i1, i1 0
+  %nop15582 = alloca i1, i1 0
+  %nop15583 = alloca i1, i1 0
+  %nop15584 = alloca i1, i1 0
+  %nop15585 = alloca i1, i1 0
+  %nop15586 = alloca i1, i1 0
+  %nop15587 = alloca i1, i1 0
+  %nop15588 = alloca i1, i1 0
+  %nop15589 = alloca i1, i1 0
+  %nop15590 = alloca i1, i1 0
+  %nop15591 = alloca i1, i1 0
+  %nop15592 = alloca i1, i1 0
+  %nop15593 = alloca i1, i1 0
+  %nop15594 = alloca i1, i1 0
+  %nop15595 = alloca i1, i1 0
+  %nop15596 = alloca i1, i1 0
+  %nop15597 = alloca i1, i1 0
+  %nop15598 = alloca i1, i1 0
+  %nop15599 = alloca i1, i1 0
+  %nop15600 = alloca i1, i1 0
+  %nop15601 = alloca i1, i1 0
+  %nop15602 = alloca i1, i1 0
+  %nop15603 = alloca i1, i1 0
+  %nop15604 = alloca i1, i1 0
+  %nop15605 = alloca i1, i1 0
+  %nop15606 = alloca i1, i1 0
+  %nop15607 = alloca i1, i1 0
+  %nop15608 = alloca i1, i1 0
+  %nop15609 = alloca i1, i1 0
+  %nop15610 = alloca i1, i1 0
+  %nop15611 = alloca i1, i1 0
+  %nop15612 = alloca i1, i1 0
+  %nop15613 = alloca i1, i1 0
+  %nop15614 = alloca i1, i1 0
+  %nop15615 = alloca i1, i1 0
+  %nop15616 = alloca i1, i1 0
+  %nop15617 = alloca i1, i1 0
+  %nop15618 = alloca i1, i1 0
+  %nop15619 = alloca i1, i1 0
+  %nop15620 = alloca i1, i1 0
+  %nop15621 = alloca i1, i1 0
+  %nop15622 = alloca i1, i1 0
+  %nop15623 = alloca i1, i1 0
+  %nop15624 = alloca i1, i1 0
+  %nop15625 = alloca i1, i1 0
+  %nop15626 = alloca i1, i1 0
+  %nop15627 = alloca i1, i1 0
+  %nop15628 = alloca i1, i1 0
+  %nop15629 = alloca i1, i1 0
+  %nop15630 = alloca i1, i1 0
+  %nop15631 = alloca i1, i1 0
+  %nop15632 = alloca i1, i1 0
+  %nop15633 = alloca i1, i1 0
+  %nop15634 = alloca i1, i1 0
+  %nop15635 = alloca i1, i1 0
+  %nop15636 = alloca i1, i1 0
+  %nop15637 = alloca i1, i1 0
+  %nop15638 = alloca i1, i1 0
+  %nop15639 = alloca i1, i1 0
+  %nop15640 = alloca i1, i1 0
+  %nop15641 = alloca i1, i1 0
+  %nop15642 = alloca i1, i1 0
+  %nop15643 = alloca i1, i1 0
+  %nop15644 = alloca i1, i1 0
+  %nop15645 = alloca i1, i1 0
+  %nop15646 = alloca i1, i1 0
+  %nop15647 = alloca i1, i1 0
+  %nop15648 = alloca i1, i1 0
+  %nop15649 = alloca i1, i1 0
+  %nop15650 = alloca i1, i1 0
+  %nop15651 = alloca i1, i1 0
+  %nop15652 = alloca i1, i1 0
+  %nop15653 = alloca i1, i1 0
+  %nop15654 = alloca i1, i1 0
+  %nop15655 = alloca i1, i1 0
+  %nop15656 = alloca i1, i1 0
+  %nop15657 = alloca i1, i1 0
+  %nop15658 = alloca i1, i1 0
+  %nop15659 = alloca i1, i1 0
+  %nop15660 = alloca i1, i1 0
+  %nop15661 = alloca i1, i1 0
+  %nop15662 = alloca i1, i1 0
+  %nop15663 = alloca i1, i1 0
+  %nop15664 = alloca i1, i1 0
+  %nop15665 = alloca i1, i1 0
+  %nop15666 = alloca i1, i1 0
+  %nop15667 = alloca i1, i1 0
+  %nop15668 = alloca i1, i1 0
+  %nop15669 = alloca i1, i1 0
+  %nop15670 = alloca i1, i1 0
+  %nop15671 = alloca i1, i1 0
+  %nop15672 = alloca i1, i1 0
+  %nop15673 = alloca i1, i1 0
+  %nop15674 = alloca i1, i1 0
+  %nop15675 = alloca i1, i1 0
+  %nop15676 = alloca i1, i1 0
+  %nop15677 = alloca i1, i1 0
+  %nop15678 = alloca i1, i1 0
+  %nop15679 = alloca i1, i1 0
+  %nop15680 = alloca i1, i1 0
+  %nop15681 = alloca i1, i1 0
+  %nop15682 = alloca i1, i1 0
+  %nop15683 = alloca i1, i1 0
+  %nop15684 = alloca i1, i1 0
+  %nop15685 = alloca i1, i1 0
+  %nop15686 = alloca i1, i1 0
+  %nop15687 = alloca i1, i1 0
+  %nop15688 = alloca i1, i1 0
+  %nop15689 = alloca i1, i1 0
+  %nop15690 = alloca i1, i1 0
+  %nop15691 = alloca i1, i1 0
+  %nop15692 = alloca i1, i1 0
+  %nop15693 = alloca i1, i1 0
+  %nop15694 = alloca i1, i1 0
+  %nop15695 = alloca i1, i1 0
+  %nop15696 = alloca i1, i1 0
+  %nop15697 = alloca i1, i1 0
+  %nop15698 = alloca i1, i1 0
+  %nop15699 = alloca i1, i1 0
+  %nop15700 = alloca i1, i1 0
+  %nop15701 = alloca i1, i1 0
+  %nop15702 = alloca i1, i1 0
+  %nop15703 = alloca i1, i1 0
+  %nop15704 = alloca i1, i1 0
+  %nop15705 = alloca i1, i1 0
+  %nop15706 = alloca i1, i1 0
+  %nop15707 = alloca i1, i1 0
+  %nop15708 = alloca i1, i1 0
+  %nop15709 = alloca i1, i1 0
+  %nop15710 = alloca i1, i1 0
+  %nop15711 = alloca i1, i1 0
+  %nop15712 = alloca i1, i1 0
+  %nop15713 = alloca i1, i1 0
+  %nop15714 = alloca i1, i1 0
+  %nop15715 = alloca i1, i1 0
+  %nop15716 = alloca i1, i1 0
+  %nop15717 = alloca i1, i1 0
+  %nop15718 = alloca i1, i1 0
+  %nop15719 = alloca i1, i1 0
+  %nop15720 = alloca i1, i1 0
+  %nop15721 = alloca i1, i1 0
+  %nop15722 = alloca i1, i1 0
+  %nop15723 = alloca i1, i1 0
+  %nop15724 = alloca i1, i1 0
+  %nop15725 = alloca i1, i1 0
+  %nop15726 = alloca i1, i1 0
+  %nop15727 = alloca i1, i1 0
+  %nop15728 = alloca i1, i1 0
+  %nop15729 = alloca i1, i1 0
+  %nop15730 = alloca i1, i1 0
+  %nop15731 = alloca i1, i1 0
+  %nop15732 = alloca i1, i1 0
+  %nop15733 = alloca i1, i1 0
+  %nop15734 = alloca i1, i1 0
+  %nop15735 = alloca i1, i1 0
+  %nop15736 = alloca i1, i1 0
+  %nop15737 = alloca i1, i1 0
+  %nop15738 = alloca i1, i1 0
+  %nop15739 = alloca i1, i1 0
+  %nop15740 = alloca i1, i1 0
+  %nop15741 = alloca i1, i1 0
+  %nop15742 = alloca i1, i1 0
+  %nop15743 = alloca i1, i1 0
+  %nop15744 = alloca i1, i1 0
+  %nop15745 = alloca i1, i1 0
+  %nop15746 = alloca i1, i1 0
+  %nop15747 = alloca i1, i1 0
+  %nop15748 = alloca i1, i1 0
+  %nop15749 = alloca i1, i1 0
+  %nop15750 = alloca i1, i1 0
+  %nop15751 = alloca i1, i1 0
+  %nop15752 = alloca i1, i1 0
+  %nop15753 = alloca i1, i1 0
+  %nop15754 = alloca i1, i1 0
+  %nop15755 = alloca i1, i1 0
+  %nop15756 = alloca i1, i1 0
+  %nop15757 = alloca i1, i1 0
+  %nop15758 = alloca i1, i1 0
+  %nop15759 = alloca i1, i1 0
+  %nop15760 = alloca i1, i1 0
+  %nop15761 = alloca i1, i1 0
+  %nop15762 = alloca i1, i1 0
+  %nop15763 = alloca i1, i1 0
+  %nop15764 = alloca i1, i1 0
+  %nop15765 = alloca i1, i1 0
+  %nop15766 = alloca i1, i1 0
+  %nop15767 = alloca i1, i1 0
+  %nop15768 = alloca i1, i1 0
+  %nop15769 = alloca i1, i1 0
+  %nop15770 = alloca i1, i1 0
+  %nop15771 = alloca i1, i1 0
+  %nop15772 = alloca i1, i1 0
+  %nop15773 = alloca i1, i1 0
+  %nop15774 = alloca i1, i1 0
+  %nop15775 = alloca i1, i1 0
+  %nop15776 = alloca i1, i1 0
+  %nop15777 = alloca i1, i1 0
+  %nop15778 = alloca i1, i1 0
+  %nop15779 = alloca i1, i1 0
+  %nop15780 = alloca i1, i1 0
+  %nop15781 = alloca i1, i1 0
+  %nop15782 = alloca i1, i1 0
+  %nop15783 = alloca i1, i1 0
+  %nop15784 = alloca i1, i1 0
+  %nop15785 = alloca i1, i1 0
+  %nop15786 = alloca i1, i1 0
+  %nop15787 = alloca i1, i1 0
+  %nop15788 = alloca i1, i1 0
+  %nop15789 = alloca i1, i1 0
+  %nop15790 = alloca i1, i1 0
+  %nop15791 = alloca i1, i1 0
+  %nop15792 = alloca i1, i1 0
+  %nop15793 = alloca i1, i1 0
+  %nop15794 = alloca i1, i1 0
+  %nop15795 = alloca i1, i1 0
+  %nop15796 = alloca i1, i1 0
+  %nop15797 = alloca i1, i1 0
+  %nop15798 = alloca i1, i1 0
+  %nop15799 = alloca i1, i1 0
+  %nop15800 = alloca i1, i1 0
+  %nop15801 = alloca i1, i1 0
+  %nop15802 = alloca i1, i1 0
+  %nop15803 = alloca i1, i1 0
+  %nop15804 = alloca i1, i1 0
+  %nop15805 = alloca i1, i1 0
+  %nop15806 = alloca i1, i1 0
+  %nop15807 = alloca i1, i1 0
+  %nop15808 = alloca i1, i1 0
+  %nop15809 = alloca i1, i1 0
+  %nop15810 = alloca i1, i1 0
+  %nop15811 = alloca i1, i1 0
+  %nop15812 = alloca i1, i1 0
+  %nop15813 = alloca i1, i1 0
+  %nop15814 = alloca i1, i1 0
+  %nop15815 = alloca i1, i1 0
+  %nop15816 = alloca i1, i1 0
+  %nop15817 = alloca i1, i1 0
+  %nop15818 = alloca i1, i1 0
+  %nop15819 = alloca i1, i1 0
+  %nop15820 = alloca i1, i1 0
+  %nop15821 = alloca i1, i1 0
+  %nop15822 = alloca i1, i1 0
+  %nop15823 = alloca i1, i1 0
+  %nop15824 = alloca i1, i1 0
+  %nop15825 = alloca i1, i1 0
+  %nop15826 = alloca i1, i1 0
+  %nop15827 = alloca i1, i1 0
+  %nop15828 = alloca i1, i1 0
+  %nop15829 = alloca i1, i1 0
+  %nop15830 = alloca i1, i1 0
+  %nop15831 = alloca i1, i1 0
+  %nop15832 = alloca i1, i1 0
+  %nop15833 = alloca i1, i1 0
+  %nop15834 = alloca i1, i1 0
+  %nop15835 = alloca i1, i1 0
+  %nop15836 = alloca i1, i1 0
+  %nop15837 = alloca i1, i1 0
+  %nop15838 = alloca i1, i1 0
+  %nop15839 = alloca i1, i1 0
+  %nop15840 = alloca i1, i1 0
+  %nop15841 = alloca i1, i1 0
+  %nop15842 = alloca i1, i1 0
+  %nop15843 = alloca i1, i1 0
+  %nop15844 = alloca i1, i1 0
+  %nop15845 = alloca i1, i1 0
+  %nop15846 = alloca i1, i1 0
+  %nop15847 = alloca i1, i1 0
+  %nop15848 = alloca i1, i1 0
+  %nop15849 = alloca i1, i1 0
+  %nop15850 = alloca i1, i1 0
+  %nop15851 = alloca i1, i1 0
+  %nop15852 = alloca i1, i1 0
+  %nop15853 = alloca i1, i1 0
+  %nop15854 = alloca i1, i1 0
+  %nop15855 = alloca i1, i1 0
+  %nop15856 = alloca i1, i1 0
+  %nop15857 = alloca i1, i1 0
+  %nop15858 = alloca i1, i1 0
+  %nop15859 = alloca i1, i1 0
+  %nop15860 = alloca i1, i1 0
+  %nop15861 = alloca i1, i1 0
+  %nop15862 = alloca i1, i1 0
+  %nop15863 = alloca i1, i1 0
+  %nop15864 = alloca i1, i1 0
+  %nop15865 = alloca i1, i1 0
+  %nop15866 = alloca i1, i1 0
+  %nop15867 = alloca i1, i1 0
+  %nop15868 = alloca i1, i1 0
+  %nop15869 = alloca i1, i1 0
+  %nop15870 = alloca i1, i1 0
+  %nop15871 = alloca i1, i1 0
+  %nop15872 = alloca i1, i1 0
+  %nop15873 = alloca i1, i1 0
+  %nop15874 = alloca i1, i1 0
+  %nop15875 = alloca i1, i1 0
+  %nop15876 = alloca i1, i1 0
+  %nop15877 = alloca i1, i1 0
+  %nop15878 = alloca i1, i1 0
+  %nop15879 = alloca i1, i1 0
+  %nop15880 = alloca i1, i1 0
+  %nop15881 = alloca i1, i1 0
+  %nop15882 = alloca i1, i1 0
+  %nop15883 = alloca i1, i1 0
+  %nop15884 = alloca i1, i1 0
+  %nop15885 = alloca i1, i1 0
+  %nop15886 = alloca i1, i1 0
+  %nop15887 = alloca i1, i1 0
+  %nop15888 = alloca i1, i1 0
+  %nop15889 = alloca i1, i1 0
+  %nop15890 = alloca i1, i1 0
+  %nop15891 = alloca i1, i1 0
+  %nop15892 = alloca i1, i1 0
+  %nop15893 = alloca i1, i1 0
+  %nop15894 = alloca i1, i1 0
+  %nop15895 = alloca i1, i1 0
+  %nop15896 = alloca i1, i1 0
+  %nop15897 = alloca i1, i1 0
+  %nop15898 = alloca i1, i1 0
+  %nop15899 = alloca i1, i1 0
+  %nop15900 = alloca i1, i1 0
+  %nop15901 = alloca i1, i1 0
+  %nop15902 = alloca i1, i1 0
+  %nop15903 = alloca i1, i1 0
+  %nop15904 = alloca i1, i1 0
+  %nop15905 = alloca i1, i1 0
+  %nop15906 = alloca i1, i1 0
+  %nop15907 = alloca i1, i1 0
+  %nop15908 = alloca i1, i1 0
+  %nop15909 = alloca i1, i1 0
+  %nop15910 = alloca i1, i1 0
+  %nop15911 = alloca i1, i1 0
+  %nop15912 = alloca i1, i1 0
+  %nop15913 = alloca i1, i1 0
+  %nop15914 = alloca i1, i1 0
+  %nop15915 = alloca i1, i1 0
+  %nop15916 = alloca i1, i1 0
+  %nop15917 = alloca i1, i1 0
+  %nop15918 = alloca i1, i1 0
+  %nop15919 = alloca i1, i1 0
+  %nop15920 = alloca i1, i1 0
+  %nop15921 = alloca i1, i1 0
+  %nop15922 = alloca i1, i1 0
+  %nop15923 = alloca i1, i1 0
+  %nop15924 = alloca i1, i1 0
+  %nop15925 = alloca i1, i1 0
+  %nop15926 = alloca i1, i1 0
+  %nop15927 = alloca i1, i1 0
+  %nop15928 = alloca i1, i1 0
+  %nop15929 = alloca i1, i1 0
+  %nop15930 = alloca i1, i1 0
+  %nop15931 = alloca i1, i1 0
+  %nop15932 = alloca i1, i1 0
+  %nop15933 = alloca i1, i1 0
+  %nop15934 = alloca i1, i1 0
+  %nop15935 = alloca i1, i1 0
+  %nop15936 = alloca i1, i1 0
+  %nop15937 = alloca i1, i1 0
+  %nop15938 = alloca i1, i1 0
+  %nop15939 = alloca i1, i1 0
+  %nop15940 = alloca i1, i1 0
+  %nop15941 = alloca i1, i1 0
+  %nop15942 = alloca i1, i1 0
+  %nop15943 = alloca i1, i1 0
+  %nop15944 = alloca i1, i1 0
+  %nop15945 = alloca i1, i1 0
+  %nop15946 = alloca i1, i1 0
+  %nop15947 = alloca i1, i1 0
+  %nop15948 = alloca i1, i1 0
+  %nop15949 = alloca i1, i1 0
+  %nop15950 = alloca i1, i1 0
+  %nop15951 = alloca i1, i1 0
+  %nop15952 = alloca i1, i1 0
+  %nop15953 = alloca i1, i1 0
+  %nop15954 = alloca i1, i1 0
+  %nop15955 = alloca i1, i1 0
+  %nop15956 = alloca i1, i1 0
+  %nop15957 = alloca i1, i1 0
+  %nop15958 = alloca i1, i1 0
+  %nop15959 = alloca i1, i1 0
+  %nop15960 = alloca i1, i1 0
+  %nop15961 = alloca i1, i1 0
+  %nop15962 = alloca i1, i1 0
+  %nop15963 = alloca i1, i1 0
+  %nop15964 = alloca i1, i1 0
+  %nop15965 = alloca i1, i1 0
+  %nop15966 = alloca i1, i1 0
+  %nop15967 = alloca i1, i1 0
+  %nop15968 = alloca i1, i1 0
+  %nop15969 = alloca i1, i1 0
+  %nop15970 = alloca i1, i1 0
+  %nop15971 = alloca i1, i1 0
+  %nop15972 = alloca i1, i1 0
+  %nop15973 = alloca i1, i1 0
+  %nop15974 = alloca i1, i1 0
+  %nop15975 = alloca i1, i1 0
+  %nop15976 = alloca i1, i1 0
+  %nop15977 = alloca i1, i1 0
+  %nop15978 = alloca i1, i1 0
+  %nop15979 = alloca i1, i1 0
+  %nop15980 = alloca i1, i1 0
+  %nop15981 = alloca i1, i1 0
+  %nop15982 = alloca i1, i1 0
+  %nop15983 = alloca i1, i1 0
+  %nop15984 = alloca i1, i1 0
+  %nop15985 = alloca i1, i1 0
+  %nop15986 = alloca i1, i1 0
+  %nop15987 = alloca i1, i1 0
+  %nop15988 = alloca i1, i1 0
+  %nop15989 = alloca i1, i1 0
+  %nop15990 = alloca i1, i1 0
+  %nop15991 = alloca i1, i1 0
+  %nop15992 = alloca i1, i1 0
+  %nop15993 = alloca i1, i1 0
+  %nop15994 = alloca i1, i1 0
+  %nop15995 = alloca i1, i1 0
+  %nop15996 = alloca i1, i1 0
+  %nop15997 = alloca i1, i1 0
+  %nop15998 = alloca i1, i1 0
+  %nop15999 = alloca i1, i1 0
+  %nop16000 = alloca i1, i1 0
+  %nop16001 = alloca i1, i1 0
+  %nop16002 = alloca i1, i1 0
+  %nop16003 = alloca i1, i1 0
+  %nop16004 = alloca i1, i1 0
+  %nop16005 = alloca i1, i1 0
+  %nop16006 = alloca i1, i1 0
+  %nop16007 = alloca i1, i1 0
+  %nop16008 = alloca i1, i1 0
+  %nop16009 = alloca i1, i1 0
+  %nop16010 = alloca i1, i1 0
+  %nop16011 = alloca i1, i1 0
+  %nop16012 = alloca i1, i1 0
+  %nop16013 = alloca i1, i1 0
+  %nop16014 = alloca i1, i1 0
+  %nop16015 = alloca i1, i1 0
+  %nop16016 = alloca i1, i1 0
+  %nop16017 = alloca i1, i1 0
+  %nop16018 = alloca i1, i1 0
+  %nop16019 = alloca i1, i1 0
+  %nop16020 = alloca i1, i1 0
+  %nop16021 = alloca i1, i1 0
+  %nop16022 = alloca i1, i1 0
+  %nop16023 = alloca i1, i1 0
+  %nop16024 = alloca i1, i1 0
+  %nop16025 = alloca i1, i1 0
+  %nop16026 = alloca i1, i1 0
+  %nop16027 = alloca i1, i1 0
+  %nop16028 = alloca i1, i1 0
+  %nop16029 = alloca i1, i1 0
+  %nop16030 = alloca i1, i1 0
+  %nop16031 = alloca i1, i1 0
+  %nop16032 = alloca i1, i1 0
+  %nop16033 = alloca i1, i1 0
+  %nop16034 = alloca i1, i1 0
+  %nop16035 = alloca i1, i1 0
+  %nop16036 = alloca i1, i1 0
+  %nop16037 = alloca i1, i1 0
+  %nop16038 = alloca i1, i1 0
+  %nop16039 = alloca i1, i1 0
+  %nop16040 = alloca i1, i1 0
+  %nop16041 = alloca i1, i1 0
+  %nop16042 = alloca i1, i1 0
+  %nop16043 = alloca i1, i1 0
+  %nop16044 = alloca i1, i1 0
+  %nop16045 = alloca i1, i1 0
+  %nop16046 = alloca i1, i1 0
+  %nop16047 = alloca i1, i1 0
+  %nop16048 = alloca i1, i1 0
+  %nop16049 = alloca i1, i1 0
+  %nop16050 = alloca i1, i1 0
+  %nop16051 = alloca i1, i1 0
+  %nop16052 = alloca i1, i1 0
+  %nop16053 = alloca i1, i1 0
+  %nop16054 = alloca i1, i1 0
+  %nop16055 = alloca i1, i1 0
+  %nop16056 = alloca i1, i1 0
+  %nop16057 = alloca i1, i1 0
+  %nop16058 = alloca i1, i1 0
+  %nop16059 = alloca i1, i1 0
+  %nop16060 = alloca i1, i1 0
+  %nop16061 = alloca i1, i1 0
+  %nop16062 = alloca i1, i1 0
+  %nop16063 = alloca i1, i1 0
+  %nop16064 = alloca i1, i1 0
+  %nop16065 = alloca i1, i1 0
+  %nop16066 = alloca i1, i1 0
+  %nop16067 = alloca i1, i1 0
+  %nop16068 = alloca i1, i1 0
+  %nop16069 = alloca i1, i1 0
+  %nop16070 = alloca i1, i1 0
+  %nop16071 = alloca i1, i1 0
+  %nop16072 = alloca i1, i1 0
+  %nop16073 = alloca i1, i1 0
+  %nop16074 = alloca i1, i1 0
+  %nop16075 = alloca i1, i1 0
+  %nop16076 = alloca i1, i1 0
+  %nop16077 = alloca i1, i1 0
+  %nop16078 = alloca i1, i1 0
+  %nop16079 = alloca i1, i1 0
+  %nop16080 = alloca i1, i1 0
+  %nop16081 = alloca i1, i1 0
+  %nop16082 = alloca i1, i1 0
+  %nop16083 = alloca i1, i1 0
+  %nop16084 = alloca i1, i1 0
+  %nop16085 = alloca i1, i1 0
+  %nop16086 = alloca i1, i1 0
+  %nop16087 = alloca i1, i1 0
+  %nop16088 = alloca i1, i1 0
+  %nop16089 = alloca i1, i1 0
+  %nop16090 = alloca i1, i1 0
+  %nop16091 = alloca i1, i1 0
+  %nop16092 = alloca i1, i1 0
+  %nop16093 = alloca i1, i1 0
+  %nop16094 = alloca i1, i1 0
+  %nop16095 = alloca i1, i1 0
+  %nop16096 = alloca i1, i1 0
+  %nop16097 = alloca i1, i1 0
+  %nop16098 = alloca i1, i1 0
+  %nop16099 = alloca i1, i1 0
+  %nop16100 = alloca i1, i1 0
+  %nop16101 = alloca i1, i1 0
+  %nop16102 = alloca i1, i1 0
+  %nop16103 = alloca i1, i1 0
+  %nop16104 = alloca i1, i1 0
+  %nop16105 = alloca i1, i1 0
+  %nop16106 = alloca i1, i1 0
+  %nop16107 = alloca i1, i1 0
+  %nop16108 = alloca i1, i1 0
+  %nop16109 = alloca i1, i1 0
+  %nop16110 = alloca i1, i1 0
+  %nop16111 = alloca i1, i1 0
+  %nop16112 = alloca i1, i1 0
+  %nop16113 = alloca i1, i1 0
+  %nop16114 = alloca i1, i1 0
+  %nop16115 = alloca i1, i1 0
+  %nop16116 = alloca i1, i1 0
+  %nop16117 = alloca i1, i1 0
+  %nop16118 = alloca i1, i1 0
+  %nop16119 = alloca i1, i1 0
+  %nop16120 = alloca i1, i1 0
+  %nop16121 = alloca i1, i1 0
+  %nop16122 = alloca i1, i1 0
+  %nop16123 = alloca i1, i1 0
+  %nop16124 = alloca i1, i1 0
+  %nop16125 = alloca i1, i1 0
+  %nop16126 = alloca i1, i1 0
+  %nop16127 = alloca i1, i1 0
+  %nop16128 = alloca i1, i1 0
+  %nop16129 = alloca i1, i1 0
+  %nop16130 = alloca i1, i1 0
+  %nop16131 = alloca i1, i1 0
+  %nop16132 = alloca i1, i1 0
+  %nop16133 = alloca i1, i1 0
+  %nop16134 = alloca i1, i1 0
+  %nop16135 = alloca i1, i1 0
+  %nop16136 = alloca i1, i1 0
+  %nop16137 = alloca i1, i1 0
+  %nop16138 = alloca i1, i1 0
+  %nop16139 = alloca i1, i1 0
+  %nop16140 = alloca i1, i1 0
+  %nop16141 = alloca i1, i1 0
+  %nop16142 = alloca i1, i1 0
+  %nop16143 = alloca i1, i1 0
+  %nop16144 = alloca i1, i1 0
+  %nop16145 = alloca i1, i1 0
+  %nop16146 = alloca i1, i1 0
+  %nop16147 = alloca i1, i1 0
+  %nop16148 = alloca i1, i1 0
+  %nop16149 = alloca i1, i1 0
+  %nop16150 = alloca i1, i1 0
+  %nop16151 = alloca i1, i1 0
+  %nop16152 = alloca i1, i1 0
+  %nop16153 = alloca i1, i1 0
+  %nop16154 = alloca i1, i1 0
+  %nop16155 = alloca i1, i1 0
+  %nop16156 = alloca i1, i1 0
+  %nop16157 = alloca i1, i1 0
+  %nop16158 = alloca i1, i1 0
+  %nop16159 = alloca i1, i1 0
+  %nop16160 = alloca i1, i1 0
+  %nop16161 = alloca i1, i1 0
+  %nop16162 = alloca i1, i1 0
+  %nop16163 = alloca i1, i1 0
+  %nop16164 = alloca i1, i1 0
+  %nop16165 = alloca i1, i1 0
+  %nop16166 = alloca i1, i1 0
+  %nop16167 = alloca i1, i1 0
+  %nop16168 = alloca i1, i1 0
+  %nop16169 = alloca i1, i1 0
+  %nop16170 = alloca i1, i1 0
+  %nop16171 = alloca i1, i1 0
+  %nop16172 = alloca i1, i1 0
+  %nop16173 = alloca i1, i1 0
+  %nop16174 = alloca i1, i1 0
+  %nop16175 = alloca i1, i1 0
+  %nop16176 = alloca i1, i1 0
+  %nop16177 = alloca i1, i1 0
+  %nop16178 = alloca i1, i1 0
+  %nop16179 = alloca i1, i1 0
+  %nop16180 = alloca i1, i1 0
+  %nop16181 = alloca i1, i1 0
+  %nop16182 = alloca i1, i1 0
+  %nop16183 = alloca i1, i1 0
+  %nop16184 = alloca i1, i1 0
+  %nop16185 = alloca i1, i1 0
+  %nop16186 = alloca i1, i1 0
+  %nop16187 = alloca i1, i1 0
+  %nop16188 = alloca i1, i1 0
+  %nop16189 = alloca i1, i1 0
+  %nop16190 = alloca i1, i1 0
+  %nop16191 = alloca i1, i1 0
+  %nop16192 = alloca i1, i1 0
+  %nop16193 = alloca i1, i1 0
+  %nop16194 = alloca i1, i1 0
+  %nop16195 = alloca i1, i1 0
+  %nop16196 = alloca i1, i1 0
+  %nop16197 = alloca i1, i1 0
+  %nop16198 = alloca i1, i1 0
+  %nop16199 = alloca i1, i1 0
+  %nop16200 = alloca i1, i1 0
+  %nop16201 = alloca i1, i1 0
+  %nop16202 = alloca i1, i1 0
+  %nop16203 = alloca i1, i1 0
+  %nop16204 = alloca i1, i1 0
+  %nop16205 = alloca i1, i1 0
+  %nop16206 = alloca i1, i1 0
+  %nop16207 = alloca i1, i1 0
+  %nop16208 = alloca i1, i1 0
+  %nop16209 = alloca i1, i1 0
+  %nop16210 = alloca i1, i1 0
+  %nop16211 = alloca i1, i1 0
+  %nop16212 = alloca i1, i1 0
+  %nop16213 = alloca i1, i1 0
+  %nop16214 = alloca i1, i1 0
+  %nop16215 = alloca i1, i1 0
+  %nop16216 = alloca i1, i1 0
+  %nop16217 = alloca i1, i1 0
+  %nop16218 = alloca i1, i1 0
+  %nop16219 = alloca i1, i1 0
+  %nop16220 = alloca i1, i1 0
+  %nop16221 = alloca i1, i1 0
+  %nop16222 = alloca i1, i1 0
+  %nop16223 = alloca i1, i1 0
+  %nop16224 = alloca i1, i1 0
+  %nop16225 = alloca i1, i1 0
+  %nop16226 = alloca i1, i1 0
+  %nop16227 = alloca i1, i1 0
+  %nop16228 = alloca i1, i1 0
+  %nop16229 = alloca i1, i1 0
+  %nop16230 = alloca i1, i1 0
+  %nop16231 = alloca i1, i1 0
+  %nop16232 = alloca i1, i1 0
+  %nop16233 = alloca i1, i1 0
+  %nop16234 = alloca i1, i1 0
+  %nop16235 = alloca i1, i1 0
+  %nop16236 = alloca i1, i1 0
+  %nop16237 = alloca i1, i1 0
+  %nop16238 = alloca i1, i1 0
+  %nop16239 = alloca i1, i1 0
+  %nop16240 = alloca i1, i1 0
+  %nop16241 = alloca i1, i1 0
+  %nop16242 = alloca i1, i1 0
+  %nop16243 = alloca i1, i1 0
+  %nop16244 = alloca i1, i1 0
+  %nop16245 = alloca i1, i1 0
+  %nop16246 = alloca i1, i1 0
+  %nop16247 = alloca i1, i1 0
+  %nop16248 = alloca i1, i1 0
+  %nop16249 = alloca i1, i1 0
+  %nop16250 = alloca i1, i1 0
+  %nop16251 = alloca i1, i1 0
+  %nop16252 = alloca i1, i1 0
+  %nop16253 = alloca i1, i1 0
+  %nop16254 = alloca i1, i1 0
+  %nop16255 = alloca i1, i1 0
+  %nop16256 = alloca i1, i1 0
+  %nop16257 = alloca i1, i1 0
+  %nop16258 = alloca i1, i1 0
+  %nop16259 = alloca i1, i1 0
+  %nop16260 = alloca i1, i1 0
+  %nop16261 = alloca i1, i1 0
+  %nop16262 = alloca i1, i1 0
+  %nop16263 = alloca i1, i1 0
+  %nop16264 = alloca i1, i1 0
+  %nop16265 = alloca i1, i1 0
+  %nop16266 = alloca i1, i1 0
+  %nop16267 = alloca i1, i1 0
+  %nop16268 = alloca i1, i1 0
+  %nop16269 = alloca i1, i1 0
+  %nop16270 = alloca i1, i1 0
+  %nop16271 = alloca i1, i1 0
+  %nop16272 = alloca i1, i1 0
+  %nop16273 = alloca i1, i1 0
+  %nop16274 = alloca i1, i1 0
+  %nop16275 = alloca i1, i1 0
+  %nop16276 = alloca i1, i1 0
+  %nop16277 = alloca i1, i1 0
+  %nop16278 = alloca i1, i1 0
+  %nop16279 = alloca i1, i1 0
+  %nop16280 = alloca i1, i1 0
+  %nop16281 = alloca i1, i1 0
+  %nop16282 = alloca i1, i1 0
+  %nop16283 = alloca i1, i1 0
+  %nop16284 = alloca i1, i1 0
+  %nop16285 = alloca i1, i1 0
+  %nop16286 = alloca i1, i1 0
+  %nop16287 = alloca i1, i1 0
+  %nop16288 = alloca i1, i1 0
+  %nop16289 = alloca i1, i1 0
+  %nop16290 = alloca i1, i1 0
+  %nop16291 = alloca i1, i1 0
+  %nop16292 = alloca i1, i1 0
+  %nop16293 = alloca i1, i1 0
+  %nop16294 = alloca i1, i1 0
+  %nop16295 = alloca i1, i1 0
+  %nop16296 = alloca i1, i1 0
+  %nop16297 = alloca i1, i1 0
+  %nop16298 = alloca i1, i1 0
+  %nop16299 = alloca i1, i1 0
+  %nop16300 = alloca i1, i1 0
+  %nop16301 = alloca i1, i1 0
+  %nop16302 = alloca i1, i1 0
+  %nop16303 = alloca i1, i1 0
+  %nop16304 = alloca i1, i1 0
+  %nop16305 = alloca i1, i1 0
+  %nop16306 = alloca i1, i1 0
+  %nop16307 = alloca i1, i1 0
+  %nop16308 = alloca i1, i1 0
+  %nop16309 = alloca i1, i1 0
+  %nop16310 = alloca i1, i1 0
+  %nop16311 = alloca i1, i1 0
+  %nop16312 = alloca i1, i1 0
+  %nop16313 = alloca i1, i1 0
+  %nop16314 = alloca i1, i1 0
+  %nop16315 = alloca i1, i1 0
+  %nop16316 = alloca i1, i1 0
+  %nop16317 = alloca i1, i1 0
+  %nop16318 = alloca i1, i1 0
+  %nop16319 = alloca i1, i1 0
+  %nop16320 = alloca i1, i1 0
+  %nop16321 = alloca i1, i1 0
+  %nop16322 = alloca i1, i1 0
+  %nop16323 = alloca i1, i1 0
+  %nop16324 = alloca i1, i1 0
+  %nop16325 = alloca i1, i1 0
+  %nop16326 = alloca i1, i1 0
+  %nop16327 = alloca i1, i1 0
+  %nop16328 = alloca i1, i1 0
+  %nop16329 = alloca i1, i1 0
+  %nop16330 = alloca i1, i1 0
+  %nop16331 = alloca i1, i1 0
+  %nop16332 = alloca i1, i1 0
+  %nop16333 = alloca i1, i1 0
+  %nop16334 = alloca i1, i1 0
+  %nop16335 = alloca i1, i1 0
+  %nop16336 = alloca i1, i1 0
+  %nop16337 = alloca i1, i1 0
+  %nop16338 = alloca i1, i1 0
+  %nop16339 = alloca i1, i1 0
+  %nop16340 = alloca i1, i1 0
+  %nop16341 = alloca i1, i1 0
+  %nop16342 = alloca i1, i1 0
+  %nop16343 = alloca i1, i1 0
+  %nop16344 = alloca i1, i1 0
+  %nop16345 = alloca i1, i1 0
+  %nop16346 = alloca i1, i1 0
+  %nop16347 = alloca i1, i1 0
+  %nop16348 = alloca i1, i1 0
+  %nop16349 = alloca i1, i1 0
+  %nop16350 = alloca i1, i1 0
+  %nop16351 = alloca i1, i1 0
+  %nop16352 = alloca i1, i1 0
+  %nop16353 = alloca i1, i1 0
+  %nop16354 = alloca i1, i1 0
+  %nop16355 = alloca i1, i1 0
+  %nop16356 = alloca i1, i1 0
+  %nop16357 = alloca i1, i1 0
+  %nop16358 = alloca i1, i1 0
+  %nop16359 = alloca i1, i1 0
+  %nop16360 = alloca i1, i1 0
+  %nop16361 = alloca i1, i1 0
+  %nop16362 = alloca i1, i1 0
+  %nop16363 = alloca i1, i1 0
+  %nop16364 = alloca i1, i1 0
+  %nop16365 = alloca i1, i1 0
+  %nop16366 = alloca i1, i1 0
+  %nop16367 = alloca i1, i1 0
+  %nop16368 = alloca i1, i1 0
+  %nop16369 = alloca i1, i1 0
+  %nop16370 = alloca i1, i1 0
+  %nop16371 = alloca i1, i1 0
+  %nop16372 = alloca i1, i1 0
+  %nop16373 = alloca i1, i1 0
+  %nop16374 = alloca i1, i1 0
+  %nop16375 = alloca i1, i1 0
+  %nop16376 = alloca i1, i1 0
+  %nop16377 = alloca i1, i1 0
+  br label %for.inc
+
+for.inc:
+  %3 = load i32* %i, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+; CHECK:  addiu $sp, $sp, -8
+; CHECK:  sw  $ra, 0($sp)
+; CHECK:  lui $[[REG1:[0-9]+]], 65534
+; CHECK:  addiu $[[REG1]], $[[REG1]], -12
+; CHECK:  addu  $[[REG1]], $ra, $[[REG1]]
+; CHECK:  lw  $ra, 0($sp)
+; CHECK:  jr  $[[REG1]]
+; CHECK:  addiu $sp, $sp, 8
+
+for.end:
+  ret i32 0
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
+  "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
+  "no-infs-fp-math"="false" "no-nans-fp-math"="false"
+  "stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
+  "use-soft-float"="false" }
diff --git a/test/MC/Mips/micromips-movcond-instructions.s b/test/MC/Mips/micromips-movcond-instructions.s
new file mode 100644
index 000000000000..5da8702d2200
--- /dev/null
+++ b/test/MC/Mips/micromips-movcond-instructions.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \
+# RUN: | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \
+# RUN: | FileCheck -check-prefix=CHECK-EB %s
+# Check that the assembler can handle the documented syntax
+# for move conditional instructions.
+#------------------------------------------------------------------------------
+# Move Conditional
+#------------------------------------------------------------------------------
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: movz    $9, $6, $7        # encoding: [0xe6,0x00,0x58,0x48]
+# CHECK-EL: movn    $9, $6, $7        # encoding: [0xe6,0x00,0x18,0x48]
+# CHECK-EL: movt    $9, $6, $fcc0     # encoding: [0x26,0x55,0x7b,0x09]
+# CHECK-EL: movf    $9, $6, $fcc0     # encoding: [0x26,0x55,0x7b,0x01]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: movz    $9, $6, $7        # encoding: [0x00,0xe6,0x48,0x58]
+# CHECK-EB: movn    $9, $6, $7        # encoding: [0x00,0xe6,0x48,0x18]
+# CHECK-EB: movt    $9, $6, $fcc0     # encoding: [0x55,0x26,0x09,0x7b]
+# CHECK-EB: movf    $9, $6, $fcc0     # encoding: [0x55,0x26,0x01,0x7b]
+     movz    $9, $6, $7
+     movn    $9, $6, $7
+     movt    $9, $6, $fcc0
+     movf    $9, $6, $fcc0
diff --git a/test/MC/Mips/micromips-multiply-instructions.s b/test/MC/Mips/micromips-multiply-instructions.s
new file mode 100644
index 000000000000..7c3c5185b25a
--- /dev/null
+++ b/test/MC/Mips/micromips-multiply-instructions.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \
+# RUN: | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \
+# RUN: | FileCheck -check-prefix=CHECK-EB %s
+# Check that the assembler can handle the documented syntax
+# for Multiply Add/Sub instructions.
+#------------------------------------------------------------------------------
+# Multiply Add/Sub Instructions
+#------------------------------------------------------------------------------
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: madd   $4, $5    # encoding: [0xa4,0x00,0x3c,0xcb]
+# CHECK-EL: maddu  $4, $5    # encoding: [0xa4,0x00,0x3c,0xdb]
+# CHECK-EL: msub   $4, $5    # encoding: [0xa4,0x00,0x3c,0xeb]
+# CHECK-EL: msubu  $4, $5    # encoding: [0xa4,0x00,0x3c,0xfb]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: madd   $4, $5    # encoding: [0x00,0xa4,0xcb,0x3c]
+# CHECK-EB: maddu  $4, $5    # encoding: [0x00,0xa4,0xdb,0x3c]
+# CHECK-EB: msub   $4, $5    # encoding: [0x00,0xa4,0xeb,0x3c]
+# CHECK-EB: msubu  $4, $5    # encoding: [0x00,0xa4,0xfb,0x3c]
+    madd     $4, $5
+    maddu    $4, $5
+    msub     $4, $5
+    msubu    $4, $5
diff --git a/test/MC/Mips/micromips-relocations.s b/test/MC/Mips/micromips-relocations.s
new file mode 100644
index 000000000000..804dd2f595f7
--- /dev/null
+++ b/test/MC/Mips/micromips-relocations.s
@@ -0,0 +1,99 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \
+# RUN: -mattr=micromips | llvm-readobj -r \
+# RUN: | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that the assembler can handle the documented syntax
+# for relocations.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: lui $2, %hi(_gp_disp)
+# CHECK-FIXUP:        # encoding: [0xa2'A',0x41'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: _gp_disp@ABS_HI,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_HI16
+# CHECK-FIXUP: addiu $2, $2, %lo(_gp_disp)
+# CHECK-FIXUP:        # encoding: [0x42'A',0x30'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: _gp_disp@ABS_LO,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_LO16
+# CHECK-FIXUP: lw $25, %call16(strchr)($gp)
+# CHECK-FIXUP:        # encoding: [0x3c'A',0xff'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: strchr@GOT_CALL,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_CALL16
+# CHECK-FIXUP: lw $3, %got(loop_1)($2)
+# CHECK-FIXUP:        # encoding: [0x62'A',0xfc'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: loop_1@GOT,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_GOT16
+# CHECK-FIXUP: lui $2, %dtprel_hi(_gp_disp)
+# CHECK-FIXUP:        # encoding: [0xa2'A',0x41'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: _gp_disp@DTPREL_HI,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_TLS_DTPREL_HI16
+# CHECK-FIXUP: addiu $2, $2, %dtprel_lo(_gp_disp)
+# CHECK-FIXUP:        # encoding: [0x42'A',0x30'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: _gp_disp@DTPREL_LO,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_TLS_DTPREL_LO16
+# CHECK-FIXUP: lw $3, %got(loop_1)($2)
+# CHECK-FIXUP:        # encoding: [0x62'A',0xfc'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: loop_1@GOT,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_GOT16
+# CHECK-FIXUP: lw $4, %got_disp(loop_2)($3)
+# CHECK-FIXUP:        # encoding: [0x83'A',0xfc'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: loop_2@GOT_DISP,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_GOT_DISP
+# CHECK-FIXUP: lw $5, %got_page(loop_3)($4)
+# CHECK-FIXUP:        # encoding: [0xa4'A',0xfc'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: loop_3@GOT_PAGE,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_GOT_PAGE
+# CHECK-FIXUP: lw $6, %got_ofst(loop_4)($5)
+# CHECK-FIXUP:        # encoding: [0xc5'A',0xfc'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: loop_4@GOT_OFST,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_GOT_OFST
+# CHECK-FIXUP: lui $2, %tprel_hi(_gp_disp)
+# CHECK-FIXUP:        # encoding: [0xa2'A',0x41'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: _gp_disp@TPREL_HI,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_TLS_TPREL_HI16
+# CHECK-FIXUP: addiu $2, $2, %tprel_lo(_gp_disp)
+# CHECK-FIXUP:        # encoding: [0x42'A',0x30'A',0x00,0x00]
+# CHECK-FIXUP:        # fixup A - offset: 0,
+# CHECK-FIXUP:          value: _gp_disp@TPREL_LO,
+# CHECK-FIXUP:          kind: fixup_MICROMIPS_TLS_TPREL_LO16
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_HI16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_LO16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_CALL16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_GOT16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_DTPREL_HI16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_DTPREL_LO16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_GOT16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_GOT_DISP
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_GOT_PAGE
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_GOT_OFST
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_TPREL_HI16
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_TLS_TPREL_LO16
+# CHECK-ELF: ]
+
+    lui    $2, %hi(_gp_disp)
+    addiu  $2, $2, %lo(_gp_disp)
+    lw     $25, %call16(strchr)($gp)
+    lw     $3, %got(loop_1)($2)
+    lui    $2, %dtprel_hi(_gp_disp)
+    addiu  $2, $2, %dtprel_lo(_gp_disp)
+    lw     $3, %got(loop_1)($2)
+    lw     $4, %got_disp(loop_2)($3)
+    lw     $5, %got_page(loop_3)($4)
+    lw     $6, %got_ofst(loop_4)($5)
+    lui    $2, %tprel_hi(_gp_disp)
+    addiu  $2, $2, %tprel_lo(_gp_disp)
diff --git a/test/MC/Mips/micromips-shift-instructions.s b/test/MC/Mips/micromips-shift-instructions.s
index 3b5060f5280e..bbb71ac72082 100644
--- a/test/MC/Mips/micromips-shift-instructions.s
+++ b/test/MC/Mips/micromips-shift-instructions.s
@@ -1,17 +1,31 @@
-# RUN: llvm-mc %s -triple=mipsel -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck %s
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck -check-prefix=CHECK-EB %s
 # Check that the assembler can handle the documented syntax
 # for shift instructions.
 #------------------------------------------------------------------------------
 # Shift Instructions
 #------------------------------------------------------------------------------
-# CHECK: sll    $4, $3, 7      # encoding: [0x00,0x38,0x83,0x00]
-# CHECK: sllv   $2, $3, $5     # encoding: [0x10,0x10,0x65,0x00]
-# CHECK: sra    $4, $3, 7      # encoding: [0x80,0x38,0x83,0x00]
-# CHECK: srav   $2, $3, $5     # encoding: [0x90,0x10,0x65,0x00]
-# CHECK: srl    $4, $3, 7      # encoding: [0x40,0x38,0x83,0x00]
-# CHECK: srlv   $2, $3, $5     # encoding: [0x50,0x10,0x65,0x00]
-# CHECK: rotr   $9, $6, 7      # encoding: [0xc0,0x38,0x26,0x01]
-# CHECK: rotrv  $9, $6, $7     # encoding: [0xd0,0x48,0xc7,0x00]
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: sll    $4, $3, 7      # encoding: [0x83,0x00,0x00,0x38]
+# CHECK-EL: sllv   $2, $3, $5     # encoding: [0x65,0x00,0x10,0x10]
+# CHECK-EL: sra    $4, $3, 7      # encoding: [0x83,0x00,0x80,0x38]
+# CHECK-EL: srav   $2, $3, $5     # encoding: [0x65,0x00,0x90,0x10]
+# CHECK-EL: srl    $4, $3, 7      # encoding: [0x83,0x00,0x40,0x38]
+# CHECK-EL: srlv   $2, $3, $5     # encoding: [0x65,0x00,0x50,0x10]
+# CHECK-EL: rotr   $9, $6, 7      # encoding: [0x26,0x01,0xc0,0x38]
+# CHECK-EL: rotrv  $9, $6, $7     # encoding: [0xc7,0x00,0xd0,0x48]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: sll $4, $3, 7         # encoding: [0x00,0x83,0x38,0x00]
+# CHECK-EB: sllv  $2, $3, $5      # encoding: [0x00,0x65,0x10,0x10]
+# CHECK-EB: sra $4, $3, 7         # encoding: [0x00,0x83,0x38,0x80]
+# CHECK-EB: srav  $2, $3, $5      # encoding: [0x00,0x65,0x10,0x90]
+# CHECK-EB: srl $4, $3, 7         # encoding: [0x00,0x83,0x38,0x40]
+# CHECK-EB: srlv  $2, $3, $5      # encoding: [0x00,0x65,0x10,0x50]
+# CHECK-EB: rotr  $9, $6, 7       # encoding: [0x01,0x26,0x38,0xc0]
+# CHECK-EB: rotrv $9, $6, $7      # encoding: [0x00,0xc7,0x48,0xd0]
      sll    $4, $3, 7
      sllv   $2, $3, $5
      sra    $4, $3, 7
diff --git a/test/MC/Mips/micromips-tailr.s b/test/MC/Mips/micromips-tailr.s
new file mode 100644
index 000000000000..0c21a7bf2107
--- /dev/null
+++ b/test/MC/Mips/micromips-tailr.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \
+# RUN: -mattr=micromips | llvm-readobj -r \
+# RUN: | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that a microMIPS jump (j) to a label gets the expected fixup
+# (fixup_MICROMIPS_26_S1) and the matching R_MICROMIPS_26_S1 relocation.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: foo:
+# CHECK-FIXUP:   addiu $2, $zero, 1332
+# CHECK-FIXUP:         # encoding: [0x40,0x30,0x34,0x05]
+# CHECK-FIXUP:   j foo # encoding: [A,0xd4'A',A,0b000000AA]
+# CHECK-FIXUP:         #   fixup A - offset: 0,
+# CHECK-FIXUP:             value: foo, kind: fixup_MICROMIPS_26_S1
+# CHECK-FIXUP:   nop   # encoding: [0x00,0x00,0x00,0x00]
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_26_S1
+# CHECK-ELF: ]
+
+foo:
+  addiu $2, $0, 1332
+  j foo
diff --git a/test/MC/Mips/micromips-trap-instructions.s b/test/MC/Mips/micromips-trap-instructions.s
new file mode 100644
index 000000000000..404006c3e4c2
--- /dev/null
+++ b/test/MC/Mips/micromips-trap-instructions.s
@@ -0,0 +1,50 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \
+# RUN: | FileCheck -check-prefix=CHECK-EL %s
+# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \
+# RUN: | FileCheck -check-prefix=CHECK-EB %s
+# Check that the assembler can handle the documented syntax
+# for trap instructions
+#------------------------------------------------------------------------------
+# Trap Instructions
+#------------------------------------------------------------------------------
+# Little endian
+#------------------------------------------------------------------------------
+# CHECK-EL: teq     $8, $9       # encoding: [0x28,0x01,0x3c,0x00]
+# CHECK-EL: tge     $8, $9       # encoding: [0x28,0x01,0x3c,0x02]
+# CHECK-EL: tgeu    $8, $9       # encoding: [0x28,0x01,0x3c,0x04]
+# CHECK-EL: tlt     $8, $9       # encoding: [0x28,0x01,0x3c,0x08]
+# CHECK-EL: tltu    $8, $9       # encoding: [0x28,0x01,0x3c,0x0a]
+# CHECK-EL: tne     $8, $9       # encoding: [0x28,0x01,0x3c,0x0c]
+# CHECK-EL: teqi    $9, 17767    # encoding: [0xc9,0x41,0x67,0x45]
+# CHECK-EL: tgei    $9, 17767    # encoding: [0x29,0x41,0x67,0x45]
+# CHECK-EL: tgeiu   $9, 17767    # encoding: [0x69,0x41,0x67,0x45]
+# CHECK-EL: tlti    $9, 17767    # encoding: [0x09,0x41,0x67,0x45]
+# CHECK-EL: tltiu   $9, 17767    # encoding: [0x49,0x41,0x67,0x45]
+# CHECK-EL: tnei    $9, 17767    # encoding: [0x89,0x41,0x67,0x45]
+#------------------------------------------------------------------------------
+# Big endian
+#------------------------------------------------------------------------------
+# CHECK-EB: teq     $8, $9       # encoding: [0x01,0x28,0x00,0x3c]
+# CHECK-EB: tge     $8, $9       # encoding: [0x01,0x28,0x02,0x3c]
+# CHECK-EB: tgeu    $8, $9       # encoding: [0x01,0x28,0x04,0x3c]
+# CHECK-EB: tlt     $8, $9       # encoding: [0x01,0x28,0x08,0x3c]
+# CHECK-EB: tltu    $8, $9       # encoding: [0x01,0x28,0x0a,0x3c]
+# CHECK-EB: tne     $8, $9       # encoding: [0x01,0x28,0x0c,0x3c]
+# CHECK-EB: teqi    $9, 17767    # encoding: [0x41,0xc9,0x45,0x67]
+# CHECK-EB: tgei    $9, 17767    # encoding: [0x41,0x29,0x45,0x67]
+# CHECK-EB: tgeiu   $9, 17767    # encoding: [0x41,0x69,0x45,0x67]
+# CHECK-EB: tlti    $9, 17767    # encoding: [0x41,0x09,0x45,0x67]
+# CHECK-EB: tltiu   $9, 17767    # encoding: [0x41,0x49,0x45,0x67]
+# CHECK-EB: tnei    $9, 17767    # encoding: [0x41,0x89,0x45,0x67]
+    teq     $8, $9, 0
+    tge     $8, $9, 0
+    tgeu    $8, $9, 0
+    tlt     $8, $9, 0
+    tltu    $8, $9, 0
+    tne     $8, $9, 0
+    teqi    $9, 17767
+    tgei    $9, 17767
+    tgeiu   $9, 17767
+    tlti    $9, 17767
+    tltiu   $9, 17767
+    tnei    $9, 17767
diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s
index 586e88bc4814..68a8da07c2b1 100644
--- a/test/MC/Mips/mips-alu-instructions.s
+++ b/test/MC/Mips/mips-alu-instructions.s
@@ -7,6 +7,7 @@
 # CHECK:  and    $9, $6, $7      # encoding: [0x24,0x48,0xc7,0x00]
 # CHECK:  andi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x30]
 # CHECK:  andi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x30]
+# CHECK:  andi   $9, $9, 17767   # encoding: [0x67,0x45,0x29,0x31]
 # CHECK:  clo    $6, $7          # encoding: [0x21,0x30,0xe6,0x70]
 # CHECK:  clz    $6, $7          # encoding: [0x20,0x30,0xe6,0x70]
 # CHECK:  ins    $19, $9, 6, 7   # encoding: [0x84,0x61,0x33,0x7d]
@@ -14,6 +15,7 @@
 # CHECK:  or     $3, $3, $5      # encoding: [0x25,0x18,0x65,0x00]
 # CHECK:  ori    $4, $5, 17767   # encoding: [0x67,0x45,0xa4,0x34]
 # CHECK:  ori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x34]
+# CHECK:  ori    $11, $11, 128   # encoding: [0x80,0x00,0x6b,0x35]
 # CHECK:  rotr   $9, $6, 7       # encoding: [0xc2,0x49,0x26,0x00]
 # CHECK:  rotrv  $9, $6, $7      # encoding: [0x46,0x48,0xe6,0x00]
 # CHECK:  sll    $4, $3, 7       # encoding: [0xc0,0x21,0x03,0x00]
@@ -28,13 +30,15 @@
 # CHECK:  srl    $4, $3, 7       # encoding: [0xc2,0x21,0x03,0x00]
 # CHECK:  srlv   $2, $3, $5      # encoding: [0x06,0x10,0xa3,0x00]
 # CHECK:  xor    $3, $3, $5      # encoding: [0x26,0x18,0x65,0x00]
-# CHECK:  xori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x38]
 # CHECK:  xori   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x38]
+# CHECK:  xori   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x38]
+# CHECK:  xori   $11, $11, 12    # encoding: [0x0c,0x00,0x6b,0x39]
 # CHECK:  wsbh   $6, $7          # encoding: [0xa0,0x30,0x07,0x7c]
 # CHECK:  not    $7, $8          # encoding: [0x27,0x38,0x00,0x01]
      and    $9,  $6, $7
      and    $9,  $6, 17767
      andi   $9,  $6, 17767
+     andi   $9,  17767
      clo    $6,  $7
      clz    $6,  $7
      ins    $19, $9, 6,7
@@ -42,6 +46,7 @@
      or     $3,  $3, $5
      or     $4,  $5, 17767
      ori    $9,  $6, 17767
+     ori    $11, 128
      rotr   $9,  $6, 7
      rotrv  $9,  $6, $7
      sll    $4,  $3, 7
@@ -58,6 +63,7 @@
      xor    $3,  $3, $5
      xor    $9,  $6, 17767
      xori   $9,  $6, 17767
+     xori   $11, 12
      wsbh   $6,  $7
      not    $7  ,$8
 
@@ -69,7 +75,9 @@
 # CHECK:  addi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x20]
 # CHECK:  addiu  $9, $6, -15001  # encoding: [0x67,0xc5,0xc9,0x24]
 # CHECK:  addi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x20]
+# CHECK:  addi   $9, $9, 17767   # encoding: [0x67,0x45,0x29,0x21]
 # CHECK:  addiu  $9, $6, -15001  # encoding: [0x67,0xc5,0xc9,0x24]
+# CHECK:  addiu  $11, $11, 40    # encoding: [0x28,0x00,0x6b,0x25]
 # CHECK:  addu   $9, $6, $7      # encoding: [0x21,0x48,0xc7,0x00]
 # CHECK:  madd   $6, $7          # encoding: [0x00,0x00,0xc7,0x70]
 # CHECK:  maddu  $6, $7          # encoding: [0x01,0x00,0xc7,0x70]
@@ -78,7 +86,9 @@
 # CHECK:  mult   $3, $5          # encoding: [0x18,0x00,0x65,0x00]
 # CHECK:  multu  $3, $5          # encoding: [0x19,0x00,0x65,0x00]
 # CHECK:  sub    $9, $6, $7      # encoding: [0x22,0x48,0xc7,0x00]
+# CHECK:  addi   $sp, $sp, -56   # encoding: [0xc8,0xff,0xbd,0x23]
 # CHECK:  subu   $4, $3, $5      # encoding: [0x23,0x20,0x65,0x00]
+# CHECK:  addiu   $sp, $sp, -40  # encoding: [0xd8,0xff,0xbd,0x27]
 # CHECK:  neg     $6, $7         # encoding: [0x22,0x30,0x07,0x00]
 # CHECK:  negu    $6, $7         # encoding: [0x23,0x30,0x07,0x00]
 # CHECK:  move    $7, $8         # encoding: [0x21,0x38,0x00,0x01]
@@ -90,7 +100,9 @@
     add    $9,$6,17767
     addu   $9,$6,-15001
     addi   $9,$6,17767
+    addi   $9,17767
     addiu  $9,$6,-15001
+    addiu  $11,40
     addu   $9,$6,$7
     madd   $6,$7
     maddu  $6,$7
@@ -99,7 +111,9 @@
     mult   $3,$5
     multu  $3,$5
     sub    $9,$6,$7
+    sub    $sp,$sp,56
     subu   $4,$3,$5
+    subu    $sp,$sp,40
     neg    $6,$7
     negu   $6,$7
     move   $7,$8
diff --git a/test/MC/Mips/mips-control-instructions.s b/test/MC/Mips/mips-control-instructions.s
new file mode 100644
index 000000000000..4a16c535637a
--- /dev/null
+++ b/test/MC/Mips/mips-control-instructions.s
@@ -0,0 +1,106 @@
+# RUN: llvm-mc %s -triple=mips-unknown-unknown -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck -check-prefix=CHECK32  %s
+# RUN: llvm-mc %s -triple=mips64-unknown-unknown -show-encoding -mcpu=mips64r2 \
+# RUN: | FileCheck -check-prefix=CHECK64  %s
+
+# CHECK32:    break                      # encoding: [0x00,0x00,0x00,0x0d]
+# CHECK32:    break   7, 0               # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK32:    break   7, 5               # encoding: [0x00,0x07,0x01,0x4d]
+# CHECK32:    syscall                    # encoding: [0x00,0x00,0x00,0x0c]
+# CHECK32:    syscall 13396              # encoding: [0x00,0x0d,0x15,0x0c]
+# CHECK32:    eret                       # encoding: [0x42,0x00,0x00,0x18]
+# CHECK32:    deret                      # encoding: [0x42,0x00,0x00,0x1f]
+# CHECK32:    di                         # encoding: [0x41,0x60,0x60,0x00]
+# CHECK32:    di                         # encoding: [0x41,0x60,0x60,0x00]
+# CHECK32:    di      $10                # encoding: [0x41,0x6a,0x60,0x00]
+# CHECK32:    ei                         # encoding: [0x41,0x60,0x60,0x20]
+# CHECK32:    ei                         # encoding: [0x41,0x60,0x60,0x20]
+# CHECK32:    ei      $10                # encoding: [0x41,0x6a,0x60,0x20]
+# CHECK32:    wait                       # encoding: [0x42,0x00,0x00,0x20]
+# CHECK32:    teq     $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+# CHECK32:    teq     $zero, $3, 1       # encoding: [0x00,0x03,0x00,0x74]
+# CHECK32:    teqi    $3, 1              # encoding: [0x04,0x6c,0x00,0x01]
+# CHECK32:    tge     $zero, $3          # encoding: [0x00,0x03,0x00,0x30]
+# CHECK32:    tge     $zero, $3, 3       # encoding: [0x00,0x03,0x00,0xf0]
+# CHECK32:    tgei    $3, 3              # encoding: [0x04,0x68,0x00,0x03]
+# CHECK32:    tgeu    $zero, $3          # encoding: [0x00,0x03,0x00,0x31]
+# CHECK32:    tgeu    $zero, $3, 7       # encoding: [0x00,0x03,0x01,0xf1]
+# CHECK32:    tgeiu   $3, 7              # encoding: [0x04,0x69,0x00,0x07]
+# CHECK32:    tlt     $zero, $3          # encoding: [0x00,0x03,0x00,0x32]
+# CHECK32:    tlt     $zero, $3, 31      # encoding: [0x00,0x03,0x07,0xf2]
+# CHECK32:    tlti    $3, 31             # encoding: [0x04,0x6a,0x00,0x1f]
+# CHECK32:    tltu    $zero, $3          # encoding: [0x00,0x03,0x00,0x33]
+# CHECK32:    tltu    $zero, $3, 255     # encoding: [0x00,0x03,0x3f,0xf3]
+# CHECK32:    tltiu   $3, 255            # encoding: [0x04,0x6b,0x00,0xff]
+# CHECK32:    tne     $zero, $3          # encoding: [0x00,0x03,0x00,0x36]
+# CHECK32:    tne     $zero, $3, 1023    # encoding: [0x00,0x03,0xff,0xf6]
+# CHECK32:    tnei    $3, 1023           # encoding: [0x04,0x6e,0x03,0xff]
+
+# CHECK64:    break                      # encoding: [0x00,0x00,0x00,0x0d]
+# CHECK64:    break   7, 0               # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK64:    break   7, 5               # encoding: [0x00,0x07,0x01,0x4d]
+# CHECK64:    syscall                    # encoding: [0x00,0x00,0x00,0x0c]
+# CHECK64:    syscall 13396              # encoding: [0x00,0x0d,0x15,0x0c]
+# CHECK64:    eret                       # encoding: [0x42,0x00,0x00,0x18]
+# CHECK64:    deret                      # encoding: [0x42,0x00,0x00,0x1f]
+# CHECK64:    di                         # encoding: [0x41,0x60,0x60,0x00]
+# CHECK64:    di                         # encoding: [0x41,0x60,0x60,0x00]
+# CHECK64:    di      $10                # encoding: [0x41,0x6a,0x60,0x00]
+# CHECK64:    ei                         # encoding: [0x41,0x60,0x60,0x20]
+# CHECK64:    ei                         # encoding: [0x41,0x60,0x60,0x20]
+# CHECK64:    ei      $10                # encoding: [0x41,0x6a,0x60,0x20]
+# CHECK64:    wait                       # encoding: [0x42,0x00,0x00,0x20]
+# CHECK64:    teq     $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+# CHECK64:    teq     $zero, $3, 1       # encoding: [0x00,0x03,0x00,0x74]
+# CHECK64:    teqi    $3, 1              # encoding: [0x04,0x6c,0x00,0x01]
+# CHECK64:    tge     $zero, $3          # encoding: [0x00,0x03,0x00,0x30]
+# CHECK64:    tge     $zero, $3, 3       # encoding: [0x00,0x03,0x00,0xf0]
+# CHECK64:    tgei    $3, 3              # encoding: [0x04,0x68,0x00,0x03]
+# CHECK64:    tgeu    $zero, $3          # encoding: [0x00,0x03,0x00,0x31]
+# CHECK64:    tgeu    $zero, $3, 7       # encoding: [0x00,0x03,0x01,0xf1]
+# CHECK64:    tgeiu   $3, 7              # encoding: [0x04,0x69,0x00,0x07]
+# CHECK64:    tlt     $zero, $3          # encoding: [0x00,0x03,0x00,0x32]
+# CHECK64:    tlt     $zero, $3, 31      # encoding: [0x00,0x03,0x07,0xf2]
+# CHECK64:    tlti    $3, 31             # encoding: [0x04,0x6a,0x00,0x1f]
+# CHECK64:    tltu    $zero, $3          # encoding: [0x00,0x03,0x00,0x33]
+# CHECK64:    tltu    $zero, $3, 255     # encoding: [0x00,0x03,0x3f,0xf3]
+# CHECK64:    tltiu   $3, 255            # encoding: [0x04,0x6b,0x00,0xff]
+# CHECK64:    tne     $zero, $3          # encoding: [0x00,0x03,0x00,0x36]
+# CHECK64:    tne     $zero, $3, 1023    # encoding: [0x00,0x03,0xff,0xf6]
+# CHECK64:    tnei    $3, 1023           # encoding: [0x04,0x6e,0x03,0xff]
+
+    break
+    break 7
+    break 7,5
+    syscall
+    syscall 0x3454
+    eret
+    deret
+    di
+    di  $0
+    di  $10
+
+    ei
+    ei  $0
+    ei  $10
+
+    wait
+
+    teq   $0,$3
+    teq   $0,$3,1
+    teqi  $3,1
+    tge   $0,$3
+    tge   $0,$3,3
+    tgei  $3,3
+    tgeu  $0,$3
+    tgeu  $0,$3,7
+    tgeiu $3,7
+    tlt   $0,$3
+    tlt   $0,$3,31
+    tlti  $3,31
+    tltu  $0,$3
+    tltu  $0,$3,255
+    tltiu $3,255
+    tne   $0,$3
+    tne   $0,$3,1023
+    tnei  $3,1023
diff --git a/test/MC/Mips/mips-dsp-instructions.s b/test/MC/Mips/mips-dsp-instructions.s
new file mode 100644
index 000000000000..5a9e8ea9db39
--- /dev/null
+++ b/test/MC/Mips/mips-dsp-instructions.s
@@ -0,0 +1,97 @@
+# RUN: llvm-mc -show-encoding -triple=mips-unknown-unknown -mattr=dspr2 %s | FileCheck %s
+#
+# CHECK:   .text
+# CHECK:   precrq.qb.ph      $16, $17, $18   # encoding: [0x7e,0x32,0x83,0x11]
+# CHECK:   precrq.ph.w       $17, $18, $19   # encoding: [0x7e,0x53,0x8d,0x11]
+# CHECK:   precrq_rs.ph.w    $18, $19, $20   # encoding: [0x7e,0x74,0x95,0x51]
+# CHECK:   precrqu_s.qb.ph   $19, $20, $21   # encoding: [0x7e,0x95,0x9b,0xd1]
+# CHECK:   preceq.w.phl      $20, $21        # encoding: [0x7c,0x15,0xa3,0x12]
+# CHECK:   preceq.w.phr      $21, $22        # encoding: [0x7c,0x16,0xab,0x52]
+# CHECK:   precequ.ph.qbl    $22, $23        # encoding: [0x7c,0x17,0xb1,0x12]
+# CHECK:   precequ.ph.qbr    $23, $24        # encoding: [0x7c,0x18,0xb9,0x52]
+# CHECK:   precequ.ph.qbla   $24, $25        # encoding: [0x7c,0x19,0xc1,0x92]
+# CHECK:   precequ.ph.qbra   $25, $26        # encoding: [0x7c,0x1a,0xc9,0xd2]
+# CHECK:   preceu.ph.qbl     $26, $27        # encoding: [0x7c,0x1b,0xd7,0x12]
+# CHECK:   preceu.ph.qbr     $27, $gp        # encoding: [0x7c,0x1c,0xdf,0x52]
+# CHECK:   preceu.ph.qbla    $gp, $sp        # encoding: [0x7c,0x1d,0xe7,0x92]
+# CHECK:   preceu.ph.qbra    $sp, $fp        # encoding: [0x7c,0x1e,0xef,0xd2]
+
+# CHECK:   precr.qb.ph       $23, $24, $25   # encoding: [0x7f,0x19,0xbb,0x51]
+# CHECK:   precr_sra.ph.w    $24, $25, 0     # encoding: [0x7f,0x38,0x07,0x91]
+# CHECK:   precr_sra.ph.w    $24, $25, 31    # encoding: [0x7f,0x38,0xff,0x91]
+# CHECK:   precr_sra_r.ph.w  $25, $26, 0     # encoding: [0x7f,0x59,0x07,0xd1]
+# CHECK:   precr_sra_r.ph.w  $25, $26, 31    # encoding: [0x7f,0x59,0xff,0xd1]
+
+# CHECK:   lbux $10, $20($26)                # encoding: [0x7f,0x54,0x51,0x8a]
+# CHECK:   lhx  $11, $21($27)                # encoding: [0x7f,0x75,0x59,0x0a]
+# CHECK:   lwx  $12, $22($gp)                # encoding: [0x7f,0x96,0x60,0x0a]
+
+# CHECK:    mult $ac3, $2, $3               # encoding: [0x00,0x43,0x18,0x18]
+# CHECK:    multu $ac2, $4, $5              # encoding: [0x00,0x85,0x10,0x19]
+# CHECK:    madd $ac1, $6, $7               # encoding: [0x70,0xc7,0x08,0x00]
+# CHECK:    maddu $ac0, $8, $9              # encoding: [0x71,0x09,0x00,0x01]
+# CHECK:    msub $ac3, $10, $11             # encoding: [0x71,0x4b,0x18,0x04]
+# CHECK:    msubu $ac2, $12, $13            # encoding: [0x71,0x8d,0x10,0x05]
+# CHECK:    mfhi $14, $ac1                  # encoding: [0x00,0x20,0x70,0x10]
+# CHECK:    mflo $15, $ac0                  # encoding: [0x00,0x00,0x78,0x12]
+# CHECK:    mthi $16, $ac3                  # encoding: [0x02,0x00,0x18,0x11]
+# CHECK:    mtlo $17, $ac2                  # encoding: [0x02,0x20,0x10,0x13]
+
+# CHECK:    mult $2, $3                      # encoding: [0x00,0x43,0x00,0x18]
+# CHECK:    multu $4, $5                     # encoding: [0x00,0x85,0x00,0x19]
+# CHECK:    madd $6, $7                      # encoding: [0x70,0xc7,0x00,0x00]
+# CHECK:    maddu $8, $9                     # encoding: [0x71,0x09,0x00,0x01]
+# CHECK:    msub $10, $11                    # encoding: [0x71,0x4b,0x00,0x04]
+# CHECK:    msubu $12, $13                   # encoding: [0x71,0x8d,0x00,0x05]
+# CHECK:    mfhi $14                         # encoding: [0x00,0x00,0x70,0x10]
+# CHECK:    mflo $15                         # encoding: [0x00,0x00,0x78,0x12]
+# CHECK:    mthi $16                         # encoding: [0x02,0x00,0x00,0x11]
+# CHECK:    mtlo $17                         # encoding: [0x02,0x20,0x00,0x13]
+
+
+  precrq.qb.ph    $16,$17,$18
+  precrq.ph.w     $17,$18,$19
+  precrq_rs.ph.w  $18,$19,$20
+  precrqu_s.qb.ph $19,$20,$21
+  preceq.w.phl    $20,$21
+  preceq.w.phr    $21,$22
+  precequ.ph.qbl  $22,$23
+  precequ.ph.qbr  $23,$24
+  precequ.ph.qbla $24,$25
+  precequ.ph.qbra $25,$26
+  preceu.ph.qbl   $26,$27
+  preceu.ph.qbr   $27,$28
+  preceu.ph.qbla  $28,$29
+  preceu.ph.qbra  $29,$30
+
+  precr.qb.ph     $23,$24,$25
+  precr_sra.ph.w  $24,$25,0
+  precr_sra.ph.w  $24,$25,31
+  precr_sra_r.ph.w  $25,$26,0
+  precr_sra_r.ph.w  $25,$26,31
+
+  lbux $10, $s4($26)
+  lhx  $11, $s5($27)
+  lwx  $12, $s6($28)
+
+  mult $ac3, $2, $3
+  multu $ac2, $4, $5
+  madd $ac1, $6, $7
+  maddu $ac0, $8, $9
+  msub $ac3, $10, $11
+  msubu $ac2, $12, $13
+  mfhi $14, $ac1
+  mflo $15, $ac0
+  mthi $16, $ac3
+  mtlo $17, $ac2
+
+  mult $2, $3
+  multu $4, $5
+  madd $6, $7
+  maddu $8, $9
+  msub $10, $11
+  msubu $12, $13
+  mfhi $14
+  mflo $15
+  mthi $16
+  mtlo $17
diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s
index e515872f260a..bfaef9ecacc9 100644
--- a/test/MC/Mips/mips-fpu-instructions.s
+++ b/test/MC/Mips/mips-fpu-instructions.s
@@ -1,4 +1,5 @@
 # RUN: llvm-mc  %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
+# RUN: llvm-mc  %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for FPU instructions.
 #------------------------------------------------------------------------------
@@ -137,8 +138,11 @@
 #------------------------------------------------------------------------------
 # FP move instructions
 #------------------------------------------------------------------------------
+# CHECK: bc1f    $BB_1                 # encoding: [A,A,0x00,0x45]
+# CHECK: #   fixup A - offset: 0, value: ($BB_1), kind: fixup_Mips_PC16
 
-# CHECK:  cfc1    $6, $fcc0            # encoding: [0x00,0x00,0x46,0x44]
+# CHECK:  cfc1    $6, $0               # encoding: [0x00,0x00,0x46,0x44]
+# CHECK:  ctc1    $10, $31             # encoding: [0x00,0xf8,0xca,0x44]
 # CHECK:  mfc1    $6, $f7              # encoding: [0x00,0x38,0x06,0x44]
 # CHECK:  mfhi    $5                   # encoding: [0x10,0x28,0x00,0x00]
 # CHECK:  mflo    $5                   # encoding: [0x12,0x28,0x00,0x00]
@@ -158,8 +162,22 @@
 # CHECK:  mtc2    $9, $4, 5               # encoding: [0x05,0x20,0x89,0x48]
 # CHECK:  movf    $2, $1, $fcc0           # encoding: [0x01,0x10,0x20,0x00]
 # CHECK:  movt    $2, $1, $fcc0           # encoding: [0x01,0x10,0x21,0x00]
-
+# CHECK:  movt    $4, $5, $fcc4           # encoding: [0x01,0x20,0xb1,0x00]
+# CHECK:  movf.d  $f4, $f6, $fcc2         # encoding: [0x11,0x31,0x28,0x46]
+# CHECK:  movf.s  $f4, $f6, $fcc5         # encoding: [0x11,0x31,0x14,0x46]
+# CHECK:  luxc1   $f0, $6($5)             # encoding: [0x05,0x00,0xa6,0x4c]
+# CHECK:  suxc1   $f4, $24($5)            # encoding: [0x0d,0x20,0xb8,0x4c]
+# CHECK:  lwxc1   $f20, $12($14)          # encoding: [0x00,0x05,0xcc,0x4d]
+# CHECK:  swxc1   $f26, $18($22)          # encoding: [0x08,0xd0,0xd2,0x4e]
+# CHECK:  mfhc1   $17, $f4                # encoding: [0x00,0x20,0x71,0x44]
+# CHECK:  mthc1   $17, $f6                # encoding: [0x00,0x30,0xf1,0x44]
+# CHECK:  swc2    $4, 16($sp)             # encoding: [0x10,0x00,0xa4,0xeb]
+# CHECK:  sdc2    $4, 16($sp)             # encoding: [0x10,0x00,0xa4,0xfb]
+# CHECK:  lwc2    $11, 12($ra)            # encoding: [0x0c,0x00,0xeb,0xcb]
+# CHECK:  ldc2    $11, 12($ra)            # encoding: [0x0c,0x00,0xeb,0xdb]
+   bc1f    $fcc0, $BB_1
    cfc1    $a2,$0
+   ctc1    $10,$31
    mfc1    $a2,$f7
    mfhi    $a1
    mflo    $a1
@@ -179,3 +197,16 @@
    mtc2    $9, $4, 5
    movf    $2, $1, $fcc0
    movt    $2, $1, $fcc0
+   movt    $4, $5, $fcc4
+   movf.d  $f4, $f6, $fcc2
+   movf.s  $f4, $f6, $fcc5
+   luxc1   $f0, $a2($a1)
+   suxc1   $f4, $t8($a1)
+   lwxc1   $f20, $12($14)
+   swxc1   $f26, $s2($s6)
+   mfhc1   $17, $f4
+   mthc1   $17, $f6
+   swc2    $4, 16($sp)
+   sdc2    $4, 16($sp)
+   lwc2    $11, 12($ra)
+   ldc2    $11, 12($ra)
diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s
index 597f6872d60f..989826a1a2b3 100644
--- a/test/MC/Mips/mips-jump-instructions.s
+++ b/test/MC/Mips/mips-jump-instructions.s
@@ -1,6 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
 # RUN: FileCheck -check-prefix=CHECK32  %s
-# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | \
 # RUN: FileCheck -check-prefix=CHECK64  %s
 
 # Check that the assembler can handle the documented syntax
@@ -26,7 +26,11 @@
 # CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
 # CHECK32:   bne $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x15]
 # CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK32:   bal     1332           # encoding: [0x4d,0x01,0x11,0x04]
+# CHECK32:   bal  1332              # encoding: [0x4d,0x01,0x11,0x04]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bnez $11, 1332         # encoding: [0x4d,0x01,0x60,0x15]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   beqz $11, 1332         # encoding: [0x4d,0x01,0x60,0x11]
 # CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
 
 # CHECK64:   b 1332                 # encoding: [0x4d,0x01,0x00,0x10]
@@ -49,6 +53,10 @@
 # CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
 # CHECK64:   bal     1332           # encoding: [0x4d,0x01,0x11,0x04]
 # CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bnez $11, 1332         # encoding: [0x4d,0x01,0x60,0x15]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   beqz $11, 1332         # encoding: [0x4d,0x01,0x60,0x11]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
 
 .set noreorder
 
@@ -72,6 +80,10 @@
          nop
          bal 1332
          nop
+         bnez $11,1332
+         nop
+         beqz $11,1332
+         nop
 
 end_of_code:
 #------------------------------------------------------------------------------
diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s
index db6c972b3b49..8262a46ee4f3 100644
--- a/test/MC/Mips/mips64-alu-instructions.s
+++ b/test/MC/Mips/mips64-alu-instructions.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for arithmetic and logical instructions.
 #------------------------------------------------------------------------------
@@ -69,8 +69,12 @@
 # CHECK:  daddi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x60]
 # CHECK:  daddiu  $9, $6, -15001  # encoding: [0x67,0xc5,0xc9,0x64]
 # CHECK:  daddi   $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x60]
+# CHECK:  daddi   $9, $9, 17767   # encoding: [0x67,0x45,0x29,0x61]
 # CHECK:  daddiu  $9, $6, -15001  # encoding: [0x67,0xc5,0xc9,0x64]
+# CHECK:  daddiu  $9, $9, -15001  # encoding: [0x67,0xc5,0x29,0x65]
 # CHECK:  daddu   $9, $6, $7      # encoding: [0x2d,0x48,0xc7,0x00]
+# CHECK:  drotr   $9, $6, 20      # encoding: [0x3a,0x4d,0x26,0x00]
+# CHECK:  drotr32 $9, $6, 52      # encoding: [0x3e,0x4d,0x26,0x00]
 # CHECK:  madd   $6, $7          # encoding: [0x00,0x00,0xc7,0x70]
 # CHECK:  maddu  $6, $7          # encoding: [0x01,0x00,0xc7,0x70]
 # CHECK:  msub   $6, $7          # encoding: [0x04,0x00,0xc7,0x70]
@@ -88,8 +92,12 @@
     dadd    $9,$6,17767
     daddu   $9,$6,-15001
     daddi   $9,$6,17767
+    daddi   $9,17767
     daddiu  $9,$6,-15001
+    daddiu  $9,-15001
     daddu   $9,$6,$7
+    drotr   $9, $6, 20
+    drotr32 $9, $6, 52
     madd   $6,$7
     maddu  $6,$7
     msub   $6,$7
diff --git a/test/MC/Mips/mips64-instructions.s b/test/MC/Mips/mips64-instructions.s
new file mode 100644
index 000000000000..74e9d13197e9
--- /dev/null
+++ b/test/MC/Mips/mips64-instructions.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc  %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
+
+# CHECK: ldxc1 $f2, $2($10)           # encoding: [0x81,0x00,0x42,0x4d]
+# CHECK: sdxc1 $f8, $4($25)           # encoding: [0x09,0x40,0x24,0x4f]
+
+  ldxc1 $f2, $2($10)
+  sdxc1 $f8, $a0($t9)
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
index 45247cd162b5..44e707c89452 100644
--- a/test/MC/Mips/mips_directives.s
+++ b/test/MC/Mips/mips_directives.s
@@ -19,9 +19,11 @@ $BB0_2:
     .set    noat
 $JTI0_0:
     .gpword    ($BB0_2)
+
     .word 0x77fffffc
 # CHECK: $JTI0_0:
-# CHECK-NEXT:     .4byte    2013265916
+# CHECK: .gpword ($BB0_2)
+# CHECK:     .4byte    2013265916
     .set  at=$12
     .set macro
 # CHECK:   b 1332               # encoding: [0x10,0x00,0x01,0x4d]
@@ -37,7 +39,9 @@ $JTI0_0:
     .set  at=$a0
     .set STORE_MASK,$t7
     .set FPU_MASK,$f7
-#CHECK:    abs.s   $f6, $f7           # encoding: [0x46,0x00,0x39,0x85]
-#CHECK:    and     $3, $15, $15       # encoding: [0x01,0xef,0x18,0x24]
-    abs.s      $f6,FPU_MASK
-    and $3,$t7,STORE_MASK
+    .set r3,$3
+    .set f6,$f6
+# CHECK:    abs.s   $f6, $f7           # encoding: [0x46,0x00,0x39,0x85]
+# CHECK:    and     $3, $15, $15       # encoding: [0x01,0xef,0x18,0x24]
+    abs.s  f6,FPU_MASK
+    and    r3,$t7,STORE_MASK
diff --git a/test/MC/Mips/msa/test_2r.s b/test/MC/Mips/msa/test_2r.s
new file mode 100644
index 000000000000..67a2b6f0164e
--- /dev/null
+++ b/test/MC/Mips/msa/test_2r.s
@@ -0,0 +1,51 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        fill.b  $w30, $9                # encoding: [0x7b,0x00,0x4f,0x9e]
+# CHECK:        fill.h  $w31, $23               # encoding: [0x7b,0x01,0xbf,0xde]
+# CHECK:        fill.w  $w16, $24               # encoding: [0x7b,0x02,0xc4,0x1e]
+# CHECK:        nloc.b  $w21, $w0               # encoding: [0x7b,0x08,0x05,0x5e]
+# CHECK:        nloc.h  $w18, $w31              # encoding: [0x7b,0x09,0xfc,0x9e]
+# CHECK:        nloc.w  $w2, $w23               # encoding: [0x7b,0x0a,0xb8,0x9e]
+# CHECK:        nloc.d  $w4, $w10               # encoding: [0x7b,0x0b,0x51,0x1e]
+# CHECK:        nlzc.b  $w31, $w2               # encoding: [0x7b,0x0c,0x17,0xde]
+# CHECK:        nlzc.h  $w27, $w22              # encoding: [0x7b,0x0d,0xb6,0xde]
+# CHECK:        nlzc.w  $w10, $w29              # encoding: [0x7b,0x0e,0xea,0x9e]
+# CHECK:        nlzc.d  $w25, $w9               # encoding: [0x7b,0x0f,0x4e,0x5e]
+# CHECK:        pcnt.b  $w20, $w18              # encoding: [0x7b,0x04,0x95,0x1e]
+# CHECK:        pcnt.h  $w0, $w8                # encoding: [0x7b,0x05,0x40,0x1e]
+# CHECK:        pcnt.w  $w23, $w9               # encoding: [0x7b,0x06,0x4d,0xde]
+# CHECK:        pcnt.d  $w21, $w24              # encoding: [0x7b,0x07,0xc5,0x5e]
+
+# CHECKOBJDUMP:        fill.b  $w30, $9
+# CHECKOBJDUMP:        fill.h  $w31, $23
+# CHECKOBJDUMP:        fill.w  $w16, $24
+# CHECKOBJDUMP:        nloc.b  $w21, $w0
+# CHECKOBJDUMP:        nloc.h  $w18, $w31
+# CHECKOBJDUMP:        nloc.w  $w2, $w23
+# CHECKOBJDUMP:        nloc.d  $w4, $w10
+# CHECKOBJDUMP:        nlzc.b  $w31, $w2
+# CHECKOBJDUMP:        nlzc.h  $w27, $w22
+# CHECKOBJDUMP:        nlzc.w  $w10, $w29
+# CHECKOBJDUMP:        nlzc.d  $w25, $w9
+# CHECKOBJDUMP:        pcnt.b  $w20, $w18
+# CHECKOBJDUMP:        pcnt.h  $w0, $w8
+# CHECKOBJDUMP:        pcnt.w  $w23, $w9
+# CHECKOBJDUMP:        pcnt.d  $w21, $w24
+
+                fill.b  $w30, $9
+                fill.h  $w31, $23
+                fill.w  $w16, $24
+                nloc.b  $w21, $w0
+                nloc.h  $w18, $w31
+                nloc.w  $w2, $w23
+                nloc.d  $w4, $w10
+                nlzc.b  $w31, $w2
+                nlzc.h  $w27, $w22
+                nlzc.w  $w10, $w29
+                nlzc.d  $w25, $w9
+                pcnt.b  $w20, $w18
+                pcnt.h  $w0, $w8
+                pcnt.w  $w23, $w9
+                pcnt.d  $w21, $w24
diff --git a/test/MC/Mips/msa/test_2rf.s b/test/MC/Mips/msa/test_2rf.s
new file mode 100644
index 000000000000..64025a41e1bf
--- /dev/null
+++ b/test/MC/Mips/msa/test_2rf.s
@@ -0,0 +1,102 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+# Pass 1 (above) prints every instruction with its encoding for the default-prefix checks; pass 2 (below) assembles to an object, disassembles it with llvm-objdump and uses the CHECKOBJDUMP prefix.
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+# NOTE(review): both passes combine -triple=mipsel-unknown-linux with -arch=mips -- confirm which option decides the byte order of the encodings expected below. The three lists that follow share one instruction order.
+# CHECK:        fclass.w        $w26, $w12              # encoding: [0x7b,0x20,0x66,0x9e]
+# CHECK:        fclass.d        $w24, $w17              # encoding: [0x7b,0x21,0x8e,0x1e]
+# CHECK:        fexupl.w        $w8, $w0                # encoding: [0x7b,0x30,0x02,0x1e]
+# CHECK:        fexupl.d        $w17, $w29              # encoding: [0x7b,0x31,0xec,0x5e]
+# CHECK:        fexupr.w        $w13, $w4               # encoding: [0x7b,0x32,0x23,0x5e]
+# CHECK:        fexupr.d        $w5, $w2                # encoding: [0x7b,0x33,0x11,0x5e]
+# CHECK:        ffint_s.w       $w20, $w29              # encoding: [0x7b,0x3c,0xed,0x1e]
+# CHECK:        ffint_s.d       $w12, $w15              # encoding: [0x7b,0x3d,0x7b,0x1e]
+# CHECK:        ffint_u.w       $w7, $w27               # encoding: [0x7b,0x3e,0xd9,0xde]
+# CHECK:        ffint_u.d       $w19, $w16              # encoding: [0x7b,0x3f,0x84,0xde]
+# CHECK:        ffql.w          $w31, $w13              # encoding: [0x7b,0x34,0x6f,0xde]
+# CHECK:        ffql.d          $w12, $w13              # encoding: [0x7b,0x35,0x6b,0x1e]
+# CHECK:        ffqr.w          $w27, $w30              # encoding: [0x7b,0x36,0xf6,0xde]
+# CHECK:        ffqr.d          $w30, $w15              # encoding: [0x7b,0x37,0x7f,0x9e]
+# CHECK:        flog2.w         $w25, $w31              # encoding: [0x7b,0x2e,0xfe,0x5e]
+# CHECK:        flog2.d         $w18, $w10              # encoding: [0x7b,0x2f,0x54,0x9e]
+# CHECK:        frint.w         $w7, $w15               # encoding: [0x7b,0x2c,0x79,0xde]
+# CHECK:        frint.d         $w21, $w22              # encoding: [0x7b,0x2d,0xb5,0x5e]
+# CHECK:        frcp.w          $w19, $w0               # encoding: [0x7b,0x2a,0x04,0xde]
+# CHECK:        frcp.d          $w4, $w14               # encoding: [0x7b,0x2b,0x71,0x1e]
+# CHECK:        frsqrt.w        $w12, $w17              # encoding: [0x7b,0x28,0x8b,0x1e]
+# CHECK:        frsqrt.d        $w23, $w11              # encoding: [0x7b,0x29,0x5d,0xde]
+# CHECK:        fsqrt.w         $w0, $w11               # encoding: [0x7b,0x26,0x58,0x1e]
+# CHECK:        fsqrt.d         $w15, $w12              # encoding: [0x7b,0x27,0x63,0xde]
+# CHECK:        ftint_s.w       $w30, $w5               # encoding: [0x7b,0x38,0x2f,0x9e]
+# CHECK:        ftint_s.d       $w5, $w23               # encoding: [0x7b,0x39,0xb9,0x5e]
+# CHECK:        ftint_u.w       $w20, $w14              # encoding: [0x7b,0x3a,0x75,0x1e]
+# CHECK:        ftint_u.d       $w23, $w21              # encoding: [0x7b,0x3b,0xad,0xde]
+# CHECK:        ftrunc_s.w      $w29, $w17              # encoding: [0x7b,0x22,0x8f,0x5e]
+# CHECK:        ftrunc_s.d      $w12, $w27              # encoding: [0x7b,0x23,0xdb,0x1e]
+# CHECK:        ftrunc_u.w      $w17, $w15              # encoding: [0x7b,0x24,0x7c,0x5e]
+# CHECK:        ftrunc_u.d      $w5, $w27               # encoding: [0x7b,0x25,0xd9,0x5e]
+
+# CHECKOBJDUMP:        fclass.w        $w26, $w12
+# CHECKOBJDUMP:        fclass.d        $w24, $w17
+# CHECKOBJDUMP:        fexupl.w        $w8, $w0
+# CHECKOBJDUMP:        fexupl.d        $w17, $w29
+# CHECKOBJDUMP:        fexupr.w        $w13, $w4
+# CHECKOBJDUMP:        fexupr.d        $w5, $w2
+# CHECKOBJDUMP:        ffint_s.w       $w20, $w29
+# CHECKOBJDUMP:        ffint_s.d       $w12, $w15
+# CHECKOBJDUMP:        ffint_u.w       $w7, $w27
+# CHECKOBJDUMP:        ffint_u.d       $w19, $w16
+# CHECKOBJDUMP:        ffql.w          $w31, $w13
+# CHECKOBJDUMP:        ffql.d          $w12, $w13
+# CHECKOBJDUMP:        ffqr.w          $w27, $w30
+# CHECKOBJDUMP:        ffqr.d          $w30, $w15
+# CHECKOBJDUMP:        flog2.w         $w25, $w31
+# CHECKOBJDUMP:        flog2.d         $w18, $w10
+# CHECKOBJDUMP:        frint.w         $w7, $w15
+# CHECKOBJDUMP:        frint.d         $w21, $w22
+# CHECKOBJDUMP:        frcp.w          $w19, $w0
+# CHECKOBJDUMP:        frcp.d          $w4, $w14
+# CHECKOBJDUMP:        frsqrt.w        $w12, $w17
+# CHECKOBJDUMP:        frsqrt.d        $w23, $w11
+# CHECKOBJDUMP:        fsqrt.w         $w0, $w11
+# CHECKOBJDUMP:        fsqrt.d         $w15, $w12
+# CHECKOBJDUMP:        ftint_s.w       $w30, $w5
+# CHECKOBJDUMP:        ftint_s.d       $w5, $w23
+# CHECKOBJDUMP:        ftint_u.w       $w20, $w14
+# CHECKOBJDUMP:        ftint_u.d       $w23, $w21
+# CHECKOBJDUMP:        ftrunc_s.w      $w29, $w17
+# CHECKOBJDUMP:        ftrunc_s.d      $w12, $w27
+# CHECKOBJDUMP:        ftrunc_u.w      $w17, $w15
+# CHECKOBJDUMP:        ftrunc_u.d      $w5, $w27
+
+                fclass.w        $w26, $w12
+                fclass.d        $w24, $w17
+                fexupl.w        $w8, $w0
+                fexupl.d        $w17, $w29
+                fexupr.w        $w13, $w4
+                fexupr.d        $w5, $w2
+                ffint_s.w       $w20, $w29
+                ffint_s.d       $w12, $w15
+                ffint_u.w       $w7, $w27
+                ffint_u.d       $w19, $w16
+                ffql.w          $w31, $w13
+                ffql.d          $w12, $w13
+                ffqr.w          $w27, $w30
+                ffqr.d          $w30, $w15
+                flog2.w         $w25, $w31
+                flog2.d         $w18, $w10
+                frint.w         $w7, $w15
+                frint.d         $w21, $w22
+                frcp.w          $w19, $w0
+                frcp.d          $w4, $w14
+                frsqrt.w        $w12, $w17
+                frsqrt.d        $w23, $w11
+                fsqrt.w         $w0, $w11
+                fsqrt.d         $w15, $w12
+                ftint_s.w       $w30, $w5
+                ftint_s.d       $w5, $w23
+                ftint_u.w       $w20, $w14
+                ftint_u.d       $w23, $w21
+                ftrunc_s.w      $w29, $w17
+                ftrunc_s.d      $w12, $w27
+                ftrunc_u.w      $w17, $w15
+                ftrunc_u.d      $w5, $w27
diff --git a/test/MC/Mips/msa/test_3r.s b/test/MC/Mips/msa/test_3r.s
new file mode 100644
index 000000000000..3047ecb7aa8d
--- /dev/null
+++ b/test/MC/Mips/msa/test_3r.s
@@ -0,0 +1,732 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        add_a.b         $w26, $w9, $w4                  # encoding: [0x78,0x04,0x4e,0x90]
+# CHECK:        add_a.h         $w23, $w27, $w31                # encoding: [0x78,0x3f,0xdd,0xd0]
+# CHECK:        add_a.w         $w11, $w6, $w22                 # encoding: [0x78,0x56,0x32,0xd0]
+# CHECK:        add_a.d         $w6, $w10, $w0                  # encoding: [0x78,0x60,0x51,0x90]
+# CHECK:        adds_a.b        $w19, $w24, $w19                # encoding: [0x78,0x93,0xc4,0xd0]
+# CHECK:        adds_a.h        $w25, $w6, $w4                  # encoding: [0x78,0xa4,0x36,0x50]
+# CHECK:        adds_a.w        $w25, $w17, $w27                # encoding: [0x78,0xdb,0x8e,0x50]
+# CHECK:        adds_a.d        $w15, $w18, $w26                # encoding: [0x78,0xfa,0x93,0xd0]
+# CHECK:        adds_s.b        $w29, $w11, $w19                # encoding: [0x79,0x13,0x5f,0x50]
+# CHECK:        adds_s.h        $w5, $w23, $w26                 # encoding: [0x79,0x3a,0xb9,0x50]
+# CHECK:        adds_s.w        $w16, $w14, $w13                # encoding: [0x79,0x4d,0x74,0x10]
+# CHECK:        adds_s.d        $w2, $w14, $w28                 # encoding: [0x79,0x7c,0x70,0x90]
+# CHECK:        adds_u.b        $w3, $w17, $w14                 # encoding: [0x79,0x8e,0x88,0xd0]
+# CHECK:        adds_u.h        $w10, $w30, $w4                 # encoding: [0x79,0xa4,0xf2,0x90]
+# CHECK:        adds_u.w        $w15, $w18, $w20                # encoding: [0x79,0xd4,0x93,0xd0]
+# CHECK:        adds_u.d        $w30, $w10, $w9                 # encoding: [0x79,0xe9,0x57,0x90]
+# CHECK:        addv.b          $w24, $w20, $w21                # encoding: [0x78,0x15,0xa6,0x0e]
+# CHECK:        addv.h          $w4, $w13, $w27                 # encoding: [0x78,0x3b,0x69,0x0e]
+# CHECK:        addv.w          $w19, $w11, $w14                # encoding: [0x78,0x4e,0x5c,0xce]
+# CHECK:        addv.d          $w2, $w21, $w31                 # encoding: [0x78,0x7f,0xa8,0x8e]
+# CHECK:        asub_s.b        $w23, $w16, $w3                 # encoding: [0x7a,0x03,0x85,0xd1]
+# CHECK:        asub_s.h        $w22, $w17, $w25                # encoding: [0x7a,0x39,0x8d,0x91]
+# CHECK:        asub_s.w        $w24, $w1, $w9                  # encoding: [0x7a,0x49,0x0e,0x11]
+# CHECK:        asub_s.d        $w13, $w12, $w12                # encoding: [0x7a,0x6c,0x63,0x51]
+# CHECK:        asub_u.b        $w10, $w29, $w11                # encoding: [0x7a,0x8b,0xea,0x91]
+# CHECK:        asub_u.h        $w18, $w9, $w15                 # encoding: [0x7a,0xaf,0x4c,0x91]
+# CHECK:        asub_u.w        $w10, $w19, $w31                # encoding: [0x7a,0xdf,0x9a,0x91]
+# CHECK:        asub_u.d        $w17, $w10, $w0                 # encoding: [0x7a,0xe0,0x54,0x51]
+# CHECK:        ave_s.b         $w2, $w5, $w1                   # encoding: [0x7a,0x01,0x28,0x90]
+# CHECK:        ave_s.h         $w16, $w19, $w9                 # encoding: [0x7a,0x29,0x9c,0x10]
+# CHECK:        ave_s.w         $w17, $w31, $w5                 # encoding: [0x7a,0x45,0xfc,0x50]
+# CHECK:        ave_s.d         $w27, $w25, $w10                # encoding: [0x7a,0x6a,0xce,0xd0]
+# CHECK:        ave_u.b         $w16, $w19, $w9                 # encoding: [0x7a,0x89,0x9c,0x10]
+# CHECK:        ave_u.h         $w28, $w28, $w11                # encoding: [0x7a,0xab,0xe7,0x10]
+# CHECK:        ave_u.w         $w11, $w12, $w11                # encoding: [0x7a,0xcb,0x62,0xd0]
+# CHECK:        ave_u.d         $w30, $w19, $w28                # encoding: [0x7a,0xfc,0x9f,0x90]
+# CHECK:        aver_s.b        $w26, $w16, $w2                 # encoding: [0x7b,0x02,0x86,0x90]
+# CHECK:        aver_s.h        $w31, $w27, $w27                # encoding: [0x7b,0x3b,0xdf,0xd0]
+# CHECK:        aver_s.w        $w28, $w18, $w25                # encoding: [0x7b,0x59,0x97,0x10]
+# CHECK:        aver_s.d        $w29, $w21, $w27                # encoding: [0x7b,0x7b,0xaf,0x50]
+# CHECK:        aver_u.b        $w29, $w26, $w3                 # encoding: [0x7b,0x83,0xd7,0x50]
+# CHECK:        aver_u.h        $w18, $w18, $w9                 # encoding: [0x7b,0xa9,0x94,0x90]
+# CHECK:        aver_u.w        $w17, $w25, $w29                # encoding: [0x7b,0xdd,0xcc,0x50]
+# CHECK:        aver_u.d        $w22, $w22, $w19                # encoding: [0x7b,0xf3,0xb5,0x90]
+# CHECK:        bclr.b          $w2, $w15, $w29                 # encoding: [0x79,0x9d,0x78,0x8d]
+# CHECK:        bclr.h          $w16, $w21, $w28                # encoding: [0x79,0xbc,0xac,0x0d]
+# CHECK:        bclr.w          $w19, $w2, $w9                  # encoding: [0x79,0xc9,0x14,0xcd]
+# CHECK:        bclr.d          $w27, $w31, $w4                 # encoding: [0x79,0xe4,0xfe,0xcd]
+# CHECK:        binsl.b         $w5, $w16, $w24                 # encoding: [0x7b,0x18,0x81,0x4d]
+# CHECK:        binsl.h         $w30, $w5, $w10                 # encoding: [0x7b,0x2a,0x2f,0x8d]
+# CHECK:        binsl.w         $w14, $w15, $w13                # encoding: [0x7b,0x4d,0x7b,0x8d]
+# CHECK:        binsl.d         $w23, $w20, $w12                # encoding: [0x7b,0x6c,0xa5,0xcd]
+# CHECK:        binsr.b         $w22, $w11, $w2                 # encoding: [0x7b,0x82,0x5d,0x8d]
+# CHECK:        binsr.h         $w0, $w26, $w6                  # encoding: [0x7b,0xa6,0xd0,0x0d]
+# CHECK:        binsr.w         $w26, $w3, $w28                 # encoding: [0x7b,0xdc,0x1e,0x8d]
+# CHECK:        binsr.d         $w0, $w0, $w21                  # encoding: [0x7b,0xf5,0x00,0x0d]
+# CHECK:        bneg.b          $w0, $w11, $w24                 # encoding: [0x7a,0x98,0x58,0x0d]
+# CHECK:        bneg.h          $w28, $w16, $w4                 # encoding: [0x7a,0xa4,0x87,0x0d]
+# CHECK:        bneg.w          $w3, $w26, $w19                 # encoding: [0x7a,0xd3,0xd0,0xcd]
+# CHECK:        bneg.d          $w13, $w29, $w15                # encoding: [0x7a,0xef,0xeb,0x4d]
+# CHECK:        bset.b          $w31, $w5, $w31                 # encoding: [0x7a,0x1f,0x2f,0xcd]
+# CHECK:        bset.h          $w14, $w12, $w6                 # encoding: [0x7a,0x26,0x63,0x8d]
+# CHECK:        bset.w          $w31, $w9, $w12                 # encoding: [0x7a,0x4c,0x4f,0xcd]
+# CHECK:        bset.d          $w5, $w22, $w5                  # encoding: [0x7a,0x65,0xb1,0x4d]
+# CHECK:        ceq.b           $w31, $w31, $w18                # encoding: [0x78,0x12,0xff,0xcf]
+# CHECK:        ceq.h           $w10, $w27, $w9                 # encoding: [0x78,0x29,0xda,0x8f]
+# CHECK:        ceq.w           $w9, $w5, $w14                  # encoding: [0x78,0x4e,0x2a,0x4f]
+# CHECK:        ceq.d           $w5, $w17, $w0                  # encoding: [0x78,0x60,0x89,0x4f]
+# CHECK:        cle_s.b         $w23, $w4, $w9                  # encoding: [0x7a,0x09,0x25,0xcf]
+# CHECK:        cle_s.h         $w22, $w27, $w19                # encoding: [0x7a,0x33,0xdd,0x8f]
+# CHECK:        cle_s.w         $w30, $w26, $w10                # encoding: [0x7a,0x4a,0xd7,0x8f]
+# CHECK:        cle_s.d         $w18, $w5, $w10                 # encoding: [0x7a,0x6a,0x2c,0x8f]
+# CHECK:        cle_u.b         $w1, $w25, $w0                  # encoding: [0x7a,0x80,0xc8,0x4f]
+# CHECK:        cle_u.h         $w7, $w0, $w29                  # encoding: [0x7a,0xbd,0x01,0xcf]
+# CHECK:        cle_u.w         $w25, $w18, $w1                 # encoding: [0x7a,0xc1,0x96,0x4f]
+# CHECK:        cle_u.d         $w6, $w0, $w30                  # encoding: [0x7a,0xfe,0x01,0x8f]
+# CHECK:        clt_s.b         $w25, $w2, $w21                 # encoding: [0x79,0x15,0x16,0x4f]
+# CHECK:        clt_s.h         $w2, $w19, $w9                  # encoding: [0x79,0x29,0x98,0x8f]
+# CHECK:        clt_s.w         $w23, $w8, $w16                 # encoding: [0x79,0x50,0x45,0xcf]
+# CHECK:        clt_s.d         $w7, $w30, $w12                 # encoding: [0x79,0x6c,0xf1,0xcf]
+# CHECK:        clt_u.b         $w2, $w31, $w13                 # encoding: [0x79,0x8d,0xf8,0x8f]
+# CHECK:        clt_u.h         $w16, $w31, $w23                # encoding: [0x79,0xb7,0xfc,0x0f]
+# CHECK:        clt_u.w         $w3, $w24, $w9                  # encoding: [0x79,0xc9,0xc0,0xcf]
+# CHECK:        clt_u.d         $w7, $w0, $w1                   # encoding: [0x79,0xe1,0x01,0xcf]
+# CHECK:        div_s.b         $w29, $w3, $w18                 # encoding: [0x7a,0x12,0x1f,0x52]
+# CHECK:        div_s.h         $w17, $w16, $w13                # encoding: [0x7a,0x2d,0x84,0x52]
+# CHECK:        div_s.w         $w4, $w25, $w30                 # encoding: [0x7a,0x5e,0xc9,0x12]
+# CHECK:        div_s.d         $w31, $w9, $w20                 # encoding: [0x7a,0x74,0x4f,0xd2]
+# CHECK:        div_u.b         $w6, $w29, $w10                 # encoding: [0x7a,0x8a,0xe9,0x92]
+# CHECK:        div_u.h         $w24, $w21, $w14                # encoding: [0x7a,0xae,0xae,0x12]
+# CHECK:        div_u.w         $w29, $w14, $w25                # encoding: [0x7a,0xd9,0x77,0x52]
+# CHECK:        div_u.d         $w31, $w1, $w21                 # encoding: [0x7a,0xf5,0x0f,0xd2]
+# CHECK:        dotp_s.h        $w23, $w22, $w25                # encoding: [0x78,0x39,0xb5,0xd3]
+# CHECK:        dotp_s.w        $w20, $w14, $w5                 # encoding: [0x78,0x45,0x75,0x13]
+# CHECK:        dotp_s.d        $w17, $w2, $w22                 # encoding: [0x78,0x76,0x14,0x53]
+# CHECK:        dotp_u.h        $w13, $w2, $w6                  # encoding: [0x78,0xa6,0x13,0x53]
+# CHECK:        dotp_u.w        $w15, $w22, $w21                # encoding: [0x78,0xd5,0xb3,0xd3]
+# CHECK:        dotp_u.d        $w4, $w16, $w26                 # encoding: [0x78,0xfa,0x81,0x13]
+# CHECK:        dpadd_s.h       $w1, $w28, $w22                 # encoding: [0x79,0x36,0xe0,0x53]
+# CHECK:        dpadd_s.w       $w10, $w1, $w12                 # encoding: [0x79,0x4c,0x0a,0x93]
+# CHECK:        dpadd_s.d       $w3, $w21, $w27                 # encoding: [0x79,0x7b,0xa8,0xd3]
+# CHECK:        dpadd_u.h       $w17, $w5, $w20                 # encoding: [0x79,0xb4,0x2c,0x53]
+# CHECK:        dpadd_u.w       $w24, $w8, $w16                 # encoding: [0x79,0xd0,0x46,0x13]
+# CHECK:        dpadd_u.d       $w15, $w29, $w16                # encoding: [0x79,0xf0,0xeb,0xd3]
+# CHECK:        dpsub_s.h       $w4, $w11, $w12                 # encoding: [0x7a,0x2c,0x59,0x13]
+# CHECK:        dpsub_s.w       $w4, $w7, $w6                   # encoding: [0x7a,0x46,0x39,0x13]
+# CHECK:        dpsub_s.d       $w31, $w12, $w28                # encoding: [0x7a,0x7c,0x67,0xd3]
+# CHECK:        dpsub_u.h       $w4, $w25, $w17                 # encoding: [0x7a,0xb1,0xc9,0x13]
+# CHECK:        dpsub_u.w       $w19, $w25, $w16                # encoding: [0x7a,0xd0,0xcc,0xd3]
+# CHECK:        dpsub_u.d       $w7, $w10, $w26                 # encoding: [0x7a,0xfa,0x51,0xd3]
+# CHECK:        hadd_s.h        $w28, $w24, $w2                 # encoding: [0x7a,0x22,0xc7,0x15]
+# CHECK:        hadd_s.w        $w24, $w17, $w11                # encoding: [0x7a,0x4b,0x8e,0x15]
+# CHECK:        hadd_s.d        $w17, $w15, $w20                # encoding: [0x7a,0x74,0x7c,0x55]
+# CHECK:        hadd_u.h        $w12, $w29, $w17                # encoding: [0x7a,0xb1,0xeb,0x15]
+# CHECK:        hadd_u.w        $w9, $w5, $w6                   # encoding: [0x7a,0xc6,0x2a,0x55]
+# CHECK:        hadd_u.d        $w1, $w20, $w6                  # encoding: [0x7a,0xe6,0xa0,0x55]
+# CHECK:        hsub_s.h        $w16, $w14, $w29                # encoding: [0x7b,0x3d,0x74,0x15]
+# CHECK:        hsub_s.w        $w9, $w13, $w11                 # encoding: [0x7b,0x4b,0x6a,0x55]
+# CHECK:        hsub_s.d        $w30, $w18, $w14                # encoding: [0x7b,0x6e,0x97,0x95]
+# CHECK:        hsub_u.h        $w7, $w12, $w14                 # encoding: [0x7b,0xae,0x61,0xd5]
+# CHECK:        hsub_u.w        $w21, $w5, $w5                  # encoding: [0x7b,0xc5,0x2d,0x55]
+# CHECK:        hsub_u.d        $w11, $w12, $w31                # encoding: [0x7b,0xff,0x62,0xd5]
+# CHECK:        ilvev.b         $w18, $w16, $w30                # encoding: [0x7b,0x1e,0x84,0x94]
+# CHECK:        ilvev.h         $w14, $w0, $w13                 # encoding: [0x7b,0x2d,0x03,0x94]
+# CHECK:        ilvev.w         $w12, $w25, $w22                # encoding: [0x7b,0x56,0xcb,0x14]
+# CHECK:        ilvev.d         $w30, $w27, $w3                 # encoding: [0x7b,0x63,0xdf,0x94]
+# CHECK:        ilvl.b          $w29, $w3, $w21                 # encoding: [0x7a,0x15,0x1f,0x54]
+# CHECK:        ilvl.h          $w27, $w10, $w17                # encoding: [0x7a,0x31,0x56,0xd4]
+# CHECK:        ilvl.w          $w6, $w1, $w0                   # encoding: [0x7a,0x40,0x09,0x94]
+# CHECK:        ilvl.d          $w3, $w16, $w24                 # encoding: [0x7a,0x78,0x80,0xd4]
+# CHECK:        ilvod.b         $w11, $w5, $w20                 # encoding: [0x7b,0x94,0x2a,0xd4]
+# CHECK:        ilvod.h         $w18, $w13, $w31                # encoding: [0x7b,0xbf,0x6c,0x94]
+# CHECK:        ilvod.w         $w29, $w16, $w24                # encoding: [0x7b,0xd8,0x87,0x54]
+# CHECK:        ilvod.d         $w22, $w12, $w29                # encoding: [0x7b,0xfd,0x65,0x94]
+# CHECK:        ilvr.b          $w4, $w30, $w6                  # encoding: [0x7a,0x86,0xf1,0x14]
+# CHECK:        ilvr.h          $w28, $w19, $w29                # encoding: [0x7a,0xbd,0x9f,0x14]
+# CHECK:        ilvr.w          $w18, $w20, $w21                # encoding: [0x7a,0xd5,0xa4,0x94]
+# CHECK:        ilvr.d          $w23, $w30, $w12                # encoding: [0x7a,0xec,0xf5,0xd4]
+# CHECK:        maddv.b         $w17, $w31, $w29                # encoding: [0x78,0x9d,0xfc,0x52]
+# CHECK:        maddv.h         $w7, $w24, $w9                  # encoding: [0x78,0xa9,0xc1,0xd2]
+# CHECK:        maddv.w         $w22, $w22, $w20                # encoding: [0x78,0xd4,0xb5,0x92]
+# CHECK:        maddv.d         $w30, $w26, $w20                # encoding: [0x78,0xf4,0xd7,0x92]
+# CHECK:        max_a.b         $w23, $w11, $w23                # encoding: [0x7b,0x17,0x5d,0xce]
+# CHECK:        max_a.h         $w20, $w5, $w30                 # encoding: [0x7b,0x3e,0x2d,0x0e]
+# CHECK:        max_a.w         $w7, $w18, $w30                 # encoding: [0x7b,0x5e,0x91,0xce]
+# CHECK:        max_a.d         $w8, $w8, $w31                  # encoding: [0x7b,0x7f,0x42,0x0e]
+# CHECK:        max_s.b         $w10, $w1, $w19                 # encoding: [0x79,0x13,0x0a,0x8e]
+# CHECK:        max_s.h         $w15, $w29, $w17                # encoding: [0x79,0x31,0xeb,0xce]
+# CHECK:        max_s.w         $w15, $w29, $w14                # encoding: [0x79,0x4e,0xeb,0xce]
+# CHECK:        max_s.d         $w25, $w24, $w3                 # encoding: [0x79,0x63,0xc6,0x4e]
+# CHECK:        max_u.b         $w12, $w24, $w5                 # encoding: [0x79,0x85,0xc3,0x0e]
+# CHECK:        max_u.h         $w5, $w6, $w7                   # encoding: [0x79,0xa7,0x31,0x4e]
+# CHECK:        max_u.w         $w16, $w4, $w7                  # encoding: [0x79,0xc7,0x24,0x0e]
+# CHECK:        max_u.d         $w26, $w12, $w24                # encoding: [0x79,0xf8,0x66,0x8e]
+# CHECK:        min_a.b         $w4, $w26, $w1                  # encoding: [0x7b,0x81,0xd1,0x0e]
+# CHECK:        min_a.h         $w12, $w13, $w31                # encoding: [0x7b,0xbf,0x6b,0x0e]
+# CHECK:        min_a.w         $w28, $w20, $w0                 # encoding: [0x7b,0xc0,0xa7,0x0e]
+# CHECK:        min_a.d         $w12, $w20, $w19                # encoding: [0x7b,0xf3,0xa3,0x0e]
+# CHECK:        min_s.b         $w19, $w3, $w14                 # encoding: [0x7a,0x0e,0x1c,0xce]
+# CHECK:        min_s.h         $w27, $w21, $w8                 # encoding: [0x7a,0x28,0xae,0xce]
+# CHECK:        min_s.w         $w0, $w14, $w30                 # encoding: [0x7a,0x5e,0x70,0x0e]
+# CHECK:        min_s.d         $w6, $w8, $w21                  # encoding: [0x7a,0x75,0x41,0x8e]
+# CHECK:        min_u.b         $w22, $w26, $w8                 # encoding: [0x7a,0x88,0xd5,0x8e]
+# CHECK:        min_u.h         $w7, $w27, $w12                 # encoding: [0x7a,0xac,0xd9,0xce]
+# CHECK:        min_u.w         $w8, $w20, $w14                 # encoding: [0x7a,0xce,0xa2,0x0e]
+# CHECK:        min_u.d         $w26, $w14, $w15                # encoding: [0x7a,0xef,0x76,0x8e]
+# CHECK:        mod_s.b         $w18, $w1, $w26                 # encoding: [0x7b,0x1a,0x0c,0x92]
+# CHECK:        mod_s.h         $w31, $w30, $w28                # encoding: [0x7b,0x3c,0xf7,0xd2]
+# CHECK:        mod_s.w         $w2, $w6, $w13                  # encoding: [0x7b,0x4d,0x30,0x92]
+# CHECK:        mod_s.d         $w21, $w27, $w22                # encoding: [0x7b,0x76,0xdd,0x52]
+# CHECK:        mod_u.b         $w16, $w7, $w13                 # encoding: [0x7b,0x8d,0x3c,0x12]
+# CHECK:        mod_u.h         $w24, $w8, $w7                  # encoding: [0x7b,0xa7,0x46,0x12]
+# CHECK:        mod_u.w         $w30, $w2, $w17                 # encoding: [0x7b,0xd1,0x17,0x92]
+# CHECK:        mod_u.d         $w31, $w2, $w25                 # encoding: [0x7b,0xf9,0x17,0xd2]
+# CHECK:        msubv.b         $w14, $w5, $w12                 # encoding: [0x79,0x0c,0x2b,0x92]
+# CHECK:        msubv.h         $w6, $w7, $w30                  # encoding: [0x79,0x3e,0x39,0x92]
+# CHECK:        msubv.w         $w13, $w2, $w21                 # encoding: [0x79,0x55,0x13,0x52]
+# CHECK:        msubv.d         $w16, $w14, $w27                # encoding: [0x79,0x7b,0x74,0x12]
+# CHECK:        mulv.b          $w20, $w3, $w13                 # encoding: [0x78,0x0d,0x1d,0x12]
+# CHECK:        mulv.h          $w27, $w26, $w14                # encoding: [0x78,0x2e,0xd6,0xd2]
+# CHECK:        mulv.w          $w10, $w29, $w3                 # encoding: [0x78,0x43,0xea,0x92]
+# CHECK:        mulv.d          $w7, $w19, $w29                 # encoding: [0x78,0x7d,0x99,0xd2]
+# CHECK:        pckev.b         $w5, $w27, $w7                  # encoding: [0x79,0x07,0xd9,0x54]
+# CHECK:        pckev.h         $w1, $w4, $w27                  # encoding: [0x79,0x3b,0x20,0x54]
+# CHECK:        pckev.w         $w30, $w20, $w0                 # encoding: [0x79,0x40,0xa7,0x94]
+# CHECK:        pckev.d         $w6, $w1, $w15                  # encoding: [0x79,0x6f,0x09,0x94]
+# CHECK:        pckod.b         $w18, $w28, $w30                # encoding: [0x79,0x9e,0xe4,0x94]
+# CHECK:        pckod.h         $w26, $w5, $w8                  # encoding: [0x79,0xa8,0x2e,0x94]
+# CHECK:        pckod.w         $w9, $w4, $w2                   # encoding: [0x79,0xc2,0x22,0x54]
+# CHECK:        pckod.d         $w30, $w22, $w20                # encoding: [0x79,0xf4,0xb7,0x94]
+# CHECK:        sld.b           $w5, $w23[$12]                  # encoding: [0x78,0x0c,0xb9,0x54]
+# CHECK:        sld.h           $w1, $w23[$3]                   # encoding: [0x78,0x23,0xb8,0x54]
+# CHECK:        sld.w           $w20, $w8[$9]                   # encoding: [0x78,0x49,0x45,0x14]
+# CHECK:        sld.d           $w7, $w23[$fp]                  # encoding: [0x78,0x7e,0xb9,0xd4]
+# CHECK:        sll.b           $w3, $w0, $w17                  # encoding: [0x78,0x11,0x00,0xcd]
+# CHECK:        sll.h           $w17, $w27, $w3                 # encoding: [0x78,0x23,0xdc,0x4d]
+# CHECK:        sll.w           $w16, $w7, $w6                  # encoding: [0x78,0x46,0x3c,0x0d]
+# CHECK:        sll.d           $w9, $w0, $w26                  # encoding: [0x78,0x7a,0x02,0x4d]
+# CHECK:        splat.b         $w28, $w1[$1]                   # encoding: [0x78,0x81,0x0f,0x14]
+# CHECK:        splat.h         $w2, $w11[$11]                  # encoding: [0x78,0xab,0x58,0x94]
+# CHECK:        splat.w         $w22, $w0[$11]                  # encoding: [0x78,0xcb,0x05,0x94]
+# CHECK:        splat.d         $w0, $w0[$2]                    # encoding: [0x78,0xe2,0x00,0x14]
+# CHECK:        sra.b           $w28, $w4, $w17                 # encoding: [0x78,0x91,0x27,0x0d]
+# CHECK:        sra.h           $w13, $w9, $w3                  # encoding: [0x78,0xa3,0x4b,0x4d]
+# CHECK:        sra.w           $w27, $w21, $w19                # encoding: [0x78,0xd3,0xae,0xcd]
+# CHECK:        sra.d           $w30, $w8, $w23                 # encoding: [0x78,0xf7,0x47,0x8d]
+# CHECK:        srar.b          $w19, $w18, $w18                # encoding: [0x78,0x92,0x94,0xd5]
+# CHECK:        srar.h          $w7, $w23, $w8                  # encoding: [0x78,0xa8,0xb9,0xd5]
+# CHECK:        srar.w          $w1, $w12, $w2                  # encoding: [0x78,0xc2,0x60,0x55]
+# CHECK:        srar.d          $w21, $w7, $w14                 # encoding: [0x78,0xee,0x3d,0x55]
+# CHECK:        srl.b           $w12, $w3, $w19                 # encoding: [0x79,0x13,0x1b,0x0d]
+# CHECK:        srl.h           $w23, $w31, $w20                # encoding: [0x79,0x34,0xfd,0xcd]
+# CHECK:        srl.w           $w18, $w27, $w11                # encoding: [0x79,0x4b,0xdc,0x8d]
+# CHECK:        srl.d           $w3, $w12, $w26                 # encoding: [0x79,0x7a,0x60,0xcd]
+# CHECK:        srlr.b          $w15, $w21, $w11                # encoding: [0x79,0x0b,0xab,0xd5]
+# CHECK:        srlr.h          $w21, $w13, $w19                # encoding: [0x79,0x33,0x6d,0x55]
+# CHECK:        srlr.w          $w6, $w30, $w3                  # encoding: [0x79,0x43,0xf1,0x95]
+# CHECK:        srlr.d          $w1, $w2, $w14                  # encoding: [0x79,0x6e,0x10,0x55]
+# CHECK:        subs_s.b        $w25, $w15, $w1                 # encoding: [0x78,0x01,0x7e,0x51]
+# CHECK:        subs_s.h        $w28, $w25, $w22                # encoding: [0x78,0x36,0xcf,0x11]
+# CHECK:        subs_s.w        $w10, $w12, $w21                # encoding: [0x78,0x55,0x62,0x91]
+# CHECK:        subs_s.d        $w4, $w20, $w18                 # encoding: [0x78,0x72,0xa1,0x11]
+# CHECK:        subs_u.b        $w21, $w6, $w25                 # encoding: [0x78,0x99,0x35,0x51]
+# CHECK:        subs_u.h        $w3, $w10, $w7                  # encoding: [0x78,0xa7,0x50,0xd1]
+# CHECK:        subs_u.w        $w9, $w15, $w10                 # encoding: [0x78,0xca,0x7a,0x51]
+# CHECK:        subs_u.d        $w7, $w19, $w10                 # encoding: [0x78,0xea,0x99,0xd1]
+# CHECK:        subsus_u.b      $w6, $w7, $w12                  # encoding: [0x79,0x0c,0x39,0x91]
+# CHECK:        subsus_u.h      $w6, $w29, $w19                 # encoding: [0x79,0x33,0xe9,0x91]
+# CHECK:        subsus_u.w      $w7, $w15, $w7                  # encoding: [0x79,0x47,0x79,0xd1]
+# CHECK:        subsus_u.d      $w9, $w3, $w15                  # encoding: [0x79,0x6f,0x1a,0x51]
+# CHECK:        subsuu_s.b      $w22, $w3, $w31                 # encoding: [0x79,0x9f,0x1d,0x91]
+# CHECK:        subsuu_s.h      $w19, $w23, $w22                # encoding: [0x79,0xb6,0xbc,0xd1]
+# CHECK:        subsuu_s.w      $w9, $w10, $w13                 # encoding: [0x79,0xcd,0x52,0x51]
+# CHECK:        subsuu_s.d      $w5, $w6, $w0                   # encoding: [0x79,0xe0,0x31,0x51]
+# CHECK:        subv.b          $w6, $w13, $w19                 # encoding: [0x78,0x93,0x69,0x8e]
+# CHECK:        subv.h          $w4, $w25, $w12                 # encoding: [0x78,0xac,0xc9,0x0e]
+# CHECK:        subv.w          $w27, $w27, $w11                # encoding: [0x78,0xcb,0xde,0xce]
+# CHECK:        subv.d          $w9, $w24, $w10                 # encoding: [0x78,0xea,0xc2,0x4e]
+# CHECK:        vshf.b          $w3, $w16, $w5                  # encoding: [0x78,0x05,0x80,0xd5]
+# CHECK:        vshf.h          $w20, $w19, $w8                 # encoding: [0x78,0x28,0x9d,0x15]
+# CHECK:        vshf.w          $w16, $w30, $w25                # encoding: [0x78,0x59,0xf4,0x15]
+# CHECK:        vshf.d          $w19, $w11, $w15                # encoding: [0x78,0x6f,0x5c,0xd5]
+
+# CHECKOBJDUMP:        add_a.b         $w26, $w9, $w4
+# CHECKOBJDUMP:        add_a.h         $w23, $w27, $w31
+# CHECKOBJDUMP:        add_a.w         $w11, $w6, $w22
+# CHECKOBJDUMP:        add_a.d         $w6, $w10, $w0
+# CHECKOBJDUMP:        adds_a.b        $w19, $w24, $w19
+# CHECKOBJDUMP:        adds_a.h        $w25, $w6, $w4
+# CHECKOBJDUMP:        adds_a.w        $w25, $w17, $w27
+# CHECKOBJDUMP:        adds_a.d        $w15, $w18, $w26
+# CHECKOBJDUMP:        adds_s.b        $w29, $w11, $w19
+# CHECKOBJDUMP:        adds_s.h        $w5, $w23, $w26
+# CHECKOBJDUMP:        adds_s.w        $w16, $w14, $w13
+# CHECKOBJDUMP:        adds_s.d        $w2, $w14, $w28
+# CHECKOBJDUMP:        adds_u.b        $w3, $w17, $w14
+# CHECKOBJDUMP:        adds_u.h        $w10, $w30, $w4
+# CHECKOBJDUMP:        adds_u.w        $w15, $w18, $w20
+# CHECKOBJDUMP:        adds_u.d        $w30, $w10, $w9
+# CHECKOBJDUMP:        addv.b          $w24, $w20, $w21
+# CHECKOBJDUMP:        addv.h          $w4, $w13, $w27
+# CHECKOBJDUMP:        addv.w          $w19, $w11, $w14
+# CHECKOBJDUMP:        addv.d          $w2, $w21, $w31
+# CHECKOBJDUMP:        asub_s.b        $w23, $w16, $w3
+# CHECKOBJDUMP:        asub_s.h        $w22, $w17, $w25
+# CHECKOBJDUMP:        asub_s.w        $w24, $w1, $w9
+# CHECKOBJDUMP:        asub_s.d        $w13, $w12, $w12
+# CHECKOBJDUMP:        asub_u.b        $w10, $w29, $w11
+# CHECKOBJDUMP:        asub_u.h        $w18, $w9, $w15
+# CHECKOBJDUMP:        asub_u.w        $w10, $w19, $w31
+# CHECKOBJDUMP:        asub_u.d        $w17, $w10, $w0
+# CHECKOBJDUMP:        ave_s.b         $w2, $w5, $w1
+# CHECKOBJDUMP:        ave_s.h         $w16, $w19, $w9
+# CHECKOBJDUMP:        ave_s.w         $w17, $w31, $w5
+# CHECKOBJDUMP:        ave_s.d         $w27, $w25, $w10
+# CHECKOBJDUMP:        ave_u.b         $w16, $w19, $w9
+# CHECKOBJDUMP:        ave_u.h         $w28, $w28, $w11
+# CHECKOBJDUMP:        ave_u.w         $w11, $w12, $w11
+# CHECKOBJDUMP:        ave_u.d         $w30, $w19, $w28
+# CHECKOBJDUMP:        aver_s.b        $w26, $w16, $w2
+# CHECKOBJDUMP:        aver_s.h        $w31, $w27, $w27
+# CHECKOBJDUMP:        aver_s.w        $w28, $w18, $w25
+# CHECKOBJDUMP:        aver_s.d        $w29, $w21, $w27
+# CHECKOBJDUMP:        aver_u.b        $w29, $w26, $w3
+# CHECKOBJDUMP:        aver_u.h        $w18, $w18, $w9
+# CHECKOBJDUMP:        aver_u.w        $w17, $w25, $w29
+# CHECKOBJDUMP:        aver_u.d        $w22, $w22, $w19
+# CHECKOBJDUMP:        bclr.b          $w2, $w15, $w29
+# CHECKOBJDUMP:        bclr.h          $w16, $w21, $w28
+# CHECKOBJDUMP:        bclr.w          $w19, $w2, $w9
+# CHECKOBJDUMP:        bclr.d          $w27, $w31, $w4
+# CHECKOBJDUMP:        binsl.b         $w5, $w16, $w24
+# CHECKOBJDUMP:        binsl.h         $w30, $w5, $w10
+# CHECKOBJDUMP:        binsl.w         $w14, $w15, $w13
+# CHECKOBJDUMP:        binsl.d         $w23, $w20, $w12
+# CHECKOBJDUMP:        binsr.b         $w22, $w11, $w2
+# CHECKOBJDUMP:        binsr.h         $w0, $w26, $w6
+# CHECKOBJDUMP:        binsr.w         $w26, $w3, $w28
+# CHECKOBJDUMP:        binsr.d         $w0, $w0, $w21
+# CHECKOBJDUMP:        bneg.b          $w0, $w11, $w24
+# CHECKOBJDUMP:        bneg.h          $w28, $w16, $w4
+# CHECKOBJDUMP:        bneg.w          $w3, $w26, $w19
+# CHECKOBJDUMP:        bneg.d          $w13, $w29, $w15
+# CHECKOBJDUMP:        bset.b          $w31, $w5, $w31
+# CHECKOBJDUMP:        bset.h          $w14, $w12, $w6
+# CHECKOBJDUMP:        bset.w          $w31, $w9, $w12
+# CHECKOBJDUMP:        bset.d          $w5, $w22, $w5
+# CHECKOBJDUMP:        ceq.b           $w31, $w31, $w18
+# CHECKOBJDUMP:        ceq.h           $w10, $w27, $w9
+# CHECKOBJDUMP:        ceq.w           $w9, $w5, $w14
+# CHECKOBJDUMP:        ceq.d           $w5, $w17, $w0
+# CHECKOBJDUMP:        cle_s.b         $w23, $w4, $w9
+# CHECKOBJDUMP:        cle_s.h         $w22, $w27, $w19
+# CHECKOBJDUMP:        cle_s.w         $w30, $w26, $w10
+# CHECKOBJDUMP:        cle_s.d         $w18, $w5, $w10
+# CHECKOBJDUMP:        cle_u.b         $w1, $w25, $w0
+# CHECKOBJDUMP:        cle_u.h         $w7, $w0, $w29
+# CHECKOBJDUMP:        cle_u.w         $w25, $w18, $w1
+# CHECKOBJDUMP:        cle_u.d         $w6, $w0, $w30
+# CHECKOBJDUMP:        clt_s.b         $w25, $w2, $w21
+# CHECKOBJDUMP:        clt_s.h         $w2, $w19, $w9
+# CHECKOBJDUMP:        clt_s.w         $w23, $w8, $w16
+# CHECKOBJDUMP:        clt_s.d         $w7, $w30, $w12
+# CHECKOBJDUMP:        clt_u.b         $w2, $w31, $w13
+# CHECKOBJDUMP:        clt_u.h         $w16, $w31, $w23
+# CHECKOBJDUMP:        clt_u.w         $w3, $w24, $w9
+# CHECKOBJDUMP:        clt_u.d         $w7, $w0, $w1
+# CHECKOBJDUMP:        div_s.b         $w29, $w3, $w18
+# CHECKOBJDUMP:        div_s.h         $w17, $w16, $w13
+# CHECKOBJDUMP:        div_s.w         $w4, $w25, $w30
+# CHECKOBJDUMP:        div_s.d         $w31, $w9, $w20
+# CHECKOBJDUMP:        div_u.b         $w6, $w29, $w10
+# CHECKOBJDUMP:        div_u.h         $w24, $w21, $w14
+# CHECKOBJDUMP:        div_u.w         $w29, $w14, $w25
+# CHECKOBJDUMP:        div_u.d         $w31, $w1, $w21
+# CHECKOBJDUMP:        dotp_s.h        $w23, $w22, $w25
+# CHECKOBJDUMP:        dotp_s.w        $w20, $w14, $w5
+# CHECKOBJDUMP:        dotp_s.d        $w17, $w2, $w22
+# CHECKOBJDUMP:        dotp_u.h        $w13, $w2, $w6
+# CHECKOBJDUMP:        dotp_u.w        $w15, $w22, $w21
+# CHECKOBJDUMP:        dotp_u.d        $w4, $w16, $w26
+# CHECKOBJDUMP:        dpadd_s.h       $w1, $w28, $w22
+# CHECKOBJDUMP:        dpadd_s.w       $w10, $w1, $w12
+# CHECKOBJDUMP:        dpadd_s.d       $w3, $w21, $w27
+# CHECKOBJDUMP:        dpadd_u.h       $w17, $w5, $w20
+# CHECKOBJDUMP:        dpadd_u.w       $w24, $w8, $w16
+# CHECKOBJDUMP:        dpadd_u.d       $w15, $w29, $w16
+# CHECKOBJDUMP:        dpsub_s.h       $w4, $w11, $w12
+# CHECKOBJDUMP:        dpsub_s.w       $w4, $w7, $w6
+# CHECKOBJDUMP:        dpsub_s.d       $w31, $w12, $w28
+# CHECKOBJDUMP:        dpsub_u.h       $w4, $w25, $w17
+# CHECKOBJDUMP:        dpsub_u.w       $w19, $w25, $w16
+# CHECKOBJDUMP:        dpsub_u.d       $w7, $w10, $w26
+# CHECKOBJDUMP:        hadd_s.h        $w28, $w24, $w2
+# CHECKOBJDUMP:        hadd_s.w        $w24, $w17, $w11
+# CHECKOBJDUMP:        hadd_s.d        $w17, $w15, $w20
+# CHECKOBJDUMP:        hadd_u.h        $w12, $w29, $w17
+# CHECKOBJDUMP:        hadd_u.w        $w9, $w5, $w6
+# CHECKOBJDUMP:        hadd_u.d        $w1, $w20, $w6
+# CHECKOBJDUMP:        hsub_s.h        $w16, $w14, $w29
+# CHECKOBJDUMP:        hsub_s.w        $w9, $w13, $w11
+# CHECKOBJDUMP:        hsub_s.d        $w30, $w18, $w14
+# CHECKOBJDUMP:        hsub_u.h        $w7, $w12, $w14
+# CHECKOBJDUMP:        hsub_u.w        $w21, $w5, $w5
+# CHECKOBJDUMP:        hsub_u.d        $w11, $w12, $w31
+# CHECKOBJDUMP:        ilvev.b         $w18, $w16, $w30
+# CHECKOBJDUMP:        ilvev.h         $w14, $w0, $w13
+# CHECKOBJDUMP:        ilvev.w         $w12, $w25, $w22
+# CHECKOBJDUMP:        ilvev.d         $w30, $w27, $w3
+# CHECKOBJDUMP:        ilvl.b          $w29, $w3, $w21
+# CHECKOBJDUMP:        ilvl.h          $w27, $w10, $w17
+# CHECKOBJDUMP:        ilvl.w          $w6, $w1, $w0
+# CHECKOBJDUMP:        ilvl.d          $w3, $w16, $w24
+# CHECKOBJDUMP:        ilvod.b         $w11, $w5, $w20
+# CHECKOBJDUMP:        ilvod.h         $w18, $w13, $w31
+# CHECKOBJDUMP:        ilvod.w         $w29, $w16, $w24
+# CHECKOBJDUMP:        ilvod.d         $w22, $w12, $w29
+# CHECKOBJDUMP:        ilvr.b          $w4, $w30, $w6
+# CHECKOBJDUMP:        ilvr.h          $w28, $w19, $w29
+# CHECKOBJDUMP:        ilvr.w          $w18, $w20, $w21
+# CHECKOBJDUMP:        ilvr.d          $w23, $w30, $w12
+# CHECKOBJDUMP:        maddv.b         $w17, $w31, $w29
+# CHECKOBJDUMP:        maddv.h         $w7, $w24, $w9
+# CHECKOBJDUMP:        maddv.w         $w22, $w22, $w20
+# CHECKOBJDUMP:        maddv.d         $w30, $w26, $w20
+# CHECKOBJDUMP:        max_a.b         $w23, $w11, $w23
+# CHECKOBJDUMP:        max_a.h         $w20, $w5, $w30
+# CHECKOBJDUMP:        max_a.w         $w7, $w18, $w30
+# CHECKOBJDUMP:        max_a.d         $w8, $w8, $w31
+# CHECKOBJDUMP:        max_s.b         $w10, $w1, $w19
+# CHECKOBJDUMP:        max_s.h         $w15, $w29, $w17
+# CHECKOBJDUMP:        max_s.w         $w15, $w29, $w14
+# CHECKOBJDUMP:        max_s.d         $w25, $w24, $w3
+# CHECKOBJDUMP:        max_u.b         $w12, $w24, $w5
+# CHECKOBJDUMP:        max_u.h         $w5, $w6, $w7
+# CHECKOBJDUMP:        max_u.w         $w16, $w4, $w7
+# CHECKOBJDUMP:        max_u.d         $w26, $w12, $w24
+# CHECKOBJDUMP:        min_a.b         $w4, $w26, $w1
+# CHECKOBJDUMP:        min_a.h         $w12, $w13, $w31
+# CHECKOBJDUMP:        min_a.w         $w28, $w20, $w0
+# CHECKOBJDUMP:        min_a.d         $w12, $w20, $w19
+# CHECKOBJDUMP:        min_s.b         $w19, $w3, $w14
+# CHECKOBJDUMP:        min_s.h         $w27, $w21, $w8
+# CHECKOBJDUMP:        min_s.w         $w0, $w14, $w30
+# CHECKOBJDUMP:        min_s.d         $w6, $w8, $w21
+# CHECKOBJDUMP:        min_u.b         $w22, $w26, $w8
+# CHECKOBJDUMP:        min_u.h         $w7, $w27, $w12
+# CHECKOBJDUMP:        min_u.w         $w8, $w20, $w14
+# CHECKOBJDUMP:        min_u.d         $w26, $w14, $w15
+# CHECKOBJDUMP:        mod_s.b         $w18, $w1, $w26
+# CHECKOBJDUMP:        mod_s.h         $w31, $w30, $w28
+# CHECKOBJDUMP:        mod_s.w         $w2, $w6, $w13
+# CHECKOBJDUMP:        mod_s.d         $w21, $w27, $w22
+# CHECKOBJDUMP:        mod_u.b         $w16, $w7, $w13
+# CHECKOBJDUMP:        mod_u.h         $w24, $w8, $w7
+# CHECKOBJDUMP:        mod_u.w         $w30, $w2, $w17
+# CHECKOBJDUMP:        mod_u.d         $w31, $w2, $w25
+# CHECKOBJDUMP:        msubv.b         $w14, $w5, $w12
+# CHECKOBJDUMP:        msubv.h         $w6, $w7, $w30
+# CHECKOBJDUMP:        msubv.w         $w13, $w2, $w21
+# CHECKOBJDUMP:        msubv.d         $w16, $w14, $w27
+# CHECKOBJDUMP:        mulv.b          $w20, $w3, $w13
+# CHECKOBJDUMP:        mulv.h          $w27, $w26, $w14
+# CHECKOBJDUMP:        mulv.w          $w10, $w29, $w3
+# CHECKOBJDUMP:        mulv.d          $w7, $w19, $w29
+# CHECKOBJDUMP:        pckev.b         $w5, $w27, $w7
+# CHECKOBJDUMP:        pckev.h         $w1, $w4, $w27
+# CHECKOBJDUMP:        pckev.w         $w30, $w20, $w0
+# CHECKOBJDUMP:        pckev.d         $w6, $w1, $w15
+# CHECKOBJDUMP:        pckod.b         $w18, $w28, $w30
+# CHECKOBJDUMP:        pckod.h         $w26, $w5, $w8
+# CHECKOBJDUMP:        pckod.w         $w9, $w4, $w2
+# CHECKOBJDUMP:        pckod.d         $w30, $w22, $w20
+# CHECKOBJDUMP:        sld.b           $w5, $w23[$12]
+# CHECKOBJDUMP:        sld.h           $w1, $w23[$3]
+# CHECKOBJDUMP:        sld.w           $w20, $w8[$9]
+# CHECKOBJDUMP:        sld.d           $w7, $w23[$fp]
+# CHECKOBJDUMP:        sll.b           $w3, $w0, $w17
+# CHECKOBJDUMP:        sll.h           $w17, $w27, $w3
+# CHECKOBJDUMP:        sll.w           $w16, $w7, $w6
+# CHECKOBJDUMP:        sll.d           $w9, $w0, $w26
+# CHECKOBJDUMP:        splat.b         $w28, $w1[$1]
+# CHECKOBJDUMP:        splat.h         $w2, $w11[$11]
+# CHECKOBJDUMP:        splat.w         $w22, $w0[$11]
+# CHECKOBJDUMP:        splat.d         $w0, $w0[$2]
+# CHECKOBJDUMP:        sra.b           $w28, $w4, $w17
+# CHECKOBJDUMP:        sra.h           $w13, $w9, $w3
+# CHECKOBJDUMP:        sra.w           $w27, $w21, $w19
+# CHECKOBJDUMP:        sra.d           $w30, $w8, $w23
+# CHECKOBJDUMP:        srar.b          $w19, $w18, $w18
+# CHECKOBJDUMP:        srar.h          $w7, $w23, $w8
+# CHECKOBJDUMP:        srar.w          $w1, $w12, $w2
+# CHECKOBJDUMP:        srar.d          $w21, $w7, $w14
+# CHECKOBJDUMP:        srl.b           $w12, $w3, $w19
+# CHECKOBJDUMP:        srl.h           $w23, $w31, $w20
+# CHECKOBJDUMP:        srl.w           $w18, $w27, $w11
+# CHECKOBJDUMP:        srl.d           $w3, $w12, $w26
+# CHECKOBJDUMP:        srlr.b          $w15, $w21, $w11
+# CHECKOBJDUMP:        srlr.h          $w21, $w13, $w19
+# CHECKOBJDUMP:        srlr.w          $w6, $w30, $w3
+# CHECKOBJDUMP:        srlr.d          $w1, $w2, $w14
+# CHECKOBJDUMP:        subs_s.b        $w25, $w15, $w1
+# CHECKOBJDUMP:        subs_s.h        $w28, $w25, $w22
+# CHECKOBJDUMP:        subs_s.w        $w10, $w12, $w21
+# CHECKOBJDUMP:        subs_s.d        $w4, $w20, $w18
+# CHECKOBJDUMP:        subs_u.b        $w21, $w6, $w25
+# CHECKOBJDUMP:        subs_u.h        $w3, $w10, $w7
+# CHECKOBJDUMP:        subs_u.w        $w9, $w15, $w10
+# CHECKOBJDUMP:        subs_u.d        $w7, $w19, $w10
+# CHECKOBJDUMP:        subsus_u.b      $w6, $w7, $w12
+# CHECKOBJDUMP:        subsus_u.h      $w6, $w29, $w19
+# CHECKOBJDUMP:        subsus_u.w      $w7, $w15, $w7
+# CHECKOBJDUMP:        subsus_u.d      $w9, $w3, $w15
+# CHECKOBJDUMP:        subsuu_s.b      $w22, $w3, $w31
+# CHECKOBJDUMP:        subsuu_s.h      $w19, $w23, $w22
+# CHECKOBJDUMP:        subsuu_s.w      $w9, $w10, $w13
+# CHECKOBJDUMP:        subsuu_s.d      $w5, $w6, $w0
+# CHECKOBJDUMP:        subv.b          $w6, $w13, $w19
+# CHECKOBJDUMP:        subv.h          $w4, $w25, $w12
+# CHECKOBJDUMP:        subv.w          $w27, $w27, $w11
+# CHECKOBJDUMP:        subv.d          $w9, $w24, $w10
+# CHECKOBJDUMP:        vshf.b          $w3, $w16, $w5
+# CHECKOBJDUMP:        vshf.h          $w20, $w19, $w8
+# CHECKOBJDUMP:        vshf.w          $w16, $w30, $w25
+# CHECKOBJDUMP:        vshf.d          $w19, $w11, $w15
+
+                add_a.b         $w26, $w9, $w4
+                add_a.h         $w23, $w27, $w31
+                add_a.w         $w11, $w6, $w22
+                add_a.d         $w6, $w10, $w0
+                adds_a.b        $w19, $w24, $w19
+                adds_a.h        $w25, $w6, $w4
+                adds_a.w        $w25, $w17, $w27
+                adds_a.d        $w15, $w18, $w26
+                adds_s.b        $w29, $w11, $w19
+                adds_s.h        $w5, $w23, $w26
+                adds_s.w        $w16, $w14, $w13
+                adds_s.d        $w2, $w14, $w28
+                adds_u.b        $w3, $w17, $w14
+                adds_u.h        $w10, $w30, $w4
+                adds_u.w        $w15, $w18, $w20
+                adds_u.d        $w30, $w10, $w9
+                addv.b          $w24, $w20, $w21
+                addv.h          $w4, $w13, $w27
+                addv.w          $w19, $w11, $w14
+                addv.d          $w2, $w21, $w31
+                asub_s.b        $w23, $w16, $w3
+                asub_s.h        $w22, $w17, $w25
+                asub_s.w        $w24, $w1, $w9
+                asub_s.d        $w13, $w12, $w12
+                asub_u.b        $w10, $w29, $w11
+                asub_u.h        $w18, $w9, $w15
+                asub_u.w        $w10, $w19, $w31
+                asub_u.d        $w17, $w10, $w0
+                ave_s.b         $w2, $w5, $w1
+                ave_s.h         $w16, $w19, $w9
+                ave_s.w         $w17, $w31, $w5
+                ave_s.d         $w27, $w25, $w10
+                ave_u.b         $w16, $w19, $w9
+                ave_u.h         $w28, $w28, $w11
+                ave_u.w         $w11, $w12, $w11
+                ave_u.d         $w30, $w19, $w28
+                aver_s.b        $w26, $w16, $w2
+                aver_s.h        $w31, $w27, $w27
+                aver_s.w        $w28, $w18, $w25
+                aver_s.d        $w29, $w21, $w27
+                aver_u.b        $w29, $w26, $w3
+                aver_u.h        $w18, $w18, $w9
+                aver_u.w        $w17, $w25, $w29
+                aver_u.d        $w22, $w22, $w19
+                bclr.b          $w2, $w15, $w29
+                bclr.h          $w16, $w21, $w28
+                bclr.w          $w19, $w2, $w9
+                bclr.d          $w27, $w31, $w4
+                binsl.b         $w5, $w16, $w24
+                binsl.h         $w30, $w5, $w10
+                binsl.w         $w14, $w15, $w13
+                binsl.d         $w23, $w20, $w12
+                binsr.b         $w22, $w11, $w2
+                binsr.h         $w0, $w26, $w6
+                binsr.w         $w26, $w3, $w28
+                binsr.d         $w0, $w0, $w21
+                bneg.b          $w0, $w11, $w24
+                bneg.h          $w28, $w16, $w4
+                bneg.w          $w3, $w26, $w19
+                bneg.d          $w13, $w29, $w15
+                bset.b          $w31, $w5, $w31
+                bset.h          $w14, $w12, $w6
+                bset.w          $w31, $w9, $w12
+                bset.d          $w5, $w22, $w5
+                ceq.b           $w31, $w31, $w18
+                ceq.h           $w10, $w27, $w9
+                ceq.w           $w9, $w5, $w14
+                ceq.d           $w5, $w17, $w0
+                cle_s.b         $w23, $w4, $w9
+                cle_s.h         $w22, $w27, $w19
+                cle_s.w         $w30, $w26, $w10
+                cle_s.d         $w18, $w5, $w10
+                cle_u.b         $w1, $w25, $w0
+                cle_u.h         $w7, $w0, $w29
+                cle_u.w         $w25, $w18, $w1
+                cle_u.d         $w6, $w0, $w30
+                clt_s.b         $w25, $w2, $w21
+                clt_s.h         $w2, $w19, $w9
+                clt_s.w         $w23, $w8, $w16
+                clt_s.d         $w7, $w30, $w12
+                clt_u.b         $w2, $w31, $w13
+                clt_u.h         $w16, $w31, $w23
+                clt_u.w         $w3, $w24, $w9
+                clt_u.d         $w7, $w0, $w1
+                div_s.b         $w29, $w3, $w18
+                div_s.h         $w17, $w16, $w13
+                div_s.w         $w4, $w25, $w30
+                div_s.d         $w31, $w9, $w20
+                div_u.b         $w6, $w29, $w10
+                div_u.h         $w24, $w21, $w14
+                div_u.w         $w29, $w14, $w25
+                div_u.d         $w31, $w1, $w21
+                dotp_s.h        $w23, $w22, $w25
+                dotp_s.w        $w20, $w14, $w5
+                dotp_s.d        $w17, $w2, $w22
+                dotp_u.h        $w13, $w2, $w6
+                dotp_u.w        $w15, $w22, $w21
+                dotp_u.d        $w4, $w16, $w26
+                dpadd_s.h       $w1, $w28, $w22
+                dpadd_s.w       $w10, $w1, $w12
+                dpadd_s.d       $w3, $w21, $w27
+                dpadd_u.h       $w17, $w5, $w20
+                dpadd_u.w       $w24, $w8, $w16
+                dpadd_u.d       $w15, $w29, $w16
+                dpsub_s.h       $w4, $w11, $w12
+                dpsub_s.w       $w4, $w7, $w6
+                dpsub_s.d       $w31, $w12, $w28
+                dpsub_u.h       $w4, $w25, $w17
+                dpsub_u.w       $w19, $w25, $w16
+                dpsub_u.d       $w7, $w10, $w26
+                hadd_s.h        $w28, $w24, $w2
+                hadd_s.w        $w24, $w17, $w11
+                hadd_s.d        $w17, $w15, $w20
+                hadd_u.h        $w12, $w29, $w17
+                hadd_u.w        $w9, $w5, $w6
+                hadd_u.d        $w1, $w20, $w6
+                hsub_s.h        $w16, $w14, $w29
+                hsub_s.w        $w9, $w13, $w11
+                hsub_s.d        $w30, $w18, $w14
+                hsub_u.h        $w7, $w12, $w14
+                hsub_u.w        $w21, $w5, $w5
+                hsub_u.d        $w11, $w12, $w31
+                ilvev.b         $w18, $w16, $w30
+                ilvev.h         $w14, $w0, $w13
+                ilvev.w         $w12, $w25, $w22
+                ilvev.d         $w30, $w27, $w3
+                ilvl.b          $w29, $w3, $w21
+                ilvl.h          $w27, $w10, $w17
+                ilvl.w          $w6, $w1, $w0
+                ilvl.d          $w3, $w16, $w24
+                ilvod.b         $w11, $w5, $w20
+                ilvod.h         $w18, $w13, $w31
+                ilvod.w         $w29, $w16, $w24
+                ilvod.d         $w22, $w12, $w29
+                ilvr.b          $w4, $w30, $w6
+                ilvr.h          $w28, $w19, $w29
+                ilvr.w          $w18, $w20, $w21
+                ilvr.d          $w23, $w30, $w12
+                maddv.b         $w17, $w31, $w29
+                maddv.h         $w7, $w24, $w9
+                maddv.w         $w22, $w22, $w20
+                maddv.d         $w30, $w26, $w20
+                max_a.b         $w23, $w11, $w23
+                max_a.h         $w20, $w5, $w30
+                max_a.w         $w7, $w18, $w30
+                max_a.d         $w8, $w8, $w31
+                max_s.b         $w10, $w1, $w19
+                max_s.h         $w15, $w29, $w17
+                max_s.w         $w15, $w29, $w14
+                max_s.d         $w25, $w24, $w3
+                max_u.b         $w12, $w24, $w5
+                max_u.h         $w5, $w6, $w7
+                max_u.w         $w16, $w4, $w7
+                max_u.d         $w26, $w12, $w24
+                min_a.b         $w4, $w26, $w1
+                min_a.h         $w12, $w13, $w31
+                min_a.w         $w28, $w20, $w0
+                min_a.d         $w12, $w20, $w19
+                min_s.b         $w19, $w3, $w14
+                min_s.h         $w27, $w21, $w8
+                min_s.w         $w0, $w14, $w30
+                min_s.d         $w6, $w8, $w21
+                min_u.b         $w22, $w26, $w8
+                min_u.h         $w7, $w27, $w12
+                min_u.w         $w8, $w20, $w14
+                min_u.d         $w26, $w14, $w15
+                mod_s.b         $w18, $w1, $w26
+                mod_s.h         $w31, $w30, $w28
+                mod_s.w         $w2, $w6, $w13
+                mod_s.d         $w21, $w27, $w22
+                mod_u.b         $w16, $w7, $w13
+                mod_u.h         $w24, $w8, $w7
+                mod_u.w         $w30, $w2, $w17
+                mod_u.d         $w31, $w2, $w25
+                msubv.b         $w14, $w5, $w12
+                msubv.h         $w6, $w7, $w30
+                msubv.w         $w13, $w2, $w21
+                msubv.d         $w16, $w14, $w27
+                mulv.b          $w20, $w3, $w13
+                mulv.h          $w27, $w26, $w14
+                mulv.w          $w10, $w29, $w3
+                mulv.d          $w7, $w19, $w29
+                pckev.b         $w5, $w27, $w7
+                pckev.h         $w1, $w4, $w27
+                pckev.w         $w30, $w20, $w0
+                pckev.d         $w6, $w1, $w15
+                pckod.b         $w18, $w28, $w30
+                pckod.h         $w26, $w5, $w8
+                pckod.w         $w9, $w4, $w2
+                pckod.d         $w30, $w22, $w20
+                sld.b           $w5, $w23[$12]
+                sld.h           $w1, $w23[$3]
+                sld.w           $w20, $w8[$9]
+                sld.d           $w7, $w23[$30]
+                sll.b           $w3, $w0, $w17
+                sll.h           $w17, $w27, $w3
+                sll.w           $w16, $w7, $w6
+                sll.d           $w9, $w0, $w26
+                splat.b         $w28, $w1[$1]
+                splat.h         $w2, $w11[$11]
+                splat.w         $w22, $w0[$11]
+                splat.d         $w0, $w0[$2]
+                sra.b           $w28, $w4, $w17
+                sra.h           $w13, $w9, $w3
+                sra.w           $w27, $w21, $w19
+                sra.d           $w30, $w8, $w23
+                srar.b          $w19, $w18, $w18
+                srar.h          $w7, $w23, $w8
+                srar.w          $w1, $w12, $w2
+                srar.d          $w21, $w7, $w14
+                srl.b           $w12, $w3, $w19
+                srl.h           $w23, $w31, $w20
+                srl.w           $w18, $w27, $w11
+                srl.d           $w3, $w12, $w26
+                srlr.b          $w15, $w21, $w11
+                srlr.h          $w21, $w13, $w19
+                srlr.w          $w6, $w30, $w3
+                srlr.d          $w1, $w2, $w14
+                subs_s.b        $w25, $w15, $w1
+                subs_s.h        $w28, $w25, $w22
+                subs_s.w        $w10, $w12, $w21
+                subs_s.d        $w4, $w20, $w18
+                subs_u.b        $w21, $w6, $w25
+                subs_u.h        $w3, $w10, $w7
+                subs_u.w        $w9, $w15, $w10
+                subs_u.d        $w7, $w19, $w10
+                subsus_u.b      $w6, $w7, $w12
+                subsus_u.h      $w6, $w29, $w19
+                subsus_u.w      $w7, $w15, $w7
+                subsus_u.d      $w9, $w3, $w15
+                subsuu_s.b      $w22, $w3, $w31
+                subsuu_s.h      $w19, $w23, $w22
+                subsuu_s.w      $w9, $w10, $w13
+                subsuu_s.d      $w5, $w6, $w0
+                subv.b          $w6, $w13, $w19
+                subv.h          $w4, $w25, $w12
+                subv.w          $w27, $w27, $w11
+                subv.d          $w9, $w24, $w10
+                vshf.b          $w3, $w16, $w5
+                vshf.h          $w20, $w19, $w8
+                vshf.w          $w16, $w30, $w25
+                vshf.d          $w19, $w11, $w15
diff --git a/test/MC/Mips/msa/test_3rf.s b/test/MC/Mips/msa/test_3rf.s
new file mode 100644
index 000000000000..f45557ee0ec8
--- /dev/null
+++ b/test/MC/Mips/msa/test_3rf.s
@@ -0,0 +1,252 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+# The command above matches llvm-mc's printed assembly, including each "encoding" comment, against the default-prefix check lines below.
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+# The command above assembles to an object, disassembles it with llvm-objdump, and matches the lines selected by -check-prefix. NOTE(review): both commands name a mipsel triple yet also pass -arch=mips; confirm which one is meant to apply.
+# CHECK:        fadd.w          $w28, $w19, $w28        # encoding: [0x78,0x1c,0x9f,0x1b]
+# CHECK:        fadd.d          $w13, $w2, $w29         # encoding: [0x78,0x3d,0x13,0x5b]
+# CHECK:        fcaf.w          $w14, $w11, $w25        # encoding: [0x78,0x19,0x5b,0x9a]
+# CHECK:        fcaf.d          $w1, $w1, $w19          # encoding: [0x78,0x33,0x08,0x5a]
+# CHECK:        fceq.w          $w1, $w23, $w16         # encoding: [0x78,0x90,0xb8,0x5a]
+# CHECK:        fceq.d          $w0, $w8, $w16          # encoding: [0x78,0xb0,0x40,0x1a]
+# CHECK:        fcle.w          $w16, $w9, $w24         # encoding: [0x79,0x98,0x4c,0x1a]
+# CHECK:        fcle.d          $w27, $w14, $w1         # encoding: [0x79,0xa1,0x76,0xda]
+# CHECK:        fclt.w          $w28, $w8, $w8          # encoding: [0x79,0x08,0x47,0x1a]
+# CHECK:        fclt.d          $w30, $w25, $w11        # encoding: [0x79,0x2b,0xcf,0x9a]
+# CHECK:        fcne.w          $w2, $w18, $w23         # encoding: [0x78,0xd7,0x90,0x9c]
+# CHECK:        fcne.d          $w14, $w20, $w15        # encoding: [0x78,0xef,0xa3,0x9c]
+# CHECK:        fcor.w          $w10, $w18, $w25        # encoding: [0x78,0x59,0x92,0x9c]
+# CHECK:        fcor.d          $w17, $w25, $w11        # encoding: [0x78,0x6b,0xcc,0x5c]
+# CHECK:        fcueq.w         $w14, $w2, $w21         # encoding: [0x78,0xd5,0x13,0x9a]
+# CHECK:        fcueq.d         $w29, $w3, $w7          # encoding: [0x78,0xe7,0x1f,0x5a]
+# CHECK:        fcule.w         $w17, $w5, $w3          # encoding: [0x79,0xc3,0x2c,0x5a]
+# CHECK:        fcule.d         $w31, $w1, $w30         # encoding: [0x79,0xfe,0x0f,0xda]
+# CHECK:        fcult.w         $w6, $w25, $w9          # encoding: [0x79,0x49,0xc9,0x9a]
+# CHECK:        fcult.d         $w27, $w8, $w17         # encoding: [0x79,0x71,0x46,0xda]
+# CHECK:        fcun.w          $w4, $w20, $w8          # encoding: [0x78,0x48,0xa1,0x1a]
+# CHECK:        fcun.d          $w29, $w11, $w3         # encoding: [0x78,0x63,0x5f,0x5a]
+# CHECK:        fcune.w         $w13, $w18, $w19        # encoding: [0x78,0x93,0x93,0x5c]
+# CHECK:        fcune.d         $w16, $w26, $w21        # encoding: [0x78,0xb5,0xd4,0x1c]
+# CHECK:        fdiv.w          $w13, $w24, $w2         # encoding: [0x78,0xc2,0xc3,0x5b]
+# CHECK:        fdiv.d          $w19, $w4, $w25         # encoding: [0x78,0xf9,0x24,0xdb]
+# CHECK:        fexdo.h         $w8, $w0, $w16          # encoding: [0x7a,0x10,0x02,0x1b]
+# CHECK:        fexdo.w         $w0, $w13, $w27         # encoding: [0x7a,0x3b,0x68,0x1b]
+# CHECK:        fexp2.w         $w17, $w0, $w3          # encoding: [0x79,0xc3,0x04,0x5b]
+# CHECK:        fexp2.d         $w22, $w0, $w10         # encoding: [0x79,0xea,0x05,0x9b]
+# CHECK:        fmadd.w         $w29, $w6, $w23         # encoding: [0x79,0x17,0x37,0x5b]
+# CHECK:        fmadd.d         $w11, $w28, $w21        # encoding: [0x79,0x35,0xe2,0xdb]
+# CHECK:        fmax.w          $w0, $w23, $w13         # encoding: [0x7b,0x8d,0xb8,0x1b]
+# CHECK:        fmax.d          $w26, $w18, $w8         # encoding: [0x7b,0xa8,0x96,0x9b]
+# CHECK:        fmax_a.w        $w10, $w16, $w10        # encoding: [0x7b,0xca,0x82,0x9b]
+# CHECK:        fmax_a.d        $w30, $w9, $w22         # encoding: [0x7b,0xf6,0x4f,0x9b]
+# CHECK:        fmin.w          $w24, $w1, $w30         # encoding: [0x7b,0x1e,0x0e,0x1b]
+# CHECK:        fmin.d          $w27, $w27, $w10        # encoding: [0x7b,0x2a,0xde,0xdb]
+# CHECK:        fmin_a.w        $w10, $w29, $w20        # encoding: [0x7b,0x54,0xea,0x9b]
+# CHECK:        fmin_a.d        $w13, $w30, $w24        # encoding: [0x7b,0x78,0xf3,0x5b]
+# CHECK:        fmsub.w         $w17, $w25, $w0         # encoding: [0x79,0x40,0xcc,0x5b]
+# CHECK:        fmsub.d         $w8, $w18, $w16         # encoding: [0x79,0x70,0x92,0x1b]
+# CHECK:        fmul.w          $w3, $w15, $w15         # encoding: [0x78,0x8f,0x78,0xdb]
+# CHECK:        fmul.d          $w9, $w30, $w10         # encoding: [0x78,0xaa,0xf2,0x5b]
+# CHECK:        fsaf.w          $w25, $w5, $w10         # encoding: [0x7a,0x0a,0x2e,0x5a]
+# CHECK:        fsaf.d          $w25, $w3, $w29         # encoding: [0x7a,0x3d,0x1e,0x5a]
+# CHECK:        fseq.w          $w11, $w17, $w13        # encoding: [0x7a,0x8d,0x8a,0xda]
+# CHECK:        fseq.d          $w29, $w0, $w31         # encoding: [0x7a,0xbf,0x07,0x5a]
+# CHECK:        fsle.w          $w30, $w31, $w31        # encoding: [0x7b,0x9f,0xff,0x9a]
+# CHECK:        fsle.d          $w18, $w23, $w24        # encoding: [0x7b,0xb8,0xbc,0x9a]
+# CHECK:        fslt.w          $w12, $w5, $w6          # encoding: [0x7b,0x06,0x2b,0x1a]
+# CHECK:        fslt.d          $w16, $w26, $w21        # encoding: [0x7b,0x35,0xd4,0x1a]
+# CHECK:        fsne.w          $w30, $w1, $w12         # encoding: [0x7a,0xcc,0x0f,0x9c]
+# CHECK:        fsne.d          $w14, $w13, $w23        # encoding: [0x7a,0xf7,0x6b,0x9c]
+# CHECK:        fsor.w          $w27, $w13, $w27        # encoding: [0x7a,0x5b,0x6e,0xdc]
+# CHECK:        fsor.d          $w12, $w24, $w11        # encoding: [0x7a,0x6b,0xc3,0x1c]
+# CHECK:        fsub.w          $w31, $w26, $w1         # encoding: [0x78,0x41,0xd7,0xdb]
+# CHECK:        fsub.d          $w19, $w17, $w27        # encoding: [0x78,0x7b,0x8c,0xdb]
+# CHECK:        fsueq.w         $w16, $w24, $w25        # encoding: [0x7a,0xd9,0xc4,0x1a]
+# CHECK:        fsueq.d         $w18, $w14, $w14        # encoding: [0x7a,0xee,0x74,0x9a]
+# CHECK:        fsule.w         $w23, $w30, $w13        # encoding: [0x7b,0xcd,0xf5,0xda]
+# CHECK:        fsule.d         $w2, $w11, $w26         # encoding: [0x7b,0xfa,0x58,0x9a]
+# CHECK:        fsult.w         $w11, $w26, $w22        # encoding: [0x7b,0x56,0xd2,0xda]
+# CHECK:        fsult.d         $w6, $w23, $w30         # encoding: [0x7b,0x7e,0xb9,0x9a]
+# CHECK:        fsun.w          $w3, $w18, $w28         # encoding: [0x7a,0x5c,0x90,0xda]
+# CHECK:        fsun.d          $w18, $w11, $w19        # encoding: [0x7a,0x73,0x5c,0x9a]
+# CHECK:        fsune.w         $w16, $w31, $w2         # encoding: [0x7a,0x82,0xfc,0x1c]
+# CHECK:        fsune.d         $w3, $w26, $w17         # encoding: [0x7a,0xb1,0xd0,0xdc]
+# CHECK:        ftq.h           $w16, $w4, $w24         # encoding: [0x7a,0x98,0x24,0x1b]
+# CHECK:        ftq.w           $w5, $w5, $w25          # encoding: [0x7a,0xb9,0x29,0x5b]
+# CHECK:        madd_q.h        $w16, $w20, $w10        # encoding: [0x79,0x4a,0xa4,0x1c]
+# CHECK:        madd_q.w        $w28, $w2, $w9          # encoding: [0x79,0x69,0x17,0x1c]
+# CHECK:        maddr_q.h       $w8, $w18, $w9          # encoding: [0x7b,0x49,0x92,0x1c]
+# CHECK:        maddr_q.w       $w29, $w12, $w16        # encoding: [0x7b,0x70,0x67,0x5c]
+# CHECK:        msub_q.h        $w24, $w26, $w10        # encoding: [0x79,0x8a,0xd6,0x1c]
+# CHECK:        msub_q.w        $w13, $w30, $w28        # encoding: [0x79,0xbc,0xf3,0x5c]
+# CHECK:        msubr_q.h       $w12, $w21, $w11        # encoding: [0x7b,0x8b,0xab,0x1c]
+# CHECK:        msubr_q.w       $w1, $w14, $w20         # encoding: [0x7b,0xb4,0x70,0x5c]
+# CHECK:        mul_q.h         $w6, $w16, $w30         # encoding: [0x79,0x1e,0x81,0x9c]
+# CHECK:        mul_q.w         $w16, $w1, $w4          # encoding: [0x79,0x24,0x0c,0x1c]
+# CHECK:        mulr_q.h        $w6, $w20, $w19         # encoding: [0x7b,0x13,0xa1,0x9c]
+# CHECK:        mulr_q.w        $w27, $w1, $w20         # encoding: [0x7b,0x34,0x0e,0xdc]
+
+# CHECKOBJDUMP:        fadd.w          $w28, $w19, $w28
+# CHECKOBJDUMP:        fadd.d          $w13, $w2, $w29
+# CHECKOBJDUMP:        fcaf.w          $w14, $w11, $w25
+# CHECKOBJDUMP:        fcaf.d          $w1, $w1, $w19
+# CHECKOBJDUMP:        fceq.w          $w1, $w23, $w16
+# CHECKOBJDUMP:        fceq.d          $w0, $w8, $w16
+# CHECKOBJDUMP:        fcle.w          $w16, $w9, $w24
+# CHECKOBJDUMP:        fcle.d          $w27, $w14, $w1
+# CHECKOBJDUMP:        fclt.w          $w28, $w8, $w8
+# CHECKOBJDUMP:        fclt.d          $w30, $w25, $w11
+# CHECKOBJDUMP:        fcne.w          $w2, $w18, $w23
+# CHECKOBJDUMP:        fcne.d          $w14, $w20, $w15
+# CHECKOBJDUMP:        fcor.w          $w10, $w18, $w25
+# CHECKOBJDUMP:        fcor.d          $w17, $w25, $w11
+# CHECKOBJDUMP:        fcueq.w         $w14, $w2, $w21
+# CHECKOBJDUMP:        fcueq.d         $w29, $w3, $w7
+# CHECKOBJDUMP:        fcule.w         $w17, $w5, $w3
+# CHECKOBJDUMP:        fcule.d         $w31, $w1, $w30
+# CHECKOBJDUMP:        fcult.w         $w6, $w25, $w9
+# CHECKOBJDUMP:        fcult.d         $w27, $w8, $w17
+# CHECKOBJDUMP:        fcun.w          $w4, $w20, $w8
+# CHECKOBJDUMP:        fcun.d          $w29, $w11, $w3
+# CHECKOBJDUMP:        fcune.w         $w13, $w18, $w19
+# CHECKOBJDUMP:        fcune.d         $w16, $w26, $w21
+# CHECKOBJDUMP:        fdiv.w          $w13, $w24, $w2
+# CHECKOBJDUMP:        fdiv.d          $w19, $w4, $w25
+# CHECKOBJDUMP:        fexdo.h         $w8, $w0, $w16
+# CHECKOBJDUMP:        fexdo.w         $w0, $w13, $w27
+# CHECKOBJDUMP:        fexp2.w         $w17, $w0, $w3
+# CHECKOBJDUMP:        fexp2.d         $w22, $w0, $w10
+# CHECKOBJDUMP:        fmadd.w         $w29, $w6, $w23
+# CHECKOBJDUMP:        fmadd.d         $w11, $w28, $w21
+# CHECKOBJDUMP:        fmax.w          $w0, $w23, $w13
+# CHECKOBJDUMP:        fmax.d          $w26, $w18, $w8
+# CHECKOBJDUMP:        fmax_a.w        $w10, $w16, $w10
+# CHECKOBJDUMP:        fmax_a.d        $w30, $w9, $w22
+# CHECKOBJDUMP:        fmin.w          $w24, $w1, $w30
+# CHECKOBJDUMP:        fmin.d          $w27, $w27, $w10
+# CHECKOBJDUMP:        fmin_a.w        $w10, $w29, $w20
+# CHECKOBJDUMP:        fmin_a.d        $w13, $w30, $w24
+# CHECKOBJDUMP:        fmsub.w         $w17, $w25, $w0
+# CHECKOBJDUMP:        fmsub.d         $w8, $w18, $w16
+# CHECKOBJDUMP:        fmul.w          $w3, $w15, $w15
+# CHECKOBJDUMP:        fmul.d          $w9, $w30, $w10
+# CHECKOBJDUMP:        fsaf.w          $w25, $w5, $w10
+# CHECKOBJDUMP:        fsaf.d          $w25, $w3, $w29
+# CHECKOBJDUMP:        fseq.w          $w11, $w17, $w13
+# CHECKOBJDUMP:        fseq.d          $w29, $w0, $w31
+# CHECKOBJDUMP:        fsle.w          $w30, $w31, $w31
+# CHECKOBJDUMP:        fsle.d          $w18, $w23, $w24
+# CHECKOBJDUMP:        fslt.w          $w12, $w5, $w6
+# CHECKOBJDUMP:        fslt.d          $w16, $w26, $w21
+# CHECKOBJDUMP:        fsne.w          $w30, $w1, $w12
+# CHECKOBJDUMP:        fsne.d          $w14, $w13, $w23
+# CHECKOBJDUMP:        fsor.w          $w27, $w13, $w27
+# CHECKOBJDUMP:        fsor.d          $w12, $w24, $w11
+# CHECKOBJDUMP:        fsub.w          $w31, $w26, $w1
+# CHECKOBJDUMP:        fsub.d          $w19, $w17, $w27
+# CHECKOBJDUMP:        fsueq.w         $w16, $w24, $w25
+# CHECKOBJDUMP:        fsueq.d         $w18, $w14, $w14
+# CHECKOBJDUMP:        fsule.w         $w23, $w30, $w13
+# CHECKOBJDUMP:        fsule.d         $w2, $w11, $w26
+# CHECKOBJDUMP:        fsult.w         $w11, $w26, $w22
+# CHECKOBJDUMP:        fsult.d         $w6, $w23, $w30
+# CHECKOBJDUMP:        fsun.w          $w3, $w18, $w28
+# CHECKOBJDUMP:        fsun.d          $w18, $w11, $w19
+# CHECKOBJDUMP:        fsune.w         $w16, $w31, $w2
+# CHECKOBJDUMP:        fsune.d         $w3, $w26, $w17
+# CHECKOBJDUMP:        ftq.h           $w16, $w4, $w24
+# CHECKOBJDUMP:        ftq.w           $w5, $w5, $w25
+# CHECKOBJDUMP:        madd_q.h        $w16, $w20, $w10
+# CHECKOBJDUMP:        madd_q.w        $w28, $w2, $w9
+# CHECKOBJDUMP:        maddr_q.h       $w8, $w18, $w9
+# CHECKOBJDUMP:        maddr_q.w       $w29, $w12, $w16
+# CHECKOBJDUMP:        msub_q.h        $w24, $w26, $w10
+# CHECKOBJDUMP:        msub_q.w        $w13, $w30, $w28
+# CHECKOBJDUMP:        msubr_q.h       $w12, $w21, $w11
+# CHECKOBJDUMP:        msubr_q.w       $w1, $w14, $w20
+# CHECKOBJDUMP:        mul_q.h         $w6, $w16, $w30
+# CHECKOBJDUMP:        mul_q.w         $w16, $w1, $w4
+# CHECKOBJDUMP:        mulr_q.h        $w6, $w20, $w19
+# CHECKOBJDUMP:        mulr_q.w        $w27, $w1, $w20
+
+                fadd.w          $w28, $w19, $w28
+                fadd.d          $w13, $w2, $w29
+                fcaf.w          $w14, $w11, $w25
+                fcaf.d          $w1, $w1, $w19
+                fceq.w          $w1, $w23, $w16
+                fceq.d          $w0, $w8, $w16
+                fcle.w          $w16, $w9, $w24
+                fcle.d          $w27, $w14, $w1
+                fclt.w          $w28, $w8, $w8
+                fclt.d          $w30, $w25, $w11
+                fcne.w          $w2, $w18, $w23
+                fcne.d          $w14, $w20, $w15
+                fcor.w          $w10, $w18, $w25
+                fcor.d          $w17, $w25, $w11
+                fcueq.w         $w14, $w2, $w21
+                fcueq.d         $w29, $w3, $w7
+                fcule.w         $w17, $w5, $w3
+                fcule.d         $w31, $w1, $w30
+                fcult.w         $w6, $w25, $w9
+                fcult.d         $w27, $w8, $w17
+                fcun.w          $w4, $w20, $w8
+                fcun.d          $w29, $w11, $w3
+                fcune.w         $w13, $w18, $w19
+                fcune.d         $w16, $w26, $w21
+                fdiv.w          $w13, $w24, $w2
+                fdiv.d          $w19, $w4, $w25
+                fexdo.h         $w8, $w0, $w16
+                fexdo.w         $w0, $w13, $w27
+                fexp2.w         $w17, $w0, $w3
+                fexp2.d         $w22, $w0, $w10
+                fmadd.w         $w29, $w6, $w23
+                fmadd.d         $w11, $w28, $w21
+                fmax.w          $w0, $w23, $w13
+                fmax.d          $w26, $w18, $w8
+                fmax_a.w        $w10, $w16, $w10
+                fmax_a.d        $w30, $w9, $w22
+                fmin.w          $w24, $w1, $w30
+                fmin.d          $w27, $w27, $w10
+                fmin_a.w        $w10, $w29, $w20
+                fmin_a.d        $w13, $w30, $w24
+                fmsub.w         $w17, $w25, $w0
+                fmsub.d         $w8, $w18, $w16
+                fmul.w          $w3, $w15, $w15
+                fmul.d          $w9, $w30, $w10
+                fsaf.w          $w25, $w5, $w10
+                fsaf.d          $w25, $w3, $w29
+                fseq.w          $w11, $w17, $w13
+                fseq.d          $w29, $w0, $w31
+                fsle.w          $w30, $w31, $w31
+                fsle.d          $w18, $w23, $w24
+                fslt.w          $w12, $w5, $w6
+                fslt.d          $w16, $w26, $w21
+                fsne.w          $w30, $w1, $w12
+                fsne.d          $w14, $w13, $w23
+                fsor.w          $w27, $w13, $w27
+                fsor.d          $w12, $w24, $w11
+                fsub.w          $w31, $w26, $w1
+                fsub.d          $w19, $w17, $w27
+                fsueq.w         $w16, $w24, $w25
+                fsueq.d         $w18, $w14, $w14
+                fsule.w         $w23, $w30, $w13
+                fsule.d         $w2, $w11, $w26
+                fsult.w         $w11, $w26, $w22
+                fsult.d         $w6, $w23, $w30
+                fsun.w          $w3, $w18, $w28
+                fsun.d          $w18, $w11, $w19
+                fsune.w         $w16, $w31, $w2
+                fsune.d         $w3, $w26, $w17
+                ftq.h           $w16, $w4, $w24
+                ftq.w           $w5, $w5, $w25
+                madd_q.h        $w16, $w20, $w10
+                madd_q.w        $w28, $w2, $w9
+                maddr_q.h       $w8, $w18, $w9
+                maddr_q.w       $w29, $w12, $w16
+                msub_q.h        $w24, $w26, $w10
+                msub_q.w        $w13, $w30, $w28
+                msubr_q.h       $w12, $w21, $w11
+                msubr_q.w       $w1, $w14, $w20
+                mul_q.h         $w6, $w16, $w30
+                mul_q.w         $w16, $w1, $w4
+                mulr_q.h        $w6, $w20, $w19
+                mulr_q.w        $w27, $w1, $w20
diff --git a/test/MC/Mips/msa/test_bit.s b/test/MC/Mips/msa/test_bit.s
new file mode 100644
index 000000000000..7c2313116c3e
--- /dev/null
+++ b/test/MC/Mips/msa/test_bit.s
@@ -0,0 +1,150 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+# The command above matches llvm-mc's printed assembly, including each "encoding" comment, against the default-prefix check lines below.
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+# The command above assembles to an object, disassembles it with llvm-objdump, and matches the lines selected by -check-prefix. NOTE(review): both commands name a mipsel triple yet also pass -arch=mips; confirm which one is meant to apply.
+# CHECK:        bclri.b         $w21, $w30, 2           # encoding: [0x79,0xf2,0xf5,0x49]
+# CHECK:        bclri.h         $w24, $w21, 0           # encoding: [0x79,0xe0,0xae,0x09]
+# CHECK:        bclri.w         $w23, $w30, 3           # encoding: [0x79,0xc3,0xf5,0xc9]
+# CHECK:        bclri.d         $w9, $w11, 0            # encoding: [0x79,0x80,0x5a,0x49]
+# CHECK:        binsli.b        $w25, $w12, 1           # encoding: [0x7b,0x71,0x66,0x49]
+# CHECK:        binsli.h        $w21, $w22, 0           # encoding: [0x7b,0x60,0xb5,0x49]
+# CHECK:        binsli.w        $w22, $w4, 0            # encoding: [0x7b,0x40,0x25,0x89]
+# CHECK:        binsli.d        $w6, $w2, 6             # encoding: [0x7b,0x06,0x11,0x89]
+# CHECK:        binsri.b        $w15, $w19, 0           # encoding: [0x7b,0xf0,0x9b,0xc9]
+# CHECK:        binsri.h        $w8, $w30, 1            # encoding: [0x7b,0xe1,0xf2,0x09]
+# CHECK:        binsri.w        $w2, $w19, 5            # encoding: [0x7b,0xc5,0x98,0x89]
+# CHECK:        binsri.d        $w18, $w20, 1           # encoding: [0x7b,0x81,0xa4,0x89]
+# CHECK:        bnegi.b         $w24, $w19, 0           # encoding: [0x7a,0xf0,0x9e,0x09]
+# CHECK:        bnegi.h         $w28, $w11, 3           # encoding: [0x7a,0xe3,0x5f,0x09]
+# CHECK:        bnegi.w         $w1, $w27, 5            # encoding: [0x7a,0xc5,0xd8,0x49]
+# CHECK:        bnegi.d         $w4, $w21, 1            # encoding: [0x7a,0x81,0xa9,0x09]
+# CHECK:        bseti.b         $w18, $w8, 0            # encoding: [0x7a,0x70,0x44,0x89]
+# CHECK:        bseti.h         $w24, $w14, 2           # encoding: [0x7a,0x62,0x76,0x09]
+# CHECK:        bseti.w         $w9, $w18, 4            # encoding: [0x7a,0x44,0x92,0x49]
+# CHECK:        bseti.d         $w7, $w15, 1            # encoding: [0x7a,0x01,0x79,0xc9]
+# CHECK:        sat_s.b         $w31, $w31, 2           # encoding: [0x78,0x72,0xff,0xca]
+# CHECK:        sat_s.h         $w19, $w19, 0           # encoding: [0x78,0x60,0x9c,0xca]
+# CHECK:        sat_s.w         $w19, $w29, 0           # encoding: [0x78,0x40,0xec,0xca]
+# CHECK:        sat_s.d         $w11, $w22, 0           # encoding: [0x78,0x00,0xb2,0xca]
+# CHECK:        sat_u.b         $w1, $w13, 3            # encoding: [0x78,0xf3,0x68,0x4a]
+# CHECK:        sat_u.h         $w30, $w24, 4           # encoding: [0x78,0xe4,0xc7,0x8a]
+# CHECK:        sat_u.w         $w31, $w13, 0           # encoding: [0x78,0xc0,0x6f,0xca]
+# CHECK:        sat_u.d         $w29, $w16, 5           # encoding: [0x78,0x85,0x87,0x4a]
+# CHECK:        slli.b          $w23, $w10, 1           # encoding: [0x78,0x71,0x55,0xc9]
+# CHECK:        slli.h          $w9, $w18, 1            # encoding: [0x78,0x61,0x92,0x49]
+# CHECK:        slli.w          $w11, $w29, 4           # encoding: [0x78,0x44,0xea,0xc9]
+# CHECK:        slli.d          $w25, $w20, 1           # encoding: [0x78,0x01,0xa6,0x49]
+# CHECK:        srai.b          $w24, $w29, 1           # encoding: [0x78,0xf1,0xee,0x09]
+# CHECK:        srai.h          $w1, $w6, 0             # encoding: [0x78,0xe0,0x30,0x49]
+# CHECK:        srai.w          $w7, $w26, 1            # encoding: [0x78,0xc1,0xd1,0xc9]
+# CHECK:        srai.d          $w20, $w25, 3           # encoding: [0x78,0x83,0xcd,0x09]
+# CHECK:        srari.b         $w5, $w25, 0            # encoding: [0x79,0x70,0xc9,0x4a]
+# CHECK:        srari.h         $w7, $w6, 4             # encoding: [0x79,0x64,0x31,0xca]
+# CHECK:        srari.w         $w17, $w11, 5           # encoding: [0x79,0x45,0x5c,0x4a]
+# CHECK:        srari.d         $w21, $w25, 5           # encoding: [0x79,0x05,0xcd,0x4a]
+# CHECK:        srli.b          $w2, $w0, 2             # encoding: [0x79,0x72,0x00,0x89]
+# CHECK:        srli.h          $w31, $w31, 2           # encoding: [0x79,0x62,0xff,0xc9]
+# CHECK:        srli.w          $w5, $w9, 4             # encoding: [0x79,0x44,0x49,0x49]
+# CHECK:        srli.d          $w27, $w26, 5           # encoding: [0x79,0x05,0xd6,0xc9]
+# CHECK:        srlri.b         $w18, $w3, 0            # encoding: [0x79,0xf0,0x1c,0x8a]
+# CHECK:        srlri.h         $w1, $w2, 3             # encoding: [0x79,0xe3,0x10,0x4a]
+# CHECK:        srlri.w         $w11, $w22, 2           # encoding: [0x79,0xc2,0xb2,0xca]
+# CHECK:        srlri.d         $w24, $w10, 6           # encoding: [0x79,0x86,0x56,0x0a]
+
+# CHECKOBJDUMP:        bclri.b         $w21, $w30, 2
+# CHECKOBJDUMP:        bclri.h         $w24, $w21, 0
+# CHECKOBJDUMP:        bclri.w         $w23, $w30, 3
+# CHECKOBJDUMP:        bclri.d         $w9, $w11, 0
+# CHECKOBJDUMP:        binsli.b        $w25, $w12, 1
+# CHECKOBJDUMP:        binsli.h        $w21, $w22, 0
+# CHECKOBJDUMP:        binsli.w        $w22, $w4, 0
+# CHECKOBJDUMP:        binsli.d        $w6, $w2, 6
+# CHECKOBJDUMP:        binsri.b        $w15, $w19, 0
+# CHECKOBJDUMP:        binsri.h        $w8, $w30, 1
+# CHECKOBJDUMP:        binsri.w        $w2, $w19, 5
+# CHECKOBJDUMP:        binsri.d        $w18, $w20, 1
+# CHECKOBJDUMP:        bnegi.b         $w24, $w19, 0
+# CHECKOBJDUMP:        bnegi.h         $w28, $w11, 3
+# CHECKOBJDUMP:        bnegi.w         $w1, $w27, 5
+# CHECKOBJDUMP:        bnegi.d         $w4, $w21, 1
+# CHECKOBJDUMP:        bseti.b         $w18, $w8, 0
+# CHECKOBJDUMP:        bseti.h         $w24, $w14, 2
+# CHECKOBJDUMP:        bseti.w         $w9, $w18, 4
+# CHECKOBJDUMP:        bseti.d         $w7, $w15, 1
+# CHECKOBJDUMP:        sat_s.b         $w31, $w31, 2
+# CHECKOBJDUMP:        sat_s.h         $w19, $w19, 0
+# CHECKOBJDUMP:        sat_s.w         $w19, $w29, 0
+# CHECKOBJDUMP:        sat_s.d         $w11, $w22, 0
+# CHECKOBJDUMP:        sat_u.b         $w1, $w13, 3
+# CHECKOBJDUMP:        sat_u.h         $w30, $w24, 4
+# CHECKOBJDUMP:        sat_u.w         $w31, $w13, 0
+# CHECKOBJDUMP:        sat_u.d         $w29, $w16, 5
+# CHECKOBJDUMP:        slli.b          $w23, $w10, 1
+# CHECKOBJDUMP:        slli.h          $w9, $w18, 1
+# CHECKOBJDUMP:        slli.w          $w11, $w29, 4
+# CHECKOBJDUMP:        slli.d          $w25, $w20, 1
+# CHECKOBJDUMP:        srai.b          $w24, $w29, 1
+# CHECKOBJDUMP:        srai.h          $w1, $w6, 0
+# CHECKOBJDUMP:        srai.w          $w7, $w26, 1
+# CHECKOBJDUMP:        srai.d          $w20, $w25, 3
+# CHECKOBJDUMP:        srari.b         $w5, $w25, 0
+# CHECKOBJDUMP:        srari.h         $w7, $w6, 4
+# CHECKOBJDUMP:        srari.w         $w17, $w11, 5
+# CHECKOBJDUMP:        srari.d         $w21, $w25, 5
+# CHECKOBJDUMP:        srli.b          $w2, $w0, 2
+# CHECKOBJDUMP:        srli.h          $w31, $w31, 2
+# CHECKOBJDUMP:        srli.w          $w5, $w9, 4
+# CHECKOBJDUMP:        srli.d          $w27, $w26, 5
+# CHECKOBJDUMP:        srlri.b         $w18, $w3, 0
+# CHECKOBJDUMP:        srlri.h         $w1, $w2, 3
+# CHECKOBJDUMP:        srlri.w         $w11, $w22, 2
+# CHECKOBJDUMP:        srlri.d         $w24, $w10, 6
+
+                bclri.b         $w21, $w30, 2
+                bclri.h         $w24, $w21, 0
+                bclri.w         $w23, $w30, 3
+                bclri.d         $w9, $w11, 0
+                binsli.b        $w25, $w12, 1
+                binsli.h        $w21, $w22, 0
+                binsli.w        $w22, $w4, 0
+                binsli.d        $w6, $w2, 6
+                binsri.b        $w15, $w19, 0
+                binsri.h        $w8, $w30, 1
+                binsri.w        $w2, $w19, 5
+                binsri.d        $w18, $w20, 1
+                bnegi.b         $w24, $w19, 0
+                bnegi.h         $w28, $w11, 3
+                bnegi.w         $w1, $w27, 5
+                bnegi.d         $w4, $w21, 1
+                bseti.b         $w18, $w8, 0
+                bseti.h         $w24, $w14, 2
+                bseti.w         $w9, $w18, 4
+                bseti.d         $w7, $w15, 1
+                sat_s.b         $w31, $w31, 2
+                sat_s.h         $w19, $w19, 0
+                sat_s.w         $w19, $w29, 0
+                sat_s.d         $w11, $w22, 0
+                sat_u.b         $w1, $w13, 3
+                sat_u.h         $w30, $w24, 4
+                sat_u.w         $w31, $w13, 0
+                sat_u.d         $w29, $w16, 5
+                slli.b          $w23, $w10, 1
+                slli.h          $w9, $w18, 1
+                slli.w          $w11, $w29, 4
+                slli.d          $w25, $w20, 1
+                srai.b          $w24, $w29, 1
+                srai.h          $w1, $w6, 0
+                srai.w          $w7, $w26, 1
+                srai.d          $w20, $w25, 3
+                srari.b         $w5, $w25, 0
+                srari.h         $w7, $w6, 4
+                srari.w         $w17, $w11, 5
+                srari.d         $w21, $w25, 5
+                srli.b          $w2, $w0, 2
+                srli.h          $w31, $w31, 2
+                srli.w          $w5, $w9, 4
+                srli.d          $w27, $w26, 5
+                srlri.b         $w18, $w3, 0
+                srlri.h         $w1, $w2, 3
+                srlri.w         $w11, $w22, 2
+                srlri.d         $w24, $w10, 6
diff --git a/test/MC/Mips/msa/test_cbranch.s b/test/MC/Mips/msa/test_cbranch.s
new file mode 100644
index 000000000000..2fc65afc1c9f
--- /dev/null
+++ b/test/MC/Mips/msa/test_cbranch.s
@@ -0,0 +1,84 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# The input at the bottom contains only branches, yet llvm-mc is expected to
+# print a nop after every one of them, so each branch check is paired with a
+# nop check. The indented "fixup A" comments have no FileCheck prefix and are
+# therefore not matched; they only record the expected fixup for symbols.
+#
+#CHECK:      bnz.b        $w0, 4        # encoding: [0x47,0x80,0x00,0x01]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.h        $w1, 16       # encoding: [0x47,0xa1,0x00,0x04]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.w        $w2, 128      # encoding: [0x47,0xc2,0x00,0x20]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.d        $w3, -128     # encoding: [0x47,0xe3,0xff,0xe0]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.b        $w0, SYMBOL0  # encoding: [0x47'A',0x80'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.h        $w1, SYMBOL1  # encoding: [0x47'A',0xa1'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.w        $w2, SYMBOL2  # encoding: [0x47'A',0xc2'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.d        $w3, SYMBOL3  # encoding: [0x47'A',0xe3'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+
+#CHECK:      bnz.v        $w0, 4        # encoding: [0x45,0xe0,0x00,0x01]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bnz.v        $w0, SYMBOL0  # encoding: [0x45'A',0xe0'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+
+#CHECK:      bz.b         $w0, 128      # encoding: [0x47,0x00,0x00,0x20]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.h         $w1, 256      # encoding: [0x47,0x21,0x00,0x40]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.w         $w2, 512      # encoding: [0x47,0x42,0x00,0x80]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.d         $w3, -1024    # encoding: [0x47,0x63,0xff,0x00]
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.b         $w0, SYMBOL0  # encoding: [0x47'A',A,0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.h         $w1, SYMBOL1  # encoding: [0x47'A',0x21'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.w         $w2, SYMBOL2  # encoding: [0x47'A',0x42'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.d         $w3, SYMBOL3  # encoding: [0x47'A',0x63'A',0x00,0x00]
+                                        #   fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16
+#CHECK:      nop                        # encoding: [0x00,0x00,0x00,0x00]
+
+#CHECK:      bz.v        $w0, 4        # encoding: [0x45,0x60,0x00,0x01]
+#CHECK:      nop                       # encoding: [0x00,0x00,0x00,0x00]
+#CHECK:      bz.v        $w0, SYMBOL0  # encoding: [0x45'A',0x60'A',0x00,0x00]
+                                       #   fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
+#CHECK:      nop                       # encoding: [0x00,0x00,0x00,0x00]
+
+bnz.b        $w0, 4
+bnz.h        $w1, 16
+bnz.w        $w2, 128
+bnz.d        $w3, -128
+bnz.b        $w0, SYMBOL0
+bnz.h        $w1, SYMBOL1
+bnz.w        $w2, SYMBOL2
+bnz.d        $w3, SYMBOL3
+
+bnz.v        $w0, 4
+bnz.v        $w0, SYMBOL0
+
+bz.b        $w0, 128
+bz.h        $w1, 256
+bz.w        $w2, 512
+bz.d        $w3, -1024
+bz.b        $w0, SYMBOL0
+bz.h        $w1, SYMBOL1
+bz.w        $w2, SYMBOL2
+bz.d        $w3, SYMBOL3
+
+bz.v        $w0, 4
+bz.v        $w0, SYMBOL0
diff --git a/test/MC/Mips/msa/test_ctrlregs.s b/test/MC/Mips/msa/test_ctrlregs.s
new file mode 100644
index 000000000000..f8f4f9eafd26
--- /dev/null
+++ b/test/MC/Mips/msa/test_ctrlregs.s
@@ -0,0 +1,105 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+#CHECK:  cfcmsa       $1, $0                  # encoding: [0x78,0x7e,0x00,0x59]
+#CHECK:  cfcmsa       $1, $0                  # encoding: [0x78,0x7e,0x00,0x59]
+#CHECK:  cfcmsa       $2, $1                  # encoding: [0x78,0x7e,0x08,0x99]
+#CHECK:  cfcmsa       $2, $1                  # encoding: [0x78,0x7e,0x08,0x99]
+#CHECK:  cfcmsa       $3, $2                  # encoding: [0x78,0x7e,0x10,0xd9]
+#CHECK:  cfcmsa       $3, $2                  # encoding: [0x78,0x7e,0x10,0xd9]
+#CHECK:  cfcmsa       $4, $3                  # encoding: [0x78,0x7e,0x19,0x19]
+#CHECK:  cfcmsa       $4, $3                  # encoding: [0x78,0x7e,0x19,0x19]
+#CHECK:  cfcmsa       $5, $4                  # encoding: [0x78,0x7e,0x21,0x59]
+#CHECK:  cfcmsa       $5, $4                  # encoding: [0x78,0x7e,0x21,0x59]
+#CHECK:  cfcmsa       $6, $5                  # encoding: [0x78,0x7e,0x29,0x99]
+#CHECK:  cfcmsa       $6, $5                  # encoding: [0x78,0x7e,0x29,0x99]
+#CHECK:  cfcmsa       $7, $6                  # encoding: [0x78,0x7e,0x31,0xd9]
+#CHECK:  cfcmsa       $7, $6                  # encoding: [0x78,0x7e,0x31,0xd9]
+#CHECK:  cfcmsa       $8, $7                  # encoding: [0x78,0x7e,0x3a,0x19]
+#CHECK:  cfcmsa       $8, $7                  # encoding: [0x78,0x7e,0x3a,0x19]
+
+#CHECK:  ctcmsa       $0, $1                  # encoding: [0x78,0x3e,0x08,0x19]
+#CHECK:  ctcmsa       $0, $1                  # encoding: [0x78,0x3e,0x08,0x19]
+#CHECK:  ctcmsa       $1, $2                  # encoding: [0x78,0x3e,0x10,0x59]
+#CHECK:  ctcmsa       $1, $2                  # encoding: [0x78,0x3e,0x10,0x59]
+#CHECK:  ctcmsa       $2, $3                  # encoding: [0x78,0x3e,0x18,0x99]
+#CHECK:  ctcmsa       $2, $3                  # encoding: [0x78,0x3e,0x18,0x99]
+#CHECK:  ctcmsa       $3, $4                  # encoding: [0x78,0x3e,0x20,0xd9]
+#CHECK:  ctcmsa       $3, $4                  # encoding: [0x78,0x3e,0x20,0xd9]
+#CHECK:  ctcmsa       $4, $5                  # encoding: [0x78,0x3e,0x29,0x19]
+#CHECK:  ctcmsa       $4, $5                  # encoding: [0x78,0x3e,0x29,0x19]
+#CHECK:  ctcmsa       $5, $6                  # encoding: [0x78,0x3e,0x31,0x59]
+#CHECK:  ctcmsa       $5, $6                  # encoding: [0x78,0x3e,0x31,0x59]
+#CHECK:  ctcmsa       $6, $7                  # encoding: [0x78,0x3e,0x39,0x99]
+#CHECK:  ctcmsa       $6, $7                  # encoding: [0x78,0x3e,0x39,0x99]
+#CHECK:  ctcmsa       $7, $8                  # encoding: [0x78,0x3e,0x41,0xd9]
+#CHECK:  ctcmsa       $7, $8                  # encoding: [0x78,0x3e,0x41,0xd9]
+
+#CHECKOBJDUMP:  cfcmsa       $1, $0
+#CHECKOBJDUMP:  cfcmsa       $1, $0
+#CHECKOBJDUMP:  cfcmsa       $2, $1
+#CHECKOBJDUMP:  cfcmsa       $2, $1
+#CHECKOBJDUMP:  cfcmsa       $3, $2
+#CHECKOBJDUMP:  cfcmsa       $3, $2
+#CHECKOBJDUMP:  cfcmsa       $4, $3
+#CHECKOBJDUMP:  cfcmsa       $4, $3
+#CHECKOBJDUMP:  cfcmsa       $5, $4
+#CHECKOBJDUMP:  cfcmsa       $5, $4
+#CHECKOBJDUMP:  cfcmsa       $6, $5
+#CHECKOBJDUMP:  cfcmsa       $6, $5
+#CHECKOBJDUMP:  cfcmsa       $7, $6
+#CHECKOBJDUMP:  cfcmsa       $7, $6
+#CHECKOBJDUMP:  cfcmsa       $8, $7
+#CHECKOBJDUMP:  cfcmsa       $8, $7
+
+#CHECKOBJDUMP:  ctcmsa       $0, $1
+#CHECKOBJDUMP:  ctcmsa       $0, $1
+#CHECKOBJDUMP:  ctcmsa       $1, $2
+#CHECKOBJDUMP:  ctcmsa       $1, $2
+#CHECKOBJDUMP:  ctcmsa       $2, $3
+#CHECKOBJDUMP:  ctcmsa       $2, $3
+#CHECKOBJDUMP:  ctcmsa       $3, $4
+#CHECKOBJDUMP:  ctcmsa       $3, $4
+#CHECKOBJDUMP:  ctcmsa       $4, $5
+#CHECKOBJDUMP:  ctcmsa       $4, $5
+#CHECKOBJDUMP:  ctcmsa       $5, $6
+#CHECKOBJDUMP:  ctcmsa       $5, $6
+#CHECKOBJDUMP:  ctcmsa       $6, $7
+#CHECKOBJDUMP:  ctcmsa       $6, $7
+#CHECKOBJDUMP:  ctcmsa       $7, $8
+#CHECKOBJDUMP:  ctcmsa       $7, $8
+
+cfcmsa       $1, $msair
+cfcmsa       $1, $0
+cfcmsa       $2, $msacsr
+cfcmsa       $2, $1
+cfcmsa       $3, $msaaccess
+cfcmsa       $3, $2
+cfcmsa       $4, $msasave
+cfcmsa       $4, $3
+cfcmsa       $5, $msamodify
+cfcmsa       $5, $4
+cfcmsa       $6, $msarequest
+cfcmsa       $6, $5
+cfcmsa       $7, $msamap
+cfcmsa       $7, $6
+cfcmsa       $8, $msaunmap
+cfcmsa       $8, $7
+
+ctcmsa       $msair, $1
+ctcmsa       $0, $1
+ctcmsa       $msacsr, $2
+ctcmsa       $1, $2
+ctcmsa       $msaaccess, $3
+ctcmsa       $2, $3
+ctcmsa       $msasave, $4
+ctcmsa       $3, $4
+ctcmsa       $msamodify, $5
+ctcmsa       $4, $5
+ctcmsa       $msarequest, $6
+ctcmsa       $5, $6
+ctcmsa       $msamap, $7
+ctcmsa       $6, $7
+ctcmsa       $msaunmap, $8
+ctcmsa       $7, $8
diff --git a/test/MC/Mips/msa/test_elm.s b/test/MC/Mips/msa/test_elm.s
new file mode 100644
index 000000000000..1d0483826a97
--- /dev/null
+++ b/test/MC/Mips/msa/test_elm.s
@@ -0,0 +1,51 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        copy_s.b        $13, $w8[2]             # encoding: [0x78,0x82,0x43,0x59]
+# CHECK:        copy_s.h        $1, $w25[0]             # encoding: [0x78,0xa0,0xc8,0x59]
+# CHECK:        copy_s.w        $22, $w5[1]             # encoding: [0x78,0xb1,0x2d,0x99]
+# CHECK:        copy_u.b        $22, $w20[4]            # encoding: [0x78,0xc4,0xa5,0x99]
+# CHECK:        copy_u.h        $20, $w4[0]             # encoding: [0x78,0xe0,0x25,0x19]
+# CHECK:        copy_u.w        $fp, $w13[2]            # encoding: [0x78,0xf2,0x6f,0x99]
+# CHECK:        sldi.b          $w0, $w29[4]            # encoding: [0x78,0x04,0xe8,0x19]
+# CHECK:        sldi.h          $w8, $w17[0]            # encoding: [0x78,0x20,0x8a,0x19]
+# CHECK:        sldi.w          $w20, $w27[2]           # encoding: [0x78,0x32,0xdd,0x19]
+# CHECK:        sldi.d          $w4, $w12[0]            # encoding: [0x78,0x38,0x61,0x19]
+# CHECK:        splati.b        $w25, $w3[2]            # encoding: [0x78,0x42,0x1e,0x59]
+# CHECK:        splati.h        $w24, $w28[1]           # encoding: [0x78,0x61,0xe6,0x19]
+# CHECK:        splati.w        $w13, $w18[0]           # encoding: [0x78,0x70,0x93,0x59]
+# CHECK:        splati.d        $w28, $w1[0]            # encoding: [0x78,0x78,0x0f,0x19]
+# CHECK:        move.v          $w23, $w24              # encoding: [0x78,0xbe,0xc5,0xd9]
+
+# CHECKOBJDUMP:        copy_s.b        $13, $w8[2]
+# CHECKOBJDUMP:        copy_s.h        $1, $w25[0]
+# CHECKOBJDUMP:        copy_s.w        $22, $w5[1]
+# CHECKOBJDUMP:        copy_u.b        $22, $w20[4]
+# CHECKOBJDUMP:        copy_u.h        $20, $w4[0]
+# CHECKOBJDUMP:        copy_u.w        $fp, $w13[2]
+# CHECKOBJDUMP:        sldi.b          $w0, $w29[4]
+# CHECKOBJDUMP:        sldi.h          $w8, $w17[0]
+# CHECKOBJDUMP:        sldi.w          $w20, $w27[2]
+# CHECKOBJDUMP:        sldi.d          $w4, $w12[0]
+# CHECKOBJDUMP:        splati.b        $w25, $w3[2]
+# CHECKOBJDUMP:        splati.h        $w24, $w28[1]
+# CHECKOBJDUMP:        splati.w        $w13, $w18[0]
+# CHECKOBJDUMP:        splati.d        $w28, $w1[0]
+# CHECKOBJDUMP:        move.v          $w23, $w24
+
+                copy_s.b        $13, $w8[2]
+                copy_s.h        $1, $w25[0]
+                copy_s.w        $22, $w5[1]
+                copy_u.b        $22, $w20[4]
+                copy_u.h        $20, $w4[0]
+                copy_u.w        $30, $w13[2]
+                sldi.b          $w0, $w29[4]
+                sldi.h          $w8, $w17[0]
+                sldi.w          $w20, $w27[2]
+                sldi.d          $w4, $w12[0]
+                splati.b        $w25, $w3[2]
+                splati.h        $w24, $w28[1]
+                splati.w        $w13, $w18[0]
+                splati.d        $w28, $w1[0]
+                move.v          $w23, $w24
diff --git a/test/MC/Mips/msa/test_elm_insert.s b/test/MC/Mips/msa/test_elm_insert.s
new file mode 100644
index 000000000000..5fc55f3ef0c8
--- /dev/null
+++ b/test/MC/Mips/msa/test_elm_insert.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        insert.b        $w23[3], $sp            # encoding: [0x79,0x03,0xed,0xd9]
+# CHECK:        insert.h        $w20[2], $5             # encoding: [0x79,0x22,0x2d,0x19]
+# CHECK:        insert.w        $w8[2], $15             # encoding: [0x79,0x32,0x7a,0x19]
+
+# CHECKOBJDUMP:        insert.b        $w23[3], $sp
+# CHECKOBJDUMP:        insert.h        $w20[2], $5
+# CHECKOBJDUMP:        insert.w        $w8[2], $15
+
+                insert.b        $w23[3], $sp
+                insert.h        $w20[2], $5
+                insert.w        $w8[2], $15
diff --git a/test/MC/Mips/msa/test_elm_insve.s b/test/MC/Mips/msa/test_elm_insve.s
new file mode 100644
index 000000000000..d63d687ddfe6
--- /dev/null
+++ b/test/MC/Mips/msa/test_elm_insve.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        insve.b $w25[3], $w9[0]         # encoding: [0x79,0x43,0x4e,0x59]
+# CHECK:        insve.h $w24[2], $w2[0]         # encoding: [0x79,0x62,0x16,0x19]
+# CHECK:        insve.w $w0[2], $w13[0]         # encoding: [0x79,0x72,0x68,0x19]
+# CHECK:        insve.d $w3[0], $w18[0]         # encoding: [0x79,0x78,0x90,0xd9]
+
+# CHECKOBJDUMP:        insve.b $w25[3], $w9[0]
+# CHECKOBJDUMP:        insve.h $w24[2], $w2[0]
+# CHECKOBJDUMP:        insve.w $w0[2], $w13[0]
+# CHECKOBJDUMP:        insve.d $w3[0], $w18[0]
+
+                insve.b $w25[3], $w9[0]
+                insve.h $w24[2], $w2[0]
+                insve.w $w0[2], $w13[0]
+                insve.d $w3[0], $w18[0]
diff --git a/test/MC/Mips/msa/test_i10.s b/test/MC/Mips/msa/test_i10.s
new file mode 100644
index 000000000000..828ebb539875
--- /dev/null
+++ b/test/MC/Mips/msa/test_i10.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+
+# CHECK:        ldi.b   $w8, 198                # encoding: [0x7b,0x06,0x32,0x07]
+# CHECK:        ldi.h   $w20, 313               # encoding: [0x7b,0x29,0xcd,0x07]
+# CHECK:        ldi.w   $w24, 492               # encoding: [0x7b,0x4f,0x66,0x07]
+# CHECK:        ldi.d   $w27, -180              # encoding: [0x7b,0x7a,0x66,0xc7]
+
+# CHECKOBJDUMP:        ldi.b   $w8, 198
+# CHECKOBJDUMP:        ldi.h   $w20, 313
+# CHECKOBJDUMP:        ldi.w   $w24, 492
+# CHECKOBJDUMP:        ldi.d   $w27, 844
+
+                ldi.b   $w8, 198
+                ldi.h   $w20, 313
+                ldi.w   $w24, 492
+                ldi.d   $w27, -180
diff --git a/test/MC/Mips/msa/test_i5.s b/test/MC/Mips/msa/test_i5.s
new file mode 100644
index 000000000000..992bfe1a2a39
--- /dev/null
+++ b/test/MC/Mips/msa/test_i5.s
@@ -0,0 +1,138 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        addvi.b         $w3, $w31, 30           # encoding: [0x78,0x1e,0xf8,0xc6]
+# CHECK:        addvi.h         $w24, $w13, 26          # encoding: [0x78,0x3a,0x6e,0x06]
+# CHECK:        addvi.w         $w26, $w20, 26          # encoding: [0x78,0x5a,0xa6,0x86]
+# CHECK:        addvi.d         $w16, $w1, 21           # encoding: [0x78,0x75,0x0c,0x06]
+# CHECK:        ceqi.b          $w24, $w21, -8          # encoding: [0x78,0x18,0xae,0x07]
+# CHECK:        ceqi.h          $w31, $w15, 2           # encoding: [0x78,0x22,0x7f,0xc7]
+# CHECK:        ceqi.w          $w12, $w1, -1           # encoding: [0x78,0x5f,0x0b,0x07]
+# CHECK:        ceqi.d          $w24, $w22, 7           # encoding: [0x78,0x67,0xb6,0x07]
+# CHECK:        clei_s.b        $w12, $w16, 1           # encoding: [0x7a,0x01,0x83,0x07]
+# CHECK:        clei_s.h        $w2, $w10, -9           # encoding: [0x7a,0x37,0x50,0x87]
+# CHECK:        clei_s.w        $w4, $w11, -10          # encoding: [0x7a,0x56,0x59,0x07]
+# CHECK:        clei_s.d        $w0, $w29, -10          # encoding: [0x7a,0x76,0xe8,0x07]
+# CHECK:        clei_u.b        $w21, $w17, 3           # encoding: [0x7a,0x83,0x8d,0x47]
+# CHECK:        clei_u.h        $w29, $w7, 17           # encoding: [0x7a,0xb1,0x3f,0x47]
+# CHECK:        clei_u.w        $w1, $w1, 2             # encoding: [0x7a,0xc2,0x08,0x47]
+# CHECK:        clei_u.d        $w27, $w27, 29          # encoding: [0x7a,0xfd,0xde,0xc7]
+# CHECK:        clti_s.b        $w19, $w13, -7          # encoding: [0x79,0x19,0x6c,0xc7]
+# CHECK:        clti_s.h        $w15, $w10, -12         # encoding: [0x79,0x34,0x53,0xc7]
+# CHECK:        clti_s.w        $w12, $w12, 11          # encoding: [0x79,0x4b,0x63,0x07]
+# CHECK:        clti_s.d        $w29, $w20, -15         # encoding: [0x79,0x71,0xa7,0x47]
+# CHECK:        clti_u.b        $w14, $w9, 29           # encoding: [0x79,0x9d,0x4b,0x87]
+# CHECK:        clti_u.h        $w24, $w25, 25          # encoding: [0x79,0xb9,0xce,0x07]
+# CHECK:        clti_u.w        $w1, $w1, 22            # encoding: [0x79,0xd6,0x08,0x47]
+# CHECK:        clti_u.d        $w21, $w25, 1           # encoding: [0x79,0xe1,0xcd,0x47]
+# CHECK:        maxi_s.b        $w22, $w21, 1           # encoding: [0x79,0x01,0xad,0x86]
+# CHECK:        maxi_s.h        $w29, $w5, -8           # encoding: [0x79,0x38,0x2f,0x46]
+# CHECK:        maxi_s.w        $w1, $w10, -12          # encoding: [0x79,0x54,0x50,0x46]
+# CHECK:        maxi_s.d        $w13, $w29, -16         # encoding: [0x79,0x70,0xeb,0x46]
+# CHECK:        maxi_u.b        $w20, $w0, 12           # encoding: [0x79,0x8c,0x05,0x06]
+# CHECK:        maxi_u.h        $w1, $w14, 3            # encoding: [0x79,0xa3,0x70,0x46]
+# CHECK:        maxi_u.w        $w27, $w22, 11          # encoding: [0x79,0xcb,0xb6,0xc6]
+# CHECK:        maxi_u.d        $w26, $w6, 4            # encoding: [0x79,0xe4,0x36,0x86]
+# CHECK:        mini_s.b        $w4, $w1, 1             # encoding: [0x7a,0x01,0x09,0x06]
+# CHECK:        mini_s.h        $w27, $w27, -9          # encoding: [0x7a,0x37,0xde,0xc6]
+# CHECK:        mini_s.w        $w28, $w11, 9           # encoding: [0x7a,0x49,0x5f,0x06]
+# CHECK:        mini_s.d        $w11, $w10, 10          # encoding: [0x7a,0x6a,0x52,0xc6]
+# CHECK:        mini_u.b        $w18, $w23, 27          # encoding: [0x7a,0x9b,0xbc,0x86]
+# CHECK:        mini_u.h        $w7, $w26, 18           # encoding: [0x7a,0xb2,0xd1,0xc6]
+# CHECK:        mini_u.w        $w11, $w12, 26          # encoding: [0x7a,0xda,0x62,0xc6]
+# CHECK:        mini_u.d        $w11, $w15, 2           # encoding: [0x7a,0xe2,0x7a,0xc6]
+# CHECK:        subvi.b         $w24, $w20, 19          # encoding: [0x78,0x93,0xa6,0x06]
+# CHECK:        subvi.h         $w11, $w19, 4           # encoding: [0x78,0xa4,0x9a,0xc6]
+# CHECK:        subvi.w         $w12, $w10, 11          # encoding: [0x78,0xcb,0x53,0x06]
+# CHECK:        subvi.d         $w19, $w16, 7           # encoding: [0x78,0xe7,0x84,0xc6]
+
+# CHECKOBJDUMP:        addvi.b         $w3, $w31, 30
+# CHECKOBJDUMP:        addvi.h         $w24, $w13, 26
+# CHECKOBJDUMP:        addvi.w         $w26, $w20, 26
+# CHECKOBJDUMP:        addvi.d         $w16, $w1, 21
+# CHECKOBJDUMP:        ceqi.b          $w24, $w21, 24
+# CHECKOBJDUMP:        ceqi.h          $w31, $w15, 2
+# CHECKOBJDUMP:        ceqi.w          $w12, $w1, 31
+# CHECKOBJDUMP:        ceqi.d          $w24, $w22, 7
+# CHECKOBJDUMP:        clei_s.b        $w12, $w16, 1
+# CHECKOBJDUMP:        clei_s.h        $w2, $w10, 23
+# CHECKOBJDUMP:        clei_s.w        $w4, $w11, 22
+# CHECKOBJDUMP:        clei_s.d        $w0, $w29, 22
+# CHECKOBJDUMP:        clei_u.b        $w21, $w17, 3
+# CHECKOBJDUMP:        clei_u.h        $w29, $w7, 17
+# CHECKOBJDUMP:        clei_u.w        $w1, $w1, 2
+# CHECKOBJDUMP:        clei_u.d        $w27, $w27, 29
+# CHECKOBJDUMP:        clti_s.b        $w19, $w13, 25
+# CHECKOBJDUMP:        clti_s.h        $w15, $w10, 20
+# CHECKOBJDUMP:        clti_s.w        $w12, $w12, 11
+# CHECKOBJDUMP:        clti_s.d        $w29, $w20, 17
+# CHECKOBJDUMP:        clti_u.b        $w14, $w9, 29
+# CHECKOBJDUMP:        clti_u.h        $w24, $w25, 25
+# CHECKOBJDUMP:        clti_u.w        $w1, $w1, 22
+# CHECKOBJDUMP:        clti_u.d        $w21, $w25, 1
+# CHECKOBJDUMP:        maxi_s.b        $w22, $w21, 1
+# CHECKOBJDUMP:        maxi_s.h        $w29, $w5, 24
+# CHECKOBJDUMP:        maxi_s.w        $w1, $w10, 20
+# CHECKOBJDUMP:        maxi_s.d        $w13, $w29, 16
+# CHECKOBJDUMP:        maxi_u.b        $w20, $w0, 12
+# CHECKOBJDUMP:        maxi_u.h        $w1, $w14, 3
+# CHECKOBJDUMP:        maxi_u.w        $w27, $w22, 11
+# CHECKOBJDUMP:        maxi_u.d        $w26, $w6, 4
+# CHECKOBJDUMP:        mini_s.b        $w4, $w1, 1
+# CHECKOBJDUMP:        mini_s.h        $w27, $w27, 23
+# CHECKOBJDUMP:        mini_s.w        $w28, $w11, 9
+# CHECKOBJDUMP:        mini_s.d        $w11, $w10, 10
+# CHECKOBJDUMP:        mini_u.b        $w18, $w23, 27
+# CHECKOBJDUMP:        mini_u.h        $w7, $w26, 18
+# CHECKOBJDUMP:        mini_u.w        $w11, $w12, 26
+# CHECKOBJDUMP:        mini_u.d        $w11, $w15, 2
+# CHECKOBJDUMP:        subvi.b         $w24, $w20, 19
+# CHECKOBJDUMP:        subvi.h         $w11, $w19, 4
+# CHECKOBJDUMP:        subvi.w         $w12, $w10, 11
+# CHECKOBJDUMP:        subvi.d         $w19, $w16, 7
+
+                addvi.b         $w3, $w31, 30
+                addvi.h         $w24, $w13, 26
+                addvi.w         $w26, $w20, 26
+                addvi.d         $w16, $w1, 21
+                ceqi.b          $w24, $w21, -8
+                ceqi.h          $w31, $w15, 2
+                ceqi.w          $w12, $w1, -1
+                ceqi.d          $w24, $w22, 7
+                clei_s.b        $w12, $w16, 1
+                clei_s.h        $w2, $w10, -9
+                clei_s.w        $w4, $w11, -10
+                clei_s.d        $w0, $w29, -10
+                clei_u.b        $w21, $w17, 3
+                clei_u.h        $w29, $w7, 17
+                clei_u.w        $w1, $w1, 2
+                clei_u.d        $w27, $w27, 29
+                clti_s.b        $w19, $w13, -7
+                clti_s.h        $w15, $w10, -12
+                clti_s.w        $w12, $w12, 11
+                clti_s.d        $w29, $w20, -15
+                clti_u.b        $w14, $w9, 29
+                clti_u.h        $w24, $w25, 25
+                clti_u.w        $w1, $w1, 22
+                clti_u.d        $w21, $w25, 1
+                maxi_s.b        $w22, $w21, 1
+                maxi_s.h        $w29, $w5, -8
+                maxi_s.w        $w1, $w10, -12
+                maxi_s.d        $w13, $w29, -16
+                maxi_u.b        $w20, $w0, 12
+                maxi_u.h        $w1, $w14, 3
+                maxi_u.w        $w27, $w22, 11
+                maxi_u.d        $w26, $w6, 4
+                mini_s.b        $w4, $w1, 1
+                mini_s.h        $w27, $w27, -9
+                mini_s.w        $w28, $w11, 9
+                mini_s.d        $w11, $w10, 10
+                mini_u.b        $w18, $w23, 27
+                mini_u.h        $w7, $w26, 18
+                mini_u.w        $w11, $w12, 26
+                mini_u.d        $w11, $w15, 2
+                subvi.b         $w24, $w20, 19
+                subvi.h         $w11, $w19, 4
+                subvi.w         $w12, $w10, 11
+                subvi.d         $w19, $w16, 7
diff --git a/test/MC/Mips/msa/test_i8.s b/test/MC/Mips/msa/test_i8.s
new file mode 100644
index 000000000000..2604be0bb6d6
--- /dev/null
+++ b/test/MC/Mips/msa/test_i8.s
@@ -0,0 +1,36 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        andi.b  $w2, $w29, 48           # encoding: [0x78,0x30,0xe8,0x80]
+# CHECK:        bmnzi.b $w6, $w22, 126          # encoding: [0x78,0x7e,0xb1,0x81]
+# CHECK:        bmzi.b  $w27, $w1, 88           # encoding: [0x79,0x58,0x0e,0xc1]
+# CHECK:        bseli.b $w29, $w3, 189          # encoding: [0x7a,0xbd,0x1f,0x41]
+# CHECK:        nori.b  $w1, $w17, 56           # encoding: [0x7a,0x38,0x88,0x40]
+# CHECK:        ori.b   $w26, $w20, 135         # encoding: [0x79,0x87,0xa6,0x80]
+# CHECK:        shf.b   $w19, $w30, 105         # encoding: [0x78,0x69,0xf4,0xc2]
+# CHECK:        shf.h   $w17, $w8, 76           # encoding: [0x79,0x4c,0x44,0x42]
+# CHECK:        shf.w   $w14, $w3, 93           # encoding: [0x7a,0x5d,0x1b,0x82]
+# CHECK:        xori.b  $w16, $w10, 20          # encoding: [0x7b,0x14,0x54,0x00]
+
+# CHECKOBJDUMP:        andi.b  $w2, $w29, 48
+# CHECKOBJDUMP:        bmnzi.b $w6, $w22, 126
+# CHECKOBJDUMP:        bmzi.b  $w27, $w1, 88
+# CHECKOBJDUMP:        bseli.b $w29, $w3, 189
+# CHECKOBJDUMP:        nori.b  $w1, $w17, 56
+# CHECKOBJDUMP:        ori.b   $w26, $w20, 135
+# CHECKOBJDUMP:        shf.b   $w19, $w30, 105
+# CHECKOBJDUMP:        shf.h   $w17, $w8, 76
+# CHECKOBJDUMP:        shf.w   $w14, $w3, 93
+# CHECKOBJDUMP:        xori.b  $w16, $w10, 20
+
+                andi.b  $w2, $w29, 48
+                bmnzi.b $w6, $w22, 126
+                bmzi.b  $w27, $w1, 88
+                bseli.b $w29, $w3, 189
+                nori.b  $w1, $w17, 56
+                ori.b   $w26, $w20, 135
+                shf.b   $w19, $w30, 105
+                shf.h   $w17, $w8, 76
+                shf.w   $w14, $w3, 93
+                xori.b  $w16, $w10, 20
diff --git a/test/MC/Mips/msa/test_lsa.s b/test/MC/Mips/msa/test_lsa.s
new file mode 100644
index 000000000000..6d1d868fc861
--- /dev/null
+++ b/test/MC/Mips/msa/test_lsa.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        lsa        $8, $9, $10, 1              # encoding: [0x01,0x2a,0x40,0x05]
+# CHECK:        lsa        $8, $9, $10, 2              # encoding: [0x01,0x2a,0x40,0x45]
+# CHECK:        lsa        $8, $9, $10, 3              # encoding: [0x01,0x2a,0x40,0x85]
+# CHECK:        lsa        $8, $9, $10, 4              # encoding: [0x01,0x2a,0x40,0xc5]
+
+# CHECKOBJDUMP: lsa        $8, $9, $10, 1
+# CHECKOBJDUMP: lsa        $8, $9, $10, 2
+# CHECKOBJDUMP: lsa        $8, $9, $10, 3
+# CHECKOBJDUMP: lsa        $8, $9, $10, 4
+
+                lsa        $8, $9, $10, 1
+                lsa        $8, $9, $10, 2
+                lsa        $8, $9, $10, 3
+                lsa        $8, $9, $10, 4
diff --git a/test/MC/Mips/msa/test_mi10.s b/test/MC/Mips/msa/test_mi10.s
new file mode 100644
index 000000000000..80257cda8516
--- /dev/null
+++ b/test/MC/Mips/msa/test_mi10.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        ld.b    $w2, 1($7)              # encoding: [0x78,0x01,0x38,0xa0]
+# CHECK:        ld.h    $w16, -9($zero)         # encoding: [0x7b,0xf7,0x04,0x21]
+# CHECK:        ld.w    $w13, -6($4)            # encoding: [0x7b,0xfa,0x23,0x62]
+# CHECK:        ld.d    $w1, -5($16)            # encoding: [0x7b,0xfb,0x80,0x63]
+# CHECK:        st.b    $w29, 1($14)            # encoding: [0x78,0x01,0x77,0x64]
+# CHECK:        st.h    $w6, -1($8)             # encoding: [0x7b,0xff,0x41,0xa5]
+# CHECK:        st.w    $w18, 8($15)            # encoding: [0x78,0x08,0x7c,0xa6]
+# CHECK:        st.d    $w3, -14($18)           # encoding: [0x7b,0xf2,0x90,0xe7]
+
+# CHECKOBJDUMP:        ld.b    $w2, 1($7)
+# CHECKOBJDUMP:        ld.h    $w16, -9($zero)
+# CHECKOBJDUMP:        ld.w    $w13, -6($4)
+# CHECKOBJDUMP:        ld.d    $w1, -5($16)
+# CHECKOBJDUMP:        st.b    $w29, 1($14)
+# CHECKOBJDUMP:        st.h    $w6, -1($8)
+# CHECKOBJDUMP:        st.w    $w18, 8($15)
+# CHECKOBJDUMP:        st.d    $w3, -14($18)
+
+                ld.b    $w2, 1($7)
+                ld.h    $w16, -9($zero)
+                ld.w    $w13, -6($4)
+                ld.d    $w1, -5($16)
+                st.b    $w29, 1($14)
+                st.h    $w6, -1($8)
+                st.w    $w18, 8($15)
+                st.d    $w3, -14($18)
diff --git a/test/MC/Mips/msa/test_vec.s b/test/MC/Mips/msa/test_vec.s
new file mode 100644
index 000000000000..9294f3703cb7
--- /dev/null
+++ b/test/MC/Mips/msa/test_vec.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+msa -arch=mips | FileCheck %s
+#
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa -arch=mips -filetype=obj -o - | llvm-objdump -d -triple=mipsel-unknown-linux -mattr=+msa -arch=mips - | FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        and.v   $w25, $w20, $w27        # encoding: [0x78,0x1b,0xa6,0x5e]
+# CHECK:        bmnz.v  $w17, $w6, $w7          # encoding: [0x78,0x87,0x34,0x5e]
+# CHECK:        bmz.v   $w3, $w17, $w9          # encoding: [0x78,0xa9,0x88,0xde]
+# CHECK:        bsel.v  $w8, $w0, $w14          # encoding: [0x78,0xce,0x02,0x1e]
+# CHECK:        nor.v   $w7, $w31, $w0          # encoding: [0x78,0x40,0xf9,0xde]
+# CHECK:        or.v    $w24, $w26, $w30        # encoding: [0x78,0x3e,0xd6,0x1e]
+# CHECK:        xor.v   $w7, $w27, $w15         # encoding: [0x78,0x6f,0xd9,0xde]
+
+# CHECKOBJDUMP:        and.v   $w25, $w20, $w27
+# CHECKOBJDUMP:        bmnz.v  $w17, $w6, $w7
+# CHECKOBJDUMP:        bmz.v   $w3, $w17, $w9
+# CHECKOBJDUMP:        bsel.v  $w8, $w0, $w14
+# CHECKOBJDUMP:        nor.v   $w7, $w31, $w0
+# CHECKOBJDUMP:        or.v    $w24, $w26, $w30
+# CHECKOBJDUMP:        xor.v   $w7, $w27, $w15
+
+                and.v   $w25, $w20, $w27
+                bmnz.v  $w17, $w6, $w7
+                bmz.v   $w3, $w17, $w9
+                bsel.v  $w8, $w0, $w14
+                nor.v   $w7, $w31, $w0
+                or.v    $w24, $w26, $w30
+                xor.v   $w7, $w27, $w15
diff --git a/test/MC/Mips/xgot.ll b/test/MC/Mips/xgot.ll
index e2a500ffdeea..cc336788aa85 100644
--- a/test/MC/Mips/xgot.ll
+++ b/test/MC/Mips/xgot.ll
@@ -14,10 +14,10 @@ entry:
 ; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
 ; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_HI16
 ; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_LO16
-; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT
-; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
 ; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_CALL_HI16
 ; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_CALL_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
 ; CHECK: ]
 
   %0 = load i32* @ext_1, align 4
diff --git a/test/MC/PowerPC/deprecated-p7.s b/test/MC/PowerPC/deprecated-p7.s
new file mode 100644
index 000000000000..ded992356004
--- /dev/null
+++ b/test/MC/PowerPC/deprecated-p7.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu -mcpu=pwr7 -show-encoding < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple powerpc-unknown-linux-gnu -mcpu=601 -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-OLD %s
+
+         mftb 3
+# CHECK: warning: deprecated
+# CHECK: mftb 3
+
+# CHECK-OLD-NOT: warning: deprecated
+# CHECK-OLD: mftb 3
+
+# FIXME: Test dst and friends once we can parse them.
+
diff --git a/test/MC/PowerPC/lit.local.cfg b/test/MC/PowerPC/lit.local.cfg
index 88488cdd048e..193ebebcd50e 100644
--- a/test/MC/PowerPC/lit.local.cfg
+++ b/test/MC/PowerPC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'PowerPC' in targets:
     config.unsupported = True
diff --git a/test/MC/PowerPC/ppc-llong.s b/test/MC/PowerPC/ppc-llong.s
new file mode 100644
index 000000000000..0838e424fba6
--- /dev/null
+++ b/test/MC/PowerPC/ppc-llong.s
@@ -0,0 +1,28 @@
+
+# RUN: llvm-mc -triple powerpc-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+# RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+
+.data
+.llong 0
+
+# CHECK:        Section {
+# CHECK:          Name: .data
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [
+# CHECK-NEXT:       SHF_ALLOC
+# CHECK-NEXT:       SHF_WRITE
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size: 8
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 4
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 00000000 00000000
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+
diff --git a/test/MC/PowerPC/ppc-machine.s b/test/MC/PowerPC/ppc-machine.s
new file mode 100644
index 000000000000..b8a7e3f88013
--- /dev/null
+++ b/test/MC/PowerPC/ppc-machine.s
@@ -0,0 +1,14 @@
+# RUN: llvm-mc -triple powerpc-unknown-unknown %s
+# RUN: llvm-mc -triple powerpc64-unknown-unknown %s
+
+# For now, the only thing we check is that the .machine directive
+# is accepted without syntax error.
+
+	.machine push
+	.machine any
+	.machine pop
+
+	.machine "push"
+	.machine "any"
+	.machine "pop"
+
diff --git a/test/MC/PowerPC/ppc-nop.s b/test/MC/PowerPC/ppc-nop.s
new file mode 100644
index 000000000000..50afae23b715
--- /dev/null
+++ b/test/MC/PowerPC/ppc-nop.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -filetype=obj -triple=powerpc-unknown-linux-gnu %s | llvm-readobj -s -sd - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux-gnu %s | llvm-readobj -s -sd - | FileCheck %s
+
+blr
+.p2align 3
+blr
+
+.byte 0x42
+.p2align 2
+
+# CHECK:  0000: 4E800020 60000000 4E800020 42000000
+
diff --git a/test/MC/PowerPC/ppc-word.s b/test/MC/PowerPC/ppc-word.s
new file mode 100644
index 000000000000..773fa14bc41d
--- /dev/null
+++ b/test/MC/PowerPC/ppc-word.s
@@ -0,0 +1,28 @@
+
+# RUN: llvm-mc -triple powerpc-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+# RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+
+.data
+.word 0
+
+# CHECK:        Section {
+# CHECK:          Name: .data
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [
+# CHECK-NEXT:       SHF_ALLOC
+# CHECK-NEXT:       SHF_WRITE
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size: 2
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 4
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 0000
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+
diff --git a/test/MC/PowerPC/ppc64-encoding-bookII.s b/test/MC/PowerPC/ppc64-encoding-bookII.s
index e74c971323fb..9e68a4b3c254 100644
--- a/test/MC/PowerPC/ppc64-encoding-bookII.s
+++ b/test/MC/PowerPC/ppc64-encoding-bookII.s
@@ -3,11 +3,15 @@
 
 # Cache management instruction
 
-# FIXME: icbi 2, 3
-# FIXME: icbt 1, 2, 3
+# CHECK: icbi 2, 3                       # encoding: [0x7c,0x02,0x1f,0xac]
+         icbi 2, 3
 
 # FIXME: dcbt 2, 3, 10
+# CHECK: dcbt 2, 3                       # encoding: [0x7c,0x02,0x1a,0x2c]
+         dcbt 2, 3
 # FIXME: dcbtst 2, 3, 10
+# CHECK: dcbtst 2, 3                     # encoding: [0x7c,0x02,0x19,0xec]
+         dcbtst 2, 3
 # CHECK: dcbz 2, 3                       # encoding: [0x7c,0x02,0x1f,0xec]
          dcbz 2, 3
 # CHECK: dcbst 2, 3                      # encoding: [0x7c,0x02,0x18,0x6c]
@@ -16,7 +20,8 @@
 
 # Synchronization instructions
 
-# FIXME: isync
+# CHECK: isync                           # encoding: [0x4c,0x00,0x01,0x2c]
+         isync
 
 # FIXME: lbarx 2, 3, 4, 1
 # FIXME: lharx 2, 3, 4, 1
@@ -30,9 +35,12 @@
 # CHECK: stdcx. 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0xad]
          stdcx. 2, 3, 4
 
-# FIXME: sync 2
-# FIXME: eieio
-# FIXME: wait 2
+# CHECK: sync 2                          # encoding: [0x7c,0x40,0x04,0xac]
+         sync 2
+# CHECK: eieio                           # encoding: [0x7c,0x00,0x06,0xac]
+         eieio
+# CHECK: wait 2                          # encoding: [0x7c,0x40,0x00,0x7c]
+         wait 2
 
 # Extended mnemonics
 
@@ -47,12 +55,28 @@
 # CHECK: ldarx 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0xa8]
          ldarx 2, 3, 4
 
-# CHECK: sync                            # encoding: [0x7c,0x00,0x04,0xac]
+# CHECK: sync 0                          # encoding: [0x7c,0x00,0x04,0xac]
          sync
-# FIXME: lwsync
-# FIXME: ptesync
+# CHECK: sync 0                          # encoding: [0x7c,0x00,0x04,0xac]
+         msync
+# CHECK: sync 1                          # encoding: [0x7c,0x20,0x04,0xac]
+         lwsync
+# CHECK: sync 2                          # encoding: [0x7c,0x40,0x04,0xac]
+         ptesync
 
-# FIXME: wait
-# FIXME: waitrsv
-# FIXME: waitimpl
+# CHECK: wait 0                          # encoding: [0x7c,0x00,0x00,0x7c]
+         wait
+# CHECK: wait 1                          # encoding: [0x7c,0x20,0x00,0x7c]
+         waitrsv
+# CHECK: wait 2                          # encoding: [0x7c,0x40,0x00,0x7c]
+         waitimpl
+
+# Time base instructions
+
+# CHECK: mftb 2, 123                     # encoding: [0x7c,0x5b,0x1a,0xe6]
+         mftb 2, 123
+# CHECK: mftb 2, 268                     # encoding: [0x7c,0x4c,0x42,0xe6]
+         mftb 2
+# CHECK: mftb 2, 269                     # encoding: [0x7c,0x4d,0x42,0xe6]
+         mftbu 2
 
diff --git a/test/MC/PowerPC/ppc64-encoding-bookIII.s b/test/MC/PowerPC/ppc64-encoding-bookIII.s
new file mode 100644
index 000000000000..318c30b04d4a
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-bookIII.s
@@ -0,0 +1,107 @@
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# CHECK: mtmsr 4, 0                       # encoding: [0x7c,0x80,0x01,0x24]
+         mtmsr %r4
+
+# CHECK: mtmsr 4, 1                       # encoding: [0x7c,0x81,0x01,0x24]
+         mtmsr %r4, 1
+
+# CHECK: mfmsr 4                         # encoding: [0x7c,0x80,0x00,0xa6]
+         mfmsr %r4
+
+# CHECK: mtmsrd 4, 0                     # encoding: [0x7c,0x80,0x01,0x64]
+         mtmsrd %r4
+
+# CHECK: mtmsrd 4, 1                     # encoding: [0x7c,0x81,0x01,0x64]
+         mtmsrd %r4, 1
+
+# CHECK: mfspr 4, 272                    # encoding: [0x7c,0x90,0x42,0xa6]
+         mfsprg %r4, 0
+
+# CHECK: mfspr 4, 273                    # encoding: [0x7c,0x91,0x42,0xa6]
+         mfsprg %r4, 1
+
+# CHECK: mfspr 4, 274                    # encoding: [0x7c,0x92,0x42,0xa6]
+         mfsprg %r4, 2
+
+# CHECK: mfspr 4, 275                    # encoding: [0x7c,0x93,0x42,0xa6]
+         mfsprg %r4, 3
+
+# CHECK: mtspr 272, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
+         mtsprg 0, %r4
+
+# CHECK: mtspr 273, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
+         mtsprg 1, %r4
+
+# CHECK: mtspr 274, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
+         mtsprg 2, %r4
+
+# CHECK: mtspr 275, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
+         mtsprg 3, %r4
+
+# CHECK: mtspr 272, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
+         mtsprg0 %r4
+
+# CHECK: mtspr 273, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
+         mtsprg1 %r4
+
+# CHECK: mtspr 274, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
+         mtsprg2 %r4
+
+# CHECK: mtspr 275, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
+         mtsprg3 %r4
+
+# CHECK: mtspr 280, 4                    # encoding: [0x7c,0x98,0x43,0xa6]
+         mtasr %r4
+
+# CHECK: mfspr 4, 22                     # encoding: [0x7c,0x96,0x02,0xa6]
+         mfdec %r4
+
+# CHECK: mtspr 22, 4                     # encoding: [0x7c,0x96,0x03,0xa6]
+         mtdec %r4
+
+# CHECK: mfspr 4, 287                    # encoding: [0x7c,0x9f,0x42,0xa6]
+         mfpvr %r4
+
+# CHECK: mfspr 4, 25                     # encoding: [0x7c,0x99,0x02,0xa6]
+         mfsdr1 %r4
+
+# CHECK: mtspr 25, 4                     # encoding: [0x7c,0x99,0x03,0xa6]
+         mtsdr1 %r4
+
+# CHECK: mfspr 4, 26                     # encoding: [0x7c,0x9a,0x02,0xa6]
+         mfsrr0 %r4
+
+# CHECK: mtspr 26, 4                     # encoding: [0x7c,0x9a,0x03,0xa6]
+         mtsrr0 %r4
+
+# CHECK: mfspr 4, 27                     # encoding: [0x7c,0x9b,0x02,0xa6]
+         mfsrr1 %r4
+
+# CHECK: mtspr 27, 4                     # encoding: [0x7c,0x9b,0x03,0xa6]
+         mtsrr1 %r4
+
+# CHECK: slbie 4                         # encoding: [0x7c,0x00,0x23,0x64]
+         slbie %r4
+
+# CHECK: slbmte 4, 5                     # encoding: [0x7c,0x80,0x2b,0x24]
+         slbmte %r4, %r5
+
+# CHECK: slbmfee 4, 5                    # encoding: [0x7c,0x80,0x2f,0x26]
+         slbmfee %r4, %r5
+
+# CHECK: slbia                           # encoding: [0x7c,0x00,0x03,0xe4]
+         slbia
+
+# CHECK: tlbsync                         # encoding: [0x7c,0x00,0x04,0x6c]
+         tlbsync
+
+# CHECK: tlbiel 4                        # encoding: [0x7c,0x00,0x22,0x24]
+         tlbiel %r4
+
+# CHECK: tlbie 4,0                       # encoding: [0x7c,0x00,0x22,0x64]
+         tlbie %r4, 0
+
+# CHECK: tlbie 4,0                       # encoding: [0x7c,0x00,0x22,0x64]
+         tlbie %r4
+
diff --git a/test/MC/PowerPC/ppc64-encoding-ext.s b/test/MC/PowerPC/ppc64-encoding-ext.s
index 4395b1980aba..a9c313a0322b 100644
--- a/test/MC/PowerPC/ppc64-encoding-ext.s
+++ b/test/MC/PowerPC/ppc64-encoding-ext.s
@@ -1,7 +1,105 @@
 
 # RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
 
-# FIXME: Condition register bit symbols
+# Condition register bit symbols
+
+# CHECK: beqlr 0                         # encoding: [0x4d,0x82,0x00,0x20]
+         beqlr cr0
+# CHECK: beqlr 1                         # encoding: [0x4d,0x86,0x00,0x20]
+         beqlr cr1
+# CHECK: beqlr 2                         # encoding: [0x4d,0x8a,0x00,0x20]
+         beqlr cr2
+# CHECK: beqlr 3                         # encoding: [0x4d,0x8e,0x00,0x20]
+         beqlr cr3
+# CHECK: beqlr 4                         # encoding: [0x4d,0x92,0x00,0x20]
+         beqlr cr4
+# CHECK: beqlr 5                         # encoding: [0x4d,0x96,0x00,0x20]
+         beqlr cr5
+# CHECK: beqlr 6                         # encoding: [0x4d,0x9a,0x00,0x20]
+         beqlr cr6
+# CHECK: beqlr 7                         # encoding: [0x4d,0x9e,0x00,0x20]
+         beqlr cr7
+
+# CHECK: bclr 12, 0, 0                   # encoding: [0x4d,0x80,0x00,0x20]
+         btlr 4*cr0+lt
+# CHECK: bclr 12, 1, 0                   # encoding: [0x4d,0x81,0x00,0x20]
+         btlr 4*cr0+gt
+# CHECK: bclr 12, 2, 0                   # encoding: [0x4d,0x82,0x00,0x20]
+         btlr 4*cr0+eq
+# CHECK: bclr 12, 3, 0                   # encoding: [0x4d,0x83,0x00,0x20]
+         btlr 4*cr0+so
+# CHECK: bclr 12, 3, 0                   # encoding: [0x4d,0x83,0x00,0x20]
+         btlr 4*cr0+un
+# CHECK: bclr 12, 4, 0                   # encoding: [0x4d,0x84,0x00,0x20]
+         btlr 4*cr1+lt
+# CHECK: bclr 12, 5, 0                   # encoding: [0x4d,0x85,0x00,0x20]
+         btlr 4*cr1+gt
+# CHECK: bclr 12, 6, 0                   # encoding: [0x4d,0x86,0x00,0x20]
+         btlr 4*cr1+eq
+# CHECK: bclr 12, 7, 0                   # encoding: [0x4d,0x87,0x00,0x20]
+         btlr 4*cr1+so
+# CHECK: bclr 12, 7, 0                   # encoding: [0x4d,0x87,0x00,0x20]
+         btlr 4*cr1+un
+# CHECK: bclr 12, 8, 0                   # encoding: [0x4d,0x88,0x00,0x20]
+         btlr 4*cr2+lt
+# CHECK: bclr 12, 9, 0                   # encoding: [0x4d,0x89,0x00,0x20]
+         btlr 4*cr2+gt
+# CHECK: bclr 12, 10, 0                  # encoding: [0x4d,0x8a,0x00,0x20]
+         btlr 4*cr2+eq
+# CHECK: bclr 12, 11, 0                  # encoding: [0x4d,0x8b,0x00,0x20]
+         btlr 4*cr2+so
+# CHECK: bclr 12, 11, 0                  # encoding: [0x4d,0x8b,0x00,0x20]
+         btlr 4*cr2+un
+# CHECK: bclr 12, 12, 0                  # encoding: [0x4d,0x8c,0x00,0x20]
+         btlr 4*cr3+lt
+# CHECK: bclr 12, 13, 0                  # encoding: [0x4d,0x8d,0x00,0x20]
+         btlr 4*cr3+gt
+# CHECK: bclr 12, 14, 0                  # encoding: [0x4d,0x8e,0x00,0x20]
+         btlr 4*cr3+eq
+# CHECK: bclr 12, 15, 0                  # encoding: [0x4d,0x8f,0x00,0x20]
+         btlr 4*cr3+so
+# CHECK: bclr 12, 15, 0                  # encoding: [0x4d,0x8f,0x00,0x20]
+         btlr 4*cr3+un
+# CHECK: bclr 12, 16, 0                  # encoding: [0x4d,0x90,0x00,0x20]
+         btlr 4*cr4+lt
+# CHECK: bclr 12, 17, 0                  # encoding: [0x4d,0x91,0x00,0x20]
+         btlr 4*cr4+gt
+# CHECK: bclr 12, 18, 0                  # encoding: [0x4d,0x92,0x00,0x20]
+         btlr 4*cr4+eq
+# CHECK: bclr 12, 19, 0                  # encoding: [0x4d,0x93,0x00,0x20]
+         btlr 4*cr4+so
+# CHECK: bclr 12, 19, 0                  # encoding: [0x4d,0x93,0x00,0x20]
+         btlr 4*cr4+un
+# CHECK: bclr 12, 20, 0                  # encoding: [0x4d,0x94,0x00,0x20]
+         btlr 4*cr5+lt
+# CHECK: bclr 12, 21, 0                  # encoding: [0x4d,0x95,0x00,0x20]
+         btlr 4*cr5+gt
+# CHECK: bclr 12, 22, 0                  # encoding: [0x4d,0x96,0x00,0x20]
+         btlr 4*cr5+eq
+# CHECK: bclr 12, 23, 0                  # encoding: [0x4d,0x97,0x00,0x20]
+         btlr 4*cr5+so
+# CHECK: bclr 12, 23, 0                  # encoding: [0x4d,0x97,0x00,0x20]
+         btlr 4*cr5+un
+# CHECK: bclr 12, 24, 0                  # encoding: [0x4d,0x98,0x00,0x20]
+         btlr 4*cr6+lt
+# CHECK: bclr 12, 25, 0                  # encoding: [0x4d,0x99,0x00,0x20]
+         btlr 4*cr6+gt
+# CHECK: bclr 12, 26, 0                  # encoding: [0x4d,0x9a,0x00,0x20]
+         btlr 4*cr6+eq
+# CHECK: bclr 12, 27, 0                  # encoding: [0x4d,0x9b,0x00,0x20]
+         btlr 4*cr6+so
+# CHECK: bclr 12, 27, 0                  # encoding: [0x4d,0x9b,0x00,0x20]
+         btlr 4*cr6+un
+# CHECK: bclr 12, 28, 0                  # encoding: [0x4d,0x9c,0x00,0x20]
+         btlr 4*cr7+lt
+# CHECK: bclr 12, 29, 0                  # encoding: [0x4d,0x9d,0x00,0x20]
+         btlr 4*cr7+gt
+# CHECK: bclr 12, 30, 0                  # encoding: [0x4d,0x9e,0x00,0x20]
+         btlr 4*cr7+eq
+# CHECK: bclr 12, 31, 0                  # encoding: [0x4d,0x9f,0x00,0x20]
+         btlr 4*cr7+so
+# CHECK: bclr 12, 31, 0                  # encoding: [0x4d,0x9f,0x00,0x20]
+         btlr 4*cr7+un
 
 # Branch mnemonics
 
@@ -9,303 +107,2092 @@
          blr
 # CHECK: bctr                            # encoding: [0x4e,0x80,0x04,0x20]
          bctr
-# FIXME: blrl
+# CHECK: blrl                            # encoding: [0x4e,0x80,0x00,0x21]
+         blrl
 # CHECK: bctrl                           # encoding: [0x4e,0x80,0x04,0x21]
          bctrl
 
-# FIXME: bt 2, target
-# FIXME: bta 2, target
-# FIXME: btlr 2
-# FIXME: btctr 2
-# FIXME: btl 2, target
-# FIXME: btla 2, target
-# FIXME: btlrl 2
-# FIXME: btctrl 2
-
-# FIXME: bf 2, target
-# FIXME: bfa 2, target
-# FIXME: bflr 2
-# FIXME: bfctr 2
-# FIXME: bfl 2, target
-# FIXME: bfla 2, target
-# FIXME: bflrl 2
-# FIXME: bfctrl 2
+# CHECK: bc 12, 2, target                # encoding: [0x41,0x82,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bt 2, target
+# CHECK: bca 12, 2, target               # encoding: [0x41,0x82,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bta 2, target
+# CHECK: bclr 12, 2, 0                   # encoding: [0x4d,0x82,0x00,0x20]
+         btlr 2
+# CHECK: bcctr 12, 2, 0                  # encoding: [0x4d,0x82,0x04,0x20]
+         btctr 2
+# CHECK: bcl 12, 2, target               # encoding: [0x41,0x82,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         btl 2, target
+# CHECK: bcla 12, 2, target              # encoding: [0x41,0x82,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         btla 2, target
+# CHECK: bclrl 12, 2, 0                  # encoding: [0x4d,0x82,0x00,0x21]
+         btlrl 2
+# CHECK: bcctrl 12, 2, 0                 # encoding: [0x4d,0x82,0x04,0x21]
+         btctrl 2
+
+# CHECK: bc 15, 2, target                # encoding: [0x41,0xe2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bt+ 2, target
+# CHECK: bca 15, 2, target               # encoding: [0x41,0xe2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bta+ 2, target
+# CHECK: bclr 15, 2, 0                   # encoding: [0x4d,0xe2,0x00,0x20]
+         btlr+ 2
+# CHECK: bcctr 15, 2, 0                  # encoding: [0x4d,0xe2,0x04,0x20]
+         btctr+ 2
+# CHECK: bcl 15, 2, target               # encoding: [0x41,0xe2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         btl+ 2, target
+# CHECK: bcla 15, 2, target              # encoding: [0x41,0xe2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         btla+ 2, target
+# CHECK: bclrl 15, 2, 0                  # encoding: [0x4d,0xe2,0x00,0x21]
+         btlrl+ 2
+# CHECK: bcctrl 15, 2, 0                 # encoding: [0x4d,0xe2,0x04,0x21]
+         btctrl+ 2
+
+# CHECK: bc 14, 2, target                # encoding: [0x41,0xc2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bt- 2, target
+# CHECK: bca 14, 2, target               # encoding: [0x41,0xc2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bta- 2, target
+# CHECK: bclr 14, 2, 0                   # encoding: [0x4d,0xc2,0x00,0x20]
+         btlr- 2
+# CHECK: bcctr 14, 2, 0                  # encoding: [0x4d,0xc2,0x04,0x20]
+         btctr- 2
+# CHECK: bcl 14, 2, target               # encoding: [0x41,0xc2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         btl- 2, target
+# CHECK: bcla 14, 2, target              # encoding: [0x41,0xc2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         btla- 2, target
+# CHECK: bclrl 14, 2, 0                  # encoding: [0x4d,0xc2,0x00,0x21]
+         btlrl- 2
+# CHECK: bcctrl 14, 2, 0                 # encoding: [0x4d,0xc2,0x04,0x21]
+         btctrl- 2
+
+# CHECK: bc 4, 2, target                 # encoding: [0x40,0x82,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bf 2, target
+# CHECK: bca 4, 2, target                # encoding: [0x40,0x82,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bfa 2, target
+# CHECK: bclr 4, 2, 0                    # encoding: [0x4c,0x82,0x00,0x20]
+         bflr 2
+# CHECK: bcctr 4, 2, 0                   # encoding: [0x4c,0x82,0x04,0x20]
+         bfctr 2
+# CHECK: bcl 4, 2, target                # encoding: [0x40,0x82,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bfl 2, target
+# CHECK: bcla 4, 2, target               # encoding: [0x40,0x82,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bfla 2, target
+# CHECK: bclrl 4, 2, 0                   # encoding: [0x4c,0x82,0x00,0x21]
+         bflrl 2
+# CHECK: bcctrl 4, 2, 0                  # encoding: [0x4c,0x82,0x04,0x21]
+         bfctrl 2
+
+# CHECK: bc 7, 2, target                 # encoding: [0x40,0xe2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bf+ 2, target
+# CHECK: bca 7, 2, target                # encoding: [0x40,0xe2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bfa+ 2, target
+# CHECK: bclr 7, 2, 0                    # encoding: [0x4c,0xe2,0x00,0x20]
+         bflr+ 2
+# CHECK: bcctr 7, 2, 0                   # encoding: [0x4c,0xe2,0x04,0x20]
+         bfctr+ 2
+# CHECK: bcl 7, 2, target                # encoding: [0x40,0xe2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bfl+ 2, target
+# CHECK: bcla 7, 2, target               # encoding: [0x40,0xe2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bfla+ 2, target
+# CHECK: bclrl 7, 2, 0                   # encoding: [0x4c,0xe2,0x00,0x21]
+         bflrl+ 2
+# CHECK: bcctrl 7, 2, 0                  # encoding: [0x4c,0xe2,0x04,0x21]
+         bfctrl+ 2
+
+# CHECK: bc 6, 2, target                 # encoding: [0x40,0xc2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bf- 2, target
+# CHECK: bca 6, 2, target                # encoding: [0x40,0xc2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bfa- 2, target
+# CHECK: bclr 6, 2, 0                    # encoding: [0x4c,0xc2,0x00,0x20]
+         bflr- 2
+# CHECK: bcctr 6, 2, 0                   # encoding: [0x4c,0xc2,0x04,0x20]
+         bfctr- 2
+# CHECK: bcl 6, 2, target                # encoding: [0x40,0xc2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bfl- 2, target
+# CHECK: bcla 6, 2, target               # encoding: [0x40,0xc2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bfla- 2, target
+# CHECK: bclrl 6, 2, 0                   # encoding: [0x4c,0xc2,0x00,0x21]
+         bflrl- 2
+# CHECK: bcctrl 6, 2, 0                  # encoding: [0x4c,0xc2,0x04,0x21]
+         bfctrl- 2
 
 # CHECK: bdnz target                     # encoding: [0x42,0x00,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bdnz target
-# FIXME: bdnza target
+# CHECK: bdnza target                    # encoding: [0x42,0x00,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnza target
 # CHECK: bdnzlr                          # encoding: [0x4e,0x00,0x00,0x20]
          bdnzlr
-# FIXME: bdnzl target
-# FIXME: bdnzla target
-# FIXME: bdnzlrl
-
-# FIXME: bdnzt 2, target
-# FIXME: bdnzta 2, target
-# FIXME: bdnztlr 2
-# FIXME: bdnztl 2, target
-# FIXME: bdnztla 2, target
-# FIXME: bdnztlrl 2
-# FIXME: bdnzf 2, target
-# FIXME: bdnzfa 2, target
-# FIXME: bdnzflr 2
-# FIXME: bdnzfl 2, target
-# FIXME: bdnzfla 2, target
-# FIXME: bdnzflrl 2
+# CHECK: bdnzl target                    # encoding: [0x42,0x00,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnzl target
+# CHECK: bdnzla target                   # encoding: [0x42,0x00,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnzla target
+# CHECK: bdnzlrl                         # encoding: [0x4e,0x00,0x00,0x21]
+         bdnzlrl
+
+# CHECK: bdnz+ target                    # encoding: [0x43,0x20,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnz+ target
+# CHECK: bdnza+ target                   # encoding: [0x43,0x20,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnza+ target
+# CHECK: bdnzlr+                         # encoding: [0x4f,0x20,0x00,0x20]
+         bdnzlr+
+# CHECK: bdnzl+ target                   # encoding: [0x43,0x20,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnzl+ target
+# CHECK: bdnzla+ target                  # encoding: [0x43,0x20,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnzla+ target
+# CHECK: bdnzlrl+                        # encoding: [0x4f,0x20,0x00,0x21]
+         bdnzlrl+
+
+# CHECK: bdnz- target                    # encoding: [0x43,0x00,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnz- target
+# CHECK: bdnza- target                   # encoding: [0x43,0x00,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnza- target
+# CHECK: bdnzlr-                         # encoding: [0x4f,0x00,0x00,0x20]
+         bdnzlr-
+# CHECK: bdnzl- target                   # encoding: [0x43,0x00,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnzl- target
+# CHECK: bdnzla- target                  # encoding: [0x43,0x00,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnzla- target
+# CHECK: bdnzlrl-                        # encoding: [0x4f,0x00,0x00,0x21]
+         bdnzlrl-
+
+# CHECK: bc 8, 2, target                 # encoding: [0x41,0x02,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnzt 2, target
+# CHECK: bca 8, 2, target                # encoding: [0x41,0x02,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnzta 2, target
+# CHECK: bclr 8, 2, 0                    # encoding: [0x4d,0x02,0x00,0x20]
+         bdnztlr 2
+# CHECK: bcl 8, 2, target                # encoding: [0x41,0x02,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnztl 2, target
+# CHECK: bcla 8, 2, target               # encoding: [0x41,0x02,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnztla 2, target
+# CHECK: bclrl 8, 2, 0                   # encoding: [0x4d,0x02,0x00,0x21]
+         bdnztlrl 2
+
+# CHECK: bc 0, 2, target                 # encoding: [0x40,0x02,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnzf 2, target
+# CHECK: bca 0, 2, target                # encoding: [0x40,0x02,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnzfa 2, target
+# CHECK: bclr 0, 2, 0                    # encoding: [0x4c,0x02,0x00,0x20]
+         bdnzflr 2
+# CHECK: bcl 0, 2, target                # encoding: [0x40,0x02,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnzfl 2, target
+# CHECK: bcla 0, 2, target               # encoding: [0x40,0x02,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdnzfla 2, target
+# CHECK: bclrl 0, 2, 0                   # encoding: [0x4c,0x02,0x00,0x21]
+         bdnzflrl 2
 
 # CHECK: bdz target                      # encoding: [0x42,0x40,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bdz target
-# FIXME: bdza target
+# CHECK: bdza target                     # encoding: [0x42,0x40,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdza target
 # CHECK: bdzlr                           # encoding: [0x4e,0x40,0x00,0x20]
          bdzlr
-# FIXME: bdzl target
-# FIXME: bdzla target
-
-# FIXME: bdzlrl
-# FIXME: bdzt 2, target
-# FIXME: bdzta 2, target
-# FIXME: bdztlr 2
-# FIXME: bdztl 2, target
-# FIXME: bdztla 2, target
-# FIXME: bdztlrl 2
-# FIXME: bdzf 2, target
-# FIXME: bdzfa 2, target
-# FIXME: bdzflr 2
-# FIXME: bdzfl 2, target
-# FIXME: bdzfla 2, target
-# FIXME: bdzflrl 2
+# CHECK: bdzl target                     # encoding: [0x42,0x40,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdzl target
+# CHECK: bdzla target                    # encoding: [0x42,0x40,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdzla target
+# CHECK: bdzlrl                          # encoding: [0x4e,0x40,0x00,0x21]
+         bdzlrl
+
+# CHECK: bdz+ target                     # encoding: [0x43,0x60,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdz+ target
+# CHECK: bdza+ target                    # encoding: [0x43,0x60,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdza+ target
+# CHECK: bdzlr+                          # encoding: [0x4f,0x60,0x00,0x20]
+         bdzlr+
+# CHECK: bdzl+ target                    # encoding: [0x43,0x60,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdzl+ target
+# CHECK: bdzla+ target                   # encoding: [0x43,0x60,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdzla+ target
+# CHECK: bdzlrl+                         # encoding: [0x4f,0x60,0x00,0x21]
+         bdzlrl+
+
+# CHECK: bdz- target                     # encoding: [0x43,0x40,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdz- target
+# CHECK: bdza- target                    # encoding: [0x43,0x40,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdza- target
+# CHECK: bdzlr-                          # encoding: [0x4f,0x40,0x00,0x20]
+         bdzlr-
+# CHECK: bdzl- target                    # encoding: [0x43,0x40,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdzl- target
+# CHECK: bdzla- target                   # encoding: [0x43,0x40,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdzla- target
+# CHECK: bdzlrl-                         # encoding: [0x4f,0x40,0x00,0x21]
+         bdzlrl-
+
+# CHECK: bc 10, 2, target                # encoding: [0x41,0x42,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdzt 2, target
+# CHECK: bca 10, 2, target               # encoding: [0x41,0x42,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdzta 2, target
+# CHECK: bclr 10, 2, 0                   # encoding: [0x4d,0x42,0x00,0x20]
+         bdztlr 2
+# CHECK: bcl 10, 2, target               # encoding: [0x41,0x42,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdztl 2, target
+# CHECK: bcla 10, 2, target              # encoding: [0x41,0x42,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdztla 2, target
+# CHECK: bclrl 10, 2, 0                  # encoding: [0x4d,0x42,0x00,0x21]
+         bdztlrl 2
+
+# CHECK: bc 2, 2, target                 # encoding: [0x40,0x42,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdzf 2, target
+# CHECK: bca 2, 2, target                # encoding: [0x40,0x42,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdzfa 2, target
+# CHECK: bclr 2, 2, 0                    # encoding: [0x4c,0x42,0x00,0x20]
+         bdzflr 2
+# CHECK: bcl 2, 2, target                # encoding: [0x40,0x42,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdzfl 2, target
+# CHECK: bcla 2, 2, target               # encoding: [0x40,0x42,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bdzfla 2, target
+# CHECK: bclrl 2, 2, 0                   # encoding: [0x4c,0x42,0x00,0x21]
+         bdzflrl 2
 
 # CHECK: blt 2, target                   # encoding: [0x41,0x88,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          blt 2, target
-# FIXME: blta 2, target
+# CHECK: blt 0, target                   # encoding: [0x41,0x80,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blt target
+# CHECK: blta 2, target                  # encoding: [0x41,0x88,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blta 2, target
+# CHECK: blta 0, target                  # encoding: [0x41,0x80,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blta target
 # CHECK: bltlr 2                         # encoding: [0x4d,0x88,0x00,0x20]
          bltlr 2
+# CHECK: bltlr 0                         # encoding: [0x4d,0x80,0x00,0x20]
+         bltlr
 # CHECK: bltctr 2                        # encoding: [0x4d,0x88,0x04,0x20]
          bltctr 2
-# FIXME: bltl 2, target
-# FIXME: bltla 2, target
-# FIXME: bltlrl 2
+# CHECK: bltctr 0                        # encoding: [0x4d,0x80,0x04,0x20]
+         bltctr
+# CHECK: bltl 2, target                  # encoding: [0x41,0x88,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bltl 2, target
+# CHECK: bltl 0, target                  # encoding: [0x41,0x80,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bltl target
+# CHECK: bltla 2, target                 # encoding: [0x41,0x88,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bltla 2, target
+# CHECK: bltla 0, target                 # encoding: [0x41,0x80,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bltla target
+# CHECK: bltlrl 2                        # encoding: [0x4d,0x88,0x00,0x21]
+         bltlrl 2
+# CHECK: bltlrl 0                        # encoding: [0x4d,0x80,0x00,0x21]
+         bltlrl
 # CHECK: bltctrl 2                       # encoding: [0x4d,0x88,0x04,0x21]
          bltctrl 2
+# CHECK: bltctrl 0                       # encoding: [0x4d,0x80,0x04,0x21]
+         bltctrl
+
+# CHECK: blt+ 2, target                  # encoding: [0x41,0xe8,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blt+ 2, target
+# CHECK: blt+ 0, target                  # encoding: [0x41,0xe0,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blt+ target
+# CHECK: blta+ 2, target                 # encoding: [0x41,0xe8,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blta+ 2, target
+# CHECK: blta+ 0, target                 # encoding: [0x41,0xe0,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blta+ target
+# CHECK: bltlr+ 2                        # encoding: [0x4d,0xe8,0x00,0x20]
+         bltlr+ 2
+# CHECK: bltlr+ 0                        # encoding: [0x4d,0xe0,0x00,0x20]
+         bltlr+
+# CHECK: bltctr+ 2                       # encoding: [0x4d,0xe8,0x04,0x20]
+         bltctr+ 2
+# CHECK: bltctr+ 0                       # encoding: [0x4d,0xe0,0x04,0x20]
+         bltctr+
+# CHECK: bltl+ 2, target                 # encoding: [0x41,0xe8,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bltl+ 2, target
+# CHECK: bltl+ 0, target                 # encoding: [0x41,0xe0,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bltl+ target
+# CHECK: bltla+ 2, target                # encoding: [0x41,0xe8,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bltla+ 2, target
+# CHECK: bltla+ 0, target                # encoding: [0x41,0xe0,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bltla+ target
+# CHECK: bltlrl+ 2                       # encoding: [0x4d,0xe8,0x00,0x21]
+         bltlrl+ 2
+# CHECK: bltlrl+ 0                       # encoding: [0x4d,0xe0,0x00,0x21]
+         bltlrl+
+# CHECK: bltctrl+ 2                      # encoding: [0x4d,0xe8,0x04,0x21]
+         bltctrl+ 2
+# CHECK: bltctrl+ 0                      # encoding: [0x4d,0xe0,0x04,0x21]
+         bltctrl+
+
+# CHECK: blt- 2, target                  # encoding: [0x41,0xc8,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blt- 2, target
+# CHECK: blt- 0, target                  # encoding: [0x41,0xc0,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blt- target
+# CHECK: blta- 2, target                 # encoding: [0x41,0xc8,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blta- 2, target
+# CHECK: blta- 0, target                 # encoding: [0x41,0xc0,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blta- target
+# CHECK: bltlr- 2                        # encoding: [0x4d,0xc8,0x00,0x20]
+         bltlr- 2
+# CHECK: bltlr- 0                        # encoding: [0x4d,0xc0,0x00,0x20]
+         bltlr-
+# CHECK: bltctr- 2                       # encoding: [0x4d,0xc8,0x04,0x20]
+         bltctr- 2
+# CHECK: bltctr- 0                       # encoding: [0x4d,0xc0,0x04,0x20]
+         bltctr-
+# CHECK: bltl- 2, target                 # encoding: [0x41,0xc8,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bltl- 2, target
+# CHECK: bltl- 0, target                 # encoding: [0x41,0xc0,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bltl- target
+# CHECK: bltla- 2, target                # encoding: [0x41,0xc8,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bltla- 2, target
+# CHECK: bltla- 0, target                # encoding: [0x41,0xc0,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bltla- target
+# CHECK: bltlrl- 2                       # encoding: [0x4d,0xc8,0x00,0x21]
+         bltlrl- 2
+# CHECK: bltlrl- 0                       # encoding: [0x4d,0xc0,0x00,0x21]
+         bltlrl-
+# CHECK: bltctrl- 2                      # encoding: [0x4d,0xc8,0x04,0x21]
+         bltctrl- 2
+# CHECK: bltctrl- 0                      # encoding: [0x4d,0xc0,0x04,0x21]
+         bltctrl-
 
 # CHECK: ble 2, target                   # encoding: [0x40,0x89,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          ble 2, target
-# FIXME: blea 2, target
+# CHECK: ble 0, target                   # encoding: [0x40,0x81,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         ble target
+# CHECK: blea 2, target                  # encoding: [0x40,0x89,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blea 2, target
+# CHECK: blea 0, target                  # encoding: [0x40,0x81,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blea target
 # CHECK: blelr 2                         # encoding: [0x4c,0x89,0x00,0x20]
          blelr 2
+# CHECK: blelr 0                         # encoding: [0x4c,0x81,0x00,0x20]
+         blelr
 # CHECK: blectr 2                        # encoding: [0x4c,0x89,0x04,0x20]
          blectr 2
-# FIXME: blel 2, target
-# FIXME: blela 2, target
-# FIXME: blelrl 2
+# CHECK: blectr 0                        # encoding: [0x4c,0x81,0x04,0x20]
+         blectr
+# CHECK: blel 2, target                  # encoding: [0x40,0x89,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blel 2, target
+# CHECK: blel 0, target                  # encoding: [0x40,0x81,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blel target
+# CHECK: blela 2, target                 # encoding: [0x40,0x89,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blela 2, target
+# CHECK: blela 0, target                 # encoding: [0x40,0x81,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blela target
+# CHECK: blelrl 2                        # encoding: [0x4c,0x89,0x00,0x21]
+         blelrl 2
+# CHECK: blelrl 0                        # encoding: [0x4c,0x81,0x00,0x21]
+         blelrl
 # CHECK: blectrl 2                       # encoding: [0x4c,0x89,0x04,0x21]
          blectrl 2
+# CHECK: blectrl 0                       # encoding: [0x4c,0x81,0x04,0x21]
+         blectrl
+
+# CHECK: ble+ 2, target                  # encoding: [0x40,0xe9,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         ble+ 2, target
+# CHECK: ble+ 0, target                  # encoding: [0x40,0xe1,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         ble+ target
+# CHECK: blea+ 2, target                 # encoding: [0x40,0xe9,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blea+ 2, target
+# CHECK: blea+ 0, target                 # encoding: [0x40,0xe1,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blea+ target
+# CHECK: blelr+ 2                        # encoding: [0x4c,0xe9,0x00,0x20]
+         blelr+ 2
+# CHECK: blelr+ 0                        # encoding: [0x4c,0xe1,0x00,0x20]
+         blelr+
+# CHECK: blectr+ 2                       # encoding: [0x4c,0xe9,0x04,0x20]
+         blectr+ 2
+# CHECK: blectr+ 0                       # encoding: [0x4c,0xe1,0x04,0x20]
+         blectr+
+# CHECK: blel+ 2, target                 # encoding: [0x40,0xe9,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blel+ 2, target
+# CHECK: blel+ 0, target                 # encoding: [0x40,0xe1,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blel+ target
+# CHECK: blela+ 2, target                # encoding: [0x40,0xe9,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blela+ 2, target
+# CHECK: blela+ 0, target                # encoding: [0x40,0xe1,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blela+ target
+# CHECK: blelrl+ 2                       # encoding: [0x4c,0xe9,0x00,0x21]
+         blelrl+ 2
+# CHECK: blelrl+ 0                       # encoding: [0x4c,0xe1,0x00,0x21]
+         blelrl+
+# CHECK: blectrl+ 2                      # encoding: [0x4c,0xe9,0x04,0x21]
+         blectrl+ 2
+# CHECK: blectrl+ 0                      # encoding: [0x4c,0xe1,0x04,0x21]
+         blectrl+
+
+# CHECK: ble- 2, target                  # encoding: [0x40,0xc9,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         ble- 2, target
+# CHECK: ble- 0, target                  # encoding: [0x40,0xc1,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         ble- target
+# CHECK: blea- 2, target                 # encoding: [0x40,0xc9,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blea- 2, target
+# CHECK: blea- 0, target                 # encoding: [0x40,0xc1,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blea- target
+# CHECK: blelr- 2                        # encoding: [0x4c,0xc9,0x00,0x20]
+         blelr- 2
+# CHECK: blelr- 0                        # encoding: [0x4c,0xc1,0x00,0x20]
+         blelr-
+# CHECK: blectr- 2                       # encoding: [0x4c,0xc9,0x04,0x20]
+         blectr- 2
+# CHECK: blectr- 0                       # encoding: [0x4c,0xc1,0x04,0x20]
+         blectr-
+# CHECK: blel- 2, target                 # encoding: [0x40,0xc9,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blel- 2, target
+# CHECK: blel- 0, target                 # encoding: [0x40,0xc1,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blel- target
+# CHECK: blela- 2, target                # encoding: [0x40,0xc9,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blela- 2, target
+# CHECK: blela- 0, target                # encoding: [0x40,0xc1,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         blela- target
+# CHECK: blelrl- 2                       # encoding: [0x4c,0xc9,0x00,0x21]
+         blelrl- 2
+# CHECK: blelrl- 0                       # encoding: [0x4c,0xc1,0x00,0x21]
+         blelrl-
+# CHECK: blectrl- 2                      # encoding: [0x4c,0xc9,0x04,0x21]
+         blectrl- 2
+# CHECK: blectrl- 0                      # encoding: [0x4c,0xc1,0x04,0x21]
+         blectrl-
 
 # CHECK: beq 2, target                   # encoding: [0x41,0x8a,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          beq 2, target
-# FIXME: beqa 2, target
+# CHECK: beq 0, target                   # encoding: [0x41,0x82,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beq target
+# CHECK: beqa 2, target                  # encoding: [0x41,0x8a,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqa 2, target
+# CHECK: beqa 0, target                  # encoding: [0x41,0x82,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqa target
 # CHECK: beqlr 2                         # encoding: [0x4d,0x8a,0x00,0x20]
          beqlr 2
+# CHECK: beqlr 0                         # encoding: [0x4d,0x82,0x00,0x20]
+         beqlr
 # CHECK: beqctr 2                        # encoding: [0x4d,0x8a,0x04,0x20]
          beqctr 2
-# FIXME: beql 2, target
-# FIXME: beqla 2, target
-# FIXME: beqlrl 2
+# CHECK: beqctr 0                        # encoding: [0x4d,0x82,0x04,0x20]
+         beqctr
+# CHECK: beql 2, target                  # encoding: [0x41,0x8a,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beql 2, target
+# CHECK: beql 0, target                  # encoding: [0x41,0x82,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beql target
+# CHECK: beqla 2, target                 # encoding: [0x41,0x8a,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqla 2, target
+# CHECK: beqla 0, target                 # encoding: [0x41,0x82,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqla target
+# CHECK: beqlrl 2                        # encoding: [0x4d,0x8a,0x00,0x21]
+         beqlrl 2
+# CHECK: beqlrl 0                        # encoding: [0x4d,0x82,0x00,0x21]
+         beqlrl
 # CHECK: beqctrl 2                       # encoding: [0x4d,0x8a,0x04,0x21]
          beqctrl 2
+# CHECK: beqctrl 0                       # encoding: [0x4d,0x82,0x04,0x21]
+         beqctrl
+
+# CHECK: beq+ 2, target                  # encoding: [0x41,0xea,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beq+ 2, target
+# CHECK: beq+ 0, target                  # encoding: [0x41,0xe2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beq+ target
+# CHECK: beqa+ 2, target                 # encoding: [0x41,0xea,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqa+ 2, target
+# CHECK: beqa+ 0, target                 # encoding: [0x41,0xe2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqa+ target
+# CHECK: beqlr+ 2                        # encoding: [0x4d,0xea,0x00,0x20]
+         beqlr+ 2
+# CHECK: beqlr+ 0                        # encoding: [0x4d,0xe2,0x00,0x20]
+         beqlr+
+# CHECK: beqctr+ 2                       # encoding: [0x4d,0xea,0x04,0x20]
+         beqctr+ 2
+# CHECK: beqctr+ 0                       # encoding: [0x4d,0xe2,0x04,0x20]
+         beqctr+
+# CHECK: beql+ 2, target                 # encoding: [0x41,0xea,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beql+ 2, target
+# CHECK: beql+ 0, target                 # encoding: [0x41,0xe2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beql+ target
+# CHECK: beqla+ 2, target                # encoding: [0x41,0xea,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqla+ 2, target
+# CHECK: beqla+ 0, target                # encoding: [0x41,0xe2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqla+ target
+# CHECK: beqlrl+ 2                       # encoding: [0x4d,0xea,0x00,0x21]
+         beqlrl+ 2
+# CHECK: beqlrl+ 0                       # encoding: [0x4d,0xe2,0x00,0x21]
+         beqlrl+
+# CHECK: beqctrl+ 2                      # encoding: [0x4d,0xea,0x04,0x21]
+         beqctrl+ 2
+# CHECK: beqctrl+ 0                      # encoding: [0x4d,0xe2,0x04,0x21]
+         beqctrl+
+
+# CHECK: beq- 2, target                  # encoding: [0x41,0xca,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beq- 2, target
+# CHECK: beq- 0, target                  # encoding: [0x41,0xc2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beq- target
+# CHECK: beqa- 2, target                 # encoding: [0x41,0xca,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqa- 2, target
+# CHECK: beqa- 0, target                 # encoding: [0x41,0xc2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqa- target
+# CHECK: beqlr- 2                        # encoding: [0x4d,0xca,0x00,0x20]
+         beqlr- 2
+# CHECK: beqlr- 0                        # encoding: [0x4d,0xc2,0x00,0x20]
+         beqlr-
+# CHECK: beqctr- 2                       # encoding: [0x4d,0xca,0x04,0x20]
+         beqctr- 2
+# CHECK: beqctr- 0                       # encoding: [0x4d,0xc2,0x04,0x20]
+         beqctr-
+# CHECK: beql- 2, target                 # encoding: [0x41,0xca,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beql- 2, target
+# CHECK: beql- 0, target                 # encoding: [0x41,0xc2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beql- target
+# CHECK: beqla- 2, target                # encoding: [0x41,0xca,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqla- 2, target
+# CHECK: beqla- 0, target                # encoding: [0x41,0xc2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         beqla- target
+# CHECK: beqlrl- 2                       # encoding: [0x4d,0xca,0x00,0x21]
+         beqlrl- 2
+# CHECK: beqlrl- 0                       # encoding: [0x4d,0xc2,0x00,0x21]
+         beqlrl-
+# CHECK: beqctrl- 2                      # encoding: [0x4d,0xca,0x04,0x21]
+         beqctrl- 2
+# CHECK: beqctrl- 0                      # encoding: [0x4d,0xc2,0x04,0x21]
+         beqctrl-
 
 # CHECK: bge 2, target                   # encoding: [0x40,0x88,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bge 2, target
-# FIXME: bgea 2, target
+# CHECK: bge 0, target                   # encoding: [0x40,0x80,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bge target
+# CHECK: bgea 2, target                  # encoding: [0x40,0x88,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgea 2, target
+# CHECK: bgea 0, target                  # encoding: [0x40,0x80,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgea target
 # CHECK: bgelr 2                         # encoding: [0x4c,0x88,0x00,0x20]
          bgelr 2
+# CHECK: bgelr 0                         # encoding: [0x4c,0x80,0x00,0x20]
+         bgelr
 # CHECK: bgectr 2                        # encoding: [0x4c,0x88,0x04,0x20]
          bgectr 2
-# FIXME: bgel 2, target
-# FIXME: bgela 2, target
-# FIXME: bgelrl 2
+# CHECK: bgectr 0                        # encoding: [0x4c,0x80,0x04,0x20]
+         bgectr
+# CHECK: bgel 2, target                  # encoding: [0x40,0x88,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgel 2, target
+# CHECK: bgel 0, target                  # encoding: [0x40,0x80,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgel target
+# CHECK: bgela 2, target                 # encoding: [0x40,0x88,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgela 2, target
+# CHECK: bgela 0, target                 # encoding: [0x40,0x80,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgela target
+# CHECK: bgelrl 2                        # encoding: [0x4c,0x88,0x00,0x21]
+         bgelrl 2
+# CHECK: bgelrl 0                        # encoding: [0x4c,0x80,0x00,0x21]
+         bgelrl
 # CHECK: bgectrl 2                       # encoding: [0x4c,0x88,0x04,0x21]
          bgectrl 2
+# CHECK: bgectrl 0                       # encoding: [0x4c,0x80,0x04,0x21]
+         bgectrl
+
+# CHECK: bge+ 2, target                   # encoding: [0x40,0xe8,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bge+ 2, target
+# CHECK: bge+ 0, target                   # encoding: [0x40,0xe0,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bge+ target
+# CHECK: bgea+ 2, target                  # encoding: [0x40,0xe8,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgea+ 2, target
+# CHECK: bgea+ 0, target                  # encoding: [0x40,0xe0,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgea+ target
+# CHECK: bgelr+ 2                         # encoding: [0x4c,0xe8,0x00,0x20]
+         bgelr+ 2
+# CHECK: bgelr+ 0                         # encoding: [0x4c,0xe0,0x00,0x20]
+         bgelr+
+# CHECK: bgectr+ 2                        # encoding: [0x4c,0xe8,0x04,0x20]
+         bgectr+ 2
+# CHECK: bgectr+ 0                        # encoding: [0x4c,0xe0,0x04,0x20]
+         bgectr+
+# CHECK: bgel+ 2, target                  # encoding: [0x40,0xe8,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgel+ 2, target
+# CHECK: bgel+ 0, target                  # encoding: [0x40,0xe0,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgel+ target
+# CHECK: bgela+ 2, target                 # encoding: [0x40,0xe8,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgela+ 2, target
+# CHECK: bgela+ 0, target                 # encoding: [0x40,0xe0,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgela+ target
+# CHECK: bgelrl+ 2                        # encoding: [0x4c,0xe8,0x00,0x21]
+         bgelrl+ 2
+# CHECK: bgelrl+ 0                        # encoding: [0x4c,0xe0,0x00,0x21]
+         bgelrl+
+# CHECK: bgectrl+ 2                       # encoding: [0x4c,0xe8,0x04,0x21]
+         bgectrl+ 2
+# CHECK: bgectrl+ 0                       # encoding: [0x4c,0xe0,0x04,0x21]
+         bgectrl+
+
+# CHECK: bge- 2, target                   # encoding: [0x40,0xc8,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bge- 2, target
+# CHECK: bge- 0, target                   # encoding: [0x40,0xc0,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bge- target
+# CHECK: bgea- 2, target                  # encoding: [0x40,0xc8,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgea- 2, target
+# CHECK: bgea- 0, target                  # encoding: [0x40,0xc0,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgea- target
+# CHECK: bgelr- 2                         # encoding: [0x4c,0xc8,0x00,0x20]
+         bgelr- 2
+# CHECK: bgelr- 0                         # encoding: [0x4c,0xc0,0x00,0x20]
+         bgelr-
+# CHECK: bgectr- 2                        # encoding: [0x4c,0xc8,0x04,0x20]
+         bgectr- 2
+# CHECK: bgectr- 0                        # encoding: [0x4c,0xc0,0x04,0x20]
+         bgectr-
+# CHECK: bgel- 2, target                  # encoding: [0x40,0xc8,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgel- 2, target
+# CHECK: bgel- 0, target                  # encoding: [0x40,0xc0,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgel- target
+# CHECK: bgela- 2, target                 # encoding: [0x40,0xc8,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgela- 2, target
+# CHECK: bgela- 0, target                 # encoding: [0x40,0xc0,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgela- target
+# CHECK: bgelrl- 2                        # encoding: [0x4c,0xc8,0x00,0x21]
+         bgelrl- 2
+# CHECK: bgelrl- 0                        # encoding: [0x4c,0xc0,0x00,0x21]
+         bgelrl-
+# CHECK: bgectrl- 2                       # encoding: [0x4c,0xc8,0x04,0x21]
+         bgectrl- 2
+# CHECK: bgectrl- 0                       # encoding: [0x4c,0xc0,0x04,0x21]
+         bgectrl-
 
 # CHECK: bgt 2, target                   # encoding: [0x41,0x89,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bgt 2, target
-# FIXME: bgta 2, target
+# CHECK: bgt 0, target                   # encoding: [0x41,0x81,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgt target
+# CHECK: bgta 2, target                  # encoding: [0x41,0x89,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgta 2, target
+# CHECK: bgta 0, target                  # encoding: [0x41,0x81,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgta target
 # CHECK: bgtlr 2                         # encoding: [0x4d,0x89,0x00,0x20]
          bgtlr 2
+# CHECK: bgtlr 0                         # encoding: [0x4d,0x81,0x00,0x20]
+         bgtlr
 # CHECK: bgtctr 2                        # encoding: [0x4d,0x89,0x04,0x20]
          bgtctr 2
-# FIXME: bgtl 2, target
-# FIXME: bgtla 2, target
-# FIXME: bgtlrl 2
+# CHECK: bgtctr 0                        # encoding: [0x4d,0x81,0x04,0x20]
+         bgtctr
+# CHECK: bgtl 2, target                  # encoding: [0x41,0x89,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgtl 2, target
+# CHECK: bgtl 0, target                  # encoding: [0x41,0x81,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgtl target
+# CHECK: bgtla 2, target                 # encoding: [0x41,0x89,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgtla 2, target
+# CHECK: bgtla 0, target                 # encoding: [0x41,0x81,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgtla target
+# CHECK: bgtlrl 2                        # encoding: [0x4d,0x89,0x00,0x21]
+         bgtlrl 2
+# CHECK: bgtlrl 0                        # encoding: [0x4d,0x81,0x00,0x21]
+         bgtlrl
 # CHECK: bgtctrl 2                       # encoding: [0x4d,0x89,0x04,0x21]
          bgtctrl 2
+# CHECK: bgtctrl 0                       # encoding: [0x4d,0x81,0x04,0x21]
+         bgtctrl
+
+# CHECK: bgt+ 2, target                  # encoding: [0x41,0xe9,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgt+ 2, target
+# CHECK: bgt+ 0, target                  # encoding: [0x41,0xe1,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgt+ target
+# CHECK: bgta+ 2, target                 # encoding: [0x41,0xe9,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgta+ 2, target
+# CHECK: bgta+ 0, target                 # encoding: [0x41,0xe1,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgta+ target
+# CHECK: bgtlr+ 2                        # encoding: [0x4d,0xe9,0x00,0x20]
+         bgtlr+ 2
+# CHECK: bgtlr+ 0                        # encoding: [0x4d,0xe1,0x00,0x20]
+         bgtlr+
+# CHECK: bgtctr+ 2                       # encoding: [0x4d,0xe9,0x04,0x20]
+         bgtctr+ 2
+# CHECK: bgtctr+ 0                       # encoding: [0x4d,0xe1,0x04,0x20]
+         bgtctr+
+# CHECK: bgtl+ 2, target                 # encoding: [0x41,0xe9,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgtl+ 2, target
+# CHECK: bgtl+ 0, target                 # encoding: [0x41,0xe1,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgtl+ target
+# CHECK: bgtla+ 2, target                # encoding: [0x41,0xe9,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgtla+ 2, target
+# CHECK: bgtla+ 0, target                # encoding: [0x41,0xe1,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgtla+ target
+# CHECK: bgtlrl+ 2                       # encoding: [0x4d,0xe9,0x00,0x21]
+         bgtlrl+ 2
+# CHECK: bgtlrl+ 0                       # encoding: [0x4d,0xe1,0x00,0x21]
+         bgtlrl+
+# CHECK: bgtctrl+ 2                      # encoding: [0x4d,0xe9,0x04,0x21]
+         bgtctrl+ 2
+# CHECK: bgtctrl+ 0                      # encoding: [0x4d,0xe1,0x04,0x21]
+         bgtctrl+
+
+# CHECK: bgt- 2, target                  # encoding: [0x41,0xc9,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgt- 2, target
+# CHECK: bgt- 0, target                  # encoding: [0x41,0xc1,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgt- target
+# CHECK: bgta- 2, target                 # encoding: [0x41,0xc9,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgta- 2, target
+# CHECK: bgta- 0, target                 # encoding: [0x41,0xc1,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgta- target
+# CHECK: bgtlr- 2                        # encoding: [0x4d,0xc9,0x00,0x20]
+         bgtlr- 2
+# CHECK: bgtlr- 0                        # encoding: [0x4d,0xc1,0x00,0x20]
+         bgtlr-
+# CHECK: bgtctr- 2                       # encoding: [0x4d,0xc9,0x04,0x20]
+         bgtctr- 2
+# CHECK: bgtctr- 0                       # encoding: [0x4d,0xc1,0x04,0x20]
+         bgtctr-
+# CHECK: bgtl- 2, target                 # encoding: [0x41,0xc9,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgtl- 2, target
+# CHECK: bgtl- 0, target                 # encoding: [0x41,0xc1,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgtl- target
+# CHECK: bgtla- 2, target                # encoding: [0x41,0xc9,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgtla- 2, target
+# CHECK: bgtla- 0, target                # encoding: [0x41,0xc1,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bgtla- target
+# CHECK: bgtlrl- 2                       # encoding: [0x4d,0xc9,0x00,0x21]
+         bgtlrl- 2
+# CHECK: bgtlrl- 0                       # encoding: [0x4d,0xc1,0x00,0x21]
+         bgtlrl-
+# CHECK: bgtctrl- 2                      # encoding: [0x4d,0xc9,0x04,0x21]
+         bgtctrl- 2
+# CHECK: bgtctrl- 0                      # encoding: [0x4d,0xc1,0x04,0x21]
+         bgtctrl-
 
 # CHECK: bge 2, target                   # encoding: [0x40,0x88,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bnl 2, target
-# FIXME: bnla 2, target
+# CHECK: bge 0, target                   # encoding: [0x40,0x80,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnl target
+# CHECK: bgea 2, target                  # encoding: [0x40,0x88,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnla 2, target
+# CHECK: bgea 0, target                  # encoding: [0x40,0x80,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnla target
 # CHECK: bgelr 2                         # encoding: [0x4c,0x88,0x00,0x20]
          bnllr 2
+# CHECK: bgelr 0                         # encoding: [0x4c,0x80,0x00,0x20]
+         bnllr
 # CHECK: bgectr 2                        # encoding: [0x4c,0x88,0x04,0x20]
          bnlctr 2
-# FIXME: bnll 2, target
-# FIXME: bnlla 2, target
-# FIXME: bnllrl 2
+# CHECK: bgectr 0                        # encoding: [0x4c,0x80,0x04,0x20]
+         bnlctr
+# CHECK: bgel 2, target                  # encoding: [0x40,0x88,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnll 2, target
+# CHECK: bgel 0, target                  # encoding: [0x40,0x80,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnll target
+# CHECK: bgela 2, target                  # encoding: [0x40,0x88,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnlla 2, target
+# CHECK: bgela 0, target                  # encoding: [0x40,0x80,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnlla target
+# CHECK: bgelrl 2                        # encoding: [0x4c,0x88,0x00,0x21]
+         bnllrl 2
+# CHECK: bgelrl 0                        # encoding: [0x4c,0x80,0x00,0x21]
+         bnllrl
 # CHECK: bgectrl 2                       # encoding: [0x4c,0x88,0x04,0x21]
          bnlctrl 2
+# CHECK: bgectrl 0                       # encoding: [0x4c,0x80,0x04,0x21]
+         bnlctrl
+
+# CHECK: bge+ 2, target                  # encoding: [0x40,0xe8,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnl+ 2, target
+# CHECK: bge+ 0, target                  # encoding: [0x40,0xe0,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnl+ target
+# CHECK: bgea+ 2, target                 # encoding: [0x40,0xe8,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnla+ 2, target
+# CHECK: bgea+ 0, target                 # encoding: [0x40,0xe0,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnla+ target
+# CHECK: bgelr+ 2                        # encoding: [0x4c,0xe8,0x00,0x20]
+         bnllr+ 2
+# CHECK: bgelr+ 0                        # encoding: [0x4c,0xe0,0x00,0x20]
+         bnllr+
+# CHECK: bgectr+ 2                       # encoding: [0x4c,0xe8,0x04,0x20]
+         bnlctr+ 2
+# CHECK: bgectr+ 0                       # encoding: [0x4c,0xe0,0x04,0x20]
+         bnlctr+
+# CHECK: bgel+ 2, target                 # encoding: [0x40,0xe8,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnll+ 2, target
+# CHECK: bgel+ 0, target                 # encoding: [0x40,0xe0,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnll+ target
+# CHECK: bgela+ 2, target                # encoding: [0x40,0xe8,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnlla+ 2, target
+# CHECK: bgela+ 0, target                # encoding: [0x40,0xe0,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnlla+ target
+# CHECK: bgelrl+ 2                       # encoding: [0x4c,0xe8,0x00,0x21]
+         bnllrl+ 2
+# CHECK: bgelrl+ 0                       # encoding: [0x4c,0xe0,0x00,0x21]
+         bnllrl+
+# CHECK: bgectrl+ 2                      # encoding: [0x4c,0xe8,0x04,0x21]
+         bnlctrl+ 2
+# CHECK: bgectrl+ 0                      # encoding: [0x4c,0xe0,0x04,0x21]
+         bnlctrl+
+
+# CHECK: bge- 2, target                  # encoding: [0x40,0xc8,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnl- 2, target
+# CHECK: bge- 0, target                  # encoding: [0x40,0xc0,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnl- target
+# CHECK: bgea- 2, target                 # encoding: [0x40,0xc8,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnla- 2, target
+# CHECK: bgea- 0, target                 # encoding: [0x40,0xc0,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnla- target
+# CHECK: bgelr- 2                        # encoding: [0x4c,0xc8,0x00,0x20]
+         bnllr- 2
+# CHECK: bgelr- 0                        # encoding: [0x4c,0xc0,0x00,0x20]
+         bnllr-
+# CHECK: bgectr- 2                       # encoding: [0x4c,0xc8,0x04,0x20]
+         bnlctr- 2
+# CHECK: bgectr- 0                       # encoding: [0x4c,0xc0,0x04,0x20]
+         bnlctr-
+# CHECK: bgel- 2, target                 # encoding: [0x40,0xc8,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnll- 2, target
+# CHECK: bgel- 0, target                 # encoding: [0x40,0xc0,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnll- target
+# CHECK: bgela- 2, target                # encoding: [0x40,0xc8,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnlla- 2, target
+# CHECK: bgela- 0, target                # encoding: [0x40,0xc0,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnlla- target
+# CHECK: bgelrl- 2                       # encoding: [0x4c,0xc8,0x00,0x21]
+         bnllrl- 2
+# CHECK: bgelrl- 0                       # encoding: [0x4c,0xc0,0x00,0x21]
+         bnllrl-
+# CHECK: bgectrl- 2                      # encoding: [0x4c,0xc8,0x04,0x21]
+         bnlctrl- 2
+# CHECK: bgectrl- 0                      # encoding: [0x4c,0xc0,0x04,0x21]
+         bnlctrl-
 
 # CHECK: bne 2, target                   # encoding: [0x40,0x8a,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bne 2, target
-# FIXME: bnea 2, target
+# CHECK: bne 0, target                   # encoding: [0x40,0x82,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bne target
+# CHECK: bnea 2, target                  # encoding: [0x40,0x8a,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnea 2, target
+# CHECK: bnea 0, target                  # encoding: [0x40,0x82,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnea target
 # CHECK: bnelr 2                         # encoding: [0x4c,0x8a,0x00,0x20]
          bnelr 2
+# CHECK: bnelr 0                         # encoding: [0x4c,0x82,0x00,0x20]
+         bnelr
 # CHECK: bnectr 2                        # encoding: [0x4c,0x8a,0x04,0x20]
          bnectr 2
-# FIXME: bnel 2, target
-# FIXME: bnela 2, target
-# FIXME: bnelrl 2
+# CHECK: bnectr 0                        # encoding: [0x4c,0x82,0x04,0x20]
+         bnectr
+# CHECK: bnel 2, target                  # encoding: [0x40,0x8a,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnel 2, target
+# CHECK: bnel 0, target                  # encoding: [0x40,0x82,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnel target
+# CHECK: bnela 2, target                 # encoding: [0x40,0x8a,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnela 2, target
+# CHECK: bnela 0, target                 # encoding: [0x40,0x82,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnela target
+# CHECK: bnelrl 2                        # encoding: [0x4c,0x8a,0x00,0x21]
+         bnelrl 2
+# CHECK: bnelrl 0                        # encoding: [0x4c,0x82,0x00,0x21]
+         bnelrl
 # CHECK: bnectrl 2                       # encoding: [0x4c,0x8a,0x04,0x21]
          bnectrl 2
+# CHECK: bnectrl 0                       # encoding: [0x4c,0x82,0x04,0x21]
+         bnectrl
+
+# CHECK: bne+ 2, target                  # encoding: [0x40,0xea,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bne+ 2, target
+# CHECK: bne+ 0, target                  # encoding: [0x40,0xe2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bne+ target
+# CHECK: bnea+ 2, target                 # encoding: [0x40,0xea,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnea+ 2, target
+# CHECK: bnea+ 0, target                 # encoding: [0x40,0xe2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnea+ target
+# CHECK: bnelr+ 2                        # encoding: [0x4c,0xea,0x00,0x20]
+         bnelr+ 2
+# CHECK: bnelr+ 0                        # encoding: [0x4c,0xe2,0x00,0x20]
+         bnelr+
+# CHECK: bnectr+ 2                       # encoding: [0x4c,0xea,0x04,0x20]
+         bnectr+ 2
+# CHECK: bnectr+ 0                       # encoding: [0x4c,0xe2,0x04,0x20]
+         bnectr+
+# CHECK: bnel+ 2, target                 # encoding: [0x40,0xea,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnel+ 2, target
+# CHECK: bnel+ 0, target                 # encoding: [0x40,0xe2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnel+ target
+# CHECK: bnela+ 2, target                # encoding: [0x40,0xea,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnela+ 2, target
+# CHECK: bnela+ 0, target                # encoding: [0x40,0xe2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnela+ target
+# CHECK: bnelrl+ 2                       # encoding: [0x4c,0xea,0x00,0x21]
+         bnelrl+ 2
+# CHECK: bnelrl+ 0                       # encoding: [0x4c,0xe2,0x00,0x21]
+         bnelrl+
+# CHECK: bnectrl+ 2                      # encoding: [0x4c,0xea,0x04,0x21]
+         bnectrl+ 2
+# CHECK: bnectrl+ 0                      # encoding: [0x4c,0xe2,0x04,0x21]
+         bnectrl+
+
+# CHECK: bne- 2, target                  # encoding: [0x40,0xca,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bne- 2, target
+# CHECK: bne- 0, target                  # encoding: [0x40,0xc2,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bne- target
+# CHECK: bnea- 2, target                 # encoding: [0x40,0xca,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnea- 2, target
+# CHECK: bnea- 0, target                 # encoding: [0x40,0xc2,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnea- target
+# CHECK: bnelr- 2                        # encoding: [0x4c,0xca,0x00,0x20]
+         bnelr- 2
+# CHECK: bnelr- 0                        # encoding: [0x4c,0xc2,0x00,0x20]
+         bnelr-
+# CHECK: bnectr- 2                       # encoding: [0x4c,0xca,0x04,0x20]
+         bnectr- 2
+# CHECK: bnectr- 0                       # encoding: [0x4c,0xc2,0x04,0x20]
+         bnectr-
+# CHECK: bnel- 2, target                 # encoding: [0x40,0xca,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnel- 2, target
+# CHECK: bnel- 0, target                 # encoding: [0x40,0xc2,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnel- target
+# CHECK: bnela- 2, target                # encoding: [0x40,0xca,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnela- 2, target
+# CHECK: bnela- 0, target                # encoding: [0x40,0xc2,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnela- target
+# CHECK: bnelrl- 2                       # encoding: [0x4c,0xca,0x00,0x21]
+         bnelrl- 2
+# CHECK: bnelrl- 0                       # encoding: [0x4c,0xc2,0x00,0x21]
+         bnelrl-
+# CHECK: bnectrl- 2                      # encoding: [0x4c,0xca,0x04,0x21]
+         bnectrl- 2
+# CHECK: bnectrl- 0                      # encoding: [0x4c,0xc2,0x04,0x21]
+         bnectrl-
 
 # CHECK: ble 2, target                   # encoding: [0x40,0x89,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bng 2, target
-# FIXME: bnga 2, target
+# CHECK: ble 0, target                   # encoding: [0x40,0x81,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bng target
+# CHECK: blea 2, target                  # encoding: [0x40,0x89,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnga 2, target
+# CHECK: blea 0, target                  # encoding: [0x40,0x81,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnga target
 # CHECK: blelr 2                         # encoding: [0x4c,0x89,0x00,0x20]
          bnglr 2
+# CHECK: blelr 0                         # encoding: [0x4c,0x81,0x00,0x20]
+         bnglr
 # CHECK: blectr 2                        # encoding: [0x4c,0x89,0x04,0x20]
          bngctr 2
-# FIXME: bngl 2, target
-# FIXME: bngla 2, target
-# FIXME: bnglrl 2
+# CHECK: blectr 0                        # encoding: [0x4c,0x81,0x04,0x20]
+         bngctr
+# CHECK: blel 2, target                  # encoding: [0x40,0x89,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bngl 2, target
+# CHECK: blel 0, target                  # encoding: [0x40,0x81,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bngl target
+# CHECK: blela 2, target                 # encoding: [0x40,0x89,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bngla 2, target
+# CHECK: blela 0, target                 # encoding: [0x40,0x81,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bngla target
+# CHECK: blelrl 2                        # encoding: [0x4c,0x89,0x00,0x21]
+         bnglrl 2
+# CHECK: blelrl 0                        # encoding: [0x4c,0x81,0x00,0x21]
+         bnglrl
 # CHECK: blectrl 2                       # encoding: [0x4c,0x89,0x04,0x21]
          bngctrl 2
+# CHECK: blectrl 0                       # encoding: [0x4c,0x81,0x04,0x21]
+         bngctrl
+
+# CHECK: ble+ 2, target                   # encoding: [0x40,0xe9,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bng+ 2, target
+# CHECK: ble+ 0, target                   # encoding: [0x40,0xe1,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bng+ target
+# CHECK: blea+ 2, target                  # encoding: [0x40,0xe9,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnga+ 2, target
+# CHECK: blea+ 0, target                  # encoding: [0x40,0xe1,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnga+ target
+# CHECK: blelr+ 2                         # encoding: [0x4c,0xe9,0x00,0x20]
+         bnglr+ 2
+# CHECK: blelr+ 0                         # encoding: [0x4c,0xe1,0x00,0x20]
+         bnglr+
+# CHECK: blectr+ 2                        # encoding: [0x4c,0xe9,0x04,0x20]
+         bngctr+ 2
+# CHECK: blectr+ 0                        # encoding: [0x4c,0xe1,0x04,0x20]
+         bngctr+
+# CHECK: blel+ 2, target                  # encoding: [0x40,0xe9,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bngl+ 2, target
+# CHECK: blel+ 0, target                  # encoding: [0x40,0xe1,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bngl+ target
+# CHECK: blela+ 2, target                 # encoding: [0x40,0xe9,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bngla+ 2, target
+# CHECK: blela+ 0, target                 # encoding: [0x40,0xe1,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bngla+ target
+# CHECK: blelrl+ 2                        # encoding: [0x4c,0xe9,0x00,0x21]
+         bnglrl+ 2
+# CHECK: blelrl+ 0                        # encoding: [0x4c,0xe1,0x00,0x21]
+         bnglrl+
+# CHECK: blectrl+ 2                       # encoding: [0x4c,0xe9,0x04,0x21]
+         bngctrl+ 2
+# CHECK: blectrl+ 0                       # encoding: [0x4c,0xe1,0x04,0x21]
+         bngctrl+
+
+# CHECK: ble- 2, target                   # encoding: [0x40,0xc9,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bng- 2, target
+# CHECK: ble- 0, target                   # encoding: [0x40,0xc1,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bng- target
+# CHECK: blea- 2, target                  # encoding: [0x40,0xc9,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnga- 2, target
+# CHECK: blea- 0, target                  # encoding: [0x40,0xc1,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnga- target
+# CHECK: blelr- 2                         # encoding: [0x4c,0xc9,0x00,0x20]
+         bnglr- 2
+# CHECK: blelr- 0                         # encoding: [0x4c,0xc1,0x00,0x20]
+         bnglr-
+# CHECK: blectr- 2                        # encoding: [0x4c,0xc9,0x04,0x20]
+         bngctr- 2
+# CHECK: blectr- 0                        # encoding: [0x4c,0xc1,0x04,0x20]
+         bngctr-
+# CHECK: blel- 2, target                  # encoding: [0x40,0xc9,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bngl- 2, target
+# CHECK: blel- 0, target                  # encoding: [0x40,0xc1,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bngl- target
+# CHECK: blela- 2, target                 # encoding: [0x40,0xc9,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bngla- 2, target
+# CHECK: blela- 0, target                 # encoding: [0x40,0xc1,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bngla- target
+# CHECK: blelrl- 2                        # encoding: [0x4c,0xc9,0x00,0x21]
+         bnglrl- 2
+# CHECK: blelrl- 0                        # encoding: [0x4c,0xc1,0x00,0x21]
+         bnglrl-
+# CHECK: blectrl- 2                       # encoding: [0x4c,0xc9,0x04,0x21]
+         bngctrl- 2
+# CHECK: blectrl- 0                       # encoding: [0x4c,0xc1,0x04,0x21]
+         bngctrl-
 
 # CHECK: bun 2, target                   # encoding: [0x41,0x8b,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bso 2, target
-# FIXME: bsoa 2, target
+# CHECK: bun 0, target                   # encoding: [0x41,0x83,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bso target
+# CHECK: buna 2, target                  # encoding: [0x41,0x8b,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsoa 2, target
+# CHECK: buna 0, target                  # encoding: [0x41,0x83,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsoa target
 # CHECK: bunlr 2                         # encoding: [0x4d,0x8b,0x00,0x20]
          bsolr 2
+# CHECK: bunlr 0                         # encoding: [0x4d,0x83,0x00,0x20]
+         bsolr
 # CHECK: bunctr 2                        # encoding: [0x4d,0x8b,0x04,0x20]
          bsoctr 2
-# FIXME: bsol 2, target
-# FIXME: bsola 2, target
-# FIXME: bsolrl 2
+# CHECK: bunctr 0                        # encoding: [0x4d,0x83,0x04,0x20]
+         bsoctr
+# CHECK: bunl 2, target                  # encoding: [0x41,0x8b,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bsol 2, target
+# CHECK: bunl 0, target                  # encoding: [0x41,0x83,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bsol target
+# CHECK: bunla 2, target                 # encoding: [0x41,0x8b,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsola 2, target
+# CHECK: bunla 0, target                 # encoding: [0x41,0x83,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsola target
+# CHECK: bunlrl 2                        # encoding: [0x4d,0x8b,0x00,0x21]
+         bsolrl 2
+# CHECK: bunlrl 0                        # encoding: [0x4d,0x83,0x00,0x21]
+         bsolrl
 # CHECK: bunctrl 2                       # encoding: [0x4d,0x8b,0x04,0x21]
          bsoctrl 2
+# CHECK: bunctrl 0                       # encoding: [0x4d,0x83,0x04,0x21]
+         bsoctrl
+
+# CHECK: bun+ 2, target                  # encoding: [0x41,0xeb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bso+ 2, target
+# CHECK: bun+ 0, target                  # encoding: [0x41,0xe3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bso+ target
+# CHECK: buna+ 2, target                 # encoding: [0x41,0xeb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsoa+ 2, target
+# CHECK: buna+ 0, target                 # encoding: [0x41,0xe3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsoa+ target
+# CHECK: bunlr+ 2                        # encoding: [0x4d,0xeb,0x00,0x20]
+         bsolr+ 2
+# CHECK: bunlr+ 0                        # encoding: [0x4d,0xe3,0x00,0x20]
+         bsolr+
+# CHECK: bunctr+ 2                       # encoding: [0x4d,0xeb,0x04,0x20]
+         bsoctr+ 2
+# CHECK: bunctr+ 0                       # encoding: [0x4d,0xe3,0x04,0x20]
+         bsoctr+
+# CHECK: bunl+ 2, target                 # encoding: [0x41,0xeb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bsol+ 2, target
+# CHECK: bunl+ 0, target                 # encoding: [0x41,0xe3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bsol+ target
+# CHECK: bunla+ 2, target                # encoding: [0x41,0xeb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsola+ 2, target
+# CHECK: bunla+ 0, target                # encoding: [0x41,0xe3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsola+ target
+# CHECK: bunlrl+ 2                       # encoding: [0x4d,0xeb,0x00,0x21]
+         bsolrl+ 2
+# CHECK: bunlrl+ 0                       # encoding: [0x4d,0xe3,0x00,0x21]
+         bsolrl+
+# CHECK: bunctrl+ 2                      # encoding: [0x4d,0xeb,0x04,0x21]
+         bsoctrl+ 2
+# CHECK: bunctrl+ 0                      # encoding: [0x4d,0xe3,0x04,0x21]
+         bsoctrl+
+
+# CHECK: bun- 2, target                  # encoding: [0x41,0xcb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bso- 2, target
+# CHECK: bun- 0, target                  # encoding: [0x41,0xc3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bso- target
+# CHECK: buna- 2, target                 # encoding: [0x41,0xcb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsoa- 2, target
+# CHECK: buna- 0, target                 # encoding: [0x41,0xc3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsoa- target
+# CHECK: bunlr- 2                        # encoding: [0x4d,0xcb,0x00,0x20]
+         bsolr- 2
+# CHECK: bunlr- 0                        # encoding: [0x4d,0xc3,0x00,0x20]
+         bsolr-
+# CHECK: bunctr- 2                       # encoding: [0x4d,0xcb,0x04,0x20]
+         bsoctr- 2
+# CHECK: bunctr- 0                       # encoding: [0x4d,0xc3,0x04,0x20]
+         bsoctr-
+# CHECK: bunl- 2, target                 # encoding: [0x41,0xcb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bsol- 2, target
+# CHECK: bunl- 0, target                 # encoding: [0x41,0xc3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bsol- target
+# CHECK: bunla- 2, target                # encoding: [0x41,0xcb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsola- 2, target
+# CHECK: bunla- 0, target                # encoding: [0x41,0xc3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bsola- target
+# CHECK: bunlrl- 2                       # encoding: [0x4d,0xcb,0x00,0x21]
+         bsolrl- 2
+# CHECK: bunlrl- 0                       # encoding: [0x4d,0xc3,0x00,0x21]
+         bsolrl-
+# CHECK: bunctrl- 2                      # encoding: [0x4d,0xcb,0x04,0x21]
+         bsoctrl- 2
+# CHECK: bunctrl- 0                      # encoding: [0x4d,0xc3,0x04,0x21]
+         bsoctrl-
 
 # CHECK: bnu 2, target                   # encoding: [0x40,0x8b,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bns 2, target
-# FIXME: bnsa 2, target
+# CHECK: bnu 0, target                   # encoding: [0x40,0x83,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bns target
+# CHECK: bnua 2, target                  # encoding: [0x40,0x8b,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsa 2, target
+# CHECK: bnua 0, target                  # encoding: [0x40,0x83,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsa target
 # CHECK: bnulr 2                         # encoding: [0x4c,0x8b,0x00,0x20]
          bnslr 2
+# CHECK: bnulr 0                         # encoding: [0x4c,0x83,0x00,0x20]
+         bnslr
 # CHECK: bnuctr 2                        # encoding: [0x4c,0x8b,0x04,0x20]
          bnsctr 2
-# FIXME: bnsl 2, target
-# FIXME: bnsla 2, target
-# FIXME: bnslrl 2
+# CHECK: bnuctr 0                        # encoding: [0x4c,0x83,0x04,0x20]
+         bnsctr
+# CHECK: bnul 2, target                  # encoding: [0x40,0x8b,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnsl 2, target
+# CHECK: bnul 0, target                  # encoding: [0x40,0x83,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnsl target
+# CHECK: bnula 2, target                 # encoding: [0x40,0x8b,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsla 2, target
+# CHECK: bnula 0, target                 # encoding: [0x40,0x83,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsla target
+# CHECK: bnulrl 2                        # encoding: [0x4c,0x8b,0x00,0x21]
+         bnslrl 2
+# CHECK: bnulrl 0                        # encoding: [0x4c,0x83,0x00,0x21]
+         bnslrl
 # CHECK: bnuctrl 2                       # encoding: [0x4c,0x8b,0x04,0x21]
          bnsctrl 2
+# CHECK: bnuctrl 0                       # encoding: [0x4c,0x83,0x04,0x21]
+         bnsctrl
+
+# CHECK: bnu+ 2, target                  # encoding: [0x40,0xeb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bns+ 2, target
+# CHECK: bnu+ 0, target                  # encoding: [0x40,0xe3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bns+ target
+# CHECK: bnua+ 2, target                 # encoding: [0x40,0xeb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsa+ 2, target
+# CHECK: bnua+ 0, target                 # encoding: [0x40,0xe3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsa+ target
+# CHECK: bnulr+ 2                        # encoding: [0x4c,0xeb,0x00,0x20]
+         bnslr+ 2
+# CHECK: bnulr+ 0                        # encoding: [0x4c,0xe3,0x00,0x20]
+         bnslr+
+# CHECK: bnuctr+ 2                       # encoding: [0x4c,0xeb,0x04,0x20]
+         bnsctr+ 2
+# CHECK: bnuctr+ 0                       # encoding: [0x4c,0xe3,0x04,0x20]
+         bnsctr+
+# CHECK: bnul+ 2, target                 # encoding: [0x40,0xeb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnsl+ 2, target
+# CHECK: bnul+ 0, target                 # encoding: [0x40,0xe3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnsl+ target
+# CHECK: bnula+ 2, target                # encoding: [0x40,0xeb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsla+ 2, target
+# CHECK: bnula+ 0, target                # encoding: [0x40,0xe3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsla+ target
+# CHECK: bnulrl+ 2                       # encoding: [0x4c,0xeb,0x00,0x21]
+         bnslrl+ 2
+# CHECK: bnulrl+ 0                       # encoding: [0x4c,0xe3,0x00,0x21]
+         bnslrl+
+# CHECK: bnuctrl+ 2                      # encoding: [0x4c,0xeb,0x04,0x21]
+         bnsctrl+ 2
+# CHECK: bnuctrl+ 0                      # encoding: [0x4c,0xe3,0x04,0x21]
+         bnsctrl+
+
+# CHECK: bnu- 2, target                  # encoding: [0x40,0xcb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bns- 2, target
+# CHECK: bnu- 0, target                  # encoding: [0x40,0xc3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bns- target
+# CHECK: bnua- 2, target                 # encoding: [0x40,0xcb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsa- 2, target
+# CHECK: bnua- 0, target                 # encoding: [0x40,0xc3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsa- target
+# CHECK: bnulr- 2                        # encoding: [0x4c,0xcb,0x00,0x20]
+         bnslr- 2
+# CHECK: bnulr- 0                        # encoding: [0x4c,0xc3,0x00,0x20]
+         bnslr-
+# CHECK: bnuctr- 2                       # encoding: [0x4c,0xcb,0x04,0x20]
+         bnsctr- 2
+# CHECK: bnuctr- 0                       # encoding: [0x4c,0xc3,0x04,0x20]
+         bnsctr-
+# CHECK: bnul- 2, target                 # encoding: [0x40,0xcb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnsl- 2, target
+# CHECK: bnul- 0, target                 # encoding: [0x40,0xc3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnsl- target
+# CHECK: bnula- 2, target                # encoding: [0x40,0xcb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsla- 2, target
+# CHECK: bnula- 0, target                # encoding: [0x40,0xc3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnsla- target
+# CHECK: bnulrl- 2                       # encoding: [0x4c,0xcb,0x00,0x21]
+         bnslrl- 2
+# CHECK: bnulrl- 0                       # encoding: [0x4c,0xc3,0x00,0x21]
+         bnslrl-
+# CHECK: bnuctrl- 2                      # encoding: [0x4c,0xcb,0x04,0x21]
+         bnsctrl- 2
+# CHECK: bnuctrl- 0                      # encoding: [0x4c,0xc3,0x04,0x21]
+         bnsctrl-
 
 # CHECK: bun 2, target                   # encoding: [0x41,0x8b,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bun 2, target
-# FIXME: buna 2, target
+# CHECK: bun 0, target                   # encoding: [0x41,0x83,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bun target
+# CHECK: buna 2, target                  # encoding: [0x41,0x8b,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         buna 2, target
+# CHECK: buna 0, target                  # encoding: [0x41,0x83,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         buna target
 # CHECK: bunlr 2                         # encoding: [0x4d,0x8b,0x00,0x20]
          bunlr 2
+# CHECK: bunlr 0                         # encoding: [0x4d,0x83,0x00,0x20]
+         bunlr
 # CHECK: bunctr 2                        # encoding: [0x4d,0x8b,0x04,0x20]
          bunctr 2
-# FIXME: bunl 2, target
-# FIXME: bunla 2, target
-# FIXME: bunlrl 2
+# CHECK: bunctr 0                        # encoding: [0x4d,0x83,0x04,0x20]
+         bunctr
+# CHECK: bunl 2, target                  # encoding: [0x41,0x8b,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bunl 2, target
+# CHECK: bunl 0, target                  # encoding: [0x41,0x83,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bunl target
+# CHECK: bunla 2, target                 # encoding: [0x41,0x8b,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bunla 2, target
+# CHECK: bunla 0, target                 # encoding: [0x41,0x83,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bunla target
+# CHECK: bunlrl 2                        # encoding: [0x4d,0x8b,0x00,0x21]
+         bunlrl 2
+# CHECK: bunlrl 0                        # encoding: [0x4d,0x83,0x00,0x21]
+         bunlrl
 # CHECK: bunctrl 2                       # encoding: [0x4d,0x8b,0x04,0x21]
          bunctrl 2
+# CHECK: bunctrl 0                       # encoding: [0x4d,0x83,0x04,0x21]
+         bunctrl
+
+# CHECK: bun+ 2, target                  # encoding: [0x41,0xeb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bun+ 2, target
+# CHECK: bun+ 0, target                  # encoding: [0x41,0xe3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bun+ target
+# CHECK: buna+ 2, target                 # encoding: [0x41,0xeb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         buna+ 2, target
+# CHECK: buna+ 0, target                 # encoding: [0x41,0xe3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         buna+ target
+# CHECK: bunlr+ 2                        # encoding: [0x4d,0xeb,0x00,0x20]
+         bunlr+ 2
+# CHECK: bunlr+ 0                        # encoding: [0x4d,0xe3,0x00,0x20]
+         bunlr+
+# CHECK: bunctr+ 2                       # encoding: [0x4d,0xeb,0x04,0x20]
+         bunctr+ 2
+# CHECK: bunctr+ 0                       # encoding: [0x4d,0xe3,0x04,0x20]
+         bunctr+
+# CHECK: bunl+ 2, target                 # encoding: [0x41,0xeb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bunl+ 2, target
+# CHECK: bunl+ 0, target                 # encoding: [0x41,0xe3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bunl+ target
+# CHECK: bunla+ 2, target                # encoding: [0x41,0xeb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bunla+ 2, target
+# CHECK: bunla+ 0, target                # encoding: [0x41,0xe3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bunla+ target
+# CHECK: bunlrl+ 2                       # encoding: [0x4d,0xeb,0x00,0x21]
+         bunlrl+ 2
+# CHECK: bunlrl+ 0                       # encoding: [0x4d,0xe3,0x00,0x21]
+         bunlrl+
+# CHECK: bunctrl+ 2                      # encoding: [0x4d,0xeb,0x04,0x21]
+         bunctrl+ 2
+# CHECK: bunctrl+ 0                      # encoding: [0x4d,0xe3,0x04,0x21]
+         bunctrl+
+
+# CHECK: bun- 2, target                  # encoding: [0x41,0xcb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bun- 2, target
+# CHECK: bun- 0, target                  # encoding: [0x41,0xc3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bun- target
+# CHECK: buna- 2, target                 # encoding: [0x41,0xcb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         buna- 2, target
+# CHECK: buna- 0, target                 # encoding: [0x41,0xc3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         buna- target
+# CHECK: bunlr- 2                        # encoding: [0x4d,0xcb,0x00,0x20]
+         bunlr- 2
+# CHECK: bunlr- 0                        # encoding: [0x4d,0xc3,0x00,0x20]
+         bunlr-
+# CHECK: bunctr- 2                       # encoding: [0x4d,0xcb,0x04,0x20]
+         bunctr- 2
+# CHECK: bunctr- 0                       # encoding: [0x4d,0xc3,0x04,0x20]
+         bunctr-
+# CHECK: bunl- 2, target                 # encoding: [0x41,0xcb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bunl- 2, target
+# CHECK: bunl- 0, target                 # encoding: [0x41,0xc3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bunl- target
+# CHECK: bunla- 2, target                # encoding: [0x41,0xcb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bunla- 2, target
+# CHECK: bunla- 0, target                # encoding: [0x41,0xc3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bunla- target
+# CHECK: bunlrl- 2                       # encoding: [0x4d,0xcb,0x00,0x21]
+         bunlrl- 2
+# CHECK: bunlrl- 0                       # encoding: [0x4d,0xc3,0x00,0x21]
+         bunlrl-
+# CHECK: bunctrl- 2                      # encoding: [0x4d,0xcb,0x04,0x21]
+         bunctrl- 2
+# CHECK: bunctrl- 0                      # encoding: [0x4d,0xc3,0x04,0x21]
+         bunctrl-
 
 # CHECK: bnu 2, target                   # encoding: [0x40,0x8b,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
          bnu 2, target
-# FIXME: bnua 2, target
+# CHECK: bnu 0, target                   # encoding: [0x40,0x83,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnu target
+# CHECK: bnua 2, target                  # encoding: [0x40,0x8b,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnua 2, target
+# CHECK: bnua 0, target                  # encoding: [0x40,0x83,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnua target
 # CHECK: bnulr 2                         # encoding: [0x4c,0x8b,0x00,0x20]
          bnulr 2
+# CHECK: bnulr 0                         # encoding: [0x4c,0x83,0x00,0x20]
+         bnulr
 # CHECK: bnuctr 2                        # encoding: [0x4c,0x8b,0x04,0x20]
          bnuctr 2
-# FIXME: bnul 2, target
-# FIXME: bnula 2, target
-# FIXME: bnulrl 2
+# CHECK: bnuctr 0                        # encoding: [0x4c,0x83,0x04,0x20]
+         bnuctr
+# CHECK: bnul 2, target                  # encoding: [0x40,0x8b,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnul 2, target
+# CHECK: bnul 0, target                  # encoding: [0x40,0x83,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnul target
+# CHECK: bnula 2, target                 # encoding: [0x40,0x8b,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnula 2, target
+# CHECK: bnula 0, target                 # encoding: [0x40,0x83,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnula target
+# CHECK: bnulrl 2                        # encoding: [0x4c,0x8b,0x00,0x21]
+         bnulrl 2
+# CHECK: bnulrl 0                        # encoding: [0x4c,0x83,0x00,0x21]
+         bnulrl
 # CHECK: bnuctrl 2                       # encoding: [0x4c,0x8b,0x04,0x21]
          bnuctrl 2
+# CHECK: bnuctrl 0                       # encoding: [0x4c,0x83,0x04,0x21]
+         bnuctrl
 
-# FIXME: Condition register logical mnemonics
+# CHECK: bnu+ 2, target                  # encoding: [0x40,0xeb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnu+ 2, target
+# CHECK: bnu+ 0, target                  # encoding: [0x40,0xe3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnu+ target
+# CHECK: bnua+ 2, target                 # encoding: [0x40,0xeb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnua+ 2, target
+# CHECK: bnua+ 0, target                 # encoding: [0x40,0xe3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnua+ target
+# CHECK: bnulr+ 2                        # encoding: [0x4c,0xeb,0x00,0x20]
+         bnulr+ 2
+# CHECK: bnulr+ 0                        # encoding: [0x4c,0xe3,0x00,0x20]
+         bnulr+
+# CHECK: bnuctr+ 2                       # encoding: [0x4c,0xeb,0x04,0x20]
+         bnuctr+ 2
+# CHECK: bnuctr+ 0                       # encoding: [0x4c,0xe3,0x04,0x20]
+         bnuctr+
+# CHECK: bnul+ 2, target                 # encoding: [0x40,0xeb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnul+ 2, target
+# CHECK: bnul+ 0, target                 # encoding: [0x40,0xe3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnul+ target
+# CHECK: bnula+ 2, target                # encoding: [0x40,0xeb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnula+ 2, target
+# CHECK: bnula+ 0, target                # encoding: [0x40,0xe3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnula+ target
+# CHECK: bnulrl+ 2                       # encoding: [0x4c,0xeb,0x00,0x21]
+         bnulrl+ 2
+# CHECK: bnulrl+ 0                       # encoding: [0x4c,0xe3,0x00,0x21]
+         bnulrl+
+# CHECK: bnuctrl+ 2                      # encoding: [0x4c,0xeb,0x04,0x21]
+         bnuctrl+ 2
+# CHECK: bnuctrl+ 0                      # encoding: [0x4c,0xe3,0x04,0x21]
+         bnuctrl+
+
+# CHECK: bnu- 2, target                  # encoding: [0x40,0xcb,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnu- 2, target
+# CHECK: bnu- 0, target                  # encoding: [0x40,0xc3,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnu- target
+# CHECK: bnua- 2, target                 # encoding: [0x40,0xcb,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnua- 2, target
+# CHECK: bnua- 0, target                 # encoding: [0x40,0xc3,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnua- target
+# CHECK: bnulr- 2                        # encoding: [0x4c,0xcb,0x00,0x20]
+         bnulr- 2
+# CHECK: bnulr- 0                        # encoding: [0x4c,0xc3,0x00,0x20]
+         bnulr-
+# CHECK: bnuctr- 2                       # encoding: [0x4c,0xcb,0x04,0x20]
+         bnuctr- 2
+# CHECK: bnuctr- 0                       # encoding: [0x4c,0xc3,0x04,0x20]
+         bnuctr-
+# CHECK: bnul- 2, target                 # encoding: [0x40,0xcb,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnul- 2, target
+# CHECK: bnul- 0, target                 # encoding: [0x40,0xc3,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnul- target
+# CHECK: bnula- 2, target                # encoding: [0x40,0xcb,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnula- 2, target
+# CHECK: bnula- 0, target                # encoding: [0x40,0xc3,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bnula- target
+# CHECK: bnulrl- 2                       # encoding: [0x4c,0xcb,0x00,0x21]
+         bnulrl- 2
+# CHECK: bnulrl- 0                       # encoding: [0x4c,0xc3,0x00,0x21]
+         bnulrl-
+# CHECK: bnuctrl- 2                      # encoding: [0x4c,0xcb,0x04,0x21]
+         bnuctrl- 2
+# CHECK: bnuctrl- 0                      # encoding: [0x4c,0xc3,0x04,0x21]
+         bnuctrl-
+
+# Condition register logical mnemonics
 
-# FIXME: Subtract mnemonics
+# CHECK: creqv 2, 2, 2                   # encoding: [0x4c,0x42,0x12,0x42]
+         crset 2
+# CHECK: crxor 2, 2, 2                   # encoding: [0x4c,0x42,0x11,0x82]
+         crclr 2
+# CHECK: cror 2, 3, 3                    # encoding: [0x4c,0x43,0x1b,0x82]
+         crmove 2, 3
+# CHECK: crnor 2, 3, 3                   # encoding: [0x4c,0x43,0x18,0x42]
+         crnot 2, 3
+
+# Subtract mnemonics
+
+# CHECK: addi 2, 3, -128                 # encoding: [0x38,0x43,0xff,0x80]
+         subi 2, 3, 128
+# CHECK: addis 2, 3, -128                # encoding: [0x3c,0x43,0xff,0x80]
+         subis 2, 3, 128
+# CHECK: addic 2, 3, -128                # encoding: [0x30,0x43,0xff,0x80]
+         subic 2, 3, 128
+# CHECK: addic. 2, 3, -128               # encoding: [0x34,0x43,0xff,0x80]
+         subic. 2, 3, 128
+
+# CHECK: subf 2, 4, 3                    # encoding: [0x7c,0x44,0x18,0x50]
+         sub 2, 3, 4
+# CHECK: subf. 2, 4, 3                   # encoding: [0x7c,0x44,0x18,0x51]
+         sub. 2, 3, 4
+# CHECK: subfc 2, 4, 3                   # encoding: [0x7c,0x44,0x18,0x10]
+         subc 2, 3, 4
+# CHECK: subfc. 2, 4, 3                  # encoding: [0x7c,0x44,0x18,0x11]
+         subc. 2, 3, 4
 
 # Compare mnemonics
 
 # CHECK: cmpdi 2, 3, 128                 # encoding: [0x2d,0x23,0x00,0x80]
          cmpdi 2, 3, 128
+# CHECK: cmpdi 0, 3, 128                 # encoding: [0x2c,0x23,0x00,0x80]
+         cmpdi 3, 128
 # CHECK: cmpd 2, 3, 4                    # encoding: [0x7d,0x23,0x20,0x00]
          cmpd 2, 3, 4
+# CHECK: cmpd 0, 3, 4                    # encoding: [0x7c,0x23,0x20,0x00]
+         cmpd 3, 4
 # CHECK: cmpldi 2, 3, 128                # encoding: [0x29,0x23,0x00,0x80]
          cmpldi 2, 3, 128
+# CHECK: cmpldi 0, 3, 128                # encoding: [0x28,0x23,0x00,0x80]
+         cmpldi 3, 128
 # CHECK: cmpld 2, 3, 4                   # encoding: [0x7d,0x23,0x20,0x40]
          cmpld 2, 3, 4
+# CHECK: cmpld 0, 3, 4                   # encoding: [0x7c,0x23,0x20,0x40]
+         cmpld 3, 4
 
 # CHECK: cmpwi 2, 3, 128                 # encoding: [0x2d,0x03,0x00,0x80]
          cmpwi 2, 3, 128
+# CHECK: cmpwi 0, 3, 128                 # encoding: [0x2c,0x03,0x00,0x80]
+         cmpwi 3, 128
 # CHECK: cmpw 2, 3, 4                    # encoding: [0x7d,0x03,0x20,0x00]
          cmpw 2, 3, 4
+# CHECK: cmpw 0, 3, 4                    # encoding: [0x7c,0x03,0x20,0x00]
+         cmpw 3, 4
 # CHECK: cmplwi 2, 3, 128                # encoding: [0x29,0x03,0x00,0x80]
          cmplwi 2, 3, 128
+# CHECK: cmplwi 0, 3, 128                # encoding: [0x28,0x03,0x00,0x80]
+         cmplwi 3, 128
 # CHECK: cmplw 2, 3, 4                   # encoding: [0x7d,0x03,0x20,0x40]
          cmplw 2, 3, 4
+# CHECK: cmplw 0, 3, 4                   # encoding: [0x7c,0x03,0x20,0x40]
+         cmplw 3, 4
+
+# Trap mnemonics
+
+# CHECK: twi 16, 3, 4                    # encoding: [0x0e,0x03,0x00,0x04]
+         twlti 3, 4
+# CHECK: tw 16, 3, 4                     # encoding: [0x7e,0x03,0x20,0x08]
+         twlt 3, 4
+# CHECK: tdi 16, 3, 4                    # encoding: [0x0a,0x03,0x00,0x04]
+         tdlti 3, 4
+# CHECK: td 16, 3, 4                     # encoding: [0x7e,0x03,0x20,0x88]
+         tdlt 3, 4
+
+# CHECK: twi 20, 3, 4                    # encoding: [0x0e,0x83,0x00,0x04]
+         twlei 3, 4
+# CHECK: tw 20, 3, 4                     # encoding: [0x7e,0x83,0x20,0x08]
+         twle 3, 4
+# CHECK: tdi 20, 3, 4                    # encoding: [0x0a,0x83,0x00,0x04]
+         tdlei 3, 4
+# CHECK: td 20, 3, 4                     # encoding: [0x7e,0x83,0x20,0x88]
+         tdle 3, 4
+
+# CHECK: twi 4, 3, 4                     # encoding: [0x0c,0x83,0x00,0x04]
+         tweqi 3, 4
+# CHECK: tw 4, 3, 4                      # encoding: [0x7c,0x83,0x20,0x08]
+         tweq 3, 4
+# CHECK: tdi 4, 3, 4                     # encoding: [0x08,0x83,0x00,0x04]
+         tdeqi 3, 4
+# CHECK: td 4, 3, 4                      # encoding: [0x7c,0x83,0x20,0x88]
+         tdeq 3, 4
 
-# FIXME: Trap mnemonics
+# CHECK: twi 12, 3, 4                    # encoding: [0x0d,0x83,0x00,0x04]
+         twgei 3, 4
+# CHECK: tw 12, 3, 4                     # encoding: [0x7d,0x83,0x20,0x08]
+         twge 3, 4
+# CHECK: tdi 12, 3, 4                    # encoding: [0x09,0x83,0x00,0x04]
+         tdgei 3, 4
+# CHECK: td 12, 3, 4                     # encoding: [0x7d,0x83,0x20,0x88]
+         tdge 3, 4
+
+# CHECK: twi 8, 3, 4                     # encoding: [0x0d,0x03,0x00,0x04]
+         twgti 3, 4
+# CHECK: tw 8, 3, 4                      # encoding: [0x7d,0x03,0x20,0x08]
+         twgt 3, 4
+# CHECK: tdi 8, 3, 4                     # encoding: [0x09,0x03,0x00,0x04]
+         tdgti 3, 4
+# CHECK: td 8, 3, 4                      # encoding: [0x7d,0x03,0x20,0x88]
+         tdgt 3, 4
+
+# CHECK: twi 12, 3, 4                    # encoding: [0x0d,0x83,0x00,0x04]
+         twnli 3, 4
+# CHECK: tw 12, 3, 4                     # encoding: [0x7d,0x83,0x20,0x08]
+         twnl 3, 4
+# CHECK: tdi 12, 3, 4                    # encoding: [0x09,0x83,0x00,0x04]
+         tdnli 3, 4
+# CHECK: td 12, 3, 4                     # encoding: [0x7d,0x83,0x20,0x88]
+         tdnl 3, 4
+
+# CHECK: twi 24, 3, 4                    # encoding: [0x0f,0x03,0x00,0x04]
+         twnei 3, 4
+# CHECK: tw 24, 3, 4                     # encoding: [0x7f,0x03,0x20,0x08]
+         twne 3, 4
+# CHECK: tdi 24, 3, 4                    # encoding: [0x0b,0x03,0x00,0x04]
+         tdnei 3, 4
+# CHECK: td 24, 3, 4                     # encoding: [0x7f,0x03,0x20,0x88]
+         tdne 3, 4
+
+# CHECK: twi 20, 3, 4                    # encoding: [0x0e,0x83,0x00,0x04]
+         twngi 3, 4
+# CHECK: tw 20, 3, 4                     # encoding: [0x7e,0x83,0x20,0x08]
+         twng 3, 4
+# CHECK: tdi 20, 3, 4                    # encoding: [0x0a,0x83,0x00,0x04]
+         tdngi 3, 4
+# CHECK: td 20, 3, 4                     # encoding: [0x7e,0x83,0x20,0x88]
+         tdng 3, 4
+
+# CHECK: twi 2, 3, 4                     # encoding: [0x0c,0x43,0x00,0x04]
+         twllti 3, 4
+# CHECK: tw 2, 3, 4                      # encoding: [0x7c,0x43,0x20,0x08]
+         twllt 3, 4
+# CHECK: tdi 2, 3, 4                     # encoding: [0x08,0x43,0x00,0x04]
+         tdllti 3, 4
+# CHECK: td 2, 3, 4                      # encoding: [0x7c,0x43,0x20,0x88]
+         tdllt 3, 4
+
+# CHECK: twi 6, 3, 4                     # encoding: [0x0c,0xc3,0x00,0x04]
+         twllei 3, 4
+# CHECK: tw 6, 3, 4                      # encoding: [0x7c,0xc3,0x20,0x08]
+         twlle 3, 4
+# CHECK: tdi 6, 3, 4                     # encoding: [0x08,0xc3,0x00,0x04]
+         tdllei 3, 4
+# CHECK: td 6, 3, 4                      # encoding: [0x7c,0xc3,0x20,0x88]
+         tdlle 3, 4
+
+# CHECK: twi 5, 3, 4                     # encoding: [0x0c,0xa3,0x00,0x04]
+         twlgei 3, 4
+# CHECK: tw 5, 3, 4                      # encoding: [0x7c,0xa3,0x20,0x08]
+         twlge 3, 4
+# CHECK: tdi 5, 3, 4                     # encoding: [0x08,0xa3,0x00,0x04]
+         tdlgei 3, 4
+# CHECK: td 5, 3, 4                      # encoding: [0x7c,0xa3,0x20,0x88]
+         tdlge 3, 4
+
+# CHECK: twi 1, 3, 4                     # encoding: [0x0c,0x23,0x00,0x04]
+         twlgti 3, 4
+# CHECK: tw 1, 3, 4                      # encoding: [0x7c,0x23,0x20,0x08]
+         twlgt 3, 4
+# CHECK: tdi 1, 3, 4                     # encoding: [0x08,0x23,0x00,0x04]
+         tdlgti 3, 4
+# CHECK: td 1, 3, 4                      # encoding: [0x7c,0x23,0x20,0x88]
+         tdlgt 3, 4
+
+# CHECK: twi 5, 3, 4                     # encoding: [0x0c,0xa3,0x00,0x04]
+         twlnli 3, 4
+# CHECK: tw 5, 3, 4                      # encoding: [0x7c,0xa3,0x20,0x08]
+         twlnl 3, 4
+# CHECK: tdi 5, 3, 4                     # encoding: [0x08,0xa3,0x00,0x04]
+         tdlnli 3, 4
+# CHECK: td 5, 3, 4                      # encoding: [0x7c,0xa3,0x20,0x88]
+         tdlnl 3, 4
+
+# CHECK: twi 6, 3, 4                     # encoding: [0x0c,0xc3,0x00,0x04]
+         twlngi 3, 4
+# CHECK: tw 6, 3, 4                      # encoding: [0x7c,0xc3,0x20,0x08]
+         twlng 3, 4
+# CHECK: tdi 6, 3, 4                     # encoding: [0x08,0xc3,0x00,0x04]
+         tdlngi 3, 4
+# CHECK: td 6, 3, 4                      # encoding: [0x7c,0xc3,0x20,0x88]
+         tdlng 3, 4
+
+# CHECK: twi 31, 3, 4                    # encoding: [0x0f,0xe3,0x00,0x04]
+         twui 3, 4
+# CHECK: tw 31, 3, 4                     # encoding: [0x7f,0xe3,0x20,0x08]
+         twu 3, 4
+# CHECK: tdi 31, 3, 4                    # encoding: [0x0b,0xe3,0x00,0x04]
+         tdui 3, 4
+# CHECK: td 31, 3, 4                     # encoding: [0x7f,0xe3,0x20,0x88]
+         tdu 3, 4
+
+# CHECK: trap                            # encoding: [0x7f,0xe0,0x00,0x08]
+         trap
 
 # Rotate and shift mnemonics
 
-# FIXME: extldi 2, 3, 4, 5
-# FIXME: extrdi 2, 3, 4, 5
-# FIXME: insrdi 2, 3, 4, 5
-# FIXME: rotldi 2, 3, 4
-# FIXME: rotrdi 2, 3, 4
-# FIXME: rotld 2, 3, 4
+# CHECK: rldicr 2, 3, 5, 3               # encoding: [0x78,0x62,0x28,0xc4]
+         extldi 2, 3, 4, 5
+# CHECK: rldicr. 2, 3, 5, 3              # encoding: [0x78,0x62,0x28,0xc5]
+         extldi. 2, 3, 4, 5
+# CHECK: rldicl 2, 3, 9, 60              # encoding: [0x78,0x62,0x4f,0x20]
+         extrdi 2, 3, 4, 5
+# CHECK: rldicl. 2, 3, 9, 60             # encoding: [0x78,0x62,0x4f,0x21]
+         extrdi. 2, 3, 4, 5
+# CHECK: rldimi 2, 3, 55, 5              # encoding: [0x78,0x62,0xb9,0x4e]
+         insrdi 2, 3, 4, 5
+# CHECK: rldimi. 2, 3, 55, 5             # encoding: [0x78,0x62,0xb9,0x4f]
+         insrdi. 2, 3, 4, 5
+# CHECK: rldicl 2, 3, 4, 0               # encoding: [0x78,0x62,0x20,0x00]
+         rotldi 2, 3, 4
+# CHECK: rldicl. 2, 3, 4, 0              # encoding: [0x78,0x62,0x20,0x01]
+         rotldi. 2, 3, 4
+# CHECK: rldicl 2, 3, 60, 0              # encoding: [0x78,0x62,0xe0,0x02]
+         rotrdi 2, 3, 4
+# CHECK: rldicl. 2, 3, 60, 0             # encoding: [0x78,0x62,0xe0,0x03]
+         rotrdi. 2, 3, 4
+# CHECK: rldcl 2, 3, 4, 0                # encoding: [0x78,0x62,0x20,0x10]
+         rotld 2, 3, 4
+# CHECK: rldcl. 2, 3, 4, 0               # encoding: [0x78,0x62,0x20,0x11]
+         rotld. 2, 3, 4
 # CHECK: sldi 2, 3, 4                    # encoding: [0x78,0x62,0x26,0xe4]
          sldi 2, 3, 4
+# CHECK: rldicr. 2, 3, 4, 59             # encoding: [0x78,0x62,0x26,0xe5]
+         sldi. 2, 3, 4
 # CHECK: rldicl 2, 3, 60, 4              # encoding: [0x78,0x62,0xe1,0x02]
          srdi 2, 3, 4
-# FIXME: clrldi 2, 3, 4
-# FIXME: clrrdi 2, 3, 4
-# FIXME: clrlsldi 2, 3, 4, 5
-
-# FIXME: extlwi 2, 3, 4, 5
-# FIXME: extrwi 2, 3, 4, 5
-# FIXME: inslwi 2, 3, 4, 5
-# FIXME: insrwi 2, 3, 4, 5
-# FIXME: rotlwi 2, 3, 4
-# FIXME: rotrwi 2, 3, 4
-# FIXME: rotlw 2, 3, 4
+# CHECK: rldicl. 2, 3, 60, 4             # encoding: [0x78,0x62,0xe1,0x03]
+         srdi. 2, 3, 4
+# CHECK: rldicl 2, 3, 0, 4               # encoding: [0x78,0x62,0x01,0x00]
+         clrldi 2, 3, 4
+# CHECK: rldicl. 2, 3, 0, 4              # encoding: [0x78,0x62,0x01,0x01]
+         clrldi. 2, 3, 4
+# CHECK: rldicr 2, 3, 0, 59              # encoding: [0x78,0x62,0x06,0xe4]
+         clrrdi 2, 3, 4
+# CHECK: rldicr. 2, 3, 0, 59             # encoding: [0x78,0x62,0x06,0xe5]
+         clrrdi. 2, 3, 4
+# CHECK: rldic 2, 3, 4, 1                # encoding: [0x78,0x62,0x20,0x48]
+         clrlsldi 2, 3, 5, 4
+# CHECK: rldic. 2, 3, 4, 1               # encoding: [0x78,0x62,0x20,0x49]
+         clrlsldi. 2, 3, 5, 4
+
+# CHECK: rlwinm 2, 3, 5, 0, 3            # encoding: [0x54,0x62,0x28,0x06]
+         extlwi 2, 3, 4, 5
+# CHECK: rlwinm. 2, 3, 5, 0, 3           # encoding: [0x54,0x62,0x28,0x07]
+         extlwi. 2, 3, 4, 5
+# CHECK: rlwinm 2, 3, 9, 28, 31          # encoding: [0x54,0x62,0x4f,0x3e]
+         extrwi 2, 3, 4, 5
+# CHECK: rlwinm. 2, 3, 9, 28, 31         # encoding: [0x54,0x62,0x4f,0x3f]
+         extrwi. 2, 3, 4, 5
+# CHECK: rlwimi 2, 3, 27, 5, 8           # encoding: [0x50,0x62,0xd9,0x50]
+         inslwi 2, 3, 4, 5
+# CHECK: rlwimi. 2, 3, 27, 5, 8          # encoding: [0x50,0x62,0xd9,0x51]
+         inslwi. 2, 3, 4, 5
+# CHECK: rlwimi 2, 3, 23, 5, 8           # encoding: [0x50,0x62,0xb9,0x50]
+         insrwi 2, 3, 4, 5
+# CHECK: rlwimi. 2, 3, 23, 5, 8          # encoding: [0x50,0x62,0xb9,0x51]
+         insrwi. 2, 3, 4, 5
+# CHECK: rlwinm 2, 3, 4, 0, 31           # encoding: [0x54,0x62,0x20,0x3e]
+         rotlwi 2, 3, 4
+# CHECK: rlwinm. 2, 3, 4, 0, 31          # encoding: [0x54,0x62,0x20,0x3f]
+         rotlwi. 2, 3, 4
+# CHECK: rlwinm 2, 3, 28, 0, 31          # encoding: [0x54,0x62,0xe0,0x3e]
+         rotrwi 2, 3, 4
+# CHECK: rlwinm. 2, 3, 28, 0, 31         # encoding: [0x54,0x62,0xe0,0x3f]
+         rotrwi. 2, 3, 4
+# CHECK: rlwnm 2, 3, 4, 0, 31            # encoding: [0x5c,0x62,0x20,0x3e]
+         rotlw 2, 3, 4
+# CHECK: rlwnm. 2, 3, 4, 0, 31           # encoding: [0x5c,0x62,0x20,0x3f]
+         rotlw. 2, 3, 4
 # CHECK: slwi 2, 3, 4                    # encoding: [0x54,0x62,0x20,0x36]
          slwi 2, 3, 4
+# CHECK: rlwinm. 2, 3, 4, 0, 27          # encoding: [0x54,0x62,0x20,0x37]
+         slwi. 2, 3, 4
 # CHECK: srwi 2, 3, 4                    # encoding: [0x54,0x62,0xe1,0x3e]
          srwi 2, 3, 4
-# FIXME: clrlwi 2, 3, 4
-# FIXME: clrrwi 2, 3, 4
-# FIXME: clrlslwi 2, 3, 4, 5
+# CHECK: rlwinm. 2, 3, 28, 4, 31         # encoding: [0x54,0x62,0xe1,0x3f]
+         srwi. 2, 3, 4
+# CHECK: rlwinm 2, 3, 0, 4, 31           # encoding: [0x54,0x62,0x01,0x3e]
+         clrlwi 2, 3, 4
+# CHECK: rlwinm. 2, 3, 0, 4, 31          # encoding: [0x54,0x62,0x01,0x3f]
+         clrlwi. 2, 3, 4
+# CHECK: rlwinm 2, 3, 0, 0, 27           # encoding: [0x54,0x62,0x00,0x36]
+         clrrwi 2, 3, 4
+# CHECK: rlwinm. 2, 3, 0, 0, 27          # encoding: [0x54,0x62,0x00,0x37]
+         clrrwi. 2, 3, 4
+# CHECK: rlwinm 2, 3, 4, 1, 27           # encoding: [0x54,0x62,0x20,0x76]
+         clrlslwi 2, 3, 5, 4
+# CHECK: rlwinm. 2, 3, 4, 1, 27          # encoding: [0x54,0x62,0x20,0x77]
+         clrlslwi. 2, 3, 5, 4
 
 # Move to/from special purpose register mnemonics
 
-# FIXME: mtxer 2
-# FIXME: mfxer 2
+# CHECK: mtspr 1, 2                      # encoding: [0x7c,0x41,0x03,0xa6]
+         mtxer 2
+# CHECK: mfspr 2, 1                      # encoding: [0x7c,0x41,0x02,0xa6]
+         mfxer 2
 # CHECK: mtlr 2                          # encoding: [0x7c,0x48,0x03,0xa6]
          mtlr 2
 # CHECK: mflr 2                          # encoding: [0x7c,0x48,0x02,0xa6]
@@ -319,13 +2206,22 @@
 
 # CHECK: nop                             # encoding: [0x60,0x00,0x00,0x00]
          nop
-# FIXME: xnop
+# CHECK: xori 0, 0, 0                    # encoding: [0x68,0x00,0x00,0x00]
+         xnop
 # CHECK: li 2, 128                       # encoding: [0x38,0x40,0x00,0x80]
          li 2, 128
 # CHECK: lis 2, 128                      # encoding: [0x3c,0x40,0x00,0x80]
          lis 2, 128
-# FIXME: la 2, 128(4)
+# CHECK: la 2, 128(4)
+         la 2, 128(4)
 # CHECK: mr 2, 3                         # encoding: [0x7c,0x62,0x1b,0x78]
          mr 2, 3
-# FIXME: not 2, 3
+# CHECK: or. 2, 3, 3                     # encoding: [0x7c,0x62,0x1b,0x79]
+         mr. 2, 3
+# CHECK: nor 2, 3, 3                     # encoding: [0x7c,0x62,0x18,0xf8]
+         not 2, 3
+# CHECK: nor. 2, 3, 3                    # encoding: [0x7c,0x62,0x18,0xf9]
+         not. 2, 3
+# CHECK: mtcrf 255, 2                    # encoding: [0x7c,0x4f,0xf1,0x20]
+         mtcr 2
 
diff --git a/test/MC/PowerPC/ppc64-encoding-fp.s b/test/MC/PowerPC/ppc64-encoding-fp.s
index ae0e2866a261..f9bdee14e157 100644
--- a/test/MC/PowerPC/ppc64-encoding-fp.s
+++ b/test/MC/PowerPC/ppc64-encoding-fp.s
@@ -65,8 +65,10 @@
          fnabs 2, 3
 # CHECK: fnabs. 2, 3                     # encoding: [0xfc,0x40,0x19,0x11]
          fnabs. 2, 3
-# FIXME: fcpsgn 2, 3
-# FIXME: fcpsgn. 2, 3
+# CHECK: fcpsgn 2, 3, 4                  # encoding: [0xfc,0x43,0x20,0x10]
+         fcpsgn 2, 3, 4
+# CHECK: fcpsgn. 2, 3, 4                 # encoding: [0xfc,0x43,0x20,0x11]
+         fcpsgn. 2, 3, 4
 
 # Floating-point arithmetic instructions
 
@@ -171,8 +173,10 @@
 # CHECK: frsp. 2, 3                      # encoding: [0xfc,0x40,0x18,0x19]
          frsp. 2, 3
 
-# FIXME: fctid 2, 3
-# FIXME: fctid. 2, 3
+# CHECK: fctid 2, 3                      # encoding: [0xfc,0x40,0x1e,0x5c]
+         fctid 2, 3
+# CHECK: fctid. 2, 3                     # encoding: [0xfc,0x40,0x1e,0x5d]
+         fctid. 2, 3
 # CHECK: fctidz 2, 3                     # encoding: [0xfc,0x40,0x1e,0x5e]
          fctidz 2, 3
 # CHECK: fctidz. 2, 3                    # encoding: [0xfc,0x40,0x1e,0x5f]
@@ -183,8 +187,10 @@
          fctiduz 2, 3
 # CHECK: fctiduz. 2, 3                   # encoding: [0xfc,0x40,0x1f,0x5f]
          fctiduz. 2, 3
-# FIXME: fctiw 2, 3
-# FIXME: fctiw. 2, 3
+# CHECK: fctiw 2, 3                      # encoding: [0xfc,0x40,0x18,0x1c]
+         fctiw 2, 3
+# CHECK: fctiw. 2, 3                     # encoding: [0xfc,0x40,0x18,0x1d]
+         fctiw. 2, 3
 # CHECK: fctiwz 2, 3                     # encoding: [0xfc,0x40,0x18,0x1e]
          fctiwz 2, 3
 # CHECK: fctiwz. 2, 3                    # encoding: [0xfc,0x40,0x18,0x1f]
diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s
index dda796063849..d82d86fd0102 100644
--- a/test/MC/PowerPC/ppc64-encoding.s
+++ b/test/MC/PowerPC/ppc64-encoding.s
@@ -8,40 +8,73 @@
 # CHECK: b target                        # encoding: [0b010010AA,A,A,0bAAAAAA00]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24
          b target
-# FIXME: ba target
+# CHECK: ba target                       # encoding: [0b010010AA,A,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24abs
+         ba target
 # CHECK: bl target                       # encoding: [0b010010AA,A,A,0bAAAAAA01]
 # CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24
          bl target
-# FIXME: bla target
-
-# FIXME: bc 4, 10, target
-# FIXME: bca 4, 10, target
-# FIXME: bcl 4, 10, target
-# FIXME: bcla 4, 10, target
-
-# FIXME: bclr 4, 10, 3
-# FIXME: bclrl 4, 10, 3
-# FIXME: bcctr 4, 10, 3
-# FIXME: bcctrl 4, 10, 3
+# CHECK: bla target                      # encoding: [0b010010AA,A,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24abs
+         bla target
+
+# CHECK: bc 4, 10, target                # encoding: [0x40,0x8a,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bc 4, 10, target
+# CHECK: bca 4, 10, target               # encoding: [0x40,0x8a,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bca 4, 10, target
+# CHECK: bcl 4, 10, target               # encoding: [0x40,0x8a,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bcl 4, 10, target
+# CHECK: bcla 4, 10, target              # encoding: [0x40,0x8a,A,0bAAAAAA11]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+         bcla 4, 10, target
+
+# CHECK: bclr 4, 10, 3                   # encoding: [0x4c,0x8a,0x18,0x20]
+         bclr 4, 10, 3
+# CHECK: bclr 4, 10, 0                   # encoding: [0x4c,0x8a,0x00,0x20]
+         bclr 4, 10
+# CHECK: bclrl 4, 10, 3                  # encoding: [0x4c,0x8a,0x18,0x21]
+         bclrl 4, 10, 3
+# CHECK: bclrl 4, 10, 0                  # encoding: [0x4c,0x8a,0x00,0x21]
+         bclrl 4, 10
+# CHECK: bcctr 4, 10, 3                  # encoding: [0x4c,0x8a,0x1c,0x20]
+         bcctr 4, 10, 3
+# CHECK: bcctr 4, 10, 0                  # encoding: [0x4c,0x8a,0x04,0x20]
+         bcctr 4, 10
+# CHECK: bcctrl 4, 10, 3                 # encoding: [0x4c,0x8a,0x1c,0x21]
+         bcctrl 4, 10, 3
+# CHECK: bcctrl 4, 10, 0                 # encoding: [0x4c,0x8a,0x04,0x21]
+         bcctrl 4, 10
 
 # Condition register instructions
 
-# FIXME: crand 2, 3, 4
-# FIXME: crnand 2, 3, 4
+# CHECK: crand 2, 3, 4                   # encoding: [0x4c,0x43,0x22,0x02]
+         crand 2, 3, 4
+# CHECK: crnand 2, 3, 4                  # encoding: [0x4c,0x43,0x21,0xc2]
+         crnand 2, 3, 4
 # CHECK: cror 2, 3, 4                    # encoding: [0x4c,0x43,0x23,0x82]
          cror 2, 3, 4
-# FIXME: crxor 2, 3, 4
-# FIXME: crnor 2, 3, 4
+# CHECK: crxor 2, 3, 4                   # encoding: [0x4c,0x43,0x21,0x82]
+         crxor 2, 3, 4
+# CHECK: crnor 2, 3, 4                   # encoding: [0x4c,0x43,0x20,0x42]
+         crnor 2, 3, 4
 # CHECK: creqv 2, 3, 4                   # encoding: [0x4c,0x43,0x22,0x42]
          creqv 2, 3, 4
-# FIXME: crandc 2, 3, 4
-# FIXME: crorc 2, 3, 4
+# CHECK: crandc 2, 3, 4                  # encoding: [0x4c,0x43,0x21,0x02]
+         crandc 2, 3, 4
+# CHECK: crorc 2, 3, 4                   # encoding: [0x4c,0x43,0x23,0x42]
+         crorc 2, 3, 4
 # CHECK: mcrf 2, 3                       # encoding: [0x4d,0x0c,0x00,0x00]
          mcrf 2, 3
 
 # System call instruction
 
-# FIXME: sc 1
+# CHECK: sc 1                            # encoding: [0x44,0x00,0x00,0x22]
+         sc 1
+# CHECK: sc 0                            # encoding: [0x44,0x00,0x00,0x02]
+         sc
 
 # Fixed-point facility
 
@@ -144,7 +177,12 @@
 # CHECK: stdbrx 2, 3, 4                  # encoding: [0x7c,0x43,0x25,0x28]
          stdbrx 2, 3, 4
 
-# FIXME: Fixed-point load and store multiple instructions
+# Fixed-point load and store multiple instructions
+
+# CHECK: lmw 2, 128(1)                   # encoding: [0xb8,0x41,0x00,0x80]
+         lmw 2, 128(1)
+# CHECK: stmw 2, 128(1)                  # encoding: [0xbc,0x41,0x00,0x80]
+         stmw 2, 128(1)
 
 # FIXME: Fixed-point move assist instructions
 
@@ -306,9 +344,36 @@
 # FIXME: divdeuo 2, 3, 4
 # FIXME: divdeuo. 2, 3, 4
 
-# FIXME: Fixed-point compare instructions
-
-# FIXME: Fixed-point trap instructions
+# Fixed-point compare instructions
+
+# CHECK: cmpdi 2, 3, 128                 # encoding: [0x2d,0x23,0x00,0x80]
+         cmpi 2, 1, 3, 128
+# CHECK: cmpd 2, 3, 4                    # encoding: [0x7d,0x23,0x20,0x00]
+         cmp 2, 1, 3, 4
+# CHECK: cmpldi 2, 3, 128                # encoding: [0x29,0x23,0x00,0x80]
+         cmpli 2, 1, 3, 128
+# CHECK: cmpld 2, 3, 4                   # encoding: [0x7d,0x23,0x20,0x40]
+         cmpl 2, 1, 3, 4
+
+# CHECK: cmpwi 2, 3, 128                 # encoding: [0x2d,0x03,0x00,0x80]
+         cmpi 2, 0, 3, 128
+# CHECK: cmpw 2, 3, 4                    # encoding: [0x7d,0x03,0x20,0x00]
+         cmp 2, 0, 3, 4
+# CHECK: cmplwi 2, 3, 128                # encoding: [0x29,0x03,0x00,0x80]
+         cmpli 2, 0, 3, 128
+# CHECK: cmplw 2, 3, 4                   # encoding: [0x7d,0x03,0x20,0x40]
+         cmpl 2, 0, 3, 4
+
+# Fixed-point trap instructions
+
+# CHECK: twi 2, 3, 4                     # encoding: [0x0c,0x43,0x00,0x04]
+         twi 2, 3, 4
+# CHECK: tw 2, 3, 4                      # encoding: [0x7c,0x43,0x20,0x08]
+         tw 2, 3, 4
+# CHECK: tdi 2, 3, 4                     # encoding: [0x08,0x43,0x00,0x04]
+         tdi 2, 3, 4
+# CHECK: td 2, 3, 4                      # encoding: [0x7c,0x43,0x20,0x88]
+         td 2, 3, 4
 
 # Fixed-point select
 
@@ -417,14 +482,18 @@
          rldicr 2, 3, 4, 5
 # CHECK: rldicr. 2, 3, 4, 5              # encoding: [0x78,0x62,0x21,0x45]
          rldicr. 2, 3, 4, 5
-# FIXME: rldic 2, 3, 4, 5
-# FIXME: rldic. 2, 3, 4, 5
+# CHECK: rldic 2, 3, 4, 5                # encoding: [0x78,0x62,0x21,0x48]
+         rldic 2, 3, 4, 5
+# CHECK: rldic. 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x49]
+         rldic. 2, 3, 4, 5
 # CHECK: rldcl 2, 3, 4, 5                # encoding: [0x78,0x62,0x21,0x50]
          rldcl 2, 3, 4, 5
 # CHECK: rldcl. 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x51]
          rldcl. 2, 3, 4, 5
-# FIXME: rldcr 2, 3, 4, 5
-# FIXME: rldcr. 2, 3, 4, 5
+# CHECK: rldcr 2, 3, 4, 5                # encoding: [0x78,0x62,0x21,0x52]
+         rldcr 2, 3, 4, 5
+# CHECK: rldcr. 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x53]
+         rldcr. 2, 3, 4, 5
 # CHECK: rldimi 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x4c]
          rldimi 2, 3, 4, 5
 # CHECK: rldimi. 2, 3, 4, 5              # encoding: [0x78,0x62,0x21,0x4d]
@@ -467,14 +536,16 @@
 
 # Move to/from system register instructions
 
-# FIXME: mtspr 256, 2
-# FIXME: mfspr 2, 256
-# CHECK: mtcrf 16, 2                     # encoding: [0x7c,0x41,0x01,0x20]
-         mtcrf 16, 2
+# CHECK: mtspr 600, 2                    # encoding: [0x7c,0x58,0x93,0xa6]
+         mtspr 600, 2
+# CHECK: mfspr 2, 600                    # encoding: [0x7c,0x58,0x92,0xa6]
+         mfspr 2, 600
+# CHECK: mtcrf 123, 2                    # encoding: [0x7c,0x47,0xb1,0x20]
+         mtcrf 123, 2
 # CHECK: mfcr 2                          # encoding: [0x7c,0x40,0x00,0x26]
          mfcr 2
-# FIXME: mtocrf 16, 2
+# CHECK: mtocrf 16, 2                    # encoding: [0x7c,0x51,0x01,0x20]
+         mtocrf 16, 2
 # CHECK: mfocrf 16, 8                    # encoding: [0x7e,0x10,0x80,0x26]
          mfocrf 16, 8
-# FIXME: mcrxr 2
 
diff --git a/test/MC/PowerPC/ppc64-errors.s b/test/MC/PowerPC/ppc64-errors.s
index 1da575304609..53197ba13476 100644
--- a/test/MC/PowerPC/ppc64-errors.s
+++ b/test/MC/PowerPC/ppc64-errors.s
@@ -12,6 +12,16 @@
 # CHECK-NEXT: add %r32, %r32, %r32
               add %r32, %r32, %r32
 
+# TLS register operands
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: add 3, symbol@tls, 4
+              add 3, symbol@tls, 4
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: subf 3, 4, symbol@tls
+              subf 3, 4, symbol@tls
+
 # Signed 16-bit immediate operands
 
 # CHECK: error: invalid operand for instruction
@@ -32,6 +42,14 @@
 # CHECK-NEXT: ori 1, 2, 65536
               ori 1, 2, 65536
 
+# Signed 16-bit immediate operands (extended range for addis)
+
+# CHECK: error: invalid operand for instruction
+         addis 1, 0, -65537
+
+# CHECK: error: invalid operand for instruction
+         addis 1, 0, 65536
+
 # D-Form memory operands
 
 # CHECK: error: invalid register number
@@ -78,3 +96,6 @@
 # CHECK-NEXT: ld 1, 32768(2)
               ld 1, 32768(2)
 
+# CHECK: error: invalid modifier 'got' (no symbols present)
+# CHECK-NEXT: addi 4, 3, 123@got
+         addi 4, 3, 123@got
diff --git a/test/MC/PowerPC/ppc64-fixup-apply.s b/test/MC/PowerPC/ppc64-fixup-apply.s
new file mode 100644
index 000000000000..ba141e4227a1
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-fixup-apply.s
@@ -0,0 +1,100 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+
+# This checks that fixups that can be resolved within the same
+# object file are applied correctly.
+
+.text
+
+addi 1, 1, target               # plain symbol, 16-bit field -> 0x1234
+addis 1, 1, target              # same field in addis -> 0x1234
+
+.set target, 0x1234             # defined after its uses above
+
+addi 1, 1, target2@l            # @l of 0x12345678 -> 0x5678
+addis 1, 1, target2@ha          # @ha of 0x12345678 -> 0x1234
+
+.set target2, 0x12345678
+
+addi 1, 1, target3-target4@l    # difference is 0x11114444; @l -> 0x4444
+addis 1, 1, target3-target4@ha  # difference is 0x11114444; @ha -> 0x1111
+
+.set target3, 0x23455678
+.set target4, 0x12341234
+
+addi 1, 1, target5+0x8000@l     # sum is 0x10008001; @l -> 0x8001
+addis 1, 1, target5+0x8000@ha   # bit 15 of the sum is set, so @ha -> 0x1001
+
+.set target5, 0x10000001
+
+1:
+addi 1, 1, 2f-1b@l              # two instructions between labels: 8 -> 0x0008
+addis 1, 1, 1b-2f@ha            # -8 high-adjusted -> 0x0000
+2:
+
+addi 1, 1, target6@h            # @h of 0x4321fedc -> 0x4321 (no adjustment)
+addis 1, 1, target6@h           # same field in addis -> 0x4321
+
+.set target6, 0x4321fedc
+
+addi 1, 1, target7@higher       # bits 32-47 of 0x1234ffffffff8000 -> 0xffff
+addis 1, 1, target7@highest     # bits 48-63 -> 0x1234
+addi 1, 1, target7@highera      # adjusted variant -> 0x0000
+addis 1, 1, target7@highesta    # adjusted variant -> 0x1235
+
+.set target7, 0x1234ffffffff8000
+
+.data
+
+.quad v1                        # 8 bytes expected: 12345678 9ABCDEF0
+.long v2                        # 4 bytes expected: 87654321
+.short v3                       # 2 bytes expected: BEEF
+.byte v4                        # 1 byte expected: 42 (section size 15)
+
+.set v1, 0x123456789abcdef0
+.set v2, 0x87654321
+.set v3, 0xbeef
+.set v4, 0x42
+
+# CHECK:       Section {
+# CHECK:         Name: .text
+# CHECK-NEXT:    Type: SHT_PROGBITS
+# CHECK-NEXT:    Flags [
+# CHECK-NEXT:      SHF_ALLOC
+# CHECK-NEXT:      SHF_EXECINSTR
+# CHECK-NEXT:    ]
+# CHECK-NEXT:    Address: 0x0
+# CHECK-NEXT:    Offset:
+# CHECK-NEXT:    Size: 64
+# CHECK-NEXT:    Link: 0
+# CHECK-NEXT:    Info: 0
+# CHECK-NEXT:    AddressAlignment: 4
+# CHECK-NEXT:    EntrySize: 0
+# CHECK-NEXT:    SectionData (
+# CHECK-NEXT:      0000: 38211234 3C211234 38215678 3C211234
+# CHECK-NEXT:      0010: 38214444 3C211111 38218001 3C211001
+# CHECK-NEXT:      0020: 38210008 3C210000 38214321 3C214321
+# CHECK-NEXT:      0030: 3821FFFF 3C211234 38210000 3C211235
+# CHECK-NEXT:    )
+# CHECK-NEXT:  }
+
+# CHECK:        Section {
+# CHECK:          Name: .data
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [
+# CHECK-NEXT:       SHF_ALLOC
+# CHECK-NEXT:       SHF_WRITE
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size: 15
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 4
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 12345678 9ABCDEF0 87654321 BEEF42
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+
diff --git a/test/MC/PowerPC/ppc64-fixup-explicit.s b/test/MC/PowerPC/ppc64-fixup-explicit.s
new file mode 100644
index 000000000000..7c56fe882809
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-fixup-explicit.s
@@ -0,0 +1,46 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL
+
+# GOT references must result in explicit relocations
+# even if the target symbol is local.
+
+target:
+
+# CHECK: addi 4, 3, target@GOT           # encoding: [0x38,0x83,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@GOT, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16 target 0x0
+         addi 4, 3, target@got  
+
+# CHECK: ld 1, target@GOT(2)             # encoding: [0xe8,0x22,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@GOT, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_DS target 0x0
+         ld 1, target@got(2)
+
+# CHECK: addis 3, 2, target@got@ha       # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_HA target 0x0
+         addis 3, 2, target@got@ha
+
+# CHECK: addi 4, 3, target@got@l         # encoding: [0x38,0x83,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_LO target 0x0
+         addi 4, 3, target@got@l
+
+# CHECK: addis 3, 2, target@got@h        # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_HI target 0x0
+         addis 3, 2, target@got@h
+
+# CHECK: lwz 1, target@got@l(3)          # encoding: [0x80,0x23,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_LO target 0x0
+         lwz 1, target@got@l(3)
+
+# CHECK: ld 1, target@got@l(3)           # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_LO_DS target 0x0
+         ld 1, target@got@l(3)
+
diff --git a/test/MC/PowerPC/ppc64-fixups.s b/test/MC/PowerPC/ppc64-fixups.s
index 1dcbca8f8189..a0750664c2c7 100644
--- a/test/MC/PowerPC/ppc64-fixups.s
+++ b/test/MC/PowerPC/ppc64-fixups.s
@@ -1,95 +1,448 @@
 
 # RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
 
-# FIXME: .TOC.@tocbase
+# RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL
+
+# CHECK: b target                        # encoding: [0b010010AA,A,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_REL24 target 0x0
+         b target
+
+# CHECK: ba target                       # encoding: [0b010010AA,A,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24abs
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_ADDR24 target 0x0
+         ba target
+
+# CHECK: beq 0, target                   # encoding: [0x41,0x82,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_REL14 target 0x0
+         beq target
+
+# CHECK: beqa 0, target                  # encoding: [0x41,0x82,A,0bAAAAAA10]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14abs
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_ADDR14 target 0x0
+         beqa target
+
 
 # CHECK: li 3, target@l                  # encoding: [0x38,0x60,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_LO target 0x0
          li 3, target@l
 
 # CHECK: addis 3, 3, target@ha           # encoding: [0x3c,0x63,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HA target 0x0
          addis 3, 3, target@ha
 
 # CHECK: lis 3, target@ha                # encoding: [0x3c,0x60,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HA target 0x0
          lis 3, target@ha
 
 # CHECK: addi 4, 3, target@l             # encoding: [0x38,0x83,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_LO target 0x0
          addi 4, 3, target@l
 
+# CHECK: li 3, target@ha                 # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HA target 0x0
+         li 3, target@ha
+
+# CHECK: lis 3, target@l                 # encoding: [0x3c,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_LO target 0x0
+         lis 3, target@l
+
+# CHECK: li 3, target@h                  # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HI target 0x0
+         li 3, target@h
+
+# CHECK: lis 3, target@h                  # encoding: [0x3c,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HI target 0x0
+         lis 3, target@h
+
+# CHECK: li 3, target@higher             # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@higher, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HIGHER target 0x0
+         li 3, target@higher
+
+# CHECK: lis 3, target@highest           # encoding: [0x3c,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@highest, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HIGHEST target 0x0
+         lis 3, target@highest
+
+# CHECK: li 3, target@highera            # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@highera, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HIGHERA target 0x0
+         li 3, target@highera
+
+# CHECK: lis 3, target@highesta          # encoding: [0x3c,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@highesta, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HIGHESTA target 0x0
+         lis 3, target@highesta
+
 # CHECK: lwz 1, target@l(3)              # encoding: [0x80,0x23,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_LO target 0x0
          lwz 1, target@l(3)
 
+# CHECK: lwz 1, target(3)                # encoding: [0x80,0x23,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16 target 0x0
+         lwz 1, target(3)
+
 # CHECK: ld 1, target@l(3)               # encoding: [0xe8,0x23,A,0bAAAAAA00]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16_ds
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_LO_DS target 0x0
          ld 1, target@l(3)
 
+# CHECK: ld 1, target(3)                 # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_DS target 0x0
+         ld 1, target(3)
+
+base:
+# CHECK: lwz 1, target-base(3)           # encoding: [0x80,0x23,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target-base, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_REL16 target 0x2
+         lwz 1, target-base(3)
+
+# CHECK: li 3, target-base@h             # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target-base@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_REL16_HI target 0x6
+         li 3, target-base@h
+
+# CHECK: li 3, target-base@l             # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target-base@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_REL16_LO target 0xA
+         li 3, target-base@l
+
+# CHECK: li 3, target-base@ha            # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target-base@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_REL16_HA target 0xE
+         li 3, target-base@ha
+
+# CHECK: ori 3, 3, target@l              # encoding: [0x60,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_LO target 0x0
+         ori 3, 3, target@l
+
+# CHECK: oris 3, 3, target@h             # encoding: [0x64,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_ADDR16_HI target 0x0
+         oris 3, 3, target@h
+
 # CHECK: ld 1, target@toc(2)             # encoding: [0xe8,0x22,A,0bAAAAAA00]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc, kind: fixup_ppc_lo16_ds
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@toc, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TOC16_DS target 0x0
          ld 1, target@toc(2)
 
 # CHECK: addis 3, 2, target@toc@ha       # encoding: [0x3c,0x62,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@toc@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TOC16_HA target 0x0
          addis 3, 2, target@toc@ha
 
 # CHECK: addi 4, 3, target@toc@l         # encoding: [0x38,0x83,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@toc@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TOC16_LO target 0x0
          addi 4, 3, target@toc@l
 
+# CHECK: addis 3, 2, target@toc@h        # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@toc@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TOC16_HI target 0x0
+         addis 3, 2, target@toc@h
+
 # CHECK: lwz 1, target@toc@l(3)          # encoding: [0x80,0x23,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@toc@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TOC16_LO target 0x0
          lwz 1, target@toc@l(3)
 
 # CHECK: ld 1, target@toc@l(3)           # encoding: [0xe8,0x23,A,0bAAAAAA00]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@l, kind: fixup_ppc_lo16_ds
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@toc@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TOC16_LO_DS target 0x0
          ld 1, target@toc@l(3)
 
-# FIXME: @tls
+# CHECK: addi 4, 3, target@GOT           # encoding: [0x38,0x83,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@GOT, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16 target 0x0
+         addi 4, 3, target@got  
+
+# CHECK: ld 1, target@GOT(2)             # encoding: [0xe8,0x22,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@GOT, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_DS target 0x0
+         ld 1, target@got(2)
+
+# CHECK: addis 3, 2, target@got@ha       # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_HA target 0x0
+         addis 3, 2, target@got@ha
+
+# CHECK: addi 4, 3, target@got@l         # encoding: [0x38,0x83,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_LO target 0x0
+         addi 4, 3, target@got@l
+
+# CHECK: addis 3, 2, target@got@h        # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_HI target 0x0
+         addis 3, 2, target@got@h
+
+# CHECK: lwz 1, target@got@l(3)          # encoding: [0x80,0x23,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_LO target 0x0
+         lwz 1, target@got@l(3)
+
+# CHECK: ld 1, target@got@l(3)           # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT16_LO_DS target 0x0
+         ld 1, target@got@l(3)
 
 
 # CHECK: addis 3, 2, target@tprel@ha     # encoding: [0x3c,0x62,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tprel@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_HA target 0x0
          addis 3, 2, target@tprel@ha
 
 # CHECK: addi 3, 3, target@tprel@l       # encoding: [0x38,0x63,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tprel@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_LO target 0x0
          addi 3, 3, target@tprel@l
 
+# CHECK: addi 3, 3, target@tprel         # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16 target 0x0
+         addi 3, 3, target@tprel
+
+# CHECK: addi 3, 3, target@tprel@h       # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_HI target 0x0
+         addi 3, 3, target@tprel@h
+
+# CHECK: addi 3, 3, target@tprel@higher  # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@higher, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_HIGHER target 0x0
+         addi 3, 3, target@tprel@higher
+
+# CHECK: addis 3, 2, target@tprel@highest # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@highest, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_HIGHEST target 0x0
+         addis 3, 2, target@tprel@highest
+
+# CHECK: addi 3, 3, target@tprel@highera  # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@highera, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_HIGHERA target 0x0
+         addi 3, 3, target@tprel@highera
+
+# CHECK: addis 3, 2, target@tprel@highesta # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@highesta, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_HIGHESTA target 0x0
+         addis 3, 2, target@tprel@highesta
+
+# CHECK: ld 1, target@tprel@l(3)         # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_LO_DS target 0x0
+         ld 1, target@tprel@l(3)
+
+# CHECK: ld 1, target@tprel(3)           # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@tprel, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_TPREL16_DS target 0x0
+         ld 1, target@tprel(3)
+
 # CHECK: addis 3, 2, target@dtprel@ha    # encoding: [0x3c,0x62,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@dtprel@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_HA target 0x0
          addis 3, 2, target@dtprel@ha
 
 # CHECK: addi 3, 3, target@dtprel@l      # encoding: [0x38,0x63,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@dtprel@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_LO target 0x0
          addi 3, 3, target@dtprel@l
 
+# CHECK: addi 3, 3, target@dtprel         # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16 target 0x0
+         addi 3, 3, target@dtprel
+
+# CHECK: addi 3, 3, target@dtprel@h       # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_HI target 0x0
+         addi 3, 3, target@dtprel@h
+
+# CHECK: addi 3, 3, target@dtprel@higher  # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@higher, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_HIGHER target 0x0
+         addi 3, 3, target@dtprel@higher
+
+# CHECK: addis 3, 2, target@dtprel@highest # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@highest, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_HIGHEST target 0x0
+         addis 3, 2, target@dtprel@highest
+
+# CHECK: addi 3, 3, target@dtprel@highera  # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@highera, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_HIGHERA target 0x0
+         addi 3, 3, target@dtprel@highera
+
+# CHECK: addis 3, 2, target@dtprel@highesta # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@highesta, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_HIGHESTA target 0x0
+         addis 3, 2, target@dtprel@highesta
+
+# CHECK: ld 1, target@dtprel@l(3)        # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_LO_DS target 0x0
+         ld 1, target@dtprel@l(3)
+
+# CHECK: ld 1, target@dtprel(3)          # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@dtprel, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_DTPREL16_DS target 0x0
+         ld 1, target@dtprel(3)
+
 
 # CHECK: addis 3, 2, target@got@tprel@ha # encoding: [0x3c,0x62,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tprel@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tprel@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TPREL16_HA target 0x0
          addis 3, 2, target@got@tprel@ha
 
 # CHECK: ld 1, target@got@tprel@l(3)     # encoding: [0xe8,0x23,A,0bAAAAAA00]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tprel@l, kind: fixup_ppc_lo16_ds
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tprel@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TPREL16_LO_DS target 0x0
          ld 1, target@got@tprel@l(3)
 
+# CHECK: addis 3, 2, target@got@tprel@h  # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tprel@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TPREL16_HI target 0x0
+         addis 3, 2, target@got@tprel@h
+
+# CHECK: addis 3, 2, target@got@tprel@l  # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tprel@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TPREL16_LO_DS target 0x0
+         addis 3, 2, target@got@tprel@l
+
+# CHECK: addis 3, 2, target@got@tprel    # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tprel, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TPREL16_DS target 0x0
+         addis 3, 2, target@got@tprel
+
+# CHECK: ld 1, target@got@tprel(3)       # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tprel, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TPREL16_DS target 0x0
+         ld 1, target@got@tprel(3)
+
+# CHECK: addis 3, 2, target@got@dtprel@ha # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@dtprel@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_DTPREL16_HA target 0x0
+         addis 3, 2, target@got@dtprel@ha
+
+# CHECK: ld 1, target@got@dtprel@l(3)    # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@dtprel@l, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_DTPREL16_LO_DS target 0x0
+         ld 1, target@got@dtprel@l(3)
+
+# CHECK: addis 3, 2, target@got@dtprel@h # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@dtprel@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_DTPREL16_HI target 0x0
+         addis 3, 2, target@got@dtprel@h
+
+# CHECK: addis 3, 2, target@got@dtprel@l # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@dtprel@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_DTPREL16_LO_DS target 0x0
+         addis 3, 2, target@got@dtprel@l
+
+# CHECK: addis 3, 2, target@got@dtprel   # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@dtprel, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_DTPREL16_DS target 0x0
+         addis 3, 2, target@got@dtprel
+
+# CHECK: ld 1, target@got@dtprel(3)      # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@dtprel, kind: fixup_ppc_half16ds
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_DTPREL16_DS target 0x0
+         ld 1, target@got@dtprel(3)
 
 # CHECK: addis 3, 2, target@got@tlsgd@ha # encoding: [0x3c,0x62,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsgd@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsgd@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSGD16_HA target 0x0
          addis 3, 2, target@got@tlsgd@ha
 
 # CHECK: addi 3, 3, target@got@tlsgd@l   # encoding: [0x38,0x63,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsgd@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsgd@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSGD16_LO target 0x0
          addi 3, 3, target@got@tlsgd@l
 
+# CHECK: addi 3, 3, target@got@tlsgd@h   # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsgd@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSGD16_HI target 0x0
+         addi 3, 3, target@got@tlsgd@h
+
+# CHECK: addi 3, 3, target@got@tlsgd     # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsgd, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSGD16 target 0x0
+         addi 3, 3, target@got@tlsgd
+
 
 # CHECK: addis 3, 2, target@got@tlsld@ha # encoding: [0x3c,0x62,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsld@ha, kind: fixup_ppc_ha16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsld@ha, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSLD16_HA target 0x0
          addis 3, 2, target@got@tlsld@ha
 
 # CHECK: addi 3, 3, target@got@tlsld@l   # encoding: [0x38,0x63,A,A]
-# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsld@l, kind: fixup_ppc_lo16
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsld@l, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSLD16_LO target 0x0
          addi 3, 3, target@got@tlsld@l
 
+# CHECK: addi 3, 3, target@got@tlsld@h   # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsld@h, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSLD16_HI target 0x0
+         addi 3, 3, target@got@tlsld@h
+
+# CHECK: addi 3, 3, target@got@tlsld     # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: target@got@tlsld, kind: fixup_ppc_half16
+# CHECK-REL:                             0x{{[0-9A-F]*[26AE]}} R_PPC64_GOT_TLSLD16 target 0x0
+         addi 3, 3, target@got@tlsld
+
+# CHECK: bl __tls_get_addr(target@tlsgd) # encoding: [0b010010BB,B,B,0bBBBBBB01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tlsgd, kind: fixup_ppc_nofixup
+# CHECK-NEXT:                            #   fixup B - offset: 0, value: __tls_get_addr, kind: fixup_ppc_br24
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_TLSGD target 0x0
+# CHECK-REL-NEXT:                        0x{{[0-9A-F]*[048C]}} R_PPC64_REL24 __tls_get_addr 0x0
+         bl __tls_get_addr(target@tlsgd)
+
+# CHECK: bl __tls_get_addr(target@tlsld) # encoding: [0b010010BB,B,B,0bBBBBBB01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tlsld, kind: fixup_ppc_nofixup
+# CHECK-NEXT:                            #   fixup B - offset: 0, value: __tls_get_addr, kind: fixup_ppc_br24
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_TLSLD target 0x0
+# CHECK-REL-NEXT:                        0x{{[0-9A-F]*[048C]}} R_PPC64_REL24 __tls_get_addr 0x0
+         bl __tls_get_addr(target@tlsld)
+
+# CHECK: add 3, 4, target@tls            # encoding: [0x7c,0x64,0x6a,0x14]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tls, kind: fixup_ppc_nofixup
+# CHECK-REL:                             0x{{[0-9A-F]*[048C]}} R_PPC64_TLS target 0x0
+         add 3, 4, target@tls
+
+
+# Data relocs
+# llvm-mc does not show any "encoding" string for data, so we just check the relocs
+
+# CHECK-REL: .rela.data
+	.data
+
+# CHECK-REL: 0x{{[0-9A-F]*[08]}} R_PPC64_TOC - 0x0
+	.quad .TOC.@tocbase
+
+# CHECK-REL: 0x{{[0-9A-F]*[08]}} R_PPC64_DTPMOD64 target 0x0
+	.quad target@dtpmod
+
+# CHECK-REL: 0x{{[0-9A-F]*[08]}} R_PPC64_TPREL64 target 0x0
+	.quad target@tprel
+
+# CHECK-REL: 0x{{[0-9A-F]*[08]}} R_PPC64_DTPREL64 target 0x0
+	.quad target@dtprel
+
+# Constant fixup
+        ori 1, 2, 131071@l
+# CHECK: ori 1, 2, 131071@l              # encoding: [0x60,0x41,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 2, value: 131071@l, kind: fixup_ppc_half16
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.ll b/test/MC/PowerPC/ppc64-initial-cfa.ll
deleted file mode 100644
index 23a77384ecd0..000000000000
--- a/test/MC/PowerPC/ppc64-initial-cfa.ll
+++ /dev/null
@@ -1,84 +0,0 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=static %s -o - | \
-; RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=STATIC
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=PIC
-
-; FIXME: this file should be in .s form, change when asm parser is available.
-
-define void @f() {
-entry:
-  ret void
-}
-
-; STATIC:      Section {
-; STATIC:        Name: .eh_frame
-; STATIC-NEXT:   Type: SHT_PROGBITS
-; STATIC-NEXT:   Flags [ (0x2)
-; STATIC-NEXT:     SHF_ALLOC
-; STATIC-NEXT:   ]
-; STATIC-NEXT:   Address:
-; STATIC-NEXT:   Offset:
-; STATIC-NEXT:   Size: 40
-; STATIC-NEXT:   Link: 0
-; STATIC-NEXT:   Info: 0
-; STATIC-NEXT:   AddressAlignment: 8
-; STATIC-NEXT:   EntrySize: 
-; STATIC-NEXT:   Relocations [
-; STATIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
-; STATIC-NEXT:   ]
-; STATIC-NEXT:   SectionData (
-; STATIC-NEXT:     0000: 00000010 00000000 017A5200 01784101
-; STATIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
-; STATIC-NEXT:     0020: 00000010 00000000
-; STATIC-NEXT:   )
-; STATIC-NEXT: }
-
-; STATIC:      Section {
-; STATIC:        Name: .rela.eh_frame
-; STATIC-NEXT:   Type: SHT_RELA
-; STATIC-NEXT:   Flags [ (0x0)
-; STATIC-NEXT:   ]
-; STATIC-NEXT:   Address:
-; STATIC-NEXT:   Offset:
-; STATIC-NEXT:   Size: 24
-; STATIC-NEXT:   Link:
-; STATIC-NEXT:   Info:
-; STATIC-NEXT:   AddressAlignment: 8
-; STATIC-NEXT:   EntrySize: 24
-
-
-; PIC:      Section {
-; PIC:        Name: .eh_frame
-; PIC-NEXT:   Type: SHT_PROGBITS
-; PIC-NEXT:   Flags [ (0x2)
-; PIC-NEXT:     SHF_ALLOC
-; PIC-NEXT:   ]
-; PIC-NEXT:   Address:
-; PIC-NEXT:   Offset:
-; PIC-NEXT:   Size: 40
-; PIC-NEXT:   Link: 0
-; PIC-NEXT:   Info: 0
-; PIC-NEXT:   AddressAlignment: 8
-; PIC-NEXT:   EntrySize: 0
-; PIC-NEXT:   Relocations [
-; PIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
-; PIC-NEXT:   ]
-; PIC-NEXT:   SectionData (
-; PIC-NEXT:     0000: 00000010 00000000 017A5200 01784101
-; PIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
-; PIC-NEXT:     0020: 00000010 00000000
-; PIC-NEXT:   )
-; PIC-NEXT: }
-
-; PIC:      Section {
-; PIC:        Name: .rela.eh_frame
-; PIC-NEXT:   Type: SHT_RELA
-; PIC-NEXT:   Flags [ (0x0)
-; PIC-NEXT:   ]
-; PIC-NEXT:   Address:
-; PIC-NEXT:   Offset:
-; PIC-NEXT:   Size: 24
-; PIC-NEXT:   Link:
-; PIC-NEXT:   Info:
-; PIC-NEXT:   AddressAlignment: 8
-; PIC-NEXT:   EntrySize: 24
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.s b/test/MC/PowerPC/ppc64-initial-cfa.s
new file mode 100644
index 000000000000..f976ae9ffa31
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-initial-cfa.s
@@ -0,0 +1,84 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=static %s | \
+# RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=STATIC
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=pic %s | \
+# RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=PIC
+
+        .cfi_startproc
+        nop
+        .cfi_endproc
+
+# STATIC:      Section {
+# STATIC:        Name: .eh_frame
+# STATIC-NEXT:   Type: SHT_PROGBITS
+# STATIC-NEXT:   Flags [ (0x2)
+# STATIC-NEXT:     SHF_ALLOC
+# STATIC-NEXT:   ]
+# STATIC-NEXT:   Address:
+# STATIC-NEXT:   Offset:
+# STATIC-NEXT:   Size: 40
+# STATIC-NEXT:   Link: 0
+# STATIC-NEXT:   Info: 0
+# STATIC-NEXT:   AddressAlignment: 8
+# STATIC-NEXT:   EntrySize: 
+# STATIC-NEXT:   Relocations [
+# STATIC-NEXT:   ]
+# STATIC-NEXT:   SectionData (
+# STATIC-NEXT:     0000: 00000010 00000000 017A5200 04784101
+# STATIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
+# STATIC-NEXT:     0020: 00000004 00000000
+# STATIC-NEXT:   )
+# STATIC-NEXT: }
+
+# STATIC:      Section {
+# STATIC:        Name: .rela.eh_frame
+# STATIC-NEXT:   Type: SHT_RELA
+# STATIC-NEXT:   Flags [ (0x0)
+# STATIC-NEXT:   ]
+# STATIC-NEXT:   Address:
+# STATIC-NEXT:   Offset:
+# STATIC-NEXT:   Size: 24
+# STATIC-NEXT:   Link:
+# STATIC-NEXT:   Info:
+# STATIC-NEXT:   AddressAlignment: 8
+# STATIC-NEXT:   EntrySize: 24
+# STATIC-NEXT:   Relocations [
+# STATIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
+# STATIC-NEXT:   ]
+
+# PIC:      Section {
+# PIC:        Name: .eh_frame
+# PIC-NEXT:   Type: SHT_PROGBITS
+# PIC-NEXT:   Flags [ (0x2)
+# PIC-NEXT:     SHF_ALLOC
+# PIC-NEXT:   ]
+# PIC-NEXT:   Address:
+# PIC-NEXT:   Offset:
+# PIC-NEXT:   Size: 40
+# PIC-NEXT:   Link: 0
+# PIC-NEXT:   Info: 0
+# PIC-NEXT:   AddressAlignment: 8
+# PIC-NEXT:   EntrySize: 0
+# PIC-NEXT:   Relocations [
+# PIC-NEXT:   ]
+# PIC-NEXT:   SectionData (
+# PIC-NEXT:     0000: 00000010 00000000 017A5200 04784101
+# PIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
+# PIC-NEXT:     0020: 00000004 00000000
+# PIC-NEXT:   )
+# PIC-NEXT: }
+
+# PIC:      Section {
+# PIC:        Name: .rela.eh_frame
+# PIC-NEXT:   Type: SHT_RELA
+# PIC-NEXT:   Flags [ (0x0)
+# PIC-NEXT:   ]
+# PIC-NEXT:   Address:
+# PIC-NEXT:   Offset:
+# PIC-NEXT:   Size: 24
+# PIC-NEXT:   Link:
+# PIC-NEXT:   Info:
+# PIC-NEXT:   AddressAlignment: 8
+# PIC-NEXT:   EntrySize: 24
+# PIC-NEXT:   Relocations [
+# PIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
+# PIC-NEXT:   ]
diff --git a/test/MC/PowerPC/ppc64-operands.s b/test/MC/PowerPC/ppc64-operands.s
index de5fcb0e8d2e..fc1cbeb94b94 100644
--- a/test/MC/PowerPC/ppc64-operands.s
+++ b/test/MC/PowerPC/ppc64-operands.s
@@ -40,6 +40,14 @@
 # CHECK: ori 1, 2, 65535                 # encoding: [0x60,0x41,0xff,0xff]
          ori 1, 2, 65535
 
+# Signed 16-bit immediate operands (extended range for addis)
+
+# CHECK: addis 1, 0, 0                   # encoding: [0x3c,0x20,0x00,0x00]
+         addis 1, 0, -65536
+
+# CHECK: addis 1, 0, -1                  # encoding: [0x3c,0x20,0xff,0xff]
+         addis 1, 0, 65535
+
 # D-Form memory operands
 
 # CHECK: lwz 1, 0(0)                     # encoding: [0x80,0x20,0x00,0x00]
@@ -85,3 +93,23 @@
 # CHECK: ld 1, -4(2)                     # encoding: [0xe8,0x22,0xff,0xfc]
          ld 1, -4(2)
 
+
+# Immediate branch operands
+
+# CHECK: b .+1024                        # encoding: [0x48,0x00,0x04,0x00]
+         b 1024
+
+# CHECK: ba 1024                         # encoding: [0x48,0x00,0x04,0x02]
+         ba 1024
+
+# CHECK: beq 0, .+1024                   # encoding: [0x41,0x82,0x04,0x00]
+         beq 1024
+
+# CHECK: beqa 0, 1024                    # encoding: [0x41,0x82,0x04,0x02]
+         beqa 1024
+
+# CHECK:                                 # encoding: [0x42,0x9f,A,0bAAAAAA01]
+         bcl 20, 31, $+4
+
+# CHECK:                                 # encoding: [0x42,0x00,A,0bAAAAAA00]
+         bdnz $-8
diff --git a/test/MC/PowerPC/ppc64-regs.s b/test/MC/PowerPC/ppc64-regs.s
new file mode 100644
index 000000000000..02b1fc5503d3
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-regs.s
@@ -0,0 +1,235 @@
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+#CHECK: .cfi_startproc
+#CHECK: .cfi_offset r0, 0
+#CHECK: .cfi_offset r1, 8
+#CHECK: .cfi_offset r2, 16
+#CHECK: .cfi_offset r3, 24
+#CHECK: .cfi_offset r4, 32
+#CHECK: .cfi_offset r5, 40
+#CHECK: .cfi_offset r6, 48
+#CHECK: .cfi_offset r7, 56
+#CHECK: .cfi_offset r8, 64
+#CHECK: .cfi_offset r9, 72
+#CHECK: .cfi_offset r10, 80
+#CHECK: .cfi_offset r11, 88
+#CHECK: .cfi_offset r12, 96
+#CHECK: .cfi_offset r13, 104
+#CHECK: .cfi_offset r14, 112
+#CHECK: .cfi_offset r15, 120
+#CHECK: .cfi_offset r16, 128
+#CHECK: .cfi_offset r17, 136
+#CHECK: .cfi_offset r18, 144
+#CHECK: .cfi_offset r19, 152
+#CHECK: .cfi_offset r20, 160
+#CHECK: .cfi_offset r21, 168
+#CHECK: .cfi_offset r22, 176
+#CHECK: .cfi_offset r22, 184
+#CHECK: .cfi_offset r23, 192
+#CHECK: .cfi_offset r24, 200
+#CHECK: .cfi_offset r25, 208
+#CHECK: .cfi_offset r26, 216
+#CHECK: .cfi_offset r27, 224
+#CHECK: .cfi_offset r28, 232
+#CHECK: .cfi_offset r29, 240
+#CHECK: .cfi_offset r30, 248
+#CHECK: .cfi_offset r31, 256
+
+#CHECK: .cfi_offset f0, 300
+#CHECK: .cfi_offset f1, 308
+#CHECK: .cfi_offset f2, 316
+#CHECK: .cfi_offset f3, 324
+#CHECK: .cfi_offset f4, 332
+#CHECK: .cfi_offset f5, 340
+#CHECK: .cfi_offset f6, 348
+#CHECK: .cfi_offset f7, 356
+#CHECK: .cfi_offset f8, 364
+#CHECK: .cfi_offset f9, 372
+#CHECK: .cfi_offset f10, 380
+#CHECK: .cfi_offset f11, 388
+#CHECK: .cfi_offset f12, 396
+#CHECK: .cfi_offset f13, 404
+#CHECK: .cfi_offset f14, 412
+#CHECK: .cfi_offset f15, 420
+#CHECK: .cfi_offset f16, 428
+#CHECK: .cfi_offset f17, 436
+#CHECK: .cfi_offset f18, 444
+#CHECK: .cfi_offset f19, 452
+#CHECK: .cfi_offset f20, 460
+#CHECK: .cfi_offset f21, 468
+#CHECK: .cfi_offset f22, 476
+#CHECK: .cfi_offset f22, 484
+#CHECK: .cfi_offset f23, 492
+#CHECK: .cfi_offset f24, 500
+#CHECK: .cfi_offset f25, 508
+#CHECK: .cfi_offset f26, 516
+#CHECK: .cfi_offset f27, 524
+#CHECK: .cfi_offset f28, 532
+#CHECK: .cfi_offset f29, 540
+#CHECK: .cfi_offset f30, 548
+#CHECK: .cfi_offset f31, 556
+
+#CHECK: .cfi_offset lr, 600
+#CHECK: .cfi_offset ctr, 608
+#CHECK: .cfi_offset vrsave, 616
+
+#CHECK: .cfi_offset cr0, 620
+#CHECK: .cfi_offset cr1, 621
+#CHECK: .cfi_offset cr2, 622
+#CHECK: .cfi_offset cr3, 623
+#CHECK: .cfi_offset cr4, 624
+#CHECK: .cfi_offset cr5, 625
+#CHECK: .cfi_offset cr6, 626
+#CHECK: .cfi_offset cr7, 627
+
+#CHECK: .cfi_offset v0, 700
+#CHECK: .cfi_offset v1, 716
+#CHECK: .cfi_offset v2, 732
+#CHECK: .cfi_offset v3, 748
+#CHECK: .cfi_offset v4, 764
+#CHECK: .cfi_offset v5, 780
+#CHECK: .cfi_offset v6, 796
+#CHECK: .cfi_offset v7, 812
+#CHECK: .cfi_offset v8, 828
+#CHECK: .cfi_offset v9, 844
+#CHECK: .cfi_offset v10, 860
+#CHECK: .cfi_offset v11, 876
+#CHECK: .cfi_offset v12, 892
+#CHECK: .cfi_offset v13, 908
+#CHECK: .cfi_offset v14, 924
+#CHECK: .cfi_offset v15, 940
+#CHECK: .cfi_offset v16, 956
+#CHECK: .cfi_offset v17, 972
+#CHECK: .cfi_offset v18, 988
+#CHECK: .cfi_offset v19, 1004
+#CHECK: .cfi_offset v20, 1020
+#CHECK: .cfi_offset v21, 1036
+#CHECK: .cfi_offset v22, 1052
+#CHECK: .cfi_offset v22, 1068
+#CHECK: .cfi_offset v23, 1084
+#CHECK: .cfi_offset v24, 1100
+#CHECK: .cfi_offset v25, 1116
+#CHECK: .cfi_offset v26, 1132
+#CHECK: .cfi_offset v27, 1148
+#CHECK: .cfi_offset v28, 1164
+#CHECK: .cfi_offset v29, 1180
+#CHECK: .cfi_offset v30, 1196
+#CHECK: .cfi_offset v31, 1212
+#CHECK: .cfi_endproc
+
+	.cfi_startproc
+	.cfi_offset r0,0
+	.cfi_offset r1,8
+	.cfi_offset r2,16
+	.cfi_offset r3,24
+	.cfi_offset r4,32
+	.cfi_offset r5,40
+	.cfi_offset r6,48
+	.cfi_offset r7,56
+	.cfi_offset r8,64
+	.cfi_offset r9,72
+	.cfi_offset r10,80
+	.cfi_offset r11,88
+	.cfi_offset r12,96
+	.cfi_offset r13,104
+	.cfi_offset r14,112
+	.cfi_offset r15,120
+	.cfi_offset r16,128
+	.cfi_offset r17,136
+	.cfi_offset r18,144
+	.cfi_offset r19,152
+	.cfi_offset r20,160
+	.cfi_offset r21,168
+	.cfi_offset r22,176
+	.cfi_offset r22,184
+	.cfi_offset r23,192
+	.cfi_offset r24,200
+	.cfi_offset r25,208
+	.cfi_offset r26,216
+	.cfi_offset r27,224
+	.cfi_offset r28,232
+	.cfi_offset r29,240
+	.cfi_offset r30,248
+	.cfi_offset r31,256
+
+	.cfi_offset f0,300
+	.cfi_offset f1,308
+	.cfi_offset f2,316
+	.cfi_offset f3,324
+	.cfi_offset f4,332
+	.cfi_offset f5,340
+	.cfi_offset f6,348
+	.cfi_offset f7,356
+	.cfi_offset f8,364
+	.cfi_offset f9,372
+	.cfi_offset f10,380
+	.cfi_offset f11,388
+	.cfi_offset f12,396
+	.cfi_offset f13,404
+	.cfi_offset f14,412
+	.cfi_offset f15,420
+	.cfi_offset f16,428
+	.cfi_offset f17,436
+	.cfi_offset f18,444
+	.cfi_offset f19,452
+	.cfi_offset f20,460
+	.cfi_offset f21,468
+	.cfi_offset f22,476
+	.cfi_offset f22,484
+	.cfi_offset f23,492
+	.cfi_offset f24,500
+	.cfi_offset f25,508
+	.cfi_offset f26,516
+	.cfi_offset f27,524
+	.cfi_offset f28,532
+	.cfi_offset f29,540
+	.cfi_offset f30,548
+	.cfi_offset f31,556
+
+	.cfi_offset lr,600
+	.cfi_offset ctr,608
+	.cfi_offset vrsave,616
+	.cfi_offset cr0,620
+	.cfi_offset cr1,621
+	.cfi_offset cr2,622
+	.cfi_offset cr3,623
+	.cfi_offset cr4,624
+	.cfi_offset cr5,625
+	.cfi_offset cr6,626
+	.cfi_offset cr7,627
+
+	.cfi_offset v0,700
+	.cfi_offset v1,716
+	.cfi_offset v2,732
+	.cfi_offset v3,748
+	.cfi_offset v4,764
+	.cfi_offset v5,780
+	.cfi_offset v6,796
+	.cfi_offset v7,812
+	.cfi_offset v8,828
+	.cfi_offset v9,844
+	.cfi_offset v10,860
+	.cfi_offset v11,876
+	.cfi_offset v12,892
+	.cfi_offset v13,908
+	.cfi_offset v14,924
+	.cfi_offset v15,940
+	.cfi_offset v16,956
+	.cfi_offset v17,972
+	.cfi_offset v18,988
+	.cfi_offset v19,1004
+	.cfi_offset v20,1020
+	.cfi_offset v21,1036
+	.cfi_offset v22,1052
+	.cfi_offset v22,1068
+	.cfi_offset v23,1084
+	.cfi_offset v24,1100
+	.cfi_offset v25,1116
+	.cfi_offset v26,1132
+	.cfi_offset v27,1148
+	.cfi_offset v28,1164
+	.cfi_offset v29,1180
+	.cfi_offset v30,1196
+	.cfi_offset v31,1212
+
+	.cfi_endproc
diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll
deleted file mode 100644
index ac8d303dd4cf..000000000000
--- a/test/MC/PowerPC/ppc64-relocs-01.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 -code-model=small  \
-;; RUN:  -filetype=obj %s -o - | \
-;; RUN: llvm-readobj -r | FileCheck %s
-
-;; FIXME: this file need to be in .s form, change when asm parse is done.
-
-@number64 = global i64 10, align 8
-
-define i64 @access_int64(i64 %a) nounwind readonly {
-entry:
-  %0 = load i64* @number64, align 8
-  %cmp = icmp eq i64 %0, %a
-  %conv1 = zext i1 %cmp to i64
-  ret i64 %conv1
-}
-
-declare double @sin(double) nounwind
-
-define double @test_branch24 (double %x) nounwind readonly {
-entry:
-  %add = call double @sin(double %x) nounwind
-  ret double %add
-}
-
-;; CHECK:      Relocations [
-
-;; The relocations in .rela.text are the 'number64' load using a
-;; R_PPC64_TOC16_DS against the .toc and the 'sin' external function
-;; address using a R_PPC64_REL24
-;; CHECK:        Section ({{[0-9]+}}) .text {
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_DS .toc
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_REL24    sin
-;; CHECK-NEXT:   }
-
-;; The .opd entry for the 'access_int64' function creates 2 relocations:
-;; 1. A R_PPC64_ADDR64 against the .text segment plus addend (the function
-;    address itself);
-;; 2. And a R_PPC64_TOC against no symbol (the linker will replace for the
-;;    module's TOC base).
-;; CHECK:        Section ({{[0-9]+}}) .opd {
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_ADDR64 .text 0x0
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC - 0x0
-
-;; Finally the TOC creates the relocation for the 'number64'.
-;; CHECK:        Section ({{[0-9]+}}) .toc {
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_ADDR64 number64 0x0
-;; CHECK-NEXT:   }
-
-;; CHECK-NEXT: ]
diff --git a/test/MC/PowerPC/ppc64-relocs-01.s b/test/MC/PowerPC/ppc64-relocs-01.s
new file mode 100644
index 000000000000..f2e899b83430
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-relocs-01.s
@@ -0,0 +1,46 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj %s | \
+# RUN: llvm-readobj -r | FileCheck %s
+
+        .section .opd,"aw",@progbits
+access_int64:
+        .quad .L.access_int64
+        .quad .TOC.@tocbase
+        .quad 0
+        .text
+.L.access_int64:
+        ld 4, .LC1@toc(2)
+        bl sin
+
+        .section .toc,"aw",@progbits
+.LC1:
+        .tc number64[TC],number64
+        .data
+        .globl number64
+number64:
+        .quad	10
+
+# CHECK:      Relocations [
+
+# The relocations in .rela.text are the 'number64' load using a
+# R_PPC64_TOC16_DS against the .toc and the 'sin' external function
+# address using a R_PPC64_REL24
+# CHECK:        Section ({{[0-9]+}}) .rela.text {
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_DS .toc
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_REL24    sin
+# CHECK-NEXT:   }
+
+# The .opd entry for the 'access_int64' function creates 2 relocations:
+# 1. A R_PPC64_ADDR64 against the .text segment plus addend (the function
+#    address itself);
+# 2. And a R_PPC64_TOC against no symbol (the linker will replace it with the
+#    module's TOC base).
+# CHECK:        Section ({{[0-9]+}}) .rela.opd {
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_ADDR64 .text 0x0
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC - 0x0
+
+# Finally the TOC creates the relocation for the 'number64'.
+# CHECK:        Section ({{[0-9]+}}) .rela.toc {
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_ADDR64 number64 0x0
+# CHECK-NEXT:   }
+
+# CHECK-NEXT: ]
diff --git a/test/MC/PowerPC/ppc64-tls-relocs-01.ll b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
deleted file mode 100644
index 4e901e816a68..000000000000
--- a/test/MC/PowerPC/ppc64-tls-relocs-01.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \
-;; RUN: llvm-readobj -r | FileCheck %s
-
-;; FIXME: this file should be in .s form, change when asm parser is available.
-
-@t = thread_local global i32 0, align 4
-
-define i32* @f() nounwind {
-entry:
-  ret i32* @t
-}
-
-;; Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
-;; against the thread-local symbol 't'.
-;; CHECK:      Relocations [
-;; CHECK:        Section ({{[0-9]+}}) .text {
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA t
-;; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO t
-;; CHECK-NEXT:   }
diff --git a/test/MC/PowerPC/ppc64-tls-relocs-01.s b/test/MC/PowerPC/ppc64-tls-relocs-01.s
new file mode 100644
index 000000000000..66a00fecf347
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-tls-relocs-01.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc -triple=powerpc64-unknown-linux-gnu -filetype=obj %s | \
+# RUN: llvm-readobj -r | FileCheck %s
+
+        .text
+        addis 3, 13, t@tprel@ha
+        addi 3, 3, t@tprel@l
+
+        .type t,@object
+        .section .tbss,"awT",@nobits
+        .globl t
+        .align 2
+t:
+        .long 0
+        .size t, 4
+
+# Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
+# against the thread-local symbol 't'.
+# CHECK:      Relocations [
+# CHECK:        Section ({{[0-9]+}}) .rela.text {
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_HA t
+# CHECK-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TPREL16_LO t
+# CHECK-NEXT:   }
diff --git a/test/MC/PowerPC/tls-gd-obj.s b/test/MC/PowerPC/tls-gd-obj.s
new file mode 100644
index 000000000000..63d47ee62935
--- /dev/null
+++ b/test/MC/PowerPC/tls-gd-obj.s
@@ -0,0 +1,56 @@
+// RUN: llvm-mc -triple=powerpc64-pc-linux -filetype=obj %s -o - | \
+// RUN: llvm-readobj -r | FileCheck %s
+
+// Test correct relocation generation for thread-local storage using
+// the general dynamic model and integrated assembly.
+
+
+	.file	"/home/espindola/llvm/llvm/test/CodeGen/PowerPC/tls-gd-obj.ll"
+	.text
+	.globl	main
+	.align	2
+	.type	main,@function
+	.section	.opd,"aw",@progbits
+main:                                   # @main
+	.align	3
+	.quad	.L.main
+	.quad	.TOC.@tocbase
+	.quad	0
+	.text
+.L.main:
+# BB#0:                                 # %entry
+	addis 3, 2, a@got@tlsgd@ha
+	addi 3, 3, a@got@tlsgd@l
+	li 4, 0
+	bl __tls_get_addr(a@tlsgd)
+	nop
+	stw 4, -4(1)
+	lwz 4, 0(3)
+	extsw 3, 4
+	blr
+	.long	0
+	.quad	0
+.Ltmp0:
+	.size	main, .Ltmp0-.L.main
+
+	.type	a,@object               # @a
+	.section	.tbss,"awT",@nobits
+	.globl	a
+	.align	2
+a:
+	.long	0                       # 0x0
+	.size	a, 4
+
+
+// Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO,
+// and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
+// for the call to __tls_get_addr.
+//
+// CHECK: Relocations [
+// CHECK:   Section (2) .rela.text {
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSGD          a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/PowerPC/tls-ie-obj.s b/test/MC/PowerPC/tls-ie-obj.s
new file mode 100644
index 000000000000..c8c5d91573ac
--- /dev/null
+++ b/test/MC/PowerPC/tls-ie-obj.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=powerpc64-pc-linux -filetype=obj %s -o - | \
+// RUN: llvm-readobj -r | FileCheck %s
+
+// Test correct relocation generation for thread-local storage
+// using the initial-exec model and integrated assembly.
+
+	.file	"/home/espindola/llvm/llvm/test/CodeGen/PowerPC/tls-ie-obj.ll"
+	.text
+	.globl	main
+	.align	2
+	.type	main,@function
+	.section	.opd,"aw",@progbits
+main:                                   # @main
+	.align	3
+	.quad	.L.main
+	.quad	.TOC.@tocbase
+	.quad	0
+	.text
+.L.main:
+# BB#0:                                 # %entry
+	li 3, 0
+	addis 4, 2, a@got@tprel@ha
+	ld 4, a@got@tprel@l(4)
+	add 4, 4, a@tls
+	stw 3, -4(1)
+	lwz 3, 0(4)
+	extsw 3, 3
+	blr
+	.long	0
+	.quad	0
+.Ltmp0:
+	.size	main, .Ltmp0-.L.main
+
+
+// Verify generation of R_PPC64_GOT_TPREL16_HA, R_PPC64_GOT_TPREL16_LO_DS,
+// and R_PPC64_TLS for accessing external variable a.
+//
+// CHECK: Relocations [
+// CHECK:   Section (2) .rela.text {
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA    a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLS               a
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/PowerPC/tls-ld-obj.s b/test/MC/PowerPC/tls-ld-obj.s
new file mode 100644
index 000000000000..b0c4a7a528fb
--- /dev/null
+++ b/test/MC/PowerPC/tls-ld-obj.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple=powerpc64-pc-linux -filetype=obj %s -o - | \
+// RUN: llvm-readobj -r | FileCheck %s
+
+// Test correct relocation generation for thread-local storage using
+// the local dynamic model.
+
+	.file	"/home/espindola/llvm/llvm/test/CodeGen/PowerPC/tls-ld-obj.ll"
+	.text
+	.globl	main
+	.align	2
+	.type	main,@function
+	.section	.opd,"aw",@progbits
+main:                                   # @main
+	.align	3
+	.quad	.L.main
+	.quad	.TOC.@tocbase
+	.quad	0
+	.text
+.L.main:
+# BB#0:                                 # %entry
+	addis 3, 2, a@got@tlsld@ha
+	addi 3, 3, a@got@tlsld@l
+	li 4, 0
+	bl __tls_get_addr(a@tlsld)
+	nop
+	stw 4, -4(1)
+	addis 3, 3, a@dtprel@ha
+	addi 3, 3, a@dtprel@l
+	lwz 4, 0(3)
+	extsw 3, 4
+	blr
+	.long	0
+	.quad	0
+.Ltmp0:
+	.size	main, .Ltmp0-.L.main
+
+	.hidden	a                       # @a
+	.type	a,@object
+	.section	.tbss,"awT",@nobits
+	.globl	a
+	.align	2
+a:
+	.long	0                       # 0x0
+	.size	a, 4
+
+
+// Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO,
+// R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for
+// accessing external variable a, and R_PPC64_REL24 for the call to
+// __tls_get_addr.
+//
+// CHECK: Relocations [
+// CHECK:   Section (2) .rela.text {
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSLD          a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_HA    a
+// CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_LO    a
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/SystemZ/insn-a-01.s b/test/MC/SystemZ/insn-a-01.s
deleted file mode 100644
index 7bb94b31ebe8..000000000000
--- a/test/MC/SystemZ/insn-a-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: a	%r0, 0                  # encoding: [0x5a,0x00,0x00,0x00]
-#CHECK: a	%r0, 4095               # encoding: [0x5a,0x00,0x0f,0xff]
-#CHECK: a	%r0, 0(%r1)             # encoding: [0x5a,0x00,0x10,0x00]
-#CHECK: a	%r0, 0(%r15)            # encoding: [0x5a,0x00,0xf0,0x00]
-#CHECK: a	%r0, 4095(%r1,%r15)     # encoding: [0x5a,0x01,0xff,0xff]
-#CHECK: a	%r0, 4095(%r15,%r1)     # encoding: [0x5a,0x0f,0x1f,0xff]
-#CHECK: a	%r15, 0                 # encoding: [0x5a,0xf0,0x00,0x00]
-
-	a	%r0, 0
-	a	%r0, 4095
-	a	%r0, 0(%r1)
-	a	%r0, 0(%r15)
-	a	%r0, 4095(%r1,%r15)
-	a	%r0, 4095(%r15,%r1)
-	a	%r15, 0
diff --git a/test/MC/SystemZ/insn-a-02.s b/test/MC/SystemZ/insn-a-02.s
deleted file mode 100644
index 9cc967e0cefb..000000000000
--- a/test/MC/SystemZ/insn-a-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: a	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: a	%r0, 4096
-
-	a	%r0, -1
-	a	%r0, 4096
diff --git a/test/MC/SystemZ/insn-adb-01.s b/test/MC/SystemZ/insn-adb-01.s
deleted file mode 100644
index b54be60c6b53..000000000000
--- a/test/MC/SystemZ/insn-adb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: adb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1a]
-#CHECK: adb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1a]
-#CHECK: adb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1a]
-#CHECK: adb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1a]
-#CHECK: adb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1a]
-#CHECK: adb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1a]
-#CHECK: adb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1a]
-
-	adb	%f0, 0
-	adb	%f0, 4095
-	adb	%f0, 0(%r1)
-	adb	%f0, 0(%r15)
-	adb	%f0, 4095(%r1,%r15)
-	adb	%f0, 4095(%r15,%r1)
-	adb	%f15, 0
diff --git a/test/MC/SystemZ/insn-adb-02.s b/test/MC/SystemZ/insn-adb-02.s
deleted file mode 100644
index ff97a51affa8..000000000000
--- a/test/MC/SystemZ/insn-adb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: adb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: adb	%f0, 4096
-
-	adb	%f0, -1
-	adb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-adbr-01.s b/test/MC/SystemZ/insn-adbr-01.s
deleted file mode 100644
index 05724d2a6a89..000000000000
--- a/test/MC/SystemZ/insn-adbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: adbr	%f0, %f0                # encoding: [0xb3,0x1a,0x00,0x00]
-#CHECK: adbr	%f0, %f15               # encoding: [0xb3,0x1a,0x00,0x0f]
-#CHECK: adbr	%f7, %f8                # encoding: [0xb3,0x1a,0x00,0x78]
-#CHECK: adbr	%f15, %f0               # encoding: [0xb3,0x1a,0x00,0xf0]
-
-	adbr	%f0, %f0
-	adbr	%f0, %f15
-	adbr	%f7, %f8
-	adbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-aeb-01.s b/test/MC/SystemZ/insn-aeb-01.s
deleted file mode 100644
index b4268e5c72b6..000000000000
--- a/test/MC/SystemZ/insn-aeb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: aeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0a]
-#CHECK: aeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0a]
-#CHECK: aeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0a]
-#CHECK: aeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0a]
-#CHECK: aeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0a]
-#CHECK: aeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0a]
-#CHECK: aeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0a]
-
-	aeb	%f0, 0
-	aeb	%f0, 4095
-	aeb	%f0, 0(%r1)
-	aeb	%f0, 0(%r15)
-	aeb	%f0, 4095(%r1,%r15)
-	aeb	%f0, 4095(%r15,%r1)
-	aeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-aeb-02.s b/test/MC/SystemZ/insn-aeb-02.s
deleted file mode 100644
index 4fade8ecebce..000000000000
--- a/test/MC/SystemZ/insn-aeb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: aeb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: aeb	%f0, 4096
-
-	aeb	%f0, -1
-	aeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-aebr-01.s b/test/MC/SystemZ/insn-aebr-01.s
deleted file mode 100644
index 2147627e3896..000000000000
--- a/test/MC/SystemZ/insn-aebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: aebr	%f0, %f0                # encoding: [0xb3,0x0a,0x00,0x00]
-#CHECK: aebr	%f0, %f15               # encoding: [0xb3,0x0a,0x00,0x0f]
-#CHECK: aebr	%f7, %f8                # encoding: [0xb3,0x0a,0x00,0x78]
-#CHECK: aebr	%f15, %f0               # encoding: [0xb3,0x0a,0x00,0xf0]
-
-	aebr	%f0, %f0
-	aebr	%f0, %f15
-	aebr	%f7, %f8
-	aebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-afi-01.s b/test/MC/SystemZ/insn-afi-01.s
deleted file mode 100644
index f9a911810ca9..000000000000
--- a/test/MC/SystemZ/insn-afi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: afi	%r0, -2147483648        # encoding: [0xc2,0x09,0x80,0x00,0x00,0x00]
-#CHECK: afi	%r0, -1                 # encoding: [0xc2,0x09,0xff,0xff,0xff,0xff]
-#CHECK: afi	%r0, 0                  # encoding: [0xc2,0x09,0x00,0x00,0x00,0x00]
-#CHECK: afi	%r0, 1                  # encoding: [0xc2,0x09,0x00,0x00,0x00,0x01]
-#CHECK: afi	%r0, 2147483647         # encoding: [0xc2,0x09,0x7f,0xff,0xff,0xff]
-#CHECK: afi	%r15, 0                 # encoding: [0xc2,0xf9,0x00,0x00,0x00,0x00]
-
-	afi	%r0, -1 << 31
-	afi	%r0, -1
-	afi	%r0, 0
-	afi	%r0, 1
-	afi	%r0, (1 << 31) - 1
-	afi	%r15, 0
diff --git a/test/MC/SystemZ/insn-afi-02.s b/test/MC/SystemZ/insn-afi-02.s
deleted file mode 100644
index f848e196a9a7..000000000000
--- a/test/MC/SystemZ/insn-afi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: afi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: afi	%r0, (1 << 31)
-
-	afi	%r0, (-1 << 31) - 1
-	afi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-ag-01.s b/test/MC/SystemZ/insn-ag-01.s
deleted file mode 100644
index 63029d75e452..000000000000
--- a/test/MC/SystemZ/insn-ag-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ag	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x08]
-#CHECK: ag	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x08]
-#CHECK: ag	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x08]
-#CHECK: ag	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x08]
-#CHECK: ag	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x08]
-#CHECK: ag	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x08]
-#CHECK: ag	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x08]
-#CHECK: ag	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x08]
-#CHECK: ag	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x08]
-#CHECK: ag	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x08]
-
-	ag	%r0, -524288
-	ag	%r0, -1
-	ag	%r0, 0
-	ag	%r0, 1
-	ag	%r0, 524287
-	ag	%r0, 0(%r1)
-	ag	%r0, 0(%r15)
-	ag	%r0, 524287(%r1,%r15)
-	ag	%r0, 524287(%r15,%r1)
-	ag	%r15, 0
diff --git a/test/MC/SystemZ/insn-ag-02.s b/test/MC/SystemZ/insn-ag-02.s
deleted file mode 100644
index 59694cd7d5a7..000000000000
--- a/test/MC/SystemZ/insn-ag-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ag	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: ag	%r0, 524288
-
-	ag	%r0, -524289
-	ag	%r0, 524288
diff --git a/test/MC/SystemZ/insn-agf-01.s b/test/MC/SystemZ/insn-agf-01.s
deleted file mode 100644
index 40a985834200..000000000000
--- a/test/MC/SystemZ/insn-agf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: agf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x18]
-#CHECK: agf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x18]
-#CHECK: agf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x18]
-#CHECK: agf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x18]
-#CHECK: agf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x18]
-#CHECK: agf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x18]
-#CHECK: agf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x18]
-#CHECK: agf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x18]
-#CHECK: agf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x18]
-#CHECK: agf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x18]
-
-	agf	%r0, -524288
-	agf	%r0, -1
-	agf	%r0, 0
-	agf	%r0, 1
-	agf	%r0, 524287
-	agf	%r0, 0(%r1)
-	agf	%r0, 0(%r15)
-	agf	%r0, 524287(%r1,%r15)
-	agf	%r0, 524287(%r15,%r1)
-	agf	%r15, 0
diff --git a/test/MC/SystemZ/insn-agf-02.s b/test/MC/SystemZ/insn-agf-02.s
deleted file mode 100644
index dee31dc1d547..000000000000
--- a/test/MC/SystemZ/insn-agf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: agf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: agf	%r0, 524288
-
-	agf	%r0, -524289
-	agf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-agfi-01.s b/test/MC/SystemZ/insn-agfi-01.s
deleted file mode 100644
index a64721ddd1d8..000000000000
--- a/test/MC/SystemZ/insn-agfi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: agfi	%r0, -2147483648        # encoding: [0xc2,0x08,0x80,0x00,0x00,0x00]
-#CHECK: agfi	%r0, -1                 # encoding: [0xc2,0x08,0xff,0xff,0xff,0xff]
-#CHECK: agfi	%r0, 0                  # encoding: [0xc2,0x08,0x00,0x00,0x00,0x00]
-#CHECK: agfi	%r0, 1                  # encoding: [0xc2,0x08,0x00,0x00,0x00,0x01]
-#CHECK: agfi	%r0, 2147483647         # encoding: [0xc2,0x08,0x7f,0xff,0xff,0xff]
-#CHECK: agfi	%r15, 0                 # encoding: [0xc2,0xf8,0x00,0x00,0x00,0x00]
-
-	agfi	%r0, -1 << 31
-	agfi	%r0, -1
-	agfi	%r0, 0
-	agfi	%r0, 1
-	agfi	%r0, (1 << 31) - 1
-	agfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-agfi-02.s b/test/MC/SystemZ/insn-agfi-02.s
deleted file mode 100644
index 1db3eaae0b6f..000000000000
--- a/test/MC/SystemZ/insn-agfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: agfi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: agfi	%r0, (1 << 31)
-
-	agfi	%r0, (-1 << 31) - 1
-	agfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-agfr-01.s b/test/MC/SystemZ/insn-agfr-01.s
deleted file mode 100644
index cd17db95f91e..000000000000
--- a/test/MC/SystemZ/insn-agfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: agfr	%r0, %r0                # encoding: [0xb9,0x18,0x00,0x00]
-#CHECK: agfr	%r0, %r15               # encoding: [0xb9,0x18,0x00,0x0f]
-#CHECK: agfr	%r15, %r0               # encoding: [0xb9,0x18,0x00,0xf0]
-#CHECK: agfr	%r7, %r8                # encoding: [0xb9,0x18,0x00,0x78]
-
-	agfr	%r0,%r0
-	agfr	%r0,%r15
-	agfr	%r15,%r0
-	agfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-aghi-01.s b/test/MC/SystemZ/insn-aghi-01.s
deleted file mode 100644
index cd77c355d241..000000000000
--- a/test/MC/SystemZ/insn-aghi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: aghi	%r0, -32768             # encoding: [0xa7,0x0b,0x80,0x00]
-#CHECK: aghi	%r0, -1                 # encoding: [0xa7,0x0b,0xff,0xff]
-#CHECK: aghi	%r0, 0                  # encoding: [0xa7,0x0b,0x00,0x00]
-#CHECK: aghi	%r0, 1                  # encoding: [0xa7,0x0b,0x00,0x01]
-#CHECK: aghi	%r0, 32767              # encoding: [0xa7,0x0b,0x7f,0xff]
-#CHECK: aghi	%r15, 0                 # encoding: [0xa7,0xfb,0x00,0x00]
-
-	aghi	%r0, -32768
-	aghi	%r0, -1
-	aghi	%r0, 0
-	aghi	%r0, 1
-	aghi	%r0, 32767
-	aghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-aghi-02.s b/test/MC/SystemZ/insn-aghi-02.s
deleted file mode 100644
index e2657c6869e4..000000000000
--- a/test/MC/SystemZ/insn-aghi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: aghi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: aghi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: aghi	%r0, foo
-
-	aghi	%r0, -32769
-	aghi	%r0, 32768
-	aghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-agr-01.s b/test/MC/SystemZ/insn-agr-01.s
deleted file mode 100644
index a84ff3d22620..000000000000
--- a/test/MC/SystemZ/insn-agr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: agr	%r0, %r0                # encoding: [0xb9,0x08,0x00,0x00]
-#CHECK: agr	%r0, %r15               # encoding: [0xb9,0x08,0x00,0x0f]
-#CHECK: agr	%r15, %r0               # encoding: [0xb9,0x08,0x00,0xf0]
-#CHECK: agr	%r7, %r8                # encoding: [0xb9,0x08,0x00,0x78]
-
-	agr	%r0,%r0
-	agr	%r0,%r15
-	agr	%r15,%r0
-	agr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-agsi-01.s b/test/MC/SystemZ/insn-agsi-01.s
deleted file mode 100644
index 9b2fe4b80e81..000000000000
--- a/test/MC/SystemZ/insn-agsi-01.s
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: agsi	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x7a]
-#CHECK: agsi	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x7a]
-#CHECK: agsi	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x7a]
-#CHECK: agsi	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x7a]
-#CHECK: agsi	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x7a]
-#CHECK: agsi	0, -128                 # encoding: [0xeb,0x80,0x00,0x00,0x00,0x7a]
-#CHECK: agsi	0, -1                   # encoding: [0xeb,0xff,0x00,0x00,0x00,0x7a]
-#CHECK: agsi	0, 1                    # encoding: [0xeb,0x01,0x00,0x00,0x00,0x7a]
-#CHECK: agsi	0, 127                  # encoding: [0xeb,0x7f,0x00,0x00,0x00,0x7a]
-#CHECK: agsi	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x7a]
-#CHECK: agsi	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x7a]
-#CHECK: agsi	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x7a]
-#CHECK: agsi	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x7a]
-
-	agsi	-524288, 0
-	agsi	-1, 0
-	agsi	0, 0
-	agsi	1, 0
-	agsi	524287, 0
-	agsi	0, -128
-	agsi	0, -1
-	agsi	0, 1
-	agsi	0, 127
-	agsi	0(%r1), 42
-	agsi	0(%r15), 42
-	agsi	524287(%r1), 42
-	agsi	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-agsi-02.s b/test/MC/SystemZ/insn-agsi-02.s
deleted file mode 100644
index a4b3d9a8883b..000000000000
--- a/test/MC/SystemZ/insn-agsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: agsi	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: agsi	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: agsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: agsi	0, -129
-#CHECK: error: invalid operand
-#CHECK: agsi	0, 128
-
-	agsi	-524289, 0
-	agsi	524288, 0
-	agsi	0(%r1,%r2), 0
-	agsi	0, -129
-	agsi	0, 128
diff --git a/test/MC/SystemZ/insn-ah-01.s b/test/MC/SystemZ/insn-ah-01.s
deleted file mode 100644
index 35012f0678ed..000000000000
--- a/test/MC/SystemZ/insn-ah-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ah	%r0, 0                  # encoding: [0x4a,0x00,0x00,0x00]
-#CHECK: ah	%r0, 4095               # encoding: [0x4a,0x00,0x0f,0xff]
-#CHECK: ah	%r0, 0(%r1)             # encoding: [0x4a,0x00,0x10,0x00]
-#CHECK: ah	%r0, 0(%r15)            # encoding: [0x4a,0x00,0xf0,0x00]
-#CHECK: ah	%r0, 4095(%r1,%r15)     # encoding: [0x4a,0x01,0xff,0xff]
-#CHECK: ah	%r0, 4095(%r15,%r1)     # encoding: [0x4a,0x0f,0x1f,0xff]
-#CHECK: ah	%r15, 0                 # encoding: [0x4a,0xf0,0x00,0x00]
-
-	ah	%r0, 0
-	ah	%r0, 4095
-	ah	%r0, 0(%r1)
-	ah	%r0, 0(%r15)
-	ah	%r0, 4095(%r1,%r15)
-	ah	%r0, 4095(%r15,%r1)
-	ah	%r15, 0
diff --git a/test/MC/SystemZ/insn-ah-02.s b/test/MC/SystemZ/insn-ah-02.s
deleted file mode 100644
index 1a20cd715f77..000000000000
--- a/test/MC/SystemZ/insn-ah-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ah	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: ah	%r0, 4096
-
-	ah	%r0, -1
-	ah	%r0, 4096
diff --git a/test/MC/SystemZ/insn-ahi-01.s b/test/MC/SystemZ/insn-ahi-01.s
deleted file mode 100644
index e0a5fb389e6c..000000000000
--- a/test/MC/SystemZ/insn-ahi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ahi	%r0, -32768             # encoding: [0xa7,0x0a,0x80,0x00]
-#CHECK: ahi	%r0, -1                 # encoding: [0xa7,0x0a,0xff,0xff]
-#CHECK: ahi	%r0, 0                  # encoding: [0xa7,0x0a,0x00,0x00]
-#CHECK: ahi	%r0, 1                  # encoding: [0xa7,0x0a,0x00,0x01]
-#CHECK: ahi	%r0, 32767              # encoding: [0xa7,0x0a,0x7f,0xff]
-#CHECK: ahi	%r15, 0                 # encoding: [0xa7,0xfa,0x00,0x00]
-
-	ahi	%r0, -32768
-	ahi	%r0, -1
-	ahi	%r0, 0
-	ahi	%r0, 1
-	ahi	%r0, 32767
-	ahi	%r15, 0
diff --git a/test/MC/SystemZ/insn-ahi-02.s b/test/MC/SystemZ/insn-ahi-02.s
deleted file mode 100644
index d41e2da2a21b..000000000000
--- a/test/MC/SystemZ/insn-ahi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ahi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: ahi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: ahi	%r0, foo
-
-	ahi	%r0, -32769
-	ahi	%r0, 32768
-	ahi	%r0, foo
diff --git a/test/MC/SystemZ/insn-ahy-01.s b/test/MC/SystemZ/insn-ahy-01.s
deleted file mode 100644
index ff25dc503c6b..000000000000
--- a/test/MC/SystemZ/insn-ahy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ahy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7a]
-#CHECK: ahy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7a]
-#CHECK: ahy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7a]
-#CHECK: ahy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7a]
-#CHECK: ahy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7a]
-#CHECK: ahy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7a]
-#CHECK: ahy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7a]
-#CHECK: ahy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7a]
-#CHECK: ahy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7a]
-#CHECK: ahy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7a]
-
-	ahy	%r0, -524288
-	ahy	%r0, -1
-	ahy	%r0, 0
-	ahy	%r0, 1
-	ahy	%r0, 524287
-	ahy	%r0, 0(%r1)
-	ahy	%r0, 0(%r15)
-	ahy	%r0, 524287(%r1,%r15)
-	ahy	%r0, 524287(%r15,%r1)
-	ahy	%r15, 0
diff --git a/test/MC/SystemZ/insn-ahy-02.s b/test/MC/SystemZ/insn-ahy-02.s
deleted file mode 100644
index e725e146df56..000000000000
--- a/test/MC/SystemZ/insn-ahy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ahy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: ahy	%r0, 524288
-
-	ahy	%r0, -524289
-	ahy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-al-01.s b/test/MC/SystemZ/insn-al-01.s
deleted file mode 100644
index 1efc33f0e980..000000000000
--- a/test/MC/SystemZ/insn-al-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: al	%r0, 0                  # encoding: [0x5e,0x00,0x00,0x00]
-#CHECK: al	%r0, 4095               # encoding: [0x5e,0x00,0x0f,0xff]
-#CHECK: al	%r0, 0(%r1)             # encoding: [0x5e,0x00,0x10,0x00]
-#CHECK: al	%r0, 0(%r15)            # encoding: [0x5e,0x00,0xf0,0x00]
-#CHECK: al	%r0, 4095(%r1,%r15)     # encoding: [0x5e,0x01,0xff,0xff]
-#CHECK: al	%r0, 4095(%r15,%r1)     # encoding: [0x5e,0x0f,0x1f,0xff]
-#CHECK: al	%r15, 0                 # encoding: [0x5e,0xf0,0x00,0x00]
-
-	al	%r0, 0
-	al	%r0, 4095
-	al	%r0, 0(%r1)
-	al	%r0, 0(%r15)
-	al	%r0, 4095(%r1,%r15)
-	al	%r0, 4095(%r15,%r1)
-	al	%r15, 0
diff --git a/test/MC/SystemZ/insn-al-02.s b/test/MC/SystemZ/insn-al-02.s
deleted file mode 100644
index 39b1b06a1ba2..000000000000
--- a/test/MC/SystemZ/insn-al-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: al	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: al	%r0, 4096
-
-	al	%r0, -1
-	al	%r0, 4096
diff --git a/test/MC/SystemZ/insn-alc-01.s b/test/MC/SystemZ/insn-alc-01.s
deleted file mode 100644
index 5f8be6a7d5af..000000000000
--- a/test/MC/SystemZ/insn-alc-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x98]
-#CHECK: alc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x98]
-#CHECK: alc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x98]
-#CHECK: alc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x98]
-#CHECK: alc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x98]
-#CHECK: alc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x98]
-#CHECK: alc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x98]
-#CHECK: alc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x98]
-#CHECK: alc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x98]
-#CHECK: alc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x98]
-
-	alc	%r0, -524288
-	alc	%r0, -1
-	alc	%r0, 0
-	alc	%r0, 1
-	alc	%r0, 524287
-	alc	%r0, 0(%r1)
-	alc	%r0, 0(%r15)
-	alc	%r0, 524287(%r1,%r15)
-	alc	%r0, 524287(%r15,%r1)
-	alc	%r15, 0
diff --git a/test/MC/SystemZ/insn-alc-02.s b/test/MC/SystemZ/insn-alc-02.s
deleted file mode 100644
index 9c082f2dfb7a..000000000000
--- a/test/MC/SystemZ/insn-alc-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: alc	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: alc	%r0, 524288
-
-	alc	%r0, -524289
-	alc	%r0, 524288
diff --git a/test/MC/SystemZ/insn-alcg-01.s b/test/MC/SystemZ/insn-alcg-01.s
deleted file mode 100644
index c05207ec9e98..000000000000
--- a/test/MC/SystemZ/insn-alcg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alcg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x88]
-#CHECK: alcg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x88]
-#CHECK: alcg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x88]
-#CHECK: alcg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x88]
-#CHECK: alcg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x88]
-#CHECK: alcg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x88]
-#CHECK: alcg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x88]
-#CHECK: alcg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x88]
-#CHECK: alcg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x88]
-#CHECK: alcg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x88]
-
-	alcg	%r0, -524288
-	alcg	%r0, -1
-	alcg	%r0, 0
-	alcg	%r0, 1
-	alcg	%r0, 524287
-	alcg	%r0, 0(%r1)
-	alcg	%r0, 0(%r15)
-	alcg	%r0, 524287(%r1,%r15)
-	alcg	%r0, 524287(%r15,%r1)
-	alcg	%r15, 0
diff --git a/test/MC/SystemZ/insn-alcg-02.s b/test/MC/SystemZ/insn-alcg-02.s
deleted file mode 100644
index 3dab6ddc5dd8..000000000000
--- a/test/MC/SystemZ/insn-alcg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: alcg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: alcg	%r0, 524288
-
-	alcg	%r0, -524289
-	alcg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-alcgr-01.s b/test/MC/SystemZ/insn-alcgr-01.s
deleted file mode 100644
index c9f3ce293418..000000000000
--- a/test/MC/SystemZ/insn-alcgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alcgr	%r0, %r0                # encoding: [0xb9,0x88,0x00,0x00]
-#CHECK: alcgr	%r0, %r15               # encoding: [0xb9,0x88,0x00,0x0f]
-#CHECK: alcgr	%r15, %r0               # encoding: [0xb9,0x88,0x00,0xf0]
-#CHECK: alcgr	%r7, %r8                # encoding: [0xb9,0x88,0x00,0x78]
-
-	alcgr	%r0,%r0
-	alcgr	%r0,%r15
-	alcgr	%r15,%r0
-	alcgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-alcr-01.s b/test/MC/SystemZ/insn-alcr-01.s
deleted file mode 100644
index 7369224a1bea..000000000000
--- a/test/MC/SystemZ/insn-alcr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alcr	%r0, %r0                # encoding: [0xb9,0x98,0x00,0x00]
-#CHECK: alcr	%r0, %r15               # encoding: [0xb9,0x98,0x00,0x0f]
-#CHECK: alcr	%r15, %r0               # encoding: [0xb9,0x98,0x00,0xf0]
-#CHECK: alcr	%r7, %r8                # encoding: [0xb9,0x98,0x00,0x78]
-
-	alcr	%r0,%r0
-	alcr	%r0,%r15
-	alcr	%r15,%r0
-	alcr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-alfi-01.s b/test/MC/SystemZ/insn-alfi-01.s
deleted file mode 100644
index 332a74fdc7aa..000000000000
--- a/test/MC/SystemZ/insn-alfi-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alfi	%r0, 0                  # encoding: [0xc2,0x0b,0x00,0x00,0x00,0x00]
-#CHECK: alfi	%r0, 4294967295         # encoding: [0xc2,0x0b,0xff,0xff,0xff,0xff]
-#CHECK: alfi	%r15, 0                 # encoding: [0xc2,0xfb,0x00,0x00,0x00,0x00]
-
-	alfi	%r0, 0
-	alfi	%r0, (1 << 32) - 1
-	alfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-alfi-02.s b/test/MC/SystemZ/insn-alfi-02.s
deleted file mode 100644
index a5d38942257b..000000000000
--- a/test/MC/SystemZ/insn-alfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: alfi	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: alfi	%r0, (1 << 32)
-
-	alfi	%r0, -1
-	alfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-alg-01.s b/test/MC/SystemZ/insn-alg-01.s
deleted file mode 100644
index 6df084c02b2f..000000000000
--- a/test/MC/SystemZ/insn-alg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0a]
-#CHECK: alg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0a]
-#CHECK: alg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0a]
-#CHECK: alg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0a]
-#CHECK: alg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0a]
-#CHECK: alg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0a]
-#CHECK: alg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0a]
-#CHECK: alg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0a]
-#CHECK: alg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0a]
-#CHECK: alg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0a]
-
-	alg	%r0, -524288
-	alg	%r0, -1
-	alg	%r0, 0
-	alg	%r0, 1
-	alg	%r0, 524287
-	alg	%r0, 0(%r1)
-	alg	%r0, 0(%r15)
-	alg	%r0, 524287(%r1,%r15)
-	alg	%r0, 524287(%r15,%r1)
-	alg	%r15, 0
diff --git a/test/MC/SystemZ/insn-alg-02.s b/test/MC/SystemZ/insn-alg-02.s
deleted file mode 100644
index 407d73d2bd57..000000000000
--- a/test/MC/SystemZ/insn-alg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: alg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: alg	%r0, 524288
-
-	alg	%r0, -524289
-	alg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-algf-01.s b/test/MC/SystemZ/insn-algf-01.s
deleted file mode 100644
index 751b59059f10..000000000000
--- a/test/MC/SystemZ/insn-algf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: algf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1a]
-#CHECK: algf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1a]
-#CHECK: algf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1a]
-#CHECK: algf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1a]
-#CHECK: algf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1a]
-#CHECK: algf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1a]
-#CHECK: algf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1a]
-#CHECK: algf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1a]
-#CHECK: algf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1a]
-#CHECK: algf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1a]
-
-	algf	%r0, -524288
-	algf	%r0, -1
-	algf	%r0, 0
-	algf	%r0, 1
-	algf	%r0, 524287
-	algf	%r0, 0(%r1)
-	algf	%r0, 0(%r15)
-	algf	%r0, 524287(%r1,%r15)
-	algf	%r0, 524287(%r15,%r1)
-	algf	%r15, 0
diff --git a/test/MC/SystemZ/insn-algf-02.s b/test/MC/SystemZ/insn-algf-02.s
deleted file mode 100644
index 64ef1c975b7b..000000000000
--- a/test/MC/SystemZ/insn-algf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: algf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: algf	%r0, 524288
-
-	algf	%r0, -524289
-	algf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-algfi-01.s b/test/MC/SystemZ/insn-algfi-01.s
deleted file mode 100644
index b6ccb336b3b8..000000000000
--- a/test/MC/SystemZ/insn-algfi-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: algfi	%r0, 0                  # encoding: [0xc2,0x0a,0x00,0x00,0x00,0x00]
-#CHECK: algfi	%r0, 4294967295         # encoding: [0xc2,0x0a,0xff,0xff,0xff,0xff]
-#CHECK: algfi	%r15, 0                 # encoding: [0xc2,0xfa,0x00,0x00,0x00,0x00]
-
-	algfi	%r0, 0
-	algfi	%r0, (1 << 32) - 1
-	algfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-algfi-02.s b/test/MC/SystemZ/insn-algfi-02.s
deleted file mode 100644
index a5ed4b04e55b..000000000000
--- a/test/MC/SystemZ/insn-algfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: algfi	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: algfi	%r0, (1 << 32)
-
-	algfi	%r0, -1
-	algfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-algfr-01.s b/test/MC/SystemZ/insn-algfr-01.s
deleted file mode 100644
index 3ccb692ab947..000000000000
--- a/test/MC/SystemZ/insn-algfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: algfr	%r0, %r0                # encoding: [0xb9,0x1a,0x00,0x00]
-#CHECK: algfr	%r0, %r15               # encoding: [0xb9,0x1a,0x00,0x0f]
-#CHECK: algfr	%r15, %r0               # encoding: [0xb9,0x1a,0x00,0xf0]
-#CHECK: algfr	%r7, %r8                # encoding: [0xb9,0x1a,0x00,0x78]
-
-	algfr	%r0,%r0
-	algfr	%r0,%r15
-	algfr	%r15,%r0
-	algfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-algr-01.s b/test/MC/SystemZ/insn-algr-01.s
deleted file mode 100644
index c3758ee74df4..000000000000
--- a/test/MC/SystemZ/insn-algr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: algr	%r0, %r0                # encoding: [0xb9,0x0a,0x00,0x00]
-#CHECK: algr	%r0, %r15               # encoding: [0xb9,0x0a,0x00,0x0f]
-#CHECK: algr	%r15, %r0               # encoding: [0xb9,0x0a,0x00,0xf0]
-#CHECK: algr	%r7, %r8                # encoding: [0xb9,0x0a,0x00,0x78]
-
-	algr	%r0,%r0
-	algr	%r0,%r15
-	algr	%r15,%r0
-	algr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-alr-01.s b/test/MC/SystemZ/insn-alr-01.s
deleted file mode 100644
index e85173ef151c..000000000000
--- a/test/MC/SystemZ/insn-alr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: alr	%r0, %r0                # encoding: [0x1e,0x00]
-#CHECK: alr	%r0, %r15               # encoding: [0x1e,0x0f]
-#CHECK: alr	%r15, %r0               # encoding: [0x1e,0xf0]
-#CHECK: alr	%r7, %r8                # encoding: [0x1e,0x78]
-
-	alr	%r0,%r0
-	alr	%r0,%r15
-	alr	%r15,%r0
-	alr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-aly-01.s b/test/MC/SystemZ/insn-aly-01.s
deleted file mode 100644
index 94afb07578f4..000000000000
--- a/test/MC/SystemZ/insn-aly-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: aly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5e]
-#CHECK: aly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5e]
-#CHECK: aly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5e]
-#CHECK: aly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5e]
-#CHECK: aly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5e]
-#CHECK: aly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5e]
-#CHECK: aly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5e]
-#CHECK: aly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5e]
-#CHECK: aly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5e]
-#CHECK: aly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5e]
-
-	aly	%r0, -524288
-	aly	%r0, -1
-	aly	%r0, 0
-	aly	%r0, 1
-	aly	%r0, 524287
-	aly	%r0, 0(%r1)
-	aly	%r0, 0(%r15)
-	aly	%r0, 524287(%r1,%r15)
-	aly	%r0, 524287(%r15,%r1)
-	aly	%r15, 0
diff --git a/test/MC/SystemZ/insn-aly-02.s b/test/MC/SystemZ/insn-aly-02.s
deleted file mode 100644
index 01c6f3dcdd19..000000000000
--- a/test/MC/SystemZ/insn-aly-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: aly	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: aly	%r0, 524288
-
-	aly	%r0, -524289
-	aly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ar-01.s b/test/MC/SystemZ/insn-ar-01.s
deleted file mode 100644
index 7cd627a17021..000000000000
--- a/test/MC/SystemZ/insn-ar-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ar	%r0, %r0                # encoding: [0x1a,0x00]
-#CHECK: ar	%r0, %r15               # encoding: [0x1a,0x0f]
-#CHECK: ar	%r15, %r0               # encoding: [0x1a,0xf0]
-#CHECK: ar	%r7, %r8                # encoding: [0x1a,0x78]
-
-	ar	%r0,%r0
-	ar	%r0,%r15
-	ar	%r15,%r0
-	ar	%r7,%r8
diff --git a/test/MC/SystemZ/insn-asi-01.s b/test/MC/SystemZ/insn-asi-01.s
deleted file mode 100644
index 7a1d241e2356..000000000000
--- a/test/MC/SystemZ/insn-asi-01.s
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: asi	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x6a]
-#CHECK: asi	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x6a]
-#CHECK: asi	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x6a]
-#CHECK: asi	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x6a]
-#CHECK: asi	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x6a]
-#CHECK: asi	0, -128                 # encoding: [0xeb,0x80,0x00,0x00,0x00,0x6a]
-#CHECK: asi	0, -1                   # encoding: [0xeb,0xff,0x00,0x00,0x00,0x6a]
-#CHECK: asi	0, 1                    # encoding: [0xeb,0x01,0x00,0x00,0x00,0x6a]
-#CHECK: asi	0, 127                  # encoding: [0xeb,0x7f,0x00,0x00,0x00,0x6a]
-#CHECK: asi	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x6a]
-#CHECK: asi	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x6a]
-#CHECK: asi	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x6a]
-#CHECK: asi	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x6a]
-
-	asi	-524288, 0
-	asi	-1, 0
-	asi	0, 0
-	asi	1, 0
-	asi	524287, 0
-	asi	0, -128
-	asi	0, -1
-	asi	0, 1
-	asi	0, 127
-	asi	0(%r1), 42
-	asi	0(%r15), 42
-	asi	524287(%r1), 42
-	asi	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-asi-02.s b/test/MC/SystemZ/insn-asi-02.s
deleted file mode 100644
index 3c09f9050382..000000000000
--- a/test/MC/SystemZ/insn-asi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: asi	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: asi	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: asi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: asi	0, -129
-#CHECK: error: invalid operand
-#CHECK: asi	0, 128
-
-	asi	-524289, 0
-	asi	524288, 0
-	asi	0(%r1,%r2), 0
-	asi	0, -129
-	asi	0, 128
diff --git a/test/MC/SystemZ/insn-axbr-01.s b/test/MC/SystemZ/insn-axbr-01.s
deleted file mode 100644
index cb592efba6a5..000000000000
--- a/test/MC/SystemZ/insn-axbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: axbr	%f0, %f0                # encoding: [0xb3,0x4a,0x00,0x00]
-#CHECK: axbr	%f0, %f13               # encoding: [0xb3,0x4a,0x00,0x0d]
-#CHECK: axbr	%f8, %f8                # encoding: [0xb3,0x4a,0x00,0x88]
-#CHECK: axbr	%f13, %f0               # encoding: [0xb3,0x4a,0x00,0xd0]
-
-	axbr	%f0, %f0
-	axbr	%f0, %f13
-	axbr	%f8, %f8
-	axbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-axbr-02.s b/test/MC/SystemZ/insn-axbr-02.s
deleted file mode 100644
index 307664d51e53..000000000000
--- a/test/MC/SystemZ/insn-axbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: axbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: axbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: axbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: axbr	%f14, %f0
-
-	axbr	%f0, %f2
-	axbr	%f0, %f14
-	axbr	%f2, %f0
-	axbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-ay-01.s b/test/MC/SystemZ/insn-ay-01.s
deleted file mode 100644
index 3b65c9a7c0e8..000000000000
--- a/test/MC/SystemZ/insn-ay-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ay	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5a]
-#CHECK: ay	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5a]
-#CHECK: ay	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5a]
-#CHECK: ay	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5a]
-#CHECK: ay	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5a]
-#CHECK: ay	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5a]
-#CHECK: ay	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5a]
-#CHECK: ay	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5a]
-#CHECK: ay	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5a]
-#CHECK: ay	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5a]
-
-	ay	%r0, -524288
-	ay	%r0, -1
-	ay	%r0, 0
-	ay	%r0, 1
-	ay	%r0, 524287
-	ay	%r0, 0(%r1)
-	ay	%r0, 0(%r15)
-	ay	%r0, 524287(%r1,%r15)
-	ay	%r0, 524287(%r15,%r1)
-	ay	%r15, 0
diff --git a/test/MC/SystemZ/insn-ay-02.s b/test/MC/SystemZ/insn-ay-02.s
deleted file mode 100644
index 09704dfb9f4a..000000000000
--- a/test/MC/SystemZ/insn-ay-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ay	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: ay	%r0, 524288
-
-	ay	%r0, -524289
-	ay	%r0, 524288
diff --git a/test/MC/SystemZ/insn-bad-z196.s b/test/MC/SystemZ/insn-bad-z196.s
new file mode 100644
index 000000000000..089d9b5b3e14
--- /dev/null
+++ b/test/MC/SystemZ/insn-bad-z196.s
@@ -0,0 +1,343 @@
+# For z196 only.
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z196 < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: aghik	%r0, %r1, -32769
+#CHECK: error: invalid operand
+#CHECK: aghik	%r0, %r1, 32768
+#CHECK: error: invalid operand
+#CHECK: aghik	%r0, %r1, foo
+
+	aghik	%r0, %r1, -32769
+	aghik	%r0, %r1, 32768
+	aghik	%r0, %r1, foo
+
+#CHECK: error: invalid operand
+#CHECK: ahik	%r0, %r1, -32769
+#CHECK: error: invalid operand
+#CHECK: ahik	%r0, %r1, 32768
+#CHECK: error: invalid operand
+#CHECK: ahik	%r0, %r1, foo
+
+	ahik	%r0, %r1, -32769
+	ahik	%r0, %r1, 32768
+	ahik	%r0, %r1, foo
+
+#CHECK: error: invalid operand
+#CHECK: aih	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: aih	%r0, (1 << 31)
+
+	aih	%r0, (-1 << 31) - 1
+	aih	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: chf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: chf	%r0, 524288
+
+	chf	%r0, -524289
+	chf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: cih	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: cih	%r0, (1 << 31)
+
+	cih	%r0, (-1 << 31) - 1
+	cih	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: clhf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: clhf	%r0, 524288
+
+	clhf	%r0, -524289
+	clhf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: clih	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: clih	%r0, (1 << 32)
+
+	clih	%r0, -1
+	clih	%r0, (1 << 32)
+
+#CHECK: error: invalid operand
+#CHECK: fidbra	%f0, 0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: fidbra	%f0, 0, %f0, 16
+#CHECK: error: invalid operand
+#CHECK: fidbra	%f0, -1, %f0, 0
+#CHECK: error: invalid operand
+#CHECK: fidbra	%f0, 16, %f0, 0
+
+	fidbra	%f0, 0, %f0, -1
+	fidbra	%f0, 0, %f0, 16
+	fidbra	%f0, -1, %f0, 0
+	fidbra	%f0, 16, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: fiebra	%f0, 0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: fiebra	%f0, 0, %f0, 16
+#CHECK: error: invalid operand
+#CHECK: fiebra	%f0, -1, %f0, 0
+#CHECK: error: invalid operand
+#CHECK: fiebra	%f0, 16, %f0, 0
+
+	fiebra	%f0, 0, %f0, -1
+	fiebra	%f0, 0, %f0, 16
+	fiebra	%f0, -1, %f0, 0
+	fiebra	%f0, 16, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: fixbra	%f0, 0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: fixbra	%f0, 0, %f0, 16
+#CHECK: error: invalid operand
+#CHECK: fixbra	%f0, -1, %f0, 0
+#CHECK: error: invalid operand
+#CHECK: fixbra	%f0, 16, %f0, 0
+#CHECK: error: invalid register pair
+#CHECK: fixbra	%f0, 0, %f2, 0
+#CHECK: error: invalid register pair
+#CHECK: fixbra	%f2, 0, %f0, 0
+
+	fixbra	%f0, 0, %f0, -1
+	fixbra	%f0, 0, %f0, 16
+	fixbra	%f0, -1, %f0, 0
+	fixbra	%f0, 16, %f0, 0
+	fixbra	%f0, 0, %f2, 0
+	fixbra	%f2, 0, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: lbh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lbh	%r0, 524288
+
+	lbh	%r0, -524289
+	lbh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lfh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lfh	%r0, 524288
+
+	lfh	%r0, -524289
+	lfh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lhh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lhh	%r0, 524288
+
+	lhh	%r0, -524289
+	lhh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llch	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llch	%r0, 524288
+
+	llch	%r0, -524289
+	llch	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llhh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llhh	%r0, 524288
+
+	llhh	%r0, -524289
+	llhh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: loc	%r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: loc	%r0,0,16
+#CHECK: error: invalid operand
+#CHECK: loc	%r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: loc	%r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: loc	%r0,0(%r1,%r2),1
+
+	loc	%r0,0,-1
+	loc	%r0,0,16
+	loc	%r0,-524289,1
+	loc	%r0,524288,1
+	loc	%r0,0(%r1,%r2),1
+
+#CHECK: error: invalid operand
+#CHECK: locg	%r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: locg	%r0,0,16
+#CHECK: error: invalid operand
+#CHECK: locg	%r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: locg	%r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: locg	%r0,0(%r1,%r2),1
+
+	locg	%r0,0,-1
+	locg	%r0,0,16
+	locg	%r0,-524289,1
+	locg	%r0,524288,1
+	locg	%r0,0(%r1,%r2),1
+
+#CHECK: error: invalid operand
+#CHECK: locgr	%r0,%r0,-1
+#CHECK: error: invalid operand
+#CHECK: locgr	%r0,%r0,16
+
+	locgr	%r0,%r0,-1
+	locgr	%r0,%r0,16
+
+#CHECK: error: invalid operand
+#CHECK: locr	%r0,%r0,-1
+#CHECK: error: invalid operand
+#CHECK: locr	%r0,%r0,16
+
+	locr	%r0,%r0,-1
+	locr	%r0,%r0,16
+
+#CHECK: error: invalid operand
+#CHECK: risbhg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: risbhg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: risbhg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: risbhg	%r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: risbhg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: risbhg	%r0,%r0,256,0,0
+
+	risbhg	%r0,%r0,0,0,-1
+	risbhg	%r0,%r0,0,0,64
+	risbhg	%r0,%r0,0,-1,0
+	risbhg	%r0,%r0,0,256,0
+	risbhg	%r0,%r0,-1,0,0
+	risbhg	%r0,%r0,256,0,0
+
+#CHECK: error: invalid operand
+#CHECK: risblg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: risblg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: risblg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: risblg	%r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: risblg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: risblg	%r0,%r0,256,0,0
+
+	risblg	%r0,%r0,0,0,-1
+	risblg	%r0,%r0,0,0,64
+	risblg	%r0,%r0,0,-1,0
+	risblg	%r0,%r0,0,256,0
+	risblg	%r0,%r0,-1,0,0
+	risblg	%r0,%r0,256,0,0
+
+#CHECK: error: invalid operand
+#CHECK: sllk	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: sllk	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: sllk	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sllk	%r0,%r0,0(%r1,%r2)
+
+	sllk	%r0,%r0,-524289
+	sllk	%r0,%r0,524288
+	sllk	%r0,%r0,0(%r0)
+	sllk	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: srak	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: srak	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: srak	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srak	%r0,%r0,0(%r1,%r2)
+
+	srak	%r0,%r0,-524289
+	srak	%r0,%r0,524288
+	srak	%r0,%r0,0(%r0)
+	srak	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: srlk	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: srlk	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: srlk	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srlk	%r0,%r0,0(%r1,%r2)
+
+	srlk	%r0,%r0,-524289
+	srlk	%r0,%r0,524288
+	srlk	%r0,%r0,0(%r0)
+	srlk	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: stch	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stch	%r0, 524288
+
+	stch	%r0, -524289
+	stch	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: sthh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sthh	%r0, 524288
+
+	sthh	%r0, -524289
+	sthh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: stfh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stfh	%r0, 524288
+
+	stfh	%r0, -524289
+	stfh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: stoc	%r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: stoc	%r0,0,16
+#CHECK: error: invalid operand
+#CHECK: stoc	%r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: stoc	%r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stoc	%r0,0(%r1,%r2),1
+
+	stoc	%r0,0,-1
+	stoc	%r0,0,16
+	stoc	%r0,-524289,1
+	stoc	%r0,524288,1
+	stoc	%r0,0(%r1,%r2),1
+
+#CHECK: error: invalid operand
+#CHECK: stocg	%r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: stocg	%r0,0,16
+#CHECK: error: invalid operand
+#CHECK: stocg	%r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: stocg	%r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stocg	%r0,0(%r1,%r2),1
+
+	stocg	%r0,0,-1
+	stocg	%r0,0,16
+	stocg	%r0,-524289,1
+	stocg	%r0,524288,1
+	stocg	%r0,0(%r1,%r2),1
diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s
new file mode 100644
index 000000000000..2a3fb98c2b14
--- /dev/null
+++ b/test/MC/SystemZ/insn-bad.s
@@ -0,0 +1,3315 @@
+# For z10 only.
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z10 < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: a	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: a	%r0, 4096
+
+	a	%r0, -1
+	a	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: adb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: adb	%f0, 4096
+
+	adb	%f0, -1
+	adb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: aeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: aeb	%f0, 4096
+
+	aeb	%f0, -1
+	aeb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: afi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: afi	%r0, (1 << 31)
+
+	afi	%r0, (-1 << 31) - 1
+	afi	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: ag	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ag	%r0, 524288
+
+	ag	%r0, -524289
+	ag	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: agf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: agf	%r0, 524288
+
+	agf	%r0, -524289
+	agf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: agfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: agfi	%r0, (1 << 31)
+
+	agfi	%r0, (-1 << 31) - 1
+	agfi	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: aghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: aghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: aghi	%r0, foo
+
+	aghi	%r0, -32769
+	aghi	%r0, 32768
+	aghi	%r0, foo
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: aghik	%r1, %r2, 3
+
+	aghik	%r1, %r2, 3
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: agrk	%r2,%r3,%r4
+
+	agrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: agsi	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: agsi	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: agsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: agsi	0, -129
+#CHECK: error: invalid operand
+#CHECK: agsi	0, 128
+
+	agsi	-524289, 0
+	agsi	524288, 0
+	agsi	0(%r1,%r2), 0
+	agsi	0, -129
+	agsi	0, 128
+
+#CHECK: error: invalid operand
+#CHECK: ah	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ah	%r0, 4096
+
+	ah	%r0, -1
+	ah	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: ahi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: ahi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: ahi	%r0, foo
+
+	ahi	%r0, -32769
+	ahi	%r0, 32768
+	ahi	%r0, foo
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: ahik	%r1, %r2, 3
+
+	ahik	%r1, %r2, 3
+
+#CHECK: error: invalid operand
+#CHECK: ahy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ahy	%r0, 524288
+
+	ahy	%r0, -524289
+	ahy	%r0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: aih	%r0, 0
+
+	aih	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: al	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: al	%r0, 4096
+
+	al	%r0, -1
+	al	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: alc	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: alc	%r0, 524288
+
+	alc	%r0, -524289
+	alc	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: alcg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: alcg	%r0, 524288
+
+	alcg	%r0, -524289
+	alcg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: alfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: alfi	%r0, (1 << 32)
+
+	alfi	%r0, -1
+	alfi	%r0, (1 << 32)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: alghsik	%r1, %r2, 3
+
+	alghsik	%r1, %r2, 3
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: alhsik	%r1, %r2, 3
+
+	alhsik	%r1, %r2, 3
+
+#CHECK: error: invalid operand
+#CHECK: alg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: alg	%r0, 524288
+
+	alg	%r0, -524289
+	alg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: algf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: algf	%r0, 524288
+
+	algf	%r0, -524289
+	algf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: algfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: algfi	%r0, (1 << 32)
+
+	algfi	%r0, -1
+	algfi	%r0, (1 << 32)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: algrk	%r2,%r3,%r4
+
+	algrk	%r2,%r3,%r4
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: alrk	%r2,%r3,%r4
+
+	alrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: aly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: aly	%r0, 524288
+
+	aly	%r0, -524289
+	aly	%r0, 524288
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: ark	%r2,%r3,%r4
+
+	ark	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: asi	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: asi	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: asi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: asi	0, -129
+#CHECK: error: invalid operand
+#CHECK: asi	0, 128
+
+	asi	-524289, 0
+	asi	524288, 0
+	asi	0(%r1,%r2), 0
+	asi	0, -129
+	asi	0, 128
+
+#CHECK: error: invalid register pair
+#CHECK: axbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: axbr	%f2, %f0
+
+	axbr	%f0, %f2
+	axbr	%f2, %f0
+
+
+#CHECK: error: invalid operand
+#CHECK: ay	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ay	%r0, 524288
+
+	ay	%r0, -524289
+	ay	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: bcr	-1, %r1
+#CHECK: error: invalid operand
+#CHECK: bcr	16, %r1
+
+	bcr	-1, %r1
+	bcr	16, %r1
+
+#CHECK: error: offset out of range
+#CHECK: bras	%r0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: bras	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: bras	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: bras	%r0, 0x10000
+
+	bras	%r0, -0x100002
+	bras	%r0, -1
+	bras	%r0, 1
+	bras	%r0, 0x10000
+
+#CHECK: error: offset out of range
+#CHECK: brasl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: brasl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: brasl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: brasl	%r0, 0x100000000
+
+	brasl	%r0, -0x1000000002
+	brasl	%r0, -1
+	brasl	%r0, 1
+	brasl	%r0, 0x100000000
+
+#CHECK: error: offset out of range
+#CHECK: brc	0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: brc	0, -1
+#CHECK: error: offset out of range
+#CHECK: brc	0, 1
+#CHECK: error: offset out of range
+#CHECK: brc	0, 0x10000
+
+	brc	0, -0x100002
+	brc	0, -1
+	brc	0, 1
+	brc	0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: brc	foo, bar
+#CHECK: error: invalid operand
+#CHECK: brc	-1, bar
+#CHECK: error: invalid operand
+#CHECK: brc	16, bar
+
+	brc	foo, bar
+	brc	-1, bar
+	brc	16, bar
+
+#CHECK: error: offset out of range
+#CHECK: brcl	0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: brcl	0, -1
+#CHECK: error: offset out of range
+#CHECK: brcl	0, 1
+#CHECK: error: offset out of range
+#CHECK: brcl	0, 0x100000000
+
+	brcl	0, -0x1000000002
+	brcl	0, -1
+	brcl	0, 1
+	brcl	0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: brcl	foo, bar
+#CHECK: error: invalid operand
+#CHECK: brcl	-1, bar
+#CHECK: error: invalid operand
+#CHECK: brcl	16, bar
+
+	brcl	foo, bar
+	brcl	-1, bar
+	brcl	16, bar
+
+#CHECK: error: offset out of range
+#CHECK: brct	%r0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: brct	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: brct	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: brct	%r0, 0x10000
+
+	brct	%r0, -0x100002
+	brct	%r0, -1
+	brct	%r0, 1
+	brct	%r0, 0x10000
+
+#CHECK: error: offset out of range
+#CHECK: brctg	%r0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: brctg	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: brctg	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: brctg	%r0, 0x10000
+
+	brctg	%r0, -0x100002
+	brctg	%r0, -1
+	brctg	%r0, 1
+	brctg	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: c	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: c	%r0, 4096
+
+	c	%r0, -1
+	c	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: cdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: cdb	%f0, 4096
+
+	cdb	%f0, -1
+	cdb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: ceb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ceb	%f0, 4096
+
+	ceb	%f0, -1
+	ceb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: cfdbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cfdbr	%r0, 16, %f0
+
+	cfdbr	%r0, -1, %f0
+	cfdbr	%r0, 16, %f0
+
+#CHECK: error: invalid operand
+#CHECK: cfebr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cfebr	%r0, 16, %f0
+
+	cfebr	%r0, -1, %f0
+	cfebr	%r0, 16, %f0
+
+#CHECK: error: invalid operand
+#CHECK: cfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: cfi	%r0, (1 << 31)
+
+	cfi	%r0, (-1 << 31) - 1
+	cfi	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: cfxbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cfxbr	%r0, 16, %f0
+#CHECK: error: invalid register pair
+#CHECK: cfxbr	%r0, 0, %f2
+
+	cfxbr	%r0, -1, %f0
+	cfxbr	%r0, 16, %f0
+	cfxbr	%r0, 0, %f2
+
+
+#CHECK: error: invalid operand
+#CHECK: cg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cg	%r0, 524288
+
+	cg	%r0, -524289
+	cg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: cgdbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cgdbr	%r0, 16, %f0
+
+	cgdbr	%r0, -1, %f0
+	cgdbr	%r0, 16, %f0
+
+#CHECK: error: invalid operand
+#CHECK: cgebr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cgebr	%r0, 16, %f0
+
+	cgebr	%r0, -1, %f0
+	cgebr	%r0, 16, %f0
+
+#CHECK: error: invalid operand
+#CHECK: cgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cgf	%r0, 524288
+
+	cgf	%r0, -524289
+	cgf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: cgfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: cgfi	%r0, (1 << 31)
+
+	cgfi	%r0, (-1 << 31) - 1
+	cgfi	%r0, (1 << 31)
+
+#CHECK: error: offset out of range
+#CHECK: cgfrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: cgfrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: cgfrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: cgfrl	%r0, 0x100000000
+
+	cgfrl	%r0, -0x1000000002
+	cgfrl	%r0, -1
+	cgfrl	%r0, 1
+	cgfrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: cgh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cgh	%r0, 524288
+
+	cgh	%r0, -524289
+	cgh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: cghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: cghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: cghi	%r0, foo
+
+	cghi	%r0, -32769
+	cghi	%r0, 32768
+	cghi	%r0, foo
+
+#CHECK: error: offset out of range
+#CHECK: cghrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: cghrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: cghrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: cghrl	%r0, 0x100000000
+
+	cghrl	%r0, -0x1000000002
+	cghrl	%r0, -1
+	cghrl	%r0, 1
+	cghrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: cghsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: cghsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cghsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: cghsi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: cghsi	0, 32768
+
+	cghsi	-1, 0
+	cghsi	4096, 0
+	cghsi	0(%r1,%r2), 0
+	cghsi	0, -32769
+	cghsi	0, 32768
+
+#CHECK: error: invalid operand
+#CHECK: cgij	%r0, -129, 0, 0
+#CHECK: error: invalid operand
+#CHECK: cgij	%r0, 128, 0, 0
+
+	cgij	%r0, -129, 0, 0
+	cgij	%r0, 128, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: cgij	%r0, 0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: cgij	%r0, 0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: cgij	%r0, 0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: cgij	%r0, 0, 0, 0x10000
+
+	cgij	%r0, 0, 0, -0x100002
+	cgij	%r0, 0, 0, -1
+	cgij	%r0, 0, 0, 1
+	cgij	%r0, 0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	cgijo	%r0, 0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	cgijno	%r0, 0, 0, 0
+
+	cgijo	%r0, 0, 0, 0
+	cgijno	%r0, 0, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: cgrj	%r0, %r0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: cgrj	%r0, %r0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: cgrj	%r0, %r0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: cgrj	%r0, %r0, 0, 0x10000
+
+	cgrj	%r0, %r0, 0, -0x100002
+	cgrj	%r0, %r0, 0, -1
+	cgrj	%r0, %r0, 0, 1
+	cgrj	%r0, %r0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	cgrjo	%r0, %r0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	cgrjno	%r0, %r0, 0, 0
+
+	cgrjo	%r0, %r0, 0, 0
+	cgrjno	%r0, %r0, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: cgrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: cgrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: cgrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: cgrl	%r0, 0x100000000
+
+	cgrl	%r0, -0x1000000002
+	cgrl	%r0, -1
+	cgrl	%r0, 1
+	cgrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: cgxbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cgxbr	%r0, 16, %f0
+#CHECK: error: invalid register pair
+#CHECK: cgxbr	%r0, 0, %f2
+
+	cgxbr	%r0, -1, %f0
+	cgxbr	%r0, 16, %f0
+	cgxbr	%r0, 0, %f2
+
+
+#CHECK: error: invalid operand
+#CHECK: ch	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ch	%r0, 4096
+
+	ch	%r0, -1
+	ch	%r0, 4096
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: chf	%r0, 0
+
+	chf	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: chhsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: chhsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: chhsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: chhsi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: chhsi	0, 32768
+
+	chhsi	-1, 0
+	chhsi	4096, 0
+	chhsi	0(%r1,%r2), 0
+	chhsi	0, -32769
+	chhsi	0, 32768
+
+#CHECK: error: invalid operand
+#CHECK: chi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: chi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: chi	%r0, foo
+
+	chi	%r0, -32769
+	chi	%r0, 32768
+	chi	%r0, foo
+
+#CHECK: error: offset out of range
+#CHECK: chrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: chrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: chrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: chrl	%r0, 0x100000000
+
+	chrl	%r0, -0x1000000002
+	chrl	%r0, -1
+	chrl	%r0, 1
+	chrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: chsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: chsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: chsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: chsi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: chsi	0, 32768
+
+	chsi	-1, 0
+	chsi	4096, 0
+	chsi	0(%r1,%r2), 0
+	chsi	0, -32769
+	chsi	0, 32768
+
+#CHECK: error: invalid operand
+#CHECK: chy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: chy	%r0, 524288
+
+	chy	%r0, -524289
+	chy	%r0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: cih	%r0, 0
+
+	cih	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: cij	%r0, -129, 0, 0
+#CHECK: error: invalid operand
+#CHECK: cij	%r0, 128, 0, 0
+
+	cij	%r0, -129, 0, 0
+	cij	%r0, 128, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: cij	%r0, 0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: cij	%r0, 0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: cij	%r0, 0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: cij	%r0, 0, 0, 0x10000
+
+	cij	%r0, 0, 0, -0x100002
+	cij	%r0, 0, 0, -1
+	cij	%r0, 0, 0, 1
+	cij	%r0, 0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	cijo	%r0, 0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	cijno	%r0, 0, 0, 0
+
+	cijo	%r0, 0, 0, 0
+	cijno	%r0, 0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: cl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: cl	%r0, 4096
+
+	cl	%r0, -1
+	cl	%r0, 4096
+
+#CHECK: error: missing length in address
+#CHECK: clc	0, 0
+#CHECK: error: missing length in address
+#CHECK: clc	0(%r1), 0(%r1)
+#CHECK: error: invalid use of length addressing
+#CHECK: clc	0(1,%r1), 0(2,%r1)
+#CHECK: error: invalid operand
+#CHECK: clc	0(0,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: clc	0(257,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: clc	-1(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: clc	4096(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: clc	0(1,%r1), -1(%r1)
+#CHECK: error: invalid operand
+#CHECK: clc	0(1,%r1), 4096(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: clc	0(1,%r0), 0(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: clc	0(1,%r1), 0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clc	0(%r1,%r2), 0(%r1)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clc	0(1,%r2), 0(%r1,%r2)
+#CHECK: error: unknown token in expression
+#CHECK: clc	0(-), 0
+
+	clc	0, 0
+	clc	0(%r1), 0(%r1)
+	clc	0(1,%r1), 0(2,%r1)
+	clc	0(0,%r1), 0(%r1)
+	clc	0(257,%r1), 0(%r1)
+	clc	-1(1,%r1), 0(%r1)
+	clc	4096(1,%r1), 0(%r1)
+	clc	0(1,%r1), -1(%r1)
+	clc	0(1,%r1), 4096(%r1)
+	clc	0(1,%r0), 0(%r1)
+	clc	0(1,%r1), 0(%r0)
+	clc	0(%r1,%r2), 0(%r1)
+	clc	0(1,%r2), 0(%r1,%r2)
+	clc	0(-), 0
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: clhf	%r0, 0
+
+	clhf	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: clfhsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: clfhsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clfhsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: clfhsi	0, -1
+#CHECK: error: invalid operand
+#CHECK: clfhsi	0, 65536
+
+	clfhsi	-1, 0
+	clfhsi	4096, 0
+	clfhsi	0(%r1,%r2), 0
+	clfhsi	0, -1
+	clfhsi	0, 65536
+
+#CHECK: error: invalid operand
+#CHECK: clfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: clfi	%r0, (1 << 32)
+
+	clfi	%r0, -1
+	clfi	%r0, (1 << 32)
+
+#CHECK: error: invalid operand
+#CHECK: clg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: clg	%r0, 524288
+
+	clg	%r0, -524289
+	clg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: clgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: clgf	%r0, 524288
+
+	clgf	%r0, -524289
+	clgf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: clgfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: clgfi	%r0, (1 << 32)
+
+	clgfi	%r0, -1
+	clgfi	%r0, (1 << 32)
+
+#CHECK: error: offset out of range
+#CHECK: clgfrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: clgfrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: clgfrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: clgfrl	%r0, 0x100000000
+
+	clgfrl	%r0, -0x1000000002
+	clgfrl	%r0, -1
+	clgfrl	%r0, 1
+	clgfrl	%r0, 0x100000000
+
+#CHECK: error: offset out of range
+#CHECK: clghrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: clghrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: clghrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: clghrl	%r0, 0x100000000
+
+	clghrl	%r0, -0x1000000002
+	clghrl	%r0, -1
+	clghrl	%r0, 1
+	clghrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: clghsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: clghsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clghsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: clghsi	0, -1
+#CHECK: error: invalid operand
+#CHECK: clghsi	0, 65536
+
+	clghsi	-1, 0
+	clghsi	4096, 0
+	clghsi	0(%r1,%r2), 0
+	clghsi	0, -1
+	clghsi	0, 65536
+
+#CHECK: error: invalid operand
+#CHECK: clgij	%r0, -1, 0, 0
+#CHECK: error: invalid operand
+#CHECK: clgij	%r0, 256, 0, 0
+
+	clgij	%r0, -1, 0, 0
+	clgij	%r0, 256, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: clgij	%r0, 0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: clgij	%r0, 0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: clgij	%r0, 0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: clgij	%r0, 0, 0, 0x10000
+
+	clgij	%r0, 0, 0, -0x100002
+	clgij	%r0, 0, 0, -1
+	clgij	%r0, 0, 0, 1
+	clgij	%r0, 0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	clgijo	%r0, 0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	clgijno	%r0, 0, 0, 0
+
+	clgijo	%r0, 0, 0, 0
+	clgijno	%r0, 0, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: clgrj	%r0, %r0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: clgrj	%r0, %r0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: clgrj	%r0, %r0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: clgrj	%r0, %r0, 0, 0x10000
+
+	clgrj	%r0, %r0, 0, -0x100002
+	clgrj	%r0, %r0, 0, -1
+	clgrj	%r0, %r0, 0, 1
+	clgrj	%r0, %r0, 0, 0x10000
+
+#CHECK: error: offset out of range
+#CHECK: clgrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: clgrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: clgrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: clgrl	%r0, 0x100000000
+
+	clgrl	%r0, -0x1000000002
+	clgrl	%r0, -1
+	clgrl	%r0, 1
+	clgrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: clhhsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: clhhsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clhhsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: clhhsi	0, -1
+#CHECK: error: invalid operand
+#CHECK: clhhsi	0, 65536
+
+	clhhsi	-1, 0
+	clhhsi	4096, 0
+	clhhsi	0(%r1,%r2), 0
+	clhhsi	0, -1
+	clhhsi	0, 65536
+
+#CHECK: error: offset out of range
+#CHECK: clhrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: clhrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: clhrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: clhrl	%r0, 0x100000000
+
+	clhrl	%r0, -0x1000000002
+	clhrl	%r0, -1
+	clhrl	%r0, 1
+	clhrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: cli	-1, 0
+#CHECK: error: invalid operand
+#CHECK: cli	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cli	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: cli	0, -1
+#CHECK: error: invalid operand
+#CHECK: cli	0, 256
+
+	cli	-1, 0
+	cli	4096, 0
+	cli	0(%r1,%r2), 0
+	cli	0, -1
+	cli	0, 256
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: clih	%r0, 0
+
+	clih	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: clij	%r0, -1, 0, 0
+#CHECK: error: invalid operand
+#CHECK: clij	%r0, 256, 0, 0
+
+	clij	%r0, -1, 0, 0
+	clij	%r0, 256, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: clij	%r0, 0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: clij	%r0, 0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: clij	%r0, 0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: clij	%r0, 0, 0, 0x10000
+
+	clij	%r0, 0, 0, -0x100002
+	clij	%r0, 0, 0, -1
+	clij	%r0, 0, 0, 1
+	clij	%r0, 0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	clijo	%r0, 0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	clijno	%r0, 0, 0, 0
+
+	clijo	%r0, 0, 0, 0
+	clijno	%r0, 0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: cliy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: cliy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cliy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: cliy	0, -1
+#CHECK: error: invalid operand
+#CHECK: cliy	0, 256
+
+	cliy	-524289, 0
+	cliy	524288, 0
+	cliy	0(%r1,%r2), 0
+	cliy	0, -1
+	cliy	0, 256
+
+#CHECK: error: offset out of range
+#CHECK: clrj	%r0, %r0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: clrj	%r0, %r0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: clrj	%r0, %r0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: clrj	%r0, %r0, 0, 0x10000
+
+	clrj	%r0, %r0, 0, -0x100002
+	clrj	%r0, %r0, 0, -1
+	clrj	%r0, %r0, 0, 1
+	clrj	%r0, %r0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	clrjo	%r0, %r0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	clrjno	%r0, %r0, 0, 0
+
+	clrjo	%r0, %r0, 0, 0
+	clrjno	%r0, %r0, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: clrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: clrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: clrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: clrl	%r0, 0x100000000
+
+	clrl	%r0, -0x1000000002
+	clrl	%r0, -1
+	clrl	%r0, 1
+	clrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: cly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cly	%r0, 524288
+
+	cly	%r0, -524289
+	cly	%r0, 524288
+
+#CHECK: error: offset out of range
+#CHECK: crj	%r0, %r0, 0, -0x100002
+#CHECK: error: offset out of range
+#CHECK: crj	%r0, %r0, 0, -1
+#CHECK: error: offset out of range
+#CHECK: crj	%r0, %r0, 0, 1
+#CHECK: error: offset out of range
+#CHECK: crj	%r0, %r0, 0, 0x10000
+
+	crj	%r0, %r0, 0, -0x100002
+	crj	%r0, %r0, 0, -1
+	crj	%r0, %r0, 0, 1
+	crj	%r0, %r0, 0, 0x10000
+
+#CHECK: error: invalid instruction
+#CHECK:	crjo	%r0, %r0, 0, 0
+#CHECK: error: invalid instruction
+#CHECK:	crjno	%r0, %r0, 0, 0
+
+	crjo	%r0, %r0, 0, 0
+	crjno	%r0, %r0, 0, 0
+
+#CHECK: error: offset out of range
+#CHECK: crl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: crl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: crl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: crl	%r0, 0x100000000
+
+	crl	%r0, -0x1000000002
+	crl	%r0, -1
+	crl	%r0, 1
+	crl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: cs	%r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: cs	%r0, %r0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cs	%r0, %r0, 0(%r1,%r2)
+
+	cs	%r0, %r0, -1
+	cs	%r0, %r0, 4096
+	cs	%r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: csg	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: csg	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: csg	%r0, %r0, 0(%r1,%r2)
+
+	csg	%r0, %r0, -524289
+	csg	%r0, %r0, 524288
+	csg	%r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: csy	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: csy	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: csy	%r0, %r0, 0(%r1,%r2)
+
+	csy	%r0, %r0, -524289
+	csy	%r0, %r0, 524288
+	csy	%r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid register pair
+#CHECK: cxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: cxbr	%f2, %f0
+
+	cxbr	%f0, %f2
+	cxbr	%f2, %f0
+
+#CHECK: error: invalid register pair
+#CHECK: cxfbr	%f2, %r0
+
+	cxfbr	%f2, %r0
+
+#CHECK: error: invalid register pair
+#CHECK: cxgbr	%f2, %r0
+
+	cxgbr	%f2, %r0
+
+#CHECK: error: invalid operand
+#CHECK: cy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cy	%r0, 524288
+
+	cy	%r0, -524289
+	cy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ddb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ddb	%f0, 4096
+
+	ddb	%f0, -1
+	ddb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: deb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: deb	%f0, 4096
+
+	deb	%f0, -1
+	deb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: dl	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dl	%r0, 524288
+#CHECK: error: invalid register pair
+#CHECK: dl	%r1, 0
+
+	dl	%r0, -524289
+	dl	%r0, 524288
+	dl	%r1, 0
+
+#CHECK: error: invalid operand
+#CHECK: dlg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dlg	%r0, 524288
+#CHECK: error: invalid register pair
+#CHECK: dlg	%r1, 0
+
+	dlg	%r0, -524289
+	dlg	%r0, 524288
+	dlg	%r1, 0
+
+#CHECK: error: invalid register pair
+#CHECK: dlgr	%r1, %r0
+
+	dlgr	%r1, %r0
+
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r1, %r0
+
+	dlr	%r1, %r0
+
+#CHECK: error: invalid operand
+#CHECK: dsg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dsg	%r0, 524288
+#CHECK: error: invalid register pair
+#CHECK: dsg	%r1, 0
+
+	dsg	%r0, -524289
+	dsg	%r0, 524288
+	dsg	%r1, 0
+
+#CHECK: error: invalid operand
+#CHECK: dsgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dsgf	%r0, 524288
+#CHECK: error: invalid register pair
+#CHECK: dsgf	%r1, 0
+
+	dsgf	%r0, -524289
+	dsgf	%r0, 524288
+	dsgf	%r1, 0
+
+#CHECK: error: invalid register pair
+#CHECK: dsgfr	%r1, %r0
+
+	dsgfr	%r1, %r0
+
+#CHECK: error: invalid register pair
+#CHECK: dsgr	%r1, %r0
+
+	dsgr	%r1, %r0
+
+#CHECK: error: invalid register pair
+#CHECK: dxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: dxbr	%f2, %f0
+
+	dxbr	%f0, %f2
+	dxbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: fidbr	%f0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: fidbr	%f0, 16, %f0
+
+	fidbr	%f0, -1, %f0
+	fidbr	%f0, 16, %f0
+
+#CHECK: error: {{(instruction requires: fp-extension)?}}
+#CHECK: fidbra	%f0, 0, %f0, 0
+
+	fidbra	%f0, 0, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: fiebr	%f0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: fiebr	%f0, 16, %f0
+
+	fiebr	%f0, -1, %f0
+	fiebr	%f0, 16, %f0
+
+#CHECK: error: {{(instruction requires: fp-extension)?}}
+#CHECK: fiebra	%f0, 0, %f0, 0
+
+	fiebra	%f0, 0, %f0, 0
+
+#CHECK: error: invalid operand
+#CHECK: fixbr	%f0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: fixbr	%f0, 16, %f0
+#CHECK: error: invalid register pair
+#CHECK: fixbr	%f0, 0, %f2
+#CHECK: error: invalid register pair
+#CHECK: fixbr	%f2, 0, %f0
+
+	fixbr	%f0, -1, %f0
+	fixbr	%f0, 16, %f0
+	fixbr	%f0, 0, %f2
+	fixbr	%f2, 0, %f0
+
+#CHECK: error: {{(instruction requires: fp-extension)?}}
+#CHECK: fixbra	%f0, 0, %f0, 0
+
+	fixbra	%f0, 0, %f0, 0
+
+#CHECK: error: invalid register pair
+#CHECK: flogr	%r1, %r0
+
+	flogr	%r1, %r0
+
+#CHECK: error: invalid operand
+#CHECK: ic	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ic	%r0, 4096
+
+	ic	%r0, -1
+	ic	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: icy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: icy	%r0, 524288
+
+	icy	%r0, -524289
+	icy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: iihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iihf	%r0, 1 << 32
+
+	iihf	%r0, -1
+	iihf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: iihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iihh	%r0, 0x10000
+
+	iihh	%r0, -1
+	iihh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: iihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iihl	%r0, 0x10000
+
+	iihl	%r0, -1
+	iihl	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: iilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iilf	%r0, 1 << 32
+
+	iilf	%r0, -1
+	iilf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: iilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iilh	%r0, 0x10000
+
+	iilh	%r0, -1
+	iilh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: iill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iill	%r0, 0x10000
+
+	iill	%r0, -1
+	iill	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: l	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: l	%r0, 4096
+
+	l	%r0, -1
+	l	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: la	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: la	%r0, 4096
+
+	la	%r0, -1
+	la	%r0, 4096
+
+#CHECK: error: offset out of range
+#CHECK: larl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: larl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: larl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: larl	%r0, 0x100000000
+
+	larl	%r0, -0x1000000002
+	larl	%r0, -1
+	larl	%r0, 1
+	larl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: lay	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lay	%r0, 524288
+
+	lay	%r0, -524289
+	lay	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lb	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lb	%r0, 524288
+
+	lb	%r0, -524289
+	lb	%r0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: lbh	%r0, 0
+
+	lbh	%r0, 0
+
+#CHECK: error: invalid register pair
+#CHECK: lcxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: lcxbr	%f2, %f0
+
+	lcxbr	%f0, %f2
+	lcxbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: ld	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ld	%f0, 4096
+
+	ld	%f0, -1
+	ld	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: ldeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ldeb	%f0, 4096
+
+	ldeb	%f0, -1
+	ldeb	%f0, 4096
+
+#CHECK: error: invalid register pair
+#CHECK: ldxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: ldxbr	%f2, %f0
+
+	ldxbr	%f0, %f2
+	ldxbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: ldy	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: ldy	%f0, 524288
+
+	ldy	%f0, -524289
+	ldy	%f0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: le	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: le	%f0, 4096
+
+	le	%f0, -1
+	le	%f0, 4096
+
+#CHECK: error: invalid register pair
+#CHECK: lexbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: lexbr	%f2, %f0
+
+	lexbr	%f0, %f2
+	lexbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: ley	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: ley	%f0, 524288
+
+	ley	%f0, -524289
+	ley	%f0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: lfh	%r0, 0
+
+	lfh	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: lg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lg	%r0, 524288
+
+	lg	%r0, -524289
+	lg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lgb	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lgb	%r0, 524288
+
+	lgb	%r0, -524289
+	lgb	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lgf	%r0, 524288
+
+	lgf	%r0, -524289
+	lgf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lgfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: lgfi	%r0, (1 << 31)
+
+	lgfi	%r0, (-1 << 31) - 1
+	lgfi	%r0, (1 << 31)
+
+#CHECK: error: offset out of range
+#CHECK: lgfrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: lgfrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: lgfrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: lgfrl	%r0, 0x100000000
+
+	lgfrl	%r0, -0x1000000002
+	lgfrl	%r0, -1
+	lgfrl	%r0, 1
+	lgfrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: lgh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lgh	%r0, 524288
+
+	lgh	%r0, -524289
+	lgh	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: lghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: lghi	%r0, foo
+
+	lghi	%r0, -32769
+	lghi	%r0, 32768
+	lghi	%r0, foo
+
+#CHECK: error: offset out of range
+#CHECK: lghrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: lghrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: lghrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: lghrl	%r0, 0x100000000
+
+	lghrl	%r0, -0x1000000002
+	lghrl	%r0, -1
+	lghrl	%r0, 1
+	lghrl	%r0, 0x100000000
+
+#CHECK: error: offset out of range
+#CHECK: lgrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: lgrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: lgrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: lgrl	%r0, 0x100000000
+
+	lgrl	%r0, -0x1000000002
+	lgrl	%r0, -1
+	lgrl	%r0, 1
+	lgrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: lh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: lh	%r0, 4096
+
+	lh	%r0, -1
+	lh	%r0, 4096
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: lhh	%r0, 0
+
+	lhh	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: lhi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: lhi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: lhi	%r0, foo
+
+	lhi	%r0, -32769
+	lhi	%r0, 32768
+	lhi	%r0, foo
+
+#CHECK: error: offset out of range
+#CHECK: lhrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: lhrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: lhrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: lhrl	%r0, 0x100000000
+
+	lhrl	%r0, -0x1000000002
+	lhrl	%r0, -1
+	lhrl	%r0, 1
+	lhrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: lhy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lhy	%r0, 524288
+
+	lhy	%r0, -524289
+	lhy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llc	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llc	%r0, 524288
+
+	llc	%r0, -524289
+	llc	%r0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: llch	%r0, 0
+
+	llch	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: llgc	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llgc	%r0, 524288
+
+	llgc	%r0, -524289
+	llgc	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llgf	%r0, 524288
+
+	llgf	%r0, -524289
+	llgf	%r0, 524288
+
+#CHECK: error: offset out of range
+#CHECK: llgfrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: llgfrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: llgfrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: llgfrl	%r0, 0x100000000
+
+	llgfrl	%r0, -0x1000000002
+	llgfrl	%r0, -1
+	llgfrl	%r0, 1
+	llgfrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: llgh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llgh	%r0, 524288
+
+	llgh	%r0, -524289
+	llgh	%r0, 524288
+
+#CHECK: error: offset out of range
+#CHECK: llghrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: llghrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: llghrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: llghrl	%r0, 0x100000000
+
+	llghrl	%r0, -0x1000000002
+	llghrl	%r0, -1
+	llghrl	%r0, 1
+	llghrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: llh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llh	%r0, 524288
+
+	llh	%r0, -524289
+	llh	%r0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: llhh	%r0, 0
+
+	llhh	%r0, 0
+
+#CHECK: error: offset out of range
+#CHECK: llhrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: llhrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: llhrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: llhrl	%r0, 0x100000000
+
+	llhrl	%r0, -0x1000000002
+	llhrl	%r0, -1
+	llhrl	%r0, 1
+	llhrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: llihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llihf	%r0, 1 << 32
+
+	llihf	%r0, -1
+	llihf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: llihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llihh	%r0, 0x10000
+
+	llihh	%r0, -1
+	llihh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: llihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llihl	%r0, 0x10000
+
+	llihl	%r0, -1
+	llihl	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: llilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llilf	%r0, 1 << 32
+
+	llilf	%r0, -1
+	llilf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: llilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llilh	%r0, 0x10000
+
+	llilh	%r0, -1
+	llilh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: llill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llill	%r0, 0x10000
+
+	llill	%r0, -1
+	llill	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: lmg	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lmg	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lmg	%r0, %r0, 0(%r1,%r2)
+
+	lmg	%r0, %r0, -524289
+	lmg	%r0, %r0, 524288
+	lmg	%r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid register pair
+#CHECK: lnxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: lnxbr	%f2, %f0
+
+	lnxbr	%f0, %f2
+	lnxbr	%f2, %f0
+
+#CHECK: error: invalid register pair
+#CHECK: lpxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: lpxbr	%f2, %f0
+
+	lpxbr	%f0, %f2
+	lpxbr	%f2, %f0
+
+#CHECK: error: offset out of range
+#CHECK: lrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: lrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: lrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: lrl	%r0, 0x100000000
+
+	lrl	%r0, -0x1000000002
+	lrl	%r0, -1
+	lrl	%r0, 1
+	lrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: lrv	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lrv	%r0, 524288
+
+	lrv	%r0, -524289
+	lrv	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lrvg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lrvg	%r0, 524288
+
+	lrvg	%r0, -524289
+	lrvg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lt	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lt	%r0, 524288
+
+	lt	%r0, -524289
+	lt	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ltg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ltg	%r0, 524288
+
+	ltg	%r0, -524289
+	ltg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ltgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ltgf	%r0, 524288
+
+	ltgf	%r0, -524289
+	ltgf	%r0, 524288
+
+#CHECK: error: invalid register pair
+#CHECK: ltxbr	%f0, %f14
+#CHECK: error: invalid register pair
+#CHECK: ltxbr	%f14, %f0
+
+	ltxbr	%f0, %f14
+	ltxbr	%f14, %f0
+
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f2, %f0
+
+	lxr	%f0, %f2
+	lxr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: ly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ly	%r0, 524288
+
+	ly	%r0, -524289
+	ly	%r0, 524288
+
+#CHECK: error: invalid register pair
+#CHECK: lzxr	%f2
+
+	lzxr	%f2
+
+#CHECK: error: invalid operand
+#CHECK: madb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: madb	%f0, %f0, 4096
+
+	madb	%f0, %f0, -1
+	madb	%f0, %f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: maeb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: maeb	%f0, %f0, 4096
+
+	maeb	%f0, %f0, -1
+	maeb	%f0, %f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: mdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: mdb	%f0, 4096
+
+	mdb	%f0, -1
+	mdb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: mdeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: mdeb	%f0, 4096
+
+	mdeb	%f0, -1
+	mdeb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: meeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: meeb	%f0, 4096
+
+	meeb	%f0, -1
+	meeb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: mghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: mghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: mghi	%r0, foo
+
+	mghi	%r0, -32769
+	mghi	%r0, 32768
+	mghi	%r0, foo
+
+#CHECK: error: invalid operand
+#CHECK: mh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: mh	%r0, 4096
+
+	mh	%r0, -1
+	mh	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: mhi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: mhi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: mhi	%r0, foo
+
+	mhi	%r0, -32769
+	mhi	%r0, 32768
+	mhi	%r0, foo
+
+#CHECK: error: invalid operand
+#CHECK: mhy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: mhy	%r0, 524288
+
+	mhy	%r0, -524289
+	mhy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: mlg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: mlg	%r0, 524288
+#CHECK: error: invalid register pair
+#CHECK: mlg	%r1, 0
+
+	mlg	%r0, -524289
+	mlg	%r0, 524288
+	mlg	%r1, 0
+
+#CHECK: error: invalid register pair
+#CHECK: mlgr	%r1, %r0
+
+	mlgr	%r1, %r0
+
+#CHECK: error: invalid operand
+#CHECK: ms	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ms	%r0, 4096
+
+	ms	%r0, -1
+	ms	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: msdb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: msdb	%f0, %f0, 4096
+
+	msdb	%f0, %f0, -1
+	msdb	%f0, %f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: mseb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: mseb	%f0, %f0, 4096
+
+	mseb	%f0, %f0, -1
+	mseb	%f0, %f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: msfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: msfi	%r0, (1 << 31)
+
+	msfi	%r0, (-1 << 31) - 1
+	msfi	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: msg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: msg	%r0, 524288
+
+	msg	%r0, -524289
+	msg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: msgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: msgf	%r0, 524288
+
+	msgf	%r0, -524289
+	msgf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: msgfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: msgfi	%r0, (1 << 31)
+
+	msgfi	%r0, (-1 << 31) - 1
+	msgfi	%r0, (1 << 31)
+
+#CHECK: error: invalid operand
+#CHECK: msy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: msy	%r0, 524288
+
+	msy	%r0, -524289
+	msy	%r0, 524288
+
+#CHECK: error: missing length in address
+#CHECK: mvc	0, 0
+#CHECK: error: missing length in address
+#CHECK: mvc	0(%r1), 0(%r1)
+#CHECK: error: invalid use of length addressing
+#CHECK: mvc	0(1,%r1), 0(2,%r1)
+#CHECK: error: invalid operand
+#CHECK: mvc	0(0,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: mvc	0(257,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: mvc	-1(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: mvc	4096(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: mvc	0(1,%r1), -1(%r1)
+#CHECK: error: invalid operand
+#CHECK: mvc	0(1,%r1), 4096(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: mvc	0(1,%r0), 0(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: mvc	0(1,%r1), 0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvc	0(%r1,%r2), 0(%r1)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvc	0(1,%r2), 0(%r1,%r2)
+#CHECK: error: unknown token in expression
+#CHECK: mvc	0(-), 0
+
+	mvc	0, 0
+	mvc	0(%r1), 0(%r1)
+	mvc	0(1,%r1), 0(2,%r1)
+	mvc	0(0,%r1), 0(%r1)
+	mvc	0(257,%r1), 0(%r1)
+	mvc	-1(1,%r1), 0(%r1)
+	mvc	4096(1,%r1), 0(%r1)
+	mvc	0(1,%r1), -1(%r1)
+	mvc	0(1,%r1), 4096(%r1)
+	mvc	0(1,%r0), 0(%r1)
+	mvc	0(1,%r1), 0(%r0)
+	mvc	0(%r1,%r2), 0(%r1)
+	mvc	0(1,%r2), 0(%r1,%r2)
+	mvc	0(-), 0
+
+#CHECK: error: invalid operand
+#CHECK: mvghi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvghi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvghi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvghi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: mvghi	0, 32768
+
+	mvghi	-1, 0
+	mvghi	4096, 0
+	mvghi	0(%r1,%r2), 0
+	mvghi	0, -32769
+	mvghi	0, 32768
+
+#CHECK: error: invalid operand
+#CHECK: mvhhi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvhhi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvhhi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvhhi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: mvhhi	0, 32768
+
+	mvhhi	-1, 0
+	mvhhi	4096, 0
+	mvhhi	0(%r1,%r2), 0
+	mvhhi	0, -32769
+	mvhhi	0, 32768
+
+#CHECK: error: invalid operand
+#CHECK: mvhi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvhi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvhi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvhi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: mvhi	0, 32768
+
+	mvhi	-1, 0
+	mvhi	4096, 0
+	mvhi	0(%r1,%r2), 0
+	mvhi	0, -32769
+	mvhi	0, 32768
+
+#CHECK: error: invalid operand
+#CHECK: mvi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvi	0, -1
+#CHECK: error: invalid operand
+#CHECK: mvi	0, 256
+
+	mvi	-1, 0
+	mvi	4096, 0
+	mvi	0(%r1,%r2), 0
+	mvi	0, -1
+	mvi	0, 256
+
+#CHECK: error: invalid operand
+#CHECK: mviy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: mviy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mviy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mviy	0, -1
+#CHECK: error: invalid operand
+#CHECK: mviy	0, 256
+
+	mviy	-524289, 0
+	mviy	524288, 0
+	mviy	0(%r1,%r2), 0
+	mviy	0, -1
+	mviy	0, 256
+
+#CHECK: error: invalid register pair
+#CHECK: mxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: mxbr	%f2, %f0
+
+	mxbr	%f0, %f2
+	mxbr	%f2, %f0
+
+#CHECK: error: invalid register pair
+#CHECK: mxdb	%f2, 0
+#CHECK: error: invalid operand
+#CHECK: mxdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: mxdb	%f0, 4096
+
+	mxdb	%f2, 0
+	mxdb	%f0, -1
+	mxdb	%f0, 4096
+
+#CHECK: error: invalid register pair
+#CHECK: mxdbr	%f2, %f0
+
+	mxdbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: n	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: n	%r0, 4096
+
+	n	%r0, -1
+	n	%r0, 4096
+
+#CHECK: error: missing length in address
+#CHECK: nc	0, 0
+#CHECK: error: missing length in address
+#CHECK: nc	0(%r1), 0(%r1)
+#CHECK: error: invalid use of length addressing
+#CHECK: nc	0(1,%r1), 0(2,%r1)
+#CHECK: error: invalid operand
+#CHECK: nc	0(0,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: nc	0(257,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: nc	-1(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: nc	4096(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: nc	0(1,%r1), -1(%r1)
+#CHECK: error: invalid operand
+#CHECK: nc	0(1,%r1), 4096(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: nc	0(1,%r0), 0(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: nc	0(1,%r1), 0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: nc	0(%r1,%r2), 0(%r1)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: nc	0(1,%r2), 0(%r1,%r2)
+#CHECK: error: unknown token in expression
+#CHECK: nc	0(-), 0
+
+	nc	0, 0
+	nc	0(%r1), 0(%r1)
+	nc	0(1,%r1), 0(2,%r1)
+	nc	0(0,%r1), 0(%r1)
+	nc	0(257,%r1), 0(%r1)
+	nc	-1(1,%r1), 0(%r1)
+	nc	4096(1,%r1), 0(%r1)
+	nc	0(1,%r1), -1(%r1)
+	nc	0(1,%r1), 4096(%r1)
+	nc	0(1,%r0), 0(%r1)
+	nc	0(1,%r1), 0(%r0)
+	nc	0(%r1,%r2), 0(%r1)
+	nc	0(1,%r2), 0(%r1,%r2)
+	nc	0(-), 0
+
+#CHECK: error: invalid operand
+#CHECK: ng	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ng	%r0, 524288
+
+	ng	%r0, -524289
+	ng	%r0, 524288
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: ngrk	%r2,%r3,%r4
+
+	ngrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: ni	-1, 0
+#CHECK: error: invalid operand
+#CHECK: ni	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: ni	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: ni	0, -1
+#CHECK: error: invalid operand
+#CHECK: ni	0, 256
+
+	ni	-1, 0
+	ni	4096, 0
+	ni	0(%r1,%r2), 0
+	ni	0, -1
+	ni	0, 256
+
+#CHECK: error: invalid operand
+#CHECK: nihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nihf	%r0, 1 << 32
+
+	nihf	%r0, -1
+	nihf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: nihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nihh	%r0, 0x10000
+
+	nihh	%r0, -1
+	nihh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: nihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nihl	%r0, 0x10000
+
+	nihl	%r0, -1
+	nihl	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: nilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nilf	%r0, 1 << 32
+
+	nilf	%r0, -1
+	nilf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: nilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nilh	%r0, 0x10000
+
+	nilh	%r0, -1
+	nilh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: nill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nill	%r0, 0x10000
+
+	nill	%r0, -1
+	nill	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: niy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: niy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: niy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: niy	0, -1
+#CHECK: error: invalid operand
+#CHECK: niy	0, 256
+
+	niy	-524289, 0
+	niy	524288, 0
+	niy	0(%r1,%r2), 0
+	niy	0, -1
+	niy	0, 256
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: nrk	%r2,%r3,%r4
+
+	nrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: ny	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ny	%r0, 524288
+
+	ny	%r0, -524289
+	ny	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: o	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: o	%r0, 4096
+
+	o	%r0, -1
+	o	%r0, 4096
+
+#CHECK: error: missing length in address
+#CHECK: oc	0, 0
+#CHECK: error: missing length in address
+#CHECK: oc	0(%r1), 0(%r1)
+#CHECK: error: invalid use of length addressing
+#CHECK: oc	0(1,%r1), 0(2,%r1)
+#CHECK: error: invalid operand
+#CHECK: oc	0(0,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: oc	0(257,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: oc	-1(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: oc	4096(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: oc	0(1,%r1), -1(%r1)
+#CHECK: error: invalid operand
+#CHECK: oc	0(1,%r1), 4096(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: oc	0(1,%r0), 0(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: oc	0(1,%r1), 0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: oc	0(%r1,%r2), 0(%r1)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: oc	0(1,%r2), 0(%r1,%r2)
+#CHECK: error: unknown token in expression
+#CHECK: oc	0(-), 0
+
+	oc	0, 0
+	oc	0(%r1), 0(%r1)
+	oc	0(1,%r1), 0(2,%r1)
+	oc	0(0,%r1), 0(%r1)
+	oc	0(257,%r1), 0(%r1)
+	oc	-1(1,%r1), 0(%r1)
+	oc	4096(1,%r1), 0(%r1)
+	oc	0(1,%r1), -1(%r1)
+	oc	0(1,%r1), 4096(%r1)
+	oc	0(1,%r0), 0(%r1)
+	oc	0(1,%r1), 0(%r0)
+	oc	0(%r1,%r2), 0(%r1)
+	oc	0(1,%r2), 0(%r1,%r2)
+	oc	0(-), 0
+
+#CHECK: error: invalid operand
+#CHECK: og	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: og	%r0, 524288
+
+	og	%r0, -524289
+	og	%r0, 524288
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: ogrk	%r2,%r3,%r4
+
+	ogrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: oi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: oi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: oi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: oi	0, -1
+#CHECK: error: invalid operand
+#CHECK: oi	0, 256
+
+	oi	-1, 0
+	oi	4096, 0
+	oi	0(%r1,%r2), 0
+	oi	0, -1
+	oi	0, 256
+
+#CHECK: error: invalid operand
+#CHECK: oihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oihf	%r0, 1 << 32
+
+	oihf	%r0, -1
+	oihf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: oihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oihh	%r0, 0x10000
+
+	oihh	%r0, -1
+	oihh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: oihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oihl	%r0, 0x10000
+
+	oihl	%r0, -1
+	oihl	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: oilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oilf	%r0, 1 << 32
+
+	oilf	%r0, -1
+	oilf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: oilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oilh	%r0, 0x10000
+
+	oilh	%r0, -1
+	oilh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: oill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oill	%r0, 0x10000
+
+	oill	%r0, -1
+	oill	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: oiy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: oiy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: oiy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: oiy	0, -1
+#CHECK: error: invalid operand
+#CHECK: oiy	0, 256
+
+	oiy	-524289, 0
+	oiy	524288, 0
+	oiy	0(%r1,%r2), 0
+	oiy	0, -1
+	oiy	0, 256
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: ork	%r2,%r3,%r4
+
+	ork	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: oy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: oy	%r0, 524288
+
+	oy	%r0, -524289
+	oy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: pfd	-1, 0
+#CHECK: error: invalid operand
+#CHECK: pfd	16, 0
+#CHECK: error: invalid operand
+#CHECK: pfd	1, -524289
+#CHECK: error: invalid operand
+#CHECK: pfd	1, 524288
+
+	pfd	-1, 0
+	pfd	16, 0
+	pfd	1, -524289
+	pfd	1, 524288
+
+#CHECK: error: invalid operand
+#CHECK: pfdrl	-1, 0
+#CHECK: error: invalid operand
+#CHECK: pfdrl	16, 0
+#CHECK: error: offset out of range
+#CHECK: pfdrl	1, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: pfdrl	1, -1
+#CHECK: error: offset out of range
+#CHECK: pfdrl	1, 1
+#CHECK: error: offset out of range
+#CHECK: pfdrl	1, 0x100000000
+
+	pfdrl	-1, 0
+	pfdrl	16, 0
+	pfdrl	1, -0x1000000002
+	pfdrl	1, -1
+	pfdrl	1, 1
+	pfdrl	1, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,256,0,0
+
+	risbg	%r0,%r0,0,0,-1
+	risbg	%r0,%r0,0,0,64
+	risbg	%r0,%r0,0,-1,0
+	risbg	%r0,%r0,0,256,0
+	risbg	%r0,%r0,-1,0,0
+	risbg	%r0,%r0,256,0,0
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: risbhg	%r1, %r2, 0, 0, 0
+
+	risbhg	%r1, %r2, 0, 0, 0
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: risblg	%r1, %r2, 0, 0, 0
+
+	risblg	%r1, %r2, 0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: rnsbg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: rnsbg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: rnsbg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: rnsbg	%r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: rnsbg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: rnsbg	%r0,%r0,256,0,0
+
+	rnsbg	%r0,%r0,0,0,-1
+	rnsbg	%r0,%r0,0,0,64
+	rnsbg	%r0,%r0,0,-1,0
+	rnsbg	%r0,%r0,0,256,0
+	rnsbg	%r0,%r0,-1,0,0
+	rnsbg	%r0,%r0,256,0,0
+
+#CHECK: error: invalid operand
+#CHECK: rosbg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: rosbg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: rosbg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: rosbg	%r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: rosbg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: rosbg	%r0,%r0,256,0,0
+
+	rosbg	%r0,%r0,0,0,-1
+	rosbg	%r0,%r0,0,0,64
+	rosbg	%r0,%r0,0,-1,0
+	rosbg	%r0,%r0,0,256,0
+	rosbg	%r0,%r0,-1,0,0
+	rosbg	%r0,%r0,256,0,0
+
+#CHECK: error: invalid operand
+#CHECK: rxsbg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: rxsbg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: rxsbg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: rxsbg	%r0,%r0,0,256,0
+#CHECK: error: invalid operand
+#CHECK: rxsbg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: rxsbg	%r0,%r0,256,0,0
+
+	rxsbg	%r0,%r0,0,0,-1
+	rxsbg	%r0,%r0,0,0,64
+	rxsbg	%r0,%r0,0,-1,0
+	rxsbg	%r0,%r0,0,256,0
+	rxsbg	%r0,%r0,-1,0,0
+	rxsbg	%r0,%r0,256,0,0
+
+#CHECK: error: invalid operand
+#CHECK: rll	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: rll	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: rll	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: rll	%r0,%r0,0(%r1,%r2)
+
+	rll	%r0,%r0,-524289
+	rll	%r0,%r0,524288
+	rll	%r0,%r0,0(%r0)
+	rll	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: rllg	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: rllg	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: rllg	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: rllg	%r0,%r0,0(%r1,%r2)
+
+	rllg	%r0,%r0,-524289
+	rllg	%r0,%r0,524288
+	rllg	%r0,%r0,0(%r0)
+	rllg	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: s	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: s	%r0, 4096
+
+	s	%r0, -1
+	s	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: sdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: sdb	%f0, 4096
+
+	sdb	%f0, -1
+	sdb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: seb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: seb	%f0, 4096
+
+	seb	%f0, -1
+	seb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: sg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sg	%r0, 524288
+
+	sg	%r0, -524289
+	sg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: sgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sgf	%r0, 524288
+
+	sgf	%r0, -524289
+	sgf	%r0, 524288
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: sgrk	%r2,%r3,%r4
+
+	sgrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: sh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: sh	%r0, 4096
+
+	sh	%r0, -1
+	sh	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: shy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: shy	%r0, 524288
+
+	shy	%r0, -524289
+	shy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: sl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: sl	%r0, 4096
+
+	sl	%r0, -1
+	sl	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: slb	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slb	%r0, 524288
+
+	slb	%r0, -524289
+	slb	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: slbg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slbg	%r0, 524288
+
+	slbg	%r0, -524289
+	slbg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: slfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: slfi	%r0, (1 << 32)
+
+	slfi	%r0, -1
+	slfi	%r0, (1 << 32)
+
+#CHECK: error: invalid operand
+#CHECK: slg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slg	%r0, 524288
+
+	slg	%r0, -524289
+	slg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: slgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slgf	%r0, 524288
+
+	slgf	%r0, -524289
+	slgf	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: slgfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: slgfi	%r0, (1 << 32)
+
+	slgfi	%r0, -1
+	slgfi	%r0, (1 << 32)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: slgrk	%r2,%r3,%r4
+
+	slgrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: sll	%r0,-1
+#CHECK: error: invalid operand
+#CHECK: sll	%r0,4096
+#CHECK: error: %r0 used in an address
+#CHECK: sll	%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sll	%r0,0(%r1,%r2)
+
+	sll	%r0,-1
+	sll	%r0,4096
+	sll	%r0,0(%r0)
+	sll	%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sllg	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: sllg	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: sllg	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sllg	%r0,%r0,0(%r1,%r2)
+
+	sllg	%r0,%r0,-524289
+	sllg	%r0,%r0,524288
+	sllg	%r0,%r0,0(%r0)
+	sllg	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: sllk	%r2,%r3,4(%r5)
+
+	sllk	%r2,%r3,4(%r5)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: slrk	%r2,%r3,%r4
+
+	slrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: sly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sly	%r0, 524288
+
+	sly	%r0, -524289
+	sly	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: sqdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: sqdb	%f0, 4096
+
+	sqdb	%f0, -1
+	sqdb	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: sqeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: sqeb	%f0, 4096
+
+	sqeb	%f0, -1
+	sqeb	%f0, 4096
+
+#CHECK: error: invalid register pair
+#CHECK: sqxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: sqxbr	%f2, %f0
+
+	sqxbr	%f0, %f2
+	sqxbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: sra	%r0,-1
+#CHECK: error: invalid operand
+#CHECK: sra	%r0,4096
+#CHECK: error: %r0 used in an address
+#CHECK: sra	%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sra	%r0,0(%r1,%r2)
+
+	sra	%r0,-1
+	sra	%r0,4096
+	sra	%r0,0(%r0)
+	sra	%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: srag	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: srag	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: srag	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srag	%r0,%r0,0(%r1,%r2)
+
+	srag	%r0,%r0,-524289
+	srag	%r0,%r0,524288
+	srag	%r0,%r0,0(%r0)
+	srag	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: srak	%r2,%r3,4(%r5)
+
+	srak	%r2,%r3,4(%r5)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: srk	%r2,%r3,%r4
+
+	srk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: srl	%r0,-1
+#CHECK: error: invalid operand
+#CHECK: srl	%r0,4096
+#CHECK: error: %r0 used in an address
+#CHECK: srl	%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srl	%r0,0(%r1,%r2)
+
+	srl	%r0,-1
+	srl	%r0,4096
+	srl	%r0,0(%r0)
+	srl	%r0,0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: srlg	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: srlg	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: srlg	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srlg	%r0,%r0,0(%r1,%r2)
+
+	srlg	%r0,%r0,-524289
+	srlg	%r0,%r0,524288
+	srlg	%r0,%r0,0(%r0)
+	srlg	%r0,%r0,0(%r1,%r2)
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: srlk	%r2,%r3,4(%r5)
+
+	srlk	%r2,%r3,4(%r5)
+
+#CHECK: error: invalid operand
+#CHECK: st	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: st	%r0, 4096
+
+	st	%r0, -1
+	st	%r0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: stc	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: stc	%r0, 4096
+
+	stc	%r0, -1
+	stc	%r0, 4096
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: stch	%r0, 0
+
+	stch	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: stcy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stcy	%r0, 524288
+
+	stcy	%r0, -524289
+	stcy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: std	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: std	%f0, 4096
+
+	std	%f0, -1
+	std	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: stdy	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: stdy	%f0, 524288
+
+	stdy	%f0, -524289
+	stdy	%f0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ste	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ste	%f0, 4096
+
+	ste	%f0, -1
+	ste	%f0, 4096
+
+#CHECK: error: invalid operand
+#CHECK: stey	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: stey	%f0, 524288
+
+	stey	%f0, -524289
+	stey	%f0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: stg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stg	%r0, 524288
+
+	stg	%r0, -524289
+	stg	%r0, 524288
+
+#CHECK: error: offset out of range
+#CHECK: stgrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: stgrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: stgrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: stgrl	%r0, 0x100000000
+
+	stgrl	%r0, -0x1000000002
+	stgrl	%r0, -1
+	stgrl	%r0, 1
+	stgrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: sth	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: sth	%r0, 4096
+
+	sth	%r0, -1
+	sth	%r0, 4096
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: sthh	%r0, 0
+
+	sthh	%r0, 0
+
+#CHECK: error: offset out of range
+#CHECK: sthrl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: sthrl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: sthrl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: sthrl	%r0, 0x100000000
+
+	sthrl	%r0, -0x1000000002
+	sthrl	%r0, -1
+	sthrl	%r0, 1
+	sthrl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: sthy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sthy	%r0, 524288
+
+	sthy	%r0, -524289
+	sthy	%r0, 524288
+
+#CHECK: error: {{(instruction requires: high-word)?}}
+#CHECK: stfh	%r0, 0
+
+	stfh	%r0, 0
+
+#CHECK: error: invalid operand
+#CHECK: stmg	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stmg	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stmg	%r0, %r0, 0(%r1,%r2)
+
+	stmg	%r0, %r0, -524289
+	stmg	%r0, %r0, 524288
+	stmg	%r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: offset out of range
+#CHECK: strl	%r0, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: strl	%r0, -1
+#CHECK: error: offset out of range
+#CHECK: strl	%r0, 1
+#CHECK: error: offset out of range
+#CHECK: strl	%r0, 0x100000000
+
+	strl	%r0, -0x1000000002
+	strl	%r0, -1
+	strl	%r0, 1
+	strl	%r0, 0x100000000
+
+#CHECK: error: invalid operand
+#CHECK: strv	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: strv	%r0, 524288
+
+	strv	%r0, -524289
+	strv	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: strvg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: strvg	%r0, 524288
+
+	strvg	%r0, -524289
+	strvg	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: sty	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sty	%r0, 524288
+
+	sty	%r0, -524289
+	sty	%r0, 524288
+
+#CHECK: error: invalid register pair
+#CHECK: sxbr	%f0, %f2
+#CHECK: error: invalid register pair
+#CHECK: sxbr	%f2, %f0
+
+	sxbr	%f0, %f2
+	sxbr	%f2, %f0
+
+#CHECK: error: invalid operand
+#CHECK: sy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sy	%r0, 524288
+
+	sy	%r0, -524289
+	sy	%r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: tm	-1, 0
+#CHECK: error: invalid operand
+#CHECK: tm	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tm	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tm	0, -1
+#CHECK: error: invalid operand
+#CHECK: tm	0, 256
+
+	tm	-1, 0
+	tm	4096, 0
+	tm	0(%r1,%r2), 0
+	tm	0, -1
+	tm	0, 256
+
+#CHECK: error: invalid operand
+#CHECK: tmhh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: tmhh	%r0, 0x10000
+
+	tmhh	%r0, -1
+	tmhh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: tmhl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: tmhl	%r0, 0x10000
+
+	tmhl	%r0, -1
+	tmhl	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: tmlh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: tmlh	%r0, 0x10000
+
+	tmlh	%r0, -1
+	tmlh	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: tmll	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: tmll	%r0, 0x10000
+
+	tmll	%r0, -1
+	tmll	%r0, 0x10000
+
+#CHECK: error: invalid operand
+#CHECK: tmy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: tmy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tmy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tmy	0, -1
+#CHECK: error: invalid operand
+#CHECK: tmy	0, 256
+
+	tmy	-524289, 0
+	tmy	524288, 0
+	tmy	0(%r1,%r2), 0
+	tmy	0, -1
+	tmy	0, 256
+
+#CHECK: error: invalid operand
+#CHECK: x	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: x	%r0, 4096
+
+	x	%r0, -1
+	x	%r0, 4096
+
+#CHECK: error: missing length in address
+#CHECK: xc	0, 0
+#CHECK: error: missing length in address
+#CHECK: xc	0(%r1), 0(%r1)
+#CHECK: error: invalid use of length addressing
+#CHECK: xc	0(1,%r1), 0(2,%r1)
+#CHECK: error: invalid operand
+#CHECK: xc	0(0,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: xc	0(257,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: xc	-1(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: xc	4096(1,%r1), 0(%r1)
+#CHECK: error: invalid operand
+#CHECK: xc	0(1,%r1), -1(%r1)
+#CHECK: error: invalid operand
+#CHECK: xc	0(1,%r1), 4096(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: xc	0(1,%r0), 0(%r1)
+#CHECK: error: %r0 used in an address
+#CHECK: xc	0(1,%r1), 0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: xc	0(%r1,%r2), 0(%r1)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: xc	0(1,%r2), 0(%r1,%r2)
+#CHECK: error: unknown token in expression
+#CHECK: xc	0(-), 0
+
+	xc	0, 0
+	xc	0(%r1), 0(%r1)
+	xc	0(1,%r1), 0(2,%r1)
+	xc	0(0,%r1), 0(%r1)
+	xc	0(257,%r1), 0(%r1)
+	xc	-1(1,%r1), 0(%r1)
+	xc	4096(1,%r1), 0(%r1)
+	xc	0(1,%r1), -1(%r1)
+	xc	0(1,%r1), 4096(%r1)
+	xc	0(1,%r0), 0(%r1)
+	xc	0(1,%r1), 0(%r0)
+	xc	0(%r1,%r2), 0(%r1)
+	xc	0(1,%r2), 0(%r1,%r2)
+	xc	0(-), 0
+
+#CHECK: error: invalid operand
+#CHECK: xg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: xg	%r0, 524288
+
+	xg	%r0, -524289
+	xg	%r0, 524288
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: xgrk	%r2,%r3,%r4
+
+	xgrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: xi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: xi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: xi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: xi	0, -1
+#CHECK: error: invalid operand
+#CHECK: xi	0, 256
+
+	xi	-1, 0
+	xi	4096, 0
+	xi	0(%r1,%r2), 0
+	xi	0, -1
+	xi	0, 256
+
+#CHECK: error: invalid operand
+#CHECK: xihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: xihf	%r0, 1 << 32
+
+	xihf	%r0, -1
+	xihf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: xilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: xilf	%r0, 1 << 32
+
+	xilf	%r0, -1
+	xilf	%r0, 1 << 32
+
+#CHECK: error: invalid operand
+#CHECK: xiy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: xiy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: xiy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: xiy	0, -1
+#CHECK: error: invalid operand
+#CHECK: xiy	0, 256
+
+	xiy	-524289, 0
+	xiy	524288, 0
+	xiy	0(%r1,%r2), 0
+	xiy	0, -1
+	xiy	0, 256
+
+#CHECK: error: {{(instruction requires: distinct-ops)?}}
+#CHECK: xrk	%r2,%r3,%r4
+
+	xrk	%r2,%r3,%r4
+
+#CHECK: error: invalid operand
+#CHECK: xy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: xy	%r0, 524288
+
+	xy	%r0, -524289
+	xy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-basr-01.s b/test/MC/SystemZ/insn-basr-01.s
deleted file mode 100644
index a66cee8f6352..000000000000
--- a/test/MC/SystemZ/insn-basr-01.s
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: basr	%r0, %r1                # encoding: [0x0d,0x01]
-#CHECK: basr	%r0, %r15               # encoding: [0x0d,0x0f]
-#CHECK: basr	%r14, %r9               # encoding: [0x0d,0xe9]
-#CHECK: basr	%r15, %r1               # encoding: [0x0d,0xf1]
-
-	basr	%r0,%r1
-	basr	%r0,%r15
-	basr	%r14,%r9
-	basr	%r15,%r1
-
diff --git a/test/MC/SystemZ/insn-br-01.s b/test/MC/SystemZ/insn-br-01.s
deleted file mode 100644
index 8e2f2aac78a5..000000000000
--- a/test/MC/SystemZ/insn-br-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: br	%r1                     # encoding: [0x07,0xf1]
-#CHECK: br	%r14                    # encoding: [0x07,0xfe]
-#CHECK: br	%r15                    # encoding: [0x07,0xff]
-
-	br	%r1
-	br	%r14
-	br	%r15
diff --git a/test/MC/SystemZ/insn-bras-01.s b/test/MC/SystemZ/insn-bras-01.s
deleted file mode 100644
index 89f7f77477dd..000000000000
--- a/test/MC/SystemZ/insn-bras-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: bras	%r0, foo                # encoding: [0xa7,0x05,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: bras	%r14, foo               # encoding: [0xa7,0xe5,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: bras	%r15, foo               # encoding: [0xa7,0xf5,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	bras	%r0,foo
-	bras	%r14,foo
-	bras	%r15,foo
-
-#CHECK: bras	%r0, bar+100                # encoding: [0xa7,0x05,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-#CHECK: bras	%r14, bar+100               # encoding: [0xa7,0xe5,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-#CHECK: bras	%r15, bar+100               # encoding: [0xa7,0xf5,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	bras	%r0,bar+100
-	bras	%r14,bar+100
-	bras	%r15,bar+100
-
-#CHECK: bras	%r0, bar@PLT                # encoding: [0xa7,0x05,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-#CHECK: bras	%r14, bar@PLT               # encoding: [0xa7,0xe5,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-#CHECK: bras	%r15, bar@PLT               # encoding: [0xa7,0xf5,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	bras	%r0,bar@PLT
-	bras	%r14,bar@PLT
-	bras	%r15,bar@PLT
diff --git a/test/MC/SystemZ/insn-brasl-01.s b/test/MC/SystemZ/insn-brasl-01.s
deleted file mode 100644
index 86d0ced9b33d..000000000000
--- a/test/MC/SystemZ/insn-brasl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: brasl	%r0, foo                # encoding: [0xc0,0x05,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: brasl	%r14, foo               # encoding: [0xc0,0xe5,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: brasl	%r15, foo               # encoding: [0xc0,0xf5,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brasl	%r0,foo
-	brasl	%r14,foo
-	brasl	%r15,foo
-
-#CHECK: brasl	%r0, bar+100                # encoding: [0xc0,0x05,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: brasl	%r14, bar+100               # encoding: [0xc0,0xe5,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: brasl	%r15, bar+100               # encoding: [0xc0,0xf5,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	brasl	%r0,bar+100
-	brasl	%r14,bar+100
-	brasl	%r15,bar+100
-
-#CHECK: brasl	%r0, bar@PLT                # encoding: [0xc0,0x05,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-#CHECK: brasl	%r14, bar@PLT               # encoding: [0xc0,0xe5,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-#CHECK: brasl	%r15, bar@PLT               # encoding: [0xc0,0xf5,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	brasl	%r0,bar@PLT
-	brasl	%r14,bar@PLT
-	brasl	%r15,bar@PLT
diff --git a/test/MC/SystemZ/insn-brc-01.s b/test/MC/SystemZ/insn-brc-01.s
deleted file mode 100644
index a92ea45ecfcc..000000000000
--- a/test/MC/SystemZ/insn-brc-01.s
+++ /dev/null
@@ -1,238 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: brc	0, foo                  # encoding: [0xa7,0x04,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	0, foo
-
-#CHECK: brc	1, foo                  # encoding: [0xa7,0x14,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jo	foo                     # encoding: [0xa7,0x14,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	1, foo
-	jo	foo
-
-#CHECK: brc	2, foo                  # encoding: [0xa7,0x24,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jh	foo                     # encoding: [0xa7,0x24,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	2, foo
-	jh	foo
-
-#CHECK: brc	3, foo                  # encoding: [0xa7,0x34,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jnle	foo                     # encoding: [0xa7,0x34,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	3, foo
-	jnle	foo
-
-#CHECK: brc	4, foo                  # encoding: [0xa7,0x44,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jl	foo                     # encoding: [0xa7,0x44,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	4, foo
-	jl	foo
-
-#CHECK: brc	5, foo                  # encoding: [0xa7,0x54,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jnhe	foo                     # encoding: [0xa7,0x54,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	5, foo
-	jnhe	foo
-
-#CHECK: brc	6, foo                  # encoding: [0xa7,0x64,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jlh	foo                     # encoding: [0xa7,0x64,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	6, foo
-	jlh	foo
-
-#CHECK: brc	7, foo                  # encoding: [0xa7,0x74,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jne	foo                     # encoding: [0xa7,0x74,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	7, foo
-	jne	foo
-
-#CHECK: brc	8, foo                  # encoding: [0xa7,0x84,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: je	foo                     # encoding: [0xa7,0x84,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	8, foo
-	je	foo
-
-#CHECK: brc	9, foo                  # encoding: [0xa7,0x94,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jnlh	foo                     # encoding: [0xa7,0x94,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	9, foo
-	jnlh	foo
-
-#CHECK: brc	10, foo                 # encoding: [0xa7,0xa4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jhe	foo                     # encoding: [0xa7,0xa4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	10, foo
-	jhe	foo
-
-#CHECK: brc	11, foo                 # encoding: [0xa7,0xb4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jnl	foo                     # encoding: [0xa7,0xb4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	11, foo
-	jnl	foo
-
-#CHECK: brc	12, foo                 # encoding: [0xa7,0xc4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jle	foo                     # encoding: [0xa7,0xc4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	12, foo
-	jle	foo
-
-#CHECK: brc	13, foo                 # encoding: [0xa7,0xd4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jnh	foo                     # encoding: [0xa7,0xd4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	13, foo
-	jnh	foo
-
-#CHECK: brc	14, foo                 # encoding: [0xa7,0xe4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: jno	foo                     # encoding: [0xa7,0xe4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	14, foo
-	jno	foo
-
-#CHECK: brc	15, foo                 # encoding: [0xa7,0xf4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-#CHECK: j	foo                     # encoding: [0xa7,0xf4,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
-	brc	15, foo
-	j	foo
-
-#CHECK: brc	0, bar+100              # encoding: [0xa7,0x04,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	brc	0, bar+100
-
-#CHECK: jo	bar+100                 # encoding: [0xa7,0x14,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jo	bar+100
-
-#CHECK: jh	bar+100                 # encoding: [0xa7,0x24,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jh	bar+100
-
-#CHECK: jnle	bar+100                 # encoding: [0xa7,0x34,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jnle	bar+100
-
-#CHECK: jl	bar+100                 # encoding: [0xa7,0x44,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jl	bar+100
-
-#CHECK: jnhe	bar+100                 # encoding: [0xa7,0x54,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jnhe	bar+100
-
-#CHECK: jlh	bar+100                 # encoding: [0xa7,0x64,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jlh	bar+100
-
-#CHECK: jne	bar+100                 # encoding: [0xa7,0x74,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jne	bar+100
-
-#CHECK: je	bar+100                 # encoding: [0xa7,0x84,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	je	bar+100
-
-#CHECK: jnlh	bar+100                 # encoding: [0xa7,0x94,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jnlh	bar+100
-
-#CHECK: jhe	bar+100                 # encoding: [0xa7,0xa4,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jhe	bar+100
-
-#CHECK: jnl	bar+100                 # encoding: [0xa7,0xb4,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jnl	bar+100
-
-#CHECK: jle	bar+100                 # encoding: [0xa7,0xc4,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jle	bar+100
-
-#CHECK: jnh	bar+100                 # encoding: [0xa7,0xd4,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jnh	bar+100
-
-#CHECK: jno	bar+100                 # encoding: [0xa7,0xe4,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	jno	bar+100
-
-#CHECK: j	bar+100                 # encoding: [0xa7,0xf4,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
-	j	bar+100
-
-#CHECK: brc	0, bar@PLT              # encoding: [0xa7,0x04,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	brc	0, bar@PLT
-
-#CHECK: jo	bar@PLT                 # encoding: [0xa7,0x14,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jo	bar@PLT
-
-#CHECK: jh	bar@PLT                 # encoding: [0xa7,0x24,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jh	bar@PLT
-
-#CHECK: jnle	bar@PLT                 # encoding: [0xa7,0x34,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jnle	bar@PLT
-
-#CHECK: jl	bar@PLT                 # encoding: [0xa7,0x44,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jl	bar@PLT
-
-#CHECK: jnhe	bar@PLT                 # encoding: [0xa7,0x54,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jnhe	bar@PLT
-
-#CHECK: jlh	bar@PLT                 # encoding: [0xa7,0x64,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jlh	bar@PLT
-
-#CHECK: jne	bar@PLT                 # encoding: [0xa7,0x74,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jne	bar@PLT
-
-#CHECK: je	bar@PLT                 # encoding: [0xa7,0x84,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	je	bar@PLT
-
-#CHECK: jnlh	bar@PLT                 # encoding: [0xa7,0x94,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jnlh	bar@PLT
-
-#CHECK: jhe	bar@PLT                 # encoding: [0xa7,0xa4,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jhe	bar@PLT
-
-#CHECK: jnl	bar@PLT                 # encoding: [0xa7,0xb4,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jnl	bar@PLT
-
-#CHECK: jle	bar@PLT                 # encoding: [0xa7,0xc4,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jle	bar@PLT
-
-#CHECK: jnh	bar@PLT                 # encoding: [0xa7,0xd4,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jnh	bar@PLT
-
-#CHECK: jno	bar@PLT                 # encoding: [0xa7,0xe4,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	jno	bar@PLT
-
-#CHECK: j	bar@PLT                 # encoding: [0xa7,0xf4,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
-	j	bar@PLT
diff --git a/test/MC/SystemZ/insn-brc-02.s b/test/MC/SystemZ/insn-brc-02.s
deleted file mode 100644
index 941cc459f38a..000000000000
--- a/test/MC/SystemZ/insn-brc-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: brc	foo, bar
-#CHECK: error: invalid operand
-#CHECK: brc	-1, bar
-#CHECK: error: invalid operand
-#CHECK: brc	16, bar
-
-	brc	foo, bar
-	brc	-1, bar
-	brc	16, bar
diff --git a/test/MC/SystemZ/insn-brcl-01.s b/test/MC/SystemZ/insn-brcl-01.s
deleted file mode 100644
index f7138bf5be39..000000000000
--- a/test/MC/SystemZ/insn-brcl-01.s
+++ /dev/null
@@ -1,238 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: brcl	0, foo                  # encoding: [0xc0,0x04,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	0, foo
-
-#CHECK: brcl	1, foo                  # encoding: [0xc0,0x14,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgo	foo                     # encoding: [0xc0,0x14,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	1, foo
-	jgo	foo
-
-#CHECK: brcl	2, foo                  # encoding: [0xc0,0x24,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgh	foo                     # encoding: [0xc0,0x24,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	2, foo
-	jgh	foo
-
-#CHECK: brcl	3, foo                  # encoding: [0xc0,0x34,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgnle	foo                     # encoding: [0xc0,0x34,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	3, foo
-	jgnle	foo
-
-#CHECK: brcl	4, foo                  # encoding: [0xc0,0x44,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgl	foo                     # encoding: [0xc0,0x44,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	4, foo
-	jgl	foo
-
-#CHECK: brcl	5, foo                  # encoding: [0xc0,0x54,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgnhe	foo                     # encoding: [0xc0,0x54,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	5, foo
-	jgnhe	foo
-
-#CHECK: brcl	6, foo                  # encoding: [0xc0,0x64,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jglh	foo                     # encoding: [0xc0,0x64,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	6, foo
-	jglh	foo
-
-#CHECK: brcl	7, foo                  # encoding: [0xc0,0x74,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgne	foo                     # encoding: [0xc0,0x74,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	7, foo
-	jgne	foo
-
-#CHECK: brcl	8, foo                  # encoding: [0xc0,0x84,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jge	foo                     # encoding: [0xc0,0x84,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	8, foo
-	jge	foo
-
-#CHECK: brcl	9, foo                  # encoding: [0xc0,0x94,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgnlh	foo                     # encoding: [0xc0,0x94,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	9, foo
-	jgnlh	foo
-
-#CHECK: brcl	10, foo                 # encoding: [0xc0,0xa4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jghe	foo                     # encoding: [0xc0,0xa4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	10, foo
-	jghe	foo
-
-#CHECK: brcl	11, foo                 # encoding: [0xc0,0xb4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgnl	foo                     # encoding: [0xc0,0xb4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	11, foo
-	jgnl	foo
-
-#CHECK: brcl	12, foo                 # encoding: [0xc0,0xc4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgle	foo                     # encoding: [0xc0,0xc4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	12, foo
-	jgle	foo
-
-#CHECK: brcl	13, foo                 # encoding: [0xc0,0xd4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgnh	foo                     # encoding: [0xc0,0xd4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	13, foo
-	jgnh	foo
-
-#CHECK: brcl	14, foo                 # encoding: [0xc0,0xe4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jgno	foo                     # encoding: [0xc0,0xe4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	14, foo
-	jgno	foo
-
-#CHECK: brcl	15, foo                 # encoding: [0xc0,0xf4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: jg	foo                     # encoding: [0xc0,0xf4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-	brcl	15, foo
-	jg	foo
-
-#CHECK: brcl	0, bar+100              # encoding: [0xc0,0x04,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	brcl	0, bar+100
-
-#CHECK: jgo	bar+100                 # encoding: [0xc0,0x14,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgo	bar+100
-
-#CHECK: jgh	bar+100                 # encoding: [0xc0,0x24,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgh	bar+100
-
-#CHECK: jgnle	bar+100                 # encoding: [0xc0,0x34,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgnle	bar+100
-
-#CHECK: jgl	bar+100                 # encoding: [0xc0,0x44,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgl	bar+100
-
-#CHECK: jgnhe	bar+100                 # encoding: [0xc0,0x54,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgnhe	bar+100
-
-#CHECK: jglh	bar+100                 # encoding: [0xc0,0x64,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jglh	bar+100
-
-#CHECK: jgne	bar+100                 # encoding: [0xc0,0x74,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgne	bar+100
-
-#CHECK: jge	bar+100                 # encoding: [0xc0,0x84,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jge	bar+100
-
-#CHECK: jgnlh	bar+100                 # encoding: [0xc0,0x94,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgnlh	bar+100
-
-#CHECK: jghe	bar+100                 # encoding: [0xc0,0xa4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jghe	bar+100
-
-#CHECK: jgnl	bar+100                 # encoding: [0xc0,0xb4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgnl	bar+100
-
-#CHECK: jgle	bar+100                 # encoding: [0xc0,0xc4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgle	bar+100
-
-#CHECK: jgnh	bar+100                 # encoding: [0xc0,0xd4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgnh	bar+100
-
-#CHECK: jgno	bar+100                 # encoding: [0xc0,0xe4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jgno	bar+100
-
-#CHECK: jg	bar+100                 # encoding: [0xc0,0xf4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-	jg	bar+100
-
-#CHECK: brcl	0, bar@PLT              # encoding: [0xc0,0x04,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	brcl	0, bar@PLT
-
-#CHECK: jgo	bar@PLT                 # encoding: [0xc0,0x14,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgo	bar@PLT
-
-#CHECK: jgh	bar@PLT                 # encoding: [0xc0,0x24,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgh	bar@PLT
-
-#CHECK: jgnle	bar@PLT                 # encoding: [0xc0,0x34,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgnle	bar@PLT
-
-#CHECK: jgl	bar@PLT                 # encoding: [0xc0,0x44,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgl	bar@PLT
-
-#CHECK: jgnhe	bar@PLT                 # encoding: [0xc0,0x54,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgnhe	bar@PLT
-
-#CHECK: jglh	bar@PLT                 # encoding: [0xc0,0x64,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jglh	bar@PLT
-
-#CHECK: jgne	bar@PLT                 # encoding: [0xc0,0x74,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgne	bar@PLT
-
-#CHECK: jge	bar@PLT                 # encoding: [0xc0,0x84,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jge	bar@PLT
-
-#CHECK: jgnlh	bar@PLT                 # encoding: [0xc0,0x94,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgnlh	bar@PLT
-
-#CHECK: jghe	bar@PLT                 # encoding: [0xc0,0xa4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jghe	bar@PLT
-
-#CHECK: jgnl	bar@PLT                 # encoding: [0xc0,0xb4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgnl	bar@PLT
-
-#CHECK: jgle	bar@PLT                 # encoding: [0xc0,0xc4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgle	bar@PLT
-
-#CHECK: jgnh	bar@PLT                 # encoding: [0xc0,0xd4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgnh	bar@PLT
-
-#CHECK: jgno	bar@PLT                 # encoding: [0xc0,0xe4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jgno	bar@PLT
-
-#CHECK: jg	bar@PLT                 # encoding: [0xc0,0xf4,A,A,A,A]
-#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
-	jg	bar@PLT
diff --git a/test/MC/SystemZ/insn-brcl-02.s b/test/MC/SystemZ/insn-brcl-02.s
deleted file mode 100644
index ded5f7e4a6e1..000000000000
--- a/test/MC/SystemZ/insn-brcl-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: brcl	foo, bar
-#CHECK: error: invalid operand
-#CHECK: brcl	-1, bar
-#CHECK: error: invalid operand
-#CHECK: brcl	16, bar
-
-	brcl	foo, bar
-	brcl	-1, bar
-	brcl	16, bar
diff --git a/test/MC/SystemZ/insn-c-01.s b/test/MC/SystemZ/insn-c-01.s
deleted file mode 100644
index e8a8ada30d89..000000000000
--- a/test/MC/SystemZ/insn-c-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: c	%r0, 0                  # encoding: [0x59,0x00,0x00,0x00]
-#CHECK: c	%r0, 4095               # encoding: [0x59,0x00,0x0f,0xff]
-#CHECK: c	%r0, 0(%r1)             # encoding: [0x59,0x00,0x10,0x00]
-#CHECK: c	%r0, 0(%r15)            # encoding: [0x59,0x00,0xf0,0x00]
-#CHECK: c	%r0, 4095(%r1,%r15)     # encoding: [0x59,0x01,0xff,0xff]
-#CHECK: c	%r0, 4095(%r15,%r1)     # encoding: [0x59,0x0f,0x1f,0xff]
-#CHECK: c	%r15, 0                 # encoding: [0x59,0xf0,0x00,0x00]
-
-	c	%r0, 0
-	c	%r0, 4095
-	c	%r0, 0(%r1)
-	c	%r0, 0(%r15)
-	c	%r0, 4095(%r1,%r15)
-	c	%r0, 4095(%r15,%r1)
-	c	%r15, 0
diff --git a/test/MC/SystemZ/insn-c-02.s b/test/MC/SystemZ/insn-c-02.s
deleted file mode 100644
index 81fe25181968..000000000000
--- a/test/MC/SystemZ/insn-c-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: c	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: c	%r0, 4096
-
-	c	%r0, -1
-	c	%r0, 4096
diff --git a/test/MC/SystemZ/insn-cdb-01.s b/test/MC/SystemZ/insn-cdb-01.s
deleted file mode 100644
index 7f6bb59956cb..000000000000
--- a/test/MC/SystemZ/insn-cdb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x19]
-#CHECK: cdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x19]
-#CHECK: cdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x19]
-#CHECK: cdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x19]
-#CHECK: cdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x19]
-#CHECK: cdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x19]
-#CHECK: cdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x19]
-
-	cdb	%f0, 0
-	cdb	%f0, 4095
-	cdb	%f0, 0(%r1)
-	cdb	%f0, 0(%r15)
-	cdb	%f0, 4095(%r1,%r15)
-	cdb	%f0, 4095(%r15,%r1)
-	cdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-cdb-02.s b/test/MC/SystemZ/insn-cdb-02.s
deleted file mode 100644
index 5f02b84c1496..000000000000
--- a/test/MC/SystemZ/insn-cdb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cdb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: cdb	%f0, 4096
-
-	cdb	%f0, -1
-	cdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-cdbr-01.s b/test/MC/SystemZ/insn-cdbr-01.s
deleted file mode 100644
index d2acfc01a256..000000000000
--- a/test/MC/SystemZ/insn-cdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cdbr	%f0, %f0                # encoding: [0xb3,0x19,0x00,0x00]
-#CHECK: cdbr	%f0, %f15               # encoding: [0xb3,0x19,0x00,0x0f]
-#CHECK: cdbr	%f7, %f8                # encoding: [0xb3,0x19,0x00,0x78]
-#CHECK: cdbr	%f15, %f0               # encoding: [0xb3,0x19,0x00,0xf0]
-
-	cdbr	%f0, %f0
-	cdbr	%f0, %f15
-	cdbr	%f7, %f8
-	cdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-cdfbr-01.s b/test/MC/SystemZ/insn-cdfbr-01.s
deleted file mode 100644
index 94c9b0779c76..000000000000
--- a/test/MC/SystemZ/insn-cdfbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cdfbr	%f0, %r0                # encoding: [0xb3,0x95,0x00,0x00]
-#CHECK: cdfbr	%f0, %r15               # encoding: [0xb3,0x95,0x00,0x0f]
-#CHECK: cdfbr	%f15, %r0               # encoding: [0xb3,0x95,0x00,0xf0]
-#CHECK: cdfbr	%f7, %r8                # encoding: [0xb3,0x95,0x00,0x78]
-#CHECK: cdfbr	%f15, %r15              # encoding: [0xb3,0x95,0x00,0xff]
-
-	cdfbr	%f0, %r0
-	cdfbr	%f0, %r15
-	cdfbr	%f15, %r0
-	cdfbr	%f7, %r8
-	cdfbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cdfbr-02.s b/test/MC/SystemZ/insn-cdfbr-02.s
deleted file mode 100644
index 14caa1e3c0d8..000000000000
--- a/test/MC/SystemZ/insn-cdfbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cdfbr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: cdfbr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: cdfbr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: cdfbr	%a0, %r0
-
-	cdfbr	%r0, %r0
-	cdfbr	%f0, %f0
-	cdfbr	%f0, %a0
-	cdfbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-cdgbr-01.s b/test/MC/SystemZ/insn-cdgbr-01.s
deleted file mode 100644
index 6a994af744e9..000000000000
--- a/test/MC/SystemZ/insn-cdgbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cdgbr	%f0, %r0                # encoding: [0xb3,0xa5,0x00,0x00]
-#CHECK: cdgbr	%f0, %r15               # encoding: [0xb3,0xa5,0x00,0x0f]
-#CHECK: cdgbr	%f15, %r0               # encoding: [0xb3,0xa5,0x00,0xf0]
-#CHECK: cdgbr	%f7, %r8                # encoding: [0xb3,0xa5,0x00,0x78]
-#CHECK: cdgbr	%f15, %r15              # encoding: [0xb3,0xa5,0x00,0xff]
-
-	cdgbr	%f0, %r0
-	cdgbr	%f0, %r15
-	cdgbr	%f15, %r0
-	cdgbr	%f7, %r8
-	cdgbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cdgbr-02.s b/test/MC/SystemZ/insn-cdgbr-02.s
deleted file mode 100644
index 8fa9d4fa5c28..000000000000
--- a/test/MC/SystemZ/insn-cdgbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cdgbr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: cdgbr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: cdgbr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: cdgbr	%a0, %r0
-
-	cdgbr	%r0, %r0
-	cdgbr	%f0, %f0
-	cdgbr	%f0, %a0
-	cdgbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-ceb-01.s b/test/MC/SystemZ/insn-ceb-01.s
deleted file mode 100644
index d576e9d572df..000000000000
--- a/test/MC/SystemZ/insn-ceb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ceb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x09]
-#CHECK: ceb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x09]
-#CHECK: ceb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x09]
-#CHECK: ceb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x09]
-#CHECK: ceb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x09]
-#CHECK: ceb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x09]
-#CHECK: ceb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x09]
-
-	ceb	%f0, 0
-	ceb	%f0, 4095
-	ceb	%f0, 0(%r1)
-	ceb	%f0, 0(%r15)
-	ceb	%f0, 4095(%r1,%r15)
-	ceb	%f0, 4095(%r15,%r1)
-	ceb	%f15, 0
diff --git a/test/MC/SystemZ/insn-ceb-02.s b/test/MC/SystemZ/insn-ceb-02.s
deleted file mode 100644
index 90829dbe12f5..000000000000
--- a/test/MC/SystemZ/insn-ceb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ceb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: ceb	%f0, 4096
-
-	ceb	%f0, -1
-	ceb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-cebr-01.s b/test/MC/SystemZ/insn-cebr-01.s
deleted file mode 100644
index b820e3901a67..000000000000
--- a/test/MC/SystemZ/insn-cebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cebr	%f0, %f0                # encoding: [0xb3,0x09,0x00,0x00]
-#CHECK: cebr	%f0, %f15               # encoding: [0xb3,0x09,0x00,0x0f]
-#CHECK: cebr	%f7, %f8                # encoding: [0xb3,0x09,0x00,0x78]
-#CHECK: cebr	%f15, %f0               # encoding: [0xb3,0x09,0x00,0xf0]
-
-	cebr	%f0, %f0
-	cebr	%f0, %f15
-	cebr	%f7, %f8
-	cebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-cefbr-01.s b/test/MC/SystemZ/insn-cefbr-01.s
deleted file mode 100644
index f1068f506c58..000000000000
--- a/test/MC/SystemZ/insn-cefbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cefbr	%f0, %r0                # encoding: [0xb3,0x94,0x00,0x00]
-#CHECK: cefbr	%f0, %r15               # encoding: [0xb3,0x94,0x00,0x0f]
-#CHECK: cefbr	%f15, %r0               # encoding: [0xb3,0x94,0x00,0xf0]
-#CHECK: cefbr	%f7, %r8                # encoding: [0xb3,0x94,0x00,0x78]
-#CHECK: cefbr	%f15, %r15              # encoding: [0xb3,0x94,0x00,0xff]
-
-	cefbr	%f0, %r0
-	cefbr	%f0, %r15
-	cefbr	%f15, %r0
-	cefbr	%f7, %r8
-	cefbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cefbr-02.s b/test/MC/SystemZ/insn-cefbr-02.s
deleted file mode 100644
index b894fb9f3257..000000000000
--- a/test/MC/SystemZ/insn-cefbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cefbr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: cefbr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: cefbr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: cefbr	%a0, %r0
-
-	cefbr	%r0, %r0
-	cefbr	%f0, %f0
-	cefbr	%f0, %a0
-	cefbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-cegbr-01.s b/test/MC/SystemZ/insn-cegbr-01.s
deleted file mode 100644
index 5b2e6caf12c1..000000000000
--- a/test/MC/SystemZ/insn-cegbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cegbr	%f0, %r0                # encoding: [0xb3,0xa4,0x00,0x00]
-#CHECK: cegbr	%f0, %r15               # encoding: [0xb3,0xa4,0x00,0x0f]
-#CHECK: cegbr	%f15, %r0               # encoding: [0xb3,0xa4,0x00,0xf0]
-#CHECK: cegbr	%f7, %r8                # encoding: [0xb3,0xa4,0x00,0x78]
-#CHECK: cegbr	%f15, %r15              # encoding: [0xb3,0xa4,0x00,0xff]
-
-	cegbr	%f0, %r0
-	cegbr	%f0, %r15
-	cegbr	%f15, %r0
-	cegbr	%f7, %r8
-	cegbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cegbr-02.s b/test/MC/SystemZ/insn-cegbr-02.s
deleted file mode 100644
index bf0c31ad13f9..000000000000
--- a/test/MC/SystemZ/insn-cegbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cegbr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: cegbr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: cegbr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: cegbr	%a0, %r0
-
-	cegbr	%r0, %r0
-	cegbr	%f0, %f0
-	cegbr	%f0, %a0
-	cegbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-cfdbr-01.s b/test/MC/SystemZ/insn-cfdbr-01.s
deleted file mode 100644
index be4f87fb29e4..000000000000
--- a/test/MC/SystemZ/insn-cfdbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cfdbr	%r0, 0, %f0             # encoding: [0xb3,0x99,0x00,0x00]
-#CHECK: cfdbr	%r0, 0, %f15            # encoding: [0xb3,0x99,0x00,0x0f]
-#CHECK: cfdbr	%r0, 15, %f0            # encoding: [0xb3,0x99,0xf0,0x00]
-#CHECK: cfdbr	%r4, 5, %f6             # encoding: [0xb3,0x99,0x50,0x46]
-#CHECK: cfdbr	%r15, 0, %f0            # encoding: [0xb3,0x99,0x00,0xf0]
-
-	cfdbr	%r0, 0, %f0
-	cfdbr	%r0, 0, %f15
-	cfdbr	%r0, 15, %f0
-	cfdbr	%r4, 5, %f6
-	cfdbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cfdbr-02.s b/test/MC/SystemZ/insn-cfdbr-02.s
deleted file mode 100644
index 00175955f255..000000000000
--- a/test/MC/SystemZ/insn-cfdbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cfdbr	%r0, 0, %r0
-#CHECK: error: invalid register
-#CHECK: cfdbr	%f0, 0, %f0
-#CHECK: error: invalid operand
-#CHECK: cfdbr	%r0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: cfdbr	%r0, 16, %f0
-
-	cfdbr	%r0, 0, %r0
-	cfdbr	%f0, 0, %f0
-	cfdbr	%r0, -1, %f0
-	cfdbr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cfebr-01.s b/test/MC/SystemZ/insn-cfebr-01.s
deleted file mode 100644
index 6f7ab2c0e038..000000000000
--- a/test/MC/SystemZ/insn-cfebr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cfebr	%r0, 0, %f0             # encoding: [0xb3,0x98,0x00,0x00]
-#CHECK: cfebr	%r0, 0, %f15            # encoding: [0xb3,0x98,0x00,0x0f]
-#CHECK: cfebr	%r0, 15, %f0            # encoding: [0xb3,0x98,0xf0,0x00]
-#CHECK: cfebr	%r4, 5, %f6             # encoding: [0xb3,0x98,0x50,0x46]
-#CHECK: cfebr	%r15, 0, %f0            # encoding: [0xb3,0x98,0x00,0xf0]
-
-	cfebr	%r0, 0, %f0
-	cfebr	%r0, 0, %f15
-	cfebr	%r0, 15, %f0
-	cfebr	%r4, 5, %f6
-	cfebr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cfebr-02.s b/test/MC/SystemZ/insn-cfebr-02.s
deleted file mode 100644
index c3c5adaed2ca..000000000000
--- a/test/MC/SystemZ/insn-cfebr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cfebr	%r0, 0, %r0
-#CHECK: error: invalid register
-#CHECK: cfebr	%f0, 0, %f0
-#CHECK: error: invalid operand
-#CHECK: cfebr	%r0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: cfebr	%r0, 16, %f0
-
-	cfebr	%r0, 0, %r0
-	cfebr	%f0, 0, %f0
-	cfebr	%r0, -1, %f0
-	cfebr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cfi-01.s b/test/MC/SystemZ/insn-cfi-01.s
deleted file mode 100644
index 52e34c02e896..000000000000
--- a/test/MC/SystemZ/insn-cfi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cfi	%r0, -2147483648        # encoding: [0xc2,0x0d,0x80,0x00,0x00,0x00]
-#CHECK: cfi	%r0, -1                 # encoding: [0xc2,0x0d,0xff,0xff,0xff,0xff]
-#CHECK: cfi	%r0, 0                  # encoding: [0xc2,0x0d,0x00,0x00,0x00,0x00]
-#CHECK: cfi	%r0, 1                  # encoding: [0xc2,0x0d,0x00,0x00,0x00,0x01]
-#CHECK: cfi	%r0, 2147483647         # encoding: [0xc2,0x0d,0x7f,0xff,0xff,0xff]
-#CHECK: cfi	%r15, 0                 # encoding: [0xc2,0xfd,0x00,0x00,0x00,0x00]
-
-	cfi	%r0, -1 << 31
-	cfi	%r0, -1
-	cfi	%r0, 0
-	cfi	%r0, 1
-	cfi	%r0, (1 << 31) - 1
-	cfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-cfi-02.s b/test/MC/SystemZ/insn-cfi-02.s
deleted file mode 100644
index cf7c72621b74..000000000000
--- a/test/MC/SystemZ/insn-cfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cfi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: cfi	%r0, (1 << 31)
-
-	cfi	%r0, (-1 << 31) - 1
-	cfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-cfxbr-01.s b/test/MC/SystemZ/insn-cfxbr-01.s
deleted file mode 100644
index c509106b5430..000000000000
--- a/test/MC/SystemZ/insn-cfxbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cfxbr	%r0, 0, %f0             # encoding: [0xb3,0x9a,0x00,0x00]
-#CHECK: cfxbr	%r0, 0, %f13            # encoding: [0xb3,0x9a,0x00,0x0d]
-#CHECK: cfxbr	%r0, 15, %f0            # encoding: [0xb3,0x9a,0xf0,0x00]
-#CHECK: cfxbr	%r4, 5, %f8             # encoding: [0xb3,0x9a,0x50,0x48]
-#CHECK: cfxbr	%r15, 0, %f0            # encoding: [0xb3,0x9a,0x00,0xf0]
-
-	cfxbr	%r0, 0, %f0
-	cfxbr	%r0, 0, %f13
-	cfxbr	%r0, 15, %f0
-	cfxbr	%r4, 5, %f8
-	cfxbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cfxbr-02.s b/test/MC/SystemZ/insn-cfxbr-02.s
deleted file mode 100644
index 3802c514332b..000000000000
--- a/test/MC/SystemZ/insn-cfxbr-02.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cfxbr	%r0, 0, %r0
-#CHECK: error: invalid register
-#CHECK: cfxbr	%f0, 0, %f0
-#CHECK: error: invalid operand
-#CHECK: cfxbr	%r0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: cfxbr	%r0, 16, %f0
-#CHECK: error: invalid register
-#CHECK: cfxbr	%r0, 0, %f2
-#CHECK: error: invalid register
-#CHECK: cfxbr	%r0, 0, %f14
-
-	cfxbr	%r0, 0, %r0
-	cfxbr	%f0, 0, %f0
-	cfxbr	%r0, -1, %f0
-	cfxbr	%r0, 16, %f0
-	cfxbr	%r0, 0, %f2
-	cfxbr	%r0, 0, %f14
-
diff --git a/test/MC/SystemZ/insn-cg-01.s b/test/MC/SystemZ/insn-cg-01.s
deleted file mode 100644
index 1eb185f0ec60..000000000000
--- a/test/MC/SystemZ/insn-cg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x20]
-#CHECK: cg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x20]
-#CHECK: cg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x20]
-#CHECK: cg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x20]
-#CHECK: cg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x20]
-#CHECK: cg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x20]
-#CHECK: cg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x20]
-#CHECK: cg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x20]
-#CHECK: cg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x20]
-#CHECK: cg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x20]
-
-	cg	%r0, -524288
-	cg	%r0, -1
-	cg	%r0, 0
-	cg	%r0, 1
-	cg	%r0, 524287
-	cg	%r0, 0(%r1)
-	cg	%r0, 0(%r15)
-	cg	%r0, 524287(%r1,%r15)
-	cg	%r0, 524287(%r15,%r1)
-	cg	%r15, 0
diff --git a/test/MC/SystemZ/insn-cg-02.s b/test/MC/SystemZ/insn-cg-02.s
deleted file mode 100644
index e093ccd75112..000000000000
--- a/test/MC/SystemZ/insn-cg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: cg	%r0, 524288
-
-	cg	%r0, -524289
-	cg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cgdbr-01.s b/test/MC/SystemZ/insn-cgdbr-01.s
deleted file mode 100644
index 718f50a9ea9a..000000000000
--- a/test/MC/SystemZ/insn-cgdbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgdbr	%r0, 0, %f0             # encoding: [0xb3,0xa9,0x00,0x00]
-#CHECK: cgdbr	%r0, 0, %f15            # encoding: [0xb3,0xa9,0x00,0x0f]
-#CHECK: cgdbr	%r0, 15, %f0            # encoding: [0xb3,0xa9,0xf0,0x00]
-#CHECK: cgdbr	%r4, 5, %f6             # encoding: [0xb3,0xa9,0x50,0x46]
-#CHECK: cgdbr	%r15, 0, %f0            # encoding: [0xb3,0xa9,0x00,0xf0]
-
-	cgdbr	%r0, 0, %f0
-	cgdbr	%r0, 0, %f15
-	cgdbr	%r0, 15, %f0
-	cgdbr	%r4, 5, %f6
-	cgdbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cgdbr-02.s b/test/MC/SystemZ/insn-cgdbr-02.s
deleted file mode 100644
index 3a3e01fde0f4..000000000000
--- a/test/MC/SystemZ/insn-cgdbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cgdbr	%r0, 0, %r0
-#CHECK: error: invalid register
-#CHECK: cgdbr	%f0, 0, %f0
-#CHECK: error: invalid operand
-#CHECK: cgdbr	%r0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: cgdbr	%r0, 16, %f0
-
-	cgdbr	%r0, 0, %r0
-	cgdbr	%f0, 0, %f0
-	cgdbr	%r0, -1, %f0
-	cgdbr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cgebr-01.s b/test/MC/SystemZ/insn-cgebr-01.s
deleted file mode 100644
index dc6a7dbfafdb..000000000000
--- a/test/MC/SystemZ/insn-cgebr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgebr	%r0, 0, %f0             # encoding: [0xb3,0xa8,0x00,0x00]
-#CHECK: cgebr	%r0, 0, %f15            # encoding: [0xb3,0xa8,0x00,0x0f]
-#CHECK: cgebr	%r0, 15, %f0            # encoding: [0xb3,0xa8,0xf0,0x00]
-#CHECK: cgebr	%r4, 5, %f6             # encoding: [0xb3,0xa8,0x50,0x46]
-#CHECK: cgebr	%r15, 0, %f0            # encoding: [0xb3,0xa8,0x00,0xf0]
-
-	cgebr	%r0, 0, %f0
-	cgebr	%r0, 0, %f15
-	cgebr	%r0, 15, %f0
-	cgebr	%r4, 5, %f6
-	cgebr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cgebr-02.s b/test/MC/SystemZ/insn-cgebr-02.s
deleted file mode 100644
index 9b817a417281..000000000000
--- a/test/MC/SystemZ/insn-cgebr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cgebr	%r0, 0, %r0
-#CHECK: error: invalid register
-#CHECK: cgebr	%f0, 0, %f0
-#CHECK: error: invalid operand
-#CHECK: cgebr	%r0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: cgebr	%r0, 16, %f0
-
-	cgebr	%r0, 0, %r0
-	cgebr	%f0, 0, %f0
-	cgebr	%r0, -1, %f0
-	cgebr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cgf-01.s b/test/MC/SystemZ/insn-cgf-01.s
deleted file mode 100644
index 03c439f1f5dc..000000000000
--- a/test/MC/SystemZ/insn-cgf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x30]
-#CHECK: cgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x30]
-#CHECK: cgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x30]
-#CHECK: cgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x30]
-#CHECK: cgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x30]
-#CHECK: cgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x30]
-#CHECK: cgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x30]
-#CHECK: cgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x30]
-#CHECK: cgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x30]
-#CHECK: cgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x30]
-
-	cgf	%r0, -524288
-	cgf	%r0, -1
-	cgf	%r0, 0
-	cgf	%r0, 1
-	cgf	%r0, 524287
-	cgf	%r0, 0(%r1)
-	cgf	%r0, 0(%r15)
-	cgf	%r0, 524287(%r1,%r15)
-	cgf	%r0, 524287(%r15,%r1)
-	cgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-cgf-02.s b/test/MC/SystemZ/insn-cgf-02.s
deleted file mode 100644
index 7171c6e38816..000000000000
--- a/test/MC/SystemZ/insn-cgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: cgf	%r0, 524288
-
-	cgf	%r0, -524289
-	cgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cgfi-01.s b/test/MC/SystemZ/insn-cgfi-01.s
deleted file mode 100644
index d6f72d5d6712..000000000000
--- a/test/MC/SystemZ/insn-cgfi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgfi	%r0, -2147483648        # encoding: [0xc2,0x0c,0x80,0x00,0x00,0x00]
-#CHECK: cgfi	%r0, -1                 # encoding: [0xc2,0x0c,0xff,0xff,0xff,0xff]
-#CHECK: cgfi	%r0, 0                  # encoding: [0xc2,0x0c,0x00,0x00,0x00,0x00]
-#CHECK: cgfi	%r0, 1                  # encoding: [0xc2,0x0c,0x00,0x00,0x00,0x01]
-#CHECK: cgfi	%r0, 2147483647         # encoding: [0xc2,0x0c,0x7f,0xff,0xff,0xff]
-#CHECK: cgfi	%r15, 0                 # encoding: [0xc2,0xfc,0x00,0x00,0x00,0x00]
-
-	cgfi	%r0, -1 << 31
-	cgfi	%r0, -1
-	cgfi	%r0, 0
-	cgfi	%r0, 1
-	cgfi	%r0, (1 << 31) - 1
-	cgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-cgfi-02.s b/test/MC/SystemZ/insn-cgfi-02.s
deleted file mode 100644
index 6f72a24e6902..000000000000
--- a/test/MC/SystemZ/insn-cgfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cgfi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: cgfi	%r0, (1 << 31)
-
-	cgfi	%r0, (-1 << 31) - 1
-	cgfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-cgfr-01.s b/test/MC/SystemZ/insn-cgfr-01.s
deleted file mode 100644
index 6bd1792ea020..000000000000
--- a/test/MC/SystemZ/insn-cgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgfr	%r0, %r0                # encoding: [0xb9,0x30,0x00,0x00]
-#CHECK: cgfr	%r0, %r15               # encoding: [0xb9,0x30,0x00,0x0f]
-#CHECK: cgfr	%r15, %r0               # encoding: [0xb9,0x30,0x00,0xf0]
-#CHECK: cgfr	%r7, %r8                # encoding: [0xb9,0x30,0x00,0x78]
-
-	cgfr	%r0,%r0
-	cgfr	%r0,%r15
-	cgfr	%r15,%r0
-	cgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-cgfrl-01.s b/test/MC/SystemZ/insn-cgfrl-01.s
deleted file mode 100644
index 2792fb4a93b7..000000000000
--- a/test/MC/SystemZ/insn-cgfrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgfrl	%r0, 2864434397         # encoding: [0xc6,0x0c,0x55,0x5d,0xe6,0x6e]
-#CHECK: cgfrl	%r15, 2864434397        # encoding: [0xc6,0xfc,0x55,0x5d,0xe6,0x6e]
-
-	cgfrl	%r0,0xaabbccdd
-	cgfrl	%r15,0xaabbccdd
-
-#CHECK: cgfrl	%r0, foo                # encoding: [0xc6,0x0c,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: cgfrl	%r15, foo               # encoding: [0xc6,0xfc,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	cgfrl	%r0,foo
-	cgfrl	%r15,foo
-
-#CHECK: cgfrl	%r3, bar+100            # encoding: [0xc6,0x3c,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: cgfrl	%r4, bar+100            # encoding: [0xc6,0x4c,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	cgfrl	%r3,bar+100
-	cgfrl	%r4,bar+100
-
-#CHECK: cgfrl	%r7, frob@PLT           # encoding: [0xc6,0x7c,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: cgfrl	%r8, frob@PLT           # encoding: [0xc6,0x8c,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	cgfrl	%r7,frob@PLT
-	cgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cgh-01.s b/test/MC/SystemZ/insn-cgh-01.s
deleted file mode 100644
index 31c86abdda76..000000000000
--- a/test/MC/SystemZ/insn-cgh-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x34]
-#CHECK: cgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x34]
-#CHECK: cgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x34]
-#CHECK: cgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x34]
-#CHECK: cgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x34]
-#CHECK: cgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x34]
-#CHECK: cgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x34]
-#CHECK: cgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x34]
-#CHECK: cgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x34]
-#CHECK: cgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x34]
-
-	cgh	%r0, -524288
-	cgh	%r0, -1
-	cgh	%r0, 0
-	cgh	%r0, 1
-	cgh	%r0, 524287
-	cgh	%r0, 0(%r1)
-	cgh	%r0, 0(%r15)
-	cgh	%r0, 524287(%r1,%r15)
-	cgh	%r0, 524287(%r15,%r1)
-	cgh	%r15, 0
diff --git a/test/MC/SystemZ/insn-cgh-02.s b/test/MC/SystemZ/insn-cgh-02.s
deleted file mode 100644
index 60e665f2dd85..000000000000
--- a/test/MC/SystemZ/insn-cgh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cgh	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: cgh	%r0, 524288
-
-	cgh	%r0, -524289
-	cgh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cghi-01.s b/test/MC/SystemZ/insn-cghi-01.s
deleted file mode 100644
index 575ad89023a2..000000000000
--- a/test/MC/SystemZ/insn-cghi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cghi	%r0, -32768             # encoding: [0xa7,0x0f,0x80,0x00]
-#CHECK: cghi	%r0, -1                 # encoding: [0xa7,0x0f,0xff,0xff]
-#CHECK: cghi	%r0, 0                  # encoding: [0xa7,0x0f,0x00,0x00]
-#CHECK: cghi	%r0, 1                  # encoding: [0xa7,0x0f,0x00,0x01]
-#CHECK: cghi	%r0, 32767              # encoding: [0xa7,0x0f,0x7f,0xff]
-#CHECK: cghi	%r15, 0                 # encoding: [0xa7,0xff,0x00,0x00]
-
-	cghi	%r0, -32768
-	cghi	%r0, -1
-	cghi	%r0, 0
-	cghi	%r0, 1
-	cghi	%r0, 32767
-	cghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-cghi-02.s b/test/MC/SystemZ/insn-cghi-02.s
deleted file mode 100644
index bd4a52a6b720..000000000000
--- a/test/MC/SystemZ/insn-cghi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cghi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: cghi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: cghi	%r0, foo
-
-	cghi	%r0, -32769
-	cghi	%r0, 32768
-	cghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-cghrl-01.s b/test/MC/SystemZ/insn-cghrl-01.s
deleted file mode 100644
index c48c5ec3efdf..000000000000
--- a/test/MC/SystemZ/insn-cghrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cghrl	%r0, 2864434397         # encoding: [0xc6,0x04,0x55,0x5d,0xe6,0x6e]
-#CHECK: cghrl	%r15, 2864434397        # encoding: [0xc6,0xf4,0x55,0x5d,0xe6,0x6e]
-
-	cghrl	%r0,0xaabbccdd
-	cghrl	%r15,0xaabbccdd
-
-#CHECK: cghrl	%r0, foo                # encoding: [0xc6,0x04,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: cghrl	%r15, foo               # encoding: [0xc6,0xf4,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	cghrl	%r0,foo
-	cghrl	%r15,foo
-
-#CHECK: cghrl	%r3, bar+100            # encoding: [0xc6,0x34,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: cghrl	%r4, bar+100            # encoding: [0xc6,0x44,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	cghrl	%r3,bar+100
-	cghrl	%r4,bar+100
-
-#CHECK: cghrl	%r7, frob@PLT           # encoding: [0xc6,0x74,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: cghrl	%r8, frob@PLT           # encoding: [0xc6,0x84,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	cghrl	%r7,frob@PLT
-	cghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cghsi-01.s b/test/MC/SystemZ/insn-cghsi-01.s
deleted file mode 100644
index 7d67e20b5b70..000000000000
--- a/test/MC/SystemZ/insn-cghsi-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cghsi	0, 0                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x00]
-#CHECK: cghsi	4095, 0                 # encoding: [0xe5,0x58,0x0f,0xff,0x00,0x00]
-#CHECK: cghsi	0, -32768               # encoding: [0xe5,0x58,0x00,0x00,0x80,0x00]
-#CHECK: cghsi	0, -1                   # encoding: [0xe5,0x58,0x00,0x00,0xff,0xff]
-#CHECK: cghsi	0, 0                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x00]
-#CHECK: cghsi	0, 1                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x01]
-#CHECK: cghsi	0, 32767                # encoding: [0xe5,0x58,0x00,0x00,0x7f,0xff]
-#CHECK: cghsi	0(%r1), 42              # encoding: [0xe5,0x58,0x10,0x00,0x00,0x2a]
-#CHECK: cghsi	0(%r15), 42             # encoding: [0xe5,0x58,0xf0,0x00,0x00,0x2a]
-#CHECK: cghsi	4095(%r1), 42           # encoding: [0xe5,0x58,0x1f,0xff,0x00,0x2a]
-#CHECK: cghsi	4095(%r15), 42          # encoding: [0xe5,0x58,0xff,0xff,0x00,0x2a]
-
-	cghsi	0, 0
-	cghsi	4095, 0
-	cghsi	0, -32768
-	cghsi	0, -1
-	cghsi	0, 0
-	cghsi	0, 1
-	cghsi	0, 32767
-	cghsi	0(%r1), 42
-	cghsi	0(%r15), 42
-	cghsi	4095(%r1), 42
-	cghsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-cghsi-02.s b/test/MC/SystemZ/insn-cghsi-02.s
deleted file mode 100644
index 773ee5cd9e9d..000000000000
--- a/test/MC/SystemZ/insn-cghsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cghsi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: cghsi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: cghsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: cghsi	0, -32769
-#CHECK: error: invalid operand
-#CHECK: cghsi	0, 32768
-
-	cghsi	-1, 0
-	cghsi	4096, 0
-	cghsi	0(%r1,%r2), 0
-	cghsi	0, -32769
-	cghsi	0, 32768
diff --git a/test/MC/SystemZ/insn-cgr-01.s b/test/MC/SystemZ/insn-cgr-01.s
deleted file mode 100644
index 334a0f63f82b..000000000000
--- a/test/MC/SystemZ/insn-cgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgr	%r0, %r0                # encoding: [0xb9,0x20,0x00,0x00]
-#CHECK: cgr	%r0, %r15               # encoding: [0xb9,0x20,0x00,0x0f]
-#CHECK: cgr	%r15, %r0               # encoding: [0xb9,0x20,0x00,0xf0]
-#CHECK: cgr	%r7, %r8                # encoding: [0xb9,0x20,0x00,0x78]
-
-	cgr	%r0,%r0
-	cgr	%r0,%r15
-	cgr	%r15,%r0
-	cgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-cgrl-01.s b/test/MC/SystemZ/insn-cgrl-01.s
deleted file mode 100644
index af878cbf450a..000000000000
--- a/test/MC/SystemZ/insn-cgrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgrl	%r0, 2864434397         # encoding: [0xc6,0x08,0x55,0x5d,0xe6,0x6e]
-#CHECK: cgrl	%r15, 2864434397        # encoding: [0xc6,0xf8,0x55,0x5d,0xe6,0x6e]
-
-	cgrl	%r0,0xaabbccdd
-	cgrl	%r15,0xaabbccdd
-
-#CHECK: cgrl	%r0, foo                # encoding: [0xc6,0x08,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: cgrl	%r15, foo               # encoding: [0xc6,0xf8,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	cgrl	%r0,foo
-	cgrl	%r15,foo
-
-#CHECK: cgrl	%r3, bar+100            # encoding: [0xc6,0x38,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: cgrl	%r4, bar+100            # encoding: [0xc6,0x48,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	cgrl	%r3,bar+100
-	cgrl	%r4,bar+100
-
-#CHECK: cgrl	%r7, frob@PLT           # encoding: [0xc6,0x78,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: cgrl	%r8, frob@PLT           # encoding: [0xc6,0x88,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	cgrl	%r7,frob@PLT
-	cgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cgxbr-01.s b/test/MC/SystemZ/insn-cgxbr-01.s
deleted file mode 100644
index 0250b52955af..000000000000
--- a/test/MC/SystemZ/insn-cgxbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cgxbr	%r0, 0, %f0             # encoding: [0xb3,0xaa,0x00,0x00]
-#CHECK: cgxbr	%r0, 0, %f13            # encoding: [0xb3,0xaa,0x00,0x0d]
-#CHECK: cgxbr	%r0, 15, %f0            # encoding: [0xb3,0xaa,0xf0,0x00]
-#CHECK: cgxbr	%r4, 5, %f8             # encoding: [0xb3,0xaa,0x50,0x48]
-#CHECK: cgxbr	%r15, 0, %f0            # encoding: [0xb3,0xaa,0x00,0xf0]
-
-	cgxbr	%r0, 0, %f0
-	cgxbr	%r0, 0, %f13
-	cgxbr	%r0, 15, %f0
-	cgxbr	%r4, 5, %f8
-	cgxbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cgxbr-02.s b/test/MC/SystemZ/insn-cgxbr-02.s
deleted file mode 100644
index 9caab9fc401d..000000000000
--- a/test/MC/SystemZ/insn-cgxbr-02.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cgxbr	%r0, 0, %r0
-#CHECK: error: invalid register
-#CHECK: cgxbr	%f0, 0, %f0
-#CHECK: error: invalid operand
-#CHECK: cgxbr	%r0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: cgxbr	%r0, 16, %f0
-#CHECK: error: invalid register
-#CHECK: cgxbr	%r0, 0, %f2
-#CHECK: error: invalid register
-#CHECK: cgxbr	%r0, 0, %f14
-
-	cgxbr	%r0, 0, %r0
-	cgxbr	%f0, 0, %f0
-	cgxbr	%r0, -1, %f0
-	cgxbr	%r0, 16, %f0
-	cgxbr	%r0, 0, %f2
-	cgxbr	%r0, 0, %f14
-
diff --git a/test/MC/SystemZ/insn-ch-01.s b/test/MC/SystemZ/insn-ch-01.s
deleted file mode 100644
index dfb0b7fcca05..000000000000
--- a/test/MC/SystemZ/insn-ch-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ch	%r0, 0                  # encoding: [0x49,0x00,0x00,0x00]
-#CHECK: ch	%r0, 4095               # encoding: [0x49,0x00,0x0f,0xff]
-#CHECK: ch	%r0, 0(%r1)             # encoding: [0x49,0x00,0x10,0x00]
-#CHECK: ch	%r0, 0(%r15)            # encoding: [0x49,0x00,0xf0,0x00]
-#CHECK: ch	%r0, 4095(%r1,%r15)     # encoding: [0x49,0x01,0xff,0xff]
-#CHECK: ch	%r0, 4095(%r15,%r1)     # encoding: [0x49,0x0f,0x1f,0xff]
-#CHECK: ch	%r15, 0                 # encoding: [0x49,0xf0,0x00,0x00]
-
-	ch	%r0, 0
-	ch	%r0, 4095
-	ch	%r0, 0(%r1)
-	ch	%r0, 0(%r15)
-	ch	%r0, 4095(%r1,%r15)
-	ch	%r0, 4095(%r15,%r1)
-	ch	%r15, 0
diff --git a/test/MC/SystemZ/insn-ch-02.s b/test/MC/SystemZ/insn-ch-02.s
deleted file mode 100644
index 2034c2b8d4e9..000000000000
--- a/test/MC/SystemZ/insn-ch-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ch	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: ch	%r0, 4096
-
-	ch	%r0, -1
-	ch	%r0, 4096
diff --git a/test/MC/SystemZ/insn-chhsi-01.s b/test/MC/SystemZ/insn-chhsi-01.s
deleted file mode 100644
index 0fd50bc5e100..000000000000
--- a/test/MC/SystemZ/insn-chhsi-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: chhsi	0, 0                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x00]
-#CHECK: chhsi	4095, 0                 # encoding: [0xe5,0x54,0x0f,0xff,0x00,0x00]
-#CHECK: chhsi	0, -32768               # encoding: [0xe5,0x54,0x00,0x00,0x80,0x00]
-#CHECK: chhsi	0, -1                   # encoding: [0xe5,0x54,0x00,0x00,0xff,0xff]
-#CHECK: chhsi	0, 0                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x00]
-#CHECK: chhsi	0, 1                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x01]
-#CHECK: chhsi	0, 32767                # encoding: [0xe5,0x54,0x00,0x00,0x7f,0xff]
-#CHECK: chhsi	0(%r1), 42              # encoding: [0xe5,0x54,0x10,0x00,0x00,0x2a]
-#CHECK: chhsi	0(%r15), 42             # encoding: [0xe5,0x54,0xf0,0x00,0x00,0x2a]
-#CHECK: chhsi	4095(%r1), 42           # encoding: [0xe5,0x54,0x1f,0xff,0x00,0x2a]
-#CHECK: chhsi	4095(%r15), 42          # encoding: [0xe5,0x54,0xff,0xff,0x00,0x2a]
-
-	chhsi	0, 0
-	chhsi	4095, 0
-	chhsi	0, -32768
-	chhsi	0, -1
-	chhsi	0, 0
-	chhsi	0, 1
-	chhsi	0, 32767
-	chhsi	0(%r1), 42
-	chhsi	0(%r15), 42
-	chhsi	4095(%r1), 42
-	chhsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-chhsi-02.s b/test/MC/SystemZ/insn-chhsi-02.s
deleted file mode 100644
index 24e8c0cfbd3a..000000000000
--- a/test/MC/SystemZ/insn-chhsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: chhsi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: chhsi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: chhsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: chhsi	0, -32769
-#CHECK: error: invalid operand
-#CHECK: chhsi	0, 32768
-
-	chhsi	-1, 0
-	chhsi	4096, 0
-	chhsi	0(%r1,%r2), 0
-	chhsi	0, -32769
-	chhsi	0, 32768
diff --git a/test/MC/SystemZ/insn-chi-01.s b/test/MC/SystemZ/insn-chi-01.s
deleted file mode 100644
index fb44cfc21708..000000000000
--- a/test/MC/SystemZ/insn-chi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: chi	%r0, -32768             # encoding: [0xa7,0x0e,0x80,0x00]
-#CHECK: chi	%r0, -1                 # encoding: [0xa7,0x0e,0xff,0xff]
-#CHECK: chi	%r0, 0                  # encoding: [0xa7,0x0e,0x00,0x00]
-#CHECK: chi	%r0, 1                  # encoding: [0xa7,0x0e,0x00,0x01]
-#CHECK: chi	%r0, 32767              # encoding: [0xa7,0x0e,0x7f,0xff]
-#CHECK: chi	%r15, 0                 # encoding: [0xa7,0xfe,0x00,0x00]
-
-	chi	%r0, -32768
-	chi	%r0, -1
-	chi	%r0, 0
-	chi	%r0, 1
-	chi	%r0, 32767
-	chi	%r15, 0
diff --git a/test/MC/SystemZ/insn-chi-02.s b/test/MC/SystemZ/insn-chi-02.s
deleted file mode 100644
index bb9ffdc7e6b0..000000000000
--- a/test/MC/SystemZ/insn-chi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: chi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: chi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: chi	%r0, foo
-
-	chi	%r0, -32769
-	chi	%r0, 32768
-	chi	%r0, foo
diff --git a/test/MC/SystemZ/insn-chrl-01.s b/test/MC/SystemZ/insn-chrl-01.s
deleted file mode 100644
index c133a326d2b0..000000000000
--- a/test/MC/SystemZ/insn-chrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: chrl	%r0, 2864434397         # encoding: [0xc6,0x05,0x55,0x5d,0xe6,0x6e]
-#CHECK: chrl	%r15, 2864434397        # encoding: [0xc6,0xf5,0x55,0x5d,0xe6,0x6e]
-
-	chrl	%r0,0xaabbccdd
-	chrl	%r15,0xaabbccdd
-
-#CHECK: chrl	%r0, foo                # encoding: [0xc6,0x05,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: chrl	%r15, foo               # encoding: [0xc6,0xf5,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	chrl	%r0,foo
-	chrl	%r15,foo
-
-#CHECK: chrl	%r3, bar+100            # encoding: [0xc6,0x35,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: chrl	%r4, bar+100            # encoding: [0xc6,0x45,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	chrl	%r3,bar+100
-	chrl	%r4,bar+100
-
-#CHECK: chrl	%r7, frob@PLT           # encoding: [0xc6,0x75,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: chrl	%r8, frob@PLT           # encoding: [0xc6,0x85,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	chrl	%r7,frob@PLT
-	chrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-chsi-01.s b/test/MC/SystemZ/insn-chsi-01.s
deleted file mode 100644
index 6d92202185ab..000000000000
--- a/test/MC/SystemZ/insn-chsi-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: chsi	0, 0                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x00]
-#CHECK: chsi	4095, 0                 # encoding: [0xe5,0x5c,0x0f,0xff,0x00,0x00]
-#CHECK: chsi	0, -32768               # encoding: [0xe5,0x5c,0x00,0x00,0x80,0x00]
-#CHECK: chsi	0, -1                   # encoding: [0xe5,0x5c,0x00,0x00,0xff,0xff]
-#CHECK: chsi	0, 0                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x00]
-#CHECK: chsi	0, 1                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x01]
-#CHECK: chsi	0, 32767                # encoding: [0xe5,0x5c,0x00,0x00,0x7f,0xff]
-#CHECK: chsi	0(%r1), 42              # encoding: [0xe5,0x5c,0x10,0x00,0x00,0x2a]
-#CHECK: chsi	0(%r15), 42             # encoding: [0xe5,0x5c,0xf0,0x00,0x00,0x2a]
-#CHECK: chsi	4095(%r1), 42           # encoding: [0xe5,0x5c,0x1f,0xff,0x00,0x2a]
-#CHECK: chsi	4095(%r15), 42          # encoding: [0xe5,0x5c,0xff,0xff,0x00,0x2a]
-
-	chsi	0, 0
-	chsi	4095, 0
-	chsi	0, -32768
-	chsi	0, -1
-	chsi	0, 0
-	chsi	0, 1
-	chsi	0, 32767
-	chsi	0(%r1), 42
-	chsi	0(%r15), 42
-	chsi	4095(%r1), 42
-	chsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-chsi-02.s b/test/MC/SystemZ/insn-chsi-02.s
deleted file mode 100644
index 16ace53d6710..000000000000
--- a/test/MC/SystemZ/insn-chsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: chsi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: chsi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: chsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: chsi	0, -32769
-#CHECK: error: invalid operand
-#CHECK: chsi	0, 32768
-
-	chsi	-1, 0
-	chsi	4096, 0
-	chsi	0(%r1,%r2), 0
-	chsi	0, -32769
-	chsi	0, 32768
diff --git a/test/MC/SystemZ/insn-chy-01.s b/test/MC/SystemZ/insn-chy-01.s
deleted file mode 100644
index 9ecc0551cf2b..000000000000
--- a/test/MC/SystemZ/insn-chy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: chy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x79]
-#CHECK: chy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x79]
-#CHECK: chy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x79]
-#CHECK: chy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x79]
-#CHECK: chy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x79]
-#CHECK: chy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x79]
-#CHECK: chy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x79]
-#CHECK: chy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x79]
-#CHECK: chy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x79]
-#CHECK: chy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x79]
-
-	chy	%r0, -524288
-	chy	%r0, -1
-	chy	%r0, 0
-	chy	%r0, 1
-	chy	%r0, 524287
-	chy	%r0, 0(%r1)
-	chy	%r0, 0(%r15)
-	chy	%r0, 524287(%r1,%r15)
-	chy	%r0, 524287(%r15,%r1)
-	chy	%r15, 0
diff --git a/test/MC/SystemZ/insn-chy-02.s b/test/MC/SystemZ/insn-chy-02.s
deleted file mode 100644
index 8ab849f0908c..000000000000
--- a/test/MC/SystemZ/insn-chy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: chy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: chy	%r0, 524288
-
-	chy	%r0, -524289
-	chy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cl-01.s b/test/MC/SystemZ/insn-cl-01.s
deleted file mode 100644
index 7face8f85431..000000000000
--- a/test/MC/SystemZ/insn-cl-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cl	%r0, 0                  # encoding: [0x55,0x00,0x00,0x00]
-#CHECK: cl	%r0, 4095               # encoding: [0x55,0x00,0x0f,0xff]
-#CHECK: cl	%r0, 0(%r1)             # encoding: [0x55,0x00,0x10,0x00]
-#CHECK: cl	%r0, 0(%r15)            # encoding: [0x55,0x00,0xf0,0x00]
-#CHECK: cl	%r0, 4095(%r1,%r15)     # encoding: [0x55,0x01,0xff,0xff]
-#CHECK: cl	%r0, 4095(%r15,%r1)     # encoding: [0x55,0x0f,0x1f,0xff]
-#CHECK: cl	%r15, 0                 # encoding: [0x55,0xf0,0x00,0x00]
-
-	cl	%r0, 0
-	cl	%r0, 4095
-	cl	%r0, 0(%r1)
-	cl	%r0, 0(%r15)
-	cl	%r0, 4095(%r1,%r15)
-	cl	%r0, 4095(%r15,%r1)
-	cl	%r15, 0
diff --git a/test/MC/SystemZ/insn-cl-02.s b/test/MC/SystemZ/insn-cl-02.s
deleted file mode 100644
index ec3e1df996fa..000000000000
--- a/test/MC/SystemZ/insn-cl-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cl	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: cl	%r0, 4096
-
-	cl	%r0, -1
-	cl	%r0, 4096
diff --git a/test/MC/SystemZ/insn-clfhsi-01.s b/test/MC/SystemZ/insn-clfhsi-01.s
deleted file mode 100644
index 910515d96b7c..000000000000
--- a/test/MC/SystemZ/insn-clfhsi-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clfhsi	0, 0                    # encoding: [0xe5,0x5d,0x00,0x00,0x00,0x00]
-#CHECK: clfhsi	4095, 0                 # encoding: [0xe5,0x5d,0x0f,0xff,0x00,0x00]
-#CHECK: clfhsi	0, 65535                # encoding: [0xe5,0x5d,0x00,0x00,0xff,0xff]
-#CHECK: clfhsi	0(%r1), 42              # encoding: [0xe5,0x5d,0x10,0x00,0x00,0x2a]
-#CHECK: clfhsi	0(%r15), 42             # encoding: [0xe5,0x5d,0xf0,0x00,0x00,0x2a]
-#CHECK: clfhsi	4095(%r1), 42           # encoding: [0xe5,0x5d,0x1f,0xff,0x00,0x2a]
-#CHECK: clfhsi	4095(%r15), 42          # encoding: [0xe5,0x5d,0xff,0xff,0x00,0x2a]
-
-	clfhsi	0, 0
-	clfhsi	4095, 0
-	clfhsi	0, 65535
-	clfhsi	0(%r1), 42
-	clfhsi	0(%r15), 42
-	clfhsi	4095(%r1), 42
-	clfhsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-clfhsi-02.s b/test/MC/SystemZ/insn-clfhsi-02.s
deleted file mode 100644
index 4a6f4049c584..000000000000
--- a/test/MC/SystemZ/insn-clfhsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clfhsi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: clfhsi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: clfhsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: clfhsi	0, -1
-#CHECK: error: invalid operand
-#CHECK: clfhsi	0, 65536
-
-	clfhsi	-1, 0
-	clfhsi	4096, 0
-	clfhsi	0(%r1,%r2), 0
-	clfhsi	0, -1
-	clfhsi	0, 65536
diff --git a/test/MC/SystemZ/insn-clfi-01.s b/test/MC/SystemZ/insn-clfi-01.s
deleted file mode 100644
index 4156c7f6e8ea..000000000000
--- a/test/MC/SystemZ/insn-clfi-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clfi	%r0, 0                  # encoding: [0xc2,0x0f,0x00,0x00,0x00,0x00]
-#CHECK: clfi	%r0, 4294967295         # encoding: [0xc2,0x0f,0xff,0xff,0xff,0xff]
-#CHECK: clfi	%r15, 0                 # encoding: [0xc2,0xff,0x00,0x00,0x00,0x00]
-
-	clfi	%r0, 0
-	clfi	%r0, (1 << 32) - 1
-	clfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-clfi-02.s b/test/MC/SystemZ/insn-clfi-02.s
deleted file mode 100644
index 9d3f80630ef9..000000000000
--- a/test/MC/SystemZ/insn-clfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clfi	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: clfi	%r0, (1 << 32)
-
-	clfi	%r0, -1
-	clfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-clg-01.s b/test/MC/SystemZ/insn-clg-01.s
deleted file mode 100644
index 596bae1c813b..000000000000
--- a/test/MC/SystemZ/insn-clg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x21]
-#CHECK: clg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x21]
-#CHECK: clg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x21]
-#CHECK: clg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x21]
-#CHECK: clg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x21]
-#CHECK: clg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x21]
-#CHECK: clg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x21]
-#CHECK: clg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x21]
-#CHECK: clg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x21]
-#CHECK: clg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x21]
-
-	clg	%r0, -524288
-	clg	%r0, -1
-	clg	%r0, 0
-	clg	%r0, 1
-	clg	%r0, 524287
-	clg	%r0, 0(%r1)
-	clg	%r0, 0(%r15)
-	clg	%r0, 524287(%r1,%r15)
-	clg	%r0, 524287(%r15,%r1)
-	clg	%r15, 0
diff --git a/test/MC/SystemZ/insn-clg-02.s b/test/MC/SystemZ/insn-clg-02.s
deleted file mode 100644
index a17aab52049f..000000000000
--- a/test/MC/SystemZ/insn-clg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: clg	%r0, 524288
-
-	clg	%r0, -524289
-	clg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-clgf-01.s b/test/MC/SystemZ/insn-clgf-01.s
deleted file mode 100644
index 003ba838028e..000000000000
--- a/test/MC/SystemZ/insn-clgf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x31]
-#CHECK: clgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x31]
-#CHECK: clgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x31]
-#CHECK: clgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x31]
-#CHECK: clgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x31]
-#CHECK: clgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x31]
-#CHECK: clgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x31]
-#CHECK: clgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x31]
-#CHECK: clgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x31]
-#CHECK: clgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x31]
-
-	clgf	%r0, -524288
-	clgf	%r0, -1
-	clgf	%r0, 0
-	clgf	%r0, 1
-	clgf	%r0, 524287
-	clgf	%r0, 0(%r1)
-	clgf	%r0, 0(%r15)
-	clgf	%r0, 524287(%r1,%r15)
-	clgf	%r0, 524287(%r15,%r1)
-	clgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-clgf-02.s b/test/MC/SystemZ/insn-clgf-02.s
deleted file mode 100644
index 9de49ae79b6a..000000000000
--- a/test/MC/SystemZ/insn-clgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: clgf	%r0, 524288
-
-	clgf	%r0, -524289
-	clgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-clgfi-01.s b/test/MC/SystemZ/insn-clgfi-01.s
deleted file mode 100644
index dbf4a0e8f0a7..000000000000
--- a/test/MC/SystemZ/insn-clgfi-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clgfi	%r0, 0                  # encoding: [0xc2,0x0e,0x00,0x00,0x00,0x00]
-#CHECK: clgfi	%r0, 4294967295         # encoding: [0xc2,0x0e,0xff,0xff,0xff,0xff]
-#CHECK: clgfi	%r15, 0                 # encoding: [0xc2,0xfe,0x00,0x00,0x00,0x00]
-
-	clgfi	%r0, 0
-	clgfi	%r0, (1 << 32) - 1
-	clgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-clgfi-02.s b/test/MC/SystemZ/insn-clgfi-02.s
deleted file mode 100644
index 3f2db33d8366..000000000000
--- a/test/MC/SystemZ/insn-clgfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clgfi	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: clgfi	%r0, (1 << 32)
-
-	clgfi	%r0, -1
-	clgfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-clgfr-01.s b/test/MC/SystemZ/insn-clgfr-01.s
deleted file mode 100644
index 37f1e24452de..000000000000
--- a/test/MC/SystemZ/insn-clgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clgfr	%r0, %r0                # encoding: [0xb9,0x31,0x00,0x00]
-#CHECK: clgfr	%r0, %r15               # encoding: [0xb9,0x31,0x00,0x0f]
-#CHECK: clgfr	%r15, %r0               # encoding: [0xb9,0x31,0x00,0xf0]
-#CHECK: clgfr	%r7, %r8                # encoding: [0xb9,0x31,0x00,0x78]
-
-	clgfr	%r0,%r0
-	clgfr	%r0,%r15
-	clgfr	%r15,%r0
-	clgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-clgfrl-01.s b/test/MC/SystemZ/insn-clgfrl-01.s
deleted file mode 100644
index 6fc6d5eb3bad..000000000000
--- a/test/MC/SystemZ/insn-clgfrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clgfrl	%r0, 2864434397         # encoding: [0xc6,0x0e,0x55,0x5d,0xe6,0x6e]
-#CHECK: clgfrl	%r15, 2864434397        # encoding: [0xc6,0xfe,0x55,0x5d,0xe6,0x6e]
-
-	clgfrl	%r0,0xaabbccdd
-	clgfrl	%r15,0xaabbccdd
-
-#CHECK: clgfrl	%r0, foo                # encoding: [0xc6,0x0e,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: clgfrl	%r15, foo               # encoding: [0xc6,0xfe,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	clgfrl	%r0,foo
-	clgfrl	%r15,foo
-
-#CHECK: clgfrl	%r3, bar+100            # encoding: [0xc6,0x3e,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: clgfrl	%r4, bar+100            # encoding: [0xc6,0x4e,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	clgfrl	%r3,bar+100
-	clgfrl	%r4,bar+100
-
-#CHECK: clgfrl	%r7, frob@PLT           # encoding: [0xc6,0x7e,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: clgfrl	%r8, frob@PLT           # encoding: [0xc6,0x8e,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	clgfrl	%r7,frob@PLT
-	clgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-clghrl-01.s b/test/MC/SystemZ/insn-clghrl-01.s
deleted file mode 100644
index 41c2580abde5..000000000000
--- a/test/MC/SystemZ/insn-clghrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clghrl	%r0, 2864434397         # encoding: [0xc6,0x06,0x55,0x5d,0xe6,0x6e]
-#CHECK: clghrl	%r15, 2864434397        # encoding: [0xc6,0xf6,0x55,0x5d,0xe6,0x6e]
-
-	clghrl	%r0,0xaabbccdd
-	clghrl	%r15,0xaabbccdd
-
-#CHECK: clghrl	%r0, foo                # encoding: [0xc6,0x06,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: clghrl	%r15, foo               # encoding: [0xc6,0xf6,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	clghrl	%r0,foo
-	clghrl	%r15,foo
-
-#CHECK: clghrl	%r3, bar+100            # encoding: [0xc6,0x36,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: clghrl	%r4, bar+100            # encoding: [0xc6,0x46,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	clghrl	%r3,bar+100
-	clghrl	%r4,bar+100
-
-#CHECK: clghrl	%r7, frob@PLT           # encoding: [0xc6,0x76,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: clghrl	%r8, frob@PLT           # encoding: [0xc6,0x86,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	clghrl	%r7,frob@PLT
-	clghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-clghsi-01.s b/test/MC/SystemZ/insn-clghsi-01.s
deleted file mode 100644
index 05e0c585c0a9..000000000000
--- a/test/MC/SystemZ/insn-clghsi-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clghsi	0, 0                    # encoding: [0xe5,0x59,0x00,0x00,0x00,0x00]
-#CHECK: clghsi	4095, 0                 # encoding: [0xe5,0x59,0x0f,0xff,0x00,0x00]
-#CHECK: clghsi	0, 65535                # encoding: [0xe5,0x59,0x00,0x00,0xff,0xff]
-#CHECK: clghsi	0(%r1), 42              # encoding: [0xe5,0x59,0x10,0x00,0x00,0x2a]
-#CHECK: clghsi	0(%r15), 42             # encoding: [0xe5,0x59,0xf0,0x00,0x00,0x2a]
-#CHECK: clghsi	4095(%r1), 42           # encoding: [0xe5,0x59,0x1f,0xff,0x00,0x2a]
-#CHECK: clghsi	4095(%r15), 42          # encoding: [0xe5,0x59,0xff,0xff,0x00,0x2a]
-
-	clghsi	0, 0
-	clghsi	4095, 0
-	clghsi	0, 65535
-	clghsi	0(%r1), 42
-	clghsi	0(%r15), 42
-	clghsi	4095(%r1), 42
-	clghsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-clghsi-02.s b/test/MC/SystemZ/insn-clghsi-02.s
deleted file mode 100644
index f0361280705d..000000000000
--- a/test/MC/SystemZ/insn-clghsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clghsi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: clghsi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: clghsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: clghsi	0, -1
-#CHECK: error: invalid operand
-#CHECK: clghsi	0, 65536
-
-	clghsi	-1, 0
-	clghsi	4096, 0
-	clghsi	0(%r1,%r2), 0
-	clghsi	0, -1
-	clghsi	0, 65536
diff --git a/test/MC/SystemZ/insn-clgr-01.s b/test/MC/SystemZ/insn-clgr-01.s
deleted file mode 100644
index 7e9d2ad43988..000000000000
--- a/test/MC/SystemZ/insn-clgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clgr	%r0, %r0                # encoding: [0xb9,0x21,0x00,0x00]
-#CHECK: clgr	%r0, %r15               # encoding: [0xb9,0x21,0x00,0x0f]
-#CHECK: clgr	%r15, %r0               # encoding: [0xb9,0x21,0x00,0xf0]
-#CHECK: clgr	%r7, %r8                # encoding: [0xb9,0x21,0x00,0x78]
-
-	clgr	%r0,%r0
-	clgr	%r0,%r15
-	clgr	%r15,%r0
-	clgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-clgrl-01.s b/test/MC/SystemZ/insn-clgrl-01.s
deleted file mode 100644
index 439bcd94ff89..000000000000
--- a/test/MC/SystemZ/insn-clgrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clgrl	%r0, 2864434397         # encoding: [0xc6,0x0a,0x55,0x5d,0xe6,0x6e]
-#CHECK: clgrl	%r15, 2864434397        # encoding: [0xc6,0xfa,0x55,0x5d,0xe6,0x6e]
-
-	clgrl	%r0,0xaabbccdd
-	clgrl	%r15,0xaabbccdd
-
-#CHECK: clgrl	%r0, foo                # encoding: [0xc6,0x0a,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: clgrl	%r15, foo               # encoding: [0xc6,0xfa,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	clgrl	%r0,foo
-	clgrl	%r15,foo
-
-#CHECK: clgrl	%r3, bar+100            # encoding: [0xc6,0x3a,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: clgrl	%r4, bar+100            # encoding: [0xc6,0x4a,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	clgrl	%r3,bar+100
-	clgrl	%r4,bar+100
-
-#CHECK: clgrl	%r7, frob@PLT           # encoding: [0xc6,0x7a,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: clgrl	%r8, frob@PLT           # encoding: [0xc6,0x8a,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	clgrl	%r7,frob@PLT
-	clgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-clhhsi-01.s b/test/MC/SystemZ/insn-clhhsi-01.s
deleted file mode 100644
index ae72ffa018ad..000000000000
--- a/test/MC/SystemZ/insn-clhhsi-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clhhsi	0, 0                    # encoding: [0xe5,0x55,0x00,0x00,0x00,0x00]
-#CHECK: clhhsi	4095, 0                 # encoding: [0xe5,0x55,0x0f,0xff,0x00,0x00]
-#CHECK: clhhsi	0, 65535                # encoding: [0xe5,0x55,0x00,0x00,0xff,0xff]
-#CHECK: clhhsi	0(%r1), 42              # encoding: [0xe5,0x55,0x10,0x00,0x00,0x2a]
-#CHECK: clhhsi	0(%r15), 42             # encoding: [0xe5,0x55,0xf0,0x00,0x00,0x2a]
-#CHECK: clhhsi	4095(%r1), 42           # encoding: [0xe5,0x55,0x1f,0xff,0x00,0x2a]
-#CHECK: clhhsi	4095(%r15), 42          # encoding: [0xe5,0x55,0xff,0xff,0x00,0x2a]
-
-	clhhsi	0, 0
-	clhhsi	4095, 0
-	clhhsi	0, 65535
-	clhhsi	0(%r1), 42
-	clhhsi	0(%r15), 42
-	clhhsi	4095(%r1), 42
-	clhhsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-clhhsi-02.s b/test/MC/SystemZ/insn-clhhsi-02.s
deleted file mode 100644
index bbdf7cd0da6c..000000000000
--- a/test/MC/SystemZ/insn-clhhsi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: clhhsi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: clhhsi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: clhhsi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: clhhsi	0, -1
-#CHECK: error: invalid operand
-#CHECK: clhhsi	0, 65536
-
-	clhhsi	-1, 0
-	clhhsi	4096, 0
-	clhhsi	0(%r1,%r2), 0
-	clhhsi	0, -1
-	clhhsi	0, 65536
diff --git a/test/MC/SystemZ/insn-clhrl-01.s b/test/MC/SystemZ/insn-clhrl-01.s
deleted file mode 100644
index b424de8f6678..000000000000
--- a/test/MC/SystemZ/insn-clhrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clhrl	%r0, 2864434397         # encoding: [0xc6,0x07,0x55,0x5d,0xe6,0x6e]
-#CHECK: clhrl	%r15, 2864434397        # encoding: [0xc6,0xf7,0x55,0x5d,0xe6,0x6e]
-
-	clhrl	%r0,0xaabbccdd
-	clhrl	%r15,0xaabbccdd
-
-#CHECK: clhrl	%r0, foo                # encoding: [0xc6,0x07,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: clhrl	%r15, foo               # encoding: [0xc6,0xf7,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	clhrl	%r0,foo
-	clhrl	%r15,foo
-
-#CHECK: clhrl	%r3, bar+100            # encoding: [0xc6,0x37,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: clhrl	%r4, bar+100            # encoding: [0xc6,0x47,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	clhrl	%r3,bar+100
-	clhrl	%r4,bar+100
-
-#CHECK: clhrl	%r7, frob@PLT           # encoding: [0xc6,0x77,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: clhrl	%r8, frob@PLT           # encoding: [0xc6,0x87,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	clhrl	%r7,frob@PLT
-	clhrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cli-01.s b/test/MC/SystemZ/insn-cli-01.s
deleted file mode 100644
index 23bccfab4b79..000000000000
--- a/test/MC/SystemZ/insn-cli-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cli	0, 0                    # encoding: [0x95,0x00,0x00,0x00]
-#CHECK: cli	4095, 0                 # encoding: [0x95,0x00,0x0f,0xff]
-#CHECK: cli	0, 255                  # encoding: [0x95,0xff,0x00,0x00]
-#CHECK: cli	0(%r1), 42              # encoding: [0x95,0x2a,0x10,0x00]
-#CHECK: cli	0(%r15), 42             # encoding: [0x95,0x2a,0xf0,0x00]
-#CHECK: cli	4095(%r1), 42           # encoding: [0x95,0x2a,0x1f,0xff]
-#CHECK: cli	4095(%r15), 42          # encoding: [0x95,0x2a,0xff,0xff]
-
-	cli	0, 0
-	cli	4095, 0
-	cli	0, 255
-	cli	0(%r1), 42
-	cli	0(%r15), 42
-	cli	4095(%r1), 42
-	cli	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-cli-02.s b/test/MC/SystemZ/insn-cli-02.s
deleted file mode 100644
index 7fe6fdabfd32..000000000000
--- a/test/MC/SystemZ/insn-cli-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cli	-1, 0
-#CHECK: error: invalid operand
-#CHECK: cli	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: cli	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: cli	0, -1
-#CHECK: error: invalid operand
-#CHECK: cli	0, 256
-
-	cli	-1, 0
-	cli	4096, 0
-	cli	0(%r1,%r2), 0
-	cli	0, -1
-	cli	0, 256
diff --git a/test/MC/SystemZ/insn-cliy-01.s b/test/MC/SystemZ/insn-cliy-01.s
deleted file mode 100644
index 1a26f605d8b1..000000000000
--- a/test/MC/SystemZ/insn-cliy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cliy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x55]
-#CHECK: cliy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x55]
-#CHECK: cliy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x55]
-#CHECK: cliy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x55]
-#CHECK: cliy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x55]
-#CHECK: cliy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x55]
-#CHECK: cliy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x55]
-#CHECK: cliy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x55]
-#CHECK: cliy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x55]
-#CHECK: cliy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x55]
-
-	cliy	-524288, 0
-	cliy	-1, 0
-	cliy	0, 0
-	cliy	1, 0
-	cliy	524287, 0
-	cliy	0, 255
-	cliy	0(%r1), 42
-	cliy	0(%r15), 42
-	cliy	524287(%r1), 42
-	cliy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-cliy-02.s b/test/MC/SystemZ/insn-cliy-02.s
deleted file mode 100644
index 3e8056315d78..000000000000
--- a/test/MC/SystemZ/insn-cliy-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cliy	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: cliy	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: cliy	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: cliy	0, -1
-#CHECK: error: invalid operand
-#CHECK: cliy	0, 256
-
-	cliy	-524289, 0
-	cliy	524288, 0
-	cliy	0(%r1,%r2), 0
-	cliy	0, -1
-	cliy	0, 256
diff --git a/test/MC/SystemZ/insn-clr-01.s b/test/MC/SystemZ/insn-clr-01.s
deleted file mode 100644
index d187d4e3e823..000000000000
--- a/test/MC/SystemZ/insn-clr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clr	%r0, %r0                # encoding: [0x15,0x00]
-#CHECK: clr	%r0, %r15               # encoding: [0x15,0x0f]
-#CHECK: clr	%r15, %r0               # encoding: [0x15,0xf0]
-#CHECK: clr	%r7, %r8                # encoding: [0x15,0x78]
-
-	clr	%r0,%r0
-	clr	%r0,%r15
-	clr	%r15,%r0
-	clr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-clrl-01.s b/test/MC/SystemZ/insn-clrl-01.s
deleted file mode 100644
index 4c6e649b439d..000000000000
--- a/test/MC/SystemZ/insn-clrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: clrl	%r0, 2864434397         # encoding: [0xc6,0x0f,0x55,0x5d,0xe6,0x6e]
-#CHECK: clrl	%r15, 2864434397        # encoding: [0xc6,0xff,0x55,0x5d,0xe6,0x6e]
-
-	clrl	%r0,0xaabbccdd
-	clrl	%r15,0xaabbccdd
-
-#CHECK: clrl	%r0, foo                # encoding: [0xc6,0x0f,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: clrl	%r15, foo               # encoding: [0xc6,0xff,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	clrl	%r0,foo
-	clrl	%r15,foo
-
-#CHECK: clrl	%r3, bar+100            # encoding: [0xc6,0x3f,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: clrl	%r4, bar+100            # encoding: [0xc6,0x4f,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	clrl	%r3,bar+100
-	clrl	%r4,bar+100
-
-#CHECK: clrl	%r7, frob@PLT           # encoding: [0xc6,0x7f,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: clrl	%r8, frob@PLT           # encoding: [0xc6,0x8f,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	clrl	%r7,frob@PLT
-	clrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cly-01.s b/test/MC/SystemZ/insn-cly-01.s
deleted file mode 100644
index 8fb4af683e60..000000000000
--- a/test/MC/SystemZ/insn-cly-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x55]
-#CHECK: cly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x55]
-#CHECK: cly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x55]
-#CHECK: cly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x55]
-#CHECK: cly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x55]
-#CHECK: cly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x55]
-#CHECK: cly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x55]
-#CHECK: cly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x55]
-#CHECK: cly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x55]
-#CHECK: cly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x55]
-
-	cly	%r0, -524288
-	cly	%r0, -1
-	cly	%r0, 0
-	cly	%r0, 1
-	cly	%r0, 524287
-	cly	%r0, 0(%r1)
-	cly	%r0, 0(%r15)
-	cly	%r0, 524287(%r1,%r15)
-	cly	%r0, 524287(%r15,%r1)
-	cly	%r15, 0
diff --git a/test/MC/SystemZ/insn-cly-02.s b/test/MC/SystemZ/insn-cly-02.s
deleted file mode 100644
index 23f37a96cc4a..000000000000
--- a/test/MC/SystemZ/insn-cly-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cly	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: cly	%r0, 524288
-
-	cly	%r0, -524289
-	cly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cpsdr-01.s b/test/MC/SystemZ/insn-cpsdr-01.s
deleted file mode 100644
index 23d773dfc9c7..000000000000
--- a/test/MC/SystemZ/insn-cpsdr-01.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cpsdr	%f0, %f0, %f0           # encoding: [0xb3,0x72,0x00,0x00]
-#CHECK: cpsdr	%f0, %f0, %f15          # encoding: [0xb3,0x72,0x00,0x0f]
-#CHECK: cpsdr	%f0, %f15, %f0          # encoding: [0xb3,0x72,0xf0,0x00]
-#CHECK: cpsdr	%f15, %f0, %f0          # encoding: [0xb3,0x72,0x00,0xf0]
-#CHECK: cpsdr	%f1, %f2, %f3           # encoding: [0xb3,0x72,0x20,0x13]
-#CHECK: cpsdr	%f15, %f15, %f15        # encoding: [0xb3,0x72,0xf0,0xff]
-
-	cpsdr	%f0, %f0, %f0
-	cpsdr	%f0, %f0, %f15
-	cpsdr	%f0, %f15, %f0
-	cpsdr	%f15, %f0, %f0
-	cpsdr	%f1, %f2, %f3
-	cpsdr	%f15, %f15, %f15
-
diff --git a/test/MC/SystemZ/insn-cr-01.s b/test/MC/SystemZ/insn-cr-01.s
deleted file mode 100644
index d77e08513fc3..000000000000
--- a/test/MC/SystemZ/insn-cr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cr	%r0, %r0                # encoding: [0x19,0x00]
-#CHECK: cr	%r0, %r15               # encoding: [0x19,0x0f]
-#CHECK: cr	%r15, %r0               # encoding: [0x19,0xf0]
-#CHECK: cr	%r7, %r8                # encoding: [0x19,0x78]
-
-	cr	%r0,%r0
-	cr	%r0,%r15
-	cr	%r15,%r0
-	cr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-crl-01.s b/test/MC/SystemZ/insn-crl-01.s
deleted file mode 100644
index 2451b4c9f829..000000000000
--- a/test/MC/SystemZ/insn-crl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: crl	%r0, 2864434397         # encoding: [0xc6,0x0d,0x55,0x5d,0xe6,0x6e]
-#CHECK: crl	%r15, 2864434397        # encoding: [0xc6,0xfd,0x55,0x5d,0xe6,0x6e]
-
-	crl	%r0,0xaabbccdd
-	crl	%r15,0xaabbccdd
-
-#CHECK: crl	%r0, foo                # encoding: [0xc6,0x0d,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: crl	%r15, foo               # encoding: [0xc6,0xfd,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	crl	%r0,foo
-	crl	%r15,foo
-
-#CHECK: crl	%r3, bar+100            # encoding: [0xc6,0x3d,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: crl	%r4, bar+100            # encoding: [0xc6,0x4d,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	crl	%r3,bar+100
-	crl	%r4,bar+100
-
-#CHECK: crl	%r7, frob@PLT           # encoding: [0xc6,0x7d,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: crl	%r8, frob@PLT           # encoding: [0xc6,0x8d,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	crl	%r7,frob@PLT
-	crl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cs-01.s b/test/MC/SystemZ/insn-cs-01.s
deleted file mode 100644
index 3fc6c34b7404..000000000000
--- a/test/MC/SystemZ/insn-cs-01.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cs	%r0, %r0, 0             # encoding: [0xba,0x00,0x00,0x00]
-#CHECK: cs	%r0, %r0, 4095          # encoding: [0xba,0x00,0x0f,0xff]
-#CHECK: cs	%r0, %r0, 0(%r1)        # encoding: [0xba,0x00,0x10,0x00]
-#CHECK: cs	%r0, %r0, 0(%r15)       # encoding: [0xba,0x00,0xf0,0x00]
-#CHECK: cs	%r0, %r0, 4095(%r1)     # encoding: [0xba,0x00,0x1f,0xff]
-#CHECK: cs	%r0, %r0, 4095(%r15)    # encoding: [0xba,0x00,0xff,0xff]
-#CHECK: cs	%r0, %r15, 0            # encoding: [0xba,0x0f,0x00,0x00]
-#CHECK: cs	%r15, %r0, 0            # encoding: [0xba,0xf0,0x00,0x00]
-
-	cs	%r0, %r0, 0
-	cs	%r0, %r0, 4095
-	cs	%r0, %r0, 0(%r1)
-	cs	%r0, %r0, 0(%r15)
-	cs	%r0, %r0, 4095(%r1)
-	cs	%r0, %r0, 4095(%r15)
-	cs	%r0, %r15, 0
-	cs	%r15, %r0, 0
diff --git a/test/MC/SystemZ/insn-cs-02.s b/test/MC/SystemZ/insn-cs-02.s
deleted file mode 100644
index c22795998dd2..000000000000
--- a/test/MC/SystemZ/insn-cs-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cs	%r0, %r0, -1
-#CHECK: error: invalid operand
-#CHECK: cs	%r0, %r0, 4096
-#CHECK: error: invalid use of indexed addressing
-#CHECK: cs	%r0, %r0, 0(%r1,%r2)
-
-	cs	%r0, %r0, -1
-	cs	%r0, %r0, 4096
-	cs	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-csg-01.s b/test/MC/SystemZ/insn-csg-01.s
deleted file mode 100644
index b0fcfa6450a3..000000000000
--- a/test/MC/SystemZ/insn-csg-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: csg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x30]
-#CHECK: csg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x30]
-#CHECK: csg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x30]
-#CHECK: csg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x30]
-#CHECK: csg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x30]
-#CHECK: csg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x30]
-#CHECK: csg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x30]
-#CHECK: csg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x30]
-#CHECK: csg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x30]
-#CHECK: csg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x30]
-#CHECK: csg	%r15, %r0, 0            # encoding: [0xeb,0xf0,0x00,0x00,0x00,0x30]
-
-	csg	%r0, %r0, -524288
-	csg	%r0, %r0, -1
-	csg	%r0, %r0, 0
-	csg	%r0, %r0, 1
-	csg	%r0, %r0, 524287
-	csg	%r0, %r0, 0(%r1)
-	csg	%r0, %r0, 0(%r15)
-	csg	%r0, %r0, 524287(%r1)
-	csg	%r0, %r0, 524287(%r15)
-	csg	%r0, %r15, 0
-	csg	%r15, %r0, 0
diff --git a/test/MC/SystemZ/insn-csg-02.s b/test/MC/SystemZ/insn-csg-02.s
deleted file mode 100644
index 816b155930bb..000000000000
--- a/test/MC/SystemZ/insn-csg-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: csg	%r0, %r0, -524289
-#CHECK: error: invalid operand
-#CHECK: csg	%r0, %r0, 524288
-#CHECK: error: invalid use of indexed addressing
-#CHECK: csg	%r0, %r0, 0(%r1,%r2)
-
-	csg	%r0, %r0, -524289
-	csg	%r0, %r0, 524288
-	csg	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-csy-01.s b/test/MC/SystemZ/insn-csy-01.s
deleted file mode 100644
index d19b2df703c3..000000000000
--- a/test/MC/SystemZ/insn-csy-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: csy	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x14]
-#CHECK: csy	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x14]
-#CHECK: csy	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x14]
-#CHECK: csy	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x14]
-#CHECK: csy	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x14]
-#CHECK: csy	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x14]
-#CHECK: csy	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x14]
-#CHECK: csy	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x14]
-#CHECK: csy	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x14]
-#CHECK: csy	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x14]
-#CHECK: csy	%r15, %r0, 0            # encoding: [0xeb,0xf0,0x00,0x00,0x00,0x14]
-
-	csy	%r0, %r0, -524288
-	csy	%r0, %r0, -1
-	csy	%r0, %r0, 0
-	csy	%r0, %r0, 1
-	csy	%r0, %r0, 524287
-	csy	%r0, %r0, 0(%r1)
-	csy	%r0, %r0, 0(%r15)
-	csy	%r0, %r0, 524287(%r1)
-	csy	%r0, %r0, 524287(%r15)
-	csy	%r0, %r15, 0
-	csy	%r15, %r0, 0
diff --git a/test/MC/SystemZ/insn-csy-02.s b/test/MC/SystemZ/insn-csy-02.s
deleted file mode 100644
index 3ff795909b43..000000000000
--- a/test/MC/SystemZ/insn-csy-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: csy	%r0, %r0, -524289
-#CHECK: error: invalid operand
-#CHECK: csy	%r0, %r0, 524288
-#CHECK: error: invalid use of indexed addressing
-#CHECK: csy	%r0, %r0, 0(%r1,%r2)
-
-	csy	%r0, %r0, -524289
-	csy	%r0, %r0, 524288
-	csy	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-cxbr-01.s b/test/MC/SystemZ/insn-cxbr-01.s
deleted file mode 100644
index 79527f0c5033..000000000000
--- a/test/MC/SystemZ/insn-cxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cxbr	%f0, %f0                # encoding: [0xb3,0x49,0x00,0x00]
-#CHECK: cxbr	%f0, %f13               # encoding: [0xb3,0x49,0x00,0x0d]
-#CHECK: cxbr	%f8, %f8                # encoding: [0xb3,0x49,0x00,0x88]
-#CHECK: cxbr	%f13, %f0               # encoding: [0xb3,0x49,0x00,0xd0]
-
-	cxbr	%f0, %f0
-	cxbr	%f0, %f13
-	cxbr	%f8, %f8
-	cxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-cxbr-02.s b/test/MC/SystemZ/insn-cxbr-02.s
deleted file mode 100644
index 7aaca91c5f22..000000000000
--- a/test/MC/SystemZ/insn-cxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: cxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: cxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: cxbr	%f14, %f0
-
-	cxbr	%f0, %f2
-	cxbr	%f0, %f14
-	cxbr	%f2, %f0
-	cxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-cxfbr-01.s b/test/MC/SystemZ/insn-cxfbr-01.s
deleted file mode 100644
index 14bafd12071d..000000000000
--- a/test/MC/SystemZ/insn-cxfbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cxfbr	%f0, %r0                # encoding: [0xb3,0x96,0x00,0x00]
-#CHECK: cxfbr	%f0, %r15               # encoding: [0xb3,0x96,0x00,0x0f]
-#CHECK: cxfbr	%f13, %r0               # encoding: [0xb3,0x96,0x00,0xd0]
-#CHECK: cxfbr	%f8, %r7                # encoding: [0xb3,0x96,0x00,0x87]
-#CHECK: cxfbr	%f13, %r15              # encoding: [0xb3,0x96,0x00,0xdf]
-
-	cxfbr	%f0, %r0
-	cxfbr	%f0, %r15
-	cxfbr	%f13, %r0
-	cxfbr	%f8, %r7
-	cxfbr	%f13, %r15
diff --git a/test/MC/SystemZ/insn-cxfbr-02.s b/test/MC/SystemZ/insn-cxfbr-02.s
deleted file mode 100644
index 5343378569c1..000000000000
--- a/test/MC/SystemZ/insn-cxfbr-02.s
+++ /dev/null
@@ -1,22 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cxfbr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: cxfbr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: cxfbr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: cxfbr	%a0, %r0
-#CHECK: error: invalid register
-#CHECK: cxfbr	%f2, %r0
-#CHECK: error: invalid register
-#CHECK: cxfbr	%f14, %r0
-
-	cxfbr	%r0, %r0
-	cxfbr	%f0, %f0
-	cxfbr	%f0, %a0
-	cxfbr	%a0, %r0
-	cxfbr	%f2, %r0
-	cxfbr	%f14, %r0
diff --git a/test/MC/SystemZ/insn-cxgbr-01.s b/test/MC/SystemZ/insn-cxgbr-01.s
deleted file mode 100644
index 90914b44e9a2..000000000000
--- a/test/MC/SystemZ/insn-cxgbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cxgbr	%f0, %r0                # encoding: [0xb3,0xa6,0x00,0x00]
-#CHECK: cxgbr	%f0, %r15               # encoding: [0xb3,0xa6,0x00,0x0f]
-#CHECK: cxgbr	%f13, %r0               # encoding: [0xb3,0xa6,0x00,0xd0]
-#CHECK: cxgbr	%f8, %r7                # encoding: [0xb3,0xa6,0x00,0x87]
-#CHECK: cxgbr	%f13, %r15              # encoding: [0xb3,0xa6,0x00,0xdf]
-
-	cxgbr	%f0, %r0
-	cxgbr	%f0, %r15
-	cxgbr	%f13, %r0
-	cxgbr	%f8, %r7
-	cxgbr	%f13, %r15
diff --git a/test/MC/SystemZ/insn-cxgbr-02.s b/test/MC/SystemZ/insn-cxgbr-02.s
deleted file mode 100644
index d10664dba7c2..000000000000
--- a/test/MC/SystemZ/insn-cxgbr-02.s
+++ /dev/null
@@ -1,22 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: cxgbr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: cxgbr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: cxgbr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: cxgbr	%a0, %r0
-#CHECK: error: invalid register
-#CHECK: cxgbr	%f2, %r0
-#CHECK: error: invalid register
-#CHECK: cxgbr	%f14, %r0
-
-	cxgbr	%r0, %r0
-	cxgbr	%f0, %f0
-	cxgbr	%f0, %a0
-	cxgbr	%a0, %r0
-	cxgbr	%f2, %r0
-	cxgbr	%f14, %r0
diff --git a/test/MC/SystemZ/insn-cy-01.s b/test/MC/SystemZ/insn-cy-01.s
deleted file mode 100644
index 5f21b963ff90..000000000000
--- a/test/MC/SystemZ/insn-cy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: cy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x59]
-#CHECK: cy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x59]
-#CHECK: cy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x59]
-#CHECK: cy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x59]
-#CHECK: cy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x59]
-#CHECK: cy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x59]
-#CHECK: cy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x59]
-#CHECK: cy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x59]
-#CHECK: cy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x59]
-#CHECK: cy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x59]
-
-	cy	%r0, -524288
-	cy	%r0, -1
-	cy	%r0, 0
-	cy	%r0, 1
-	cy	%r0, 524287
-	cy	%r0, 0(%r1)
-	cy	%r0, 0(%r15)
-	cy	%r0, 524287(%r1,%r15)
-	cy	%r0, 524287(%r15,%r1)
-	cy	%r15, 0
diff --git a/test/MC/SystemZ/insn-cy-02.s b/test/MC/SystemZ/insn-cy-02.s
deleted file mode 100644
index 1c996cdced35..000000000000
--- a/test/MC/SystemZ/insn-cy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: cy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: cy	%r0, 524288
-
-	cy	%r0, -524289
-	cy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ddb-01.s b/test/MC/SystemZ/insn-ddb-01.s
deleted file mode 100644
index 417af11c08ae..000000000000
--- a/test/MC/SystemZ/insn-ddb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ddb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1d]
-#CHECK: ddb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1d]
-#CHECK: ddb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1d]
-#CHECK: ddb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1d]
-#CHECK: ddb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1d]
-#CHECK: ddb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1d]
-#CHECK: ddb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1d]
-
-	ddb	%f0, 0
-	ddb	%f0, 4095
-	ddb	%f0, 0(%r1)
-	ddb	%f0, 0(%r15)
-	ddb	%f0, 4095(%r1,%r15)
-	ddb	%f0, 4095(%r15,%r1)
-	ddb	%f15, 0
diff --git a/test/MC/SystemZ/insn-ddb-02.s b/test/MC/SystemZ/insn-ddb-02.s
deleted file mode 100644
index c6357d10e429..000000000000
--- a/test/MC/SystemZ/insn-ddb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ddb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: ddb	%f0, 4096
-
-	ddb	%f0, -1
-	ddb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ddbr-01.s b/test/MC/SystemZ/insn-ddbr-01.s
deleted file mode 100644
index 7ee1fee5b375..000000000000
--- a/test/MC/SystemZ/insn-ddbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ddbr	%f0, %f0                # encoding: [0xb3,0x1d,0x00,0x00]
-#CHECK: ddbr	%f0, %f15               # encoding: [0xb3,0x1d,0x00,0x0f]
-#CHECK: ddbr	%f7, %f8                # encoding: [0xb3,0x1d,0x00,0x78]
-#CHECK: ddbr	%f15, %f0               # encoding: [0xb3,0x1d,0x00,0xf0]
-
-	ddbr	%f0, %f0
-	ddbr	%f0, %f15
-	ddbr	%f7, %f8
-	ddbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-deb-01.s b/test/MC/SystemZ/insn-deb-01.s
deleted file mode 100644
index 93cfb024648c..000000000000
--- a/test/MC/SystemZ/insn-deb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: deb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0d]
-#CHECK: deb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0d]
-#CHECK: deb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0d]
-#CHECK: deb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0d]
-#CHECK: deb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0d]
-#CHECK: deb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0d]
-#CHECK: deb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0d]
-
-	deb	%f0, 0
-	deb	%f0, 4095
-	deb	%f0, 0(%r1)
-	deb	%f0, 0(%r15)
-	deb	%f0, 4095(%r1,%r15)
-	deb	%f0, 4095(%r15,%r1)
-	deb	%f15, 0
diff --git a/test/MC/SystemZ/insn-deb-02.s b/test/MC/SystemZ/insn-deb-02.s
deleted file mode 100644
index e4edd4ef86d1..000000000000
--- a/test/MC/SystemZ/insn-deb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: deb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: deb	%f0, 4096
-
-	deb	%f0, -1
-	deb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-debr-01.s b/test/MC/SystemZ/insn-debr-01.s
deleted file mode 100644
index 02ee16c9cf44..000000000000
--- a/test/MC/SystemZ/insn-debr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: debr	%f0, %f0                # encoding: [0xb3,0x0d,0x00,0x00]
-#CHECK: debr	%f0, %f15               # encoding: [0xb3,0x0d,0x00,0x0f]
-#CHECK: debr	%f7, %f8                # encoding: [0xb3,0x0d,0x00,0x78]
-#CHECK: debr	%f15, %f0               # encoding: [0xb3,0x0d,0x00,0xf0]
-
-	debr	%f0, %f0
-	debr	%f0, %f15
-	debr	%f7, %f8
-	debr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-dl-01.s b/test/MC/SystemZ/insn-dl-01.s
deleted file mode 100644
index 50b24e7a00da..000000000000
--- a/test/MC/SystemZ/insn-dl-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dl	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x97]
-#CHECK: dl	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x97]
-#CHECK: dl	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x97]
-#CHECK: dl	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x97]
-#CHECK: dl	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x97]
-#CHECK: dl	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x97]
-#CHECK: dl	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x97]
-#CHECK: dl	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x97]
-#CHECK: dl	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x97]
-#CHECK: dl	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x97]
-
-	dl	%r0, -524288
-	dl	%r0, -1
-	dl	%r0, 0
-	dl	%r0, 1
-	dl	%r0, 524287
-	dl	%r0, 0(%r1)
-	dl	%r0, 0(%r15)
-	dl	%r0, 524287(%r1,%r15)
-	dl	%r0, 524287(%r15,%r1)
-	dl	%r14, 0
diff --git a/test/MC/SystemZ/insn-dl-02.s b/test/MC/SystemZ/insn-dl-02.s
deleted file mode 100644
index 8f9f37320b86..000000000000
--- a/test/MC/SystemZ/insn-dl-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: dl	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: dl	%r0, 524288
-#CHECK: error: invalid register
-#CHECK: dl	%r1, 0
-#CHECK: error: invalid register
-#CHECK: dl	%r15, 0
-
-	dl	%r0, -524289
-	dl	%r0, 524288
-	dl	%r1, 0
-	dl	%r15, 0
diff --git a/test/MC/SystemZ/insn-dlg-01.s b/test/MC/SystemZ/insn-dlg-01.s
deleted file mode 100644
index 8a304f828ec8..000000000000
--- a/test/MC/SystemZ/insn-dlg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dlg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x87]
-#CHECK: dlg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x87]
-#CHECK: dlg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x87]
-#CHECK: dlg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x87]
-#CHECK: dlg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x87]
-#CHECK: dlg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x87]
-#CHECK: dlg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x87]
-#CHECK: dlg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x87]
-#CHECK: dlg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x87]
-#CHECK: dlg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x87]
-
-	dlg	%r0, -524288
-	dlg	%r0, -1
-	dlg	%r0, 0
-	dlg	%r0, 1
-	dlg	%r0, 524287
-	dlg	%r0, 0(%r1)
-	dlg	%r0, 0(%r15)
-	dlg	%r0, 524287(%r1,%r15)
-	dlg	%r0, 524287(%r15,%r1)
-	dlg	%r14, 0
diff --git a/test/MC/SystemZ/insn-dlg-02.s b/test/MC/SystemZ/insn-dlg-02.s
deleted file mode 100644
index cbed8981732f..000000000000
--- a/test/MC/SystemZ/insn-dlg-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: dlg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: dlg	%r0, 524288
-#CHECK: error: invalid register
-#CHECK: dlg	%r1, 0
-#CHECK: error: invalid register
-#CHECK: dlg	%r15, 0
-
-	dlg	%r0, -524289
-	dlg	%r0, 524288
-	dlg	%r1, 0
-	dlg	%r15, 0
diff --git a/test/MC/SystemZ/insn-dlgr-01.s b/test/MC/SystemZ/insn-dlgr-01.s
deleted file mode 100644
index b2a4de5f91f3..000000000000
--- a/test/MC/SystemZ/insn-dlgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dlgr	%r0, %r0                # encoding: [0xb9,0x87,0x00,0x00]
-#CHECK: dlgr	%r0, %r15               # encoding: [0xb9,0x87,0x00,0x0f]
-#CHECK: dlgr	%r14, %r0               # encoding: [0xb9,0x87,0x00,0xe0]
-#CHECK: dlgr	%r6, %r9                # encoding: [0xb9,0x87,0x00,0x69]
-
-	dlgr	%r0,%r0
-	dlgr	%r0,%r15
-	dlgr	%r14,%r0
-	dlgr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dlgr-02.s b/test/MC/SystemZ/insn-dlgr-02.s
deleted file mode 100644
index c407b4fb0bf8..000000000000
--- a/test/MC/SystemZ/insn-dlgr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: dlgr	%r1, %r0
-#CHECK: error: invalid register
-#CHECK: dlgr	%r15, %r0
-
-	dlgr	%r1, %r0
-	dlgr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dlr-01.s b/test/MC/SystemZ/insn-dlr-01.s
deleted file mode 100644
index 7e8be79e46aa..000000000000
--- a/test/MC/SystemZ/insn-dlr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dlr	%r0, %r0                # encoding: [0xb9,0x97,0x00,0x00]
-#CHECK: dlr	%r0, %r15               # encoding: [0xb9,0x97,0x00,0x0f]
-#CHECK: dlr	%r14, %r0               # encoding: [0xb9,0x97,0x00,0xe0]
-#CHECK: dlr	%r6, %r9                # encoding: [0xb9,0x97,0x00,0x69]
-
-	dlr	%r0,%r0
-	dlr	%r0,%r15
-	dlr	%r14,%r0
-	dlr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dlr-02.s b/test/MC/SystemZ/insn-dlr-02.s
deleted file mode 100644
index eb31e18fd876..000000000000
--- a/test/MC/SystemZ/insn-dlr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: dlr	%r1, %r0
-#CHECK: error: invalid register
-#CHECK: dlr	%r15, %r0
-
-	dlr	%r1, %r0
-	dlr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dsg-01.s b/test/MC/SystemZ/insn-dsg-01.s
deleted file mode 100644
index 5cd0b40a9fb1..000000000000
--- a/test/MC/SystemZ/insn-dsg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dsg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0d]
-#CHECK: dsg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0d]
-#CHECK: dsg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0d]
-#CHECK: dsg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0d]
-#CHECK: dsg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0d]
-#CHECK: dsg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0d]
-#CHECK: dsg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0d]
-#CHECK: dsg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0d]
-#CHECK: dsg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0d]
-#CHECK: dsg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x0d]
-
-	dsg	%r0, -524288
-	dsg	%r0, -1
-	dsg	%r0, 0
-	dsg	%r0, 1
-	dsg	%r0, 524287
-	dsg	%r0, 0(%r1)
-	dsg	%r0, 0(%r15)
-	dsg	%r0, 524287(%r1,%r15)
-	dsg	%r0, 524287(%r15,%r1)
-	dsg	%r14, 0
diff --git a/test/MC/SystemZ/insn-dsg-02.s b/test/MC/SystemZ/insn-dsg-02.s
deleted file mode 100644
index 16979418c48b..000000000000
--- a/test/MC/SystemZ/insn-dsg-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: dsg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: dsg	%r0, 524288
-#CHECK: error: invalid register
-#CHECK: dsg	%r1, 0
-#CHECK: error: invalid register
-#CHECK: dsg	%r15, 0
-
-	dsg	%r0, -524289
-	dsg	%r0, 524288
-	dsg	%r1, 0
-	dsg	%r15, 0
diff --git a/test/MC/SystemZ/insn-dsgf-01.s b/test/MC/SystemZ/insn-dsgf-01.s
deleted file mode 100644
index 2cde0c79d9d4..000000000000
--- a/test/MC/SystemZ/insn-dsgf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dsgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1d]
-#CHECK: dsgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1d]
-#CHECK: dsgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1d]
-#CHECK: dsgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1d]
-#CHECK: dsgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1d]
-#CHECK: dsgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1d]
-#CHECK: dsgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1d]
-#CHECK: dsgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1d]
-#CHECK: dsgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1d]
-#CHECK: dsgf	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x1d]
-
-	dsgf	%r0, -524288
-	dsgf	%r0, -1
-	dsgf	%r0, 0
-	dsgf	%r0, 1
-	dsgf	%r0, 524287
-	dsgf	%r0, 0(%r1)
-	dsgf	%r0, 0(%r15)
-	dsgf	%r0, 524287(%r1,%r15)
-	dsgf	%r0, 524287(%r15,%r1)
-	dsgf	%r14, 0
diff --git a/test/MC/SystemZ/insn-dsgf-02.s b/test/MC/SystemZ/insn-dsgf-02.s
deleted file mode 100644
index 253d9ad70be5..000000000000
--- a/test/MC/SystemZ/insn-dsgf-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: dsgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: dsgf	%r0, 524288
-#CHECK: error: invalid register
-#CHECK: dsgf	%r1, 0
-#CHECK: error: invalid register
-#CHECK: dsgf	%r15, 0
-
-	dsgf	%r0, -524289
-	dsgf	%r0, 524288
-	dsgf	%r1, 0
-	dsgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-dsgfr-01.s b/test/MC/SystemZ/insn-dsgfr-01.s
deleted file mode 100644
index 9b61550ffcbf..000000000000
--- a/test/MC/SystemZ/insn-dsgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dsgfr	%r0, %r0                # encoding: [0xb9,0x1d,0x00,0x00]
-#CHECK: dsgfr	%r0, %r15               # encoding: [0xb9,0x1d,0x00,0x0f]
-#CHECK: dsgfr	%r14, %r0               # encoding: [0xb9,0x1d,0x00,0xe0]
-#CHECK: dsgfr	%r6, %r9                # encoding: [0xb9,0x1d,0x00,0x69]
-
-	dsgfr	%r0,%r0
-	dsgfr	%r0,%r15
-	dsgfr	%r14,%r0
-	dsgfr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dsgfr-02.s b/test/MC/SystemZ/insn-dsgfr-02.s
deleted file mode 100644
index 2eb8b2394723..000000000000
--- a/test/MC/SystemZ/insn-dsgfr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: dsgfr	%r1, %r0
-#CHECK: error: invalid register
-#CHECK: dsgfr	%r15, %r0
-
-	dsgfr	%r1, %r0
-	dsgfr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dsgr-01.s b/test/MC/SystemZ/insn-dsgr-01.s
deleted file mode 100644
index 02b4099cfaa7..000000000000
--- a/test/MC/SystemZ/insn-dsgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dsgr	%r0, %r0                # encoding: [0xb9,0x0d,0x00,0x00]
-#CHECK: dsgr	%r0, %r15               # encoding: [0xb9,0x0d,0x00,0x0f]
-#CHECK: dsgr	%r14, %r0               # encoding: [0xb9,0x0d,0x00,0xe0]
-#CHECK: dsgr	%r6, %r9                # encoding: [0xb9,0x0d,0x00,0x69]
-
-	dsgr	%r0,%r0
-	dsgr	%r0,%r15
-	dsgr	%r14,%r0
-	dsgr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dsgr-02.s b/test/MC/SystemZ/insn-dsgr-02.s
deleted file mode 100644
index 019429580696..000000000000
--- a/test/MC/SystemZ/insn-dsgr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: dsgr	%r1, %r0
-#CHECK: error: invalid register
-#CHECK: dsgr	%r15, %r0
-
-	dsgr	%r1, %r0
-	dsgr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dxbr-01.s b/test/MC/SystemZ/insn-dxbr-01.s
deleted file mode 100644
index 6a452080ced9..000000000000
--- a/test/MC/SystemZ/insn-dxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dxbr	%f0, %f0                # encoding: [0xb3,0x4d,0x00,0x00]
-#CHECK: dxbr	%f0, %f13               # encoding: [0xb3,0x4d,0x00,0x0d]
-#CHECK: dxbr	%f8, %f8                # encoding: [0xb3,0x4d,0x00,0x88]
-#CHECK: dxbr	%f13, %f0               # encoding: [0xb3,0x4d,0x00,0xd0]
-
-	dxbr	%f0, %f0
-	dxbr	%f0, %f13
-	dxbr	%f8, %f8
-	dxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-dxbr-02.s b/test/MC/SystemZ/insn-dxbr-02.s
deleted file mode 100644
index cac64191b8be..000000000000
--- a/test/MC/SystemZ/insn-dxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: dxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: dxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: dxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: dxbr	%f14, %f0
-
-	dxbr	%f0, %f2
-	dxbr	%f0, %f14
-	dxbr	%f2, %f0
-	dxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-ear-01.s b/test/MC/SystemZ/insn-ear-01.s
deleted file mode 100644
index f614f86999ba..000000000000
--- a/test/MC/SystemZ/insn-ear-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ear	%r0, %a0                # encoding: [0xb2,0x4f,0x00,0x00]
-#CHECK: ear	%r0, %a15               # encoding: [0xb2,0x4f,0x00,0x0f]
-#CHECK: ear	%r15, %a0               # encoding: [0xb2,0x4f,0x00,0xf0]
-#CHECK: ear	%r7, %a8                # encoding: [0xb2,0x4f,0x00,0x78]
-#CHECK: ear	%r15, %a15              # encoding: [0xb2,0x4f,0x00,0xff]
-
-	ear	%r0, %a0
-	ear	%r0, %a15
-	ear	%r15, %a0
-	ear	%r7, %a8
-	ear	%r15, %a15
diff --git a/test/MC/SystemZ/insn-ear-02.s b/test/MC/SystemZ/insn-ear-02.s
deleted file mode 100644
index 7fb35eaa5353..000000000000
--- a/test/MC/SystemZ/insn-ear-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ear	%r0, 0
-#CHECK: error: invalid register
-#CHECK: ear	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: ear	%a0, %r0
-
-	ear	%r0, 0
-	ear	%r0, %r0
-	ear	%a0, %r0
diff --git a/test/MC/SystemZ/insn-fidbr-01.s b/test/MC/SystemZ/insn-fidbr-01.s
deleted file mode 100644
index e52c91c21142..000000000000
--- a/test/MC/SystemZ/insn-fidbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: fidbr	%f0, 0, %f0             # encoding: [0xb3,0x5f,0x00,0x00]
-#CHECK: fidbr	%f0, 0, %f15            # encoding: [0xb3,0x5f,0x00,0x0f]
-#CHECK: fidbr	%f0, 15, %f0            # encoding: [0xb3,0x5f,0xf0,0x00]
-#CHECK: fidbr	%f4, 5, %f6             # encoding: [0xb3,0x5f,0x50,0x46]
-#CHECK: fidbr	%f15, 0, %f0            # encoding: [0xb3,0x5f,0x00,0xf0]
-
-	fidbr	%f0, 0, %f0
-	fidbr	%f0, 0, %f15
-	fidbr	%f0, 15, %f0
-	fidbr	%f4, 5, %f6
-	fidbr	%f15, 0, %f0
diff --git a/test/MC/SystemZ/insn-fidbr-02.s b/test/MC/SystemZ/insn-fidbr-02.s
deleted file mode 100644
index 5a35f46589bc..000000000000
--- a/test/MC/SystemZ/insn-fidbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: fidbr	%r0, 0, %f0
-#CHECK: error: invalid register
-#CHECK: fidbr	%f0, 0, %r0
-#CHECK: error: invalid operand
-#CHECK: fidbr	%f0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: fidbr	%f0, 16, %f0
-
-	fidbr	%r0, 0, %f0
-	fidbr	%f0, 0, %r0
-	fidbr	%f0, -1, %f0
-	fidbr	%f0, 16, %f0
diff --git a/test/MC/SystemZ/insn-fiebr-01.s b/test/MC/SystemZ/insn-fiebr-01.s
deleted file mode 100644
index 0b4e633a1edc..000000000000
--- a/test/MC/SystemZ/insn-fiebr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: fiebr	%f0, 0, %f0             # encoding: [0xb3,0x57,0x00,0x00]
-#CHECK: fiebr	%f0, 0, %f15            # encoding: [0xb3,0x57,0x00,0x0f]
-#CHECK: fiebr	%f0, 15, %f0            # encoding: [0xb3,0x57,0xf0,0x00]
-#CHECK: fiebr	%f4, 5, %f6             # encoding: [0xb3,0x57,0x50,0x46]
-#CHECK: fiebr	%f15, 0, %f0            # encoding: [0xb3,0x57,0x00,0xf0]
-
-	fiebr	%f0, 0, %f0
-	fiebr	%f0, 0, %f15
-	fiebr	%f0, 15, %f0
-	fiebr	%f4, 5, %f6
-	fiebr	%f15, 0, %f0
diff --git a/test/MC/SystemZ/insn-fiebr-02.s b/test/MC/SystemZ/insn-fiebr-02.s
deleted file mode 100644
index 2ecdd4d1bc00..000000000000
--- a/test/MC/SystemZ/insn-fiebr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: fiebr	%r0, 0, %f0
-#CHECK: error: invalid register
-#CHECK: fiebr	%f0, 0, %r0
-#CHECK: error: invalid operand
-#CHECK: fiebr	%f0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: fiebr	%f0, 16, %f0
-
-	fiebr	%r0, 0, %f0
-	fiebr	%f0, 0, %r0
-	fiebr	%f0, -1, %f0
-	fiebr	%f0, 16, %f0
diff --git a/test/MC/SystemZ/insn-fixbr-01.s b/test/MC/SystemZ/insn-fixbr-01.s
deleted file mode 100644
index 02676ed76d32..000000000000
--- a/test/MC/SystemZ/insn-fixbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: fixbr	%f0, 0, %f0             # encoding: [0xb3,0x47,0x00,0x00]
-#CHECK: fixbr	%f0, 0, %f13            # encoding: [0xb3,0x47,0x00,0x0d]
-#CHECK: fixbr	%f0, 15, %f0            # encoding: [0xb3,0x47,0xf0,0x00]
-#CHECK: fixbr	%f4, 5, %f8             # encoding: [0xb3,0x47,0x50,0x48]
-#CHECK: fixbr	%f13, 0, %f0            # encoding: [0xb3,0x47,0x00,0xd0]
-
-	fixbr	%f0, 0, %f0
-	fixbr	%f0, 0, %f13
-	fixbr	%f0, 15, %f0
-	fixbr	%f4, 5, %f8
-	fixbr	%f13, 0, %f0
diff --git a/test/MC/SystemZ/insn-fixbr-02.s b/test/MC/SystemZ/insn-fixbr-02.s
deleted file mode 100644
index 3f6873429208..000000000000
--- a/test/MC/SystemZ/insn-fixbr-02.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: fixbr	%r0, 0, %f0
-#CHECK: error: invalid register
-#CHECK: fixbr	%f0, 0, %r0
-#CHECK: error: invalid operand
-#CHECK: fixbr	%f0, -1, %f0
-#CHECK: error: invalid operand
-#CHECK: fixbr	%f0, 16, %f0
-#CHECK: error: invalid register
-#CHECK: fixbr	%f0, 0, %f2
-#CHECK: error: invalid register
-#CHECK: fixbr	%f0, 0, %f14
-#CHECK: error: invalid register
-#CHECK: fixbr	%f2, 0, %f0
-#CHECK: error: invalid register
-#CHECK: fixbr	%f14, 0, %f0
-
-	fixbr	%r0, 0, %f0
-	fixbr	%f0, 0, %r0
-	fixbr	%f0, -1, %f0
-	fixbr	%f0, 16, %f0
-	fixbr	%f0, 0, %f2
-	fixbr	%f0, 0, %f14
-	fixbr	%f2, 0, %f0
-	fixbr	%f14, 0, %f0
diff --git a/test/MC/SystemZ/insn-flogr-01.s b/test/MC/SystemZ/insn-flogr-01.s
deleted file mode 100644
index f6031ce3b678..000000000000
--- a/test/MC/SystemZ/insn-flogr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: flogr	%r0, %r0                # encoding: [0xb9,0x83,0x00,0x00]
-#CHECK: flogr	%r0, %r15               # encoding: [0xb9,0x83,0x00,0x0f]
-#CHECK: flogr	%r10, %r9               # encoding: [0xb9,0x83,0x00,0xa9]
-#CHECK: flogr	%r14, %r0               # encoding: [0xb9,0x83,0x00,0xe0]
-
-	flogr	%r0, %r0
-	flogr	%r0, %r15
-	flogr	%r10, %r9
-	flogr	%r14, %r0
diff --git a/test/MC/SystemZ/insn-flogr-02.s b/test/MC/SystemZ/insn-flogr-02.s
deleted file mode 100644
index e0d117c3be73..000000000000
--- a/test/MC/SystemZ/insn-flogr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: flogr	%r1, %r0
-#CHECK: error: invalid register
-#CHECK: flogr	%r15, %r0
-
-	flogr	%r1, %r0
-	flogr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-good-z196.s b/test/MC/SystemZ/insn-good-z196.s
new file mode 100644
index 000000000000..258e06f99dd1
--- /dev/null
+++ b/test/MC/SystemZ/insn-good-z196.s
@@ -0,0 +1,916 @@
+# For z196 and above.
+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=z196 -show-encoding %s | FileCheck %s
+
+#CHECK: aghik	%r0, %r0, -32768        # encoding: [0xec,0x00,0x80,0x00,0x00,0xd9]
+#CHECK: aghik	%r0, %r0, -1            # encoding: [0xec,0x00,0xff,0xff,0x00,0xd9]
+#CHECK: aghik	%r0, %r0, 0             # encoding: [0xec,0x00,0x00,0x00,0x00,0xd9]
+#CHECK: aghik	%r0, %r0, 1             # encoding: [0xec,0x00,0x00,0x01,0x00,0xd9]
+#CHECK: aghik	%r0, %r0, 32767         # encoding: [0xec,0x00,0x7f,0xff,0x00,0xd9]
+#CHECK: aghik	%r0, %r15, 0            # encoding: [0xec,0x0f,0x00,0x00,0x00,0xd9]
+#CHECK: aghik	%r15, %r0, 0            # encoding: [0xec,0xf0,0x00,0x00,0x00,0xd9]
+#CHECK: aghik	%r7, %r8, -16           # encoding: [0xec,0x78,0xff,0xf0,0x00,0xd9]
+
+	aghik	%r0, %r0, -32768
+	aghik	%r0, %r0, -1
+	aghik	%r0, %r0, 0
+	aghik	%r0, %r0, 1
+	aghik	%r0, %r0, 32767
+	aghik	%r0, %r15, 0
+	aghik	%r15, %r0, 0
+	aghik	%r7, %r8, -16
+
+#CHECK: agrk	%r0, %r0, %r0           # encoding: [0xb9,0xe8,0x00,0x00]
+#CHECK: agrk	%r0, %r0, %r15          # encoding: [0xb9,0xe8,0xf0,0x00]
+#CHECK: agrk	%r0, %r15, %r0          # encoding: [0xb9,0xe8,0x00,0x0f]
+#CHECK: agrk	%r15, %r0, %r0          # encoding: [0xb9,0xe8,0x00,0xf0]
+#CHECK: agrk	%r7, %r8, %r9           # encoding: [0xb9,0xe8,0x90,0x78]
+
+	agrk	%r0,%r0,%r0
+	agrk	%r0,%r0,%r15
+	agrk	%r0,%r15,%r0
+	agrk	%r15,%r0,%r0
+	agrk	%r7,%r8,%r9
+
+#CHECK: ahik	%r0, %r0, -32768        # encoding: [0xec,0x00,0x80,0x00,0x00,0xd8]
+#CHECK: ahik	%r0, %r0, -1            # encoding: [0xec,0x00,0xff,0xff,0x00,0xd8]
+#CHECK: ahik	%r0, %r0, 0             # encoding: [0xec,0x00,0x00,0x00,0x00,0xd8]
+#CHECK: ahik	%r0, %r0, 1             # encoding: [0xec,0x00,0x00,0x01,0x00,0xd8]
+#CHECK: ahik	%r0, %r0, 32767         # encoding: [0xec,0x00,0x7f,0xff,0x00,0xd8]
+#CHECK: ahik	%r0, %r15, 0            # encoding: [0xec,0x0f,0x00,0x00,0x00,0xd8]
+#CHECK: ahik	%r15, %r0, 0            # encoding: [0xec,0xf0,0x00,0x00,0x00,0xd8]
+#CHECK: ahik	%r7, %r8, -16           # encoding: [0xec,0x78,0xff,0xf0,0x00,0xd8]
+
+	ahik	%r0, %r0, -32768
+	ahik	%r0, %r0, -1
+	ahik	%r0, %r0, 0
+	ahik	%r0, %r0, 1
+	ahik	%r0, %r0, 32767
+	ahik	%r0, %r15, 0
+	ahik	%r15, %r0, 0
+	ahik	%r7, %r8, -16
+
+#CHECK: aih	%r0, -2147483648        # encoding: [0xcc,0x08,0x80,0x00,0x00,0x00]
+#CHECK: aih	%r0, -1                 # encoding: [0xcc,0x08,0xff,0xff,0xff,0xff]
+#CHECK: aih	%r0, 0                  # encoding: [0xcc,0x08,0x00,0x00,0x00,0x00]
+#CHECK: aih	%r0, 1                  # encoding: [0xcc,0x08,0x00,0x00,0x00,0x01]
+#CHECK: aih	%r0, 2147483647         # encoding: [0xcc,0x08,0x7f,0xff,0xff,0xff]
+#CHECK: aih	%r15, 0                 # encoding: [0xcc,0xf8,0x00,0x00,0x00,0x00]
+
+	aih	%r0, -1 << 31
+	aih	%r0, -1
+	aih	%r0, 0
+	aih	%r0, 1
+	aih	%r0, (1 << 31) - 1
+	aih	%r15, 0
+
+#CHECK: alghsik	%r0, %r0, -32768        # encoding: [0xec,0x00,0x80,0x00,0x00,0xdb]
+#CHECK: alghsik	%r0, %r0, -1            # encoding: [0xec,0x00,0xff,0xff,0x00,0xdb]
+#CHECK: alghsik	%r0, %r0, 0             # encoding: [0xec,0x00,0x00,0x00,0x00,0xdb]
+#CHECK: alghsik	%r0, %r0, 1             # encoding: [0xec,0x00,0x00,0x01,0x00,0xdb]
+#CHECK: alghsik	%r0, %r0, 32767         # encoding: [0xec,0x00,0x7f,0xff,0x00,0xdb]
+#CHECK: alghsik	%r0, %r15, 0            # encoding: [0xec,0x0f,0x00,0x00,0x00,0xdb]
+#CHECK: alghsik	%r15, %r0, 0            # encoding: [0xec,0xf0,0x00,0x00,0x00,0xdb]
+#CHECK: alghsik	%r7, %r8, -16           # encoding: [0xec,0x78,0xff,0xf0,0x00,0xdb]
+
+	alghsik	%r0, %r0, -32768
+	alghsik	%r0, %r0, -1
+	alghsik	%r0, %r0, 0
+	alghsik	%r0, %r0, 1
+	alghsik	%r0, %r0, 32767
+	alghsik	%r0, %r15, 0
+	alghsik	%r15, %r0, 0
+	alghsik	%r7, %r8, -16
+
+#CHECK: algrk	%r0, %r0, %r0           # encoding: [0xb9,0xea,0x00,0x00]
+#CHECK: algrk	%r0, %r0, %r15          # encoding: [0xb9,0xea,0xf0,0x00]
+#CHECK: algrk	%r0, %r15, %r0          # encoding: [0xb9,0xea,0x00,0x0f]
+#CHECK: algrk	%r15, %r0, %r0          # encoding: [0xb9,0xea,0x00,0xf0]
+#CHECK: algrk	%r7, %r8, %r9           # encoding: [0xb9,0xea,0x90,0x78]
+
+	algrk	%r0,%r0,%r0
+	algrk	%r0,%r0,%r15
+	algrk	%r0,%r15,%r0
+	algrk	%r15,%r0,%r0
+	algrk	%r7,%r8,%r9
+
+#CHECK: alhsik	%r0, %r0, -32768        # encoding: [0xec,0x00,0x80,0x00,0x00,0xda]
+#CHECK: alhsik	%r0, %r0, -1            # encoding: [0xec,0x00,0xff,0xff,0x00,0xda]
+#CHECK: alhsik	%r0, %r0, 0             # encoding: [0xec,0x00,0x00,0x00,0x00,0xda]
+#CHECK: alhsik	%r0, %r0, 1             # encoding: [0xec,0x00,0x00,0x01,0x00,0xda]
+#CHECK: alhsik	%r0, %r0, 32767         # encoding: [0xec,0x00,0x7f,0xff,0x00,0xda]
+#CHECK: alhsik	%r0, %r15, 0            # encoding: [0xec,0x0f,0x00,0x00,0x00,0xda]
+#CHECK: alhsik	%r15, %r0, 0            # encoding: [0xec,0xf0,0x00,0x00,0x00,0xda]
+#CHECK: alhsik	%r7, %r8, -16           # encoding: [0xec,0x78,0xff,0xf0,0x00,0xda]
+
+	alhsik	%r0, %r0, -32768
+	alhsik	%r0, %r0, -1
+	alhsik	%r0, %r0, 0
+	alhsik	%r0, %r0, 1
+	alhsik	%r0, %r0, 32767
+	alhsik	%r0, %r15, 0
+	alhsik	%r15, %r0, 0
+	alhsik	%r7, %r8, -16
+
+#CHECK: alrk	%r0, %r0, %r0           # encoding: [0xb9,0xfa,0x00,0x00]
+#CHECK: alrk	%r0, %r0, %r15          # encoding: [0xb9,0xfa,0xf0,0x00]
+#CHECK: alrk	%r0, %r15, %r0          # encoding: [0xb9,0xfa,0x00,0x0f]
+#CHECK: alrk	%r15, %r0, %r0          # encoding: [0xb9,0xfa,0x00,0xf0]
+#CHECK: alrk	%r7, %r8, %r9           # encoding: [0xb9,0xfa,0x90,0x78]
+
+	alrk	%r0,%r0,%r0
+	alrk	%r0,%r0,%r15
+	alrk	%r0,%r15,%r0
+	alrk	%r15,%r0,%r0
+	alrk	%r7,%r8,%r9
+
+#CHECK: ark	%r0, %r0, %r0           # encoding: [0xb9,0xf8,0x00,0x00]
+#CHECK: ark	%r0, %r0, %r15          # encoding: [0xb9,0xf8,0xf0,0x00]
+#CHECK: ark	%r0, %r15, %r0          # encoding: [0xb9,0xf8,0x00,0x0f]
+#CHECK: ark	%r15, %r0, %r0          # encoding: [0xb9,0xf8,0x00,0xf0]
+#CHECK: ark	%r7, %r8, %r9           # encoding: [0xb9,0xf8,0x90,0x78]
+
+	ark	%r0,%r0,%r0
+	ark	%r0,%r0,%r15
+	ark	%r0,%r15,%r0
+	ark	%r15,%r0,%r0
+	ark	%r7,%r8,%r9
+
+#CHECK: chf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xcd]
+#CHECK: chf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xcd]
+#CHECK: chf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xcd]
+#CHECK: chf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xcd]
+#CHECK: chf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xcd]
+#CHECK: chf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xcd]
+#CHECK: chf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xcd]
+#CHECK: chf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xcd]
+#CHECK: chf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xcd]
+#CHECK: chf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xcd]
+
+	chf	%r0, -524288
+	chf	%r0, -1
+	chf	%r0, 0
+	chf	%r0, 1
+	chf	%r0, 524287
+	chf	%r0, 0(%r1)
+	chf	%r0, 0(%r15)
+	chf	%r0, 524287(%r1,%r15)
+	chf	%r0, 524287(%r15,%r1)
+	chf	%r15, 0
+
+#CHECK: cih	%r0, -2147483648        # encoding: [0xcc,0x0d,0x80,0x00,0x00,0x00]
+#CHECK: cih	%r0, -1                 # encoding: [0xcc,0x0d,0xff,0xff,0xff,0xff]
+#CHECK: cih	%r0, 0                  # encoding: [0xcc,0x0d,0x00,0x00,0x00,0x00]
+#CHECK: cih	%r0, 1                  # encoding: [0xcc,0x0d,0x00,0x00,0x00,0x01]
+#CHECK: cih	%r0, 2147483647         # encoding: [0xcc,0x0d,0x7f,0xff,0xff,0xff]
+#CHECK: cih	%r15, 0                 # encoding: [0xcc,0xfd,0x00,0x00,0x00,0x00]
+
+	cih	%r0, -1 << 31
+	cih	%r0, -1
+	cih	%r0, 0
+	cih	%r0, 1
+	cih	%r0, (1 << 31) - 1
+	cih	%r15, 0
+
+#CHECK: clhf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xcf]
+#CHECK: clhf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xcf]
+#CHECK: clhf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xcf]
+#CHECK: clhf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xcf]
+#CHECK: clhf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xcf]
+#CHECK: clhf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xcf]
+#CHECK: clhf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xcf]
+#CHECK: clhf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xcf]
+#CHECK: clhf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xcf]
+#CHECK: clhf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xcf]
+
+	clhf	%r0, -524288
+	clhf	%r0, -1
+	clhf	%r0, 0
+	clhf	%r0, 1
+	clhf	%r0, 524287
+	clhf	%r0, 0(%r1)
+	clhf	%r0, 0(%r15)
+	clhf	%r0, 524287(%r1,%r15)
+	clhf	%r0, 524287(%r15,%r1)
+	clhf	%r15, 0
+
+#CHECK: clih	%r0, 0                  # encoding: [0xcc,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: clih	%r0, 1                  # encoding: [0xcc,0x0f,0x00,0x00,0x00,0x01]
+#CHECK: clih	%r0, 4294967295         # encoding: [0xcc,0x0f,0xff,0xff,0xff,0xff]
+#CHECK: clih	%r15, 0                 # encoding: [0xcc,0xff,0x00,0x00,0x00,0x00]
+
+	clih	%r0, 0
+	clih	%r0, 1
+	clih	%r0, (1 << 32) - 1
+	clih	%r15, 0
+
+#CHECK: fidbra	%f0, 0, %f0, 0          # encoding: [0xb3,0x5f,0x00,0x00]
+#CHECK: fidbra	%f0, 0, %f0, 15         # encoding: [0xb3,0x5f,0x0f,0x00]
+#CHECK: fidbra	%f0, 0, %f15, 0         # encoding: [0xb3,0x5f,0x00,0x0f]
+#CHECK: fidbra	%f0, 15, %f0, 0         # encoding: [0xb3,0x5f,0xf0,0x00]
+#CHECK: fidbra	%f4, 5, %f6, 7          # encoding: [0xb3,0x5f,0x57,0x46]
+#CHECK: fidbra	%f15, 0, %f0, 0         # encoding: [0xb3,0x5f,0x00,0xf0]
+
+	fidbra	%f0, 0, %f0, 0
+	fidbra	%f0, 0, %f0, 15
+	fidbra	%f0, 0, %f15, 0
+	fidbra	%f0, 15, %f0, 0
+	fidbra	%f4, 5, %f6, 7
+	fidbra	%f15, 0, %f0, 0
+
+#CHECK: fiebra	%f0, 0, %f0, 0          # encoding: [0xb3,0x57,0x00,0x00]
+#CHECK: fiebra	%f0, 0, %f0, 15         # encoding: [0xb3,0x57,0x0f,0x00]
+#CHECK: fiebra	%f0, 0, %f15, 0         # encoding: [0xb3,0x57,0x00,0x0f]
+#CHECK: fiebra	%f0, 15, %f0, 0         # encoding: [0xb3,0x57,0xf0,0x00]
+#CHECK: fiebra	%f4, 5, %f6, 7          # encoding: [0xb3,0x57,0x57,0x46]
+#CHECK: fiebra	%f15, 0, %f0, 0         # encoding: [0xb3,0x57,0x00,0xf0]
+
+	fiebra	%f0, 0, %f0, 0
+	fiebra	%f0, 0, %f0, 15
+	fiebra	%f0, 0, %f15, 0
+	fiebra	%f0, 15, %f0, 0
+	fiebra	%f4, 5, %f6, 7
+	fiebra	%f15, 0, %f0, 0
+
+#CHECK: fixbra	%f0, 0, %f0, 0          # encoding: [0xb3,0x47,0x00,0x00]
+#CHECK: fixbra	%f0, 0, %f0, 15         # encoding: [0xb3,0x47,0x0f,0x00]
+#CHECK: fixbra	%f0, 0, %f13, 0         # encoding: [0xb3,0x47,0x00,0x0d]
+#CHECK: fixbra	%f0, 15, %f0, 0         # encoding: [0xb3,0x47,0xf0,0x00]
+#CHECK: fixbra	%f4, 5, %f8, 9          # encoding: [0xb3,0x47,0x59,0x48]
+#CHECK: fixbra	%f13, 0, %f0, 0         # encoding: [0xb3,0x47,0x00,0xd0]
+
+	fixbra	%f0, 0, %f0, 0
+	fixbra	%f0, 0, %f0, 15
+	fixbra	%f0, 0, %f13, 0
+	fixbra	%f0, 15, %f0, 0
+	fixbra	%f4, 5, %f8, 9
+	fixbra	%f13, 0, %f0, 0
+
+#CHECK: lbh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc0]
+#CHECK: lbh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc0]
+#CHECK: lbh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc0]
+#CHECK: lbh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc0]
+#CHECK: lbh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc0]
+#CHECK: lbh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc0]
+#CHECK: lbh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc0]
+#CHECK: lbh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc0]
+#CHECK: lbh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc0]
+#CHECK: lbh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc0]
+
+	lbh	%r0, -524288
+	lbh	%r0, -1
+	lbh	%r0, 0
+	lbh	%r0, 1
+	lbh	%r0, 524287
+	lbh	%r0, 0(%r1)
+	lbh	%r0, 0(%r15)
+	lbh	%r0, 524287(%r1,%r15)
+	lbh	%r0, 524287(%r15,%r1)
+	lbh	%r15, 0
+
+#CHECK: lfh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xca]
+#CHECK: lfh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xca]
+#CHECK: lfh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xca]
+#CHECK: lfh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xca]
+#CHECK: lfh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xca]
+#CHECK: lfh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xca]
+#CHECK: lfh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xca]
+#CHECK: lfh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xca]
+#CHECK: lfh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xca]
+#CHECK: lfh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xca]
+
+	lfh	%r0, -524288
+	lfh	%r0, -1
+	lfh	%r0, 0
+	lfh	%r0, 1
+	lfh	%r0, 524287
+	lfh	%r0, 0(%r1)
+	lfh	%r0, 0(%r15)
+	lfh	%r0, 524287(%r1,%r15)
+	lfh	%r0, 524287(%r15,%r1)
+	lfh	%r15, 0
+
+#CHECK: lhh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc4]
+#CHECK: lhh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc4]
+#CHECK: lhh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc4]
+#CHECK: lhh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc4]
+#CHECK: lhh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc4]
+#CHECK: lhh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc4]
+#CHECK: lhh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc4]
+#CHECK: lhh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc4]
+#CHECK: lhh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc4]
+#CHECK: lhh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc4]
+
+	lhh	%r0, -524288
+	lhh	%r0, -1
+	lhh	%r0, 0
+	lhh	%r0, 1
+	lhh	%r0, 524287
+	lhh	%r0, 0(%r1)
+	lhh	%r0, 0(%r15)
+	lhh	%r0, 524287(%r1,%r15)
+	lhh	%r0, 524287(%r15,%r1)
+	lhh	%r15, 0
+
+#CHECK: llch	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc2]
+#CHECK: llch	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc2]
+#CHECK: llch	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc2]
+#CHECK: llch	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc2]
+#CHECK: llch	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc2]
+#CHECK: llch	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc2]
+#CHECK: llch	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc2]
+#CHECK: llch	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc2]
+#CHECK: llch	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc2]
+#CHECK: llch	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc2]
+
+	llch	%r0, -524288
+	llch	%r0, -1
+	llch	%r0, 0
+	llch	%r0, 1
+	llch	%r0, 524287
+	llch	%r0, 0(%r1)
+	llch	%r0, 0(%r15)
+	llch	%r0, 524287(%r1,%r15)
+	llch	%r0, 524287(%r15,%r1)
+	llch	%r15, 0
+
+#CHECK: llhh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc6]
+#CHECK: llhh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc6]
+#CHECK: llhh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc6]
+#CHECK: llhh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc6]
+#CHECK: llhh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc6]
+#CHECK: llhh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc6]
+#CHECK: llhh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc6]
+#CHECK: llhh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc6]
+#CHECK: llhh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc6]
+#CHECK: llhh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc6]
+
+	llhh	%r0, -524288
+	llhh	%r0, -1
+	llhh	%r0, 0
+	llhh	%r0, 1
+	llhh	%r0, 524287
+	llhh	%r0, 0(%r1)
+	llhh	%r0, 0(%r15)
+	llhh	%r0, 524287(%r1,%r15)
+	llhh	%r0, 524287(%r15,%r1)
+	llhh	%r15, 0
+
+#CHECK: loc	%r0, 0, 0               # encoding: [0xeb,0x00,0x00,0x00,0x00,0xf2]
+#CHECK: loc	%r0, 0, 15              # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xf2]
+#CHECK: loc	%r0, -524288, 0         # encoding: [0xeb,0x00,0x00,0x00,0x80,0xf2]
+#CHECK: loc	%r0, 524287, 0          # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xf2]
+#CHECK: loc	%r0, 0(%r1), 0          # encoding: [0xeb,0x00,0x10,0x00,0x00,0xf2]
+#CHECK: loc	%r0, 0(%r15), 0         # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xf2]
+#CHECK: loc	%r15, 0, 0              # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xf2]
+#CHECK: loc	%r1, 4095(%r2), 3       # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xf2]
+
+	loc	%r0,0,0
+	loc	%r0,0,15
+	loc	%r0,-524288,0
+	loc	%r0,524287,0
+	loc	%r0,0(%r1),0
+	loc	%r0,0(%r15),0
+	loc	%r15,0,0
+	loc	%r1,4095(%r2),3
+
+#CHECK: loco   %r1, 2(%r3)              # encoding: [0xeb,0x11,0x30,0x02,0x00,0xf2]
+#CHECK: loch   %r1, 2(%r3)              # encoding: [0xeb,0x12,0x30,0x02,0x00,0xf2]
+#CHECK: locnle %r1, 2(%r3)              # encoding: [0xeb,0x13,0x30,0x02,0x00,0xf2]
+#CHECK: locl   %r1, 2(%r3)              # encoding: [0xeb,0x14,0x30,0x02,0x00,0xf2]
+#CHECK: locnhe %r1, 2(%r3)              # encoding: [0xeb,0x15,0x30,0x02,0x00,0xf2]
+#CHECK: loclh  %r1, 2(%r3)              # encoding: [0xeb,0x16,0x30,0x02,0x00,0xf2]
+#CHECK: locne  %r1, 2(%r3)              # encoding: [0xeb,0x17,0x30,0x02,0x00,0xf2]
+#CHECK: loce   %r1, 2(%r3)              # encoding: [0xeb,0x18,0x30,0x02,0x00,0xf2]
+#CHECK: locnlh %r1, 2(%r3)              # encoding: [0xeb,0x19,0x30,0x02,0x00,0xf2]
+#CHECK: loche  %r1, 2(%r3)              # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xf2]
+#CHECK: locnl  %r1, 2(%r3)              # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xf2]
+#CHECK: locle  %r1, 2(%r3)              # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xf2]
+#CHECK: locnh  %r1, 2(%r3)              # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xf2]
+#CHECK: locno  %r1, 2(%r3)              # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xf2]
+
+	loco   %r1,2(%r3)
+	loch   %r1,2(%r3)
+	locnle %r1,2(%r3)
+	locl   %r1,2(%r3)
+	locnhe %r1,2(%r3)
+	loclh  %r1,2(%r3)
+	locne  %r1,2(%r3)
+	loce   %r1,2(%r3)
+	locnlh %r1,2(%r3)
+	loche  %r1,2(%r3)
+	locnl  %r1,2(%r3)
+	locle  %r1,2(%r3)
+	locnh  %r1,2(%r3)
+	locno  %r1,2(%r3)
+
+#CHECK: locg	%r0, 0, 0               # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe2]
+#CHECK: locg	%r0, 0, 15              # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe2]
+#CHECK: locg	%r0, -524288, 0         # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe2]
+#CHECK: locg	%r0, 524287, 0          # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe2]
+#CHECK: locg	%r0, 0(%r1), 0          # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe2]
+#CHECK: locg	%r0, 0(%r15), 0         # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe2]
+#CHECK: locg	%r15, 0, 0              # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe2]
+#CHECK: locg	%r1, 4095(%r2), 3       # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe2]
+
+	locg	%r0,0,0
+	locg	%r0,0,15
+	locg	%r0,-524288,0
+	locg	%r0,524287,0
+	locg	%r0,0(%r1),0
+	locg	%r0,0(%r15),0
+	locg	%r15,0,0
+	locg	%r1,4095(%r2),3
+
+#CHECK: locgo   %r1, 2(%r3)             # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe2]
+#CHECK: locgh   %r1, 2(%r3)             # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe2]
+#CHECK: locgnle %r1, 2(%r3)             # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe2]
+#CHECK: locgl   %r1, 2(%r3)             # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe2]
+#CHECK: locgnhe %r1, 2(%r3)             # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe2]
+#CHECK: locglh  %r1, 2(%r3)             # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe2]
+#CHECK: locgne  %r1, 2(%r3)             # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe2]
+#CHECK: locge   %r1, 2(%r3)             # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe2]
+#CHECK: locgnlh %r1, 2(%r3)             # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe2]
+#CHECK: locghe  %r1, 2(%r3)             # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe2]
+#CHECK: locgnl  %r1, 2(%r3)             # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe2]
+#CHECK: locgle  %r1, 2(%r3)             # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe2]
+#CHECK: locgnh  %r1, 2(%r3)             # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe2]
+#CHECK: locgno  %r1, 2(%r3)             # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe2]
+
+	locgo   %r1,2(%r3)
+	locgh   %r1,2(%r3)
+	locgnle %r1,2(%r3)
+	locgl   %r1,2(%r3)
+	locgnhe %r1,2(%r3)
+	locglh  %r1,2(%r3)
+	locgne  %r1,2(%r3)
+	locge   %r1,2(%r3)
+	locgnlh %r1,2(%r3)
+	locghe  %r1,2(%r3)
+	locgnl  %r1,2(%r3)
+	locgle  %r1,2(%r3)
+	locgnh  %r1,2(%r3)
+	locgno  %r1,2(%r3)
+
+#CHECK: locgr	%r1, %r2, 0             # encoding: [0xb9,0xe2,0x00,0x12]
+#CHECK: locgr	%r1, %r2, 15            # encoding: [0xb9,0xe2,0xf0,0x12]
+
+	locgr	%r1,%r2,0
+	locgr	%r1,%r2,15
+
+#CHECK: locgro   %r1, %r3               # encoding: [0xb9,0xe2,0x10,0x13]
+#CHECK: locgrh   %r1, %r3               # encoding: [0xb9,0xe2,0x20,0x13]
+#CHECK: locgrnle %r1, %r3               # encoding: [0xb9,0xe2,0x30,0x13]
+#CHECK: locgrl   %r1, %r3               # encoding: [0xb9,0xe2,0x40,0x13]
+#CHECK: locgrnhe %r1, %r3               # encoding: [0xb9,0xe2,0x50,0x13]
+#CHECK: locgrlh  %r1, %r3               # encoding: [0xb9,0xe2,0x60,0x13]
+#CHECK: locgrne  %r1, %r3               # encoding: [0xb9,0xe2,0x70,0x13]
+#CHECK: locgre   %r1, %r3               # encoding: [0xb9,0xe2,0x80,0x13]
+#CHECK: locgrnlh %r1, %r3               # encoding: [0xb9,0xe2,0x90,0x13]
+#CHECK: locgrhe  %r1, %r3               # encoding: [0xb9,0xe2,0xa0,0x13]
+#CHECK: locgrnl  %r1, %r3               # encoding: [0xb9,0xe2,0xb0,0x13]
+#CHECK: locgrle  %r1, %r3               # encoding: [0xb9,0xe2,0xc0,0x13]
+#CHECK: locgrnh  %r1, %r3               # encoding: [0xb9,0xe2,0xd0,0x13]
+#CHECK: locgrno  %r1, %r3               # encoding: [0xb9,0xe2,0xe0,0x13]
+
+	locgro   %r1,%r3
+	locgrh   %r1,%r3
+	locgrnle %r1,%r3
+	locgrl   %r1,%r3
+	locgrnhe %r1,%r3
+	locgrlh  %r1,%r3
+	locgrne  %r1,%r3
+	locgre   %r1,%r3
+	locgrnlh %r1,%r3
+	locgrhe  %r1,%r3
+	locgrnl  %r1,%r3
+	locgrle  %r1,%r3
+	locgrnh  %r1,%r3
+	locgrno  %r1,%r3
+
+#CHECK: locr	%r1, %r2, 0             # encoding: [0xb9,0xf2,0x00,0x12]
+#CHECK: locr	%r1, %r2, 15            # encoding: [0xb9,0xf2,0xf0,0x12]
+
+	locr	%r1,%r2,0
+	locr	%r1,%r2,15
+
+#CHECK: locro   %r1, %r3                # encoding: [0xb9,0xf2,0x10,0x13]
+#CHECK: locrh   %r1, %r3                # encoding: [0xb9,0xf2,0x20,0x13]
+#CHECK: locrnle %r1, %r3                # encoding: [0xb9,0xf2,0x30,0x13]
+#CHECK: locrl   %r1, %r3                # encoding: [0xb9,0xf2,0x40,0x13]
+#CHECK: locrnhe %r1, %r3                # encoding: [0xb9,0xf2,0x50,0x13]
+#CHECK: locrlh  %r1, %r3                # encoding: [0xb9,0xf2,0x60,0x13]
+#CHECK: locrne  %r1, %r3                # encoding: [0xb9,0xf2,0x70,0x13]
+#CHECK: locre   %r1, %r3                # encoding: [0xb9,0xf2,0x80,0x13]
+#CHECK: locrnlh %r1, %r3                # encoding: [0xb9,0xf2,0x90,0x13]
+#CHECK: locrhe  %r1, %r3                # encoding: [0xb9,0xf2,0xa0,0x13]
+#CHECK: locrnl  %r1, %r3                # encoding: [0xb9,0xf2,0xb0,0x13]
+#CHECK: locrle  %r1, %r3                # encoding: [0xb9,0xf2,0xc0,0x13]
+#CHECK: locrnh  %r1, %r3                # encoding: [0xb9,0xf2,0xd0,0x13]
+#CHECK: locrno  %r1, %r3                # encoding: [0xb9,0xf2,0xe0,0x13]
+
+	locro   %r1,%r3
+	locrh   %r1,%r3
+	locrnle %r1,%r3
+	locrl   %r1,%r3
+	locrnhe %r1,%r3
+	locrlh  %r1,%r3
+	locrne  %r1,%r3
+	locre   %r1,%r3
+	locrnlh %r1,%r3
+	locrhe  %r1,%r3
+	locrnl  %r1,%r3
+	locrle  %r1,%r3
+	locrnh  %r1,%r3
+	locrno  %r1,%r3
+
+#CHECK: ngrk	%r0, %r0, %r0           # encoding: [0xb9,0xe4,0x00,0x00]
+#CHECK: ngrk	%r0, %r0, %r15          # encoding: [0xb9,0xe4,0xf0,0x00]
+#CHECK: ngrk	%r0, %r15, %r0          # encoding: [0xb9,0xe4,0x00,0x0f]
+#CHECK: ngrk	%r15, %r0, %r0          # encoding: [0xb9,0xe4,0x00,0xf0]
+#CHECK: ngrk	%r7, %r8, %r9           # encoding: [0xb9,0xe4,0x90,0x78]
+
+	ngrk	%r0,%r0,%r0
+	ngrk	%r0,%r0,%r15
+	ngrk	%r0,%r15,%r0
+	ngrk	%r15,%r0,%r0
+	ngrk	%r7,%r8,%r9
+
+#CHECK: nrk	%r0, %r0, %r0           # encoding: [0xb9,0xf4,0x00,0x00]
+#CHECK: nrk	%r0, %r0, %r15          # encoding: [0xb9,0xf4,0xf0,0x00]
+#CHECK: nrk	%r0, %r15, %r0          # encoding: [0xb9,0xf4,0x00,0x0f]
+#CHECK: nrk	%r15, %r0, %r0          # encoding: [0xb9,0xf4,0x00,0xf0]
+#CHECK: nrk	%r7, %r8, %r9           # encoding: [0xb9,0xf4,0x90,0x78]
+
+	nrk	%r0,%r0,%r0
+	nrk	%r0,%r0,%r15
+	nrk	%r0,%r15,%r0
+	nrk	%r15,%r0,%r0
+	nrk	%r7,%r8,%r9
+
+#CHECK: ogrk	%r0, %r0, %r0           # encoding: [0xb9,0xe6,0x00,0x00]
+#CHECK: ogrk	%r0, %r0, %r15          # encoding: [0xb9,0xe6,0xf0,0x00]
+#CHECK: ogrk	%r0, %r15, %r0          # encoding: [0xb9,0xe6,0x00,0x0f]
+#CHECK: ogrk	%r15, %r0, %r0          # encoding: [0xb9,0xe6,0x00,0xf0]
+#CHECK: ogrk	%r7, %r8, %r9           # encoding: [0xb9,0xe6,0x90,0x78]
+
+	ogrk	%r0,%r0,%r0
+	ogrk	%r0,%r0,%r15
+	ogrk	%r0,%r15,%r0
+	ogrk	%r15,%r0,%r0
+	ogrk	%r7,%r8,%r9
+
+#CHECK: ork	%r0, %r0, %r0           # encoding: [0xb9,0xf6,0x00,0x00]
+#CHECK: ork	%r0, %r0, %r15          # encoding: [0xb9,0xf6,0xf0,0x00]
+#CHECK: ork	%r0, %r15, %r0          # encoding: [0xb9,0xf6,0x00,0x0f]
+#CHECK: ork	%r15, %r0, %r0          # encoding: [0xb9,0xf6,0x00,0xf0]
+#CHECK: ork	%r7, %r8, %r9           # encoding: [0xb9,0xf6,0x90,0x78]
+
+	ork	%r0,%r0,%r0
+	ork	%r0,%r0,%r15
+	ork	%r0,%r15,%r0
+	ork	%r15,%r0,%r0
+	ork	%r7,%r8,%r9
+
+#CHECK: risbhg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d]
+#CHECK: risbhg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d]
+#CHECK: risbhg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d]
+#CHECK: risbhg	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x5d]
+#CHECK: risbhg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x5d]
+#CHECK: risbhg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x5d]
+#CHECK: risbhg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x5d]
+
+	risbhg	%r0,%r0,0,0,0
+	risbhg	%r0,%r0,0,0,63
+	risbhg	%r0,%r0,0,255,0
+	risbhg	%r0,%r0,255,0,0
+	risbhg	%r0,%r15,0,0,0
+	risbhg	%r15,%r0,0,0,0
+	risbhg	%r4,%r5,6,7,8
+
+#CHECK: risblg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x51]
+#CHECK: risblg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x51]
+#CHECK: risblg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x51]
+#CHECK: risblg	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x51]
+#CHECK: risblg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x51]
+#CHECK: risblg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x51]
+#CHECK: risblg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x51]
+
+	risblg	%r0,%r0,0,0,0
+	risblg	%r0,%r0,0,0,63
+	risblg	%r0,%r0,0,255,0
+	risblg	%r0,%r0,255,0,0
+	risblg	%r0,%r15,0,0,0
+	risblg	%r15,%r0,0,0,0
+	risblg	%r4,%r5,6,7,8
+
+#CHECK: sgrk	%r0, %r0, %r0           # encoding: [0xb9,0xe9,0x00,0x00]
+#CHECK: sgrk	%r0, %r0, %r15          # encoding: [0xb9,0xe9,0xf0,0x00]
+#CHECK: sgrk	%r0, %r15, %r0          # encoding: [0xb9,0xe9,0x00,0x0f]
+#CHECK: sgrk	%r15, %r0, %r0          # encoding: [0xb9,0xe9,0x00,0xf0]
+#CHECK: sgrk	%r7, %r8, %r9           # encoding: [0xb9,0xe9,0x90,0x78]
+
+	sgrk	%r0,%r0,%r0
+	sgrk	%r0,%r0,%r15
+	sgrk	%r0,%r15,%r0
+	sgrk	%r15,%r0,%r0
+	sgrk	%r7,%r8,%r9
+
+#CHECK: slgrk	%r0, %r0, %r0           # encoding: [0xb9,0xeb,0x00,0x00]
+#CHECK: slgrk	%r0, %r0, %r15          # encoding: [0xb9,0xeb,0xf0,0x00]
+#CHECK: slgrk	%r0, %r15, %r0          # encoding: [0xb9,0xeb,0x00,0x0f]
+#CHECK: slgrk	%r15, %r0, %r0          # encoding: [0xb9,0xeb,0x00,0xf0]
+#CHECK: slgrk	%r7, %r8, %r9           # encoding: [0xb9,0xeb,0x90,0x78]
+
+	slgrk	%r0,%r0,%r0
+	slgrk	%r0,%r0,%r15
+	slgrk	%r0,%r15,%r0
+	slgrk	%r15,%r0,%r0
+	slgrk	%r7,%r8,%r9
+
+#CHECK: slrk	%r0, %r0, %r0           # encoding: [0xb9,0xfb,0x00,0x00]
+#CHECK: slrk	%r0, %r0, %r15          # encoding: [0xb9,0xfb,0xf0,0x00]
+#CHECK: slrk	%r0, %r15, %r0          # encoding: [0xb9,0xfb,0x00,0x0f]
+#CHECK: slrk	%r15, %r0, %r0          # encoding: [0xb9,0xfb,0x00,0xf0]
+#CHECK: slrk	%r7, %r8, %r9           # encoding: [0xb9,0xfb,0x90,0x78]
+
+	slrk	%r0,%r0,%r0
+	slrk	%r0,%r0,%r15
+	slrk	%r0,%r15,%r0
+	slrk	%r15,%r0,%r0
+	slrk	%r7,%r8,%r9
+
+#CHECK: sllk	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0xdf]
+#CHECK: sllk	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0xdf]
+#CHECK: sllk	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0xdf]
+#CHECK: sllk	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0xdf]
+#CHECK: sllk	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0xdf]
+#CHECK: sllk	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0xdf]
+#CHECK: sllk	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0xdf]
+#CHECK: sllk	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xdf]
+#CHECK: sllk	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0xdf]
+#CHECK: sllk	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xdf]
+#CHECK: sllk	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0xdf]
+#CHECK: sllk	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0xdf]
+
+	sllk	%r0,%r0,0
+	sllk	%r15,%r1,0
+	sllk	%r1,%r15,0
+	sllk	%r15,%r15,0
+	sllk	%r0,%r0,-524288
+	sllk	%r0,%r0,-1
+	sllk	%r0,%r0,1
+	sllk	%r0,%r0,524287
+	sllk	%r0,%r0,0(%r1)
+	sllk	%r0,%r0,0(%r15)
+	sllk	%r0,%r0,524287(%r1)
+	sllk	%r0,%r0,524287(%r15)
+
+#CHECK: srak	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0xdc]
+#CHECK: srak	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0xdc]
+#CHECK: srak	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0xdc]
+#CHECK: srak	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0xdc]
+#CHECK: srak	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0xdc]
+#CHECK: srak	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0xdc]
+#CHECK: srak	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0xdc]
+#CHECK: srak	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xdc]
+#CHECK: srak	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0xdc]
+#CHECK: srak	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xdc]
+#CHECK: srak	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0xdc]
+#CHECK: srak	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0xdc]
+
+	srak	%r0,%r0,0
+	srak	%r15,%r1,0
+	srak	%r1,%r15,0
+	srak	%r15,%r15,0
+	srak	%r0,%r0,-524288
+	srak	%r0,%r0,-1
+	srak	%r0,%r0,1
+	srak	%r0,%r0,524287
+	srak	%r0,%r0,0(%r1)
+	srak	%r0,%r0,0(%r15)
+	srak	%r0,%r0,524287(%r1)
+	srak	%r0,%r0,524287(%r15)
+
+#CHECK: srk	%r0, %r0, %r0           # encoding: [0xb9,0xf9,0x00,0x00]
+#CHECK: srk	%r0, %r0, %r15          # encoding: [0xb9,0xf9,0xf0,0x00]
+#CHECK: srk	%r0, %r15, %r0          # encoding: [0xb9,0xf9,0x00,0x0f]
+#CHECK: srk	%r15, %r0, %r0          # encoding: [0xb9,0xf9,0x00,0xf0]
+#CHECK: srk	%r7, %r8, %r9           # encoding: [0xb9,0xf9,0x90,0x78]
+
+	srk	%r0,%r0,%r0
+	srk	%r0,%r0,%r15
+	srk	%r0,%r15,%r0
+	srk	%r15,%r0,%r0
+	srk	%r7,%r8,%r9
+
+#CHECK: srlk	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0xde]
+#CHECK: srlk	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0xde]
+#CHECK: srlk	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0xde]
+#CHECK: srlk	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0xde]
+#CHECK: srlk	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0xde]
+#CHECK: srlk	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0xde]
+#CHECK: srlk	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0xde]
+#CHECK: srlk	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xde]
+#CHECK: srlk	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0xde]
+#CHECK: srlk	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xde]
+#CHECK: srlk	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0xde]
+#CHECK: srlk	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0xde]
+
+	srlk	%r0,%r0,0
+	srlk	%r15,%r1,0
+	srlk	%r1,%r15,0
+	srlk	%r15,%r15,0
+	srlk	%r0,%r0,-524288
+	srlk	%r0,%r0,-1
+	srlk	%r0,%r0,1
+	srlk	%r0,%r0,524287
+	srlk	%r0,%r0,0(%r1)
+	srlk	%r0,%r0,0(%r15)
+	srlk	%r0,%r0,524287(%r1)
+	srlk	%r0,%r0,524287(%r15)
+
+#CHECK: stch	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc3]
+#CHECK: stch	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc3]
+#CHECK: stch	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc3]
+#CHECK: stch	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc3]
+#CHECK: stch	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc3]
+#CHECK: stch	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc3]
+#CHECK: stch	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc3]
+#CHECK: stch	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc3]
+#CHECK: stch	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc3]
+#CHECK: stch	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc3]
+
+	stch	%r0, -524288
+	stch	%r0, -1
+	stch	%r0, 0
+	stch	%r0, 1
+	stch	%r0, 524287
+	stch	%r0, 0(%r1)
+	stch	%r0, 0(%r15)
+	stch	%r0, 524287(%r1,%r15)
+	stch	%r0, 524287(%r15,%r1)
+	stch	%r15, 0
+
+#CHECK: sthh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xc7]
+#CHECK: sthh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xc7]
+#CHECK: sthh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xc7]
+#CHECK: sthh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xc7]
+#CHECK: sthh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xc7]
+#CHECK: sthh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xc7]
+#CHECK: sthh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xc7]
+#CHECK: sthh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xc7]
+#CHECK: sthh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xc7]
+#CHECK: sthh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xc7]
+
+	sthh	%r0, -524288
+	sthh	%r0, -1
+	sthh	%r0, 0
+	sthh	%r0, 1
+	sthh	%r0, 524287
+	sthh	%r0, 0(%r1)
+	sthh	%r0, 0(%r15)
+	sthh	%r0, 524287(%r1,%r15)
+	sthh	%r0, 524287(%r15,%r1)
+	sthh	%r15, 0
+
+#CHECK: stfh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0xcb]
+#CHECK: stfh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0xcb]
+#CHECK: stfh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0xcb]
+#CHECK: stfh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0xcb]
+#CHECK: stfh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0xcb]
+#CHECK: stfh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0xcb]
+#CHECK: stfh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0xcb]
+#CHECK: stfh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0xcb]
+#CHECK: stfh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0xcb]
+#CHECK: stfh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0xcb]
+
+	stfh	%r0, -524288
+	stfh	%r0, -1
+	stfh	%r0, 0
+	stfh	%r0, 1
+	stfh	%r0, 524287
+	stfh	%r0, 0(%r1)
+	stfh	%r0, 0(%r15)
+	stfh	%r0, 524287(%r1,%r15)
+	stfh	%r0, 524287(%r15,%r1)
+	stfh	%r15, 0
+
+#CHECK: stoc	%r0, 0, 0               # encoding: [0xeb,0x00,0x00,0x00,0x00,0xf3]
+#CHECK: stoc	%r0, 0, 15              # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xf3]
+#CHECK: stoc	%r0, -524288, 0         # encoding: [0xeb,0x00,0x00,0x00,0x80,0xf3]
+#CHECK: stoc	%r0, 524287, 0          # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xf3]
+#CHECK: stoc	%r0, 0(%r1), 0          # encoding: [0xeb,0x00,0x10,0x00,0x00,0xf3]
+#CHECK: stoc	%r0, 0(%r15), 0         # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xf3]
+#CHECK: stoc	%r15, 0, 0              # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xf3]
+#CHECK: stoc	%r1, 4095(%r2), 3       # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xf3]
+
+	stoc	%r0,0,0
+	stoc	%r0,0,15
+	stoc	%r0,-524288,0
+	stoc	%r0,524287,0
+	stoc	%r0,0(%r1),0
+	stoc	%r0,0(%r15),0
+	stoc	%r15,0,0
+	stoc	%r1,4095(%r2),3
+
+#CHECK: stoco   %r1, 2(%r3)             # encoding: [0xeb,0x11,0x30,0x02,0x00,0xf3]
+#CHECK: stoch   %r1, 2(%r3)             # encoding: [0xeb,0x12,0x30,0x02,0x00,0xf3]
+#CHECK: stocnle %r1, 2(%r3)             # encoding: [0xeb,0x13,0x30,0x02,0x00,0xf3]
+#CHECK: stocl   %r1, 2(%r3)             # encoding: [0xeb,0x14,0x30,0x02,0x00,0xf3]
+#CHECK: stocnhe %r1, 2(%r3)             # encoding: [0xeb,0x15,0x30,0x02,0x00,0xf3]
+#CHECK: stoclh  %r1, 2(%r3)             # encoding: [0xeb,0x16,0x30,0x02,0x00,0xf3]
+#CHECK: stocne  %r1, 2(%r3)             # encoding: [0xeb,0x17,0x30,0x02,0x00,0xf3]
+#CHECK: stoce   %r1, 2(%r3)             # encoding: [0xeb,0x18,0x30,0x02,0x00,0xf3]
+#CHECK: stocnlh %r1, 2(%r3)             # encoding: [0xeb,0x19,0x30,0x02,0x00,0xf3]
+#CHECK: stoche  %r1, 2(%r3)             # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xf3]
+#CHECK: stocnl  %r1, 2(%r3)             # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xf3]
+#CHECK: stocle  %r1, 2(%r3)             # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xf3]
+#CHECK: stocnh  %r1, 2(%r3)             # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xf3]
+#CHECK: stocno  %r1, 2(%r3)             # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xf3]
+
+	stoco   %r1,2(%r3)
+	stoch   %r1,2(%r3)
+	stocnle %r1,2(%r3)
+	stocl   %r1,2(%r3)
+	stocnhe %r1,2(%r3)
+	stoclh  %r1,2(%r3)
+	stocne  %r1,2(%r3)
+	stoce   %r1,2(%r3)
+	stocnlh %r1,2(%r3)
+	stoche  %r1,2(%r3)
+	stocnl  %r1,2(%r3)
+	stocle  %r1,2(%r3)
+	stocnh  %r1,2(%r3)
+	stocno  %r1,2(%r3)
+
+#CHECK: stocg	%r0, 0, 0               # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe3]
+#CHECK: stocg	%r0, 0, 15              # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe3]
+#CHECK: stocg	%r0, -524288, 0         # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe3]
+#CHECK: stocg	%r0, 524287, 0          # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe3]
+#CHECK: stocg	%r0, 0(%r1), 0          # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe3]
+#CHECK: stocg	%r0, 0(%r15), 0         # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe3]
+#CHECK: stocg	%r15, 0, 0              # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe3]
+#CHECK: stocg	%r1, 4095(%r2), 3       # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe3]
+
+	stocg	%r0,0,0
+	stocg	%r0,0,15
+	stocg	%r0,-524288,0
+	stocg	%r0,524287,0
+	stocg	%r0,0(%r1),0
+	stocg	%r0,0(%r15),0
+	stocg	%r15,0,0
+	stocg	%r1,4095(%r2),3
+
+#CHECK: stocgo   %r1, 2(%r3)            # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe3]
+#CHECK: stocgh   %r1, 2(%r3)            # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe3]
+#CHECK: stocgnle %r1, 2(%r3)            # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe3]
+#CHECK: stocgl   %r1, 2(%r3)            # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe3]
+#CHECK: stocgnhe %r1, 2(%r3)            # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe3]
+#CHECK: stocglh  %r1, 2(%r3)            # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe3]
+#CHECK: stocgne  %r1, 2(%r3)            # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe3]
+#CHECK: stocge   %r1, 2(%r3)            # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe3]
+#CHECK: stocgnlh %r1, 2(%r3)            # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe3]
+#CHECK: stocghe  %r1, 2(%r3)            # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe3]
+#CHECK: stocgnl  %r1, 2(%r3)            # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe3]
+#CHECK: stocgle  %r1, 2(%r3)            # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe3]
+#CHECK: stocgnh  %r1, 2(%r3)            # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe3]
+#CHECK: stocgno  %r1, 2(%r3)            # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe3]
+
+	stocgo   %r1,2(%r3)
+	stocgh   %r1,2(%r3)
+	stocgnle %r1,2(%r3)
+	stocgl   %r1,2(%r3)
+	stocgnhe %r1,2(%r3)
+	stocglh  %r1,2(%r3)
+	stocgne  %r1,2(%r3)
+	stocge   %r1,2(%r3)
+	stocgnlh %r1,2(%r3)
+	stocghe  %r1,2(%r3)
+	stocgnl  %r1,2(%r3)
+	stocgle  %r1,2(%r3)
+	stocgnh  %r1,2(%r3)
+	stocgno  %r1,2(%r3)
+
+#CHECK: xgrk	%r0, %r0, %r0           # encoding: [0xb9,0xe7,0x00,0x00]
+#CHECK: xgrk	%r0, %r0, %r15          # encoding: [0xb9,0xe7,0xf0,0x00]
+#CHECK: xgrk	%r0, %r15, %r0          # encoding: [0xb9,0xe7,0x00,0x0f]
+#CHECK: xgrk	%r15, %r0, %r0          # encoding: [0xb9,0xe7,0x00,0xf0]
+#CHECK: xgrk	%r7, %r8, %r9           # encoding: [0xb9,0xe7,0x90,0x78]
+
+	xgrk	%r0,%r0,%r0
+	xgrk	%r0,%r0,%r15
+	xgrk	%r0,%r15,%r0
+	xgrk	%r15,%r0,%r0
+	xgrk	%r7,%r8,%r9
+
+#CHECK: xrk	%r0, %r0, %r0           # encoding: [0xb9,0xf7,0x00,0x00]
+#CHECK: xrk	%r0, %r0, %r15          # encoding: [0xb9,0xf7,0xf0,0x00]
+#CHECK: xrk	%r0, %r15, %r0          # encoding: [0xb9,0xf7,0x00,0x0f]
+#CHECK: xrk	%r15, %r0, %r0          # encoding: [0xb9,0xf7,0x00,0xf0]
+#CHECK: xrk	%r7, %r8, %r9           # encoding: [0xb9,0xf7,0x90,0x78]
+
+	xrk	%r0,%r0,%r0
+	xrk	%r0,%r0,%r15
+	xrk	%r0,%r15,%r0
+	xrk	%r15,%r0,%r0
+	xrk	%r7,%r8,%r9
diff --git a/test/MC/SystemZ/insn-good.s b/test/MC/SystemZ/insn-good.s
new file mode 100644
index 000000000000..23bd68a2f5d9
--- /dev/null
+++ b/test/MC/SystemZ/insn-good.s
@@ -0,0 +1,8580 @@
+# For z10 and above.
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: a	%r0, 0                  # encoding: [0x5a,0x00,0x00,0x00]
+#CHECK: a	%r0, 4095               # encoding: [0x5a,0x00,0x0f,0xff]
+#CHECK: a	%r0, 0(%r1)             # encoding: [0x5a,0x00,0x10,0x00]
+#CHECK: a	%r0, 0(%r15)            # encoding: [0x5a,0x00,0xf0,0x00]
+#CHECK: a	%r0, 4095(%r1,%r15)     # encoding: [0x5a,0x01,0xff,0xff]
+#CHECK: a	%r0, 4095(%r15,%r1)     # encoding: [0x5a,0x0f,0x1f,0xff]
+#CHECK: a	%r15, 0                 # encoding: [0x5a,0xf0,0x00,0x00]
+
+	a	%r0, 0
+	a	%r0, 4095
+	a	%r0, 0(%r1)
+	a	%r0, 0(%r15)
+	a	%r0, 4095(%r1,%r15)
+	a	%r0, 4095(%r15,%r1)
+	a	%r15, 0
+
+#CHECK: adb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1a]
+#CHECK: adb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1a]
+#CHECK: adb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1a]
+#CHECK: adb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1a]
+#CHECK: adb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1a]
+#CHECK: adb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1a]
+#CHECK: adb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1a]
+
+	adb	%f0, 0
+	adb	%f0, 4095
+	adb	%f0, 0(%r1)
+	adb	%f0, 0(%r15)
+	adb	%f0, 4095(%r1,%r15)
+	adb	%f0, 4095(%r15,%r1)
+	adb	%f15, 0
+
+#CHECK: adbr	%f0, %f0                # encoding: [0xb3,0x1a,0x00,0x00]
+#CHECK: adbr	%f0, %f15               # encoding: [0xb3,0x1a,0x00,0x0f]
+#CHECK: adbr	%f7, %f8                # encoding: [0xb3,0x1a,0x00,0x78]
+#CHECK: adbr	%f15, %f0               # encoding: [0xb3,0x1a,0x00,0xf0]
+
+	adbr	%f0, %f0
+	adbr	%f0, %f15
+	adbr	%f7, %f8
+	adbr	%f15, %f0
+
+#CHECK: aeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: aeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0a]
+#CHECK: aeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0a]
+#CHECK: aeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: aeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0a]
+#CHECK: aeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0a]
+#CHECK: aeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0a]
+
+	aeb	%f0, 0
+	aeb	%f0, 4095
+	aeb	%f0, 0(%r1)
+	aeb	%f0, 0(%r15)
+	aeb	%f0, 4095(%r1,%r15)
+	aeb	%f0, 4095(%r15,%r1)
+	aeb	%f15, 0
+
+#CHECK: aebr	%f0, %f0                # encoding: [0xb3,0x0a,0x00,0x00]
+#CHECK: aebr	%f0, %f15               # encoding: [0xb3,0x0a,0x00,0x0f]
+#CHECK: aebr	%f7, %f8                # encoding: [0xb3,0x0a,0x00,0x78]
+#CHECK: aebr	%f15, %f0               # encoding: [0xb3,0x0a,0x00,0xf0]
+
+	aebr	%f0, %f0
+	aebr	%f0, %f15
+	aebr	%f7, %f8
+	aebr	%f15, %f0
+
+#CHECK: afi	%r0, -2147483648        # encoding: [0xc2,0x09,0x80,0x00,0x00,0x00]
+#CHECK: afi	%r0, -1                 # encoding: [0xc2,0x09,0xff,0xff,0xff,0xff]
+#CHECK: afi	%r0, 0                  # encoding: [0xc2,0x09,0x00,0x00,0x00,0x00]
+#CHECK: afi	%r0, 1                  # encoding: [0xc2,0x09,0x00,0x00,0x00,0x01]
+#CHECK: afi	%r0, 2147483647         # encoding: [0xc2,0x09,0x7f,0xff,0xff,0xff]
+#CHECK: afi	%r15, 0                 # encoding: [0xc2,0xf9,0x00,0x00,0x00,0x00]
+
+	afi	%r0, -1 << 31
+	afi	%r0, -1
+	afi	%r0, 0
+	afi	%r0, 1
+	afi	%r0, (1 << 31) - 1
+	afi	%r15, 0
+
+#CHECK: ag	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x08]
+#CHECK: ag	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x08]
+#CHECK: ag	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x08]
+#CHECK: ag	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x08]
+#CHECK: ag	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x08]
+#CHECK: ag	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x08]
+#CHECK: ag	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x08]
+#CHECK: ag	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x08]
+#CHECK: ag	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x08]
+#CHECK: ag	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x08]
+
+	ag	%r0, -524288
+	ag	%r0, -1
+	ag	%r0, 0
+	ag	%r0, 1
+	ag	%r0, 524287
+	ag	%r0, 0(%r1)
+	ag	%r0, 0(%r15)
+	ag	%r0, 524287(%r1,%r15)
+	ag	%r0, 524287(%r15,%r1)
+	ag	%r15, 0
+
+#CHECK: agf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x18]
+#CHECK: agf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x18]
+#CHECK: agf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x18]
+#CHECK: agf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x18]
+#CHECK: agf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x18]
+#CHECK: agf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x18]
+#CHECK: agf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x18]
+#CHECK: agf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x18]
+#CHECK: agf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x18]
+#CHECK: agf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x18]
+
+	agf	%r0, -524288
+	agf	%r0, -1
+	agf	%r0, 0
+	agf	%r0, 1
+	agf	%r0, 524287
+	agf	%r0, 0(%r1)
+	agf	%r0, 0(%r15)
+	agf	%r0, 524287(%r1,%r15)
+	agf	%r0, 524287(%r15,%r1)
+	agf	%r15, 0
+
+#CHECK: agfi	%r0, -2147483648        # encoding: [0xc2,0x08,0x80,0x00,0x00,0x00]
+#CHECK: agfi	%r0, -1                 # encoding: [0xc2,0x08,0xff,0xff,0xff,0xff]
+#CHECK: agfi	%r0, 0                  # encoding: [0xc2,0x08,0x00,0x00,0x00,0x00]
+#CHECK: agfi	%r0, 1                  # encoding: [0xc2,0x08,0x00,0x00,0x00,0x01]
+#CHECK: agfi	%r0, 2147483647         # encoding: [0xc2,0x08,0x7f,0xff,0xff,0xff]
+#CHECK: agfi	%r15, 0                 # encoding: [0xc2,0xf8,0x00,0x00,0x00,0x00]
+
+	agfi	%r0, -1 << 31
+	agfi	%r0, -1
+	agfi	%r0, 0
+	agfi	%r0, 1
+	agfi	%r0, (1 << 31) - 1
+	agfi	%r15, 0
+
+#CHECK: agfr	%r0, %r0                # encoding: [0xb9,0x18,0x00,0x00]
+#CHECK: agfr	%r0, %r15               # encoding: [0xb9,0x18,0x00,0x0f]
+#CHECK: agfr	%r15, %r0               # encoding: [0xb9,0x18,0x00,0xf0]
+#CHECK: agfr	%r7, %r8                # encoding: [0xb9,0x18,0x00,0x78]
+
+	agfr	%r0,%r0
+	agfr	%r0,%r15
+	agfr	%r15,%r0
+	agfr	%r7,%r8
+
+#CHECK: aghi	%r0, -32768             # encoding: [0xa7,0x0b,0x80,0x00]
+#CHECK: aghi	%r0, -1                 # encoding: [0xa7,0x0b,0xff,0xff]
+#CHECK: aghi	%r0, 0                  # encoding: [0xa7,0x0b,0x00,0x00]
+#CHECK: aghi	%r0, 1                  # encoding: [0xa7,0x0b,0x00,0x01]
+#CHECK: aghi	%r0, 32767              # encoding: [0xa7,0x0b,0x7f,0xff]
+#CHECK: aghi	%r15, 0                 # encoding: [0xa7,0xfb,0x00,0x00]
+
+	aghi	%r0, -32768
+	aghi	%r0, -1
+	aghi	%r0, 0
+	aghi	%r0, 1
+	aghi	%r0, 32767
+	aghi	%r15, 0
+
+#CHECK: agr	%r0, %r0                # encoding: [0xb9,0x08,0x00,0x00]
+#CHECK: agr	%r0, %r15               # encoding: [0xb9,0x08,0x00,0x0f]
+#CHECK: agr	%r15, %r0               # encoding: [0xb9,0x08,0x00,0xf0]
+#CHECK: agr	%r7, %r8                # encoding: [0xb9,0x08,0x00,0x78]
+
+	agr	%r0,%r0
+	agr	%r0,%r15
+	agr	%r15,%r0
+	agr	%r7,%r8
+
+#CHECK: agsi	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x7a]
+#CHECK: agsi	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x7a]
+#CHECK: agsi	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x7a]
+#CHECK: agsi	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x7a]
+#CHECK: agsi	0, -128                 # encoding: [0xeb,0x80,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0, -1                   # encoding: [0xeb,0xff,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0, 1                    # encoding: [0xeb,0x01,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0, 127                  # encoding: [0xeb,0x7f,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x7a]
+#CHECK: agsi	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x7a]
+#CHECK: agsi	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x7a]
+#CHECK: agsi	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x7a]
+
+	agsi	-524288, 0
+	agsi	-1, 0
+	agsi	0, 0
+	agsi	1, 0
+	agsi	524287, 0
+	agsi	0, -128
+	agsi	0, -1
+	agsi	0, 1
+	agsi	0, 127
+	agsi	0(%r1), 42
+	agsi	0(%r15), 42
+	agsi	524287(%r1), 42
+	agsi	524287(%r15), 42
+
+#CHECK: ah	%r0, 0                  # encoding: [0x4a,0x00,0x00,0x00]
+#CHECK: ah	%r0, 4095               # encoding: [0x4a,0x00,0x0f,0xff]
+#CHECK: ah	%r0, 0(%r1)             # encoding: [0x4a,0x00,0x10,0x00]
+#CHECK: ah	%r0, 0(%r15)            # encoding: [0x4a,0x00,0xf0,0x00]
+#CHECK: ah	%r0, 4095(%r1,%r15)     # encoding: [0x4a,0x01,0xff,0xff]
+#CHECK: ah	%r0, 4095(%r15,%r1)     # encoding: [0x4a,0x0f,0x1f,0xff]
+#CHECK: ah	%r15, 0                 # encoding: [0x4a,0xf0,0x00,0x00]
+
+	ah	%r0, 0
+	ah	%r0, 4095
+	ah	%r0, 0(%r1)
+	ah	%r0, 0(%r15)
+	ah	%r0, 4095(%r1,%r15)
+	ah	%r0, 4095(%r15,%r1)
+	ah	%r15, 0
+
+#CHECK: ahi	%r0, -32768             # encoding: [0xa7,0x0a,0x80,0x00]
+#CHECK: ahi	%r0, -1                 # encoding: [0xa7,0x0a,0xff,0xff]
+#CHECK: ahi	%r0, 0                  # encoding: [0xa7,0x0a,0x00,0x00]
+#CHECK: ahi	%r0, 1                  # encoding: [0xa7,0x0a,0x00,0x01]
+#CHECK: ahi	%r0, 32767              # encoding: [0xa7,0x0a,0x7f,0xff]
+#CHECK: ahi	%r15, 0                 # encoding: [0xa7,0xfa,0x00,0x00]
+
+	ahi	%r0, -32768
+	ahi	%r0, -1
+	ahi	%r0, 0
+	ahi	%r0, 1
+	ahi	%r0, 32767
+	ahi	%r15, 0
+
+#CHECK: ahy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7a]
+#CHECK: ahy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7a]
+#CHECK: ahy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7a]
+#CHECK: ahy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7a]
+#CHECK: ahy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7a]
+#CHECK: ahy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7a]
+#CHECK: ahy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7a]
+#CHECK: ahy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7a]
+#CHECK: ahy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7a]
+#CHECK: ahy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7a]
+
+	ahy	%r0, -524288
+	ahy	%r0, -1
+	ahy	%r0, 0
+	ahy	%r0, 1
+	ahy	%r0, 524287
+	ahy	%r0, 0(%r1)
+	ahy	%r0, 0(%r15)
+	ahy	%r0, 524287(%r1,%r15)
+	ahy	%r0, 524287(%r15,%r1)
+	ahy	%r15, 0
+
+#CHECK: al	%r0, 0                  # encoding: [0x5e,0x00,0x00,0x00]
+#CHECK: al	%r0, 4095               # encoding: [0x5e,0x00,0x0f,0xff]
+#CHECK: al	%r0, 0(%r1)             # encoding: [0x5e,0x00,0x10,0x00]
+#CHECK: al	%r0, 0(%r15)            # encoding: [0x5e,0x00,0xf0,0x00]
+#CHECK: al	%r0, 4095(%r1,%r15)     # encoding: [0x5e,0x01,0xff,0xff]
+#CHECK: al	%r0, 4095(%r15,%r1)     # encoding: [0x5e,0x0f,0x1f,0xff]
+#CHECK: al	%r15, 0                 # encoding: [0x5e,0xf0,0x00,0x00]
+
+	al	%r0, 0
+	al	%r0, 4095
+	al	%r0, 0(%r1)
+	al	%r0, 0(%r15)
+	al	%r0, 4095(%r1,%r15)
+	al	%r0, 4095(%r15,%r1)
+	al	%r15, 0
+
+#CHECK: alc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x98]
+#CHECK: alc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x98]
+#CHECK: alc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x98]
+#CHECK: alc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x98]
+#CHECK: alc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x98]
+#CHECK: alc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x98]
+#CHECK: alc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x98]
+#CHECK: alc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x98]
+#CHECK: alc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x98]
+#CHECK: alc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x98]
+
+	alc	%r0, -524288
+	alc	%r0, -1
+	alc	%r0, 0
+	alc	%r0, 1
+	alc	%r0, 524287
+	alc	%r0, 0(%r1)
+	alc	%r0, 0(%r15)
+	alc	%r0, 524287(%r1,%r15)
+	alc	%r0, 524287(%r15,%r1)
+	alc	%r15, 0
+
+#CHECK: alcg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x88]
+#CHECK: alcg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x88]
+#CHECK: alcg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x88]
+#CHECK: alcg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x88]
+#CHECK: alcg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x88]
+#CHECK: alcg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x88]
+#CHECK: alcg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x88]
+#CHECK: alcg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x88]
+#CHECK: alcg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x88]
+#CHECK: alcg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x88]
+
+	alcg	%r0, -524288
+	alcg	%r0, -1
+	alcg	%r0, 0
+	alcg	%r0, 1
+	alcg	%r0, 524287
+	alcg	%r0, 0(%r1)
+	alcg	%r0, 0(%r15)
+	alcg	%r0, 524287(%r1,%r15)
+	alcg	%r0, 524287(%r15,%r1)
+	alcg	%r15, 0
+
+#CHECK: alcgr	%r0, %r0                # encoding: [0xb9,0x88,0x00,0x00]
+#CHECK: alcgr	%r0, %r15               # encoding: [0xb9,0x88,0x00,0x0f]
+#CHECK: alcgr	%r15, %r0               # encoding: [0xb9,0x88,0x00,0xf0]
+#CHECK: alcgr	%r7, %r8                # encoding: [0xb9,0x88,0x00,0x78]
+
+	alcgr	%r0,%r0
+	alcgr	%r0,%r15
+	alcgr	%r15,%r0
+	alcgr	%r7,%r8
+
+#CHECK: alcr	%r0, %r0                # encoding: [0xb9,0x98,0x00,0x00]
+#CHECK: alcr	%r0, %r15               # encoding: [0xb9,0x98,0x00,0x0f]
+#CHECK: alcr	%r15, %r0               # encoding: [0xb9,0x98,0x00,0xf0]
+#CHECK: alcr	%r7, %r8                # encoding: [0xb9,0x98,0x00,0x78]
+
+	alcr	%r0,%r0
+	alcr	%r0,%r15
+	alcr	%r15,%r0
+	alcr	%r7,%r8
+
+#CHECK: alfi	%r0, 0                  # encoding: [0xc2,0x0b,0x00,0x00,0x00,0x00]
+#CHECK: alfi	%r0, 4294967295         # encoding: [0xc2,0x0b,0xff,0xff,0xff,0xff]
+#CHECK: alfi	%r15, 0                 # encoding: [0xc2,0xfb,0x00,0x00,0x00,0x00]
+
+	alfi	%r0, 0
+	alfi	%r0, (1 << 32) - 1
+	alfi	%r15, 0
+
+#CHECK: alg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0a]
+#CHECK: alg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0a]
+#CHECK: alg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: alg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0a]
+#CHECK: alg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0a]
+#CHECK: alg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0a]
+#CHECK: alg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: alg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0a]
+#CHECK: alg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0a]
+#CHECK: alg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0a]
+
+	alg	%r0, -524288
+	alg	%r0, -1
+	alg	%r0, 0
+	alg	%r0, 1
+	alg	%r0, 524287
+	alg	%r0, 0(%r1)
+	alg	%r0, 0(%r15)
+	alg	%r0, 524287(%r1,%r15)
+	alg	%r0, 524287(%r15,%r1)
+	alg	%r15, 0
+
+#CHECK: algf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1a]
+#CHECK: algf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1a]
+#CHECK: algf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1a]
+#CHECK: algf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1a]
+#CHECK: algf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1a]
+#CHECK: algf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1a]
+#CHECK: algf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1a]
+#CHECK: algf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1a]
+#CHECK: algf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1a]
+#CHECK: algf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1a]
+
+	algf	%r0, -524288
+	algf	%r0, -1
+	algf	%r0, 0
+	algf	%r0, 1
+	algf	%r0, 524287
+	algf	%r0, 0(%r1)
+	algf	%r0, 0(%r15)
+	algf	%r0, 524287(%r1,%r15)
+	algf	%r0, 524287(%r15,%r1)
+	algf	%r15, 0
+
+#CHECK: algfi	%r0, 0                  # encoding: [0xc2,0x0a,0x00,0x00,0x00,0x00]
+#CHECK: algfi	%r0, 4294967295         # encoding: [0xc2,0x0a,0xff,0xff,0xff,0xff]
+#CHECK: algfi	%r15, 0                 # encoding: [0xc2,0xfa,0x00,0x00,0x00,0x00]
+
+	algfi	%r0, 0
+	algfi	%r0, (1 << 32) - 1
+	algfi	%r15, 0
+
+#CHECK: algfr	%r0, %r0                # encoding: [0xb9,0x1a,0x00,0x00]
+#CHECK: algfr	%r0, %r15               # encoding: [0xb9,0x1a,0x00,0x0f]
+#CHECK: algfr	%r15, %r0               # encoding: [0xb9,0x1a,0x00,0xf0]
+#CHECK: algfr	%r7, %r8                # encoding: [0xb9,0x1a,0x00,0x78]
+
+	algfr	%r0,%r0
+	algfr	%r0,%r15
+	algfr	%r15,%r0
+	algfr	%r7,%r8
+
+#CHECK: algr	%r0, %r0                # encoding: [0xb9,0x0a,0x00,0x00]
+#CHECK: algr	%r0, %r15               # encoding: [0xb9,0x0a,0x00,0x0f]
+#CHECK: algr	%r15, %r0               # encoding: [0xb9,0x0a,0x00,0xf0]
+#CHECK: algr	%r7, %r8                # encoding: [0xb9,0x0a,0x00,0x78]
+
+	algr	%r0,%r0
+	algr	%r0,%r15
+	algr	%r15,%r0
+	algr	%r7,%r8
+
+#CHECK: alr	%r0, %r0                # encoding: [0x1e,0x00]
+#CHECK: alr	%r0, %r15               # encoding: [0x1e,0x0f]
+#CHECK: alr	%r15, %r0               # encoding: [0x1e,0xf0]
+#CHECK: alr	%r7, %r8                # encoding: [0x1e,0x78]
+
+	alr	%r0,%r0
+	alr	%r0,%r15
+	alr	%r15,%r0
+	alr	%r7,%r8
+
+#CHECK: aly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5e]
+#CHECK: aly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5e]
+#CHECK: aly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5e]
+#CHECK: aly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5e]
+#CHECK: aly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5e]
+#CHECK: aly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5e]
+#CHECK: aly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5e]
+#CHECK: aly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5e]
+#CHECK: aly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5e]
+#CHECK: aly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5e]
+
+	aly	%r0, -524288
+	aly	%r0, -1
+	aly	%r0, 0
+	aly	%r0, 1
+	aly	%r0, 524287
+	aly	%r0, 0(%r1)
+	aly	%r0, 0(%r15)
+	aly	%r0, 524287(%r1,%r15)
+	aly	%r0, 524287(%r15,%r1)
+	aly	%r15, 0
+
+#CHECK: ar	%r0, %r0                # encoding: [0x1a,0x00]
+#CHECK: ar	%r0, %r15               # encoding: [0x1a,0x0f]
+#CHECK: ar	%r15, %r0               # encoding: [0x1a,0xf0]
+#CHECK: ar	%r7, %r8                # encoding: [0x1a,0x78]
+
+	ar	%r0,%r0
+	ar	%r0,%r15
+	ar	%r15,%r0
+	ar	%r7,%r8
+
+#CHECK: asi	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x6a]
+#CHECK: asi	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x6a]
+#CHECK: asi	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x6a]
+#CHECK: asi	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x6a]
+#CHECK: asi	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x6a]
+#CHECK: asi	0, -128                 # encoding: [0xeb,0x80,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0, -1                   # encoding: [0xeb,0xff,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0, 1                    # encoding: [0xeb,0x01,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0, 127                  # encoding: [0xeb,0x7f,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x6a]
+#CHECK: asi	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x6a]
+#CHECK: asi	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x6a]
+#CHECK: asi	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x6a]
+
+	asi	-524288, 0
+	asi	-1, 0
+	asi	0, 0
+	asi	1, 0
+	asi	524287, 0
+	asi	0, -128
+	asi	0, -1
+	asi	0, 1
+	asi	0, 127
+	asi	0(%r1), 42
+	asi	0(%r15), 42
+	asi	524287(%r1), 42
+	asi	524287(%r15), 42
+
+#CHECK: axbr	%f0, %f0                # encoding: [0xb3,0x4a,0x00,0x00]
+#CHECK: axbr	%f0, %f13               # encoding: [0xb3,0x4a,0x00,0x0d]
+#CHECK: axbr	%f8, %f8                # encoding: [0xb3,0x4a,0x00,0x88]
+#CHECK: axbr	%f13, %f0               # encoding: [0xb3,0x4a,0x00,0xd0]
+
+	axbr	%f0, %f0
+	axbr	%f0, %f13
+	axbr	%f8, %f8
+	axbr	%f13, %f0
+
+#CHECK: ay	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5a]
+#CHECK: ay	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5a]
+#CHECK: ay	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5a]
+#CHECK: ay	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5a]
+#CHECK: ay	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5a]
+#CHECK: ay	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5a]
+#CHECK: ay	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5a]
+#CHECK: ay	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5a]
+#CHECK: ay	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5a]
+#CHECK: ay	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5a]
+
+	ay	%r0, -524288
+	ay	%r0, -1
+	ay	%r0, 0
+	ay	%r0, 1
+	ay	%r0, 524287
+	ay	%r0, 0(%r1)
+	ay	%r0, 0(%r15)
+	ay	%r0, 524287(%r1,%r15)
+	ay	%r0, 524287(%r15,%r1)
+	ay	%r15, 0
+
+#CHECK: basr	%r0, %r1                # encoding: [0x0d,0x01]
+#CHECK: basr	%r0, %r15               # encoding: [0x0d,0x0f]
+#CHECK: basr	%r14, %r9               # encoding: [0x0d,0xe9]
+#CHECK: basr	%r15, %r1               # encoding: [0x0d,0xf1]
+
+	basr	%r0,%r1
+	basr	%r0,%r15
+	basr	%r14,%r9
+	basr	%r15,%r1
+
+#CHECK: bcr	0, %r0			# encoding: [0x07,0x00]
+#CHECK:	bcr	0, %r15			# encoding: [0x07,0x0f]
+
+	bcr	0, %r0
+	bcr	0, %r15
+
+#CHECK:	bcr	1, %r7			# encoding: [0x07,0x17]
+#CHECK:	bor	%r15			# encoding: [0x07,0x1f]
+
+	bcr	1, %r7
+	bor	%r15
+
+#CHECK:	bcr	2, %r7			# encoding: [0x07,0x27]
+#CHECK:	bhr	%r15			# encoding: [0x07,0x2f]
+
+	bcr	2, %r7
+	bhr	%r15
+
+#CHECK:	bcr	3, %r7			# encoding: [0x07,0x37]
+#CHECK:	bnler	%r15			# encoding: [0x07,0x3f]
+
+	bcr	3, %r7
+	bnler	%r15
+
+#CHECK:	bcr	4, %r7			# encoding: [0x07,0x47]
+#CHECK:	blr	%r15			# encoding: [0x07,0x4f]
+
+	bcr	4, %r7
+	blr	%r15
+
+#CHECK:	bcr	5, %r7			# encoding: [0x07,0x57]
+#CHECK:	bnher	%r15			# encoding: [0x07,0x5f]
+
+	bcr	5, %r7
+	bnher	%r15
+
+#CHECK:	bcr	6, %r7			# encoding: [0x07,0x67]
+#CHECK:	blhr	%r15			# encoding: [0x07,0x6f]
+
+	bcr	6, %r7
+	blhr	%r15
+
+#CHECK:	bcr	7, %r7			# encoding: [0x07,0x77]
+#CHECK:	bner	%r15			# encoding: [0x07,0x7f]
+
+	bcr	7, %r7
+	bner	%r15
+
+#CHECK:	bcr	8, %r7			# encoding: [0x07,0x87]
+#CHECK:	ber	%r15			# encoding: [0x07,0x8f]
+
+	bcr	8, %r7
+	ber	%r15
+
+#CHECK:	bcr	9, %r7			# encoding: [0x07,0x97]
+#CHECK:	bnlhr	%r15			# encoding: [0x07,0x9f]
+
+	bcr	9, %r7
+	bnlhr	%r15
+
+#CHECK:	bcr	10, %r7			# encoding: [0x07,0xa7]
+#CHECK:	bher	%r15			# encoding: [0x07,0xaf]
+
+	bcr	10, %r7
+	bher	%r15
+
+#CHECK:	bcr	11, %r7			# encoding: [0x07,0xb7]
+#CHECK:	bnlr	%r15			# encoding: [0x07,0xbf]
+
+	bcr	11, %r7
+	bnlr	%r15
+
+#CHECK:	bcr	12, %r7			# encoding: [0x07,0xc7]
+#CHECK:	bler	%r15			# encoding: [0x07,0xcf]
+
+	bcr	12, %r7
+	bler	%r15
+
+#CHECK:	bcr	13, %r7			# encoding: [0x07,0xd7]
+#CHECK:	bnhr	%r15			# encoding: [0x07,0xdf]
+
+	bcr	13, %r7
+	bnhr	%r15
+
+#CHECK:	bcr	14, %r7			# encoding: [0x07,0xe7]
+#CHECK:	bnor	%r15			# encoding: [0x07,0xef]
+
+	bcr	14, %r7
+	bnor	%r15
+
+#CHECK:	bcr	15, %r7			# encoding: [0x07,0xf7]
+#CHECK: br	%r1                     # encoding: [0x07,0xf1]
+#CHECK: br	%r14                    # encoding: [0x07,0xfe]
+#CHECK: br	%r15                    # encoding: [0x07,0xff]
+
+	bcr	15, %r7
+	br	%r1
+	br	%r14
+	br	%r15
+
+#CHECK: bras	%r0, .[[LAB:L.*]]-65536	# encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	bras	%r0, -0x10000
+#CHECK: bras	%r0, .[[LAB:L.*]]-2	# encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	bras	%r0, -2
+#CHECK: bras	%r0, .[[LAB:L.*]]	# encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	bras	%r0, 0
+#CHECK: bras	%r0, .[[LAB:L.*]]+65534	# encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	bras	%r0, 0xfffe
+
+#CHECK: bras	%r0, foo                # encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r14, foo               # encoding: [0xa7,0xe5,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r15, foo               # encoding: [0xa7,0xf5,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	bras	%r0,foo
+	bras	%r14,foo
+	bras	%r15,foo
+
+#CHECK: bras	%r0, bar+100                # encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r14, bar+100               # encoding: [0xa7,0xe5,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r15, bar+100               # encoding: [0xa7,0xf5,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	bras	%r0,bar+100
+	bras	%r14,bar+100
+	bras	%r15,bar+100
+
+#CHECK: bras	%r0, bar@PLT                # encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r14, bar@PLT               # encoding: [0xa7,0xe5,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r15, bar@PLT               # encoding: [0xa7,0xf5,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	bras	%r0,bar@PLT
+	bras	%r14,bar@PLT
+	bras	%r15,bar@PLT
+
+#CHECK: brasl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	brasl	%r0, -0x100000000
+#CHECK: brasl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	brasl	%r0, -2
+#CHECK: brasl	%r0, .[[LAB:L.*]]	# encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	brasl	%r0, 0
+#CHECK: brasl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	brasl	%r0, 0xfffffffe
+
+#CHECK: brasl	%r0, foo                # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r14, foo               # encoding: [0xc0,0xe5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r15, foo               # encoding: [0xc0,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brasl	%r0,foo
+	brasl	%r14,foo
+	brasl	%r15,foo
+
+#CHECK: brasl	%r0, bar+100                # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r14, bar+100               # encoding: [0xc0,0xe5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r15, bar+100               # encoding: [0xc0,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	brasl	%r0,bar+100
+	brasl	%r14,bar+100
+	brasl	%r15,bar+100
+
+#CHECK: brasl	%r0, bar@PLT                # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r14, bar@PLT               # encoding: [0xc0,0xe5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r15, bar@PLT               # encoding: [0xc0,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	brasl	%r0,bar@PLT
+	brasl	%r14,bar@PLT
+	brasl	%r15,bar@PLT
+
+#CHECK: brc	0, .[[LAB:L.*]]-65536	# encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	brc	0, -0x10000
+#CHECK: brc	0, .[[LAB:L.*]]-2	# encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	brc	0, -2
+#CHECK: brc	0, .[[LAB:L.*]]		# encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	brc	0, 0
+#CHECK: brc	0, .[[LAB:L.*]]+65534	# encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	brc	0, 0xfffe
+
+#CHECK: brc	0, foo                  # encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	0, foo
+
+#CHECK: brc	1, foo                  # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jo	foo                     # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	1, foo
+	jo	foo
+
+#CHECK: brc	2, foo                  # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jh	foo                     # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	2, foo
+	jh	foo
+
+#CHECK: brc	3, foo                  # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnle	foo                     # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	3, foo
+	jnle	foo
+
+#CHECK: brc	4, foo                  # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jl	foo                     # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	4, foo
+	jl	foo
+
+#CHECK: brc	5, foo                  # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnhe	foo                     # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	5, foo
+	jnhe	foo
+
+#CHECK: brc	6, foo                  # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jlh	foo                     # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	6, foo
+	jlh	foo
+
+#CHECK: brc	7, foo                  # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jne	foo                     # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	7, foo
+	jne	foo
+
+#CHECK: brc	8, foo                  # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: je	foo                     # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	8, foo
+	je	foo
+
+#CHECK: brc	9, foo                  # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnlh	foo                     # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	9, foo
+	jnlh	foo
+
+#CHECK: brc	10, foo                 # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jhe	foo                     # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	10, foo
+	jhe	foo
+
+#CHECK: brc	11, foo                 # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnl	foo                     # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	11, foo
+	jnl	foo
+
+#CHECK: brc	12, foo                 # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jle	foo                     # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	12, foo
+	jle	foo
+
+#CHECK: brc	13, foo                 # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnh	foo                     # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	13, foo
+	jnh	foo
+
+#CHECK: brc	14, foo                 # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jno	foo                     # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	14, foo
+	jno	foo
+
+#CHECK: brc	15, foo                 # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: j	foo                     # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	15, foo
+	j	foo
+
+#CHECK: brc	0, bar+100              # encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	brc	0, bar+100
+
+#CHECK: jo	bar+100                 # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jo	bar+100
+
+#CHECK: jh	bar+100                 # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jh	bar+100
+
+#CHECK: jnle	bar+100                 # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnle	bar+100
+
+#CHECK: jl	bar+100                 # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jl	bar+100
+
+#CHECK: jnhe	bar+100                 # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnhe	bar+100
+
+#CHECK: jlh	bar+100                 # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jlh	bar+100
+
+#CHECK: jne	bar+100                 # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jne	bar+100
+
+#CHECK: je	bar+100                 # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	je	bar+100
+
+#CHECK: jnlh	bar+100                 # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnlh	bar+100
+
+#CHECK: jhe	bar+100                 # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jhe	bar+100
+
+#CHECK: jnl	bar+100                 # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnl	bar+100
+
+#CHECK: jle	bar+100                 # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jle	bar+100
+
+#CHECK: jnh	bar+100                 # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnh	bar+100
+
+#CHECK: jno	bar+100                 # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jno	bar+100
+
+#CHECK: j	bar+100                 # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	j	bar+100
+
+#CHECK: brc	0, bar@PLT              # encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	brc	0, bar@PLT
+
+#CHECK: jo	bar@PLT                 # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jo	bar@PLT
+
+#CHECK: jh	bar@PLT                 # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jh	bar@PLT
+
+#CHECK: jnle	bar@PLT                 # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnle	bar@PLT
+
+#CHECK: jl	bar@PLT                 # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jl	bar@PLT
+
+#CHECK: jnhe	bar@PLT                 # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnhe	bar@PLT
+
+#CHECK: jlh	bar@PLT                 # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jlh	bar@PLT
+
+#CHECK: jne	bar@PLT                 # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jne	bar@PLT
+
+#CHECK: je	bar@PLT                 # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	je	bar@PLT
+
+#CHECK: jnlh	bar@PLT                 # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnlh	bar@PLT
+
+#CHECK: jhe	bar@PLT                 # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jhe	bar@PLT
+
+#CHECK: jnl	bar@PLT                 # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnl	bar@PLT
+
+#CHECK: jle	bar@PLT                 # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jle	bar@PLT
+
+#CHECK: jnh	bar@PLT                 # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnh	bar@PLT
+
+#CHECK: jno	bar@PLT                 # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jno	bar@PLT
+
+#CHECK: j	bar@PLT                 # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	j	bar@PLT
+
+#CHECK: brcl	0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	brcl	0, -0x100000000
+#CHECK: brcl	0, .[[LAB:L.*]]-2	# encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	brcl	0, -2
+#CHECK: brcl	0, .[[LAB:L.*]]		# encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	brcl	0, 0
+#CHECK: brcl	0, .[[LAB:L.*]]+4294967294 # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	brcl	0, 0xfffffffe
+
+#CHECK: brcl	0, foo                  # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	0, foo
+
+#CHECK: brcl	1, foo                  # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgo	foo                     # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	1, foo
+	jgo	foo
+
+#CHECK: brcl	2, foo                  # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgh	foo                     # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	2, foo
+	jgh	foo
+
+#CHECK: brcl	3, foo                  # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnle	foo                     # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	3, foo
+	jgnle	foo
+
+#CHECK: brcl	4, foo                  # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgl	foo                     # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	4, foo
+	jgl	foo
+
+#CHECK: brcl	5, foo                  # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnhe	foo                     # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	5, foo
+	jgnhe	foo
+
+#CHECK: brcl	6, foo                  # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jglh	foo                     # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	6, foo
+	jglh	foo
+
+#CHECK: brcl	7, foo                  # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgne	foo                     # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	7, foo
+	jgne	foo
+
+#CHECK: brcl	8, foo                  # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jge	foo                     # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	8, foo
+	jge	foo
+
+#CHECK: brcl	9, foo                  # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnlh	foo                     # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	9, foo
+	jgnlh	foo
+
+#CHECK: brcl	10, foo                 # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jghe	foo                     # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	10, foo
+	jghe	foo
+
+#CHECK: brcl	11, foo                 # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnl	foo                     # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	11, foo
+	jgnl	foo
+
+#CHECK: brcl	12, foo                 # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgle	foo                     # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	12, foo
+	jgle	foo
+
+#CHECK: brcl	13, foo                 # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnh	foo                     # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	13, foo
+	jgnh	foo
+
+#CHECK: brcl	14, foo                 # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgno	foo                     # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	14, foo
+	jgno	foo
+
+#CHECK: brcl	15, foo                 # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jg	foo                     # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	15, foo
+	jg	foo
+
+#CHECK: brcl	0, bar+100              # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	brcl	0, bar+100
+
+#CHECK: jgo	bar+100                 # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgo	bar+100
+
+#CHECK: jgh	bar+100                 # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgh	bar+100
+
+#CHECK: jgnle	bar+100                 # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnle	bar+100
+
+#CHECK: jgl	bar+100                 # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgl	bar+100
+
+#CHECK: jgnhe	bar+100                 # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnhe	bar+100
+
+#CHECK: jglh	bar+100                 # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jglh	bar+100
+
+#CHECK: jgne	bar+100                 # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgne	bar+100
+
+#CHECK: jge	bar+100                 # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jge	bar+100
+
+#CHECK: jgnlh	bar+100                 # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnlh	bar+100
+
+#CHECK: jghe	bar+100                 # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jghe	bar+100
+
+#CHECK: jgnl	bar+100                 # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnl	bar+100
+
+#CHECK: jgle	bar+100                 # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgle	bar+100
+
+#CHECK: jgnh	bar+100                 # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnh	bar+100
+
+#CHECK: jgno	bar+100                 # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgno	bar+100
+
+#CHECK: jg	bar+100                 # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jg	bar+100
+
+#CHECK: brcl	0, bar@PLT              # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	brcl	0, bar@PLT
+
+#CHECK: jgo	bar@PLT                 # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgo	bar@PLT
+
+#CHECK: jgh	bar@PLT                 # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgh	bar@PLT
+
+#CHECK: jgnle	bar@PLT                 # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnle	bar@PLT
+
+#CHECK: jgl	bar@PLT                 # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgl	bar@PLT
+
+#CHECK: jgnhe	bar@PLT                 # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnhe	bar@PLT
+
+#CHECK: jglh	bar@PLT                 # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jglh	bar@PLT
+
+#CHECK: jgne	bar@PLT                 # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgne	bar@PLT
+
+#CHECK: jge	bar@PLT                 # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jge	bar@PLT
+
+#CHECK: jgnlh	bar@PLT                 # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnlh	bar@PLT
+
+#CHECK: jghe	bar@PLT                 # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jghe	bar@PLT
+
+#CHECK: jgnl	bar@PLT                 # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnl	bar@PLT
+
+#CHECK: jgle	bar@PLT                 # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgle	bar@PLT
+
+#CHECK: jgnh	bar@PLT                 # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnh	bar@PLT
+
+#CHECK: jgno	bar@PLT                 # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgno	bar@PLT
+
+#CHECK: jg	bar@PLT                 # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jg	bar@PLT
+
+#CHECK: brct	%r0, .[[LAB:L.*]]-65536	# encoding: [0xa7,0x06,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	brct	%r0, -0x10000
+#CHECK: brct	%r0, .[[LAB:L.*]]-2	# encoding: [0xa7,0x06,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	brct	%r0, -2
+#CHECK: brct	%r0, .[[LAB:L.*]]	# encoding: [0xa7,0x06,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	brct	%r0, 0
+#CHECK: brct	%r0, .[[LAB:L.*]]+65534	# encoding: [0xa7,0x06,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	brct	%r0, 0xfffe
+#CHECK: brct	%r15, .[[LAB:L.*]]	# encoding: [0xa7,0xf6,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	brct	%r15, 0
+
+#CHECK: brctg	%r0, .[[LAB:L.*]]-65536	# encoding: [0xa7,0x07,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	brctg	%r0, -0x10000
+#CHECK: brctg	%r0, .[[LAB:L.*]]-2	# encoding: [0xa7,0x07,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	brctg	%r0, -2
+#CHECK: brctg	%r0, .[[LAB:L.*]]	# encoding: [0xa7,0x07,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	brctg	%r0, 0
+#CHECK: brctg	%r0, .[[LAB:L.*]]+65534	# encoding: [0xa7,0x07,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	brctg	%r0, 0xfffe
+#CHECK: brctg	%r15, .[[LAB:L.*]]	# encoding: [0xa7,0xf7,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	brctg	%r15, 0
+
+#CHECK: c	%r0, 0                  # encoding: [0x59,0x00,0x00,0x00]
+#CHECK: c	%r0, 4095               # encoding: [0x59,0x00,0x0f,0xff]
+#CHECK: c	%r0, 0(%r1)             # encoding: [0x59,0x00,0x10,0x00]
+#CHECK: c	%r0, 0(%r15)            # encoding: [0x59,0x00,0xf0,0x00]
+#CHECK: c	%r0, 4095(%r1,%r15)     # encoding: [0x59,0x01,0xff,0xff]
+#CHECK: c	%r0, 4095(%r15,%r1)     # encoding: [0x59,0x0f,0x1f,0xff]
+#CHECK: c	%r15, 0                 # encoding: [0x59,0xf0,0x00,0x00]
+
+	c	%r0, 0
+	c	%r0, 4095
+	c	%r0, 0(%r1)
+	c	%r0, 0(%r15)
+	c	%r0, 4095(%r1,%r15)
+	c	%r0, 4095(%r15,%r1)
+	c	%r15, 0
+
+#CHECK: cdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x19]
+#CHECK: cdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x19]
+#CHECK: cdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x19]
+#CHECK: cdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x19]
+#CHECK: cdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x19]
+#CHECK: cdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x19]
+#CHECK: cdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x19]
+
+	cdb	%f0, 0
+	cdb	%f0, 4095
+	cdb	%f0, 0(%r1)
+	cdb	%f0, 0(%r15)
+	cdb	%f0, 4095(%r1,%r15)
+	cdb	%f0, 4095(%r15,%r1)
+	cdb	%f15, 0
+
+#CHECK: cdbr	%f0, %f0                # encoding: [0xb3,0x19,0x00,0x00]
+#CHECK: cdbr	%f0, %f15               # encoding: [0xb3,0x19,0x00,0x0f]
+#CHECK: cdbr	%f7, %f8                # encoding: [0xb3,0x19,0x00,0x78]
+#CHECK: cdbr	%f15, %f0               # encoding: [0xb3,0x19,0x00,0xf0]
+
+	cdbr	%f0, %f0
+	cdbr	%f0, %f15
+	cdbr	%f7, %f8
+	cdbr	%f15, %f0
+
+#CHECK: cdfbr	%f0, %r0                # encoding: [0xb3,0x95,0x00,0x00]
+#CHECK: cdfbr	%f0, %r15               # encoding: [0xb3,0x95,0x00,0x0f]
+#CHECK: cdfbr	%f15, %r0               # encoding: [0xb3,0x95,0x00,0xf0]
+#CHECK: cdfbr	%f7, %r8                # encoding: [0xb3,0x95,0x00,0x78]
+#CHECK: cdfbr	%f15, %r15              # encoding: [0xb3,0x95,0x00,0xff]
+
+	cdfbr	%f0, %r0
+	cdfbr	%f0, %r15
+	cdfbr	%f15, %r0
+	cdfbr	%f7, %r8
+	cdfbr	%f15, %r15
+
+#CHECK: cdgbr	%f0, %r0                # encoding: [0xb3,0xa5,0x00,0x00]
+#CHECK: cdgbr	%f0, %r15               # encoding: [0xb3,0xa5,0x00,0x0f]
+#CHECK: cdgbr	%f15, %r0               # encoding: [0xb3,0xa5,0x00,0xf0]
+#CHECK: cdgbr	%f7, %r8                # encoding: [0xb3,0xa5,0x00,0x78]
+#CHECK: cdgbr	%f15, %r15              # encoding: [0xb3,0xa5,0x00,0xff]
+
+	cdgbr	%f0, %r0
+	cdgbr	%f0, %r15
+	cdgbr	%f15, %r0
+	cdgbr	%f7, %r8
+	cdgbr	%f15, %r15
+
+#CHECK: ceb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x09]
+#CHECK: ceb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x09]
+#CHECK: ceb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x09]
+#CHECK: ceb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x09]
+#CHECK: ceb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x09]
+#CHECK: ceb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x09]
+#CHECK: ceb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x09]
+
+	ceb	%f0, 0
+	ceb	%f0, 4095
+	ceb	%f0, 0(%r1)
+	ceb	%f0, 0(%r15)
+	ceb	%f0, 4095(%r1,%r15)
+	ceb	%f0, 4095(%r15,%r1)
+	ceb	%f15, 0
+
+#CHECK: cebr	%f0, %f0                # encoding: [0xb3,0x09,0x00,0x00]
+#CHECK: cebr	%f0, %f15               # encoding: [0xb3,0x09,0x00,0x0f]
+#CHECK: cebr	%f7, %f8                # encoding: [0xb3,0x09,0x00,0x78]
+#CHECK: cebr	%f15, %f0               # encoding: [0xb3,0x09,0x00,0xf0]
+
+	cebr	%f0, %f0
+	cebr	%f0, %f15
+	cebr	%f7, %f8
+	cebr	%f15, %f0
+
+#CHECK: cefbr	%f0, %r0                # encoding: [0xb3,0x94,0x00,0x00]
+#CHECK: cefbr	%f0, %r15               # encoding: [0xb3,0x94,0x00,0x0f]
+#CHECK: cefbr	%f15, %r0               # encoding: [0xb3,0x94,0x00,0xf0]
+#CHECK: cefbr	%f7, %r8                # encoding: [0xb3,0x94,0x00,0x78]
+#CHECK: cefbr	%f15, %r15              # encoding: [0xb3,0x94,0x00,0xff]
+
+	cefbr	%f0, %r0
+	cefbr	%f0, %r15
+	cefbr	%f15, %r0
+	cefbr	%f7, %r8
+	cefbr	%f15, %r15
+
+#CHECK: cegbr	%f0, %r0                # encoding: [0xb3,0xa4,0x00,0x00]
+#CHECK: cegbr	%f0, %r15               # encoding: [0xb3,0xa4,0x00,0x0f]
+#CHECK: cegbr	%f15, %r0               # encoding: [0xb3,0xa4,0x00,0xf0]
+#CHECK: cegbr	%f7, %r8                # encoding: [0xb3,0xa4,0x00,0x78]
+#CHECK: cegbr	%f15, %r15              # encoding: [0xb3,0xa4,0x00,0xff]
+
+	cegbr	%f0, %r0
+	cegbr	%f0, %r15
+	cegbr	%f15, %r0
+	cegbr	%f7, %r8
+	cegbr	%f15, %r15
+
+#CHECK: cfdbr	%r0, 0, %f0             # encoding: [0xb3,0x99,0x00,0x00]
+#CHECK: cfdbr	%r0, 0, %f15            # encoding: [0xb3,0x99,0x00,0x0f]
+#CHECK: cfdbr	%r0, 15, %f0            # encoding: [0xb3,0x99,0xf0,0x00]
+#CHECK: cfdbr	%r4, 5, %f6             # encoding: [0xb3,0x99,0x50,0x46]
+#CHECK: cfdbr	%r15, 0, %f0            # encoding: [0xb3,0x99,0x00,0xf0]
+
+	cfdbr	%r0, 0, %f0
+	cfdbr	%r0, 0, %f15
+	cfdbr	%r0, 15, %f0
+	cfdbr	%r4, 5, %f6
+	cfdbr	%r15, 0, %f0
+
+#CHECK: cfebr	%r0, 0, %f0             # encoding: [0xb3,0x98,0x00,0x00]
+#CHECK: cfebr	%r0, 0, %f15            # encoding: [0xb3,0x98,0x00,0x0f]
+#CHECK: cfebr	%r0, 15, %f0            # encoding: [0xb3,0x98,0xf0,0x00]
+#CHECK: cfebr	%r4, 5, %f6             # encoding: [0xb3,0x98,0x50,0x46]
+#CHECK: cfebr	%r15, 0, %f0            # encoding: [0xb3,0x98,0x00,0xf0]
+
+	cfebr	%r0, 0, %f0
+	cfebr	%r0, 0, %f15
+	cfebr	%r0, 15, %f0
+	cfebr	%r4, 5, %f6
+	cfebr	%r15, 0, %f0
+
+#CHECK: cfi	%r0, -2147483648        # encoding: [0xc2,0x0d,0x80,0x00,0x00,0x00]
+#CHECK: cfi	%r0, -1                 # encoding: [0xc2,0x0d,0xff,0xff,0xff,0xff]
+#CHECK: cfi	%r0, 0                  # encoding: [0xc2,0x0d,0x00,0x00,0x00,0x00]
+#CHECK: cfi	%r0, 1                  # encoding: [0xc2,0x0d,0x00,0x00,0x00,0x01]
+#CHECK: cfi	%r0, 2147483647         # encoding: [0xc2,0x0d,0x7f,0xff,0xff,0xff]
+#CHECK: cfi	%r15, 0                 # encoding: [0xc2,0xfd,0x00,0x00,0x00,0x00]
+
+	cfi	%r0, -1 << 31
+	cfi	%r0, -1
+	cfi	%r0, 0
+	cfi	%r0, 1
+	cfi	%r0, (1 << 31) - 1
+	cfi	%r15, 0
+
+#CHECK: cfxbr	%r0, 0, %f0             # encoding: [0xb3,0x9a,0x00,0x00]
+#CHECK: cfxbr	%r0, 0, %f13            # encoding: [0xb3,0x9a,0x00,0x0d]
+#CHECK: cfxbr	%r0, 15, %f0            # encoding: [0xb3,0x9a,0xf0,0x00]
+#CHECK: cfxbr	%r4, 5, %f8             # encoding: [0xb3,0x9a,0x50,0x48]
+#CHECK: cfxbr	%r15, 0, %f0            # encoding: [0xb3,0x9a,0x00,0xf0]
+
+	cfxbr	%r0, 0, %f0
+	cfxbr	%r0, 0, %f13
+	cfxbr	%r0, 15, %f0
+	cfxbr	%r4, 5, %f8
+	cfxbr	%r15, 0, %f0
+
+#CHECK: cg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x20]
+#CHECK: cg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x20]
+#CHECK: cg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x20]
+#CHECK: cg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x20]
+#CHECK: cg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x20]
+#CHECK: cg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x20]
+#CHECK: cg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x20]
+#CHECK: cg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x20]
+#CHECK: cg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x20]
+#CHECK: cg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x20]
+
+	cg	%r0, -524288
+	cg	%r0, -1
+	cg	%r0, 0
+	cg	%r0, 1
+	cg	%r0, 524287
+	cg	%r0, 0(%r1)
+	cg	%r0, 0(%r15)
+	cg	%r0, 524287(%r1,%r15)
+	cg	%r0, 524287(%r15,%r1)
+	cg	%r15, 0
+
+#CHECK: cgdbr	%r0, 0, %f0             # encoding: [0xb3,0xa9,0x00,0x00]
+#CHECK: cgdbr	%r0, 0, %f15            # encoding: [0xb3,0xa9,0x00,0x0f]
+#CHECK: cgdbr	%r0, 15, %f0            # encoding: [0xb3,0xa9,0xf0,0x00]
+#CHECK: cgdbr	%r4, 5, %f6             # encoding: [0xb3,0xa9,0x50,0x46]
+#CHECK: cgdbr	%r15, 0, %f0            # encoding: [0xb3,0xa9,0x00,0xf0]
+
+	cgdbr	%r0, 0, %f0
+	cgdbr	%r0, 0, %f15
+	cgdbr	%r0, 15, %f0
+	cgdbr	%r4, 5, %f6
+	cgdbr	%r15, 0, %f0
+
+#CHECK: cgebr	%r0, 0, %f0             # encoding: [0xb3,0xa8,0x00,0x00]
+#CHECK: cgebr	%r0, 0, %f15            # encoding: [0xb3,0xa8,0x00,0x0f]
+#CHECK: cgebr	%r0, 15, %f0            # encoding: [0xb3,0xa8,0xf0,0x00]
+#CHECK: cgebr	%r4, 5, %f6             # encoding: [0xb3,0xa8,0x50,0x46]
+#CHECK: cgebr	%r15, 0, %f0            # encoding: [0xb3,0xa8,0x00,0xf0]
+
+	cgebr	%r0, 0, %f0
+	cgebr	%r0, 0, %f15
+	cgebr	%r0, 15, %f0
+	cgebr	%r4, 5, %f6
+	cgebr	%r15, 0, %f0
+
+#CHECK: cgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x30]
+#CHECK: cgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x30]
+#CHECK: cgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x30]
+#CHECK: cgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x30]
+#CHECK: cgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x30]
+#CHECK: cgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x30]
+#CHECK: cgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x30]
+#CHECK: cgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x30]
+#CHECK: cgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x30]
+#CHECK: cgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x30]
+
+	cgf	%r0, -524288
+	cgf	%r0, -1
+	cgf	%r0, 0
+	cgf	%r0, 1
+	cgf	%r0, 524287
+	cgf	%r0, 0(%r1)
+	cgf	%r0, 0(%r15)
+	cgf	%r0, 524287(%r1,%r15)
+	cgf	%r0, 524287(%r15,%r1)
+	cgf	%r15, 0
+
+#CHECK: cgfi	%r0, -2147483648        # encoding: [0xc2,0x0c,0x80,0x00,0x00,0x00]
+#CHECK: cgfi	%r0, -1                 # encoding: [0xc2,0x0c,0xff,0xff,0xff,0xff]
+#CHECK: cgfi	%r0, 0                  # encoding: [0xc2,0x0c,0x00,0x00,0x00,0x00]
+#CHECK: cgfi	%r0, 1                  # encoding: [0xc2,0x0c,0x00,0x00,0x00,0x01]
+#CHECK: cgfi	%r0, 2147483647         # encoding: [0xc2,0x0c,0x7f,0xff,0xff,0xff]
+#CHECK: cgfi	%r15, 0                 # encoding: [0xc2,0xfc,0x00,0x00,0x00,0x00]
+
+	cgfi	%r0, -1 << 31
+	cgfi	%r0, -1
+	cgfi	%r0, 0
+	cgfi	%r0, 1
+	cgfi	%r0, (1 << 31) - 1
+	cgfi	%r15, 0
+
+#CHECK: cgfr	%r0, %r0                # encoding: [0xb9,0x30,0x00,0x00]
+#CHECK: cgfr	%r0, %r15               # encoding: [0xb9,0x30,0x00,0x0f]
+#CHECK: cgfr	%r15, %r0               # encoding: [0xb9,0x30,0x00,0xf0]
+#CHECK: cgfr	%r7, %r8                # encoding: [0xb9,0x30,0x00,0x78]
+
+	cgfr	%r0,%r0
+	cgfr	%r0,%r15
+	cgfr	%r15,%r0
+	cgfr	%r7,%r8
+
+#CHECK: cgfrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	cgfrl	%r0, -0x100000000
+#CHECK: cgfrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	cgfrl	%r0, -2
+#CHECK: cgfrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	cgfrl	%r0, 0
+#CHECK: cgfrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	cgfrl	%r0, 0xfffffffe
+
+#CHECK: cgfrl	%r0, foo                # encoding: [0xc6,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: cgfrl	%r15, foo               # encoding: [0xc6,0xfc,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	cgfrl	%r0,foo
+	cgfrl	%r15,foo
+
+#CHECK: cgfrl	%r3, bar+100            # encoding: [0xc6,0x3c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: cgfrl	%r4, bar+100            # encoding: [0xc6,0x4c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	cgfrl	%r3,bar+100
+	cgfrl	%r4,bar+100
+
+#CHECK: cgfrl	%r7, frob@PLT           # encoding: [0xc6,0x7c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: cgfrl	%r8, frob@PLT           # encoding: [0xc6,0x8c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	cgfrl	%r7,frob@PLT
+	cgfrl	%r8,frob@PLT
+
+#CHECK: cgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x34]
+#CHECK: cgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x34]
+#CHECK: cgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x34]
+#CHECK: cgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x34]
+#CHECK: cgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x34]
+#CHECK: cgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x34]
+#CHECK: cgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x34]
+#CHECK: cgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x34]
+#CHECK: cgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x34]
+#CHECK: cgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x34]
+
+	cgh	%r0, -524288
+	cgh	%r0, -1
+	cgh	%r0, 0
+	cgh	%r0, 1
+	cgh	%r0, 524287
+	cgh	%r0, 0(%r1)
+	cgh	%r0, 0(%r15)
+	cgh	%r0, 524287(%r1,%r15)
+	cgh	%r0, 524287(%r15,%r1)
+	cgh	%r15, 0
+
+#CHECK: cghi	%r0, -32768             # encoding: [0xa7,0x0f,0x80,0x00]
+#CHECK: cghi	%r0, -1                 # encoding: [0xa7,0x0f,0xff,0xff]
+#CHECK: cghi	%r0, 0                  # encoding: [0xa7,0x0f,0x00,0x00]
+#CHECK: cghi	%r0, 1                  # encoding: [0xa7,0x0f,0x00,0x01]
+#CHECK: cghi	%r0, 32767              # encoding: [0xa7,0x0f,0x7f,0xff]
+#CHECK: cghi	%r15, 0                 # encoding: [0xa7,0xff,0x00,0x00]
+
+	cghi	%r0, -32768
+	cghi	%r0, -1
+	cghi	%r0, 0
+	cghi	%r0, 1
+	cghi	%r0, 32767
+	cghi	%r15, 0
+
+#CHECK: cghrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	cghrl	%r0, -0x100000000
+#CHECK: cghrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	cghrl	%r0, -2
+#CHECK: cghrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	cghrl	%r0, 0
+#CHECK: cghrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	cghrl	%r0, 0xfffffffe
+
+#CHECK: cghrl	%r0, foo                # encoding: [0xc6,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: cghrl	%r15, foo               # encoding: [0xc6,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	cghrl	%r0,foo
+	cghrl	%r15,foo
+
+#CHECK: cghrl	%r3, bar+100            # encoding: [0xc6,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: cghrl	%r4, bar+100            # encoding: [0xc6,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	cghrl	%r3,bar+100
+	cghrl	%r4,bar+100
+
+#CHECK: cghrl	%r7, frob@PLT           # encoding: [0xc6,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: cghrl	%r8, frob@PLT           # encoding: [0xc6,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	cghrl	%r7,frob@PLT
+	cghrl	%r8,frob@PLT
+
+#CHECK: cghsi	0, 0                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x00]
+#CHECK: cghsi	4095, 0                 # encoding: [0xe5,0x58,0x0f,0xff,0x00,0x00]
+#CHECK: cghsi	0, -32768               # encoding: [0xe5,0x58,0x00,0x00,0x80,0x00]
+#CHECK: cghsi	0, -1                   # encoding: [0xe5,0x58,0x00,0x00,0xff,0xff]
+#CHECK: cghsi	0, 0                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x00]
+#CHECK: cghsi	0, 1                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x01]
+#CHECK: cghsi	0, 32767                # encoding: [0xe5,0x58,0x00,0x00,0x7f,0xff]
+#CHECK: cghsi	0(%r1), 42              # encoding: [0xe5,0x58,0x10,0x00,0x00,0x2a]
+#CHECK: cghsi	0(%r15), 42             # encoding: [0xe5,0x58,0xf0,0x00,0x00,0x2a]
+#CHECK: cghsi	4095(%r1), 42           # encoding: [0xe5,0x58,0x1f,0xff,0x00,0x2a]
+#CHECK: cghsi	4095(%r15), 42          # encoding: [0xe5,0x58,0xff,0xff,0x00,0x2a]
+
+	cghsi	0, 0
+	cghsi	4095, 0
+	cghsi	0, -32768
+	cghsi	0, -1
+	cghsi	0, 0
+	cghsi	0, 1
+	cghsi	0, 32767
+	cghsi	0(%r1), 42
+	cghsi	0(%r15), 42
+	cghsi	4095(%r1), 42
+	cghsi	4095(%r15), 42
+
+#CHECK: cgij	%r0, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x7c]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgij	%r0, -128, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x80,0x7c]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgij	%r0, 127, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x7f,0x7c]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgij	%r15, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x7c]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgij	%r7, -1, 0, .[[LAB:L.*]]	# encoding: [0xec,0x70,A,A,0xff,0x7c]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	cgij	%r0, 0, 0, 0
+	cgij	%r0, -128, 0, 0
+	cgij	%r0, 127, 0, 0
+	cgij	%r15, 0, 0, 0
+	cgij	%r7, -1, 0, 0
+
+#CHECK: cgij	%r1, -66, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, -0x10000
+#CHECK: cgij	%r1, -66, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, -2
+#CHECK: cgij	%r1, -66, 0, .[[LAB:L.*]]		# encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, 0
+#CHECK: cgij	%r1, -66, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, 0xfffe
+
+#CHECK: cgij	%r1, -66, 0, foo                  # encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, foo
+
+#CHECK: cgij	%r1, -66, 1, foo                  # encoding: [0xec,0x11,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 1, foo
+
+#CHECK: cgij	%r1, -66, 2, foo                  # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijh	%r1, -66, foo                     # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijnle	%r1, -66, foo                     # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 2, foo
+	cgijh	%r1, -66, foo
+	cgijnle	%r1, -66, foo
+
+#CHECK: cgij	%r1, -66, 3, foo                  # encoding: [0xec,0x13,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 3, foo
+
+#CHECK: cgij	%r1, -66, 4, foo                  # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijl	%r1, -66, foo                     # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijnhe	%r1, -66, foo                     # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 4, foo
+	cgijl	%r1, -66, foo
+	cgijnhe	%r1, -66, foo
+
+#CHECK: cgij	%r1, -66, 5, foo                  # encoding: [0xec,0x15,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 5, foo
+
+#CHECK: cgij	%r1, -66, 6, foo                  # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijlh	%r1, -66, foo                     # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijne	%r1, -66, foo                     # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 6, foo
+	cgijlh	%r1, -66, foo
+	cgijne	%r1, -66, foo
+
+#CHECK: cgij	%r1, -66, 7, foo                  # encoding: [0xec,0x17,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 7, foo
+
+#CHECK: cgij	%r1, -66, 8, foo                  # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgije	%r1, -66, foo                     # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijnlh	%r1, -66, foo                     # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 8, foo
+	cgije	%r1, -66, foo
+	cgijnlh	%r1, -66, foo
+
+#CHECK: cgij	%r1, -66, 9, foo                  # encoding: [0xec,0x19,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 9, foo
+
+#CHECK: cgij	%r1, -66, 10, foo                 # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijhe	%r1, -66, foo                     # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijnl	%r1, -66, foo                     # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 10, foo
+	cgijhe	%r1, -66, foo
+	cgijnl	%r1, -66, foo
+
+#CHECK: cgij	%r1, -66, 11, foo                 # encoding: [0xec,0x1b,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 11, foo
+
+#CHECK: cgij	%r1, -66, 12, foo                 # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijle	%r1, -66, foo                     # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgijnh	%r1, -66, foo                     # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 12, foo
+	cgijle	%r1, -66, foo
+	cgijnh	%r1, -66, foo
+
+#CHECK: cgij	%r1, -66, 13, foo                 # encoding: [0xec,0x1d,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 13, foo
+
+#CHECK: cgij	%r1, -66, 14, foo                 # encoding: [0xec,0x1e,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 14, foo
+
+#CHECK: cgij	%r1, -66, 15, foo                 # encoding: [0xec,0x1f,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 15, foo
+
+#CHECK: cgij	%r1, -66, 0, bar+100              # encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, bar+100
+
+#CHECK: cgijh	%r1, -66, bar+100                 # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijh	%r1, -66, bar+100
+
+#CHECK: cgijnle	%r1, -66, bar+100                 # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijnle	%r1, -66, bar+100
+
+#CHECK: cgijl	%r1, -66, bar+100                 # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijl	%r1, -66, bar+100
+
+#CHECK: cgijnhe	%r1, -66, bar+100                 # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijnhe	%r1, -66, bar+100
+
+#CHECK: cgijlh	%r1, -66, bar+100                 # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijlh	%r1, -66, bar+100
+
+#CHECK: cgijne	%r1, -66, bar+100                 # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijne	%r1, -66, bar+100
+
+#CHECK: cgije	%r1, -66, bar+100                 # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgije	%r1, -66, bar+100
+
+#CHECK: cgijnlh	%r1, -66, bar+100                 # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijnlh	%r1, -66, bar+100
+
+#CHECK: cgijhe	%r1, -66, bar+100                 # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijhe	%r1, -66, bar+100
+
+#CHECK: cgijnl	%r1, -66, bar+100                 # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijnl	%r1, -66, bar+100
+
+#CHECK: cgijle	%r1, -66, bar+100                 # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijle	%r1, -66, bar+100
+
+#CHECK: cgijnh	%r1, -66, bar+100                 # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgijnh	%r1, -66, bar+100
+
+#CHECK: cgij	%r1, -66, 0, bar@PLT              # encoding: [0xec,0x10,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgij	%r1, -66, 0, bar@PLT
+
+#CHECK: cgijh	%r1, -66, bar@PLT                 # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijh	%r1, -66, bar@PLT
+
+#CHECK: cgijnle	%r1, -66, bar@PLT                 # encoding: [0xec,0x12,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijnle	%r1, -66, bar@PLT
+
+#CHECK: cgijl	%r1, -66, bar@PLT                 # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijl	%r1, -66, bar@PLT
+
+#CHECK: cgijnhe	%r1, -66, bar@PLT                 # encoding: [0xec,0x14,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijnhe	%r1, -66, bar@PLT
+
+#CHECK: cgijlh	%r1, -66, bar@PLT                 # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijlh	%r1, -66, bar@PLT
+
+#CHECK: cgijne	%r1, -66, bar@PLT                 # encoding: [0xec,0x16,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijne	%r1, -66, bar@PLT
+
+#CHECK: cgije	%r1, -66, bar@PLT                 # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgije	%r1, -66, bar@PLT
+
+#CHECK: cgijnlh	%r1, -66, bar@PLT                 # encoding: [0xec,0x18,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijnlh	%r1, -66, bar@PLT
+
+#CHECK: cgijhe	%r1, -66, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijhe	%r1, -66, bar@PLT
+
+#CHECK: cgijnl	%r1, -66, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijnl	%r1, -66, bar@PLT
+
+#CHECK: cgijle	%r1, -66, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijle	%r1, -66, bar@PLT
+
+#CHECK: cgijnh	%r1, -66, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xbe,0x7c]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgijnh	%r1, -66, bar@PLT
+
+#CHECK: cgr	%r0, %r0                # encoding: [0xb9,0x20,0x00,0x00]
+#CHECK: cgr	%r0, %r15               # encoding: [0xb9,0x20,0x00,0x0f]
+#CHECK: cgr	%r15, %r0               # encoding: [0xb9,0x20,0x00,0xf0]
+#CHECK: cgr	%r7, %r8                # encoding: [0xb9,0x20,0x00,0x78]
+
+	cgr	%r0,%r0
+	cgr	%r0,%r15
+	cgr	%r15,%r0
+	cgr	%r7,%r8
+
+#CHECK: cgrj	%r0, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgrj	%r0, %r15, 0, .[[LAB:L.*]]	# encoding: [0xec,0x0f,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgrj	%r15, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cgrj	%r7, %r8, 0, .[[LAB:L.*]]	# encoding: [0xec,0x78,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	cgrj	%r0,%r0,0,0
+	cgrj	%r0,%r15,0,0
+	cgrj	%r15,%r0,0,0
+	cgrj	%r7,%r8,0,0
+
+#CHECK: cgrj	%r1, %r2, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, -0x10000
+#CHECK: cgrj	%r1, %r2, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, -2
+#CHECK: cgrj	%r1, %r2, 0, .[[LAB:L.*]]		# encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, 0
+#CHECK: cgrj	%r1, %r2, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, 0xfffe
+
+#CHECK: cgrj	%r1, %r2, 0, foo                  # encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, foo
+
+#CHECK: cgrj	%r1, %r2, 1, foo                  # encoding: [0xec,0x12,A,A,0x10,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 1, foo
+
+#CHECK: cgrj	%r1, %r2, 2, foo                  # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjnle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 2, foo
+	cgrjh	%r1, %r2, foo
+	cgrjnle	%r1, %r2, foo
+
+#CHECK: cgrj	%r1, %r2, 3, foo                  # encoding: [0xec,0x12,A,A,0x30,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 3, foo
+
+#CHECK: cgrj	%r1, %r2, 4, foo                  # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjnhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 4, foo
+	cgrjl	%r1, %r2, foo
+	cgrjnhe	%r1, %r2, foo
+
+#CHECK: cgrj	%r1, %r2, 5, foo                  # encoding: [0xec,0x12,A,A,0x50,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 5, foo
+
+#CHECK: cgrj	%r1, %r2, 6, foo                  # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjne	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 6, foo
+	cgrjlh	%r1, %r2, foo
+	cgrjne	%r1, %r2, foo
+
+#CHECK: cgrj	%r1, %r2, 7, foo                  # encoding: [0xec,0x12,A,A,0x70,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 7, foo
+
+#CHECK: cgrj	%r1, %r2, 8, foo                  # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrje	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjnlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 8, foo
+	cgrje	%r1, %r2, foo
+	cgrjnlh	%r1, %r2, foo
+
+#CHECK: cgrj	%r1, %r2, 9, foo                  # encoding: [0xec,0x12,A,A,0x90,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 9, foo
+
+#CHECK: cgrj	%r1, %r2, 10, foo                 # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjnl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 10, foo
+	cgrjhe	%r1, %r2, foo
+	cgrjnl	%r1, %r2, foo
+
+#CHECK: cgrj	%r1, %r2, 11, foo                 # encoding: [0xec,0x12,A,A,0xb0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 11, foo
+
+#CHECK: cgrj	%r1, %r2, 12, foo                 # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cgrjnh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 12, foo
+	cgrjle	%r1, %r2, foo
+	cgrjnh	%r1, %r2, foo
+
+#CHECK: cgrj	%r1, %r2, 13, foo                 # encoding: [0xec,0x12,A,A,0xd0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 13, foo
+
+#CHECK: cgrj	%r1, %r2, 14, foo                 # encoding: [0xec,0x12,A,A,0xe0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 14, foo
+
+#CHECK: cgrj	%r1, %r2, 15, foo                 # encoding: [0xec,0x12,A,A,0xf0,0x64]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 15, foo
+
+#CHECK: cgrj	%r1, %r2, 0, bar+100              # encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, bar+100
+
+#CHECK: cgrjh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjh	%r1, %r2, bar+100
+
+#CHECK: cgrjnle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjnle	%r1, %r2, bar+100
+
+#CHECK: cgrjl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjl	%r1, %r2, bar+100
+
+#CHECK: cgrjnhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjnhe	%r1, %r2, bar+100
+
+#CHECK: cgrjlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjlh	%r1, %r2, bar+100
+
+#CHECK: cgrjne	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjne	%r1, %r2, bar+100
+
+#CHECK: cgrje	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrje	%r1, %r2, bar+100
+
+#CHECK: cgrjnlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjnlh	%r1, %r2, bar+100
+
+#CHECK: cgrjhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjhe	%r1, %r2, bar+100
+
+#CHECK: cgrjnl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjnl	%r1, %r2, bar+100
+
+#CHECK: cgrjle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjle	%r1, %r2, bar+100
+
+#CHECK: cgrjnh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cgrjnh	%r1, %r2, bar+100
+
+#CHECK: cgrj	%r1, %r2, 0, bar@PLT              # encoding: [0xec,0x12,A,A,0x00,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrj	%r1, %r2, 0, bar@PLT
+
+#CHECK: cgrjh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjh	%r1, %r2, bar@PLT
+
+#CHECK: cgrjnle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjnle	%r1, %r2, bar@PLT
+
+#CHECK: cgrjl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjl	%r1, %r2, bar@PLT
+
+#CHECK: cgrjnhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjnhe	%r1, %r2, bar@PLT
+
+#CHECK: cgrjlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjlh	%r1, %r2, bar@PLT
+
+#CHECK: cgrjne	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjne	%r1, %r2, bar@PLT
+
+#CHECK: cgrje	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrje	%r1, %r2, bar@PLT
+
+#CHECK: cgrjnlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjnlh	%r1, %r2, bar@PLT
+
+#CHECK: cgrjhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjhe	%r1, %r2, bar@PLT
+
+#CHECK: cgrjnl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjnl	%r1, %r2, bar@PLT
+
+#CHECK: cgrjle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjle	%r1, %r2, bar@PLT
+
+#CHECK: cgrjnh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x64]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cgrjnh	%r1, %r2, bar@PLT
+
+#CHECK: cgrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	cgrl	%r0, -0x100000000
+#CHECK: cgrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	cgrl	%r0, -2
+#CHECK: cgrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	cgrl	%r0, 0
+#CHECK: cgrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	cgrl	%r0, 0xfffffffe
+
+#CHECK: cgrl	%r0, foo                # encoding: [0xc6,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: cgrl	%r15, foo               # encoding: [0xc6,0xf8,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	cgrl	%r0,foo
+	cgrl	%r15,foo
+
+#CHECK: cgrl	%r3, bar+100            # encoding: [0xc6,0x38,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: cgrl	%r4, bar+100            # encoding: [0xc6,0x48,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	cgrl	%r3,bar+100
+	cgrl	%r4,bar+100
+
+#CHECK: cgrl	%r7, frob@PLT           # encoding: [0xc6,0x78,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: cgrl	%r8, frob@PLT           # encoding: [0xc6,0x88,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	cgrl	%r7,frob@PLT
+	cgrl	%r8,frob@PLT
+
+#CHECK: cgxbr	%r0, 0, %f0             # encoding: [0xb3,0xaa,0x00,0x00]
+#CHECK: cgxbr	%r0, 0, %f13            # encoding: [0xb3,0xaa,0x00,0x0d]
+#CHECK: cgxbr	%r0, 15, %f0            # encoding: [0xb3,0xaa,0xf0,0x00]
+#CHECK: cgxbr	%r4, 5, %f8             # encoding: [0xb3,0xaa,0x50,0x48]
+#CHECK: cgxbr	%r15, 0, %f0            # encoding: [0xb3,0xaa,0x00,0xf0]
+
+	cgxbr	%r0, 0, %f0
+	cgxbr	%r0, 0, %f13
+	cgxbr	%r0, 15, %f0
+	cgxbr	%r4, 5, %f8
+	cgxbr	%r15, 0, %f0
+
+#CHECK: ch	%r0, 0                  # encoding: [0x49,0x00,0x00,0x00]
+#CHECK: ch	%r0, 4095               # encoding: [0x49,0x00,0x0f,0xff]
+#CHECK: ch	%r0, 0(%r1)             # encoding: [0x49,0x00,0x10,0x00]
+#CHECK: ch	%r0, 0(%r15)            # encoding: [0x49,0x00,0xf0,0x00]
+#CHECK: ch	%r0, 4095(%r1,%r15)     # encoding: [0x49,0x01,0xff,0xff]
+#CHECK: ch	%r0, 4095(%r15,%r1)     # encoding: [0x49,0x0f,0x1f,0xff]
+#CHECK: ch	%r15, 0                 # encoding: [0x49,0xf0,0x00,0x00]
+
+	ch	%r0, 0
+	ch	%r0, 4095
+	ch	%r0, 0(%r1)
+	ch	%r0, 0(%r15)
+	ch	%r0, 4095(%r1,%r15)
+	ch	%r0, 4095(%r15,%r1)
+	ch	%r15, 0
+
+#CHECK: chhsi	0, 0                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x00]
+#CHECK: chhsi	4095, 0                 # encoding: [0xe5,0x54,0x0f,0xff,0x00,0x00]
+#CHECK: chhsi	0, -32768               # encoding: [0xe5,0x54,0x00,0x00,0x80,0x00]
+#CHECK: chhsi	0, -1                   # encoding: [0xe5,0x54,0x00,0x00,0xff,0xff]
+#CHECK: chhsi	0, 0                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x00]
+#CHECK: chhsi	0, 1                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x01]
+#CHECK: chhsi	0, 32767                # encoding: [0xe5,0x54,0x00,0x00,0x7f,0xff]
+#CHECK: chhsi	0(%r1), 42              # encoding: [0xe5,0x54,0x10,0x00,0x00,0x2a]
+#CHECK: chhsi	0(%r15), 42             # encoding: [0xe5,0x54,0xf0,0x00,0x00,0x2a]
+#CHECK: chhsi	4095(%r1), 42           # encoding: [0xe5,0x54,0x1f,0xff,0x00,0x2a]
+#CHECK: chhsi	4095(%r15), 42          # encoding: [0xe5,0x54,0xff,0xff,0x00,0x2a]
+
+	chhsi	0, 0
+	chhsi	4095, 0
+	chhsi	0, -32768
+	chhsi	0, -1
+	chhsi	0, 0
+	chhsi	0, 1
+	chhsi	0, 32767
+	chhsi	0(%r1), 42
+	chhsi	0(%r15), 42
+	chhsi	4095(%r1), 42
+	chhsi	4095(%r15), 42
+
+#CHECK: chi	%r0, -32768             # encoding: [0xa7,0x0e,0x80,0x00]
+#CHECK: chi	%r0, -1                 # encoding: [0xa7,0x0e,0xff,0xff]
+#CHECK: chi	%r0, 0                  # encoding: [0xa7,0x0e,0x00,0x00]
+#CHECK: chi	%r0, 1                  # encoding: [0xa7,0x0e,0x00,0x01]
+#CHECK: chi	%r0, 32767              # encoding: [0xa7,0x0e,0x7f,0xff]
+#CHECK: chi	%r15, 0                 # encoding: [0xa7,0xfe,0x00,0x00]
+
+	chi	%r0, -32768
+	chi	%r0, -1
+	chi	%r0, 0
+	chi	%r0, 1
+	chi	%r0, 32767
+	chi	%r15, 0
+
+#CHECK: chrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	chrl	%r0, -0x100000000
+#CHECK: chrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	chrl	%r0, -2
+#CHECK: chrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	chrl	%r0, 0
+#CHECK: chrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	chrl	%r0, 0xfffffffe
+
+#CHECK: chrl	%r0, foo                # encoding: [0xc6,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: chrl	%r15, foo               # encoding: [0xc6,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	chrl	%r0,foo
+	chrl	%r15,foo
+
+#CHECK: chrl	%r3, bar+100            # encoding: [0xc6,0x35,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: chrl	%r4, bar+100            # encoding: [0xc6,0x45,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	chrl	%r3,bar+100
+	chrl	%r4,bar+100
+
+#CHECK: chrl	%r7, frob@PLT           # encoding: [0xc6,0x75,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: chrl	%r8, frob@PLT           # encoding: [0xc6,0x85,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	chrl	%r7,frob@PLT
+	chrl	%r8,frob@PLT
+
+#CHECK: chsi	0, 0                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x00]
+#CHECK: chsi	4095, 0                 # encoding: [0xe5,0x5c,0x0f,0xff,0x00,0x00]
+#CHECK: chsi	0, -32768               # encoding: [0xe5,0x5c,0x00,0x00,0x80,0x00]
+#CHECK: chsi	0, -1                   # encoding: [0xe5,0x5c,0x00,0x00,0xff,0xff]
+#CHECK: chsi	0, 0                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x00]
+#CHECK: chsi	0, 1                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x01]
+#CHECK: chsi	0, 32767                # encoding: [0xe5,0x5c,0x00,0x00,0x7f,0xff]
+#CHECK: chsi	0(%r1), 42              # encoding: [0xe5,0x5c,0x10,0x00,0x00,0x2a]
+#CHECK: chsi	0(%r15), 42             # encoding: [0xe5,0x5c,0xf0,0x00,0x00,0x2a]
+#CHECK: chsi	4095(%r1), 42           # encoding: [0xe5,0x5c,0x1f,0xff,0x00,0x2a]
+#CHECK: chsi	4095(%r15), 42          # encoding: [0xe5,0x5c,0xff,0xff,0x00,0x2a]
+
+	chsi	0, 0
+	chsi	4095, 0
+	chsi	0, -32768
+	chsi	0, -1
+	chsi	0, 0
+	chsi	0, 1
+	chsi	0, 32767
+	chsi	0(%r1), 42
+	chsi	0(%r15), 42
+	chsi	4095(%r1), 42
+	chsi	4095(%r15), 42
+
+#CHECK: chy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x79]
+#CHECK: chy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x79]
+#CHECK: chy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x79]
+#CHECK: chy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x79]
+#CHECK: chy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x79]
+#CHECK: chy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x79]
+#CHECK: chy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x79]
+#CHECK: chy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x79]
+#CHECK: chy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x79]
+#CHECK: chy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x79]
+
+	chy	%r0, -524288
+	chy	%r0, -1
+	chy	%r0, 0
+	chy	%r0, 1
+	chy	%r0, 524287
+	chy	%r0, 0(%r1)
+	chy	%r0, 0(%r15)
+	chy	%r0, 524287(%r1,%r15)
+	chy	%r0, 524287(%r15,%r1)
+	chy	%r15, 0
+
+#CHECK: cij	%r0, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x7e]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cij	%r0, -128, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x80,0x7e]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cij	%r0, 127, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x7f,0x7e]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cij	%r15, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x7e]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: cij	%r7, -1, 0, .[[LAB:L.*]]	# encoding: [0xec,0x70,A,A,0xff,0x7e]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	cij	%r0, 0, 0, 0
+	cij	%r0, -128, 0, 0
+	cij	%r0, 127, 0, 0
+	cij	%r15, 0, 0, 0
+	cij	%r7, -1, 0, 0
+
+#CHECK: cij	%r1, -66, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, -0x10000
+#CHECK: cij	%r1, -66, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, -2
+#CHECK: cij	%r1, -66, 0, .[[LAB:L.*]]		# encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, 0
+#CHECK: cij	%r1, -66, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, 0xfffe
+
+#CHECK: cij	%r1, -66, 0, foo                  # encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, foo
+
+#CHECK: cij	%r1, -66, 1, foo                  # encoding: [0xec,0x11,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 1, foo
+
+#CHECK: cij	%r1, -66, 2, foo                  # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijh	%r1, -66, foo                     # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijnle	%r1, -66, foo                     # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 2, foo
+	cijh	%r1, -66, foo
+	cijnle	%r1, -66, foo
+
+#CHECK: cij	%r1, -66, 3, foo                  # encoding: [0xec,0x13,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 3, foo
+
+#CHECK: cij	%r1, -66, 4, foo                  # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijl	%r1, -66, foo                     # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijnhe	%r1, -66, foo                     # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 4, foo
+	cijl	%r1, -66, foo
+	cijnhe	%r1, -66, foo
+
+#CHECK: cij	%r1, -66, 5, foo                  # encoding: [0xec,0x15,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 5, foo
+
+#CHECK: cij	%r1, -66, 6, foo                  # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijlh	%r1, -66, foo                     # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijne	%r1, -66, foo                     # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 6, foo
+	cijlh	%r1, -66, foo
+	cijne	%r1, -66, foo
+
+#CHECK: cij	%r1, -66, 7, foo                  # encoding: [0xec,0x17,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 7, foo
+
+#CHECK: cij	%r1, -66, 8, foo                  # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cije	%r1, -66, foo                     # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijnlh	%r1, -66, foo                     # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 8, foo
+	cije	%r1, -66, foo
+	cijnlh	%r1, -66, foo
+
+#CHECK: cij	%r1, -66, 9, foo                  # encoding: [0xec,0x19,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 9, foo
+
+#CHECK: cij	%r1, -66, 10, foo                 # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijhe	%r1, -66, foo                     # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijnl	%r1, -66, foo                     # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 10, foo
+	cijhe	%r1, -66, foo
+	cijnl	%r1, -66, foo
+
+#CHECK: cij	%r1, -66, 11, foo                 # encoding: [0xec,0x1b,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 11, foo
+
+#CHECK: cij	%r1, -66, 12, foo                 # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijle	%r1, -66, foo                     # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: cijnh	%r1, -66, foo                     # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 12, foo
+	cijle	%r1, -66, foo
+	cijnh	%r1, -66, foo
+
+#CHECK: cij	%r1, -66, 13, foo                 # encoding: [0xec,0x1d,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 13, foo
+
+#CHECK: cij	%r1, -66, 14, foo                 # encoding: [0xec,0x1e,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 14, foo
+
+#CHECK: cij	%r1, -66, 15, foo                 # encoding: [0xec,0x1f,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 15, foo
+
+#CHECK: cij	%r1, -66, 0, bar+100              # encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, bar+100
+
+#CHECK: cijh	%r1, -66, bar+100                 # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijh	%r1, -66, bar+100
+
+#CHECK: cijnle	%r1, -66, bar+100                 # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijnle	%r1, -66, bar+100
+
+#CHECK: cijl	%r1, -66, bar+100                 # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijl	%r1, -66, bar+100
+
+#CHECK: cijnhe	%r1, -66, bar+100                 # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijnhe	%r1, -66, bar+100
+
+#CHECK: cijlh	%r1, -66, bar+100                 # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijlh	%r1, -66, bar+100
+
+#CHECK: cijne	%r1, -66, bar+100                 # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijne	%r1, -66, bar+100
+
+#CHECK: cije	%r1, -66, bar+100                 # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cije	%r1, -66, bar+100
+
+#CHECK: cijnlh	%r1, -66, bar+100                 # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijnlh	%r1, -66, bar+100
+
+#CHECK: cijhe	%r1, -66, bar+100                 # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijhe	%r1, -66, bar+100
+
+#CHECK: cijnl	%r1, -66, bar+100                 # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijnl	%r1, -66, bar+100
+
+#CHECK: cijle	%r1, -66, bar+100                 # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijle	%r1, -66, bar+100
+
+#CHECK: cijnh	%r1, -66, bar+100                 # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	cijnh	%r1, -66, bar+100
+
+#CHECK: cij	%r1, -66, 0, bar@PLT              # encoding: [0xec,0x10,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cij	%r1, -66, 0, bar@PLT
+
+#CHECK: cijh	%r1, -66, bar@PLT                 # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijh	%r1, -66, bar@PLT
+
+#CHECK: cijnle	%r1, -66, bar@PLT                 # encoding: [0xec,0x12,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijnle	%r1, -66, bar@PLT
+
+#CHECK: cijl	%r1, -66, bar@PLT                 # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijl	%r1, -66, bar@PLT
+
+#CHECK: cijnhe	%r1, -66, bar@PLT                 # encoding: [0xec,0x14,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijnhe	%r1, -66, bar@PLT
+
+#CHECK: cijlh	%r1, -66, bar@PLT                 # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijlh	%r1, -66, bar@PLT
+
+#CHECK: cijne	%r1, -66, bar@PLT                 # encoding: [0xec,0x16,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijne	%r1, -66, bar@PLT
+
+#CHECK: cije	%r1, -66, bar@PLT                 # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cije	%r1, -66, bar@PLT
+
+#CHECK: cijnlh	%r1, -66, bar@PLT                 # encoding: [0xec,0x18,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijnlh	%r1, -66, bar@PLT
+
+#CHECK: cijhe	%r1, -66, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijhe	%r1, -66, bar@PLT
+
+#CHECK: cijnl	%r1, -66, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijnl	%r1, -66, bar@PLT
+
+#CHECK: cijle	%r1, -66, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijle	%r1, -66, bar@PLT
+
+#CHECK: cijnh	%r1, -66, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xbe,0x7e]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	cijnh	%r1, -66, bar@PLT
+
+#CHECK: cl	%r0, 0                  # encoding: [0x55,0x00,0x00,0x00]
+#CHECK: cl	%r0, 4095               # encoding: [0x55,0x00,0x0f,0xff]
+#CHECK: cl	%r0, 0(%r1)             # encoding: [0x55,0x00,0x10,0x00]
+#CHECK: cl	%r0, 0(%r15)            # encoding: [0x55,0x00,0xf0,0x00]
+#CHECK: cl	%r0, 4095(%r1,%r15)     # encoding: [0x55,0x01,0xff,0xff]
+#CHECK: cl	%r0, 4095(%r15,%r1)     # encoding: [0x55,0x0f,0x1f,0xff]
+#CHECK: cl	%r15, 0                 # encoding: [0x55,0xf0,0x00,0x00]
+
+	cl	%r0, 0
+	cl	%r0, 4095
+	cl	%r0, 0(%r1)
+	cl	%r0, 0(%r15)
+	cl	%r0, 4095(%r1,%r15)
+	cl	%r0, 4095(%r15,%r1)
+	cl	%r15, 0
+
+#CHECK: clc	0(1), 0                 # encoding: [0xd5,0x00,0x00,0x00,0x00,0x00]
+#CHECK: clc	0(1), 0(%r1)            # encoding: [0xd5,0x00,0x00,0x00,0x10,0x00]
+#CHECK: clc	0(1), 0(%r15)           # encoding: [0xd5,0x00,0x00,0x00,0xf0,0x00]
+#CHECK: clc	0(1), 4095              # encoding: [0xd5,0x00,0x00,0x00,0x0f,0xff]
+#CHECK: clc	0(1), 4095(%r1)         # encoding: [0xd5,0x00,0x00,0x00,0x1f,0xff]
+#CHECK: clc	0(1), 4095(%r15)        # encoding: [0xd5,0x00,0x00,0x00,0xff,0xff]
+#CHECK: clc	0(1,%r1), 0             # encoding: [0xd5,0x00,0x10,0x00,0x00,0x00]
+#CHECK: clc	0(1,%r15), 0            # encoding: [0xd5,0x00,0xf0,0x00,0x00,0x00]
+#CHECK: clc	4095(1,%r1), 0          # encoding: [0xd5,0x00,0x1f,0xff,0x00,0x00]
+#CHECK: clc	4095(1,%r15), 0         # encoding: [0xd5,0x00,0xff,0xff,0x00,0x00]
+#CHECK: clc	0(256,%r1), 0           # encoding: [0xd5,0xff,0x10,0x00,0x00,0x00]
+#CHECK: clc	0(256,%r15), 0          # encoding: [0xd5,0xff,0xf0,0x00,0x00,0x00]
+
+	clc	0(1), 0
+	clc	0(1), 0(%r1)
+	clc	0(1), 0(%r15)
+	clc	0(1), 4095
+	clc	0(1), 4095(%r1)
+	clc	0(1), 4095(%r15)
+	clc	0(1,%r1), 0
+	clc	0(1,%r15), 0
+	clc	4095(1,%r1), 0
+	clc	4095(1,%r15), 0
+	clc	0(256,%r1), 0
+	clc	0(256,%r15), 0
+
+#CHECK: clfhsi	0, 0                    # encoding: [0xe5,0x5d,0x00,0x00,0x00,0x00]
+#CHECK: clfhsi	4095, 0                 # encoding: [0xe5,0x5d,0x0f,0xff,0x00,0x00]
+#CHECK: clfhsi	0, 65535                # encoding: [0xe5,0x5d,0x00,0x00,0xff,0xff]
+#CHECK: clfhsi	0(%r1), 42              # encoding: [0xe5,0x5d,0x10,0x00,0x00,0x2a]
+#CHECK: clfhsi	0(%r15), 42             # encoding: [0xe5,0x5d,0xf0,0x00,0x00,0x2a]
+#CHECK: clfhsi	4095(%r1), 42           # encoding: [0xe5,0x5d,0x1f,0xff,0x00,0x2a]
+#CHECK: clfhsi	4095(%r15), 42          # encoding: [0xe5,0x5d,0xff,0xff,0x00,0x2a]
+
+	clfhsi	0, 0
+	clfhsi	4095, 0
+	clfhsi	0, 65535
+	clfhsi	0(%r1), 42
+	clfhsi	0(%r15), 42
+	clfhsi	4095(%r1), 42
+	clfhsi	4095(%r15), 42
+
+#CHECK: clfi	%r0, 0                  # encoding: [0xc2,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: clfi	%r0, 4294967295         # encoding: [0xc2,0x0f,0xff,0xff,0xff,0xff]
+#CHECK: clfi	%r15, 0                 # encoding: [0xc2,0xff,0x00,0x00,0x00,0x00]
+
+	clfi	%r0, 0
+	clfi	%r0, (1 << 32) - 1
+	clfi	%r15, 0
+
+#CHECK: clg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x21]
+#CHECK: clg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x21]
+#CHECK: clg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x21]
+#CHECK: clg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x21]
+#CHECK: clg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x21]
+#CHECK: clg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x21]
+#CHECK: clg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x21]
+#CHECK: clg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x21]
+#CHECK: clg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x21]
+#CHECK: clg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x21]
+
+	clg	%r0, -524288
+	clg	%r0, -1
+	clg	%r0, 0
+	clg	%r0, 1
+	clg	%r0, 524287
+	clg	%r0, 0(%r1)
+	clg	%r0, 0(%r15)
+	clg	%r0, 524287(%r1,%r15)
+	clg	%r0, 524287(%r15,%r1)
+	clg	%r15, 0
+
+#CHECK: clgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x31]
+#CHECK: clgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x31]
+#CHECK: clgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x31]
+#CHECK: clgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x31]
+#CHECK: clgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x31]
+#CHECK: clgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x31]
+#CHECK: clgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x31]
+#CHECK: clgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x31]
+#CHECK: clgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x31]
+#CHECK: clgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x31]
+
+	clgf	%r0, -524288
+	clgf	%r0, -1
+	clgf	%r0, 0
+	clgf	%r0, 1
+	clgf	%r0, 524287
+	clgf	%r0, 0(%r1)
+	clgf	%r0, 0(%r15)
+	clgf	%r0, 524287(%r1,%r15)
+	clgf	%r0, 524287(%r15,%r1)
+	clgf	%r15, 0
+
+#CHECK: clgfi	%r0, 0                  # encoding: [0xc2,0x0e,0x00,0x00,0x00,0x00]
+#CHECK: clgfi	%r0, 4294967295         # encoding: [0xc2,0x0e,0xff,0xff,0xff,0xff]
+#CHECK: clgfi	%r15, 0                 # encoding: [0xc2,0xfe,0x00,0x00,0x00,0x00]
+
+	clgfi	%r0, 0
+	clgfi	%r0, (1 << 32) - 1
+	clgfi	%r15, 0
+
+#CHECK: clgfr	%r0, %r0                # encoding: [0xb9,0x31,0x00,0x00]
+#CHECK: clgfr	%r0, %r15               # encoding: [0xb9,0x31,0x00,0x0f]
+#CHECK: clgfr	%r15, %r0               # encoding: [0xb9,0x31,0x00,0xf0]
+#CHECK: clgfr	%r7, %r8                # encoding: [0xb9,0x31,0x00,0x78]
+
+	clgfr	%r0,%r0
+	clgfr	%r0,%r15
+	clgfr	%r15,%r0
+	clgfr	%r7,%r8
+
+#CHECK: clgfrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	clgfrl	%r0, -0x100000000
+#CHECK: clgfrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	clgfrl	%r0, -2
+#CHECK: clgfrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	clgfrl	%r0, 0
+#CHECK: clgfrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	clgfrl	%r0, 0xfffffffe
+
+#CHECK: clgfrl	%r0, foo                # encoding: [0xc6,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clgfrl	%r15, foo               # encoding: [0xc6,0xfe,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clgfrl	%r0,foo
+	clgfrl	%r15,foo
+
+#CHECK: clgfrl	%r3, bar+100            # encoding: [0xc6,0x3e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clgfrl	%r4, bar+100            # encoding: [0xc6,0x4e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clgfrl	%r3,bar+100
+	clgfrl	%r4,bar+100
+
+#CHECK: clgfrl	%r7, frob@PLT           # encoding: [0xc6,0x7e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clgfrl	%r8, frob@PLT           # encoding: [0xc6,0x8e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clgfrl	%r7,frob@PLT
+	clgfrl	%r8,frob@PLT
+
+#CHECK: clghrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	clghrl	%r0, -0x100000000
+#CHECK: clghrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	clghrl	%r0, -2
+#CHECK: clghrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	clghrl	%r0, 0
+#CHECK: clghrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	clghrl	%r0, 0xfffffffe
+
+#CHECK: clghrl	%r0, foo                # encoding: [0xc6,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clghrl	%r15, foo               # encoding: [0xc6,0xf6,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clghrl	%r0,foo
+	clghrl	%r15,foo
+
+#CHECK: clghrl	%r3, bar+100            # encoding: [0xc6,0x36,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clghrl	%r4, bar+100            # encoding: [0xc6,0x46,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clghrl	%r3,bar+100
+	clghrl	%r4,bar+100
+
+#CHECK: clghrl	%r7, frob@PLT           # encoding: [0xc6,0x76,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clghrl	%r8, frob@PLT           # encoding: [0xc6,0x86,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clghrl	%r7,frob@PLT
+	clghrl	%r8,frob@PLT
+
+#CHECK: clghsi	0, 0                    # encoding: [0xe5,0x59,0x00,0x00,0x00,0x00]
+#CHECK: clghsi	4095, 0                 # encoding: [0xe5,0x59,0x0f,0xff,0x00,0x00]
+#CHECK: clghsi	0, 65535                # encoding: [0xe5,0x59,0x00,0x00,0xff,0xff]
+#CHECK: clghsi	0(%r1), 42              # encoding: [0xe5,0x59,0x10,0x00,0x00,0x2a]
+#CHECK: clghsi	0(%r15), 42             # encoding: [0xe5,0x59,0xf0,0x00,0x00,0x2a]
+#CHECK: clghsi	4095(%r1), 42           # encoding: [0xe5,0x59,0x1f,0xff,0x00,0x2a]
+#CHECK: clghsi	4095(%r15), 42          # encoding: [0xe5,0x59,0xff,0xff,0x00,0x2a]
+
+	clghsi	0, 0
+	clghsi	4095, 0
+	clghsi	0, 65535
+	clghsi	0(%r1), 42
+	clghsi	0(%r15), 42
+	clghsi	4095(%r1), 42
+	clghsi	4095(%r15), 42
+
+#CHECK: clgij	%r0, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x7d]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clgij	%r0, 255, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0xff,0x7d]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clgij	%r15, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x7d]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clgij	%r0, 0, 0, 0
+	clgij	%r0, 255, 0, 0
+	clgij	%r15, 0, 0, 0
+
+#CHECK: clgij	%r1, 193, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, -0x10000
+#CHECK: clgij	%r1, 193, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, -2
+#CHECK: clgij	%r1, 193, 0, .[[LAB:L.*]]		# encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, 0
+#CHECK: clgij	%r1, 193, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, 0xfffe
+
+#CHECK: clgij	%r1, 193, 0, foo                  # encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, foo
+
+#CHECK: clgij	%r1, 193, 1, foo                  # encoding: [0xec,0x11,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 1, foo
+
+#CHECK: clgij	%r1, 193, 2, foo                  # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijh	%r1, 193, foo                     # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijnle	%r1, 193, foo                     # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 2, foo
+	clgijh	%r1, 193, foo
+	clgijnle	%r1, 193, foo
+
+#CHECK: clgij	%r1, 193, 3, foo                  # encoding: [0xec,0x13,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 3, foo
+
+#CHECK: clgij	%r1, 193, 4, foo                  # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijl	%r1, 193, foo                     # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijnhe	%r1, 193, foo                     # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 4, foo
+	clgijl	%r1, 193, foo
+	clgijnhe	%r1, 193, foo
+
+#CHECK: clgij	%r1, 193, 5, foo                  # encoding: [0xec,0x15,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 5, foo
+
+#CHECK: clgij	%r1, 193, 6, foo                  # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijlh	%r1, 193, foo                     # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijne	%r1, 193, foo                     # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 6, foo
+	clgijlh	%r1, 193, foo
+	clgijne	%r1, 193, foo
+
+#CHECK: clgij	%r1, 193, 7, foo                  # encoding: [0xec,0x17,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 7, foo
+
+#CHECK: clgij	%r1, 193, 8, foo                  # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgije	%r1, 193, foo                     # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijnlh	%r1, 193, foo                     # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 8, foo
+	clgije	%r1, 193, foo
+	clgijnlh	%r1, 193, foo
+
+#CHECK: clgij	%r1, 193, 9, foo                  # encoding: [0xec,0x19,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 9, foo
+
+#CHECK: clgij	%r1, 193, 10, foo                 # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijhe	%r1, 193, foo                     # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijnl	%r1, 193, foo                     # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 10, foo
+	clgijhe	%r1, 193, foo
+	clgijnl	%r1, 193, foo
+
+#CHECK: clgij	%r1, 193, 11, foo                 # encoding: [0xec,0x1b,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 11, foo
+
+#CHECK: clgij	%r1, 193, 12, foo                 # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijle	%r1, 193, foo                     # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgijnh	%r1, 193, foo                     # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 12, foo
+	clgijle	%r1, 193, foo
+	clgijnh	%r1, 193, foo
+
+#CHECK: clgij	%r1, 193, 13, foo                 # encoding: [0xec,0x1d,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 13, foo
+
+#CHECK: clgij	%r1, 193, 14, foo                 # encoding: [0xec,0x1e,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 14, foo
+
+#CHECK: clgij	%r1, 193, 15, foo                 # encoding: [0xec,0x1f,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 15, foo
+
+#CHECK: clgij	%r1, 193, 0, bar+100              # encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, bar+100
+
+#CHECK: clgijh	%r1, 193, bar+100                 # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijh	%r1, 193, bar+100
+
+#CHECK: clgijnle	%r1, 193, bar+100                 # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijnle	%r1, 193, bar+100
+
+#CHECK: clgijl	%r1, 193, bar+100                 # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijl	%r1, 193, bar+100
+
+#CHECK: clgijnhe	%r1, 193, bar+100                 # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijnhe	%r1, 193, bar+100
+
+#CHECK: clgijlh	%r1, 193, bar+100                 # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijlh	%r1, 193, bar+100
+
+#CHECK: clgijne	%r1, 193, bar+100                 # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijne	%r1, 193, bar+100
+
+#CHECK: clgije	%r1, 193, bar+100                 # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgije	%r1, 193, bar+100
+
+#CHECK: clgijnlh	%r1, 193, bar+100                 # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijnlh	%r1, 193, bar+100
+
+#CHECK: clgijhe	%r1, 193, bar+100                 # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijhe	%r1, 193, bar+100
+
+#CHECK: clgijnl	%r1, 193, bar+100                 # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijnl	%r1, 193, bar+100
+
+#CHECK: clgijle	%r1, 193, bar+100                 # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijle	%r1, 193, bar+100
+
+#CHECK: clgijnh	%r1, 193, bar+100                 # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgijnh	%r1, 193, bar+100
+
+#CHECK: clgij	%r1, 193, 0, bar@PLT              # encoding: [0xec,0x10,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgij	%r1, 193, 0, bar@PLT
+
+#CHECK: clgijh	%r1, 193, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijh	%r1, 193, bar@PLT
+
+#CHECK: clgijnle	%r1, 193, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijnle	%r1, 193, bar@PLT
+
+#CHECK: clgijl	%r1, 193, bar@PLT                 # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijl	%r1, 193, bar@PLT
+
+#CHECK: clgijnhe	%r1, 193, bar@PLT                 # encoding: [0xec,0x14,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijnhe	%r1, 193, bar@PLT
+
+#CHECK: clgijlh	%r1, 193, bar@PLT                 # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijlh	%r1, 193, bar@PLT
+
+#CHECK: clgijne	%r1, 193, bar@PLT                 # encoding: [0xec,0x16,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijne	%r1, 193, bar@PLT
+
+#CHECK: clgije	%r1, 193, bar@PLT                 # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgije	%r1, 193, bar@PLT
+
+#CHECK: clgijnlh	%r1, 193, bar@PLT                 # encoding: [0xec,0x18,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijnlh	%r1, 193, bar@PLT
+
+#CHECK: clgijhe	%r1, 193, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijhe	%r1, 193, bar@PLT
+
+#CHECK: clgijnl	%r1, 193, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijnl	%r1, 193, bar@PLT
+
+#CHECK: clgijle	%r1, 193, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijle	%r1, 193, bar@PLT
+
+#CHECK: clgijnh	%r1, 193, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xc1,0x7d]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgijnh	%r1, 193, bar@PLT
+
+#CHECK: clgr	%r0, %r0                # encoding: [0xb9,0x21,0x00,0x00]
+#CHECK: clgr	%r0, %r15               # encoding: [0xb9,0x21,0x00,0x0f]
+#CHECK: clgr	%r15, %r0               # encoding: [0xb9,0x21,0x00,0xf0]
+#CHECK: clgr	%r7, %r8                # encoding: [0xb9,0x21,0x00,0x78]
+
+	clgr	%r0,%r0
+	clgr	%r0,%r15
+	clgr	%r15,%r0
+	clgr	%r7,%r8
+
+#CHECK: clgrj	%r0, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clgrj	%r0, %r15, 0, .[[LAB:L.*]]	# encoding: [0xec,0x0f,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clgrj	%r15, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clgrj	%r7, %r8, 0, .[[LAB:L.*]]	# encoding: [0xec,0x78,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clgrj	%r0,%r0,0,0
+	clgrj	%r0,%r15,0,0
+	clgrj	%r15,%r0,0,0
+	clgrj	%r7,%r8,0,0
+
+#CHECK: clgrj	%r1, %r2, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, -0x10000
+#CHECK: clgrj	%r1, %r2, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, -2
+#CHECK: clgrj	%r1, %r2, 0, .[[LAB:L.*]]		# encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, 0
+#CHECK: clgrj	%r1, %r2, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, 0xfffe
+
+#CHECK: clgrj	%r1, %r2, 0, foo                  # encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, foo
+
+#CHECK: clgrj	%r1, %r2, 1, foo                  # encoding: [0xec,0x12,A,A,0x10,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 1, foo
+
+#CHECK: clgrj	%r1, %r2, 2, foo                  # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjnle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 2, foo
+	clgrjh	%r1, %r2, foo
+	clgrjnle	%r1, %r2, foo
+
+#CHECK: clgrj	%r1, %r2, 3, foo                  # encoding: [0xec,0x12,A,A,0x30,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 3, foo
+
+#CHECK: clgrj	%r1, %r2, 4, foo                  # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjnhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 4, foo
+	clgrjl	%r1, %r2, foo
+	clgrjnhe	%r1, %r2, foo
+
+#CHECK: clgrj	%r1, %r2, 5, foo                  # encoding: [0xec,0x12,A,A,0x50,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 5, foo
+
+#CHECK: clgrj	%r1, %r2, 6, foo                  # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjne	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 6, foo
+	clgrjlh	%r1, %r2, foo
+	clgrjne	%r1, %r2, foo
+
+#CHECK: clgrj	%r1, %r2, 7, foo                  # encoding: [0xec,0x12,A,A,0x70,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 7, foo
+
+#CHECK: clgrj	%r1, %r2, 8, foo                  # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrje	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjnlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 8, foo
+	clgrje	%r1, %r2, foo
+	clgrjnlh	%r1, %r2, foo
+
+#CHECK: clgrj	%r1, %r2, 9, foo                  # encoding: [0xec,0x12,A,A,0x90,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 9, foo
+
+#CHECK: clgrj	%r1, %r2, 10, foo                 # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjnl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 10, foo
+	clgrjhe	%r1, %r2, foo
+	clgrjnl	%r1, %r2, foo
+
+#CHECK: clgrj	%r1, %r2, 11, foo                 # encoding: [0xec,0x12,A,A,0xb0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 11, foo
+
+#CHECK: clgrj	%r1, %r2, 12, foo                 # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clgrjnh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 12, foo
+	clgrjle	%r1, %r2, foo
+	clgrjnh	%r1, %r2, foo
+
+#CHECK: clgrj	%r1, %r2, 13, foo                 # encoding: [0xec,0x12,A,A,0xd0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 13, foo
+
+#CHECK: clgrj	%r1, %r2, 14, foo                 # encoding: [0xec,0x12,A,A,0xe0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 14, foo
+
+#CHECK: clgrj	%r1, %r2, 15, foo                 # encoding: [0xec,0x12,A,A,0xf0,0x65]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 15, foo
+
+#CHECK: clgrj	%r1, %r2, 0, bar+100              # encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, bar+100
+
+#CHECK: clgrjh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjh	%r1, %r2, bar+100
+
+#CHECK: clgrjnle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjnle	%r1, %r2, bar+100
+
+#CHECK: clgrjl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjl	%r1, %r2, bar+100
+
+#CHECK: clgrjnhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjnhe	%r1, %r2, bar+100
+
+#CHECK: clgrjlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjlh	%r1, %r2, bar+100
+
+#CHECK: clgrjne	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjne	%r1, %r2, bar+100
+
+#CHECK: clgrje	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrje	%r1, %r2, bar+100
+
+#CHECK: clgrjnlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjnlh	%r1, %r2, bar+100
+
+#CHECK: clgrjhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjhe	%r1, %r2, bar+100
+
+#CHECK: clgrjnl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjnl	%r1, %r2, bar+100
+
+#CHECK: clgrjle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjle	%r1, %r2, bar+100
+
+#CHECK: clgrjnh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clgrjnh	%r1, %r2, bar+100
+
+#CHECK: clgrj	%r1, %r2, 0, bar@PLT              # encoding: [0xec,0x12,A,A,0x00,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrj	%r1, %r2, 0, bar@PLT
+
+#CHECK: clgrjh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjh	%r1, %r2, bar@PLT
+
+#CHECK: clgrjnle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjnle	%r1, %r2, bar@PLT
+
+#CHECK: clgrjl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjl	%r1, %r2, bar@PLT
+
+#CHECK: clgrjnhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjnhe	%r1, %r2, bar@PLT
+
+#CHECK: clgrjlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjlh	%r1, %r2, bar@PLT
+
+#CHECK: clgrjne	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjne	%r1, %r2, bar@PLT
+
+#CHECK: clgrje	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrje	%r1, %r2, bar@PLT
+
+#CHECK: clgrjnlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjnlh	%r1, %r2, bar@PLT
+
+#CHECK: clgrjhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjhe	%r1, %r2, bar@PLT
+
+#CHECK: clgrjnl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjnl	%r1, %r2, bar@PLT
+
+#CHECK: clgrjle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjle	%r1, %r2, bar@PLT
+
+#CHECK: clgrjnh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x65]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clgrjnh	%r1, %r2, bar@PLT
+
+#CHECK: clgrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	clgrl	%r0, -0x100000000
+#CHECK: clgrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x0a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	clgrl	%r0, -2
+#CHECK: clgrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x0a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	clgrl	%r0, 0
+#CHECK: clgrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x0a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	clgrl	%r0, 0xfffffffe
+
+#CHECK: clgrl	%r0, foo                # encoding: [0xc6,0x0a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clgrl	%r15, foo               # encoding: [0xc6,0xfa,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clgrl	%r0,foo
+	clgrl	%r15,foo
+
+#CHECK: clgrl	%r3, bar+100            # encoding: [0xc6,0x3a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clgrl	%r4, bar+100            # encoding: [0xc6,0x4a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clgrl	%r3,bar+100
+	clgrl	%r4,bar+100
+
+#CHECK: clgrl	%r7, frob@PLT           # encoding: [0xc6,0x7a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clgrl	%r8, frob@PLT           # encoding: [0xc6,0x8a,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clgrl	%r7,frob@PLT
+	clgrl	%r8,frob@PLT
+
+#CHECK: clhhsi	0, 0                    # encoding: [0xe5,0x55,0x00,0x00,0x00,0x00]
+#CHECK: clhhsi	4095, 0                 # encoding: [0xe5,0x55,0x0f,0xff,0x00,0x00]
+#CHECK: clhhsi	0, 65535                # encoding: [0xe5,0x55,0x00,0x00,0xff,0xff]
+#CHECK: clhhsi	0(%r1), 42              # encoding: [0xe5,0x55,0x10,0x00,0x00,0x2a]
+#CHECK: clhhsi	0(%r15), 42             # encoding: [0xe5,0x55,0xf0,0x00,0x00,0x2a]
+#CHECK: clhhsi	4095(%r1), 42           # encoding: [0xe5,0x55,0x1f,0xff,0x00,0x2a]
+#CHECK: clhhsi	4095(%r15), 42          # encoding: [0xe5,0x55,0xff,0xff,0x00,0x2a]
+
+	clhhsi	0, 0
+	clhhsi	4095, 0
+	clhhsi	0, 65535
+	clhhsi	0(%r1), 42
+	clhhsi	0(%r15), 42
+	clhhsi	4095(%r1), 42
+	clhhsi	4095(%r15), 42
+
+#CHECK: clhrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	clhrl	%r0, -0x100000000
+#CHECK: clhrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	clhrl	%r0, -2
+#CHECK: clhrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	clhrl	%r0, 0
+#CHECK: clhrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	clhrl	%r0, 0xfffffffe
+
+#CHECK: clhrl	%r0, foo                # encoding: [0xc6,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clhrl	%r15, foo               # encoding: [0xc6,0xf7,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clhrl	%r0,foo
+	clhrl	%r15,foo
+
+#CHECK: clhrl	%r3, bar+100            # encoding: [0xc6,0x37,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clhrl	%r4, bar+100            # encoding: [0xc6,0x47,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clhrl	%r3,bar+100
+	clhrl	%r4,bar+100
+
+#CHECK: clhrl	%r7, frob@PLT           # encoding: [0xc6,0x77,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clhrl	%r8, frob@PLT           # encoding: [0xc6,0x87,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clhrl	%r7,frob@PLT
+	clhrl	%r8,frob@PLT
+
+#CHECK: cli	0, 0                    # encoding: [0x95,0x00,0x00,0x00]
+#CHECK: cli	4095, 0                 # encoding: [0x95,0x00,0x0f,0xff]
+#CHECK: cli	0, 255                  # encoding: [0x95,0xff,0x00,0x00]
+#CHECK: cli	0(%r1), 42              # encoding: [0x95,0x2a,0x10,0x00]
+#CHECK: cli	0(%r15), 42             # encoding: [0x95,0x2a,0xf0,0x00]
+#CHECK: cli	4095(%r1), 42           # encoding: [0x95,0x2a,0x1f,0xff]
+#CHECK: cli	4095(%r15), 42          # encoding: [0x95,0x2a,0xff,0xff]
+
+	cli	0, 0
+	cli	4095, 0
+	cli	0, 255
+	cli	0(%r1), 42
+	cli	0(%r15), 42
+	cli	4095(%r1), 42
+	cli	4095(%r15), 42
+
+#CHECK: clij	%r0, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x7f]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clij	%r0, 255, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0xff,0x7f]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clij	%r15, 0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x7f]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clij	%r0, 0, 0, 0
+	clij	%r0, 255, 0, 0
+	clij	%r15, 0, 0, 0
+
+#CHECK: clij	%r1, 193, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, -0x10000
+#CHECK: clij	%r1, 193, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, -2
+#CHECK: clij	%r1, 193, 0, .[[LAB:L.*]]		# encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, 0
+#CHECK: clij	%r1, 193, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, 0xfffe
+
+#CHECK: clij	%r1, 193, 0, foo                  # encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, foo
+
+#CHECK: clij	%r1, 193, 1, foo                  # encoding: [0xec,0x11,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 1, foo
+
+#CHECK: clij	%r1, 193, 2, foo                  # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijh	%r1, 193, foo                     # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijnle	%r1, 193, foo                     # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 2, foo
+	clijh	%r1, 193, foo
+	clijnle	%r1, 193, foo
+
+#CHECK: clij	%r1, 193, 3, foo                  # encoding: [0xec,0x13,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 3, foo
+
+#CHECK: clij	%r1, 193, 4, foo                  # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijl	%r1, 193, foo                     # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijnhe	%r1, 193, foo                     # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 4, foo
+	clijl	%r1, 193, foo
+	clijnhe	%r1, 193, foo
+
+#CHECK: clij	%r1, 193, 5, foo                  # encoding: [0xec,0x15,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 5, foo
+
+#CHECK: clij	%r1, 193, 6, foo                  # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijlh	%r1, 193, foo                     # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijne	%r1, 193, foo                     # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 6, foo
+	clijlh	%r1, 193, foo
+	clijne	%r1, 193, foo
+
+#CHECK: clij	%r1, 193, 7, foo                  # encoding: [0xec,0x17,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 7, foo
+
+#CHECK: clij	%r1, 193, 8, foo                  # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clije	%r1, 193, foo                     # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijnlh	%r1, 193, foo                     # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 8, foo
+	clije	%r1, 193, foo
+	clijnlh	%r1, 193, foo
+
+#CHECK: clij	%r1, 193, 9, foo                  # encoding: [0xec,0x19,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 9, foo
+
+#CHECK: clij	%r1, 193, 10, foo                 # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijhe	%r1, 193, foo                     # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijnl	%r1, 193, foo                     # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 10, foo
+	clijhe	%r1, 193, foo
+	clijnl	%r1, 193, foo
+
+#CHECK: clij	%r1, 193, 11, foo                 # encoding: [0xec,0x1b,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 11, foo
+
+#CHECK: clij	%r1, 193, 12, foo                 # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijle	%r1, 193, foo                     # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clijnh	%r1, 193, foo                     # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 12, foo
+	clijle	%r1, 193, foo
+	clijnh	%r1, 193, foo
+
+#CHECK: clij	%r1, 193, 13, foo                 # encoding: [0xec,0x1d,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 13, foo
+
+#CHECK: clij	%r1, 193, 14, foo                 # encoding: [0xec,0x1e,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 14, foo
+
+#CHECK: clij	%r1, 193, 15, foo                 # encoding: [0xec,0x1f,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 15, foo
+
+#CHECK: clij	%r1, 193, 0, bar+100              # encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, bar+100
+
+#CHECK: clijh	%r1, 193, bar+100                 # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijh	%r1, 193, bar+100
+
+#CHECK: clijnle	%r1, 193, bar+100                 # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijnle	%r1, 193, bar+100
+
+#CHECK: clijl	%r1, 193, bar+100                 # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijl	%r1, 193, bar+100
+
+#CHECK: clijnhe	%r1, 193, bar+100                 # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijnhe	%r1, 193, bar+100
+
+#CHECK: clijlh	%r1, 193, bar+100                 # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijlh	%r1, 193, bar+100
+
+#CHECK: clijne	%r1, 193, bar+100                 # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijne	%r1, 193, bar+100
+
+#CHECK: clije	%r1, 193, bar+100                 # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clije	%r1, 193, bar+100
+
+#CHECK: clijnlh	%r1, 193, bar+100                 # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijnlh	%r1, 193, bar+100
+
+#CHECK: clijhe	%r1, 193, bar+100                 # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijhe	%r1, 193, bar+100
+
+#CHECK: clijnl	%r1, 193, bar+100                 # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijnl	%r1, 193, bar+100
+
+#CHECK: clijle	%r1, 193, bar+100                 # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijle	%r1, 193, bar+100
+
+#CHECK: clijnh	%r1, 193, bar+100                 # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clijnh	%r1, 193, bar+100
+
+#CHECK: clij	%r1, 193, 0, bar@PLT              # encoding: [0xec,0x10,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clij	%r1, 193, 0, bar@PLT
+
+#CHECK: clijh	%r1, 193, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijh	%r1, 193, bar@PLT
+
+#CHECK: clijnle	%r1, 193, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijnle	%r1, 193, bar@PLT
+
+#CHECK: clijl	%r1, 193, bar@PLT                 # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijl	%r1, 193, bar@PLT
+
+#CHECK: clijnhe	%r1, 193, bar@PLT                 # encoding: [0xec,0x14,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijnhe	%r1, 193, bar@PLT
+
+#CHECK: clijlh	%r1, 193, bar@PLT                 # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijlh	%r1, 193, bar@PLT
+
+#CHECK: clijne	%r1, 193, bar@PLT                 # encoding: [0xec,0x16,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijne	%r1, 193, bar@PLT
+
+#CHECK: clije	%r1, 193, bar@PLT                 # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clije	%r1, 193, bar@PLT
+
+#CHECK: clijnlh	%r1, 193, bar@PLT                 # encoding: [0xec,0x18,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijnlh	%r1, 193, bar@PLT
+
+#CHECK: clijhe	%r1, 193, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijhe	%r1, 193, bar@PLT
+
+#CHECK: clijnl	%r1, 193, bar@PLT                 # encoding: [0xec,0x1a,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijnl	%r1, 193, bar@PLT
+
+#CHECK: clijle	%r1, 193, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijle	%r1, 193, bar@PLT
+
+#CHECK: clijnh	%r1, 193, bar@PLT                 # encoding: [0xec,0x1c,A,A,0xc1,0x7f]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clijnh	%r1, 193, bar@PLT
+
+#CHECK: cliy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x55]
+#CHECK: cliy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x55]
+#CHECK: cliy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x55]
+#CHECK: cliy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x55]
+#CHECK: cliy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x55]
+#CHECK: cliy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x55]
+#CHECK: cliy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x55]
+#CHECK: cliy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x55]
+#CHECK: cliy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x55]
+#CHECK: cliy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x55]
+
+	cliy	-524288, 0
+	cliy	-1, 0
+	cliy	0, 0
+	cliy	1, 0
+	cliy	524287, 0
+	cliy	0, 255
+	cliy	0(%r1), 42
+	cliy	0(%r15), 42
+	cliy	524287(%r1), 42
+	cliy	524287(%r15), 42
+
+#CHECK: clr	%r0, %r0                # encoding: [0x15,0x00]
+#CHECK: clr	%r0, %r15               # encoding: [0x15,0x0f]
+#CHECK: clr	%r15, %r0               # encoding: [0x15,0xf0]
+#CHECK: clr	%r7, %r8                # encoding: [0x15,0x78]
+
+	clr	%r0,%r0
+	clr	%r0,%r15
+	clr	%r15,%r0
+	clr	%r7,%r8
+
+#CHECK: clrj	%r0, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clrj	%r0, %r15, 0, .[[LAB:L.*]]	# encoding: [0xec,0x0f,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clrj	%r15, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: clrj	%r7, %r8, 0, .[[LAB:L.*]]	# encoding: [0xec,0x78,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clrj	%r0,%r0,0,0
+	clrj	%r0,%r15,0,0
+	clrj	%r15,%r0,0,0
+	clrj	%r7,%r8,0,0
+
+#CHECK: clrj	%r1, %r2, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, -0x10000
+#CHECK: clrj	%r1, %r2, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, -2
+#CHECK: clrj	%r1, %r2, 0, .[[LAB:L.*]]		# encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, 0
+#CHECK: clrj	%r1, %r2, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, 0xfffe
+
+#CHECK: clrj	%r1, %r2, 0, foo                  # encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, foo
+
+#CHECK: clrj	%r1, %r2, 1, foo                  # encoding: [0xec,0x12,A,A,0x10,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 1, foo
+
+#CHECK: clrj	%r1, %r2, 2, foo                  # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjnle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 2, foo
+	clrjh	%r1, %r2, foo
+	clrjnle	%r1, %r2, foo
+
+#CHECK: clrj	%r1, %r2, 3, foo                  # encoding: [0xec,0x12,A,A,0x30,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 3, foo
+
+#CHECK: clrj	%r1, %r2, 4, foo                  # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjnhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 4, foo
+	clrjl	%r1, %r2, foo
+	clrjnhe	%r1, %r2, foo
+
+#CHECK: clrj	%r1, %r2, 5, foo                  # encoding: [0xec,0x12,A,A,0x50,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 5, foo
+
+#CHECK: clrj	%r1, %r2, 6, foo                  # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjne	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 6, foo
+	clrjlh	%r1, %r2, foo
+	clrjne	%r1, %r2, foo
+
+#CHECK: clrj	%r1, %r2, 7, foo                  # encoding: [0xec,0x12,A,A,0x70,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 7, foo
+
+#CHECK: clrj	%r1, %r2, 8, foo                  # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrje	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjnlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 8, foo
+	clrje	%r1, %r2, foo
+	clrjnlh	%r1, %r2, foo
+
+#CHECK: clrj	%r1, %r2, 9, foo                  # encoding: [0xec,0x12,A,A,0x90,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 9, foo
+
+#CHECK: clrj	%r1, %r2, 10, foo                 # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjnl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 10, foo
+	clrjhe	%r1, %r2, foo
+	clrjnl	%r1, %r2, foo
+
+#CHECK: clrj	%r1, %r2, 11, foo                 # encoding: [0xec,0x12,A,A,0xb0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 11, foo
+
+#CHECK: clrj	%r1, %r2, 12, foo                 # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: clrjnh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 12, foo
+	clrjle	%r1, %r2, foo
+	clrjnh	%r1, %r2, foo
+
+#CHECK: clrj	%r1, %r2, 13, foo                 # encoding: [0xec,0x12,A,A,0xd0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 13, foo
+
+#CHECK: clrj	%r1, %r2, 14, foo                 # encoding: [0xec,0x12,A,A,0xe0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 14, foo
+
+#CHECK: clrj	%r1, %r2, 15, foo                 # encoding: [0xec,0x12,A,A,0xf0,0x77]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 15, foo
+
+#CHECK: clrj	%r1, %r2, 0, bar+100              # encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, bar+100
+
+#CHECK: clrjh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjh	%r1, %r2, bar+100
+
+#CHECK: clrjnle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjnle	%r1, %r2, bar+100
+
+#CHECK: clrjl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjl	%r1, %r2, bar+100
+
+#CHECK: clrjnhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjnhe	%r1, %r2, bar+100
+
+#CHECK: clrjlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjlh	%r1, %r2, bar+100
+
+#CHECK: clrjne	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjne	%r1, %r2, bar+100
+
+#CHECK: clrje	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrje	%r1, %r2, bar+100
+
+#CHECK: clrjnlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjnlh	%r1, %r2, bar+100
+
+#CHECK: clrjhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjhe	%r1, %r2, bar+100
+
+#CHECK: clrjnl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjnl	%r1, %r2, bar+100
+
+#CHECK: clrjle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjle	%r1, %r2, bar+100
+
+#CHECK: clrjnh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	clrjnh	%r1, %r2, bar+100
+
+#CHECK: clrj	%r1, %r2, 0, bar@PLT              # encoding: [0xec,0x12,A,A,0x00,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrj	%r1, %r2, 0, bar@PLT
+
+#CHECK: clrjh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjh	%r1, %r2, bar@PLT
+
+#CHECK: clrjnle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjnle	%r1, %r2, bar@PLT
+
+#CHECK: clrjl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjl	%r1, %r2, bar@PLT
+
+#CHECK: clrjnhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjnhe	%r1, %r2, bar@PLT
+
+#CHECK: clrjlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjlh	%r1, %r2, bar@PLT
+
+#CHECK: clrjne	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjne	%r1, %r2, bar@PLT
+
+#CHECK: clrje	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrje	%r1, %r2, bar@PLT
+
+#CHECK: clrjnlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjnlh	%r1, %r2, bar@PLT
+
+#CHECK: clrjhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjhe	%r1, %r2, bar@PLT
+
+#CHECK: clrjnl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjnl	%r1, %r2, bar@PLT
+
+#CHECK: clrjle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjle	%r1, %r2, bar@PLT
+
+#CHECK: clrjnh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x77]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	clrjnh	%r1, %r2, bar@PLT
+
+#CHECK: clrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	clrl	%r0, -0x100000000
+#CHECK: clrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	clrl	%r0, -2
+#CHECK: clrl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	clrl	%r0, 0
+#CHECK: clrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	clrl	%r0, 0xfffffffe
+
+#CHECK: clrl	%r0, foo                # encoding: [0xc6,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clrl	%r15, foo               # encoding: [0xc6,0xff,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+# Symbol operand: the A bytes are covered by the PC32DBL fixup verified above.
+	clrl	%r0,foo
+	clrl	%r15,foo
+
+#CHECK: clrl	%r3, bar+100            # encoding: [0xc6,0x3f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clrl	%r4, bar+100            # encoding: [0xc6,0x4f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+# Symbol+addend operand: the fixup value carries the addend plus the field offset.
+	clrl	%r3,bar+100
+	clrl	%r4,bar+100
+
+#CHECK: clrl	%r7, frob@PLT           # encoding: [0xc6,0x7f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clrl	%r8, frob@PLT           # encoding: [0xc6,0x8f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+# @PLT operand: the modifier is kept in the fixup value verified above.
+	clrl	%r7,frob@PLT
+	clrl	%r8,frob@PLT
+
+#CHECK: clst	%r0, %r0                # encoding: [0xb2,0x5d,0x00,0x00]
+#CHECK: clst	%r0, %r15               # encoding: [0xb2,0x5d,0x00,0x0f]
+#CHECK: clst	%r15, %r0               # encoding: [0xb2,0x5d,0x00,0xf0]
+#CHECK: clst	%r7, %r8                # encoding: [0xb2,0x5d,0x00,0x78]
+
+	clst	%r0,%r0
+	clst	%r0,%r15
+	clst	%r15,%r0
+	clst	%r7,%r8
+
+#CHECK: cly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x55]
+#CHECK: cly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x55]
+#CHECK: cly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x55]
+#CHECK: cly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x55]
+#CHECK: cly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x55]
+#CHECK: cly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x55]
+#CHECK: cly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x55]
+#CHECK: cly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x55]
+#CHECK: cly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x55]
+#CHECK: cly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x55]
+
+	cly	%r0, -524288
+	cly	%r0, -1
+	cly	%r0, 0
+	cly	%r0, 1
+	cly	%r0, 524287
+	cly	%r0, 0(%r1)
+	cly	%r0, 0(%r15)
+	cly	%r0, 524287(%r1,%r15)
+	cly	%r0, 524287(%r15,%r1)
+	cly	%r15, 0
+
+#CHECK: cpsdr	%f0, %f0, %f0           # encoding: [0xb3,0x72,0x00,0x00]
+#CHECK: cpsdr	%f0, %f0, %f15          # encoding: [0xb3,0x72,0x00,0x0f]
+#CHECK: cpsdr	%f0, %f15, %f0          # encoding: [0xb3,0x72,0xf0,0x00]
+#CHECK: cpsdr	%f15, %f0, %f0          # encoding: [0xb3,0x72,0x00,0xf0]
+#CHECK: cpsdr	%f1, %f2, %f3           # encoding: [0xb3,0x72,0x20,0x13]
+#CHECK: cpsdr	%f15, %f15, %f15        # encoding: [0xb3,0x72,0xf0,0xff]
+
+	cpsdr	%f0, %f0, %f0
+	cpsdr	%f0, %f0, %f15
+	cpsdr	%f0, %f15, %f0
+	cpsdr	%f15, %f0, %f0
+	cpsdr	%f1, %f2, %f3
+	cpsdr	%f15, %f15, %f15
+
+
+#CHECK: cr	%r0, %r0                # encoding: [0x19,0x00]
+#CHECK: cr	%r0, %r15               # encoding: [0x19,0x0f]
+#CHECK: cr	%r15, %r0               # encoding: [0x19,0xf0]
+#CHECK: cr	%r7, %r8                # encoding: [0x19,0x78]
+
+	cr	%r0,%r0
+	cr	%r0,%r15
+	cr	%r15,%r0
+	cr	%r7,%r8
+
+#CHECK: crj	%r0, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0x00,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: crj	%r0, %r15, 0, .[[LAB:L.*]]	# encoding: [0xec,0x0f,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: crj	%r15, %r0, 0, .[[LAB:L.*]]	# encoding: [0xec,0xf0,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+#CHECK: crj	%r7, %r8, 0, .[[LAB:L.*]]	# encoding: [0xec,0x78,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	crj	%r0,%r0,0,0
+	crj	%r0,%r15,0,0
+	crj	%r15,%r0,0,0
+	crj	%r7,%r8,0,0
+
+#CHECK: crj	%r1, %r2, 0, .[[LAB:L.*]]-65536	# encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-65536)+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, -0x10000
+#CHECK: crj	%r1, %r2, 0, .[[LAB:L.*]]-2	# encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, -2
+#CHECK: crj	%r1, %r2, 0, .[[LAB:L.*]]		# encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, 0
+#CHECK: crj	%r1, %r2, 0, .[[LAB:L.*]]+65534	# encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+65534)+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, 0xfffe
+
+#CHECK: crj	%r1, %r2, 0, foo                  # encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, foo
+
+#CHECK: crj	%r1, %r2, 1, foo                  # encoding: [0xec,0x12,A,A,0x10,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 1, foo
+
+#CHECK: crj	%r1, %r2, 2, foo                  # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjnle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 2, foo
+	crjh	%r1, %r2, foo
+	crjnle	%r1, %r2, foo
+
+#CHECK: crj	%r1, %r2, 3, foo                  # encoding: [0xec,0x12,A,A,0x30,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 3, foo
+
+#CHECK: crj	%r1, %r2, 4, foo                  # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjnhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 4, foo
+	crjl	%r1, %r2, foo
+	crjnhe	%r1, %r2, foo
+
+#CHECK: crj	%r1, %r2, 5, foo                  # encoding: [0xec,0x12,A,A,0x50,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 5, foo
+
+#CHECK: crj	%r1, %r2, 6, foo                  # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjne	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 6, foo
+	crjlh	%r1, %r2, foo
+	crjne	%r1, %r2, foo
+
+#CHECK: crj	%r1, %r2, 7, foo                  # encoding: [0xec,0x12,A,A,0x70,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 7, foo
+
+#CHECK: crj	%r1, %r2, 8, foo                  # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crje	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjnlh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 8, foo
+	crje	%r1, %r2, foo
+	crjnlh	%r1, %r2, foo
+
+#CHECK: crj	%r1, %r2, 9, foo                  # encoding: [0xec,0x12,A,A,0x90,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 9, foo
+
+#CHECK: crj	%r1, %r2, 10, foo                 # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjhe	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjnl	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 10, foo
+	crjhe	%r1, %r2, foo
+	crjnl	%r1, %r2, foo
+
+#CHECK: crj	%r1, %r2, 11, foo                 # encoding: [0xec,0x12,A,A,0xb0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 11, foo
+
+#CHECK: crj	%r1, %r2, 12, foo                 # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjle	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: crjnh	%r1, %r2, foo                     # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 12, foo
+	crjle	%r1, %r2, foo
+	crjnh	%r1, %r2, foo
+
+#CHECK: crj	%r1, %r2, 13, foo                 # encoding: [0xec,0x12,A,A,0xd0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 13, foo
+
+#CHECK: crj	%r1, %r2, 14, foo                 # encoding: [0xec,0x12,A,A,0xe0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 14, foo
+
+#CHECK: crj	%r1, %r2, 15, foo                 # encoding: [0xec,0x12,A,A,0xf0,0x76]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 15, foo
+
+#CHECK: crj	%r1, %r2, 0, bar+100              # encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, bar+100
+
+#CHECK: crjh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjh	%r1, %r2, bar+100
+
+#CHECK: crjnle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjnle	%r1, %r2, bar+100
+
+#CHECK: crjl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjl	%r1, %r2, bar+100
+
+#CHECK: crjnhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjnhe	%r1, %r2, bar+100
+
+#CHECK: crjlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjlh	%r1, %r2, bar+100
+
+#CHECK: crjne	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjne	%r1, %r2, bar+100
+
+#CHECK: crje	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crje	%r1, %r2, bar+100
+
+#CHECK: crjnlh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjnlh	%r1, %r2, bar+100
+
+#CHECK: crjhe	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjhe	%r1, %r2, bar+100
+
+#CHECK: crjnl	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjnl	%r1, %r2, bar+100
+
+#CHECK: crjle	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjle	%r1, %r2, bar+100
+
+#CHECK: crjnh	%r1, %r2, bar+100                 # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	crjnh	%r1, %r2, bar+100
+
+#CHECK: crj	%r1, %r2, 0, bar@PLT              # encoding: [0xec,0x12,A,A,0x00,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crj	%r1, %r2, 0, bar@PLT
+
+#CHECK: crjh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjh	%r1, %r2, bar@PLT
+
+#CHECK: crjnle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x20,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjnle	%r1, %r2, bar@PLT
+
+#CHECK: crjl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjl	%r1, %r2, bar@PLT
+
+#CHECK: crjnhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x40,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjnhe	%r1, %r2, bar@PLT
+
+#CHECK: crjlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjlh	%r1, %r2, bar@PLT
+
+#CHECK: crjne	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x60,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjne	%r1, %r2, bar@PLT
+
+#CHECK: crje	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crje	%r1, %r2, bar@PLT
+
+#CHECK: crjnlh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0x80,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjnlh	%r1, %r2, bar@PLT
+
+#CHECK: crjhe	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjhe	%r1, %r2, bar@PLT
+
+#CHECK: crjnl	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xa0,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjnl	%r1, %r2, bar@PLT
+
+#CHECK: crjle	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjle	%r1, %r2, bar@PLT
+
+#CHECK: crjnh	%r1, %r2, bar@PLT                 # encoding: [0xec,0x12,A,A,0xc0,0x76]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	crjnh	%r1, %r2, bar@PLT
+
+#CHECK: crl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	crl	%r0, -0x100000000
+#CHECK: crl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	crl	%r0, -2
+#CHECK: crl	%r0, .[[LAB:L.*]]	# encoding: [0xc6,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	crl	%r0, 0
+#CHECK: crl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	crl	%r0, 0xfffffffe
+
+#CHECK: crl	%r0, foo                # encoding: [0xc6,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: crl	%r15, foo               # encoding: [0xc6,0xfd,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+# Symbol operand: the A bytes are covered by the PC32DBL fixup verified above.
+	crl	%r0,foo
+	crl	%r15,foo
+
+#CHECK: crl	%r3, bar+100            # encoding: [0xc6,0x3d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: crl	%r4, bar+100            # encoding: [0xc6,0x4d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+# Symbol+addend operand: the fixup value carries the addend plus the field offset.
+	crl	%r3,bar+100
+	crl	%r4,bar+100
+
+#CHECK: crl	%r7, frob@PLT           # encoding: [0xc6,0x7d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: crl	%r8, frob@PLT           # encoding: [0xc6,0x8d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+# @PLT operand: the modifier is kept in the fixup value verified above.
+	crl	%r7,frob@PLT
+	crl	%r8,frob@PLT
+
+#CHECK: cs	%r0, %r0, 0             # encoding: [0xba,0x00,0x00,0x00]
+#CHECK: cs	%r0, %r0, 4095          # encoding: [0xba,0x00,0x0f,0xff]
+#CHECK: cs	%r0, %r0, 0(%r1)        # encoding: [0xba,0x00,0x10,0x00]
+#CHECK: cs	%r0, %r0, 0(%r15)       # encoding: [0xba,0x00,0xf0,0x00]
+#CHECK: cs	%r0, %r0, 4095(%r1)     # encoding: [0xba,0x00,0x1f,0xff]
+#CHECK: cs	%r0, %r0, 4095(%r15)    # encoding: [0xba,0x00,0xff,0xff]
+#CHECK: cs	%r0, %r15, 0            # encoding: [0xba,0x0f,0x00,0x00]
+#CHECK: cs	%r15, %r0, 0            # encoding: [0xba,0xf0,0x00,0x00]
+
+	cs	%r0, %r0, 0
+	cs	%r0, %r0, 4095
+	cs	%r0, %r0, 0(%r1)
+	cs	%r0, %r0, 0(%r15)
+	cs	%r0, %r0, 4095(%r1)
+	cs	%r0, %r0, 4095(%r15)
+	cs	%r0, %r15, 0
+	cs	%r15, %r0, 0
+
+#CHECK: csg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x30]
+#CHECK: csg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x30]
+#CHECK: csg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x30]
+#CHECK: csg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x30]
+#CHECK: csg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x30]
+#CHECK: csg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x30]
+#CHECK: csg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x30]
+#CHECK: csg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x30]
+#CHECK: csg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x30]
+#CHECK: csg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x30]
+#CHECK: csg	%r15, %r0, 0            # encoding: [0xeb,0xf0,0x00,0x00,0x00,0x30]
+
+	csg	%r0, %r0, -524288
+	csg	%r0, %r0, -1
+	csg	%r0, %r0, 0
+	csg	%r0, %r0, 1
+	csg	%r0, %r0, 524287
+	csg	%r0, %r0, 0(%r1)
+	csg	%r0, %r0, 0(%r15)
+	csg	%r0, %r0, 524287(%r1)
+	csg	%r0, %r0, 524287(%r15)
+	csg	%r0, %r15, 0
+	csg	%r15, %r0, 0
+
+#CHECK: csy	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x14]
+#CHECK: csy	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x14]
+#CHECK: csy	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x14]
+#CHECK: csy	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x14]
+#CHECK: csy	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x14]
+#CHECK: csy	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x14]
+#CHECK: csy	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x14]
+#CHECK: csy	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x14]
+#CHECK: csy	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x14]
+#CHECK: csy	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x14]
+#CHECK: csy	%r15, %r0, 0            # encoding: [0xeb,0xf0,0x00,0x00,0x00,0x14]
+
+	csy	%r0, %r0, -524288
+	csy	%r0, %r0, -1
+	csy	%r0, %r0, 0
+	csy	%r0, %r0, 1
+	csy	%r0, %r0, 524287
+	csy	%r0, %r0, 0(%r1)
+	csy	%r0, %r0, 0(%r15)
+	csy	%r0, %r0, 524287(%r1)
+	csy	%r0, %r0, 524287(%r15)
+	csy	%r0, %r15, 0
+	csy	%r15, %r0, 0
+
+#CHECK: cxbr	%f0, %f0                # encoding: [0xb3,0x49,0x00,0x00]
+#CHECK: cxbr	%f0, %f13               # encoding: [0xb3,0x49,0x00,0x0d]
+#CHECK: cxbr	%f8, %f8                # encoding: [0xb3,0x49,0x00,0x88]
+#CHECK: cxbr	%f13, %f0               # encoding: [0xb3,0x49,0x00,0xd0]
+
+	cxbr	%f0, %f0
+	cxbr	%f0, %f13
+	cxbr	%f8, %f8
+	cxbr	%f13, %f0
+
+#CHECK: cxfbr	%f0, %r0                # encoding: [0xb3,0x96,0x00,0x00]
+#CHECK: cxfbr	%f0, %r15               # encoding: [0xb3,0x96,0x00,0x0f]
+#CHECK: cxfbr	%f13, %r0               # encoding: [0xb3,0x96,0x00,0xd0]
+#CHECK: cxfbr	%f8, %r7                # encoding: [0xb3,0x96,0x00,0x87]
+#CHECK: cxfbr	%f13, %r15              # encoding: [0xb3,0x96,0x00,0xdf]
+
+	cxfbr	%f0, %r0
+	cxfbr	%f0, %r15
+	cxfbr	%f13, %r0
+	cxfbr	%f8, %r7
+	cxfbr	%f13, %r15
+
+#CHECK: cxgbr	%f0, %r0                # encoding: [0xb3,0xa6,0x00,0x00]
+#CHECK: cxgbr	%f0, %r15               # encoding: [0xb3,0xa6,0x00,0x0f]
+#CHECK: cxgbr	%f13, %r0               # encoding: [0xb3,0xa6,0x00,0xd0]
+#CHECK: cxgbr	%f8, %r7                # encoding: [0xb3,0xa6,0x00,0x87]
+#CHECK: cxgbr	%f13, %r15              # encoding: [0xb3,0xa6,0x00,0xdf]
+
+	cxgbr	%f0, %r0
+	cxgbr	%f0, %r15
+	cxgbr	%f13, %r0
+	cxgbr	%f8, %r7
+	cxgbr	%f13, %r15
+
+#CHECK: cy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x59]
+#CHECK: cy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x59]
+#CHECK: cy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x59]
+#CHECK: cy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x59]
+#CHECK: cy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x59]
+#CHECK: cy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x59]
+#CHECK: cy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x59]
+#CHECK: cy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x59]
+#CHECK: cy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x59]
+#CHECK: cy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x59]
+
+	cy	%r0, -524288
+	cy	%r0, -1
+	cy	%r0, 0
+	cy	%r0, 1
+	cy	%r0, 524287
+	cy	%r0, 0(%r1)
+	cy	%r0, 0(%r15)
+	cy	%r0, 524287(%r1,%r15)
+	cy	%r0, 524287(%r15,%r1)
+	cy	%r15, 0
+
+#CHECK: ddb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1d]
+#CHECK: ddb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1d]
+#CHECK: ddb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1d]
+#CHECK: ddb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1d]
+#CHECK: ddb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1d]
+#CHECK: ddb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1d]
+#CHECK: ddb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1d]
+
+	ddb	%f0, 0
+	ddb	%f0, 4095
+	ddb	%f0, 0(%r1)
+	ddb	%f0, 0(%r15)
+	ddb	%f0, 4095(%r1,%r15)
+	ddb	%f0, 4095(%r15,%r1)
+	ddb	%f15, 0
+
+#CHECK: ddbr	%f0, %f0                # encoding: [0xb3,0x1d,0x00,0x00]
+#CHECK: ddbr	%f0, %f15               # encoding: [0xb3,0x1d,0x00,0x0f]
+#CHECK: ddbr	%f7, %f8                # encoding: [0xb3,0x1d,0x00,0x78]
+#CHECK: ddbr	%f15, %f0               # encoding: [0xb3,0x1d,0x00,0xf0]
+
+	ddbr	%f0, %f0
+	ddbr	%f0, %f15
+	ddbr	%f7, %f8
+	ddbr	%f15, %f0
+
+#CHECK: deb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0d]
+#CHECK: deb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0d]
+#CHECK: deb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0d]
+#CHECK: deb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0d]
+#CHECK: deb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0d]
+#CHECK: deb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0d]
+#CHECK: deb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0d]
+
+	deb	%f0, 0
+	deb	%f0, 4095
+	deb	%f0, 0(%r1)
+	deb	%f0, 0(%r15)
+	deb	%f0, 4095(%r1,%r15)
+	deb	%f0, 4095(%r15,%r1)
+	deb	%f15, 0
+
+#CHECK: debr	%f0, %f0                # encoding: [0xb3,0x0d,0x00,0x00]
+#CHECK: debr	%f0, %f15               # encoding: [0xb3,0x0d,0x00,0x0f]
+#CHECK: debr	%f7, %f8                # encoding: [0xb3,0x0d,0x00,0x78]
+#CHECK: debr	%f15, %f0               # encoding: [0xb3,0x0d,0x00,0xf0]
+
+	debr	%f0, %f0
+	debr	%f0, %f15
+	debr	%f7, %f8
+	debr	%f15, %f0
+
+#CHECK: dl	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x97]
+#CHECK: dl	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x97]
+#CHECK: dl	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x97]
+#CHECK: dl	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x97]
+#CHECK: dl	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x97]
+#CHECK: dl	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x97]
+#CHECK: dl	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x97]
+#CHECK: dl	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x97]
+#CHECK: dl	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x97]
+#CHECK: dl	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x97]
+
+	dl	%r0, -524288
+	dl	%r0, -1
+	dl	%r0, 0
+	dl	%r0, 1
+	dl	%r0, 524287
+	dl	%r0, 0(%r1)
+	dl	%r0, 0(%r15)
+	dl	%r0, 524287(%r1,%r15)
+	dl	%r0, 524287(%r15,%r1)
+	dl	%r14, 0
+
+#CHECK: dlg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x87]
+#CHECK: dlg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x87]
+#CHECK: dlg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x87]
+#CHECK: dlg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x87]
+#CHECK: dlg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x87]
+#CHECK: dlg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x87]
+#CHECK: dlg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x87]
+#CHECK: dlg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x87]
+#CHECK: dlg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x87]
+#CHECK: dlg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x87]
+
+	dlg	%r0, -524288
+	dlg	%r0, -1
+	dlg	%r0, 0
+	dlg	%r0, 1
+	dlg	%r0, 524287
+	dlg	%r0, 0(%r1)
+	dlg	%r0, 0(%r15)
+	dlg	%r0, 524287(%r1,%r15)
+	dlg	%r0, 524287(%r15,%r1)
+	dlg	%r14, 0
+
+#CHECK: dlgr	%r0, %r0                # encoding: [0xb9,0x87,0x00,0x00]
+#CHECK: dlgr	%r0, %r15               # encoding: [0xb9,0x87,0x00,0x0f]
+#CHECK: dlgr	%r14, %r0               # encoding: [0xb9,0x87,0x00,0xe0]
+#CHECK: dlgr	%r6, %r9                # encoding: [0xb9,0x87,0x00,0x69]
+
+	dlgr	%r0,%r0
+	dlgr	%r0,%r15
+	dlgr	%r14,%r0
+	dlgr	%r6,%r9
+
+#CHECK: dlr	%r0, %r0                # encoding: [0xb9,0x97,0x00,0x00]
+#CHECK: dlr	%r0, %r15               # encoding: [0xb9,0x97,0x00,0x0f]
+#CHECK: dlr	%r14, %r0               # encoding: [0xb9,0x97,0x00,0xe0]
+#CHECK: dlr	%r6, %r9                # encoding: [0xb9,0x97,0x00,0x69]
+
+	dlr	%r0,%r0
+	dlr	%r0,%r15
+	dlr	%r14,%r0
+	dlr	%r6,%r9
+
+#CHECK: dsg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0d]
+#CHECK: dsg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0d]
+#CHECK: dsg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0d]
+#CHECK: dsg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0d]
+#CHECK: dsg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0d]
+#CHECK: dsg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0d]
+#CHECK: dsg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0d]
+#CHECK: dsg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0d]
+#CHECK: dsg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0d]
+#CHECK: dsg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x0d]
+
+	dsg	%r0, -524288
+	dsg	%r0, -1
+	dsg	%r0, 0
+	dsg	%r0, 1
+	dsg	%r0, 524287
+	dsg	%r0, 0(%r1)
+	dsg	%r0, 0(%r15)
+	dsg	%r0, 524287(%r1,%r15)
+	dsg	%r0, 524287(%r15,%r1)
+	dsg	%r14, 0
+
+#CHECK: dsgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1d]
+#CHECK: dsgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1d]
+#CHECK: dsgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1d]
+#CHECK: dsgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1d]
+#CHECK: dsgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1d]
+#CHECK: dsgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1d]
+#CHECK: dsgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1d]
+#CHECK: dsgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1d]
+#CHECK: dsgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1d]
+#CHECK: dsgf	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x1d]
+
+	dsgf	%r0, -524288
+	dsgf	%r0, -1
+	dsgf	%r0, 0
+	dsgf	%r0, 1
+	dsgf	%r0, 524287
+	dsgf	%r0, 0(%r1)
+	dsgf	%r0, 0(%r15)
+	dsgf	%r0, 524287(%r1,%r15)
+	dsgf	%r0, 524287(%r15,%r1)
+	dsgf	%r14, 0
+
+#CHECK: dsgfr	%r0, %r0                # encoding: [0xb9,0x1d,0x00,0x00]
+#CHECK: dsgfr	%r0, %r15               # encoding: [0xb9,0x1d,0x00,0x0f]
+#CHECK: dsgfr	%r14, %r0               # encoding: [0xb9,0x1d,0x00,0xe0]
+#CHECK: dsgfr	%r6, %r9                # encoding: [0xb9,0x1d,0x00,0x69]
+
+	dsgfr	%r0,%r0
+	dsgfr	%r0,%r15
+	dsgfr	%r14,%r0
+	dsgfr	%r6,%r9
+
+#CHECK: dsgr	%r0, %r0                # encoding: [0xb9,0x0d,0x00,0x00]
+#CHECK: dsgr	%r0, %r15               # encoding: [0xb9,0x0d,0x00,0x0f]
+#CHECK: dsgr	%r14, %r0               # encoding: [0xb9,0x0d,0x00,0xe0]
+#CHECK: dsgr	%r6, %r9                # encoding: [0xb9,0x0d,0x00,0x69]
+
+	dsgr	%r0,%r0
+	dsgr	%r0,%r15
+	dsgr	%r14,%r0
+	dsgr	%r6,%r9
+
+#CHECK: dxbr	%f0, %f0                # encoding: [0xb3,0x4d,0x00,0x00]
+#CHECK: dxbr	%f0, %f13               # encoding: [0xb3,0x4d,0x00,0x0d]
+#CHECK: dxbr	%f8, %f8                # encoding: [0xb3,0x4d,0x00,0x88]
+#CHECK: dxbr	%f13, %f0               # encoding: [0xb3,0x4d,0x00,0xd0]
+
+	dxbr	%f0, %f0
+	dxbr	%f0, %f13
+	dxbr	%f8, %f8
+	dxbr	%f13, %f0
+
+#CHECK: ear	%r0, %a0                # encoding: [0xb2,0x4f,0x00,0x00]
+#CHECK: ear	%r0, %a15               # encoding: [0xb2,0x4f,0x00,0x0f]
+#CHECK: ear	%r15, %a0               # encoding: [0xb2,0x4f,0x00,0xf0]
+#CHECK: ear	%r7, %a8                # encoding: [0xb2,0x4f,0x00,0x78]
+#CHECK: ear	%r15, %a15              # encoding: [0xb2,0x4f,0x00,0xff]
+
+	ear	%r0, %a0
+	ear	%r0, %a15
+	ear	%r15, %a0
+	ear	%r7, %a8
+	ear	%r15, %a15
+
+#CHECK: fidbr	%f0, 0, %f0             # encoding: [0xb3,0x5f,0x00,0x00]
+#CHECK: fidbr	%f0, 0, %f15            # encoding: [0xb3,0x5f,0x00,0x0f]
+#CHECK: fidbr	%f0, 15, %f0            # encoding: [0xb3,0x5f,0xf0,0x00]
+#CHECK: fidbr	%f4, 5, %f6             # encoding: [0xb3,0x5f,0x50,0x46]
+#CHECK: fidbr	%f15, 0, %f0            # encoding: [0xb3,0x5f,0x00,0xf0]
+
+	fidbr	%f0, 0, %f0
+	fidbr	%f0, 0, %f15
+	fidbr	%f0, 15, %f0
+	fidbr	%f4, 5, %f6
+	fidbr	%f15, 0, %f0
+
+#CHECK: fiebr	%f0, 0, %f0             # encoding: [0xb3,0x57,0x00,0x00]
+#CHECK: fiebr	%f0, 0, %f15            # encoding: [0xb3,0x57,0x00,0x0f]
+#CHECK: fiebr	%f0, 15, %f0            # encoding: [0xb3,0x57,0xf0,0x00]
+#CHECK: fiebr	%f4, 5, %f6             # encoding: [0xb3,0x57,0x50,0x46]
+#CHECK: fiebr	%f15, 0, %f0            # encoding: [0xb3,0x57,0x00,0xf0]
+
+	fiebr	%f0, 0, %f0
+	fiebr	%f0, 0, %f15
+	fiebr	%f0, 15, %f0
+	fiebr	%f4, 5, %f6
+	fiebr	%f15, 0, %f0
+
+#CHECK: fixbr	%f0, 0, %f0             # encoding: [0xb3,0x47,0x00,0x00]
+#CHECK: fixbr	%f0, 0, %f13            # encoding: [0xb3,0x47,0x00,0x0d]
+#CHECK: fixbr	%f0, 15, %f0            # encoding: [0xb3,0x47,0xf0,0x00]
+#CHECK: fixbr	%f4, 5, %f8             # encoding: [0xb3,0x47,0x50,0x48]
+#CHECK: fixbr	%f13, 0, %f0            # encoding: [0xb3,0x47,0x00,0xd0]
+
+	fixbr	%f0, 0, %f0
+	fixbr	%f0, 0, %f13
+	fixbr	%f0, 15, %f0
+	fixbr	%f4, 5, %f8
+	fixbr	%f13, 0, %f0
+
+#CHECK: flogr	%r0, %r0                # encoding: [0xb9,0x83,0x00,0x00]
+#CHECK: flogr	%r0, %r15               # encoding: [0xb9,0x83,0x00,0x0f]
+#CHECK: flogr	%r10, %r9               # encoding: [0xb9,0x83,0x00,0xa9]
+#CHECK: flogr	%r14, %r0               # encoding: [0xb9,0x83,0x00,0xe0]
+
+	flogr	%r0, %r0
+	flogr	%r0, %r15
+	flogr	%r10, %r9
+	flogr	%r14, %r0
+
+#CHECK: ic	%r0, 0                  # encoding: [0x43,0x00,0x00,0x00]
+#CHECK: ic	%r0, 4095               # encoding: [0x43,0x00,0x0f,0xff]
+#CHECK: ic	%r0, 0(%r1)             # encoding: [0x43,0x00,0x10,0x00]
+#CHECK: ic	%r0, 0(%r15)            # encoding: [0x43,0x00,0xf0,0x00]
+#CHECK: ic	%r0, 4095(%r1,%r15)     # encoding: [0x43,0x01,0xff,0xff]
+#CHECK: ic	%r0, 4095(%r15,%r1)     # encoding: [0x43,0x0f,0x1f,0xff]
+#CHECK: ic	%r15, 0                 # encoding: [0x43,0xf0,0x00,0x00]
+
+	ic	%r0, 0
+	ic	%r0, 4095
+	ic	%r0, 0(%r1)
+	ic	%r0, 0(%r15)
+	ic	%r0, 4095(%r1,%r15)
+	ic	%r0, 4095(%r15,%r1)
+	ic	%r15, 0
+
+#CHECK: icy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x73]
+#CHECK: icy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x73]
+#CHECK: icy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x73]
+#CHECK: icy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x73]
+#CHECK: icy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x73]
+#CHECK: icy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x73]
+#CHECK: icy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x73]
+#CHECK: icy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x73]
+#CHECK: icy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x73]
+#CHECK: icy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x73]
+
+	icy	%r0, -524288
+	icy	%r0, -1
+	icy	%r0, 0
+	icy	%r0, 1
+	icy	%r0, 524287
+	icy	%r0, 0(%r1)
+	icy	%r0, 0(%r15)
+	icy	%r0, 524287(%r1,%r15)
+	icy	%r0, 524287(%r15,%r1)
+	icy	%r15, 0
+
+#CHECK: iihf	%r0, 0                  # encoding: [0xc0,0x08,0x00,0x00,0x00,0x00]
+#CHECK: iihf	%r0, 4294967295         # encoding: [0xc0,0x08,0xff,0xff,0xff,0xff]
+#CHECK: iihf	%r15, 0                 # encoding: [0xc0,0xf8,0x00,0x00,0x00,0x00]
+
+	iihf	%r0, 0
+	iihf	%r0, 0xffffffff
+	iihf	%r15, 0
+
+#CHECK: iihh	%r0, 0                  # encoding: [0xa5,0x00,0x00,0x00]
+#CHECK: iihh	%r0, 32768              # encoding: [0xa5,0x00,0x80,0x00]
+#CHECK: iihh	%r0, 65535              # encoding: [0xa5,0x00,0xff,0xff]
+#CHECK: iihh	%r15, 0                 # encoding: [0xa5,0xf0,0x00,0x00]
+
+	iihh	%r0, 0
+	iihh	%r0, 0x8000
+	iihh	%r0, 0xffff
+	iihh	%r15, 0
+
+#CHECK: iihl	%r0, 0                  # encoding: [0xa5,0x01,0x00,0x00]
+#CHECK: iihl	%r0, 32768              # encoding: [0xa5,0x01,0x80,0x00]
+#CHECK: iihl	%r0, 65535              # encoding: [0xa5,0x01,0xff,0xff]
+#CHECK: iihl	%r15, 0                 # encoding: [0xa5,0xf1,0x00,0x00]
+
+	iihl	%r0, 0
+	iihl	%r0, 0x8000
+	iihl	%r0, 0xffff
+	iihl	%r15, 0
+
+#CHECK: iilf	%r0, 0                  # encoding: [0xc0,0x09,0x00,0x00,0x00,0x00]
+#CHECK: iilf	%r0, 4294967295         # encoding: [0xc0,0x09,0xff,0xff,0xff,0xff]
+#CHECK: iilf	%r15, 0                 # encoding: [0xc0,0xf9,0x00,0x00,0x00,0x00]
+
+	iilf	%r0, 0
+	iilf	%r0, 0xffffffff
+	iilf	%r15, 0
+
+#CHECK: iilh	%r0, 0                  # encoding: [0xa5,0x02,0x00,0x00]
+#CHECK: iilh	%r0, 32768              # encoding: [0xa5,0x02,0x80,0x00]
+#CHECK: iilh	%r0, 65535              # encoding: [0xa5,0x02,0xff,0xff]
+#CHECK: iilh	%r15, 0                 # encoding: [0xa5,0xf2,0x00,0x00]
+
+	iilh	%r0, 0
+	iilh	%r0, 0x8000
+	iilh	%r0, 0xffff
+	iilh	%r15, 0
+
+#CHECK: iill	%r0, 0                  # encoding: [0xa5,0x03,0x00,0x00]
+#CHECK: iill	%r0, 32768              # encoding: [0xa5,0x03,0x80,0x00]
+#CHECK: iill	%r0, 65535              # encoding: [0xa5,0x03,0xff,0xff]
+#CHECK: iill	%r15, 0                 # encoding: [0xa5,0xf3,0x00,0x00]
+
+	iill	%r0, 0
+	iill	%r0, 0x8000
+	iill	%r0, 0xffff
+	iill	%r15, 0
+
+#CHECK: ipm	%r0                     # encoding: [0xb2,0x22,0x00,0x00]
+#CHECK: ipm	%r1                     # encoding: [0xb2,0x22,0x00,0x10]
+#CHECK: ipm	%r15                    # encoding: [0xb2,0x22,0x00,0xf0]
+
+	ipm	%r0
+	ipm	%r1
+	ipm	%r15
+
+#CHECK: l	%r0, 0                  # encoding: [0x58,0x00,0x00,0x00]
+#CHECK: l	%r0, 4095               # encoding: [0x58,0x00,0x0f,0xff]
+#CHECK: l	%r0, 0(%r1)             # encoding: [0x58,0x00,0x10,0x00]
+#CHECK: l	%r0, 0(%r15)            # encoding: [0x58,0x00,0xf0,0x00]
+#CHECK: l	%r0, 4095(%r1,%r15)     # encoding: [0x58,0x01,0xff,0xff]
+#CHECK: l	%r0, 4095(%r15,%r1)     # encoding: [0x58,0x0f,0x1f,0xff]
+#CHECK: l	%r15, 0                 # encoding: [0x58,0xf0,0x00,0x00]
+
+	l	%r0, 0
+	l	%r0, 4095
+	l	%r0, 0(%r1)
+	l	%r0, 0(%r15)
+	l	%r0, 4095(%r1,%r15)
+	l	%r0, 4095(%r15,%r1)
+	l	%r15, 0
+
+#CHECK: la	%r0, 0                  # encoding: [0x41,0x00,0x00,0x00]
+#CHECK: la	%r0, 4095               # encoding: [0x41,0x00,0x0f,0xff]
+#CHECK: la	%r0, 0(%r1)             # encoding: [0x41,0x00,0x10,0x00]
+#CHECK: la	%r0, 0(%r15)            # encoding: [0x41,0x00,0xf0,0x00]
+#CHECK: la	%r0, 4095(%r1,%r15)     # encoding: [0x41,0x01,0xff,0xff]
+#CHECK: la	%r0, 4095(%r15,%r1)     # encoding: [0x41,0x0f,0x1f,0xff]
+#CHECK: la	%r15, 0                 # encoding: [0x41,0xf0,0x00,0x00]
+
+	la	%r0, 0
+	la	%r0, 4095
+	la	%r0, 0(%r1)
+	la	%r0, 0(%r15)
+	la	%r0, 4095(%r1,%r15)
+	la	%r0, 4095(%r15,%r1)
+	la	%r15, 0
+
+#CHECK: larl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc0,0x00,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	larl	%r0, -0x100000000
+#CHECK: larl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc0,0x00,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	larl	%r0, -2
+#CHECK: larl	%r0, .[[LAB:L.*]]	# encoding: [0xc0,0x00,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	larl	%r0, 0
+#CHECK: larl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc0,0x00,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	larl	%r0, 0xfffffffe
+
+#CHECK: larl	%r0, foo                # encoding: [0xc0,0x00,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: larl	%r15, foo               # encoding: [0xc0,0xf0,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	larl	%r0,foo
+	larl	%r15,foo
+
+#CHECK: larl	%r3, bar+100            # encoding: [0xc0,0x30,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: larl	%r4, bar+100            # encoding: [0xc0,0x40,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	larl	%r3,bar+100
+	larl	%r4,bar+100
+
+#CHECK: larl	%r7, frob@PLT           # encoding: [0xc0,0x70,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: larl	%r8, frob@PLT           # encoding: [0xc0,0x80,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	larl	%r7,frob@PLT
+	larl	%r8,frob@PLT
+
+#CHECK: lay	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x71]
+#CHECK: lay	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x71]
+#CHECK: lay	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x71]
+#CHECK: lay	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x71]
+#CHECK: lay	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x71]
+#CHECK: lay	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x71]
+#CHECK: lay	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x71]
+#CHECK: lay	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x71]
+#CHECK: lay	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x71]
+#CHECK: lay	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x71]
+
+	lay	%r0, -524288
+	lay	%r0, -1
+	lay	%r0, 0
+	lay	%r0, 1
+	lay	%r0, 524287
+	lay	%r0, 0(%r1)
+	lay	%r0, 0(%r15)
+	lay	%r0, 524287(%r1,%r15)
+	lay	%r0, 524287(%r15,%r1)
+	lay	%r15, 0
+
+#CHECK: lb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x76]
+#CHECK: lb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x76]
+#CHECK: lb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x76]
+#CHECK: lb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x76]
+#CHECK: lb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x76]
+#CHECK: lb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x76]
+#CHECK: lb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x76]
+#CHECK: lb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x76]
+#CHECK: lb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x76]
+#CHECK: lb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x76]
+
+	lb	%r0, -524288
+	lb	%r0, -1
+	lb	%r0, 0
+	lb	%r0, 1
+	lb	%r0, 524287
+	lb	%r0, 0(%r1)
+	lb	%r0, 0(%r15)
+	lb	%r0, 524287(%r1,%r15)
+	lb	%r0, 524287(%r15,%r1)
+	lb	%r15, 0
+
+#CHECK: lbr	%r0, %r15               # encoding: [0xb9,0x26,0x00,0x0f]
+#CHECK: lbr	%r7, %r8                # encoding: [0xb9,0x26,0x00,0x78]
+#CHECK: lbr	%r15, %r0               # encoding: [0xb9,0x26,0x00,0xf0]
+
+	lbr	%r0, %r15
+	lbr	%r7, %r8
+	lbr	%r15, %r0
+
+#CHECK: lcdbr	%f0, %f9                # encoding: [0xb3,0x13,0x00,0x09]
+#CHECK: lcdbr	%f0, %f15               # encoding: [0xb3,0x13,0x00,0x0f]
+#CHECK: lcdbr	%f15, %f0               # encoding: [0xb3,0x13,0x00,0xf0]
+#CHECK: lcdbr	%f15, %f9               # encoding: [0xb3,0x13,0x00,0xf9]
+
+	lcdbr	%f0,%f9
+	lcdbr	%f0,%f15
+	lcdbr	%f15,%f0
+	lcdbr	%f15,%f9
+
+#CHECK: lcebr	%f0, %f9                # encoding: [0xb3,0x03,0x00,0x09]
+#CHECK: lcebr	%f0, %f15               # encoding: [0xb3,0x03,0x00,0x0f]
+#CHECK: lcebr	%f15, %f0               # encoding: [0xb3,0x03,0x00,0xf0]
+#CHECK: lcebr	%f15, %f9               # encoding: [0xb3,0x03,0x00,0xf9]
+
+	lcebr	%f0,%f9
+	lcebr	%f0,%f15
+	lcebr	%f15,%f0
+	lcebr	%f15,%f9
+
+#CHECK: lcgfr	%r0, %r0                # encoding: [0xb9,0x13,0x00,0x00]
+#CHECK: lcgfr	%r0, %r15               # encoding: [0xb9,0x13,0x00,0x0f]
+#CHECK: lcgfr	%r15, %r0               # encoding: [0xb9,0x13,0x00,0xf0]
+#CHECK: lcgfr	%r7, %r8                # encoding: [0xb9,0x13,0x00,0x78]
+
+	lcgfr	%r0,%r0
+	lcgfr	%r0,%r15
+	lcgfr	%r15,%r0
+	lcgfr	%r7,%r8
+
+#CHECK: lcgr	%r0, %r0                # encoding: [0xb9,0x03,0x00,0x00]
+#CHECK: lcgr	%r0, %r15               # encoding: [0xb9,0x03,0x00,0x0f]
+#CHECK: lcgr	%r15, %r0               # encoding: [0xb9,0x03,0x00,0xf0]
+#CHECK: lcgr	%r7, %r8                # encoding: [0xb9,0x03,0x00,0x78]
+
+	lcgr	%r0,%r0
+	lcgr	%r0,%r15
+	lcgr	%r15,%r0
+	lcgr	%r7,%r8
+
+#CHECK: lcr	%r0, %r0                # encoding: [0x13,0x00]
+#CHECK: lcr	%r0, %r15               # encoding: [0x13,0x0f]
+#CHECK: lcr	%r15, %r0               # encoding: [0x13,0xf0]
+#CHECK: lcr	%r7, %r8                # encoding: [0x13,0x78]
+
+	lcr	%r0,%r0
+	lcr	%r0,%r15
+	lcr	%r15,%r0
+	lcr	%r7,%r8
+
+#CHECK: lcxbr	%f0, %f8                # encoding: [0xb3,0x43,0x00,0x08]
+#CHECK: lcxbr	%f0, %f13               # encoding: [0xb3,0x43,0x00,0x0d]
+#CHECK: lcxbr	%f13, %f0               # encoding: [0xb3,0x43,0x00,0xd0]
+#CHECK: lcxbr	%f13, %f9               # encoding: [0xb3,0x43,0x00,0xd9]
+
+	lcxbr	%f0,%f8
+	lcxbr	%f0,%f13
+	lcxbr	%f13,%f0
+	lcxbr	%f13,%f9
+
+#CHECK: ld	%f0, 0                  # encoding: [0x68,0x00,0x00,0x00]
+#CHECK: ld	%f0, 4095               # encoding: [0x68,0x00,0x0f,0xff]
+#CHECK: ld	%f0, 0(%r1)             # encoding: [0x68,0x00,0x10,0x00]
+#CHECK: ld	%f0, 0(%r15)            # encoding: [0x68,0x00,0xf0,0x00]
+#CHECK: ld	%f0, 4095(%r1,%r15)     # encoding: [0x68,0x01,0xff,0xff]
+#CHECK: ld	%f0, 4095(%r15,%r1)     # encoding: [0x68,0x0f,0x1f,0xff]
+#CHECK: ld	%f15, 0                 # encoding: [0x68,0xf0,0x00,0x00]
+
+	ld	%f0, 0
+	ld	%f0, 4095
+	ld	%f0, 0(%r1)
+	ld	%f0, 0(%r15)
+	ld	%f0, 4095(%r1,%r15)
+	ld	%f0, 4095(%r15,%r1)
+	ld	%f15, 0
+
+#CHECK: ldeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x04]
+#CHECK: ldeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x04]
+#CHECK: ldeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x04]
+#CHECK: ldeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: ldeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x04]
+#CHECK: ldeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x04]
+#CHECK: ldeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x04]
+
+	ldeb	%f0, 0
+	ldeb	%f0, 4095
+	ldeb	%f0, 0(%r1)
+	ldeb	%f0, 0(%r15)
+	ldeb	%f0, 4095(%r1,%r15)
+	ldeb	%f0, 4095(%r15,%r1)
+	ldeb	%f15, 0
+
+#CHECK: ldebr	%f0, %f15               # encoding: [0xb3,0x04,0x00,0x0f]
+#CHECK: ldebr	%f7, %f8                # encoding: [0xb3,0x04,0x00,0x78]
+#CHECK: ldebr	%f15, %f0               # encoding: [0xb3,0x04,0x00,0xf0]
+
+	ldebr	%f0, %f15
+	ldebr	%f7, %f8
+	ldebr	%f15, %f0
+
+#CHECK: ldgr	%f0, %r0                # encoding: [0xb3,0xc1,0x00,0x00]
+#CHECK: ldgr	%f0, %r15               # encoding: [0xb3,0xc1,0x00,0x0f]
+#CHECK: ldgr	%f15, %r0               # encoding: [0xb3,0xc1,0x00,0xf0]
+#CHECK: ldgr	%f7, %r9                # encoding: [0xb3,0xc1,0x00,0x79]
+#CHECK: ldgr	%f15, %r15              # encoding: [0xb3,0xc1,0x00,0xff]
+
+	ldgr	%f0,%r0
+	ldgr	%f0,%r15
+	ldgr	%f15,%r0
+	ldgr	%f7,%r9
+	ldgr	%f15,%r15
+
+#CHECK: ldr	%f0, %f9                # encoding: [0x28,0x09]
+#CHECK: ldr	%f0, %f15               # encoding: [0x28,0x0f]
+#CHECK: ldr	%f15, %f0               # encoding: [0x28,0xf0]
+#CHECK: ldr	%f15, %f9               # encoding: [0x28,0xf9]
+
+	ldr	%f0,%f9
+	ldr	%f0,%f15
+	ldr	%f15,%f0
+	ldr	%f15,%f9
+
+#CHECK: ldxbr	%f0, %f0                # encoding: [0xb3,0x45,0x00,0x00]
+#CHECK: ldxbr	%f0, %f13               # encoding: [0xb3,0x45,0x00,0x0d]
+#CHECK: ldxbr	%f8, %f12               # encoding: [0xb3,0x45,0x00,0x8c]
+#CHECK: ldxbr	%f13, %f0               # encoding: [0xb3,0x45,0x00,0xd0]
+#CHECK: ldxbr	%f13, %f13              # encoding: [0xb3,0x45,0x00,0xdd]
+
+	ldxbr	%f0, %f0
+	ldxbr	%f0, %f13
+	ldxbr	%f8, %f12
+	ldxbr	%f13, %f0
+	ldxbr	%f13, %f13
+
+#CHECK: ldy	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x65]
+#CHECK: ldy	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x65]
+#CHECK: ldy	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x65]
+#CHECK: ldy	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x65]
+#CHECK: ldy	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x65]
+#CHECK: ldy	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x65]
+#CHECK: ldy	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x65]
+#CHECK: ldy	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x65]
+#CHECK: ldy	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x65]
+#CHECK: ldy	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x65]
+
+	ldy	%f0, -524288
+	ldy	%f0, -1
+	ldy	%f0, 0
+	ldy	%f0, 1
+	ldy	%f0, 524287
+	ldy	%f0, 0(%r1)
+	ldy	%f0, 0(%r15)
+	ldy	%f0, 524287(%r1,%r15)
+	ldy	%f0, 524287(%r15,%r1)
+	ldy	%f15, 0
+
+#CHECK: le	%f0, 0                  # encoding: [0x78,0x00,0x00,0x00]
+#CHECK: le	%f0, 4095               # encoding: [0x78,0x00,0x0f,0xff]
+#CHECK: le	%f0, 0(%r1)             # encoding: [0x78,0x00,0x10,0x00]
+#CHECK: le	%f0, 0(%r15)            # encoding: [0x78,0x00,0xf0,0x00]
+#CHECK: le	%f0, 4095(%r1,%r15)     # encoding: [0x78,0x01,0xff,0xff]
+#CHECK: le	%f0, 4095(%r15,%r1)     # encoding: [0x78,0x0f,0x1f,0xff]
+#CHECK: le	%f15, 0                 # encoding: [0x78,0xf0,0x00,0x00]
+
+	le	%f0, 0
+	le	%f0, 4095
+	le	%f0, 0(%r1)
+	le	%f0, 0(%r15)
+	le	%f0, 4095(%r1,%r15)
+	le	%f0, 4095(%r15,%r1)
+	le	%f15, 0
+
+#CHECK: ledbr	%f0, %f0                # encoding: [0xb3,0x44,0x00,0x00]
+#CHECK: ledbr	%f0, %f15               # encoding: [0xb3,0x44,0x00,0x0f]
+#CHECK: ledbr	%f7, %f8                # encoding: [0xb3,0x44,0x00,0x78]
+#CHECK: ledbr	%f15, %f0               # encoding: [0xb3,0x44,0x00,0xf0]
+#CHECK: ledbr	%f15, %f15              # encoding: [0xb3,0x44,0x00,0xff]
+
+	ledbr	%f0, %f0
+	ledbr	%f0, %f15
+	ledbr	%f7, %f8
+	ledbr	%f15, %f0
+	ledbr	%f15, %f15
+
+#CHECK: ler	%f0, %f9                # encoding: [0x38,0x09]
+#CHECK: ler	%f0, %f15               # encoding: [0x38,0x0f]
+#CHECK: ler	%f15, %f0               # encoding: [0x38,0xf0]
+#CHECK: ler	%f15, %f9               # encoding: [0x38,0xf9]
+
+	ler	%f0,%f9
+	ler	%f0,%f15
+	ler	%f15,%f0
+	ler	%f15,%f9
+
+#CHECK: lexbr	%f0, %f0                # encoding: [0xb3,0x46,0x00,0x00]
+#CHECK: lexbr	%f0, %f13               # encoding: [0xb3,0x46,0x00,0x0d]
+#CHECK: lexbr	%f8, %f12               # encoding: [0xb3,0x46,0x00,0x8c]
+#CHECK: lexbr	%f13, %f0               # encoding: [0xb3,0x46,0x00,0xd0]
+#CHECK: lexbr	%f13, %f13              # encoding: [0xb3,0x46,0x00,0xdd]
+
+	lexbr	%f0, %f0
+	lexbr	%f0, %f13
+	lexbr	%f8, %f12
+	lexbr	%f13, %f0
+	lexbr	%f13, %f13
+
+#CHECK: ley	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x64]
+#CHECK: ley	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x64]
+#CHECK: ley	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x64]
+#CHECK: ley	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x64]
+#CHECK: ley	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x64]
+#CHECK: ley	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x64]
+#CHECK: ley	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x64]
+#CHECK: ley	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x64]
+#CHECK: ley	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x64]
+#CHECK: ley	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x64]
+
+	ley	%f0, -524288
+	ley	%f0, -1
+	ley	%f0, 0
+	ley	%f0, 1
+	ley	%f0, 524287
+	ley	%f0, 0(%r1)
+	ley	%f0, 0(%r15)
+	ley	%f0, 524287(%r1,%r15)
+	ley	%f0, 524287(%r15,%r1)
+	ley	%f15, 0
+
+#CHECK: lg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x04]
+#CHECK: lg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x04]
+#CHECK: lg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x04]
+#CHECK: lg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x04]
+#CHECK: lg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x04]
+#CHECK: lg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x04]
+#CHECK: lg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: lg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x04]
+#CHECK: lg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x04]
+#CHECK: lg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x04]
+
+	lg	%r0, -524288
+	lg	%r0, -1
+	lg	%r0, 0
+	lg	%r0, 1
+	lg	%r0, 524287
+	lg	%r0, 0(%r1)
+	lg	%r0, 0(%r15)
+	lg	%r0, 524287(%r1,%r15)
+	lg	%r0, 524287(%r15,%r1)
+	lg	%r15, 0
+
+#CHECK: lgb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x77]
+#CHECK: lgb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x77]
+#CHECK: lgb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x77]
+#CHECK: lgb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x77]
+#CHECK: lgb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x77]
+#CHECK: lgb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x77]
+#CHECK: lgb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x77]
+#CHECK: lgb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x77]
+#CHECK: lgb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x77]
+#CHECK: lgb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x77]
+
+	lgb	%r0, -524288
+	lgb	%r0, -1
+	lgb	%r0, 0
+	lgb	%r0, 1
+	lgb	%r0, 524287
+	lgb	%r0, 0(%r1)
+	lgb	%r0, 0(%r15)
+	lgb	%r0, 524287(%r1,%r15)
+	lgb	%r0, 524287(%r15,%r1)
+	lgb	%r15, 0
+
+
+#CHECK: lgbr	%r0, %r15               # encoding: [0xb9,0x06,0x00,0x0f]
+#CHECK: lgbr	%r7, %r8                # encoding: [0xb9,0x06,0x00,0x78]
+#CHECK: lgbr	%r15, %r0               # encoding: [0xb9,0x06,0x00,0xf0]
+
+	lgbr	%r0, %r15
+	lgbr	%r7, %r8
+	lgbr	%r15, %r0
+
+#CHECK: lgdr	%r0, %f0                # encoding: [0xb3,0xcd,0x00,0x00]
+#CHECK: lgdr	%r0, %f15               # encoding: [0xb3,0xcd,0x00,0x0f]
+#CHECK: lgdr	%r15, %f0               # encoding: [0xb3,0xcd,0x00,0xf0]
+#CHECK: lgdr	%r8, %f8                # encoding: [0xb3,0xcd,0x00,0x88]
+#CHECK: lgdr	%r15, %f15              # encoding: [0xb3,0xcd,0x00,0xff]
+
+	lgdr	%r0,%f0
+	lgdr	%r0,%f15
+	lgdr	%r15,%f0
+	lgdr	%r8,%f8
+	lgdr	%r15,%f15
+
+#CHECK: lgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x14]
+#CHECK: lgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x14]
+#CHECK: lgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x14]
+#CHECK: lgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x14]
+#CHECK: lgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x14]
+#CHECK: lgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x14]
+#CHECK: lgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x14]
+#CHECK: lgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x14]
+#CHECK: lgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x14]
+#CHECK: lgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x14]
+
+	lgf	%r0, -524288
+	lgf	%r0, -1
+	lgf	%r0, 0
+	lgf	%r0, 1
+	lgf	%r0, 524287
+	lgf	%r0, 0(%r1)
+	lgf	%r0, 0(%r15)
+	lgf	%r0, 524287(%r1,%r15)
+	lgf	%r0, 524287(%r15,%r1)
+	lgf	%r15, 0
+
+
+#CHECK: lgfi	%r0, -2147483648        # encoding: [0xc0,0x01,0x80,0x00,0x00,0x00]
+#CHECK: lgfi	%r0, -1                 # encoding: [0xc0,0x01,0xff,0xff,0xff,0xff]
+#CHECK: lgfi	%r0, 0                  # encoding: [0xc0,0x01,0x00,0x00,0x00,0x00]
+#CHECK: lgfi	%r0, 1                  # encoding: [0xc0,0x01,0x00,0x00,0x00,0x01]
+#CHECK: lgfi	%r0, 2147483647         # encoding: [0xc0,0x01,0x7f,0xff,0xff,0xff]
+#CHECK: lgfi	%r15, 0                 # encoding: [0xc0,0xf1,0x00,0x00,0x00,0x00]
+
+	lgfi	%r0, -1 << 31
+	lgfi	%r0, -1
+	lgfi	%r0, 0
+	lgfi	%r0, 1
+	lgfi	%r0, (1 << 31) - 1
+	lgfi	%r15, 0
+
+#CHECK: lgfr	%r0, %r15               # encoding: [0xb9,0x14,0x00,0x0f]
+#CHECK: lgfr	%r7, %r8                # encoding: [0xb9,0x14,0x00,0x78]
+#CHECK: lgfr	%r15, %r0               # encoding: [0xb9,0x14,0x00,0xf0]
+
+	lgfr	%r0, %r15
+	lgfr	%r7, %r8
+	lgfr	%r15, %r0
+
+#CHECK: lgfrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	lgfrl	%r0, -0x100000000
+#CHECK: lgfrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	lgfrl	%r0, -2
+#CHECK: lgfrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	lgfrl	%r0, 0
+#CHECK: lgfrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	lgfrl	%r0, 0xfffffffe
+
+#CHECK: lgfrl	%r0, foo                # encoding: [0xc4,0x0c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lgfrl	%r15, foo               # encoding: [0xc4,0xfc,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lgfrl	%r0,foo
+	lgfrl	%r15,foo
+
+#CHECK: lgfrl	%r3, bar+100            # encoding: [0xc4,0x3c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lgfrl	%r4, bar+100            # encoding: [0xc4,0x4c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lgfrl	%r3,bar+100
+	lgfrl	%r4,bar+100
+
+#CHECK: lgfrl	%r7, frob@PLT           # encoding: [0xc4,0x7c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lgfrl	%r8, frob@PLT           # encoding: [0xc4,0x8c,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lgfrl	%r7,frob@PLT
+	lgfrl	%r8,frob@PLT
+
+#CHECK: lgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x15]
+#CHECK: lgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x15]
+#CHECK: lgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x15]
+#CHECK: lgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x15]
+#CHECK: lgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x15]
+#CHECK: lgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x15]
+#CHECK: lgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x15]
+#CHECK: lgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x15]
+#CHECK: lgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x15]
+#CHECK: lgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x15]
+
+	lgh	%r0, -524288
+	lgh	%r0, -1
+	lgh	%r0, 0
+	lgh	%r0, 1
+	lgh	%r0, 524287
+	lgh	%r0, 0(%r1)
+	lgh	%r0, 0(%r15)
+	lgh	%r0, 524287(%r1,%r15)
+	lgh	%r0, 524287(%r15,%r1)
+	lgh	%r15, 0
+
+
+#CHECK: lghi	%r0, -32768             # encoding: [0xa7,0x09,0x80,0x00]
+#CHECK: lghi	%r0, -1                 # encoding: [0xa7,0x09,0xff,0xff]
+#CHECK: lghi	%r0, 0                  # encoding: [0xa7,0x09,0x00,0x00]
+#CHECK: lghi	%r0, 1                  # encoding: [0xa7,0x09,0x00,0x01]
+#CHECK: lghi	%r0, 32767              # encoding: [0xa7,0x09,0x7f,0xff]
+#CHECK: lghi	%r15, 0                 # encoding: [0xa7,0xf9,0x00,0x00]
+
+	lghi	%r0, -32768
+	lghi	%r0, -1
+	lghi	%r0, 0
+	lghi	%r0, 1
+	lghi	%r0, 32767
+	lghi	%r15, 0
+
+#CHECK: lghr	%r0, %r15               # encoding: [0xb9,0x07,0x00,0x0f]
+#CHECK: lghr	%r7, %r8                # encoding: [0xb9,0x07,0x00,0x78]
+#CHECK: lghr	%r15, %r0               # encoding: [0xb9,0x07,0x00,0xf0]
+
+	lghr	%r0, %r15
+	lghr	%r7, %r8
+	lghr	%r15, %r0
+
+#CHECK: lghrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	lghrl	%r0, -0x100000000
+#CHECK: lghrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	lghrl	%r0, -2
+#CHECK: lghrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	lghrl	%r0, 0
+#CHECK: lghrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	lghrl	%r0, 0xfffffffe
+
+#CHECK: lghrl	%r0, foo                # encoding: [0xc4,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lghrl	%r15, foo               # encoding: [0xc4,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lghrl	%r0,foo
+	lghrl	%r15,foo
+
+#CHECK: lghrl	%r3, bar+100            # encoding: [0xc4,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lghrl	%r4, bar+100            # encoding: [0xc4,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lghrl	%r3,bar+100
+	lghrl	%r4,bar+100
+
+#CHECK: lghrl	%r7, frob@PLT           # encoding: [0xc4,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lghrl	%r8, frob@PLT           # encoding: [0xc4,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lghrl	%r7,frob@PLT
+	lghrl	%r8,frob@PLT
+
+#CHECK: lgr	%r0, %r9                # encoding: [0xb9,0x04,0x00,0x09]
+#CHECK: lgr	%r0, %r15               # encoding: [0xb9,0x04,0x00,0x0f]
+#CHECK: lgr	%r15, %r0               # encoding: [0xb9,0x04,0x00,0xf0]
+#CHECK: lgr	%r15, %r9               # encoding: [0xb9,0x04,0x00,0xf9]
+
+	lgr	%r0,%r9
+	lgr	%r0,%r15
+	lgr	%r15,%r0
+	lgr	%r15,%r9
+
+#CHECK: lgrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	lgrl	%r0, -0x100000000
+#CHECK: lgrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	lgrl	%r0, -2
+#CHECK: lgrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	lgrl	%r0, 0
+#CHECK: lgrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	lgrl	%r0, 0xfffffffe
+
+#CHECK: lgrl	%r0, foo                # encoding: [0xc4,0x08,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lgrl	%r15, foo               # encoding: [0xc4,0xf8,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lgrl	%r0,foo
+	lgrl	%r15,foo
+
+#CHECK: lgrl	%r3, bar+100            # encoding: [0xc4,0x38,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lgrl	%r4, bar+100            # encoding: [0xc4,0x48,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lgrl	%r3,bar+100
+	lgrl	%r4,bar+100
+
+#CHECK: lgrl	%r7, frob@PLT           # encoding: [0xc4,0x78,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lgrl	%r8, frob@PLT           # encoding: [0xc4,0x88,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lgrl	%r7,frob@PLT
+	lgrl	%r8,frob@PLT
+
+#CHECK: lh	%r0, 0                  # encoding: [0x48,0x00,0x00,0x00]
+#CHECK: lh	%r0, 4095               # encoding: [0x48,0x00,0x0f,0xff]
+#CHECK: lh	%r0, 0(%r1)             # encoding: [0x48,0x00,0x10,0x00]
+#CHECK: lh	%r0, 0(%r15)            # encoding: [0x48,0x00,0xf0,0x00]
+#CHECK: lh	%r0, 4095(%r1,%r15)     # encoding: [0x48,0x01,0xff,0xff]
+#CHECK: lh	%r0, 4095(%r15,%r1)     # encoding: [0x48,0x0f,0x1f,0xff]
+#CHECK: lh	%r15, 0                 # encoding: [0x48,0xf0,0x00,0x00]
+
+	lh	%r0, 0
+	lh	%r0, 4095
+	lh	%r0, 0(%r1)
+	lh	%r0, 0(%r15)
+	lh	%r0, 4095(%r1,%r15)
+	lh	%r0, 4095(%r15,%r1)
+	lh	%r15, 0
+
+#CHECK: lhi	%r0, -32768             # encoding: [0xa7,0x08,0x80,0x00]
+#CHECK: lhi	%r0, -1                 # encoding: [0xa7,0x08,0xff,0xff]
+#CHECK: lhi	%r0, 0                  # encoding: [0xa7,0x08,0x00,0x00]
+#CHECK: lhi	%r0, 1                  # encoding: [0xa7,0x08,0x00,0x01]
+#CHECK: lhi	%r0, 32767              # encoding: [0xa7,0x08,0x7f,0xff]
+#CHECK: lhi	%r15, 0                 # encoding: [0xa7,0xf8,0x00,0x00]
+
+	lhi	%r0, -32768
+	lhi	%r0, -1
+	lhi	%r0, 0
+	lhi	%r0, 1
+	lhi	%r0, 32767
+	lhi	%r15, 0
+
+#CHECK: lhr	%r0, %r15               # encoding: [0xb9,0x27,0x00,0x0f]
+#CHECK: lhr	%r7, %r8                # encoding: [0xb9,0x27,0x00,0x78]
+#CHECK: lhr	%r15, %r0               # encoding: [0xb9,0x27,0x00,0xf0]
+
+	lhr	%r0, %r15
+	lhr	%r7, %r8
+	lhr	%r15, %r0
+
+#CHECK: lhrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	lhrl	%r0, -0x100000000
+#CHECK: lhrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	lhrl	%r0, -2
+#CHECK: lhrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	lhrl	%r0, 0
+#CHECK: lhrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	lhrl	%r0, 0xfffffffe
+
+#CHECK: lhrl	%r0, foo                # encoding: [0xc4,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lhrl	%r15, foo               # encoding: [0xc4,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lhrl	%r0,foo
+	lhrl	%r15,foo
+
+#CHECK: lhrl	%r3, bar+100            # encoding: [0xc4,0x35,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lhrl	%r4, bar+100            # encoding: [0xc4,0x45,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lhrl	%r3,bar+100
+	lhrl	%r4,bar+100
+
+#CHECK: lhrl	%r7, frob@PLT           # encoding: [0xc4,0x75,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lhrl	%r8, frob@PLT           # encoding: [0xc4,0x85,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lhrl	%r7,frob@PLT
+	lhrl	%r8,frob@PLT
+
+#CHECK: lhy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x78]
+#CHECK: lhy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x78]
+#CHECK: lhy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x78]
+#CHECK: lhy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x78]
+#CHECK: lhy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x78]
+#CHECK: lhy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x78]
+#CHECK: lhy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x78]
+#CHECK: lhy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x78]
+#CHECK: lhy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x78]
+#CHECK: lhy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x78]
+
+	lhy	%r0, -524288
+	lhy	%r0, -1
+	lhy	%r0, 0
+	lhy	%r0, 1
+	lhy	%r0, 524287
+	lhy	%r0, 0(%r1)
+	lhy	%r0, 0(%r15)
+	lhy	%r0, 524287(%r1,%r15)
+	lhy	%r0, 524287(%r15,%r1)
+	lhy	%r15, 0
+
+#CHECK: llc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x94]
+#CHECK: llc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x94]
+#CHECK: llc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x94]
+#CHECK: llc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x94]
+#CHECK: llc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x94]
+#CHECK: llc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x94]
+#CHECK: llc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x94]
+#CHECK: llc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x94]
+#CHECK: llc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x94]
+#CHECK: llc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x94]
+
+	llc	%r0, -524288
+	llc	%r0, -1
+	llc	%r0, 0
+	llc	%r0, 1
+	llc	%r0, 524287
+	llc	%r0, 0(%r1)
+	llc	%r0, 0(%r15)
+	llc	%r0, 524287(%r1,%r15)
+	llc	%r0, 524287(%r15,%r1)
+	llc	%r15, 0
+
+#CHECK: llcr	%r0, %r15               # encoding: [0xb9,0x94,0x00,0x0f]
+#CHECK: llcr	%r7, %r8                # encoding: [0xb9,0x94,0x00,0x78]
+#CHECK: llcr	%r15, %r0               # encoding: [0xb9,0x94,0x00,0xf0]
+
+	llcr	%r0, %r15
+	llcr	%r7, %r8
+	llcr	%r15, %r0
+
+#CHECK: llgc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x90]
+#CHECK: llgc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x90]
+#CHECK: llgc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x90]
+#CHECK: llgc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x90]
+#CHECK: llgc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x90]
+#CHECK: llgc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x90]
+#CHECK: llgc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x90]
+#CHECK: llgc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x90]
+#CHECK: llgc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x90]
+#CHECK: llgc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x90]
+
+	llgc	%r0, -524288
+	llgc	%r0, -1
+	llgc	%r0, 0
+	llgc	%r0, 1
+	llgc	%r0, 524287
+	llgc	%r0, 0(%r1)
+	llgc	%r0, 0(%r15)
+	llgc	%r0, 524287(%r1,%r15)
+	llgc	%r0, 524287(%r15,%r1)
+	llgc	%r15, 0
+
+
+#CHECK: llgcr	%r0, %r15               # encoding: [0xb9,0x84,0x00,0x0f]
+#CHECK: llgcr	%r7, %r8                # encoding: [0xb9,0x84,0x00,0x78]
+#CHECK: llgcr	%r15, %r0               # encoding: [0xb9,0x84,0x00,0xf0]
+
+	llgcr	%r0, %r15
+	llgcr	%r7, %r8
+	llgcr	%r15, %r0
+
+#CHECK: llgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x16]
+#CHECK: llgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x16]
+#CHECK: llgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x16]
+#CHECK: llgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x16]
+#CHECK: llgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x16]
+#CHECK: llgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x16]
+#CHECK: llgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x16]
+#CHECK: llgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x16]
+#CHECK: llgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x16]
+#CHECK: llgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x16]
+
+	llgf	%r0, -524288
+	llgf	%r0, -1
+	llgf	%r0, 0
+	llgf	%r0, 1
+	llgf	%r0, 524287
+	llgf	%r0, 0(%r1)
+	llgf	%r0, 0(%r15)
+	llgf	%r0, 524287(%r1,%r15)
+	llgf	%r0, 524287(%r15,%r1)
+	llgf	%r15, 0
+
+
+#CHECK: llgfr	%r0, %r15               # encoding: [0xb9,0x16,0x00,0x0f]
+#CHECK: llgfr	%r7, %r8                # encoding: [0xb9,0x16,0x00,0x78]
+#CHECK: llgfr	%r15, %r0               # encoding: [0xb9,0x16,0x00,0xf0]
+
+	llgfr	%r0, %r15
+	llgfr	%r7, %r8
+	llgfr	%r15, %r0
+
+#CHECK: llgfrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	llgfrl	%r0, -0x100000000
+#CHECK: llgfrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	llgfrl	%r0, -2
+#CHECK: llgfrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	llgfrl	%r0, 0
+#CHECK: llgfrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	llgfrl	%r0, 0xfffffffe
+
+#CHECK: llgfrl	%r0, foo                # encoding: [0xc4,0x0e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: llgfrl	%r15, foo               # encoding: [0xc4,0xfe,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	llgfrl	%r0,foo
+	llgfrl	%r15,foo
+
+#CHECK: llgfrl	%r3, bar+100            # encoding: [0xc4,0x3e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: llgfrl	%r4, bar+100            # encoding: [0xc4,0x4e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	llgfrl	%r3,bar+100
+	llgfrl	%r4,bar+100
+
+#CHECK: llgfrl	%r7, frob@PLT           # encoding: [0xc4,0x7e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: llgfrl	%r8, frob@PLT           # encoding: [0xc4,0x8e,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	llgfrl	%r7,frob@PLT
+	llgfrl	%r8,frob@PLT
+
+#CHECK: llgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x91]
+#CHECK: llgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x91]
+#CHECK: llgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x91]
+#CHECK: llgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x91]
+#CHECK: llgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x91]
+#CHECK: llgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x91]
+#CHECK: llgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x91]
+#CHECK: llgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x91]
+#CHECK: llgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x91]
+#CHECK: llgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x91]
+
+	llgh	%r0, -524288
+	llgh	%r0, -1
+	llgh	%r0, 0
+	llgh	%r0, 1
+	llgh	%r0, 524287
+	llgh	%r0, 0(%r1)
+	llgh	%r0, 0(%r15)
+	llgh	%r0, 524287(%r1,%r15)
+	llgh	%r0, 524287(%r15,%r1)
+	llgh	%r15, 0
+
+
+#CHECK: llghr	%r0, %r15               # encoding: [0xb9,0x85,0x00,0x0f]
+#CHECK: llghr	%r7, %r8                # encoding: [0xb9,0x85,0x00,0x78]
+#CHECK: llghr	%r15, %r0               # encoding: [0xb9,0x85,0x00,0xf0]
+
+	llghr	%r0, %r15
+	llghr	%r7, %r8
+	llghr	%r15, %r0
+
+#CHECK: llghrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	llghrl	%r0, -0x100000000
+#CHECK: llghrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	llghrl	%r0, -2
+#CHECK: llghrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	llghrl	%r0, 0
+#CHECK: llghrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	llghrl	%r0, 0xfffffffe
+
+#CHECK: llghrl	%r0, foo                # encoding: [0xc4,0x06,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: llghrl	%r15, foo               # encoding: [0xc4,0xf6,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	llghrl	%r0,foo
+	llghrl	%r15,foo
+
+#CHECK: llghrl	%r3, bar+100            # encoding: [0xc4,0x36,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: llghrl	%r4, bar+100            # encoding: [0xc4,0x46,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	llghrl	%r3,bar+100
+	llghrl	%r4,bar+100
+
+#CHECK: llghrl	%r7, frob@PLT           # encoding: [0xc4,0x76,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: llghrl	%r8, frob@PLT           # encoding: [0xc4,0x86,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	llghrl	%r7,frob@PLT
+	llghrl	%r8,frob@PLT
+
+#CHECK: llh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x95]
+#CHECK: llh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x95]
+#CHECK: llh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x95]
+#CHECK: llh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x95]
+#CHECK: llh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x95]
+#CHECK: llh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x95]
+#CHECK: llh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x95]
+#CHECK: llh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x95]
+#CHECK: llh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x95]
+#CHECK: llh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x95]
+
+	llh	%r0, -524288
+	llh	%r0, -1
+	llh	%r0, 0
+	llh	%r0, 1
+	llh	%r0, 524287
+	llh	%r0, 0(%r1)
+	llh	%r0, 0(%r15)
+	llh	%r0, 524287(%r1,%r15)
+	llh	%r0, 524287(%r15,%r1)
+	llh	%r15, 0
+
+#CHECK: llhr	%r0, %r15               # encoding: [0xb9,0x95,0x00,0x0f]
+#CHECK: llhr	%r7, %r8                # encoding: [0xb9,0x95,0x00,0x78]
+#CHECK: llhr	%r15, %r0               # encoding: [0xb9,0x95,0x00,0xf0]
+
+	llhr	%r0, %r15
+	llhr	%r7, %r8
+	llhr	%r15, %r0
+
+#CHECK: llhrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	llhrl	%r0, -0x100000000
+#CHECK: llhrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	llhrl	%r0, -2
+#CHECK: llhrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	llhrl	%r0, 0
+#CHECK: llhrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	llhrl	%r0, 0xfffffffe
+
+#CHECK: llhrl	%r0, foo                # encoding: [0xc4,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: llhrl	%r15, foo               # encoding: [0xc4,0xf2,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	llhrl	%r0,foo
+	llhrl	%r15,foo
+
+#CHECK: llhrl	%r3, bar+100            # encoding: [0xc4,0x32,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: llhrl	%r4, bar+100            # encoding: [0xc4,0x42,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	llhrl	%r3,bar+100
+	llhrl	%r4,bar+100
+
+#CHECK: llhrl	%r7, frob@PLT           # encoding: [0xc4,0x72,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: llhrl	%r8, frob@PLT           # encoding: [0xc4,0x82,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	llhrl	%r7,frob@PLT
+	llhrl	%r8,frob@PLT
+
+#CHECK: llihf	%r0, 0                  # encoding: [0xc0,0x0e,0x00,0x00,0x00,0x00]
+#CHECK: llihf	%r0, 4294967295         # encoding: [0xc0,0x0e,0xff,0xff,0xff,0xff]
+#CHECK: llihf	%r15, 0                 # encoding: [0xc0,0xfe,0x00,0x00,0x00,0x00]
+
+	llihf	%r0, 0
+	llihf	%r0, 0xffffffff
+	llihf	%r15, 0
+
+#CHECK: llihh	%r0, 0                  # encoding: [0xa5,0x0c,0x00,0x00]
+#CHECK: llihh	%r0, 32768              # encoding: [0xa5,0x0c,0x80,0x00]
+#CHECK: llihh	%r0, 65535              # encoding: [0xa5,0x0c,0xff,0xff]
+#CHECK: llihh	%r15, 0                 # encoding: [0xa5,0xfc,0x00,0x00]
+
+	llihh	%r0, 0
+	llihh	%r0, 0x8000
+	llihh	%r0, 0xffff
+	llihh	%r15, 0
+
+#CHECK: llihl	%r0, 0                  # encoding: [0xa5,0x0d,0x00,0x00]
+#CHECK: llihl	%r0, 32768              # encoding: [0xa5,0x0d,0x80,0x00]
+#CHECK: llihl	%r0, 65535              # encoding: [0xa5,0x0d,0xff,0xff]
+#CHECK: llihl	%r15, 0                 # encoding: [0xa5,0xfd,0x00,0x00]
+
+	llihl	%r0, 0
+	llihl	%r0, 0x8000
+	llihl	%r0, 0xffff
+	llihl	%r15, 0
+
+#CHECK: llilf	%r0, 0                  # encoding: [0xc0,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: llilf	%r0, 4294967295         # encoding: [0xc0,0x0f,0xff,0xff,0xff,0xff]
+#CHECK: llilf	%r15, 0                 # encoding: [0xc0,0xff,0x00,0x00,0x00,0x00]
+
+	llilf	%r0, 0
+	llilf	%r0, 0xffffffff
+	llilf	%r15, 0
+
+#CHECK: llilh	%r0, 0                  # encoding: [0xa5,0x0e,0x00,0x00]
+#CHECK: llilh	%r0, 32768              # encoding: [0xa5,0x0e,0x80,0x00]
+#CHECK: llilh	%r0, 65535              # encoding: [0xa5,0x0e,0xff,0xff]
+#CHECK: llilh	%r15, 0                 # encoding: [0xa5,0xfe,0x00,0x00]
+
+	llilh	%r0, 0
+	llilh	%r0, 0x8000
+	llilh	%r0, 0xffff
+	llilh	%r15, 0
+
+#CHECK: llill	%r0, 0                  # encoding: [0xa5,0x0f,0x00,0x00]
+#CHECK: llill	%r0, 32768              # encoding: [0xa5,0x0f,0x80,0x00]
+#CHECK: llill	%r0, 65535              # encoding: [0xa5,0x0f,0xff,0xff]
+#CHECK: llill	%r15, 0                 # encoding: [0xa5,0xff,0x00,0x00]
+
+	llill	%r0, 0
+	llill	%r0, 0x8000
+	llill	%r0, 0xffff
+	llill	%r15, 0
+
+#CHECK: lmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r14, %r15, 0           # encoding: [0xeb,0xef,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x04]
+#CHECK: lmg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x04]
+#CHECK: lmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x04]
+#CHECK: lmg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x04]
+#CHECK: lmg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x04]
+
+	lmg	%r0,%r0,0
+	lmg	%r0,%r15,0
+	lmg	%r14,%r15,0
+	lmg	%r15,%r15,0
+	lmg	%r0,%r0,-524288
+	lmg	%r0,%r0,-1
+	lmg	%r0,%r0,0
+	lmg	%r0,%r0,1
+	lmg	%r0,%r0,524287
+	lmg	%r0,%r0,0(%r1)
+	lmg	%r0,%r0,0(%r15)
+	lmg	%r0,%r0,524287(%r1)
+	lmg	%r0,%r0,524287(%r15)
+
+#CHECK: lndbr	%f0, %f9                # encoding: [0xb3,0x11,0x00,0x09]
+#CHECK: lndbr	%f0, %f15               # encoding: [0xb3,0x11,0x00,0x0f]
+#CHECK: lndbr	%f15, %f0               # encoding: [0xb3,0x11,0x00,0xf0]
+#CHECK: lndbr	%f15, %f9               # encoding: [0xb3,0x11,0x00,0xf9]
+
+	lndbr	%f0,%f9
+	lndbr	%f0,%f15
+	lndbr	%f15,%f0
+	lndbr	%f15,%f9
+
+#CHECK: lnebr	%f0, %f9                # encoding: [0xb3,0x01,0x00,0x09]
+#CHECK: lnebr	%f0, %f15               # encoding: [0xb3,0x01,0x00,0x0f]
+#CHECK: lnebr	%f15, %f0               # encoding: [0xb3,0x01,0x00,0xf0]
+#CHECK: lnebr	%f15, %f9               # encoding: [0xb3,0x01,0x00,0xf9]
+
+	lnebr	%f0,%f9
+	lnebr	%f0,%f15
+	lnebr	%f15,%f0
+	lnebr	%f15,%f9
+
+#CHECK: lngfr	%r0, %r0                # encoding: [0xb9,0x11,0x00,0x00]
+#CHECK: lngfr	%r0, %r15               # encoding: [0xb9,0x11,0x00,0x0f]
+#CHECK: lngfr	%r15, %r0               # encoding: [0xb9,0x11,0x00,0xf0]
+#CHECK: lngfr	%r7, %r8                # encoding: [0xb9,0x11,0x00,0x78]
+
+	lngfr	%r0,%r0
+	lngfr	%r0,%r15
+	lngfr	%r15,%r0
+	lngfr	%r7,%r8
+
+#CHECK: lngr	%r0, %r0                # encoding: [0xb9,0x01,0x00,0x00]
+#CHECK: lngr	%r0, %r15               # encoding: [0xb9,0x01,0x00,0x0f]
+#CHECK: lngr	%r15, %r0               # encoding: [0xb9,0x01,0x00,0xf0]
+#CHECK: lngr	%r7, %r8                # encoding: [0xb9,0x01,0x00,0x78]
+
+	lngr	%r0,%r0
+	lngr	%r0,%r15
+	lngr	%r15,%r0
+	lngr	%r7,%r8
+
+#CHECK: lnr	%r0, %r0                # encoding: [0x11,0x00]
+#CHECK: lnr	%r0, %r15               # encoding: [0x11,0x0f]
+#CHECK: lnr	%r15, %r0               # encoding: [0x11,0xf0]
+#CHECK: lnr	%r7, %r8                # encoding: [0x11,0x78]
+
+	lnr	%r0,%r0
+	lnr	%r0,%r15
+	lnr	%r15,%r0
+	lnr	%r7,%r8
+
+#CHECK: lnxbr	%f0, %f8                # encoding: [0xb3,0x41,0x00,0x08]
+#CHECK: lnxbr	%f0, %f13               # encoding: [0xb3,0x41,0x00,0x0d]
+#CHECK: lnxbr	%f13, %f0               # encoding: [0xb3,0x41,0x00,0xd0]
+#CHECK: lnxbr	%f13, %f9               # encoding: [0xb3,0x41,0x00,0xd9]
+
+	lnxbr	%f0,%f8
+	lnxbr	%f0,%f13
+	lnxbr	%f13,%f0
+	lnxbr	%f13,%f9
+
+#CHECK: lpdbr	%f0, %f9                # encoding: [0xb3,0x10,0x00,0x09]
+#CHECK: lpdbr	%f0, %f15               # encoding: [0xb3,0x10,0x00,0x0f]
+#CHECK: lpdbr	%f15, %f0               # encoding: [0xb3,0x10,0x00,0xf0]
+#CHECK: lpdbr	%f15, %f9               # encoding: [0xb3,0x10,0x00,0xf9]
+
+	lpdbr	%f0,%f9
+	lpdbr	%f0,%f15
+	lpdbr	%f15,%f0
+	lpdbr	%f15,%f9
+
+#CHECK: lpebr	%f0, %f9                # encoding: [0xb3,0x00,0x00,0x09]
+#CHECK: lpebr	%f0, %f15               # encoding: [0xb3,0x00,0x00,0x0f]
+#CHECK: lpebr	%f15, %f0               # encoding: [0xb3,0x00,0x00,0xf0]
+#CHECK: lpebr	%f15, %f9               # encoding: [0xb3,0x00,0x00,0xf9]
+
+	lpebr	%f0,%f9
+	lpebr	%f0,%f15
+	lpebr	%f15,%f0
+	lpebr	%f15,%f9
+
+#CHECK: lpgfr	%r0, %r0                # encoding: [0xb9,0x10,0x00,0x00]
+#CHECK: lpgfr	%r0, %r15               # encoding: [0xb9,0x10,0x00,0x0f]
+#CHECK: lpgfr	%r15, %r0               # encoding: [0xb9,0x10,0x00,0xf0]
+#CHECK: lpgfr	%r7, %r8                # encoding: [0xb9,0x10,0x00,0x78]
+
+	lpgfr	%r0,%r0
+	lpgfr	%r0,%r15
+	lpgfr	%r15,%r0
+	lpgfr	%r7,%r8
+
+#CHECK: lpgr	%r0, %r0                # encoding: [0xb9,0x00,0x00,0x00]
+#CHECK: lpgr	%r0, %r15               # encoding: [0xb9,0x00,0x00,0x0f]
+#CHECK: lpgr	%r15, %r0               # encoding: [0xb9,0x00,0x00,0xf0]
+#CHECK: lpgr	%r7, %r8                # encoding: [0xb9,0x00,0x00,0x78]
+
+	lpgr	%r0,%r0
+	lpgr	%r0,%r15
+	lpgr	%r15,%r0
+	lpgr	%r7,%r8
+
+#CHECK: lpr	%r0, %r0                # encoding: [0x10,0x00]
+#CHECK: lpr	%r0, %r15               # encoding: [0x10,0x0f]
+#CHECK: lpr	%r15, %r0               # encoding: [0x10,0xf0]
+#CHECK: lpr	%r7, %r8                # encoding: [0x10,0x78]
+
+	lpr	%r0,%r0
+	lpr	%r0,%r15
+	lpr	%r15,%r0
+	lpr	%r7,%r8
+
+#CHECK: lpxbr	%f0, %f8                # encoding: [0xb3,0x40,0x00,0x08]
+#CHECK: lpxbr	%f0, %f13               # encoding: [0xb3,0x40,0x00,0x0d]
+#CHECK: lpxbr	%f13, %f0               # encoding: [0xb3,0x40,0x00,0xd0]
+#CHECK: lpxbr	%f13, %f9               # encoding: [0xb3,0x40,0x00,0xd9]
+
+	lpxbr	%f0,%f8
+	lpxbr	%f0,%f13
+	lpxbr	%f13,%f0
+	lpxbr	%f13,%f9
+
+#CHECK: lr	%r0, %r9                # encoding: [0x18,0x09]
+#CHECK: lr	%r0, %r15               # encoding: [0x18,0x0f]
+#CHECK: lr	%r15, %r0               # encoding: [0x18,0xf0]
+#CHECK: lr	%r15, %r9               # encoding: [0x18,0xf9]
+
+	lr	%r0,%r9
+	lr	%r0,%r15
+	lr	%r15,%r0
+	lr	%r15,%r9
+
+#CHECK: lrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	lrl	%r0, -0x100000000
+#CHECK: lrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	lrl	%r0, -2
+#CHECK: lrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	lrl	%r0, 0
+#CHECK: lrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	lrl	%r0, 0xfffffffe
+
+#CHECK: lrl	%r0, foo                # encoding: [0xc4,0x0d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lrl	%r15, foo               # encoding: [0xc4,0xfd,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lrl	%r0,foo
+	lrl	%r15,foo
+
+#CHECK: lrl	%r3, bar+100            # encoding: [0xc4,0x3d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lrl	%r4, bar+100            # encoding: [0xc4,0x4d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lrl	%r3,bar+100
+	lrl	%r4,bar+100
+
+#CHECK: lrl	%r7, frob@PLT           # encoding: [0xc4,0x7d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lrl	%r8, frob@PLT           # encoding: [0xc4,0x8d,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lrl	%r7,frob@PLT
+	lrl	%r8,frob@PLT
+
+#CHECK: lrv	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1e]
+#CHECK: lrv	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1e]
+#CHECK: lrv	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1e]
+#CHECK: lrv	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1e]
+#CHECK: lrv	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1e]
+#CHECK: lrv	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1e]
+#CHECK: lrv	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1e]
+#CHECK: lrv	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1e]
+#CHECK: lrv	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1e]
+#CHECK: lrv	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1e]
+
+	lrv	%r0,-524288
+	lrv	%r0,-1
+	lrv	%r0,0
+	lrv	%r0,1
+	lrv	%r0,524287
+	lrv	%r0,0(%r1)
+	lrv	%r0,0(%r15)
+	lrv	%r0,524287(%r1,%r15)
+	lrv	%r0,524287(%r15,%r1)
+	lrv	%r15,0
+
+#CHECK: lrvg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0f]
+#CHECK: lrvg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0f]
+#CHECK: lrvg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: lrvg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0f]
+#CHECK: lrvg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0f]
+#CHECK: lrvg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0f]
+#CHECK: lrvg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: lrvg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0f]
+#CHECK: lrvg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0f]
+#CHECK: lrvg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0f]
+
+	lrvg	%r0,-524288
+	lrvg	%r0,-1
+	lrvg	%r0,0
+	lrvg	%r0,1
+	lrvg	%r0,524287
+	lrvg	%r0,0(%r1)
+	lrvg	%r0,0(%r15)
+	lrvg	%r0,524287(%r1,%r15)
+	lrvg	%r0,524287(%r15,%r1)
+	lrvg	%r15,0
+
+#CHECK: lrvgr	%r0, %r0                # encoding: [0xb9,0x0f,0x00,0x00]
+#CHECK: lrvgr	%r0, %r15               # encoding: [0xb9,0x0f,0x00,0x0f]
+#CHECK: lrvgr	%r15, %r0               # encoding: [0xb9,0x0f,0x00,0xf0]
+#CHECK: lrvgr	%r7, %r8                # encoding: [0xb9,0x0f,0x00,0x78]
+#CHECK: lrvgr	%r15, %r15              # encoding: [0xb9,0x0f,0x00,0xff]
+
+	lrvgr	%r0,%r0
+	lrvgr	%r0,%r15
+	lrvgr	%r15,%r0
+	lrvgr	%r7,%r8
+	lrvgr	%r15,%r15
+
+#CHECK: lrvr	%r0, %r0                # encoding: [0xb9,0x1f,0x00,0x00]
+#CHECK: lrvr	%r0, %r15               # encoding: [0xb9,0x1f,0x00,0x0f]
+#CHECK: lrvr	%r15, %r0               # encoding: [0xb9,0x1f,0x00,0xf0]
+#CHECK: lrvr	%r7, %r8                # encoding: [0xb9,0x1f,0x00,0x78]
+#CHECK: lrvr	%r15, %r15              # encoding: [0xb9,0x1f,0x00,0xff]
+
+	lrvr	%r0,%r0
+	lrvr	%r0,%r15
+	lrvr	%r15,%r0
+	lrvr	%r7,%r8
+	lrvr	%r15,%r15
+
+#CHECK: lt	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x12]
+#CHECK: lt	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x12]
+#CHECK: lt	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x12]
+#CHECK: lt	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x12]
+#CHECK: lt	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x12]
+#CHECK: lt	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x12]
+#CHECK: lt	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x12]
+#CHECK: lt	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x12]
+#CHECK: lt	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x12]
+#CHECK: lt	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x12]
+
+	lt	%r0, -524288
+	lt	%r0, -1
+	lt	%r0, 0
+	lt	%r0, 1
+	lt	%r0, 524287
+	lt	%r0, 0(%r1)
+	lt	%r0, 0(%r15)
+	lt	%r0, 524287(%r1,%r15)
+	lt	%r0, 524287(%r15,%r1)
+	lt	%r15, 0
+
+#CHECK: ltg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x02]
+#CHECK: ltg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x02]
+#CHECK: ltg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x02]
+#CHECK: ltg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x02]
+#CHECK: ltg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x02]
+#CHECK: ltg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x02]
+#CHECK: ltg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x02]
+#CHECK: ltg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x02]
+#CHECK: ltg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x02]
+#CHECK: ltg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x02]
+
+	ltg	%r0, -524288
+	ltg	%r0, -1
+	ltg	%r0, 0
+	ltg	%r0, 1
+	ltg	%r0, 524287
+	ltg	%r0, 0(%r1)
+	ltg	%r0, 0(%r15)
+	ltg	%r0, 524287(%r1,%r15)
+	ltg	%r0, 524287(%r15,%r1)
+	ltg	%r15, 0
+
+#CHECK: ltgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x32]
+#CHECK: ltgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x32]
+#CHECK: ltgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x32]
+#CHECK: ltgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x32]
+#CHECK: ltgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x32]
+#CHECK: ltgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x32]
+#CHECK: ltgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x32]
+#CHECK: ltgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x32]
+#CHECK: ltgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x32]
+#CHECK: ltgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x32]
+
+	ltgf	%r0, -524288
+	ltgf	%r0, -1
+	ltgf	%r0, 0
+	ltgf	%r0, 1
+	ltgf	%r0, 524287
+	ltgf	%r0, 0(%r1)
+	ltgf	%r0, 0(%r15)
+	ltgf	%r0, 524287(%r1,%r15)
+	ltgf	%r0, 524287(%r15,%r1)
+	ltgf	%r15, 0
+
+#CHECK: ltdbr	%f0, %f9                # encoding: [0xb3,0x12,0x00,0x09]
+#CHECK: ltdbr	%f0, %f15               # encoding: [0xb3,0x12,0x00,0x0f]
+#CHECK: ltdbr	%f15, %f0               # encoding: [0xb3,0x12,0x00,0xf0]
+#CHECK: ltdbr	%f15, %f9               # encoding: [0xb3,0x12,0x00,0xf9]
+
+	ltdbr	%f0,%f9
+	ltdbr	%f0,%f15
+	ltdbr	%f15,%f0
+	ltdbr	%f15,%f9
+
+#CHECK: ltebr	%f0, %f9                # encoding: [0xb3,0x02,0x00,0x09]
+#CHECK: ltebr	%f0, %f15               # encoding: [0xb3,0x02,0x00,0x0f]
+#CHECK: ltebr	%f15, %f0               # encoding: [0xb3,0x02,0x00,0xf0]
+#CHECK: ltebr	%f15, %f9               # encoding: [0xb3,0x02,0x00,0xf9]
+
+	ltebr	%f0,%f9
+	ltebr	%f0,%f15
+	ltebr	%f15,%f0
+	ltebr	%f15,%f9
+
+#CHECK: ltgfr	%r0, %r9                # encoding: [0xb9,0x12,0x00,0x09]
+#CHECK: ltgfr	%r0, %r15               # encoding: [0xb9,0x12,0x00,0x0f]
+#CHECK: ltgfr	%r15, %r0               # encoding: [0xb9,0x12,0x00,0xf0]
+#CHECK: ltgfr	%r15, %r9               # encoding: [0xb9,0x12,0x00,0xf9]
+
+	ltgfr	%r0,%r9
+	ltgfr	%r0,%r15
+	ltgfr	%r15,%r0
+	ltgfr	%r15,%r9
+
+#CHECK: ltgr	%r0, %r9                # encoding: [0xb9,0x02,0x00,0x09]
+#CHECK: ltgr	%r0, %r15               # encoding: [0xb9,0x02,0x00,0x0f]
+#CHECK: ltgr	%r15, %r0               # encoding: [0xb9,0x02,0x00,0xf0]
+#CHECK: ltgr	%r15, %r9               # encoding: [0xb9,0x02,0x00,0xf9]
+
+	ltgr	%r0,%r9
+	ltgr	%r0,%r15
+	ltgr	%r15,%r0
+	ltgr	%r15,%r9
+
+#CHECK: ltr	%r0, %r9                # encoding: [0x12,0x09]
+#CHECK: ltr	%r0, %r15               # encoding: [0x12,0x0f]
+#CHECK: ltr	%r15, %r0               # encoding: [0x12,0xf0]
+#CHECK: ltr	%r15, %r9               # encoding: [0x12,0xf9]
+
+	ltr	%r0,%r9
+	ltr	%r0,%r15
+	ltr	%r15,%r0
+	ltr	%r15,%r9
+
+#CHECK: ltxbr	%f0, %f9                # encoding: [0xb3,0x42,0x00,0x09]
+#CHECK: ltxbr	%f0, %f13               # encoding: [0xb3,0x42,0x00,0x0d]
+#CHECK: ltxbr	%f13, %f0               # encoding: [0xb3,0x42,0x00,0xd0]
+#CHECK: ltxbr	%f13, %f9               # encoding: [0xb3,0x42,0x00,0xd9]
+
+	ltxbr	%f0,%f9
+	ltxbr	%f0,%f13
+	ltxbr	%f13,%f0
+	ltxbr	%f13,%f9
+
+#CHECK: lxr	%f0, %f8                # encoding: [0xb3,0x65,0x00,0x08]
+#CHECK: lxr	%f0, %f13               # encoding: [0xb3,0x65,0x00,0x0d]
+#CHECK: lxr	%f13, %f0               # encoding: [0xb3,0x65,0x00,0xd0]
+#CHECK: lxr	%f13, %f9               # encoding: [0xb3,0x65,0x00,0xd9]
+
+	lxr	%f0,%f8
+	lxr	%f0,%f13
+	lxr	%f13,%f0
+	lxr	%f13,%f9
+
+#CHECK: ly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x58]
+#CHECK: ly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x58]
+#CHECK: ly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x58]
+#CHECK: ly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x58]
+#CHECK: ly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x58]
+#CHECK: ly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x58]
+#CHECK: ly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x58]
+#CHECK: ly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x58]
+#CHECK: ly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x58]
+#CHECK: ly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x58]
+
+	ly	%r0, -524288
+	ly	%r0, -1
+	ly	%r0, 0
+	ly	%r0, 1
+	ly	%r0, 524287
+	ly	%r0, 0(%r1)
+	ly	%r0, 0(%r15)
+	ly	%r0, 524287(%r1,%r15)
+	ly	%r0, 524287(%r15,%r1)
+	ly	%r15, 0
+
+#CHECK: lzdr	%f0                     # encoding: [0xb3,0x75,0x00,0x00]
+#CHECK: lzdr	%f7                     # encoding: [0xb3,0x75,0x00,0x70]
+#CHECK: lzdr	%f15                    # encoding: [0xb3,0x75,0x00,0xf0]
+
+	lzdr	%f0
+	lzdr	%f7
+	lzdr	%f15
+
+#CHECK: lzer	%f0                     # encoding: [0xb3,0x74,0x00,0x00]
+#CHECK: lzer	%f7                     # encoding: [0xb3,0x74,0x00,0x70]
+#CHECK: lzer	%f15                    # encoding: [0xb3,0x74,0x00,0xf0]
+
+	lzer	%f0
+	lzer	%f7
+	lzer	%f15
+
+#CHECK: lzxr	%f0                     # encoding: [0xb3,0x76,0x00,0x00]
+#CHECK: lzxr	%f8                     # encoding: [0xb3,0x76,0x00,0x80]
+#CHECK: lzxr	%f13                    # encoding: [0xb3,0x76,0x00,0xd0]
+
+	lzxr	%f0
+	lzxr	%f8
+	lzxr	%f13
+
+#CHECK: madb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1e]
+#CHECK: madb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1e]
+#CHECK: madb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x1e]
+#CHECK: madb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x1e]
+
+	madb	%f0, %f0, 0
+	madb	%f0, %f0, 4095
+	madb	%f0, %f0, 0(%r1)
+	madb	%f0, %f0, 0(%r15)
+	madb	%f0, %f0, 4095(%r1,%r15)
+	madb	%f0, %f0, 4095(%r15,%r1)
+	madb	%f0, %f15, 0
+	madb	%f15, %f0, 0
+	madb	%f15, %f15, 0
+
+#CHECK: madbr	%f0, %f0, %f0           # encoding: [0xb3,0x1e,0x00,0x00]
+#CHECK: madbr	%f0, %f0, %f15          # encoding: [0xb3,0x1e,0x00,0x0f]
+#CHECK: madbr	%f0, %f15, %f0          # encoding: [0xb3,0x1e,0x00,0xf0]
+#CHECK: madbr	%f15, %f0, %f0          # encoding: [0xb3,0x1e,0xf0,0x00]
+#CHECK: madbr	%f7, %f8, %f9           # encoding: [0xb3,0x1e,0x70,0x89]
+#CHECK: madbr	%f15, %f15, %f15        # encoding: [0xb3,0x1e,0xf0,0xff]
+
+	madbr	%f0, %f0, %f0
+	madbr	%f0, %f0, %f15
+	madbr	%f0, %f15, %f0
+	madbr	%f15, %f0, %f0
+	madbr	%f7, %f8, %f9
+	madbr	%f15, %f15, %f15
+
+#CHECK: maeb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0e]
+#CHECK: maeb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0e]
+#CHECK: maeb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x0e]
+#CHECK: maeb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x0e]
+
+	maeb	%f0, %f0, 0
+	maeb	%f0, %f0, 4095
+	maeb	%f0, %f0, 0(%r1)
+	maeb	%f0, %f0, 0(%r15)
+	maeb	%f0, %f0, 4095(%r1,%r15)
+	maeb	%f0, %f0, 4095(%r15,%r1)
+	maeb	%f0, %f15, 0
+	maeb	%f15, %f0, 0
+	maeb	%f15, %f15, 0
+
+#CHECK: maebr	%f0, %f0, %f0           # encoding: [0xb3,0x0e,0x00,0x00]
+#CHECK: maebr	%f0, %f0, %f15          # encoding: [0xb3,0x0e,0x00,0x0f]
+#CHECK: maebr	%f0, %f15, %f0          # encoding: [0xb3,0x0e,0x00,0xf0]
+#CHECK: maebr	%f15, %f0, %f0          # encoding: [0xb3,0x0e,0xf0,0x00]
+#CHECK: maebr	%f7, %f8, %f9           # encoding: [0xb3,0x0e,0x70,0x89]
+#CHECK: maebr	%f15, %f15, %f15        # encoding: [0xb3,0x0e,0xf0,0xff]
+
+	maebr	%f0, %f0, %f0
+	maebr	%f0, %f0, %f15
+	maebr	%f0, %f15, %f0
+	maebr	%f15, %f0, %f0
+	maebr	%f7, %f8, %f9
+	maebr	%f15, %f15, %f15
+
+#CHECK: mdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1c]
+#CHECK: mdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1c]
+#CHECK: mdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1c]
+#CHECK: mdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1c]
+#CHECK: mdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1c]
+#CHECK: mdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1c]
+#CHECK: mdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1c]
+
+	mdb	%f0, 0
+	mdb	%f0, 4095
+	mdb	%f0, 0(%r1)
+	mdb	%f0, 0(%r15)
+	mdb	%f0, 4095(%r1,%r15)
+	mdb	%f0, 4095(%r15,%r1)
+	mdb	%f15, 0
+
+#CHECK: mdbr	%f0, %f0                # encoding: [0xb3,0x1c,0x00,0x00]
+#CHECK: mdbr	%f0, %f15               # encoding: [0xb3,0x1c,0x00,0x0f]
+#CHECK: mdbr	%f7, %f8                # encoding: [0xb3,0x1c,0x00,0x78]
+#CHECK: mdbr	%f15, %f0               # encoding: [0xb3,0x1c,0x00,0xf0]
+
+	mdbr	%f0, %f0
+	mdbr	%f0, %f15
+	mdbr	%f7, %f8
+	mdbr	%f15, %f0
+
+#CHECK: mdeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0c]
+#CHECK: mdeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0c]
+#CHECK: mdeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0c]
+#CHECK: mdeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0c]
+#CHECK: mdeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0c]
+#CHECK: mdeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0c]
+#CHECK: mdeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0c]
+
+	mdeb	%f0, 0
+	mdeb	%f0, 4095
+	mdeb	%f0, 0(%r1)
+	mdeb	%f0, 0(%r15)
+	mdeb	%f0, 4095(%r1,%r15)
+	mdeb	%f0, 4095(%r15,%r1)
+	mdeb	%f15, 0
+
+#CHECK: mdebr	%f0, %f0                # encoding: [0xb3,0x0c,0x00,0x00]
+#CHECK: mdebr	%f0, %f15               # encoding: [0xb3,0x0c,0x00,0x0f]
+#CHECK: mdebr	%f7, %f8                # encoding: [0xb3,0x0c,0x00,0x78]
+#CHECK: mdebr	%f15, %f0               # encoding: [0xb3,0x0c,0x00,0xf0]
+
+	mdebr	%f0, %f0
+	mdebr	%f0, %f15
+	mdebr	%f7, %f8
+	mdebr	%f15, %f0
+
+#CHECK: meeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x17]
+#CHECK: meeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x17]
+#CHECK: meeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x17]
+#CHECK: meeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x17]
+#CHECK: meeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x17]
+#CHECK: meeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x17]
+#CHECK: meeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x17]
+
+	meeb	%f0, 0
+	meeb	%f0, 4095
+	meeb	%f0, 0(%r1)
+	meeb	%f0, 0(%r15)
+	meeb	%f0, 4095(%r1,%r15)
+	meeb	%f0, 4095(%r15,%r1)
+	meeb	%f15, 0
+
+#CHECK: meebr	%f0, %f0                # encoding: [0xb3,0x17,0x00,0x00]
+#CHECK: meebr	%f0, %f15               # encoding: [0xb3,0x17,0x00,0x0f]
+#CHECK: meebr	%f7, %f8                # encoding: [0xb3,0x17,0x00,0x78]
+#CHECK: meebr	%f15, %f0               # encoding: [0xb3,0x17,0x00,0xf0]
+
+	meebr	%f0, %f0
+	meebr	%f0, %f15
+	meebr	%f7, %f8
+	meebr	%f15, %f0
+
+#CHECK: mghi	%r0, -32768             # encoding: [0xa7,0x0d,0x80,0x00]
+#CHECK: mghi	%r0, -1                 # encoding: [0xa7,0x0d,0xff,0xff]
+#CHECK: mghi	%r0, 0                  # encoding: [0xa7,0x0d,0x00,0x00]
+#CHECK: mghi	%r0, 1                  # encoding: [0xa7,0x0d,0x00,0x01]
+#CHECK: mghi	%r0, 32767              # encoding: [0xa7,0x0d,0x7f,0xff]
+#CHECK: mghi	%r15, 0                 # encoding: [0xa7,0xfd,0x00,0x00]
+
+	mghi	%r0, -32768
+	mghi	%r0, -1
+	mghi	%r0, 0
+	mghi	%r0, 1
+	mghi	%r0, 32767
+	mghi	%r15, 0
+
+#CHECK: mh	%r0, 0                  # encoding: [0x4c,0x00,0x00,0x00]
+#CHECK: mh	%r0, 4095               # encoding: [0x4c,0x00,0x0f,0xff]
+#CHECK: mh	%r0, 0(%r1)             # encoding: [0x4c,0x00,0x10,0x00]
+#CHECK: mh	%r0, 0(%r15)            # encoding: [0x4c,0x00,0xf0,0x00]
+#CHECK: mh	%r0, 4095(%r1,%r15)     # encoding: [0x4c,0x01,0xff,0xff]
+#CHECK: mh	%r0, 4095(%r15,%r1)     # encoding: [0x4c,0x0f,0x1f,0xff]
+#CHECK: mh	%r15, 0                 # encoding: [0x4c,0xf0,0x00,0x00]
+
+	mh	%r0, 0
+	mh	%r0, 4095
+	mh	%r0, 0(%r1)
+	mh	%r0, 0(%r15)
+	mh	%r0, 4095(%r1,%r15)
+	mh	%r0, 4095(%r15,%r1)
+	mh	%r15, 0
+
+#CHECK: mhi	%r0, -32768             # encoding: [0xa7,0x0c,0x80,0x00]
+#CHECK: mhi	%r0, -1                 # encoding: [0xa7,0x0c,0xff,0xff]
+#CHECK: mhi	%r0, 0                  # encoding: [0xa7,0x0c,0x00,0x00]
+#CHECK: mhi	%r0, 1                  # encoding: [0xa7,0x0c,0x00,0x01]
+#CHECK: mhi	%r0, 32767              # encoding: [0xa7,0x0c,0x7f,0xff]
+#CHECK: mhi	%r15, 0                 # encoding: [0xa7,0xfc,0x00,0x00]
+
+	mhi	%r0, -32768
+	mhi	%r0, -1
+	mhi	%r0, 0
+	mhi	%r0, 1
+	mhi	%r0, 32767
+	mhi	%r15, 0
+
+#CHECK: mhy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7c]
+#CHECK: mhy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7c]
+#CHECK: mhy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7c]
+#CHECK: mhy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7c]
+#CHECK: mhy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7c]
+#CHECK: mhy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7c]
+#CHECK: mhy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7c]
+#CHECK: mhy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7c]
+#CHECK: mhy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7c]
+#CHECK: mhy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7c]
+
+	mhy	%r0, -524288
+	mhy	%r0, -1
+	mhy	%r0, 0
+	mhy	%r0, 1
+	mhy	%r0, 524287
+	mhy	%r0, 0(%r1)
+	mhy	%r0, 0(%r15)
+	mhy	%r0, 524287(%r1,%r15)
+	mhy	%r0, 524287(%r15,%r1)
+	mhy	%r15, 0
+
+#CHECK: mlg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x86]
+#CHECK: mlg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x86]
+#CHECK: mlg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x86]
+#CHECK: mlg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x86]
+#CHECK: mlg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x86]
+#CHECK: mlg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x86]
+#CHECK: mlg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x86]
+#CHECK: mlg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x86]
+#CHECK: mlg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x86]
+#CHECK: mlg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x86]
+
+	mlg	%r0, -524288
+	mlg	%r0, -1
+	mlg	%r0, 0
+	mlg	%r0, 1
+	mlg	%r0, 524287
+	mlg	%r0, 0(%r1)
+	mlg	%r0, 0(%r15)
+	mlg	%r0, 524287(%r1,%r15)
+	mlg	%r0, 524287(%r15,%r1)
+	mlg	%r14, 0
+
+#CHECK: mlgr	%r0, %r0                # encoding: [0xb9,0x86,0x00,0x00]
+#CHECK: mlgr	%r0, %r15               # encoding: [0xb9,0x86,0x00,0x0f]
+#CHECK: mlgr	%r14, %r0               # encoding: [0xb9,0x86,0x00,0xe0]
+#CHECK: mlgr	%r6, %r9                # encoding: [0xb9,0x86,0x00,0x69]
+
+	mlgr	%r0,%r0
+	mlgr	%r0,%r15
+	mlgr	%r14,%r0
+	mlgr	%r6,%r9
+
+#CHECK: ms	%r0, 0                  # encoding: [0x71,0x00,0x00,0x00]
+#CHECK: ms	%r0, 4095               # encoding: [0x71,0x00,0x0f,0xff]
+#CHECK: ms	%r0, 0(%r1)             # encoding: [0x71,0x00,0x10,0x00]
+#CHECK: ms	%r0, 0(%r15)            # encoding: [0x71,0x00,0xf0,0x00]
+#CHECK: ms	%r0, 4095(%r1,%r15)     # encoding: [0x71,0x01,0xff,0xff]
+#CHECK: ms	%r0, 4095(%r15,%r1)     # encoding: [0x71,0x0f,0x1f,0xff]
+#CHECK: ms	%r15, 0                 # encoding: [0x71,0xf0,0x00,0x00]
+
+	ms	%r0, 0
+	ms	%r0, 4095
+	ms	%r0, 0(%r1)
+	ms	%r0, 0(%r15)
+	ms	%r0, 4095(%r1,%r15)
+	ms	%r0, 4095(%r15,%r1)
+	ms	%r15, 0
+
+#CHECK: msdb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1f]
+#CHECK: msdb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1f]
+#CHECK: msdb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x1f]
+#CHECK: msdb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x1f]
+
+	msdb	%f0, %f0, 0
+	msdb	%f0, %f0, 4095
+	msdb	%f0, %f0, 0(%r1)
+	msdb	%f0, %f0, 0(%r15)
+	msdb	%f0, %f0, 4095(%r1,%r15)
+	msdb	%f0, %f0, 4095(%r15,%r1)
+	msdb	%f0, %f15, 0
+	msdb	%f15, %f0, 0
+	msdb	%f15, %f15, 0
+
+#CHECK: msdbr	%f0, %f0, %f0           # encoding: [0xb3,0x1f,0x00,0x00]
+#CHECK: msdbr	%f0, %f0, %f15          # encoding: [0xb3,0x1f,0x00,0x0f]
+#CHECK: msdbr	%f0, %f15, %f0          # encoding: [0xb3,0x1f,0x00,0xf0]
+#CHECK: msdbr	%f15, %f0, %f0          # encoding: [0xb3,0x1f,0xf0,0x00]
+#CHECK: msdbr	%f7, %f8, %f9           # encoding: [0xb3,0x1f,0x70,0x89]
+#CHECK: msdbr	%f15, %f15, %f15        # encoding: [0xb3,0x1f,0xf0,0xff]
+
+	msdbr	%f0, %f0, %f0
+	msdbr	%f0, %f0, %f15
+	msdbr	%f0, %f15, %f0
+	msdbr	%f15, %f0, %f0
+	msdbr	%f7, %f8, %f9
+	msdbr	%f15, %f15, %f15
+
+#CHECK: mseb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0f]
+#CHECK: mseb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0f]
+#CHECK: mseb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x0f]
+#CHECK: mseb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x0f]
+
+	mseb	%f0, %f0, 0
+	mseb	%f0, %f0, 4095
+	mseb	%f0, %f0, 0(%r1)
+	mseb	%f0, %f0, 0(%r15)
+	mseb	%f0, %f0, 4095(%r1,%r15)
+	mseb	%f0, %f0, 4095(%r15,%r1)
+	mseb	%f0, %f15, 0
+	mseb	%f15, %f0, 0
+	mseb	%f15, %f15, 0
+
+#CHECK: msebr	%f0, %f0, %f0           # encoding: [0xb3,0x0f,0x00,0x00]
+#CHECK: msebr	%f0, %f0, %f15          # encoding: [0xb3,0x0f,0x00,0x0f]
+#CHECK: msebr	%f0, %f15, %f0          # encoding: [0xb3,0x0f,0x00,0xf0]
+#CHECK: msebr	%f15, %f0, %f0          # encoding: [0xb3,0x0f,0xf0,0x00]
+#CHECK: msebr	%f7, %f8, %f9           # encoding: [0xb3,0x0f,0x70,0x89]
+#CHECK: msebr	%f15, %f15, %f15        # encoding: [0xb3,0x0f,0xf0,0xff]
+
+	msebr	%f0, %f0, %f0
+	msebr	%f0, %f0, %f15
+	msebr	%f0, %f15, %f0
+	msebr	%f15, %f0, %f0
+	msebr	%f7, %f8, %f9
+	msebr	%f15, %f15, %f15
+
+#CHECK: msfi	%r0, -2147483648        # encoding: [0xc2,0x01,0x80,0x00,0x00,0x00]
+#CHECK: msfi	%r0, -1                 # encoding: [0xc2,0x01,0xff,0xff,0xff,0xff]
+#CHECK: msfi	%r0, 0                  # encoding: [0xc2,0x01,0x00,0x00,0x00,0x00]
+#CHECK: msfi	%r0, 1                  # encoding: [0xc2,0x01,0x00,0x00,0x00,0x01]
+#CHECK: msfi	%r0, 2147483647         # encoding: [0xc2,0x01,0x7f,0xff,0xff,0xff]
+#CHECK: msfi	%r15, 0                 # encoding: [0xc2,0xf1,0x00,0x00,0x00,0x00]
+
+	msfi	%r0, -1 << 31
+	msfi	%r0, -1
+	msfi	%r0, 0
+	msfi	%r0, 1
+	msfi	%r0, (1 << 31) - 1
+	msfi	%r15, 0
+
+#CHECK: msg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0c]
+#CHECK: msg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0c]
+#CHECK: msg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0c]
+#CHECK: msg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0c]
+#CHECK: msg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0c]
+#CHECK: msg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0c]
+#CHECK: msg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0c]
+#CHECK: msg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0c]
+#CHECK: msg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0c]
+#CHECK: msg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0c]
+
+	msg	%r0, -524288
+	msg	%r0, -1
+	msg	%r0, 0
+	msg	%r0, 1
+	msg	%r0, 524287
+	msg	%r0, 0(%r1)
+	msg	%r0, 0(%r15)
+	msg	%r0, 524287(%r1,%r15)
+	msg	%r0, 524287(%r15,%r1)
+	msg	%r15, 0
+
+#CHECK: msgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1c]
+#CHECK: msgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1c]
+#CHECK: msgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1c]
+#CHECK: msgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1c]
+#CHECK: msgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1c]
+#CHECK: msgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1c]
+#CHECK: msgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1c]
+#CHECK: msgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1c]
+#CHECK: msgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1c]
+#CHECK: msgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1c]
+
+	msgf	%r0, -524288
+	msgf	%r0, -1
+	msgf	%r0, 0
+	msgf	%r0, 1
+	msgf	%r0, 524287
+	msgf	%r0, 0(%r1)
+	msgf	%r0, 0(%r15)
+	msgf	%r0, 524287(%r1,%r15)
+	msgf	%r0, 524287(%r15,%r1)
+	msgf	%r15, 0
+
+#CHECK: msgfi	%r0, -2147483648        # encoding: [0xc2,0x00,0x80,0x00,0x00,0x00]
+#CHECK: msgfi	%r0, -1                 # encoding: [0xc2,0x00,0xff,0xff,0xff,0xff]
+#CHECK: msgfi	%r0, 0                  # encoding: [0xc2,0x00,0x00,0x00,0x00,0x00]
+#CHECK: msgfi	%r0, 1                  # encoding: [0xc2,0x00,0x00,0x00,0x00,0x01]
+#CHECK: msgfi	%r0, 2147483647         # encoding: [0xc2,0x00,0x7f,0xff,0xff,0xff]
+#CHECK: msgfi	%r15, 0                 # encoding: [0xc2,0xf0,0x00,0x00,0x00,0x00]
+
+	msgfi	%r0, -1 << 31
+	msgfi	%r0, -1
+	msgfi	%r0, 0
+	msgfi	%r0, 1
+	msgfi	%r0, (1 << 31) - 1
+	msgfi	%r15, 0
+
+#CHECK: msgfr	%r0, %r0                # encoding: [0xb9,0x1c,0x00,0x00]
+#CHECK: msgfr	%r0, %r15               # encoding: [0xb9,0x1c,0x00,0x0f]
+#CHECK: msgfr	%r15, %r0               # encoding: [0xb9,0x1c,0x00,0xf0]
+#CHECK: msgfr	%r7, %r8                # encoding: [0xb9,0x1c,0x00,0x78]
+
+	msgfr	%r0,%r0
+	msgfr	%r0,%r15
+	msgfr	%r15,%r0
+	msgfr	%r7,%r8
+
+#CHECK: msgr	%r0, %r0                # encoding: [0xb9,0x0c,0x00,0x00]
+#CHECK: msgr	%r0, %r15               # encoding: [0xb9,0x0c,0x00,0x0f]
+#CHECK: msgr	%r15, %r0               # encoding: [0xb9,0x0c,0x00,0xf0]
+#CHECK: msgr	%r7, %r8                # encoding: [0xb9,0x0c,0x00,0x78]
+
+	msgr	%r0,%r0
+	msgr	%r0,%r15
+	msgr	%r15,%r0
+	msgr	%r7,%r8
+
+#CHECK: msr	%r0, %r0                # encoding: [0xb2,0x52,0x00,0x00]
+#CHECK: msr	%r0, %r15               # encoding: [0xb2,0x52,0x00,0x0f]
+#CHECK: msr	%r15, %r0               # encoding: [0xb2,0x52,0x00,0xf0]
+#CHECK: msr	%r7, %r8                # encoding: [0xb2,0x52,0x00,0x78]
+
+	msr	%r0,%r0
+	msr	%r0,%r15
+	msr	%r15,%r0
+	msr	%r7,%r8
+
+#CHECK: msy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x51]
+#CHECK: msy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x51]
+#CHECK: msy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x51]
+#CHECK: msy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x51]
+#CHECK: msy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x51]
+#CHECK: msy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x51]
+#CHECK: msy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x51]
+#CHECK: msy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x51]
+#CHECK: msy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x51]
+#CHECK: msy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x51]
+
+	msy	%r0, -524288
+	msy	%r0, -1
+	msy	%r0, 0
+	msy	%r0, 1
+	msy	%r0, 524287
+	msy	%r0, 0(%r1)
+	msy	%r0, 0(%r15)
+	msy	%r0, 524287(%r1,%r15)
+	msy	%r0, 524287(%r15,%r1)
+	msy	%r15, 0
+
+#CHECK: mvc	0(1), 0                 # encoding: [0xd2,0x00,0x00,0x00,0x00,0x00]
+#CHECK: mvc	0(1), 0(%r1)            # encoding: [0xd2,0x00,0x00,0x00,0x10,0x00]
+#CHECK: mvc	0(1), 0(%r15)           # encoding: [0xd2,0x00,0x00,0x00,0xf0,0x00]
+#CHECK: mvc	0(1), 4095              # encoding: [0xd2,0x00,0x00,0x00,0x0f,0xff]
+#CHECK: mvc	0(1), 4095(%r1)         # encoding: [0xd2,0x00,0x00,0x00,0x1f,0xff]
+#CHECK: mvc	0(1), 4095(%r15)        # encoding: [0xd2,0x00,0x00,0x00,0xff,0xff]
+#CHECK: mvc	0(1,%r1), 0             # encoding: [0xd2,0x00,0x10,0x00,0x00,0x00]
+#CHECK: mvc	0(1,%r15), 0            # encoding: [0xd2,0x00,0xf0,0x00,0x00,0x00]
+#CHECK: mvc	4095(1,%r1), 0          # encoding: [0xd2,0x00,0x1f,0xff,0x00,0x00]
+#CHECK: mvc	4095(1,%r15), 0         # encoding: [0xd2,0x00,0xff,0xff,0x00,0x00]
+#CHECK: mvc	0(256,%r1), 0           # encoding: [0xd2,0xff,0x10,0x00,0x00,0x00]
+#CHECK: mvc	0(256,%r15), 0          # encoding: [0xd2,0xff,0xf0,0x00,0x00,0x00]
+
+	mvc	0(1), 0
+	mvc	0(1), 0(%r1)
+	mvc	0(1), 0(%r15)
+	mvc	0(1), 4095
+	mvc	0(1), 4095(%r1)
+	mvc	0(1), 4095(%r15)
+	mvc	0(1,%r1), 0
+	mvc	0(1,%r15), 0
+	mvc	4095(1,%r1), 0
+	mvc	4095(1,%r15), 0
+	mvc	0(256,%r1), 0
+	mvc	0(256,%r15), 0
+
+#CHECK: mvghi	0, 0                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
+#CHECK: mvghi	4095, 0                 # encoding: [0xe5,0x48,0x0f,0xff,0x00,0x00]
+#CHECK: mvghi	0, -32768               # encoding: [0xe5,0x48,0x00,0x00,0x80,0x00]
+#CHECK: mvghi	0, -1                   # encoding: [0xe5,0x48,0x00,0x00,0xff,0xff]
+#CHECK: mvghi	0, 0                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
+#CHECK: mvghi	0, 1                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x01]
+#CHECK: mvghi	0, 32767                # encoding: [0xe5,0x48,0x00,0x00,0x7f,0xff]
+#CHECK: mvghi	0(%r1), 42              # encoding: [0xe5,0x48,0x10,0x00,0x00,0x2a]
+#CHECK: mvghi	0(%r15), 42             # encoding: [0xe5,0x48,0xf0,0x00,0x00,0x2a]
+#CHECK: mvghi	4095(%r1), 42           # encoding: [0xe5,0x48,0x1f,0xff,0x00,0x2a]
+#CHECK: mvghi	4095(%r15), 42          # encoding: [0xe5,0x48,0xff,0xff,0x00,0x2a]
+
+	mvghi	0, 0
+	mvghi	4095, 0
+	mvghi	0, -32768
+	mvghi	0, -1
+	mvghi	0, 0
+	mvghi	0, 1
+	mvghi	0, 32767
+	mvghi	0(%r1), 42
+	mvghi	0(%r15), 42
+	mvghi	4095(%r1), 42
+	mvghi	4095(%r15), 42
+
+#CHECK: mvhhi	0, 0                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x00]
+#CHECK: mvhhi	4095, 0                 # encoding: [0xe5,0x44,0x0f,0xff,0x00,0x00]
+#CHECK: mvhhi	0, -32768               # encoding: [0xe5,0x44,0x00,0x00,0x80,0x00]
+#CHECK: mvhhi	0, -1                   # encoding: [0xe5,0x44,0x00,0x00,0xff,0xff]
+#CHECK: mvhhi	0, 0                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x00]
+#CHECK: mvhhi	0, 1                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x01]
+#CHECK: mvhhi	0, 32767                # encoding: [0xe5,0x44,0x00,0x00,0x7f,0xff]
+#CHECK: mvhhi	0(%r1), 42              # encoding: [0xe5,0x44,0x10,0x00,0x00,0x2a]
+#CHECK: mvhhi	0(%r15), 42             # encoding: [0xe5,0x44,0xf0,0x00,0x00,0x2a]
+#CHECK: mvhhi	4095(%r1), 42           # encoding: [0xe5,0x44,0x1f,0xff,0x00,0x2a]
+#CHECK: mvhhi	4095(%r15), 42          # encoding: [0xe5,0x44,0xff,0xff,0x00,0x2a]
+
+	mvhhi	0, 0
+	mvhhi	4095, 0
+	mvhhi	0, -32768
+	mvhhi	0, -1
+	mvhhi	0, 0
+	mvhhi	0, 1
+	mvhhi	0, 32767
+	mvhhi	0(%r1), 42
+	mvhhi	0(%r15), 42
+	mvhhi	4095(%r1), 42
+	mvhhi	4095(%r15), 42
+
+#CHECK: mvhi	0, 0                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x00]
+#CHECK: mvhi	4095, 0                 # encoding: [0xe5,0x4c,0x0f,0xff,0x00,0x00]
+#CHECK: mvhi	0, -32768               # encoding: [0xe5,0x4c,0x00,0x00,0x80,0x00]
+#CHECK: mvhi	0, -1                   # encoding: [0xe5,0x4c,0x00,0x00,0xff,0xff]
+#CHECK: mvhi	0, 0                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x00]
+#CHECK: mvhi	0, 1                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x01]
+#CHECK: mvhi	0, 32767                # encoding: [0xe5,0x4c,0x00,0x00,0x7f,0xff]
+#CHECK: mvhi	0(%r1), 42              # encoding: [0xe5,0x4c,0x10,0x00,0x00,0x2a]
+#CHECK: mvhi	0(%r15), 42             # encoding: [0xe5,0x4c,0xf0,0x00,0x00,0x2a]
+#CHECK: mvhi	4095(%r1), 42           # encoding: [0xe5,0x4c,0x1f,0xff,0x00,0x2a]
+#CHECK: mvhi	4095(%r15), 42          # encoding: [0xe5,0x4c,0xff,0xff,0x00,0x2a]
+
+	mvhi	0, 0
+	mvhi	4095, 0
+	mvhi	0, -32768
+	mvhi	0, -1
+	mvhi	0, 0
+	mvhi	0, 1
+	mvhi	0, 32767
+	mvhi	0(%r1), 42
+	mvhi	0(%r15), 42
+	mvhi	4095(%r1), 42
+	mvhi	4095(%r15), 42
+
+#CHECK: mvi	0, 0                    # encoding: [0x92,0x00,0x00,0x00]
+#CHECK: mvi	4095, 0                 # encoding: [0x92,0x00,0x0f,0xff]
+#CHECK: mvi	0, 255                  # encoding: [0x92,0xff,0x00,0x00]
+#CHECK: mvi	0(%r1), 42              # encoding: [0x92,0x2a,0x10,0x00]
+#CHECK: mvi	0(%r15), 42             # encoding: [0x92,0x2a,0xf0,0x00]
+#CHECK: mvi	4095(%r1), 42           # encoding: [0x92,0x2a,0x1f,0xff]
+#CHECK: mvi	4095(%r15), 42          # encoding: [0x92,0x2a,0xff,0xff]
+
+	mvi	0, 0
+	mvi	4095, 0
+	mvi	0, 255
+	mvi	0(%r1), 42
+	mvi	0(%r15), 42
+	mvi	4095(%r1), 42
+	mvi	4095(%r15), 42
+
+#CHECK: mviy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x52]
+#CHECK: mviy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x52]
+#CHECK: mviy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x52]
+#CHECK: mviy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x52]
+#CHECK: mviy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x52]
+#CHECK: mviy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x52]
+#CHECK: mviy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x52]
+#CHECK: mviy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x52]
+#CHECK: mviy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x52]
+#CHECK: mviy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x52]
+
+	mviy	-524288, 0
+	mviy	-1, 0
+	mviy	0, 0
+	mviy	1, 0
+	mviy	524287, 0
+	mviy	0, 255
+	mviy	0(%r1), 42
+	mviy	0(%r15), 42
+	mviy	524287(%r1), 42
+	mviy	524287(%r15), 42
+
+#CHECK: mvst	%r0, %r0                # encoding: [0xb2,0x55,0x00,0x00]
+#CHECK: mvst	%r0, %r15               # encoding: [0xb2,0x55,0x00,0x0f]
+#CHECK: mvst	%r15, %r0               # encoding: [0xb2,0x55,0x00,0xf0]
+#CHECK: mvst	%r7, %r8                # encoding: [0xb2,0x55,0x00,0x78]
+
+	mvst	%r0,%r0
+	mvst	%r0,%r15
+	mvst	%r15,%r0
+	mvst	%r7,%r8
+
+#CHECK: mxbr	%f0, %f0                # encoding: [0xb3,0x4c,0x00,0x00]
+#CHECK: mxbr	%f0, %f13               # encoding: [0xb3,0x4c,0x00,0x0d]
+#CHECK: mxbr	%f8, %f5                # encoding: [0xb3,0x4c,0x00,0x85]
+#CHECK: mxbr	%f13, %f13              # encoding: [0xb3,0x4c,0x00,0xdd]
+
+	mxbr	%f0, %f0
+	mxbr	%f0, %f13
+	mxbr	%f8, %f5
+	mxbr	%f13, %f13
+
+#CHECK: mxdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x07]
+#CHECK: mxdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x07]
+#CHECK: mxdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x07]
+#CHECK: mxdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x07]
+#CHECK: mxdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x07]
+#CHECK: mxdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x07]
+#CHECK: mxdb	%f13, 0                 # encoding: [0xed,0xd0,0x00,0x00,0x00,0x07]
+
+	mxdb	%f0, 0
+	mxdb	%f0, 4095
+	mxdb	%f0, 0(%r1)
+	mxdb	%f0, 0(%r15)
+	mxdb	%f0, 4095(%r1,%r15)
+	mxdb	%f0, 4095(%r15,%r1)
+	mxdb	%f13, 0
+
+#CHECK: mxdbr	%f0, %f0                # encoding: [0xb3,0x07,0x00,0x00]
+#CHECK: mxdbr	%f0, %f15               # encoding: [0xb3,0x07,0x00,0x0f]
+#CHECK: mxdbr	%f8, %f8                # encoding: [0xb3,0x07,0x00,0x88]
+#CHECK: mxdbr	%f13, %f0               # encoding: [0xb3,0x07,0x00,0xd0]
+
+	mxdbr	%f0, %f0
+	mxdbr	%f0, %f15
+	mxdbr	%f8, %f8
+	mxdbr	%f13, %f0
+
+#CHECK: n	%r0, 0                  # encoding: [0x54,0x00,0x00,0x00]
+#CHECK: n	%r0, 4095               # encoding: [0x54,0x00,0x0f,0xff]
+#CHECK: n	%r0, 0(%r1)             # encoding: [0x54,0x00,0x10,0x00]
+#CHECK: n	%r0, 0(%r15)            # encoding: [0x54,0x00,0xf0,0x00]
+#CHECK: n	%r0, 4095(%r1,%r15)     # encoding: [0x54,0x01,0xff,0xff]
+#CHECK: n	%r0, 4095(%r15,%r1)     # encoding: [0x54,0x0f,0x1f,0xff]
+#CHECK: n	%r15, 0                 # encoding: [0x54,0xf0,0x00,0x00]
+
+	n	%r0, 0
+	n	%r0, 4095
+	n	%r0, 0(%r1)
+	n	%r0, 0(%r15)
+	n	%r0, 4095(%r1,%r15)
+	n	%r0, 4095(%r15,%r1)
+	n	%r15, 0
+
+#CHECK: nc	0(1), 0                 # encoding: [0xd4,0x00,0x00,0x00,0x00,0x00]
+#CHECK: nc	0(1), 0(%r1)            # encoding: [0xd4,0x00,0x00,0x00,0x10,0x00]
+#CHECK: nc	0(1), 0(%r15)           # encoding: [0xd4,0x00,0x00,0x00,0xf0,0x00]
+#CHECK: nc	0(1), 4095              # encoding: [0xd4,0x00,0x00,0x00,0x0f,0xff]
+#CHECK: nc	0(1), 4095(%r1)         # encoding: [0xd4,0x00,0x00,0x00,0x1f,0xff]
+#CHECK: nc	0(1), 4095(%r15)        # encoding: [0xd4,0x00,0x00,0x00,0xff,0xff]
+#CHECK: nc	0(1,%r1), 0             # encoding: [0xd4,0x00,0x10,0x00,0x00,0x00]
+#CHECK: nc	0(1,%r15), 0            # encoding: [0xd4,0x00,0xf0,0x00,0x00,0x00]
+#CHECK: nc	4095(1,%r1), 0          # encoding: [0xd4,0x00,0x1f,0xff,0x00,0x00]
+#CHECK: nc	4095(1,%r15), 0         # encoding: [0xd4,0x00,0xff,0xff,0x00,0x00]
+#CHECK: nc	0(256,%r1), 0           # encoding: [0xd4,0xff,0x10,0x00,0x00,0x00]
+#CHECK: nc	0(256,%r15), 0          # encoding: [0xd4,0xff,0xf0,0x00,0x00,0x00]
+
+	nc	0(1), 0
+	nc	0(1), 0(%r1)
+	nc	0(1), 0(%r15)
+	nc	0(1), 4095
+	nc	0(1), 4095(%r1)
+	nc	0(1), 4095(%r15)
+	nc	0(1,%r1), 0
+	nc	0(1,%r15), 0
+	nc	4095(1,%r1), 0
+	nc	4095(1,%r15), 0
+	nc	0(256,%r1), 0
+	nc	0(256,%r15), 0
+
+#CHECK: ng	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x80]
+#CHECK: ng	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x80]
+#CHECK: ng	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x80]
+#CHECK: ng	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x80]
+#CHECK: ng	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x80]
+#CHECK: ng	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x80]
+#CHECK: ng	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x80]
+#CHECK: ng	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x80]
+#CHECK: ng	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x80]
+#CHECK: ng	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x80]
+
+	ng	%r0, -524288
+	ng	%r0, -1
+	ng	%r0, 0
+	ng	%r0, 1
+	ng	%r0, 524287
+	ng	%r0, 0(%r1)
+	ng	%r0, 0(%r15)
+	ng	%r0, 524287(%r1,%r15)
+	ng	%r0, 524287(%r15,%r1)
+	ng	%r15, 0
+
+#CHECK: ngr	%r0, %r0                # encoding: [0xb9,0x80,0x00,0x00]
+#CHECK: ngr	%r0, %r15               # encoding: [0xb9,0x80,0x00,0x0f]
+#CHECK: ngr	%r15, %r0               # encoding: [0xb9,0x80,0x00,0xf0]
+#CHECK: ngr	%r7, %r8                # encoding: [0xb9,0x80,0x00,0x78]
+
+	ngr	%r0,%r0
+	ngr	%r0,%r15
+	ngr	%r15,%r0
+	ngr	%r7,%r8
+
+#CHECK: ni	0, 0                    # encoding: [0x94,0x00,0x00,0x00]
+#CHECK: ni	4095, 0                 # encoding: [0x94,0x00,0x0f,0xff]
+#CHECK: ni	0, 255                  # encoding: [0x94,0xff,0x00,0x00]
+#CHECK: ni	0(%r1), 42              # encoding: [0x94,0x2a,0x10,0x00]
+#CHECK: ni	0(%r15), 42             # encoding: [0x94,0x2a,0xf0,0x00]
+#CHECK: ni	4095(%r1), 42           # encoding: [0x94,0x2a,0x1f,0xff]
+#CHECK: ni	4095(%r15), 42          # encoding: [0x94,0x2a,0xff,0xff]
+
+	ni	0, 0
+	ni	4095, 0
+	ni	0, 255
+	ni	0(%r1), 42
+	ni	0(%r15), 42
+	ni	4095(%r1), 42
+	ni	4095(%r15), 42
+
+#CHECK: nihf	%r0, 0                  # encoding: [0xc0,0x0a,0x00,0x00,0x00,0x00]
+#CHECK: nihf	%r0, 4294967295         # encoding: [0xc0,0x0a,0xff,0xff,0xff,0xff]
+#CHECK: nihf	%r15, 0                 # encoding: [0xc0,0xfa,0x00,0x00,0x00,0x00]
+
+	nihf	%r0, 0
+	nihf	%r0, 0xffffffff
+	nihf	%r15, 0
+
+#CHECK: nihh	%r0, 0                  # encoding: [0xa5,0x04,0x00,0x00]
+#CHECK: nihh	%r0, 32768              # encoding: [0xa5,0x04,0x80,0x00]
+#CHECK: nihh	%r0, 65535              # encoding: [0xa5,0x04,0xff,0xff]
+#CHECK: nihh	%r15, 0                 # encoding: [0xa5,0xf4,0x00,0x00]
+
+	nihh	%r0, 0
+	nihh	%r0, 0x8000
+	nihh	%r0, 0xffff
+	nihh	%r15, 0
+
+#CHECK: nihl	%r0, 0                  # encoding: [0xa5,0x05,0x00,0x00]
+#CHECK: nihl	%r0, 32768              # encoding: [0xa5,0x05,0x80,0x00]
+#CHECK: nihl	%r0, 65535              # encoding: [0xa5,0x05,0xff,0xff]
+#CHECK: nihl	%r15, 0                 # encoding: [0xa5,0xf5,0x00,0x00]
+
+	nihl	%r0, 0
+	nihl	%r0, 0x8000
+	nihl	%r0, 0xffff
+	nihl	%r15, 0
+
+#CHECK: nilf	%r0, 0                  # encoding: [0xc0,0x0b,0x00,0x00,0x00,0x00]
+#CHECK: nilf	%r0, 4294967295         # encoding: [0xc0,0x0b,0xff,0xff,0xff,0xff]
+#CHECK: nilf	%r15, 0                 # encoding: [0xc0,0xfb,0x00,0x00,0x00,0x00]
+
+	nilf	%r0, 0
+	nilf	%r0, 0xffffffff
+	nilf	%r15, 0
+
+#CHECK: nilh	%r0, 0                  # encoding: [0xa5,0x06,0x00,0x00]
+#CHECK: nilh	%r0, 32768              # encoding: [0xa5,0x06,0x80,0x00]
+#CHECK: nilh	%r0, 65535              # encoding: [0xa5,0x06,0xff,0xff]
+#CHECK: nilh	%r15, 0                 # encoding: [0xa5,0xf6,0x00,0x00]
+
+	nilh	%r0, 0
+	nilh	%r0, 0x8000
+	nilh	%r0, 0xffff
+	nilh	%r15, 0
+
+#CHECK: nill	%r0, 0                  # encoding: [0xa5,0x07,0x00,0x00]
+#CHECK: nill	%r0, 32768              # encoding: [0xa5,0x07,0x80,0x00]
+#CHECK: nill	%r0, 65535              # encoding: [0xa5,0x07,0xff,0xff]
+#CHECK: nill	%r15, 0                 # encoding: [0xa5,0xf7,0x00,0x00]
+
+	nill	%r0, 0
+	nill	%r0, 0x8000
+	nill	%r0, 0xffff
+	nill	%r15, 0
+
+#CHECK: niy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x54]
+#CHECK: niy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x54]
+#CHECK: niy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x54]
+#CHECK: niy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x54]
+#CHECK: niy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x54]
+#CHECK: niy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x54]
+#CHECK: niy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x54]
+#CHECK: niy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x54]
+#CHECK: niy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x54]
+#CHECK: niy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x54]
+
+	niy	-524288, 0
+	niy	-1, 0
+	niy	0, 0
+	niy	1, 0
+	niy	524287, 0
+	niy	0, 255
+	niy	0(%r1), 42
+	niy	0(%r15), 42
+	niy	524287(%r1), 42
+	niy	524287(%r15), 42
+
+#CHECK: nr	%r0, %r0                # encoding: [0x14,0x00]
+#CHECK: nr	%r0, %r15               # encoding: [0x14,0x0f]
+#CHECK: nr	%r15, %r0               # encoding: [0x14,0xf0]
+#CHECK: nr	%r7, %r8                # encoding: [0x14,0x78]
+
+	nr	%r0,%r0
+	nr	%r0,%r15
+	nr	%r15,%r0
+	nr	%r7,%r8
+
+#CHECK: ny	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x54]
+#CHECK: ny	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x54]
+#CHECK: ny	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x54]
+#CHECK: ny	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x54]
+#CHECK: ny	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x54]
+#CHECK: ny	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x54]
+#CHECK: ny	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x54]
+#CHECK: ny	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x54]
+#CHECK: ny	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x54]
+#CHECK: ny	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x54]
+
+	ny	%r0, -524288
+	ny	%r0, -1
+	ny	%r0, 0
+	ny	%r0, 1
+	ny	%r0, 524287
+	ny	%r0, 0(%r1)
+	ny	%r0, 0(%r15)
+	ny	%r0, 524287(%r1,%r15)
+	ny	%r0, 524287(%r15,%r1)
+	ny	%r15, 0
+
+#CHECK: o	%r0, 0                  # encoding: [0x56,0x00,0x00,0x00]
+#CHECK: o	%r0, 4095               # encoding: [0x56,0x00,0x0f,0xff]
+#CHECK: o	%r0, 0(%r1)             # encoding: [0x56,0x00,0x10,0x00]
+#CHECK: o	%r0, 0(%r15)            # encoding: [0x56,0x00,0xf0,0x00]
+#CHECK: o	%r0, 4095(%r1,%r15)     # encoding: [0x56,0x01,0xff,0xff]
+#CHECK: o	%r0, 4095(%r15,%r1)     # encoding: [0x56,0x0f,0x1f,0xff]
+#CHECK: o	%r15, 0                 # encoding: [0x56,0xf0,0x00,0x00]
+
+	o	%r0, 0
+	o	%r0, 4095
+	o	%r0, 0(%r1)
+	o	%r0, 0(%r15)
+	o	%r0, 4095(%r1,%r15)
+	o	%r0, 4095(%r15,%r1)
+	o	%r15, 0
+
+#CHECK: oc	0(1), 0                 # encoding: [0xd6,0x00,0x00,0x00,0x00,0x00]
+#CHECK: oc	0(1), 0(%r1)            # encoding: [0xd6,0x00,0x00,0x00,0x10,0x00]
+#CHECK: oc	0(1), 0(%r15)           # encoding: [0xd6,0x00,0x00,0x00,0xf0,0x00]
+#CHECK: oc	0(1), 4095              # encoding: [0xd6,0x00,0x00,0x00,0x0f,0xff]
+#CHECK: oc	0(1), 4095(%r1)         # encoding: [0xd6,0x00,0x00,0x00,0x1f,0xff]
+#CHECK: oc	0(1), 4095(%r15)        # encoding: [0xd6,0x00,0x00,0x00,0xff,0xff]
+#CHECK: oc	0(1,%r1), 0             # encoding: [0xd6,0x00,0x10,0x00,0x00,0x00]
+#CHECK: oc	0(1,%r15), 0            # encoding: [0xd6,0x00,0xf0,0x00,0x00,0x00]
+#CHECK: oc	4095(1,%r1), 0          # encoding: [0xd6,0x00,0x1f,0xff,0x00,0x00]
+#CHECK: oc	4095(1,%r15), 0         # encoding: [0xd6,0x00,0xff,0xff,0x00,0x00]
+#CHECK: oc	0(256,%r1), 0           # encoding: [0xd6,0xff,0x10,0x00,0x00,0x00]
+#CHECK: oc	0(256,%r15), 0          # encoding: [0xd6,0xff,0xf0,0x00,0x00,0x00]
+
+	oc	0(1), 0
+	oc	0(1), 0(%r1)
+	oc	0(1), 0(%r15)
+	oc	0(1), 4095
+	oc	0(1), 4095(%r1)
+	oc	0(1), 4095(%r15)
+	oc	0(1,%r1), 0
+	oc	0(1,%r15), 0
+	oc	4095(1,%r1), 0
+	oc	4095(1,%r15), 0
+	oc	0(256,%r1), 0
+	oc	0(256,%r15), 0
+
+#CHECK: og	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x81]
+#CHECK: og	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x81]
+#CHECK: og	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x81]
+#CHECK: og	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x81]
+#CHECK: og	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x81]
+#CHECK: og	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x81]
+#CHECK: og	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x81]
+#CHECK: og	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x81]
+#CHECK: og	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x81]
+#CHECK: og	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x81]
+
+	og	%r0, -524288
+	og	%r0, -1
+	og	%r0, 0
+	og	%r0, 1
+	og	%r0, 524287
+	og	%r0, 0(%r1)
+	og	%r0, 0(%r15)
+	og	%r0, 524287(%r1,%r15)
+	og	%r0, 524287(%r15,%r1)
+	og	%r15, 0
+
+#CHECK: ogr	%r0, %r0                # encoding: [0xb9,0x81,0x00,0x00]
+#CHECK: ogr	%r0, %r15               # encoding: [0xb9,0x81,0x00,0x0f]
+#CHECK: ogr	%r15, %r0               # encoding: [0xb9,0x81,0x00,0xf0]
+#CHECK: ogr	%r7, %r8                # encoding: [0xb9,0x81,0x00,0x78]
+
+	ogr	%r0,%r0
+	ogr	%r0,%r15
+	ogr	%r15,%r0
+	ogr	%r7,%r8
+
+#CHECK: oi	0, 0                    # encoding: [0x96,0x00,0x00,0x00]
+#CHECK: oi	4095, 0                 # encoding: [0x96,0x00,0x0f,0xff]
+#CHECK: oi	0, 255                  # encoding: [0x96,0xff,0x00,0x00]
+#CHECK: oi	0(%r1), 42              # encoding: [0x96,0x2a,0x10,0x00]
+#CHECK: oi	0(%r15), 42             # encoding: [0x96,0x2a,0xf0,0x00]
+#CHECK: oi	4095(%r1), 42           # encoding: [0x96,0x2a,0x1f,0xff]
+#CHECK: oi	4095(%r15), 42          # encoding: [0x96,0x2a,0xff,0xff]
+
+	oi	0, 0
+	oi	4095, 0
+	oi	0, 255
+	oi	0(%r1), 42
+	oi	0(%r15), 42
+	oi	4095(%r1), 42
+	oi	4095(%r15), 42
+
+#CHECK: oihf	%r0, 0                  # encoding: [0xc0,0x0c,0x00,0x00,0x00,0x00]
+#CHECK: oihf	%r0, 4294967295         # encoding: [0xc0,0x0c,0xff,0xff,0xff,0xff]
+#CHECK: oihf	%r15, 0                 # encoding: [0xc0,0xfc,0x00,0x00,0x00,0x00]
+
+	oihf	%r0, 0
+	oihf	%r0, 0xffffffff
+	oihf	%r15, 0
+
+#CHECK: oihh	%r0, 0                  # encoding: [0xa5,0x08,0x00,0x00]
+#CHECK: oihh	%r0, 32768              # encoding: [0xa5,0x08,0x80,0x00]
+#CHECK: oihh	%r0, 65535              # encoding: [0xa5,0x08,0xff,0xff]
+#CHECK: oihh	%r15, 0                 # encoding: [0xa5,0xf8,0x00,0x00]
+
+	oihh	%r0, 0
+	oihh	%r0, 0x8000
+	oihh	%r0, 0xffff
+	oihh	%r15, 0
+
+#CHECK: oihl	%r0, 0                  # encoding: [0xa5,0x09,0x00,0x00]
+#CHECK: oihl	%r0, 32768              # encoding: [0xa5,0x09,0x80,0x00]
+#CHECK: oihl	%r0, 65535              # encoding: [0xa5,0x09,0xff,0xff]
+#CHECK: oihl	%r15, 0                 # encoding: [0xa5,0xf9,0x00,0x00]
+
+	oihl	%r0, 0
+	oihl	%r0, 0x8000
+	oihl	%r0, 0xffff
+	oihl	%r15, 0
+
+#CHECK: oilf	%r0, 0                  # encoding: [0xc0,0x0d,0x00,0x00,0x00,0x00]
+#CHECK: oilf	%r0, 4294967295         # encoding: [0xc0,0x0d,0xff,0xff,0xff,0xff]
+#CHECK: oilf	%r15, 0                 # encoding: [0xc0,0xfd,0x00,0x00,0x00,0x00]
+
+	oilf	%r0, 0
+	oilf	%r0, 0xffffffff
+	oilf	%r15, 0
+
+#CHECK: oilh	%r0, 0                  # encoding: [0xa5,0x0a,0x00,0x00]
+#CHECK: oilh	%r0, 32768              # encoding: [0xa5,0x0a,0x80,0x00]
+#CHECK: oilh	%r0, 65535              # encoding: [0xa5,0x0a,0xff,0xff]
+#CHECK: oilh	%r15, 0                 # encoding: [0xa5,0xfa,0x00,0x00]
+
+	oilh	%r0, 0
+	oilh	%r0, 0x8000
+	oilh	%r0, 0xffff
+	oilh	%r15, 0
+
+#CHECK: oill	%r0, 0                  # encoding: [0xa5,0x0b,0x00,0x00]
+#CHECK: oill	%r0, 32768              # encoding: [0xa5,0x0b,0x80,0x00]
+#CHECK: oill	%r0, 65535              # encoding: [0xa5,0x0b,0xff,0xff]
+#CHECK: oill	%r15, 0                 # encoding: [0xa5,0xfb,0x00,0x00]
+
+	oill	%r0, 0
+	oill	%r0, 0x8000
+	oill	%r0, 0xffff
+	oill	%r15, 0
+
+#CHECK: oiy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x56]
+#CHECK: oiy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x56]
+#CHECK: oiy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x56]
+#CHECK: oiy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x56]
+#CHECK: oiy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x56]
+#CHECK: oiy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x56]
+#CHECK: oiy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x56]
+#CHECK: oiy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x56]
+#CHECK: oiy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x56]
+#CHECK: oiy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x56]
+
+	oiy	-524288, 0
+	oiy	-1, 0
+	oiy	0, 0
+	oiy	1, 0
+	oiy	524287, 0
+	oiy	0, 255
+	oiy	0(%r1), 42
+	oiy	0(%r15), 42
+	oiy	524287(%r1), 42
+	oiy	524287(%r15), 42
+
+#CHECK: or	%r0, %r0                # encoding: [0x16,0x00]
+#CHECK: or	%r0, %r15               # encoding: [0x16,0x0f]
+#CHECK: or	%r15, %r0               # encoding: [0x16,0xf0]
+#CHECK: or	%r7, %r8                # encoding: [0x16,0x78]
+
+	or	%r0,%r0
+	or	%r0,%r15
+	or	%r15,%r0
+	or	%r7,%r8
+
+#CHECK: oy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x56]
+#CHECK: oy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x56]
+#CHECK: oy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x56]
+#CHECK: oy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x56]
+#CHECK: oy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x56]
+#CHECK: oy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x56]
+#CHECK: oy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x56]
+#CHECK: oy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x56]
+#CHECK: oy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x56]
+#CHECK: oy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x56]
+
+	oy	%r0, -524288
+	oy	%r0, -1
+	oy	%r0, 0
+	oy	%r0, 1
+	oy	%r0, 524287
+	oy	%r0, 0(%r1)
+	oy	%r0, 0(%r15)
+	oy	%r0, 524287(%r1,%r15)
+	oy	%r0, 524287(%r15,%r1)
+	oy	%r15, 0
+
+#CHECK: pfd	0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36]
+#CHECK: pfd	0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36]
+#CHECK: pfd	0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36]
+#CHECK: pfd	0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x36]
+#CHECK: pfd	0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x36]
+#CHECK: pfd	0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x36]
+#CHECK: pfd	0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x36]
+#CHECK: pfd	0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x36]
+#CHECK: pfd	0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x36]
+#CHECK: pfd	15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x36]
+
+	pfd	0, -524288
+	pfd	0, -1
+	pfd	0, 0
+	pfd	0, 1
+	pfd	0, 524287
+	pfd	0, 0(%r1)
+	pfd	0, 0(%r15)
+	pfd	0, 524287(%r1,%r15)
+	pfd	0, 524287(%r15,%r1)
+	pfd	15, 0
+
+#CHECK: pfdrl	0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	pfdrl	0, -0x100000000
+#CHECK: pfdrl	0, .[[LAB:L.*]]-2	# encoding: [0xc6,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	pfdrl	0, -2
+#CHECK: pfdrl	0, .[[LAB:L.*]]	# encoding: [0xc6,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	pfdrl	0, 0
+#CHECK: pfdrl	0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	pfdrl	0, 0xfffffffe
+
+#CHECK: pfdrl	0, foo                # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: pfdrl	15, foo               # encoding: [0xc6,0xf2,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	pfdrl	0, foo
+	pfdrl	15, foo
+
+#CHECK: pfdrl	3, bar+100            # encoding: [0xc6,0x32,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: pfdrl	4, bar+100            # encoding: [0xc6,0x42,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	pfdrl	3, bar+100
+	pfdrl	4, bar+100
+
+#CHECK: pfdrl	7, frob@PLT           # encoding: [0xc6,0x72,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: pfdrl	8, frob@PLT           # encoding: [0xc6,0x82,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	pfdrl	7, frob@PLT
+	pfdrl	8, frob@PLT
+
+#CHECK: risbg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
+#CHECK: risbg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
+#CHECK: risbg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x55]
+#CHECK: risbg	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x55]
+#CHECK: risbg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x55]
+#CHECK: risbg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x55]
+#CHECK: risbg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x55]
+
+	risbg	%r0,%r0,0,0,0
+	risbg	%r0,%r0,0,0,63
+	risbg	%r0,%r0,0,255,0
+	risbg	%r0,%r0,255,0,0
+	risbg	%r0,%r15,0,0,0
+	risbg	%r15,%r0,0,0,0
+	risbg	%r4,%r5,6,7,8
+
+#CHECK: rnsbg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x54]
+#CHECK: rnsbg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x54]
+#CHECK: rnsbg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x54]
+#CHECK: rnsbg	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x54]
+#CHECK: rnsbg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x54]
+#CHECK: rnsbg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x54]
+#CHECK: rnsbg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x54]
+
+	rnsbg	%r0,%r0,0,0,0
+	rnsbg	%r0,%r0,0,0,63
+	rnsbg	%r0,%r0,0,255,0
+	rnsbg	%r0,%r0,255,0,0
+	rnsbg	%r0,%r15,0,0,0
+	rnsbg	%r15,%r0,0,0,0
+	rnsbg	%r4,%r5,6,7,8
+
+#CHECK: rosbg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x56]
+#CHECK: rosbg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x56]
+#CHECK: rosbg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x56]
+#CHECK: rosbg	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x56]
+#CHECK: rosbg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x56]
+#CHECK: rosbg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x56]
+#CHECK: rosbg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x56]
+
+	rosbg	%r0,%r0,0,0,0
+	rosbg	%r0,%r0,0,0,63
+	rosbg	%r0,%r0,0,255,0
+	rosbg	%r0,%r0,255,0,0
+	rosbg	%r0,%r15,0,0,0
+	rosbg	%r15,%r0,0,0,0
+	rosbg	%r4,%r5,6,7,8
+
+#CHECK: rxsbg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x57]
+#CHECK: rxsbg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x57]
+#CHECK: rxsbg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x57]
+#CHECK: rxsbg	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x57]
+#CHECK: rxsbg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x57]
+#CHECK: rxsbg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x57]
+#CHECK: rxsbg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x57]
+
+	rxsbg	%r0,%r0,0,0,0
+	rxsbg	%r0,%r0,0,0,63
+	rxsbg	%r0,%r0,0,255,0
+	rxsbg	%r0,%r0,255,0,0
+	rxsbg	%r0,%r15,0,0,0
+	rxsbg	%r15,%r0,0,0,0
+	rxsbg	%r4,%r5,6,7,8
+
+#CHECK: rll	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x1d]
+#CHECK: rll	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x1d]
+#CHECK: rll	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x1d]
+#CHECK: rll	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x1d]
+#CHECK: rll	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x1d]
+#CHECK: rll	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x1d]
+#CHECK: rll	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x1d]
+#CHECK: rll	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x1d]
+
+	rll	%r0,%r0,0
+	rll	%r15,%r1,0
+	rll	%r1,%r15,0
+	rll	%r15,%r15,0
+	rll	%r0,%r0,-524288
+	rll	%r0,%r0,-1
+	rll	%r0,%r0,1
+	rll	%r0,%r0,524287
+	rll	%r0,%r0,0(%r1)
+	rll	%r0,%r0,0(%r15)
+	rll	%r0,%r0,524287(%r1)
+	rll	%r0,%r0,524287(%r15)
+
+#CHECK: rllg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x1c]
+#CHECK: rllg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x1c]
+#CHECK: rllg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x1c]
+#CHECK: rllg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x1c]
+#CHECK: rllg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x1c]
+
+	rllg	%r0,%r0,0
+	rllg	%r15,%r1,0
+	rllg	%r1,%r15,0
+	rllg	%r15,%r15,0
+	rllg	%r0,%r0,-524288
+	rllg	%r0,%r0,-1
+	rllg	%r0,%r0,1
+	rllg	%r0,%r0,524287
+	rllg	%r0,%r0,0(%r1)
+	rllg	%r0,%r0,0(%r15)
+	rllg	%r0,%r0,524287(%r1)
+	rllg	%r0,%r0,524287(%r15)
+
+#CHECK: s	%r0, 0                  # encoding: [0x5b,0x00,0x00,0x00]
+#CHECK: s	%r0, 4095               # encoding: [0x5b,0x00,0x0f,0xff]
+#CHECK: s	%r0, 0(%r1)             # encoding: [0x5b,0x00,0x10,0x00]
+#CHECK: s	%r0, 0(%r15)            # encoding: [0x5b,0x00,0xf0,0x00]
+#CHECK: s	%r0, 4095(%r1,%r15)     # encoding: [0x5b,0x01,0xff,0xff]
+#CHECK: s	%r0, 4095(%r15,%r1)     # encoding: [0x5b,0x0f,0x1f,0xff]
+#CHECK: s	%r15, 0                 # encoding: [0x5b,0xf0,0x00,0x00]
+
+	s	%r0, 0
+	s	%r0, 4095
+	s	%r0, 0(%r1)
+	s	%r0, 0(%r15)
+	s	%r0, 4095(%r1,%r15)
+	s	%r0, 4095(%r15,%r1)
+	s	%r15, 0
+
+#CHECK: sdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1b]
+#CHECK: sdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1b]
+#CHECK: sdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1b]
+#CHECK: sdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1b]
+#CHECK: sdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1b]
+#CHECK: sdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1b]
+#CHECK: sdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1b]
+
+	sdb	%f0, 0
+	sdb	%f0, 4095
+	sdb	%f0, 0(%r1)
+	sdb	%f0, 0(%r15)
+	sdb	%f0, 4095(%r1,%r15)
+	sdb	%f0, 4095(%r15,%r1)
+	sdb	%f15, 0
+
+#CHECK: sdbr	%f0, %f0                # encoding: [0xb3,0x1b,0x00,0x00]
+#CHECK: sdbr	%f0, %f15               # encoding: [0xb3,0x1b,0x00,0x0f]
+#CHECK: sdbr	%f7, %f8                # encoding: [0xb3,0x1b,0x00,0x78]
+#CHECK: sdbr	%f15, %f0               # encoding: [0xb3,0x1b,0x00,0xf0]
+
+	sdbr	%f0, %f0
+	sdbr	%f0, %f15
+	sdbr	%f7, %f8
+	sdbr	%f15, %f0
+
+#CHECK: seb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: seb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0b]
+#CHECK: seb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0b]
+#CHECK: seb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: seb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0b]
+#CHECK: seb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0b]
+#CHECK: seb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0b]
+
+	seb	%f0, 0
+	seb	%f0, 4095
+	seb	%f0, 0(%r1)
+	seb	%f0, 0(%r15)
+	seb	%f0, 4095(%r1,%r15)
+	seb	%f0, 4095(%r15,%r1)
+	seb	%f15, 0
+
+#CHECK: sebr	%f0, %f0                # encoding: [0xb3,0x0b,0x00,0x00]
+#CHECK: sebr	%f0, %f15               # encoding: [0xb3,0x0b,0x00,0x0f]
+#CHECK: sebr	%f7, %f8                # encoding: [0xb3,0x0b,0x00,0x78]
+#CHECK: sebr	%f15, %f0               # encoding: [0xb3,0x0b,0x00,0xf0]
+
+	sebr	%f0, %f0
+	sebr	%f0, %f15
+	sebr	%f7, %f8
+	sebr	%f15, %f0
+
+#CHECK: sg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x09]
+#CHECK: sg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x09]
+#CHECK: sg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x09]
+#CHECK: sg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x09]
+#CHECK: sg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x09]
+#CHECK: sg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x09]
+#CHECK: sg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x09]
+#CHECK: sg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x09]
+#CHECK: sg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x09]
+#CHECK: sg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x09]
+
+	sg	%r0, -524288
+	sg	%r0, -1
+	sg	%r0, 0
+	sg	%r0, 1
+	sg	%r0, 524287
+	sg	%r0, 0(%r1)
+	sg	%r0, 0(%r15)
+	sg	%r0, 524287(%r1,%r15)
+	sg	%r0, 524287(%r15,%r1)
+	sg	%r15, 0
+
+#CHECK: sgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x19]
+#CHECK: sgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x19]
+#CHECK: sgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x19]
+#CHECK: sgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x19]
+#CHECK: sgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x19]
+#CHECK: sgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x19]
+#CHECK: sgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x19]
+#CHECK: sgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x19]
+#CHECK: sgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x19]
+#CHECK: sgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x19]
+
+	sgf	%r0, -524288
+	sgf	%r0, -1
+	sgf	%r0, 0
+	sgf	%r0, 1
+	sgf	%r0, 524287
+	sgf	%r0, 0(%r1)
+	sgf	%r0, 0(%r15)
+	sgf	%r0, 524287(%r1,%r15)
+	sgf	%r0, 524287(%r15,%r1)
+	sgf	%r15, 0
+
+#CHECK: sgfr	%r0, %r0                # encoding: [0xb9,0x19,0x00,0x00]
+#CHECK: sgfr	%r0, %r15               # encoding: [0xb9,0x19,0x00,0x0f]
+#CHECK: sgfr	%r15, %r0               # encoding: [0xb9,0x19,0x00,0xf0]
+#CHECK: sgfr	%r7, %r8                # encoding: [0xb9,0x19,0x00,0x78]
+
+	sgfr	%r0,%r0
+	sgfr	%r0,%r15
+	sgfr	%r15,%r0
+	sgfr	%r7,%r8
+
+#CHECK: sgr	%r0, %r0                # encoding: [0xb9,0x09,0x00,0x00]
+#CHECK: sgr	%r0, %r15               # encoding: [0xb9,0x09,0x00,0x0f]
+#CHECK: sgr	%r15, %r0               # encoding: [0xb9,0x09,0x00,0xf0]
+#CHECK: sgr	%r7, %r8                # encoding: [0xb9,0x09,0x00,0x78]
+
+	sgr	%r0,%r0
+	sgr	%r0,%r15
+	sgr	%r15,%r0
+	sgr	%r7,%r8
+
+#CHECK: sh	%r0, 0                  # encoding: [0x4b,0x00,0x00,0x00]
+#CHECK: sh	%r0, 4095               # encoding: [0x4b,0x00,0x0f,0xff]
+#CHECK: sh	%r0, 0(%r1)             # encoding: [0x4b,0x00,0x10,0x00]
+#CHECK: sh	%r0, 0(%r15)            # encoding: [0x4b,0x00,0xf0,0x00]
+#CHECK: sh	%r0, 4095(%r1,%r15)     # encoding: [0x4b,0x01,0xff,0xff]
+#CHECK: sh	%r0, 4095(%r15,%r1)     # encoding: [0x4b,0x0f,0x1f,0xff]
+#CHECK: sh	%r15, 0                 # encoding: [0x4b,0xf0,0x00,0x00]
+
+	sh	%r0, 0
+	sh	%r0, 4095
+	sh	%r0, 0(%r1)
+	sh	%r0, 0(%r15)
+	sh	%r0, 4095(%r1,%r15)
+	sh	%r0, 4095(%r15,%r1)
+	sh	%r15, 0
+
+#CHECK: shy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7b]
+#CHECK: shy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7b]
+#CHECK: shy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7b]
+#CHECK: shy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7b]
+#CHECK: shy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7b]
+#CHECK: shy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7b]
+#CHECK: shy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7b]
+#CHECK: shy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7b]
+#CHECK: shy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7b]
+#CHECK: shy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7b]
+
+	shy	%r0, -524288
+	shy	%r0, -1
+	shy	%r0, 0
+	shy	%r0, 1
+	shy	%r0, 524287
+	shy	%r0, 0(%r1)
+	shy	%r0, 0(%r15)
+	shy	%r0, 524287(%r1,%r15)
+	shy	%r0, 524287(%r15,%r1)
+	shy	%r15, 0
+
+#CHECK: sl	%r0, 0                  # encoding: [0x5f,0x00,0x00,0x00]
+#CHECK: sl	%r0, 4095               # encoding: [0x5f,0x00,0x0f,0xff]
+#CHECK: sl	%r0, 0(%r1)             # encoding: [0x5f,0x00,0x10,0x00]
+#CHECK: sl	%r0, 0(%r15)            # encoding: [0x5f,0x00,0xf0,0x00]
+#CHECK: sl	%r0, 4095(%r1,%r15)     # encoding: [0x5f,0x01,0xff,0xff]
+#CHECK: sl	%r0, 4095(%r15,%r1)     # encoding: [0x5f,0x0f,0x1f,0xff]
+#CHECK: sl	%r15, 0                 # encoding: [0x5f,0xf0,0x00,0x00]
+
+	sl	%r0, 0
+	sl	%r0, 4095
+	sl	%r0, 0(%r1)
+	sl	%r0, 0(%r15)
+	sl	%r0, 4095(%r1,%r15)
+	sl	%r0, 4095(%r15,%r1)
+	sl	%r15, 0
+
+#CHECK: slb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x99]
+#CHECK: slb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x99]
+#CHECK: slb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x99]
+#CHECK: slb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x99]
+#CHECK: slb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x99]
+#CHECK: slb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x99]
+#CHECK: slb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x99]
+#CHECK: slb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x99]
+#CHECK: slb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x99]
+#CHECK: slb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x99]
+
+	slb	%r0, -524288
+	slb	%r0, -1
+	slb	%r0, 0
+	slb	%r0, 1
+	slb	%r0, 524287
+	slb	%r0, 0(%r1)
+	slb	%r0, 0(%r15)
+	slb	%r0, 524287(%r1,%r15)
+	slb	%r0, 524287(%r15,%r1)
+	slb	%r15, 0
+
+#CHECK: slbg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x89]
+#CHECK: slbg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x89]
+#CHECK: slbg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x89]
+#CHECK: slbg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x89]
+#CHECK: slbg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x89]
+#CHECK: slbg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x89]
+#CHECK: slbg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x89]
+#CHECK: slbg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x89]
+#CHECK: slbg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x89]
+#CHECK: slbg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x89]
+
+	slbg	%r0, -524288
+	slbg	%r0, -1
+	slbg	%r0, 0
+	slbg	%r0, 1
+	slbg	%r0, 524287
+	slbg	%r0, 0(%r1)
+	slbg	%r0, 0(%r15)
+	slbg	%r0, 524287(%r1,%r15)
+	slbg	%r0, 524287(%r15,%r1)
+	slbg	%r15, 0
+
+#CHECK: slbgr	%r0, %r0                # encoding: [0xb9,0x89,0x00,0x00]
+#CHECK: slbgr	%r0, %r15               # encoding: [0xb9,0x89,0x00,0x0f]
+#CHECK: slbgr	%r15, %r0               # encoding: [0xb9,0x89,0x00,0xf0]
+#CHECK: slbgr	%r7, %r8                # encoding: [0xb9,0x89,0x00,0x78]
+
+	slbgr	%r0,%r0
+	slbgr	%r0,%r15
+	slbgr	%r15,%r0
+	slbgr	%r7,%r8
+
+#CHECK: slbr	%r0, %r0                # encoding: [0xb9,0x99,0x00,0x00]
+#CHECK: slbr	%r0, %r15               # encoding: [0xb9,0x99,0x00,0x0f]
+#CHECK: slbr	%r15, %r0               # encoding: [0xb9,0x99,0x00,0xf0]
+#CHECK: slbr	%r7, %r8                # encoding: [0xb9,0x99,0x00,0x78]
+
+	slbr	%r0,%r0
+	slbr	%r0,%r15
+	slbr	%r15,%r0
+	slbr	%r7,%r8
+
+#CHECK: slfi	%r0, 0                  # encoding: [0xc2,0x05,0x00,0x00,0x00,0x00]
+#CHECK: slfi	%r0, 4294967295         # encoding: [0xc2,0x05,0xff,0xff,0xff,0xff]
+#CHECK: slfi	%r15, 0                 # encoding: [0xc2,0xf5,0x00,0x00,0x00,0x00]
+
+	slfi	%r0, 0
+	slfi	%r0, (1 << 32) - 1
+	slfi	%r15, 0
+
+#CHECK: slg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0b]
+#CHECK: slg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0b]
+#CHECK: slg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: slg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0b]
+#CHECK: slg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0b]
+#CHECK: slg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0b]
+#CHECK: slg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: slg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0b]
+#CHECK: slg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0b]
+#CHECK: slg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0b]
+
+	slg	%r0, -524288
+	slg	%r0, -1
+	slg	%r0, 0
+	slg	%r0, 1
+	slg	%r0, 524287
+	slg	%r0, 0(%r1)
+	slg	%r0, 0(%r15)
+	slg	%r0, 524287(%r1,%r15)
+	slg	%r0, 524287(%r15,%r1)
+	slg	%r15, 0
+
+#CHECK: slgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1b]
+#CHECK: slgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1b]
+#CHECK: slgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1b]
+#CHECK: slgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1b]
+#CHECK: slgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1b]
+#CHECK: slgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1b]
+#CHECK: slgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1b]
+#CHECK: slgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1b]
+#CHECK: slgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1b]
+#CHECK: slgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1b]
+
+	slgf	%r0, -524288
+	slgf	%r0, -1
+	slgf	%r0, 0
+	slgf	%r0, 1
+	slgf	%r0, 524287
+	slgf	%r0, 0(%r1)
+	slgf	%r0, 0(%r15)
+	slgf	%r0, 524287(%r1,%r15)
+	slgf	%r0, 524287(%r15,%r1)
+	slgf	%r15, 0
+
+#CHECK: slgfi	%r0, 0                  # encoding: [0xc2,0x04,0x00,0x00,0x00,0x00]
+#CHECK: slgfi	%r0, 4294967295         # encoding: [0xc2,0x04,0xff,0xff,0xff,0xff]
+#CHECK: slgfi	%r15, 0                 # encoding: [0xc2,0xf4,0x00,0x00,0x00,0x00]
+
+	slgfi	%r0, 0
+	slgfi	%r0, (1 << 32) - 1
+	slgfi	%r15, 0
+
+#CHECK: slgfr	%r0, %r0                # encoding: [0xb9,0x1b,0x00,0x00]
+#CHECK: slgfr	%r0, %r15               # encoding: [0xb9,0x1b,0x00,0x0f]
+#CHECK: slgfr	%r15, %r0               # encoding: [0xb9,0x1b,0x00,0xf0]
+#CHECK: slgfr	%r7, %r8                # encoding: [0xb9,0x1b,0x00,0x78]
+
+	slgfr	%r0,%r0
+	slgfr	%r0,%r15
+	slgfr	%r15,%r0
+	slgfr	%r7,%r8
+
+#CHECK: slgr	%r0, %r0                # encoding: [0xb9,0x0b,0x00,0x00]
+#CHECK: slgr	%r0, %r15               # encoding: [0xb9,0x0b,0x00,0x0f]
+#CHECK: slgr	%r15, %r0               # encoding: [0xb9,0x0b,0x00,0xf0]
+#CHECK: slgr	%r7, %r8                # encoding: [0xb9,0x0b,0x00,0x78]
+
+	slgr	%r0,%r0
+	slgr	%r0,%r15
+	slgr	%r15,%r0
+	slgr	%r7,%r8
+
+#CHECK: sll	%r0, 0                  # encoding: [0x89,0x00,0x00,0x00]
+#CHECK: sll	%r7, 0                  # encoding: [0x89,0x70,0x00,0x00]
+#CHECK: sll	%r15, 0                 # encoding: [0x89,0xf0,0x00,0x00]
+#CHECK: sll	%r0, 4095               # encoding: [0x89,0x00,0x0f,0xff]
+#CHECK: sll	%r0, 0(%r1)             # encoding: [0x89,0x00,0x10,0x00]
+#CHECK: sll	%r0, 0(%r15)            # encoding: [0x89,0x00,0xf0,0x00]
+#CHECK: sll	%r0, 4095(%r1)          # encoding: [0x89,0x00,0x1f,0xff]
+#CHECK: sll	%r0, 4095(%r15)         # encoding: [0x89,0x00,0xff,0xff]
+
+	sll	%r0,0
+	sll	%r7,0
+	sll	%r15,0
+	sll	%r0,4095
+	sll	%r0,0(%r1)
+	sll	%r0,0(%r15)
+	sll	%r0,4095(%r1)
+	sll	%r0,4095(%r15)
+
+#CHECK: sllg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0d]
+#CHECK: sllg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0d]
+#CHECK: sllg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0d]
+#CHECK: sllg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0d]
+#CHECK: sllg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0d]
+
+	sllg	%r0,%r0,0
+	sllg	%r15,%r1,0
+	sllg	%r1,%r15,0
+	sllg	%r15,%r15,0
+	sllg	%r0,%r0,-524288
+	sllg	%r0,%r0,-1
+	sllg	%r0,%r0,1
+	sllg	%r0,%r0,524287
+	sllg	%r0,%r0,0(%r1)
+	sllg	%r0,%r0,0(%r15)
+	sllg	%r0,%r0,524287(%r1)
+	sllg	%r0,%r0,524287(%r15)
+
+#CHECK: slr	%r0, %r0                # encoding: [0x1f,0x00]
+#CHECK: slr	%r0, %r15               # encoding: [0x1f,0x0f]
+#CHECK: slr	%r15, %r0               # encoding: [0x1f,0xf0]
+#CHECK: slr	%r7, %r8                # encoding: [0x1f,0x78]
+
+	slr	%r0,%r0
+	slr	%r0,%r15
+	slr	%r15,%r0
+	slr	%r7,%r8
+
+#CHECK: sly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5f]
+#CHECK: sly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5f]
+#CHECK: sly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5f]
+#CHECK: sly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5f]
+#CHECK: sly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5f]
+#CHECK: sly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5f]
+#CHECK: sly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5f]
+#CHECK: sly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5f]
+#CHECK: sly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5f]
+#CHECK: sly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5f]
+
+	sly	%r0, -524288
+	sly	%r0, -1
+	sly	%r0, 0
+	sly	%r0, 1
+	sly	%r0, 524287
+	sly	%r0, 0(%r1)
+	sly	%r0, 0(%r15)
+	sly	%r0, 524287(%r1,%r15)
+	sly	%r0, 524287(%r15,%r1)
+	sly	%r15, 0
+
+#CHECK: sqdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x15]
+#CHECK: sqdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x15]
+#CHECK: sqdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x15]
+#CHECK: sqdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x15]
+#CHECK: sqdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x15]
+#CHECK: sqdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x15]
+#CHECK: sqdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x15]
+
+	sqdb	%f0, 0
+	sqdb	%f0, 4095
+	sqdb	%f0, 0(%r1)
+	sqdb	%f0, 0(%r15)
+	sqdb	%f0, 4095(%r1,%r15)
+	sqdb	%f0, 4095(%r15,%r1)
+	sqdb	%f15, 0
+
+#CHECK: sqdbr	%f0, %f0                # encoding: [0xb3,0x15,0x00,0x00]
+#CHECK: sqdbr	%f0, %f15               # encoding: [0xb3,0x15,0x00,0x0f]
+#CHECK: sqdbr	%f7, %f8                # encoding: [0xb3,0x15,0x00,0x78]
+#CHECK: sqdbr	%f15, %f0               # encoding: [0xb3,0x15,0x00,0xf0]
+
+	sqdbr	%f0, %f0
+	sqdbr	%f0, %f15
+	sqdbr	%f7, %f8
+	sqdbr	%f15, %f0
+
+#CHECK: sqeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x14]
+#CHECK: sqeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x14]
+#CHECK: sqeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x14]
+#CHECK: sqeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x14]
+#CHECK: sqeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x14]
+#CHECK: sqeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x14]
+#CHECK: sqeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x14]
+
+	sqeb	%f0, 0
+	sqeb	%f0, 4095
+	sqeb	%f0, 0(%r1)
+	sqeb	%f0, 0(%r15)
+	sqeb	%f0, 4095(%r1,%r15)
+	sqeb	%f0, 4095(%r15,%r1)
+	sqeb	%f15, 0
+
+#CHECK: sqebr	%f0, %f0                # encoding: [0xb3,0x14,0x00,0x00]
+#CHECK: sqebr	%f0, %f15               # encoding: [0xb3,0x14,0x00,0x0f]
+#CHECK: sqebr	%f7, %f8                # encoding: [0xb3,0x14,0x00,0x78]
+#CHECK: sqebr	%f15, %f0               # encoding: [0xb3,0x14,0x00,0xf0]
+
+	sqebr	%f0, %f0
+	sqebr	%f0, %f15
+	sqebr	%f7, %f8
+	sqebr	%f15, %f0
+
+#CHECK: sqxbr	%f0, %f0                # encoding: [0xb3,0x16,0x00,0x00]
+#CHECK: sqxbr	%f0, %f13               # encoding: [0xb3,0x16,0x00,0x0d]
+#CHECK: sqxbr	%f8, %f8                # encoding: [0xb3,0x16,0x00,0x88]
+#CHECK: sqxbr	%f13, %f0               # encoding: [0xb3,0x16,0x00,0xd0]
+
+	sqxbr	%f0, %f0
+	sqxbr	%f0, %f13
+	sqxbr	%f8, %f8
+	sqxbr	%f13, %f0
+
+#CHECK: sr	%r0, %r0                # encoding: [0x1b,0x00]
+#CHECK: sr	%r0, %r15               # encoding: [0x1b,0x0f]
+#CHECK: sr	%r15, %r0               # encoding: [0x1b,0xf0]
+#CHECK: sr	%r7, %r8                # encoding: [0x1b,0x78]
+
+	sr	%r0,%r0
+	sr	%r0,%r15
+	sr	%r15,%r0
+	sr	%r7,%r8
+
+#CHECK: sra	%r0, 0                  # encoding: [0x8a,0x00,0x00,0x00]
+#CHECK: sra	%r7, 0                  # encoding: [0x8a,0x70,0x00,0x00]
+#CHECK: sra	%r15, 0                 # encoding: [0x8a,0xf0,0x00,0x00]
+#CHECK: sra	%r0, 4095               # encoding: [0x8a,0x00,0x0f,0xff]
+#CHECK: sra	%r0, 0(%r1)             # encoding: [0x8a,0x00,0x10,0x00]
+#CHECK: sra	%r0, 0(%r15)            # encoding: [0x8a,0x00,0xf0,0x00]
+#CHECK: sra	%r0, 4095(%r1)          # encoding: [0x8a,0x00,0x1f,0xff]
+#CHECK: sra	%r0, 4095(%r15)         # encoding: [0x8a,0x00,0xff,0xff]
+
+	sra	%r0,0
+	sra	%r7,0
+	sra	%r15,0
+	sra	%r0,4095
+	sra	%r0,0(%r1)
+	sra	%r0,0(%r15)
+	sra	%r0,4095(%r1)
+	sra	%r0,4095(%r15)
+
+#CHECK: srag	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0a]
+#CHECK: srag	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0a]
+#CHECK: srag	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0a]
+#CHECK: srag	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0a]
+#CHECK: srag	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0a]
+#CHECK: srag	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: srag	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0a]
+#CHECK: srag	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0a]
+
+	srag	%r0,%r0,0
+	srag	%r15,%r1,0
+	srag	%r1,%r15,0
+	srag	%r15,%r15,0
+	srag	%r0,%r0,-524288
+	srag	%r0,%r0,-1
+	srag	%r0,%r0,1
+	srag	%r0,%r0,524287
+	srag	%r0,%r0,0(%r1)
+	srag	%r0,%r0,0(%r15)
+	srag	%r0,%r0,524287(%r1)
+	srag	%r0,%r0,524287(%r15)
+
+#CHECK: srl	%r0, 0                  # encoding: [0x88,0x00,0x00,0x00]
+#CHECK: srl	%r7, 0                  # encoding: [0x88,0x70,0x00,0x00]
+#CHECK: srl	%r15, 0                 # encoding: [0x88,0xf0,0x00,0x00]
+#CHECK: srl	%r0, 4095               # encoding: [0x88,0x00,0x0f,0xff]
+#CHECK: srl	%r0, 0(%r1)             # encoding: [0x88,0x00,0x10,0x00]
+#CHECK: srl	%r0, 0(%r15)            # encoding: [0x88,0x00,0xf0,0x00]
+#CHECK: srl	%r0, 4095(%r1)          # encoding: [0x88,0x00,0x1f,0xff]
+#CHECK: srl	%r0, 4095(%r15)         # encoding: [0x88,0x00,0xff,0xff]
+
+	srl	%r0,0
+	srl	%r7,0
+	srl	%r15,0
+	srl	%r0,4095
+	srl	%r0,0(%r1)
+	srl	%r0,0(%r15)
+	srl	%r0,4095(%r1)
+	srl	%r0,4095(%r15)
+
+#CHECK: srlg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0c]
+#CHECK: srlg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0c]
+#CHECK: srlg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0c]
+#CHECK: srlg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0c]
+#CHECK: srlg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0c]
+
+	srlg	%r0,%r0,0
+	srlg	%r15,%r1,0
+	srlg	%r1,%r15,0
+	srlg	%r15,%r15,0
+	srlg	%r0,%r0,-524288
+	srlg	%r0,%r0,-1
+	srlg	%r0,%r0,1
+	srlg	%r0,%r0,524287
+	srlg	%r0,%r0,0(%r1)
+	srlg	%r0,%r0,0(%r15)
+	srlg	%r0,%r0,524287(%r1)
+	srlg	%r0,%r0,524287(%r15)
+
+#CHECK: srst	%r0, %r0                # encoding: [0xb2,0x5e,0x00,0x00]
+#CHECK: srst	%r0, %r15               # encoding: [0xb2,0x5e,0x00,0x0f]
+#CHECK: srst	%r15, %r0               # encoding: [0xb2,0x5e,0x00,0xf0]
+#CHECK: srst	%r7, %r8                # encoding: [0xb2,0x5e,0x00,0x78]
+
+	srst	%r0,%r0
+	srst	%r0,%r15
+	srst	%r15,%r0
+	srst	%r7,%r8
+
+#CHECK: st	%r0, 0                  # encoding: [0x50,0x00,0x00,0x00]
+#CHECK: st	%r0, 4095               # encoding: [0x50,0x00,0x0f,0xff]
+#CHECK: st	%r0, 0(%r1)             # encoding: [0x50,0x00,0x10,0x00]
+#CHECK: st	%r0, 0(%r15)            # encoding: [0x50,0x00,0xf0,0x00]
+#CHECK: st	%r0, 4095(%r1,%r15)     # encoding: [0x50,0x01,0xff,0xff]
+#CHECK: st	%r0, 4095(%r15,%r1)     # encoding: [0x50,0x0f,0x1f,0xff]
+#CHECK: st	%r15, 0                 # encoding: [0x50,0xf0,0x00,0x00]
+
+	st	%r0, 0
+	st	%r0, 4095
+	st	%r0, 0(%r1)
+	st	%r0, 0(%r15)
+	st	%r0, 4095(%r1,%r15)
+	st	%r0, 4095(%r15,%r1)
+	st	%r15, 0
+
+#CHECK: stc	%r0, 0                  # encoding: [0x42,0x00,0x00,0x00]
+#CHECK: stc	%r0, 4095               # encoding: [0x42,0x00,0x0f,0xff]
+#CHECK: stc	%r0, 0(%r1)             # encoding: [0x42,0x00,0x10,0x00]
+#CHECK: stc	%r0, 0(%r15)            # encoding: [0x42,0x00,0xf0,0x00]
+#CHECK: stc	%r0, 4095(%r1,%r15)     # encoding: [0x42,0x01,0xff,0xff]
+#CHECK: stc	%r0, 4095(%r15,%r1)     # encoding: [0x42,0x0f,0x1f,0xff]
+#CHECK: stc	%r15, 0                 # encoding: [0x42,0xf0,0x00,0x00]
+
+	stc	%r0, 0
+	stc	%r0, 4095
+	stc	%r0, 0(%r1)
+	stc	%r0, 0(%r15)
+	stc	%r0, 4095(%r1,%r15)
+	stc	%r0, 4095(%r15,%r1)
+	stc	%r15, 0
+
+#CHECK: stcy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x72]
+#CHECK: stcy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x72]
+#CHECK: stcy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x72]
+#CHECK: stcy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x72]
+#CHECK: stcy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x72]
+#CHECK: stcy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x72]
+#CHECK: stcy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x72]
+#CHECK: stcy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x72]
+#CHECK: stcy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x72]
+#CHECK: stcy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x72]
+
+	stcy	%r0, -524288
+	stcy	%r0, -1
+	stcy	%r0, 0
+	stcy	%r0, 1
+	stcy	%r0, 524287
+	stcy	%r0, 0(%r1)
+	stcy	%r0, 0(%r15)
+	stcy	%r0, 524287(%r1,%r15)
+	stcy	%r0, 524287(%r15,%r1)
+	stcy	%r15, 0
+
+#CHECK: std	%f0, 0                  # encoding: [0x60,0x00,0x00,0x00]
+#CHECK: std	%f0, 4095               # encoding: [0x60,0x00,0x0f,0xff]
+#CHECK: std	%f0, 0(%r1)             # encoding: [0x60,0x00,0x10,0x00]
+#CHECK: std	%f0, 0(%r15)            # encoding: [0x60,0x00,0xf0,0x00]
+#CHECK: std	%f0, 4095(%r1,%r15)     # encoding: [0x60,0x01,0xff,0xff]
+#CHECK: std	%f0, 4095(%r15,%r1)     # encoding: [0x60,0x0f,0x1f,0xff]
+#CHECK: std	%f15, 0                 # encoding: [0x60,0xf0,0x00,0x00]
+
+	std	%f0, 0
+	std	%f0, 4095
+	std	%f0, 0(%r1)
+	std	%f0, 0(%r15)
+	std	%f0, 4095(%r1,%r15)
+	std	%f0, 4095(%r15,%r1)
+	std	%f15, 0
+
+#CHECK: stdy	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x67]
+#CHECK: stdy	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x67]
+#CHECK: stdy	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x67]
+#CHECK: stdy	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x67]
+#CHECK: stdy	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x67]
+#CHECK: stdy	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x67]
+#CHECK: stdy	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x67]
+#CHECK: stdy	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x67]
+#CHECK: stdy	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x67]
+#CHECK: stdy	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x67]
+
+	stdy	%f0, -524288
+	stdy	%f0, -1
+	stdy	%f0, 0
+	stdy	%f0, 1
+	stdy	%f0, 524287
+	stdy	%f0, 0(%r1)
+	stdy	%f0, 0(%r15)
+	stdy	%f0, 524287(%r1,%r15)
+	stdy	%f0, 524287(%r15,%r1)
+	stdy	%f15, 0
+
+#CHECK: ste	%f0, 0                  # encoding: [0x70,0x00,0x00,0x00]
+#CHECK: ste	%f0, 4095               # encoding: [0x70,0x00,0x0f,0xff]
+#CHECK: ste	%f0, 0(%r1)             # encoding: [0x70,0x00,0x10,0x00]
+#CHECK: ste	%f0, 0(%r15)            # encoding: [0x70,0x00,0xf0,0x00]
+#CHECK: ste	%f0, 4095(%r1,%r15)     # encoding: [0x70,0x01,0xff,0xff]
+#CHECK: ste	%f0, 4095(%r15,%r1)     # encoding: [0x70,0x0f,0x1f,0xff]
+#CHECK: ste	%f15, 0                 # encoding: [0x70,0xf0,0x00,0x00]
+
+	ste	%f0, 0
+	ste	%f0, 4095
+	ste	%f0, 0(%r1)
+	ste	%f0, 0(%r15)
+	ste	%f0, 4095(%r1,%r15)
+	ste	%f0, 4095(%r15,%r1)
+	ste	%f15, 0
+
+#CHECK: stey	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x66]
+#CHECK: stey	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x66]
+#CHECK: stey	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x66]
+#CHECK: stey	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x66]
+#CHECK: stey	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x66]
+#CHECK: stey	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x66]
+#CHECK: stey	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x66]
+#CHECK: stey	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x66]
+#CHECK: stey	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x66]
+#CHECK: stey	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x66]
+
+	stey	%f0, -524288
+	stey	%f0, -1
+	stey	%f0, 0
+	stey	%f0, 1
+	stey	%f0, 524287
+	stey	%f0, 0(%r1)
+	stey	%f0, 0(%r15)
+	stey	%f0, 524287(%r1,%r15)
+	stey	%f0, 524287(%r15,%r1)
+	stey	%f15, 0
+
+#CHECK: stg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x24]
+#CHECK: stg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x24]
+#CHECK: stg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x24]
+#CHECK: stg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x24]
+#CHECK: stg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x24]
+#CHECK: stg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x24]
+#CHECK: stg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x24]
+#CHECK: stg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x24]
+#CHECK: stg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x24]
+#CHECK: stg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x24]
+
+	stg	%r0, -524288
+	stg	%r0, -1
+	stg	%r0, 0
+	stg	%r0, 1
+	stg	%r0, 524287
+	stg	%r0, 0(%r1)
+	stg	%r0, 0(%r15)
+	stg	%r0, 524287(%r1,%r15)
+	stg	%r0, 524287(%r15,%r1)
+	stg	%r15, 0
+
+#CHECK: stgrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x0b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	stgrl	%r0, -0x100000000
+#CHECK: stgrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x0b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	stgrl	%r0, -2
+#CHECK: stgrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x0b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	stgrl	%r0, 0
+#CHECK: stgrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x0b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	stgrl	%r0, 0xfffffffe
+
+#CHECK: stgrl	%r0, foo                # encoding: [0xc4,0x0b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: stgrl	%r15, foo               # encoding: [0xc4,0xfb,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	stgrl	%r0,foo
+	stgrl	%r15,foo
+
+#CHECK: stgrl	%r3, bar+100            # encoding: [0xc4,0x3b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: stgrl	%r4, bar+100            # encoding: [0xc4,0x4b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	stgrl	%r3,bar+100
+	stgrl	%r4,bar+100
+
+#CHECK: stgrl	%r7, frob@PLT           # encoding: [0xc4,0x7b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: stgrl	%r8, frob@PLT           # encoding: [0xc4,0x8b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	stgrl	%r7,frob@PLT
+	stgrl	%r8,frob@PLT
+
+#CHECK: sth	%r0, 0                  # encoding: [0x40,0x00,0x00,0x00]
+#CHECK: sth	%r0, 4095               # encoding: [0x40,0x00,0x0f,0xff]
+#CHECK: sth	%r0, 0(%r1)             # encoding: [0x40,0x00,0x10,0x00]
+#CHECK: sth	%r0, 0(%r15)            # encoding: [0x40,0x00,0xf0,0x00]
+#CHECK: sth	%r0, 4095(%r1,%r15)     # encoding: [0x40,0x01,0xff,0xff]
+#CHECK: sth	%r0, 4095(%r15,%r1)     # encoding: [0x40,0x0f,0x1f,0xff]
+#CHECK: sth	%r15, 0                 # encoding: [0x40,0xf0,0x00,0x00]
+
+	sth	%r0, 0
+	sth	%r0, 4095
+	sth	%r0, 0(%r1)
+	sth	%r0, 0(%r15)
+	sth	%r0, 4095(%r1,%r15)
+	sth	%r0, 4095(%r15,%r1)
+	sth	%r15, 0
+
+#CHECK: sthrl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	sthrl	%r0, -0x100000000
+#CHECK: sthrl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	sthrl	%r0, -2
+#CHECK: sthrl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	sthrl	%r0, 0
+#CHECK: sthrl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	sthrl	%r0, 0xfffffffe
+
+#CHECK: sthrl	%r0, foo                # encoding: [0xc4,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: sthrl	%r15, foo               # encoding: [0xc4,0xf7,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	sthrl	%r0,foo
+	sthrl	%r15,foo
+
+#CHECK: sthrl	%r3, bar+100            # encoding: [0xc4,0x37,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: sthrl	%r4, bar+100            # encoding: [0xc4,0x47,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	sthrl	%r3,bar+100
+	sthrl	%r4,bar+100
+
+#CHECK: sthrl	%r7, frob@PLT           # encoding: [0xc4,0x77,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: sthrl	%r8, frob@PLT           # encoding: [0xc4,0x87,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	sthrl	%r7,frob@PLT
+	sthrl	%r8,frob@PLT
+
+#CHECK: sthy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x70]
+#CHECK: sthy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x70]
+#CHECK: sthy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x70]
+#CHECK: sthy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x70]
+#CHECK: sthy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x70]
+#CHECK: sthy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x70]
+#CHECK: sthy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x70]
+#CHECK: sthy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x70]
+#CHECK: sthy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x70]
+#CHECK: sthy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x70]
+
+	sthy	%r0, -524288
+	sthy	%r0, -1
+	sthy	%r0, 0
+	sthy	%r0, 1
+	sthy	%r0, 524287
+	sthy	%r0, 0(%r1)
+	sthy	%r0, 0(%r15)
+	sthy	%r0, 524287(%r1,%r15)
+	sthy	%r0, 524287(%r15,%r1)
+	sthy	%r15, 0
+
+#CHECK: stmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r14, %r15, 0           # encoding: [0xeb,0xef,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x24]
+#CHECK: stmg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x24]
+#CHECK: stmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x24]
+#CHECK: stmg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x24]
+#CHECK: stmg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x24]
+
+	stmg	%r0,%r0,0
+	stmg	%r0,%r15,0
+	stmg	%r14,%r15,0
+	stmg	%r15,%r15,0
+	stmg	%r0,%r0,-524288
+	stmg	%r0,%r0,-1
+	stmg	%r0,%r0,0
+	stmg	%r0,%r0,1
+	stmg	%r0,%r0,524287
+	stmg	%r0,%r0,0(%r1)
+	stmg	%r0,%r0,0(%r15)
+	stmg	%r0,%r0,524287(%r1)
+	stmg	%r0,%r0,524287(%r15)
+
+#CHECK: strl	%r0, .[[LAB:L.*]]-4294967296 # encoding: [0xc4,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+	strl	%r0, -0x100000000
+#CHECK: strl	%r0, .[[LAB:L.*]]-2	# encoding: [0xc4,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+	strl	%r0, -2
+#CHECK: strl	%r0, .[[LAB:L.*]]	# encoding: [0xc4,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+	strl	%r0, 0
+#CHECK: strl	%r0, .[[LAB:L.*]]+4294967294 # encoding: [0xc4,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+	strl	%r0, 0xfffffffe
+
+#CHECK: strl	%r0, foo                # encoding: [0xc4,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: strl	%r15, foo               # encoding: [0xc4,0xff,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	strl	%r0,foo
+	strl	%r15,foo
+
+#CHECK: strl	%r3, bar+100            # encoding: [0xc4,0x3f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: strl	%r4, bar+100            # encoding: [0xc4,0x4f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	strl	%r3,bar+100
+	strl	%r4,bar+100
+
+#CHECK: strl	%r7, frob@PLT           # encoding: [0xc4,0x7f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: strl	%r8, frob@PLT           # encoding: [0xc4,0x8f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	strl	%r7,frob@PLT
+	strl	%r8,frob@PLT
+
+#CHECK: strv	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x3e]
+#CHECK: strv	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x3e]
+#CHECK: strv	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x3e]
+#CHECK: strv	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x3e]
+#CHECK: strv	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x3e]
+#CHECK: strv	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x3e]
+#CHECK: strv	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x3e]
+#CHECK: strv	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x3e]
+#CHECK: strv	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x3e]
+#CHECK: strv	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x3e]
+
+	strv	%r0,-524288
+	strv	%r0,-1
+	strv	%r0,0
+	strv	%r0,1
+	strv	%r0,524287
+	strv	%r0,0(%r1)
+	strv	%r0,0(%r15)
+	strv	%r0,524287(%r1,%r15)
+	strv	%r0,524287(%r15,%r1)
+	strv	%r15,0
+
+#CHECK: strvg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x2f]
+#CHECK: strvg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x2f]
+#CHECK: strvg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x2f]
+#CHECK: strvg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x2f]
+#CHECK: strvg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x2f]
+#CHECK: strvg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x2f]
+#CHECK: strvg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x2f]
+#CHECK: strvg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x2f]
+#CHECK: strvg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x2f]
+#CHECK: strvg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x2f]
+
+	strvg	%r0,-524288
+	strvg	%r0,-1
+	strvg	%r0,0
+	strvg	%r0,1
+	strvg	%r0,524287
+	strvg	%r0,0(%r1)
+	strvg	%r0,0(%r15)
+	strvg	%r0,524287(%r1,%r15)
+	strvg	%r0,524287(%r15,%r1)
+	strvg	%r15,0
+
+#CHECK: sty	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x50]
+#CHECK: sty	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x50]
+#CHECK: sty	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x50]
+#CHECK: sty	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x50]
+#CHECK: sty	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x50]
+#CHECK: sty	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x50]
+#CHECK: sty	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x50]
+#CHECK: sty	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x50]
+#CHECK: sty	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x50]
+#CHECK: sty	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x50]
+
+	sty	%r0, -524288
+	sty	%r0, -1
+	sty	%r0, 0
+	sty	%r0, 1
+	sty	%r0, 524287
+	sty	%r0, 0(%r1)
+	sty	%r0, 0(%r15)
+	sty	%r0, 524287(%r1,%r15)
+	sty	%r0, 524287(%r15,%r1)
+	sty	%r15, 0
+
+#CHECK: sxbr	%f0, %f0                # encoding: [0xb3,0x4b,0x00,0x00]
+#CHECK: sxbr	%f0, %f13               # encoding: [0xb3,0x4b,0x00,0x0d]
+#CHECK: sxbr	%f8, %f8                # encoding: [0xb3,0x4b,0x00,0x88]
+#CHECK: sxbr	%f13, %f0               # encoding: [0xb3,0x4b,0x00,0xd0]
+
+	sxbr	%f0, %f0
+	sxbr	%f0, %f13
+	sxbr	%f8, %f8
+	sxbr	%f13, %f0
+
+#CHECK: sy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5b]
+#CHECK: sy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5b]
+#CHECK: sy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5b]
+#CHECK: sy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5b]
+#CHECK: sy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5b]
+#CHECK: sy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5b]
+#CHECK: sy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5b]
+#CHECK: sy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5b]
+#CHECK: sy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5b]
+#CHECK: sy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5b]
+
+	sy	%r0, -524288
+	sy	%r0, -1
+	sy	%r0, 0
+	sy	%r0, 1
+	sy	%r0, 524287
+	sy	%r0, 0(%r1)
+	sy	%r0, 0(%r15)
+	sy	%r0, 524287(%r1,%r15)
+	sy	%r0, 524287(%r15,%r1)
+	sy	%r15, 0
+
+#CHECK: tm	0, 0                    # encoding: [0x91,0x00,0x00,0x00]
+#CHECK: tm	4095, 0                 # encoding: [0x91,0x00,0x0f,0xff]
+#CHECK: tm	0, 255                  # encoding: [0x91,0xff,0x00,0x00]
+#CHECK: tm	0(%r1), 42              # encoding: [0x91,0x2a,0x10,0x00]
+#CHECK: tm	0(%r15), 42             # encoding: [0x91,0x2a,0xf0,0x00]
+#CHECK: tm	4095(%r1), 42           # encoding: [0x91,0x2a,0x1f,0xff]
+#CHECK: tm	4095(%r15), 42          # encoding: [0x91,0x2a,0xff,0xff]
+
+	tm	0, 0
+	tm	4095, 0
+	tm	0, 255
+	tm	0(%r1), 42
+	tm	0(%r15), 42
+	tm	4095(%r1), 42
+	tm	4095(%r15), 42
+
+#CHECK: tmhh	%r0, 0                  # encoding: [0xa7,0x02,0x00,0x00]
+#CHECK: tmhh	%r0, 32768              # encoding: [0xa7,0x02,0x80,0x00]
+#CHECK: tmhh	%r0, 65535              # encoding: [0xa7,0x02,0xff,0xff]
+#CHECK: tmhh	%r15, 0                 # encoding: [0xa7,0xf2,0x00,0x00]
+
+	tmhh	%r0, 0
+	tmhh	%r0, 0x8000
+	tmhh	%r0, 0xffff
+	tmhh	%r15, 0
+
+#CHECK: tmhl	%r0, 0                  # encoding: [0xa7,0x03,0x00,0x00]
+#CHECK: tmhl	%r0, 32768              # encoding: [0xa7,0x03,0x80,0x00]
+#CHECK: tmhl	%r0, 65535              # encoding: [0xa7,0x03,0xff,0xff]
+#CHECK: tmhl	%r15, 0                 # encoding: [0xa7,0xf3,0x00,0x00]
+
+	tmhl	%r0, 0
+	tmhl	%r0, 0x8000
+	tmhl	%r0, 0xffff
+	tmhl	%r15, 0
+
+#CHECK: tmlh	%r0, 0                  # encoding: [0xa7,0x00,0x00,0x00]
+#CHECK: tmlh	%r0, 32768              # encoding: [0xa7,0x00,0x80,0x00]
+#CHECK: tmlh	%r0, 65535              # encoding: [0xa7,0x00,0xff,0xff]
+#CHECK: tmlh	%r15, 0                 # encoding: [0xa7,0xf0,0x00,0x00]
+
+	tmlh	%r0, 0
+	tmlh	%r0, 0x8000
+	tmlh	%r0, 0xffff
+	tmlh	%r15, 0
+
+#CHECK: tmll	%r0, 0                  # encoding: [0xa7,0x01,0x00,0x00]
+#CHECK: tmll	%r0, 32768              # encoding: [0xa7,0x01,0x80,0x00]
+#CHECK: tmll	%r0, 65535              # encoding: [0xa7,0x01,0xff,0xff]
+#CHECK: tmll	%r15, 0                 # encoding: [0xa7,0xf1,0x00,0x00]
+
+	tmll	%r0, 0
+	tmll	%r0, 0x8000
+	tmll	%r0, 0xffff
+	tmll	%r15, 0
+
+#CHECK: tmy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x51]
+#CHECK: tmy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x51]
+#CHECK: tmy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x51]
+#CHECK: tmy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x51]
+#CHECK: tmy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x51]
+#CHECK: tmy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x51]
+#CHECK: tmy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x51]
+#CHECK: tmy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x51]
+#CHECK: tmy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x51]
+#CHECK: tmy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x51]
+
+	tmy	-524288, 0
+	tmy	-1, 0
+	tmy	0, 0
+	tmy	1, 0
+	tmy	524287, 0
+	tmy	0, 255
+	tmy	0(%r1), 42
+	tmy	0(%r15), 42
+	tmy	524287(%r1), 42
+	tmy	524287(%r15), 42
+
+#CHECK: x	%r0, 0                  # encoding: [0x57,0x00,0x00,0x00]
+#CHECK: x	%r0, 4095               # encoding: [0x57,0x00,0x0f,0xff]
+#CHECK: x	%r0, 0(%r1)             # encoding: [0x57,0x00,0x10,0x00]
+#CHECK: x	%r0, 0(%r15)            # encoding: [0x57,0x00,0xf0,0x00]
+#CHECK: x	%r0, 4095(%r1,%r15)     # encoding: [0x57,0x01,0xff,0xff]
+#CHECK: x	%r0, 4095(%r15,%r1)     # encoding: [0x57,0x0f,0x1f,0xff]
+#CHECK: x	%r15, 0                 # encoding: [0x57,0xf0,0x00,0x00]
+
+	x	%r0, 0
+	x	%r0, 4095
+	x	%r0, 0(%r1)
+	x	%r0, 0(%r15)
+	x	%r0, 4095(%r1,%r15)
+	x	%r0, 4095(%r15,%r1)
+	x	%r15, 0
+
+#CHECK: xc	0(1), 0                 # encoding: [0xd7,0x00,0x00,0x00,0x00,0x00]
+#CHECK: xc	0(1), 0(%r1)            # encoding: [0xd7,0x00,0x00,0x00,0x10,0x00]
+#CHECK: xc	0(1), 0(%r15)           # encoding: [0xd7,0x00,0x00,0x00,0xf0,0x00]
+#CHECK: xc	0(1), 4095              # encoding: [0xd7,0x00,0x00,0x00,0x0f,0xff]
+#CHECK: xc	0(1), 4095(%r1)         # encoding: [0xd7,0x00,0x00,0x00,0x1f,0xff]
+#CHECK: xc	0(1), 4095(%r15)        # encoding: [0xd7,0x00,0x00,0x00,0xff,0xff]
+#CHECK: xc	0(1,%r1), 0             # encoding: [0xd7,0x00,0x10,0x00,0x00,0x00]
+#CHECK: xc	0(1,%r15), 0            # encoding: [0xd7,0x00,0xf0,0x00,0x00,0x00]
+#CHECK: xc	4095(1,%r1), 0          # encoding: [0xd7,0x00,0x1f,0xff,0x00,0x00]
+#CHECK: xc	4095(1,%r15), 0         # encoding: [0xd7,0x00,0xff,0xff,0x00,0x00]
+#CHECK: xc	0(256,%r1), 0           # encoding: [0xd7,0xff,0x10,0x00,0x00,0x00]
+#CHECK: xc	0(256,%r15), 0          # encoding: [0xd7,0xff,0xf0,0x00,0x00,0x00]
+
+	xc	0(1), 0
+	xc	0(1), 0(%r1)
+	xc	0(1), 0(%r15)
+	xc	0(1), 4095
+	xc	0(1), 4095(%r1)
+	xc	0(1), 4095(%r15)
+	xc	0(1,%r1), 0
+	xc	0(1,%r15), 0
+	xc	4095(1,%r1), 0
+	xc	4095(1,%r15), 0
+	xc	0(256,%r1), 0
+	xc	0(256,%r15), 0
+
+#CHECK: xg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x82]
+#CHECK: xg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x82]
+#CHECK: xg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x82]
+#CHECK: xg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x82]
+#CHECK: xg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x82]
+#CHECK: xg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x82]
+#CHECK: xg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x82]
+#CHECK: xg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x82]
+#CHECK: xg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x82]
+#CHECK: xg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x82]
+
+	xg	%r0, -524288
+	xg	%r0, -1
+	xg	%r0, 0
+	xg	%r0, 1
+	xg	%r0, 524287
+	xg	%r0, 0(%r1)
+	xg	%r0, 0(%r15)
+	xg	%r0, 524287(%r1,%r15)
+	xg	%r0, 524287(%r15,%r1)
+	xg	%r15, 0
+
+#CHECK: xgr	%r0, %r0                # encoding: [0xb9,0x82,0x00,0x00]
+#CHECK: xgr	%r0, %r15               # encoding: [0xb9,0x82,0x00,0x0f]
+#CHECK: xgr	%r15, %r0               # encoding: [0xb9,0x82,0x00,0xf0]
+#CHECK: xgr	%r7, %r8                # encoding: [0xb9,0x82,0x00,0x78]
+
+	xgr	%r0,%r0
+	xgr	%r0,%r15
+	xgr	%r15,%r0
+	xgr	%r7,%r8
+
+#CHECK: xi	0, 0                    # encoding: [0x97,0x00,0x00,0x00]
+#CHECK: xi	4095, 0                 # encoding: [0x97,0x00,0x0f,0xff]
+#CHECK: xi	0, 255                  # encoding: [0x97,0xff,0x00,0x00]
+#CHECK: xi	0(%r1), 42              # encoding: [0x97,0x2a,0x10,0x00]
+#CHECK: xi	0(%r15), 42             # encoding: [0x97,0x2a,0xf0,0x00]
+#CHECK: xi	4095(%r1), 42           # encoding: [0x97,0x2a,0x1f,0xff]
+#CHECK: xi	4095(%r15), 42          # encoding: [0x97,0x2a,0xff,0xff]
+
+	xi	0, 0
+	xi	4095, 0
+	xi	0, 255
+	xi	0(%r1), 42
+	xi	0(%r15), 42
+	xi	4095(%r1), 42
+	xi	4095(%r15), 42
+
+#CHECK: xihf	%r0, 0                  # encoding: [0xc0,0x06,0x00,0x00,0x00,0x00]
+#CHECK: xihf	%r0, 4294967295         # encoding: [0xc0,0x06,0xff,0xff,0xff,0xff]
+#CHECK: xihf	%r15, 0                 # encoding: [0xc0,0xf6,0x00,0x00,0x00,0x00]
+
+	xihf	%r0, 0
+	xihf	%r0, 0xffffffff
+	xihf	%r15, 0
+
+#CHECK: xilf	%r0, 0                  # encoding: [0xc0,0x07,0x00,0x00,0x00,0x00]
+#CHECK: xilf	%r0, 4294967295         # encoding: [0xc0,0x07,0xff,0xff,0xff,0xff]
+#CHECK: xilf	%r15, 0                 # encoding: [0xc0,0xf7,0x00,0x00,0x00,0x00]
+
+	xilf	%r0, 0
+	xilf	%r0, 0xffffffff
+	xilf	%r15, 0
+
+#CHECK: xiy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x57]
+#CHECK: xiy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x57]
+#CHECK: xiy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x57]
+#CHECK: xiy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x57]
+#CHECK: xiy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x57]
+#CHECK: xiy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x57]
+#CHECK: xiy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x57]
+#CHECK: xiy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x57]
+#CHECK: xiy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x57]
+#CHECK: xiy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x57]
+
+	xiy	-524288, 0
+	xiy	-1, 0
+	xiy	0, 0
+	xiy	1, 0
+	xiy	524287, 0
+	xiy	0, 255
+	xiy	0(%r1), 42
+	xiy	0(%r15), 42
+	xiy	524287(%r1), 42
+	xiy	524287(%r15), 42
+
+#CHECK: xr	%r0, %r0                # encoding: [0x17,0x00]
+#CHECK: xr	%r0, %r15               # encoding: [0x17,0x0f]
+#CHECK: xr	%r15, %r0               # encoding: [0x17,0xf0]
+#CHECK: xr	%r7, %r8                # encoding: [0x17,0x78]
+
+	xr	%r0,%r0
+	xr	%r0,%r15
+	xr	%r15,%r0
+	xr	%r7,%r8
+
+#CHECK: xy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x57]
+#CHECK: xy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x57]
+#CHECK: xy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x57]
+#CHECK: xy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x57]
+#CHECK: xy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x57]
+#CHECK: xy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x57]
+#CHECK: xy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x57]
+#CHECK: xy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x57]
+#CHECK: xy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x57]
+#CHECK: xy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x57]
+
+	xy	%r0, -524288
+	xy	%r0, -1
+	xy	%r0, 0
+	xy	%r0, 1
+	xy	%r0, 524287
+	xy	%r0, 0(%r1)
+	xy	%r0, 0(%r15)
+	xy	%r0, 524287(%r1,%r15)
+	xy	%r0, 524287(%r15,%r1)
+	xy	%r15, 0
diff --git a/test/MC/SystemZ/insn-ic-01.s b/test/MC/SystemZ/insn-ic-01.s
deleted file mode 100644
index 76772f9d1a08..000000000000
--- a/test/MC/SystemZ/insn-ic-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ic	%r0, 0                  # encoding: [0x43,0x00,0x00,0x00]
-#CHECK: ic	%r0, 4095               # encoding: [0x43,0x00,0x0f,0xff]
-#CHECK: ic	%r0, 0(%r1)             # encoding: [0x43,0x00,0x10,0x00]
-#CHECK: ic	%r0, 0(%r15)            # encoding: [0x43,0x00,0xf0,0x00]
-#CHECK: ic	%r0, 4095(%r1,%r15)     # encoding: [0x43,0x01,0xff,0xff]
-#CHECK: ic	%r0, 4095(%r15,%r1)     # encoding: [0x43,0x0f,0x1f,0xff]
-#CHECK: ic	%r15, 0                 # encoding: [0x43,0xf0,0x00,0x00]
-
-	ic	%r0, 0
-	ic	%r0, 4095
-	ic	%r0, 0(%r1)
-	ic	%r0, 0(%r15)
-	ic	%r0, 4095(%r1,%r15)
-	ic	%r0, 4095(%r15,%r1)
-	ic	%r15, 0
diff --git a/test/MC/SystemZ/insn-ic-02.s b/test/MC/SystemZ/insn-ic-02.s
deleted file mode 100644
index e70ef1c58dbe..000000000000
--- a/test/MC/SystemZ/insn-ic-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ic	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: ic	%r0, 4096
-
-	ic	%r0, -1
-	ic	%r0, 4096
diff --git a/test/MC/SystemZ/insn-icy-01.s b/test/MC/SystemZ/insn-icy-01.s
deleted file mode 100644
index 079ae2142f1c..000000000000
--- a/test/MC/SystemZ/insn-icy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: icy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x73]
-#CHECK: icy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x73]
-#CHECK: icy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x73]
-#CHECK: icy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x73]
-#CHECK: icy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x73]
-#CHECK: icy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x73]
-#CHECK: icy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x73]
-#CHECK: icy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x73]
-#CHECK: icy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x73]
-#CHECK: icy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x73]
-
-	icy	%r0, -524288
-	icy	%r0, -1
-	icy	%r0, 0
-	icy	%r0, 1
-	icy	%r0, 524287
-	icy	%r0, 0(%r1)
-	icy	%r0, 0(%r15)
-	icy	%r0, 524287(%r1,%r15)
-	icy	%r0, 524287(%r15,%r1)
-	icy	%r15, 0
diff --git a/test/MC/SystemZ/insn-icy-02.s b/test/MC/SystemZ/insn-icy-02.s
deleted file mode 100644
index 321c86f9dfff..000000000000
--- a/test/MC/SystemZ/insn-icy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: icy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: icy	%r0, 524288
-
-	icy	%r0, -524289
-	icy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-iihf-01.s b/test/MC/SystemZ/insn-iihf-01.s
deleted file mode 100644
index bf8d48fbc634..000000000000
--- a/test/MC/SystemZ/insn-iihf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: iihf	%r0, 0                  # encoding: [0xc0,0x08,0x00,0x00,0x00,0x00]
-#CHECK: iihf	%r0, 4294967295         # encoding: [0xc0,0x08,0xff,0xff,0xff,0xff]
-#CHECK: iihf	%r15, 0                 # encoding: [0xc0,0xf8,0x00,0x00,0x00,0x00]
-
-	iihf	%r0, 0
-	iihf	%r0, 0xffffffff
-	iihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-iihf-02.s b/test/MC/SystemZ/insn-iihf-02.s
deleted file mode 100644
index 1c7a69addf60..000000000000
--- a/test/MC/SystemZ/insn-iihf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: iihf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: iihf	%r0, 1 << 32
-
-	iihf	%r0, -1
-	iihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-iihh-01.s b/test/MC/SystemZ/insn-iihh-01.s
deleted file mode 100644
index a2ba9a33a456..000000000000
--- a/test/MC/SystemZ/insn-iihh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: iihh	%r0, 0                  # encoding: [0xa5,0x00,0x00,0x00]
-#CHECK: iihh	%r0, 32768              # encoding: [0xa5,0x00,0x80,0x00]
-#CHECK: iihh	%r0, 65535              # encoding: [0xa5,0x00,0xff,0xff]
-#CHECK: iihh	%r15, 0                 # encoding: [0xa5,0xf0,0x00,0x00]
-
-	iihh	%r0, 0
-	iihh	%r0, 0x8000
-	iihh	%r0, 0xffff
-	iihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-iihh-02.s b/test/MC/SystemZ/insn-iihh-02.s
deleted file mode 100644
index 2d8f8541dd07..000000000000
--- a/test/MC/SystemZ/insn-iihh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: iihh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: iihh	%r0, 0x10000
-
-	iihh	%r0, -1
-	iihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-iihl-01.s b/test/MC/SystemZ/insn-iihl-01.s
deleted file mode 100644
index ff591e8b8696..000000000000
--- a/test/MC/SystemZ/insn-iihl-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: iihl	%r0, 0                  # encoding: [0xa5,0x01,0x00,0x00]
-#CHECK: iihl	%r0, 32768              # encoding: [0xa5,0x01,0x80,0x00]
-#CHECK: iihl	%r0, 65535              # encoding: [0xa5,0x01,0xff,0xff]
-#CHECK: iihl	%r15, 0                 # encoding: [0xa5,0xf1,0x00,0x00]
-
-	iihl	%r0, 0
-	iihl	%r0, 0x8000
-	iihl	%r0, 0xffff
-	iihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-iihl-02.s b/test/MC/SystemZ/insn-iihl-02.s
deleted file mode 100644
index 262955e38d39..000000000000
--- a/test/MC/SystemZ/insn-iihl-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: iihl	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: iihl	%r0, 0x10000
-
-	iihl	%r0, -1
-	iihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-iilf-01.s b/test/MC/SystemZ/insn-iilf-01.s
deleted file mode 100644
index 228e1476764c..000000000000
--- a/test/MC/SystemZ/insn-iilf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: iilf	%r0, 0                  # encoding: [0xc0,0x09,0x00,0x00,0x00,0x00]
-#CHECK: iilf	%r0, 4294967295         # encoding: [0xc0,0x09,0xff,0xff,0xff,0xff]
-#CHECK: iilf	%r15, 0                 # encoding: [0xc0,0xf9,0x00,0x00,0x00,0x00]
-
-	iilf	%r0, 0
-	iilf	%r0, 0xffffffff
-	iilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-iilf-02.s b/test/MC/SystemZ/insn-iilf-02.s
deleted file mode 100644
index c7571e8f6d17..000000000000
--- a/test/MC/SystemZ/insn-iilf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: iilf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: iilf	%r0, 1 << 32
-
-	iilf	%r0, -1
-	iilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-iilh-01.s b/test/MC/SystemZ/insn-iilh-01.s
deleted file mode 100644
index 045ccfe80b66..000000000000
--- a/test/MC/SystemZ/insn-iilh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: iilh	%r0, 0                  # encoding: [0xa5,0x02,0x00,0x00]
-#CHECK: iilh	%r0, 32768              # encoding: [0xa5,0x02,0x80,0x00]
-#CHECK: iilh	%r0, 65535              # encoding: [0xa5,0x02,0xff,0xff]
-#CHECK: iilh	%r15, 0                 # encoding: [0xa5,0xf2,0x00,0x00]
-
-	iilh	%r0, 0
-	iilh	%r0, 0x8000
-	iilh	%r0, 0xffff
-	iilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-iilh-02.s b/test/MC/SystemZ/insn-iilh-02.s
deleted file mode 100644
index af5bdacaff8b..000000000000
--- a/test/MC/SystemZ/insn-iilh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: iilh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: iilh	%r0, 0x10000
-
-	iilh	%r0, -1
-	iilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-iill-01.s b/test/MC/SystemZ/insn-iill-01.s
deleted file mode 100644
index bf50eeb0f678..000000000000
--- a/test/MC/SystemZ/insn-iill-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: iill	%r0, 0                  # encoding: [0xa5,0x03,0x00,0x00]
-#CHECK: iill	%r0, 32768              # encoding: [0xa5,0x03,0x80,0x00]
-#CHECK: iill	%r0, 65535              # encoding: [0xa5,0x03,0xff,0xff]
-#CHECK: iill	%r15, 0                 # encoding: [0xa5,0xf3,0x00,0x00]
-
-	iill	%r0, 0
-	iill	%r0, 0x8000
-	iill	%r0, 0xffff
-	iill	%r15, 0
diff --git a/test/MC/SystemZ/insn-iill-02.s b/test/MC/SystemZ/insn-iill-02.s
deleted file mode 100644
index fe31e44ee66b..000000000000
--- a/test/MC/SystemZ/insn-iill-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: iill	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: iill	%r0, 0x10000
-
-	iill	%r0, -1
-	iill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-l-01.s b/test/MC/SystemZ/insn-l-01.s
deleted file mode 100644
index a589116ebf41..000000000000
--- a/test/MC/SystemZ/insn-l-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: l	%r0, 0                  # encoding: [0x58,0x00,0x00,0x00]
-#CHECK: l	%r0, 4095               # encoding: [0x58,0x00,0x0f,0xff]
-#CHECK: l	%r0, 0(%r1)             # encoding: [0x58,0x00,0x10,0x00]
-#CHECK: l	%r0, 0(%r15)            # encoding: [0x58,0x00,0xf0,0x00]
-#CHECK: l	%r0, 4095(%r1,%r15)     # encoding: [0x58,0x01,0xff,0xff]
-#CHECK: l	%r0, 4095(%r15,%r1)     # encoding: [0x58,0x0f,0x1f,0xff]
-#CHECK: l	%r15, 0                 # encoding: [0x58,0xf0,0x00,0x00]
-
-	l	%r0, 0
-	l	%r0, 4095
-	l	%r0, 0(%r1)
-	l	%r0, 0(%r15)
-	l	%r0, 4095(%r1,%r15)
-	l	%r0, 4095(%r15,%r1)
-	l	%r15, 0
diff --git a/test/MC/SystemZ/insn-l-02.s b/test/MC/SystemZ/insn-l-02.s
deleted file mode 100644
index fad96ff6ce2f..000000000000
--- a/test/MC/SystemZ/insn-l-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: l	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: l	%r0, 4096
-
-	l	%r0, -1
-	l	%r0, 4096
diff --git a/test/MC/SystemZ/insn-la-01.s b/test/MC/SystemZ/insn-la-01.s
deleted file mode 100644
index d4776ab58570..000000000000
--- a/test/MC/SystemZ/insn-la-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: la	%r0, 0                  # encoding: [0x41,0x00,0x00,0x00]
-#CHECK: la	%r0, 4095               # encoding: [0x41,0x00,0x0f,0xff]
-#CHECK: la	%r0, 0(%r1)             # encoding: [0x41,0x00,0x10,0x00]
-#CHECK: la	%r0, 0(%r15)            # encoding: [0x41,0x00,0xf0,0x00]
-#CHECK: la	%r0, 4095(%r1,%r15)     # encoding: [0x41,0x01,0xff,0xff]
-#CHECK: la	%r0, 4095(%r15,%r1)     # encoding: [0x41,0x0f,0x1f,0xff]
-#CHECK: la	%r15, 0                 # encoding: [0x41,0xf0,0x00,0x00]
-
-	la	%r0, 0
-	la	%r0, 4095
-	la	%r0, 0(%r1)
-	la	%r0, 0(%r15)
-	la	%r0, 4095(%r1,%r15)
-	la	%r0, 4095(%r15,%r1)
-	la	%r15, 0
diff --git a/test/MC/SystemZ/insn-la-02.s b/test/MC/SystemZ/insn-la-02.s
deleted file mode 100644
index 35c1ab04b5ea..000000000000
--- a/test/MC/SystemZ/insn-la-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: la	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: la	%r0, 4096
-
-	la	%r0, -1
-	la	%r0, 4096
diff --git a/test/MC/SystemZ/insn-larl-01.s b/test/MC/SystemZ/insn-larl-01.s
deleted file mode 100644
index 3d0f98f562a2..000000000000
--- a/test/MC/SystemZ/insn-larl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: larl	%r0, 2864434397         # encoding: [0xc0,0x00,0x55,0x5d,0xe6,0x6e]
-#CHECK: larl	%r15, 2864434397        # encoding: [0xc0,0xf0,0x55,0x5d,0xe6,0x6e]
-
-	larl	%r0,0xaabbccdd
-	larl	%r15,0xaabbccdd
-
-#CHECK: larl	%r0, foo                # encoding: [0xc0,0x00,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: larl	%r15, foo               # encoding: [0xc0,0xf0,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	larl	%r0,foo
-	larl	%r15,foo
-
-#CHECK: larl	%r3, bar+100            # encoding: [0xc0,0x30,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: larl	%r4, bar+100            # encoding: [0xc0,0x40,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	larl	%r3,bar+100
-	larl	%r4,bar+100
-
-#CHECK: larl	%r7, frob@PLT           # encoding: [0xc0,0x70,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: larl	%r8, frob@PLT           # encoding: [0xc0,0x80,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	larl	%r7,frob@PLT
-	larl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lay-01.s b/test/MC/SystemZ/insn-lay-01.s
deleted file mode 100644
index daa88288ab56..000000000000
--- a/test/MC/SystemZ/insn-lay-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lay	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x71]
-#CHECK: lay	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x71]
-#CHECK: lay	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x71]
-#CHECK: lay	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x71]
-#CHECK: lay	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x71]
-#CHECK: lay	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x71]
-#CHECK: lay	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x71]
-#CHECK: lay	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x71]
-#CHECK: lay	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x71]
-#CHECK: lay	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x71]
-
-	lay	%r0, -524288
-	lay	%r0, -1
-	lay	%r0, 0
-	lay	%r0, 1
-	lay	%r0, 524287
-	lay	%r0, 0(%r1)
-	lay	%r0, 0(%r15)
-	lay	%r0, 524287(%r1,%r15)
-	lay	%r0, 524287(%r15,%r1)
-	lay	%r15, 0
diff --git a/test/MC/SystemZ/insn-lay-02.s b/test/MC/SystemZ/insn-lay-02.s
deleted file mode 100644
index 2729eead22a6..000000000000
--- a/test/MC/SystemZ/insn-lay-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lay	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lay	%r0, 524288
-
-	lay	%r0, -524289
-	lay	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lb-01.s b/test/MC/SystemZ/insn-lb-01.s
deleted file mode 100644
index e9ee0fab2266..000000000000
--- a/test/MC/SystemZ/insn-lb-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x76]
-#CHECK: lb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x76]
-#CHECK: lb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x76]
-#CHECK: lb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x76]
-#CHECK: lb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x76]
-#CHECK: lb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x76]
-#CHECK: lb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x76]
-#CHECK: lb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x76]
-#CHECK: lb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x76]
-#CHECK: lb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x76]
-
-	lb	%r0, -524288
-	lb	%r0, -1
-	lb	%r0, 0
-	lb	%r0, 1
-	lb	%r0, 524287
-	lb	%r0, 0(%r1)
-	lb	%r0, 0(%r15)
-	lb	%r0, 524287(%r1,%r15)
-	lb	%r0, 524287(%r15,%r1)
-	lb	%r15, 0
diff --git a/test/MC/SystemZ/insn-lb-02.s b/test/MC/SystemZ/insn-lb-02.s
deleted file mode 100644
index e65edaf09dac..000000000000
--- a/test/MC/SystemZ/insn-lb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lb	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lb	%r0, 524288
-
-	lb	%r0, -524289
-	lb	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lbr-01.s b/test/MC/SystemZ/insn-lbr-01.s
deleted file mode 100644
index cb4ead043080..000000000000
--- a/test/MC/SystemZ/insn-lbr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lbr	%r0, %r15               # encoding: [0xb9,0x26,0x00,0x0f]
-#CHECK: lbr	%r7, %r8                # encoding: [0xb9,0x26,0x00,0x78]
-#CHECK: lbr	%r15, %r0               # encoding: [0xb9,0x26,0x00,0xf0]
-
-	lbr	%r0, %r15
-	lbr	%r7, %r8
-	lbr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lcdbr-01.s b/test/MC/SystemZ/insn-lcdbr-01.s
deleted file mode 100644
index 347cab514116..000000000000
--- a/test/MC/SystemZ/insn-lcdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lcdbr	%f0, %f9                # encoding: [0xb3,0x13,0x00,0x09]
-#CHECK: lcdbr	%f0, %f15               # encoding: [0xb3,0x13,0x00,0x0f]
-#CHECK: lcdbr	%f15, %f0               # encoding: [0xb3,0x13,0x00,0xf0]
-#CHECK: lcdbr	%f15, %f9               # encoding: [0xb3,0x13,0x00,0xf9]
-
-	lcdbr	%f0,%f9
-	lcdbr	%f0,%f15
-	lcdbr	%f15,%f0
-	lcdbr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lcebr-01.s b/test/MC/SystemZ/insn-lcebr-01.s
deleted file mode 100644
index e31822f00460..000000000000
--- a/test/MC/SystemZ/insn-lcebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lcebr	%f0, %f9                # encoding: [0xb3,0x03,0x00,0x09]
-#CHECK: lcebr	%f0, %f15               # encoding: [0xb3,0x03,0x00,0x0f]
-#CHECK: lcebr	%f15, %f0               # encoding: [0xb3,0x03,0x00,0xf0]
-#CHECK: lcebr	%f15, %f9               # encoding: [0xb3,0x03,0x00,0xf9]
-
-	lcebr	%f0,%f9
-	lcebr	%f0,%f15
-	lcebr	%f15,%f0
-	lcebr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lcgfr-01.s b/test/MC/SystemZ/insn-lcgfr-01.s
deleted file mode 100644
index bca430b8015d..000000000000
--- a/test/MC/SystemZ/insn-lcgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lcgfr	%r0, %r0                # encoding: [0xb9,0x13,0x00,0x00]
-#CHECK: lcgfr	%r0, %r15               # encoding: [0xb9,0x13,0x00,0x0f]
-#CHECK: lcgfr	%r15, %r0               # encoding: [0xb9,0x13,0x00,0xf0]
-#CHECK: lcgfr	%r7, %r8                # encoding: [0xb9,0x13,0x00,0x78]
-
-	lcgfr	%r0,%r0
-	lcgfr	%r0,%r15
-	lcgfr	%r15,%r0
-	lcgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-lcgr-01.s b/test/MC/SystemZ/insn-lcgr-01.s
deleted file mode 100644
index dc4e94f03eed..000000000000
--- a/test/MC/SystemZ/insn-lcgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lcgr	%r0, %r0                # encoding: [0xb9,0x03,0x00,0x00]
-#CHECK: lcgr	%r0, %r15               # encoding: [0xb9,0x03,0x00,0x0f]
-#CHECK: lcgr	%r15, %r0               # encoding: [0xb9,0x03,0x00,0xf0]
-#CHECK: lcgr	%r7, %r8                # encoding: [0xb9,0x03,0x00,0x78]
-
-	lcgr	%r0,%r0
-	lcgr	%r0,%r15
-	lcgr	%r15,%r0
-	lcgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-lcr-01.s b/test/MC/SystemZ/insn-lcr-01.s
deleted file mode 100644
index 52471cbbdc54..000000000000
--- a/test/MC/SystemZ/insn-lcr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lcr	%r0, %r0                # encoding: [0x13,0x00]
-#CHECK: lcr	%r0, %r15               # encoding: [0x13,0x0f]
-#CHECK: lcr	%r15, %r0               # encoding: [0x13,0xf0]
-#CHECK: lcr	%r7, %r8                # encoding: [0x13,0x78]
-
-	lcr	%r0,%r0
-	lcr	%r0,%r15
-	lcr	%r15,%r0
-	lcr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-lcxbr-01.s b/test/MC/SystemZ/insn-lcxbr-01.s
deleted file mode 100644
index 48c0b8a77213..000000000000
--- a/test/MC/SystemZ/insn-lcxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lcxbr	%f0, %f8                # encoding: [0xb3,0x43,0x00,0x08]
-#CHECK: lcxbr	%f0, %f13               # encoding: [0xb3,0x43,0x00,0x0d]
-#CHECK: lcxbr	%f13, %f0               # encoding: [0xb3,0x43,0x00,0xd0]
-#CHECK: lcxbr	%f13, %f9               # encoding: [0xb3,0x43,0x00,0xd9]
-
-	lcxbr	%f0,%f8
-	lcxbr	%f0,%f13
-	lcxbr	%f13,%f0
-	lcxbr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lcxbr-02.s b/test/MC/SystemZ/insn-lcxbr-02.s
deleted file mode 100644
index ab3d6bfe9cd8..000000000000
--- a/test/MC/SystemZ/insn-lcxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lcxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: lcxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: lcxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: lcxbr	%f14, %f0
-
-	lcxbr	%f0, %f2
-	lcxbr	%f0, %f14
-	lcxbr	%f2, %f0
-	lcxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-ld-01.s b/test/MC/SystemZ/insn-ld-01.s
deleted file mode 100644
index 653eab5e0a57..000000000000
--- a/test/MC/SystemZ/insn-ld-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ld	%f0, 0                  # encoding: [0x68,0x00,0x00,0x00]
-#CHECK: ld	%f0, 4095               # encoding: [0x68,0x00,0x0f,0xff]
-#CHECK: ld	%f0, 0(%r1)             # encoding: [0x68,0x00,0x10,0x00]
-#CHECK: ld	%f0, 0(%r15)            # encoding: [0x68,0x00,0xf0,0x00]
-#CHECK: ld	%f0, 4095(%r1,%r15)     # encoding: [0x68,0x01,0xff,0xff]
-#CHECK: ld	%f0, 4095(%r15,%r1)     # encoding: [0x68,0x0f,0x1f,0xff]
-#CHECK: ld	%f15, 0                 # encoding: [0x68,0xf0,0x00,0x00]
-
-	ld	%f0, 0
-	ld	%f0, 4095
-	ld	%f0, 0(%r1)
-	ld	%f0, 0(%r15)
-	ld	%f0, 4095(%r1,%r15)
-	ld	%f0, 4095(%r15,%r1)
-	ld	%f15, 0
diff --git a/test/MC/SystemZ/insn-ld-02.s b/test/MC/SystemZ/insn-ld-02.s
deleted file mode 100644
index 5d786b555705..000000000000
--- a/test/MC/SystemZ/insn-ld-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ld	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: ld	%f0, 4096
-
-	ld	%f0, -1
-	ld	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ldeb-01.s b/test/MC/SystemZ/insn-ldeb-01.s
deleted file mode 100644
index a06344d27d97..000000000000
--- a/test/MC/SystemZ/insn-ldeb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x04]
-#CHECK: ldeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x04]
-#CHECK: ldeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x04]
-#CHECK: ldeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x04]
-#CHECK: ldeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x04]
-#CHECK: ldeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x04]
-#CHECK: ldeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x04]
-
-	ldeb	%f0, 0
-	ldeb	%f0, 4095
-	ldeb	%f0, 0(%r1)
-	ldeb	%f0, 0(%r15)
-	ldeb	%f0, 4095(%r1,%r15)
-	ldeb	%f0, 4095(%r15,%r1)
-	ldeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-ldeb-02.s b/test/MC/SystemZ/insn-ldeb-02.s
deleted file mode 100644
index 6df5e7b46d05..000000000000
--- a/test/MC/SystemZ/insn-ldeb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ldeb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: ldeb	%f0, 4096
-
-	ldeb	%f0, -1
-	ldeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ldebr-01.s b/test/MC/SystemZ/insn-ldebr-01.s
deleted file mode 100644
index 2df932c6686a..000000000000
--- a/test/MC/SystemZ/insn-ldebr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldebr	%f0, %f15               # encoding: [0xb3,0x04,0x00,0x0f]
-#CHECK: ldebr	%f7, %f8                # encoding: [0xb3,0x04,0x00,0x78]
-#CHECK: ldebr	%f15, %f0               # encoding: [0xb3,0x04,0x00,0xf0]
-
-	ldebr	%f0, %f15
-	ldebr	%f7, %f8
-	ldebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-ldgr-01.s b/test/MC/SystemZ/insn-ldgr-01.s
deleted file mode 100644
index 61a4529bee7c..000000000000
--- a/test/MC/SystemZ/insn-ldgr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldgr	%f0, %r0                # encoding: [0xb3,0xc1,0x00,0x00]
-#CHECK: ldgr	%f0, %r15               # encoding: [0xb3,0xc1,0x00,0x0f]
-#CHECK: ldgr	%f15, %r0               # encoding: [0xb3,0xc1,0x00,0xf0]
-#CHECK: ldgr	%f7, %r9                # encoding: [0xb3,0xc1,0x00,0x79]
-#CHECK: ldgr	%f15, %r15              # encoding: [0xb3,0xc1,0x00,0xff]
-
-	ldgr	%f0,%r0
-	ldgr	%f0,%r15
-	ldgr	%f15,%r0
-	ldgr	%f7,%r9
-	ldgr	%f15,%r15
diff --git a/test/MC/SystemZ/insn-ldgr-02.s b/test/MC/SystemZ/insn-ldgr-02.s
deleted file mode 100644
index 900174ab1f14..000000000000
--- a/test/MC/SystemZ/insn-ldgr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: ldgr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: ldgr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: ldgr	%f0, %a0
-#CHECK: error: invalid register
-#CHECK: ldgr	%a0, %r0
-
-	ldgr	%f0, %f0
-	ldgr	%r0, %r0
-	ldgr	%f0, %a0
-	ldgr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-ldr-01.s b/test/MC/SystemZ/insn-ldr-01.s
deleted file mode 100644
index 895ed3439fce..000000000000
--- a/test/MC/SystemZ/insn-ldr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldr	%f0, %f9                # encoding: [0x28,0x09]
-#CHECK: ldr	%f0, %f15               # encoding: [0x28,0x0f]
-#CHECK: ldr	%f15, %f0               # encoding: [0x28,0xf0]
-#CHECK: ldr	%f15, %f9               # encoding: [0x28,0xf9]
-
-	ldr	%f0,%f9
-	ldr	%f0,%f15
-	ldr	%f15,%f0
-	ldr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-ldxbr-01.s b/test/MC/SystemZ/insn-ldxbr-01.s
deleted file mode 100644
index 49e1d2adfb15..000000000000
--- a/test/MC/SystemZ/insn-ldxbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldxbr	%f0, %f0                # encoding: [0xb3,0x45,0x00,0x00]
-#CHECK: ldxbr	%f0, %f13               # encoding: [0xb3,0x45,0x00,0x0d]
-#CHECK: ldxbr	%f8, %f12               # encoding: [0xb3,0x45,0x00,0x8c]
-#CHECK: ldxbr	%f13, %f0               # encoding: [0xb3,0x45,0x00,0xd0]
-#CHECK: ldxbr	%f13, %f13              # encoding: [0xb3,0x45,0x00,0xdd]
-
-	ldxbr	%f0, %f0
-	ldxbr	%f0, %f13
-	ldxbr	%f8, %f12
-	ldxbr	%f13, %f0
-	ldxbr	%f13, %f13
diff --git a/test/MC/SystemZ/insn-ldxbr-02.s b/test/MC/SystemZ/insn-ldxbr-02.s
deleted file mode 100644
index 89914dff9d47..000000000000
--- a/test/MC/SystemZ/insn-ldxbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: ldxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: ldxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: ldxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: ldxbr	%f14, %f0
-
-	ldxbr	%f0, %f2
-	ldxbr	%f0, %f14
-	ldxbr	%f2, %f0
-	ldxbr	%f14, %f0
diff --git a/test/MC/SystemZ/insn-ldy-01.s b/test/MC/SystemZ/insn-ldy-01.s
deleted file mode 100644
index 5c2d145af124..000000000000
--- a/test/MC/SystemZ/insn-ldy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldy	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x65]
-#CHECK: ldy	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x65]
-#CHECK: ldy	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x65]
-#CHECK: ldy	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x65]
-#CHECK: ldy	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x65]
-#CHECK: ldy	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x65]
-#CHECK: ldy	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x65]
-#CHECK: ldy	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x65]
-#CHECK: ldy	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x65]
-#CHECK: ldy	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x65]
-
-	ldy	%f0, -524288
-	ldy	%f0, -1
-	ldy	%f0, 0
-	ldy	%f0, 1
-	ldy	%f0, 524287
-	ldy	%f0, 0(%r1)
-	ldy	%f0, 0(%r15)
-	ldy	%f0, 524287(%r1,%r15)
-	ldy	%f0, 524287(%r15,%r1)
-	ldy	%f15, 0
diff --git a/test/MC/SystemZ/insn-ldy-02.s b/test/MC/SystemZ/insn-ldy-02.s
deleted file mode 100644
index b16e0149ee76..000000000000
--- a/test/MC/SystemZ/insn-ldy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ldy	%f0, -524289
-#CHECK: error: invalid operand
-#CHECK: ldy	%f0, 524288
-
-	ldy	%f0, -524289
-	ldy	%f0, 524288
diff --git a/test/MC/SystemZ/insn-le-01.s b/test/MC/SystemZ/insn-le-01.s
deleted file mode 100644
index 15bbce26a519..000000000000
--- a/test/MC/SystemZ/insn-le-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: le	%f0, 0                  # encoding: [0x78,0x00,0x00,0x00]
-#CHECK: le	%f0, 4095               # encoding: [0x78,0x00,0x0f,0xff]
-#CHECK: le	%f0, 0(%r1)             # encoding: [0x78,0x00,0x10,0x00]
-#CHECK: le	%f0, 0(%r15)            # encoding: [0x78,0x00,0xf0,0x00]
-#CHECK: le	%f0, 4095(%r1,%r15)     # encoding: [0x78,0x01,0xff,0xff]
-#CHECK: le	%f0, 4095(%r15,%r1)     # encoding: [0x78,0x0f,0x1f,0xff]
-#CHECK: le	%f15, 0                 # encoding: [0x78,0xf0,0x00,0x00]
-
-	le	%f0, 0
-	le	%f0, 4095
-	le	%f0, 0(%r1)
-	le	%f0, 0(%r15)
-	le	%f0, 4095(%r1,%r15)
-	le	%f0, 4095(%r15,%r1)
-	le	%f15, 0
diff --git a/test/MC/SystemZ/insn-le-02.s b/test/MC/SystemZ/insn-le-02.s
deleted file mode 100644
index f784ea1c9012..000000000000
--- a/test/MC/SystemZ/insn-le-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: le	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: le	%f0, 4096
-
-	le	%f0, -1
-	le	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ledbr-01.s b/test/MC/SystemZ/insn-ledbr-01.s
deleted file mode 100644
index 6582d6344f7e..000000000000
--- a/test/MC/SystemZ/insn-ledbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ledbr	%f0, %f0                # encoding: [0xb3,0x44,0x00,0x00]
-#CHECK: ledbr	%f0, %f15               # encoding: [0xb3,0x44,0x00,0x0f]
-#CHECK: ledbr	%f7, %f8                # encoding: [0xb3,0x44,0x00,0x78]
-#CHECK: ledbr	%f15, %f0               # encoding: [0xb3,0x44,0x00,0xf0]
-#CHECK: ledbr	%f15, %f15              # encoding: [0xb3,0x44,0x00,0xff]
-
-	ledbr	%f0, %f0
-	ledbr	%f0, %f15
-	ledbr	%f7, %f8
-	ledbr	%f15, %f0
-	ledbr	%f15, %f15
diff --git a/test/MC/SystemZ/insn-ler-01.s b/test/MC/SystemZ/insn-ler-01.s
deleted file mode 100644
index 775e5232186d..000000000000
--- a/test/MC/SystemZ/insn-ler-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ler	%f0, %f9                # encoding: [0x38,0x09]
-#CHECK: ler	%f0, %f15               # encoding: [0x38,0x0f]
-#CHECK: ler	%f15, %f0               # encoding: [0x38,0xf0]
-#CHECK: ler	%f15, %f9               # encoding: [0x38,0xf9]
-
-	ler	%f0,%f9
-	ler	%f0,%f15
-	ler	%f15,%f0
-	ler	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lexbr-01.s b/test/MC/SystemZ/insn-lexbr-01.s
deleted file mode 100644
index ce32103c66cb..000000000000
--- a/test/MC/SystemZ/insn-lexbr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lexbr	%f0, %f0                # encoding: [0xb3,0x46,0x00,0x00]
-#CHECK: lexbr	%f0, %f13               # encoding: [0xb3,0x46,0x00,0x0d]
-#CHECK: lexbr	%f8, %f12               # encoding: [0xb3,0x46,0x00,0x8c]
-#CHECK: lexbr	%f13, %f0               # encoding: [0xb3,0x46,0x00,0xd0]
-#CHECK: lexbr	%f13, %f13              # encoding: [0xb3,0x46,0x00,0xdd]
-
-	lexbr	%f0, %f0
-	lexbr	%f0, %f13
-	lexbr	%f8, %f12
-	lexbr	%f13, %f0
-	lexbr	%f13, %f13
diff --git a/test/MC/SystemZ/insn-lexbr-02.s b/test/MC/SystemZ/insn-lexbr-02.s
deleted file mode 100644
index 8c9bb9ed600e..000000000000
--- a/test/MC/SystemZ/insn-lexbr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lexbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: lexbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: lexbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: lexbr	%f14, %f0
-
-	lexbr	%f0, %f2
-	lexbr	%f0, %f14
-	lexbr	%f2, %f0
-	lexbr	%f14, %f0
diff --git a/test/MC/SystemZ/insn-ley-01.s b/test/MC/SystemZ/insn-ley-01.s
deleted file mode 100644
index b854dc148c77..000000000000
--- a/test/MC/SystemZ/insn-ley-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ley	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x64]
-#CHECK: ley	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x64]
-#CHECK: ley	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x64]
-#CHECK: ley	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x64]
-#CHECK: ley	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x64]
-#CHECK: ley	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x64]
-#CHECK: ley	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x64]
-#CHECK: ley	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x64]
-#CHECK: ley	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x64]
-#CHECK: ley	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x64]
-
-	ley	%f0, -524288
-	ley	%f0, -1
-	ley	%f0, 0
-	ley	%f0, 1
-	ley	%f0, 524287
-	ley	%f0, 0(%r1)
-	ley	%f0, 0(%r15)
-	ley	%f0, 524287(%r1,%r15)
-	ley	%f0, 524287(%r15,%r1)
-	ley	%f15, 0
diff --git a/test/MC/SystemZ/insn-ley-02.s b/test/MC/SystemZ/insn-ley-02.s
deleted file mode 100644
index 98bbd94dc176..000000000000
--- a/test/MC/SystemZ/insn-ley-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ley	%f0, -524289
-#CHECK: error: invalid operand
-#CHECK: ley	%f0, 524288
-
-	ley	%f0, -524289
-	ley	%f0, 524288
diff --git a/test/MC/SystemZ/insn-lg-01.s b/test/MC/SystemZ/insn-lg-01.s
deleted file mode 100644
index 10a95cc8958a..000000000000
--- a/test/MC/SystemZ/insn-lg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x04]
-#CHECK: lg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x04]
-#CHECK: lg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x04]
-#CHECK: lg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x04]
-#CHECK: lg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x04]
-#CHECK: lg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x04]
-#CHECK: lg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x04]
-#CHECK: lg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x04]
-#CHECK: lg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x04]
-#CHECK: lg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x04]
-
-	lg	%r0, -524288
-	lg	%r0, -1
-	lg	%r0, 0
-	lg	%r0, 1
-	lg	%r0, 524287
-	lg	%r0, 0(%r1)
-	lg	%r0, 0(%r15)
-	lg	%r0, 524287(%r1,%r15)
-	lg	%r0, 524287(%r15,%r1)
-	lg	%r15, 0
diff --git a/test/MC/SystemZ/insn-lg-02.s b/test/MC/SystemZ/insn-lg-02.s
deleted file mode 100644
index 85b29fff5533..000000000000
--- a/test/MC/SystemZ/insn-lg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lg	%r0, 524288
-
-	lg	%r0, -524289
-	lg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lgb-01.s b/test/MC/SystemZ/insn-lgb-01.s
deleted file mode 100644
index 82b92f167272..000000000000
--- a/test/MC/SystemZ/insn-lgb-01.s
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x77]
-#CHECK: lgb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x77]
-#CHECK: lgb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x77]
-#CHECK: lgb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x77]
-#CHECK: lgb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x77]
-#CHECK: lgb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x77]
-#CHECK: lgb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x77]
-#CHECK: lgb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x77]
-#CHECK: lgb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x77]
-#CHECK: lgb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x77]
-
-	lgb	%r0, -524288
-	lgb	%r0, -1
-	lgb	%r0, 0
-	lgb	%r0, 1
-	lgb	%r0, 524287
-	lgb	%r0, 0(%r1)
-	lgb	%r0, 0(%r15)
-	lgb	%r0, 524287(%r1,%r15)
-	lgb	%r0, 524287(%r15,%r1)
-	lgb	%r15, 0
-
diff --git a/test/MC/SystemZ/insn-lgb-02.s b/test/MC/SystemZ/insn-lgb-02.s
deleted file mode 100644
index 7acc1762b51b..000000000000
--- a/test/MC/SystemZ/insn-lgb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lgb	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lgb	%r0, 524288
-
-	lgb	%r0, -524289
-	lgb	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lgbr-01.s b/test/MC/SystemZ/insn-lgbr-01.s
deleted file mode 100644
index ec2e6220091d..000000000000
--- a/test/MC/SystemZ/insn-lgbr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgbr	%r0, %r15               # encoding: [0xb9,0x06,0x00,0x0f]
-#CHECK: lgbr	%r7, %r8                # encoding: [0xb9,0x06,0x00,0x78]
-#CHECK: lgbr	%r15, %r0               # encoding: [0xb9,0x06,0x00,0xf0]
-
-	lgbr	%r0, %r15
-	lgbr	%r7, %r8
-	lgbr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lgdr-01.s b/test/MC/SystemZ/insn-lgdr-01.s
deleted file mode 100644
index 56d1e035b425..000000000000
--- a/test/MC/SystemZ/insn-lgdr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgdr	%r0, %f0                # encoding: [0xb3,0xcd,0x00,0x00]
-#CHECK: lgdr	%r0, %f15               # encoding: [0xb3,0xcd,0x00,0x0f]
-#CHECK: lgdr	%r15, %f0               # encoding: [0xb3,0xcd,0x00,0xf0]
-#CHECK: lgdr	%r8, %f8                # encoding: [0xb3,0xcd,0x00,0x88]
-#CHECK: lgdr	%r15, %f15              # encoding: [0xb3,0xcd,0x00,0xff]
-
-	lgdr	%r0,%f0
-	lgdr	%r0,%f15
-	lgdr	%r15,%f0
-	lgdr	%r8,%f8
-	lgdr	%r15,%f15
diff --git a/test/MC/SystemZ/insn-lgdr-02.s b/test/MC/SystemZ/insn-lgdr-02.s
deleted file mode 100644
index 3bf014d90ec1..000000000000
--- a/test/MC/SystemZ/insn-lgdr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lgdr	%f0, %f0
-#CHECK: error: invalid register
-#CHECK: lgdr	%r0, %r0
-#CHECK: error: invalid register
-#CHECK: lgdr	%r0, %a0
-#CHECK: error: invalid register
-#CHECK: lgdr	%a0, %f0
-
-	lgdr	%f0, %f0
-	lgdr	%r0, %r0
-	lgdr	%r0, %a0
-	lgdr	%a0, %f0
diff --git a/test/MC/SystemZ/insn-lgf-01.s b/test/MC/SystemZ/insn-lgf-01.s
deleted file mode 100644
index 9ed917228a8a..000000000000
--- a/test/MC/SystemZ/insn-lgf-01.s
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x14]
-#CHECK: lgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x14]
-#CHECK: lgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x14]
-#CHECK: lgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x14]
-#CHECK: lgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x14]
-#CHECK: lgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x14]
-#CHECK: lgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x14]
-#CHECK: lgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x14]
-#CHECK: lgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x14]
-#CHECK: lgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x14]
-
-	lgf	%r0, -524288
-	lgf	%r0, -1
-	lgf	%r0, 0
-	lgf	%r0, 1
-	lgf	%r0, 524287
-	lgf	%r0, 0(%r1)
-	lgf	%r0, 0(%r15)
-	lgf	%r0, 524287(%r1,%r15)
-	lgf	%r0, 524287(%r15,%r1)
-	lgf	%r15, 0
-
diff --git a/test/MC/SystemZ/insn-lgf-02.s b/test/MC/SystemZ/insn-lgf-02.s
deleted file mode 100644
index 32095a840936..000000000000
--- a/test/MC/SystemZ/insn-lgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lgf	%r0, 524288
-
-	lgf	%r0, -524289
-	lgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lgfi-01.s b/test/MC/SystemZ/insn-lgfi-01.s
deleted file mode 100644
index a5ca7a59b5d2..000000000000
--- a/test/MC/SystemZ/insn-lgfi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgfi	%r0, -2147483648        # encoding: [0xc0,0x01,0x80,0x00,0x00,0x00]
-#CHECK: lgfi	%r0, -1                 # encoding: [0xc0,0x01,0xff,0xff,0xff,0xff]
-#CHECK: lgfi	%r0, 0                  # encoding: [0xc0,0x01,0x00,0x00,0x00,0x00]
-#CHECK: lgfi	%r0, 1                  # encoding: [0xc0,0x01,0x00,0x00,0x00,0x01]
-#CHECK: lgfi	%r0, 2147483647         # encoding: [0xc0,0x01,0x7f,0xff,0xff,0xff]
-#CHECK: lgfi	%r15, 0                 # encoding: [0xc0,0xf1,0x00,0x00,0x00,0x00]
-
-	lgfi	%r0, -1 << 31
-	lgfi	%r0, -1
-	lgfi	%r0, 0
-	lgfi	%r0, 1
-	lgfi	%r0, (1 << 31) - 1
-	lgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-lgfi-02.s b/test/MC/SystemZ/insn-lgfi-02.s
deleted file mode 100644
index a45cfeb46d05..000000000000
--- a/test/MC/SystemZ/insn-lgfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lgfi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: lgfi	%r0, (1 << 31)
-
-	lgfi	%r0, (-1 << 31) - 1
-	lgfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-lgfr-01.s b/test/MC/SystemZ/insn-lgfr-01.s
deleted file mode 100644
index bc375a67b000..000000000000
--- a/test/MC/SystemZ/insn-lgfr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgfr	%r0, %r15               # encoding: [0xb9,0x14,0x00,0x0f]
-#CHECK: lgfr	%r7, %r8                # encoding: [0xb9,0x14,0x00,0x78]
-#CHECK: lgfr	%r15, %r0               # encoding: [0xb9,0x14,0x00,0xf0]
-
-	lgfr	%r0, %r15
-	lgfr	%r7, %r8
-	lgfr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lgfrl-01.s b/test/MC/SystemZ/insn-lgfrl-01.s
deleted file mode 100644
index 85c9ea764c8d..000000000000
--- a/test/MC/SystemZ/insn-lgfrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgfrl	%r0, 2864434397         # encoding: [0xc4,0x0c,0x55,0x5d,0xe6,0x6e]
-#CHECK: lgfrl	%r15, 2864434397        # encoding: [0xc4,0xfc,0x55,0x5d,0xe6,0x6e]
-
-	lgfrl	%r0,0xaabbccdd
-	lgfrl	%r15,0xaabbccdd
-
-#CHECK: lgfrl	%r0, foo                # encoding: [0xc4,0x0c,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: lgfrl	%r15, foo               # encoding: [0xc4,0xfc,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	lgfrl	%r0,foo
-	lgfrl	%r15,foo
-
-#CHECK: lgfrl	%r3, bar+100            # encoding: [0xc4,0x3c,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: lgfrl	%r4, bar+100            # encoding: [0xc4,0x4c,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	lgfrl	%r3,bar+100
-	lgfrl	%r4,bar+100
-
-#CHECK: lgfrl	%r7, frob@PLT           # encoding: [0xc4,0x7c,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: lgfrl	%r8, frob@PLT           # encoding: [0xc4,0x8c,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	lgfrl	%r7,frob@PLT
-	lgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lgh-01.s b/test/MC/SystemZ/insn-lgh-01.s
deleted file mode 100644
index 9dae6212b2c2..000000000000
--- a/test/MC/SystemZ/insn-lgh-01.s
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x15]
-#CHECK: lgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x15]
-#CHECK: lgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x15]
-#CHECK: lgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x15]
-#CHECK: lgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x15]
-#CHECK: lgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x15]
-#CHECK: lgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x15]
-#CHECK: lgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x15]
-#CHECK: lgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x15]
-#CHECK: lgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x15]
-
-	lgh	%r0, -524288
-	lgh	%r0, -1
-	lgh	%r0, 0
-	lgh	%r0, 1
-	lgh	%r0, 524287
-	lgh	%r0, 0(%r1)
-	lgh	%r0, 0(%r15)
-	lgh	%r0, 524287(%r1,%r15)
-	lgh	%r0, 524287(%r15,%r1)
-	lgh	%r15, 0
-
diff --git a/test/MC/SystemZ/insn-lgh-02.s b/test/MC/SystemZ/insn-lgh-02.s
deleted file mode 100644
index 62b7341d8276..000000000000
--- a/test/MC/SystemZ/insn-lgh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lgh	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lgh	%r0, 524288
-
-	lgh	%r0, -524289
-	lgh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lghi-01.s b/test/MC/SystemZ/insn-lghi-01.s
deleted file mode 100644
index d9d12ae1bb39..000000000000
--- a/test/MC/SystemZ/insn-lghi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lghi	%r0, -32768             # encoding: [0xa7,0x09,0x80,0x00]
-#CHECK: lghi	%r0, -1                 # encoding: [0xa7,0x09,0xff,0xff]
-#CHECK: lghi	%r0, 0                  # encoding: [0xa7,0x09,0x00,0x00]
-#CHECK: lghi	%r0, 1                  # encoding: [0xa7,0x09,0x00,0x01]
-#CHECK: lghi	%r0, 32767              # encoding: [0xa7,0x09,0x7f,0xff]
-#CHECK: lghi	%r15, 0                 # encoding: [0xa7,0xf9,0x00,0x00]
-
-	lghi	%r0, -32768
-	lghi	%r0, -1
-	lghi	%r0, 0
-	lghi	%r0, 1
-	lghi	%r0, 32767
-	lghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-lghi-02.s b/test/MC/SystemZ/insn-lghi-02.s
deleted file mode 100644
index b1af7a0c54ad..000000000000
--- a/test/MC/SystemZ/insn-lghi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lghi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: lghi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: lghi	%r0, foo
-
-	lghi	%r0, -32769
-	lghi	%r0, 32768
-	lghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-lghr-01.s b/test/MC/SystemZ/insn-lghr-01.s
deleted file mode 100644
index a1dc842339b4..000000000000
--- a/test/MC/SystemZ/insn-lghr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lghr	%r0, %r15               # encoding: [0xb9,0x07,0x00,0x0f]
-#CHECK: lghr	%r7, %r8                # encoding: [0xb9,0x07,0x00,0x78]
-#CHECK: lghr	%r15, %r0               # encoding: [0xb9,0x07,0x00,0xf0]
-
-	lghr	%r0, %r15
-	lghr	%r7, %r8
-	lghr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lghrl-01.s b/test/MC/SystemZ/insn-lghrl-01.s
deleted file mode 100644
index 34992e6ff26a..000000000000
--- a/test/MC/SystemZ/insn-lghrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lghrl	%r0, 2864434397         # encoding: [0xc4,0x04,0x55,0x5d,0xe6,0x6e]
-#CHECK: lghrl	%r15, 2864434397        # encoding: [0xc4,0xf4,0x55,0x5d,0xe6,0x6e]
-
-	lghrl	%r0,0xaabbccdd
-	lghrl	%r15,0xaabbccdd
-
-#CHECK: lghrl	%r0, foo                # encoding: [0xc4,0x04,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: lghrl	%r15, foo               # encoding: [0xc4,0xf4,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	lghrl	%r0,foo
-	lghrl	%r15,foo
-
-#CHECK: lghrl	%r3, bar+100            # encoding: [0xc4,0x34,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: lghrl	%r4, bar+100            # encoding: [0xc4,0x44,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	lghrl	%r3,bar+100
-	lghrl	%r4,bar+100
-
-#CHECK: lghrl	%r7, frob@PLT           # encoding: [0xc4,0x74,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: lghrl	%r8, frob@PLT           # encoding: [0xc4,0x84,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	lghrl	%r7,frob@PLT
-	lghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lgr-01.s b/test/MC/SystemZ/insn-lgr-01.s
deleted file mode 100644
index e50295682bd4..000000000000
--- a/test/MC/SystemZ/insn-lgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgr	%r0, %r9                # encoding: [0xb9,0x04,0x00,0x09]
-#CHECK: lgr	%r0, %r15               # encoding: [0xb9,0x04,0x00,0x0f]
-#CHECK: lgr	%r15, %r0               # encoding: [0xb9,0x04,0x00,0xf0]
-#CHECK: lgr	%r15, %r9               # encoding: [0xb9,0x04,0x00,0xf9]
-
-	lgr	%r0,%r9
-	lgr	%r0,%r15
-	lgr	%r15,%r0
-	lgr	%r15,%r9
diff --git a/test/MC/SystemZ/insn-lgrl-01.s b/test/MC/SystemZ/insn-lgrl-01.s
deleted file mode 100644
index 7a18908f9ab7..000000000000
--- a/test/MC/SystemZ/insn-lgrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgrl	%r0, 2864434397         # encoding: [0xc4,0x08,0x55,0x5d,0xe6,0x6e]
-#CHECK: lgrl	%r15, 2864434397        # encoding: [0xc4,0xf8,0x55,0x5d,0xe6,0x6e]
-
-	lgrl	%r0,0xaabbccdd
-	lgrl	%r15,0xaabbccdd
-
-#CHECK: lgrl	%r0, foo                # encoding: [0xc4,0x08,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: lgrl	%r15, foo               # encoding: [0xc4,0xf8,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	lgrl	%r0,foo
-	lgrl	%r15,foo
-
-#CHECK: lgrl	%r3, bar+100            # encoding: [0xc4,0x38,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: lgrl	%r4, bar+100            # encoding: [0xc4,0x48,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	lgrl	%r3,bar+100
-	lgrl	%r4,bar+100
-
-#CHECK: lgrl	%r7, frob@PLT           # encoding: [0xc4,0x78,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: lgrl	%r8, frob@PLT           # encoding: [0xc4,0x88,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	lgrl	%r7,frob@PLT
-	lgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lh-01.s b/test/MC/SystemZ/insn-lh-01.s
deleted file mode 100644
index 07be01275693..000000000000
--- a/test/MC/SystemZ/insn-lh-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lh	%r0, 0                  # encoding: [0x48,0x00,0x00,0x00]
-#CHECK: lh	%r0, 4095               # encoding: [0x48,0x00,0x0f,0xff]
-#CHECK: lh	%r0, 0(%r1)             # encoding: [0x48,0x00,0x10,0x00]
-#CHECK: lh	%r0, 0(%r15)            # encoding: [0x48,0x00,0xf0,0x00]
-#CHECK: lh	%r0, 4095(%r1,%r15)     # encoding: [0x48,0x01,0xff,0xff]
-#CHECK: lh	%r0, 4095(%r15,%r1)     # encoding: [0x48,0x0f,0x1f,0xff]
-#CHECK: lh	%r15, 0                 # encoding: [0x48,0xf0,0x00,0x00]
-
-	lh	%r0, 0
-	lh	%r0, 4095
-	lh	%r0, 0(%r1)
-	lh	%r0, 0(%r15)
-	lh	%r0, 4095(%r1,%r15)
-	lh	%r0, 4095(%r15,%r1)
-	lh	%r15, 0
diff --git a/test/MC/SystemZ/insn-lh-02.s b/test/MC/SystemZ/insn-lh-02.s
deleted file mode 100644
index 80566d8d37ef..000000000000
--- a/test/MC/SystemZ/insn-lh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: lh	%r0, 4096
-
-	lh	%r0, -1
-	lh	%r0, 4096
diff --git a/test/MC/SystemZ/insn-lhi-01.s b/test/MC/SystemZ/insn-lhi-01.s
deleted file mode 100644
index 43b7df08f016..000000000000
--- a/test/MC/SystemZ/insn-lhi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lhi	%r0, -32768             # encoding: [0xa7,0x08,0x80,0x00]
-#CHECK: lhi	%r0, -1                 # encoding: [0xa7,0x08,0xff,0xff]
-#CHECK: lhi	%r0, 0                  # encoding: [0xa7,0x08,0x00,0x00]
-#CHECK: lhi	%r0, 1                  # encoding: [0xa7,0x08,0x00,0x01]
-#CHECK: lhi	%r0, 32767              # encoding: [0xa7,0x08,0x7f,0xff]
-#CHECK: lhi	%r15, 0                 # encoding: [0xa7,0xf8,0x00,0x00]
-
-	lhi	%r0, -32768
-	lhi	%r0, -1
-	lhi	%r0, 0
-	lhi	%r0, 1
-	lhi	%r0, 32767
-	lhi	%r15, 0
diff --git a/test/MC/SystemZ/insn-lhi-02.s b/test/MC/SystemZ/insn-lhi-02.s
deleted file mode 100644
index 8e38464b828f..000000000000
--- a/test/MC/SystemZ/insn-lhi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lhi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: lhi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: lhi	%r0, foo
-
-	lhi	%r0, -32769
-	lhi	%r0, 32768
-	lhi	%r0, foo
diff --git a/test/MC/SystemZ/insn-lhr-01.s b/test/MC/SystemZ/insn-lhr-01.s
deleted file mode 100644
index a31cbc65f44d..000000000000
--- a/test/MC/SystemZ/insn-lhr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lhr	%r0, %r15               # encoding: [0xb9,0x27,0x00,0x0f]
-#CHECK: lhr	%r7, %r8                # encoding: [0xb9,0x27,0x00,0x78]
-#CHECK: lhr	%r15, %r0               # encoding: [0xb9,0x27,0x00,0xf0]
-
-	lhr	%r0, %r15
-	lhr	%r7, %r8
-	lhr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lhrl-01.s b/test/MC/SystemZ/insn-lhrl-01.s
deleted file mode 100644
index 87925fe09870..000000000000
--- a/test/MC/SystemZ/insn-lhrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lhrl	%r0, 2864434397         # encoding: [0xc4,0x05,0x55,0x5d,0xe6,0x6e]
-#CHECK: lhrl	%r15, 2864434397        # encoding: [0xc4,0xf5,0x55,0x5d,0xe6,0x6e]
-
-	lhrl	%r0,0xaabbccdd
-	lhrl	%r15,0xaabbccdd
-
-#CHECK: lhrl	%r0, foo                # encoding: [0xc4,0x05,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: lhrl	%r15, foo               # encoding: [0xc4,0xf5,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	lhrl	%r0,foo
-	lhrl	%r15,foo
-
-#CHECK: lhrl	%r3, bar+100            # encoding: [0xc4,0x35,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: lhrl	%r4, bar+100            # encoding: [0xc4,0x45,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	lhrl	%r3,bar+100
-	lhrl	%r4,bar+100
-
-#CHECK: lhrl	%r7, frob@PLT           # encoding: [0xc4,0x75,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: lhrl	%r8, frob@PLT           # encoding: [0xc4,0x85,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	lhrl	%r7,frob@PLT
-	lhrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lhy-01.s b/test/MC/SystemZ/insn-lhy-01.s
deleted file mode 100644
index db811a41a4a5..000000000000
--- a/test/MC/SystemZ/insn-lhy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lhy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x78]
-#CHECK: lhy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x78]
-#CHECK: lhy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x78]
-#CHECK: lhy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x78]
-#CHECK: lhy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x78]
-#CHECK: lhy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x78]
-#CHECK: lhy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x78]
-#CHECK: lhy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x78]
-#CHECK: lhy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x78]
-#CHECK: lhy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x78]
-
-	lhy	%r0, -524288
-	lhy	%r0, -1
-	lhy	%r0, 0
-	lhy	%r0, 1
-	lhy	%r0, 524287
-	lhy	%r0, 0(%r1)
-	lhy	%r0, 0(%r15)
-	lhy	%r0, 524287(%r1,%r15)
-	lhy	%r0, 524287(%r15,%r1)
-	lhy	%r15, 0
diff --git a/test/MC/SystemZ/insn-lhy-02.s b/test/MC/SystemZ/insn-lhy-02.s
deleted file mode 100644
index 6f1caa59faf5..000000000000
--- a/test/MC/SystemZ/insn-lhy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lhy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lhy	%r0, 524288
-
-	lhy	%r0, -524289
-	lhy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llc-01.s b/test/MC/SystemZ/insn-llc-01.s
deleted file mode 100644
index 74a819b2ee30..000000000000
--- a/test/MC/SystemZ/insn-llc-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x94]
-#CHECK: llc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x94]
-#CHECK: llc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x94]
-#CHECK: llc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x94]
-#CHECK: llc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x94]
-#CHECK: llc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x94]
-#CHECK: llc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x94]
-#CHECK: llc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x94]
-#CHECK: llc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x94]
-#CHECK: llc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x94]
-
-	llc	%r0, -524288
-	llc	%r0, -1
-	llc	%r0, 0
-	llc	%r0, 1
-	llc	%r0, 524287
-	llc	%r0, 0(%r1)
-	llc	%r0, 0(%r15)
-	llc	%r0, 524287(%r1,%r15)
-	llc	%r0, 524287(%r15,%r1)
-	llc	%r15, 0
diff --git a/test/MC/SystemZ/insn-llc-02.s b/test/MC/SystemZ/insn-llc-02.s
deleted file mode 100644
index 4a65f6c55e78..000000000000
--- a/test/MC/SystemZ/insn-llc-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llc	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: llc	%r0, 524288
-
-	llc	%r0, -524289
-	llc	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llcr-01.s b/test/MC/SystemZ/insn-llcr-01.s
deleted file mode 100644
index 72a695ceffa7..000000000000
--- a/test/MC/SystemZ/insn-llcr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llcr	%r0, %r15               # encoding: [0xb9,0x94,0x00,0x0f]
-#CHECK: llcr	%r7, %r8                # encoding: [0xb9,0x94,0x00,0x78]
-#CHECK: llcr	%r15, %r0               # encoding: [0xb9,0x94,0x00,0xf0]
-
-	llcr	%r0, %r15
-	llcr	%r7, %r8
-	llcr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llgc-01.s b/test/MC/SystemZ/insn-llgc-01.s
deleted file mode 100644
index 297c6d62db4f..000000000000
--- a/test/MC/SystemZ/insn-llgc-01.s
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llgc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x90]
-#CHECK: llgc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x90]
-#CHECK: llgc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x90]
-#CHECK: llgc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x90]
-#CHECK: llgc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x90]
-#CHECK: llgc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x90]
-#CHECK: llgc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x90]
-#CHECK: llgc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x90]
-#CHECK: llgc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x90]
-#CHECK: llgc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x90]
-
-	llgc	%r0, -524288
-	llgc	%r0, -1
-	llgc	%r0, 0
-	llgc	%r0, 1
-	llgc	%r0, 524287
-	llgc	%r0, 0(%r1)
-	llgc	%r0, 0(%r15)
-	llgc	%r0, 524287(%r1,%r15)
-	llgc	%r0, 524287(%r15,%r1)
-	llgc	%r15, 0
-
diff --git a/test/MC/SystemZ/insn-llgc-02.s b/test/MC/SystemZ/insn-llgc-02.s
deleted file mode 100644
index 76fca0f2512f..000000000000
--- a/test/MC/SystemZ/insn-llgc-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llgc	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: llgc	%r0, 524288
-
-	llgc	%r0, -524289
-	llgc	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llgcr-01.s b/test/MC/SystemZ/insn-llgcr-01.s
deleted file mode 100644
index 5d653bfff909..000000000000
--- a/test/MC/SystemZ/insn-llgcr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llgcr	%r0, %r15               # encoding: [0xb9,0x84,0x00,0x0f]
-#CHECK: llgcr	%r7, %r8                # encoding: [0xb9,0x84,0x00,0x78]
-#CHECK: llgcr	%r15, %r0               # encoding: [0xb9,0x84,0x00,0xf0]
-
-	llgcr	%r0, %r15
-	llgcr	%r7, %r8
-	llgcr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llgf-01.s b/test/MC/SystemZ/insn-llgf-01.s
deleted file mode 100644
index 0394140dab82..000000000000
--- a/test/MC/SystemZ/insn-llgf-01.s
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x16]
-#CHECK: llgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x16]
-#CHECK: llgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x16]
-#CHECK: llgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x16]
-#CHECK: llgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x16]
-#CHECK: llgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x16]
-#CHECK: llgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x16]
-#CHECK: llgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x16]
-#CHECK: llgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x16]
-#CHECK: llgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x16]
-
-	llgf	%r0, -524288
-	llgf	%r0, -1
-	llgf	%r0, 0
-	llgf	%r0, 1
-	llgf	%r0, 524287
-	llgf	%r0, 0(%r1)
-	llgf	%r0, 0(%r15)
-	llgf	%r0, 524287(%r1,%r15)
-	llgf	%r0, 524287(%r15,%r1)
-	llgf	%r15, 0
-
diff --git a/test/MC/SystemZ/insn-llgf-02.s b/test/MC/SystemZ/insn-llgf-02.s
deleted file mode 100644
index 0b2fab033aad..000000000000
--- a/test/MC/SystemZ/insn-llgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: llgf	%r0, 524288
-
-	llgf	%r0, -524289
-	llgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llgfr-01.s b/test/MC/SystemZ/insn-llgfr-01.s
deleted file mode 100644
index 74f1074d150f..000000000000
--- a/test/MC/SystemZ/insn-llgfr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llgfr	%r0, %r15               # encoding: [0xb9,0x16,0x00,0x0f]
-#CHECK: llgfr	%r7, %r8                # encoding: [0xb9,0x16,0x00,0x78]
-#CHECK: llgfr	%r15, %r0               # encoding: [0xb9,0x16,0x00,0xf0]
-
-	llgfr	%r0, %r15
-	llgfr	%r7, %r8
-	llgfr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llgfrl-01.s b/test/MC/SystemZ/insn-llgfrl-01.s
deleted file mode 100644
index 85fc9f4b3c3f..000000000000
--- a/test/MC/SystemZ/insn-llgfrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llgfrl	%r0, 2864434397         # encoding: [0xc4,0x0e,0x55,0x5d,0xe6,0x6e]
-#CHECK: llgfrl	%r15, 2864434397        # encoding: [0xc4,0xfe,0x55,0x5d,0xe6,0x6e]
-
-	llgfrl	%r0,0xaabbccdd
-	llgfrl	%r15,0xaabbccdd
-
-#CHECK: llgfrl	%r0, foo                # encoding: [0xc4,0x0e,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: llgfrl	%r15, foo               # encoding: [0xc4,0xfe,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	llgfrl	%r0,foo
-	llgfrl	%r15,foo
-
-#CHECK: llgfrl	%r3, bar+100            # encoding: [0xc4,0x3e,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: llgfrl	%r4, bar+100            # encoding: [0xc4,0x4e,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	llgfrl	%r3,bar+100
-	llgfrl	%r4,bar+100
-
-#CHECK: llgfrl	%r7, frob@PLT           # encoding: [0xc4,0x7e,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: llgfrl	%r8, frob@PLT           # encoding: [0xc4,0x8e,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	llgfrl	%r7,frob@PLT
-	llgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-llgh-01.s b/test/MC/SystemZ/insn-llgh-01.s
deleted file mode 100644
index acbab004382c..000000000000
--- a/test/MC/SystemZ/insn-llgh-01.s
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x91]
-#CHECK: llgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x91]
-#CHECK: llgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x91]
-#CHECK: llgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x91]
-#CHECK: llgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x91]
-#CHECK: llgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x91]
-#CHECK: llgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x91]
-#CHECK: llgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x91]
-#CHECK: llgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x91]
-#CHECK: llgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x91]
-
-	llgh	%r0, -524288
-	llgh	%r0, -1
-	llgh	%r0, 0
-	llgh	%r0, 1
-	llgh	%r0, 524287
-	llgh	%r0, 0(%r1)
-	llgh	%r0, 0(%r15)
-	llgh	%r0, 524287(%r1,%r15)
-	llgh	%r0, 524287(%r15,%r1)
-	llgh	%r15, 0
-
diff --git a/test/MC/SystemZ/insn-llgh-02.s b/test/MC/SystemZ/insn-llgh-02.s
deleted file mode 100644
index 95b6b12c91e6..000000000000
--- a/test/MC/SystemZ/insn-llgh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llgh	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: llgh	%r0, 524288
-
-	llgh	%r0, -524289
-	llgh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llghr-01.s b/test/MC/SystemZ/insn-llghr-01.s
deleted file mode 100644
index 3e2f6de2ae8f..000000000000
--- a/test/MC/SystemZ/insn-llghr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llghr	%r0, %r15               # encoding: [0xb9,0x85,0x00,0x0f]
-#CHECK: llghr	%r7, %r8                # encoding: [0xb9,0x85,0x00,0x78]
-#CHECK: llghr	%r15, %r0               # encoding: [0xb9,0x85,0x00,0xf0]
-
-	llghr	%r0, %r15
-	llghr	%r7, %r8
-	llghr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llghrl-01.s b/test/MC/SystemZ/insn-llghrl-01.s
deleted file mode 100644
index af3fa8b9d787..000000000000
--- a/test/MC/SystemZ/insn-llghrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llghrl	%r0, 2864434397         # encoding: [0xc4,0x06,0x55,0x5d,0xe6,0x6e]
-#CHECK: llghrl	%r15, 2864434397        # encoding: [0xc4,0xf6,0x55,0x5d,0xe6,0x6e]
-
-	llghrl	%r0,0xaabbccdd
-	llghrl	%r15,0xaabbccdd
-
-#CHECK: llghrl	%r0, foo                # encoding: [0xc4,0x06,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: llghrl	%r15, foo               # encoding: [0xc4,0xf6,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	llghrl	%r0,foo
-	llghrl	%r15,foo
-
-#CHECK: llghrl	%r3, bar+100            # encoding: [0xc4,0x36,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: llghrl	%r4, bar+100            # encoding: [0xc4,0x46,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	llghrl	%r3,bar+100
-	llghrl	%r4,bar+100
-
-#CHECK: llghrl	%r7, frob@PLT           # encoding: [0xc4,0x76,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: llghrl	%r8, frob@PLT           # encoding: [0xc4,0x86,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	llghrl	%r7,frob@PLT
-	llghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-llh-01.s b/test/MC/SystemZ/insn-llh-01.s
deleted file mode 100644
index 7e15f3f6d601..000000000000
--- a/test/MC/SystemZ/insn-llh-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x95]
-#CHECK: llh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x95]
-#CHECK: llh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x95]
-#CHECK: llh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x95]
-#CHECK: llh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x95]
-#CHECK: llh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x95]
-#CHECK: llh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x95]
-#CHECK: llh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x95]
-#CHECK: llh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x95]
-#CHECK: llh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x95]
-
-	llh	%r0, -524288
-	llh	%r0, -1
-	llh	%r0, 0
-	llh	%r0, 1
-	llh	%r0, 524287
-	llh	%r0, 0(%r1)
-	llh	%r0, 0(%r15)
-	llh	%r0, 524287(%r1,%r15)
-	llh	%r0, 524287(%r15,%r1)
-	llh	%r15, 0
diff --git a/test/MC/SystemZ/insn-llh-02.s b/test/MC/SystemZ/insn-llh-02.s
deleted file mode 100644
index 92c49bb59c29..000000000000
--- a/test/MC/SystemZ/insn-llh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llh	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: llh	%r0, 524288
-
-	llh	%r0, -524289
-	llh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llhr-01.s b/test/MC/SystemZ/insn-llhr-01.s
deleted file mode 100644
index bb1d3b5d32fb..000000000000
--- a/test/MC/SystemZ/insn-llhr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llhr	%r0, %r15               # encoding: [0xb9,0x95,0x00,0x0f]
-#CHECK: llhr	%r7, %r8                # encoding: [0xb9,0x95,0x00,0x78]
-#CHECK: llhr	%r15, %r0               # encoding: [0xb9,0x95,0x00,0xf0]
-
-	llhr	%r0, %r15
-	llhr	%r7, %r8
-	llhr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llhrl-01.s b/test/MC/SystemZ/insn-llhrl-01.s
deleted file mode 100644
index 30ed4f90565c..000000000000
--- a/test/MC/SystemZ/insn-llhrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llhrl	%r0, 2864434397         # encoding: [0xc4,0x02,0x55,0x5d,0xe6,0x6e]
-#CHECK: llhrl	%r15, 2864434397        # encoding: [0xc4,0xf2,0x55,0x5d,0xe6,0x6e]
-
-	llhrl	%r0,0xaabbccdd
-	llhrl	%r15,0xaabbccdd
-
-#CHECK: llhrl	%r0, foo                # encoding: [0xc4,0x02,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: llhrl	%r15, foo               # encoding: [0xc4,0xf2,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	llhrl	%r0,foo
-	llhrl	%r15,foo
-
-#CHECK: llhrl	%r3, bar+100            # encoding: [0xc4,0x32,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: llhrl	%r4, bar+100            # encoding: [0xc4,0x42,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	llhrl	%r3,bar+100
-	llhrl	%r4,bar+100
-
-#CHECK: llhrl	%r7, frob@PLT           # encoding: [0xc4,0x72,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: llhrl	%r8, frob@PLT           # encoding: [0xc4,0x82,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	llhrl	%r7,frob@PLT
-	llhrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-llihf-01.s b/test/MC/SystemZ/insn-llihf-01.s
deleted file mode 100644
index 6ddd29ffa370..000000000000
--- a/test/MC/SystemZ/insn-llihf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llihf	%r0, 0                  # encoding: [0xc0,0x0e,0x00,0x00,0x00,0x00]
-#CHECK: llihf	%r0, 4294967295         # encoding: [0xc0,0x0e,0xff,0xff,0xff,0xff]
-#CHECK: llihf	%r15, 0                 # encoding: [0xc0,0xfe,0x00,0x00,0x00,0x00]
-
-	llihf	%r0, 0
-	llihf	%r0, 0xffffffff
-	llihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-llihf-02.s b/test/MC/SystemZ/insn-llihf-02.s
deleted file mode 100644
index e1b4537b02c8..000000000000
--- a/test/MC/SystemZ/insn-llihf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llihf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: llihf	%r0, 1 << 32
-
-	llihf	%r0, -1
-	llihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-llihh-01.s b/test/MC/SystemZ/insn-llihh-01.s
deleted file mode 100644
index 06060765c740..000000000000
--- a/test/MC/SystemZ/insn-llihh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llihh	%r0, 0                  # encoding: [0xa5,0x0c,0x00,0x00]
-#CHECK: llihh	%r0, 32768              # encoding: [0xa5,0x0c,0x80,0x00]
-#CHECK: llihh	%r0, 65535              # encoding: [0xa5,0x0c,0xff,0xff]
-#CHECK: llihh	%r15, 0                 # encoding: [0xa5,0xfc,0x00,0x00]
-
-	llihh	%r0, 0
-	llihh	%r0, 0x8000
-	llihh	%r0, 0xffff
-	llihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-llihh-02.s b/test/MC/SystemZ/insn-llihh-02.s
deleted file mode 100644
index 1309f1440f59..000000000000
--- a/test/MC/SystemZ/insn-llihh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llihh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: llihh	%r0, 0x10000
-
-	llihh	%r0, -1
-	llihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-llihl-01.s b/test/MC/SystemZ/insn-llihl-01.s
deleted file mode 100644
index 6353353321c3..000000000000
--- a/test/MC/SystemZ/insn-llihl-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llihl	%r0, 0                  # encoding: [0xa5,0x0d,0x00,0x00]
-#CHECK: llihl	%r0, 32768              # encoding: [0xa5,0x0d,0x80,0x00]
-#CHECK: llihl	%r0, 65535              # encoding: [0xa5,0x0d,0xff,0xff]
-#CHECK: llihl	%r15, 0                 # encoding: [0xa5,0xfd,0x00,0x00]
-
-	llihl	%r0, 0
-	llihl	%r0, 0x8000
-	llihl	%r0, 0xffff
-	llihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-llihl-02.s b/test/MC/SystemZ/insn-llihl-02.s
deleted file mode 100644
index 6891c4219624..000000000000
--- a/test/MC/SystemZ/insn-llihl-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llihl	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: llihl	%r0, 0x10000
-
-	llihl	%r0, -1
-	llihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-llilf-01.s b/test/MC/SystemZ/insn-llilf-01.s
deleted file mode 100644
index 8166583a2c01..000000000000
--- a/test/MC/SystemZ/insn-llilf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llilf	%r0, 0                  # encoding: [0xc0,0x0f,0x00,0x00,0x00,0x00]
-#CHECK: llilf	%r0, 4294967295         # encoding: [0xc0,0x0f,0xff,0xff,0xff,0xff]
-#CHECK: llilf	%r15, 0                 # encoding: [0xc0,0xff,0x00,0x00,0x00,0x00]
-
-	llilf	%r0, 0
-	llilf	%r0, 0xffffffff
-	llilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-llilf-02.s b/test/MC/SystemZ/insn-llilf-02.s
deleted file mode 100644
index dc10cc34ee00..000000000000
--- a/test/MC/SystemZ/insn-llilf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llilf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: llilf	%r0, 1 << 32
-
-	llilf	%r0, -1
-	llilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-llilh-01.s b/test/MC/SystemZ/insn-llilh-01.s
deleted file mode 100644
index 2ec5a7952f08..000000000000
--- a/test/MC/SystemZ/insn-llilh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llilh	%r0, 0                  # encoding: [0xa5,0x0e,0x00,0x00]
-#CHECK: llilh	%r0, 32768              # encoding: [0xa5,0x0e,0x80,0x00]
-#CHECK: llilh	%r0, 65535              # encoding: [0xa5,0x0e,0xff,0xff]
-#CHECK: llilh	%r15, 0                 # encoding: [0xa5,0xfe,0x00,0x00]
-
-	llilh	%r0, 0
-	llilh	%r0, 0x8000
-	llilh	%r0, 0xffff
-	llilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-llilh-02.s b/test/MC/SystemZ/insn-llilh-02.s
deleted file mode 100644
index bdfa1e77435a..000000000000
--- a/test/MC/SystemZ/insn-llilh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llilh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: llilh	%r0, 0x10000
-
-	llilh	%r0, -1
-	llilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-llill-01.s b/test/MC/SystemZ/insn-llill-01.s
deleted file mode 100644
index b95841da4d01..000000000000
--- a/test/MC/SystemZ/insn-llill-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: llill	%r0, 0                  # encoding: [0xa5,0x0f,0x00,0x00]
-#CHECK: llill	%r0, 32768              # encoding: [0xa5,0x0f,0x80,0x00]
-#CHECK: llill	%r0, 65535              # encoding: [0xa5,0x0f,0xff,0xff]
-#CHECK: llill	%r15, 0                 # encoding: [0xa5,0xff,0x00,0x00]
-
-	llill	%r0, 0
-	llill	%r0, 0x8000
-	llill	%r0, 0xffff
-	llill	%r15, 0
diff --git a/test/MC/SystemZ/insn-llill-02.s b/test/MC/SystemZ/insn-llill-02.s
deleted file mode 100644
index 2503b533317f..000000000000
--- a/test/MC/SystemZ/insn-llill-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: llill	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: llill	%r0, 0x10000
-
-	llill	%r0, -1
-	llill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-lmg-01.s b/test/MC/SystemZ/insn-lmg-01.s
deleted file mode 100644
index 24a2768192c9..000000000000
--- a/test/MC/SystemZ/insn-lmg-01.s
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x04]
-#CHECK: lmg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x04]
-#CHECK: lmg	%r14, %r15, 0           # encoding: [0xeb,0xef,0x00,0x00,0x00,0x04]
-#CHECK: lmg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x04]
-#CHECK: lmg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x04]
-#CHECK: lmg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x04]
-#CHECK: lmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x04]
-#CHECK: lmg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x04]
-#CHECK: lmg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x04]
-#CHECK: lmg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x04]
-#CHECK: lmg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x04]
-#CHECK: lmg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x04]
-#CHECK: lmg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x04]
-
-	lmg	%r0,%r0,0
-	lmg	%r0,%r15,0
-	lmg	%r14,%r15,0
-	lmg	%r15,%r15,0
-	lmg	%r0,%r0,-524288
-	lmg	%r0,%r0,-1
-	lmg	%r0,%r0,0
-	lmg	%r0,%r0,1
-	lmg	%r0,%r0,524287
-	lmg	%r0,%r0,0(%r1)
-	lmg	%r0,%r0,0(%r15)
-	lmg	%r0,%r0,524287(%r1)
-	lmg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-lmg-02.s b/test/MC/SystemZ/insn-lmg-02.s
deleted file mode 100644
index 9a67c08ee649..000000000000
--- a/test/MC/SystemZ/insn-lmg-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lmg	%r0, %r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lmg	%r0, %r0, 524288
-#CHECK: error: invalid use of indexed addressing
-#CHECK: lmg	%r0, %r0, 0(%r1,%r2)
-
-	lmg	%r0, %r0, -524289
-	lmg	%r0, %r0, 524288
-	lmg	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-lndbr-01.s b/test/MC/SystemZ/insn-lndbr-01.s
deleted file mode 100644
index 22786238ab59..000000000000
--- a/test/MC/SystemZ/insn-lndbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lndbr	%f0, %f9                # encoding: [0xb3,0x11,0x00,0x09]
-#CHECK: lndbr	%f0, %f15               # encoding: [0xb3,0x11,0x00,0x0f]
-#CHECK: lndbr	%f15, %f0               # encoding: [0xb3,0x11,0x00,0xf0]
-#CHECK: lndbr	%f15, %f9               # encoding: [0xb3,0x11,0x00,0xf9]
-
-	lndbr	%f0,%f9
-	lndbr	%f0,%f15
-	lndbr	%f15,%f0
-	lndbr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lnebr-01.s b/test/MC/SystemZ/insn-lnebr-01.s
deleted file mode 100644
index cf32734ad21f..000000000000
--- a/test/MC/SystemZ/insn-lnebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lnebr	%f0, %f9                # encoding: [0xb3,0x01,0x00,0x09]
-#CHECK: lnebr	%f0, %f15               # encoding: [0xb3,0x01,0x00,0x0f]
-#CHECK: lnebr	%f15, %f0               # encoding: [0xb3,0x01,0x00,0xf0]
-#CHECK: lnebr	%f15, %f9               # encoding: [0xb3,0x01,0x00,0xf9]
-
-	lnebr	%f0,%f9
-	lnebr	%f0,%f15
-	lnebr	%f15,%f0
-	lnebr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lnxbr-01.s b/test/MC/SystemZ/insn-lnxbr-01.s
deleted file mode 100644
index bf3794859ed1..000000000000
--- a/test/MC/SystemZ/insn-lnxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lnxbr	%f0, %f8                # encoding: [0xb3,0x41,0x00,0x08]
-#CHECK: lnxbr	%f0, %f13               # encoding: [0xb3,0x41,0x00,0x0d]
-#CHECK: lnxbr	%f13, %f0               # encoding: [0xb3,0x41,0x00,0xd0]
-#CHECK: lnxbr	%f13, %f9               # encoding: [0xb3,0x41,0x00,0xd9]
-
-	lnxbr	%f0,%f8
-	lnxbr	%f0,%f13
-	lnxbr	%f13,%f0
-	lnxbr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lnxbr-02.s b/test/MC/SystemZ/insn-lnxbr-02.s
deleted file mode 100644
index 9a69f48a0638..000000000000
--- a/test/MC/SystemZ/insn-lnxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lnxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: lnxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: lnxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: lnxbr	%f14, %f0
-
-	lnxbr	%f0, %f2
-	lnxbr	%f0, %f14
-	lnxbr	%f2, %f0
-	lnxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-lpdbr-01.s b/test/MC/SystemZ/insn-lpdbr-01.s
deleted file mode 100644
index 869b0c9117e3..000000000000
--- a/test/MC/SystemZ/insn-lpdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lpdbr	%f0, %f9                # encoding: [0xb3,0x10,0x00,0x09]
-#CHECK: lpdbr	%f0, %f15               # encoding: [0xb3,0x10,0x00,0x0f]
-#CHECK: lpdbr	%f15, %f0               # encoding: [0xb3,0x10,0x00,0xf0]
-#CHECK: lpdbr	%f15, %f9               # encoding: [0xb3,0x10,0x00,0xf9]
-
-	lpdbr	%f0,%f9
-	lpdbr	%f0,%f15
-	lpdbr	%f15,%f0
-	lpdbr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lpebr-01.s b/test/MC/SystemZ/insn-lpebr-01.s
deleted file mode 100644
index 917f26e9aee5..000000000000
--- a/test/MC/SystemZ/insn-lpebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lpebr	%f0, %f9                # encoding: [0xb3,0x00,0x00,0x09]
-#CHECK: lpebr	%f0, %f15               # encoding: [0xb3,0x00,0x00,0x0f]
-#CHECK: lpebr	%f15, %f0               # encoding: [0xb3,0x00,0x00,0xf0]
-#CHECK: lpebr	%f15, %f9               # encoding: [0xb3,0x00,0x00,0xf9]
-
-	lpebr	%f0,%f9
-	lpebr	%f0,%f15
-	lpebr	%f15,%f0
-	lpebr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lpxbr-01.s b/test/MC/SystemZ/insn-lpxbr-01.s
deleted file mode 100644
index 56a628ab96c2..000000000000
--- a/test/MC/SystemZ/insn-lpxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lpxbr	%f0, %f8                # encoding: [0xb3,0x40,0x00,0x08]
-#CHECK: lpxbr	%f0, %f13               # encoding: [0xb3,0x40,0x00,0x0d]
-#CHECK: lpxbr	%f13, %f0               # encoding: [0xb3,0x40,0x00,0xd0]
-#CHECK: lpxbr	%f13, %f9               # encoding: [0xb3,0x40,0x00,0xd9]
-
-	lpxbr	%f0,%f8
-	lpxbr	%f0,%f13
-	lpxbr	%f13,%f0
-	lpxbr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lpxbr-02.s b/test/MC/SystemZ/insn-lpxbr-02.s
deleted file mode 100644
index 6fa3697111ea..000000000000
--- a/test/MC/SystemZ/insn-lpxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lpxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: lpxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: lpxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: lpxbr	%f14, %f0
-
-	lpxbr	%f0, %f2
-	lpxbr	%f0, %f14
-	lpxbr	%f2, %f0
-	lpxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-lr-01.s b/test/MC/SystemZ/insn-lr-01.s
deleted file mode 100644
index 8ce4a00678d6..000000000000
--- a/test/MC/SystemZ/insn-lr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lr	%r0, %r9                # encoding: [0x18,0x09]
-#CHECK: lr	%r0, %r15               # encoding: [0x18,0x0f]
-#CHECK: lr	%r15, %r0               # encoding: [0x18,0xf0]
-#CHECK: lr	%r15, %r9               # encoding: [0x18,0xf9]
-
-	lr	%r0,%r9
-	lr	%r0,%r15
-	lr	%r15,%r0
-	lr	%r15,%r9
diff --git a/test/MC/SystemZ/insn-lrl-01.s b/test/MC/SystemZ/insn-lrl-01.s
deleted file mode 100644
index 32d0eeb2b848..000000000000
--- a/test/MC/SystemZ/insn-lrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lrl	%r0, 2864434397         # encoding: [0xc4,0x0d,0x55,0x5d,0xe6,0x6e]
-#CHECK: lrl	%r15, 2864434397        # encoding: [0xc4,0xfd,0x55,0x5d,0xe6,0x6e]
-
-	lrl	%r0,0xaabbccdd
-	lrl	%r15,0xaabbccdd
-
-#CHECK: lrl	%r0, foo                # encoding: [0xc4,0x0d,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: lrl	%r15, foo               # encoding: [0xc4,0xfd,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	lrl	%r0,foo
-	lrl	%r15,foo
-
-#CHECK: lrl	%r3, bar+100            # encoding: [0xc4,0x3d,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: lrl	%r4, bar+100            # encoding: [0xc4,0x4d,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	lrl	%r3,bar+100
-	lrl	%r4,bar+100
-
-#CHECK: lrl	%r7, frob@PLT           # encoding: [0xc4,0x7d,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: lrl	%r8, frob@PLT           # encoding: [0xc4,0x8d,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	lrl	%r7,frob@PLT
-	lrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lrv-01.s b/test/MC/SystemZ/insn-lrv-01.s
deleted file mode 100644
index 75b973a22d0e..000000000000
--- a/test/MC/SystemZ/insn-lrv-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lrv	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1e]
-#CHECK: lrv	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1e]
-#CHECK: lrv	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1e]
-#CHECK: lrv	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1e]
-#CHECK: lrv	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1e]
-#CHECK: lrv	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1e]
-#CHECK: lrv	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1e]
-#CHECK: lrv	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1e]
-#CHECK: lrv	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1e]
-#CHECK: lrv	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1e]
-
-	lrv	%r0,-524288
-	lrv	%r0,-1
-	lrv	%r0,0
-	lrv	%r0,1
-	lrv	%r0,524287
-	lrv	%r0,0(%r1)
-	lrv	%r0,0(%r15)
-	lrv	%r0,524287(%r1,%r15)
-	lrv	%r0,524287(%r15,%r1)
-	lrv	%r15,0
diff --git a/test/MC/SystemZ/insn-lrv-02.s b/test/MC/SystemZ/insn-lrv-02.s
deleted file mode 100644
index f2dcfa777a26..000000000000
--- a/test/MC/SystemZ/insn-lrv-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lrv	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lrv	%r0, 524288
-
-	lrv	%r0, -524289
-	lrv	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lrvg-01.s b/test/MC/SystemZ/insn-lrvg-01.s
deleted file mode 100644
index d2d8b2db43d4..000000000000
--- a/test/MC/SystemZ/insn-lrvg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lrvg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0f]
-#CHECK: lrvg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0f]
-#CHECK: lrvg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0f]
-#CHECK: lrvg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0f]
-#CHECK: lrvg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0f]
-#CHECK: lrvg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0f]
-#CHECK: lrvg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0f]
-#CHECK: lrvg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0f]
-#CHECK: lrvg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0f]
-#CHECK: lrvg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0f]
-
-	lrvg	%r0,-524288
-	lrvg	%r0,-1
-	lrvg	%r0,0
-	lrvg	%r0,1
-	lrvg	%r0,524287
-	lrvg	%r0,0(%r1)
-	lrvg	%r0,0(%r15)
-	lrvg	%r0,524287(%r1,%r15)
-	lrvg	%r0,524287(%r15,%r1)
-	lrvg	%r15,0
diff --git a/test/MC/SystemZ/insn-lrvg-02.s b/test/MC/SystemZ/insn-lrvg-02.s
deleted file mode 100644
index 690fa1350092..000000000000
--- a/test/MC/SystemZ/insn-lrvg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: lrvg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: lrvg	%r0, 524288
-
-	lrvg	%r0, -524289
-	lrvg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lrvgr-01.s b/test/MC/SystemZ/insn-lrvgr-01.s
deleted file mode 100644
index 1b6e8841ee2c..000000000000
--- a/test/MC/SystemZ/insn-lrvgr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lrvgr	%r0, %r0                # encoding: [0xb9,0x0f,0x00,0x00]
-#CHECK: lrvgr	%r0, %r15               # encoding: [0xb9,0x0f,0x00,0x0f]
-#CHECK: lrvgr	%r15, %r0               # encoding: [0xb9,0x0f,0x00,0xf0]
-#CHECK: lrvgr	%r7, %r8                # encoding: [0xb9,0x0f,0x00,0x78]
-#CHECK: lrvgr	%r15, %r15              # encoding: [0xb9,0x0f,0x00,0xff]
-
-	lrvgr	%r0,%r0
-	lrvgr	%r0,%r15
-	lrvgr	%r15,%r0
-	lrvgr	%r7,%r8
-	lrvgr	%r15,%r15
diff --git a/test/MC/SystemZ/insn-lrvr-01.s b/test/MC/SystemZ/insn-lrvr-01.s
deleted file mode 100644
index c0d5d895b8bc..000000000000
--- a/test/MC/SystemZ/insn-lrvr-01.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lrvr	%r0, %r0                # encoding: [0xb9,0x1f,0x00,0x00]
-#CHECK: lrvr	%r0, %r15               # encoding: [0xb9,0x1f,0x00,0x0f]
-#CHECK: lrvr	%r15, %r0               # encoding: [0xb9,0x1f,0x00,0xf0]
-#CHECK: lrvr	%r7, %r8                # encoding: [0xb9,0x1f,0x00,0x78]
-#CHECK: lrvr	%r15, %r15              # encoding: [0xb9,0x1f,0x00,0xff]
-
-	lrvr	%r0,%r0
-	lrvr	%r0,%r15
-	lrvr	%r15,%r0
-	lrvr	%r7,%r8
-	lrvr	%r15,%r15
diff --git a/test/MC/SystemZ/insn-lxr-01.s b/test/MC/SystemZ/insn-lxr-01.s
deleted file mode 100644
index a04cdf747285..000000000000
--- a/test/MC/SystemZ/insn-lxr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lxr	%f0, %f8                # encoding: [0xb3,0x65,0x00,0x08]
-#CHECK: lxr	%f0, %f13               # encoding: [0xb3,0x65,0x00,0x0d]
-#CHECK: lxr	%f13, %f0               # encoding: [0xb3,0x65,0x00,0xd0]
-#CHECK: lxr	%f13, %f9               # encoding: [0xb3,0x65,0x00,0xd9]
-
-	lxr	%f0,%f8
-	lxr	%f0,%f13
-	lxr	%f13,%f0
-	lxr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lxr-02.s b/test/MC/SystemZ/insn-lxr-02.s
deleted file mode 100644
index b18ad756b125..000000000000
--- a/test/MC/SystemZ/insn-lxr-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lxr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: lxr	%f15, %f0
-#CHECK: error: invalid register
-#CHECK: lxr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: lxr	%f0, %f15
-
-	lxr	%f2, %f0
-	lxr	%f15, %f0
-	lxr	%f0, %f2
-	lxr	%f0, %f15
diff --git a/test/MC/SystemZ/insn-ly-01.s b/test/MC/SystemZ/insn-ly-01.s
deleted file mode 100644
index 25bc3e8a34c4..000000000000
--- a/test/MC/SystemZ/insn-ly-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x58]
-#CHECK: ly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x58]
-#CHECK: ly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x58]
-#CHECK: ly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x58]
-#CHECK: ly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x58]
-#CHECK: ly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x58]
-#CHECK: ly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x58]
-#CHECK: ly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x58]
-#CHECK: ly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x58]
-#CHECK: ly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x58]
-
-	ly	%r0, -524288
-	ly	%r0, -1
-	ly	%r0, 0
-	ly	%r0, 1
-	ly	%r0, 524287
-	ly	%r0, 0(%r1)
-	ly	%r0, 0(%r15)
-	ly	%r0, 524287(%r1,%r15)
-	ly	%r0, 524287(%r15,%r1)
-	ly	%r15, 0
diff --git a/test/MC/SystemZ/insn-ly-02.s b/test/MC/SystemZ/insn-ly-02.s
deleted file mode 100644
index b2d424e1152e..000000000000
--- a/test/MC/SystemZ/insn-ly-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ly	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: ly	%r0, 524288
-
-	ly	%r0, -524289
-	ly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lzdr-01.s b/test/MC/SystemZ/insn-lzdr-01.s
deleted file mode 100644
index c95082f47b75..000000000000
--- a/test/MC/SystemZ/insn-lzdr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lzdr	%f0                     # encoding: [0xb3,0x75,0x00,0x00]
-#CHECK: lzdr	%f7                     # encoding: [0xb3,0x75,0x00,0x70]
-#CHECK: lzdr	%f15                    # encoding: [0xb3,0x75,0x00,0xf0]
-
-	lzdr	%f0
-	lzdr	%f7
-	lzdr	%f15
diff --git a/test/MC/SystemZ/insn-lzer-01.s b/test/MC/SystemZ/insn-lzer-01.s
deleted file mode 100644
index 0944047a5ba6..000000000000
--- a/test/MC/SystemZ/insn-lzer-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lzer	%f0                     # encoding: [0xb3,0x74,0x00,0x00]
-#CHECK: lzer	%f7                     # encoding: [0xb3,0x74,0x00,0x70]
-#CHECK: lzer	%f15                    # encoding: [0xb3,0x74,0x00,0xf0]
-
-	lzer	%f0
-	lzer	%f7
-	lzer	%f15
diff --git a/test/MC/SystemZ/insn-lzxr-01.s b/test/MC/SystemZ/insn-lzxr-01.s
deleted file mode 100644
index bd5a5c2ea13c..000000000000
--- a/test/MC/SystemZ/insn-lzxr-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lzxr	%f0                     # encoding: [0xb3,0x76,0x00,0x00]
-#CHECK: lzxr	%f8                     # encoding: [0xb3,0x76,0x00,0x80]
-#CHECK: lzxr	%f13                    # encoding: [0xb3,0x76,0x00,0xd0]
-
-	lzxr	%f0
-	lzxr	%f8
-	lzxr	%f13
diff --git a/test/MC/SystemZ/insn-lzxr-02.s b/test/MC/SystemZ/insn-lzxr-02.s
deleted file mode 100644
index 4ce2ad04bc6e..000000000000
--- a/test/MC/SystemZ/insn-lzxr-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lzxr	%f2
-#CHECK: error: invalid register
-#CHECK: lzxr	%f14
-#CHECK: error: invalid register
-#CHECK: lzxr	%f15
-
-	lzxr	%f2
-	lzxr	%f14
-	lzxr	%f15
diff --git a/test/MC/SystemZ/insn-madb-01.s b/test/MC/SystemZ/insn-madb-01.s
deleted file mode 100644
index 6eec4beb37a2..000000000000
--- a/test/MC/SystemZ/insn-madb-01.s
+++ /dev/null
@@ -1,21 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: madb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x1e]
-#CHECK: madb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1e]
-#CHECK: madb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x1e]
-#CHECK: madb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1e]
-#CHECK: madb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x1e]
-#CHECK: madb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1e]
-#CHECK: madb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1e]
-#CHECK: madb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x1e]
-#CHECK: madb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x1e]
-
-	madb	%f0, %f0, 0
-	madb	%f0, %f0, 4095
-	madb	%f0, %f0, 0(%r1)
-	madb	%f0, %f0, 0(%r15)
-	madb	%f0, %f0, 4095(%r1,%r15)
-	madb	%f0, %f0, 4095(%r15,%r1)
-	madb	%f0, %f15, 0
-	madb	%f15, %f0, 0
-	madb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-madb-02.s b/test/MC/SystemZ/insn-madb-02.s
deleted file mode 100644
index f7fdee978e13..000000000000
--- a/test/MC/SystemZ/insn-madb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: madb	%f0, %f0, -1
-#CHECK: error: invalid operand
-#CHECK: madb	%f0, %f0, 4096
-
-	madb	%f0, %f0, -1
-	madb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-madbr-01.s b/test/MC/SystemZ/insn-madbr-01.s
deleted file mode 100644
index 42142be99e5a..000000000000
--- a/test/MC/SystemZ/insn-madbr-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: madbr	%f0, %f0, %f0           # encoding: [0xb3,0x1e,0x00,0x00]
-#CHECK: madbr	%f0, %f0, %f15          # encoding: [0xb3,0x1e,0x00,0x0f]
-#CHECK: madbr	%f0, %f15, %f0          # encoding: [0xb3,0x1e,0x00,0xf0]
-#CHECK: madbr	%f15, %f0, %f0          # encoding: [0xb3,0x1e,0xf0,0x00]
-#CHECK: madbr	%f7, %f8, %f9           # encoding: [0xb3,0x1e,0x70,0x89]
-#CHECK: madbr	%f15, %f15, %f15        # encoding: [0xb3,0x1e,0xf0,0xff]
-
-	madbr	%f0, %f0, %f0
-	madbr	%f0, %f0, %f15
-	madbr	%f0, %f15, %f0
-	madbr	%f15, %f0, %f0
-	madbr	%f7, %f8, %f9
-	madbr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-maeb-01.s b/test/MC/SystemZ/insn-maeb-01.s
deleted file mode 100644
index 7a998fd79ff9..000000000000
--- a/test/MC/SystemZ/insn-maeb-01.s
+++ /dev/null
@@ -1,21 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: maeb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x0e]
-#CHECK: maeb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0e]
-#CHECK: maeb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x0e]
-#CHECK: maeb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0e]
-#CHECK: maeb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x0e]
-#CHECK: maeb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0e]
-#CHECK: maeb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0e]
-#CHECK: maeb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x0e]
-#CHECK: maeb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x0e]
-
-	maeb	%f0, %f0, 0
-	maeb	%f0, %f0, 4095
-	maeb	%f0, %f0, 0(%r1)
-	maeb	%f0, %f0, 0(%r15)
-	maeb	%f0, %f0, 4095(%r1,%r15)
-	maeb	%f0, %f0, 4095(%r15,%r1)
-	maeb	%f0, %f15, 0
-	maeb	%f15, %f0, 0
-	maeb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-maeb-02.s b/test/MC/SystemZ/insn-maeb-02.s
deleted file mode 100644
index e12407acc2d0..000000000000
--- a/test/MC/SystemZ/insn-maeb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: maeb	%f0, %f0, -1
-#CHECK: error: invalid operand
-#CHECK: maeb	%f0, %f0, 4096
-
-	maeb	%f0, %f0, -1
-	maeb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-maebr-01.s b/test/MC/SystemZ/insn-maebr-01.s
deleted file mode 100644
index be92aaf87483..000000000000
--- a/test/MC/SystemZ/insn-maebr-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: maebr	%f0, %f0, %f0           # encoding: [0xb3,0x0e,0x00,0x00]
-#CHECK: maebr	%f0, %f0, %f15          # encoding: [0xb3,0x0e,0x00,0x0f]
-#CHECK: maebr	%f0, %f15, %f0          # encoding: [0xb3,0x0e,0x00,0xf0]
-#CHECK: maebr	%f15, %f0, %f0          # encoding: [0xb3,0x0e,0xf0,0x00]
-#CHECK: maebr	%f7, %f8, %f9           # encoding: [0xb3,0x0e,0x70,0x89]
-#CHECK: maebr	%f15, %f15, %f15        # encoding: [0xb3,0x0e,0xf0,0xff]
-
-	maebr	%f0, %f0, %f0
-	maebr	%f0, %f0, %f15
-	maebr	%f0, %f15, %f0
-	maebr	%f15, %f0, %f0
-	maebr	%f7, %f8, %f9
-	maebr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-mdb-01.s b/test/MC/SystemZ/insn-mdb-01.s
deleted file mode 100644
index 58be9779fc56..000000000000
--- a/test/MC/SystemZ/insn-mdb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1c]
-#CHECK: mdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1c]
-#CHECK: mdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1c]
-#CHECK: mdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1c]
-#CHECK: mdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1c]
-#CHECK: mdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1c]
-#CHECK: mdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1c]
-
-	mdb	%f0, 0
-	mdb	%f0, 4095
-	mdb	%f0, 0(%r1)
-	mdb	%f0, 0(%r15)
-	mdb	%f0, 4095(%r1,%r15)
-	mdb	%f0, 4095(%r15,%r1)
-	mdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-mdb-02.s b/test/MC/SystemZ/insn-mdb-02.s
deleted file mode 100644
index f1bdab015f1c..000000000000
--- a/test/MC/SystemZ/insn-mdb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mdb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: mdb	%f0, 4096
-
-	mdb	%f0, -1
-	mdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-mdbr-01.s b/test/MC/SystemZ/insn-mdbr-01.s
deleted file mode 100644
index 4ff16b9c0b36..000000000000
--- a/test/MC/SystemZ/insn-mdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mdbr	%f0, %f0                # encoding: [0xb3,0x1c,0x00,0x00]
-#CHECK: mdbr	%f0, %f15               # encoding: [0xb3,0x1c,0x00,0x0f]
-#CHECK: mdbr	%f7, %f8                # encoding: [0xb3,0x1c,0x00,0x78]
-#CHECK: mdbr	%f15, %f0               # encoding: [0xb3,0x1c,0x00,0xf0]
-
-	mdbr	%f0, %f0
-	mdbr	%f0, %f15
-	mdbr	%f7, %f8
-	mdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-mdeb-01.s b/test/MC/SystemZ/insn-mdeb-01.s
deleted file mode 100644
index 5d85c079db1f..000000000000
--- a/test/MC/SystemZ/insn-mdeb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mdeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0c]
-#CHECK: mdeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0c]
-#CHECK: mdeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0c]
-#CHECK: mdeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0c]
-#CHECK: mdeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0c]
-#CHECK: mdeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0c]
-#CHECK: mdeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0c]
-
-	mdeb	%f0, 0
-	mdeb	%f0, 4095
-	mdeb	%f0, 0(%r1)
-	mdeb	%f0, 0(%r15)
-	mdeb	%f0, 4095(%r1,%r15)
-	mdeb	%f0, 4095(%r15,%r1)
-	mdeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-mdeb-02.s b/test/MC/SystemZ/insn-mdeb-02.s
deleted file mode 100644
index 87ec6767d4e1..000000000000
--- a/test/MC/SystemZ/insn-mdeb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mdeb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: mdeb	%f0, 4096
-
-	mdeb	%f0, -1
-	mdeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-mdebr-01.s b/test/MC/SystemZ/insn-mdebr-01.s
deleted file mode 100644
index 17c495581d34..000000000000
--- a/test/MC/SystemZ/insn-mdebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mdebr	%f0, %f0                # encoding: [0xb3,0x0c,0x00,0x00]
-#CHECK: mdebr	%f0, %f15               # encoding: [0xb3,0x0c,0x00,0x0f]
-#CHECK: mdebr	%f7, %f8                # encoding: [0xb3,0x0c,0x00,0x78]
-#CHECK: mdebr	%f15, %f0               # encoding: [0xb3,0x0c,0x00,0xf0]
-
-	mdebr	%f0, %f0
-	mdebr	%f0, %f15
-	mdebr	%f7, %f8
-	mdebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-meeb-01.s b/test/MC/SystemZ/insn-meeb-01.s
deleted file mode 100644
index bb14d0435577..000000000000
--- a/test/MC/SystemZ/insn-meeb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: meeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x17]
-#CHECK: meeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x17]
-#CHECK: meeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x17]
-#CHECK: meeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x17]
-#CHECK: meeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x17]
-#CHECK: meeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x17]
-#CHECK: meeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x17]
-
-	meeb	%f0, 0
-	meeb	%f0, 4095
-	meeb	%f0, 0(%r1)
-	meeb	%f0, 0(%r15)
-	meeb	%f0, 4095(%r1,%r15)
-	meeb	%f0, 4095(%r15,%r1)
-	meeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-meeb-02.s b/test/MC/SystemZ/insn-meeb-02.s
deleted file mode 100644
index ba5e3b28e3c8..000000000000
--- a/test/MC/SystemZ/insn-meeb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: meeb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: meeb	%f0, 4096
-
-	meeb	%f0, -1
-	meeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-meebr-01.s b/test/MC/SystemZ/insn-meebr-01.s
deleted file mode 100644
index 99cd8a38e000..000000000000
--- a/test/MC/SystemZ/insn-meebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: meebr	%f0, %f0                # encoding: [0xb3,0x17,0x00,0x00]
-#CHECK: meebr	%f0, %f15               # encoding: [0xb3,0x17,0x00,0x0f]
-#CHECK: meebr	%f7, %f8                # encoding: [0xb3,0x17,0x00,0x78]
-#CHECK: meebr	%f15, %f0               # encoding: [0xb3,0x17,0x00,0xf0]
-
-	meebr	%f0, %f0
-	meebr	%f0, %f15
-	meebr	%f7, %f8
-	meebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-mghi-01.s b/test/MC/SystemZ/insn-mghi-01.s
deleted file mode 100644
index d07278f20a14..000000000000
--- a/test/MC/SystemZ/insn-mghi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mghi	%r0, -32768             # encoding: [0xa7,0x0d,0x80,0x00]
-#CHECK: mghi	%r0, -1                 # encoding: [0xa7,0x0d,0xff,0xff]
-#CHECK: mghi	%r0, 0                  # encoding: [0xa7,0x0d,0x00,0x00]
-#CHECK: mghi	%r0, 1                  # encoding: [0xa7,0x0d,0x00,0x01]
-#CHECK: mghi	%r0, 32767              # encoding: [0xa7,0x0d,0x7f,0xff]
-#CHECK: mghi	%r15, 0                 # encoding: [0xa7,0xfd,0x00,0x00]
-
-	mghi	%r0, -32768
-	mghi	%r0, -1
-	mghi	%r0, 0
-	mghi	%r0, 1
-	mghi	%r0, 32767
-	mghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-mghi-02.s b/test/MC/SystemZ/insn-mghi-02.s
deleted file mode 100644
index 860fa3b78754..000000000000
--- a/test/MC/SystemZ/insn-mghi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mghi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: mghi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: mghi	%r0, foo
-
-	mghi	%r0, -32769
-	mghi	%r0, 32768
-	mghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-mh-01.s b/test/MC/SystemZ/insn-mh-01.s
deleted file mode 100644
index 59d5515438a0..000000000000
--- a/test/MC/SystemZ/insn-mh-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mh	%r0, 0                  # encoding: [0x4c,0x00,0x00,0x00]
-#CHECK: mh	%r0, 4095               # encoding: [0x4c,0x00,0x0f,0xff]
-#CHECK: mh	%r0, 0(%r1)             # encoding: [0x4c,0x00,0x10,0x00]
-#CHECK: mh	%r0, 0(%r15)            # encoding: [0x4c,0x00,0xf0,0x00]
-#CHECK: mh	%r0, 4095(%r1,%r15)     # encoding: [0x4c,0x01,0xff,0xff]
-#CHECK: mh	%r0, 4095(%r15,%r1)     # encoding: [0x4c,0x0f,0x1f,0xff]
-#CHECK: mh	%r15, 0                 # encoding: [0x4c,0xf0,0x00,0x00]
-
-	mh	%r0, 0
-	mh	%r0, 4095
-	mh	%r0, 0(%r1)
-	mh	%r0, 0(%r15)
-	mh	%r0, 4095(%r1,%r15)
-	mh	%r0, 4095(%r15,%r1)
-	mh	%r15, 0
diff --git a/test/MC/SystemZ/insn-mh-02.s b/test/MC/SystemZ/insn-mh-02.s
deleted file mode 100644
index 4ea35fca5916..000000000000
--- a/test/MC/SystemZ/insn-mh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: mh	%r0, 4096
-
-	mh	%r0, -1
-	mh	%r0, 4096
diff --git a/test/MC/SystemZ/insn-mhi-01.s b/test/MC/SystemZ/insn-mhi-01.s
deleted file mode 100644
index adf42ae7346c..000000000000
--- a/test/MC/SystemZ/insn-mhi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mhi	%r0, -32768             # encoding: [0xa7,0x0c,0x80,0x00]
-#CHECK: mhi	%r0, -1                 # encoding: [0xa7,0x0c,0xff,0xff]
-#CHECK: mhi	%r0, 0                  # encoding: [0xa7,0x0c,0x00,0x00]
-#CHECK: mhi	%r0, 1                  # encoding: [0xa7,0x0c,0x00,0x01]
-#CHECK: mhi	%r0, 32767              # encoding: [0xa7,0x0c,0x7f,0xff]
-#CHECK: mhi	%r15, 0                 # encoding: [0xa7,0xfc,0x00,0x00]
-
-	mhi	%r0, -32768
-	mhi	%r0, -1
-	mhi	%r0, 0
-	mhi	%r0, 1
-	mhi	%r0, 32767
-	mhi	%r15, 0
diff --git a/test/MC/SystemZ/insn-mhi-02.s b/test/MC/SystemZ/insn-mhi-02.s
deleted file mode 100644
index 74e83576e91b..000000000000
--- a/test/MC/SystemZ/insn-mhi-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mhi	%r0, -32769
-#CHECK: error: invalid operand
-#CHECK: mhi	%r0, 32768
-#CHECK: error: invalid operand
-#CHECK: mhi	%r0, foo
-
-	mhi	%r0, -32769
-	mhi	%r0, 32768
-	mhi	%r0, foo
diff --git a/test/MC/SystemZ/insn-mhy-01.s b/test/MC/SystemZ/insn-mhy-01.s
deleted file mode 100644
index 89c394bd81ed..000000000000
--- a/test/MC/SystemZ/insn-mhy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mhy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7c]
-#CHECK: mhy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7c]
-#CHECK: mhy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7c]
-#CHECK: mhy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7c]
-#CHECK: mhy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7c]
-#CHECK: mhy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7c]
-#CHECK: mhy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7c]
-#CHECK: mhy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7c]
-#CHECK: mhy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7c]
-#CHECK: mhy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7c]
-
-	mhy	%r0, -524288
-	mhy	%r0, -1
-	mhy	%r0, 0
-	mhy	%r0, 1
-	mhy	%r0, 524287
-	mhy	%r0, 0(%r1)
-	mhy	%r0, 0(%r15)
-	mhy	%r0, 524287(%r1,%r15)
-	mhy	%r0, 524287(%r15,%r1)
-	mhy	%r15, 0
diff --git a/test/MC/SystemZ/insn-mhy-02.s b/test/MC/SystemZ/insn-mhy-02.s
deleted file mode 100644
index bce62f096c18..000000000000
--- a/test/MC/SystemZ/insn-mhy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mhy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: mhy	%r0, 524288
-
-	mhy	%r0, -524289
-	mhy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-mlg-01.s b/test/MC/SystemZ/insn-mlg-01.s
deleted file mode 100644
index e9bd6510fc68..000000000000
--- a/test/MC/SystemZ/insn-mlg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mlg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x86]
-#CHECK: mlg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x86]
-#CHECK: mlg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x86]
-#CHECK: mlg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x86]
-#CHECK: mlg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x86]
-#CHECK: mlg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x86]
-#CHECK: mlg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x86]
-#CHECK: mlg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x86]
-#CHECK: mlg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x86]
-#CHECK: mlg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x86]
-
-	mlg	%r0, -524288
-	mlg	%r0, -1
-	mlg	%r0, 0
-	mlg	%r0, 1
-	mlg	%r0, 524287
-	mlg	%r0, 0(%r1)
-	mlg	%r0, 0(%r15)
-	mlg	%r0, 524287(%r1,%r15)
-	mlg	%r0, 524287(%r15,%r1)
-	mlg	%r14, 0
diff --git a/test/MC/SystemZ/insn-mlg-02.s b/test/MC/SystemZ/insn-mlg-02.s
deleted file mode 100644
index 7174bc50c2f1..000000000000
--- a/test/MC/SystemZ/insn-mlg-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mlg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: mlg	%r0, 524288
-#CHECK: error: invalid register
-#CHECK: mlg	%r1, 0
-#CHECK: error: invalid register
-#CHECK: mlg	%r15, 0
-
-	mlg	%r0, -524289
-	mlg	%r0, 524288
-	mlg	%r1, 0
-	mlg	%r15, 0
diff --git a/test/MC/SystemZ/insn-mlgr-01.s b/test/MC/SystemZ/insn-mlgr-01.s
deleted file mode 100644
index 215bde099151..000000000000
--- a/test/MC/SystemZ/insn-mlgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mlgr	%r0, %r0                # encoding: [0xb9,0x86,0x00,0x00]
-#CHECK: mlgr	%r0, %r15               # encoding: [0xb9,0x86,0x00,0x0f]
-#CHECK: mlgr	%r14, %r0               # encoding: [0xb9,0x86,0x00,0xe0]
-#CHECK: mlgr	%r6, %r9                # encoding: [0xb9,0x86,0x00,0x69]
-
-	mlgr	%r0,%r0
-	mlgr	%r0,%r15
-	mlgr	%r14,%r0
-	mlgr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-mlgr-02.s b/test/MC/SystemZ/insn-mlgr-02.s
deleted file mode 100644
index 30f4259031bb..000000000000
--- a/test/MC/SystemZ/insn-mlgr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: mlgr	%r1, %r0
-#CHECK: error: invalid register
-#CHECK: mlgr	%r15, %r0
-
-	mlgr	%r1, %r0
-	mlgr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-ms-01.s b/test/MC/SystemZ/insn-ms-01.s
deleted file mode 100644
index e104e09a1bf4..000000000000
--- a/test/MC/SystemZ/insn-ms-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ms	%r0, 0                  # encoding: [0x71,0x00,0x00,0x00]
-#CHECK: ms	%r0, 4095               # encoding: [0x71,0x00,0x0f,0xff]
-#CHECK: ms	%r0, 0(%r1)             # encoding: [0x71,0x00,0x10,0x00]
-#CHECK: ms	%r0, 0(%r15)            # encoding: [0x71,0x00,0xf0,0x00]
-#CHECK: ms	%r0, 4095(%r1,%r15)     # encoding: [0x71,0x01,0xff,0xff]
-#CHECK: ms	%r0, 4095(%r15,%r1)     # encoding: [0x71,0x0f,0x1f,0xff]
-#CHECK: ms	%r15, 0                 # encoding: [0x71,0xf0,0x00,0x00]
-
-	ms	%r0, 0
-	ms	%r0, 4095
-	ms	%r0, 0(%r1)
-	ms	%r0, 0(%r15)
-	ms	%r0, 4095(%r1,%r15)
-	ms	%r0, 4095(%r15,%r1)
-	ms	%r15, 0
diff --git a/test/MC/SystemZ/insn-ms-02.s b/test/MC/SystemZ/insn-ms-02.s
deleted file mode 100644
index 9cc7ecda3402..000000000000
--- a/test/MC/SystemZ/insn-ms-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ms	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: ms	%r0, 4096
-
-	ms	%r0, -1
-	ms	%r0, 4096
diff --git a/test/MC/SystemZ/insn-msdb-01.s b/test/MC/SystemZ/insn-msdb-01.s
deleted file mode 100644
index 50ef45bfc42f..000000000000
--- a/test/MC/SystemZ/insn-msdb-01.s
+++ /dev/null
@@ -1,21 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msdb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x1f]
-#CHECK: msdb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1f]
-#CHECK: msdb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x1f]
-#CHECK: msdb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1f]
-#CHECK: msdb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x1f]
-#CHECK: msdb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1f]
-#CHECK: msdb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1f]
-#CHECK: msdb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x1f]
-#CHECK: msdb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x1f]
-
-	msdb	%f0, %f0, 0
-	msdb	%f0, %f0, 4095
-	msdb	%f0, %f0, 0(%r1)
-	msdb	%f0, %f0, 0(%r15)
-	msdb	%f0, %f0, 4095(%r1,%r15)
-	msdb	%f0, %f0, 4095(%r15,%r1)
-	msdb	%f0, %f15, 0
-	msdb	%f15, %f0, 0
-	msdb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-msdb-02.s b/test/MC/SystemZ/insn-msdb-02.s
deleted file mode 100644
index 552fc72223dd..000000000000
--- a/test/MC/SystemZ/insn-msdb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: msdb	%f0, %f0, -1
-#CHECK: error: invalid operand
-#CHECK: msdb	%f0, %f0, 4096
-
-	msdb	%f0, %f0, -1
-	msdb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-msdbr-01.s b/test/MC/SystemZ/insn-msdbr-01.s
deleted file mode 100644
index 0c816578a2a9..000000000000
--- a/test/MC/SystemZ/insn-msdbr-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msdbr	%f0, %f0, %f0           # encoding: [0xb3,0x1f,0x00,0x00]
-#CHECK: msdbr	%f0, %f0, %f15          # encoding: [0xb3,0x1f,0x00,0x0f]
-#CHECK: msdbr	%f0, %f15, %f0          # encoding: [0xb3,0x1f,0x00,0xf0]
-#CHECK: msdbr	%f15, %f0, %f0          # encoding: [0xb3,0x1f,0xf0,0x00]
-#CHECK: msdbr	%f7, %f8, %f9           # encoding: [0xb3,0x1f,0x70,0x89]
-#CHECK: msdbr	%f15, %f15, %f15        # encoding: [0xb3,0x1f,0xf0,0xff]
-
-	msdbr	%f0, %f0, %f0
-	msdbr	%f0, %f0, %f15
-	msdbr	%f0, %f15, %f0
-	msdbr	%f15, %f0, %f0
-	msdbr	%f7, %f8, %f9
-	msdbr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-mseb-01.s b/test/MC/SystemZ/insn-mseb-01.s
deleted file mode 100644
index 4464cfb803ca..000000000000
--- a/test/MC/SystemZ/insn-mseb-01.s
+++ /dev/null
@@ -1,21 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mseb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x0f]
-#CHECK: mseb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0f]
-#CHECK: mseb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x0f]
-#CHECK: mseb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0f]
-#CHECK: mseb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x0f]
-#CHECK: mseb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0f]
-#CHECK: mseb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0f]
-#CHECK: mseb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x0f]
-#CHECK: mseb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x0f]
-
-	mseb	%f0, %f0, 0
-	mseb	%f0, %f0, 4095
-	mseb	%f0, %f0, 0(%r1)
-	mseb	%f0, %f0, 0(%r15)
-	mseb	%f0, %f0, 4095(%r1,%r15)
-	mseb	%f0, %f0, 4095(%r15,%r1)
-	mseb	%f0, %f15, 0
-	mseb	%f15, %f0, 0
-	mseb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-mseb-02.s b/test/MC/SystemZ/insn-mseb-02.s
deleted file mode 100644
index 03aaa0fb8a6b..000000000000
--- a/test/MC/SystemZ/insn-mseb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mseb	%f0, %f0, -1
-#CHECK: error: invalid operand
-#CHECK: mseb	%f0, %f0, 4096
-
-	mseb	%f0, %f0, -1
-	mseb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-msebr-01.s b/test/MC/SystemZ/insn-msebr-01.s
deleted file mode 100644
index f936cb632529..000000000000
--- a/test/MC/SystemZ/insn-msebr-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msebr	%f0, %f0, %f0           # encoding: [0xb3,0x0f,0x00,0x00]
-#CHECK: msebr	%f0, %f0, %f15          # encoding: [0xb3,0x0f,0x00,0x0f]
-#CHECK: msebr	%f0, %f15, %f0          # encoding: [0xb3,0x0f,0x00,0xf0]
-#CHECK: msebr	%f15, %f0, %f0          # encoding: [0xb3,0x0f,0xf0,0x00]
-#CHECK: msebr	%f7, %f8, %f9           # encoding: [0xb3,0x0f,0x70,0x89]
-#CHECK: msebr	%f15, %f15, %f15        # encoding: [0xb3,0x0f,0xf0,0xff]
-
-	msebr	%f0, %f0, %f0
-	msebr	%f0, %f0, %f15
-	msebr	%f0, %f15, %f0
-	msebr	%f15, %f0, %f0
-	msebr	%f7, %f8, %f9
-	msebr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-msfi-01.s b/test/MC/SystemZ/insn-msfi-01.s
deleted file mode 100644
index 629260e42060..000000000000
--- a/test/MC/SystemZ/insn-msfi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msfi	%r0, -2147483648        # encoding: [0xc2,0x01,0x80,0x00,0x00,0x00]
-#CHECK: msfi	%r0, -1                 # encoding: [0xc2,0x01,0xff,0xff,0xff,0xff]
-#CHECK: msfi	%r0, 0                  # encoding: [0xc2,0x01,0x00,0x00,0x00,0x00]
-#CHECK: msfi	%r0, 1                  # encoding: [0xc2,0x01,0x00,0x00,0x00,0x01]
-#CHECK: msfi	%r0, 2147483647         # encoding: [0xc2,0x01,0x7f,0xff,0xff,0xff]
-#CHECK: msfi	%r15, 0                 # encoding: [0xc2,0xf1,0x00,0x00,0x00,0x00]
-
-	msfi	%r0, -1 << 31
-	msfi	%r0, -1
-	msfi	%r0, 0
-	msfi	%r0, 1
-	msfi	%r0, (1 << 31) - 1
-	msfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-msfi-02.s b/test/MC/SystemZ/insn-msfi-02.s
deleted file mode 100644
index 2700ce78b8e3..000000000000
--- a/test/MC/SystemZ/insn-msfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: msfi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: msfi	%r0, (1 << 31)
-
-	msfi	%r0, (-1 << 31) - 1
-	msfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-msg-01.s b/test/MC/SystemZ/insn-msg-01.s
deleted file mode 100644
index 298811c1e965..000000000000
--- a/test/MC/SystemZ/insn-msg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0c]
-#CHECK: msg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0c]
-#CHECK: msg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0c]
-#CHECK: msg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0c]
-#CHECK: msg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0c]
-#CHECK: msg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0c]
-#CHECK: msg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0c]
-#CHECK: msg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0c]
-#CHECK: msg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0c]
-#CHECK: msg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0c]
-
-	msg	%r0, -524288
-	msg	%r0, -1
-	msg	%r0, 0
-	msg	%r0, 1
-	msg	%r0, 524287
-	msg	%r0, 0(%r1)
-	msg	%r0, 0(%r15)
-	msg	%r0, 524287(%r1,%r15)
-	msg	%r0, 524287(%r15,%r1)
-	msg	%r15, 0
diff --git a/test/MC/SystemZ/insn-msg-02.s b/test/MC/SystemZ/insn-msg-02.s
deleted file mode 100644
index 3326f40e56aa..000000000000
--- a/test/MC/SystemZ/insn-msg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: msg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: msg	%r0, 524288
-
-	msg	%r0, -524289
-	msg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-msgf-01.s b/test/MC/SystemZ/insn-msgf-01.s
deleted file mode 100644
index 9812bcc9cb47..000000000000
--- a/test/MC/SystemZ/insn-msgf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1c]
-#CHECK: msgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1c]
-#CHECK: msgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1c]
-#CHECK: msgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1c]
-#CHECK: msgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1c]
-#CHECK: msgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1c]
-#CHECK: msgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1c]
-#CHECK: msgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1c]
-#CHECK: msgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1c]
-#CHECK: msgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1c]
-
-	msgf	%r0, -524288
-	msgf	%r0, -1
-	msgf	%r0, 0
-	msgf	%r0, 1
-	msgf	%r0, 524287
-	msgf	%r0, 0(%r1)
-	msgf	%r0, 0(%r15)
-	msgf	%r0, 524287(%r1,%r15)
-	msgf	%r0, 524287(%r15,%r1)
-	msgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-msgf-02.s b/test/MC/SystemZ/insn-msgf-02.s
deleted file mode 100644
index 03983b305f07..000000000000
--- a/test/MC/SystemZ/insn-msgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: msgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: msgf	%r0, 524288
-
-	msgf	%r0, -524289
-	msgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-msgfi-01.s b/test/MC/SystemZ/insn-msgfi-01.s
deleted file mode 100644
index 802ad143a503..000000000000
--- a/test/MC/SystemZ/insn-msgfi-01.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msgfi	%r0, -2147483648        # encoding: [0xc2,0x00,0x80,0x00,0x00,0x00]
-#CHECK: msgfi	%r0, -1                 # encoding: [0xc2,0x00,0xff,0xff,0xff,0xff]
-#CHECK: msgfi	%r0, 0                  # encoding: [0xc2,0x00,0x00,0x00,0x00,0x00]
-#CHECK: msgfi	%r0, 1                  # encoding: [0xc2,0x00,0x00,0x00,0x00,0x01]
-#CHECK: msgfi	%r0, 2147483647         # encoding: [0xc2,0x00,0x7f,0xff,0xff,0xff]
-#CHECK: msgfi	%r15, 0                 # encoding: [0xc2,0xf0,0x00,0x00,0x00,0x00]
-
-	msgfi	%r0, -1 << 31
-	msgfi	%r0, -1
-	msgfi	%r0, 0
-	msgfi	%r0, 1
-	msgfi	%r0, (1 << 31) - 1
-	msgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-msgfi-02.s b/test/MC/SystemZ/insn-msgfi-02.s
deleted file mode 100644
index 82e1f8f9b44c..000000000000
--- a/test/MC/SystemZ/insn-msgfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: msgfi	%r0, (-1 << 31) - 1
-#CHECK: error: invalid operand
-#CHECK: msgfi	%r0, (1 << 31)
-
-	msgfi	%r0, (-1 << 31) - 1
-	msgfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-msgfr-01.s b/test/MC/SystemZ/insn-msgfr-01.s
deleted file mode 100644
index e25f6302ac62..000000000000
--- a/test/MC/SystemZ/insn-msgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msgfr	%r0, %r0                # encoding: [0xb9,0x1c,0x00,0x00]
-#CHECK: msgfr	%r0, %r15               # encoding: [0xb9,0x1c,0x00,0x0f]
-#CHECK: msgfr	%r15, %r0               # encoding: [0xb9,0x1c,0x00,0xf0]
-#CHECK: msgfr	%r7, %r8                # encoding: [0xb9,0x1c,0x00,0x78]
-
-	msgfr	%r0,%r0
-	msgfr	%r0,%r15
-	msgfr	%r15,%r0
-	msgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-msgr-01.s b/test/MC/SystemZ/insn-msgr-01.s
deleted file mode 100644
index 0b9cd360c8fe..000000000000
--- a/test/MC/SystemZ/insn-msgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msgr	%r0, %r0                # encoding: [0xb9,0x0c,0x00,0x00]
-#CHECK: msgr	%r0, %r15               # encoding: [0xb9,0x0c,0x00,0x0f]
-#CHECK: msgr	%r15, %r0               # encoding: [0xb9,0x0c,0x00,0xf0]
-#CHECK: msgr	%r7, %r8                # encoding: [0xb9,0x0c,0x00,0x78]
-
-	msgr	%r0,%r0
-	msgr	%r0,%r15
-	msgr	%r15,%r0
-	msgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-msr-01.s b/test/MC/SystemZ/insn-msr-01.s
deleted file mode 100644
index 6f7d917a16ce..000000000000
--- a/test/MC/SystemZ/insn-msr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msr	%r0, %r0                # encoding: [0xb2,0x52,0x00,0x00]
-#CHECK: msr	%r0, %r15               # encoding: [0xb2,0x52,0x00,0x0f]
-#CHECK: msr	%r15, %r0               # encoding: [0xb2,0x52,0x00,0xf0]
-#CHECK: msr	%r7, %r8                # encoding: [0xb2,0x52,0x00,0x78]
-
-	msr	%r0,%r0
-	msr	%r0,%r15
-	msr	%r15,%r0
-	msr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-msy-01.s b/test/MC/SystemZ/insn-msy-01.s
deleted file mode 100644
index aed9318a8a56..000000000000
--- a/test/MC/SystemZ/insn-msy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: msy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x51]
-#CHECK: msy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x51]
-#CHECK: msy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x51]
-#CHECK: msy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x51]
-#CHECK: msy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x51]
-#CHECK: msy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x51]
-#CHECK: msy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x51]
-#CHECK: msy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x51]
-#CHECK: msy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x51]
-#CHECK: msy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x51]
-
-	msy	%r0, -524288
-	msy	%r0, -1
-	msy	%r0, 0
-	msy	%r0, 1
-	msy	%r0, 524287
-	msy	%r0, 0(%r1)
-	msy	%r0, 0(%r15)
-	msy	%r0, 524287(%r1,%r15)
-	msy	%r0, 524287(%r15,%r1)
-	msy	%r15, 0
diff --git a/test/MC/SystemZ/insn-msy-02.s b/test/MC/SystemZ/insn-msy-02.s
deleted file mode 100644
index 6f10069ae704..000000000000
--- a/test/MC/SystemZ/insn-msy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: msy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: msy	%r0, 524288
-
-	msy	%r0, -524289
-	msy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-mvghi-01.s b/test/MC/SystemZ/insn-mvghi-01.s
deleted file mode 100644
index 191aa49cd854..000000000000
--- a/test/MC/SystemZ/insn-mvghi-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mvghi	0, 0                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
-#CHECK: mvghi	4095, 0                 # encoding: [0xe5,0x48,0x0f,0xff,0x00,0x00]
-#CHECK: mvghi	0, -32768               # encoding: [0xe5,0x48,0x00,0x00,0x80,0x00]
-#CHECK: mvghi	0, -1                   # encoding: [0xe5,0x48,0x00,0x00,0xff,0xff]
-#CHECK: mvghi	0, 0                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
-#CHECK: mvghi	0, 1                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x01]
-#CHECK: mvghi	0, 32767                # encoding: [0xe5,0x48,0x00,0x00,0x7f,0xff]
-#CHECK: mvghi	0(%r1), 42              # encoding: [0xe5,0x48,0x10,0x00,0x00,0x2a]
-#CHECK: mvghi	0(%r15), 42             # encoding: [0xe5,0x48,0xf0,0x00,0x00,0x2a]
-#CHECK: mvghi	4095(%r1), 42           # encoding: [0xe5,0x48,0x1f,0xff,0x00,0x2a]
-#CHECK: mvghi	4095(%r15), 42          # encoding: [0xe5,0x48,0xff,0xff,0x00,0x2a]
-
-	mvghi	0, 0
-	mvghi	4095, 0
-	mvghi	0, -32768
-	mvghi	0, -1
-	mvghi	0, 0
-	mvghi	0, 1
-	mvghi	0, 32767
-	mvghi	0(%r1), 42
-	mvghi	0(%r15), 42
-	mvghi	4095(%r1), 42
-	mvghi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvghi-02.s b/test/MC/SystemZ/insn-mvghi-02.s
deleted file mode 100644
index 38b38a517a0b..000000000000
--- a/test/MC/SystemZ/insn-mvghi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mvghi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: mvghi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: mvghi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: mvghi	0, -32769
-#CHECK: error: invalid operand
-#CHECK: mvghi	0, 32768
-
-	mvghi	-1, 0
-	mvghi	4096, 0
-	mvghi	0(%r1,%r2), 0
-	mvghi	0, -32769
-	mvghi	0, 32768
diff --git a/test/MC/SystemZ/insn-mvhhi-01.s b/test/MC/SystemZ/insn-mvhhi-01.s
deleted file mode 100644
index 63574a487f93..000000000000
--- a/test/MC/SystemZ/insn-mvhhi-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mvhhi	0, 0                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x00]
-#CHECK: mvhhi	4095, 0                 # encoding: [0xe5,0x44,0x0f,0xff,0x00,0x00]
-#CHECK: mvhhi	0, -32768               # encoding: [0xe5,0x44,0x00,0x00,0x80,0x00]
-#CHECK: mvhhi	0, -1                   # encoding: [0xe5,0x44,0x00,0x00,0xff,0xff]
-#CHECK: mvhhi	0, 0                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x00]
-#CHECK: mvhhi	0, 1                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x01]
-#CHECK: mvhhi	0, 32767                # encoding: [0xe5,0x44,0x00,0x00,0x7f,0xff]
-#CHECK: mvhhi	0(%r1), 42              # encoding: [0xe5,0x44,0x10,0x00,0x00,0x2a]
-#CHECK: mvhhi	0(%r15), 42             # encoding: [0xe5,0x44,0xf0,0x00,0x00,0x2a]
-#CHECK: mvhhi	4095(%r1), 42           # encoding: [0xe5,0x44,0x1f,0xff,0x00,0x2a]
-#CHECK: mvhhi	4095(%r15), 42          # encoding: [0xe5,0x44,0xff,0xff,0x00,0x2a]
-
-	mvhhi	0, 0
-	mvhhi	4095, 0
-	mvhhi	0, -32768
-	mvhhi	0, -1
-	mvhhi	0, 0
-	mvhhi	0, 1
-	mvhhi	0, 32767
-	mvhhi	0(%r1), 42
-	mvhhi	0(%r15), 42
-	mvhhi	4095(%r1), 42
-	mvhhi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvhhi-02.s b/test/MC/SystemZ/insn-mvhhi-02.s
deleted file mode 100644
index 58abb025ebd6..000000000000
--- a/test/MC/SystemZ/insn-mvhhi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mvhhi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: mvhhi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: mvhhi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: mvhhi	0, -32769
-#CHECK: error: invalid operand
-#CHECK: mvhhi	0, 32768
-
-	mvhhi	-1, 0
-	mvhhi	4096, 0
-	mvhhi	0(%r1,%r2), 0
-	mvhhi	0, -32769
-	mvhhi	0, 32768
diff --git a/test/MC/SystemZ/insn-mvhi-01.s b/test/MC/SystemZ/insn-mvhi-01.s
deleted file mode 100644
index 5bf9fd312167..000000000000
--- a/test/MC/SystemZ/insn-mvhi-01.s
+++ /dev/null
@@ -1,25 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mvhi	0, 0                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x00]
-#CHECK: mvhi	4095, 0                 # encoding: [0xe5,0x4c,0x0f,0xff,0x00,0x00]
-#CHECK: mvhi	0, -32768               # encoding: [0xe5,0x4c,0x00,0x00,0x80,0x00]
-#CHECK: mvhi	0, -1                   # encoding: [0xe5,0x4c,0x00,0x00,0xff,0xff]
-#CHECK: mvhi	0, 0                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x00]
-#CHECK: mvhi	0, 1                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x01]
-#CHECK: mvhi	0, 32767                # encoding: [0xe5,0x4c,0x00,0x00,0x7f,0xff]
-#CHECK: mvhi	0(%r1), 42              # encoding: [0xe5,0x4c,0x10,0x00,0x00,0x2a]
-#CHECK: mvhi	0(%r15), 42             # encoding: [0xe5,0x4c,0xf0,0x00,0x00,0x2a]
-#CHECK: mvhi	4095(%r1), 42           # encoding: [0xe5,0x4c,0x1f,0xff,0x00,0x2a]
-#CHECK: mvhi	4095(%r15), 42          # encoding: [0xe5,0x4c,0xff,0xff,0x00,0x2a]
-
-	mvhi	0, 0
-	mvhi	4095, 0
-	mvhi	0, -32768
-	mvhi	0, -1
-	mvhi	0, 0
-	mvhi	0, 1
-	mvhi	0, 32767
-	mvhi	0(%r1), 42
-	mvhi	0(%r15), 42
-	mvhi	4095(%r1), 42
-	mvhi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvhi-02.s b/test/MC/SystemZ/insn-mvhi-02.s
deleted file mode 100644
index 517301c51356..000000000000
--- a/test/MC/SystemZ/insn-mvhi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mvhi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: mvhi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: mvhi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: mvhi	0, -32769
-#CHECK: error: invalid operand
-#CHECK: mvhi	0, 32768
-
-	mvhi	-1, 0
-	mvhi	4096, 0
-	mvhi	0(%r1,%r2), 0
-	mvhi	0, -32769
-	mvhi	0, 32768
diff --git a/test/MC/SystemZ/insn-mvi-01.s b/test/MC/SystemZ/insn-mvi-01.s
deleted file mode 100644
index 83e30900ac3a..000000000000
--- a/test/MC/SystemZ/insn-mvi-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mvi	0, 0                    # encoding: [0x92,0x00,0x00,0x00]
-#CHECK: mvi	4095, 0                 # encoding: [0x92,0x00,0x0f,0xff]
-#CHECK: mvi	0, 255                  # encoding: [0x92,0xff,0x00,0x00]
-#CHECK: mvi	0(%r1), 42              # encoding: [0x92,0x2a,0x10,0x00]
-#CHECK: mvi	0(%r15), 42             # encoding: [0x92,0x2a,0xf0,0x00]
-#CHECK: mvi	4095(%r1), 42           # encoding: [0x92,0x2a,0x1f,0xff]
-#CHECK: mvi	4095(%r15), 42          # encoding: [0x92,0x2a,0xff,0xff]
-
-	mvi	0, 0
-	mvi	4095, 0
-	mvi	0, 255
-	mvi	0(%r1), 42
-	mvi	0(%r15), 42
-	mvi	4095(%r1), 42
-	mvi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvi-02.s b/test/MC/SystemZ/insn-mvi-02.s
deleted file mode 100644
index ddd5909d459c..000000000000
--- a/test/MC/SystemZ/insn-mvi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mvi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: mvi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: mvi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: mvi	0, -1
-#CHECK: error: invalid operand
-#CHECK: mvi	0, 256
-
-	mvi	-1, 0
-	mvi	4096, 0
-	mvi	0(%r1,%r2), 0
-	mvi	0, -1
-	mvi	0, 256
diff --git a/test/MC/SystemZ/insn-mviy-01.s b/test/MC/SystemZ/insn-mviy-01.s
deleted file mode 100644
index 8bd6979b81ee..000000000000
--- a/test/MC/SystemZ/insn-mviy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mviy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x52]
-#CHECK: mviy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x52]
-#CHECK: mviy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x52]
-#CHECK: mviy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x52]
-#CHECK: mviy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x52]
-#CHECK: mviy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x52]
-#CHECK: mviy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x52]
-#CHECK: mviy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x52]
-#CHECK: mviy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x52]
-#CHECK: mviy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x52]
-
-	mviy	-524288, 0
-	mviy	-1, 0
-	mviy	0, 0
-	mviy	1, 0
-	mviy	524287, 0
-	mviy	0, 255
-	mviy	0(%r1), 42
-	mviy	0(%r15), 42
-	mviy	524287(%r1), 42
-	mviy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-mviy-02.s b/test/MC/SystemZ/insn-mviy-02.s
deleted file mode 100644
index ab78dab5c971..000000000000
--- a/test/MC/SystemZ/insn-mviy-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: mviy	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: mviy	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: mviy	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: mviy	0, -1
-#CHECK: error: invalid operand
-#CHECK: mviy	0, 256
-
-	mviy	-524289, 0
-	mviy	524288, 0
-	mviy	0(%r1,%r2), 0
-	mviy	0, -1
-	mviy	0, 256
diff --git a/test/MC/SystemZ/insn-mxbr-01.s b/test/MC/SystemZ/insn-mxbr-01.s
deleted file mode 100644
index 60c8ebadfba3..000000000000
--- a/test/MC/SystemZ/insn-mxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mxbr	%f0, %f0                # encoding: [0xb3,0x4c,0x00,0x00]
-#CHECK: mxbr	%f0, %f13               # encoding: [0xb3,0x4c,0x00,0x0d]
-#CHECK: mxbr	%f8, %f5                # encoding: [0xb3,0x4c,0x00,0x85]
-#CHECK: mxbr	%f13, %f13              # encoding: [0xb3,0x4c,0x00,0xdd]
-
-	mxbr	%f0, %f0
-	mxbr	%f0, %f13
-	mxbr	%f8, %f5
-	mxbr	%f13, %f13
diff --git a/test/MC/SystemZ/insn-mxbr-02.s b/test/MC/SystemZ/insn-mxbr-02.s
deleted file mode 100644
index 92820234139a..000000000000
--- a/test/MC/SystemZ/insn-mxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: mxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: mxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: mxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: mxbr	%f14, %f0
-
-	mxbr	%f0, %f2
-	mxbr	%f0, %f14
-	mxbr	%f2, %f0
-	mxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-mxdb-01.s b/test/MC/SystemZ/insn-mxdb-01.s
deleted file mode 100644
index 46a723e8426f..000000000000
--- a/test/MC/SystemZ/insn-mxdb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mxdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x07]
-#CHECK: mxdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x07]
-#CHECK: mxdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x07]
-#CHECK: mxdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x07]
-#CHECK: mxdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x07]
-#CHECK: mxdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x07]
-#CHECK: mxdb	%f13, 0                 # encoding: [0xed,0xd0,0x00,0x00,0x00,0x07]
-
-	mxdb	%f0, 0
-	mxdb	%f0, 4095
-	mxdb	%f0, 0(%r1)
-	mxdb	%f0, 0(%r15)
-	mxdb	%f0, 4095(%r1,%r15)
-	mxdb	%f0, 4095(%r15,%r1)
-	mxdb	%f13, 0
diff --git a/test/MC/SystemZ/insn-mxdb-02.s b/test/MC/SystemZ/insn-mxdb-02.s
deleted file mode 100644
index 44c821cad039..000000000000
--- a/test/MC/SystemZ/insn-mxdb-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: mxdb	%f2, 0
-#CHECK: error: invalid register
-#CHECK: mxdb	%f15, 0
-#CHECK: error: invalid operand
-#CHECK: mxdb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: mxdb	%f0, 4096
-
-	mxdb	%f2, 0
-	mxdb	%f15, 0
-	mxdb	%f0, -1
-	mxdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-mxdbr-01.s b/test/MC/SystemZ/insn-mxdbr-01.s
deleted file mode 100644
index dfb898fbdaf6..000000000000
--- a/test/MC/SystemZ/insn-mxdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: mxdbr	%f0, %f0                # encoding: [0xb3,0x07,0x00,0x00]
-#CHECK: mxdbr	%f0, %f15               # encoding: [0xb3,0x07,0x00,0x0f]
-#CHECK: mxdbr	%f8, %f8                # encoding: [0xb3,0x07,0x00,0x88]
-#CHECK: mxdbr	%f13, %f0               # encoding: [0xb3,0x07,0x00,0xd0]
-
-	mxdbr	%f0, %f0
-	mxdbr	%f0, %f15
-	mxdbr	%f8, %f8
-	mxdbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-mxdbr-02.s b/test/MC/SystemZ/insn-mxdbr-02.s
deleted file mode 100644
index 90260481f2a7..000000000000
--- a/test/MC/SystemZ/insn-mxdbr-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: mxdbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: mxdbr	%f15, %f0
-
-	mxdbr	%f2, %f0
-	mxdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-n-01.s b/test/MC/SystemZ/insn-n-01.s
deleted file mode 100644
index 75fa141e2c86..000000000000
--- a/test/MC/SystemZ/insn-n-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: n	%r0, 0                  # encoding: [0x54,0x00,0x00,0x00]
-#CHECK: n	%r0, 4095               # encoding: [0x54,0x00,0x0f,0xff]
-#CHECK: n	%r0, 0(%r1)             # encoding: [0x54,0x00,0x10,0x00]
-#CHECK: n	%r0, 0(%r15)            # encoding: [0x54,0x00,0xf0,0x00]
-#CHECK: n	%r0, 4095(%r1,%r15)     # encoding: [0x54,0x01,0xff,0xff]
-#CHECK: n	%r0, 4095(%r15,%r1)     # encoding: [0x54,0x0f,0x1f,0xff]
-#CHECK: n	%r15, 0                 # encoding: [0x54,0xf0,0x00,0x00]
-
-	n	%r0, 0
-	n	%r0, 4095
-	n	%r0, 0(%r1)
-	n	%r0, 0(%r15)
-	n	%r0, 4095(%r1,%r15)
-	n	%r0, 4095(%r15,%r1)
-	n	%r15, 0
diff --git a/test/MC/SystemZ/insn-n-02.s b/test/MC/SystemZ/insn-n-02.s
deleted file mode 100644
index 7c14b1fe1c0e..000000000000
--- a/test/MC/SystemZ/insn-n-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: n	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: n	%r0, 4096
-
-	n	%r0, -1
-	n	%r0, 4096
diff --git a/test/MC/SystemZ/insn-ng-01.s b/test/MC/SystemZ/insn-ng-01.s
deleted file mode 100644
index bf71a2183f45..000000000000
--- a/test/MC/SystemZ/insn-ng-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ng	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x80]
-#CHECK: ng	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x80]
-#CHECK: ng	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x80]
-#CHECK: ng	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x80]
-#CHECK: ng	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x80]
-#CHECK: ng	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x80]
-#CHECK: ng	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x80]
-#CHECK: ng	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x80]
-#CHECK: ng	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x80]
-#CHECK: ng	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x80]
-
-	ng	%r0, -524288
-	ng	%r0, -1
-	ng	%r0, 0
-	ng	%r0, 1
-	ng	%r0, 524287
-	ng	%r0, 0(%r1)
-	ng	%r0, 0(%r15)
-	ng	%r0, 524287(%r1,%r15)
-	ng	%r0, 524287(%r15,%r1)
-	ng	%r15, 0
diff --git a/test/MC/SystemZ/insn-ng-02.s b/test/MC/SystemZ/insn-ng-02.s
deleted file mode 100644
index a6f326052683..000000000000
--- a/test/MC/SystemZ/insn-ng-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ng	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: ng	%r0, 524288
-
-	ng	%r0, -524289
-	ng	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ngr-01.s b/test/MC/SystemZ/insn-ngr-01.s
deleted file mode 100644
index 714b9fa71292..000000000000
--- a/test/MC/SystemZ/insn-ngr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ngr	%r0, %r0                # encoding: [0xb9,0x80,0x00,0x00]
-#CHECK: ngr	%r0, %r15               # encoding: [0xb9,0x80,0x00,0x0f]
-#CHECK: ngr	%r15, %r0               # encoding: [0xb9,0x80,0x00,0xf0]
-#CHECK: ngr	%r7, %r8                # encoding: [0xb9,0x80,0x00,0x78]
-
-	ngr	%r0,%r0
-	ngr	%r0,%r15
-	ngr	%r15,%r0
-	ngr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-ni-01.s b/test/MC/SystemZ/insn-ni-01.s
deleted file mode 100644
index d075674feaa3..000000000000
--- a/test/MC/SystemZ/insn-ni-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ni	0, 0                    # encoding: [0x94,0x00,0x00,0x00]
-#CHECK: ni	4095, 0                 # encoding: [0x94,0x00,0x0f,0xff]
-#CHECK: ni	0, 255                  # encoding: [0x94,0xff,0x00,0x00]
-#CHECK: ni	0(%r1), 42              # encoding: [0x94,0x2a,0x10,0x00]
-#CHECK: ni	0(%r15), 42             # encoding: [0x94,0x2a,0xf0,0x00]
-#CHECK: ni	4095(%r1), 42           # encoding: [0x94,0x2a,0x1f,0xff]
-#CHECK: ni	4095(%r15), 42          # encoding: [0x94,0x2a,0xff,0xff]
-
-	ni	0, 0
-	ni	4095, 0
-	ni	0, 255
-	ni	0(%r1), 42
-	ni	0(%r15), 42
-	ni	4095(%r1), 42
-	ni	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-ni-02.s b/test/MC/SystemZ/insn-ni-02.s
deleted file mode 100644
index 1b9a6a748de2..000000000000
--- a/test/MC/SystemZ/insn-ni-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ni	-1, 0
-#CHECK: error: invalid operand
-#CHECK: ni	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: ni	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: ni	0, -1
-#CHECK: error: invalid operand
-#CHECK: ni	0, 256
-
-	ni	-1, 0
-	ni	4096, 0
-	ni	0(%r1,%r2), 0
-	ni	0, -1
-	ni	0, 256
diff --git a/test/MC/SystemZ/insn-nihf-01.s b/test/MC/SystemZ/insn-nihf-01.s
deleted file mode 100644
index dceb8d1297c2..000000000000
--- a/test/MC/SystemZ/insn-nihf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nihf	%r0, 0                  # encoding: [0xc0,0x0a,0x00,0x00,0x00,0x00]
-#CHECK: nihf	%r0, 4294967295         # encoding: [0xc0,0x0a,0xff,0xff,0xff,0xff]
-#CHECK: nihf	%r15, 0                 # encoding: [0xc0,0xfa,0x00,0x00,0x00,0x00]
-
-	nihf	%r0, 0
-	nihf	%r0, 0xffffffff
-	nihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-nihf-02.s b/test/MC/SystemZ/insn-nihf-02.s
deleted file mode 100644
index 5f7f10a6b3f0..000000000000
--- a/test/MC/SystemZ/insn-nihf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: nihf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: nihf	%r0, 1 << 32
-
-	nihf	%r0, -1
-	nihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-nihh-01.s b/test/MC/SystemZ/insn-nihh-01.s
deleted file mode 100644
index a87540d04c08..000000000000
--- a/test/MC/SystemZ/insn-nihh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nihh	%r0, 0                  # encoding: [0xa5,0x04,0x00,0x00]
-#CHECK: nihh	%r0, 32768              # encoding: [0xa5,0x04,0x80,0x00]
-#CHECK: nihh	%r0, 65535              # encoding: [0xa5,0x04,0xff,0xff]
-#CHECK: nihh	%r15, 0                 # encoding: [0xa5,0xf4,0x00,0x00]
-
-	nihh	%r0, 0
-	nihh	%r0, 0x8000
-	nihh	%r0, 0xffff
-	nihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-nihh-02.s b/test/MC/SystemZ/insn-nihh-02.s
deleted file mode 100644
index 3df88e40a281..000000000000
--- a/test/MC/SystemZ/insn-nihh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: nihh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: nihh	%r0, 0x10000
-
-	nihh	%r0, -1
-	nihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-nihl-01.s b/test/MC/SystemZ/insn-nihl-01.s
deleted file mode 100644
index 6eab58c7e7bf..000000000000
--- a/test/MC/SystemZ/insn-nihl-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nihl	%r0, 0                  # encoding: [0xa5,0x05,0x00,0x00]
-#CHECK: nihl	%r0, 32768              # encoding: [0xa5,0x05,0x80,0x00]
-#CHECK: nihl	%r0, 65535              # encoding: [0xa5,0x05,0xff,0xff]
-#CHECK: nihl	%r15, 0                 # encoding: [0xa5,0xf5,0x00,0x00]
-
-	nihl	%r0, 0
-	nihl	%r0, 0x8000
-	nihl	%r0, 0xffff
-	nihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-nihl-02.s b/test/MC/SystemZ/insn-nihl-02.s
deleted file mode 100644
index 6e2d52f5a91a..000000000000
--- a/test/MC/SystemZ/insn-nihl-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: nihl	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: nihl	%r0, 0x10000
-
-	nihl	%r0, -1
-	nihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-nilf-01.s b/test/MC/SystemZ/insn-nilf-01.s
deleted file mode 100644
index 0b3a13e752cf..000000000000
--- a/test/MC/SystemZ/insn-nilf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nilf	%r0, 0                  # encoding: [0xc0,0x0b,0x00,0x00,0x00,0x00]
-#CHECK: nilf	%r0, 4294967295         # encoding: [0xc0,0x0b,0xff,0xff,0xff,0xff]
-#CHECK: nilf	%r15, 0                 # encoding: [0xc0,0xfb,0x00,0x00,0x00,0x00]
-
-	nilf	%r0, 0
-	nilf	%r0, 0xffffffff
-	nilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-nilf-02.s b/test/MC/SystemZ/insn-nilf-02.s
deleted file mode 100644
index 87b65e46fe96..000000000000
--- a/test/MC/SystemZ/insn-nilf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: nilf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: nilf	%r0, 1 << 32
-
-	nilf	%r0, -1
-	nilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-nilh-01.s b/test/MC/SystemZ/insn-nilh-01.s
deleted file mode 100644
index 4bc9353dd284..000000000000
--- a/test/MC/SystemZ/insn-nilh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nilh	%r0, 0                  # encoding: [0xa5,0x06,0x00,0x00]
-#CHECK: nilh	%r0, 32768              # encoding: [0xa5,0x06,0x80,0x00]
-#CHECK: nilh	%r0, 65535              # encoding: [0xa5,0x06,0xff,0xff]
-#CHECK: nilh	%r15, 0                 # encoding: [0xa5,0xf6,0x00,0x00]
-
-	nilh	%r0, 0
-	nilh	%r0, 0x8000
-	nilh	%r0, 0xffff
-	nilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-nilh-02.s b/test/MC/SystemZ/insn-nilh-02.s
deleted file mode 100644
index ae5a852a6c69..000000000000
--- a/test/MC/SystemZ/insn-nilh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: nilh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: nilh	%r0, 0x10000
-
-	nilh	%r0, -1
-	nilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-nill-01.s b/test/MC/SystemZ/insn-nill-01.s
deleted file mode 100644
index 5f4f87785bf3..000000000000
--- a/test/MC/SystemZ/insn-nill-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nill	%r0, 0                  # encoding: [0xa5,0x07,0x00,0x00]
-#CHECK: nill	%r0, 32768              # encoding: [0xa5,0x07,0x80,0x00]
-#CHECK: nill	%r0, 65535              # encoding: [0xa5,0x07,0xff,0xff]
-#CHECK: nill	%r15, 0                 # encoding: [0xa5,0xf7,0x00,0x00]
-
-	nill	%r0, 0
-	nill	%r0, 0x8000
-	nill	%r0, 0xffff
-	nill	%r15, 0
diff --git a/test/MC/SystemZ/insn-nill-02.s b/test/MC/SystemZ/insn-nill-02.s
deleted file mode 100644
index 27fbc4a50658..000000000000
--- a/test/MC/SystemZ/insn-nill-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: nill	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: nill	%r0, 0x10000
-
-	nill	%r0, -1
-	nill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-niy-01.s b/test/MC/SystemZ/insn-niy-01.s
deleted file mode 100644
index 4c007e917295..000000000000
--- a/test/MC/SystemZ/insn-niy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: niy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x54]
-#CHECK: niy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x54]
-#CHECK: niy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x54]
-#CHECK: niy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x54]
-#CHECK: niy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x54]
-#CHECK: niy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x54]
-#CHECK: niy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x54]
-#CHECK: niy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x54]
-#CHECK: niy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x54]
-#CHECK: niy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x54]
-
-	niy	-524288, 0
-	niy	-1, 0
-	niy	0, 0
-	niy	1, 0
-	niy	524287, 0
-	niy	0, 255
-	niy	0(%r1), 42
-	niy	0(%r15), 42
-	niy	524287(%r1), 42
-	niy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-niy-02.s b/test/MC/SystemZ/insn-niy-02.s
deleted file mode 100644
index ca398e6fca86..000000000000
--- a/test/MC/SystemZ/insn-niy-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: niy	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: niy	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: niy	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: niy	0, -1
-#CHECK: error: invalid operand
-#CHECK: niy	0, 256
-
-	niy	-524289, 0
-	niy	524288, 0
-	niy	0(%r1,%r2), 0
-	niy	0, -1
-	niy	0, 256
diff --git a/test/MC/SystemZ/insn-nr-01.s b/test/MC/SystemZ/insn-nr-01.s
deleted file mode 100644
index c10216d68f41..000000000000
--- a/test/MC/SystemZ/insn-nr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: nr	%r0, %r0                # encoding: [0x14,0x00]
-#CHECK: nr	%r0, %r15               # encoding: [0x14,0x0f]
-#CHECK: nr	%r15, %r0               # encoding: [0x14,0xf0]
-#CHECK: nr	%r7, %r8                # encoding: [0x14,0x78]
-
-	nr	%r0,%r0
-	nr	%r0,%r15
-	nr	%r15,%r0
-	nr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-ny-01.s b/test/MC/SystemZ/insn-ny-01.s
deleted file mode 100644
index a12bb67e2b03..000000000000
--- a/test/MC/SystemZ/insn-ny-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ny	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x54]
-#CHECK: ny	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x54]
-#CHECK: ny	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x54]
-#CHECK: ny	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x54]
-#CHECK: ny	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x54]
-#CHECK: ny	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x54]
-#CHECK: ny	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x54]
-#CHECK: ny	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x54]
-#CHECK: ny	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x54]
-#CHECK: ny	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x54]
-
-	ny	%r0, -524288
-	ny	%r0, -1
-	ny	%r0, 0
-	ny	%r0, 1
-	ny	%r0, 524287
-	ny	%r0, 0(%r1)
-	ny	%r0, 0(%r15)
-	ny	%r0, 524287(%r1,%r15)
-	ny	%r0, 524287(%r15,%r1)
-	ny	%r15, 0
diff --git a/test/MC/SystemZ/insn-ny-02.s b/test/MC/SystemZ/insn-ny-02.s
deleted file mode 100644
index 5f53ebd3e114..000000000000
--- a/test/MC/SystemZ/insn-ny-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ny	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: ny	%r0, 524288
-
-	ny	%r0, -524289
-	ny	%r0, 524288
diff --git a/test/MC/SystemZ/insn-o-01.s b/test/MC/SystemZ/insn-o-01.s
deleted file mode 100644
index 0c74e9ccc6dc..000000000000
--- a/test/MC/SystemZ/insn-o-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: o	%r0, 0                  # encoding: [0x56,0x00,0x00,0x00]
-#CHECK: o	%r0, 4095               # encoding: [0x56,0x00,0x0f,0xff]
-#CHECK: o	%r0, 0(%r1)             # encoding: [0x56,0x00,0x10,0x00]
-#CHECK: o	%r0, 0(%r15)            # encoding: [0x56,0x00,0xf0,0x00]
-#CHECK: o	%r0, 4095(%r1,%r15)     # encoding: [0x56,0x01,0xff,0xff]
-#CHECK: o	%r0, 4095(%r15,%r1)     # encoding: [0x56,0x0f,0x1f,0xff]
-#CHECK: o	%r15, 0                 # encoding: [0x56,0xf0,0x00,0x00]
-
-	o	%r0, 0
-	o	%r0, 4095
-	o	%r0, 0(%r1)
-	o	%r0, 0(%r15)
-	o	%r0, 4095(%r1,%r15)
-	o	%r0, 4095(%r15,%r1)
-	o	%r15, 0
diff --git a/test/MC/SystemZ/insn-o-02.s b/test/MC/SystemZ/insn-o-02.s
deleted file mode 100644
index 34b741803b42..000000000000
--- a/test/MC/SystemZ/insn-o-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: o	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: o	%r0, 4096
-
-	o	%r0, -1
-	o	%r0, 4096
diff --git a/test/MC/SystemZ/insn-og-01.s b/test/MC/SystemZ/insn-og-01.s
deleted file mode 100644
index 3c9811b008a6..000000000000
--- a/test/MC/SystemZ/insn-og-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: og	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x81]
-#CHECK: og	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x81]
-#CHECK: og	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x81]
-#CHECK: og	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x81]
-#CHECK: og	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x81]
-#CHECK: og	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x81]
-#CHECK: og	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x81]
-#CHECK: og	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x81]
-#CHECK: og	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x81]
-#CHECK: og	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x81]
-
-	og	%r0, -524288
-	og	%r0, -1
-	og	%r0, 0
-	og	%r0, 1
-	og	%r0, 524287
-	og	%r0, 0(%r1)
-	og	%r0, 0(%r15)
-	og	%r0, 524287(%r1,%r15)
-	og	%r0, 524287(%r15,%r1)
-	og	%r15, 0
diff --git a/test/MC/SystemZ/insn-og-02.s b/test/MC/SystemZ/insn-og-02.s
deleted file mode 100644
index 7f4e45328be7..000000000000
--- a/test/MC/SystemZ/insn-og-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: og	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: og	%r0, 524288
-
-	og	%r0, -524289
-	og	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ogr-01.s b/test/MC/SystemZ/insn-ogr-01.s
deleted file mode 100644
index 25ba913ac557..000000000000
--- a/test/MC/SystemZ/insn-ogr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ogr	%r0, %r0                # encoding: [0xb9,0x81,0x00,0x00]
-#CHECK: ogr	%r0, %r15               # encoding: [0xb9,0x81,0x00,0x0f]
-#CHECK: ogr	%r15, %r0               # encoding: [0xb9,0x81,0x00,0xf0]
-#CHECK: ogr	%r7, %r8                # encoding: [0xb9,0x81,0x00,0x78]
-
-	ogr	%r0,%r0
-	ogr	%r0,%r15
-	ogr	%r15,%r0
-	ogr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-oi-01.s b/test/MC/SystemZ/insn-oi-01.s
deleted file mode 100644
index 5d52fd2500bd..000000000000
--- a/test/MC/SystemZ/insn-oi-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oi	0, 0                    # encoding: [0x96,0x00,0x00,0x00]
-#CHECK: oi	4095, 0                 # encoding: [0x96,0x00,0x0f,0xff]
-#CHECK: oi	0, 255                  # encoding: [0x96,0xff,0x00,0x00]
-#CHECK: oi	0(%r1), 42              # encoding: [0x96,0x2a,0x10,0x00]
-#CHECK: oi	0(%r15), 42             # encoding: [0x96,0x2a,0xf0,0x00]
-#CHECK: oi	4095(%r1), 42           # encoding: [0x96,0x2a,0x1f,0xff]
-#CHECK: oi	4095(%r15), 42          # encoding: [0x96,0x2a,0xff,0xff]
-
-	oi	0, 0
-	oi	4095, 0
-	oi	0, 255
-	oi	0(%r1), 42
-	oi	0(%r15), 42
-	oi	4095(%r1), 42
-	oi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-oi-02.s b/test/MC/SystemZ/insn-oi-02.s
deleted file mode 100644
index 330a290aa365..000000000000
--- a/test/MC/SystemZ/insn-oi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: oi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: oi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: oi	0, -1
-#CHECK: error: invalid operand
-#CHECK: oi	0, 256
-
-	oi	-1, 0
-	oi	4096, 0
-	oi	0(%r1,%r2), 0
-	oi	0, -1
-	oi	0, 256
diff --git a/test/MC/SystemZ/insn-oihf-01.s b/test/MC/SystemZ/insn-oihf-01.s
deleted file mode 100644
index 627820d31dcb..000000000000
--- a/test/MC/SystemZ/insn-oihf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oihf	%r0, 0                  # encoding: [0xc0,0x0c,0x00,0x00,0x00,0x00]
-#CHECK: oihf	%r0, 4294967295         # encoding: [0xc0,0x0c,0xff,0xff,0xff,0xff]
-#CHECK: oihf	%r15, 0                 # encoding: [0xc0,0xfc,0x00,0x00,0x00,0x00]
-
-	oihf	%r0, 0
-	oihf	%r0, 0xffffffff
-	oihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-oihf-02.s b/test/MC/SystemZ/insn-oihf-02.s
deleted file mode 100644
index a944cb013cd4..000000000000
--- a/test/MC/SystemZ/insn-oihf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oihf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: oihf	%r0, 1 << 32
-
-	oihf	%r0, -1
-	oihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-oihh-01.s b/test/MC/SystemZ/insn-oihh-01.s
deleted file mode 100644
index f62f61f03f88..000000000000
--- a/test/MC/SystemZ/insn-oihh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oihh	%r0, 0                  # encoding: [0xa5,0x08,0x00,0x00]
-#CHECK: oihh	%r0, 32768              # encoding: [0xa5,0x08,0x80,0x00]
-#CHECK: oihh	%r0, 65535              # encoding: [0xa5,0x08,0xff,0xff]
-#CHECK: oihh	%r15, 0                 # encoding: [0xa5,0xf8,0x00,0x00]
-
-	oihh	%r0, 0
-	oihh	%r0, 0x8000
-	oihh	%r0, 0xffff
-	oihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-oihh-02.s b/test/MC/SystemZ/insn-oihh-02.s
deleted file mode 100644
index 6bf7e237e039..000000000000
--- a/test/MC/SystemZ/insn-oihh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oihh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: oihh	%r0, 0x10000
-
-	oihh	%r0, -1
-	oihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oihl-01.s b/test/MC/SystemZ/insn-oihl-01.s
deleted file mode 100644
index 437b15c39c17..000000000000
--- a/test/MC/SystemZ/insn-oihl-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oihl	%r0, 0                  # encoding: [0xa5,0x09,0x00,0x00]
-#CHECK: oihl	%r0, 32768              # encoding: [0xa5,0x09,0x80,0x00]
-#CHECK: oihl	%r0, 65535              # encoding: [0xa5,0x09,0xff,0xff]
-#CHECK: oihl	%r15, 0                 # encoding: [0xa5,0xf9,0x00,0x00]
-
-	oihl	%r0, 0
-	oihl	%r0, 0x8000
-	oihl	%r0, 0xffff
-	oihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-oihl-02.s b/test/MC/SystemZ/insn-oihl-02.s
deleted file mode 100644
index f4f7a59b774c..000000000000
--- a/test/MC/SystemZ/insn-oihl-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oihl	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: oihl	%r0, 0x10000
-
-	oihl	%r0, -1
-	oihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oilf-01.s b/test/MC/SystemZ/insn-oilf-01.s
deleted file mode 100644
index 6f0c0717bd0e..000000000000
--- a/test/MC/SystemZ/insn-oilf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oilf	%r0, 0                  # encoding: [0xc0,0x0d,0x00,0x00,0x00,0x00]
-#CHECK: oilf	%r0, 4294967295         # encoding: [0xc0,0x0d,0xff,0xff,0xff,0xff]
-#CHECK: oilf	%r15, 0                 # encoding: [0xc0,0xfd,0x00,0x00,0x00,0x00]
-
-	oilf	%r0, 0
-	oilf	%r0, 0xffffffff
-	oilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-oilf-02.s b/test/MC/SystemZ/insn-oilf-02.s
deleted file mode 100644
index 5501724d54c1..000000000000
--- a/test/MC/SystemZ/insn-oilf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oilf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: oilf	%r0, 1 << 32
-
-	oilf	%r0, -1
-	oilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-oilh-01.s b/test/MC/SystemZ/insn-oilh-01.s
deleted file mode 100644
index 0140500e340f..000000000000
--- a/test/MC/SystemZ/insn-oilh-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oilh	%r0, 0                  # encoding: [0xa5,0x0a,0x00,0x00]
-#CHECK: oilh	%r0, 32768              # encoding: [0xa5,0x0a,0x80,0x00]
-#CHECK: oilh	%r0, 65535              # encoding: [0xa5,0x0a,0xff,0xff]
-#CHECK: oilh	%r15, 0                 # encoding: [0xa5,0xfa,0x00,0x00]
-
-	oilh	%r0, 0
-	oilh	%r0, 0x8000
-	oilh	%r0, 0xffff
-	oilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-oilh-02.s b/test/MC/SystemZ/insn-oilh-02.s
deleted file mode 100644
index d2f180d6abd3..000000000000
--- a/test/MC/SystemZ/insn-oilh-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oilh	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: oilh	%r0, 0x10000
-
-	oilh	%r0, -1
-	oilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oill-01.s b/test/MC/SystemZ/insn-oill-01.s
deleted file mode 100644
index ef95d2d90f52..000000000000
--- a/test/MC/SystemZ/insn-oill-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oill	%r0, 0                  # encoding: [0xa5,0x0b,0x00,0x00]
-#CHECK: oill	%r0, 32768              # encoding: [0xa5,0x0b,0x80,0x00]
-#CHECK: oill	%r0, 65535              # encoding: [0xa5,0x0b,0xff,0xff]
-#CHECK: oill	%r15, 0                 # encoding: [0xa5,0xfb,0x00,0x00]
-
-	oill	%r0, 0
-	oill	%r0, 0x8000
-	oill	%r0, 0xffff
-	oill	%r15, 0
diff --git a/test/MC/SystemZ/insn-oill-02.s b/test/MC/SystemZ/insn-oill-02.s
deleted file mode 100644
index 01321db30937..000000000000
--- a/test/MC/SystemZ/insn-oill-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oill	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: oill	%r0, 0x10000
-
-	oill	%r0, -1
-	oill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oiy-01.s b/test/MC/SystemZ/insn-oiy-01.s
deleted file mode 100644
index ba060cad1efa..000000000000
--- a/test/MC/SystemZ/insn-oiy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oiy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x56]
-#CHECK: oiy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x56]
-#CHECK: oiy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x56]
-#CHECK: oiy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x56]
-#CHECK: oiy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x56]
-#CHECK: oiy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x56]
-#CHECK: oiy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x56]
-#CHECK: oiy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x56]
-#CHECK: oiy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x56]
-#CHECK: oiy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x56]
-
-	oiy	-524288, 0
-	oiy	-1, 0
-	oiy	0, 0
-	oiy	1, 0
-	oiy	524287, 0
-	oiy	0, 255
-	oiy	0(%r1), 42
-	oiy	0(%r15), 42
-	oiy	524287(%r1), 42
-	oiy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-oiy-02.s b/test/MC/SystemZ/insn-oiy-02.s
deleted file mode 100644
index c1c556931da1..000000000000
--- a/test/MC/SystemZ/insn-oiy-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oiy	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: oiy	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: oiy	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: oiy	0, -1
-#CHECK: error: invalid operand
-#CHECK: oiy	0, 256
-
-	oiy	-524289, 0
-	oiy	524288, 0
-	oiy	0(%r1,%r2), 0
-	oiy	0, -1
-	oiy	0, 256
diff --git a/test/MC/SystemZ/insn-or-01.s b/test/MC/SystemZ/insn-or-01.s
deleted file mode 100644
index 8ac366d32ac0..000000000000
--- a/test/MC/SystemZ/insn-or-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: or	%r0, %r0                # encoding: [0x16,0x00]
-#CHECK: or	%r0, %r15               # encoding: [0x16,0x0f]
-#CHECK: or	%r15, %r0               # encoding: [0x16,0xf0]
-#CHECK: or	%r7, %r8                # encoding: [0x16,0x78]
-
-	or	%r0,%r0
-	or	%r0,%r15
-	or	%r15,%r0
-	or	%r7,%r8
diff --git a/test/MC/SystemZ/insn-oy-01.s b/test/MC/SystemZ/insn-oy-01.s
deleted file mode 100644
index 58013d0cabbd..000000000000
--- a/test/MC/SystemZ/insn-oy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: oy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x56]
-#CHECK: oy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x56]
-#CHECK: oy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x56]
-#CHECK: oy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x56]
-#CHECK: oy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x56]
-#CHECK: oy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x56]
-#CHECK: oy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x56]
-#CHECK: oy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x56]
-#CHECK: oy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x56]
-#CHECK: oy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x56]
-
-	oy	%r0, -524288
-	oy	%r0, -1
-	oy	%r0, 0
-	oy	%r0, 1
-	oy	%r0, 524287
-	oy	%r0, 0(%r1)
-	oy	%r0, 0(%r15)
-	oy	%r0, 524287(%r1,%r15)
-	oy	%r0, 524287(%r15,%r1)
-	oy	%r15, 0
diff --git a/test/MC/SystemZ/insn-oy-02.s b/test/MC/SystemZ/insn-oy-02.s
deleted file mode 100644
index a9ae5b23954a..000000000000
--- a/test/MC/SystemZ/insn-oy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: oy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: oy	%r0, 524288
-
-	oy	%r0, -524289
-	oy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-risbg-01.s b/test/MC/SystemZ/insn-risbg-01.s
deleted file mode 100644
index b50fbe7f96bd..000000000000
--- a/test/MC/SystemZ/insn-risbg-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: risbg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
-#CHECK: risbg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
-#CHECK: risbg	%r0, %r0, 0, 63, 0      # encoding: [0xec,0x00,0x00,0x3f,0x00,0x55]
-#CHECK: risbg	%r0, %r0, 63, 0, 0      # encoding: [0xec,0x00,0x3f,0x00,0x00,0x55]
-#CHECK: risbg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x55]
-#CHECK: risbg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x55]
-#CHECK: risbg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x55]
-
-	risbg	%r0,%r0,0,0,0
-	risbg	%r0,%r0,0,0,63
-	risbg	%r0,%r0,0,63,0
-	risbg	%r0,%r0,63,0,0
-	risbg	%r0,%r15,0,0,0
-	risbg	%r15,%r0,0,0,0
-	risbg	%r4,%r5,6,7,8
diff --git a/test/MC/SystemZ/insn-risbg-02.s b/test/MC/SystemZ/insn-risbg-02.s
deleted file mode 100644
index 781cb563c847..000000000000
--- a/test/MC/SystemZ/insn-risbg-02.s
+++ /dev/null
@@ -1,22 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: risbg	%r0,%r0,0,0,-1
-#CHECK: error: invalid operand
-#CHECK: risbg	%r0,%r0,0,0,64
-#CHECK: error: invalid operand
-#CHECK: risbg	%r0,%r0,0,-1,0
-#CHECK: error: invalid operand
-#CHECK: risbg	%r0,%r0,0,64,0
-#CHECK: error: invalid operand
-#CHECK: risbg	%r0,%r0,-1,0,0
-#CHECK: error: invalid operand
-#CHECK: risbg	%r0,%r0,64,0,0
-
-	risbg	%r0,%r0,0,0,-1
-	risbg	%r0,%r0,0,0,64
-	risbg	%r0,%r0,0,-1,0
-	risbg	%r0,%r0,0,64,0
-	risbg	%r0,%r0,-1,0,0
-	risbg	%r0,%r0,64,0,0
diff --git a/test/MC/SystemZ/insn-rll-01.s b/test/MC/SystemZ/insn-rll-01.s
deleted file mode 100644
index 06e3774786c9..000000000000
--- a/test/MC/SystemZ/insn-rll-01.s
+++ /dev/null
@@ -1,27 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: rll	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x1d]
-#CHECK: rll	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x1d]
-#CHECK: rll	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x1d]
-#CHECK: rll	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x1d]
-#CHECK: rll	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x1d]
-#CHECK: rll	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x1d]
-#CHECK: rll	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x1d]
-#CHECK: rll	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x1d]
-#CHECK: rll	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x1d]
-#CHECK: rll	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x1d]
-#CHECK: rll	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x1d]
-#CHECK: rll	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x1d]
-
-	rll	%r0,%r0,0
-	rll	%r15,%r1,0
-	rll	%r1,%r15,0
-	rll	%r15,%r15,0
-	rll	%r0,%r0,-524288
-	rll	%r0,%r0,-1
-	rll	%r0,%r0,1
-	rll	%r0,%r0,524287
-	rll	%r0,%r0,0(%r1)
-	rll	%r0,%r0,0(%r15)
-	rll	%r0,%r0,524287(%r1)
-	rll	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-rll-02.s b/test/MC/SystemZ/insn-rll-02.s
deleted file mode 100644
index baf160700fcb..000000000000
--- a/test/MC/SystemZ/insn-rll-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: rll	%r0,%r0,-524289
-#CHECK: error: invalid operand
-#CHECK: rll	%r0,%r0,524288
-#CHECK: error: %r0 used in an address
-#CHECK: rll	%r0,%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: rll	%r0,%r0,0(%r1,%r2)
-
-	rll	%r0,%r0,-524289
-	rll	%r0,%r0,524288
-	rll	%r0,%r0,0(%r0)
-	rll	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-rllg-01.s b/test/MC/SystemZ/insn-rllg-01.s
deleted file mode 100644
index c36dc6daf20b..000000000000
--- a/test/MC/SystemZ/insn-rllg-01.s
+++ /dev/null
@@ -1,27 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: rllg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x1c]
-#CHECK: rllg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x1c]
-#CHECK: rllg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x1c]
-#CHECK: rllg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x1c]
-#CHECK: rllg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x1c]
-#CHECK: rllg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x1c]
-#CHECK: rllg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x1c]
-#CHECK: rllg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x1c]
-#CHECK: rllg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x1c]
-#CHECK: rllg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x1c]
-#CHECK: rllg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x1c]
-#CHECK: rllg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x1c]
-
-	rllg	%r0,%r0,0
-	rllg	%r15,%r1,0
-	rllg	%r1,%r15,0
-	rllg	%r15,%r15,0
-	rllg	%r0,%r0,-524288
-	rllg	%r0,%r0,-1
-	rllg	%r0,%r0,1
-	rllg	%r0,%r0,524287
-	rllg	%r0,%r0,0(%r1)
-	rllg	%r0,%r0,0(%r15)
-	rllg	%r0,%r0,524287(%r1)
-	rllg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-rllg-02.s b/test/MC/SystemZ/insn-rllg-02.s
deleted file mode 100644
index 7f82845aa646..000000000000
--- a/test/MC/SystemZ/insn-rllg-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: rllg	%r0,%r0,-524289
-#CHECK: error: invalid operand
-#CHECK: rllg	%r0,%r0,524288
-#CHECK: error: %r0 used in an address
-#CHECK: rllg	%r0,%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: rllg	%r0,%r0,0(%r1,%r2)
-
-	rllg	%r0,%r0,-524289
-	rllg	%r0,%r0,524288
-	rllg	%r0,%r0,0(%r0)
-	rllg	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-s-01.s b/test/MC/SystemZ/insn-s-01.s
deleted file mode 100644
index 2effedbc6c88..000000000000
--- a/test/MC/SystemZ/insn-s-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: s	%r0, 0                  # encoding: [0x5b,0x00,0x00,0x00]
-#CHECK: s	%r0, 4095               # encoding: [0x5b,0x00,0x0f,0xff]
-#CHECK: s	%r0, 0(%r1)             # encoding: [0x5b,0x00,0x10,0x00]
-#CHECK: s	%r0, 0(%r15)            # encoding: [0x5b,0x00,0xf0,0x00]
-#CHECK: s	%r0, 4095(%r1,%r15)     # encoding: [0x5b,0x01,0xff,0xff]
-#CHECK: s	%r0, 4095(%r15,%r1)     # encoding: [0x5b,0x0f,0x1f,0xff]
-#CHECK: s	%r15, 0                 # encoding: [0x5b,0xf0,0x00,0x00]
-
-	s	%r0, 0
-	s	%r0, 4095
-	s	%r0, 0(%r1)
-	s	%r0, 0(%r15)
-	s	%r0, 4095(%r1,%r15)
-	s	%r0, 4095(%r15,%r1)
-	s	%r15, 0
diff --git a/test/MC/SystemZ/insn-s-02.s b/test/MC/SystemZ/insn-s-02.s
deleted file mode 100644
index f0b4a137ec74..000000000000
--- a/test/MC/SystemZ/insn-s-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: s	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: s	%r0, 4096
-
-	s	%r0, -1
-	s	%r0, 4096
diff --git a/test/MC/SystemZ/insn-sdb-01.s b/test/MC/SystemZ/insn-sdb-01.s
deleted file mode 100644
index 9267796ece45..000000000000
--- a/test/MC/SystemZ/insn-sdb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1b]
-#CHECK: sdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1b]
-#CHECK: sdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1b]
-#CHECK: sdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1b]
-#CHECK: sdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1b]
-#CHECK: sdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1b]
-#CHECK: sdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1b]
-
-	sdb	%f0, 0
-	sdb	%f0, 4095
-	sdb	%f0, 0(%r1)
-	sdb	%f0, 0(%r15)
-	sdb	%f0, 4095(%r1,%r15)
-	sdb	%f0, 4095(%r15,%r1)
-	sdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-sdb-02.s b/test/MC/SystemZ/insn-sdb-02.s
deleted file mode 100644
index c77284fb9a8c..000000000000
--- a/test/MC/SystemZ/insn-sdb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sdb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: sdb	%f0, 4096
-
-	sdb	%f0, -1
-	sdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sdbr-01.s b/test/MC/SystemZ/insn-sdbr-01.s
deleted file mode 100644
index b07f5f2cd8fd..000000000000
--- a/test/MC/SystemZ/insn-sdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sdbr	%f0, %f0                # encoding: [0xb3,0x1b,0x00,0x00]
-#CHECK: sdbr	%f0, %f15               # encoding: [0xb3,0x1b,0x00,0x0f]
-#CHECK: sdbr	%f7, %f8                # encoding: [0xb3,0x1b,0x00,0x78]
-#CHECK: sdbr	%f15, %f0               # encoding: [0xb3,0x1b,0x00,0xf0]
-
-	sdbr	%f0, %f0
-	sdbr	%f0, %f15
-	sdbr	%f7, %f8
-	sdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-seb-01.s b/test/MC/SystemZ/insn-seb-01.s
deleted file mode 100644
index 4bf5cfa06271..000000000000
--- a/test/MC/SystemZ/insn-seb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: seb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0b]
-#CHECK: seb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0b]
-#CHECK: seb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0b]
-#CHECK: seb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0b]
-#CHECK: seb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0b]
-#CHECK: seb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0b]
-#CHECK: seb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0b]
-
-	seb	%f0, 0
-	seb	%f0, 4095
-	seb	%f0, 0(%r1)
-	seb	%f0, 0(%r15)
-	seb	%f0, 4095(%r1,%r15)
-	seb	%f0, 4095(%r15,%r1)
-	seb	%f15, 0
diff --git a/test/MC/SystemZ/insn-seb-02.s b/test/MC/SystemZ/insn-seb-02.s
deleted file mode 100644
index e185a20fdc20..000000000000
--- a/test/MC/SystemZ/insn-seb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: seb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: seb	%f0, 4096
-
-	seb	%f0, -1
-	seb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sebr-01.s b/test/MC/SystemZ/insn-sebr-01.s
deleted file mode 100644
index 467b57cabe01..000000000000
--- a/test/MC/SystemZ/insn-sebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sebr	%f0, %f0                # encoding: [0xb3,0x0b,0x00,0x00]
-#CHECK: sebr	%f0, %f15               # encoding: [0xb3,0x0b,0x00,0x0f]
-#CHECK: sebr	%f7, %f8                # encoding: [0xb3,0x0b,0x00,0x78]
-#CHECK: sebr	%f15, %f0               # encoding: [0xb3,0x0b,0x00,0xf0]
-
-	sebr	%f0, %f0
-	sebr	%f0, %f15
-	sebr	%f7, %f8
-	sebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-sg-01.s b/test/MC/SystemZ/insn-sg-01.s
deleted file mode 100644
index 153df1ae2d67..000000000000
--- a/test/MC/SystemZ/insn-sg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x09]
-#CHECK: sg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x09]
-#CHECK: sg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x09]
-#CHECK: sg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x09]
-#CHECK: sg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x09]
-#CHECK: sg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x09]
-#CHECK: sg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x09]
-#CHECK: sg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x09]
-#CHECK: sg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x09]
-#CHECK: sg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x09]
-
-	sg	%r0, -524288
-	sg	%r0, -1
-	sg	%r0, 0
-	sg	%r0, 1
-	sg	%r0, 524287
-	sg	%r0, 0(%r1)
-	sg	%r0, 0(%r15)
-	sg	%r0, 524287(%r1,%r15)
-	sg	%r0, 524287(%r15,%r1)
-	sg	%r15, 0
diff --git a/test/MC/SystemZ/insn-sg-02.s b/test/MC/SystemZ/insn-sg-02.s
deleted file mode 100644
index f183e58f7b74..000000000000
--- a/test/MC/SystemZ/insn-sg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: sg	%r0, 524288
-
-	sg	%r0, -524289
-	sg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sgf-01.s b/test/MC/SystemZ/insn-sgf-01.s
deleted file mode 100644
index 844c09998d38..000000000000
--- a/test/MC/SystemZ/insn-sgf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x19]
-#CHECK: sgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x19]
-#CHECK: sgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x19]
-#CHECK: sgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x19]
-#CHECK: sgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x19]
-#CHECK: sgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x19]
-#CHECK: sgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x19]
-#CHECK: sgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x19]
-#CHECK: sgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x19]
-#CHECK: sgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x19]
-
-	sgf	%r0, -524288
-	sgf	%r0, -1
-	sgf	%r0, 0
-	sgf	%r0, 1
-	sgf	%r0, 524287
-	sgf	%r0, 0(%r1)
-	sgf	%r0, 0(%r15)
-	sgf	%r0, 524287(%r1,%r15)
-	sgf	%r0, 524287(%r15,%r1)
-	sgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-sgf-02.s b/test/MC/SystemZ/insn-sgf-02.s
deleted file mode 100644
index 7eba3abe2c3f..000000000000
--- a/test/MC/SystemZ/insn-sgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: sgf	%r0, 524288
-
-	sgf	%r0, -524289
-	sgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sgfr-01.s b/test/MC/SystemZ/insn-sgfr-01.s
deleted file mode 100644
index 49a1412f8814..000000000000
--- a/test/MC/SystemZ/insn-sgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sgfr	%r0, %r0                # encoding: [0xb9,0x19,0x00,0x00]
-#CHECK: sgfr	%r0, %r15               # encoding: [0xb9,0x19,0x00,0x0f]
-#CHECK: sgfr	%r15, %r0               # encoding: [0xb9,0x19,0x00,0xf0]
-#CHECK: sgfr	%r7, %r8                # encoding: [0xb9,0x19,0x00,0x78]
-
-	sgfr	%r0,%r0
-	sgfr	%r0,%r15
-	sgfr	%r15,%r0
-	sgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sgr-01.s b/test/MC/SystemZ/insn-sgr-01.s
deleted file mode 100644
index 86c59a145254..000000000000
--- a/test/MC/SystemZ/insn-sgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sgr	%r0, %r0                # encoding: [0xb9,0x09,0x00,0x00]
-#CHECK: sgr	%r0, %r15               # encoding: [0xb9,0x09,0x00,0x0f]
-#CHECK: sgr	%r15, %r0               # encoding: [0xb9,0x09,0x00,0xf0]
-#CHECK: sgr	%r7, %r8                # encoding: [0xb9,0x09,0x00,0x78]
-
-	sgr	%r0,%r0
-	sgr	%r0,%r15
-	sgr	%r15,%r0
-	sgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sl-01.s b/test/MC/SystemZ/insn-sl-01.s
deleted file mode 100644
index c2186da486cf..000000000000
--- a/test/MC/SystemZ/insn-sl-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sl	%r0, 0                  # encoding: [0x5f,0x00,0x00,0x00]
-#CHECK: sl	%r0, 4095               # encoding: [0x5f,0x00,0x0f,0xff]
-#CHECK: sl	%r0, 0(%r1)             # encoding: [0x5f,0x00,0x10,0x00]
-#CHECK: sl	%r0, 0(%r15)            # encoding: [0x5f,0x00,0xf0,0x00]
-#CHECK: sl	%r0, 4095(%r1,%r15)     # encoding: [0x5f,0x01,0xff,0xff]
-#CHECK: sl	%r0, 4095(%r15,%r1)     # encoding: [0x5f,0x0f,0x1f,0xff]
-#CHECK: sl	%r15, 0                 # encoding: [0x5f,0xf0,0x00,0x00]
-
-	sl	%r0, 0
-	sl	%r0, 4095
-	sl	%r0, 0(%r1)
-	sl	%r0, 0(%r15)
-	sl	%r0, 4095(%r1,%r15)
-	sl	%r0, 4095(%r15,%r1)
-	sl	%r15, 0
diff --git a/test/MC/SystemZ/insn-sl-02.s b/test/MC/SystemZ/insn-sl-02.s
deleted file mode 100644
index 8abd99d23514..000000000000
--- a/test/MC/SystemZ/insn-sl-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sl	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: sl	%r0, 4096
-
-	sl	%r0, -1
-	sl	%r0, 4096
diff --git a/test/MC/SystemZ/insn-slb-01.s b/test/MC/SystemZ/insn-slb-01.s
deleted file mode 100644
index 4bc79f6746a9..000000000000
--- a/test/MC/SystemZ/insn-slb-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x99]
-#CHECK: slb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x99]
-#CHECK: slb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x99]
-#CHECK: slb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x99]
-#CHECK: slb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x99]
-#CHECK: slb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x99]
-#CHECK: slb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x99]
-#CHECK: slb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x99]
-#CHECK: slb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x99]
-#CHECK: slb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x99]
-
-	slb	%r0, -524288
-	slb	%r0, -1
-	slb	%r0, 0
-	slb	%r0, 1
-	slb	%r0, 524287
-	slb	%r0, 0(%r1)
-	slb	%r0, 0(%r15)
-	slb	%r0, 524287(%r1,%r15)
-	slb	%r0, 524287(%r15,%r1)
-	slb	%r15, 0
diff --git a/test/MC/SystemZ/insn-slb-02.s b/test/MC/SystemZ/insn-slb-02.s
deleted file mode 100644
index ac87128ffc4d..000000000000
--- a/test/MC/SystemZ/insn-slb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: slb	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: slb	%r0, 524288
-
-	slb	%r0, -524289
-	slb	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slbg-01.s b/test/MC/SystemZ/insn-slbg-01.s
deleted file mode 100644
index 8878aeda5edc..000000000000
--- a/test/MC/SystemZ/insn-slbg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slbg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x89]
-#CHECK: slbg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x89]
-#CHECK: slbg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x89]
-#CHECK: slbg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x89]
-#CHECK: slbg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x89]
-#CHECK: slbg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x89]
-#CHECK: slbg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x89]
-#CHECK: slbg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x89]
-#CHECK: slbg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x89]
-#CHECK: slbg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x89]
-
-	slbg	%r0, -524288
-	slbg	%r0, -1
-	slbg	%r0, 0
-	slbg	%r0, 1
-	slbg	%r0, 524287
-	slbg	%r0, 0(%r1)
-	slbg	%r0, 0(%r15)
-	slbg	%r0, 524287(%r1,%r15)
-	slbg	%r0, 524287(%r15,%r1)
-	slbg	%r15, 0
diff --git a/test/MC/SystemZ/insn-slbg-02.s b/test/MC/SystemZ/insn-slbg-02.s
deleted file mode 100644
index ce09c8a9de51..000000000000
--- a/test/MC/SystemZ/insn-slbg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: slbg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: slbg	%r0, 524288
-
-	slbg	%r0, -524289
-	slbg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slbgr-01.s b/test/MC/SystemZ/insn-slbgr-01.s
deleted file mode 100644
index bcc2b535ba7b..000000000000
--- a/test/MC/SystemZ/insn-slbgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slbgr	%r0, %r0                # encoding: [0xb9,0x89,0x00,0x00]
-#CHECK: slbgr	%r0, %r15               # encoding: [0xb9,0x89,0x00,0x0f]
-#CHECK: slbgr	%r15, %r0               # encoding: [0xb9,0x89,0x00,0xf0]
-#CHECK: slbgr	%r7, %r8                # encoding: [0xb9,0x89,0x00,0x78]
-
-	slbgr	%r0,%r0
-	slbgr	%r0,%r15
-	slbgr	%r15,%r0
-	slbgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-slbr-01.s b/test/MC/SystemZ/insn-slbr-01.s
deleted file mode 100644
index 9c10dbd02106..000000000000
--- a/test/MC/SystemZ/insn-slbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slbr	%r0, %r0                # encoding: [0xb9,0x99,0x00,0x00]
-#CHECK: slbr	%r0, %r15               # encoding: [0xb9,0x99,0x00,0x0f]
-#CHECK: slbr	%r15, %r0               # encoding: [0xb9,0x99,0x00,0xf0]
-#CHECK: slbr	%r7, %r8                # encoding: [0xb9,0x99,0x00,0x78]
-
-	slbr	%r0,%r0
-	slbr	%r0,%r15
-	slbr	%r15,%r0
-	slbr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-slfi-01.s b/test/MC/SystemZ/insn-slfi-01.s
deleted file mode 100644
index 4c8e5b4a84d7..000000000000
--- a/test/MC/SystemZ/insn-slfi-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slfi	%r0, 0                  # encoding: [0xc2,0x05,0x00,0x00,0x00,0x00]
-#CHECK: slfi	%r0, 4294967295         # encoding: [0xc2,0x05,0xff,0xff,0xff,0xff]
-#CHECK: slfi	%r15, 0                 # encoding: [0xc2,0xf5,0x00,0x00,0x00,0x00]
-
-	slfi	%r0, 0
-	slfi	%r0, (1 << 32) - 1
-	slfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-slfi-02.s b/test/MC/SystemZ/insn-slfi-02.s
deleted file mode 100644
index 12e14f6164b0..000000000000
--- a/test/MC/SystemZ/insn-slfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: slfi	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: slfi	%r0, (1 << 32)
-
-	slfi	%r0, -1
-	slfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-slg-01.s b/test/MC/SystemZ/insn-slg-01.s
deleted file mode 100644
index 0b4f99e2e139..000000000000
--- a/test/MC/SystemZ/insn-slg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0b]
-#CHECK: slg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0b]
-#CHECK: slg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0b]
-#CHECK: slg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0b]
-#CHECK: slg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0b]
-#CHECK: slg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0b]
-#CHECK: slg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0b]
-#CHECK: slg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0b]
-#CHECK: slg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0b]
-#CHECK: slg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0b]
-
-	slg	%r0, -524288
-	slg	%r0, -1
-	slg	%r0, 0
-	slg	%r0, 1
-	slg	%r0, 524287
-	slg	%r0, 0(%r1)
-	slg	%r0, 0(%r15)
-	slg	%r0, 524287(%r1,%r15)
-	slg	%r0, 524287(%r15,%r1)
-	slg	%r15, 0
diff --git a/test/MC/SystemZ/insn-slg-02.s b/test/MC/SystemZ/insn-slg-02.s
deleted file mode 100644
index 1ad04c6111d0..000000000000
--- a/test/MC/SystemZ/insn-slg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: slg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: slg	%r0, 524288
-
-	slg	%r0, -524289
-	slg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slgf-01.s b/test/MC/SystemZ/insn-slgf-01.s
deleted file mode 100644
index bca480ecd77a..000000000000
--- a/test/MC/SystemZ/insn-slgf-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1b]
-#CHECK: slgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1b]
-#CHECK: slgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1b]
-#CHECK: slgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1b]
-#CHECK: slgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1b]
-#CHECK: slgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1b]
-#CHECK: slgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1b]
-#CHECK: slgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1b]
-#CHECK: slgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1b]
-#CHECK: slgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1b]
-
-	slgf	%r0, -524288
-	slgf	%r0, -1
-	slgf	%r0, 0
-	slgf	%r0, 1
-	slgf	%r0, 524287
-	slgf	%r0, 0(%r1)
-	slgf	%r0, 0(%r15)
-	slgf	%r0, 524287(%r1,%r15)
-	slgf	%r0, 524287(%r15,%r1)
-	slgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-slgf-02.s b/test/MC/SystemZ/insn-slgf-02.s
deleted file mode 100644
index 71a9aa7812e5..000000000000
--- a/test/MC/SystemZ/insn-slgf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: slgf	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: slgf	%r0, 524288
-
-	slgf	%r0, -524289
-	slgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slgfi-01.s b/test/MC/SystemZ/insn-slgfi-01.s
deleted file mode 100644
index c9fef187f5ad..000000000000
--- a/test/MC/SystemZ/insn-slgfi-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slgfi	%r0, 0                  # encoding: [0xc2,0x04,0x00,0x00,0x00,0x00]
-#CHECK: slgfi	%r0, 4294967295         # encoding: [0xc2,0x04,0xff,0xff,0xff,0xff]
-#CHECK: slgfi	%r15, 0                 # encoding: [0xc2,0xf4,0x00,0x00,0x00,0x00]
-
-	slgfi	%r0, 0
-	slgfi	%r0, (1 << 32) - 1
-	slgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-slgfi-02.s b/test/MC/SystemZ/insn-slgfi-02.s
deleted file mode 100644
index 696408d377e3..000000000000
--- a/test/MC/SystemZ/insn-slgfi-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: slgfi	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: slgfi	%r0, (1 << 32)
-
-	slgfi	%r0, -1
-	slgfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-slgfr-01.s b/test/MC/SystemZ/insn-slgfr-01.s
deleted file mode 100644
index 94c10edcd3b6..000000000000
--- a/test/MC/SystemZ/insn-slgfr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slgfr	%r0, %r0                # encoding: [0xb9,0x1b,0x00,0x00]
-#CHECK: slgfr	%r0, %r15               # encoding: [0xb9,0x1b,0x00,0x0f]
-#CHECK: slgfr	%r15, %r0               # encoding: [0xb9,0x1b,0x00,0xf0]
-#CHECK: slgfr	%r7, %r8                # encoding: [0xb9,0x1b,0x00,0x78]
-
-	slgfr	%r0,%r0
-	slgfr	%r0,%r15
-	slgfr	%r15,%r0
-	slgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-slgr-01.s b/test/MC/SystemZ/insn-slgr-01.s
deleted file mode 100644
index 4d226be83e6e..000000000000
--- a/test/MC/SystemZ/insn-slgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slgr	%r0, %r0                # encoding: [0xb9,0x0b,0x00,0x00]
-#CHECK: slgr	%r0, %r15               # encoding: [0xb9,0x0b,0x00,0x0f]
-#CHECK: slgr	%r15, %r0               # encoding: [0xb9,0x0b,0x00,0xf0]
-#CHECK: slgr	%r7, %r8                # encoding: [0xb9,0x0b,0x00,0x78]
-
-	slgr	%r0,%r0
-	slgr	%r0,%r15
-	slgr	%r15,%r0
-	slgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sll-01.s b/test/MC/SystemZ/insn-sll-01.s
deleted file mode 100644
index 5bc112856d70..000000000000
--- a/test/MC/SystemZ/insn-sll-01.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sll	%r0, 0                  # encoding: [0x89,0x00,0x00,0x00]
-#CHECK: sll	%r7, 0                  # encoding: [0x89,0x70,0x00,0x00]
-#CHECK: sll	%r15, 0                 # encoding: [0x89,0xf0,0x00,0x00]
-#CHECK: sll	%r0, 4095               # encoding: [0x89,0x00,0x0f,0xff]
-#CHECK: sll	%r0, 0(%r1)             # encoding: [0x89,0x00,0x10,0x00]
-#CHECK: sll	%r0, 0(%r15)            # encoding: [0x89,0x00,0xf0,0x00]
-#CHECK: sll	%r0, 4095(%r1)          # encoding: [0x89,0x00,0x1f,0xff]
-#CHECK: sll	%r0, 4095(%r15)         # encoding: [0x89,0x00,0xff,0xff]
-
-	sll	%r0,0
-	sll	%r7,0
-	sll	%r15,0
-	sll	%r0,4095
-	sll	%r0,0(%r1)
-	sll	%r0,0(%r15)
-	sll	%r0,4095(%r1)
-	sll	%r0,4095(%r15)
diff --git a/test/MC/SystemZ/insn-sll-02.s b/test/MC/SystemZ/insn-sll-02.s
deleted file mode 100644
index 1b951be02c27..000000000000
--- a/test/MC/SystemZ/insn-sll-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sll	%r0,-1
-#CHECK: error: invalid operand
-#CHECK: sll	%r0,4096
-#CHECK: error: %r0 used in an address
-#CHECK: sll	%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: sll	%r0,0(%r1,%r2)
-
-	sll	%r0,-1
-	sll	%r0,4096
-	sll	%r0,0(%r0)
-	sll	%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-sllg-01.s b/test/MC/SystemZ/insn-sllg-01.s
deleted file mode 100644
index 1b0f0094676f..000000000000
--- a/test/MC/SystemZ/insn-sllg-01.s
+++ /dev/null
@@ -1,27 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sllg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0d]
-#CHECK: sllg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0d]
-#CHECK: sllg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0d]
-#CHECK: sllg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0d]
-#CHECK: sllg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0d]
-#CHECK: sllg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0d]
-#CHECK: sllg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0d]
-#CHECK: sllg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0d]
-#CHECK: sllg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0d]
-#CHECK: sllg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0d]
-#CHECK: sllg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0d]
-#CHECK: sllg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0d]
-
-	sllg	%r0,%r0,0
-	sllg	%r15,%r1,0
-	sllg	%r1,%r15,0
-	sllg	%r15,%r15,0
-	sllg	%r0,%r0,-524288
-	sllg	%r0,%r0,-1
-	sllg	%r0,%r0,1
-	sllg	%r0,%r0,524287
-	sllg	%r0,%r0,0(%r1)
-	sllg	%r0,%r0,0(%r15)
-	sllg	%r0,%r0,524287(%r1)
-	sllg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-sllg-02.s b/test/MC/SystemZ/insn-sllg-02.s
deleted file mode 100644
index 68c3d1da29f4..000000000000
--- a/test/MC/SystemZ/insn-sllg-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sllg	%r0,%r0,-524289
-#CHECK: error: invalid operand
-#CHECK: sllg	%r0,%r0,524288
-#CHECK: error: %r0 used in an address
-#CHECK: sllg	%r0,%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: sllg	%r0,%r0,0(%r1,%r2)
-
-	sllg	%r0,%r0,-524289
-	sllg	%r0,%r0,524288
-	sllg	%r0,%r0,0(%r0)
-	sllg	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-slr-01.s b/test/MC/SystemZ/insn-slr-01.s
deleted file mode 100644
index c142407cff64..000000000000
--- a/test/MC/SystemZ/insn-slr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: slr	%r0, %r0                # encoding: [0x1f,0x00]
-#CHECK: slr	%r0, %r15               # encoding: [0x1f,0x0f]
-#CHECK: slr	%r15, %r0               # encoding: [0x1f,0xf0]
-#CHECK: slr	%r7, %r8                # encoding: [0x1f,0x78]
-
-	slr	%r0,%r0
-	slr	%r0,%r15
-	slr	%r15,%r0
-	slr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sly-01.s b/test/MC/SystemZ/insn-sly-01.s
deleted file mode 100644
index 59d2907a4d8c..000000000000
--- a/test/MC/SystemZ/insn-sly-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5f]
-#CHECK: sly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5f]
-#CHECK: sly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5f]
-#CHECK: sly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5f]
-#CHECK: sly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5f]
-#CHECK: sly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5f]
-#CHECK: sly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5f]
-#CHECK: sly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5f]
-#CHECK: sly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5f]
-#CHECK: sly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5f]
-
-	sly	%r0, -524288
-	sly	%r0, -1
-	sly	%r0, 0
-	sly	%r0, 1
-	sly	%r0, 524287
-	sly	%r0, 0(%r1)
-	sly	%r0, 0(%r15)
-	sly	%r0, 524287(%r1,%r15)
-	sly	%r0, 524287(%r15,%r1)
-	sly	%r15, 0
diff --git a/test/MC/SystemZ/insn-sly-02.s b/test/MC/SystemZ/insn-sly-02.s
deleted file mode 100644
index 9abd53e1ab1e..000000000000
--- a/test/MC/SystemZ/insn-sly-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sly	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: sly	%r0, 524288
-
-	sly	%r0, -524289
-	sly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sqdb-01.s b/test/MC/SystemZ/insn-sqdb-01.s
deleted file mode 100644
index b79aae144429..000000000000
--- a/test/MC/SystemZ/insn-sqdb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sqdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x15]
-#CHECK: sqdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x15]
-#CHECK: sqdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x15]
-#CHECK: sqdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x15]
-#CHECK: sqdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x15]
-#CHECK: sqdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x15]
-#CHECK: sqdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x15]
-
-	sqdb	%f0, 0
-	sqdb	%f0, 4095
-	sqdb	%f0, 0(%r1)
-	sqdb	%f0, 0(%r15)
-	sqdb	%f0, 4095(%r1,%r15)
-	sqdb	%f0, 4095(%r15,%r1)
-	sqdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-sqdb-02.s b/test/MC/SystemZ/insn-sqdb-02.s
deleted file mode 100644
index 68df26777045..000000000000
--- a/test/MC/SystemZ/insn-sqdb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sqdb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: sqdb	%f0, 4096
-
-	sqdb	%f0, -1
-	sqdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sqdbr-01.s b/test/MC/SystemZ/insn-sqdbr-01.s
deleted file mode 100644
index d66415fa0523..000000000000
--- a/test/MC/SystemZ/insn-sqdbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sqdbr	%f0, %f0                # encoding: [0xb3,0x15,0x00,0x00]
-#CHECK: sqdbr	%f0, %f15               # encoding: [0xb3,0x15,0x00,0x0f]
-#CHECK: sqdbr	%f7, %f8                # encoding: [0xb3,0x15,0x00,0x78]
-#CHECK: sqdbr	%f15, %f0               # encoding: [0xb3,0x15,0x00,0xf0]
-
-	sqdbr	%f0, %f0
-	sqdbr	%f0, %f15
-	sqdbr	%f7, %f8
-	sqdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-sqeb-01.s b/test/MC/SystemZ/insn-sqeb-01.s
deleted file mode 100644
index 60f6e9063572..000000000000
--- a/test/MC/SystemZ/insn-sqeb-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sqeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x14]
-#CHECK: sqeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x14]
-#CHECK: sqeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x14]
-#CHECK: sqeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x14]
-#CHECK: sqeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x14]
-#CHECK: sqeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x14]
-#CHECK: sqeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x14]
-
-	sqeb	%f0, 0
-	sqeb	%f0, 4095
-	sqeb	%f0, 0(%r1)
-	sqeb	%f0, 0(%r15)
-	sqeb	%f0, 4095(%r1,%r15)
-	sqeb	%f0, 4095(%r15,%r1)
-	sqeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-sqeb-02.s b/test/MC/SystemZ/insn-sqeb-02.s
deleted file mode 100644
index efb09fcfbc75..000000000000
--- a/test/MC/SystemZ/insn-sqeb-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sqeb	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: sqeb	%f0, 4096
-
-	sqeb	%f0, -1
-	sqeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sqebr-01.s b/test/MC/SystemZ/insn-sqebr-01.s
deleted file mode 100644
index 2d13dbeb2654..000000000000
--- a/test/MC/SystemZ/insn-sqebr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sqebr	%f0, %f0                # encoding: [0xb3,0x14,0x00,0x00]
-#CHECK: sqebr	%f0, %f15               # encoding: [0xb3,0x14,0x00,0x0f]
-#CHECK: sqebr	%f7, %f8                # encoding: [0xb3,0x14,0x00,0x78]
-#CHECK: sqebr	%f15, %f0               # encoding: [0xb3,0x14,0x00,0xf0]
-
-	sqebr	%f0, %f0
-	sqebr	%f0, %f15
-	sqebr	%f7, %f8
-	sqebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-sqxbr-01.s b/test/MC/SystemZ/insn-sqxbr-01.s
deleted file mode 100644
index 78ba908d5af0..000000000000
--- a/test/MC/SystemZ/insn-sqxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sqxbr	%f0, %f0                # encoding: [0xb3,0x16,0x00,0x00]
-#CHECK: sqxbr	%f0, %f13               # encoding: [0xb3,0x16,0x00,0x0d]
-#CHECK: sqxbr	%f8, %f8                # encoding: [0xb3,0x16,0x00,0x88]
-#CHECK: sqxbr	%f13, %f0               # encoding: [0xb3,0x16,0x00,0xd0]
-
-	sqxbr	%f0, %f0
-	sqxbr	%f0, %f13
-	sqxbr	%f8, %f8
-	sqxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-sqxbr-02.s b/test/MC/SystemZ/insn-sqxbr-02.s
deleted file mode 100644
index e51e552fb70d..000000000000
--- a/test/MC/SystemZ/insn-sqxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: sqxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: sqxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: sqxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: sqxbr	%f14, %f0
-
-	sqxbr	%f0, %f2
-	sqxbr	%f0, %f14
-	sqxbr	%f2, %f0
-	sqxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-sr-01.s b/test/MC/SystemZ/insn-sr-01.s
deleted file mode 100644
index 856bef5eac99..000000000000
--- a/test/MC/SystemZ/insn-sr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sr	%r0, %r0                # encoding: [0x1b,0x00]
-#CHECK: sr	%r0, %r15               # encoding: [0x1b,0x0f]
-#CHECK: sr	%r15, %r0               # encoding: [0x1b,0xf0]
-#CHECK: sr	%r7, %r8                # encoding: [0x1b,0x78]
-
-	sr	%r0,%r0
-	sr	%r0,%r15
-	sr	%r15,%r0
-	sr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sra-01.s b/test/MC/SystemZ/insn-sra-01.s
deleted file mode 100644
index fcdaf5de1625..000000000000
--- a/test/MC/SystemZ/insn-sra-01.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sra	%r0, 0                  # encoding: [0x8a,0x00,0x00,0x00]
-#CHECK: sra	%r7, 0                  # encoding: [0x8a,0x70,0x00,0x00]
-#CHECK: sra	%r15, 0                 # encoding: [0x8a,0xf0,0x00,0x00]
-#CHECK: sra	%r0, 4095               # encoding: [0x8a,0x00,0x0f,0xff]
-#CHECK: sra	%r0, 0(%r1)             # encoding: [0x8a,0x00,0x10,0x00]
-#CHECK: sra	%r0, 0(%r15)            # encoding: [0x8a,0x00,0xf0,0x00]
-#CHECK: sra	%r0, 4095(%r1)          # encoding: [0x8a,0x00,0x1f,0xff]
-#CHECK: sra	%r0, 4095(%r15)         # encoding: [0x8a,0x00,0xff,0xff]
-
-	sra	%r0,0
-	sra	%r7,0
-	sra	%r15,0
-	sra	%r0,4095
-	sra	%r0,0(%r1)
-	sra	%r0,0(%r15)
-	sra	%r0,4095(%r1)
-	sra	%r0,4095(%r15)
diff --git a/test/MC/SystemZ/insn-sra-02.s b/test/MC/SystemZ/insn-sra-02.s
deleted file mode 100644
index 7a84f1774331..000000000000
--- a/test/MC/SystemZ/insn-sra-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sra	%r0,-1
-#CHECK: error: invalid operand
-#CHECK: sra	%r0,4096
-#CHECK: error: %r0 used in an address
-#CHECK: sra	%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: sra	%r0,0(%r1,%r2)
-
-	sra	%r0,-1
-	sra	%r0,4096
-	sra	%r0,0(%r0)
-	sra	%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-srag-01.s b/test/MC/SystemZ/insn-srag-01.s
deleted file mode 100644
index 9271db237d99..000000000000
--- a/test/MC/SystemZ/insn-srag-01.s
+++ /dev/null
@@ -1,27 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: srag	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0a]
-#CHECK: srag	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0a]
-#CHECK: srag	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0a]
-#CHECK: srag	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0a]
-#CHECK: srag	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0a]
-#CHECK: srag	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0a]
-#CHECK: srag	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0a]
-#CHECK: srag	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0a]
-#CHECK: srag	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0a]
-#CHECK: srag	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0a]
-#CHECK: srag	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0a]
-#CHECK: srag	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0a]
-
-	srag	%r0,%r0,0
-	srag	%r15,%r1,0
-	srag	%r1,%r15,0
-	srag	%r15,%r15,0
-	srag	%r0,%r0,-524288
-	srag	%r0,%r0,-1
-	srag	%r0,%r0,1
-	srag	%r0,%r0,524287
-	srag	%r0,%r0,0(%r1)
-	srag	%r0,%r0,0(%r15)
-	srag	%r0,%r0,524287(%r1)
-	srag	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-srag-02.s b/test/MC/SystemZ/insn-srag-02.s
deleted file mode 100644
index 7413cbaf9fc3..000000000000
--- a/test/MC/SystemZ/insn-srag-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: srag	%r0,%r0,-524289
-#CHECK: error: invalid operand
-#CHECK: srag	%r0,%r0,524288
-#CHECK: error: %r0 used in an address
-#CHECK: srag	%r0,%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: srag	%r0,%r0,0(%r1,%r2)
-
-	srag	%r0,%r0,-524289
-	srag	%r0,%r0,524288
-	srag	%r0,%r0,0(%r0)
-	srag	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-srl-01.s b/test/MC/SystemZ/insn-srl-01.s
deleted file mode 100644
index a3a5df8a2bce..000000000000
--- a/test/MC/SystemZ/insn-srl-01.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: srl	%r0, 0                  # encoding: [0x88,0x00,0x00,0x00]
-#CHECK: srl	%r7, 0                  # encoding: [0x88,0x70,0x00,0x00]
-#CHECK: srl	%r15, 0                 # encoding: [0x88,0xf0,0x00,0x00]
-#CHECK: srl	%r0, 4095               # encoding: [0x88,0x00,0x0f,0xff]
-#CHECK: srl	%r0, 0(%r1)             # encoding: [0x88,0x00,0x10,0x00]
-#CHECK: srl	%r0, 0(%r15)            # encoding: [0x88,0x00,0xf0,0x00]
-#CHECK: srl	%r0, 4095(%r1)          # encoding: [0x88,0x00,0x1f,0xff]
-#CHECK: srl	%r0, 4095(%r15)         # encoding: [0x88,0x00,0xff,0xff]
-
-	srl	%r0,0
-	srl	%r7,0
-	srl	%r15,0
-	srl	%r0,4095
-	srl	%r0,0(%r1)
-	srl	%r0,0(%r15)
-	srl	%r0,4095(%r1)
-	srl	%r0,4095(%r15)
diff --git a/test/MC/SystemZ/insn-srl-02.s b/test/MC/SystemZ/insn-srl-02.s
deleted file mode 100644
index 212d16bd8d56..000000000000
--- a/test/MC/SystemZ/insn-srl-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: srl	%r0,-1
-#CHECK: error: invalid operand
-#CHECK: srl	%r0,4096
-#CHECK: error: %r0 used in an address
-#CHECK: srl	%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: srl	%r0,0(%r1,%r2)
-
-	srl	%r0,-1
-	srl	%r0,4096
-	srl	%r0,0(%r0)
-	srl	%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-srlg-01.s b/test/MC/SystemZ/insn-srlg-01.s
deleted file mode 100644
index 0087fefd9285..000000000000
--- a/test/MC/SystemZ/insn-srlg-01.s
+++ /dev/null
@@ -1,27 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: srlg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0c]
-#CHECK: srlg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0c]
-#CHECK: srlg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0c]
-#CHECK: srlg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0c]
-#CHECK: srlg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0c]
-#CHECK: srlg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0c]
-#CHECK: srlg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0c]
-#CHECK: srlg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0c]
-#CHECK: srlg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0c]
-#CHECK: srlg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0c]
-#CHECK: srlg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0c]
-#CHECK: srlg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0c]
-
-	srlg	%r0,%r0,0
-	srlg	%r15,%r1,0
-	srlg	%r1,%r15,0
-	srlg	%r15,%r15,0
-	srlg	%r0,%r0,-524288
-	srlg	%r0,%r0,-1
-	srlg	%r0,%r0,1
-	srlg	%r0,%r0,524287
-	srlg	%r0,%r0,0(%r1)
-	srlg	%r0,%r0,0(%r15)
-	srlg	%r0,%r0,524287(%r1)
-	srlg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-srlg-02.s b/test/MC/SystemZ/insn-srlg-02.s
deleted file mode 100644
index 1e24d0e407f3..000000000000
--- a/test/MC/SystemZ/insn-srlg-02.s
+++ /dev/null
@@ -1,16 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: srlg	%r0,%r0,-524289
-#CHECK: error: invalid operand
-#CHECK: srlg	%r0,%r0,524288
-#CHECK: error: %r0 used in an address
-#CHECK: srlg	%r0,%r0,0(%r0)
-#CHECK: error: invalid use of indexed addressing
-#CHECK: srlg	%r0,%r0,0(%r1,%r2)
-
-	srlg	%r0,%r0,-524289
-	srlg	%r0,%r0,524288
-	srlg	%r0,%r0,0(%r0)
-	srlg	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-st-01.s b/test/MC/SystemZ/insn-st-01.s
deleted file mode 100644
index 0b5fdb682c68..000000000000
--- a/test/MC/SystemZ/insn-st-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: st	%r0, 0                  # encoding: [0x50,0x00,0x00,0x00]
-#CHECK: st	%r0, 4095               # encoding: [0x50,0x00,0x0f,0xff]
-#CHECK: st	%r0, 0(%r1)             # encoding: [0x50,0x00,0x10,0x00]
-#CHECK: st	%r0, 0(%r15)            # encoding: [0x50,0x00,0xf0,0x00]
-#CHECK: st	%r0, 4095(%r1,%r15)     # encoding: [0x50,0x01,0xff,0xff]
-#CHECK: st	%r0, 4095(%r15,%r1)     # encoding: [0x50,0x0f,0x1f,0xff]
-#CHECK: st	%r15, 0                 # encoding: [0x50,0xf0,0x00,0x00]
-
-	st	%r0, 0
-	st	%r0, 4095
-	st	%r0, 0(%r1)
-	st	%r0, 0(%r15)
-	st	%r0, 4095(%r1,%r15)
-	st	%r0, 4095(%r15,%r1)
-	st	%r15, 0
diff --git a/test/MC/SystemZ/insn-st-02.s b/test/MC/SystemZ/insn-st-02.s
deleted file mode 100644
index 63e547a92bc4..000000000000
--- a/test/MC/SystemZ/insn-st-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: st	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: st	%r0, 4096
-
-	st	%r0, -1
-	st	%r0, 4096
diff --git a/test/MC/SystemZ/insn-stc-01.s b/test/MC/SystemZ/insn-stc-01.s
deleted file mode 100644
index 563f8914b643..000000000000
--- a/test/MC/SystemZ/insn-stc-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stc	%r0, 0                  # encoding: [0x42,0x00,0x00,0x00]
-#CHECK: stc	%r0, 4095               # encoding: [0x42,0x00,0x0f,0xff]
-#CHECK: stc	%r0, 0(%r1)             # encoding: [0x42,0x00,0x10,0x00]
-#CHECK: stc	%r0, 0(%r15)            # encoding: [0x42,0x00,0xf0,0x00]
-#CHECK: stc	%r0, 4095(%r1,%r15)     # encoding: [0x42,0x01,0xff,0xff]
-#CHECK: stc	%r0, 4095(%r15,%r1)     # encoding: [0x42,0x0f,0x1f,0xff]
-#CHECK: stc	%r15, 0                 # encoding: [0x42,0xf0,0x00,0x00]
-
-	stc	%r0, 0
-	stc	%r0, 4095
-	stc	%r0, 0(%r1)
-	stc	%r0, 0(%r15)
-	stc	%r0, 4095(%r1,%r15)
-	stc	%r0, 4095(%r15,%r1)
-	stc	%r15, 0
diff --git a/test/MC/SystemZ/insn-stc-02.s b/test/MC/SystemZ/insn-stc-02.s
deleted file mode 100644
index aa7dcb29df6f..000000000000
--- a/test/MC/SystemZ/insn-stc-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: stc	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: stc	%r0, 4096
-
-	stc	%r0, -1
-	stc	%r0, 4096
diff --git a/test/MC/SystemZ/insn-stcy-01.s b/test/MC/SystemZ/insn-stcy-01.s
deleted file mode 100644
index acc7ac5996e2..000000000000
--- a/test/MC/SystemZ/insn-stcy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stcy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x72]
-#CHECK: stcy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x72]
-#CHECK: stcy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x72]
-#CHECK: stcy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x72]
-#CHECK: stcy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x72]
-#CHECK: stcy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x72]
-#CHECK: stcy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x72]
-#CHECK: stcy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x72]
-#CHECK: stcy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x72]
-#CHECK: stcy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x72]
-
-	stcy	%r0, -524288
-	stcy	%r0, -1
-	stcy	%r0, 0
-	stcy	%r0, 1
-	stcy	%r0, 524287
-	stcy	%r0, 0(%r1)
-	stcy	%r0, 0(%r15)
-	stcy	%r0, 524287(%r1,%r15)
-	stcy	%r0, 524287(%r15,%r1)
-	stcy	%r15, 0
diff --git a/test/MC/SystemZ/insn-stcy-02.s b/test/MC/SystemZ/insn-stcy-02.s
deleted file mode 100644
index cbd7f7abdec9..000000000000
--- a/test/MC/SystemZ/insn-stcy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: stcy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: stcy	%r0, 524288
-
-	stcy	%r0, -524289
-	stcy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-std-01.s b/test/MC/SystemZ/insn-std-01.s
deleted file mode 100644
index 6867df8bb167..000000000000
--- a/test/MC/SystemZ/insn-std-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: std	%f0, 0                  # encoding: [0x60,0x00,0x00,0x00]
-#CHECK: std	%f0, 4095               # encoding: [0x60,0x00,0x0f,0xff]
-#CHECK: std	%f0, 0(%r1)             # encoding: [0x60,0x00,0x10,0x00]
-#CHECK: std	%f0, 0(%r15)            # encoding: [0x60,0x00,0xf0,0x00]
-#CHECK: std	%f0, 4095(%r1,%r15)     # encoding: [0x60,0x01,0xff,0xff]
-#CHECK: std	%f0, 4095(%r15,%r1)     # encoding: [0x60,0x0f,0x1f,0xff]
-#CHECK: std	%f15, 0                 # encoding: [0x60,0xf0,0x00,0x00]
-
-	std	%f0, 0
-	std	%f0, 4095
-	std	%f0, 0(%r1)
-	std	%f0, 0(%r15)
-	std	%f0, 4095(%r1,%r15)
-	std	%f0, 4095(%r15,%r1)
-	std	%f15, 0
diff --git a/test/MC/SystemZ/insn-std-02.s b/test/MC/SystemZ/insn-std-02.s
deleted file mode 100644
index 62bb9eb7fac4..000000000000
--- a/test/MC/SystemZ/insn-std-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: std	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: std	%f0, 4096
-
-	std	%f0, -1
-	std	%f0, 4096
diff --git a/test/MC/SystemZ/insn-stdy-01.s b/test/MC/SystemZ/insn-stdy-01.s
deleted file mode 100644
index 1ae9a7d3d01b..000000000000
--- a/test/MC/SystemZ/insn-stdy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stdy	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x67]
-#CHECK: stdy	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x67]
-#CHECK: stdy	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x67]
-#CHECK: stdy	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x67]
-#CHECK: stdy	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x67]
-#CHECK: stdy	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x67]
-#CHECK: stdy	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x67]
-#CHECK: stdy	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x67]
-#CHECK: stdy	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x67]
-#CHECK: stdy	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x67]
-
-	stdy	%f0, -524288
-	stdy	%f0, -1
-	stdy	%f0, 0
-	stdy	%f0, 1
-	stdy	%f0, 524287
-	stdy	%f0, 0(%r1)
-	stdy	%f0, 0(%r15)
-	stdy	%f0, 524287(%r1,%r15)
-	stdy	%f0, 524287(%r15,%r1)
-	stdy	%f15, 0
diff --git a/test/MC/SystemZ/insn-stdy-02.s b/test/MC/SystemZ/insn-stdy-02.s
deleted file mode 100644
index f9a09a59f1c8..000000000000
--- a/test/MC/SystemZ/insn-stdy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: stdy	%f0, -524289
-#CHECK: error: invalid operand
-#CHECK: stdy	%f0, 524288
-
-	stdy	%f0, -524289
-	stdy	%f0, 524288
diff --git a/test/MC/SystemZ/insn-ste-01.s b/test/MC/SystemZ/insn-ste-01.s
deleted file mode 100644
index 8e245df98579..000000000000
--- a/test/MC/SystemZ/insn-ste-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ste	%f0, 0                  # encoding: [0x70,0x00,0x00,0x00]
-#CHECK: ste	%f0, 4095               # encoding: [0x70,0x00,0x0f,0xff]
-#CHECK: ste	%f0, 0(%r1)             # encoding: [0x70,0x00,0x10,0x00]
-#CHECK: ste	%f0, 0(%r15)            # encoding: [0x70,0x00,0xf0,0x00]
-#CHECK: ste	%f0, 4095(%r1,%r15)     # encoding: [0x70,0x01,0xff,0xff]
-#CHECK: ste	%f0, 4095(%r15,%r1)     # encoding: [0x70,0x0f,0x1f,0xff]
-#CHECK: ste	%f15, 0                 # encoding: [0x70,0xf0,0x00,0x00]
-
-	ste	%f0, 0
-	ste	%f0, 4095
-	ste	%f0, 0(%r1)
-	ste	%f0, 0(%r15)
-	ste	%f0, 4095(%r1,%r15)
-	ste	%f0, 4095(%r15,%r1)
-	ste	%f15, 0
diff --git a/test/MC/SystemZ/insn-ste-02.s b/test/MC/SystemZ/insn-ste-02.s
deleted file mode 100644
index acc50eada847..000000000000
--- a/test/MC/SystemZ/insn-ste-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: ste	%f0, -1
-#CHECK: error: invalid operand
-#CHECK: ste	%f0, 4096
-
-	ste	%f0, -1
-	ste	%f0, 4096
diff --git a/test/MC/SystemZ/insn-stey-01.s b/test/MC/SystemZ/insn-stey-01.s
deleted file mode 100644
index 1f8259365086..000000000000
--- a/test/MC/SystemZ/insn-stey-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stey	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x66]
-#CHECK: stey	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x66]
-#CHECK: stey	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x66]
-#CHECK: stey	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x66]
-#CHECK: stey	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x66]
-#CHECK: stey	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x66]
-#CHECK: stey	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x66]
-#CHECK: stey	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x66]
-#CHECK: stey	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x66]
-#CHECK: stey	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x66]
-
-	stey	%f0, -524288
-	stey	%f0, -1
-	stey	%f0, 0
-	stey	%f0, 1
-	stey	%f0, 524287
-	stey	%f0, 0(%r1)
-	stey	%f0, 0(%r15)
-	stey	%f0, 524287(%r1,%r15)
-	stey	%f0, 524287(%r15,%r1)
-	stey	%f15, 0
diff --git a/test/MC/SystemZ/insn-stey-02.s b/test/MC/SystemZ/insn-stey-02.s
deleted file mode 100644
index 203b016e314a..000000000000
--- a/test/MC/SystemZ/insn-stey-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: stey	%f0, -524289
-#CHECK: error: invalid operand
-#CHECK: stey	%f0, 524288
-
-	stey	%f0, -524289
-	stey	%f0, 524288
diff --git a/test/MC/SystemZ/insn-stg-01.s b/test/MC/SystemZ/insn-stg-01.s
deleted file mode 100644
index e8508d9f4e53..000000000000
--- a/test/MC/SystemZ/insn-stg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x24]
-#CHECK: stg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x24]
-#CHECK: stg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x24]
-#CHECK: stg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x24]
-#CHECK: stg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x24]
-#CHECK: stg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x24]
-#CHECK: stg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x24]
-#CHECK: stg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x24]
-#CHECK: stg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x24]
-#CHECK: stg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x24]
-
-	stg	%r0, -524288
-	stg	%r0, -1
-	stg	%r0, 0
-	stg	%r0, 1
-	stg	%r0, 524287
-	stg	%r0, 0(%r1)
-	stg	%r0, 0(%r15)
-	stg	%r0, 524287(%r1,%r15)
-	stg	%r0, 524287(%r15,%r1)
-	stg	%r15, 0
diff --git a/test/MC/SystemZ/insn-stg-02.s b/test/MC/SystemZ/insn-stg-02.s
deleted file mode 100644
index 1214ad1998c5..000000000000
--- a/test/MC/SystemZ/insn-stg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: stg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: stg	%r0, 524288
-
-	stg	%r0, -524289
-	stg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-stgrl-01.s b/test/MC/SystemZ/insn-stgrl-01.s
deleted file mode 100644
index 729b01dc115b..000000000000
--- a/test/MC/SystemZ/insn-stgrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stgrl	%r0, 2864434397         # encoding: [0xc4,0x0b,0x55,0x5d,0xe6,0x6e]
-#CHECK: stgrl	%r15, 2864434397        # encoding: [0xc4,0xfb,0x55,0x5d,0xe6,0x6e]
-
-	stgrl	%r0,0xaabbccdd
-	stgrl	%r15,0xaabbccdd
-
-#CHECK: stgrl	%r0, foo                # encoding: [0xc4,0x0b,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: stgrl	%r15, foo               # encoding: [0xc4,0xfb,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	stgrl	%r0,foo
-	stgrl	%r15,foo
-
-#CHECK: stgrl	%r3, bar+100            # encoding: [0xc4,0x3b,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: stgrl	%r4, bar+100            # encoding: [0xc4,0x4b,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	stgrl	%r3,bar+100
-	stgrl	%r4,bar+100
-
-#CHECK: stgrl	%r7, frob@PLT           # encoding: [0xc4,0x7b,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: stgrl	%r8, frob@PLT           # encoding: [0xc4,0x8b,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	stgrl	%r7,frob@PLT
-	stgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-sth-01.s b/test/MC/SystemZ/insn-sth-01.s
deleted file mode 100644
index 0dabe345b59e..000000000000
--- a/test/MC/SystemZ/insn-sth-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sth	%r0, 0                  # encoding: [0x40,0x00,0x00,0x00]
-#CHECK: sth	%r0, 4095               # encoding: [0x40,0x00,0x0f,0xff]
-#CHECK: sth	%r0, 0(%r1)             # encoding: [0x40,0x00,0x10,0x00]
-#CHECK: sth	%r0, 0(%r15)            # encoding: [0x40,0x00,0xf0,0x00]
-#CHECK: sth	%r0, 4095(%r1,%r15)     # encoding: [0x40,0x01,0xff,0xff]
-#CHECK: sth	%r0, 4095(%r15,%r1)     # encoding: [0x40,0x0f,0x1f,0xff]
-#CHECK: sth	%r15, 0                 # encoding: [0x40,0xf0,0x00,0x00]
-
-	sth	%r0, 0
-	sth	%r0, 4095
-	sth	%r0, 0(%r1)
-	sth	%r0, 0(%r15)
-	sth	%r0, 4095(%r1,%r15)
-	sth	%r0, 4095(%r15,%r1)
-	sth	%r15, 0
diff --git a/test/MC/SystemZ/insn-sth-02.s b/test/MC/SystemZ/insn-sth-02.s
deleted file mode 100644
index e73c28917b0a..000000000000
--- a/test/MC/SystemZ/insn-sth-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sth	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: sth	%r0, 4096
-
-	sth	%r0, -1
-	sth	%r0, 4096
diff --git a/test/MC/SystemZ/insn-sthrl-01.s b/test/MC/SystemZ/insn-sthrl-01.s
deleted file mode 100644
index 0bcdbd4bc8f9..000000000000
--- a/test/MC/SystemZ/insn-sthrl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sthrl	%r0, 2864434397         # encoding: [0xc4,0x07,0x55,0x5d,0xe6,0x6e]
-#CHECK: sthrl	%r15, 2864434397        # encoding: [0xc4,0xf7,0x55,0x5d,0xe6,0x6e]
-
-	sthrl	%r0,0xaabbccdd
-	sthrl	%r15,0xaabbccdd
-
-#CHECK: sthrl	%r0, foo                # encoding: [0xc4,0x07,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: sthrl	%r15, foo               # encoding: [0xc4,0xf7,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	sthrl	%r0,foo
-	sthrl	%r15,foo
-
-#CHECK: sthrl	%r3, bar+100            # encoding: [0xc4,0x37,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: sthrl	%r4, bar+100            # encoding: [0xc4,0x47,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	sthrl	%r3,bar+100
-	sthrl	%r4,bar+100
-
-#CHECK: sthrl	%r7, frob@PLT           # encoding: [0xc4,0x77,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: sthrl	%r8, frob@PLT           # encoding: [0xc4,0x87,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	sthrl	%r7,frob@PLT
-	sthrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-sthy-01.s b/test/MC/SystemZ/insn-sthy-01.s
deleted file mode 100644
index 259c5e1c82f5..000000000000
--- a/test/MC/SystemZ/insn-sthy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sthy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x70]
-#CHECK: sthy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x70]
-#CHECK: sthy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x70]
-#CHECK: sthy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x70]
-#CHECK: sthy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x70]
-#CHECK: sthy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x70]
-#CHECK: sthy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x70]
-#CHECK: sthy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x70]
-#CHECK: sthy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x70]
-#CHECK: sthy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x70]
-
-	sthy	%r0, -524288
-	sthy	%r0, -1
-	sthy	%r0, 0
-	sthy	%r0, 1
-	sthy	%r0, 524287
-	sthy	%r0, 0(%r1)
-	sthy	%r0, 0(%r15)
-	sthy	%r0, 524287(%r1,%r15)
-	sthy	%r0, 524287(%r15,%r1)
-	sthy	%r15, 0
diff --git a/test/MC/SystemZ/insn-sthy-02.s b/test/MC/SystemZ/insn-sthy-02.s
deleted file mode 100644
index 0ad547b4b813..000000000000
--- a/test/MC/SystemZ/insn-sthy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sthy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: sthy	%r0, 524288
-
-	sthy	%r0, -524289
-	sthy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-stmg-01.s b/test/MC/SystemZ/insn-stmg-01.s
deleted file mode 100644
index d1890144227c..000000000000
--- a/test/MC/SystemZ/insn-stmg-01.s
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: stmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x24]
-#CHECK: stmg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x24]
-#CHECK: stmg	%r14, %r15, 0           # encoding: [0xeb,0xef,0x00,0x00,0x00,0x24]
-#CHECK: stmg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x24]
-#CHECK: stmg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x24]
-#CHECK: stmg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x24]
-#CHECK: stmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x24]
-#CHECK: stmg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x24]
-#CHECK: stmg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x24]
-#CHECK: stmg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x24]
-#CHECK: stmg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x24]
-#CHECK: stmg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x24]
-#CHECK: stmg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x24]
-
-	stmg	%r0,%r0,0
-	stmg	%r0,%r15,0
-	stmg	%r14,%r15,0
-	stmg	%r15,%r15,0
-	stmg	%r0,%r0,-524288
-	stmg	%r0,%r0,-1
-	stmg	%r0,%r0,0
-	stmg	%r0,%r0,1
-	stmg	%r0,%r0,524287
-	stmg	%r0,%r0,0(%r1)
-	stmg	%r0,%r0,0(%r15)
-	stmg	%r0,%r0,524287(%r1)
-	stmg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-stmg-02.s b/test/MC/SystemZ/insn-stmg-02.s
deleted file mode 100644
index 342c38aec99f..000000000000
--- a/test/MC/SystemZ/insn-stmg-02.s
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: stmg	%r0, %r0, -524289
-#CHECK: error: invalid operand
-#CHECK: stmg	%r0, %r0, 524288
-#CHECK: error: invalid use of indexed addressing
-#CHECK: stmg	%r0, %r0, 0(%r1,%r2)
-
-	stmg	%r0, %r0, -524289
-	stmg	%r0, %r0, 524288
-	stmg	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-strl-01.s b/test/MC/SystemZ/insn-strl-01.s
deleted file mode 100644
index 84bd41f4c0c1..000000000000
--- a/test/MC/SystemZ/insn-strl-01.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: strl	%r0, 2864434397         # encoding: [0xc4,0x0f,0x55,0x5d,0xe6,0x6e]
-#CHECK: strl	%r15, 2864434397        # encoding: [0xc4,0xff,0x55,0x5d,0xe6,0x6e]
-
-	strl	%r0,0xaabbccdd
-	strl	%r15,0xaabbccdd
-
-#CHECK: strl	%r0, foo                # encoding: [0xc4,0x0f,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-#CHECK: strl	%r15, foo               # encoding: [0xc4,0xff,A,A,A,A]
-# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
-
-	strl	%r0,foo
-	strl	%r15,foo
-
-#CHECK: strl	%r3, bar+100            # encoding: [0xc4,0x3f,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-#CHECK: strl	%r4, bar+100            # encoding: [0xc4,0x4f,A,A,A,A]
-# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
-
-	strl	%r3,bar+100
-	strl	%r4,bar+100
-
-#CHECK: strl	%r7, frob@PLT           # encoding: [0xc4,0x7f,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-#CHECK: strl	%r8, frob@PLT           # encoding: [0xc4,0x8f,A,A,A,A]
-# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
-
-	strl	%r7,frob@PLT
-	strl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-strv-01.s b/test/MC/SystemZ/insn-strv-01.s
deleted file mode 100644
index 6a818a8b9eaf..000000000000
--- a/test/MC/SystemZ/insn-strv-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: strv	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x3e]
-#CHECK: strv	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x3e]
-#CHECK: strv	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x3e]
-#CHECK: strv	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x3e]
-#CHECK: strv	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x3e]
-#CHECK: strv	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x3e]
-#CHECK: strv	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x3e]
-#CHECK: strv	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x3e]
-#CHECK: strv	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x3e]
-#CHECK: strv	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x3e]
-
-	strv	%r0,-524288
-	strv	%r0,-1
-	strv	%r0,0
-	strv	%r0,1
-	strv	%r0,524287
-	strv	%r0,0(%r1)
-	strv	%r0,0(%r15)
-	strv	%r0,524287(%r1,%r15)
-	strv	%r0,524287(%r15,%r1)
-	strv	%r15,0
diff --git a/test/MC/SystemZ/insn-strv-02.s b/test/MC/SystemZ/insn-strv-02.s
deleted file mode 100644
index 24460edfed35..000000000000
--- a/test/MC/SystemZ/insn-strv-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: strv	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: strv	%r0, 524288
-
-	strv	%r0, -524289
-	strv	%r0, 524288
diff --git a/test/MC/SystemZ/insn-strvg-01.s b/test/MC/SystemZ/insn-strvg-01.s
deleted file mode 100644
index 6a4d49d3bcc5..000000000000
--- a/test/MC/SystemZ/insn-strvg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: strvg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x2f]
-#CHECK: strvg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x2f]
-#CHECK: strvg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x2f]
-#CHECK: strvg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x2f]
-#CHECK: strvg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x2f]
-#CHECK: strvg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x2f]
-#CHECK: strvg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x2f]
-#CHECK: strvg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x2f]
-#CHECK: strvg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x2f]
-#CHECK: strvg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x2f]
-
-	strvg	%r0,-524288
-	strvg	%r0,-1
-	strvg	%r0,0
-	strvg	%r0,1
-	strvg	%r0,524287
-	strvg	%r0,0(%r1)
-	strvg	%r0,0(%r15)
-	strvg	%r0,524287(%r1,%r15)
-	strvg	%r0,524287(%r15,%r1)
-	strvg	%r15,0
diff --git a/test/MC/SystemZ/insn-strvg-02.s b/test/MC/SystemZ/insn-strvg-02.s
deleted file mode 100644
index ebb0d5bffba3..000000000000
--- a/test/MC/SystemZ/insn-strvg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: strvg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: strvg	%r0, 524288
-
-	strvg	%r0, -524289
-	strvg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sty-01.s b/test/MC/SystemZ/insn-sty-01.s
deleted file mode 100644
index 1ca2d5c55027..000000000000
--- a/test/MC/SystemZ/insn-sty-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sty	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x50]
-#CHECK: sty	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x50]
-#CHECK: sty	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x50]
-#CHECK: sty	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x50]
-#CHECK: sty	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x50]
-#CHECK: sty	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x50]
-#CHECK: sty	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x50]
-#CHECK: sty	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x50]
-#CHECK: sty	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x50]
-#CHECK: sty	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x50]
-
-	sty	%r0, -524288
-	sty	%r0, -1
-	sty	%r0, 0
-	sty	%r0, 1
-	sty	%r0, 524287
-	sty	%r0, 0(%r1)
-	sty	%r0, 0(%r15)
-	sty	%r0, 524287(%r1,%r15)
-	sty	%r0, 524287(%r15,%r1)
-	sty	%r15, 0
diff --git a/test/MC/SystemZ/insn-sty-02.s b/test/MC/SystemZ/insn-sty-02.s
deleted file mode 100644
index fea7c089ab94..000000000000
--- a/test/MC/SystemZ/insn-sty-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sty	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: sty	%r0, 524288
-
-	sty	%r0, -524289
-	sty	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sxbr-01.s b/test/MC/SystemZ/insn-sxbr-01.s
deleted file mode 100644
index e7f4ed25bf46..000000000000
--- a/test/MC/SystemZ/insn-sxbr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sxbr	%f0, %f0                # encoding: [0xb3,0x4b,0x00,0x00]
-#CHECK: sxbr	%f0, %f13               # encoding: [0xb3,0x4b,0x00,0x0d]
-#CHECK: sxbr	%f8, %f8                # encoding: [0xb3,0x4b,0x00,0x88]
-#CHECK: sxbr	%f13, %f0               # encoding: [0xb3,0x4b,0x00,0xd0]
-
-	sxbr	%f0, %f0
-	sxbr	%f0, %f13
-	sxbr	%f8, %f8
-	sxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-sxbr-02.s b/test/MC/SystemZ/insn-sxbr-02.s
deleted file mode 100644
index 397238bd3926..000000000000
--- a/test/MC/SystemZ/insn-sxbr-02.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: sxbr	%f0, %f2
-#CHECK: error: invalid register
-#CHECK: sxbr	%f0, %f14
-#CHECK: error: invalid register
-#CHECK: sxbr	%f2, %f0
-#CHECK: error: invalid register
-#CHECK: sxbr	%f14, %f0
-
-	sxbr	%f0, %f2
-	sxbr	%f0, %f14
-	sxbr	%f2, %f0
-	sxbr	%f14, %f0
-
diff --git a/test/MC/SystemZ/insn-sy-01.s b/test/MC/SystemZ/insn-sy-01.s
deleted file mode 100644
index bc56bd790ecc..000000000000
--- a/test/MC/SystemZ/insn-sy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: sy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5b]
-#CHECK: sy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5b]
-#CHECK: sy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5b]
-#CHECK: sy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5b]
-#CHECK: sy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5b]
-#CHECK: sy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5b]
-#CHECK: sy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5b]
-#CHECK: sy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5b]
-#CHECK: sy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5b]
-#CHECK: sy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5b]
-
-	sy	%r0, -524288
-	sy	%r0, -1
-	sy	%r0, 0
-	sy	%r0, 1
-	sy	%r0, 524287
-	sy	%r0, 0(%r1)
-	sy	%r0, 0(%r15)
-	sy	%r0, 524287(%r1,%r15)
-	sy	%r0, 524287(%r15,%r1)
-	sy	%r15, 0
diff --git a/test/MC/SystemZ/insn-sy-02.s b/test/MC/SystemZ/insn-sy-02.s
deleted file mode 100644
index 7d64ca922d12..000000000000
--- a/test/MC/SystemZ/insn-sy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: sy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: sy	%r0, 524288
-
-	sy	%r0, -524289
-	sy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-x-01.s b/test/MC/SystemZ/insn-x-01.s
deleted file mode 100644
index a2e3a2605a28..000000000000
--- a/test/MC/SystemZ/insn-x-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: x	%r0, 0                  # encoding: [0x57,0x00,0x00,0x00]
-#CHECK: x	%r0, 4095               # encoding: [0x57,0x00,0x0f,0xff]
-#CHECK: x	%r0, 0(%r1)             # encoding: [0x57,0x00,0x10,0x00]
-#CHECK: x	%r0, 0(%r15)            # encoding: [0x57,0x00,0xf0,0x00]
-#CHECK: x	%r0, 4095(%r1,%r15)     # encoding: [0x57,0x01,0xff,0xff]
-#CHECK: x	%r0, 4095(%r15,%r1)     # encoding: [0x57,0x0f,0x1f,0xff]
-#CHECK: x	%r15, 0                 # encoding: [0x57,0xf0,0x00,0x00]
-
-	x	%r0, 0
-	x	%r0, 4095
-	x	%r0, 0(%r1)
-	x	%r0, 0(%r15)
-	x	%r0, 4095(%r1,%r15)
-	x	%r0, 4095(%r15,%r1)
-	x	%r15, 0
diff --git a/test/MC/SystemZ/insn-x-02.s b/test/MC/SystemZ/insn-x-02.s
deleted file mode 100644
index 371974046416..000000000000
--- a/test/MC/SystemZ/insn-x-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: x	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: x	%r0, 4096
-
-	x	%r0, -1
-	x	%r0, 4096
diff --git a/test/MC/SystemZ/insn-xg-01.s b/test/MC/SystemZ/insn-xg-01.s
deleted file mode 100644
index 6cf5e7e4b48c..000000000000
--- a/test/MC/SystemZ/insn-xg-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x82]
-#CHECK: xg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x82]
-#CHECK: xg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x82]
-#CHECK: xg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x82]
-#CHECK: xg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x82]
-#CHECK: xg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x82]
-#CHECK: xg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x82]
-#CHECK: xg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x82]
-#CHECK: xg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x82]
-#CHECK: xg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x82]
-
-	xg	%r0, -524288
-	xg	%r0, -1
-	xg	%r0, 0
-	xg	%r0, 1
-	xg	%r0, 524287
-	xg	%r0, 0(%r1)
-	xg	%r0, 0(%r15)
-	xg	%r0, 524287(%r1,%r15)
-	xg	%r0, 524287(%r15,%r1)
-	xg	%r15, 0
diff --git a/test/MC/SystemZ/insn-xg-02.s b/test/MC/SystemZ/insn-xg-02.s
deleted file mode 100644
index 0505b9facfe6..000000000000
--- a/test/MC/SystemZ/insn-xg-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: xg	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: xg	%r0, 524288
-
-	xg	%r0, -524289
-	xg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-xgr-01.s b/test/MC/SystemZ/insn-xgr-01.s
deleted file mode 100644
index 1a5a6d6e1c6a..000000000000
--- a/test/MC/SystemZ/insn-xgr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xgr	%r0, %r0                # encoding: [0xb9,0x82,0x00,0x00]
-#CHECK: xgr	%r0, %r15               # encoding: [0xb9,0x82,0x00,0x0f]
-#CHECK: xgr	%r15, %r0               # encoding: [0xb9,0x82,0x00,0xf0]
-#CHECK: xgr	%r7, %r8                # encoding: [0xb9,0x82,0x00,0x78]
-
-	xgr	%r0,%r0
-	xgr	%r0,%r15
-	xgr	%r15,%r0
-	xgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-xi-01.s b/test/MC/SystemZ/insn-xi-01.s
deleted file mode 100644
index 2a7670c732ca..000000000000
--- a/test/MC/SystemZ/insn-xi-01.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xi	0, 0                    # encoding: [0x97,0x00,0x00,0x00]
-#CHECK: xi	4095, 0                 # encoding: [0x97,0x00,0x0f,0xff]
-#CHECK: xi	0, 255                  # encoding: [0x97,0xff,0x00,0x00]
-#CHECK: xi	0(%r1), 42              # encoding: [0x97,0x2a,0x10,0x00]
-#CHECK: xi	0(%r15), 42             # encoding: [0x97,0x2a,0xf0,0x00]
-#CHECK: xi	4095(%r1), 42           # encoding: [0x97,0x2a,0x1f,0xff]
-#CHECK: xi	4095(%r15), 42          # encoding: [0x97,0x2a,0xff,0xff]
-
-	xi	0, 0
-	xi	4095, 0
-	xi	0, 255
-	xi	0(%r1), 42
-	xi	0(%r15), 42
-	xi	4095(%r1), 42
-	xi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-xi-02.s b/test/MC/SystemZ/insn-xi-02.s
deleted file mode 100644
index a1ce668a6931..000000000000
--- a/test/MC/SystemZ/insn-xi-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: xi	-1, 0
-#CHECK: error: invalid operand
-#CHECK: xi	4096, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: xi	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: xi	0, -1
-#CHECK: error: invalid operand
-#CHECK: xi	0, 256
-
-	xi	-1, 0
-	xi	4096, 0
-	xi	0(%r1,%r2), 0
-	xi	0, -1
-	xi	0, 256
diff --git a/test/MC/SystemZ/insn-xihf-01.s b/test/MC/SystemZ/insn-xihf-01.s
deleted file mode 100644
index ad2ec1946a09..000000000000
--- a/test/MC/SystemZ/insn-xihf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xihf	%r0, 0                  # encoding: [0xc0,0x06,0x00,0x00,0x00,0x00]
-#CHECK: xihf	%r0, 4294967295         # encoding: [0xc0,0x06,0xff,0xff,0xff,0xff]
-#CHECK: xihf	%r15, 0                 # encoding: [0xc0,0xf6,0x00,0x00,0x00,0x00]
-
-	xihf	%r0, 0
-	xihf	%r0, 0xffffffff
-	xihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-xihf-02.s b/test/MC/SystemZ/insn-xihf-02.s
deleted file mode 100644
index 945993b82246..000000000000
--- a/test/MC/SystemZ/insn-xihf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: xihf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: xihf	%r0, 1 << 32
-
-	xihf	%r0, -1
-	xihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-xilf-01.s b/test/MC/SystemZ/insn-xilf-01.s
deleted file mode 100644
index 475e57332103..000000000000
--- a/test/MC/SystemZ/insn-xilf-01.s
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xilf	%r0, 0                  # encoding: [0xc0,0x07,0x00,0x00,0x00,0x00]
-#CHECK: xilf	%r0, 4294967295         # encoding: [0xc0,0x07,0xff,0xff,0xff,0xff]
-#CHECK: xilf	%r15, 0                 # encoding: [0xc0,0xf7,0x00,0x00,0x00,0x00]
-
-	xilf	%r0, 0
-	xilf	%r0, 0xffffffff
-	xilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-xilf-02.s b/test/MC/SystemZ/insn-xilf-02.s
deleted file mode 100644
index df02b7cc9477..000000000000
--- a/test/MC/SystemZ/insn-xilf-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: xilf	%r0, -1
-#CHECK: error: invalid operand
-#CHECK: xilf	%r0, 1 << 32
-
-	xilf	%r0, -1
-	xilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-xiy-01.s b/test/MC/SystemZ/insn-xiy-01.s
deleted file mode 100644
index c329ce057260..000000000000
--- a/test/MC/SystemZ/insn-xiy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xiy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x57]
-#CHECK: xiy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x57]
-#CHECK: xiy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x57]
-#CHECK: xiy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x57]
-#CHECK: xiy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x57]
-#CHECK: xiy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x57]
-#CHECK: xiy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x57]
-#CHECK: xiy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x57]
-#CHECK: xiy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x57]
-#CHECK: xiy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x57]
-
-	xiy	-524288, 0
-	xiy	-1, 0
-	xiy	0, 0
-	xiy	1, 0
-	xiy	524287, 0
-	xiy	0, 255
-	xiy	0(%r1), 42
-	xiy	0(%r15), 42
-	xiy	524287(%r1), 42
-	xiy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-xiy-02.s b/test/MC/SystemZ/insn-xiy-02.s
deleted file mode 100644
index 519c26c9659c..000000000000
--- a/test/MC/SystemZ/insn-xiy-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: xiy	-524289, 0
-#CHECK: error: invalid operand
-#CHECK: xiy	524288, 0
-#CHECK: error: invalid use of indexed addressing
-#CHECK: xiy	0(%r1,%r2), 0
-#CHECK: error: invalid operand
-#CHECK: xiy	0, -1
-#CHECK: error: invalid operand
-#CHECK: xiy	0, 256
-
-	xiy	-524289, 0
-	xiy	524288, 0
-	xiy	0(%r1,%r2), 0
-	xiy	0, -1
-	xiy	0, 256
diff --git a/test/MC/SystemZ/insn-xr-01.s b/test/MC/SystemZ/insn-xr-01.s
deleted file mode 100644
index 471e6a63d6fa..000000000000
--- a/test/MC/SystemZ/insn-xr-01.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xr	%r0, %r0                # encoding: [0x17,0x00]
-#CHECK: xr	%r0, %r15               # encoding: [0x17,0x0f]
-#CHECK: xr	%r15, %r0               # encoding: [0x17,0xf0]
-#CHECK: xr	%r7, %r8                # encoding: [0x17,0x78]
-
-	xr	%r0,%r0
-	xr	%r0,%r15
-	xr	%r15,%r0
-	xr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-xy-01.s b/test/MC/SystemZ/insn-xy-01.s
deleted file mode 100644
index 132db04bf560..000000000000
--- a/test/MC/SystemZ/insn-xy-01.s
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: xy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x57]
-#CHECK: xy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x57]
-#CHECK: xy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x57]
-#CHECK: xy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x57]
-#CHECK: xy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x57]
-#CHECK: xy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x57]
-#CHECK: xy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x57]
-#CHECK: xy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x57]
-#CHECK: xy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x57]
-#CHECK: xy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x57]
-
-	xy	%r0, -524288
-	xy	%r0, -1
-	xy	%r0, 0
-	xy	%r0, 1
-	xy	%r0, 524287
-	xy	%r0, 0(%r1)
-	xy	%r0, 0(%r15)
-	xy	%r0, 524287(%r1,%r15)
-	xy	%r0, 524287(%r15,%r1)
-	xy	%r15, 0
diff --git a/test/MC/SystemZ/insn-xy-02.s b/test/MC/SystemZ/insn-xy-02.s
deleted file mode 100644
index 6ba3bad48c9b..000000000000
--- a/test/MC/SystemZ/insn-xy-02.s
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid operand
-#CHECK: xy	%r0, -524289
-#CHECK: error: invalid operand
-#CHECK: xy	%r0, 524288
-
-	xy	%r0, -524289
-	xy	%r0, 524288
diff --git a/test/MC/SystemZ/lit.local.cfg b/test/MC/SystemZ/lit.local.cfg
index abb697406d6f..b12af09434be 100644
--- a/test/MC/SystemZ/lit.local.cfg
+++ b/test/MC/SystemZ/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'SystemZ' in targets:
     config.unsupported = True
diff --git a/test/MC/SystemZ/regs-01.s b/test/MC/SystemZ/regs-01.s
deleted file mode 100644
index df11fee691bc..000000000000
--- a/test/MC/SystemZ/regs-01.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lr	%r0, %r1                # encoding: [0x18,0x01]
-#CHECK: lr	%r2, %r3                # encoding: [0x18,0x23]
-#CHECK: lr	%r4, %r5                # encoding: [0x18,0x45]
-#CHECK: lr	%r6, %r7                # encoding: [0x18,0x67]
-#CHECK: lr	%r8, %r9                # encoding: [0x18,0x89]
-#CHECK: lr	%r10, %r11              # encoding: [0x18,0xab]
-#CHECK: lr	%r12, %r13              # encoding: [0x18,0xcd]
-#CHECK: lr	%r14, %r15              # encoding: [0x18,0xef]
-
-	lr	%r0,%r1
-	lr	%r2,%r3
-	lr	%r4,%r5
-	lr	%r6,%r7
-	lr	%r8,%r9
-	lr	%r10,%r11
-	lr	%r12,%r13
-	lr	%r14,%r15
diff --git a/test/MC/SystemZ/regs-02.s b/test/MC/SystemZ/regs-02.s
deleted file mode 100644
index baaa0f9f9781..000000000000
--- a/test/MC/SystemZ/regs-02.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lgr	%r0, %r1                # encoding: [0xb9,0x04,0x00,0x01]
-#CHECK: lgr	%r2, %r3                # encoding: [0xb9,0x04,0x00,0x23]
-#CHECK: lgr	%r4, %r5                # encoding: [0xb9,0x04,0x00,0x45]
-#CHECK: lgr	%r6, %r7                # encoding: [0xb9,0x04,0x00,0x67]
-#CHECK: lgr	%r8, %r9                # encoding: [0xb9,0x04,0x00,0x89]
-#CHECK: lgr	%r10, %r11              # encoding: [0xb9,0x04,0x00,0xab]
-#CHECK: lgr	%r12, %r13              # encoding: [0xb9,0x04,0x00,0xcd]
-#CHECK: lgr	%r14, %r15              # encoding: [0xb9,0x04,0x00,0xef]
-
-	lgr	%r0,%r1
-	lgr	%r2,%r3
-	lgr	%r4,%r5
-	lgr	%r6,%r7
-	lgr	%r8,%r9
-	lgr	%r10,%r11
-	lgr	%r12,%r13
-	lgr	%r14,%r15
diff --git a/test/MC/SystemZ/regs-03.s b/test/MC/SystemZ/regs-03.s
deleted file mode 100644
index 6ced4157b453..000000000000
--- a/test/MC/SystemZ/regs-03.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: dlr	%r0, %r0                # encoding: [0xb9,0x97,0x00,0x00]
-#CHECK: dlr	%r2, %r0                # encoding: [0xb9,0x97,0x00,0x20]
-#CHECK: dlr	%r4, %r0                # encoding: [0xb9,0x97,0x00,0x40]
-#CHECK: dlr	%r6, %r0                # encoding: [0xb9,0x97,0x00,0x60]
-#CHECK: dlr	%r8, %r0                # encoding: [0xb9,0x97,0x00,0x80]
-#CHECK: dlr	%r10, %r0               # encoding: [0xb9,0x97,0x00,0xa0]
-#CHECK: dlr	%r12, %r0               # encoding: [0xb9,0x97,0x00,0xc0]
-#CHECK: dlr	%r14, %r0               # encoding: [0xb9,0x97,0x00,0xe0]
-
-	dlr	%r0,%r0
-	dlr	%r2,%r0
-	dlr	%r4,%r0
-	dlr	%r6,%r0
-	dlr	%r8,%r0
-	dlr	%r10,%r0
-	dlr	%r12,%r0
-	dlr	%r14,%r0
diff --git a/test/MC/SystemZ/regs-04.s b/test/MC/SystemZ/regs-04.s
deleted file mode 100644
index a2da67186f2b..000000000000
--- a/test/MC/SystemZ/regs-04.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ler	%f0, %f1                # encoding: [0x38,0x01]
-#CHECK: ler	%f2, %f3                # encoding: [0x38,0x23]
-#CHECK: ler	%f4, %f5                # encoding: [0x38,0x45]
-#CHECK: ler	%f6, %f7                # encoding: [0x38,0x67]
-#CHECK: ler	%f8, %f9                # encoding: [0x38,0x89]
-#CHECK: ler	%f10, %f11              # encoding: [0x38,0xab]
-#CHECK: ler	%f12, %f13              # encoding: [0x38,0xcd]
-#CHECK: ler	%f14, %f15              # encoding: [0x38,0xef]
-
-	ler	%f0,%f1
-	ler	%f2,%f3
-	ler	%f4,%f5
-	ler	%f6,%f7
-	ler	%f8,%f9
-	ler	%f10,%f11
-	ler	%f12,%f13
-	ler	%f14,%f15
diff --git a/test/MC/SystemZ/regs-05.s b/test/MC/SystemZ/regs-05.s
deleted file mode 100644
index b5f50b51a3ba..000000000000
--- a/test/MC/SystemZ/regs-05.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: ldr	%f0, %f1                # encoding: [0x28,0x01]
-#CHECK: ldr	%f2, %f3                # encoding: [0x28,0x23]
-#CHECK: ldr	%f4, %f5                # encoding: [0x28,0x45]
-#CHECK: ldr	%f6, %f7                # encoding: [0x28,0x67]
-#CHECK: ldr	%f8, %f9                # encoding: [0x28,0x89]
-#CHECK: ldr	%f10, %f11              # encoding: [0x28,0xab]
-#CHECK: ldr	%f12, %f13              # encoding: [0x28,0xcd]
-#CHECK: ldr	%f14, %f15              # encoding: [0x28,0xef]
-
-	ldr	%f0,%f1
-	ldr	%f2,%f3
-	ldr	%f4,%f5
-	ldr	%f6,%f7
-	ldr	%f8,%f9
-	ldr	%f10,%f11
-	ldr	%f12,%f13
-	ldr	%f14,%f15
diff --git a/test/MC/SystemZ/regs-06.s b/test/MC/SystemZ/regs-06.s
deleted file mode 100644
index 43bf38c9c4b6..000000000000
--- a/test/MC/SystemZ/regs-06.s
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
-
-#CHECK: lxr	%f0, %f1                # encoding: [0xb3,0x65,0x00,0x01]
-#CHECK: lxr	%f4, %f5                # encoding: [0xb3,0x65,0x00,0x45]
-#CHECK: lxr	%f8, %f9                # encoding: [0xb3,0x65,0x00,0x89]
-#CHECK: lxr	%f12, %f13              # encoding: [0xb3,0x65,0x00,0xcd]
-
-	lxr	%f0,%f1
-	lxr	%f4,%f5
-	lxr	%f8,%f9
-	lxr	%f12,%f13
diff --git a/test/MC/SystemZ/regs-07.s b/test/MC/SystemZ/regs-07.s
deleted file mode 100644
index d3585a6e2ccb..000000000000
--- a/test/MC/SystemZ/regs-07.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lr	%r16,%r1
-#CHECK: error: invalid register
-#CHECK: lr	%f0,%r1
-#CHECK: error: invalid register
-#CHECK: lr	%a0,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: lr	%arid,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: lr	%0,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: lr	0,%r1
-#CHECK: error: unknown token in expression
-#CHECK: lr	(%r0),%r1
-#CHECK: error: unknown token in expression
-#CHECK: lr	%,%r1
-
-	lr	%r16,%r1
-	lr	%f0,%r1
-	lr	%a0,%r1
-	lr	%arid,%r1
-	lr	%0,%r1
-	lr	0,%r1
-	lr	(%r0),%r1
-	lr	%,%r1
diff --git a/test/MC/SystemZ/regs-08.s b/test/MC/SystemZ/regs-08.s
deleted file mode 100644
index f11c45737a43..000000000000
--- a/test/MC/SystemZ/regs-08.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lgr	%r16,%r1
-#CHECK: error: invalid register
-#CHECK: lgr	%f0,%r1
-#CHECK: error: invalid register
-#CHECK: lgr	%a0,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: lgr	%arid,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: lgr	%0,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: lgr	0,%r1
-#CHECK: error: unknown token in expression
-#CHECK: lgr	(%r0),%r1
-#CHECK: error: unknown token in expression
-#CHECK: lgr	%,%r1
-
-	lgr	%r16,%r1
-	lgr	%f0,%r1
-	lgr	%a0,%r1
-	lgr	%arid,%r1
-	lgr	%0,%r1
-	lgr	0,%r1
-	lgr	(%r0),%r1
-	lgr	%,%r1
diff --git a/test/MC/SystemZ/regs-09.s b/test/MC/SystemZ/regs-09.s
deleted file mode 100644
index 60f4d3914030..000000000000
--- a/test/MC/SystemZ/regs-09.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: dlr	%r1,%r8
-#CHECK: error: invalid register
-#CHECK: dlr	%r16,%r1
-#CHECK: error: invalid register
-#CHECK: dlr	%f0,%r1
-#CHECK: error: invalid register
-#CHECK: dlr	%a0,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: dlr	%arid,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: dlr	%0,%r1
-#CHECK: error: invalid operand for instruction
-#CHECK: dlr	0,%r1
-#CHECK: error: unknown token in expression
-#CHECK: dlr	(%r0),%r1
-#CHECK: error: unknown token in expression
-#CHECK: dlr	%,%r1
-
-	dlr	%r1,%r8
-	dlr	%r16,%r1
-	dlr	%f0,%r1
-	dlr	%a0,%r1
-	dlr	%arid,%r1
-	dlr	%0,%r1
-	dlr	0,%r1
-	dlr	(%r0),%r1
-	dlr	%,%r1
diff --git a/test/MC/SystemZ/regs-10.s b/test/MC/SystemZ/regs-10.s
deleted file mode 100644
index 865aa82532ff..000000000000
--- a/test/MC/SystemZ/regs-10.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: ler	%f1,%f16
-#CHECK: error: invalid register
-#CHECK: ler	%f1,%r0
-#CHECK: error: invalid register
-#CHECK: ler	%f1,%a0
-#CHECK: error: invalid operand for instruction
-#CHECK: ler	%f1,%fly
-#CHECK: error: invalid operand for instruction
-#CHECK: ler	%f1,%0
-#CHECK: error: invalid operand for instruction
-#CHECK: ler	%f1,0
-#CHECK: error: unknown token in expression
-#CHECK: ler	%f1,(%f0)
-#CHECK: error: unknown token in expression
-#CHECK: ler	%f1,%
-
-	ler	%f1,%f16
-	ler	%f1,%r0
-	ler	%f1,%a0
-	ler	%f1,%fly
-	ler	%f1,%0
-	ler	%f1,0
-	ler	%f1,(%f0)
-	ler	%f1,%
diff --git a/test/MC/SystemZ/regs-11.s b/test/MC/SystemZ/regs-11.s
deleted file mode 100644
index 5d0f04f04343..000000000000
--- a/test/MC/SystemZ/regs-11.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: ldr	%f1,%f16
-#CHECK: error: invalid register
-#CHECK: ldr	%f1,%r0
-#CHECK: error: invalid register
-#CHECK: ldr	%f1,%a0
-#CHECK: error: invalid operand for instruction
-#CHECK: ldr	%f1,%fly
-#CHECK: error: invalid operand for instruction
-#CHECK: ldr	%f1,%0
-#CHECK: error: invalid operand for instruction
-#CHECK: ldr	%f1,0
-#CHECK: error: unknown token in expression
-#CHECK: ldr	%f1,(%f0)
-#CHECK: error: unknown token in expression
-#CHECK: ldr	%f1,%
-
-	ldr	%f1,%f16
-	ldr	%f1,%r0
-	ldr	%f1,%a0
-	ldr	%f1,%fly
-	ldr	%f1,%0
-	ldr	%f1,0
-	ldr	%f1,(%f0)
-	ldr	%f1,%
diff --git a/test/MC/SystemZ/regs-12.s b/test/MC/SystemZ/regs-12.s
deleted file mode 100644
index f6cf0e75844d..000000000000
--- a/test/MC/SystemZ/regs-12.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: lxr	%f1,%f2
-#CHECK: error: invalid register
-#CHECK: lxr	%f1,%f16
-#CHECK: error: invalid register
-#CHECK: lxr	%f1,%r0
-#CHECK: error: invalid register
-#CHECK: lxr	%f1,%a0
-#CHECK: error: invalid operand for instruction
-#CHECK: lxr	%f1,%fly
-#CHECK: error: invalid operand for instruction
-#CHECK: lxr	%f1,%0
-#CHECK: error: invalid operand for instruction
-#CHECK: lxr	%f1,0
-#CHECK: error: unknown token in expression
-#CHECK: lxr	%f1,(%f0)
-#CHECK: error: unknown token in expression
-#CHECK: lxr	%f1,%
-
-	lxr	%f1,%f2
-	lxr	%f1,%f16
-	lxr	%f1,%r0
-	lxr	%f1,%a0
-	lxr	%f1,%fly
-	lxr	%f1,%0
-	lxr	%f1,0
-	lxr	%f1,(%f0)
-	lxr	%f1,%
diff --git a/test/MC/SystemZ/regs-13.s b/test/MC/SystemZ/regs-13.s
deleted file mode 100644
index 88b0c05b86fb..000000000000
--- a/test/MC/SystemZ/regs-13.s
+++ /dev/null
@@ -1,69 +0,0 @@
-# RUN: llvm-mc -triple s390x-linux-gnu < %s | FileCheck %s
-
-#CHECK: .cfi_offset %r0, 0
-#CHECK: .cfi_offset %r1, 8
-#CHECK: .cfi_offset %r2, 16
-#CHECK: .cfi_offset %r3, 24
-#CHECK: .cfi_offset %r4, 32
-#CHECK: .cfi_offset %r5, 40
-#CHECK: .cfi_offset %r6, 48
-#CHECK: .cfi_offset %r7, 56
-#CHECK: .cfi_offset %r8, 64
-#CHECK: .cfi_offset %r9, 72
-#CHECK: .cfi_offset %r10, 80
-#CHECK: .cfi_offset %r11, 88
-#CHECK: .cfi_offset %r12, 96
-#CHECK: .cfi_offset %r13, 104
-#CHECK: .cfi_offset %r14, 112
-#CHECK: .cfi_offset %r15, 120
-#CHECK: .cfi_offset %f0, 128
-#CHECK: .cfi_offset %f1, 136
-#CHECK: .cfi_offset %f2, 144
-#CHECK: .cfi_offset %f3, 152
-#CHECK: .cfi_offset %f4, 160
-#CHECK: .cfi_offset %f5, 168
-#CHECK: .cfi_offset %f6, 176
-#CHECK: .cfi_offset %f7, 184
-#CHECK: .cfi_offset %f8, 192
-#CHECK: .cfi_offset %f9, 200
-#CHECK: .cfi_offset %f10, 208
-#CHECK: .cfi_offset %f11, 216
-#CHECK: .cfi_offset %f12, 224
-#CHECK: .cfi_offset %f13, 232
-#CHECK: .cfi_offset %f14, 240
-#CHECK: .cfi_offset %f15, 248
-
-	.cfi_startproc
-	.cfi_offset %r0,0
-	.cfi_offset %r1,8
-	.cfi_offset %r2,16
-	.cfi_offset %r3,24
-	.cfi_offset %r4,32
-	.cfi_offset %r5,40
-	.cfi_offset %r6,48
-	.cfi_offset %r7,56
-	.cfi_offset %r8,64
-	.cfi_offset %r9,72
-	.cfi_offset %r10,80
-	.cfi_offset %r11,88
-	.cfi_offset %r12,96
-	.cfi_offset %r13,104
-	.cfi_offset %r14,112
-	.cfi_offset %r15,120
-	.cfi_offset %f0,128
-	.cfi_offset %f1,136
-	.cfi_offset %f2,144
-	.cfi_offset %f3,152
-	.cfi_offset %f4,160
-	.cfi_offset %f5,168
-	.cfi_offset %f6,176
-	.cfi_offset %f7,184
-	.cfi_offset %f8,192
-	.cfi_offset %f9,200
-	.cfi_offset %f10,208
-	.cfi_offset %f11,216
-	.cfi_offset %f12,224
-	.cfi_offset %f13,232
-	.cfi_offset %f14,240
-	.cfi_offset %f15,248
-	.cfi_endproc
diff --git a/test/MC/SystemZ/regs-14.s b/test/MC/SystemZ/regs-14.s
deleted file mode 100644
index e22307d49ce1..000000000000
--- a/test/MC/SystemZ/regs-14.s
+++ /dev/null
@@ -1,18 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: invalid register
-#CHECK: .cfi_offset %a0,0
-#CHECK: error: register expected
-#CHECK: .cfi_offset %foo,0
-#CHECK: error: register expected
-#CHECK: .cfi_offset %,0
-#CHECK: error: register expected
-#CHECK: .cfi_offset r0,0
-
-	.cfi_startproc
-	.cfi_offset %a0,0
-	.cfi_offset %foo,0
-	.cfi_offset %,0
-	.cfi_offset r0,0
-	.cfi_endproc
diff --git a/test/MC/SystemZ/regs-15.s b/test/MC/SystemZ/regs-15.s
deleted file mode 100644
index baec6a639bbf..000000000000
--- a/test/MC/SystemZ/regs-15.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
-# RUN: FileCheck < %t %s
-
-#CHECK: error: %r0 used in an address
-#CHECK: sll	%r2,8(%r0)
-#CHECK: error: %r0 used in an address
-#CHECK: br	%r0
-#CHECK: error: %r0 used in an address
-#CHECK: l	%r1,8(%r0)
-#CHECK: error: %r0 used in an address
-#CHECK: l	%r1,8(%r0,%r15)
-#CHECK: error: %r0 used in an address
-#CHECK: l	%r1,8(%r15,%r0)
-
-	sll	%r2,8(%r0)
-	br	%r0
-	l	%r1,8(%r0)
-	l	%r1,8(%r0,%r15)
-	l	%r1,8(%r15,%r0)
diff --git a/test/MC/SystemZ/regs-bad.s b/test/MC/SystemZ/regs-bad.s
new file mode 100644
index 000000000000..65720578ff8f
--- /dev/null
+++ b/test/MC/SystemZ/regs-bad.s
@@ -0,0 +1,267 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+# Test GR32 operands
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%f0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%a0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%r0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%r0,%a1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%r0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%r0,0(%r1)
+
+	lr	%f0,%r1
+	lr	%a0,%r1
+	lr	%r0,%f1
+	lr	%r0,%a1
+	lr	%r0,0
+	lr	%r0,0(%r1)
+
+# Test GR64 operands
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%f0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%a0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%r0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%r0,%a1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%r0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%r0,0(%r1)
+
+	lgr	%f0,%r1
+	lgr	%a0,%r1
+	lgr	%r0,%f1
+	lgr	%r0,%a1
+	lgr	%r0,0
+	lgr	%r0,0(%r1)
+
+# Test GR128 operands
+#
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r1,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r3,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r5,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r7,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r9,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r11,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r13,%r0
+#CHECK: error: invalid register pair
+#CHECK: dlr	%r15,%r0
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%f0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%a0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%r0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%r0,%a1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%r0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%r0,0(%r1)
+
+	dlr	%r1,%r0
+	dlr	%r3,%r0
+	dlr	%r5,%r0
+	dlr	%r7,%r0
+	dlr	%r9,%r0
+	dlr	%r11,%r0
+	dlr	%r13,%r0
+	dlr	%r15,%r0
+	dlr	%f0,%r1
+	dlr	%a0,%r1
+	dlr	%r0,%f1
+	dlr	%r0,%a1
+	dlr	%r0,0
+	dlr	%r0,0(%r1)
+
+# Test FP32 operands
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%r0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%a0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f0,%a1
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f0,0(%r1)
+
+	ler	%r0,%f1
+	ler	%a0,%f1
+	ler	%f0,%r1
+	ler	%f0,%a1
+	ler	%f0,0
+	ler	%f0,0(%r1)
+
+# Test FP64 operands
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%r0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%a0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f0,%a1
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f0,0(%r1)
+
+	ldr	%r0,%f1
+	ldr	%a0,%f1
+	ldr	%f0,%r1
+	ldr	%f0,%a1
+	ldr	%f0,0
+	ldr	%f0,0(%r1)
+
+# Test FP128 operands
+#
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f2,%f0
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f0,%f3
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f6,%f0
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f0,%f7
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f10,%f0
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f0,%f11
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f14,%f0
+#CHECK: error: invalid register pair
+#CHECK: lxr	%f0,%f15
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%r0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%a0,%f1
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f0,%a1
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f0,0(%r1)
+
+	lxr	%f2,%f0
+	lxr	%f0,%f3
+	lxr	%f6,%f0
+	lxr	%f0,%f7
+	lxr	%f10,%f0
+	lxr	%f0,%f11
+	lxr	%f14,%f0
+	lxr	%f0,%f15
+	lxr	%r0,%f1
+	lxr	%a0,%f1
+	lxr	%f0,%r1
+	lxr	%f0,%a1
+	lxr	%f0,0
+	lxr	%f0,0(%r1)
+
+# Test access register operands
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: ear	%r0,%r0
+#CHECK: error: invalid operand for instruction
+#CHECK: ear	%r0,%f0
+#CHECK: error: invalid operand for instruction
+#CHECK: ear	%r0,0
+#CHECK: error: invalid operand for instruction
+#CHECK: ear	%r0,0(%r1)
+
+	ear	%r0,%r0
+	ear	%r0,%f0
+	ear	%r0,0
+	ear	%r0,0(%r1)
+
+	.cfi_startproc
+
+# Test general register parsing, with no predetermined class in mind.
+#
+#CHECK: error: register expected
+#CHECK: .cfi_offset r0,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %r,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %f,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %a,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %0,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %c0,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %r16,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %f16,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %a16,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %reef,0
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %arid,0
+
+	.cfi_offset r0,0
+	.cfi_offset %,0
+	.cfi_offset %r,0
+	.cfi_offset %f,0
+	.cfi_offset %a,0
+	.cfi_offset %0,0
+	.cfi_offset %c0,0
+	.cfi_offset %r16,0
+	.cfi_offset %f16,0
+	.cfi_offset %a16,0
+	.cfi_offset %reef,0
+	.cfi_offset %arid,0
+
+# Test invalid CFI registers.  Will need to be updated once access
+# registers are modelled as LLVM registers.
+#
+#CHECK: error: invalid operand for instruction
+#CHECK: .cfi_offset %a0,0
+
+	.cfi_offset %a0,0
+
+	.cfi_endproc
+
+#CHECK: error: %r0 used in an address
+#CHECK: sll	%r2,8(%r0)
+#CHECK: error: %r0 used in an address
+#CHECK: br	%r0
+#CHECK: error: %r0 used in an address
+#CHECK: l	%r1,8(%r0)
+#CHECK: error: %r0 used in an address
+#CHECK: l	%r1,8(%r0,%r15)
+#CHECK: error: %r0 used in an address
+#CHECK: l	%r1,8(%r15,%r0)
+
+	sll	%r2,8(%r0)
+	br	%r0
+	l	%r1,8(%r0)
+	l	%r1,8(%r0,%r15)
+	l	%r1,8(%r15,%r0)
diff --git a/test/MC/SystemZ/regs-good.s b/test/MC/SystemZ/regs-good.s
new file mode 100644
index 000000000000..7513d0c6b2a3
--- /dev/null
+++ b/test/MC/SystemZ/regs-good.s
@@ -0,0 +1,169 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lr	%r0, %r1                # encoding: [0x18,0x01]
+#CHECK: lr	%r2, %r3                # encoding: [0x18,0x23]
+#CHECK: lr	%r4, %r5                # encoding: [0x18,0x45]
+#CHECK: lr	%r6, %r7                # encoding: [0x18,0x67]
+#CHECK: lr	%r8, %r9                # encoding: [0x18,0x89]
+#CHECK: lr	%r10, %r11              # encoding: [0x18,0xab]
+#CHECK: lr	%r12, %r13              # encoding: [0x18,0xcd]
+#CHECK: lr	%r14, %r15              # encoding: [0x18,0xef]
+
+	lr	%r0,%r1
+	lr	%r2,%r3
+	lr	%r4,%r5
+	lr	%r6,%r7
+	lr	%r8,%r9
+	lr	%r10,%r11
+	lr	%r12,%r13
+	lr	%r14,%r15
+
+#CHECK: lgr	%r0, %r1                # encoding: [0xb9,0x04,0x00,0x01]
+#CHECK: lgr	%r2, %r3                # encoding: [0xb9,0x04,0x00,0x23]
+#CHECK: lgr	%r4, %r5                # encoding: [0xb9,0x04,0x00,0x45]
+#CHECK: lgr	%r6, %r7                # encoding: [0xb9,0x04,0x00,0x67]
+#CHECK: lgr	%r8, %r9                # encoding: [0xb9,0x04,0x00,0x89]
+#CHECK: lgr	%r10, %r11              # encoding: [0xb9,0x04,0x00,0xab]
+#CHECK: lgr	%r12, %r13              # encoding: [0xb9,0x04,0x00,0xcd]
+#CHECK: lgr	%r14, %r15              # encoding: [0xb9,0x04,0x00,0xef]
+
+	lgr	%r0,%r1
+	lgr	%r2,%r3
+	lgr	%r4,%r5
+	lgr	%r6,%r7
+	lgr	%r8,%r9
+	lgr	%r10,%r11
+	lgr	%r12,%r13
+	lgr	%r14,%r15
+
+#CHECK: dlr	%r0, %r0                # encoding: [0xb9,0x97,0x00,0x00]
+#CHECK: dlr	%r2, %r0                # encoding: [0xb9,0x97,0x00,0x20]
+#CHECK: dlr	%r4, %r0                # encoding: [0xb9,0x97,0x00,0x40]
+#CHECK: dlr	%r6, %r0                # encoding: [0xb9,0x97,0x00,0x60]
+#CHECK: dlr	%r8, %r0                # encoding: [0xb9,0x97,0x00,0x80]
+#CHECK: dlr	%r10, %r0               # encoding: [0xb9,0x97,0x00,0xa0]
+#CHECK: dlr	%r12, %r0               # encoding: [0xb9,0x97,0x00,0xc0]
+#CHECK: dlr	%r14, %r0               # encoding: [0xb9,0x97,0x00,0xe0]
+
+	dlr	%r0,%r0
+	dlr	%r2,%r0
+	dlr	%r4,%r0
+	dlr	%r6,%r0
+	dlr	%r8,%r0
+	dlr	%r10,%r0
+	dlr	%r12,%r0
+	dlr	%r14,%r0
+
+#CHECK: ler	%f0, %f1                # encoding: [0x38,0x01]
+#CHECK: ler	%f2, %f3                # encoding: [0x38,0x23]
+#CHECK: ler	%f4, %f5                # encoding: [0x38,0x45]
+#CHECK: ler	%f6, %f7                # encoding: [0x38,0x67]
+#CHECK: ler	%f8, %f9                # encoding: [0x38,0x89]
+#CHECK: ler	%f10, %f11              # encoding: [0x38,0xab]
+#CHECK: ler	%f12, %f13              # encoding: [0x38,0xcd]
+#CHECK: ler	%f14, %f15              # encoding: [0x38,0xef]
+
+	ler	%f0,%f1
+	ler	%f2,%f3
+	ler	%f4,%f5
+	ler	%f6,%f7
+	ler	%f8,%f9
+	ler	%f10,%f11
+	ler	%f12,%f13
+	ler	%f14,%f15
+
+#CHECK: ldr	%f0, %f1                # encoding: [0x28,0x01]
+#CHECK: ldr	%f2, %f3                # encoding: [0x28,0x23]
+#CHECK: ldr	%f4, %f5                # encoding: [0x28,0x45]
+#CHECK: ldr	%f6, %f7                # encoding: [0x28,0x67]
+#CHECK: ldr	%f8, %f9                # encoding: [0x28,0x89]
+#CHECK: ldr	%f10, %f11              # encoding: [0x28,0xab]
+#CHECK: ldr	%f12, %f13              # encoding: [0x28,0xcd]
+#CHECK: ldr	%f14, %f15              # encoding: [0x28,0xef]
+
+	ldr	%f0,%f1
+	ldr	%f2,%f3
+	ldr	%f4,%f5
+	ldr	%f6,%f7
+	ldr	%f8,%f9
+	ldr	%f10,%f11
+	ldr	%f12,%f13
+	ldr	%f14,%f15
+
+#CHECK: lxr	%f0, %f1                # encoding: [0xb3,0x65,0x00,0x01]
+#CHECK: lxr	%f4, %f5                # encoding: [0xb3,0x65,0x00,0x45]
+#CHECK: lxr	%f8, %f9                # encoding: [0xb3,0x65,0x00,0x89]
+#CHECK: lxr	%f12, %f13              # encoding: [0xb3,0x65,0x00,0xcd]
+
+	lxr	%f0,%f1
+	lxr	%f4,%f5
+	lxr	%f8,%f9
+	lxr	%f12,%f13
+
+#CHECK: .cfi_offset %r0, 0
+#CHECK: .cfi_offset %r1, 8
+#CHECK: .cfi_offset %r2, 16
+#CHECK: .cfi_offset %r3, 24
+#CHECK: .cfi_offset %r4, 32
+#CHECK: .cfi_offset %r5, 40
+#CHECK: .cfi_offset %r6, 48
+#CHECK: .cfi_offset %r7, 56
+#CHECK: .cfi_offset %r8, 64
+#CHECK: .cfi_offset %r9, 72
+#CHECK: .cfi_offset %r10, 80
+#CHECK: .cfi_offset %r11, 88
+#CHECK: .cfi_offset %r12, 96
+#CHECK: .cfi_offset %r13, 104
+#CHECK: .cfi_offset %r14, 112
+#CHECK: .cfi_offset %r15, 120
+#CHECK: .cfi_offset %f0, 128
+#CHECK: .cfi_offset %f1, 136
+#CHECK: .cfi_offset %f2, 144
+#CHECK: .cfi_offset %f3, 152
+#CHECK: .cfi_offset %f4, 160
+#CHECK: .cfi_offset %f5, 168
+#CHECK: .cfi_offset %f6, 176
+#CHECK: .cfi_offset %f7, 184
+#CHECK: .cfi_offset %f8, 192
+#CHECK: .cfi_offset %f9, 200
+#CHECK: .cfi_offset %f10, 208
+#CHECK: .cfi_offset %f11, 216
+#CHECK: .cfi_offset %f12, 224
+#CHECK: .cfi_offset %f13, 232
+#CHECK: .cfi_offset %f14, 240
+#CHECK: .cfi_offset %f15, 248
+
+	.cfi_startproc
+	.cfi_offset %r0,0
+	.cfi_offset %r1,8
+	.cfi_offset %r2,16
+	.cfi_offset %r3,24
+	.cfi_offset %r4,32
+	.cfi_offset %r5,40
+	.cfi_offset %r6,48
+	.cfi_offset %r7,56
+	.cfi_offset %r8,64
+	.cfi_offset %r9,72
+	.cfi_offset %r10,80
+	.cfi_offset %r11,88
+	.cfi_offset %r12,96
+	.cfi_offset %r13,104
+	.cfi_offset %r14,112
+	.cfi_offset %r15,120
+	.cfi_offset %f0,128
+	.cfi_offset %f1,136
+	.cfi_offset %f2,144
+	.cfi_offset %f3,152
+	.cfi_offset %f4,160
+	.cfi_offset %f5,168
+	.cfi_offset %f6,176
+	.cfi_offset %f7,184
+	.cfi_offset %f8,192
+	.cfi_offset %f9,200
+	.cfi_offset %f10,208
+	.cfi_offset %f11,216
+	.cfi_offset %f12,224
+	.cfi_offset %f13,232
+	.cfi_offset %f14,240
+	.cfi_offset %f15,248
+	.cfi_endproc
diff --git a/test/MC/SystemZ/tokens.s b/test/MC/SystemZ/tokens.s
new file mode 100644
index 000000000000..2719752b7366
--- /dev/null
+++ b/test/MC/SystemZ/tokens.s
@@ -0,0 +1,79 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid instruction
+#CHECK: foo	100, 200
+#CHECK: error: unknown token in expression
+#CHECK: foo	100(, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	100(200), 300
+#CHECK: error: register expected
+#CHECK: foo	100(200,), 300
+#CHECK: error: %r0 used in an address
+#CHECK: foo	100(200,%r0), 300
+#CHECK: error: invalid instruction
+#CHECK: foo	100(200,%r1), 300
+#CHECK: error: invalid operand
+#CHECK: foo	100(%a0), 200
+#CHECK: error: %r0 used in an address
+#CHECK: foo	100(%r0), 200
+#CHECK: error: invalid operand
+#CHECK: foo	100(%r1,%a0), 200
+#CHECK: error: %r0 used in an address
+#CHECK: foo	100(%r1,%r0), 200
+#CHECK: error: unexpected token in address
+#CHECK: foo	100(%r1,%r2, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	100(%r1,%r2), 200
+#CHECK: error: unexpected token in argument list
+#CHECK: foo	100(%r1,%r2)(, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	%r0, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	%r15, 200
+#CHECK: error: invalid register
+#CHECK: foo	%r16, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	%f0, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	%f15, 200
+#CHECK: error: invalid register
+#CHECK: foo	%f16, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	%a0, 200
+#CHECK: error: invalid instruction
+#CHECK: foo	%a15, 200
+#CHECK: error: invalid register
+#CHECK: foo	%a16, 200
+#CHECK: error: invalid register
+#CHECK: foo	%c, 200
+#CHECK: error: invalid register
+#CHECK: foo	%, 200
+#CHECK: error: unknown token in expression
+#CHECK: foo	{, 200
+
+	foo	100, 200
+	foo	100(, 200
+	foo	100(200), 300
+	foo	100(200,), 300
+	foo	100(200,%r0), 300
+	foo	100(200,%r1), 300
+	foo	100(%a0), 200
+	foo	100(%r0), 200
+	foo	100(%r1,%a0), 200
+	foo	100(%r1,%r0), 200
+	foo	100(%r1,%r2, 200
+	foo	100(%r1,%r2), 200
+	foo	100(%r1,%r2)(, 200
+	foo	%r0, 200
+	foo	%r15, 200
+	foo	%r16, 200
+	foo	%f0, 200
+	foo	%f15, 200
+	foo	%f16, 200
+	foo	%a0, 200
+	foo	%a15, 200
+	foo	%a16, 200
+	foo	%c, 200
+	foo	%, 200
+	foo	{, 200
diff --git a/test/MC/X86/AlignedBundling/align-mode-argument-error.s b/test/MC/X86/AlignedBundling/align-mode-argument-error.s
index b4ce0a9d103a..37c74c86f754 100644
--- a/test/MC/X86/AlignedBundling/align-mode-argument-error.s
+++ b/test/MC/X86/AlignedBundling/align-mode-argument-error.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
 
 # Missing .bundle_align_mode argument
 # CHECK: error: unknown token
diff --git a/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s b/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
index 722bf7b9227f..a9a78a79569f 100644
--- a/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
+++ b/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
 
 # CHECK: ERROR: Fragment can't be larger than a bundle size
 
diff --git a/test/MC/X86/AlignedBundling/bundle-lock-option-error.s b/test/MC/X86/AlignedBundling/bundle-lock-option-error.s
index 82c5d7cf0e7b..b0b595f4812f 100644
--- a/test/MC/X86/AlignedBundling/bundle-lock-option-error.s
+++ b/test/MC/X86/AlignedBundling/bundle-lock-option-error.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
 
 # Missing .bundle_align_mode argument
 # CHECK: error: invalid option
diff --git a/test/MC/X86/AlignedBundling/lit.local.cfg b/test/MC/X86/AlignedBundling/lit.local.cfg
index 6c49f08b7496..ba763cf03ffc 100644
--- a/test/MC/X86/AlignedBundling/lit.local.cfg
+++ b/test/MC/X86/AlignedBundling/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s b/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s
index d45a9b4a5dfb..2f716544b154 100644
--- a/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s
+++ b/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
 
 # .bundle_lock can't come without a .bundle_align_mode before it
 
diff --git a/test/MC/X86/AlignedBundling/switch-section-locked-error.s b/test/MC/X86/AlignedBundling/switch-section-locked-error.s
index af41e1921252..a5812fd28ab1 100644
--- a/test/MC/X86/AlignedBundling/switch-section-locked-error.s
+++ b/test/MC/X86/AlignedBundling/switch-section-locked-error.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
 
 # This test invokes .bundle_lock and then switches to a different section
 # w/o the appropriate unlock.
diff --git a/test/MC/X86/AlignedBundling/unlock-without-lock-error.s b/test/MC/X86/AlignedBundling/unlock-without-lock-error.s
index 699511d4e6b6..a73f19ea4836 100644
--- a/test/MC/X86/AlignedBundling/unlock-without-lock-error.s
+++ b/test/MC/X86/AlignedBundling/unlock-without-lock-error.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+# RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
 
 # .bundle_unlock can't come without a .bundle_lock before it
 
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
new file mode 100644
index 000000000000..38f9190d949f
--- /dev/null
+++ b/test/MC/X86/avx512-encodings.s
@@ -0,0 +1,45 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl --show-encoding %s | FileCheck %s
+
+// CHECK: vinserti32x4
+// CHECK: encoding: [0x62,0xa3,0x55,0x48,0x38,0xcd,0x01]
+vinserti32x4  $1, %xmm21, %zmm5, %zmm17
+
+// CHECK: vinserti32x4
+// CHECK: encoding: [0x62,0xe3,0x1d,0x40,0x38,0x4f,0x10,0x01]
+vinserti32x4  $1, 256(%rdi), %zmm28, %zmm17
+
+// CHECK: vextracti32x4
+// CHECK: encoding: [0x62,0x33,0x7d,0x48,0x39,0xc9,0x01]
+vextracti32x4  $1, %zmm9, %xmm17
+
+// CHECK: vextracti64x4
+// CHECK: encoding: [0x62,0x33,0xfd,0x48,0x3b,0xc9,0x01]
+vextracti64x4  $1, %zmm9, %ymm17
+
+// CHECK: vextracti64x4
+// CHECK: encoding: [0x62,0x73,0xfd,0x48,0x3b,0x4f,0x10,0x01]
+vextracti64x4  $1, %zmm9, 512(%rdi)
+
+// CHECK: vpsrad
+// CHECK: encoding: [0x62,0xb1,0x35,0x40,0x72,0xe1,0x02]
+vpsrad $2, %zmm17, %zmm25
+
+// CHECK: vpsrad
+// CHECK: encoding: [0x62,0xf1,0x35,0x40,0x72,0x64,0xb7,0x08,0x02]
+vpsrad $2, 512(%rdi, %rsi, 4), %zmm25
+
+// CHECK: vpsrad
+// CHECK: encoding: [0x62,0x21,0x1d,0x48,0xe2,0xc9]
+vpsrad %xmm17, %zmm12, %zmm25
+
+// CHECK: vpsrad
+// CHECK: encoding: [0x62,0x61,0x1d,0x48,0xe2,0x4c,0xb7,0x20]
+vpsrad 512(%rdi, %rsi, 4), %zmm12, %zmm25
+
+// CHECK: vpbroadcastd {{.*}} {%k1} {z}
+// CHECK: encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc8]
+vpbroadcastd  %xmm0, %zmm1 {%k1} {z}
+
+// CHECK: vmovdqu64 {{.*}} {%k3}
+// CHECK: encoding: [0x62,0xf1,0xfe,0x4b,0x6f,0xc8]
+vmovdqu64 %zmm0, %zmm1 {%k3}
diff --git a/test/MC/X86/cfi_def_cfa-crash.s b/test/MC/X86/cfi_def_cfa-crash.s
new file mode 100644
index 000000000000..9d22d6e281cf
--- /dev/null
+++ b/test/MC/X86/cfi_def_cfa-crash.s
@@ -0,0 +1,73 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin -filetype=obj %s -o - | macho-dump | FileCheck %s
+
+// We were trying to generate compact unwind info for assembly like this.
+// The .cfi_def_cfa directive, however, throws a wrench into that and was
+// causing an llvm_unreachable() failure. Make sure the assembler can handle
+// the input. The actual eh_frames created using these directives are checked
+// elsewhere. This test is a simpler "does the code assemble" check.
+
+// rdar://15406518
+
+.macro SaveRegisters
+
+ push %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset rbp, -16
+
+ mov %rsp, %rbp
+ .cfi_def_cfa_register rbp
+
+ sub $$0x80+8, %rsp
+
+ movdqa %xmm0, -0x80(%rbp)
+ push %rax
+ movdqa %xmm1, -0x70(%rbp)
+ push %rdi
+ movdqa %xmm2, -0x60(%rbp)
+ push %rsi
+ movdqa %xmm3, -0x50(%rbp)
+ push %rdx
+ movdqa %xmm4, -0x40(%rbp)
+ push %rcx
+ movdqa %xmm5, -0x30(%rbp)
+ push %r8
+ movdqa %xmm6, -0x20(%rbp)
+ push %r9
+ movdqa %xmm7, -0x10(%rbp)
+
+.endmacro
+.macro RestoreRegisters
+
+ movdqa -0x80(%rbp), %xmm0
+ pop %r9
+ movdqa -0x70(%rbp), %xmm1
+ pop %r8
+ movdqa -0x60(%rbp), %xmm2
+ pop %rcx
+ movdqa -0x50(%rbp), %xmm3
+ pop %rdx
+ movdqa -0x40(%rbp), %xmm4
+ pop %rsi
+ movdqa -0x30(%rbp), %xmm5
+ pop %rdi
+ movdqa -0x20(%rbp), %xmm6
+ pop %rax
+ movdqa -0x10(%rbp), %xmm7
+
+ leave
+ .cfi_def_cfa rsp, 8
+ .cfi_same_value rbp
+
+.endmacro
+
+_foo:
+.cfi_startproc
+  SaveRegisters
+
+  RestoreRegisters
+  ret
+ .cfi_endproc
+
+
+
+// CHECK: 'section_name', '__eh_frame\x00
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index b2f337dac9d7..9677da731c17 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -63,6 +63,14 @@ _main:
     mov ECX, DWORD PTR [4*ECX + _fnan]
 // CHECK: movq %fs:320, %rax
     mov RAX, QWORD PTR FS:[320]
+// CHECK: movq %fs:320, %rax
+    mov RAX, QWORD PTR FS:320
+// CHECK: movq %rax, %fs:320
+    mov QWORD PTR FS:320, RAX
+// CHECK: movq %rax, %fs:20(%rbx)
+    mov QWORD PTR FS:20[rbx], RAX
+// CHECK: vshufpd $1, %xmm2, %xmm1, %xmm0
+    vshufpd XMM0, XMM1, XMM2, 1
 // CHECK: vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm1
     vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
 // CHECK: movsd	-8, %xmm5
@@ -322,4 +330,257 @@ _main:
     setnle al
 // CHECK: jne _foo
     jnz _foo
+// CHECK: outb %al, $4
+    out 4, al
     ret
+
+// CHECK: cmovbl %ebx, %eax
+    cmovc eax, ebx
+// CHECK: cmovel %ebx, %eax
+    cmovz eax, ebx
+// CHECK: cmovbel %ebx, %eax
+    cmovna eax, ebx
+// CHECK: cmovael %ebx, %eax
+    cmovnb eax, ebx
+// CHECK: cmovael %ebx, %eax
+    cmovnc eax, ebx
+// CHECK: cmovlel %ebx, %eax
+    cmovng eax, ebx
+// CHECK: cmovgel %ebx, %eax
+    cmovnl eax, ebx
+// CHECK: cmovnel %ebx, %eax
+    cmovnz eax, ebx
+// CHECK: cmovpl %ebx, %eax
+    cmovpe eax, ebx
+// CHECK: cmovnpl %ebx, %eax
+    cmovpo eax, ebx
+// CHECK: cmovbl %ebx, %eax
+    cmovnae eax, ebx
+// CHECK: cmoval %ebx, %eax
+    cmovnbe eax, ebx
+// CHECK: cmovll %ebx, %eax
+    cmovnge eax, ebx
+// CHECK: cmovgl %ebx, %eax
+    cmovnle eax, ebx
+
+// CHECK: shldw	%cl, %bx, %dx
+// CHECK: shldw	%cl, %bx, %dx
+// CHECK: shldw	$1, %bx, %dx
+// CHECK: shldw	%cl, %bx, (%rax)
+// CHECK: shldw	%cl, %bx, (%rax)
+// CHECK: shrdw	%cl, %bx, %dx
+// CHECK: shrdw	%cl, %bx, %dx
+// CHECK: shrdw	$1, %bx, %dx
+// CHECK: shrdw	%cl, %bx, (%rax)
+// CHECK: shrdw	%cl, %bx, (%rax)
+
+shld  DX, BX
+shld  DX, BX, CL
+shld  DX, BX, 1
+shld  [RAX], BX
+shld  [RAX], BX, CL
+shrd  DX, BX
+shrd  DX, BX, CL
+shrd  DX, BX, 1
+shrd  [RAX], BX
+shrd  [RAX], BX, CL
+
+// CHECK: btl $1, (%eax)
+// CHECK: btsl $1, (%eax)
+// CHECK: btrl $1, (%eax)
+// CHECK: btcl $1, (%eax)
+    bt DWORD PTR [EAX], 1
+    bt DWORD PTR [EAX], 1
+    bts DWORD PTR [EAX], 1
+    btr DWORD PTR [EAX], 1
+    btc DWORD PTR [EAX], 1
+
+//CHECK: divb	%bl
+//CHECK: divw	%bx
+//CHECK: divl	%ecx
+//CHECK: divl	3735928559(%ebx,%ecx,8)
+//CHECK: divl	69
+//CHECK: divl	32493
+//CHECK: divl	3133065982
+//CHECK: divl	305419896
+//CHECK: idivb	%bl
+//CHECK: idivw	%bx
+//CHECK: idivl	%ecx
+//CHECK: idivl	3735928559(%ebx,%ecx,8)
+//CHECK: idivl	69
+//CHECK: idivl	32493
+//CHECK: idivl	3133065982
+//CHECK: idivl	305419896
+    div AL, BL
+    div AX, BX
+    div EAX, ECX
+    div EAX, [ECX*8+EBX+0xdeadbeef]
+    div EAX, [0x45]
+    div EAX, [0x7eed]
+    div EAX, [0xbabecafe]
+    div EAX, [0x12345678]
+    idiv AL, BL
+    idiv AX, BX
+    idiv EAX, ECX
+    idiv EAX, [ECX*8+EBX+0xdeadbeef]
+    idiv EAX, [0x45]
+    idiv EAX, [0x7eed]
+    idiv EAX, [0xbabecafe]
+    idiv EAX, [0x12345678]
+
+
+// CHECK: inb %dx, %al
+// CHECK: inw %dx, %ax
+// CHECK: inl %dx, %eax
+// CHECK: outb %al, %dx
+// CHECK: outw %ax, %dx
+// CHECK: outl %eax, %dx
+    inb DX
+    inw DX
+    inl DX
+    outb DX
+    outw DX
+    outl DX
+
+// CHECK: xchgq %rcx, %rax
+// CHECK: xchgq %rcx, %rax
+// CHECK: xchgl %ecx, %eax
+// CHECK: xchgl %ecx, %eax
+// CHECK: xchgw %cx, %ax
+// CHECK: xchgw %cx, %ax
+xchg RAX, RCX
+xchg RCX, RAX
+xchg EAX, ECX
+xchg ECX, EAX
+xchg AX, CX
+xchg CX, AX
+
+// CHECK: xchgq %rax, (%ecx)
+// CHECK: xchgq %rax, (%ecx)
+// CHECK: xchgl %eax, (%ecx)
+// CHECK: xchgl %eax, (%ecx)
+// CHECK: xchgw %ax, (%ecx)
+// CHECK: xchgw %ax, (%ecx)
+xchg RAX, [ECX]
+xchg [ECX], RAX
+xchg EAX, [ECX]
+xchg [ECX], EAX
+xchg AX, [ECX]
+xchg [ECX], AX
+
+// CHECK: testq (%ecx), %rax
+// CHECK: testq (%ecx), %rax
+// CHECK: testl (%ecx), %eax
+// CHECK: testl (%ecx), %eax
+// CHECK: testw (%ecx), %ax
+// CHECK: testw (%ecx), %ax
+// CHECK: testb (%ecx), %al
+// CHECK: testb (%ecx), %al
+test RAX, [ECX]
+test [ECX], RAX
+test EAX, [ECX]
+test [ECX], EAX
+test AX, [ECX]
+test [ECX], AX
+test AL, [ECX]
+test [ECX], AL
+
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+fnstsw
+fnstsw AX
+fnstsw EAX
+fnstsw AL
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fdivrp %st(1)
+// CHECK: fdivp %st(1)
+faddp ST(1), ST(0)
+fmulp ST(1), ST(0)
+fsubp ST(1), ST(0)
+fsubrp ST(1), ST(0)
+fdivp ST(1), ST(0)
+fdivrp ST(1), ST(0)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fdivrp %st(1)
+// CHECK: fdivp %st(1)
+faddp ST(0), ST(1)
+fmulp ST(0), ST(1)
+fsubp ST(0), ST(1)
+fsubrp ST(0), ST(1)
+fdivp ST(0), ST(1)
+fdivrp ST(0), ST(1)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fdivrp %st(1)
+// CHECK: fdivp %st(1)
+faddp ST(1)
+fmulp ST(1)
+fsubp ST(1)
+fsubrp ST(1)
+fdivp ST(1)
+fdivrp ST(1)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fdivrp %st(1)
+// CHECK: fdivp %st(1)
+faddp
+fmulp
+fsubp
+fsubrp
+fdivp
+fdivrp
+
+// CHECK: fadd %st(1)
+// CHECK: fmul %st(1)
+// CHECK: fsub %st(1)
+// CHECK: fsubr %st(1)
+// CHECK: fdiv %st(1)
+// CHECK: fdivr %st(1)
+fadd ST(0), ST(1)
+fmul ST(0), ST(1)
+fsub ST(0), ST(1)
+fsubr ST(0), ST(1)
+fdiv ST(0), ST(1)
+fdivr ST(0), ST(1)
+
+// CHECK: fadd %st(0), %st(1)
+// CHECK: fmul %st(0), %st(1)
+// CHECK: fsubr %st(0), %st(1)
+// CHECK: fsub %st(0), %st(1)
+// CHECK: fdivr %st(0), %st(1)
+// CHECK: fdiv %st(0), %st(1)
+fadd ST(1), ST(0)
+fmul ST(1), ST(0)
+fsub ST(1), ST(0)
+fsubr ST(1), ST(0)
+fdiv ST(1), ST(0)
+fdivr ST(1), ST(0)
+
+// CHECK: fadd %st(1)
+// CHECK: fmul %st(1)
+// CHECK: fsub %st(1)
+// CHECK: fsubr %st(1)
+// CHECK: fdiv %st(1)
+// CHECK: fdivr %st(1)
+fadd ST(1)
+fmul ST(1)
+fsub ST(1)
+fsubr ST(1)
+fdiv ST(1)
+fdivr ST(1)
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
index ad280c7cf7de..19840aa7574c 100644
--- a/test/MC/X86/lit.local.cfg
+++ b/test/MC/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index c348915d23ce..732874b91872 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -3948,7 +3948,7 @@
 // CHECK:  encoding: [0xd9,0xca]
         	fxch	%st(2)
 
-// CHECK: fcom
+// CHECK: fcom	%st(1)
 // CHECK:  encoding: [0xd8,0xd1]
         	fcom
 
@@ -3972,7 +3972,7 @@
 // CHECK:  encoding: [0xda,0x15,0x78,0x56,0x34,0x12]
         	ficoml	0x12345678
 
-// CHECK: fcomp
+// CHECK: fcomp	%st(1)
 // CHECK:  encoding: [0xd8,0xd9]
         	fcomp
 
@@ -19660,3 +19660,37 @@ blendvps %xmm0, %xmm2, %xmm1
 blendvps (%eax), %xmm1
 // CHECK: blendvps (%eax), %xmm1
 blendvps %xmm0, (%eax), %xmm1
+
+
+// CHECK: btl $4, (%eax)
+// CHECK: btw $4, (%eax)
+// CHECK: btl $4, (%eax)
+// CHECK: btq $4, (%eax)
+// CHECK: btsl $4, (%eax)
+// CHECK: btsw $4, (%eax)
+// CHECK: btsl $4, (%eax)
+// CHECK: btsq $4, (%eax)
+// CHECK: btrl $4, (%eax)
+// CHECK: btrw $4, (%eax)
+// CHECK: btrl $4, (%eax)
+// CHECK: btrq $4, (%eax)
+// CHECK: btcl $4, (%eax)
+// CHECK: btcw $4, (%eax)
+// CHECK: btcl $4, (%eax)
+// CHECK: btcq $4, (%eax)
+bt $4, (%eax)
+btw $4, (%eax)
+btl $4, (%eax)
+btq $4, (%eax)
+bts $4, (%eax)
+btsw $4, (%eax)
+btsl $4, (%eax)
+btsq $4, (%eax)
+btr $4, (%eax)
+btrw $4, (%eax)
+btrl $4, (%eax)
+btrq $4, (%eax)
+btc $4, (%eax)
+btcw $4, (%eax)
+btcl $4, (%eax)
+btcq $4, (%eax)
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 57a00378d319..99136bd19cdf 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -894,9 +894,9 @@ pshufw $90, %mm4, %mm0
 	movsw	%ds:(%esi), %es:(%edi)
 	movsw	(%esi), %es:(%edi)
 
-// CHECK: movsd # encoding: [0xa5]
-// CHECK: movsd
-// CHECK: movsd
+// CHECK: movsl # encoding: [0xa5]
+// CHECK: movsl
+// CHECK: movsl
 	movsl
 	movsl	%ds:(%esi), %es:(%edi)
 	movsl	(%esi), %es:(%edi)
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 521a0776d8af..6b41f485f165 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -115,12 +115,12 @@
 // rdar://8470918
 smovb // CHECK: movsb
 smovw // CHECK: movsw
-smovl // CHECK: movsd
+smovl // CHECK: movsl
 smovq // CHECK: movsq
 
 // rdar://8456361
 // CHECK: rep
-// CHECK: movsd
+// CHECK: movsl
         rep movsd
 
 // CHECK: rep
@@ -241,10 +241,10 @@ cmovnzq %rbx, %rax
 
 // rdar://8407928
 // CHECK: inb	$127, %al
-// CHECK: inw	%dx
+// CHECK: inw	%dx, %ax
 // CHECK: outb	%al, $127
-// CHECK: outw	%dx
-// CHECK: inl	%dx
+// CHECK: outw	%ax, %dx
+// CHECK: inl	%dx, %eax
 inb	$0x7f
 inw	%dx
 outb	$0x7f
@@ -253,12 +253,12 @@ inl	%dx
 
 
 // PR8114
-// CHECK: outb	%dx
-// CHECK: outb	%dx
-// CHECK: outw	%dx
-// CHECK: outw	%dx
-// CHECK: outl	%dx
-// CHECK: outl	%dx
+// CHECK: outb	%al, %dx
+// CHECK: outb	%al, %dx
+// CHECK: outw	%ax, %dx
+// CHECK: outw	%ax, %dx
+// CHECK: outl	%eax, %dx
+// CHECK: outl	%eax, %dx
 
 out	%al, (%dx)
 outb	%al, (%dx)
@@ -267,12 +267,12 @@ outw	%ax, (%dx)
 out	%eax, (%dx)
 outl	%eax, (%dx)
 
-// CHECK: inb	%dx
-// CHECK: inb	%dx
-// CHECK: inw	%dx
-// CHECK: inw	%dx
-// CHECK: inl	%dx
-// CHECK: inl	%dx
+// CHECK: inb	%dx, %al
+// CHECK: inb	%dx, %al
+// CHECK: inw	%dx, %ax
+// CHECK: inw	%dx, %ax
+// CHECK: inl	%dx, %eax
+// CHECK: inl	%dx, %eax
 
 in	(%dx), %al
 inb	(%dx), %al
@@ -283,16 +283,16 @@ inl	(%dx), %eax
 
 // rdar://8431422
 
-// CHECK: fxch
-// CHECK: fucom
-// CHECK: fucomp
-// CHECK: faddp
+// CHECK: fxch %st(1)
+// CHECK: fucom %st(1)
+// CHECK: fucomp %st(1)
+// CHECK: faddp %st(1)
 // CHECK: faddp	%st(0)
-// CHECK: fsubp
-// CHECK: fsubrp
-// CHECK: fmulp
-// CHECK: fdivp
-// CHECK: fdivrp
+// CHECK: fsubp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fdivp %st(1)
+// CHECK: fdivrp %st(1)
 
 fxch
 fucom
@@ -305,9 +305,9 @@ fmulp
 fdivp
 fdivrp
 
-// CHECK: fcomi
+// CHECK: fcomi %st(1)
 // CHECK: fcomi	%st(2)
-// CHECK: fucomi
+// CHECK: fucomi %st(1)
 // CHECK: fucomi %st(2)
 // CHECK: fucomi %st(2)
 
@@ -317,10 +317,10 @@ fucomi
 fucomi	%st(2)
 fucomi	%st(2), %st
 
-// CHECK: fnstsw
-// CHECK: fnstsw
-// CHECK: fnstsw
-// CHECK: fnstsw
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
+// CHECK: fnstsw %ax
 
 fnstsw
 fnstsw %ax
@@ -549,8 +549,8 @@ cvttpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
 
 // rdar://8490728 - llvm-mc rejects 'movmskpd'
 movmskpd	%xmm6, %rax
-// CHECK: movmskpd	%xmm6, %rax
-// CHECK: encoding: [0x66,0x48,0x0f,0x50,0xc6]
+// CHECK: movmskpd	%xmm6, %eax
+// CHECK: encoding: [0x66,0x0f,0x50,0xc6]
 movmskpd	%xmm6, %eax
 // CHECK: movmskpd	%xmm6, %eax
 // CHECK: encoding: [0x66,0x0f,0x50,0xc6]
@@ -627,7 +627,7 @@ movsq
 // CHECK:   encoding: [0x48,0xa5]
 
 movsl
-// CHECK: movsd
+// CHECK: movsl
 // CHECK:   encoding: [0xa5]
 
 stosq
@@ -672,6 +672,38 @@ movl	0, %eax   // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00
 // CHECK: encoding: [0x48,0xc7,0xc0,0x0a,0x00,0x00,0x00]
         movq $10, %rax
 
+// CHECK: movabsb -6066930261531658096, %al
+// CHECK: encoding: [0xa0,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsb 0xabcdef1234567890,%al
+
+// CHECK: movabsw -6066930261531658096, %ax
+// CHECK: encoding: [0x66,0xa1,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsw 0xabcdef1234567890,%ax
+
+// CHECK: movabsl -6066930261531658096, %eax
+// CHECK: encoding: [0xa1,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsl 0xabcdef1234567890,%eax
+
+// CHECK: movabsq -6066930261531658096, %rax
+// CHECK: encoding: [0x48,0xa1,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsq 0xabcdef1234567890, %rax
+
+// CHECK: movabsb %al, -6066930261531658096
+// CHECK: encoding: [0xa2,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsb %al,0xabcdef1234567890
+
+// CHECK: movabsw %ax, -6066930261531658096
+// CHECK: encoding: [0x66,0xa3,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsw %ax,0xabcdef1234567890
+
+// CHECK: movabsl %eax, -6066930261531658096
+// CHECK: encoding: [0xa3,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsl %eax,0xabcdef1234567890
+
+// CHECK: movabsq %rax, -6066930261531658096
+// CHECK: encoding: [0x48,0xa3,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab]
+        movabsq %rax,0xabcdef1234567890
+
 // rdar://8014869
 //
 // CHECK: ret
@@ -813,7 +845,7 @@ lock/incl 1(%rsp)
 rep movsl
 // CHECK: rep
 // CHECK: encoding: [0xf3]
-// CHECK: movsd
+// CHECK: movsl
 // CHECK: encoding: [0xa5]
 
 
@@ -958,6 +990,22 @@ mov %gs, (%rsi)  // CHECK: movl	%gs, (%rsi) # encoding: [0x8c,0x2e]
 
 
 // rdar://8431864
+//CHECK: divb	%bl
+//CHECK: divw	%bx
+//CHECK: divl	%ecx
+//CHECK: divl	3735928559(%ebx,%ecx,8)
+//CHECK: divl	69
+//CHECK: divl	32493
+//CHECK: divl	3133065982
+//CHECK: divl	305419896
+//CHECK: idivb	%bl
+//CHECK: idivw	%bx
+//CHECK: idivl	%ecx
+//CHECK: idivl	3735928559(%ebx,%ecx,8)
+//CHECK: idivl	69
+//CHECK: idivl	32493
+//CHECK: idivl	3133065982
+//CHECK: idivl	305419896
 	div	%bl,%al
 	div	%bx,%ax
 	div	%ecx,%eax
@@ -1051,14 +1099,14 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 	movsw	%ds:(%rsi), %es:(%rdi)
 	movsw	(%rsi), %es:(%rdi)
 
-// CHECK: movsd # encoding: [0xa5]
-// CHECK: movsd
-// CHECK: movsd
+// CHECK: movsl # encoding: [0xa5]
+// CHECK: movsl
+// CHECK: movsl
 	movsl
 	movsl	%ds:(%rsi), %es:(%rdi)
 	movsl	(%rsi), %es:(%rdi)
 // rdar://10883092
-// CHECK: movsd
+// CHECK: movsl
 	movsl	(%rsi), (%rdi)
 
 // CHECK: movsq # encoding: [0x48,0xa5]
@@ -1236,3 +1284,107 @@ clac
 // CHECK: stac
 // CHECK: encoding: [0x0f,0x01,0xcb]
 stac
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fdivp %st(1)
+// CHECK: fdivrp %st(1)
+faddp %st(0), %st(1)
+fmulp %st(0), %st(1)
+fsubp %st(0), %st(1)
+fsubrp %st(0), %st(1)
+fdivp %st(0), %st(1)
+fdivrp %st(0), %st(1)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fdivp %st(1)
+// CHECK: fdivrp %st(1)
+faddp %st(1), %st(0)
+fmulp %st(1), %st(0)
+fsubp %st(1), %st(0)
+fsubrp %st(1), %st(0)
+fdivp %st(1), %st(0)
+fdivrp %st(1), %st(0)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fdivp %st(1)
+// CHECK: fdivrp %st(1)
+faddp %st(1)
+fmulp %st(1)
+fsubp %st(1)
+fsubrp %st(1)
+fdivp %st(1)
+fdivrp %st(1)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fdivp %st(1)
+// CHECK: fdivrp %st(1)
+faddp
+fmulp
+fsubp
+fsubrp
+fdivp
+fdivrp
+
+// CHECK: fadd %st(1)
+// CHECK: fmul %st(1)
+// CHECK: fsub %st(1)
+// CHECK: fsubr %st(1)
+// CHECK: fdiv %st(1)
+// CHECK: fdivr %st(1)
+fadd %st(1), %st(0)
+fmul %st(1), %st(0)
+fsub %st(1), %st(0)
+fsubr %st(1), %st(0)
+fdiv %st(1), %st(0)
+fdivr %st(1), %st(0)
+
+// CHECK: fadd %st(0), %st(1)
+// CHECK: fmul %st(0), %st(1)
+// CHECK: fsub %st(0), %st(1)
+// CHECK: fsubr %st(0), %st(1)
+// CHECK: fdiv %st(0), %st(1)
+// CHECK: fdivr %st(0), %st(1)
+fadd %st(0), %st(1)
+fmul %st(0), %st(1)
+fsub %st(0), %st(1)
+fsubr %st(0), %st(1)
+fdiv %st(0), %st(1)
+fdivr %st(0), %st(1)
+
+// CHECK: fadd %st(1)
+// CHECK: fmul %st(1)
+// CHECK: fsub %st(1)
+// CHECK: fsubr %st(1)
+// CHECK: fdiv %st(1)
+// CHECK: fdivr %st(1)
+fadd %st(1)
+fmul %st(1)
+fsub %st(1)
+fsubr %st(1)
+fdiv %st(1)
+fdivr %st(1)
+
+// CHECK: movd %xmm0, %eax
+// CHECK: movd %xmm0, %rax
+// CHECK: movd %xmm0, %rax
+// CHECK: vmovd %xmm0, %eax
+// CHECK: vmovq %xmm0, %rax
+// CHECK: vmovq %xmm0, %rax
+movd %xmm0, %eax
+movd %xmm0, %rax
+movq %xmm0, %rax
+vmovd %xmm0, %eax
+vmovd %xmm0, %rax
+vmovq %xmm0, %rax
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index 6da9e21fef66..5ba8064ff48a 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -2212,11 +2212,11 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x79,0x7e,0x30]
           vmovd  %xmm14, (%rax)
 
-// CHECK: vmovd  %rax, %xmm14
+// CHECK: vmovq  %rax, %xmm14
 // CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
           vmovd  %rax, %xmm14
 
-// CHECK: vmovd %xmm0, %rax
+// CHECK: vmovq %xmm0, %rax
 // CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
           vmovd %xmm0, %rax
 
@@ -4044,43 +4044,43 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a]
           vextractps   $10, %xmm8, %r8
 
-// CHECK: vextractps   $7, %xmm4, %rcx
+// CHECK: vextractps   $7, %xmm4, %ecx
 // CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07]
           vextractps   $7, %xmm4, %rcx
 
-// CHECK: vmovd  %xmm4, %rcx
+// CHECK: vmovq  %xmm4, %rcx
 // CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1]
           vmovd  %xmm4, %rcx
 
-// CHECK: vmovmskpd  %xmm4, %rcx
+// CHECK: vmovmskpd  %xmm4, %ecx
 // CHECK: encoding: [0xc5,0xf9,0x50,0xcc]
           vmovmskpd  %xmm4, %rcx
 
-// CHECK: vmovmskpd  %ymm4, %rcx
+// CHECK: vmovmskpd  %ymm4, %ecx
 // CHECK: encoding: [0xc5,0xfd,0x50,0xcc]
           vmovmskpd  %ymm4, %rcx
 
-// CHECK: vmovmskps  %xmm4, %rcx
+// CHECK: vmovmskps  %xmm4, %ecx
 // CHECK: encoding: [0xc5,0xf8,0x50,0xcc]
           vmovmskps  %xmm4, %rcx
 
-// CHECK: vmovmskps  %ymm4, %rcx
+// CHECK: vmovmskps  %ymm4, %ecx
 // CHECK: encoding: [0xc5,0xfc,0x50,0xcc]
           vmovmskps  %ymm4, %rcx
 
-// CHECK: vpextrb  $7, %xmm4, %rcx
+// CHECK: vpextrb  $7, %xmm4, %ecx
 // CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07]
           vpextrb  $7, %xmm4, %rcx
 
-// CHECK: vpinsrw  $7, %r8, %xmm15, %xmm8
+// CHECK: vpinsrw  $7, %r8d, %xmm15, %xmm8
 // CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07]
           vpinsrw  $7, %r8, %xmm15, %xmm8
 
-// CHECK: vpinsrw  $7, %rcx, %xmm4, %xmm6
+// CHECK: vpinsrw  $7, %ecx, %xmm4, %xmm6
 // CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07]
           vpinsrw  $7, %rcx, %xmm4, %xmm6
 
-// CHECK: vpmovmskb  %xmm4, %rcx
+// CHECK: vpmovmskb  %xmm4, %ecx
 // CHECK: encoding: [0xc5,0xf9,0xd7,0xcc]
           vpmovmskb  %xmm4, %rcx
 
@@ -4185,3 +4185,59 @@ _foo2:
 // CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
 // CHECK: encoding: [0xc4,0x02,0x3d,0x91,0x14,0x4f]
           vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
+
+// CHECK: vmovaps %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x78,0x28,0xc0]
+          vmovaps %xmm0, %xmm8
+
+// CHECK: vmovaps %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x78,0x29,0xc0]
+          vmovaps %xmm8, %xmm0
+
+// CHECK: vmovaps %ymm0, %ymm8
+// CHECK: encoding: [0xc5,0x7c,0x28,0xc0]
+          vmovaps %ymm0, %ymm8
+
+// CHECK: vmovaps %ymm8, %ymm0
+// CHECK: encoding: [0xc5,0x7c,0x29,0xc0]
+          vmovaps %ymm8, %ymm0
+
+// CHECK: vmovups %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x78,0x10,0xc0]
+          vmovups %xmm0, %xmm8
+
+// CHECK: vmovups %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0x78,0x11,0xc0]
+          vmovups %xmm8, %xmm0
+
+// CHECK: vmovups %ymm0, %ymm8
+// CHECK: encoding: [0xc5,0x7c,0x10,0xc0]
+          vmovups %ymm0, %ymm8
+
+// CHECK: vmovups %ymm8, %ymm0
+// CHECK: encoding: [0xc5,0x7c,0x11,0xc0]
+          vmovups %ymm8, %ymm0
+
+// CHECK: vmovss %xmm0, %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x7a,0x10,0xc0]
+          vmovss %xmm0, %xmm0, %xmm8
+
+// CHECK: vmovss %xmm0, %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0xba,0x10,0xc0]
+          vmovss %xmm0, %xmm8, %xmm0
+
+// CHECK: vmovss %xmm8, %xmm0, %xmm0
+// CHECK: encoding: [0xc5,0x7a,0x11,0xc0]
+          vmovss %xmm8, %xmm0, %xmm0
+
+// CHECK: vmovsd %xmm0, %xmm0, %xmm8
+// CHECK: encoding: [0xc5,0x7b,0x10,0xc0]
+          vmovsd %xmm0, %xmm0, %xmm8
+
+// CHECK: vmovsd %xmm0, %xmm8, %xmm0
+// CHECK: encoding: [0xc5,0xbb,0x10,0xc0]
+          vmovsd %xmm0, %xmm8, %xmm0
+
+// CHECK: vmovsd %xmm8, %xmm0, %xmm0
+// CHECK: encoding: [0xc5,0x7b,0x11,0xc0]
+          vmovsd %xmm8, %xmm0, %xmm0
diff --git a/test/MC/X86/x86_64-encoding.s b/test/MC/X86/x86_64-encoding.s
index cfdf87f3e343..40b93f0a7d05 100644
--- a/test/MC/X86/x86_64-encoding.s
+++ b/test/MC/X86/x86_64-encoding.s
@@ -120,6 +120,66 @@ movd %mm1, %edx
 // CHECK:  fixup A - offset: 5, value: CPI1_0-4
 pshufb	CPI1_0(%rip), %xmm1
 
+// CHECK: sha1rnds4 $1, %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x3a,0xcc,0xd1,0x01]
+sha1rnds4 $1, %xmm1, %xmm2
+
+// CHECK: sha1rnds4 $1, (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x3a,0xcc,0x10,0x01]
+sha1rnds4 $1, (%rax), %xmm2
+
+// CHECK: sha1nexte %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xc8,0xd1]
+sha1nexte %xmm1, %xmm2
+
+// CHECK: sha1msg1 %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xc9,0xd1]
+sha1msg1 %xmm1, %xmm2
+
+// CHECK: sha1msg1 (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xc9,0x10]
+sha1msg1 (%rax), %xmm2
+
+// CHECK: sha1msg2 %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xca,0xd1]
+sha1msg2 %xmm1, %xmm2
+
+// CHECK: sha1msg2 (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xca,0x10]
+sha1msg2 (%rax), %xmm2
+
+// CHECK: sha256rnds2 (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcb,0x10]
+sha256rnds2 (%rax), %xmm2
+
+// CHECK: sha256rnds2 %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcb,0xd1]
+sha256rnds2 %xmm1, %xmm2
+
+// CHECK: sha256rnds2 (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcb,0x10]
+sha256rnds2 %xmm0, (%rax), %xmm2
+
+// CHECK: sha256rnds2 %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcb,0xd1]
+sha256rnds2 %xmm0, %xmm1, %xmm2
+
+// CHECK: sha256msg1 %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcc,0xd1]
+sha256msg1 %xmm1, %xmm2
+
+// CHECK: sha256msg1 (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcc,0x10]
+sha256msg1 (%rax), %xmm2
+
+// CHECK: sha256msg2 %xmm1, %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcd,0xd1]
+sha256msg2 %xmm1, %xmm2
+
+// CHECK: sha256msg2 (%rax), %xmm2
+// CHECK:   encoding: [0x0f,0x38,0xcd,0x10]
+sha256msg2 (%rax), %xmm2
+
 // CHECK: movq  57005(,%riz), %rbx
 // CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
           movq  57005(,%riz), %rbx
@@ -171,3 +231,15 @@ pshufb	CPI1_0(%rip), %xmm1
 // CHECK: filds	(%rdi)
 // CHECK:  encoding: [0xdf,0x07]
         	filds	(%rdi)
+
+// CHECK: pmovmskb	%xmm5, %ecx
+// CHECK:  encoding: [0x66,0x0f,0xd7,0xcd]
+        	pmovmskb	%xmm5,%rcx
+
+// CHECK: pinsrw $3, %ecx, %xmm5
+// CHECK: encoding: [0x66,0x0f,0xc4,0xe9,0x03]
+          pinsrw $3, %ecx, %xmm5
+
+// CHECK: pinsrw $3, %ecx, %xmm5
+// CHECK: encoding: [0x66,0x0f,0xc4,0xe9,0x03]
+          pinsrw $3, %rcx, %xmm5
diff --git a/test/MC/X86/x86_64-hle-encoding.s b/test/MC/X86/x86_64-hle-encoding.s
new file mode 100644
index 000000000000..aaaca7d9c026
--- /dev/null
+++ b/test/MC/X86/x86_64-hle-encoding.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: xacquire
+// CHECK: [0xf2]
+    xacquire
+
+// CHECK: xrelease
+// CHECK: [0xf3]
+    xrelease
diff --git a/test/MC/X86/x86_64-tbm-encoding.s b/test/MC/X86/x86_64-tbm-encoding.s
new file mode 100644
index 000000000000..180578bfc01c
--- /dev/null
+++ b/test/MC/X86/x86_64-tbm-encoding.s
@@ -0,0 +1,196 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// bextri 32 reg
+// CHECK: bextr   $2814, %edi, %eax
+// CHECK: encoding: [0x8f,0xea,0x78,0x10,0xc7,0xfe,0x0a,0x00,0x00]
+          bextr   $2814, %edi, %eax
+
+// bextri 32 mem
+// CHECK: bextr   $2814, (%rdi), %eax
+// CHECK: encoding: [0x8f,0xea,0x78,0x10,0x07,0xfe,0x0a,0x00,0x00]
+          bextr   $2814, (%rdi), %eax
+
+// bextri 64 reg
+// CHECK: bextr   $2814, %rdi, %rax
+// CHECK: encoding: [0x8f,0xea,0xf8,0x10,0xc7,0xfe,0x0a,0x00,0x00]
+          bextr   $2814, %rdi, %rax
+
+// bextri 64 mem
+// CHECK: bextr   $2814, (%rdi), %rax
+// CHECK: encoding: [0x8f,0xea,0xf8,0x10,0x07,0xfe,0x0a,0x00,0x00]
+          bextr   $2814, (%rdi), %rax
+
+// blcfill 32 reg
+// CHECK: blcfill %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xcf]
+          blcfill %edi, %eax
+
+// blcfill 32 mem
+// CHECK: blcfill (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x0f]
+          blcfill (%rdi), %eax
+
+// blcfill 64 reg
+// CHECK: blcfill %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xcf]
+          blcfill %rdi, %rax
+
+// blcfill 64 mem
+// CHECK: blcfill (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x0f]
+          blcfill (%rdi), %rax
+
+// blci   32 reg
+// CHECK: blci    %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0xf7]
+          blci    %edi, %eax
+
+// blci   32 mem
+// CHECK: blci    (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0x37]
+          blci    (%rdi), %eax
+
+// blci   64 reg
+// CHECK: blci    %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0xf7]
+          blci    %rdi, %rax
+
+// blci   64 mem
+// CHECK: blci    (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0x37]
+          blci    (%rdi), %rax
+
+// blcic  32 reg
+// CHECK: blcic   %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xef]
+          blcic   %edi, %eax
+
+// blcic  32 mem
+// CHECK: blcic   (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x2f]
+          blcic   (%rdi), %eax
+
+// blcic  64 reg
+// CHECK: blcic   %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xef]
+          blcic   %rdi, %rax
+
+// blcic  64 mem
+// CHECK: blcic   (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x2f]
+          blcic   (%rdi), %rax
+
+// blcmsk 32 reg
+// CHECK: blcmsk  %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0xcf]
+          blcmsk  %edi, %eax
+
+// blcmsk 32 mem
+// CHECK: blcmsk  (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x02,0x0f]
+          blcmsk  (%rdi), %eax
+
+// blcmsk 64 reg
+// CHECK: blcmsk  %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0xcf]
+          blcmsk  %rdi, %rax
+
+// blcmsk 64 mem
+// CHECK: blcmsk  (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x02,0x0f]
+          blcmsk  (%rdi), %rax
+
+// blcs   32 reg
+// CHECK: blcs    %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xdf]
+          blcs    %edi, %eax
+
+// blcs   32 mem
+// CHECK: blcs    (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x1f]
+          blcs    (%rdi), %eax
+
+// blcs   64 reg
+// CHECK: blcs    %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xdf]
+          blcs    %rdi, %rax
+
+// blcs   64 mem
+// CHECK: blcs    (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x1f]
+          blcs    (%rdi), %rax
+
+// blsfill 32 reg
+// CHECK: blsfill %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xd7]
+          blsfill %edi, %eax
+
+// blsfill 32 mem
+// CHECK: blsfill (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x17]
+          blsfill (%rdi), %eax
+
+// blsfill 64 reg
+// CHECK: blsfill %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xd7]
+          blsfill %rdi, %rax
+
+// blsfill 64 mem
+// CHECK: blsfill (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x17]
+          blsfill (%rdi), %rax
+
+// blsic  32 reg
+// CHECK: blsic   %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xf7]
+          blsic   %edi, %eax
+
+// blsic  32 mem
+// CHECK: blsic   (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x37]
+          blsic   (%rdi), %eax
+
+// blsic  64 reg
+// CHECK: blsic   %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xf7]
+          blsic   %rdi, %rax
+
+// t1mskc 32 reg
+// CHECK: t1mskc  %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xff]
+          t1mskc  %edi, %eax
+
+// t1mskc 32 mem
+// CHECK: t1mskc  (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x3f]
+          t1mskc  (%rdi), %eax
+
+// t1mskc 64 reg
+// CHECK: t1mskc  %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xff]
+          t1mskc  %rdi, %rax
+
+// t1mskc 64 mem
+// CHECK: t1mskc  (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x3f]
+          t1mskc  (%rdi), %rax
+
+// tzmsk  32 reg
+// CHECK: tzmsk   %edi, %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0xe7]
+          tzmsk   %edi, %eax
+
+// tzmsk  32 mem
+// CHECK: tzmsk   (%rdi), %eax
+// CHECK: encoding: [0x8f,0xe9,0x78,0x01,0x27]
+          tzmsk   (%rdi), %eax
+
+// tzmsk  64 reg
+// CHECK: tzmsk   %rdi, %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0xe7]
+          tzmsk   %rdi, %rax
+
+// tzmsk  64 mem
+// CHECK: tzmsk   (%rdi), %rax
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x01,0x27]
+          tzmsk   (%rdi), %rax
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index 6e14d62fda4c..a974233d2f49 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -28,3 +28,6 @@ lea (%rsp, %rbp, $4), %rax
 // rdar://10423777
 // 64: error: index register is 32-bit, but base register is 64-bit
 movq (%rsi,%ecx),%xmm0
+
+// 32: error: invalid operand for instruction
+outb al, 4
diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s
index 396e3022ebec..059f591168d5 100644
--- a/test/MC/X86/x86_nop.s
+++ b/test/MC/X86/x86_nop.s
@@ -1,13 +1,36 @@
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=generic %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i386 %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i486 %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i586 %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium-mmx %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s | llvm-objdump -d - | FileCheck %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i686 %s | llvm-objdump -d - | not FileCheck %s
-
-# CHECK-NOT: nop{{[lw]}}
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=generic %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i386 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i486 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i586 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=pentium %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=pentium-mmx %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=geode %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=i686 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=k6 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=k6-2 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=k6-3 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=winchip-c6 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=winchip2 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=c3 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=c3-2 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-pc-linux -mcpu=core2 %s | llvm-objdump -d - | FileCheck --check-prefix=NOPL %s
+
+
 inc %eax
 .align 8
 inc %eax
+
+// CHECK: 0:	40                                           	incl	%eax
+// CHECK: 1:	90                                           	nop
+// CHECK: 2:	90                                           	nop
+// CHECK: 3:	90                                           	nop
+// CHECK: 4:	90                                           	nop
+// CHECK: 5:	90                                           	nop
+// CHECK: 6:	90                                           	nop
+// CHECK: 7:	90                                           	nop
+// CHECK: 8:	40                                           	incl	%eax
+
+
+// NOPL: 0:	40                                           	incl	%eax
+// NOPL: 1:	0f 1f 80 00 00 00 00                         	nopl	(%eax)
+// NOPL: 8:	40                                           	incl	%eax
diff --git a/test/Makefile b/test/Makefile
index 88573c552386..d3227dd5a347 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -31,16 +31,10 @@ endif
 
 ifdef TESTSUITE
 LIT_TESTSUITE := $(TESTSUITE)
-CLEANED_TESTSUITE := $(patsubst %/,%,$(TESTSUITE))
-CLEANED_TESTSUITE := $(patsubst test/%,%,$(CLEANED_TESTSUITE))
 else
 LIT_TESTSUITE := .
 endif
 
-ifdef VG
-VALGRIND := valgrind --tool=memcheck --quiet --trace-children=yes --error-exitcode=3 --leak-check=full $(VALGRIND_EXTRA_ARGS)
-endif
-
 # Check what to run for -all.
 LIT_ALL_TESTSUITES := $(LIT_TESTSUITE)
 
@@ -66,6 +60,15 @@ clang-tools-site-cfg: FORCE
 	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg
 extra-site-cfgs:: clang-tools-site-cfg
 endif
+
+ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/polly/Makefile && echo OK), OK)
+LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/polly/test
+
+# Force creation of Polly's lit.site.cfg.
+polly-tools-site-cfg: FORCE
+	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/polly/test lit.site.cfg
+extra-site-cfgs:: polly-tools-site-cfg
+endif
 endif
 endif
 
@@ -113,16 +116,6 @@ else
 ENABLE_ASSERTIONS=1
 endif
 
-# Derive whether or not LTO is enabled by checking the extra options.
-LTO_IS_ENABLED := 0
-ifneq ($(findstring -flto,$(CompileCommonOpts)),)
-LTO_IS_ENABLED := 1
-else
-ifneq ($(findstring -O4,$(CompileCommonOpts)),)
-LTO_IS_ENABLED := 1
-endif
-endif
-
 lit.site.cfg: FORCE
 	@echo "Making LLVM 'lit.site.cfg' file..."
 	@$(ECHOPATH) s=@LLVM_HOST_TRIPLE@=$(HOST_TRIPLE)=g > lit.tmp
@@ -133,10 +126,9 @@ lit.site.cfg: FORCE
 	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp
 	@$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=$(PYTHON)=g >> lit.tmp
-	@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp
+	@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -cclib -L$(LibDir) -I $(LibDir)/ocaml=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
-	@$(ECHOPATH) s=@LTO_IS_ENABLED@=$(LTO_IS_ENABLED)=g >> lit.tmp
 	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
 	@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp
diff --git a/test/Makefile.tests b/test/Makefile.tests
index c60c90c075d2..b2e53006bf08 100644
--- a/test/Makefile.tests
+++ b/test/Makefile.tests
@@ -47,18 +47,6 @@ clean::
 	$(RM) -f a.out core
 	$(RM) -rf Output/
 
-# Compile from X.c to Output/X.ll
-Output/%.ll: %.c $(LCC1) Output/.dir $(INCLUDES)
-	-$(LLVMCC) $(CPPFLAGS) $(LCCFLAGS) -S $< -o $@
-
-# Compile from X.cpp to Output/X.ll
-Output/%.ll: %.cpp $(LCC1XX) Output/.dir $(INCLUDES)
-	-$(LLVMCXX) $(CPPFLAGS) $(LCXXFLAGS) -S $< -o $@
-
-# Compile from X.cc to Output/X.ll
-Output/%.ll: %.cc $(LCC1XX) Output/.dir $(INCLUDES)
-	-$(LLVMCXX) $(CPPFLAGS) $(LCXXFLAGS) -S $< -o $@
-
 # LLVM Assemble from Output/X.ll to Output/X.bc.  Output/X.ll must have come
 # from GCC output, so use GCCAS.
 #
diff --git a/test/Object/ARM/macho-data-in-code.test b/test/Object/ARM/macho-data-in-code.test
new file mode 100644
index 000000000000..dca084c2caba
--- /dev/null
+++ b/test/Object/ARM/macho-data-in-code.test
@@ -0,0 +1,7 @@
+RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
+
+CHECK:      12:	80 bd                                        	pop	{r7, pc}
+
+CHECK:      14:	38 00 00 00                                  	.long 56	@ KIND_DATA
+CHECK:      16:	00 00                                        	movs	r0, r0
+
diff --git a/test/Object/Inputs/COFF/i386.yaml b/test/Object/Inputs/COFF/i386.yaml
index f7631821c192..1badad8d5915 100644
--- a/test/Object/Inputs/COFF/i386.yaml
+++ b/test/Object/Inputs/COFF/i386.yaml
@@ -12,17 +12,17 @@ sections:
     Relocations:
       - !Relocation
         VirtualAddress: 0xe
-        SymbolTableIndex: 5
+        SymbolName: L_.str
         Type: IMAGE_REL_I386_DIR32
 
       - !Relocation
         VirtualAddress: 0x13
-        SymbolTableIndex: 6
+        SymbolName: _puts
         Type: IMAGE_REL_I386_REL32
 
       - !Relocation
         VirtualAddress: 0x18
-        SymbolTableIndex: 7
+        SymbolName: _SomeOtherFunction
         Type: IMAGE_REL_I386_REL32
 
   - !Section
diff --git a/test/Object/Inputs/COFF/x86-64.yaml b/test/Object/Inputs/COFF/x86-64.yaml
index 5134071cda41..b775ae9cdfbd 100644
--- a/test/Object/Inputs/COFF/x86-64.yaml
+++ b/test/Object/Inputs/COFF/x86-64.yaml
@@ -11,17 +11,17 @@ sections:
     Relocations:
       - !Relocation
         VirtualAddress: 0xf
-        SymbolTableIndex: 5
+        SymbolName: L.str
         Type: IMAGE_REL_AMD64_REL32
 
       - !Relocation
         VirtualAddress: 0x14
-        SymbolTableIndex: 6
+        SymbolName: puts
         Type: IMAGE_REL_AMD64_REL32
 
       - !Relocation
         VirtualAddress: 0x19
-        SymbolTableIndex: 7
+        SymbolName: SomeOtherFunction
         Type: IMAGE_REL_AMD64_REL32
 
   - !Section
diff --git a/test/Object/Inputs/ELF/BE32.yaml b/test/Object/Inputs/ELF/BE32.yaml
new file mode 100644
index 000000000000..2a18d6fca7cb
--- /dev/null
+++ b/test/Object/Inputs/ELF/BE32.yaml
@@ -0,0 +1,6 @@
+!ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS32
+  Data: ELFDATA2MSB
+  Type: ET_EXEC
+  Machine: EM_PPC
diff --git a/test/Object/Inputs/ELF/BE64.yaml b/test/Object/Inputs/ELF/BE64.yaml
new file mode 100644
index 000000000000..091793f39529
--- /dev/null
+++ b/test/Object/Inputs/ELF/BE64.yaml
@@ -0,0 +1,6 @@
+!ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS64
+  Data: ELFDATA2MSB
+  Type: ET_EXEC
+  Machine: EM_PPC64
diff --git a/test/Object/Inputs/ELF/LE32.yaml b/test/Object/Inputs/ELF/LE32.yaml
new file mode 100644
index 000000000000..021fb0d93c2e
--- /dev/null
+++ b/test/Object/Inputs/ELF/LE32.yaml
@@ -0,0 +1,6 @@
+!ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS32
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_386
diff --git a/test/Object/Inputs/ELF/LE64.yaml b/test/Object/Inputs/ELF/LE64.yaml
new file mode 100644
index 000000000000..2fefc0b0c98f
--- /dev/null
+++ b/test/Object/Inputs/ELF/LE64.yaml
@@ -0,0 +1,6 @@
+!ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_X86_64
diff --git a/test/Archive/GNU.a b/test/Object/Inputs/GNU.a
index 4c09881eb39d..4c09881eb39d 100644
--- a/test/Archive/GNU.a
+++ b/test/Object/Inputs/GNU.a
diff --git a/test/Archive/IsNAN.o b/test/Object/Inputs/IsNAN.o
index 7b3a12a69d74..7b3a12a69d74 100644
--- a/test/Archive/IsNAN.o
+++ b/test/Object/Inputs/IsNAN.o
diff --git a/test/Archive/MacOSX.a b/test/Object/Inputs/MacOSX.a
index 8ba1e6d30e3d..8ba1e6d30e3d 100644
--- a/test/Archive/MacOSX.a
+++ b/test/Object/Inputs/MacOSX.a
diff --git a/test/Archive/SVR4.a b/test/Object/Inputs/SVR4.a
index 3947813ac60a..3947813ac60a 100644
--- a/test/Archive/SVR4.a
+++ b/test/Object/Inputs/SVR4.a
diff --git a/test/Object/Inputs/archive-test.a-corrupt-symbol-table b/test/Object/Inputs/archive-test.a-corrupt-symbol-table
new file mode 100644
index 000000000000..34e5ed799d3c
--- /dev/null
+++ b/test/Object/Inputs/archive-test.a-corrupt-symbol-table
diff --git a/test/Object/Inputs/archive-test.a-empty b/test/Object/Inputs/archive-test.a-empty
new file mode 100644
index 000000000000..8b277f0dd5dc
--- /dev/null
+++ b/test/Object/Inputs/archive-test.a-empty
@@ -0,0 +1 @@
+!<arch>
diff --git a/test/Object/Inputs/archive-test.a-gnu-minimal b/test/Object/Inputs/archive-test.a-gnu-minimal
new file mode 100644
index 000000000000..a243273c484e
--- /dev/null
+++ b/test/Object/Inputs/archive-test.a-gnu-minimal
@@ -0,0 +1,2 @@
+!<arch>
+test/           1372964340  1000  1000  100664  0         `
diff --git a/test/Object/Inputs/archive-test.a-gnu-no-symtab b/test/Object/Inputs/archive-test.a-gnu-no-symtab
new file mode 100644
index 000000000000..0a5b237a2910
--- /dev/null
+++ b/test/Object/Inputs/archive-test.a-gnu-no-symtab
@@ -0,0 +1,5 @@
+!<arch>
+//                                              24        `
+a-very-long-file-name/
+
+/0              1372864788  1000  1000  100664  0         `
diff --git a/test/Object/Inputs/coff_archive_short.lib b/test/Object/Inputs/coff_archive_short.lib
new file mode 100644
index 000000000000..41cb1c25d1ef
--- /dev/null
+++ b/test/Object/Inputs/coff_archive_short.lib
diff --git a/test/Object/Inputs/corrupt-version.elf-x86_64 b/test/Object/Inputs/corrupt-version.elf-x86_64
new file mode 100644
index 000000000000..1241a27a7bb6
--- /dev/null
+++ b/test/Object/Inputs/corrupt-version.elf-x86_64
diff --git a/test/Object/Inputs/corrupt.elf-x86-64 b/test/Object/Inputs/corrupt.elf-x86-64
new file mode 100644
index 000000000000..8ae5f1759746
--- /dev/null
+++ b/test/Object/Inputs/corrupt.elf-x86-64
diff --git a/test/Object/Inputs/elf-reloc-no-sym.x86_64 b/test/Object/Inputs/elf-reloc-no-sym.x86_64
new file mode 100755
index 000000000000..a41b4e0d78d9
--- /dev/null
+++ b/test/Object/Inputs/elf-reloc-no-sym.x86_64
diff --git a/test/Archive/evenlen b/test/Object/Inputs/evenlen
index 59ee8d552e37..59ee8d552e37 100644
--- a/test/Archive/evenlen
+++ b/test/Object/Inputs/evenlen
diff --git a/test/Object/Inputs/macho-data-in-code.macho-thumbv7 b/test/Object/Inputs/macho-data-in-code.macho-thumbv7
new file mode 100644
index 000000000000..57649302dd94
--- /dev/null
+++ b/test/Object/Inputs/macho-data-in-code.macho-thumbv7
diff --git a/test/Object/Inputs/macho-universal.x86_64.i386 b/test/Object/Inputs/macho-universal.x86_64.i386
new file mode 100755
index 000000000000..36d5fc29d681
--- /dev/null
+++ b/test/Object/Inputs/macho-universal.x86_64.i386
diff --git a/test/Archive/oddlen b/test/Object/Inputs/oddlen
index 8cf5bd181b1b..8cf5bd181b1b 100644
--- a/test/Archive/oddlen
+++ b/test/Object/Inputs/oddlen
diff --git a/test/Object/Inputs/program-headers.mips b/test/Object/Inputs/program-headers.mips
new file mode 100755
index 000000000000..54ebfea31f0c
--- /dev/null
+++ b/test/Object/Inputs/program-headers.mips
diff --git a/test/Object/Inputs/trivial-executable-test.macho-x86-64 b/test/Object/Inputs/trivial-executable-test.macho-x86-64
new file mode 100755
index 000000000000..50a6bab64c47
--- /dev/null
+++ b/test/Object/Inputs/trivial-executable-test.macho-x86-64
diff --git a/test/Object/Inputs/trivial-object-test2.elf-x86-64 b/test/Object/Inputs/trivial-object-test2.elf-x86-64
new file mode 100644
index 000000000000..9124518bd92e
--- /dev/null
+++ b/test/Object/Inputs/trivial-object-test2.elf-x86-64
diff --git a/test/Archive/very_long_bytecode_file_name.bc b/test/Object/Inputs/very_long_bytecode_file_name.bc
index f7fce249020a..f7fce249020a 100644
--- a/test/Archive/very_long_bytecode_file_name.bc
+++ b/test/Object/Inputs/very_long_bytecode_file_name.bc
diff --git a/test/Object/Inputs/weak-global-symbol.macho-i386 b/test/Object/Inputs/weak-global-symbol.macho-i386
new file mode 100644
index 000000000000..a9c8e0cde156
--- /dev/null
+++ b/test/Object/Inputs/weak-global-symbol.macho-i386
diff --git a/test/Archive/xpg4.a b/test/Object/Inputs/xpg4.a
index b2bdb51188fe..b2bdb51188fe 100644
--- a/test/Archive/xpg4.a
+++ b/test/Object/Inputs/xpg4.a
diff --git a/test/Object/Mips/lit.local.cfg b/test/Object/Mips/lit.local.cfg
index 149931749822..88262fb1d323 100644
--- a/test/Object/Mips/lit.local.cfg
+++ b/test/Object/Mips/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Mips' in targets:
     config.unsupported = True
diff --git a/test/Object/X86/lit.local.cfg b/test/Object/X86/lit.local.cfg
index 6a29e9250f3c..ba763cf03ffc 100644
--- a/test/Object/X86/lit.local.cfg
+++ b/test/Object/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.test']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Object/X86/objdump-cfg-invalid-opcode.yaml b/test/Object/X86/objdump-cfg-invalid-opcode.yaml
new file mode 100644
index 000000000000..56ab1d274eef
--- /dev/null
+++ b/test/Object/X86/objdump-cfg-invalid-opcode.yaml
@@ -0,0 +1,58 @@
+# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
+# REQUIRES: shell
+#
+# Generated from:
+# main:
+# .LBL0_1:
+# 	movq	8(%rsi), %rax
+# 	<invalid opcode: 06>
+# 	nop
+
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+    Content: "488B46080690"
+
+## 0000000000000000 <main>:
+
+#CFG: Atoms:
+#CFG:   - StartAddress:    0x0000000000000000
+#CFG:     Size:            4
+#CFG:     Type:            Text
+
+##    0:   48 8b 46 08             mov    0x8(%rsi),%rax
+#CFG:       - Inst:            MOV64rm
+#CFG:         Size:            4
+#CFG:         Ops:             [ RRAX, RRSI, I1, R, I8, R ]
+
+
+#CFG:   - StartAddress:    0x0000000000000004
+#CFG:     Size:            1
+#CFG:     Type:            Data
+
+##    4:   06                      (bad)
+#CFG:     Content:         06
+
+#CFG:   - StartAddress:    0x0000000000000005
+#CFG:     Size:            1
+#CFG:     Type:            Text
+
+##    5:   90                      nop
+#CFG:       - Inst:            NOOP
+#CFG:         Size:            1
+#CFG:         Ops:             [  ]
+
+Symbols:
+  Global:
+    - Name: main
+      Type: STT_FUNC
+      Section: .text
+      Value: 0x0
+      Size: 6
diff --git a/test/Object/X86/objdump-cfg-textatomsize.yaml b/test/Object/X86/objdump-cfg-textatomsize.yaml
new file mode 100644
index 000000000000..87cb4e13ec1e
--- /dev/null
+++ b/test/Object/X86/objdump-cfg-textatomsize.yaml
@@ -0,0 +1,39 @@
+# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
+# REQUIRES: shell
+#
+# Generated from:
+# main:
+# .LBL0_1:
+# 	jmp	.LBL0_1
+#
+
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+    Content: "EBFE"
+
+## 0000000000000000 <main>:
+
+#CFG: Atoms:
+#CFG:   - StartAddress:    0x0000000000000000
+#CFG:     Size:            2
+
+##    0:   eb fe          jmp $-2
+#CFG:       - Inst:            JMP_1
+#CFG:         Size:            2
+#CFG:         Ops:             [ I-2 ]
+
+Symbols:
+  Global:
+    - Name: main
+      Type: STT_FUNC
+      Section: .text
+      Value: 0x0
+      Size: 2
diff --git a/test/Object/X86/objdump-cfg.yaml b/test/Object/X86/objdump-cfg.yaml
new file mode 100644
index 000000000000..c5bff03c1d0c
--- /dev/null
+++ b/test/Object/X86/objdump-cfg.yaml
@@ -0,0 +1,86 @@
+# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
+# REQUIRES: shell
+#
+# Generated from:
+# main:
+# 	movl	$48, %eax
+# 	cmpl	$3, %edi
+# 	jl	.LBB0_2
+# 	movq	8(%rsi), %rax
+# 	movsbl	(%rax), %eax
+# .LBB0_2:
+# 	ret
+#
+
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+    Content: "B83000000083FF037C07488B46080FBE00C3"
+
+## 0000000000000000 <main>:
+
+#CFG: Atoms:
+#CFG:   - StartAddress:    0x0000000000000000
+#CFG:     Size:            10
+
+##    0:   b8 30 00 00 00          mov    $0x30,%eax
+#CFG:       - Inst:            MOV32ri
+#CFG:         Size:            5
+#CFG:         Ops:             [ REAX, I48 ]
+
+##    5:   83 ff 03                cmp    $0x3,%edi
+#CFG:       - Inst:            CMP32ri8
+#CFG:         Size:            3
+#CFG:         Ops:             [ REDI, I3 ]
+
+##    8:   7c 07                   jl     11 <main+0x11>
+#CFG:       - Inst:            JL_1
+#CFG:         Size:            2
+#CFG:         Ops:             [ I7 ]
+
+#CFG:   - StartAddress:    0x000000000000000A
+#CFG:     Size:            7
+
+##    a:   48 8b 46 08             mov    0x8(%rsi),%rax
+#CFG:       - Inst:            MOV64rm
+#CFG:         Size:            4
+#CFG:         Ops:             [ RRAX, RRSI, I1, R, I8, R ]
+
+##    e:   0f be 00                movsbl (%rax),%eax
+#CFG:       - Inst:            MOVSX32rm8
+#CFG:         Size:            3
+#CFG:         Ops:             [ REAX, RRAX, I1, R, I0, R ]
+#CFG:   - StartAddress:    0x0000000000000011
+#CFG:     Size:            1
+
+##   11:   c3                      retq
+#CFG:       - Inst:            RET
+#CFG:         Size:            1
+#CFG:         Ops:             [  ]
+
+Symbols:
+  Global:
+    - Name: main
+      Type: STT_FUNC
+      Section: .text
+      Value: 0x0
+      Size: 18
+
+#CFG: Functions:
+#CFG:     BasicBlocks:
+#CFG:       - Address:         0x0000000000000000
+#CFG:         Preds:           [  ]
+#CFG:         Succs:           [ 0x0000000000000011, 0x000000000000000A ]
+#CFG:       - Address:         0x0000000000000011
+#CFG:         Preds:           [ 0x0000000000000000, 0x000000000000000A ]
+#CFG:         Succs:           [  ]
+#CFG:       - Address:         0x000000000000000A
+#CFG:         Preds:           [ 0x0000000000000000 ]
+#CFG:         Succs:           [ 0x0000000000000011 ]
diff --git a/test/Object/X86/objdump-disassembly-inline-relocations.test b/test/Object/X86/objdump-disassembly-inline-relocations.test
index a5875f6a2f96..2ef1a435d7d4 100644
--- a/test/Object/X86/objdump-disassembly-inline-relocations.test
+++ b/test/Object/X86/objdump-disassembly-inline-relocations.test
@@ -2,6 +2,10 @@ RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.coff-i386 \
 RUN:              | FileCheck %s -check-prefix COFF-i386
 RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.coff-x86-64 \
 RUN:              | FileCheck %s -check-prefix COFF-x86-64
+RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.macho-i386 \
+RUN:              | FileCheck %s -check-prefix MACHO-i386
+RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.macho-x86-64 \
+RUN:              | FileCheck %s -check-prefix MACHO-x86-64
 
 COFF-i386: file format COFF-i386
 COFF-i386: Disassembly of section .text:
@@ -30,3 +34,34 @@ COFF-x86-64:                              19: IMAGE_REL_AMD64_REL32 SomeOtherFun
 COFF-x86-64:       1d:       8b 44 24 24                                     movl    36(%rsp), %eax
 COFF-x86-64:       21:       48 83 c4 28                                     addq    $40, %rsp
 COFF-x86-64:       25:       c3                                              ret
+
+MACHO-i386: file format Mach-O 32-bit i386
+MACHO-i386: Disassembly of section __TEXT,__text:
+MACHO-i386: _main:
+MACHO-i386:        0:       83 ec 0c                                        subl    $12, %esp
+MACHO-i386:        3:       c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
+MACHO-i386:        b:       c7 04 24 24 00 00 00                            movl    $36, (%esp)
+MACHO-i386:                                e: GENERIC_RELOC_VANILLA __cstring
+MACHO-i386:       12:       e8 1f 00 00 00                                  calll   31
+MACHO-i386:                               13: GENERIC_RELOC_VANILLA __jump_table
+MACHO-i386:       17:       e8 15 00 00 00                                  calll   21
+MACHO-i386:                               18: GENERIC_RELOC_VANILLA __jump_table
+MACHO-i386:       1c:       8b 44 24 08                                     movl    8(%esp), %eax
+MACHO-i386:       20:       83 c4 0c                                        addl    $12, %esp
+MACHO-i386:       23:       c3                                              ret
+
+MACHO-x86-64: file format Mach-O 64-bit x86-64
+MACHO-x86-64: Disassembly of section __TEXT,__text:
+MACHO-x86-64: _main:
+MACHO-x86-64:        0:       48 83 ec 08                                     subq    $8, %rsp
+MACHO-x86-64:        4:       c7 44 24 04 00 00 00 00                         movl    $0, 4(%rsp)
+MACHO-x86-64:        c:       48 8d 3d 00 00 00 00                            leaq    (%rip), %rdi
+MACHO-x86-64:                                f: X86_64_RELOC_SIGNED   L_.str
+MACHO-x86-64:       13:       e8 00 00 00 00                                  callq   0
+MACHO-x86-64:                               14: X86_64_RELOC_BRANCH   _puts
+MACHO-x86-64:       18:       30 c0                                           xorb    %al, %al
+MACHO-x86-64:       1a:       e8 00 00 00 00                                  callq   0
+MACHO-x86-64:                               1b: X86_64_RELOC_BRANCH   _SomeOtherFunction
+MACHO-x86-64:       1f:       8b 44 24 04                                     movl    4(%rsp), %eax
+MACHO-x86-64:       23:       48 83 c4 08                                     addq    $8, %rsp
+MACHO-x86-64:       27:       c3                                              ret
diff --git a/test/Object/X86/objdump-disassembly-symbolic.test b/test/Object/X86/objdump-disassembly-symbolic.test
new file mode 100644
index 000000000000..858653e95ebc
--- /dev/null
+++ b/test/Object/X86/objdump-disassembly-symbolic.test
@@ -0,0 +1,48 @@
+RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.elf-x86-64 \
+RUN:              | FileCheck %s -check-prefix ELF-x86-64
+RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.macho-x86-64 \
+RUN:              | FileCheck %s -check-prefix MACHO-x86-64
+
+# Generate this using:
+#   ld trivial-object-test.macho-x86-64 -undefined dynamic_lookup
+RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-executable-test.macho-x86-64 \
+RUN:              | FileCheck %s -check-prefix MACHO-STUBS-x86-64
+
+ELF-x86-64: file format ELF64-x86-64
+ELF-x86-64: Disassembly of section .text:
+ELF-x86-64: main:
+ELF-x86-64:        0:	48 83 ec 08                                  	subq	$8, %rsp
+ELF-x86-64:        4:	c7 44 24 04 00 00 00 00                      	movl	$0, 4(%rsp)
+ELF-x86-64:        c:	bf 00 00 00 00                               	movl	$.rodata.str1.1, %edi
+ELF-x86-64:       11:	e8 00 00 00 00                               	callq	puts-4
+ELF-x86-64:       16:	30 c0                                        	xorb	%al, %al
+ELF-x86-64:       18:	e8 00 00 00 00                               	callq	SomeOtherFunction-4
+ELF-x86-64:       1d:	8b 44 24 04                                  	movl	4(%rsp), %eax
+ELF-x86-64:       21:	48 83 c4 08                                  	addq	$8, %rsp
+ELF-x86-64:       25:	c3                                           	ret
+
+MACHO-x86-64: file format Mach-O 64-bit x86-64
+MACHO-x86-64: Disassembly of section __TEXT,__text:
+MACHO-x86-64: _main:
+MACHO-x86-64:        0:	48 83 ec 08                                  	subq	$8, %rsp
+MACHO-x86-64:        4:	c7 44 24 04 00 00 00 00                      	movl	$0, 4(%rsp)
+MACHO-x86-64:        c:	48 8d 3d 00 00 00 00                         	leaq	L_.str(%rip), %rdi ## literal pool for: Hello World!
+MACHO-x86-64:       13:	e8 00 00 00 00                               	callq	_puts
+MACHO-x86-64:       18:	30 c0                                        	xorb	%al, %al
+MACHO-x86-64:       1a:	e8 00 00 00 00                               	callq	_SomeOtherFunction
+MACHO-x86-64:       1f:	8b 44 24 04                                  	movl	4(%rsp), %eax
+MACHO-x86-64:       23:	48 83 c4 08                                  	addq	$8, %rsp
+MACHO-x86-64:       27:	c3                                           	ret
+
+MACHO-STUBS-x86-64: file format Mach-O 64-bit x86-64
+MACHO-STUBS-x86-64: Disassembly of section __TEXT,__text:
+MACHO-STUBS-x86-64: _main:
+MACHO-STUBS-x86-64:     1f90:       48 83 ec 08                                     subq    $8, %rsp
+MACHO-STUBS-x86-64:     1f94:       c7 44 24 04 00 00 00 00                         movl    $0, 4(%rsp)
+MACHO-STUBS-x86-64:     1f9c:       48 8d 3d 45 00 00 00                            leaq    69(%rip), %rdi ## literal pool for: Hello World!
+MACHO-STUBS-x86-64:     1fa3:       e8 16 00 00 00                                  callq   puts
+MACHO-STUBS-x86-64:     1fa8:       30 c0                                           xorb    %al, %al
+MACHO-STUBS-x86-64:     1faa:       e8 09 00 00 00                                  callq   SomeOtherFunction
+MACHO-STUBS-x86-64:     1faf:       8b 44 24 04                                     movl    4(%rsp), %eax
+MACHO-STUBS-x86-64:     1fb3:       48 83 c4 08                                     addq    $8, %rsp
+MACHO-STUBS-x86-64:     1fb7:       c3                                              ret
diff --git a/test/Object/ar-create.test b/test/Object/ar-create.test
new file mode 100644
index 000000000000..95d994e06a48
--- /dev/null
+++ b/test/Object/ar-create.test
@@ -0,0 +1,17 @@
+Test which operations create an archive and which don't.
+
+RUN: touch %t
+RUN: rm -f %t.foo.a
+RUN: not llvm-ar p %t.foo.a %t 2>&1 | FileCheck %s
+RUN: not llvm-ar d %t.foo.a %t 2>&1 | FileCheck %s
+RUN: not llvm-ar m %t.foo.a %t 2>&1 | FileCheck %s
+RUN: not llvm-ar t %t.foo.a %t 2>&1 | FileCheck %s
+RUN: not llvm-ar x %t.foo.a %t 2>&1 | FileCheck %s
+
+RUN: llvm-ar q %t.foo.a %t 2>&1 | FileCheck --check-prefix=CREATE %s
+RUN: rm -f %t.foo.a
+RUN: llvm-ar r %t.foo.a %t 2>&1 | FileCheck --check-prefix=CREATE %s
+RUN: rm -f %t.foo.a
+
+CHECK: llvm-ar{{(.exe|.EXE)?}}: error loading '{{[^']+}}.foo.a':
+CREATE: creating {{.*}}.foo.a
diff --git a/test/Object/archive-delete.test b/test/Object/archive-delete.test
new file mode 100644
index 000000000000..552b0e709f61
--- /dev/null
+++ b/test/Object/archive-delete.test
@@ -0,0 +1,30 @@
+Test the 'd' operation in llvm-ar
+
+REQUIRES: shell
+
+RUN: cd %T
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/GNU.a %t.a
+RUN: llvm-ar d %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/MacOSX.a %t.a
+RUN: llvm-ar d %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/SVR4.a %t.a
+RUN: llvm-ar d %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/xpg4.a %t.a
+RUN: llvm-ar d %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+
+CHECK: evenlen
+CHECK-NEXT: oddlen
+CHECK-NEXT: IsNAN.o
diff --git a/test/Object/archive-error-tmp.txt b/test/Object/archive-error-tmp.txt
new file mode 100644
index 000000000000..061898655b6c
--- /dev/null
+++ b/test/Object/archive-error-tmp.txt
@@ -0,0 +1,9 @@
+REQUIRES: shell
+
+Test that no temporary file is left behind on error.
+
+RUN: rm -rf %t
+RUN: mkdir %t
+RUN: not llvm-ar rc %t/foo.a .
+RUN: rmdir %t
+
diff --git a/test/Object/archive-extract-dir.test b/test/Object/archive-extract-dir.test
new file mode 100644
index 000000000000..c718f90275b6
--- /dev/null
+++ b/test/Object/archive-extract-dir.test
@@ -0,0 +1,13 @@
+REQUIRES: shell
+
+RUN: mkdir -p %t
+RUN: cd %t
+RUN: rm -rf foo
+RUN: echo foo > foo
+RUN: rm -f test.a
+RUN: llvm-ar rc          test.a foo
+RUN: rm foo
+RUN: mkdir foo
+RUN: not llvm-ar x test.a foo 2>&1 | FileCheck %s
+
+CHECK: foo: Is a directory
diff --git a/test/Object/archive-format.test b/test/Object/archive-format.test
new file mode 100644
index 000000000000..20ac1a0d78fb
--- /dev/null
+++ b/test/Object/archive-format.test
@@ -0,0 +1,22 @@
+Test the exact archive format. In particular, test which file names use the
+string table or not.
+
+REQUIRES: shell
+
+RUN: mkdir -p %t
+RUN: cd %t
+
+RUN: echo bar > 0123456789abcde
+RUN: echo zed > 0123456789abcdef
+
+RUN: rm -f test.a
+RUN: llvm-ar rc test.a 0123456789abcde 0123456789abcdef
+RUN: cat test.a | FileCheck -strict-whitespace %s
+
+CHECK:      !<arch>
+CHECK-NEXT: //                                              18        `
+CHECK-NEXT: 0123456789abcdef/
+CHECK-NEXT: 0123456789abcde/{{................................}}4         `
+CHECK-NEXT: bar
+CHECK-NEXT: /0              {{................................}}4         `
+CHECK-NEXT: zed
diff --git a/test/Object/archive-move.test b/test/Object/archive-move.test
new file mode 100644
index 000000000000..0378e9111a96
--- /dev/null
+++ b/test/Object/archive-move.test
@@ -0,0 +1,50 @@
+Test the 'm' operation in llvm-ar
+
+REQUIRES: shell
+
+RUN: cd %T
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/GNU.a %t.a
+RUN: llvm-ar m %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/MacOSX.a %t.a
+RUN: llvm-ar m %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/SVR4.a %t.a
+RUN: llvm-ar m %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/xpg4.a %t.a
+RUN: llvm-ar m %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck %s
+
+CHECK: evenlen
+CHECK-NEXT: oddlen
+CHECK-NEXT: IsNAN.o
+CHECK-NEXT: very_long_bytecode_file_name.bc
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/GNU.a %t.a
+RUN: llvm-ar mb evenlen %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck --check-prefix=BEFORE %s
+
+BEFORE: very_long_bytecode_file_name.bc
+BEFORE-NEXT: evenlen
+BEFORE-NEXT: oddlen
+BEFORE-NEXT: IsNAN.o
+
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/GNU.a %t.a
+RUN: llvm-ar ma evenlen %t.a very_long_bytecode_file_name.bc
+RUN: llvm-ar t %t.a | FileCheck --check-prefix=AFTER %s
+
+AFTER: evenlen
+AFTER-NEXT: very_long_bytecode_file_name.bc
+AFTER-NEXT: oddlen
+AFTER-NEXT: IsNAN.o
diff --git a/test/Object/archive-replace-pos.test b/test/Object/archive-replace-pos.test
new file mode 100644
index 000000000000..0acead69a1e2
--- /dev/null
+++ b/test/Object/archive-replace-pos.test
@@ -0,0 +1,30 @@
+Test adding a member to a particular position
+
+RUN: touch %t.foo
+RUN: touch %t.bar
+RUN: rm -f %t.a
+RUN: llvm-ar rc %t.a %t.foo %t.bar
+RUN: touch %t.zed
+RUN: llvm-ar rca %t.foo %t.a %t.zed
+RUN: llvm-ar t %t.a | FileCheck %s
+
+CHECK: .foo
+CHECK-NEXT: .zed
+CHECK-NEXT: .bar
+
+RUN: rm -f %t.a
+RUN: llvm-ar rc %t.a %t.zed %t.foo %t.bar
+RUN: llvm-ar t %t.a | FileCheck --check-prefix=CHECK2 %s
+
+CHECK2: .zed
+CHECK2-NEXT: .foo
+CHECK2-NEXT: .bar
+
+RUN: llvm-ar rca %t.foo %t.a %t.zed
+RUN: llvm-ar t %t.a | FileCheck --check-prefix=CHECK3 %s
+CHECK3: .foo
+CHECK3-NEXT: .zed
+CHECK3-NEXT: .bar
+
+RUN: llvm-ar rc %t.a %t.zed
+RUN: llvm-ar t %t.a | FileCheck --check-prefix=CHECK3 %s
diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test
new file mode 100644
index 000000000000..6379504318a6
--- /dev/null
+++ b/test/Object/archive-symtab.test
@@ -0,0 +1,59 @@
+RUN: rm -f %t.a
+RUN: llvm-ar rcs %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
+RUN: llvm-nm -s %t.a | FileCheck %s
+
+CHECK: Archive map
+CHECK-NEXT: main in trivial-object-test.elf-x86-64
+CHECK-NEXT: foo in trivial-object-test2.elf-x86-64
+CHECK-NEXT: main in trivial-object-test2.elf-x86-64
+CHECK-NOT: bar
+
+CHECK: trivial-object-test.elf-x86-64:
+CHECK-NEXT:         U SomeOtherFunction
+CHECK-NEXT: 00000000 T main
+CHECK-NEXT:         U puts
+CHECK-NEXT: trivial-object-test2.elf-x86-64:
+CHECK-NEXT: 00000000 t bar
+CHECK-NEXT: 00000006 T foo
+CHECK-NEXT: 00000016 T main
+
+RUN: rm -f %t.a
+RUN: llvm-ar rcS %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
+RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=NOMAP
+
+NOMAP-NOT: Archive map
+
+RUN: llvm-ar s %t.a
+RUN: llvm-nm -s %t.a | FileCheck %s
+
+check that the archive does have a corrupt symbol table.
+RUN: rm -f %t.a
+RUN: cp %p/Inputs/archive-test.a-corrupt-symbol-table %t.a
+RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=CORRUPT
+
+CORRUPT: Archive map
+CORRUPT-NEXT: mbin in trivial-object-test.elf-x86-64
+CORRUPT-NEXT: foo in trivial-object-test2.elf-x86-64
+CORRUPT-NEXT: main in trivial-object-test2.elf-x86-64
+
+CORRUPT: trivial-object-test.elf-x86-64:
+CORRUPT-NEXT:         U SomeOtherFunction
+CORRUPT-NEXT: 00000000 T main
+CORRUPT-NEXT:         U puts
+CORRUPT-NEXT: trivial-object-test2.elf-x86-64:
+CORRUPT-NEXT: 00000000 t bar
+CORRUPT-NEXT: 00000006 T foo
+CORRUPT-NEXT: 00000016 T main
+
+check that we *don't* update the symbol table.
+RUN: llvm-ar s %t.a
+RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=CORRUPT
+
+repeat the test with llvm-ranlib
+
+RUN: rm -f %t.a
+RUN: llvm-ar rcS %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
+RUN: llvm-nm -s %t.a | FileCheck %s --check-prefix=NOMAP
+
+RUN: llvm-ranlib %t.a
+RUN: llvm-nm -s %t.a | FileCheck %s
diff --git a/test/Object/archive-toc.test b/test/Object/archive-toc.test
new file mode 100644
index 000000000000..0a5e72b61dce
--- /dev/null
+++ b/test/Object/archive-toc.test
@@ -0,0 +1,28 @@
+Test reading an archive created by gnu ar
+RUN: env TZ=GMT llvm-ar tv %p/Inputs/GNU.a | FileCheck %s --check-prefix=GNU -strict-whitespace
+
+GNU:      rw-r--r-- 500/500      8 Nov 19 02:57 2004 evenlen
+GNU-NEXT: rw-r--r-- 500/500      7 Nov 19 02:57 2004 oddlen
+GNU-NEXT: rwxr-xr-x 500/500   1465 Nov 19 03:01 2004 very_long_bytecode_file_name.bc
+GNU-NEXT: rw-r--r-- 500/500   2280 Nov 19 03:04 2004 IsNAN.o
+
+
+Test reading an archive created by Mac OS X ar
+RUN: env TZ=GMT llvm-ar tv %p/Inputs/MacOSX.a | FileCheck %s --check-prefix=OSX -strict-whitespace
+
+OSX-NOT: __.SYMDEF
+OSX:      rw-r--r-- 501/501      8 Nov 19 02:57 2004 evenlen
+OSX-NEXT: rw-r--r-- 501/501      8 Nov 19 02:57 2004 oddlen
+OSX-NEXT: rw-r--r-- 502/502   1465 Feb  4 06:59 2010 very_long_bytecode_file_name.bc
+OSX-NEXT: rw-r--r-- 501/501   2280 Nov 19 04:32 2004 IsNAN.o
+
+Test reading an archive created on Solaris by /usr/ccs/bin/ar
+RUN: env TZ=GMT llvm-ar tv %p/Inputs/SVR4.a | FileCheck %s -strict-whitespace
+
+Test reading an archive created on Solaris by /usr/xpg4/bin/ar
+RUN: env TZ=GMT llvm-ar tv %p/Inputs/xpg4.a | FileCheck %s -strict-whitespace
+
+CHECK:      rw-r--r-- 1002/102      8 Nov 19 03:24 2004 evenlen
+CHECK-NEXT: rw-r--r-- 1002/102      7 Nov 19 03:24 2004 oddlen
+CHECK-NEXT: rwxr-xr-x 1002/102   1465 Nov 19 03:24 2004 very_long_bytecode_file_name.bc
+CHECK-NEXT: rw-r--r-- 1002/102   2280 Nov 19 03:24 2004 IsNAN.o
diff --git a/test/Object/archive-update.test b/test/Object/archive-update.test
new file mode 100644
index 000000000000..20286d2669ad
--- /dev/null
+++ b/test/Object/archive-update.test
@@ -0,0 +1,37 @@
+Test the 'u' option of llvm-ar
+
+REQUIRES: shell
+
+RUN: cd %T
+RUN: rm -f %t.a
+
+Create a file named evenlen that is older than the evenlen created in %t.newer.
+RUN: mkdir -p %t.older
+RUN: echo older > %t.older/evenlen
+
+Either the shell supports the 'touch' command with a flag to manually set the
+mtime or we sleep for over a second so that the mtime is definitely observable.
+RUN: touch -m -t 200001010000 %t.older/evenlen || sleep 1.1
+
+RUN: mkdir -p %t.newer
+RUN: echo newer > %t.newer/evenlen
+RUN: touch %t.newer/evenlen
+
+Create an archive with the newest file
+RUN: llvm-ar r %t.a %t.newer/evenlen
+RUN: llvm-ar p %t.a | FileCheck --check-prefix=NEWER %s
+
+Check that without the 'u' option the member is replaced with an older file.
+RUN: llvm-ar r %t.a %t.older/evenlen
+RUN: llvm-ar p %t.a | FileCheck --check-prefix=OLDER %s
+
+Check that with the 'u' option the member is replaced with a newer file.
+RUN: llvm-ar ru %t.a %t.newer/evenlen
+RUN: llvm-ar p %t.a | FileCheck --check-prefix=NEWER %s
+
+Check that with the 'u' option the member is not replaced with an older file.
+RUN: llvm-ar ru %t.a %t.older/evenlen
+RUN: llvm-ar p %t.a | FileCheck --check-prefix=NEWER %s
+
+NEWER: newer
+OLDER: older
diff --git a/test/Object/check_binary_output.ll b/test/Object/check_binary_output.ll
new file mode 100644
index 000000000000..567f18e65ba5
--- /dev/null
+++ b/test/Object/check_binary_output.ll
@@ -0,0 +1,4 @@
+; This is not an assembly file, this is just to run the test.
+; The test verifies that llvm-ar produces a binary output.
+
+;RUN: llvm-ar p %p/Inputs/GNU.a very_long_bytecode_file_name.bc | cmp -s %p/Inputs/very_long_bytecode_file_name.bc -
diff --git a/test/Object/coff-archive-short.test b/test/Object/coff-archive-short.test
new file mode 100644
index 000000000000..fa531b3b6314
--- /dev/null
+++ b/test/Object/coff-archive-short.test
@@ -0,0 +1,26 @@
+#
+# Check if the index is appearing properly in the output file
+#
+# coff_archive_short.lib does not have member whose name is longer
+# than 15 characters, thus, unlike coff_archive.lib, it has no string
+# table as the third member.
+#
+RUN: llvm-nm --numeric-sort -s %p/Inputs/coff_archive_short.lib | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: _shortfn1 in short1.obj
+CHECKIDX: _shortfn2 in short2.obj
+CHECKIDX: short1.obj:
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 i .drectve
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 T _shortfn1
+CHECKIDX: 00000001 a @feat.00
+CHECKIDX: 00aa9d1b a @comp.id
+CHECKIDX: short2.obj:
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 i .drectve
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 T _shortfn2
+CHECKIDX: 00000001 a @feat.00
+CHECKIDX: 00aa9d1b a @comp.id
diff --git a/test/Object/corrupt.test b/test/Object/corrupt.test
new file mode 100644
index 000000000000..ef72a0979b52
--- /dev/null
+++ b/test/Object/corrupt.test
@@ -0,0 +1,24 @@
+// Section name offset overflows section name string table.
+RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -sections \
+RUN:     2>&1 | FileCheck --check-prefix=SECNAME %s
+
+// Section data offset past end of file.
+RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -sections -section-data \
+RUN:     2>&1 | FileCheck --check-prefix=SECDATA %s
+
+// Symbol name offset overflows string table.
+RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -symbols \
+RUN:     2>&1 | FileCheck --check-prefix=SYMNAME %s
+
+// Version index in .gnu.version overflows the version map.
+RUN: not llvm-readobj %p/Inputs/corrupt-version.elf-x86_64 -dt \
+RUN:     2>&1 | FileCheck --check-prefix=VER %s
+
+SECNAME: Error reading file: Invalid data was encountered while parsing the file.
+
+SECDATA: Error reading file: Invalid data was encountered while parsing the file.
+SECDATA: Error reading file: Invalid data was encountered while parsing the file.
+
+SYMNAME: Error reading file: Invalid data was encountered while parsing the file.
+
+VER: Error reading file: Invalid data was encountered while parsing the file.
diff --git a/test/Object/directory.ll b/test/Object/directory.ll
new file mode 100644
index 000000000000..48eefcb6ecb8
--- /dev/null
+++ b/test/Object/directory.ll
@@ -0,0 +1,13 @@
+;RUN: rm -f %T/test.a
+;RUN: not llvm-ar r %T/test.a . 2>&1 | FileCheck %s
+;CHECK: .: Is a directory
+
+;RUN: rm -f %T/test.a
+;RUN: touch %T/a-very-long-file-name
+;RUN: llvm-ar r %T/test.a %s %T/a-very-long-file-name
+;RUN: llvm-ar r %T/test.a %T/a-very-long-file-name
+;RUN: llvm-ar t %T/test.a | FileCheck -check-prefix=MEMBERS %s
+;MEMBERS-NOT: /
+;MEMBERS: directory.ll
+;MEMBERS: a-very-long-file-name
+;MEMBERS-NOT: a-very-long-file-name
diff --git a/test/Object/elf-reloc-no-sym.test b/test/Object/elf-reloc-no-sym.test
new file mode 100644
index 000000000000..eabde0cc7348
--- /dev/null
+++ b/test/Object/elf-reloc-no-sym.test
@@ -0,0 +1,7 @@
+RUN: llvm-readobj -r %p/Inputs/elf-reloc-no-sym.x86_64 | FileCheck %s
+
+CHECK:      Relocations [
+CHECK-NEXT:   Section (1) .rela.plt {
+CHECK-NEXT:     0x4011D8 R_X86_64_IRELATIVE - 0x400120
+CHECK-NEXT:   }
+CHECK-NEXT: ]
diff --git a/test/Object/extract.ll b/test/Object/extract.ll
new file mode 100644
index 000000000000..4e519aea7505
--- /dev/null
+++ b/test/Object/extract.ll
@@ -0,0 +1,46 @@
+; This isn't really an assembly file, it's just here to run the test.
+
+; This test just makes sure that llvm-ar can extract bytecode members
+; from various style archives.
+
+; REQUIRES: shell
+
+; RUN: cd %T
+
+; RUN: rm -f very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/Inputs/GNU.a very_long_bytecode_file_name.bc | \
+; RUN:   cmp -s %p/Inputs/very_long_bytecode_file_name.bc -
+; RUN: llvm-ar x %p/Inputs/GNU.a very_long_bytecode_file_name.bc
+; RUN: cmp -s %p/Inputs/very_long_bytecode_file_name.bc \
+; RUN:        very_long_bytecode_file_name.bc
+
+; RUN: rm -f very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/Inputs/MacOSX.a very_long_bytecode_file_name.bc | \
+; RUN:   cmp -s %p/Inputs/very_long_bytecode_file_name.bc -
+; RUN: llvm-ar x %p/Inputs/MacOSX.a very_long_bytecode_file_name.bc
+; RUN: cmp -s %p/Inputs/very_long_bytecode_file_name.bc \
+; RUN:        very_long_bytecode_file_name.bc
+
+; RUN: rm -f very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/Inputs/SVR4.a very_long_bytecode_file_name.bc | \
+; RUN:   cmp -s %p/Inputs/very_long_bytecode_file_name.bc -
+; RUN: llvm-ar x %p/Inputs/SVR4.a very_long_bytecode_file_name.bc
+; RUN: cmp -s %p/Inputs/very_long_bytecode_file_name.bc \
+; RUN:        very_long_bytecode_file_name.bc
+
+; RUN: rm -f very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/Inputs/xpg4.a very_long_bytecode_file_name.bc |\
+; RUN:   cmp -s %p/Inputs/very_long_bytecode_file_name.bc -
+; RUN: llvm-ar x %p/Inputs/xpg4.a very_long_bytecode_file_name.bc
+; RUN: cmp -s %p/Inputs/very_long_bytecode_file_name.bc \
+; RUN:        very_long_bytecode_file_name.bc
+
+
+; Test that the 'o' option is working by extracting a file, putting it in a
+; new archive and checking the date.
+; RUN: rm -f very_long_bytecode_file_name.bc
+; RUN: llvm-ar xo %p/Inputs/GNU.a very_long_bytecode_file_name.bc
+; RUN: llvm-ar rc %t.a very_long_bytecode_file_name.bc
+; RUN: env TZ=GMT llvm-ar tv %t.a | FileCheck %s
+
+CHECK: 1465 Nov 19 03:01 2004 very_long_bytecode_file_name.bc
diff --git a/test/Object/lit.local.cfg b/test/Object/lit.local.cfg
index b2439b2d1b79..d74d039d684b 100644
--- a/test/Object/lit.local.cfg
+++ b/test/Object/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.test', '.ll']
+config.suffixes = ['.test', '.ll', '.yaml']
diff --git a/test/Object/nm-archive.test b/test/Object/nm-archive.test
index 2d96b73a2714..0d43cc701550 100644
--- a/test/Object/nm-archive.test
+++ b/test/Object/nm-archive.test
@@ -1,9 +1,5 @@
 RUN: llvm-nm %p/Inputs/archive-test.a-coff-i386 \
 RUN:         | FileCheck %s -check-prefix COFF
-RUN: llvm-as %p/Inputs/trivial.ll -o=%t1
-RUN: llvm-ar rcs %t2 %t1
-RUN: llvm-nm %t2 | FileCheck %s -check-prefix BITCODE
-
 
 COFF: trivial-object-test.coff-i386:
 COFF-NEXT: 00000000 d .data
@@ -13,6 +9,27 @@ COFF-NEXT:          U _SomeOtherFunction
 COFF-NEXT: 00000000 T _main
 COFF-NEXT:          U _puts
 
+
+RUN: llvm-as %p/Inputs/trivial.ll -o=%t1
+RUN: rm -f %t2
+RUN: llvm-ar rcs %t2 %t1
+RUN: llvm-nm %t2 | FileCheck %s -check-prefix BITCODE
+
 BITCODE:          U SomeOtherFunction
 BITCODE-NEXT:          T main
 BITCODE-NEXT:          U puts
+
+
+Test we don't error with an archive with no symtab.
+RUN: llvm-nm %p/Inputs/archive-test.a-gnu-no-symtab
+
+
+Or in an archive with no symtab or string table.
+RUN: llvm-nm %p/Inputs/archive-test.a-gnu-minimal
+
+
+And don't crash when asked to print a non existing symtab.
+RUN: llvm-nm -s %p/Inputs/archive-test.a-gnu-minimal
+
+Don't reject an empty archive.
+RUN: llvm-nm %p/Inputs/archive-test.a-empty
diff --git a/test/Object/nm-error.test b/test/Object/nm-error.test
new file mode 100644
index 000000000000..146b88713f87
--- /dev/null
+++ b/test/Object/nm-error.test
@@ -0,0 +1,17 @@
+Test that llvm-nm returns an error because of the unknown file type, but
+keeps processing subsequent files.
+
+Note: We use a temporary file since the tests don't run with pipefail.
+
+RUN: touch %t
+RUN: not llvm-nm %p/Inputs/trivial-object-test.elf-i386 %t \
+RUN:             %p/Inputs/trivial-object-test.elf-i386 > %t.log
+RUN: FileCheck %s < %t.log
+
+CHECK:          U SomeOtherFunction
+CHECK: 00000000 T main
+CHECK:          U puts
+
+CHECK:          U SomeOtherFunction
+CHECK: 00000000 T main
+CHECK:          U puts
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 5c3cc315e543..748d6f20d510 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -18,16 +18,17 @@ COFF:          U {{_?}}SomeOtherFunction
 COFF: 00000000 T {{_?}}main
 COFF:          U {{_?}}puts
 
+ELF-NOT:      U
 ELF:          U SomeOtherFunction
 ELF: 00000000 T main
 ELF:          U puts
 
 
 macho: 00000000 U _SomeOtherFunction
-macho: 00000000 s _main
+macho: 00000000 T _main
 macho: 00000000 U _puts
 
 macho64: 00000028 s L_.str
-macho64: 00000000 u _SomeOtherFunction
-macho64: 00000000 s _main
-macho64: 00000000 u _puts
+macho64: 00000000 U _SomeOtherFunction
+macho64: 00000000 T _main
+macho64: 00000000 U _puts
diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test
new file mode 100644
index 000000000000..8febfdfab39c
--- /dev/null
+++ b/test/Object/nm-universal-binary.test
@@ -0,0 +1,6 @@
+RUN: llvm-nm %p/Inputs/macho-universal.x86_64.i386 | FileCheck %s
+
+CHECK: macho-universal.x86_64.i386:x86_64
+CHECK: main
+CHECK: macho-universal.x86_64.i386:i386
+CHECK: main
diff --git a/test/Object/nm-weak-global-macho.test b/test/Object/nm-weak-global-macho.test
new file mode 100644
index 000000000000..ede26090f951
--- /dev/null
+++ b/test/Object/nm-weak-global-macho.test
@@ -0,0 +1,3 @@
+RUN: llvm-nm %p/Inputs/weak-global-symbol.macho-i386 | FileCheck %s
+
+CHECK: 00000000 S _a
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
index 0d96fd2bfd8e..49541336c682 100644
--- a/test/Object/obj2yaml.test
+++ b/test/Object/obj2yaml.test
@@ -2,169 +2,152 @@ RUN: obj2yaml %p/Inputs/trivial-object-test.coff-i386 | FileCheck %s --check-pre
 RUN: obj2yaml %p/Inputs/trivial-object-test.coff-x86-64 | FileCheck %s --check-prefix COFF-X86-64
 
 
-COFF-I386: header: !Header
-COFF-I386-NEXT:  Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+COFF-I386: header:
+COFF-I386-NEXT:  Machine: IMAGE_FILE_MACHINE_I386
 
 COFF-I386: sections:
-COFF-I386-NEXT:   - !Section
-COFF-I386-NEXT:    Name: .text
-COFF-I386-NEXT:    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
-COFF-I386-NEXT:    SectionData:  !hex "83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3" # |....D$.......$...............D$.....|
+COFF-I386-NEXT:  - Name: .text
+COFF-I386-NEXT:    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+COFF-I386-NEXT:    Alignment: 16
+COFF-I386-NEXT:    SectionData:  83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3
 
 COFF-I386:    Relocations:
-COFF-I386-NEXT:      - !Relocation
-COFF-I386-NEXT:        VirtualAddress: 0xe
-COFF-I386-NEXT:        SymbolTableIndex: 5
+COFF-I386-NEXT:      - VirtualAddress: 14
+COFF-I386-NEXT:        SymbolName: L_.str
 COFF-I386-NEXT:        Type: IMAGE_REL_I386_DIR32
 
-COFF-I386:      - !Relocation
-COFF-I386-NEXT:        VirtualAddress: 0x13
-COFF-I386-NEXT:        SymbolTableIndex: 6
+COFF-I386:           - VirtualAddress: 19
+COFF-I386-NEXT:        SymbolName: _puts
 COFF-I386-NEXT:        Type: IMAGE_REL_I386_REL32
 
-COFF-I386:      - !Relocation
-COFF-I386-NEXT:        VirtualAddress: 0x18
-COFF-I386-NEXT:        SymbolTableIndex: 7
+COFF-I386:           - VirtualAddress: 24
+COFF-I386-NEXT:        SymbolName: _SomeOtherFunction
 COFF-I386-NEXT:        Type: IMAGE_REL_I386_REL32
 
-COFF-I386:  - !Section
-COFF-I386-NEXT:    Name: .data
-COFF-I386-NEXT:    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
-COFF-I386-NEXT:    SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+COFF-I386:       - Name: .data
+COFF-I386-NEXT:    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+COFF-I386-NEXT:    Alignment: 1
+COFF-I386-NEXT:    SectionData: 48656C6C6F20576F726C642100
 
 COFF-I386: symbols:
-COFF-I386-NEXT:  - !Symbol
-COFF-I386-NEXT:    Name: .text
+COFF-I386-NEXT:  - Name: .text
 COFF-I386-NEXT:    Value: 0
 COFF-I386-NEXT:    SectionNumber: 1
-COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC
 COFF-I386-NEXT:    NumberOfAuxSymbols: 1
-COFF-I386-NEXT:    AuxillaryData:  !hex "240000000300000000000000010000000000" # |$.................|
+COFF-I386-NEXT:    AuxiliaryData: 240000000300000000000000010000000000
 
-COFF-I386:  - !Symbol
-COFF-I386-NEXT:    Name: .data
+COFF-I386:       - Name: .data
 COFF-I386-NEXT:    Value: 0
 COFF-I386-NEXT:    SectionNumber: 2
-COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC
 COFF-I386-NEXT:    NumberOfAuxSymbols: 1
-COFF-I386-NEXT:    AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+COFF-I386-NEXT:    AuxiliaryData: 0D0000000000000000000000020000000000
 
-COFF-I386:  - !Symbol
-COFF-I386-NEXT:    Name: _main
+COFF-I386:       - Name: _main
 COFF-I386-NEXT:    Value: 0
 COFF-I386-NEXT:    SectionNumber: 1
-COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2)
-COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_FUNCTION
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
 
-COFF-I386:  - !Symbol
-COFF-I386-NEXT:    Name: L_.str
+COFF-I386:       - Name: L_.str
 COFF-I386-NEXT:    Value: 0
 COFF-I386-NEXT:    SectionNumber: 2
-COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_STATIC
 
-COFF-I386:  - !Symbol
-COFF-I386-NEXT:    Name: _puts
+COFF-I386:       - Name: _puts
 COFF-I386-NEXT:    Value: 0
 COFF-I386-NEXT:    SectionNumber: 0
-COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
 
-COFF-I386:  - !Symbol
-COFF-I386-NEXT:    Name: _SomeOtherFunction
+COFF-I386:       - Name: _SomeOtherFunction
 COFF-I386-NEXT:    Value: 0
 COFF-I386-NEXT:    SectionNumber: 0
-COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+COFF-I386-NEXT:    SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-I386-NEXT:    ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-I386-NEXT:    StorageClass: IMAGE_SYM_CLASS_EXTERNAL
 
 
-COFF-X86-64: header: !Header
-COFF-X86-64-NEXT:  Machine: IMAGE_FILE_MACHINE_AMD64 # (0x8664)
+COFF-X86-64: header:
+COFF-X86-64-NEXT:  Machine: IMAGE_FILE_MACHINE_AMD64
 
 COFF-X86-64: sections:
-COFF-X86-64-NEXT:   - !Section
-COFF-X86-64-NEXT:     Name: .text
-COFF-X86-64-NEXT:     Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
-COFF-X86-64-NEXT:     SectionData:  !hex "4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3" # |H..(.D$$....H.................D$$H..(.|
+COFF-X86-64-NEXT:   - Name: .text
+COFF-X86-64-NEXT:     Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+COFF-X86-64-NEXT:     Alignment: 16
+COFF-X86-64-NEXT:     SectionData: 4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3
 
 COFF-X86-64:     Relocations:
-COFF-X86-64-NEXT:       - !Relocation
-COFF-X86-64-NEXT:         VirtualAddress: 0xf
-COFF-X86-64-NEXT:         SymbolTableIndex: 5
+COFF-X86-64-NEXT:       - VirtualAddress: 15
+COFF-X86-64-NEXT:         SymbolName: L.str
 COFF-X86-64-NEXT:         Type: IMAGE_REL_AMD64_REL32
 
-COFF-X86-64:       - !Relocation
-COFF-X86-64-NEXT:         VirtualAddress: 0x14
-COFF-X86-64-NEXT:         SymbolTableIndex: 6
+COFF-X86-64:            - VirtualAddress: 20
+COFF-X86-64-NEXT:         SymbolName: puts
 COFF-X86-64-NEXT:         Type: IMAGE_REL_AMD64_REL32
 
-COFF-X86-64:       - !Relocation
-COFF-X86-64-NEXT:         VirtualAddress: 0x19
-COFF-X86-64-NEXT:         SymbolTableIndex: 7
+COFF-X86-64:            - VirtualAddress: 25
+COFF-X86-64-NEXT:         SymbolName: SomeOtherFunction
 COFF-X86-64-NEXT:         Type: IMAGE_REL_AMD64_REL32
 
-COFF-X86-64:   - !Section
-COFF-X86-64-NEXT:     Name: .data
-COFF-X86-64-NEXT:     Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
-COFF-X86-64-NEXT:     SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+COFF-X86-64:        - Name: .data
+COFF-X86-64-NEXT:     Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+COFF-X86-64-NEXT:     Alignment: 1
+COFF-X86-64-NEXT:     SectionData: 48656C6C6F20576F726C642100
 
 COFF-X86-64: symbols:
-COFF-X86-64-NEXT:   - !Symbol
-COFF-X86-64-NEXT:     Name: .text
+COFF-X86-64-NEXT:   - Name: .text
 COFF-X86-64-NEXT:     Value: 0
 COFF-X86-64-NEXT:     SectionNumber: 1
-COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC
 COFF-X86-64-NEXT:     NumberOfAuxSymbols: 1
-COFF-X86-64-NEXT:     AuxillaryData:  !hex "260000000300000000000000010000000000" # |&.................|
+COFF-X86-64-NEXT:     AuxiliaryData: 260000000300000000000000010000000000
 
-COFF-X86-64:   - !Symbol
-COFF-X86-64-NEXT:     Name: .data
+COFF-X86-64:        - Name: .data
 COFF-X86-64-NEXT:     Value: 0
 COFF-X86-64-NEXT:     SectionNumber: 2
-COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC
 COFF-X86-64-NEXT:     NumberOfAuxSymbols: 1
-COFF-X86-64-NEXT:     AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+COFF-X86-64-NEXT:     AuxiliaryData: 0D0000000000000000000000020000000000
 
-COFF-X86-64:   - !Symbol
-COFF-X86-64-NEXT:     Name: main
+COFF-X86-64:        - Name: main
 COFF-X86-64-NEXT:     Value: 0
 COFF-X86-64-NEXT:     SectionNumber: 1
-COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL
 
-COFF-X86-64:   - !Symbol
-COFF-X86-64-NEXT:     Name: L.str
+COFF-X86-64:        - Name: L.str
 COFF-X86-64-NEXT:     Value: 0
 COFF-X86-64-NEXT:     SectionNumber: 2
-COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_STATIC
 
-COFF-X86-64:   - !Symbol
-COFF-X86-64-NEXT:     Name: puts
+COFF-X86-64:        - Name: puts
 COFF-X86-64-NEXT:     Value: 0
 COFF-X86-64-NEXT:     SectionNumber: 0
-COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL
 
-COFF-X86-64:   - !Symbol
-COFF-X86-64-NEXT:     Name: SomeOtherFunction
+COFF-X86-64:        - Name: SomeOtherFunction
 COFF-X86-64-NEXT:     Value: 0
 COFF-X86-64-NEXT:     SectionNumber: 0
-COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL # (0)
-COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
-COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+COFF-X86-64-NEXT:     SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT:     ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT:     StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+COFF-X86-64-NOT:      NumberOfAuxSymbols
diff --git a/test/Object/relocation-executable.test b/test/Object/relocation-executable.test
index 98f5b4ee2fd6..1236035d9f68 100644
--- a/test/Object/relocation-executable.test
+++ b/test/Object/relocation-executable.test
@@ -2,17 +2,25 @@ RUN: llvm-readobj -r -expand-relocs %p/Inputs/hello-world.elf-x86-64 \
 RUN:   | FileCheck %s
 
 // CHECK:     Relocations [
-// CHECK:       Section (11) .plt {
+// CHECK-NEXT:  Section (8) .rela.dyn {
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x4018D8
+// CHECK-NEXT:      Type: R_X86_64_GLOB_DAT (6)
+// CHECK-NEXT:      Symbol: __gmon_start__
+// CHECK-NEXT:      Addend: 0x0
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
+// CHECK-NEXT:  Section (9) .rela.plt {
 // CHECK-NEXT:    Relocation {
 // CHECK-NEXT:      Offset: 0x4018F8
 // CHECK-NEXT:      Type: R_X86_64_JUMP_SLOT (7)
 // CHECK-NEXT:      Symbol: __libc_start_main
-// CHECK-NEXT:      Info: 0x0
+// CHECK-NEXT:      Addend: 0x0
 // CHECK-NEXT:    }
 // CHECK-NEXT:    Relocation {
 // CHECK-NEXT:      Offset: 0x401900
 // CHECK-NEXT:      Type: R_X86_64_JUMP_SLOT (7)
 // CHECK-NEXT:      Symbol: puts
-// CHECK-NEXT:      Info: 0x0
+// CHECK-NEXT:      Addend: 0x0
 // CHECK-NEXT:    }
 // CHECK-NEXT:  }
diff --git a/test/Object/yaml2obj-elf-bits-endian.test b/test/Object/yaml2obj-elf-bits-endian.test
new file mode 100644
index 000000000000..4be9a4465b81
--- /dev/null
+++ b/test/Object/yaml2obj-elf-bits-endian.test
@@ -0,0 +1,16 @@
+RUN: yaml2obj -format=elf %p/Inputs/ELF/LE64.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix LE64
+RUN: yaml2obj -format=elf %p/Inputs/ELF/BE64.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix BE64
+RUN: yaml2obj -format=elf %p/Inputs/ELF/LE32.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix LE32
+RUN: yaml2obj -format=elf %p/Inputs/ELF/BE32.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix BE32
+
+LE64:      Class: 64-bit (0x2)
+LE64-NEXT: DataEncoding: LittleEndian (0x1)
+
+BE64:      Class: 64-bit (0x2)
+BE64-NEXT: DataEncoding: BigEndian (0x2)
+
+LE32:      Class: 32-bit (0x1)
+LE32-NEXT: DataEncoding: LittleEndian (0x1)
+
+BE32:      Class: 32-bit (0x1)
+BE32-NEXT: DataEncoding: BigEndian (0x2)
diff --git a/test/Object/yaml2obj-elf-file-headers.yaml b/test/Object/yaml2obj-elf-file-headers.yaml
new file mode 100644
index 000000000000..285170170a20
--- /dev/null
+++ b/test/Object/yaml2obj-elf-file-headers.yaml
@@ -0,0 +1,11 @@
+# RUN: yaml2obj -format=elf %s | llvm-readobj -file-headers - | FileCheck %s
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  OSABI: ELFOSABI_GNU
+  Type: ET_REL
+  Machine: EM_X86_64
+
+# CHECK: OS/ABI: GNU/Linux
+# CHECK: Type: Relocatable
diff --git a/test/Object/yaml2obj-elf-section-basic.yaml b/test/Object/yaml2obj-elf-section-basic.yaml
new file mode 100644
index 000000000000..34be11d3658c
--- /dev/null
+++ b/test/Object/yaml2obj-elf-section-basic.yaml
@@ -0,0 +1,35 @@
+# RUN: yaml2obj -format=elf %s | llvm-readobj -sections -section-data - | FileCheck %s
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address: 0xCAFEBABE
+    Link: .text # Doesn't make sense for SHT_PROGBITS, but good enough for test.
+    Content: EBFE
+    AddressAlign: 2
+
+# CHECK:        Section {
+# CHECK:          Index: 0
+# CHECK:          Type: SHT_NULL (0x0)
+#
+# CHECK:        Section {
+# CHECK:          Name: .text
+# CHECK:          Type: SHT_PROGBITS (0x1)
+# CHECK-NEXT:     Flags [ (0x6)
+# CHECK-NEXT:       SHF_ALLOC (0x2)
+# CHECK-NEXT:       SHF_EXECINSTR (0x4)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0xCAFEBABE
+# CHECK:          Size: 2
+# Check that Link != 0.
+# CHECK:          Link: {{[1-9][0-9]*}}
+# CHECK:          AddressAlignment: 2
+# CHECK:          SectionData (
+# CHECK-NEXT:       0000: EBFE
+# CHECK-NEXT:     )
diff --git a/test/Object/yaml2obj-elf-symbol-LocalGlobalWeak.yaml b/test/Object/yaml2obj-elf-symbol-LocalGlobalWeak.yaml
new file mode 100644
index 000000000000..3c4e830c0df6
--- /dev/null
+++ b/test/Object/yaml2obj-elf-symbol-LocalGlobalWeak.yaml
@@ -0,0 +1,37 @@
+# RUN: yaml2obj -format=elf %s | llvm-readobj -symbols - | FileCheck %s
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .data
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_WRITE ]
+    Content: "DEADBEEF"
+Symbols:
+  Local:
+    - Name: local_symbol
+      Type: STT_OBJECT
+      Section: .data
+  Global:
+    - Name: global_symbol
+      Type: STT_OBJECT
+      Section: .data
+  Weak:
+    - Name: weak_symbol
+      Type: STT_OBJECT
+      Section: .data
+
+# CHECK: Symbol {
+# CHECK:   Name:  (0)
+# CHECK: Symbol {
+# CHECK:   Name: local_symbol
+# CHECK:   Binding: Local
+# CHECK: Symbol {
+# CHECK:   Name: global_symbol
+# CHECK:   Binding: Global
+# CHECK: Symbol {
+# CHECK:   Name: weak_symbol
+# CHECK:   Binding: Weak
diff --git a/test/Object/yaml2obj-elf-symbol-basic.yaml b/test/Object/yaml2obj-elf-symbol-basic.yaml
new file mode 100644
index 000000000000..3fb9b17655fd
--- /dev/null
+++ b/test/Object/yaml2obj-elf-symbol-basic.yaml
@@ -0,0 +1,40 @@
+# RUN: yaml2obj -format=elf %s | llvm-readobj -symbols - | FileCheck %s
+!ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+    Content: "90EBFE" # x86 machine code
+                      #   NOP ; To make main's `Value` non-zero (for testing).
+                      # main:
+                      #   JMP -2 ; (infloop)
+                      # This YAML file is a valid relocatable object that,
+                      # when linked and run on x86_64, will go into an
+                      # infloop.
+Symbols:
+  Global:
+    - Name: main
+      Type: STT_FUNC
+      Section: .text
+      Value: 0x1
+      Size: 2
+    - Name: undefined_symbol
+
+# CHECK:      Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name:  (0)
+# CHECK:        Symbol {
+# CHECK-NEXT:     Name: main
+# CHECK-NEXT:     Value: 0x1
+# CHECK-NEXT:     Size: 2
+# CHECK:          Binding: Global
+# CHECK-NEXT:     Type: Function
+# CHECK:          Section: .text
+# CHECK:        Symbol {
+# CHECK:          Name: undefined_symbol
+# CHECK:          Section:  (0x0)
diff --git a/test/Other/Inputs/TestProg/TestProg b/test/Other/Inputs/TestProg/TestProg
new file mode 100755
index 000000000000..1c4efba60d09
--- /dev/null
+++ b/test/Other/Inputs/TestProg/TestProg
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+false
diff --git a/test/Other/Inputs/llvm-cov.gcda b/test/Other/Inputs/llvm_cov.gcda
index 9ae2286ea2f4..9ae2286ea2f4 100644
--- a/test/Other/Inputs/llvm-cov.gcda
+++ b/test/Other/Inputs/llvm_cov.gcda
diff --git a/test/Other/Inputs/llvm-cov.gcno b/test/Other/Inputs/llvm_cov.gcno
index 25e202386a89..25e202386a89 100644
--- a/test/Other/Inputs/llvm-cov.gcno
+++ b/test/Other/Inputs/llvm_cov.gcno
diff --git a/test/Other/ResponseFile.ll b/test/Other/ResponseFile.ll
index b8b3d0a90233..914e5480f203 100644
--- a/test/Other/ResponseFile.ll
+++ b/test/Other/ResponseFile.ll
@@ -1,5 +1,9 @@
-; RUN: echo %s > %t.list
-; RUN: llvm-as @%t.list -o %t.bc
+; Test that we can recurse, at least a little bit.  The -time-passes flag here
+; is a hack to make sure that neither echo nor the shell expands the response
+; file for us.  Tokenization with quotes is tested in unittests.
+; RUN: echo %s > %t.list1
+; RUN: echo "-time-passes @%t.list1" > %t.list2
+; RUN: llvm-as @%t.list2 -o %t.bc
 ; RUN: llvm-nm %t.bc 2>&1 | FileCheck %s
 
 ; CHECK: T foobar
diff --git a/test/Other/X86/lit.local.cfg b/test/Other/X86/lit.local.cfg
index da2db5a45f9c..ba763cf03ffc 100644
--- a/test/Other/X86/lit.local.cfg
+++ b/test/Other/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Other/attribute-comment.ll b/test/Other/attribute-comment.ll
index 7354e7f765fd..d12b179ef083 100644
--- a/test/Other/attribute-comment.ll
+++ b/test/Other/attribute-comment.ll
@@ -6,4 +6,4 @@ define void @test1() #0 {
   ret void
 }
 
-attributes #0 = { nounwind ssp "less-precise-fpmad"="false" uwtable "no-frame-pointer-elim"="true" readnone "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" uwtable "no-frame-pointer-elim"="true" readnone "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Other/can-execute.txt b/test/Other/can-execute.txt
new file mode 100644
index 000000000000..fd6961f86bf1
--- /dev/null
+++ b/test/Other/can-execute.txt
@@ -0,0 +1,20 @@
+REQUIRES: can-execute
+
+This tests that we abstract two peculiarities of unix in can_execute:
+
+* Directories are executable, but we don't want to try to execute them.
+* For shell scripts, we also need to be able to read them.
+
+The PATH is constructed such that 'not' will first find a directory named
+TestProg, then a file with executable bit but not readable and finally a
+shell script which always returns false, which is what it actually tries to
+execute.
+
+If we want, it is probably OK to change the semantics of can_execute and this
+test, but for now this test serves as a reminder to audit all the callers if
+we do that.
+
+RUN: cp -f %S/Inputs/TestProg/TestProg %T/TestProg
+RUN: chmod 111 %T/TestProg
+RUN: export PATH=%S/Inputs:%T:%S/Inputs/TestProg:$PATH
+RUN: not TestProg
diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll
index 6e180cd1d898..b310bc2c0424 100644
--- a/test/Other/close-stderr.ll
+++ b/test/Other/close-stderr.ll
@@ -9,8 +9,5 @@
 ; XFAIL: vg_leak
 ; REQUIRES: shell
 
-; opt will fail to open /dev/null on native win32.
-; XFAIL: win32
-
 ; Test that the error handling when writing to stderr fails exits the
 ; program cleanly rather than aborting.
diff --git a/test/Other/constant-fold-gep-address-spaces.ll b/test/Other/constant-fold-gep-address-spaces.ll
new file mode 100644
index 000000000000..f6abe7468bba
--- /dev/null
+++ b/test/Other/constant-fold-gep-address-spaces.ll
@@ -0,0 +1,235 @@
+; "PLAIN" - No optimizations. This tests the target-independent
+; constant folder.
+; RUN: opt -S -o - %s | FileCheck --check-prefix=PLAIN %s
+
+target datalayout = "e-p:128:128:128-p1:32:32:32-p2:8:8:8-p3:16:16:16-p4:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+; The automatic constant folder in opt does not have targetdata access, so
+; it can't fold gep arithmetic, in general. However, the constant folder run
+; from instcombine and global opt can use targetdata.
+; PLAIN: @G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1)
+@G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1)
+; PLAIN: @G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1)
+@G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1)
+; PLAIN: @F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2)
+@F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2)
+; PLAIN: @F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2)
+@F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2)
+; PLAIN: @H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1)
+@H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1)
+; PLAIN: @H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i8 -1)
+@H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 0 to i1 addrspace(2)*), i8 -1)
+
+
+; The target-independent folder should be able to do some clever
+; simplifications on sizeof, alignof, and offsetof expressions. The
+; target-dependent folder should fold these down to constants.
+; PLAIN-X: @a = constant i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310)
+@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]} addrspace(4)* getelementptr ({[7 x double], [7 x double]} addrspace(4)* null, i64 11) to i64), i64 5))
+
+; PLAIN-X: @b = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
+@b = constant i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; PLAIN-X: @c = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2)
+@c = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64)
+
+; PLAIN-X: @d = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11)
+@d = constant i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64)
+
+; PLAIN-X: @e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
+@e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64)
+
+; PLAIN-X: @f = constant i64 1
+@f = constant i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; PLAIN-X: @g = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
+@g = constant i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; PLAIN-X: @h = constant i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64)
+@h = constant i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i64 1) to i64)
+
+; PLAIN-X: @i = constant i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64)
+@i = constant i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double} addrspace(4)* null, i64 0, i32 1) to i64)
+
+; The target-dependent folder should cast GEP indices to integer-sized pointers.
+
+; PLAIN: @M = constant i64 addrspace(4)* getelementptr (i64 addrspace(4)* null, i32 1)
+; PLAIN: @N = constant i64 addrspace(4)* getelementptr ({ i64, i64 } addrspace(4)* null, i32 0, i32 1)
+; PLAIN: @O = constant i64 addrspace(4)* getelementptr ([2 x i64] addrspace(4)* null, i32 0, i32 1)
+
+@M = constant i64 addrspace(4)* getelementptr (i64 addrspace(4)* null, i32 1)
+@N = constant i64 addrspace(4)* getelementptr ({ i64, i64 } addrspace(4)* null, i32 0, i32 1)
+@O = constant i64 addrspace(4)* getelementptr ([2 x i64] addrspace(4)* null, i32 0, i32 1)
+
+; Fold GEP of a GEP. Very simple cases are folded.
+
+; PLAIN-X: @Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 2)
+@ext = external addrspace(3) global [3 x { i32, i32 }]
+@Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 1), i64 1)
+
+; PLAIN-X: @Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1)
+@Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1)
+
+
+; Duplicate all of the above as function return values rather than
+; global initializers.
+
+; PLAIN: define i8 addrspace(1)* @goo8() #0 {
+; PLAIN:   %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)*
+; PLAIN:   ret i8 addrspace(1)* %t
+; PLAIN: }
+; PLAIN: define i1 addrspace(2)* @goo1() #0 {
+; PLAIN:   %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)*
+; PLAIN:   ret i1 addrspace(2)* %t
+; PLAIN: }
+; PLAIN: define i8 addrspace(1)* @foo8() #0 {
+; PLAIN:   %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)*
+; PLAIN:   ret i8 addrspace(1)* %t
+; PLAIN: }
+; PLAIN: define i1 addrspace(2)* @foo1() #0 {
+; PLAIN:   %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)*
+; PLAIN:   ret i1 addrspace(2)* %t
+; PLAIN: }
+; PLAIN: define i8 addrspace(1)* @hoo8() #0 {
+; PLAIN:   %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) to i8 addrspace(1)*
+; PLAIN:   ret i8 addrspace(1)* %t
+; PLAIN: }
+; PLAIN: define i1 addrspace(2)* @hoo1() #0 {
+; PLAIN:   %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 -1) to i1 addrspace(2)*
+; PLAIN:   ret i1 addrspace(2)* %t
+; PLAIN: }
+define i8 addrspace(1)* @goo8() #0 {
+  %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)*
+  ret i8 addrspace(1)* %t
+}
+define i1 addrspace(2)* @goo1() #0 {
+  %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)*
+  ret i1 addrspace(2)* %t
+}
+define i8 addrspace(1)* @foo8() #0 {
+  %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)*
+  ret i8 addrspace(1)* %t
+}
+define i1 addrspace(2)* @foo1() #0 {
+  %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)*
+  ret i1 addrspace(2)* %t
+}
+define i8 addrspace(1)* @hoo8() #0 {
+  %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)*
+  ret i8 addrspace(1)* %t
+}
+define i1 addrspace(2)* @hoo1() #0 {
+  %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 0 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)*
+  ret i1 addrspace(2)* %t
+}
+
+; PLAIN-X: define i64 @fa() #0 {
+; PLAIN-X:   %t = bitcast i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fb() #0 {
+; PLAIN-X:   %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fc() #0 {
+; PLAIN-X:   %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fd() #0 {
+; PLAIN-X:   %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fe() #0 {
+; PLAIN-X:   %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @ff() #0 {
+; PLAIN-X:   %t = bitcast i64 1 to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fg() #0 {
+; PLAIN-X:   %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fh() #0 {
+; PLAIN-X:   %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+; PLAIN-X: define i64 @fi() #0 {
+; PLAIN-X:   %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) to i64
+; PLAIN-X:   ret i64 %t
+; PLAIN-X: }
+define i64 @fa() #0 {
+  %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64
+  ret i64 %t
+}
+define i64 @fb() #0 {
+  %t = bitcast i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) to i64
+  ret i64 %t
+}
+define i64 @fc() #0 {
+  %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64
+  ret i64 %t
+}
+define i64 @fd() #0 {
+  %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) to i64
+  ret i64 %t
+}
+define i64 @fe() #0 {
+  %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64
+  ret i64 %t
+}
+define i64 @ff() #0 {
+  %t = bitcast i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) to i64
+  ret i64 %t
+}
+define i64 @fg() #0 {
+  %t = bitcast i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) to i64
+  ret i64 %t
+}
+define i64 @fh() #0 {
+  %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64) to i64
+  ret i64 %t
+}
+define i64 @fi() #0 {
+  %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double}addrspace(4)* null, i64 0, i32 1) to i64) to i64
+  ret i64 %t
+}
+
+; PLAIN: define i64* @fM() #0 {
+; PLAIN:   %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
+; PLAIN:   ret i64* %t
+; PLAIN: }
+; PLAIN: define i64* @fN() #0 {
+; PLAIN:   %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
+; PLAIN:   ret i64* %t
+; PLAIN: }
+; PLAIN: define i64* @fO() #0 {
+; PLAIN:   %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
+; PLAIN:   ret i64* %t
+; PLAIN: }
+
+define i64* @fM() #0 {
+  %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
+  ret i64* %t
+}
+define i64* @fN() #0 {
+  %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
+  ret i64* %t
+}
+define i64* @fO() #0 {
+  %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
+  ret i64* %t
+}
+
+; PLAIN: define i32 addrspace(1)* @fZ() #0 {
+; PLAIN:   %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)*
+; PLAIN:   ret i32 addrspace(1)* %t
+; PLAIN: }
+@ext2 = external addrspace(1) global [3 x { i32, i32 }]
+define i32 addrspace(1)* @fZ() #0 {
+  %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)*
+  ret i32 addrspace(1)* %t
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index 44b66284dd73..aed4145c5507 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -454,10 +454,10 @@ define i32* @fZ() nounwind {
 
 define i8* @different_addrspace() nounwind noinline {
 ; OPT: different_addrspace
-  %p = getelementptr inbounds i8* bitcast ([4 x i8] addrspace(12)* @p12 to i8*),
+  %p = getelementptr inbounds i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*),
                                   i32 2
   ret i8* %p
-; OPT: ret i8* getelementptr (i8* bitcast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2)
+; OPT: ret i8* getelementptr (i8* addrspacecast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2)
 }
 
 define i8* @same_addrspace() nounwind noinline {
diff --git a/test/Other/extract-alias.ll b/test/Other/extract-alias.ll
index d5bab4b3f36b..d1e4af545625 100644
--- a/test/Other/extract-alias.ll
+++ b/test/Other/extract-alias.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-extract -func foo -S < %s | FileCheck %s
 ; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s
 ; RUN: llvm-extract -alias zeda0 -S < %s | FileCheck --check-prefix=ALIAS %s
-; RUN: llvm-extract -ralias .*bar -S < %s | FileCheck --check-prefix=ALIASRE %s
+; RUN: llvm-extract -ralias '.*bar' -S < %s | FileCheck --check-prefix=ALIASRE %s
 
 ; Both aliases should be converted to declarations
 ; CHECK:      @zeda0 = external global i32
diff --git a/test/Other/extract-linkonce.ll b/test/Other/extract-linkonce.ll
index 31fbf3ac4632..4c6b6b76a4ab 100644
--- a/test/Other/extract-linkonce.ll
+++ b/test/Other/extract-linkonce.ll
@@ -1,15 +1,16 @@
 ; RUN: llvm-extract -func foo -S < %s | FileCheck %s
 ; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s
 
-; Test that we don't convert weak_odr to external definitions.
+; Test that linkonce definitions are mapped to weak so that they are not
+; dropped.
 
-; CHECK:      @bar = external hidden global i32
-; CHECK:      define hidden i32* @foo() {
+; CHECK:      @bar = external global i32
+; CHECK:      define weak i32* @foo() {
 ; CHECK-NEXT:  ret i32* @bar
 ; CHECK-NEXT: }
 
-; DELETE: @bar = hidden global i32 42
-; DELETE: declare hidden i32* @foo()
+; DELETE: @bar = weak global i32 42
+; DELETE: declare i32* @foo()
 
 @bar = linkonce global i32 42
 
diff --git a/test/Other/lit.local.cfg b/test/Other/lit.local.cfg
deleted file mode 100644
index 269307724232..000000000000
--- a/test/Other/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.txt']
diff --git a/test/Other/llvm-cov.test b/test/Other/llvm-cov.test
index c0aa203e2c17..2ac4e9e866b6 100644
--- a/test/Other/llvm-cov.test
+++ b/test/Other/llvm-cov.test
@@ -1,3 +1,4 @@
 PR11760
-RUN: llvm-cov -gcda=%S/Inputs/llvm-cov.gcda -gcno=%S/Inputs/llvm-cov.gcno
-
+RUN: llvm-cov -gcda=%S/Inputs/llvm_cov.gcda -gcno=%S/Inputs/llvm_cov.gcno
+REQUIRES: asserts
+XFAIL: *
diff --git a/test/Other/optimize-options.ll b/test/Other/optimize-options.ll
index 888a78fd9df4..22dd842cab06 100644
--- a/test/Other/optimize-options.ll
+++ b/test/Other/optimize-options.ll
@@ -1,8 +1,8 @@
-;RUN: opt -S -O1 -debug-pass=Arguments 2>&1 | FileCheck %s
-;RUN: opt -S -O2 -debug-pass=Arguments 2>&1 | FileCheck %s
-;RUN: opt -S -Os -debug-pass=Arguments 2>&1 | FileCheck %s
-;RUN: opt -S -Oz -debug-pass=Arguments 2>&1 | FileCheck %s
-;RUN: opt -S -O3 -debug-pass=Arguments 2>&1 | FileCheck %s
+;RUN: opt -S -O1 -debug-pass=Arguments %s 2>&1 | FileCheck %s
+;RUN: opt -S -O2 -debug-pass=Arguments %s 2>&1 | FileCheck %s
+;RUN: opt -S -Os -debug-pass=Arguments %s 2>&1 | FileCheck %s
+;RUN: opt -S -Oz -debug-pass=Arguments %s 2>&1 | FileCheck %s
+;RUN: opt -S -O3 -debug-pass=Arguments %s 2>&1 | FileCheck %s
 
-; Just check that we get a non-empty set of passes for each -O opton.
+; Just check that we get a non-empty set of passes for each -O option.
 ;CHECK: Pass Arguments: {{.*}} -print-module
diff --git a/test/Other/pipefail.txt b/test/Other/pipefail.txt
new file mode 100644
index 000000000000..241080aab661
--- /dev/null
+++ b/test/Other/pipefail.txt
@@ -0,0 +1,2 @@
+REQUIRES: shell
+RUN: ((false | true) && echo true || echo false) | grep false
diff --git a/test/Other/umask.ll b/test/Other/umask.ll
new file mode 100644
index 000000000000..af9710e6b017
--- /dev/null
+++ b/test/Other/umask.ll
@@ -0,0 +1,14 @@
+; REQUIRES: shell
+; XFAIL: mingw32
+
+; RUN: umask 000
+; RUN: rm -f %t.000
+; RUN: llvm-as %s -o %t.000
+; RUN: ls -l %t.000 | FileCheck --check-prefix=CHECK000 %s
+; CHECK000: rw-rw-rw
+
+; RUN: umask 002
+; RUN: rm -f %t.002
+; RUN: llvm-as %s -o %t.002
+; RUN: ls -l %t.002 | FileCheck --check-prefix=CHECK002 %s
+; CHECK002: rw-rw-r-
diff --git a/test/TableGen/2003-08-03-PassCode.td b/test/TableGen/2003-08-03-PassCode.td
index de7d6261b230..b851a15ffe8b 100644
--- a/test/TableGen/2003-08-03-PassCode.td
+++ b/test/TableGen/2003-08-03-PassCode.td
@@ -1,5 +1,4 @@
 // RUN: llvm-tblgen %s
-// XFAIL: vg_leak
 
 class test<code C> {
   code Code = C;
diff --git a/test/TableGen/2006-09-18-LargeInt.td b/test/TableGen/2006-09-18-LargeInt.td
index 94cd1ec30710..538021236715 100644
--- a/test/TableGen/2006-09-18-LargeInt.td
+++ b/test/TableGen/2006-09-18-LargeInt.td
@@ -1,4 +1,6 @@
-// RUN: llvm-tblgen %s | grep -- 4294901760
+// RUN: llvm-tblgen %s | FileCheck %s
+
+// CHECK: 4294901760
 
 def X {
   int Y = 0xFFFF0000;
diff --git a/test/TableGen/2010-03-24-PrematureDefaults.td b/test/TableGen/2010-03-24-PrematureDefaults.td
index 716a1d59008c..24f6c93b3e17 100644
--- a/test/TableGen/2010-03-24-PrematureDefaults.td
+++ b/test/TableGen/2010-03-24-PrematureDefaults.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class A<int k, bits<2> x = 1> {
   int K = k;
diff --git a/test/TableGen/CStyleComment.td b/test/TableGen/CStyleComment.td
index 55fb0e787b62..9c50f7e96f8f 100644
--- a/test/TableGen/CStyleComment.td
+++ b/test/TableGen/CStyleComment.td
@@ -1,7 +1,6 @@
 // Test that multiline, nested, comments work correctly.
 //
 // RUN: llvm-tblgen < %s
-// XFAIL: vg_leak
 
 /* Foo
   bar
diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td
index 14d616b52173..fea3aeedde18 100644
--- a/test/TableGen/Dag.td
+++ b/test/TableGen/Dag.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 //===----------------------------------------------------------------------===//
 // Substitution of an int.
diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td
index b52a709731e1..bfbb43568b43 100644
--- a/test/TableGen/DefmInherit.td
+++ b/test/TableGen/DefmInherit.td
@@ -1,4 +1,11 @@
-// RUN: llvm-tblgen %s | grep "zing = 4" | count 4
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK-NOT: zing = 4
 
 class C1<int A, string B> { 
   int bar = A;
diff --git a/test/TableGen/DefmInsideMultiClass.td b/test/TableGen/DefmInsideMultiClass.td
index 0aea21280da0..d34974de0295 100644
--- a/test/TableGen/DefmInsideMultiClass.td
+++ b/test/TableGen/DefmInsideMultiClass.td
@@ -1,4 +1,8 @@
-// RUN: llvm-tblgen %s | grep ADDPSrr | count 1
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: ADDPSrr
+// CHECK-NOT: ADDPSrr
 
 class Instruction<bits<4> opc, string Name> {
   bits<4> opcode = opc;
diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td
index 9bc76e0f0cf8..99b7e14c2d5f 100644
--- a/test/TableGen/ForeachList.td
+++ b/test/TableGen/ForeachList.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Register<string name, int idx> {
   string Name = name;
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
index a49a60bf2692..4aacc74d8aa2 100644
--- a/test/TableGen/ForeachLoop.td
+++ b/test/TableGen/ForeachLoop.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Register<string name, int idx> {
   string Name = name;
diff --git a/test/TableGen/Include.td b/test/TableGen/Include.td
index 8783638f0c61..1cb779f611a1 100644
--- a/test/TableGen/Include.td
+++ b/test/TableGen/Include.td
@@ -1,5 +1,5 @@
 // RUN: llvm-tblgen -I %p %s
-// XFAIL: vg_leak
+
 def BeforeInclude;
 
 include "Include.inc"
diff --git a/test/TableGen/IntBitInit.td b/test/TableGen/IntBitInit.td
index 83713a33321b..4e150f16833b 100644
--- a/test/TableGen/IntBitInit.td
+++ b/test/TableGen/IntBitInit.td
@@ -1,5 +1,5 @@
 // RUN: llvm-tblgen %s
-// XFAIL: vg_leak
+
 def {
   bit A = 1;
   int B = A;
diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td
index 919a1a7e9a32..2ad6191c2535 100644
--- a/test/TableGen/LazyChange.td
+++ b/test/TableGen/LazyChange.td
@@ -1,4 +1,6 @@
-// RUN: llvm-tblgen %s | grep "int Y = 3"
+// RUN: llvm-tblgen %s | FileCheck %s
+
+// CHECK: int Y = 3
 
 class C {
   int X = 4;
diff --git a/test/TableGen/LetInsideMultiClasses.td b/test/TableGen/LetInsideMultiClasses.td
index 72f48b6d8066..095f37bfb92f 100644
--- a/test/TableGen/LetInsideMultiClasses.td
+++ b/test/TableGen/LetInsideMultiClasses.td
@@ -1,4 +1,10 @@
-// RUN: llvm-tblgen %s | grep "bit IsDouble = 1;" | count 3
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: bit IsDouble = 1;
+// CHECK: bit IsDouble = 1;
+// CHECK: bit IsDouble = 1;
+// CHECK-NOT: bit IsDouble = 1;
 
 class Instruction<bits<4> opc, string Name> {
   bits<4> opcode = opc;
diff --git a/test/TableGen/ListOfList.td b/test/TableGen/ListOfList.td
index adf9fe483eb4..56f964e8685c 100644
--- a/test/TableGen/ListOfList.td
+++ b/test/TableGen/ListOfList.td
@@ -1,6 +1,5 @@
-// RUN llvm-tblgen %s | FileCheck %s
-
-// RUN: llvm-tblgen %s | grep "foo" | count 1
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Base<string t> {
   string text = t;
@@ -11,3 +10,4 @@ class Derived<list<list<string>> thetext> : Base<thetext[0][0]>;
 def FOO : Derived<[["foo"]]>;
 
 // CHECK: text = "foo"
+// CHECK-NOT: text = "foo"
diff --git a/test/TableGen/LoLoL.td b/test/TableGen/LoLoL.td
index f758e1b60476..778c9609d1a2 100644
--- a/test/TableGen/LoLoL.td
+++ b/test/TableGen/LoLoL.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Base<list<int> v> {
   list<int> values = v;
diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td
index ef320cf79f16..9c3996345813 100644
--- a/test/TableGen/MultiClass.td
+++ b/test/TableGen/MultiClass.td
@@ -1,4 +1,9 @@
-// RUN: llvm-tblgen %s | grep "zing = 4" | count 2
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK-NOT: zing = 4
 
 class C1<int A, string B> { 
   int bar = A;
diff --git a/test/TableGen/MultiClassDefName.td b/test/TableGen/MultiClassDefName.td
index 75d6af5b42b9..d3c6de7e8421 100644
--- a/test/TableGen/MultiClassDefName.td
+++ b/test/TableGen/MultiClassDefName.td
@@ -1,4 +1,8 @@
-// RUN: llvm-tblgen %s | grep WorldHelloCC | count 1
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: WorldHelloCC
+// CHECK-NOT: WorldHelloCC
 
 class C<string n> {
   string name = n;
diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td
index 9d1470a6616b..04fef2c84995 100644
--- a/test/TableGen/MultiClassInherit.td
+++ b/test/TableGen/MultiClassInherit.td
@@ -1,4 +1,36 @@
-// RUN: llvm-tblgen %s | grep "zing = 4" | count 28
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// "zing = 4" x 28
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK: zing = 4
+// CHECK-NOT: zing = 4
 
 class C1<int A, string B> { 
   int bar = A;
diff --git a/test/TableGen/MultiPat.td b/test/TableGen/MultiPat.td
index b49b06c24caf..b3792777b6b5 100644
--- a/test/TableGen/MultiPat.td
+++ b/test/TableGen/MultiPat.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td
index 5b63175b192a..e8c16f720d0e 100644
--- a/test/TableGen/NestedForeach.td
+++ b/test/TableGen/NestedForeach.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Droid<string series, int release, string model, int patchlevel> {
   string Series = series;
diff --git a/test/TableGen/Paste.td b/test/TableGen/Paste.td
index 33d61ccde128..a7e2a5b318ba 100644
--- a/test/TableGen/Paste.td
+++ b/test/TableGen/Paste.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Instr<int i> {
   int index = i;
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
index f26b9e65ac0d..761332312b0f 100644
--- a/test/TableGen/SetTheory.td
+++ b/test/TableGen/SetTheory.td
@@ -1,5 +1,6 @@
 // Test evaluation of set operations in dags.
 // RUN: llvm-tblgen -print-sets %s | FileCheck %s
+// XFAIL: vg_leak
 //
 // The -print-sets driver configures a primitive SetTheory instance that
 // understands these sets:
diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td
index e4c4704a5e39..a11f6f87b427 100644
--- a/test/TableGen/SiblingForeach.td
+++ b/test/TableGen/SiblingForeach.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Set<int i = 0, int j = 0, int k = 0> {
   int I = i;
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
index 7a35d315c5d8..89deaefc9abe 100644
--- a/test/TableGen/Slice.td
+++ b/test/TableGen/Slice.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/String.td b/test/TableGen/String.td
index c71ed50f9860..576ba818f383 100644
--- a/test/TableGen/String.td
+++ b/test/TableGen/String.td
@@ -1,5 +1,5 @@
 // RUN: llvm-tblgen %s 
-// XFAIL: vg_leak
+
 class x {
   string y = "missing terminating '\"' character";
 }
diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td
index bf2d257c5d01..32253a3a215c 100644
--- a/test/TableGen/TargetInstrSpec.td
+++ b/test/TableGen/TargetInstrSpec.td
@@ -1,5 +1,11 @@
-// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))\]' | count 1
-// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))\]' | count 1
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: [(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))]
+// CHECK-NOT: [(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))]
+
+// CHECK: [(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))]
+// CHECK-NOT: [(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))]
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td
index e88696217f70..9c502f475507 100644
--- a/test/TableGen/TwoLevelName.td
+++ b/test/TableGen/TwoLevelName.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Type<string name, int length, int width> {
   string Name = name;
diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td
index b9e4b375359b..a8bd207a85a2 100644
--- a/test/TableGen/cast.td
+++ b/test/TableGen/cast.td
@@ -1,4 +1,10 @@
-// RUN: llvm-tblgen %s | grep "add_ps" | count 3
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: add_ps
+// CHECK: add_ps
+// CHECK: add_ps
+// CHECK-NOT: add_ps
 
 class ValueType<int size, int value> {
   int Size = size;
diff --git a/test/TableGen/defmclass.td b/test/TableGen/defmclass.td
index 6198c000fddc..80f03b319426 100644
--- a/test/TableGen/defmclass.td
+++ b/test/TableGen/defmclass.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class XD { bits<4> Prefix = 11; }
 // CHECK: Prefix = { 1, 1, 0, 0 };
diff --git a/test/TableGen/eq.td b/test/TableGen/eq.td
index fc3ad424e2f7..f8daf880b9ed 100644
--- a/test/TableGen/eq.td
+++ b/test/TableGen/eq.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 // CHECK: Value = 0
 // CHECK: Value = 1
 
diff --git a/test/TableGen/eqbit.td b/test/TableGen/eqbit.td
index b77b1a26dfe1..1d58fa0c1916 100644
--- a/test/TableGen/eqbit.td
+++ b/test/TableGen/eqbit.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 // CHECK: a = 6
 // CHECK: a = 5
 
diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td
index 7b7c19972884..541da49ccde3 100644
--- a/test/TableGen/foreach.td
+++ b/test/TableGen/foreach.td
@@ -1,6 +1,14 @@
-// RUN: llvm-tblgen %s | grep 'Jr' | count 2
-// RUN: llvm-tblgen %s | grep 'Sr' | count 2
-// RUN: llvm-tblgen %s | grep '"NAME"' | count 1
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK: Classes
+// CHECK: Sr
+// CHECK: Jr
+// CHECK: "NAME"
+
+// CHECK: Defs
+// CHECK: Jr
+// CHECK: Sr
 
 // Variables for foreach
 class decls {
diff --git a/test/TableGen/if.td b/test/TableGen/if.td
index e4df74f36860..1d8d62329ae3 100644
--- a/test/TableGen/if.td
+++ b/test/TableGen/if.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 // Support for an `!if' operator as part of a `let' statement.
 // CHECK:      class C
diff --git a/test/TableGen/ifbit.td b/test/TableGen/ifbit.td
index e3341219ffe8..88f575e9acfc 100644
--- a/test/TableGen/ifbit.td
+++ b/test/TableGen/ifbit.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 // CHECK: a = 6
 // CHECK: a = 5
 
diff --git a/test/TableGen/intrinsic-order.td b/test/TableGen/intrinsic-order.td
new file mode 100644
index 000000000000..13c2db27e16a
--- /dev/null
+++ b/test/TableGen/intrinsic-order.td
@@ -0,0 +1,36 @@
+// RUN: llvm-tblgen -gen-intrinsic %s | FileCheck %s
+// XFAIL: vg_leak
+
+class IntrinsicProperty;
+
+class ValueType<int size, int value> {
+  string Namespace = "MVT";
+  int Size = size;
+  int Value = value;
+}
+
+class LLVMType<ValueType vt> {
+  ValueType VT = vt;
+}
+
+class Intrinsic<string name, list<LLVMType> param_types = []> {
+  string LLVMName = name;
+  bit isTarget = 0;
+  string TargetPrefix = "";
+  list<LLVMType> RetTypes = [];
+  list<LLVMType> ParamTypes = param_types;
+  list<IntrinsicProperty> Properties = [];
+}
+
+def iAny : ValueType<0, 254>;
+def llvm_anyint_ty : LLVMType<iAny>;
+
+
+// Make sure an intrinsic name that is a prefix of another is checked after the
+// other.
+
+// CHECK: if (NameR.startswith("oo.bar.")) return Intrinsic::foo_bar;
+// CHECK: if (NameR.startswith("oo.")) return Intrinsic::foo;
+
+def int_foo : Intrinsic<"llvm.foo", [llvm_anyint_ty]>;
+def int_foo_bar : Intrinsic<"llvm.foo.bar", [llvm_anyint_ty]>;
diff --git a/test/TableGen/intrinsic-varargs.td b/test/TableGen/intrinsic-varargs.td
new file mode 100644
index 000000000000..3e48f8da33bf
--- /dev/null
+++ b/test/TableGen/intrinsic-varargs.td
@@ -0,0 +1,30 @@
+// RUN: llvm-tblgen -gen-intrinsic %s | FileCheck %s
+// XFAIL: vg_leak
+
+class IntrinsicProperty;
+
+class ValueType<int size, int value> {
+  string Namespace = "MVT";
+  int Size = size;
+  int Value = value;
+}
+
+class LLVMType<ValueType vt> {
+  ValueType VT = vt;
+}
+
+class Intrinsic<string name, list<LLVMType> param_types = []> {
+  string LLVMName = name;
+  bit isTarget = 0;
+  string TargetPrefix = "";
+  list<LLVMType> RetTypes = [];
+  list<LLVMType> ParamTypes = param_types;
+  list<IntrinsicProperty> Properties = [];
+}
+
+// isVoid needs to match the definition in ValueTypes.td
+def isVoid : ValueType<0, 56>;   // Produces no value
+def llvm_vararg_ty : LLVMType<isVoid>;   // this means vararg here
+
+// CHECK: /* 0 */ 0, 27, 0,
+def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>;
diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td
index efe00022f51d..9e586055ff9d 100644
--- a/test/TableGen/lisp.td
+++ b/test/TableGen/lisp.td
@@ -1,4 +1,20 @@
-// RUN: llvm-tblgen %s | grep ""
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+// CHECK:      def One {
+// CHECK-NEXT:   list<string> names = ["Jeffrey Sinclair"];
+// CHECK-NEXT:   string element = "Jeffrey Sinclair";
+// CHECK-NEXT:   list<string> rest = [];
+// CHECK-NEXT:   int null = 1;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def Three {
+// CHECK-NEXT:   list<string> names = ["Tom", "Dick", "Harry"];
+// CHECK-NEXT:   string element = "Tom";
+// CHECK-NEXT:   list<string> rest = ["Dick", "Harry"];
+// CHECK-NEXT:   int null = 0;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
 
 class List<list<string> n> {
   list<string> names = n;
diff --git a/test/TableGen/list-element-bitref.td b/test/TableGen/list-element-bitref.td
index 7db3d31167fd..4622f28526eb 100644
--- a/test/TableGen/list-element-bitref.td
+++ b/test/TableGen/list-element-bitref.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class C<list<bits<8>> L> {
   bits<2> V0 = L[0]{1-0};
diff --git a/test/TableGen/math.td b/test/TableGen/math.td
index bde267a652dc..59d16ae908e2 100644
--- a/test/TableGen/math.td
+++ b/test/TableGen/math.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Int<int value> {
   int Value = value;
diff --git a/test/TableGen/nested-comment.td b/test/TableGen/nested-comment.td
index bf030e77a4b0..f8581ceb2ff5 100644
--- a/test/TableGen/nested-comment.td
+++ b/test/TableGen/nested-comment.td
@@ -1,5 +1,4 @@
 // RUN: llvm-tblgen < %s
-// XFAIL: vg_leak
 
 /* foo
 
diff --git a/test/TableGen/pr8330.td b/test/TableGen/pr8330.td
index e6720147890b..7779b635e33c 100644
--- a/test/TableGen/pr8330.td
+++ b/test/TableGen/pr8330.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Or4<bits<8> Val> {
   bits<8> V = {Val{7}, Val{6}, Val{5}, Val{4}, Val{3}, 1, Val{1}, Val{0} };
diff --git a/test/TableGen/strconcat.td b/test/TableGen/strconcat.td
index 0173c49365cc..dfb1a94d82c8 100644
--- a/test/TableGen/strconcat.td
+++ b/test/TableGen/strconcat.td
@@ -1,4 +1,6 @@
-// RUN: llvm-tblgen %s | grep fufoo
+// RUN: llvm-tblgen %s | FileCheck %s
+
+// CHECK: fufoo
 
 class Y<string S> {
   string T = !strconcat(S, "foo");
diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td
index e265b44cf328..34818afaa736 100644
--- a/test/TableGen/subst.td
+++ b/test/TableGen/subst.td
@@ -1,9 +1,5 @@
-// RUN: llvm-tblgen %s | grep "Smith" | count 7
-// RUN: llvm-tblgen %s | grep "Johnson" | count 2
-// RUN: llvm-tblgen %s | grep "FIRST" | count 1
-// RUN: llvm-tblgen %s | grep "LAST" | count 1
-// RUN: llvm-tblgen %s | grep "TVAR" | count 2
-// RUN: llvm-tblgen %s | grep "Bogus" | count 1
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Honorific<string t> {
   string honorific = t;
@@ -27,3 +23,56 @@ def JohnSmith : AName<"FIRST LAST", TVAR>;
 def JaneSmith : AName<"Jane LAST", Ms>;
 def JohnSmithJones : AName<"FIRST LAST-Jones", Mr>;
 def JimmyJohnson : AName<"Jimmy Johnson", Mr>;
+
+// CHECK:      ------------- Classes -----------------
+// CHECK-NEXT: class AName<string AName:name = ?, Honorific AName:honorific = ?> {
+// CHECK-NEXT:   string name = !subst("FIRST", "John", !subst("LAST", "Smith", AName:name));
+// CHECK-NEXT:   Honorific honorific = !subst(TVAR, Mr, AName:honorific);
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: class Honorific<string Honorific:t = ?> {
+// CHECK-NEXT:   string honorific = Honorific:t;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: class Name<string Name:n = ?, Honorific Name:t = ?> {
+// CHECK-NEXT:   string name = Name:n;
+// CHECK-NEXT:   Honorific honorific = Name:t;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: ------------- Defs -----------------
+// CHECK-NEXT: def JaneSmith {
+// CHECK-NEXT:   string name = "Jane Smith";
+// CHECK-NEXT:   Honorific honorific = Ms;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def JimmyJohnson {
+// CHECK-NEXT:   string name = "Jimmy Johnson";
+// CHECK-NEXT:   Honorific honorific = Mr;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def JohnSmith {
+// CHECK-NEXT:   string name = "John Smith";
+// CHECK-NEXT:   Honorific honorific = Mr;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def JohnSmithJones {
+// CHECK-NEXT:   string name = "John Smith-Jones";
+// CHECK-NEXT:   Honorific honorific = Mr;
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def Mr {
+// CHECK-NEXT:   string honorific = "Mr.";
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def Mrs {
+// CHECK-NEXT:   string honorific = "Mrs.";
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def Ms {
+// CHECK-NEXT:   string honorific = "Ms.";
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
+// CHECK-NEXT: def TVAR {
+// CHECK-NEXT:   string honorific = "Bogus";
+// CHECK-NEXT:   string NAME = ?;
+// CHECK-NEXT: }
diff --git a/test/TableGen/subst2.td b/test/TableGen/subst2.td
index ce7307703dcc..7c007f7db12e 100644
--- a/test/TableGen/subst2.td
+++ b/test/TableGen/subst2.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 // CHECK: No subst
 // CHECK: No foo
 // CHECK: RECURSE foo
diff --git a/test/TableGen/usevalname.td b/test/TableGen/usevalname.td
index a80ba12869e0..d85b98ac33e6 100644
--- a/test/TableGen/usevalname.td
+++ b/test/TableGen/usevalname.td
@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
 
 class Instr<list<dag> pat> {
   list<dag> Pattern = pat;
diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/ADCE/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/ArgumentPromotion/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
new file mode 100644
index 000000000000..4688a83f2425
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+; PR17906
+; When promoting two pointer arguments of different types in a single function,
+; we used to attach the same tag to both of the newly-created loads.
+; This test case makes sure that we correctly transfer the tbaa tags from the
+; original loads to the newly-created loads when promoting pointer arguments.
+
+@a = global i32* null, align 8
+@e = global i32** @a, align 8
+@g = global i32 0, align 4
+@c = global i64 0, align 8
+@d = global i8 0, align 1
+
+define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
+entry:
+  %0 = load i64* %p2, align 8, !tbaa !1
+  %conv = trunc i64 %0 to i32
+  %1 = load i32* %p1, align 4, !tbaa !5
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, i8* @d, align 1, !tbaa !7
+  ret void
+}
+
+define i32 @main() {
+entry:
+; CHECK-LABEL: main
+; CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa ![[I32:[0-9]+]]
+; CHECK: %g.val = load i32* @g, align 4, !tbaa ![[I32]]
+; CHECK: %c.val = load i64* @c, align 8, !tbaa ![[LONG:[0-9]+]]
+  %0 = load i32*** @e, align 8, !tbaa !8
+  store i32* @g, i32** %0, align 8, !tbaa !8
+  %1 = load i32** @a, align 8, !tbaa !8
+  store i32 1, i32* %1, align 4, !tbaa !5
+  call fastcc void @fn(i32* @g, i64* @c)
+
+  ret i32 0
+}
+
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"long", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!5 = metadata !{metadata !6, metadata !6, i64 0}
+!6 = metadata !{metadata !"int", metadata !3, i64 0}
+!7 = metadata !{metadata !3, metadata !3, i64 0}
+!8 = metadata !{metadata !9, metadata !9, i64 0}
+!9 = metadata !{metadata !"any pointer", metadata !3, i64 0}
+; CHECK: ![[I32]] = metadata !{metadata ![[I32_TYPE:[0-9]+]], metadata ![[I32_TYPE]], i64 0}
+; CHECK: ![[I32_TYPE]] = metadata !{metadata !"int", metadata !{{.*}}, i64 0}
+; CHECK: ![[LONG]] = metadata !{metadata ![[LONG_TYPE:[0-9]+]], metadata ![[LONG_TYPE]], i64 0}
+; CHECK: ![[LONG_TYPE]] = metadata !{metadata !"long", metadata !{{.*}}, i64 0}
diff --git a/test/Transforms/BBVectorize/X86/cmp-types.ll b/test/Transforms/BBVectorize/X86/cmp-types.ll
index a4fcbb6048f5..fc1da1b0c609 100644
--- a/test/Transforms/BBVectorize/X86/cmp-types.ll
+++ b/test/Transforms/BBVectorize/X86/cmp-types.ll
@@ -11,6 +11,6 @@ entry:
   %tobool21 = icmp ne %"struct.btSoftBody"* %n2, null
   %cond22 = zext i1 %tobool21 to i32
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 }
 
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll
index bbf565d1cc7f..401808441c26 100644
--- a/test/Transforms/BBVectorize/X86/loop1.ll
+++ b/test/Transforms/BBVectorize/X86/loop1.ll
@@ -7,8 +7,8 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
 entry:
   br label %for.body
-; CHECK: @test1
-; CHECK-UNRL: @test1
+; CHECK-LABEL: @test1(
+; CHECK-UNRL-LABEL: @test1(
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
index 07cc5d8b96b7..42bd0ff14032 100644
--- a/test/Transforms/BBVectorize/X86/pr15289.ll
+++ b/test/Transforms/BBVectorize/X86/pr15289.ll
@@ -45,7 +45,7 @@ entry:
   %13 = fmul double %3, %12
   %14 = fmul double %3, undef
   %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
-  store double %13, double* %15, align 8, !tbaa !0
+  store double %13, double* %15, align 8
   %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
   %17 = fmul double undef, %8
   %18 = fmul double %17, undef
@@ -54,7 +54,7 @@ entry:
   %21 = fmul double %3, %19
   %22 = fsub double -0.000000e+00, %21
   %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
-  store double %22, double* %23, align 8, !tbaa !0
+  store double %22, double* %23, align 8
   %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
   %25 = fmul double undef, 0x3FE42F601A8C6794
   %26 = fmul double undef, 2.000000e+00
@@ -62,7 +62,7 @@ entry:
   %28 = fmul double %6, undef
   %29 = fsub double undef, %28
   %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
-  store double undef, double* %30, align 8, !tbaa !0
+  store double undef, double* %30, align 8
   %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
   %32 = fmul double undef, %17
   %33 = fmul double undef, %17
@@ -71,7 +71,7 @@ entry:
   %36 = fsub double undef, %35
   %37 = fmul double %3, %34
   %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
-  store double %37, double* %38, align 8, !tbaa !0
+  store double %37, double* %38, align 8
   %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
   %40 = fmul double undef, %8
   %41 = fmul double undef, %40
@@ -79,20 +79,17 @@ entry:
   %43 = fsub double undef, %42
   %44 = fmul double %3, %43
   %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
-  store double %13, double* %45, align 8, !tbaa !0
+  store double %13, double* %45, align 8
   %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
   %47 = fsub double -0.000000e+00, %14
-  store double %47, double* %16, align 8, !tbaa !0
-  store double undef, double* %24, align 8, !tbaa !0
-  store double -0.000000e+00, double* %31, align 8, !tbaa !0
-  store double undef, double* %39, align 8, !tbaa !0
-  store double undef, double* %46, align 8, !tbaa !0
+  store double %47, double* %16, align 8
+  store double undef, double* %24, align 8
+  store double -0.000000e+00, double* %31, align 8
+  store double undef, double* %39, align 8
+  store double undef, double* %46, align 8
   ret void
 }
 
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
-
-!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1}
-!1 = metadata !{metadata !1}
diff --git a/test/Transforms/BBVectorize/X86/sh-rec.ll b/test/Transforms/BBVectorize/X86/sh-rec.ll
index 1e0492c2a8c2..ad75fc96d9f2 100644
--- a/test/Transforms/BBVectorize/X86/sh-rec.ll
+++ b/test/Transforms/BBVectorize/X86/sh-rec.ll
@@ -46,7 +46,7 @@ if.end10:                                         ; preds = %entry
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: @ptoa
+; CHECK-LABEL: @ptoa(
 }
 
 declare noalias i8* @malloc() nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-rec2.ll b/test/Transforms/BBVectorize/X86/sh-rec2.ll
index ef2239932fa1..d65ac1cc12f1 100644
--- a/test/Transforms/BBVectorize/X86/sh-rec2.ll
+++ b/test/Transforms/BBVectorize/X86/sh-rec2.ll
@@ -77,7 +77,7 @@ entry:
   %and390 = shl i8 %conv3898, 6
   store i8 %and390, i8* %incdec.ptr387, align 1
   unreachable
-; CHECK: @gsm_encode
+; CHECK-LABEL: @gsm_encode(
 }
 
 declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
diff --git a/test/Transforms/BBVectorize/X86/sh-rec3.ll b/test/Transforms/BBVectorize/X86/sh-rec3.ll
index fd2cc8bdd91c..ad880ed8895a 100644
--- a/test/Transforms/BBVectorize/X86/sh-rec3.ll
+++ b/test/Transforms/BBVectorize/X86/sh-rec3.ll
@@ -162,7 +162,7 @@ entry:
   %conv365 = trunc i32 %or364 to i8
   store i8 %conv365, i8* %incdec.ptr350, align 1
   unreachable
-; CHECK: @gsm_encode
+; CHECK-LABEL: @gsm_encode(
 }
 
 declare void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
diff --git a/test/Transforms/BBVectorize/X86/sh-types.ll b/test/Transforms/BBVectorize/X86/sh-types.ll
index 0bcb714d5e65..fbff2fb86eb0 100644
--- a/test/Transforms/BBVectorize/X86/sh-types.ll
+++ b/test/Transforms/BBVectorize/X86/sh-types.ll
@@ -18,7 +18,7 @@ define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %
 
         %R = fmul <4 x float> %Y1, %Y2
         ret <4 x float> %R
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: <8 x float>
 ; CHECK: ret <4 x float>
 }
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
index f5dbe46b1480..7842ec85b6c8 100644
--- a/test/Transforms/BBVectorize/X86/simple-int.ll
+++ b/test/Transforms/BBVectorize/X86/simple-int.ll
@@ -16,7 +16,7 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1,
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret double %R
 }
 
@@ -30,7 +30,7 @@ define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test1a
+; CHECK-LABEL: @test1a(
 ; CHECK: ret double %R
 }
 
@@ -44,7 +44,7 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret double %R
 }
 
@@ -58,7 +58,7 @@ define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret double %R
 }
 
@@ -73,7 +73,7 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret double %R
 }
 
diff --git a/test/Transforms/BBVectorize/X86/simple-ldstr.ll b/test/Transforms/BBVectorize/X86/simple-ldstr.ll
index 0124399bad9d..1abbc34b68ec 100644
--- a/test/Transforms/BBVectorize/X86/simple-ldstr.ll
+++ b/test/Transforms/BBVectorize/X86/simple-ldstr.ll
@@ -16,7 +16,7 @@ entry:
   %arrayidx5 = getelementptr inbounds double* %c, i64 1
   store double %mul5, double* %arrayidx5, align 8
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
 ; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll
index 8abfa5f8bd29..a11e3090f205 100644
--- a/test/Transforms/BBVectorize/X86/simple.ll
+++ b/test/Transforms/BBVectorize/X86/simple.ll
@@ -11,7 +11,7 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: fsub <2 x double>
 ; CHECK: fmul <2 x double>
 ; CHECK: fadd <2 x double>
@@ -38,7 +38,7 @@ define double @test1a(double %A1, double %A2, double %B1, double %B2) {
 	%S2 = fadd double %W2, %Q2
 	%R  = fmul double %S1, %S2
 	ret double %R
-; CHECK: @test1a
+; CHECK-LABEL: @test1a(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -66,7 +66,7 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%Z2 = fadd double %Y1, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: insertelement
 ; CHECK: insertelement
 ; CHECK: insertelement
@@ -88,7 +88,7 @@ define double @test4(double %A1, double %A2, double %B1, double %B2) {
 	%W2 = fadd double %Y1, %Z2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: insertelement
 ; CHECK: insertelement
 ; CHECK: insertelement
@@ -113,7 +113,7 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
         %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
 	%R  = mul <8 x i8> %Q1, %Q2
 	ret <8 x i8> %R
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NOT: sub <16 x i8>
 ; CHECK: ret <8 x i8>
 }
diff --git a/test/Transforms/BBVectorize/X86/vs-cast.ll b/test/Transforms/BBVectorize/X86/vs-cast.ll
index be3efca925b8..0c666b11976c 100644
--- a/test/Transforms/BBVectorize/X86/vs-cast.ll
+++ b/test/Transforms/BBVectorize/X86/vs-cast.ll
@@ -7,6 +7,6 @@ entry:
   %0 = bitcast <2 x i64> undef to i128
   %1 = bitcast <2 x i64> undef to i128
   ret void
-; CHECK: @main
+; CHECK-LABEL: @main(
 }
 
diff --git a/test/Transforms/BBVectorize/X86/wr-aliases.ll b/test/Transforms/BBVectorize/X86/wr-aliases.ll
new file mode 100644
index 000000000000..34b1d4e9cae3
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -0,0 +1,144 @@
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -bb-vectorize -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.QBezier.15 = type { double, double, double, double, double, double, double, double }
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+
+; Function Attrs: uwtable
+declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+define void @main_arrayctor.cont([10 x %class.QBezier.15]* %beziers, %class.QBezier.15* %agg.tmp.i, %class.QBezier.15* %agg.tmp55.i, %class.QBezier.15* %agg.tmp56.i) {
+newFuncRoot:
+  br label %arrayctor.cont
+
+arrayctor.cont.ret.exitStub:                      ; preds = %arrayctor.cont
+  ret void
+; Expected output of the RUN line: some <2 x double> code is formed, and the store of %mul8.i through %x3.i still appears before a load of %x3.i.
+; CHECK-LABEL: @main_arrayctor.cont(
+; CHECK: <2 x double>
+; CHECK: @_ZL12printQBezier7QBezier
+; CHECK: store double %mul8.i, double* %x3.i, align 16
+; CHECK: load double* %x3.i, align 16
+; CHECK: ret
+; Body: fill the eight double fields of beziers[0], hand a 64-byte copy to @_ZL12printQBezier7QBezier, update fields of beziers[0] and beziers[1], then hand copies of beziers[1] and beziers[0] to the same callee.
+arrayctor.cont:                                   ; preds = %newFuncRoot
+  %ref.tmp.sroa.0.0.idx = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
+  store double 1.000000e+01, double* %ref.tmp.sroa.0.0.idx, align 16
+  %ref.tmp.sroa.2.0.idx1 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
+  store double 2.000000e+01, double* %ref.tmp.sroa.2.0.idx1, align 8
+  %ref.tmp.sroa.3.0.idx2 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
+  store double 3.000000e+01, double* %ref.tmp.sroa.3.0.idx2, align 16
+  %ref.tmp.sroa.4.0.idx3 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
+  store double 4.000000e+01, double* %ref.tmp.sroa.4.0.idx3, align 8
+  %ref.tmp.sroa.5.0.idx4 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
+  store double 5.000000e+01, double* %ref.tmp.sroa.5.0.idx4, align 16
+  %ref.tmp.sroa.6.0.idx5 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
+  store double 6.000000e+01, double* %ref.tmp.sroa.6.0.idx5, align 8
+  %ref.tmp.sroa.7.0.idx6 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
+  store double 7.000000e+01, double* %ref.tmp.sroa.7.0.idx6, align 16
+  %ref.tmp.sroa.8.0.idx7 = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
+  store double 8.000000e+01, double* %ref.tmp.sroa.8.0.idx7, align 8
+  %add.ptr = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1 ; address of beziers[1]
+  %v0 = bitcast %class.QBezier.15* %agg.tmp.i to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %v0)
+  %v1 = bitcast %class.QBezier.15* %agg.tmp55.i to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %v1)
+  %v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %v2)
+  %v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8* ; i8* view of the array base, i.e. the start of beziers[0]
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
+  call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
+  %x2.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
+  %v4 = load double* %x2.i, align 16
+  %x3.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
+  %v5 = load double* %x3.i, align 16
+  %add.i = fadd double %v4, %v5
+  %mul.i = fmul double 5.000000e-01, %add.i
+  %x1.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
+  %v6 = load double* %x1.i, align 16
+  %add3.i = fadd double %v4, %v6
+  %mul4.i = fmul double 5.000000e-01, %add3.i
+  %x25.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2
+  store double %mul4.i, double* %x25.i, align 16
+  %v7 = load double* %x3.i, align 16
+  %x4.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
+  %v8 = load double* %x4.i, align 16
+  %add7.i = fadd double %v7, %v8
+  %mul8.i = fmul double 5.000000e-01, %add7.i
+  store double %mul8.i, double* %x3.i, align 16 ; the store named in the expected-output directives; %x3.i is reloaded as %v11 below
+  %v9 = load double* %x1.i, align 16
+  %x111.i = getelementptr inbounds %class.QBezier.15* %add.ptr, i64 0, i32 0
+  store double %v9, double* %x111.i, align 16
+  %v10 = load double* %x25.i, align 16
+  %add15.i = fadd double %mul.i, %v10
+  %mul16.i = fmul double 5.000000e-01, %add15.i
+  %x317.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4
+  store double %mul16.i, double* %x317.i, align 16
+  %v11 = load double* %x3.i, align 16 ; reloads the location written by the %mul8.i store above
+  %add19.i = fadd double %mul.i, %v11
+  %mul20.i = fmul double 5.000000e-01, %add19.i
+  store double %mul20.i, double* %x2.i, align 16
+  %v12 = load double* %x317.i, align 16
+  %add24.i = fadd double %v12, %mul20.i
+  %mul25.i = fmul double 5.000000e-01, %add24.i
+  store double %mul25.i, double* %x1.i, align 16
+  %x427.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6
+  store double %mul25.i, double* %x427.i, align 16
+  %y2.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
+  %v13 = load double* %y2.i, align 8
+  %y3.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
+  %v14 = load double* %y3.i, align 8
+  %add28.i = fadd double %v13, %v14
+  %div.i = fmul double 5.000000e-01, %add28.i
+  %y1.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
+  %v15 = load double* %y1.i, align 8
+  %add30.i = fadd double %v13, %v15
+  %mul31.i = fmul double 5.000000e-01, %add30.i
+  %y232.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3
+  store double %mul31.i, double* %y232.i, align 8
+  %v16 = load double* %y3.i, align 8
+  %y4.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
+  %v17 = load double* %y4.i, align 8
+  %add34.i = fadd double %v16, %v17
+  %mul35.i = fmul double 5.000000e-01, %add34.i
+  store double %mul35.i, double* %y3.i, align 8
+  %v18 = load double* %y1.i, align 8
+  %y138.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1
+  store double %v18, double* %y138.i, align 8
+  %v19 = load double* %y232.i, align 8
+  %add42.i = fadd double %div.i, %v19
+  %mul43.i = fmul double 5.000000e-01, %add42.i
+  %y344.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5
+  store double %mul43.i, double* %y344.i, align 8
+  %v20 = load double* %y3.i, align 8
+  %add46.i = fadd double %div.i, %v20
+  %mul47.i = fmul double 5.000000e-01, %add46.i
+  store double %mul47.i, double* %y2.i, align 8
+  %v21 = load double* %y344.i, align 8
+  %add51.i = fadd double %v21, %mul47.i
+  %mul52.i = fmul double 5.000000e-01, %add51.i
+  store double %mul52.i, double* %y1.i, align 8
+  %y454.i = getelementptr inbounds [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
+  store double %mul52.i, double* %y454.i, align 8
+  %v22 = bitcast %class.QBezier.15* %add.ptr to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
+  call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
+  call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
+  call void @llvm.lifetime.end(i64 64, i8* %v0)
+  call void @llvm.lifetime.end(i64 64, i8* %v1)
+  call void @llvm.lifetime.end(i64 64, i8* %v2)
+  br label %arrayctor.cont.ret.exitStub
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
index bdcb30da887f..6bfa625ea5f0 100644
--- a/test/Transforms/BBVectorize/cycle.ll
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -105,7 +105,7 @@ go:
   br i1 %or.cond, label %done, label %go
 done:
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: go:
 ; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
 ; FIXME: When tree pruning is deterministic, include the entire output.
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
index ea5cb5dd93f7..9c79eef05f7b 100644
--- a/test/Transforms/BBVectorize/ld1.ll
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -22,7 +22,7 @@ entry:
   %add15 = fadd double %mul13, %i5
   %mul16 = fmul double %add11, %add15
   ret double %mul16
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
 ; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/Transforms/BBVectorize/lit.local.cfg
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
index e592edb44a02..ed7be15f7adf 100644
--- a/test/Transforms/BBVectorize/loop1.ll
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -7,8 +7,8 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
 entry:
   br label %for.body
-; CHECK: @test1
-; CHECK-UNRL: @test1
+; CHECK-LABEL: @test1(
+; CHECK-UNRL-LABEL: @test1(
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
diff --git a/test/Transforms/BBVectorize/mem-op-depth.ll b/test/Transforms/BBVectorize/mem-op-depth.ll
index 84f16bd2f47d..c31d4521183f 100644
--- a/test/Transforms/BBVectorize/mem-op-depth.ll
+++ b/test/Transforms/BBVectorize/mem-op-depth.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @B = common global [1024 x float] zeroinitializer, align 16
 
 define i32 @test1() nounwind {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %V1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
   %V2 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
   %V3= load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
diff --git a/test/Transforms/BBVectorize/metadata.ll b/test/Transforms/BBVectorize/metadata.ll
index 1e3aaa127a0e..ac7297dd5417 100644
--- a/test/Transforms/BBVectorize/metadata.ll
+++ b/test/Transforms/BBVectorize/metadata.ll
@@ -16,7 +16,7 @@ entry:
   %arrayidx5 = getelementptr inbounds double* %c, i64 1
   store double %mul5, double* %arrayidx5, align 8
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: !fpmath
 ; CHECK: ret void
 }
@@ -36,7 +36,7 @@ entry:
   %arrayidx5 = getelementptr inbounds i64* %c, i64 1
   store i64 %mul5, i64* %arrayidx5, align 8
   ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: !range
 ; CHECK: ret void
 }
diff --git a/test/Transforms/BBVectorize/no-ldstr-conn.ll b/test/Transforms/BBVectorize/no-ldstr-conn.ll
index ada2a71e36ec..bcc5ce7531bd 100644
--- a/test/Transforms/BBVectorize/no-ldstr-conn.ll
+++ b/test/Transforms/BBVectorize/no-ldstr-conn.ll
@@ -17,7 +17,7 @@ entry:
   store i64 %v3a, i64* %a3, align 8
   %r = add i64 %v2, %v3
   ret i64 %r
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: getelementptr <2 x i64*>
 }
 
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
index e0120059b952..2675354183a6 100644
--- a/test/Transforms/BBVectorize/req-depth.ll
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -9,8 +9,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
 	%Y2 = fmul double %X2, %A2
 	%R  = fmul double %Y1, %Y2
 	ret double %R
-; CHECK-RD3: @test1
-; CHECK-RD2: @test1
+; CHECK-RD3-LABEL: @test1(
+; CHECK-RD2-LABEL: @test1(
 ; CHECK-RD3-NOT: <2 x double>
 ; CHECK-RD2: <2 x double>
 }
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
index a694e45bc181..be38d3402603 100644
--- a/test/Transforms/BBVectorize/search-limit.ll
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -3,8 +3,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
 
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test1
-; CHECK-SL4: @test1
+; CHECK-LABEL: @test1(
+; CHECK-SL4-LABEL: @test1(
 ; CHECK-SL4-NOT: <2 x double>
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index e4d51526ca11..e33ac612edc2 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -16,7 +16,7 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1,
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -42,7 +42,7 @@ define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test1a
+; CHECK-LABEL: @test1a(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -68,7 +68,7 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -93,7 +93,7 @@ define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -119,7 +119,7 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
 	%Z2 = fadd double %Y2, %B2
 	%R  = fmul double %Z1, %Z2
 	ret double %R
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: <2 x double>
 ; CHECK: ret double %R
 }
diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
index d46f7692b6d3..4d2298c1a11d 100644
--- a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
@@ -27,7 +27,7 @@ entry:
   %arrayidx5 = getelementptr inbounds i64* %c, i64 1
   store i64 %mul5, i64* %arrayidx5, align 8
   ret double %r
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>*
 ; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>*
 ; CHECK: %i0 = load <2 x i64>* %i0.v.i0, align 8
@@ -43,7 +43,7 @@ entry:
 ; CHECK: %0 = bitcast i64* %c to <2 x i64>*
 ; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8
 ; CHECK: ret double %r
-; CHECK-AO: @test1
+; CHECK-AO-LABEL: @test1(
 ; CHECK-AO-NOT: load <2 x
 }
 
@@ -64,7 +64,7 @@ entry:
   %arrayidx5 = getelementptr inbounds i64** %c, i64 1
   store i64* %ptr3, i64** %arrayidx5, align 8
   ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>*
 ; CHECK: %i1 = load i64** %b, align 8
 ; CHECK: %i0 = load <2 x i64*>* %i0.v.i0, align 8
@@ -78,7 +78,7 @@ entry:
 ; CHECK: %0 = bitcast i64** %c to <2 x i64*>*
 ; CHECK: store <2 x i64*> %ptr0, <2 x i64*>* %0, align 8
 ; CHECK: ret void
-; CHECK-AO: @test2
+; CHECK-AO-LABEL: @test2(
 ; CHECK-AO-NOT: <2 x
 }
 
@@ -108,7 +108,7 @@ entry:
   %arrayidx5 = getelementptr inbounds <2 x i64*>* %c, i64 1
   store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8
   ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
 ; CHECK: %i1 = load <2 x i64*>* %b, align 8
 ; CHECK: %i0 = load <4 x i64*>* %i0.v.i0, align 8
@@ -128,7 +128,7 @@ entry:
 ; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8
 ; CHECK: ret void
-; CHECK-AO: @test3
+; CHECK-AO-LABEL: @test3(
 ; CHECK-AO-NOT: <4 x
 }
 
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
index 8e51d297e8ec..558f8b38d794 100644
--- a/test/Transforms/BBVectorize/simple-ldstr.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -17,7 +17,7 @@ entry:
   %arrayidx5 = getelementptr inbounds double* %c, i64 1
   store double %mul5, double* %arrayidx5, align 8
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
 ; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
@@ -26,7 +26,7 @@ entry:
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
 ; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
 ; CHECK: ret void
-; CHECK-AO: @test1
+; CHECK-AO-LABEL: @test1(
 ; CHECK-AO-NOT: <2 x double>
 }
 
@@ -49,7 +49,7 @@ entry:
   %arrayidx5 = getelementptr inbounds double* %c, i64 1
   store double %mul5, double* %arrayidx5, align 8
   ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
 ; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
 ; CHECK: %i0f = load <2 x float>* %i0f.v.i0, align 4
@@ -60,7 +60,7 @@ entry:
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
 ; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
 ; CHECK: ret void
-; CHECK-AO: @test2
+; CHECK-AO-LABEL: @test2(
 ; CHECK-AO-NOT: <2 x double>
 }
 
@@ -81,7 +81,7 @@ entry:
   %arrayidx5 = getelementptr inbounds float* %c, i64 1
   store float %mul5f, float* %arrayidx5, align 4
   ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
 ; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
@@ -91,7 +91,7 @@ entry:
 ; CHECK: %0 = bitcast float* %c to <2 x float>*
 ; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
 ; CHECK: ret void
-; CHECK-AO: @test3
+; CHECK-AO-LABEL: @test3(
 ; CHECK-AO: %i0 = load double* %a, align 8
 ; CHECK-AO: %i1 = load double* %b, align 8
 ; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
@@ -134,9 +134,9 @@ if.then:
 
 if.end:
   ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: <2 x double>
-; CHECK-AO: @test4
+; CHECK-AO-LABEL: @test4(
 ; CHECK-AO-NOT: <2 x double>
 }
 
@@ -155,7 +155,7 @@ entry:
   store double %mul5, double* %arrayidx5, align 8
   store double %mul, double* %c, align 4
   ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
 ; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
@@ -164,7 +164,7 @@ entry:
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
 ; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
 ; CHECK: ret void
-; CHECK-AO: @test5
+; CHECK-AO-LABEL: @test5(
 ; CHECK-AO-NOT: <2 x double>
 }
 
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
index 8caccfd32c34..269b07f82d19 100644
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Basic depth-3 chain with select
 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -30,8 +30,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1
 
 ; Basic depth-3 chain with select (and vect. compare)
 define double @test2(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test2
-; CHECK-NB: @test2
+; CHECK-LABEL: @test2(
+; CHECK-NB-LABEL: @test2(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
diff --git a/test/Transforms/BBVectorize/simple-tst.ll b/test/Transforms/BBVectorize/simple-tst.ll
index 42146c6d14a2..6a88e1b09c1b 100644
--- a/test/Transforms/BBVectorize/simple-tst.ll
+++ b/test/Transforms/BBVectorize/simple-tst.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-linux"
 
 ; Basic depth-3 chain (target-specific type should not vectorize)
 define ppc_fp128 @test7(ppc_fp128 %A1, ppc_fp128 %A2, ppc_fp128 %B1, ppc_fp128 %B2) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: <2 x ppc_fp128>
 	%X1 = fsub ppc_fp128 %A1, %B1
 	%X2 = fsub ppc_fp128 %A2, %B2
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index a447908d16cc..0fe33f17a646 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Basic depth-3 chain
 define double @test1(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -27,7 +27,7 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
 
 ; Basic depth-3 chain (last pair permuted)
 define double @test2(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -53,7 +53,7 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 
 ; Basic depth-3 chain (last pair first splat)
 define double @test3(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -78,7 +78,7 @@ define double @test3(double %A1, double %A2, double %B1, double %B2) {
 
 ; Basic depth-3 chain (last pair second splat)
 define double @test4(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -103,7 +103,7 @@ define double @test4(double %A1, double %A2, double %B1, double %B2) {
 
 ; Basic depth-3 chain
 define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 	%X1 = fsub <2 x float> %A1, %B1
@@ -125,7 +125,7 @@ define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2
 
 ; Basic chain with shuffles
 define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 	%X1 = sub <8 x i8> %A1, %B1
@@ -151,7 +151,7 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
 
 ; Basic depth-3 chain (flipped order)
 define double @test7(double %A1, double %A2, double %B1, double %B2) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
@@ -175,7 +175,7 @@ define double @test7(double %A1, double %A2, double %B1, double %B2) {
 
 ; Basic depth-3 chain (subclass data)
 define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
 ; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
 ; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
index 78bcc9f83080..6edf7f07ac1d 100644
--- a/test/Transforms/BBVectorize/simple3.ll
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Basic depth-3 chain
 define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0
 ; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1
 ; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2
diff --git a/test/Transforms/BBVectorize/xcore/no-vector-registers.ll b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll
new file mode 100644
index 000000000000..9ebdb7368a35
--- /dev/null
+++ b/test/Transforms/BBVectorize/xcore/no-vector-registers.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S -mtriple=xcore | FileCheck %s
+
+target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
+target triple = "xcore"
+
+; Basic depth-3 chain: two parallel fsub -> fmul -> fadd sequences over (%A1, %B1) and (%A2, %B2); the output is expected to contain no <2 x double>.
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK-LABEL: @test1(
+; CHECK-NOT: <2 x double>
+  %X1 = fsub double %A1, %B1 ; chain 1, step 1: X = A - B
+  %X2 = fsub double %A2, %B2 ; chain 2, step 1
+  %Y1 = fmul double %X1, %A1 ; chain 1, step 2: Y = X * A
+  %Y2 = fmul double %X2, %A2 ; chain 2, step 2
+  %Z1 = fadd double %Y1, %B1 ; chain 1, step 3: Z = Y + B
+  %Z2 = fadd double %Y2, %B2 ; chain 2, step 3
+  %R  = fmul double %Z1, %Z2 ; multiply the two chain results into the scalar return value
+  ret double %R
+}
diff --git a/test/Transforms/BlockPlacement/basictest.ll b/test/Transforms/BlockPlacement/basictest.ll
deleted file mode 100644
index 47b507903bce..000000000000
--- a/test/Transforms/BlockPlacement/basictest.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -block-placement -disable-output -print-function 2> /dev/null
-
-define i32 @test() {
-        br i1 true, label %X, label %Y
-
-A:              ; preds = %Y, %X
-        ret i32 0
-
-X:              ; preds = %0
-        br label %A
-
-Y:              ; preds = %0
-        br label %A
-}
-
diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/BlockPlacement/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/CodeExtractor/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index d617e43be865..495d910b5cd6 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; objectsize should fold to a constant, which causes the branch to fold to an
 ; uncond branch. Next, we fold the control flow alltogether.
 ; rdar://8785296
diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/CodeGenPrepare/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index 09e6e7db437a..afe6ef91240d 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -16,7 +16,7 @@ BB2:
         br label %BB3
 
 BB3:     
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %Ret = phi i32 [ 0, %BB1 ], [ 1, %BB2 ]
         %Ret = phi i32 [ %Val, %BB1 ], [ 1, %BB2 ] 
         ret i32 %Ret
@@ -31,12 +31,12 @@ entry:
 bb:
   ret i1 %V
   
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i1 true
 }
 
 define i1 @TNAN() {
-; CHECK: @TNAN
+; CHECK-LABEL: @TNAN(
 ; CHECK: ret i1 true
   %A = fcmp uno double 0x7FF8000000000000, 1.000000e+00
   %B = fcmp uno double 1.230000e+02, 1.000000e+00
@@ -47,7 +47,7 @@ define i1 @TNAN() {
 define i128 @vector_to_int_cast() {
   %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
   ret i128 %A
-; CHECK: @vector_to_int_cast
+; CHECK-LABEL: @vector_to_int_cast(
 ; CHECK: ret i128 85070591750041656499021422275829170176
 }
-  
-\ No newline at end of file
+  
diff --git a/test/Transforms/ConstProp/bitcast.ll b/test/Transforms/ConstProp/bitcast.ll
index 53239c7e4fe0..5e1581d801c7 100644
--- a/test/Transforms/ConstProp/bitcast.ll
+++ b/test/Transforms/ConstProp/bitcast.ll
@@ -4,7 +4,7 @@
 define <1 x i64> @test1() {
   %A = bitcast i64 63 to <1 x i64>
   ret <1 x i64> %A
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret <1 x i64> <i64 63>
 }
 
diff --git a/test/Transforms/ConstProp/bswap.ll b/test/Transforms/ConstProp/bswap.ll
index a68fdcd4581e..f601deb73db2 100644
--- a/test/Transforms/ConstProp/bswap.ll
+++ b/test/Transforms/ConstProp/bswap.ll
@@ -10,28 +10,28 @@ declare i64 @llvm.bswap.i64(i64)
 
 declare i80 @llvm.bswap.i80(i80)
 
-; CHECK: define i16 @W
+; CHECK-LABEL: define i16 @W(
 define i16 @W() {
         ; CHECK: ret i16 256
         %Z = call i16 @llvm.bswap.i16( i16 1 )          ; <i16> [#uses=1]
         ret i16 %Z
 }
 
-; CHECK: define i32 @X
+; CHECK-LABEL: define i32 @X(
 define i32 @X() {
         ; CHECK: ret i32 16777216
         %Z = call i32 @llvm.bswap.i32( i32 1 )          ; <i32> [#uses=1]
         ret i32 %Z
 }
 
-; CHECK: define i64 @Y
+; CHECK-LABEL: define i64 @Y(
 define i64 @Y() {
         ; CHECK: ret i64 72057594037927936
         %Z = call i64 @llvm.bswap.i64( i64 1 )          ; <i64> [#uses=1]
         ret i64 %Z
 }
 
-; CHECK: define i80 @Z
+; CHECK-LABEL: define i80 @Z(
 define i80 @Z() {
         ; CHECK: ret i80 -450681596205739728166896
         ;                0xA0908070605040302010
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 7a405a539c51..7541418b06ec 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -11,7 +11,7 @@ declare double @sqrt(double)
 declare double @exp2(double)
 
 define double @T() {
-; CHECK: @T
+; CHECK-LABEL: @T(
 ; CHECK-NOT: call
 ; CHECK: ret
   %A = call double @cos(double 0.000000e+00)
@@ -29,7 +29,7 @@ define double @T() {
 }
 
 define i1 @test_sse_cvt() nounwind readnone {
-; CHECK: @test_sse_cvt
+; CHECK-LABEL: @test_sse_cvt(
 ; CHECK-NOT: call
 ; CHECK: ret i1 true
 entry:
@@ -63,7 +63,7 @@ declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
 
 define double @test_intrinsic_pow() nounwind uwtable ssp {
 entry:
-; CHECK: @test_intrinsic_pow
+; CHECK-LABEL: @test_intrinsic_pow(
 ; CHECK-NOT: call
   %0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
   ret double %0
@@ -72,7 +72,7 @@ declare double @llvm.pow.f64(double, double) nounwind readonly
 
 ; Shouldn't fold because of -fno-builtin
 define double @sin_() nounwind uwtable ssp {
-; FNOBUILTIN: @sin_
+; FNOBUILTIN-LABEL: @sin_(
 ; FNOBUILTIN: %1 = call double @sin(double 3.000000e+00)
   %1 = call double @sin(double 3.000000e+00)
   ret double %1
@@ -80,7 +80,7 @@ define double @sin_() nounwind uwtable ssp {
 
 ; Shouldn't fold because of -fno-builtin
 define double @sqrt_() nounwind uwtable ssp {
-; FNOBUILTIN: @sqrt_
+; FNOBUILTIN-LABEL: @sqrt_(
 ; FNOBUILTIN: %1 = call double @sqrt(double 3.000000e+00)
   %1 = call double @sqrt(double 3.000000e+00)
   ret double %1
@@ -88,7 +88,7 @@ define double @sqrt_() nounwind uwtable ssp {
 
 ; Shouldn't fold because of -fno-builtin
 define float @sqrtf_() nounwind uwtable ssp {
-; FNOBUILTIN: @sqrtf_
+; FNOBUILTIN-LABEL: @sqrtf_(
 ; FNOBUILTIN: %1 = call float @sqrtf(float 3.000000e+00)
   %1 = call float @sqrtf(float 3.000000e+00)
   ret float %1
@@ -97,7 +97,7 @@ declare float @sqrtf(float)
 
 ; Shouldn't fold because of -fno-builtin
 define float @sinf_() nounwind uwtable ssp {
-; FNOBUILTIN: @sinf_
+; FNOBUILTIN-LABEL: @sinf_(
 ; FNOBUILTIN: %1 = call float @sinf(float 3.000000e+00)
   %1 = call float @sinf(float 3.000000e+00)
   ret float %1
@@ -106,7 +106,7 @@ declare float @sinf(float)
 
 ; Shouldn't fold because of -fno-builtin
 define double @tan_() nounwind uwtable ssp {
-; FNOBUILTIN: @tan_
+; FNOBUILTIN-LABEL: @tan_(
 ; FNOBUILTIN: %1 = call double @tan(double 3.000000e+00)
   %1 = call double @tan(double 3.000000e+00)
   ret double %1
@@ -114,7 +114,7 @@ define double @tan_() nounwind uwtable ssp {
 
 ; Shouldn't fold because of -fno-builtin
 define double @tanh_() nounwind uwtable ssp {
-; FNOBUILTIN: @tanh_
+; FNOBUILTIN-LABEL: @tanh_(
 ; FNOBUILTIN: %1 = call double @tanh(double 3.000000e+00)
   %1 = call double @tanh(double 3.000000e+00)
   ret double %1
@@ -123,7 +123,7 @@ declare double @tanh(double)
 
 ; Shouldn't fold because of -fno-builtin
 define double @pow_() nounwind uwtable ssp {
-; FNOBUILTIN: @pow_
+; FNOBUILTIN-LABEL: @pow_(
 ; FNOBUILTIN: %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
   %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
   ret double %1
@@ -132,7 +132,7 @@ declare double @pow(double, double)
 
 ; Shouldn't fold because of -fno-builtin
 define double @fmod_() nounwind uwtable ssp {
-; FNOBUILTIN: @fmod_
+; FNOBUILTIN-LABEL: @fmod_(
 ; FNOBUILTIN: %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
   %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
   ret double %1
@@ -141,7 +141,7 @@ declare double @fmod(double, double)
 
 ; Shouldn't fold because of -fno-builtin
 define double @atan2_() nounwind uwtable ssp {
-; FNOBUILTIN: @atan2_
+; FNOBUILTIN-LABEL: @atan2_(
 ; FNOBUILTIN: %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
   %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
   ret double %1
diff --git a/test/Transforms/ConstProp/extractvalue.ll b/test/Transforms/ConstProp/extractvalue.ll
index f947b22f23ae..72d6cb714a54 100644
--- a/test/Transforms/ConstProp/extractvalue.ll
+++ b/test/Transforms/ConstProp/extractvalue.ll
@@ -5,21 +5,21 @@
 define i32 @test1() {
   %A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 0
   ret i32 %A
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 2
 }
 
 define i8 @test2() {
   %A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 1, 2
   ret i8 %A
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i8 111
 }
 
 define i32 @test3() {
   %A = extractvalue [3 x %struct] [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], 1, 0
   ret i32 %A
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 1
 }
 
diff --git a/test/Transforms/ConstProp/insertvalue.ll b/test/Transforms/ConstProp/insertvalue.ll
index a4b7bb1019c2..0d288b3841df 100644
--- a/test/Transforms/ConstProp/insertvalue.ll
+++ b/test/Transforms/ConstProp/insertvalue.ll
@@ -5,21 +5,21 @@
 define %struct @test1() {
   %A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i32 1, 0
   ret %struct %A
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret %struct { i32 1, [4 x i8] c"foo\00" }
 }
 
 define %struct @test2() {
   %A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i8 1, 1, 2
   ret %struct %A
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret %struct { i32 2, [4 x i8] c"fo\01\00" }
 }
 
 define [3 x %struct] @test3() {
   %A = insertvalue [3 x %struct] [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], i32 -1, 1, 0
   ret [3 x %struct] %A
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK:ret [3 x %struct] [%struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 -1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" }]
 }
 
diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/ConstProp/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 6794288a0ef2..d05db47dcaaa 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -default-data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE
-; RUN: opt < %s -default-data-layout="E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE
+; RUN: opt < %s -default-data-layout="e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -default-data-layout="E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE
 
 ; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE}
 @g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
@@ -13,11 +13,11 @@ define i32 @test1() {
   ret i32 %r
 
 ; 0xDEADBEEF
-; LE: @test1
+; LE-LABEL: @test1(
 ; LE: ret i32 -559038737
 
 ; 0xDEADBEEF
-; BE: @test1
+; BE-LABEL: @test1(
 ; BE: ret i32 -559038737
 }
 
@@ -28,11 +28,11 @@ define i16 @test2() {
   ret i16 %r
 
 ; 0xBEEF
-; LE: @test2
+; LE-LABEL: @test2(
 ; LE: ret i16 -16657
 
 ; 0xDEAD
-; BE: @test2
+; BE-LABEL: @test2(
 ; BE: ret i16 -8531
 }
 
@@ -42,11 +42,11 @@ define i16 @test3() {
   ret i16 %r
 
 ; 0xDEAD
-; LE: @test3
+; LE-LABEL: @test3(
 ; LE: ret i16 -8531
 
 ; 0xBEEF
-; BE: @test3
+; BE-LABEL: @test3(
 ; BE: ret i16 -16657
 }
 
@@ -56,11 +56,11 @@ define i16 @test4() {
   ret i16 %r
 
 ; 0x00BA
-; LE: @test4
+; LE-LABEL: @test4(
 ; LE: ret i16 186
 
 ; 0xBA00
-; BE: @test4
+; BE-LABEL: @test4(
 ; BE: ret i16 -17920
 }
 
@@ -70,11 +70,11 @@ define i64 @test6() {
   ret i64 %r
 
 ; 0x3FF_0000000000000
-; LE: @test6
+; LE-LABEL: @test6(
 ; LE: ret i64 4607182418800017408
 
 ; 0x3FF_0000000000000
-; BE: @test6
+; BE-LABEL: @test6(
 ; BE: ret i64 4607182418800017408
 }
 
@@ -84,11 +84,11 @@ define i16 @test7() {
   ret i16 %r
 
 ; 0x0000
-; LE: @test7
+; LE-LABEL: @test7(
 ; LE: ret i16 0
 
 ; 0x3FF0
-; BE: @test7
+; BE-LABEL: @test7(
 ; BE: ret i16 16368
 }
 
@@ -97,10 +97,10 @@ define double @test8() {
   %r = load double* bitcast({{i32,i8},i32}* @g1 to double*)
   ret double %r
 
-; LE: @test8
+; LE-LABEL: @test8(
 ; LE: ret double 0xBADEADBEEF
 
-; BE: @test8
+; BE-LABEL: @test8(
 ; BE: ret double 0xDEADBEEFBA000000
 }
 
@@ -111,11 +111,11 @@ define i128 @test9() {
   ret i128 %r
 
 ; 0x00000000_06B1BFF8_00000000_0000007B
-; LE: @test9
+; LE-LABEL: @test9(
 ; LE: ret i128 2071796475790618158476296315
 
 ; 0x00000000_0000007B_00000000_06B1BFF8
-; BE: @test9
+; BE-LABEL: @test9(
 ; BE: ret i128 2268949521066387161080
 }
 
@@ -124,10 +124,10 @@ define <2 x i64> @test10() {
   %r = load <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*)
   ret <2 x i64> %r
 
-; LE: @test10
+; LE-LABEL: @test10(
 ; LE: ret <2 x i64> <i64 123, i64 112312312>
 
-; BE: @test10
+; BE-LABEL: @test10(
 ; BE: ret <2 x i64> <i64 123, i64 112312312>
 }
 
@@ -142,11 +142,11 @@ entry:
   ret i16 %a
 
 ; 0x08A1
-; LE: @test11
+; LE-LABEL: @test11(
 ; LE: ret i16 2209
 
 ; 0xA108
-; BE: @test11
+; BE-LABEL: @test11(
 ; BE: ret i16 -24312
 }
 
@@ -155,15 +155,15 @@ entry:
 @test12g = private constant [6 x i8] c"a\00b\00\00\00"
 
 define i16 @test12() {
-  %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1) 
+  %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1)
   ret i16 %a
 
 ; 0x0062
-; LE: @test12
+; LE-LABEL: @test12(
 ; LE: ret i16 98
 
 ; 0x6200
-; BE: @test12
+; BE-LABEL: @test12(
 ; BE: ret i16 25088
 }
 
@@ -174,10 +174,10 @@ define i1 @test13() {
   %A = load i1* bitcast (i8* @g5 to i1*)
   ret i1 %A
 
-; LE: @test13
+; LE-LABEL: @test13(
 ; LE: ret i1 false
 
-; BE: @test13
+; BE-LABEL: @test13(
 ; BE: ret i1 false
 }
 
@@ -187,21 +187,35 @@ entry:
   %tmp = load i64* bitcast ([2 x i8*]* @g6 to i64*)
   ret i64 %tmp
 
-; LE: @test14
+; LE-LABEL: @test14(
 ; LE: ret i64 1
 
-; BE: @test14
+; BE-LABEL: @test14(
 ; BE: ret i64 1
 }
 
+; Same as test14, but with addrspace(1) pointers (16 bits wide per p1:16:16:16 in the RUN lines)
+@g6_as1 = constant [2 x i8 addrspace(1)*] [i8 addrspace(1)* inttoptr (i16 1 to i8 addrspace(1)*), i8 addrspace(1)* inttoptr (i16 2 to i8 addrspace(1)*)]
+define i16 @test14_as1() nounwind {
+entry:
+  %tmp = load i16* bitcast ([2 x i8 addrspace(1)*]* @g6_as1 to i16*)
+  ret i16 %tmp
+
+; LE-LABEL: @test14_as1(
+; LE: ret i16 1
+
+; BE-LABEL: @test14_as1(
+; BE: ret i16 1
+}
+
 define i64 @test15() nounwind {
 entry:
   %tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*)
   ret i64 %tmp
 
-; LE: @test15
+; LE-LABEL: @test15(
 ; LE: ret i64 2
 
-; BE: @test15
+; BE-LABEL: @test15(
 ; BE: ret i64 2
 }
diff --git a/test/Transforms/ConstProp/overflow-ops.ll b/test/Transforms/ConstProp/overflow-ops.ll
index 849bf9ef75b2..1ae3e562e7e9 100644
--- a/test/Transforms/ConstProp/overflow-ops.ll
+++ b/test/Transforms/ConstProp/overflow-ops.ll
@@ -18,7 +18,7 @@ entry:
   %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 42, i8 100)
   ret {i8, i1} %t
 
-; CHECK: @uadd_1
+; CHECK-LABEL: @uadd_1(
 ; CHECK: ret { i8, i1 } { i8 -114, i1 false }
 }
 
@@ -27,7 +27,7 @@ entry:
   %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 142, i8 120)
   ret {i8, i1} %t
 
-; CHECK: @uadd_2
+; CHECK-LABEL: @uadd_2(
 ; CHECK: ret { i8, i1 } { i8 6, i1 true }
 }
 
@@ -40,7 +40,7 @@ entry:
   %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 2)
   ret {i8, i1} %t
 
-; CHECK: @usub_1
+; CHECK-LABEL: @usub_1(
 ; CHECK: ret { i8, i1 } { i8 2, i1 false }
 }
 
@@ -49,7 +49,7 @@ entry:
   %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 6)
   ret {i8, i1} %t
 
-; CHECK: @usub_2
+; CHECK-LABEL: @usub_2(
 ; CHECK: ret { i8, i1 } { i8 -2, i1 true }
 }
 
@@ -62,7 +62,7 @@ entry:
   %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 100, i8 3)
   ret {i8, i1} %t
 
-; CHECK: @umul_1
+; CHECK-LABEL: @umul_1(
 ; CHECK: ret { i8, i1 } { i8 44, i1 true }
 }
 
@@ -71,7 +71,7 @@ entry:
   %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 100, i8 2)
   ret {i8, i1} %t
 
-; CHECK: @umul_2
+; CHECK-LABEL: @umul_2(
 ; CHECK: ret { i8, i1 } { i8 -56, i1 false }
 }
 
@@ -84,7 +84,7 @@ entry:
   %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 42, i8 2)
   ret {i8, i1} %t
 
-; CHECK: @sadd_1
+; CHECK-LABEL: @sadd_1(
 ; CHECK: ret { i8, i1 } { i8 44, i1 false }
 }
 
@@ -93,7 +93,7 @@ entry:
   %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 120, i8 10)
   ret {i8, i1} %t
 
-; CHECK: @sadd_2
+; CHECK-LABEL: @sadd_2(
 ; CHECK: ret { i8, i1 } { i8 -126, i1 true }
 }
 
@@ -102,7 +102,7 @@ entry:
   %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 10)
   ret {i8, i1} %t
 
-; CHECK: @sadd_3
+; CHECK-LABEL: @sadd_3(
 ; CHECK: ret { i8, i1 } { i8 -110, i1 false }
 }
 
@@ -111,7 +111,7 @@ entry:
   %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 -10)
   ret {i8, i1} %t
 
-; CHECK: @sadd_4
+; CHECK-LABEL: @sadd_4(
 ; CHECK: ret { i8, i1 } { i8 126, i1 true }
 }
 
@@ -120,7 +120,7 @@ entry:
   %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 2, i8 -10)
   ret {i8, i1} %t
 
-; CHECK: @sadd_5
+; CHECK-LABEL: @sadd_5(
 ; CHECK: ret { i8, i1 } { i8 -8, i1 false }
 }
 
@@ -134,7 +134,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 2)
   ret {i8, i1} %t
 
-; CHECK: @ssub_1
+; CHECK-LABEL: @ssub_1(
 ; CHECK: ret { i8, i1 } { i8 2, i1 false }
 }
 
@@ -143,7 +143,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 6)
   ret {i8, i1} %t
 
-; CHECK: @ssub_2
+; CHECK-LABEL: @ssub_2(
 ; CHECK: ret { i8, i1 } { i8 -2, i1 false }
 }
 
@@ -152,7 +152,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 120)
   ret {i8, i1} %t
 
-; CHECK: @ssub_3
+; CHECK-LABEL: @ssub_3(
 ; CHECK: ret { i8, i1 } { i8 126, i1 true }
 }
 
@@ -161,7 +161,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 10)
   ret {i8, i1} %t
 
-; CHECK: @ssub_3b
+; CHECK-LABEL: @ssub_3b(
 ; CHECK: ret { i8, i1 } { i8 -20, i1 false }
 }
 
@@ -170,7 +170,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 120, i8 -10)
   ret {i8, i1} %t
 
-; CHECK: @ssub_4
+; CHECK-LABEL: @ssub_4(
 ; CHECK: ret { i8, i1 } { i8 -126, i1 true }
 }
 
@@ -179,7 +179,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 20, i8 -10)
   ret {i8, i1} %t
 
-; CHECK: @ssub_4b
+; CHECK-LABEL: @ssub_4b(
 ; CHECK: ret { i8, i1 } { i8 30, i1 false }
 }
 
@@ -188,7 +188,7 @@ entry:
   %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -20, i8 -10)
   ret {i8, i1} %t
 
-; CHECK: @ssub_5
+; CHECK-LABEL: @ssub_5(
 ; CHECK: ret { i8, i1 } { i8 -10, i1 false }
 }
 
@@ -202,6 +202,6 @@ entry:
   %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 -20, i8 -10)
   ret {i8, i1} %t
 
-; CHECK: @smul_1
+; CHECK-LABEL: @smul_1(
 ; CHECK: ret { i8, i1 } { i8 -56, i1 true }
 }
diff --git a/test/Transforms/ConstantMerge/align.ll b/test/Transforms/ConstantMerge/align.ll
new file mode 100644
index 000000000000..c1cbcb3c652d
--- /dev/null
+++ b/test/Transforms/ConstantMerge/align.ll
@@ -0,0 +1,28 @@
+; RUN: opt -constmerge -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+; Test that with a TD (target datalayout) we do merge and mark the alignment as 4
+@T1A = internal unnamed_addr constant i32 1
+@T1B = internal unnamed_addr constant i32 1, align 2
+; CHECK: @T1B = internal unnamed_addr constant i32 1, align 4
+
+define void @test1(i32** %P1, i32** %P2) {  ; stores the addresses of @T1A and @T1B
+  store i32* @T1A, i32** %P1  ; NOTE(review): presumably only here to give @T1A a use - confirm
+  store i32* @T1B, i32** %P2  ; likewise for @T1B
+  ret void
+}
+
+
+; Test that even with a TD we set the alignment to the maximum if both constants
+; have explicit alignments.
+@T2A = internal unnamed_addr constant i32 2, align 1
+@T2B = internal unnamed_addr constant i32 2, align 2
+; CHECK: @T2B = internal unnamed_addr constant i32 2, align 2
+
+define void @test2(i32** %P1, i32** %P2) {  ; stores the addresses of @T2A and @T2B
+  store i32* @T2A, i32** %P1  ; NOTE(review): presumably only here to give @T2A a use - confirm
+  store i32* @T2B, i32** %P2  ; likewise for @T2B
+  ret void
+}
diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/ConstantMerge/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
index fef5b8579eb5..0754f868d249 100644
--- a/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
+++ b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -S < %s -correlated-propagation | FileCheck %s
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 define i16 @test(i32 %a, i1 %b) {
 entry:
   %c = icmp eq i32 %a, 0
@@ -22,4 +22,4 @@ merge:
   %h = select i1 %f, i16 1, i16 0 
 ; CHECK: ret i16 %h
   ret i16 %h
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 39c437ccfae9..9a2264793c50 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -correlated-propagation -S | FileCheck %s
 ; PR2581
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define i32 @test1(i1 %C) nounwind  {
         br i1 %C, label %exit, label %body
 
@@ -18,7 +18,7 @@ exit:           ; preds = %0
 
 ; PR4420
 declare i1 @ext()
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i1 @test2() {
 entry:
         %cond = tail call i1 @ext()             ; <i1> [#uses=2]
@@ -42,7 +42,7 @@ bb3:            ; preds = %bb1
 
 ; PR4855
 @gv = internal constant i8 7
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define i8 @test3(i8* %a) nounwind {
 entry:
         %cond = icmp eq i8* %a, @gv
@@ -58,7 +58,7 @@ bb2:            ; preds = %entry
 }
 
 ; PR1757
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 define i32 @test4(i32) {
 EntryBlock:
 ; CHECK: icmp sgt i32 %0, 2  
@@ -83,7 +83,7 @@ LessThanOrEqualToTwo:
 
 declare i32* @f(i32*)
 define void @test5(i32* %x, i32* %y) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 entry:
   %pre = icmp eq i32* %x, null
   br i1 %pre, label %return, label %loop
@@ -102,7 +102,7 @@ return:
 }
 
 define i32 @switch1(i32 %s) {
-; CHECK: @switch1
+; CHECK-LABEL: @switch1(
 entry:
   %cmp = icmp slt i32 %s, 0
   br i1 %cmp, label %negative, label %out
@@ -134,7 +134,7 @@ next:
 }
 
 define i32 @switch2(i32 %s) {
-; CHECK: @switch2
+; CHECK-LABEL: @switch2(
 entry:
   %cmp = icmp sgt i32 %s, 0
   br i1 %cmp, label %positive, label %out
@@ -157,7 +157,7 @@ next:
 }
 
 define i32 @switch3(i32 %s) {
-; CHECK: @switch3
+; CHECK-LABEL: @switch3(
 entry:
   %cmp = icmp sgt i32 %s, 0
   br i1 %cmp, label %positive, label %out
@@ -180,7 +180,7 @@ next:
 }
 
 define void @switch4(i32 %s) {
-; CHECK: @switch4
+; CHECK-LABEL: @switch4(
 entry:
   %cmp = icmp eq i32 %s, 0
   br i1 %cmp, label %zero, label %out
diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
index 6750546ba187..e40c63919078 100644
--- a/test/Transforms/CorrelatedValuePropagation/range.ll
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -17,7 +17,7 @@ else:
 end:
   ret i32 2
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: then:
 ; CHECK-NEXT: br i1 false, label %end, label %else
 }
@@ -37,12 +37,12 @@ else:
 end:
   ret i32 2
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: then:
 ; CHECK-NEXT: br i1 false, label %end, label %else
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define i32 @test3(i32 %c) nounwind {
   %cmp = icmp slt i32 %c, 2
   br i1 %cmp, label %if.then, label %if.end
@@ -71,7 +71,7 @@ if.end8:
   ret i32 4
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 define i32 @test4(i32 %c) nounwind {
   switch i32 %c, label %sw.default [
     i32 1, label %sw.bb
@@ -99,7 +99,7 @@ return:
   ret i32 %retval.0
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define i1 @test5(i32 %c) nounwind {
   %cmp = icmp slt i32 %c, 5
   br i1 %cmp, label %if.then, label %if.end
@@ -121,7 +121,7 @@ if.end8:
   ret i1 %cmp2
 }
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 define i1 @test6(i32 %c) nounwind {
   %cmp = icmp ule i32 %c, 7
   br i1 %cmp, label %if.then, label %if.end
@@ -143,7 +143,7 @@ sw.bb:
   ret i1 %cmp2
 }
 
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 define i1 @test7(i32 %c) nounwind {
 entry:
  switch i32 %c, label %sw.default [
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index f5d2588dd059..26982db8322d 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -8,7 +8,7 @@ entry:
   call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
   call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
   call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
-; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !13
+; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !{{[0-9]+}}
   %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
   ret i8* %0, !dbg !13
 }
@@ -43,31 +43,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: attributes #2 = { noinline nounwind ssp }
 ; CHECK: attributes [[NUW]] = { nounwind }
 
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
 !0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", metadata !2, i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !28} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, metadata !28, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !9}
-!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!8 = metadata !{i32 524324, metadata !2, metadata !"char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524303, metadata !28, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 524326, metadata !28, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!8 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !10 = metadata !{i32 524545, metadata !1, metadata !"len", metadata !2, i32 9, metadata !9} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 524545, metadata !1, metadata !"hash", metadata !2, i32 10, metadata !9} ; [ DW_TAG_arg_variable ]
 !12 = metadata !{i32 524545, metadata !1, metadata !"flags", metadata !2, i32 11, metadata !9} ; [ DW_TAG_arg_variable ]
 !13 = metadata !{i32 13, i32 0, metadata !14, null}
-!14 = metadata !{i32 524299, metadata !1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 524299, metadata !28, metadata !1, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 524545, metadata !16, metadata !"name", metadata !2, i32 17, metadata !6} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 524334, i32 0, metadata !2, metadata !"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", metadata !2, i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !18 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !19, metadata !9}
-!19 = metadata !{i32 524324, metadata !2, metadata !"unsigned char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!19 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
 !20 = metadata !{i32 524545, metadata !16, metadata !"len", metadata !2, i32 18, metadata !9} ; [ DW_TAG_arg_variable ]
 !21 = metadata !{i32 524545, metadata !16, metadata !"hash", metadata !2, i32 19, metadata !9} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 524545, metadata !16, metadata !"extra", metadata !2, i32 20, metadata !19} ; [ DW_TAG_arg_variable ]
 !23 = metadata !{i32 524545, metadata !16, metadata !"flags", metadata !2, i32 21, metadata !9} ; [ DW_TAG_arg_variable ]
 !24 = metadata !{i32 23, i32 0, metadata !25, null}
-!25 = metadata !{i32 524299, metadata !16, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 524299, metadata !28, metadata !16, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !26 = metadata !{i32 24, i32 0, metadata !25, null}
 !27 = metadata !{i32 26, i32 0, metadata !25, null}
+!28 = metadata !{metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/"}
+!29 = metadata !{i32 0}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll b/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll
new file mode 100644
index 000000000000..23216031b58a
--- /dev/null
+++ b/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll
@@ -0,0 +1,25 @@
+; RUN: opt %s -deadargelim -S | FileCheck %s
+
+
+@block_addr = global i8* blockaddress(@varargs_func, %l1)
+; CHECK: @block_addr = global i8* blockaddress(@varargs_func, %l1)
+
+
+; This function is referenced by a "blockaddress" constant but it is
+; not address-taken, so the pass should be able to remove its unused
+; varargs.
+
+define internal i32 @varargs_func(i8* %addr, ...) {  ; the "..." is never read in the body
+  indirectbr i8* %addr, [ label %l1, label %l2 ]  ; jump to the block whose address is in %addr
+l1:  ; block referenced by the blockaddress constant in @block_addr
+  ret i32 1
+l2:
+  ret i32 2
+}
+; CHECK: define internal i32 @varargs_func(i8* %addr) {
+
+define i32 @caller(i8* %addr) {  ; the only direct call site of @varargs_func in this file
+  %r = call i32 (i8*, ...)* @varargs_func(i8* %addr)  ; passes the fixed argument only, no variadic extras
+  ret i32 %r
+}
+; CHECK: %r = call i32 @varargs_func(i8* %addr)
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index d53c19c8ef58..7bdcbf5c0623 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -35,29 +35,31 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !8, metadata !9}
-!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
+!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
 !6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
 
 ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
 
-!9 = metadata !{i32 786478, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+!9 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
 
 ; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
 
 !10 = metadata !{i32 8, i32 14, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !5, i32 8, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!11 = metadata !{i32 786443, metadata !20, metadata !5, i32 8, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
 !12 = metadata !{i32 8, i32 27, metadata !11, null}
 !13 = metadata !{i32 8, i32 42, metadata !11, null}
 !14 = metadata !{i32 4, i32 28, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !9, i32 4, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!15 = metadata !{i32 786443, metadata !20, metadata !9, i32 4, i32 26, i32 2} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
 !16 = metadata !{i32 4, i32 33, metadata !15, null}
 !17 = metadata !{i32 5, i32 25, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !8, i32 5, i32 23, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!18 = metadata !{i32 786443, metadata !20, metadata !8, i32 5, i32 23, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
 !19 = metadata !{i32 5, i32 30, metadata !18, null}
 !20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index cca58721e532..acbcf75ee495 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -7,7 +7,7 @@ define void @test(i32) {
 define void @foo() {
   call void @test(i32 0)
   ret void
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: i32 undef
 }
 
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index e41110c96ef4..82e01f225843 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -13,7 +13,7 @@ define internal zeroext i32 @test1(i32 %DEADARG1) nounwind {
 }
 
 ; This checks if the struct doesn't get non-packed
-; CHECK: define internal <{ i32, i32 }> @test2
+; CHECK-LABEL: define internal <{ i32, i32 }> @test2(
 define internal <{ i32, i32 }> @test2(i32 %DEADARG1) {
         ret <{ i32, i32 }> <{ i32 1, i32 2 }>
 }
diff --git a/test/Transforms/DeadArgElim/linkage.ll b/test/Transforms/DeadArgElim/linkage.ll
new file mode 100644
index 000000000000..f47548489eef
--- /dev/null
+++ b/test/Transforms/DeadArgElim/linkage.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+; rdar://11546243
+%struct.A = type { i8 }
+
+define available_externally void @_Z17externallyDefinedP1A(%struct.A* %a) {
+entry:
+  call void @_Z3foov()
+  ret void
+}
+
+declare void @_Z3foov()
+
+define void @_Z4testP1A(%struct.A* %a) {
+; CHECK: @_Z4testP1A
+; CHECK: @_Z17externallyDefinedP1A(%struct.A* %a)
+
+entry:
+  call void @_Z17externallyDefinedP1A(%struct.A* %a)
+  ret void
+}
diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/DeadArgElim/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadArgElim/returned.ll b/test/Transforms/DeadArgElim/returned.ll
new file mode 100644
index 000000000000..cbee026f9a37
--- /dev/null
+++ b/test/Transforms/DeadArgElim/returned.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+%Ty = type { i32, i32 }
+
+; sanity check that the argument and return value are both dead
+; CHECK-LABEL: define internal void @test1()
+
+define internal %Ty* @test1(%Ty* %this) {
+  ret %Ty* %this
+}
+
+; do not keep alive the return value of a function with a dead 'returned' argument
+; CHECK-LABEL: define internal void @test2()
+
+define internal %Ty* @test2(%Ty* returned %this) {
+  ret %Ty* %this
+}
+
+; dummy to keep 'this' alive
+@dummy = global %Ty* null 
+
+; sanity check that return value is dead
+; CHECK-LABEL: define internal void @test3(%Ty* %this)
+
+define internal %Ty* @test3(%Ty* %this) {
+  store volatile %Ty* %this, %Ty** @dummy
+  ret %Ty* %this
+}
+
+; keep alive return value of a function if the 'returned' argument is live
+; CHECK-LABEL: define internal %Ty* @test4(%Ty* returned %this)
+
+define internal %Ty* @test4(%Ty* returned %this) {
+  store volatile %Ty* %this, %Ty** @dummy
+  ret %Ty* %this
+}
+
+; don't do this if 'returned' is on the call site...
+; CHECK-LABEL: define internal void @test5(%Ty* %this)
+
+define internal %Ty* @test5(%Ty* %this) {
+  store volatile %Ty* %this, %Ty** @dummy
+  ret %Ty* %this
+}
+
+define %Ty* @caller(%Ty* %this) {
+  %1 = call %Ty* @test1(%Ty* %this)
+  %2 = call %Ty* @test2(%Ty* %this)
+  %3 = call %Ty* @test3(%Ty* %this)
+  %4 = call %Ty* @test4(%Ty* %this)
+; ...instead, drop 'returned' from the call site
+; CHECK: call void @test5(%Ty* %this)
+  %5 = call %Ty* @test5(%Ty* returned %this)
+  ret %Ty* %this
+}
diff --git a/test/Transforms/DeadArgElim/variadic_safety.ll b/test/Transforms/DeadArgElim/variadic_safety.ll
new file mode 100644
index 000000000000..15f57bcfdcb8
--- /dev/null
+++ b/test/Transforms/DeadArgElim/variadic_safety.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+declare void @llvm.va_start(i8*)
+
+define internal i32 @va_func(i32 %a, i32 %b, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+
+  ret i32 %b
+}
+
+; Function derived from AArch64 ABI, where 8 integer arguments go in
+; registers but the 9th goes on the stack. We really don't want to put
+; just 7 args in registers and then start on the stack since any
+; va_arg implementation already present in va_func won't be expecting
+; it.
+define i32 @call_va(i32 %in) {
+  %stacked = alloca i32
+  store i32 42, i32* %stacked
+  %res = call i32(i32, i32, ...)* @va_func(i32 %in, i32 %in, [6 x i32] undef, i32* byval %stacked)
+  ret i32 %res
+; CHECK: call i32 (i32, i32, ...)* @va_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
+}
+
+define internal i32 @va_deadret_func(i32 %a, i32 %b, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+
+  ret i32 %a
+}
+
+define void @call_deadret(i32 %in) {
+  %stacked = alloca i32
+  store i32 42, i32* %stacked
+  call i32 (i32, i32, ...)* @va_deadret_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
+  ret void
+; CHECK: call void (i32, i32, ...)* @va_deadret_func(i32 undef, i32 undef, [6 x i32] undef, i32* byval %stacked)
+}
diff --git a/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll b/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
index d114e513ed2b..95253f6570b5 100644
--- a/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
+++ b/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin"
 
 %"class.std::auto_ptr" = type { i32* }
 
-; CHECK: @_Z3foov
+; CHECK-LABEL: @_Z3foov(
 define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) uwtable ssp {
 _ZNSt8auto_ptrIiED1Ev.exit:
   %temp.lvalue = alloca %"class.std::auto_ptr", align 8
diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index ed53eb524c20..968d608c67ed 100644
--- a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @glob2 = global %struct.vec2plusi zeroinitializer, align 16
 
 define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write24to28
+; CHECK-LABEL: @write24to28(
 entry:
   %arrayidx0 = getelementptr inbounds i32* %p, i64 1
   %p3 = bitcast i32* %arrayidx0 to i8*
@@ -20,7 +20,7 @@ entry:
 }
 
 define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write28to32
+; CHECK-LABEL: @write28to32(
 entry:
   %p3 = bitcast i32* %p to i8*
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
@@ -31,7 +31,7 @@ entry:
 }
 
 define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
-; CHECK: @dontwrite28to32memset
+; CHECK-LABEL: @dontwrite28to32memset(
 entry:
   %p3 = bitcast i32* %p to i8*
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
@@ -42,7 +42,7 @@ entry:
 }
 
 define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write32to36
+; CHECK-LABEL: @write32to36(
 entry:
   %0 = bitcast %struct.vec2plusi* %p to i8*
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
@@ -53,7 +53,7 @@ entry:
 }
 
 define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
-; CHECK: @write16to32
+; CHECK-LABEL: @write16to32(
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
@@ -64,7 +64,7 @@ entry:
 }
 
 define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
-; CHECK: @dontwrite28to32memcpy
+; CHECK-LABEL: @dontwrite28to32memcpy(
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
 ; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
@@ -80,7 +80,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 %struct.trapframe = type { i64, i64, i64 }
 
 ; bugzilla 11455 - make sure negative GEP's don't break this optimisation
-; CHECK: @cpu_lwp_fork
+; CHECK-LABEL: @cpu_lwp_fork(
 define void @cpu_lwp_fork(%struct.trapframe* %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
 entry:
   %0 = inttoptr i64 %pcb_rsp0 to %struct.trapframe*
diff --git a/test/Transforms/DeadStoreElimination/PartialStore.ll b/test/Transforms/DeadStoreElimination/PartialStore.ll
index 7ac1e0844ed4..4799ef3383bc 100644
--- a/test/Transforms/DeadStoreElimination/PartialStore.ll
+++ b/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -8,13 +8,13 @@ define void @test1(i32 *%V) {
         store i8 0, i8* %V2
         store i32 1234567, i32* %V
         ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: store i32 1234567
 }
 
 ; Note that we could do better by merging the two stores into one.
 define void @test2(i32* %P) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   store i32 0, i32* %P
 ; CHECK: store i32
   %Q = bitcast i32* %P to i16*
@@ -25,7 +25,7 @@ define void @test2(i32* %P) {
 
 
 define i32 @test3(double %__x) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: store double
   %__u = alloca { [3 x i32] }
   %tmp.1 = bitcast { [3 x i32] }* %__u to double*
@@ -39,7 +39,7 @@ define i32 @test3(double %__x) {
 
 ; PR6043
 define void @test4(i8* %P) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: store double
 
@@ -64,7 +64,7 @@ define void @test5(i32 %i) nounwind ssp {
   
   call void @test5a(i32* %A)
   ret void
-; CHECK: @test5(
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: alloca
 ; CHECK-NEXT: store i32 20
 ; CHECK-NEXT: call void @test5a
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index 15976f9f10d4..c90d824b34c1 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -11,7 +11,7 @@ define void @test1(%t* noalias %pp) {
   %x = load i32* inttoptr (i32 12345 to i32*)
   store i32 %x, i32* %p
   ret void
-; CHECK: define void @test1
+; CHECK-LABEL: define void @test1(
 ; CHECK: store
 ; CHECK-NOT: store
 ; CHECK: ret void
@@ -21,7 +21,7 @@ define void @test3() {
   store i32 1, i32* @g; <-- This is dead.
   store i32 42, i32* @g
   ret void
-; CHECK: define void @test3
+; CHECK-LABEL: define void @test3(
 ; CHECK: store
 ; CHECK-NOT: store
 ; CHECK: ret void
@@ -32,7 +32,7 @@ define void @test4(i32* %p) {
   %x = load i32* @g; <-- %p and @g could alias
   store i32 %x, i32* %p
   ret void
-; CHECK: define void @test4
+; CHECK-LABEL: define void @test4(
 ; CHECK: store
 ; CHECK: store
 ; CHECK: ret void
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index a5fbdc76387e..1d273d67a501 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64"
 declare void @free(i8* nocapture)
 declare noalias i8* @malloc(i64)
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: @free
 ; CHECK-NEXT: ret void
@@ -17,7 +17,7 @@ define void @test(i32* %Q, i32* %P) {
         ret void
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: @free
 ; CHECK-NEXT: ret void
@@ -29,7 +29,7 @@ define void @test2({i32, i32}* %P) {
 	ret void
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: store
 ; CHECK: ret void
 define void @test3() {
@@ -42,7 +42,7 @@ define void @test3() {
 }
 
 ; PR11240
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: store
 ; CHECK: ret void
 define void @test4(i1 %x) nounwind {
@@ -59,7 +59,7 @@ skipinit1:
   ret void
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define void @test5() {
   br label %bb
 
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
new file mode 100644
index 000000000000..9df88014e5c6
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -0,0 +1,261 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+
+; If there are two stores to the same location, DSE should be able to remove
+; the first store if the two stores are separated by no more than 98
+; instructions. The existence of debug intrinsics between the stores should
+; not affect this instruction limit.
+
+@x = global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @test_within_limit() {
+entry:
+  ; The first store; later there is a second store to the same location,
+  ; so this store should be optimized away by DSE.
+  ; CHECK-NOT: store i32 1, i32* @x, align 4
+  store i32 1, i32* @x, align 4
+
+  ; Insert 98 dummy instructions between the two stores
+  %0 = bitcast i32 0 to i32
+  %1 = bitcast i32 0 to i32
+  %2 = bitcast i32 0 to i32
+  %3 = bitcast i32 0 to i32
+  %4 = bitcast i32 0 to i32
+  %5 = bitcast i32 0 to i32
+  %6 = bitcast i32 0 to i32
+  %7 = bitcast i32 0 to i32
+  %8 = bitcast i32 0 to i32
+  %9 = bitcast i32 0 to i32
+  %10 = bitcast i32 0 to i32
+  %11 = bitcast i32 0 to i32
+  %12 = bitcast i32 0 to i32
+  %13 = bitcast i32 0 to i32
+  %14 = bitcast i32 0 to i32
+  %15 = bitcast i32 0 to i32
+  %16 = bitcast i32 0 to i32
+  %17 = bitcast i32 0 to i32
+  %18 = bitcast i32 0 to i32
+  %19 = bitcast i32 0 to i32
+  %20 = bitcast i32 0 to i32
+  %21 = bitcast i32 0 to i32
+  %22 = bitcast i32 0 to i32
+  %23 = bitcast i32 0 to i32
+  %24 = bitcast i32 0 to i32
+  %25 = bitcast i32 0 to i32
+  %26 = bitcast i32 0 to i32
+  %27 = bitcast i32 0 to i32
+  %28 = bitcast i32 0 to i32
+  %29 = bitcast i32 0 to i32
+  %30 = bitcast i32 0 to i32
+  %31 = bitcast i32 0 to i32
+  %32 = bitcast i32 0 to i32
+  %33 = bitcast i32 0 to i32
+  %34 = bitcast i32 0 to i32
+  %35 = bitcast i32 0 to i32
+  %36 = bitcast i32 0 to i32
+  %37 = bitcast i32 0 to i32
+  %38 = bitcast i32 0 to i32
+  %39 = bitcast i32 0 to i32
+  %40 = bitcast i32 0 to i32
+  %41 = bitcast i32 0 to i32
+  %42 = bitcast i32 0 to i32
+  %43 = bitcast i32 0 to i32
+  %44 = bitcast i32 0 to i32
+  %45 = bitcast i32 0 to i32
+  %46 = bitcast i32 0 to i32
+  %47 = bitcast i32 0 to i32
+  %48 = bitcast i32 0 to i32
+  %49 = bitcast i32 0 to i32
+  %50 = bitcast i32 0 to i32
+  %51 = bitcast i32 0 to i32
+  %52 = bitcast i32 0 to i32
+  %53 = bitcast i32 0 to i32
+  %54 = bitcast i32 0 to i32
+  %55 = bitcast i32 0 to i32
+  %56 = bitcast i32 0 to i32
+  %57 = bitcast i32 0 to i32
+  %58 = bitcast i32 0 to i32
+  %59 = bitcast i32 0 to i32
+  %60 = bitcast i32 0 to i32
+  %61 = bitcast i32 0 to i32
+  %62 = bitcast i32 0 to i32
+  %63 = bitcast i32 0 to i32
+  %64 = bitcast i32 0 to i32
+  %65 = bitcast i32 0 to i32
+  %66 = bitcast i32 0 to i32
+  %67 = bitcast i32 0 to i32
+  %68 = bitcast i32 0 to i32
+  %69 = bitcast i32 0 to i32
+  %70 = bitcast i32 0 to i32
+  %71 = bitcast i32 0 to i32
+  %72 = bitcast i32 0 to i32
+  %73 = bitcast i32 0 to i32
+  %74 = bitcast i32 0 to i32
+  %75 = bitcast i32 0 to i32
+  %76 = bitcast i32 0 to i32
+  %77 = bitcast i32 0 to i32
+  %78 = bitcast i32 0 to i32
+  %79 = bitcast i32 0 to i32
+  %80 = bitcast i32 0 to i32
+  %81 = bitcast i32 0 to i32
+  %82 = bitcast i32 0 to i32
+  %83 = bitcast i32 0 to i32
+  %84 = bitcast i32 0 to i32
+  %85 = bitcast i32 0 to i32
+  %86 = bitcast i32 0 to i32
+  %87 = bitcast i32 0 to i32
+  %88 = bitcast i32 0 to i32
+  %89 = bitcast i32 0 to i32
+  %90 = bitcast i32 0 to i32
+  %91 = bitcast i32 0 to i32
+  %92 = bitcast i32 0 to i32
+  %93 = bitcast i32 0 to i32
+  %94 = bitcast i32 0 to i32
+  %95 = bitcast i32 0 to i32
+  %96 = bitcast i32 0 to i32
+  %97 = bitcast i32 0 to i32
+
+  ; Insert a meaningless dbg.value intrinsic; it should have no
+  ; effect on the working of DSE in any way.
+  call void @llvm.dbg.value(metadata !12, i64 0, metadata !10)
+
+  ; CHECK:  store i32 -1, i32* @x, align 4
+  store i32 -1, i32* @x, align 4
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+define i32 @test_outside_limit() {
+entry:
+  ; The first store; later there is a second store to the same location
+  ; CHECK: store i32 1, i32* @x, align 4
+  store i32 1, i32* @x, align 4
+
+  ; Insert 99 dummy instructions between the two stores; this is
+  ; one instruction too many for DSE to take place.
+  %0 = bitcast i32 0 to i32
+  %1 = bitcast i32 0 to i32
+  %2 = bitcast i32 0 to i32
+  %3 = bitcast i32 0 to i32
+  %4 = bitcast i32 0 to i32
+  %5 = bitcast i32 0 to i32
+  %6 = bitcast i32 0 to i32
+  %7 = bitcast i32 0 to i32
+  %8 = bitcast i32 0 to i32
+  %9 = bitcast i32 0 to i32
+  %10 = bitcast i32 0 to i32
+  %11 = bitcast i32 0 to i32
+  %12 = bitcast i32 0 to i32
+  %13 = bitcast i32 0 to i32
+  %14 = bitcast i32 0 to i32
+  %15 = bitcast i32 0 to i32
+  %16 = bitcast i32 0 to i32
+  %17 = bitcast i32 0 to i32
+  %18 = bitcast i32 0 to i32
+  %19 = bitcast i32 0 to i32
+  %20 = bitcast i32 0 to i32
+  %21 = bitcast i32 0 to i32
+  %22 = bitcast i32 0 to i32
+  %23 = bitcast i32 0 to i32
+  %24 = bitcast i32 0 to i32
+  %25 = bitcast i32 0 to i32
+  %26 = bitcast i32 0 to i32
+  %27 = bitcast i32 0 to i32
+  %28 = bitcast i32 0 to i32
+  %29 = bitcast i32 0 to i32
+  %30 = bitcast i32 0 to i32
+  %31 = bitcast i32 0 to i32
+  %32 = bitcast i32 0 to i32
+  %33 = bitcast i32 0 to i32
+  %34 = bitcast i32 0 to i32
+  %35 = bitcast i32 0 to i32
+  %36 = bitcast i32 0 to i32
+  %37 = bitcast i32 0 to i32
+  %38 = bitcast i32 0 to i32
+  %39 = bitcast i32 0 to i32
+  %40 = bitcast i32 0 to i32
+  %41 = bitcast i32 0 to i32
+  %42 = bitcast i32 0 to i32
+  %43 = bitcast i32 0 to i32
+  %44 = bitcast i32 0 to i32
+  %45 = bitcast i32 0 to i32
+  %46 = bitcast i32 0 to i32
+  %47 = bitcast i32 0 to i32
+  %48 = bitcast i32 0 to i32
+  %49 = bitcast i32 0 to i32
+  %50 = bitcast i32 0 to i32
+  %51 = bitcast i32 0 to i32
+  %52 = bitcast i32 0 to i32
+  %53 = bitcast i32 0 to i32
+  %54 = bitcast i32 0 to i32
+  %55 = bitcast i32 0 to i32
+  %56 = bitcast i32 0 to i32
+  %57 = bitcast i32 0 to i32
+  %58 = bitcast i32 0 to i32
+  %59 = bitcast i32 0 to i32
+  %60 = bitcast i32 0 to i32
+  %61 = bitcast i32 0 to i32
+  %62 = bitcast i32 0 to i32
+  %63 = bitcast i32 0 to i32
+  %64 = bitcast i32 0 to i32
+  %65 = bitcast i32 0 to i32
+  %66 = bitcast i32 0 to i32
+  %67 = bitcast i32 0 to i32
+  %68 = bitcast i32 0 to i32
+  %69 = bitcast i32 0 to i32
+  %70 = bitcast i32 0 to i32
+  %71 = bitcast i32 0 to i32
+  %72 = bitcast i32 0 to i32
+  %73 = bitcast i32 0 to i32
+  %74 = bitcast i32 0 to i32
+  %75 = bitcast i32 0 to i32
+  %76 = bitcast i32 0 to i32
+  %77 = bitcast i32 0 to i32
+  %78 = bitcast i32 0 to i32
+  %79 = bitcast i32 0 to i32
+  %80 = bitcast i32 0 to i32
+  %81 = bitcast i32 0 to i32
+  %82 = bitcast i32 0 to i32
+  %83 = bitcast i32 0 to i32
+  %84 = bitcast i32 0 to i32
+  %85 = bitcast i32 0 to i32
+  %86 = bitcast i32 0 to i32
+  %87 = bitcast i32 0 to i32
+  %88 = bitcast i32 0 to i32
+  %89 = bitcast i32 0 to i32
+  %90 = bitcast i32 0 to i32
+  %91 = bitcast i32 0 to i32
+  %92 = bitcast i32 0 to i32
+  %93 = bitcast i32 0 to i32
+  %94 = bitcast i32 0 to i32
+  %95 = bitcast i32 0 to i32
+  %96 = bitcast i32 0 to i32
+  %97 = bitcast i32 0 to i32
+  %98 = bitcast i32 0 to i32
+
+  ; CHECK:  store i32 -1, i32* @x, align 4
+  store i32 -1, i32* @x, align 4
+  ret i32 0
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !13}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"/home/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_within_limit", metadata !"test_within_limit", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @test_within_limit, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/tmp/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
+!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!12 = metadata !{i32* undef}
+
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/DeadStoreElimination/libcalls.ll b/test/Transforms/DeadStoreElimination/libcalls.ll
index 4639c0bc9628..65396945bf65 100644
--- a/test/Transforms/DeadStoreElimination/libcalls.ll
+++ b/test/Transforms/DeadStoreElimination/libcalls.ll
@@ -2,7 +2,7 @@
 
 declare i8* @strcpy(i8* %dest, i8* %src) nounwind
 define void @test1(i8* %src) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %B = alloca [16 x i8]
   %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
 ; CHECK-NOT: @strcpy
@@ -13,7 +13,7 @@ define void @test1(i8* %src) {
 
 declare i8* @strncpy(i8* %dest, i8* %src, i32 %n) nounwind
 define void @test2(i8* %src) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   %B = alloca [16 x i8]
   %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
 ; CHECK-NOT: @strncpy
@@ -24,7 +24,7 @@ define void @test2(i8* %src) {
 
 declare i8* @strcat(i8* %dest, i8* %src) nounwind
 define void @test3(i8* %src) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
   %B = alloca [16 x i8]
   %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
 ; CHECK-NOT: @strcat
@@ -35,7 +35,7 @@ define void @test3(i8* %src) {
 
 declare i8* @strncat(i8* %dest, i8* %src, i32 %n) nounwind
 define void @test4(i8* %src) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
   %B = alloca [16 x i8]
   %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
 ; CHECK-NOT: @strncat
@@ -45,7 +45,7 @@ define void @test4(i8* %src) {
 }
 
 define void @test5(i8* nocapture %src) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
   %dest = alloca [100 x i8], align 16
   %arraydecay = getelementptr inbounds [100 x i8]* %dest, i64 0, i64 0
   %call = call i8* @strcpy(i8* %arraydecay, i8* %src)
@@ -57,7 +57,7 @@ define void @test5(i8* nocapture %src) {
 
 declare void @user(i8* %p)
 define void @test6(i8* %src) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
   %B = alloca [16 x i8]
   %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
 ; CHECK: @strcpy
diff --git a/test/Transforms/DeadStoreElimination/lifetime.ll b/test/Transforms/DeadStoreElimination/lifetime.ll
index 678565315e37..7fe7fbfa7478 100644
--- a/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -7,7 +7,7 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %A = alloca i8
 
   store i8 0, i8* %A  ;; Written to by memset
diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/DeadStoreElimination/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/memintrinsics.ll b/test/Transforms/DeadStoreElimination/memintrinsics.ll
index d5c5365771d5..5bbb8e099c41 100644
--- a/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -5,7 +5,7 @@ declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i
 declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %A = alloca i8
   %B = alloca i8
 
@@ -19,7 +19,7 @@ define void @test1() {
 }
 
 define void @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   %A = alloca i8
   %B = alloca i8
 
@@ -33,7 +33,7 @@ define void @test2() {
 }
 
 define void @test3() {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
   %A = alloca i8
   %B = alloca i8
 
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index 4022d76dcb52..c0c7c58d4ead 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -7,7 +7,7 @@ define void @test1(i32* noalias %p) {
        call void @test1f()
        store i32 2, i32 *%p
        ret void
-; CHECK: define void @test1
+; CHECK-LABEL: define void @test1(
 ; CHECK-NOT: store
 ; CHECK-NEXT: call void
 ; CHECK-NEXT: store i32 2
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index e0eb90af9437..ec98466d33b9 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -10,7 +10,7 @@ define void @test1(i32* %Q, i32* %P) {
         store i32 %DEAD, i32* %P
         store i32 0, i32* %P
         ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: store i32 0, i32* %P
 ; CHECK-NEXT: ret void
 }
@@ -21,7 +21,7 @@ define void @test2(i32 *%p, i32 *%q) {
   store i32 20, i32* %q, align 4
   store i32 30, i32* %p, align 4
   ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: store i32 20
 }
 
@@ -30,7 +30,7 @@ define void @test2(i32 *%p, i32 *%q) {
 @g = global i32 1
 
 define i32 @test3(i32* %g_addr) nounwind {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: load i32* %g_addr
   %g_value = load i32* %g_addr, align 4
   store i32 -1, i32* @g, align 4
@@ -44,7 +44,7 @@ define void @test4(i32* %Q) {
         %a = load i32* %Q
         store volatile i32 %a, i32* %Q
         ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: load i32
 ; CHECK-NEXT: store volatile
 ; CHECK-NEXT: ret void
@@ -54,7 +54,7 @@ define void @test5(i32* %Q) {
         %a = load volatile i32* %Q
         store i32 %a, i32* %Q
         ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: load volatile
 ; CHECK-NEXT: ret void
 }
@@ -66,7 +66,7 @@ define void @test6(i32 *%p, i8 *%q) {
   call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: call void @llvm.memset
 }
 
@@ -77,7 +77,7 @@ define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: call void @llvm.memcpy
 }
 
@@ -90,7 +90,7 @@ define i32 @test8() {
         %X = load i32* %V
         ret i32 %X
         
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: store i32 1234567
 }
 
@@ -101,7 +101,7 @@ define void @test9(%struct.x* byval  %a) nounwind  {
 	%tmp2 = getelementptr %struct.x* %a, i32 0, i32 0
 	store i32 1, i32* %tmp2, align 4
 	ret void
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: ret void
 }
 
@@ -111,7 +111,7 @@ define double @test10(i8* %X) {
         store i8* %X, i8** %X_addr
         %tmp.0 = va_arg i8** %X_addr, double
         ret double %tmp.0
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: store
 }
 
@@ -119,7 +119,7 @@ define double @test10(i8* %X) {
 ; DSE should delete the dead trampoline.
 declare void @test11f()
 define void @test11() {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 	%storage = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
 ; CHECK-NOT: alloca
 	%cast = getelementptr [10 x i8]* %storage, i32 0, i32 0		; <i8*> [#uses=1]
@@ -140,7 +140,7 @@ define void @test12({ i32, i32 }* %x) nounwind  {
 	store i32 %tmp5, i32* %tmp4, align 4
 	store i32 %tmp17, i32* %tmp7, align 4
 	ret void
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NOT: tmp5
 ; CHECK: ret void
 }
@@ -173,7 +173,7 @@ define void @test14(i32* %Q) {
         store i32 %DEAD, i32* %P
         ret void
 
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: ret void
 }
 
@@ -185,7 +185,7 @@ define void @test15(i8* %P, i8* %Q) nounwind ssp {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret
 }
@@ -195,7 +195,7 @@ define void @test16(i8* %P, i8* %Q) nounwind ssp {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret
 }
@@ -205,7 +205,7 @@ define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret
 }
@@ -215,7 +215,7 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 true)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
-; CHECK: @test17v
+; CHECK-LABEL: @test17v(
 ; CHECK-NEXT: call void @llvm.memset
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret
@@ -229,7 +229,7 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
   ret void
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret
@@ -246,7 +246,7 @@ bb:
   call void @test19f({i32}* byval align 4 %arg5)
   ret void
 
-; CHECK: @test19(
+; CHECK-LABEL: @test19(
 ; CHECK: store i32 912
 ; CHECK: call void @test19f
 }
@@ -256,10 +256,10 @@ define void @test20() {
   store i8 0, i8* %m
   ret void
 }
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK-NEXT: ret void
 
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 define void @test21() {
   %m = call i8* @calloc(i32 9, i32 7)
   store i8 0, i8* %m
@@ -267,7 +267,7 @@ define void @test21() {
   ret void
 }
 
-; CHECK: @test22(
+; CHECK-LABEL: @test22(
 define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
   %k.addr = alloca i32
   %m.addr = alloca i32
@@ -278,7 +278,7 @@ define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
 }
 
 ; PR13547
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK: store i8 97
 ; CHECK: store i8 0
 declare noalias i8* @strdup(i8* nocapture) nounwind
@@ -293,7 +293,7 @@ define noalias i8* @test23() nounwind uwtable ssp {
 }
 
 ; Make sure same sized store to later element is deleted
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK-NOT: store i32 0
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 %b
@@ -312,7 +312,7 @@ define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
 }
 
 ; Check another case like PR13547 where strdup is not like malloc.
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK: load i8
 ; CHECK: store i8 0
 ; CHECK: store i8 %tmp
diff --git a/test/Transforms/DebugIR/crash.ll b/test/Transforms/DebugIR/crash.ll
new file mode 100644
index 000000000000..f4a88d7234cb
--- /dev/null
+++ b/test/Transforms/DebugIR/crash.ll
@@ -0,0 +1,42 @@
+; ModuleID = 'crash.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+@.str = private unnamed_addr constant [18 x i8] c"Hello, segfault!\0A\00", align 1
+@.str1 = private unnamed_addr constant [14 x i8] c"Now crash %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 {
+  %1 = alloca i32, align 4                                        ;CHECK: !dbg
+  %2 = alloca i32, align 4                                        ;CHECK-NEXT: !dbg
+  %3 = alloca i8**, align 8                                       ;CHECK-NEXT: !dbg
+  %null_ptr = alloca i32*, align 8                                ;CHECK-NEXT: !dbg
+  store i32 0, i32* %1                                            ;CHECK-NEXT: !dbg
+  store i32 %argc, i32* %2, align 4                               ;CHECK-NEXT: !dbg
+  store i8** %argv, i8*** %3, align 8                             ;CHECK-NEXT: !dbg
+  store i32* null, i32** %null_ptr, align 8                       ;CHECK-NEXT: !dbg
+  %4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str, i32 0, i32 0)) ;CHECK-NEXT: !dbg
+  %5 = load i32** %null_ptr, align 8                              ;CHECK-NEXT: !dbg
+  %6 = load i32* %5, align 4                                      ;CHECK-NEXT: !dbg
+  %7 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i32 0, i32 0), i32 %6) ;CHECK-NEXT: !dbg
+  %8 = load i32* %2, align 4                                      ;CHECK-NEXT: !dbg
+  ret i32 %8                                                      ;CHECK-NEXT: !dbg
+}
+
+declare i32 @printf(i8*, ...) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK: = metadata !{i32 14,
+; CHECK-NEXT: = metadata !{i32 15,
+; CHECK-NEXT: = metadata !{i32 16,
+; CHECK-NEXT: = metadata !{i32 17,
+; CHECK-NEXT: = metadata !{i32 18,
+; CHECK-NEXT: = metadata !{i32 19,
+; CHECK-NEXT: = metadata !{i32 20,
+; CHECK-NEXT: = metadata !{i32 21,
+; CHECK-NEXT: = metadata !{i32 22,
+; CHECK-NEXT: = metadata !{i32 23,
+
+; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/exception.ll b/test/Transforms/DebugIR/exception.ll
new file mode 100644
index 000000000000..2436d38968c9
--- /dev/null
+++ b/test/Transforms/DebugIR/exception.ll
@@ -0,0 +1,127 @@
+; ModuleID = 'exception.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+@_ZTIi = external constant i8*
+
+; Function Attrs: uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 {
+  %1 = alloca i32, align 4                        ; CHECK: !dbg
+  %2 = alloca i32, align 4                        ; CHECK-NEXT: !dbg
+  %3 = alloca i8**, align 8                       ; CHECK-NEXT: !dbg
+  %4 = alloca i8*                                 ; CHECK-NEXT: !dbg
+  %5 = alloca i32                                 ; CHECK-NEXT: !dbg
+  %e = alloca i32, align 4                        ; CHECK-NEXT: !dbg
+  %6 = alloca i32                                 ; CHECK-NEXT: !dbg
+  store i32 0, i32* %1                            ; CHECK-NEXT: !dbg
+  store i32 %argc, i32* %2, align 4               ; CHECK-NEXT: !dbg
+  store i8** %argv, i8*** %3, align 8             ; CHECK-NEXT: !dbg
+  %7 = call i8* @__cxa_allocate_exception(i64 4) #2 ; CHECK-NEXT: !dbg
+  %8 = bitcast i8* %7 to i32*                     ; CHECK-NEXT: !dbg
+  %9 = load i32* %2, align 4                      ; CHECK-NEXT: !dbg
+  store i32 %9, i32* %8                           ; CHECK-NEXT: !dbg
+  invoke void @__cxa_throw(i8* %7, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3
+          to label %31 unwind label %10           ; CHECK: !dbg
+
+; <label>:10                                      ; preds = %0
+  %11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)  ; CHECK: !dbg
+  %12 = extractvalue { i8*, i32 } %11, 0          ; CHECK-NEXT: !dbg
+  store i8* %12, i8** %4                          ; CHECK-NEXT: !dbg
+  %13 = extractvalue { i8*, i32 } %11, 1          ; CHECK-NEXT: !dbg
+  store i32 %13, i32* %5                          ; CHECK-NEXT: !dbg
+  br label %14                                    ; CHECK-NEXT: !dbg
+
+; <label>:14                                      ; preds = %10
+  %15 = load i32* %5                              ; CHECK: !dbg
+  %16 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2   ; CHECK-NEXT: !dbg
+  %17 = icmp eq i32 %15, %16                      ; CHECK-NEXT: !dbg
+  br i1 %17, label %18, label %26                 ; CHECK-NEXT: !dbg
+
+; <label>:18                                      ; preds = %14
+  %19 = load i8** %4                              ; CHECK: !dbg
+  %20 = call i8* @__cxa_begin_catch(i8* %19) #2   ; CHECK-NEXT: !dbg
+  %21 = bitcast i8* %20 to i32*                   ; CHECK-NEXT: !dbg
+  %22 = load i32* %21, align 4                    ; CHECK-NEXT: !dbg
+  store i32 %22, i32* %e, align 4                 ; CHECK-NEXT: !dbg
+  %23 = load i32* %e, align 4                     ; CHECK-NEXT: !dbg
+  store i32 %23, i32* %1                          ; CHECK-NEXT: !dbg
+  store i32 1, i32* %6                            ; CHECK-NEXT: !dbg
+  call void @__cxa_end_catch() #2                 ; CHECK-NEXT: !dbg
+  br label %24                                    ; CHECK-NEXT: !dbg
+
+; <label>:24                                      ; preds = %18
+  %25 = load i32* %1                              ; CHECK: !dbg
+  ret i32 %25                                     ; CHECK-NEXT: !dbg
+
+; <label>:26                                      ; preds = %14
+  %27 = load i8** %4                              ; CHECK: !dbg
+  %28 = load i32* %5                              ; CHECK-NEXT: !dbg
+  %29 = insertvalue { i8*, i32 } undef, i8* %27, 0 ; CHECK-NEXT: !dbg
+  %30 = insertvalue { i8*, i32 } %29, i32 %28, 1   ; CHECK-NEXT: !dbg
+  resume { i8*, i32 } %30                         ; CHECK-NEXT: !dbg
+
+; <label>:31                                      ; preds = %0
+  unreachable                                     ; CHECK: !dbg
+}
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+attributes #3 = { noreturn }
+; CHECK: = metadata !{i32 16,
+; CHECK-NEXT: = metadata !{i32 17,
+; CHECK-NEXT: = metadata !{i32 18,
+; CHECK-NEXT: = metadata !{i32 19,
+; CHECK-NEXT: = metadata !{i32 20,
+; CHECK-NEXT: = metadata !{i32 21,
+; CHECK-NEXT: = metadata !{i32 22,
+; CHECK-NEXT: = metadata !{i32 24,
+
+; CHECK-NEXT: = metadata !{i32 28,
+; CHECK-NEXT: = metadata !{i32 29,
+; CHECK-NEXT: = metadata !{i32 30,
+; CHECK-NEXT: = metadata !{i32 31,
+; CHECK-NEXT: = metadata !{i32 32,
+; CHECK-NEXT: = metadata !{i32 33,
+
+; CHECK-NEXT: = metadata !{i32 36,
+; CHECK-NEXT: = metadata !{i32 37,
+; CHECK-NEXT: = metadata !{i32 38,
+; CHECK-NEXT: = metadata !{i32 39,
+
+; CHECK-NEXT: = metadata !{i32 42,
+; CHECK-NEXT: = metadata !{i32 43,
+; CHECK-NEXT: = metadata !{i32 44,
+; CHECK-NEXT: = metadata !{i32 45,
+; CHECK-NEXT: = metadata !{i32 46,
+; CHECK-NEXT: = metadata !{i32 47,
+; CHECK-NEXT: = metadata !{i32 48,
+; CHECK-NEXT: = metadata !{i32 49,
+; CHECK-NEXT: = metadata !{i32 50,
+; CHECK-NEXT: = metadata !{i32 51,
+
+; CHECK-NEXT: = metadata !{i32 54,
+; CHECK-NEXT: = metadata !{i32 55,
+
+; CHECK-NEXT: = metadata !{i32 58,
+; CHECK-NEXT: = metadata !{i32 59,
+; CHECK-NEXT: = metadata !{i32 60,
+; CHECK-NEXT: = metadata !{i32 61,
+; CHECK-NEXT: = metadata !{i32 62,
+; CHECK-NEXT: = metadata !{i32 65,
+
+; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/function.ll b/test/Transforms/DebugIR/function.ll
new file mode 100644
index 000000000000..dba073de37e9
--- /dev/null
+++ b/test/Transforms/DebugIR/function.ll
@@ -0,0 +1,51 @@
+; ModuleID = 'function.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @blah(i32* %i) #0 {
+  %1 = alloca i32*, align 8                   ; CHECK: !dbg
+  store i32* %i, i32** %1, align 8            ; CHECK-NEXT: !dbg
+  %2 = load i32** %1, align 8                 ; CHECK-NEXT: !dbg
+  %3 = load i32* %2, align 4                  ; CHECK-NEXT: !dbg
+  %4 = add nsw i32 %3, 1                      ; CHECK-NEXT: !dbg
+  store i32 %4, i32* %2, align 4              ; CHECK-NEXT: !dbg
+  ret void                                    ; CHECK-NEXT: !dbg
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 {
+  %1 = alloca i32, align 4                    ; CHECK: !dbg
+  %2 = alloca i32, align 4                    ; CHECK-NEXT: !dbg
+  %3 = alloca i8**, align 8                   ; CHECK-NEXT: !dbg
+  %i = alloca i32, align 4                    ; CHECK-NEXT: !dbg
+  store i32 0, i32* %1                        ; CHECK-NEXT: !dbg
+  store i32 %argc, i32* %2, align 4           ; CHECK-NEXT: !dbg
+  store i8** %argv, i8*** %3, align 8         ; CHECK-NEXT: !dbg
+  store i32 7, i32* %i, align 4               ; CHECK-NEXT: !dbg
+  call void @blah(i32* %i)                    ; CHECK-NEXT: !dbg
+  %4 = load i32* %i, align 4                  ; CHECK-NEXT: !dbg
+  ret i32 %4                                  ; CHECK-NEXT: !dbg
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+; CHECK: = metadata !{i32 8,
+; CHECK-NEXT: = metadata !{i32 9,
+; CHECK-NEXT: = metadata !{i32 10,
+; CHECK-NEXT: = metadata !{i32 11,
+; CHECK-NEXT: = metadata !{i32 12,
+; CHECK-NEXT: = metadata !{i32 13,
+
+; CHECK-NEXT: = metadata !{i32 18,
+; CHECK-NEXT: = metadata !{i32 19,
+; CHECK-NEXT: = metadata !{i32 20,
+; CHECK-NEXT: = metadata !{i32 21,
+; CHECK-NEXT: = metadata !{i32 22,
+; CHECK-NEXT: = metadata !{i32 23,
+; CHECK-NEXT: = metadata !{i32 24,
+; CHECK-NEXT: = metadata !{i32 25,
+; CHECK-NEXT: = metadata !{i32 26,
+; CHECK-NEXT: = metadata !{i32 27,
+; CHECK-NEXT: = metadata !{i32 28,
+
+; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/simple-addrspace.ll b/test/Transforms/DebugIR/simple-addrspace.ll
new file mode 100644
index 000000000000..6bea9b269f57
--- /dev/null
+++ b/test/Transforms/DebugIR/simple-addrspace.ll
@@ -0,0 +1,13 @@
+; RUN: opt -debug-ir -S %s -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16"
+
+define void @foo(i32 addrspace(1)*) nounwind {
+  ret void
+}
+
+; Make sure the pointer size is 16
+
+; CHECK: metadata !"i32 addrspace(1)*", i32 0, i64 16, i64 2, i64 0, i32 0
+
+
diff --git a/test/Transforms/DebugIR/simple.ll b/test/Transforms/DebugIR/simple.ll
new file mode 100644
index 000000000000..3b188958261f
--- /dev/null
+++ b/test/Transforms/DebugIR/simple.ll
@@ -0,0 +1,25 @@
+; ModuleID = 'simple.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 {
+  %1 = alloca i32, align 4                  ; CHECK: !dbg
+  %2 = alloca i32, align 4                  ; CHECK-NEXT: !dbg
+  %3 = alloca i8**, align 8                 ; CHECK-NEXT: !dbg
+  store i32 0, i32* %1                      ; CHECK-NEXT: !dbg
+  store i32 %argc, i32* %2, align 4         ; CHECK-NEXT: !dbg
+  store i8** %argv, i8*** %3, align 8       ; CHECK-NEXT: !dbg
+  %4 = load i32* %2, align 4                ; CHECK-NEXT: !dbg
+  ret i32 %4                                ; CHECK-NEXT: !dbg
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK: = metadata !{i32 10,
+; CHECK-NEXT: = metadata !{i32 11,
+; CHECK-NEXT: = metadata !{i32 12,
+; CHECK-NEXT: = metadata !{i32 13,
+; CHECK-NEXT: = metadata !{i32 14,
+
+; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/struct.ll b/test/Transforms/DebugIR/struct.ll
new file mode 100644
index 000000000000..8db3dbebe90b
--- /dev/null
+++ b/test/Transforms/DebugIR/struct.ll
@@ -0,0 +1,24 @@
+; ModuleID = 'struct.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+%struct.blah = type { i32, float, i8 }
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+  %1 = alloca i32, align 4                                    ; CHECK: !dbg
+  %b = alloca %struct.blah, align 4                           ; CHECK-NEXT: !dbg
+  store i32 0, i32* %1                                        ; CHECK-NEXT: !dbg
+  %2 = getelementptr inbounds %struct.blah* %b, i32 0, i32 0  ; CHECK-NEXT: !dbg
+  %3 = load i32* %2, align 4                                  ; CHECK-NEXT: !dbg
+  ret i32 %3                                                  ; CHECK-NEXT: !dbg
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK: = metadata !{i32 11,
+; CHECK-NEXT: = metadata !{i32 12,
+; CHECK-NEXT: = metadata !{i32 13,
+; CHECK-NEXT: = metadata !{i32 14,
+
+; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/vector.ll b/test/Transforms/DebugIR/vector.ll
new file mode 100644
index 000000000000..50d99ac2254f
--- /dev/null
+++ b/test/Transforms/DebugIR/vector.ll
@@ -0,0 +1,93 @@
+; ModuleID = 'vector.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define <4 x float> @_Z3fooDv2_fS_(double %a.coerce, double %b.coerce) #0 {
+  %1 = alloca <2 x float>, align 8                    ; CHECK: !dbg
+  %2 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  %3 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  %4 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  %c = alloca <4 x float>, align 16                   ; CHECK-NEXT: !dbg
+  %5 = bitcast <2 x float>* %1 to double*             ; CHECK-NEXT: !dbg
+  store double %a.coerce, double* %5, align 1         ; CHECK-NEXT: !dbg
+  %a = load <2 x float>* %1, align 8                  ; CHECK-NEXT: !dbg
+  store <2 x float> %a, <2 x float>* %2, align 8      ; CHECK-NEXT: !dbg
+  %6 = bitcast <2 x float>* %3 to double*             ; CHECK-NEXT: !dbg
+  store double %b.coerce, double* %6, align 1         ; CHECK-NEXT: !dbg
+  %b = load <2 x float>* %3, align 8                  ; CHECK-NEXT: !dbg
+  store <2 x float> %b, <2 x float>* %4, align 8      ; CHECK-NEXT: !dbg
+  %7 = load <2 x float>* %2, align 8                  ; CHECK-NEXT: !dbg
+  %8 = load <4 x float>* %c, align 16                 ; CHECK-NEXT: !dbg
+  %9 = shufflevector <2 x float> %7, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>   ; CHECK-NEXT: !dbg
+  %10 = shufflevector <4 x float> %8, <4 x float> %9, <4 x i32> <i32 4, i32 1, i32 5, i32 3>             ; CHECK-NEXT: !dbg
+  store <4 x float> %10, <4 x float>* %c, align 16    ; CHECK-NEXT: !dbg
+  %11 = load <2 x float>* %4, align 8                 ; CHECK-NEXT: !dbg
+  %12 = load <4 x float>* %c, align 16                ; CHECK-NEXT: !dbg
+  %13 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; CHECK-NEXT: !dbg
+  %14 = shufflevector <4 x float> %12, <4 x float> %13, <4 x i32> <i32 0, i32 4, i32 2, i32 5>           ; CHECK-NEXT: !dbg
+  store <4 x float> %14, <4 x float>* %c, align 16    ; CHECK-NEXT: !dbg
+  %15 = load <4 x float>* %c, align 16                ; CHECK-NEXT: !dbg
+  ret <4 x float> %15                                 ; CHECK-NEXT: !dbg
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #1 {
+  %1 = alloca i32, align 4                            ; CHECK: !dbg
+  %a = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  %b = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  %x = alloca <4 x float>, align 16                   ; CHECK-NEXT: !dbg
+  %2 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  %3 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
+  store i32 0, i32* %1                                ; CHECK-NEXT: !dbg
+  store <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float>* %a, align 8                   ; CHECK-NEXT: !dbg
+  store <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float>* %b, align 8                   ; CHECK-NEXT: !dbg
+  %4 = load <2 x float>* %a, align 8                  ; CHECK-NEXT: !dbg
+  %5 = load <2 x float>* %b, align 8                  ; CHECK-NEXT: !dbg
+  store <2 x float> %4, <2 x float>* %2, align 8      ; CHECK-NEXT: !dbg
+  %6 = bitcast <2 x float>* %2 to double*             ; CHECK-NEXT: !dbg
+  %7 = load double* %6, align 1                       ; CHECK-NEXT: !dbg
+  store <2 x float> %5, <2 x float>* %3, align 8      ; CHECK-NEXT: !dbg
+  %8 = bitcast <2 x float>* %3 to double*             ; CHECK-NEXT: !dbg
+  %9 = load double* %8, align 1                       ; CHECK-NEXT: !dbg
+  %10 = call <4 x float> @_Z3fooDv2_fS_(double %7, double %9)                                            ; CHECK-NEXT: !dbg
+  store <4 x float> %10, <4 x float>* %x, align 16    ; CHECK-NEXT: !dbg
+  %11 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
+  %12 = extractelement <4 x float> %11, i32 0         ; CHECK-NEXT: !dbg
+  %13 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
+  %14 = extractelement <4 x float> %13, i32 1         ; CHECK-NEXT: !dbg
+  %15 = fadd float %12, %14                           ; CHECK-NEXT: !dbg
+  %16 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
+  %17 = extractelement <4 x float> %16, i32 2         ; CHECK-NEXT: !dbg
+  %18 = fadd float %15, %17                           ; CHECK-NEXT: !dbg
+  %19 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
+  %20 = extractelement <4 x float> %19, i32 3         ; CHECK-NEXT: !dbg
+  %21 = fadd float %18, %20                           ; CHECK-NEXT: !dbg
+  %22 = fptosi float %21 to i32                       ; CHECK-NEXT: !dbg
+  ret i32 %22                                         ; CHECK-NEXT: !dbg
+}
+
+attributes #0 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK: = metadata !{i32 13,
+; CHECK-NEXT: = metadata !{i32 14,
+; CHECK-NEXT: = metadata !{i32 15,
+; CHECK-NEXT: = metadata !{i32 16,
+; CHECK-NEXT: = metadata !{i32 17,
+; CHECK-NEXT: = metadata !{i32 18,
+; CHECK-NEXT: = metadata !{i32 19,
+; CHECK-NEXT: = metadata !{i32 20,
+; CHECK-NEXT: = metadata !{i32 21,
+; CHECK-NEXT: = metadata !{i32 22,
+; CHECK-NEXT: = metadata !{i32 23,
+; CHECK-NEXT: = metadata !{i32 24,
+; CHECK-NEXT: = metadata !{i32 25,
+; CHECK-NEXT: = metadata !{i32 26,
+; CHECK-NEXT: = metadata !{i32 27,
+; CHECK-NEXT: = metadata !{i32 28,
+; CHECK-NEXT: = metadata !{i32 29,
+; CHECK-NEXT: = metadata !{i32 30,
+; CHECK-NEXT: = metadata !{i32 31,
+
+; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 32c302c9205b..80704df9852e 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -S -early-cse | FileCheck %s
 
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define void @test1(i8 %V, i32 *%P) {
   %A = bitcast i64 42 to double  ;; dead
   %B = add i32 4, 19             ;; constant folds
@@ -33,7 +33,7 @@ define void @test1(i8 %V, i32 *%P) {
 
 
 ;; Simple load value numbering.
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i32 @test2(i32 *%P) {
   %V1 = load i32* %P
   %V2 = load i32* %P
@@ -43,7 +43,7 @@ define i32 @test2(i32 *%P) {
 }
 
 ;; Cross block load value numbering.
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define i32 @test3(i32 *%P, i1 %Cond) {
   %V1 = load i32* %P
   br i1 %Cond, label %T, label %F
@@ -59,7 +59,7 @@ F:
 }
 
 ;; Cross block load value numbering stops when stores happen.
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 define i32 @test4(i32 *%P, i1 %Cond) {
   %V1 = load i32* %P
   br i1 %Cond, label %T, label %F
@@ -79,7 +79,7 @@ F:
 declare i32 @func(i32 *%P) readonly
 
 ;; Simple call CSE'ing.
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define i32 @test5(i32 *%P) {
   %V1 = call i32 @func(i32* %P)
   %V2 = call i32 @func(i32* %P)
@@ -89,7 +89,7 @@ define i32 @test5(i32 *%P) {
 }
 
 ;; Trivial Store->load forwarding
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 define i32 @test6(i32 *%P) {
   store i32 42, i32* %P
   %V1 = load i32* %P
@@ -98,7 +98,7 @@ define i32 @test6(i32 *%P) {
 }
 
 ;; Trivial dead store elimination.
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 define void @test7(i32 *%P) {
   store i32 42, i32* %P
   store i32 45, i32* %P
@@ -108,7 +108,7 @@ define void @test7(i32 *%P) {
 }
 
 ;; Readnone functions aren't invalidated by stores.
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 define i32 @test8(i32 *%P) {
   %V1 = call i32 @func(i32* %P) readnone
   store i32 4, i32* %P
diff --git a/test/Transforms/EarlyCSE/commute.ll b/test/Transforms/EarlyCSE/commute.ll
index 8cf04d1765b9..985fe04ab3cf 100644
--- a/test/Transforms/EarlyCSE/commute.ll
+++ b/test/Transforms/EarlyCSE/commute.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -S -early-cse | FileCheck %s
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define void @test1(float %A, float %B, float* %PA, float* %PB) {
   ; CHECK-NEXT: fadd
   ; CHECK-NEXT: store
@@ -13,7 +13,7 @@ define void @test1(float %A, float %B, float* %PA, float* %PB) {
   ret void
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
   ; CHECK-NEXT: fcmp
   ; CHECK-NEXT: store
@@ -26,7 +26,7 @@ define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
   ret void
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define void @test3(float %A, float %B, i1* %PA, i1* %PB) {
   ; CHECK-NEXT: fcmp
   ; CHECK-NEXT: store
@@ -39,7 +39,7 @@ define void @test3(float %A, float %B, i1* %PA, i1* %PB) {
   ret void
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 define void @test4(i32 %A, i32 %B, i1* %PA, i1* %PB) {
   ; CHECK-NEXT: icmp
   ; CHECK-NEXT: store
@@ -52,7 +52,7 @@ define void @test4(i32 %A, i32 %B, i1* %PA, i1* %PB) {
   ret void
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define void @test5(i32 %A, i32 %B, i1* %PA, i1* %PB) {
   ; CHECK-NEXT: icmp
   ; CHECK-NEXT: store
diff --git a/test/Transforms/EarlyCSE/instsimplify-dom.ll b/test/Transforms/EarlyCSE/instsimplify-dom.ll
index 36dffec1c63c..ebdd7f9b419f 100644
--- a/test/Transforms/EarlyCSE/instsimplify-dom.ll
+++ b/test/Transforms/EarlyCSE/instsimplify-dom.ll
@@ -16,4 +16,4 @@ xxx:
   br label %lbl_1215
 }
 
-; CHECK: define i32 @fn
+; CHECK-LABEL: define i32 @fn(
diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/EarlyCSE/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
index f38c03acca34..0cf1cb7c638d 100644
--- a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
+++ b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
@@ -1,14 +1,23 @@
-; RUN: opt < %s -functionattrs -S | not grep "nocapture *%%q"
-; RUN: opt < %s -functionattrs -S | grep "nocapture *%%p"
+; RUN: opt < %s -functionattrs -S | FileCheck %s
 
+; CHECK: define i32* @a(i32** nocapture readonly %p)
 define i32* @a(i32** %p) {
 	%tmp = load i32** %p
 	ret i32* %tmp
 }
 
+; CHECK: define i32* @b(i32* %q)
 define i32* @b(i32 *%q) {
 	%mem = alloca i32*
 	store i32* %q, i32** %mem
 	%tmp = call i32* @a(i32** %mem)
 	ret i32* %tmp
 }
+
+; CHECK: define i32* @c(i32* readnone %r)
+@g = global i32 0
+define i32* @c(i32 *%r) {
+	%a = icmp eq i32* %r, null
+	store i32 1, i32* @g
+	ret i32* %r
+}
diff --git a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
index d414b73524fd..fa06cc718a93 100644
--- a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
+++ b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
 
-; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0
+; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) #0
 declare i8* @fopen(i8*, i8*)
 
 ; CHECK: declare i8 @strlen(i8* nocapture) #1
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll
index ae77380acc4a..9fba7a9f2882 100644
--- a/test/Transforms/FunctionAttrs/annotate-1.ll
+++ b/test/Transforms/FunctionAttrs/annotate-1.ll
@@ -1,7 +1,8 @@
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
 
 declare i8* @fopen(i8*, i8*)
-; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) [[G0:#[0-9]]] 
+; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]] 
 
 declare i8 @strlen(i8*)
 ; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
@@ -14,5 +15,9 @@ declare i32* @realloc(i32*, i32)
 declare i32 @strcpy(...)
 ; CHECK: declare i32 @strcpy(...)
 
+declare i32 @gettimeofday(i8*, i8*)
+; CHECK-POSIX: declare i32 @gettimeofday(i8* nocapture, i8* nocapture) [[G0:#[0-9]+]]
+
 ; CHECK: attributes [[G0]] = { nounwind }
 ; CHECK: attributes [[G1]] = { nounwind readonly }
+; CHECK-POSIX: attributes [[G0]] = { nounwind }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index 027ee0fd06a2..d5a8db7d53b9 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -13,7 +13,7 @@ entry:
 
 ; A function with an Acquire load is not readonly.
 define i32 @test2(i32* %x) uwtable ssp {
-; CHECK: define i32 @test2(i32* nocapture %x) #1 {
+; CHECK: define i32 @test2(i32* nocapture readonly %x) #1 {
 entry:
   %r = load atomic i32* %x seq_cst, align 4
   ret i32 %r
diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/FunctionAttrs/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index 3027acd35c7d..110bd03dac73 100644
--- a/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -1,12 +1,13 @@
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
 @g = global i32* null		; <i32**> [#uses=1]
 
-; CHECK: define i32* @c1(i32* %q)
+; CHECK: define i32* @c1(i32* readnone %q)
 define i32* @c1(i32* %q) {
 	ret i32* %q
 }
 
 ; CHECK: define void @c2(i32* %q)
+; It would also be acceptable to mark %q as readnone. Update @c3 too.
 define void @c2(i32* %q) {
 	store i32* %q, i32** @g
 	ret void
@@ -45,7 +46,7 @@ define i1 @c5(i32* %q, i32 %bitno) {
 
 declare void @throw_if_bit_set(i8*, i8) readonly
 
-; CHECK: define i1 @c6(i8* %q, i8 %bit)
+; CHECK: define i1 @c6(i8* readonly %q, i8 %bit)
 define i1 @c6(i8* %q, i8 %bit) {
 	invoke void @throw_if_bit_set(i8* %q, i8 %bit)
 		to label %ret0 unwind label %ret1
@@ -67,7 +68,7 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
 	ret i1* %lookup
 }
 
-; CHECK: define i1 @c7(i32* %q, i32 %bitno)
+; CHECK: define i1 @c7(i32* readnone %q, i32 %bitno)
 define i1 @c7(i32* %q, i32 %bitno) {
 	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
 	%val = load i1* %ptr
@@ -103,7 +104,7 @@ define void @nc3(void ()* %p) {
 }
 
 declare void @external(i8*) readonly nounwind
-; CHECK: define void @nc4(i8* nocapture %p)
+; CHECK: define void @nc4(i8* nocapture readonly %p)
 define void @nc4(i8* %p) {
 	call void @external(i8* %p)
 	ret void
@@ -116,28 +117,29 @@ define void @nc5(void (i8*)* %f, i8* %p) {
 	ret void
 }
 
-; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* %y1_1)
+; CHECK: define void @test1_1(i8* nocapture readnone %x1_1, i8* %y1_1)
+; It would be acceptable to add readnone to %y1_1 and %y1_2.
 define void @test1_1(i8* %x1_1, i8* %y1_1) {
   call i8* @test1_2(i8* %x1_1, i8* %y1_1)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* %y1_2)
+; CHECK: define i8* @test1_2(i8* nocapture readnone %x1_2, i8* %y1_2)
 define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
   call void @test1_1(i8* %x1_2, i8* %y1_2)
   store i32* null, i32** @g
   ret i8* %y1_2
 }
 
-; CHECK: define void @test2(i8* nocapture %x2)
+; CHECK: define void @test2(i8* nocapture readnone %x2)
 define void @test2(i8* %x2) {
   call void @test2(i8* %x2)
   store i32* null, i32** @g
   ret void
 }
 
-; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture %y3, i8* nocapture %z3)
+; CHECK: define void @test3(i8* nocapture readnone %x3, i8* nocapture readnone %y3, i8* nocapture readnone %z3)
 define void @test3(i8* %x3, i8* %y3, i8* %z3) {
   call void @test3(i8* %z3, i8* %y3, i8* %x3)
   store i32* null, i32** @g
@@ -151,7 +153,7 @@ define void @test4_1(i8* %x4_1) {
   ret void
 }
 
-; CHECK: define i8* @test4_2(i8* nocapture %x4_2, i8* %y4_2, i8* nocapture %z4_2)
+; CHECK: define i8* @test4_2(i8* nocapture readnone %x4_2, i8* readnone %y4_2, i8* nocapture readnone %z4_2)
 define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
   call void @test4_1(i8* null)
   store i32* null, i32** @g
diff --git a/test/Transforms/FunctionAttrs/noreturn.ll b/test/Transforms/FunctionAttrs/noreturn.ll
index 470ebcb1d3cd..990bea984dfe 100644
--- a/test/Transforms/FunctionAttrs/noreturn.ll
+++ b/test/Transforms/FunctionAttrs/noreturn.ll
@@ -7,7 +7,7 @@ entry:
 while.body:
   br label %while.body
 }
-;CHECK: @main
+;CHECK-LABEL: @main(
 ;CHECK: endless_loop
 ;CHECK: ret
 define i32 @main() noreturn nounwind ssp uwtable {
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
new file mode 100644
index 000000000000..0842f566d124
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+@x = global i32 0
+
+declare void @test1_1(i8* %x1_1, i8* readonly %y1_1, ...)
+
+; CHECK: define void @test1_2(i8* %x1_2, i8* readonly %y1_2, i8* %z1_2)
+define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) {
+  call void (i8*, i8*, ...)* @test1_1(i8* %x1_2, i8* %y1_2, i8* %z1_2)
+  store i32 0, i32* @x
+  ret void
+}
+
+; CHECK: define i8* @test2(i8* readnone %p)
+define i8* @test2(i8* %p) {
+  store i32 0, i32* @x
+  ret i8* %p
+}
+
+; CHECK: define i1 @test3(i8* readnone %p, i8* readnone %q)
+define i1 @test3(i8* %p, i8* %q) {
+  %A = icmp ult i8* %p, %q
+  ret i1 %A
+}
+
+declare void @test4_1(i8* nocapture) readonly
+
+; CHECK: define void @test4_2(i8* nocapture readonly %p)
+define void @test4_2(i8* %p) {
+  call void @test4_1(i8* %p)
+  ret void
+}
+
+; CHECK: define void @test5(i8** nocapture %p, i8* %q)
+; Missed optimization: %q could be marked readnone here, but doing so must not break @test6_2 below.
+define void @test5(i8** %p, i8* %q) {
+  store i8* %q, i8** %p
+  ret void
+}
+
+declare void @test6_1()
+; CHECK: define void @test6_2(i8** nocapture %p, i8* %q)
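+; This is not a missed optimization: %q is stored to memory before the call to the opaque @test6_1, which could then access memory through it.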
+define void @test6_2(i8** %p, i8* %q) {
+  store i8* %q, i8** %p
+  call void @test6_1()
+  ret void
+}
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index 7ce4d861f0e0..ed3a5bd93ada 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -12,16 +12,18 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10}
 !llvm.gcov = !{!9}
 
-!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
 !2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
 !3 = metadata !{i32 0}
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !7 = metadata !{null}
 !8 = metadata !{i32 1, i32 0, metadata !5, null}
 
 
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/GCOVProfiling/lit.local.cfg b/test/Transforms/GCOVProfiling/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/GCOVProfiling/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index d6d0f3314c26..2f1bd70f6df9 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -16,14 +16,17 @@ define void @test() {
 
 !llvm.gcov = !{!9}
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}
 
-!0 = metadata !{metadata !"./version", metadata !1}
-!1 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
-!2 = metadata !{i32 786473, metadata !"version", metadata !"/usr/local/google/home/nlewycky"} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !11, i32 4, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
+!2 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 0}
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
-!6 = metadata !{i32 786473, metadata !"<stdin>", metadata !"."} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 786478, metadata !10, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!6 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{i32 1, i32 0, metadata !5, null}
 ;; !9 is added through the echo line at the top.
+!10 = metadata !{metadata !"<stdin>", metadata !"."}
+!11 = metadata !{metadata !"version", metadata !"/usr/local/google/home/nlewycky"}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index a1cc0083f1e6..5a15f0e43aaf 100644
--- a/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -2,7 +2,7 @@
 
 @last = external global [65 x i32*]
 
-define i32 @NextRootMove(i32 %wtm) {
+define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) {
 entry:
         %A = alloca i32*
 	%tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
@@ -15,12 +15,14 @@ entry:
 	br label %cond_true116
 
 cond_true116:
-	br i1 false, label %cond_true128, label %cond_true145
+   %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %cond_true128, label %cond_true145
 
 cond_true128:
 	%tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
         store i32* %tmp17625, i32** %A
-	br i1 false, label %bb98.backedge, label %return.loopexit
+   %cmp1 = icmp eq i32 %x, %z
+	br i1 %cmp1 , label %bb98.backedge, label %return.loopexit
 
 bb98.backedge:
 	br label %cond_true116
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index 4f07868a1ccb..ce83fa4e4be9 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -3,10 +3,11 @@
 
 @g_3 = external global i8		; <i8*> [#uses=2]
 
-define i8 @func_1() nounwind  {
+define i8 @func_1(i32 %x, i32 %y) nounwind  {
 entry:
   %A = alloca i8
-	br i1 false, label %ifelse, label %ifthen
+    %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %ifelse, label %ifthen
 
 ifthen:		; preds = %entry
 	br label %ifend
@@ -14,9 +15,6 @@ ifthen:		; preds = %entry
 ifelse:		; preds = %entry
 	%tmp3 = load i8* @g_3		; <i8> [#uses=0]
         store i8 %tmp3, i8* %A
-	br label %forcond.thread
-
-forcond.thread:		; preds = %ifelse
 	br label %afterfor
 
 forcond:		; preds = %forinc
diff --git a/test/Transforms/GVN/2010-11-13-Simplify.ll b/test/Transforms/GVN/2010-11-13-Simplify.ll
index 07585a20b6ca..9d0becc4d0ea 100644
--- a/test/Transforms/GVN/2010-11-13-Simplify.ll
+++ b/test/Transforms/GVN/2010-11-13-Simplify.ll
@@ -3,7 +3,7 @@
 declare i32 @foo(i32) readnone
 
 define i1 @bar() {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
   %a = call i32 @foo (i32 0) readnone
   %b = call i32 @foo (i32 0) readnone
   %c = and i32 %a, %b
diff --git a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
index f24e956a2b1f..298f27401aaf 100644
--- a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
+++ b/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.7.0"
 
 define i1 @rb_intern() nounwind ssp {
-; CHECK: @rb_intern
+; CHECK-LABEL: @rb_intern(
 
 bb:
   %tmp = alloca i8*, align 8
@@ -19,10 +19,10 @@ bb1:
   br i1 undef, label %bb3, label %bb15
 
 ; CHECK: bb1:
-; CHECK: %tmp16 = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ]
+; CHECK: [[TMP:%.*]] = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ]
 
 ; CHECK: bb1.bb15_crit_edge:
-; CHECK: %tmp17.pre = load i8* %tmp16, align 1
+; CHECK: %tmp17.pre = load i8* [[TMP]], align 1
 
 bb3:
   call void @isalnum()
diff --git a/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll b/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
index 18178e45a22b..ce60ffe449a8 100644
--- a/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
+++ b/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
@@ -11,7 +11,7 @@ entry:
   ret i64 %add1
 }
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: add1
 ; CHECK: ret
 
@@ -23,7 +23,7 @@ entry:
   ret i64 %sub1
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: sub1
 ; CHECK: ret
 
@@ -35,7 +35,7 @@ entry:
   ret i64 %mul1
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: mul1
 ; CHECK: ret
 
@@ -47,7 +47,7 @@ entry:
   ret i64 %add1
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: add1
 ; CHECK: ret
 
@@ -59,7 +59,7 @@ entry:
   ret i64 %sub1
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NOT: sub1
 ; CHECK: ret
 
@@ -71,7 +71,7 @@ entry:
   ret i64 %mul1
 }
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NOT: mul1
 ; CHECK: ret
 
diff --git a/test/Transforms/GVN/commute.ll b/test/Transforms/GVN/commute.ll
index cf4fb7f17291..cdd6ecf2edea 100644
--- a/test/Transforms/GVN/commute.ll
+++ b/test/Transforms/GVN/commute.ll
@@ -3,7 +3,7 @@
 declare void @use(i32, i32)
 
 define void @foo(i32 %x, i32 %y) {
-  ; CHECK: @foo
+  ; CHECK-LABEL: @foo(
   %add1 = add i32 %x, %y
   %add2 = add i32 %y, %x
   call void @use(i32 %add1, i32 %add2)
@@ -14,7 +14,7 @@ define void @foo(i32 %x, i32 %y) {
 declare void @vse(i1, i1)
 
 define void @bar(i32 %x, i32 %y) {
-  ; CHECK: @bar
+  ; CHECK-LABEL: @bar(
   %cmp1 = icmp ult i32 %x, %y
   %cmp2 = icmp ugt i32 %y, %x
   call void @vse(i1 %cmp1, i1 %cmp2)
diff --git a/test/Transforms/GVN/cond_br.ll b/test/Transforms/GVN/cond_br.ll
new file mode 100644
index 000000000000..918e7d41f12c
--- /dev/null
+++ b/test/Transforms/GVN/cond_br.ll
@@ -0,0 +1,55 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+@y = external global i32
+@z = external global i32
+
+; %t is always 0, so %cmp is always false and the store to @y in if.then can never execute.
+define void @foo(i32 %x) {
+; CHECK: @foo(i32 %x)
+; CHECK: %.pre = load i32* @y
+; CHECK: call void @bar(i32 %.pre)
+
+  %t = sub i32 %x, %x
+  %.pre = load i32* @y, align 4
+  %cmp = icmp sgt i32 %t, 2
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:                           ; preds = %entry
+  br label %if.end
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %x, 3
+  store i32 %add, i32* @y, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then
+  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add, %if.then ]
+  tail call void @bar(i32 %1)
+  ret void
+}
+
+define void @foo2(i32 %x) {
+; CHECK: @foo2(i32 %x)
+; CHECK: %.pre = load i32* @y
+; CHECK: tail call void @bar(i32 %.pre)
+entry:
+  %t = sub i32 %x, %x
+  %.pre = load i32* @y, align 4
+  %cmp = icmp sgt i32 %t, 2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %x, 3
+  store i32 %add, i32* @y, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 1, i32* @z, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %0 = phi i32 [ %.pre, %if.else ], [ %add, %if.then ]
+  tail call void @bar(i32 %0)
+  ret void
+}
+
+declare void @bar(i32)
diff --git a/test/Transforms/GVN/cond_br2.ll b/test/Transforms/GVN/cond_br2.ll
new file mode 100644
index 000000000000..27e6f75de271
--- /dev/null
+++ b/test/Transforms/GVN/cond_br2.ll
@@ -0,0 +1,140 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" }
+%"union.llvm::SmallVectorBase::U" = type { x86_fp80 }
+
+; Function Attrs: ssp uwtable
+define void @_Z4testv() #0 {
+; CHECK: @_Z4testv()
+; CHECK: invoke.cont:
+; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14
+; CHECK: Retry.i10:
+
+entry:
+  %sv = alloca %"class.llvm::SmallVector", align 16
+  %0 = bitcast %"class.llvm::SmallVector"* %sv to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %0) #1
+  %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %FirstEl.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 3
+  %1 = bitcast %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i to i8*
+  store i8* %1, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+  store i8* %1, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !4
+  %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+  %add.ptr.i.i.i.i2.i.i = getelementptr inbounds %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i, i64 2
+  %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8*
+  store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4
+  %EndX.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+  %2 = load i8** %EndX.i, align 8, !tbaa !4
+  %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+  %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i
+  br i1 %cmp.i, label %Retry.i, label %if.end.i
+
+Retry.i:                                          ; preds = %.noexc, %entry
+  %3 = phi i8* [ %2, %entry ], [ %.pre.i, %.noexc ]
+  %new.isnull.i = icmp eq i8* %3, null
+  br i1 %new.isnull.i, label %invoke.cont, label %new.notnull.i
+
+new.notnull.i:                                    ; preds = %Retry.i
+  %4 = bitcast i8* %3 to i32*
+  store i32 1, i32* %4, align 4, !tbaa !5
+  br label %invoke.cont
+
+if.end.i:                                         ; preds = %entry
+  %5 = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0
+  invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %5, i64 0, i64 4)
+          to label %.noexc unwind label %lpad
+
+.noexc:                                           ; preds = %if.end.i
+  %.pre.i = load i8** %EndX.i, align 8, !tbaa !4
+  br label %Retry.i
+
+invoke.cont:                                      ; preds = %new.notnull.i, %Retry.i
+  %add.ptr.i = getelementptr inbounds i8* %3, i64 4
+  store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4
+  %6 = load i8** %CapacityX.i, align 16, !tbaa !4
+  %cmp.i8 = icmp ult i8* %add.ptr.i, %6
+  br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14
+
+Retry.i10:                                        ; preds = %if.end.i14
+  %.pre.i13 = load i8** %EndX.i, align 8, !tbaa !4
+  %new.isnull.i9 = icmp eq i8* %.pre.i13, null
+  br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11
+
+new.notnull.i11:                                  ; preds = %invoke.cont, %Retry.i10
+  %7 = phi i8* [ %.pre.i13, %Retry.i10 ], [ %add.ptr.i, %invoke.cont ]
+  %8 = bitcast i8* %7 to i32*
+  store i32 2, i32* %8, align 4, !tbaa !5
+  br label %invoke.cont2
+
+if.end.i14:                                       ; preds = %invoke.cont
+  %9 = getelementptr inbounds %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0
+  invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %9, i64 0, i64 4)
+          to label %Retry.i10 unwind label %lpad
+
+invoke.cont2:                                     ; preds = %new.notnull.i11, %Retry.i10
+  %10 = phi i8* [ null, %Retry.i10 ], [ %7, %new.notnull.i11 ]
+  %add.ptr.i12 = getelementptr inbounds i8* %10, i64 4
+  store i8* %add.ptr.i12, i8** %EndX.i, align 8, !tbaa !4
+  invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"* %sv)
+          to label %invoke.cont3 unwind label %lpad
+
+invoke.cont3:                                     ; preds = %invoke.cont2
+  %11 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %cmp.i.i.i.i19 = icmp eq i8* %11, %1
+  br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20
+
+if.then.i.i.i20:                                  ; preds = %invoke.cont3
+  call void @free(i8* %11) #1
+  br label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21
+
+_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21:          ; preds = %invoke.cont3, %if.then.i.i.i20
+  call void @llvm.lifetime.end(i64 64, i8* %0) #1
+  ret void
+
+lpad:                                             ; preds = %if.end.i14, %if.end.i, %invoke.cont2
+  %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %13 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %cmp.i.i.i.i = icmp eq i8* %13, %1
+  br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %lpad
+  call void @free(i8* %13) #1
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %if.then.i.i.i, %lpad
+  resume { i8*, i32 } %12
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+declare void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"*, i64, i64) #2
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) #3
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{metadata !0, metadata !0, i64 0}
+!5 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 9c28955801f7..708e4b23cb54 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -2,7 +2,7 @@
 
 @a = external global i32		; <i32*> [#uses=7]
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define i32 @test1() nounwind {
 entry:
 	%0 = load i32* @a, align 4
@@ -57,7 +57,7 @@ return:		; preds = %bb8
 declare void @foo(i1)
 declare void @bar(i32)
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define void @test3(i32 %x, i32 %y) {
   %xz = icmp eq i32 %x, 0
   %yz = icmp eq i32 %y, 0
@@ -79,7 +79,7 @@ nope:
   ret void
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 define void @test4(i1 %b, i32 %x) {
   br i1 %b, label %sw, label %case3
 sw:
@@ -112,7 +112,7 @@ case3:
   ret void
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define i1 @test5(i32 %x, i32 %y) {
   %cmp = icmp eq i32 %x, %y
   br i1 %cmp, label %same, label %different
@@ -128,7 +128,7 @@ different:
   ret i1 %cmp3
 }
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 define i1 @test6(i32 %x, i32 %y) {
   %cmp2 = icmp ne i32 %x, %y
   %cmp = icmp eq i32 %x, %y
@@ -144,7 +144,7 @@ different:
   ret i1 %cmp3
 }
 
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 define i1 @test7(i32 %x, i32 %y) {
   %cmp = icmp sgt i32 %x, %y
   br i1 %cmp, label %same, label %different
@@ -160,7 +160,7 @@ different:
   ret i1 %cmp3
 }
 
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 define i1 @test8(i32 %x, i32 %y) {
   %cmp2 = icmp sle i32 %x, %y
   %cmp = icmp sgt i32 %x, %y
@@ -177,7 +177,7 @@ different:
 }
 
 ; PR1768
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 define i32 @test9(i32 %i, i32 %j) {
   %cmp = icmp eq i32 %i, %j
   br i1 %cmp, label %cond_true, label %ret
@@ -193,7 +193,7 @@ ret:
 }
 
 ; PR1768
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 define i32 @test10(i32 %j, i32 %i) {
   %cmp = icmp eq i32 %i, %j
   br i1 %cmp, label %cond_true, label %ret
@@ -210,7 +210,7 @@ ret:
 
 declare i32 @yogibar()
 
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 define i32 @test11(i32 %x) {
   %v0 = call i32 @yogibar()
   %v1 = call i32 @yogibar()
@@ -233,7 +233,7 @@ next2:
   ret i32 0
 }
 
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 define i32 @test12(i32 %x) {
   %cmp = icmp eq i32 %x, 0
   br i1 %cmp, label %cond_true, label %cond_false
diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll
index 3a102b6c3539..646e10c0cdfb 100644
--- a/test/Transforms/GVN/edge.ll
+++ b/test/Transforms/GVN/edge.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -gvn -S < %s | FileCheck %s
 
 define i32 @f1(i32 %x) {
-  ; CHECK: define i32 @f1(
+  ; CHECK-LABEL: define i32 @f1(
 bb0:
   %cmp = icmp eq i32 %x, 0
   br i1 %cmp, label %bb2, label %bb1
@@ -16,7 +16,7 @@ bb2:
 }
 
 define i32 @f2(i32 %x) {
-  ; CHECK: define i32 @f2(
+  ; CHECK-LABEL: define i32 @f2(
 bb0:
   %cmp = icmp ne i32 %x, 0
   br i1 %cmp, label %bb1, label %bb2
@@ -31,7 +31,7 @@ bb2:
 }
 
 define i32 @f3(i32 %x) {
-  ; CHECK: define i32 @f3(
+  ; CHECK-LABEL: define i32 @f3(
 bb0:
   switch i32 %x, label %bb1 [ i32 0, label %bb2]
 bb1:
@@ -46,7 +46,7 @@ bb2:
 
 declare void @g(i1)
 define void @f4(i8 * %x)  {
-; CHECK: define void @f4(
+; CHECK-LABEL: define void @f4(
 bb0:
   %y = icmp eq i8* null, %x
   br i1 %y, label %bb2, label %bb1
diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/GVN/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GVN/load-pre-align.ll b/test/Transforms/GVN/load-pre-align.ll
index d8ad59f9df41..4816af2f441b 100644
--- a/test/Transforms/GVN/load-pre-align.ll
+++ b/test/Transforms/GVN/load-pre-align.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 @p = external global i32
 
 define i32 @test(i32 %n) nounwind {
-; CHECK: @test
+; CHECK-LABEL: @test(
 entry:
   br label %for.cond
 
diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/local-pre.ll
index 1d0dadfbe0fc..2c92699dca91 100644
--- a/test/Transforms/GVN/local-pre.ll
+++ b/test/Transforms/GVN/local-pre.ll
@@ -1,9 +1,9 @@
 ; RUN: opt < %s -gvn -enable-pre -S | grep "b.pre"
 
-define i32 @main(i32 %p) {
+define i32 @main(i32 %p, i32 %q) {
 block1:
-  
-	br i1 true, label %block2, label %block3
+    %cmp = icmp eq i32 %p, %q 
+	br i1 %cmp, label %block2, label %block3
 
 block2:
  %a = add i32 %p, 1
diff --git a/test/Transforms/GVN/malloc-load-removal.ll b/test/Transforms/GVN/malloc-load-removal.ll
index 66b6929d3038..d2d2fd77afec 100644
--- a/test/Transforms/GVN/malloc-load-removal.ll
+++ b/test/Transforms/GVN/malloc-load-removal.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 declare i8* @malloc(i64) nounwind
 
-define noalias i8* @test() nounwind uwtable ssp {
+define noalias i8* @test1() nounwind uwtable ssp {
 entry:
   %call = tail call i8* @malloc(i64 100) nounwind
   %0 = load i8* %call, align 1
@@ -21,11 +21,36 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret i8* %call
 
-; CHECK: @test
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: load
 ; CHECK-NOT: icmp
 
-; CHECK_NO_LIBCALLS: @test
+; CHECK_NO_LIBCALLS-LABEL: @test1(
+; CHECK_NO_LIBCALLS: load
+; CHECK_NO_LIBCALLS: icmp
+}
+
+declare i8* @_Znwm(i64) nounwind
+
+define noalias i8* @test2() nounwind uwtable ssp {
+entry:
+  %call = tail call i8* @_Znwm(i64 100) nounwind
+  %0 = load i8* %call, align 1
+  %tobool = icmp eq i8 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i8 0, i8* %call, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i8* %call
+
+; CHECK-LABEL: @test2(
+; CHECK-NOT: load
+; CHECK-NOT: icmp
+
+; CHECK_NO_LIBCALLS-LABEL: @test2(
 ; CHECK_NO_LIBCALLS: load
 ; CHECK_NO_LIBCALLS: icmp
 }
diff --git a/test/Transforms/GVN/non-local-offset.ll b/test/Transforms/GVN/non-local-offset.ll
index 8eaa99933ab9..0b9edcb8e430 100644
--- a/test/Transforms/GVN/non-local-offset.ll
+++ b/test/Transforms/GVN/non-local-offset.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64"
 ; GVN should ignore the store to p[1] to see that the load from p[0] is
 ; fully redundant.
 
-; CHECK: @yes
+; CHECK-LABEL: @yes(
 ; CHECK: if.then:
 ; CHECK-NEXT: store i32 0, i32* %q
 ; CHECK-NEXT: ret void
@@ -30,7 +30,7 @@ if.else:
 ; fully redundant. However, the second load is larger, so it's not a simple
 ; redundancy.
 
-; CHECK: @watch_out_for_size_change
+; CHECK-LABEL: @watch_out_for_size_change(
 ; CHECK: if.then:
 ; CHECK-NEXT: store i32 0, i32* %q
 ; CHECK-NEXT: ret void
diff --git a/test/Transforms/GVN/phi-translate.ll b/test/Transforms/GVN/phi-translate.ll
index fa91d2919eb2..50d6178c5cc2 100644
--- a/test/Transforms/GVN/phi-translate.ll
+++ b/test/Transforms/GVN/phi-translate.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64"
 
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: entry.end_crit_edge:
 ; CHECK:   %n.pre = load i32* %q.phi.trans.insert
 ; CHECK: then:
diff --git a/test/Transforms/GVN/pr14166.ll b/test/Transforms/GVN/pr14166.ll
index 9f47e464265b..4d6820506879 100644
--- a/test/Transforms/GVN/pr14166.ll
+++ b/test/Transforms/GVN/pr14166.ll
@@ -10,7 +10,7 @@ define <2 x i32> @test1() {
   store <2 x i8*> %v3, <2 x i8*>* %v4
   %v5 = load <2 x i32>* %v1
   ret <2 x i32> %v5
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %v1 = alloca <2 x i32>
 ; CHECK: call void @anything(<2 x i32>* %v1)
 ; CHECK: %v2 = load <2 x i32>* %v1
diff --git a/test/Transforms/GVN/pr17732.ll b/test/Transforms/GVN/pr17732.ll
new file mode 100644
index 000000000000..606a195b8538
--- /dev/null
+++ b/test/Transforms/GVN/pr17732.ll
@@ -0,0 +1,30 @@
+; RUN: opt -gvn -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.with_array = type { [2 x i8], i32, i8 }
+%struct.with_vector = type { <2 x i8>, i32, i8 }
+
+@main.obj_with_array = private unnamed_addr constant { [2 x i8], i32, i8, [3 x i8] } { [2 x i8] zeroinitializer, i32 0, i8 1, [3 x i8] undef }, align 4
+@array_with_zeroinit = common global %struct.with_array zeroinitializer, align 4
+
+@main.obj_with_vector = private unnamed_addr constant { <2 x i8>, i32, i8, [3 x i8] } { <2 x i8> zeroinitializer, i32 0, i8 1, [3 x i8] undef }, align 4
+@vector_with_zeroinit = common global %struct.with_vector zeroinitializer, align 4
+
+define i32 @main() {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
+  %0 = load i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4
+
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
+  %1 = load i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4
+  %conv0 = sext i8 %0 to i32
+  %conv1 = sext i8 %1 to i32
+  %and = and i32 %conv0, %conv1
+  ret i32 %and
+; CHECK-LABEL: define i32 @main(
+; CHECK: ret i32 1
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/Transforms/GVN/pr17852.ll b/test/Transforms/GVN/pr17852.ll
new file mode 100644
index 000000000000..e95ff7f5c106
--- /dev/null
+++ b/test/Transforms/GVN/pr17852.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -basicaa -gvn
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+%struct.S0 = type { [2 x i8], [2 x i8], [4 x i8], [2 x i8], i32, i32, i32, i32 }
+define void @fn1(%struct.S0* byval align 8 %p1) {
+  br label %for.cond
+for.cond:                                         ; preds = %1, %0
+  br label %for.end
+  %f2 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %f9 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 7
+  br label %for.cond
+for.end:                                          ; preds = %for.cond
+  br i1 true, label %if.else, label %if.then
+if.then:                                          ; preds = %for.end
+  %f22 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %f7 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 5
+  %tmp7 = load i32* %f7, align 8
+  br label %if.end40
+if.else:                                          ; preds = %for.end
+  br i1 false, label %for.cond18, label %if.then6
+if.then6:                                         ; preds = %if.else
+  %f3 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %tmp10 = bitcast %struct.S0* %p1 to i16*
+  %f5 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp11 = bitcast [2 x i8]* %f5 to i16*
+  %bf.load13 = load i16* %tmp11, align 8
+  br label %if.end36
+for.cond18:                                       ; preds = %if.else
+  call void @fn4()
+  br i1 true, label %if.end, label %if.end36
+if.end:                                           ; preds = %for.cond18
+  %f321 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 2
+  %f925 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 7
+  %f526 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp15 = bitcast [2 x i8]* %f526 to i16*
+  %bf.load27 = load i16* %tmp15, align 8
+  %tmp16 = bitcast %struct.S0* %p1 to i16*
+  br label %if.end36
+if.end36:                                         ; preds = %if.end, %for.cond18, %if.then6
+  %f537 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp17 = bitcast [2 x i8]* %f537 to i16*
+  %bf.load38 = load i16* %tmp17, align 8
+  %bf.clear39 = and i16 %bf.load38, -16384
+  br label %if.end40
+if.end40:                                         ; preds = %if.end36, %if.then
+  %f6 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 4
+  %tmp18 = load i32* %f6, align 4
+  call void @fn2(i32 %tmp18)
+  %f8 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 6
+  %tmp19 = load i32* %f8, align 4
+  %tobool41 = icmp eq i32 %tmp19, 0
+  br i1 true, label %if.end50, label %if.then42
+if.then42:                                        ; preds = %if.end40
+  %tmp20 = bitcast %struct.S0* %p1 to i16*
+  %f547 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp21 = bitcast [2 x i8]* %f547 to i16*
+  %bf.load48 = load i16* %tmp21, align 8
+  br label %if.end50
+if.end50:                                         ; preds = %if.then42, %if.end40
+  %f551 = getelementptr inbounds %struct.S0* %p1, i64 0, i32 3
+  %tmp22 = bitcast [2 x i8]* %f551 to i16*
+  %bf.load52 = load i16* %tmp22, align 8
+  %bf.clear53 = and i16 %bf.load52, -16384
+  ret void
+}
+declare void @fn2(i32)
+declare void @fn4()
diff --git a/test/Transforms/GVN/pre-load.ll b/test/Transforms/GVN/pre-load.ll
index bf4add42e80b..9842886fe3ab 100644
--- a/test/Transforms/GVN/pre-load.ll
+++ b/test/Transforms/GVN/pre-load.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32* %p, i1 %C) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 block1:
 	br i1 %C, label %block2, label %block3
 
@@ -25,7 +25,7 @@ block4:
 
 ; This is a simple phi translation case.
 define i32 @test2(i32* %p, i32* %q, i1 %C) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 block1:
 	br i1 %C, label %block2, label %block3
 
@@ -50,7 +50,7 @@ block4:
 
 ; This is a PRE case that requires phi translation through a GEP.
 define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 block1:
   %B = getelementptr i32* %q, i32 1
   store i32* %B, i32** %Hack
@@ -80,7 +80,7 @@ block4:
 ;; Here the loaded address is available, but the computation is in 'block3'
 ;; which does not dominate 'block2'.
 define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 block1:
 	br i1 %C, label %block2, label %block3
 
@@ -116,7 +116,7 @@ block4:
 ;}
 
 define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 entry:
   %0 = add i32 %N, -1           
   %1 = icmp sgt i32 %0, 0       
@@ -159,7 +159,7 @@ return:
 ;}
 
 define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 entry:
   %0 = add i32 %N, -1           
   %1 = icmp sgt i32 %0, 0       
@@ -242,7 +242,7 @@ return:
 ;; Here the loaded address isn't available in 'block2' at all, requiring a new
 ;; GEP to be inserted into it.
 define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 block1:
 	br i1 %C, label %block2, label %block3
 
@@ -365,7 +365,7 @@ return:
 
 ; Test critical edge splitting.
 define i32 @test11(i32* %p, i1 %C, i32 %N) {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 block1:
         br i1 %C, label %block2, label %block3
 
diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/preserve-tbaa.ll
index a93675556cde..c52ed96c23c1 100644
--- a/test/Transforms/GVN/preserve-tbaa.ll
+++ b/test/Transforms/GVN/preserve-tbaa.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64"
 
 ; GVN should preserve the TBAA tag on loads when doing PRE.
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: %tmp33.pre = load i16* %P, align 2, !tbaa !0
 ; CHECK: br label %for.body
 define void @test(i16 *%P, i16* %Q) nounwind {
@@ -25,6 +25,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-!0 = metadata !{metadata !"short", metadata !1}
+!0 = metadata !{metadata !3, metadata !3, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/Transforms/GVN/readattrs.ll b/test/Transforms/GVN/readattrs.ll
new file mode 100644
index 000000000000..ba624a71f9b6
--- /dev/null
+++ b/test/Transforms/GVN/readattrs.ll
@@ -0,0 +1,17 @@
+; RUN: opt -gvn -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @use(i8* readonly nocapture)
+
+define i8 @test() {
+  %a = alloca i8
+  store i8 1, i8* %a
+  call void @use(i8* %a)
+  %b = load i8* %a
+  ret i8 %b
+; CHECK-LABEL: define i8 @test(
+; CHECK: call void @use(i8* %a)
+; CHECK-NEXT: ret i8 1
+}
diff --git a/test/Transforms/GVN/rle-nonlocal.ll b/test/Transforms/GVN/rle-nonlocal.ll
index 6b74e9a946d1..8229aaa14247 100644
--- a/test/Transforms/GVN/rle-nonlocal.ll
+++ b/test/Transforms/GVN/rle-nonlocal.ll
@@ -1,8 +1,9 @@
 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
-define i32 @main(i32** %p) {
+define i32 @main(i32** %p, i32 %x, i32 %y) {
 block1:
-	br i1 true, label %block2, label %block3
+    %cmp = icmp eq i32 %x, %y
+	br i1 %cmp , label %block2, label %block3
 
 block2:
  %a = load i32** %p
diff --git a/test/Transforms/GVN/rle-phi-translate.ll b/test/Transforms/GVN/rle-phi-translate.ll
index 6731f43c0d2b..1ce7e0b93797 100644
--- a/test/Transforms/GVN/rle-phi-translate.ll
+++ b/test/Transforms/GVN/rle-phi-translate.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin7"
 
 define i32 @test1(i32* %b, i32* %c) nounwind {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
 	%g = alloca i32
 	%t1 = icmp eq i32* %b, null
@@ -36,7 +36,7 @@ bb2:		; preds = %bb1, %bb
 }
 
 define i8 @test2(i1 %cond, i32* %b, i32* %c) nounwind {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 entry:
   br i1 %cond, label %bb, label %bb1
 
@@ -61,7 +61,7 @@ bb2:
 }
 
 define i32 @test3(i1 %cond, i32* %b, i32* %c) nounwind {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 entry:
   br i1 %cond, label %bb, label %bb1
 
@@ -88,7 +88,7 @@ bb2:
 
 ; PR5313
 define i32 @test4(i1 %cond, i32* %b, i32* %c) nounwind {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 entry:
   br i1 %cond, label %bb, label %bb1
 
@@ -121,7 +121,7 @@ bb2:
 ;
 ; Should compile into one load in the loop.
 define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 bb.nph:
   br label %for.body
 
diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll
index 71aa548ab11e..923cd03ecdb7 100644
--- a/test/Transforms/GVN/rle-semidominated.ll
+++ b/test/Transforms/GVN/rle-semidominated.ll
@@ -1,9 +1,10 @@
 ; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 "
 
-define i32 @main(i32* %p) {
+define i32 @main(i32* %p, i32 %x, i32 %y) {
 block1:
   %z = load i32* %p
-	br i1 true, label %block2, label %block3
+  %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %block2, label %block3
 
 block2:
  br label %block4
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index f470ed88bb9c..8d289b06997c 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s
-; RUN: opt < %s -default-data-layout="E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basicaa -gvn -S -die | FileCheck %s
+; RUN: opt < %s -default-data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s
+; RUN: opt < %s -default-data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32"      -basicaa -gvn -S -die | FileCheck %s
 
 ;; Trivial RLE test.
 define i32 @test0(i32 %V, i32* %P) {
@@ -7,7 +7,7 @@ define i32 @test0(i32 %V, i32* %P) {
 
   %A = load i32* %P
   ret i32 %A
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK: ret i32 %V
 }
 
@@ -47,7 +47,7 @@ define float @coerce_mustalias1(i32 %V, i32* %P) {
 
   %A = load float* %P2
   ret float %A
-; CHECK: @coerce_mustalias1
+; CHECK-LABEL: @coerce_mustalias1(
 ; CHECK-NOT: load
 ; CHECK: ret float 
 }
@@ -60,7 +60,7 @@ define float @coerce_mustalias2(i32* %V, i32** %P) {
 
   %A = load float* %P2
   ret float %A
-; CHECK: @coerce_mustalias2
+; CHECK-LABEL: @coerce_mustalias2(
 ; CHECK-NOT: load
 ; CHECK: ret float 
 }
@@ -73,7 +73,7 @@ define i32* @coerce_mustalias3(float %V, float* %P) {
 
   %A = load i32** %P2
   ret i32* %A
-; CHECK: @coerce_mustalias3
+; CHECK-LABEL: @coerce_mustalias3(
 ; CHECK-NOT: load
 ; CHECK: ret i32* 
 }
@@ -92,7 +92,7 @@ F:
   %X = bitcast i32 %A to float
   ret float %X
 
-; CHECK: @coerce_mustalias4
+; CHECK-LABEL: @coerce_mustalias4(
 ; CHECK: %A = load i32* %P
 ; CHECK-NOT: load
 ; CHECK: ret float
@@ -107,7 +107,7 @@ define i8 @coerce_mustalias5(i32 %V, i32* %P) {
 
   %A = load i8* %P2
   ret i8 %A
-; CHECK: @coerce_mustalias5
+; CHECK-LABEL: @coerce_mustalias5(
 ; CHECK-NOT: load
 ; CHECK: ret i8
 }
@@ -120,7 +120,7 @@ define float @coerce_mustalias6(i64 %V, i64* %P) {
 
   %A = load float* %P2
   ret float %A
-; CHECK: @coerce_mustalias6
+; CHECK-LABEL: @coerce_mustalias6(
 ; CHECK-NOT: load
 ; CHECK: ret float
 }
@@ -133,7 +133,7 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
 
   %A = load i8** %P2
   ret i8* %A
-; CHECK: @coerce_mustalias7
+; CHECK-LABEL: @coerce_mustalias7(
 ; CHECK-NOT: load
 ; CHECK: ret i8*
 }
@@ -146,7 +146,7 @@ entry:
   %arrayidx = getelementptr inbounds i16* %A, i64 42
   %tmp2 = load i16* %arrayidx
   ret i16 %tmp2
-; CHECK: @memset_to_i16_local
+; CHECK-LABEL: @memset_to_i16_local(
 ; CHECK-NOT: load
 ; CHECK: ret i16 257
 }
@@ -159,7 +159,7 @@ entry:
   %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
-; CHECK: @memset_to_float_local
+; CHECK-LABEL: @memset_to_float_local(
 ; CHECK-NOT: load
 ; CHECK: zext
 ; CHECK-NEXT: shl
@@ -187,7 +187,7 @@ Cont:
   %A = load i16* %P2
   ret i16 %A
 
-; CHECK: @memset_to_i16_nonlocal0
+; CHECK-LABEL: @memset_to_i16_nonlocal0(
 ; CHECK: Cont:
 ; CHECK-NEXT:   %A = phi i16 [ 514, %F ], [ 257, %T ]
 ; CHECK-NOT: load
@@ -195,6 +195,7 @@ Cont:
 }
 
 @GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }
+@GCst_as1 = addrspace(1) constant {i32, float, i32 } { i32 42, float 14., i32 97 }
 
 ; memset -> float forwarding.
 define float @memcpy_to_float_local(float* %A) nounwind ssp {
@@ -204,12 +205,23 @@ entry:
   %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
-; CHECK: @memcpy_to_float_local
+; CHECK-LABEL: @memcpy_to_float_local(
 ; CHECK-NOT: load
 ; CHECK: ret float 1.400000e+01
 }
 
-
+; memcpy from address space 1
+define float @memcpy_to_float_local_as1(float* %A) nounwind ssp {
+entry:
+  %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
+  tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i32 1, i1 false)
+  %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
+  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  ret float %tmp2
+; CHECK-LABEL: @memcpy_to_float_local_as1(
+; CHECK-NOT: load
+; CHECK: ret float 1.400000e+01
+}
 
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
@@ -228,7 +240,7 @@ Cont:
   %A = load i8* %P3
   ret i8 %A
 
-; CHECK: @coerce_mustalias_nonlocal0
+; CHECK-LABEL: @coerce_mustalias_nonlocal0(
 ; CHECK: Cont:
 ; CHECK:   %A = phi i8 [
 ; CHECK-NOT: load
@@ -254,7 +266,7 @@ Cont:
   %A = load i8* %P3
   ret i8 %A
 
-; CHECK: @coerce_mustalias_nonlocal1
+; CHECK-LABEL: @coerce_mustalias_nonlocal1(
 ; CHECK: Cont:
 ; CHECK:   %A = phi i8 [
 ; CHECK-NOT: load
@@ -277,7 +289,7 @@ Cont:
   %A = load i8* %P3
   ret i8 %A
 
-; CHECK: @coerce_mustalias_pre0
+; CHECK-LABEL: @coerce_mustalias_pre0(
 ; CHECK: F:
 ; CHECK:   load i8* %P3
 ; CHECK: Cont:
@@ -301,7 +313,7 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {
 
   %A = load i8* %P3
   ret i8 %A
-; CHECK: @coerce_offset0
+; CHECK-LABEL: @coerce_offset0(
 ; CHECK-NOT: load
 ; CHECK: ret i8
 }
@@ -324,7 +336,7 @@ Cont:
   %A = load i8* %P4
   ret i8 %A
 
-; CHECK: @coerce_offset_nonlocal0
+; CHECK-LABEL: @coerce_offset_nonlocal0(
 ; CHECK: Cont:
 ; CHECK:   %A = phi i8 [
 ; CHECK-NOT: load
@@ -348,7 +360,7 @@ Cont:
   %A = load i8* %P4
   ret i8 %A
 
-; CHECK: @coerce_offset_pre0
+; CHECK-LABEL: @coerce_offset_pre0(
 ; CHECK: F:
 ; CHECK:   load i8* %P4
 ; CHECK: Cont:
@@ -357,13 +369,14 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
-define i32 @chained_load(i32** %p) {
+define i32 @chained_load(i32** %p, i32 %x, i32 %y) {
 block1:
   %A = alloca i32*
 
   %z = load i32** %p
   store i32* %z, i32** %A
-  br i1 true, label %block2, label %block3
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %block2, label %block3
 
 block2:
  %a = load i32** %p
@@ -378,7 +391,7 @@ block4:
   %d = load i32* %c
   ret i32 %d
   
-; CHECK: @chained_load
+; CHECK-LABEL: @chained_load(
 ; CHECK: %z = load i32** %p
 ; CHECK-NOT: load
 ; CHECK: %d = load i32* %z
@@ -390,7 +403,7 @@ declare i1 @cond() readonly
 declare i1 @cond2() readonly
 
 define i32 @phi_trans2() {
-; CHECK: @phi_trans2
+; CHECK-LABEL: @phi_trans2(
 entry:
   %P = alloca i32, i32 400
   br label %F1
@@ -427,10 +440,11 @@ TY:
   ret i32 0
 }
 
-define i32 @phi_trans3(i32* %p) {
-; CHECK: @phi_trans3
+define i32 @phi_trans3(i32* %p, i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @phi_trans3(
 block1:
-  br i1 true, label %block2, label %block3
+  %cmpxy = icmp eq i32 %x, %y
+  br i1 %cmpxy, label %block2, label %block3
 
 block2:
  store i32 87, i32* %p
@@ -443,7 +457,7 @@ block3:
 
 block4:
   %A = phi i32 [-1, %block2], [42, %block3]
-  br i1 true, label %block5, label %exit
+  br i1 %cmpxy, label %block5, label %exit
   
 ; CHECK: block4:
 ; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]  
@@ -451,11 +465,11 @@ block4:
 
 block5:
   %B = add i32 %A, 1
-  br i1 true, label %block6, label %exit
+  br i1 %cmpxy, label %block6, label %exit
   
 block6:
   %C = getelementptr i32* %p, i32 %B
-  br i1 true, label %block7, label %exit
+  br i1 %cmpxy, label %block7, label %exit
   
 block7:
   %D = load i32* %C
@@ -469,7 +483,7 @@ exit:
 }
 
 define i8 @phi_trans4(i8* %p) {
-; CHECK: @phi_trans4
+; CHECK-LABEL: @phi_trans4(
 entry:
   %X3 = getelementptr i8* %p, i32 192
   store i8 192, i8* %X3
@@ -499,7 +513,7 @@ out:
 }
 
 define i8 @phi_trans5(i8* %p) {
-; CHECK: @phi_trans5
+; CHECK-LABEL: @phi_trans5(
 entry:
   
   %X4 = getelementptr i8* %p, i32 2
@@ -542,7 +556,7 @@ entry:
   %arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
   %tmp1 = load i32* %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
-; CHECK: @memset_to_load
+; CHECK-LABEL: @memset_to_load(
 ; CHECK: ret i32 0
 }
 
@@ -561,7 +575,7 @@ entry:
   %add = add nsw i32 %tmp2, %conv
   ret i32 %add
 
-; TEMPORARILYDISABLED: @load_load_partial_alias
+; TEMPORARILYDISABLED-LABEL: @load_load_partial_alias(
 ; TEMPORARILYDISABLED: load i32*
 ; TEMPORARILYDISABLED-NOT: load
 ; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
@@ -588,7 +602,7 @@ land.lhs.true:                                    ; preds = %entry
 
 if.end:
   ret i32 52
-; TEMPORARILY_DISABLED: @load_load_partial_alias_cross_block
+; TEMPORARILY_DISABLED-LABEL: @load_load_partial_alias_cross_block(
 ; TEMPORARILY_DISABLED: land.lhs.true:
 ; TEMPORARILY_DISABLED-NOT: load i8
 ; TEMPORARILY_DISABLED: ret i32 %conv6
@@ -611,7 +625,7 @@ entry:
   %conv2 = zext i8 %tmp1 to i32
   %add = add nsw i32 %conv, %conv2
   ret i32 %add
-; CHECK: @test_widening1
+; CHECK-LABEL: @test_widening1(
 ; CHECK-NOT: load
 ; CHECK: load i16*
 ; CHECK-NOT: load
@@ -635,7 +649,7 @@ entry:
   %add3 = add nsw i32 %add2, %conv3
 
   ret i32 %add3
-; CHECK: @test_widening2
+; CHECK-LABEL: @test_widening2(
 ; CHECK-NOT: load
 ; CHECK: load i32*
 ; CHECK-NOT: load
@@ -645,6 +659,8 @@ entry:
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+
 
 ;;===----------------------------------------------------------------------===;;
 ;; Load -> Store dependency which isn't interfered with by a call that happens
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index 85fe39a93b01..d6412fce59b2 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -13,7 +13,7 @@ define i32 @test1(i8* %p, i8* %q) {
 
 define i32 @test2(i8* %p, i8* %q) {
 ; CHECK: @test2(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !0
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGC:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !0
   %b = call i32 @foo(i8* %p), !tbaa !0
@@ -23,7 +23,7 @@ define i32 @test2(i8* %p, i8* %q) {
 
 define i32 @test3(i8* %p, i8* %q) {
 ; CHECK: @test3(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !3
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGB:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !3
   %b = call i32 @foo(i8* %p), !tbaa !3
@@ -33,7 +33,7 @@ define i32 @test3(i8* %p, i8* %q) {
 
 define i32 @test4(i8* %p, i8* %q) {
 ; CHECK: @test4(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !1
   %b = call i32 @foo(i8* %p), !tbaa !0
@@ -43,7 +43,7 @@ define i32 @test4(i8* %p, i8* %q) {
 
 define i32 @test5(i8* %p, i8* %q) {
 ; CHECK: @test5(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !0
   %b = call i32 @foo(i8* %p), !tbaa !1
@@ -53,7 +53,7 @@ define i32 @test5(i8* %p, i8* %q) {
 
 define i32 @test6(i8* %p, i8* %q) {
 ; CHECK: @test6(i8* %p, i8* %q)
-; CHECK: call i32 @foo(i8* %p), !tbaa !1
+; CHECK: call i32 @foo(i8* %p), !tbaa [[TAGA:!.*]]
 ; CHECK: %c = add i32 %a, %a
   %a = call i32 @foo(i8* %p), !tbaa !0
   %b = call i32 @foo(i8* %p), !tbaa !3
@@ -74,8 +74,18 @@ define i32 @test7(i8* %p, i8* %q) {
 
 declare i32 @foo(i8*) readonly
 
-!0 = metadata !{metadata !"C", metadata !1}
-!1 = metadata !{metadata !"A", metadata !2}
+; CHECK: [[TAGC]] = metadata !{metadata [[TYPEC:!.*]], metadata [[TYPEC]], i64 0}
+; CHECK: [[TYPEC]] = metadata !{metadata !"C", metadata [[TYPEA:!.*]]}
+; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
+; CHECK: [[TAGB]] = metadata !{metadata [[TYPEB:!.*]], metadata [[TYPEB]], i64 0}
+; CHECK: [[TYPEB]] = metadata !{metadata !"B", metadata [[TYPEA]]}
+; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA]], metadata [[TYPEA]], i64 0}
+!0 = metadata !{metadata !5, metadata !5, i64 0}
+!1 = metadata !{metadata !6, metadata !6, i64 0}
 !2 = metadata !{metadata !"tbaa root", null}
-!3 = metadata !{metadata !"B", metadata !1}
-!4 = metadata !{metadata !"another root", null}
+!3 = metadata !{metadata !7, metadata !7, i64 0}
+!4 = metadata !{metadata !8, metadata !8, i64 0}
+!5 = metadata !{metadata !"C", metadata !6}
+!6 = metadata !{metadata !"A", metadata !2}
+!7 = metadata !{metadata !"B", metadata !6}
+!8 = metadata !{metadata !"another root", null}
diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/GlobalDCE/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
index 390e77a8cea8..0867ca9c5431 100644
--- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
+++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
@@ -4,7 +4,7 @@
 
 ; RUN: opt < %s -globalopt -S > %t
 ; Check that the new global values still have their address space
-; RUN: cat %t | grep addrspace.*global
+; RUN: cat %t | grep 'addrspace.*global'
 
 @struct = internal addrspace(1) global { i32, i32 } zeroinitializer
 @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer 
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index a5be2b17d4ad..b98facad3977 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -globalopt -S | FileCheck %s
 
 define internal void @f() {
-; CHECK-NOT: @f
+; CHECK-NOT: @f(
 ; CHECK: define void @a
 	ret void
 }
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index e71aed9e05ff..01089600637b 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -56,22 +56,24 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.gv = !{!0}
 
 !0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"Stop", metadata !"Stop", metadata !"", metadata !1, i32 2, metadata !2, i1 true, i1 true, i32* @Stop} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 458769, i32 0, i32 1, metadata !"g.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!1 = metadata !{i32 458769, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !3 = metadata !{i32 459009, metadata !4, metadata !"i", metadata !1, i32 4, metadata !2} ; [ DW_TAG_arg_variable ]
-!4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 4, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!5 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !2, metadata !2}
 !7 = metadata !{i32 5, i32 0, metadata !8, null}
-!8 = metadata !{i32 458763, metadata !4, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 458763, metadata !20, metadata !4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !9 = metadata !{i32 6, i32 0, metadata !8, null}
 !10 = metadata !{i32 7, i32 0, metadata !8, null}
 !11 = metadata !{i32 9, i32 0, metadata !8, null}
 !12 = metadata !{i32 11, i32 0, metadata !8, null}
 !13 = metadata !{i32 14, i32 0, metadata !14, null}
-!14 = metadata !{i32 458763, metadata !15, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{i32 458763, metadata !20, metadata !15, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !17 = metadata !{metadata !2}
 !18 = metadata !{i32 15, i32 0, metadata !14, null}
 !19 = metadata !{i32 16, i32 0, metadata !14, null}
+!20 = metadata !{metadata !"g.c", metadata !"/tmp"}
+!21 = metadata !{i32 0}
diff --git a/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll b/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll
index 0c58c1a9d9c7..24213af024f5 100644
--- a/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll
+++ b/test/Transforms/GlobalOpt/2012-05-11-blockaddress.ll
@@ -2,7 +2,7 @@
 ; Check that the mere presence of a blockaddress doesn't prevent -globalopt
 ; from promoting @f to fastcc.
 
-; CHECK: define{{.*}}fastcc{{.*}}@f
+; CHECK-LABEL: define{{.*}}fastcc{{.*}}@f(
 define internal i8* @f() {
   ret i8* blockaddress(@f, %L1)
 L1:
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 845117987391..32f4bf8ebe25 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,19 +1,38 @@
+; We use a temporary file so that the test fails when opt crashes.
+
 ; RUN: opt < %s -globalopt -S > %t
-; RUN: cat %t | grep foo1 | count 1
-; RUN: cat %t | grep foo2 | count 4
-; RUN: cat %t | grep bar1 | count 1
-; RUN: cat %t | grep bar2 | count 4
+; RUN: FileCheck %s < %t
 
 @foo1 = alias void ()* @foo2
+; CHECK: @foo1 = alias void ()* @foo2
+
 @foo2 = alias weak void()* @bar1
+; CHECK: @foo2 = alias weak void ()* @bar2
+
 @bar1  = alias void ()* @bar2
+; CHECK: @bar1 = alias void ()* @bar2
 
 declare void @bar2()
+; CHECK: declare void @bar2()
 
 define void @baz() {
 entry:
-        call void @foo1()
-        call void @foo2()
-        call void @bar1()
-        ret void
+         call void @foo1()
+; CHECK: call void @foo2()
+
+         call void @foo2()
+; CHECK: call void @foo2()
+
+         call void @bar1()
+; CHECK: call void @bar2()
+
+         ret void
+}
+
+@foo3 = alias void ()* @bar3
+; CHECK-NOT: bar3
+
+define internal void @bar3() {
+  ret void
 }
+;CHECK: define void @foo3
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
index f91579bf0507..05ac7f9bddb3 100644
--- a/test/Transforms/GlobalOpt/alias-used.ll
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -2,16 +2,21 @@
 
 @c = global i8 42
 
+@i = internal global i8 42
+; CHECK: @ia = internal global i8 42
+@ia = alias internal i8* @i
+
 @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
-; CHECK: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
+; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
 
-@llvm.compiler_used = appending global [2 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @fa3 to i8*)], section "llvm.metadata"
+@llvm.compiler.used = appending global [4 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* bitcast (void ()* @fa to i8*), i8* @ia, i8* @i], section "llvm.metadata"
+; CHECK-DAG: @llvm.compiler.used = appending global [2 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* @ia], section "llvm.metadata"
 
 @sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca]
-; CHECK: @sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @f to i8*), i8* bitcast (void ()* @f to i8*), i8* @c]
+; CHECK-DAG: @sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @f to i8*), i8* bitcast (void ()* @f to i8*), i8* @c]
 
 @other = global i32* bitcast (void ()* @fa to i32*)
-; CHECK: @other = global i32* bitcast (void ()* @f to i32*)
+; CHECK-DAG: @other = global i32* bitcast (void ()* @f to i32*)
 
 @fa = alias internal void ()* @f
 ; CHECK: @fa = alias internal void ()* @f
diff --git a/test/Transforms/GlobalOpt/array-elem-refs.ll b/test/Transforms/GlobalOpt/array-elem-refs.ll
new file mode 100644
index 000000000000..ec472b0e99f2
--- /dev/null
+++ b/test/Transforms/GlobalOpt/array-elem-refs.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { i8, i8 }
+
+@c = internal global i8** bitcast (i8* getelementptr (i8* bitcast ([8 x i8*]* @b to i8*), i64 48) to i8**), align 8
+@b = internal global [8 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S* @a, i32 0, i32 0), i64 1)], align 16
+@a = internal global %struct.S zeroinitializer, align 1
+
+; Function Attrs: nounwind uwtable
+define signext i8 @foo() #0 {
+entry:
+  %0 = load i8*** @c, align 8
+  %1 = load i8** %0, align 8
+  %2 = load i8* %1, align 1
+  ret i8 %2
+
+; CHECK-LABEL: @foo
+; CHECK: ret i8 0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0
+}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/GlobalOpt/atexit.ll b/test/Transforms/GlobalOpt/atexit.ll
new file mode 100644
index 000000000000..dbcd0d7b00bd
--- /dev/null
+++ b/test/Transforms/GlobalOpt/atexit.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; CHECK: ModuleID
+define internal hidden i32 @__cxa_atexit(void (i8*)* nocapture %func, i8* nocapture %arg, i8* nocapture %dso_handle) nounwind readnone optsize noimplicitfloat {
+  unreachable
+}
diff --git a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll
index 4c3f4395a0ea..ac05bfd68d9d 100644
--- a/test/Transforms/GlobalOpt/atomic.ll
+++ b/test/Transforms/GlobalOpt/atomic.ll
@@ -1,10 +1,25 @@
 ; RUN: opt -globalopt < %s -S -o - | FileCheck %s
 
 @GV1 = internal global i64 1
+@GV2 = internal global i32 0
+
 ; CHECK: @GV1 = internal unnamed_addr constant i64 1
+; CHECK: @GV2 = internal unnamed_addr global i32 0
 
 define void @test1() {
 entry:
   %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
   ret void
 }
+
+; PR17163
+define void @test2a() {
+entry:
+  store atomic i32 10, i32* @GV2 seq_cst, align 4
+  ret void
+}
+define i32 @test2b() {
+entry:
+  %atomic-load = load atomic i32* @GV2 seq_cst, align 4
+  ret i32 %atomic-load
+}
diff --git a/test/Transforms/GlobalOpt/blockaddress.ll b/test/Transforms/GlobalOpt/blockaddress.ll
index 13da76299d5d..f7f830869b84 100644
--- a/test/Transforms/GlobalOpt/blockaddress.ll
+++ b/test/Transforms/GlobalOpt/blockaddress.ll
@@ -3,7 +3,7 @@
 @x = internal global i8* zeroinitializer
 
 define void @f() {
-; CHECK: @f
+; CHECK-LABEL: @f(
 
 ; Check that we don't hit an assert in Constant::IsThreadDependent()
 ; when storing this blockaddress into a global.
@@ -13,7 +13,7 @@ define void @f() {
 }
 
 define void @g() {
-; CHECK: @g
+; CHECK-LABEL: @g(
 
 here:
   ret void
diff --git a/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll b/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll
index a472f1033ff9..b6dfdea0610d 100644
--- a/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll
+++ b/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll
@@ -3,7 +3,7 @@
 @glbl = internal global i8* null
 
 define void @test1a() {
-; CHECK: @test1a
+; CHECK-LABEL: @test1a(
 ; CHECK-NOT: store
 ; CHECK-NEXT: ret void
   store i8* null, i8** @glbl
@@ -11,7 +11,7 @@ define void @test1a() {
 }
 
 define void @test1b(i8* %p) {
-; CHECK: @test1b
+; CHECK-LABEL: @test1b(
 ; CHECK-NEXT: store
 ; CHECK-NEXT: ret void
   store i8* %p, i8** @glbl
@@ -19,7 +19,7 @@ define void @test1b(i8* %p) {
 }
 
 define void @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: alloca i8
   %txt = alloca i8
   call void @foo2(i8* %txt)
@@ -31,7 +31,7 @@ declare i8* @strdup(i8*)
 declare void @foo2(i8*)
 
 define void @test3() uwtable {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: bb1:
 ; CHECK-NOT: bb2:
 ; CHECK: invoke
diff --git a/test/Transforms/GlobalOpt/compiler-used.ll b/test/Transforms/GlobalOpt/compiler-used.ll
new file mode 100644
index 000000000000..a710d272edc7
--- /dev/null
+++ b/test/Transforms/GlobalOpt/compiler-used.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; Test that when all members of llvm.compiler.used are found to be redundant
+; we delete it instead of crashing.
+
+define void @foo() {
+  ret void
+}
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata"
+
+@llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata"
+
+; CHECK-NOT: @llvm.compiler.used
+; CHECK: @llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)], section "llvm.metadata"
+; CHECK-NOT: @llvm.compiler.used
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index c9076109443d..dd86f01924a5 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -20,7 +20,7 @@ entry:
   store i32* inttoptr (i64 sdiv (i64 ptrtoint (i32* @G to i64), i64 ptrtoint (i32* @H to i64)) to i32*), i32** %tmp, align 8
   ret void
 }
-; CHECK: @init1
+; CHECK-LABEL: @init1(
 ; CHECK: store i32*
 
 ; PR11705 - ptrtoint isn't safe in general in global initializers.
@@ -30,5 +30,5 @@ entry:
   store i128 ptrtoint (i32* @G to i128), i128* %tmp, align 16
   ret void
 }
-; CHECK: @init2
+; CHECK-LABEL: @init2(
 ; CHECK: store i128
diff --git a/test/Transforms/GlobalOpt/deadglobal.ll b/test/Transforms/GlobalOpt/deadglobal.ll
index cad5a91488ab..9563a23b2c29 100644
--- a/test/Transforms/GlobalOpt/deadglobal.ll
+++ b/test/Transforms/GlobalOpt/deadglobal.ll
@@ -16,7 +16,7 @@ define void @foo1() {
 @G2 = linkonce_odr constant i32 42
 
 define void @foo2() {
-; CHECK: define void @foo2
+; CHECK-LABEL: define void @foo2(
 ; CHECK-NEXT: store
         store i32 1, i32* @G2
         ret void
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 51858069ac5b..abf5fdd2ef3c 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -19,7 +19,7 @@ define void @set2() {
 }
 
 define i1 @get() {
-; CHECK: @get
+; CHECK-LABEL: @get(
   %A = load i32 addrspace(1) * @G
   %C = icmp slt i32 %A, 2
   ret i1 %C
diff --git a/test/Transforms/GlobalOpt/invariant-nodatalayout.ll b/test/Transforms/GlobalOpt/invariant-nodatalayout.ll
new file mode 100644
index 000000000000..a2abd52c4e80
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant-nodatalayout.ll
@@ -0,0 +1,17 @@
+; RUN: opt -globalopt -S -o - < %s | FileCheck %s
+; The check here is that it doesn't crash.
+
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+
+@object1 = global { i32, i32 } zeroinitializer
+; CHECK: @object1 = global { i32, i32 } zeroinitializer
+
+define void @ctor1() {
+  %ptr = bitcast {i32, i32}* @object1 to i8*
+  call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+  ret void
+}
+
+@llvm.global_ctors = appending constant
+  [1 x { i32, void ()* }]
+  [ { i32, void ()* } { i32 65535, void ()* @ctor1 } ]
diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/GlobalOpt/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/load-store-global.ll b/test/Transforms/GlobalOpt/load-store-global.ll
index 25a53370fa09..ad7326dc682c 100644
--- a/test/Transforms/GlobalOpt/load-store-global.ll
+++ b/test/Transforms/GlobalOpt/load-store-global.ll
@@ -7,14 +7,14 @@ define void @foo() {
         %V = load i32* @G               ; <i32> [#uses=1]
         store i32 %V, i32* @G
         ret void
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK-NEXT: ret void
 }
 
 define i32 @bar() {
         %X = load i32* @G               ; <i32> [#uses=1]
         ret i32 %X
-; CHECK: @bar
+; CHECK-LABEL: @bar(
 ; CHECK-NEXT: ret i32 17
 }
 
@@ -31,7 +31,7 @@ define void @qux() nounwind {
   store i64* inttoptr (i64 1 to i64*), i64** @a, align 8
   %l = load i64** @a, align 8
   ret void
-; CHECK: @qux
+; CHECK-LABEL: @qux(
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 }
diff --git a/test/Transforms/GlobalOpt/malloc-promote-3.ll b/test/Transforms/GlobalOpt/malloc-promote-3.ll
new file mode 100644
index 000000000000..d44ee6460950
--- /dev/null
+++ b/test/Transforms/GlobalOpt/malloc-promote-3.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+@G = internal global i32* null
+
+define void @t() {
+; CHECK: @t()
+; CHECK: call i8* @malloc
+  %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) nobuiltin
+  %P = bitcast i8* %malloccall to i32*
+  store i32* %P, i32** @G
+  %GV = load i32** @G
+  %GVe = getelementptr i32* %GV, i32 40
+  store i32 20, i32* %GVe
+  ret void
+}
+
+declare noalias i8* @malloc(i64)
diff --git a/test/Transforms/GlobalOpt/metadata.ll b/test/Transforms/GlobalOpt/metadata.ll
index 730e2b080236..ecf3f94196be 100644
--- a/test/Transforms/GlobalOpt/metadata.ll
+++ b/test/Transforms/GlobalOpt/metadata.ll
@@ -6,7 +6,7 @@
 @G = internal global i8** null
 
 define i32 @main(i32 %argc, i8** %argv) {
-; CHECK: @main
+; CHECK-LABEL: @main(
 ; CHECK: %G = alloca
   store i8** %argv, i8*** @G
   ret i32 0
diff --git a/test/Transforms/GlobalOpt/tls.ll b/test/Transforms/GlobalOpt/tls.ll
index 7a410e5ed20b..59ae23a4f6a8 100644
--- a/test/Transforms/GlobalOpt/tls.ll
+++ b/test/Transforms/GlobalOpt/tls.ll
@@ -29,7 +29,7 @@ entry:
   %1 = load i32* %0, align 4
   ret i32 %1
 
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; Make sure that the load from @ip hasn't been removed.
 ; CHECK: load i32** @ip
 ; CHECK: ret
@@ -46,7 +46,7 @@ entry:
   tail call void @signal() nounwind
   ret void
 
-; CHECK: @g
+; CHECK-LABEL: @g(
 ; Make sure that the store to @ip hasn't been removed.
 ; CHECK: store {{.*}} @ip
 ; CHECK: ret
diff --git a/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
index d613601e8d78..d9787232d971 100644
--- a/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
+++ b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
@@ -6,6 +6,6 @@ define i32 @test1(i64 %idx) nounwind {
   %arrayidx = getelementptr inbounds [10 x i32]* @zero, i64 0, i64 %idx
   %l = load i32* %arrayidx
   ret i32 %l
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/IPConstantProp/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
index 76c90e0cddd9..a8020e6014b0 100644
--- a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-darwin"
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: if.end.i126:
 ; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
 define void @test() nounwind {
diff --git a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
index c0c508f02ecd..8247886e66d9 100644
--- a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -9,7 +9,7 @@
 ; SCEV. Since it's an i8*, it has unit stride so we never adjust the
 ; SCEV expression in a way that would convert it to an integer type.
 
-; CHECK: @testnullptrptr
+; CHECK-LABEL: @testnullptrptr(
 ; CHECK: loop:
 ; CHECK: icmp ne
 define i8 @testnullptrptr(i8* %buf, i8* %end) nounwind {
@@ -34,7 +34,7 @@ exit:
   ret i8 %snext
 }
 
-; CHECK: @testptrptr
+; CHECK-LABEL: @testptrptr(
 ; CHECK: loop:
 ; CHECK: icmp ne
 define i8 @testptrptr(i8* %buf, i8* %end) nounwind {
@@ -59,7 +59,7 @@ exit:
   ret i8 %snext
 }
 
-; CHECK: @testnullptrint
+; CHECK-LABEL: @testnullptrint(
 ; CHECK: loop:
 ; CHECK: icmp ne
 define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
@@ -89,7 +89,7 @@ exit:
   ret i8 %snext
 }
 
-; CHECK: @testptrint
+; CHECK-LABEL: @testptrint(
 ; CHECK: loop:
 ; CHECK: icmp ne
 define i8 @testptrint(i8* %buf, i8* %end) nounwind {
diff --git a/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
index ccf259597e30..8f0cb80a1070 100644
--- a/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
+++ b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; PR11350: Check that SimplifyIndvar handles a cycle of useless self-phis.
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NOT: lcssa = phi
 define void @test() nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
index 7c5f81896f16..643d3cb333ee 100644
--- a/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
+++ b/test/Transforms/IndVarSimplify/2012-07-17-lftr-undef.ll
@@ -2,7 +2,7 @@
 ; PR13371: indvars pass incorrectly substitutes 'undef' values
 ;
 ; LFTR should not user %undef as the loop counter.
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NOT: icmp{{.*}}undef
 @.str3 = private constant [6 x i8] c"%lld\0A\00", align 1
 declare i32 @printf(i8* noalias nocapture, ...) nounwind
diff --git a/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
index 5c478669d298..5f6ff36cf574 100644
--- a/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
+++ b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
@@ -19,7 +19,7 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: phi i1
 ; CHECK: call void @aux(i1 false, i1 false)
 }
diff --git a/test/Transforms/IndVarSimplify/dont-recompute.ll b/test/Transforms/IndVarSimplify/dont-recompute.ll
index d37b0e21f826..e5ced0f2e4bf 100644
--- a/test/Transforms/IndVarSimplify/dont-recompute.ll
+++ b/test/Transforms/IndVarSimplify/dont-recompute.ll
@@ -21,7 +21,7 @@
 
 declare void @func(i32)
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 define void @test(i32 %m) nounwind uwtable {
 entry:
   br label %for.body
@@ -45,7 +45,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i32 @test2(i32 %m) nounwind uwtable {
 entry:
   br label %for.body
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index 5dca71264665..b48403e9a23b 100644
--- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Indvars should be able to simplify simple comparisons involving
 ; induction variables.
 
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: %cond = and i1 %tobool.not, true
 
 define void @foo(i64 %n, i32* nocapture %p) nounwind {
@@ -42,7 +42,7 @@ return:
 
 ; Don't eliminate an icmp that's contributing to the loop exit test though.
 
-; CHECK: @_ZNK4llvm5APInt3ultERKS0_
+; CHECK-LABEL: @_ZNK4llvm5APInt3ultERKS0_(
 ; CHECK: %tmp99 = icmp sgt i32 %i, -1
 
 define i32 @_ZNK4llvm5APInt3ultERKS0_(i32 %tmp2.i1, i64** %tmp65, i64** %tmp73, i64** %tmp82, i64** %tmp90) {
@@ -85,7 +85,7 @@ bb20.loopexit:
 
 ; Indvars should eliminate the icmp here.
 
-; CHECK: @func_10
+; CHECK-LABEL: @func_10(
 ; CHECK-NOT: icmp
 ; CHECK: ret void
 
@@ -110,7 +110,7 @@ return:
 ; PR14432
 ; Indvars should not turn the second loop into an infinite one.
 
-; CHECK: @func_11
+; CHECK-LABEL: @func_11(
 ; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
 ; CHECK-NOT: br i1 true, label %noassert68, label %unrolledend
 
@@ -162,7 +162,7 @@ unrolledend:                                      ; preds = %forcond38
 declare void @llvm.trap() noreturn nounwind
 
 ; In this case the second loop only has a single iteration, fold the header away
-; CHECK: @func_12
+; CHECK-LABEL: @func_12(
 ; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
 ; CHECK: br i1 true, label %noassert68, label %unrolledend
 define i32 @func_12() nounwind uwtable {
diff --git a/test/Transforms/IndVarSimplify/eliminate-rem.ll b/test/Transforms/IndVarSimplify/eliminate-rem.ll
index f756389398fb..64fe7107b633 100644
--- a/test/Transforms/IndVarSimplify/eliminate-rem.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-rem.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 ; Indvars should be able to eliminate this srem.
-; CHECK: @simple
+; CHECK-LABEL: @simple(
 ; CHECK-NOT: rem
 ; CHECK: ret
 
@@ -32,7 +32,7 @@ bb12:                                             ; preds = %bb11, %bb
 }
 
 ; Indvars should be able to eliminate the (i+1)%n.
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK-NOT: rem
 ; CHECK: rem
 ; CHECK-NOT: rem
diff --git a/test/Transforms/IndVarSimplify/floating-point-iv.ll b/test/Transforms/IndVarSimplify/floating-point-iv.ll
index 266eebd5bb65..c5bf3860ab5d 100644
--- a/test/Transforms/IndVarSimplify/floating-point-iv.ll
+++ b/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -12,7 +12,7 @@ bb:		; preds = %bb, %entry
 
 return:		; preds = %bb
 	ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: icmp
 }
 
@@ -31,7 +31,7 @@ bb:		; preds = %bb, %entry
 
 return:		; preds = %bb
 	ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: icmp
 }
 
@@ -49,7 +49,7 @@ bb:		; preds = %bb, %entry
 
 return:
 	ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: fcmp
 }
 
@@ -66,7 +66,7 @@ bb:		; preds = %bb, %entry
 
 return:
 	ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: cmp
 ; CHECK: br i1 false
 }
@@ -86,7 +86,7 @@ define void @test5() nounwind {
 exit:
   ret void
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: icmp slt i32 {{.*}}, 0
 ; CHECK-NEXT: br i1
 }
diff --git a/test/Transforms/IndVarSimplify/iv-fold.ll b/test/Transforms/IndVarSimplify/iv-fold.ll
index e0b05cdb31f5..41a1f5ff91a2 100644
--- a/test/Transforms/IndVarSimplify/iv-fold.ll
+++ b/test/Transforms/IndVarSimplify/iv-fold.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Indvars should be able to fold IV increments into shr when low bits are zero.
 ;
-; CHECK: @foldIncShr
+; CHECK-LABEL: @foldIncShr(
 ; CHECK: shr.1 = lshr i32 %0, 5
 define i32 @foldIncShr(i32* %bitmap, i32 %bit_addr, i32 %nbits) nounwind {
 entry:
@@ -31,7 +31,7 @@ while.end:
 ; Invdars should not fold an increment into shr unless 2^shiftBits is
 ; a multiple of the recurrence step.
 ;
-; CHECK: @noFoldIncShr
+; CHECK-LABEL: @noFoldIncShr(
 ; CHECK: shr.1 = lshr i32 %inc.1, 5
 define i32 @noFoldIncShr(i32* %bitmap, i32 %bit_addr, i32 %nbits) nounwind {
 entry:
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
new file mode 100644
index 000000000000..e4c31d125c60
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -indvars -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
+
+; Derived from ptriv in lftr-reuse.ll
+define void @ptriv_as2(i8 addrspace(2)* %base, i32 %n) nounwind {
+; CHECK-LABEL: @ptriv_as2(
+entry:
+  %idx.trunc = trunc i32 %n to i8
+  %add.ptr = getelementptr inbounds i8 addrspace(2)* %base, i8 %idx.trunc
+  %cmp1 = icmp ult i8 addrspace(2)* %base, %add.ptr
+  br i1 %cmp1, label %for.body, label %for.end
+
+; Make sure the added GEP has the right index type
+; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
+
+; CHECK: for.body:
+; CHECK: phi i8 addrspace(2)*
+; CHECK-NOT: phi
+; CHECK-NOT: add{{[^r]}}
+; CHECK: icmp ne i8 addrspace(2)*
+; CHECK: br i1
+for.body:
+  %p.02 = phi i8 addrspace(2)* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+  ; cruft to make the IV useful
+  %sub.ptr.lhs.cast = ptrtoint i8 addrspace(2)* %p.02 to i8
+  %sub.ptr.rhs.cast = ptrtoint i8 addrspace(2)* %base to i8
+  %sub.ptr.sub = sub i8 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  store i8 %sub.ptr.sub, i8 addrspace(2)* %p.02
+  %incdec.ptr = getelementptr inbounds i8 addrspace(2)* %p.02, i32 1
+  %cmp = icmp ult i8 addrspace(2)* %incdec.ptr, %add.ptr
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @ptriv_as3(i8 addrspace(3)* %base, i32 %n) nounwind {
+; CHECK-LABEL: @ptriv_as3(
+entry:
+  %idx.trunc = trunc i32 %n to i16
+  %add.ptr = getelementptr inbounds i8 addrspace(3)* %base, i16 %idx.trunc
+  %cmp1 = icmp ult i8 addrspace(3)* %base, %add.ptr
+  br i1 %cmp1, label %for.body, label %for.end
+
+; Make sure the added GEP has the right index type
+; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
+
+; CHECK: for.body:
+; CHECK: phi i8 addrspace(3)*
+; CHECK-NOT: phi
+; CHECK-NOT: add{{[^r]}}
+; CHECK: icmp ne i8 addrspace(3)*
+; CHECK: br i1
+for.body:
+  %p.02 = phi i8 addrspace(3)* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+  ; cruft to make the IV useful
+  %sub.ptr.lhs.cast = ptrtoint i8 addrspace(3)* %p.02 to i16
+  %sub.ptr.rhs.cast = ptrtoint i8 addrspace(3)* %base to i16
+  %sub.ptr.sub = sub i16 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %conv = trunc i16 %sub.ptr.sub to i8
+  store i8 %conv, i8 addrspace(3)* %p.02
+  %incdec.ptr = getelementptr inbounds i8 addrspace(3)* %p.02, i32 1
+  %cmp = icmp ult i8 addrspace(3)* %incdec.ptr, %add.ptr
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
new file mode 100644
index 000000000000..2fac4a797e29
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -0,0 +1,44 @@
+;RUN: opt -S %s -indvars | FileCheck %s
+
+; CHECK-LABEL: @foo(
+; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16
+; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512
+define void @foo() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i16 [ 0, %entry ], [ %inc, %for.body ]
+  %conv2 = sext i16 %i.01 to i32
+  call void @bar(i32 %conv2) #1
+  %inc = add i16 %i.01, 1
+  %cmp = icmp slt i16 %inc, 512
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Check that post-incrementing the backedge taken count does not overflow.
+; CHECK-LABEL: @postinc(
+; CHECK: icmp eq i32 %indvars.iv.next, 256
+define i32 @postinc() #0 {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %first.0 = phi i8 [ 0, %entry ], [ %inc, %do.body ]
+  %conv = zext i8 %first.0 to i32
+  call void  @bar(i32 %conv) #1
+  %inc = add i8 %first.0, 1
+  %cmp = icmp eq i8 %first.0, -1
+  br i1 %cmp, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.body
+  ret i32 0
+}
+
+declare void @bar(i32)
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 7fb36e509aed..fe3df5cfa88c 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -165,7 +165,7 @@ entry:
   %lim = add i32 %x, %n
   %cmp.ph = icmp ult i32 %x, %lim
   br i1 %cmp.ph, label %loop, label %exit
-; CHECK: @geplftr
+; CHECK-LABEL: @geplftr(
 ; CHECK: loop:
 ; CHECK: phi i8*
 ; DISABLE-NOT: phi      // This check is currently disabled
@@ -190,7 +190,7 @@ exit:
 define void @nevertaken() nounwind uwtable ssp {
 entry:
   br label %loop
-; CHECK: @nevertaken
+; CHECK-LABEL: @nevertaken(
 ; CHECK: loop:
 ; CHECK-NOT: phi
 ; CHECK-NOT: add
diff --git a/test/Transforms/IndVarSimplify/lftr-zext.ll b/test/Transforms/IndVarSimplify/lftr-zext.ll
new file mode 100644
index 000000000000..32fa61af1801
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-zext.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@data = common global [240 x i8] zeroinitializer, align 16
+
+define void @foo(i8* %a) nounwind uwtable ssp {
+; CHECK: %exitcond
+; CHECK-NOT: ([240 x i8]* @data, i64 0, i64 -16)
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %i.0 = phi i8 [ 0, %0 ], [ %5, %1 ]
+  %p.0 = phi i8* [ getelementptr inbounds ([240 x i8]* @data, i64 0, i64 0), %0 ], [ %4, %1 ]
+  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+  %2 = getelementptr inbounds i8* %.0, i64 1
+  %3 = load i8* %.0, align 1
+  %4 = getelementptr inbounds i8* %p.0, i64 1
+  store i8 %3, i8* %p.0, align 1
+  %5 = add i8 %i.0, 1
+  %6 = icmp ult i8 %5, -16
+  br i1 %6, label %1, label %7
+
+; <label>:7                                       ; preds = %1
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/IndVarSimplify/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
index abf1bc3a9d09..5d2c8c7209f7 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | not grep br
-;
-; Testcase distilled from 256.bzip2
+; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | FileCheck %s
 
-define i32 @main() {
+; Testcase distilled from 256.bzip2
+; CHECK-LABEL: @test1
+; CHECK-NOT: br
+define i32 @test1() {
 entry:
         br label %loopentry
 
@@ -19,3 +20,28 @@ loopexit:               ; preds = %loopentry
         ret i32 %tmp.2
 }
 
+
+; PR12377
+; CHECK-LABEL: @test2
+; CHECK: [[VAR1:%.+]] = add i32 %arg, -11
+; CHECK: [[VAR2:%.+]] = lshr i32 [[VAR1]], 1
+; CHECK: [[VAR3:%.+]] = add i32 [[VAR2]], 1
+; CHECK: [[VAR4:%.+]] = phi i32 [ 0, %bb ], [ [[VAR3]], %bb1.preheader ]
+; CHECK: ret i32 [[VAR4]]
+define i32 @test2(i32 %arg) {
+bb:
+  %tmp = icmp ugt i32 %arg, 10
+  br i1 %tmp, label %bb1, label %bb7
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i32 [ %tmp5, %bb1 ], [ 0, %bb ]
+  %tmp3 = phi i32 [ %tmp4, %bb1 ], [ %arg, %bb ]
+  %tmp4 = add i32 %tmp3, -2
+  %tmp5 = add i32 %tmp2, 1
+  %tmp6 = icmp ugt i32 %tmp4, 10
+  br i1 %tmp6, label %bb1, label %bb7
+
+bb7:                                              ; preds = %bb1, %bb
+  %tmp8 = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ]
+  ret i32 %tmp8
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
index da38de538f7b..af01fe53864b 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
@@ -1,9 +1,4 @@
 ; RUN: opt < %s -indvars -loop-deletion -S | grep phi | count 1
-; XFAIL: *
-
-; Indvars can't evaluate this loop, because ScalarEvolution can't compute
-; an exact trip count, because it doesn't know if dividing by the stride will
-; have a remainder. It could be done with more aggressive VRP though.
 
 define i32 @test(i32 %x_offs) nounwind readnone {
 entry:
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 507f695e67c5..057669277cca 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -223,13 +223,18 @@ entry:
   %halfLim = ashr i32 %limit, 2
   br label %loop
 
-; Test cloning an or, which is not an OverflowBinaryOperator.
+; This test originally checked that the OR instruction was cloned. Now
+; ScalarEvolution is able to understand the loop evolution and knows that
+; '%iv' at the end of the loop is an even value. Thus '%val' is computed at
+; the end of the loop and the OR instruction is replaced by an ADD that
+; yields an equivalent result.
 ;
 ; CHECK: loop:
 ; CHECK: phi i64
 ; CHECK-NOT: sext
-; CHECK: or i64
+; CHECK: icmp slt i32
 ; CHECK: exit:
+; CHECK: add i64
 loop:
   %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ]
   %t1 = sext i32 %iv to i64
diff --git a/test/Transforms/IndVarSimplify/tripcount_compute.ll b/test/Transforms/IndVarSimplify/tripcount_compute.ll
index 8835b9627fde..626a29b20b7e 100644
--- a/test/Transforms/IndVarSimplify/tripcount_compute.ll
+++ b/test/Transforms/IndVarSimplify/tripcount_compute.ll
@@ -5,7 +5,7 @@
 ; the exit value of the loop will be for some value, allowing us to substitute
 ; it directly into users outside of the loop, making the loop dead.
 
-; CHECK: @linear_setne
+; CHECK-LABEL: @linear_setne(
 ; CHECK: ret i32 100
 
 define i32 @linear_setne() {
@@ -22,7 +22,7 @@ loopexit:		; preds = %loop
 	ret i32 %i
 }
 
-; CHECK: @linear_setne_2
+; CHECK-LABEL: @linear_setne_2(
 ; CHECK: ret i32 100
 
 define i32 @linear_setne_2() {
@@ -39,7 +39,7 @@ loopexit:		; preds = %loop
 	ret i32 %i
 }
 
-; CHECK: @linear_setne_overflow
+; CHECK-LABEL: @linear_setne_overflow(
 ; CHECK: ret i32 0
 
 define i32 @linear_setne_overflow() {
@@ -56,7 +56,7 @@ loopexit:		; preds = %loop
 	ret i32 %i
 }
 
-; CHECK: @linear_setlt
+; CHECK-LABEL: @linear_setlt(
 ; CHECK: ret i32 100
 
 define i32 @linear_setlt() {
@@ -73,7 +73,7 @@ loopexit:		; preds = %loop
 	ret i32 %i
 }
 
-; CHECK: @quadratic_setlt
+; CHECK-LABEL: @quadratic_setlt(
 ; CHECK: ret i32 34
 
 define i32 @quadratic_setlt() {
@@ -91,7 +91,7 @@ loopexit:		; preds = %loop
 	ret i32 %i
 }
 
-; CHECK: @chained
+; CHECK-LABEL: @chained(
 ; CHECK: ret i32 200
 
 define i32 @chained() {
@@ -117,7 +117,7 @@ loopexit2:		; preds = %loop2
 	ret i32 %j
 }
 
-; CHECK: @chained4
+; CHECK-LABEL: @chained4(
 ; CHECK: ret i32 400
 
 define i32 @chained4() {
diff --git a/test/Transforms/IndVarSimplify/udiv-invariant-but-traps.ll b/test/Transforms/IndVarSimplify/udiv-invariant-but-traps.ll
new file mode 100644
index 000000000000..ef38f5d6a916
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/udiv-invariant-but-traps.ll
@@ -0,0 +1,32 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+@b = common global i32 0, align 4
+
+define i32 @foo(i32 %x, i1 %y) {
+bb0:
+  br label %bb1
+
+bb1:
+  br i1 %y, label %bb14, label %bb8
+
+bb8:
+  %i = phi i64 [ %i.next, %bb8 ], [ 0, %bb1 ]
+  %i.next = add i64 %i, 1
+  %div = udiv i32 1, %x
+  %c = icmp eq i64 %i.next, 6
+  br i1 %c, label %bb11, label %bb8
+
+bb11:
+  br i1 %y, label %bb1, label %bb13
+
+bb13:
+  store i32 %div, i32* @b, align 4
+  br label %bb14
+
+bb14:
+  ret i32 0
+}
+
+; CHECK-LABEL: @foo(
+; CHECK: bb8:
+; CHECK: udiv
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
index 8260093d1c37..aee455d08c80 100644
--- a/test/Transforms/IndVarSimplify/udiv.ll
+++ b/test/Transforms/IndVarSimplify/udiv.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Indvars shouldn't emit a udiv here, because there's no udiv in the
 ; original code. This comes from SingleSource/Benchmarks/Shootout/sieve.c.
 
-; CHECK: @main
+; CHECK-LABEL: @main(
 ; CHECK-NOT: div
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
@@ -130,7 +130,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 ; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
 ; the original code.
 
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: for.body.preheader:
 ; CHECK-NEXT: udiv
 
diff --git a/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
index c58a3af62fcc..a421003aa30d 100644
--- a/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ b/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -26,7 +26,7 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 
 ; First check that we move the sub into the preheader, it doesn't have to be
 ; executed if %cmp4 == false
diff --git a/test/Transforms/IndVarSimplify/widen-nsw.ll b/test/Transforms/IndVarSimplify/widen-nsw.ll
new file mode 100644
index 000000000000..56c3c0d9f128
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/widen-nsw.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; CHECK-LABEL: @test1
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+define i32 @test1(i32* %a) #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, 1000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i32 %sum.0
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
index 1ce74e6e41b8..b37b9f2ffa28 100644
--- a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
+++ b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
@@ -17,7 +17,7 @@ define void @bar(i32* byval %x) {
 }
 
 define void @foo(i32* %x) {
-; CHECK: define void @foo
+; CHECK-LABEL: define void @foo(
 ; CHECK: store i32 %1, i32* %x
   call void @bar(i32* byval %x)
   ret void
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
index d04d54e3a538..3c5052b883b2 100644
--- a/test/Transforms/Inline/alloca-bonus.ll
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -7,7 +7,7 @@ declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
 @glbl = external global i32
 
 define void @outer1() {
-; CHECK: @outer1
+; CHECK-LABEL: @outer1(
 ; CHECK-NOT: call void @inner1
   %ptr = alloca i32
   call void @inner1(i32* %ptr)
@@ -26,7 +26,7 @@ define void @inner1(i32 *%ptr) {
 }
 
 define void @outer2() {
-; CHECK: @outer2
+; CHECK-LABEL: @outer2(
 ; CHECK: call void @inner2
   %ptr = alloca i32
   call void @inner2(i32* %ptr)
@@ -46,7 +46,7 @@ define void @inner2(i32 *%ptr) {
 }
 
 define void @outer3() {
-; CHECK: @outer3
+; CHECK-LABEL: @outer3(
 ; CHECK-NOT: call void @inner3
   %ptr = alloca i32
   call void @inner3(i32* %ptr, i1 undef)
@@ -85,7 +85,7 @@ bb.false:
 }
 
 define void @outer4(i32 %A) {
-; CHECK: @outer4
+; CHECK-LABEL: @outer4(
 ; CHECK-NOT: call void @inner4
   %ptr = alloca i32
   call void @inner4(i32* %ptr, i32 %A)
@@ -126,7 +126,7 @@ bb.false:
 }
 
 define void @outer5() {
-; CHECK: @outer5
+; CHECK-LABEL: @outer5(
 ; CHECK-NOT: call void @inner5
   %ptr = alloca i32
   call void @inner5(i1 false, i32* %ptr)
diff --git a/test/Transforms/Inline/alloca-merge-align-nodl.ll b/test/Transforms/Inline/alloca-merge-align-nodl.ll
new file mode 100644
index 000000000000..301505ff7910
--- /dev/null
+++ b/test/Transforms/Inline/alloca-merge-align-nodl.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; This variant of the test has no data layout information.
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s = type { i32, i32 }
+
+define void @foo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 4
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 4
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+define void @foo0(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32]
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 4
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+declare void @bar(i32*) #1
+
+define void @goo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 32
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 32
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+; CHECK-LABEL: @main
+; CHECK: alloca [2 x i32], align 32
+; CHECK-NOT: alloca [2 x i32]
+; CHECK: ret i32 0
+
+define signext i32 @main() {
+entry:
+  %a = alloca i64, align 8
+  %tmpcast = bitcast i64* %a to %struct.s*
+  store i64 0, i64* %a, align 8
+  %a1 = bitcast i64* %a to i32*
+  store i32 1, i32* %a1, align 8
+  call void @foo(%struct.s* byval %tmpcast)
+  store i32 2, i32* %a1, align 8
+  call void @goo(%struct.s* byval %tmpcast)
+  ret i32 0
+}
+
+; CHECK-LABEL: @test0
+; CHECK: alloca [2 x i32], align 32
+; CHECK: alloca [2 x i32]
+; CHECK: ret i32 0
+
+define signext i32 @test0() {
+entry:
+  %a = alloca i64, align 8
+  %tmpcast = bitcast i64* %a to %struct.s*
+  store i64 0, i64* %a, align 8
+  %a1 = bitcast i64* %a to i32*
+  store i32 1, i32* %a1, align 8
+  call void @foo0(%struct.s* byval %tmpcast)
+  store i32 2, i32* %a1, align 8
+  call void @goo(%struct.s* byval %tmpcast)
+  ret i32 0
+}
diff --git a/test/Transforms/Inline/alloca-merge-align.ll b/test/Transforms/Inline/alloca-merge-align.ll
new file mode 100644
index 000000000000..d357b3cd61a4
--- /dev/null
+++ b/test/Transforms/Inline/alloca-merge-align.ll
@@ -0,0 +1,122 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s = type { i32, i32 }
+
+define void @foo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 4
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 4
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+define void @foo0(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32]
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 4
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+define void @foo1(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 1
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 4
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+declare void @bar(i32*) #1
+
+define void @goo(%struct.s* byval nocapture readonly %a) {
+entry:
+  %x = alloca [2 x i32], align 32
+  %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
+  %0 = load i32* %a1, align 4
+  %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+  store i32 %0, i32* %arrayidx, align 32
+  %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
+  %1 = load i32* %b, align 4
+  %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+  store i32 %1, i32* %arrayidx2, align 4
+  call void @bar(i32* %arrayidx) #2
+  ret void
+}
+
+; CHECK-LABEL: @main
+; CHECK: alloca [2 x i32], align 32
+; CHECK-NOT: alloca [2 x i32]
+; CHECK: ret i32 0
+
+define signext i32 @main() {
+entry:
+  %a = alloca i64, align 8
+  %tmpcast = bitcast i64* %a to %struct.s*
+  store i64 0, i64* %a, align 8
+  %a1 = bitcast i64* %a to i32*
+  store i32 1, i32* %a1, align 8
+  call void @foo(%struct.s* byval %tmpcast)
+  store i32 2, i32* %a1, align 8
+  call void @goo(%struct.s* byval %tmpcast)
+  ret i32 0
+}
+
+; CHECK-LABEL: @test0
+; CHECK: alloca [2 x i32], align 32
+; CHECK-NOT: alloca [2 x i32]
+; CHECK: ret i32 0
+
+define signext i32 @test0() {
+entry:
+  %a = alloca i64, align 8
+  %tmpcast = bitcast i64* %a to %struct.s*
+  store i64 0, i64* %a, align 8
+  %a1 = bitcast i64* %a to i32*
+  store i32 1, i32* %a1, align 8
+  call void @foo0(%struct.s* byval %tmpcast)
+  store i32 2, i32* %a1, align 8
+  call void @goo(%struct.s* byval %tmpcast)
+  ret i32 0
+}
+
+; CHECK-LABEL: @test1
+; CHECK: {{alloca \[2 x i32\]$}}
+; CHECK-NOT: alloca [2 x i32]
+; CHECK: ret i32 0
+
+define signext i32 @test1() {
+entry:
+  %a = alloca i64, align 8
+  %tmpcast = bitcast i64* %a to %struct.s*
+  store i64 0, i64* %a, align 8
+  %a1 = bitcast i64* %a to i32*
+  store i32 1, i32* %a1, align 8
+  call void @foo0(%struct.s* byval %tmpcast)
+  store i32 2, i32* %a1, align 8
+  call void @foo1(%struct.s* byval %tmpcast)
+  ret i32 0
+}
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
index c918bc9d5dbb..a8703b898777 100644
--- a/test/Transforms/Inline/always-inline.ll
+++ b/test/Transforms/Inline/always-inline.ll
@@ -8,7 +8,7 @@ define i32 @inner1() alwaysinline {
   ret i32 1
 }
 define i32 @outer1() {
-; CHECK: @outer1
+; CHECK-LABEL: @outer1(
 ; CHECK-NOT: call
 ; CHECK: ret
 
@@ -17,7 +17,7 @@ define i32 @outer1() {
 }
 
 ; The always inliner can't DCE internal functions. PR2945
-; CHECK: @pr2945
+; CHECK-LABEL: @pr2945(
 define internal i32 @pr2945() nounwind {
   ret i32 0
 }
@@ -31,7 +31,7 @@ define void @outer2(i32 %N) {
 ; a function with a dynamic alloca into one without a dynamic alloca.
 ; rdar://6655932
 ;
-; CHECK: @outer2
+; CHECK-LABEL: @outer2(
 ; CHECK-NOT: call void @inner2
 ; CHECK-NOT: call void @inner2
 ; CHECK: ret void
@@ -51,7 +51,7 @@ entry:
 }
 define i32 @outer3() {
 entry:
-; CHECK: @outer3
+; CHECK-LABEL: @outer3(
 ; CHECK-NOT: call i32 @a
 ; CHECK: ret
 
@@ -69,7 +69,7 @@ entry:
 
 define i32 @outer4() {
 entry:
-; CHECK: @outer4
+; CHECK-LABEL: @outer4(
 ; CHECK: call i32 @b()
 ; CHECK: ret
 
@@ -89,7 +89,7 @@ two:
   ret i32 44
 }
 define i32 @outer5(i32 %x) {
-; CHECK: @outer5
+; CHECK-LABEL: @outer5(
 ; CHECK: call i32 @inner5
 ; CHECK: ret
 
@@ -113,7 +113,7 @@ return:
   ret void
 }
 define void @outer6() {
-; CHECK: @outer6
+; CHECK-LABEL: @outer6(
 ; CHECK: call void @inner6(i32 42)
 ; CHECK: ret
 
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
new file mode 100644
index 000000000000..53fb13f2baf5
--- /dev/null
+++ b/test/Transforms/Inline/attributes.ll
@@ -0,0 +1,112 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define i32 @noattr_callee(i32 %i) {
+  ret i32 %i
+}
+
+define i32 @sanitize_address_callee(i32 %i) sanitize_address {
+  ret i32 %i
+}
+
+define i32 @sanitize_thread_callee(i32 %i) sanitize_thread {
+  ret i32 %i
+}
+
+define i32 @sanitize_memory_callee(i32 %i) sanitize_memory {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_callee(i32 %i) alwaysinline {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_sanitize_address_callee(i32 %i) alwaysinline sanitize_address {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_sanitize_thread_callee(i32 %i) alwaysinline sanitize_thread {
+  ret i32 %i
+}
+
+define i32 @alwaysinline_sanitize_memory_callee(i32 %i) alwaysinline sanitize_memory {
+  ret i32 %i
+}
+
+
+; Check that:
+;  * noattr callee is inlined into noattr caller,
+;  * sanitize_(address|memory|thread) callee is not inlined into noattr caller,
+;  * alwaysinline callee is always inlined no matter what sanitize_* attributes are present.
+
+define i32 @test_no_sanitize_address(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_address_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_address(
+; CHECK-NEXT: @sanitize_address_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no_sanitize_memory(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_memory_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_memory(
+; CHECK-NEXT: @sanitize_memory_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no_sanitize_thread(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_thread_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_thread(
+; CHECK-NEXT: @sanitize_thread_callee
+; CHECK-NEXT: ret i32
+}
+
+
+; Check that:
+;  * noattr callee is not inlined into sanitize_(address|memory|thread) caller,
+;  * sanitize_(address|memory|thread) callee is inlined into the caller with the same attribute,
+;  * alwaysinline callee is always inlined no matter what sanitize_* attributes are present.
+
+define i32 @test_sanitize_address(i32 %arg) sanitize_address {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_address_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_address(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_sanitize_memory(i32 %arg) sanitize_memory {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_memory_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_memory(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_sanitize_thread(i32 %arg) sanitize_thread {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_thread_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_thread(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 39e25cb5d627..085694febc6a 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 %W) {
         %X = call i32 @test1f(i32 7)
         %Y = add i32 %X, %W
         ret i32 %Y
-; CHECK: @test1(
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: %Y = add i32 7, %W
 ; CHECK-NEXT: ret i32 %Y
 }
@@ -20,7 +20,7 @@ define i32 @test1(i32 %W) {
 
 %T = type { i32, i32 }
 
-; CHECK-NOT: @test2f
+; CHECK-NOT: @test2f(
 define internal %T* @test2f(i1 %cond, %T* %P) {
   br i1 %cond, label %T, label %F
   
@@ -41,7 +41,7 @@ define i32 @test2(i1 %cond) {
   %D = load i32* %C
   ret i32 %D
   
-; CHECK: @test2(
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: = alloca
 ; CHECK: ret i32
 }
@@ -75,7 +75,7 @@ define i32 @test3() {
 ;
 ; The call to @h *can* be inlined.
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 define i32 @test() {
 ; CHECK: call i32 @f()
   %a = call i32 @f()
diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll
index e601faf2bb37..d7597adaf84f 100644
--- a/test/Transforms/Inline/byval.ll
+++ b/test/Transforms/Inline/byval.ll
@@ -104,3 +104,26 @@ entry:
 ; CHECK: ret i32 4
 }
 
+%struct.S0 = type { i32 }
+
+@b = global %struct.S0 { i32 1 }, align 4
+@a = common global i32 0, align 4
+
+define internal void @f5(%struct.S0* byval nocapture readonly align 4 %p) {
+entry:
+	store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+	%f2 = getelementptr inbounds %struct.S0* %p, i64 0, i32 0
+	%0 = load i32* %f2, align 4
+	store i32 %0, i32* @a, align 4
+	ret void
+}
+
+define i32 @test5() {
+entry:
+	tail call void @f5(%struct.S0* byval align 4 @b)
+	%0 = load i32* @a, align 4
+	ret i32 %0
+; CHECK: @test5()
+; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+; CHECK-NOT: load i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+}
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll
index 97c52af9e0d5..7f30ffb306b4 100644
--- a/test/Transforms/Inline/delete-call.ll
+++ b/test/Transforms/Inline/delete-call.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s
 ; CHECK: Number of functions inlined
 
-; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=FUNCTIONATTRS %s
+; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=CHECK-FUNCTIONATTRS %s
 ; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
diff --git a/test/Transforms/Inline/devirtualize-2.ll b/test/Transforms/Inline/devirtualize-2.ll
index 02ff7679148d..b7eb1be19db2 100644
--- a/test/Transforms/Inline/devirtualize-2.ll
+++ b/test/Transforms/Inline/devirtualize-2.ll
@@ -40,5 +40,5 @@ define i32 @test2(i8* %this) ssp align 2 {
   ret i32 %X
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret i32 41
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
index d46154ef6a98..89482941267c 100644
--- a/test/Transforms/Inline/devirtualize.ll
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -14,7 +14,7 @@ entry:
   %X = add i32 %call, 4
   ret i32 %X
   
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: store
 ; CHECK-NEXT: store
diff --git a/test/Transforms/Inline/dynamic_alloca_test.ll b/test/Transforms/Inline/dynamic_alloca_test.ll
index 15a5c66815d2..1c17c7cd9748 100644
--- a/test/Transforms/Inline/dynamic_alloca_test.ll
+++ b/test/Transforms/Inline/dynamic_alloca_test.ll
@@ -19,7 +19,7 @@ define internal void @callee(i32 %N) {
 }
 
 define void @foo(i32 %N) {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: alloca i32, i32 %{{.*}}
 ; CHECK: call i8* @llvm.stacksave()
 ; CHECK: alloca i32, i32 %{{.*}}
diff --git a/test/Transforms/Inline/gvn-inline-iteration.ll b/test/Transforms/Inline/gvn-inline-iteration.ll
index 526ed79e7b48..7916a13df995 100644
--- a/test/Transforms/Inline/gvn-inline-iteration.ll
+++ b/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -12,9 +12,9 @@ entry:
   %call = tail call i32 %tmp3() nounwind          ; <i32> [#uses=1]
   ret i32 %call
 }
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: ret i32 7
-; CHECK: @bar
+; CHECK-LABEL: @bar(
 ; CHECK: ret i32 7
 
 define internal i32 @bar() nounwind readnone ssp {
diff --git a/test/Transforms/Inline/inline-byval-bonus.ll b/test/Transforms/Inline/inline-byval-bonus.ll
index f3ed819a7f38..052a5f12a191 100644
--- a/test/Transforms/Inline/inline-byval-bonus.ll
+++ b/test/Transforms/Inline/inline-byval-bonus.ll
@@ -21,7 +21,7 @@ define i32 @caller(%struct.sphere* %i) {
   %call = call i32 @ray_sphere(%struct.sphere* %i, %struct.ray* byval align 8 %shadow_ray, %struct.spoint* null)
   ret i32 %call
 
-; CHECK: @caller
+; CHECK-LABEL: @caller(
 ; CHECK-NOT: call i32 @ray_sphere
 ; CHECK: ret i32
 }
diff --git a/test/Transforms/Inline/inline-invoke-with-asm-call.ll b/test/Transforms/Inline/inline-invoke-with-asm-call.ll
new file mode 100644
index 000000000000..876f8d7455bf
--- /dev/null
+++ b/test/Transforms/Inline/inline-invoke-with-asm-call.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+target triple = "x86_64-apple-darwin"
+
+; The inliner assumes that inline asm does not throw. This test case makes
+; sure that the inliner does not convert "call asm" to "invoke asm".
+; rdar://15317907
+; CHECK-LABEL: @caller
+; Make sure we are generating "call asm" instead of "invoke asm".
+; CHECK: call void asm
+; CHECK-LABEL: @callee_with_asm
+define void @caller() {
+  br i1 undef, label %1, label %4
+
+; <label>:1
+  invoke void @callee_with_asm()
+          to label %4 unwind label %2
+
+; <label>:2
+  %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } undef
+
+; <label>:4
+  ret void
+}
+
+define void @callee_with_asm() {
+  call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
+  ret void
+}
+
+declare i32 @__objc_personality_v0(...)
diff --git a/test/Transforms/Inline/inline-optnone.ll b/test/Transforms/Inline/inline-optnone.ll
new file mode 100644
index 000000000000..9b99c4558ea0
--- /dev/null
+++ b/test/Transforms/Inline/inline-optnone.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Test that functions with attribute optnone are not inlined.
+; Also test that only functions with attribute alwaysinline are
+; valid candidates for inlining if the caller has the optnone attribute.
+
+; Function Attrs: alwaysinline nounwind readnone uwtable
+define i32 @alwaysInlineFunction(i32 %a) #0 {
+entry:
+  %mul = mul i32 %a, %a
+  ret i32 %mul
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @simpleFunction(i32 %a) #1 {
+entry:
+  %add = add i32 %a, %a
+  ret i32 %add
+}
+
+; Function Attrs: nounwind noinline optnone readnone uwtable
+define i32 @OptnoneFunction(i32 %a) #2 {
+entry:
+  %0 = tail call i32 @alwaysInlineFunction(i32 %a)
+  %1 = tail call i32 @simpleFunction(i32 %a)
+  %add = add i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @OptnoneFunction
+; CHECK-NOT: call i32 @alwaysInlineFunction(i32 %a)
+; CHECK: call i32 @simpleFunction(i32 %a)
+; CHECK: ret
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @bar(i32 %a) #1 {
+entry:
+  %0 = tail call i32 @OptnoneFunction(i32 5)
+  %1 = tail call i32 @simpleFunction(i32 6)
+  %add = add i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @bar
+; CHECK: call i32 @OptnoneFunction(i32 5)
+; CHECK-NOT: call i32 @simpleFunction(i32 6)
+; CHECK: ret
+
+
+attributes #0 = { alwaysinline nounwind readnone uwtable }
+attributes #1 = { nounwind readnone uwtable }
+attributes #2 = { nounwind noinline optnone readnone uwtable }
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index 3898aa7044ac..4361c2e8bbf4 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -52,7 +52,7 @@ UnifiedReturnBlock:		; preds = %cond_next13
 declare void @ext(i32*)
 
 define void @test() {
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NOT: ret
 ;
 ; FIXME: This should be a CHECK-NOT, but currently we have a bug that causes us
@@ -128,7 +128,7 @@ end4:
 define void @outer2(i32 %z, i1 %b) {
 ; Ensure that after inlining, none of the blocks with a call to @f actually
 ; make it through inlining.
-; CHECK: define void @outer2
+; CHECK-LABEL: define void @outer2(
 ; CHECK-NOT: call
 ; CHECK: ret void
 
@@ -164,7 +164,7 @@ define void @PR12470_outer() {
 ; This previously crashed during inliner cleanup and folding inner return
 ; instructions. Check that we don't crash and we produce a function with a single
 ; return instruction due to merging the returns of the inlined function.
-; CHECK: define void @PR12470_outer
+; CHECK-LABEL: define void @PR12470_outer(
 ; CHECK-NOT: call
 ; CHECK: ret void
 ; CHECK-NOT: ret void
@@ -202,7 +202,7 @@ for.cond12.for.inc26_crit_edge.2:
 }
 
 define void @crasher_outer() {
-; CHECK: @crasher_outer
+; CHECK-LABEL: @crasher_outer(
 ; CHECK-NOT: call
 ; CHECK: ret void
 ; CHECK-NOT: ret
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 77bc3784acb4..b59a270468e0 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -6,7 +6,7 @@ define internal i32 @callee1(i32 %A, i32 %B) {
 }
 
 define i32 @caller1() {
-; CHECK: define i32 @caller1
+; CHECK-LABEL: define i32 @caller1(
 ; CHECK-NEXT: ret i32 3
 
   %X = call i32 @callee1( i32 10, i32 3 )
@@ -21,7 +21,7 @@ define i32 @caller2() {
 ; inline and be cheap. We should eventually do that and lower the threshold here
 ; to 1.
 ;
-; CHECK: @caller2
+; CHECK-LABEL: @caller2(
 ; CHECK-NOT: call void @callee2
 ; CHECK: ret
 
@@ -61,7 +61,7 @@ define i32 @caller3() {
 ; it doesn't count toward the inline cost when constant-prop proves those paths
 ; dead.
 ;
-; CHECK: @caller3
+; CHECK-LABEL: @caller3(
 ; CHECK-NOT: call
 ; CHECK: ret i32 6
 
@@ -119,7 +119,7 @@ define i8 @caller4(i8 %z) {
 ; as they are used heavily in standard library code and generic C++ code where
 ; the arguments are oftent constant but complete generality is required.
 ;
-; CHECK: @caller4
+; CHECK-LABEL: @caller4(
 ; CHECK-NOT: call
 ; CHECK: ret i8 -1
 
@@ -153,7 +153,7 @@ define i64 @caller5(i64 %y) {
 ; Check that we can round trip constants through various kinds of casts etc w/o
 ; losing track of the constant prop in the inline cost analysis.
 ;
-; CHECK: @caller5
+; CHECK-LABEL: @caller5(
 ; CHECK-NOT: call
 ; CHECK: ret i64 -1
 
@@ -187,6 +187,37 @@ bb.false:
   ret i64 %y8
 }
 
+define float @caller6() {
+; Check that we can constant-prop through fcmp instructions
+;
+; CHECK-LABEL: @caller6(
+; CHECK-NOT: call
+; CHECK: ret
+  %x = call float @callee6(float 42.0)
+  ret float %x
+}
+
+define float @callee6(float %x) {
+  %icmp = fcmp ugt float %x, 42.0
+  br i1 %icmp, label %bb.true, label %bb.false
+
+bb.true:
+  ; This block mustn't be counted in the inline cost.
+  %x1 = fadd float %x, 1.0
+  %x2 = fadd float %x1, 1.0
+  %x3 = fadd float %x2, 1.0
+  %x4 = fadd float %x3, 1.0
+  %x5 = fadd float %x4, 1.0
+  %x6 = fadd float %x5, 1.0
+  %x7 = fadd float %x6, 1.0
+  %x8 = fadd float %x7, 1.0
+  ret float %x8
+
+bb.false:
+  ret float %x
+}
+
+
 
 define i32 @PR13412.main() {
 ; This is a somewhat complicated three layer subprogram that was reported to
diff --git a/test/Transforms/Inline/inline_minisize.ll b/test/Transforms/Inline/inline_minisize.ll
index 3dddbcf3303d..b9aad6083399 100644
--- a/test/Transforms/Inline/inline_minisize.ll
+++ b/test/Transforms/Inline/inline_minisize.ll
@@ -200,7 +200,7 @@ for.end21:                                        ; preds = %for.cond14
 
 define i32 @fct3(i32 %c) nounwind uwtable ssp {
 entry:
-  ;CHECK: @fct3
+  ;CHECK-LABEL: @fct3(
   ;CHECK: call i32 @fct1
   ; The inline keyword gives a sufficient benefits to inline fct2
   ;CHECK-NOT: call i32 @fct2
@@ -216,7 +216,7 @@ entry:
 
 define i32 @fct4(i32 %c) minsize nounwind uwtable ssp {
 entry:
-  ;CHECK: @fct4
+  ;CHECK-LABEL: @fct4(
   ;CHECK: call i32 @fct1
   ; With Oz (minsize attribute), the benefit of inlining fct2
   ; is the same as fct1, thus no inlining for fct2
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
index ab2e954af151..678ee82f4b84 100644
--- a/test/Transforms/Inline/inline_returns_twice.ll
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -4,38 +4,81 @@
 ; if they are themselve marked as such.
 
 declare i32 @a() returns_twice
-declare i32 @b() returns_twice
 
-define i32 @f() {
+define i32 @inner1() {
 entry:
   %call = call i32 @a() returns_twice
   %add = add nsw i32 1, %call
   ret i32 %add
 }
 
-define i32 @g() {
+define i32 @outer1() {
 entry:
-; CHECK: define i32 @g
-; CHECK: call i32 @f()
-; CHECK-NOT: call i32 @a()
-  %call = call i32 @f()
+; CHECK-LABEL: define i32 @outer1(
+; CHECK: call i32 @inner1()
+  %call = call i32 @inner1()
   %add = add nsw i32 1, %call
   ret i32 %add
 }
 
-define i32 @h() returns_twice {
+define i32 @inner2() returns_twice {
 entry:
-  %call = call i32 @b() returns_twice
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @outer2() {
+entry:
+; CHECK-LABEL: define i32 @outer2(
+; CHECK: call i32 @a()
+  %call = call i32 @inner2() returns_twice
   %add = add nsw i32 1, %call
   ret i32 %add
 }
 
-define i32 @i() {
+define i32 @inner3() {
+entry:
+  %invoke = invoke i32 @a() returns_twice
+      to label %cont unwind label %lpad
+
+cont:
+  %add = add nsw i32 1, %invoke
+  ret i32 %add
+
+lpad:
+  %lp = landingpad i32 personality i8* null cleanup
+  resume i32 %lp
+}
+
+define i32 @outer3() {
+entry:
+; CHECK-LABEL: define i32 @outer3(
+; CHECK: call i32 @inner3()
+  %call = call i32 @inner3()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @inner4() returns_twice {
+entry:
+  %invoke = invoke i32 @a() returns_twice
+      to label %cont unwind label %lpad
+
+cont:
+  %add = add nsw i32 1, %invoke
+  ret i32 %add
+
+lpad:
+  %lp = landingpad i32 personality i8* null cleanup
+  resume i32 %lp
+}
+
+define i32 @outer4() {
 entry:
-; CHECK: define i32 @i
-; CHECK: call i32 @b()
-; CHECK-NOT: call i32 @h()
-  %call = call i32 @h() returns_twice
+; CHECK-LABEL: define i32 @outer4(
+; CHECK: invoke i32 @a()
+  %call = call i32 @inner4() returns_twice
   %add = add nsw i32 1, %call
   ret i32 %add
 }
diff --git a/test/Transforms/Inline/invoke-cost.ll b/test/Transforms/Inline/invoke-cost.ll
new file mode 100644
index 000000000000..84d33ad55120
--- /dev/null
+++ b/test/Transforms/Inline/invoke-cost.ll
@@ -0,0 +1,45 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+@glbl = external global i32
+
+declare void @f()
+declare i32 @__gxx_personality_v0(...)
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+declare void @_ZSt9terminatev()
+
+define void @inner1() {
+entry:
+  invoke void @f() to label %cont1 unwind label %terminate.lpad
+
+cont1:
+  invoke void @f() to label %cont2 unwind label %terminate.lpad
+
+cont2:
+  invoke void @f() to label %cont3 unwind label %terminate.lpad
+
+cont3:
+  invoke void @f() to label %cont4 unwind label %terminate.lpad
+
+cont4:
+  ret void
+
+terminate.lpad:
+  landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+define void @outer1() {
+; CHECK-LABEL: @outer1(
+;
+; This call should not get inlined because inner1 actually calls a function
+; many times, but it only does so through invoke as opposed to call.
+;
+; CHECK: call void @inner1
+  call void @inner1()
+  ret void
+}
diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll
index f4ffef3850f1..5abb77f2c3fc 100644
--- a/test/Transforms/Inline/lifetime-no-datalayout.ll
+++ b/test/Transforms/Inline/lifetime-no-datalayout.ll
@@ -10,7 +10,7 @@ define void @helper() {
 
 ; Size in llvm.lifetime.X should be -1 (unknown).
 define void @test() {
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NOT: lifetime
 ; CHECK: llvm.lifetime.start(i64 -1
 ; CHECK-NOT: lifetime
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
index fc73385295ed..12c433b9e626 100644
--- a/test/Transforms/Inline/lifetime.ll
+++ b/test/Transforms/Inline/lifetime.ll
@@ -14,7 +14,7 @@ define void @helper_both_markers() {
 }
 
 define void @test_both_markers() {
-; CHECK: @test_both_markers
+; CHECK-LABEL: @test_both_markers(
 ; CHECK: llvm.lifetime.start(i64 2
 ; CHECK-NEXT: llvm.lifetime.end(i64 2
   call void @helper_both_markers()
@@ -38,7 +38,7 @@ define void @helper_no_markers() {
 ;; We can't use CHECK-NEXT because there's an extra call void @use in between.
 ;; Instead, we use CHECK-NOT to verify that there are no other lifetime calls.
 define void @test_no_marker() {
-; CHECK: @test_no_marker
+; CHECK-LABEL: @test_no_marker(
 ; CHECK-NOT: lifetime
 ; CHECK: llvm.lifetime.start(i64 1
 ; CHECK-NOT: lifetime
@@ -64,7 +64,7 @@ define void @helper_two_casts() {
 }
 
 define void @test_two_casts() {
-; CHECK: @test_two_casts
+; CHECK-LABEL: @test_two_casts(
 ; CHECK-NOT: lifetime
 ; CHECK: llvm.lifetime.start(i64 4
 ; CHECK-NOT: lifetime
@@ -88,7 +88,7 @@ define void @helper_arrays_alloca() {
 }
 
 define void @test_arrays_alloca() {
-; CHECK: @test_arrays_alloca
+; CHECK-LABEL: @test_arrays_alloca(
 ; CHECK-NOT: lifetime
 ; CHECK: llvm.lifetime.start(i64 40,
 ; CHECK-NOT: lifetime
diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/Inline/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Inline/nested-inline.ll b/test/Transforms/Inline/nested-inline.ll
index 12926671722e..9d08ac0c19c6 100644
--- a/test/Transforms/Inline/nested-inline.ll
+++ b/test/Transforms/Inline/nested-inline.ll
@@ -6,7 +6,7 @@
 
 define fastcc void @foo(i32 %X) {
 entry:
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 	%ALL = alloca i32, align 4		; <i32*> [#uses=1]
 	%tmp1 = and i32 %X, 1		; <i32> [#uses=1]
 	%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
@@ -47,7 +47,7 @@ UnifiedReturnBlock:		; preds = %cond_next13
 	ret void
 }
 
-; CHECK-NOT: @bar
+; CHECK-NOT: @bar(
 define internal fastcc void @bar(i32 %X) {
 entry:
 	%ALL = alloca i32, align 4		; <i32*> [#uses=1]
@@ -101,7 +101,7 @@ declare void @ext(i32*)
 define void @test(i32 %X) {
 entry:
 ; CHECK: test
-; CHECK-NOT: @bar
+; CHECK-NOT: @bar(
 	tail call fastcc void @bar( i32 %X )
 	tail call fastcc void @bar( i32 %X )
 	tail call fastcc void @bar2( i32 %X )
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 5520093ee457..2e581a7dbc4f 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -25,7 +25,7 @@ return:                                           ; preds = %entry
 }
 
 
-;; CHECK: @bonk
+;; CHECK-LABEL: @bonk(
 ;; CHECK: call void @foo(i32 42)
 define void @bonk() nounwind ssp {
 entry:
@@ -62,7 +62,7 @@ return:                                           ; preds = %entry
 }
 
 
-; CHECK: @top_level
+; CHECK-LABEL: @top_level(
 ; CHECK: call void @f2(i32 122
 ; Here we inline one instance of the cycle, but we don't want to completely
 ; unroll it.
@@ -100,7 +100,7 @@ one.else:
 }
 
 define i32 @fib_caller() {
-; CHECK: @fib_caller
+; CHECK-LABEL: @fib_caller(
 ; CHECK-NOT: call
 ; CHECK: ret
   %f1 = call i32 @fib(i32 0)
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
index 60fc3e2a3326..af42bc7cedeb 100644
--- a/test/Transforms/Inline/ptr-diff.ll
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -1,9 +1,9 @@
 ; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
 
-target datalayout = "p:32:32"
+target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64"
 
 define i32 @outer1() {
-; CHECK: @outer1
+; CHECK-LABEL: @outer1(
 ; CHECK-NOT: call
 ; CHECK: ret i32
 
@@ -32,7 +32,7 @@ else:
 define i32 @outer2(i32* %ptr) {
 ; Test that an inbounds GEP disables this -- it isn't safe in general as
 ; wrapping changes the behavior of lessthan and greaterthan comparisions.
-; CHECK: @outer2
+; CHECK-LABEL: @outer2(
 ; CHECK: call i32 @inner2
 ; CHECK: ret i32
 
@@ -56,3 +56,46 @@ else:
   %t = load i32* %begin
   ret i32 %t
 }
+
+; The inttoptrs are free since it is a smaller integer to a larger
+; pointer size
+define i32 @inttoptr_free_cost(i32 %a, i32 %b, i32 %c) {
+  %p1 = inttoptr i32 %a to i32 addrspace(1)*
+  %p2 = inttoptr i32 %b to i32 addrspace(1)*
+  %p3 = inttoptr i32 %c to i32 addrspace(1)*
+  %t1 = load i32 addrspace(1)* %p1
+  %t2 = load i32 addrspace(1)* %p2
+  %t3 = load i32 addrspace(1)* %p3
+  %s = add i32 %t1, %t2
+  %s1 = add i32 %s, %t3
+  ret i32 %s1
+}
+
+define i32 @inttoptr_free_cost_user(i32 %begin, i32 %end) {
+; CHECK-LABEL: @inttoptr_free_cost_user(
+; CHECK-NOT: call
+  %x = call i32 @inttoptr_free_cost(i32 %begin, i32 %end, i32 9)
+  ret i32 %x
+}
+
+; The inttoptrs have a cost since it is a larger integer to a smaller
+; pointer size
+define i32 @inttoptr_cost_smaller_ptr(i32 %a, i32 %b, i32 %c) {
+  %p1 = inttoptr i32 %a to i32 addrspace(2)*
+  %p2 = inttoptr i32 %b to i32 addrspace(2)*
+  %p3 = inttoptr i32 %c to i32 addrspace(2)*
+  %t1 = load i32 addrspace(2)* %p1
+  %t2 = load i32 addrspace(2)* %p2
+  %t3 = load i32 addrspace(2)* %p3
+  %s = add i32 %t1, %t2
+  %s1 = add i32 %s, %t3
+  ret i32 %s1
+}
+
+define i32 @inttoptr_cost_smaller_ptr_user(i32 %begin, i32 %end) {
+; CHECK-LABEL: @inttoptr_cost_smaller_ptr_user(
+; CHECK: call
+  %x = call i32 @inttoptr_cost_smaller_ptr(i32 %begin, i32 %end, i32 9)
+  ret i32 %x
+}
+
diff --git a/test/Transforms/Inline/recursive.ll b/test/Transforms/Inline/recursive.ll
index fe1c041af9a8..b9b14d1dbb61 100644
--- a/test/Transforms/Inline/recursive.ll
+++ b/test/Transforms/Inline/recursive.ll
@@ -6,7 +6,7 @@ target triple = "i386-apple-darwin10.0"
 ; rdar://10853263
 
 ; Make sure that the callee is still here.
-; CHECK: define i32 @callee
+; CHECK-LABEL: define i32 @callee(
 define i32 @callee(i32 %param) {
  %yyy = alloca [100000 x i8]
  %r = bitcast [100000 x i8]* %yyy to i8*
@@ -14,7 +14,7 @@ define i32 @callee(i32 %param) {
  ret i32 4
 }
 
-; CHECK: define i32 @caller
+; CHECK-LABEL: define i32 @caller(
 ; CHECK-NEXT: entry:
 ; CHECK-NOT: alloca
 ; CHECK: ret
diff --git a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
index d2b2b0027a52..854ec604d01a 100644
--- a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
+++ b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
@@ -1,7 +1,8 @@
 ; Instcombine was missing a test that caused it to make illegal transformations
 ; sometimes.  In this case, it transforms the sub into an add:
-; RUN: opt < %s -instcombine -S | grep sub
-;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: sub
+
 define i32 @test(i32 %i, i32 %j) {
         %A = mul i32 %i, %j
         %B = sub i32 2, %A
diff --git a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
index 22574f77f1d9..49e55c620a49 100644
--- a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
+++ b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | not grep add
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: add
 
 define i32 @test(i32 %A) {
         %A.neg = sub i32 0, %A          ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
index c02d33ccc1b7..bb9a8181ccd4 100644
--- a/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
+++ b/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -1,6 +1,7 @@
-; This testcase can be simplified by "realizing" that alloca can never return 
+; This testcase can be simplified by "realizing" that alloca can never return
 ; null.
-; RUN: opt < %s -instcombine -simplifycfg -S | not grep br
+; RUN: opt < %s -instcombine -simplifycfg -S | FileCheck %s
+; CHECK-NOT: br
 
 declare i32 @bitmap_clear(...)
 
diff --git a/test/Transforms/InstCombine/2006-10-20-mask.ll b/test/Transforms/InstCombine/2006-10-20-mask.ll
index 0aaa5e8c21b4..e9797ae50c95 100644
--- a/test/Transforms/InstCombine/2006-10-20-mask.ll
+++ b/test/Transforms/InstCombine/2006-10-20-mask.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    grep and
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: and
 
 define i64 @foo(i64 %tmp, i64 %tmp2) {
         %tmp.upgrd.1 = trunc i64 %tmp to i32            ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index d3ba1e2287a3..8ab50e222745 100644
--- a/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:   grep mul | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: mul
+; CHECK: mul
 
 define <4 x float> @test(<4 x float> %V) {
         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
index 2665791fe086..272753cc98d6 100644
--- a/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
+++ b/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
 ; END.
 
 target datalayout = "e-p:32:32"
diff --git a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
index 4d1b982f6766..6298a0723338 100644
--- a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
+++ b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -instcombine -S | grep "ugt.*, 1"
 
 define i1 @test(i32 %tmp1030) {
-	%tmp1037 = icmp ne i32 %tmp1030, 40		; <i1> [#uses=1]
-	%tmp1039 = icmp ne i32 %tmp1030, 41		; <i1> [#uses=1]
+	%tmp1037 = icmp ne i32 %tmp1030, 39		; <i1> [#uses=1]
+	%tmp1039 = icmp ne i32 %tmp1030, 40		; <i1> [#uses=1]
 	%tmp1042 = and i1 %tmp1037, %tmp1039		; <i1> [#uses=1]
 	ret i1 %tmp1042
 }
diff --git a/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
index 23b606779e66..22c078250ece 100644
--- a/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
+++ b/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
@@ -1,23 +1,30 @@
 ; Ignore stderr, we expect warnings there
-; RUN: opt < %s -instcombine 2> /dev/null -S | not grep bitcast
+; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
+
+; CHECK-NOT: bitcast
 
 define void @a() {
-	ret void
+  ret void
 }
 
 define signext i32 @b(i32* inreg  %x)   {
-	ret i32 0
+  ret i32 0
 }
 
 define void @c(...) {
-	ret void
+  ret void
 }
 
 define void @g(i32* %y) {
-	call void bitcast (void ()* @a to void (i32*)*)( i32* noalias  %y )
-	call <2 x i32> bitcast (i32 (i32*)* @b to <2 x i32> (i32*)*)( i32* inreg  null )		; <<2 x i32>>:1 [#uses=0]
+; CHECK-LABEL: @g(
+; CHECK: call i64 bitcast (i32 (i32*)* @b to i64 (i32)*)(i32 0)
 	%x = call i64 bitcast (i32 (i32*)* @b to i64 (i32)*)( i32 0 )		; <i64> [#uses=0]
-	call void bitcast (void (...)* @c to void (i32)*)( i32 0 )
-	call void bitcast (void (...)* @c to void (i32)*)( i32 zeroext  0 )
-	ret void
+
+; The rest should not have bitcasts remaining
+; CHECK-NOT: bitcast
+  call void bitcast (void ()* @a to void (i32*)*)( i32* noalias  %y )
+  call <2 x i32> bitcast (i32 (i32*)* @b to <2 x i32> (i32*)*)( i32* inreg  null )		; <<2 x i32>>:1 [#uses=0]
+  call void bitcast (void (...)* @c to void (i32)*)( i32 0 )
+  call void bitcast (void (...)* @c to void (i32)*)( i32 zeroext  0 )
+  ret void
 }
diff --git a/test/Transforms/InstCombine/2008-01-06-VoidCast.ll b/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
index 407ff4ddc29a..5dcaa38edc02 100644
--- a/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
+++ b/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
@@ -1,10 +1,12 @@
-; RUN: opt < %s -instcombine -S | not grep bitcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define void @f(i16 %y) {
-	ret void
+  ret void
 }
 
 define i32 @g(i32 %y) {
-	%x = call i32 bitcast (void (i16)* @f to i32 (i32)*)( i32 %y )		; <i32> [#uses=1]
-	ret i32 %x
+; CHECK-LABEL: @g(
+; CHECK: call i32 bitcast
+  %x = call i32 bitcast (void (i16)* @f to i32 (i32)*)( i32 %y )		; <i32> [#uses=1]
+  ret i32 %x
 }
diff --git a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
index c161bcc9045d..6b4e89dbbe60 100644
--- a/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
+++ b/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
 
 define double @fold(i1 %a, double %b) {
 %s = select i1 %a, double 0., double 1.
diff --git a/test/Transforms/InstCombine/2008-02-13-MulURem.ll b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
index a88c51097247..d85ef97553ab 100644
--- a/test/Transforms/InstCombine/2008-02-13-MulURem.ll
+++ b/test/Transforms/InstCombine/2008-02-13-MulURem.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep rem
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1933
 
+; CHECK: rem
+
 define i32 @fold(i32 %a) {
   %s = mul i32 %a, 3
   %c = urem i32 %s, 3
diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
index ed2069041d1b..31b17196d8f5 100644
--- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll
+++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep "xor"
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR2389
 
+; CHECK: xor
+
 define i1 @test(i1 %a, i1 %b) {
   %A = add i1 %a, %b
   ret i1 %A
diff --git a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
index 43af190abcea..0c0e55a0b2d9 100644
--- a/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
+++ b/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
@@ -2,7 +2,7 @@
 ; PR3103
 
 define i8 @test1(i8 %x, i8 %y) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %A = udiv i8 %x, %y
 ; CHECK-NEXT: urem
   %B = mul i8 %A, %y
@@ -12,7 +12,7 @@ define i8 @test1(i8 %x, i8 %y) {
 }
 
 define i8 @test2(i8 %x, i8 %y) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   %A = sdiv i8 %x, %y
 ; CHECK-NEXT: srem
   %B = mul i8 %A, %y
@@ -22,7 +22,7 @@ define i8 @test2(i8 %x, i8 %y) {
 }
 
 define i8 @test3(i8 %x, i8 %y) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
   %A = udiv i8 %x, %y
 ; CHECK-NEXT: urem
   %B = mul i8 %A, %y
@@ -33,7 +33,7 @@ define i8 @test3(i8 %x, i8 %y) {
 }
 
 define i8 @test4(i8 %x) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
   %A = udiv i8 %x, 3
 ; CHECK-NEXT: urem
   %B = mul i8 %A, -3
@@ -45,7 +45,7 @@ define i8 @test4(i8 %x) {
 }
 
 define i32 @test5(i32 %x, i32 %y) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; (((X / Y) * Y) / Y) -> X / Y
   %div = sdiv i32 %x, %y
 ; CHECK-NEXT: sdiv
@@ -56,7 +56,7 @@ define i32 @test5(i32 %x, i32 %y) {
 }
 
 define i32 @test6(i32 %x, i32 %y) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; (((X / Y) * Y) / Y) -> X / Y
   %div = udiv i32 %x, %y
 ; CHECK-NEXT: udiv
diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
index 949fc592203f..e3543116a666 100644
--- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
+++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin9.6"
 
 define i32 @test(i32* %P) nounwind {
 entry:
-  %Q = bitcast i32* %P to i32 addrspace(1)*
+  %Q = addrspacecast i32* %P to i32 addrspace(1)*
   store i32 0, i32 addrspace(1)* %Q, align 4
   ret i32 0
 }
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index bb3159e1e6fa..b1384ec00209 100644
--- a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -1,5 +1,4 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
-; PR6486
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 target triple = "i386-unknown-linux-gnu"
@@ -7,8 +6,8 @@ target triple = "i386-unknown-linux-gnu"
 @g_92 = common global [2 x i32*] zeroinitializer, align 4 ; <[2 x i32*]*> [#uses=1]
 @g_177 = constant i32** bitcast (i8* getelementptr (i8* bitcast ([2 x i32*]* @g_92 to i8*), i64 4) to i32**), align 4 ; <i32***> [#uses=1]
 
-define i1 @test() nounwind {
-; CHECK: @test
+define i1 @PR6486() nounwind {
+; CHECK-LABEL: @PR6486(
   %tmp = load i32*** @g_177                       ; <i32**> [#uses=1]
   %cmp = icmp ne i32** null, %tmp                 ; <i1> [#uses=1]
   %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
@@ -16,3 +15,18 @@ define i1 @test() nounwind {
   ret i1 %cmp1
 ; CHECK: ret i1 true
 }
+
+@d = common global i32 0, align 4
+@a = common global [1 x i32] zeroinitializer, align 4
+
+define i1 @PR16462_1() nounwind {
+; CHECK-LABEL: @PR16462_1(
+  ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
+; CHECK: ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
+}
+
+define i1 @PR16462_2() nounwind {
+; CHECK-LABEL: @PR16462_2(
+  ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 42)
+; CHECK: ret i1 icmp sgt (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16), i16 42)
+}
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index 800162197919..1549c0d0f46e 100644
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; <rdar://problem/8606771>
-; CHECK: @main
+; CHECK-LABEL: @main(
 define i32 @main(i32 %argc) nounwind ssp {
 entry:
   %tmp3151 = trunc i32 %argc to i8
@@ -23,7 +23,7 @@ entry:
 }
 
 ; rdar://8739316
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 define i8 @foo(i8 %arg, i8 %arg1) nounwind {
 bb:
   %tmp = shl i8 %arg, 7
diff --git a/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll b/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
index 720365c4d6b2..80983ef27098 100644
--- a/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
+++ b/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
@@ -3,14 +3,14 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 define {}* @foo({}* %x, i32 %n) {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK-NOT: getelementptr
   %p = getelementptr {}* %x, i32 %n
   ret {}* %p
 }
 
 define i8* @bar(i64 %n, {{}, [0 x {[0 x i8]}]}* %p) {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
   %g = getelementptr {{}, [0 x {[0 x i8]}]}* %p, i64 %n, i32 1, i64 %n, i32 0, i64 %n
 ; CHECK: %p, i64 0, i32 1, i64 0, i32 0, i64 %n
   ret i8* %g
diff --git a/test/Transforms/InstCombine/2010-11-23-Distributed.ll b/test/Transforms/InstCombine/2010-11-23-Distributed.ll
index 4f8e8dc713b4..20bfed87798b 100644
--- a/test/Transforms/InstCombine/2010-11-23-Distributed.ll
+++ b/test/Transforms/InstCombine/2010-11-23-Distributed.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 define i32 @foo(i32 %x, i32 %y) {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
   %add = add nsw i32 %y, %x
   %mul = mul nsw i32 %add, %y
   %square = mul nsw i32 %y, %y
@@ -11,7 +11,7 @@ define i32 @foo(i32 %x, i32 %y) {
 }
 
 define i1 @bar(i64 %x, i64 %y) {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
   %a = and i64 %y, %x
 ; CHECK: and
 ; CHECK-NOT: and
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index fedb46dd24ad..a75a4656e68a 100644
--- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "i386-apple-darwin10.0.0"
 
-; CHECK: define void @fu1
+; CHECK-LABEL: define void @fu1(
 define void @fu1(i32 %parm) nounwind ssp {
   %1 = alloca i32, align 4
 ; CHECK: alloca double*
@@ -33,7 +33,7 @@ define void @fu1(i32 %parm) nounwind ssp {
 
 declare void @bar(double*)
 
-; CHECK: define void @fu2
+; CHECK-LABEL: define void @fu2(
 define void @fu2(i32 %parm) nounwind ssp {
   %1 = alloca i32, align 4
   %ptr = alloca double*, align 4
diff --git a/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
index 58ccf12e6cf4..d1860bccd75e 100644
--- a/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
+++ b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
@@ -2,7 +2,7 @@
 
 ; Derived from gcc.c-torture/execute/frame-address.c
 
-; CHECK:     @func
+; CHECK-LABEL:     @func(
 ; CHECK:     return:
 ; CHECK-NOT: ret i32 0
 ; CHECK:     ret i32 %retval
diff --git a/test/Transforms/InstCombine/2012-04-24-vselect.ll b/test/Transforms/InstCombine/2012-04-24-vselect.ll
index 8d2de2b2431c..211d401a3bca 100644
--- a/test/Transforms/InstCombine/2012-04-24-vselect.ll
+++ b/test/Transforms/InstCombine/2012-04-24-vselect.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: <i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 
 define <8 x i32> @foo() nounwind {
diff --git a/test/Transforms/InstCombine/2012-05-28-select-hang.ll b/test/Transforms/InstCombine/2012-05-28-select-hang.ll
index c580bacb0dcb..db1dbd5b6e2f 100644
--- a/test/Transforms/InstCombine/2012-05-28-select-hang.ll
+++ b/test/Transforms/InstCombine/2012-05-28-select-hang.ll
@@ -34,6 +34,6 @@ land.end:                                         ; preds = %land.rhs, %entry
   store i8 %conv9, i8* @a, align 1
   ret void
 
-; CHECK: @func
+; CHECK-LABEL: @func(
 ; CHECK-NOT: select
 }
diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
index 6f3df5b2fd99..4d185bf7e06e 100644
--- a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
+++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: bitcast
+; CHECK: addrspacecast
 
 @base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16
 declare void @foo(i32*)
 
 define void @test() nounwind {
-  call void @foo(i32* getelementptr (i32* bitcast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
+  call void @foo(i32* getelementptr (i32* addrspacecast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
   ret void
 }
diff --git a/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
index 4efaf8c17255..0374bd52afd4 100644
--- a/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
+++ b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-; CHECK: @udiv400
+; CHECK-LABEL: @udiv400(
 ; CHECK: udiv i32 %x, 400
 ; CHECK: ret
 define i32 @udiv400(i32 %x) {
@@ -16,7 +16,7 @@ entry:
 }
 
 
-; CHECK: @udiv400_no
+; CHECK-LABEL: @udiv400_no(
 ; CHECK: ashr
 ; CHECK: div
 ; CHECK: ret
@@ -27,7 +27,7 @@ entry:
   ret i32 %div1
 }
 
-; CHECK: @sdiv400_yes
+; CHECK-LABEL: @sdiv400_yes(
 ; CHECK: udiv i32 %x, 400
 ; CHECK: ret
 define i32 @sdiv400_yes(i32 %x) {
@@ -41,7 +41,7 @@ entry:
 }
 
 
-; CHECK: @udiv_i80
+; CHECK-LABEL: @udiv_i80(
 ; CHECK: udiv i80 %x, 400
 ; CHECK: ret
 define i80 @udiv_i80(i80 %x) {
diff --git a/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll b/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
index ba025e92b010..7015725feb16 100644
--- a/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
+++ b/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
@@ -9,7 +9,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK-NEXT: alloca [0 x i8], align 1024
 ; CHECK-NOT: alloca
 ; CHECK: ret void
diff --git a/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
index 4cd60b42fbe1..35b62850c626 100644
--- a/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
+++ b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
@@ -8,7 +8,7 @@ declare void @bar(i8*)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define void @test() {
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: llvm.memcpy
 ; CHECK: ret void
   %A = alloca [100 x i8]
diff --git a/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
index c1602da4c84d..466629cb5fd6 100644
--- a/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
+++ b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
@@ -9,4 +9,4 @@ entry:
   %or4 = or i32 or (i32 zext (i1 icmp eq (i32* @g, i32* null) to i32), i32 1), %xor
   ret i32 %or4
 }
-; CHECK: define i32 @function
+; CHECK-LABEL: define i32 @function(
diff --git a/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
index b20c3a07c0ac..789e3172e7c4 100644
--- a/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
+++ b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; allocation of an i96 because of the bitcast to create %2. That's not valid,
 ; as the other 32 bits of the structure still feed into the return value
 define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind {
-; CHECK: @function
+; CHECK-LABEL: @function(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8
 ; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
diff --git a/test/Transforms/InstCombine/ExtractCast.ll b/test/Transforms/InstCombine/ExtractCast.ll
index 5ebbefd97b3e..9a8872f2fb1f 100644
--- a/test/Transforms/InstCombine/ExtractCast.ll
+++ b/test/Transforms/InstCombine/ExtractCast.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S -o - | FileCheck %s
 
-; CHECK: @a
+; CHECK-LABEL: @a(
 define i32 @a(<4 x i64> %I) {
 entry:
 ; CHECK-NOT: trunc <4 x i64>
@@ -13,7 +13,7 @@ entry:
 }
 
 
-; CHECK: @b
+; CHECK-LABEL: @b(
 define i32 @b(<4 x float> %I) {
 entry:
 ; CHECK-NOT: fptosi <4 x float>
diff --git a/test/Transforms/InstCombine/LandingPadClauses.ll b/test/Transforms/InstCombine/LandingPadClauses.ll
index de3b2d34fb94..10af4bcadf68 100644
--- a/test/Transforms/InstCombine/LandingPadClauses.ll
+++ b/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -11,7 +11,7 @@ declare i32 @__objc_personality_v0(i32, i64, i8*, i8*)
 declare void @bar()
 
 define void @foo_generic() {
-; CHECK: @foo_generic
+; CHECK-LABEL: @foo_generic(
   invoke void @bar()
     to label %cont.a unwind label %lpad.a
 cont.a:
@@ -131,7 +131,7 @@ lpad.i:
 }
 
 define void @foo_cxx() {
-; CHECK: @foo_cxx
+; CHECK-LABEL: @foo_cxx(
   invoke void @bar()
     to label %cont.a unwind label %lpad.a
 cont.a:
@@ -182,7 +182,7 @@ lpad.d:
 }
 
 define void @foo_objc() {
-; CHECK: @foo_objc
+; CHECK-LABEL: @foo_objc(
   invoke void @bar()
     to label %cont.a unwind label %lpad.a
 cont.a:
diff --git a/test/Transforms/InstCombine/PR7357.ll b/test/Transforms/InstCombine/PR7357.ll
new file mode 100644
index 000000000000..c397c5ee05e3
--- /dev/null
+++ b/test/Transforms/InstCombine/PR7357.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -instcombine -S | FileCheck %s
+@.str1 = private constant [11 x i8] c"(){};[]&|:\00", align 4
+
+; check that simplify libcalls will not replace a call with one calling
+; convention with a new call with a different calling convention.
+
+; CHECK: define arm_aapcscc i32 @foo(i32 %argc)
+; CHECK: call arm_aapcscc  i8* @strchr
+define arm_aapcscc i32 @foo(i32 %argc) nounwind {
+bb.nph:
+  %c = call arm_aapcscc  i8* @strchr(i8* getelementptr ([11 x i8]* @.str1, i32 0,
+i32 0), i32 %argc) nounwind readonly
+  %p = ptrtoint i8* %c to i32
+  ret i32 %p
+}
+
+declare arm_aapcscc i8* @strchr(i8*, i32) nounwind readonly
diff --git a/test/Transforms/InstCombine/abs-1.ll b/test/Transforms/InstCombine/abs-1.ll
index 807f238755b5..2c223edcfa74 100644
--- a/test/Transforms/InstCombine/abs-1.ll
+++ b/test/Transforms/InstCombine/abs-1.ll
@@ -11,7 +11,7 @@ declare i64 @llabs(i64)
 ; Check abs(x) -> x >s -1 ? x : -x.
 
 define i32 @test_simplify1(i32 %x) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i32 @abs(i32 %x)
 ; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i32 %x, -1
 ; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i32 0, %x
@@ -21,7 +21,7 @@ define i32 @test_simplify1(i32 %x) {
 }
 
 define i64 @test_simplify2(i64 %x) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %ret = call i64 @labs(i64 %x)
 ; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
 ; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
@@ -31,7 +31,7 @@ define i64 @test_simplify2(i64 %x) {
 }
 
 define i64 @test_simplify3(i64 %x) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %ret = call i64 @llabs(i64 %x)
 ; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
 ; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index c5109c528857..0964bc00d1cb 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -6,7 +6,7 @@ define i64 @test1(i64 %A, i32 %B) {
         %tmp5 = add i64 %tmp3, %A
         %tmp6 = and i64 %tmp5, 123
         ret i64 %tmp6
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: and i64 %A, 123
 ; CHECK-NEXT: ret i64
 }
@@ -16,7 +16,7 @@ define i32 @test2(i32 %A) {
   %C = and i32 %A, 32
   %F = add i32 %B, %C
   ret i32 %F
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: and i32 %A, 39
 ; CHECK-NEXT: ret i32
 }
@@ -26,7 +26,7 @@ define i32 @test3(i32 %A) {
   %C = lshr i32 %A, 30
   %F = add i32 %B, %C
   ret i32 %F
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: and
 ; CHECK-NEXT: lshr
 ; CHECK-NEXT: or i32 %B, %C
@@ -36,7 +36,7 @@ define i32 @test3(i32 %A) {
 define i32 @test4(i32 %A) {
   %B = add nuw i32 %A, %A
   ret i32 %B
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: %B = shl nuw i32 %A, 1
 ; CHECK-NEXT: ret i32 %B
 }
diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll
index 0fc0a6c1ac26..208c7f03200e 100644
--- a/test/Transforms/InstCombine/add4.ll
+++ b/test/Transforms/InstCombine/add4.ll
@@ -1,9 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-;; Target triple for gep raising case below.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-
 define float @test1(float %A, float %B, i1 %C) {
 EntryBlock:
   ;; A*(1 - uitofp i1 C) -> select C, 0, A
@@ -11,7 +7,7 @@ EntryBlock:
   %mc = fsub float 1.000000e+00, %cf
   %p1 = fmul fast float %A, %mc
   ret float %p1
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: select i1 %C, float -0.000000e+00, float %A
 }
 
@@ -21,38 +17,63 @@ EntryBlock:
   %cf = uitofp i1 %C to float
   %p2 = fmul fast float %B, %cf
   ret float %p2
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: select i1 %C, float %B, float -0.000000e+00
 }
 
 define float @test3(float %A, float %B, i1 %C) {
 EntryBlock:
+  ;;  select C, 0, B + select C, A, 0 -> select C, A, B
+  %cf = uitofp i1 %C to float
+  %s1 = select i1 %C, float 0.000000e+00, float %B
+  %s2 = select i1 %C, float %A, float 0.000000e+00
+  %sum = fadd fast float %s1, %s2
+  ret float %sum
+; CHECK-LABEL: @test3(
+; CHECK: select i1 %C, float %A, float %B
+}
+
+define float @test4(float %A, float %B, i1 %C) {
+EntryBlock:
+  ;;  B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, B, A
+  %cf = uitofp i1 %C to float
+  %mc = fsub fast float 1.000000e+00, %cf
+  %p1 = fmul fast float %A, %mc
+  %p2 = fmul fast float %B, %cf
+  %s1 = fadd fast float %p2, %p1
+  ret float %s1
+; CHECK-LABEL: @test4(
+; CHECK: select i1 %C, float %B, float %A
+}
+
+define float @test5(float %A, float %B, i1 %C) {
+EntryBlock:
   ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
   %cf = uitofp i1 %C to float
-  %mc = fsub float 1.000000e+00, %cf
+  %mc = fsub fast float 1.000000e+00, %cf
   %p1 = fmul fast float %A, %mc
   %p2 = fmul fast float %B, %cf
   %s1 = fadd fast float %p1, %p2
   ret float %s1
-; CHECK: @test3
+; CHECK-LABEL: @test5(
 ; CHECK: select i1 %C, float %B, float %A
 }
 
 ; PR15952
-define float @test4(float %A, float %B, i32 %C) {
+define float @test6(float %A, float %B, i32 %C) {
   %cf = uitofp i32 %C to float
   %mc = fsub float 1.000000e+00, %cf
   %p1 = fmul fast float %A, %mc
   ret float %p1
-; CHECK: @test4
+; CHECK-LABEL: @test6(
 ; CHECK: uitofp
 }
 
-define float @test5(float %A, float %B, i32 %C) {
+define float @test7(float %A, float %B, i32 %C) {
   %cf = uitofp i32 %C to float
   %p2 = fmul fast float %B, %cf
   ret float %p2
-; CHECK: @test5
+; CHECK-LABEL: @test7(
 ; CHECK: uitofp
 }
 
diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll
new file mode 100644
index 000000000000..d908b556e195
--- /dev/null
+++ b/test/Transforms/InstCombine/addrspacecast.ll
@@ -0,0 +1,69 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i32, i1) nounwind
+
+
+define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to i32*
+  ret i32* %z
+}
+
+define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_vector(
+; CHECK: addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+  %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x i32*>
+  ret <4 x i32*> %z
+}
+
+define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types(
+; CHECK: addrspacecast i32 addrspace(1)* %x to float*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to float*
+  ret float* %z
+}
+
+@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22 ]
+
+declare void @foo(i8*) nounwind
+
+; A copy from a constant addrspacecast'ed global
+; CHECK-LABEL: @memcpy_addrspacecast(
+; CHECK-NOT:  call void @llvm.memcpy
+define i32 @memcpy_addrspacecast() nounwind {
+entry:
+  %alloca = alloca i8, i32 48
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i32 4, i1 false) nounwind
+  br label %loop.body
+
+loop.body:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body]
+  %ptr = getelementptr i8* %alloca, i32 %i
+  %load = load i8* %ptr
+  %ext = zext i8 %load to i32
+  %sum.inc = add i32 %sum, %ext
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i, 48
+  br i1 %cmp, label %loop.body, label %end
+
+end:
+  ret i32 %sum.inc
+}
+
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index 4ea1bd9beb3b..4d22c2cd2ef2 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Instcombine should be able to prove vector alignment in the
 ; presence of a few mild address computation tricks.
 
-; CHECK: @test0(
+; CHECK-LABEL: @test0(
 ; CHECK: align 16
 
 define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind  {
@@ -35,7 +35,7 @@ return:
 ; When we see a unaligned load from an insufficiently aligned global or
 ; alloca, increase the alignment of the load, turning it into an aligned load.
 
-; CHECK: @test1(
+; CHECK-LABEL: @test1(
 ; CHECK: tmp = load
 ; CHECK: GLOBAL{{.*}}align 16
 
@@ -47,9 +47,30 @@ entry:
 	ret <16 x i8> %tmp
 }
 
+@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1{{.*}}align 16
+  %tmp = load <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1_gep(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1_gep{{.*}}align 16
+  %tmp = load <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+
 ; When a load or store lacks an explicit alignment, add one.
 
-; CHECK: @test2(
+; CHECK-LABEL: @test2(
 ; CHECK: load double* %p, align 8
 ; CHECK: store double %n, double* %p, align 8
 
@@ -67,7 +88,7 @@ declare void @use(i8*)
 
 define void @test3(%struct.s* sret %a4) {
 ; Check that the alignment is bumped up the alignment of the sret type.
-; CHECK: @test3
+; CHECK-LABEL: @test3(
   %a4.cast = bitcast %struct.s* %a4 to i8*
   call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 1, i1 false)
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 4, i1 false)
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index c3ef2dbb70f0..66ff9c16e424 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -22,7 +22,7 @@ define i64 @foo(i64 %a) {
 }
 
 define i32 @bar() {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
   %r = load i32* @B, align 1
 ; CHECK: align 1
   ret i32 %r
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 68a671cec88a..ae1cfa1ed2fc 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -1,14 +1,14 @@
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+; RUN: opt < %s -instcombine -S -default-data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s
+; RUN: opt < %s -instcombine -S -default-data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL
 
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; END.
 
 declare void @use(...)
 
 @int = global i32 zeroinitializer
 
 ; Zero byte allocas should be merged if they can't be deleted.
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: alloca
 ; CHECK-NOT: alloca
 define void @test() {
@@ -25,7 +25,7 @@ define void @test() {
 }
 
 ; Zero byte allocas should be deleted.
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: alloca
 define void @test2() {
         %A = alloca i32         ; <i32*> [#uses=1]
@@ -34,7 +34,7 @@ define void @test2() {
 }
 
 ; Zero byte allocas should be deleted.
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: alloca
 define void @test3() {
         %A = alloca { i32 }             ; <{ i32 }*> [#uses=1]
@@ -43,7 +43,7 @@ define void @test3() {
         ret void
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: = zext i32 %n to i64
 ; CHECK: %A = alloca i32, i64 %
 define i32* @test4(i32 %n) {
@@ -54,7 +54,7 @@ define i32* @test4(i32 %n) {
 ; Allocas which are only used by GEPs, bitcasts, and stores (transitively)
 ; should be deleted.
 define void @test5() {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: store
 ; CHECK: ret
@@ -80,7 +80,7 @@ declare void @f(i32* %p)
 
 ; Check that we don't delete allocas in some erroneous cases.
 define void @test6() {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NOT: ret
 ; CHECK: alloca
 ; CHECK-NEXT: alloca
@@ -110,3 +110,22 @@ entry:
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+
+; Check that the GEP indices use the pointer size, or 64 if unknown
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: alloca [100 x i32]
+; CHECK: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0
+
+; P32-LABEL: @test8(
+; P32: alloca [100 x i32]
+; P32: getelementptr inbounds [100 x i32]* %x1, i32 0, i32 0
+
+; NODL-LABEL: @test8(
+; NODL: alloca [100 x i32]
+; NODL: getelementptr inbounds [100 x i32]* %x1, i64 0, i64 0
+  %x = alloca i32, i32 100
+  call void (...)* @use(i32* %x)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/and-xor-or.ll b/test/Transforms/InstCombine/and-xor-or.ll
index 7ff810b6eeec..ec36d13ec1af 100644
--- a/test/Transforms/InstCombine/and-xor-or.ll
+++ b/test/Transforms/InstCombine/and-xor-or.ll
@@ -7,7 +7,7 @@ define i64 @or(i64 %x, i64 %y) nounwind uwtable readnone ssp {
   %2 = xor i64 %y, %x
   %3 = add i64 %1, %2
   ret i64 %3
-; CHECK: @or
+; CHECK-LABEL: @or(
 ; CHECK: or i64
 ; CHECK-NEXT: ret
 }
@@ -18,7 +18,7 @@ define i64 @or2(i64 %x, i64 %y) nounwind uwtable readnone ssp {
   %2 = xor i64 %y, %x
   %3 = or i64 %1, %2
   ret i64 %3
-; CHECK: @or2
+; CHECK-LABEL: @or2(
 ; CHECK: or i64
 ; CHECK-NEXT: ret
 }
diff --git a/test/Transforms/InstCombine/and.ll b/test/Transforms/InstCombine/and.ll
index 8492df9a1209..3d36bfb404d1 100644
--- a/test/Transforms/InstCombine/and.ll
+++ b/test/Transforms/InstCombine/and.ll
@@ -186,9 +186,9 @@ define i1 @test25(i32 %A) {
 }
 
 define i1 @test26(i32 %A) {
-        %B = icmp ne i32 %A, 50         ; <i1> [#uses=1]
-        %C = icmp ne i32 %A, 51         ; <i1> [#uses=1]
-        ;; (A-50) > 1
+        %B = icmp ne i32 %A, 49         ; <i1> [#uses=1]
+        %C = icmp ne i32 %A, 50         ; <i1> [#uses=1]
+        ;; (A-49) > 1
         %D = and i1 %B, %C              ; <i1> [#uses=1]
         ret i1 %D
 }
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 531aedb668a0..e88fd5983003 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -13,7 +13,7 @@ define i1 @test2(i1 %X, i1 %Y) {
   %a = and i1 %X, %Y
   %b = and i1 %a, %X
   ret i1 %b
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: and i1 %X, %Y
 ; CHECK-NEXT: ret
 }
@@ -22,7 +22,7 @@ define i32 @test3(i32 %X, i32 %Y) {
   %a = and i32 %X, %Y
   %b = and i32 %Y, %a
   ret i32 %b
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: and i32 %X, %Y
 ; CHECK-NEXT: ret
 }
@@ -32,7 +32,7 @@ define i1 @test4(i32 %X) {
   %b = icmp slt i32 %X, 0
   %c = and i1 %a, %b
   ret i1 %c
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: ret i1 false
 }
 
@@ -42,3 +42,15 @@ define <4 x i32> @test5(<4 x i32> %A) {
   %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
   ret <4 x i32> %2
 }
+
+; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
+define i32 @test6(i64 %x) nounwind {
+; CHECK: @test6
+; CHECK-NEXT: add i64 %x, 1
+; CHECK-NEXT: icmp ugt i64 %x.off, 1
+  %cmp1 = icmp ne i64 %x, -1
+  %not.cmp = icmp ne i64 %x, 0
+  %.cmp1 = and i1 %cmp1, %not.cmp
+  %land.ext = zext i1 %.cmp1 to i32
+  ret i32 %land.ext
+}
diff --git a/test/Transforms/InstCombine/apint-call-cast-target.ll b/test/Transforms/InstCombine/apint-call-cast-target.ll
index fe336de75242..4e98f9b2b3ac 100644
--- a/test/Transforms/InstCombine/apint-call-cast-target.ll
+++ b/test/Transforms/InstCombine/apint-call-cast-target.ll
@@ -1,16 +1,19 @@
-; RUN: opt < %s -instcombine -S | grep call | not grep bitcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
 
-
 define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK: call i32 bitcast
 entry:
 	%tmp = call i32 bitcast (i7* (i999*)* @ctime to i32 (i99*)*)( i99* null )
 	ret i32 %tmp
 }
 
 define i7* @ctime(i999*) {
+; CHECK-LABEL: @ctime(
+; CHECK: call i7* bitcast
 entry:
 	%tmp = call i7* bitcast (i32 ()* @main to i7* ()*)( )
 	ret i7* %tmp
diff --git a/test/Transforms/InstCombine/apint-select.ll b/test/Transforms/InstCombine/apint-select.ll
index f2ea60101c5f..cf24a44d6288 100644
--- a/test/Transforms/InstCombine/apint-select.ll
+++ b/test/Transforms/InstCombine/apint-select.ll
@@ -1,6 +1,7 @@
 ; This test makes sure that these instructions are properly eliminated.
 
-; RUN: opt < %s -instcombine -S | not grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: select
 
 
 define i41 @test1(i1 %C) {
@@ -37,7 +38,7 @@ define i41 @test5(i41 %X) {
 
 define i1023 @test6(i1023 %X) {
     ;; ((X & 27) ? 27 : 0)
-    %Y = and i1023 %X, 64 
+    %Y = and i1023 %X, 64
     %t = icmp ne i1023 %Y, 0
     %V = select i1 %t, i1023 64, i1023 0
     ret i1023 %V
diff --git a/test/Transforms/InstCombine/apint-shift-simplify.ll b/test/Transforms/InstCombine/apint-shift-simplify.ll
index 14e895ad4bf6..63703ba112ac 100644
--- a/test/Transforms/InstCombine/apint-shift-simplify.ll
+++ b/test/Transforms/InstCombine/apint-shift-simplify.ll
@@ -5,7 +5,7 @@ define i41 @test0(i41 %A, i41 %B, i41 %C) {
 	%Y = shl i41 %B, %C
 	%Z = and i41 %X, %Y
 	ret i41 %Z
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK-NEXT: and i41 %A, %B
 ; CHECK-NEXT: shl i41
 ; CHECK-NEXT: ret
@@ -16,7 +16,7 @@ define i57 @test1(i57 %A, i57 %B, i57 %C) {
 	%Y = lshr i57 %B, %C
 	%Z = or i57 %X, %Y
 	ret i57 %Z
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: or i57 %A, %B
 ; CHECK-NEXT: lshr i57
 ; CHECK-NEXT: ret
@@ -27,7 +27,7 @@ define i49 @test2(i49 %A, i49 %B, i49 %C) {
 	%Y = ashr i49 %B, %C
 	%Z = xor i49 %X, %Y
 	ret i49 %Z
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: xor i49 %A, %B
 ; CHECK-NEXT: ashr i49
 ; CHECK-NEXT: ret
diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll
index 73f630ebfec6..f5764c2d5e8a 100644
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -2,14 +2,14 @@
 ; even with arbitrary precision integers.
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: sh
 define i47 @test1(i47 %A) {
 	%B = shl i47 %A, 0		; <i47> [#uses=1]
 	ret i47 %B
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: sh
 define i41 @test2(i7 %X) {
 	%A = zext i7 %X to i41		; <i41> [#uses=1]
@@ -17,14 +17,14 @@ define i41 @test2(i7 %X) {
 	ret i41 %B
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: sh
 define i41 @test3(i41 %A) {
 	%B = ashr i41 %A, 0		; <i41> [#uses=1]
 	ret i41 %B
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: sh
 define i39 @test4(i7 %X) {
 	%A = zext i7 %X to i39		; <i39> [#uses=1]
@@ -32,21 +32,21 @@ define i39 @test4(i7 %X) {
 	ret i39 %B
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NOT: sh
 define i55 @test5(i55 %A) {
 	%B = lshr i55 %A, 55		; <i55> [#uses=1]
 	ret i55 %B
 }
 
-; CHECK: @test5a
+; CHECK-LABEL: @test5a(
 ; CHECK-NOT: sh
 define i32 @test5a(i32 %A) {
 	%B = shl i32 %A, 32		; <i32> [#uses=1]
 	ret i32 %B
 }
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: mul i55 %A, 6
 define i55 @test6(i55 %A) {
 	%B = shl i55 %A, 1		; <i55> [#uses=1]
@@ -54,7 +54,7 @@ define i55 @test6(i55 %A) {
 	ret i55 %C
 }
 
-; CHECK: @test6a
+; CHECK-LABEL: @test6a(
 ; CHECK: mul i55 %A, 6
 define i55 @test6a(i55 %A) {
 	%B = mul i55 %A, 3		; <i55> [#uses=1]
@@ -62,7 +62,7 @@ define i55 @test6a(i55 %A) {
 	ret i55 %C
 }
 
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: sh
 define i29 @test7(i8 %X) {
 	%A = zext i8 %X to i29		; <i29> [#uses=1]
@@ -70,7 +70,7 @@ define i29 @test7(i8 %X) {
 	ret i29 %B
 }
 
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NOT: sh
 define i7 @test8(i7 %A) {
 	%B = shl i7 %A, 4		; <i7> [#uses=1]
@@ -78,7 +78,7 @@ define i7 @test8(i7 %A) {
 	ret i7 %C
 }
 
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NOT: sh
 define i17 @test9(i17 %A) {
 	%B = shl i17 %A, 16		; <i17> [#uses=1]
@@ -86,7 +86,7 @@ define i17 @test9(i17 %A) {
 	ret i17 %C
 }
 
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NOT: sh
 define i19 @test10(i19 %A) {
 	%B = lshr i19 %A, 18		; <i19> [#uses=1]
@@ -94,7 +94,7 @@ define i19 @test10(i19 %A) {
 	ret i19 %C
 }
 
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
 ; CHECK: shl
 define i23 @test11(i23 %A) {
@@ -104,7 +104,7 @@ define i23 @test11(i23 %A) {
 	ret i23 %C
 }
 
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NOT: sh
 define i47 @test12(i47 %A) {
 	%B = ashr i47 %A, 8		; <i47> [#uses=1]
@@ -112,7 +112,7 @@ define i47 @test12(i47 %A) {
 	ret i47 %C
 }
 
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; Don't hide the shl from scalar evolution. DAGCombine will get it.
 ; CHECK: shl
 define i18 @test13(i18 %A) {
@@ -122,7 +122,7 @@ define i18 @test13(i18 %A) {
 	ret i18 %C
 }
 
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NOT: sh
 define i35 @test14(i35 %A) {
 	%B = lshr i35 %A, 4		; <i35> [#uses=1]
@@ -131,7 +131,7 @@ define i35 @test14(i35 %A) {
 	ret i35 %D
 }
 
-; CHECK: @test14a
+; CHECK-LABEL: @test14a(
 ; CHECK-NOT: sh
 define i79 @test14a(i79 %A) {
 	%B = shl i79 %A, 4		; <i79> [#uses=1]
@@ -140,7 +140,7 @@ define i79 @test14a(i79 %A) {
 	ret i79 %D
 }
 
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NOT: sh
 define i45 @test15(i1 %C) {
 	%A = select i1 %C, i45 3, i45 1	; <i45> [#uses=1]
@@ -148,7 +148,7 @@ define i45 @test15(i1 %C) {
 	ret i45 %V
 }
 
-; CHECK: @test15a
+; CHECK-LABEL: @test15a(
 ; CHECK-NOT: sh
 define i53 @test15a(i1 %X) {
 	%A = select i1 %X, i8 3, i8 1	; <i8> [#uses=1]
@@ -157,7 +157,7 @@ define i53 @test15a(i1 %X) {
 	ret i53 %V
 }
 
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NOT: sh
 define i1 @test16(i84 %X) {
 	%tmp.3 = ashr i84 %X, 4		; <i84> [#uses=1]
@@ -166,7 +166,7 @@ define i1 @test16(i84 %X) {
 	ret i1 %tmp.7
 }
 
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK-NOT: sh
 define i1 @test17(i106 %A) {
 	%B = lshr i106 %A, 3		; <i106> [#uses=1]
@@ -174,7 +174,7 @@ define i1 @test17(i106 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK-NOT: sh
 define i1 @test18(i11 %A) {
 	%B = lshr i11 %A, 10		; <i11> [#uses=1]
@@ -182,7 +182,7 @@ define i1 @test18(i11 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK-NOT: sh
 define i1 @test19(i37 %A) {
 	%B = ashr i37 %A, 2		; <i37> [#uses=1]
@@ -190,7 +190,7 @@ define i1 @test19(i37 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test19a
+; CHECK-LABEL: @test19a(
 ; CHECK-NOT: sh
 define i1 @test19a(i39 %A) {
 	%B = ashr i39 %A, 2		; <i39> [#uses=1]
@@ -198,7 +198,7 @@ define i1 @test19a(i39 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK-NOT: sh
 define i1 @test20(i13 %A) {
 	%B = ashr i13 %A, 12		; <i13> [#uses=1]
@@ -206,7 +206,7 @@ define i1 @test20(i13 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK-NOT: sh
 define i1 @test21(i12 %A) {
 	%B = shl i12 %A, 6		; <i12> [#uses=1]
@@ -214,7 +214,7 @@ define i1 @test21(i12 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK-NOT: sh
 define i1 @test22(i14 %A) {
 	%B = shl i14 %A, 7		; <i14> [#uses=1]
@@ -222,7 +222,7 @@ define i1 @test22(i14 %A) {
 	ret i1 %C
 }
 
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK-NOT: sh
 define i11 @test23(i44 %A) {
 	%B = shl i44 %A, 33		; <i44> [#uses=1]
@@ -231,7 +231,7 @@ define i11 @test23(i44 %A) {
 	ret i11 %D
 }
 
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK-NOT: sh
 define i37 @test25(i37 %tmp.2, i37 %AA) {
 	%x = lshr i37 %AA, 17		; <i37> [#uses=1]
@@ -241,7 +241,7 @@ define i37 @test25(i37 %tmp.2, i37 %AA) {
 	ret i37 %tmp.6
 }
 
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK-NOT: sh
 define i40 @test26(i40 %A) {
 	%B = lshr i40 %A, 1		; <i40> [#uses=1]
diff --git a/test/Transforms/InstCombine/apint-shl-trunc.ll b/test/Transforms/InstCombine/apint-shl-trunc.ll
index f2dc7d5130a9..b4450d4a3a00 100644
--- a/test/Transforms/InstCombine/apint-shl-trunc.ll
+++ b/test/Transforms/InstCombine/apint-shl-trunc.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i1 @test0(i39 %X, i39 %A) {
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK: %[[V1:.*]] = shl i39 1, %A
 ; CHECK: %[[V2:.*]] = and i39 %[[V1]], %X
 ; CHECK: %[[V3:.*]] = icmp ne i39 %[[V2]], 0
@@ -13,7 +13,7 @@ define i1 @test0(i39 %X, i39 %A) {
 }
 
 define i1 @test1(i799 %X, i799 %A) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %[[V1:.*]] = shl i799 1, %A
 ; CHECK: %[[V2:.*]] = and i799 %[[V1]], %X
 ; CHECK: %[[V3:.*]] = icmp ne i799 %[[V2]], 0
diff --git a/test/Transforms/InstCombine/atomic.ll b/test/Transforms/InstCombine/atomic.ll
index 097cf5eafe06..ccee87433f32 100644
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 ; Check transforms involving atomic operations
 
 define i32* @test1(i8** %p) {
-; CHECK: define i32* @test1
+; CHECK-LABEL: define i32* @test1(
 ; CHECK: load atomic i8** %p monotonic, align 8
   %c = bitcast i8** %p to i32**
   %r = load atomic i32** %c monotonic, align 8
@@ -14,7 +14,7 @@ define i32* @test1(i8** %p) {
 }
 
 define i32 @test2(i32* %p) {
-; CHECK: define i32 @test2
+; CHECK-LABEL: define i32 @test2(
 ; CHECK: %x = load atomic i32* %p seq_cst, align 4
 ; CHECK: shl i32 %x, 1
   %x = load atomic i32* %p seq_cst, align 4
diff --git a/test/Transforms/InstCombine/badmalloc.ll b/test/Transforms/InstCombine/badmalloc.ll
index 3abe28aede5d..2074d262ccbd 100644
--- a/test/Transforms/InstCombine/badmalloc.ll
+++ b/test/Transforms/InstCombine/badmalloc.ll
@@ -15,11 +15,11 @@ define i1 @test1() {
   call void @free(i8* %A)
   ret i1 %B
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i1 false
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define noalias i8* @test2() nounwind {
 entry:
 ; CHECK: @malloc
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
new file mode 100644
index 000000000000..a6b56f94ffbf
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -0,0 +1,229 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v64:64:64-v128:128:128-a0:0:64"
+
+
+
+; Cases that should be bitcast
+
+; Test cast between scalars with same bit sizes
+@alias_i32_to_f32 = alias bitcast (i32 (i32)* @func_i32 to float (float)*)
+
+; Test cast between vectors with same number of elements and bit sizes
+@alias_v2i32_to_v2f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
+
+; Test cast from vector to scalar with same number of bits
+@alias_v2f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
+
+; Test cast from scalar to vector with same number of bits
+@alias_i64_to_v2f32 = alias bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
+
+; Test cast between vectors of pointers
+@alias_v2i32p_to_v2i64p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
+
+
+; Cases that should be invalid and unchanged
+
+; Test cast between scalars with different bit sizes
+@alias_i64_to_f32 = alias bitcast (i64 (i64)* @func_i64 to float (float)*)
+
+; Test cast between vectors with different bit sizes but the
+; same number of elements
+@alias_v2i64_to_v2f32 = alias bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
+
+; Test cast between vectors with same number of bits and different
+; numbers of elements
+@alias_v2i32_to_v4f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
+
+; Test cast between scalar and vector with different number of bits
+@alias_i64_to_v4f32 = alias bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
+
+; Test cast between vector and scalar with different number of bits
+@alias_v4f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
+
+; Test cast from scalar to vector of pointers with same number of bits
+; We don't know the pointer size at this point, so this can't be done
+@alias_i64_to_v2i32p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
+
+; Test cast between vector of pointers and scalar with different number of bits
+@alias_v4i32p_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
+
+
+
+define internal <2 x i32> @func_v2i32(<2 x i32> %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret <2 x i32> %v
+}
+
+define internal <2 x float> @func_v2f32(<2 x float> %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret <2 x float> %v
+}
+
+define internal <4 x float> @func_v4f32(<4 x float> %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret <4 x float> %v
+}
+
+define internal i32 @func_i32(i32 %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret i32 %v
+}
+
+define internal i64 @func_i64(i64 %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret i64 %v
+}
+
+define internal <2 x i64> @func_v2i64(<2 x i64> %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret <2 x i64> %v
+}
+
+define internal <2 x i32*> @func_v2i32p(<2 x i32*> %v) noinline nounwind { ; identity helper: returns %v unchanged; referenced by the aliases above
+entry:
+  ret <2 x i32*> %v
+}
+
+; Valid cases, only bitcast for argument / return type and call underlying function
+
+; Sizes match (float and i32 have the same width), should only bitcast
+define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar(
+; CHECK: bitcast float %tmp to i32
+; CHECK-NOT: fptoui
+; CHECK-NOT: uitofp
+; CHECK: bitcast i32 %call to float
+  %tmp = load float* %source, align 8
+  %call = call float @alias_i32_to_f32(float %tmp) nounwind
+  store float %call, float* %dest, align 8
+  ret void
+}
+
+; Sizes match (<2 x float> and <2 x i32>), should only bitcast
+define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector(
+; CHECK: bitcast <2 x float> %tmp to <2 x i32>
+; CHECK-NOT: fptoui
+; CHECK-NOT: uitofp
+; CHECK: bitcast <2 x i32> %call to <2 x float>
+  %tmp = load <2 x float>* %source, align 8
+  %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
+  store <2 x float> %call, <2 x float>* %dest, align 8
+  ret void
+}
+
+; Sizes match (<2 x float> and i64), should only bitcast and call @func_i64 directly
+define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size(
+; CHECK: bitcast <2 x float> %tmp to i64
+; CHECK: %call = call i64 @func_i64
+; CHECK: bitcast i64 %call to <2 x float>
+  %tmp = load <2 x float>* %source, align 8
+  %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
+  store <2 x float> %call, <2 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size(
+; CHECK: bitcast i64 %tmp to <2 x float>
+; CHECK: call <2 x float> @func_v2f32
+; CHECK: bitcast <2 x float> %call to i64
+  %tmp = load i64* %source, align 8
+  %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
+  store i64 %call, i64* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size(
+; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*>
+; CHECK: call <2 x i32*> @func_v2i32p
+; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
+  %tmp = load <2 x i64*>* %source, align 8
+  %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
+  store <2 x i64*> %call, <2 x i64*>* %dest, align 8
+  ret void
+}
+
+; Invalid cases:
+
+define void @bitcast_alias_mismatch_scalar_size(float* noalias %source, float* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_mismatch_scalar_size(
+; CHECK-NOT: fptoui
+; CHECK: @alias_i64_to_f32
+; CHECK-NOT: uitofp
+  %tmp = load float* %source, align 8
+  %call = call float @alias_i64_to_f32(float %tmp) nounwind
+  store float %call, float* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_mismatch_vector_element_and_bit_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_mismatch_vector_element_and_bit_size(
+; CHECK-NOT: fptoui <2 x float> %tmp to <2 x i64>
+; CHECK: @alias_v2i64_to_v2f32
+; CHECK-NOT: uitofp <2 x i64> %call to <2 x float>
+  %tmp = load <2 x float>* %source, align 8
+  %call = call <2 x float> @alias_v2i64_to_v2f32(<2 x float> %tmp) nounwind
+  store <2 x float> %call, <2 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_mismatched_number_elements(<4 x float>* noalias %source, <4 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_mismatched_number_elements(
+; CHECK:  %call = call <4 x float> @alias_v2i32_to_v4f32
+  %tmp = load <4 x float>* %source, align 8
+  %call = call <4 x float> @alias_v2i32_to_v4f32(<4 x float> %tmp) nounwind
+  store <4 x float> %call, <4 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_scalar_mismatched_bit_size(<4 x float>* noalias %source, <4 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_scalar_mismatched_bit_size(
+; CHECK:  %call = call <4 x float> @alias_v4f32_to_i64
+  %tmp = load <4 x float>* %source, align 8
+  %call = call <4 x float> @alias_v4f32_to_i64(<4 x float> %tmp) nounwind
+  store <4 x float> %call, <4 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size(<4 x i32*>* noalias %source, <4 x i32*>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size(
+; CHECK: @alias_v4i32p_to_i64
+  %tmp = load <4 x i32*>* %source, align 8
+  %call = call <4 x i32*> @alias_v4i32p_to_i64(<4 x i32*> %tmp) nounwind
+  store <4 x i32*> %call, <4 x i32*>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_scalar_vector_ptrs_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_ptrs_same_size(
+; CHECK: @alias_i64_to_v2i32p
+  %tmp = load i64* %source, align 8
+  %call = call i64 @alias_i64_to_v2i32p(i64 %tmp) nounwind
+  store i64 %call, i64* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_scalar_vector_mismatched_bit_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_mismatched_bit_size(
+; CHECK: call i64 @alias_i64_to_v4f32
+  %tmp = load i64* %source, align 8
+  %call = call i64 @alias_i64_to_v4f32(i64 %tmp) nounwind
+  store i64 %call, i64* %dest, align 8
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
index 4ded581a14c6..ed812e15f385 100644
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -18,7 +18,7 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
   %add = fadd float %tmp24, %tmp4
   ret float %add
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
 ; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
 ; CHECK-NEXT:  %tmp4 = extractelement <2 x float> {{.*}}, i32 1
@@ -40,7 +40,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
   %add = fadd float %tmp24, %tmp4
   ret float %add
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
 ; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 1
@@ -48,3 +48,44 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 ; CHECK-NEXT:  ret float %add
 }
 
+define <2 x i32> @test4(i32 %A, i32 %B){
+  %tmp38 = zext i32 %A to i64
+  %tmp32 = zext i32 %B to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x i32>
+  ret <2 x i32> %tmp43
+  ; CHECK-LABEL: @test4(
+  ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %A, i32 1
+  ; CHECK-NEXT: ret <2 x i32>
+
+}
+
+define <2 x float> @test5(float %A, float %B) {
+  %tmp37 = bitcast float %A to i32
+  %tmp38 = zext i32 %tmp37 to i64
+  %tmp31 = bitcast float %B to i32
+  %tmp32 = zext i32 %tmp31 to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x float>
+  ret <2 x float> %tmp43
+  ; CHECK-LABEL: @test5(
+  ; CHECK-NEXT: insertelement <2 x float> undef, float %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %A, i32 1
+  ; CHECK-NEXT: ret <2 x float>
+}
+
+define <2 x float> @test6(float %A){
+  %tmp23 = bitcast float %A to i32              ; <i32> [#uses=1]
+  %tmp24 = zext i32 %tmp23 to i64                 ; <i64> [#uses=1]
+  %tmp25 = shl i64 %tmp24, 32                     ; <i64> [#uses=1]
+  %mask20 = or i64 %tmp25, 1109917696             ; <i64> [#uses=1]
+  %tmp35 = bitcast i64 %mask20 to <2 x float>     ; <<2 x float>> [#uses=1]
+  ret <2 x float> %tmp35
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
+; CHECK-NEXT: insertelement <2 x float> {{.*}}, float 4.200000e+01, i32 1
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/bitcast-vec-uniform.ll b/test/Transforms/InstCombine/bitcast-vec-uniform.ll
index 5975f1ec396e..bfb77191a75f 100644
--- a/test/Transforms/InstCombine/bitcast-vec-uniform.ll
+++ b/test/Transforms/InstCombine/bitcast-vec-uniform.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK: @a
+; CHECK-LABEL: @a(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 define <4 x i32> @a(<1 x i64> %y) {
@@ -8,7 +8,7 @@ define <4 x i32> @a(<1 x i64> %y) {
   ret <4 x i32> %c
 }
 
-; CHECK: @b
+; CHECK-LABEL: @b(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 
@@ -17,7 +17,7 @@ define <4 x i32> @b(<1 x i64> %y) {
   ret <4 x i32> %c
 }
 
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 
@@ -28,7 +28,7 @@ define <2 x float> @foo() {
 }
 
 
-; CHECK: @foo2
+; CHECK-LABEL: @foo2(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 define <2 x double> @foo2() {
@@ -36,7 +36,7 @@ define <2 x double> @foo2() {
   ret <2 x double> %cast
 }
 
-; CHECK: @foo3
+; CHECK-LABEL: @foo3(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 define <1 x float> @foo3() {
@@ -44,7 +44,7 @@ define <1 x float> @foo3() {
   ret <1 x float> %cast
 }
 
-; CHECK: @foo4
+; CHECK-LABEL: @foo4(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 define float @foo4() {
@@ -52,7 +52,7 @@ define float @foo4() {
   ret float %cast
 }
 
-; CHECK: @foo5
+; CHECK-LABEL: @foo5(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 define double @foo5() {
@@ -61,7 +61,7 @@ define double @foo5() {
 }
 
 
-; CHECK: @foo6
+; CHECK-LABEL: @foo6(
 ; CHECK-NOT: bitcast
 ; CHECK: ret
 define <2 x double> @foo6() {
diff --git a/test/Transforms/InstCombine/bitcast-vector-fold.ll b/test/Transforms/InstCombine/bitcast-vector-fold.ll
index 8fd7f35b7bb7..04c2861ae9b7 100644
--- a/test/Transforms/InstCombine/bitcast-vector-fold.ll
+++ b/test/Transforms/InstCombine/bitcast-vector-fold.ll
@@ -35,4 +35,4 @@ define <4 x i32> @test6() {
 define i32 @test7() {
        %tmp3 = bitcast <2 x half> <half 0xH1100, half 0xH0011> to i32
        ret i32 %tmp3
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 1e6113256bf3..c7a520bcf360 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -12,7 +12,7 @@ define i32 @test1(i64 %a) {
         %t4 = extractelement <2 x i32> %t3, i32 0
         ret i32 %t4
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
@@ -31,7 +31,7 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
   %add = fadd float %tmp24, %tmp4
   ret float %add
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT:  bitcast <2 x i32> %B to <2 x float>
 ; CHECK-NEXT:  %tmp4 = extractelement <2 x float> {{.*}}, i32 0
@@ -56,7 +56,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
   %add = fadd float %tmp24, %tmp4
   ret float %add
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  %tmp24 = extractelement <2 x float> %A, i32 1
 ; CHECK-NEXT:  bitcast <2 x i64> %B to <4 x float>
 ; CHECK-NEXT:  %tmp4 = extractelement <4 x float> {{.*}}, i32 2
@@ -72,7 +72,7 @@ define <2 x i32> @test4(i32 %A, i32 %B){
   %ins35 = or i64 %tmp33, %tmp38
   %tmp43 = bitcast i64 %ins35 to <2 x i32>
   ret <2 x i32> %tmp43
-  ; CHECK: @test4
+  ; CHECK-LABEL: @test4(
   ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
   ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
   ; CHECK-NEXT: ret <2 x i32>
@@ -89,7 +89,7 @@ define <2 x float> @test5(float %A, float %B) {
   %ins35 = or i64 %tmp33, %tmp38
   %tmp43 = bitcast i64 %ins35 to <2 x float>
   ret <2 x float> %tmp43
-  ; CHECK: @test5
+  ; CHECK-LABEL: @test5(
   ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
   ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
   ; CHECK-NEXT: ret <2 x float>
@@ -102,7 +102,7 @@ define <2 x float> @test6(float %A){
   %mask20 = or i64 %tmp25, 1109917696             ; <i64> [#uses=1]
   %tmp35 = bitcast i64 %mask20 to <2 x float>     ; <<2 x float>> [#uses=1]
   ret <2 x float> %tmp35
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1
 ; CHECK: ret
 }
@@ -110,7 +110,7 @@ define <2 x float> @test6(float %A){
 define i64 @ISPC0(i64 %in) {
   %out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1> to i64), i64 -1)
   ret i64 %out
-; CHECK: @ISPC0
+; CHECK-LABEL: @ISPC0(
 ; CHECK: ret i64 0
 }
 
@@ -118,14 +118,14 @@ define i64 @ISPC0(i64 %in) {
 define i64 @Vec2(i64 %in) {
   %out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 0> to i64), i64 0)
   ret i64 %out
-; CHECK: @Vec2
+; CHECK-LABEL: @Vec2(
 ; CHECK: ret i64 0
 }
 
 define i64 @All11(i64 %in) {
   %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
   ret i64 %out
-; CHECK: @All11
+; CHECK-LABEL: @All11(
 ; CHECK: ret i64 0
 }
 
@@ -133,7 +133,7 @@ define i64 @All11(i64 %in) {
 define i32 @All111(i32 %in) {
   %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
   ret i32 %out
-; CHECK: @All111
+; CHECK-LABEL: @All111(
 ; CHECK: ret i32 0
 }
 
@@ -141,6 +141,16 @@ define <2 x i16> @BitcastInsert(i32 %a) {
   %v = insertelement <1 x i32> undef, i32 %a, i32 0
   %r = bitcast <1 x i32> %v to <2 x i16>
   ret <2 x i16> %r
-; CHECK: @BitcastInsert
+; CHECK-LABEL: @BitcastInsert(
 ; CHECK: bitcast i32 %a to <2 x i16>
 }
+
+; PR17293
+define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
+  %cast = bitcast <2 x i8*>* %arg to <2 x i64>*
+  %load = load <2 x i64>* %cast, align 16
+  ret <2 x i64> %load
+; CHECK-LABEL: @test7(
+; CHECK: bitcast
+; CHECK: load
+}
diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll
index 7addc8abc84f..315c51683fd2 100644
--- a/test/Transforms/InstCombine/call-cast-target.ll
+++ b/test/Transforms/InstCombine/call-cast-target.ll
@@ -1,13 +1,14 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:   grep call | not grep bitcast
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
 
 define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK: call i32 bitcast
 entry:
-        %tmp = call i32 bitcast (i8* (i32*)* @ctime to i32 (i32*)*)( i32* null )          ; <i32> [#uses=1]
-        ret i32 %tmp
+  %tmp = call i32 bitcast (i8* (i32*)* @ctime to i32 (i32*)*)( i32* null )          ; <i32> [#uses=1]
+  ret i32 %tmp
 }
 
 declare i8* @ctime(i32*)
diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll
index 96ec420eaa8e..e68c0ad9b208 100644
--- a/test/Transforms/InstCombine/call.ll
+++ b/test/Transforms/InstCombine/call.ll
@@ -1,98 +1,122 @@
 ; Ignore stderr, we expect warnings there
 ; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
 
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 ; Simple case, argument translatable without changing the value
 declare void @test1a(i8*)
 
 define void @test1(i32* %A) {
-        call void bitcast (void (i8*)* @test1a to void (i32*)*)( i32* %A )
-        ret void
+; CHECK-LABEL: @test1(
 ; CHECK: %1 = bitcast i32* %A to i8*
 ; CHECK: call void @test1a(i8* %1)
 ; CHECK: ret void
+  call void bitcast (void (i8*)* @test1a to void (i32*)*)( i32* %A )
+  ret void
 }
 
-; More complex case, translate argument because of resolution.  This is safe 
+
+; Should not do because of change in address space of the parameter
+define void @test1_as1_illegal(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1_illegal(
+; CHECK: call void bitcast
+  call void bitcast (void (i8*)* @test1a to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A)
+  ret void
+}
+
+; Test1, but the argument has a different sized address-space
+declare void @test1a_as1(i8 addrspace(1)*)
+
+; This one is OK to perform
+define void @test1_as1(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: %1 = bitcast i32 addrspace(1)* %A to i8 addrspace(1)*
+; CHECK: call void @test1a_as1(i8 addrspace(1)* %1)
+; CHECK: ret void
+  call void bitcast (void (i8 addrspace(1)*)* @test1a_as1 to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A )
+  ret void
+}
+
+; More complex case, translate argument because of resolution.  This is safe
 ; because we have the body of the function
 define void @test2a(i8 %A) {
-        ret void
+; CHECK-LABEL: @test2a(
 ; CHECK: ret void
+  ret void
 }
 
 define i32 @test2(i32 %A) {
-        call void bitcast (void (i8)* @test2a to void (i32)*)( i32 %A )
-        ret i32 %A
-; CHECK: %1 = trunc i32 %A to i8
-; CHECK: call void @test2a(i8 %1)
+; CHECK-LABEL: @test2(
+; CHECK: call void bitcast
 ; CHECK: ret i32 %A
+  call void bitcast (void (i8)* @test2a to void (i32)*)( i32 %A )
+  ret i32 %A
 }
 
 
-; Resolving this should insert a cast from sbyte to int, following the C 
+; Resolving this should insert a cast from sbyte to int, following the C
 ; promotion rules.
 define void @test3a(i8, ...) {unreachable }
 
 define void @test3(i8 %A, i8 %B) {
-        call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B 
-)
-        ret void
+; CHECK-LABEL: @test3(
 ; CHECK: %1 = zext i8 %B to i32
 ; CHECK: call void (i8, ...)* @test3a(i8 %A, i32 %1)
 ; CHECK: ret void
+  call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B)
+  ret void
 }
 
-
 ; test conversion of return value...
 define i8 @test4a() {
-        ret i8 0
+; CHECK-LABEL: @test4a(
 ; CHECK: ret i8 0
+  ret i8 0
 }
 
 define i32 @test4() {
-        %X = call i32 bitcast (i8 ()* @test4a to i32 ()*)( )            ; <i32> [#uses=1]
-        ret i32 %X
-; CHECK: %X = call i8 @test4a()
-; CHECK: %1 = zext i8 %X to i32
-; CHECK: ret i32 %1
+; CHECK-LABEL: @test4(
+; CHECK: call i32 bitcast
+  %X = call i32 bitcast (i8 ()* @test4a to i32 ()*)( )            ; <i32> [#uses=1]
+  ret i32 %X
 }
 
-
-; test conversion of return value... no value conversion occurs so we can do 
+; test conversion of return value... no value conversion occurs so we can do
 ; this with just a prototype...
 declare i32 @test5a()
 
 define i32 @test5() {
-        %X = call i32 @test5a( )                ; <i32> [#uses=1]
-        ret i32 %X
+; CHECK-LABEL: @test5(
 ; CHECK: %X = call i32 @test5a()
 ; CHECK: ret i32 %X
+  %X = call i32 @test5a( )                ; <i32> [#uses=1]
+  ret i32 %X
 }
 
-
 ; test addition of new arguments...
 declare i32 @test6a(i32)
 
 define i32 @test6() {
-        %X = call i32 bitcast (i32 (i32)* @test6a to i32 ()*)( )
-        ret i32 %X
+; CHECK-LABEL: @test6(
 ; CHECK: %X = call i32 @test6a(i32 0)
 ; CHECK: ret i32 %X
+  %X = call i32 bitcast (i32 (i32)* @test6a to i32 ()*)( )
+  ret i32 %X
 }
 
-
 ; test removal of arguments, only can happen with a function body
 define void @test7a() {
-        ret void
+; CHECK-LABEL: @test7a(
 ; CHECK: ret void
+  ret void
 }
 
 define void @test7() {
-        call void bitcast (void ()* @test7a to void (i32)*)( i32 5 )
-        ret void
+; CHECK-LABEL: @test7(
 ; CHECK: call void @test7a()
 ; CHECK: ret void
+  call void bitcast (void ()* @test7a to void (i32)*)( i32 5 )
+  ret void
 }
 
 
@@ -100,6 +124,11 @@ define void @test7() {
 declare void @test8a()
 
 define i8* @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: invoke void @test8a()
+; Don't turn this into "unreachable": the callee and caller don't agree in
+; calling conv, but the implementation of test8a may actually end up using the
+; right calling conv.
   invoke void @test8a()
           to label %invoke.cont unwind label %try.handler
 
@@ -114,23 +143,136 @@ try.handler:                                      ; preds = %entry
 
 declare i32 @__gxx_personality_v0(...)
 
-; Don't turn this into "unreachable": the callee and caller don't agree in
-; calling conv, but the implementation of test8a may actually end up using the
-; right calling conv.
-; CHECK: @test8() {
-; CHECK-NEXT: invoke void @test8a()
-
 
-
-; Don't turn this into a direct call, because test9x is just a prototype and 
+; Don't turn this into a direct call, because test9x is just a prototype and
 ; doing so will make it varargs.
 ; rdar://9038601
 declare i8* @test9x(i8*, i8*, ...) noredzone
 define i8* @test9(i8* %arg, i8* %tmp3) nounwind ssp noredzone {
+; CHECK-LABEL: @test9(
 entry:
   %call = call i8* bitcast (i8* (i8*, i8*, ...)* @test9x to i8* (i8*, i8*)*)(i8* %arg, i8* %tmp3) noredzone
   ret i8* %call
-; CHECK: @test9(
+; The call must still go through the bitcast constant rather than a direct call.
 ; CHECK: call i8* bitcast
 }
 
+
+; Parameter that's a vector of pointers
+declare void @test10a(<2 x i8*>)
+
+define void @test10(<2 x i32*> %A) {
+; CHECK-LABEL: @test10(
+; CHECK: %1 = bitcast <2 x i32*> %A to <2 x i8*>
+; CHECK: call void @test10a(<2 x i8*> %1)
+; CHECK: ret void
+  call void bitcast (void (<2 x i8*>)* @test10a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Don't transform because different address spaces
+declare void @test10a_mixed_as(<2 x i8 addrspace(1)*>)
+
+define void @test10_mixed_as(<2 x i8*> %A) {
+; CHECK-LABEL: @test10_mixed_as(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8 addrspace(1)*>)* @test10a_mixed_as to void (<2 x i8*>)*)(<2 x i8*> %A)
+  ret void
+}
+
+; Return type that's a pointer
+define i8* @test11a() {
+  ret i8* zeroinitializer
+}
+
+define i32* @test11() {
+; CHECK-LABEL: @test11(
+; CHECK: %X = call i8* @test11a()
+; CHECK: %1 = bitcast i8* %X to i32*
+  %X = call i32* bitcast (i8* ()* @test11a to i32* ()*)()
+  ret i32* %X
+}
+
+; Return type that's a pointer with a different address space
+define i8 addrspace(1)* @test11a_mixed_as() {
+  ret i8 addrspace(1)* zeroinitializer
+}
+
+define i8* @test11_mixed_as() {
+; CHECK-LABEL: @test11_mixed_as(
+; CHECK: call i8* bitcast
+  %X = call i8* bitcast (i8 addrspace(1)* ()* @test11a_mixed_as to i8* ()*)()
+  ret i8* %X
+}
+
+; Return type that's a vector of pointers
+define <2 x i8*> @test12a() {
+  ret <2 x i8*> zeroinitializer
+}
+
+define <2 x i32*> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK: %X = call <2 x i8*> @test12a()
+; CHECK: %1 = bitcast <2 x i8*> %X to <2 x i32*>
+  %X = call <2 x i32*> bitcast (<2 x i8*> ()* @test12a to <2 x i32*> ()*)()
+  ret <2 x i32*> %X
+}
+
+define <2 x i8 addrspace(1)*> @test12a_mixed_as() {
+  ret <2 x i8 addrspace(1)*> zeroinitializer
+}
+
+define <2 x i8*> @test12_mixed_as() {
+; CHECK-LABEL: @test12_mixed_as(
+; CHECK: call <2 x i8*> bitcast
+  %X = call <2 x i8*> bitcast (<2 x i8 addrspace(1)*> ()* @test12a_mixed_as to <2 x i8*> ()*)()
+  ret <2 x i8*> %X
+}
+
+
+; Mix parameter that's a vector of integers and pointers of the same size
+declare void @test13a(<2 x i64>)
+
+define void @test13(<2 x i32*> %A) {
+; CHECK-LABEL: @test13(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i64>)* @test13a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Mix parameter that's a vector of integers and pointers of the same
+; size, but the other way around
+declare void @test14a(<2 x i8*>)
+
+define void @test14(<2 x i64> %A) {
+; CHECK-LABEL: @test14(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8*>)* @test14a to void (<2 x i64>)*)(<2 x i64> %A)
+  ret void
+}
+
+
+; Return type that's a vector
+define <2 x i16> @test15a() {
+  ret <2 x i16> zeroinitializer
+}
+
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK: %X = call <2 x i16> @test15a()
+; CHECK: %1 = bitcast <2 x i16> %X to i32
+  %X = call i32 bitcast (<2 x i16> ()* @test15a to i32 ()*)( )
+  ret i32 %X
+}
+
+define i32 @test16a() {
+  ret i32 0
+}
+
+define <2 x i16> @test16() {
+; CHECK-LABEL: @test16(
+; CHECK: %X = call i32 @test16a()
+; CHECK: %1 = bitcast i32 %X to <2 x i16>
+  %X = call <2 x i16> bitcast (i32 ()* @test16a to <2 x i16> ()*)( )
+  ret <2 x i16> %X
+}
diff --git a/test/Transforms/InstCombine/canonicalize_branch.ll b/test/Transforms/InstCombine/canonicalize_branch.ll
index 869546d57dcd..b62b143d9d51 100644
--- a/test/Transforms/InstCombine/canonicalize_branch.ll
+++ b/test/Transforms/InstCombine/canonicalize_branch.ll
@@ -5,7 +5,7 @@ define i32 @test0(i32 %X, i32 %Y) {
         %C = icmp eq i32 %X, %Y
         br i1 %C, label %T, label %F, !prof !0
 
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK: %C = icmp eq i32 %X, %Y
 ; CHECK: br i1 %C, label %T, label %F
 
@@ -19,7 +19,7 @@ define i32 @test1(i32 %X, i32 %Y) {
         %C = icmp ne i32 %X, %Y
         br i1 %C, label %T, label %F, !prof !1
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %C = icmp eq i32 %X, %Y
 ; CHECK: br i1 %C, label %F, label %T
 
@@ -33,7 +33,7 @@ define i32 @test2(i32 %X, i32 %Y) {
         %C = icmp ule i32 %X, %Y
         br i1 %C, label %T, label %F, !prof !2
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %C = icmp ugt i32 %X, %Y
 ; CHECK: br i1 %C, label %F, label %T
 
@@ -47,7 +47,7 @@ define i32 @test3(i32 %X, i32 %Y) {
         %C = icmp uge i32 %X, %Y
         br i1 %C, label %T, label %F, !prof !3
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %C = icmp ult i32 %X, %Y
 ; CHECK: br i1 %C, label %F, label %T
 
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index de738bb7c06d..cac0ec109163 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,6 +1,6 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
 
 @inbuf = external global [32832 x i8]           ; <[32832 x i8]*> [#uses=1]
 
@@ -326,7 +326,7 @@ define i16 @test39(i16 %a) {
         %tmp.upgrd.32 = or i32 %tmp21, %tmp5
         %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
         ret i16 %tmp.upgrd.3
-; CHECK: @test39
+; CHECK-LABEL: @test39(
 ; CHECK: %tmp.upgrd.32 = call i16 @llvm.bswap.i16(i16 %a)
 ; CHECK: ret i16 %tmp.upgrd.32
 }
@@ -338,7 +338,7 @@ define i16 @test40(i16 %a) {
         %tmp.upgrd.32 = or i32 %tmp21, %tmp5
         %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
         ret i16 %tmp.upgrd.3
-; CHECK: @test40
+; CHECK-LABEL: @test40(
 ; CHECK: %tmp21 = lshr i16 %a, 9
 ; CHECK: %tmp5 = shl i16 %a, 8
 ; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
@@ -350,7 +350,7 @@ define i32* @test41(i32* %tmp1) {
         %tmp64 = bitcast i32* %tmp1 to { i32 }*
         %tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0
         ret i32* %tmp65
-; CHECK: @test41
+; CHECK-LABEL: @test41(
 ; CHECK: ret i32* %tmp1
 }
 
@@ -358,7 +358,7 @@ define i32 @test42(i32 %X) {
         %Y = trunc i32 %X to i8         ; <i8> [#uses=1]
         %Z = zext i8 %Y to i32          ; <i32> [#uses=1]
         ret i32 %Z
-; CHECK: @test42
+; CHECK-LABEL: @test42(
 ; CHECK: %Z = and i32 %X, 255
 }
 
@@ -368,7 +368,7 @@ define zeroext i64 @test43(i8 zeroext %on_off) nounwind readonly {
 	%B = add i32 %A, -1
 	%C = sext i32 %B to i64
 	ret i64 %C  ;; Should be (add (zext i8 -> i64), -1)
-; CHECK: @test43
+; CHECK-LABEL: @test43(
 ; CHECK-NEXT: %A = zext i8 %on_off to i64
 ; CHECK-NEXT: %B = add i64 %A, -1
 ; CHECK-NEXT: ret i64 %B
@@ -379,7 +379,7 @@ define i64 @test44(i8 %T) {
  %B = or i16 %A, 1234
  %C = zext i16 %B to i64
  ret i64 %C
-; CHECK: @test44
+; CHECK-LABEL: @test44(
 ; CHECK-NEXT: %A = zext i8 %T to i64
 ; CHECK-NEXT: %B = or i64 %A, 1234
 ; CHECK-NEXT: ret i64 %B
@@ -391,7 +391,7 @@ define i64 @test45(i8 %A, i64 %Q) {
  %C = or i32 %B, %D
  %E = zext i32 %C to i64 
  ret i64 %E
-; CHECK: @test45
+; CHECK-LABEL: @test45(
 ; CHECK-NEXT: %B = sext i8 %A to i64
 ; CHECK-NEXT: %C = or i64 %B, %Q
 ; CHECK-NEXT: %E = and i64 %C, 4294967295
@@ -405,7 +405,7 @@ define i64 @test46(i64 %A) {
  %D = shl i32 %C, 8
  %E = zext i32 %D to i64 
  ret i64 %E
-; CHECK: @test46
+; CHECK-LABEL: @test46(
 ; CHECK-NEXT: %C = shl i64 %A, 8
 ; CHECK-NEXT: %D = and i64 %C, 10752
 ; CHECK-NEXT: ret i64 %D
@@ -416,7 +416,7 @@ define i64 @test47(i8 %A) {
  %C = or i32 %B, 42
  %E = zext i32 %C to i64 
  ret i64 %E
-; CHECK: @test47
+; CHECK-LABEL: @test47(
 ; CHECK-NEXT:   %B = sext i8 %A to i64
 ; CHECK-NEXT: %C = and i64 %B, 4294967253
 ; CHECK-NEXT:  %E = or i64 %C, 42
@@ -430,7 +430,7 @@ define i64 @test48(i8 %A, i8 %a) {
   %D = or i32 %C, %b
   %E = zext i32 %D to i64
   ret i64 %E
-; CHECK: @test48
+; CHECK-LABEL: @test48(
 ; CHECK-NEXT: %b = zext i8 %a to i64
 ; CHECK-NEXT: %B = zext i8 %A to i64
 ; CHECK-NEXT: %C = shl nuw nsw i64 %B, 8
@@ -443,7 +443,7 @@ define i64 @test49(i64 %A) {
  %C = or i32 %B, 1
  %D = sext i32 %C to i64 
  ret i64 %D
-; CHECK: @test49
+; CHECK-LABEL: @test49(
 ; CHECK-NEXT: %C = shl i64 %A, 32
 ; CHECK-NEXT: ashr exact i64 %C, 32
 ; CHECK-NEXT: %D = or i64 {{.*}}, 1
@@ -456,7 +456,7 @@ define i64 @test50(i64 %A) {
   %D = add i32 %B, -1
   %E = sext i32 %D to i64
   ret i64 %E
-; CHECK: @test50
+; CHECK-LABEL: @test50(
 ; lshr+shl will be handled by DAGCombine.
 ; CHECK-NEXT: lshr i64 %A, 2
 ; CHECK-NEXT: shl i64 %a, 32
@@ -472,7 +472,7 @@ define i64 @test51(i64 %A, i1 %cond) {
   %E = select i1 %cond, i32 %C, i32 %D
   %F = sext i32 %E to i64
   ret i64 %F
-; CHECK: @test51
+; CHECK-LABEL: @test51(
 ; CHECK-NEXT: %C = and i64 %A, 4294967294
 ; CHECK-NEXT: %D = or i64 %A, 1
 ; CHECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
@@ -487,7 +487,7 @@ define i32 @test52(i64 %A) {
   %D = and i16 %C, -25350
   %E = zext i16 %D to i32
   ret i32 %E
-; CHECK: @test52
+; CHECK-LABEL: @test52(
 ; CHECK-NEXT: %B = trunc i64 %A to i32
 ; CHECK-NEXT: %C = and i32 %B, 7224
 ; CHECK-NEXT: %D = or i32 %C, 32962
@@ -500,7 +500,7 @@ define i64 @test53(i32 %A) {
   %D = and i16 %C, -25350
   %E = zext i16 %D to i64
   ret i64 %E
-; CHECK: @test53
+; CHECK-LABEL: @test53(
 ; CHECK-NEXT: %B = zext i32 %A to i64
 ; CHECK-NEXT: %C = and i64 %B, 7224
 ; CHECK-NEXT: %D = or i64 %C, 32962
@@ -513,7 +513,7 @@ define i32 @test54(i64 %A) {
   %D = and i16 %C, -25350
   %E = sext i16 %D to i32
   ret i32 %E
-; CHECK: @test54
+; CHECK-LABEL: @test54(
 ; CHECK-NEXT: %B = trunc i64 %A to i32
 ; CHECK-NEXT: %C = and i32 %B, 7224
 ; CHECK-NEXT: %D = or i32 %C, -32574
@@ -526,7 +526,7 @@ define i64 @test55(i32 %A) {
   %D = and i16 %C, -25350
   %E = sext i16 %D to i64
   ret i64 %E
-; CHECK: @test55
+; CHECK-LABEL: @test55(
 ; CHECK-NEXT: %B = zext i32 %A to i64
 ; CHECK-NEXT: %C = and i64 %B, 7224
 ; CHECK-NEXT: %D = or i64 %C, -32574
@@ -538,7 +538,7 @@ define i64 @test56(i16 %A) nounwind {
   %tmp354 = lshr i32 %tmp353, 5
   %tmp355 = zext i32 %tmp354 to i64
   ret i64 %tmp355
-; CHECK: @test56
+; CHECK-LABEL: @test56(
 ; CHECK-NEXT: %tmp353 = sext i16 %A to i64
 ; CHECK-NEXT: %tmp354 = lshr i64 %tmp353, 5
 ; CHECK-NEXT: %tmp355 = and i64 %tmp354, 134217727
@@ -550,7 +550,7 @@ define i64 @test57(i64 %A) nounwind {
  %C = lshr i32 %B, 8
  %E = zext i32 %C to i64
  ret i64 %E
-; CHECK: @test57
+; CHECK-LABEL: @test57(
 ; CHECK-NEXT: %C = lshr i64 %A, 8 
 ; CHECK-NEXT: %E = and i64 %C, 16777215
 ; CHECK-NEXT: ret i64 %E
@@ -563,7 +563,7 @@ define i64 @test58(i64 %A) nounwind {
  %E = zext i32 %D to i64
  ret i64 %E
  
-; CHECK: @test58
+; CHECK-LABEL: @test58(
 ; CHECK-NEXT:   %C = lshr i64 %A, 8
 ; CHECK-NEXT:   %D = and i64 %C, 16777087
 ; CHECK-NEXT:   %E = or i64 %D, 128
@@ -579,7 +579,7 @@ define i64 @test59(i8 %A, i8 %B) nounwind {
   %H = or i32 %G, %E
   %I = zext i32 %H to i64
   ret i64 %I
-; CHECK: @test59
+; CHECK-LABEL: @test59(
 ; CHECK-NEXT:   %C = zext i8 %A to i64
 ; CHECK-NOT: i32
 ; CHECK:   %F = zext i8 %B to i64
@@ -593,7 +593,7 @@ define <3 x i32> @test60(<4 x i32> %call4) nounwind {
   %tmp10 = bitcast i96 %tmp9 to <3 x i32>
   ret <3 x i32> %tmp10
   
-; CHECK: @test60
+; CHECK-LABEL: @test60(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
 }
@@ -603,7 +603,7 @@ define <4 x i32> @test61(<3 x i32> %call4) nounwind {
   %tmp9 = zext i96 %tmp11 to i128
   %tmp10 = bitcast i128 %tmp9 to <4 x i32>
   ret <4 x i32> %tmp10
-; CHECK: @test61
+; CHECK-LABEL: @test61(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
 }
@@ -613,7 +613,7 @@ define <4 x i32> @test62(<3 x float> %call4) nounwind {
   %tmp9 = zext i96 %tmp11 to i128
   %tmp10 = bitcast i128 %tmp9 to <4 x i32>
   ret <4 x i32> %tmp10
-; CHECK: @test62
+; CHECK-LABEL: @test62(
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
@@ -625,7 +625,7 @@ entry:
   %a = bitcast i64 %tmp8 to <2 x i32>           
   %vcvt.i = uitofp <2 x i32> %a to <2 x float>  
   ret <2 x float> %vcvt.i
-; CHECK: @test63
+; CHECK-LABEL: @test63(
 ; CHECK: bitcast
 ; CHECK: uitofp
 }
@@ -634,7 +634,7 @@ define <4 x float> @test64(<4 x float> %c) nounwind {
   %t0 = bitcast <4 x float> %c to <4 x i32>
   %t1 = bitcast <4 x i32> %t0 to <4 x float>
   ret <4 x float> %t1
-; CHECK: @test64
+; CHECK-LABEL: @test64(
 ; CHECK-NEXT: ret <4 x float> %c
 }
 
@@ -642,7 +642,7 @@ define <4 x float> @test65(<4 x float> %c) nounwind {
   %t0 = bitcast <4 x float> %c to <2 x double>
   %t1 = bitcast <2 x double> %t0 to <4 x float>
   ret <4 x float> %t1
-; CHECK: @test65
+; CHECK-LABEL: @test65(
 ; CHECK-NEXT: ret <4 x float> %c
 }
 
@@ -650,13 +650,13 @@ define <2 x float> @test66(<2 x float> %c) nounwind {
   %t0 = bitcast <2 x float> %c to double
   %t1 = bitcast double %t0 to <2 x float>
   ret <2 x float> %t1
-; CHECK: @test66
+; CHECK-LABEL: @test66(
 ; CHECK-NEXT: ret <2 x float> %c
 }
 
 define float @test2c() {
   ret float extractelement (<2 x float> bitcast (double bitcast (<2 x float> <float -1.000000e+00, float -1.000000e+00> to double) to <2 x float>), i32 0)
-; CHECK: @test2c
+; CHECK-LABEL: @test2c(
 ; CHECK-NOT: extractelement
 }
 
@@ -665,7 +665,7 @@ define i64 @test_mmx(<2 x i32> %c) nounwind {
   %B = bitcast x86_mmx %A to <2 x i32>
   %C = bitcast <2 x i32> %B to i64
   ret i64 %C
-; CHECK: @test_mmx
+; CHECK-LABEL: @test_mmx(
 ; CHECK-NOT: x86_mmx
 }
 
@@ -674,7 +674,7 @@ define i64 @test_mmx_const(<2 x i32> %c) nounwind {
   %B = bitcast x86_mmx %A to <2 x i32>
   %C = bitcast <2 x i32> %B to i64
   ret i64 %C
-; CHECK: @test_mmx_const
+; CHECK-LABEL: @test_mmx_const(
 ; CHECK-NOT: x86_mmx
 }
 
@@ -689,14 +689,14 @@ define i1 @test67(i1 %a, i32 %b) {
   %trunc = trunc i32 %conv.i.i to i8
   %tobool.i = icmp eq i8 %trunc, 0
   ret i1 %tobool.i
-; CHECK: @test67
+; CHECK-LABEL: @test67(
 ; CHECK: ret i1 false
 }
 
 %s = type { i32, i32, i32 }
 
 define %s @test68(%s *%p, i64 %i) {
-; CHECK: @test68
+; CHECK-LABEL: @test68(
   %o = mul i64 %i, 12
   %q = bitcast %s* %p to i8*
   %pp = getelementptr inbounds i8* %q, i64 %o
@@ -708,8 +708,21 @@ define %s @test68(%s *%p, i64 %i) {
 ; CHECK-NEXT: ret %s
 }
 
+define %s @test68_as1(%s addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test68_as1(
+  %o = mul i32 %i, 12
+  %q = bitcast %s addrspace(1)* %p to i8 addrspace(1)*
+  %pp = getelementptr inbounds i8 addrspace(1)* %q, i32 %o
+; CHECK-NEXT: getelementptr %s addrspace(1)*
+  %r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)*
+  %l = load %s addrspace(1)* %r
+; CHECK-NEXT: load %s addrspace(1)*
+  ret %s %l
+; CHECK-NEXT: ret %s
+}
+
 define double @test69(double *%p, i64 %i) {
-; CHECK: @test69
+; CHECK-LABEL: @test69(
   %o = shl nsw i64 %i, 3
   %q = bitcast double* %p to i8*
   %pp = getelementptr inbounds i8* %q, i64 %o
@@ -722,7 +735,7 @@ define double @test69(double *%p, i64 %i) {
 }
 
 define %s @test70(%s *%p, i64 %i) {
-; CHECK: @test70
+; CHECK-LABEL: @test70(
   %o = mul nsw i64 %i, 36
 ; CHECK-NEXT: mul nsw i64 %i, 3
   %q = bitcast %s* %p to i8*
@@ -736,7 +749,7 @@ define %s @test70(%s *%p, i64 %i) {
 }
 
 define double @test71(double *%p, i64 %i) {
-; CHECK: @test71
+; CHECK-LABEL: @test71(
   %o = shl i64 %i, 5
 ; CHECK-NEXT: shl i64 %i, 2
   %q = bitcast double* %p to i8*
@@ -750,7 +763,7 @@ define double @test71(double *%p, i64 %i) {
 }
 
 define double @test72(double *%p, i32 %i) {
-; CHECK: @test72
+; CHECK-LABEL: @test72(
   %so = mul nsw i32 %i, 8
   %o = sext i32 %so to i64
 ; CHECK-NEXT: sext i32 %i to i64
@@ -765,7 +778,7 @@ define double @test72(double *%p, i32 %i) {
 }
 
 define double @test73(double *%p, i128 %i) {
-; CHECK: @test73
+; CHECK-LABEL: @test73(
   %lo = mul nsw i128 %i, 8
   %o = trunc i128 %lo to i64
 ; CHECK-NEXT: trunc i128 %i to i64
@@ -780,7 +793,7 @@ define double @test73(double *%p, i128 %i) {
 }
 
 define double @test74(double *%p, i64 %i) {
-; CHECK: @test74
+; CHECK-LABEL: @test74(
   %q = bitcast double* %p to i64*
   %pp = getelementptr inbounds i64* %q, i64 %i
 ; CHECK-NEXT: getelementptr inbounds double*
@@ -792,7 +805,7 @@ define double @test74(double *%p, i64 %i) {
 }
 
 define i32* @test75(i32* %p, i32 %x) {
-; CHECK: @test75
+; CHECK-LABEL: @test75(
   %y = shl i32 %x, 3
 ; CHECK-NEXT: shl i32 %x, 3
   %z = sext i32 %y to i64
@@ -804,7 +817,7 @@ define i32* @test75(i32* %p, i32 %x) {
 }
 
 define %s @test76(%s *%p, i64 %i, i64 %j) {
-; CHECK: @test76
+; CHECK-LABEL: @test76(
   %o = mul i64 %i, 12
   %o2 = mul nsw i64 %o, %j
 ; CHECK-NEXT: %o2 = mul i64 %i, %j
@@ -819,7 +832,7 @@ define %s @test76(%s *%p, i64 %i, i64 %j) {
 }
 
 define %s @test77(%s *%p, i64 %i, i64 %j) {
-; CHECK: @test77
+; CHECK-LABEL: @test77(
   %o = mul nsw i64 %i, 36
   %o2 = mul nsw i64 %o, %j
 ; CHECK-NEXT: %o = mul nsw i64 %i, 3
@@ -835,7 +848,7 @@ define %s @test77(%s *%p, i64 %i, i64 %j) {
 }
 
 define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
-; CHECK: @test78
+; CHECK-LABEL: @test78(
   %a = mul nsw i32 %k, 36
 ; CHECK-NEXT: mul nsw i32 %k, 3
   %b = mul nsw i32 %a, %l
@@ -863,7 +876,7 @@ define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
 }
 
 define %s @test79(%s *%p, i64 %i, i32 %j) {
-; CHECK: @test79
+; CHECK-LABEL: @test79(
   %a = mul nsw i64 %i, 36
 ; CHECK: mul nsw i64 %i, 36
   %b = trunc i64 %a to i32
@@ -877,7 +890,7 @@ define %s @test79(%s *%p, i64 %i, i32 %j) {
 }
 
 define double @test80([100 x double]* %p, i32 %i) {
-; CHECK: @test80
+; CHECK-LABEL: @test80(
   %tmp = mul nsw i32 %i, 8
 ; CHECK-NEXT: sext i32 %i to i64
   %q = bitcast [100 x double]* %p to i8*
@@ -890,6 +903,20 @@ define double @test80([100 x double]* %p, i32 %i) {
 ; CHECK-NEXT: ret double
 }
 
+define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
+; CHECK-LABEL: @test80_as1(
+  %tmp = mul nsw i16 %i, 8
+; CHECK-NEXT: sext i16 %i to i32
+  %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
+  %pp = getelementptr i8 addrspace(1)* %q, i16 %tmp
+; CHECK-NEXT: getelementptr [100 x double] addrspace(1)*
+  %r = bitcast i8 addrspace(1)* %pp to double addrspace(1)*
+  %l = load double addrspace(1)* %r
+; CHECK-NEXT: load double addrspace(1)*
+  ret double %l
+; CHECK-NEXT: ret double
+}
+
 define double @test81(double *%p, float %f) {
   %i = fptosi float %f to i64
   %q = bitcast double* %p to i8*
@@ -898,3 +925,31 @@ define double @test81(double *%p, float %f) {
   %l = load double* %r
   ret double %l
 }
+
+define i64 @test82(i64 %A) nounwind {
+  %B = trunc i64 %A to i32
+  %C = lshr i32 %B, 8
+  %D = shl i32 %C, 9
+  %E = zext i32 %D to i64
+  ret i64 %E
+
+; CHECK-LABEL: @test82(
+; CHECK-NEXT:   [[REG:%[0-9]*]] = shl i64 %A, 1
+; CHECK-NEXT:   %E = and i64 [[REG]], 4294966784
+; CHECK-NEXT:   ret i64 %E
+}
+
+; PR15959
+define i64 @test83(i16 %a, i64 %k) {
+  %conv = sext i16 %a to i32
+  %sub = add nsw i64 %k, -1
+  %sh_prom = trunc i64 %sub to i32
+  %shl = shl i32 %conv, %sh_prom
+  %sh_prom1 = zext i32 %shl to i64
+  ret i64 %sh_prom1
+
+; CHECK-LABEL: @test83(
+; CHECK: %sub = add nsw i64 %k, 4294967295
+; CHECK: %sh_prom = trunc i64 %sub to i32
+; CHECK: %shl = shl i32 %conv, %sh_prom
+}
diff --git a/test/Transforms/InstCombine/cast_ptr.ll b/test/Transforms/InstCombine/cast_ptr.ll
index 09910fbc8481..23006a84604b 100644
--- a/test/Transforms/InstCombine/cast_ptr.ll
+++ b/test/Transforms/InstCombine/cast_ptr.ll
@@ -1,12 +1,12 @@
 ; Tests to make sure elimination of casts is working correctly
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "p:32:32"
+target datalayout = "p:32:32-p1:32:32-p2:16:16"
 
 ; This shouldn't convert to getelementptr because the relationship
 ; between the arithmetic and the layout of allocated memory is
 ; entirely unknown.
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ptrtoint
 ; CHECK: add
 ; CHECK: inttoptr
@@ -18,7 +18,7 @@ define i8* @test1(i8* %t) {
 }
 
 ; These casts should be folded away.
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: icmp eq i8* %a, %b
 define i1 @test2(i8* %a, i8* %b) {
         %tmpa = ptrtoint i8* %a to i32          ; <i32> [#uses=1]
@@ -27,8 +27,28 @@ define i1 @test2(i8* %a, i8* %b) {
         ret i1 %r
 }
 
+; These casts should be folded away.
+; CHECK-LABEL: @test2_as2_same_int(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_same_int(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+  %tmpa = ptrtoint i8 addrspace(2)* %a to i16
+  %tmpb = ptrtoint i8 addrspace(2)* %b to i16
+  %r = icmp eq i16 %tmpa, %tmpb
+  ret i1 %r
+}
+
+; These casts should be folded away.
+; CHECK-LABEL: @test2_as2_larger(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_larger(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+  %tmpa = ptrtoint i8 addrspace(2)* %a to i32
+  %tmpb = ptrtoint i8 addrspace(2)* %b to i32
+  %r = icmp eq i32 %tmpa, %tmpb
+  ret i1 %r
+}
+
 ; These casts should also be folded away.
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: icmp eq i8* %a, @global
 @global = global i8 0
 define i1 @test3(i8* %a) {
@@ -41,13 +61,22 @@ define i1 @test4(i32 %A) {
   %B = inttoptr i32 %A to i8*
   %C = icmp eq i8* %B, null
   ret i1 %C
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: %C = icmp eq i32 %A, 0
-; CHECK-NEXT: ret i1 %C 
+; CHECK-NEXT: ret i1 %C
 }
 
+define i1 @test4_as2(i16 %A) {
+; CHECK-LABEL: @test4_as2(
+; CHECK-NEXT: %C = icmp eq i16 %A, 0
+; CHECK-NEXT: ret i1 %C
+  %B = inttoptr i16 %A to i8 addrspace(2)*
+  %C = icmp eq i8 addrspace(2)* %B, null
+  ret i1 %C
+}
 
-; Pulling the cast out of the load allows us to eliminate the load, and then 
+
+; Pulling the cast out of the load allows us to eliminate the load, and then
 ; the whole array.
 
         %op = type { float }
@@ -60,7 +89,7 @@ define %unop* @test5(%op* %O) {
         %tmp = load %unop* (%op*)** bitcast ([1 x %op* (%op*)*]* @Array to %unop* (%op*)**); <%unop* (%op*)*> [#uses=1]
         %tmp.2 = call %unop* %tmp( %op* %O )            ; <%unop*> [#uses=1]
         ret %unop* %tmp.2
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: call %op* @foo(%op* %O)
 }
 
@@ -69,11 +98,11 @@ define %unop* @test5(%op* %O) {
 ; InstCombine can not 'load (cast P)' -> cast (load P)' if the cast changes
 ; the address space.
 
-define i8 @test6(i8 addrspace(1)* %source) {                                                                                        
-entry: 
-  %arrayidx223 = bitcast i8 addrspace(1)* %source to i8*
+define i8 @test6(i8 addrspace(1)* %source) {
+entry:
+  %arrayidx223 = addrspacecast i8 addrspace(1)* %source to i8*
   %tmp4 = load i8* %arrayidx223
   ret i8 %tmp4
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: load i8* %arrayidx223
-} 
+}
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index 72db66e3ab0f..62cd5b3f94d5 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -22,11 +22,11 @@
 ;}
 
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 entry:
-; CHECK: xor i32 %a, %b
-; CHECK: lshr i32 %0, 31
-; CHECK: xor i32 %1, 1
+; CHECK: [[XOR1:%.*]] = xor i32 %a, %b
+; CHECK: [[SHIFT:%.*]] = lshr i32 [[XOR1]], 31
+; CHECK: [[XOR2:%.*]] = xor i32 [[SHIFT]], 1
         %0 = lshr i32 %a, 31            ; <i32> [#uses=1]
         %1 = lshr i32 %b, 31            ; <i32> [#uses=1]
         %2 = icmp eq i32 %0, %1         ; <i1> [#uses=1]
@@ -34,13 +34,13 @@ entry:
         ret i32 %3
 ; CHECK-NOT: icmp
 ; CHECK-NOT: zext
-; CHECK: ret i32 %2
+; CHECK: ret i32 [[XOR2]]
 }
 
 ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
 ; is one, not zero.
 define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
-; CHECK: @test3i
+; CHECK-LABEL: @test3i(
 entry:
 ; CHECK: xor i32 %a, %b
 ; CHECK: lshr i32 %0, 31
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
new file mode 100644
index 000000000000..9f21d5419b72
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -0,0 +1,232 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+@g = addrspace(3) global i32 89
+
+@const_zero_i8_as1 = addrspace(1) constant i8 0
+@const_zero_i32_as1 = addrspace(1) constant i32 0
+
+@const_zero_i8_as2 = addrspace(2) constant i8 0
+@const_zero_i32_as2 = addrspace(2) constant i32 0
+
+@const_zero_i8_as3 = addrspace(3) constant i8 0
+@const_zero_i32_as3 = addrspace(3) constant i32 0
+
+; Test constant folding of inttoptr (ptrtoint constantexpr)
+; The intermediate integer size is the same as the pointer size
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_same_size() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_same_size(
+; CHECK-NEXT: ret i32 addrspace(3)* @const_zero_i32_as3
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to i32 addrspace(3)*
+  ret i32 addrspace(3)* %y
+}
+
+; The intermediate integer size is larger than the pointer size
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller(
+; CHECK-NEXT: ret i32 addrspace(2)* @const_zero_i32_as2
+  %x = ptrtoint i32 addrspace(2)* @const_zero_i32_as2 to i16
+  %y = inttoptr i16 %x to i32 addrspace(2)*
+  ret i32 addrspace(2)* %y
+}
+
+; Different address spaces that are the same size, but they are
+; different so nothing should happen
+define i32 addrspace(4)* @test_constant_fold_inttoptr_as_pointer_smaller_different_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_as(
+; CHECK-NEXT: ret i32 addrspace(4)* inttoptr (i16 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i16) to i32 addrspace(4)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i16
+  %y = inttoptr i16 %x to i32 addrspace(4)*
+  ret i32 addrspace(4)* %y
+}
+
+; Make sure we don't introduce a bitcast between different sized
+; address spaces when folding this
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as(
+; CHECK-NEXT: ret i32 addrspace(2)* inttoptr (i32 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i32) to i32 addrspace(2)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to i32 addrspace(2)*
+  ret i32 addrspace(2)* %y
+}
+
+; The intermediate integer size is too small, nothing should happen
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_larger() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_larger(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i8 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i8) to i32 addrspace(3)*)
+  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i8
+  %y = inttoptr i8 %x to i32 addrspace(3)*
+  ret i32 addrspace(3)* %y
+}
+
+define i8 @const_fold_ptrtoint() {
+; CHECK-LABEL: @const_fold_ptrtoint(
+; CHECK-NEXT: ret i8 4
+  ret i8 ptrtoint (i32 addrspace(2)* inttoptr (i4 4 to i32 addrspace(2)*) to i8)
+}
+
+; Test that mask happens when the destination pointer is smaller than
+; the original
+define i8 @const_fold_ptrtoint_mask() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask(
+; CHECK-NEXT: ret i8 1
+  ret i8 ptrtoint (i32 addrspace(3)* inttoptr (i32 257 to i32 addrspace(3)*) to i8)
+}
+
+; Address space 0 is too small for the correct mask, should mask with
+; 64-bits instead of 32
+define i64 @const_fold_ptrtoint_mask_small_as0() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask_small_as0(
+; CHECK: ret i64 -1
+  ret i64 ptrtoint (i32 addrspace(1)* inttoptr (i128 -1 to i32 addrspace(1)*) to i64)
+}
+
+define i32 addrspace(3)* @const_inttoptr() {
+; CHECK-LABEL: @const_inttoptr(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i16 4 to i32 addrspace(3)*)
+  %p = inttoptr i16 4 to i32 addrspace(3)*
+  ret i32 addrspace(3)* %p
+}
+
+define i16 @const_ptrtoint() {
+; CHECK-LABEL: @const_ptrtoint(
+; CHECK-NEXT: ret i16 ptrtoint (i32 addrspace(3)* @g to i16)
+  %i = ptrtoint i32 addrspace(3)* @g to i16
+  ret i16 %i
+}
+
+define i16 @const_inttoptr_ptrtoint() {
+; CHECK-LABEL: @const_inttoptr_ptrtoint(
+; CHECK-NEXT: ret i16 9
+  ret i16 ptrtoint (i32 addrspace(3)* inttoptr (i16 9 to i32 addrspace(3)*) to i16)
+}
+
+define i1 @constant_fold_cmp_constantexpr_inttoptr() {
+; CHECK-LABEL: @constant_fold_cmp_constantexpr_inttoptr(
+; CHECK-NEXT: ret i1 true
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 0 to i32 addrspace(3)*), null
+  ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr_null(i16 %i) {
+; CHECK-LABEL: @constant_fold_inttoptr_null(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 0 to i32 addrspace(3)*)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* null to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null_2() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null_2(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* null to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint() {
+; CHECK-LABEL: @constant_fold_ptrtoint(
+; CHECK-NEXT: ret i1 true
+  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr() {
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK-NEXT: ret i1 false
+  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 27 to i32 addrspace(3)*)
+  ret i1 %x
+}
+
+@g_float_as3 = addrspace(3) global float zeroinitializer
+@g_v4f_as3 = addrspace(3) global <4 x float> zeroinitializer
+
+define float @constant_fold_bitcast_ftoi_load() {
+; CHECK-LABEL: @constant_fold_bitcast_ftoi_load(
+; CHECK: load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  %a = load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  ret float %a
+}
+
+define i32 @constant_fold_bitcast_itof_load() {
+; CHECK-LABEL: @constant_fold_bitcast_itof_load(
+; CHECK: load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  %a = load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  ret i32 %a
+}
+
+define <4 x i32> @constant_fold_bitcast_vector_as() {
+; CHECK-LABEL: @constant_fold_bitcast_vector_as(
+; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
+; CHECK: bitcast <4 x float> %1 to <4 x i32>
+  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
+  ret <4 x i32> %a
+}
+
+@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
+
+define i32 @test_cast_gep_small_indices_as() {
+; CHECK-LABEL: @test_cast_gep_small_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+   %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0
+   %x = load i32 addrspace(3)* %p, align 4
+   ret i32 %x
+}
+
+%struct.foo = type { float, float, [4 x i32], i32 addrspace(3)* }
+
+@constant_fold_global_ptr = addrspace(3) global %struct.foo {
+  float 0.0,
+  float 0.0,
+  [4 x i32] zeroinitializer,
+  i32 addrspace(3)* getelementptr ([10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0)
+}
+
+define i32 @test_cast_gep_large_indices_as() {
+; CHECK-LABEL: @test_cast_gep_large_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+   %p = getelementptr [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0
+   %x = load i32 addrspace(3)* %p, align 4
+   ret i32 %x
+}
+
+define i32 @test_constant_cast_gep_struct_indices_as() {
+; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
+; CHECK: load i32 addrspace(3)* getelementptr inbounds (%struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+  %x = getelementptr %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
+  %y = load i32 addrspace(3)* %x, align 4
+  ret i32 %y
+}
+
+@constant_data_as3 = addrspace(3) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5]
+
+define i32 @test_read_data_from_global_as3() {
+; CHECK-LABEL: @test_read_data_from_global_as3(
+; CHECK-NEXT: ret i32 2
+  %x = getelementptr [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1
+  %y = load i32 addrspace(3)* %x, align 4
+  ret i32 %y
+}
+
+@a = addrspace(1) constant i32 9
+@b = addrspace(1) constant i32 23
+@c = addrspace(1) constant i32 34
+@d = addrspace(1) constant i32 99
+
+@ptr_array = addrspace(2) constant [4 x i32 addrspace(1)*] [ i32 addrspace(1)* @a, i32 addrspace(1)* @b, i32 addrspace(1)* @c, i32 addrspace(1)* @d]
+@indirect = addrspace(0) constant i32 addrspace(1)* addrspace(2)* getelementptr inbounds ([4 x i32 addrspace(1)*] addrspace(2)* @ptr_array, i1 0, i32 2)
+
+define i32 @constant_through_array_as_ptrs() {
+; CHECK-LABEL: @constant_through_array_as_ptrs(
+; CHECK-NEXT: ret i32 34
+  %p = load i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
+  %a = load i32 addrspace(1)* addrspace(2)* %p, align 4
+  %b = load i32 addrspace(1)* %a, align 4
+  ret i32 %b
+}
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index e5b16ea0ffdc..5fb56023a4f6 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
 ; Constant folding should fix notionally out-of-bounds indices
 ; and add inbounds keywords.
@@ -56,7 +56,7 @@ define void @frob() {
 
 
 ; PR8883 - Constant fold exotic gep subtract
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 @X = global [1000 x i8] zeroinitializer, align 16
 
 define i64 @test2() {
@@ -72,3 +72,21 @@ entry:
   ret i64 %E
   ; CHECK: ret i64 1000
 }
+
+@X_as1 = addrspace(1) global [1000 x i8] zeroinitializer, align 16
+
+define i16 @test2_as1() {
+; CHECK-LABEL: @test2_as1(
+  ; CHECK: ret i16 1000
+
+entry:
+  %A = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 1, i64 0) to i8 addrspace(1)*
+  %B = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8] addrspace(1)* @X_as1, i64 0, i64 0) to i8 addrspace(1)*
+
+  %B2 = ptrtoint i8 addrspace(1)* %B to i16
+  %C = sub i16 0, %B2
+  %D = getelementptr i8 addrspace(1)* %A, i16 %C
+  %E = ptrtoint i8 addrspace(1)* %D to i16
+
+  ret i16 %E
+}
diff --git a/test/Transforms/InstCombine/cos-1.ll b/test/Transforms/InstCombine/cos-1.ll
index b92e448abd9f..c2e9a0db4053 100644
--- a/test/Transforms/InstCombine/cos-1.ll
+++ b/test/Transforms/InstCombine/cos-1.ll
@@ -10,7 +10,7 @@ declare double @cos(double)
 ; Check cos(-x) -> cos(x);
 
 define double @test_simplify1(double %d) {
-; NO-FLOAT-SHRINK: @test_simplify1
+; NO-FLOAT-SHRINK-LABEL: @test_simplify1(
   %neg = fsub double -0.000000e+00, %d
   %cos = call double @cos(double %neg)
 ; NO-FLOAT-SHRINK: call double @cos(double %d)
@@ -18,7 +18,7 @@ define double @test_simplify1(double %d) {
 }
 
 define float @test_simplify2(float %f) {
-; DO-FLOAT-SHRINK: @test_simplify2
+; DO-FLOAT-SHRINK-LABEL: @test_simplify2(
   %conv1 = fpext float %f to double
   %neg = fsub double -0.000000e+00, %conv1
   %cos = call double @cos(double %neg)
@@ -28,7 +28,7 @@ define float @test_simplify2(float %f) {
 }
 
 define float @test_simplify3(float %f) {
-; NO-FLOAT-SHRINK: @test_simplify3
+; NO-FLOAT-SHRINK-LABEL: @test_simplify3(
   %conv1 = fpext float %f to double
   %neg = fsub double -0.000000e+00, %conv1
   %cos = call double @cos(double %neg)
diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll
index 2f2dfafe484d..c9a9c7c07712 100644
--- a/test/Transforms/InstCombine/cos-2.ll
+++ b/test/Transforms/InstCombine/cos-2.ll
@@ -9,7 +9,7 @@ declare float @cos(double)
 ; Check that cos functions with the wrong prototype aren't simplified.
 
 define float @test_no_simplify1(double %d) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %neg = fsub double -0.000000e+00, %d
   %cos = call float @cos(double %neg)
 ; CHECK: call float @cos(double %neg)
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 084efdc989f9..2e3785fe597e 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -11,14 +11,18 @@ define void @foo() nounwind ssp {
 
 declare i32 @printf(i8*, ...)
 
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"m.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"m.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 5, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !0, i32 4, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 6, i32 1, metadata !6, null}
-
+!8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
+!9 = metadata !{metadata !0}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index a9e3de3b3f7b..75082dcae055 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -2,7 +2,7 @@
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
 declare i8* @foo(i8*, i32, i64, i64) nounwind
 
@@ -23,34 +23,37 @@ entry:
   %tmp1 = load i32* %__val.addr, align 4, !dbg !21
   %tmp2 = load i64* %__len.addr, align 8, !dbg !21
   %tmp3 = load i8** %__dest.addr, align 8, !dbg !21
-  %0 = call i64 @llvm.objectsize.i64(i8* %tmp3, i1 false), !dbg !21
+  %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21
   %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
   ret i8* %call, !dbg !21
 }
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
 !2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 12, metadata !26, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6}
-!6 = metadata !{i32 786447, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786468, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786468, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 589846, metadata !3, metadata !"size_t", metadata !2, i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
-!11 = metadata !{i32 589846, metadata !3, metadata !"__darwin_size_t", metadata !2, i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786468, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"size_t", i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
+!11 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"__darwin_size_t", i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 786468, null, metadata !3, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !16 = metadata !{i32 78, i32 28, metadata !1, null}
 !18 = metadata !{i32 78, i32 40, metadata !1, null}
 !20 = metadata !{i32 78, i32 54, metadata !1, null}
 !21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 786443, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !27, metadata !23, i32 80, i32 3, i32 7} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !27, metadata !1, i32 79, i32 1, i32 6} ; [ DW_TAG_lexical_block ]
 !24 = metadata !{metadata !1}
 !25 = metadata !{metadata !0, metadata !7, metadata !9}
 !26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
 !27 = metadata !{metadata !"string.h", metadata !"Game"}
 !28 = metadata !{metadata !"bits.c", metadata !"Game"}
+!29 = metadata !{i32 0}
+!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
index c2c29368b1a8..665278861791 100644
--- a/test/Transforms/InstCombine/disable-simplify-libcalls.ll
+++ b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
@@ -51,42 +51,42 @@ declare i32 @printf(i8*)
 declare i32 @sprintf(i8*, i8*)
 
 define double @t1(double %x) {
-; CHECK: @t1
+; CHECK-LABEL: @t1(
   %ret = call double @ceil(double %x)
   ret double %ret
 ; CHECK: call double @ceil
 }
 
 define double @t2(double %x, double %y) {
-; CHECK: @t2
+; CHECK-LABEL: @t2(
   %ret = call double @copysign(double %x, double %y)
   ret double %ret
 ; CHECK: call double @copysign
 }
 
 define double @t3(double %x) {
-; CHECK: @t3
+; CHECK-LABEL: @t3(
   %call = call double @cos(double %x)
   ret double %call
 ; CHECK: call double @cos
 }
 
 define double @t4(double %x) {
-; CHECK: @t4
+; CHECK-LABEL: @t4(
   %ret = call double @fabs(double %x)
   ret double %ret
 ; CHECK: call double @fabs
 }
 
 define double @t5(double %x) {
-; CHECK: @t5
+; CHECK-LABEL: @t5(
   %ret = call double @floor(double %x)
   ret double %ret
 ; CHECK: call double @floor
 }
 
 define i8* @t6(i8* %x) {
-; CHECK: @t6
+; CHECK-LABEL: @t6(
   %empty = getelementptr [1 x i8]* @empty, i32 0, i32 0
   %ret = call i8* @strcat(i8* %x, i8* %empty)
   ret i8* %ret
@@ -94,7 +94,7 @@ define i8* @t6(i8* %x) {
 }
 
 define i8* @t7(i8* %x) {
-; CHECK: @t7
+; CHECK-LABEL: @t7(
   %empty = getelementptr [1 x i8]* @empty, i32 0, i32 0
   %ret = call i8* @strncat(i8* %x, i8* %empty, i32 1)
   ret i8* %ret
@@ -102,7 +102,7 @@ define i8* @t7(i8* %x) {
 }
 
 define i8* @t8() {
-; CHECK: @t8
+; CHECK-LABEL: @t8(
   %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
   %ret = call i8* @strchr(i8* %x, i32 119)
   ret i8* %ret
@@ -110,7 +110,7 @@ define i8* @t8() {
 }
 
 define i8* @t9() {
-; CHECK: @t9
+; CHECK-LABEL: @t9(
   %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
   %ret = call i8* @strrchr(i8* %x, i32 119)
   ret i8* %ret
@@ -118,7 +118,7 @@ define i8* @t9() {
 }
 
 define i32 @t10() {
-; CHECK: @t10
+; CHECK-LABEL: @t10(
   %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
   %y = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0
   %ret = call i32 @strcmp(i8* %x, i8* %y)
@@ -127,7 +127,7 @@ define i32 @t10() {
 }
 
 define i32 @t11() {
-; CHECK: @t11
+; CHECK-LABEL: @t11(
   %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
   %y = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0
   %ret = call i32 @strncmp(i8* %x, i8* %y, i64 3)
@@ -136,7 +136,7 @@ define i32 @t11() {
 }
 
 define i8* @t12(i8* %x) {
-; CHECK: @t12
+; CHECK-LABEL: @t12(
   %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
   %ret = call i8* @strcpy(i8* %x, i8* %y)
   ret i8* %ret
@@ -144,7 +144,7 @@ define i8* @t12(i8* %x) {
 }
 
 define i8* @t13(i8* %x) {
-; CHECK: @t13
+; CHECK-LABEL: @t13(
   %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
   %ret = call i8* @stpcpy(i8* %x, i8* %y)
   ret i8* %ret
@@ -152,7 +152,7 @@ define i8* @t13(i8* %x) {
 }
 
 define i8* @t14(i8* %x) {
-; CHECK: @t14
+; CHECK-LABEL: @t14(
   %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
   %ret = call i8* @strncpy(i8* %x, i8* %y, i64 3)
   ret i8* %ret
@@ -160,7 +160,7 @@ define i8* @t14(i8* %x) {
 }
 
 define i64 @t15() {
-; CHECK: @t15
+; CHECK-LABEL: @t15(
   %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
   %ret = call i64 @strlen(i8* %x)
   ret i64 %ret
@@ -168,7 +168,7 @@ define i64 @t15() {
 }
 
 define i8* @t16(i8* %x) {
-; CHECK: @t16
+; CHECK-LABEL: @t16(
   %y = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0
   %ret = call i8* @strpbrk(i8* %x, i8* %y)
   ret i8* %ret
@@ -176,7 +176,7 @@ define i8* @t16(i8* %x) {
 }
 
 define i64 @t17(i8* %x) {
-; CHECK: @t17
+; CHECK-LABEL: @t17(
   %y = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0
   %ret = call i64 @strspn(i8* %x, i8* %y)
   ret i64 %ret
@@ -184,7 +184,7 @@ define i64 @t17(i8* %x) {
 }
 
 define double @t18(i8** %y) {
-; CHECK: @t18
+; CHECK-LABEL: @t18(
   %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0
   %ret = call double @strtod(i8* %x, i8** %y)
   ret double %ret
@@ -192,7 +192,7 @@ define double @t18(i8** %y) {
 }
 
 define float @t19(i8** %y) {
-; CHECK: @t19
+; CHECK-LABEL: @t19(
   %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0
   %ret = call float @strtof(i8* %x, i8** %y)
   ret float %ret
@@ -200,7 +200,7 @@ define float @t19(i8** %y) {
 }
 
 define x86_fp80 @t20(i8** %y) {
-; CHECK: @t20
+; CHECK-LABEL: @t20(
   %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0
   %ret = call x86_fp80 @strtold(i8* %x, i8** %y)
   ret x86_fp80 %ret
@@ -208,7 +208,7 @@ define x86_fp80 @t20(i8** %y) {
 }
 
 define i64 @t21(i8** %y) {
-; CHECK: @t21
+; CHECK-LABEL: @t21(
   %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
   %ret = call i64 @strtol(i8* %x, i8** %y, i32 10)
   ret i64 %ret
@@ -216,7 +216,7 @@ define i64 @t21(i8** %y) {
 }
 
 define i64 @t22(i8** %y) {
-; CHECK: @t22
+; CHECK-LABEL: @t22(
   %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
   %ret = call i64 @strtoll(i8* %x, i8** %y, i32 10)
   ret i64 %ret
@@ -224,7 +224,7 @@ define i64 @t22(i8** %y) {
 }
 
 define i64 @t23(i8** %y) {
-; CHECK: @t23
+; CHECK-LABEL: @t23(
   %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
   %ret = call i64 @strtoul(i8* %x, i8** %y, i32 10)
   ret i64 %ret
@@ -232,7 +232,7 @@ define i64 @t23(i8** %y) {
 }
 
 define i64 @t24(i8** %y) {
-; CHECK: @t24
+; CHECK-LABEL: @t24(
   %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
   %ret = call i64 @strtoull(i8* %x, i8** %y, i32 10)
   ret i64 %ret
@@ -240,7 +240,7 @@ define i64 @t24(i8** %y) {
 }
 
 define i64 @t25(i8* %y) {
-; CHECK: @t25
+; CHECK-LABEL: @t25(
   %x = getelementptr [1 x i8]* @empty, i32 0, i32 0
   %ret = call i64 @strcspn(i8* %x, i8* %y)
   ret i64 %ret
@@ -248,35 +248,35 @@ define i64 @t25(i8* %y) {
 }
 
 define i32 @t26(i32 %y) {
-; CHECK: @t26
+; CHECK-LABEL: @t26(
   %ret = call i32 @abs(i32 %y)
   ret i32 %ret
 ; CHECK: call i32 @abs
 }
 
 define i32 @t27(i32 %y) {
-; CHECK: @t27
+; CHECK-LABEL: @t27(
   %ret = call i32 @ffs(i32 %y)
   ret i32 %ret
 ; CHECK: call i32 @ffs
 }
 
 define i32 @t28(i64 %y) {
-; CHECK: @t28
+; CHECK-LABEL: @t28(
   %ret = call i32 @ffsl(i64 %y)
   ret i32 %ret
 ; CHECK: call i32 @ffsl
 }
 
 define i32 @t29(i64 %y) {
-; CHECK: @t29
+; CHECK-LABEL: @t29(
   %ret = call i32 @ffsll(i64 %y)
   ret i32 %ret
 ; CHECK: call i32 @ffsll
 }
 
 define void @t30() {
-; CHECK: @t30
+; CHECK-LABEL: @t30(
   %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
   call i32 @fprintf(i8* null, i8* %x)
   ret void
@@ -284,42 +284,42 @@ define void @t30() {
 }
 
 define i32 @t31(i32 %y) {
-; CHECK: @t31
+; CHECK-LABEL: @t31(
   %ret = call i32 @isascii(i32 %y)
   ret i32 %ret
 ; CHECK: call i32 @isascii
 }
 
 define i32 @t32(i32 %y) {
-; CHECK: @t32
+; CHECK-LABEL: @t32(
   %ret = call i32 @isdigit(i32 %y)
   ret i32 %ret
 ; CHECK: call i32 @isdigit
 }
 
 define i32 @t33(i32 %y) {
-; CHECK: @t33
+; CHECK-LABEL: @t33(
   %ret = call i32 @toascii(i32 %y)
   ret i32 %ret
 ; CHECK: call i32 @toascii
 }
 
 define i64 @t34(i64 %y) {
-; CHECK: @t34
+; CHECK-LABEL: @t34(
   %ret = call i64 @labs(i64 %y)
   ret i64 %ret
 ; CHECK: call i64 @labs
 }
 
 define i64 @t35(i64 %y) {
-; CHECK: @t35
+; CHECK-LABEL: @t35(
   %ret = call i64 @llabs(i64 %y)
   ret i64 %ret
 ; CHECK: call i64 @llabs
 }
 
 define void @t36() {
-; CHECK: @t36
+; CHECK-LABEL: @t36(
   %x = getelementptr inbounds [1 x i8]* @empty, i32 0, i32 0
   call i32 @printf(i8* %x)
   ret void
@@ -327,7 +327,7 @@ define void @t36() {
 }
 
 define void @t37(i8* %x) {
-; CHECK: @t37
+; CHECK-LABEL: @t37(
   %y = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
   call i32 @sprintf(i8* %x, i8* %y)
   ret void
diff --git a/test/Transforms/InstCombine/div-shift-crash.ll b/test/Transforms/InstCombine/div-shift-crash.ll
new file mode 100644
index 000000000000..a61972456403
--- /dev/null
+++ b/test/Transforms/InstCombine/div-shift-crash.ll
@@ -0,0 +1,101 @@
+; RUN: opt -instcombine -disable-output < %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129 = type <{ i64 }>
+
+; Function Attrs: nounwind
+define void @main() #0 {
+entry:
+  %l_819.i.i = alloca %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129, align 8
+  br i1 undef, label %land.lhs.true, label %for.cond.i
+
+land.lhs.true:                                    ; preds = %entry
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %land.lhs.true, %entry
+  %0 = getelementptr inbounds %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129* %l_819.i.i, i64 0, i32 0
+  br label %for.cond.i6.i.i
+
+for.cond.i6.i.i:                                  ; preds = %for.body.i8.i.i, %for.cond.i
+  br i1 undef, label %for.body.i8.i.i, label %lbl_707.i.i.i
+
+for.body.i8.i.i:                                  ; preds = %for.cond.i6.i.i
+  br label %for.cond.i6.i.i
+
+lbl_707.i.i.i:                                    ; preds = %for.cond.i6.i.i
+  br i1 undef, label %lor.rhs.i.i.i, label %lor.end.i.i.i
+
+lor.rhs.i.i.i:                                    ; preds = %lbl_707.i.i.i
+  br label %lor.end.i.i.i
+
+lor.end.i.i.i:                                    ; preds = %lor.rhs.i.i.i, %lbl_707.i.i.i
+  br label %for.cond1.i.i.i.i
+
+for.cond1.i.i.i.i:                                ; preds = %for.body4.i.i.i.i, %lor.end.i.i.i
+  br i1 undef, label %for.body4.i.i.i.i, label %func_39.exit.i.i
+
+for.body4.i.i.i.i:                                ; preds = %for.cond1.i.i.i.i
+  br label %for.cond1.i.i.i.i
+
+func_39.exit.i.i:                                 ; preds = %for.cond1.i.i.i.i
+  %l_8191.sroa.0.0.copyload.i.i = load i64* %0, align 1
+  br label %for.cond1.i.i.i
+
+for.cond1.i.i.i:                                  ; preds = %safe_div_func_uint32_t_u_u.exit.i.i.i, %func_39.exit.i.i
+  br i1 undef, label %for.cond7.i.i.i, label %func_11.exit.i
+
+for.cond7.i.i.i:                                  ; preds = %for.end30.i.i.i, %for.cond1.i.i.i
+  %storemerge.i.i.i = phi i32 [ %sub.i.i.i, %for.end30.i.i.i ], [ 4, %for.cond1.i.i.i ]
+  br i1 undef, label %for.cond22.i.i.i, label %for.end32.i.i.i
+
+for.cond22.i.i.i:                                 ; preds = %for.body25.i.i.i, %for.cond7.i.i.i
+  br i1 undef, label %for.body25.i.i.i, label %for.end30.i.i.i
+
+for.body25.i.i.i:                                 ; preds = %for.cond22.i.i.i
+  br label %for.cond22.i.i.i
+
+for.end30.i.i.i:                                  ; preds = %for.cond22.i.i.i
+  %sub.i.i.i = add nsw i32 0, -1
+  br label %for.cond7.i.i.i
+
+for.end32.i.i.i:                                  ; preds = %for.cond7.i.i.i
+  %conv33.i.i.i = trunc i64 %l_8191.sroa.0.0.copyload.i.i to i32
+  %xor.i.i.i.i = xor i32 %storemerge.i.i.i, -701565022
+  %sub.i.i.i.i = sub nsw i32 0, %storemerge.i.i.i
+  %xor3.i.i.i.i = xor i32 %sub.i.i.i.i, %storemerge.i.i.i
+  %and4.i.i.i.i = and i32 %xor.i.i.i.i, %xor3.i.i.i.i
+  %cmp.i.i.i.i = icmp slt i32 %and4.i.i.i.i, 0
+  %sub5.i.i.i.i = sub nsw i32 -701565022, %storemerge.i.i.i
+  %.sub5.i.i.i.i = select i1 %cmp.i.i.i.i, i32 -701565022, i32 %sub5.i.i.i.i
+  br i1 undef, label %safe_div_func_uint32_t_u_u.exit.i.i.i, label %cond.false.i.i.i.i
+
+cond.false.i.i.i.i:                               ; preds = %for.end32.i.i.i
+  %div.i.i.i.i = udiv i32 %conv33.i.i.i, %.sub5.i.i.i.i
+  br label %safe_div_func_uint32_t_u_u.exit.i.i.i
+
+safe_div_func_uint32_t_u_u.exit.i.i.i:            ; preds = %cond.false.i.i.i.i, %for.end32.i.i.i
+  %cond.i.i.i.i = phi i32 [ %div.i.i.i.i, %cond.false.i.i.i.i ], [ %conv33.i.i.i, %for.end32.i.i.i ]
+  %cmp35.i.i.i = icmp ne i32 %cond.i.i.i.i, -7
+  br label %for.cond1.i.i.i
+
+func_11.exit.i:                                   ; preds = %for.cond1.i.i.i
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %func_11.exit.i
+  unreachable
+
+for.end:                                          ; preds = %func_11.exit.i
+  br label %for.cond15
+
+for.cond15:                                       ; preds = %for.cond19, %for.end
+  br i1 undef, label %for.cond19, label %for.end45
+
+for.cond19:                                       ; preds = %for.cond15
+  br label %for.cond15
+
+for.end45:                                        ; preds = %for.cond15
+  unreachable
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
index e0372ebac184..3350f1940554 100644
--- a/test/Transforms/InstCombine/div-shift.ll
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -35,3 +35,41 @@ define i64 @t3(i64 %x, i32 %y) nounwind  {
   %3 = udiv i64 %x, %2
   ret i64 %3
 }
+
+define i32 @t4(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @t4(
+; CHECK-NOT: udiv
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %y, 5
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 5, i32 %y
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, [[SEL]]
+; CHECK-NEXT: ret i32 [[SHR]]
+  %1 = shl i32 1, %y
+  %2 = icmp ult i32 %1, 32
+  %3 = select i1 %2, i32 32, i32 %1        ; divisor = unsigned max(1 << %y, 32)
+  %4 = udiv i32 %x, %3
+  ret i32 %4
+}
+
+define i32 @t5(i1 %x, i1 %y, i32 %V) nounwind {
+; CHECK-LABEL: @t5(
+; CHECK-NOT: udiv
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 %x, i32 5, i32 6
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 %V, [[SEL1]]
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 %y, i32 [[LSHR]], i32 0
+; CHECK-NEXT: ret i32 [[SEL2]]
+  %1 = shl i32 1, %V
+  %2 = select i1 %x, i32 32, i32 64
+  %3 = select i1 %y, i32 %2, i32 %1        ; divisor = (32 or 64) when %y, else 1 << %V
+  %4 = udiv i32 %V, %3
+  ret i32 %4
+}
+
+define i32 @t6(i32 %x, i32 %z) nounwind{
+; CHECK-LABEL: @t6(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
+; CHECK-NOT: udiv i32 %z, %x
+  %x_is_zero = icmp eq i32 %x, 0
+  %divisor = select i1 %x_is_zero, i32 1, i32 %x        ; 1 when %x == 0, otherwise %x
+  %y = udiv i32 %z, %divisor
+  ret i32 %y
+}
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 8a0897b972de..f67fd1c51be3 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -5,7 +5,7 @@
 define i32 @test1(i32 %A) {
         %B = sdiv i32 %A, 1             ; <i32> [#uses=1]
         ret i32 %B
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: ret i32 %A
 }
 
@@ -13,7 +13,7 @@ define i32 @test2(i32 %A) {
         ; => Shift
         %B = udiv i32 %A, 8             ; <i32> [#uses=1]
         ret i32 %B
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: lshr i32 %A, 3
 }
 
@@ -21,7 +21,7 @@ define i32 @test3(i32 %A) {
         ; => 0, don't need to keep traps
         %B = sdiv i32 0, %A             ; <i32> [#uses=1]
         ret i32 %B
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: ret i32 0
 }
 
@@ -29,7 +29,7 @@ define i32 @test4(i32 %A) {
         ; 0-A
         %B = sdiv i32 %A, -1            ; <i32> [#uses=1]
         ret i32 %B
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: sub i32 0, %A
 }
 
@@ -37,7 +37,7 @@ define i32 @test5(i32 %A) {
         %B = udiv i32 %A, -16           ; <i32> [#uses=1]
         %C = udiv i32 %B, -4            ; <i32> [#uses=1]
         ret i32 %C
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: ret i32 0
 }
 
@@ -46,7 +46,7 @@ define i1 @test6(i32 %A) {
         ; A < 123
         %C = icmp eq i32 %B, 0          ; <i1> [#uses=1]
         ret i1 %C
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: icmp ult i32 %A, 123
 }
 
@@ -55,7 +55,7 @@ define i1 @test7(i32 %A) {
         ; A >= 20 && A < 30
         %C = icmp eq i32 %B, 2          ; <i1> [#uses=1]
         ret i1 %C
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: add i32 %A, -20
 ; CHECK-NEXT: icmp ult i32
 }
@@ -65,7 +65,7 @@ define i1 @test8(i8 %A) {
         ; A >= 246
         %C = icmp eq i8 %B, 2           ; <i1> [#uses=1]
         ret i1 %C
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT: icmp ugt i8 %A, -11
 }
 
@@ -74,7 +74,7 @@ define i1 @test9(i8 %A) {
         ; A < 246
         %C = icmp ne i8 %B, 2           ; <i1> [#uses=1]
         ret i1 %C
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: icmp ult i8 %A, -10
 }
 
@@ -82,7 +82,7 @@ define i32 @test10(i32 %X, i1 %C) {
         %V = select i1 %C, i32 64, i32 8                ; <i32> [#uses=1]
         %R = udiv i32 %X, %V            ; <i32> [#uses=1]
         ret i32 %R
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NEXT: select i1 %C, i32 6, i32 3
 ; CHECK-NEXT: lshr i32 %X
 }
@@ -91,7 +91,7 @@ define i32 @test11(i32 %X, i1 %C) {
         %A = select i1 %C, i32 1024, i32 32             ; <i32> [#uses=1]
         %B = udiv i32 %X, %A            ; <i32> [#uses=1]
         ret i32 %B
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NEXT: select i1 %C, i32 10, i32 5
 ; CHECK-NEXT: lshr i32 %X
 }
@@ -100,14 +100,14 @@ define i32 @test11(i32 %X, i1 %C) {
 define i32 @test12(i32 %x) nounwind  {
 	%tmp3 = udiv i32 %x, %x		; 1
 	ret i32 %tmp3
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test13(i32 %x) nounwind  {
 	%tmp3 = sdiv i32 %x, %x		; 1
 	ret i32 %tmp3
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK-NEXT: ret i32 1
 }
 
@@ -115,7 +115,7 @@ define i32 @test14(i8 %x) nounwind {
 	%zext = zext i8 %x to i32
 	%div = udiv i32 %zext, 257	; 0
 	ret i32 %div
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: ret i32 0
 }
 
@@ -125,7 +125,7 @@ define i32 @test15(i32 %a, i32 %b) nounwind {
   %div = lshr i32 %shl, 2
   %div2 = udiv i32 %a, %div
   ret i32 %div2
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT: add i32 %b, -2
 ; CHECK-NEXT: lshr i32 %a, 
 ; CHECK-NEXT: ret i32
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index e5448ee00765..5cacb591e006 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -263,6 +263,7 @@ define double @sin_test2(float %f) nounwind readnone {
    ret double %call
 ; CHECK: call double @sin(double %conv)
 }
+
 define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: sqrt_test
    %conv = fpext float %f to double
@@ -272,6 +273,15 @@ define float @sqrt_test(float %f) nounwind readnone {
 ; CHECK: call float @sqrtf(float %f)
 }
 
+define float @sqrt_int_test(float %f) nounwind readnone {
+; CHECK: sqrt_int_test
+   %conv = fpext float %f to double
+   %call = call double @llvm.sqrt.f64(double %conv)
+   %conv1 = fptrunc double %call to float
+   ret float %conv1
+; CHECK: call float @llvm.sqrt.f32(float %f)
+}
+
 define double @sqrt_test2(float %f) nounwind readnone {
 ; CHECK: sqrt_test2
    %conv = fpext float %f to double
@@ -331,3 +341,6 @@ declare double @acos(double) nounwind readnone
 declare double @acosh(double) nounwind readnone
 declare double @asin(double) nounwind readnone
 declare double @asinh(double) nounwind readnone
+
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll
index 6645d99035fa..46bb60569fe2 100644
--- a/test/Transforms/InstCombine/enforce-known-alignment.ll
+++ b/test/Transforms/InstCombine/enforce-known-alignment.ll
@@ -1,8 +1,12 @@
-; RUN: opt < %s -instcombine -S | grep alloca | grep "align 16"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+; RUN: opt  -instcombine -S %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
 
 define void @foo(i32) {
+; CHECK-LABEL: @foo(
+; CHECK: alloca
+; CHECK: align 16
 	%2 = alloca [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], align 16		; <[3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]*> [#uses=1]
 	%3 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]* %2, i32 0, i32 0		; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1]
 	%4 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>* %3, i32 0, i32 0		; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1]
@@ -11,8 +15,24 @@ define void @foo(i32) {
 	%7 = getelementptr { [8 x i16] }* %6, i32 0, i32 0		; <[8 x i16]*> [#uses=1]
 	%8 = getelementptr [8 x i16]* %7, i32 0, i32 0		; <i16*> [#uses=1]
 	store i16 0, i16* %8, align 16
-        call void @bar(i16* %8)
+    call void @bar(i16* %8)
 	ret void
 }
 
 declare void @bar(i16*)
+
+define void @foo_as1(i32 %a, [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b) {
+; CHECK-LABEL: @foo_as1(
+; CHECK: align 16
+  %1 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b, i32 0, i32 0        ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)*> [#uses=1]
+  %2 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)* %1, i32 0, i32 0      ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)*> [#uses=1]
+  %3 = getelementptr { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)* %2, i32 0, i32 0        ; <{ [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } addrspace(1)*> [#uses=1]
+  %4 = bitcast { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } addrspace(1)* %3 to { [8 x i16] } addrspace(1)*     ; <{ [8 x i16] } addrspace(1)*> [#uses=1]
+  %5 = getelementptr { [8 x i16] } addrspace(1)* %4, i32 0, i32 0     ; <[8 x i16] addrspace(1)*> [#uses=1]
+  %6 = getelementptr [8 x i16] addrspace(1)* %5, i32 0, i32 0     ; <i16 addrspace(1)*> [#uses=2]
+  store i16 0, i16 addrspace(1)* %6, align 16
+  call void @bar_as1(i16 addrspace(1)* %6)
+  ret void
+}
+
+declare void @bar_as1(i16 addrspace(1)*)
diff --git a/test/Transforms/InstCombine/err-rep-cold.ll b/test/Transforms/InstCombine/err-rep-cold.ll
new file mode 100644
index 000000000000..0cbafc43e0f6
--- /dev/null
+++ b/test/Transforms/InstCombine/err-rep-cold.ll
@@ -0,0 +1,77 @@
+; Test that instcombine marks calls to error-reporting functions (fprintf/fwrite on stderr) as cold, for use by the static branch probability heuristics.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@stdout = external global %struct._IO_FILE*
+@stderr = external global %struct._IO_FILE*
+@.str = private unnamed_addr constant [13 x i8] c"an error: %d\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"an error\00", align 1
+
+define i32 @test1(i32 %a) #0 {
+; CHECK-LABEL: @test1(
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stderr, align 8
+  %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
+  br label %return
+
+; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[AT1:[0-9]+]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) #1
+
+define i32 @test2(i32 %a) #0 {
+; CHECK-LABEL: @test2(
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stderr, align 8
+  %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+  br label %return
+
+; CHECK: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2:[0-9]+]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1
+
+define i32 @test3(i32 %a) #0 {
+; CHECK-LABEL: @test3(
+entry:
+  %cmp = icmp sgt i32 %a, 8
+  br i1 %cmp, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %0 = load %struct._IO_FILE** @stdout, align 8
+  %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+  br label %return
+
+; CHECK-NOT: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[AT2]]
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+; CHECK: attributes #[[AT1]] = { cold nounwind }
+; CHECK: attributes #[[AT2]] = { cold }
+
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 88ca88c3b927..868d60ac46b1 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -1,20 +1,20 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK: @sdiv1
+; CHECK-LABEL: @sdiv1(
 ; CHECK: sdiv i32 %x, 8
 define i32 @sdiv1(i32 %x) {
   %y = sdiv i32 %x, 8
   ret i32 %y
 }
 
-; CHECK: @sdiv2
+; CHECK-LABEL: @sdiv2(
 ; CHECK: ashr exact i32 %x, 3
 define i32 @sdiv2(i32 %x) {
   %y = sdiv exact i32 %x, 8
   ret i32 %y
 }
 
-; CHECK: @sdiv3
+; CHECK-LABEL: @sdiv3(
 ; CHECK: %y = srem i32 %x, 3
 ; CHECK: %z = sub i32 %x, %y
 ; CHECK: ret i32 %z
@@ -24,7 +24,7 @@ define i32 @sdiv3(i32 %x) {
   ret i32 %z
 }
 
-; CHECK: @sdiv4
+; CHECK-LABEL: @sdiv4(
 ; CHECK: ret i32 %x
 define i32 @sdiv4(i32 %x) {
   %y = sdiv exact i32 %x, 3
@@ -42,7 +42,7 @@ define i32 @sdiv5(i32 %x) {
   ret i32 %z
 }
 
-; CHECK: @sdiv6
+; CHECK-LABEL: @sdiv6(
 ; CHECK: %z = sub i32 0, %x
 ; CHECK: ret i32 %z
 define i32 @sdiv6(i32 %x) {
@@ -51,7 +51,7 @@ define i32 @sdiv6(i32 %x) {
   ret i32 %z
 }
 
-; CHECK: @udiv1
+; CHECK-LABEL: @udiv1(
 ; CHECK: ret i32 %x
 define i32 @udiv1(i32 %x, i32 %w) {
   %y = udiv exact i32 %x, %w
@@ -59,7 +59,7 @@ define i32 @udiv1(i32 %x, i32 %w) {
   ret i32 %z
 }
 
-; CHECK: @udiv2
+; CHECK-LABEL: @udiv2(
 ; CHECK: %z = lshr exact i32 %x, %w
 ; CHECK: ret i32 %z
 define i32 @udiv2(i32 %x, i32 %w) {
@@ -68,7 +68,7 @@ define i32 @udiv2(i32 %x, i32 %w) {
   ret i32 %z
 }
 
-; CHECK: @ashr1
+; CHECK-LABEL: @ashr1(
 ; CHECK: %B = ashr exact i64 %A, 2
 ; CHECK: ret i64 %B
 define i64 @ashr1(i64 %X) nounwind {
@@ -78,7 +78,7 @@ define i64 @ashr1(i64 %X) nounwind {
 }
 
 ; PR9120
-; CHECK: @ashr_icmp1
+; CHECK-LABEL: @ashr_icmp1(
 ; CHECK: %B = icmp eq i64 %X, 0
 ; CHECK: ret i1 %B
 define i1 @ashr_icmp1(i64 %X) nounwind {
@@ -87,7 +87,7 @@ define i1 @ashr_icmp1(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @ashr_icmp2
+; CHECK-LABEL: @ashr_icmp2(
 ; CHECK: %Z = icmp slt i64 %X, 16
 ; CHECK: ret i1 %Z
 define i1 @ashr_icmp2(i64 %X) nounwind {
@@ -98,7 +98,7 @@ define i1 @ashr_icmp2(i64 %X) nounwind {
 
 ; PR9998
 ; Make sure we don't transform the ashr here into an sdiv
-; CHECK: @pr9998
+; CHECK-LABEL: @pr9998(
 ; CHECK:      [[BIT:%[A-Za-z0-9.]+]] = and i32 %V, 1
 ; CHECK-NEXT: [[CMP:%[A-Za-z0-9.]+]] = icmp ne i32 [[BIT]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -113,7 +113,7 @@ entry:
 
 
 
-; CHECK: @udiv_icmp1
+; CHECK-LABEL: @udiv_icmp1(
 ; CHECK: icmp ne i64 %X, 0
 define i1 @udiv_icmp1(i64 %X) nounwind {
   %A = udiv exact i64 %X, 5   ; X/5
@@ -121,7 +121,7 @@ define i1 @udiv_icmp1(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @sdiv_icmp1
+; CHECK-LABEL: @sdiv_icmp1(
 ; CHECK: icmp eq i64 %X, 0
 define i1 @sdiv_icmp1(i64 %X) nounwind {
   %A = sdiv exact i64 %X, 5   ; X/5 == 0 --> x == 0
@@ -129,7 +129,7 @@ define i1 @sdiv_icmp1(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @sdiv_icmp2
+; CHECK-LABEL: @sdiv_icmp2(
 ; CHECK: icmp eq i64 %X, 5
 define i1 @sdiv_icmp2(i64 %X) nounwind {
   %A = sdiv exact i64 %X, 5   ; X/5 == 1 --> x == 5
@@ -137,7 +137,7 @@ define i1 @sdiv_icmp2(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @sdiv_icmp3
+; CHECK-LABEL: @sdiv_icmp3(
 ; CHECK: icmp eq i64 %X, -5
 define i1 @sdiv_icmp3(i64 %X) nounwind {
   %A = sdiv exact i64 %X, 5   ; X/5 == -1 --> x == -5
@@ -145,7 +145,7 @@ define i1 @sdiv_icmp3(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @sdiv_icmp4
+; CHECK-LABEL: @sdiv_icmp4(
 ; CHECK: icmp eq i64 %X, 0
 define i1 @sdiv_icmp4(i64 %X) nounwind {
   %A = sdiv exact i64 %X, -5   ; X/-5 == 0 --> x == 0
@@ -153,7 +153,7 @@ define i1 @sdiv_icmp4(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @sdiv_icmp5
+; CHECK-LABEL: @sdiv_icmp5(
 ; CHECK: icmp eq i64 %X, -5
 define i1 @sdiv_icmp5(i64 %X) nounwind {
   %A = sdiv exact i64 %X, -5   ; X/-5 == 1 --> x == -5
@@ -161,7 +161,7 @@ define i1 @sdiv_icmp5(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @sdiv_icmp6
+; CHECK-LABEL: @sdiv_icmp6(
 ; CHECK: icmp eq i64 %X, 5
 define i1 @sdiv_icmp6(i64 %X) nounwind {
   %A = sdiv exact i64 %X, -5   ; X/-5 == 1 --> x == 5
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
index 1b0ad5000412..99fb9ecfd2b2 100644
--- a/test/Transforms/InstCombine/exp2-1.ll
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -10,7 +10,7 @@ declare float @exp2f(float)
 ; Check exp2(sitofp(x)) -> ldexp(1.0, sext(x)).
 
 define double @test_simplify1(i32 %x) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %conv = sitofp i32 %x to double
   %ret = call double @exp2(double %conv)
 ; CHECK: call double @ldexp
@@ -18,7 +18,7 @@ define double @test_simplify1(i32 %x) {
 }
 
 define double @test_simplify2(i16 signext %x) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %conv = sitofp i16 %x to double
   %ret = call double @exp2(double %conv)
 ; CHECK: call double @ldexp
@@ -26,7 +26,7 @@ define double @test_simplify2(i16 signext %x) {
 }
 
 define double @test_simplify3(i8 signext %x) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %conv = sitofp i8 %x to double
   %ret = call double @exp2(double %conv)
 ; CHECK: call double @ldexp
@@ -34,7 +34,7 @@ define double @test_simplify3(i8 signext %x) {
 }
 
 define float @test_simplify4(i32 %x) {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %conv = sitofp i32 %x to float
   %ret = call float @exp2f(float %conv)
 ; CHECK: call float @ldexpf
@@ -44,7 +44,7 @@ define float @test_simplify4(i32 %x) {
 ; Check exp2(uitofp(x)) -> ldexp(1.0, zext(x)).
 
 define double @test_no_simplify1(i32 %x) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %conv = uitofp i32 %x to double
   %ret = call double @exp2(double %conv)
 ; CHECK: call double @exp2
@@ -52,7 +52,7 @@ define double @test_no_simplify1(i32 %x) {
 }
 
 define double @test_simplify6(i16 zeroext %x) {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %conv = uitofp i16 %x to double
   %ret = call double @exp2(double %conv)
 ; CHECK: call double @ldexp
@@ -60,7 +60,7 @@ define double @test_simplify6(i16 zeroext %x) {
 }
 
 define double @test_simplify7(i8 zeroext %x) {
-; CHECK: @test_simplify7
+; CHECK-LABEL: @test_simplify7(
   %conv = uitofp i8 %x to double
   %ret = call double @exp2(double %conv)
 ; CHECK: call double @ldexp
@@ -68,7 +68,7 @@ define double @test_simplify7(i8 zeroext %x) {
 }
 
 define float @test_simplify8(i8 zeroext %x) {
-; CHECK: @test_simplify8
+; CHECK-LABEL: @test_simplify8(
   %conv = uitofp i8 %x to float
   %ret = call float @exp2f(float %conv)
 ; CHECK: call float @ldexpf
diff --git a/test/Transforms/InstCombine/exp2-2.ll b/test/Transforms/InstCombine/exp2-2.ll
index bed063798e29..19368dc48c6b 100644
--- a/test/Transforms/InstCombine/exp2-2.ll
+++ b/test/Transforms/InstCombine/exp2-2.ll
@@ -9,7 +9,7 @@ declare float @exp2(double)
 ; Check that exp2 functions with the wrong prototype aren't simplified.
 
 define float @test_no_simplify1(i32 %x) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %conv = sitofp i32 %x to double
   %ret = call float @exp2(double %conv)
 ; CHECK: call float @exp2(double %conv)
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index 5e4c67778224..04c7ffa219da 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -3,7 +3,7 @@
 declare void @bar({i32, i32} %a)
 declare i32 @baz(i32 %a)
 
-; CHECK: define i32 @foo
+; CHECK-LABEL: define i32 @foo(
 ; CHECK-NOT: extractvalue
 define i32 @foo(i32 %a, i32 %b) {
 ; Instcombine should fold various combinations of insertvalue and extractvalue
@@ -39,7 +39,7 @@ define i32 @foo(i32 %a, i32 %b) {
         ret i32 %v5
 }
 
-; CHECK: define i32 @extract2gep
+; CHECK-LABEL: define i32 @extract2gep(
 ; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}* %pair, i32 0, i32 1
 ; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32* [[GEP]]
 ; CHECK-NEXT: store
@@ -67,7 +67,7 @@ end:
         ret i32 %E
 }
 
-; CHECK: define i32 @doubleextract2gep
+; CHECK-LABEL: define i32 @doubleextract2gep(
 ; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}* %arg, i32 0, i32 1, i32 1
 ; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32* [[GEP]]
 ; CHECK-NEXT: ret i32 [[LOAD]]
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index edcbcc71dfb4..d8ba2a59ff5e 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -6,14 +6,14 @@ define float @fold(float %a) {
   %mul = fmul fast float %a, 0x3FF3333340000000
   %mul1 = fmul fast float %mul, 0x4002666660000000
   ret float %mul1
-; CHECK: @fold
+; CHECK-LABEL: @fold(
 ; CHECK: fmul fast float %a, 0x4006147AE0000000
 }
 
 ; Same testing-case as the one used in fold() except that the operators have
 ; fixed FP mode.
 define float @notfold(float %a) {
-; CHECK: @notfold
+; CHECK-LABEL: @notfold(
 ; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
   %mul = fmul fast float %a, 0x3FF3333340000000
   %mul1 = fmul float %mul, 0x4002666660000000
@@ -21,7 +21,7 @@ define float @notfold(float %a) {
 }
 
 define float @fold2(float %a) {
-; CHECK: @fold2
+; CHECK-LABEL: @fold2(
 ; CHECK: fmul fast float %a, 0x4006147AE0000000
   %mul = fmul float %a, 0x3FF3333340000000
   %mul1 = fmul fast float %mul, 0x4002666660000000
@@ -33,7 +33,7 @@ define double @fold3(double %f1) {
   %t1 = fmul fast double 2.000000e+00, %f1
   %t2 = fadd fast double %f1, %t1
   ret double %t2
-; CHECK: @fold3
+; CHECK-LABEL: @fold3(
 ; CHECK: fmul fast double %f1, 3.000000e+00
 }
 
@@ -43,7 +43,7 @@ define float @fold4(float %f1, float %f2) {
   %sub1 = fsub float 5.000000e+00, %f2
   %add = fadd fast float %sub, %sub1
   ret float %add
-; CHECK: @fold4
+; CHECK-LABEL: @fold4(
 ; CHECK: %1 = fadd fast float %f1, %f2
 ; CHECK: fsub fast float 9.000000e+00, %1
 }
@@ -53,7 +53,7 @@ define float @fold5(float %f1, float %f2) {
   %add = fadd float %f1, 4.000000e+00
   %add1 = fadd fast float %add, 5.000000e+00
   ret float %add1
-; CHECK: @fold5
+; CHECK-LABEL: @fold5(
 ; CHECK: fadd fast float %f1, 9.000000e+00
 }
 
@@ -62,7 +62,7 @@ define float @fold6(float %f1) {
   %t1 = fadd fast float %f1, %f1
   %t2 = fadd fast float %f1, %t1
   ret float %t2
-; CHECK: @fold6
+; CHECK-LABEL: @fold6(
 ; CHECK: fmul fast float %f1, 3.000000e+00
 }
 
@@ -72,7 +72,7 @@ define float @fold7(float %f1) {
   %t2 = fadd fast float %f1, %f1
   %t3 = fadd fast float %t1, %t2
   ret float %t3
-; CHECK: @fold7
+; CHECK-LABEL: @fold7(
 ; CHECK: fmul fast float %f1, 7.000000e+00
 }
 
@@ -92,7 +92,7 @@ define float @fold9(float %f1, float %f2) {
   %t3 = fsub fast float %f1, %t1
   ret float %t3
 
-; CHECK: @fold9
+; CHECK-LABEL: @fold9(
 ; CHECK: fsub fast float 0.000000e+00, %f2
 }
 
@@ -106,7 +106,7 @@ define float @fold10(float %f1, float %f2) {
   %t2 = fsub fast float %f2, 3.000000e+00
   %t3 = fadd fast float %t1, %t2
   ret float %t3
-; CHECK: @fold10
+; CHECK-LABEL: @fold10(
 ; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
 ; CHECK: ret float %t3
 }
@@ -117,7 +117,7 @@ define float @fail1(float %f1, float %f2) {
   %add = fadd fast float %conv3, %conv3
   %add2 = fadd fast float %add, %conv3
   ret float %add2
-; CHECK: @fail1
+; CHECK-LABEL: @fail1(
 ; CHECK: ret
 }
 
@@ -126,7 +126,7 @@ define double @fail2(double %f1, double %f2) {
   %t2 = fadd fast double %f1, %f2
   %t3 = fsub fast double %t1, %t2
   ret double %t3
-; CHECK: @fail2
+; CHECK-LABEL: @fail2(
 ; CHECK: ret
 }
 
@@ -152,7 +152,7 @@ define float @fmul_distribute1(float %f1) {
   %t2 = fadd float %t1, 2.0e+3
   %t3 = fmul fast float %t2, 5.0e+3
   ret float %t3
-; CHECK: @fmul_distribute1
+; CHECK-LABEL: @fmul_distribute1(
 ; CHECK: %1 = fmul fast float %f1, 3.000000e+07
 ; CHECK: %t3 = fadd fast float %1, 1.000000e+07
 }
@@ -165,7 +165,7 @@ define double @fmul_distribute2(double %f1, double %f2) {
   %t3 = fmul fast double %t2, 0x10000000000000
   ret double %t3
 
-; CHECK: @fmul_distribute2
+; CHECK-LABEL: @fmul_distribute2(
 ; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
 ; CHECK: fadd fast double %1, 0x69000000000000
 }
@@ -178,7 +178,7 @@ define double @fmul_distribute3(double %f1) {
   %t3 = fmul fast double %t2, 0x10000000000000
   ret double %t3
 
-; CHECK: @fmul_distribute3
+; CHECK-LABEL: @fmul_distribute3(
 ; CHECK: fmul fast double %t2, 0x10000000000000
 }
 
@@ -188,7 +188,7 @@ define float @fmul_distribute4(float %f1) {
   %t2 = fsub float 2.0e+3, %t1
   %t3 = fmul fast float %t2, 5.0e+3
   ret float %t3
-; CHECK: @fmul_distribute4
+; CHECK-LABEL: @fmul_distribute4(
 ; CHECK: %1 = fmul fast float %f1, 3.000000e+07
 ; CHECK: %t3 = fsub fast float 1.000000e+07, %1
 }
@@ -198,16 +198,28 @@ define float @fmul2(float %f1) {
   %t1 = fdiv float 2.0e+3, %f1
   %t3 = fmul fast float %t1, 6.0e+3
   ret float %t3
-; CHECK: @fmul2
+; CHECK-LABEL: @fmul2(
 ; CHECK: fdiv fast float 1.200000e+07, %f1
 }
 
+; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
+@fmul2_external = external global float
+define float @fmul2_disable(float %f1) {
+  %div = fdiv fast float 1.000000e+00, %f1
+  store float %div, float* @fmul2_external ; second use of %div besides the fmul
+  %mul = fmul fast float %div, 2.000000e+00
+  ret float %mul
+; CHECK-LABEL: @fmul2_disable(
+; CHECK: store
+; CHECK: fmul fast
+}
+
 ; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
 define float @fmul3(float %f1, float %f2) {
   %t1 = fdiv float %f1, 2.0e+3
   %t3 = fmul fast float %t1, 6.0e+3
   ret float %t3
-; CHECK: @fmul3
+; CHECK-LABEL: @fmul3(
 ; CHECK: fmul fast float %f1, 3.000000e+00
 }
 
@@ -218,7 +230,7 @@ define float @fmul4(float %f1, float %f2) {
   %t1 = fdiv float %f1, 2.0e+3
   %t3 = fmul fast float %t1, 0x3810000000000000
   ret float %t3
-; CHECK: @fmul4
+; CHECK-LABEL: @fmul4(
 ; CHECK: fmul fast float %t1, 0x3810000000000000
 }
 
@@ -229,7 +241,7 @@ define float @fmul5(float %f1, float %f2) {
   %t1 = fdiv float %f1, 3.0e+0
   %t3 = fmul fast float %t1, 0x3810000000000000
   ret float %t3
-; CHECK: @fmul5
+; CHECK-LABEL: @fmul5(
 ; CHECK: fdiv fast float %f1, 0x47E8000000000000
 }
 
@@ -238,7 +250,7 @@ define float @fmul6(float %f1, float %f2) {
   %mul = fmul float %f1, %f2
   %mul1 = fmul fast float %mul, %f1
   ret float %mul1
-; CHECK: @fmul6
+; CHECK-LABEL: @fmul6(
 ; CHECK: fmul fast float %f1, %f1
 }
 
@@ -248,7 +260,7 @@ define float @fmul7(float %f1, float %f2) {
   %mul1 = fmul fast float %mul, %f1
   %add = fadd float %mul1, %mul
   ret float %add
-; CHECK: @fmul7
+; CHECK-LABEL: @fmul7(
 ; CHECK: fmul fast float %mul, %f1
 }
 
@@ -262,7 +274,7 @@ define float @fneg1(float %f1, float %f2) {
   %sub1 = fsub nsz float 0.000000e+00, %f2
   %mul = fmul float %sub, %sub1
   ret float %mul
-; CHECK: @fneg1
+; CHECK-LABEL: @fneg1(
 ; CHECK: fmul float %f1, %f2
 }
 
@@ -280,7 +292,7 @@ define float @fdiv1(float %x) {
 ; 0x3FF3333340000000 = 1.2f
 ; 0x4002666660000000 = 2.3f
 ; 0x3FD7303B60000000 = 0.36231884057971014492
-; CHECK: @fdiv1
+; CHECK-LABEL: @fdiv1(
 ; CHECK: fmul fast float %x, 0x3FD7303B60000000
 }
 
@@ -293,7 +305,7 @@ define float @fdiv2(float %x) {
 ; 0x3FF3333340000000 = 1.2f
 ; 0x4002666660000000 = 2.3f
 ; 0x3FE0B21660000000 = 0.52173918485641479492
-; CHECK: @fdiv2
+; CHECK-LABEL: @fdiv2(
 ; CHECK: fmul fast float %x, 0x3FE0B21660000000
 }
 
@@ -303,7 +315,7 @@ define float @fdiv3(float %x) {
   %div = fdiv float %x, 0x47EFFFFFE0000000
   %div1 = fdiv fast float %div, 0x4002666660000000
   ret float %div1
-; CHECK: @fdiv3
+; CHECK-LABEL: @fdiv3(
 ; CHECK: fdiv float %x, 0x47EFFFFFE0000000
 }
 
@@ -312,7 +324,7 @@ define float @fdiv4(float %x) {
   %mul = fmul float %x, 0x47EFFFFFE0000000
   %div = fdiv float %mul, 0x3FC99999A0000000
   ret float %div
-; CHECK: @fdiv4
+; CHECK-LABEL: @fdiv4(
 ; CHECK: fmul float %x, 0x47EFFFFFE0000000
 }
 
@@ -321,7 +333,7 @@ define float @fdiv5(float %f1, float %f2, float %f3) {
   %t1 = fdiv float %f1, %f2
   %t2 = fdiv fast float %t1, %f3
   ret float %t2
-; CHECK: @fdiv5
+; CHECK-LABEL: @fdiv5(
 ; CHECK: fmul float %f2, %f3
 }
 
@@ -330,7 +342,7 @@ define float @fdiv6(float %f1, float %f2, float %f3) {
   %t1 = fdiv float %f1, %f2
   %t2 = fdiv fast float %f3, %t1
   ret float %t2
-; CHECK: @fdiv6
+; CHECK-LABEL: @fdiv6(
 ; CHECK: fmul float %f3, %f2
 }
 
@@ -339,7 +351,7 @@ define float @fdiv7(float %x) {
   %t1 = fmul float %x, 3.0e0
   %t2 = fdiv fast float 15.0e0, %t1
   ret float %t2
-; CHECK: @fdiv7
+; CHECK-LABEL: @fdiv7(
 ; CHECK: fdiv fast float 5.000000e+00, %x
 }
 
@@ -348,7 +360,7 @@ define float @fdiv8(float %x) {
   %t1 = fdiv float %x, 3.0e0
   %t2 = fdiv fast float 15.0e0, %t1
   ret float %t2
-; CHECK: @fdiv8
+; CHECK-LABEL: @fdiv8(
 ; CHECK: fdiv fast float 4.500000e+01, %x
 }
 
@@ -357,7 +369,7 @@ define float @fdiv9(float %x) {
   %t1 = fdiv float 3.0e0, %x
   %t2 = fdiv fast float 15.0e0, %t1
   ret float %t2
-; CHECK: @fdiv9
+; CHECK-LABEL: @fdiv9(
 ; CHECK: fmul fast float %x, 5.000000e+00
 }
 
@@ -372,7 +384,7 @@ define float @fact_mul1(float %x, float %y, float %z) {
   %t2 = fmul fast float %y, %z
   %t3 = fadd fast float %t1, %t2
   ret float %t3
-; CHECK: @fact_mul1
+; CHECK-LABEL: @fact_mul1(
 ; CHECK: fmul fast float %1, %z
 }
 
@@ -382,7 +394,7 @@ define float @fact_mul2(float %x, float %y, float %z) {
   %t2 = fmul fast float %y, %z
   %t3 = fsub fast float %t1, %t2
   ret float %t3
-; CHECK: @fact_mul2
+; CHECK-LABEL: @fact_mul2(
 ; CHECK: fmul fast float %1, %z
 }
 
@@ -392,7 +404,7 @@ define float @fact_mul3(float %x, float %y, float %z) {
   %t1 = fmul fast float %z, %x
   %t3 = fsub fast float %t1, %t2
   ret float %t3
-; CHECK: @fact_mul3
+; CHECK-LABEL: @fact_mul3(
 ; CHECK: fmul fast float %1, %z
 }
 
@@ -402,7 +414,7 @@ define float @fact_mul4(float %x, float %y, float %z) {
   %t2 = fmul fast float %z, %y
   %t3 = fsub fast float %t1, %t2
   ret float %t3
-; CHECK: @fact_mul4
+; CHECK-LABEL: @fact_mul4(
 ; CHECK: fmul fast float %1, %z
 }
 
diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll
index 376fa079d24c..afc6782a0122 100644
--- a/test/Transforms/InstCombine/fcmp.ll
+++ b/test/Transforms/InstCombine/fcmp.ll
@@ -5,7 +5,7 @@ define i1 @test1(float %x, float %y) nounwind {
   %ext2 = fpext float %y to double
   %cmp = fcmp ogt double %ext1, %ext2
   ret i1 %cmp
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: fcmp ogt float %x, %y
 }
 
@@ -13,7 +13,7 @@ define i1 @test2(float %a) nounwind {
   %ext = fpext float %a to double
   %cmp = fcmp ogt double %ext, 1.000000e+00
   ret i1 %cmp
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: fcmp ogt float %a, 1.0
 }
 
@@ -21,7 +21,7 @@ define i1 @test3(float %a) nounwind {
   %ext = fpext float %a to double
   %cmp = fcmp ogt double %ext, 0x3FF0000000000001 ; more precision than float.
   ret i1 %cmp
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: fpext float %a to double
 }
 
@@ -29,7 +29,7 @@ define i1 @test4(float %a) nounwind {
   %ext = fpext float %a to double
   %cmp = fcmp ogt double %ext, 0x36A0000000000000 ; denormal in float.
   ret i1 %cmp
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: fpext float %a to double
 }
 
@@ -37,7 +37,7 @@ define i1 @test5(float %a) nounwind {
   %neg = fsub float -0.000000e+00, %a
   %cmp = fcmp ogt float %neg, 1.000000e+00
   ret i1 %cmp
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: fcmp olt float %a, -1.0
 }
 
@@ -46,7 +46,7 @@ define i1 @test6(float %x, float %y) nounwind {
   %neg2 = fsub float -0.000000e+00, %y
   %cmp = fcmp olt float %neg1, %neg2
   ret i1 %cmp
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: fcmp ogt float %x, %y
 }
 
@@ -54,7 +54,7 @@ define i1 @test7(float %x) nounwind readnone ssp noredzone {
   %ext = fpext float %x to ppc_fp128
   %cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000
   ret i1 %cmp
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: fcmp ogt float %x, 0.000000e+00
 }
 
@@ -65,7 +65,7 @@ define float @test8(float %x) nounwind readnone optsize ssp {
   %conv2 = sitofp i32 %conv1 to float
   ret float %conv2
 ; Float comparison to zero shouldn't cast to double.
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT: fcmp olt float %x, 0.000000e+00
 }
 
@@ -76,7 +76,7 @@ define i32 @test9(double %a) nounwind {
   %cmp = fcmp olt double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NOT: fabs
 ; CHECK: ret i32 0
 }
@@ -86,7 +86,7 @@ define i32 @test10(double %a) nounwind {
   %cmp = fcmp ole double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp oeq double %a, 0.000000e+00
 }
@@ -96,7 +96,7 @@ define i32 @test11(double %a) nounwind {
   %cmp = fcmp ogt double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp one double %a, 0.000000e+00
 }
@@ -106,7 +106,7 @@ define i32 @test12(double %a) nounwind {
   %cmp = fcmp oge double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp ord double %a, 0.000000e+00
 }
@@ -116,7 +116,7 @@ define i32 @test13(double %a) nounwind {
   %cmp = fcmp une double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp une double %a, 0.000000e+00
 }
@@ -126,7 +126,7 @@ define i32 @test14(double %a) nounwind {
   %cmp = fcmp oeq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp oeq double %a, 0.000000e+00
 }
@@ -136,7 +136,7 @@ define i32 @test15(double %a) nounwind {
   %cmp = fcmp one double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp one double %a, 0.000000e+00
 }
@@ -146,7 +146,7 @@ define i32 @test16(double %a) nounwind {
   %cmp = fcmp ueq double %call, 0.000000e+00
   %conv = zext i1 %cmp to i32
   ret i32 %conv
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NOT: fabs
 ; CHECK: fcmp ueq double %a, 0.000000e+00
 }
diff --git a/test/Transforms/InstCombine/fdiv.ll b/test/Transforms/InstCombine/fdiv.ll
index a2cce016e188..1edbc5ecd60b 100644
--- a/test/Transforms/InstCombine/fdiv.ll
+++ b/test/Transforms/InstCombine/fdiv.ll
@@ -4,7 +4,7 @@ define float @test1(float %x) nounwind readnone ssp {
   %div = fdiv float %x, 0x3810000000000000
   ret float %div
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: fmul float %x, 0x47D0000000000000
 }
 
@@ -12,7 +12,7 @@ define float @test2(float %x) nounwind readnone ssp {
   %div = fdiv float %x, 0x47E0000000000000
   ret float %div
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: fdiv float %x, 0x47E0000000000000
 }
 
@@ -20,6 +20,6 @@ define float @test3(float %x) nounwind readnone ssp {
   %div = fdiv float %x, 0x36A0000000000000
   ret float %div
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: fdiv float %x, 0x36A0000000000000
 }
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index 0510df3d24b9..1dec11da0eb0 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -1,7 +1,7 @@
 ; Test that the ffs* library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-LINUX
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -12,21 +12,21 @@ declare i32 @ffsll(i64)
 ; Check ffs(0) -> 0.
 
 define i32 @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i32 @ffs(i32 0)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify2() {
-; CHECK-LINUX: @test_simplify2
+; CHECK-LINUX-LABEL: @test_simplify2(
   %ret = call i32 @ffsl(i32 0)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 0
 }
 
 define i32 @test_simplify3() {
-; CHECK-LINUX: @test_simplify3
+; CHECK-LINUX-LABEL: @test_simplify3(
   %ret = call i32 @ffsll(i64 0)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 0
@@ -35,63 +35,63 @@ define i32 @test_simplify3() {
 ; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
 
 define i32 @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %ret = call i32 @ffs(i32 1)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %ret = call i32 @ffs(i32 2048)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 12
 }
 
 define i32 @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %ret = call i32 @ffs(i32 65536)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 17
 }
 
 define i32 @test_simplify7() {
-; CHECK-LINUX: @test_simplify7
+; CHECK-LINUX-LABEL: @test_simplify7(
   %ret = call i32 @ffsl(i32 65536)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 17
 }
 
 define i32 @test_simplify8() {
-; CHECK-LINUX: @test_simplify8
+; CHECK-LINUX-LABEL: @test_simplify8(
   %ret = call i32 @ffsll(i64 1024)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 11
 }
 
 define i32 @test_simplify9() {
-; CHECK-LINUX: @test_simplify9
+; CHECK-LINUX-LABEL: @test_simplify9(
   %ret = call i32 @ffsll(i64 65536)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 17
 }
 
 define i32 @test_simplify10() {
-; CHECK-LINUX: @test_simplify10
+; CHECK-LINUX-LABEL: @test_simplify10(
   %ret = call i32 @ffsll(i64 17179869184)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 35
 }
 
 define i32 @test_simplify11() {
-; CHECK-LINUX: @test_simplify11
+; CHECK-LINUX-LABEL: @test_simplify11(
   %ret = call i32 @ffsll(i64 281474976710656)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 49
 }
 
 define i32 @test_simplify12() {
-; CHECK-LINUX: @test_simplify12
+; CHECK-LINUX-LABEL: @test_simplify12(
   %ret = call i32 @ffsll(i64 1152921504606846976)
   ret i32 %ret
 ; CHECK-LINUX-NEXT: ret i32 61
@@ -100,7 +100,7 @@ define i32 @test_simplify12() {
 ; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
 
 define i32 @test_simplify13(i32 %x) {
-; CHECK: @test_simplify13
+; CHECK-LABEL: @test_simplify13(
   %ret = call i32 @ffs(i32 %x)
 ; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
 ; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
@@ -111,7 +111,7 @@ define i32 @test_simplify13(i32 %x) {
 }
 
 define i32 @test_simplify14(i32 %x) {
-; CHECK-LINUX: @test_simplify14
+; CHECK-LINUX-LABEL: @test_simplify14(
   %ret = call i32 @ffsl(i32 %x)
 ; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
 ; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
@@ -122,7 +122,7 @@ define i32 @test_simplify14(i32 %x) {
 }
 
 define i32 @test_simplify15(i64 %x) {
-; CHECK-LINUX: @test_simplify15
+; CHECK-LINUX-LABEL: @test_simplify15(
   %ret = call i32 @ffsll(i64 %x)
 ; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
 ; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll
new file mode 100644
index 000000000000..26f77a7f702b
--- /dev/null
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -0,0 +1,179 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @test1(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @ceil(double %1) nounwind readnone
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
+; CHECK-NEXT: fcmp oeq float %ceilf, %y
+}
+
+define i32 @test2(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @fabs(double %1) nounwind readnone
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
+; CHECK-NEXT: fcmp oeq float %fabsf, %y
+}
+
+define i32 @test3(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @floor(double %1) nounwind readnone
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %floorf = call float @floorf(float %x)
+; CHECK-NEXT: fcmp oeq float %floorf, %y
+}
+
+define i32 @test4(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @nearbyint(double %1) nounwind
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+}
+
+define i32 @test5(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @rint(double %1) nounwind
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: %rintf = call float @rintf(float %x)
+; CHECK-NEXT: fcmp oeq float %rintf, %y
+}
+
+define i32 @test6(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @round(double %1) nounwind readnone
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: %roundf = call float @roundf(float %x)
+; CHECK-NEXT: fcmp oeq float %roundf, %y
+}
+
+define i32 @test7(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = call double @trunc(double %1) nounwind
+  %3 = fpext float %y to double
+  %4 = fcmp oeq double %2, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %truncf = call float @truncf(float %x)
+; CHECK-NEXT: fcmp oeq float %truncf, %y
+}
+
+define i32 @test8(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @ceil(double %2) nounwind readnone
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
+; CHECK-NEXT: fcmp oeq float %ceilf, %y
+}
+
+define i32 @test9(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @fabs(double %2) nounwind readnone
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
+; CHECK-NEXT: fcmp oeq float %fabsf, %y
+}
+
+define i32 @test10(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @floor(double %2) nounwind readnone
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %floorf = call float @floorf(float %x)
+; CHECK-NEXT: fcmp oeq float %floorf, %y
+}
+
+define i32 @test11(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @nearbyint(double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+}
+
+define i32 @test12(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @rint(double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %rintf = call float @rintf(float %x)
+; CHECK-NEXT: fcmp oeq float %rintf, %y
+}
+
+define i32 @test13(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @round(double %2) nounwind readnone
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %roundf = call float @roundf(float %x)
+; CHECK-NEXT: fcmp oeq float %roundf, %y
+}
+
+define i32 @test14(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @trunc(double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: %truncf = call float @truncf(float %x)
+; CHECK-NEXT: fcmp oeq float %truncf, %y
+}
+
+declare double @fabs(double) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare double @nearbyint(double) nounwind readnone
+declare double @rint(double) nounwind readnone
+declare double @round(double) nounwind readnone
+declare double @trunc(double) nounwind readnone
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index 3671b4c6991c..402ee52e624a 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -6,7 +6,7 @@ define float @test1(float %x) {
   %mul = fmul float %sub, 2.0e+1
   ret float %mul
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: fmul float %x, -2.000000e+01
 }
 
@@ -16,7 +16,7 @@ define float @test2(float %x) {
   %mul = fmul float %sub, 2.0e+1
   ret float %mul
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: fmul float %x, -2.000000e+01
 }
 
@@ -26,7 +26,7 @@ define float @test3(float %x, float %y) {
   %sub2 = fsub float -0.000000e+00, %y
   %mul = fmul float %sub1, %sub2
   ret float %mul
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: fmul float %x, %y
 }
 
@@ -36,7 +36,7 @@ define float @test4(float %x, float %y) {
   %sub2 = fsub nsz float 0.000000e+00, %y
   %mul = fmul float %sub1, %sub2
   ret float %mul
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: fmul float %x, %y
 }
 
@@ -45,7 +45,7 @@ define float @test5(float %x, float %y) {
   %sub1 = fsub float -0.000000e+00, %x
   %mul = fmul float %sub1, %y
   ret float %mul
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %1 = fmul float %x, %y
 ; CHECK: %mul = fsub float -0.000000e+00, %1
 }
@@ -55,7 +55,7 @@ define float @test6(float %x, float %y) {
   %sub1 = fsub nsz float 0.000000e+00, %x
   %mul = fmul float %sub1, %y
   ret float %mul
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: %1 = fmul float %x, %y
 ; CHECK: %mul = fsub float -0.000000e+00, %1
 }
@@ -67,6 +67,29 @@ define float @test7(float %x, float %y) {
   %mul = fmul float %sub1, %y
   %mul2 = fmul float %mul, %sub1
   ret float %mul2
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: fsub float -0.000000e+00, %x
 }
+
+; Regression test: don't crash when attempting to cast a constant FMul to an instruction.
+define void @test8(i32* %inout) {  ; carries no output expectations of its own; it only has to get through opt
+entry:
+  %0 = load i32* %inout, align 4
+  %conv = uitofp i32 %0 to float
+  %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3  ; lanes 0-2 are constant zero, lane 3 is %conv
+  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit  ; lane-wise negation of %vecinit
+  %1 = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>  ; splat of lane 1, which is one of the constant lanes
+  %mul = fmul <4 x float> zeroinitializer, %1  ; fmul with an all-zero constant first operand
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %local_var_7.0 = phi <4 x float> [ %mul, %entry ], [ %2, %for.body ]
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = insertelement <4 x float> %local_var_7.0, float 0.000000e+00, i32 2
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/InstCombine/fneg-ext.ll b/test/Transforms/InstCombine/fneg-ext.ll
new file mode 100644
index 000000000000..922d26a465b7
--- /dev/null
+++ b/test/Transforms/InstCombine/fneg-ext.ll
@@ -0,0 +1,23 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test1(
+define double @test1(float %a, double %b) nounwind readnone ssp uwtable {  ; %b - (double)(-%a)
+; CHECK-NOT: fsub
+; CHECK: fpext
+; CHECK: fadd
+  %1 = fsub float -0.000000e+00, %a  ; negate %a in single precision
+  %2 = fpext float %1 to double
+  %3 = fsub double %b, %2  ; subtracting a negation; expected to become an fadd with no fsub left
+  ret double %3
+}
+
+; CHECK-LABEL: @test2(
+define double @test2(float %a, double %b) nounwind readnone ssp uwtable {  ; same as test1, with the fast flag on the outer fsub
+; CHECK-NOT: fsub
+; CHECK: fpext
+; CHECK: fadd fast
+  %1 = fsub float -0.000000e+00, %a  ; negate %a in single precision
+  %2 = fpext float %1 to double
+  %3 = fsub fast double %b, %2  ; the fast flag is expected to carry over to the resulting fadd
+  ret double %3
+}
diff --git a/test/Transforms/InstCombine/fold-bin-operand.ll b/test/Transforms/InstCombine/fold-bin-operand.ll
index a8bad0df5960..d3303262be3f 100644
--- a/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -2,14 +2,14 @@
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define i1 @f(i1 %x) {
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK: ret i1 false
 	%b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
 	ret i1 %b
 }
 
 define i32 @g(i32 %x) {
-; CHECK: @g
+; CHECK-LABEL: @g(
 ; CHECK: ret i32 %x
 	%b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
 	ret i32 %b
diff --git a/test/Transforms/InstCombine/fold-calls.ll b/test/Transforms/InstCombine/fold-calls.ll
index 504f874beaeb..1a9a9fd2e9ee 100644
--- a/test/Transforms/InstCombine/fold-calls.ll
+++ b/test/Transforms/InstCombine/fold-calls.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 ; This shouldn't fold, because sin(inf) is invalid.
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK:   %t = call double @sin(double 0x7FF0000000000000)
 define double @foo() {
   %t = call double @sin(double 0x7FF0000000000000)
@@ -9,7 +9,7 @@ define double @foo() {
 }
 
 ; This should fold.
-; CHECK: @bar
+; CHECK-LABEL: @bar(
 ; CHECK:   ret double 0.0
 define double @bar() {
   %t = call double @sin(double 0.0)
diff --git a/test/Transforms/InstCombine/fold-fops-into-selects.ll b/test/Transforms/InstCombine/fold-fops-into-selects.ll
new file mode 100644
index 000000000000..07aebb13eff3
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-fops-into-selects.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @test1(i1 %A) {  ; 1.0 - (%A ? 1.0 : 0.0)
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float 0.000000e+00
+  %op = fsub float 1.000000e+00, %cf  ; both select arms are constants, so the fsub is expected to fold into them
+  ret float %op
+; CHECK-LABEL: @test1(
+; CHECK: select i1 %A, float 0.000000e+00, float 1.000000e+00
+}
+
+define float @test2(i1 %A, float %B) {  ; 2.0 + (%A ? 1.0 : %B)
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float %B
+  %op = fadd float 2.000000e+00, %cf  ; constant arm is expected to fold to 3.0; the %B arm keeps an fadd
+  ret float %op
+; CHECK-LABEL: @test2(
+; CHECK: [[OP:%.*]] = fadd float %B, 2.000000e+00
+; CHECK: select i1 %A, float 3.000000e+00, float [[OP]]
+}
+
+define float @test3(i1 %A, float %B) {  ; 2.0 - (%A ? 1.0 : %B)
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float %B
+  %op = fsub float 2.000000e+00, %cf  ; constant arm is expected to fold to 1.0; the %B arm keeps an fsub
+  ret float %op
+; CHECK-LABEL: @test3(
+; CHECK: [[OP:%.*]] = fsub float 2.000000e+00, %B
+; CHECK: select i1 %A, float 1.000000e+00, float [[OP]]
+}
+
+define float @test4(i1 %A, float %B) {  ; 2.0 * (%A ? 1.0 : %B)
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float %B
+  %op = fmul float 2.000000e+00, %cf  ; constant arm is expected to fold to 2.0; the %B arm keeps an fmul
+  ret float %op
+; CHECK-LABEL: @test4(
+; CHECK: [[OP:%.*]] = fmul float %B, 2.000000e+00
+; CHECK: select i1 %A, float 2.000000e+00, float [[OP]]
+}
+
+define float @test5(i1 %A, float %B) {  ; 2.0 / (%A ? 1.0 : %B)
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float %B
+  %op = fdiv float 2.000000e+00, %cf  ; select is the divisor: constant arm is expected to fold to 2.0, the %B arm keeps an fdiv
+  ret float %op
+; CHECK-LABEL: @test5(
+; CHECK: [[OP:%.*]] = fdiv float 2.000000e+00, %B
+; CHECK: select i1 %A, float 2.000000e+00, float [[OP]]
+}
+
+define float @test6(i1 %A, float %B) {  ; (%A ? 1.0 : %B) / 2.0
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float %B
+  %op = fdiv float %cf, 2.000000e+00  ; select is the dividend; division by 2.0 is expected to appear as a multiply by 0.5
+  ret float %op
+; CHECK-LABEL: @test6(
+; CHECK: [[OP:%.*]] = fmul float %B, 5.000000e-01
+; CHECK: select i1 %A, float 5.000000e-01, float [[OP]]
+}
+
+define float @test7(i1 %A, float %B) {  ; (%A ? 1.0 : %B) / 3.0
+EntryBlock:
+  %cf = select i1 %A, float 1.000000e+00, float %B
+  %op = fdiv float %cf, 3.000000e+00  ; unlike test6 the %B arm is expected to stay an fdiv; the constant arm folds to 1.0/3.0
+  ret float %op
+; CHECK-LABEL: @test7(
+; CHECK: [[OP:%.*]] = fdiv float %B, 3.000000e+00
+; CHECK: select i1 %A, float 0x3FD5555560000000, float [[OP]]
+}
+
diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll
index 2cb970bf4177..b58d9dc90acc 100644
--- a/test/Transforms/InstCombine/fold-vector-select.ll
+++ b/test/Transforms/InstCombine/fold-vector-select.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | not grep select
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: select
 
 define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D,
                  <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H,
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 09f053289dc1..05d1b48d5996 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -31,4 +31,16 @@ define half @test4(float %a) {
   ret half %c
 }
 
+; CHECK-LABEL: @test5(
+define half @test5(float %a, float %b, float %c) {  ; (half)(%a > %b ? %c : 1.0)
+; CHECK: fcmp ogt
+; CHECK: fptrunc
+; CHECK: select
+; CHECK: half 0xH3C00
+  %d = fcmp ogt float %a, %b
+  %e = select i1 %d, float %c, float 1.0
+  %f = fptrunc float %e to half  ; truncation is expected to move ahead of the select, leaving a half constant arm
+  ret half %f
+}
+
 declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index e1dc191bd700..3f6a314e3472 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the fprintf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -18,7 +18,7 @@ declare i32 @fprintf(%FILE*, i8*, ...)
 ; Check fprintf(fp, "foo") -> fwrite("foo", 3, 1, fp).
 
 define void @test_simplify1(%FILE* %fp) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
 ; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
@@ -29,7 +29,7 @@ define void @test_simplify1(%FILE* %fp) {
 ; Check fprintf(fp, "%c", chr) -> fputc(chr, fp).
 
 define void @test_simplify2(%FILE* %fp) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8 104)
 ; CHECK-NEXT: call i32 @fputc(i32 104, %FILE* %fp)
@@ -41,7 +41,7 @@ define void @test_simplify2(%FILE* %fp) {
 ; NOTE: The fputs simplifier simplifies this further to fwrite.
 
 define void @test_simplify3(%FILE* %fp) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
   %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
@@ -53,7 +53,7 @@ define void @test_simplify3(%FILE* %fp) {
 ; Check fprintf(fp, fmt, ...) -> fiprintf(fp, fmt, ...) if no floating point.
 
 define void @test_simplify4(%FILE* %fp) {
-; CHECK-IPRINTF: @test_simplify4
+; CHECK-IPRINTF-LABEL: @test_simplify4(
   %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187)
 ; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
@@ -62,7 +62,7 @@ define void @test_simplify4(%FILE* %fp) {
 }
 
 define void @test_no_simplify1(%FILE* %fp) {
-; CHECK-IPRINTF: @test_no_simplify1
+; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
 ; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
@@ -71,7 +71,7 @@ define void @test_no_simplify1(%FILE* %fp) {
 }
 
 define void @test_no_simplify2(%FILE* %fp, double %d) {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double %d)
 ; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double %d)
@@ -80,7 +80,7 @@ define void @test_no_simplify2(%FILE* %fp, double %d) {
 }
 
 define i32 @test_no_simplify3(%FILE* %fp) {
-; CHECK: @test_no_simplify3
+; CHECK-LABEL: @test_no_simplify3(
   %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   %1 = call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
 ; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
diff --git a/test/Transforms/InstCombine/fputs-1.ll b/test/Transforms/InstCombine/fputs-1.ll
index c7c5becfd038..473610e6120a 100644
--- a/test/Transforms/InstCombine/fputs-1.ll
+++ b/test/Transforms/InstCombine/fputs-1.ll
@@ -15,7 +15,7 @@ declare i32 @fputs(i8*, %FILE*)
 ; Check fputs(str, fp) --> fwrite(str, 1, strlen(s), fp).
 
 define void @test_simplify1(%FILE* %fp) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
   call i32 @fputs(i8* %str, %FILE* %fp)
   ret void
@@ -25,7 +25,7 @@ define void @test_simplify1(%FILE* %fp) {
 ; NOTE: The fwrite simplifier simplifies this further to fputc.
 
 define void @test_simplify2(%FILE* %fp) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %str = getelementptr [2 x i8]* @A, i32 0, i32 0
   call i32 @fputs(i8* %str, %FILE* %fp)
 ; CHECK-NEXT: call i32 @fputc(i32 65, %FILE* %fp)
@@ -34,7 +34,7 @@ define void @test_simplify2(%FILE* %fp) {
 }
 
 define void @test_simplify3(%FILE* %fp) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %str = getelementptr [7 x i8]* @hello, i32 0, i32 0
   call i32 @fputs(i8* %str, %FILE* %fp)
 ; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([7 x i8]* @hello, i32 0, i32 0), i32 6, i32 1, %FILE* %fp)
diff --git a/test/Transforms/InstCombine/fwrite-1.ll b/test/Transforms/InstCombine/fwrite-1.ll
index 528cdec217f7..6f9a8e463a8c 100644
--- a/test/Transforms/InstCombine/fwrite-1.ll
+++ b/test/Transforms/InstCombine/fwrite-1.ll
@@ -14,7 +14,7 @@ declare i64 @fwrite(i8*, i64, i64, %FILE *)
 ; Check fwrite(S, 1, 1, fp) -> fputc(S[0], fp).
 
 define void @test_simplify1(%FILE* %fp) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
   call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
 ; CHECK-NEXT: call i32 @fputc(i32 0, %FILE* %fp)
@@ -23,7 +23,7 @@ define void @test_simplify1(%FILE* %fp) {
 }
 
 define void @test_simplify2(%FILE* %fp) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
   call i64 @fwrite(i8* %str, i64 1, i64 0, %FILE* %fp)
   ret void
@@ -31,7 +31,7 @@ define void @test_simplify2(%FILE* %fp) {
 }
 
 define void @test_simplify3(%FILE* %fp) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
   call i64 @fwrite(i8* %str, i64 0, i64 1, %FILE* %fp)
   ret void
@@ -39,7 +39,7 @@ define void @test_simplify3(%FILE* %fp) {
 }
 
 define i64 @test_no_simplify1(%FILE* %fp) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
   %ret = call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
 ; CHECK-NEXT: call i64 @fwrite
@@ -48,7 +48,7 @@ define i64 @test_no_simplify1(%FILE* %fp) {
 }
 
 define void @test_no_simplify2(%FILE* %fp, i64 %size) {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
   call i64 @fwrite(i8* %str, i64 %size, i64 1, %FILE* %fp)
 ; CHECK-NEXT: call i64 @fwrite
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index dfe12dbfaf7f..24c355d817e5 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-pc-win32"
 define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
 ST:
   %A = getelementptr inbounds %myStruct addrspace(1)* %p, i64 0
-  %B = bitcast %myStruct addrspace(1)* %A to %myStruct*
+  %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
   %C = getelementptr inbounds %myStruct* %B, i32 0, i32 1
   %D = getelementptr inbounds [3 x float]* %C, i32 0, i32 2
   %E = load float* %D, align 4
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index bb07736ef803..c29a7dccb8ee 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-target datalayout = "e-p:64:64"
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32"
+
 %intstruct = type { i32 }
 %pair = type { i32, i32 }
 %struct.B = type { double }
@@ -8,20 +9,28 @@ target datalayout = "e-p:64:64"
 
 
 @Global = constant [10 x i8] c"helloworld"
+@Global_as1 = addrspace(1) constant [10 x i8] c"helloworld"
 
 ; Test noop elimination
 define i32* @test1(i32* %I) {
-        %A = getelementptr i32* %I, i64 0 
+        %A = getelementptr i32* %I, i64 0
         ret i32* %A
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32* %I
 }
 
+define i32 addrspace(1)* @test1_as1(i32 addrspace(1)* %I) {  ; addrspace(1) variant of test1
+  %A = getelementptr i32 addrspace(1)* %I, i64 0  ; zero-index GEP; expected to fold away to %I
+  ret i32 addrspace(1)* %A
+; CHECK-LABEL: @test1_as1(
+; CHECK: ret i32 addrspace(1)* %I
+}
+
 ; Test noop elimination
 define i32* @test2(i32* %I) {
         %A = getelementptr i32* %I
         ret i32* %A
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32* %I
 }
 
@@ -30,52 +39,88 @@ define i32* @test3(i32* %I) {
         %A = getelementptr i32* %I, i64 17
         %B = getelementptr i32* %A, i64 4
         ret i32* %B
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: getelementptr i32* %I, i64 21
 }
 
 ; Test that two getelementptr insts fold
 define i32* @test4({ i32 }* %I) {
-        %A = getelementptr { i32 }* %I, i64 1 
+        %A = getelementptr { i32 }* %I, i64 1
         %B = getelementptr { i32 }* %A, i64 0, i32 0
         ret i32* %B
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: getelementptr { i32 }* %I, i64 1, i32 0
 }
 
 define void @test5(i8 %B) {
         ; This should be turned into a constexpr instead of being an instruction
-        %A = getelementptr [10 x i8]* @Global, i64 0, i64 4 
+        %A = getelementptr [10 x i8]* @Global, i64 0, i64 4
         store i8 %B, i8* %A
         ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8]* @Global, i64 0, i64 4)
 }
 
+define void @test5_as1(i8 %B) {  ; addrspace(1) variant of test5
+        ; This should be turned into a constexpr instead of being an instruction
+        %A = getelementptr [10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4  ; i16 indices match the 16-bit p1 pointer in the datalayout
+        store i8 %B, i8 addrspace(1)* %A
+        ret void
+; CHECK-LABEL: @test5_as1(
+; CHECK: store i8 %B, i8 addrspace(1)* getelementptr inbounds ([10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4)
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; The GEP below should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {  ; stores an addrspace(2) pointer into a global that lives in addrspace(1)
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT: store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0), align 8
+; CHECK-NEXT: ret void
+  %A = getelementptr %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0  ; field 0 of the struct; the outer index is i16
+  store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {  ; stores into element 2 of @arst, an addrspace(1) array of addrspace(2) pointers
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT: store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2), align 4
+
+; CHECK-NEXT: ret void
+  %A = getelementptr [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2  ; expected to become a constant expression inside the store
+  store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+  ret void
+}
 
 define i32* @test7(i32* %I, i64 %C, i64 %D) {
-        %A = getelementptr i32* %I, i64 %C 
-        %B = getelementptr i32* %A, i64 %D 
+        %A = getelementptr i32* %I, i64 %C
+        %B = getelementptr i32* %A, i64 %D
         ret i32* %B
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %A.sum = add i64 %C, %D
 ; CHECK: getelementptr i32* %I, i64 %A.sum
 }
 
 define i8* @test8([10 x i32]* %X) {
         ;; Fold into the cast.
-        %A = getelementptr [10 x i32]* %X, i64 0, i64 0 
-        %B = bitcast i32* %A to i8*     
+        %A = getelementptr [10 x i32]* %X, i64 0, i64 0
+        %B = bitcast i32* %A to i8*
         ret i8* %B
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: bitcast [10 x i32]* %X to i8*
 }
 
 define i32 @test9() {
         %A = getelementptr { i32, double }* null, i32 0, i32 1
-        %B = ptrtoint double* %A to i32        
+        %B = ptrtoint double* %A to i32
         ret i32 %B
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: ret i32 8
 }
 
@@ -83,17 +128,17 @@ define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) {
         %tmp.1 = getelementptr { i32, i32 }* %x, i32 0, i32 1
         %tmp.3 = getelementptr { i32, i32 }* %y, i32 0, i32 1
         ;; seteq x, y
-        %tmp.4 = icmp eq i32* %tmp.1, %tmp.3       
+        %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
         ret i1 %tmp.4
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: icmp eq { i32, i32 }* %x, %y
 }
 
 define i1 @test11({ i32, i32 }* %X) {
-        %P = getelementptr { i32, i32 }* %X, i32 0, i32 0 
-        %Q = icmp eq i32* %P, null             
+        %P = getelementptr { i32, i32 }* %X, i32 0, i32 0
+        %Q = icmp eq i32* %P, null
         ret i1 %Q
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: icmp eq { i32, i32 }* %X, null
 }
 
@@ -105,13 +150,13 @@ entry:
   store i32 10, i32* %g3, align 4
 
   %g4 = getelementptr %struct.A* %a, i32 0, i32 0
-  
+
   %new_a = bitcast %struct.B* %g4 to %struct.A*
 
-  %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1	
-  %a_a = load i32* %g5, align 4	
+  %g5 = getelementptr %struct.A* %new_a, i32 0, i32 1
+  %a_a = load i32* %g5, align 4
   ret i32 %a_a
-; CHECK:      @test12
+; CHECK-LABEL:      @test12(
 ; CHECK:      getelementptr %struct.A* %a, i64 0, i32 1
 ; CHECK-NEXT: store i32 10, i32* %g3
 ; CHECK-NEXT: ret i32 10
@@ -125,17 +170,77 @@ define i1 @test13(i64 %X, %S* %P) {
         %B = getelementptr inbounds %S* %P, i32 0, i32 0
 	%C = icmp eq i32* %A, %B
 	ret i1 %C
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK:    %C = icmp eq i64 %X, -1
 }
 
-
-@G = external global [3 x i8]      
+define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind {  ; vector-of-pointers variant of test13
+; CHECK-LABEL: @test13_vector(
+; CHECK-NEXT: shl nuw <2 x i64> %X, <i64 2, i64 2>
+; CHECK-NEXT: add <2 x i64> %A.idx, <i64 4, i64 4>
+; CHECK-NEXT: icmp eq <2 x i64> %A.offs, zeroinitializer
+  %A = getelementptr inbounds <2 x %S*> %P, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i64> %X
+  %B = getelementptr inbounds <2 x %S*> %P, <2 x i64> <i64 0, i64 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32*> %A, %B  ; pointer compare; expected to be rewritten as integer offset arithmetic on %X
+  ret <2 x i1> %C
+}
+
+define i1 @test13_as1(i16 %X, %S addrspace(1)* %P) {  ; addrspace(1) variant of test13 with i16 indices
+; CHECK-LABEL: @test13_as1(
+; CHECK-NEXT:  %C = icmp eq i16 %X, -1
+; CHECK-NEXT: ret i1 %C
+  %A = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 1, i16 %X
+  %B = getelementptr inbounds %S addrspace(1)* %P, i16 0, i32 0
+  %C = icmp eq i32 addrspace(1)* %A, %B  ; expected to reduce to an i16 compare of %X against -1
+  ret i1 %C
+}
+
+define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x %S addrspace(1)*> %P) {  ; vector variant of test13_as1
+; CHECK-LABEL: @test13_vector_as1(
+; CHECK-NEXT: shl nuw <2 x i16> %X, <i16 2, i16 2>
+; CHECK-NEXT: add <2 x i16> %A.idx, <i16 4, i16 4>
+; CHECK-NEXT: icmp eq <2 x i16> %A.offs, zeroinitializer
+; CHECK-NEXT: ret <2 x i1>
+  %A = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 1, i32 1>, <2 x i16> %X
+  %B = getelementptr inbounds <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32 addrspace(1)*> %A, %B  ; expected to be rewritten as i16 offset arithmetic on %X
+  ret <2 x i1> %C
+}
+
+define i1 @test13_i32(i32 %X, %S* %P) {  ; test13 with i32 indices (narrower than the 64-bit default pointer)
+; CHECK-LABEL: @test13_i32(
+; CHECK: %C = icmp eq i32 %X, -1
+  %A = getelementptr inbounds %S* %P, i32 0, i32 1, i32 %X
+  %B = getelementptr inbounds %S* %P, i32 0, i32 0
+  %C = icmp eq i32* %A, %B  ; expected compare stays in the index's own width (i32)
+  ret i1 %C
+}
+
+define i1 @test13_i16(i16 %X, %S* %P) {  ; test13 with i16 indices (narrower than the 64-bit default pointer)
+; CHECK-LABEL: @test13_i16(
+; CHECK: %C = icmp eq i16 %X, -1
+  %A = getelementptr inbounds %S* %P, i16 0, i32 1, i16 %X
+  %B = getelementptr inbounds %S* %P, i16 0, i32 0
+  %C = icmp eq i32* %A, %B  ; expected compare stays in the index's own width (i16)
+  ret i1 %C
+}
+
+define i1 @test13_i128(i128 %X, %S* %P) {  ; test13 with i128 indices (wider than the 64-bit default pointer)
+; CHECK-LABEL: @test13_i128(
+; CHECK: %C = icmp eq i64 %1, -1
+  %A = getelementptr inbounds %S* %P, i128 0, i32 1, i128 %X
+  %B = getelementptr inbounds %S* %P, i128 0, i32 0
+  %C = icmp eq i32* %A, %B  ; expected compare is done in i64 on %1 (presumably %X narrowed to pointer width)
+  ret i1 %C
+}
+
+
+@G = external global [3 x i8]
 define i8* @test14(i32 %Idx) {
         %idx = zext i32 %Idx to i64
         %tmp = getelementptr i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i64 %idx
         ret i8* %tmp
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK: getelementptr [3 x i8]* @G, i64 0, i64 %idx
 }
 
@@ -145,15 +250,15 @@ define i8* @test14(i32 %Idx) {
 define i32 *@test15(i64 %X) {
         %A = getelementptr i32* getelementptr ([40 x i32]* @Array, i64 0, i64 0), i64 %X
         ret i32* %A
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK: getelementptr [40 x i32]* @Array, i64 0, i64 %X
 }
 
 
 define i32* @test16(i32* %X, i32 %Idx) {
-        %R = getelementptr i32* %X, i32 %Idx       
+        %R = getelementptr i32* %X, i32 %Idx
         ret i32* %R
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK: sext i32 %Idx to i64
 }
 
@@ -163,25 +268,74 @@ define i1 @test17(i16* %P, i32 %I, i32 %J) {
         %Y = getelementptr inbounds i16* %P, i32 %J
         %C = icmp ult i16* %X, %Y
         ret i1 %C
-; CHECK: @test17
-; CHECK: %C = icmp slt i32 %I, %J 
+; CHECK-LABEL: @test17(
+; CHECK: %C = icmp slt i32 %I, %J
 }
 
 define i1 @test18(i16* %P, i32 %I) {
         %X = getelementptr inbounds i16* %P, i32 %I
         %C = icmp ult i16* %X, %P
         ret i1 %C
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK: %C = icmp slt i32 %I, 0
 }
 
+; Index type (i32) larger than the pointer size (16 bits) of a non-zero address space
+define i1 @test18_as1(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+  %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I
+  %C = icmp ult i16 addrspace(1)* %X, %P  ; expected to become a signed compare of the truncated index against 0
+  ret i1 %C
+}
+
+; NOTE(review): the index here is still i32 (wider than the 16-bit p1 pointer), so this repeats test18_as1; a narrower index is needed to cover the smaller-than-pointer case
+define i1 @test18_as1_i32(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1_i32(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+  %X = getelementptr inbounds i16 addrspace(1)* %P, i32 %I
+  %C = icmp ult i16 addrspace(1)* %X, %P  ; expected to become a signed compare of the truncated index against 0
+  ret i1 %C
+}
+
+; Index type (i16) smaller than the 64-bit default pointer size
+define i1 @test18_i16(i16* %P, i16 %I) {
+; CHECK-LABEL: @test18_i16(
+; CHECK: %C = icmp slt i16 %I, 0
+  %X = getelementptr inbounds i16* %P, i16 %I
+  %C = icmp ult i16* %X, %P  ; expected to become a signed compare of %I against 0 in i16
+  ret i1 %C
+}
+
+; Index type (i64) the same width as the 64-bit default pointer
+define i1 @test18_i64(i16* %P, i64 %I) {
+; CHECK-LABEL: @test18_i64(
+; CHECK: %C = icmp slt i64 %I, 0
+  %X = getelementptr inbounds i16* %P, i64 %I
+  %C = icmp ult i16* %X, %P  ; expected to become a signed compare of %I against 0 in i64
+  ret i1 %C
+}
+
+; Index type (i128) larger than the 64-bit default pointer size
+define i1 @test18_i128(i16* %P, i128 %I) {
+; CHECK-LABEL: @test18_i128(
+; CHECK: %C = icmp slt i64 %1, 0
+  %X = getelementptr inbounds i16* %P, i128 %I
+  %C = icmp ult i16* %X, %P  ; expected compare is done in i64 on %1 (presumably %I narrowed to pointer width)
+  ret i1 %C
+}
+
 define i32 @test19(i32* %P, i32 %A, i32 %B) {
         %tmp.4 = getelementptr inbounds i32* %P, i32 %A
         %tmp.9 = getelementptr inbounds i32* %P, i32 %B
         %tmp.10 = icmp eq i32* %tmp.4, %tmp.9
         %tmp.11 = zext i1 %tmp.10 to i32
         ret i32 %tmp.11
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK: icmp eq i32 %A, %B
 }
 
@@ -190,10 +344,19 @@ define i32 @test20(i32* %P, i32 %A, i32 %B) {
         %tmp.6 = icmp eq i32* %tmp.4, %P
         %tmp.7 = zext i1 %tmp.6 to i32
         ret i32 %tmp.7
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK: icmp eq i32 %A, 0
 }
 
+define i32 @test20_as1(i32 addrspace(1)* %P, i32 %A, i32 %B) {  ; addrspace(1) variant of test20; %B is not used in the body
+  %tmp.4 = getelementptr inbounds i32 addrspace(1)* %P, i32 %A
+  %tmp.6 = icmp eq i32 addrspace(1)* %tmp.4, %P  ; expected to become an i16 compare against 0 (p1 pointers are 16 bits wide)
+  %tmp.7 = zext i1 %tmp.6 to i32
+  ret i32 %tmp.7
+; CHECK-LABEL: @test20_as1(
+; CHECK: icmp eq i16 %1, 0
+}
+
 
 define i32 @test21() {
         %pbob1 = alloca %intstruct
@@ -201,7 +364,7 @@ define i32 @test21() {
         %pbobel = getelementptr %intstruct* %pbob2, i64 0, i32 0
         %rval = load i32* %pbobel
         ret i32 %rval
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK: getelementptr %intstruct* %pbob1, i64 0, i32 0
 }
 
@@ -210,10 +373,10 @@ define i32 @test21() {
 @B = global i32 2               ; <i32*> [#uses=1]
 
 define i1 @test22() {
-        %C = icmp ult i32* getelementptr (i32* @A, i64 1), 
-                           getelementptr (i32* @B, i64 2) 
+        %C = icmp ult i32* getelementptr (i32* @A, i64 1),
+                           getelementptr (i32* @B, i64 2)
         ret i1 %C
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK: icmp ult (i32* getelementptr inbounds (i32* @A, i64 1), i32* getelementptr (i32* @B, i64 2))
 }
 
@@ -224,7 +387,7 @@ define i1 @test23() {
         %A = getelementptr %X* null, i64 0, i32 0, i64 0                ; <i32*> [#uses=1]
         %B = icmp ne i32* %A, null              ; <i1> [#uses=1]
         ret i1 %B
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK: ret i1 false
 }
 
@@ -239,7 +402,7 @@ entry:
         %tmp27.i = sext i32 %sext to i64                ; <i64> [#uses=1]
         tail call void @foo25( i32 0, i64 %tmp27.i )
         unreachable
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 }
 
 declare void @foo25(i32, i64)
@@ -251,7 +414,7 @@ define i1 @test26(i8* %arr) {
         %Y = getelementptr i8* %arr, i32 1
         %test = icmp uge i8* %X, %Y
         ret i1 %test
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK: ret i1 true
 }
 
@@ -262,48 +425,48 @@ define i1 @test26(i8* %arr) {
 
 define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) {
 entry:
-	%from_addr = alloca %struct.siginfo_t*	
-	%tmp344 = load %struct.siginfo_t** %from_addr, align 8	
+	%from_addr = alloca %struct.siginfo_t*
+	%tmp344 = load %struct.siginfo_t** %from_addr, align 8
 	%tmp345 = getelementptr %struct.siginfo_t* %tmp344, i32 0, i32 3
 	%tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0
-	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*	
+	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*
 	%tmp348 = getelementptr { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2
 	%tmp349 = getelementptr %struct.sigval_t* %tmp348, i32 0, i32 0
 	%tmp349350 = bitcast i8** %tmp349 to i32*
-	%tmp351 = load i32* %tmp349350, align 8	
+	%tmp351 = load i32* %tmp349350, align 8
 	%tmp360 = call i32 asm sideeffect "...",
         "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351,
          %struct.__large_struct* null, i32 -14, i32 0 )
 	unreachable
-; CHECK: @test27
+; CHECK-LABEL: @test27(
 }
 
 ; PR1978
 	%struct.x = type <{ i8 }>
-@.str = internal constant [6 x i8] c"Main!\00"	
-@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"	
+@.str = internal constant [6 x i8] c"Main!\00"
+@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"
 
 define i32 @test28() nounwind  {
 entry:
 	%orientations = alloca [1 x [1 x %struct.x]]
-	%tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind 
+	%tmp3 = call i32 @puts( i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0) ) nounwind
 	%tmp45 = getelementptr inbounds [1 x [1 x %struct.x]]* %orientations, i32 1, i32 0, i32 0
 	%orientations62 = getelementptr [1 x [1 x %struct.x]]* %orientations, i32 0, i32 0, i32 0
 	br label %bb10
 
 bb10:
 	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb10 ]
-	%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1	
-	%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1	
+	%tmp.0.reg2mem.0.rec = mul i32 %indvar, -1
+	%tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1
 	%tmp12 = getelementptr inbounds %struct.x* %tmp45, i32 %tmp12.rec
 	%tmp16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
 	%tmp84 = icmp eq %struct.x* %tmp12, %orientations62
 	%indvar.next = add i32 %indvar, 1
 	br i1 %tmp84, label %bb17, label %bb10
 
-bb17:	
+bb17:
 	ret i32 0
-; CHECK: @test28
+; CHECK-LABEL: @test28(
 ; CHECK: icmp eq i32 %indvar, 0
 }
 
@@ -318,7 +481,7 @@ declare i32 @printf(i8*, ...)
 	%T = type <{ i64, i64, i64 }>
 define i32 @test29(i8* %start, i32 %X) nounwind {
 entry:
-	%tmp3 = load i64* null		
+	%tmp3 = load i64* null
 	%add.ptr = getelementptr i8* %start, i64 %tmp3
 	%tmp158 = load i32* null
 	%add.ptr159 = getelementptr %T* null, i32 %tmp158
@@ -332,7 +495,7 @@ if.then216:
 
 if.end363:
 	ret i32 0
-; CHECK: @test29
+; CHECK-LABEL: @test29(
 }
 
 
@@ -345,7 +508,7 @@ entry:
 	%2 = getelementptr [0 x i32]* %1, i32 0, i32 %m
 	%3 = load i32* %2, align 4
 	ret i32 %3
-; CHECK: @test30
+; CHECK-LABEL: @test30(
 ; CHECK: getelementptr i32
 }
 
@@ -356,9 +519,9 @@ declare void @test30f(i32*)
 define i1 @test31(i32* %A) {
         %B = getelementptr i32* %A, i32 1
         %C = getelementptr i32* %A, i64 1
-        %V = icmp eq i32* %B, %C 
+        %V = icmp eq i32* %B, %C
         ret i1 %V
-; CHECK: @test31
+; CHECK-LABEL: @test31(
 ; CHECK: ret i1 true
 }
 
@@ -372,10 +535,10 @@ define i8* @test32(i8* %v) {
 	%D = getelementptr { [16 x i8] }* %C, i32 0, i32 0, i32 8
 	%E = bitcast i8* %D to i8**
 	store i8* %v, i8** %E
-	%F = getelementptr [4 x i8*]* %A, i32 0, i32 2	
+	%F = getelementptr [4 x i8*]* %A, i32 0, i32 2
 	%G = load i8** %F
 	ret i8* %G
-; CHECK: @test32
+; CHECK-LABEL: @test32(
 ; CHECK: %D = getelementptr [4 x i8*]* %A, i64 0, i64 1
 ; CHECK: %F = getelementptr [4 x i8*]* %A, i64 0, i64 2
 }
@@ -384,27 +547,50 @@ define i8* @test32(i8* %v) {
 %struct.Key = type { { i32, i32 } }
 %struct.anon = type <{ i8, [3 x i8], i32 }>
 
-define i32 *@test33(%struct.Key *%A) {
-	%B = bitcast %struct.Key* %A to %struct.anon*
-        %C = getelementptr %struct.anon* %B, i32 0, i32 2 
-	ret i32 *%C
-; CHECK: @test33
+define i32* @test33(%struct.Key* %A) {
+; CHECK-LABEL: @test33(
 ; CHECK: getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+  %B = bitcast %struct.Key* %A to %struct.anon*
+  %C = getelementptr %struct.anon* %B, i32 0, i32 2
+  ret i32* %C
 }
 
+define i32 addrspace(1)* @test33_as1(%struct.Key addrspace(1)* %A) {
+; CHECK-LABEL: @test33_as1(
+; CHECK: getelementptr %struct.Key addrspace(1)* %A, i16 0, i32 0, i32 1
+  %B = bitcast %struct.Key addrspace(1)* %A to %struct.anon addrspace(1)*
+  %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
 
+define i32 addrspace(1)* @test33_array_as1([10 x i32] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_as1(
+; CHECK: getelementptr [10 x i32] addrspace(1)* %A, i16 0, i16 2
+  %B = bitcast [10 x i32] addrspace(1)* %A to [5 x i32] addrspace(1)*
+  %C = getelementptr [5 x i32] addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
+
+; Make sure the GEP indices use the right pointer sized integer
+define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_struct_as1(
+; CHECK: getelementptr [10 x %struct.Key] addrspace(1)* %A, i16 0, i16 1, i32 0, i32 0
+  %B = bitcast [10 x %struct.Key] addrspace(1)* %A to [20 x i32] addrspace(1)*
+  %C = getelementptr [20 x i32] addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
 
 	%T2 = type { i8*, i8 }
 define i8* @test34(i8* %Val, i64 %V) nounwind {
 entry:
-	%A = alloca %T2, align 8	
+	%A = alloca %T2, align 8
 	%mrv_gep = bitcast %T2* %A to i64*
 	%B = getelementptr %T2* %A, i64 0, i32 0
-        
+
       	store i64 %V, i64* %mrv_gep
 	%C = load i8** %B, align 8
 	ret i8* %C
-; CHECK: @test34
+; CHECK-LABEL: @test34(
 ; CHECK: %V.c = inttoptr i64 %V to i8*
 ; CHECK: ret i8* %V.c
 }
@@ -423,7 +609,7 @@ define i32 @test35() nounwind {
   call i32 (i8*, ...)* @printf(i8* getelementptr ([17 x i8]* @"\01LC8", i32 0, i32 0),
              i8* getelementptr (%t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
   ret i32 0
-; CHECK: @test35
+; CHECK-LABEL: @test35(
 ; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
 }
 
@@ -434,14 +620,14 @@ define i32 @test35() nounwind {
 
 define i8* @test36() nounwind {
   ret i8* getelementptr ([11 x i8]* @array, i32 0, i64 -1)
-; CHECK: @test36
+; CHECK-LABEL: @test36(
 ; CHECK: ret i8* getelementptr ([11 x i8]* @array, i64 1676976733973595601, i64 4)
 }
 
 ; Instcombine shouldn't assume that gep(A,0,1) != gep(A,1,0).
 @A37 = external constant [1 x i8]
 define i1 @test37() nounwind {
-; CHECK: @test37
+; CHECK-LABEL: @test37(
 ; CHECK: ret i1 true
   %t = icmp eq i8* getelementptr ([1 x i8]* @A37, i64 0, i64 1),
                    getelementptr ([1 x i8]* @A37, i64 1, i64 0)
@@ -452,7 +638,7 @@ define i1 @test37() nounwind {
 define i32* @test38(i32* %I, i32 %n) {
         %A = getelementptr i32* %I, i32 %n
         ret i32* %A
-; CHECK: @test38
+; CHECK-LABEL: @test38(
 ; CHECK: = sext i32 %n to i64
 ; CHECK: %A = getelementptr i32* %I, i64 %
 }
@@ -469,7 +655,7 @@ entry:
   call void @pr10322_f3(i8** %tmp2) nounwind
   ret void
 
-; CHECK: @pr10322_f1
+; CHECK-LABEL: @pr10322_f1(
 ; CHECK: %tmp2 = getelementptr inbounds %pr10322_t* %arrayidx8, i64 0, i32 0
 }
 
@@ -485,7 +671,7 @@ define void @three_gep_f(%three_gep_t2* %x) {
   %gep3 = getelementptr %three_gep_t* %gep2, i64 0, i32 0
   call void @three_gep_g(i32* %gep3)
 
-; CHECK: @three_gep_f
+; CHECK-LABEL: @three_gep_f(
 ; CHECK: %gep3 = getelementptr %three_gep_t2* %gep1, i64 0, i32 0, i32 0
   ret void
 }
@@ -504,9 +690,103 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
   store i8 %arg1, i8* %tmp4, align 8
   ret void
 
-; CHECK: @test39
+; CHECK-LABEL: @test39(
 ; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
 ; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
 }
 
+define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
+  %c = getelementptr [1 x i8]* %a, i32 0, i32 0
+  %d = getelementptr [1 x i8]* %b, i32 0, i32 0
+  %cmp = icmp ult i8* %c, %d
+  ret i1 %cmp
+
+; CHECK-LABEL: @pr16483(
+; CHECK-NEXT: icmp ult  [1 x i8]* %a, %b
+}
+
+define i8 @test_gep_bitcast_as1(i32 addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_as1(
+; CHECK: getelementptr i32 addrspace(1)* %arr, i16 %N
+; CHECK: bitcast
+  %cast = bitcast i32 addrspace(1)* %arr to i8 addrspace(1)*
+  %V = mul i16 %N, 4
+  %t = getelementptr i8 addrspace(1)* %cast, i16 %V
+  %x = load i8 addrspace(1)* %t
+  ret i8 %x
+}
+
+; The element size of the array matches the element size of the pointer
+define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V
+; CHECK: bitcast
+  %cast = bitcast [100 x double]* %arr to i64*
+  %V = mul i64 %N, 8
+  %t = getelementptr i64* %cast, i64 %V
+  %x = load i64* %t
+  ret i64 %x
+}
+
+; The element size of the array is different from the element size of the pointer
+define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element(
+; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %N
+; CHECK: bitcast
+  %cast = bitcast [100 x double]* %arr to i8*
+  %V = mul i64 %N, 8
+  %t = getelementptr i8* %cast, i64 %V
+  %x = load i8* %t
+  ret i8 %x
+}
+
+define i64 @test_gep_bitcast_array_same_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1(
+; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %V
+; CHECK: bitcast
+  %cast = bitcast [100 x double] addrspace(1)* %arr to i64 addrspace(1)*
+  %V = mul i16 %N, 8
+  %t = getelementptr i64 addrspace(1)* %cast, i16 %V
+  %x = load i64 addrspace(1)* %t
+  ret i64 %x
+}
+
+define i8 @test_gep_bitcast_array_different_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element_as1(
+; CHECK: getelementptr [100 x double] addrspace(1)* %arr, i16 0, i16 %N
+; CHECK: bitcast
+  %cast = bitcast [100 x double] addrspace(1)* %arr to i8 addrspace(1)*
+  %V = mul i16 %N, 8
+  %t = getelementptr i8 addrspace(1)* %cast, i16 %V
+  %x = load i8 addrspace(1)* %t
+  ret i8 %x
+}
+
+define i64 @test40() {
+  %array = alloca [3 x i32], align 4
+  %gep = getelementptr inbounds [3 x i32]* %array, i64 0, i64 2  ; &array[2]: 8 bytes past the base
+  %gepi8 = bitcast i32* %gep to i8*
+  %p = ptrtoint [3 x i32]* %array to i64
+  %np = sub i64 0, %p  ; negated base address
+  %gep2 = getelementptr i8* %gepi8, i64 %np  ; (base + 8) - base
+  %ret = ptrtoint i8* %gep2 to i64
+  ret i64 %ret
+
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: ret i64 8
+}
+
+define i16 @test41([3 x i32] addrspace(1)* %array) {
+  %gep = getelementptr inbounds [3 x i32] addrspace(1)* %array, i16 0, i16 2
+  %gepi8 = bitcast i32 addrspace(1)* %gep to i8 addrspace(1)*
+  %p = ptrtoint [3 x i32] addrspace(1)* %array to i16
+  %np = sub i16 0, %p
+  %gep2 = getelementptr i8 addrspace(1)* %gepi8, i16 %np
+  %ret = ptrtoint i8 addrspace(1)* %gep2 to i16
+  ret i16 %ret
+
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i16 8
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
new file mode 100644
index 000000000000..d5d8cbc8c26e
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -0,0 +1,152 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+
+define i1 @masked_and_notallzeroes(i32 %A) {
+; CHECK-LABEL: @masked_and_notallzeroes(
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp ne i32 [[MASK]], 0
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, 0
+
+  %res = and i1 %tst1, %tst2  ; bits of 7 are a subset of 39, so %tst1 implies %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allzeroes(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes(
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 0
+
+  %res = or i1 %tst1, %tst2  ; bits of 7 are a subset of 39, so %tst2 implies %tst1
+  ret i1 %res
+}
+
+define i1 @masked_and_notallones(i32 %A) {
+; CHECK-LABEL: @masked_and_notallones(
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp ne i32 [[MASK]], 7
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, 7
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, 39
+
+  %res = and i1 %tst1, %tst2  ; a clear bit under mask 7 is also clear under mask 39
+  ret i1 %res
+}
+
+define i1 @masked_or_allones(i32 %A) {
+; CHECK-LABEL: @masked_or_allones(
+; CHECK: [[MASK:%.*]] = and i32 %A, 7
+; CHECK: icmp eq i32 [[MASK]], 7
+; CHECK-NOT: and i32 %A, 39
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, 7
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 39
+
+  %res = or i1 %tst1, %tst2  ; all bits of 39 set implies all bits of 7 set
+  ret i1 %res
+}
+
+define i1 @masked_and_notA(i32 %A) {
+; CHECK-LABEL: @masked_and_notA(
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp ne i32 [[MASK]], %A
+; CHECK-NOT: and i32 %A, 7
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp ne i32 %mask1, %A
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp ne i32 %mask2, %A
+
+  %res = and i1 %tst1, %tst2  ; a bit outside mask 39 is also outside mask 7, so %tst2 implies %tst1
+  ret i1 %res
+}
+
+define i1 @masked_or_A(i32 %A) {
+; CHECK-LABEL: @masked_or_A(
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp eq i32 [[MASK]], %A
+; CHECK-NOT: and i32 %A, 7
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 7
+  %tst1 = icmp eq i32 %mask1, %A
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, %A
+
+  %res = or i1 %tst1, %tst2  ; %A fitting inside mask 7 also fits inside mask 39, so %tst1 implies %tst2
+  ret i1 %res
+}
+
+define i1 @masked_or_allzeroes_notoptimised(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes_notoptimised(
+; CHECK: [[MASK:%.*]] = and i32 %A, 15
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK: [[MASK:%.*]] = and i32 %A, 39
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK: ret i1
+
+  %mask1 = and i32 %A, 15
+  %tst1 = icmp eq i32 %mask1, 0
+
+  %mask2 = and i32 %A, 39
+  %tst2 = icmp eq i32 %mask2, 0
+
+  %res = or i1 %tst1, %tst2  ; neither of 15 and 39 is a subset of the other, so both tests must remain
+  ret i1 %res
+}
+
+define i1 @nomask_lhs(i32 %in) {
+; CHECK-LABEL: @nomask_lhs(
+; CHECK: [[MASK:%.*]] = and i32 %in, 1
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: icmp
+; CHECK: ret i1
+  %tst1 = icmp eq i32 %in, 0  ; unmasked compare on the left-hand side of the or
+
+  %masked = and i32 %in, 1
+  %tst2 = icmp eq i32 %masked, 0
+
+  %val = or i1 %tst1, %tst2  ; %in == 0 implies (%in & 1) == 0, so only %tst2 should survive
+  ret i1 %val
+}
+
+
+define i1 @nomask_rhs(i32 %in) {
+; CHECK-LABEL: @nomask_rhs(
+; CHECK: [[MASK:%.*]] = and i32 %in, 1
+; CHECK: icmp eq i32 [[MASK]], 0
+; CHECK-NOT: icmp
+; CHECK: ret i1
+  %masked = and i32 %in, 1
+  %tst1 = icmp eq i32 %masked, 0
+
+  %tst2 = icmp eq i32 %in, 0  ; unmasked compare on the right-hand side of the or
+
+  %val = or i1 %tst1, %tst2  ; %in == 0 implies (%in & 1) == 0, so only %tst1 should survive
+  ret i1 %val
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index c912a576c3d2..12a4744cc0fe 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,14 +1,14 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout =
-"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+"e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32 %X) {
 entry:
         icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
         zext i1 %0 to i32               ; <i32>:1 [#uses=1]
         ret i32 %1
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: lshr i32 %X, 31
 ; CHECK-NEXT: ret i32
 }
@@ -18,7 +18,7 @@ entry:
         icmp ult i32 %X, -2147483648            ; <i1>:0 [#uses=1]
         zext i1 %0 to i32               ; <i32>:1 [#uses=1]
         ret i32 %1
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: lshr i32 %X, 31
 ; CHECK-NEXT: xor i32
 ; CHECK-NEXT: ret i32
@@ -29,7 +29,7 @@ entry:
         icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
         sext i1 %0 to i32               ; <i32>:1 [#uses=1]
         ret i32 %1
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ashr i32 %X, 31
 ; CHECK-NEXT: ret i32
 }
@@ -39,7 +39,7 @@ entry:
         icmp ult i32 %X, -2147483648            ; <i1>:0 [#uses=1]
         sext i1 %0 to i32               ; <i32>:1 [#uses=1]
         ret i32 %1
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ashr i32 %X, 31
 ; CHECK-NEXT: xor i32
 ; CHECK-NEXT: ret i32
@@ -50,7 +50,7 @@ define <2 x i1> @test5(<2 x i64> %x) {
 entry:
   %V = icmp eq <2 x i64> %x, undef
   ret <2 x i1> %V
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: ret <2 x i1> <i1 true, i1 true>
 }
 
@@ -60,7 +60,7 @@ define i32 @test6(i32 %a, i32 %b) {
         %e = sub i32 0, %d
         %f = and i32 %e, %b
         ret i32 %f
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: ashr i32 %a, 31
 ; CHECK-NEXT: %f = and i32 %e, %b
 ; CHECK-NEXT: ret i32 %f
@@ -72,37 +72,36 @@ entry:
   %a = add i32 %x, -1
   %b = icmp ult i32 %a, %x
   ret i1 %b
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %b = icmp ne i32 %x, 0
 ; CHECK: ret i1 %b
 }
 
 define i1 @test8(i32 %x){
 entry:
-  %a = add i32 %x, -1 
+  %a = add i32 %x, -1
   %b = icmp eq i32 %a, %x
   ret i1 %b
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: ret i1 false
 }
 
 define i1 @test9(i32 %x)  {
 entry:
   %a = add i32 %x, -2
-  %b = icmp ugt i32 %x, %a 
+  %b = icmp ugt i32 %x, %a
   ret i1 %b
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: icmp ugt i32 %x, 1
 ; CHECK: ret i1 %b
 }
 
 define i1 @test10(i32 %x){
 entry:
-  %a = add i32 %x, -1      
-  %b = icmp slt i32 %a, %x 
+  %a = add i32 %x, -1
+  %b = icmp slt i32 %a, %x
   ret i1 %b
-  
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: %b = icmp ne i32 %x, -2147483648
 ; CHECK: ret i1 %b
 }
@@ -111,7 +110,7 @@ define i1 @test11(i32 %x) {
   %a = add nsw i32 %x, 8
   %b = icmp slt i32 %x, %a
   ret i1 %b
-; CHECK: @test11  
+; CHECK-LABEL: @test11(
 ; CHECK: ret i1 true
 }
 
@@ -120,7 +119,7 @@ define i1 @test12(i1 %A) {
   %S = select i1 %A, i64 -4294967295, i64 8589934591
   %B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
   ret i1 %B
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: = xor i1 %A, true
 ; CHECK-NEXT: ret i1
 }
@@ -130,7 +129,7 @@ define i1 @test13(i8 %X) nounwind readnone {
 entry:
         %cmp = icmp slt i8 undef, %X
         ret i1 %cmp
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK: ret i1 false
 }
 
@@ -138,7 +137,7 @@ define i1 @test14(i8 %X) nounwind readnone {
 entry:
         %cmp = icmp slt i8 undef, -128
         ret i1 %cmp
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK: ret i1 false
 }
 
@@ -146,7 +145,7 @@ define i1 @test15() nounwind readnone {
 entry:
         %cmp = icmp eq i8 undef, -128
         ret i1 %cmp
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK: ret i1 undef
 }
 
@@ -154,7 +153,7 @@ define i1 @test16() nounwind readnone {
 entry:
         %cmp = icmp ne i8 undef, -128
         ret i1 %cmp
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK: ret i1 undef
 }
 
@@ -163,7 +162,7 @@ define i1 @test17(i32 %x) nounwind {
   %and = and i32 %shl, 8
   %cmp = icmp eq i32 %and, 0
   ret i1 %cmp
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
 }
 
@@ -173,7 +172,7 @@ define i1 @test18(i32 %x) nounwind {
   %and = and i32 %sh, 1
   %cmp = icmp eq i32 %and, 0
   ret i1 %cmp
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
 }
 
@@ -182,7 +181,7 @@ define i1 @test19(i32 %x) nounwind {
   %and = and i32 %shl, 8
   %cmp = icmp eq i32 %and, 8
   ret i1 %cmp
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
 }
 
@@ -191,12 +190,12 @@ define i1 @test20(i32 %x) nounwind {
   %and = and i32 %shl, 8
   %cmp = icmp ne i32 %and, 0
   ret i1 %cmp
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
 }
 
 define i1 @test21(i8 %x, i8 %y) {
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK-NOT: or i8
 ; CHECK: icmp ugt
   %A = or i8 %x, 1
@@ -205,7 +204,7 @@ define i1 @test21(i8 %x, i8 %y) {
 }
 
 define i1 @test22(i8 %x, i8 %y) {
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK-NOT: or i8
 ; CHECK: icmp ult
   %A = or i8 %x, 1
@@ -214,7 +213,7 @@ define i1 @test22(i8 %x, i8 %y) {
 }
 
 ; PR2740
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK: icmp sgt i32 %x, 1328634634
 define i1 @test23(i32 %x) nounwind {
 	%i3 = sdiv i32 %x, -1328634635
@@ -225,7 +224,7 @@ define i1 @test23(i32 %x) nounwind {
 @X = global [1000 x i32] zeroinitializer
 
 ; PR8882
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK:    %cmp = icmp eq i64 %i, 1000
 ; CHECK:   ret i1 %cmp
 define i1 @test24(i64 %i) {
@@ -234,7 +233,19 @@ define i1 @test24(i64 %i) {
   ret i1 %cmp
 }
 
-; CHECK: @test25
+@X_as1 = addrspace(1) global [1000 x i32] zeroinitializer
+
+; CHECK-LABEL: @test24_as1(
+; CHECK: trunc i64 %i to i16
+; CHECK: %cmp = icmp eq i16 %1, 1000
+; CHECK: ret i1 %cmp
+define i1 @test24_as1(i64 %i) {  ; addrspace(1) pointers are 16 bits wide (p1:16:16:16 in the datalayout)
+  %p1 = getelementptr inbounds i32 addrspace(1)* getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i
+  %cmp = icmp eq i32 addrspace(1)* %p1, getelementptr inbounds ([1000 x i32] addrspace(1)* @X_as1, i64 1, i64 0)
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @test25(
 ; X + Z > Y + Z -> X > Y if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %x, %y
 ; CHECK: ret i1 %c
@@ -245,7 +256,7 @@ define i1 @test25(i32 %x, i32 %y, i32 %z) {
   ret i1 %c
 }
 
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; X + Z > Y + Z -> X > Y if there is no overflow.
 ; CHECK: %c = icmp ugt i32 %x, %y
 ; CHECK: ret i1 %c
@@ -256,7 +267,7 @@ define i1 @test26(i32 %x, i32 %y, i32 %z) {
   ret i1 %c
 }
 
-; CHECK: @test27
+; CHECK-LABEL: @test27(
 ; X - Z > Y - Z -> X > Y if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %x, %y
 ; CHECK: ret i1 %c
@@ -267,7 +278,7 @@ define i1 @test27(i32 %x, i32 %y, i32 %z) {
   ret i1 %c
 }
 
-; CHECK: @test28
+; CHECK-LABEL: @test28(
 ; X - Z > Y - Z -> X > Y if there is no overflow.
 ; CHECK: %c = icmp ugt i32 %x, %y
 ; CHECK: ret i1 %c
@@ -278,7 +289,7 @@ define i1 @test28(i32 %x, i32 %y, i32 %z) {
   ret i1 %c
 }
 
-; CHECK: @test29
+; CHECK-LABEL: @test29(
 ; X + Y > X -> Y > 0 if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %y, 0
 ; CHECK: ret i1 %c
@@ -288,7 +299,7 @@ define i1 @test29(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test30
+; CHECK-LABEL: @test30(
 ; X + Y > X -> Y > 0 if there is no overflow.
 ; CHECK: %c = icmp ne i32 %y, 0
 ; CHECK: ret i1 %c
@@ -298,7 +309,7 @@ define i1 @test30(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test31
+; CHECK-LABEL: @test31(
 ; X > X + Y -> 0 > Y if there is no overflow.
 ; CHECK: %c = icmp slt i32 %y, 0
 ; CHECK: ret i1 %c
@@ -308,7 +319,7 @@ define i1 @test31(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test32
+; CHECK-LABEL: @test32(
 ; X > X + Y -> 0 > Y if there is no overflow.
 ; CHECK: ret i1 false
 define i1 @test32(i32 %x, i32 %y) {
@@ -317,7 +328,7 @@ define i1 @test32(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test33
+; CHECK-LABEL: @test33(
 ; X - Y > X -> 0 > Y if there is no overflow.
 ; CHECK: %c = icmp slt i32 %y, 0
 ; CHECK: ret i1 %c
@@ -327,7 +338,7 @@ define i1 @test33(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test34
+; CHECK-LABEL: @test34(
 ; X - Y > X -> 0 > Y if there is no overflow.
 ; CHECK: ret i1 false
 define i1 @test34(i32 %x, i32 %y) {
@@ -336,7 +347,7 @@ define i1 @test34(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test35
+; CHECK-LABEL: @test35(
 ; X > X - Y -> Y > 0 if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %y, 0
 ; CHECK: ret i1 %c
@@ -346,7 +357,7 @@ define i1 @test35(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test36
+; CHECK-LABEL: @test36(
 ; X > X - Y -> Y > 0 if there is no overflow.
 ; CHECK: %c = icmp ne i32 %y, 0
 ; CHECK: ret i1 %c
@@ -356,7 +367,7 @@ define i1 @test36(i32 %x, i32 %y) {
   ret i1 %c
 }
 
-; CHECK: @test37
+; CHECK-LABEL: @test37(
 ; X - Y > X - Z -> Z > Y if there is no overflow.
 ; CHECK: %c = icmp sgt i32 %z, %y
 ; CHECK: ret i1 %c
@@ -367,7 +378,7 @@ define i1 @test37(i32 %x, i32 %y, i32 %z) {
   ret i1 %c
 }
 
-; CHECK: @test38
+; CHECK-LABEL: @test38(
 ; X - Y > X - Z -> Z > Y if there is no overflow.
 ; CHECK: %c = icmp ugt i32 %z, %y
 ; CHECK: ret i1 %c
@@ -379,7 +390,7 @@ define i1 @test38(i32 %x, i32 %y, i32 %z) {
 }
 
 ; PR9343 #1
-; CHECK: @test39
+; CHECK-LABEL: @test39(
 ; CHECK: %B = icmp eq i32 %X, 0
 define i1 @test39(i32 %X, i32 %Y) {
   %A = ashr exact i32 %X, %Y
@@ -387,7 +398,7 @@ define i1 @test39(i32 %X, i32 %Y) {
   ret i1 %B
 }
 
-; CHECK: @test40
+; CHECK-LABEL: @test40(
 ; CHECK: %B = icmp ne i32 %X, 0
 define i1 @test40(i32 %X, i32 %Y) {
   %A = lshr exact i32 %X, %Y
@@ -396,7 +407,7 @@ define i1 @test40(i32 %X, i32 %Y) {
 }
 
 ; PR9343 #3
-; CHECK: @test41
+; CHECK-LABEL: @test41(
 ; CHECK: ret i1 true
 define i1 @test41(i32 %X, i32 %Y) {
   %A = urem i32 %X, %Y
@@ -404,7 +415,7 @@ define i1 @test41(i32 %X, i32 %Y) {
   ret i1 %B
 }
 
-; CHECK: @test42
+; CHECK-LABEL: @test42(
 ; CHECK: %B = icmp sgt i32 %Y, -1
 define i1 @test42(i32 %X, i32 %Y) {
   %A = srem i32 %X, %Y
@@ -412,7 +423,7 @@ define i1 @test42(i32 %X, i32 %Y) {
   ret i1 %B
 }
 
-; CHECK: @test43
+; CHECK-LABEL: @test43(
 ; CHECK: %B = icmp slt i32 %Y, 0
 define i1 @test43(i32 %X, i32 %Y) {
   %A = srem i32 %X, %Y
@@ -420,7 +431,7 @@ define i1 @test43(i32 %X, i32 %Y) {
   ret i1 %B
 }
 
-; CHECK: @test44
+; CHECK-LABEL: @test44(
 ; CHECK: %B = icmp sgt i32 %Y, -1
 define i1 @test44(i32 %X, i32 %Y) {
   %A = srem i32 %X, %Y
@@ -428,7 +439,7 @@ define i1 @test44(i32 %X, i32 %Y) {
   ret i1 %B
 }
 
-; CHECK: @test45
+; CHECK-LABEL: @test45(
 ; CHECK: %B = icmp slt i32 %Y, 0
 define i1 @test45(i32 %X, i32 %Y) {
   %A = srem i32 %X, %Y
@@ -437,7 +448,7 @@ define i1 @test45(i32 %X, i32 %Y) {
 }
 
 ; PR9343 #4
-; CHECK: @test46
+; CHECK-LABEL: @test46(
 ; CHECK: %C = icmp ult i32 %X, %Y
 define i1 @test46(i32 %X, i32 %Y, i32 %Z) {
   %A = ashr exact i32 %X, %Z
@@ -447,7 +458,7 @@ define i1 @test46(i32 %X, i32 %Y, i32 %Z) {
 }
 
 ; PR9343 #5
-; CHECK: @test47
+; CHECK-LABEL: @test47(
 ; CHECK: %C = icmp ugt i32 %X, %Y
 define i1 @test47(i32 %X, i32 %Y, i32 %Z) {
   %A = ashr exact i32 %X, %Z
@@ -457,7 +468,7 @@ define i1 @test47(i32 %X, i32 %Y, i32 %Z) {
 }
 
 ; PR9343 #8
-; CHECK: @test48
+; CHECK-LABEL: @test48(
 ; CHECK: %C = icmp eq i32 %X, %Y
 define i1 @test48(i32 %X, i32 %Y, i32 %Z) {
   %A = sdiv exact i32 %X, %Z
@@ -467,17 +478,17 @@ define i1 @test48(i32 %X, i32 %Y, i32 %Z) {
 }
 
 ; PR8469
-; CHECK: @test49
+; CHECK-LABEL: @test49(
 ; CHECK: ret <2 x i1> <i1 true, i1 true>
 define <2 x i1> @test49(<2 x i32> %tmp3) {
 entry:
   %tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3>
   %cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4>
-  ret <2 x i1> %cmp  
+  ret <2 x i1> %cmp
 }
 
 ; PR9343 #7
-; CHECK: @test50
+; CHECK-LABEL: @test50(
 ; CHECK: ret i1 true
 define i1 @test50(i16 %X, i32 %Y) {
   %A = zext i16 %X to i32
@@ -486,7 +497,7 @@ define i1 @test50(i16 %X, i32 %Y) {
   ret i1 %C
 }
 
-; CHECK: @test51
+; CHECK-LABEL: @test51(
 ; CHECK: ret i1 %C
 define i1 @test51(i32 %X, i32 %Y) {
   %A = and i32 %X, 2147483648
@@ -495,7 +506,7 @@ define i1 @test51(i32 %X, i32 %Y) {
   ret i1 %C
 }
 
-; CHECK: @test52
+; CHECK-LABEL: @test52(
 ; CHECK-NEXT: and i32 %x1, 16711935
 ; CHECK-NEXT: icmp eq i32 {{.*}}, 4980863
 ; CHECK-NEXT: ret i1
@@ -511,18 +522,18 @@ define i1 @test52(i32 %x1) nounwind {
 }
 
 ; PR9838
-; CHECK: @test53
-; CHECK-NEXT: ashr exact
-; CHECK-NEXT: ashr
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: sdiv exact
+; CHECK-NEXT: sdiv
 ; CHECK-NEXT: icmp
 define i1 @test53(i32 %a, i32 %b) nounwind {
- %x = ashr exact i32 %a, 30
- %y = ashr i32 %b, 30
+ %x = sdiv exact i32 %a, 30
+ %y = sdiv i32 %b, 30
  %z = icmp eq i32 %x, %y
  ret i1 %z
 }
 
-; CHECK: @test54
+; CHECK-LABEL: @test54(
 ; CHECK-NEXT: %and = and i8 %a, -64
 ; CHECK-NEXT: icmp eq i8 %and, -128
 define i1 @test54(i8 %a) nounwind {
@@ -532,7 +543,7 @@ define i1 @test54(i8 %a) nounwind {
   ret i1 %ret
 }
 
-; CHECK: @test55
+; CHECK-LABEL: @test55(
 ; CHECK-NEXT: icmp eq i32 %a, -123
 define i1 @test55(i32 %a) {
   %sub = sub i32 0, %a
@@ -540,7 +551,7 @@ define i1 @test55(i32 %a) {
   ret i1 %cmp
 }
 
-; CHECK: @test56
+; CHECK-LABEL: @test56(
 ; CHECK-NEXT: icmp eq i32 %a, -113
 define i1 @test56(i32 %a) {
   %sub = sub i32 10, %a
@@ -550,7 +561,7 @@ define i1 @test56(i32 %a) {
 
 ; PR10267 Don't make icmps more expensive when no other inst is subsumed.
 declare void @foo(i32)
-; CHECK: @test57
+; CHECK-LABEL: @test57(
 ; CHECK: %and = and i32 %a, -2
 ; CHECK: %cmp = icmp ne i32 %and, 0
 define i1 @test57(i32 %a) {
@@ -561,7 +572,7 @@ define i1 @test57(i32 %a) {
 }
 
 ; rdar://problem/10482509
-; CHECK: @cmpabs1
+; CHECK-LABEL: @cmpabs1(
 ; CHECK-NEXT: icmp ne
 define zeroext i1 @cmpabs1(i64 %val) {
   %sub = sub nsw i64 0, %val
@@ -571,7 +582,7 @@ define zeroext i1 @cmpabs1(i64 %val) {
   ret i1 %tobool
 }
 
-; CHECK: @cmpabs2
+; CHECK-LABEL: @cmpabs2(
 ; CHECK-NEXT: icmp ne
 define zeroext i1 @cmpabs2(i64 %val) {
   %sub = sub nsw i64 0, %val
@@ -581,7 +592,7 @@ define zeroext i1 @cmpabs2(i64 %val) {
   ret i1 %tobool
 }
 
-; CHECK: @test58
+; CHECK-LABEL: @test58(
 ; CHECK-NEXT: call i32 @test58_d(i64 36029346783166592)
 define void @test58() nounwind {
   %cast = bitcast <1 x i64> <i64 36029346783166592> to i64
@@ -599,7 +610,22 @@ define i1 @test59(i8* %foo) {
   %use = ptrtoint i8* %cast1 to i64
   %call = call i32 @test58_d(i64 %use) nounwind
   ret i1 %cmp
-; CHECK: @test59
+; CHECK-LABEL: @test59(
+; CHECK: ret i1 true
+}
+
+define i1 @test59_as1(i8 addrspace(1)* %foo) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 2  ; %foo + 8 bytes (2 x i32)
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 10  ; %foo + 10 bytes
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  %use = ptrtoint i8 addrspace(1)* %cast1 to i64  ; keeps %cast1 live besides the compare
+  %call = call i32 @test58_d(i64 %use) nounwind
+  ret i1 %cmp
+; CHECK-LABEL: @test59_as1(
+; CHECK: %[[GEP:.+]] = getelementptr inbounds i8 addrspace(1)* %foo, i16 8
+; CHECK: ptrtoint i8 addrspace(1)* %[[GEP]] to i16
 ; CHECK: ret i1 true
 }
 
@@ -610,12 +636,27 @@ define i1 @test60(i8* %foo, i64 %i, i64 %j) {
   %cast1 = bitcast i32* %gep1 to i8*
   %cmp = icmp ult i8* %cast1, %gep2
   ret i1 %cmp
-; CHECK: @test60
+; CHECK-LABEL: @test60(
 ; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
 ; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
 ; CHECK-NEXT: ret i1
 }
 
+define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i64 %i  ; byte offset %i * 4
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i64 %j  ; byte offset %j
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; CHECK-LABEL: @test60_as1(
+; CHECK: trunc i64 %i to i16
+; CHECK: trunc i64 %j to i16
+; CHECK: %gep1.idx = shl nuw i16 %{{.+}}, 2
+; CHECK-NEXT: icmp sgt i16 %{{.+}}, %gep1.idx
+; CHECK-NEXT: ret i1
+}
+
 define i1 @test61(i8* %foo, i64 %i, i64 %j) {
   %bit = bitcast i8* %foo to i32*
   %gep1 = getelementptr i32* %bit, i64 %i
@@ -624,26 +665,48 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) {
   %cmp = icmp ult i8* %cast1, %gep2
   ret i1 %cmp
 ; Don't transform non-inbounds GEPs.
-; CHECK: @test61
+; CHECK-LABEL: @test61(
 ; CHECK: icmp ult i8* %cast1, %gep2
 ; CHECK-NEXT: ret i1
 }
 
+define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr i8 addrspace(1)* %foo, i16 %j
+  %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+  %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+  ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK-LABEL: @test61_as1(
+; CHECK: icmp ult i8 addrspace(1)* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
 define i1 @test62(i8* %a) {
   %arrayidx1 = getelementptr inbounds i8* %a, i64 1
   %arrayidx2 = getelementptr inbounds i8* %a, i64 10
   %cmp = icmp slt i8* %arrayidx1, %arrayidx2
   ret i1 %cmp
-; CHECK: @test62
+; CHECK-LABEL: @test62(
 ; CHECK-NEXT: ret i1 true
 }
 
+define i1 @test62_as1(i8 addrspace(1)* %a) {
+; CHECK-LABEL: @test62_as1(
+; CHECK-NEXT: ret i1 true
+  %arrayidx1 = getelementptr inbounds i8 addrspace(1)* %a, i64 1
+  %arrayidx2 = getelementptr inbounds i8 addrspace(1)* %a, i64 10
+  %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2
+  ret i1 %cmp
+}
+
 define i1 @test63(i8 %a, i32 %b) nounwind {
   %z = zext i8 %a to i32
   %t = and i32 %b, 255
   %c = icmp eq i32 %z, %t
   ret i1 %c
-; CHECK: @test63
+; CHECK-LABEL: @test63(
 ; CHECK-NEXT: %1 = trunc i32 %b to i8
 ; CHECK-NEXT: %c = icmp eq i8 %1, %a
 ; CHECK-NEXT: ret i1 %c
@@ -654,7 +717,7 @@ define i1 @test64(i8 %a, i32 %b) nounwind {
   %z = zext i8 %a to i32
   %c = icmp eq i32 %t, %z
   ret i1 %c
-; CHECK: @test64
+; CHECK-LABEL: @test64(
 ; CHECK-NEXT: %1 = trunc i32 %b to i8
 ; CHECK-NEXT: %c = icmp eq i8 %1, %a
 ; CHECK-NEXT: ret i1 %c
@@ -664,7 +727,7 @@ define i1 @test65(i64 %A, i64 %B) {
   %s1 = add i64 %A, %B
   %s2 = add i64 %A, %B
   %cmp = icmp eq i64 %s1, %s2
-; CHECK: @test65
+; CHECK-LABEL: @test65(
 ; CHECK-NEXT: ret i1 true
   ret i1 %cmp
 }
@@ -673,12 +736,12 @@ define i1 @test66(i64 %A, i64 %B) {
   %s1 = add i64 %A, %B
   %s2 = add i64 %B, %A
   %cmp = icmp eq i64 %s1, %s2
-; CHECK: @test66
+; CHECK-LABEL: @test66(
 ; CHECK-NEXT: ret i1 true
   ret i1 %cmp
 }
 
-; CHECK: @test67
+; CHECK-LABEL: @test67(
 ; CHECK: %and = and i32 %x, 96
 ; CHECK: %cmp = icmp ne i32 %and, 0
 define i1 @test67(i32 %x) nounwind uwtable {
@@ -687,7 +750,7 @@ define i1 @test67(i32 %x) nounwind uwtable {
   ret i1 %cmp
 }
 
-; CHECK: @test68
+; CHECK-LABEL: @test68(
 ; CHECK: %cmp = icmp ugt i32 %and, 30
 define i1 @test68(i32 %x) nounwind uwtable {
   %and = and i32 %x, 127
@@ -696,7 +759,7 @@ define i1 @test68(i32 %x) nounwind uwtable {
 }
 
 ; PR14708
-; CHECK: @test69
+; CHECK-LABEL: @test69(
 ; CHECK: %1 = and i32 %c, -33
 ; CHECK: %2 = icmp eq i32 %1, 65
 ; CHECK: ret i1 %2
@@ -707,7 +770,19 @@ define i1 @test69(i32 %c) nounwind uwtable {
   ret i1 %3
 }
 
-; CHECK: @icmp_sext16trunc
+; PR15940
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: %A = srem i32 5, %X
+; CHECK-NEXT: %C = icmp ne i32 %A, 2
+; CHECK-NEXT: ret i1 %C
+define i1 @test70(i32 %X) {
+  %A = srem i32 5, %X
+  %B = add i32 %A, 2
+  %C = icmp ne i32 %B, 4
+  ret i1 %C
+}
+
+; CHECK-LABEL: @icmp_sext16trunc(
 ; CHECK-NEXT: %1 = trunc i32 %x to i16
 ; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
 define i1 @icmp_sext16trunc(i32 %x) {
@@ -717,7 +792,7 @@ define i1 @icmp_sext16trunc(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_sext8trunc
+; CHECK-LABEL: @icmp_sext8trunc(
 ; CHECK-NEXT: %1 = trunc i32 %x to i8
 ; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
 define i1 @icmp_sext8trunc(i32 %x) {
@@ -727,7 +802,7 @@ define i1 @icmp_sext8trunc(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl16
+; CHECK-LABEL: @icmp_shl16(
 ; CHECK-NEXT: %1 = trunc i32 %x to i16
 ; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
 define i1 @icmp_shl16(i32 %x) {
@@ -736,7 +811,7 @@ define i1 @icmp_shl16(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl24
+; CHECK-LABEL: @icmp_shl24(
 ; CHECK-NEXT: %1 = trunc i32 %x to i8
 ; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
 define i1 @icmp_shl24(i32 %x) {
@@ -747,7 +822,7 @@ define i1 @icmp_shl24(i32 %x) {
 
 ; If the (shl x, C) preserved the sign and this is a sign test,
 ; compare the LHS operand instead
-; CHECK: @icmp_shl_nsw_sgt
+; CHECK-LABEL: @icmp_shl_nsw_sgt(
 ; CHECK-NEXT: icmp sgt i32 %x, 0
 define i1 @icmp_shl_nsw_sgt(i32 %x) {
   %shl = shl nsw i32 %x, 21
@@ -755,7 +830,7 @@ define i1 @icmp_shl_nsw_sgt(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl_nsw_sge0
+; CHECK-LABEL: @icmp_shl_nsw_sge0(
 ; CHECK-NEXT: icmp sgt i32 %x, -1
 define i1 @icmp_shl_nsw_sge0(i32 %x) {
   %shl = shl nsw i32 %x, 21
@@ -763,7 +838,7 @@ define i1 @icmp_shl_nsw_sge0(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl_nsw_sge1
+; CHECK-LABEL: @icmp_shl_nsw_sge1(
 ; CHECK-NEXT: icmp sgt i32 %x, 0
 define i1 @icmp_shl_nsw_sge1(i32 %x) {
   %shl = shl nsw i32 %x, 21
@@ -772,7 +847,7 @@ define i1 @icmp_shl_nsw_sge1(i32 %x) {
 }
 
 ; Checks for icmp (eq|ne) (shl x, C), 0
-; CHECK: @icmp_shl_nsw_eq
+; CHECK-LABEL: @icmp_shl_nsw_eq(
 ; CHECK-NEXT: icmp eq i32 %x, 0
 define i1 @icmp_shl_nsw_eq(i32 %x) {
   %mul = shl nsw i32 %x, 5
@@ -780,7 +855,7 @@ define i1 @icmp_shl_nsw_eq(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl_eq
+; CHECK-LABEL: @icmp_shl_eq(
 ; CHECK-NOT: icmp eq i32 %mul, 0
 define i1 @icmp_shl_eq(i32 %x) {
   %mul = shl i32 %x, 5
@@ -788,7 +863,7 @@ define i1 @icmp_shl_eq(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl_nsw_ne
+; CHECK-LABEL: @icmp_shl_nsw_ne(
 ; CHECK-NEXT: icmp ne i32 %x, 0
 define i1 @icmp_shl_nsw_ne(i32 %x) {
   %mul = shl nsw i32 %x, 7
@@ -796,7 +871,7 @@ define i1 @icmp_shl_nsw_ne(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_shl_ne
+; CHECK-LABEL: @icmp_shl_ne(
 ; CHECK-NOT: icmp ne i32 %x, 0
 define i1 @icmp_shl_ne(i32 %x) {
   %mul = shl i32 %x, 7
@@ -806,7 +881,7 @@ define i1 @icmp_shl_ne(i32 %x) {
 
 ; If the (mul x, C) preserved the sign and this is sign test,
 ; compare the LHS operand instead
-; CHECK: @icmp_mul_nsw
+; CHECK-LABEL: @icmp_mul_nsw(
 ; CHECK-NEXT: icmp sgt i32 %x, 0
 define i1 @icmp_mul_nsw(i32 %x) {
   %mul = mul nsw i32 %x, 12
@@ -814,7 +889,7 @@ define i1 @icmp_mul_nsw(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul_nsw1
+; CHECK-LABEL: @icmp_mul_nsw1(
 ; CHECK-NEXT: icmp slt i32 %x, 0
 define i1 @icmp_mul_nsw1(i32 %x) {
   %mul = mul nsw i32 %x, 12
@@ -822,7 +897,7 @@ define i1 @icmp_mul_nsw1(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul_nsw_neg
+; CHECK-LABEL: @icmp_mul_nsw_neg(
 ; CHECK-NEXT: icmp slt i32 %x, 1
 define i1 @icmp_mul_nsw_neg(i32 %x) {
   %mul = mul nsw i32 %x, -12
@@ -830,7 +905,7 @@ define i1 @icmp_mul_nsw_neg(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul_nsw_neg1
+; CHECK-LABEL: @icmp_mul_nsw_neg1(
 ; CHECK-NEXT: icmp slt i32 %x, 0
 define i1 @icmp_mul_nsw_neg1(i32 %x) {
   %mul = mul nsw i32 %x, -12
@@ -838,7 +913,7 @@ define i1 @icmp_mul_nsw_neg1(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul_nsw_0
+; CHECK-LABEL: @icmp_mul_nsw_0(
 ; CHECK-NOT: icmp sgt i32 %x, 0
 define i1 @icmp_mul_nsw_0(i32 %x) {
   %mul = mul nsw i32 %x, 0
@@ -846,7 +921,7 @@ define i1 @icmp_mul_nsw_0(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul
+; CHECK-LABEL: @icmp_mul(
 ; CHECK-NEXT: %mul = mul i32 %x, -12
 define i1 @icmp_mul(i32 %x) {
   %mul = mul i32 %x, -12
@@ -855,7 +930,7 @@ define i1 @icmp_mul(i32 %x) {
 }
 
 ; Checks for icmp (eq|ne) (mul x, C), 0
-; CHECK: @icmp_mul_neq0
+; CHECK-LABEL: @icmp_mul_neq0(
 ; CHECK-NEXT: icmp ne i32 %x, 0
 define i1 @icmp_mul_neq0(i32 %x) {
   %mul = mul nsw i32 %x, -12
@@ -863,7 +938,7 @@ define i1 @icmp_mul_neq0(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul_eq0
+; CHECK-LABEL: @icmp_mul_eq0(
 ; CHECK-NEXT: icmp eq i32 %x, 0
 define i1 @icmp_mul_eq0(i32 %x) {
   %mul = mul nsw i32 %x, 12
@@ -871,7 +946,7 @@ define i1 @icmp_mul_eq0(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul0_eq0
+; CHECK-LABEL: @icmp_mul0_eq0(
 ; CHECK-NEXT: ret i1 true
 define i1 @icmp_mul0_eq0(i32 %x) {
   %mul = mul i32 %x, 0
@@ -879,7 +954,7 @@ define i1 @icmp_mul0_eq0(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_mul0_ne0
+; CHECK-LABEL: @icmp_mul0_ne0(
 ; CHECK-NEXT: ret i1 false
 define i1 @icmp_mul0_ne0(i32 %x) {
   %mul = mul i32 %x, 0
@@ -887,7 +962,7 @@ define i1 @icmp_mul0_ne0(i32 %x) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_sub1_sge
+; CHECK-LABEL: @icmp_sub1_sge(
 ; CHECK-NEXT: icmp sgt i32 %x, %y
 define i1 @icmp_sub1_sge(i32 %x, i32 %y) {
   %sub = add nsw i32 %x, -1
@@ -895,7 +970,7 @@ define i1 @icmp_sub1_sge(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_add1_sgt
+; CHECK-LABEL: @icmp_add1_sgt(
 ; CHECK-NEXT: icmp sge i32 %x, %y
 define i1 @icmp_add1_sgt(i32 %x, i32 %y) {
   %add = add nsw i32 %x, 1
@@ -903,7 +978,7 @@ define i1 @icmp_add1_sgt(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_sub1_slt
+; CHECK-LABEL: @icmp_sub1_slt(
 ; CHECK-NEXT: icmp sle i32 %x, %y
 define i1 @icmp_sub1_slt(i32 %x, i32 %y) {
   %sub = add nsw i32 %x, -1
@@ -911,7 +986,7 @@ define i1 @icmp_sub1_slt(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_add1_sle
+; CHECK-LABEL: @icmp_add1_sle(
 ; CHECK-NEXT: icmp slt i32 %x, %y
 define i1 @icmp_add1_sle(i32 %x, i32 %y) {
   %add = add nsw i32 %x, 1
@@ -919,7 +994,7 @@ define i1 @icmp_add1_sle(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_add20_sge_add57
+; CHECK-LABEL: @icmp_add20_sge_add57(
 ; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add nsw i32 %y, 37
 ; CHECK-NEXT: icmp sle i32 [[ADD]], %x
 define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
@@ -929,7 +1004,7 @@ define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_sub57_sge_sub20
+; CHECK-LABEL: @icmp_sub57_sge_sub20(
 ; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = add nsw i32 %x, -37
 ; CHECK-NEXT: icmp sge i32 [[SUB]], %y
 define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
@@ -939,7 +1014,7 @@ define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_and_shl_neg_ne_0
+; CHECK-LABEL: @icmp_and_shl_neg_ne_0(
 ; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 0
@@ -952,7 +1027,7 @@ define i1 @icmp_and_shl_neg_ne_0(i32 %A, i32 %B) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_and_shl_neg_eq_0
+; CHECK-LABEL: @icmp_and_shl_neg_eq_0(
 ; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
@@ -965,7 +1040,7 @@ define i1 @icmp_and_shl_neg_eq_0(i32 %A, i32 %B) {
   ret i1 %cmp
 }
 
-; CHECK: @icmp_add_and_shr_ne_0
+; CHECK-LABEL: @icmp_add_and_shr_ne_0(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, 240
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 224
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -976,3 +1051,308 @@ define i1 @icmp_add_and_shr_ne_0(i32 %X) {
   %tobool = icmp ne i32 %add, 0
   ret i1 %tobool
 }
+
+; PR16244
+; CHECK-LABEL: define i1 @test71(
+; CHECK-NEXT: ret i1 false
+define i1 @test71(i8* %x) {
+  %a = getelementptr i8* %x, i64 8
+  %b = getelementptr inbounds i8* %x, i64 8
+  %c = icmp ugt i8* %a, %b
+  ret i1 %c
+}
+
+define i1 @test71_as1(i8 addrspace(1)* %x) {
+; CHECK-LABEL: @test71_as1(
+; CHECK-NEXT: ret i1 false
+  %a = getelementptr i8 addrspace(1)* %x, i64 8
+  %b = getelementptr inbounds i8 addrspace(1)* %x, i64 8
+  %c = icmp ugt i8 addrspace(1)* %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ult_32(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_ult_32(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ult i32 %shl, 32
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_eq_32(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_eq_32(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp eq i32 %shl, 32
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_eq_31(
+; CHECK-NEXT: ret i1 false
+define i1 @icmp_shl_1_V_eq_31(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp eq i32 %shl, 31
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ne_31(
+; CHECK-NEXT: ret i1 true
+define i1 @icmp_shl_1_V_ne_31(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ne i32 %shl, 31
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ult_30(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_ult_30(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ult i32 %shl, 30
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ugt_30(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %V, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_ugt_30(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ugt i32 %shl, 30
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ule_30(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_ule_30(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ule i32 %shl, 30
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_uge_30(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %V, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_uge_30(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp uge i32 %shl, 30
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 %V, 31
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp uge i32 %shl, 2147483648
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ugt_2147483648(
+; CHECK-NEXT: ret i1 false
+define i1 @icmp_shl_1_V_ugt_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ugt i32 %shl, 2147483648
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ule_2147483648(
+; CHECK-NEXT: ret i1 true
+define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ule i32 %shl, 2147483648
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %V, 31
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_shl_1_V_ult_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ult i32 %shl, 2147483648
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B(
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = add i64 %b, -1
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp uge i64 [[SUB]], %a
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @or_icmp_eq_B_0_icmp_ult_A_B(i64 %a, i64 %b) {
+  %1 = icmp eq i64 %b, 0
+  %2 = icmp ult i64 %a, %b
+  %3 = or i1 %1, %2
+  ret i1 %3
+}
+
+; CHECK-LABEL: @icmp_add_ult_2(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, -2
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_add_ult_2(i32 %X) {
+  %add = add i32 %X, -14
+  %cmp = icmp ult i32 %add, 2
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_add_X_-14_ult_2(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, -2
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_add_X_-14_ult_2(i32 %X) {
+  %add = add i32 %X, -14
+  %cmp = icmp ult i32 %add, 2
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sub_3_X_ult_2(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %X, 1
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[OR]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_sub_3_X_ult_2(i32 %X) {
+  %add = sub i32 3, %X
+  %cmp = icmp ult i32 %add, 2
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_add_X_-14_uge_2(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, -2
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_add_X_-14_uge_2(i32 %X) {
+  %add = add i32 %X, -14
+  %cmp = icmp uge i32 %add, 2
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sub_3_X_uge_2(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %X, 1
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[OR]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_sub_3_X_uge_2(i32 %X) {
+  %add = sub i32 3, %X
+  %cmp = icmp uge i32 %add, 2
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_and_X_-16_eq-16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %X, -17
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_X_-16_eq-16(i32 %X) {
+  %and = and i32 %X, -16
+  %cmp = icmp eq i32 %and, -16
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_and_X_-16_ne-16(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, -16
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_X_-16_ne-16(i32 %X) {
+  %and = and i32 %X, -16
+  %cmp = icmp ne i32 %and, -16
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sub_-1_X_ult_4(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %X, -5
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_sub_-1_X_ult_4(i32 %X) {
+  %sub = sub i32 -1, %X
+  %cmp = icmp ult i32 %sub, 4
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sub_-1_X_uge_4(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, -4
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_sub_-1_X_uge_4(i32 %X) {
+  %sub = sub i32 -1, %X
+  %cmp = icmp uge i32 %sub, 4
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_swap_operands_for_cse(
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) {
+entry:
+  %sub = sub i32 %X, %Y
+  %cmp = icmp ugt i32 %Y, %X ; expected output form: ult %X, %Y (operand order of %sub)
+  br i1 %cmp, label %true, label %false
+true:
+  %restrue = trunc i32 %sub to i1
+  br label %end
+false:
+  %shift = lshr i32 %sub, 4
+  %resfalse = trunc i32 %shift to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_swap_operands_for_cse2(
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) {
+entry:
+  %cmp = icmp ugt i32 %Y, %X ; expected output form: ult %X, %Y
+  br i1 %cmp, label %true, label %false
+true:
+  %sub = sub i32 %X, %Y
+  %sub1 = sub i32 %X, %Y
+  %add = add i32 %sub, %sub1
+  %restrue = trunc i32 %add to i1
+  br label %end
+false:
+  %sub2 = sub i32 %Y, %X
+  %resfalse = trunc i32 %sub2 to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse(
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %Y, %X
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: ret i1
+define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) {
+entry:
+  %cmp = icmp ugt i32 %Y, %X ; expected to stay in this form (no swap)
+  br i1 %cmp, label %true, label %false
+true:
+  %sub = sub i32 %X, %Y
+  %restrue = trunc i32 %sub to i1
+  br label %end
+false:
+  %sub2 = sub i32 %Y, %X
+  %resfalse = trunc i32 %sub2 to i1
+  br label %end
+end:
+  %res = phi i1 [%restrue, %true], [%resfalse, %false]
+  ret i1 %res
+}
+
+; CHECK-LABEL: @icmp_lshr_lshr_eq(
+; CHECK: %z.unshifted = xor i32 %a, %b
+; CHECK: %z = icmp ult i32 %z.unshifted, 1073741824
+define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) nounwind {
+ %x = lshr i32 %a, 30
+ %y = lshr i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
+}
+
+; CHECK-LABEL: @icmp_ashr_ashr_ne(
+; CHECK: %z.unshifted = xor i32 %a, %b
+; CHECK: %z = icmp ugt i32 %z.unshifted, 255
+define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) nounwind {
+ %x = ashr i32 %a, 8
+ %y = ashr i32 %b, 8
+ %z = icmp ne i32 %x, %y
+ ret i1 %z
+}
diff --git a/test/Transforms/InstCombine/idioms.ll b/test/Transforms/InstCombine/idioms.ll
index 1a211668c3bf..58485442230c 100644
--- a/test/Transforms/InstCombine/idioms.ll
+++ b/test/Transforms/InstCombine/idioms.ll
@@ -25,7 +25,7 @@ bb3:
 bb4:
         %f = phi i32 [ %not2, %bb2 ], [ %e, %bb3 ]
 	ret i32 %f
-; CHECK: @test_asr
+; CHECK-LABEL: @test_asr(
 ; CHECK: bb4:
 ; CHECK: %f = ashr i32 %a, %b
 ; CHECK: ret i32 %f
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index f334b3b1e935..91c44704ce78 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -14,7 +14,7 @@ define i8 @uaddtest1(i8 %A, i8 %B) {
   %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
   %y = extractvalue %overflow.result %x, 0
   ret i8 %y
-; CHECK: @uaddtest1
+; CHECK-LABEL: @uaddtest1(
 ; CHECK-NEXT: %y = add i8 %A, %B
 ; CHECK-NEXT: ret i8 %y
 }
@@ -27,7 +27,7 @@ define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) {
   %z = extractvalue %overflow.result %x, 1
   store i1 %z, i1* %overflowPtr
   ret i8 %y
-; CHECK: @uaddtest2
+; CHECK-LABEL: @uaddtest2(
 ; CHECK-NEXT: %and.A = and i8 %A, 127
 ; CHECK-NEXT: %and.B = and i8 %B, 127
 ; CHECK-NEXT: %x = add nuw i8 %and.A, %and.B
@@ -43,7 +43,7 @@ define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) {
   %z = extractvalue %overflow.result %x, 1
   store i1 %z, i1* %overflowPtr
   ret i8 %y
-; CHECK: @uaddtest3
+; CHECK-LABEL: @uaddtest3(
 ; CHECK-NEXT: %or.A = or i8 %A, -128
 ; CHECK-NEXT: %or.B = or i8 %B, -128
 ; CHECK-NEXT: %x = add i8 %or.A, %or.B
@@ -57,7 +57,7 @@ define i8 @uaddtest4(i8 %A, i1* %overflowPtr) {
   %z = extractvalue %overflow.result %x, 1
   store i1 %z, i1* %overflowPtr
   ret i8 %y
-; CHECK: @uaddtest4
+; CHECK-LABEL: @uaddtest4(
 ; CHECK-NEXT: ret i8 undef
 }
 
@@ -67,7 +67,7 @@ define i8 @uaddtest5(i8 %A, i1* %overflowPtr) {
   %z = extractvalue %overflow.result %x, 1
   store i1 %z, i1* %overflowPtr
   ret i8 %y
-; CHECK: @uaddtest5
+; CHECK-LABEL: @uaddtest5(
 ; CHECK: ret i8 %A
 }
 
@@ -75,7 +75,7 @@ define i1 @uaddtest6(i8 %A, i8 %B) {
   %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 -4)
   %z = extractvalue %overflow.result %x, 1
   ret i1 %z
-; CHECK: @uaddtest6
+; CHECK-LABEL: @uaddtest6(
 ; CHECK-NEXT: %z = icmp ugt i8 %A, 3
 ; CHECK-NEXT: ret i1 %z
 }
@@ -84,7 +84,7 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
   %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
   %z = extractvalue %overflow.result %x, 0
   ret i8 %z
-; CHECK: @uaddtest7
+; CHECK-LABEL: @uaddtest7(
 ; CHECK-NEXT: %z = add i8 %A, %B
 ; CHECK-NEXT: ret i8 %z
 }
@@ -96,7 +96,7 @@ define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %z = extractvalue %overflow.result %x, 1
   store i1 %z, i1* %overflowPtr
   ret i8 %y
-; CHECK: @umultest1
+; CHECK-LABEL: @umultest1(
 ; CHECK-NEXT: store i1 false, i1* %overflowPtr
 ; CHECK-NEXT: ret i8 0
 }
@@ -107,7 +107,7 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
   %z = extractvalue %overflow.result %x, 1
   store i1 %z, i1* %overflowPtr
   ret i8 %y
-; CHECK: @umultest2
+; CHECK-LABEL: @umultest2(
 ; CHECK-NEXT: store i1 false, i1* %overflowPtr
 ; CHECK-NEXT: ret i8 %A
 }
@@ -122,7 +122,7 @@ define i32 @umultest3(i32 %n) nounwind {
   %res = extractvalue %ov.result.32 %mul, 0
   %ret = select i1 %ov, i32 -1, i32 %res
   ret i32 %ret
-; CHECK: @umultest3
+; CHECK-LABEL: @umultest3(
 ; CHECK-NEXT: shr
 ; CHECK-NEXT: mul nuw
 ; CHECK-NEXT: ret
@@ -135,7 +135,7 @@ define i32 @umultest4(i32 %n) nounwind {
   %res = extractvalue %ov.result.32 %mul, 0
   %ret = select i1 %ov, i32 -1, i32 %res
   ret i32 %ret
-; CHECK: @umultest4
+; CHECK-LABEL: @umultest4(
 ; CHECK: umul.with.overflow
 }
 
@@ -150,7 +150,7 @@ entry:
   %C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind
   store volatile double %C, double* %P
   ret void
-; CHECK: @powi
+; CHECK-LABEL: @powi(
 ; CHECK: %A = fdiv double 1.0{{.*}}, %V
 ; CHECK: store volatile double %A, 
 ; CHECK: store volatile double 1.0 
@@ -163,7 +163,7 @@ entry:
   %and = and i32 %or, -8
   %count = tail call i32 @llvm.cttz.i32(i32 %and, i1 true) nounwind readnone
   ret i32 %count
-; CHECK: @cttz
+; CHECK-LABEL: @cttz(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: ret i32 3
 }
@@ -174,7 +174,7 @@ entry:
   %and = and i8 %or, 63
   %count = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) nounwind readnone
   ret i8 %count
-; CHECK: @ctlz
+; CHECK-LABEL: @ctlz(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: ret i8 2
 }
@@ -206,7 +206,7 @@ define i32 @cttz_simplify1a(i32 %x) nounwind readnone ssp {
   %shr3 = lshr i32 %tmp1, 5
   ret i32 %shr3
 
-; CHECK: @cttz_simplify1a
+; CHECK-LABEL: @cttz_simplify1a(
 ; CHECK: icmp eq i32 %x, 0
 ; CHECK-NEXT: zext i1
 ; CHECK-NEXT: ret i32
@@ -217,7 +217,7 @@ define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
   %shr3 = lshr i32 %tmp1, 5
   ret i32 %shr3
 
-; CHECK: @cttz_simplify1b
+; CHECK-LABEL: @cttz_simplify1b(
 ; CHECK-NEXT: ret i32 0
 }
 
@@ -225,7 +225,7 @@ define i32 @ctlz_undef(i32 %Value) nounwind {
   %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
   ret i32 %ctlz
 
-; CHECK: @ctlz_undef
+; CHECK-LABEL: @ctlz_undef(
 ; CHECK-NEXT: ret i32 undef
 }
 
@@ -233,7 +233,7 @@ define i32 @cttz_undef(i32 %Value) nounwind {
   %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
   ret i32 %cttz
 
-; CHECK: @cttz_undef
+; CHECK-LABEL: @cttz_undef(
 ; CHECK-NEXT: ret i32 undef
 }
 
@@ -243,7 +243,7 @@ define i32 @ctlz_select(i32 %Value) nounwind {
   %s = select i1 %tobool, i32 %ctlz, i32 32
   ret i32 %s
 
-; CHECK: @ctlz_select
+; CHECK-LABEL: @ctlz_select(
 ; CHECK: select i1 %tobool, i32 %ctlz, i32 32
 }
 
@@ -253,6 +253,6 @@ define i32 @cttz_select(i32 %Value) nounwind {
   %s = select i1 %tobool, i32 %cttz, i32 32
   ret i32 %s
 
-; CHECK: @cttz_select
+; CHECK-LABEL: @cttz_select(
 ; CHECK: select i1 %tobool, i32 %cttz, i32 32
 }
diff --git a/test/Transforms/InstCombine/invoke.ll b/test/Transforms/InstCombine/invoke.ll
index 04eaf86a287b..c4b58de61946 100644
--- a/test/Transforms/InstCombine/invoke.ll
+++ b/test/Transforms/InstCombine/invoke.ll
@@ -7,7 +7,7 @@ declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
 declare i8* @_Znwm(i64)
 
 
-; CHECK: @f1
+; CHECK-LABEL: @f1(
 define i64 @f1() nounwind uwtable ssp {
 entry:
 ; CHECK: nvoke noalias i8* undef()
@@ -27,7 +27,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f2
+; CHECK-LABEL: @f2(
 define i64 @f2() nounwind uwtable ssp {
 entry:
 ; CHECK: nvoke noalias i8* null()
@@ -47,7 +47,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f3
+; CHECK-LABEL: @f3(
 define void @f3() nounwind uwtable ssp {
 ; CHECK: invoke void @llvm.donothing()
   %call = invoke noalias i8* @_Znwm(i64 13)
diff --git a/test/Transforms/InstCombine/isascii-1.ll b/test/Transforms/InstCombine/isascii-1.ll
index 2a413d89b492..88f5ad66d2ef 100644
--- a/test/Transforms/InstCombine/isascii-1.ll
+++ b/test/Transforms/InstCombine/isascii-1.ll
@@ -9,21 +9,21 @@ declare i32 @isascii(i32)
 ; Check isascii(c) -> c <u 128.
 
 define i32 @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i32 @isascii(i32 127)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %ret = call i32 @isascii(i32 128)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify3(i32 %x) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %ret = call i32 @isascii(i32 %x)
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 128
 ; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
diff --git a/test/Transforms/InstCombine/isdigit-1.ll b/test/Transforms/InstCombine/isdigit-1.ll
index f291296c8826..6791307aeaee 100644
--- a/test/Transforms/InstCombine/isdigit-1.ll
+++ b/test/Transforms/InstCombine/isdigit-1.ll
@@ -9,35 +9,35 @@ declare i32 @isdigit(i32)
 ; Check isdigit(c) -> (c - '0') <u 10;
 
 define i32 @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i32 @isdigit(i32 47)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %ret = call i32 @isdigit(i32 48)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %ret = call i32 @isdigit(i32 57)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %ret = call i32 @isdigit(i32 58)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify5(i32 %x) {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
 
   %ret = call i32 @isdigit(i32 %x)
 ; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add i32 %x, -48
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/InstCombine/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 869215cb58d4..98100263b48c 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -1,18 +1,75 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck -check-prefix=NODL %s
+; RUN: opt -instcombine -S -default-data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck -check-prefix=P32 %s
 
-@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, 
+@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+
+@G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
+                                                      i16 73, i16 82, i16 69, i16 68, i16 0]
+
 @GD = internal constant [6 x double]
    [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
 
+%Foo = type { i32, i32, i32, i32 }
+
+@GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 }
+
+@GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 },
+                                             %Foo { i32 5, i32 4, i32 6, i32 11 },
+                                             %Foo { i32 6, i32 5, i32 9, i32 20 },
+                                             %Foo { i32 12, i32 3, i32 9, i32 8 } ]
+
+
 define i1 @test1(i32 %X) {
   %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
   %Q = load i16* %P
   %R = icmp eq i16 %Q, 0
   ret i1 %R
-; CHECK: @test1
-; CHECK-NEXT: %R = icmp eq i32 %X, 9
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test1(
+; NODL-NEXT: %R = icmp eq i32 %X, 9
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test1(
+; P32-NEXT: %R = icmp eq i32 %X, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds(i32 %X) {
+  %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; NODL-LABEL: @test1_noinbounds(
+; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+
+; P32-LABEL: @test1_noinbounds(
+; P32-NEXT: %R = icmp eq i32 %X, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds_i64(i64 %X) {
+  %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; NODL-LABEL: @test1_noinbounds_i64(
+; NODL-NEXT: %P = getelementptr [10 x i16]* @G16, i64 0, i64 %X
+
+; P32-LABEL: @test1_noinbounds_i64(
+; P32: %R = icmp eq i32 %1, 9
+; P32-NEXT: ret i1 %R
+}
+
+define i1 @test1_noinbounds_as1(i32 %x) {
+  %p = getelementptr [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
+  %q = load i16 addrspace(1)* %p
+  %r = icmp eq i16 %q, 0
+  ret i1 %r
+
+; P32-LABEL: @test1_noinbounds_as1(
+; P32-NEXT: trunc i32 %x to i16
+; P32-NEXT: %r = icmp eq i16 %1, 9
+; P32-NEXT: ret i1 %r
 }
 
 define i1 @test2(i32 %X) {
@@ -20,9 +77,9 @@ define i1 @test2(i32 %X) {
   %Q = load i16* %P
   %R = icmp slt i16 %Q, 85
   ret i1 %R
-; CHECK: @test2
-; CHECK-NEXT: %R = icmp ne i32 %X, 4
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test2(
+; NODL-NEXT: %R = icmp ne i32 %X, 4
+; NODL-NEXT: ret i1 %R
 }
 
 define i1 @test3(i32 %X) {
@@ -30,9 +87,14 @@ define i1 @test3(i32 %X) {
   %Q = load double* %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
-; CHECK: @test3
-; CHECK-NEXT: %R = icmp eq i32 %X, 1
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test3(
+; NODL-NEXT: %R = icmp eq i32 %X, 1
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test3(
+; P32-NEXT: %R = icmp eq i32 %X, 1
+; P32-NEXT: ret i1 %R
+
 }
 
 define i1 @test4(i32 %X) {
@@ -40,11 +102,17 @@ define i1 @test4(i32 %X) {
   %Q = load i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
-; CHECK: @test4
-; CHECK-NEXT: lshr i32 933, %X
-; CHECK-NEXT: and i32 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test4(
+; NODL-NEXT: lshr i32 933, %X
+; NODL-NEXT: and i32 {{.*}}, 1
+; NODL-NEXT: %R = icmp ne i32 {{.*}}, 0
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test4(
+; P32-NEXT: lshr i32 933, %X
+; P32-NEXT: and i32 {{.*}}, 1
+; P32-NEXT: %R = icmp ne i32 {{.*}}, 0
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test4_i16(i16 %X) {
@@ -52,11 +120,19 @@ define i1 @test4_i16(i16 %X) {
   %Q = load i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
-; CHECK: @test4_i16
-; CHECK-NEXT: lshr i16 933, %X
-; CHECK-NEXT: and i16 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
+
+; NODL-LABEL: @test4_i16(
+; NODL-NEXT: lshr i16 933, %X
+; NODL-NEXT: and i16 {{.*}}, 1
+; NODL-NEXT: %R = icmp ne i16 {{.*}}, 0
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test4_i16(
+; P32-NEXT: sext i16 %X to i32
+; P32-NEXT: lshr i32 933, %1
+; P32-NEXT: and i32 {{.*}}, 1
+; P32-NEXT: %R = icmp ne i32 {{.*}}, 0
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test5(i32 %X) {
@@ -64,11 +140,17 @@ define i1 @test5(i32 %X) {
   %Q = load i16* %P
   %R = icmp eq i16 %Q, 69
   ret i1 %R
-; CHECK: @test5
-; CHECK-NEXT: icmp eq i32 %X, 2
-; CHECK-NEXT: icmp eq i32 %X, 7
-; CHECK-NEXT: %R = or i1
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test5(
+; NODL-NEXT: icmp eq i32 %X, 2
+; NODL-NEXT: icmp eq i32 %X, 7
+; NODL-NEXT: %R = or i1
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test5(
+; P32-NEXT: icmp eq i32 %X, 2
+; P32-NEXT: icmp eq i32 %X, 7
+; P32-NEXT: %R = or i1
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test6(i32 %X) {
@@ -76,10 +158,15 @@ define i1 @test6(i32 %X) {
   %Q = load double* %P
   %R = fcmp ogt double %Q, 0.0
   ret i1 %R
-; CHECK: @test6
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test6(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: %R = icmp ult i32 {{.*}}, 3
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test6(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: %R = icmp ult i32 {{.*}}, 3
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test7(i32 %X) {
@@ -87,10 +174,15 @@ define i1 @test7(i32 %X) {
   %Q = load double* %P
   %R = fcmp olt double %Q, 0.0
   ret i1 %R
-; CHECK: @test7
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
-; CHECK-NEXT: ret i1 %R
+; NODL-LABEL: @test7(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; NODL-NEXT: ret i1 %R
+
+; P32-LABEL: @test7(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; P32-NEXT: ret i1 %R
 }
 
 define i1 @test8(i32 %X) {
@@ -99,10 +191,15 @@ define i1 @test8(i32 %X) {
   %R = and i16 %Q, 3
   %S = icmp eq i16 %R, 0
   ret i1 %S
-; CHECK: @test8
-; CHECK-NEXT: and i32 %X, -2
-; CHECK-NEXT: icmp eq i32 {{.*}}, 8
-; CHECK-NEXT: ret i1
+; NODL-LABEL: @test8(
+; NODL-NEXT: and i32 %X, -2
+; NODL-NEXT: icmp eq i32 {{.*}}, 8
+; NODL-NEXT: ret i1
+
+; P32-LABEL: @test8(
+; P32-NEXT: and i32 %X, -2
+; P32-NEXT: icmp eq i32 {{.*}}, 8
+; P32-NEXT: ret i1
 }
 
 @GA = internal constant [4 x { i32, i32 } ] [
@@ -117,8 +214,161 @@ define i1 @test9(i32 %X) {
   %Q = load i32* %P
   %R = icmp eq i32 %Q, 1
   ret i1 %R
-; CHECK: @test9
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: icmp ult i32 {{.*}}, 2
-; CHECK-NEXT: ret i1
+; NODL-LABEL: @test9(
+; NODL-NEXT: add i32 %X, -1
+; NODL-NEXT: icmp ult i32 {{.*}}, 2
+; NODL-NEXT: ret i1
+
+; P32-LABEL: @test9(
+; P32-NEXT: add i32 %X, -1
+; P32-NEXT: icmp ult i32 {{.*}}, 2
+; P32-NEXT: ret i1
+}
+
+define i1 @test10_struct(i32 %x) {
+; NODL-LABEL: @test10_struct(
+; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+
+; P32-LABEL: @test10_struct(
+; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds(i32 %x) {
+; NODL-LABEL: @test10_struct_noinbounds(
+; NODL: getelementptr %Foo* @GS, i32 %x, i32 0
+
+; P32-LABEL: @test10_struct_noinbounds(
+; P32: getelementptr %Foo* @GS, i32 %x, i32 0
+  %p = getelementptr %Foo* @GS, i32 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+; Test that the GEP index is converted (sext to i32 in the P32 run) before we ever get here.
+; Case: index type (i16) is narrower than the pointer size.
+define i1 @test10_struct_i16(i16 %x){
+; NODL-LABEL: @test10_struct_i16(
+; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
+
+; P32-LABEL: @test10_struct_i16(
+; P32: %1 = sext i16 %x to i32
+; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+; Test that the GEP index is converted (trunc to i32 in the P32 run) before we ever get here.
+; Case: index type (i64) is wider than the pointer size.
+define i1 @test10_struct_i64(i64 %x){
+; NODL-LABEL: @test10_struct_i64(
+; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
+
+; P32-LABEL: @test10_struct_i64(
+; P32: %1 = trunc i64 %x to i32
+; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_noinbounds_i16(
+; NODL: getelementptr %Foo* @GS, i16 %x, i32 0
+
+; P32-LABEL: @test10_struct_noinbounds_i16(
+; P32: %1 = sext i16 %x to i32
+; P32: getelementptr %Foo* @GS, i32 %1, i32 0
+  %p = getelementptr %Foo* @GS, i16 %x, i32 0
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 0
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr(i32 %x) {
+; NODL-LABEL: @test10_struct_arr(
+; NODL-NEXT: %r = icmp ne i32 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr(
+; P32-NEXT: %r = icmp ne i32 %x, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds(i32 %x) {
+; NODL-LABEL: @test10_struct_arr_noinbounds(
+; NODL-NEXT: %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+; FIXME: the P32 next-line check below lacks its colon, so FileCheck ignores it; sibling P32 noinbounds cases fold to an icmp, so confirm the expected output before enabling it.
+; P32-LABEL: @test10_struct_arr_noinbounds(
+; P32-NEXT  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_arr_i16(
+; NODL-NEXT: %r = icmp ne i16 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr_i16(
+; P32-NEXT: %r = icmp ne i16 %x, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_i64(i64 %x) {
+; NODL-LABEL: @test10_struct_arr_i64(
+; NODL-NEXT: %r = icmp ne i64 %x, 1
+; NODL-NEXT: ret i1 %r
+
+; P32-LABEL: @test10_struct_arr_i64(
+; P32-NEXT: trunc i64 %x to i32
+; P32-NEXT: %r = icmp ne i32 %1, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr inbounds [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
+; NODL-LABEL: @test10_struct_arr_noinbounds_i16(
+; NODL-NEXT:  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds_i16(
+; P32-NEXT: %r = icmp ne i16 %x, 1
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
+; FIXME: Should be no trunc?
+; NODL-LABEL: @test10_struct_arr_noinbounds_i64(
+; NODL-NEXT:  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+
+; P32-LABEL: @test10_struct_arr_noinbounds_i64(
+; P32: %r = icmp ne i32 %1, 1
+; P32-NEXT: ret i1 %r
+  %p = getelementptr [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+  %q = load i32* %p
+  %r = icmp eq i32 %q, 9
+  ret i1 %r
 }
diff --git a/test/Transforms/InstCombine/load-select.ll b/test/Transforms/InstCombine/load-select.ll
index f3d83dc8210e..e8cbad335dea 100644
--- a/test/Transforms/InstCombine/load-select.ll
+++ b/test/Transforms/InstCombine/load-select.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 @a = constant [2 x i32] [i32 3, i32 6]            ; <[2 x i32]*> [#uses=2]
 
 define i32 @b(i32 %y) nounwind readonly {
-; CHECK: @b
+; CHECK-LABEL: @b(
 ; CHECK-NOT: load
 ; CHECK: ret i32
 entry:
diff --git a/test/Transforms/InstCombine/load3.ll b/test/Transforms/InstCombine/load3.ll
index db74426783c1..f79ef9a695c2 100644
--- a/test/Transforms/InstCombine/load3.ll
+++ b/test/Transforms/InstCombine/load3.ll
@@ -11,7 +11,7 @@ define i32 @test1(i32* %p) {
   %x = load i32* %t1
   %a = sub i32 %y, %x
   ret i32 %a
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
@@ -22,7 +22,7 @@ define float @test2() {
   %tmp = load float* bitcast ([4 x i8]* @.str to float*), align 1
   ret float %tmp
   
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret float 0x3806965600000000
 }
 
@@ -41,6 +41,6 @@ define void @test3() nounwind {
   store i32 %l, i32* getelementptr ([36 x i32]* @rslts32, i32 29826161, i32 28), align 4
   ret void
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: store i32 1, i32* getelementptr inbounds ([36 x i32]* @rslts32, i32 0, i32 0)
 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index cd12b29b1186..208520653848 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1201
 define i32 @main(i32 %argc, i8** %argv) {
-; CHECK: @main
+; CHECK-LABEL: @main(
     %c_19 = alloca i8*
     %malloc_206 = tail call i8* @malloc(i32 mul (i32 ptrtoint (i8* getelementptr (i8* null, i32 1) to i32), i32 10))
     store i8* %malloc_206, i8** %c_19
@@ -16,7 +16,7 @@ declare noalias i8* @malloc(i32)
 declare void @free(i8*)
 
 define i1 @foo() {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK-NEXT: ret i1 false
   %m = call i8* @malloc(i32 1)
   %z = icmp eq i8* %m, null
@@ -32,7 +32,7 @@ declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
 
 define void @test3(i8* %src) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: ret void
   %a = call noalias i8* @malloc(i32 10)
   call void @llvm.lifetime.start(i64 10, i8* %a)
@@ -49,7 +49,7 @@ define void @test3(i8* %src) {
 
 ;; This used to crash.
 define void @test4() {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: ret void
   %A = call i8* @malloc(i32 16000)
   %B = bitcast i8* %A to double*
@@ -58,7 +58,7 @@ define void @test4() {
   ret void
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define void @test5(i8* %ptr, i8** %esc) {
 ; CHECK-NEXT: call i8* @malloc
 ; CHECK-NEXT: call i8* @malloc
@@ -98,7 +98,7 @@ define void @test5(i8* %ptr, i8** %esc) {
 ;; Using simplifycfg will remove the empty basic block and the branch operation
 ;; Then, performing a dead elimination will remove the comparison.
 ;; This is what happens with -O1 and upper.
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 define void @test6(i8* %foo) minsize {
 ; CHECK:  %tobool = icmp eq i8* %foo, null
 ;; Call to free moved
@@ -120,3 +120,27 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry, %if.then
   ret void
 }
+
+declare i8* @_ZnwmRKSt9nothrow_t(i64, i8*) nobuiltin
+declare void @_ZdlPvRKSt9nothrow_t(i8*, i8*) nobuiltin
+declare i32 @__gxx_personality_v0(...)
+declare void @_ZN1AC2Ev(i8* %this)
+
+; CHECK-LABEL: @test7(
+define void @test7() {
+entry:
+  %nt = alloca i8
+  ; CHECK-NOT: call {{.*}}@_ZnwmRKSt9nothrow_t(
+  %call.i = tail call i8* @_ZnwmRKSt9nothrow_t(i64 1, i8* %nt) builtin nounwind
+  invoke void @_ZN1AC2Ev(i8* undef)
+          to label %.noexc.i unwind label %lpad.i
+
+.noexc.i:                                         ; preds = %entry
+  unreachable
+
+lpad.i:                                           ; preds = %entry
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup
+  ; CHECK-NOT: call {{.*}}@_ZdlPvRKSt9nothrow_t(
+  call void @_ZdlPvRKSt9nothrow_t(i8* %call.i, i8* %nt) builtin nounwind
+  resume { i8*, i32 } %0
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index c97b201fc0e9..65349c6e6906 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -13,7 +13,7 @@ declare i32 @memcmp(i8*, i8*, i32)
 ; Check memcmp(mem, mem, size) -> 0.
 
 define i32 @test_simplify1(i8* %mem, i32 %size) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i32 @memcmp(i8* %mem, i8* %mem, i32 %size)
   ret i32 %ret
 ; CHECK: ret i32 0
@@ -22,7 +22,7 @@ define i32 @test_simplify1(i8* %mem, i32 %size) {
 ; Check memcmp(mem1, mem2, 0) -> 0.
 
 define i32 @test_simplify2(i8* %mem1, i8* %mem2) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 0)
   ret i32 %ret
 ; CHECK: ret i32 0
@@ -31,7 +31,7 @@ define i32 @test_simplify2(i8* %mem1, i8* %mem2) {
 ;; Check memcmp(mem1, mem2, 1) -> *(unsigned char*)mem1 - *(unsigned char*)mem2.
 
 define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 1)
 ; CHECK: [[LOAD1:%[a-z]+]] = load i8* %mem1, align 1
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
@@ -45,7 +45,7 @@ define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
 ; Check memcmp(mem1, mem2, size) -> cnst, where all arguments are constants.
 
 define i32 @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %mem1 = getelementptr [4 x i8]* @hel, i32 0, i32 0
   %mem2 = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
@@ -54,7 +54,7 @@ define i32 @test_simplify4() {
 }
 
 define i32 @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %mem1 = getelementptr [4 x i8]* @hel, i32 0, i32 0
   %mem2 = getelementptr [4 x i8]* @foo, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
@@ -63,7 +63,7 @@ define i32 @test_simplify5() {
 }
 
 define i32 @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %mem1 = getelementptr [4 x i8]* @foo, i32 0, i32 0
   %mem2 = getelementptr [4 x i8]* @hel, i32 0, i32 0
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
diff --git a/test/Transforms/InstCombine/memcmp-2.ll b/test/Transforms/InstCombine/memcmp-2.ll
index 3796117bc24c..bed62eb3fb95 100644
--- a/test/Transforms/InstCombine/memcmp-2.ll
+++ b/test/Transforms/InstCombine/memcmp-2.ll
@@ -9,7 +9,7 @@ declare i32* @memcmp(i8*, i8*, i32)
 ; Check that memcmp functions with the wrong prototype aren't simplified.
 
 define i32* @test_no_simplify1(i8* %mem, i32 %size) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %ret = call i32* @memcmp(i8* %mem, i8* %mem, i32 %size)
 ; CHECK-NEXT: call i32* @memcmp
   ret i32* %ret
diff --git a/test/Transforms/InstCombine/memcpy-1.ll b/test/Transforms/InstCombine/memcpy-1.ll
index 65b79ad03df4..9efbcc8283d2 100644
--- a/test/Transforms/InstCombine/memcpy-1.ll
+++ b/test/Transforms/InstCombine/memcpy-1.ll
@@ -9,7 +9,7 @@ declare i8* @memcpy(i8*, i8*, i32)
 ; Check memcpy(mem1, mem2, size) -> llvm.memcpy(mem1, mem2, size, 1).
 
 define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size)
 ; CHECK: call void @llvm.memcpy
   ret i8* %ret
diff --git a/test/Transforms/InstCombine/memcpy-2.ll b/test/Transforms/InstCombine/memcpy-2.ll
index 4a8a02018f5e..a31854c01758 100644
--- a/test/Transforms/InstCombine/memcpy-2.ll
+++ b/test/Transforms/InstCombine/memcpy-2.ll
@@ -9,7 +9,7 @@ declare i8 @memcpy(i8*, i8*, i32)
 ; Check that memcpy functions with the wrong prototype aren't simplified.
 
 define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %ret = call i8 @memcpy(i8* %mem1, i8* %mem2, i32 %size)
 ; CHECK: call i8 @memcpy
   ret i8 %ret
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 557b160a8785..58793ab431d1 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -8,7 +8,7 @@ entry:
 	%lookupTable1 = bitcast [128 x float]* %lookupTable to i8*		; <i8*> [#uses=1]
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false)
         
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: call{{.*}}@llvm.memcpy
         
@@ -50,7 +50,7 @@ define void @test2() {
   %a = bitcast %T* %A to i8*
   %b = bitcast %T* %B to i8*
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 
 ; %A alloca is deleted
 ; CHECK-NEXT: alloca [124 x i8]
@@ -73,7 +73,7 @@ define void @test3() {
   %a = bitcast %T* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @bar(i8* %a) readonly
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T* @G, i64 0, i32 0))
   ret void
 }
@@ -83,7 +83,7 @@ define void @test4() {
   %a = bitcast %T* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @baz(i8* byval %a) 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
   ret void
 }
@@ -95,7 +95,7 @@ define void @test5() {
   call void @llvm.lifetime.start(i64 -1, i8* %a)
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @baz(i8* byval %a) 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
   ret void
 }
@@ -109,7 +109,7 @@ define void @test6() {
   %a = bitcast %U* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
   call void @bar(i8* %a) readonly
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
   ret void
 }
@@ -119,7 +119,7 @@ define void @test7() {
   %a = bitcast %U* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
   ret void
 }
@@ -129,7 +129,7 @@ define void @test8() {
   %a = bitcast %U* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: llvm.memcpy
 ; CHECK: bar
   ret void
@@ -140,7 +140,7 @@ define void @test9() {
   %a = bitcast %U* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U]* @H, i64 0, i64 1) to i8*))
   ret void
 }
diff --git a/test/Transforms/InstCombine/memcpy.ll b/test/Transforms/InstCombine/memcpy.ll
index 3a68ff95af82..f66e14c95af7 100644
--- a/test/Transforms/InstCombine/memcpy.ll
+++ b/test/Transforms/InstCombine/memcpy.ll
@@ -6,7 +6,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 define void @test1(i8* %a) {
         tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
         ret void
-; CHECK: define void @test1
+; CHECK-LABEL: define void @test1(
 ; CHECK-NEXT: ret void
 }
 
@@ -15,13 +15,13 @@ define void @test1(i8* %a) {
 define void @test2(i8* %a) {
         tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 true)
         ret void
-; CHECK: define void @test2
+; CHECK-LABEL: define void @test2(
 ; CHECK-NEXT: call void @llvm.memcpy
 }
 
 define void @test3(i8* %d, i8* %s) {
         tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i32 4, i1 false)
         ret void
-; CHECK: define void @test3
+; CHECK-LABEL: define void @test3(
 ; CHECK-NEXT: call void @llvm.memcpy
 }
diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll
index 7c7d91808a37..9216ae7fe95a 100644
--- a/test/Transforms/InstCombine/memcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Check cases where dstlen >= len.
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
@@ -26,7 +26,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
@@ -38,7 +38,7 @@ define void @test_simplify2() {
 ; Check cases where dstlen < len.
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = bitcast %struct.T3* @t3 to i8*
   %src = bitcast %struct.T1* @t1 to i8*
 
@@ -48,7 +48,7 @@ define void @test_no_simplify1() {
 }
 
 define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
diff --git a/test/Transforms/InstCombine/memcpy_chk-2.ll b/test/Transforms/InstCombine/memcpy_chk-2.ll
index aa43029d47fc..320b54f82dc6 100644
--- a/test/Transforms/InstCombine/memcpy_chk-2.ll
+++ b/test/Transforms/InstCombine/memcpy_chk-2.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @t2 = common global %struct.T2 zeroinitializer
 
 define void @test_no_simplify() {
-; CHECK: @test_no_simplify
+; CHECK-LABEL: @test_no_simplify(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
diff --git a/test/Transforms/InstCombine/memmove-1.ll b/test/Transforms/InstCombine/memmove-1.ll
index 53f2f116c777..0445a60aeddb 100644
--- a/test/Transforms/InstCombine/memmove-1.ll
+++ b/test/Transforms/InstCombine/memmove-1.ll
@@ -9,7 +9,7 @@ declare i8* @memmove(i8*, i8*, i32)
 ; Check memmove(mem1, mem2, size) -> llvm.memmove(mem1, mem2, size, 1).
 
 define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size)
 ; CHECK: call void @llvm.memmove
   ret i8* %ret
diff --git a/test/Transforms/InstCombine/memmove-2.ll b/test/Transforms/InstCombine/memmove-2.ll
index 23887bce31d8..b20e96bc5555 100644
--- a/test/Transforms/InstCombine/memmove-2.ll
+++ b/test/Transforms/InstCombine/memmove-2.ll
@@ -9,7 +9,7 @@ declare i8 @memmove(i8*, i8*, i32)
 ; Check that memmove functions with the wrong prototype aren't simplified.
 
 define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %ret = call i8 @memmove(i8* %mem1, i8* %mem2, i32 %size)
 ; CHECK: call i8 @memmove
   ret i8 %ret
diff --git a/test/Transforms/InstCombine/memmove_chk-1.ll b/test/Transforms/InstCombine/memmove_chk-1.ll
index f9ff9a103a30..6d93bbbf959e 100644
--- a/test/Transforms/InstCombine/memmove_chk-1.ll
+++ b/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -16,7 +16,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Check cases where dstlen >= len.
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
@@ -26,7 +26,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
@@ -38,7 +38,7 @@ define void @test_simplify2() {
 ; Check cases where dstlen < len.
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = bitcast %struct.T3* @t3 to i8*
   %src = bitcast %struct.T1* @t1 to i8*
 
@@ -48,7 +48,7 @@ define void @test_no_simplify1() {
 }
 
 define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
diff --git a/test/Transforms/InstCombine/memmove_chk-2.ll b/test/Transforms/InstCombine/memmove_chk-2.ll
index f0a915fde2e9..adadf905a588 100644
--- a/test/Transforms/InstCombine/memmove_chk-2.ll
+++ b/test/Transforms/InstCombine/memmove_chk-2.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @t2 = common global %struct.T2 zeroinitializer
 
 define void @test_no_simplify() {
-; CHECK: @test_no_simplify
+; CHECK-LABEL: @test_no_simplify(
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
diff --git a/test/Transforms/InstCombine/memset-1.ll b/test/Transforms/InstCombine/memset-1.ll
index 48b433e137c0..991567d6b597 100644
--- a/test/Transforms/InstCombine/memset-1.ll
+++ b/test/Transforms/InstCombine/memset-1.ll
@@ -9,7 +9,7 @@ declare i8* @memset(i8*, i32, i32)
 ; Check memset(mem1, val, size) -> llvm.memset(mem1, val, size, 1).
 
 define i8* @test_simplify1(i8* %mem, i32 %val, i32 %size) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i8* @memset(i8* %mem, i32 %val, i32 %size)
 ; CHECK: call void @llvm.memset
   ret i8* %ret
diff --git a/test/Transforms/InstCombine/memset-2.ll b/test/Transforms/InstCombine/memset-2.ll
index 8a9033302d04..5e446cb0ee00 100644
--- a/test/Transforms/InstCombine/memset-2.ll
+++ b/test/Transforms/InstCombine/memset-2.ll
@@ -9,7 +9,7 @@ declare i8 @memset(i8*, i32, i32)
 ; Check that memset functions with the wrong prototype aren't simplified.
 
 define i8 @test_no_simplify1(i8* %mem, i32 %val, i32 %size) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %ret = call i8 @memset(i8* %mem, i32 %val, i32 %size)
 ; CHECK: call i8 @memset
   ret i8 %ret
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
index be4c1cfccdb2..47cc7db998e4 100644
--- a/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Check cases where dstlen >= len.
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T* @t to i8*
 
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64
@@ -21,7 +21,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T* @t to i8*
 
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64
@@ -30,7 +30,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %dst = bitcast %struct.T* @t to i8*
 
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64
@@ -41,7 +41,7 @@ define void @test_simplify3() {
 ; Check cases where dstlen < len.
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = bitcast %struct.T* @t to i8*
 
 ; CHECK-NEXT: call i8* @__memset_chk
@@ -50,7 +50,7 @@ define void @test_no_simplify1() {
 }
 
 define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %dst = bitcast %struct.T* @t to i8*
 
 ; CHECK-NEXT: call i8* @__memset_chk
diff --git a/test/Transforms/InstCombine/memset_chk-2.ll b/test/Transforms/InstCombine/memset_chk-2.ll
index 60fbf163c212..bb4f772785e3 100644
--- a/test/Transforms/InstCombine/memset_chk-2.ll
+++ b/test/Transforms/InstCombine/memset_chk-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @t = common global %struct.T zeroinitializer
 
 define void @test_no_simplify() {
-; CHECK: @test_no_simplify
+; CHECK-LABEL: @test_no_simplify(
   %dst = bitcast %struct.T* @t to i8*
 
 ; CHECK-NEXT: call i8* @__memset_chk
diff --git a/test/Transforms/InstCombine/merge-icmp.ll b/test/Transforms/InstCombine/merge-icmp.ll
index 00020b157e0f..b021fe0429a5 100644
--- a/test/Transforms/InstCombine/merge-icmp.ll
+++ b/test/Transforms/InstCombine/merge-icmp.ll
@@ -8,7 +8,7 @@ define i1 @test1(i16* %x) {
   %cmp2 = icmp eq i16 %and, 17664
   %or = and i1 %cmp1, %cmp2
   ret i1 %or
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: load i16
 ; CHECK-NEXT: icmp eq i16 %load, 17791
 ; CHECK-NEXT: ret i1
@@ -22,7 +22,7 @@ define i1 @test2(i16* %x) {
   %cmp2 = icmp eq i8 %trunc, 69
   %or = and i1 %cmp1, %cmp2
   ret i1 %or
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: load i16
 ; CHECK-NEXT: icmp eq i16 %load, 32581
 ; CHECK-NEXT: ret i1
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 16213b8628ca..94fc1183c55a 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -2,14 +2,14 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
         %B = mul i32 %A, 1              ; <i32> [#uses=1]
         ret i32 %B
 ; CHECK: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
         ; Should convert to an add instruction
         %B = mul i32 %A, 2              ; <i32> [#uses=1]
         ret i32 %B
@@ -17,7 +17,7 @@ define i32 @test2(i32 %A) {
 }
 
 define i32 @test3(i32 %A) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
         ; This should disappear entirely
         %B = mul i32 %A, 0              ; <i32> [#uses=1]
         ret i32 %B
@@ -25,7 +25,7 @@ define i32 @test3(i32 %A) {
 }
 
 define double @test4(double %A) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
         ; This is safe for FP
         %B = fmul double 1.000000e+00, %A                ; <double> [#uses=1]
         ret double %B
@@ -33,14 +33,14 @@ define double @test4(double %A) {
 }
 
 define i32 @test5(i32 %A) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
         %B = mul i32 %A, 8              ; <i32> [#uses=1]
         ret i32 %B
 ; CHECK: shl i32 %A, 3
 }
 
 define i8 @test6(i8 %A) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
         %B = mul i8 %A, 8               ; <i8> [#uses=1]
         %C = mul i8 %B, 8               ; <i8> [#uses=1]
         ret i8 %C
@@ -48,28 +48,28 @@ define i8 @test6(i8 %A) {
 }
 
 define i32 @test7(i32 %i) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
         %tmp = mul i32 %i, -1           ; <i32> [#uses=1]
         ret i32 %tmp
 ; CHECK: sub i32 0, %i
 }
 
 define i64 @test8(i64 %i) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
         %j = mul i64 %i, -1             ; <i64> [#uses=1]
         ret i64 %j
 ; CHECK: sub i64 0, %i
 }
 
 define i32 @test9(i32 %i) {
-; CHECK: @test9
+; CHECK-LABEL: @test9(
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
 ; CHECK: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
-; CHECK: @test10
+; CHECK-LABEL: @test10(
         %c = icmp slt i32 %a, 0         ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
        ; e = b & (a >> 31)
@@ -81,7 +81,7 @@ define i32 @test10(i32 %a, i32 %b) {
 }
 
 define i32 @test11(i32 %a, i32 %b) {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
         %c = icmp sle i32 %a, -1                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
@@ -93,7 +93,7 @@ define i32 @test11(i32 %a, i32 %b) {
 }
 
 define i32 @test12(i32 %a, i32 %b) {
-; CHECK: @test12
+; CHECK-LABEL: @test12(
         %c = icmp ugt i32 %a, 2147483647                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
@@ -106,7 +106,7 @@ define i32 @test12(i32 %a, i32 %b) {
 
 ; PR2642
 define internal void @test13(<4 x float>*) {
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 	load <4 x float>* %0, align 1
 	fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
 	store <4 x float> %3, <4 x float>* %0, align 1
@@ -115,7 +115,7 @@ define internal void @test13(<4 x float>*) {
 }
 
 define <16 x i8> @test14(<16 x i8> %a) {
-; CHECK: @test14
+; CHECK-LABEL: @test14(
         %b = mul <16 x i8> %a, zeroinitializer
         ret <16 x i8> %b
 ; CHECK-NEXT: ret <16 x i8> zeroinitializer
@@ -123,7 +123,7 @@ define <16 x i8> @test14(<16 x i8> %a) {
 
 ; rdar://7293527
 define i32 @test15(i32 %A, i32 %B) {
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 entry:
   %shl = shl i32 1, %B
   %m = mul i32 %shl, %A
@@ -133,7 +133,7 @@ entry:
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test16(i32 %b, i1 %c) {
-; CHECK: @test16
+; CHECK-LABEL: @test16(
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
@@ -144,7 +144,7 @@ define i32 @test16(i32 %b, i1 %c) {
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test17(i32 %a, i32 %b) {
-; CHECK: @test17
+; CHECK-LABEL: @test17(
   %a.lobit = lshr i32 %a, 31
   %e = mul i32 %a.lobit, %b
   ret i32 %e
@@ -154,7 +154,7 @@ define i32 @test17(i32 %a, i32 %b) {
 }
 
 define i32 @test18(i32 %A, i32 %B) {
-; CHECK: @test18
+; CHECK-LABEL: @test18(
   %C = and i32 %A, 1
   %D = and i32 %B, 1
 
@@ -168,7 +168,7 @@ declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
 declare void @use(i1)
 
 define i32 @test19(i32 %A, i32 %B) {
-; CHECK: @test19
+; CHECK-LABEL: @test19(
   %C = and i32 %A, 1
   %D = and i32 %B, 1
 
diff --git a/test/Transforms/InstCombine/multi-size-address-space-pointer.ll b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
new file mode 100644
index 000000000000..2d88bed4e7bf
--- /dev/null
+++ b/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
@@ -0,0 +1,112 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+
+define i32 @test_as0(i32 addrspace(0)* %a) { ; addrspace(0) pointers are 32-bit per "p:32:32:32" in the datalayout
+; CHECK-LABEL: @test_as0(
+; CHECK: %arrayidx = getelementptr i32* %a, i32 1
+  %arrayidx = getelementptr i32 addrspace(0)* %a, i64 1 ; i64 index; the expected output has it narrowed to i32
+  %y = load i32 addrspace(0)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as1(i32 addrspace(1)* %a) { ; addrspace(1) pointers are 64-bit per "p1:64:64:64" in the datalayout
+; CHECK-LABEL: @test_as1(
+; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %a, i64 1
+  %arrayidx = getelementptr i32 addrspace(1)* %a, i32 1 ; i32 index; the expected output has it widened to i64
+  %y = load i32 addrspace(1)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as2(i32 addrspace(2)* %a) { ; addrspace(2) pointers are 8-bit per "p2:8:8:8" in the datalayout
+; CHECK-LABEL: @test_as2(
+; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %a, i8 1
+  %arrayidx = getelementptr i32 addrspace(2)* %a, i32 1 ; i32 index; the expected output has it narrowed to i8
+  %y = load i32 addrspace(2)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_as3(i32 addrspace(3)* %a) { ; addrspace(3) pointers are 16-bit per "p3:16:16:16" in the datalayout
+; CHECK-LABEL: @test_as3(
+; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %a, i16 1
+  %arrayidx = getelementptr i32 addrspace(3)* %a, i32 1 ; i32 index; the expected output has it narrowed to i16
+  %y = load i32 addrspace(3)* %arrayidx, align 4
+  ret i32 %y
+}
+
+define i32 @test_combine_ptrtoint(i32 addrspace(2)* %a) { ; pointer -> i8 -> pointer round trip in the 8-bit addrspace(2)
+; CHECK-LABEL: @test_combine_ptrtoint(
+; CHECK-NEXT: %y = load i32 addrspace(2)* %a
+; CHECK-NEXT: ret i32 %y
+  %cast = ptrtoint i32 addrspace(2)* %a to i8 ; i8 is the same width as an addrspace(2) pointer ("p2:8:8:8")
+  %castback = inttoptr i8 %cast to i32 addrspace(2)* ; the expected output drops both casts and loads from %a directly
+  %y = load i32 addrspace(2)* %castback, align 4
+  ret i32 %y
+}
+
+define i8 @test_combine_inttoptr(i8 %a) { ; i8 -> pointer -> i8 round trip in the 8-bit addrspace(2)
+; CHECK-LABEL: @test_combine_inttoptr(
+; CHECK-NEXT: ret i8 %a
+  %cast = inttoptr i8 %a to i32 addrspace(2)* ; i8 is the same width as an addrspace(2) pointer ("p2:8:8:8")
+  %castback = ptrtoint i32 addrspace(2)* %cast to i8 ; the expected output drops both casts and returns %a
+  ret i8 %castback
+}
+
+define i32 @test_combine_vector_ptrtoint(<2 x i32 addrspace(2)*> %a) { ; vector form of the pointer -> i8 -> pointer round trip
+; CHECK-LABEL: @test_combine_vector_ptrtoint(
+; CHECK-NEXT: %p = extractelement <2 x i32 addrspace(2)*> %a, i32 0
+; CHECK-NEXT: %y = load i32 addrspace(2)* %p, align 4
+; CHECK-NEXT: ret i32 %y
+  %cast = ptrtoint <2 x i32 addrspace(2)*> %a to <2 x i8> ; element width i8 matches the 8-bit addrspace(2) pointer
+  %castback = inttoptr <2 x i8> %cast to <2 x i32 addrspace(2)*> ; the expected output drops both casts
+  %p = extractelement <2 x i32 addrspace(2)*> %castback, i32 0 ; expected to extract from %a instead of %castback
+  %y = load i32 addrspace(2)* %p, align 4
+  ret i32 %y
+}
+
+define <2 x i8> @test_combine_vector_inttoptr(<2 x i8> %a) { ; vector form of the i8 -> pointer -> i8 round trip
+; CHECK-LABEL: @test_combine_vector_inttoptr(
+; CHECK-NEXT: ret <2 x i8> %a
+  %cast = inttoptr <2 x i8> %a to <2 x i32 addrspace(2)*> ; element width i8 matches the 8-bit addrspace(2) pointer
+  %castback = ptrtoint <2 x i32 addrspace(2)*> %cast to <2 x i8> ; the expected output drops both casts and returns %a
+  ret <2 x i8> %castback
+}
+
+; Check that the GEP index is changed to the address space integer type (i64 -> i8)
+define i32 addrspace(2)* @shrink_gep_constant_index_64_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as2(
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1
+  %ret = getelementptr i32 addrspace(2)* %p, i64 1 ; constant i64 index on an 8-bit ("p2:8:8:8") pointer
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_constant_index_32_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_32_as2(
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 1
+  %ret = getelementptr i32 addrspace(2)* %p, i32 1 ; constant i32 index on an 8-bit ("p2:8:8:8") pointer; expected as i8
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(3)* @shrink_gep_constant_index_64_as3(i32 addrspace(3)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as3(
+; CHECK-NEXT: getelementptr i32 addrspace(3)* %p, i16 1
+  %ret = getelementptr i32 addrspace(3)* %p, i64 1 ; constant i64 index on a 16-bit ("p3:16:16:16") pointer; expected as i16
+  ret i32 addrspace(3)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_variable_index_64_as2(i32 addrspace(2)* %p, i64 %idx) {
+; CHECK-LABEL: @shrink_gep_variable_index_64_as2(
+; CHECK-NEXT: %1 = trunc i64 %idx to i8
+; CHECK-NEXT: getelementptr i32 addrspace(2)* %p, i8 %1
+  %ret = getelementptr i32 addrspace(2)* %p, i64 %idx ; variable i64 index; expected to be truncated to i8 before the GEP
+  ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(1)* @grow_gep_variable_index_8_as1(i32 addrspace(1)* %p, i8 %idx) {
+; CHECK-LABEL: @grow_gep_variable_index_8_as1(
+; CHECK-NEXT: %1 = sext i8 %idx to i64
+; CHECK-NEXT: getelementptr i32 addrspace(1)* %p, i64 %1
+  %ret = getelementptr i32 addrspace(1)* %p, i8 %idx ; variable i8 index on a 64-bit ("p1:64:64:64") pointer; expected to be sign-extended to i64
+  ret i32 addrspace(1)* %ret
+}
+
diff --git a/test/Transforms/InstCombine/no-negzero.ll b/test/Transforms/InstCombine/no-negzero.ll
index f295130b0ea4..4ed283603446 100644
--- a/test/Transforms/InstCombine/no-negzero.ll
+++ b/test/Transforms/InstCombine/no-negzero.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.8"
 
-; CHECK: @mysqrt
+; CHECK-LABEL: @mysqrt(
 ; CHECK-NOT: fadd
 ; CHECK: ret
 define double @mysqrt(double %x) nounwind {
diff --git a/test/Transforms/InstCombine/nsw.ll b/test/Transforms/InstCombine/nsw.ll
index 0140c2f80122..0bed76717ce9 100644
--- a/test/Transforms/InstCombine/nsw.ll
+++ b/test/Transforms/InstCombine/nsw.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK: @sub1
+; CHECK-LABEL: @sub1(
 ; CHECK: %y = sub i32 0, %x
 ; CHECK: %z = sdiv i32 %y, 337
 ; CHECK: ret i32 %z
@@ -10,7 +10,7 @@ define i32 @sub1(i32 %x) {
   ret i32 %z
 }
 
-; CHECK: @sub2
+; CHECK-LABEL: @sub2(
 ; CHECK: %z = sdiv i32 %x, -337
 ; CHECK: ret i32 %z
 define i32 @sub2(i32 %x) {
@@ -19,7 +19,7 @@ define i32 @sub2(i32 %x) {
   ret i32 %z
 }
 
-; CHECK: @shl_icmp
+; CHECK-LABEL: @shl_icmp(
 ; CHECK: %B = icmp eq i64 %X, 0
 ; CHECK: ret i1 %B
 define i1 @shl_icmp(i64 %X) nounwind {
@@ -28,7 +28,7 @@ define i1 @shl_icmp(i64 %X) nounwind {
   ret i1 %B
 }
 
-; CHECK: @shl1
+; CHECK-LABEL: @shl1(
 ; CHECK: %B = shl nuw nsw i64 %A, 8
 ; CHECK: ret i64 %B
 define i64 @shl1(i64 %X, i64* %P) nounwind {
@@ -38,7 +38,7 @@ define i64 @shl1(i64 %X, i64* %P) nounwind {
   ret i64 %B
 }
 
-; CHECK: @preserve1
+; CHECK-LABEL: @preserve1(
 ; CHECK: add nsw i32 %x, 5
 define i32 @preserve1(i32 %x) nounwind {
   %add = add nsw i32 %x, 2
@@ -46,7 +46,7 @@ define i32 @preserve1(i32 %x) nounwind {
   ret i32 %add3
 }
 
-; CHECK: @nopreserve1
+; CHECK-LABEL: @nopreserve1(
 ; CHECK: add i8 %x, -126
 define i8 @nopreserve1(i8 %x) nounwind {
   %add = add nsw i8 %x, 127
@@ -54,7 +54,7 @@ define i8 @nopreserve1(i8 %x) nounwind {
   ret i8 %add3
 }
 
-; CHECK: @nopreserve2
+; CHECK-LABEL: @nopreserve2(
 ; CHECK: add i8 %x, 3
 define i8 @nopreserve2(i8 %x) nounwind {
   %add = add i8 %x, 1
@@ -62,7 +62,7 @@ define i8 @nopreserve2(i8 %x) nounwind {
   ret i8 %add3
 }
 
-; CHECK: @nopreserve3
+; CHECK-LABEL: @nopreserve3(
 ; CHECK: add i8 %A, %B
 ; CHECK: add i8
 define i8 @nopreserve3(i8 %A, i8 %B) nounwind {
@@ -72,7 +72,7 @@ define i8 @nopreserve3(i8 %A, i8 %B) nounwind {
   ret i8 %add
 }
 
-; CHECK: @nopreserve4
+; CHECK-LABEL: @nopreserve4(
 ; CHECK: add i8 %A, %B
 ; CHECK: add i8
 define i8 @nopreserve4(i8 %A, i8 %B) nounwind {
diff --git a/test/Transforms/InstCombine/objsize-64.ll b/test/Transforms/InstCombine/objsize-64.ll
index 530e1234b4a6..5046724038a3 100644
--- a/test/Transforms/InstCombine/objsize-64.ll
+++ b/test/Transforms/InstCombine/objsize-64.ll
@@ -7,7 +7,7 @@ declare i32 @__gxx_personality_v0(...)
 declare void @__cxa_call_unexpected(i8*)
 declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
 
-; CHECK: @f1
+; CHECK-LABEL: @f1(
 define i64 @f1(i8 **%esc) {
   %call = call i8* @malloc(i32 4)
   store i8* %call, i8** %esc
@@ -17,7 +17,7 @@ define i64 @f1(i8 **%esc) {
 }
 
 
-; CHECK: @f2
+; CHECK-LABEL: @f2(
 define i64 @f2(i8** %esc) nounwind uwtable ssp {
 entry:
 ; CHECK: invoke noalias i8* @_Znwm(i64 13)
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
new file mode 100644
index 000000000000..9cb688423960
--- /dev/null
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+declare i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+
+@array_as2 = private addrspace(2) global [60 x i8] zeroinitializer, align 4
+
+@array_as1_pointers = private global [10 x i32 addrspace(1)*] zeroinitializer, align 4
+@array_as2_pointers = private global [24 x i32 addrspace(2)*] zeroinitializer, align 4
+@array_as3_pointers = private global [42 x i32 addrspace(3)*] zeroinitializer, align 4
+
+@array_as2_as1_pointer_pointers = private global [16 x i32 addrspace(2)* addrspace(1)*] zeroinitializer, align 4
+
+
+@a_as3 = private addrspace(3) global [60 x i8] zeroinitializer, align 1
+
+define i32 @foo_as3() nounwind { ; object size of the addrspace(3) global @a_as3, returned as i32
+; CHECK-LABEL: @foo_as3(
+; CHECK-NEXT: ret i32 60
+  %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false) ; pointer to element 0 of the [60 x i8] array, so 60 bytes remain
+  ret i32 %1
+}
+
+define i16 @foo_as3_i16() nounwind { ; same query as @foo_as3 but through the i16-returning intrinsic variant
+; CHECK-LABEL: @foo_as3_i16(
+; CHECK-NEXT: ret i16 60
+  %1 = call i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false) ; pointer to element 0 of the [60 x i8] array, so 60 bytes remain
+  ret i16 %1
+}
+
+@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3 ; weak alias of the addrspace(3) array @a_as3
+define i32 @foo_alias() nounwind { ; NOTE(review): no FileCheck directives cover this function, so its output is never verified -- confirm that is intentional
+  %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false) ; queries the size through the weak alias rather than @a_as3 itself
+  ret i32 %1
+}
+
+define i32 @array_as2_size() { ; object size of @array_as2, a [60 x i8] global in addrspace(2)
+; CHECK-LABEL: @array_as2_size(
+; CHECK-NEXT: ret i32 60
+  %bc = bitcast [60 x i8] addrspace(2)* @array_as2 to i8 addrspace(2)* ; the cast stays inside addrspace(2)
+  %1 = call i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as1() { ; 10 addrspace(1) pointers, 64-bit each per "p1:64:64:64": 10 * 8 = 80 bytes
+; CHECK-LABEL: @pointer_array_as1(
+; CHECK-NEXT: ret i32 80
+  %bc = addrspacecast [10 x i32 addrspace(1)*]* @array_as1_pointers to i8 addrspace(1)* ; the array itself is in addrspace(0); this casts the pointer to it into addrspace(1)
+  %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as2() { ; 24 addrspace(2) pointers, 8-bit each per "p2:8:8:8": 24 * 1 = 24 bytes
+; CHECK-LABEL: @pointer_array_as2(
+; CHECK-NEXT: ret i32 24
+  %bc = bitcast [24 x i32 addrspace(2)*]* @array_as2_pointers to i8* ; the array itself lives in addrspace(0)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_array_as3() { ; 42 addrspace(3) pointers, 16-bit each per "p3:16:16:16": 42 * 2 = 84 bytes
+; CHECK-LABEL: @pointer_array_as3(
+; CHECK-NEXT: ret i32 84
+  %bc = bitcast [42 x i32 addrspace(3)*]* @array_as3_pointers to i8* ; the array itself lives in addrspace(0)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
+define i32 @pointer_pointer_array_as2_as1() { ; 16 elements, each an addrspace(1) pointer (64-bit) to an addrspace(2) pointer: 16 * 8 = 128 bytes
+; CHECK-LABEL: @pointer_pointer_array_as2_as1(
+; CHECK-NEXT: ret i32 128
+  %bc = bitcast [16 x i32 addrspace(2)* addrspace(1)*]* @array_as2_as1_pointer_pointers to i8* ; element size comes from the outer addrspace(1) pointer, not the addrspace(2) pointee
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+  ret i32 %1
+}
+
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 122c6501a3f5..645903299c86 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -5,19 +5,18 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 @a = private global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*>
 @.str = private constant [8 x i8] c"abcdefg\00"   ; <[8 x i8]*>
-
 define i32 @foo() nounwind {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK-NEXT: ret i32 60
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
   ret i32 %1
 }
 
 define i8* @bar() nounwind {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
 entry:
   %retval = alloca i8*
-  %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false)
   %cmp = icmp ne i32 %0, -1
 ; CHECK: br i1 true
   br i1 %cmp, label %cond.true, label %cond.false
@@ -32,27 +31,27 @@ cond.false:
 }
 
 define i32 @f() nounwind {
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK-NEXT: ret i32 0
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8]* @a, i32 1, i32 0), i1 false)
   ret i32 %1
 }
 
 @window = external global [0 x i8]
 
 define i1 @baz() nounwind {
-; CHECK: @baz
+; CHECK-LABEL: @baz(
 ; CHECK-NEXT: objectsize
-  %1 = tail call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false)
   %2 = icmp eq i32 %1, -1
   ret i1 %2
 }
 
 define void @test1(i8* %q, i32 %x) nounwind noinline {
-; CHECK: @test1
-; CHECK: objectsize.i32
+; CHECK-LABEL: @test1(
+; CHECK: objectsize.i32.p0i8
 entry:
-  %0 = call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
+  %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 10), i1 false) ; <i64> [#uses=1]
   %1 = icmp eq i32 %0, -1                         ; <i1> [#uses=1]
   br i1 %1, label %"47", label %"46"
 
@@ -66,9 +65,9 @@ entry:
 @.str5 = private constant [9 x i32] [i32 97, i32 98, i32 99, i32 100, i32 0, i32
  101, i32 102, i32 103, i32 0], align 4
 define i32 @test2() nounwind {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret i32 34
-  %1 = call i32 @llvm.objectsize.i32(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false)
   ret i32 %1
 }
 
@@ -77,19 +76,19 @@ define i32 @test2() nounwind {
 
 declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind
 
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
 
 declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint
 
 define void @test3() nounwind {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 entry:
   br i1 undef, label %bb11, label %bb12
 
 bb11:
   %0 = getelementptr inbounds float* getelementptr inbounds ([480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1]
   %1 = bitcast float* %0 to i8*                   ; <i8*> [#uses=1]
-  %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) ; <i32> [#uses=1]
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) ; <i32> [#uses=1]
   %3 = call i8* @__memcpy_chk(i8* undef, i8* undef, i32 512, i32 %2) nounwind ; <i8*> [#uses=0]
 ; CHECK: unreachable
   unreachable
@@ -107,11 +106,11 @@ bb12:
 %struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
 
 define i32 @test4(i8** %esc) nounwind ssp {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 entry:
   %0 = alloca %struct.data, align 8
   %1 = bitcast %struct.data* %0 to i8*
-  %2 = call i32 @llvm.objectsize.i32(i8* %1, i1 false) nounwind
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
@@ -123,10 +122,10 @@ entry:
 @s = external global i8*
 
 define i8* @test5(i32 %n) nounwind ssp {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
@@ -135,10 +134,10 @@ entry:
 }
 
 define void @test6(i32 %n) nounwind ssp {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20)
@@ -151,11 +150,11 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
 declare noalias i8* @malloc(i32) nounwind
 
 define i32 @test7(i8** %esc) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
   %alloc = call noalias i8* @malloc(i32 48) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 16
-  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
 ; CHECK: ret i32 32
   ret i32 %objsize
 }
@@ -163,11 +162,11 @@ define i32 @test7(i8** %esc) {
 declare noalias i8* @calloc(i32, i32) nounwind
 
 define i32 @test8(i8** %esc) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
   %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind
   store i8* %alloc, i8** %esc
   %gep = getelementptr inbounds i8* %alloc, i32 5
-  %objsize = call i32 @llvm.objectsize.i32(i8* %gep, i1 false) nounwind readonly
+  %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false) nounwind readonly
 ; CHECK: ret i32 30
   ret i32 %objsize
 }
@@ -175,52 +174,52 @@ define i32 @test8(i8** %esc) {
 declare noalias i8* @strdup(i8* nocapture) nounwind
 declare noalias i8* @strndup(i8* nocapture, i32) nounwind
 
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 define i32 @test9(i8** %esc) {
   %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0)) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
 
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 define i32 @test10(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 4
   ret i32 %1
 }
 
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 define i32 @test11(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
 
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 define i32 @test12(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
 
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 define i32 @test13(i8** %esc) {
   %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind
   store i8* %call, i8** %esc, align 8
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true)
 ; CHECK: ret i32 8
   ret i32 %1
 }
 
-; CHECK: @PR13390
+; CHECK-LABEL: @PR13390(
 define i32 @PR13390(i1 %bool, i8* %a) {
 entry:
   %cond = or i1 %bool, true
@@ -229,8 +228,8 @@ entry:
 xpto:
   %select = select i1 %bool, i8* %select, i8* %a
   %select2 = select i1 %bool, i8* %a, i8* %select2
-  %0 = tail call i32 @llvm.objectsize.i32(i8* %select, i1 true)
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %select2, i1 true)
+  %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true)
+  %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true)
   %2 = add i32 %0, %1
 ; CHECK: ret i32 undef
   ret i32 %2
@@ -239,7 +238,7 @@ return:
   ret i32 42
 }
 
-; CHECK: @PR13621
+; CHECK-LABEL: @PR13621(
 define i32 @PR13621(i1 %bool) nounwind {
 entry:
   %cond = or i1 %bool, true
@@ -249,7 +248,7 @@ entry:
 xpto:
   %gep2 = getelementptr i8* %gep, i32 1
   %gep = getelementptr i8* %gep2, i32 1
-  %o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true)
+  %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true)
 ; CHECK: ret i32 undef
   ret i32 %o
 
@@ -259,20 +258,21 @@ return:
 
 @globalalias = alias internal [60 x i8]* @a
 
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK-NEXT: ret i32 60
 define i32 @test18() {
   %bc = bitcast [60 x i8]* @globalalias to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
   ret i32 %1
 }
 
 @globalalias2 = alias weak [60 x i8]* @a
 
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK: llvm.objectsize
 define i32 @test19() {
   %bc = bitcast [60 x i8]* @globalalias2 to i8*
-  %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
   ret i32 %1
 }
+
diff --git a/test/Transforms/InstCombine/onehot_merge.ll b/test/Transforms/InstCombine/onehot_merge.ll
new file mode 100644
index 000000000000..51f955c2c248
--- /dev/null
+++ b/test/Transforms/InstCombine/onehot_merge.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;CHECK-LABEL: @and_consts(
+;CHECK: and i32 %k, 12
+;CHECK: icmp ne i32 %0, 12
+;CHECK: ret
+define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) {
+bb:
+  %tmp1 = and i32 4, %k
+  %tmp2 = icmp eq i32 %tmp1, 0 ; true when bit 2 of %k is clear
+  %tmp5 = and i32 8, %k
+  %tmp6 = icmp eq i32 %tmp5, 0 ; true when bit 3 of %k is clear
+  %or = or i1 %tmp2, %tmp6 ; either bit clear; expected to merge into (%k & 12) != 12
+  ret i1 %or
+}
+
+;CHECK-LABEL: @foo1_and(
+;CHECK:  shl i32 1, %c1
+;CHECK-NEXT:  shl i32 1, %c2
+;CHECK-NEXT:  or i32
+;CHECK-NEXT:  and i32
+;CHECK-NEXT:  icmp ne i32 %1, %0
+;CHECK: ret
+define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) {
+bb:
+  %tmp = shl i32 1, %c1 ; one-hot mask for bit %c1
+  %tmp4 = shl i32 1, %c2 ; one-hot mask for bit %c2
+  %tmp1 = and i32 %tmp, %k
+  %tmp2 = icmp eq i32 %tmp1, 0 ; true when bit %c1 of %k is clear
+  %tmp5 = and i32 %tmp4, %k
+  %tmp6 = icmp eq i32 %tmp5, 0 ; true when bit %c2 of %k is clear
+  %or = or i1 %tmp2, %tmp6 ; either bit clear; expected to merge into one and/icmp ne against the or of both masks
+  ret i1 %or
+}
+
diff --git a/test/Transforms/InstCombine/or-fcmp.ll b/test/Transforms/InstCombine/or-fcmp.ll
index 09a3c994d93e..29963f6c5c24 100644
--- a/test/Transforms/InstCombine/or-fcmp.ll
+++ b/test/Transforms/InstCombine/or-fcmp.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; CHECK: @t1
+; CHECK-LABEL: @t1(
 define zeroext i8 @t1(float %x, float %y) nounwind {
        %a = fcmp ueq float %x, %y             ; <i1> [#uses=1]
        %b = fcmp uno float %x, %y               ; <i1> [#uses=1]
@@ -11,7 +11,7 @@ define zeroext i8 @t1(float %x, float %y) nounwind {
        ret i8 %retval
 }
 
-; CHECK: @t2
+; CHECK-LABEL: @t2(
 define zeroext i8 @t2(float %x, float %y) nounwind {
        %a = fcmp olt float %x, %y             ; <i1> [#uses=1]
        %b = fcmp oeq float %x, %y               ; <i1> [#uses=1]
@@ -23,7 +23,7 @@ define zeroext i8 @t2(float %x, float %y) nounwind {
        ret i8 %retval
 }
 
-; CHECK: @t3
+; CHECK-LABEL: @t3(
 define zeroext i8 @t3(float %x, float %y) nounwind {
        %a = fcmp ult float %x, %y             ; <i1> [#uses=1]
        %b = fcmp uge float %x, %y               ; <i1> [#uses=1]
@@ -33,7 +33,7 @@ define zeroext i8 @t3(float %x, float %y) nounwind {
        ret i8 %retval
 }
 
-; CHECK: @t4
+; CHECK-LABEL: @t4(
 define zeroext i8 @t4(float %x, float %y) nounwind {
        %a = fcmp ult float %x, %y             ; <i1> [#uses=1]
        %b = fcmp ugt float %x, %y               ; <i1> [#uses=1]
@@ -45,7 +45,7 @@ define zeroext i8 @t4(float %x, float %y) nounwind {
        ret i8 %retval
 }
 
-; CHECK: @t5
+; CHECK-LABEL: @t5(
 define zeroext i8 @t5(float %x, float %y) nounwind {
        %a = fcmp olt float %x, %y             ; <i1> [#uses=1]
        %b = fcmp oge float %x, %y               ; <i1> [#uses=1]
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index f496dd48c402..cec36f119a9a 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -5,7 +5,7 @@ define i32 @test1(i32 %x, i32 %y) nounwind {
   %not = xor i32 %or, -1
   %z = or i32 %x, %not
   ret i32 %z
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: %y.not = xor i32 %y, -1
 ; CHECK-NEXT: %z = or i32 %y.not, %x
 ; CHECK-NEXT: ret i32 %z
@@ -16,7 +16,7 @@ define i32 @test2(i32 %x, i32 %y) nounwind {
   %not = xor i32 %or, -1
   %z = or i32 %y, %not
   ret i32 %z
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: %x.not = xor i32 %x, -1
 ; CHECK-NEXT: %z = or i32 %x.not, %y
 ; CHECK-NEXT: ret i32 %z
@@ -27,7 +27,7 @@ define i32 @test3(i32 %x, i32 %y) nounwind {
   %not = xor i32 %xor, -1
   %z = or i32 %x, %not
   ret i32 %z
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: %y.not = xor i32 %y, -1
 ; CHECK-NEXT: %z = or i32 %y.not, %x
 ; CHECK-NEXT: ret i32 %z
@@ -38,7 +38,7 @@ define i32 @test4(i32 %x, i32 %y) nounwind {
   %not = xor i32 %xor, -1
   %z = or i32 %y, %not
   ret i32 %z
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: %x.not = xor i32 %x, -1
 ; CHECK-NEXT: %z = or i32 %x.not, %y
 ; CHECK-NEXT: ret i32 %z
@@ -49,7 +49,7 @@ define i32 @test5(i32 %x, i32 %y) nounwind {
   %not = xor i32 %and, -1
   %z = or i32 %x, %not
   ret i32 %z
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: ret i32 -1
 }
 
@@ -58,7 +58,7 @@ define i32 @test6(i32 %x, i32 %y) nounwind {
   %not = xor i32 %and, -1
   %z = or i32 %y, %not
   ret i32 %z
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: ret i32 -1
 }
 
@@ -66,7 +66,7 @@ define i32 @test7(i32 %x, i32 %y) nounwind {
   %xor = xor i32 %x, %y
   %z = or i32 %y, %xor
   ret i32 %z
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: %z = or i32 %x, %y
 ; CHECK-NEXT: ret i32 %z
 }
@@ -76,7 +76,7 @@ define i32 @test8(i32 %x, i32 %y) nounwind {
   %xor = xor i32 %x, %not
   %z = or i32 %y, %xor
   ret i32 %z
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT: %x.not = xor i32 %x, -1
 ; CHECK-NEXT: %z = or i32 %x.not, %y
 ; CHECK-NEXT: ret i32 %z
@@ -87,7 +87,7 @@ define i32 @test9(i32 %x, i32 %y) nounwind {
   %xor = xor i32 %not, %y
   %z = or i32 %x, %xor
   ret i32 %z
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: %y.not = xor i32 %y, -1
 ; CHECK-NEXT: %z = or i32 %y.not, %x
 ; CHECK-NEXT: ret i32 %z
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 7226bd93996f..1cd897ee90c6 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -6,49 +6,49 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 define i32 @test1(i32 %A) {
         %B = or i32 %A, 0
         ret i32 %B
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
         %B = or i32 %A, -1 
         ret i32 %B
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 -1
 }
 
 define i8 @test2a(i8 %A) {
         %B = or i8 %A, -1  
         ret i8 %B
-; CHECK: @test2a
+; CHECK-LABEL: @test2a(
 ; CHECK: ret i8 -1
 }
 
 define i1 @test3(i1 %A) {
         %B = or i1 %A, false
         ret i1 %B
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i1 %A
 }
 
 define i1 @test4(i1 %A) {
         %B = or i1 %A, true 
         ret i1 %B
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret i1 true
 }
 
 define i1 @test5(i1 %A) {
         %B = or i1 %A, %A   
         ret i1 %B
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: ret i1 %A
 }
 
 define i32 @test6(i32 %A) {
         %B = or i32 %A, %A  
         ret i32 %B
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: ret i32 %A
 }
 
@@ -57,7 +57,7 @@ define i32 @test7(i32 %A) {
         %NotA = xor i32 -1, %A
         %B = or i32 %A, %NotA
         ret i32 %B
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: ret i32 -1
 }
 
@@ -65,7 +65,7 @@ define i8 @test8(i8 %A) {
         %B = or i8 %A, -2
         %C = or i8 %B, 1
         ret i8 %C
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: ret i8 -1
 }
 
@@ -75,7 +75,7 @@ define i8 @test9(i8 %A, i8 %B) {
         %D = or i8 %B, -2
         %E = or i8 %C, %D
         ret i8 %E
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: ret i8 -1
 }
 
@@ -85,7 +85,7 @@ define i8 @test10(i8 %A) {
         ; (X & C1) | C2 --> (X | C2) & (C1|C2)
         %D = or i8 %C, -2
         ret i8 %D
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: ret i8 -2
 }
 
@@ -96,7 +96,7 @@ define i8 @test11(i8 %A) {
         %D = or i8 %C, 1
         %E = xor i8 %D, 12
         ret i8 %E
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: ret i8 -1
 }
 
@@ -105,7 +105,7 @@ define i32 @test12(i32 %A) {
         %B = or i32 %A, 4
         %C = and i32 %B, 8
         ret i32 %C
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK: %C = and i32 %A, 8
 ; CHECK: ret i32 %C
 }
@@ -115,7 +115,7 @@ define i32 @test13(i32 %A) {
         ; Always equal to 8
         %C = and i32 %B, 8
         ret i32 %C
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK: ret i32 8
 }
 
@@ -125,7 +125,7 @@ define i1 @test14(i32 %A, i32 %B) {
         ; (A < B) | (A > B) === A != B
         %D = or i1 %C1, %C2
         ret i1 %D
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK: icmp ne i32 %A, %B
 ; CHECK: ret i1
 }
@@ -136,7 +136,7 @@ define i1 @test15(i32 %A, i32 %B) {
         ; (A < B) | (A == B) === A <= B
         %D = or i1 %C1, %C2
         ret i1 %D
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK:  icmp ule i32 %A, %B
 ; CHECK: ret i1
 }
@@ -148,7 +148,7 @@ define i32 @test16(i32 %A) {
         ; %D = and int %B, -1 == %B
         %D = or i32 %B, %C
         ret i32 %D
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK: ret i32 %A
 }
 
@@ -158,7 +158,7 @@ define i32 @test17(i32 %A) {
         ; %D = and int %B, 5
         %D = or i32 %B, %C
         ret i32 %D
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK: %D = and i32 %A, 5
 ; CHECK: ret i32 %D
 }
@@ -169,7 +169,7 @@ define i1 @test18(i32 %A) {
         ;; (A-50) >u 50
         %D = or i1 %B, %C
         ret i1 %D
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK: add i32
 ; CHECK:  icmp ugt 
 ; CHECK: ret i1 
@@ -181,7 +181,7 @@ define i1 @test19(i32 %A) {
         ;; (A&-2) == 50
         %D = or i1 %B, %C
         ret i1 %D
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK: and i32
 ; CHECK: icmp eq 
 ; CHECK: ret i1
@@ -191,7 +191,7 @@ define i32 @test20(i32 %x) {
         %y = and i32 %x, 123
         %z = or i32 %y, %x
         ret i32 %z
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK: ret i32 %x
 }
 
@@ -202,7 +202,7 @@ define i32 @test21(i32 %tmp.1) {
         ;; add tmp.1, 2
         %tmp.6 = or i32 %tmp.5, %tmp.3
         ret i32 %tmp.6
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK:   add i32 %{{[^,]*}}, 2
 ; CHECK:   ret i32 
 }
@@ -212,7 +212,7 @@ define i32 @test22(i32 %B) {
         %ELIM7 = and i32 %B, -2
         %ELIM5 = or i32 %ELIM41, %ELIM7
         ret i32 %ELIM5
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK: ret i32 %B
 }
 
@@ -222,7 +222,7 @@ define i16 @test23(i16 %A) {
         %C = or i16 %B, -32768
         %D = xor i16 %C, 8193
         ret i16 %D
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK:   %B = lshr i16 %A, 1
 ; CHECK:   %D = xor i16 %B, -24575
 ; CHECK:   ret i16 %D
@@ -235,7 +235,7 @@ define i1 @test24(double %X, double %Y) {
         %bothcond = or i1 %tmp13, %tmp9         ; <i1> [#uses=1]
         ret i1 %bothcond
         
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK:    = fcmp uno double %Y, %X
 ; CHECK:   ret i1 
 }
@@ -248,7 +248,7 @@ define i1 @test25(i32 %A, i32 %B) {
   %F = xor i1 %E, -1
   ret i1 %F
 
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK: icmp ne i32 %A, 0
 ; CHECK-NEXT: icmp ne i32 %B, 57
 ; CHECK-NEXT:  %F = and i1 
@@ -262,7 +262,7 @@ define i1 @test26(i32 %A, i32 %B) {
         ; (A == 0) & (A == 0)   -->   (A|B) == 0
         %D = and i1 %C1, %C2
         ret i1 %D
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK: or i32 %A, %B
 ; CHECK: icmp eq i32 {{.*}}, 0
 ; CHECK: ret i1 
@@ -274,7 +274,7 @@ define i1 @test27(i32* %A, i32* %B) {
   %D = or i32 %C1, %C2
   %E = icmp eq i32 %D, 0
   ret i1 %E
-; CHECK: @test27
+; CHECK-LABEL: @test27(
 ; CHECK: icmp eq i32* %A, null
 ; CHECK: icmp eq i32* %B, null
 ; CHECK: and i1
@@ -288,7 +288,7 @@ define i1 @test28(i32 %A, i32 %B) {
         ; (A != 0) | (A != 0)   -->   (A|B) != 0
         %D = or i1 %C1, %C2
         ret i1 %D
-; CHECK: @test28
+; CHECK-LABEL: @test28(
 ; CHECK: or i32 %A, %B
 ; CHECK: icmp ne i32 {{.*}}, 0
 ; CHECK: ret i1 
@@ -300,7 +300,7 @@ define i1 @test29(i32* %A, i32* %B) {
   %D = or i32 %C1, %C2
   %E = icmp ne i32 %D, 0
   ret i1 %E
-; CHECK: @test29
+; CHECK-LABEL: @test29(
 ; CHECK: icmp ne i32* %A, null
 ; CHECK: icmp ne i32* %B, null
 ; CHECK: or i1
@@ -315,7 +315,7 @@ entry:
   %D = and i32 %B, 40186
   %E = or i32 %D, %C
   ret i32 %E
-; CHECK: @test30
+; CHECK-LABEL: @test30(
 ; CHECK: %D = and i32 %A, -58312
 ; CHECK: %E = or i32 %D, 32962
 ; CHECK: ret i32 %E
@@ -331,7 +331,7 @@ define i64 @test31(i64 %A) nounwind readnone ssp noredzone {
 
   %F = or i64 %D, %E
   ret i64 %F
-; CHECK: @test31
+; CHECK-LABEL: @test31(
 ; CHECK-NEXT: %E = and i64 %A, 4294908984
 ; CHECK-NEXT: %F = or i64 %E, 32962
 ; CHECK-NEXT: ret i64 %F
@@ -345,7 +345,7 @@ define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32
   %or.i = or <4 x i32> %and.i, %and.i129          ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %or.i
 ; codegen is mature enough to handle vector selects.
-; CHECK: @test32
+; CHECK-LABEL: @test32(
 ; CHECK: select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
 }
 
@@ -353,7 +353,7 @@ define i1 @test33(i1 %X, i1 %Y) {
   %a = or i1 %X, %Y
   %b = or i1 %a, %X
   ret i1 %b
-; CHECK: @test33
+; CHECK-LABEL: @test33(
 ; CHECK-NEXT: or i1 %X, %Y
 ; CHECK-NEXT: ret
 }
@@ -362,7 +362,7 @@ define i32 @test34(i32 %X, i32 %Y) {
   %a = or i32 %X, %Y
   %b = or i32 %Y, %a
   ret i32 %b
-; CHECK: @test34
+; CHECK-LABEL: @test34(
 ; CHECK-NEXT: or i32 %X, %Y
 ; CHECK-NEXT: ret
 }
@@ -371,7 +371,7 @@ define i32 @test35(i32 %a, i32 %b) {
   %1 = or i32 %a, 1135
   %2 = or i32 %1, %b
   ret i32 %2
-  ; CHECK: @test35
+  ; CHECK-LABEL: @test35(
   ; CHECK-NEXT: or i32 %a, %b
   ; CHECK-NEXT: or i32 %1, 1135
 }
@@ -383,14 +383,14 @@ define i1 @test36(i32 %x) {
   %cmp3 = icmp eq i32 %x, 25
   %ret2 = or i1 %ret1, %cmp3
   ret i1 %ret2
-; CHECK: @test36
+; CHECK-LABEL: @test36(
 ; CHECK-NEXT: %x.off = add i32 %x, -23
 ; CHECK-NEXT: icmp ult i32 %x.off, 3
 ; CHECK-NEXT: ret i1
 }
 
 define i32 @test37(i32* %xp, i32 %y) {
-; CHECK: @test37
+; CHECK-LABEL: @test37(
 ; CHECK: select i1 %tobool, i32 -1, i32 %x
   %tobool = icmp ne i32 %y, 0
   %sext = sext i1 %tobool to i32
@@ -400,7 +400,7 @@ define i32 @test37(i32* %xp, i32 %y) {
 }
 
 define i32 @test38(i32* %xp, i32 %y) {
-; CHECK: @test38
+; CHECK-LABEL: @test38(
 ; CHECK: select i1 %tobool, i32 -1, i32 %x
   %tobool = icmp ne i32 %y, 0
   %sext = sext i1 %tobool to i32
diff --git a/test/Transforms/InstCombine/osx-names.ll b/test/Transforms/InstCombine/osx-names.ll
index 7b83526aceb5..926caadc49d8 100644
--- a/test/Transforms/InstCombine/osx-names.ll
+++ b/test/Transforms/InstCombine/osx-names.ll
@@ -14,14 +14,14 @@ target triple = "i386-apple-macosx10.7.2"
 @.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
 
 define void @test1(%struct.__sFILE* %stream) nounwind {
-; CHECK: define void @test1
+; CHECK-LABEL: define void @test1(
 ; CHECK: call i32 @"fwrite$UNIX2003"
   %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
   ret void
 }
 
 define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
-; CHECK: define void @test2
+; CHECK-LABEL: define void @test2(
 ; CHECK: call i32 @"fputs$UNIX2003"
   %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
   ret void
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index 81ceef8c41cf..3eddc80a7048 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -3,7 +3,7 @@
 
 declare void @throwAnExceptionOrWhatever()
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
 entry:
 ; CHECK-NOT: sext
@@ -26,7 +26,7 @@ if.end:
   ret i32 %conv9
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; This form should not be promoted for two reasons: 1) it is unprofitable to
 ; promote it since the add.off instruction has another use, and 2) it is unsafe
 ; because the add-with-off makes the high bits of the original add live.
@@ -76,7 +76,7 @@ if.end:
 ; CHECK: ret i64
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; Should be able to form an i8 sadd computed in an i32.
 define zeroext i8 @test4(i8 signext %a, i8 signext %b) nounwind ssp {
 entry:
@@ -97,7 +97,7 @@ if.end:                                           ; preds = %entry
 ; CHECK: ret i8
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: llvm.uadd.with.overflow
 ; CHECK: ret i64
 define i64 @test5(i64 %a, i64 %b) nounwind ssp {
@@ -108,7 +108,7 @@ entry:
   ret i64 %Q
 }
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: llvm.uadd.with.overflow
 ; CHECK: ret i64
 define i64 @test6(i64 %a, i64 %b) nounwind ssp {
@@ -119,7 +119,7 @@ entry:
   ret i64 %Q
 }
 
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: llvm.uadd.with.overflow
 ; CHECK: ret i64
 define i64 @test7(i64 %a, i64 %b) nounwind ssp {
@@ -130,7 +130,7 @@ entry:
   ret i64 %Q
 }
 
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; PR11438
 ; This is @test1, but the operands are not sign-extended.  Make sure
 ; we don't transform this case.
diff --git a/test/Transforms/InstCombine/phi-select-constexpr.ll b/test/Transforms/InstCombine/phi-select-constexpr.ll
new file mode 100644
index 000000000000..054e0691d47a
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-select-constexpr.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+@A = extern_weak global i32, align 4
+@B = extern_weak global i32, align 4
+
+define i32 @foo(i1 %which) {
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+; CHECK-LABEL: final:
+; CHECK: phi i32 [ 1, %entry ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), %delay ]
+final:
+  %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]
+  %value = select i1 %use2, i32 2, i32 1
+  ret i32 %value
+}
+
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 1c307d430f3f..6e314651383c 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -15,7 +15,7 @@ BB1:
 
 BB2:
         ret i32 %A
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: BB1:
 ; CHECK-NEXT: ret i32 %A
 }
@@ -31,7 +31,7 @@ BB2:
         ; Combine away PHI nodes with same values
         %B = phi i32 [ %A, %BB0 ], [ %A, %BB1 ]         
         ret i32 %B
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: BB2:
 ; CHECK-NEXT: ret i32 %A
 }
@@ -47,7 +47,7 @@ Loop:
 
 Exit:
         ret i32 %B
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: Exit:
 ; CHECK-NEXT: ret i32 %A
 }
@@ -64,7 +64,7 @@ Loop:           ; preds = %L2, %Loop
 
 L2:             ; preds = %Loop
         br label %Loop
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: Loop:
 ; CHECK-NEXT: br i1 %b
 }
@@ -80,7 +80,7 @@ Loop:           ; preds = %Loop, %BB0
 
 Exit:           ; preds = %Loop
         ret i32 %B
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: Loop:
 ; CHECK-NEXT: br i1 %b
 ; CHECK: Exit:
@@ -100,7 +100,7 @@ BB2:
         ;; Suck casts into phi
         %B = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ]         
         ret i32 %B
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: BB2:
 ; CHECK: zext i16 %A to i32
 ; CHECK-NEXT: ret i32
@@ -118,7 +118,7 @@ Loop:           ; preds = %Loop, %BB0
 
 Exit:           ; preds = %Loop
         ret i32 0
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: Loop:
 ; CHECK-NEXT: br i1 %b
 }
@@ -136,7 +136,7 @@ BB2:
         ;; Suck GEPs into phi
         %B = phi i32* [ %X, %BB0 ], [ %Y, %BB1 ]
         ret i32* %B
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NOT: phi
 ; CHECK: BB2:
 ; CHECK-NEXT: %B = getelementptr { i32, i32 }* %A 
@@ -159,7 +159,7 @@ bb1:
 bb2:
   %E = phi i32 [ %C, %bb ], [ %D, %bb1 ]
   ret i32 %E
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK:       bb2:
 ; CHECK-NEXT:        phi i32* [ %B, %bb ], [ %A, %bb1 ]
 ; CHECK-NEXT:   %E = load i32* %{{[^,]*}}, align 1
@@ -183,7 +183,7 @@ bb1:
 bb2:
   %E = phi i32 [ %C, %bb ], [ %D, %bb1 ]
   ret i32 %E
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK:       bb2:
 ; CHECK-NEXT:        phi i32* [ %B, %bb ], [ %A, %bb1 ]
 ; CHECK-NEXT:   %E = load i32* %{{[^,]*}}, align 16
@@ -219,7 +219,7 @@ end:
   store i32 10, i32* %g
   %z = call i1 @test11a()
   ret i1 %z
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NOT: phi i32
 ; CHECK: ret i1 %z
 }
@@ -245,7 +245,7 @@ end:
 
   %tmp2 = add i64 %tmp32, %tmp30
   ret i64 %tmp2
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NOT: zext
 ; CHECK: end:
 ; CHECK-NEXT: phi i64 [ 0, %entry ], [ %Val, %two ]
@@ -276,7 +276,7 @@ end:
   
   call void @test13f(double %tmp31, i32 %tmp32)
   ret void
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK-NOT: zext
 ; CHECK: end:
 ; CHECK-NEXT: phi double [ 0.000000e+00, %entry ], [ %Vald, %two ]
@@ -296,7 +296,7 @@ Loop:
 
 Exit:           ; preds = %Loop
         ret i640 %C
-; CHECK: @test14a
+; CHECK-LABEL: @test14a(
 ; CHECK: Loop:
 ; CHECK-NEXT: phi i320
 }
@@ -313,7 +313,7 @@ Loop:
 
 Exit:           ; preds = %Loop
         ret i160 %C
-; CHECK: @test14b
+; CHECK-LABEL: @test14b(
 ; CHECK: Loop:
 ; CHECK-NEXT: phi i160
 }
@@ -321,7 +321,7 @@ Exit:           ; preds = %Loop
 declare i64 @test15a(i64)
 
 define i64 @test15b(i64 %A, i1 %b) {
-; CHECK: @test15b
+; CHECK-LABEL: @test15b(
 entry:
   %i0 = zext i64 %A to i128
   %i1 = shl i128 %i0, 64
@@ -405,7 +405,7 @@ if.else:                                          ; preds = %entry
 
 ; PR4413
 declare i32 @ext()
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 define i32 @test17(i1 %a) {
 entry:
     br i1 %a, label %bb1, label %bb2
@@ -435,7 +435,7 @@ ret:
   %ptr = phi i32* [ %zero, %true ] , [ %one, %false ]
   %isnull = icmp eq i32* %ptr, null
   ret i1 %isnull
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK: ret i1 false
 }
 
@@ -449,7 +449,7 @@ ret:
   %p = phi double [ %x, %true ], [ 0x7FF0000000000000, %false ]; RHS = +infty
   %cmp = fcmp ule double %x, %p
   ret i1 %cmp
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK: ret i1 true
 }
 
@@ -466,7 +466,7 @@ ret:
   %p = phi i32* [ %a, %true ], [ %b, %false ]
   %r = icmp eq i32* %p, %c
   ret i1 %r
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK: ret i1 false
 }
 
@@ -485,12 +485,12 @@ loop:
   br i1 %c2, label %ret, label %loop
 ret:
   ret i1 %r
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK: ret i1 false
 }
 
 define void @test22() {
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 entry:
   br label %loop
 loop:
@@ -518,7 +518,7 @@ Loop:           ; preds = %Loop, %BB0
 Exit:           ; preds = %Loop
         %E = add i32 %B, 19
         ret i32 %E
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK: %phitmp = add i32 %A, 19
 ; CHECK: Loop:
 ; CHECK-NEXT: %B = phi i32 [ %phitmp, %BB0 ], [ 61, %Loop ]
@@ -538,7 +538,7 @@ BB1:
 BB2:
         %C = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ]
         ret i32 %C
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK-NOT: phi
 ; CHECK: BB2:
 ; CHECK-NEXT: %C = add nuw i32 %A, 1
@@ -573,7 +573,7 @@ end:
   store i32 10, i32* %g
   %z = call i1 @test25a()
   ret i1 %z
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK-NOT: phi i32
 ; CHECK: ret i1 %z
 }
@@ -616,12 +616,12 @@ end:
   store i32 10, i32* %g
   %z = call i1 @test26a()
   ret i1 %z
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK-NOT: phi i32
 ; CHECK: ret i1 %z
 }
 
-; CHECK: @test27(
+; CHECK-LABEL: @test27(
 ; CHECK: ret i32 undef
 define i32 @test27(i1 %b) {
 entry:
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index 8a311f0b74c6..9f1d073fe760 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -12,14 +12,14 @@ declare double @pow(double, double) nounwind readonly
 ; Check pow(1.0, x) -> 1.0.
 
 define float @test_simplify1(float %x) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %retval = call float @powf(float 1.0, float %x)
   ret float %retval
 ; CHECK-NEXT: ret float 1.000000e+00
 }
 
 define double @test_simplify2(double %x) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %retval = call double @pow(double 1.0, double %x)
   ret double %retval
 ; CHECK-NEXT: ret double 1.000000e+00
@@ -28,7 +28,7 @@ define double @test_simplify2(double %x) {
 ; Check pow(2.0, x) -> exp2(x).
 
 define float @test_simplify3(float %x) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %retval = call float @powf(float 2.0, float %x)
 ; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) [[NUW_RO:#[0-9]+]]
   ret float %retval
@@ -36,7 +36,7 @@ define float @test_simplify3(float %x) {
 }
 
 define double @test_simplify4(double %x) {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %retval = call double @pow(double 2.0, double %x)
 ; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) [[NUW_RO]]
   ret double %retval
@@ -46,14 +46,14 @@ define double @test_simplify4(double %x) {
 ; Check pow(x, 0.0) -> 1.0.
 
 define float @test_simplify5(float %x) {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %retval = call float @powf(float %x, float 0.0)
   ret float %retval
 ; CHECK-NEXT: ret float 1.000000e+00
 }
 
 define double @test_simplify6(double %x) {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %retval = call double @pow(double %x, double 0.0)
   ret double %retval
 ; CHECK-NEXT: ret double 1.000000e+00
@@ -62,7 +62,7 @@ define double @test_simplify6(double %x) {
 ; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
 
 define float @test_simplify7(float %x) {
-; CHECK: @test_simplify7
+; CHECK-LABEL: @test_simplify7(
   %retval = call float @powf(float %x, float 0.5)
 ; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO]]
 ; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
@@ -73,7 +73,7 @@ define float @test_simplify7(float %x) {
 }
 
 define double @test_simplify8(double %x) {
-; CHECK: @test_simplify8
+; CHECK-LABEL: @test_simplify8(
   %retval = call double @pow(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
 ; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
@@ -86,14 +86,14 @@ define double @test_simplify8(double %x) {
 ; Check pow(-infinity, 0.5) -> +infinity.
 
 define float @test_simplify9(float %x) {
-; CHECK: @test_simplify9
+; CHECK-LABEL: @test_simplify9(
   %retval = call float @powf(float 0xFFF0000000000000, float 0.5)
   ret float %retval
 ; CHECK-NEXT: ret float 0x7FF0000000000000
 }
 
 define double @test_simplify10(double %x) {
-; CHECK: @test_simplify10
+; CHECK-LABEL: @test_simplify10(
   %retval = call double @pow(double 0xFFF0000000000000, double 0.5)
   ret double %retval
 ; CHECK-NEXT: ret double 0x7FF0000000000000
@@ -102,14 +102,14 @@ define double @test_simplify10(double %x) {
 ; Check pow(x, 1.0) -> x.
 
 define float @test_simplify11(float %x) {
-; CHECK: @test_simplify11
+; CHECK-LABEL: @test_simplify11(
   %retval = call float @powf(float %x, float 1.0)
   ret float %retval
 ; CHECK-NEXT: ret float %x
 }
 
 define double @test_simplify12(double %x) {
-; CHECK: @test_simplify12
+; CHECK-LABEL: @test_simplify12(
   %retval = call double @pow(double %x, double 1.0)
   ret double %retval
 ; CHECK-NEXT: ret double %x
@@ -118,7 +118,7 @@ define double @test_simplify12(double %x) {
 ; Check pow(x, 2.0) -> x*x.
 
 define float @test_simplify13(float %x) {
-; CHECK: @test_simplify13
+; CHECK-LABEL: @test_simplify13(
   %retval = call float @powf(float %x, float 2.0)
 ; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul float %x, %x
   ret float %retval
@@ -126,7 +126,7 @@ define float @test_simplify13(float %x) {
 }
 
 define double @test_simplify14(double %x) {
-; CHECK: @test_simplify14
+; CHECK-LABEL: @test_simplify14(
   %retval = call double @pow(double %x, double 2.0)
 ; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul double %x, %x
   ret double %retval
@@ -136,7 +136,7 @@ define double @test_simplify14(double %x) {
 ; Check pow(x, -1.0) -> 1.0/x.
 
 define float @test_simplify15(float %x) {
-; CHECK: @test_simplify15
+; CHECK-LABEL: @test_simplify15(
   %retval = call float @powf(float %x, float -1.0)
 ; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv float 1.000000e+00, %x
   ret float %retval
@@ -144,11 +144,24 @@ define float @test_simplify15(float %x) {
 }
 
 define double @test_simplify16(double %x) {
-; CHECK: @test_simplify16
+; CHECK-LABEL: @test_simplify16(
   %retval = call double @pow(double %x, double -1.0)
 ; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv double 1.000000e+00, %x
   ret double %retval
 ; CHECK-NEXT: ret double [[RECIPROCAL]]
 }
 
+declare double @llvm.pow.f64(double %Val, double %Power)
+define double @test_simplify17(double %x) {
+; CHECK-LABEL: @test_simplify17(
+  %retval = call double @llvm.pow.f64(double %x, double 0.5)
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
+  ret double %retval
+; CHECK-NEXT: ret double [[SELECT]]
+}
+
 ; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
+
diff --git a/test/Transforms/InstCombine/pow-2.ll b/test/Transforms/InstCombine/pow-2.ll
index af64cda0904a..d1ffde703faa 100644
--- a/test/Transforms/InstCombine/pow-2.ll
+++ b/test/Transforms/InstCombine/pow-2.ll
@@ -7,7 +7,7 @@ declare float @pow(double, double)
 ; Check that pow functions with the wrong prototype aren't simplified.
 
 define float @test_no_simplify1(double %x) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %retval = call float @pow(double 1.0, double %x)
 ; CHECK-NEXT: call float @pow(double 1.000000e+00, double %x)
   ret float %retval
diff --git a/test/Transforms/InstCombine/pow-3.ll b/test/Transforms/InstCombine/pow-3.ll
new file mode 100644
index 000000000000..1c5cf910a8af
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-3.ll
@@ -0,0 +1,12 @@
+; Test that llvm.pow(x, 0.5) is not simplified to fabs(sqrt(x)) when the sqrt and fabs library calls are not available.
+;
+; RUN: opt < %s -disable-simplify-libcalls -instcombine -S | FileCheck %s
+
+declare double @llvm.pow.f64(double %Val, double %Power)
+
+define double @test_simplify_unavailable(double %x) {
+; CHECK-LABEL: @test_simplify_unavailable(
+  %retval = call double @llvm.pow.f64(double %x, double 0.5)
+; CHECK-NEXT: call double @llvm.pow.f64(double %x, double 5.000000e-01)
+  ret double %retval
+}
diff --git a/test/Transforms/InstCombine/pr17827.ll b/test/Transforms/InstCombine/pr17827.ll
new file mode 100644
index 000000000000..a8b592635523
--- /dev/null
+++ b/test/Transforms/InstCombine/pr17827.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; With left shift, the comparison should not be modified.
+; CHECK-LABEL: @test_shift_and_cmp_not_changed1(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_not_changed1(i8 %p) #0 {
+entry:
+  %shlp = shl i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; With arithmetic right shift, the comparison should not be modified.
+; CHECK-LABEL: @test_shift_and_cmp_not_changed2(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_not_changed2(i8 %p) #0 {
+entry:
+  %shlp = ashr i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; This should simplify functionally to the left shift case.
+; The extra input parameter should be optimized away.
+; CHECK-LABEL: @test_shift_and_cmp_changed1(
+; CHECK:  %andp = shl i8 %p, 5
+; CHECK-NEXT: %shl = and i8 %andp, -64
+; CHECK-NEXT:  %cmp = icmp slt i8 %shl, 32
+define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) #0 {
+entry:
+  %andp = and i8 %p, 6
+  %andq = and i8 %q, 8
+  %or = or i8 %andq, %andp
+  %shl = shl i8 %or, 5
+  %ashr = ashr i8 %shl, 5
+  %cmp = icmp slt i8 %ashr, 1
+  ret i1 %cmp
+}
+
+; Unsigned compare allows a transformation to compare against 0.
+; CHECK-LABEL: @test_shift_and_cmp_changed2(
+; CHECK: icmp eq i8 %andp, 0
+define i1 @test_shift_and_cmp_changed2(i8 %p) #0 {
+entry:
+  %shlp = shl i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp ult i8 %andp, 32
+  ret i1 %cmp
+}
+
+; nsw on the shift should not affect the comparison.
+; CHECK-LABEL: @test_shift_and_cmp_changed3(
+; CHECK: icmp slt i8 %andp, 32
+define i1 @test_shift_and_cmp_changed3(i8 %p) #0 {
+entry:
+  %shlp = shl nsw i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
+; Logical shift right by 5 leaves only the low three bits possibly set, so the 'and' with -64 is always zero and the compare folds to true.
+; CHECK-LABEL: @test_shift_and_cmp_changed4(
+; CHECK: ret i1 true
+define i1 @test_shift_and_cmp_changed4(i8 %p) #0 {
+entry:
+  %shlp = lshr i8 %p, 5
+  %andp = and i8 %shlp, -64
+  %cmp = icmp slt i8 %andp, 32
+  ret i1 %cmp
+}
+
diff --git a/test/Transforms/InstCombine/pr8547.ll b/test/Transforms/InstCombine/pr8547.ll
index 485f4d9644f3..7e9cbe17b68d 100644
--- a/test/Transforms/InstCombine/pr8547.ll
+++ b/test/Transforms/InstCombine/pr8547.ll
@@ -23,4 +23,4 @@ for.cond:                                         ; preds = %for.cond, %codeRepl
 codeRepl2:                                        ; preds = %for.cond
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i32 %conv2) nounwind
   ret i32 0
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
index 3a910ea437b7..c98ddd55df10 100644
--- a/test/Transforms/InstCombine/printf-1.ll
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the printf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -20,7 +20,7 @@ declare i32 @printf(i8*, ...)
 ; Check printf("") -> noop.
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %fmt = getelementptr [1 x i8]* @empty, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt)
   ret void
@@ -30,7 +30,7 @@ define void @test_simplify1() {
 ; Check printf("x") -> putchar('x'), even for '%'.
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt)
 ; CHECK-NEXT: call i32 @putchar(i32 104)
@@ -39,7 +39,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %fmt = getelementptr [2 x i8]* @percent, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt)
 ; CHECK-NEXT: call i32 @putchar(i32 37)
@@ -50,7 +50,7 @@ define void @test_simplify3() {
 ; Check printf("foo\n") -> puts("foo").
 
 define void @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt)
 ; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* [[STR]], i32 0, i32 0))
@@ -61,7 +61,7 @@ define void @test_simplify4() {
 ; Check printf("%c", chr) -> putchar(chr).
 
 define void @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt, i8 104)
 ; CHECK-NEXT: call i32 @putchar(i32 104)
@@ -72,7 +72,7 @@ define void @test_simplify5() {
 ; Check printf("%s\n", str) -> puts(str).
 
 define void @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
   %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt, i8* %str)
@@ -84,7 +84,7 @@ define void @test_simplify6() {
 ; Check printf(format, ...) -> iprintf(format, ...) if no floating point.
 
 define void @test_simplify7() {
-; CHECK-IPRINTF: @test_simplify7
+; CHECK-IPRINTF-LABEL: @test_simplify7(
   %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt, i32 187)
 ; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
@@ -93,7 +93,7 @@ define void @test_simplify7() {
 }
 
 define void @test_no_simplify1() {
-; CHECK-IPRINTF: @test_no_simplify1
+; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87)
 ; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
@@ -102,7 +102,7 @@ define void @test_no_simplify1() {
 }
 
 define void @test_no_simplify2(i8* %fmt, double %d) {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
 ; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
   ret void
@@ -110,7 +110,7 @@ define void @test_no_simplify2(i8* %fmt, double %d) {
 }
 
 define i32 @test_no_simplify3() {
-; CHECK: @test_no_simplify3
+; CHECK-LABEL: @test_no_simplify3(
   %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
   %ret = call i32 (i8*, ...)* @printf(i8* %fmt)
 ; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @h, i32 0, i32 0))
diff --git a/test/Transforms/InstCombine/printf-2.ll b/test/Transforms/InstCombine/printf-2.ll
index 466ee1c75770..7e018ebba04d 100644
--- a/test/Transforms/InstCombine/printf-2.ll
+++ b/test/Transforms/InstCombine/printf-2.ll
@@ -13,7 +13,7 @@ declare void @printf(i8*, ...)
 ; Check simplification of printf with void return type.
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
   call void (i8*, ...)* @printf(i8* %fmt)
 ; CHECK-NEXT: call i32 @putchar(i32 104)
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call void (i8*, ...)* @printf(i8* %fmt)
 ; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i32 0, i32 0))
@@ -31,7 +31,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
   %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call void (i8*, ...)* @printf(i8* %fmt, i8* %str)
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 7a6ecff9c0be..826c00484227 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -28,7 +28,7 @@ define i64 @f0(i32 %a0) nounwind {
 }
 
 define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
 ; CHECK: trunc <4 x i64> %1 to <4 x i32>
   %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
@@ -36,7 +36,7 @@ define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
 }
 
 define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
 ; CHECK: zext <4 x i64> %1 to <4 x i128>
   %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
@@ -44,7 +44,7 @@ define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
 }
 
 define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: zext <4 x i32> %arg to <4 x i64>
 ; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
   %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
@@ -52,7 +52,7 @@ define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
 }
 
 define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: trunc <4 x i128> %arg to <4 x i64>
 ; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
   %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
diff --git a/test/Transforms/InstCombine/puts-1.ll b/test/Transforms/InstCombine/puts-1.ll
index ef4e1bbd824c..bd7557ef650f 100644
--- a/test/Transforms/InstCombine/puts-1.ll
+++ b/test/Transforms/InstCombine/puts-1.ll
@@ -11,7 +11,7 @@ declare i32 @puts(i8*)
 ; Check puts("") -> putchar('\n').
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
   call i32 @puts(i8* %str)
 ; CHECK-NEXT: call i32 @putchar(i32 10)
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 ; Don't simplify if the return value is used.
 
 define i32 @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
   %ret = call i32 @puts(i8* %str)
 ; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @empty, i32 0, i32 0))
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index b421b7c0e8b4..22fd90bf7a75 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -1,36 +1,56 @@
-; This test makes sure that these instructions are properly eliminated.
+; This test makes sure that urem instructions are properly eliminated.
 ;
-; RUN: opt < %s -instcombine -S | not grep rem
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; END.
 
 define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 0
 	%B = srem i32 %A, 1	; ISA constant 0
 	ret i32 %B
 }
 
 define i32 @test2(i32 %A) {	; 0 % X = 0, we don't need to preserve traps
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 0
 	%B = srem i32 0, %A
 	ret i32 %B
 }
 
 define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 7
+; CHECK-NEXT: ret i32 [[AND]]
 	%B = urem i32 %A, 8
 	ret i32 %B
 }
 
 define i1 @test3a(i32 %A) {
+; CHECK-LABEL: @test3a(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
 	%B = srem i32 %A, -8
 	%C = icmp ne i32 %B, 0
 	ret i1 %C
 }
 
 define i32 @test4(i32 %X, i1 %C) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 %C, i32 0, i32 7
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SEL]], %X
 	%V = select i1 %C, i32 1, i32 8
 	%R = urem i32 %X, %V
 	ret i32 %R
 }
 
 define i32 @test5(i32 %X, i8 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 %B to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 32, [[ZEXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], -1
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], %X
+; CHECK-NEXT: ret i32 [[AND]]
 	%shift.upgrd.1 = zext i8 %B to i32
 	%Amt = shl i32 32, %shift.upgrd.1
 	%V = urem i32 %X, %Amt
@@ -38,29 +58,39 @@ define i32 @test5(i32 %X, i8 %B) {
 }
 
 define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i32 undef
 	%B = srem i32 %A, 0	;; undef
 	ret i32 %B
 }
 
 define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i32 0
 	%B = mul i32 %A, 8
 	%C = srem i32 %B, 4
 	ret i32 %C
 }
 
 define i32 @test8(i32 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i32 0
 	%B = shl i32 %A, 4
 	%C = srem i32 %B, 8
 	ret i32 %C
 }
 
 define i32 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i32 0
 	%B = mul i32 %A, 64
 	%C = urem i32 %B, 32
 	ret i32 %C
 }
 
 define i32 @test10(i8 %c) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 0
 	%tmp.1 = zext i8 %c to i32
 	%tmp.2 = mul i32 %tmp.1, 4
 	%tmp.3 = sext i32 %tmp.2 to i64
@@ -70,6 +100,8 @@ define i32 @test10(i8 %c) {
 }
 
 define i32 @test11(i32 %i) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 0
 	%tmp.1 = and i32 %i, -2
 	%tmp.3 = mul i32 %tmp.1, 2
 	%tmp.5 = urem i32 %tmp.3, 4
@@ -77,12 +109,98 @@ define i32 @test11(i32 %i) {
 }
 
 define i32 @test12(i32 %i) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret i32 0
 	%tmp.1 = and i32 %i, -4
 	%tmp.5 = srem i32 %tmp.1, 2
 	ret i32 %tmp.5
 }
 
 define i32 @test13(i32 %i) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i32 0
 	%x = srem i32 %i, %i
 	ret i32 %x
 }
+
+define i64 @test14(i64 %x, i32 %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %y
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SHL]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT]], -1
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[ADD]], %x
+; CHECK-NEXT: ret i64 [[AND]]
+	%shl = shl i32 1, %y
+	%zext = zext i32 %shl to i64
+	%urem = urem i64 %x, %zext
+	ret i64 %urem
+}
+
+define i64 @test15(i32 %x, i32 %y) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, %y
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], -1
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], %x
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[AND]] to i64
+; CHECK-NEXT: ret i64 [[ZEXT]]
+	%shl = shl i32 1, %y
+	%zext0 = zext i32 %shl to i64
+	%zext1 = zext i32 %x to i64
+	%urem = urem i64 %zext1, %zext0
+	ret i64 %urem
+}
+
+define i32 @test16(i32 %x, i32 %y) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %y, 11
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 4
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], 3
+; CHECK-NEXT: [[REM:%.*]] = and i32 [[OR]], %x
+; CHECK-NEXT: ret i32 [[REM]]
+	%shr = lshr i32 %y, 11
+	%and = and i32 %shr, 4
+	%add = add i32 %and, 4
+	%rem = urem i32 %x, %add
+	ret i32 %rem
+}
+
+define i32 @test17(i32 %X) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: icmp ne i32 %X, 1
+; CHECK-NEXT: zext i1
+; CHECK-NEXT: ret
+  %A = urem i32 1, %X
+  ret i32 %A
+}
+
+define i32 @test18(i16 %x, i32 %y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[AND:%.*]] = and i16 %x, 4
+; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[AND]] to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[EXT]], 3
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 63
+; CHECK-NEXT: [[REM:%.*]] = and i32 [[XOR]], %y
+; CHECK-NEXT: ret i32 [[REM]]
+	%1 = and i16 %x, 4
+	%2 = icmp ne i16 %1, 0
+	%3 = select i1 %2, i32 32, i32 64
+	%4 = urem i32 %y, %3
+	ret i32 %4
+}
+
+define i32 @test19(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[SHL1:%.*]] = shl i32 1, %x
+; CHECK-NEXT: [[SHL2:%.*]] = shl i32 1, %y
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL1]], [[SHL2]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[AND]], [[SHL1]]
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[ADD]], -1
+; CHECK-NEXT: [[REM:%.*]] = and i32 [[SUB]], %y
+; CHECK-NEXT: ret i32 [[REM]]
+	%A = shl i32 1, %x
+	%B = shl i32 1, %y
+	%C = and i32 %A, %B
+	%D = add i32 %C, %A
+	%E = urem i32 %y, %D
+	ret i32 %E
+}
diff --git a/test/Transforms/InstCombine/select-2.ll b/test/Transforms/InstCombine/select-2.ll
index a76addc9942b..5b9deb4515a8 100644
--- a/test/Transforms/InstCombine/select-2.ll
+++ b/test/Transforms/InstCombine/select-2.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -instcombine -S | grep select | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: select
+; CHECK: select
 
 ; Make sure instcombine don't fold select into operands. We don't want to emit
 ; select of two integers unless it's selecting 0 / 1.
diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll
index 946ea2b8b70c..77446cd8ba02 100644
--- a/test/Transforms/InstCombine/select-crash.ll
+++ b/test/Transforms/InstCombine/select-crash.ll
@@ -21,7 +21,7 @@ entry:
 
 ; PR10180: same crash, but with vectors
 define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: fsub <4 x float>
 ; CHECK: select
 ; CHECK: fadd <4 x float>
@@ -31,7 +31,7 @@ define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
   ret <4 x float> %sel
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define i32 @test3(i1 %bool, i32 %a) {
 entry:
   %cond = or i1 %bool, true
diff --git a/test/Transforms/InstCombine/select-extractelement.ll b/test/Transforms/InstCombine/select-extractelement.ll
new file mode 100644
index 000000000000..e7ea851d9214
--- /dev/null
+++ b/test/Transforms/InstCombine/select-extractelement.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @v4float_user(<4 x float>) #0
+
+
+
+define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  ret float %extract
+}
+
+; Two extractelements read the same select, so the <4 x float> select is expected to remain
+define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_two_select(
+; CHECK: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract1 = extractelement <4 x float> %sel, i32 1
+  %extract2 = extractelement <4 x float> %sel, i32 2
+  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+  ret <2 x float> %build2
+}
+
+; Select has an extra non-extractelement user, don't change it
+define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select_user(
+; CHECK: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne i32 %c, 0
+  %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  call void @v4float_user(<4 x float> %sel)
+  ret float %extract
+}
+
+define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect_user(
+; CHECK: select <4 x i1> {{.*}}, <4 x float>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %sel, i32 2
+  call void @v4float_user(<4 x float> %sel)
+  ret float %extract
+}
+
+; Single extract from a vector-condition select: no <4 x i1> select should be left behind
+define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect(
+; CHECK-NOT: select <4 x i1>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract = extractelement <4 x float> %select, i32 0
+  ret float %extract
+}
+
+; Multiple extractelements from a vector select. NOTE(review): the negative check below looks for a scalar-condition select, but the input only has a <4 x i1> select - confirm intent
+define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_two_vselect(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+  %cmp = icmp ne <4 x i32> %c, zeroinitializer
+  %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+  %extract1 = extractelement <4 x float> %sel, i32 1
+  %extract2 = extractelement <4 x float> %sel, i32 2
+  %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+  %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+  ret <2 x float> %build2
+}
+
+; All the vector selects should be decomposed into scalar selects
+; Test multiple extractelements
+define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_vector_select(
+; CHECK-NOT: select i1 {{.*}}, <4 x float>
+entry:
+  %0 = extractelement <4 x i32> %c, i32 0
+  %tobool = icmp ne i32 %0, 0
+  %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
+  %1 = extractelement <4 x float> %a.sink, i32 0
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %3 = extractelement <4 x i32> %c, i32 1
+  %tobool1 = icmp ne i32 %3, 0
+  %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
+  %4 = extractelement <4 x float> %a.sink1, i32 1
+  %5 = insertelement <4 x float> %2, float %4, i32 1
+  %6 = extractelement <4 x i32> %c, i32 2
+  %tobool6 = icmp ne i32 %6, 0
+  %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
+  %7 = extractelement <4 x float> %a.sink2, i32 2
+  %8 = insertelement <4 x float> %5, float %7, i32 2
+  %9 = extractelement <4 x i32> %c, i32 3
+  %tobool11 = icmp ne i32 %9, 0
+  %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
+  %10 = extractelement <4 x float> %a.sink3, i32 3
+  %11 = insertelement <4 x float> %8, float %10, i32 3
+  ret <4 x float> %11
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index c72a6f7c49c6..1458bde82124 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -6,14 +6,14 @@
 define i32 @test1(i32 %A, i32 %B) {
         %C = select i1 false, i32 %A, i32 %B            
         ret i32 %C
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 %B
 }
 
 define i32 @test2(i32 %A, i32 %B) {
         %C = select i1 true, i32 %A, i32 %B             
         ret i32 %C
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 %A
 }
 
@@ -22,7 +22,7 @@ define i32 @test3(i1 %C, i32 %I) {
         ; V = I
         %V = select i1 %C, i32 %I, i32 %I               
         ret i32 %V
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 %I
 }
 
@@ -30,7 +30,7 @@ define i1 @test4(i1 %C) {
         ; V = C
         %V = select i1 %C, i1 true, i1 false            
         ret i1 %V
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret i1 %C
 }
 
@@ -38,7 +38,7 @@ define i1 @test5(i1 %C) {
         ; V = !C
         %V = select i1 %C, i1 false, i1 true            
         ret i1 %V
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: xor i1 %C, true
 ; CHECK: ret i1
 }
@@ -47,7 +47,7 @@ define i32 @test6(i1 %C) {
         ; V = cast C to int
         %V = select i1 %C, i32 1, i32 0         
         ret i32 %V
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: %V = zext i1 %C to i32
 ; CHECK: ret i32 %V
 }
@@ -56,7 +56,7 @@ define i1 @test7(i1 %C, i1 %X) {
         ; R = or C, X       
         %R = select i1 %C, i1 true, i1 %X               
         ret i1 %R
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %R = or i1 %C, %X
 ; CHECK: ret i1 %R
 }
@@ -65,7 +65,7 @@ define i1 @test8(i1 %C, i1 %X) {
         ; R = and C, X
         %R = select i1 %C, i1 %X, i1 false              
         ret i1 %R
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: %R = and i1 %C, %X
 ; CHECK: ret i1 %R
 }
@@ -74,7 +74,7 @@ define i1 @test9(i1 %C, i1 %X) {
         ; R = and !C, X
         %R = select i1 %C, i1 false, i1 %X              
         ret i1 %R
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: xor i1 %C, true
 ; CHECK: %R = and i1
 ; CHECK: ret i1 %R
@@ -84,7 +84,7 @@ define i1 @test10(i1 %C, i1 %X) {
         ; R = or !C, X
         %R = select i1 %C, i1 %X, i1 true               
         ret i1 %R
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: xor i1 %C, true
 ; CHECK: %R = or i1
 ; CHECK: ret i1 %R
@@ -94,7 +94,7 @@ define i32 @test11(i32 %a) {
         %C = icmp eq i32 %a, 0          
         %R = select i1 %C, i32 0, i32 1         
         ret i32 %R
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: icmp ne i32 %a, 0
 ; CHECK: %R = zext i1
 ; CHECK: ret i32 %R
@@ -104,7 +104,7 @@ define i32 @test12(i1 %cond, i32 %a) {
         %b = or i32 %a, 1               
         %c = select i1 %cond, i32 %b, i32 %a            
         ret i32 %c
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK: %b = zext i1 %cond to i32
 ; CHECK: %c = or i32 %b, %a
 ; CHECK: ret i32 %c
@@ -114,7 +114,7 @@ define i32 @test12a(i1 %cond, i32 %a) {
         %b = ashr i32 %a, 1             
         %c = select i1 %cond, i32 %b, i32 %a            
         ret i32 %c
-; CHECK: @test12a
+; CHECK-LABEL: @test12a(
 ; CHECK: %b = zext i1 %cond to i32
 ; CHECK: %c = ashr i32 %a, %b
 ; CHECK: ret i32 %c
@@ -124,7 +124,7 @@ define i32 @test12b(i1 %cond, i32 %a) {
         %b = ashr i32 %a, 1             
         %c = select i1 %cond, i32 %a, i32 %b            
         ret i32 %c
-; CHECK: @test12b
+; CHECK-LABEL: @test12b(
 ; CHECK: zext i1 %cond to i32
 ; CHECK: %b = xor i32
 ; CHECK: %c = ashr i32 %a, %b
@@ -135,7 +135,7 @@ define i32 @test13(i32 %a, i32 %b) {
         %C = icmp eq i32 %a, %b         
         %V = select i1 %C, i32 %a, i32 %b               
         ret i32 %V
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK: ret i32 %b
 }
 
@@ -143,7 +143,7 @@ define i32 @test13a(i32 %a, i32 %b) {
         %C = icmp ne i32 %a, %b         
         %V = select i1 %C, i32 %a, i32 %b               
         ret i32 %V
-; CHECK: @test13a
+; CHECK-LABEL: @test13a(
 ; CHECK: ret i32 %a
 }
 
@@ -151,7 +151,7 @@ define i32 @test13b(i32 %a, i32 %b) {
         %C = icmp eq i32 %a, %b         
         %V = select i1 %C, i32 %b, i32 %a               
         ret i32 %V
-; CHECK: @test13b
+; CHECK-LABEL: @test13b(
 ; CHECK: ret i32 %a
 }
 
@@ -160,7 +160,7 @@ define i1 @test14a(i1 %C, i32 %X) {
         ; (X < 1) | !C
         %R = icmp slt i32 %V, 1         
         ret i1 %R
-; CHECK: @test14a
+; CHECK-LABEL: @test14a(
 ; CHECK: icmp slt i32 %X, 1
 ; CHECK: xor i1 %C, true
 ; CHECK: or i1
@@ -172,7 +172,7 @@ define i1 @test14b(i1 %C, i32 %X) {
         ; (X < 1) | C
         %R = icmp slt i32 %V, 1         
         ret i1 %R
-; CHECK: @test14b
+; CHECK-LABEL: @test14b(
 ; CHECK: icmp slt i32 %X, 1
 ; CHECK: or i1
 ; CHECK: ret i1 %R
@@ -184,7 +184,7 @@ define i32 @test15a(i32 %X) {
         %t2 = icmp eq i32 %t1, 0                
         %t3 = select i1 %t2, i32 0, i32 16              
         ret i32 %t3
-; CHECK: @test15a
+; CHECK-LABEL: @test15a(
 ; CHECK: %t1 = and i32 %X, 16
 ; CHECK: ret i32 %t1
 }
@@ -195,7 +195,7 @@ define i32 @test15b(i32 %X) {
         %t2 = icmp eq i32 %t1, 0                
         %t3 = select i1 %t2, i32 32, i32 0              
         ret i32 %t3
-; CHECK: @test15b
+; CHECK-LABEL: @test15b(
 ; CHECK: %t1 = and i32 %X, 32
 ; CHECK: xor i32 %t1, 32
 ; CHECK: ret i32
@@ -207,7 +207,7 @@ define i32 @test15c(i32 %X) {
         %t2 = icmp eq i32 %t1, 16               
         %t3 = select i1 %t2, i32 16, i32 0              
         ret i32 %t3
-; CHECK: @test15c
+; CHECK-LABEL: @test15c(
 ; CHECK: %t1 = and i32 %X, 16
 ; CHECK: ret i32 %t1
 }
@@ -218,7 +218,7 @@ define i32 @test15d(i32 %X) {
         %t2 = icmp ne i32 %t1, 0                
         %t3 = select i1 %t2, i32 16, i32 0              
         ret i32 %t3
-; CHECK: @test15d
+; CHECK-LABEL: @test15d(
 ; CHECK: %t1 = and i32 %X, 16
 ; CHECK: ret i32 %t1
 }
@@ -229,7 +229,7 @@ define i32 @test15e(i32 %X) {
         %t2 = icmp ne i32 %t1, 0
         %t3 = select i1 %t2, i32 256, i32 0
         ret i32 %t3
-; CHECK: @test15e
+; CHECK-LABEL: @test15e(
 ; CHECK: %t1 = shl i32 %X, 1
 ; CHECK: and i32 %t1, 256
 ; CHECK: ret i32
@@ -241,7 +241,7 @@ define i32 @test15f(i32 %X) {
         %t2 = icmp ne i32 %t1, 0
         %t3 = select i1 %t2, i32 0, i32 256
         ret i32 %t3
-; CHECK: @test15f
+; CHECK-LABEL: @test15f(
 ; CHECK: %t1 = shl i32 %X, 1
 ; CHECK: and i32 %t1, 256
 ; CHECK: xor i32 %{{.*}}, 256
@@ -254,7 +254,7 @@ define i32 @test15g(i32 %X) {
         %t2 = icmp ne i32 %t1, 0
         %t3 = select i1 %t2, i32 -1, i32 -9
         ret i32 %t3
-; CHECK: @test15g
+; CHECK-LABEL: @test15g(
 ; CHECK-NEXT: %1 = or i32 %X, -9
 ; CHECK-NEXT: ret i32 %1
 }
@@ -265,7 +265,7 @@ define i32 @test15h(i32 %X) {
         %t2 = icmp ne i32 %t1, 0
         %t3 = select i1 %t2, i32 -9, i32 -1
         ret i32 %t3
-; CHECK: @test15h
+; CHECK-LABEL: @test15h(
 ; CHECK-NEXT: %1 = or i32 %X, -9
 ; CHECK-NEXT: %2 = xor i32 %1, 8
 ; CHECK-NEXT: ret i32 %2
@@ -277,7 +277,7 @@ define i32 @test15i(i32 %X) {
         %t2 = icmp ne i32 %t1, 0
         %t3 = select i1 %t2, i32 577, i32 1089
         ret i32 %t3
-; CHECK: @test15i
+; CHECK-LABEL: @test15i(
 ; CHECK-NEXT: %t1 = shl i32 %X, 8
 ; CHECK-NEXT: %1 = and i32 %t1, 512
 ; CHECK-NEXT: %2 = xor i32 %1, 512
@@ -291,7 +291,7 @@ define i32 @test15j(i32 %X) {
         %t2 = icmp ne i32 %t1, 0
         %t3 = select i1 %t2, i32 1089, i32 577
         ret i32 %t3
-; CHECK: @test15j
+; CHECK-LABEL: @test15j(
 ; CHECK-NEXT: %t1 = shl i32 %X, 8
 ; CHECK-NEXT: %1 = and i32 %t1, 512
 ; CHECK-NEXT: %2 = add i32 %1, 577
@@ -302,7 +302,7 @@ define i32 @test16(i1 %C, i32* %P) {
         %P2 = select i1 %C, i32* %P, i32* null          
         %V = load i32* %P2              
         ret i32 %V
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NEXT: %V = load i32* %P
 ; CHECK: ret i32 %V
 }
@@ -311,7 +311,7 @@ define i1 @test17(i32* %X, i1 %C) {
         %R = select i1 %C, i32* %X, i32* null           
         %RV = icmp eq i32* %R, null             
         ret i1 %RV
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK: icmp eq i32* %X, null
 ; CHECK: xor i1 %C, true
 ; CHECK: %RV = or i1
@@ -322,7 +322,7 @@ define i32 @test18(i32 %X, i32 %Y, i1 %C) {
         %R = select i1 %C, i32 %X, i32 0                
         %V = sdiv i32 %Y, %R            
         ret i32 %V
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK: %V = sdiv i32 %Y, %X
 ; CHECK: ret i32 %V
 }
@@ -331,7 +331,7 @@ define i32 @test19(i32 %x) {
         %tmp = icmp ugt i32 %x, 2147483647              
         %retval = select i1 %tmp, i32 -1, i32 0         
         ret i32 %retval
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK-NEXT: ashr i32 %x, 31
 ; CHECK-NEXT: ret i32 
 }
@@ -340,7 +340,7 @@ define i32 @test20(i32 %x) {
         %tmp = icmp slt i32 %x, 0               
         %retval = select i1 %tmp, i32 -1, i32 0         
         ret i32 %retval
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK-NEXT: ashr i32 %x, 31
 ; CHECK-NEXT: ret i32 
 }
@@ -349,7 +349,7 @@ define i64 @test21(i32 %x) {
         %tmp = icmp slt i32 %x, 0               
         %retval = select i1 %tmp, i64 -1, i64 0         
         ret i64 %retval
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK-NEXT: ashr i32 %x, 31
 ; CHECK-NEXT: sext i32 
 ; CHECK-NEXT: ret i64
@@ -359,7 +359,7 @@ define i16 @test22(i32 %x) {
         %tmp = icmp slt i32 %x, 0               
         %retval = select i1 %tmp, i16 -1, i16 0         
         ret i16 %retval
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK-NEXT: ashr i32 %x, 31
 ; CHECK-NEXT: trunc i32 
 ; CHECK-NEXT: ret i16
@@ -368,7 +368,7 @@ define i16 @test22(i32 %x) {
 define i1 @test23(i1 %a, i1 %b) {
         %c = select i1 %a, i1 %b, i1 %a         
         ret i1 %c
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK-NEXT: %c = and i1 %a, %b
 ; CHECK-NEXT: ret i1 %c
 }
@@ -376,7 +376,7 @@ define i1 @test23(i1 %a, i1 %b) {
 define i1 @test24(i1 %a, i1 %b) {
         %c = select i1 %a, i1 %a, i1 %b         
         ret i1 %c
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK-NEXT: %c = or i1 %a, %b
 ; CHECK-NEXT: ret i1 %c
 }
@@ -390,7 +390,7 @@ ret:
   %a = phi i1 [true, %jump], [false, %entry]
   %b = select i1 %a, i32 10, i32 20
   ret i32 %b
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK: %a = phi i32 [ 10, %jump ], [ 20, %entry ]
 ; CHECK-NEXT: ret i32 %a
 }
@@ -405,7 +405,7 @@ ret:
   %a = phi i1 [true, %jump], [%c, %entry]
   %b = select i1 %a, i32 10, i32 20
   ret i32 %b
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK: %a = phi i32 [ 10, %jump ], [ 20, %entry ]
 ; CHECK-NEXT: ret i32 %a
 }
@@ -419,7 +419,7 @@ ret:
   %a = phi i1 [true, %jump], [false, %entry]
   %b = select i1 %a, i32 %A, i32 %B
   ret i32 %b
-; CHECK: @test27
+; CHECK-LABEL: @test27(
 ; CHECK: %a = phi i32 [ %A, %jump ], [ %B, %entry ]
 ; CHECK-NEXT: ret i32 %a
 }
@@ -434,7 +434,7 @@ ret:
   %a = phi i1 [true, %jump], [false, %entry]
   %b = select i1 %a, i32 %A, i32 %c
   ret i32 %b
-; CHECK: @test28
+; CHECK-LABEL: @test28(
 ; CHECK: %a = phi i32 [ %A, %jump ], [ %B, %entry ]
 ; CHECK-NEXT: ret i32 %a
 }
@@ -452,7 +452,7 @@ ret:
 next:
   %b = select i1 %a, i32 %A, i32 %c
   ret i32 %b
-; CHECK: @test29
+; CHECK-LABEL: @test29(
 ; CHECK: %a = phi i32 [ %A, %jump ], [ %B, %entry ]
 ; CHECK: ret i32 %a
 }
@@ -466,7 +466,7 @@ define i32 @test30(i32 %x, i32 %y) {
   %cmp5 = icmp sgt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %cond, i32 %x
   ret i32 %retval
-; CHECK: @test30
+; CHECK-LABEL: @test30(
 ; CHECK: ret i32 %cond
 }
 
@@ -477,7 +477,7 @@ define i32 @test31(i32 %x, i32 %y) {
   %cmp5 = icmp ugt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %cond, i32 %x
   ret i32 %retval
-; CHECK: @test31
+; CHECK-LABEL: @test31(
 ; CHECK: ret i32 %cond
 }
 
@@ -488,7 +488,7 @@ define i32 @test32(i32 %x, i32 %y) {
   %cmp5 = icmp sgt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %x, i32 %cond
   ret i32 %retval
-; CHECK: @test32
+; CHECK-LABEL: @test32(
 ; CHECK: ret i32 %cond
 }
 
@@ -499,7 +499,7 @@ define i32 @test33(i32 %x, i32 %y) {
   %cmp5 = icmp sgt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %cond, i32 %x
   ret i32 %retval
-; CHECK: @test33
+; CHECK-LABEL: @test33(
 ; CHECK: ret i32 %x
 }
 
@@ -510,7 +510,7 @@ define i32 @test34(i32 %x, i32 %y) {
   %cmp5 = icmp sgt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %x, i32 %cond
   ret i32 %retval
-; CHECK: @test34
+; CHECK-LABEL: @test34(
 ; CHECK: ret i32 %x
 }
 
@@ -518,7 +518,7 @@ define i32 @test35(i32 %x) {
   %cmp = icmp sge i32 %x, 0
   %cond = select i1 %cmp, i32 60, i32 100
   ret i32 %cond
-; CHECK: @test35
+; CHECK-LABEL: @test35(
 ; CHECK: ashr i32 %x, 31
 ; CHECK: and i32 {{.*}}, 40
 ; CHECK: add i32 {{.*}}, 60
@@ -529,7 +529,7 @@ define i32 @test36(i32 %x) {
   %cmp = icmp slt i32 %x, 0
   %cond = select i1 %cmp, i32 60, i32 100
   ret i32 %cond
-; CHECK: @test36
+; CHECK-LABEL: @test36(
 ; CHECK: ashr i32 %x, 31
 ; CHECK: and i32 {{.*}}, -40
 ; CHECK: add i32 {{.*}}, 100
@@ -540,7 +540,7 @@ define i32 @test37(i32 %x) {
   %cmp = icmp sgt i32 %x, -1
   %cond = select i1 %cmp, i32 1, i32 -1
   ret i32 %cond
-; CHECK: @test37
+; CHECK-LABEL: @test37(
 ; CHECK: ashr i32 %x, 31
 ; CHECK: or i32 {{.*}}, 1
 ; CHECK: ret
@@ -552,7 +552,7 @@ define i1 @test38(i1 %cond) {
   %ptr = select i1 %cond, i32* %zero, i32* %one
   %isnull = icmp eq i32* %ptr, null
   ret i1 %isnull
-; CHECK: @test38
+; CHECK-LABEL: @test38(
 ; CHECK: ret i1 false
 }
 
@@ -560,7 +560,7 @@ define i1 @test39(i1 %cond, double %x) {
   %s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
   %cmp = fcmp ule double %x, %s
   ret i1 %cmp
-; CHECK: @test39
+; CHECK-LABEL: @test39(
 ; CHECK: ret i1 true
 }
 
@@ -571,7 +571,7 @@ define i1 @test40(i1 %cond) {
   %s = select i1 %cond, i32* %a, i32* %b
   %r = icmp eq i32* %s, %c
   ret i1 %r
-; CHECK: @test40
+; CHECK-LABEL: @test40(
 ; CHECK: ret i1 false
 }
 
@@ -580,7 +580,7 @@ define i32 @test41(i1 %cond, i32 %x, i32 %y) {
   %s = select i1 %cond, i32 %y, i32 %z
   %r = and i32 %x, %s
   ret i32 %r
-; CHECK: @test41
+; CHECK-LABEL: @test41(
 ; CHECK-NEXT: and i32 %x, %y
 ; CHECK-NEXT: ret i32
 }
@@ -590,7 +590,7 @@ define i32 @test42(i32 %x, i32 %y) {
   %cond = icmp eq i32 %x, 0
   %c = select i1 %cond, i32 %b, i32 %y
   ret i32 %c
-; CHECK: @test42
+; CHECK-LABEL: @test42(
 ; CHECK-NEXT: %cond = icmp eq i32 %x, 0
 ; CHECK-NEXT: %b = sext i1 %cond to i32
 ; CHECK-NEXT: %c = add i32 %b, %y
@@ -602,7 +602,7 @@ define i64 @test43(i32 %a) nounwind {
 	%is_a_nonnegative = icmp sgt i32 %a, -1
 	%max = select i1 %is_a_nonnegative, i64 %a_ext, i64 0
 	ret i64 %max
-; CHECK: @test43
+; CHECK-LABEL: @test43(
 ; CHECK-NEXT: %a_ext = sext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonnegative = icmp slt i64 %a_ext, 0
 ; CHECK-NEXT: %max = select i1 %is_a_nonnegative, i64 0, i64 %a_ext
@@ -614,7 +614,7 @@ define i64 @test44(i32 %a) nounwind {
 	%is_a_nonpositive = icmp slt i32 %a, 1
 	%min = select i1 %is_a_nonpositive, i64 %a_ext, i64 0
 	ret i64 %min
-; CHECK: @test44
+; CHECK-LABEL: @test44(
 ; CHECK-NEXT: %a_ext = sext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonpositive = icmp sgt i64 %a_ext, 0
 ; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 0, i64 %a_ext
@@ -625,7 +625,7 @@ define i64 @test45(i32 %a) nounwind {
 	%is_a_nonnegative = icmp ugt i32 %a, 2
 	%max = select i1 %is_a_nonnegative, i64 %a_ext, i64 3
 	ret i64 %max
-; CHECK: @test45
+; CHECK-LABEL: @test45(
 ; CHECK-NEXT: %a_ext = zext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonnegative = icmp ult i64 %a_ext, 3
 ; CHECK-NEXT: %max = select i1 %is_a_nonnegative, i64 3, i64 %a_ext
@@ -637,7 +637,7 @@ define i64 @test46(i32 %a) nounwind {
 	%is_a_nonpositive = icmp ult i32 %a, 3
 	%min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
 	ret i64 %min
-; CHECK: @test46
+; CHECK-LABEL: @test46(
 ; CHECK-NEXT: %a_ext = zext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
 ; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
@@ -648,7 +648,7 @@ define i64 @test47(i32 %a) nounwind {
 	%is_a_nonnegative = icmp ugt i32 %a, 2
 	%max = select i1 %is_a_nonnegative, i64 %a_ext, i64 3
 	ret i64 %max
-; CHECK: @test47
+; CHECK-LABEL: @test47(
 ; CHECK-NEXT: %a_ext = sext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonnegative = icmp ult i64 %a_ext, 3
 ; CHECK-NEXT: %max = select i1 %is_a_nonnegative, i64 3, i64 %a_ext
@@ -660,7 +660,7 @@ define i64 @test48(i32 %a) nounwind {
 	%is_a_nonpositive = icmp ult i32 %a, 3
 	%min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
 	ret i64 %min
-; CHECK: @test48
+; CHECK-LABEL: @test48(
 ; CHECK-NEXT: %a_ext = sext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
 ; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
@@ -672,7 +672,7 @@ define i64 @test49(i32 %a) nounwind {
 	%is_a_nonpositive = icmp ult i32 %a, 3
 	%min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
 	ret i64 %min
-; CHECK: @test49
+; CHECK-LABEL: @test49(
 ; CHECK-NEXT: %a_ext = sext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
 ; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
@@ -683,7 +683,7 @@ define i64 @test50(i32 %a) nounwind {
 	%a_ext = sext i32 %a to i64
 	%min = select i1 %is_a_nonpositive, i64 2, i64 %a_ext
 	ret i64 %min
-; CHECK: @test50
+; CHECK-LABEL: @test50(
 ; CHECK-NEXT: %a_ext = sext i32 %a to i64
 ; CHECK-NEXT: %is_a_nonpositive = icmp ugt i64 %a_ext, 2
 ; CHECK-NEXT: %min = select i1 %is_a_nonpositive, i64 %a_ext, i64 2
@@ -695,7 +695,7 @@ define i64 @test50(i32 %a) nounwind {
 ; This select instruction can't be eliminated because trying to do so would
 ; change the number of vector elements. This used to assert.
 define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
-; CHECK: @test51
+; CHECK-LABEL: @test51(
   %select = select <3 x i1> %icmp, <3 x i16> zeroinitializer, <3 x i16> %tmp
 ; CHECK: select <3 x i1>
   %tmp2 = bitcast <3 x i16> %select to i48
@@ -705,7 +705,7 @@ define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
 ; PR8575
 
 define i32 @test52(i32 %n, i32 %m) nounwind {
-; CHECK: @test52
+; CHECK-LABEL: @test52(
   %cmp = icmp sgt i32 %n, %m
   %. = select i1 %cmp, i32 1, i32 3
   %add = add nsw i32 %., 3
@@ -720,7 +720,7 @@ define i32 @test53(i32 %x) nounwind {
   %cmp = icmp eq i32 %and, %x
   %sel = select i1 %cmp, i32 2, i32 1
   ret i32 %sel
-; CHECK: @test53
+; CHECK-LABEL: @test53(
 ; CHECK: select i1 %cmp
 ; CHECK: ret
 }
@@ -730,7 +730,7 @@ define i32 @test54(i32 %X, i32 %Y) {
   %B = icmp eq i32 %A, 0
   %C = select i1 %B, i32 %A, i32 1
   ret i32 %C
-; CHECK: @test54
+; CHECK-LABEL: @test54(
 ; CHECK-NOT: ashr
 ; CHECK-NOT: select
 ; CHECK: icmp ne i32 %X, 0
@@ -743,7 +743,7 @@ define i1 @test55(i1 %X, i32 %Y, i32 %Z) {
   %B = select i1 %X, i32 %Y, i32 %A
   %C = icmp eq i32 %B, 0
   ret i1 %C
-; CHECK: @test55
+; CHECK-LABEL: @test55(
 ; CHECK-NOT: ashr
 ; CHECK-NOT: select
 ; CHECK: icmp eq
@@ -755,7 +755,7 @@ define i32 @test56(i16 %x) nounwind {
   %conv = zext i16 %x to i32
   %cond = select i1 %tobool, i32 0, i32 %conv
   ret i32 %cond
-; CHECK: @test56
+; CHECK-LABEL: @test56(
 ; CHECK-NEXT: zext
 ; CHECK-NEXT: ret
 }
@@ -765,7 +765,7 @@ define i32 @test57(i32 %x, i32 %y) nounwind {
   %tobool = icmp eq i32 %x, 0
   %.and = select i1 %tobool, i32 0, i32 %and
   ret i32 %.and
-; CHECK: @test57
+; CHECK-LABEL: @test57(
 ; CHECK-NEXT: and i32 %x, %y
 ; CHECK-NEXT: ret
 }
@@ -775,7 +775,7 @@ define i32 @test58(i16 %x) nounwind {
   %conv = zext i16 %x to i32
   %cond = select i1 %tobool, i32 %conv, i32 1
   ret i32 %cond
-; CHECK: @test58
+; CHECK-LABEL: @test58(
 ; CHECK-NEXT: zext
 ; CHECK-NEXT: ret
 }
@@ -785,7 +785,7 @@ define i32 @test59(i32 %x, i32 %y) nounwind {
   %tobool = icmp ne i32 %x, %y
   %.and = select i1 %tobool, i32 %and, i32 %y
   ret i32 %.and
-; CHECK: @test59
+; CHECK-LABEL: @test59(
 ; CHECK-NEXT: and i32 %x, %y
 ; CHECK-NEXT: ret
 }
@@ -796,7 +796,7 @@ define i1 @test60(i32 %x, i1* %y) nounwind {
   %cmp1 = icmp slt i32 %x, 1
   %sel = select i1 %cmp, i1 %load, i1 %cmp1
   ret i1 %sel
-; CHECK: @test60
+; CHECK-LABEL: @test60(
 ; CHECK: select
 }
 
@@ -806,7 +806,7 @@ define i32 @test61(i32* %ptr) {
   %B = icmp eq i32* %ptr, @glbl
   %C = select i1 %B, i32 %A, i32 10
   ret i32 %C
-; CHECK: @test61
+; CHECK-LABEL: @test61(
 ; CHECK: ret i32 10
 }
 
@@ -814,7 +814,7 @@ define i1 @test62(i1 %A, i1 %B) {
         %not = xor i1 %A, true
         %C = select i1 %A, i1 %not, i1 %B             
         ret i1 %C
-; CHECK: @test62
+; CHECK-LABEL: @test62(
 ; CHECK: %not = xor i1 %A, true
 ; CHECK: %C = and i1 %not, %B
 ; CHECK: ret i1 %C
@@ -824,7 +824,7 @@ define i1 @test63(i1 %A, i1 %B) {
         %not = xor i1 %A, true
         %C = select i1 %A, i1 %B, i1 %not         
         ret i1 %C
-; CHECK: @test63
+; CHECK-LABEL: @test63(
 ; CHECK: %not = xor i1 %A, true
 ; CHECK: %C = or i1 %B, %not
 ; CHECK: ret i1 %C
@@ -860,11 +860,11 @@ cond.end17:
 
 while.body:
   br label %while.body
-; CHECK: @test64
+; CHECK-LABEL: @test64(
 ; CHECK-NOT: select
 }
 
-; CHECK: @select_icmp_eq_and_1_0_or_2
+; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2(
 ; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 %x, 1
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], 2
 ; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[AND]], %y
@@ -877,7 +877,7 @@ define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_eq_and_32_0_or_8
+; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8(
 ; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 2
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
 ; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[AND]], %y
@@ -890,7 +890,7 @@ define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_ne_0_and_4096_or_4096
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 4096
 ; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[AND]], 4096
 ; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
@@ -903,7 +903,7 @@ define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_eq_and_4096_0_or_4096
+; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 4096
 ; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[AND]], %y
 ; CHECK-NEXT: ret i32 [[OR]]
@@ -915,7 +915,7 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_eq_0_and_1_or_1
+; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i64 %x, 1
 ; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = trunc i64 [[AND]] to i32
 ; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
@@ -928,7 +928,7 @@ define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_ne_0_and_4096_or_32
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_32(
 ; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 7
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 32
 ; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[AND]], 32
@@ -942,7 +942,7 @@ define i32 @select_icmp_ne_0_and_4096_or_32(i32 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_ne_0_and_32_or_4096
+; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096(
 ; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 %x, 7
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], 4096
 ; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[AND]], 4096
@@ -956,7 +956,7 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
   ret i32 %select
 }
 
-; CHECK: @select_icmp_ne_0_and_1073741824_or_8
+; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
 ; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 27
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
 ; CHECK-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i32 [[AND]] to i8
@@ -971,7 +971,7 @@ define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
   ret i8 %select
 }
 
-; CHECK: @select_icmp_ne_0_and_8_or_1073741824
+; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i8 %x, 8
 ; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i8 [[AND]] to i32
 ; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl nuw nsw i32 [[ZEXT]], 27
@@ -985,3 +985,49 @@ define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
   %select = select i1 %cmp, i32 %y, i32 %or
   ret i32 %select
 }
+
+; We can't combine here, because the cmp is scalar and the or vector.
+; Just make sure we don't assert.
+define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
+  %and = and i32 %x, 1
+  %cmp = icmp eq i32 %and, 0
+  %or = or <2 x i32> %y, <i32 2, i32 2>
+  %select = select i1 %cmp, <2 x i32> %y, <2 x i32> %or
+  ret <2 x i32> %select
+}
+
+define i32 @test65(i64 %x) {
+  %1 = and i64 %x, 16
+  %2 = icmp ne i64 %1, 0
+  %3 = select i1 %2, i32 40, i32 42
+  ret i32 %3
+
+; CHECK-LABEL: @test65(
+; CHECK: and i64 %x, 16
+; CHECK: trunc i64 %1 to i32
+; CHECK: lshr exact i32 %2, 3
+; CHECK: xor i32 %3, 42
+}
+
+define i32 @test66(i64 %x) {
+  %1 = and i64 %x, 4294967296
+  %2 = icmp ne i64 %1, 0
+  %3 = select i1 %2, i32 40, i32 42
+  ret i32 %3
+
+; CHECK-LABEL: @test66(
+; CHECK: select
+}
+
+define i32 @test67(i16 %x) {
+  %1 = and i16 %x, 4
+  %2 = icmp ne i16 %1, 0
+  %3 = select i1 %2, i32 40, i32 42
+  ret i32 %3
+
+; CHECK-LABEL: @test67(
+; CHECK: and i16 %x, 4
+; CHECK: zext i16 %1 to i32
+; CHECK: lshr exact i32 %2, 1
+; CHECK: xor i32 %3, 42
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index 968f37c9c129..b8dfe2257b18 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -11,7 +11,7 @@ define i64 @test1(i32 %x) {
   %s = sext i32 %t to i64
   ret i64 %s
   
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: zext i32 %t
 }
 
@@ -20,7 +20,7 @@ define i64 @test2(i32 %x) {
   %s = sext i32 %t to i64
   ret i64 %s
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: zext i32 %t
 }
 
@@ -29,7 +29,7 @@ define i64 @test3(i32 %x) {
   %s = sext i32 %t to i64
   ret i64 %s
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: zext i32 %t
 }
 
@@ -38,7 +38,7 @@ define i64 @test4(i32 %x) {
   %s = sext i32 %t to i64
   ret i64 %s
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: zext i32 %t
 }
 
@@ -46,7 +46,7 @@ define i64 @test5(i32 %x) {
   %t = urem i32 %x, 30000
   %s = sext i32 %t to i64
   ret i64 %s
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: zext i32 %t
 }
 
@@ -55,7 +55,7 @@ define i64 @test6(i32 %x) {
   %t = mul i32 %u, 3
   %s = sext i32 %t to i64
   ret i64 %s
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: zext i32 %t
 }
 
@@ -64,7 +64,7 @@ define i64 @test7(i32 %x) {
   %u = sub i32 20000, %t
   %s = sext i32 %u to i64
   ret i64 %s
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: zext i32 %u to i64
 }
 
@@ -74,7 +74,7 @@ define i32 @test8(i8 %a, i32 %f, i1 %p, i32* %z) {
   %s = trunc i32 %e to i16
   %n = sext i16 %s to i32
   ret i32 %n
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: %d = lshr i32 %f, 24
 ; CHECK: %n = select i1 %p, i32 %d, i32 0
 ; CHECK: ret i32 %n
@@ -92,7 +92,7 @@ F:
 	%V = phi i32 [%t2, %T], [42, %entry]
 	%W = trunc i32 %V to i16
 	ret i16 %W
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: T:
 ; CHECK-NEXT: br label %F
 ; CHECK: F:
@@ -108,7 +108,7 @@ entry:
         %a = ashr i8 %tmp16, 6 
         %b = sext i8 %a to i32 
         ret i32 %b
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK:  shl i32 %i, 30
 ; CHECK-NEXT: ashr exact i32
 ; CHECK-NEXT: ret i32
@@ -120,7 +120,7 @@ define void @test11(<2 x i16> %srcA, <2 x i16> %srcB, <2 x i16>* %dst) {
   %tmask = ashr <2 x i16> %sext, <i16 15, i16 15> 
   store <2 x i16> %tmask, <2 x i16>* %dst
   ret void                                                                                                                      
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NEXT: icmp eq
 ; CHECK-NEXT: sext <2 x i1>
 ; CHECK-NEXT: store <2 x i16>
@@ -132,7 +132,7 @@ define i64 @test12(i32 %x) nounwind {
   %sub = sub nsw i32 0, %shr
   %conv = sext i32 %sub to i64
   ret i64 %conv
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK: sext
 ; CHECK: ret
 }
@@ -142,7 +142,7 @@ define i32 @test13(i32 %x) nounwind {
   %cmp = icmp eq i32 %and, 0
   %ext = sext i1 %cmp to i32
   ret i32 %ext
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK-NEXT: %and = lshr i32 %x, 3
 ; CHECK-NEXT: %1 = and i32 %and, 1
 ; CHECK-NEXT: %sext = add i32 %1, -1
@@ -154,7 +154,7 @@ define i32 @test14(i16 %x) nounwind {
   %cmp = icmp ne i16 %and, 16
   %ext = sext i1 %cmp to i32
   ret i32 %ext
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: %and = lshr i16 %x, 4
 ; CHECK-NEXT: %1 = and i16 %and, 1
 ; CHECK-NEXT: %sext = add i16 %1, -1
@@ -167,7 +167,7 @@ define i32 @test15(i32 %x) nounwind {
   %cmp = icmp ne i32 %and, 0
   %ext = sext i1 %cmp to i32
   ret i32 %ext
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT: %1 = shl i32 %x, 27
 ; CHECK-NEXT: %sext = ashr i32 %1, 31
 ; CHECK-NEXT: ret i32 %sext
@@ -178,7 +178,7 @@ define i32 @test16(i16 %x) nounwind {
   %cmp = icmp eq i16 %and, 8
   %ext = sext i1 %cmp to i32
   ret i32 %ext
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NEXT: %1 = shl i16 %x, 12
 ; CHECK-NEXT: %sext = ashr i16 %1, 15
 ; CHECK-NEXT: %ext = sext i16 %sext to i32
@@ -189,7 +189,7 @@ define i32 @test17(i1 %x) nounwind {
   %c1 = sext i1 %x to i32
   %c2 = sub i32 0, %c1
   ret i32 %c2
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK-NEXT: [[TEST17:%.*]] = zext i1 %x to i32
 ; CHECK-NEXT: ret i32 [[TEST17]]
 }
diff --git a/test/Transforms/InstCombine/shift-sra.ll b/test/Transforms/InstCombine/shift-sra.ll
index a578bbe4d4fc..75235500d513 100644
--- a/test/Transforms/InstCombine/shift-sra.ll
+++ b/test/Transforms/InstCombine/shift-sra.ll
@@ -7,7 +7,7 @@ define i32 @test1(i32 %X, i8 %A) {
         %Y = ashr i32 %X, %shift.upgrd.1                ; <i32> [#uses=1]
         %Z = and i32 %Y, 1              ; <i32> [#uses=1]
         ret i32 %Z
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: lshr i32 %X, %shift.upgrd.1 
 }
 
@@ -16,7 +16,7 @@ define i32 @test2(i8 %tmp) {
         %tmp4 = add i32 %tmp3, 7                ; <i32> [#uses=1]
         %tmp5 = ashr i32 %tmp4, 3               ; <i32> [#uses=1]
         ret i32 %tmp5
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: lshr i32 %tmp4, 3
 }
 
@@ -33,7 +33,7 @@ C:
   %S = ashr i64 %P, 12
   ret i64 %S
   
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %P = phi i64
 ; CHECK-NEXT: ret i64 %P
 }
@@ -52,7 +52,7 @@ C:
   %S = ashr i64 %R, 12
   ret i64 %S
   
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: %P = phi i64
 ; CHECK-NEXT: ret i64 %P
 }
@@ -70,7 +70,7 @@ D:
   %P = phi i32 [0, %A], [0, %B], [%Y, %C] 
   %S = ashr i32 %P, 16
   ret i32 %S
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %P = phi i32
 ; CHECK-NEXT: ashr i32 %P, 16
 E:
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 41f8aa9ee812..b1082f06ef74 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -3,14 +3,14 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 %A
         %B = shl i32 %A, 0              ; <i32> [#uses=1]
         ret i32 %B
 }
 
 define i32 @test2(i8 %A) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 0
         %shift.upgrd.1 = zext i8 %A to i32              ; <i32> [#uses=1]
         %B = shl i32 0, %shift.upgrd.1          ; <i32> [#uses=1]
@@ -18,14 +18,14 @@ define i32 @test2(i8 %A) {
 }
 
 define i32 @test3(i32 %A) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 %A
         %B = ashr i32 %A, 0             ; <i32> [#uses=1]
         ret i32 %B
 }
 
 define i32 @test4(i8 %A) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret i32 0
         %shift.upgrd.2 = zext i8 %A to i32              ; <i32> [#uses=1]
         %B = ashr i32 0, %shift.upgrd.2         ; <i32> [#uses=1]
@@ -34,35 +34,35 @@ define i32 @test4(i8 %A) {
 
 
 define i32 @test5(i32 %A) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: ret i32 undef
         %B = lshr i32 %A, 32  ;; shift all bits out 
         ret i32 %B
 }
 
 define i32 @test5a(i32 %A) {
-; CHECK: @test5a
+; CHECK-LABEL: @test5a(
 ; CHECK: ret i32 undef
         %B = shl i32 %A, 32     ;; shift all bits out 
         ret i32 %B
 }
 
 define i32 @test5b() {
-; CHECK: @test5b
+; CHECK-LABEL: @test5b(
 ; CHECK: ret i32 -1
         %B = ashr i32 undef, 2  ;; top two bits must be equal, so not undef
         ret i32 %B
 }
 
 define i32 @test5b2(i32 %A) {
-; CHECK: @test5b2
+; CHECK-LABEL: @test5b2(
 ; CHECK: ret i32 -1
         %B = ashr i32 undef, %A  ;; top %A bits must be equal, so not undef
         ret i32 %B
 }
 
 define i32 @test6(i32 %A) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: mul i32 %A, 6
 ; CHECK-NEXT: ret i32
         %B = shl i32 %A, 1      ;; convert to an mul instruction
@@ -71,7 +71,7 @@ define i32 @test6(i32 %A) {
 }
 
 define i32 @test6a(i32 %A) {
-; CHECK: @test6a
+; CHECK-LABEL: @test6a(
 ; CHECK-NEXT: mul i32 %A, 6
 ; CHECK-NEXT: ret i32
         %B = mul i32 %A, 3
@@ -80,7 +80,7 @@ define i32 @test6a(i32 %A) {
 }
 
 define i32 @test7(i8 %A) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: ret i32 -1
         %shift.upgrd.3 = zext i8 %A to i32 
         %B = ashr i32 -1, %shift.upgrd.3  ;; Always equal to -1
@@ -89,7 +89,7 @@ define i32 @test7(i8 %A) {
 
 ;; (A << 5) << 3 === A << 8 == 0
 define i8 @test8(i8 %A) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: ret i8 0
         %B = shl i8 %A, 5               ; <i8> [#uses=1]
         %C = shl i8 %B, 3               ; <i8> [#uses=1]
@@ -98,7 +98,7 @@ define i8 @test8(i8 %A) {
 
 ;; (A << 7) >> 7 === A & 1
 define i8 @test9(i8 %A) {
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: and i8 %A, 1
 ; CHECK-NEXT: ret i8
         %B = shl i8 %A, 7               ; <i8> [#uses=1]
@@ -110,7 +110,7 @@ define i8 @test9(i8 %A) {
 ;; (A >> 7) << 7 === A & 128
 ;; The shl may be valuable to scalar evolution.
 define i8 @test10(i8 %A) {
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NEXT: and i8 %A, -128
 ; CHECK-NEXT: ret i8
         %B = lshr i8 %A, 7              ; <i8> [#uses=1]
@@ -120,7 +120,7 @@ define i8 @test10(i8 %A) {
 
 ;; Allow the simplification when the lshr shift is exact.
 define i8 @test10a(i8 %A) {
-; CHECK: @test10a
+; CHECK-LABEL: @test10a(
 ; CHECK-NEXT: ret i8 %A
         %B = lshr exact i8 %A, 7
         %C = shl i8 %B, 7
@@ -131,7 +131,7 @@ define i8 @test10a(i8 %A) {
 ;; (A >> 3) << 4 === (A & 0x1F) << 1
 ;; The shl may be valuable to scalar evolution.
 define i8 @test11(i8 %A) {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: shl i8
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3               ; <i8> [#uses=1]
@@ -142,7 +142,7 @@ define i8 @test11(i8 %A) {
 
 ;; Allow the simplification in InstCombine when the lshr shift is exact.
 define i8 @test11a(i8 %A) {
-; CHECK: @test11a
+; CHECK-LABEL: @test11a(
 ; CHECK-NEXT: mul i8 %A, 6
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3
@@ -154,7 +154,7 @@ define i8 @test11a(i8 %A) {
 ;; This is deferred to DAGCombine unless %B is single-use.
 ;; (A >> 8) << 8 === A & -256
 define i32 @test12(i32 %A) {
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: and i32 %A, -256
 ; CHECK-NEXT: ret i32
         %B = ashr i32 %A, 8             ; <i32> [#uses=1]
@@ -166,7 +166,7 @@ define i32 @test12(i32 %A) {
 ;; (A >> 3) << 4 === (A & -8) * 2
 ;; The shl may be valuable to scalar evolution.
 define i8 @test13(i8 %A) {
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK: shl i8
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3               ; <i8> [#uses=1]
@@ -176,7 +176,7 @@ define i8 @test13(i8 %A) {
 }
 
 define i8 @test13a(i8 %A) {
-; CHECK: @test13a
+; CHECK-LABEL: @test13a(
 ; CHECK-NEXT: mul i8 %A, 6
 ; CHECK-NEXT: ret i8
         %a = mul i8 %A, 3
@@ -187,7 +187,7 @@ define i8 @test13a(i8 %A) {
 
 ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
 define i32 @test14(i32 %A) {
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: %B = and i32 %A, -19760
 ; CHECK-NEXT: or i32 %B, 19744
 ; CHECK-NEXT: ret i32
@@ -199,7 +199,7 @@ define i32 @test14(i32 %A) {
 
 ;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
 define i32 @test14a(i32 %A) {
-; CHECK: @test14a
+; CHECK-LABEL: @test14a(
 ; CHECK-NEXT: and i32 %A, 77
 ; CHECK-NEXT: ret i32
         %B = shl i32 %A, 4              ; <i32> [#uses=1]
@@ -209,7 +209,7 @@ define i32 @test14a(i32 %A) {
 }
 
 define i32 @test15(i1 %C) {
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT: select i1 %C, i32 12, i32 4
 ; CHECK-NEXT: ret i32
         %A = select i1 %C, i32 3, i32 1         ; <i32> [#uses=1]
@@ -218,7 +218,7 @@ define i32 @test15(i1 %C) {
 }
 
 define i32 @test15a(i1 %C) {
-; CHECK: @test15a
+; CHECK-LABEL: @test15a(
 ; CHECK-NEXT: select i1 %C, i32 512, i32 128
 ; CHECK-NEXT: ret i32
         %A = select i1 %C, i8 3, i8 1           ; <i8> [#uses=1]
@@ -228,7 +228,7 @@ define i32 @test15a(i1 %C) {
 }
 
 define i1 @test16(i32 %X) {
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NEXT: and i32 %X, 16
 ; CHECK-NEXT: icmp ne i32
 ; CHECK-NEXT: ret i1
@@ -239,7 +239,7 @@ define i1 @test16(i32 %X) {
 }
 
 define i1 @test17(i32 %A) {
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK-NEXT: and i32 %A, -8
 ; CHECK-NEXT: icmp eq i32
 ; CHECK-NEXT: ret i1
@@ -250,7 +250,7 @@ define i1 @test17(i32 %A) {
 
 
 define i1 @test18(i8 %A) {
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK: ret i1 false
 
         %B = lshr i8 %A, 7              ; <i8> [#uses=1]
@@ -260,7 +260,7 @@ define i1 @test18(i8 %A) {
 }
 
 define i1 @test19(i32 %A) {
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK-NEXT: icmp ult i32 %A, 4
 ; CHECK-NEXT: ret i1
         %B = ashr i32 %A, 2             ; <i32> [#uses=1]
@@ -271,18 +271,17 @@ define i1 @test19(i32 %A) {
 
 
 define i1 @test19a(i32 %A) {
-; CHECK: @test19a
-; CHECK-NEXT: and i32 %A, -4
-; CHECK-NEXT: icmp eq i32
+; CHECK-LABEL: @test19a(
+; CHECK-NEXT: icmp ugt i32 %A, -5
 ; CHECK-NEXT: ret i1
         %B = ashr i32 %A, 2             ; <i32> [#uses=1]
-        ;; (X & -4) == -4
+        ;; X >u ~4
         %C = icmp eq i32 %B, -1         ; <i1> [#uses=1]
         ret i1 %C
 }
 
 define i1 @test20(i8 %A) {
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK: ret i1 false
         %B = ashr i8 %A, 7              ; <i8> [#uses=1]
         ;; false
@@ -291,7 +290,7 @@ define i1 @test20(i8 %A) {
 }
 
 define i1 @test21(i8 %A) {
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK-NEXT: and i8 %A, 15
 ; CHECK-NEXT: icmp eq i8
 ; CHECK-NEXT: ret i1
@@ -301,7 +300,7 @@ define i1 @test21(i8 %A) {
 }
 
 define i1 @test22(i8 %A) {
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK-NEXT: and i8 %A, 15
 ; CHECK-NEXT: icmp eq i8
 ; CHECK-NEXT: ret i1
@@ -311,7 +310,7 @@ define i1 @test22(i8 %A) {
 }
 
 define i8 @test23(i32 %A) {
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK-NEXT: trunc i32 %A to i8
 ; CHECK-NEXT: ret i8
 
@@ -323,7 +322,7 @@ define i8 @test23(i32 %A) {
 }
 
 define i8 @test24(i8 %X) {
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK-NEXT: and i8 %X, 3
 ; CHECK-NEXT: ret i8
         %Y = and i8 %X, -5              ; <i8> [#uses=1]
@@ -333,7 +332,7 @@ define i8 @test24(i8 %X) {
 }
 
 define i32 @test25(i32 %tmp.2, i32 %AA) {
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK-NEXT: and i32 %tmp.2, -131072
 ; CHECK-NEXT: add i32 %{{[^,]*}}, %AA
 ; CHECK-NEXT: and i32 %{{[^,]*}}, -131072
@@ -347,7 +346,7 @@ define i32 @test25(i32 %tmp.2, i32 %AA) {
 
 ;; handle casts between shifts.
 define i32 @test26(i32 %A) {
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK-NEXT: and i32 %A, -2
 ; CHECK-NEXT: ret i32
         %B = lshr i32 %A, 1             ; <i32> [#uses=1]
@@ -358,7 +357,7 @@ define i32 @test26(i32 %A) {
 
 
 define i1 @test27(i32 %x) nounwind {
-; CHECK: @test27
+; CHECK-LABEL: @test27(
 ; CHECK-NEXT: and i32 %x, 8
 ; CHECK-NEXT: icmp ne i32
 ; CHECK-NEXT: ret i1
@@ -369,7 +368,7 @@ define i1 @test27(i32 %x) nounwind {
  
 define i8 @test28(i8 %x) {
 entry:
-; CHECK: @test28
+; CHECK-LABEL: @test28(
 ; CHECK:     icmp slt i8 %x, 0
 ; CHECK-NEXT:     br i1 
 	%tmp1 = lshr i8 %x, 7
@@ -386,7 +385,7 @@ bb2:
 define i8 @test28a(i8 %x, i8 %y) {
 entry:
 ; This shouldn't be transformed.
-; CHECK: @test28a
+; CHECK-LABEL: @test28a(
 ; CHECK:     %tmp1 = lshr i8 %x, 7
 ; CHECK:     %cond1 = icmp eq i8 %tmp1, 0
 ; CHECK:     br i1 %cond1, label %bb2, label %bb1
@@ -407,7 +406,7 @@ entry:
 	%tmp917 = trunc i64 %tmp916 to i32
 	%tmp10 = lshr i32 %tmp917, 31
 	ret i32 %tmp10
-; CHECK: @test29
+; CHECK-LABEL: @test29(
 ; CHECK:  %tmp916 = lshr i64 %d18, 63
 ; CHECK:  %tmp10 = trunc i64 %tmp916 to i32
 }
@@ -418,7 +417,7 @@ define i32 @test30(i32 %A, i32 %B, i32 %C) {
 	%Y = shl i32 %B, %C
 	%Z = and i32 %X, %Y
 	ret i32 %Z
-; CHECK: @test30
+; CHECK-LABEL: @test30(
 ; CHECK: %X1 = and i32 %A, %B
 ; CHECK: %Z = shl i32 %X1, %C
 }
@@ -428,7 +427,7 @@ define i32 @test31(i32 %A, i32 %B, i32 %C) {
 	%Y = lshr i32 %B, %C
 	%Z = or i32 %X, %Y
 	ret i32 %Z
-; CHECK: @test31
+; CHECK-LABEL: @test31(
 ; CHECK: %X1 = or i32 %A, %B
 ; CHECK: %Z = lshr i32 %X1, %C
 }
@@ -438,7 +437,7 @@ define i32 @test32(i32 %A, i32 %B, i32 %C) {
 	%Y = ashr i32 %B, %C
 	%Z = xor i32 %X, %Y
 	ret i32 %Z
-; CHECK: @test32
+; CHECK-LABEL: @test32(
 ; CHECK: %X1 = xor i32 %A, %B
 ; CHECK: %Z = ashr i32 %X1, %C
 ; CHECK: ret i32 %Z
@@ -448,7 +447,7 @@ define i1 @test33(i32 %X) {
         %tmp1 = shl i32 %X, 7
         %tmp2 = icmp slt i32 %tmp1, 0
         ret i1 %tmp2
-; CHECK: @test33
+; CHECK-LABEL: @test33(
 ; CHECK: %tmp1.mask = and i32 %X, 16777216
 ; CHECK: %tmp2 = icmp ne i32 %tmp1.mask, 0
 }
@@ -457,7 +456,7 @@ define i1 @test34(i32 %X) {
         %tmp1 = lshr i32 %X, 7
         %tmp2 = icmp slt i32 %tmp1, 0
         ret i1 %tmp2
-; CHECK: @test34
+; CHECK-LABEL: @test34(
 ; CHECK: ret i1 false
 }
 
@@ -465,7 +464,7 @@ define i1 @test35(i32 %X) {
         %tmp1 = ashr i32 %X, 7
         %tmp2 = icmp slt i32 %tmp1, 0
         ret i1 %tmp2
-; CHECK: @test35
+; CHECK-LABEL: @test35(
 ; CHECK: %tmp2 = icmp slt i32 %X, 0
 ; CHECK: ret i1 %tmp2
 }
@@ -478,7 +477,7 @@ entry:
   %tmp45 = lshr i128 %ins, 64
   ret i128 %tmp45
   
-; CHECK: @test36
+; CHECK-LABEL: @test36(
 ; CHECK:  %tmp231 = or i128 %B, %A
 ; CHECK:  %ins = and i128 %tmp231, 18446744073709551615
 ; CHECK:  ret i128 %ins
@@ -494,7 +493,7 @@ entry:
   %tmp46 = trunc i128 %tmp45 to i64
   ret i64 %tmp46
   
-; CHECK: @test37
+; CHECK-LABEL: @test37(
 ; CHECK:  %tmp23 = shl nuw nsw i128 %tmp22, 32
 ; CHECK:  %ins = or i128 %tmp23, %A
 ; CHECK:  %tmp46 = trunc i128 %ins to i64
@@ -504,14 +503,14 @@ define i32 @test38(i32 %x) nounwind readnone {
   %rem = srem i32 %x, 32
   %shl = shl i32 1, %rem
   ret i32 %shl
-; CHECK: @test38
+; CHECK-LABEL: @test38(
 ; CHECK-NEXT: and i32 %x, 31
 ; CHECK-NEXT: shl i32 1
 ; CHECK-NEXT: ret i32
 }
 
 ; <rdar://problem/8756731>
-; CHECK: @test39
+; CHECK-LABEL: @test39(
 define i8 @test39(i32 %a0) {
 entry:
   %tmp4 = trunc i32 %a0 to i8
@@ -537,7 +536,7 @@ define i32 @test40(i32 %a, i32 %b) nounwind {
   %shl2 = shl i32 %shl1, 2
   %div = udiv i32 %a, %shl2
   ret i32 %div
-; CHECK: @test40
+; CHECK-LABEL: @test40(
 ; CHECK-NEXT: add i32 %b, 2
 ; CHECK-NEXT: lshr i32 %a
 ; CHECK-NEXT: ret i32
@@ -547,7 +546,7 @@ define i32 @test41(i32 %a, i32 %b) nounwind {
   %1 = shl i32 1, %b
   %2 = shl i32 %1, 3
   ret i32 %2
-; CHECK: @test41
+; CHECK-LABEL: @test41(
 ; CHECK-NEXT: shl i32 8, %b
 ; CHECK-NEXT: ret i32
 }
@@ -556,7 +555,7 @@ define i32 @test42(i32 %a, i32 %b) nounwind {
   %div = lshr i32 4096, %b    ; must be exact otherwise we'd divide by zero
   %div2 = udiv i32 %a, %div
   ret i32 %div2
-; CHECK: @test42
+; CHECK-LABEL: @test42(
 ; CHECK-NEXT: lshr exact i32 4096, %b
 }
 
@@ -564,7 +563,7 @@ define i32 @test43(i32 %a, i32 %b) nounwind {
   %div = shl i32 4096, %b    ; must be exact otherwise we'd divide by zero
   %div2 = udiv i32 %a, %div
   ret i32 %div2
-; CHECK: @test43
+; CHECK-LABEL: @test43(
 ; CHECK-NEXT: add i32 %b, 12
 ; CHECK-NEXT: lshr
 ; CHECK-NEXT: ret
@@ -574,7 +573,7 @@ define i32 @test44(i32 %a) nounwind {
   %y = shl nuw i32 %a, 1
   %z = shl i32 %y, 4
   ret i32 %z
-; CHECK: @test44
+; CHECK-LABEL: @test44(
 ; CHECK-NEXT: %y = shl i32 %a, 5
 ; CHECK-NEXT: ret i32 %y
 }
@@ -583,7 +582,7 @@ define i32 @test45(i32 %a) nounwind {
   %y = lshr exact i32 %a, 1
   %z = lshr i32 %y, 4
   ret i32 %z
-; CHECK: @test45
+; CHECK-LABEL: @test45(
 ; CHECK-NEXT: %y = lshr i32 %a, 5
 ; CHECK-NEXT: ret i32 %y
 }
@@ -592,7 +591,7 @@ define i32 @test46(i32 %a) {
   %y = ashr exact i32 %a, 3
   %z = shl i32 %y, 1
   ret i32 %z
-; CHECK: @test46
+; CHECK-LABEL: @test46(
 ; CHECK-NEXT: %z = ashr exact i32 %a, 2
 ; CHECK-NEXT: ret i32 %z
 }
@@ -601,7 +600,7 @@ define i32 @test47(i32 %a) {
   %y = lshr exact i32 %a, 3
   %z = shl i32 %y, 1
   ret i32 %z
-; CHECK: @test47
+; CHECK-LABEL: @test47(
 ; CHECK-NEXT: %z = lshr exact i32 %a, 2
 ; CHECK-NEXT: ret i32 %z
 }
@@ -610,7 +609,7 @@ define i32 @test48(i32 %x) {
   %A = lshr exact i32 %x, 1
   %B = shl i32 %A, 3
   ret i32 %B
-; CHECK: @test48
+; CHECK-LABEL: @test48(
 ; CHECK-NEXT: %B = shl i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
@@ -619,7 +618,7 @@ define i32 @test49(i32 %x) {
   %A = ashr exact i32 %x, 1
   %B = shl i32 %A, 3
   ret i32 %B
-; CHECK: @test49
+; CHECK-LABEL: @test49(
 ; CHECK-NEXT: %B = shl i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
@@ -628,7 +627,7 @@ define i32 @test50(i32 %x) {
   %A = shl nsw i32 %x, 1
   %B = ashr i32 %A, 3
   ret i32 %B
-; CHECK: @test50
+; CHECK-LABEL: @test50(
 ; CHECK-NEXT: %B = ashr i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
@@ -637,7 +636,7 @@ define i32 @test51(i32 %x) {
   %A = shl nuw i32 %x, 1
   %B = lshr i32 %A, 3
   ret i32 %B
-; CHECK: @test51
+; CHECK-LABEL: @test51(
 ; CHECK-NEXT: %B = lshr i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
@@ -646,7 +645,7 @@ define i32 @test52(i32 %x) {
   %A = shl nsw i32 %x, 3
   %B = ashr i32 %A, 1
   ret i32 %B
-; CHECK: @test52
+; CHECK-LABEL: @test52(
 ; CHECK-NEXT: %B = shl nsw i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
@@ -655,7 +654,7 @@ define i32 @test53(i32 %x) {
   %A = shl nuw i32 %x, 3
   %B = lshr i32 %A, 1
   ret i32 %B
-; CHECK: @test53
+; CHECK-LABEL: @test53(
 ; CHECK-NEXT: %B = shl nuw i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
@@ -665,7 +664,7 @@ define i32 @test54(i32 %x) {
   %shl = shl i32 %shr2, 4
   %and = and i32 %shl, 16
   ret i32 %and
-; CHECK: @test54
+; CHECK-LABEL: @test54(
 ; CHECK: shl i32 %x, 3
 }
 
@@ -675,7 +674,7 @@ define i32 @test55(i32 %x) {
   %shl = shl i32 %shr2, 4
   %or = or i32 %shl, 8
   ret i32 %or
-; CHECK: @test55
+; CHECK-LABEL: @test55(
 ; CHECK: shl i32 %x, 3
 }
 
@@ -684,7 +683,7 @@ define i32 @test56(i32 %x) {
   %shl = shl i32 %shr2, 4
   %or = or i32 %shl, 7
   ret i32 %or
-; CHECK: @test56
+; CHECK-LABEL: @test56(
 ; CHECK: shl i32 %shr2, 4
 }
 
@@ -694,7 +693,7 @@ define i32 @test57(i32 %x) {
   %shl = shl i32 %shr, 4
   %and = and i32 %shl, 16
   ret i32 %and
-; CHECK: @test57
+; CHECK-LABEL: @test57(
 ; CHECK: shl i32 %x, 3
 }
 
@@ -703,7 +702,7 @@ define i32 @test58(i32 %x) {
   %shl = shl i32 %shr, 4
   %or = or i32 %shl, 8
   ret i32 %or
-; CHECK: @test58
+; CHECK-LABEL: @test58(
 ; CHECK: shl i32 %x, 3
 }
 
@@ -712,7 +711,7 @@ define i32 @test59(i32 %x) {
   %shl = shl i32 %shr, 4
   %or = or i32 %shl, 7
   ret i32 %or
-; CHECK: @test59
+; CHECK-LABEL: @test59(
 ; CHECK: %shl = shl i32 %shr1, 4
 }
 
@@ -722,7 +721,7 @@ define i32 @test60(i32 %x) {
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 1
   ret i32 %or
-; CHECK: @test60
+; CHECK-LABEL: @test60(
 ; CHECK: ashr i32 %x, 3
 }
 
@@ -732,7 +731,7 @@ define i32 @test61(i32 %x) {
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 2
   ret i32 %or
-; CHECK: @test61
+; CHECK-LABEL: @test61(
 ; CHECK: ashr i32 %x, 4
 }
 
@@ -742,6 +741,42 @@ define i32 @test62(i32 %x) {
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 1
   ret i32 %or
-; CHECK: @test62
+; CHECK-LABEL: @test62(
 ; CHECK: ashr exact i32 %x, 3
 }
+
+; PR17026
+; CHECK-LABEL: @test63(
+; CHECK-NOT: sh
+; CHECK: ret
+define void @test63(i128 %arg) {
+bb:
+  br i1 undef, label %bb1, label %bb12
+
+bb1:                                              ; preds = %bb11, %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb1
+  br i1 undef, label %bb3, label %bb7
+
+bb3:                                              ; preds = %bb2
+  %tmp = lshr i128 %arg, 36893488147419103232
+  %tmp4 = shl i128 %tmp, 0
+  %tmp5 = or i128 %tmp4, undef
+  %tmp6 = trunc i128 %tmp5 to i16
+  br label %bb8
+
+bb7:                                              ; preds = %bb2
+  br i1 undef, label %bb8, label %bb2
+
+bb8:                                              ; preds = %bb7, %bb3
+  %tmp9 = phi i16 [ %tmp6, %bb3 ], [ undef, %bb7 ]
+  %tmp10 = icmp eq i16 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb8
+  br i1 undef, label %bb1, label %bb12
+
+bb12:                                             ; preds = %bb11, %bb8, %bb
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
index a6066d80020d..95ed9b976ba3 100644
--- a/test/Transforms/InstCombine/sign-test-and-or.ll
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -8,7 +8,7 @@ define void @test1(i32 %a, i32 %b) nounwind {
   %or.cond = or i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: %1 = or i32 %a, %b
 ; CHECK-NEXT: %2 = icmp slt i32 %1, 0
 ; CHECK-NEXT: br
@@ -27,7 +27,7 @@ define void @test2(i32 %a, i32 %b) nounwind {
   %or.cond = or i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: %1 = and i32 %a, %b
 ; CHECK-NEXT: %2 = icmp sgt i32 %1, -1
 ; CHECK-NEXT: br
@@ -46,7 +46,7 @@ define void @test3(i32 %a, i32 %b) nounwind {
   %or.cond = and i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: %1 = and i32 %a, %b
 ; CHECK-NEXT: %2 = icmp slt i32 %1, 0
 ; CHECK-NEXT: br
@@ -65,7 +65,7 @@ define void @test4(i32 %a, i32 %b) nounwind {
   %or.cond = and i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: %1 = or i32 %a, %b
 ; CHECK-NEXT: %2 = icmp sgt i32 %1, -1
 ; CHECK-NEXT: br
@@ -85,7 +85,7 @@ define void @test5(i32 %a) nounwind {
   %or.cond = and i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: %1 = and i32 %a, -2013265920
 ; CHECK-NEXT: %2 = icmp eq i32 %1, 0
 ; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
@@ -105,7 +105,7 @@ define void @test6(i32 %a) nounwind {
   %or.cond = and i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: %1 = and i32 %a, -2013265920
 ; CHECK-NEXT: %2 = icmp eq i32 %1, 0
 ; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
@@ -125,7 +125,7 @@ define void @test7(i32 %a) nounwind {
   %or.cond = or i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: %1 = and i32 %a, -2013265920
 ; CHECK-NEXT: %2 = icmp eq i32 %1, 0
 ; CHECK-NEXT: br i1 %2, label %if.end, label %if.the
@@ -145,7 +145,7 @@ define void @test8(i32 %a) nounwind {
   %or.cond = or i1 %1, %2
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT: %1 = and i32 %a, -2013265920
 ; CHECK-NEXT: %2 = icmp eq i32 %1, 0
 ; CHECK-NEXT: br i1 %2, label %if.end, label %if.the
@@ -165,7 +165,7 @@ define void @test9(i32 %a) nounwind {
   %or.cond = and i1 %2, %3
   br i1 %or.cond, label %if.then, label %if.end
 
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: %1 = and i32 %a, -1073741824
 ; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824
 ; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index 5ed1cd5590ae..d7004977cd63 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -7,7 +7,7 @@ define i32 @test1(i32 %x) {
         %tmp.2 = xor i32 %tmp.1, -32768         ; <i32> [#uses=1]
         %tmp.3 = add i32 %tmp.2, 32768          ; <i32> [#uses=1]
         ret i32 %tmp.3
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %sext = shl i32 %x, 16
 ; CHECK: %tmp.3 = ashr exact i32 %sext, 16
 ; CHECK: ret i32 %tmp.3
@@ -18,7 +18,7 @@ define i32 @test2(i32 %x) {
         %tmp.2 = xor i32 %tmp.1, 32768          ; <i32> [#uses=1]
         %tmp.3 = add i32 %tmp.2, -32768         ; <i32> [#uses=1]
         ret i32 %tmp.3
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %sext = shl i32 %x, 16
 ; CHECK: %tmp.3 = ashr exact i32 %sext, 16
 ; CHECK: ret i32 %tmp.3
@@ -29,7 +29,7 @@ define i32 @test3(i16 %P) {
         %tmp.4 = xor i32 %tmp.1, 32768          ; <i32> [#uses=1]
         %tmp.5 = add i32 %tmp.4, -32768         ; <i32> [#uses=1]
         ret i32 %tmp.5
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %tmp.5 = sext i16 %P to i32
 ; CHECK: ret i32 %tmp.5
 }
@@ -39,7 +39,7 @@ define i32 @test4(i16 %P) {
         %tmp.4 = xor i32 %tmp.1, 32768          ; <i32> [#uses=1]
         %tmp.5 = add i32 %tmp.4, -32768         ; <i32> [#uses=1]
         ret i32 %tmp.5
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: %tmp.5 = sext i16 %P to i32
 ; CHECK: ret i32 %tmp.5
 }
@@ -49,7 +49,7 @@ define i32 @test5(i32 %x) {
         %tmp.2 = xor i32 %tmp.1, 128            ; <i32> [#uses=1]
         %tmp.3 = add i32 %tmp.2, -128           ; <i32> [#uses=1]
         ret i32 %tmp.3
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %sext = shl i32 %x, 24
 ; CHECK: %tmp.3 = ashr exact i32 %sext, 24
 ; CHECK: ret i32 %tmp.3
@@ -59,7 +59,7 @@ define i32 @test6(i32 %x) {
         %tmp.2 = shl i32 %x, 16         ; <i32> [#uses=1]
         %tmp.4 = ashr i32 %tmp.2, 16            ; <i32> [#uses=1]
         ret i32 %tmp.4
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: %tmp.2 = shl i32 %x, 16
 ; CHECK: %tmp.4 = ashr exact i32 %tmp.2, 16
 ; CHECK: ret i32 %tmp.4
@@ -70,7 +70,7 @@ define i32 @test7(i16 %P) {
   %sext1 = shl i32 %tmp.1, 16                     ; <i32> [#uses=1]
   %tmp.5 = ashr i32 %sext1, 16                    ; <i32> [#uses=1]
   ret i32 %tmp.5
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %tmp.5 = sext i16 %P to i32
 ; CHECK: ret i32 %tmp.5
 }
@@ -81,7 +81,7 @@ entry:
   %xor = xor i32 %shr, 67108864                   ; <i32> [#uses=1]
   %sub = add i32 %xor, -67108864                  ; <i32> [#uses=1]
   ret i32 %sub
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: %sub = ashr i32 %x, 5
 ; CHECK: ret i32 %sub
 }
diff --git a/test/Transforms/InstCombine/simplify-libcalls.ll b/test/Transforms/InstCombine/simplify-libcalls.ll
new file mode 100644
index 000000000000..fae3e6e3d9be
--- /dev/null
+++ b/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -0,0 +1,144 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+@G = constant [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=1]
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+define void @foo(i8* %P, i32* %X) {  ; Calls sprintf with the "%s" format stored in @G but passes an i32* as the vararg. No FileCheck directives are attached here; presumably a does-not-crash test.
+	call i32 (i8*, i8*, ...)* @sprintf( i8* %P, i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i32* %X )		; <i32>:1 [#uses=0]
+	ret void
+}
+
+; PR1307
+@str = internal constant [5 x i8] c"foog\00"
+@str1 = internal constant [8 x i8] c"blahhh!\00"
+@str2 = internal constant [5 x i8] c"Ponk\00"
+
+define i8* @test1() {  ; strchr(&@str[2], 103): 103 is ASCII 'g', which sits at index 3 of "foog", so the call is expected to fold to a constant GEP to element 3.
+        %tmp3 = tail call i8* @strchr( i8* getelementptr ([5 x i8]* @str, i32 0, i32 2), i32 103 )              ; <i8*> [#uses=1]
+        ret i8* %tmp3
+
+; CHECK-LABEL: @test1(
+; CHECK: ret i8* getelementptr inbounds ([5 x i8]* @str, i32 0, i64 3)
+}
+
+declare i8* @strchr(i8*, i32)
+
+define i8* @test2() {  ; strchr(&@str1[2], 0): searching for NUL finds the terminator at index 7 of "blahhh!", so the call is expected to fold to a constant GEP to element 7.
+        %tmp3 = tail call i8* @strchr( i8* getelementptr ([8 x i8]* @str1, i32 0, i32 2), i32 0 )               ; <i8*> [#uses=1]
+        ret i8* %tmp3
+
+; CHECK-LABEL: @test2(
+; CHECK: ret i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i64 7)
+}
+
+define i8* @test3() {  ; strchr(&@str2[1], 80): 80 is ASCII 'P', which occurs in "Ponk" only at index 0, before the search start, so the result is expected to fold to null.
+entry:
+        %tmp3 = tail call i8* @strchr( i8* getelementptr ([5 x i8]* @str2, i32 0, i32 1), i32 80 )              ; <i8*> [#uses=1]
+        ret i8* %tmp3
+
+; CHECK-LABEL: @test3(
+; CHECK: ret i8* null
+}
+
+@_2E_str = external constant [5 x i8]		; <[5 x i8]*> [#uses=1]
+
+declare i32 @memcmp(i8*, i8*, i32) nounwind readonly
+
+define i1 @PR2341(i8** %start_addr) {  ; Compares 4 bytes at a loaded pointer against the external constant @_2E_str via memcmp and tests the result for equality with 0. The only requirement after the label is that "i32" appears in the output.
+entry:
+	%tmp4 = load i8** %start_addr, align 4		; <i8*> [#uses=1]
+	%tmp5 = call i32 @memcmp( i8* %tmp4, i8* getelementptr ([5 x i8]* @_2E_str, i32 0, i32 0), i32 4 ) nounwind readonly 		; <i32> [#uses=1]
+	%tmp6 = icmp eq i32 %tmp5, 0		; <i1> [#uses=1]
+	ret i1 %tmp6
+
+; CHECK-LABEL: @PR2341(
+; CHECK: i32
+}
+
+define i32 @PR4284() nounwind {  ; One-byte memcmp of two allocas holding 64 and -127. Treated as unsigned bytes these are 64 and 129, so the expected folded result is 64 - 129 = -65.
+entry:
+	%c0 = alloca i8, align 1		; <i8*> [#uses=2]
+	%c2 = alloca i8, align 1		; <i8*> [#uses=2]
+	store i8 64, i8* %c0
+	store i8 -127, i8* %c2
+	%call = call i32 @memcmp(i8* %c0, i8* %c2, i32 1)		; <i32> [#uses=1]
+	ret i32 %call
+
+; CHECK-LABEL: @PR4284(
+; CHECK: ret i32 -65
+}
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64, %struct.pthread_mutex*, %struct.pthread*, i32, i32, %union.anon }
+%struct.__sbuf = type { i8*, i32, [4 x i8] }
+%struct.pthread = type opaque
+%struct.pthread_mutex = type opaque
+%union.anon = type { i64, [120 x i8] }
+@.str13 = external constant [2 x i8]		; <[2 x i8]*> [#uses=1]
+@.str14 = external constant [2 x i8]		; <[2 x i8]*> [#uses=1]
+
+define i32 @PR4641(i32 %argc, i8** %argv) nounwind {  ; Calls exit(0), then fopen with an undef path and a mode chosen by a select on undef between @.str13 and @.str14, then hits unreachable. No FileCheck directives are attached; presumably a crash regression test.
+entry:
+	call void @exit(i32 0) nounwind
+	%cond392 = select i1 undef, i8* getelementptr ([2 x i8]* @.str13, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str14, i32 0, i32 0)		; <i8*> [#uses=1]
+	%call393 = call %struct.__sFILE* @fopen(i8* undef, i8* %cond392) nounwind		; <%struct.__sFILE*> [#uses=0]
+	unreachable
+}
+
+declare %struct.__sFILE* @fopen(i8*, i8*)
+
+declare void @exit(i32)
+
+define i32 @PR4645() {  ; Reduced CFG: %lor.lhs.false and %for.end have no predecessor defined in this function, and %for.cond ends in unreachable. No FileCheck directives are attached; presumably a crash regression test.
+entry:
+	br label %if.then
+
+lor.lhs.false:		; preds = %while.body (stale note: no %while.body block exists in this function)
+	br i1 undef, label %if.then, label %for.cond
+
+if.then:		; preds = %lor.lhs.false, %while.body
+	call void @exit(i32 1)
+	br label %for.cond
+
+for.cond:		; preds = %for.end, %if.then, %lor.lhs.false
+	%j.0 = phi i32 [ %inc47, %for.end ], [ 0, %if.then ], [ 0, %lor.lhs.false ]		; <i32> [#uses=1]
+	unreachable
+
+for.end:		; preds = %for.cond20 (stale note: no %for.cond20 block exists in this function)
+	%inc47 = add i32 %j.0, 1		; <i32> [#uses=1]
+	br label %for.cond
+}
+
+@h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=1]
+@hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
+@hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
+
+define i32 @MemCpy() {  ; Copies the constant strings @h, @hel and @hello_u (2, 4 and 8 bytes) into a local 1024-byte buffer that is never read. The checks require that no llvm.memcpy call remains before "ret i32 0".
+  %h_p = getelementptr [2 x i8]* @h, i32 0, i32 0
+  %hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0
+  %hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
+  %target = alloca [1024 x i8]
+  %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+  ret i32 0
+
+; CHECK-LABEL: @MemCpy(
+; CHECK-NOT: llvm.memcpy
+; CHECK: ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare i32 @strcmp(i8*, i8*) #0
+
+define void @test9(i8* %x) {  ; @strcmp is declared with #0 (nobuiltin) but this call site carries #1 (builtin). The checks require that no strcmp call survives for strcmp(%x, %x).
+; CHECK-LABEL: @test9(
+; CHECK-NOT: strcmp
+  %y = call i32 @strcmp(i8* %x, i8* %x) #1
+  ret void
+}
+
+attributes #0 = { nobuiltin }
+attributes #1 = { builtin }
diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll
new file mode 100644
index 000000000000..0d1a6027a00a
--- /dev/null
+++ b/test/Transforms/InstCombine/sincospi.ll
@@ -0,0 +1,91 @@
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+
+
+attributes #0 = { readnone nounwind }
+
+declare float @__sinpif(float %x) #0
+declare float @__cospif(float %x) #0 
+
+declare double @__sinpi(double %x) #0
+declare double @__cospi(double %x) #0 
+
+@var32 = global float 0.0
+@var64 = global double 0.0
+
+define float @test_instbased_f32() {  ; __sinpif and __cospif applied to the same loaded float. On x86_64 macosx10.9 one __sincospi_stretf call returning <2 x float> is expected, on arm ios7.0 one returning { float, float }; on the other RUN triples both calls must remain.
+       %val = load float* @var32
+       %sin = call float @__sinpif(float %val) #0
+       %cos = call float @__cospif(float %val) #0
+       %res = fadd float %sin, %cos
+       ret float %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float [[VAL]])
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define float @test_constant_f32() {  ; Same as the load-based f32 case but with the constant argument 1.0: the combined __sincospi_stretf call is expected to take float 1.000000e+00 on the two sincos-capable triples, and both calls must remain on the others.
+       %sin = call float @__sinpif(float 1.0) #0
+       %cos = call float @__cospif(float 1.0) #0
+       %res = fadd float %sin, %cos
+       ret float %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospi_stretf(float 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospi_stretf(float 1.000000e+00)
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define double @test_instbased_f64() {  ; __sinpi and __cospi applied to the same loaded double. Both sincos-capable triples expect one __sincospi_stret call returning { double, double } read with extractvalue; on the other RUN triples both calls must remain.
+       %val = load double* @var64
+       %sin = call double @__sinpi(double %val) #0
+       %cos = call double @__cospi(double %val) #0
+       %res = fadd double %sin, %cos
+       ret double %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
+
+define double @test_constant_f64() {  ; Same as the load-based f64 case but with the constant argument 1.0: the combined __sincospi_stret call is expected to take double 1.000000e+00 on the two sincos-capable triples, and both calls must remain on the others.
+       %sin = call double @__sinpi(double 1.0) #0
+       %cos = call double @__cospi(double 1.0) #0
+       %res = fadd double %sin, %cos
+       ret double %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
diff --git a/test/Transforms/InstCombine/sink_instruction.ll b/test/Transforms/InstCombine/sink_instruction.ll
index 5c4019a98df5..1bbd6b763841 100644
--- a/test/Transforms/InstCombine/sink_instruction.ll
+++ b/test/Transforms/InstCombine/sink_instruction.ll
@@ -4,7 +4,7 @@
 ;; arm of the 'if'.
 
 define i32 @test1(i1 %C, i32 %A, i32 %B) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
         %tmp.2 = sdiv i32 %A, %B                ; <i32> [#uses=1]
         %tmp.9 = add i32 %B, %A         ; <i32> [#uses=1]
@@ -22,7 +22,7 @@ endif:          ; preds = %entry
 
 ;; PHI use, sink divide before call.
 define i32 @test2(i32 %x) nounwind ssp {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: sdiv i32
 entry:
   br label %bb
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
index 9b8c8b1b12c7..78dd7aa7df47 100644
--- a/test/Transforms/InstCombine/sprintf-1.ll
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -1,7 +1,7 @@
 ; Test that the sprintf library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -19,7 +19,7 @@ declare i32 @sprintf(i8*, i8*, ...)
 ; Check sprintf(dst, fmt) -> llvm.memcpy(str, fmt, strlen(fmt) + 1, 1).
 
 define void @test_simplify1(i8* %dst) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
@@ -28,7 +28,7 @@ define void @test_simplify1(i8* %dst) {
 }
 
 define void @test_simplify2(i8* %dst) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %fmt = getelementptr [1 x i8]* @null, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
 ; CHECK-NEXT: store i8 0, i8* %dst, align 1
@@ -37,7 +37,7 @@ define void @test_simplify2(i8* %dst) {
 }
 
 define void @test_simplify3(i8* %dst) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %fmt = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
 ; CHECK-NEXT: store i8 0, i8* %dst, align 1
@@ -48,7 +48,7 @@ define void @test_simplify3(i8* %dst) {
 ; Check sprintf(dst, "%c", chr) -> *(i8*)dst = chr; *((i8*)dst + 1) = 0.
 
 define void @test_simplify4(i8* %dst) {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8 104)
 ; CHECK-NEXT: store i8 104, i8* %dst, align 1
@@ -61,7 +61,7 @@ define void @test_simplify4(i8* %dst) {
 ; Check sprintf(dst, "%s", str) -> llvm.memcpy(dest, str, strlen(str) + 1, 1).
 
 define void @test_simplify5(i8* %dst, i8* %str) {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8* %str)
 ; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
@@ -74,7 +74,7 @@ define void @test_simplify5(i8* %dst, i8* %str) {
 ; Check sprintf(dst, format, ...) -> siprintf(str, format, ...) if no floating.
 
 define void @test_simplify6(i8* %dst) {
-; CHECK-IPRINTF: @test_simplify6
+; CHECK-IPRINTF-LABEL: @test_simplify6(
   %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187)
 ; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
@@ -83,7 +83,7 @@ define void @test_simplify6(i8* %dst) {
 }
 
 define void @test_no_simplify1(i8* %dst) {
-; CHECK-IPRINTF: @test_no_simplify1
+; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87)
 ; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
@@ -92,7 +92,7 @@ define void @test_no_simplify1(i8* %dst) {
 }
 
 define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
 ; CHECK-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
   ret void
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
index 440b9748518d..650b10c6e966 100644
--- a/test/Transforms/InstCombine/sqrt.ll
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -2,7 +2,7 @@
 
 define float @test1(float %x) nounwind readnone ssp {
 entry:
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: fpext
 ; CHECK-NOT: sqrt(
 ; CHECK: sqrtf(
@@ -17,7 +17,7 @@ entry:
 ; PR8096
 define float @test2(float %x) nounwind readnone ssp {
 entry:
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: fpext
 ; CHECK-NOT: sqrt(
 ; CHECK: sqrtf(
@@ -34,7 +34,7 @@ entry:
 ; use of sqrt result.
 define float @test3(float* %v) nounwind uwtable ssp {
 entry:
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: sqrt(
 ; CHECK-NOT: sqrtf(
 ; CHECK: fptrunc
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 164ba7632684..b64c800e546c 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -5,7 +5,7 @@ define void @test1(i32* %P) {
         store i32 123, i32* undef
         store i32 124, i32* null
         ret void
-; CHECK: @test1(
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: store i32 123, i32* undef
 ; CHECK-NEXT: store i32 undef, i32* null
 ; CHECK-NEXT: ret void
@@ -16,7 +16,7 @@ define void @test2(i32* %P) {
         %Y = add i32 %X, 0              ; <i32> [#uses=1]
         store i32 %Y, i32* %P
         ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret void
 }
 
@@ -38,7 +38,7 @@ Cond2:
 Cont:
 	%V = load i32* %A
 	ret i32 %V
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: alloca
 ; CHECK: Cont:
 ; CHECK-NEXT:  %storemerge = phi i32 [ 47, %Cond2 ], [ -987654321, %Cond ]
@@ -58,7 +58,7 @@ Cond:
 Cont:
 	%V = load i32* %A
 	ret i32 %V
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: alloca
 ; CHECK: Cont:
 ; CHECK-NEXT:  %storemerge = phi i32 [ -987654321, %Cond ], [ 47, %0 ]
@@ -76,7 +76,7 @@ Cond:
 
 Cont:
 	ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: Cont:
 ; CHECK-NEXT:  %storemerge = phi i32
 ; CHECK-NEXT:  store i32 %storemerge, i32* %P, align 1
@@ -107,13 +107,14 @@ for.body:                                         ; preds = %for.cond
 
 for.end:                                          ; preds = %for.cond
   ret void
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: for.cond:
 ; CHECK-NEXT: phi i32 [ 42
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !4, metadata !4, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{metadata !"float", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/InstCombine/stpcpy-1.ll b/test/Transforms/InstCombine/stpcpy-1.ll
index 8b6bb0e0d509..b918c9e9e890 100644
--- a/test/Transforms/InstCombine/stpcpy-1.ll
+++ b/test/Transforms/InstCombine/stpcpy-1.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i8* @stpcpy(i8*, i8*)
 
 define i8* @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
@@ -24,7 +24,7 @@ define i8* @test_simplify1() {
 }
 
 define i8* @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
 
@@ -35,7 +35,7 @@ define i8* @test_simplify2() {
 }
 
 define i8* @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [32 x i8]* @b, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/stpcpy-2.ll b/test/Transforms/InstCombine/stpcpy-2.ll
index 2e92c0895ed4..6a0f7530d5a8 100644
--- a/test/Transforms/InstCombine/stpcpy-2.ll
+++ b/test/Transforms/InstCombine/stpcpy-2.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i16* @stpcpy(i8*, i8*)
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll
index 05603918c642..8a02529c61ca 100644
--- a/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; Check cases where slen >= strlen (src).
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -32,7 +32,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -44,7 +44,7 @@ define void @test_simplify3() {
 ; Check cases where there are no string constants.
 
 define void @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
@@ -56,12 +56,12 @@ define void @test_simplify4() {
 ; Check case where the string length is not constant.
 
 define i8* @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK: @__memcpy_chk
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
 ; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
   ret i8* %ret
@@ -70,12 +70,12 @@ define i8* @test_simplify5() {
 ; Check case where the source and destination are the same.
 
 define i8* @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
 
 ; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
 ; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]]
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -83,7 +83,7 @@ define i8* @test_simplify6() {
 ; Check case where slen < strlen (src).
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
@@ -93,4 +93,4 @@ define void @test_no_simplify1() {
 }
 
 declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/stpcpy_chk-2.ll b/test/Transforms/InstCombine/stpcpy_chk-2.ll
index 46c2139276e2..b503da9c191c 100644
--- a/test/Transforms/InstCombine/stpcpy_chk-2.ll
+++ b/test/Transforms/InstCombine/stpcpy_chk-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 @.str = private constant [8 x i8] c"abcdefg\00"
 
 define void @test_no_simplify() {
-; CHECK: @test_no_simplify
+; CHECK-LABEL: @test_no_simplify(
   %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
   %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strcat-1.ll b/test/Transforms/InstCombine/strcat-1.ll
index 3c05d6b06fa0..131ad4834837 100644
--- a/test/Transforms/InstCombine/strcat-1.ll
+++ b/test/Transforms/InstCombine/strcat-1.ll
@@ -13,7 +13,7 @@ declare i8* @strcat(i8*, i8*)
 declare i32 @puts(i8*)
 
 define i32 @main() {
-; CHECK: @main
+; CHECK-LABEL: @main(
 ; CHECK-NOT: call i8* @strcat
 ; CHECK: call i32 @puts
 
diff --git a/test/Transforms/InstCombine/strcat-2.ll b/test/Transforms/InstCombine/strcat-2.ll
index 379ee7495317..48f82670c325 100644
--- a/test/Transforms/InstCombine/strcat-2.ll
+++ b/test/Transforms/InstCombine/strcat-2.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i8* @strcat(i8*, i8*)
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
 ; CHECK-NOT: call i8* @strcat
 ; CHECK: ret void
 
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
 ; CHECK-NEXT: ret void
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/strcat-3.ll b/test/Transforms/InstCombine/strcat-3.ll
index 15aff2f1aa28..e3396df73d37 100644
--- a/test/Transforms/InstCombine/strcat-3.ll
+++ b/test/Transforms/InstCombine/strcat-3.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i16* @strcat(i8*, i8*)
 
 define void @test_nosimplify1() {
-; CHECK: @test_nosimplify1
+; CHECK-LABEL: @test_nosimplify1(
 ; CHECK: call i16* @strcat
 ; CHECK: ret void
 
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
index 5efab9ec4bee..d2c989462156 100644
--- a/test/Transforms/InstCombine/strchr-1.ll
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -52,3 +52,14 @@ define void @test_simplify4(i32 %chr) {
   store i8* %dst, i8** @chp
   ret void
 }
+
+define void @test_simplify5() {  ; strchr(@hello, 65280): 65280 is 0xFF00, whose low byte is 0. The expected result is a pointer to element 13, the last byte of the 14-byte @hello (presumably its NUL terminator; the definition is not shown in this hunk).
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %src, i32 65280)
+  store i8* %dst, i8** @chp
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
index 0679246e0915..fc58ffcb8cb0 100644
--- a/test/Transforms/InstCombine/strcmp-1.ll
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -12,7 +12,7 @@ declare i32 @strcmp(i8*, i8*)
 
 ; strcmp("", x) -> -*x
 define i32 @test1(i8* %str2) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: %2 = sub i32 0, %1
@@ -26,7 +26,7 @@ define i32 @test1(i8* %str2) {
 
 ; strcmp(x, "") -> *x
 define i32 @test2(i8* %str1) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: ret i32 %1
@@ -38,7 +38,7 @@ define i32 @test2(i8* %str1) {
 
 ; strcmp(x, y)  -> cnst
 define i32 @test3() {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 -1
 
   %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
@@ -48,7 +48,7 @@ define i32 @test3() {
 }
 
 define i32 @test4() {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret i32 1
 
   %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
@@ -60,7 +60,7 @@ define i32 @test4() {
 ; strcmp(x, y)   -> memcmp(x, y, <known length>)
 ; (This transform is rather difficult to trigger in a useful manner)
 define i32 @test5(i1 %b) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
 ; CHECK: ret i32 %memcmp
 
@@ -74,7 +74,7 @@ define i32 @test5(i1 %b) {
 
 ; strcmp(x,x)  -> 0
 define i32 @test6(i8* %str) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: ret i32 0
 
   %temp1 = call i32 @strcmp(i8* %str, i8* %str)
diff --git a/test/Transforms/InstCombine/strcmp-2.ll b/test/Transforms/InstCombine/strcmp-2.ll
index 20518960f302..f0ef51652270 100644
--- a/test/Transforms/InstCombine/strcmp-2.ll
+++ b/test/Transforms/InstCombine/strcmp-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i16 @strcmp(i8*, i8*)
 
 define i16 @test_nosimplify() {
-; CHECK: @test_nosimplify
+; CHECK-LABEL: @test_nosimplify(
 ; CHECK: call i16 @strcmp
 ; CHECK: ret i16 %temp1
 
diff --git a/test/Transforms/InstCombine/strcpy-1.ll b/test/Transforms/InstCombine/strcpy-1.ll
index b6cf048b2a81..7c253f6f9fca 100644
--- a/test/Transforms/InstCombine/strcpy-1.ll
+++ b/test/Transforms/InstCombine/strcpy-1.ll
@@ -13,7 +13,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i8* @strcpy(i8*, i8*)
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
@@ -24,7 +24,7 @@ define void @test_simplify1() {
 }
 
 define i8* @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
 
@@ -34,7 +34,7 @@ define i8* @test_simplify2() {
 }
 
 define i8* @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [32 x i8]* @b, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/strcpy-2.ll b/test/Transforms/InstCombine/strcpy-2.ll
index 779e9fdd9598..bad392d7c648 100644
--- a/test/Transforms/InstCombine/strcpy-2.ll
+++ b/test/Transforms/InstCombine/strcpy-2.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i16* @strcpy(i8*, i8*)
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index 3e48f4fd3057..8e7fec76ef5c 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; Check cases where slen >= strlen (src).
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -32,7 +32,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -44,7 +44,7 @@ define void @test_simplify3() {
 ; Check cases where there are no string constants.
 
 define void @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
@@ -56,12 +56,12 @@ define void @test_simplify4() {
 ; Check case where the string length is not constant.
 
 define void @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
 ; CHECK: @__memcpy_chk
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
   ret void
 }
@@ -69,11 +69,11 @@ define void @test_simplify5() {
 ; Check case where the source and destination are the same.
 
 define i8* @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
 
 ; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
-  %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+  %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
   ret i8* %ret
 }
@@ -81,7 +81,7 @@ define i8* @test_simplify6() {
 ; Check case where slen < strlen (src).
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
@@ -91,4 +91,4 @@ define void @test_no_simplify1() {
 }
 
 declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strcpy_chk-2.ll b/test/Transforms/InstCombine/strcpy_chk-2.ll
index d76ea5d068bc..1eff5a822e78 100644
--- a/test/Transforms/InstCombine/strcpy_chk-2.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 @.str = private constant [8 x i8] c"abcdefg\00"
 
 define void @test_no_simplify() {
-; CHECK: @test_no_simplify
+; CHECK-LABEL: @test_no_simplify(
   %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
   %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strcpy_chk-64.ll b/test/Transforms/InstCombine/strcpy_chk-64.ll
index 036fcbe6de1d..31447d9569e1 100644
--- a/test/Transforms/InstCombine/strcpy_chk-64.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-64.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 define void @func(i8* %i) nounwind ssp {
-; CHECK: @func
+; CHECK-LABEL: @func(
 ; CHECK: @__strcpy_chk(i8* %arraydecay, i8* %i, i64 32)
 entry:
   %s = alloca [32 x i8], align 16
diff --git a/test/Transforms/InstCombine/strcspn-1.ll b/test/Transforms/InstCombine/strcspn-1.ll
index 60fad897b2c8..b3b52b5025a5 100644
--- a/test/Transforms/InstCombine/strcspn-1.ll
+++ b/test/Transforms/InstCombine/strcspn-1.ll
@@ -13,7 +13,7 @@ declare i64 @strcspn(i8*, i8*)
 ; Check strcspn(s, "") -> strlen(s).
 
 define i64 @test_simplify1(i8* %str) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call i64 @strcspn(i8* %str, i8* %pat)
@@ -25,7 +25,7 @@ define i64 @test_simplify1(i8* %str) {
 ; Check strcspn("", s) -> 0.
 
 define i64 @test_simplify2(i8* %pat) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %str = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call i64 @strcspn(i8* %str, i8* %pat)
@@ -36,7 +36,7 @@ define i64 @test_simplify2(i8* %pat) {
 ; Check strcspn(s1, s2), where s1 and s2 are constants.
 
 define i64 @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %str = getelementptr [6 x i8]* @abcba, i32 0, i32 0
   %pat = getelementptr [4 x i8]* @abc, i32 0, i32 0
 
@@ -48,7 +48,7 @@ define i64 @test_simplify3() {
 ; Check cases that shouldn't be simplified.
 
 define i64 @test_no_simplify1(i8* %str, i8* %pat) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %ret = call i64 @strcspn(i8* %str, i8* %pat)
 ; CHECK-NEXT: %ret = call i64 @strcspn(i8* %str, i8* %pat)
diff --git a/test/Transforms/InstCombine/strcspn-2.ll b/test/Transforms/InstCombine/strcspn-2.ll
index 4e2393686c7d..ecfa27d3b6ba 100644
--- a/test/Transforms/InstCombine/strcspn-2.ll
+++ b/test/Transforms/InstCombine/strcspn-2.ll
@@ -11,7 +11,7 @@ declare double @strcspn(i8*, i8*)
 ; Check that strcspn functions with the wrong prototype aren't simplified.
 
 define double @test_no_simplify1(i8* %pat) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %str = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call double @strcspn(i8* %str, i8* %pat)
diff --git a/test/Transforms/InstCombine/strlen-1.ll b/test/Transforms/InstCombine/strlen-1.ll
index 6d7464a4cc80..4fa5b4fdb62f 100644
--- a/test/Transforms/InstCombine/strlen-1.ll
+++ b/test/Transforms/InstCombine/strlen-1.ll
@@ -15,7 +15,7 @@ declare i32 @strlen(i8*)
 ; Check strlen(string constant) -> integer constant.
 
 define i32 @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
   %hello_l = call i32 @strlen(i8* %hello_p)
   ret i32 %hello_l
@@ -23,7 +23,7 @@ define i32 @test_simplify1() {
 }
 
 define i32 @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
   %null_l = call i32 @strlen(i8* %null_p)
   ret i32 %null_l
@@ -31,7 +31,7 @@ define i32 @test_simplify2() {
 }
 
 define i32 @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
   %null_hello_l = call i32 @strlen(i8* %null_hello_p)
   ret i32 %null_hello_l
@@ -39,7 +39,7 @@ define i32 @test_simplify3() {
 }
 
 define i32 @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %len = tail call i32 @strlen(i8* @nullstring) nounwind
   ret i32 %len
 ; CHECK-NEXT: ret i32 0
@@ -48,7 +48,7 @@ define i32 @test_simplify4() {
 ; Check strlen(x) == 0 --> *x == 0.
 
 define i1 @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
   %hello_l = call i32 @strlen(i8* %hello_p)
   %eq_hello = icmp eq i32 %hello_l, 0
@@ -57,7 +57,7 @@ define i1 @test_simplify5() {
 }
 
 define i1 @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
   %null_l = call i32 @strlen(i8* %null_p)
   %eq_null = icmp eq i32 %null_l, 0
@@ -68,7 +68,7 @@ define i1 @test_simplify6() {
 ; Check strlen(x) != 0 --> *x != 0.
 
 define i1 @test_simplify7() {
-; CHECK: @test_simplify7
+; CHECK-LABEL: @test_simplify7(
   %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
   %hello_l = call i32 @strlen(i8* %hello_p)
   %ne_hello = icmp ne i32 %hello_l, 0
@@ -77,7 +77,7 @@ define i1 @test_simplify7() {
 }
 
 define i1 @test_simplify8() {
-; CHECK: @test_simplify8
+; CHECK-LABEL: @test_simplify8(
   %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
   %null_l = call i32 @strlen(i8* %null_p)
   %ne_null = icmp ne i32 %null_l, 0
@@ -88,7 +88,7 @@ define i1 @test_simplify8() {
 ; Check cases that shouldn't be simplified.
 
 define i32 @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %a_p = getelementptr [32 x i8]* @a, i32 0, i32 0
   %a_l = call i32 @strlen(i8* %a_p)
 ; CHECK-NEXT: %a_l = call i32 @strlen
diff --git a/test/Transforms/InstCombine/strlen-2.ll b/test/Transforms/InstCombine/strlen-2.ll
index c4fd54c06db9..6652a310ba6a 100644
--- a/test/Transforms/InstCombine/strlen-2.ll
+++ b/test/Transforms/InstCombine/strlen-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i32 @strlen(i8*, i32)
 
 define i32 @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
   %hello_l = call i32 @strlen(i8* %hello_p, i32 187)
 ; CHECK-NEXT: %hello_l = call i32 @strlen
diff --git a/test/Transforms/InstCombine/strncat-1.ll b/test/Transforms/InstCombine/strncat-1.ll
index ad2a18b1465d..8eae3dae2e23 100644
--- a/test/Transforms/InstCombine/strncat-1.ll
+++ b/test/Transforms/InstCombine/strncat-1.ll
@@ -12,7 +12,7 @@ declare i8* @strncat(i8*, i8*, i32)
 declare i32 @puts(i8*)
 
 define i32 @main() {
-; CHECK: @main
+; CHECK-LABEL: @main(
 ; CHECK-NOT: call i8* @strncat
 ; CHECK: call i32 @puts
 
diff --git a/test/Transforms/InstCombine/strncat-2.ll b/test/Transforms/InstCombine/strncat-2.ll
index c56deacd39bb..b09fa1260a03 100644
--- a/test/Transforms/InstCombine/strncat-2.ll
+++ b/test/Transforms/InstCombine/strncat-2.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i8* @strncat(i8*, i8*, i32)
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
 ; CHECK-NOT: call i8* @strncat
 ; CHECK: ret void
 
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
 ; CHECK-NEXT: ret void
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
@@ -32,7 +32,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
 ; CHECK-NEXT: ret void
 
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
@@ -42,7 +42,7 @@ define void @test_simplify3() {
 }
 
 define void @test_nosimplify1() {
-; CHECK: @test_nosimplify1
+; CHECK-LABEL: @test_nosimplify1(
 ; CHECK: call i8* @strncat
 ; CHECK: ret void
 
diff --git a/test/Transforms/InstCombine/strncat-3.ll b/test/Transforms/InstCombine/strncat-3.ll
index 3cd797168705..1b25b4aca1ae 100644
--- a/test/Transforms/InstCombine/strncat-3.ll
+++ b/test/Transforms/InstCombine/strncat-3.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i16* @strncat(i8*, i8*, i32)
 
 define void @test_nosimplify1() {
-; CHECK: @test_nosimplify1
+; CHECK-LABEL: @test_nosimplify1(
 ; CHECK: call i16* @strncat
 ; CHECK: ret void
 
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
index 187c2fa50e82..df30dd100443 100644
--- a/test/Transforms/InstCombine/strncmp-1.ll
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -12,7 +12,7 @@ declare i32 @strncmp(i8*, i8*, i32)
 
 ; strncmp("", x, n) -> -*x
 define i32 @test1(i8* %str2) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: %2 = sub i32 0, %1
@@ -25,7 +25,7 @@ define i32 @test1(i8* %str2) {
 
 ; strncmp(x, "", n) -> *x
 define i32 @test2(i8* %str1) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %strcmpload = load i8* %str1
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: ret i32 %1
@@ -37,7 +37,7 @@ define i32 @test2(i8* %str1) {
 
 ; strncmp(x, y, n)  -> cnst
 define i32 @test3() {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 -1
 
   %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
@@ -47,7 +47,7 @@ define i32 @test3() {
 }
 
 define i32 @test4() {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: ret i32 1
 
   %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
@@ -57,7 +57,7 @@ define i32 @test4() {
 }
 
 define i32 @test5() {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: ret i32 0
 
   %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
@@ -68,7 +68,7 @@ define i32 @test5() {
 
 ; strncmp(x,y,1) -> memcmp(x,y,1)
 define i32 @test6(i8* %str1, i8* %str2) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: [[LOAD1:%[a-z]+]] = load i8* %str1, align 1
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
 ; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1
@@ -82,7 +82,7 @@ define i32 @test6(i8* %str1, i8* %str2) {
 
 ; strncmp(x,y,0)   -> 0
 define i32 @test7(i8* %str1, i8* %str2) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: ret i32 0
 
   %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
@@ -91,7 +91,7 @@ define i32 @test7(i8* %str1, i8* %str2) {
 
 ; strncmp(x,x,n)  -> 0
 define i32 @test8(i8* %str, i32 %n) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: ret i32 0
 
   %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
diff --git a/test/Transforms/InstCombine/strncmp-2.ll b/test/Transforms/InstCombine/strncmp-2.ll
index 3fc43a6fd4f5..16ad8a4f62a7 100644
--- a/test/Transforms/InstCombine/strncmp-2.ll
+++ b/test/Transforms/InstCombine/strncmp-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare i16 @strncmp(i8*, i8*, i32)
 
 define i16 @test_nosimplify() {
-; CHECK: @test_nosimplify
+; CHECK-LABEL: @test_nosimplify(
 ; CHECK: call i16 @strncmp
 ; CHECK: ret i16 %temp1
 
diff --git a/test/Transforms/InstCombine/strncpy-1.ll b/test/Transforms/InstCombine/strncpy-1.ll
index 3ce2b9b5eecc..c70197f12e28 100644
--- a/test/Transforms/InstCombine/strncpy-1.ll
+++ b/test/Transforms/InstCombine/strncpy-1.ll
@@ -16,7 +16,7 @@ declare i32 @puts(i8*)
 ; Check a bunch of strncpy invocations together.
 
 define i32 @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
 ; CHECK-NOT: call i8* @strncpy
 ; CHECK: call i32 @puts
   %target = alloca [1024 x i8]
@@ -39,7 +39,7 @@ define i32 @test_simplify1() {
 ; Check strncpy(x, "", y) -> memset(x, '\0', y, 1).
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [1 x i8]* @null, i32 0, i32 0
 
@@ -51,7 +51,7 @@ define void @test_simplify2() {
 ; Check strncpy(x, y, 0) -> x.
 
 define i8* @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
 
@@ -63,7 +63,7 @@ define i8* @test_simplify3() {
 ; Check  strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant].
 
 define void @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
 
@@ -75,7 +75,7 @@ define void @test_simplify4() {
 ; Check cases that shouldn't be simplified.
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [32 x i8]* @b, i32 0, i32 0
 
@@ -85,7 +85,7 @@ define void @test_no_simplify1() {
 }
 
 define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strncpy-2.ll b/test/Transforms/InstCombine/strncpy-2.ll
index ac28ea655009..acc2878648fc 100644
--- a/test/Transforms/InstCombine/strncpy-2.ll
+++ b/test/Transforms/InstCombine/strncpy-2.ll
@@ -12,7 +12,7 @@ declare i16* @strncpy(i8*, i8*, i32)
 ; Check that 'strncpy' functions with the wrong prototype aren't simplified.
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
   %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll
index aadff4268ec2..90b4173ced77 100644
--- a/test/Transforms/InstCombine/strncpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; Check cases where dstlen >= len
 
 define void @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -22,7 +22,7 @@ define void @test_simplify1() {
 }
 
 define void @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -32,7 +32,7 @@ define void @test_simplify2() {
 }
 
 define void @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
@@ -44,7 +44,7 @@ define void @test_simplify3() {
 ; Check cases where dstlen < len
 
 define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
 
@@ -54,7 +54,7 @@ define void @test_no_simplify1() {
 }
 
 define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+; CHECK-LABEL: @test_no_simplify2(
   %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strncpy_chk-2.ll b/test/Transforms/InstCombine/strncpy_chk-2.ll
index a0f132ebf63b..829a4798f09a 100644
--- a/test/Transforms/InstCombine/strncpy_chk-2.ll
+++ b/test/Transforms/InstCombine/strncpy_chk-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 @b = common global [60 x i16] zeroinitializer, align 1
 
 define void @test_no_simplify() {
-; CHECK: @test_no_simplify
+; CHECK-LABEL: @test_no_simplify(
   %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
   %src = getelementptr inbounds [60 x i16]* @b, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strpbrk-1.ll b/test/Transforms/InstCombine/strpbrk-1.ll
index a5d0d86501b1..58b2d9e88d2c 100644
--- a/test/Transforms/InstCombine/strpbrk-1.ll
+++ b/test/Transforms/InstCombine/strpbrk-1.ll
@@ -13,7 +13,7 @@ declare i8* @strpbrk(i8*, i8*)
 ; Check strpbrk(s, "") -> NULL.
 
 define i8* @test_simplify1(i8* %str) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call i8* @strpbrk(i8* %str, i8* %pat)
@@ -24,7 +24,7 @@ define i8* @test_simplify1(i8* %str) {
 ; Check strpbrk("", s) -> NULL.
 
 define i8* @test_simplify2(i8* %pat) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %str = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call i8* @strpbrk(i8* %str, i8* %pat)
@@ -35,7 +35,7 @@ define i8* @test_simplify2(i8* %pat) {
 ; Check strpbrk(s1, s2), where s1 and s2 are constants.
 
 define i8* @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %str = getelementptr [12 x i8]* @hello, i32 0, i32 0
   %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
 
@@ -47,7 +47,7 @@ define i8* @test_simplify3() {
 ; Check strpbrk(s, "a") -> strchr(s, 'a').
 
 define i8* @test_simplify4(i8* %str) {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
 
   %ret = call i8* @strpbrk(i8* %str, i8* %pat)
@@ -59,7 +59,7 @@ define i8* @test_simplify4(i8* %str) {
 ; Check cases that shouldn't be simplified.
 
 define i8* @test_no_simplify1(i8* %str, i8* %pat) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %ret = call i8* @strpbrk(i8* %str, i8* %pat)
 ; CHECK-NEXT: %ret = call i8* @strpbrk(i8* %str, i8* %pat)
diff --git a/test/Transforms/InstCombine/strpbrk-2.ll b/test/Transforms/InstCombine/strpbrk-2.ll
index 31ac2905df2c..b797d7a59527 100644
--- a/test/Transforms/InstCombine/strpbrk-2.ll
+++ b/test/Transforms/InstCombine/strpbrk-2.ll
@@ -12,7 +12,7 @@ declare i16* @strpbrk(i8*, i8*)
 ; Check that 'strpbrk' functions with the wrong prototype aren't simplified.
 
 define i16* @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %str = getelementptr [12 x i8]* @hello, i32 0, i32 0
   %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
 
diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll
index 854ce45bffb2..4615f5f2bd25 100644
--- a/test/Transforms/InstCombine/strrchr-1.ll
+++ b/test/Transforms/InstCombine/strrchr-1.ll
@@ -42,8 +42,22 @@ define void @test_simplify3() {
   ret void
 }
 
+; strrchr with a character argument of 65280 (0xFF00), whose low byte is 0.
+; The call is expected to fold to a constant pointer at index 13 of @hello.
+define void @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %src, i32 65280)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
 define void @test_nosimplify1(i32 %chr) {
-; CHECK: @test_nosimplify1
+; CHECK-LABEL: @test_nosimplify1(
 ; CHECK: call i8* @strrchr
 ; CHECK: ret void
 
diff --git a/test/Transforms/InstCombine/strspn-1.ll b/test/Transforms/InstCombine/strspn-1.ll
index 393f88735bd4..ac940cce9e56 100644
--- a/test/Transforms/InstCombine/strspn-1.ll
+++ b/test/Transforms/InstCombine/strspn-1.ll
@@ -13,7 +13,7 @@ declare i64 @strspn(i8*, i8*)
 ; Check strspn(s, "") -> 0.
 
 define i64 @test_simplify1(i8* %str) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call i64 @strspn(i8* %str, i8* %pat)
@@ -24,7 +24,7 @@ define i64 @test_simplify1(i8* %str) {
 ; Check strspn("", s) -> 0.
 
 define i64 @test_simplify2(i8* %pat) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %str = getelementptr [1 x i8]* @null, i32 0, i32 0
 
   %ret = call i64 @strspn(i8* %str, i8* %pat)
@@ -35,7 +35,7 @@ define i64 @test_simplify2(i8* %pat) {
 ; Check strspn(s1, s2), where s1 and s2 are constants.
 
 define i64 @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %str = getelementptr [6 x i8]* @abcba, i32 0, i32 0
   %pat = getelementptr [4 x i8]* @abc, i32 0, i32 0
 
@@ -47,7 +47,7 @@ define i64 @test_simplify3() {
 ; Check cases that shouldn't be simplified.
 
 define i64 @test_no_simplify1(i8* %str, i8* %pat) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
 
   %ret = call i64 @strspn(i8* %str, i8* %pat)
 ; CHECK-NEXT: %ret = call i64 @strspn(i8* %str, i8* %pat)
diff --git a/test/Transforms/InstCombine/strstr-1.ll b/test/Transforms/InstCombine/strstr-1.ll
index 81f52718747d..a946dd32779a 100644
--- a/test/Transforms/InstCombine/strstr-1.ll
+++ b/test/Transforms/InstCombine/strstr-1.ll
@@ -14,7 +14,7 @@ declare i8* @strstr(i8*, i8*)
 ; Check strstr(str, "") -> str.
 
 define i8* @test_simplify1(i8* %str) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %pat = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0
   %ret = call i8* @strstr(i8* %str, i8* %pat)
   ret i8* %ret
@@ -24,7 +24,7 @@ define i8* @test_simplify1(i8* %str) {
 ; Check strstr(str, "a") -> strchr(str, 'a').
 
 define i8* @test_simplify2(i8* %str) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %pat = getelementptr inbounds [2 x i8]* @.str1, i32 0, i32 0
   %ret = call i8* @strstr(i8* %str, i8* %pat)
   ret i8* %ret
@@ -34,7 +34,7 @@ define i8* @test_simplify2(i8* %str) {
 ; Check strstr("abcde", "bcd") -> "abcde" + 1.
 
 define i8* @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %str = getelementptr inbounds [6 x i8]* @.str2, i32 0, i32 0
   %pat = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0
   %ret = call i8* @strstr(i8* %str, i8* %pat)
@@ -45,7 +45,7 @@ define i8* @test_simplify3() {
 ; Check strstr(str, str) -> str.
 
 define i8* @test_simplify4(i8* %str) {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %ret = call i8* @strstr(i8* %str, i8* %str)
   ret i8* %ret
 ; CHECK-NEXT: ret i8* %str
@@ -54,7 +54,7 @@ define i8* @test_simplify4(i8* %str) {
 ; Check strstr(str, pat) == str -> strncmp(str, pat, strlen(str)) == 0.
 
 define i1 @test_simplify5(i8* %str, i8* %pat) {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %ret = call i8* @strstr(i8* %str, i8* %pat)
   %cmp = icmp eq i8* %ret, %str
   ret i1 %cmp
diff --git a/test/Transforms/InstCombine/strstr-2.ll b/test/Transforms/InstCombine/strstr-2.ll
index 5092f9b4f803..7b28ed0e691e 100644
--- a/test/Transforms/InstCombine/strstr-2.ll
+++ b/test/Transforms/InstCombine/strstr-2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 declare i8 @strstr(i8*, i8*)
 
 define i8 @test_no_simplify1(i8* %str) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   %pat = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
   %ret = call i8 @strstr(i8* %str, i8* %pat)
 ; CHECK-NEXT: call i8 @strstr
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
index 7139972fe043..fc35dddcae5a 100644
--- a/test/Transforms/InstCombine/strto-1.ll
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -5,77 +5,77 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
 declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtol(i8*, i8** nocapture, i32)
+; CHECK: declare i64 @strtol(i8* readonly, i8** nocapture, i32)
 
 declare double @strtod(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare double @strtod(i8*, i8** nocapture, i32)
+; CHECK: declare double @strtod(i8* readonly, i8** nocapture, i32)
 
 declare float @strtof(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare float @strtof(i8*, i8** nocapture, i32)
+; CHECK: declare float @strtof(i8* readonly, i8** nocapture, i32)
 
 declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoul(i8*, i8** nocapture, i32)
+; CHECK: declare i64 @strtoul(i8* readonly, i8** nocapture, i32)
 
 declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoll(i8*, i8** nocapture, i32)
+; CHECK: declare i64 @strtoll(i8* readonly, i8** nocapture, i32)
 
 declare double @strtold(i8* %s, i8** %endptr)
-; CHECK: declare double @strtold(i8*, i8** nocapture)
+; CHECK: declare double @strtold(i8* readonly, i8** nocapture)
 
 declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoull(i8*, i8** nocapture, i32)
+; CHECK: declare i64 @strtoull(i8* readonly, i8** nocapture, i32)
 
 define void @test_simplify1(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   call i64 @strtol(i8* %x, i8** null, i32 10)
 ; CHECK-NEXT: call i64 @strtol(i8* nocapture %x, i8** null, i32 10)
   ret void
 }
 
 define void @test_simplify2(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   call double @strtod(i8* %x, i8** null, i32 10)
 ; CHECK-NEXT: call double @strtod(i8* nocapture %x, i8** null, i32 10)
   ret void
 }
 
 define void @test_simplify3(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   call float @strtof(i8* %x, i8** null, i32 10)
 ; CHECK-NEXT: call float @strtof(i8* nocapture %x, i8** null, i32 10)
   ret void
 }
 
 define void @test_simplify4(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   call i64 @strtoul(i8* %x, i8** null, i32 10)
 ; CHECK-NEXT: call i64 @strtoul(i8* nocapture %x, i8** null, i32 10)
   ret void
 }
 
 define void @test_simplify5(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   call i64 @strtoll(i8* %x, i8** null, i32 10)
 ; CHECK-NEXT: call i64 @strtoll(i8* nocapture %x, i8** null, i32 10)
   ret void
 }
 
 define void @test_simplify6(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   call double @strtold(i8* %x, i8** null)
 ; CHECK-NEXT: call double @strtold(i8* nocapture %x, i8** null)
   ret void
 }
 
 define void @test_simplify7(i8* %x, i8** %endptr) {
-; CHECK: @test_simplify7
+; CHECK-LABEL: @test_simplify7(
   call i64 @strtoull(i8* %x, i8** null, i32 10)
 ; CHECK-NEXT: call i64 @strtoull(i8* nocapture %x, i8** null, i32 10)
   ret void
 }
 
 define void @test_no_simplify1(i8* %x, i8** %endptr) {
-; CHECK: @test_no_simplify1
+; CHECK-LABEL: @test_no_simplify1(
   call i64 @strtol(i8* %x, i8** %endptr, i32 10)
 ; CHECK-NEXT: call i64 @strtol(i8* %x, i8** %endptr, i32 10)
   ret void
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
index 33a771e6d8b6..c80e31ae3ddc 100644
--- a/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -24,7 +24,7 @@ entry:
 %struct.test2 = type { i32 (i8*, i32*, double*)** }
 
 define i32 (i8*, i32*, double*)*** @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: memcpy
 ; CHECK: ret
   %tmp = alloca %struct.test2, align 8
@@ -35,10 +35,12 @@ define i32 (i8*, i32*, double*)*** @test2() {
   ret i32 (i8*, i32*, double*)*** %tmp2
 }
 
-; CHECK: !0 = metadata !{metadata !"float", metadata !1}
+; CHECK: !0 = metadata !{metadata !1, metadata !1, i64 0}
+; CHECK: !1 = metadata !{metadata !"float", metadata !2}
 
 !0 = metadata !{metadata !"Simple C/C++ TBAA"}
 !1 = metadata !{metadata !"omnipotent char", metadata !0}
-!2 = metadata !{metadata !"float", metadata !0}
+!2 = metadata !{metadata !5, metadata !5, i64 0}
 !3 = metadata !{i64 0, i64 4, metadata !2}
 !4 = metadata !{i64 0, i64 8, null}
+!5 = metadata !{metadata !"float", metadata !0}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index 1d14852bc803..e7aff00ba8d0 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -5,7 +5,7 @@ define i32 @test1(i32 %x) nounwind {
   %sub = sub i32 63, %and
   ret i32 %sub
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: and i32 %x, 31
 ; CHECK-NEXT: xor i32 %and, 63
 ; CHECK-NEXT: ret
@@ -18,7 +18,7 @@ define i32 @test2(i32 %x) nounwind {
   %sub = sub i32 31, %count
   ret i32 %sub
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ctlz
 ; CHECK-NEXT: xor i32 %count, 31
 ; CHECK-NEXT: ret
@@ -30,7 +30,7 @@ define i32 @test3(i32 %x) nounwind {
   %add = add i32 %sub, 42
   ret i32 %add
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: and i32 %x, 31
 ; CHECK-NEXT: sub i32 73, %and
 ; CHECK-NEXT: ret
@@ -41,7 +41,7 @@ define i32 @test4(i32 %x) nounwind {
   %add = add i32 %sub, 42
   ret i32 %add
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: add i32 %x, -2147483606
 ; CHECK-NEXT: ret
 }
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index b71ec8c98f83..36c523bd7b75 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -1,156 +1,156 @@
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 ; Optimize subtracts.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
-	%B = sub i32 %A, %A	
+	%B = sub i32 %A, %A
 	ret i32 %B
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
 define i32 @test2(i32 %A) {
-	%B = sub i32 %A, 0	
+	%B = sub i32 %A, 0
 	ret i32 %B
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test3(i32 %A) {
-	%B = sub i32 0, %A	
-	%C = sub i32 0, %B	
+	%B = sub i32 0, %A
+	%C = sub i32 0, %B
 	ret i32 %C
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 %A
 }
 
 define i32 @test4(i32 %A, i32 %x) {
-	%B = sub i32 0, %A	
-	%C = sub i32 %x, %B	
+	%B = sub i32 0, %A
+	%C = sub i32 %x, %B
 	ret i32 %C
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: %C = add i32 %x, %A
 ; CHECK: ret i32 %C
 }
 
 define i32 @test5(i32 %A, i32 %B, i32 %C) {
-	%D = sub i32 %B, %C	
-	%E = sub i32 %A, %D	
+	%D = sub i32 %B, %C
+	%E = sub i32 %A, %D
 	ret i32 %E
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %D1 = sub i32 %C, %B
 ; CHECK: %E = add
 ; CHECK: ret i32 %E
 }
 
 define i32 @test6(i32 %A, i32 %B) {
-	%C = and i32 %A, %B	
-	%D = sub i32 %A, %C	
+	%C = and i32 %A, %B
+	%D = sub i32 %A, %C
 	ret i32 %D
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: xor i32 %B, -1
-; CHECK-NEXT: %D = and i32 
+; CHECK-NEXT: %D = and i32
 ; CHECK-NEXT: ret i32 %D
 }
 
 define i32 @test7(i32 %A) {
-	%B = sub i32 -1, %A	
+	%B = sub i32 -1, %A
 	ret i32 %B
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %B = xor i32 %A, -1
 ; CHECK: ret i32 %B
 }
 
 define i32 @test8(i32 %A) {
-	%B = mul i32 9, %A	
-	%C = sub i32 %B, %A	
+	%B = mul i32 9, %A
+	%C = sub i32 %B, %A
 	ret i32 %C
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: %C = shl i32 %A, 3
 ; CHECK: ret i32 %C
 }
 
 define i32 @test9(i32 %A) {
-	%B = mul i32 3, %A	
-	%C = sub i32 %A, %B	
+	%B = mul i32 3, %A
+	%C = sub i32 %A, %B
 	ret i32 %C
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: %C = mul i32 %A, -2
 ; CHECK: ret i32 %C
 }
 
 define i32 @test10(i32 %A, i32 %B) {
-	%C = sub i32 0, %A	
-	%D = sub i32 0, %B	
-	%E = mul i32 %C, %D	
+	%C = sub i32 0, %A
+	%D = sub i32 0, %B
+	%E = mul i32 %C, %D
 	ret i32 %E
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: %E = mul i32 %A, %B
 ; CHECK: ret i32 %E
 }
 
 define i32 @test10a(i32 %A) {
-	%C = sub i32 0, %A	
-	%E = mul i32 %C, 7	
+	%C = sub i32 0, %A
+	%E = mul i32 %C, 7
 	ret i32 %E
-; CHECK: @test10a
+; CHECK-LABEL: @test10a(
 ; CHECK: %E = mul i32 %A, -7
 ; CHECK: ret i32 %E
 }
 
 define i1 @test11(i8 %A, i8 %B) {
-	%C = sub i8 %A, %B	
-	%cD = icmp ne i8 %C, 0	
+	%C = sub i8 %A, %B
+	%cD = icmp ne i8 %C, 0
 	ret i1 %cD
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: %cD = icmp ne i8 %A, %B
 ; CHECK: ret i1 %cD
 }
 
 define i32 @test12(i32 %A) {
-	%B = ashr i32 %A, 31	
-	%C = sub i32 0, %B	
+	%B = ashr i32 %A, 31
+	%C = sub i32 0, %B
 	ret i32 %C
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK: %C = lshr i32 %A, 31
 ; CHECK: ret i32 %C
 }
 
 define i32 @test13(i32 %A) {
-	%B = lshr i32 %A, 31	
-	%C = sub i32 0, %B	
+	%B = lshr i32 %A, 31
+	%C = sub i32 0, %B
 	ret i32 %C
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK: %C = ashr i32 %A, 31
 ; CHECK: ret i32 %C
 }
 
 define i32 @test14(i32 %A) {
-	%B = lshr i32 %A, 31	
-	%C = bitcast i32 %B to i32	
-	%D = sub i32 0, %C	
+	%B = lshr i32 %A, 31
+	%C = bitcast i32 %B to i32
+	%D = sub i32 0, %C
 	ret i32 %D
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK: %D = ashr i32 %A, 31
 ; CHECK: ret i32 %D
 }
 
 define i32 @test15(i32 %A, i32 %B) {
-	%C = sub i32 0, %A	
-	%D = srem i32 %B, %C	
+	%C = sub i32 0, %A
+	%D = srem i32 %B, %C
 	ret i32 %D
-; CHECK: @test15
-; CHECK: %D = srem i32 %B, %A 
+; CHECK-LABEL: @test15(
+; CHECK: %D = srem i32 %B, %A
 ; CHECK: ret i32 %D
 }
 
 define i32 @test16(i32 %A) {
-	%X = sdiv i32 %A, 1123	
-	%Y = sub i32 0, %X	
+	%X = sdiv i32 %A, 1123
+	%Y = sub i32 0, %X
 	ret i32 %Y
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK: %Y = sdiv i32 %A, -1123
 ; CHECK: ret i32 %Y
 }
@@ -158,57 +158,57 @@ define i32 @test16(i32 %A) {
 ; Can't fold subtract here because negation it might oveflow.
 ; PR3142
 define i32 @test17(i32 %A) {
-	%B = sub i32 0, %A	
-	%C = sdiv i32 %B, 1234	
+	%B = sub i32 0, %A
+	%C = sdiv i32 %B, 1234
 	ret i32 %C
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK: %B = sub i32 0, %A
 ; CHECK: %C = sdiv i32 %B, 1234
 ; CHECK: ret i32 %C
 }
 
 define i64 @test18(i64 %Y) {
-	%tmp.4 = shl i64 %Y, 2	
-	%tmp.12 = shl i64 %Y, 2	
-	%tmp.8 = sub i64 %tmp.4, %tmp.12	
+	%tmp.4 = shl i64 %Y, 2
+	%tmp.12 = shl i64 %Y, 2
+	%tmp.8 = sub i64 %tmp.4, %tmp.12
 	ret i64 %tmp.8
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK: ret i64 0
 }
 
 define i32 @test19(i32 %X, i32 %Y) {
-	%Z = sub i32 %X, %Y	
-	%Q = add i32 %Z, %Y	
+	%Z = sub i32 %X, %Y
+	%Q = add i32 %Z, %Y
 	ret i32 %Q
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK: ret i32 %X
 }
 
 define i1 @test20(i32 %g, i32 %h) {
-	%tmp.2 = sub i32 %g, %h	
-	%tmp.4 = icmp ne i32 %tmp.2, %g	
+	%tmp.2 = sub i32 %g, %h
+	%tmp.4 = icmp ne i32 %tmp.2, %g
 	ret i1 %tmp.4
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK: %tmp.4 = icmp ne i32 %h, 0
 ; CHECK: ret i1 %tmp.4
 }
 
 define i1 @test21(i32 %g, i32 %h) {
-	%tmp.2 = sub i32 %g, %h	
-	%tmp.4 = icmp ne i32 %tmp.2, %g		
+	%tmp.2 = sub i32 %g, %h
+	%tmp.4 = icmp ne i32 %tmp.2, %g
         ret i1 %tmp.4
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK: %tmp.4 = icmp ne i32 %h, 0
 ; CHECK: ret i1 %tmp.4
 }
 
 ; PR2298
 define zeroext i1 @test22(i32 %a, i32 %b)  nounwind  {
-	%tmp2 = sub i32 0, %a	
-	%tmp4 = sub i32 0, %b	
-	%tmp5 = icmp eq i32 %tmp2, %tmp4	
+	%tmp2 = sub i32 0, %a
+	%tmp4 = sub i32 0, %b
+	%tmp5 = icmp eq i32 %tmp2, %tmp4
 	ret i1 %tmp5
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 ; CHECK: %tmp5 = icmp eq i32 %b, %a
 ; CHECK: ret i1 %tmp5
 }
@@ -222,32 +222,67 @@ define i32 @test23(i8* %P, i64 %A){
   %F = trunc i64 %E to i32
   %G = sub i32 %D, %F
   ret i32 %G
-; CHECK: @test23
+; CHECK-LABEL: @test23(
 ; CHECK-NEXT: = trunc i64 %A to i32
 ; CHECK-NEXT: ret i32
 }
 
+define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) { ; addrspace(1) variant of @test23 using i16 offsets
+; CHECK-LABEL: @test23_as1(
+; CHECK-NEXT: = trunc i16 %A to i8
+; CHECK-NEXT: ret i8
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %D = trunc i16 %C to i8
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %F = trunc i16 %E to i8
+  %G = sub i8 %D, %F
+  ret i8 %G
+}
+
 define i64 @test24(i8* %P, i64 %A){
   %B = getelementptr inbounds i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
   %E = ptrtoint i8* %P to i64
   %G = sub i64 %C, %E
   ret i64 %G
-; CHECK: @test24
+; CHECK-LABEL: @test24(
 ; CHECK-NEXT: ret i64 %A
 }
 
+define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) { ; addrspace(1) variant of @test24 using i16 offsets
+; CHECK-LABEL: @test24_as1(
+; CHECK-NEXT: ret i16 %A
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %G = sub i16 %C, %E
+  ret i16 %G
+}
+
 define i64 @test24a(i8* %P, i64 %A){
   %B = getelementptr inbounds i8* %P, i64 %A
   %C = ptrtoint i8* %B to i64
   %E = ptrtoint i8* %P to i64
   %G = sub i64 %E, %C
   ret i64 %G
-; CHECK: @test24a
+; CHECK-LABEL: @test24a(
 ; CHECK-NEXT: sub i64 0, %A
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
 }
 
+define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) { ; addrspace(1) variant of @test24a (operands of the sub swapped)
+; CHECK-LABEL: @test24a_as1(
+; CHECK-NEXT: sub i16 0, %A
+; CHECK-NEXT: ret i16
+  %B = getelementptr inbounds i8 addrspace(1)* %P, i16 %A
+  %C = ptrtoint i8 addrspace(1)* %B to i16
+  %E = ptrtoint i8 addrspace(1)* %P to i16
+  %G = sub i16 %E, %C
+  ret i16 %G
+}
+
+
 @Arr = external global [42 x i16]
 
 define i64 @test24b(i8* %P, i64 %A){
@@ -255,9 +290,9 @@ define i64 @test24b(i8* %P, i64 %A){
   %C = ptrtoint i16* %B to i64
   %G = sub i64 %C, ptrtoint ([42 x i16]* @Arr to i64)
   ret i64 %G
-; CHECK: @test24b
+; CHECK-LABEL: @test24b(
 ; CHECK-NEXT: shl nuw i64 %A, 1
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
 }
 
 
@@ -266,17 +301,31 @@ define i64 @test25(i8* %P, i64 %A){
   %C = ptrtoint i16* %B to i64
   %G = sub i64 %C, ptrtoint (i16* getelementptr ([42 x i16]* @Arr, i64 1, i64 0) to i64)
   ret i64 %G
-; CHECK: @test25
+; CHECK-LABEL: @test25(
 ; CHECK-NEXT: shl nuw i64 %A, 1
 ; CHECK-NEXT: add i64 {{.*}}, -84
-; CHECK-NEXT: ret i64 
+; CHECK-NEXT: ret i64
+}
+
+@Arr_as1 = external addrspace(1) global [42 x i16]
+
+define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; indexes @Arr_as1 with an i64 but converts the pointer to i16
+; CHECK-LABEL: @test25_as1(
+; CHECK-NEXT: %1 = trunc i64 %A to i16
+; CHECK-NEXT: shl nuw i16 %1, 1
+; CHECK-NEXT: add i16 {{.*}}, -84
+; CHECK-NEXT: ret i16
+  %B = getelementptr inbounds [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A
+  %C = ptrtoint i16 addrspace(1)* %B to i16
+  %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16)
+  ret i16 %G
 }
 
 define i32 @test26(i32 %x) {
   %shl = shl i32 3, %x
   %neg = sub i32 0, %shl
   ret i32 %neg
-; CHECK: @test26
+; CHECK-LABEL: @test26(
 ; CHECK-NEXT: shl i32 -3
 ; CHECK-NEXT: ret i32
 }
@@ -285,7 +334,7 @@ define i32 @test27(i32 %x, i32 %y) {
   %mul = mul i32 %y, -8
   %sub = sub i32 %x, %mul
   ret i32 %sub
-; CHECK: @test27
+; CHECK-LABEL: @test27(
 ; CHECK-NEXT: shl i32 %y, 3
 ; CHECK-NEXT: add i32
 ; CHECK-NEXT: ret i32
@@ -296,7 +345,7 @@ define i32 @test28(i32 %x, i32 %y, i32 %z) {
   %mul = mul i32 %neg, %y
   %sub = sub i32 %x, %mul
   ret i32 %sub
-; CHECK: @test28
+; CHECK-LABEL: @test28(
 ; CHECK-NEXT: mul i32 %z, %y
 ; CHECK-NEXT: add i32
 ; CHECK-NEXT: ret i32
@@ -309,7 +358,7 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) {
   %cast2 = ptrtoint i8* %gep2 to i64
   %sub = sub i64 %cast1, %cast2
   ret i64 %sub
-; CHECK: @test29
+; CHECK-LABEL: @test29(
 ; CHECK-NEXT: sub i64 %i, %j
 ; CHECK-NEXT: ret i64
 }
@@ -322,8 +371,24 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) {
   %cast2 = ptrtoint i8* %gep2 to i64
   %sub = sub i64 %cast1, %cast2
   ret i64 %sub
-; CHECK: @test30
+; CHECK-LABEL: @test30(
 ; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
 ; CHECK-NEXT: sub i64 %gep1.idx, %j
 ; CHECK-NEXT: ret i64
 }
+
+define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test30_as1(
+; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2
+; CHECK-NEXT: sub i16 %gep1.idx, %j
+; CHECK-NEXT: ret i16
+  %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+  %gep1 = getelementptr inbounds i32 addrspace(1)* %bit, i16 %i
+  %gep2 = getelementptr inbounds i8 addrspace(1)* %foo, i16 %j
+  %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16
+  %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16
+  %sub = sub i16 %cast1, %cast2
+  ret i16 %sub
+}
+
+
diff --git a/test/Transforms/InstCombine/toascii-1.ll b/test/Transforms/InstCombine/toascii-1.ll
index c4a13e229393..f5e18983e122 100644
--- a/test/Transforms/InstCombine/toascii-1.ll
+++ b/test/Transforms/InstCombine/toascii-1.ll
@@ -9,49 +9,49 @@ declare i32 @toascii(i32)
 ; Check isascii(c) -> c & 0x7f.
 
 define i32 @test_simplify1() {
-; CHECK: @test_simplify1
+; CHECK-LABEL: @test_simplify1(
   %ret = call i32 @toascii(i32 0)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify2() {
-; CHECK: @test_simplify2
+; CHECK-LABEL: @test_simplify2(
   %ret = call i32 @toascii(i32 1)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 1
 }
 
 define i32 @test_simplify3() {
-; CHECK: @test_simplify3
+; CHECK-LABEL: @test_simplify3(
   %ret = call i32 @toascii(i32 127)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 127
 }
 
 define i32 @test_simplify4() {
-; CHECK: @test_simplify4
+; CHECK-LABEL: @test_simplify4(
   %ret = call i32 @toascii(i32 128)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify5() {
-; CHECK: @test_simplify5
+; CHECK-LABEL: @test_simplify5(
   %ret = call i32 @toascii(i32 255)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 127
 }
 
 define i32 @test_simplify6() {
-; CHECK: @test_simplify6
+; CHECK-LABEL: @test_simplify6(
   %ret = call i32 @toascii(i32 256)
   ret i32 %ret
 ; CHECK-NEXT: ret i32 0
 }
 
 define i32 @test_simplify7(i32 %x) {
-; CHECK: @test_simplify7
+; CHECK-LABEL: @test_simplify7(
   %ret = call i32 @toascii(i32 %x)
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 127
   ret i32 %ret
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index cbbad7f79777..ee81cf8c3c5d 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -11,7 +11,7 @@ define i64 @test1(i64 %a) {
   %d = zext i32 %c to i64
   call void @use(i32 %b)
   ret i64 %d
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: ext
 ; CHECK: ret
 }
@@ -22,7 +22,7 @@ define i64 @test2(i64 %a) {
   %d = sext i32 %q to i64
   call void @use(i32 %b)
   ret i64 %d
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: shl i64 %a, 36
 ; CHECK: %d = ashr exact i64 {{.*}}, 36
 ; CHECK: ret i64 %d
@@ -33,7 +33,7 @@ define i64 @test3(i64 %a) {
   %d = zext i32 %c to i64
   call void @use(i32 %b)
   ret i64 %d
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: ext
 ; CHECK: ret
 }
@@ -44,7 +44,7 @@ define i64 @test4(i64 %a) {
   %d = zext i32 %x to i64
   call void @use(i32 %b)
   ret i64 %d
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: = and i64 %a, 8
 ; CHECK: = xor i64 {{.*}}, 8
 ; CHECK-NOT: ext
@@ -56,7 +56,7 @@ define i32 @test5(i32 %A) {
   %C = lshr i128 %B, 16
   %D = trunc i128 %C to i32
   ret i32 %D
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: %C = lshr i32 %A, 16
 ; CHECK: ret i32 %C
 }
@@ -66,7 +66,7 @@ define i32 @test6(i64 %A) {
   %C = lshr i128 %B, 32
   %D = trunc i128 %C to i32
   ret i32 %D
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: %C = lshr i64 %A, 32
 ; CHECK: %D = trunc i64 %C to i32
 ; CHECK: ret i32 %D
@@ -77,7 +77,7 @@ define i92 @test7(i64 %A) {
   %C = lshr i128 %B, 32
   %D = trunc i128 %C to i92
   ret i92 %D
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: %B = zext i64 %A to i92
 ; CHECK: %C = lshr i92 %B, 32
 ; CHECK: ret i92 %C
@@ -90,7 +90,7 @@ define i64 @test8(i32 %A, i32 %B) {
   %ins35 = or i128 %tmp33, %tmp38
   %tmp42 = trunc i128 %ins35 to i64
   ret i64 %tmp42
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK:   %tmp38 = zext i32 %A to i64
 ; CHECK:   %tmp32 = zext i32 %B to i64
 ; CHECK:   %tmp33 = shl nuw i64 %tmp32, 32
@@ -102,7 +102,7 @@ define i8 @test9(i32 %X) {
   %Y = and i32 %X, 42
   %Z = trunc i32 %Y to i8
   ret i8 %Z
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: trunc
 ; CHECK: and
 ; CHECK: ret
@@ -113,7 +113,7 @@ define i8 @test10(i32 %X) {
   %Y = trunc i32 %X to i8
   %Z = and i8 %Y, 42
   ret i8 %Z
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: trunc
 ; CHECK: and
 ; CHECK: ret
diff --git a/test/Transforms/InstCombine/udivrem-change-width.ll b/test/Transforms/InstCombine/udivrem-change-width.ll
index b388a3b0634e..478e9ca387fc 100644
--- a/test/Transforms/InstCombine/udivrem-change-width.ll
+++ b/test/Transforms/InstCombine/udivrem-change-width.ll
@@ -9,7 +9,7 @@ define i8 @udiv_i8(i8 %a, i8 %b) nounwind {
   %div = udiv i32 %conv, %conv2   
   %conv3 = trunc i32 %div to i8   
   ret i8 %conv3
-; CHECK: @udiv_i8
+; CHECK-LABEL: @udiv_i8(
 ; CHECK: udiv i8 %a, %b
 }
 
@@ -19,7 +19,7 @@ define i8 @urem_i8(i8 %a, i8 %b) nounwind {
   %div = urem i32 %conv, %conv2   
   %conv3 = trunc i32 %div to i8   
   ret i8 %conv3
-; CHECK: @urem_i8
+; CHECK-LABEL: @urem_i8(
 ; CHECK: urem i8 %a, %b
 }
 
@@ -28,7 +28,7 @@ define i32 @udiv_i32(i8 %a, i8 %b) nounwind {
   %conv2 = zext i8 %b to i32
   %div = udiv i32 %conv, %conv2
   ret i32 %div
-; CHECK: @udiv_i32
+; CHECK-LABEL: @udiv_i32(
 ; CHECK: udiv i8 %a, %b
 ; CHECK: zext
 }
@@ -38,7 +38,7 @@ define i32 @urem_i32(i8 %a, i8 %b) nounwind {
   %conv2 = zext i8 %b to i32
   %div = urem i32 %conv, %conv2
   ret i32 %div
-; CHECK: @urem_i32
+; CHECK-LABEL: @urem_i32(
 ; CHECK: urem i8 %a, %b
 ; CHECK: zext
 }
@@ -47,7 +47,7 @@ define i32 @udiv_i32_c(i8 %a) nounwind {
   %conv = zext i8 %a to i32
   %div = udiv i32 %conv, 10
   ret i32 %div
-; CHECK: @udiv_i32_c
+; CHECK-LABEL: @udiv_i32_c(
 ; CHECK: udiv i8 %a, 10
 ; CHECK: zext
 }
@@ -56,7 +56,7 @@ define i32 @urem_i32_c(i8 %a) nounwind {
   %conv = zext i8 %a to i32
   %div = urem i32 %conv, 10
   ret i32 %div
-; CHECK: @urem_i32_c
+; CHECK-LABEL: @urem_i32_c(
 ; CHECK: urem i8 %a, 10
 ; CHECK: zext
 }
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 0019a57627cb..d12412a92977 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -2,7 +2,7 @@
 
 define i16 @test1(float %f) {
 entry:
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: fmul float
 ; CHECK-NOT: insertelement {{.*}} 0.00
 ; CHECK-NOT: call {{.*}} @llvm.x86.sse.mul
@@ -22,7 +22,7 @@ entry:
 }
 
 define i32 @test2(float %f) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: insertelement
 ; CHECK-NOT: extractelement
 ; CHECK: ret
@@ -37,7 +37,7 @@ define i32 @test2(float %f) {
 }
 
 define i64 @test3(float %f, double %d) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: insertelement {{.*}} 0.00
 ; CHECK: ret
 entry:
@@ -85,7 +85,7 @@ entry:
 }
 
 define void @get_image() nounwind {
-; CHECK: @get_image
+; CHECK-LABEL: @get_image(
 ; CHECK-NOT: extractelement
 ; CHECK: unreachable
 entry:
@@ -105,7 +105,7 @@ bb3:            ; preds = %bb2, %entry
 
 ; PR4340
 define void @vac(<4 x float>* nocapture %a) nounwind {
-; CHECK: @vac
+; CHECK-LABEL: @vac(
 ; CHECK-NOT: load
 ; CHECK: ret
 entry:
@@ -155,7 +155,7 @@ declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
 
 define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
 entry:
-; CHECK: define <4 x float> @dead_shuffle_elt
+; CHECK-LABEL: define <4 x float> @dead_shuffle_elt(
 ; CHECK: shufflevector <2 x float> %y, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   %shuffle9.i = shufflevector <4 x float> %x, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -163,7 +163,7 @@ entry:
 }
 
 define <2 x float> @test_fptrunc(double %f) {
-; CHECK: @test_fptrunc
+; CHECK-LABEL: @test_fptrunc(
 ; CHECK: insertelement
 ; CHECK: insertelement
 ; CHECK-NOT: insertelement
@@ -177,7 +177,7 @@ define <2 x float> @test_fptrunc(double %f) {
 }
 
 define <2 x double> @test_fpext(float %f) {
-; CHECK: @test_fpext
+; CHECK-LABEL: @test_fpext(
 ; CHECK: insertelement
 ; CHECK: insertelement
 ; CHECK-NOT: insertelement
@@ -191,7 +191,7 @@ define <2 x double> @test_fpext(float %f) {
 }
 
 define <4 x float> @test_select(float %f, float %g) {
-; CHECK: @test_select
+; CHECK-LABEL: @test_select(
 ; CHECK: %a0 = insertelement <4 x float> undef, float %f, i32 0
 ; CHECK-NOT: insertelement
 ; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 166066a201bf..3daf72ede509 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | not grep extractelement
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: extractelement
 
 define i32 @test(float %f) {
         %tmp7 = insertelement <4 x float> undef, float %f, i32 0                ; <<4 x float>> [#uses=1]
diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll
index e35fa5e551fb..3b949209c4d7 100644
--- a/test/Transforms/InstCombine/vec_insertelt.ll
+++ b/test/Transforms/InstCombine/vec_insertelt.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | grep "ret <4 x i32> %A"
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: ret <4 x i32> %A
 
 ; PR1286
 define <4 x i32> @test1(<4 x i32> %A) {
diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll
index 2f10fc2c1ed2..73ec1f1bb04f 100644
--- a/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -25,3 +25,28 @@ ret:
   ret void
 }
 
+define i1 @g(<3 x i32> %input_2) { ; extractelement of a loop-carried <3 x i32> phi that is updated by a vector sdiv
+; CHECK: extractelement
+entry:
+  br label %for.cond
+
+for.cond:
+; CHECK: phi i32
+  %input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
+  %input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
+  br i1 undef, label %for.end, label %for.body
+
+; CHECK: extractelement
+for.body:
+  %dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
+  %sub44 = sub <3 x i32> zeroinitializer, %dec43
+  %div45 = sdiv <3 x i32> %input_2.addr.0, %sub44
+  br label %for.cond
+
+for.end:
+  %0 = extractelement <3 x i32> %input_2.addr.0, i32 0
+  %.89 = select i1 false, i32 0, i32 %0
+  %tobool313 = icmp eq i32 %.89, 0
+  ret i1 %tobool313
+}
+
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 8f78c2e6bd50..3ee43dc63a6f 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1,21 +1,21 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define <4 x float> @test1(<4 x float> %v1) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret <4 x float> %v1
   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x float> %v2
 }
 
 define <4 x float> @test2(<4 x float> %v1) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret <4 x float> %v1
   %v2 = shufflevector <4 x float> %v1, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %v2
 }
 
 define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret float %f
         %C = insertelement <4 x float> %A, float %f, i32 0
         %D = shufflevector <4 x float> %C, <4 x float> %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
@@ -24,7 +24,7 @@ define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
 }
 
 define i32 @test4(<4 x i32> %X) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: extractelement
 ; CHECK-NEXT: ret 
         %tmp152.i53899.i = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -33,7 +33,7 @@ define i32 @test4(<4 x i32> %X) {
 }
 
 define i32 @test5(<4 x i32> %X) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: extractelement
 ; CHECK-NEXT: ret 
         %tmp152.i53899.i = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
@@ -42,7 +42,7 @@ define i32 @test5(<4 x i32> %X) {
 }
 
 define float @test6(<4 x float> %X) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: extractelement
 ; CHECK-NEXT: ret 
         %X1 = bitcast <4 x float> %X to <4 x i32>
@@ -53,7 +53,7 @@ define float @test6(<4 x float> %X) {
 }
 
 define <4 x float> @test7(<4 x float> %tmp45.i) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: ret <4 x float> %tmp45.i
         %tmp1642.i = shufflevector <4 x float> %tmp45.i, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
         ret <4 x float> %tmp1642.i
@@ -61,7 +61,7 @@ define <4 x float> @test7(<4 x float> %tmp45.i) {
 
 ; This should turn into a single shuffle.
 define <4 x float> @test8(<4 x float> %tmp, <4 x float> %tmp1) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
         %tmp4 = extractelement <4 x float> %tmp, i32 1
@@ -77,7 +77,7 @@ define <4 x float> @test8(<4 x float> %tmp, <4 x float> %tmp1) {
 ; Test fold of two shuffles where the first shuffle vectors inputs are a
 ; different length then the second.
 define <4 x i8> @test9(<16 x i8> %tmp6) nounwind {
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
 	%tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 >		; <<4 x i8>> [#uses=1]
@@ -89,7 +89,7 @@ define <4 x i8> @test9(<16 x i8> %tmp6) nounwind {
 ; mask values of 2*N, where N is the mask length.  These shuffles should not
 ; be folded (because [8,9,4,8] may not be a mask supported by the target).
 define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind {
-; CHECK: @test9a
+; CHECK-LABEL: @test9a(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
@@ -101,7 +101,7 @@ define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind {
 ; Test fold of two shuffles where the first shuffle vectors inputs are a
 ; different length then the second.
 define <4 x i8> @test9b(<4 x i8> %tmp6, <4 x i8> %tmp7) nounwind {
-; CHECK: @test9
+; CHECK-LABEL: @test9b(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
   %tmp1 = shufflevector <4 x i8> %tmp6, <4 x i8> %tmp7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>		; <<4 x i8>> [#uses=1]
@@ -111,7 +111,7 @@ define <4 x i8> @test9b(<4 x i8> %tmp6, <4 x i8> %tmp7) nounwind {
 
 ; Redundant vector splats should be removed.  Radar 8597790.
 define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: ret
   %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -122,7 +122,7 @@ define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
 ; Test fold of two shuffles where the two shufflevector inputs's op1 are
 ; the same
 define <8 x i8> @test11(<16 x i8> %tmp6) nounwind {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NEXT: shufflevector <16 x i8> %tmp6, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT: ret
   %tmp1 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>		; <<4 x i8>> [#uses=1]
@@ -134,7 +134,7 @@ define <8 x i8> @test11(<16 x i8> %tmp6) nounwind {
 ; Test fold of two shuffles where the first shufflevector's inputs are
 ; the same as the second
 define <8 x i8> @test12(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: shufflevector <8 x i8> %tmp6, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
 ; CHECK-NEXT: ret
   %tmp1 = shufflevector <8 x i8> %tmp6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>	; <<8 x i8>> [#uses=1]
@@ -145,7 +145,7 @@ define <8 x i8> @test12(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
 ; Test fold of two shuffles where the first shufflevector's inputs are
 ; the same as the second
 define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
-; CHECK: @test12a
+; CHECK-LABEL: @test12a(
 ; CHECK-NEXT: shufflevector <8 x i8> %tmp2, <8 x i8> %tmp6, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT: ret
   %tmp1 = shufflevector <8 x i8> %tmp6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>	; <<8 x i8>> [#uses=1]
@@ -153,3 +153,78 @@ define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
   ret <8 x i8> %tmp3
 }
 
+define <2 x i8> @test13a(i8 %x1, i8 %x2) {
+; CHECK-LABEL: @test13a(
+; CHECK-NEXT: insertelement {{.*}} undef, i8 %x1, i32 1
+; CHECK-NEXT: insertelement {{.*}} i8 %x2, i32 0
+; CHECK-NEXT: add {{.*}} <i8 7, i8 5>
+; CHECK-NEXT: ret
+  %A = insertelement <2 x i8> undef, i8 %x1, i32 0
+  %B = insertelement <2 x i8> %A, i8 %x2, i32 1
+  %C = add <2 x i8> %B, <i8 5, i8 7>
+  %D = shufflevector <2 x i8> %C, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x i8> %D
+}
+
+define <2 x i8> @test13b(i8 %x) {
+; CHECK-LABEL: @test13b(
+; CHECK-NEXT: insertelement <2 x i8> undef, i8 %x, i32 1
+; CHECK-NEXT: ret
+  %A = insertelement <2 x i8> undef, i8 %x, i32 0
+  %B = shufflevector <2 x i8> %A, <2 x i8> undef, <2 x i32> <i32 undef, i32 0>
+  ret <2 x i8> %B
+}
+
+define <2 x i8> @test13c(i8 %x1, i8 %x2) {
+; CHECK-LABEL: @test13c(
+; CHECK-NEXT: insertelement <2 x i8> {{.*}}, i32 0
+; CHECK-NEXT: insertelement <2 x i8> {{.*}}, i32 1
+; CHECK-NEXT: ret
+  %A = insertelement <4 x i8> undef, i8 %x1, i32 0
+  %B = insertelement <4 x i8> %A, i8 %x2, i32 2
+  %C = shufflevector <4 x i8> %B, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x i8> %C
+}
+
+define void @test14(i16 %conv10) {
+  %tmp = alloca <4 x i16>, align 8
+  %vecinit6 = insertelement <4 x i16> undef, i16 23, i32 3
+  store <4 x i16> %vecinit6, <4 x i16>* undef
+  %tmp1 = load <4 x i16>* undef
+  %vecinit11 = insertelement <4 x i16> undef, i16 %conv10, i32 3
+  %div = udiv <4 x i16> %tmp1, %vecinit11
+  store <4 x i16> %div, <4 x i16>* %tmp
+  %tmp4 = load <4 x i16>* %tmp
+  %tmp5 = shufflevector <4 x i16> %tmp4, <4 x i16> undef, <2 x i32> <i32 2, i32 0>
+  %cmp = icmp ule <2 x i16> %tmp5, undef
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  ret void
+}
+
+; Check that sequences of insert/extract element are
+; collapsed into a valid shuffle instruction with correct shuffle indices.
+ 
+define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) { ; two extract/insert pairs drawing from %LHS and %RHS
+; CHECK-LABEL: @test15a(
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp4
+  %tmp1 = extractelement <4 x float> %LHS, i32 0
+  %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1
+  %tmp3 = extractelement <4 x float> %RHS, i32 2
+  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3
+  ret <4 x float> %tmp4
+}
+ 
+define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) { ; like @test15a, but one element passes through an extra insert/extract round trip
+; CHECK-LABEL: @test15b(
+; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> %tmp5
+  %tmp0 = extractelement <4 x float> %LHS, i32 3
+  %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0
+  %tmp2 = extractelement <4 x float> %tmp1, i32 0
+  %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1
+  %tmp4 = extractelement <4 x float> %RHS, i32 2
+  %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3
+  ret <4 x float> %tmp5
+}
+
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 2f2990b7b055..ca97b3407da6 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -5,7 +5,7 @@ define <2 x i1> @test1(<2 x i64> %a) {
   %t = trunc <2 x i64> %a to <2 x i1>
   ret <2 x i1> %t
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK:   and <2 x i64> %a, <i64 1, i64 1>
 ; CHECK:   icmp ne <2 x i64> %1, zeroinitializer
 }
@@ -16,7 +16,7 @@ define <2 x i64> @test2(<2 x i64> %a) {
   %t = ashr <2 x i64> %b, <i64 1, i64 1>
   ret <2 x i64> %t
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK:   and <2 x i64> %a, <i64 65535, i64 65535>
 ; CHECK:   lshr <2 x i64> %b, <i64 1, i64 1>
 }
@@ -33,7 +33,7 @@ entry:
 	%conv = bitcast <4 x i32> %and to <2 x i64>
 	ret <2 x i64> %conv
         
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK:   fcmp ord <4 x float> %a, %b
 }
 
@@ -46,7 +46,7 @@ entry:
 	%or = or <4 x i32> %sext, %sext5
 	%conv = bitcast <4 x i32> %or to <2 x i64>
 	ret <2 x i64> %conv
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK:   fcmp uno <4 x float> %a, %b
 }
 
@@ -62,7 +62,7 @@ entry:
 	%conv = bitcast <4 x i32> %and to <2 x i64>
 	ret <2 x i64> %conv
         
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK:   sext <4 x i1> %cmp to <4 x i32>	
 ; The sext-and pair is canonicalized to a select.
 ; CHECK:   select <4 x i1> %cmp4, <4 x i32>	%sext, <4 x i32> zeroinitializer
@@ -126,7 +126,7 @@ define <2 x double> @fc(<2 x double> %t) {
 ; PR9228
 ; This was a crasher, so no CHECK statements.
 define <4 x float> @f(i32 %a) nounwind alwaysinline {
-; CHECK: @f
+; CHECK-LABEL: @f(
 entry:
   %dim = insertelement <4 x i32> undef, i32 %a, i32 0
   %dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
diff --git a/test/Transforms/InstCombine/vector-mul.ll b/test/Transforms/InstCombine/vector-mul.ll
new file mode 100644
index 000000000000..284d407ce33f
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-mul.ll
@@ -0,0 +1,408 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check that instcombine rewrites a multiply by a vector of known constant
+; power-of-2 elements into a vector shift.
+
+define <4 x i8> @Zero_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 0, i8 0, i8 0, i8 0>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @Zero_i8(
+; CHECK: ret <4 x i8> zeroinitializer
+
+define <4 x i8> @Identity_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @Identity_i8(
+; CHECK: ret <4 x i8> %InVec
+
+define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @AddToSelf_i8(
+; CHECK: shl <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1>
+; CHECK: ret
+
+define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 4, i8 4>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test1_i8(
+; CHECK: shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+; CHECK: ret
+
+define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 8, i8 8, i8 8, i8 8>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test2_i8(
+; CHECK: shl <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+; CHECK: ret
+
+define <4 x i8> @MulTest1_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 1, i8 2, i8 4, i8 8>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @MulTest1_i8(
+; CHECK: shl <4 x i8> %InVec, <i8 0, i8 1, i8 2, i8 3>
+; CHECK: ret
+
+define <4 x i8> @MulTest2_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @MulTest2_i8(
+; CHECK: mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+; CHECK: ret
+
+define <4 x i8> @MulTest3_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 2, i8 2>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @MulTest3_i8(
+; CHECK: shl <4 x i8> %InVec, <i8 2, i8 2, i8 1, i8 1>
+; CHECK: ret
+
+
+define <4 x i8> @MulTest4_i8(<4 x i8> %InVec)  {
+entry:
+  %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @MulTest4_i8(
+; CHECK: mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1>
+; CHECK: ret
+
+define <4 x i16> @Zero_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @Zero_i16(
+; CHECK: ret <4 x i16> zeroinitializer
+
+define <4 x i16> @Identity_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @Identity_i16(
+; CHECK: ret <4 x i16> %InVec
+
+define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @AddToSelf_i16(
+; CHECK: shl <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1>
+; CHECK: ret
+
+define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 4, i16 4>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test1_i16(
+; CHECK: shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+; CHECK: ret
+
+define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 8, i16 8, i16 8, i16 8>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test2_i16(
+; CHECK: shl <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+; CHECK: ret
+
+define <4 x i16> @MulTest1_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 1, i16 2, i16 4, i16 8>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @MulTest1_i16(
+; CHECK: shl <4 x i16> %InVec, <i16 0, i16 1, i16 2, i16 3>
+; CHECK: ret
+
+define <4 x i16> @MulTest2_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @MulTest2_i16(
+; CHECK: mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+; CHECK: ret
+
+define <4 x i16> @MulTest3_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 2, i16 2>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @MulTest3_i16(
+; CHECK: shl <4 x i16> %InVec, <i16 2, i16 2, i16 1, i16 1>
+; CHECK: ret
+
+define <4 x i16> @MulTest4_i16(<4 x i16> %InVec)  {
+entry:
+  %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @MulTest4_i16(
+; CHECK: mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2>
+; CHECK: ret
+
+define <4 x i32> @Zero_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @Zero_i32(
+; CHECK: ret <4 x i32> zeroinitializer
+
+define <4 x i32> @Identity_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @Identity_i32(
+; CHECK: ret <4 x i32> %InVec
+
+define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @AddToSelf_i32(
+; CHECK: shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+; CHECK: ret
+
+
+define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test1_i32(
+; CHECK: shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+; CHECK: ret
+
+define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test2_i32(
+; CHECK: shl <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+; CHECK: ret
+
+define <4 x i32> @MulTest1_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 1, i32 2, i32 4, i32 8>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @MulTest1_i32(
+; CHECK: shl <4 x i32> %InVec, <i32 0, i32 1, i32 2, i32 3>
+; CHECK: ret
+
+define <4 x i32> @MulTest2_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @MulTest2_i32(
+; CHECK: mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+; CHECK: ret
+
+define <4 x i32> @MulTest3_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 2, i32 2>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @MulTest3_i32(
+; CHECK: shl <4 x i32> %InVec, <i32 2, i32 2, i32 1, i32 1>
+; CHECK: ret
+
+
+define <4 x i32> @MulTest4_i32(<4 x i32> %InVec)  {
+entry:
+  %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @MulTest4_i32(
+; CHECK: mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1>
+; CHECK: ret
+
+define <4 x i64> @Zero_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @Zero_i64(
+; CHECK: ret <4 x i64> zeroinitializer
+
+define <4 x i64> @Identity_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @Identity_i64(
+; CHECK: ret <4 x i64> %InVec
+
+define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @AddToSelf_i64(
+; CHECK: shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+; CHECK: ret
+
+define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 4, i64 4>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test1_i64(
+; CHECK: shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+; CHECK: ret
+
+define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 8, i64 8, i64 8, i64 8>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @SplatPow2Test2_i64(
+; CHECK: shl <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+; CHECK: ret
+
+define <4 x i64> @MulTest1_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 1, i64 2, i64 4, i64 8>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @MulTest1_i64(
+; CHECK: shl <4 x i64> %InVec, <i64 0, i64 1, i64 2, i64 3>
+; CHECK: ret
+
+define <4 x i64> @MulTest2_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @MulTest2_i64(
+; CHECK: mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+; CHECK: ret
+
+define <4 x i64> @MulTest3_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 2, i64 2>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @MulTest3_i64(
+; CHECK: shl <4 x i64> %InVec, <i64 2, i64 2, i64 1, i64 1>
+; CHECK: ret
+
+define <4 x i64> @MulTest4_i64(<4 x i64> %InVec)  {
+entry:
+  %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @MulTest4_i64(
+; CHECK: mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1>
+; CHECK: ret
+
+; Also test that the following rewrite rule applies to vectors
+; of integers:
+;   ((X << C1)*C2) == (X * (C2 << C1))
+
+define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) {
+entry:
+  %shl = shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+  %mul = mul <4 x i8> %shl, <i8 3, i8 3, i8 3, i8 3>
+  ret <4 x i8> %mul
+}
+
+; CHECK-LABEL: @ShiftMulTest1(
+; CHECK: mul <4 x i8> %InVec, <i8 12, i8 12, i8 12, i8 12>
+; CHECK: ret
+
+define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) {
+entry:
+  %shl = shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+  %mul = mul <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %mul
+}
+
+; CHECK-LABEL: @ShiftMulTest2(
+; CHECK: mul <4 x i16> %InVec, <i16 12, i16 12, i16 12, i16 12>
+; CHECK: ret
+
+define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) {
+entry:
+  %shl = shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+  %mul = mul <4 x i32> %shl, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %mul
+}
+
+; CHECK-LABEL: @ShiftMulTest3(
+; CHECK: mul <4 x i32> %InVec, <i32 12, i32 12, i32 12, i32 12>
+; CHECK: ret
+
+define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) {
+entry:
+  %shl = shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+  %mul = mul <4 x i64> %shl, <i64 3, i64 3, i64 3, i64 3>
+  ret <4 x i64> %mul
+}
+
+; CHECK-LABEL: @ShiftMulTest4(
+; CHECK: mul <4 x i64> %InVec, <i64 12, i64 12, i64 12, i64 12>
+; CHECK: ret
+
diff --git a/test/Transforms/InstCombine/vector_gep2.ll b/test/Transforms/InstCombine/vector_gep2.ll
index 20165b110016..42057d60b8c8 100644
--- a/test/Transforms/InstCombine/vector_gep2.ll
+++ b/test/Transforms/InstCombine/vector_gep2.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-unknown-linux-gnu"
 
 define <2 x i8*> @testa(<2 x i8*> %a) {
-; CHECK: @testa
+; CHECK-LABEL: @testa(
   %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 1>
 ; CHECK: getelementptr <2 x i8*> %a, <2 x i64> <i64 0, i64 1>
   ret <2 x i8*> %g
diff --git a/test/Transforms/InstCombine/weak-symbols.ll b/test/Transforms/InstCombine/weak-symbols.ll
index 0039b5962f74..ec946ead8602 100644
--- a/test/Transforms/InstCombine/weak-symbols.ll
+++ b/test/Transforms/InstCombine/weak-symbols.ll
@@ -8,7 +8,7 @@
 @.str = private constant [2 x i8] c"y\00"
 
 define i32 @foo() nounwind {
-; CHECK: define i32 @foo
+; CHECK-LABEL: define i32 @foo(
 ; CHECK: call i32 @strcmp
 ; CHECK: ret i32 %temp1
 
@@ -20,7 +20,7 @@ entry:
 }
 
 define i32 @bar() nounwind {
-; CHECK: define i32 @bar
+; CHECK-LABEL: define i32 @bar(
 ; CHECK: ret i32 0
 
 entry:
diff --git a/test/Transforms/InstCombine/win-math.ll b/test/Transforms/InstCombine/win-math.ll
new file mode 100644
index 000000000000..e6e79e2b84a0
--- /dev/null
+++ b/test/Transforms/InstCombine/win-math.ll
@@ -0,0 +1,295 @@
+; RUN: opt -O2 -S -mtriple=i386-pc-win32 < %s | FileCheck %s -check-prefix=WIN32
+; RUN: opt -O2 -S -mtriple=x86_64-pc-win32 < %s | FileCheck %s -check-prefix=WIN64
+; RUN: opt -O2 -S -mtriple=i386-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW32
+; RUN: opt -O2 -S -mtriple=x86_64-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW64
+
+; x86 win32 msvcrt does not provide entry points for single-precision libm.
+; x86-64 win32 msvcrt does (except for fabsf)
+; msvcrt does not provide C99 math, but mingw32 does.
+
+declare double @acos(double %x)
+define float @float_acos(float %x) nounwind readnone {
+; WIN32-LABEL: @float_acos(
+; WIN32-NOT: float @acosf
+; WIN32: double @acos
+    %1 = fpext float %x to double
+    %2 = call double @acos(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @asin(double %x)
+define float @float_asin(float %x) nounwind readnone {
+; WIN32-LABEL: @float_asin(
+; WIN32-NOT: float @asinf
+; WIN32: double @asin
+    %1 = fpext float %x to double
+    %2 = call double @asin(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @atan(double %x)
+define float @float_atan(float %x) nounwind readnone {
+; WIN32-LABEL: @float_atan(
+; WIN32-NOT: float @atanf
+; WIN32: double @atan
+    %1 = fpext float %x to double
+    %2 = call double @atan(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @atan2(double %x, double %y)
+define float @float_atan2(float %x, float %y) nounwind readnone {
+; WIN32-LABEL: @float_atan2(
+; WIN32-NOT: float @atan2f
+; WIN32: double @atan2
+    %1 = fpext float %x to double
+    %2 = fpext float %y to double
+    %3 = call double @atan2(double %1, double %2)
+    %4 = fptrunc double %3 to float
+    ret float %4
+}
+
+declare double @ceil(double %x)
+define float @float_ceil(float %x) nounwind readnone {
+; WIN32-LABEL: @float_ceil(
+; WIN32-NOT: float @ceilf
+; WIN32: double @ceil
+; WIN64-LABEL: @float_ceil(
+; WIN64: float @ceilf
+; WIN64-NOT: double @ceil
+; MINGW32-LABEL: @float_ceil(
+; MINGW32: float @ceilf
+; MINGW32-NOT: double @ceil
+; MINGW64-LABEL: @float_ceil(
+; MINGW64: float @ceilf
+; MINGW64-NOT: double @ceil
+    %1 = fpext float %x to double
+    %2 = call double @ceil(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @_copysign(double %x) ; NOTE(review): C copysign() takes two operands (magnitude, sign); this one-operand prototype looks like an oversight - confirm
+define float @float_copysign(float %x) nounwind readnone {
+; WIN32-LABEL: @float_copysign(
+; WIN32-NOT: float @copysignf
+; WIN32-NOT: float @_copysignf
+; WIN32: double @_copysign
+    %1 = fpext float %x to double
+    %2 = call double @_copysign(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @cos(double %x)
+define float @float_cos(float %x) nounwind readnone {
+; WIN32-LABEL: @float_cos(
+; WIN32-NOT: float @cosf
+; WIN32: double @cos
+    %1 = fpext float %x to double
+    %2 = call double @cos(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @cosh(double %x)
+define float @float_cosh(float %x) nounwind readnone {
+; WIN32-LABEL: @float_cosh(
+; WIN32-NOT: float @coshf
+; WIN32: double @cosh
+    %1 = fpext float %x to double
+    %2 = call double @cosh(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @exp(double %x) ; unary, matching the C prototype double exp(double)
+define float @float_exp(float %x, float %y) nounwind readnone {
+; WIN32-LABEL: @float_exp(
+; WIN32-NOT: float @expf
+; WIN32: double @exp
+    %1 = fpext float %x to double
+    %2 = fpext float %y to double ; unused; %y is retained only so @float_exp keeps its original signature
+    %3 = call double @exp(double %1)
+    %4 = fptrunc double %3 to float
+    ret float %4
+}
+
+declare double @fabs(double %x) ; unary, matching the C prototype double fabs(double)
+define float @float_fabs(float %x, float %y) nounwind readnone {
+; WIN32-LABEL: @float_fabs(
+; WIN32-NOT: float @fabsf
+; WIN32: double @fabs
+; WIN64-LABEL: @float_fabs(
+; WIN64-NOT: float @fabsf
+; WIN64: double @fabs
+    %1 = fpext float %x to double
+    %2 = fpext float %y to double ; unused; %y is retained only so @float_fabs keeps its original signature
+    %3 = call double @fabs(double %1)
+    %4 = fptrunc double %3 to float
+    ret float %4
+}
+
+declare double @floor(double %x)
+define float @float_floor(float %x) nounwind readnone {
+; WIN32-LABEL: @float_floor(
+; WIN32-NOT: float @floorf
+; WIN32: double @floor
+; WIN64-LABEL: @float_floor(
+; WIN64: float @floorf
+; WIN64-NOT: double @floor
+; MINGW32-LABEL: @float_floor(
+; MINGW32: float @floorf
+; MINGW32-NOT: double @floor
+; MINGW64-LABEL: @float_floor(
+; MINGW64: float @floorf
+; MINGW64-NOT: double @floor
+    %1 = fpext float %x to double
+    %2 = call double @floor(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @fmod(double %x, double %y)
+define float @float_fmod(float %x, float %y) nounwind readnone {
+; WIN32-LABEL: @float_fmod(
+; WIN32-NOT: float @fmodf
+; WIN32: double @fmod
+    %1 = fpext float %x to double
+    %2 = fpext float %y to double
+    %3 = call double @fmod(double %1, double %2)
+    %4 = fptrunc double %3 to float
+    ret float %4
+}
+
+declare double @log(double %x)
+define float @float_log(float %x) nounwind readnone {
+; WIN32-LABEL: @float_log(
+; WIN32-NOT: float @logf
+; WIN32: double @log
+    %1 = fpext float %x to double
+    %2 = call double @log(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @pow(double %x, double %y)
+define float @float_pow(float %x, float %y) nounwind readnone {
+; WIN32-LABEL: @float_pow(
+; WIN32-NOT: float @powf
+; WIN32: double @pow
+    %1 = fpext float %x to double
+    %2 = fpext float %y to double
+    %3 = call double @pow(double %1, double %2)
+    %4 = fptrunc double %3 to float
+    ret float %4
+}
+
+declare double @sin(double %x)
+define float @float_sin(float %x) nounwind readnone {
+; WIN32-LABEL: @float_sin(
+; WIN32-NOT: float @sinf
+; WIN32: double @sin
+    %1 = fpext float %x to double
+    %2 = call double @sin(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @sinh(double %x)
+define float @float_sinh(float %x) nounwind readnone {
+; WIN32-LABEL: @float_sinh(
+; WIN32-NOT: float @sinhf
+; WIN32: double @sinh
+    %1 = fpext float %x to double
+    %2 = call double @sinh(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @sqrt(double %x)
+define float @float_sqrt(float %x) nounwind readnone {
+; WIN32-LABEL: @float_sqrt(
+; WIN32-NOT: float @sqrtf
+; WIN32: double @sqrt
+; WIN64-LABEL: @float_sqrt(
+; WIN64: float @sqrtf
+; WIN64-NOT: double @sqrt
+; MINGW32-LABEL: @float_sqrt(
+; MINGW32: float @sqrtf
+; MINGW32-NOT: double @sqrt
+; MINGW64-LABEL: @float_sqrt(
+; MINGW64: float @sqrtf
+; MINGW64-NOT: double @sqrt
+    %1 = fpext float %x to double
+    %2 = call double @sqrt(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @tan(double %x)
+define float @float_tan(float %x) nounwind readnone {
+; WIN32-LABEL: @float_tan(
+; WIN32-NOT: float @tanf
+; WIN32: double @tan
+    %1 = fpext float %x to double
+    %2 = call double @tan(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare double @tanh(double %x)
+define float @float_tanh(float %x) nounwind readnone {
+; WIN32-LABEL: @float_tanh(
+; WIN32-NOT: float @tanhf
+; WIN32: double @tanh
+    %1 = fpext float %x to double
+    %2 = call double @tanh(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+; win32 does not have round; mingw32 does
+declare double @round(double %x)
+define float @float_round(float %x) nounwind readnone {
+; WIN32-LABEL: @float_round(
+; WIN32-NOT: float @roundf
+; WIN32: double @round
+; WIN64-LABEL: @float_round(
+; WIN64-NOT: float @roundf
+; WIN64: double @round
+; MINGW32-LABEL: @float_round(
+; MINGW32: float @roundf
+; MINGW32-NOT: double @round
+; MINGW64-LABEL: @float_round(
+; MINGW64: float @roundf
+; MINGW64-NOT: double @round
+    %1 = fpext float %x to double
+    %2 = call double @round(double %1)
+    %3 = fptrunc double %2 to float
+    ret float %3
+}
+
+declare float @powf(float, float)
+; win32 lacks sqrtf&fabsf, win64 lacks fabsf
+define float @float_powsqrt(float %x) nounwind readnone {
+; WIN32-LABEL: @float_powsqrt(
+; WIN32-NOT: float @sqrtf
+; WIN32: float @powf
+; WIN64-LABEL: @float_powsqrt(
+; WIN64-NOT: float @sqrtf
+; WIN64: float @powf
+; MINGW32-LABEL: @float_powsqrt(
+; MINGW32: float @sqrtf
+; MINGW32: float @fabsf
+; MINGW32-NOT: float @powf
+; MINGW64-LABEL: @float_powsqrt(
+; MINGW64: float @sqrtf
+; MINGW64: float @fabsf
+; MINGW64-NOT: float @powf
+    %1 = call float @powf(float %x, float 0.5)
+    ret float %1
+}
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index be06d7999d84..d153e035c899 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -4,7 +4,7 @@
 
 ; PR1253
 define i1 @test0(i32 %A) {
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK: %C = icmp slt i32 %A, 0
 	%B = xor i32 %A, -2147483648
 	%C = icmp sgt i32 %B, -1
@@ -12,7 +12,7 @@ define i1 @test0(i32 %A) {
 }
 
 define i1 @test1(i32 %A) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %C = icmp slt i32 %A, 0
 	%B = xor i32 %A, 12345
 	%C = icmp slt i32 %B, 0
@@ -21,7 +21,7 @@ define i1 @test1(i32 %A) {
 
 ; PR1014
 define i32 @test2(i32 %tmp1) {
-; CHECK:      @test2
+; CHECK-LABEL:      @test2(
 ; CHECK-NEXT:   and i32 %tmp1, 32
 ; CHECK-NEXT:   or i32 %ovm, 8 
 ; CHECK-NEXT:   ret i32
@@ -32,7 +32,7 @@ define i32 @test2(i32 %tmp1) {
 }
 
 define i32 @test3(i32 %tmp1) {
-; CHECK:      @test3
+; CHECK-LABEL:      @test3(
 ; CHECK-NEXT:   and i32 %tmp1, 32
 ; CHECK-NEXT:   or i32 %ovm, 8
 ; CHECK-NEXT:   ret i32
@@ -47,7 +47,7 @@ define i32 @test4(i32 %A, i32 %B) {
 	%2 = ashr i32 %1, %B
 	%3 = xor i32 %2, -1
 	ret i32 %3
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: %1 = ashr i32 %A, %B
 ; CHECK: ret i32 %1
 }
@@ -62,7 +62,7 @@ test5:
   %xor1 = xor i32 %shr, 1
   %add = add i32 %xor1, %xor
   ret i32 %add
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: lshr i32 %val1, 8
 ; CHECK: ret
 }
@@ -78,7 +78,7 @@ define i32 @test6(i32 %x) {
   %shr = lshr i32 %xor, 16
   %add = add i32 %shr, %xor
   ret i32 %add
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: lshr i32 %x, 16
 ; CHECK: ret
 }
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index b5310575502b..d7f338b659b4 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -3,7 +3,7 @@
 
 define i32 @a(i1 zeroext %x, i1 zeroext %y) {
 entry:
-; CHECK: @a
+; CHECK-LABEL: @a(
 ; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
 ; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
 ; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
diff --git a/test/Transforms/InstSimplify/2010-12-20-Boolean.ll b/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
index 3aa1bd60cfd6..28c25c0e77ee 100644
--- a/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
+++ b/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
@@ -1,28 +1,28 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i1 @add(i1 %x) {
-; CHECK: @add
+; CHECK-LABEL: @add(
   %z = add i1 %x, %x
   ret i1 %z
 ; CHECK: ret i1 false
 }
 
 define i1 @sub(i1 %x) {
-; CHECK: @sub
+; CHECK-LABEL: @sub(
   %z = sub i1 false, %x
   ret i1 %z
 ; CHECK: ret i1 %x
 }
 
 define i1 @mul(i1 %x) {
-; CHECK: @mul
+; CHECK-LABEL: @mul(
   %z = mul i1 %x, %x
   ret i1 %z
 ; CHECK: ret i1 %x
 }
 
 define i1 @ne(i1 %x) {
-; CHECK: @ne
+; CHECK-LABEL: @ne(
   %z = icmp ne i1 %x, 0
   ret i1 %z
 ; CHECK: ret i1 %x
diff --git a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll b/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
index d20abd68c200..9ea0a5e10708 100644
--- a/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
+++ b/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i32 @factorize(i32 %x, i32 %y) {
-; CHECK: @factorize
+; CHECK-LABEL: @factorize(
 ; (X | 1) & (X | 2) -> X | (1 & 2) -> X
   %l = or i32 %x, 1
   %r = or i32 %x, 2
@@ -11,7 +11,7 @@ define i32 @factorize(i32 %x, i32 %y) {
 }
 
 define i32 @factorize2(i32 %x) {
-; CHECK: @factorize2
+; CHECK-LABEL: @factorize2(
 ; 3*X - 2*X -> X
   %l = mul i32 3, %x
   %r = mul i32 2, %x
@@ -21,7 +21,7 @@ define i32 @factorize2(i32 %x) {
 }
 
 define i32 @factorize3(i32 %x, i32 %a, i32 %b) {
-; CHECK: @factorize3
+; CHECK-LABEL: @factorize3(
 ; (X | (A|B)) & (X | B) -> X | ((A|B) & B) -> X | B
   %aORb = or i32 %a, %b
   %l = or i32 %x, %aORb
@@ -32,7 +32,7 @@ define i32 @factorize3(i32 %x, i32 %a, i32 %b) {
 }
 
 define i32 @factorize4(i32 %x, i32 %y) {
-; CHECK: @factorize4
+; CHECK-LABEL: @factorize4(
   %sh = shl i32 %y, 1
   %ml = mul i32 %sh, %x
   %mr = mul i32 %x, %y
@@ -42,7 +42,7 @@ define i32 @factorize4(i32 %x, i32 %y) {
 }
 
 define i32 @factorize5(i32 %x, i32 %y) {
-; CHECK: @factorize5
+; CHECK-LABEL: @factorize5(
   %sh = mul i32 %y, 2
   %ml = mul i32 %sh, %x
   %mr = mul i32 %x, %y
@@ -52,7 +52,7 @@ define i32 @factorize5(i32 %x, i32 %y) {
 }
 
 define i32 @expand(i32 %x) {
-; CHECK: @expand
+; CHECK-LABEL: @expand(
 ; ((X & 1) | 2) & 1 -> ((X & 1) & 1) | (2 & 1) -> (X & 1) | 0 -> X & 1
   %a = and i32 %x, 1
   %b = or i32 %a, 2
diff --git a/test/Transforms/InstSimplify/2011-01-14-Thread.ll b/test/Transforms/InstSimplify/2011-01-14-Thread.ll
index 8fc4dc5d5bb7..9de06600c0c3 100644
--- a/test/Transforms/InstSimplify/2011-01-14-Thread.ll
+++ b/test/Transforms/InstSimplify/2011-01-14-Thread.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i32 @shift_select(i1 %cond) {
-; CHECK: @shift_select
+; CHECK-LABEL: @shift_select(
   %s = select i1 %cond, i32 0, i32 1
   %r = lshr i32 %s, 1
   ret i32 %r
diff --git a/test/Transforms/InstSimplify/2011-02-01-Vector.ll b/test/Transforms/InstSimplify/2011-02-01-Vector.ll
index 3039a663fa45..3cbbf350ec12 100644
--- a/test/Transforms/InstSimplify/2011-02-01-Vector.ll
+++ b/test/Transforms/InstSimplify/2011-02-01-Vector.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define <2 x i32> @sdiv(<2 x i32> %x) {
-; CHECK: @sdiv
+; CHECK-LABEL: @sdiv(
   %div = sdiv <2 x i32> %x, <i32 1, i32 1>
   ret <2 x i32> %div
 ; CHECK: ret <2 x i32> %x
diff --git a/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll b/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
index d10c61fe2cf7..3514b3479374 100644
--- a/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
+++ b/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
@@ -14,7 +14,7 @@ lpad:
   %exc_ptr2 = insertvalue { i8*, i32 } undef, i8* %exc_ptr, 0
   %filter2 = insertvalue { i8*, i32 } %exc_ptr2, i32 %filter, 1
   resume { i8*, i32 } %filter2
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: extractvalue
 ; CHECK-NOT: insertvalue
 }
@@ -25,5 +25,5 @@ define { i8, i32 } @test2({ i8*, i32 } %x) {
   %ex = extractvalue { i8*, i32 } %x, 1
   %ins = insertvalue { i8, i32 } undef, i32 %ex, 1
   ret { i8, i32 } %ins
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 }
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index 33a4d6b02d63..c59d6c916a6b 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i64 @pow2(i32 %x) {
-; CHECK: @pow2
+; CHECK-LABEL: @pow2(
   %negx = sub i32 0, %x
   %x2 = and i32 %x, %negx
   %e = zext i32 %x2 to i64
@@ -12,7 +12,7 @@ define i64 @pow2(i32 %x) {
 }
 
 define i64 @pow2b(i32 %x) {
-; CHECK: @pow2b
+; CHECK-LABEL: @pow2b(
   %sh = shl i32 2, %x
   %e = zext i32 %sh to i64
   %nege = sub i64 0, %e
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index cf2f8476763f..fd854c5b4662 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -3,7 +3,7 @@
 declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
 
 define i1 @test_uadd1() {
-; CHECK: @test_uadd1
+; CHECK-LABEL: @test_uadd1(
   %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3)
   %overflow = extractvalue {i8, i1} %x, 1
   ret i1 %overflow
@@ -11,7 +11,7 @@ define i1 @test_uadd1() {
 }
 
 define i8 @test_uadd2() {
-; CHECK: @test_uadd2
+; CHECK-LABEL: @test_uadd2(
   %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44)
   %result = extractvalue {i8, i1} %x, 0
   ret i8 %result
@@ -21,7 +21,7 @@ define i8 @test_uadd2() {
 declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef)
 
 define i256 @test_cttz() {
-; CHECK: @test_cttz
+; CHECK-LABEL: @test_cttz(
   %x = call i256 @llvm.cttz.i256(i256 10, i1 false)
   ret i256 %x
 ; CHECK-NEXT: ret i256 1
@@ -30,7 +30,7 @@ define i256 @test_cttz() {
 declare i256 @llvm.ctpop.i256(i256 %src)
 
 define i256 @test_ctpop() {
-; CHECK: @test_ctpop
+; CHECK-LABEL: @test_ctpop(
   %x = call i256 @llvm.ctpop.i256(i256 10)
   ret i256 %x
 ; CHECK-NEXT: ret i256 2
@@ -40,7 +40,7 @@ define i256 @test_ctpop() {
 declare float @fabs(float %x)
 
 define float @test_fabs_libcall() {
-; CHECK: @test_fabs_libcall
+; CHECK-LABEL: @test_fabs_libcall(
 
   %x = call float @fabs(float -42.0)
 ; This is still a real function call, so instsimplify won't nuke it -- other
@@ -61,7 +61,7 @@ declare float @llvm.nearbyint.f32(float) nounwind readnone
 
 ; Test idempotent intrinsics
 define float @test_idempotence(float %a) {
-; CHECK: @test_idempotence
+; CHECK-LABEL: @test_idempotence(
 
 ; CHECK: fabs
 ; CHECK-NOT: fabs
@@ -101,3 +101,66 @@ define float @test_idempotence(float %a) {
 
   ret float %r4
 }
+
+define i8* @operator_new() {                      ; null test on the result of a plain operator new call
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8)     ; _Znwm: Itanium-mangled operator new(unsigned long)
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cast.end, label %cast.notnull
+
+cast.notnull:                                     ; preds = %entry
+  %add.ptr = getelementptr inbounds i8* %call, i64 4
+  br label %cast.end
+
+cast.end:                                         ; preds = %cast.notnull, %entry
+  %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
+  ret i8* %cast.result
+; Expected output: the null compare is folded, leaving a constant-false branch condition.
+; CHECK-LABEL: @operator_new(
+; CHECK: br i1 false, label %cast.end, label %cast.notnull
+}
+
+declare noalias i8* @_Znwm(i64)
+
+%"struct.std::nothrow_t" = type { i8 }
+@_ZSt7nothrow = external global %"struct.std::nothrow_t"
+
+define i8* @operator_new_nothrow_t() {            ; null test on the result of the nothrow allocator
+entry:
+  %call = tail call noalias i8* @_ZnamRKSt9nothrow_t(i64 8, %"struct.std::nothrow_t"* @_ZSt7nothrow) ; operator new[](unsigned long, std::nothrow_t const&)
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cast.end, label %cast.notnull
+
+cast.notnull:                                     ; preds = %entry
+  %add.ptr = getelementptr inbounds i8* %call, i64 4
+  br label %cast.end
+
+cast.end:                                         ; preds = %cast.notnull, %entry
+  %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
+  ret i8* %cast.result
+; Expected output: the null compare must survive, so the branch still tests %cmp.
+; CHECK-LABEL: @operator_new_nothrow_t(
+; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
+}
+
+declare i8* @_ZnamRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) nounwind
+
+define i8* @malloc_can_return_null() {            ; null test on the result of malloc
+entry:
+  %call = tail call noalias i8* @malloc(i64 8)
+  %cmp = icmp eq i8* %call, null
+  br i1 %cmp, label %cast.end, label %cast.notnull
+
+cast.notnull:                                     ; preds = %entry
+  %add.ptr = getelementptr inbounds i8* %call, i64 4
+  br label %cast.end
+
+cast.end:                                         ; preds = %cast.notnull, %entry
+  %cast.result = phi i8* [ %add.ptr, %cast.notnull ], [ null, %entry ]
+  ret i8* %cast.result
+; Expected output: the null compare must survive, so the branch still tests %cmp.
+; CHECK-LABEL: @malloc_can_return_null(
+; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
+}
+
+declare noalias i8* @malloc(i64)
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index b764c761cfb2..abb38695e710 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -2,7 +2,7 @@
 target datalayout = "p:32:32"
 
 define i1 @ptrtoint() {
-; CHECK: @ptrtoint
+; CHECK-LABEL: @ptrtoint(
   %a = alloca i8
   %tmp = ptrtoint i8* %a to i32
   %r = icmp eq i32 %tmp, 0
@@ -11,7 +11,7 @@ define i1 @ptrtoint() {
 }
 
 define i1 @bitcast() {
-; CHECK: @bitcast
+; CHECK-LABEL: @bitcast(
   %a = alloca i32
   %b = alloca i64
   %x = bitcast i32* %a to i8*
@@ -22,7 +22,7 @@ define i1 @bitcast() {
 }
 
 define i1 @gep() {
-; CHECK: @gep
+; CHECK-LABEL: @gep(
   %a = alloca [3 x i8], align 8
   %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
   %cmp = icmp eq i8* %x, null
@@ -31,7 +31,7 @@ define i1 @gep() {
 }
 
 define i1 @gep2() {
-; CHECK: @gep2
+; CHECK-LABEL: @gep2(
   %a = alloca [3 x i8], align 8
   %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
   %y = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
@@ -46,7 +46,7 @@ define i1 @gep2() {
 @gepz = extern_weak global %gept
 
 define i1 @gep3() {
-; CHECK: @gep3
+; CHECK-LABEL: @gep3(
   %x = alloca %gept, align 8
   %a = getelementptr %gept* %x, i64 0, i32 0
   %b = getelementptr %gept* %x, i64 0, i32 1
@@ -56,7 +56,7 @@ define i1 @gep3() {
 }
 
 define i1 @gep4() {
-; CHECK: @gep4
+; CHECK-LABEL: @gep4(
   %x = alloca %gept, align 8
   %a = getelementptr %gept* @gepy, i64 0, i32 0
   %b = getelementptr %gept* @gepy, i64 0, i32 1
@@ -66,7 +66,7 @@ define i1 @gep4() {
 }
 
 define i1 @gep5() {
-; CHECK: @gep5
+; CHECK-LABEL: @gep5(
   %x = alloca %gept, align 8
   %a = getelementptr inbounds %gept* %x, i64 0, i32 1
   %b = getelementptr %gept* @gepy, i64 0, i32 0
@@ -77,7 +77,7 @@ define i1 @gep5() {
 
 define i1 @gep6(%gept* %x) {
 ; Same as @gep3 but potentially null.
-; CHECK: @gep6
+; CHECK-LABEL: @gep6(
   %a = getelementptr %gept* %x, i64 0, i32 0
   %b = getelementptr %gept* %x, i64 0, i32 1
   %equal = icmp eq i32* %a, %b
@@ -86,7 +86,7 @@ define i1 @gep6(%gept* %x) {
 }
 
 define i1 @gep7(%gept* %x) {
-; CHECK: @gep7
+; CHECK-LABEL: @gep7(
   %a = getelementptr %gept* %x, i64 0, i32 0
   %b = getelementptr %gept* @gepz, i64 0, i32 0
   %equal = icmp eq i32* %a, %b
@@ -95,7 +95,7 @@ define i1 @gep7(%gept* %x) {
 }
 
 define i1 @gep8(%gept* %x) {
-; CHECK: @gep8
+; CHECK-LABEL: @gep8(
   %a = getelementptr %gept* %x, i32 1
   %b = getelementptr %gept* %x, i32 -1
   %equal = icmp ugt %gept* %a, %b
@@ -104,7 +104,7 @@ define i1 @gep8(%gept* %x) {
 }
 
 define i1 @gep9(i8* %ptr) {
-; CHECK: @gep9
+; CHECK-LABEL: @gep9(
 ; CHECK-NOT: ret
 ; CHECK: ret i1 true
 
@@ -124,7 +124,7 @@ entry:
 }
 
 define i1 @gep10(i8* %ptr) {
-; CHECK: @gep10
+; CHECK-LABEL: @gep10(
 ; CHECK-NOT: ret
 ; CHECK: ret i1 true
 
@@ -140,7 +140,7 @@ entry:
 }
 
 define i1 @gep11(i8* %ptr) {
-; CHECK: @gep11
+; CHECK-LABEL: @gep11(
 ; CHECK-NOT: ret
 ; CHECK: ret i1 true
 
@@ -153,7 +153,7 @@ entry:
 }
 
 define i1 @gep12(i8* %ptr) {
-; CHECK: @gep12
+; CHECK-LABEL: @gep12(
 ; CHECK-NOT: ret
 ; CHECK: ret i1 %cmp
 
@@ -166,7 +166,7 @@ entry:
 }
 
 define i1 @gep13(i8* %ptr) {
-; CHECK: @gep13
+; CHECK-LABEL: @gep13(
 ; We can prove this GEP is non-null because it is inbounds.
   %x = getelementptr inbounds i8* %ptr, i32 1
   %cmp = icmp eq i8* %x, null
@@ -175,7 +175,7 @@ define i1 @gep13(i8* %ptr) {
 }
 
 define i1 @gep14({ {}, i8 }* %ptr) {
-; CHECK: @gep14
+; CHECK-LABEL: @gep14(
 ; We can't simplify this because the offset of one in the GEP actually doesn't
 ; move the pointer.
   %x = getelementptr inbounds { {}, i8 }* %ptr, i32 0, i32 1
@@ -185,7 +185,7 @@ define i1 @gep14({ {}, i8 }* %ptr) {
 }
 
 define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
-; CHECK: @gep15
+; CHECK-LABEL: @gep15(
 ; We can prove this GEP is non-null even though there is a user value, as we
 ; would necessarily violate inbounds on one side or the other.
   %x = getelementptr inbounds { {}, [4 x {i8, i8}]}* %ptr, i32 0, i32 1, i32 %y, i32 1
@@ -195,7 +195,7 @@ define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
 }
 
 define i1 @gep16(i8* %ptr, i32 %a) {
-; CHECK: @gep16
+; CHECK-LABEL: @gep16(
 ; We can prove this GEP is non-null because it is inbounds and because we know
 ; %b is non-zero even though we don't know its value.
   %b = or i32 %a, 1
@@ -206,7 +206,7 @@ define i1 @gep16(i8* %ptr, i32 %a) {
 }
 
 define i1 @zext(i32 %x) {
-; CHECK: @zext
+; CHECK-LABEL: @zext(
   %e1 = zext i32 %x to i64
   %e2 = zext i32 %x to i64
   %r = icmp eq i64 %e1, %e2
@@ -215,7 +215,7 @@ define i1 @zext(i32 %x) {
 }
 
 define i1 @zext2(i1 %x) {
-; CHECK: @zext2
+; CHECK-LABEL: @zext2(
   %e = zext i1 %x to i32
   %c = icmp ne i32 %e, 0
   ret i1 %c
@@ -223,7 +223,7 @@ define i1 @zext2(i1 %x) {
 }
 
 define i1 @zext3() {
-; CHECK: @zext3
+; CHECK-LABEL: @zext3(
   %e = zext i1 1 to i32
   %c = icmp ne i32 %e, 0
   ret i1 %c
@@ -231,7 +231,7 @@ define i1 @zext3() {
 }
 
 define i1 @sext(i32 %x) {
-; CHECK: @sext
+; CHECK-LABEL: @sext(
   %e1 = sext i32 %x to i64
   %e2 = sext i32 %x to i64
   %r = icmp eq i64 %e1, %e2
@@ -240,7 +240,7 @@ define i1 @sext(i32 %x) {
 }
 
 define i1 @sext2(i1 %x) {
-; CHECK: @sext2
+; CHECK-LABEL: @sext2(
   %e = sext i1 %x to i32
   %c = icmp ne i32 %e, 0
   ret i1 %c
@@ -248,7 +248,7 @@ define i1 @sext2(i1 %x) {
 }
 
 define i1 @sext3() {
-; CHECK: @sext3
+; CHECK-LABEL: @sext3(
   %e = sext i1 1 to i32
   %c = icmp ne i32 %e, 0
   ret i1 %c
@@ -256,7 +256,7 @@ define i1 @sext3() {
 }
 
 define i1 @add(i32 %x, i32 %y) {
-; CHECK: @add
+; CHECK-LABEL: @add(
   %l = lshr i32 %x, 1
   %q = lshr i32 %y, 1
   %r = or i32 %q, 1
@@ -267,7 +267,7 @@ define i1 @add(i32 %x, i32 %y) {
 }
 
 define i1 @add2(i8 %x, i8 %y) {
-; CHECK: @add2
+; CHECK-LABEL: @add2(
   %l = or i8 %x, 128
   %r = or i8 %y, 129
   %s = add i8 %l, %r
@@ -277,7 +277,7 @@ define i1 @add2(i8 %x, i8 %y) {
 }
 
 define i1 @add3(i8 %x, i8 %y) {
-; CHECK: @add3
+; CHECK-LABEL: @add3(
   %l = zext i8 %x to i32
   %r = zext i8 %y to i32
   %s = add i32 %l, %r
@@ -287,7 +287,7 @@ define i1 @add3(i8 %x, i8 %y) {
 }
 
 define i1 @add4(i32 %x, i32 %y) {
-; CHECK: @add4
+; CHECK-LABEL: @add4(
   %z = add nsw i32 %y, 1
   %s1 = add nsw i32 %x, %y
   %s2 = add nsw i32 %x, %z
@@ -297,7 +297,7 @@ define i1 @add4(i32 %x, i32 %y) {
 }
 
 define i1 @add5(i32 %x, i32 %y) {
-; CHECK: @add5
+; CHECK-LABEL: @add5(
   %z = add nuw i32 %y, 1
   %s1 = add nuw i32 %x, %z
   %s2 = add nuw i32 %x, %y
@@ -307,7 +307,7 @@ define i1 @add5(i32 %x, i32 %y) {
 }
 
 define i1 @add6(i64 %A, i64 %B) {
-; CHECK: @add6
+; CHECK-LABEL: @add6(
   %s1 = add i64 %A, %B
   %s2 = add i64 %B, %A
   %cmp = icmp eq i64 %s1, %s2
@@ -316,7 +316,7 @@ define i1 @add6(i64 %A, i64 %B) {
 }
 
 define i1 @addpowtwo(i32 %x, i32 %y) {
-; CHECK: @addpowtwo
+; CHECK-LABEL: @addpowtwo(
   %l = lshr i32 %x, 1
   %r = shl i32 1, %y
   %s = add i32 %l, %r
@@ -326,7 +326,7 @@ define i1 @addpowtwo(i32 %x, i32 %y) {
 }
 
 define i1 @or(i32 %x) {
-; CHECK: @or
+; CHECK-LABEL: @or(
   %o = or i32 %x, 1
   %c = icmp eq i32 %o, 0
   ret i1 %c
@@ -334,7 +334,7 @@ define i1 @or(i32 %x) {
 }
 
 define i1 @shl(i32 %x) {
-; CHECK: @shl
+; CHECK-LABEL: @shl(
   %s = shl i32 1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
@@ -342,7 +342,7 @@ define i1 @shl(i32 %x) {
 }
 
 define i1 @lshr1(i32 %x) {
-; CHECK: @lshr1
+; CHECK-LABEL: @lshr1(
   %s = lshr i32 -1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
@@ -350,15 +350,23 @@ define i1 @lshr1(i32 %x) {
 }
 
 define i1 @lshr2(i32 %x) {
-; CHECK: @lshr2
+; CHECK-LABEL: @lshr2(
   %s = lshr i32 %x, 30
   %c = icmp ugt i32 %s, 8
   ret i1 %c
 ; CHECK: ret i1 false
 }
 
+define i1 @lshr3(i32 %x) {                        ; (x >>u x) == 0 is expected to fold to true
+; CHECK-LABEL: @lshr3(
+  %shifted = lshr i32 %x, %x                      ; the same value is both the operand and the shift amount
+  %is.zero = icmp eq i32 %shifted, 0
+  ret i1 %is.zero
+; CHECK: ret i1 true
+}
+
 define i1 @ashr1(i32 %x) {
-; CHECK: @ashr1
+; CHECK-LABEL: @ashr1(
   %s = ashr i32 -1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
@@ -366,15 +374,23 @@ define i1 @ashr1(i32 %x) {
 }
 
 define i1 @ashr2(i32 %x) {
-; CHECK: @ashr2
+; CHECK-LABEL: @ashr2(
   %s = ashr i32 %x, 30
   %c = icmp slt i32 %s, -5
   ret i1 %c
 ; CHECK: ret i1 false
 }
 
+define i1 @ashr3(i32 %x) {                        ; (x >>s x) == 0 is expected to fold to true
+; CHECK-LABEL: @ashr3(
+  %shifted = ashr i32 %x, %x                      ; the same value is both the operand and the shift amount
+  %is.zero = icmp eq i32 %shifted, 0
+  ret i1 %is.zero
+; CHECK: ret i1 true
+}
+
 define i1 @select1(i1 %cond) {
-; CHECK: @select1
+; CHECK-LABEL: @select1(
   %s = select i1 %cond, i32 1, i32 0
   %c = icmp eq i32 %s, 1
   ret i1 %c
@@ -382,7 +398,7 @@ define i1 @select1(i1 %cond) {
 }
 
 define i1 @select2(i1 %cond) {
-; CHECK: @select2
+; CHECK-LABEL: @select2(
   %x = zext i1 %cond to i32
   %s = select i1 %cond, i32 %x, i32 0
   %c = icmp ne i32 %s, 0
@@ -391,7 +407,7 @@ define i1 @select2(i1 %cond) {
 }
 
 define i1 @select3(i1 %cond) {
-; CHECK: @select3
+; CHECK-LABEL: @select3(
   %x = zext i1 %cond to i32
   %s = select i1 %cond, i32 1, i32 %x
   %c = icmp ne i32 %s, 0
@@ -400,7 +416,7 @@ define i1 @select3(i1 %cond) {
 }
 
 define i1 @select4(i1 %cond) {
-; CHECK: @select4
+; CHECK-LABEL: @select4(
   %invert = xor i1 %cond, 1
   %s = select i1 %invert, i32 0, i32 1
   %c = icmp ne i32 %s, 0
@@ -409,7 +425,7 @@ define i1 @select4(i1 %cond) {
 }
 
 define i1 @select5(i32 %x) {
-; CHECK: @select5
+; CHECK-LABEL: @select5(
   %c = icmp eq i32 %x, 0
   %s = select i1 %c, i32 1, i32 %x
   %c2 = icmp eq i32 %s, 0
@@ -418,7 +434,7 @@ define i1 @select5(i32 %x) {
 }
 
 define i1 @select6(i32 %x) {
-; CHECK: @select6
+; CHECK-LABEL: @select6(
   %c = icmp sgt i32 %x, 0
   %s = select i1 %c, i32 %x, i32 4
   %c2 = icmp eq i32 %s, 0
@@ -427,7 +443,7 @@ define i1 @select6(i32 %x) {
 }
 
 define i1 @urem1(i32 %X, i32 %Y) {
-; CHECK: @urem1
+; CHECK-LABEL: @urem1(
   %A = urem i32 %X, %Y
   %B = icmp ult i32 %A, %Y
   ret i1 %B
@@ -435,7 +451,7 @@ define i1 @urem1(i32 %X, i32 %Y) {
 }
 
 define i1 @urem2(i32 %X, i32 %Y) {
-; CHECK: @urem2
+; CHECK-LABEL: @urem2(
   %A = urem i32 %X, %Y
   %B = icmp eq i32 %A, %Y
   ret i1 %B
@@ -443,7 +459,7 @@ define i1 @urem2(i32 %X, i32 %Y) {
 }
 
 define i1 @urem3(i32 %X) {
-; CHECK: @urem3
+; CHECK-LABEL: @urem3(
   %A = urem i32 %X, 10
   %B = icmp ult i32 %A, 15
   ret i1 %B
@@ -451,7 +467,7 @@ define i1 @urem3(i32 %X) {
 }
 
 define i1 @urem4(i32 %X) {
-; CHECK: @urem4
+; CHECK-LABEL: @urem4(
   %A = urem i32 %X, 15
   %B = icmp ult i32 %A, 10
   ret i1 %B
@@ -459,24 +475,32 @@ define i1 @urem4(i32 %X) {
 }
 
 define i1 @urem5(i16 %X, i32 %Y) {
-; CHECK: @urem5
+; CHECK-LABEL: @urem5(
   %A = zext i16 %X to i32
   %B = urem i32 %A, %Y
   %C = icmp slt i32 %B, %Y
   ret i1 %C
-; CHECK: ret i1 true
+; CHECK-NOT: ret i1 true
 }
 
 define i1 @urem6(i32 %X, i32 %Y) {
-; CHECK: @urem6
+; CHECK-LABEL: @urem6(
   %A = urem i32 %X, %Y
   %B = icmp ugt i32 %Y, %A
   ret i1 %B
 ; CHECK: ret i1 true
 }
 
+define i1 @urem7(i32 %X) {                        ; (1 urem X) >s X must not be folded to false
+; CHECK-LABEL: @urem7(
+  %rem = urem i32 1, %X
+  %gt = icmp sgt i32 %rem, %X                     ; signed compare: X = -1 gives 1 >s -1, which is true
+  ret i1 %gt
+; CHECK-NOT: ret i1 false
+}
+
 define i1 @srem1(i32 %X) {
-; CHECK: @srem1
+; CHECK-LABEL: @srem1(
   %A = srem i32 %X, -5
   %B = icmp sgt i32 %A, 5
   ret i1 %B
@@ -484,7 +508,7 @@ define i1 @srem1(i32 %X) {
 }
 
 ; PR9343 #15
-; CHECK: @srem2
+; CHECK-LABEL: @srem2(
 ; CHECK: ret i1 false
 define i1 @srem2(i16 %X, i32 %Y) {
   %A = zext i16 %X to i32
@@ -494,7 +518,7 @@ define i1 @srem2(i16 %X, i32 %Y) {
   ret i1 %D
 }
 
-; CHECK: @srem3
+; CHECK-LABEL: @srem3(
 ; CHECK-NEXT: ret i1 false
 define i1 @srem3(i16 %X, i32 %Y) {
   %A = zext i16 %X to i32
@@ -506,7 +530,7 @@ define i1 @srem3(i16 %X, i32 %Y) {
 }
 
 define i1 @udiv1(i32 %X) {
-; CHECK: @udiv1
+; CHECK-LABEL: @udiv1(
   %A = udiv i32 %X, 1000000
   %B = icmp ult i32 %A, 5000
   ret i1 %B
@@ -514,7 +538,7 @@ define i1 @udiv1(i32 %X) {
 }
 
 define i1 @udiv2(i32 %X, i32 %Y, i32 %Z) {
-; CHECK: @udiv2
+; CHECK-LABEL: @udiv2(
   %A = udiv exact i32 10, %Z
   %B = udiv exact i32 20, %Z
   %C = icmp ult i32 %A, %B
@@ -523,7 +547,7 @@ define i1 @udiv2(i32 %X, i32 %Y, i32 %Z) {
 }
 
 define i1 @udiv3(i32 %X, i32 %Y) {
-; CHECK: @udiv3
+; CHECK-LABEL: @udiv3(
   %A = udiv i32 %X, %Y
   %C = icmp ugt i32 %A, %X
   ret i1 %C
@@ -531,7 +555,7 @@ define i1 @udiv3(i32 %X, i32 %Y) {
 }
 
 define i1 @udiv4(i32 %X, i32 %Y) {
-; CHECK: @udiv4
+; CHECK-LABEL: @udiv4(
   %A = udiv i32 %X, %Y
   %C = icmp ule i32 %A, %X
   ret i1 %C
@@ -539,7 +563,7 @@ define i1 @udiv4(i32 %X, i32 %Y) {
 }
 
 define i1 @udiv5(i32 %X) {
-; CHECK: @udiv5
+; CHECK-LABEL: @udiv5(
   %A = udiv i32 123, %X
   %C = icmp ugt i32 %A, 124
   ret i1 %C
@@ -548,7 +572,7 @@ define i1 @udiv5(i32 %X) {
 
 ; PR11340
 define i1 @udiv6(i32 %X) nounwind {
-; CHECK: @udiv6
+; CHECK-LABEL: @udiv6(
   %A = udiv i32 1, %X
   %C = icmp eq i32 %A, 0
   ret i1 %C
@@ -557,7 +581,7 @@ define i1 @udiv6(i32 %X) nounwind {
 
 
 define i1 @sdiv1(i32 %X) {
-; CHECK: @sdiv1
+; CHECK-LABEL: @sdiv1(
   %A = sdiv i32 %X, 1000000
   %B = icmp slt i32 %A, 3000
   ret i1 %B
@@ -565,7 +589,7 @@ define i1 @sdiv1(i32 %X) {
 }
 
 define i1 @or1(i32 %X) {
-; CHECK: @or1
+; CHECK-LABEL: @or1(
   %A = or i32 %X, 62
   %B = icmp ult i32 %A, 50
   ret i1 %B
@@ -573,7 +597,7 @@ define i1 @or1(i32 %X) {
 }
 
 define i1 @and1(i32 %X) {
-; CHECK: @and1
+; CHECK-LABEL: @and1(
   %A = and i32 %X, 62
   %B = icmp ugt i32 %A, 70
   ret i1 %B
@@ -581,7 +605,7 @@ define i1 @and1(i32 %X) {
 }
 
 define i1 @mul1(i32 %X) {
-; CHECK: @mul1
+; CHECK-LABEL: @mul1(
 ; Square of a non-zero number is non-zero if there is no overflow.
   %Y = or i32 %X, 1
   %M = mul nuw i32 %Y, %Y
@@ -591,7 +615,7 @@ define i1 @mul1(i32 %X) {
 }
 
 define i1 @mul2(i32 %X) {
-; CHECK: @mul2
+; CHECK-LABEL: @mul2(
 ; Square of a non-zero number is positive if there is no signed overflow.
   %Y = or i32 %X, 1
   %M = mul nsw i32 %Y, %Y
@@ -601,7 +625,7 @@ define i1 @mul2(i32 %X) {
 }
 
 define i1 @mul3(i32 %X, i32 %Y) {
-; CHECK: @mul3
+; CHECK-LABEL: @mul3(
 ; Product of non-negative numbers is non-negative if there is no signed overflow.
   %XX = mul nsw i32 %X, %X
   %YY = mul nsw i32 %Y, %Y
@@ -612,7 +636,7 @@ define i1 @mul3(i32 %X, i32 %Y) {
 }
 
 define <2 x i1> @vectorselect1(<2 x i1> %cond) {
-; CHECK: @vectorselect1
+; CHECK-LABEL: @vectorselect1(
   %invert = xor <2 x i1> %cond, <i1 1, i1 1>
   %s = select <2 x i1> %invert, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 1, i32 1>
   %c = icmp ne <2 x i32> %s, <i32 0, i32 0>
@@ -684,7 +708,7 @@ define zeroext i1 @external_compare(i32* noalias %x) {
 }
 
 define i1 @alloca_gep(i64 %a, i64 %b) {
-; CHECK: @alloca_gep
+; CHECK-LABEL: @alloca_gep(
 ; We can prove this GEP is non-null because it is inbounds and the pointer
 ; is non-null.
   %strs = alloca [1000 x [1001 x i8]], align 16
@@ -693,3 +717,25 @@ define i1 @alloca_gep(i64 %a, i64 %b) {
   ret i1 %cmp
 ; CHECK-NEXT: ret i1 false
 }
+
+define i1 @non_inbounds_gep_compare(i64* %a) {
+; CHECK-LABEL: @non_inbounds_gep_compare(
+; An equality compare is expected to fold even when the GEP chains contain non-inbounds steps.
+  %fwd = getelementptr i64* %a, i64 42                 ; +42, not inbounds
+  %fwd.back = getelementptr inbounds i64* %fwd, i64 -42 ; net constant offset 0 from %a
+  %bwd = getelementptr i64* %a, i64 -42                ; -42, not inbounds
+  %bwd.back = getelementptr inbounds i64* %bwd, i64 42 ; net constant offset 0 from %a
+  %same = icmp eq i64* %fwd.back, %bwd.back
+  ret i1 %same
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @non_inbounds_gep_compare2(i64* %a) {
+; CHECK-LABEL: @non_inbounds_gep_compare2(
+; Equality compares with non-inbounds GEPs can be folded; with this file's "p:32:32" datalayout the index 2^32+1 is expected to wrap to 1.
+  %x = getelementptr i64* %a, i64 4294967297      ; 4294967297 = 2^32 + 1
+  %y = getelementptr i64* %a, i64 1
+  %cmp = icmp eq i64* %x, %y                      ; compare the two distinct GEPs, not %y with itself
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstSimplify/exact-nsw-nuw.ll b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
index f3a804eb5b5e..a0e326b13c02 100644
--- a/test/Transforms/InstSimplify/exact-nsw-nuw.ll
+++ b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
@@ -2,7 +2,7 @@
 
 ; PR8862
 
-; CHECK: @shift1
+; CHECK-LABEL: @shift1(
 ; CHECK: ret i32 %A
 define i32 @shift1(i32 %A, i32 %B) {
   %C = lshr exact i32 %A, %B
@@ -10,7 +10,7 @@ define i32 @shift1(i32 %A, i32 %B) {
   ret i32 %D
 }
 
-; CHECK: @shift2
+; CHECK-LABEL: @shift2(
 ; CHECK: lshr
 ; CHECK: ret i32 %D
 define i32 @shift2(i32 %A, i32 %B) {
@@ -19,7 +19,7 @@ define i32 @shift2(i32 %A, i32 %B) {
   ret i32 %D
 }
 
-; CHECK: @shift3
+; CHECK-LABEL: @shift3(
 ; CHECK: ret i32 %A
 define i32 @shift3(i32 %A, i32 %B) {
   %C = ashr exact i32 %A, %B
@@ -27,7 +27,7 @@ define i32 @shift3(i32 %A, i32 %B) {
   ret i32 %D
 }
 
-; CHECK: @shift4
+; CHECK-LABEL: @shift4(
 ; CHECK: ret i32 %A
 define i32 @shift4(i32 %A, i32 %B) {
   %C = shl nuw i32 %A, %B
@@ -35,7 +35,7 @@ define i32 @shift4(i32 %A, i32 %B) {
   ret i32 %D
 }
 
-; CHECK: @shift5
+; CHECK-LABEL: @shift5(
 ; CHECK: ret i32 %A
 define i32 @shift5(i32 %A, i32 %B) {
   %C = shl nsw i32 %A, %B
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
index 154b96739791..71d1ed838491 100644
--- a/test/Transforms/InstSimplify/fast-math.ll
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -71,7 +71,7 @@ define float @fadd_fsub_0(float %a) {
 }
 
 ; fsub nnan ninf x, x ==> 0.0
-; CHECK: @fsub_x_x
+; CHECK-LABEL: @fsub_x_x(
 define float @fsub_x_x(float %a) {
 ; X - X ==> 0
   %zero1 = fsub nnan ninf float %a, %a
@@ -92,7 +92,7 @@ define float @fsub_x_x(float %a) {
 }
 
 ; fadd nsz X, 0 ==> X
-; CHECK: @nofold_fadd_x_0
+; CHECK-LABEL: @nofold_fadd_x_0(
 define float @nofold_fadd_x_0(float %a) {
 ; Dont fold
 ; CHECK: %no_zero1 = fadd
diff --git a/test/Transforms/InstSimplify/fdiv.ll b/test/Transforms/InstSimplify/fdiv.ll
index 9d85154b240f..53ad25d07476 100644
--- a/test/Transforms/InstSimplify/fdiv.ll
+++ b/test/Transforms/InstSimplify/fdiv.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define double @fdiv_of_undef(double %X) {
-; CHECK: @fdiv_of_undef
+; CHECK-LABEL: @fdiv_of_undef(
 ; undef / X -> undef
   %r = fdiv double undef, %X
   ret double %r
@@ -9,7 +9,7 @@ define double @fdiv_of_undef(double %X) {
 }
 
 define double @fdiv_by_undef(double %X) {
-; CHECK: @fdiv_by_undef
+; CHECK-LABEL: @fdiv_by_undef(
 ; X / undef -> undef
   %r = fdiv double %X, undef
   ret double %r
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index 91ce26324b81..8177440472cb 100644
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 ; fsub 0, (fsub 0, X) ==> X
-; CHECK: @fsub_0_0_x
+; CHECK-LABEL: @fsub_0_0_x(
 define float @fsub_0_0_x(float %a) {
   %t1 = fsub float -0.0, %a
   %ret = fsub float -0.0, %t1
@@ -11,7 +11,7 @@ define float @fsub_0_0_x(float %a) {
 }
 
 ; fsub X, 0 ==> X
-; CHECK: @fsub_x_0
+; CHECK-LABEL: @fsub_x_0(
 define float @fsub_x_0(float %a) {
   %ret = fsub float %a, 0.0
 ; CHECK: ret float %a
@@ -19,7 +19,7 @@ define float @fsub_x_0(float %a) {
 }
 
 ; fadd X, -0 ==> X
-; CHECK: @fadd_x_n0
+; CHECK-LABEL: @fadd_x_n0(
 define float @fadd_x_n0(float %a) {
   %ret = fadd float %a, -0.0
 ; CHECK: ret float %a
@@ -27,7 +27,7 @@ define float @fadd_x_n0(float %a) {
 }
 
 ; fmul X, 1.0 ==> X
-; CHECK: @fmul_X_1
+; CHECK-LABEL: @fmul_X_1(
 define double @fmul_X_1(double %a) {
   %b = fmul double 1.000000e+00, %a                ; <double> [#uses=1]
   ; CHECK: ret double %a
diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/InstSimplify/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstSimplify/maxmin.ll b/test/Transforms/InstSimplify/maxmin.ll
index e921214e0f0d..3c643ed3e03e 100644
--- a/test/Transforms/InstSimplify/maxmin.ll
+++ b/test/Transforms/InstSimplify/maxmin.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i1 @max1(i32 %x, i32 %y) {
-; CHECK: @max1
+; CHECK-LABEL: @max1(
   %c = icmp sgt i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp slt i32 %m, %x
@@ -10,7 +10,7 @@ define i1 @max1(i32 %x, i32 %y) {
 }
 
 define i1 @max2(i32 %x, i32 %y) {
-; CHECK: @max2
+; CHECK-LABEL: @max2(
   %c = icmp sge i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp sge i32 %m, %x
@@ -19,7 +19,7 @@ define i1 @max2(i32 %x, i32 %y) {
 }
 
 define i1 @max3(i32 %x, i32 %y) {
-; CHECK: @max3
+; CHECK-LABEL: @max3(
   %c = icmp ugt i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp ult i32 %m, %x
@@ -28,7 +28,7 @@ define i1 @max3(i32 %x, i32 %y) {
 }
 
 define i1 @max4(i32 %x, i32 %y) {
-; CHECK: @max4
+; CHECK-LABEL: @max4(
   %c = icmp uge i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp uge i32 %m, %x
@@ -37,7 +37,7 @@ define i1 @max4(i32 %x, i32 %y) {
 }
 
 define i1 @max5(i32 %x, i32 %y) {
-; CHECK: @max5
+; CHECK-LABEL: @max5(
   %c = icmp sgt i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp sgt i32 %x, %m
@@ -46,7 +46,7 @@ define i1 @max5(i32 %x, i32 %y) {
 }
 
 define i1 @max6(i32 %x, i32 %y) {
-; CHECK: @max6
+; CHECK-LABEL: @max6(
   %c = icmp sge i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp sle i32 %x, %m
@@ -55,7 +55,7 @@ define i1 @max6(i32 %x, i32 %y) {
 }
 
 define i1 @max7(i32 %x, i32 %y) {
-; CHECK: @max7
+; CHECK-LABEL: @max7(
   %c = icmp ugt i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp ugt i32 %x, %m
@@ -64,7 +64,7 @@ define i1 @max7(i32 %x, i32 %y) {
 }
 
 define i1 @max8(i32 %x, i32 %y) {
-; CHECK: @max8
+; CHECK-LABEL: @max8(
   %c = icmp uge i32 %x, %y
   %m = select i1 %c, i32 %x, i32 %y
   %r = icmp ule i32 %x, %m
@@ -73,7 +73,7 @@ define i1 @max8(i32 %x, i32 %y) {
 }
 
 define i1 @min1(i32 %x, i32 %y) {
-; CHECK: @min1
+; CHECK-LABEL: @min1(
   %c = icmp sgt i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp sgt i32 %m, %x
@@ -82,7 +82,7 @@ define i1 @min1(i32 %x, i32 %y) {
 }
 
 define i1 @min2(i32 %x, i32 %y) {
-; CHECK: @min2
+; CHECK-LABEL: @min2(
   %c = icmp sge i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp sle i32 %m, %x
@@ -91,7 +91,7 @@ define i1 @min2(i32 %x, i32 %y) {
 }
 
 define i1 @min3(i32 %x, i32 %y) {
-; CHECK: @min3
+; CHECK-LABEL: @min3(
   %c = icmp ugt i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp ugt i32 %m, %x
@@ -100,7 +100,7 @@ define i1 @min3(i32 %x, i32 %y) {
 }
 
 define i1 @min4(i32 %x, i32 %y) {
-; CHECK: @min4
+; CHECK-LABEL: @min4(
   %c = icmp uge i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp ule i32 %m, %x
@@ -109,7 +109,7 @@ define i1 @min4(i32 %x, i32 %y) {
 }
 
 define i1 @min5(i32 %x, i32 %y) {
-; CHECK: @min5
+; CHECK-LABEL: @min5(
   %c = icmp sgt i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp slt i32 %x, %m
@@ -118,7 +118,7 @@ define i1 @min5(i32 %x, i32 %y) {
 }
 
 define i1 @min6(i32 %x, i32 %y) {
-; CHECK: @min6
+; CHECK-LABEL: @min6(
   %c = icmp sge i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp sge i32 %x, %m
@@ -127,7 +127,7 @@ define i1 @min6(i32 %x, i32 %y) {
 }
 
 define i1 @min7(i32 %x, i32 %y) {
-; CHECK: @min7
+; CHECK-LABEL: @min7(
   %c = icmp ugt i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp ult i32 %x, %m
@@ -136,7 +136,7 @@ define i1 @min7(i32 %x, i32 %y) {
 }
 
 define i1 @min8(i32 %x, i32 %y) {
-; CHECK: @min8
+; CHECK-LABEL: @min8(
   %c = icmp uge i32 %x, %y
   %m = select i1 %c, i32 %y, i32 %x
   %r = icmp uge i32 %x, %m
@@ -145,7 +145,7 @@ define i1 @min8(i32 %x, i32 %y) {
 }
 
 define i1 @maxmin1(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin1
+; CHECK-LABEL: @maxmin1(
   %c1 = icmp sge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp sge i32 %x, %z
@@ -156,7 +156,7 @@ define i1 @maxmin1(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin2(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin2
+; CHECK-LABEL: @maxmin2(
   %c1 = icmp sge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp sge i32 %x, %z
@@ -167,7 +167,7 @@ define i1 @maxmin2(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin3(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin3
+; CHECK-LABEL: @maxmin3(
   %c1 = icmp sge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp sge i32 %x, %z
@@ -178,7 +178,7 @@ define i1 @maxmin3(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin4(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin4
+; CHECK-LABEL: @maxmin4(
   %c1 = icmp sge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp sge i32 %x, %z
@@ -189,7 +189,7 @@ define i1 @maxmin4(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin5(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin5
+; CHECK-LABEL: @maxmin5(
   %c1 = icmp uge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp uge i32 %x, %z
@@ -200,7 +200,7 @@ define i1 @maxmin5(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin6(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin6
+; CHECK-LABEL: @maxmin6(
   %c1 = icmp uge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp uge i32 %x, %z
@@ -211,7 +211,7 @@ define i1 @maxmin6(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin7(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin7
+; CHECK-LABEL: @maxmin7(
   %c1 = icmp uge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp uge i32 %x, %z
@@ -222,7 +222,7 @@ define i1 @maxmin7(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @maxmin8(i32 %x, i32 %y, i32 %z) {
-; CHECK: @maxmin8
+; CHECK-LABEL: @maxmin8(
   %c1 = icmp uge i32 %x, %y
   %max = select i1 %c1, i32 %x, i32 %y
   %c2 = icmp uge i32 %x, %z
@@ -233,7 +233,7 @@ define i1 @maxmin8(i32 %x, i32 %y, i32 %z) {
 }
 
 define i1 @eqcmp1(i32 %x, i32 %y) {
-; CHECK: @eqcmp1
+; CHECK-LABEL: @eqcmp1(
   %c = icmp sge i32 %x, %y
   %max = select i1 %c, i32 %x, i32 %y
   %r = icmp eq i32 %max, %x
@@ -242,7 +242,7 @@ define i1 @eqcmp1(i32 %x, i32 %y) {
 }
 
 define i1 @eqcmp2(i32 %x, i32 %y) {
-; CHECK: @eqcmp2
+; CHECK-LABEL: @eqcmp2(
   %c = icmp sge i32 %x, %y
   %max = select i1 %c, i32 %x, i32 %y
   %r = icmp eq i32 %x, %max
@@ -251,7 +251,7 @@ define i1 @eqcmp2(i32 %x, i32 %y) {
 }
 
 define i1 @eqcmp3(i32 %x, i32 %y) {
-; CHECK: @eqcmp3
+; CHECK-LABEL: @eqcmp3(
   %c = icmp uge i32 %x, %y
   %max = select i1 %c, i32 %x, i32 %y
   %r = icmp eq i32 %max, %x
@@ -260,7 +260,7 @@ define i1 @eqcmp3(i32 %x, i32 %y) {
 }
 
 define i1 @eqcmp4(i32 %x, i32 %y) {
-; CHECK: @eqcmp4
+; CHECK-LABEL: @eqcmp4(
   %c = icmp uge i32 %x, %y
   %max = select i1 %c, i32 %x, i32 %y
   %r = icmp eq i32 %x, %max
diff --git a/test/Transforms/InstSimplify/phi.ll b/test/Transforms/InstSimplify/phi.ll
index 05cd40d90210..5b7aaa93caf1 100644
--- a/test/Transforms/InstSimplify/phi.ll
+++ b/test/Transforms/InstSimplify/phi.ll
@@ -2,7 +2,7 @@
 
 ; PR12189
 define i1 @test1(i32 %x) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   br i1 true, label %a, label %b
 
 a:
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
index 8b4aa796013b..6a3f65237c5e 100644
--- a/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-unknown-linux-gnu"
 
 define i64 @ptrdiff1(i8* %ptr) {
-; CHECK: @ptrdiff1
+; CHECK-LABEL: @ptrdiff1(
 ; CHECK-NEXT: ret i64 42
 
   %first = getelementptr inbounds i8* %ptr, i32 0
@@ -15,7 +15,7 @@ define i64 @ptrdiff1(i8* %ptr) {
 }
 
 define i64 @ptrdiff2(i8* %ptr) {
-; CHECK: @ptrdiff2
+; CHECK-LABEL: @ptrdiff2(
 ; CHECK-NEXT: ret i64 42
 
   %first1 = getelementptr inbounds i8* %ptr, i32 0
@@ -34,7 +34,7 @@ define i64 @ptrdiff2(i8* %ptr) {
 
 define i64 @ptrdiff3(i8* %ptr) {
 ; Don't bother with non-inbounds GEPs.
-; CHECK: @ptrdiff3
+; CHECK-LABEL: @ptrdiff3(
 ; CHECK: getelementptr
 ; CHECK: sub
 ; CHECK: ret
@@ -49,7 +49,7 @@ define i64 @ptrdiff3(i8* %ptr) {
 
 define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
 ; Handle simple cases of vectors of pointers.
-; CHECK: @ptrdiff4
+; CHECK-LABEL: @ptrdiff4(
 ; CHECK: ret <4 x i32> zeroinitializer
   %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
   %bc = bitcast <4 x i8*> %arg to <4 x i32*>
@@ -73,6 +73,6 @@ bb:
   %tmp6 = ptrtoint [2 x i32]* %tmp5 to i32
   %tmp7 = sub i32 %tmp3, %tmp6
   ret i32 %tmp7
-; CHECK: @ptrdiff5
+; CHECK-LABEL: @ptrdiff5(
 ; CHECK: ret i32 0
 }
diff --git a/test/Transforms/InstSimplify/reassociate.ll b/test/Transforms/InstSimplify/reassociate.ll
index e659e6f42c8d..d44f7155ffda 100644
--- a/test/Transforms/InstSimplify/reassociate.ll
+++ b/test/Transforms/InstSimplify/reassociate.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i32 @add1(i32 %x) {
-; CHECK: @add1
+; CHECK-LABEL: @add1(
 ; (X + -1) + 1 -> X
   %l = add i32 %x, -1
   %r = add i32 %l, 1
@@ -10,7 +10,7 @@ define i32 @add1(i32 %x) {
 }
 
 define i32 @and1(i32 %x, i32 %y) {
-; CHECK: @and1
+; CHECK-LABEL: @and1(
 ; (X & Y) & X -> X & Y
   %l = and i32 %x, %y
   %r = and i32 %l, %x
@@ -19,7 +19,7 @@ define i32 @and1(i32 %x, i32 %y) {
 }
 
 define i32 @and2(i32 %x, i32 %y) {
-; CHECK: @and2
+; CHECK-LABEL: @and2(
 ; X & (X & Y) -> X & Y
   %r = and i32 %x, %y
   %l = and i32 %x, %r
@@ -28,7 +28,7 @@ define i32 @and2(i32 %x, i32 %y) {
 }
 
 define i32 @or1(i32 %x, i32 %y) {
-; CHECK: @or1
+; CHECK-LABEL: @or1(
 ; (X | Y) | X -> X | Y
   %l = or i32 %x, %y
   %r = or i32 %l, %x
@@ -37,7 +37,7 @@ define i32 @or1(i32 %x, i32 %y) {
 }
 
 define i32 @or2(i32 %x, i32 %y) {
-; CHECK: @or2
+; CHECK-LABEL: @or2(
 ; X | (X | Y) -> X | Y
   %r = or i32 %x, %y
   %l = or i32 %x, %r
@@ -46,7 +46,7 @@ define i32 @or2(i32 %x, i32 %y) {
 }
 
 define i32 @xor1(i32 %x, i32 %y) {
-; CHECK: @xor1
+; CHECK-LABEL: @xor1(
 ; (X ^ Y) ^ X = Y
   %l = xor i32 %x, %y
   %r = xor i32 %l, %x
@@ -55,7 +55,7 @@ define i32 @xor1(i32 %x, i32 %y) {
 }
 
 define i32 @xor2(i32 %x, i32 %y) {
-; CHECK: @xor2
+; CHECK-LABEL: @xor2(
 ; X ^ (X ^ Y) = Y
   %r = xor i32 %x, %y
   %l = xor i32 %x, %r
@@ -64,7 +64,7 @@ define i32 @xor2(i32 %x, i32 %y) {
 }
 
 define i32 @sub1(i32 %x, i32 %y) {
-; CHECK: @sub1
+; CHECK-LABEL: @sub1(
   %d = sub i32 %x, %y
   %r = sub i32 %x, %d
   ret i32 %r
@@ -72,7 +72,7 @@ define i32 @sub1(i32 %x, i32 %y) {
 }
 
 define i32 @sub2(i32 %x) {
-; CHECK: @sub2
+; CHECK-LABEL: @sub2(
 ; X - (X + 1) -> -1
   %xp1 = add i32 %x, 1
   %r = sub i32 %x, %xp1
@@ -81,7 +81,7 @@ define i32 @sub2(i32 %x) {
 }
 
 define i32 @sub3(i32 %x, i32 %y) {
-; CHECK: @sub3
+; CHECK-LABEL: @sub3(
 ; ((X + 1) + Y) - (Y + 1) -> X
   %xp1 = add i32 %x, 1
   %lhs = add i32 %xp1, %y
@@ -92,7 +92,7 @@ define i32 @sub3(i32 %x, i32 %y) {
 }
 
 define i32 @sdiv1(i32 %x, i32 %y) {
-; CHECK: @sdiv1
+; CHECK-LABEL: @sdiv1(
 ; (no overflow X * Y) / Y -> X
   %mul = mul nsw i32 %x, %y
   %r = sdiv i32 %mul, %y
@@ -101,7 +101,7 @@ define i32 @sdiv1(i32 %x, i32 %y) {
 }
 
 define i32 @sdiv2(i32 %x, i32 %y) {
-; CHECK: @sdiv2
+; CHECK-LABEL: @sdiv2(
 ; (((X / Y) * Y) / Y) -> X / Y
   %div = sdiv i32 %x, %y
   %mul = mul i32 %div, %y
@@ -111,7 +111,7 @@ define i32 @sdiv2(i32 %x, i32 %y) {
 }
 
 define i32 @sdiv3(i32 %x, i32 %y) {
-; CHECK: @sdiv3
+; CHECK-LABEL: @sdiv3(
 ; (X rem Y) / Y -> 0
   %rem = srem i32 %x, %y
   %div = sdiv i32 %rem, %y
@@ -120,7 +120,7 @@ define i32 @sdiv3(i32 %x, i32 %y) {
 }
 
 define i32 @sdiv4(i32 %x, i32 %y) {
-; CHECK: @sdiv4
+; CHECK-LABEL: @sdiv4(
 ; (X / Y) * Y -> X if the division is exact
   %div = sdiv exact i32 %x, %y
   %mul = mul i32 %div, %y
@@ -129,7 +129,7 @@ define i32 @sdiv4(i32 %x, i32 %y) {
 }
 
 define i32 @sdiv5(i32 %x, i32 %y) {
-; CHECK: @sdiv5
+; CHECK-LABEL: @sdiv5(
 ; Y * (X / Y) -> X if the division is exact
   %div = sdiv exact i32 %x, %y
   %mul = mul i32 %y, %div
@@ -139,7 +139,7 @@ define i32 @sdiv5(i32 %x, i32 %y) {
 
 
 define i32 @udiv1(i32 %x, i32 %y) {
-; CHECK: @udiv1
+; CHECK-LABEL: @udiv1(
 ; (no overflow X * Y) / Y -> X
   %mul = mul nuw i32 %x, %y
   %r = udiv i32 %mul, %y
@@ -148,7 +148,7 @@ define i32 @udiv1(i32 %x, i32 %y) {
 }
 
 define i32 @udiv2(i32 %x, i32 %y) {
-; CHECK: @udiv2
+; CHECK-LABEL: @udiv2(
 ; (((X / Y) * Y) / Y) -> X / Y
   %div = udiv i32 %x, %y
   %mul = mul i32 %div, %y
@@ -158,7 +158,7 @@ define i32 @udiv2(i32 %x, i32 %y) {
 }
 
 define i32 @udiv3(i32 %x, i32 %y) {
-; CHECK: @udiv3
+; CHECK-LABEL: @udiv3(
 ; (X rem Y) / Y -> 0
   %rem = urem i32 %x, %y
   %div = udiv i32 %rem, %y
@@ -167,7 +167,7 @@ define i32 @udiv3(i32 %x, i32 %y) {
 }
 
 define i32 @udiv4(i32 %x, i32 %y) {
-; CHECK: @udiv4
+; CHECK-LABEL: @udiv4(
 ; (X / Y) * Y -> X if the division is exact
   %div = udiv exact i32 %x, %y
   %mul = mul i32 %div, %y
@@ -176,7 +176,7 @@ define i32 @udiv4(i32 %x, i32 %y) {
 }
 
 define i32 @udiv5(i32 %x, i32 %y) {
-; CHECK: @udiv5
+; CHECK-LABEL: @udiv5(
 ; Y * (X / Y) -> X if the division is exact
   %div = udiv exact i32 %x, %y
   %mul = mul i32 %y, %div
@@ -185,7 +185,7 @@ define i32 @udiv5(i32 %x, i32 %y) {
 }
 
 define i16 @trunc1(i32 %x) {
-; CHECK: @trunc1
+; CHECK-LABEL: @trunc1(
   %y = add i32 %x, 1
   %tx = trunc i32 %x to i16
   %ty = trunc i32 %y to i16
diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll
index 4c8f87cf5e92..80fa8e7b4831 100644
--- a/test/Transforms/InstSimplify/rem.ll
+++ b/test/Transforms/InstSimplify/rem.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
 define i32 @select1(i32 %x, i1 %b) {
-; CHECK: @select1
+; CHECK-LABEL: @select1(
   %rhs = select i1 %b, i32 %x, i32 1
   %rem = srem i32 %x, %rhs
   ret i32 %rem
@@ -9,7 +9,7 @@ define i32 @select1(i32 %x, i1 %b) {
 }
 
 define i32 @select2(i32 %x, i1 %b) {
-; CHECK: @select2
+; CHECK-LABEL: @select2(
   %rhs = select i1 %b, i32 %x, i32 1
   %rem = urem i32 %x, %rhs
   ret i32 %rem
diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll b/test/Transforms/Internalize/2008-05-09-AllButMain.ll
deleted file mode 100644
index c07abb0c6365..000000000000
--- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; No arguments means internalize everything
-; RUN: opt < %s -internalize -S | FileCheck --check-prefix=NOARGS %s
-
-; Internalize all but foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=LIST %s
-
-; Non existent files should be treated as if they were empty (so internalize
-; everything)
-; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=EMPTYFILE %s
-
-; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file  2> /dev/null | FileCheck --check-prefix=LIST2 %s
-
-; -file and -list options should be merged, the .apifile contains foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | FileCheck --check-prefix=MERGE %s
-
-; NOARGS: @i = internal global
-; LIST: @i = internal global
-; EMPTYFILE: @i = internal global
-; LIST2: @i = internal global
-; MERGE: @i = internal global
-@i = global i32 0
-
-; NOARGS: @j = internal global
-; LIST: @j = global
-; EMPTYFILE: @j = internal global
-; LIST2: @j = internal global
-; MERGE: @j = global
-@j = global i32 0
-
-; NOARGS: define internal void @main
-; LIST: define internal void @main
-; EMPTYFILE: define internal void @main
-; LIST2: define internal void @main
-; MERGE: define internal void @main
-define void @main() {
-        ret void
-}
-
-; NOARGS: define internal void @foo
-; LIST: define void @foo
-; EMPTYFILE: define internal void @foo
-; LIST2: define void @foo
-; MERGE: define void @foo
-define void @foo() {
-        ret void
-}
-
-; NOARGS: define internal void @bar
-; LIST: define internal void @bar
-; EMPTYFILE: define internal void @bar
-; LIST2: define void @bar
-; MERGE: define void @bar
-define void @bar() {
-        ret void
-}
diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile b/test/Transforms/Internalize/apifile
index f6c58b80c1cd..f6c58b80c1cd 100644
--- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile
+++ b/test/Transforms/Internalize/apifile
diff --git a/test/Transforms/Internalize/available_externally.ll b/test/Transforms/Internalize/available_externally.ll
deleted file mode 100644
index a2cf23fb3909..000000000000
--- a/test/Transforms/Internalize/available_externally.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -internalize -internalize-public-api-list foo -S | FileCheck %s
-
-; CHECK: define void @foo
-define void @foo() {
-  ret void
-}
-
-; CHECK: define internal void @zed
-define void @zed() {
-  ret void
-}
-
-; CHECK: define available_externally void @bar
-define available_externally void @bar() {
-  ret void
-}
diff --git a/test/Transforms/Internalize/lists.ll b/test/Transforms/Internalize/lists.ll
new file mode 100644
index 000000000000..83e441a2dfe7
--- /dev/null
+++ b/test/Transforms/Internalize/lists.ll
@@ -0,0 +1,50 @@
+; No arguments means internalize everything
+; RUN: opt < %s -internalize -S | FileCheck --check-prefix=ALL %s
+
+; Non existent files should be treated as if they were empty (so internalize
+; everything)
+; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=ALL %s
+
+; Internalize all but foo and j
+; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=FOO_AND_J %s
+
+; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file  2> /dev/null | FileCheck --check-prefix=FOO_AND_BAR %s
+
+; -file and -list options should be merged, the apifile contains foo and j
+; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %S/apifile -S | FileCheck --check-prefix=FOO_J_AND_BAR %s
+
+; ALL: @i = internal global
+; FOO_AND_J: @i = internal global
+; FOO_AND_BAR: @i = internal global
+; FOO_J_AND_BAR: @i = internal global
+@i = global i32 0
+
+; ALL: @j = internal global
+; FOO_AND_J: @j = global
+; FOO_AND_BAR: @j = internal global
+; FOO_J_AND_BAR: @j = global
+@j = global i32 0
+
+; ALL: define internal void @main() {
+; FOO_AND_J: define internal void @main() {
+; FOO_AND_BAR: define internal void @main() {
+; FOO_J_AND_BAR: define internal void @main() {
+define void @main() {
+        ret void
+}
+
+; ALL: define internal void @foo() {
+; FOO_AND_J: define void @foo() {
+; FOO_AND_BAR: define void @foo() {
+; FOO_J_AND_BAR: define void @foo() {
+define void @foo() {
+        ret void
+}
+
+; ALL: define available_externally void @bar() {
+; FOO_AND_J: define available_externally void @bar() {
+; FOO_AND_BAR: define available_externally void @bar() {
+; FOO_J_AND_BAR: define available_externally void @bar() {
+define available_externally void @bar() {
+  ret void
+}
diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/Internalize/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Internalize/used.ll b/test/Transforms/Internalize/used.ll
new file mode 100644
index 000000000000..85b85acd5083
--- /dev/null
+++ b/test/Transforms/Internalize/used.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -internalize -S | FileCheck %s
+
+@llvm.used = appending global [1 x void ()*] [void ()* @f], section "llvm.metadata"
+
+@llvm.compiler.used = appending global [1 x void ()*] [void ()* @g], section "llvm.metadata"
+
+; CHECK: define void @f()
+define void @f() {
+  ret void
+}
+
+; CHECK: define internal void @g()
+define void @g() {
+  ret void
+}
+
+; CHECK: define internal void @h()
+define void @h() {
+  ret void
+}
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index fe3dc77c9c13..32cc4de9285a 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -5,7 +5,7 @@ declare i32 @f2()
 declare void @f3()
 
 define i32 @test1(i1 %cond) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 
 	br i1 %cond, label %T1, label %F1
 
@@ -37,7 +37,7 @@ F2:
 
 ;; cond is known false on Entry -> F1 edge!
 define i32 @test2(i1 %cond) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 Entry:
 	br i1 %cond, label %T1, label %F1
 
@@ -62,7 +62,7 @@ F2:
 
 ; Undef handling.
 define i32 @test3(i1 %cond) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: T1:
 ; CHECK-NEXT: ret i32 42
 	br i1 undef, label %T1, label %F1
@@ -75,7 +75,7 @@ F1:
 }
 
 define i32 @test4(i1 %cond, i1 %cond2) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 
 	br i1 %cond, label %T1, label %F1
 
@@ -108,7 +108,7 @@ F2:
 
 ;; This tests that the branch in 'merge' can be cloned up into T1.
 define i32 @test5(i1 %cond, i1 %cond2) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 
 	br i1 %cond, label %T1, label %F1
 
@@ -144,7 +144,7 @@ F2:
 
 
 define i32 @test6(i32 %A) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 	%tmp455 = icmp eq i32 %A, 42
 	br i1 %tmp455, label %BB1, label %BB2
 
@@ -180,7 +180,7 @@ BB4:
 ;; rdar://7367025
 define i32 @test7(i1 %cond, i1 %cond2) {
 Entry:
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 	%v1 = call i32 @f1()
 	br i1 %cond, label %Merge, label %F1
 
@@ -213,7 +213,7 @@ F2:
 declare i1 @test8a()
 
 define i32 @test8b(i1 %cond, i1 %cond2) {
-; CHECK: @test8b
+; CHECK-LABEL: @test8b(
 T0:
         %A = call i1 @test8a()
 	br i1 %A, label %T1, label %F1
@@ -255,7 +255,7 @@ Y:
 ;;; Verify that we can handle constraint propagation through "xor x, 1".
 define i32 @test9(i1 %cond, i1 %cond2) {
 Entry:
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 	%v1 = call i32 @f1()
 	br i1 %cond, label %Merge, label %F1
 
@@ -298,7 +298,7 @@ declare void @test10f3()
 
 ;; Non-local condition threading.
 define i32 @test10g(i1 %cond) {
-; CHECK: @test10g
+; CHECK-LABEL: @test10g(
 ; CHECK-NEXT:   br i1 %cond, label %T2, label %F2
         br i1 %cond, label %T1, label %F1
 
@@ -329,7 +329,7 @@ F2:
 
 ; Impossible conditional constraints should get threaded.  BB3 is dead here.
 define i32 @test11(i32 %A) {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NEXT: icmp
 ; CHECK-NEXT: br i1 %tmp455, label %BB4, label %BB2
 	%tmp455 = icmp eq i32 %A, 42
@@ -357,7 +357,7 @@ BB4:
 
 ;; Correlated value through boolean expression.  GCC PR18046.
 define void @test12(i32 %A) {
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 entry:
   %cond = icmp eq i32 %A, 0
   br i1 %cond, label %bb, label %bb1
@@ -393,7 +393,7 @@ return:
 ;; rdar://7391699
 define i32 @test13(i1 %cond, i1 %cond2) {
 Entry:
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 	%v1 = call i32 @f1()
 	br i1 %cond, label %Merge, label %F1
 
@@ -421,7 +421,7 @@ F2:
 ; CHECK-NEXT:   br i1 %N, label %T2, label %F2
 }
 
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 define i32 @test14(i32 %in) {
 entry:
 	%A = icmp eq i32 %in, 0
@@ -453,7 +453,7 @@ right_ret:
 }
 
 ; PR5652
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 define i32 @test15(i32 %len) {
 entry:
 ; CHECK: icmp ult i32 %len, 13
diff --git a/test/Transforms/JumpThreading/indirectbr.ll b/test/Transforms/JumpThreading/indirectbr.ll
index 141277fec626..b87fb6c22287 100644
--- a/test/Transforms/JumpThreading/indirectbr.ll
+++ b/test/Transforms/JumpThreading/indirectbr.ll
@@ -67,7 +67,7 @@ L2:                                               ; preds = %indirectgoto
 ; Don't merge address-taken blocks.
 @.str = private unnamed_addr constant [4 x i8] c"%p\0A\00"
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: __here:
 ; CHECK: blockaddress(@test3, %__here)
 ; CHECK: __here1:
diff --git a/test/Transforms/JumpThreading/landing-pad.ll b/test/Transforms/JumpThreading/landing-pad.ll
new file mode 100644
index 000000000000..9ee0526d0446
--- /dev/null
+++ b/test/Transforms/JumpThreading/landing-pad.ll
@@ -0,0 +1,203 @@
+; RUN: opt < %s -disable-output -jump-threading
+
+%class.E = type { i32 (...)**, %class.C }
+%class.C = type { %class.A }
+%class.A = type { i32 }
+%class.D = type { %class.F }
+%class.F = type { %class.E }
+%class.B = type { %class.D* }
+
+@_ZTV1D = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1D to i8*), i8* bitcast (void (%class.D*)* @_ZN1D7doApplyEv to i8*)]
+@_ZTI1D = external unnamed_addr constant { i8*, i8*, i8* }
+
+define void @_ZN15EditCommandImpl5applyEv(%class.E* %this) uwtable align 2 {
+entry:
+  %0 = bitcast %class.E* %this to void (%class.E*)***
+  %vtable = load void (%class.E*)*** %0, align 8
+  %1 = load void (%class.E*)** %vtable, align 8
+  call void %1(%class.E* %this)
+  ret void
+}
+
+define void @_ZN1DC1Ev(%class.D* nocapture %this) unnamed_addr uwtable align 2 {
+entry:
+  call void @_ZN24CompositeEditCommandImplC2Ev()
+  %0 = getelementptr inbounds %class.D* %this, i64 0, i32 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+}
+
+define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr uwtable align 2 {
+entry:
+  call void @_ZN24CompositeEditCommandImplC2Ev()
+  %0 = getelementptr inbounds %class.D* %this, i64 0, i32 0, i32 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret void
+}
+
+declare void @_ZN24CompositeEditCommandImplC2Ev() #1
+
+define void @_ZN1D7doApplyEv(%class.D* nocapture %this) unnamed_addr nounwind readnone uwtable align 2 {
+entry:
+  ret void
+}
+
+define void @_Z3fn1v() uwtable {
+entry:
+  %call = call noalias i8* @_Znwm() #8
+  invoke void @_ZN24CompositeEditCommandImplC2Ev()
+          to label %_ZN1DC1Ev.exit unwind label %lpad
+
+_ZN1DC1Ev.exit:                                   ; preds = %entry
+  %0 = bitcast i8* %call to i32 (...)***
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  %_ref.i.i.i = getelementptr inbounds i8* %call, i64 8
+  %1 = bitcast i8* %_ref.i.i.i to i32*
+  %2 = load i32* %1, align 4
+  %inc.i.i.i = add nsw i32 %2, 1
+  store i32 %inc.i.i.i, i32* %1, align 4
+  %3 = bitcast i8* %call to %class.D*
+  invoke void @_ZN1D7doApplyEv(%class.D* %3)
+          to label %_ZN15EditCommandImpl5applyEv.exit unwind label %lpad1
+
+_ZN15EditCommandImpl5applyEv.exit:                ; preds = %_ZN1DC1Ev.exit
+  invoke void @_ZN1D16deleteKeyPressedEv()
+          to label %invoke.cont7 unwind label %lpad1
+
+invoke.cont7:                                     ; preds = %_ZN15EditCommandImpl5applyEv.exit
+  ret void
+
+lpad:                                             ; preds = %entry
+  %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  call void @_ZdlPv() #9
+  unreachable
+
+lpad1:                                            ; preds = %_ZN1DC1Ev.exit, %_ZN15EditCommandImpl5applyEv.exit
+  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %6 = load i32* %1, align 4
+  %tobool.i.i.i = icmp eq i32 %6, 0
+  br i1 %tobool.i.i.i, label %_ZN1BI1DED1Ev.exit, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %lpad1
+  br i1 undef, label %_ZN1BI1DED1Ev.exit, label %delete.notnull.i.i.i
+
+delete.notnull.i.i.i:                             ; preds = %if.then.i.i.i
+  call void @_ZdlPv() #9
+  unreachable
+
+_ZN1BI1DED1Ev.exit:                               ; preds = %lpad1, %if.then.i.i.i
+  resume { i8*, i32 } undef
+
+terminate.lpad:                                   ; No predecessors!
+  %7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  unreachable
+}
+
+define void @_ZN1BI1DEC1EPS0_(%class.B* nocapture %this, %class.D* %p1) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr.i = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  store %class.D* %p1, %class.D** %m_ptr.i, align 8
+  %_ref.i.i = getelementptr inbounds %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %0 = load i32* %_ref.i.i, align 4
+  %inc.i.i = add nsw i32 %0, 1
+  store i32 %inc.i.i, i32* %_ref.i.i, align 4
+  ret void
+}
+
+declare noalias i8* @_Znwm()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv()
+
+define %class.D* @_ZN1BI1DEptEv(%class.B* nocapture readonly %this) nounwind readonly uwtable align 2 {
+entry:
+  %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  %0 = load %class.D** %m_ptr, align 8
+  ret %class.D* %0
+}
+
+declare void @_ZN1D16deleteKeyPressedEv()
+
+define void @_ZN1BI1DED1Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr.i = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  %0 = load %class.D** %m_ptr.i, align 8
+  %_ref.i.i = getelementptr inbounds %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %1 = load i32* %_ref.i.i, align 4
+  %tobool.i.i = icmp eq i32 %1, 0
+  br i1 %tobool.i.i, label %_ZN1BI1DED2Ev.exit, label %if.then.i.i
+
+if.then.i.i:                                      ; preds = %entry
+  br i1 undef, label %_ZN1BI1DED2Ev.exit, label %delete.notnull.i.i
+
+delete.notnull.i.i:                               ; preds = %if.then.i.i
+  call void @_ZdlPv() #9
+  unreachable
+
+_ZN1BI1DED2Ev.exit:                               ; preds = %entry, %if.then.i.i
+  ret void
+}
+
+declare hidden void @__clang_call_terminate()
+
+define void @_ZN1BI1DED2Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  %0 = load %class.D** %m_ptr, align 8
+  %_ref.i = getelementptr inbounds %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %1 = load i32* %_ref.i, align 4
+  %tobool.i = icmp eq i32 %1, 0
+  br i1 %tobool.i, label %_ZN1AI1CE5derefEv.exit, label %if.then.i
+
+if.then.i:                                        ; preds = %entry
+  br i1 undef, label %_ZN1AI1CE5derefEv.exit, label %delete.notnull.i
+
+delete.notnull.i:                                 ; preds = %if.then.i
+  call void @_ZdlPv() #9
+  unreachable
+
+_ZN1AI1CE5derefEv.exit:                           ; preds = %entry, %if.then.i
+  ret void
+}
+
+define void @_ZN1AI1CE5derefEv(%class.A* nocapture readonly %this) nounwind uwtable align 2 {
+entry:
+  %_ref = getelementptr inbounds %class.A* %this, i64 0, i32 0
+  %0 = load i32* %_ref, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br i1 undef, label %if.end, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  call void @_ZdlPv() #9
+  unreachable
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+define void @_ZN1BI1DEC2EPS0_(%class.B* nocapture %this, %class.D* %p1) unnamed_addr uwtable align 2 {
+entry:
+  %m_ptr = getelementptr inbounds %class.B* %this, i64 0, i32 0
+  store %class.D* %p1, %class.D** %m_ptr, align 8
+  %_ref.i = getelementptr inbounds %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
+  %0 = load i32* %_ref.i, align 4
+  %inc.i = add nsw i32 %0, 1
+  store i32 %inc.i, i32* %_ref.i, align 4
+  ret void
+}
+
+define void @_ZN1AI1CE3refEv(%class.A* nocapture %this) nounwind uwtable align 2 {
+entry:
+  %_ref = getelementptr inbounds %class.A* %this, i64 0, i32 0
+  %0 = load i32* %_ref, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %_ref, align 4
+  ret void
+}
diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/JumpThreading/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index 9676efec9df2..201e604e0c5e 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -10,7 +10,7 @@ declare void @quux()
 ; Mostly theoretical since instruction combining simplifies all selects of
 ; booleans where at least one operand is true/false/undef.
 
-; CHECK: @test_br
+; CHECK-LABEL: @test_br(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 %cond, label %L1,
 define void @test_br(i1 %cond, i1 %value) nounwind {
@@ -34,7 +34,7 @@ L3:
 
 ; Jump threading of switch with select as condition.
 
-; CHECK: @test_switch
+; CHECK-LABEL: @test_switch(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 %cond, label %L1,
 define void @test_switch(i1 %cond, i8 %value) nounwind {
@@ -69,7 +69,7 @@ L4:
 
 ; Jump threading of indirectbr with select as address.
 
-; CHECK: @test_indirectbr
+; CHECK-LABEL: @test_indirectbr(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 %cond, label %L1, label %L3
 define void @test_indirectbr(i1 %cond, i8* %address) nounwind {
@@ -93,7 +93,7 @@ L3:
 
 ; A more complicated case: the condition is a select based on a comparison.
 
-; CHECK: @test_switch_cmp
+; CHECK-LABEL: @test_switch_cmp(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 %cond, label %L0, label %[[THREADED:[A-Za-z.0-9]+]]
 ; CHECK: [[THREADED]]:
@@ -157,3 +157,66 @@ L3:
 L4:
   ret void
 }
+
+define void @unfold1(double %x, double %y) nounwind {
+entry:
+  %sub = fsub double %x, %y
+  %cmp = fcmp ogt double %sub, 1.000000e+01
+  br i1 %cmp, label %cond.end4, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %add = fadd double %x, %y
+  %cmp1 = fcmp ogt double %add, 1.000000e+01
+  %add. = select i1 %cmp1, double %add, double 0.000000e+00
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %entry, %cond.false
+  %cond5 = phi double [ %add., %cond.false ], [ %sub, %entry ]
+  %cmp6 = fcmp oeq double %cond5, 0.000000e+00
+  br i1 %cmp6, label %if.then, label %if.end
+
+if.then:                                          ; preds = %cond.end4
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %cond.end4
+  ret void
+
+; CHECK-LABEL: @unfold1
+; CHECK: br i1 %cmp, label %cond.end4, label %cond.false
+; CHECK: br i1 %cmp1, label %cond.end4, label %if.then
+; CHECK: br i1 %cmp6, label %if.then, label %if.end
+; CHECK: br label %if.end
+}
+
+
+define void @unfold2(i32 %x, i32 %y) nounwind {
+entry:
+  %sub = sub nsw i32 %x, %y
+  %cmp = icmp sgt i32 %sub, 10
+  br i1 %cmp, label %cond.end4, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %add = add nsw i32 %x, %y
+  %cmp1 = icmp sgt i32 %add, 10
+  %add. = select i1 %cmp1, i32 0, i32 %add
+  br label %cond.end4
+
+cond.end4:                                        ; preds = %entry, %cond.false
+  %cond5 = phi i32 [ %add., %cond.false ], [ %sub, %entry ]
+  %cmp6 = icmp eq i32 %cond5, 0
+  br i1 %cmp6, label %if.then, label %if.end
+
+if.then:                                          ; preds = %cond.end4
+  call void @foo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %cond.end4
+  ret void
+
+; CHECK-LABEL: @unfold2
+; CHECK: br i1 %cmp, label %if.end, label %cond.false
+; CHECK: br i1 %cmp1, label %if.then, label %cond.end4
+; CHECK: br i1 %cmp6, label %if.then, label %if.end
+; CHECK: br label %if.end
+}
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index 78d36e7053c9..e5bf64b9e256 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -6,7 +6,7 @@ target triple = "i386-apple-darwin7"
 ; Test that we can thread through the block with the partially redundant load (%2).
 ; rdar://6402033
 define i32 @test1(i32* %P) nounwind {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
 	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
 	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
@@ -45,7 +45,7 @@ declare i32 @f2(...)
 ; rdar://11039258
 
 define i32 @test2(i32* %P) nounwind {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 entry:
 	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
 	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
@@ -75,7 +75,7 @@ bb3:		; preds = %bb1
 	ret i32 %res.0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !3, metadata !3, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LCSSA/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
index 86c2679b076a..7cf7a323552d 100644
--- a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
+++ b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
@@ -30,8 +30,10 @@ for.end:                                          ; preds = %for.inc
   ret void
 }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
+!0 = metadata !{metadata !5, metadata !5, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{metadata !6, metadata !6, i64 0}
+!5 = metadata !{metadata !"any pointer", metadata !1}
+!6 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LICM/atomics.ll b/test/Transforms/LICM/atomics.ll
index 3902152ba2e5..acf605d2dad8 100644
--- a/test/Transforms/LICM/atomics.ll
+++ b/test/Transforms/LICM/atomics.ll
@@ -14,7 +14,7 @@ loop:
 
 end:
   ret i32 %val
-; CHECK: define i32 @test1(
+; CHECK-LABEL: define i32 @test1(
 ; CHECK: load atomic
 ; CHECK-NEXT: br label %loop
 }
@@ -33,7 +33,7 @@ loop:
 
 end:
   ret i32 %val
-; CHECK: define i32 @test2(
+; CHECK-LABEL: define i32 @test2(
 ; CHECK: load atomic
 ; CHECK-NEXT: %exitcond = icmp ne
 ; CHECK-NEXT: br i1 %exitcond, label %end, label %loop
@@ -54,7 +54,7 @@ loop:
 
 end:
   ret i32 %vala
-; CHECK: define i32 @test3(
+; CHECK-LABEL: define i32 @test3(
 ; CHECK: load atomic i32* %x unordered
 ; CHECK-NEXT: br label %loop
 }
@@ -73,7 +73,7 @@ loop:
 
 end:
   ret i32 %vala
-; CHECK: define i32 @test4(
+; CHECK-LABEL: define i32 @test4(
 ; CHECK: load atomic i32* %y monotonic
 ; CHECK-NEXT: store atomic
 }
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index 889d4e2e3af9..e5c774ff8e9d 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -33,30 +33,33 @@ for.end104:                                       ; preds = %for.cond.backedge
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.module.flags = !{!26}
 !llvm.dbg.sp = !{!0, !6, !9, !10}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", metadata !1, i32 112, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 127169)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", i32 112, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !25} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !25, i32 12, metadata !"clang version 2.9 (trunk 127169)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", metadata !1, i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{null}
-!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", metadata !1, i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", metadata !1, i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa]
 !11 = metadata !{i32 281, i32 9, metadata !12, null}
-!12 = metadata !{i32 589835, metadata !13, i32 272, i32 5, metadata !1, i32 32} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589835, metadata !14, i32 271, i32 5, metadata !1, i32 31} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 589835, metadata !10, i32 267, i32 1, metadata !1, i32 30} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 589835, metadata !25, metadata !13, i32 272, i32 5, i32 32} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589835, metadata !25, metadata !14, i32 271, i32 5, i32 31} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 589835, metadata !25, metadata !10, i32 267, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 271, i32 5, metadata !14, null}
 !16 = metadata !{i32 284, i32 10, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !12, i32 282, i32 9, metadata !1, i32 33} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 589835, metadata !25, metadata !12, i32 282, i32 9, i32 33} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{double undef}
 !19 = metadata !{i32 590080, metadata !14, metadata !"temp", metadata !1, i32 268, metadata !20, i32 0} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 589860, metadata !2, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!20 = metadata !{i32 589860, null, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !21 = metadata !{i32 286, i32 14, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !17, i32 285, i32 13, metadata !1, i32 34} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 589835, metadata !25, metadata !17, i32 285, i32 13, i32 34} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 296, i32 13, metadata !17, null}
 !24 = metadata !{i32 313, i32 1, metadata !14, null}
+!25 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp"}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 1ca377eb4a99..b4d297ac27bf 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -7,7 +7,7 @@ declare void @foo()
 ; This testcase tests for a problem where LICM hoists 
 ; potentially trapping instructions when they are not guaranteed to execute.
 define i32 @test1(i1 %c) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 	%A = load i32* @X		; <i32> [#uses=2]
 	br label %Loop
 Loop:		; preds = %LoopTail, %0
@@ -34,7 +34,7 @@ declare void @foo2(i32) nounwind
 
 ;; It is ok and desirable to hoist this potentially trapping instruction.
 define i32 @test2(i1 %c) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: load i32* @X
 ; CHECK-NEXT: %B = sdiv i32 4, %A
 	%A = load i32* @X		; <i32> [#uses=2]
@@ -52,7 +52,7 @@ Out:		; preds = %Loop
 
 ; This loop invariant instruction should be constant folded, not hoisted.
 define i32 @test3(i1 %c) {
-; CHECK: define i32 @test3
+; CHECK-LABEL: define i32 @test3(
 ; CHECK: call void @foo2(i32 6)
 	%A = load i32* @X		; <i32> [#uses=2]
 	br label %Loop
@@ -65,7 +65,7 @@ Out:		; preds = %Loop
 	ret i32 %C
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: call
 ; CHECK: sdiv
 ; CHECK: ret
@@ -91,7 +91,7 @@ for.end:                                          ; preds = %for.body
 declare void @foo_may_call_exit(i32)
 
 ; PR14854
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: extractvalue
 ; CHECK: br label %tailrecurse
 ; CHECK: tailrecurse:
diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LICM/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll
index b016265bbb03..86f11fe04435 100644
--- a/test/Transforms/LICM/promote-order.ll
+++ b/test/Transforms/LICM/promote-order.ll
@@ -37,5 +37,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 }
 
 !0 = metadata !{metadata !"minimal TBAA"}
-!1 = metadata !{metadata !"float", metadata !0}
-!2 = metadata !{metadata !"int", metadata !0}
+!1 = metadata !{metadata !3, metadata !3, i64 0}
+!2 = metadata !{metadata !4, metadata !4, i64 0}
+!3 = metadata !{metadata !"float", metadata !0}
+!4 = metadata !{metadata !"int", metadata !0}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index e7eab92aa8d7..92ef15581ce0 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -6,7 +6,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 define void @test1(i32 %i) {
 Entry:
   br label %Loop
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: Entry:
 ; CHECK-NEXT:   load i32* @X
 ; CHECK-NEXT:   br label %Loop
@@ -32,7 +32,7 @@ Out:
 define void @test2(i32 %i) {
 Entry:
   br label %Loop
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: Entry:
 ; CHECK-NEXT:    %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:    br label %Loop
@@ -55,7 +55,7 @@ Exit:   ; preds = %Loop
 
 
 define void @test3(i32 %i) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
   br label %Loop
 Loop:
         ; Should not promote this to a register
@@ -73,7 +73,7 @@ Out:    ; preds = %Loop
 
 ; PR8041
 define void @test4(i8* %x, i8 %n) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
   %handle1 = alloca i8*
   %handle2 = alloca i8*
   store i8* %x, i8** %handle1
@@ -121,7 +121,7 @@ exit:
 define void @test5(i32 %i, i32** noalias %P2) {
 Entry:
   br label %Loop
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: Entry:
 ; CHECK-NEXT:   load i32* @X
 ; CHECK-NEXT:   br label %Loop
@@ -181,7 +181,9 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; CHECK-NEXT:  store i32 %inc, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
+!0 = metadata !{metadata !4, metadata !4, i64 0}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"float", metadata !1}
+!3 = metadata !{metadata !5, metadata !5, i64 0}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
index 68e4b64bf9bf..b503f96e42c0 100644
--- a/test/Transforms/LICM/sinking.ll
+++ b/test/Transforms/LICM/sinking.ll
@@ -14,7 +14,7 @@ Loop:		; preds = %Loop, %0
 
 Out:		; preds = %Loop
 	ret i32 %A
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: Out:
 ; CHECK-NEXT: call i32 @strlen
 ; CHECK-NEXT: ret i32 %A
@@ -33,7 +33,7 @@ Loop:		; preds = %Loop, %0
 
 Out:		; preds = %Loop
 	ret double %A
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: Out:
 ; CHECK-NEXT: call double @sin
 ; CHECK-NEXT: ret double %A
@@ -51,7 +51,7 @@ Exit:
 	%Y = phi i32 [ 0, %Entry ], [ %X, %Loop ]
 	ret void
         
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK:     Exit.loopexit:
 ; CHECK-NEXT:  %X = add i32 0, 1
 ; CHECK-NEXT:  br label %Exit
@@ -74,7 +74,7 @@ Loop:		; preds = %Loop, %Entry
 	br i1 %tmp.1, label %Loop, label %Out
 Out:		; preds = %Loop
 	ret i32 %tmp.7
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK:     Out:
 ; CHECK-NEXT:  mul i32 %N, %N_addr.0.pn
 ; CHECK-NEXT:  sub i32 %tmp.6, %N
@@ -98,7 +98,7 @@ Loop:		; preds = %Loop, %Entry
 	br i1 %tmp.1, label %Loop, label %Out
 Out:		; preds = %Loop
 	ret i32 %tmp.6
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK:     Out:
 ; CHECK-NEXT:  %tmp.6 = load i32* @X
 ; CHECK-NEXT:  ret i32 %tmp.6
@@ -122,7 +122,7 @@ Loop:
 	br i1 false, label %Loop, label %Out
 Out:		; preds = %Loop
 	ret i32 %sunk2
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK:     Out:
 ; CHECK-NEXT:  %dead = getelementptr %Ty* @X2, i64 0, i32 0
 ; CHECK-NEXT:  %sunk2 = load i32* %dead
@@ -150,7 +150,7 @@ Out1:		; preds = %Loop
 	ret i32 %tmp.7
 Out2:		; preds = %ContLoop
 	ret i32 %tmp.7
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK:     Out1:
 ; CHECK-NEXT:  mul i32 %N, %N_addr.0.pn
 ; CHECK-NEXT:  sub i32 %tmp.6, %N
@@ -179,7 +179,7 @@ exit1:		; preds = %Loop
 	ret i32 0
 exit2:		; preds = %Cont
 	ret i32 %V
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK:     exit1:
 ; CHECK-NEXT:  ret i32 0
 ; CHECK:     exit2:
@@ -206,7 +206,7 @@ loopentry.3.i.preheader:		; preds = %loopentry.3.i.preheader.loopexit, %loopentr
 return.i:		; preds = %no_exit.1.i
 	ret void
 
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: loopentry.3.i.preheader.loopexit:
 ; CHECK-NEXT:  %inc.1.i = add i32 0, 1
 ; CHECK-NEXT:  br label %loopentry.3.i.preheader
@@ -227,7 +227,7 @@ Loop:		; preds = %Loop, %Entry
 Out:		; preds = %Loop
 	ret i32 %tmp.6
         
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: Out: 
 ; CHECK-NEXT:  %tmp.6 = sdiv i32 %N, %N_addr.0.pn
 ; CHECK-NEXT:  ret i32 %tmp.6
@@ -241,7 +241,7 @@ Loop:
 	br i1 false, label %Loop, label %Out
 Out:
 	ret void
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK:     Out:
 ; CHECK-NEXT:  ret void
 }
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
index 4c4d036b7dbf..4244f157d9f8 100644
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@@ -2,7 +2,7 @@
 
 ; UDiv is safe to speculate if the denominator is known non-zero.
 
-; CHECK: @safe_udiv
+; CHECK-LABEL: @safe_udiv(
 ; CHECK:      %div = udiv i64 %x, %or
 ; CHECK-NEXT: br label %for.body
 
@@ -35,7 +35,7 @@ for.end:                                          ; preds = %for.inc, %entry
 
 ; UDiv is unsafe to speculate if the denominator is not known non-zero.
 
-; CHECK: @unsafe_udiv
+; CHECK-LABEL: @unsafe_udiv(
 ; CHECK-NOT:  udiv
 ; CHECK: for.body:
 
@@ -68,7 +68,7 @@ for.end:                                          ; preds = %for.inc, %entry
 ; SDiv is safe to speculate if the denominator is known non-zero and
 ; known to have at least one zero bit.
 
-; CHECK: @safe_sdiv
+; CHECK-LABEL: @safe_sdiv(
 ; CHECK:      %div = sdiv i64 %x, %or
 ; CHECK-NEXT: br label %for.body
 
@@ -102,7 +102,7 @@ for.end:                                          ; preds = %for.inc, %entry
 
 ; SDiv is unsafe to speculate if the denominator is not known non-zero.
 
-; CHECK: @unsafe_sdiv_a
+; CHECK-LABEL: @unsafe_sdiv_a(
 ; CHECK-NOT:  sdiv
 ; CHECK: for.body:
 
@@ -135,7 +135,7 @@ for.end:                                          ; preds = %for.inc, %entry
 
 ; SDiv is unsafe to speculate if the denominator is not known to have a zero bit.
 
-; CHECK: @unsafe_sdiv_b
+; CHECK-LABEL: @unsafe_sdiv_b(
 ; CHECK-NOT:  sdiv
 ; CHECK: for.body:
 
diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll
new file mode 100644
index 000000000000..886d7f2f8074
--- /dev/null
+++ b/test/Transforms/LICM/volatile-alias.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s
+; The objects *p and *q are aliased to each other, but even though *q is
+; volatile, *p can be considered invariant in the loop. Check if it is moved
+; out of the loop.
+; CHECK: load i32* %p
+; CHECK: for.body:
+; CHECK: load volatile i32* %q
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32* %p, i32* %q, i32 %n) #0 { ; returns the sum, for i in [0, %n), of *p + (volatile) *q
+entry:
+  %p.addr = alloca i32*, align 8
+  %q.addr = alloca i32*, align 8
+  %n.addr = alloca i32, align 4
+  %i = alloca i32, align 4                        ; loop counter
+  %s = alloca i32, align 4                        ; running sum
+  store i32* %p, i32** %p.addr, align 8
+  store i32* %q, i32** %q.addr, align 8
+  store i32 %n, i32* %n.addr, align 4
+  store i32 0, i32* %s, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %1 = load i32* %n.addr, align 4
+  %cmp = icmp slt i32 %0, %1                      ; continue while i < n (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32** %p.addr, align 8
+  %3 = load i32* %2, align 4                      ; plain load of *p: the load this test expects to be moved out of the loop
+  %4 = load i32** %q.addr, align 8
+  %5 = load volatile i32* %4, align 4             ; volatile load of *q: the test looks for it after the for.body label
+  %add = add nsw i32 %3, %5
+  %6 = load i32* %s, align 4
+  %add1 = add nsw i32 %6, %add                    ; s += *p + *q
+  store i32 %add1, i32* %s, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32* %s, align 4
+  ret i32 %8
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopDeletion/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/Transforms/LoopIdiom/X86/lit.local.cfg
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopIdiom/basic-address-space.ll b/test/Transforms/LoopIdiom/basic-address-space.ll
new file mode 100644
index 000000000000..697ab3726807
--- /dev/null
+++ b/test/Transforms/LoopIdiom/basic-address-space.ll
@@ -0,0 +1,91 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Two dimensional nested loop should be promoted to one big memset.
+define void @test10(i8 addrspace(2)* %X) nounwind ssp { ; the datalayout declares p2:16:16:16, which matches the i16 length in the expected memset
+; CHECK-LABEL: @test10(
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry, %for.inc10
+  %i.04 = phi i16 [ 0, %entry ], [ %inc12, %for.inc10 ]
+  br label %for.body5
+
+for.body5:                                        ; preds = %for.body5, %bb.nph
+  %j.02 = phi i16 [ 0, %bb.nph ], [ %inc, %for.body5 ]
+  %mul = mul nsw i16 %i.04, 100                   ; row offset: i * 100
+  %add = add nsw i16 %j.02, %mul                  ; flat index: i * 100 + j
+  %arrayidx = getelementptr inbounds i8 addrspace(2)* %X, i16 %add
+  store i8 0, i8 addrspace(2)* %arrayidx, align 1 ; X[i * 100 + j] = 0
+  %inc = add nsw i16 %j.02, 1
+  %cmp4 = icmp eq i16 %inc, 100                   ; inner loop covers j = 0..99
+  br i1 %cmp4, label %for.inc10, label %for.body5
+
+for.inc10:                                        ; preds = %for.body5
+  %inc12 = add nsw i16 %i.04, 1
+  %cmp = icmp eq i16 %inc12, 100                  ; outer loop covers i = 0..99, so 100 * 100 = 10000 bytes in total
+  br i1 %cmp, label %for.end13, label %bb.nph
+
+for.end13:                                        ; preds = %for.inc10
+  ret void
+}
+
+define void @test11_pattern(i32 addrspace(2)* nocapture %P) nounwind ssp { ; stores i32 1 to P[0..9999]; the checks require that no memset_pattern call appears
+; CHECK-LABEL: @test11_pattern(
+; CHECK-NOT: memset_pattern
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32 addrspace(2)* %P, i64 %indvar
+  store i32 1, i32 addrspace(2)* %arrayidx, align 4 ; P[indvar] = 1 (NOTE(review): presumably left as a loop because %P is in addrspace(2) - confirm)
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000     ; exit after 10000 iterations
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; PR9815 - This is a partial overlap case that cannot be safely transformed
+; into a memcpy.
+@g_50 = addrspace(2) global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
+
+
+define i32 @test14() nounwind { ; copies g_50[i + 4] into g_50[i + 5] for i = 0, 1 and returns g_50[6]
+; CHECK-LABEL: @test14(
+; CHECK: for.body:
+; CHECK: load i32
+; CHECK: store i32
+; CHECK: br i1 %cmp
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %add = add nsw i32 %tmp5, 4
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom ; &g_50[i + 4]
+  %tmp2 = load i32 addrspace(2)* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, 5
+  %idxprom5 = sext i32 %add4 to i64
+  %arrayidx6 = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom5 ; &g_50[i + 5]
+  store i32 %tmp2, i32 addrspace(2)* %arrayidx6, align 4 ; writes the element that the next iteration loads
+  %inc = add nsw i32 %tmp5, 1
+  %cmp = icmp slt i32 %inc, 2                     ; loop body runs for i = 0 and i = 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %tmp8 = load i32 addrspace(2)* getelementptr inbounds ([7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4
+  ret i32 %tmp8
+}
+
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 06a5bd90864d..835a9f695ca9 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -16,7 +16,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 }
@@ -39,7 +39,7 @@ for.body.cont:
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test1a
+; CHECK-LABEL: @test1a(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 }
@@ -60,7 +60,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: br i1 %cmp10,
 ; CHECK: %0 = mul i64 %Size, 4
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
@@ -85,7 +85,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %entry
   ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: memset
 ; CHECK: ret void
 }
@@ -111,7 +111,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK-TODO: @test4
+; CHECK-TODO-LABEL: @test4(
 ; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
 ; CHECK-TODO-NOT: store
 }
@@ -133,7 +133,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NOT: memset
 ; CHECK: ret void
 }
@@ -158,7 +158,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
@@ -183,7 +183,7 @@ for.body.cont:
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 }
@@ -203,7 +203,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: store i64 0, i64* %PI
 }
 
@@ -235,7 +235,7 @@ for.body:                                         ; preds = %bb.nph, %for.body
 
 for.end:                                          ; preds = %for.body, %entry
   ret void
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NOT: llvm.memcpy
 ; CHECK: ret void
 }
@@ -267,7 +267,7 @@ for.inc10:                                        ; preds = %for.body5
 
 for.end13:                                        ; preds = %for.inc10
   ret void
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: entry:
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
 ; CHECK-NOT: store
@@ -291,7 +291,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %for.body
   ret void
-; CHECK: @test11_pattern
+; CHECK-LABEL: @test11_pattern(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: memset_pattern
@@ -314,7 +314,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %for.body
   ret void
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
@@ -340,7 +340,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %for.body
   ret void
-; CHECK: @test13_pattern
+; CHECK-LABEL: @test13_pattern(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: memset_pattern
@@ -375,7 +375,7 @@ for.body:                                         ; preds = %for.inc, %for.body.
 for.end:                                          ; preds = %for.inc
   %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
   ret i32 %tmp8
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK: for.body:
 ; CHECK: load i32
 ; CHECK: store i32
@@ -389,7 +389,7 @@ define void @PR14241(i32* %s, i64 %size) {
 ; instead of a memmove. If we get the memmove transform back, this will catch
 ; regressions.
 ;
-; CHECK: @PR14241
+; CHECK-LABEL: @PR14241(
 
 entry:
   %end.idx = add i64 %size, -1
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index d31662d57e98..ef4a478d0e85 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -27,23 +27,26 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.module.flags = !{!19}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (double*)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"li.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"li.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777218, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!6 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 589860, metadata !2, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 589860, null, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 2, i32 18, metadata !0, null}
 !9 = metadata !{i32 0}
 !10 = metadata !{i32 590080, metadata !11, metadata !"i", metadata !1, i32 3, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !12, i32 3, i32 3, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 589835, metadata !0, i32 2, i32 21, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 589835, metadata !18, metadata !12, i32 3, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 589835, metadata !18, metadata !0, i32 2, i32 21, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !14 = metadata !{i32 3, i32 3, metadata !12, null}
 !15 = metadata !{i32 4, i32 5, metadata !11, null}
 !16 = metadata !{i32 3, i32 29, metadata !11, null}
 !17 = metadata !{i32 5, i32 1, metadata !12, null}
+!18 = metadata !{metadata !"li.c", metadata !"/private/tmp"}
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopIdiom/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/memset_noidiom.ll b/test/Transforms/LoopIdiom/memset_noidiom.ll
index 168eb95357c4..f2b55aed5467 100644
--- a/test/Transforms/LoopIdiom/memset_noidiom.ll
+++ b/test/Transforms/LoopIdiom/memset_noidiom.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; CHECK: @memset
+; CHECK-LABEL: @memset(
 ; CHECK-NOT: llvm.memset
 define i8* @memset(i8* %b, i32 %c, i64 %len) nounwind uwtable ssp {
 entry:
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
new file mode 100644
index 000000000000..314a14947e3e
--- /dev/null
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -0,0 +1,327 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; int foo(int a);
+; void bar(int *x) {
+;   for (int i = 0; i < 500; i += 3) {
+;     foo(i);
+;     foo(i+1);
+;     foo(i+2);
+;   }
+; }
+
+; Input loop calls @foo on i, i+1 and i+2 and steps i by 3. Function Attrs: nounwind uwtable
+define void @bar(i32* nocapture readnone %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @foo(i32 %i.08) #1
+  %add = add nsw i32 %i.08, 1
+  %call1 = tail call i32 @foo(i32 %add) #1
+  %add2 = add nsw i32 %i.08, 2
+  %call3 = tail call i32 @foo(i32 %add2) #1
+  %add3 = add nsw i32 %i.08, 3
+  %exitcond = icmp eq i32 %add3, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @bar(
+; Expected after -loop-reroll: one @foo call per iteration on an induction variable stepping by 1.
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond1 = icmp eq i32 %indvar.next, 498
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i32 @foo(i32)
+
+; void hi1(int *x) {
+;   for (int i = 0; i < 1500; i += 3) {
+;     x[i] = foo(0);
+;     x[i+1] = foo(0);
+;     x[i+2] = foo(0);
+;   }
+; }
+
+; Input loop stores @foo(0) to x[i], x[i+1], x[i+2] and steps i by 3. Function Attrs: nounwind uwtable
+define void @hi1(i32* nocapture %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %call = tail call i32 @foo(i32 0) #1
+  %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
+  store i32 %call, i32* %arrayidx, align 4
+  %call1 = tail call i32 @foo(i32 0) #1
+  %0 = add nsw i64 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds i32* %x, i64 %0
+  store i32 %call1, i32* %arrayidx3, align 4
+  %call4 = tail call i32 @foo(i32 0) #1
+  %1 = add nsw i64 %indvars.iv, 2
+  %arrayidx7 = getelementptr inbounds i32* %x, i64 %1
+  store i32 %call4, i32* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
+  %2 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %2, 1500
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @hi1(
+; Expected after -loop-reroll: one call/store pair per iteration, exiting when the new counter reaches 1500.
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %call = tail call i32 @foo(i32 0) #1
+; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
+; CHECK: store i32 %call, i32* %arrayidx, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 1500
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; void hi2(int *x) {
+;   for (int i = 0; i < 500; ++i) {
+;     x[3*i] = foo(0);
+;     x[3*i+1] = foo(0);
+;     x[3*i+2] = foo(0);
+;   }
+; }
+
+; Input loop stores @foo(0) to x[3*i], x[3*i+1], x[3*i+2] and steps i by 1. Function Attrs: nounwind uwtable
+define void @hi2(i32* nocapture %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %call = tail call i32 @foo(i32 0) #1
+  %0 = mul nsw i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32* %x, i64 %0
+  store i32 %call, i32* %arrayidx, align 4
+  %call1 = tail call i32 @foo(i32 0) #1
+  %1 = add nsw i64 %0, 1
+  %arrayidx4 = getelementptr inbounds i32* %x, i64 %1
+  store i32 %call1, i32* %arrayidx4, align 4
+  %call5 = tail call i32 @foo(i32 0) #1
+  %2 = add nsw i64 %0, 2
+  %arrayidx9 = getelementptr inbounds i32* %x, i64 %2
+  store i32 %call5, i32* %arrayidx9, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @hi2(
+; Expected after -loop-reroll: %indvars.iv indexes %x directly (no multiply by 3) and the exit bound becomes 1500.
+; CHECK: for.body:
+; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: %call = tail call i32 @foo(i32 0) #1
+; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
+; CHECK: store i32 %call, i32* %arrayidx, align 4
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %exitcond1 = icmp eq i64 %indvars.iv.next, 1500
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; void goo(float alpha, float *a, float *b) {
+;   for (int i = 0; i < 3200; i += 5) {
+;     a[i] += alpha * b[i];
+;     a[i + 1] += alpha * b[i + 1];
+;     a[i + 2] += alpha * b[i + 2];
+;     a[i + 3] += alpha * b[i + 3];
+;     a[i + 4] += alpha * b[i + 4];
+;   }
+; }
+
+; Input loop performs a[i+k] += alpha * b[i+k] for k = 0..4 and steps i by 5. Function Attrs: nounwind uwtable
+define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %mul = fmul float %0, %alpha
+  %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %add = fadd float %1, %mul
+  store float %add, float* %arrayidx2, align 4
+  %2 = add nsw i64 %indvars.iv, 1
+  %arrayidx5 = getelementptr inbounds float* %b, i64 %2
+  %3 = load float* %arrayidx5, align 4
+  %mul6 = fmul float %3, %alpha
+  %arrayidx9 = getelementptr inbounds float* %a, i64 %2
+  %4 = load float* %arrayidx9, align 4
+  %add10 = fadd float %4, %mul6
+  store float %add10, float* %arrayidx9, align 4
+  %5 = add nsw i64 %indvars.iv, 2
+  %arrayidx13 = getelementptr inbounds float* %b, i64 %5
+  %6 = load float* %arrayidx13, align 4
+  %mul14 = fmul float %6, %alpha
+  %arrayidx17 = getelementptr inbounds float* %a, i64 %5
+  %7 = load float* %arrayidx17, align 4
+  %add18 = fadd float %7, %mul14
+  store float %add18, float* %arrayidx17, align 4
+  %8 = add nsw i64 %indvars.iv, 3
+  %arrayidx21 = getelementptr inbounds float* %b, i64 %8
+  %9 = load float* %arrayidx21, align 4
+  %mul22 = fmul float %9, %alpha
+  %arrayidx25 = getelementptr inbounds float* %a, i64 %8
+  %10 = load float* %arrayidx25, align 4
+  %add26 = fadd float %10, %mul22
+  store float %add26, float* %arrayidx25, align 4
+  %11 = add nsw i64 %indvars.iv, 4
+  %arrayidx29 = getelementptr inbounds float* %b, i64 %11
+  %12 = load float* %arrayidx29, align 4
+  %mul30 = fmul float %12, %alpha
+  %arrayidx33 = getelementptr inbounds float* %a, i64 %11
+  %13 = load float* %arrayidx33, align 4
+  %add34 = fadd float %13, %mul30
+  store float %add34, float* %arrayidx33, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+  %14 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %14, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @goo(
+; Expected after -loop-reroll: one load/fmul/fadd/store group per iteration, exiting when the counter reaches 3200.
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %arrayidx = getelementptr inbounds float* %b, i64 %indvar
+; CHECK: %0 = load float* %arrayidx, align 4
+; CHECK: %mul = fmul float %0, %alpha
+; CHECK: %arrayidx2 = getelementptr inbounds float* %a, i64 %indvar
+; CHECK: %1 = load float* %arrayidx2, align 4
+; CHECK: %add = fadd float %1, %mul
+; CHECK: store float %add, float* %arrayidx2, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; void hoo(float alpha, float *a, float *b, int *ip) {
+;   for (int i = 0; i < 3200; i += 5) {
+;     a[i] += alpha * b[ip[i]];
+;     a[i + 1] += alpha * b[ip[i + 1]];
+;     a[i + 2] += alpha * b[ip[i + 2]];
+;     a[i + 3] += alpha * b[ip[i + 3]];
+;     a[i + 4] += alpha * b[ip[i + 4]];
+;   }
+; }
+
+; Input loop performs a[i+k] += alpha * b[ip[i+k]] for k = 0..4 and steps i by 5. Function Attrs: nounwind uwtable
+define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %ip, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %0 to i64
+  %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1
+  %1 = load float* %arrayidx2, align 4
+  %mul = fmul float %1, %alpha
+  %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+  %2 = load float* %arrayidx4, align 4
+  %add = fadd float %2, %mul
+  store float %add, float* %arrayidx4, align 4
+  %3 = add nsw i64 %indvars.iv, 1
+  %arrayidx7 = getelementptr inbounds i32* %ip, i64 %3
+  %4 = load i32* %arrayidx7, align 4
+  %idxprom8 = sext i32 %4 to i64
+  %arrayidx9 = getelementptr inbounds float* %b, i64 %idxprom8
+  %5 = load float* %arrayidx9, align 4
+  %mul10 = fmul float %5, %alpha
+  %arrayidx13 = getelementptr inbounds float* %a, i64 %3
+  %6 = load float* %arrayidx13, align 4
+  %add14 = fadd float %6, %mul10
+  store float %add14, float* %arrayidx13, align 4
+  %7 = add nsw i64 %indvars.iv, 2
+  %arrayidx17 = getelementptr inbounds i32* %ip, i64 %7
+  %8 = load i32* %arrayidx17, align 4
+  %idxprom18 = sext i32 %8 to i64
+  %arrayidx19 = getelementptr inbounds float* %b, i64 %idxprom18
+  %9 = load float* %arrayidx19, align 4
+  %mul20 = fmul float %9, %alpha
+  %arrayidx23 = getelementptr inbounds float* %a, i64 %7
+  %10 = load float* %arrayidx23, align 4
+  %add24 = fadd float %10, %mul20
+  store float %add24, float* %arrayidx23, align 4
+  %11 = add nsw i64 %indvars.iv, 3
+  %arrayidx27 = getelementptr inbounds i32* %ip, i64 %11
+  %12 = load i32* %arrayidx27, align 4
+  %idxprom28 = sext i32 %12 to i64
+  %arrayidx29 = getelementptr inbounds float* %b, i64 %idxprom28
+  %13 = load float* %arrayidx29, align 4
+  %mul30 = fmul float %13, %alpha
+  %arrayidx33 = getelementptr inbounds float* %a, i64 %11
+  %14 = load float* %arrayidx33, align 4
+  %add34 = fadd float %14, %mul30
+  store float %add34, float* %arrayidx33, align 4
+  %15 = add nsw i64 %indvars.iv, 4
+  %arrayidx37 = getelementptr inbounds i32* %ip, i64 %15
+  %16 = load i32* %arrayidx37, align 4
+  %idxprom38 = sext i32 %16 to i64
+  %arrayidx39 = getelementptr inbounds float* %b, i64 %idxprom38
+  %17 = load float* %arrayidx39, align 4
+  %mul40 = fmul float %17, %alpha
+  %arrayidx43 = getelementptr inbounds float* %a, i64 %15
+  %18 = load float* %arrayidx43, align 4
+  %add44 = fadd float %18, %mul40
+  store float %add44, float* %arrayidx43, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+  %19 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %19, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @hoo(
+; Expected after -loop-reroll: one indirect-load/fmul/fadd/store group per iteration, exiting at counter value 3200.
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %arrayidx = getelementptr inbounds i32* %ip, i64 %indvar
+; CHECK: %0 = load i32* %arrayidx, align 4
+; CHECK: %idxprom1 = sext i32 %0 to i64
+; CHECK: %arrayidx2 = getelementptr inbounds float* %b, i64 %idxprom1
+; CHECK: %1 = load float* %arrayidx2, align 4
+; CHECK: %mul = fmul float %1, %alpha
+; CHECK: %arrayidx4 = getelementptr inbounds float* %a, i64 %indvar
+; CHECK: %2 = load float* %arrayidx4, align 4
+; CHECK: %add = fadd float %2, %mul
+; CHECK: store float %add, float* %arrayidx4, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 3200
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/LoopReroll/reduction.ll b/test/Transforms/LoopReroll/reduction.ll
new file mode 100644
index 000000000000..aed7670b666d
--- /dev/null
+++ b/test/Transforms/LoopReroll/reduction.ll
@@ -0,0 +1,96 @@
+; RUN: opt < %s -loop-reroll -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32* nocapture readonly %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %x, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.029
+  %1 = or i64 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds i32* %x, i64 %1
+  %2 = load i32* %arrayidx3, align 4
+  %add4 = add nsw i32 %add, %2
+  %3 = or i64 %indvars.iv, 2
+  %arrayidx7 = getelementptr inbounds i32* %x, i64 %3
+  %4 = load i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %add4, %4
+  %5 = or i64 %indvars.iv, 3
+  %arrayidx11 = getelementptr inbounds i32* %x, i64 %5
+  %6 = load i32* %arrayidx11, align 4
+  %add12 = add nsw i32 %add8, %6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+  %7 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %7, 400
+  br i1 %cmp, label %for.body, label %for.end
+; Input: integer sum into %r.029 of four elements of %x per iteration (offsets formed with 'or'), stepping by 4.
+; CHECK-LABEL: @foo(
+; Expected after -loop-reroll: one load and one add into %r.029 per iteration, exiting at counter value 400.
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds i32* %x, i64 %indvar
+; CHECK: %0 = load i32* %arrayidx, align 4
+; CHECK: %add = add nsw i32 %0, %r.029
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add12
+}
+
+define float @bar(float* nocapture readonly %x) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.029 = phi float [ 0.0, %entry ], [ %add12, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %add = fadd float %0, %r.029
+  %1 = or i64 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds float* %x, i64 %1
+  %2 = load float* %arrayidx3, align 4
+  %add4 = fadd float %add, %2
+  %3 = or i64 %indvars.iv, 2
+  %arrayidx7 = getelementptr inbounds float* %x, i64 %3
+  %4 = load float* %arrayidx7, align 4
+  %add8 = fadd float %add4, %4
+  %5 = or i64 %indvars.iv, 3
+  %arrayidx11 = getelementptr inbounds float* %x, i64 %5
+  %6 = load float* %arrayidx11, align 4
+  %add12 = fadd float %add8, %6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+  %7 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %7, 400
+  br i1 %cmp, label %for.body, label %for.end
+; Input: float sum into %r.029 of four elements of %x per iteration (offsets formed with 'or'), stepping by 4.
+; CHECK-LABEL: @bar(
+; Expected after -loop-reroll: one load and one fadd into %r.029 per iteration, exiting at counter value 400.
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds float* %x, i64 %indvar
+; CHECK: %0 = load float* %arrayidx, align 4
+; CHECK: %add = fadd float %0, %r.029
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar.next, 400
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret float %add12
+}
+
+attributes #0 = { nounwind readonly uwtable }
+
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
index 78878f9fa663..6b92a6ecac13 100644
--- a/test/Transforms/LoopRotate/basic.ll
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 ; PR5319 - The "arrayidx" gep should be hoisted, not duplicated.  We should
 ; end up with one phi node.
 define void @test1() nounwind ssp {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
   %array = alloca [20 x i32], align 16
   br label %for.cond
@@ -33,7 +33,7 @@ for.end:                                          ; preds = %for.cond
 
 declare void @g(i32*)
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define void @test2() nounwind ssp {
 entry:
   %array = alloca [20 x i32], align 16
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 6a8d30820f6e..9461980ac08d 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -4,7 +4,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
-; CHECK: define i32 @tak
+; CHECK-LABEL: define i32 @tak(
 ; CHECK: entry
 ; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
 
@@ -43,7 +43,7 @@ return:                                           ; preds = %if.end
 define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) {
 ; Ensure that the loop increment basic block is rotated into the tail of the
 ; body, even though it contains a debug intrinsic call.
-; CHECK: define void @FindFreeHorzSeg
+; CHECK-LABEL: define void @FindFreeHorzSeg(
 ; CHECK: %dec = add
 ; CHECK-NEXT: tail call void @llvm.dbg.value
 ; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end
@@ -68,7 +68,7 @@ for.body:
 
 for.inc:
   %dec = add i64 %i.0, -1
-  tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata undef)
+  tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata !{metadata !"undef"})
   br label %for.cond
 
 for.end:
@@ -77,14 +77,15 @@ for.end:
   ret void
 }
 
+!llvm.module.flags = !{!20}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"tak", metadata !"tak", metadata !"", metadata !1, i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32, i32)* @tak} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame", metadata !"clang version 2.9 (trunk 125492)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak]
+!1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 125492)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 590081, metadata !0, metadata !"x", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 32, i32 13, metadata !0, null}
 !8 = metadata !{i32 590081, metadata !0, metadata !"y", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
@@ -92,8 +93,11 @@ for.end:
 !10 = metadata !{i32 590081, metadata !0, metadata !"z", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
 !11 = metadata !{i32 32, i32 27, metadata !0, null}
 !12 = metadata !{i32 33, i32 3, metadata !13, null}
-!13 = metadata !{i32 589835, metadata !0, i32 32, i32 30, metadata !1, i32 6} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 589835, metadata !18, metadata !0, i32 32, i32 30, i32 6} ; [ DW_TAG_lexical_block ]
 !14 = metadata !{i32 34, i32 5, metadata !15, null}
-!15 = metadata !{i32 589835, metadata !13, i32 33, i32 14, metadata !1, i32 7} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 589835, metadata !18, metadata !13, i32 33, i32 14, i32 7} ; [ DW_TAG_lexical_block ]
 !16 = metadata !{i32 36, i32 3, metadata !13, null}
 !17 = metadata !{i32 37, i32 1, metadata !13, null}
+!18 = metadata !{metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame"}
+!19 = metadata !{i32 0}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopRotate/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/multiple-exits.ll b/test/Transforms/LoopRotate/multiple-exits.ll
index 675d71f60da4..cc8738e479d4 100644
--- a/test/Transforms/LoopRotate/multiple-exits.ll
+++ b/test/Transforms/LoopRotate/multiple-exits.ll
@@ -32,7 +32,7 @@ return:                                           ; preds = %for.cond, %land.rhs
   %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ]
   ret i32 %retval.0
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: for.cond1.preheader:
 ; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ]
 ; CHECK: br label %for.cond1
@@ -73,7 +73,7 @@ return.loopexit:                                  ; preds = %for.cond
 return:                                           ; preds = %return.loopexit, %a
   ret void
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: if.end:
 ; CHECK: %inc = add i32 %i.02, 1
 ; CHECK: %cmp = icmp eq i32 %inc, %x
diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll
index 8ad2dce71a65..86a4f2a475fa 100644
--- a/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -29,7 +29,7 @@ for.end:                                          ; preds = %for.cond
 }
 
 ; Should only end up with one phi.
-; CHECK:      define void @test
+; CHECK-LABEL:      define void @test(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %for.body
 ; CHECK:      for.body:
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
index 037bb2042f95..d646cb9d6cb2 100644
--- a/test/Transforms/LoopRotate/simplifylatch.ll
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -3,7 +3,7 @@
 
 @mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
 
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK-NOT: bb4
 define i8 @f() {
 entry:
diff --git a/test/Transforms/LoopSimplify/dup-preds.ll b/test/Transforms/LoopSimplify/dup-preds.ll
new file mode 100644
index 000000000000..3d1f1499b11c
--- /dev/null
+++ b/test/Transforms/LoopSimplify/dup-preds.ll
@@ -0,0 +1,46 @@
+; RUN: opt -loop-simplify -S %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define fastcc void @do_update_md([3 x float]* nocapture readonly %x) #0 {
+entry:
+  br i1 undef, label %if.end365, label %lor.lhs.false134
+
+lor.lhs.false134:                                 ; preds = %entry
+  br i1 undef, label %lor.lhs.false138, label %if.end365
+
+lor.lhs.false138:                                 ; preds = %lor.lhs.false134
+  br i1 undef, label %lor.lhs.false142, label %if.end365
+
+lor.lhs.false142:                                 ; preds = %lor.lhs.false138
+  br i1 undef, label %for.body276.lr.ph, label %if.end365
+
+for.body276.lr.ph:                                ; preds = %lor.lhs.false142
+  switch i16 undef, label %if.then288 [
+    i16 4, label %for.body344
+    i16 2, label %for.body344
+  ]
+
+if.then288:                                       ; preds = %for.body276.lr.ph
+  br label %for.body305
+
+for.body305:                                      ; preds = %for.body305, %if.then288
+  br label %for.body305
+
+for.body344:                                      ; preds = %for.body344, %for.body276.lr.ph, %for.body276.lr.ph
+  %indvar = phi i64 [ %indvar.next, %for.body344 ], [ 0, %for.body276.lr.ph ]
+  %indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body276.lr.ph ], [ 0, %for.body276.lr.ph ]
+  %indvars.iv.next553 = add nuw nsw i64 %indvars.iv552, 1
+  %indvar.next = add i64 %indvar, 1
+  br label %for.body344
+; Both switch cases (4 and 2) branch to %for.body344; the expected phi has one entry from %for.body344.preheader.
+; CHECK-LABEL: @do_update_md(
+; CHECK: %indvars.iv552 = phi i64 [ %indvars.iv.next553, %for.body344 ], [ 0, %for.body344.preheader ]
+; CHECK: ret
+
+if.end365:                                        ; preds = %lor.lhs.false142, %lor.lhs.false138, %lor.lhs.false134, %entry
+  ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopSimplify/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll
index 854c612f02dd..89626b2af518 100644
--- a/test/Transforms/LoopSimplify/preserve-scev.ll
+++ b/test/Transforms/LoopSimplify/preserve-scev.ll
@@ -50,7 +50,7 @@ return:                                           ; preds = %for.body18, %for.bo
 declare void @foo() nounwind
 
 ; Notify SCEV when removing an ExitingBlock.
-; CHECK: @mergeExit
+; CHECK-LABEL: @mergeExit(
 ; CHECK: while.cond191:
 ; CHECK: br i1 %or.cond, label %while.body197
 ; CHECK-NOT: land.rhs:
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
index af3a53708b49..ccf8ebdd5d13 100644
--- a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
@@ -5,7 +5,7 @@
 target triple = "x86-apple-darwin"
 
 ; Verify that identical edges are merged. rdar://problem/6453893
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: bb89:
 ; CHECK: phi i8* [ %lsr.iv.next1, %bbA.bb89_crit_edge ], [ %lsr.iv.next1, %bbB.bb89_crit_edge ]{{$}}
 
@@ -43,7 +43,7 @@ exit:
 }
 
 ; Handle single-predecessor phis: PR13756
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: bb89:
 ; CHECK: phi i8* [ %lsr.iv.next1, %bbA ], [ %lsr.iv.next1, %bbA ], [ %lsr.iv.next1, %bbA ]{{$}}
 define i8* @test2() {
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll b/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
index 1ee9bb409d91..83963e3126da 100644
--- a/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
@@ -5,7 +5,7 @@
 
 target triple = "x86-apple-darwin"
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; multiplies are hoisted out of the loop
 ; CHECK: while.body.lr.ph:
 ; CHECK: mul i64
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll b/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll
index 4718529bfd50..484fefaad413 100644
--- a/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-13-SCEVChain.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-darwin"
 
 ; Verify that -loop-reduce runs without "hanging" and reuses post-inc
 ; expansions.
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: icmp
 ; CHECK: icmp
 ; CHECK: icmp
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll b/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll
index 60cc7a516326..068b716651d8 100644
--- a/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-14-IntPtr.ll
@@ -5,7 +5,7 @@
 
 target triple = "x86_64-apple-darwin"
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: phi
 ; CHECK-NOT: phi
 define void @test(i32 %rowStride) ssp align 2 {
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
index 392a8bcf89db..6c128feb541c 100644
--- a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
@@ -9,7 +9,7 @@ target triple = "i386-unknown-freebsd10.0"
 
 @b = external global [121 x i32]
 
-; CHECK: @vb
+; CHECK-LABEL: @vb(
 ;   Outer recurrence:
 ; CHECK: %lsr.iv1 = phi [121 x i32]*
 ;   Inner recurrence:
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
index d7f5723188c2..87dd39730ec1 100644
--- a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
@@ -11,7 +11,7 @@ target triple = "i386-apple-darwin"
 ; cannot find a preheader, so they should be expanded in the loop header
 ; (bb7.lr.ph.us) below the existing phi i.12.us.
 ; Currently, LSR won't kick in on such loops.
-; CHECK: @nopreheader
+; CHECK-LABEL: @nopreheader(
 ; CHECK: bb7.us:
 ; CHECK-NOT: phi float*
 ; CHECK: %j.01.us = phi i32
@@ -54,7 +54,7 @@ return:                                           ; preds = %bb9, %bb9.us, %bb10
 ; In this case, SCEVExpander simply cannot materialize the AddRecExpr
 ; that LSR picks. We must detect that %bb8.preheader does not have a
 ; preheader and avoid performing LSR on %bb7.
-; CHECK: @nopreheader2
+; CHECK-LABEL: @nopreheader2(
 ; CHECK: bb7:
 ; CHECK: %indvar = phi i32
 define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind {
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
index 3036a7e38bbf..94a037ec28eb 100644
--- a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 ; while.cond197 is a dominates the simplified loop while.cond238 but
 ; has no with no preheader.
 ;
-; CHECK: @nopreheader
+; CHECK-LABEL: @nopreheader(
 ; CHECK: %while.cond238
 ; CHECK: phi i64
 ; CHECK-NOT: phi
diff --git a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
index 0172492edc99..5fa3838c8297 100644
--- a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 ; IVUsers should not consider tmp128 a valid user because it is not in a
 ; simplified loop nest.
-; CHECK: @nopreheader
+; CHECK-LABEL: @nopreheader(
 ; CHECK: for.cond:
 ; CHECK: %tmp128 = add i64 %0, %indvar65
 define void @nopreheader(i8* %cmd) nounwind ssp {
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
index 8bac639ae559..ea1d65b1652e 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin"
 @g_3 = global i32 0, align 4
 
 ; Ensure that %div.i.i.us is not hoisted.
-; CHECK: @main
+; CHECK-LABEL: @main(
 ; CHECK: for.body.i.i.us:
 ; CHECK: %div.i.i.i.us
 ; CHECK: %cmp5.i.i.us
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
index bce234cd4066..8a5a0a4c5fcd 100644
--- a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
+++ b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
@@ -5,7 +5,7 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: bb8:
 ; CHECK-NEXT: phi i8
 ; CHECK-NEXT: phi i8
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
index 8fbddf8ae4c8..79dbf0d53703 100644
--- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -8,14 +8,14 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Verify that nothing uses the "dead" ptrtoint from "undef".
-; CHECK: @VerifyDiagnosticConsumerTest
+; CHECK-LABEL: @VerifyDiagnosticConsumerTest(
 ; CHECK: bb:
-; CHECK: %0 = ptrtoint i8* undef to i64
-; CHECK-NOT: %0
+; "dead" ptrtoint not emitted (or dead code eliminated) with
+; current LSR cost model.
+; CHECK-NOT: = ptrtoint i8* undef to i64
 ; CHECK: .lr.ph
-; CHECK-NOT: %0
-; CHECK: sub i64 %7, %tmp6
-; CHECK-NOT: %0
+; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp5, 1
+; CHECK: sub i64 [[TMP]], %tmp6
 ; CHECK: ret void
 define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
 bb:
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index ee3cc4dd78fc..ab7f20f0129b 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -138,7 +138,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; Consequently, we should *not* form any chains.
 ;
 ; A9: foldedidx:
-; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
+; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
 define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
 entry:
   br label %for.body
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
index bac2ffab31d9..8a3ba96497e7 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
index a932b4792586..2fe62e39fc93 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
@@ -5,7 +5,7 @@
 ; rdar://9786536
 
 ; First, make sure LSR doesn't crash on an empty IVUsers list.
-; CHECK: @dummyIV
+; CHECK-LABEL: @dummyIV(
 ; CHECK-NOT: phi
 ; CHECK-NOT: sitofp
 ; CHECK: br
@@ -24,7 +24,7 @@ for.end:
 }
 
 ; Now check that the computed double constant is correct.
-; CHECK: @doubleIV
+; CHECK-LABEL: @doubleIV(
 ; CHECK: phi double [ -3.900000e+01, %entry ]
 ; CHECK: br
 define void @doubleIV() nounwind {
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index cb23ad01a497..8053940df13f 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -11,9 +11,9 @@ declare i1 @check() nounwind
 
 ; Check that LSR did something close to the behavior at the time of the bug.
 ; CHECK: @sqlite3DropTriggerPtr
-; CHECK: incq %rax
+; CHECK: incq %r{{[a-d]}}x
 ; CHECK: jne
-; CHECK: decq %rax
+; CHECK: decq %r{{[a-d]}}x
 ; CHECK: ret
 define i64 @sqlite3DropTriggerPtr() nounwind {
 bb:
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index eedfc200f48b..001a1d695c99 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ;
-; Test LSR's ability to prune formulae that refer to nonexistant
+; Test LSR's ability to prune formulae that refer to nonexistent
 ; AddRecs in other loops.
 ;
 ; Unable to reduce this case further because it requires LSR to exceed
@@ -14,7 +14,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-darwin"
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: for.body:
 ; CHECK: %lsr.iv
 ; CHECK-NOT: %dummyout
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
index da2db5a45f9c..ba763cf03ffc 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
new file mode 100644
index 000000000000..6333291aa65d
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK: bb1:
+; CHECK: load double addrspace(1)* [[IV:%[^,]+]]
+; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
+
+; CHECK-NOT: cast
+; Make sure the GEP has the right index type
+; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1
+; CHECK: br {{.*}} label %bb1
+
+; Make sure the GEP has the right index type
+; CHECK: getelementptr double addrspace(1)* {{.*}}, i16
+
+
+; This test tests several things. The load and store should use the
+; same address instead of having it computed twice, and SCEVExpander should
+; be able to reconstruct the full getelementptr, despite it having a few
+; obstacles set in its way.
+; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
+; currently only operates on inner loops.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64"
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind {
+entry:
+	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %tmp, label %bb.nph3, label %return
+
+bb.nph:		; preds = %bb2.preheader
+	%tmp1 = mul i64 %tmp16, %i.02		; <i64> [#uses=1]
+	%tmp2 = mul i64 %tmp19, %i.02		; <i64> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb2, %bb.nph
+	%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
+	%tmp3 = add i64 %j.01, %tmp1		; <i64> [#uses=1]
+	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
+        %z0 = add i64 %tmp3, 5203
+	%tmp5 = getelementptr double addrspace(1)* %p, i64 %z0		; <double addrspace(1)*> [#uses=1]
+	%tmp6 = load double addrspace(1)* %tmp5, align 8		; <double> [#uses=1]
+	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
+        %z1 = add i64 %tmp4, 5203
+	%tmp8 = getelementptr double addrspace(1)* %p, i64 %z1		; <double addrspace(1)*> [#uses=1]
+	store double %tmp7, double addrspace(1)* %tmp8, align 8
+	%tmp9 = add i64 %j.01, 1		; <i64> [#uses=2]
+	br label %bb2
+
+bb2:		; preds = %bb1
+	%tmp10 = icmp slt i64 %tmp9, %m		; <i1> [#uses=1]
+	br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+
+bb2.bb3_crit_edge:		; preds = %bb2
+	br label %bb3
+
+bb3:		; preds = %bb2.preheader, %bb2.bb3_crit_edge
+	%tmp11 = add i64 %i.02, 1		; <i64> [#uses=2]
+	br label %bb4
+
+bb4:		; preds = %bb3
+	%tmp12 = icmp slt i64 %tmp11, %n		; <i1> [#uses=1]
+	br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+
+bb4.return_crit_edge:		; preds = %bb4
+	br label %bb4.return_crit_edge.split
+
+bb4.return_crit_edge.split:		; preds = %bb.nph3, %bb4.return_crit_edge
+	br label %return
+
+bb.nph3:		; preds = %entry
+	%tmp13 = icmp sgt i64 %m, 0		; <i1> [#uses=1]
+	%tmp14 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp15 = mul i64 %tmp14, %o		; <i64> [#uses=1]
+	%tmp16 = mul i64 %tmp15, %q		; <i64> [#uses=1]
+	%tmp17 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp18 = mul i64 %tmp17, %o		; <i64> [#uses=1]
+	%tmp19 = mul i64 %tmp18, %q		; <i64> [#uses=1]
+	br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+
+bb.nph3.split:		; preds = %bb.nph3
+	br label %bb2.preheader
+
+bb2.preheader:		; preds = %bb.nph3.split, %bb4
+	%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ]		; <i64> [#uses=3]
+	br i1 true, label %bb.nph, label %bb3
+
+return:		; preds = %bb4.return_crit_edge.split, %entry
+	ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/test/Transforms/LoopStrengthReduce/address-space-loop.ll
new file mode 100644
index 000000000000..9c1b213b5979
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/address-space-loop.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Copy of uglygep with a different address space
+; This tests that expandAddToGEP uses the right smaller integer type for
+; another address space
+define void @Z4() nounwind {
+; CHECK-LABEL: @Z4(
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i16 %t4, 1                         ; <i16> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ]      ; <i16> [#uses=3]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb9
+  %t7 = icmp eq i16 %t4, 0                    ; <i1> [#uses=1]
+  %t3 = add i16 %t4, 16                     ; <i16> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t2
+  %t6 = load float addrspace(1)* addrspace(1)* undef
+  %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8*> [#uses=1]
+  %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t9
+  br label %bb14
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
index ff8cab83137b..3ba93ff74835 100644
--- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -68,3 +68,46 @@ bb7:
           catch i8* null
   ret void
 }
+
+; PR17425
+define void @i() {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %c.0 = phi i16* [ undef, %entry ], [ %incdec.ptr, %while.cond ]
+  %incdec.ptr = getelementptr inbounds i16* %c.0, i64 1
+  br i1 undef, label %while.cond1, label %while.cond
+
+while.cond1:                                      ; preds = %while.cond1, %while.cond
+  %c.1 = phi i16* [ %incdec.ptr5, %while.cond1 ], [ %c.0, %while.cond ]
+  %incdec.ptr5 = getelementptr inbounds i16* %c.1, i64 1
+  br i1 undef, label %while.cond7, label %while.cond1
+
+while.cond7:                                      ; preds = %while.cond7, %while.cond1
+  %0 = phi i16* [ %incdec.ptr10, %while.cond7 ], [ %c.1, %while.cond1 ]
+  %incdec.ptr10 = getelementptr inbounds i16* %0, i64 1
+  br i1 undef, label %while.cond12.preheader, label %while.cond7
+
+while.cond12.preheader:                           ; preds = %while.cond7
+  br i1 undef, label %while.end16, label %while.body13.lr.ph
+
+while.body13:                                     ; preds = %if.else, %while.body13.lr.ph
+  %1 = phi i16* [ %2, %while.body13.lr.ph ], [ %incdec.ptr15, %if.else ]
+  br i1 undef, label %while.cond12.outer.loopexit, label %if.else
+
+while.cond12.outer.loopexit:                      ; preds = %while.body13
+  br i1 undef, label %while.end16, label %while.body13.lr.ph
+
+while.body13.lr.ph:                               ; preds = %while.cond12.outer.loopexit, %while.cond12.preheader
+  %2 = phi i16* [ %1, %while.cond12.outer.loopexit ], [ undef, %while.cond12.preheader ]
+  br label %while.body13
+
+if.else:                                          ; preds = %while.body13
+  %incdec.ptr15 = getelementptr inbounds i16* %1, i64 1
+  %cmp = icmp eq i16* %incdec.ptr15, %0
+  br i1 %cmp, label %while.end16, label %while.body13
+
+while.end16:                                      ; preds = %if.else, %while.cond12.outer.loopexit, %while.cond12.preheader
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ivchain.ll b/test/Transforms/LoopStrengthReduce/ivchain.ll
index ce7ad198de49..233800b71c64 100644
--- a/test/Transforms/LoopStrengthReduce/ivchain.ll
+++ b/test/Transforms/LoopStrengthReduce/ivchain.ll
@@ -6,7 +6,7 @@
 
 %struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 }
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: for.body:
 ; CHECK: lsr.iv = phi %struct
 ; CHECK: br
diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopStrengthReduce/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
new file mode 100644
index 000000000000..255cf41a8174
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll
@@ -0,0 +1,42 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; PR15470: LSR miscompile. The test2 function should return '1'.
+;
+; SCEV expander cannot expand quadratic recurrences outside of the
+; loop. This recurrence depends on %sub.us, so can't be expanded.
+;
+; CHECK-LABEL: @test2
+; CHECK-LABEL: test2.loop:
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+;
+; CHECK-LABEL: for.end:
+; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
+; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: ret i32 %f
+define i32 @test2() {
+entry:
+  br label %test2.loop
+
+test2.loop:
+  %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
+  %inc11.us = add nsw i32 %inc1115.us, 1
+  %cmp.us = icmp slt i32 %inc11.us, 2
+  br i1 %cmp.us, label %test2.loop, label %for.end
+
+for.end:
+  %tobool.us = icmp eq i32 %inc1115.us, 0
+  %sub.us = select i1 %tobool.us, i32 0, i32 0
+  %mul.us = shl i32 %inc1115.us, 24
+  %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+  %sext.us = mul i32 %mul.us, %sub.cond.us
+  %f = ashr i32 %sext.us, 24
+  br label %exit
+
+exit:
+  ret i32 %f
+}
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 011824116b3a..65aa61fb937e 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,18 +1,50 @@
-; RUN: opt < %s -analyze -iv-users | grep "{1,+,3,+,2}<%loop> (post-inc with loop %loop)"
+; RUN: opt < %s -analyze -iv-users | FileCheck %s
 
 ; The value of %r is dependent on a polynomial iteration expression.
-
+;
+; CHECK-LABEL: IV Users for loop %foo.loop
+; CHECK: {1,+,3,+,2}<%foo.loop>
 define i64 @foo(i64 %n) {
 entry:
-  br label %loop
+  br label %foo.loop
 
-loop:
-  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+foo.loop:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %foo.loop ]
   %indvar.next = add i64 %indvar, 1
   %c = icmp eq i64 %indvar.next, %n
-  br i1 %c, label %exit, label %loop
+  br i1 %c, label %exit, label %foo.loop
 
 exit:
   %r = mul i64 %indvar.next, %indvar.next
   ret i64 %r
 }
+
+; PR15470: LSR miscompile. The test2 function should return '1'.
+;
+; SCEV does not know how to denormalize chained recurrences, so make
+; sure they aren't marked as post-inc users.
+;
+; CHECK-LABEL: IV Users for loop %test2.loop
+; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24
+define i32 @test2() {
+entry:
+  br label %test2.loop
+
+test2.loop:
+  %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
+  %inc11.us = add nsw i32 %inc1115.us, 1
+  %cmp.us = icmp slt i32 %inc11.us, 2
+  br i1 %cmp.us, label %test2.loop, label %for.end
+
+for.end:
+  %tobool.us = icmp eq i32 %inc1115.us, 0
+  %sub.us = select i1 %tobool.us, i32 0, i32 0
+  %mul.us = shl i32 %inc1115.us, 24
+  %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+  %sext.us = mul i32 %mul.us, %sub.cond.us
+  %f = ashr i32 %sext.us, 24
+  br label %exit
+
+exit:
+  ret i32 %f
+}
diff --git a/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll b/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll
new file mode 100644
index 000000000000..a652a7661e23
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll
@@ -0,0 +1,68 @@
+; RUN: opt -loop-reduce %s -S -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+; <rdar://problem/14199725> Assertion failed: (CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!")
+; CHECK-LABEL: @scalingFactorCrash(
+define void @scalingFactorCrash() {
+  br i1 undef, label %1, label %24
+
+; <label>:1                                       ; preds = %0
+  br i1 undef, label %2, label %24
+
+; <label>:2                                       ; preds = %1
+  br i1 undef, label %3, label %24
+
+; <label>:3                                       ; preds = %2
+  br i1 undef, label %4, label %24
+
+; <label>:4                                       ; preds = %3
+  br i1 undef, label %24, label %6
+
+; <label>:5                                       ; preds = %6
+  br i1 undef, label %24, label %7
+
+; <label>:6                                       ; preds = %6, %4
+  br i1 undef, label %6, label %5
+
+; <label>:7                                       ; preds = %9, %5
+  br label %8
+
+; <label>:8                                       ; preds = %8, %7
+  br i1 undef, label %9, label %8
+
+; <label>:9                                       ; preds = %8
+  br i1 undef, label %7, label %10
+
+; <label>:10                                      ; preds = %9
+  br i1 undef, label %24, label %11
+
+; <label>:11                                      ; preds = %10
+  br i1 undef, label %15, label %13
+
+; <label>:12                                      ; preds = %14
+  br label %15
+
+; <label>:13                                      ; preds = %11
+  br label %14
+
+; <label>:14                                      ; preds = %14, %13
+  br i1 undef, label %14, label %12
+
+; <label>:15                                      ; preds = %12, %11
+  br i1 undef, label %16, label %24
+
+; <label>:16                                      ; preds = %16, %15
+  %17 = phi i32 [ %21, %16 ], [ undef, %15 ]
+  %18 = sub i32 %17, 1623127498
+  %19 = getelementptr inbounds i32* undef, i32 %18
+  store i32 undef, i32* %19, align 4
+  %20 = add i32 %17, 1623127499
+  %21 = add i32 %20, -1623127498
+  %22 = add i32 %21, -542963121
+  %23 = icmp ult i32 %22, undef
+  br i1 undef, label %16, label %24
+
+; <label>:24                                      ; preds = %16, %15, %10, %5, %4, %3, %2, %1, %0
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
new file mode 100644
index 000000000000..2c65261f57f5
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Copy of uglygep with a different address space
+; This tests that expandAddToGEP uses the right smaller integer type for
+; another address space
+define void @Z4() nounwind {
+; CHECK: define void @Z4
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i16 %t4, 1                         ; <i16> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ]      ; <i16> [#uses=3]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb9
+  %t7 = icmp eq i16 %t4, 0                    ; <i1> [#uses=1]
+  %t3 = add i16 %t4, 16                     ; <i16> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t2
+  %t6 = load float addrspace(1)* addrspace(1)* undef
+  %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8*> [#uses=1]
+  %t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t9
+  br label %bb14
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll
index 8af5cf1dfd72..4562d29a0a20 100644
--- a/test/Transforms/LoopStrengthReduce/uglygep.ll
+++ b/test/Transforms/LoopStrengthReduce/uglygep.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-reduce -S | not grep uglygep
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
 
 ; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
 ; should be able to form pretty GEPs.
@@ -6,6 +6,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define void @Z4() nounwind {
+; CHECK-LABEL: define void @Z4(
 bb:
   br label %bb3
 
@@ -20,11 +21,26 @@ bb3:                                              ; preds = %bb2, %bb
   %t4 = phi i64 [ %t, %bb2 ], [ 0, %bb ]      ; <i64> [#uses=3]
   br label %bb1
 
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i64 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8* undef, i64 %t4
+; CHECK-NEXT: br label %bb14
 bb10:                                             ; preds = %bb9
   %t7 = icmp eq i64 %t4, 0                    ; <i1> [#uses=1]
   %t3 = add i64 %t4, 16                     ; <i64> [#uses=1]
   br label %bb14
 
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float** undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float* %t6, i64 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float* [[SCEVGEP1]] to i8*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8* [[SCEVGEP2]], i64 %t4
+; CHECK-NEXT: store i8 undef, i8* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
 bb14:                                             ; preds = %bb14, %bb10
   %t2 = getelementptr inbounds i8* undef, i64 %t4 ; <i8*> [#uses=1]
   store i8 undef, i8* %t2
@@ -36,9 +52,15 @@ bb14:                                             ; preds = %bb14, %bb10
 }
 
 define fastcc void @TransformLine() nounwind {
+; CHECK-LABEL: @TransformLine(
 bb:
   br label %loop0
 
+; CHECK: loop0:
+; Induction variable is initialized to -2.
+; CHECK-NEXT: [[PHIIV:%[^ ]+]] = phi i32 [ [[IVNEXT:%[^ ]+]], %loop0 ], [ -2, %bb ]
+; CHECK-NEXT: [[IVNEXT]] = add i32 [[PHIIV]], 1
+; CHECK-NEXT: br i1 false, label %loop0, label %bb0
 loop0:                                            ; preds = %loop0, %bb
   %i0 = phi i32 [ %i0.next, %loop0 ], [ 0, %bb ]  ; <i32> [#uses=2]
   %i0.next = add i32 %i0, 1                       ; <i32> [#uses=1]
@@ -47,18 +69,52 @@ loop0:                                            ; preds = %loop0, %bb
 bb0:                                              ; preds = %loop0
   br label %loop1
 
+; CHECK: loop1:
+; CHECK-NEXT: %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ]
+; IVNEXT covers the uses of %i0 and %t0.
+; Therefore, %t0 has been removed.
+; The critical edge has been split.
+; CHECK-NEXT: br i1 false, label %bb2, label %[[LOOP1BB6:.+]]
 loop1:                                            ; preds = %bb5, %bb0
   %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ]   ; <i32> [#uses=4]
   %t0 = add i32 %i0, %i1                          ; <i32> [#uses=1]
   br i1 false, label %bb2, label %bb6
 
+; CHECK: bb2:
+; Critical edge split.
+; CHECK-NEXT: br i1 true, label %[[BB2BB6:[^,]+]], label %bb5
 bb2:                                              ; preds = %loop1
   br i1 true, label %bb6, label %bb5
 
+; CHECK: bb5:
+; CHECK-NEXT: %i1.next = add i32 %i1, 1
+; CHECK-NEXT: br i1 true, label %[[BB5BB6:[^,]+]], label %loop1
 bb5:                                              ; preds = %bb2
   %i1.next = add i32 %i1, 1                       ; <i32> [#uses=1]
   br i1 true, label %bb6, label %loop1
 
+; bb5 to bb6 split basic block.
+; CHECK: [[BB5BB6]]:
+; CHECK-NEXT: [[INITIALVAL:%[^ ]+]] = add i32 [[IVNEXT]], %i1.next
+; CHECK-NEXT: br label %[[SPLITTOBB6:.+]]
+
+; bb2 to bb6 split basic block.
+; CHECK: [[BB2BB6]]:
+; CHECK-NEXT: br label %[[SPLITTOBB6]]
+
+; Split basic blocks to bb6.
+; CHECK: [[SPLITTOBB6]]:
+; CHECK-NEXT: [[INITP8:%[^ ]+]] = phi i32 [ [[INITIALVAL]], %[[BB5BB6]] ], [ undef, %[[BB2BB6]] ]
+; CHECK-NEXT: [[INITP9:%[^ ]+]] = phi i32 [ undef, %[[BB5BB6]] ], [ %i1, %[[BB2BB6]] ]
+; CHECK-NEXT: br label %bb6
+  
+; CHECK: [[LOOP1BB6]]:
+; CHECK-NEXT: br label %bb6
+
+; CHECK: bb6:
+; CHECK-NEXT: %p8 = phi i32 [ undef, %[[LOOP1BB6]] ], [ [[INITP8]], %[[SPLITTOBB6]] ]
+; CHECK-NEXT: %p9 = phi i32 [ %i1, %[[LOOP1BB6]] ], [ [[INITP9]], %[[SPLITTOBB6]] ]
+; CHECK-NEXT: unreachable
 bb6:                                              ; preds = %bb5, %bb2, %loop1
   %p8 = phi i32 [ %t0, %bb5 ], [ undef, %loop1 ], [ undef, %bb2 ] ; <i32> [#uses=0]
   %p9 = phi i32 [ undef, %bb5 ], [ %i1, %loop1 ], [ %i1, %bb2 ] ; <i32> [#uses=0]
diff --git a/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll b/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
index cd954c80ec37..bf6d6d5989c9 100644
--- a/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
+++ b/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
@@ -30,7 +30,7 @@ if.then:                                          ; preds = %if.else, %entry
 
 ; PR7318: assertion failure after doing a simple loop unroll
 ;
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: bb1.bb2_crit_edge:
 ; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ]
 ; CHECK: bb1.3:
@@ -67,7 +67,7 @@ bb2:                                              ; preds = %bb1.bb2_crit_edge,
 
 ; Check phi update for loop with an early-exit.
 ;
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: return.loopexit:
 ; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ]
 ; CHECK: exit.3:
diff --git a/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll b/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
index c1221f595ac2..8344993a6fd2 100644
--- a/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
+++ b/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
@@ -12,10 +12,10 @@ declare i32 @getval() nounwind
 ; Check that the loop exit merges values from all the iterations. This
 ; could be a tad fragile, but it's a good test.
 ;
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: return:
 ; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
-; CHECK-NOT: @bar
+; CHECK-NOT: @bar(
 ; CHECK: bar.exit.3
 define i32 @foo() uwtable ssp align 2 {
 entry:
diff --git a/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll b/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
index 7fb471ea7509..617d4dbe8fdd 100644
--- a/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
+++ b/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
@@ -8,7 +8,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 ; Check that for.body was unrolled 19 times.
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: %0 = load
 ; CHECK: %conv = sext i8 %0 to i32
 ; CHECK: %add.1 = add nsw i32 %conv.1, %conv
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
new file mode 100644
index 000000000000..17c91e5c07b1
--- /dev/null
+++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @unroll_opt_for_size(
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK: icmp
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+; CHECK-LABEL: @test(
+; CHECK: unr.cmp{{.*}}:
+; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+
diff --git a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..2e463005586f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/basic.ll b/test/Transforms/LoopUnroll/basic.ll
index ab5bc568ede4..2bfd3e6de8fc 100644
--- a/test/Transforms/LoopUnroll/basic.ll
+++ b/test/Transforms/LoopUnroll/basic.ll
@@ -3,7 +3,7 @@
 
 ; This should not unroll since the address of the loop header is taken.
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: store i8* blockaddress(@test1, %l1), i8** %P
 ; CHECK: l1:
 ; CHECK-NEXT: phi i32
@@ -25,7 +25,7 @@ l2:                                               ; preds = %l1
 
 ; This should not unroll since the call is 'noduplicate'.
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i32 @test2(i8** %P) nounwind ssp {
 entry:
   br label %l1
diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopUnroll/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopUnroll/pr14167.ll b/test/Transforms/LoopUnroll/pr14167.ll
index 205ae44b72e4..9aac70115d9a 100644
--- a/test/Transforms/LoopUnroll/pr14167.ll
+++ b/test/Transforms/LoopUnroll/pr14167.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-bgq-linux"
 
 define void @test1() nounwind {
 ; Ensure that we don't crash when the trip count == -1.
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
   br label %for.cond2.preheader
 
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
index 308a0363165c..c3086e8335f9 100644
--- a/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -7,7 +7,7 @@
 
 ; Completely unroll loops without a canonical IV.
 ;
-; CHECK: @sansCanonical
+; CHECK-LABEL: @sansCanonical(
 ; CHECK-NOT: phi
 ; CHECK-NOT: icmp
 ; CHECK: ret
@@ -35,7 +35,7 @@ exit:
 ; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
 ; unrolling does not.
 ;
-; CHECK: @earlyLoopTest
+; CHECK-LABEL: @earlyLoopTest(
 ; CHECK: tail:
 ; CHECK-NOT: br
 ; CHECK: br i1 %cmp2, label %loop, label %exit2
@@ -69,7 +69,7 @@ exit2:
 ; SCEV cannot currently unroll this loop.
 ; It should ideally detect a trip count of 5.
 ; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
-; CHECK: @multiExit
+; CHECK-LABEL: @multiExit(
 ; CHECKFIXME: getelementptr i32* %base, i32 10
 ; CHECKFIXME-NEXT: load i32*
 ; CHECKFIXME: br i1 false, label %l2.10, label %exit1
@@ -103,7 +103,7 @@ exit2:
 ; LoopUnroll utility uses this assumption to optimize the latch
 ; block's branch.
 ;
-; CHECK: @multiExit
+; CHECK-LABEL: @multiExitIncomplete(
 ; CHECK: l3:
 ; CHECK-NOT: br
 ; CHECK:   br i1 %cmp3, label %l1, label %exit3
@@ -137,7 +137,7 @@ exit3:
 ; When loop unroll merges a loop exit with one of its parent loop's
 ; exits, SCEV must forget its ExitNotTaken info.
 ;
-; CHECK: @nestedUnroll
+; CHECK-LABEL: @nestedUnroll(
 ; CHECK-NOT: br i1
 ; CHECK: for.body87:
 define void @nestedUnroll() nounwind {
@@ -183,7 +183,7 @@ for.body87:
 ; the loop latch's exit count of zero is an upper bound on the number
 ; of iterations.
 ;
-; CHECK: @nsw_latch
+; CHECK-LABEL: @nsw_latch(
 ; CHECK: for.body:
 ; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ]
 ; CHECK: return:
diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll
index 9a938cc28774..b98b4a3fffba 100644
--- a/test/Transforms/LoopUnroll/unloop.ll
+++ b/test/Transforms/LoopUnroll/unloop.ll
@@ -7,7 +7,7 @@ declare i1 @check() nounwind
 ; Ensure that tail->inner is removed and rely on verify-loopinfo to
 ; check soundness.
 ;
-; CHECK: @skiplevelexit
+; CHECK-LABEL: @skiplevelexit(
 ; CHECK: tail:
 ; CHECK-NOT: br
 ; CHECK: ret void
@@ -38,7 +38,7 @@ exit:
 ; Ensure that only the middle loop is removed and rely on verify-loopinfo to
 ; check soundness.
 ;
-; CHECK: @unloopNested
+; CHECK-LABEL: @unloopNested(
 ; Outer loop control.
 ; CHECK: while.body:
 ; CHECK: br i1 %cmp3, label %if.then, label %if.end
@@ -128,7 +128,7 @@ return:
 ;
 ; This test must be disabled until trip count computation can be optimized...
 ; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
-; CHECKFIXME: @unloopDeepNested
+; CHECKFIXME-LABEL: @unloopDeepNested(
 ; Inner-inner loop control.
 ; CHECKFIXME: while.cond.us.i:
 ; CHECKFIXME: br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i
@@ -248,7 +248,7 @@ while.end:
 ; Ensure that only the middle loop is removed and rely on verify-loopinfo to
 ; check soundness.
 ;
-; CHECK: @unloopIrreducible
+; CHECK-LABEL: @unloopIrreducible(
 ; Irreducible loop.
 ; CHECK: for.inc117:
 ; CHECK: br label %for.cond103t
@@ -326,7 +326,7 @@ for.end166:
 ; Ensure that only the loop is removed and rely on verify-loopinfo to
 ; check soundness.
 ;
-; CHECK: @unloopCriticalEdge
+; CHECK-LABEL: @unloopCriticalEdge(
 ; CHECK: while.cond.outer.i.loopexit.split:
 ; CHECK: br label %while.body
 ; CHECK: while.body:
@@ -431,7 +431,7 @@ return:                                           ; preds = %sw.bb304
 }
 
 ; PR11335: the most deeply nested block should be removed from the outer loop.
-; CHECK: @removeSubloopBlocks2
+; CHECK-LABEL: @removeSubloopBlocks2(
 ; CHECK: for.cond3:
 ; CHECK-NOT: br
 ; CHECK: ret void
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index e98d82b6522d..85e44eca9b63 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -32,7 +32,7 @@ return:		; preds = %endif, %then
 ; This simple test would normally unswitch, but should be inhibited by the presence of
 ; the noduplicate call.
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i32 @test2(i32* %var) {
   %mem = alloca i32
   store i32 2, i32* %mem
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index 8261e389370a..e79d874d9ca6 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -11,7 +11,7 @@
 ; STATS: 2 loop-unswitch - Number of branches unswitched
 ; STATS: 1 loop-unswitch - Number of unswitches that are trivial
 
-; CHECK: @func_16
+; CHECK-LABEL: @func_16(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
 
diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopUnswitch/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index bab6300f2e7f..1e1396f80085 100644
--- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: icmp eq <4 x i32>
 ;CHECK: select <4 x i1>
 ;CHECK: ret i32
@@ -30,7 +30,7 @@ if.then:                                          ; preds = %for.body
 if.end:                                           ; preds = %for.body, %if.then
   %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
   store i32 %z.0, i32* %arrayidx, align 4
-  %indvars.iv.next = add i64 %indvars.iv, 1
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %x
   br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
index c8d307f5d443..39363ab2d802 100644
--- a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -4,11 +4,11 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios3.0.0"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: load <4 x i32>
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret
-;SWIFT: @foo
+;SWIFT-LABEL: @foo(
 ;SWIFT: load <4 x i32>
 ;SWIFT: load <4 x i32>
 ;SWIFT: ret
diff --git a/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/test/Transforms/LoopVectorize/ARM/gather-cost.ll
new file mode 100644
index 000000000000..239a28fa6c7c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/gather-cost.ll
@@ -0,0 +1,88 @@
+; RUN: opt -loop-vectorize -mtriple=thumbv7s-apple-ios6.0.0 -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+@kernel = global [512 x float] zeroinitializer, align 4
+@kernel2 = global [512 x float] zeroinitializer, align 4
+@kernel3 = global [512 x float] zeroinitializer, align 4
+@kernel4 = global [512 x float] zeroinitializer, align 4
+@src_data = global [1536 x float] zeroinitializer, align 4
+@r_ = global i8 0, align 4
+@g_ = global i8 0, align 4
+@b_ = global i8 0, align 4
+
+; We don't want to vectorize most loops containing gathers because they are
+; expensive. This function represents a point where vectorization starts to
+; become beneficial.
+; Make sure we are conservative and don't vectorize it.
+; CHECK-NOT: <2 x float>
+; CHECK-NOT: <4 x float>
+
+define void @_Z4testmm(i32 %size, i32 %offset) {
+entry:
+  %cmp53 = icmp eq i32 %size, 0
+  br i1 %cmp53, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %r.057 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add10, %for.body ]
+  %g.056 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add20, %for.body ]
+  %v.055 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
+  %add = add i32 %v.055, %offset
+  %mul = mul i32 %add, 3
+  %arrayidx = getelementptr inbounds [1536 x float]* @src_data, i32 0, i32 %mul
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float]* @kernel, i32 0, i32 %v.055
+  %1 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %1
+  %arrayidx4 = getelementptr inbounds [512 x float]* @kernel2, i32 0, i32 %v.055
+  %2 = load float* %arrayidx4, align 4
+  %mul5 = fmul fast float %mul3, %2
+  %arrayidx6 = getelementptr inbounds [512 x float]* @kernel3, i32 0, i32 %v.055
+  %3 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %mul5, %3
+  %arrayidx8 = getelementptr inbounds [512 x float]* @kernel4, i32 0, i32 %v.055
+  %4 = load float* %arrayidx8, align 4
+  %mul9 = fmul fast float %mul7, %4
+  %add10 = fadd fast float %r.057, %mul9
+  %arrayidx.sum = add i32 %mul, 1
+  %arrayidx11 = getelementptr inbounds [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum
+  %5 = load float* %arrayidx11, align 4
+  %mul13 = fmul fast float %1, %5
+  %mul15 = fmul fast float %2, %mul13
+  %mul17 = fmul fast float %3, %mul15
+  %mul19 = fmul fast float %4, %mul17
+  %add20 = fadd fast float %g.056, %mul19
+  %arrayidx.sum52 = add i32 %mul, 2
+  %arrayidx21 = getelementptr inbounds [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum52
+  %6 = load float* %arrayidx21, align 4
+  %mul23 = fmul fast float %1, %6
+  %mul25 = fmul fast float %2, %mul23
+  %mul27 = fmul fast float %3, %mul25
+  %mul29 = fmul fast float %4, %mul27
+  %add30 = fadd fast float %b.054, %mul29
+  %inc = add i32 %v.055, 1
+  %exitcond = icmp ne i32 %inc, %size
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  %add30.lcssa = phi float [ %add30, %for.body ]
+  %add20.lcssa = phi float [ %add20, %for.body ]
+  %add10.lcssa = phi float [ %add10, %for.body ]
+  %phitmp = fptoui float %add10.lcssa to i8
+  %phitmp60 = fptoui float %add20.lcssa to i8
+  %phitmp61 = fptoui float %add30.lcssa to i8
+  br label %for.end
+
+for.end:
+  %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  store i8 %r.0.lcssa, i8* @r_, align 4
+  store i8 %g.0.lcssa, i8* @g_, align 4
+  store i8 %b.0.lcssa, i8* @b_, align 4
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
index 6a68e81bcae0..f2bd0ac200d4 100644
--- a/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -8,7 +8,7 @@ target triple = "thumbv7-apple-ios3.0.0"
 @a = common global [2048 x i32] zeroinitializer, align 16
 
 ; Select VF = 8;
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
@@ -34,7 +34,7 @@ define void @example1() nounwind uwtable ssp {
   ret void
 }
 
-;CHECK: @example10b
+;CHECK-LABEL: @example10b(
 ;CHECK: load <4 x i16>
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
index cb77b09ef4ad..8a3ba96497e7 100644
--- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
index c0795b6a79af..99d7fa75ee33 100644
--- a/test/Transforms/LoopVectorize/ARM/width-detect.ll
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -3,27 +3,27 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios3.0.0"
 
-;CHECK:foo_F64
-;CHECK: <2 x double>
+;CHECK:foo_F32
+;CHECK: <4 x float>
 ;CHECK:ret
-define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+define float @foo_F32(float* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
 
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
-  %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
-  %2 = getelementptr inbounds double* %A, i64 %indvars.iv
-  %3 = load double* %2, align 8
-  %4 = fmul fast double %prod.01, %3
+  %prod.01 = phi float [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+  %2 = getelementptr inbounds float* %A, i64 %indvars.iv
+  %3 = load float* %2, align 8
+  %4 = fmul fast float %prod.01, %3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
   br i1 %exitcond, label %._crit_edge, label %.lr.ph
 
 ._crit_edge:                                      ; preds = %.lr.ph, %0
-  %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
-  ret double %prod.0.lcssa
+  %prod.0.lcssa = phi float [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+  ret float %prod.0.lcssa
 }
 
 ;CHECK:foo_I8
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
new file mode 100644
index 000000000000..885418c0fdd9
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; We want to make sure that we don't even try to vectorize loops again.
+; The vectorizer used to mark only the un-vectorized loop as already
+; vectorized, and so would try to vectorize the vectorized loop again.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external global [255 x i32]
+
+; Function Attrs: nounwind readonly uwtable
+define i32 @vect() {
+; CHECK: LV: Checking a loop in "vect"
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; We need to make sure we did vectorize the loop
+; CHECK: LV: Found a loop: for.body
+; CHECK: LV: We can vectorize this loop!
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [255 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body
+
+; If it did, we have two loops:
+; CHECK: vector.body:
+; CHECK: br {{.*}} label %vector.body, !llvm.loop [[vect:![0-9]+]]
+; CHECK: for.body:
+; CHECK: br {{.*}} label %for.body, !llvm.loop [[scalar:![0-9]+]]
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+; Now, we check for the Hint metadata
+; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
+; CHECK: [[width]] = metadata !{metadata !"llvm.vectorizer.width", i32 1}
+; CHECK: [[unroll]] = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
+
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index 6c0366eae973..01c912567b61 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @read_mod_write_single_ptr
+;CHECK-LABEL: @read_mod_write_single_ptr(
 ;CHECK: load <8 x float>
 ;CHECK: ret i32
 define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
@@ -26,7 +26,7 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
 }
 
 
-;CHECK: @read_mod_i64
+;CHECK-LABEL: @read_mod_i64(
 ;CHECK: load <2 x i64>
 ;CHECK: ret i32
 define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 760d28deaf27..0af562db8479 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @conversion_cost1
+;CHECK-LABEL: @conversion_cost1(
 ;CHECK: store <32 x i8>
 ;CHECK: ret
 define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
@@ -24,7 +24,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun
   ret i32 undef
 }
 
-;CHECK: @conversion_cost2
+;CHECK-LABEL: @conversion_cost2(
 ;CHECK: <2 x float>
 ;CHECK: ret
 define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index b7f479acf962..98718e1e9708 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @a = common global [2048 x i32] zeroinitializer, align 16
 
 ; The program below gathers and scatters data. We better not vectorize it.
-;CHECK: cost_model_1
+;CHECK-LABEL: @cost_model_1(
 ;CHECK-NOT: <2 x i32>
 ;CHECK-NOT: <4 x i32>
 ;CHECK-NOT: <8 x i32>
diff --git a/test/Transforms/LoopVectorize/X86/gather-cost.ll b/test/Transforms/LoopVectorize/X86/gather-cost.ll
new file mode 100644
index 000000000000..09363d65eefc
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/gather-cost.ll
@@ -0,0 +1,86 @@
+; RUN: opt -loop-vectorize -mtriple=x86_64-apple-macosx -S -mcpu=corei7-avx < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@kernel = global [512 x float] zeroinitializer, align 16
+@kernel2 = global [512 x float] zeroinitializer, align 16
+@kernel3 = global [512 x float] zeroinitializer, align 16
+@kernel4 = global [512 x float] zeroinitializer, align 16
+@src_data = global [1536 x float] zeroinitializer, align 16
+@r_ = global i8 0, align 1
+@g_ = global i8 0, align 1
+@b_ = global i8 0, align 1
+
+; We don't want to vectorize most loops containing gathers because they are
+; expensive. This function represents a point where vectorization starts to
+; become beneficial.
+; Make sure we are conservative and don't vectorize it.
+; CHECK-NOT: x float>
+
+define void @_Z4testmm(i64 %size, i64 %offset) {
+entry:
+  %cmp53 = icmp eq i64 %size, 0
+  br i1 %cmp53, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %r.057 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add10, %for.body ]
+  %g.056 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add20, %for.body ]
+  %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
+  %add = add i64 %v.055, %offset
+  %mul = mul i64 %add, 3
+  %arrayidx = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %mul
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float]* @kernel, i64 0, i64 %v.055
+  %1 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %1
+  %arrayidx4 = getelementptr inbounds [512 x float]* @kernel2, i64 0, i64 %v.055
+  %2 = load float* %arrayidx4, align 4
+  %mul5 = fmul fast float %mul3, %2
+  %arrayidx6 = getelementptr inbounds [512 x float]* @kernel3, i64 0, i64 %v.055
+  %3 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %mul5, %3
+  %arrayidx8 = getelementptr inbounds [512 x float]* @kernel4, i64 0, i64 %v.055
+  %4 = load float* %arrayidx8, align 4
+  %mul9 = fmul fast float %mul7, %4
+  %add10 = fadd fast float %r.057, %mul9
+  %arrayidx.sum = add i64 %mul, 1
+  %arrayidx11 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
+  %5 = load float* %arrayidx11, align 4
+  %mul13 = fmul fast float %1, %5
+  %mul15 = fmul fast float %2, %mul13
+  %mul17 = fmul fast float %3, %mul15
+  %mul19 = fmul fast float %4, %mul17
+  %add20 = fadd fast float %g.056, %mul19
+  %arrayidx.sum52 = add i64 %mul, 2
+  %arrayidx21 = getelementptr inbounds [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
+  %6 = load float* %arrayidx21, align 4
+  %mul23 = fmul fast float %1, %6
+  %mul25 = fmul fast float %2, %mul23
+  %mul27 = fmul fast float %3, %mul25
+  %mul29 = fmul fast float %4, %mul27
+  %add30 = fadd fast float %b.054, %mul29
+  %inc = add i64 %v.055, 1
+  %exitcond = icmp ne i64 %inc, %size
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  %add30.lcssa = phi float [ %add30, %for.body ]
+  %add20.lcssa = phi float [ %add20, %for.body ]
+  %add10.lcssa = phi float [ %add10, %for.body ]
+  %phitmp = fptoui float %add10.lcssa to i8
+  %phitmp60 = fptoui float %add20.lcssa to i8
+  %phitmp61 = fptoui float %add30.lcssa to i8
+  br label %for.end
+
+for.end:
+  %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  store i8 %r.0.lcssa, i8* @r_, align 1
+  store i8 %g.0.lcssa, i8* @g_, align 1
+  store i8 %b.0.lcssa, i8* @b_, align 1
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index d2d0eac305f5..e1113fdd911c 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -9,13 +9,13 @@ target triple = "x86_64-apple-macosx10.8.0"
 @a = common global [2048 x i32] zeroinitializer, align 16
 
 ; Select VF = 8;
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
 
-;UNROLL: @example1
+;UNROLL-LABEL: @example1(
 ;UNROLL: load <4 x i32>
 ;UNROLL: load <4 x i32>
 ;UNROLL: add nsw <4 x i32>
@@ -45,12 +45,12 @@ define void @example1() nounwind uwtable ssp {
 }
 
 ; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
-;CHECK: @example10b
+;CHECK-LABEL: @example10b(
 ;CHECK: load <4 x i16>
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
-;UNROLL: @example10b
+;UNROLL-LABEL: @example10b(
 ;UNROLL: load <4 x i16>
 ;UNROLL: load <4 x i16>
 ;UNROLL: store <4 x i32>
diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index 47a5e7aee4c1..d6120e76cc0b 100644
--- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret void
 
@@ -21,7 +21,7 @@ for.end.us:                                       ; preds = %for.body3.us
   %indvars.iv.next34 = add i64 %indvars.iv33, 1
   %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
   %exitcond36 = icmp eq i32 %lftr.wideiv35, %m
-  br i1 %exitcond36, label %for.end15, label %for.body3.lr.ph.us, !llvm.loop.parallel !5
+  br i1 %exitcond36, label %for.end15, label %for.body3.lr.ph.us, !llvm.loop !5
 
 for.body3.us:                                     ; preds = %for.body3.us, %for.body3.lr.ph.us
   %indvars.iv29 = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next30, %for.body3.us ]
@@ -35,7 +35,7 @@ for.body3.us:                                     ; preds = %for.body3.us, %for.
   %indvars.iv.next30 = add i64 %indvars.iv29, 1
   %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
   %exitcond32 = icmp eq i32 %lftr.wideiv31, %m
-  br i1 %exitcond32, label %for.end.us, label %for.body3.us, !llvm.loop.parallel !4
+  br i1 %exitcond32, label %for.end.us, label %for.body3.us, !llvm.loop !4
 
 for.body3.lr.ph.us:                               ; preds = %for.end.us, %entry
   %indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ]
diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/Transforms/LoopVectorize/X86/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index f904a8e0b117..2c47fcb4d389 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -35,7 +35,7 @@ for.body:                                         ; preds = %for.body.for.body_c
   %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
   %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
-  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop.parallel !3
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop !3
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
   %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index 3f1a071e69fa..7e156a9edad4 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ;    }
 ;}
 
-;CHECK: @loop
+;CHECK-LABEL: @loop(
 ;CHECK-NOT: <4 x i32>
 define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
 entry:
@@ -42,7 +42,7 @@ for.end:                                          ; preds = %for.body
 ; The same loop with parallel loop metadata added to the loop branch
 ; and the memory instructions.
 
-;CHECK: @parallel_loop
+;CHECK-LABEL: @parallel_loop(
 ;CHECK: <4 x i32>
 define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
 entry:
@@ -65,7 +65,7 @@ for.body:                                         ; preds = %for.body, %entry
   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
 
 for.end:                                          ; preds = %for.body
   ret void
@@ -74,7 +74,7 @@ for.end:                                          ; preds = %for.body
 ; The same loop with an illegal parallel loop metadata: the memory
 ; accesses refer to a different loop's identifier.
 
-;CHECK: @mixed_metadata
+;CHECK-LABEL: @mixed_metadata(
 ;CHECK-NOT: <4 x i32>
 
 define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
@@ -98,7 +98,7 @@ for.body:                                         ; preds = %for.body, %entry
   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
 
 for.end:                                          ; preds = %for.body
   ret void
diff --git a/test/Transforms/LoopVectorize/X86/rauw-bug.ll b/test/Transforms/LoopVectorize/X86/rauw-bug.ll
new file mode 100644
index 000000000000..4284fbacfa7e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/rauw-bug.ll
@@ -0,0 +1,33 @@
+; RUN: opt -slp-vectorizer -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; This test used to fail under libgmalloc because we would try to access a
+; pointer that had already been deleted. To reproduce, run:
+;
+; llvm-lit -v --param use_gmalloc=1 --param
+;   gmalloc_path=/usr/lib/libgmalloc.dylib
+;   test/Transforms/LoopVectorize/X86/rauw-bug.ll
+;
+; radar://15498655
+
+; CHECK-LABEL: @reduced(
+define void @reduced()  { ; reproducer: two independent fmul/fsub chains carried around a self-loop
+entry:
+  br i1 undef, label %while.body, label %while.cond63.preheader.while.end76_crit_edge ; condition is undef
+
+while.cond63.preheader.while.end76_crit_edge:
+  ret void
+
+while.body:
+  %d2_fx.015 = phi double [ %sub52, %while.body ], [ undef, %entry ] ; fed by %sub52; not used by any instruction here
+  %d2_fy.014 = phi double [ %sub58, %while.body ], [ undef, %entry ] ; fed by %sub58; not used by any instruction here
+  %d3_fy.013 = phi double [ %div56, %while.body ], [ undef, %entry ] ; loop-carried input of the "y" chain
+  %d3_fx.012 = phi double [ %div50, %while.body ], [ undef, %entry ] ; loop-carried input of the "x" chain
+  %div50 = fmul double %d3_fx.012, 1.250000e-01 ; x chain: scale by 0.125
+  %sub52 = fsub double 0.000000e+00, %div50 ; x chain: 0.0 - %div50
+  %div56 = fmul double %d3_fy.013, 1.250000e-01 ; y chain: scale by 0.125
+  %sub58 = fsub double 0.000000e+00, %div56 ; y chain: 0.0 - %div56
+  br label %while.body ; unconditional back edge; this block has no exit
+}
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
index f580846a0228..3957a5541422 100644
--- a/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -5,7 +5,7 @@ target triple = "i386-apple-darwin"
 
 ; PR15344
 define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: preheader
 ; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
 ; CHECK: vector.memcheck
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index f390b33c0388..14ac417bb573 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -20,7 +20,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @dj = common global [1024 x i32] zeroinitializer, align 16
 
 ; We can optimize this test without a tail.
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
@@ -47,7 +47,7 @@ define void @example1() optsize {
 }
 
 ; Can't vectorize in 'optsize' mode because we need a tail.
-;CHECK: @example2
+;CHECK-LABEL: @example2(
 ;CHECK-NOT: store <4 x i32>
 ;CHECK: ret void
 define void @example2(i32 %n, i32 %x) optsize {
@@ -92,7 +92,7 @@ define void @example2(i32 %n, i32 %x) optsize {
 }
 
 ; N is unknown, we need a tail. Can't vectorize.
-;CHECK: @example3
+;CHECK-LABEL: @example3(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret void
 define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) optsize {
@@ -117,7 +117,7 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
 
 
 ; We can't vectorize this one because we need a runtime ptr check.
-;CHECK: @example23
+;CHECK-LABEL: @example23(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret void
 define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize {
@@ -143,7 +143,7 @@ define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize {
 
 
 ; We CAN vectorize this example because the pointers are marked as noalias.
-;CHECK: @example23b
+;CHECK-LABEL: @example23b(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst) optsize {
diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll
new file mode 100644
index 000000000000..6b38bacf8888
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd11.0"
+
+@big = external global [0 x i32]
+
+; PR18049
+; We need to truncate the exit count to i32. This is legal because the
+; arithmetic is signed (%inc is nsw).
+
+; CHECK-LABEL: @tripcount(
+; CHECK: trunc i64 %count to i32
+
+define void @tripcount(i64 %count) { ; loop with an i32 counter compared against an i64 bound
+entry:
+  %cmp6 = icmp sgt i64 %count, 0 ; enter the loop only when %count > 0
+  br i1 %cmp6, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] ; i32 induction variable, starts at 0
+  %arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07
+  %0 = load i32* %arrayidx, align 4
+  %neg = xor i32 %0, -1 ; bitwise NOT of the loaded element
+  store i32 %neg, i32* %arrayidx, align 4 ; written back to the same slot
+  %inc = add nsw i32 %i.07, 1 ; nsw increment of the i32 counter
+  %conv = sext i32 %inc to i64 ; widened so it can be compared with the i64 bound
+  %cmp = icmp slt i64 %conv, %count ; signed compare: continue while %inc < %count
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
new file mode 100644
index 000000000000..5064fec286ce
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll-pm.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -O2 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -disable-loop-unrolling -S | FileCheck %s -check-prefix=CHECK-NOUNRL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK-LABEL: @bar(
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+;CHECK-NOUNRL-LABEL: @bar(
+;CHECK-NOUNRL: store <4 x i32>
+;CHECK-NOUNRL-NOT: store <4 x i32>
+;CHECK-NOUNRL: ret
+define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp { ; adds 6 to each of the first %n elements of %A
+  %1 = icmp sgt i32 %n, 0 ; enter the loop only when %n > 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] ; i64 induction variable, starts at 0
+  %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = add nsw i32 %3, 6 ; element + 6
+  store i32 %4, i32* %2, align 4 ; stored back to the address it was loaded from
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrowed to compare against the i32 bound
+  %exitcond = icmp eq i32 %lftr.wideiv, %n ; exit once the truncated counter equals %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef ; the returned value is undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index ef63a145d0c1..ea107dc4dc51 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: load <4 x i32>
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: store <4 x i32>
@@ -26,7 +26,7 @@ define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
   ret i32 undef
 }
 
-;CHECK: @bar
+;CHECK-LABEL: @bar(
 ;CHECK: store <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index b66119f4ef59..efc93d94a7c5 100644
--- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 @x = common global [1024 x x86_fp80] zeroinitializer, align 16
 
-;CHECK: @example
+;CHECK-LABEL: @example(
 ;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>*
 ;CHECK: store
 ;CHECK: ret void
diff --git a/test/Transforms/LoopVectorize/XCore/lit.local.cfg b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
new file mode 100644
index 000000000000..4d17d4642045
--- /dev/null
+++ b/test/Transforms/LoopVectorize/XCore/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())  # names listed in targets_to_build
+if 'XCore' not in targets:
+    config.unsupported = True  # XCore is not in the list: mark this directory unsupported
diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
new file mode 100644
index 000000000000..a099daa740e5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s
+
+target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
+target triple = "xcore"
+; The XCore target has no vector registers, so the loop should not be vectorized.
+;CHECK-LABEL: @f(
+;CHECK: entry:
+;CHECK-NOT: vector.body
+;CHECK-NEXT: br label %do.body
+define void @f(i8* nocapture %ptr, i32 %len) { ; do-while loop that stores zero bytes through an advancing pointer
+entry:
+  br label %do.body
+do.body:
+  %ptr.addr.0 = phi i8* [ %ptr, %entry ], [ %incdec.ptr, %do.body ] ; current write cursor
+  %len.addr.0 = phi i32 [ %len, %entry ], [ %dec, %do.body ] ; remaining count
+  %incdec.ptr = getelementptr inbounds i8* %ptr.addr.0, i32 1 ; cursor advanced by one byte
+  store i8 0, i8* %ptr.addr.0, align 1 ; zero the byte at the current cursor
+  %dec = add nsw i32 %len.addr.0, -1
+  %tobool = icmp eq i32 %len.addr.0, 0 ; tests the count as it was before the decrement
+  br i1 %tobool, label %do.end, label %do.body
+do.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll
new file mode 100644
index 000000000000..84b03615d374
--- /dev/null
+++ b/test/Transforms/LoopVectorize/align.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure we output the ABI alignment if no alignment is specified.
+
+;CHECK-LABEL: @align(
+;CHECK: load <4 x i32>* {{.*}} align 4
+;CHECK: load <4 x i32>* {{.*}} align 4
+;CHECK: store <4 x i32> {{.*}} align 4
+
+define void @align(i32* %a, i32* %b, i32* %c) nounwind uwtable ssp { ; a[i] = c[i] + b[i] for i in [0, 128)
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %3 = load i32* %2 ; no explicit alignment on this load
+  %4 = getelementptr inbounds i32* %c, i64 %indvars.iv
+  %5 = load i32* %4 ; no explicit alignment on this load
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %6, i32* %7 ; no explicit alignment on this store
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 128 ; exit when the incremented counter reaches 128
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll
index a14b92d229b4..7b712729a1cd 100644
--- a/test/Transforms/LoopVectorize/bsd_regex.ll
+++ b/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ;PR 15830.
 
-;CHECK: foo
+;CHECK-LABEL: @foo(
 ; When scalarizing stores we need to preserve the original order.
 ; Make sure that we are extracting in the correct order (0101, and not 0011).
 ;CHECK: extractelement <2 x i64> {{.*}}, i32 0
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
index 2aa29ed2c820..255ce9c77eaf 100644
--- a/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 @a = common global [2048 x i32] zeroinitializer, align 16
 
-;CHECK: @example12
+;CHECK-LABEL: @example12(
 ;CHECK: trunc i64
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index da0fb05fe843..c8215a107de7 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @cpp_new_arrays
+;CHECK-LABEL: @cpp_new_arrays(
 ;CHECK: sext i32
 ;CHECK: load <4 x float>
 ;CHECK: fadd <4 x float>
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index 127d479b3a06..2497b25ea1da 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @B = global [1024 x i32] zeroinitializer, align 16
 @C = global [1024 x i32] zeroinitializer, align 16
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 define i32 @test() #0 {
 entry:
   tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18
@@ -38,30 +38,33 @@ declare void @llvm.dbg.declare(metadata, metadata) #1
 
 declare void @llvm.dbg.value(metadata, i64, metadata) #1
 
-attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!26}
 
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test", metadata !"/path/to/somewhere", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, metadata !""}
+!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""}
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3}
-!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"test", metadata !"test", metadata !"test", metadata !4, i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
-!4 = metadata !{i32 786473, metadata !"test", metadata !"/path/to/somewhere", null}
-!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0}
+!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
+!4 = metadata !{i32 786473, metadata !25}
+!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
 !8 = metadata !{metadata !9}
 !9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0}
-!10 = metadata !{i32 786443, metadata !3, i32 6, i32 0, metadata !4, i32 0}
+!10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0}
 !11 = metadata !{metadata !12, metadata !16, metadata !17}
 !12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
-!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0}
+!13 = metadata !{i32 786433, null, null, null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int]
 !14 = metadata !{metadata !15}
 !15 = metadata !{i32 786465, i64 0, i64 1024}
 !16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
 !17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} 
 !18 = metadata !{i32 6, i32 0, metadata !10, null}
 !19 = metadata !{i32 7, i32 0, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1}
+!20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1}
 !24 = metadata !{i32 9, i32 0, metadata !3, null}
+!25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
new file mode 100644
index 000000000000..bf0b4184b7a1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -0,0 +1,90 @@
+; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we are preserving debug info in the vectorized code.
+
+; CHECK: for.body.lr.ph
+; CHECK:   cmp.zero = icmp eq i64 {{.*}}, 0, !dbg ![[LOC:[0-9]+]]
+; CHECK: vector.body
+; CHECK:   index {{.*}}, !dbg ![[LOC]]
+; CHECK:   getelementptr inbounds i32* %a, {{.*}}, !dbg ![[LOC2:[0-9]+]]
+; CHECK:   load <2 x i32>* {{.*}}, !dbg ![[LOC2]]
+; CHECK:   add <2 x i32> {{.*}}, !dbg ![[LOC2]]
+; CHECK:   add i64 %index, 2, !dbg ![[LOC]]
+; CHECK:   icmp eq i64 %index.next, %end.idx.rnd.down, !dbg ![[LOC]]
+; CHECK: middle.block
+; CHECK:   add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg ![[LOC2]]
+; CHECK:   extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[LOC2]]
+
+define i32 @f(i32* nocapture %a, i32 %size) #0 { ; sums the first %size elements of %a; instructions carry !dbg locations
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19
+  tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21
+  %cmp4 = icmp eq i32 %size, 0, !dbg !21 ; skip the loop when %size is 0
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body, !dbg !21
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %sum.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; running sum, starts at 0
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22
+  %0 = load i32* %arrayidx, align 4, !dbg !22
+  %add = add i32 %0, %sum.05, !dbg !22 ; sum + a[i]
+  tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
+  %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21 ; continue while the truncated counter differs from %size
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ] ; single-input phi carrying the final sum out of the loop
+  br label %for.end, !dbg !21
+
+for.end:                                          ; preds = %entry, %for.cond.for.end_crit_edge
+  %sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ] ; 0 when the loop was skipped
+  ret i32 %sum.0.lcssa, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !27}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
+!1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10, metadata !11}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16}
+!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3]
+!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4]
+!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
+!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!19 = metadata !{i32 3, i32 0, metadata !4, null}
+!20 = metadata !{i32 4, i32 0, metadata !4, null}
+!21 = metadata !{i32 5, i32 0, metadata !17, null}
+!22 = metadata !{i32 6, i32 0, metadata !17, null}
+!26 = metadata !{i32 7, i32 0, metadata !4, null}
+!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll
new file mode 100644
index 000000000000..8a4f8ce3c122
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ee-crash.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; This test checks that we deal with an in-loop extractelement (for now, this
+; means not crashing by not vectorizing).
+; CHECK-LABEL: @_Z4foo1Pii(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+define i32 @_Z4foo1Pii(i32* %A, i32 %n, <2 x i32> %q) #0 { ; accumulates A[i] + q[n] over [%A, %A + %n)
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext ; end pointer: %A advanced by %n elements
+  %cmp3.i = icmp eq i32 %n, 0 ; skip the loop when %n is 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] ; running total, starts at 0
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] ; read cursor, starts at %A
+  %0 = load i32* %__first.addr.04.i, align 4
+  %q1 = extractelement <2 x i32> %q, i32 %n ; the in-loop extractelement; its index is the non-constant %n
+  %q2 = add nsw i32 %0, %q1
+  %add.i = add nsw i32 %q2, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr ; stop when the advanced cursor equals the end pointer
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] ; 0 when the loop was skipped
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable }
+
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index 656912e178f9..a4ebb4284881 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @flags1
+;CHECK-LABEL: @flags1(
 ;CHECK: load <4 x i32>
 ;CHECK: mul nsw <4 x i32>
 ;CHECK: store <4 x i32>
@@ -28,7 +28,7 @@ define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
 }
 
 
-;CHECK: @flags2
+;CHECK-LABEL: @flags2(
 ;CHECK: load <4 x i32>
 ;CHECK: mul <4 x i32>
 ;CHECK: store <4 x i32>
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index 54ca172e8696..c45098dd2c3b 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: fadd <4 x float>
 ;CHECK: ret
 define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll
new file mode 100644
index 000000000000..f1f068c43db3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/funcall.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we can vectorize loops with calls to math library functions.
+; They might read the rounding mode but we are only vectorizing loops that
+; contain a limited set of function calls and none of them sets the rounding
+; mode, so vectorizing them is safe.
+
+; CHECK-LABEL: @test(
+; CHECK: <2 x double>
+
+define void @test(double* %d, double %t) { ; replaces each of the first 128 elements of %d with pow(element, %t)
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %d, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %1 = tail call double @llvm.pow.f64(double %0, double %t) ; the intrinsic call inside the loop body
+  store double %1, double* %arrayidx, align 8 ; result overwrites the loaded element
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128 ; continue until the incremented counter reaches 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare double @llvm.pow.f64(double, double)
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index f335557c0019..d8959d4c106a 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -20,12 +20,12 @@ target triple = "x86_64-apple-macosx10.8.0"
 @dd = common global [1024 x float] zeroinitializer, align 16
 @dj = common global [1024 x i32] zeroinitializer, align 16
 
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
-;UNROLL: @example1
+;UNROLL-LABEL: @example1(
 ;UNROLL: load <4 x i32>
 ;UNROLL: load <4 x i32>
 ;UNROLL: load <4 x i32>
@@ -60,10 +60,10 @@ define void @example1() nounwind uwtable ssp {
   ret void
 }
 
-;CHECK: @example2
+;CHECK-LABEL: @example2(
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
-;UNROLL: @example2
+;UNROLL-LABEL: @example2(
 ;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
@@ -110,10 +110,10 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
   ret void
 }
 
-;CHECK: @example3
+;CHECK-LABEL: @example3(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
-;UNROLL: @example3
+;UNROLL-LABEL: @example3(
 ;UNROLL: <4 x i32>
 ;UNROLL: <4 x i32>
 ;UNROLL: <4 x i32>
@@ -139,10 +139,10 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
   ret void
 }
 
-;CHECK: @example4
+;CHECK-LABEL: @example4(
 ;CHECK: load <4 x i32>
 ;CHECK: ret void
-;UNROLL: @example4
+;UNROLL-LABEL: @example4(
 ;UNROLL: load <4 x i32>
 ;UNROLL: load <4 x i32>
 ;UNROLL: load <4 x i32>
@@ -205,10 +205,10 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
   ret void
 }
 
-;CHECK: @example8
+;CHECK-LABEL: @example8(
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
-;UNROLL: @example8
+;UNROLL-LABEL: @example8(
 ;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
 ;UNROLL: store <4 x i32>
@@ -240,7 +240,7 @@ define void @example8(i32 %x) nounwind uwtable ssp {
   ret void
 }
 
-;CHECK: @example9
+;CHECK-LABEL: @example9(
 ;CHECK: phi <4 x i32>
 ;CHECK: ret i32
 define i32 @example9() nounwind uwtable readonly ssp {
@@ -264,7 +264,7 @@ define i32 @example9() nounwind uwtable readonly ssp {
   ret i32 %7
 }
 
-;CHECK: @example10a
+;CHECK-LABEL: @example10a(
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
 ;CHECK: load <4 x i16>
@@ -299,7 +299,7 @@ define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb,
   ret void
 }
 
-;CHECK: @example10b
+;CHECK-LABEL: @example10b(
 ;CHECK: load <4 x i16>
 ;CHECK: sext <4 x i16>
 ;CHECK: store <4 x i32>
@@ -323,7 +323,7 @@ define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb,
   ret void
 }
 
-;CHECK: @example11
+;CHECK-LABEL: @example11(
 ;CHECK: load i32
 ;CHECK: load i32
 ;CHECK: load i32
@@ -367,7 +367,7 @@ define void @example11() nounwind uwtable ssp {
   ret void
 }
 
-;CHECK: @example12
+;CHECK-LABEL: @example12(
 ;CHECK: trunc i64
 ;CHECK: store <4 x i32>
 ;CHECK: ret void
@@ -389,7 +389,7 @@ define void @example12() nounwind uwtable ssp {
 }
 
 ; Can't vectorize because of reductions.
-;CHECK: @example13
+;CHECK-LABEL: @example13(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret void
 define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
@@ -430,7 +430,7 @@ define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %o
 }
 
 ; Can vectorize.
-;CHECK: @example14
+;CHECK-LABEL: @example14(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
@@ -575,7 +575,7 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   ret void
 }
 
-;CHECK: @example21
+;CHECK-LABEL: @example21(
 ;CHECK: load <4 x i32>
 ;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
 ;CHECK: ret i32
@@ -603,7 +603,7 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   ret i32 %a.0.lcssa
 }
 
-;CHECK: @example23
+;CHECK-LABEL: @example23(
 ;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
@@ -627,7 +627,7 @@ define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtabl
   ret void
 }
 
-;CHECK: @example24
+;CHECK-LABEL: @example24(
 ;CHECK: shufflevector <4 x i16>
 ;CHECK: ret void
 define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
@@ -653,7 +653,7 @@ define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
   ret void
 }
 
-;CHECK: @example25
+;CHECK-LABEL: @example25(
 ;CHECK: and <4 x i1>
 ;CHECK: zext <4 x i1>
 ;CHECK: ret void
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index 121da8ba7e16..0118fb47412a 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 
@@ -22,7 +22,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;     Foo.A[i] = Foo.B[i] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias01
+; CHECK-LABEL: define i32 @noAlias01(
 ; CHECK: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -70,7 +70,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i] = Foo.B[i+10] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias02
+; CHECK-LABEL: define i32 @noAlias02(
 ; CHECK: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -119,7 +119,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i+10] = Foo.B[i] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias03
+; CHECK-LABEL: define i32 @noAlias03(
 ; CHECK: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -168,7 +168,7 @@ for.end:                                          ; preds = %for.cond
 ;     *(PA+i) = *(PB+i) + a;
 ;   return *(PA+a);
 ; }
-; CHECK: define i32 @noAlias04
+; CHECK-LABEL: define i32 @noAlias04(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 ;
@@ -222,7 +222,7 @@ for.end:                                          ; preds = %for.cond
 ;     Bar.A[N][i] = Bar.B[N][i] + a;
 ;   return Bar.A[N][a];
 ; }
-; CHECK: define i32 @noAlias05
+; CHECK-LABEL: define i32 @noAlias05(
 ; CHECK: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -278,7 +278,7 @@ for.end:                                          ; preds = %for.cond
 ;     Bar.A[N][i] = Bar.A[N+1][i] + a;
 ;   return Bar.A[N][a];
 ; }
-; CHECK: define i32 @noAlias06
+; CHECK-LABEL: define i32 @noAlias06(
 ; CHECK: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -335,10 +335,9 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias07
-; CHECK: sub nsw <4 x i32>
+; CHECK-LABEL: define i32 @noAlias07(
+; CHECK: store <4 x i32>
 ; CHECK: ret
-
 define i32 @noAlias07(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
@@ -387,7 +386,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias08
+; CHECK-LABEL: define i32 @noAlias08(
 ; CHECK: sub nsw <4 x i32>
 ; CHECK: ret
 
@@ -439,7 +438,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias09
+; CHECK-LABEL: define i32 @noAlias09(
 ; CHECK: sub nsw <4 x i32>
 ; CHECK: ret
 
@@ -491,7 +490,7 @@ for.end:                                          ; preds = %for.cond
 ;     *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
 ;   return *(PA+a);
 ; }
-; CHECK: define i32 @noAlias10
+; CHECK-LABEL: define i32 @noAlias10(
 ; CHECK-NOT: sub nsw <4 x i32>
 ; CHECK: ret
 ;
@@ -551,8 +550,8 @@ for.end:                                          ; preds = %for.cond
 ;     Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
 ;   return Bar.A[N][a];
 ; }
-; CHECK: define i32 @noAlias11
-; CHECK: sub nsw <4 x i32>
+; CHECK-LABEL: define i32 @noAlias11(
+; CHECK: store <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias11(i32 %a) #0 {
@@ -611,8 +610,8 @@ for.end:                                          ; preds = %for.cond
 ;     Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
 ;   return Bar.A[N][a];
 ; }
-; CHECK: define i32 @noAlias12
-; CHECK: sub nsw <4 x i32>
+; CHECK-LABEL: define i32 @noAlias12(
+; CHECK: store <4 x i32>
 ; CHECK: ret
 
 define i32 @noAlias12(i32 %a) #0 {
@@ -672,7 +671,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i] = Foo.A[i+4] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias13
+; CHECK-LABEL: define i32 @noAlias13(
 ; CHECK: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -721,7 +720,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @noAlias14
+; CHECK-LABEL: define i32 @noAlias14(
 ; CHECK: sub nsw <4 x i32>
 ; CHECK: ret
 
@@ -777,7 +776,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i] = Foo.B[SIZE-i-1] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @mayAlias01
+; CHECK-LABEL: define i32 @mayAlias01(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -827,7 +826,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[SIZE-i-1] = Foo.B[i] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @mayAlias02
+; CHECK-LABEL: define i32 @mayAlias02(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -877,7 +876,7 @@ for.end:                                          ; preds = %for.cond
 ;     *(PA+i) = *(PB+SIZE-i-1) + a;
 ;   return *(PA+a);
 ; }
-; CHECK: define i32 @mayAlias03
+; CHECK-LABEL: define i32 @mayAlias03(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -934,7 +933,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @mustAlias01
+; CHECK-LABEL: define i32 @mustAlias01(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -984,7 +983,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i] = Foo.B[SIZE-i-10] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @mustAlias02
+; CHECK-LABEL: define i32 @mustAlias02(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 
@@ -1033,7 +1032,7 @@ for.end:                                          ; preds = %for.cond
 ;     Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
 ;   return Foo.A[a];
 ; }
-; CHECK: define i32 @mustAlias03
+; CHECK-LABEL: define i32 @mustAlias03(
 ; CHECK-NOT: add nsw <4 x i32>
 ; CHECK: ret
 
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
new file mode 100644
index 000000000000..765e14d6985b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -0,0 +1,69 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+; Make sure we can vectorize in the presence of hoistable conditional loads.
+; CHECK-LABEL: @hoist_cond_load(
+; CHECK: load <2 x float>
+
+define void @hoist_cond_load() {
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx2, align 4
+  %cmp3 = fcmp oeq float %0, 0.000000e+00
+  br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+  %1 = load float* %arrayidx, align 4
+  br label %if.end9
+
+if.end9:
+  %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+  store float %tmp.0, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; However, we can't hoist loads whose address we have not seen unconditionally
+; accessed.
+; CHECK-LABEL: @dont_hoist_cond_load(
+; CHECK-NOT: load <2 x float>
+
+define void @dont_hoist_cond_load() {
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
+  %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx2, align 4
+  %cmp3 = fcmp oeq float %0, 0.000000e+00
+  br i1 %cmp3, label %if.end9, label %if.else
+
+if.else:
+  %1 = load float* %arrayidx, align 4
+  br label %if.end9
+
+if.end9:
+  %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
+  store float %tmp.0, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll
index 3283456aa3c3..f8f2cf1fff4b 100644
--- a/test/Transforms/LoopVectorize/if-conv-crash.ll
+++ b/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -37,3 +37,25 @@ if.end21:                                         ; preds = %lor.lhs.false
 if.end25:                                         ; preds = %entry
   ret void
 }
+
+; PR15990
+; We can have basic blocks with single entry PHI nodes.
+define void @single_entry_phi(i32* %a, i32 *%b) {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %inc10 = phi i32 [ 0, %entry ], [ %inc, %for.end ]
+  br label %for.end
+
+for.end:
+  %malicious.phi = phi i32 [ 0, %for.cond1.preheader ]
+  %inc = add nsw i32 %inc10, 1
+  %tobool = icmp eq i32 %inc, 0
+  br i1 %tobool, label %for.cond.for.end5, label %for.cond1.preheader
+
+for.cond.for.end5:
+  %and.lcssa = phi i32 [ %malicious.phi, %for.end ]
+  store i32 %and.lcssa, i32* %a, align 4
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll b/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
new file mode 100644
index 000000000000..27c274d557eb
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
@@ -0,0 +1,243 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@a = global i32* null, align 8
+@b = global i32* null, align 8
+@c = global i32* null, align 8
+
+; Don't create an exponential amount of IR for the edge masks needed when
+; if-converting this code.
+
+; PR16472
+
+; CHECK-NOT: %6000000 =
+
+define void @_Z3fn4i(i32 %p1) {
+entry:
+  %cmp88 = icmp sgt i32 %p1, 0
+  br i1 %cmp88, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load i32** @b, align 8  %1 = load i32** @a, align 8  %2 = load i32** @c, align 8  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %_ZL3fn3ii.exit58 ]
+  %arrayidx = getelementptr inbounds i32* %0, i64 %indvars.iv
+  %3 = load i32* %arrayidx, align 4  %4 = trunc i64 %indvars.iv to i32
+  %and.i = and i32 %4, 1
+  %tobool.i.i = icmp eq i32 %and.i, 0
+  br i1 %tobool.i.i, label %if.end.i, label %if.then.i
+
+if.then.i:
+  %and.i.i = lshr i32 %3, 2
+  %and.lobit.i.i = and i32 %and.i.i, 1
+  %5 = xor i32 %and.lobit.i.i, 1
+  %or.i.i = or i32 %5, %3
+  %cmp.i = icmp sgt i32 %or.i.i, 0
+  %conv.i = zext i1 %cmp.i to i32
+  br label %if.end.i
+
+if.end.i:
+  %tobool.i87 = phi i1 [ true, %if.then.i ], [ false, %for.body ]
+  %p1.addr.0.i = phi i32 [ %conv.i, %if.then.i ], [ %3, %for.body ]
+  %6 = trunc i64 %indvars.iv to i32
+  %and1.i = and i32 %6, 7
+  %tobool2.i = icmp eq i32 %and1.i, 0
+  br i1 %tobool2.i, label %if.end7.i, label %if.then3.i
+
+if.then3.i:
+  %p1.addr.0.lobit.i = lshr i32 %p1.addr.0.i, 31
+  %and6.i = and i32 %p1.addr.0.i, 1
+  %or.i = or i32 %p1.addr.0.lobit.i, %and6.i
+  br label %if.end7.i
+
+if.end7.i:
+  %p1.addr.1.i = phi i32 [ %or.i, %if.then3.i ], [ %p1.addr.0.i, %if.end.i ]
+  br i1 %tobool.i87, label %if.then10.i, label %if.end13.i
+
+if.then10.i:
+  %cmp11.i = icmp sgt i32 %p1.addr.1.i, 0
+  %conv12.i = zext i1 %cmp11.i to i32
+  br label %if.end13.i
+
+if.end13.i:
+  %p1.addr.2.i = phi i32 [ %conv12.i, %if.then10.i ], [ %p1.addr.1.i, %if.end7.i ]
+  br i1 %tobool.i.i, label %_Z3fn2iii.exit, label %if.then16.i
+
+if.then16.i:
+  %and17.i = lshr i32 %p1.addr.2.i, 3
+  %and17.lobit.i = and i32 %and17.i, 1
+  br label %_Z3fn2iii.exit
+
+_Z3fn2iii.exit:
+  %p1.addr.3.i = phi i32 [ %and17.lobit.i, %if.then16.i ], [ %p1.addr.2.i, %if.end13.i ]
+  %7 = trunc i64 %indvars.iv to i32
+  %shr.i = ashr i32 %7, 1
+  %and.i18.i = and i32 %shr.i, 1
+  %tobool.i19.i = icmp ne i32 %and.i18.i, 0
+  br i1 %tobool.i19.i, label %if.then.i20.i, label %if.end.i.i
+
+if.then.i20.i:
+  %cmp.i.i = icmp sgt i32 %p1.addr.3.i, 0
+  %conv.i.i = zext i1 %cmp.i.i to i32
+  br label %if.end.i.i
+
+if.end.i.i:
+  %p1.addr.0.i21.i = phi i32 [ %conv.i.i, %if.then.i20.i ], [ %p1.addr.3.i, %_Z3fn2iii.exit ]
+  %and1.i.i = and i32 %shr.i, 7
+  %tobool2.i.i = icmp eq i32 %and1.i.i, 0
+  br i1 %tobool2.i.i, label %if.end7.i.i, label %if.then3.i.i
+
+if.then3.i.i:
+  %p1.addr.0.lobit.i.i = lshr i32 %p1.addr.0.i21.i, 31
+  %and6.i.i = and i32 %p1.addr.0.i21.i, 1
+  %or.i22.i = or i32 %p1.addr.0.lobit.i.i, %and6.i.i
+  br label %if.end7.i.i
+
+if.end7.i.i:
+  %p1.addr.1.i.i = phi i32 [ %or.i22.i, %if.then3.i.i ], [ %p1.addr.0.i21.i, %if.end.i.i ]
+  br i1 %tobool.i19.i, label %if.then10.i.i, label %if.end13.i.i
+
+if.then10.i.i:
+  %cmp11.i.i = icmp sgt i32 %p1.addr.1.i.i, 0
+  %conv12.i.i = zext i1 %cmp11.i.i to i32
+  br label %if.end13.i.i
+
+if.end13.i.i:
+  %p1.addr.2.i.i = phi i32 [ %conv12.i.i, %if.then10.i.i ], [ %p1.addr.1.i.i, %if.end7.i.i ]
+  %and14.i.i = and i32 %shr.i, 5
+  %tobool15.i.i = icmp eq i32 %and14.i.i, 0
+  br i1 %tobool15.i.i, label %_Z3fn2iii.exit.i, label %if.then16.i.i
+
+if.then16.i.i:
+  %and17.i.i = lshr i32 %p1.addr.2.i.i, 3
+  %and17.lobit.i.i = and i32 %and17.i.i, 1
+  br label %_Z3fn2iii.exit.i
+
+_Z3fn2iii.exit.i:
+  %p1.addr.3.i.i = phi i32 [ %and17.lobit.i.i, %if.then16.i.i ], [ %p1.addr.2.i.i, %if.end13.i.i ]
+  %8 = trunc i64 %indvars.iv to i32
+  %tobool.i11.i = icmp eq i32 %8, 0
+  br i1 %tobool.i11.i, label %_ZL3fn3ii.exit, label %if.then.i15.i
+
+if.then.i15.i:
+  %and.i12.i = lshr i32 %p1.addr.3.i.i, 2
+  %and.lobit.i13.i = and i32 %and.i12.i, 1
+  %9 = xor i32 %and.lobit.i13.i, 1
+  %or.i14.i = or i32 %9, %p1.addr.3.i.i
+  br label %_ZL3fn3ii.exit
+
+_ZL3fn3ii.exit:
+  %p1.addr.0.i16.i = phi i32 [ %or.i14.i, %if.then.i15.i ], [ %p1.addr.3.i.i, %_Z3fn2iii.exit.i ]
+  %arrayidx2 = getelementptr inbounds i32* %1, i64 %indvars.iv
+  store i32 %p1.addr.0.i16.i, i32* %arrayidx2, align 4  %arrayidx4 = getelementptr inbounds i32* %0, i64 %indvars.iv
+  %10 = load i32* %arrayidx4, align 4  br i1 %tobool.i.i, label %_Z3fn1ii.exit.i26, label %if.then.i.i21
+
+if.then.i.i21:
+  %and.i.i18 = lshr i32 %10, 2
+  %and.lobit.i.i19 = and i32 %and.i.i18, 1
+  %11 = xor i32 %and.lobit.i.i19, 1
+  %or.i.i20 = or i32 %11, %10
+  br label %_Z3fn1ii.exit.i26
+
+_Z3fn1ii.exit.i26:
+  %p1.addr.0.i.i22 = phi i32 [ %or.i.i20, %if.then.i.i21 ], [ %10, %_ZL3fn3ii.exit ]
+  br i1 %tobool.i87, label %if.then.i63, label %if.end.i67
+
+if.then.i63:
+  %cmp.i61 = icmp sgt i32 %p1.addr.0.i.i22, 0
+  %conv.i62 = zext i1 %cmp.i61 to i32
+  br label %if.end.i67
+
+if.end.i67:
+  %p1.addr.0.i64 = phi i32 [ %conv.i62, %if.then.i63 ], [ %p1.addr.0.i.i22, %_Z3fn1ii.exit.i26 ]
+  br i1 %tobool2.i, label %if.end7.i73, label %if.then3.i71
+
+if.then3.i71:
+  %p1.addr.0.lobit.i68 = lshr i32 %p1.addr.0.i64, 31
+  %and6.i69 = and i32 %p1.addr.0.i64, 1
+  %or.i70 = or i32 %p1.addr.0.lobit.i68, %and6.i69
+  br label %if.end7.i73
+
+if.end7.i73:
+  %p1.addr.1.i72 = phi i32 [ %or.i70, %if.then3.i71 ], [ %p1.addr.0.i64, %if.end.i67 ]
+  br i1 %tobool.i87, label %if.then10.i76, label %if.end13.i80
+
+if.then10.i76:
+  %cmp11.i74 = icmp sgt i32 %p1.addr.1.i72, 0
+  %conv12.i75 = zext i1 %cmp11.i74 to i32
+  br label %if.end13.i80
+
+if.end13.i80:
+  %p1.addr.2.i77 = phi i32 [ %conv12.i75, %if.then10.i76 ], [ %p1.addr.1.i72, %if.end7.i73 ]
+  br i1 %tobool.i.i, label %_Z3fn2iii.exit85, label %if.then16.i83
+
+if.then16.i83:
+  %and17.i81 = lshr i32 %p1.addr.2.i77, 3
+  %and17.lobit.i82 = and i32 %and17.i81, 1
+  br label %_Z3fn2iii.exit85
+
+_Z3fn2iii.exit85:
+  %p1.addr.3.i84 = phi i32 [ %and17.lobit.i82, %if.then16.i83 ], [ %p1.addr.2.i77, %if.end13.i80 ]
+  br i1 %tobool.i19.i, label %if.then.i20.i29, label %if.end.i.i33
+
+if.then.i20.i29:
+  %cmp.i.i27 = icmp sgt i32 %p1.addr.3.i84, 0
+  %conv.i.i28 = zext i1 %cmp.i.i27 to i32
+  br label %if.end.i.i33
+
+if.end.i.i33:
+  %p1.addr.0.i21.i30 = phi i32 [ %conv.i.i28, %if.then.i20.i29 ], [ %p1.addr.3.i84, %_Z3fn2iii.exit85 ]
+  br i1 %tobool2.i.i, label %if.end7.i.i39, label %if.then3.i.i37
+
+if.then3.i.i37:
+  %p1.addr.0.lobit.i.i34 = lshr i32 %p1.addr.0.i21.i30, 31
+  %and6.i.i35 = and i32 %p1.addr.0.i21.i30, 1
+  %or.i22.i36 = or i32 %p1.addr.0.lobit.i.i34, %and6.i.i35
+  br label %if.end7.i.i39
+
+if.end7.i.i39:
+  %p1.addr.1.i.i38 = phi i32 [ %or.i22.i36, %if.then3.i.i37 ], [ %p1.addr.0.i21.i30, %if.end.i.i33 ]
+  br i1 %tobool.i19.i, label %if.then10.i.i42, label %if.end13.i.i46
+
+if.then10.i.i42:
+  %cmp11.i.i40 = icmp sgt i32 %p1.addr.1.i.i38, 0
+  %conv12.i.i41 = zext i1 %cmp11.i.i40 to i32
+  br label %if.end13.i.i46
+
+if.end13.i.i46:
+  %p1.addr.2.i.i43 = phi i32 [ %conv12.i.i41, %if.then10.i.i42 ], [ %p1.addr.1.i.i38, %if.end7.i.i39 ]
+  br i1 %tobool15.i.i, label %_Z3fn2iii.exit.i52, label %if.then16.i.i49
+
+if.then16.i.i49:
+  %and17.i.i47 = lshr i32 %p1.addr.2.i.i43, 3
+  %and17.lobit.i.i48 = and i32 %and17.i.i47, 1
+  br label %_Z3fn2iii.exit.i52
+
+_Z3fn2iii.exit.i52:
+  %p1.addr.3.i.i50 = phi i32 [ %and17.lobit.i.i48, %if.then16.i.i49 ], [ %p1.addr.2.i.i43, %if.end13.i.i46 ]
+  br i1 %tobool.i11.i, label %_ZL3fn3ii.exit58, label %if.then.i15.i56
+
+if.then.i15.i56:
+  %and.i12.i53 = lshr i32 %p1.addr.3.i.i50, 2
+  %and.lobit.i13.i54 = and i32 %and.i12.i53, 1
+  %12 = xor i32 %and.lobit.i13.i54, 1
+  %or.i14.i55 = or i32 %12, %p1.addr.3.i.i50
+  br label %_ZL3fn3ii.exit58
+
+_ZL3fn3ii.exit58:
+  %p1.addr.0.i16.i57 = phi i32 [ %or.i14.i55, %if.then.i15.i56 ], [ %p1.addr.3.i.i50, %_Z3fn2iii.exit.i52 ]
+  %arrayidx7 = getelementptr inbounds i32* %2, i64 %indvars.iv
+  store i32 %p1.addr.0.i16.i57, i32* %arrayidx7, align 4  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %p1
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  br label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll
index f44862a2ebb9..92cb06e5e525 100644
--- a/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: icmp sgt
 ;CHECK: icmp sgt
 ;CHECK: icmp slt
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index 3a2d82e15d63..8cb703cdfa4b 100644
--- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
-;CHECK: @reduction_func
+;CHECK-LABEL: @reduction_func(
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret i32
 define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index 6e7c03a556c4..dbe0243a8110 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -17,7 +17,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;  }
 ;}
 
-;CHECK: @function0
+;CHECK-LABEL: @function0(
 ;CHECK: load <4 x i32>
 ;CHECK: icmp sgt <4 x i32>
 ;CHECK: mul <4 x i32>
@@ -70,7 +70,7 @@ for.end:
 ;   return sum;
 ; }
 
-;CHECK: @reduction_func
+;CHECK-LABEL: @reduction_func(
 ;CHECK: load <4 x i32>
 ;CHECK: icmp sgt <4 x i32>
 ;CHECK: add <4 x i32>
@@ -106,3 +106,66 @@ for.end:                                          ; preds = %for.inc, %entry
   ret i32 %sum.0.lcssa
 }
 
+@a = common global [1 x i32*] zeroinitializer, align 8
+@c = common global i32* null, align 8
+
+; We used to if-convert this loop. This is not safe because there is a
+; trapping constant expression.
+; PR16729
+
+; CHECK-LABEL: trapping_constant_expression
+; CHECK-NOT: or <4 x i32>
+
+define i32 @trapping_constant_expression() {
+entry:
+  br label %for.body
+
+for.body:
+  %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
+  br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
+
+cond.false:
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ]
+  %or = or i32 %or2, %cond
+  %inc = add nsw i32 %inc3, 1
+  %cmp = icmp slt i32 %inc, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 %or
+}
+
+; Neither should we if-convert if there is an instruction operand that is a
+; trapping constant expression.
+; PR16729
+
+; CHECK-LABEL: trapping_constant_expression2
+; CHECK-NOT: or <4 x i32>
+
+define i32 @trapping_constant_expression2() {
+entry:
+  br label %for.body
+
+for.body:
+  %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
+  br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
+
+cond.false:
+  %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32))
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %cond.1, %cond.false ], [ %inc3, %for.body ]
+  %or = or i32 %or2, %cond
+  %inc = add nsw i32 %inc3, 1
+  %cmp = icmp slt i32 %inc, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 %or
+}
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index 3fa6b19ca928..d35bd58a0281 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;  for (i=0; i<n; i++){
 ;    a[i] += i;
 ;  }
-;CHECK: @inc
+;CHECK-LABEL: @inc(
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
@@ -39,7 +39,7 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
 ;  for (i = 0; i < n; ++i) {
 ;    A[B[i]]++;
 ;
-;CHECK: @histogram
+;CHECK-LABEL: @histogram(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret i32
 define i32 @histogram(i32* nocapture noalias %A, i32* nocapture noalias %B, i32 %n) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
new file mode 100644
index 000000000000..50c3b6b6e79b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure that we can handle multiple integer induction variables.
+; CHECK-LABEL: @multi_int_induction(
+; CHECK: vector.body:
+; CHECK:  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:  %normalized.idx = sub i64 %index, 0
+; CHECK:  %[[VAR:.*]] = trunc i64 %normalized.idx to i32
+; CHECK:  %offset.idx = add i32 190, %[[VAR]]
+define void @multi_int_induction(i32* %A, i32 %N) {
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  store i32 %count.09, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %count.09, 1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+
+; Make sure we remove unneeded vectorization of induction variables.
+; In order for instcombine to cleanup the vectorized induction variables that we
+; create in the loop vectorizer we need to perform some form of redundancy
+; elimination to get rid of multiple uses.
+
+; IND-LABEL: scalar_use
+
+; IND:     br label %vector.body
+; IND:     vector.body:
+;   Vectorized induction variable.
+; IND-NOT:  insertelement <2 x i64>
+; IND-NOT:  shufflevector <2 x i64>
+; IND:     br {{.*}}, label %vector.body
+
+define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset
+  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+  %l1 = load float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2
+  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+  %l2 = load float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
+
+
+; Make sure that the loop exit count computation does not overflow for i8 and
+; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
+; induction variable to a bigger type the exit count computation will overflow
+; to 0.
+; PR17532
+
+; CHECK-LABEL: i8_loop
+; CHECK: icmp eq i32 {{.*}}, 256
+define i32 @i8_loop() nounwind readnone ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
+  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
+  %2 = and i32 %a.0, 4
+  %3 = add i8 %b.0, -1
+  %4 = icmp eq i8 %3, 0
+  br i1 %4, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 %2
+}
+
+; CHECK-LABEL: i16_loop
+; CHECK: icmp eq i32 {{.*}}, 65536
+
+define i32 @i16_loop() nounwind readnone ssp uwtable {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
+  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
+  %2 = and i32 %a.0, 4
+  %3 = add i16 %b.0, -1
+  %4 = icmp eq i16 %3, 0
+  br i1 %4, label %5, label %1
+
+; <label>:5                                       ; preds = %1
+  ret i32 %2
+}
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index 96595cdc16bc..9c8201ab7805 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -5,9 +5,9 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 @array = common global [1024 x i32] zeroinitializer, align 16
 
-;CHECK: @array_at_plus_one
-;CHECK: trunc i64
+;CHECK-LABEL: @array_at_plus_one(
 ;CHECK: add i64 %index, 12
+;CHECK: trunc i64
 ;CHECK: ret i32
 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/infiniteloop.ll b/test/Transforms/LoopVectorize/infiniteloop.ll
new file mode 100644
index 000000000000..5c5e1a3be0a2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/infiniteloop.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -indvars -loop-vectorize -force-vector-width=2  < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+
+@a = common global i64 0, align 8
+@x = common global i32 0, align 4
+
+; We used to assert on this loop because we could not find an induction
+; variable but assumed there must be one. Scalar evolution returned an exit
+; count for the loop below and from there on we assumed that there must be an
+; induction variable. This is not a valid assumption:
+;   // getExitCount - Get the expression for the number of loop iterations for
+;   // which this loop is *guaranteed not to exit* via ExitingBlock. Otherwise
+;   // return SCEVCouldNotCompute.
+; For an infinite loop SE can return any number.
+
+; CHECK-LABEL: @fn1(
+define void @fn1()  {  ; loop whose exit test compares a counter that starts at 0 against -2
+entry:
+  store i64 0, i64* @a, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %inc1 = phi i64 [ 0, %entry ], [ %inc, %for.body ]  ; starts at 0, advanced by 1 per iteration
+  store volatile i32 0, i32* @x, align 4  ; volatile store to @x on every iteration
+  %inc = add nsw i64 %inc1, 1
+  %cmp = icmp sgt i64 %inc1, -2  ; stay in the loop while %inc1 > -2 (signed)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %inc.lcssa = phi i64 [ %inc, %for.body ]  ; single-entry phi carrying %inc out of the loop
+  store i64 %inc.lcssa, i64* @a, align 8
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index defbb5bd94ba..c3d570c03a77 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-;CHECK: @sqrt_f32
+;CHECK-LABEL: @sqrt_f32(
 ;CHECK: llvm.sqrt.v4f32
 ;CHECK: ret void
 define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -29,7 +29,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.sqrt.f32(float) nounwind readnone
 
-;CHECK: @sqrt_f64
+;CHECK-LABEL: @sqrt_f64(
 ;CHECK: llvm.sqrt.v4f64
 ;CHECK: ret void
 define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -55,7 +55,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.sqrt.f64(double) nounwind readnone
 
-;CHECK: @sin_f32
+;CHECK-LABEL: @sin_f32(
 ;CHECK: llvm.sin.v4f32
 ;CHECK: ret void
 define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -81,7 +81,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.sin.f32(float) nounwind readnone
 
-;CHECK: @sin_f64
+;CHECK-LABEL: @sin_f64(
 ;CHECK: llvm.sin.v4f64
 ;CHECK: ret void
 define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -107,7 +107,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.sin.f64(double) nounwind readnone
 
-;CHECK: @cos_f32
+;CHECK-LABEL: @cos_f32(
 ;CHECK: llvm.cos.v4f32
 ;CHECK: ret void
 define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -133,7 +133,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.cos.f32(float) nounwind readnone
 
-;CHECK: @cos_f64
+;CHECK-LABEL: @cos_f64(
 ;CHECK: llvm.cos.v4f64
 ;CHECK: ret void
 define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -159,7 +159,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.cos.f64(double) nounwind readnone
 
-;CHECK: @exp_f32
+;CHECK-LABEL: @exp_f32(
 ;CHECK: llvm.exp.v4f32
 ;CHECK: ret void
 define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -185,7 +185,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.exp.f32(float) nounwind readnone
 
-;CHECK: @exp_f64
+;CHECK-LABEL: @exp_f64(
 ;CHECK: llvm.exp.v4f64
 ;CHECK: ret void
 define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -211,7 +211,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.exp.f64(double) nounwind readnone
 
-;CHECK: @exp2_f32
+;CHECK-LABEL: @exp2_f32(
 ;CHECK: llvm.exp2.v4f32
 ;CHECK: ret void
 define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -237,7 +237,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.exp2.f32(float) nounwind readnone
 
-;CHECK: @exp2_f64
+;CHECK-LABEL: @exp2_f64(
 ;CHECK: llvm.exp2.v4f64
 ;CHECK: ret void
 define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -263,7 +263,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.exp2.f64(double) nounwind readnone
 
-;CHECK: @log_f32
+;CHECK-LABEL: @log_f32(
 ;CHECK: llvm.log.v4f32
 ;CHECK: ret void
 define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -289,7 +289,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.log.f32(float) nounwind readnone
 
-;CHECK: @log_f64
+;CHECK-LABEL: @log_f64(
 ;CHECK: llvm.log.v4f64
 ;CHECK: ret void
 define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -315,7 +315,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.log.f64(double) nounwind readnone
 
-;CHECK: @log10_f32
+;CHECK-LABEL: @log10_f32(
 ;CHECK: llvm.log10.v4f32
 ;CHECK: ret void
 define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -341,7 +341,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.log10.f32(float) nounwind readnone
 
-;CHECK: @log10_f64
+;CHECK-LABEL: @log10_f64(
 ;CHECK: llvm.log10.v4f64
 ;CHECK: ret void
 define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -367,7 +367,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.log10.f64(double) nounwind readnone
 
-;CHECK: @log2_f32
+;CHECK-LABEL: @log2_f32(
 ;CHECK: llvm.log2.v4f32
 ;CHECK: ret void
 define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -393,7 +393,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.log2.f32(float) nounwind readnone
 
-;CHECK: @log2_f64
+;CHECK-LABEL: @log2_f64(
 ;CHECK: llvm.log2.v4f64
 ;CHECK: ret void
 define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -419,7 +419,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.log2.f64(double) nounwind readnone
 
-;CHECK: @fabs_f32
+;CHECK-LABEL: @fabs_f32(
 ;CHECK: llvm.fabs.v4f32
 ;CHECK: ret void
 define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -468,7 +468,60 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.fabs(double) nounwind readnone
 
-;CHECK: @floor_f32
+;CHECK-LABEL: @copysign_f32(
+;CHECK: llvm.copysign.v4f32
+;CHECK: ret void
+define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {  ; x[i] = copysign(y[i], z[i]) for i in [0, n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0  ; skip the loop entirely when n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4  ; y[i]
+  %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx1, align 4  ; z[i]
+  %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4  ; x[i] = result
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n  ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+
+define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {  ; x[i] = copysign(y[i], z[i]) for i in [0, n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0  ; skip the loop entirely when n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8  ; y[i]
+  %arrayidx1 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %1 = load double* %arrayidx1, align 8  ; z[i] (sign operand, read through %arrayidx1 like the f32 variant)
+  %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8  ; x[i] = result
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n  ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.copysign(double, double) nounwind readnone
+
+;CHECK-LABEL: @floor_f32(
 ;CHECK: llvm.floor.v4f32
 ;CHECK: ret void
 define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -494,7 +547,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.floor.f32(float) nounwind readnone
 
-;CHECK: @floor_f64
+;CHECK-LABEL: @floor_f64(
 ;CHECK: llvm.floor.v4f64
 ;CHECK: ret void
 define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -520,7 +573,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.floor.f64(double) nounwind readnone
 
-;CHECK: @ceil_f32
+;CHECK-LABEL: @ceil_f32(
 ;CHECK: llvm.ceil.v4f32
 ;CHECK: ret void
 define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -546,7 +599,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.ceil.f32(float) nounwind readnone
 
-;CHECK: @ceil_f64
+;CHECK-LABEL: @ceil_f64(
 ;CHECK: llvm.ceil.v4f64
 ;CHECK: ret void
 define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -572,7 +625,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.ceil.f64(double) nounwind readnone
 
-;CHECK: @trunc_f32
+;CHECK-LABEL: @trunc_f32(
 ;CHECK: llvm.trunc.v4f32
 ;CHECK: ret void
 define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -598,7 +651,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.trunc.f32(float) nounwind readnone
 
-;CHECK: @trunc_f64
+;CHECK-LABEL: @trunc_f64(
 ;CHECK: llvm.trunc.v4f64
 ;CHECK: ret void
 define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -624,7 +677,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.trunc.f64(double) nounwind readnone
 
-;CHECK: @rint_f32
+;CHECK-LABEL: @rint_f32(
 ;CHECK: llvm.rint.v4f32
 ;CHECK: ret void
 define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -650,7 +703,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.rint.f32(float) nounwind readnone
 
-;CHECK: @rint_f64
+;CHECK-LABEL: @rint_f64(
 ;CHECK: llvm.rint.v4f64
 ;CHECK: ret void
 define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -676,7 +729,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.rint.f64(double) nounwind readnone
 
-;CHECK: @nearbyint_f32
+;CHECK-LABEL: @nearbyint_f32(
 ;CHECK: llvm.nearbyint.v4f32
 ;CHECK: ret void
 define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
@@ -702,7 +755,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.nearbyint.f32(float) nounwind readnone
 
-;CHECK: @nearbyint_f64
+;CHECK-LABEL: @nearbyint_f64(
 ;CHECK: llvm.nearbyint.v4f64
 ;CHECK: ret void
 define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
@@ -728,7 +781,59 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.nearbyint.f64(double) nounwind readnone
 
-;CHECK: @fma_f32
+;CHECK-LABEL: @round_f32(
+;CHECK: llvm.round.v4f32
+;CHECK: ret void
+define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {  ; x[i] = llvm.round(y[i]) for i in [0, n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0  ; skip the loop entirely when n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4  ; y[i]
+  %call = tail call float @llvm.round.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4  ; x[i] = result
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n  ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.round.f32(float) nounwind readnone
+
+;CHECK-LABEL: @round_f64(
+;CHECK: llvm.round.v4f64
+;CHECK: ret void
+define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {  ; x[i] = llvm.round(y[i]) for i in [0, n)
+entry:
+  %cmp6 = icmp sgt i32 %n, 0  ; skip the loop entirely when n <= 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8  ; y[i]
+  %call = tail call double @llvm.round.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8  ; x[i] = result
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n  ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.round.f64(double) nounwind readnone
+
+;CHECK-LABEL: @fma_f32(
 ;CHECK: llvm.fma.v4f32
 ;CHECK: ret void
 define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
@@ -758,7 +863,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
 
-;CHECK: @fma_f64
+;CHECK-LABEL: @fma_f64(
 ;CHECK: llvm.fma.v4f64
 ;CHECK: ret void
 define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
@@ -788,7 +893,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.fma.f64(double, double, double) nounwind readnone
 
-;CHECK: @fmuladd_f32
+;CHECK-LABEL: @fmuladd_f32(
 ;CHECK: llvm.fmuladd.v4f32
 ;CHECK: ret void
 define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
@@ -818,7 +923,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
 
-;CHECK: @fmuladd_f64
+;CHECK-LABEL: @fmuladd_f64(
 ;CHECK: llvm.fmuladd.v4f64
 ;CHECK: ret void
 define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
@@ -848,7 +953,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
 
-;CHECK: @pow_f32
+;CHECK-LABEL: @pow_f32(
 ;CHECK: llvm.pow.v4f32
 ;CHECK: ret void
 define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
@@ -876,7 +981,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 declare float @llvm.pow.f32(float, float) nounwind readnone
 
-;CHECK: @pow_f64
+;CHECK-LABEL: @pow_f64(
 ;CHECK: llvm.pow.v4f64
 ;CHECK: ret void
 define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
@@ -927,3 +1032,61 @@ for.end:                                          ; preds = %for.body
 declare float @fabsf(float) nounwind readnone
 
 declare double @llvm.pow.f64(double, double) nounwind readnone
+
+
+
+; Make sure we don't replace calls to functions with standard library function
+; signatures but defined with internal linkage.
+
+define internal float @roundf(float %x) nounwind readnone {  ; internal-linkage function that reuses the libm name roundf
+  ret float 0.00000000  ; always returns 0.0; %x is ignored
+}
+; CHECK-LABEL: internal_round
+; CHECK-NOT:  load <4 x float>
+
+define void @internal_round(float* nocapture %x) nounwind {  ; x[i] = roundf(x[i]) for i in [0, 1024)
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4  ; x[i]
+  %call = tail call float @roundf(float %0) nounwind readnone  ; calls the internal-linkage @roundf defined in this file
+  store float %call, float* %arrayidx, align 4  ; written back to the same element
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024  ; fixed trip count of 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Make sure we don't replace calls to functions with standard library names but
+; different signatures.
+
+declare void @round(double %f)
+
+; CHECK-LABEL: wrong_signature
+; CHECK-NOT:  load <4 x double>
+
+define void @wrong_signature(double* nocapture %x) nounwind {  ; per element: reload/store x[i], then call the void-returning @round
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 4  ; x[i]
+  store double %0, double* %arrayidx, align 4  ; stores the same value back to x[i]
+  tail call void @round(double %0) nounwind readnone  ; @round is declared in this file as void(double)
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024  ; fixed trip count of 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll
new file mode 100644
index 000000000000..4f6f3b820a43
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lifetime.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we can vectorize loops which contain lifetime markers.
+
+; CHECK-LABEL: @test(
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @test(i32 *%d) {  ; d[i] = 100 for i in [0, 128), with lifetime markers on %arr inside the loop
+entry:
+  %arr = alloca [1024 x i32], align 16
+  %0 = bitcast [1024 x i32]* %arr to i8*  ; marker pointer is computed once, outside the loop
+  call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  call void @llvm.lifetime.end(i64 4096, i8* %0) #1  ; end marker at the top of each iteration
+  %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 8  ; loaded value is not used afterwards
+  store i32 100, i32* %arrayidx, align 8  ; d[i] = 100
+  call void @llvm.lifetime.start(i64 4096, i8* %0) #1  ; start marker again before the next iteration
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128  ; keep looping until the next index reaches 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  call void @llvm.lifetime.end(i64 4096, i8* %0) #1
+  ret void
+}
+
+; CHECK-LABEL: @testbitcast(
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @testbitcast(i32 *%d) {  ; like @test, but the in-loop markers use a bitcast recomputed inside the loop
+entry:
+  %arr = alloca [1024 x i32], align 16
+  %0 = bitcast [1024 x i32]* %arr to i8*
+  call void @llvm.lifetime.start(i64 4096, i8* %0) #1
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %1 = bitcast [1024 x i32]* %arr to i8*  ; same cast as %0, repeated inside the loop body
+  call void @llvm.lifetime.end(i64 4096, i8* %1) #1
+  %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+  %2 = load i32* %arrayidx, align 8  ; loaded value is not used afterwards
+  store i32 100, i32* %arrayidx, align 8  ; d[i] = 100
+  call void @llvm.lifetime.start(i64 4096, i8* %1) #1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128  ; keep looping until the next index reaches 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  call void @llvm.lifetime.end(i64 4096, i8* %0) #1  ; final marker uses the entry-block cast %0
+  ret void
+}
+
+; CHECK-LABEL: @testloopvariant(
+; CHECK: call void @llvm.lifetime.end
+; CHECK: store <2 x i32>
+; CHECK: call void @llvm.lifetime.start
+
+define void @testloopvariant(i32 *%d) {  ; d[i] = 100 for i in [0, 128); all lifetime markers live inside the loop
+entry:
+  %arr = alloca [1024 x i32], align 16
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = getelementptr [1024 x i32]* %arr, i32 0, i64 %indvars.iv  ; NOTE(review): loop-variant address, but no later instruction uses %0
+  %1 = bitcast [1024 x i32]* %arr to i8*  ; markers below are applied to %arr itself, not to %0
+  call void @llvm.lifetime.end(i64 4096, i8* %1) #1
+  %arrayidx = getelementptr inbounds i32* %d, i64 %indvars.iv
+  %2 = load i32* %arrayidx, align 8  ; loaded value is not used afterwards
+  store i32 100, i32* %arrayidx, align 8  ; d[i] = 100
+  call void @llvm.lifetime.start(i64 4096, i8* %1) #1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128  ; keep looping until the next index reaches 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LoopVectorize/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll
new file mode 100644
index 000000000000..21cb703ba47c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/memdep.ll
@@ -0,0 +1,222 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Vectorization with dependence checks.
+
+; No plausible dependence - can be vectorized.
+;  for (i = 0; i < 1024; ++i)
+;    A[i] = A[i + 1] + 1;
+
+; CHECK-LABEL: @f1_vec(
+; CHECK: <2 x i32>
+
+define void @f1_vec(i32* %A) {  ; A[i] = A[i + 1] + 1 for i in [0, 1024)
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv.next  ; &A[i + 1]
+  %0 = load i32* %arrayidx, align 4
+  %add1 = add nsw i32 %0, 1
+  %arrayidx3 = getelementptr inbounds i32* %A, i32 %indvars.iv  ; &A[i]
+  store i32 %add1, i32* %arrayidx3, align 4  ; the store targets the element before the one just read
+  %exitcond = icmp ne i32 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Plausible dependence of distance 1 - can't be vectorized.
+;  for (i = 0; i < 1024; ++i)
+;    A[i+1] = A[i] + 1;
+
+; CHECK-LABEL: @f2_novec(
+; CHECK-NOT: <2 x i32>
+
+define void @f2_novec(i32* %A) {  ; A[i + 1] = A[i] + 1 for i in [0, 1024)
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv  ; &A[i]
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %arrayidx3 = getelementptr inbounds i32* %A, i32 %indvars.iv.next  ; &A[i + 1]
+  store i32 %add, i32* %arrayidx3, align 4  ; this element is the one loaded by the next iteration
+  %exitcond = icmp ne i32 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Plausible dependence of distance 2 - can be vectorized with a width of 2.
+;  for (i = 0; i < 1024; ++i)
+;    A[i+2] = A[i] + 1;
+
+; CHECK-LABEL: @f3_vec_len(
+; CHECK: <2 x i32>
+
+; WIDTH: f3_vec_len
+; WIDTH-NOT: <4 x i32>
+
+define void @f3_vec_len(i32* %A) {  ; A[i + 2] = A[i] + 1 for i in [0, 1024)
+entry:
+  br label %for.body
+
+for.body:
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.01 to i64
+  %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom  ; &A[i]
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  %add1 = add nsw i32 %i.01, 2  ; store index is two elements ahead of the load index
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2  ; &A[i + 2]
+  store i32 %add, i32* %arrayidx3, align 4
+  %inc = add nsw i32 %i.01, 1
+  %cmp = icmp slt i32 %inc, 1024
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Plausible dependence of distance 1 - cannot be vectorized (without reordering
+; accesses).
+;   for (i = 0; i < 1024; ++i) {
+;     B[i] = A[i];
+;     A[i] = B[i + 1];
+;   }
+
+; CHECK-LABEL: @f5(
+; CHECK-NOT: <2 x i32>
+
+define void @f5(i32*  %A, i32* %B) {  ; per iteration: B[i] = A[i]; A[i] = B[i + 1]
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv  ; &A[i]
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv  ; &B[i]
+  store i32 %0, i32* %arrayidx2, align 4  ; B[i] = A[i]
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %arrayidx4 = getelementptr inbounds i32* %B, i64 %indvars.iv.next  ; &B[i + 1], written by the next iteration
+  %1 = load i32* %arrayidx4, align 4
+  store i32 %1, i32* %arrayidx, align 4  ; A[i] = B[i + 1]
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Dependence through a phi node - must not vectorize.
+;   for (i = 0; i < 1024; ++i) {
+;     a[i+1] = tmp;
+;     tmp = a[i];
+;   }
+
+; CHECK-LABEL: @f6
+; CHECK-NOT: <2 x i32>
+
+define i32 @f6(i32* %a, i32 %tmp) {  ; per iteration: a[i + 1] = tmp; tmp = a[i]
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]  ; value loaded by the previous iteration (or %tmp on entry)
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv.next  ; &a[i + 1]
+  store i32 %tmp.addr.08, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32* %a, i64 %indvars.iv  ; &a[i]
+  %0 = load i32* %arrayidx3, align 4  ; feeds the phi above on the next iteration
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret i32 undef
+}
+
+; Don't vectorize true loop carried dependencies that are not a multiple of the
+; vector width.
+; Example:
+;   for (int i = ...; ++i) {
+;     a[i] = a[i-3] + ...;
+; It is a bad idea to vectorize this loop because store-load forwarding will not
+; happen.
+;
+
+; CHECK-LABEL: @nostoreloadforward(
+; CHECK-NOT: <2 x i32>
+
+define void @nostoreloadforward(i32* %A) {  ; A[i] = A[i + 4] + A[i - 3] for i in [16, 128)
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]  ; index starts at 16
+  %0 = add nsw i64 %indvars.iv, -3
+  %arrayidx = getelementptr inbounds i32* %A, i64 %0  ; &A[i - 3], stored three iterations earlier
+  %1 = load i32* %arrayidx, align 4
+  %2 = add nsw i64 %indvars.iv, 4
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %2  ; &A[i + 4]
+  %3 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %3, %1
+  %arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv  ; &A[i]
+  store i32 %add3, i32* %arrayidx5, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Example:
+;   for (int i = ...; ++i) {
+;     a[i] = b[i];
+;     c[i] = a[i-3] + ...;
+; It is a bad idea to vectorize this loop because store-load forwarding will not
+; happen.
+;
+
+; CHECK-LABEL: @nostoreloadforward2(
+; CHECK-NOT: <2 x i32>
+
+define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {  ; A[i] = B[i]; C[i] = A[i - 3] for i in [16, 128)
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]  ; index starts at 16
+  %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv  ; &B[i]
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv  ; &A[i]
+  store i32 %0, i32* %arrayidx2, align 4  ; A[i] = B[i]
+  %1 = add nsw i64 %indvars.iv, -3
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 %1  ; &A[i - 3], stored three iterations earlier
+  %2 = load i32* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds i32* %C, i64 %indvars.iv  ; &C[i]
+  store i32 %2, i32* %arrayidx6, align 4  ; C[i] = A[i - 3]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll
new file mode 100644
index 000000000000..7f1037200659
--- /dev/null
+++ b/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK-LABEL: @inc(
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {  ; a[i] += i for i in [0, n)
+  %1 = icmp sgt i32 %n, 0  ; skip the loop entirely when n <= 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv  ; &a[i]
+  %3 = load i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32  ; i as a 32-bit value
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* %2, align 4  ; a[i] = a[i] + i
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !0  ; !0 references the llvm.vectorizer.unroll = 2 node
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.vectorizer.unroll", i32 2}
diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll
new file mode 100644
index 000000000000..1960c0bad6bc
--- /dev/null
+++ b/test/Transforms/LoopVectorize/metadata-width.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @test1(
+; CHECK: store <8 x i32>
+; CHECK: ret void
+define void @test1(i32* nocapture %a, i32 %n) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index 502fd8b9383b..0e47260984f2 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -10,14 +10,14 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Turn this into a max reduction. Make sure we use a splat to initialize the
 ; vector for the reduction.
-; CHECK: @max_red
+; CHECK-LABEL: @max_red(
 ; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
 ; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
 ; CHECK: icmp sgt <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @max_red(i32 %max) {
 entry:
@@ -41,12 +41,12 @@ for.end:
 
 ; Turn this into a max reduction. The select has its inputs reversed therefore
 ; this is a max reduction.
-; CHECK: @max_red_inverse_select
+; CHECK-LABEL: @max_red_inverse_select(
 ; CHECK: icmp slt <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @max_red_inverse_select(i32 %max) {
 entry:
@@ -69,12 +69,12 @@ for.end:
 }
 
 ; Turn this into a min reduction.
-; CHECK: @min_red
+; CHECK-LABEL: @min_red(
 ; CHECK: icmp slt <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @min_red(i32 %max) {
 entry:
@@ -98,12 +98,12 @@ for.end:
 
 ; Turn this into a min reduction. The select has its inputs reversed therefore
 ; this is a min reduction.
-; CHECK: @min_red_inverse_select
+; CHECK-LABEL: @min_red_inverse_select(
 ; CHECK: icmp sgt <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @min_red_inverse_select(i32 %max) {
 entry:
@@ -128,12 +128,12 @@ for.end:
 ; Unsigned tests.
 
 ; Turn this into a max reduction.
-; CHECK: @umax_red
+; CHECK-LABEL: @umax_red(
 ; CHECK: icmp ugt <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umax_red(i32 %max) {
 entry:
@@ -157,12 +157,12 @@ for.end:
 
 ; Turn this into a max reduction. The select has its inputs reversed therefore
 ; this is a max reduction.
-; CHECK: @umax_red_inverse_select
+; CHECK-LABEL: @umax_red_inverse_select(
 ; CHECK: icmp ult <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umax_red_inverse_select(i32 %max) {
 entry:
@@ -185,12 +185,12 @@ for.end:
 }
 
 ; Turn this into a min reduction.
-; CHECK: @umin_red
+; CHECK-LABEL: @umin_red(
 ; CHECK: icmp ult <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umin_red(i32 %max) {
 entry:
@@ -214,12 +214,12 @@ for.end:
 
 ; Turn this into a min reduction. The select has its inputs reversed therefore
 ; this is a min reduction.
-; CHECK: @umin_red_inverse_select
+; CHECK-LABEL: @umin_red_inverse_select(
 ; CHECK: icmp ugt <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @umin_red_inverse_select(i32 %max) {
 entry:
@@ -243,12 +243,12 @@ for.end:
 
 ; SGE -> SLT
 ; Turn this into a min reduction (select inputs are reversed).
-; CHECK: @sge_min_red
+; CHECK-LABEL: @sge_min_red(
 ; CHECK: icmp sge <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @sge_min_red(i32 %max) {
 entry:
@@ -272,12 +272,12 @@ for.end:
 
 ; SLE -> SGT
 ; Turn this into a max reduction (select inputs are reversed).
-; CHECK: @sle_min_red
+; CHECK-LABEL: @sle_min_red(
 ; CHECK: icmp sle <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @sle_min_red(i32 %max) {
 entry:
@@ -301,12 +301,12 @@ for.end:
 
 ; UGE -> ULT
 ; Turn this into a min reduction (select inputs are reversed).
-; CHECK: @uge_min_red
+; CHECK-LABEL: @uge_min_red(
 ; CHECK: icmp uge <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @uge_min_red(i32 %max) {
 entry:
@@ -330,12 +330,12 @@ for.end:
 
 ; ULE -> UGT
 ; Turn this into a max reduction (select inputs are reversed).
-; CHECK: @ule_min_red
+; CHECK-LABEL: @ule_min_red(
 ; CHECK: icmp ule <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define i32 @ule_min_red(i32 %max) {
 entry:
@@ -358,7 +358,7 @@ for.end:
 }
 
 ; No reduction.
-; CHECK: @no_red_1
+; CHECK-LABEL: @no_red_1(
 ; CHECK-NOT: icmp <2 x i32>
 define i32 @no_red_1(i32 %max) {
 entry:
@@ -382,7 +382,7 @@ for.end:
   ret i32 %max.red.0
 }
 
-; CHECK: @no_red_2
+; CHECK-LABEL: @no_red_2(
 ; CHECK-NOT: icmp <2 x i32>
 define i32 @no_red_2(i32 %max) {
 entry:
@@ -411,12 +411,12 @@ for.end:
 ; Maximum.
 
 ; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
-; CHECK: @max_red_float
+; CHECK-LABEL: @max_red_float(
 ; CHECK: fcmp ogt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @max_red_float(float %max) #0 {
 entry:
@@ -437,12 +437,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @max_red_float_ge
+; CHECK-LABEL: @max_red_float_ge(
 ; CHECK: fcmp oge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @max_red_float_ge(float %max) #0 {
 entry:
@@ -463,12 +463,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @inverted_max_red_float
+; CHECK-LABEL: @inverted_max_red_float(
 ; CHECK: fcmp olt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_max_red_float(float %max) #0 {
 entry:
@@ -489,12 +489,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @inverted_max_red_float_le
+; CHECK-LABEL: @inverted_max_red_float_le(
 ; CHECK: fcmp ole <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_max_red_float_le(float %max) #0 {
 entry:
@@ -515,12 +515,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @unordered_max_red
+; CHECK-LABEL: @unordered_max_red_float(
 ; CHECK: fcmp ugt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_max_red_float(float %max) #0 {
 entry:
@@ -541,12 +541,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @unordered_max_red_float_ge
+; CHECK-LABEL: @unordered_max_red_float_ge(
 ; CHECK: fcmp uge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_max_red_float_ge(float %max) #0 {
 entry:
@@ -567,12 +567,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @inverted_unordered_max_red
+; CHECK-LABEL: @inverted_unordered_max_red_float(
 ; CHECK: fcmp ult <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_max_red_float(float %max) #0 {
 entry:
@@ -593,12 +593,12 @@ for.end:
   ret float %max.red.0
 }
 
-; CHECK: @inverted_unordered_max_red_float_le
+; CHECK-LABEL: @inverted_unordered_max_red_float_le(
 ; CHECK: fcmp ule <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_max_red_float_le(float %max) #0 {
 entry:
@@ -622,12 +622,12 @@ for.end:
 ; Minimum.
 
 ; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
-; CHECK: @min_red_float
+; CHECK-LABEL: @min_red_float(
 ; CHECK: fcmp olt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @min_red_float(float %min) #0 {
 entry:
@@ -648,12 +648,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @min_red_float_le
+; CHECK-LABEL: @min_red_float_le(
 ; CHECK: fcmp ole <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @min_red_float_le(float %min) #0 {
 entry:
@@ -674,12 +674,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @inverted_min_red_float
+; CHECK-LABEL: @inverted_min_red_float(
 ; CHECK: fcmp ogt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_min_red_float(float %min) #0 {
 entry:
@@ -700,12 +700,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @inverted_min_red_float_ge
+; CHECK-LABEL: @inverted_min_red_float_ge(
 ; CHECK: fcmp oge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_min_red_float_ge(float %min) #0 {
 entry:
@@ -726,12 +726,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @unordered_min_red
+; CHECK-LABEL: @unordered_min_red_float(
 ; CHECK: fcmp ult <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_min_red_float(float %min) #0 {
 entry:
@@ -752,12 +752,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @unordered_min_red_float_le
+; CHECK-LABEL: @unordered_min_red_float_le(
 ; CHECK: fcmp ule <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @unordered_min_red_float_le(float %min) #0 {
 entry:
@@ -778,12 +778,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @inverted_unordered_min_red
+; CHECK-LABEL: @inverted_unordered_min_red_float(
 ; CHECK: fcmp ugt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_min_red_float(float %min) #0 {
 entry:
@@ -804,12 +804,12 @@ for.end:
   ret float %min.red.0
 }
 
-; CHECK: @inverted_unordered_min_red_float_ge
+; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
 ; CHECK: fcmp uge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define float @inverted_unordered_min_red_float_ge(float %min) #0 {
 entry:
@@ -831,12 +831,12 @@ for.end:
 }
 
 ; Make sure we handle doubles, too.
-; CHECK: @min_red_double
+; CHECK-LABEL: @min_red_double(
 ; CHECK: fcmp olt <2 x double>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x double>
-; CHECK: select <2 x i1>
+; CHECK: select i1
 
 define double @min_red_double(double %min) #0 {
 entry:
@@ -859,7 +859,7 @@ for.end:
 
 
 ; Don't this into a max reduction. The no-nans-fp-math attribute is missing
-; CHECK: @max_red_float_nans
+; CHECK-LABEL: @max_red_float_nans(
 ; CHECK-NOT: <2 x float>
 
 define float @max_red_float_nans(float %max) {
@@ -882,4 +882,4 @@ for.end:
 }
 
 
-attributes #0 = { "no-nans-fp-math"="true" } 
+attributes #0 = { "no-nans-fp-math"="true" }
diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
new file mode 100644
index 000000000000..7d836dedbdbb
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; From a simple program with two address spaces:
+; char Y[4*10000] __attribute__((address_space(1)));
+; char X[4*10000];
+; int main() {
+;    for (int i = 0; i < 4*10000; ++i)
+;        X[i] = Y[i] + 1;
+;    return 0;
+;}
+
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Y = common addrspace(1) global [40000 x i8] zeroinitializer, align 16
+@X = common global [40000 x i8] zeroinitializer, align 16
+
+;CHECK-LABEL: @main(
+;CHECK: bitcast i8 addrspace(1)* %{{.*}} to <4 x i8> addrspace(1)*
+;CHECK: bitcast i8* %{{.*}} to <4 x i8>*
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [40000 x i8] addrspace(1)* @Y, i64 0, i64 %indvars.iv
+  %0 = load i8 addrspace(1)* %arrayidx, align 1
+  %add = add i8 %0, 1
+  %arrayidx3 = getelementptr inbounds [40000 x i8]* @X, i64 0, i64 %indvars.iv
+  store i8 %add, i8* %arrayidx3, align 1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 40000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
index cdfb3fd66f05..295fcabb0b29 100644
--- a/test/Transforms/LoopVectorize/no_idiv_reduction.ll
+++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -7,7 +7,7 @@ entry:
   br label %for.body
 
 for.body:
-  ; CHECK: @g
+  ; CHECK-LABEL: @g(
   ; CHECK-NOT: sdiv <2 x i32>
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ]
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
index 45aa8c7cd9be..e572d1a884ca 100644
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -4,10 +4,10 @@
 ;  return std::accumulate(A, A + n, 0);
 ; }
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
 
-;CHECK: @sum_array
+;CHECK-LABEL: @sum_array(
+;CHECK: phi i64
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
@@ -31,3 +31,30 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
   %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
   ret i32 %.01.lcssa.i
 }
+
+; Same, but use a pointer with a different size.
+;CHECK-LABEL: @sum_array_as1(
+;CHECK: phi i16
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: ret i32
+define i32 @sum_array_as1(i32 addrspace(1)* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+  %1 = sext i32 %n to i64
+  %2 = getelementptr inbounds i32 addrspace(1)* %A, i64 %1
+  %3 = icmp eq i32 %n, 0
+  br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+.lr.ph.i:                                         ; preds = %0, %.lr.ph.i
+  %.03.i = phi i32 addrspace(1)* [ %6, %.lr.ph.i ], [ %A, %0 ]
+  %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
+  %4 = load i32 addrspace(1)* %.03.i, align 4
+  %5 = add nsw i32 %4, %.012.i
+  %6 = getelementptr inbounds i32 addrspace(1)* %.03.i, i64 1
+  %7 = icmp eq i32 addrspace(1)* %6, %2
+  br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %.lr.ph.i, %0
+  %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
+  ret i32 %.01.lcssa.i
+}
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 6f0357c5e546..1f891ad2c4fa 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -12,6 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; We used to vectorize this loop. But it has a value that is used outside of the
 ; and is not a recognized reduction variable "tmp17".
 
+; CHECK-LABEL: @main(
 ; CHECK-NOT: <2 x i32>
 
 define i32 @main()  {
@@ -38,4 +39,33 @@ f1.exit.loopexit:
   ret i32 %.lcssa
 }
 
+; Don't vectorize this loop. Its phi node (induction variable) has an outside
+; loop user. We currently don't handle this case.
+; PR17179
 
+; CHECK-LABEL: @test2(
+; CHECK-NOT:  <2 x
+
+@x1 = common global i32 0, align 4
+@x2 = common global i32 0, align 4
+@x0 = common global i32 0, align 4
+
+define i32 @test2()  {
+entry:
+  store i32 0, i32* @x1, align 4
+  %0 = load i32* @x0, align 4
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ]
+  %inc = add nsw i32 %inc7, 1
+  %cmp = icmp eq i32 %inc, 52
+  br i1 %cmp, label %for.end5, label %for.cond1.preheader
+
+for.end5:
+  %inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ]
+  %xor = xor i32 %inc7.lcssa, %0
+  store i32 52, i32* @x1, align 4
+  store i32 1, i32* @x2, align 4
+  ret i32 %xor
+}
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
index de23bf02b63a..c3c81b6f8450 100644
--- a/test/Transforms/LoopVectorize/nofloat.ll
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 @a = common global [2048 x i32] zeroinitializer, align 16
 
-;CHECK: @example12
+;CHECK-LABEL: @example12(
 ;CHECK-NOT: store <4 x i32>
 ;CHECK: ret void
 define void @example12() noimplicitfloat { ;           <--------- "noimplicitfloat" attribute here!
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 8262a18f1807..0c54a2b01226 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @c = common global [2048 x i32] zeroinitializer, align 16
 @a = common global [2048 x i32] zeroinitializer, align 16
 
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK: shl i32
 ;CHECK: zext i32
 ;CHECK: load <4 x i32>
diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll
new file mode 100644
index 000000000000..27030a2ff2a9
--- /dev/null
+++ b/test/Transforms/LoopVectorize/opt.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s
+; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure we can disable vectorization in opt.
+
+; LOOPVEC:       add <2 x i32>
+; NOLOOPVEC-NOT: add <2 x i32>
+
+define i32 @vect(i32* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %add
+}
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
index 25599f8f4c3c..15983f068556 100644
--- a/test/Transforms/LoopVectorize/ptr_loops.ll
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
 @B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
 
-;CHECK:_Z5test1v
+;CHECK-LABEL:@_Z5test1v(
 ;CHECK: load <4 x i32>
 ;CHECK: shufflevector <4 x i32>
 ;CHECK: store <4 x i32>
@@ -29,7 +29,7 @@ define i32 @_Z5test1v() nounwind uwtable ssp {
   ret i32 0
 }
 
-;CHECK:_Z5test2v
+;CHECK-LABEL: @_Z5test2v(
 ;CHECK: load <4 x i32>
 ;CHECK: shufflevector <4 x i32>
 ;CHECK: store <4 x i32>
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index bfaa6d452bce..fc8f0a5482f0 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @read_only_func
+;CHECK-LABEL: @read_only_func(
 ;CHECK: load <4 x i32>
 ;CHECK: ret i32
 define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp {
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 08b7b27e4257..791fce156220 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @reduction_sum
+;CHECK-LABEL: @reduction_sum(
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
@@ -38,7 +38,7 @@ define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
   ret i32 %sum.0.lcssa
 }
 
-;CHECK: @reduction_prod
+;CHECK-LABEL: @reduction_prod(
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul <4 x i32>
@@ -73,7 +73,7 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap
   ret i32 %prod.0.lcssa
 }
 
-;CHECK: @reduction_mix
+;CHECK-LABEL: @reduction_mix(
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul nsw <4 x i32>
@@ -108,7 +108,7 @@ define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
   ret i32 %sum.0.lcssa
 }
 
-;CHECK: @reduction_mul
+;CHECK-LABEL: @reduction_mul(
 ;CHECK: mul <4 x i32>
 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: mul <4 x i32>
@@ -141,7 +141,7 @@ define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
   ret i32 %sum.0.lcssa
 }
 
-;CHECK: @start_at_non_zero
+;CHECK-LABEL: @start_at_non_zero(
 ;CHECK: phi <4 x i32>
 ;CHECK: <i32 120, i32 0, i32 0, i32 0>
 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
@@ -174,7 +174,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %sum.0.lcssa
 }
 
-;CHECK: @reduction_and
+;CHECK-LABEL: @reduction_and(
 ;CHECK: and <4 x i32>
 ;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
@@ -207,7 +207,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-;CHECK: @reduction_or
+;CHECK-LABEL: @reduction_or(
 ;CHECK: or <4 x i32>
 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: or <4 x i32>
@@ -239,7 +239,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret i32 %result.0.lcssa
 }
 
-;CHECK: @reduction_xor
+;CHECK-LABEL: @reduction_xor(
 ;CHECK: xor <4 x i32>
 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: xor <4 x i32>
@@ -272,7 +272,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; In this code the subtracted variable is on the RHS and this is not an induction variable.
-;CHECK: @reduction_sub_rhs
+;CHECK-LABEL: @reduction_sub_rhs(
 ;CHECK-NOT: phi <4 x i32>
 ;CHECK-NOT: sub nsw <4 x i32>
 ;CHECK: ret i32
@@ -299,7 +299,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 
 ; In this test the reduction variable is on the LHS and we can vectorize it.
-;CHECK: @reduction_sub_lhs
+;CHECK-LABEL: @reduction_sub_lhs(
 ;CHECK: phi <4 x i32>
 ;CHECK: sub nsw <4 x i32>
 ;CHECK: ret i32
@@ -323,3 +323,174 @@ for.end:                                          ; preds = %for.body, %entry
   %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
   ret i32 %x.0.lcssa
 }
+
+; We can vectorize conditional reductions with multi-input phis.
+; CHECK-LABEL: @reduction_conditional(
+; CHECK: fadd <4 x float>
+
+define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
+  %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %B, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %cmp3 = fcmp ogt float %0, %1
+  br i1 %cmp3, label %if.then, label %for.inc
+
+if.then:
+  %cmp6 = fcmp ogt float %1, 1.000000e+00
+  br i1 %cmp6, label %if.then8, label %if.else
+
+if.then8:
+  %add = fadd fast float %sum.033, %0
+  br label %for.inc
+
+if.else:
+  %cmp14 = fcmp ogt float %0, 2.000000e+00
+  br i1 %cmp14, label %if.then16, label %for.inc
+
+if.then16:
+  %add19 = fadd fast float %sum.033, %1
+  br label %for.inc
+
+for.inc:
+  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
+  ret float %sum.1.lcssa
+}
+
+; We can't vectorize reductions with phi inputs from outside the reduction.
+; CHECK-LABEL: @noreduction_phi(
+; CHECK-NOT: fadd <4 x float>
+define float @noreduction_phi(float* %A, float* %B, float* %C, float %S) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
+  %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %B, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %cmp3 = fcmp ogt float %0, %1
+  br i1 %cmp3, label %if.then, label %for.inc
+
+if.then:
+  %cmp6 = fcmp ogt float %1, 1.000000e+00
+  br i1 %cmp6, label %if.then8, label %if.else
+
+if.then8:
+  %add = fadd fast float %sum.033, %0
+  br label %for.inc
+
+if.else:
+  %cmp14 = fcmp ogt float %0, 2.000000e+00
+  br i1 %cmp14, label %if.then16, label %for.inc
+
+if.then16:
+  %add19 = fadd fast float %sum.033, %1
+  br label %for.inc
+
+for.inc:
+  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ 0.000000e+00, %if.else ], [ %sum.033, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
+  ret float %sum.1.lcssa
+}
+
+; We can't vectorize reductions that feed another header PHI.
+; CHECK-LABEL: @noredux_header_phi(
+; CHECK-NOT: fadd <4 x float>
+
+define float @noredux_header_phi(float* %A, float* %B, float* %C, float %S)  {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ]
+  %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %add = fadd fast float %sum.08, %0
+  %add1 = fadd fast float %sum2.09, %add
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  %add1.lcssa = phi float [ %add1, %for.body ]
+  %add.lcssa = phi float [ %add, %for.body ]
+  %add2 = fadd fast float %add.lcssa, %add1.lcssa
+  ret float %add2
+}
+
+
+; When vectorizing a reduction whose loop header phi value is used outside the
+; loop, special care must be taken. Otherwise, the reduced value feeding into the
+; outside user misses a few iterations (VF-1) of the loop.
+; PR16522
+
+; CHECK-LABEL: @phivalueredux(
+; CHECK-NOT: x i32>
+
+define i32 @phivalueredux(i32 %p) {
+entry:
+  br label %for.body
+
+for.body:
+  %t.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %p.addr.02 = phi i32 [ %p, %entry ], [ %xor, %for.body ]
+  %xor = xor i32 %p.addr.02, -1
+  %inc = add nsw i32 %t.03, 1
+  %exitcond = icmp eq i32 %inc, 16
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %p.addr.02
+}
+
+; Don't vectorize a reduction value that is not the last in a reduction cycle. We
+; would lose iterations (VF-1) on the operations after that use.
+; PR17498
+
+; CHECK-LABEL: @not_last_operation(
+; CHECK-NOT: x i32>
+define i32 @not_last_operation(i32 %p, i32 %val) {
+entry:
+  %tobool = icmp eq i32 %p, 0
+  br label %for.body
+
+for.body:
+  %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ]
+  %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ]
+  %0 = zext i1 %tobool to i32
+  %inc4.1 = xor i32 %0, 1
+  %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1
+  %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1
+  %inc6.1 = add nsw i32 %inc613.1, 1
+  %exitcond.1 = icmp eq i32 %inc6.1, 22
+  br i1 %exitcond.1, label %exit, label %for.body
+
+exit:
+  %inc.2 = add nsw i32 %inc511.1.inc4.1, 2
+  ret i32 %inc.2
+}
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index f43f02bc3132..65ef95dcb121 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Make sure consecutive vector generates correct negative indices.
 ; PR15882
 
-; CHECK: reverse_induction_i64
+; CHECK-LABEL: @reverse_induction_i64(
 ; CHECK: add <4 x i64> %[[SPLAT:.*]], <i64 0, i64 -1, i64 -2, i64 -3>
 ; CHECK: add <4 x i64> %[[SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
 
@@ -29,7 +29,7 @@ loopend:
   ret i32 %inc.redux
 }
 
-; CHECK: reverse_induction_i128
+; CHECK-LABEL: @reverse_induction_i128(
 ; CHECK: add <4 x i128> %[[SPLAT:.*]], <i128 0, i128 -1, i128 -2, i128 -3>
 ; CHECK: add <4 x i128> %[[SPLAT]], <i128 -4, i128 -5, i128 -6, i128 -7>
 define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
@@ -52,7 +52,7 @@ loopend:
   ret i32 %inc.redux
 }
 
-; CHECK: reverse_induction_i16
+; CHECK-LABEL: @reverse_induction_i16(
 ; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3>
 ; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7>
 
@@ -77,3 +77,72 @@ loopend:
 }
 
 
+@a = common global [1024 x i32] zeroinitializer, align 16
+
+; We incorrectly transformed this loop into an empty one because we left the
+; induction variable in i8 type and truncated the exit value 1024 to 0.
+; int a[1024];
+;
+; void fail() {
+;   int reverse_induction = 1023;
+;   unsigned char forward_induction = 0;
+;   while ((reverse_induction) >= 0) {
+;     forward_induction++;
+;     a[reverse_induction] = forward_induction;
+;     --reverse_induction;
+;   }
+; }
+
+; CHECK-LABEL: @reverse_forward_induction_i64_i8(
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %normalized.idx = sub i64 %index, 0
+; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK: trunc i64 %index to i8
+
+define void @reverse_forward_induction_i64_i8() {
+entry:
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]  ; reverse index, counts down from 1023
+  %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ]  ; forward i8 counter; wraps during the loop
+  %inc = add i8 %forward_induction.05, 1
+  %conv = zext i8 %inc to i32
+  %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4  ; a[reverse index] = zero-extended forward counter
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %0 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %0, 0  ; tests the pre-decrement index, so index 0 is still stored
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
+
+; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed(
+; CHECK: vector.body:
+; CHECK:  %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:  %normalized.idx = sub i64 %index, 129
+; CHECK:  %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK:  trunc i64 %index to i8
+
+define void @reverse_forward_induction_i64_i8_signed() {
+entry:
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]  ; reverse index, counts down from 1023
+  %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ]  ; -127 as i8 has the bit pattern of 129, the start expected above
+  %inc = add i8 %forward_induction.05, 1
+  %conv = sext i8 %inc to i32  ; sign-extended here; the unsigned variant of this test uses zext
+  %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %0 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %0, 0  ; tests the pre-decrement index, so index 0 is still stored
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/reverse_iter.ll b/test/Transforms/LoopVectorize/reverse_iter.ll
new file mode 100644
index 000000000000..f803120c4d57
--- /dev/null
+++ b/test/Transforms/LoopVectorize/reverse_iter.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure that the reverse iterators are calculated using 64-bit arithmetic, not 32-bit.
+;
+; int foo(int n, int *A) {
+;   int sum;
+;   for (int i=n; i > 0; i--)
+;     sum += A[i*2];
+;   return sum;
+; }
+;
+
+;CHECK-LABEL: @foo(
+;CHECK:  <i64 0, i64 -1, i64 -2, i64 -3>
+;CHECK: ret
+define i32 @foo(i32 %n, i32* nocapture %A) {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %2 = sext i32 %n to i64
+  br label %3
+
+; <label>:3                                       ; preds = %.lr.ph, %3
+  %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]  ; i, widened to i64, counts down from n
+  %sum.01 = phi i32 [ undef, %.lr.ph ], [ %9, %3 ]  ; sum is uninitialized in the C source, hence undef
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = shl nsw i32 %4, 1  ; i * 2, computed in 32 bits
+  %6 = sext i32 %5 to i64
+  %7 = getelementptr inbounds i32* %A, i64 %6
+  %8 = load i32* %7, align 4  ; A[i*2]
+  %9 = add nsw i32 %8, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %10 = trunc i64 %indvars.iv.next to i32
+  %11 = icmp sgt i32 %10, 0
+  br i1 %11, label %3, label %._crit_edge
+
+._crit_edge:                                      ; preds = %3, %0
+  %sum.0.lcssa = phi i32 [ undef, %0 ], [ %9, %3 ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
new file mode 100644
index 000000000000..6c86561a1c7e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -0,0 +1,235 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+; Check vectorization that would ordinarily require a runtime bounds
+; check on the pointers when mixing address spaces. For now we cannot
+; assume address spaces do not alias, and we can't assume that
+; different pointers are directly comparable.
+;
+; These all test this basic loop for different combinations of address
+; spaces, and swapping in globals or adding noalias.
+;
+;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
+;{
+;    for (int i = 0; i < n; ++i)
+;    {
+;        a[i] = 3 * b[i];
+;    }
+;}
+
+; Artificial datalayout
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
+@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16
+
+; Both parameters are unidentified objects with the same address
+; space, so this should vectorize normally.
+define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+; CHECK-LABEL: @foo(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], address space 1
+  %mul = mul nsw i32 %0, 3  ; 3 * b[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4  ; a[i], address space 1
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Parameters are unidentified objects in different address spaces, so this cannot be vectorized.
+define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+; CHECK-LABEL: @bar0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], address space 1
+  %mul = mul nsw i32 %0, 3  ; 3 * b[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4  ; a[i], default address space
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Same loop with the address spaces of %a and %b swapped; it must still not be vectorized.
+define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @bar1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4  ; b[i], default address space
+  %mul = mul nsw i32 %0, 3  ; 3 * b[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4  ; a[i], address space 1
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; We should still be able to vectorize with noalias even if the
+; address spaces are different.
+define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
+; CHECK-LABEL: @bar2(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], address space 1, noalias
+  %mul = mul nsw i32 %0, 3  ; 3 * b[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4  ; a[i], default address space, noalias
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Store to identified global with different address space. This isn't
+; generally safe and shouldn't be vectorized.
+define void @arst0(i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @arst0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4  ; b[i], unidentified pointer, default address space
+  %mul = mul nsw i32 %0, 3  ; 3 * b[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4  ; g_as1[i], global in address space 1
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+
+; Load from identified global with different address space.
+; This isn't generally safe and shouldn't be vectorized.
+define void @arst1(i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @arst1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; g_as1[i], global in address space 1
+  %mul = mul nsw i32 %0, 3  ; 3 * g_as1[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4  ; b[i], unidentified pointer, default address space
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Read and write to 2 identified globals in different address
+; spaces. This should be vectorized.
+define void @aoeu(i32 %n) #0 {
+; CHECK-LABEL: @aoeu(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
+  %0 = load i32 addrspace(2)* %arrayidx, align 4  ; q_as2[i], global in address space 2
+  %mul = mul nsw i32 %0, 3  ; 3 * q_as2[i]
+  %idxprom1 = sext i32 %i.0 to i64
+  %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4  ; g_as1[i], global in address space 1
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
new file mode 100644
index 000000000000..212b37cceab3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -0,0 +1,142 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+; Artificial datalayout
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+; CHECK-LABEL: @add_ints_1_1_1(
+; CHECK: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200  ; constant trip count of 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], only read, address space 1
+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+  %1 = load i32 addrspace(1)* %arrayidx1, align 4  ; c[i], only read, address space 1
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4  ; a[i] = b[i] + c[i], address space 1
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 {
+; CHECK-LABEL: @add_ints_as_1_0_0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200  ; constant trip count of 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %b, i64 %i.0
+  %0 = load i32* %arrayidx, align 4  ; b[i], only read, default address space
+  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+  %1 = load i32* %arrayidx1, align 4  ; c[i], only read, default address space
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4  ; a[i] = b[i] + c[i], address space 1
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200  ; constant trip count of 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], only read, address space 1
+  %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+  %1 = load i32* %arrayidx1, align 4  ; c[i], only read, default address space
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+  store i32 %add, i32* %arrayidx2, align 4  ; a[i] = b[i] + c[i], default address space
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200  ; constant trip count of 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], only read, address space 1
+  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+  %1 = load i32 addrspace(1)* %arrayidx1, align 4  ; c[i], only read, address space 1
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+  store i32 %add, i32* %arrayidx2, align 4  ; a[i] = b[i] + c[i], default address space
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_2(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i64 %i.0, 200  ; constant trip count of 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+  %0 = load i32 addrspace(1)* %arrayidx, align 4  ; b[i], only read, address space 1
+  %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0
+  %1 = load i32 addrspace(2)* %arrayidx1, align 4  ; c[i], only read, address space 2
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+  store i32 %add, i32* %arrayidx2, align 4  ; a[i] = b[i] + c[i], default address space
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index 4145d134fd70..a2b9ad94c837 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: add_ints
+;CHECK-LABEL: @add_ints(
 ;CHECK: br
 ;CHECK: getelementptr
 ;CHECK-NEXT: getelementptr
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 014c4fc48f87..d15479d202b7 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;CHECK: for.body.preheader:
 ;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
 ;CHECK: vector.memcheck:
-;CHECK: br i1 %found.conflict, label %middle.block, label %vector.ph
+;CHECK: br i1 %memcheck.conflict, label %middle.block, label %vector.ph
 ;CHECK: load <4 x float>
 define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
 entry:
@@ -34,3 +34,31 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret i32 undef
 }
+
+; Make sure that we try to vectorize loops with a runtime check if the
+; dependency check fails.
+
+; CHECK-LABEL: @test_runtime_check(
+; CHECK:      <4 x float>
+define void @test_runtime_check(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset
+  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+  %l1 = load float* %arr.idx, align 4  ; a[iv + offset]
+  %ind.sum2 = add i64 %iv, %offset2
+  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+  %l2 = load float* %arr.idx2, align 4  ; a[iv + offset2]; same base, distance to the store unknown
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4  ; a[iv + offset] = a[iv + offset] + b * a[iv + offset2]
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index d7839746f0e1..7370a6fb93c5 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; We are vectorizing with 6 runtime checks.
-;CHECK: func1x6
+;CHECK-LABEL: @func1x6(
 ;CHECK: <4 x i32>
 ;CHECK: ret
 define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
@@ -38,7 +38,7 @@ for.end:                                          ; preds = %for.body
 }
 
 ; We are not vectorizing with 12 runtime checks.
-;CHECK: func2x6
+;CHECK-LABEL: @func2x6(
 ;CHECK-NOT: <4 x i32>
 ;CHECK: ret
 define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll
new file mode 100644
index 000000000000..c9508601e2c1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/safegep.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1  < %s |  FileCheck %s
+target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+
+; We can vectorize this code because if the address computation wrapped, a load
+; from address 0 would take place, which is undefined behaviour in address space 0
+; according to LLVM IR semantics.
+
+; PR16592
+
+; CHECK-LABEL: @safe(
+; CHECK: <4 x float>
+
+define void @safe(float* %A, float* %B, float %K) {
+entry:
+  br label %"<bb 3>"
+
+"<bb 3>":
+  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
+  %pp3 = getelementptr float* %A, i32 %i_15  ; note: this GEP carries no inbounds flag
+  %D.1396_10 = load float* %pp3, align 4  ; A[i]
+  %pp24 = getelementptr float* %B, i32 %i_15  ; note: this GEP carries no inbounds flag
+  %D.1398_15 = load float* %pp24, align 4  ; B[i]
+  %D.1399_17 = fadd float %D.1398_15, %K
+  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+  store float %D.1400_18, float* %pp3, align 4  ; A[i] = A[i] * (B[i] + K)
+  %i_19 = add nsw i32 %i_15, 1
+  %exitcond = icmp ne i32 %i_19, 64
+  br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+  ret void
+}
+
+; In a non-default address space the load-from-0 rule does not hold, so this must not be vectorized.
+
+; CHECK-LABEL: @notsafe(
+; CHECK-NOT: <4 x float>
+
+define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
+entry:
+  br label %"<bb 3>"
+
+"<bb 3>":
+  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
+  %pp3 = getelementptr float addrspace(5) * %A, i32 %i_15  ; no inbounds flag; pointer is in address space 5
+  %D.1396_10 = load float addrspace(5) * %pp3, align 4  ; A[i], address space 5
+  %pp24 = getelementptr float* %B, i32 %i_15
+  %D.1398_15 = load float* %pp24, align 4  ; B[i], default address space
+  %D.1399_17 = fadd float %D.1398_15, %K
+  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+  store float %D.1400_18, float addrspace(5) * %pp3, align 4  ; A[i] = A[i] * (B[i] + K)
+  %i_19 = add nsw i32 %i_15, 1
+  %exitcond = icmp ne i32 %i_19, 64
+  br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+  ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
index 15738936457a..d623a3469096 100644
--- a/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;     x[k] = x[k-1] + y[k];
 ; }
 
-; CHECK: @kernel11
+; CHECK-LABEL: @kernel11(
 ; CHECK-NOT: <4 x double>
 ; CHECK: ret
 define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
@@ -77,7 +77,7 @@ define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
 ;   }
 ; }
 
-; CHECK: @func2
+; CHECK-LABEL: @func2(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
 define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index 7a14d247c9b4..257c7bebe4d5 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @b = common global [2048 x i32] zeroinitializer, align 16
 @c = common global [2048 x i32] zeroinitializer, align 16
 
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK: load <4 x i32>
 ; make sure that we have a scalar condition and a vector operand.
 ;CHECK: select i1 %cond, <4 x i32>
diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
new file mode 100644
index 000000000000..683621a6f69b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=8 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+@b = common global i32 0, align 4
+@f = common global i32 0, align 4
+@a = common global i32 0, align 4
+@d = common global i32* null, align 8
+@e = common global i32* null, align 8
+@c = common global i32 0, align 4
+
+; CHECK-LABEL: @fn1(
+; CHECK: vector.body
+define void @fn1() #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %i.0 = phi i32 [ undef, %entry ], [ %inc, %for.cond ]
+  %cmp = icmp slt i32 %i.0, 0
+  %call = tail call i32 @fn2(double fadd (double fsub (double undef, double undef), double 1.000000e+00)) #2  ; NOTE(review): no "attributes #2" line exists in this file - confirm intended
+  %inc = add nsw i32 %i.0, 1
+  br i1 %cmp, label %for.cond, label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond
+  %call.lcssa = phi i32 [ %call, %for.cond ]
+  %cmp514 = icmp sgt i32 %call.lcssa, 0
+  br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26
+
+for.cond7.preheader.lr.ph:                        ; preds = %for.cond4.preheader
+  %0 = load i32** @e, align 8, !tbaa !4  ; NOTE(review): !4 refers to the "int" node although this loads a pointer - confirm intended
+  br label %for.cond7.preheader
+
+for.cond7.preheader:                              ; preds = %for.cond7.preheader.lr.ph, %for.inc23
+  %y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ]
+  %i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ]
+  %n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, %for.inc23 ]
+  %1 = load i32* @b, align 4, !tbaa !5
+  %tobool11 = icmp eq i32 %1, 0
+  br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph
+
+for.body8.lr.ph:                                  ; preds = %for.cond7.preheader
+  %add9 = add i32 %n.015, 1
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.body8.lr.ph, %for.inc19
+  %indvars.iv19 = phi i64 [ 0, %for.body8.lr.ph ], [ %indvars.iv.next20, %for.inc19 ]
+  %i.213 = phi i32 [ %i.116, %for.body8.lr.ph ], [ 0, %for.inc19 ]
+  %2 = trunc i64 %indvars.iv19 to i32
+  %add10 = add i32 %add9, %2
+  store i32 %add10, i32* @f, align 4, !tbaa !5
+  %idx.ext = sext i32 %add10 to i64
+  %add.ptr = getelementptr inbounds i32* @a, i64 %idx.ext
+  %tobool129 = icmp eq i32 %i.213, 0
+  br i1 %tobool129, label %for.inc19, label %for.body13.lr.ph
+
+for.body13.lr.ph:                                 ; preds = %for.body8
+  %3 = sext i32 %i.213 to i64
+  br label %for.body13
+
+for.body13:                                       ; preds = %for.body13.lr.ph, %for.body13
+  %indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ]  ; innermost loop index, starts at sext(%i.213)
+  %add.ptr.sum = add i64 %idx.ext, %indvars.iv
+  %arrayidx = getelementptr inbounds i32* @a, i64 %add.ptr.sum
+  %4 = load i32* %arrayidx, align 4, !tbaa !5
+  %arrayidx15 = getelementptr inbounds i32* %0, i64 %indvars.iv
+  store i32 %4, i32* %arrayidx15, align 4, !tbaa !5  ; copies one i32 per iteration into the buffer loaded from @e
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %5 = trunc i64 %indvars.iv.next to i32
+  %tobool12 = icmp eq i32 %5, 0  ; loop exits when the i32 truncation of the next index is 0
+  br i1 %tobool12, label %for.cond11.for.inc19_crit_edge, label %for.body13
+
+for.cond11.for.inc19_crit_edge:                   ; preds = %for.body13
+  br label %for.inc19
+
+for.inc19:                                        ; preds = %for.cond11.for.inc19_crit_edge, %for.body8
+  %6 = load i32* @c, align 4, !tbaa !5
+  %inc20 = add nsw i32 %6, 1
+  store i32 %inc20, i32* @c, align 4, !tbaa !5
+  %indvars.iv.next20 = add i64 %indvars.iv19, 1
+  %7 = load i32* @b, align 4, !tbaa !5
+  %tobool = icmp eq i32 %7, 0
+  br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8
+
+for.cond7.for.inc23_crit_edge:                    ; preds = %for.inc19
+  %add.ptr.lcssa = phi i32* [ %add.ptr, %for.inc19 ]
+  store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !4
+  br label %for.inc23
+
+for.inc23:                                        ; preds = %for.cond7.for.inc23_crit_edge, %for.cond7.preheader
+  %i.2.lcssa = phi i32 [ 0, %for.cond7.for.inc23_crit_edge ], [ %i.116, %for.cond7.preheader ]
+  %inc24 = add nsw i32 %y.017, 1
+  %inc25 = add nsw i32 %n.015, 1
+  %exitcond = icmp ne i32 %inc24, %call.lcssa  ; outer trip count is bounded by the value returned from @fn2
+  br i1 %exitcond, label %for.cond7.preheader, label %for.cond4.for.end26_crit_edge
+
+for.cond4.for.end26_crit_edge:                    ; preds = %for.inc23
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.cond4.for.end26_crit_edge, %for.cond4.preheader
+  ret void
+}
+declare i32 @fn2(double) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"double", metadata !1}
+!4 = metadata !{metadata !0, metadata !0, i64 0}
+!5 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
index 7e2dd5fc0fcf..83f35ffb609b 100644
--- a/test/Transforms/LoopVectorize/simple-unroll.ll
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;  for (i=0; i<n; i++){
 ;    a[i] += i;
 ;  }
-;CHECK: @inc
+;CHECK-LABEL: @inc(
 ;CHECK: load <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add nsw <4 x i32>
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index fa83dba3d367..49ce5c539727 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 @b = common global [2048 x i32] zeroinitializer, align 16
 @c = common global [2048 x i32] zeroinitializer, align 16
 
-;CHECK: @example1
+;CHECK-LABEL: @example1(
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret void
 define void @example1() nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index e8a089a98120..8f675afd80cb 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @start_at_nonzero
+;CHECK-LABEL: @start_at_nonzero(
 ;CHECK: mul nuw <4 x i32>
 ;CHECK: ret i32
 define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
new file mode 100644
index 000000000000..0ec8010756d1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
@@ -0,0 +1,55 @@
+; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@uf = common global [100 x i32] zeroinitializer, align 16
+@xi = common global [100 x i32] zeroinitializer, align 16
+@q = common global [100 x i32] zeroinitializer, align 16
+
+; PR16455
+
+
+; Due to a bug in the way we handled reverse induction stores, we would generate
+; one shuffle too many.
+
+define void @t()  { ; reverse loop: uf[i+1] is stored twice per iteration, from xi and then q
+entry:
+  br label %for.body
+
+; CHECK-LABEL: @t(
+; CHECK: vector.body:
+; CHECK: load <4 x i32>
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = shufflevector
+; CHECK: load <4 x i32>
+; CHECK: [[VAR2:%[a-zA-Z0-9]+]] = shufflevector
+; CHECK: [[VAR3:%[a-zA-Z0-9]+]] = add nsw <4 x i32> [[VAR2]], [[VAR1]]
+; CHECK: [[VAR4:%[a-zA-Z0-9]+]] = shufflevector <4 x i32> [[VAR3]]
+; CHECK: store <4 x i32> [[VAR4]]
+; CHECK: load <4 x i32>
+; CHECK: [[VAR5:%[a-zA-Z0-9]+]] = shufflevector
+; CHECK-NOT: add nsw <4 x i32> [[VAR4]], [[VAR5]]
+; CHECK-NOT: add nsw <4 x i32> [[VAR5]], [[VAR4]]
+; CHECK: add nsw <4 x i32> [[VAR3]], [[VAR5]]
+
+for.body:
+  %indvars.iv = phi i64 [ 93, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable, starts at 93
+  %0 = add i64 %indvars.iv, 1 ; element index used for all three arrays = iv + 1
+  %arrayidx = getelementptr inbounds [100 x i32]* @uf, i64 0, i64 %0 ; &uf[iv + 1]
+  %arrayidx3 = getelementptr inbounds [100 x i32]* @xi, i64 0, i64 %0 ; &xi[iv + 1]
+  %1 = load i32* %arrayidx3, align 4
+  %2 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %2, %1 ; uf[iv + 1] + xi[iv + 1]
+  store i32 %add4, i32* %arrayidx, align 4 ; first store to uf[iv + 1]
+  %arrayidx7 = getelementptr inbounds [100 x i32]* @q, i64 0, i64 %0 ; &q[iv + 1]
+  %3 = load i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %add4, %3 ; uses %add4 directly, not a value reloaded from uf
+  store i32 %add8, i32* %arrayidx, align 4 ; second store to the same uf element
+  %indvars.iv.next = add i64 %indvars.iv, -1 ; step is -1, i.e. a reverse induction
+  %4 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp ugt i32 %4, 2 ; continue while the truncated next iv is > 2 (unsigned)
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
index 573480d77cdd..75beae82f170 100644
--- a/test/Transforms/LoopVectorize/struct_access.ll
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -21,7 +21,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;   return sum;
 ; }
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK-NOT: load <4 x i32>
 ;CHECK: ret
 define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
@@ -44,3 +44,45 @@ for.end:                                          ; preds = %for.body, %entry
   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
   ret i32 %sum.0.lcssa
 }
+
+%struct.lit = type { i32 }
+
+; Verify that we still vectorize the access if the struct has the same size as
+; the loaded element.
+; struct lit {
+;  int x;
+; };
+;
+;
+; int bar(struct lit *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK-LABEL: @bar(
+;CHECK: load <4 x i32>
+;CHECK: ret
+define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp { ; returns the sum of A[i].x for i in [0, n)
+entry:
+  %cmp4 = icmp sgt i32 %n, 0 ; the loop is entered only when n > 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %x = getelementptr inbounds %struct.lit* %A, i64 %indvars.iv, i32 0 ; address of field 0 of A[iv]
+  %0 = load i32* %x, align 4
+  %add = add nsw i32 %0, %sum.05 ; running sum
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ; 0 if the loop was skipped, else the final sum
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/undef-inst-bug.ll b/test/Transforms/LoopVectorize/undef-inst-bug.ll
new file mode 100644
index 000000000000..ed60e801afde
--- /dev/null
+++ b/test/Transforms/LoopVectorize/undef-inst-bug.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; We used to fail on this loop because we did not properly handle the loop
+; invariant instruction anchored in the loop when used as a getelementptr index.
+; We would use the index from the original loop resulting in a use not dominated
+; by the definition.
+
+; PR16452
+
+; Verify that we don't miscompile this loop.
+
+; CHECK-LABEL: @t(
+; CHECK: <4 x i32>
+
+define void @t() { ; single-block countdown loop storing undef through a GEP with a loop-invariant index
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv17 = phi i64 [ %indvars.next, %for.body ], [ 128, %entry ] ; induction variable, starts at 128
+
+  ; Loop invariant anchored in loop.
+  %idxprom21 = zext i32 undef to i64
+
+  %arrayidx23 = getelementptr inbounds [100 x [100 x i32]]* undef, i64 0, i64 %idxprom21, i64 %indvars.iv17 ; invariant %idxprom21 selects the row, the induction variable the column
+  store i32 undef, i32* %arrayidx23, align 4
+  %indvars.next= add i64 %indvars.iv17, -1 ; step is -1
+  %0 = trunc i64 %indvars.next to i32
+  %cmp15 = icmp ugt i32 %0, undef ; unsigned compare of the truncated next iv against undef
+  br i1 %cmp15, label %for.body, label %loopexit
+
+loopexit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll
new file mode 100644
index 000000000000..33f128da905d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK-LABEL: @inc(
+;CHECK: load i32*
+;CHECK: load i32*
+;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK: store i32
+;CHECK: store i32
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp { ; a[i] += i for i in [0, n), on the global array @a
+  %1 = icmp sgt i32 %n, 0 ; the loop is entered only when n > 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv ; &a[iv]
+  %3 = load i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32 ; current index as i32
+  %5 = add nsw i32 %3, %4 ; a[iv] + iv
+  store i32 %5, i32* %2, align 4 ; written back to the same element
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n ; exit once the truncated next index equals n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll
index f376656f0754..e8d37285f803 100644
--- a/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; SCEVExpander::expandCodeFor would change a value (the start value of an
 ; induction) that we cached in the induction variable list.
 
-; CHECK: test_vh
+; CHECK-LABEL: @test_vh(
 ; CHECK-NOT: store <4 x i8> undef
 
 define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) {
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index f289ded25de1..780046930e1b 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -9,9 +9,9 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 
 ; This test checks that we add metadata to vectorized loops
-; CHECK: _Z4foo1Pii
+; CHECK-LABEL: @_Z4foo1Pii(
 ; CHECK: <4 x i32>
-; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: llvm.loop
 ; CHECK: ret
 
 ; This test comes from the loop:
@@ -40,10 +40,10 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
   ret i32 %__init.addr.0.lcssa.i
 }
 
-; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata.
-; CHECK: _Z4foo2Pii
+; This test checks that we don't vectorize loops that are marked with the "width" == 1 metadata.
+; CHECK-LABEL: @_Z4foo2Pii(
 ; CHECK-NOT: <4 x i32>
-; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: llvm.loop
 ; CHECK: ret
 define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 {
 entry:
@@ -59,7 +59,7 @@ for.body.i:                                       ; preds = %entry, %for.body.i
   %add.i = add nsw i32 %0, %__init.addr.05.i
   %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
   %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
-  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.loop !0
 
 _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
   %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
@@ -68,5 +68,10 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
 
 attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
 
-!3 = metadata !{}
+; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2}
+; CHECK: !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
+; CHECK: !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1}
+; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2}
 
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.vectorizer.width", i32 1}
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index 54cbe8df46b0..71a9cd0dc5be 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @read_mod_write_single_ptr
+;CHECK-LABEL: @read_mod_write_single_ptr(
 ;CHECK: load <4 x float>
 ;CHECK: ret i32
 define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
diff --git a/test/Transforms/LowerAtomic/atomic-load.ll b/test/Transforms/LowerAtomic/atomic-load.ll
index bc04e88344ef..1279bf72201c 100644
--- a/test/Transforms/LowerAtomic/atomic-load.ll
+++ b/test/Transforms/LowerAtomic/atomic-load.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -loweratomic -S | FileCheck %s
 
 define i8 @add() {
-; CHECK: @add
+; CHECK-LABEL: @add(
   %i = alloca i8
   %j = atomicrmw add i8* %i, i8 42 monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
@@ -12,7 +12,7 @@ define i8 @add() {
 }
 
 define i8 @nand() {
-; CHECK: @nand
+; CHECK-LABEL: @nand(
   %i = alloca i8
   %j = atomicrmw nand i8* %i, i8 42 monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
@@ -24,7 +24,7 @@ define i8 @nand() {
 }
 
 define i8 @min() {
-; CHECK: @min
+; CHECK-LABEL: @min(
   %i = alloca i8
   %j = atomicrmw min i8* %i, i8 42 monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll
index 5e2f034a5177..4331677764ba 100644
--- a/test/Transforms/LowerAtomic/atomic-swap.ll
+++ b/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -loweratomic -S | FileCheck %s
 
 define i8 @cmpswap() {
-; CHECK: @cmpswap
+; CHECK-LABEL: @cmpswap(
   %i = alloca i8
   %j = cmpxchg i8* %i, i8 0, i8 42 monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
@@ -13,7 +13,7 @@ define i8 @cmpswap() {
 }
 
 define i8 @swap() {
-; CHECK: @swap
+; CHECK-LABEL: @swap(
   %i = alloca i8
   %j = atomicrmw xchg i8* %i, i8 42 monotonic
 ; CHECK: [[INST:%[a-z0-9]+]] = load
diff --git a/test/Transforms/LowerAtomic/barrier.ll b/test/Transforms/LowerAtomic/barrier.ll
index 814d7afb5ff9..665f9d756d34 100644
--- a/test/Transforms/LowerAtomic/barrier.ll
+++ b/test/Transforms/LowerAtomic/barrier.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -loweratomic -S | FileCheck %s
 
 define void @barrier() {
-; CHECK: @barrier
+; CHECK-LABEL: @barrier(
   fence seq_cst
 ; CHECK-NEXT: ret
   ret void
diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LowerAtomic/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index c00127e1ed24..955209af14a6 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -lower-expect -strip-dead-prototypes -S -o - < %s | FileCheck %s
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 define i32 @test1(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -34,7 +34,7 @@ declare i64 @llvm.expect.i64(i64, i64) nounwind readnone
 
 declare i32 @f(...)
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 define i32 @test2(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -62,7 +62,7 @@ return:                                           ; preds = %if.end, %if.then
   ret i32 %0
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 define i32 @test3(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -93,7 +93,7 @@ return:                                           ; preds = %if.end, %if.then
   ret i32 %0
 }
 
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 define i32 @test4(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -125,7 +125,7 @@ return:                                           ; preds = %if.end, %if.then
   ret i32 %0
 }
 
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 define i32 @test5(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -155,7 +155,7 @@ return:                                           ; preds = %if.end, %if.then
   ret i32 %0
 }
 
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 define i32 @test6(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -184,7 +184,7 @@ return:                                           ; preds = %sw.epilog, %sw.bb
   ret i32 %0
 }
 
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 define i32 @test7(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
@@ -214,7 +214,7 @@ return:                                           ; preds = %sw.epilog, %sw.bb
   ret i32 %0
 }
 
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 define i32 @test8(i32 %x) nounwind uwtable ssp {
 entry:
   %retval = alloca i32, align 4
diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LowerInvoke/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index cc77d3c44d56..e85f03ee5c78 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -7,88 +7,88 @@
 ;CHECK-NEXT:   br label %NodeBlock37
 
 ;CHECK:      NodeBlock37:                                      ; preds = %entry
-;CHECK-NEXT:   %Pivot38 = icmp ult i32 %tmp158, 11
+;CHECK-NEXT:   %Pivot38 = icmp slt i32 %tmp158, 10
 ;CHECK-NEXT:   br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35
 
 ;CHECK:      NodeBlock35:                                      ; preds = %NodeBlock37
-;CHECK-NEXT:   %Pivot36 = icmp ult i32 %tmp158, 14
+;CHECK-NEXT:   %Pivot36 = icmp slt i32 %tmp158, 13
 ;CHECK-NEXT:   br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33
 
 ;CHECK:      NodeBlock33:                                      ; preds = %NodeBlock35
-;CHECK-NEXT:   %Pivot34 = icmp ult i32 %tmp158, 15
+;CHECK-NEXT:   %Pivot34 = icmp slt i32 %tmp158, 14
 ;CHECK-NEXT:   br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31
 
 ;CHECK:      NodeBlock31:                                      ; preds = %NodeBlock33
-;CHECK-NEXT:   %Pivot32 = icmp ult i32 %tmp158, -6
+;CHECK-NEXT:   %Pivot32 = icmp slt i32 %tmp158, 15
 ;CHECK-NEXT:   br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29
 
 ;CHECK:      LeafBlock29:                                      ; preds = %NodeBlock31
-;CHECK-NEXT:   %tmp158.off = add i32 %tmp158, 6
-;CHECK-NEXT:   %SwitchLeaf30 = icmp ule i32 %tmp158.off, 4
-;CHECK-NEXT:   br i1 %SwitchLeaf30, label %bb338, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf30 = icmp eq i32 %tmp158, 15
+;CHECK-NEXT:   br i1 %SwitchLeaf30, label %bb334, label %NewDefault
 
 ;CHECK:      LeafBlock27:                                      ; preds = %NodeBlock31
-;CHECK-NEXT:   %SwitchLeaf28 = icmp eq i32 %tmp158, 15
-;CHECK-NEXT:   br i1 %SwitchLeaf28, label %bb334, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf28 = icmp eq i32 %tmp158, 14
+;CHECK-NEXT:   br i1 %SwitchLeaf28, label %bb332, label %NewDefault
 
 ;CHECK:      LeafBlock25:                                      ; preds = %NodeBlock33
-;CHECK-NEXT:   %SwitchLeaf26 = icmp eq i32 %tmp158, 14
-;CHECK-NEXT:   br i1 %SwitchLeaf26, label %bb332, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf26 = icmp eq i32 %tmp158, 13
+;CHECK-NEXT:   br i1 %SwitchLeaf26, label %bb330, label %NewDefault
 
 ;CHECK:      NodeBlock23:                                      ; preds = %NodeBlock35
-;CHECK-NEXT:   %Pivot24 = icmp ult i32 %tmp158, 12
+;CHECK-NEXT:   %Pivot24 = icmp slt i32 %tmp158, 11
 ;CHECK-NEXT:   br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21
 
 ;CHECK:      NodeBlock21:                                      ; preds = %NodeBlock23
-;CHECK-NEXT:   %Pivot22 = icmp ult i32 %tmp158, 13
+;CHECK-NEXT:   %Pivot22 = icmp slt i32 %tmp158, 12
 ;CHECK-NEXT:   br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19
 
 ;CHECK:      LeafBlock19:                                      ; preds = %NodeBlock21
-;CHECK-NEXT:   %SwitchLeaf20 = icmp eq i32 %tmp158, 13
-;CHECK-NEXT:   br i1 %SwitchLeaf20, label %bb330, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf20 = icmp eq i32 %tmp158, 12
+;CHECK-NEXT:   br i1 %SwitchLeaf20, label %bb328, label %NewDefault
 
 ;CHECK:      LeafBlock17:                                      ; preds = %NodeBlock21
-;CHECK-NEXT:   %SwitchLeaf18 = icmp eq i32 %tmp158, 12
-;CHECK-NEXT:   br i1 %SwitchLeaf18, label %bb328, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf18 = icmp eq i32 %tmp158, 11
+;CHECK-NEXT:   br i1 %SwitchLeaf18, label %bb326, label %NewDefault
 
 ;CHECK:      LeafBlock15:                                      ; preds = %NodeBlock23
-;CHECK-NEXT:   %SwitchLeaf16 = icmp eq i32 %tmp158, 11
-;CHECK-NEXT:   br i1 %SwitchLeaf16, label %bb326, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf16 = icmp eq i32 %tmp158, 10
+;CHECK-NEXT:   br i1 %SwitchLeaf16, label %bb324, label %NewDefault
 
 ;CHECK:      NodeBlock13:                                      ; preds = %NodeBlock37
-;CHECK-NEXT:   %Pivot14 = icmp ult i32 %tmp158, 8
+;CHECK-NEXT:   %Pivot14 = icmp slt i32 %tmp158, 7
 ;CHECK-NEXT:   br i1 %Pivot14, label %NodeBlock, label %NodeBlock11
 
 ;CHECK:      NodeBlock11:                                      ; preds = %NodeBlock13
-;CHECK-NEXT:   %Pivot12 = icmp ult i32 %tmp158, 9
+;CHECK-NEXT:   %Pivot12 = icmp slt i32 %tmp158, 8
 ;CHECK-NEXT:   br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9
 
 ;CHECK:      NodeBlock9:                                       ; preds = %NodeBlock11
-;CHECK-NEXT:   %Pivot10 = icmp ult i32 %tmp158, 10
+;CHECK-NEXT:   %Pivot10 = icmp slt i32 %tmp158, 9
 ;CHECK-NEXT:   br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7
 
 ;CHECK:      LeafBlock7:                                       ; preds = %NodeBlock9
-;CHECK-NEXT:   %SwitchLeaf8 = icmp eq i32 %tmp158, 10
-;CHECK-NEXT:   br i1 %SwitchLeaf8, label %bb324, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf8 = icmp eq i32 %tmp158, 9
+;CHECK-NEXT:   br i1 %SwitchLeaf8, label %bb322, label %NewDefault
 
 ;CHECK:      LeafBlock5:                                       ; preds = %NodeBlock9
-;CHECK-NEXT:   %SwitchLeaf6 = icmp eq i32 %tmp158, 9
-;CHECK-NEXT:   br i1 %SwitchLeaf6, label %bb322, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf6 = icmp eq i32 %tmp158, 8
+;CHECK-NEXT:   br i1 %SwitchLeaf6, label %bb338, label %NewDefault
 
 ;CHECK:      LeafBlock3:                                       ; preds = %NodeBlock11
-;CHECK-NEXT:   %SwitchLeaf4 = icmp eq i32 %tmp158, 8
-;CHECK-NEXT:   br i1 %SwitchLeaf4, label %bb338, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf4 = icmp eq i32 %tmp158, 7
+;CHECK-NEXT:   br i1 %SwitchLeaf4, label %bb, label %NewDefault
 
 ;CHECK:      NodeBlock:                                        ; preds = %NodeBlock13
-;CHECK-NEXT:   %Pivot = icmp ult i32 %tmp158, 7
+;CHECK-NEXT:   %Pivot = icmp slt i32 %tmp158, 0
 ;CHECK-NEXT:   br i1 %Pivot, label %LeafBlock, label %LeafBlock1
 
 ;CHECK:      LeafBlock1:                                       ; preds = %NodeBlock
-;CHECK-NEXT:   %SwitchLeaf2 = icmp eq i32 %tmp158, 7
-;CHECK-NEXT:   br i1 %SwitchLeaf2, label %bb, label %NewDefault
+;CHECK-NEXT:   %SwitchLeaf2 = icmp ule i32 %tmp158, 6
+;CHECK-NEXT:   br i1 %SwitchLeaf2, label %bb338, label %NewDefault
 
 ;CHECK:      LeafBlock:                                        ; preds = %NodeBlock
-;CHECK-NEXT:   %SwitchLeaf = icmp ule i32 %tmp158, 6
+;CHECK-NEXT:   %tmp158.off = add i32 %tmp158, 6
+;CHECK-NEXT:   %SwitchLeaf = icmp ule i32 %tmp158.off, 4
 ;CHECK-NEXT:   br i1 %SwitchLeaf, label %bb338, label %NewDefault
 
 define i32 @main(i32 %tmp158) {
diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/LowerSwitch/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index c0eaaa40154b..33eaed60fe58 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -33,17 +33,20 @@ return:                                           ; preds = %entry
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!14}
 
 !0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !12, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{metadata !6, metadata !7, metadata !6}
-!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 2, i32 0, metadata !1, null}
 !9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !12, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"}
+!13 = metadata !{i32 0}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index f6119f8bbd85..32acdd696ecf 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -30,24 +30,28 @@ return:                                           ; preds = %entry
   ret void, !dbg !19
 }
 
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!22}
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !7 = metadata !{i32 8, i32 0, metadata !1, null}
 !8 = metadata !{i32 9, i32 0, metadata !1, null}
 !9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !12 = metadata !{null, metadata !6, metadata !13, metadata !14}
-!13 = metadata !{i32 786468, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786447, metadata !20, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
 !15 = metadata !{i32 4, i32 0, metadata !10, metadata !8}
 !16 = metadata !{i32 786689, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
 !17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ]
 !18 = metadata !{i32 5, i32 0, metadata !10, metadata !8}
 !19 = metadata !{i32 10, i32 0, metadata !1, null}
 !20 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
+!21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/Mem2Reg/atomic.ll b/test/Transforms/Mem2Reg/atomic.ll
index 982c41318b10..5bc9e9281b27 100644
--- a/test/Transforms/Mem2Reg/atomic.ll
+++ b/test/Transforms/Mem2Reg/atomic.ll
@@ -3,7 +3,7 @@
 ; mem2reg is allowed with arbitrary atomic operations (although we only support
 ; it for atomic load and store at the moment).
 define i32 @test1(i32 %x) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 %x
   %a = alloca i32
   store atomic i32 %x, i32* %a seq_cst, align 4
diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/Mem2Reg/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
index 1b98f6ad383f..5d5bfbdafc5b 100644
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -8,7 +8,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 ; a 16-byte aligned store in the middle.
 
 define void @foo(i32* %p) {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
   %a0 = getelementptr i32* %p, i64 0
   store i32 0, i32* %a0, align 4
@@ -24,7 +24,7 @@ define void @foo(i32* %p) {
 ; Replacing %a8 with %a4 in the memset requires boosting the alignment of %a4.
 
 define void @bar() {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
 ; CHECK: %a4 = alloca i32, align 8
 ; CHECK-NOT: memcpy
   %a4 = alloca i32, align 4
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index f63b1dcfdd5f..7c7b4fc08809 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -48,7 +48,7 @@ entry:
 	store i8 %c, i8* %tmp73, align 1
 	%tmp76 = call i32 (...)* @bar( [19 x i8]* %x ) nounwind
 	ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: store
 ; CHECK: call void @llvm.memset.p0i8.i64
 ; CHECK-NOT: store
@@ -150,7 +150,7 @@ entry:
 	call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind 
 	ret void
         
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: store
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
 ; CHECK-NOT: store
@@ -173,7 +173,7 @@ entry:
   %0 = bitcast i32* %add.ptr to i8*
   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
   ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: store
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
@@ -186,7 +186,7 @@ entry:
   %0 = bitcast i32* %add.ptr to i8*
   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
   ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: store
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
@@ -202,7 +202,7 @@ entry:
   %arrayidx = getelementptr inbounds i32* %P, i64 1
   store i32 0, i32* %arrayidx, align 4
   ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NOT: store
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
@@ -216,7 +216,7 @@ entry:
   %1 = bitcast i32* %add.ptr to i8*
   tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
   ret void
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
 }
 
@@ -232,7 +232,7 @@ define void @test7(i32* nocapture %c) nounwind optsize {
   store i32 -1, i32* %3, align 4
   %4 = getelementptr inbounds i32* %c, i32 4
   store i32 -1, i32* %4, align 4
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
   ret void
 }
@@ -245,7 +245,7 @@ entry:
   %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
   store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
   ret void
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
 }
 
@@ -269,6 +269,6 @@ define void @test9() nounwind {
   store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2
   store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
   ret void
-; CHECK: @test9(
+; CHECK-LABEL: @test9(
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
 }
diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/MemCpyOpt/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/loadstore-sret.ll b/test/Transforms/MemCpyOpt/loadstore-sret.ll
index 67e7137e7e49..89eabca21bf9 100644
--- a/test/Transforms/MemCpyOpt/loadstore-sret.ll
+++ b/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 %"class.std::auto_ptr" = type { i32* }
 
-; CHECK: @_Z3foov
+; CHECK-LABEL: @_Z3foov(
 define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp {
 _ZNSt8auto_ptrIiED1Ev.exit:
   %temp.lvalue = alloca %"class.std::auto_ptr", align 8
diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
index b18d176f0030..8409de7ad206 100644
--- a/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
+++ b/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
@@ -12,7 +12,7 @@ define void @test1() nounwind {
   %arraydecay = getelementptr inbounds [3 x i32]* %arr, i64 0, i64 0
   call void @foo(i32* %arraydecay) nounwind
   ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: call void @llvm.memset
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 582a57b5d39c..2417cd11f7ff 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -22,7 +22,7 @@ entry:
 ; Check that one of the memcpy's are removed.
 ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: call void @ccoshl
 ; CHECK: call void @llvm.memcpy
 ; CHECK-NOT: llvm.memcpy
@@ -41,7 +41,7 @@ define void @test2(i8* %P, i8* %Q) nounwind  {
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
   ret void
         
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
 ; CHECK-NEXT: ret void
 }
@@ -58,7 +58,7 @@ define void @test3(%0* noalias sret %agg.result) nounwind  {
   %agg.result2 = bitcast %0* %agg.result to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
   ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: %agg.result1 = bitcast 
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret void
@@ -72,7 +72,7 @@ define void @test4(i8 *%P) {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
   call void @test4a(i8* align 1 byval %a)
   ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NEXT: call void @test4a(
 }
 
@@ -96,7 +96,7 @@ entry:
   store i8 4, i8* %a
   call void @test5a(%struct.S* align 16 byval %y)
   ret i32 0
-  ; CHECK: @test5(
+  ; CHECK-LABEL: @test5(
   ; CHECK: store i8 4
   ; CHECK: call void @test5a(%struct.S* byval align 16 %y)
 }
@@ -105,7 +105,7 @@ entry:
 define void @test6(i8 *%P) {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false)
   ret void
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: ret void
 }
 
@@ -122,7 +122,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
   %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
   ret i32 %call
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[NUW:#[0-9]+]]
 }
 
@@ -168,6 +168,23 @@ entry:
   ret void
 }
 
+; rdar://14073661.
+; Test10 triggered an assertion when the compiler tried to get the size of the
+; opaque type of *x, where x is the formal argument with attribute 'sret'.
+
+%opaque = type opaque
+declare void @foo(i32* noalias nocapture)
+
+define void @test10(%opaque* noalias nocapture sret %x, i32 %y) {
+  %a = alloca i32, align 4
+  store i32 %y, i32* %a
+  call void @foo(i32* noalias nocapture %a)
+  %c = load i32* %a
+  %d = bitcast %opaque* %x to i32*
+  store i32 %c, i32* %d
+  ret void
+}
+
 declare void @f1(%struct.big* sret)
 declare void @f2(%struct.big*)
 
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 7f1667a45559..2057760efa01 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -8,7 +8,7 @@ declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 
 define i8* @test1(i8* nocapture %src) nounwind {
 entry:
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: call void @llvm.memcpy
 
   %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32))
@@ -22,7 +22,7 @@ declare noalias i8* @malloc(i32)
 
 define void @test2(i8* %P) nounwind {
 entry:
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: call void @llvm.memcpy
   %add.ptr = getelementptr i8* %P, i64 16
   tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
@@ -32,7 +32,7 @@ entry:
 ; This cannot be optimize because the src/dst really do overlap.
 define void @test3(i8* %P) nounwind {
 entry:
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: call void @llvm.memmove
   %add.ptr = getelementptr i8* %P, i64 16
   tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
diff --git a/test/Transforms/MergeFunc/address-spaces.ll b/test/Transforms/MergeFunc/address-spaces.ll
new file mode 100644
index 000000000000..0d66b8281fb2
--- /dev/null
+++ b/test/Transforms/MergeFunc/address-spaces.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "p:32:32:32-p1:32:32:32-p2:16:16:16"
+
+declare void @foo(i32) nounwind
+
+; None of these functions should be merged
+
+define i32 @store_as0(i32* %x) {
+; CHECK-LABEL: @store_as0(
+; CHECK: call void @foo(
+  %gep = getelementptr i32* %x, i32 4
+  %y = load i32* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
+define i32 @store_as1(i32 addrspace(1)* %x) {
+; CHECK-LABEL: @store_as1(
+; CHECK: call void @foo(
+  %gep = getelementptr i32 addrspace(1)* %x, i32 4
+  %y = load i32 addrspace(1)* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
+define i32 @store_as2(i32 addrspace(2)* %x) {
+; CHECK-LABEL: @store_as2(
+; CHECK: call void @foo(
+  %gep = getelementptr i32 addrspace(2)* %x, i32 4
+  %y = load i32 addrspace(2)* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
diff --git a/test/Transforms/MergeFunc/inttoptr-address-space.ll b/test/Transforms/MergeFunc/inttoptr-address-space.ll
new file mode 100644
index 000000000000..0d834bc3b437
--- /dev/null
+++ b/test/Transforms/MergeFunc/inttoptr-address-space.ll
@@ -0,0 +1,29 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-n8:16:32-S128"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external addrspace(1) constant [9 x i8], align 1
+@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585 addrspace(1)*)* @func35 to i8*)]
+
+
+define internal hidden i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2496 addrspace(1)* %this, i32 0, i32 1, i32 1
+  %tmp1 = load i32 addrspace(1)* %tmp, align 4
+  ret i32 %tmp1
+}
+
+; Check for pointer bitwidth equal assertion failure
+define internal hidden i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
+bb:
+; CHECK-LABEL: @func35(
+; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)*
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* %[[V2]])
+; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
+  %tmp = getelementptr inbounds %.qux.2585 addrspace(1)* %this, i32 0, i32 2
+  %tmp1 = load i8* addrspace(1)* %tmp, align 4
+  ret i8* %tmp1
+}
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
index 93250fa8ed1a..6a69e3fcfd86 100644
--- a/test/Transforms/MergeFunc/inttoptr.ll
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -46,6 +46,7 @@ bb:
 
 define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
 bb:
+; CHECK-LABEL: @func35(
 ; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
 ; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
 ; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/MergeFunc/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MergeFunc/merge-ptr-and-int.ll b/test/Transforms/MergeFunc/merge-ptr-and-int.ll
new file mode 100644
index 000000000000..4e887cec9065
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-ptr-and-int.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+declare void @stuff()
+
+; CHECK-LABEL: @f0(
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f1(
+; CHECK: ptrtoint i64*
+; CHECK: tail call void @f0(i64
+
+define void @f1(i64* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
new file mode 100644
index 000000000000..d6ff10f82578
--- /dev/null
+++ b/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -mergefunc < %s | not grep "functions merged"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+define void @f2(i64 addrspace(1)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
new file mode 100644
index 000000000000..c9fb6a6ea353
--- /dev/null
+++ b/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f0
+; CHECK:  %2 = ptrtoint i64* %0 to i64
+; CHECK:  tail call void @f0(i64 %2)
+; CHECK:  ret void
+define void @f1(i64 addrspace(0)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll b/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
new file mode 100644
index 000000000000..8f00f033396b
--- /dev/null
+++ b/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -mergefunc < %s | not grep "functions merged"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+define void @f0(i64 addrspace(0)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+define void @f2(i64 addrspace(1)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
diff --git a/test/Transforms/MergeFunc/too-small.ll b/test/Transforms/MergeFunc/too-small.ll
new file mode 100644
index 000000000000..1a526ffd50a6
--- /dev/null
+++ b/test/Transforms/MergeFunc/too-small.ll
@@ -0,0 +1,14 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: call
+  ret void
+}
+
+define void @bar(i32 %x) {
+; CHECK-LABEL: @bar(
+; CHECK-NOT: call
+  ret void
+}
+
diff --git a/test/Transforms/MetaRenamer/lit.local.cfg b/test/Transforms/MetaRenamer/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/MetaRenamer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/ObjCARC/allocas.ll b/test/Transforms/ObjCARC/allocas.ll
new file mode 100644
index 000000000000..50656739ae71
--- /dev/null
+++ b/test/Transforms/ObjCARC/allocas.ll
@@ -0,0 +1,500 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare void @objc_autoreleasePoolPop(i8*)
+declare i8* @objc_autoreleasePoolPush()
+declare i8* @objc_retainBlock(i8*)
+
+declare i8* @objc_retainedObject(i8*)
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_unretainedPointer(i8*)
+
+declare void @use_pointer(i8*)
+declare void @callee()
+declare void @callee_fnptr(void ()*)
+declare void @invokee()
+declare i8* @returner()
+declare i8* @returner1()
+declare i8* @returner2()
+declare void @bar(i32 ()*)
+declare void @use_alloca(i8**)
+
+declare void @llvm.dbg.value(metadata, i64, metadata)
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+
+; In the presence of allocas, unconditionally remove retain/release pairs only
+; if they are known safe in both directions. This prevents matching up an inner
+; retain with the boundary guarding release in the following situation:
+; 
+; %A = alloca
+; retain(%x)
+; retain(%x) <--- Inner Retain
+; store %x, %A
+; %y = load %A
+; ... DO STUFF ...
+; release(%y)
+; release(%x) <--- Guarding Release
+;
+; rdar://13750319
+
+; CHECK: define void @test1a(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test1a(i8* %x) {
+entry:
+  %A = alloca i8*
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  store i8* %x, i8** %A, align 8
+  %y = load i8** %A
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test1b(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test1b(i8* %x) {
+entry:
+  %A = alloca i8*
+  %gep = getelementptr i8** %A, i32 0
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  store i8* %x, i8** %gep, align 8
+  %y = load i8** %A
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+
+; CHECK: define void @test1c(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test1c(i8* %x) {
+entry:
+  %A = alloca i8*, i32 3
+  %gep = getelementptr i8** %A, i32 2
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  store i8* %x, i8** %gep, align 8
+  %y = load i8** %gep
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+
+; CHECK: define void @test1d(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test1d(i8* %x) {
+entry:
+  br i1 undef, label %use_allocaA, label %use_allocaB
+
+use_allocaA:
+  %allocaA = alloca i8*
+  br label %exit
+
+use_allocaB:
+  %allocaB = alloca i8*
+  br label %exit
+
+exit:
+  %A = phi i8** [ %allocaA, %use_allocaA ], [ %allocaB, %use_allocaB ]
+  %gep = getelementptr i8** %A, i32 0
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  store i8* %x, i8** %gep, align 8
+  %y = load i8** %gep
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test1e(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test1e(i8* %x) {
+entry:
+  br i1 undef, label %use_allocaA, label %use_allocaB
+
+use_allocaA:
+  %allocaA = alloca i8*, i32 4
+  br label %exit
+
+use_allocaB:
+  %allocaB = alloca i8*, i32 4
+  br label %exit
+
+exit:
+  %A = phi i8** [ %allocaA, %use_allocaA ], [ %allocaB, %use_allocaB ]
+  %gep = getelementptr i8** %A, i32 2
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  store i8* %x, i8** %gep, align 8
+  %y = load i8** %gep
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test1f(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test1f(i8* %x) {
+entry:
+  %allocaOne = alloca i8*
+  %allocaTwo = alloca i8*
+  %A = select i1 undef, i8** %allocaOne, i8** %allocaTwo
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  store i8* %x, i8** %A, align 8
+  %y = load i8** %A
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; Make sure that if a store is in a different basic block we handle known safe
+; conservatively.
+
+
+; CHECK: define void @test2a(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test2a(i8* %x) {
+entry:
+  %A = alloca i8*
+  store i8* %x, i8** %A, align 8
+  %y = load i8** %A
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  br label %bb3
+
+bb3:
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test2b(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test2b(i8* %x) {
+entry:
+  %A = alloca i8*
+  %gep1 = getelementptr i8** %A, i32 0
+  store i8* %x, i8** %gep1, align 8
+  %gep2 = getelementptr i8** %A, i32 0
+  %y = load i8** %gep2
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  br label %bb3
+
+bb3:
+  tail call i8* @objc_retain(i8* %x)
+  tail call i8* @objc_retain(i8* %x)
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test2c(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test2c(i8* %x) {
+entry:
+  %A = alloca i8*, i32 3
+  %gep1 = getelementptr i8** %A, i32 2
+  store i8* %x, i8** %gep1, align 8
+  %gep2 = getelementptr i8** %A, i32 2
+  %y = load i8** %gep2
+  tail call i8* @objc_retain(i8* %x)
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  br label %bb3
+
+bb3:
+  tail call i8* @objc_retain(i8* %x)
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test2d(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %y)
+; CHECK: @objc_release(i8* %x)
+; CHECK: ret void
+; CHECK: }
+define void @test2d(i8* %x) {
+entry:
+  tail call i8* @objc_retain(i8* %x)
+  br label %bb1
+
+bb1:
+  %Abb1 = alloca i8*, i32 3
+  %gepbb11 = getelementptr i8** %Abb1, i32 2
+  store i8* %x, i8** %gepbb11, align 8
+  %gepbb12 = getelementptr i8** %Abb1, i32 2
+  %ybb1 = load i8** %gepbb12
+  br label %bb3
+
+bb2:
+  %Abb2 = alloca i8*, i32 4
+  %gepbb21 = getelementptr i8** %Abb2, i32 2
+  store i8* %x, i8** %gepbb21, align 8
+  %gepbb22 = getelementptr i8** %Abb2, i32 2
+  %ybb2 = load i8** %gepbb22
+  br label %bb3
+
+bb3:
+  %A = phi i8** [ %Abb1, %bb1 ], [ %Abb2, %bb2 ]
+  %y = phi i8* [ %ybb1, %bb1 ], [ %ybb2, %bb2 ]
+  tail call i8* @objc_retain(i8* %x)
+  call void @use_alloca(i8** %A)
+  call void @objc_release(i8* %y), !clang.imprecise_release !0
+  call void @use_pointer(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+; Make sure in the presence of allocas, if we find a cfghazard we do not perform
+; code motion even if we are known safe. These two concepts are separate and
+; should be treated as such.
+;
+; rdar://13949644
+
+; CHECK: define void @test3a() {
+; CHECK: entry:
+; CHECK:   @objc_retainAutoreleasedReturnValue
+; CHECK:   @objc_retain
+; CHECK:   @objc_retain
+; CHECK:   @objc_retain
+; CHECK:   @objc_retain
+; CHECK: arraydestroy.body:
+; CHECK:   @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: arraydestroy.done:
+; CHECK-NOT: @objc_release
+; CHECK: arraydestroy.body1:
+; CHECK:   @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: arraydestroy.done1:
+; CHECK: @objc_release
+; CHECK: ret void
+; CHECK: }
+define void @test3a() {
+entry:
+  %keys = alloca [2 x i8*], align 16
+  %objs = alloca [2 x i8*], align 16
+  
+  %call1 = call i8* @returner()
+  %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call1)
+
+  %objs.begin = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 0
+  tail call i8* @objc_retain(i8* %call1)
+  store i8* %call1, i8** %objs.begin, align 8
+  %objs.elt = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 1
+  tail call i8* @objc_retain(i8* %call1)
+  store i8* %call1, i8** %objs.elt
+
+  %call2 = call i8* @returner1()
+  %call3 = call i8* @returner2()
+  %keys.begin = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 0
+  tail call i8* @objc_retain(i8* %call2)
+  store i8* %call2, i8** %keys.begin, align 8
+  %keys.elt = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 1
+  tail call i8* @objc_retain(i8* %call3)
+  store i8* %call3, i8** %keys.elt  
+  
+  %gep = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 2
+  br label %arraydestroy.body
+
+arraydestroy.body:
+  %arraydestroy.elementPast = phi i8** [ %gep, %entry ], [ %arraydestroy.element, %arraydestroy.body ]
+  %arraydestroy.element = getelementptr inbounds i8** %arraydestroy.elementPast, i64 -1
+  %destroy_tmp = load i8** %arraydestroy.element, align 8
+  call void @objc_release(i8* %destroy_tmp), !clang.imprecise_release !0
+  %objs_ptr = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 0
+  %arraydestroy.cmp = icmp eq i8** %arraydestroy.element, %objs_ptr
+  br i1 %arraydestroy.cmp, label %arraydestroy.done, label %arraydestroy.body
+
+arraydestroy.done:
+  %gep1 = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 2
+  br label %arraydestroy.body1
+
+arraydestroy.body1:
+  %arraydestroy.elementPast1 = phi i8** [ %gep1, %arraydestroy.done ], [ %arraydestroy.element1, %arraydestroy.body1 ]
+  %arraydestroy.element1 = getelementptr inbounds i8** %arraydestroy.elementPast1, i64 -1
+  %destroy_tmp1 = load i8** %arraydestroy.element1, align 8
+  call void @objc_release(i8* %destroy_tmp1), !clang.imprecise_release !0
+  %keys_ptr = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 0
+  %arraydestroy.cmp1 = icmp eq i8** %arraydestroy.element1, %keys_ptr
+  br i1 %arraydestroy.cmp1, label %arraydestroy.done1, label %arraydestroy.body1
+
+arraydestroy.done1:
+  call void @objc_release(i8* %call1), !clang.imprecise_release !0
+  ret void
+}
+
+; Make sure that even though we stop said code motion we still allow for
+; pointers to be removed if we are known safe in both directions.
+;
+; rdar://13949644
+
+; CHECK: define void @test3b() {
+; CHECK: entry:
+; CHECK:   @objc_retainAutoreleasedReturnValue
+; CHECK:   @objc_retain
+; CHECK:   @objc_retain
+; CHECK:   @objc_retain
+; CHECK:   @objc_retain
+; CHECK: arraydestroy.body:
+; CHECK:   @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: arraydestroy.done:
+; CHECK-NOT: @objc_release
+; CHECK: arraydestroy.body1:
+; CHECK:   @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: arraydestroy.done1:
+; CHECK: @objc_release
+; CHECK: ret void
+; CHECK: }
+define void @test3b() {
+entry:
+  %keys = alloca [2 x i8*], align 16
+  %objs = alloca [2 x i8*], align 16
+  
+  %call1 = call i8* @returner()
+  %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call1)
+  %tmp1 = tail call i8* @objc_retain(i8* %call1)
+
+  %objs.begin = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 0
+  tail call i8* @objc_retain(i8* %call1)
+  store i8* %call1, i8** %objs.begin, align 8
+  %objs.elt = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 1
+  tail call i8* @objc_retain(i8* %call1)
+  store i8* %call1, i8** %objs.elt
+
+  %call2 = call i8* @returner1()
+  %call3 = call i8* @returner2()
+  %keys.begin = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 0
+  tail call i8* @objc_retain(i8* %call2)
+  store i8* %call2, i8** %keys.begin, align 8
+  %keys.elt = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 1
+  tail call i8* @objc_retain(i8* %call3)
+  store i8* %call3, i8** %keys.elt  
+  
+  %gep = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 2
+  br label %arraydestroy.body
+
+arraydestroy.body:
+  %arraydestroy.elementPast = phi i8** [ %gep, %entry ], [ %arraydestroy.element, %arraydestroy.body ]
+  %arraydestroy.element = getelementptr inbounds i8** %arraydestroy.elementPast, i64 -1
+  %destroy_tmp = load i8** %arraydestroy.element, align 8
+  call void @objc_release(i8* %destroy_tmp), !clang.imprecise_release !0
+  %objs_ptr = getelementptr inbounds [2 x i8*]* %objs, i64 0, i64 0
+  %arraydestroy.cmp = icmp eq i8** %arraydestroy.element, %objs_ptr
+  br i1 %arraydestroy.cmp, label %arraydestroy.done, label %arraydestroy.body
+
+arraydestroy.done:
+  %gep1 = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 2
+  br label %arraydestroy.body1
+
+arraydestroy.body1:
+  %arraydestroy.elementPast1 = phi i8** [ %gep1, %arraydestroy.done ], [ %arraydestroy.element1, %arraydestroy.body1 ]
+  %arraydestroy.element1 = getelementptr inbounds i8** %arraydestroy.elementPast1, i64 -1
+  %destroy_tmp1 = load i8** %arraydestroy.element1, align 8
+  call void @objc_release(i8* %destroy_tmp1), !clang.imprecise_release !0
+  %keys_ptr = getelementptr inbounds [2 x i8*]* %keys, i64 0, i64 0
+  %arraydestroy.cmp1 = icmp eq i8** %arraydestroy.element1, %keys_ptr
+  br i1 %arraydestroy.cmp1, label %arraydestroy.done1, label %arraydestroy.body1
+
+arraydestroy.done1:
+  call void @objc_release(i8* %call1), !clang.imprecise_release !0
+  call void @objc_release(i8* %call1), !clang.imprecise_release !0
+  ret void
+}
+
+!0 = metadata !{}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll
index c0dea4b1b6a0..f76ba3b80b06 100644
--- a/test/Transforms/ObjCARC/arc-annotations.ll
+++ b/test/Transforms/ObjCARC/arc-annotations.ll
@@ -27,7 +27,7 @@ declare i8* @returner()
 ; Simple retain+release pair deletion, with some intervening control
 ; flow and harmless instructions.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: entry:
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
 ; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup ![[ANN0:[0-9]+]], !llvm.arc.annotation.topdown ![[ANN1:[0-9]+]]
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index ca1279206591..885935c51533 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -177,12 +177,12 @@ if.end5:                                          ; preds = %if.then3, %if.end
   ret void
 }
 
-; CHECK: define void @test1b_imprecise(
+; CHECK-LABEL: define void @test1b_imprecise(
 ; CHECK: entry:
 ; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: if.end5:
-; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release ![[RELEASE:[0-9]+]]
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test1b_imprecise(i8* %x, i1 %p, i1 %q) {
@@ -210,7 +210,7 @@ if.end5:                                          ; preds = %if.then3, %if.end
 ; Like test0 but the pointer is passed to an intervening call,
 ; so the optimization is not safe.
 
-; CHECK: define void @test2_precise(
+; CHECK-LABEL: define void @test2_precise(
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
@@ -239,7 +239,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test2_imprecise(
+; CHECK-LABEL: define void @test2_imprecise(
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
@@ -273,7 +273,7 @@ return:
 
 ; TODO: For now, assume this can't happen.
 
-; CHECK: define void @test3_precise(
+; CHECK-LABEL: define void @test3_precise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
@@ -293,7 +293,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test3_imprecise(
+; CHECK-LABEL: define void @test3_imprecise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
@@ -319,7 +319,7 @@ return:
 ; Like test0 but the retain is in a loop,
 ; so the optimization is not safe.
 
-; CHECK: define void @test4_precise(
+; CHECK-LABEL: define void @test4_precise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
@@ -339,7 +339,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test4_imprecise(
+; CHECK-LABEL: define void @test4_imprecise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
@@ -363,7 +363,7 @@ return:
 ; Like test0 but the pointer is conditionally passed to an intervening call,
 ; so the optimization is not safe.
 
-; CHECK: define void @test5a(
+; CHECK-LABEL: define void @test5a(
 ; CHECK: @objc_retain(i8*
 ; CHECK: @objc_release
 ; CHECK: }
@@ -379,7 +379,7 @@ entry:
   ret void
 }
 
-; CHECK: define void @test5b(
+; CHECK-LABEL: define void @test5b(
 ; CHECK: @objc_retain(i8*
 ; CHECK: @objc_release
 ; CHECK: }
@@ -399,7 +399,7 @@ entry:
 ; retain+release pair deletion, where the release happens on two different
 ; flow paths.
 
-; CHECK: define void @test6a(
+; CHECK-LABEL: define void @test6a(
 ; CHECK: entry:
 ; CHECK:   tail call i8* @objc_retain(
 ; CHECK: t:
@@ -433,7 +433,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test6b(
+; CHECK-LABEL: define void @test6b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test6b(i32* %x, i1 %p) nounwind {
@@ -461,7 +461,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test6c(
+; CHECK-LABEL: define void @test6c(
 ; CHECK: entry:
 ; CHECK:   tail call i8* @objc_retain(
 ; CHECK: t:
@@ -495,7 +495,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test6d(
+; CHECK-LABEL: define void @test6d(
 ; CHECK: entry:
 ; CHECK:   tail call i8* @objc_retain(
 ; CHECK: t:
@@ -533,7 +533,7 @@ return:
 ; retain+release pair deletion, where the retain happens on two different
 ; flow paths.
 
-; CHECK:     define void @test7(
+; CHECK-LABEL:     define void @test7(
 ; CHECK:     entry:
 ; CHECK-NOT:   objc_
 ; CHECK:     t:
@@ -567,7 +567,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test7b(
+; CHECK-LABEL: define void @test7b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test7b(i32* %x, i1 %p) nounwind {
@@ -596,7 +596,7 @@ return:
 
 ; Like test7, but there's a retain/retainBlock mismatch. Don't delete!
 
-; CHECK: define void @test7c
+; CHECK-LABEL: define void @test7c(
 ; CHECK: t:
 ; CHECK:   call i8* @objc_retainBlock
 ; CHECK: f:
@@ -631,7 +631,7 @@ return:
 ; retain+release pair deletion, where the retain and release both happen on
 ; different flow paths. Wild!
 
-; CHECK: define void @test8a(
+; CHECK-LABEL: define void @test8a(
 ; CHECK: entry:
 ; CHECK: t:
 ; CHECK:   @objc_retain
@@ -679,7 +679,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test8b(
+; CHECK-LABEL: define void @test8b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test8b(i32* %x, i1 %p, i1 %q) nounwind {
@@ -717,7 +717,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test8c(
+; CHECK-LABEL: define void @test8c(
 ; CHECK: entry:
 ; CHECK: t:
 ; CHECK:   @objc_retain
@@ -765,7 +765,7 @@ return:
   ret void
 }
 
-; CHECK: define void @test8d(
+; CHECK-LABEL: define void @test8d(
 ; CHECK: entry:
 ; CHECK: t:
 ; CHECK:   @objc_retain
@@ -815,7 +815,7 @@ return:
 
 ; Trivial retain+release pair deletion.
 
-; CHECK: define void @test9(
+; CHECK-LABEL: define void @test9(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test9(i8* %x) nounwind {
@@ -827,7 +827,7 @@ entry:
 
 ; Retain+release pair, but on an unknown pointer relationship. Don't delete!
 
-; CHECK: define void @test9b
+; CHECK-LABEL: define void @test9b(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK: @objc_release(i8* %s)
 ; CHECK: }
@@ -841,7 +841,7 @@ entry:
 
 ; Trivial retain+release pair with intervening calls - don't delete!
 
-; CHECK: define void @test10(
+; CHECK-LABEL: define void @test10(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK: @callee
 ; CHECK: @use_pointer
@@ -860,7 +860,7 @@ entry:
 ; Also, add a tail keyword, since objc_retain can never be passed
 ; a stack argument.
 
-; CHECK: define void @test11(
+; CHECK-LABEL: define void @test11(
 ; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
@@ -874,7 +874,7 @@ entry:
 
 ; Same as test11 but with no use_pointer call. Delete the pair!
 
-; CHECK: define void @test11a(
+; CHECK-LABEL: define void @test11a(
 ; CHECK: entry:
 ; CHECK-NEXT: ret void
 ; CHECK: }
@@ -889,7 +889,7 @@ entry:
 ; since if the frontend emitted code for an __autoreleasing variable, we may
 ; want it to be in the autorelease pool.
 
-; CHECK: define i8* @test11b(
+; CHECK-LABEL: define i8* @test11b(
 ; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
 ; CHECK: }
@@ -903,7 +903,7 @@ entry:
 ; Trivial retain,release pair with intervening call, but it's dominated
 ; by another retain - delete!
 
-; CHECK: define void @test12(
+; CHECK-LABEL: define void @test12(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc_
@@ -920,7 +920,7 @@ entry:
 
 ; Trivial retain,autorelease pair. Don't delete!
 
-; CHECK: define void @test13(
+; CHECK-LABEL: define void @test13(
 ; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %x)
@@ -937,7 +937,7 @@ entry:
 
 ; Delete the retain+release pair.
 
-; CHECK: define void @test13b
+; CHECK-LABEL: define void @test13b(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_retain(i8* %x)
 ; CHECK-NEXT: @use_pointer
@@ -957,7 +957,7 @@ entry:
 ; Don't delete the retain+release pair because there's an
 ; autoreleasePoolPop in the way.
 
-; CHECK: define void @test13c
+; CHECK-LABEL: define void @test13c(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK: @objc_autoreleasePoolPop
 ; CHECK: @objc_retain(i8* %x)
@@ -978,7 +978,7 @@ entry:
 ; Like test13c, but there's an autoreleasePoolPush in the way, but that
 ; doesn't matter.
 
-; CHECK: define void @test13d
+; CHECK-LABEL: define void @test13d(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_retain(i8* %x)
 ; CHECK-NEXT: @objc_autoreleasePoolPush
@@ -1000,7 +1000,7 @@ entry:
 ; Trivial retain,release pair with intervening call, but it's post-dominated
 ; by another release - delete!
 
-; CHECK: define void @test14(
+; CHECK-LABEL: define void @test14(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: @use_pointer
@@ -1020,7 +1020,7 @@ entry:
 ; Trivial retain,autorelease pair with intervening call, but it's post-dominated
 ; by another release. Don't delete anything.
 
-; CHECK: define void @test15(
+; CHECK-LABEL: define void @test15(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_retain(i8* %x)
 ; CHECK-NEXT: @use_pointer
@@ -1040,7 +1040,7 @@ entry:
 ; Trivial retain,autorelease pair, post-dominated
 ; by another release. Delete the retain and release.
 
-; CHECK: define void @test15b
+; CHECK-LABEL: define void @test15b(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_retain
 ; CHECK-NEXT: @objc_autorelease
@@ -1055,7 +1055,7 @@ entry:
   ret void
 }
 
-; CHECK: define void @test15c
+; CHECK-LABEL: define void @test15c(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_autorelease
 ; CHECK-NEXT: ret void
@@ -1070,7 +1070,7 @@ entry:
 
 ; Retain+release pairs in diamonds, all dominated by a retain.
 
-; CHECK: define void @test16a(
+; CHECK-LABEL: define void @test16a(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1104,7 +1104,7 @@ purple:
   ret void
 }
 
-; CHECK: define void @test16b(
+; CHECK-LABEL: define void @test16b(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1138,7 +1138,7 @@ purple:
   ret void
 }
 
-; CHECK: define void @test16c(
+; CHECK-LABEL: define void @test16c(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1172,7 +1172,7 @@ purple:
   ret void
 }
 
-; CHECK: define void @test16d(
+; CHECK-LABEL: define void @test16d(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -1209,7 +1209,7 @@ purple:
 
 ; Retain+release pairs in diamonds, all post-dominated by a release.
 
-; CHECK: define void @test17(
+; CHECK-LABEL: define void @test17(
 ; CHECK-NOT: @objc_
 ; CHECK: purple:
 ; CHECK: @objc_release
@@ -1246,7 +1246,7 @@ purple:
 
 ; Delete no-ops.
 
-; CHECK: define void @test18(
+; CHECK-LABEL: define void @test18(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test18() {
@@ -1258,7 +1258,7 @@ define void @test18() {
 
 ; Delete no-ops where undef can be assumed to be null.
 
-; CHECK: define void @test18b
+; CHECK-LABEL: define void @test18b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test18b() {
@@ -1294,7 +1294,7 @@ entry:
 
 ; Bitcast insertion
 
-; CHECK: define void @test20(
+; CHECK-LABEL: define void @test20(
 ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: invoke
 ; CHECK: }
@@ -1322,7 +1322,7 @@ if.end:                                           ; preds = %invoke.cont23
 ; Delete a redundant retain,autorelease when forwaring a call result
 ; directly to a return value.
 
-; CHECK: define i8* @test21(
+; CHECK-LABEL: define i8* @test21(
 ; CHECK: call i8* @returner()
 ; CHECK-NEXT: ret i8* %call
 ; CHECK-NEXT: }
@@ -1336,7 +1336,7 @@ entry:
 
 ; Move an objc call up through a phi that has null operands.
 
-; CHECK: define void @test22(
+; CHECK-LABEL: define void @test22(
 ; CHECK: B:
 ; CHECK:   %1 = bitcast double* %p to i8*
 ; CHECK:   call void @objc_release(i8* %1)
@@ -1357,58 +1357,9 @@ C:
   ret void
 }
 
-; Optimize objc_retainBlock.
-
-; CHECK: define void @test23(
-; CHECK-NOT: @objc_
-; CHECK: }
-%block0 = type { i64, i64, i8*, i8* }
-%block1 = type { i8**, i32, i32, i32 (%struct.__block_literal_1*)*, %block0* }
-%struct.__block_descriptor = type { i64, i64 }
-%struct.__block_literal_1 = type { i8**, i32, i32, i8**, %struct.__block_descriptor* }
-@__block_holder_tmp_1 = external constant %block1
-define void @test23() {
-entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
-  ret void
-}
-
-; Don't optimize objc_retainBlock, but do strength reduce it.
-
-; CHECK: define void @test23b(i8* %p) {
-; CHECK: @objc_retain
-; CHECK: @objc_release
-; CHECK: }
-define void @test23b(i8* %p) {
-entry:
-  %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0
-  call void @callee()
-  call void @use_pointer(i8* %p)
-  call void @objc_release(i8* %p) nounwind
-  ret void
-}
-
-; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata.
-
-; CHECK: define void @test23c(
-; CHECK: @objc_retainBlock
-; CHECK: @objc_release
-; CHECK: }
-define void @test23c() {
-entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
-  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
-  ret void
-}
-
 ; Any call can decrement a retain count.
 
-; CHECK: define void @test24(
+; CHECK-LABEL: define void @test24(
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
@@ -1423,7 +1374,7 @@ define void @test24(i8* %r, i8* %a) {
 ; Don't move a retain/release pair if the release can be moved
 ; but the retain can't be moved to balance it.
 
-; CHECK: define void @test25(
+; CHECK-LABEL: define void @test25(
 ; CHECK: entry:
 ; CHECK:   call i8* @objc_retain(i8* %p)
 ; CHECK: true:
@@ -1448,7 +1399,7 @@ done:
 ; Don't move a retain/release pair if the retain can be moved
 ; but the release can't be moved to balance it.
 
-; CHECK: define void @test26(
+; CHECK-LABEL: define void @test26(
 ; CHECK: entry:
 ; CHECK:   call i8* @objc_retain(i8* %p)
 ; CHECK: true:
@@ -1472,7 +1423,7 @@ done:
 
 ; Don't sink the retain,release into the loop.
 
-; CHECK: define void @test27(
+; CHECK-LABEL: define void @test27(
 ; CHECK: entry:
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: loop:
@@ -1497,7 +1448,7 @@ done:
 
 ; Trivial code motion case: Triangle.
 
-; CHECK: define void @test28(
+; CHECK-LABEL: define void @test28(
 ; CHECK-NOT: @objc_
 ; CHECK: true:
 ; CHECK: call i8* @objc_retain(
@@ -1525,7 +1476,7 @@ done:
 ; Trivial code motion case: Triangle, but no metadata. Don't move past
 ; unrelated memory references!
 
-; CHECK: define void @test28b
+; CHECK-LABEL: define void @test28b(
 ; CHECK: call i8* @objc_retain(
 ; CHECK: true:
 ; CHECK-NOT: @objc_
@@ -1555,7 +1506,7 @@ done:
 ; Trivial code motion case: Triangle, with metadata. Do move past
 ; unrelated memory references! And preserve the metadata.
 
-; CHECK: define void @test28c
+; CHECK-LABEL: define void @test28c(
 ; CHECK-NOT: @objc_
 ; CHECK: true:
 ; CHECK: call i8* @objc_retain(
@@ -1583,7 +1534,7 @@ done:
 
 ; Like test28. but with two releases.
 
-; CHECK: define void @test29(
+; CHECK-LABEL: define void @test29(
 ; CHECK-NOT: @objc_
 ; CHECK: true:
 ; CHECK: call i8* @objc_retain(
@@ -1618,7 +1569,7 @@ ohno:
 ; Basic case with the use and call in a diamond
 ; with an extra release.
 
-; CHECK: define void @test30(
+; CHECK-LABEL: define void @test30(
 ; CHECK-NOT: @objc_
 ; CHECK: true:
 ; CHECK: call i8* @objc_retain(
@@ -1657,7 +1608,7 @@ ohno:
 
 ; Basic case with a mergeable release.
 
-; CHECK: define void @test31(
+; CHECK-LABEL: define void @test31(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: call void @callee()
 ; CHECK: store
@@ -1686,7 +1637,7 @@ false:
 
 ; Don't consider bitcasts or getelementptrs direct uses.
 
-; CHECK: define void @test32(
+; CHECK-LABEL: define void @test32(
 ; CHECK-NOT: @objc_
 ; CHECK: true:
 ; CHECK: call i8* @objc_retain(
@@ -1715,7 +1666,7 @@ done:
 
 ; Do consider icmps to be direct uses.
 
-; CHECK: define void @test33(
+; CHECK-LABEL: define void @test33(
 ; CHECK-NOT: @objc_
 ; CHECK: true:
 ; CHECK: call i8* @objc_retain(
@@ -1745,7 +1696,7 @@ done:
 ; Delete retain,release if there's just a possible dec and we have imprecise
 ; releases.
 
-; CHECK: define void @test34a(
+; CHECK-LABEL: define void @test34a(
 ; CHECK:   call i8* @objc_retain
 ; CHECK: true:
 ; CHECK: done:
@@ -1767,7 +1718,7 @@ done:
   ret void
 }
 
-; CHECK: define void @test34b(
+; CHECK-LABEL: define void @test34b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test34b(i8* %p, i1 %x, i8* %y) {
@@ -1791,7 +1742,7 @@ done:
 ; release.
 
 ; Precise.
-; CHECK: define void @test35a(
+; CHECK-LABEL: define void @test35a(
 ; CHECK: entry:
 ; CHECK:   call i8* @objc_retain
 ; CHECK: true:
@@ -1815,7 +1766,7 @@ done:
 }
 
 ; Imprecise.
-; CHECK: define void @test35b(
+; CHECK-LABEL: define void @test35b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test35b(i8* %p, i1 %x, i8* %y) {
@@ -1836,7 +1787,7 @@ done:
 
 ; Delete a retain,release if there's no actual use and we have precise release.
 
-; CHECK: define void @test36a(
+; CHECK-LABEL: define void @test36a(
 ; CHECK: @objc_retain
 ; CHECK: call void @callee()
 ; CHECK-NOT: @objc_
@@ -1854,7 +1805,7 @@ entry:
 
 ; Like test36, but with metadata.
 
-; CHECK: define void @test36b(
+; CHECK-LABEL: define void @test36b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test36b(i8* %p) {
@@ -1868,7 +1819,7 @@ entry:
 
 ; Be aggressive about analyzing phis to eliminate possible uses.
 
-; CHECK: define void @test38(
+; CHECK-LABEL: define void @test38(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test38(i8* %p, i1 %u, i1 %m, i8* %z, i8* %y, i8* %x, i8* %w) {
@@ -1902,7 +1853,7 @@ g:
 
 ; Delete retain,release pairs around loops.
 
-; CHECK: define void @test39(
+; CHECK-LABEL: define void @test39(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test39(i8* %p) {
@@ -1920,7 +1871,7 @@ exit:                                             ; preds = %loop
 
 ; Delete retain,release pairs around loops containing uses.
 
-; CHECK: define void @test39b(
+; CHECK-LABEL: define void @test39b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test39b(i8* %p) {
@@ -1939,7 +1890,7 @@ exit:                                             ; preds = %loop
 
 ; Delete retain,release pairs around loops containing potential decrements.
 
-; CHECK: define void @test39c(
+; CHECK-LABEL: define void @test39c(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test39c(i8* %p) {
@@ -1959,7 +1910,7 @@ exit:                                             ; preds = %loop
 ; Delete retain,release pairs around loops even if
 ; the successors are in a different order.
 
-; CHECK: define void @test40(
+; CHECK-LABEL: define void @test40(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test40(i8* %p) {
@@ -1979,7 +1930,7 @@ exit:                                             ; preds = %loop
 ; Do the known-incremented retain+release elimination even if the pointer
 ; is also autoreleased.
 
-; CHECK: define void @test42(
+; CHECK-LABEL: define void @test42(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: call i8* @objc_retain(i8* %p)
 ; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
@@ -2001,7 +1952,7 @@ entry:
 ; Don't the known-incremented retain+release elimination if the pointer is
 ; autoreleased and there's an autoreleasePoolPop.
 
-; CHECK: define void @test43(
+; CHECK-LABEL: define void @test43(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: call i8* @objc_retain(i8* %p)
 ; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
@@ -2027,7 +1978,7 @@ entry:
 ; Do the known-incremented retain+release elimination if the pointer is
 ; autoreleased and there's an autoreleasePoolPush.
 
-; CHECK: define void @test43b
+; CHECK-LABEL: define void @test43b(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: call i8* @objc_retain(i8* %p)
 ; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
@@ -2050,7 +2001,7 @@ entry:
 
 ; Do retain+release elimination for non-provenance pointers.
 
-; CHECK: define void @test44(
+; CHECK-LABEL: define void @test44(
 ; CHECK-NOT: objc_
 ; CHECK: }
 define void @test44(i8** %pp) {
@@ -2063,7 +2014,7 @@ define void @test44(i8** %pp) {
 ; Don't delete retain+release with an unknown-provenance
 ; may-alias objc_release between them.
 
-; CHECK: define void @test45(
+; CHECK-LABEL: define void @test45(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: call void @objc_release(i8* %q)
 ; CHECK: call void @use_pointer(i8* %p)
@@ -2081,7 +2032,7 @@ define void @test45(i8** %pp, i8** %qq) {
 
 ; Don't delete retain and autorelease here.
 
-; CHECK: define void @test46(
+; CHECK-LABEL: define void @test46(
 ; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
 ; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
@@ -2102,7 +2053,7 @@ false:
 
 ; Delete no-op cast calls.
 
-; CHECK: define i8* @test47(
+; CHECK-LABEL: define i8* @test47(
 ; CHECK-NOT: call
 ; CHECK: ret i8* %p
 ; CHECK: }
@@ -2113,7 +2064,7 @@ define i8* @test47(i8* %p) nounwind {
 
 ; Delete no-op cast calls.
 
-; CHECK: define i8* @test48(
+; CHECK-LABEL: define i8* @test48(
 ; CHECK-NOT: call
 ; CHECK: ret i8* %p
 ; CHECK: }
@@ -2124,7 +2075,7 @@ define i8* @test48(i8* %p) nounwind {
 
 ; Delete no-op cast calls.
 
-; CHECK: define i8* @test49(
+; CHECK-LABEL: define i8* @test49(
 ; CHECK-NOT: call
 ; CHECK: ret i8* %p
 ; CHECK: }
@@ -2136,7 +2087,7 @@ define i8* @test49(i8* %p) nounwind {
 ; Do delete retain+release with intervening stores of the address value if we
 ; have imprecise release attached to objc_release.
 
-; CHECK:      define void @test50a(
+; CHECK-LABEL:      define void @test50a(
 ; CHECK-NEXT:   call i8* @objc_retain
 ; CHECK-NEXT:   call void @callee
 ; CHECK-NEXT:   store
@@ -2151,7 +2102,7 @@ define void @test50a(i8* %p, i8** %pp) {
   ret void
 }
 
-; CHECK: define void @test50b(
+; CHECK-LABEL: define void @test50b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test50b(i8* %p, i8** %pp) {
@@ -2166,7 +2117,7 @@ define void @test50b(i8* %p, i8** %pp) {
 ; Don't delete retain+release with intervening stores through the
 ; address value.
 
-; CHECK: define void @test51a(
+; CHECK-LABEL: define void @test51a(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: call void @objc_release(i8* %p)
 ; CHECK: ret void
@@ -2179,7 +2130,7 @@ define void @test51a(i8* %p) {
   ret void
 }
 
-; CHECK: define void @test51b(
+; CHECK-LABEL: define void @test51b(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: call void @objc_release(i8* %p)
 ; CHECK: ret void
@@ -2195,7 +2146,7 @@ define void @test51b(i8* %p) {
 ; Don't delete retain+release with intervening use of a pointer of
 ; unknown provenance.
 
-; CHECK: define void @test52a(
+; CHECK-LABEL: define void @test52a(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call void @callee()
 ; CHECK: call void @use_pointer(i8* %z)
@@ -2212,7 +2163,7 @@ define void @test52a(i8** %zz, i8** %pp) {
   ret void
 }
 
-; CHECK: define void @test52b(
+; CHECK-LABEL: define void @test52b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call void @callee()
 ; CHECK: call void @use_pointer(i8* %z)
@@ -2234,7 +2185,7 @@ define void @test52b(i8** %zz, i8** %pp) {
 ; Oops. That's wrong. Clang sometimes uses function types gratuitously.
 ; See rdar://10551239.
 
-; CHECK: define void @test53(
+; CHECK-LABEL: define void @test53(
 ; CHECK: @objc_
 ; CHECK: }
 define void @test53(void ()** %zz, i8** %pp) {
@@ -2249,9 +2200,9 @@ define void @test53(void ()** %zz, i8** %pp) {
 
 ; Convert autorelease to release if the value is unused.
 
-; CHECK: define void @test54(
+; CHECK-LABEL: define void @test54(
 ; CHECK: call i8* @returner()
-; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release ![[RELEASE]]
 ; CHECK-NEXT: ret void
 ; CHECK: }
 define void @test54() {
@@ -2262,7 +2213,7 @@ define void @test54() {
 
 ; Nested retain+release pairs. Delete them both.
 
-; CHECK: define void @test55(
+; CHECK-LABEL: define void @test55(
 ; CHECK-NOT: @objc
 ; CHECK: }
 define void @test55(i8* %x) { 
@@ -2279,13 +2230,13 @@ entry:
 ; can be partially eliminated. Plus an extra outer pair to
 ; eliminate, for fun.
 
-; CHECK: define void @test56(
+; CHECK-LABEL: define void @test56(
 ; CHECK-NOT: @objc
 ; CHECK: if.then:
 ; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
 ; CHECK-NEXT: tail call void @use_pointer(i8* %x)
-; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release ![[RELEASE]]
 ; CHECK-NEXT: br label %if.end
 ; CHECK-NOT: @objc
 ; CHECK: }
@@ -2313,7 +2264,7 @@ if.end:                                           ; preds = %entry, %if.then
 ; known unnecessary because the presence of the second one means that
 ; the first one won't be deleting the object.
 
-; CHECK:      define void @test57(
+; CHECK-LABEL:      define void @test57(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
@@ -2339,7 +2290,7 @@ entry:
 ; An adjacent retain+release pair is sufficient even if it will be
 ; removed itself.
 
-; CHECK:      define void @test58(
+; CHECK-LABEL:      define void @test58(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
@@ -2358,7 +2309,7 @@ entry:
 
 ; Don't delete the second retain+release pair in an adjacent set.
 
-; CHECK:      define void @test59(
+; CHECK-LABEL:      define void @test59(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
 ; CHECK-NEXT:   call void @use_pointer(i8* %x)
@@ -2385,7 +2336,7 @@ entry:
 ; We have a precise lifetime retain/release here. We can not remove them since
 ; @something is not constant.
 
-; CHECK: define void @test60a(
+; CHECK-LABEL: define void @test60a(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call void @objc_release
 ; CHECK: }
@@ -2399,7 +2350,7 @@ define void @test60a() {
   ret void
 }
 
-; CHECK: define void @test60b(
+; CHECK-LABEL: define void @test60b(
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: call i8* @objc_retain
 ; CHECK-NOT: call i8* @objc_rrelease
@@ -2415,7 +2366,7 @@ define void @test60b() {
   ret void
 }
 
-; CHECK: define void @test60c(
+; CHECK-LABEL: define void @test60c(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test60c() {
@@ -2428,7 +2379,7 @@ define void @test60c() {
   ret void
 }
 
-; CHECK: define void @test60d(
+; CHECK-LABEL: define void @test60d(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test60d() {
@@ -2441,7 +2392,7 @@ define void @test60d() {
   ret void
 }
 
-; CHECK: define void @test60e(
+; CHECK-LABEL: define void @test60e(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test60e() {
@@ -2457,7 +2408,7 @@ define void @test60e() {
 ; Constant pointers to objects don't need to be considered related to other
 ; pointers.
 
-; CHECK: define void @test61(
+; CHECK-LABEL: define void @test61(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test61() {
@@ -2472,7 +2423,7 @@ define void @test61() {
 ; Delete a retain matched by releases when one is inside the loop and the
 ; other is outside the loop.
 
-; CHECK: define void @test62(
+; CHECK-LABEL: define void @test62(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test62(i8* %x, i1* %p) nounwind {
@@ -2496,7 +2447,7 @@ exit:
 ; Like test62 but with no release in exit.
 ; Don't delete anything!
 
-; CHECK: define void @test63(
+; CHECK-LABEL: define void @test63(
 ; CHECK: loop:
 ; CHECK:   tail call i8* @objc_retain(i8* %x)
 ; CHECK: loop.more:
@@ -2522,7 +2473,7 @@ exit:
 ; Like test62 but with no release in loop.more.
 ; Don't delete anything!
 
-; CHECK: define void @test64(
+; CHECK-LABEL: define void @test64(
 ; CHECK: loop:
 ; CHECK:   tail call i8* @objc_retain(i8* %x)
 ; CHECK: exit:
@@ -2547,7 +2498,7 @@ exit:
 
 ; Move an autorelease past a phi with a null.
 
-; CHECK: define i8* @test65(
+; CHECK-LABEL: define i8* @test65(
 ; CHECK: if.then:
 ; CHECK:   call i8* @objc_autorelease(
 ; CHECK: return:
@@ -2570,7 +2521,7 @@ return:                                           ; preds = %if.then, %entry
 
 ; Don't move an autorelease past an autorelease pool boundary.
 
-; CHECK: define i8* @test65b(
+; CHECK-LABEL: define i8* @test65b(
 ; CHECK: if.then:
 ; CHECK-NOT: @objc_autorelease
 ; CHECK: return:
@@ -2596,7 +2547,7 @@ return:                                           ; preds = %if.then, %entry
 ; Don't move an autoreleaseReuturnValue, which would break
 ; the RV optimization.
 
-; CHECK: define i8* @test65c(
+; CHECK-LABEL: define i8* @test65c(
 ; CHECK: if.then:
 ; CHECK-NOT: @objc_autorelease
 ; CHECK: return:
@@ -2620,7 +2571,7 @@ return:                                           ; preds = %if.then, %entry
 ; An objc_retain can serve as a may-use for a different pointer.
 ; rdar://11931823
 
-; CHECK: define void @test66a(
+; CHECK-LABEL: define void @test66a(
 ; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
 ; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
 ; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
@@ -2643,7 +2594,7 @@ cond.end:                                         ; preds = %cond.true, %entry
   ret void
 }
 
-; CHECK: define void @test66b(
+; CHECK-LABEL: define void @test66b(
 ; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
 ; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
 ; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
@@ -2666,7 +2617,7 @@ cond.end:                                         ; preds = %cond.true, %entry
   ret void
 }
 
-; CHECK: define void @test66c(
+; CHECK-LABEL: define void @test66c(
 ; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
 ; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
 ; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
@@ -2689,7 +2640,7 @@ cond.end:                                         ; preds = %cond.true, %entry
   ret void
 }
 
-; CHECK: define void @test66d(
+; CHECK-LABEL: define void @test66d(
 ; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
 ; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
 ; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
@@ -3037,9 +2988,32 @@ end:                                              ; preds = %if.end125, %if.end1
   ret void
 }
 
-!0 = metadata !{}
-
 declare i32 @__gxx_personality_v0(...)
 
+declare i32 @objc_sync_enter(i8*)
+declare i32 @objc_sync_exit(i8*)
+
+; Make sure that we understand that objc_sync_{enter,exit} are IC_User not
+; IC_Call/IC_CallOrUser.
+
+; CHECK-LABEL:      define void @test67(
+; CHECK-NEXT:   call i32 @objc_sync_enter(i8* %x)
+; CHECK-NEXT:   call i32 @objc_sync_exit(i8* %x)
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test67(i8* %x) {
+  call i8* @objc_retain(i8* %x)
+  call i32 @objc_sync_enter(i8* %x)
+  call i32 @objc_sync_exit(i8* %x)
+  call void @objc_release(i8* %x), !clang.imprecise_release !0
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+
+!0 = metadata !{}
+!1 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+
 ; CHECK: attributes #0 = { nounwind readnone }
 ; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: ![[RELEASE]] = metadata !{}
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 0156d5bfb464..61e5a3b1dccb 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -10,7 +10,7 @@ declare void @objc_release(i8*)
 declare void @callee()
 declare void @block_callee(void ()*)
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK:   call i8* @objc_retain(
 ; CHECK: for.body:
 ; CHECK-NOT: @objc
@@ -35,7 +35,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-; CHECK: define void @test1(
+; CHECK-LABEL: define void @test1(
 ; CHECK:   call i8* @objc_retain(
 ; CHECK: for.body:
 ; CHECK-NOT: @objc
@@ -60,7 +60,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-; CHECK: define void @test2(
+; CHECK-LABEL: define void @test2(
 ; CHECK:   call i8* @objc_retain(
 ; CHECK: for.body:
 ; CHECK-NOT: @objc
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 023604e105b0..50a2d9756648 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -8,7 +8,7 @@ declare void @use_pointer(i8*)
 
 @x = external global i8*
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: entry:
 ; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret void
@@ -137,7 +137,7 @@ entry:
 
 ; Like test0, but there's no store, so don't form an objc_storeStrong.
 
-;      CHECK: define void @test7(
+;      CHECK-LABEL: define void @test7(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
@@ -154,7 +154,7 @@ entry:
 
 ; Like test0, but there's no retain, so don't form an objc_storeStrong.
 
-;      CHECK: define void @test8(
+;      CHECK-LABEL: define void @test8(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %tmp = load i8** @x, align 8
 ; CHECK-NEXT:   store i8* %p, i8** @x, align 8
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index fc023f898198..0bf63a6e5174 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -18,7 +18,7 @@ declare i32 @__gxx_personality_sj0(...)
 
 ; Don't get in trouble on bugpointed code.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 define void @test0() {
 bb:
   %tmp = bitcast %4* undef to i8*
@@ -45,7 +45,7 @@ bb6:                                              ; preds = %bb5, %bb4, %bb4, %b
 ; When rewriting operands for a phi which has multiple operands
 ; for the same block, use the exactly same value in each block.
 
-; CHECK: define void @test1(
+; CHECK-LABEL: define void @test1(
 ; CHECK: %0 = bitcast i8* %tmp3 to %0* 
 ; CHECK: br i1 undef, label %bb7, label %bb7
 ; CHECK: bb7:
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index 3544f885528b..2259e17ec58c 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -12,7 +12,7 @@ declare void @use_pointer(i8*)
 declare i8* @returner()
 declare void @callee()
 
-; CHECK: define void @test0
+; CHECK-LABEL: define void @test0(
 ; CHECK: call void @use_pointer(i8* %0)
 ; CHECK: }
 define void @test0(i8* %x) nounwind {
@@ -22,7 +22,7 @@ entry:
   ret void
 }
 
-; CHECK: define void @test1
+; CHECK-LABEL: define void @test1(
 ; CHECK: call void @use_pointer(i8* %0)
 ; CHECK: }
 define void @test1(i8* %x) nounwind {
@@ -34,7 +34,7 @@ entry:
 
 ; Merge objc_retain and objc_autorelease into objc_retainAutorelease.
 
-; CHECK: define void @test2(
+; CHECK-LABEL: define void @test2(
 ; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK: }
 define void @test2(i8* %x) nounwind {
@@ -47,7 +47,7 @@ entry:
 
 ; Same as test2 but the value is returned. Do an RV optimization.
 
-; CHECK: define i8* @test2b(
+; CHECK-LABEL: define i8* @test2b(
 ; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) [[NUW]]
 ; CHECK: }
 define i8* @test2b(i8* %x) nounwind {
@@ -59,7 +59,7 @@ entry:
 
 ; Merge a retain,autorelease pair around a call.
 
-; CHECK: define void @test3(
+; CHECK-LABEL: define void @test3(
 ; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK: @use_pointer(i8* %0)
 ; CHECK: }
@@ -74,7 +74,7 @@ entry:
 ; Trivial retain,autorelease pair with intervening call, but it's post-dominated
 ; by another release. The retain and autorelease can be merged.
 
-; CHECK: define void @test4(
+; CHECK-LABEL: define void @test4(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: @objc_retainAutorelease(i8* %x) [[NUW]]
 ; CHECK-NEXT: @use_pointer
@@ -92,7 +92,7 @@ entry:
 
 ; Don't merge retain and autorelease if they're not control-equivalent.
 
-; CHECK: define void @test5(
+; CHECK-LABEL: define void @test5(
 ; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
 ; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
@@ -119,7 +119,7 @@ false:
 ; into objc_retainAutoreleasedReturnValueAutoreleaseReturnValue?
 ; Those entrypoints don't exist yet though.
 
-; CHECK: define i8* @test6(
+; CHECK-LABEL: define i8* @test6(
 ; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) [[NUW]]
 ; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) [[NUW]]
 ; CHECK: }
@@ -148,7 +148,7 @@ define i8* @test7(i8* %p) {
 
 ; Do the return value substitution for PHI nodes too.
 
-; CHECK: define i8* @test8(
+; CHECK-LABEL: define i8* @test8(
 ; CHECK: %retval = phi i8* [ %p, %if.then ], [ null, %entry ]
 ; CHECK: }
 define i8* @test8(i1 %x, i8* %c) {
@@ -165,7 +165,7 @@ return:                                           ; preds = %if.then, %entry
 }
 
 ; Kill calls to @clang.arc.use(...)
-; CHECK: define void @test9(
+; CHECK-LABEL: define void @test9(
 ; CHECK-NOT: clang.arc.use
 ; CHECK: }
 define void @test9(i8* %a, i8* %b) {
@@ -188,7 +188,7 @@ define void @test10() {
 ; Convert objc_retain to objc_retainAutoreleasedReturnValue if its
 ; argument is a return value.
 
-; CHECK: define void @test11(
+; CHECK-LABEL: define void @test11(
 ; CHECK-NEXT: %y = call i8* @returner()
 ; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
@@ -201,7 +201,7 @@ define void @test11() {
 ; Don't convert objc_retain to objc_retainAutoreleasedReturnValue if its
 ; argument is not a return value.
 
-; CHECK: define void @test12(
+; CHECK-LABEL: define void @test12(
 ; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
@@ -213,7 +213,7 @@ define void @test12(i8* %y) {
 ; Don't Convert objc_retain to objc_retainAutoreleasedReturnValue if it
 ; isn't next to the call providing its return value.
 
-; CHECK: define void @test13(
+; CHECK-LABEL: define void @test13(
 ; CHECK-NEXT: %y = call i8* @returner()
 ; CHECK-NEXT: call void @callee()
 ; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
diff --git a/test/Transforms/ObjCARC/empty-block.ll b/test/Transforms/ObjCARC/empty-block.ll
index ca5541365a4e..0440ab8f7c07 100644
--- a/test/Transforms/ObjCARC/empty-block.ll
+++ b/test/Transforms/ObjCARC/empty-block.ll
@@ -11,7 +11,7 @@ declare i8* @objc_autoreleaseReturnValue(i8*)
 
 ; Don't delete the autorelease.
 
-; CHECK: define %0* @test0(
+; CHECK-LABEL: define %0* @test0(
 ; CHECK:   @objc_retain
 ; CHECK: .lr.ph:
 ; CHECK-NOT: @objc_r
@@ -35,7 +35,7 @@ define %0* @test0(%0* %buffer) nounwind {
 
 ; Do delete the autorelease, even with the retain in a different block.
 
-; CHECK: define %0* @test1(
+; CHECK-LABEL: define %0* @test1(
 ; CHECK-NOT: @objc
 ; CHECK: }
 define %0* @test1() nounwind {
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index 05257d1d5cf8..072861720e63 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -111,37 +111,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK: attributes [[NUW]] = { nounwind }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!33, !34, !35, !36}
-
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", metadata !"clang version 3.3 ", i1 true, i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !27}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
-!6 = metadata !{i32 786473, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!llvm.module.flags = !{!33, !34, !35, !36, !61}
+
+!0 = metadata !{i32 786449, metadata !60, i32 16, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !27}
+!5 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = metadata !{i32 786473, metadata !60} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
 !10 = metadata !{metadata !11}
 !11 = metadata !{metadata !12, metadata !21, metadata !25}
 !12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
-!13 = metadata !{i32 786443, metadata !5, i32 10, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!14 = metadata !{i32 786454, null, metadata !"id", metadata !6, i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
-!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!16 = metadata !{i32 786451, null, metadata !"objc_object", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{i32 786443, metadata !60, metadata !5, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = metadata !{i32 786454, metadata !60, null, metadata !"id", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786447, metadata !60, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
 !17 = metadata !{metadata !18}
-!18 = metadata !{i32 786445, metadata !16, metadata !"isa", metadata !6, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
-!19 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!20 = metadata !{i32 786451, null, metadata !"objc_class", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!18 = metadata !{i32 786445, metadata !60, metadata !16, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
 !21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
-!22 = metadata !{i32 786443, metadata !13, i32 12, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!23 = metadata !{i32 786454, null, metadata !"BOOL", metadata !6, i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
-!24 = metadata !{i32 786468, null, metadata !"signed char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!22 = metadata !{i32 786443, metadata !60, metadata !13, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = metadata !{i32 786454, metadata !60, null, metadata !"BOOL", i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = metadata !{i32 786468, null, null, metadata !"signed char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
 !25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
-!26 = metadata !{i32 786443, metadata !22, i32 14, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!27 = metadata !{i32 786478, i32 0, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", metadata !6, i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
-!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = metadata !{i32 786443, metadata !60, metadata !22, i32 14, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !29 = metadata !{null, metadata !14}
 !30 = metadata !{metadata !31}
 !31 = metadata !{metadata !32}
@@ -154,21 +152,23 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !38 = metadata !{}
 !39 = metadata !{i32 15, i32 0, metadata !26, null}
 !40 = metadata !{i32 17, i32 0, metadata !41, null}
-!41 = metadata !{i32 786443, metadata !26, i32 16, i32 0, metadata !6, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!41 = metadata !{i32 786443, metadata !60, metadata !26, i32 16, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !42 = metadata !{i32 22, i32 0, metadata !26, null}
 !43 = metadata !{i32 23, i32 0, metadata !22, null}
 !44 = metadata !{i32 19, i32 0, metadata !41, null}
 !45 = metadata !{i8 0}
 !46 = metadata !{i32 20, i32 0, metadata !47, null}
-!47 = metadata !{i32 786443, metadata !48, i32 19, i32 0, metadata !6, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!48 = metadata !{i32 786443, metadata !26, i32 19, i32 0, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!47 = metadata !{i32 786443, metadata !60, metadata !48, i32 19, i32 0, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = metadata !{i32 786443, metadata !60, metadata !26, i32 19, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !49 = metadata !{i32 21, i32 0, metadata !47, null}
 !50 = metadata !{i32 24, i32 0, metadata !51, null}
-!51 = metadata !{i32 786443, metadata !22, i32 23, i32 0, metadata !6, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!51 = metadata !{i32 786443, metadata !60, metadata !22, i32 23, i32 0, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !52 = metadata !{i32 25, i32 0, metadata !51, null}
 !53 = metadata !{i32 27, i32 0, metadata !13, null}
 !54 = metadata !{i32 28, i32 0, metadata !13, null}
 !55 = metadata !{i32 4, i32 0, metadata !27, null}
 !56 = metadata !{i32 6, i32 0, metadata !57, null}
-!57 = metadata !{i32 786443, metadata !27, i32 5, i32 0, metadata !6, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!57 = metadata !{i32 786443, metadata !60, metadata !27, i32 5, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
 !58 = metadata !{i32 7, i32 0, metadata !57, null}
+!60 = metadata !{metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997"}
+!61 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll
index 8f252a0d343a..28f2e807798a 100644
--- a/test/Transforms/ObjCARC/escape.ll
+++ b/test/Transforms/ObjCARC/escape.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Don't optimize away the retainBlock, because the object's address "escapes"
 ; with the objc_storeWeak call.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape !0
 ; CHECK: call void @objc_release(i8* %tmp7) [[NUW]], !clang.imprecise_release !0
 ; CHECK: }
@@ -65,7 +65,7 @@ entry:
 ; Like test0, but it makes a regular call instead of a storeWeak call,
 ; so the optimization is valid.
 
-; CHECK: define void @test1(
+; CHECK-LABEL: define void @test1(
 ; CHECK-NOT: @objc_retainBlock
 ; CHECK: }
 define void @test1() nounwind {
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
index a828b5485f4c..2d120e7345a5 100644
--- a/test/Transforms/ObjCARC/gvn.ll
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -3,20 +3,39 @@
 @x = common global i8* null, align 8
 
 declare i8* @objc_retain(i8*)
+declare i32 @objc_sync_enter(i8*)
+declare i32 @objc_sync_exit(i8*)
 
 ; GVN should be able to eliminate this redundant load, with ARC-specific
 ; alias analysis.
 
-; CHECK: define i8* @foo(i32 %n)
+; CHECK-LABEL: define i8* @test0(i32 %n)
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: %s = load i8** @x
 ; CHECK-NOT: load
 ; CHECK: ret i8* %s
 ; CHECK-NEXT: }
-define i8* @foo(i32 %n) nounwind {
+define i8* @test0(i32 %n) nounwind {
 entry:
   %s = load i8** @x
   %0 = tail call i8* @objc_retain(i8* %s) nounwind
   %t = load i8** @x
-  ret i8* %s
+  ret i8* %t
+}
+
+; GVN must keep both loads here: ARC-specific alias analysis should not
+; treat objc_sync_enter as a call that leaves @x unmodified.
+
+; CHECK-LABEL: define i8* @test1(
+; CHECK: load
+; CHECK: load
+; CHECK: ret i8* %t
+; CHECK: }
+define i8* @test1(i32 %n) nounwind {
+entry:
+  %s = load i8** @x
+  %0 = call i32 @objc_sync_enter(i8* %s)
+  %t = load i8** @x
+  %1 = call i32 @objc_sync_exit(i8* %s)
+  ret i8* %t
 }
diff --git a/test/Transforms/ObjCARC/intrinsic-use-isolated.ll b/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
index 4215b5c36465..f5c31fd82ecb 100644
--- a/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
@@ -6,7 +6,7 @@
 declare void @clang.arc.use(...) nounwind
 
 ; Kill calls to @clang.arc.use(...)
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK-NOT: clang.arc.use
 ; CHECK: }
 define void @test0(i8* %a, i8* %b) {
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
index 60370c1f4405..f3833cb54297 100644
--- a/test/Transforms/ObjCARC/intrinsic-use.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -17,7 +17,7 @@ declare void @test0_helper(i8*, i8**)
 ; FIXME: the fact that we re-order retains w.r.t. @clang.arc.use could
 ; be problematic if we get run twice, e.g. under LTO.
 ;
-; CHECK:      define void @test0(
+; CHECK-LABEL:      define void @test0(
 ; CHECK:        @objc_retain(i8* %x)
 ; CHECK-NEXT:   store i8* %y, i8** %temp0
 ; CHECK-NEXT:   @objc_retain(i8* %y)
@@ -65,7 +65,7 @@ entry:
   ret void
 }
 
-; CHECK:      define void @test0a(
+; CHECK-LABEL:      define void @test0a(
 ; CHECK:        @objc_retain(i8* %x)
 ; CHECK-NEXT:   store i8* %y, i8** %temp0
 ; CHECK-NEXT:   @objc_retain(i8* %y)
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 9510f2e7ddec..04d057b9d496 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -10,7 +10,7 @@ declare i8* @returner()
 
 ; ARCOpt shouldn't try to move the releases to the block containing the invoke.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: invoke.cont:
 ; CHECK:   call void @objc_release(i8* %zipFile) [[NUW:#[0-9]+]], !clang.imprecise_release !0
 ; CHECK:   ret void
@@ -38,7 +38,7 @@ lpad:                                             ; preds = %entry
 
 ; ARCOpt should move the release before the callee calls.
 
-; CHECK: define void @test1(
+; CHECK-LABEL: define void @test1(
 ; CHECK: invoke.cont:
 ; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   call void @callee()
@@ -108,7 +108,7 @@ finally.rethrow:                                  ; preds = %invoke.cont, %entry
 
 ; Don't try to place code on invoke critical edges.
 
-; CHECK: define void @test3(
+; CHECK-LABEL: define void @test3(
 ; CHECK: if.end:
 ; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
@@ -139,7 +139,7 @@ if.end:
 
 ; Like test3, but with ARC-relevant exception handling.
 
-; CHECK: define void @test4(
+; CHECK-LABEL: define void @test4(
 ; CHECK: lpad:
 ; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
 ; CHECK-NEXT: cleanup
@@ -177,7 +177,7 @@ if.end:
 ; Don't turn the retainAutoreleaseReturnValue into retain, because it's
 ; for an invoke which we can assume codegen will put immediately prior.
 
-; CHECK: define void @test5(
+; CHECK-LABEL: define void @test5(
 ; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
 ; CHECK: }
 define void @test5() {
@@ -197,7 +197,7 @@ if.end:
 
 ; Like test5, but there's intervening code.
 
-; CHECK: define void @test6(
+; CHECK-LABEL: define void @test6(
 ; CHECK: call i8* @objc_retain(i8* %z)
 ; CHECK: }
 define void @test6() {
diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/ObjCARC/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index ca9c58bcb3e3..2eeb4fc25873 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -25,7 +25,7 @@ declare void @__crasher_block_invoke1(i8* nocapture)
 
 ; Delete a nested retain+release pair.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
@@ -89,7 +89,7 @@ forcoll.empty:
 
 ; Delete a nested retain+release pair.
 
-; CHECK: define void @test2(
+; CHECK-LABEL: define void @test2(
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
@@ -154,7 +154,7 @@ forcoll.empty:
 
 ; Delete a nested retain+release pair.
 
-; CHECK: define void @test4(
+; CHECK-LABEL: define void @test4(
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
@@ -219,7 +219,7 @@ forcoll.empty:
 
 ; Delete a nested retain+release pair.
 
-; CHECK: define void @test5(
+; CHECK-LABEL: define void @test5(
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
@@ -287,7 +287,7 @@ forcoll.empty:
 ; The optimizer currently can't do this, because isn't isn't sophisticated enough in
 ; reasnoning about nesting.
 
-; CHECK: define void @test6(
+; CHECK-LABEL: define void @test6(
 ; CHECK: call i8* @objc_retain
 ; CHECK: @objc_retain
 ; CHECK: }
@@ -355,7 +355,7 @@ forcoll.empty:
 ; The optimizer currently can't do this, because isn't isn't sophisticated enough in
 ; reasnoning about nesting.
 
-; CHECK: define void @test7(
+; CHECK-LABEL: define void @test7(
 ; CHECK: call i8* @objc_retain
 ; CHECK: @objc_retain
 ; CHECK: }
@@ -422,7 +422,7 @@ forcoll.empty:
 
 ; Delete a nested retain+release pair.
 
-; CHECK: define void @test8(
+; CHECK-LABEL: define void @test8(
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
@@ -496,7 +496,7 @@ forcoll.empty:
 ; The optimizer currently can't do this, because of a split loop backedge.
 ; See test9b for the same testcase without a split backedge.
 
-; CHECK: define void @test9(
+; CHECK-LABEL: define void @test9(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
@@ -563,7 +563,7 @@ forcoll.empty:
 
 ; Like test9, but without a split backedge. TODO: optimize this.
 
-; CHECK: define void @test9b(
+; CHECK-LABEL: define void @test9b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK: @objc_retain
@@ -629,7 +629,7 @@ forcoll.empty:
 ; The optimizer currently can't do this, because of a split loop backedge.
 ; See test10b for the same testcase without a split backedge.
 
-; CHECK: define void @test10(
+; CHECK-LABEL: define void @test10(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
@@ -697,7 +697,7 @@ forcoll.empty:
 
 ; Like test10, but without a split backedge. TODO: optimize this.
 
-; CHECK: define void @test10b(
+; CHECK-LABEL: define void @test10b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK: @objc_retain
@@ -769,7 +769,7 @@ forcoll.empty:
 @__block_d_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
 @__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
 
-; CHECK: define void @test11(
+; CHECK-LABEL: define void @test11(
 ; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
 ; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW]]
 ; CHECK: call void @objc_release(i8* %call) [[NUW]], !clang.imprecise_release !0
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
deleted file mode 100644
index 58b5bbe9c7e9..000000000000
--- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
+++ /dev/null
@@ -1,123 +0,0 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
-%struct.__block_descriptor = type { i64, i64 }
-@_NSConcreteStackBlock = external global i8*
-@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
-
-; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions
-; metadata and eliminate the retainBlock+release pair here.
-; rdar://10803830.
-
-; CHECK: define void @test0(
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test0() {
-entry:
-  %x = alloca %struct.__block_byref_x, align 8
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
-  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
-  store i8* null, i8** %byref.isa, align 8
-  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
-  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
-  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
-  store i32 0, i32* %byref.flags, align 8
-  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
-  store i32 32, i32* %byref.size, align 4
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
-  %t1 = bitcast %struct.__block_byref_x* %x to i8*
-  store i8* %t1, i8** %block.captured, align 8
-  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
-  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
-  %t4 = getelementptr inbounds i8* %t3, i64 16
-  %t5 = bitcast i8* %t4 to i8**
-  %t6 = load i8** %t5, align 8
-  %t7 = bitcast i8* %t6 to void (i8*)*
-  invoke void %t7(i8* %t3)
-          to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4
-
-invoke.cont:                                      ; preds = %entry
-  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  ret void
-
-lpad:                                             ; preds = %entry
-  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
-          cleanup
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  resume { i8*, i32 } %t8
-}
-
-; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer
-; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock
-; to an objc_retain.
-
-; CHECK: define void @test0_no_metadata(
-; CHECK: call i8* @objc_retain(
-; CHECK: invoke
-; CHECK: call void @objc_release(
-; CHECK: }
-define void @test0_no_metadata() {
-entry:
-  %x = alloca %struct.__block_byref_x, align 8
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
-  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
-  store i8* null, i8** %byref.isa, align 8
-  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
-  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
-  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
-  store i32 0, i32* %byref.flags, align 8
-  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
-  store i32 32, i32* %byref.size, align 4
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
-  %t1 = bitcast %struct.__block_byref_x* %x to i8*
-  store i8* %t1, i8** %block.captured, align 8
-  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
-  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
-  %t4 = getelementptr inbounds i8* %t3, i64 16
-  %t5 = bitcast i8* %t4 to i8**
-  %t6 = load i8** %t5, align 8
-  %t7 = bitcast i8* %t6 to void (i8*)*
-  invoke void %t7(i8* %t3)
-          to label %invoke.cont unwind label %lpad
-
-invoke.cont:                                      ; preds = %entry
-  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  ret void
-
-lpad:                                             ; preds = %entry
-  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
-          cleanup
-  call void @_Block_object_dispose(i8* %t1, i32 8)
-  resume { i8*, i32 } %t8
-}
-
-declare i8* @objc_retainBlock(i8*)
-declare void @objc_release(i8*)
-declare void @_Block_object_dispose(i8*, i32)
-declare i32 @__objc_personality_v0(...)
-declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp
-
-!4 = metadata !{}
diff --git a/test/Transforms/ObjCARC/path-overflow.ll b/test/Transforms/ObjCARC/path-overflow.ll
index e7866ed1b442..3c14353947ae 100644
--- a/test/Transforms/ObjCARC/path-overflow.ll
+++ b/test/Transforms/ObjCARC/path-overflow.ll
@@ -1,21 +1,35 @@
 ; RUN: opt -objc-arc -S < %s
 ; rdar://12277446
+; rdar://12480535
+; rdar://14590914
+; rdar://15377890
 
 ; The total number of paths grows exponentially with the number of branches, and a
-; computation of this number can overflow any reasonable fixed-sized integer.
+; computation of this number can overflow any reasonable fixed-sized
+; integer. This can occur both in the addition phase, when we add up the total
+; bottom-up/top-down path counts, and at the end, when we multiply them together.
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios5.0.0"
 
-%struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768 = type { i32*, i32, i8*, i32 }
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+%struct.CGPoint = type { float, float }
 
-@_unnamed_cfstring_591 = external constant %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768, section "__DATA,__cfstring"
+@_unnamed_cfstring = external constant %struct.NSConstantString, section "__DATA,__cfstring"
+@_unnamed_cfstring_2 = external constant %struct.NSConstantString, section "__DATA,__cfstring"
 
 declare i8* @objc_retain(i8*) nonlazybind
-
+declare i8* @objc_retainAutoreleasedReturnValue(i8*) nonlazybind
 declare void @objc_release(i8*) nonlazybind
+declare i8* @returner()
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare void @NSLog(i8*, ...)
+declare void @objc_msgSend_stret(i8*, i8*, ...)
+declare i32 @__gxx_personality_sj0(...)
+declare i32 @__objc_personality_v0(...)
+
 
-define hidden void @foo() {
+define hidden void @test1() {
 entry:
   br i1 undef, label %msgSend.nullinit, label %msgSend.call
 
@@ -26,7 +40,7 @@ msgSend.nullinit:                                 ; preds = %entry
   br label %msgSend.cont
 
 msgSend.cont:                                     ; preds = %msgSend.nullinit, %msgSend.call
-  %0 = bitcast %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768* @_unnamed_cfstring_591 to i8*
+  %0 = bitcast %struct.NSConstantString* @_unnamed_cfstring to i8*
   %1 = call i8* @objc_retain(i8* %0) nounwind
   br i1 undef, label %msgSend.nullinit33, label %msgSend.call32
 
@@ -326,4 +340,1854 @@ msgSend.cont507:                                  ; preds = %msgSend.nullinit506
   ret void
 }
 
+; test2: every branch condition is undef; the only ARC calls are the retainAutoreleasedReturnValue/retain/release sequence in bb186. Function Attrs: optsize ssp uwtable (comment only; the define below names no attribute group).
+define void @test2() unnamed_addr align 2 {
+bb:
+  br i1 undef, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  br i1 undef, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3
+  br i1 undef, label %bb7, label %bb6
+
+bb6:                                              ; preds = %bb5
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb5
+  br i1 undef, label %bb9, label %bb8
+
+bb8:                                              ; preds = %bb7
+  unreachable
+
+bb9:                                              ; preds = %bb7
+  br i1 undef, label %bb11, label %bb10
+
+bb10:                                             ; preds = %bb9
+  br label %bb11
+
+bb11:                                             ; preds = %bb10, %bb9
+  br i1 undef, label %bb13, label %bb12
+
+bb12:                                             ; preds = %bb11
+  br label %bb13
+
+bb13:                                             ; preds = %bb12, %bb11
+  br i1 undef, label %bb15, label %bb14
+
+bb14:                                             ; preds = %bb13
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb13
+  br i1 undef, label %bb17, label %bb16
+
+bb16:                                             ; preds = %bb15
+  br label %bb17
+
+bb17:                                             ; preds = %bb16, %bb15
+  br i1 undef, label %bb19, label %bb18
+
+bb18:                                             ; preds = %bb17
+  br label %bb19
+
+bb19:                                             ; preds = %bb18, %bb17
+  br i1 undef, label %bb222, label %bb20
+
+bb20:                                             ; preds = %bb19
+  br i1 undef, label %bb222, label %bb21
+
+bb21:                                             ; preds = %bb20
+  br i1 undef, label %bb22, label %bb30
+
+bb22:                                             ; preds = %bb21
+  br i1 undef, label %bb23, label %bb32
+
+bb23:                                             ; preds = %bb22
+  br i1 undef, label %bb24, label %bb34
+
+bb24:                                             ; preds = %bb23
+  br i1 undef, label %bb26, label %bb25
+
+bb25:                                             ; preds = %bb24
+  br label %bb27
+
+bb26:                                             ; preds = %bb24
+  br label %bb27
+
+bb27:                                             ; preds = %bb26, %bb25
+  br i1 undef, label %bb28, label %bb42
+
+bb28:                                             ; preds = %bb27
+  br i1 undef, label %bb36, label %bb29
+
+bb29:                                             ; preds = %bb28
+  br label %bb36
+
+bb30:                                             ; preds = %bb210, %bb207, %bb203, %bb199, %bb182, %bb176, %bb174, %bb171, %bb136, %bb132, %bb21 -- funnels into bb213 (the resume block)
+  br label %bb213
+
+bb32:                                             ; preds = %bb22
+  unreachable
+
+bb34:                                             ; preds = %bb23
+  unreachable
+
+bb36:                                             ; preds = %bb29, %bb28
+  br i1 undef, label %bb38, label %bb37
+
+bb37:                                             ; preds = %bb36
+  br label %bb39
+
+bb38:                                             ; preds = %bb36
+  br label %bb39
+
+bb39:                                             ; preds = %bb38, %bb37
+  br i1 undef, label %bb41, label %bb40
+
+bb40:                                             ; preds = %bb39
+  unreachable
+
+bb41:                                             ; preds = %bb39
+  br label %bb42
+
+bb42:                                             ; preds = %bb41, %bb27
+  br i1 undef, label %bb43, label %bb214
+
+bb43:                                             ; preds = %bb42
+  br i1 undef, label %bb47, label %bb45
+
+bb45:                                             ; preds = %bb130, %bb128, %bb126, %bb124, %bb122, %bb120, %bb118, %bb116, %bb114, %bb112, %bb110, %bb108, %bb105, %bb102, %bb100, %bb96, %bb94, %bb90, %bb88, %bb84, %bb82, %bb78, %bb76, %bb72, %bb70, %bb66, %bb64, %bb60, %bb58, %bb54, %bb51, %bb43 -- shared unreachable sink
+  unreachable
+
+bb47:                                             ; preds = %bb43
+  br i1 undef, label %bb48, label %bb106
+
+bb48:                                             ; preds = %bb47
+  br i1 undef, label %bb50, label %bb49
+
+bb49:                                             ; preds = %bb48
+  br label %bb51
+
+bb50:                                             ; preds = %bb48
+  br label %bb51
+
+bb51:                                             ; preds = %bb50, %bb49
+  br i1 undef, label %bb53, label %bb45
+
+bb53:                                             ; preds = %bb51
+  br i1 undef, label %bb54, label %bb134
+
+bb54:                                             ; preds = %bb53
+  br i1 undef, label %bb55, label %bb45
+
+bb55:                                             ; preds = %bb54
+  br i1 undef, label %bb57, label %bb56
+
+bb56:                                             ; preds = %bb55
+  br label %bb58
+
+bb57:                                             ; preds = %bb55
+  br label %bb58
+
+bb58:                                             ; preds = %bb57, %bb56
+  br i1 undef, label %bb60, label %bb45
+
+bb60:                                             ; preds = %bb58
+  br i1 undef, label %bb61, label %bb45
+
+bb61:                                             ; preds = %bb60
+  br i1 undef, label %bb63, label %bb62
+
+bb62:                                             ; preds = %bb61
+  br label %bb64
+
+bb63:                                             ; preds = %bb61
+  br label %bb64
+
+bb64:                                             ; preds = %bb63, %bb62
+  br i1 undef, label %bb66, label %bb45
+
+bb66:                                             ; preds = %bb64
+  br i1 undef, label %bb67, label %bb45
+
+bb67:                                             ; preds = %bb66
+  br i1 undef, label %bb69, label %bb68
+
+bb68:                                             ; preds = %bb67
+  br label %bb70
+
+bb69:                                             ; preds = %bb67
+  br label %bb70
+
+bb70:                                             ; preds = %bb69, %bb68
+  br i1 undef, label %bb72, label %bb45
+
+bb72:                                             ; preds = %bb70
+  br i1 undef, label %bb73, label %bb45
+
+bb73:                                             ; preds = %bb72
+  br i1 undef, label %bb75, label %bb74
+
+bb74:                                             ; preds = %bb73
+  br label %bb76
+
+bb75:                                             ; preds = %bb73
+  br label %bb76
+
+bb76:                                             ; preds = %bb75, %bb74
+  br i1 undef, label %bb78, label %bb45
+
+bb78:                                             ; preds = %bb76
+  br i1 undef, label %bb79, label %bb45
+
+bb79:                                             ; preds = %bb78
+  br i1 undef, label %bb81, label %bb80
+
+bb80:                                             ; preds = %bb79
+  br label %bb82
+
+bb81:                                             ; preds = %bb79
+  br label %bb82
+
+bb82:                                             ; preds = %bb81, %bb80
+  br i1 undef, label %bb84, label %bb45
+
+bb84:                                             ; preds = %bb82
+  br i1 undef, label %bb85, label %bb45
+
+bb85:                                             ; preds = %bb84
+  br i1 undef, label %bb87, label %bb86
+
+bb86:                                             ; preds = %bb85
+  br label %bb88
+
+bb87:                                             ; preds = %bb85
+  br label %bb88
+
+bb88:                                             ; preds = %bb87, %bb86
+  br i1 undef, label %bb90, label %bb45
+
+bb90:                                             ; preds = %bb88
+  br i1 undef, label %bb91, label %bb45
+
+bb91:                                             ; preds = %bb90
+  br i1 undef, label %bb93, label %bb92
+
+bb92:                                             ; preds = %bb91
+  br label %bb94
+
+bb93:                                             ; preds = %bb91
+  br label %bb94
+
+bb94:                                             ; preds = %bb93, %bb92
+  br i1 undef, label %bb96, label %bb45
+
+bb96:                                             ; preds = %bb94
+  br i1 undef, label %bb97, label %bb45
+
+bb97:                                             ; preds = %bb96
+  br i1 undef, label %bb99, label %bb98
+
+bb98:                                             ; preds = %bb97
+  br label %bb100
+
+bb99:                                             ; preds = %bb97
+  br label %bb100
+
+bb100:                                            ; preds = %bb99, %bb98
+  br i1 undef, label %bb102, label %bb45
+
+bb102:                                            ; preds = %bb100
+  br i1 undef, label %bb104, label %bb45
+
+bb104:                                            ; preds = %bb102
+  br i1 undef, label %bb108, label %bb105
+
+bb105:                                            ; preds = %bb104
+  br i1 undef, label %bb108, label %bb45
+
+bb106:                                            ; preds = %bb47
+  unreachable
+
+bb108:                                            ; preds = %bb105, %bb104
+  br i1 undef, label %bb110, label %bb45
+
+bb110:                                            ; preds = %bb108
+  br i1 undef, label %bb112, label %bb45
+
+bb112:                                            ; preds = %bb110
+  br i1 undef, label %bb114, label %bb45
+
+bb114:                                            ; preds = %bb112
+  br i1 undef, label %bb116, label %bb45
+
+bb116:                                            ; preds = %bb114
+  br i1 undef, label %bb118, label %bb45
+
+bb118:                                            ; preds = %bb116
+  br i1 undef, label %bb120, label %bb45
+
+bb120:                                            ; preds = %bb118
+  br i1 undef, label %bb122, label %bb45
+
+bb122:                                            ; preds = %bb120
+  br i1 undef, label %bb124, label %bb45
+
+bb124:                                            ; preds = %bb122
+  br i1 undef, label %bb126, label %bb45
+
+bb126:                                            ; preds = %bb124
+  br i1 undef, label %bb128, label %bb45
+
+bb128:                                            ; preds = %bb126
+  br i1 undef, label %bb130, label %bb45
+
+bb130:                                            ; preds = %bb128
+  br i1 undef, label %bb132, label %bb45
+
+bb132:                                            ; preds = %bb130
+  br i1 undef, label %bb135, label %bb30
+
+bb134:                                            ; preds = %bb53
+  unreachable
+
+bb135:                                            ; preds = %bb132
+  br i1 undef, label %bb139, label %bb136
+
+bb136:                                            ; preds = %bb135
+  br i1 undef, label %bb138, label %bb30
+
+bb138:                                            ; preds = %bb136
+  br label %bb139
+
+bb139:                                            ; preds = %bb138, %bb135
+  br i1 undef, label %bb140, label %bb141
+
+bb140:                                            ; preds = %bb139
+  unreachable
+
+bb141:                                            ; preds = %bb139
+  br i1 undef, label %bb142, label %bb215
+
+bb142:                                            ; preds = %bb141
+  br i1 undef, label %bb144, label %bb143
+
+bb143:                                            ; preds = %bb142
+  br label %bb145
+
+bb144:                                            ; preds = %bb142
+  br label %bb145
+
+bb145:                                            ; preds = %bb144, %bb143
+  br i1 undef, label %bb146, label %bb151
+
+bb146:                                            ; preds = %bb145
+  br i1 undef, label %bb148, label %bb153
+
+bb148:                                            ; preds = %bb146
+  br i1 undef, label %bb155, label %bb149
+
+bb149:                                            ; preds = %bb148
+  br i1 undef, label %bb150, label %bb153
+
+bb150:                                            ; preds = %bb149
+  br label %bb155
+
+bb151:                                            ; preds = %bb145
+  unreachable
+
+bb153:                                            ; preds = %bb158, %bb149, %bb146
+  unreachable
+
+bb155:                                            ; preds = %bb150, %bb148
+  br i1 undef, label %bb157, label %bb156
+
+bb156:                                            ; preds = %bb155
+  br label %bb158
+
+bb157:                                            ; preds = %bb155
+  br label %bb158
+
+bb158:                                            ; preds = %bb157, %bb156
+  br i1 undef, label %bb160, label %bb153
+
+bb160:                                            ; preds = %bb158
+  br i1 undef, label %bb162, label %bb161
+
+bb161:                                            ; preds = %bb160
+  br label %bb163
+
+bb162:                                            ; preds = %bb160
+  br label %bb163
+
+bb163:                                            ; preds = %bb162, %bb161
+  br i1 undef, label %bb165, label %bb164
+
+bb164:                                            ; preds = %bb163
+  br label %bb165
+
+bb165:                                            ; preds = %bb164, %bb163
+  br i1 undef, label %bb170, label %bb166
+
+bb166:                                            ; preds = %bb165
+  br i1 undef, label %bb167, label %bb168
+
+bb167:                                            ; preds = %bb166
+  unreachable
+
+bb168:                                            ; preds = %bb166
+  unreachable
+
+bb170:                                            ; preds = %bb165
+  br i1 undef, label %bb215, label %bb171
+
+bb171:                                            ; preds = %bb170
+  br i1 undef, label %bb173, label %bb30
+
+bb173:                                            ; preds = %bb171
+  br i1 undef, label %bb174, label %bb215
+
+bb174:                                            ; preds = %bb173
+  br i1 undef, label %bb176, label %bb30
+
+bb176:                                            ; preds = %bb174
+  br i1 undef, label %bb178, label %bb30
+
+bb178:                                            ; preds = %bb176
+  br i1 undef, label %bb179, label %bb193
+
+bb179:                                            ; preds = %bb178
+  br i1 undef, label %bb181, label %bb180
+
+bb180:                                            ; preds = %bb179
+  br label %bb182
+
+bb181:                                            ; preds = %bb179
+  br label %bb182
+
+bb182:                                            ; preds = %bb181, %bb180
+  br i1 undef, label %bb184, label %bb30
+
+bb184:                                            ; preds = %bb182 -- %tmp185 is the value retained in bb186
+  %tmp185 = call i8* @returner()
+  br i1 undef, label %bb186, label %bb195
+
+bb186:                                            ; preds = %bb184 -- the only block with objc_* calls
+  %tmp188 = call i8* @objc_retainAutoreleasedReturnValue(i8* %tmp185)
+  %tmp189 = call i8* @objc_retain(i8* %tmp188)
+  call void @objc_release(i8* %tmp189), !clang.imprecise_release !0
+  br i1 undef, label %bb197, label %bb190
+
+bb190:                                            ; preds = %bb186
+  br i1 undef, label %bb192, label %bb195
+
+bb192:                                            ; preds = %bb190
+  br i1 undef, label %bb197, label %bb195
+
+bb193:                                            ; preds = %bb178
+  br label %bb213
+
+bb195:                                            ; preds = %bb192, %bb190, %bb184
+  unreachable
+
+bb197:                                            ; preds = %bb192, %bb186
+  br i1 undef, label %bb198, label %bb215
+
+bb198:                                            ; preds = %bb197
+  br i1 undef, label %bb202, label %bb199
+
+bb199:                                            ; preds = %bb198
+  br i1 undef, label %bb201, label %bb30
+
+bb201:                                            ; preds = %bb199
+  br label %bb202
+
+bb202:                                            ; preds = %bb201, %bb198
+  br i1 undef, label %bb206, label %bb203
+
+bb203:                                            ; preds = %bb202
+  br i1 undef, label %bb205, label %bb30
+
+bb205:                                            ; preds = %bb203
+  br label %bb206
+
+bb206:                                            ; preds = %bb205, %bb202
+  br i1 undef, label %bb210, label %bb207
+
+bb207:                                            ; preds = %bb206
+  br i1 undef, label %bb209, label %bb30
+
+bb209:                                            ; preds = %bb207
+  br label %bb210
+
+bb210:                                            ; preds = %bb209, %bb206
+  br i1 undef, label %bb212, label %bb30
+
+bb212:                                            ; preds = %bb210
+  unreachable
+
+bb213:                                            ; preds = %bb193, %bb30 -- the function's only resume
+  resume { i8*, i32 } undef
+
+bb214:                                            ; preds = %bb42
+  br label %bb219
+
+bb215:                                            ; preds = %bb197, %bb173, %bb170, %bb141
+  br i1 undef, label %bb217, label %bb216
+
+bb216:                                            ; preds = %bb215
+  br label %bb217
+
+bb217:                                            ; preds = %bb216, %bb215
+  br i1 undef, label %bb219, label %bb218
+
+bb218:                                            ; preds = %bb217
+  br label %bb219
+
+bb219:                                            ; preds = %bb218, %bb217, %bb214
+  br i1 undef, label %bb221, label %bb220
+
+bb220:                                            ; preds = %bb219
+  unreachable
+
+bb221:                                            ; preds = %bb219
+  unreachable
+
+bb222:                                            ; preds = %bb20, %bb19 -- the function's only ret
+  ret void
+}
+
+; Function Attrs: ssp
+define void @test3() #1 {
+entry:
+  %call2 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %call5 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont4 unwind label %lpad3
+
+invoke.cont4:                                     ; preds = %invoke.cont
+  br i1 undef, label %land.end, label %land.rhs
+
+land.rhs:                                         ; preds = %invoke.cont4
+  %call7 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %land.end unwind label %lpad3
+
+land.end:                                         ; preds = %land.rhs, %invoke.cont4
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i unwind label %lpad.i
+
+invoke.cont.i:                                    ; preds = %land.end
+  br i1 undef, label %invoke.cont8, label %if.then.i
+
+if.then.i:                                        ; preds = %invoke.cont.i
+  br label %invoke.cont8
+
+lpad.i:                                           ; preds = %land.end
+  %tmp13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont8:                                     ; preds = %if.then.i, %invoke.cont.i
+  %call18 = invoke i8* (i8*, i8*, i8*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, ...)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef)
+          to label %invoke.cont17 unwind label %lpad16
+
+invoke.cont17:                                    ; preds = %invoke.cont8
+  %call22 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont21 unwind label %lpad20
+
+invoke.cont21:                                    ; preds = %invoke.cont17
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1980 unwind label %lpad.i1982
+
+invoke.cont.i1980:                                ; preds = %invoke.cont21
+  br i1 undef, label %invoke.cont24, label %if.then.i1981
+
+if.then.i1981:                                    ; preds = %invoke.cont.i1980
+  br label %invoke.cont24
+
+lpad.i1982:                                       ; preds = %invoke.cont21
+  %tmp28 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont24:                                    ; preds = %if.then.i1981, %invoke.cont.i1980
+  %call37 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36:                                    ; preds = %invoke.cont24
+  br i1 undef, label %land.end43, label %land.rhs39
+
+land.rhs39:                                       ; preds = %invoke.cont36
+  %call41 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %land.end43 unwind label %lpad35
+
+land.end43:                                       ; preds = %land.rhs39, %invoke.cont36
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1986 unwind label %lpad.i1988
+
+invoke.cont.i1986:                                ; preds = %land.end43
+  br i1 undef, label %invoke.cont44, label %if.then.i1987
+
+if.then.i1987:                                    ; preds = %invoke.cont.i1986
+  br label %invoke.cont44
+
+lpad.i1988:                                       ; preds = %land.end43
+  %tmp42 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont44:                                    ; preds = %if.then.i1987, %invoke.cont.i1986
+  %call53 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont52 unwind label %lpad51
+
+invoke.cont52:                                    ; preds = %invoke.cont44
+  br i1 undef, label %land.end70, label %land.rhs58
+
+land.rhs58:                                       ; preds = %invoke.cont52
+  %call63 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 42)
+          to label %invoke.cont62 unwind label %lpad61
+
+invoke.cont62:                                    ; preds = %land.rhs58
+  %call68 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %land.end70 unwind label %lpad66.body.thread
+
+land.end70:                                       ; preds = %invoke.cont62, %invoke.cont52
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1992 unwind label %lpad66.body
+
+invoke.cont.i1992:                                ; preds = %land.end70
+  br i1 undef, label %invoke.cont71, label %if.then.i1993
+
+if.then.i1993:                                    ; preds = %invoke.cont.i1992
+  br label %invoke.cont71
+
+invoke.cont71:                                    ; preds = %if.then.i1993, %invoke.cont.i1992
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i1998 unwind label %lpad.i2000
+
+invoke.cont.i1998:                                ; preds = %invoke.cont71
+  br i1 undef, label %invoke.cont91, label %if.then.i1999
+
+if.then.i1999:                                    ; preds = %invoke.cont.i1998
+  br label %invoke.cont91
+
+lpad.i2000:                                       ; preds = %invoke.cont71
+  %tmp74 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup102
+
+invoke.cont91:                                    ; preds = %if.then.i1999, %invoke.cont.i1998
+  %call96 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont95 unwind label %lpad94
+
+invoke.cont95:                                    ; preds = %invoke.cont91
+  %call98 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* %call96)
+          to label %invoke.cont97 unwind label %lpad94
+
+invoke.cont97:                                    ; preds = %invoke.cont95
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2004 unwind label %lpad.i2006
+
+invoke.cont.i2004:                                ; preds = %invoke.cont97
+  br i1 undef, label %invoke.cont100, label %if.then.i2005
+
+if.then.i2005:                                    ; preds = %invoke.cont.i2004
+  br label %invoke.cont100
+
+lpad.i2006:                                       ; preds = %invoke.cont97
+  %tmp82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont100:                                   ; preds = %if.then.i2005, %invoke.cont.i2004
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont110 unwind label %lpad109
+
+invoke.cont110:                                   ; preds = %invoke.cont100
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2010 unwind label %lpad.i2012
+
+invoke.cont.i2010:                                ; preds = %invoke.cont110
+  br i1 undef, label %invoke.cont117, label %if.then.i2011
+
+if.then.i2011:                                    ; preds = %invoke.cont.i2010
+  br label %invoke.cont117
+
+lpad.i2012:                                       ; preds = %invoke.cont110
+  %tmp98 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont117:                                   ; preds = %if.then.i2011, %invoke.cont.i2010
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2022 unwind label %lpad156.body
+
+lpad:                                             ; preds = %entry
+  %tmp118 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup
+
+lpad3:                                            ; preds = %land.rhs, %invoke.cont
+  %tmp119 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup
+
+ehcleanup:                                        ; preds = %lpad3, %lpad
+  unreachable
+
+lpad16:                                           ; preds = %invoke.cont8
+  %tmp121 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup26
+
+lpad20:                                           ; preds = %invoke.cont17
+  %tmp122 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup26
+
+ehcleanup26:                                      ; preds = %lpad20, %lpad16
+  unreachable
+
+lpad35:                                           ; preds = %land.rhs39, %invoke.cont24
+  %tmp124 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad51:                                           ; preds = %invoke.cont44
+  %tmp125 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad61:                                           ; preds = %land.rhs58
+  %tmp127 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad66.body.thread:                               ; preds = %invoke.cont62
+  %tmp128 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad66.body:                                      ; preds = %land.end70
+  %tmp129 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad94:                                           ; preds = %invoke.cont95, %invoke.cont91
+  %tmp133 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup102
+
+ehcleanup102:                                     ; preds = %lpad94, %lpad.i2000
+  unreachable
+
+lpad109:                                          ; preds = %invoke.cont100
+  %tmp134 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont.i2022:                                ; preds = %invoke.cont117
+  br i1 undef, label %invoke.cont157, label %if.then.i2023
+
+if.then.i2023:                                    ; preds = %invoke.cont.i2022
+  br label %invoke.cont157
+
+invoke.cont157:                                   ; preds = %if.then.i2023, %invoke.cont.i2022
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2028 unwind label %lpad164.body
+
+invoke.cont.i2028:                                ; preds = %invoke.cont157
+  br i1 undef, label %invoke.cont165, label %if.then.i2029
+
+if.then.i2029:                                    ; preds = %invoke.cont.i2028
+  br label %invoke.cont165
+
+invoke.cont165:                                   ; preds = %if.then.i2029, %invoke.cont.i2028
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, void (i8*, i8*)*)*)(i8* undef, i8* undef, void (i8*, i8*)* undef)
+          to label %invoke.cont184 unwind label %lpad183
+
+invoke.cont184:                                   ; preds = %invoke.cont165
+  %call186 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont185 unwind label %lpad183
+
+invoke.cont185:                                   ; preds = %invoke.cont184
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2034 unwind label %lpad.i2036
+
+invoke.cont.i2034:                                ; preds = %invoke.cont185
+  br i1 undef, label %invoke.cont190, label %if.then.i2035
+
+if.then.i2035:                                    ; preds = %invoke.cont.i2034
+  br label %invoke.cont190
+
+lpad.i2036:                                       ; preds = %invoke.cont185
+  %tmp168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %lpad183.body
+
+invoke.cont190:                                   ; preds = %if.then.i2035, %invoke.cont.i2034
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont197 unwind label %lpad196
+
+invoke.cont197:                                   ; preds = %invoke.cont190
+  %call202 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont201 unwind label %lpad200
+
+invoke.cont201:                                   ; preds = %invoke.cont197
+  %call205 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont204 unwind label %lpad203
+
+invoke.cont204:                                   ; preds = %invoke.cont201
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2040 unwind label %lpad.i2042
+
+invoke.cont.i2040:                                ; preds = %invoke.cont204
+  br i1 undef, label %invoke.cont207, label %if.then.i2041
+
+if.then.i2041:                                    ; preds = %invoke.cont.i2040
+  br label %invoke.cont207
+
+lpad.i2042:                                       ; preds = %invoke.cont204
+  %tmp181 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont207:                                   ; preds = %if.then.i2041, %invoke.cont.i2040
+  %call209 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont208 unwind label %lpad203
+
+invoke.cont208:                                   ; preds = %invoke.cont207
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2046 unwind label %lpad212.body
+
+invoke.cont.i2046:                                ; preds = %invoke.cont208
+  br i1 undef, label %invoke.cont213, label %if.then.i2047
+
+if.then.i2047:                                    ; preds = %invoke.cont.i2046
+  br label %invoke.cont213
+
+invoke.cont213:                                   ; preds = %if.then.i2047, %invoke.cont.i2046
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont221 unwind label %lpad220
+
+invoke.cont221:                                   ; preds = %invoke.cont213
+  %call229 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont228 unwind label %lpad227
+
+invoke.cont228:                                   ; preds = %invoke.cont221
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2052 unwind label %lpad.i2054
+
+invoke.cont.i2052:                                ; preds = %invoke.cont228
+  br i1 undef, label %invoke.cont231, label %if.then.i2053
+
+if.then.i2053:                                    ; preds = %invoke.cont.i2052
+  br label %invoke.cont231
+
+lpad.i2054:                                       ; preds = %invoke.cont228
+  %tmp198 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont231:                                   ; preds = %if.then.i2053, %invoke.cont.i2052
+  %call233 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont232 unwind label %lpad227
+
+invoke.cont232:                                   ; preds = %invoke.cont231
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2058 unwind label %lpad236.body
+
+invoke.cont.i2058:                                ; preds = %invoke.cont232
+  br i1 undef, label %invoke.cont237, label %if.then.i2059
+
+if.then.i2059:                                    ; preds = %invoke.cont.i2058
+  br label %invoke.cont237
+
+invoke.cont237:                                   ; preds = %if.then.i2059, %invoke.cont.i2058
+  %call246 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont245 unwind label %lpad244
+
+invoke.cont245:                                   ; preds = %invoke.cont237
+  %call248 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 13)
+          to label %invoke.cont247 unwind label %lpad244
+
+invoke.cont247:                                   ; preds = %invoke.cont245
+  %call251 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 2)
+          to label %invoke.cont250 unwind label %lpad249
+
+invoke.cont250:                                   ; preds = %invoke.cont247
+  %call254 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 7)
+          to label %invoke.cont253 unwind label %lpad252
+
+invoke.cont253:                                   ; preds = %invoke.cont250
+  %call257 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8**, i32)*)(i8* undef, i8* undef, i8** undef, i32 3)
+          to label %invoke.cont256 unwind label %lpad255
+
+invoke.cont256:                                   ; preds = %invoke.cont253
+  %call260 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont259 unwind label %lpad258
+
+invoke.cont259:                                   ; preds = %invoke.cont256
+  %call267 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont266 unwind label %lpad265
+
+invoke.cont266:                                   ; preds = %invoke.cont259
+  %call275 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont274 unwind label %lpad273
+
+invoke.cont274:                                   ; preds = %invoke.cont266
+  %call279 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont278 unwind label %lpad277
+
+invoke.cont278:                                   ; preds = %invoke.cont274
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2064 unwind label %lpad.i2066
+
+invoke.cont.i2064:                                ; preds = %invoke.cont278
+  br i1 undef, label %invoke.cont281, label %if.then.i2065
+
+if.then.i2065:                                    ; preds = %invoke.cont.i2064
+  br label %invoke.cont281
+
+lpad.i2066:                                       ; preds = %invoke.cont278
+  %tmp253 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont281:                                   ; preds = %if.then.i2065, %invoke.cont.i2064
+  %call291 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont290 unwind label %lpad289
+
+invoke.cont290:                                   ; preds = %invoke.cont281
+  %call303 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 8)
+          to label %invoke.cont302 unwind label %lpad301
+
+invoke.cont302:                                   ; preds = %invoke.cont290
+  %call310 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, double)*)(i8* undef, i8* undef, double 5.000000e-01)
+          to label %invoke.cont309 unwind label %lpad308
+
+invoke.cont309:                                   ; preds = %invoke.cont302
+  %call313 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 42)
+          to label %invoke.cont312 unwind label %lpad311
+
+invoke.cont312:                                   ; preds = %invoke.cont309
+  %call316 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8**, i8**, i32)*)(i8* undef, i8* undef, i8** undef, i8** undef, i32 2)
+          to label %invoke.cont315 unwind label %lpad314
+
+invoke.cont315:                                   ; preds = %invoke.cont312
+  %call322 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont321 unwind label %lpad320
+
+invoke.cont321:                                   ; preds = %invoke.cont315
+  br i1 undef, label %land.end344, label %land.rhs335
+
+land.rhs335:                                      ; preds = %invoke.cont321
+  %call342 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %land.end344 unwind label %lpad340.body.thread
+
+land.end344:                                      ; preds = %land.rhs335, %invoke.cont321
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2070 unwind label %lpad340.body
+
+invoke.cont.i2070:                                ; preds = %land.end344
+  br i1 undef, label %invoke.cont345, label %if.then.i2071
+
+if.then.i2071:                                    ; preds = %invoke.cont.i2070
+  br label %invoke.cont345
+
+invoke.cont345:                                   ; preds = %if.then.i2071, %invoke.cont.i2070
+  %call362 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef)
+          to label %invoke.cont361 unwind label %lpad360
+
+invoke.cont361:                                   ; preds = %invoke.cont345
+  %call365 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont364 unwind label %lpad363
+
+invoke.cont364:                                   ; preds = %invoke.cont361
+  %call371 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont370 unwind label %lpad369
+
+invoke.cont370:                                   ; preds = %invoke.cont364
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2076 unwind label %lpad.i2078
+
+invoke.cont.i2076:                                ; preds = %invoke.cont370
+  br i1 undef, label %invoke.cont373, label %if.then.i2077
+
+if.then.i2077:                                    ; preds = %invoke.cont.i2076
+  br label %invoke.cont373
+
+lpad.i2078:                                       ; preds = %invoke.cont370
+  %tmp340 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont373:                                   ; preds = %if.then.i2077, %invoke.cont.i2076
+  %call377 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32, i8*)*)(i8* undef, i8* undef, i32 42, i8* undef)
+          to label %invoke.cont376 unwind label %lpad363
+
+invoke.cont376:                                   ; preds = %invoke.cont373
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 5)
+          to label %invoke.cont382 unwind label %lpad381
+
+invoke.cont382:                                   ; preds = %invoke.cont376
+  %call384 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont383 unwind label %lpad381
+
+invoke.cont383:                                   ; preds = %invoke.cont382
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2082 unwind label %lpad.i2084
+
+invoke.cont.i2082:                                ; preds = %invoke.cont383
+  br i1 undef, label %invoke.cont392, label %if.then.i2083
+
+if.then.i2083:                                    ; preds = %invoke.cont.i2082
+  br label %invoke.cont392
+
+lpad.i2084:                                       ; preds = %invoke.cont383
+  %tmp360 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont392:                                   ; preds = %if.then.i2083, %invoke.cont.i2082
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 -2)
+          to label %invoke.cont395 unwind label %lpad381
+
+invoke.cont395:                                   ; preds = %invoke.cont392
+  %call397 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont396 unwind label %lpad381
+
+invoke.cont396:                                   ; preds = %invoke.cont395
+  %call400 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont399 unwind label %lpad398
+
+invoke.cont399:                                   ; preds = %invoke.cont396
+  %call403 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont402 unwind label %lpad401
+
+invoke.cont402:                                   ; preds = %invoke.cont399
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2088 unwind label %lpad.i2090
+
+invoke.cont.i2088:                                ; preds = %invoke.cont402
+  br i1 undef, label %invoke.cont405, label %if.then.i2089
+
+if.then.i2089:                                    ; preds = %invoke.cont.i2088
+  br label %invoke.cont405
+
+lpad.i2090:                                       ; preds = %invoke.cont402
+  %tmp370 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont405:                                   ; preds = %if.then.i2089, %invoke.cont.i2088
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 -1)
+          to label %invoke.cont408 unwind label %lpad381
+
+invoke.cont408:                                   ; preds = %invoke.cont405
+  %call410 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont409 unwind label %lpad381
+
+invoke.cont409:                                   ; preds = %invoke.cont408
+  %call413 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont412 unwind label %lpad411
+
+invoke.cont412:                                   ; preds = %invoke.cont409
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2094 unwind label %lpad.i2096
+
+invoke.cont.i2094:                                ; preds = %invoke.cont412
+  br i1 undef, label %invoke.cont418, label %if.then.i2095
+
+if.then.i2095:                                    ; preds = %invoke.cont.i2094
+  br label %invoke.cont418
+
+lpad.i2096:                                       ; preds = %invoke.cont412
+  %tmp380 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont418:                                   ; preds = %if.then.i2095, %invoke.cont.i2094
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* undef, i8* undef, i8* undef, i32 0)
+          to label %invoke.cont422 unwind label %lpad381
+
+invoke.cont422:                                   ; preds = %invoke.cont418
+  %call424 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont423 unwind label %lpad381
+
+invoke.cont423:                                   ; preds = %invoke.cont422
+  %call427 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont426 unwind label %lpad425
+
+invoke.cont426:                                   ; preds = %invoke.cont423
+  %call430 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont429 unwind label %lpad428
+
+invoke.cont429:                                   ; preds = %invoke.cont426
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2100 unwind label %lpad.i2102
+
+invoke.cont.i2100:                                ; preds = %invoke.cont429
+  br i1 undef, label %invoke.cont432, label %if.then.i2101
+
+if.then.i2101:                                    ; preds = %invoke.cont.i2100
+  br label %invoke.cont432
+
+lpad.i2102:                                       ; preds = %invoke.cont429
+  %tmp390 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont432:                                   ; preds = %if.then.i2101, %invoke.cont.i2100
+  %call436 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 0)
+          to label %invoke.cont435 unwind label %lpad381
+
+invoke.cont435:                                   ; preds = %invoke.cont432
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2106 unwind label %lpad.i2108
+
+invoke.cont.i2106:                                ; preds = %invoke.cont435
+  %call444 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 5)
+          to label %invoke.cont443 unwind label %lpad381
+
+lpad.i2108:                                       ; preds = %invoke.cont435
+  %tmp396 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont443:                                   ; preds = %invoke.cont.i2106
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2112 unwind label %lpad.i2114
+
+invoke.cont.i2112:                                ; preds = %invoke.cont443
+  br i1 undef, label %invoke.cont449, label %if.then.i2113
+
+if.then.i2113:                                    ; preds = %invoke.cont.i2112
+  br label %invoke.cont449
+
+lpad.i2114:                                       ; preds = %invoke.cont443
+  %tmp402 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont449:                                   ; preds = %if.then.i2113, %invoke.cont.i2112
+  %call453 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 -2)
+          to label %invoke.cont452 unwind label %lpad381
+
+invoke.cont452:                                   ; preds = %invoke.cont449
+  %call456 = invoke i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont455 unwind label %lpad454
+
+invoke.cont455:                                   ; preds = %invoke.cont452
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2118 unwind label %lpad.i2120
+
+invoke.cont.i2118:                                ; preds = %invoke.cont455
+  br i1 undef, label %invoke.cont458, label %if.then.i2119
+
+if.then.i2119:                                    ; preds = %invoke.cont.i2118
+  br label %invoke.cont458
+
+lpad.i2120:                                       ; preds = %invoke.cont455
+  %tmp408 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont458:                                   ; preds = %if.then.i2119, %invoke.cont.i2118
+  %call461 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 -1)
+          to label %invoke.cont460 unwind label %lpad381
+
+invoke.cont460:                                   ; preds = %invoke.cont458
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2124 unwind label %lpad.i2126
+
+invoke.cont.i2124:                                ; preds = %invoke.cont460
+  br i1 undef, label %invoke.cont466, label %if.then.i2125
+
+if.then.i2125:                                    ; preds = %invoke.cont.i2124
+  br label %invoke.cont466
+
+lpad.i2126:                                       ; preds = %invoke.cont460
+  %tmp414 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+invoke.cont466:                                   ; preds = %if.then.i2125, %invoke.cont.i2124
+  %call470 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 0)
+          to label %invoke.cont469 unwind label %lpad381
+
+invoke.cont469:                                   ; preds = %invoke.cont466
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2130 unwind label %lpad.i2132
+
+invoke.cont.i2130:                                ; preds = %invoke.cont469
+  br i1 undef, label %invoke.cont475, label %if.then.i2131
+
+if.then.i2131:                                    ; preds = %invoke.cont.i2130
+  br label %invoke.cont475
+
+lpad.i2132:                                       ; preds = %invoke.cont469
+  %tmp420 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+invoke.cont475:                                   ; preds = %if.then.i2131, %invoke.cont.i2130
+  %call491 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 1)
+          to label %invoke.cont490 unwind label %lpad489
+
+invoke.cont490:                                   ; preds = %invoke.cont475
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont499 unwind label %lpad498
+
+invoke.cont499:                                   ; preds = %invoke.cont490
+  %call504 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont503 unwind label %lpad489
+
+invoke.cont503:                                   ; preds = %invoke.cont499
+  %call507 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* undef, i8* undef, i32 3)
+          to label %invoke.cont506 unwind label %lpad505
+
+invoke.cont506:                                   ; preds = %invoke.cont503
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont509 unwind label %lpad508
+
+invoke.cont509:                                   ; preds = %invoke.cont506
+  %call513 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont512 unwind label %lpad489
+
+invoke.cont512:                                   ; preds = %invoke.cont509
+  br i1 undef, label %msgSend.null-receiver, label %msgSend.call
+
+msgSend.call:                                     ; preds = %invoke.cont512
+  invoke void bitcast (void (i8*, i8*, ...)* @objc_msgSend_stret to void (%struct.CGPoint*, i8*, i8*)*)(%struct.CGPoint* sret undef, i8* undef, i8* undef)
+          to label %msgSend.cont unwind label %lpad514
+
+msgSend.null-receiver:                            ; preds = %invoke.cont512
+  br label %msgSend.cont
+
+msgSend.cont:                                     ; preds = %msgSend.null-receiver, %msgSend.call
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2136 unwind label %lpad.i2138
+
+invoke.cont.i2136:                                ; preds = %msgSend.cont
+  br i1 undef, label %invoke.cont521, label %if.then.i2137
+
+if.then.i2137:                                    ; preds = %invoke.cont.i2136
+  br label %invoke.cont521
+
+lpad.i2138:                                       ; preds = %msgSend.cont
+  %tmp468 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont521:                                   ; preds = %if.then.i2137, %invoke.cont.i2136
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef)
+          to label %invoke.cont528 unwind label %lpad527
+
+invoke.cont528:                                   ; preds = %invoke.cont521
+  %call532 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont531 unwind label %lpad489
+
+invoke.cont531:                                   ; preds = %invoke.cont528
+  %call535 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont534 unwind label %lpad533
+
+invoke.cont534:                                   ; preds = %invoke.cont531
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2142 unwind label %lpad.i2144
+
+invoke.cont.i2142:                                ; preds = %invoke.cont534
+  br i1 undef, label %invoke.cont540, label %if.then.i2143
+
+if.then.i2143:                                    ; preds = %invoke.cont.i2142
+  br label %invoke.cont540
+
+lpad.i2144:                                       ; preds = %invoke.cont534
+  %tmp486 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+invoke.cont540:                                   ; preds = %if.then.i2143, %invoke.cont.i2142
+  %call544 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef, i32 3)
+          to label %invoke.cont543 unwind label %lpad489
+
+invoke.cont543:                                   ; preds = %invoke.cont540
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* undef)
+          to label %invoke.cont546 unwind label %lpad545
+
+invoke.cont546:                                   ; preds = %invoke.cont543
+  %call549 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont548 unwind label %lpad489
+
+invoke.cont548:                                   ; preds = %invoke.cont546
+  %call555 = invoke signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont554 unwind label %lpad553
+
+invoke.cont554:                                   ; preds = %invoke.cont548
+  %tmp499 = call i8* @objc_retain(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*)) #3
+  invoke void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i8* %tmp499, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont.i2148 unwind label %lpad.i2150
+
+invoke.cont.i2148:                                ; preds = %invoke.cont554
+  call void @objc_release(i8* %tmp499) #3, !clang.imprecise_release !0
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont566 unwind label %lpad565
+
+lpad.i2150:                                       ; preds = %invoke.cont554
+  %tmp500 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  call void @objc_release(i8* %tmp499) #3, !clang.imprecise_release !0
+  unreachable
+
+invoke.cont566:                                   ; preds = %invoke.cont.i2148
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* undef, i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*))
+          to label %invoke.cont572 unwind label %lpad571
+
+invoke.cont572:                                   ; preds = %invoke.cont566
+  %call582 = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %invoke.cont581 unwind label %lpad580
+
+invoke.cont581:                                   ; preds = %invoke.cont572
+  unreachable
+
+lpad156.body:                                     ; preds = %invoke.cont117
+  %tmp1157 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad164.body:                                     ; preds = %invoke.cont157
+  %tmp1158 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad183:                                          ; preds = %invoke.cont184, %invoke.cont165
+  %tmp1159 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %lpad183.body
+
+lpad183.body:                                     ; preds = %lpad183, %lpad.i2036
+  unreachable
+
+lpad196:                                          ; preds = %invoke.cont190
+  %tmp1160 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad200:                                          ; preds = %invoke.cont197
+  %tmp1161 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad203:                                          ; preds = %invoke.cont207, %invoke.cont201
+  %tmp1162 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad212.body:                                     ; preds = %invoke.cont208
+  %tmp1163 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad220:                                          ; preds = %invoke.cont213
+  %tmp1164 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %eh.resume
+
+lpad227:                                          ; preds = %invoke.cont231, %invoke.cont221
+  %tmp1166 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup239
+
+lpad236.body:                                     ; preds = %invoke.cont232
+  %tmp1167 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup239
+
+ehcleanup239:                                     ; preds = %lpad236.body, %lpad227
+  unreachable
+
+lpad244:                                          ; preds = %invoke.cont245, %invoke.cont237
+  %tmp1168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad249:                                          ; preds = %invoke.cont247
+  %tmp1169 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad252:                                          ; preds = %invoke.cont250
+  %tmp1170 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup263
+
+lpad255:                                          ; preds = %invoke.cont253
+  %tmp1171 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup263
+
+lpad258:                                          ; preds = %invoke.cont256
+  %tmp1172 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+ehcleanup263:                                     ; preds = %lpad255, %lpad252
+  unreachable
+
+lpad265:                                          ; preds = %invoke.cont259
+  %tmp1173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad273:                                          ; preds = %invoke.cont266
+  %tmp1175 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad277:                                          ; preds = %invoke.cont274
+  %tmp1176 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad289:                                          ; preds = %invoke.cont281
+  %tmp1177 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad301:                                          ; preds = %invoke.cont290
+  %tmp1180 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad308:                                          ; preds = %invoke.cont302
+  %tmp1182 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad311:                                          ; preds = %invoke.cont309
+  %tmp1183 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad314:                                          ; preds = %invoke.cont312
+  %tmp1184 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad320:                                          ; preds = %invoke.cont315
+  %tmp1186 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad340.body.thread:                              ; preds = %land.rhs335
+  %tmp1188 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad340.body:                                     ; preds = %land.end344
+  %tmp1189 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad360:                                          ; preds = %invoke.cont345
+  %tmp1191 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %eh.resume
+
+lpad363:                                          ; preds = %invoke.cont373, %invoke.cont361
+  %tmp1192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad369:                                          ; preds = %invoke.cont364
+  %tmp1194 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad381:                                          ; preds = %invoke.cont466, %invoke.cont458, %invoke.cont449, %invoke.cont.i2106, %invoke.cont432, %invoke.cont422, %invoke.cont418, %invoke.cont408, %invoke.cont405, %invoke.cont395, %invoke.cont392, %invoke.cont382, %invoke.cont376
+  %tmp1196 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+lpad398:                                          ; preds = %invoke.cont396
+  %tmp1199 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad401:                                          ; preds = %invoke.cont399
+  %tmp1200 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad411:                                          ; preds = %invoke.cont409
+  %tmp1201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad425:                                          ; preds = %invoke.cont423
+  %tmp1203 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup477
+
+lpad428:                                          ; preds = %invoke.cont426
+  %tmp1204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad454:                                          ; preds = %invoke.cont452
+  %tmp1207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+ehcleanup477:                                     ; preds = %lpad425, %lpad381, %lpad.i2132, %lpad.i2126
+  unreachable
+
+lpad489:                                          ; preds = %invoke.cont546, %invoke.cont540, %invoke.cont528, %invoke.cont509, %invoke.cont499, %invoke.cont475
+  %tmp1211 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup560
+
+lpad498:                                          ; preds = %invoke.cont490
+  %tmp1214 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad505:                                          ; preds = %invoke.cont503
+  %tmp1215 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad508:                                          ; preds = %invoke.cont506
+  %tmp1216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad514:                                          ; preds = %msgSend.call
+  %tmp1217 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad527:                                          ; preds = %invoke.cont521
+  %tmp1219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %ehcleanup560
+
+lpad533:                                          ; preds = %invoke.cont531
+  %tmp1220 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad545:                                          ; preds = %invoke.cont543
+  %tmp1222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad553:                                          ; preds = %invoke.cont548
+  %tmp1224 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+ehcleanup560:                                     ; preds = %lpad527, %lpad489
+  br label %eh.resume
+
+lpad565:                                          ; preds = %invoke.cont.i2148
+  %tmp1225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad571:                                          ; preds = %invoke.cont566
+  %tmp1227 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  unreachable
+
+lpad580:                                          ; preds = %invoke.cont572
+  %tmp1228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %lpad580, %ehcleanup560, %lpad360, %lpad220
+  resume { i8*, i32 } undef
+}
+
+@"OBJC_EHTYPE_$_NSException" = external global i8
+
+define void @test4() {
+entry:
+  br i1 undef, label %if.end13, label %if.then10
+
+if.then10:                                        ; preds = %entry
+  br label %if.end13
+
+if.end13:                                         ; preds = %if.then10, %entry
+  %0 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*, i64, i8*, i8)*)(i8* undef, i8* undef, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring to i8*), i64 2, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_2 to i8*), i8 signext 0), !clang.arc.no_objc_arc_exceptions !0
+  br i1 undef, label %if.then17, label %if.end18
+
+if.then17:                                        ; preds = %if.end13
+  br label %if.end18
+
+if.end18:                                         ; preds = %if.then17, %if.end13
+  br i1 undef, label %if.then64, label %if.end73
+
+if.then64:                                        ; preds = %if.end18
+  br i1 undef, label %cond.end71, label %cond.true68
+
+cond.true68:                                      ; preds = %if.then64
+  br label %cond.end71
+
+cond.end71:                                       ; preds = %cond.true68, %if.then64
+  br i1 undef, label %cleanup.action, label %cleanup.done
+
+cleanup.action:                                   ; preds = %cond.end71
+  br label %cleanup.done
+
+cleanup.done:                                     ; preds = %cleanup.action, %cond.end71
+  br label %if.end73
+
+if.end73:                                         ; preds = %cleanup.done, %if.end18
+  br i1 undef, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:                                 ; preds = %if.end73
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:                           ; preds = %forcoll.refetch, %forcoll.loopinit
+  br label %forcoll.loopbody
+
+forcoll.loopbody:                                 ; preds = %forcoll.notmutated, %forcoll.loopbody.outer
+  br i1 undef, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:                                  ; preds = %forcoll.loopbody
+  br label %forcoll.notmutated
+
+forcoll.notmutated:                               ; preds = %forcoll.mutated, %forcoll.loopbody
+  br i1 undef, label %forcoll.loopbody, label %forcoll.refetch
+
+forcoll.refetch:                                  ; preds = %forcoll.notmutated
+  br i1 undef, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:                                    ; preds = %forcoll.refetch, %if.end73
+  br i1 undef, label %if.end85, label %if.then82
+
+if.then82:                                        ; preds = %forcoll.empty
+  br label %if.end85
+
+if.end85:                                         ; preds = %if.then82, %forcoll.empty
+  br i1 undef, label %if.then87, label %if.end102
+
+if.then87:                                        ; preds = %if.end85
+  br i1 undef, label %if.end94, label %if.then91
+
+if.then91:                                        ; preds = %if.then87
+  br label %if.end94
+
+if.end94:                                         ; preds = %if.then91, %if.then87
+  br i1 undef, label %if.end101, label %if.then98
+
+if.then98:                                        ; preds = %if.end94
+  br label %if.end101
+
+if.end101:                                        ; preds = %if.then98, %if.end94
+  br label %if.end102
+
+if.end102:                                        ; preds = %if.end101, %if.end85
+  br i1 undef, label %do.body113, label %if.then107
+
+if.then107:                                       ; preds = %if.end102
+  br label %do.body113
+
+do.body113:                                       ; preds = %if.then107, %if.end102
+  br i1 undef, label %if.then116, label %if.end117
+
+if.then116:                                       ; preds = %do.body113
+  br label %if.end117
+
+if.end117:                                        ; preds = %if.then116, %do.body113
+  br i1 undef, label %if.then125, label %if.end126
+
+if.then125:                                       ; preds = %if.end117
+  br label %if.end126
+
+if.end126:                                        ; preds = %if.then125, %if.end117
+  br i1 undef, label %do.end166, label %cond.true132
+
+cond.true132:                                     ; preds = %if.end126
+  br i1 undef, label %do.body148, label %cond.true151
+
+do.body148:                                       ; preds = %cond.true132
+  br i1 undef, label %do.end166, label %cond.true151
+
+cond.true151:                                     ; preds = %do.body148, %cond.true132
+  br i1 undef, label %if.then162, label %do.end166
+
+if.then162:                                       ; preds = %cond.true151
+  br label %do.end166
+
+do.end166:                                        ; preds = %if.then162, %cond.true151, %do.body148, %if.end126
+  br i1 undef, label %if.then304, label %if.then170
+
+if.then170:                                       ; preds = %do.end166
+  br i1 undef, label %do.end193, label %cond.true179
+
+cond.true179:                                     ; preds = %if.then170
+  br i1 undef, label %if.then190, label %do.end193
+
+if.then190:                                       ; preds = %cond.true179
+  br label %do.end193
+
+do.end193:                                        ; preds = %if.then190, %cond.true179, %if.then170
+  br i1 undef, label %do.body200, label %do.body283
+
+do.body200:                                       ; preds = %do.end193
+  br i1 undef, label %do.end254, label %cond.true203
+
+cond.true203:                                     ; preds = %do.body200
+  br i1 undef, label %do.body218, label %cond.true221
+
+do.body218:                                       ; preds = %cond.true203
+  br i1 undef, label %do.end254, label %cond.true221
+
+cond.true221:                                     ; preds = %do.body218, %cond.true203
+  br i1 undef, label %if.then232, label %do.body236
+
+if.then232:                                       ; preds = %cond.true221
+  br label %do.body236
+
+do.body236:                                       ; preds = %if.then232, %cond.true221
+  br i1 undef, label %do.end254, label %cond.true239
+
+cond.true239:                                     ; preds = %do.body236
+  br i1 undef, label %if.then250, label %do.end254
+
+if.then250:                                       ; preds = %cond.true239
+  br label %do.end254
+
+do.end254:                                        ; preds = %if.then250, %cond.true239, %do.body236, %do.body218, %do.body200
+  br i1 undef, label %do.end277, label %cond.true263
+
+cond.true263:                                     ; preds = %do.end254
+  br i1 undef, label %if.then274, label %do.end277
+
+if.then274:                                       ; preds = %cond.true263
+  unreachable
+
+do.end277:                                        ; preds = %cond.true263, %do.end254
+  br i1 undef, label %if.then280, label %do.body283
+
+if.then280:                                       ; preds = %do.end277
+  br label %do.body283
+
+do.body283:                                       ; preds = %if.then280, %do.end277, %do.end193
+  br i1 undef, label %if.end301, label %cond.true286
+
+cond.true286:                                     ; preds = %do.body283
+  br i1 undef, label %if.then297, label %if.end301
+
+if.then297:                                       ; preds = %cond.true286
+  br label %if.end301
+
+if.end301:                                        ; preds = %if.then297, %cond.true286, %do.body283
+  br i1 undef, label %if.then304, label %do.body351
+
+if.then304:                                       ; preds = %if.end301, %do.end166
+  br i1 undef, label %do.body309.lr.ph, label %do.body351
+
+do.body309.lr.ph:                                 ; preds = %if.then304
+  br label %do.body309
+
+do.body309:                                       ; preds = %for.cond.backedge, %do.body309.lr.ph
+  br i1 undef, label %do.end328, label %cond.true312
+
+cond.true312:                                     ; preds = %do.body309
+  br i1 undef, label %if.then323, label %do.end328
+
+if.then323:                                       ; preds = %cond.true312
+  br label %do.end328
+
+do.end328:                                        ; preds = %if.then323, %cond.true312, %do.body309
+  br i1 undef, label %for.cond.backedge, label %cond.true335
+
+for.cond.backedge:                                ; preds = %if.then346, %cond.true335, %do.end328
+  br i1 undef, label %do.body309, label %do.body351
+
+cond.true335:                                     ; preds = %do.end328
+  br i1 undef, label %if.then346, label %for.cond.backedge
+
+if.then346:                                       ; preds = %cond.true335
+  br label %for.cond.backedge
+
+do.body351:                                       ; preds = %for.cond.backedge, %if.then304, %if.end301
+  br i1 undef, label %if.then354, label %if.end355
+
+if.then354:                                       ; preds = %do.body351
+  br label %if.end355
+
+if.end355:                                        ; preds = %if.then354, %do.body351
+  br i1 undef, label %if.else, label %if.then364
+
+if.then364:                                       ; preds = %if.end355
+  br label %do.body366
+
+if.else:                                          ; preds = %if.end355
+  br label %do.body366
+
+do.body366:                                       ; preds = %if.else, %if.then364
+  br i1 undef, label %if.then369, label %if.end377.critedge
+
+if.then369:                                       ; preds = %do.body366
+  br label %if.end377
+
+if.end377.critedge:                               ; preds = %do.body366
+  br label %if.end377
+
+if.end377:                                        ; preds = %if.end377.critedge, %if.then369
+  br i1 undef, label %if.then383, label %if.end392.critedge
+
+if.then383:                                       ; preds = %if.end377
+  br label %if.end392
+
+if.end392.critedge:                               ; preds = %if.end377
+  br label %if.end392
+
+if.end392:                                        ; preds = %if.end392.critedge, %if.then383
+  br i1 undef, label %if.then398, label %if.end399
+
+if.then398:                                       ; preds = %if.end392
+  br label %if.end399
+
+if.end399:                                        ; preds = %if.then398, %if.end392
+  invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %eh.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+eh.cont:                                          ; preds = %if.end399
+  br i1 undef, label %if.then430, label %if.end439.critedge
+
+if.then430:                                       ; preds = %eh.cont
+  %1 = call i8* @objc_retain(i8* %0)
+  br label %if.end439
+
+lpad:                                             ; preds = %if.end399
+  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* @"OBJC_EHTYPE_$_NSException"
+  unreachable
+
+if.end439.critedge:                               ; preds = %eh.cont
+  %3 = call i8* @objc_retain(i8* %0)
+  br label %if.end439
+
+if.end439:                                        ; preds = %if.end439.critedge, %if.then430
+  call void @objc_release(i8* %0), !clang.imprecise_release !0
+  unreachable
+
+return:                                           ; No predecessors!
+  ret void
+}
+
+
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/pointer-types.ll b/test/Transforms/ObjCARC/pointer-types.ll
index 6abc93986434..257560d9f7b2 100644
--- a/test/Transforms/ObjCARC/pointer-types.ll
+++ b/test/Transforms/ObjCARC/pointer-types.ll
@@ -5,7 +5,7 @@
 ; in dubious ways.
 ; rdar://10551239
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
 ; CHECK-NEXT: call void @use_fptr(void ()* %otherBlock)
 ; CHECK-NEXT: %tmp11 = bitcast void ()* %otherBlock to i8*
diff --git a/test/Transforms/ObjCARC/post-inlining.ll b/test/Transforms/ObjCARC/post-inlining.ll
index ad69ccdd794d..b2d6112cf4d9 100644
--- a/test/Transforms/ObjCARC/post-inlining.ll
+++ b/test/Transforms/ObjCARC/post-inlining.ll
@@ -8,7 +8,7 @@ declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 
 ; Clean up residue left behind after inlining.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: entry:
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
@@ -21,7 +21,7 @@ entry:
 
 ; Same as test0, but with slightly different use arrangements.
 
-; CHECK: define void @test1(
+; CHECK-LABEL: define void @test1(
 ; CHECK: entry:
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
@@ -34,7 +34,7 @@ entry:
 
 ; Delete a retainRV+autoreleaseRV even if the pointer is used.
 
-; CHECK: define void @test24(
+; CHECK-LABEL: define void @test24(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   call void @use_pointer(i8* %p)
 ; CHECK-NEXT:   ret void
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
deleted file mode 100644
index f40be238baf3..000000000000
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
-; rdar://10209613
-
-%0 = type opaque
-%struct.__block_descriptor = type { i64, i64 }
-
-@_NSConcreteStackBlock = external global i8*
-@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
-@"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
-
-; CHECK: define void @test(
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]]
-; CHECK: @objc_msgSend
-; CHECK-NEXT: @objc_release(i8* %3)
-define void @test(%0* %array) uwtable {
-entry:
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
-  %0 = bitcast %0* %array to i8*
-  %1 = tail call i8* @objc_retain(i8* %0) nounwind
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5
-  store %0* %array, %0** %block.captured, align 8
-  %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8*
-  %3 = call i8* @objc_retainBlock(i8* %2) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
-  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3)
-  call void @objc_release(i8* %3) nounwind
-  %strongdestroy = load %0** %block.captured, align 8
-  %4 = bitcast %0* %strongdestroy to i8*
-  call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test, but the objc_retainBlock has a clang.arc.copy_on_escape
-; tag so it's safe to delete.
-
-; CHECK: define void @test_with_COE(
-; CHECK-NOT: @objc_retainBlock
-; CHECK: @objc_msgSend
-; CHECK: @objc_release
-; CHECK-NOT: @objc_release
-; CHECK: }
-define void @test_with_COE(%0* %array) uwtable {
-entry:
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
-  %0 = bitcast %0* %array to i8*
-  %1 = tail call i8* @objc_retain(i8* %0) nounwind
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1
-  store i32 1107296256, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3
-  store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5
-  store %0* %array, %0** %block.captured, align 8
-  %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8*
-  %3 = call i8* @objc_retainBlock(i8* %2) nounwind, !clang.arc.copy_on_escape !0
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
-  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3)
-  call void @objc_release(i8* %3) nounwind
-  %strongdestroy = load %0** %block.captured, align 8
-  %4 = bitcast %0* %strongdestroy to i8*
-  call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-declare i8* @objc_retain(i8*)
-
-declare void @__test_block_invoke_0(i8* nocapture) uwtable
-
-declare i8* @objc_retainBlock(i8*)
-
-declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
-
-declare void @objc_release(i8*)
-
-; CHECK: attributes #0 = { uwtable }
-; CHECK: attributes #1 = { nonlazybind }
-; CHECK: attributes [[NUW]] = { nounwind }
-
-!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
deleted file mode 100644
index 8df05ad22666..000000000000
--- a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
+++ /dev/null
@@ -1,215 +0,0 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
-
-declare i8* @objc_retain(i8*) nonlazybind
-declare void @objc_release(i8*) nonlazybind
-declare i8* @objc_retainBlock(i8*)
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Use by an instruction which copies the value is an escape if the             ;
-; result is an escape. The current instructions with this property are:        ;
-;                                                                              ;
-; 1. BitCast.                                                                  ;
-; 2. GEP.                                                                      ;
-; 3. PhiNode.                                                                  ;
-; 4. SelectInst.                                                               ;
-;                                                                              ;
-; Make sure that such instructions do not confuse the optimizer into removing  ;
-; an objc_retainBlock that is needed.                                          ;
-;                                                                              ;
-; rdar://13273675. (With extra test cases to handle bitcast, phi, and select.  ;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define void @bitcasttest(i8* %storage, void (...)* %block)  {
-; CHECK: define void @bitcasttest
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %storage to void (...)**
-  %t5 = bitcast i8* %t3 to void (...)*
-  store void (...)* %t5, void (...)** %t4, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @bitcasttest_a(i8* %storage, void (...)* %block)  {
-; CHECK: define void @bitcasttest_a
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %storage to void (...)**
-  %t5 = bitcast i8* %t3 to void (...)*
-  store void (...)* %t5, void (...)** %t4, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-; CHECK: }
-}
-
-define void @geptest(void (...)** %storage_array, void (...)* %block)  {
-; CHECK: define void @geptest
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  
-  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
-  
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @geptest_a(void (...)** %storage_array, void (...)* %block)  {
-; CHECK: define void @geptest_a
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  
-  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
-  
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-; CHECK: }
-}
-
-define void @selecttest(void (...)** %store1, void (...)** %store2,
-                        void (...)* %block) {
-; CHECK: define void @selecttest
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  %store = select i1 undef, void (...)** %store1, void (...)** %store2
-  store void (...)* %t4, void (...)** %store, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @selecttest_a(void (...)** %store1, void (...)** %store2,
-                          void (...)* %block) {
-; CHECK: define void @selecttest_a
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  %store = select i1 undef, void (...)** %store1, void (...)** %store2
-  store void (...)* %t4, void (...)** %store, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-; CHECK: }
-}
-
-define void @phinodetest(void (...)** %storage1,
-                         void (...)** %storage2,
-                         void (...)* %block) {
-; CHECK: define void @phinodetest
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  br i1 undef, label %store1_set, label %store2_set
-; CHECK: store1_set:
-
-store1_set:
-  br label %end
-
-store2_set:
-  br label %end
-
-end:
-; CHECK: end:
-  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK: call void @objc_release
-  call void @objc_release(i8* %t1)
-  ret void
-; CHECK: }
-}
-
-define void @phinodetest_a(void (...)** %storage1,
-                           void (...)** %storage2,
-                           void (...)* %block) {
-; CHECK: define void @phinodetest_a
-entry:
-  %t1 = bitcast void (...)* %block to i8*
-; CHECK-NOT: tail call i8* @objc_retain
-  %t2 = tail call i8* @objc_retain(i8* %t1)
-; CHECK: tail call i8* @objc_retainBlock
-  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
-  %t4 = bitcast i8* %t3 to void (...)*
-  br i1 undef, label %store1_set, label %store2_set
-
-store1_set:
-  br label %end
-
-store2_set:
-  br label %end
-
-end:
-  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
-  store void (...)* %t4, void (...)** %storage, align 8
-; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1), !clang.imprecise_release !0
-  ret void
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; This test makes sure that we do not hang clang when visiting a use ;
-; cycle caused by phi nodes during objc-arc analysis. *NOTE* This    ;
-; test case looks a little convoluted since it was produced by	     ;
-; bugpoint.							     ;
-; 								     ;
-; bugzilla://14551						     ;
-; rdar://12851911						     ;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define void @phinode_use_cycle(i8* %block) uwtable optsize ssp {
-; CHECK: define void @phinode_use_cycle(i8* %block)
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %if.then, %for.body, %entry
-  %block.05 = phi void (...)* [ null, %entry ], [ %1, %if.then ], [ %block.05, %for.body ]
-  br i1 undef, label %for.body, label %if.then
-
-if.then:                                          ; preds = %for.body
-  %0 = call i8* @objc_retainBlock(i8* %block), !clang.arc.copy_on_escape !0
-  %1 = bitcast i8* %0 to void (...)*
-  %2 = bitcast void (...)* %block.05 to i8*
-  call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0
-  br label %for.body
-}
-
-!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll
deleted file mode 100644
index a5170e323653..000000000000
--- a/test/Transforms/ObjCARC/retain-block-load.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: opt -objc-arc -S < %s | FileCheck %s
-
-; rdar://10803830
-; The optimizer should be able to prove that the block does not
-; "escape", so the retainBlock+release pair can be eliminated.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-%struct.__block_descriptor = type { i64, i64 }
-
-@_NSConcreteStackBlock = external global i8*
-@__block_descriptor_tmp = external global { i64, i64, i8*, i8* }
-
-; CHECK: define void @test() {
-; CHECK-NOT: @objc
-; CHECK: declare i8* @objc_retainBlock(i8*)
-; CHECK: declare void @objc_release(i8*)
-
-define void @test() {
-entry:
-  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8
-  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0
-  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
-  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1
-  store i32 1073741824, i32* %block.flags, align 8
-  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2
-  store i32 0, i32* %block.reserved, align 4
-  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3
-  store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
-  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4
-  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
-  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5
-  store i32 4, i32* %block.captured, align 8
-  %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8*
-  %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  %tmp2 = getelementptr inbounds i8* %tmp1, i64 16
-  %tmp3 = bitcast i8* %tmp2 to i8**
-  %tmp4 = load i8** %tmp3, align 8
-  %tmp5 = bitcast i8* %tmp4 to i32 (i8*)*
-  %call = call i32 %tmp5(i8* %tmp1)
-  call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly
-
-declare i8* @objc_retainBlock(i8*)
-
-declare void @objc_release(i8*)
-
-!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
deleted file mode 100644
index 1bb3f0276adf..000000000000
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ /dev/null
@@ -1,140 +0,0 @@
-; RUN: opt -objc-arc -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64"
-
-!0 = metadata !{}
-
-declare i8* @objc_retain(i8*)
-declare void @callee(i8)
-declare void @use_pointer(i8*)
-declare void @objc_release(i8*)
-declare i8* @objc_retainBlock(i8*)
-declare i8* @objc_autorelease(i8*)
-
-; Basic retainBlock+release elimination.
-
-; CHECK: define void @test0(i8* %tmp) {
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test0(i8* %tmp) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test0, but there's no copy_on_escape metadata, so there's no
-; optimization possible.
-
-; CHECK: define void @test0_no_metadata(i8* %tmp) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]]
-; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK: }
-define void @test0_no_metadata(i8* %tmp) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test0, but the pointer escapes, so there's no
-; optimization possible.
-
-; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK: }
-define void @test0_escape(i8* %tmp, i8** %z) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  store i8* %tmp2, i8** %z
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test0_escape, but there's no intervening call.
-
-; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK: }
-define void @test0_just_escape(i8* %tmp, i8** %z) {
-entry:
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  store i8* %tmp2, i8** %z
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Basic nested retainBlock+release elimination.
-
-; CHECK: define void @test1(i8* %tmp) {
-; CHECK-NOT: @objc
-; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]]
-; CHECK-NOT: @objc
-; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test1(i8* %tmp) {
-entry:
-  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test1, but there's no copy_on_escape metadata, so there's no
-; retainBlock+release optimization possible. But we can still eliminate
-; the outer retain+release.
-
-; CHECK: define void @test1_no_metadata(i8* %tmp) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test1_no_metadata(i8* %tmp) {
-entry:
-  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Same as test1, but the pointer escapes, so there's no
-; retainBlock+release optimization possible. But we can still eliminate
-; the outer retain+release
-
-; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
-; CHECK-NEXT: store i8* %tmp2, i8** %z
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
-; CHECK-NOT: @objc
-; CHECK: }
-define void @test1_escape(i8* %tmp, i8** %z) {
-entry:
-  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
-  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-  store i8* %tmp2, i8** %z
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @use_pointer(i8* %tmp2)
-  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
-  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index 165829f7c01f..3a2bd03692e1 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -28,7 +28,7 @@ entry:
 ; Properly create the @objc_retain declaration when it doesn't already exist.
 ; rdar://9825114
 
-; CHECK: @test1(
+; CHECK-LABEL: @test1(
 ; CHECK: @objc_retain(
 ; CHECK: @objc_retainAutoreleasedReturnValue(
 ; CHECK: @objc_release(
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index e857c9f41bb4..85a16127c6d9 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -26,7 +26,7 @@ declare i8* @returner()
 ; retain is an objc_retainAutoreleasedReturnValue, since it's
 ; better to do the RV optimization.
 
-; CHECK:      define void @test0(
+; CHECK-LABEL:      define void @test0(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %x = call i8* @returner
 ; CHECK-NEXT:   %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) [[NUW:#[0-9]+]]
@@ -54,7 +54,7 @@ return:
 
 ; Delete no-ops.
 
-; CHECK: define void @test2
+; CHECK-LABEL: define void @test2(
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test2() {
@@ -67,7 +67,7 @@ define void @test2() {
 ; Delete a redundant retainRV,autoreleaseRV when forwaring a call result
 ; directly to a return value.
 
-; CHECK: define i8* @test3
+; CHECK-LABEL: define i8* @test3(
 ; CHECK: call i8* @returner()
 ; CHECK-NEXT: ret i8* %call
 define i8* @test3() {
@@ -81,7 +81,7 @@ entry:
 ; Delete a redundant retain,autoreleaseRV when forwaring a call result
 ; directly to a return value.
 
-; CHECK: define i8* @test4
+; CHECK-LABEL: define i8* @test4(
 ; CHECK: call i8* @returner()
 ; CHECK-NEXT: ret i8* %call
 define i8* @test4() {
@@ -114,7 +114,7 @@ entry:
 ; into objc_retainAutoreleasedReturnValueAutoreleaseReturnValue?
 ; Those entrypoints don't exist yet though.
 
-; CHECK: define i8* @test7(
+; CHECK-LABEL: define i8* @test7(
 ; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
 ; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
 define i8* @test7() {
@@ -125,7 +125,7 @@ define i8* @test7() {
   ret i8* %t
 }
 
-; CHECK: define i8* @test7b(
+; CHECK-LABEL: define i8* @test7b(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
 define i8* @test7b() {
@@ -188,7 +188,7 @@ define i8* @test12(i8* %p) {
 
 ; Don't zap the objc_retainAutoreleasedReturnValue.
 
-; CHECK: define i8* @test13(
+; CHECK-LABEL: define i8* @test13(
 ; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
 ; CHECK: call i8* @objc_autorelease(i8* %p)
 ; CHECK: ret i8* %p
@@ -203,7 +203,7 @@ define i8* @test13() {
 ; Convert objc_retainAutoreleasedReturnValue to objc_retain if its
 ; argument is not a return value.
 
-; CHECK: define void @test14(
+; CHECK-LABEL: define void @test14(
 ; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
 define void @test14(i8* %p) {
@@ -214,7 +214,7 @@ define void @test14(i8* %p) {
 ; Don't convert objc_retainAutoreleasedReturnValue to objc_retain if its
 ; argument is a return value.
 
-; CHECK: define void @test15(
+; CHECK-LABEL: define void @test15(
 ; CHECK-NEXT: %y = call i8* @returner()
 ; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
 ; CHECK-NEXT: ret void
@@ -272,7 +272,7 @@ define i8* @test22(i8* %p) {
 
 ; Convert autoreleaseRV to autorelease.
 
-; CHECK: define void @test23(
+; CHECK-LABEL: define void @test23(
 ; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
 define void @test23(i8* %p) {
   store i8 0, i8* %p
@@ -283,7 +283,7 @@ define void @test23(i8* %p) {
 ; Don't convert autoreleaseRV to autorelease if the result is returned,
 ; even through a bitcast.
 
-; CHECK: define {}* @test24(
+; CHECK-LABEL: define {}* @test24(
 ; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p)
 define {}* @test24(i8* %p) {
   %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
index 5ac278a45d50..1b7cf441a685 100644
--- a/test/Transforms/ObjCARC/split-backedge.ll
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -3,7 +3,7 @@
 ; Handle a retain+release pair entirely contained within a split loop backedge.
 ; rdar://11256239
 
-; CHECK: define void @test0
+; CHECK-LABEL: define void @test0(
 ; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
 ; CHECK: call i8* @objc_retain(i8* %call) [[NUW]]
 ; CHECK: call i8* @objc_retain(i8* %cond) [[NUW]]
diff --git a/test/Transforms/ObjCARC/weak.ll b/test/Transforms/ObjCARC/weak.ll
index 85a290c0981c..119aa8257866 100644
--- a/test/Transforms/ObjCARC/weak.ll
+++ b/test/Transforms/ObjCARC/weak.ll
@@ -10,7 +10,7 @@ declare void @objc_copyWeak(i8**, i8**)
 
 ; If the pointer-to-weak-pointer is null, it's undefined behavior.
 
-; CHECK: define void @test0(
+; CHECK-LABEL: define void @test0(
 ; CHECK: store i8* undef, i8** null
 ; CHECK: store i8* undef, i8** null
 ; CHECK: store i8* undef, i8** null
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
index 58b762a7af49..cf95363e655a 100644
--- a/test/Transforms/PhaseOrdering/PR6627.ll
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -42,7 +42,7 @@ if.then:                                          ; preds = %land.lhs.true17
 if.end:
   ret void
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %x1 = load i32* %xx, align 4
 ; CHECK-NEXT: icmp eq i32 %x1, 1179403647
 ; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
@@ -86,7 +86,7 @@ if.then:                                          ; preds = %land.lhs.true17
 if.end:
   ret void
 
-; CHECK: @test2a
+; CHECK-LABEL: @test2a(
 ; CHECK: %x1 = load i32* {{.*}}, align 4
 ; CHECK-NEXT: icmp eq i32 %x1, 1179403647
 ; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index 8fbe8c58f451..2deefa618109 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -19,7 +19,7 @@ define void @test1() nounwind ssp {
   call void @free(i8* %tmp1)
   ret void
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: ret void
 }
 
@@ -44,7 +44,7 @@ entry:
   %sub = sub i32 %0, %mul
   ret i32 %sub
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %div = lshr i32 %a, 2
 ; CHECK: %add = shl nuw nsw i32 %div, 1
 ; CHECK: ret i32 0
diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/PhaseOrdering/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/PruneEH/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll b/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
index 2f5a53e0ce46..c563fe26c134 100644
--- a/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
+++ b/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
@@ -5,7 +5,7 @@
 ; Transform disabled until PR13021 is fixed.
 
 define i64 @f(i64 %x0) {
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK-NEXT: mul i64 %x0, 208
 ; CHECK-NEXT: add i64 %{{.*}}, 1617
 ; CHECK-NEXT: ret i64
diff --git a/test/Transforms/Reassociate/absorption.ll b/test/Transforms/Reassociate/absorption.ll
index 2ccc2b579496..40b3d80eee9d 100644
--- a/test/Transforms/Reassociate/absorption.ll
+++ b/test/Transforms/Reassociate/absorption.ll
@@ -6,6 +6,6 @@ define i8 @foo(i8 %x) {
   %tmp1 = or i8 %x, 127
   %tmp2 = or i8 %tmp1, 128
   ret i8 %tmp2
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: ret i8 -1
 }
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index 086474066c56..fda0ca6be1aa 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -6,7 +6,7 @@ define i32 @test1(i32 %arg) {
 	%tmp1 = sub i32 -12, %arg
 	%tmp2 = add i32 %tmp1, 12
 	ret i32 %tmp2
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: sub i32 0, %arg
 ; CHECK-NEXT: ret i32
 }
@@ -16,7 +16,7 @@ define i32 @test2(i32 %reg109, i32 %reg1111) {
 	%reg116 = add i32 %reg115, %reg1111		; <i32> [#uses=1]
 	%reg117 = add i32 %reg116, 30		; <i32> [#uses=1]
 	ret i32 %reg117
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: add i32 %reg1111, %reg109
 ; CHECK-NEXT: ret i32
 }
@@ -40,7 +40,7 @@ define void @test3() {
         ; f = (a+c)+b
 	store i32 %t4, i32* @f
 	ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: add i32
 ; CHECK: add i32
 ; CHECK-NOT: add i32
@@ -60,7 +60,7 @@ define void @test4() {
         ; f = (c+a)+b
 	store i32 %t4, i32* @f
 	ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: add i32
 ; CHECK: add i32
 ; CHECK-NOT: add i32
@@ -80,7 +80,7 @@ define void @test5() {
         ; f = (c+a)+b
 	store i32 %t4, i32* @f
 	ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: add i32
 ; CHECK: add i32
 ; CHECK-NOT: add i32
@@ -102,7 +102,7 @@ define i32 @test6() {
 	; X ^ X = 0
         %RV = xor i32 %tmp.5, %tmp.11
 	ret i32 %RV
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: ret i32 0
 }
 
@@ -115,7 +115,7 @@ define i32 @test7(i32 %A, i32 %B, i32 %C) {
 	%aac = mul i32 %ac, %A
 	%r = add i32 %aab, %aac
 	ret i32 %r
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT: add i32 %C, %B
 ; CHECK-NEXT: mul i32 
 ; CHECK-NEXT: mul i32 
@@ -129,7 +129,7 @@ define i32 @test8(i32 %X, i32 %Y, i32 %Z) {
         ; (-X)*Y + Z -> Z-X*Y
 	%C = add i32 %B, %Z
 	ret i32 %C
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT: %A = mul i32 %Y, %X
 ; CHECK-NEXT: %C = sub i32 %Z, %A
 ; CHECK-NEXT: ret i32 %C
@@ -141,7 +141,7 @@ define i32 @test9(i32 %X) {
   %Y = mul i32 %X, 47
   %Z = add i32 %Y, %Y
   ret i32 %Z
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NEXT: mul i32 %X, 94
 ; CHECK-NEXT: ret i32
 }
@@ -150,7 +150,7 @@ define i32 @test10(i32 %X) {
   %Y = add i32 %X ,%X
   %Z = add i32 %Y, %X
   ret i32 %Z
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NEXT: mul i32 %X, 3
 ; CHECK-NEXT: ret i32
 }
@@ -160,7 +160,7 @@ define i32 @test11(i32 %W) {
   %Y = add i32 %X ,%X
   %Z = add i32 %Y, %X
   ret i32 %Z
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NEXT: mul i32 %W, 381
 ; CHECK-NEXT: ret i32
 }
@@ -173,7 +173,7 @@ define i32 @test12(i32 %X) {
   %Y = add i32 %A ,%B
   %Z = add i32 %Y, %C
   ret i32 %Z
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NEXT: mul i32 %X, -3
 ; CHECK-NEXT: add i32{{.*}}, 6
 ; CHECK-NEXT: ret i32
@@ -185,7 +185,7 @@ define i32 @test13(i32 %X1, i32 %X2, i32 %X3) {
   %C = mul i32 %X1, %X3  ; X1*X3
   %D = add i32 %B, %C    ; -X1*X2 + X1*X3 -> X1*(X3-X2)
   ret i32 %D
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK-NEXT: sub i32 %X3, %X2
 ; CHECK-NEXT: mul i32 {{.*}}, %X1
 ; CHECK-NEXT: ret i32
@@ -197,7 +197,7 @@ define i32 @test14(i32 %X1, i32 %X2) {
   %C = mul i32 %X2, -47  ; X2*-47
   %D = add i32 %B, %C    ; X1*47 + X2*-47 -> 47*(X1-X2)
   ret i32 %D
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: sub i32 %X1, %X2
 ; CHECK-NEXT: mul i32 {{.*}}, 47
 ; CHECK-NEXT: ret i32
@@ -210,7 +210,7 @@ define i32 @test15(i32 %X1, i32 %X2, i32 %X3) {
   %C = and i1 %A, %B
   %D = select i1 %C, i32 %X1, i32 0
   ret i32 %D
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK: and i1 %A, %B
 }
 
diff --git a/test/Transforms/Reassociate/inverses.ll b/test/Transforms/Reassociate/inverses.ll
index 34abdc7aae0d..afe076caea92 100644
--- a/test/Transforms/Reassociate/inverses.ll
+++ b/test/Transforms/Reassociate/inverses.ll
@@ -6,7 +6,7 @@ define i32 @test1(i32 %a, i32 %b) {
         ; (A&B)&~A == 0
 	%tmp.5 = and i32 %tmp.2, %tmp.4
 	ret i32 %tmp.5
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
 }
 
@@ -17,7 +17,7 @@ define i32 @test2(i32 %a, i32 %b) {
 	; A&~A == 0
         %tmp.5 = and i32 %tmp.2, %tmp.4
 	ret i32 %tmp.5
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 0
 }
 
@@ -28,7 +28,7 @@ define i32 @test3(i32 %b, i32 %a) {
         ; (b+(a+1234))+-a -> b+1234
   	%tmp.5 = add i32 %tmp.2, %tmp.4
 	ret i32 %tmp.5
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: %tmp.5 = add i32 %b, 1234
 ; CHECK: ret i32 %tmp.5
 }
diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/Reassociate/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/mulfactor.ll b/test/Transforms/Reassociate/mulfactor.ll
index 6c099b43b363..951228ec3cc2 100644
--- a/test/Transforms/Reassociate/mulfactor.ll
+++ b/test/Transforms/Reassociate/mulfactor.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -reassociate -S | FileCheck %s
 
 define i32 @test1(i32 %a, i32 %b) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: mul i32 %a, %a
 ; CHECK-NEXT: mul i32 %a, 2
 ; CHECK-NEXT: add
@@ -20,7 +20,7 @@ entry:
 }
 
 define i32 @test2(i32 %t) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: mul
 ; CHECK-NEXT: add
 ; CHECK-NEXT: ret
@@ -35,7 +35,7 @@ entry:
 
 define i32 @test3(i32 %x) {
 ; (x^8)
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -54,7 +54,7 @@ entry:
 
 define i32 @test4(i32 %x) {
 ; (x^7)
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -73,7 +73,7 @@ entry:
 
 define i32 @test5(i32 %x, i32 %y) {
 ; (x^4) * (y^2)
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -90,7 +90,7 @@ entry:
 
 define i32 @test6(i32 %x, i32 %y, i32 %z) {
 ; (x^5) * (y^3) * z
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -113,7 +113,7 @@ entry:
 
 define i32 @test7(i32 %x, i32 %y, i32 %z) {
 ; (x^4) * (y^3) * (z^2)
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index 7466d2e99d89..d79464753f12 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -2,7 +2,7 @@
 
 define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
-; CHECK: @multistep1
+; CHECK-LABEL: @multistep1(
   %t0 = mul i64 %a, %b
   %t1 = mul i64 %a, %t0 ; a*(a*b)
   %t2 = mul i64 %a, %c
@@ -17,7 +17,7 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
 
 define i64 @multistep2(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; Check that a*b+a*c+d is turned into a*(b+c)+d.
-; CHECK: @multistep2
+; CHECK-LABEL: @multistep2(
   %t0 = mul i64 %a, %b
   %t1 = mul i64 %a, %c
   %t2 = add i64 %t1, %d ; a*c+d
diff --git a/test/Transforms/Reassociate/no-op.ll b/test/Transforms/Reassociate/no-op.ll
index 0444cf082d0d..7b02df99464b 100644
--- a/test/Transforms/Reassociate/no-op.ll
+++ b/test/Transforms/Reassociate/no-op.ll
@@ -8,7 +8,7 @@ declare void @use(i32)
 define void @test1(i32 %a, i32 %b) {
 ; Shouldn't change or move any of the add instructions.  Should commute but
 ; otherwise not change or move any of the mul instructions.
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %a0 = add nsw i32 %a, 1
 ; CHECK-NEXT: %a0 = add nsw i32 %a, 1
   %m0 = mul nsw i32 3, %a
@@ -25,7 +25,7 @@ define void @test1(i32 %a, i32 %b) {
 
 define void @test2(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; The initial add doesn't change so should not lose the nsw flag.
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   %a0 = add nsw i32 %b, %a
 ; CHECK-NEXT: %a0 = add nsw i32 %b, %a
   %a1 = add nsw i32 %a0, %d
diff --git a/test/Transforms/Reassociate/optional-flags.ll b/test/Transforms/Reassociate/optional-flags.ll
index 40f7d5bf5b80..bf599be78bc9 100644
--- a/test/Transforms/Reassociate/optional-flags.ll
+++ b/test/Transforms/Reassociate/optional-flags.ll
@@ -3,7 +3,7 @@
 
 ; Reassociate should clear optional flags like nsw when reassociating.
 
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK: %y = add i64 %b, %a
 ; CHECK: %z = add i64 %y, %c
 define i64 @test0(i64 %a, i64 %b, i64 %c) {
@@ -12,7 +12,7 @@ define i64 @test0(i64 %a, i64 %b, i64 %c) {
   ret i64 %z
 }
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %y = add i64 %b, %a
 ; CHECK: %z = add i64 %y, %c
 define i64 @test1(i64 %a, i64 %b, i64 %c) {
diff --git a/test/Transforms/Reassociate/repeats.ll b/test/Transforms/Reassociate/repeats.ll
index 6a020470f379..547cb0ad72c1 100644
--- a/test/Transforms/Reassociate/repeats.ll
+++ b/test/Transforms/Reassociate/repeats.ll
@@ -3,14 +3,14 @@
 ; Tests involving repeated operations on the same value.
 
 define i8 @nilpotent(i8 %x) {
-; CHECK: @nilpotent
+; CHECK-LABEL: @nilpotent(
   %tmp = xor i8 %x, %x
   ret i8 %tmp
 ; CHECK: ret i8 0
 }
 
 define i2 @idempotent(i2 %x) {
-; CHECK: @idempotent
+; CHECK-LABEL: @idempotent(
   %tmp1 = and i2 %x, %x
   %tmp2 = and i2 %tmp1, %x
   %tmp3 = and i2 %tmp2, %x
@@ -19,7 +19,7 @@ define i2 @idempotent(i2 %x) {
 }
 
 define i2 @add(i2 %x) {
-; CHECK: @add
+; CHECK-LABEL: @add(
   %tmp1 = add i2 %x, %x
   %tmp2 = add i2 %tmp1, %x
   %tmp3 = add i2 %tmp2, %x
@@ -28,7 +28,7 @@ define i2 @add(i2 %x) {
 }
 
 define i2 @cst_add() {
-; CHECK: @cst_add
+; CHECK-LABEL: @cst_add(
   %tmp1 = add i2 1, 1
   %tmp2 = add i2 %tmp1, 1
   ret i2 %tmp2
@@ -36,7 +36,7 @@ define i2 @cst_add() {
 }
 
 define i8 @cst_mul() {
-; CHECK: @cst_mul
+; CHECK-LABEL: @cst_mul(
   %tmp1 = mul i8 3, 3
   %tmp2 = mul i8 %tmp1, 3
   %tmp3 = mul i8 %tmp2, 3
@@ -47,7 +47,7 @@ define i8 @cst_mul() {
 
 define i3 @foo3x5(i3 %x) {
 ; Can be done with two multiplies.
-; CHECK: @foo3x5
+; CHECK-LABEL: @foo3x5(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: ret
@@ -60,7 +60,7 @@ define i3 @foo3x5(i3 %x) {
 
 define i3 @foo3x6(i3 %x) {
 ; Can be done with two multiplies.
-; CHECK: @foo3x6
+; CHECK-LABEL: @foo3x6(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: ret
@@ -74,7 +74,7 @@ define i3 @foo3x6(i3 %x) {
 
 define i3 @foo3x7(i3 %x) {
 ; Can be done with two multiplies.
-; CHECK: @foo3x7
+; CHECK-LABEL: @foo3x7(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: ret
@@ -89,7 +89,7 @@ define i3 @foo3x7(i3 %x) {
 
 define i4 @foo4x8(i4 %x) {
 ; Can be done with two multiplies.
-; CHECK: @foo4x8
+; CHECK-LABEL: @foo4x8(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: ret
@@ -105,7 +105,7 @@ define i4 @foo4x8(i4 %x) {
 
 define i4 @foo4x9(i4 %x) {
 ; Can be done with three multiplies.
-; CHECK: @foo4x9
+; CHECK-LABEL: @foo4x9(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -123,7 +123,7 @@ define i4 @foo4x9(i4 %x) {
 
 define i4 @foo4x10(i4 %x) {
 ; Can be done with three multiplies.
-; CHECK: @foo4x10
+; CHECK-LABEL: @foo4x10(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -142,7 +142,7 @@ define i4 @foo4x10(i4 %x) {
 
 define i4 @foo4x11(i4 %x) {
 ; Can be done with four multiplies.
-; CHECK: @foo4x11
+; CHECK-LABEL: @foo4x11(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -163,7 +163,7 @@ define i4 @foo4x11(i4 %x) {
 
 define i4 @foo4x12(i4 %x) {
 ; Can be done with two multiplies.
-; CHECK: @foo4x12
+; CHECK-LABEL: @foo4x12(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: ret
@@ -183,7 +183,7 @@ define i4 @foo4x12(i4 %x) {
 
 define i4 @foo4x13(i4 %x) {
 ; Can be done with three multiplies.
-; CHECK: @foo4x13
+; CHECK-LABEL: @foo4x13(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -205,7 +205,7 @@ define i4 @foo4x13(i4 %x) {
 
 define i4 @foo4x14(i4 %x) {
 ; Can be done with three multiplies.
-; CHECK: @foo4x14
+; CHECK-LABEL: @foo4x14(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
@@ -228,7 +228,7 @@ define i4 @foo4x14(i4 %x) {
 
 define i4 @foo4x15(i4 %x) {
 ; Can be done with four multiplies.
-; CHECK: @foo4x15
+; CHECK-LABEL: @foo4x15(
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
 ; CHECK-NEXT: mul
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index b9353c7f81fe..a22689805fb5 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -14,7 +14,7 @@ define i32 @xor1(i32 %x) {
   %xor = xor i32 %or, %or1
   ret i32 %xor
 
-;CHECK: @xor1
+;CHECK-LABEL: @xor1(
 ;CHECK: %and.ra = and i32 %x, 435
 ;CHECK: %xor = xor i32 %and.ra, 435
 }
@@ -28,7 +28,7 @@ define i32 @xor2(i32 %x, i32 %y) {
   %xor2 = xor i32 %xor, %and1
   ret i32 %xor2
 
-;CHECK: @xor2
+;CHECK-LABEL: @xor2(
 ;CHECK: %and.ra = and i32 %x, 435
 ;CHECK: %xor2 = xor i32 %and.ra, %y
 }
@@ -42,7 +42,7 @@ define i32 @xor3(i32 %x, i32 %y) {
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
 
-;CHECK: @xor3
+;CHECK-LABEL: @xor3(
 ;CHECK: %and.ra = and i32 %x, -436
 ;CHECK: %xor = xor i32 %y, 123
 ;CHECK: %xor1 = xor i32 %xor, %and.ra
@@ -54,7 +54,7 @@ define i32 @xor4(i32 %x, i32 %y) {
   %xor = xor i32 %y, 435
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
-; CHECK: @xor4
+; CHECK-LABEL: @xor4(
 ; CHECK: %and = and i32 %x, -124
 ; CHECK: %xor = xor i32 %y, 435
 ; CHECK: %xor1 = xor i32 %xor, %and
@@ -74,7 +74,7 @@ define i32 @xor_special1(i32 %x, i32 %y) {
   %and = and i32 %x, -124
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
-; CHECK: @xor_special1
+; CHECK-LABEL: @xor_special1(
 ; CHECK: %xor1 = xor i32 %y, 123
 ; CHECK: ret i32 %xor1
 }
@@ -87,7 +87,7 @@ define i32 @xor_special2(i32 %x, i32 %y) {
   %and = and i32 %x, 123
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
-; CHECK: @xor_special2
+; CHECK-LABEL: @xor_special2(
 ; CHECK: %xor = xor i32 %y, 123
 ; CHECK: %xor1 = xor i32 %xor, %x
 ; CHECK: ret i32 %xor1
@@ -99,7 +99,7 @@ define i32 @xor_special3(i32 %x) {
   %or1 = or i32 %x, 123
   %xor = xor i32 %or, %or1
   ret i32 %xor
-;CHECK: @xor_special3
+;CHECK-LABEL: @xor_special3(
 ;CHECK: ret i32 0
 }
 
@@ -109,7 +109,7 @@ define i32 @xor_special4(i32 %x) {
   %or1 = and i32 123, %x
   %xor = xor i32 %or, %or1
   ret i32 %xor
-;CHECK: @xor_special4
+;CHECK-LABEL: @xor_special4(
 ;CHECK: ret i32 0
 }
 
@@ -129,7 +129,7 @@ define i32 @xor_ra_size1(i32 %x) {
 
   %add = add i32 %xor, %or
   ret i32 %add
-;CHECK: @xor_ra_size1
+;CHECK-LABEL: @xor_ra_size1(
 ;CHECK: %xor = xor i32 %and.ra, 435
 }
 
@@ -145,7 +145,7 @@ define i32 @xor_ra_size2(i32 %x) {
   %add2 = add i32 %add, %or1
   ret i32 %add2
 
-;CHECK: @xor_ra_size2
+;CHECK-LABEL: @xor_ra_size2(
 ;CHECK: %or1 = or i32 %x, 456
 ;CHECK: %xor = xor i32 %or, %or1
 }
@@ -188,6 +188,6 @@ define i32 @xor_bug2(i32, i32, i32, i32) {
   %19 = add i32 %18, %12
   %20 = add i32 %19, %15
   ret i32 %20
-;CHECK: @xor_bug2
+;CHECK-LABEL: @xor_bug2(
 ;CHECK: xor i32 %5, 891034567
 }
diff --git a/test/Transforms/Reg2Mem/lit.local.cfg b/test/Transforms/Reg2Mem/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/Reg2Mem/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/atomic-load-store.ll b/test/Transforms/SCCP/atomic-load-store.ll
index 09061f0a6fd5..53e4c10a720a 100644
--- a/test/Transforms/SCCP/atomic-load-store.ll
+++ b/test/Transforms/SCCP/atomic-load-store.ll
@@ -16,7 +16,7 @@ F:
 	store atomic i32 123, i32* @G seq_cst, align 4
 	ret i32 0
 }
-; CHECK: define i32 @test1
+; CHECK-LABEL: define i32 @test1(
 ; CHECK-NOT: store
 ; CHECK: ret i32 17
 
@@ -25,6 +25,6 @@ define i32 @test2() {
 	ret i32 %V
 }
 
-; CHECK: define i32 @test2
+; CHECK-LABEL: define i32 @test2(
 ; CHECK-NOT: load
 ; CHECK: ret i32 222
diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll
index b49da97ab2c0..ca586a009b33 100644
--- a/test/Transforms/SCCP/ipsccp-addr-taken.ll
+++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 define internal i32 @foo() nounwind noinline ssp {
 entry:
   ret i32 0
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: entry:
 ; CHECK: ret i32 0
 }
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index 8340f0c1e127..c1c6c926fd9b 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -6,14 +6,14 @@ define internal i32 @test1a(i32 %A) {
 	%X = add i32 1, 2
 	ret i32 %A
 }
-; CHECK: define internal i32 @test1a
+; CHECK-LABEL: define internal i32 @test1a(
 ; CHECK: ret i32 undef
 
 define i32 @test1b() {
 	%X = call i32 @test1a( i32 17 )
 	ret i32 %X
 
-; CHECK: define i32 @test1b
+; CHECK-LABEL: define i32 @test1b(
 ; CHECK: ret i32 17
 }
 
@@ -31,7 +31,7 @@ F:
 	%C.upgrd.1 = call i32 @test2a(i32 1)
 	ret i32 %C.upgrd.1
 }
-; CHECK: define internal i32 @test2a
+; CHECK-LABEL: define internal i32 @test2a(
 ; CHECK-NEXT: br label %T
 ; CHECK: ret i32 undef
 
@@ -40,7 +40,7 @@ define i32 @test2b() {
 	%X = call i32 @test2a(i32 0)
 	ret i32 %X
 }
-; CHECK: define i32 @test2b
+; CHECK-LABEL: define i32 @test2b(
 ; CHECK-NEXT: %X = call i32 @test2a(i32 0)
 ; CHECK-NEXT: ret i32 0
 
@@ -54,7 +54,7 @@ define void @test3a() {
 	store i32 %X, i32* @G
 	ret void
 }
-; CHECK: define void @test3a
+; CHECK-LABEL: define void @test3a(
 ; CHECK-NEXT: ret void
 
 
@@ -69,7 +69,7 @@ F:
 	store i32 123, i32* @G
 	ret i32 0
 }
-; CHECK: define i32 @test3b
+; CHECK-LABEL: define i32 @test3b(
 ; CHECK-NOT: store
 ; CHECK: ret i32 0
 
@@ -102,7 +102,7 @@ B:
 define internal i64 @test4c(i64 %a) {
   ret i64 %a
 }
-; CHECK: define internal i64 @test4c
+; CHECK-LABEL: define internal i64 @test4c(
 ; CHECK: ret i64 undef
 
 
@@ -149,7 +149,7 @@ define i64 @test6b() {
   %a = call i64 @test6a()
   ret i64 %a
 }
-; CHECK: define i64 @test6b
+; CHECK-LABEL: define i64 @test6b(
 ; CHECK: ret i64 0
 
 ;;======================== test7
@@ -162,7 +162,7 @@ define internal %T @test7a(i32 %A) {
   %mrv0 = insertvalue %T undef, i32 %X, 0
   %mrv1 = insertvalue %T %mrv0, i32 %A, 1
   ret %T %mrv1
-; CHECK: @test7a
+; CHECK-LABEL: @test7a(
 ; CHECK-NEXT: %mrv0 = insertvalue %T undef, i32 18, 0
 ; CHECK-NEXT: %mrv1 = insertvalue %T %mrv0, i32 17, 1
 }
@@ -172,7 +172,7 @@ define i32 @test7b() {
         %Y = extractvalue %T %X, 0
 	%Z = add i32 %Y, %Y
 	ret i32 %Z
-; CHECK: define i32 @test7b
+; CHECK-LABEL: define i32 @test7b(
 ; CHECK-NEXT: call %T @test7a(i32 17)
 ; CHECK-NEXT: ret i32 36
 }
@@ -183,7 +183,7 @@ define i32 @test7b() {
 define internal {} @test8a(i32 %A, i32* %P) {
   store i32 %A, i32* %P
   ret {} {}
-; CHECK: @test8a
+; CHECK-LABEL: @test8a(
 ; CHECK-NEXT: store i32 5, 
 ; CHECK-NEXT: ret 
 }
@@ -191,7 +191,7 @@ define internal {} @test8a(i32 %A, i32* %P) {
 define void @test8b(i32* %P) {
     %X = call {} @test8a(i32 5, i32* %P)
     ret void
-; CHECK: define void @test8b
+; CHECK-LABEL: define void @test8b(
 ; CHECK-NEXT: call {} @test8a
 ; CHECK-NEXT: ret void
 }
@@ -216,7 +216,7 @@ define i32 @test10a() nounwind {
 entry:
   %call = call i32 @test10b(i32 undef)
   ret i32 %call
-; CHECK: define i32 @test10a
+; CHECK-LABEL: define i32 @test10a(
 ; CHECK: ret i32 0
 }
 
@@ -224,6 +224,6 @@ define internal i32 @test10b(i32 %x) nounwind {
 entry:
   %r = and i32 %x, 1
   ret i32 %r
-; CHECK: define internal i32 @test10b
+; CHECK-LABEL: define internal i32 @test10b(
 ; CHECK: ret i32 undef
 }
diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/SCCP/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/sccptest.ll b/test/Transforms/SCCP/sccptest.ll
index a719f6cfb48d..5cc5087b1010 100644
--- a/test/Transforms/SCCP/sccptest.ll
+++ b/test/Transforms/SCCP/sccptest.ll
@@ -14,7 +14,7 @@ BB3:		; preds = %BB2, %BB1
 	%Ret = phi i32 [ %Val, %BB1 ], [ 1, %BB2 ]		; <i32> [#uses=1]
 	ret i32 %Ret
         
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %Ret = phi i32 [ 0, %BB1 ], [ 1, %BB2 ]
 }
 
@@ -22,7 +22,7 @@ BB3:		; preds = %BB2, %BB1
 ; that SCCP gets right.
 ;
 define i32 @test2(i32 %i0, i32 %j0) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 BB1:
 	br label %BB2
 BB2:
diff --git a/test/Transforms/SCCP/switch.ll b/test/Transforms/SCCP/switch.ll
index 9f934237e619..155faa5c6067 100644
--- a/test/Transforms/SCCP/switch.ll
+++ b/test/Transforms/SCCP/switch.ll
@@ -4,7 +4,7 @@
 ; with no cases.
 declare void @foo()
 define void @test1() {
-; CHECK: define void @test1
+; CHECK-LABEL: define void @test1(
 ; CHECK: call void @foo()
   switch i32 undef, label %d []
 d:
diff --git a/test/Transforms/SCCP/undef-resolve.ll b/test/Transforms/SCCP/undef-resolve.ll
index a1a600c9607a..2b40183c2cc5 100644
--- a/test/Transforms/SCCP/undef-resolve.ll
+++ b/test/Transforms/SCCP/undef-resolve.ll
@@ -5,7 +5,7 @@
 define double @test1() {
   %t = sitofp i32 undef to double
   ret double %t
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: ret double 0.0
 }
 
@@ -13,7 +13,7 @@ define double @test1() {
 ; rdar://7832370
 ; Check that lots of stuff doesn't get turned into undef.
 define i32 @test2() nounwind readnone ssp {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 init:
   br label %control.outer.outer
 
@@ -110,7 +110,7 @@ bb1:                                              ; preds = %bb1.us-lcssa, %bb1.
 define i32 @test3() {
   %t = xor i32 undef, undef
   ret i32 %t
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: ret i32 0
 }
 
@@ -118,7 +118,7 @@ define i32 @test3() {
 define double @test4(double %x) {
   %t = fadd double %x, undef
   ret double %t
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: fadd double %x, undef
 }
 
@@ -126,7 +126,7 @@ define double @test4(double %x) {
 define i32 @test5() {
   %t = sext i8 undef to i32
   ret i32 %t
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: ret i32 0
 }
 
@@ -134,7 +134,7 @@ define i32 @test5() {
 define i32 @test6() {
   %t = ashr i32 undef, 31
   ret i32 %t
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: ret i32 -1
 }
 
@@ -142,7 +142,7 @@ define i32 @test6() {
 define i32 @test7() {
   %t = lshr i32 undef, 31
   ret i32 %t
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: ret i32 0
 }
 
@@ -150,7 +150,7 @@ define i32 @test7() {
 define i1 @test8() {
   %t = icmp eq i32 undef, -1
   ret i1 %t
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK: ret i1 undef
 }
 
@@ -158,7 +158,7 @@ define i1 @test8() {
 define i1 @test9() {
   %t = icmp ugt i32 undef, -1
   ret i1 %t
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK: icmp ugt
 }
 
@@ -167,6 +167,6 @@ define i64 @test10() {
 entry:
   %e = extractvalue { i64, i64 } undef, 1
   ret i64 %e
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: ret i64 undef
 }
diff --git a/test/Transforms/SLPVectorizer/ARM/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
new file mode 100644
index 000000000000..5fc35d80541d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/ARM/memory.ll b/test/Transforms/SLPVectorizer/ARM/memory.ll
new file mode 100644
index 000000000000..383c808d21cf
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM/memory.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+; On swift unaligned <2 x double> stores need 4uops and it is therefore cheaper
+; to do this scalar.
+
+; CHECK-LABEL: expensive_double_store
+; CHECK-NOT: load <2 x double>
+; CHECK-NOT: store <2 x double>
+define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
+entry:
+  %0 = load double* %src, align 8
+  store double %0, double* %dst, align 8
+  %arrayidx2 = getelementptr inbounds double* %src, i64 1
+  %1 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %dst, i64 1
+  store double %1, double* %arrayidx3, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/ARM/sroa.ll b/test/Transforms/SLPVectorizer/ARM/sroa.ll
new file mode 100644
index 000000000000..e0c75b147f6f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/ARM/sroa.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -mcpu=swift -mtriple=thumbv7-apple-ios -basicaa -slp-vectorizer < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+%class.Complex = type { double, double }
+
+; Code like this is the result of SROA. Make sure we don't vectorize this
+; because in the scalar version of this the shl/or are handled by the
+; backend and disappear, while the vectorized code stays.
+
+; CHECK-LABEL: SROAed
+; CHECK-NOT: shl <2 x i64>
+; CHECK-NOT: or <2 x i64>
+
+define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) {
+entry:
+  %a.coerce.fca.0.extract = extractvalue [4 x i32] %a.coerce, 0
+  %a.sroa.0.0.insert.ext = zext i32 %a.coerce.fca.0.extract to i64
+  %a.coerce.fca.1.extract = extractvalue [4 x i32] %a.coerce, 1
+  %a.sroa.0.4.insert.ext = zext i32 %a.coerce.fca.1.extract to i64
+  %a.sroa.0.4.insert.shift = shl nuw i64 %a.sroa.0.4.insert.ext, 32
+  %a.sroa.0.4.insert.insert = or i64 %a.sroa.0.4.insert.shift, %a.sroa.0.0.insert.ext
+  %0 = bitcast i64 %a.sroa.0.4.insert.insert to double
+  %a.coerce.fca.2.extract = extractvalue [4 x i32] %a.coerce, 2
+  %a.sroa.3.8.insert.ext = zext i32 %a.coerce.fca.2.extract to i64
+  %a.coerce.fca.3.extract = extractvalue [4 x i32] %a.coerce, 3
+  %a.sroa.3.12.insert.ext = zext i32 %a.coerce.fca.3.extract to i64
+  %a.sroa.3.12.insert.shift = shl nuw i64 %a.sroa.3.12.insert.ext, 32
+  %a.sroa.3.12.insert.insert = or i64 %a.sroa.3.12.insert.shift, %a.sroa.3.8.insert.ext
+  %1 = bitcast i64 %a.sroa.3.12.insert.insert to double
+  %b.coerce.fca.0.extract = extractvalue [4 x i32] %b.coerce, 0
+  %b.sroa.0.0.insert.ext = zext i32 %b.coerce.fca.0.extract to i64
+  %b.coerce.fca.1.extract = extractvalue [4 x i32] %b.coerce, 1
+  %b.sroa.0.4.insert.ext = zext i32 %b.coerce.fca.1.extract to i64
+  %b.sroa.0.4.insert.shift = shl nuw i64 %b.sroa.0.4.insert.ext, 32
+  %b.sroa.0.4.insert.insert = or i64 %b.sroa.0.4.insert.shift, %b.sroa.0.0.insert.ext
+  %2 = bitcast i64 %b.sroa.0.4.insert.insert to double
+  %b.coerce.fca.2.extract = extractvalue [4 x i32] %b.coerce, 2
+  %b.sroa.3.8.insert.ext = zext i32 %b.coerce.fca.2.extract to i64
+  %b.coerce.fca.3.extract = extractvalue [4 x i32] %b.coerce, 3
+  %b.sroa.3.12.insert.ext = zext i32 %b.coerce.fca.3.extract to i64
+  %b.sroa.3.12.insert.shift = shl nuw i64 %b.sroa.3.12.insert.ext, 32
+  %b.sroa.3.12.insert.insert = or i64 %b.sroa.3.12.insert.shift, %b.sroa.3.8.insert.ext
+  %3 = bitcast i64 %b.sroa.3.12.insert.insert to double
+  %add = fadd double %0, %2
+  %add3 = fadd double %1, %3
+  %re.i.i = getelementptr inbounds %class.Complex* %agg.result, i32 0, i32 0
+  store double %add, double* %re.i.i, align 4
+  %im.i.i = getelementptr inbounds %class.Complex* %agg.result, i32 0, i32 1
+  store double %add3, double* %im.i.i, align 4
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/R600/lit.local.cfg b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
new file mode 100644
index 000000000000..9e0ab99235e0
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'R600' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/SLPVectorizer/R600/simplebb.ll b/test/Transforms/SLPVectorizer/R600/simplebb.ll
new file mode 100644
index 000000000000..b6d794b994d4
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/R600/simplebb.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+; Simple 3-pair chain with loads and stores
+define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_3_3_3(
+; CHECK: load <2 x double> addrspace(3)*
+; CHECK: load <2 x double> addrspace(3)*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+  %i0 = load double addrspace(3)* %a, align 8
+  %i1 = load double addrspace(3)* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double addrspace(3)* %a, i64 1
+  %i3 = load double addrspace(3)* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double addrspace(3)* %b, i64 1
+  %i4 = load double addrspace(3)* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double addrspace(3)* %c, align 8
+  %arrayidx5 = getelementptr inbounds double addrspace(3)* %c, i64 1
+  store double %mul5, double addrspace(3)* %arrayidx5, align 8
+  ret void
+}
+
+define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1_as_3_0_0(
+; CHECK: load <2 x double> addrspace(3)*
+; CHECK: load <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
+; CHECK: ret
+  %i0 = load double addrspace(3)* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double addrspace(3)* %a, i64 1
+  %i3 = load double addrspace(3)* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
+define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_0_0_3(
+; CHECK: load <2 x double>*
+; CHECK: load <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double addrspace(3)* %c, align 8
+  %arrayidx5 = getelementptr inbounds double addrspace(3)* %c, i64 1
+  store double %mul5, double addrspace(3)* %arrayidx5, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll
index 04eb8f919bc7..bba285526a4b 100644
--- a/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n) {
diff --git a/test/Transforms/SLPVectorizer/X86/cast.ll b/test/Transforms/SLPVectorizer/X86/cast.ll
index 344dbbca2c57..e340fba351a3 100644
--- a/test/Transforms/SLPVectorizer/X86/cast.ll
+++ b/test/Transforms/SLPVectorizer/X86/cast.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;     A[2] = B[2];
 ;     A[3] = B[3];
 ; }
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: load <4 x i8>
 ;CHECK: sext
 ;CHECK: store <4 x i32>
diff --git a/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
new file mode 100644
index 000000000000..0c124a75d417
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; int foo(double * restrict A, double * restrict B, double G) {
+;   A[0] = (B[10] ? G : 1);
+;   A[1] = (B[11] ? G : 1);
+; }
+
+;CHECK-LABEL: @foo(
+;CHECK: load <2 x double>
+;CHECK: fcmp une <2 x double>
+;CHECK: select <2 x i1>
+;CHECK: store <2 x double>
+;CHECK: ret i32 undef
+define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) {  ; two scalar load / fcmp / select / store chains over B[10..11] and A[0..1]
+entry:
+  %arrayidx = getelementptr inbounds double* %B, i64 10  ; &B[10]
+  %0 = load double* %arrayidx, align 8  ; B[10]
+  %tobool = fcmp une double %0, 0.000000e+00  ; B[10] != 0.0 (unordered or not equal)
+  %cond = select i1 %tobool, double %G, double 1.000000e+00  ; B[10] ? G : 1.0
+  store double %cond, double* %A, align 8  ; A[0] = ...
+  %arrayidx2 = getelementptr inbounds double* %B, i64 11  ; &B[11]
+  %1 = load double* %arrayidx2, align 8  ; B[11]
+  %tobool3 = fcmp une double %1, 0.000000e+00  ; B[11] != 0.0 (unordered or not equal)
+  %cond7 = select i1 %tobool3, double %G, double 1.000000e+00  ; B[11] ? G : 1.0
+  %arrayidx8 = getelementptr inbounds double* %A, i64 1  ; &A[1]
+  store double %cond7, double* %arrayidx8, align 8  ; A[1] = ...
+  ret i32 undef  ; no defined return value
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index 05f8e616bb8e..9653d18db566 100644
--- a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 @.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
 
-;CHECK: @reduce_compare
+;CHECK-LABEL: @reduce_compare(
 ;CHECK: load <2 x double>
 ;CHECK: fmul <2 x double>
 ;CHECK: fmul <2 x double>
diff --git a/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
new file mode 100644
index 000000000000..51b1c08fb36f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, i16*, i8*, i8*, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] }
+%struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 }
+
+define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p) {
+entry:  ; carries an i32 (range, code) pair through a loop of phis, selects and subs, then stores the pair to struct fields 4 and 5 of %p
+  %range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 4  ; address of field 4 (i32)
+  %code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 5  ; address of field 5 (i32), the next field after field 4
+  br label %do.body66.i
+
+do.body66.i:                                      ; preds = %do.cond.i, %entry
+  %range.2.i = phi i32 [ %range.4.i, %do.cond.i ], [ undef, %entry ]  ; loop-carried "range" value
+  %code.2.i = phi i32 [ %code.4.i, %do.cond.i ], [ undef, %entry ]  ; loop-carried "code" value
+  %.range.2.i = select i1 undef, i32 undef, i32 %range.2.i
+  %.code.2.i = select i1 undef, i32 undef, i32 %code.2.i
+  br i1 undef, label %do.cond.i, label %if.else.i
+
+if.else.i:                                        ; preds = %do.body66.i
+  %sub91.i = sub i32 %.range.2.i, undef
+  %sub92.i = sub i32 %.code.2.i, undef
+  br label %do.cond.i
+
+do.cond.i:                                        ; preds = %if.else.i, %do.body66.i
+  %range.4.i = phi i32 [ %sub91.i, %if.else.i ], [ undef, %do.body66.i ]  ; incoming from %do.body66.i is undef here ...
+  %code.4.i = phi i32 [ %sub92.i, %if.else.i ], [ %.code.2.i, %do.body66.i ]  ; ... but a real value here, so the two phis are not symmetric
+  br i1 undef, label %do.body66.i, label %do.end1006.i
+
+do.end1006.i:                                     ; preds = %do.cond.i
+  %.range.4.i = select i1 undef, i32 undef, i32 %range.4.i
+  %.code.4.i = select i1 undef, i32 undef, i32 %code.4.i
+  store i32 %.range.4.i, i32* %range20.i, align 4  ; write field 4
+  store i32 %.code.4.i, i32* %code21.i, align 4  ; write field 5
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
new file mode 100644
index 000000000000..389892115ced
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 }
+
+define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* nocapture %info) {
+entry:  ; updates the two i32 fields of %info twice: once with constant-only adds, once with adds fed by phis
+  br i1 undef, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.else:                                          ; preds = %entry
+  %m_numConstraintRows4 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 0  ; address of field 0 (i32)
+  %nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 1  ; address of field 1 (i32)
+  br i1 undef, label %land.lhs.true.i.1, label %if.then7.1
+
+land.lhs.true.i.1:                                ; preds = %if.else
+  br i1 undef, label %for.inc.1, label %if.then7.1
+
+if.then7.1:                                       ; preds = %land.lhs.true.i.1, %if.else
+  %inc.1 = add nsw i32 0, 1  ; both operands are constants
+  store i32 %inc.1, i32* %m_numConstraintRows4, align 4
+  %dec.1 = add nsw i32 6, -1  ; both operands are constants
+  store i32 %dec.1, i32* %nub5, align 4
+  br label %for.inc.1
+
+for.inc.1:                                        ; preds = %if.then7.1, %land.lhs.true.i.1
+  %0 = phi i32 [ %dec.1, %if.then7.1 ], [ 6, %land.lhs.true.i.1 ]  ; value feeding the field-1 update
+  %1 = phi i32 [ %inc.1, %if.then7.1 ], [ 0, %land.lhs.true.i.1 ]  ; value feeding the field-0 update
+  %inc.2 = add nsw i32 %1, 1
+  store i32 %inc.2, i32* %m_numConstraintRows4, align 4  ; second write to field 0
+  %dec.2 = add nsw i32 %0, -1
+  store i32 %dec.2, i32* %nub5, align 4  ; second write to field 1
+  unreachable
+}
+
+%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332 = type { float, [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, [4 x float], float, float, [4 x float], float, float, [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330] }
+%class.btVector3.5.30.65.90.115.140.175.185.260.280.330 = type { [4 x float] }
+%class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 }
+
+define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this) {
+entry:  ; two similar fadd/fsub/fsub chains whose results are stored to floats 1 and 2 of the same inner [4 x float] array, plus one store through an undef pointer
+  %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1  ; field 2, array element 0, float index 1
+  %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2  ; same inner array, float index 2
+  %0 = load float* %arrayidx36, align 4  ; only the index-2 float is loaded
+  %add587 = fadd float undef, undef
+  %sub600 = fsub float %add587, undef
+  store float %sub600, float* undef, align 4  ; destination pointer is undef
+  %sub613 = fsub float %add587, %sub600
+  store float %sub613, float* %arrayidx26, align 4  ; write float index 1
+  %add626 = fadd float %0, undef
+  %sub639 = fsub float %add626, undef
+  %sub652 = fsub float %add626, %sub639
+  store float %sub652, float* %arrayidx36, align 4  ; write float index 2
+  br i1 undef, label %if.else1609, label %if.then1595
+
+if.then1595:                                      ; preds = %entry
+  br i1 undef, label %return, label %for.body.lr.ph.i.i1702
+
+for.body.lr.ph.i.i1702:                           ; preds = %if.then1595
+  unreachable
+
+if.else1609:                                      ; preds = %entry
+  unreachable
+
+return:                                           ; preds = %if.then1595
+  ret void
+}
+
+define void @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE() {
+entry:  ; no loads or stores: two fadd results defined here are consumed by float arithmetic several blocks later, behind a chain of undef branches
+  %add8.i2343 = fadd float undef, undef  ; used only by %mul341
+  %add8.i2381 = fadd float undef, undef  ; used by %mul344 and %mul364
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %return, label %if.end111
+
+if.end111:                                        ; preds = %if.end
+  br i1 undef, label %return, label %if.end136
+
+if.end136:                                        ; preds = %if.end111
+  br i1 undef, label %return, label %if.end162
+
+if.end162:                                        ; preds = %if.end136
+  br i1 undef, label %return, label %if.end189
+
+if.end189:                                        ; preds = %if.end162
+  br i1 undef, label %return, label %if.end216
+
+if.end216:                                        ; preds = %if.end189
+  br i1 undef, label %if.then218, label %if.end225
+
+if.then218:                                       ; preds = %if.end216
+  br label %if.end225
+
+if.end225:                                        ; preds = %if.then218, %if.end216
+  br i1 undef, label %return, label %if.end248
+
+if.end248:                                        ; preds = %if.end225
+  br i1 undef, label %return, label %if.end304
+
+if.end304:                                        ; preds = %if.end248
+  %mul341 = fmul float undef, %add8.i2343
+  %mul344 = fmul float undef, %add8.i2381
+  %sub345 = fsub float %mul341, %mul344  ; only use is the fcmp in %if.then374
+  br i1 undef, label %return, label %if.end361
+
+if.end361:                                        ; preds = %if.end304
+  %mul364 = fmul float %add8.i2381, %add8.i2381  ; result has no users in this function
+  br i1 undef, label %if.then370, label %if.end395
+
+if.then370:                                       ; preds = %if.end361
+  br i1 undef, label %if.then374, label %if.end395
+
+if.then374:                                       ; preds = %if.then370
+  %cmp392 = fcmp olt float %sub345, 0.000000e+00  ; result has no users in this function
+  br label %if.end395
+
+if.end395:                                        ; preds = %if.then374, %if.then370, %if.end361
+  unreachable
+
+return:                                           ; preds = %if.end304, %if.end248, %if.end225, %if.end189, %if.end162, %if.end136, %if.end111, %if.end, %entry
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
new file mode 100644
index 000000000000..25c65457946b
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113 = type { [4 x float] }
+
+; Function Attrs: ssp uwtable
+define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices) #0 align 2 {
+entry:  ; builds two parallel float chains (phi, fsub, fmul, fadd, phi, fsub) across several blocks and stores the results to floats 0 and 1 of %vertices
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %if.end22.2, %if.end
+  br i1 undef, label %if.then17.1, label %if.end22.1  ; both targets are laid out after the %return block below
+
+for.end36:                                        ; preds = %if.end22.2
+  br label %for.body144
+
+for.body144:                                      ; preds = %for.body144, %for.end36
+  br i1 undef, label %for.end227, label %for.body144
+
+for.end227:                                       ; preds = %for.body144
+  br i1 undef, label %for.end271, label %for.body233
+
+for.body233:                                      ; preds = %for.body233, %for.end227
+  br i1 undef, label %for.body233, label %for.end271
+
+for.end271:                                       ; preds = %for.body233, %for.end227
+  %0 = phi float [ 0x47EFFFFFE0000000, %for.end227 ], [ undef, %for.body233 ]  ; the hex constant is the largest finite float value
+  %1 = phi float [ 0x47EFFFFFE0000000, %for.end227 ], [ undef, %for.body233 ]  ; same incoming values as %0
+  %sub275 = fsub float undef, %1
+  %sub279 = fsub float undef, %0
+  br i1 undef, label %if.then291, label %return
+
+if.then291:                                       ; preds = %for.end271
+  %mul292 = fmul float %sub275, 5.000000e-01
+  %add294 = fadd float %1, %mul292  ; %1 + 0.5 * %sub275
+  %mul295 = fmul float %sub279, 5.000000e-01
+  %add297 = fadd float %0, %mul295  ; %0 + 0.5 * %sub279
+  br i1 undef, label %if.end332, label %if.else319
+
+if.else319:                                       ; preds = %if.then291
+  br i1 undef, label %if.then325, label %if.end327
+
+if.then325:                                       ; preds = %if.else319
+  br label %if.end327
+
+if.end327:                                        ; preds = %if.then325, %if.else319
+  br i1 undef, label %if.then329, label %if.end332
+
+if.then329:                                       ; preds = %if.end327
+  br label %if.end332
+
+if.end332:                                        ; preds = %if.then329, %if.end327, %if.then291
+  %dx272.1 = phi float [ %sub275, %if.then329 ], [ %sub275, %if.end327 ], [ 0x3F847AE140000000, %if.then291 ]  ; the hex constant is approximately 0.01
+  %dy276.1 = phi float [ undef, %if.then329 ], [ undef, %if.end327 ], [ 0x3F847AE140000000, %if.then291 ]  ; undef where %dx272.1 takes %sub275, so the two phis are not symmetric
+  %sub334 = fsub float %add294, %dx272.1
+  %sub338 = fsub float %add297, %dy276.1
+  %arrayidx.i.i606 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 0  ; &vertices->field0[0]
+  store float %sub334, float* %arrayidx.i.i606, align 4
+  %arrayidx3.i607 = getelementptr inbounds %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* %vertices, i64 0, i32 0, i64 1  ; &vertices->field0[1]
+  store float %sub338, float* %arrayidx3.i607, align 4
+  br label %return
+
+return:                                           ; preds = %if.end332, %for.end271, %entry
+  ret void
+
+if.then17.1:                                      ; preds = %for.body
+  br label %if.end22.1
+
+if.end22.1:                                       ; preds = %if.then17.1, %for.body
+  br i1 undef, label %if.then17.2, label %if.end22.2
+
+if.then17.2:                                      ; preds = %if.end22.1
+  br label %if.end22.2
+
+if.end22.2:                                       ; preds = %if.then17.2, %if.end22.1
+  br i1 undef, label %for.end36, label %for.body  ; back edge to %for.body, or exit to %for.end36
+}
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
new file mode 100644
index 000000000000..ce0159071c60
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { double*, double*, double*, double** }
+
+; Function Attrs: none on the define below (stale printer note said: nounwind ssp uwtable)
+define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) {
+entry:  ; loads double* fields from both iterator structs, merges them through two pointer-typed phis, and stores the pair to fields 0 and 1 of %__first
+  %_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0  ; &__first->field0
+  %0 = load double** %_M_cur2.i.i, align 8
+  %_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1  ; &__first->field1 (stored to below, never loaded)
+  %_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0  ; &__last->field0
+  %1 = load double** %_M_cur2.i.i81, align 8
+  %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1  ; &__last->field1
+  %2 = load double** %_M_first3.i.i83, align 8
+  br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader
+
+while.cond.i.preheader:                           ; preds = %entry
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.body.i, %while.cond.i.preheader
+  br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.body.i
+
+while.body.i:                                     ; preds = %while.cond.i
+  br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i
+
+_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; preds = %while.body.i, %while.cond.i, %entry
+  %3 = phi double* [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ]  ; value for field 1
+  %4 = phi double* [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ]  ; value for field 0; incoming loads differ per predecessor
+  store double* %4, double** %_M_cur2.i.i, align 8  ; write __first->field0
+  store double* %3, double** %_M_first3.i.i, align 8  ; write __first->field1
+  br i1 undef, label %if.then.i55, label %while.cond
+
+if.then.i55:                                      ; preds = %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %if.then.i55, %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
+  br label %while.cond  ; unconditional self-loop: the function has no return instruction
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_flop7.ll b/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
new file mode 100644
index 000000000000..e11be488f795
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @main() #0 {
+entry:  ; the interesting block is %for.body267: a double accumulation fed by a tree of fmul/fadd/fdiv over undef operands, with no memory accesses
+  br i1 undef, label %while.body, label %while.end
+
+while.body:                                       ; preds = %entry
+  unreachable
+
+while.end:                                        ; preds = %entry
+  br i1 undef, label %for.end80, label %for.body75.lr.ph
+
+for.body75.lr.ph:                                 ; preds = %while.end
+  br label %for.body75
+
+for.body75:                                       ; preds = %for.body75, %for.body75.lr.ph
+  br label %for.body75  ; unconditional self-loop with no exit
+
+for.end80:                                        ; preds = %while.end
+  br i1 undef, label %for.end300, label %for.body267.lr.ph
+
+for.body267.lr.ph:                                ; preds = %for.end80
+  br label %for.body267
+
+for.body267:                                      ; preds = %for.body267, %for.body267.lr.ph
+  %s.71010 = phi double [ 0.000000e+00, %for.body267.lr.ph ], [ %add297, %for.body267 ]  ; running sum, starts at 0.0
+  %mul269 = fmul double undef, undef
+  %mul270 = fmul double %mul269, %mul269  ; square of %mul269
+  %add282 = fadd double undef, undef
+  %mul283 = fmul double %mul269, %add282  ; numerator of the fdiv
+  %add293 = fadd double undef, undef
+  %mul294 = fmul double %mul270, %add293
+  %add295 = fadd double undef, %mul294  ; denominator of the fdiv
+  %div296 = fdiv double %mul283, %add295
+  %add297 = fadd double %s.71010, %div296  ; feeds the phi on the back edge; no other users
+  br i1 undef, label %for.body267, label %for.end300
+
+for.end300:                                       ; preds = %for.body267, %for.end80
+  unreachable
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
new file mode 100644
index 000000000000..c02e1fa607d5
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -0,0 +1,91 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: none on the define below (stale printer note said: nounwind ssp uwtable)
+define void @RCModelEstimator() {
+entry:  ; no memory accesses: two identical-shape double chains in %if.then88 feed a pair of five-way phis in %if.end103
+  br i1 undef, label %for.body.lr.ph, label %for.end.thread
+
+for.end.thread:                                   ; preds = %entry
+  unreachable
+
+for.body.lr.ph:                                   ; preds = %entry
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %for.body.lr.ph
+  br i1 undef, label %for.body3, label %if.end103
+
+for.cond14.preheader:                             ; preds = %for.inc11
+  br i1 undef, label %for.body16.lr.ph, label %if.end103
+
+for.body16.lr.ph:                                 ; preds = %for.cond14.preheader
+  br label %for.body16
+
+for.body3:                                        ; preds = %for.inc11, %for.end
+  br i1 undef, label %if.then7, label %for.inc11
+
+if.then7:                                         ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %if.then7, %for.body3
+  br i1 false, label %for.cond14.preheader, label %for.body3  ; constant-false condition: control always returns to %for.body3
+
+for.body16:                                       ; preds = %for.body16, %for.body16.lr.ph
+  br i1 undef, label %for.end39, label %for.body16
+
+for.end39:                                        ; preds = %for.body16
+  br i1 undef, label %if.end103, label %for.cond45.preheader
+
+for.cond45.preheader:                             ; preds = %for.end39
+  br i1 undef, label %if.then88, label %if.else
+
+if.then88:                                        ; preds = %for.cond45.preheader
+  %mul89 = fmul double 0.000000e+00, 0.000000e+00  ; first chain: fmul, fmul, fsub, fdiv
+  %mul90 = fmul double 0.000000e+00, 0.000000e+00
+  %sub91 = fsub double %mul89, %mul90
+  %div92 = fdiv double %sub91, undef  ; incoming value of phi %1
+  %mul94 = fmul double 0.000000e+00, 0.000000e+00  ; second chain, same shape as the first
+  %mul95 = fmul double 0.000000e+00, 0.000000e+00
+  %sub96 = fsub double %mul94, %mul95
+  %div97 = fdiv double %sub96, undef  ; incoming value of phi %0
+  br label %if.end103
+
+if.else:                                          ; preds = %for.cond45.preheader
+  br label %if.end103
+
+if.end103:                                        ; preds = %if.else, %if.then88, %for.end39, %for.cond14.preheader, %for.end
+  %0 = phi double [ 0.000000e+00, %for.end39 ], [ %div97, %if.then88 ], [ 0.000000e+00, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ]  ; result has no users
+  %1 = phi double [ undef, %for.end39 ], [ %div92, %if.then88 ], [ undef, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ]  ; result has no users; mixes undef and 0.0 incoming values
+  ret void
+}
+
+
+define void @intrapred_luma() {
+entry:  ; stores one truncated i16 value to elements 12, 11 and 10 (descending order) of a [13 x i16] array whose base pointer is undef
+  %conv153 = trunc i32 undef to i16  ; the single value written by all three stores
+  %arrayidx154 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 12  ; element 12 (last element)
+  store i16 %conv153, i16* %arrayidx154, align 8
+  %arrayidx155 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 11  ; element 11
+  store i16 %conv153, i16* %arrayidx155, align 2
+  %arrayidx156 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 10  ; element 10
+  store i16 %conv153, i16* %arrayidx156, align 4
+  ret void
+}
+
+define fastcc void @dct36(double* %inbuf) {
+entry:  ; inbuf[2] = inbuf[1] + undef, then inbuf[1] = inbuf[0] + inbuf[1]; the second store overwrites a slot loaded earlier
+  %arrayidx41 = getelementptr inbounds double* %inbuf, i64 2  ; &inbuf[2]
+  %arrayidx44 = getelementptr inbounds double* %inbuf, i64 1  ; &inbuf[1]
+  %0 = load double* %arrayidx44, align 8  ; old inbuf[1], used by both adds
+  %add46 = fadd double %0, undef
+  store double %add46, double* %arrayidx41, align 8  ; write inbuf[2]
+  %1 = load double* %inbuf, align 8  ; inbuf[0], loaded after the first store
+  %add49 = fadd double %1, %0
+  store double %add49, double* %arrayidx44, align 8  ; write inbuf[1]
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
new file mode 100644
index 000000000000..d6915e2dc5d6
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
@@ -0,0 +1,107 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @main() {
+entry:  ; nested loops with no memory accesses; the innermost loop (%for.body12 / %if.end) carries a pair of doubles through phis and fmul/fadd/fsub
+  br label %for.body
+
+for.body:                                         ; preds = %for.end44, %entry
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %if.then25, %for.body
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.inc21, %for.cond4.preheader
+  br label %for.body12
+
+for.body12:                                       ; preds = %if.end, %for.body6
+  %fZImg.069 = phi double [ undef, %for.body6 ], [ %add19, %if.end ]  ; loop-carried value, updated by %add19
+  %fZReal.068 = phi double [ undef, %for.body6 ], [ %add20, %if.end ]  ; loop-carried value, updated by %add20
+  %mul13 = fmul double %fZReal.068, %fZReal.068  ; square of %fZReal.068
+  %mul14 = fmul double %fZImg.069, %fZImg.069  ; square of %fZImg.069
+  %add15 = fadd double %mul13, %mul14  ; sum of the two squares
+  %cmp16 = fcmp ogt double %add15, 4.000000e+00  ; the only branch condition in this function that is not undef
+  br i1 %cmp16, label %for.inc21, label %if.end
+
+if.end:                                           ; preds = %for.body12
+  %mul18 = fmul double undef, %fZImg.069
+  %add19 = fadd double undef, %mul18  ; next %fZImg.069
+  %sub = fsub double %mul13, %mul14  ; reuses both squares computed in %for.body12
+  %add20 = fadd double undef, %sub  ; next %fZReal.068
+  br i1 undef, label %for.body12, label %for.inc21
+
+for.inc21:                                        ; preds = %if.end, %for.body12
+  br i1 undef, label %for.end23, label %for.body6
+
+for.end23:                                        ; preds = %for.inc21
+  br i1 undef, label %if.then25, label %if.then26
+
+if.then25:                                        ; preds = %for.end23
+  br i1 undef, label %for.end44, label %for.cond4.preheader
+
+if.then26:                                        ; preds = %for.end23
+  unreachable
+
+for.end44:                                        ; preds = %if.then25
+  br i1 undef, label %for.end48, label %for.body
+
+for.end48:                                        ; preds = %for.end44
+  ret void
+}
+
+%struct.hoge = type { double, double, double}
+
+define void @zot(%struct.hoge* %arg) {
+bb:  ; stores two fsub results to fields 1 and 2 of %arg; the two chains differ in depth, and %tmp2 is reused in a later block
+  %tmp = load double* undef, align 8  ; load through an undef pointer
+  %tmp1 = fsub double %tmp, undef
+  %tmp2 = load double* undef, align 8  ; load through an undef pointer; also used by %tmp13 in %bb12
+  %tmp3 = fsub double %tmp2, undef
+  %tmp4 = fmul double %tmp3, undef  ; result has no users
+  %tmp5 = fmul double %tmp3, undef
+  %tmp6 = fsub double %tmp5, undef
+  %tmp7 = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 1  ; &arg->field1
+  store double %tmp6, double* %tmp7, align 8
+  %tmp8 = fmul double %tmp1, undef
+  %tmp9 = fsub double %tmp8, undef
+  %tmp10 = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 2  ; &arg->field2
+  store double %tmp9, double* %tmp10, align 8
+  br i1 undef, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb
+  br label %bb14
+
+bb12:                                             ; preds = %bb
+  %tmp13 = fmul double undef, %tmp2  ; use of a %bb value outside %bb; result has no users
+  br label %bb14
+
+bb14:                                             ; preds = %bb12, %bb11
+  ret void
+}
+
+
+%struct.rc4_state.0.24 = type { i32, i32, [256 x i32] }
+
+define void @rc4_crypt(%struct.rc4_state.0.24* nocapture %s) {
+entry:  ; two i32 values computed in a loop reach the exit block through phis and are stored to fields 0 and 1 of %s
+  %x1 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 0  ; &s->field0 (i32)
+  %y2 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 1  ; &s->field1 (i32)
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.045 = phi i32 [ %conv4, %for.body ], [ undef, %entry ]  ; result has no users
+  %conv4 = and i32 undef, 255  ; mask to the low 8 bits
+  %conv7 = and i32 undef, 255  ; mask to the low 8 bits
+  %idxprom842 = zext i32 %conv7 to i64  ; extra user of %conv7; its own result has no users
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ undef, %entry ], [ %conv4, %for.body ]
+  %y.0.lcssa = phi i32 [ undef, %entry ], [ %conv7, %for.body ]
+  store i32 %x.0.lcssa, i32* %x1, align 4  ; write field 0
+  store i32 %y.0.lcssa, i32* %y2, align 4  ; write field 1
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
new file mode 100644
index 000000000000..8da3c34a0279
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.DState = type { i32, i32 }
+
+@b = common global %struct.DState zeroinitializer, align 4
+@d = common global i32 0, align 4
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+@e = common global i32 0, align 4
+
+define i32 @fn1() {
+entry:  ; loads both i32 fields of global @b, selects new values through phis (one predecessor appears twice because of the switch), and stores both fields back
+  %0 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4  ; b.field0, address given as a constant expression
+  %1 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4  ; b.field1, address given as a constant expression
+  %2 = load i32* @d, align 4
+  %cond = icmp eq i32 %2, 0
+  br i1 %cond, label %sw.bb, label %save_state_and_return
+
+sw.bb:                                            ; preds = %entry
+  %3 = load i32* @c, align 4
+  %and = and i32 %3, 7  ; low three bits of @c
+  store i32 %and, i32* @a, align 4
+  switch i32 %and, label %if.end [
+    i32 7, label %save_state_and_return
+    i32 0, label %save_state_and_return
+  ]
+
+if.end:                                           ; preds = %sw.bb
+  br label %save_state_and_return
+
+save_state_and_return:                            ; preds = %sw.bb, %sw.bb, %if.end, %entry
+  %t.0 = phi i32 [ 0, %if.end ], [ %0, %entry ], [ %0, %sw.bb ], [ %0, %sw.bb ]  ; %sw.bb is listed twice, once per switch case edge
+  %f.0 = phi i32 [ 0, %if.end ], [ %1, %entry ], [ 0, %sw.bb ], [ 0, %sw.bb ]  ; unlike %t.0, the %sw.bb edges carry the constant 0 rather than the loaded value
+  store i32 %t.0, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4  ; write b.field0
+  store i32 %f.0, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4  ; write b.field1
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
new file mode 100644
index 000000000000..05415456cf0f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171 = type { i32, i32, i32, i32, i32, i32, [8 x i8] }
+; Crash-only regression input (no FileCheck): the only stores are two i32 stores in %if.then157, to fields 1 and 0 of an undef struct pointer.
+define void @SIM4() {
+entry:
+  br i1 undef, label %return, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %entry
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %lor.lhs.false
+  br i1 undef, label %for.end605, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc603, %for.body.lr.ph
+  br i1 undef, label %for.inc603, label %if.end12
+
+if.end12:                                         ; preds = %for.body
+  br i1 undef, label %land.lhs.true, label %land.lhs.true167
+
+land.lhs.true:                                    ; preds = %if.end12
+  br i1 undef, label %if.then17, label %land.lhs.true167
+
+if.then17:                                        ; preds = %land.lhs.true
+  br i1 undef, label %if.end98, label %land.rhs.lr.ph
+
+land.rhs.lr.ph:                                   ; preds = %if.then17
+  unreachable
+
+if.end98:                                         ; preds = %if.then17
+  %from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 1
+  br i1 undef, label %land.lhs.true167, label %if.then103
+
+if.then103:                                       ; preds = %if.end98
+  %.sub100 = select i1 undef, i32 250, i32 undef
+  %mul114 = shl nsw i32 %.sub100, 2
+  %from1115 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0
+  %cond125 = select i1 undef, i32 undef, i32 %mul114
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %land.rhs.i874, %if.then103
+  %row.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %.sub100, %if.then103 ]
+  %col.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %cond125, %if.then103 ]
+  br i1 undef, label %land.rhs.i874, label %for.end.i
+
+land.rhs.i874:                                    ; preds = %for.cond.i
+  br i1 undef, label %for.cond.i, label %for.end.i
+
+for.end.i:                                        ; preds = %land.rhs.i874, %for.cond.i
+  br i1 undef, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %for.end.i
+  %add14.i = add nsw i32 %row.0.i, undef
+  %add15.i = add nsw i32 %col.0.i, undef
+  br label %extend_bw.exit
+
+if.end.i:                                         ; preds = %for.end.i
+  %add16.i = add i32 %cond125, %.sub100
+  %cmp26514.i = icmp slt i32 %add16.i, 0
+  br i1 %cmp26514.i, label %for.end33.i, label %for.body28.lr.ph.i
+
+for.body28.lr.ph.i:                               ; preds = %if.end.i
+  br label %for.end33.i
+
+for.end33.i:                                      ; preds = %for.body28.lr.ph.i, %if.end.i
+  br i1 undef, label %for.end58.i, label %for.body52.lr.ph.i
+
+for.body52.lr.ph.i:                               ; preds = %for.end33.i
+  br label %for.end58.i
+
+for.end58.i:                                      ; preds = %for.body52.lr.ph.i, %for.end33.i
+  br label %while.cond260.i
+
+while.cond260.i:                                  ; preds = %land.rhs263.i, %for.end58.i
+  br i1 undef, label %land.rhs263.i, label %while.end275.i
+
+land.rhs263.i:                                    ; preds = %while.cond260.i
+  br i1 undef, label %while.cond260.i, label %while.end275.i
+
+while.end275.i:                                   ; preds = %land.rhs263.i, %while.cond260.i
+  br label %extend_bw.exit
+
+extend_bw.exit:                                   ; preds = %while.end275.i, %if.then.i
+  %add14.i1262 = phi i32 [ %add14.i, %if.then.i ], [ undef, %while.end275.i ]
+  %add15.i1261 = phi i32 [ %add15.i, %if.then.i ], [ undef, %while.end275.i ]
+  br i1 false, label %if.then157, label %land.lhs.true167
+
+if.then157:                                       ; preds = %extend_bw.exit
+  %add158 = add nsw i32 %add14.i1262, 1
+  store i32 %add158, i32* %from299, align 4
+  %add160 = add nsw i32 %add15.i1261, 1
+  store i32 %add160, i32* %from1115, align 4
+  br label %land.lhs.true167
+
+land.lhs.true167:                                 ; preds = %if.then157, %extend_bw.exit, %if.end98, %land.lhs.true, %if.end12
+  unreachable
+
+for.inc603:                                       ; preds = %for.body
+  br i1 undef, label %for.body, label %for.end605
+
+for.end605:                                       ; preds = %for.inc603, %if.end
+  unreachable
+
+return:                                           ; preds = %lor.lhs.false, %entry
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
new file mode 100644
index 000000000000..915c41bb9c59
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -0,0 +1,105 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.Ray.5.11.53.113.119.137.149.185.329.389.416 = type { %struct.Vec.0.6.48.108.114.132.144.180.324.384.414, %struct.Vec.0.6.48.108.114.132.144.180.324.384.414 }
+%struct.Vec.0.6.48.108.114.132.144.180.324.384.414 = type { double, double, double }
+; Crash-only regression input (no FileCheck): %cond.false66.us stores four doubles to fields [0][0], [0][1], [1][0] and [1][1] of an undef Ray pointer.
+; Function Attrs: ssp uwtable
+define void @main() #0 {
+entry:
+  br i1 undef, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %entry
+  unreachable
+
+cond.end:                                         ; preds = %entry
+  br label %invoke.cont
+
+invoke.cont:                                      ; preds = %invoke.cont, %cond.end
+  br i1 undef, label %arrayctor.cont, label %invoke.cont
+
+arrayctor.cont:                                   ; preds = %invoke.cont
+  %agg.tmp99208.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416* undef, i64 0, i32 0, i32 0
+  %agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416* undef, i64 0, i32 0, i32 1
+  %agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416* undef, i64 0, i32 1, i32 0
+  %agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416* undef, i64 0, i32 1, i32 1
+  br label %for.cond36.preheader
+
+for.cond36.preheader:                             ; preds = %_Z5clampd.exit.1, %arrayctor.cont
+  br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1
+
+cond.false51.us:                                  ; preds = %for.body42.lr.ph.us
+  unreachable
+
+cond.true48.us:                                   ; preds = %for.body42.lr.ph.us
+  br i1 undef, label %cond.true63.us, label %cond.false66.us
+
+cond.false66.us:                                  ; preds = %cond.true48.us
+  %add.i276.us = fadd double 0.000000e+00, undef
+  %add.i264.us = fadd double %add.i276.us, 0.000000e+00
+  %add4.i267.us = fadd double undef, 0xBFA5CC2D1960285F
+  %mul.i254.us = fmul double %add.i264.us, 1.400000e+02
+  %mul2.i256.us = fmul double %add4.i267.us, 1.400000e+02
+  %add.i243.us = fadd double %mul.i254.us, 5.000000e+01
+  %add4.i246.us = fadd double %mul2.i256.us, 5.200000e+01
+  %mul.i.i.us = fmul double undef, %add.i264.us
+  %mul2.i.i.us = fmul double undef, %add4.i267.us
+  store double %add.i243.us, double* %agg.tmp99208.sroa.0.0.idx, align 8
+  store double %add4.i246.us, double* %agg.tmp99208.sroa.1.8.idx388, align 8
+  store double %mul.i.i.us, double* %agg.tmp101211.sroa.0.0.idx, align 8
+  store double %mul2.i.i.us, double* %agg.tmp101211.sroa.1.8.idx390, align 8
+  unreachable
+
+cond.true63.us:                                   ; preds = %cond.true48.us
+  unreachable
+
+for.body42.lr.ph.us:                              ; preds = %for.cond36.preheader
+  br i1 undef, label %cond.true48.us, label %cond.false51.us
+
+_Z5clampd.exit.1:                                 ; preds = %for.cond36.preheader
+  br label %for.cond36.preheader
+}
+
+
+%struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601 = type { %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600, %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 }
+%struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double }
+; Crash-only input: two fmul/fsub/fadd chains seeded with undef operands end in stores to fields [1][0] and [1][1] of an undef Ray pointer.
+define void @_Z8radianceRK3RayiPt() #0 {
+entry:
+  br i1 undef, label %if.then78, label %if.then38
+
+if.then38:                                        ; preds = %entry
+  %mul.i.i790 = fmul double undef, undef
+  %mul3.i.i792 = fmul double undef, undef
+  %mul.i764 = fmul double undef, %mul3.i.i792
+  %mul4.i767 = fmul double undef, undef
+  %sub.i768 = fsub double %mul.i764, %mul4.i767
+  %mul6.i770 = fmul double undef, %mul.i.i790
+  %mul9.i772 = fmul double undef, %mul3.i.i792
+  %sub10.i773 = fsub double %mul6.i770, %mul9.i772
+  %mul.i736 = fmul double undef, %sub.i768
+  %mul2.i738 = fmul double undef, %sub10.i773
+  %mul.i727 = fmul double undef, %mul.i736
+  %mul2.i729 = fmul double undef, %mul2.i738
+  %add.i716 = fadd double undef, %mul.i727
+  %add4.i719 = fadd double undef, %mul2.i729
+  %add.i695 = fadd double undef, %add.i716
+  %add4.i698 = fadd double undef, %add4.i719
+  %mul.i.i679 = fmul double undef, %add.i695
+  %mul2.i.i680 = fmul double undef, %add4.i698
+  %agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
+  store double %mul.i.i679, double* %agg.tmp74663.sroa.0.0.idx, align 8
+  %agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 1
+  store double %mul2.i.i680, double* %agg.tmp74663.sroa.1.8.idx943, align 8
+  br label %return
+
+if.then78:                                        ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %if.then78, %if.then38
+  ret void
+}
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
new file mode 100644
index 000000000000..06c4b524ee95
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; int foo(double *A, float *B, int g) {
+;   float B0 = B[0];
+;   float B1 = B[1]; <----- BasicBlock #1
+;   B0 += 5;
+;   B1 += 8;
+;
+;   if (g) bar();
+;
+;   A[0] += B0;     <------- BasicBlock #3
+;   A[1] += B1;
+; }
+
+; The two scalar chains start in %entry (float loads and adds) and finish in %if.end (double adds and stores); the call to @bar in %if.then sits between them.
+;CHECK-LABEL: @foo(
+;CHECK: load <2 x float>
+;CHECK: fadd <2 x float>
+;CHECK: call i32
+;CHECK: load <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: store <2 x double>
+;CHECK: ret
+define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
+entry:
+  %0 = load float* %B, align 4
+  %arrayidx1 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx1, align 4
+  %add = fadd float %0, 5.000000e+00
+  %add2 = fadd float %1, 8.000000e+00
+  %tobool = icmp eq i32 %g, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i32 (...)* @bar()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %conv = fpext float %add to double
+  %2 = load double* %A, align 8
+  %add4 = fadd double %conv, %2
+  store double %add4, double* %A, align 8
+  %conv5 = fpext float %add2 to double
+  %arrayidx6 = getelementptr inbounds double* %A, i64 1
+  %3 = load double* %arrayidx6, align 8
+  %add7 = fadd double %conv5, %3
+  store double %add7, double* %arrayidx6, align 8
+  ret i32 undef
+}
+
+declare i32 @bar(...)
diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll
new file mode 100644
index 000000000000..bbfd6f28ea97
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -0,0 +1,219 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+;int test(double *G) {
+;  G[0] = 1+G[5]*4;
+;  G[1] = 6+G[6]*3;
+;  G[2] = 7+G[5]*4;
+;  G[3] = 8+G[6]*4;
+;}
+
+;CHECK-LABEL: @test(
+;CHECK: load <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: store <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: store <2 x double>
+;CHECK: ret i32
+; %mul (G[5]*4) feeds both G[0] and G[2]; G[6] (%1) is scaled by 3.0 for G[1] and by 4.0 for G[3].
+define i32 @test(double* nocapture %G) {
+entry:
+  %arrayidx = getelementptr inbounds double* %G, i64 5
+  %0 = load double* %arrayidx, align 8
+  %mul = fmul double %0, 4.000000e+00
+  %add = fadd double %mul, 1.000000e+00
+  store double %add, double* %G, align 8
+  %arrayidx2 = getelementptr inbounds double* %G, i64 6
+  %1 = load double* %arrayidx2, align 8
+  %mul3 = fmul double %1, 3.000000e+00
+  %add4 = fadd double %mul3, 6.000000e+00
+  %arrayidx5 = getelementptr inbounds double* %G, i64 1
+  store double %add4, double* %arrayidx5, align 8
+  %add8 = fadd double %mul, 7.000000e+00
+  %arrayidx9 = getelementptr inbounds double* %G, i64 2
+  store double %add8, double* %arrayidx9, align 8
+  %mul11 = fmul double %1, 4.000000e+00
+  %add12 = fadd double %mul11, 8.000000e+00
+  %arrayidx13 = getelementptr inbounds double* %G, i64 3
+  store double %add12, double* %arrayidx13, align 8
+  ret i32 undef
+}
+; All four lanes multiply by the same %conv (sitofp of %n) but use different fmul and fadd constants.
+;int foo(double *A, int n) {
+;  A[0] = A[0] * 7.9 * n + 6.0;
+;  A[1] = A[1] * 7.7 * n + 2.0;
+;  A[2] = A[2] * 7.6 * n + 3.0;
+;  A[3] = A[3] * 7.4 * n + 4.0;
+;}
+;CHECK-LABEL: @foo(
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @foo(double* nocapture %A, i32 %n) {
+entry:
+  %0 = load double* %A, align 8
+  %mul = fmul double %0, 7.900000e+00
+  %conv = sitofp i32 %n to double
+  %mul1 = fmul double %conv, %mul
+  %add = fadd double %mul1, 6.000000e+00
+  store double %add, double* %A, align 8
+  %arrayidx3 = getelementptr inbounds double* %A, i64 1
+  %1 = load double* %arrayidx3, align 8
+  %mul4 = fmul double %1, 7.700000e+00
+  %mul6 = fmul double %conv, %mul4
+  %add7 = fadd double %mul6, 2.000000e+00
+  store double %add7, double* %arrayidx3, align 8
+  %arrayidx9 = getelementptr inbounds double* %A, i64 2
+  %2 = load double* %arrayidx9, align 8
+  %mul10 = fmul double %2, 7.600000e+00
+  %mul12 = fmul double %conv, %mul10
+  %add13 = fadd double %mul12, 3.000000e+00
+  store double %add13, double* %arrayidx9, align 8
+  %arrayidx15 = getelementptr inbounds double* %A, i64 3
+  %3 = load double* %arrayidx15, align 8
+  %mul16 = fmul double %3, 7.400000e+00
+  %mul18 = fmul double %conv, %mul16
+  %add19 = fadd double %mul18, 4.000000e+00
+  store double %add19, double* %arrayidx15, align 8
+  ret i32 undef
+}
+
+; int test2(double *G, int k) {
+;   if (k) {
+;     G[0] = 1+G[5]*4;
+;     G[1] = 6+G[6]*3;
+;   } else {
+;     G[2] = 7+G[5]*4;
+;     G[3] = 8+G[6]*3;
+;   }
+; }
+
+; We can't merge the gather sequences because one does not dominate the other.
+; CHECK-LABEL: @test2(
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: ret
+define i32 @test2(double* nocapture %G, i32 %k) {
+  %1 = icmp eq i32 %k, 0
+  %2 = getelementptr inbounds double* %G, i64 5
+  %3 = load double* %2, align 8
+  %4 = fmul double %3, 4.000000e+00
+  br i1 %1, label %12, label %5
+
+; <label>:5                                       ; preds = %0
+  %6 = fadd double %4, 1.000000e+00
+  store double %6, double* %G, align 8
+  %7 = getelementptr inbounds double* %G, i64 6
+  %8 = load double* %7, align 8
+  %9 = fmul double %8, 3.000000e+00
+  %10 = fadd double %9, 6.000000e+00
+  %11 = getelementptr inbounds double* %G, i64 1
+  store double %10, double* %11, align 8
+  br label %20
+
+; <label>:12                                      ; preds = %0
+  %13 = fadd double %4, 7.000000e+00
+  %14 = getelementptr inbounds double* %G, i64 2
+  store double %13, double* %14, align 8
+  %15 = getelementptr inbounds double* %G, i64 6
+  %16 = load double* %15, align 8
+  %17 = fmul double %16, 3.000000e+00
+  %18 = fadd double %17, 8.000000e+00
+  %19 = getelementptr inbounds double* %G, i64 3
+  store double %18, double* %19, align 8
+  br label %20
+
+; <label>:20                                      ; preds = %12, %5
+  ret i32 undef
+}
+
+; Same shape as @foo above, but every lane uses the same constants (7.9 and 6.0).
+;int foo4(double *A, int n) {
+;  A[0] = A[0] * 7.9 * n + 6.0;
+;  A[1] = A[1] * 7.9 * n + 6.0;
+;  A[2] = A[2] * 7.9 * n + 6.0;
+;  A[3] = A[3] * 7.9 * n + 6.0;
+;}
+;CHECK-LABEL: @foo4(
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @foo4(double* nocapture %A, i32 %n) {
+entry:
+  %0 = load double* %A, align 8
+  %mul = fmul double %0, 7.900000e+00
+  %conv = sitofp i32 %n to double
+  %mul1 = fmul double %conv, %mul
+  %add = fadd double %mul1, 6.000000e+00
+  store double %add, double* %A, align 8
+  %arrayidx3 = getelementptr inbounds double* %A, i64 1
+  %1 = load double* %arrayidx3, align 8
+  %mul4 = fmul double %1, 7.900000e+00
+  %mul6 = fmul double %conv, %mul4
+  %add7 = fadd double %mul6, 6.000000e+00
+  store double %add7, double* %arrayidx3, align 8
+  %arrayidx9 = getelementptr inbounds double* %A, i64 2
+  %2 = load double* %arrayidx9, align 8
+  %mul10 = fmul double %2, 7.900000e+00
+  %mul12 = fmul double %conv, %mul10
+  %add13 = fadd double %mul12, 6.000000e+00
+  store double %add13, double* %arrayidx9, align 8
+  %arrayidx15 = getelementptr inbounds double* %A, i64 3
+  %3 = load double* %arrayidx15, align 8
+  %mul16 = fmul double %3, 7.900000e+00
+  %mul18 = fmul double %conv, %mul16
+  %add19 = fadd double %mul18, 6.000000e+00
+  store double %add19, double* %arrayidx15, align 8
+  ret i32 undef
+}
+; %conv (sitofp of %n) is shared by the A[0], A[1] and A[2] multiplies; only A[3] uses a different scalar, %conv12 = sitofp(%n + 4).
+;int partial_mrg(double *A, int n) {
+;  A[0] = A[0] * n;
+;  A[1] = A[1] * n;
+;  if (n < 4) return 0;
+;  A[2] = A[2] * n;
+;  A[3] = A[3] * (n+4);
+;}
+;CHECK-LABEL: @partial_mrg(
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @partial_mrg(double* nocapture %A, i32 %n) {
+entry:
+  %0 = load double* %A, align 8
+  %conv = sitofp i32 %n to double
+  %mul = fmul double %conv, %0
+  store double %mul, double* %A, align 8
+  %arrayidx2 = getelementptr inbounds double* %A, i64 1
+  %1 = load double* %arrayidx2, align 8
+  %mul4 = fmul double %conv, %1
+  store double %mul4, double* %arrayidx2, align 8
+  %cmp = icmp slt i32 %n, 4
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %arrayidx7 = getelementptr inbounds double* %A, i64 2
+  %2 = load double* %arrayidx7, align 8
+  %mul9 = fmul double %conv, %2
+  store double %mul9, double* %arrayidx7, align 8
+  %arrayidx11 = getelementptr inbounds double* %A, i64 3
+  %3 = load double* %arrayidx11, align 8
+  %add = add nsw i32 %n, 4
+  %conv12 = sitofp i32 %add to double
+  %mul13 = fmul double %conv12, %3
+  store double %mul13, double* %arrayidx11, align 8
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  ret i32 0
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
new file mode 100644
index 000000000000..fba35499fb7d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; int foo(int *A) {
+;   int r = A[0], g = A[1], b = A[2], a = A[3];
+;   for (int i=0; i < A[13]; i++) {
+;     r*=18; g*=19; b*=12; a *=9;
+;   }
+;   A[0] = r; A[1] = g; A[2] = b; A[3] = a;
+; }
+; r, g, b and a are each a loop-carried phi/mul cycle; the expected output has one <4 x i32> phi and one <4 x i32> mul in the loop.
+;CHECK-LABEL: @foo(
+;CHECK: bitcast i32* %A to <4 x i32>*
+;CHECK-NEXT: load <4 x i32>
+;CHECK: phi <4 x i32>
+;CHECK-NEXT: mul <4 x i32>
+;CHECK-NOT: mul
+;CHECK: phi <4 x i32>
+;CHECK: bitcast i32* %A to <4 x i32>*
+;CHECK-NEXT: store <4 x i32>
+;CHECK-NEXT: ret i32 undef
+define i32 @foo(i32* nocapture %A) #0 {
+entry:
+  %0 = load i32* %A, align 4
+  %arrayidx1 = getelementptr inbounds i32* %A, i64 1
+  %1 = load i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 2
+  %2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %A, i64 3
+  %3 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 13
+  %4 = load i32* %arrayidx4, align 4
+  %cmp24 = icmp sgt i32 %4, 0
+  br i1 %cmp24, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.029 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %a.028 = phi i32 [ %mul7, %for.body ], [ %3, %entry ]
+  %b.027 = phi i32 [ %mul6, %for.body ], [ %2, %entry ]
+  %g.026 = phi i32 [ %mul5, %for.body ], [ %1, %entry ]
+  %r.025 = phi i32 [ %mul, %for.body ], [ %0, %entry ]
+  %mul = mul nsw i32 %r.025, 18
+  %mul5 = mul nsw i32 %g.026, 19
+  %mul6 = mul nsw i32 %b.027, 12
+  %mul7 = mul nsw i32 %a.028, 9
+  %inc = add nsw i32 %i.029, 1
+  %cmp = icmp slt i32 %inc, %4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  %a.0.lcssa = phi i32 [ %3, %entry ], [ %mul7, %for.body ]
+  %b.0.lcssa = phi i32 [ %2, %entry ], [ %mul6, %for.body ]
+  %g.0.lcssa = phi i32 [ %1, %entry ], [ %mul5, %for.body ]
+  %r.0.lcssa = phi i32 [ %0, %entry ], [ %mul, %for.body ]
+  store i32 %r.0.lcssa, i32* %A, align 4
+  store i32 %g.0.lcssa, i32* %arrayidx1, align 4
+  store i32 %b.0.lcssa, i32* %arrayidx2, align 4
+  store i32 %a.0.lcssa, i32* %arrayidx3, align 4
+  ret i32 undef
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
new file mode 100644
index 000000000000..f4e68f217f25
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; int depth(double *A, int m) {
+;   double y0 = 0; double y1 = 1;
+;   for (int i=0; i < m; i++) {
+;     y0 = A[4];
+;     y1 = A[5];
+;   }
+;   A[8] = y0; A[8+1] = y1;
+; }
+; The vectorized load must keep the line-4 debug location and the vectorized store the line-7 one.
+;CHECK-LABEL: @depth(
+;CHECK: getelementptr inbounds {{.*}}, !dbg ![[LOC:[0-9]+]]
+;CHECK: bitcast double* {{.*}}, !dbg ![[LOC]]
+;CHECK: load <2 x double>* {{.*}}, !dbg ![[LOC]]
+;CHECK: store <2 x double> {{.*}}, !dbg ![[LOC2:[0-9]+]]
+;CHECK: ret
+;CHECK: ![[LOC]] = metadata !{i32 4, i32 0,
+;CHECK: ![[LOC2]] = metadata !{i32 7, i32 0,
+
+define i32 @depth(double* nocapture %A, i32 %m) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata !{double* %A}, i64 0, metadata !12), !dbg !19
+  tail call void @llvm.dbg.value(metadata !{i32 %m}, i64 0, metadata !13), !dbg !19
+  tail call void @llvm.dbg.value(metadata !20, i64 0, metadata !14), !dbg !21
+  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !15), !dbg !21
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !23
+  %cmp8 = icmp sgt i32 %m, 0, !dbg !23
+  br i1 %cmp8, label %for.body.lr.ph, label %for.end, !dbg !23
+
+for.body.lr.ph:                                   ; preds = %entry
+  %arrayidx = getelementptr inbounds double* %A, i64 4, !dbg !24
+  %0 = load double* %arrayidx, align 8, !dbg !24
+  %arrayidx1 = getelementptr inbounds double* %A, i64 5, !dbg !29
+  %1 = load double* %arrayidx1, align 8, !dbg !29
+  br label %for.end, !dbg !23
+
+for.end:                                          ; preds = %for.body.lr.ph, %entry
+  %y1.0.lcssa = phi double [ %1, %for.body.lr.ph ], [ 1.000000e+00, %entry ]
+  %y0.0.lcssa = phi double [ %0, %for.body.lr.ph ], [ 0.000000e+00, %entry ]
+  %arrayidx2 = getelementptr inbounds double* %A, i64 8, !dbg !30
+  store double %y0.0.lcssa, double* %arrayidx2, align 8, !dbg !30
+  %arrayidx3 = getelementptr inbounds double* %A, i64 9, !dbg !30
+  store double %y1.0.lcssa, double* %arrayidx3, align 8, !dbg !30
+  ret i32 undef, !dbg !31
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !32}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"file.c", metadata !"/Users/nadav"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"depth", metadata !"depth", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (double*, i32)* @depth, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [depth]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/nadav/file.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !9, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
+!10 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!11 = metadata !{metadata !12, metadata !13, metadata !14, metadata !15, metadata !16}
+!12 = metadata !{i32 786689, metadata !4, metadata !"A", metadata !5, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [A] [line 1]
+!13 = metadata !{i32 786689, metadata !4, metadata !"m", metadata !5, i32 33554433, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [m] [line 1]
+!14 = metadata !{i32 786688, metadata !4, metadata !"y0", metadata !5, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [y0] [line 2]
+!15 = metadata !{i32 786688, metadata !4, metadata !"y1", metadata !5, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [y1] [line 2]
+!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !5, i32 3, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3]
+!17 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!19 = metadata !{i32 1, i32 0, metadata !4, null}
+!20 = metadata !{double 0.000000e+00}
+!21 = metadata !{i32 2, i32 0, metadata !4, null}
+!22 = metadata !{double 1.000000e+00}
+!23 = metadata !{i32 3, i32 0, metadata !17, null}
+!24 = metadata !{i32 4, i32 0, metadata !25, null}
+!25 = metadata !{i32 786443, metadata !1, metadata !17, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
+!29 = metadata !{i32 5, i32 0, metadata !25, null}
+!30 = metadata !{i32 7, i32 0, metadata !4, null}
+!31 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll
index 8e85cb6c9b8f..5135a92a7bdb 100644
--- a/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ;   return 0;
 ; }
 
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 ; CHECK: load <4 x i32>
 ; CHECK: mul <4 x i32>
 ; CHECK: store <4 x i32>
@@ -41,7 +41,7 @@ entry:
 }
 
 
-; int foo_fail(int * restrict B,  int * restrict A, int n, int m) {
+; int extr_user(int * restrict B,  int * restrict A, int n, int m) {
 ;   B[0] = n * A[0] + m * A[0];
 ;   B[1] = n * A[1] + m * A[1];
 ;   B[2] = n * A[2] + m * A[2];
@@ -49,10 +49,12 @@ entry:
 ;   return A[0];
 ; }
 
-; CHECK: @foo_fail
-; CHECK-NOT: load <4 x i32>
-; CHECK: ret
-define i32 @foo_fail(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+; CHECK-LABEL: @extr_user(
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+; CHECK: extractelement <4 x i32>
+; CHECK-NEXT: ret
+define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
 entry:
   %0 = load i32* %A, align 4
   %mul238 = add i32 %m, %n
@@ -73,6 +75,35 @@ entry:
   %add20 = mul i32 %3, %mul238
   %arrayidx21 = getelementptr inbounds i32* %B, i64 3
   store i32 %add20, i32* %arrayidx21, align 4
-  ret i32 %0  ;<--------- This value has multiple users and can't be vectorized.
+  ret i32 %0  ;<--------- This value has multiple users
 }
 
+; In this example we have an external user that is not the first element in the vector.
+; CHECK-LABEL: @extr_user1(
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+; CHECK: extractelement <4 x i32>
+; CHECK-NEXT: ret
+define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+entry:
+  %0 = load i32* %A, align 4
+  %mul238 = add i32 %m, %n
+  %add = mul i32 %0, %mul238
+  store i32 %add, i32* %B, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add8 = mul i32 %1, %mul238
+  %arrayidx9 = getelementptr inbounds i32* %B, i64 1
+  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 2
+  %2 = load i32* %arrayidx10, align 4
+  %add14 = mul i32 %2, %mul238
+  %arrayidx15 = getelementptr inbounds i32* %B, i64 2
+  store i32 %add14, i32* %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32* %A, i64 3
+  %3 = load i32* %arrayidx16, align 4
+  %add20 = mul i32 %3, %mul238
+  %arrayidx21 = getelementptr inbounds i32* %B, i64 3
+  store i32 %add20, i32* %arrayidx21, align 4
+  ret i32 %1  ;<--------- This value has multiple users
+}
diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll
new file mode 100644
index 000000000000..6d09aa61bf35
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -0,0 +1,96 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; double foo(double * restrict b,  double * restrict a, int n, int m) {
+;   double r=a[1];
+;   double g=a[0];
+;   double x;
+;   for (int i=0; i < 100; i++) {
+;     r += 10;
+;     g += 10;
+;     r *= 4;
+;     g *= 4;
+;     x = g; <----- external user!
+;     r += 4;
+;     g += 4;
+;   }
+;   b[0] = g;
+;   b[1] = r;
+;
+;   return x; <-- must extract here!
+; }
+
+;CHECK: ext_user
+;CHECK: phi <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: br
+;CHECK: store <2 x double>
+;CHECK: extractelement <2 x double>
+;CHECK: ret double
+
+define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) {
+entry:
+  %arrayidx = getelementptr inbounds double* %A, i64 1
+  %0 = load double* %arrayidx, align 8
+  %1 = load double* %A, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.020 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %G.019 = phi double [ %1, %entry ], [ %add5, %for.body ]
+  %R.018 = phi double [ %0, %entry ], [ %add4, %for.body ]
+  %add = fadd double %R.018, 1.000000e+01
+  %add2 = fadd double %G.019, 1.000000e+01
+  %mul = fmul double %add, 4.000000e+00
+  %mul3 = fmul double %add2, 4.000000e+00
+  %add4 = fadd double %mul, 4.000000e+00
+  %add5 = fadd double %mul3, 4.000000e+00
+  %inc = add nsw i32 %i.020, 1
+  %exitcond = icmp eq i32 %inc, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store double %add5, double* %B, align 8
+  %arrayidx7 = getelementptr inbounds double* %B, i64 1
+  store double %add4, double* %arrayidx7, align 8
+  ret double %mul3
+}
+
+; A need-to-gather entry cannot be an external use of the scalar element.
+; Instead the insertelement instructions of the need-to-gather entry are the
+; external users.
+; This test would assert because we would keep the scalar fpext and fadd alive.
+; PR18129
+
+; CHECK-LABEL: needtogather
+define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %c,
+                i32 * noalias %d) {
+entry:
+  %0 = load i32* %d, align 4
+  %conv = sitofp i32 %0 to float
+  %1 = load float* %c
+  %sub = fsub float 0.000000e+00, %1
+  %mul = fmul float %sub, 0.000000e+00
+  %add = fadd float %conv, %mul
+  %conv1 = fpext float %add to double
+  %sub3 = fsub float 1.000000e+00, %1
+  %mul4 = fmul float %sub3, 0.000000e+00
+  %add5 = fadd float %conv, %mul4
+  %conv6 = fpext float %add5 to double
+  %tobool = fcmp une float %add, 0.000000e+00
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  br label %if.end
+
+if.end:
+  %storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ]
+  %e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ]
+  store double %storemerge, double* %a, align 8
+  %conv7 = fptosi double %e.0 to i32
+  store i32 %conv7, i32* %b, align 4
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/extract.ll b/test/Transforms/SLPVectorizer/X86/extract.ll
new file mode 100644
index 000000000000..f611fd4ec24f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/extract.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: fextr
+;CHECK-NOT: insertelement
+;CHECK-NOT: extractelement
+;CHECK: fadd <2 x double>
+;CHECK: ret void
+define void @fextr(double* %ptr) {
+entry:
+  %LD = load <2 x double>* undef
+  %V0 = extractelement <2 x double> %LD, i32 0
+  %V1 = extractelement <2 x double> %LD, i32 1
+  %P0 = getelementptr inbounds double* %ptr, i64 0
+  %P1 = getelementptr inbounds double* %ptr, i64 1
+  %A0 = fadd double %V0, 0.0
+  %A1 = fadd double %V1, 1.1
+  store double %A0, double* %P0, align 4
+  store double %A1, double* %P1, align 4
+  ret void
+}
+
+;CHECK: fextr1
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: ret void
+define void @fextr1(double* %ptr) {
+entry:
+  %LD = load <2 x double>* undef
+  %V0 = extractelement <2 x double> %LD, i32 0
+  %V1 = extractelement <2 x double> %LD, i32 1
+  %P0 = getelementptr inbounds double* %ptr, i64 1  ; <--- incorrect order
+  %P1 = getelementptr inbounds double* %ptr, i64 0
+  %A0 = fadd double %V0, 1.2
+  %A1 = fadd double %V1, 3.4
+  store double %A0, double* %P0, align 4
+  store double %A1, double* %P1, align 4
+  ret void
+}
+
+;CHECK: fextr2
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: ret void
+define void @fextr2(double* %ptr) {
+entry:
+  %LD = load <4 x double>* undef
+  %V0 = extractelement <4 x double> %LD, i32 0  ; <--- invalid size.
+  %V1 = extractelement <4 x double> %LD, i32 1
+  %P0 = getelementptr inbounds double* %ptr, i64 0
+  %P1 = getelementptr inbounds double* %ptr, i64 1
+  %A0 = fadd double %V0, 5.5
+  %A1 = fadd double %V1, 6.6
+  store double %A0, double* %P0, align 4
+  store double %A1, double* %P1, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll
new file mode 100644
index 000000000000..8f919512ff8d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -0,0 +1,417 @@
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; #include <stdint.h>
+;
+; int foo(float *A, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum += 7*A[i*4  ] +
+;            7*A[i*4+1] +
+;            7*A[i*4+2] +
+;            7*A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; NOSTORE-LABEL: add_red
+; NOSTORE: fmul <4 x float>
+; NOSTORE: shufflevector <4 x float>
+
+define i32 @add_red(float* %A, i32 %n) {
+entry:
+  %cmp31 = icmp sgt i32 %n, 0
+  br i1 %cmp31, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.033 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ]
+  %mul = shl nsw i64 %i.033, 2
+  %arrayidx = getelementptr inbounds float* %A, i64 %mul
+  %1 = load float* %arrayidx, align 4
+  %mul2 = fmul float %1, 7.000000e+00
+  %add28 = or i64 %mul, 1
+  %arrayidx4 = getelementptr inbounds float* %A, i64 %add28
+  %2 = load float* %arrayidx4, align 4
+  %mul5 = fmul float %2, 7.000000e+00
+  %add6 = fadd fast float %mul2, %mul5
+  %add829 = or i64 %mul, 2
+  %arrayidx9 = getelementptr inbounds float* %A, i64 %add829
+  %3 = load float* %arrayidx9, align 4
+  %mul10 = fmul float %3, 7.000000e+00
+  %add11 = fadd fast float %add6, %mul10
+  %add1330 = or i64 %mul, 3
+  %arrayidx14 = getelementptr inbounds float* %A, i64 %add1330
+  %4 = load float* %arrayidx14, align 4
+  %mul15 = fmul float %4, 7.000000e+00
+  %add16 = fadd fast float %add11, %mul15
+  %add17 = fadd fast float %sum.032, %add16
+  %inc = add nsw i64 %i.033, 1
+  %exitcond = icmp eq i64 %inc, %0
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %add17 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum *= B[0]*A[i*4  ] +
+;       B[1]*A[i*4+1] +
+;       B[2]*A[i*4+2] +
+;       B[3]*A[i*4+3];
+;   }
+;   return sum;
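+; }
+; NOTE(review): this file's RUN lines only enable the NOSTORE and STORE prefixes, so the plain CHECK directives (mul_red, long_red, chain_red, store_red) are never run by FileCheck -- confirm intent.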
+; CHECK-LABEL: mul_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
+entry:
+  %cmp38 = icmp sgt i32 %n, 0
+  br i1 %cmp38, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load float* %B, align 4
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx4, align 4
+  %arrayidx9 = getelementptr inbounds float* %B, i64 2
+  %2 = load float* %arrayidx9, align 4
+  %arrayidx15 = getelementptr inbounds float* %B, i64 3
+  %3 = load float* %arrayidx15, align 4
+  %4 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.040 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ]
+  %mul = shl nsw i64 %i.040, 2
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %5 = load float* %arrayidx2, align 4
+  %mul3 = fmul float %0, %5
+  %add35 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds float* %A, i64 %add35
+  %6 = load float* %arrayidx6, align 4
+  %mul7 = fmul float %1, %6
+  %add8 = fadd fast float %mul3, %mul7
+  %add1136 = or i64 %mul, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %add1136
+  %7 = load float* %arrayidx12, align 4
+  %mul13 = fmul float %2, %7
+  %add14 = fadd fast float %add8, %mul13
+  %add1737 = or i64 %mul, 3
+  %arrayidx18 = getelementptr inbounds float* %A, i64 %add1737
+  %8 = load float* %arrayidx18, align 4
+  %mul19 = fmul float %3, %8
+  %add20 = fadd fast float %add14, %mul19
+  %mul21 = fmul float %sum.039, %add20
+  %inc = add nsw i64 %i.040, 1
+  %exitcond = icmp eq i64 %inc, %4
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %mul21 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum += B[0]*A[i*6  ] +
+;            B[1]*A[i*6+1] +
+;            B[2]*A[i*6+2] +
+;            B[3]*A[i*6+3] +
+;            B[4]*A[i*6+4] +
+;            B[5]*A[i*6+5] +
+;            B[6]*A[i*6+6] +
+;            B[7]*A[i*6+7] +
+;            B[8]*A[i*6+8];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: long_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
+entry:
+  %cmp81 = icmp sgt i32 %n, 0
+  br i1 %cmp81, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load float* %B, align 4
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx4, align 4
+  %arrayidx9 = getelementptr inbounds float* %B, i64 2
+  %2 = load float* %arrayidx9, align 4
+  %arrayidx15 = getelementptr inbounds float* %B, i64 3
+  %3 = load float* %arrayidx15, align 4
+  %arrayidx21 = getelementptr inbounds float* %B, i64 4
+  %4 = load float* %arrayidx21, align 4
+  %arrayidx27 = getelementptr inbounds float* %B, i64 5
+  %5 = load float* %arrayidx27, align 4
+  %arrayidx33 = getelementptr inbounds float* %B, i64 6
+  %6 = load float* %arrayidx33, align 4
+  %arrayidx39 = getelementptr inbounds float* %B, i64 7
+  %7 = load float* %arrayidx39, align 4
+  %arrayidx45 = getelementptr inbounds float* %B, i64 8
+  %8 = load float* %arrayidx45, align 4
+  %9 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.083 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ]
+  %mul = mul nsw i64 %i.083, 6
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %10 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %10
+  %add80 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds float* %A, i64 %add80
+  %11 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %1, %11
+  %add8 = fadd fast float %mul3, %mul7
+  %add11 = add nsw i64 %mul, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %add11
+  %12 = load float* %arrayidx12, align 4
+  %mul13 = fmul fast float %2, %12
+  %add14 = fadd fast float %add8, %mul13
+  %add17 = add nsw i64 %mul, 3
+  %arrayidx18 = getelementptr inbounds float* %A, i64 %add17
+  %13 = load float* %arrayidx18, align 4
+  %mul19 = fmul fast float %3, %13
+  %add20 = fadd fast float %add14, %mul19
+  %add23 = add nsw i64 %mul, 4
+  %arrayidx24 = getelementptr inbounds float* %A, i64 %add23
+  %14 = load float* %arrayidx24, align 4
+  %mul25 = fmul fast float %4, %14
+  %add26 = fadd fast float %add20, %mul25
+  %add29 = add nsw i64 %mul, 5
+  %arrayidx30 = getelementptr inbounds float* %A, i64 %add29
+  %15 = load float* %arrayidx30, align 4
+  %mul31 = fmul fast float %5, %15
+  %add32 = fadd fast float %add26, %mul31
+  %add35 = add nsw i64 %mul, 6
+  %arrayidx36 = getelementptr inbounds float* %A, i64 %add35
+  %16 = load float* %arrayidx36, align 4
+  %mul37 = fmul fast float %6, %16
+  %add38 = fadd fast float %add32, %mul37
+  %add41 = add nsw i64 %mul, 7
+  %arrayidx42 = getelementptr inbounds float* %A, i64 %add41
+  %17 = load float* %arrayidx42, align 4
+  %mul43 = fmul fast float %7, %17
+  %add44 = fadd fast float %add38, %mul43
+  %add47 = add nsw i64 %mul, 8
+  %arrayidx48 = getelementptr inbounds float* %A, i64 %add47
+  %18 = load float* %arrayidx48, align 4
+  %mul49 = fmul fast float %8, %18
+  %add50 = fadd fast float %add44, %mul49
+  %add51 = fadd fast float %sum.082, %add50
+  %inc = add nsw i64 %i.083, 1
+  %exitcond = icmp eq i64 %inc, %9
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %add51 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     sum += B[0]*A[i*4  ];
+;     sum += B[1]*A[i*4+1];
+;     sum += B[2]*A[i*4+2];
+;     sum += B[3]*A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: chain_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
+entry:
+  %cmp41 = icmp sgt i32 %n, 0
+  br i1 %cmp41, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load float* %B, align 4
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %1 = load float* %arrayidx4, align 4
+  %arrayidx10 = getelementptr inbounds float* %B, i64 2
+  %2 = load float* %arrayidx10, align 4
+  %arrayidx16 = getelementptr inbounds float* %B, i64 3
+  %3 = load float* %arrayidx16, align 4
+  %4 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.043 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ]
+  %mul = shl nsw i64 %i.043, 2
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %5 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %5
+  %add = fadd fast float %sum.042, %mul3
+  %add638 = or i64 %mul, 1
+  %arrayidx7 = getelementptr inbounds float* %A, i64 %add638
+  %6 = load float* %arrayidx7, align 4
+  %mul8 = fmul fast float %1, %6
+  %add9 = fadd fast float %add, %mul8
+  %add1239 = or i64 %mul, 2
+  %arrayidx13 = getelementptr inbounds float* %A, i64 %add1239
+  %7 = load float* %arrayidx13, align 4
+  %mul14 = fmul fast float %2, %7
+  %add15 = fadd fast float %add9, %mul14
+  %add1840 = or i64 %mul, 3
+  %arrayidx19 = getelementptr inbounds float* %A, i64 %add1840
+  %8 = load float* %arrayidx19, align 4
+  %mul20 = fmul fast float %3, %8
+  %add21 = fadd fast float %add15, %mul20
+  %inc = add nsw i64 %i.043, 1
+  %exitcond = icmp eq i64 %inc, %4
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+  %phitmp = fptosi float %add21 to i32
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+; int foo(float * restrict A, float * restrict B, float * restrict C, int n) {
+;   float sum = 0;
+;   for (intptr_t i=0; i < n; ++i) {
+;     C[i] = B[0] *A[i*4  ] +
+;          B[1] *A[i*4+1] +
+;          B[2] *A[i*4+2] +
+;          B[3] *A[i*4+3];
+;   }
+;   return sum;
+; }
+
+; CHECK-LABEL: store_red
+; CHECK: fmul <4 x float>
+; CHECK: shufflevector <4 x float>
+
+define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) {
+entry:
+  %cmp37 = icmp sgt i32 %n, 0
+  br i1 %cmp37, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %arrayidx4 = getelementptr inbounds float* %B, i64 1
+  %arrayidx9 = getelementptr inbounds float* %B, i64 2
+  %arrayidx15 = getelementptr inbounds float* %B, i64 3
+  %0 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %1 = load float* %B, align 4
+  %mul = shl nsw i64 %i.039, 2
+  %arrayidx2 = getelementptr inbounds float* %A, i64 %mul
+  %2 = load float* %arrayidx2, align 4
+  %mul3 = fmul fast float %1, %2
+  %3 = load float* %arrayidx4, align 4
+  %add34 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds float* %A, i64 %add34
+  %4 = load float* %arrayidx6, align 4
+  %mul7 = fmul fast float %3, %4
+  %add8 = fadd fast float %mul3, %mul7
+  %5 = load float* %arrayidx9, align 4
+  %add1135 = or i64 %mul, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %add1135
+  %6 = load float* %arrayidx12, align 4
+  %mul13 = fmul fast float %5, %6
+  %add14 = fadd fast float %add8, %mul13
+  %7 = load float* %arrayidx15, align 4
+  %add1736 = or i64 %mul, 3
+  %arrayidx18 = getelementptr inbounds float* %A, i64 %add1736
+  %8 = load float* %arrayidx18, align 4
+  %mul19 = fmul fast float %7, %8
+  %add20 = fadd fast float %add14, %mul19
+  store float %add20, float* %C.addr.038, align 4
+  %incdec.ptr = getelementptr inbounds float* %C.addr.038, i64 1
+  %inc = add nsw i64 %i.039, 1
+  %exitcond = icmp eq i64 %inc, %0
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 0
+}
+
+
+; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S <  %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE
+
+; void foo(double * restrict A, double * restrict B, double * restrict C,
+;          int n) {
+;   for (intptr_t i=0; i < n; ++i) {
+;     C[i] = B[0] *A[i*4  ] + B[1] *A[i*4+1];
+;   }
+; }
+
+; STORE-LABEL: store_red_double
+; STORE: fmul <2 x double>
+; STORE: extractelement <2 x double>
+; STORE: extractelement <2 x double>
+
+define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) {
+entry:
+  %cmp17 = icmp sgt i32 %n, 0
+  br i1 %cmp17, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = load double* %B, align 8
+  %arrayidx4 = getelementptr inbounds double* %B, i64 1
+  %1 = load double* %arrayidx4, align 8
+  %2 = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %mul = shl nsw i64 %i.018, 2
+  %arrayidx2 = getelementptr inbounds double* %A, i64 %mul
+  %3 = load double* %arrayidx2, align 8
+  %mul3 = fmul fast double %0, %3
+  %add16 = or i64 %mul, 1
+  %arrayidx6 = getelementptr inbounds double* %A, i64 %add16
+  %4 = load double* %arrayidx6, align 8
+  %mul7 = fmul fast double %1, %4
+  %add8 = fadd fast double %mul3, %mul7
+  %arrayidx9 = getelementptr inbounds double* %C, i64 %i.018
+  store double %add8, double* %arrayidx9, align 8
+  %inc = add nsw i64 %i.018, 1
+  %exitcond = icmp eq i64 %inc, %2
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/implicitfloat.ll b/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
new file mode 100644
index 000000000000..f63f2683b10e
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't vectorize when noimplicitfloat is used.
+; CHECK: test1
+; CHECK-NOT: store <2 x double>
+; CHECK: ret
+define void @test1(double* %a, double* %b, double* %c) noimplicitfloat { ; <------ noimplicitfloat attribute here!
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
new file mode 100644
index 000000000000..3115232887bd
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
+
+; We can't vectorize when the roots are used inside the tree.
+;CHECK-LABEL: @in_tree_user(
+;CHECK-NOT: load <2 x double>
+;CHECK: ret
+define void @in_tree_user(double* nocapture %A, i32 %n) {
+entry:
+  %conv = sitofp i32 %n to double
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %0 = shl nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds double* %A, i64 %0
+  %1 = load double* %arrayidx, align 8
+  %mul1 = fmul double %conv, %1
+  %mul2 = fmul double %mul1, 7.000000e+00
+  %add = fadd double %mul2, 5.000000e+00
+  %BadValue = fadd double %add, %add    ; <------------------ In tree user.
+  %2 = or i64 %0, 1
+  %arrayidx6 = getelementptr inbounds double* %A, i64 %2
+  %3 = load double* %arrayidx6, align 8
+  %mul8 = fmul double %conv, %3
+  %mul9 = fmul double %mul8, 4.000000e+00
+  %add10 = fadd double %mul9, 9.000000e+00
+  %cmp11 = fcmp ogt double %add, %add10
+  br i1 %cmp11, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
new file mode 100644
index 000000000000..43f7aed9f519
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -0,0 +1,197 @@
+; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
+
+target triple = "x86_64-apple-macosx10.8.0"
+
+define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  ret <4 x float> %rd
+}
+
+; Insert in an order different from the vector indices to make sure it
+; doesn't matter.
+define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_insert_out_of_order(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 2
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 0
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  ret <4 x float> %rd
+}
+
+declare void @v4f32_user(<4 x float>) #0
+declare void @f32_user(float) #0
+
+; Multiple users of the final constructed vector
+define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_users(
+; CHECK-NEXT: %1 = icmp ne <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: select <4 x i1> %1, <4 x float> %a, <4 x float> %b
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  call void @v4f32_user(<4 x float> %rd) #0
+  ret <4 x float> %rd
+}
+
+; Unused insertelement
+define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_no_users(
+; CHECK-NOT: icmp ne <4 x i32>
+; CHECK-NOT: select <4 x i1>
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> undef, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  ret <4 x float> %rd
+}
+
+; Make sure we don't hit the infinite loop that I ran into when trying
+; to do this backwards.
+define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
+; CHECK-LABEL: @reconstruct(
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %ra = insertelement <4 x i32> undef, i32 %c0, i32 0
+  %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1
+  %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2
+  %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3
+  ret <4 x i32> %rd
+}
+
+define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_v2(
+; CHECK: icmp ne <2 x i32>
+; CHECK: select <2 x i1>
+  %c0 = extractelement <2 x i32> %c, i32 0
+  %c1 = extractelement <2 x i32> %c, i32 1
+  %a0 = extractelement <2 x float> %a, i32 0
+  %a1 = extractelement <2 x float> %a, i32 1
+  %b0 = extractelement <2 x float> %b, i32 0
+  %b1 = extractelement <2 x float> %b, i32 1
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %ra = insertelement <2 x float> undef, float %s0, i32 0
+  %rb = insertelement <2 x float> %ra, float %s1, i32 1
+  ret <2 x float> %rb
+}
+
+; Make sure when we construct partial vectors, we don't keep
+; re-visiting the insertelement chains starting with undef
+; (low cost threshold needed to force this to happen)
+define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %1 = insertelement <2 x i32> undef, i32 %c0, i32 0
+  %2 = insertelement <2 x i32> %1, i32 %c1, i32 1
+  %3 = icmp ne <2 x i32> %2, zeroinitializer
+  %4 = insertelement <2 x float> undef, float %a0, i32 0
+  %5 = insertelement <2 x float> %4, float %a1, i32 1
+  %6 = insertelement <2 x float> undef, float %b0, i32 0
+  %7 = insertelement <2 x float> %6, float %b1, i32 1
+  %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7
+  %9 = extractelement <2 x float> %8, i32 0
+  %ra = insertelement <4 x float> undef, float %9, i32 0
+  %10 = extractelement <2 x float> %8, i32 1
+  %rb = insertelement <4 x float> %ra, float %10, i32 1
+  ret <4 x float> %rb
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/Transforms/SLPVectorizer/X86/lit.local.cfg
+++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll
new file mode 100644
index 000000000000..5af3e6d6e903
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; At this point we can't vectorize only parts of the tree.
+
+; CHECK-LABEL: @test(
+; CHECK: insertelement <2 x i8>
+; CHECK: insertelement <2 x i8>
+; CHECK: sitofp <2 x i8>
+; CHECK: fmul <2 x double>
+; CHECK: ret
+define i32 @test(double* nocapture %A, i8* nocapture %B) {
+entry:
+  %0 = load i8* %B, align 1
+  %arrayidx1 = getelementptr inbounds i8* %B, i64 1
+  %1 = load i8* %arrayidx1, align 1
+  %add = add i8 %0, 3
+  %add4 = add i8 %1, 3
+  %conv6 = sitofp i8 %add to double
+  %conv7 = sitofp i8 %add4 to double 
+  %mul = fmul double %conv6, %conv6
+  %add8 = fadd double %mul, 1.000000e+00
+  %mul9 = fmul double %conv7, %conv7
+  %add10 = fadd double %mul9, 1.000000e+00
+  %mul11 = fmul double %add8, %add8
+  %add12 = fadd double %mul11, 1.000000e+00
+  %mul13 = fmul double %add10, %add10
+  %add14 = fadd double %mul13, 1.000000e+00
+  %mul15 = fmul double %add12, %add12
+  %add16 = fadd double %mul15, 1.000000e+00
+  %mul17 = fmul double %add14, %add14
+  %add18 = fadd double %mul17, 1.000000e+00
+  %mul19 = fmul double %add16, %add16
+  %add20 = fadd double %mul19, 1.000000e+00
+  %mul21 = fmul double %add18, %add18
+  %add22 = fadd double %mul21, 1.000000e+00
+  %mul23 = fmul double %add20, %add20
+  %add24 = fadd double %mul23, 1.000000e+00
+  %mul25 = fmul double %add22, %add22
+  %add26 = fadd double %mul25, 1.000000e+00
+  store double %add24, double* %A, align 8
+  %arrayidx28 = getelementptr inbounds double* %A, i64 1
+  store double %add26, double* %arrayidx28, align 8
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index 4a37fce2ff24..aef2479dd524 100644
--- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
 ;CHECK: store <4 x i32>
diff --git a/test/Transforms/SLPVectorizer/X86/multi_block.ll b/test/Transforms/SLPVectorizer/X86/multi_block.ll
new file mode 100644
index 000000000000..2f1cc74d05ff
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; int bar(double *A, int d) {
+;   double A0 = A[0];
+;   double A1 = A[1];
+;   float F0 = A0;
+;   float F1 = A1;
+;   if (d) foo(); <----- This splits the blocks
+;   F0+=4.0;
+;   F1+=5.0;
+;   A[8] = 9.0 + F0;
+;   A[9] = 5.0 + F1;
+; }
+
+
+;CHECK-LABEL: @bar(
+;CHECK: load <2 x double>
+;CHECK: fptrunc <2 x double>
+;CHECK: call i32
+;CHECK: fadd <2 x float>
+;CHECK: fpext <2 x float>
+;CHECK: store <2 x double>
+;CHECK: ret
+define i32 @bar(double* nocapture %A, i32 %d) {
+  %1 = load double* %A, align 8
+  %2 = getelementptr inbounds double* %A, i64 1
+  %3 = load double* %2, align 8
+  %4 = fptrunc double %1 to float
+  %5 = fptrunc double %3 to float
+  %6 = icmp eq i32 %d, 0
+  br i1 %6, label %9, label %7
+
+; <label>:7                                       ; preds = %0
+  %8 = tail call i32 (...)* @foo()
+  br label %9
+
+; <label>:9                                       ; preds = %0, %7
+  %10 = fadd float %4, 4.000000e+00
+  %11 = fadd float %5, 5.000000e+00
+  %12 = fpext float %10 to double
+  %13 = fadd double %12, 9.000000e+00
+  %14 = getelementptr inbounds double* %A, i64 8
+  store double %13, double* %14, align 8
+  %15 = fpext float %11 to double
+  %16 = fadd double %15, 5.000000e+00
+  %17 = getelementptr inbounds double* %A, i64 9
+  store double %16, double* %17, align 8
+  ret i32 undef
+}
+
+declare i32 @foo(...)
+
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll
index aaa6063fdeda..cab99945e29e 100644
--- a/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;  A[4] += n * 5 + 11;
 ;}
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: insertelement <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
diff --git a/test/Transforms/SLPVectorizer/X86/odd_store.ll b/test/Transforms/SLPVectorizer/X86/odd_store.ll
new file mode 100644
index 000000000000..027f6016e2b9
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/odd_store.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;int foo(char * restrict A, float * restrict B, float T) {
+;  A[0] = (T * B[10] + 4.0);
+;  A[1] = (T * B[11] + 5.0);
+;  A[2] = (T * B[12] + 6.0);
+;}
+
+;CHECK-LABEL: @foo(
+;CHECK-NOT: load <3 x float>
+;CHECK-NOT: fmul <3 x float>
+;CHECK-NOT: fpext <3 x float>
+;CHECK-NOT: fadd <3 x double>
+;CHECK-NOT: fptosi <3 x double>
+;CHECK-NOT: store <3 x i8>
+;CHECK: ret
+define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T) {
+  %1 = getelementptr inbounds float* %B, i64 10
+  %2 = load float* %1, align 4
+  %3 = fmul float %2, %T
+  %4 = fpext float %3 to double
+  %5 = fadd double %4, 4.000000e+00
+  %6 = fptosi double %5 to i8
+  store i8 %6, i8* %A, align 1
+  %7 = getelementptr inbounds float* %B, i64 11
+  %8 = load float* %7, align 4
+  %9 = fmul float %8, %T
+  %10 = fpext float %9 to double
+  %11 = fadd double %10, 5.000000e+00
+  %12 = fptosi double %11 to i8
+  %13 = getelementptr inbounds i8* %A, i64 1
+  store i8 %12, i8* %13, align 1
+  %14 = getelementptr inbounds float* %B, i64 12
+  %15 = load float* %14, align 4
+  %16 = fmul float %15, %T
+  %17 = fpext float %16 to double
+  %18 = fadd double %17, 6.000000e+00
+  %19 = fptosi double %18 to i8
+  %20 = getelementptr inbounds i8* %A, i64 2
+  store i8 %19, i8* %20, align 1
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/operandorder.ll b/test/Transforms/SLPVectorizer/X86/operandorder.ll
new file mode 100644
index 000000000000..c5322a839ed1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -0,0 +1,234 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -instcombine -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+
+
+; Make sure we order the operands of commutative operations so that we get
+; bigger vectorizable trees.
+
+; CHECK-LABEL: shuffle_operands1
+; CHECK:         load <2 x double>
+; CHECK:         fadd <2 x double>
+
+define void @shuffle_operands1(double * noalias %from, double * noalias %to,
+                               double %v1, double %v2) {
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %v1
+  %v1_2 = fadd double %v2, %v0_2
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %p
+  %v1_2 = fadd double %v0_1, %v0_2
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast2
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast2(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %p, %v0_1
+  %v1_2 = fadd double %v0_2, %v0_1
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast3
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast3(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %p, %v0_1
+  %v1_2 = fadd double %v0_1, %v0_2
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+
+; CHECK-LABEL: shuffle_preserve_broadcast4
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast4(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_2, %v0_1
+  %v1_2 = fadd double %p, %v0_1
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; CHECK-LABEL: shuffle_preserve_broadcast5
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast5(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %v0_2
+  %v1_2 = fadd double %p, %v0_1
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+
+; CHECK-LABEL: shuffle_preserve_broadcast6
+; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
+; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+define void @shuffle_preserve_broadcast6(double * noalias %from,
+                                        double * noalias %to,
+                                        double %v1, double %v2) {
+entry:
+br label %lp
+
+lp:
+  %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
+  %from_1 = getelementptr double *%from, i64 1
+  %v0_1 = load double * %from
+  %v0_2 = load double * %from_1
+  %v1_1 = fadd double %v0_1, %v0_2
+  %v1_2 = fadd double %v0_1, %p
+  %to_2 = getelementptr double * %to, i64 1
+  store double %v1_1, double *%to
+  store double %v1_2, double *%to_2
+br i1 undef, label %lp, label %ext
+
+ext:
+  ret void
+}
+
+; Make sure we don't scramble operands when we reorder them and destroy
+; 'good' source order.
+
+; CHECK-LABEL: good_load_order
+
+; CHECK: %[[V1:[0-9]+]] = load <4 x float>*
+; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0
+; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+; CHECK:                = fmul <4 x float> %[[V1]], %[[V3]]
+
+@a = common global [32000 x float] zeroinitializer, align 16
+
+define void @good_load_order() {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %0 = load float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16
+  br label %for.body3
+
+for.body3:
+  %1 = phi float [ %0, %for.cond1.preheader ], [ %10, %for.body3 ]
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %2 = add nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %2
+  %3 = load float* %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
+  %mul6 = fmul float %3, %1
+  store float %mul6, float* %arrayidx5, align 4
+  %4 = add nsw i64 %indvars.iv, 2
+  %arrayidx11 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %4
+  %5 = load float* %arrayidx11, align 4
+  %mul15 = fmul float %5, %3
+  store float %mul15, float* %arrayidx, align 4
+  %6 = add nsw i64 %indvars.iv, 3
+  %arrayidx21 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %6
+  %7 = load float* %arrayidx21, align 4
+  %mul25 = fmul float %7, %5
+  store float %mul25, float* %arrayidx11, align 4
+  %8 = add nsw i64 %indvars.iv, 4
+  %arrayidx31 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %8
+  %9 = load float* %arrayidx31, align 4
+  %mul35 = fmul float %9, %7
+  store float %mul35, float* %arrayidx21, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
+  %arrayidx41 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.next
+  %10 = load float* %arrayidx41, align 4
+  %mul45 = fmul float %10, %9
+  store float %mul45, float* %arrayidx31, align 4
+  %11 = trunc i64 %indvars.iv.next to i32
+  %cmp2 = icmp slt i32 %11, 31995
+  br i1 %cmp2, label %for.body3, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/opt.ll b/test/Transforms/SLPVectorizer/X86/opt.ll
new file mode 100644
index 000000000000..14137c11ee41
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP
+; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure we can disable slp vectorization in opt.
+
+; SLP-LABEL: test1
+; SLP: store <2 x double>
+
+; NOSLP-LABEL: test1
+; NOSLP-NOT: store <2 x double>
+
+
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/ordering.ll b/test/Transforms/SLPVectorizer/X86/ordering.ll
new file mode 100644
index 000000000000..d2ecd4546ddb
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/ordering.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @updateModelQPFrame(i32 %m_Bits) {
+entry:
+  %0 = load double* undef, align 8
+  %mul = fmul double undef, %0
+  %mul2 = fmul double undef, %mul
+  %mul4 = fmul double %0, %mul2
+  %mul5 = fmul double undef, 4.000000e+00
+  %mul7 = fmul double undef, %mul5
+  %conv = sitofp i32 %m_Bits to double
+  %mul8 = fmul double %conv, %mul7
+  %add = fadd double %mul4, %mul8
+  %cmp11 = fcmp olt double %add, 0.000000e+00
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+declare i32 @personality_v0(...)
+
+define void @invoketest() {
+entry:
+  br i1 undef, label %cond.true, label %cond.false
+
+cond.true:
+  %call49 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) 
+          to label %cond.true54 unwind label %lpad
+
+cond.false:
+  %call51 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %cond.false57 unwind label %lpad
+
+cond.true54:
+  %call56 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef) 
+          to label %cond.end60 unwind label %lpad
+
+cond.false57:
+  %call59 = invoke double bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to double (i8*, i8*)*)(i8* undef, i8* undef)
+          to label %cond.end60 unwind label %lpad
+
+; Make sure we don't vectorize these phis - they have invokes as inputs.
+
+; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
+
+; CHECK-LABEL: invoketest
+
+; CHECK-LABEL: cond.end60
+; CHECK-NOT: phi <2 x double>
+; CHECK: insertelement
+; CHECK-LABEL: if.then63
+
+cond.end60:
+  %cond126 = phi double [ %call49, %cond.true54 ], [ %call51, %cond.false57 ]
+  %cond61 = phi double [ %call56, %cond.true54 ], [ %call59, %cond.false57 ]
+  br i1 undef, label %if.end98, label %if.then63
+
+if.then63:
+  %conv69 = fptrunc double undef to float
+  %conv70 = fpext float %conv69 to double
+  %div71 = fdiv double %cond126, %conv70
+  %conv78 = fptrunc double undef to float
+  %conv79 = fpext float %conv78 to double
+  %div80 = fdiv double %cond61, %conv79
+  br label %if.end98
+
+lpad:
+  %l = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @personality_v0 to i8*)
+          cleanup
+  resume { i8*, i32 } %l
+
+if.end98:
+  %dimensionsResult.sroa.0.0 = phi double [ %div71, %if.then63 ], [ %cond126, %cond.end60 ]
+  %dimensionsResult.sroa.6.0 = phi double [ %div80, %if.then63 ], [ %cond61, %cond.end60 ]
+  br label %if.end99
+
+if.end99:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/phi.ll b/test/Transforms/SLPVectorizer/X86/phi.ll
new file mode 100644
index 000000000000..964e0e4efee7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -0,0 +1,248 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+;int foo(double *A, int k) {
+;  double A0;
+;  double A1;
+;  if (k) {
+;    A0 = 3;
+;    A1 = 5;
+;  } else {
+;    A0 = A[10];
+;    A1 = A[11];
+;  }
+;  A[0] = A0;
+;  A[1] = A1;
+;}
+
+
+;CHECK-LABEL: @foo(
+;CHECK: load <2 x double>
+;CHECK: phi <2 x double>
+;CHECK: store <2 x double>
+;CHECK: ret i32 undef
+define i32 @foo(double* nocapture %A, i32 %k) {
+entry:
+  %tobool = icmp eq i32 %k, 0
+  br i1 %tobool, label %if.else, label %if.end
+
+if.else:                                          ; preds = %entry
+  %arrayidx = getelementptr inbounds double* %A, i64 10
+  %0 = load double* %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds double* %A, i64 11
+  %1 = load double* %arrayidx1, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.else
+  %A0.0 = phi double [ %0, %if.else ], [ 3.000000e+00, %entry ]
+  %A1.0 = phi double [ %1, %if.else ], [ 5.000000e+00, %entry ]
+  store double %A0.0, double* %A, align 8
+  %arrayidx3 = getelementptr inbounds double* %A, i64 1
+  store double %A1.0, double* %arrayidx3, align 8
+  ret i32 undef
+}
+
+
+;int foo(double * restrict B,  double * restrict A, int n, int m) {
+;  double R=A[1];
+;  double G=A[0];
+;  for (int i=0; i < 100; i++) {
+;    R += 10;
+;    G += 10;
+;    R *= 4;
+;    G *= 4;
+;    R += 4;
+;    G += 4;
+;  }
+;  B[0] = G;
+;  B[1] = R;
+;  return 0;
+;}
+
+;CHECK-LABEL: @foo2(
+;CHECK: load <2 x double>
+;CHECK: phi <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: store <2 x double>
+;CHECK: ret
+define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 {
+entry:
+  %arrayidx = getelementptr inbounds double* %A, i64 1
+  %0 = load double* %arrayidx, align 8
+  %1 = load double* %A, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.019 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %G.018 = phi double [ %1, %entry ], [ %add5, %for.body ]
+  %R.017 = phi double [ %0, %entry ], [ %add4, %for.body ]
+  %add = fadd double %R.017, 1.000000e+01
+  %add2 = fadd double %G.018, 1.000000e+01
+  %mul = fmul double %add, 4.000000e+00
+  %mul3 = fmul double %add2, 4.000000e+00
+  %add4 = fadd double %mul, 4.000000e+00
+  %add5 = fadd double %mul3, 4.000000e+00
+  %inc = add nsw i32 %i.019, 1
+  %exitcond = icmp eq i32 %inc, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store double %add5, double* %B, align 8
+  %arrayidx7 = getelementptr inbounds double* %B, i64 1
+  store double %add4, double* %arrayidx7, align 8
+  ret i32 0
+}
+
+; float foo3(float *A) {
+;
+;   float R = A[0];
+;   float G = A[1];
+;   float B = A[2];
+;   float Y = A[3];
+;   float P = A[4];
+;   for (int i=0; i < 121; i+=3) {
+;     R+=A[i+0]*7;
+;     G+=A[i+1]*8;
+;     B+=A[i+2]*9;
+;     Y+=A[i+3]*10;
+;     P+=A[i+4]*11;
+;   }
+;
+;   return R+G+B+Y+P;
+; }
+
+;CHECK-LABEL: @foo3(
+;CHECK: phi <4 x float>
+;CHECK: fmul <4 x float>
+;CHECK: fadd <4 x float>
+;CHECK-NOT: phi <5 x float>
+;CHECK-NOT: fmul <5 x float>
+;CHECK-NOT: fadd <5 x float>
+
+define float @foo3(float* nocapture readonly %A) #0 {
+entry:
+  %0 = load float* %A, align 4
+  %arrayidx1 = getelementptr inbounds float* %A, i64 1
+  %1 = load float* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float* %A, i64 2
+  %2 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %A, i64 3
+  %3 = load float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %A, i64 4
+  %4 = load float* %arrayidx4, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %P.056 = phi float [ %4, %entry ], [ %add26, %for.body ]
+  %Y.055 = phi float [ %3, %entry ], [ %add21, %for.body ]
+  %B.054 = phi float [ %2, %entry ], [ %add16, %for.body ]
+  %G.053 = phi float [ %1, %entry ], [ %add11, %for.body ]
+  %R.052 = phi float [ %0, %entry ], [ %add6, %for.body ]
+  %5 = phi float [ %1, %entry ], [ %11, %for.body ]
+  %6 = phi float [ %0, %entry ], [ %9, %for.body ]
+  %mul = fmul float %6, 7.000000e+00
+  %add6 = fadd float %R.052, %mul
+  %mul10 = fmul float %5, 8.000000e+00
+  %add11 = fadd float %G.053, %mul10
+  %7 = add nsw i64 %indvars.iv, 2
+  %arrayidx14 = getelementptr inbounds float* %A, i64 %7
+  %8 = load float* %arrayidx14, align 4
+  %mul15 = fmul float %8, 9.000000e+00
+  %add16 = fadd float %B.054, %mul15
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
+  %arrayidx19 = getelementptr inbounds float* %A, i64 %indvars.iv.next
+  %9 = load float* %arrayidx19, align 4
+  %mul20 = fmul float %9, 1.000000e+01
+  %add21 = fadd float %Y.055, %mul20
+  %10 = add nsw i64 %indvars.iv, 4
+  %arrayidx24 = getelementptr inbounds float* %A, i64 %10
+  %11 = load float* %arrayidx24, align 4
+  %mul25 = fmul float %11, 1.100000e+01
+  %add26 = fadd float %P.056, %mul25
+  %12 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %12, 121
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add28 = fadd float %add6, %add11
+  %add29 = fadd float %add28, %add16
+  %add30 = fadd float %add29, %add21
+  %add31 = fadd float %add30, %add26
+  ret float %add31
+}
+
+; Make sure the order of phi nodes of different types does not prevent
+; vectorization of same typed phi nodes.
+; CHECK-LABEL: sort_phi_type
+; CHECK: phi <4 x float>
+; CHECK: fmul <4 x float>
+
+define float @sort_phi_type(float* nocapture readonly %A) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %Y = phi float [ 1.000000e+01, %entry ], [ %mul10, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %B = phi float [ 1.000000e+01, %entry ], [ %mul15, %for.body ]
+  %G = phi float [ 1.000000e+01, %entry ], [ %mul20, %for.body ]
+  %R = phi float [ 1.000000e+01, %entry ], [ %mul25, %for.body ]
+  %mul10 = fmul float %Y, 8.000000e+00
+  %mul15 = fmul float %B, 9.000000e+00
+  %mul20 = fmul float %R, 10.000000e+01
+  %mul25 = fmul float %G, 11.100000e+01
+  %indvars.iv.next = add nsw i64 %indvars.iv, 4
+  %cmp = icmp slt i64 %indvars.iv.next, 128
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add28 = fadd float 1.000000e+01, %mul10
+  %add29 = fadd float %mul10, %mul15
+  %add30 = fadd float %add29, %mul20
+  %add31 = fadd float %add30, %mul25
+  ret float %add31
+}
+
+define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
+; CHECK-LABEL: @test(
+;
+; Test that we correctly recognize the discontiguous memory in arrays where the
+; size is less than the alignment, and through various different GEP formations.
+;
+; We disable the vectorization of x86_fp80 for now. 
+
+entry:
+  %i1.0 = load x86_fp80* %i1, align 16
+  %i1.gep1 = getelementptr x86_fp80* %i1, i64 1
+  %i1.1 = load x86_fp80* %i1.gep1, align 16
+; CHECK: load x86_fp80*
+; CHECK: load x86_fp80*
+; CHECK-NOT: insertelement <2 x x86_fp80>
+; CHECK-NOT: insertelement <2 x x86_fp80>
+  br i1 undef, label %then, label %end
+
+then:
+  %i2.gep0 = getelementptr inbounds x86_fp80* %i2, i64 0
+  %i2.0 = load x86_fp80* %i2.gep0, align 16
+  %i2.gep1 = getelementptr inbounds x86_fp80* %i2, i64 1
+  %i2.1 = load x86_fp80* %i2.gep1, align 16
+; CHECK: load x86_fp80*
+; CHECK: load x86_fp80*
+; CHECK-NOT: insertelement <2 x x86_fp80>
+; CHECK-NOT: insertelement <2 x x86_fp80>
+  br label %end
+
+end:
+  %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
+  %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
+; CHECK-NOT: phi <2 x x86_fp80>
+; CHECK-NOT: extractelement <2 x x86_fp80>
+; CHECK-NOT: extractelement <2 x x86_fp80>
+  store x86_fp80 %phi0, x86_fp80* %o, align 16
+  %o.gep1 = getelementptr inbounds x86_fp80* %o, i64 1
+  store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/phi3.ll b/test/Transforms/SLPVectorizer/X86/phi3.ll
new file mode 100644
index 000000000000..fd8d36137201
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/phi3.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.GPar.0.16.26 = type { [0 x double], double }
+
+@d = external global double, align 8
+
+declare %struct.GPar.0.16.26* @Rf_gpptr(...)
+
+define void @Rf_GReset() {
+entry:
+  %sub = fsub double -0.000000e+00, undef
+  %0 = load double* @d, align 8
+  %sub1 = fsub double -0.000000e+00, %0
+  br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
+
+if.then:                                          ; preds = %entry
+  %sub2 = fsub double %sub, undef
+  %div.i = fdiv double %sub2, undef
+  %sub4 = fsub double %sub1, undef
+  %div.i16 = fdiv double %sub4, undef
+  %cmp = fcmp ogt double %div.i, %div.i16
+  br i1 %cmp, label %if.then6, label %if.end7
+
+if.then6:                                         ; preds = %if.then
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.then6, %if.then, %entry
+  %g.0 = phi double [ 0.000000e+00, %if.then6 ], [ %sub, %if.then ], [ %sub, %entry ]
+  ret void
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
new file mode 100644
index 000000000000..6d2d5e3540c7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -disable-output
+
+target datalayout = "f64:64:64-v64:64:64"
+
+define void @test_phi_in_landingpad() {
+entry:
+  invoke void @foo()
+          to label %inner unwind label %lpad
+
+inner:
+  %x0 = fsub double undef, undef
+  %y0 = fsub double undef, undef
+  invoke void @foo()
+          to label %done unwind label %lpad
+
+lpad:
+  %x1 = phi double [ undef, %entry ], [ undef, %inner ]
+  %y1 = phi double [ undef, %entry ], [ undef, %inner ]
+  landingpad { i8*, i32 } personality i8*
+          bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* null
+  br label %done
+
+done:
+  phi double [ %x0, %inner ], [ %x1, %lpad ]
+  phi double [ %y0, %inner ], [ %y1, %lpad ]
+  ret void
+}
+
+declare void @foo()
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
new file mode 100644
index 000000000000..520e6729de0c
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+; We purposely over-align f64 to 128 bits here.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:128:128-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+
+define void @test(double* %i1, double* %i2, double* %o) {
+; CHECK-LABEL: @test(
+;
+; Test that we correctly recognize the discontiguous memory in arrays where the
+; size is less than the alignment, and through various different GEP formations.
+
+entry:
+  %i1.0 = load double* %i1, align 16              ; element 0, read directly through the argument pointer
+  %i1.gep1 = getelementptr double* %i1, i64 1     ; element 1 through a plain (non-inbounds) GEP
+  %i1.1 = load double* %i1.gep1, align 16
+; CHECK: load double*
+; CHECK: load double*
+; CHECK: insertelement <2 x double>
+; CHECK: insertelement <2 x double>
+  br i1 undef, label %then, label %end
+
+then:
+  %i2.gep0 = getelementptr inbounds double* %i2, i64 0   ; element 0 through an explicit zero-index inbounds GEP
+  %i2.0 = load double* %i2.gep0, align 16
+  %i2.gep1 = getelementptr inbounds double* %i2, i64 1   ; element 1 through an inbounds GEP
+  %i2.1 = load double* %i2.gep1, align 16
+; CHECK: load double*
+; CHECK: load double*
+; CHECK: insertelement <2 x double>
+; CHECK: insertelement <2 x double>
+  br label %end
+
+end:
+  %phi0 = phi double [ %i1.0, %entry ], [ %i2.0, %then ]   ; lane 0 of the expected <2 x double> PHI
+  %phi1 = phi double [ %i1.1, %entry ], [ %i2.1, %then ]   ; lane 1 of the expected <2 x double> PHI
+; CHECK: phi <2 x double>
+; CHECK: extractelement <2 x double>
+; CHECK: extractelement <2 x double>
+  store double %phi0, double* %o, align 16        ; the stores are expected to stay scalar, fed by the extracts above
+  %o.gep1 = getelementptr inbounds double* %o, i64 1
+  store double %phi1, double* %o.gep1, align 16
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/pr16571.ll b/test/Transforms/SLPVectorizer/X86/pr16571.ll
new file mode 100644
index 000000000000..13d82149c0c5
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr16571.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -slp-vectorizer -S -mtriple=i686-pc-win32 -mcpu=corei7-avx
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+define hidden fastcc void @"System.PrimitiveTypesParser.TryParseIEEE754<char>(char*,uint,double&)"() unnamed_addr {
+"@0":                                             ; entry block; every branch condition in this function is undef
+  br i1 undef, label %"@38.lr.ph", label %"@37"
+
+"@37":                                            ; preds = %"@38.lr.ph", %"@44", %"@0"
+  ret void
+
+"@44":                                            ; preds = %"@38.lr.ph"
+  %0 = add i64 undef, undef                       ; incoming value of the i64 PHI in "@38.lr.ph"
+  %1 = add i32 %mainPartDigits.loc.0.ph45, 1      ; uses the i32 PHI, which is defined in a block placed later in the file
+  br i1 undef, label %"@38.lr.ph", label %"@37"
+
+"@38.lr.ph":                                      ; preds = %"@44", %"@0"
+  %mainDoublePart.loc.0.ph46 = phi i64 [ %0, %"@44" ], [ 0, %"@0" ]   ; adjacent PHIs of different widths (i64, then i32)
+  %mainPartDigits.loc.0.ph45 = phi i32 [ %1, %"@44" ], [ 0, %"@0" ]
+  br i1 undef, label %"@44", label %"@37"
+}
diff --git a/test/Transforms/SLPVectorizer/X86/pr16628.ll b/test/Transforms/SLPVectorizer/X86/pr16628.ll
new file mode 100644
index 000000000000..3f9d775eeeb6
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr16628.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@c = common global i32 0, align 4
+@a = common global i16 0, align 2
+@b = common global i16 0, align 2
+
+; Stores b = (g() == (!a | !c)). The RUN line has no FileCheck, so the test only requires that opt succeeds.
+define void @f() {
+entry:
+  %call = tail call i32 (...)* @g()
+  %0 = load i32* @c, align 4
+  %lnot = icmp eq i32 %0, 0                       ; !c, compared at i32
+  %lnot.ext = zext i1 %lnot to i32
+  %1 = load i16* @a, align 2
+  %lnot2 = icmp eq i16 %1, 0                      ; !a, compared at i16 (different operand width from %lnot)
+  %lnot.ext3 = zext i1 %lnot2 to i32
+  %or = or i32 %lnot.ext3, %lnot.ext
+  %cmp = icmp eq i32 %call, %or
+  %conv4 = zext i1 %cmp to i16
+  store i16 %conv4, i16* @b, align 2
+  ret void
+}
+
+declare i32 @g(...)
diff --git a/test/Transforms/SLPVectorizer/X86/pr16899.ll b/test/Transforms/SLPVectorizer/X86/pr16899.ll
new file mode 100644
index 000000000000..8631bc9125df
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s  -slp-vectorizer -S -mtriple=i386--netbsd -mcpu=i486
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386--netbsd"
+
+@a = common global i32* null, align 4
+
+; Function Attrs: noreturn nounwind readonly
+define i32 @fn1() #0 {
+entry:
+  %0 = load i32** @a, align 4, !tbaa !4           ; pointer stored in the global @a
+  %1 = load i32* %0, align 4, !tbaa !5            ; element 0 behind that pointer
+  %arrayidx1 = getelementptr inbounds i32* %0, i32 1
+  %2 = load i32* %arrayidx1, align 4, !tbaa !5    ; element 1, adjacent to %1
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %c.0 = phi i32 [ %2, %entry ], [ %add2, %do.body ]
+  %b.0 = phi i32 [ %1, %entry ], [ %add, %do.body ]
+  %add = add nsw i32 %b.0, %c.0
+  %add2 = add nsw i32 %add, 1
+  br label %do.body                               ; unconditional back edge: the loop has no exit and the function has no ret
+}
+
+attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{metadata !0, metadata !0, i64 0}
+!5 = metadata !{metadata !3, metadata !3, i64 0}
diff --git a/test/Transforms/SLPVectorizer/X86/pr18060.ll b/test/Transforms/SLPVectorizer/X86/pr18060.ll
new file mode 100644
index 000000000000..e6813f3b315d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr18060.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux"
+
+; Crash test (no FileCheck on the RUN line): truncs of shifted %Value feed the two i32 PHIs in %sw.bb2.
+define i32 @_Z16adjustFixupValueyj(i64 %Value, i32 %Kind) {
+entry:
+  %extract.t = trunc i64 %Value to i32            ; low 32 bits of %Value
+  %extract = lshr i64 %Value, 12
+  %extract.t6 = trunc i64 %extract to i32         ; bits 12..43 of %Value
+  switch i32 %Kind, label %sw.default [
+    i32 0, label %return                          ; cases 0 and 1 share a target, hence two %entry entries in the %return PHI
+    i32 1, label %return
+    i32 129, label %sw.bb1
+    i32 130, label %sw.bb2
+  ]
+
+sw.default:                                       ; preds = %entry
+  call void @_Z25llvm_unreachable_internalv()
+  unreachable
+
+sw.bb1:                                           ; preds = %entry
+  %shr = lshr i64 %Value, 16
+  %extract.t5 = trunc i64 %shr to i32             ; bits 16..47 of %Value
+  %extract7 = lshr i64 %Value, 28
+  %extract.t8 = trunc i64 %extract7 to i32        ; bits 28..59 of %Value
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %sw.bb1, %entry
+  %Value.addr.0.off0 = phi i32 [ %extract.t, %entry ], [ %extract.t5, %sw.bb1 ]
+  %Value.addr.0.off12 = phi i32 [ %extract.t6, %entry ], [ %extract.t8, %sw.bb1 ]
+  %conv6 = and i32 %Value.addr.0.off0, 4095       ; mask 0xFFF
+  %conv4 = shl i32 %Value.addr.0.off12, 16
+  %shl = and i32 %conv4, 983040                   ; mask 0xF0000
+  %or = or i32 %shl, %conv6
+  %or11 = or i32 %or, 8388608                     ; sets bit 23 (0x800000)
+  br label %return
+
+return:                                           ; preds = %sw.bb2, %entry, %entry
+  %retval.0 = phi i32 [ %or11, %sw.bb2 ], [ %extract.t, %entry ], [ %extract.t, %entry ]
+  ret i32 %retval.0
+}
+
+; Function Attrs: noreturn
+declare void @_Z25llvm_unreachable_internalv()
+
diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll
index 7aa7d7e243d0..f21e86c5646c 100644
--- a/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.8.0"
 
-;CHECK: @foo
+;CHECK-LABEL: @foo(
 ;CHECK: load <2 x double>
 ;CHECK: ret
 define double @foo(double* nocapture %D) {
@@ -16,11 +16,13 @@ define double @foo(double* nocapture %D) {
   %3 = getelementptr inbounds double* %D, i32 %2
   %4 = load double* %3, align 4
   %A4 = fmul double %4, %4
+  %A42 = fmul double %A4, %A4
   %5 = or i32 %2, 1
   %6 = getelementptr inbounds double* %D, i32 %5
   %7 = load double* %6, align 4
   %A7 = fmul double %7, %7
-  %8 = fadd double %A4, %A7
+  %A72 = fmul double %A7, %A7
+  %8 = fadd double %A42, %A72
   %9 = fadd double %sum.01, %8
   %10 = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %10, 100
diff --git a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
new file mode 100644
index 000000000000..6aea5d3c6f6b
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -0,0 +1,76 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+; We disable the vectorization of <3 x float> for now
+
+; float foo(float *A) {
+;
+;   float R = A[0];
+;   float G = A[1];
+;   float B = A[2];
+;   for (int i=0; i < 121; i+=3) {
+;     R+=A[i+0]*7;
+;     G+=A[i+1]*8;
+;     B+=A[i+2]*9;
+;   }
+;
+;   return R+G+B;
+; }
+
+;CHECK-LABEL: @foo(
+;CHECK: br
+;CHECK-NOT: phi <3 x float>
+;CHECK-NOT: fmul <3 x float>
+;CHECK-NOT: fadd <3 x float>
+; At the moment we don't sink extractelements.
+;CHECK: br
+;CHECK-NOT: extractelement
+;CHECK-NOT: extractelement
+;CHECK-NOT: extractelement
+;CHECK: ret
+
+define float @foo(float* nocapture readonly %A) {
+entry:
+  %0 = load float* %A, align 4                    ; A[0]: initial R, and also the first A[i] value
+  %arrayidx1 = getelementptr inbounds float* %A, i64 1
+  %1 = load float* %arrayidx1, align 4            ; A[1]: initial G
+  %arrayidx2 = getelementptr inbounds float* %A, i64 2
+  %2 = load float* %arrayidx2, align 4            ; A[2]: initial B
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
+  %3 = phi float [ %0, %entry ], [ %.pre, %for.body.for.body_crit_edge ]   ; A[i], preloaded on the back edge
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+  %B.032 = phi float [ %2, %entry ], [ %add14, %for.body.for.body_crit_edge ]
+  %G.031 = phi float [ %1, %entry ], [ %add9, %for.body.for.body_crit_edge ]
+  %R.030 = phi float [ %0, %entry ], [ %add4, %for.body.for.body_crit_edge ]
+  %mul = fmul float %3, 7.000000e+00
+  %add4 = fadd float %R.030, %mul                 ; R += A[i+0]*7
+  %4 = add nsw i64 %indvars.iv, 1
+  %arrayidx7 = getelementptr inbounds float* %A, i64 %4
+  %5 = load float* %arrayidx7, align 4
+  %mul8 = fmul float %5, 8.000000e+00
+  %add9 = fadd float %G.031, %mul8                ; G += A[i+1]*8
+  %6 = add nsw i64 %indvars.iv, 2
+  %arrayidx12 = getelementptr inbounds float* %A, i64 %6
+  %7 = load float* %arrayidx12, align 4
+  %mul13 = fmul float %7, 9.000000e+00
+  %add14 = fadd float %B.032, %mul13              ; B += A[i+2]*9
+  %indvars.iv.next = add i64 %indvars.iv, 3       ; i += 3
+  %8 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %8, 121                     ; i < 121
+  br i1 %cmp, label %for.body.for.body_crit_edge, label %for.end
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %arrayidx3.phi.trans.insert = getelementptr inbounds float* %A, i64 %indvars.iv.next
+  %.pre = load float* %arrayidx3.phi.trans.insert, align 4   ; loads the next iteration's A[i]
+  br label %for.body
+
+for.end:                                          ; preds = %for.body
+  %add16 = fadd float %add4, %add9
+  %add17 = fadd float %add16, %add14              ; R+G+B
+  ret float %add17
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll
index b520913a398d..46263416a90b 100644
--- a/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -43,3 +43,19 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
   ret void
 }
 
+; Make sure we don't crash on this one: two adjacent lanes (i+1, i+2) whose adds each take an undef operand.
+define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i64 %i) {
+  %1 = add i64 %i, 1
+  %2 = getelementptr inbounds i32* %x, i64 %1
+  %3 = getelementptr inbounds i32* %y, i64 %1
+  %4 = load i32* %3, align 4
+  %5 = add nsw i32 undef, %4                      ; x[i+1] = undef + y[i+1]
+  store i32 %5, i32* %2, align 4
+  %6 = add i64 %i, 2
+  %7 = getelementptr inbounds i32* %x, i64 %6
+  %8 = getelementptr inbounds i32* %y, i64 %6
+  %9 = load i32* %8, align 4
+  %10 = add nsw i32 undef, %9                     ; x[i+2] = undef + y[i+2]
+  store i32 %10, i32* %7, align 4
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/simplebb.ll b/test/Transforms/SLPVectorizer/X86/simplebb.ll
index cd0b99e64677..7d682e5e4676 100644
--- a/test/Transforms/SLPVectorizer/X86/simplebb.ll
+++ b/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -23,3 +23,67 @@ entry:
   ret void
 }
 
+; Simple 3-pair chain (load pair -> fmul pair -> store pair) whose store addresses are obfuscated with bitcasts
+; CHECK: test2
+; CHECK: store <2 x double>
+; CHECK: ret
+define void @test2(double* %a, double* %b, i8* %e) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %c = bitcast i8* %e to double*                  ; first store address: %e viewed as double*
+  store double %mul, double* %c, align 8
+  %carrayidx5 = getelementptr inbounds i8* %e, i64 8   ; 8 bytes past %e, computed on the i8* rather than on %c
+  %arrayidx5 = bitcast i8* %carrayidx5 to double*
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
+; Don't vectorize volatile loads. Only the element-0 loads are volatile; a vector store is still expected.
+; CHECK: test_volatile_load
+; CHECK-NOT: load <2 x double>
+; CHECK: store <2 x double>
+; CHECK: ret
+define void @test_volatile_load(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load volatile double* %a, align 8         ; volatile
+  %i1 = load volatile double* %b, align 8         ; volatile
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8          ; not volatile
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8          ; not volatile
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
+; Don't vectorize volatile stores. Both stores are volatile here; all four loads are ordinary.
+; CHECK: test_volatile_store
+; CHECK-NOT: store <2 x double>
+; CHECK: ret
+define void @test_volatile_store(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store volatile double %mul, double* %c, align 8          ; volatile store to c[0]
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store volatile double %mul5, double* %arrayidx5, align 8 ; volatile store to c[1]
+  ret void
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
new file mode 100644
index 000000000000..2747a1f48997
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -0,0 +1,140 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
+
+
+; CHECK: tiny_tree_fully_vectorizable
+; CHECK: load <2 x double>
+; CHECK: store <2 x double>
+; CHECK: ret 
+; Each iteration copies src[0..1] to dst[0..1]: two consecutive loads feeding two consecutive stores, nothing else.
+define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp12 = icmp eq i64 %count, 0
+  br i1 %cmp12, label %for.end, label %for.body   ; skip the loop entirely when %count is 0
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
+  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load double* %src.addr.013, align 8
+  store double %0, double* %dst.addr.014, align 8
+  %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 1
+  %1 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1
+  store double %1, double* %arrayidx3, align 8
+  %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015    ; both pointers advance by %i.015 elements
+  %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015
+  %inc = add i64 %i.015, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK: tiny_tree_fully_vectorizable2
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+; CHECK: ret
+; Float variant: each iteration copies src[0..3] to dst[0..3], four consecutive loads feeding four consecutive stores.
+define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp20 = icmp eq i64 %count, 0
+  br i1 %cmp20, label %for.end, label %for.body   ; skip the loop entirely when %count is 0
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
+  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load float* %src.addr.021, align 4
+  store float %0, float* %dst.addr.022, align 4
+  %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 1
+  %1 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1
+  store float %1, float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2
+  %2 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2
+  store float %2, float* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3
+  %3 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3
+  store float %3, float* %arrayidx7, align 4
+  %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023     ; both pointers advance by %i.023 elements
+  %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023
+  %inc = add i64 %i.023, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; A tiny tree that is not fully vectorizable must stay scalar: the loads here read src[0] and src[2].
+; CHECK: tiny_tree_not_fully_vectorizable
+; CHECK-NOT: <2 x double>
+; CHECK: ret 
+
+define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp12 = icmp eq i64 %count, 0
+  br i1 %cmp12, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
+  %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load double* %src.addr.013, align 8
+  store double %0, double* %dst.addr.014, align 8
+  %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 2       ; index 2, not 1: the loads are not adjacent
+  %1 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1       ; the stores are still adjacent (dst[0], dst[1])
+  store double %1, double* %arrayidx3, align 8
+  %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015
+  %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015
+  %inc = add i64 %i.015, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+; CHECK: tiny_tree_not_fully_vectorizable2
+; CHECK-NOT: <4 x float>
+; CHECK: ret
+; Four float lanes with adjacent stores dst[0..3]; the lane-1 load reads src[4], so the loads are not consecutive.
+define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
+entry:
+  %cmp20 = icmp eq i64 %count, 0
+  br i1 %cmp20, label %for.end, label %for.body   ; skip the loop entirely when %count is 0
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
+  %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
+  %0 = load float* %src.addr.021, align 4
+  store float %0, float* %dst.addr.022, align 4
+  %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 4        ; index 4, not 1: breaks the consecutive load run
+  %1 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1
+  store float %1, float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2
+  %2 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2
+  store float %2, float* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3
+  %3 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3
+  store float %3, float* %arrayidx7, align 4
+  %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023     ; both pointers advance by %i.023 elements
+  %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023
+  %inc = add i64 %i.023, 1
+  %exitcond = icmp eq i64 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/XCore/lit.local.cfg b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
new file mode 100644
index 000000000000..4d17d4642045
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/XCore/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'XCore' in targets:
+    config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
new file mode 100644
index 000000000000..66392e74cb31
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=xcore  | FileCheck %s
+
+target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
+target triple = "xcore"
+
+; Simple 3-pair chain with loads and stores (load pair -> fmul pair -> store pair); it must stay scalar here.
+; CHECK: test1
+; CHECK-NOT: <2 x double>
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8                  ; a[0]
+  %i1 = load double* %b, align 8                  ; b[0]
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8          ; a[1]
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8          ; b[1]
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8          ; c[0] = a[0] * b[0]
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8 ; c[1] = a[1] * b[1]
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/lit.local.cfg b/test/Transforms/SLPVectorizer/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/SLPVectorizer/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
index ad5fb6c4a5d8..5fa78766ed0f 100644
--- a/test/Transforms/SROA/alignment.ll
+++ b/test/Transforms/SROA/alignment.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 
 define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 0
 ; CHECK: %[[a0:.*]] = load i8* %[[gep_a0]], align 16
 ; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 1
@@ -29,7 +29,7 @@ entry:
 }
 
 define void @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: alloca i16
 ; CHECK: load i8* %{{.*}}
 ; CHECK: store i8 42, i8* %{{.*}}
@@ -48,7 +48,7 @@ entry:
 
 define void @PR13920(<2 x i64>* %a, i16* %b) {
 ; Test that alignments on memcpy intrinsics get propagated to loads and stores.
-; CHECK: @PR13920
+; CHECK-LABEL: @PR13920(
 ; CHECK: load <2 x i64>* %a, align 2
 ; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
 ; CHECK: ret void
@@ -68,7 +68,7 @@ define void @test3(i8* %x) {
 ; provide the needed explicit alignment that code using the alloca may be
 ; expecting. However, also check that any offset within an alloca can in turn
 ; reduce the alignment.
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: alloca [22 x i8], align 8
 ; CHECK: alloca [18 x i8], align 2
 ; CHECK: ret void
@@ -86,7 +86,7 @@ entry:
 
 define void @test5() {
 ; Test that we preserve underaligned loads and stores when splitting.
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: alloca [9 x i8]
 ; CHECK: alloca [9 x i8]
 ; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
@@ -119,7 +119,7 @@ entry:
 define void @test6() {
 ; Test that we promote alignment when the underlying alloca switches to one
 ; that innately provides it.
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: alloca double
 ; CHECK: alloca double
 ; CHECK-NOT: align
@@ -142,7 +142,7 @@ entry:
 define void @test7(i8* %out) {
 ; Test that we properly compute the destination alignment when rewriting
 ; memcpys as direct loads or stores.
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: alloca
 
 entry:
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 834032246f9a..5d3e4b5d8b2c 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -7,7 +7,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
 
 define i32 @test0() {
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK-NOT: alloca
 ; CHECK: ret i32
 
@@ -37,7 +37,7 @@ entry:
 }
 
 define i32 @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
 ; CHECK: ret i32 0
 
@@ -50,7 +50,7 @@ entry:
 }
 
 define i64 @test2(i64 %X) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: alloca
 ; CHECK: ret i64 %X
 
@@ -66,7 +66,7 @@ L2:
 }
 
 define void @test3(i8* %dst, i8* %src) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 
 entry:
   %a = alloca [300 x i8]
@@ -302,7 +302,7 @@ entry:
 }
 
 define void @test4(i8* %dst, i8* %src) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 
 entry:
   %a = alloca [100 x i8]
@@ -408,7 +408,7 @@ declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define i16 @test5() {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NOT: alloca float
 ; CHECK:      %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
 ; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
@@ -426,7 +426,7 @@ entry:
 }
 
 define i32 @test6() {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: alloca i32
 ; CHECK-NEXT: store volatile i32
 ; CHECK-NEXT: load i32*
@@ -442,7 +442,7 @@ entry:
 }
 
 define void @test7(i8* %src, i8* %dst) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK: alloca i32
 ; CHECK-NEXT: bitcast i8* %src to i32*
 ; CHECK-NEXT: load volatile i32*
@@ -465,7 +465,7 @@ entry:
 %S2 = type { %S1*, %S2* }
 
 define %S2 @test8(%S2* %s2) {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 entry:
   %new = alloca %S2
 ; CHECK-NOT: alloca
@@ -503,7 +503,7 @@ define i64 @test9() {
 ; weird bit casts and types. This is valid IR due to the alignment and masking
 ; off the bits past the end of the alloca.
 ;
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NOT: alloca
 ; CHECK:      %[[b2:.*]] = zext i8 26 to i64
 ; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
@@ -535,7 +535,7 @@ entry:
 }
 
 define %S2* @test10() {
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK-NOT: alloca %S2*
 ; CHECK: ret %S2* null
 
@@ -549,7 +549,7 @@ entry:
 }
 
 define i32 @test11() {
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK-NOT: alloca
 ; CHECK: ret i32 0
 
@@ -574,7 +574,7 @@ define i8 @test12() {
 ; We fully promote these to the i24 load or store size, resulting in just masks
 ; and other operations that instcombine will fold, but no alloca.
 ;
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 
 entry:
   %a = alloca [3 x i8]
@@ -630,7 +630,7 @@ entry:
 define i32 @test13() {
 ; Ensure we don't crash and handle undefined loads that straddle the end of the
 ; allocation.
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK:      %[[value:.*]] = zext i8 0 to i16
 ; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
 ; CHECK-NEXT: ret i32 %[[ret]]
@@ -657,7 +657,7 @@ define void @test14(...) nounwind uwtable {
 ; also gain enough data to prove they must be dead allocas due to GEPs that walk
 ; across two adjacent allocas. Test that we don't try to promote or otherwise
 ; do bad things to these dead allocas, they should just be removed.
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: ret void
 
@@ -688,7 +688,7 @@ define i32 @test15(i1 %flag) nounwind uwtable {
 ; Ensure that when there are dead instructions using an alloca that are not
 ; loads or stores we still delete them during partitioning and rewriting.
 ; Otherwise we'll go to promote them while thy still have unpromotable uses.
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   br label %loop
 ; CHECK:      loop:
@@ -731,7 +731,7 @@ loop:
 
 define void @test16(i8* %src, i8* %dst) {
 ; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK-NOT: alloca
 ; CHECK:      %[[srccast:.*]] = bitcast i8* %src to i24*
 ; CHECK-NEXT: load i24* %[[srccast]]
@@ -752,7 +752,7 @@ entry:
 define void @test17(i8* %src, i8* %dst) {
 ; Ensure that we can rewrite unpromotable memcpys which extend past the end of
 ; the alloca.
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK:      %[[a:.*]] = alloca [3 x i8]
 ; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8]* %[[a]], i32 0, i32 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src,
@@ -771,7 +771,7 @@ define void @test18(i8* %src, i8* %dst, i32 %size) {
 ; Preserve transfer instrinsics with a variable size, even if they overlap with
 ; fixed size operations. Further, continue to split and promote allocas preceding
 ; the variable sized intrinsic.
-; CHECK: @test18
+; CHECK-LABEL: @test18(
 ; CHECK:      %[[a:.*]] = alloca [34 x i8]
 ; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8* %src, i64 4
 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
@@ -810,7 +810,7 @@ define i32 @test19(%opaque* %x) {
 ; pointers in such a way that we try to GEP through the opaque type. Previously,
 ; a check for an unsized type was missing and this crashed. Ensure it behaves
 ; reasonably now.
-; CHECK: @test19
+; CHECK-LABEL: @test19(
 ; CHECK-NOT: alloca
 ; CHECK: ret i32 undef
 
@@ -827,7 +827,7 @@ entry:
 define i32 @test20() {
 ; Ensure we can track negative offsets (before the beginning of the alloca) and
 ; negative relative offsets from offsets starting past the end of the alloca.
-; CHECK: @test20
+; CHECK-LABEL: @test20(
 ; CHECK-NOT: alloca
 ; CHECK: %[[sum1:.*]] = add i32 1, 2
 ; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3
@@ -858,7 +858,7 @@ define i8 @test21() {
 ; Test allocations and offsets which border on overflow of the int64_t used
 ; internally. This is really awkward to really test as LLVM doesn't really
 ; support such extreme constructs cleanly.
-; CHECK: @test21
+; CHECK-LABEL: @test21(
 ; CHECK-NOT: alloca
 ; CHECK: or i8 -1, -1
 
@@ -926,7 +926,7 @@ define void @PR13990() {
 ; Ensure we can handle cases where processing one alloca causes the other
 ; alloca to become dead and get deleted. This might crash or fail under
 ; Valgrind if we regress.
-; CHECK: @PR13990
+; CHECK-LABEL: @PR13990(
 ; CHECK-NOT: alloca
 ; CHECK: unreachable
 ; CHECK: unreachable
@@ -955,7 +955,7 @@ define double @PR13969(double %x) {
 ; Check that we detect when promotion will un-escape an alloca and iterate to
 ; re-try running SROA over that alloca. Without that, the two allocas that are
 ; stored into a dead alloca don't get rewritten and promoted.
-; CHECK: @PR13969
+; CHECK-LABEL: @PR13969(
 
 entry:
   %a = alloca double
@@ -982,7 +982,7 @@ define void @PR14034() {
 ; This test case tries to form GEPs into the empty leading struct members, and
 ; subsequently crashed (under valgrind) before we fixed the PR. The important
 ; thing is to handle empty structs gracefully.
-; CHECK: @PR14034
+; CHECK-LABEL: @PR14034(
 
 entry:
   %a = alloca %PR14034.struct
@@ -998,7 +998,7 @@ entry:
 define i32 @test22(i32 %x) {
 ; Test that SROA and promotion is not confused by a grab bax mixture of pointer
 ; types involving wrapper aggregates and zero-length aggregate members.
-; CHECK: @test22
+; CHECK-LABEL: @test22(
 
 entry:
   %a1 = alloca { { [1 x { i32 }] } }
@@ -1134,7 +1134,7 @@ define void @PR14105({ [16 x i8] }* %ptr) {
 ; Ensure that when rewriting the GEP index '-1' for this alloca we preserve is
 ; sign as negative. We use a volatile memcpy to ensure promotion never actually
 ; occurs.
-; CHECK: @PR14105
+; CHECK-LABEL: @PR14105(
 
 entry:
   %a = alloca { [16 x i8] }, align 8
@@ -1153,7 +1153,7 @@ entry:
 define void @PR14465() {
 ; Ensure that we don't crash when analyzing a alloca larger than the maximum
 ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1).
-; CHECK: @PR14465
+; CHECK-LABEL: @PR14465(
 
   %stack = alloca [1048576 x i32], align 16
 ; CHECK: alloca [1048576 x i32]
@@ -1170,7 +1170,7 @@ define void @PR14548(i1 %x) {
 ; iteratively.
 ; Note that we don't do a particularly good *job* of handling these mixtures,
 ; but the hope is that this is very rare.
-; CHECK: @PR14548
+; CHECK-LABEL: @PR14548(
 
 entry:
   %a = alloca <{ i1 }>, align 8
@@ -1181,7 +1181,6 @@ entry:
   store i1 %x, i1* %b.i1, align 8
   %b.i8 = bitcast <{ i1 }>* %b to i8*
   %foo = load i8* %b.i8, align 1
-; CHECK-NEXT: {{.*}} = zext i1 %x to i8
 ; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
 ; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
 ; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
@@ -1232,7 +1231,7 @@ entry:
 define i32 @PR14601(i32 %x) {
 ; Don't try to form a promotable integer alloca when there is a variable length
 ; memory intrinsic.
-; CHECK: @PR14601
+; CHECK-LABEL: @PR14601(
 
 entry:
   %a = alloca i32
@@ -1250,7 +1249,7 @@ define void @PR15674(i8* %data, i8* %src, i32 %size) {
 ; beginning of the array. Ensure that the final integer store, despite being
 ; convertable to the integer type that we end up promoting this alloca toward,
 ; doesn't get widened to a full alloca store.
-; CHECK: @PR15674
+; CHECK-LABEL: @PR15674(
 
 entry:
   %tmp = alloca [4 x i8], align 1
@@ -1307,8 +1306,8 @@ end:
 }
 
 define void @PR15805(i1 %a, i1 %b) {
-; CHECK: @PR15805
-; CHECK: select i1 undef, i64* %c, i64* %c
+; CHECK-LABEL: @PR15805(
+; CHECK-NOT: alloca
 ; CHECK: ret void
 
   %c = alloca i64, align 8
@@ -1317,3 +1316,43 @@ define void @PR15805(i1 %a, i1 %b) {
   %cond = load i64* %cond.in, align 8
   ret void
 }
+
+define void @PR16651.1(i8* %a) {
+; This test case used to cause a crash. The trigger is the volatile memcpy in
+; combination with lowering to integer loads and stores whose width differs
+; from that of the original memcpy.
+; The output is expected to still contain an i16 alloca and two i8 allocas.
+; CHECK-LABEL: @PR16651.1(
+; CHECK: alloca i16
+; CHECK: alloca i8
+; CHECK: alloca i8
+; CHECK: unreachable
+
+entry:
+  %b = alloca i32, align 4
+  %b.cast = bitcast i32* %b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i32 4, i1 true) ; 4-byte copy covering all of %b; the trailing i1 true is the volatile flag
+  %b.gep = getelementptr inbounds i8* %b.cast, i32 2 ; address of byte 2 within %b
+  load i8* %b.gep, align 2 ; one-byte read from the middle of the copied range; the result is unused
+  unreachable
+}
+
+define void @PR16651.2() {
+; This test case used to cause a crash: the alloca is used through a select
+; that can't be speculated, so it shouldn't be promoted, but the code analyzing
+; whether a vector promotion could be formed didn't bail on select
+; instructions and so missed that fact.
+; The output is expected to keep a <2 x float> alloca.
+; CHECK-LABEL: @PR16651.2(
+; CHECK: alloca <2 x float>
+; CHECK: ret void
+
+entry:
+  %tv1 = alloca { <2 x float>, <2 x float> }, align 8
+  %0 = getelementptr { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1 ; second <2 x float> field of %tv1
+  store <2 x float> undef, <2 x float>* %0, align 8
+  %1 = getelementptr inbounds { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1, i64 0 ; element 0 of that same field, as a float*
+  %cond105.in.i.i = select i1 undef, float* null, float* %1 ; select between null and the alloca-derived pointer
+  %cond105.i.i = load float* %cond105.in.i.i, align 8 ; load through the select; the result is unused
+  ret void
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 64a0cc743974..9e87a9f073c5 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -9,7 +9,7 @@ define i8 @test1() {
 ; the same as test12 in basictest.ll, but here we assert big-endian byte
 ; ordering.
 ;
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 
 entry:
   %a = alloca [3 x i8]
@@ -66,7 +66,7 @@ define i64 @test2() {
 ; Test for various mixed sizes of integer loads and stores all getting
 ; promoted.
 ;
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 
 entry:
   %a = alloca [7 x i8]
diff --git a/test/Transforms/SROA/fca.ll b/test/Transforms/SROA/fca.ll
index c30a5cc974fc..e8b4c6c13c88 100644
--- a/test/Transforms/SROA/fca.ll
+++ b/test/Transforms/SROA/fca.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define { i32, i32 } @test0(i32 %x, i32 %y) {
-; CHECK: @test0
+; CHECK-LABEL: @test0(
 ; CHECK-NOT: alloca
 ; CHECK: insertvalue { i32, i32 }
 ; CHECK: insertvalue { i32, i32 }
@@ -27,7 +27,7 @@ define { i32, i32 } @test1(i32 %x, i32 %y) {
 ; FIXME: This may be too conservative. Duncan argues that we are allowed to
 ; split the volatile load and store here but must produce volatile scalar loads
 ; and stores from them.
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: alloca
 ; CHECK: alloca
 ; CHECK: load volatile { i32, i32 }*
diff --git a/test/Transforms/SROA/lit.local.cfg b/test/Transforms/SROA/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/SROA/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index b9931800e7f4..8d82964dcbd1 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define i32 @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
 	%a = alloca [2 x i32]
 ; CHECK-NOT: alloca
@@ -31,7 +31,7 @@ exit:
 }
 
 define i32 @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 entry:
 	%a = alloca [2 x i32]
 ; CHECK-NOT: alloca
@@ -54,7 +54,7 @@ entry:
 }
 
 define i32 @test3(i32 %x) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 entry:
 	%a = alloca [2 x i32]
 ; CHECK-NOT: alloca
@@ -105,7 +105,7 @@ exit:
 }
 
 define i32 @test4() {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 entry:
 	%a = alloca [2 x i32]
 ; CHECK-NOT: alloca
@@ -129,7 +129,7 @@ entry:
 }
 
 define i32 @test5(i32* %b) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 entry:
 	%a = alloca [2 x i32]
 ; CHECK-NOT: alloca
@@ -151,7 +151,7 @@ entry:
 declare void @f(i32*, i32*)
 
 define i32 @test6(i32* %b) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 entry:
 	%a = alloca [2 x i32]
   %c = alloca i32
@@ -182,7 +182,7 @@ entry:
 }
 
 define i32 @test7() {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: alloca
 
 entry:
@@ -210,7 +210,7 @@ exit:
 define i32 @test8(i32 %b, i32* %ptr) {
 ; Ensure that we rewrite allocas to the used type when that use is hidden by
 ; a PHI that can be speculated.
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: load
 ; CHECK: %[[value:.*]] = load i32* %ptr
@@ -238,7 +238,7 @@ exit:
 
 define i32 @test9(i32 %b, i32* %ptr) {
 ; Same as @test8 but for a select rather than a PHI node.
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: load
 ; CHECK: %[[value:.*]] = load i32* %ptr
@@ -260,7 +260,7 @@ define float @test10(i32 %b, float* %ptr) {
 ; Don't try to promote allocas which are not elligible for it even after
 ; rewriting due to the necessity of inserting bitcasts when speculating a PHI
 ; node.
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK: %[[alloca:.*]] = alloca
 ; CHECK: %[[argvalue:.*]] = load float* %ptr
 ; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
@@ -289,7 +289,7 @@ exit:
 
 define float @test11(i32 %b, float* %ptr) {
 ; Same as @test10 but for a select rather than a PHI node.
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: %[[alloca:.*]] = alloca
 ; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
 ; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
@@ -311,7 +311,7 @@ entry:
 define i32 @test12(i32 %x, i32* %p) {
 ; Ensure we don't crash or fail to nuke dead selects of allocas if no load is
 ; never found.
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: select
 ; CHECK: ret i32 %x
@@ -327,7 +327,7 @@ entry:
 define i32 @test13(i32 %x, i32* %p) {
 ; Ensure we don't crash or fail to nuke dead phis of allocas if no load is ever
 ; found.
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: phi
 ; CHECK: ret i32 %x
@@ -346,10 +346,47 @@ exit:
   ret i32 %load
 }
 
+define i32 @test14(i1 %b1, i1 %b2, i32* %ptr) {
+; Mixes selects and phis where one kind is speculatable toward promotion and
+; the other is not. That must block all of the speculation, so both allocas,
+; both selects and both phis are expected to survive in the output.
+; CHECK-LABEL: @test14(
+; CHECK: alloca
+; CHECK: alloca
+; CHECK: select
+; CHECK: phi
+; CHECK: phi
+; CHECK: select
+; CHECK: ret i32
+
+entry:
+  %f = alloca i32
+  %g = alloca i32
+  store i32 0, i32* %f
+  store i32 0, i32* %g
+  %f.select = select i1 %b1, i32* %f, i32* %ptr ; select over %f; its result feeds %f.phi
+  br i1 %b2, label %then, label %else
+
+then:
+  br label %exit
+
+else:
+  br label %exit
+
+exit:
+  %f.phi = phi i32* [ %f, %then ], [ %f.select, %else ] ; phi whose %else input is the select above
+  %g.phi = phi i32* [ %g, %then ], [ %ptr, %else ] ; phi over %g; its result feeds %g.select
+  %f.loaded = load i32* %f.phi
+  %g.select = select i1 %b1, i32* %g, i32* %g.phi ; select whose false operand is the phi above
+  %g.loaded = load i32* %g.select
+  %result = add i32 %f.loaded, %g.loaded
+  ret i32 %result
+}
+
 define i32 @PR13905() {
 ; Check a pattern where we have a chain of dead phi nodes to ensure they are
 ; deleted and promotion can proceed.
-; CHECK: @PR13905
+; CHECK-LABEL: @PR13905(
 ; CHECK-NOT: alloca i32
 ; CHECK: ret i32 undef
 
@@ -374,7 +411,7 @@ define i32 @PR13906() {
 ; Another pattern which can lead to crashes due to failing to clear out dead
 ; PHI nodes or select nodes. This triggers subtly differently from the above
 ; cases because the PHI node is (recursively) alive, but the select is dead.
-; CHECK: @PR13906
+; CHECK-LABEL: @PR13906(
 ; CHECK-NOT: alloca
 
 entry:
@@ -392,7 +429,7 @@ if.then:
 }
 
 define i64 @PR14132(i1 %flag) {
-; CHECK: @PR14132
+; CHECK-LABEL: @PR14132(
 ; Here we form a PHI-node by promoting the pointer alloca first, and then in
 ; order to promote the other two allocas, we speculate the load of the
 ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
@@ -427,3 +464,40 @@ if.end:
   ret i64 %result
 ; CHECK-NEXT: ret i64 %[[result]]
 }
+
+define float @PR16687(i64 %x, i1 %flag) {
+; CHECK-LABEL: @PR16687(
+; The phi in %end gets speculated twice, once per slice of an otherwise
+; promotable alloca. Make sure we don't get ahead of ourselves and promote one
+; of the slices before the phi has actually been speculated.
+
+entry:
+  %a = alloca i64, align 8
+  store i64 %x, i64* %a
+  br i1 %flag, label %then, label %else
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: %[[lo:.*]] = trunc i64 %x to i32
+; CHECK: %[[shift:.*]] = lshr i64 %x, 32
+; CHECK: %[[hi:.*]] = trunc i64 %[[shift]] to i32
+
+then:
+  %a.f = bitcast i64* %a to float* ; float view of the first four bytes of %a
+  br label %end
+; CHECK: %[[lo_cast:.*]] = bitcast i32 %[[lo]] to float
+
+else:
+  %a.raw = bitcast i64* %a to i8*
+  %a.raw.4 = getelementptr i8* %a.raw, i64 4
+  %a.raw.4.f = bitcast i8* %a.raw.4 to float* ; float view of the four bytes starting at offset 4
+  br label %end
+; CHECK: %[[hi_cast:.*]] = bitcast i32 %[[hi]] to float
+
+end:
+  %a.phi.f = phi float* [ %a.f, %then ], [ %a.raw.4.f, %else ]
+  %f = load float* %a.phi.f ; the only load of %a, reached through the phi
+  ret float %f
+; CHECK: %[[phi:.*]] = phi float [ %[[lo_cast]], %then ], [ %[[hi_cast]], %else ]
+; CHECK-NOT: load
+; CHECK: ret float %[[phi]]
+}
diff --git a/test/Transforms/SROA/vector-conversion.ll b/test/Transforms/SROA/vector-conversion.ll
new file mode 100644
index 000000000000..08d796087317
--- /dev/null
+++ b/test/Transforms/SROA/vector-conversion.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) {
+; CHECK-LABEL: @vector_ptrtoint(
+  %a = alloca {<2 x i32*>, <2 x i32*>}
+; CHECK-NOT: alloca
+; Stores a struct of two <2 x i32*> vectors and reloads it as one <4 x i64>; the alloca, store and load should all vanish, leaving ptrtoint.
+  store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a
+; CHECK-NOT: store
+; The '(' that ends the label pattern keeps it from also matching @vector_ptrtointbitcast.
+  %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
+  %vec = load <4 x i64>* %cast
+; CHECK-NOT: load
+; CHECK: ptrtoint
+
+  ret <4 x i64> %vec
+}
+
+define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
+; CHECK-LABEL: @vector_inttoptr(
+  %a = alloca {<2 x i64>, <2 x i64>}
+; CHECK-NOT: alloca
+; Stores a struct of two <2 x i64> vectors and reloads it as one <4 x i32*>; the alloca, store and load should all vanish, leaving inttoptr.
+  store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a
+; CHECK-NOT: store
+; The label pattern ends in '(' so that it only matches this exact function name.
+  %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
+  %vec = load <4 x i32*>* %cast
+; CHECK-NOT: load
+; CHECK: inttoptr
+
+  ret <4 x i32*> %vec
+}
+
+define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
+; CHECK-LABEL: @vector_ptrtointbitcast(
+  %a = alloca {<1 x i32*>, <1 x i32*>}
+; CHECK-NOT: alloca
+; Stores a struct of two <1 x i32*> vectors and reloads it as one <2 x i64>; the alloca, store and load should all vanish.
+  store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a
+; CHECK-NOT: store
+; The output is expected to show a ptrtoint followed by a bitcast, twice over.
+  %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
+  %vec = load <2 x i64>* %cast
+; CHECK-NOT: load
+; CHECK: ptrtoint
+; CHECK: bitcast
+; CHECK: ptrtoint
+; CHECK: bitcast
+
+  ret <2 x i64> %vec
+}
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 3336515770a3..4f084214d396 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 %S1 = type { i64, [42 x float] }
 
 define i32 @test1(<4 x i32> %x, <4 x i32> %y) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
 	%a = alloca [2 x <4 x i32>]
 ; CHECK-NOT: alloca
@@ -35,7 +35,7 @@ entry:
 }
 
 define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 entry:
 	%a = alloca [2 x <4 x i32>]
 ; CHECK-NOT: alloca
@@ -69,7 +69,7 @@ entry:
 }
 
 define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 entry:
 	%a = alloca [2 x <4 x i32>]
 ; CHECK-NOT: alloca
@@ -107,7 +107,7 @@ entry:
 }
 
 define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 entry:
 	%a = alloca [2 x <4 x i32>]
 ; CHECK-NOT: alloca
@@ -151,7 +151,7 @@ entry:
 }
 
 define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; The same as the above, but with reversed source and destination for the
 ; element memcpy, and a self copy.
 entry:
@@ -199,7 +199,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; The old scalarrepl pass would wrongly drop the store to the second alloca.
 ; PR13254
   %tmp = alloca { <4 x i64>, <4 x i64> }
@@ -215,7 +215,7 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
 }
 
 define <4 x i32> @test_subvec_store() {
-; CHECK: @test_subvec_store
+; CHECK-LABEL: @test_subvec_store(
 entry:
   %a = alloca <4 x i32>
 ; CHECK-NOT: alloca
@@ -247,7 +247,7 @@ entry:
 }
 
 define <4 x i32> @test_subvec_load() {
-; CHECK: @test_subvec_load
+; CHECK-LABEL: @test_subvec_load(
 entry:
   %a = alloca <4 x i32>
 ; CHECK-NOT: alloca
@@ -282,7 +282,7 @@ entry:
 declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
 
 define <4 x float> @test_subvec_memset() {
-; CHECK: @test_subvec_memset
+; CHECK-LABEL: @test_subvec_memset(
 entry:
   %a = alloca <4 x float>
 ; CHECK-NOT: alloca
@@ -315,7 +315,7 @@ entry:
 }
 
 define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
-; CHECK: @test_subvec_memcpy
+; CHECK-LABEL: @test_subvec_memcpy(
 entry:
   %a = alloca <4 x float>
 ; CHECK-NOT: alloca
@@ -363,7 +363,7 @@ entry:
 }
 
 define i32 @PR14212() {
-; CHECK: @PR14212
+; CHECK-LABEL: @PR14212(
 ; This caused a crash when "splitting" the load of the i32 in order to promote
 ; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case.
 entry:
diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof
new file mode 100644
index 000000000000..d19894d428ce
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/branch.prof
@@ -0,0 +1,11 @@
+symbol table
+1
+main
+main:15680:0:7
+0: 0
+4: 0
+7: 0
+9: 10226
+10: 2243
+16: 0
+18: 0
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
new file mode 100644
index 000000000000..516762763d7b
--- /dev/null
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -0,0 +1,143 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; #include <stdio.h>
+; #include <stdlib.h>
+;
+; int main(int argc, char *argv[]) {
+;   if (argc < 2)
+;     return 1;
+;   double result;
+;   int limit = atoi(argv[1]);
+;   if (limit > 100) {
+;     double s = 23.041968;
+;     for (int u = 0; u < limit; u++) {
+;       double x = s;
+;       s = x + 3.049 + (double)u;
+;       s -= s + 3.94 / x * 0.32;
+;     }
+;     result = s;
+;   } else {
+;     result = 0;
+;   }
+;   printf("result is %lf\n", result);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
+; Every branch below is followed by the edge probabilities the analysis is expected to print for it.
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !13), !dbg !27
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !14), !dbg !27
+  %cmp = icmp slt i32 %argc, 2, !dbg !28
+  br i1 %cmp, label %return, label %if.end, !dbg !28
+; CHECK: edge entry -> return probability is 1 / 2 = 50%
+; CHECK: edge entry -> if.end probability is 1 / 2 = 50%
+
+if.end:                                           ; preds = %entry
+  %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !30
+  %0 = load i8** %arrayidx, align 8, !dbg !30, !tbaa !31
+  %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
+  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17), !dbg !30
+  %cmp1 = icmp sgt i32 %call, 100, !dbg !35
+  br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
+; CHECK: edge if.end -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge]
+; CHECK: edge if.end -> if.end6 probability is 1 / 2244 = 0.0445633%
+
+for.body:                                         ; preds = %if.end, %for.body
+  %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
+  %s.015 = phi double [ %sub, %for.body ], [ 0x40370ABE6A337A81, %if.end ]
+  %add = fadd double %s.015, 3.049000e+00, !dbg !36
+  %conv = sitofp i32 %u.016 to double, !dbg !36
+  %add4 = fadd double %add, %conv, !dbg !36
+  tail call void @llvm.dbg.value(metadata !{double %add4}, i64 0, metadata !18), !dbg !36
+  %div = fdiv double 3.940000e+00, %s.015, !dbg !37
+  %mul = fmul double %div, 3.200000e-01, !dbg !37
+  %add5 = fadd double %add4, %mul, !dbg !37
+  %sub = fsub double %add4, %add5, !dbg !37
+  tail call void @llvm.dbg.value(metadata !{double %sub}, i64 0, metadata !18), !dbg !37
+  %inc = add nsw i32 %u.016, 1, !dbg !38
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21), !dbg !38
+  %exitcond = icmp eq i32 %inc, %call, !dbg !38
+  br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
+; CHECK: edge for.body -> if.end6 probability is 1 / 2244 = 0.0445633%
+; CHECK: edge for.body -> for.body probability is 2243 / 2244 = 99.9554% [HOT edge]
+
+if.end6:                                          ; preds = %for.body, %if.end
+  %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
+  %call7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39
+  br label %return, !dbg !40
+; CHECK: edge if.end6 -> return probability is 16 / 16 = 100% [HOT edge]
+
+return:                                           ; preds = %entry, %if.end6
+  %retval.0 = phi i32 [ 0, %if.end6 ], [ 1, %entry ]
+  ret i32 %retval.0, !dbg !41
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @atoi(i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #3
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind readonly }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!25, !42}
+!llvm.ident = !{!26}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"branch.cc", metadata !"."}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !12, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./branch.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8, metadata !9}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !17, metadata !18, metadata !21, metadata !23}
+!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 4]
+!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 4]
+!15 = metadata !{i32 786688, metadata !4, metadata !"result", metadata !5, i32 7, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 7]
+!16 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!17 = metadata !{i32 786688, metadata !4, metadata !"limit", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [limit] [line 8]
+!18 = metadata !{i32 786688, metadata !19, metadata !"s", metadata !5, i32 10, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 10]
+!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!21 = metadata !{i32 786688, metadata !22, metadata !"u", metadata !5, i32 11, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u] [line 11]
+!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!23 = metadata !{i32 786688, metadata !24, metadata !"x", metadata !5, i32 12, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 12]
+!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!25 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!26 = metadata !{metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
+!27 = metadata !{i32 4, i32 0, metadata !4, null}
+!28 = metadata !{i32 5, i32 0, metadata !29, null}
+!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!30 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!31 = metadata !{metadata !32, metadata !32, i64 0}
+!32 = metadata !{metadata !"any pointer", metadata !33, i64 0}
+!33 = metadata !{metadata !"omnipotent char", metadata !34, i64 0}
+!34 = metadata !{metadata !"Simple C/C++ TBAA"}
+!35 = metadata !{i32 9, i32 0, metadata !20, null}
+!36 = metadata !{i32 13, i32 0, metadata !24, null}
+!37 = metadata !{i32 14, i32 0, metadata !24, null}
+!38 = metadata !{i32 11, i32 0, metadata !22, null}
+!39 = metadata !{i32 20, i32 0, metadata !4, null}
+!40 = metadata !{i32 21, i32 0, metadata !4, null}
+!41 = metadata !{i32 22, i32 0, metadata !4, null}
+!42 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index e7a58f16e227..7554b7f2b0b8 100644
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -22,4 +22,4 @@ entry:
         ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-\ No newline at end of file
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index 1993e4f526d1..1f921914fcb0 100644
--- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -10,7 +10,7 @@ target triple = "thumbv7-apple-darwin10"
 %union..0anon = type { %struct.int16x8x2_t }
 
 define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NOT: alloca
 ; CHECK: "alloca point"
 ; CHECK: store <8 x i16>
@@ -82,7 +82,7 @@ cond.true:                                        ; preds = %entry
 cond.false:                                       ; preds = %entry
   ret void
 
-; CHECK: @test_memcpy_self
+; CHECK-LABEL: @test_memcpy_self(
 ; CHECK-NOT: alloca
 ; CHECK: br i1
 }
diff --git a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
index 52df6d5c5967..b926b021caf1 100644
--- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 %struct.test = type { [3 x double] }
 
 define void @test_memcpy_self() nounwind {
-; CHECK: @test_memcpy_self
+; CHECK-LABEL: @test_memcpy_self(
 ; CHECK-NOT: alloca
 ; CHECK: ret void
   %1 = alloca %struct.test
diff --git a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
index f98f3e8fc458..5f4d0fc7e1c8 100644
--- a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
+++ b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
@@ -36,5 +36,5 @@ unwind:                                           ; preds = %then
   unreachable
 }
 
-; CHECK: define void @odd_fn
+; CHECK-LABEL: define void @odd_fn(
 ; CHECK: %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
diff --git a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
index cd21ff5f0b51..e4456361a66d 100644
--- a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
+++ b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
@@ -4,7 +4,7 @@ target triple = "thumbv7-apple-ios5.0.0"
 
 %union.anon = type { <4 x float> }
 
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK-NOT: alloca
 
 define void @test() nounwind {
diff --git a/test/Transforms/ScalarRepl/address-space.ll b/test/Transforms/ScalarRepl/address-space.ll
index 318d4e759061..d8efc1774b7e 100644
--- a/test/Transforms/ScalarRepl/address-space.ll
+++ b/test/Transforms/ScalarRepl/address-space.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10"
 
 %struct.anon = type { [1 x float] }
 
-; CHECK: define void @Test(
+; CHECK-LABEL: define void @Test(
 ; CHECK: load float addrspace(2)*
 ; CHECK-NEXT: fsub float
 ; CHECK: store float {{.*}}, float addrspace(2)* 
diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll
index 768fec630629..480e12b8d445 100644
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ b/test/Transforms/ScalarRepl/badarray.ll
@@ -7,7 +7,7 @@ target triple = "i386-pc-linux-gnu"
 ; PR3466
 ; Off end of array, don't transform.
 define i32 @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: = alloca
 	%X = alloca [4 x i32]
 	%Y = getelementptr [4 x i32]* %X, i64 0, i64 6		; <i32*> [#uses=2]
@@ -20,7 +20,7 @@ define i32 @test1() {
 ; Off end of array, don't transform.
 define i32 @test2() nounwind {
 entry:
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: = alloca
         %yx2.i = alloca float, align 4          ; <float*> [#uses=1]            
         %yx26.i = bitcast float* %yx2.i to i64*         ; <i64*> [#uses=1]      
@@ -34,7 +34,7 @@ entry:
 ; PR5436
 define void @test3() {
 entry:
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: = alloca
 ; CHECK: store i64
   %var_1 = alloca %padded, align 8                ; <%padded*> [#uses=3]
diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll
index 9676873c30c0..af3c237d4914 100644
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ b/test/Transforms/ScalarRepl/basictest.ll
@@ -7,7 +7,7 @@ define i32 @test1() {
 	store i32 0, i32* %Y
 	%Z = load i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
 ; CHECK: ret i32 0
 }
@@ -23,7 +23,7 @@ define i64 @test2(i64 %X) {
 L2:
 	%Z = load i64* %B		; <i32> [#uses=1]
 	ret i64 %Z
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: alloca
 ; CHECK: ret i64 %X
 }
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 7d3bcea8b857..71bf22a61cd2 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -40,19 +40,20 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, metadata !18, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 1, i32 11, metadata !1, null}
 !8 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
 !9 = metadata !{i32 1, i32 18, metadata !1, null}
 !10 = metadata !{i32 786688, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !18, metadata !1, i32 1, i32 21, i32 0} ; [ DW_TAG_lexical_block ]
 !12 = metadata !{i32 2, i32 9, metadata !11, null}
 !13 = metadata !{i32 2, i32 14, metadata !11, null}
 !14 = metadata !{i32 3, i32 5, metadata !11, null}
@@ -60,3 +61,5 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !16 = metadata !{i32 5, i32 5, metadata !11, null}
 !17 = metadata !{metadata !1}
 !18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
+!19 = metadata !{i32 0}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll
index 2f51cc7cf59c..f7c70dcd5575 100644
--- a/test/Transforms/ScalarRepl/inline-vector.ll
+++ b/test/Transforms/ScalarRepl/inline-vector.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-darwin10.0.0"
 %struct.Vector4 = type { float, float, float, float }
 @f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
 
-; CHECK: define void @f
+; CHECK-LABEL: define void @f(
 ; CHECK-NOT: alloca
 ; CHECK: phi <4 x float>
 
diff --git a/test/Transforms/ScalarRepl/lifetime.ll b/test/Transforms/ScalarRepl/lifetime.ll
index 3f558a1c3737..47cb8549822b 100644
--- a/test/Transforms/ScalarRepl/lifetime.ll
+++ b/test/Transforms/ScalarRepl/lifetime.ll
@@ -9,7 +9,7 @@ declare void @llvm.lifetime.end(i64, i8*)
 %t1 = type {i32, i32, i32}
 
 define void @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
   %A = alloca %t1
   %A1 = getelementptr %t1* %A, i32 0, i32 0
   %A2 = getelementptr %t1* %A, i32 0, i32 1
@@ -22,7 +22,7 @@ define void @test1() {
 }
 
 define void @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   %A = alloca %t1
   %A1 = getelementptr %t1* %A, i32 0, i32 0
   %A2 = getelementptr %t1* %A, i32 0, i32 1
@@ -36,7 +36,7 @@ define void @test2() {
 }
 
 define void @test3() {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
   %A = alloca %t1
   %A1 = getelementptr %t1* %A, i32 0, i32 0
   %A2 = getelementptr %t1* %A, i32 0, i32 1
@@ -50,7 +50,7 @@ define void @test3() {
 }
 
 define void @test4() {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
   %A = alloca %t1
   %A1 = getelementptr %t1* %A, i32 0, i32 0
   %A2 = getelementptr %t1* %A, i32 0, i32 1
@@ -66,7 +66,7 @@ define void @test4() {
 %t2 = type {i32, [4 x i8], i32}
 
 define void @test5() {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
   %A = alloca %t2
 ; CHECK: alloca{{.*}}i8
 ; CHECK: alloca{{.*}}i8
@@ -97,7 +97,7 @@ define void @test5() {
 %t3 = type {[4 x i16], [4 x i8]}
 
 define void @test6() {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
   %A = alloca %t3
 ; CHECK: alloca i8
 ; CHECK: alloca i8
diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg
deleted file mode 100644
index c6106e4746f2..000000000000
--- a/test/Transforms/ScalarRepl/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll']
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
index 95ecf175eed2..3a5c37c04673 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -64,4 +64,4 @@ entry:
 }
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-\ No newline at end of file
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/nonzero-first-index.ll b/test/Transforms/ScalarRepl/nonzero-first-index.ll
index 60f414b71757..b2e93feb6cc2 100644
--- a/test/Transforms/ScalarRepl/nonzero-first-index.ll
+++ b/test/Transforms/ScalarRepl/nonzero-first-index.ll
@@ -8,7 +8,7 @@ target triple = "i386-pc-linux-gnu"
 ; Check that a GEP with a non-zero first index does not prevent SROA as long
 ; as the resulting offset corresponds to an element in the alloca.
 define i32 @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: = i160
 ; CHECK: ret i32 undef
 	%A = alloca %nested
@@ -20,7 +20,7 @@ define i32 @test1() {
 
 ; But, if the offset is out of range, then it should not be transformed.
 define i32 @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: i160
 	%A = alloca %nested
 	%B = getelementptr %nested* %A, i32 0, i32 1, i32 0
@@ -31,7 +31,7 @@ define i32 @test2() {
 
 ; Try it with a bitcast and single GEP....
 define i32 @test3() {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: = i160
 ; CHECK: ret i32 undef
 	%A = alloca %nested
@@ -43,7 +43,7 @@ define i32 @test3() {
 
 ; ...and again make sure that out-of-range accesses are not transformed.
 define i32 @test4() {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: i160
 	%A = alloca %nested
 	%B = bitcast %nested* %A to i32*
diff --git a/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
index cfb88bd80d60..935c2895520c 100644
--- a/test/Transforms/ScalarRepl/only-memcpy-uses.ll
+++ b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
@@ -4,7 +4,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 %struct.S = type { [12 x i32] }
 
-; CHECK: @bar4
+; CHECK-LABEL: @bar4(
 define void @bar4(%struct.S* byval %s) nounwind ssp {
 entry:
 ; CHECK: alloca
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
index 5c21c3bd9f34..a5da2dcf49d1 100644
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10.2"
 %struct.X = type { i32 }
 %PairTy = type {i32, i32}
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %a.0 = alloca i32
 ; CHECK: %b.0 = alloca i32
 define i32 @test1(i32 %x) nounwind readnone ssp {
@@ -24,7 +24,7 @@ entry:
   ret i32 %4
 }
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ]
 ; CHECK-NEXT: ret i32 %X.ld
 define i32 @test2(i1 %c) {
@@ -43,7 +43,7 @@ F:
   ret i32 %Q
 }
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2
 ; CHECK-NEXT: ret i32 %Q
 ; rdar://8904039
@@ -63,7 +63,7 @@ define i32 @test3(i1 %c) {
 define i64 @test4(i1 %c) {
 entry:
   %A = alloca %PairTy
-  ; CHECK: @test4
+  ; CHECK-LABEL: @test4(
   ; CHECK: %A = alloca %PairTy
   %B = getelementptr %PairTy* %A, i32 0, i32 0
   store i32 1, i32* %B
@@ -94,7 +94,7 @@ entry:
   %r = load i32* %b, align 8
   ret i32 %r
   
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: store i32 123, i32* %P
 ; CHECK: ret i32 2
 }
@@ -107,7 +107,7 @@ define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp {
   %p.0 = select i1 %c, i32* %b, i32* %a
   %r = load i32* %p.0, align 8
   ret i32 %r
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1
 ; CHECK-NEXT: ret i32 %r
 }
@@ -124,7 +124,7 @@ define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp {
   
   %r = load i32* %p.0, align 8
   ret i32 %r
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: alloca i32
 ; CHECK: %r = select i1 %c, i32 2, i32 0
 ; CHECK: ret i32 %r
@@ -132,7 +132,7 @@ define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp {
 
 ;; Promote allocs that are PHI'd together by moving the loads.
 define i32 @test8(i32 %x) nounwind readnone ssp {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 ; CHECK-NOT: load i32
 ; CHECK-NOT: store i32
 ; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ]
diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll
index 03d25ac7085e..f0dc141aca8d 100644
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/test/Transforms/ScalarRepl/union-pointer.ll
@@ -1,13 +1,16 @@
 ; PR892
-; RUN: opt < %s -scalarrepl -S | \
-; RUN:   not grep alloca
-; RUN: opt < %s -scalarrepl -S | grep "ret i8"
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
 
-target datalayout = "e-p:32:32-n8:16:32"
+
+target datalayout = "e-p:32:32-p1:16:16-n8:16:32"
 target triple = "i686-apple-darwin8.7.2"
-	%struct.Val = type { i32*, i32 }
+
+%struct.Val = type { i32*, i32 }
 
 define i8* @test(i16* %X) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: alloca
+; CHECK: ret i8*
 	%X_addr = alloca i16*		; <i16**> [#uses=2]
 	store i16* %X, i16** %X_addr
 	%X_addr.upgrd.1 = bitcast i16** %X_addr to i8**		; <i8**> [#uses=1]
@@ -15,7 +18,37 @@ define i8* @test(i16* %X) {
 	ret i8* %tmp
 }
 
+define i8 addrspace(1)* @test_as1(i16 addrspace(1)* %x) {
+; CHECK-LABEL: @test_as1(
+; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
+; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
+; CHECK-NEXT: ret i8 addrspace(1)* %2
+    %x_addr = alloca i16 addrspace(1)*
+	store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr
+	%x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)**
+	%tmp = load i8 addrspace(1)** %x_addr.upgrd.1
+	ret i8 addrspace(1)* %tmp
+}
+
+define i8 addrspace(1)* @test_as1_array(i16 addrspace(1)* %x) {
+; CHECK-LABEL: @test_as1_array(
+; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
+; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
+; CHECK-NEXT: ret i8 addrspace(1)* %2
+  %as_ptr_array = alloca [4 x i16 addrspace(1)*]
+  %elem1 = getelementptr [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1
+  store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1
+  %elem1.cast = bitcast i16 addrspace(1)** %elem1 to i8 addrspace(1)**
+  %tmp = load i8 addrspace(1)** %elem1.cast
+  ret i8 addrspace(1)* %tmp
+}
+
+
 define void @test2(i64 %Op.0) {
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
 	%tmp = alloca %struct.Val, align 8		; <%struct.Val*> [#uses=3]
 	%tmp1 = alloca %struct.Val, align 8		; <%struct.Val*> [#uses=3]
 	%tmp.upgrd.2 = call i64 @_Z3foov( )		; <i64> [#uses=1]
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index 5c82ae4d196d..8ca1ed5005b3 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -14,7 +14,7 @@ entry:
 	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
 ; CHECK: %tmp = load <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
@@ -33,7 +33,7 @@ entry:
 	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NOT: alloca
 ; CHECK: %tmp = load <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
@@ -50,7 +50,7 @@ entry:
 	%tmp.upgrd.4 = load float* %tmp.upgrd.3		; <float> [#uses=1]
 	store float %tmp.upgrd.4, float* %f
 	ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: alloca
 ; CHECK: %tmp = load <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
@@ -67,7 +67,7 @@ entry:
 	%tmp.upgrd.6 = load float* %G.upgrd.5		; <float> [#uses=1]
 	store float %tmp.upgrd.6, float* %f
 	ret void
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: alloca
 ; CHECK: %tmp = load <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
@@ -81,7 +81,7 @@ define i32 @test5(float %X) {  ;; should turn into bitcast.
 	%a = bitcast float* %X1 to i32*
 	%tmp = load i32* %a
 	ret i32 %tmp
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: bitcast float %X to i32
 ; CHECK-NEXT: ret i32
 }
@@ -92,7 +92,7 @@ define i64 @test6(<2 x float> %X) {
 	%P = bitcast <2 x float>* %X_addr to i64*
 	%tmp = load i64* %P
 	ret i64 %tmp
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: bitcast <2 x float> %X to i64
 ; CHECK: ret i64
 }
@@ -107,7 +107,31 @@ entry:
   %1 = getelementptr inbounds %struct.test7* %memtmp, i64 0, i32 0, i64 5
   store i32 0, i32* %1, align 4
   ret void
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 ; CHECK-NOT: alloca
 ; CHECK: and i192
 }
+
+; When promoting an alloca to a 1-element vector type, instructions that
+; produce that same vector type should not be changed to insert one element
+; into a new vector. <rdar://problem/14249078>
+define <1 x i64> @test8(<1 x i64> %a) {
+entry:
+  %a.addr = alloca <1 x i64>, align 8
+  %__a = alloca <1 x i64>, align 8
+  %tmp = alloca <1 x i64>, align 8
+  store <1 x i64> %a, <1 x i64>* %a.addr, align 8
+  %0 = load <1 x i64>* %a.addr, align 8
+  store <1 x i64> %0, <1 x i64>* %__a, align 8
+  %1 = load <1 x i64>* %__a, align 8
+  %2 = bitcast <1 x i64> %1 to <8 x i8>
+  %3 = bitcast <8 x i8> %2 to <1 x i64>
+  %vshl_n = shl <1 x i64> %3, <i64 4>
+  store <1 x i64> %vshl_n, <1 x i64>* %tmp
+  %4 = load <1 x i64>* %tmp
+  ret <1 x i64> %4
+; CHECK-LABEL: @test8(
+; CHECK-NOT: alloca
+; CHECK-NOT: insertelement
+; CHECK: ret <1 x i64>
+}
diff --git a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
index e2765e5f50e6..740ea25f93ae 100644
--- a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
@@ -4,7 +4,7 @@
 
 @G = extern_weak global i32
 
-; CHECK: @test(
+; CHECK-LABEL: @test(
 ; CHECK: br i1 %tmp25
 ; CHECK: bb1:
 ; CHECK: sdiv
@@ -23,7 +23,7 @@ bb6:
 	ret i32 927
 }
 
-; CHECK: @test2(
+; CHECK-LABEL: @test2(
 ; CHECK: br i1 %tmp34
 ; CHECK: bb5:
 ; CHECK: sdiv
diff --git a/test/Transforms/SimplifyCFG/CoveredLookupTable.ll b/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
new file mode 100644
index 000000000000..8b45a590bb1f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
@@ -0,0 +1,48 @@
+; RUN: opt -simplifycfg -S %s | FileCheck %s
+; rdar://15268442
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.0.0"
+
+; CHECK-LABEL: define i3 @coveredswitch_test(
+; CHECK: entry:
+; CHECK-NEXT: sub i3 %input, -4
+; CHECK-NEXT: zext i3 %switch.tableidx to i24
+; CHECK-NEXT: mul i24 %switch.cast, 3
+; CHECK-NEXT: lshr i24 7507338, %switch.shiftamt
+; CHECK-NEXT: trunc i24 %switch.downshift to i3
+; CHECK-NEXT: ret i3 %switch.masked
+
+define i3 @coveredswitch_test(i3 %input) {
+entry:
+  switch i3 %input, label %bb8 [
+    i3 0, label %bb7
+    i3 1, label %bb
+    i3 2, label %bb3
+    i3 3, label %bb4
+    i3 4, label %bb5
+    i3 5, label %bb6
+  ]
+
+bb:                                               ; preds = %entry
+  br label %bb8
+
+bb3:                                              ; preds = %entry
+  br label %bb8
+
+bb4:                                              ; preds = %entry
+  br label %bb8
+
+bb5:                                              ; preds = %entry
+  br label %bb8
+
+bb6:                                              ; preds = %entry
+  br label %bb8
+
+bb7:                                              ; preds = %entry
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb6, %bb5, %bb4, %bb3, %bb, %entry
+  %result = phi i3 [ 0, %bb7 ], [ 1, %bb6 ], [ 2, %bb5 ], [ 3, %bb4 ], [ 4, %bb3 ], [ 5, %bb ], [ 6, %entry ]
+  ret i3 %result
+}
diff --git a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
index 912c7556e006..b07ef970a20a 100644
--- a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
@@ -1,8 +1,18 @@
 ; Test merging of blocks with phi nodes.
 ;
-; RUN: opt < %s -simplifycfg -S | not grep N:
+; RUN: opt < %s -simplifycfg -S > %t
+; RUN: not grep N: %t
+; RUN: not grep X: %t
+; RUN: not grep 'switch i32[^U]+%U' %t
+; RUN: not grep "^BB.tomerge" %t
+; RUN: grep "^BB.nomerge" %t | count 2
 ;
 
+; ModuleID = '<stdin>'
+declare i1 @foo()
+
+declare i1 @bar(i32)
+
 define i32 @test(i1 %a) {
 Q:
         br i1 %a, label %N, label %M
@@ -16,3 +26,231 @@ M:              ; preds = %N, %Q
         ret i32 %R
 }
 
+; Test merging of blocks with phi nodes where at least one incoming value
+; in the successor is undef.
+define i8 @testundef(i32 %u) {
+R:
+  switch i32 %u, label %U [
+    i32 0, label %S
+    i32 1, label %T
+    i32 2, label %T
+  ]
+
+S:                                            ; preds = %R
+  br label %U
+
+T:                                           ; preds = %R, %R
+  br label %U
+
+U:                                        ; preds = %T, %S, %R
+  ; We should be able to merge either the S or T block into U by rewriting
+  ; R's incoming value with the incoming value of that predecessor since
+  ; R's incoming value is undef and both of those predecessors are simple
+  ; unconditional branches.
+  %val.0 = phi i8 [ undef, %R ], [ 1, %T ], [ 0, %S ]
+  ret i8 %val.0
+}
+
+; Test merging of blocks with phi nodes where at least one incoming value
+; in the successor is undef.
+define i8 @testundef2(i32 %u, i32* %A) {
+V:
+  switch i32 %u, label %U [
+    i32 0, label %W
+    i32 1, label %X
+    i32 2, label %X
+    i32 3, label %Z
+  ]
+
+W:                                            ; preds = %V
+  br label %U
+
+Z:
+  store i32 0, i32* %A, align 4
+  br label %X
+
+X:                                           ; preds = %V, %V, %Z
+  br label %U
+
+U:                                        ; preds = %X, %W, %V
+  ; We should be able to merge either the W or X block into U by rewriting
+  ; V's incoming value with the incoming value of that predecessor since
+  ; V's incoming value is undef and both of those predecessors are simple
+  ; unconditional branches. Note that X has predecessors beyond
+  ; the direct predecessors of U.
+  %val.0 = phi i8 [ undef, %V ], [ 1, %X ], [ 1, %W ]
+  ret i8 %val.0
+}
+
+define i8 @testmergesome(i32 %u, i32* %A) {
+V:
+  switch i32 %u, label %Y [
+    i32 0, label %W
+    i32 1, label %X
+    i32 2, label %X
+    i32 3, label %Z
+  ]
+
+W:                                            ; preds = %V
+  store i32 1, i32* %A, align 4
+  br label %Y
+
+Z:
+  store i32 0, i32* %A, align 4
+  br label %X
+
+X:                                           ; preds = %V, %V, %Z
+  br label %Y
+
+Y:                                        ; preds = %X, %W, %V
+  ; After merging X into Y, we should have 5 predecessors
+  ; and thus 5 incoming values to the phi.
+  %val.0 = phi i8 [ 1, %V ], [ 1, %X ], [ 2, %W ]
+  ret i8 %val.0
+}
+
+
+define i8 @testmergesome2(i32 %u, i32* %A) {
+V:
+  switch i32 %u, label %W [
+    i32 0, label %W
+    i32 1, label %Y
+    i32 2, label %X
+    i32 4, label %Y
+  ]
+
+W:                                            ; preds = %V
+  store i32 1, i32* %A, align 4
+  br label %Y
+
+X:                                           ; preds = %V
+  br label %Y
+
+Y:                                        ; preds = %X, %W, %V
+  ; Ensure that we deal with both undef inputs for V when we merge in X.
+  %val.0 = phi i8 [ undef, %V ], [ 1, %X ], [ 2, %W ], [ undef, %V ]
+  ret i8 %val.0
+}
+
+; This function can't be merged
+define void @a() {
+entry:
+	br label %BB.nomerge
+
+BB.nomerge:		; preds = %Common, %entry
+        ; This phi has a conflicting value (0) with below phi (2), so blocks
+        ; can't be merged.
+	%a = phi i32 [ 1, %entry ], [ 0, %Common ]		; <i32> [#uses=1]
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.nomerge
+	%b = phi i32 [ %a, %BB.nomerge ], [ 2, %Common ]		; <i32> [#uses=0]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.nomerge, label %Succ
+
+Exit:		; preds = %Succ
+	ret void
+}
+
+; This function can't be merged
+define void @b() {
+entry:
+	br label %BB.nomerge
+
+BB.nomerge:		; preds = %Common, %entry
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.nomerge
+        ; This phi has conflicting values for Common and (through BB) Common,
+        ; blocks can't be merged
+	%b = phi i32 [ 1, %BB.nomerge ], [ 2, %Common ]		; <i32> [#uses=0]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.nomerge, label %Succ
+
+Exit:		; preds = %Succ
+	ret void
+}
+
+; This function can be merged
+define void @c() {
+entry:
+	br label %BB.tomerge
+
+BB.tomerge:		; preds = %Common, %entry
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.tomerge, %Pre-Exit
+        ; This phi has identical values for Common and (through BB) Common,
+        ; blocks can be merged
+	%b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Pre-Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.tomerge, label %Succ
+
+Pre-Exit:       ; preds = %Succ
+        ; This adds a backedge, so the %b phi node gets a third branch and is
+        ; not completely trivial
+	%cond2 = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond2, label %Succ, label %Exit
+
+Exit:		; preds = %Pre-Exit
+	ret void
+}
+
+; This function can be merged
+define void @d() {
+entry:
+	br label %BB.tomerge
+
+BB.tomerge:		; preds = %Common, %entry
+        ; This phi has a matching value (0) with below phi (0), so blocks
+        ; can be merged.
+	%a = phi i32 [ 1, %entry ], [ 0, %Common ]		; <i32> [#uses=1]
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.tomerge
+	%b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ]		; <i32> [#uses=0]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.tomerge, label %Succ
+
+Exit:		; preds = %Succ
+	ret void
+}
+
+; This function can be merged
+define void @e() {
+entry:
+	br label %BB.tomerge
+
+BB.tomerge:		; preds = %Use, %entry
+        ; This phi is used somewhere else than Succ, but this should not prevent
+        ; merging this block
+	%a = phi i32 [ 1, %entry ], [ 0, %Use ]		; <i32> [#uses=1]
+	br label %Succ
+
+Succ:		; preds = %BB.tomerge
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Use, label %Exit
+
+Use:		; preds = %Succ
+	%cond = call i1 @bar( i32 %a )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.tomerge, label %Exit
+
+Exit:		; preds = %Use, %Succ
+	ret void
+}
diff --git a/test/Transforms/SimplifyCFG/MagicPointer.ll b/test/Transforms/SimplifyCFG/MagicPointer.ll
index 93b9a276eac4..b8b8cbd9dc63 100644
--- a/test/Transforms/SimplifyCFG/MagicPointer.ll
+++ b/test/Transforms/SimplifyCFG/MagicPointer.ll
@@ -2,15 +2,7 @@
 ;
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
-; CHECK: switch i64 %magicptr
-; CHECK: i64 0, label
-; CHECK: i64 1, label
-; CHECK: i64 2, label
-; CHECK: i64 3, label
-; CHECK: i64 4, label
-; CHECK: }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
 @.str = private constant [5 x i8] c"null\00"      ; <[5 x i8]*> [#uses=2]
@@ -18,7 +10,24 @@ target triple = "x86_64-apple-darwin10.0.0"
 @.str2 = private constant [4 x i8] c"two\00"      ; <[4 x i8]*> [#uses=2]
 @.str3 = private constant [5 x i8] c"four\00"     ; <[5 x i8]*> [#uses=2]
 
+@.str_as1 = private addrspace(1) constant [5 x i8] c"null\00"      ; <[5 x i8]*> [#uses=2]
+@.str1_as1 = private addrspace(1) constant [4 x i8] c"one\00"      ; <[4 x i8]*> [#uses=2]
+@.str2_as1 = private addrspace(1) constant [4 x i8] c"two\00"      ; <[4 x i8]*> [#uses=2]
+@.str3_as1 = private addrspace(1) constant [5 x i8] c"four\00"     ; <[5 x i8]*> [#uses=2]
+
+declare i32 @puts(i8*)
+declare i32 @puts_as1(i8 addrspace(1)*)
+
 define void @f(i8* %x) nounwind ssp {
+; CHECK-LABEL: @f(
+; CHECK: switch i64 %magicptr
+; CHECK: i64 0, label
+; CHECK: i64 1, label
+; CHECK: i64 2, label
+; CHECK: i64 3, label
+; CHECK: i64 4, label
+; CHECK: }
+
 entry:
   %tobool = icmp eq i8* %x, null                  ; <i1> [#uses=1]
   br i1 %tobool, label %if.then, label %if.else
@@ -72,4 +81,69 @@ if.end21:                                         ; preds = %if.end20, %if.then
   ret void
 }
 
-declare i32 @puts(i8*)
+; Is it useful to test a version where the ptrtoints are to the same
+; size?
+define void @f_as1(i8 addrspace(1)* %x) nounwind ssp {
+; CHECK-LABEL: @f_as1(
+; CHECK: ptrtoint i8 addrspace(1)* %x to i16
+; CHECK: switch i16 %magicptr
+; CHECK: i16 0, label
+; CHECK: i16 1, label
+; CHECK: i16 2, label
+; CHECK: i16 3, label
+; CHECK: i16 4, label
+; CHECK: }
+
+entry:
+  %tobool = icmp eq i8 addrspace(1)* %x, null                  ; <i1> [#uses=1]
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([5 x i8] addrspace(1)* @.str_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end21
+
+if.else:                                          ; preds = %entry
+  %cmp = icmp eq i8 addrspace(1)* %x, inttoptr (i64 1 to i8 addrspace(1)*)  ; <i1> [#uses=1]
+  br i1 %cmp, label %if.then2, label %if.else4
+
+if.then2:                                         ; preds = %if.else
+  %call3 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([4 x i8] addrspace(1)* @.str1_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end20
+
+if.else4:                                         ; preds = %if.else
+  %cmp6 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 2 to i8 addrspace(1)*) ; <i1> [#uses=1]
+  br i1 %cmp6, label %if.then9, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %if.else4
+  %cmp8 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 3 to i8 addrspace(1)*) ; <i1> [#uses=1]
+  br i1 %cmp8, label %if.then9, label %if.else11
+
+if.then9:                                         ; preds = %lor.lhs.false, %if.else4
+  %call10 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([4 x i8] addrspace(1)* @.str2_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end19
+
+if.else11:                                        ; preds = %lor.lhs.false
+  %cmp13 = icmp eq i8 addrspace(1)* %x, inttoptr (i64 4 to i8 addrspace(1)*) ; <i1> [#uses=1]
+  br i1 %cmp13, label %if.then14, label %if.else16
+
+if.then14:                                        ; preds = %if.else11
+  %call15 = call i32 @puts_as1(i8 addrspace(1)* getelementptr inbounds ([5 x i8] addrspace(1)* @.str3_as1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %if.end
+
+if.else16:                                        ; preds = %if.else11
+  %call18 = call i32 @puts_as1(i8 addrspace(1)* %x) nounwind       ; <i32> [#uses=0]
+  br label %if.end
+
+if.end:                                           ; preds = %if.else16, %if.then14
+  br label %if.end19
+
+if.end19:                                         ; preds = %if.end, %if.then9
+  br label %if.end20
+
+if.end20:                                         ; preds = %if.end19, %if.then2
+  br label %if.end21
+
+if.end21:                                         ; preds = %if.end20, %if.then
+  ret void
+}
+
diff --git a/test/Transforms/SimplifyCFG/PR16069.ll b/test/Transforms/SimplifyCFG/PR16069.ll
new file mode 100644
index 000000000000..0b3d67794513
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR16069.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+@b = extern_weak global i32
+
+define i32 @foo(i1 %y) { ; diamond: both arms are empty and differ only in the value they feed to the phi in %bb3
+; CHECK: define i32 @foo(i1 %y) {
+  br i1 %y, label %bb1, label %bb2
+bb1:                                              ; empty arm, contributes the constant 0
+  br label %bb3
+bb2:                                              ; empty arm, contributes the srem constant expression
+  br label %bb3
+bb3:                                              ; the srem divisor is zext(@b == null), i.e. 0 whenever @b is non-null, so evaluating it can divide by zero
+  %cond.i = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2 ]
+; CHECK: phi i32 {{.*}} srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2
+  ret i32 %cond.i ; the directive above requires the phi, with its %bb2 incoming value, to survive the pass - presumably so the expression is never evaluated unconditionally (confirm against PR16069)
+}
+
+define i32 @foo2(i1 %x) { ; triangle variant of @foo: the srem constant expression arrives on the direct edge from %bb0
+; CHECK: define i32 @foo2(i1 %x) {
+bb0:
+  br i1 %x, label %bb1, label %bb2
+bb1:                                              ; empty block, contributes the constant 0
+  br label %bb2
+bb2:                                              ; the srem divisor is zext(@b == null), i.e. 0 whenever @b is non-null, so evaluating it can divide by zero
+  %cond = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
+; CHECK:  %cond = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
+  ret i32 %cond ; the directive above requires the phi to be left exactly as written
+}
diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
index 786fee9e6610..4d344fa91a9e 100644
--- a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'Sparc' in targets:
     config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll
index 9d1568557f30..bb48c80268aa 100644
--- a/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll
@@ -26,7 +26,7 @@ return:
   %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %retval.0
 
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK-NOT: getelementptr
 ; CHECK: switch i32 %c
 }
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index dd2e5d1c3a77..83fa41900035 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-unknown-linux-gnu"
 
 define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind  {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
         %tmp1 = icmp eq i32 %b, 0
         br i1 %tmp1, label %bb1, label %bb3
@@ -31,7 +31,7 @@ bb3:		; preds = %bb2, %entry
 declare i8 @llvm.cttz.i8(i8, i1)
 
 define i8 @test2(i8 %a) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
   br i1 undef, label %bb_true, label %bb_false
 bb_true:
   %b = tail call i8 @llvm.cttz.i8(i8 %a, i1 false)
@@ -47,7 +47,7 @@ join:
 define i8* @test4(i1* %dummy, i8* %a, i8* %b) {
 ; Test that we don't speculate an arbitrarily large number of unfolded constant
 ; expressions.
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 
 entry:
   %cond1 = load volatile i1* %dummy
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index 4a692f3622ec..e1635f491156 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define void @test1(i1 %C, i1* %BP) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: entry:
 ; CHECK-NEXT: ret void
 entry:
@@ -14,7 +14,7 @@ F:
 }
 
 define void @test2() {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: entry:
 ; CHECK-NEXT: call void @test2()
 ; CHECK-NEXT: ret void
@@ -28,7 +28,7 @@ N:
 }
 
 define i32 @test3(i32 %v) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: entry:
 ; CHECK-NEXT: [[CMP:%[A-Za-z0-9]+]] = icmp eq i32 %v, 2
 ; CHECK-NEXT: select i1 [[CMP]], i32 2, i32 1
diff --git a/test/Transforms/SimplifyCFG/X86/lit.local.cfg b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
index a8ad0f1a28b2..ba763cf03ffc 100644
--- a/test/Transforms/SimplifyCFG/X86/lit.local.cfg
+++ b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 5f70465c64d4..368732711a70 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -52,7 +52,7 @@ return:
   %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %retval.0
 
-; CHECK: @f
+; CHECK-LABEL: @f(
 ; CHECK: entry:
 ; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
 ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
@@ -88,7 +88,7 @@ sw.epilog:
   call void @dummy(i8 signext %a.0, float %b.0)
   ret void
 
-; CHECK: @h
+; CHECK-LABEL: @h(
 ; CHECK: entry:
 ; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
 ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
@@ -138,7 +138,7 @@ return:
                       [ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ]
   ret i8* %retval.0
 
-; CHECK: @foostring
+; CHECK-LABEL: @foostring(
 ; CHECK: entry:
 ; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
 ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
@@ -171,7 +171,7 @@ sw.epilog:
   %b.0 = phi i32 [ 10, %sw.default ], [ 5, %sw.bb3 ], [ 1, %sw.bb2 ], [ 4, %sw.bb1 ], [ 3, %entry ]
   ret i32 %a.0
 
-; CHECK: @earlyreturncrash
+; CHECK-LABEL: @earlyreturncrash(
 ; CHECK: switch.lookup:
 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
 ; CHECK-NEXT: %switch.load = load i32* %switch.gep
@@ -221,7 +221,7 @@ lor.end:
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
-; CHECK: @crud
+; CHECK-LABEL: @crud(
 ; CHECK: entry:
 ; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
 ; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
@@ -263,7 +263,7 @@ if.else: br label %if.end
 if.end:
   %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
   ret i32 %dirent_type.0
-; CHECK: define i32 @overflow
+; CHECK-LABEL: define i32 @overflow(
 ; CHECK: switch
 ; CHECK: phi
 }
@@ -284,7 +284,7 @@ bb2: br label %bb3
 bb3:
   %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ]
   ret i1 %tmp4
-; CHECK: define i1 @undef
+; CHECK-LABEL: define i1 @undef(
 ; CHECK: %switch.cast = trunc i32 %switch.tableidx to i9
 ; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt
 }
@@ -711,7 +711,7 @@ return:
   ret i32 %retval.0
 }
 
-define i32 @cprop(i32 %x) {
+define i32 @cprop(i32 %x, i32 %y) {
 entry:
   switch i32 %x, label %sw.default [
     i32 1, label %return
@@ -727,7 +727,8 @@ sw.bb1: br label %return
 
 sw.bb2:
   %and = and i32 %x, 1
-  %tobool = icmp ne i32 %and, 0
+  %and.ptr = inttoptr i32 %and to i8*
+  %tobool = icmp ne i8* %and.ptr, null
   %cond = select i1 %tobool, i32 -123, i32 456
   %sub = sub nsw i32 %x, %cond
   br label %return
@@ -735,16 +736,18 @@ sw.bb2:
 sw.bb3:
   %trunc = trunc i32 %x to i8
   %sext = sext i8 %trunc to i32
+  %select.i = icmp sgt i32 %sext, 0
+  %select = select i1 %select.i, i32 %sext, i32 %y
   br label %return
 
 sw.default:
   br label %return
 
 return:
-  %retval.0 = phi i32 [ 123, %sw.default ], [ %sext, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
+  %retval.0 = phi i32 [ 123, %sw.default ], [ %select, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
   ret i32 %retval.0
 
-; CHECK: @cprop
+; CHECK-LABEL: @cprop(
 ; CHECK: switch.lookup:
 ; CHECK: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table5, i32 0, i32 %switch.tableidx
 }
@@ -773,7 +776,7 @@ return:
   %retval.0 = phi i32 [ 1, %sw.bb3 ], [ -1, %sw.bb2 ], [ 0, %sw.bb ]
   ret i32 %retval.0
 
-; CHECK: @unreachable
+; CHECK-LABEL: @unreachable(
 ; CHECK: switch.lookup:
 ; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx
 }
@@ -799,7 +802,7 @@ return:
   %retval.0 = phi i96 [ 15, %sw.default ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i96 %retval.0
 
-; CHECK: @illegaltype
+; CHECK-LABEL: @illegaltype(
 ; CHECK-NOT: @switch.table
 ; CHECK: switch i32 %c
 }
diff --git a/test/Transforms/SimplifyCFG/attr-noduplicate.ll b/test/Transforms/SimplifyCFG/attr-noduplicate.ll
new file mode 100644
index 000000000000..523aa51bb84e
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/attr-noduplicate.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; This test checks that the SimplifyCFG pass won't duplicate a call to a
+; function marked noduplicate.
+;
+; CHECK-LABEL: @noduplicate(
+; CHECK: call void @barrier
+; CHECK-NOT: call void @barrier
+define void @noduplicate(i32 %cond, i32* %out) {
+entry:
+  %out1 = getelementptr i32* %out, i32 1
+  %out2 = getelementptr i32* %out, i32 2
+  %cmp = icmp eq i32 %cond, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 5, i32* %out
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  call void @barrier() #0                         ; sole call to @barrier; the directives above accept exactly one in the output
+  br i1 %cmp, label %cond.end, label %cond.false  ; re-tests the same %cmp that entry already branched on
+
+cond.false:
+  store i32 5, i32* %out1
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %if.end
+  %value = phi i32 [ 1, %cond.false ], [ 0, %if.end ]
+  store i32 %value, i32* %out2
+  ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @barrier() #0
+
+attributes #0 = { noduplicate nounwind }
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 052e10667da5..9c4edd68b800 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -5,14 +5,14 @@
 define void @test1() {
         br label %1
         ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: ret void
 }
 
 define void @test2() {
         ret void
         ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
 }
@@ -20,7 +20,7 @@ define void @test2() {
 define void @test3(i1 %T) {
         br i1 %T, label %1, label %1
         ret void
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NEXT: ret void
 }
 
@@ -38,6 +38,6 @@ define void @test5(i32 %A) {
 
 return:                                           ; preds = %entry
   ret void
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT: ret void
 }
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 0526883fe8f4..9d8086c29769 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -41,18 +41,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i32)* @foo, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.i", metadata !"/private/tmp", metadata !"clang (trunk 129006)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo]
+!1 = metadata !{i32 589865, metadata !15} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !15, i32 12, metadata !"clang (trunk 129006)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 131, i32 2, metadata !0, null}
 !6 = metadata !{i32 134, i32 2, metadata !0, null}
 !7 = metadata !{i32 590080, metadata !8, metadata !"bar", metadata !1, i32 232, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 231, i32 1, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 589862, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 589835, metadata !15, metadata !0, i32 231, i32 1, i32 3} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 589862, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 589860, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !12 = metadata !{i32 232, i32 40, metadata !8, null}
 !13 = metadata !{i32 234, i32 2, metadata !8, null}
 !14 = metadata !{i32 274, i32 1, metadata !8, null}
+!15 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
diff --git a/test/Transforms/SimplifyCFG/common-dest-folding.ll b/test/Transforms/SimplifyCFG/common-dest-folding.ll
new file mode 100644
index 000000000000..0aa3b2c560a5
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/common-dest-folding.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+;CHECK-LABEL: @foo(
+;CHECK: and i32 %c1, %k
+;CHECK: icmp eq i32
+;CHECK: and i32 %c2, %k
+;CHECK: icmp eq i32
+;CHECK: or i1
+;CHECK: ret
+define i32 @foo(i32 %k, i32 %c1, i32 %c2) {
+  %1 = and i32 %c1, %k                            ; first mask test
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %8, label %3                    ; shares destination %8 with the branch in block %3
+
+; <label>:3                                       ; preds = %0
+  %4 = and i32 %c2, %k                            ; second mask test
+  %5 = icmp eq i32 %4, 0
+  br i1 %5, label %8, label %6                    ; the directives above expect both tests to be combined with an `or i1`
+
+; <label>:6                                       ; preds = %3
+  %7 = tail call i32 (...)* @bar() nounwind
+  br label %8
+
+; <label>:8                                       ; preds = %3, %0, %6
+  ret i32 undef
+}
+
+;CHECK-LABEL: @conduse(
+;CHECK: shl i32 1, %c1
+;CHECK-NEXT: shl i32 1, %c2
+;CHECK-NEXT: and i32
+;CHECK-NEXT: icmp eq i32
+;CHECK-NEXT: and i32
+;CHECK-NEXT: icmp eq i32
+;CHECK: ret
+define i32 @conduse(i32 %k, i32 %c1, i32 %c2) #0 { ; NOTE(review): #0 and #1 are used here but no attributes group is defined in this file - confirm that is intended
+bb:
+  %tmp = shl i32 1, %c1
+  %tmp4 = shl i32 1, %c2                          ; defined here but only used by the second test in %bb3
+  %tmp1 = and i32 %tmp, %k
+  %tmp2 = icmp eq i32 %tmp1, 0
+  br i1 %tmp2, label %bb9, label %bb3             ; shares destination %bb9 with the branch in %bb3
+
+bb3:                                              ; preds = %bb
+  %tmp5 = and i32 %tmp4, %k                       ; the directives above expect this and/icmp pair directly after the first pair
+  %tmp6 = icmp eq i32 %tmp5, 0
+  br i1 %tmp6, label %bb9, label %bb7
+
+bb7:                                              ; preds = %bb3
+  %tmp8 = tail call i32 (...)* @bar() #1
+  br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb3, %bb
+  ret i32 undef
+}
+
+declare i32 @bar(...)
diff --git a/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll b/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll
index 3996efd82b5f..036a615e7ff0 100644
--- a/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll
+++ b/test/Transforms/SimplifyCFG/dce-cond-after-folding-terminator.ll
@@ -2,7 +2,7 @@
 
 define void @test_br(i32 %x) {
 entry:
-; CHECK: @test_br
+; CHECK-LABEL: @test_br(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: ret void
   %cmp = icmp eq i32 %x, 10
@@ -17,7 +17,7 @@ if.end:                                           ; preds = %if.else, %if.then
 
 define void @test_switch(i32 %x) nounwind {
 entry:
-; CHECK: @test_switch
+; CHECK-LABEL: @test_switch(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: ret void
   %rem = srem i32 %x, 3
@@ -35,7 +35,7 @@ sw.epilog:                                        ; preds = %sw.bb
 
 define void @test_indirectbr(i32 %x) {
 entry:
-; CHECK: @test_indirectbr
+; CHECK-LABEL: @test_indirectbr(
 ; CHECK-NEXT: entry:
 ; Ideally this should now check:
 ;   CHK-NEXT: ret void
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 03053f037d0a..0547fa972017 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -29,25 +29,28 @@ declare i32 @bar(...)
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.module.flags = !{!21}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"b.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"b.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!1 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !8, metadata !8, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
 !7 = metadata !{i32 2, i32 13, metadata !0, null}
 !8 = metadata !{i32 0}
 !9 = metadata !{i32 590080, metadata !10, metadata !"k", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 589835, metadata !0, i32 2, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 589835, metadata !20, metadata !0, i32 2, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
 !11 = metadata !{i32 3, i32 12, metadata !10, null}
 !12 = metadata !{i32 4, i32 3, metadata !10, null}
 !13 = metadata !{i32 5, i32 5, metadata !14, null}
-!14 = metadata !{i32 589835, metadata !10, i32 4, i32 10, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 589835, metadata !20, metadata !10, i32 4, i32 10, i32 1} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 6, i32 3, metadata !14, null}
 !16 = metadata !{i32 7, i32 5, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !10, i32 6, i32 10, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 589835, metadata !20, metadata !10, i32 6, i32 10, i32 2} ; [ DW_TAG_lexical_block ]
 !18 = metadata !{i32 8, i32 3, metadata !17, null}
 !19 = metadata !{i32 9, i32 3, metadata !10, null}
+!20 = metadata !{metadata !"b.c", metadata !"/private/tmp"}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll
index 7853e9abd2ea..d0020d001b03 100644
--- a/test/Transforms/SimplifyCFG/indirectbr.ll
+++ b/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -77,7 +77,7 @@ BB0:
 ; SimplifyCFG should turn the indirectbr into a conditional branch on the
 ; condition of the select.
 
-; CHECK: @indbrtest3
+; CHECK-LABEL: @indbrtest3(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 %cond, label %L1, label %L2
 ; CHECK-NOT: indirectbr
@@ -104,7 +104,7 @@ L3:
 ; As in @indbrtest1, it should really remove the branch entirely, but it doesn't
 ; because it's in the entry block.
 
-; CHECK: @indbrtest4
+; CHECK-LABEL: @indbrtest4(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br label %L1
 define void @indbrtest4(i1 %cond) nounwind {
@@ -126,7 +126,7 @@ L3:
 ; SimplifyCFG should turn the indirectbr into an unreachable because neither
 ; destination is listed as a successor.
 
-; CHECK: @indbrtest5
+; CHECK-LABEL: @indbrtest5(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: unreachable
 ; CHECK-NEXT: }
@@ -156,7 +156,7 @@ L4:
 
 ; The same as above, except the selected addresses are equal.
 
-; CHECK: @indbrtest6
+; CHECK-LABEL: @indbrtest6(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: unreachable
 ; CHECK-NEXT: }
diff --git a/test/Transforms/SimplifyCFG/invoke.ll b/test/Transforms/SimplifyCFG/invoke.ll
index 10dc41b44bc7..5f513ac3e6de 100644
--- a/test/Transforms/SimplifyCFG/invoke.ll
+++ b/test/Transforms/SimplifyCFG/invoke.ll
@@ -9,7 +9,7 @@ declare i32 @nounwind_fn() nounwind
 declare i32 @fn()
 
 
-; CHECK: @f1
+; CHECK-LABEL: @f1(
 define i8* @f1() nounwind uwtable ssp {
 entry:
 ; CHECK: call void @llvm.trap()
@@ -28,7 +28,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f2
+; CHECK-LABEL: @f2(
 define i8* @f2() nounwind uwtable ssp {
 entry:
 ; CHECK: call void @llvm.trap()
@@ -47,7 +47,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f3
+; CHECK-LABEL: @f3(
 define i32 @f3() nounwind uwtable ssp {
 ; CHECK-NEXT: entry
 entry:
@@ -66,7 +66,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f4
+; CHECK-LABEL: @f4(
 define i32 @f4() nounwind uwtable ssp {
 ; CHECK-NEXT: entry
 entry:
@@ -86,7 +86,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f5
+; CHECK-LABEL: @f5(
 define i32 @f5(i1 %cond, i8* %a, i8* %b) {
 entry:
   br i1 %cond, label %x, label %y
@@ -117,7 +117,7 @@ lpad:
   unreachable
 }
 
-; CHECK: @f6
+; CHECK-LABEL: @f6(
 define void @f6() {
 entry:
   invoke void @purefn()
diff --git a/test/Transforms/SimplifyCFG/invoke_unwind.ll b/test/Transforms/SimplifyCFG/invoke_unwind.ll
index ed7ff8287f20..435bed0c2957 100644
--- a/test/Transforms/SimplifyCFG/invoke_unwind.ll
+++ b/test/Transforms/SimplifyCFG/invoke_unwind.ll
@@ -5,7 +5,7 @@ declare void @bar()
 ; This testcase checks to see if the simplifycfg pass is converting invoke
 ; instructions to call instructions if the handler just rethrows the exception.
 define i32 @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT: call void @bar()
 ; CHECK-NEXT: ret i32 0
         invoke void @bar( )
diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/SimplifyCFG/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll b/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
index 028fb0745631..f34aec525df8 100644
--- a/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
+++ b/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
@@ -24,7 +24,7 @@ if.end7:                                          ; preds = %if.else, %if.then4,
   %tmp9 = load i32* %x.0
   ret i32 %tmp9
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: if.else:
 ; CHECK: br label %if.end7
 
@@ -52,7 +52,7 @@ if.end7:                                          ; preds = %if.else, %if.then4,
   %x.0 = phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
   %tmp9 = load i32* %x.0
   ret i32 %tmp9
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: if.else:
 ; CHECK: unreachable
 
@@ -81,7 +81,7 @@ if.end7:                                          ; preds = %if.else, %if.then4,
   tail call void @bar() nounwind
   %tmp9 = load i32* %x.0
   ret i32 %tmp9
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: if.end7:
 ; CHECK: phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
 }
@@ -110,6 +110,6 @@ if.end7:                                          ; preds = %if.else, %if.then4,
   %tmp10 = or i32 %tmp9, 1
   store i32 %tmp10, i32* %gep
   ret i32 %tmp9
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: phi
 }
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
index 53d5448372da..8cc07e39a180 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
@@ -13,7 +13,7 @@
 declare void @foo() nounwind uwtable
 
 define void @func(i32 %A) nounwind uwtable {
-; CHECK: define void @func
+; CHECK-LABEL: define void @func(
 entry:
   %cmp11 = icmp eq i32 %A, 1
   br i1 %cmp11, label %if.then, label %if.else, !prof !0
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index beef52700820..4022ed6927ee 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -3,7 +3,7 @@
 declare void @helper(i32)
 
 define void @test1(i1 %a, i1 %b) {
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 entry:
   br i1 %a, label %Y, label %X, !prof !0
 ; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !0
@@ -22,7 +22,7 @@ Z:
 }
 
 define void @test2(i1 %a, i1 %b) {
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 entry:
   br i1 %a, label %X, label %Y, !prof !1
 ; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
@@ -42,7 +42,7 @@ Z:
 }
 
 define void @test3(i1 %a, i1 %b) {
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK-NOT: !prof
 entry:
   br i1 %a, label %X, label %Y, !prof !1
@@ -61,7 +61,7 @@ Z:
 }
 
 define void @test4(i1 %a, i1 %b) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK-NOT: !prof
 entry:
   br i1 %a, label %X, label %Y
@@ -156,7 +156,7 @@ sw.epilog:
 
 ;; This test is based on test1 but swapped the targets of the second branch.
 define void @test1_swap(i1 %a, i1 %b) {
-; CHECK: @test1_swap
+; CHECK-LABEL: @test1_swap(
 entry:
   br i1 %a, label %Y, label %X, !prof !0
 ; CHECK: br i1 %or.cond, label %Y, label %Z, !prof !4
@@ -175,7 +175,7 @@ Z:
 }
 
 define void @test7(i1 %a, i1 %b) {
-; CHECK: @test7
+; CHECK-LABEL: @test7(
 entry:
   %c = or i1 %b, false
   br i1 %a, label %Y, label %X, !prof !0
@@ -195,7 +195,7 @@ Z:
 
 ; Test basic folding to a conditional branch.
 define void @test8(i64 %x, i64 %y) nounwind {
-; CHECK: @test8
+; CHECK-LABEL: @test8(
 entry:
     %lt = icmp slt i64 %x, %y
 ; CHECK: br i1 %lt, label %a, label %b, !prof !6
@@ -219,7 +219,7 @@ bees:
 ; Test edge splitting when the default target has icmp and unconditinal
 ; branch
 define i1 @test9(i32 %x, i32 %y) nounwind {
-; CHECK: @test9
+; CHECK-LABEL: @test9(
 entry:
     switch i32 %x, label %bees [
         i32 0, label %a
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 3e2a6237b275..96c214cbc81e 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -14,7 +14,7 @@ if.end:
   %x.addr = phi i8* [ %incdec.ptr, %if.then ], [ %x, %entry ]
   ret i8* %x.addr
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK-NOT: select
 ; CHECK: ret i8* %x.addr
 }
@@ -34,7 +34,7 @@ if.end:
   %x.addr = phi i8* [ %incdec.ptr, %if.then ], [ %y, %entry ]
   ret i8* %x.addr
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %incdec.ptr.y = select i1 %cmp, i8* %incdec.ptr, i8* %y
 ; CHECK: ret i8* %incdec.ptr.y
 }
diff --git a/test/Transforms/SimplifyCFG/speculate-store.ll b/test/Transforms/SimplifyCFG/speculate-store.ll
index 8d7fe79dcd4e..e241901a8789 100644
--- a/test/Transforms/SimplifyCFG/speculate-store.ll
+++ b/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -17,7 +17,7 @@ entry:
 
 ; Make sure we speculate stores like the following one. It is cheap compared to
 ; a mispredicated branch.
-; CHECK: @ifconvertstore
+; CHECK-LABEL: @ifconvertstore(
 ; CHECK: %add5.add = select i1 %cmp6, i32 %add5, i32 %add
 ; CHECK: store i32 %add5.add, i32* %arrayidx2, align 4
 if.then:
@@ -43,7 +43,7 @@ entry:
   %cmp6 = icmp sgt i32 %add5, %C
   br i1 %cmp6, label %if.then, label %ret.end
 
-; CHECK: @noifconvertstore1
+; CHECK-LABEL: @noifconvertstore1(
 ; CHECK-NOT: select
 if.then:
   store i32 %add5, i32* %arrayidx2, align 4
@@ -71,7 +71,7 @@ entry:
   %cmp6 = icmp sgt i32 %add5, %C
   br i1 %cmp6, label %if.then, label %ret.end
 
-; CHECK: @noifconvertstore2
+; CHECK-LABEL: @noifconvertstore2(
 ; CHECK-NOT: select
 if.then:
   store i32 %add5, i32* %arrayidx2, align 4
@@ -97,7 +97,7 @@ entry:
   br i1 %cmp6, label %if.then, label %ret.end
 
 ; Make sure we don't speculate volatile stores.
-; CHECK: @noifconvertstore_volatile
+; CHECK-LABEL: @noifconvertstore_volatile(
 ; CHECK-NOT: select
 if.then:
   store volatile i32 %add5, i32* %arrayidx2, align 4
diff --git a/test/Transforms/SimplifyCFG/speculate-with-offset.ll b/test/Transforms/SimplifyCFG/speculate-with-offset.ll
index a737d5602e84..64fed85c795c 100644
--- a/test/Transforms/SimplifyCFG/speculate-with-offset.ll
+++ b/test/Transforms/SimplifyCFG/speculate-with-offset.ll
@@ -3,7 +3,7 @@
 ; This load is safe to speculate, as it's from a safe offset
 ; within an alloca.
 
-; CHECK: @yes
+; CHECK-LABEL: @yes(
 ; CHECK-NOT: br
 
 define void @yes(i1 %c) nounwind {
@@ -25,7 +25,7 @@ return:                                           ; preds = %if.end, %if.then
   ret void
 }
 
-; CHECK: @no0
+; CHECK-LABEL: @no0(
 ; CHECK: br i1 %c
 
 define void @no0(i1 %c) nounwind {
@@ -47,7 +47,7 @@ return:                                           ; preds = %if.end, %if.then
   ret void
 }
 
-; CHECK: @no1
+; CHECK-LABEL: @no1(
 ; CHECK: br i1 %c
 
 define void @no1(i1 %c, i64 %n) nounwind {
@@ -69,7 +69,7 @@ return:                                           ; preds = %if.end, %if.then
   ret void
 }
 
-; CHECK: @no2
+; CHECK-LABEL: @no2(
 ; CHECK: br i1 %c
 
 define void @no2(i1 %c, i64 %n) nounwind {
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index 3b0c48be6e2d..692973c362bf 100644
--- a/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -13,7 +13,7 @@ b:
   ret i32 3
 c:
   ret i32 5
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %cond = icmp eq i32 %i, 24
 ; CHECK: %. = select i1 %cond, i32 5, i32 0
 ; CHECK: ret i32 %.
@@ -33,6 +33,6 @@ b:
   ret i32 3
 c:
   ret i32 5
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: ret i32 0
 }
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 9cd709ff8ecf..dec5f80ab98b 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -2,7 +2,7 @@
 
 ; Test basic folding to a conditional branch.
 define i32 @foo(i64 %x, i64 %y) nounwind {
-; CHECK: @foo
+; CHECK-LABEL: @foo(
 entry:
     %eq = icmp eq i64 %x, %y
     br i1 %eq, label %b, label %switch
@@ -32,7 +32,7 @@ bees:
 
 ; Test basic folding to an unconditional branch.
 define i32 @bar(i64 %x, i64 %y) nounwind {
-; CHECK: @bar
+; CHECK-LABEL: @bar(
 entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: tail call void @bees.a() [[NUW:#[0-9]+]]
@@ -58,7 +58,7 @@ bees:
 
 ; Test the edge case where both values from the select are the default case.
 define void @bazz(i64 %x, i64 %y) nounwind {
-; CHECK: @bazz
+; CHECK-LABEL: @bazz(
 entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: tail call void @bees.b() [[NUW]]
@@ -83,7 +83,7 @@ bees:
 
 ; Test the edge case where both values from the select are equal.
 define void @quux(i64 %x, i64 %y) nounwind {
-; CHECK: @quux
+; CHECK-LABEL: @quux(
 entry:
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: tail call void @bees.a() [[NUW]]
@@ -108,7 +108,7 @@ bees:
 
 ; A final test, for phi node munging.
 define i32 @xyzzy(i64 %x, i64 %y) {
-; CHECK: @xyzzy
+; CHECK-LABEL: @xyzzy(
 entry:
     %eq = icmp eq i64 %x, %y
     br i1 %eq, label %r, label %cont
diff --git a/test/Transforms/SimplifyCFG/switch-to-icmp.ll b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
index e9a6db45cb00..bfacf25ca7f7 100644
--- a/test/Transforms/SimplifyCFG/switch-to-icmp.ll
+++ b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
@@ -15,7 +15,7 @@ lor.end:
  %0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ], [ true, %entry ]
  ret i1 %0
 
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: %x.off = add i32 %x, -1
 ; CHECK: %switch = icmp ult i32 %x.off, 3
 }
@@ -34,7 +34,7 @@ lor.end:
  %0 = phi i1 [ true, %entry ], [ false, %lor.rhs ], [ true, %entry ]
  ret i1 %0
 
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: %switch = icmp ult i32 %x, 2
 }
 
@@ -51,7 +51,7 @@ good:
 bad:
  ret i32 1
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: entry:
 ; CHECK-NEXT: ret i32 0
 }
diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll
index 546cc75f2973..e1e91570300c 100644
--- a/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck -check-prefix=CHECK %s
+; RUN: opt -S -default-data-layout="p:32:32-p1:16:16" -simplifycfg < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s
 
 declare void @foo1()
 
@@ -15,13 +16,51 @@ T:              ; preds = %0
 F:              ; preds = %0
         call void @foo2( )
         ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK:  switch i32 %V, label %F [
 ; CHECK:    i32 17, label %T
 ; CHECK:    i32 4, label %T
 ; CHECK:  ]
 }
 
+define void @test1_ptr(i32* %V) {
+        %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
+        %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
+T:              ; preds = %0
+        call void @foo1( )
+        ret void
+F:              ; preds = %0
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr(
+; DL:  %magicptr = ptrtoint i32* %V to i32
+; DL:  switch i32 %magicptr, label %F [
+; DL:    i32 17, label %T
+; DL:    i32 4, label %T
+; DL:  ]
+}
+
+define void @test1_ptr_as1(i32 addrspace(1)* %V) {
+        %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
+        %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
+T:              ; preds = %0
+        call void @foo1( )
+        ret void
+F:              ; preds = %0
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr_as1(
+; DL:  %magicptr = ptrtoint i32 addrspace(1)* %V to i16
+; DL:  switch i16 %magicptr, label %F [
+; DL:    i16 17, label %T
+; DL:    i16 4, label %T
+; DL:  ]
+}
+
 define void @test2(i32 %V) {
         %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
         %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
@@ -33,7 +72,7 @@ T:              ; preds = %0
 F:              ; preds = %0
         call void @foo2( )
         ret void
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK:  switch i32 %V, label %T [
 ; CHECK:    i32 17, label %F
 ; CHECK:    i32 4, label %F
@@ -53,7 +92,7 @@ F:              ; preds = %N
         call void @foo2( )
         ret void
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: switch i32 %V, label %F [
 ; CHECK:     i32 4, label %T
 ; CHECK:     i32 17, label %T
@@ -79,8 +118,8 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp8, %lor.rhs ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
-  
-; CHECK: @test4
+
+; CHECK-LABEL: @test4(
 ; CHECK:  switch i8 %c, label %lor.rhs [
 ; CHECK:    i8 62, label %lor.end
 ; CHECK:    i8 34, label %lor.end
@@ -104,7 +143,7 @@ lor.end:                                          ; preds = %entry, %entry, %ent
   %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK:  switch i8 %c, label %lor.rhs [
 ; CHECK:    i8 62, label %lor.end
 ; CHECK:    i8 34, label %lor.end
@@ -139,8 +178,8 @@ shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2,
 UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
         %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
         ret i1 %UnifiedRetVal
-        
-; CHECK: @test6
+
+; CHECK-LABEL: @test6(
 ; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
 ; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
 }
@@ -160,8 +199,8 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %entry
   ret void
-  
-; CHECK: @test7
+
+; CHECK-LABEL: @test7(
 ; CHECK:   %cmp = icmp ult i32 %x, 32
 ; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
 ; CHECK: switch.early.test:
@@ -189,8 +228,8 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %entry
   ret i32 0
-  
-; CHECK: @test8
+
+; CHECK-LABEL: @test8(
 ; CHECK: switch.early.test:
 ; CHECK:   switch i8 %c, label %if.end [
 ; CHECK:     i8 99, label %if.then
@@ -245,8 +284,8 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %0 = phi i1 [ true, %lor.lhs.false36 ], [ true, %lor.lhs.false31 ], [ true, %lor.lhs.false26 ], [ true, %lor.lhs.false21 ], [ true, %lor.lhs.false16 ], [ true, %lor.lhs.false11 ], [ true, %lor.lhs.false6 ], [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp43, %lor.rhs ]
   %conv46 = zext i1 %0 to i32
   ret i32 %conv46
-  
-; CHECK: @test9
+
+; CHECK-LABEL: @test9(
 ; CHECK:   %cmp = icmp ult i8 %c, 33
 ; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
 
@@ -275,7 +314,7 @@ T:
 F:
   ret i32 324
 
-; CHECK: @test10
+; CHECK-LABEL: @test10(
 ; CHECK:  br i1 %Cond, label %switch.early.test, label %F
 ; CHECK:switch.early.test:
 ; CHECK:  switch i32 %mode, label %T [
@@ -314,7 +353,7 @@ return:                                           ; preds = %if.end, %if.then
   %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ]
   ret i32 %retval.0
 
-; CHECK: @test11
+; CHECK-LABEL: @test11(
 ; CHECK: switch i32 %bar, label %if.end [
 ; CHECK:   i32 55, label %return
 ; CHECK:   i32 53, label %return
@@ -343,7 +382,7 @@ bb55.us.us:
 
 malformed:
   ret void
-; CHECK: @test12
+; CHECK-LABEL: @test12(
 
 }
 
@@ -371,7 +410,7 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
-; CHECK: @test13
+; CHECK-LABEL: @test13(
 ; CHECK:  switch i32 %x, label %if.end [
 ; CHECK:     i32 6, label %if.then
 ; CHECK:     i32 4, label %if.then
@@ -405,7 +444,7 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
-; CHECK: @test14
+; CHECK-LABEL: @test14(
 ; CHECK:  switch i32 %x, label %if.end [
 ; CHECK:     i32 6, label %if.then
 ; CHECK:     i32 4, label %if.then
@@ -431,7 +470,7 @@ if.then:
 if.end:
   ret void
 
-; CHECK: @test15
+; CHECK-LABEL: @test15(
 ; CHECK-NOT: switch
 ; CHECK: ret void
 }
@@ -440,7 +479,7 @@ if.end:
 ; rdar://5134905
 define zeroext i1 @test16(i32 %x) nounwind {
 entry:
-; CHECK: @test16
+; CHECK-LABEL: @test16(
 ; CHECK: %x.off = add i32 %x, -1
 ; CHECK: %switch = icmp ult i32 %x.off, 3
   %cmp.i = icmp eq i32 %x, 1
@@ -473,9 +512,45 @@ lor.lhs.false8:
 return:
   ret void
 
-; CHECK: @test17
+; CHECK-LABEL: @test17(
 ; CHECK-NOT: switch.early.test
 ; CHECK-NOT: switch i32
 ; CHECK: ret void
 }
 
+define void @test18(i32 %arg) {
+bb:
+  %tmp = and i32 %arg, -2
+  %tmp1 = icmp eq i32 %tmp, 8
+  %tmp2 = icmp eq i32 %arg, 10
+  %tmp3 = or i1 %tmp1, %tmp2
+  %tmp4 = icmp eq i32 %arg, 11
+  %tmp5 = or i1 %tmp3, %tmp4
+  %tmp6 = icmp eq i32 %arg, 12
+  %tmp7 = or i1 %tmp5, %tmp6
+  br i1 %tmp7, label %bb19, label %bb8
+
+bb8:                                              ; preds = %bb
+  %tmp9 = add i32 %arg, -13
+  %tmp10 = icmp ult i32 %tmp9, 2
+  %tmp11 = icmp eq i32 %arg, 16
+  %tmp12 = or i1 %tmp10, %tmp11
+  %tmp13 = icmp eq i32 %arg, 17
+  %tmp14 = or i1 %tmp12, %tmp13
+  %tmp15 = icmp eq i32 %arg, 18
+  %tmp16 = or i1 %tmp14, %tmp15
+  %tmp17 = icmp eq i32 %arg, 15
+  %tmp18 = or i1 %tmp16, %tmp17
+  br i1 %tmp18, label %bb19, label %bb20
+
+bb19:                                             ; preds = %bb8, %bb
+  tail call void @foo1()
+  br label %bb20
+
+bb20:                                             ; preds = %bb19, %bb8
+  ret void
+
+; CHECK-LABEL: @test18(
+; CHECK: %arg.off = add i32 %arg, -8
+; CHECK: icmp ult i32 %arg.off, 11
+}
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index 24540e5217b9..3b449cb000ab 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -7,13 +7,18 @@ define void @foo() nounwind ssp {
   ret void, !dbg !7
 }
 
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"foo.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
+!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 4, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 5, i32 1, metadata !6, null}
+!8 = metadata !{metadata !"foo.c", metadata !"/private/tmp"}
+!9 = metadata !{metadata !0}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
index 10d6981af0e6..e9d93e834a50 100644
--- a/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
+++ b/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
@@ -17,7 +17,7 @@ bb:             ; preds = %entry
         br label %return
 return:         ; preds = %entry
         ret void
-; CHECK: @test1
+; CHECK-LABEL: @test1(
 ; CHECK: load volatile
 }
 
@@ -27,7 +27,7 @@ entry:
         store i32 4,i32* null
         ret void
         
-; CHECK: @test2
+; CHECK-LABEL: @test2(
 ; CHECK: call void @llvm.trap
 ; CHECK: unreachable
 }
@@ -38,14 +38,14 @@ entry:
         store volatile i32 4, i32* null
         ret void
 
-; CHECK: @test3
+; CHECK-LABEL: @test3(
 ; CHECK: store volatile i32 4, i32* null
 ; CHECK: ret
 }
 
 ; Check store before unreachable.
 define void @test4(i1 %C, i32* %P) {
-; CHECK: @test4
+; CHECK-LABEL: @test4(
 ; CHECK: entry:
 ; CHECK-NEXT: br i1 %C
 entry:
@@ -59,7 +59,7 @@ F:
 
 ; Check cmpxchg before unreachable.
 define void @test5(i1 %C, i32* %P) {
-; CHECK: @test5
+; CHECK-LABEL: @test5(
 ; CHECK: entry:
 ; CHECK-NEXT: br i1 %C
 entry:
@@ -73,7 +73,7 @@ F:
 
 ; Check atomicrmw before unreachable.
 define void @test6(i1 %C, i32* %P) {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK: entry:
 ; CHECK-NEXT: br i1 %C
 entry:
diff --git a/test/Transforms/SimplifyCFG/volatile-phioper.ll b/test/Transforms/SimplifyCFG/volatile-phioper.ll
index 164898897eff..1ef3a7ce59b3 100644
--- a/test/Transforms/SimplifyCFG/volatile-phioper.ll
+++ b/test/Transforms/SimplifyCFG/volatile-phioper.ll
@@ -7,7 +7,7 @@
 ; it can no longer use language standard as an excuse. The compiler
 ; needs to expose the volatile access to the platform.
 ;
-; CHECK: @test
+; CHECK-LABEL: @test(
 ; CHECK: entry:
 ; CHECK: @Trace
 ; CHECK: while.body:
@@ -41,8 +41,8 @@ end:
 }
 declare i32 @Trace(...) #1
 
-attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #1 = { "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" }
 attributes #2 = { nounwind }
 
 !0 = metadata !{i32 1039}
diff --git a/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll b/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll
deleted file mode 100644
index 8816579a42ff..000000000000
--- a/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -disable-output
-
-@G = constant [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=1]
-
-declare i32 @sprintf(i8*, i8*, ...)
-
-define void @foo(i8* %P, i32* %X) {
-	call i32 (i8*, i8*, ...)* @sprintf( i8* %P, i8* getelementptr ([3 x i8]* @G, i32 0, i32 0), i32* %X )		; <i32>:1 [#uses=0]
-	ret void
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
deleted file mode 100644
index ae917f70f4f1..000000000000
--- a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; PR1307
-; RUN: opt < %s -simplify-libcalls -instcombine -S > %t
-; RUN: grep "@str,.*i64 3" %t
-; RUN: grep "@str1,.*i64 7" %t
-; RUN: grep "ret i8.*null" %t
-; END.
-
-@str = internal constant [5 x i8] c"foog\00"
-@str1 = internal constant [8 x i8] c"blahhh!\00"
-@str2 = internal constant [5 x i8] c"Ponk\00"
-
-define i8* @test1() {
-        %tmp3 = tail call i8* @strchr( i8* getelementptr ([5 x i8]* @str, i32 0, i32 2), i32 103 )              ; <i8*> [#uses=1]
-        ret i8* %tmp3
-}
-
-declare i8* @strchr(i8*, i32)
-
-define i8* @test2() {
-        %tmp3 = tail call i8* @strchr( i8* getelementptr ([8 x i8]* @str1, i32 0, i32 2), i32 0 )               ; <i8*> [#uses=1]
-        ret i8* %tmp3
-}
-
-define i8* @test3() {
-entry:
-        %tmp3 = tail call i8* @strchr( i8* getelementptr ([5 x i8]* @str2, i32 0, i32 1), i32 80 )              ; <i8*> [#uses=1]
-        ret i8* %tmp3
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll b/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll
deleted file mode 100644
index b6874322c4c7..000000000000
--- a/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep i32
-; PR2341
-
-@_2E_str = external constant [5 x i8]		; <[5 x i8]*> [#uses=1]
-
-declare i32 @memcmp(i8*, i8*, i32) nounwind readonly 
-
-define i1 @f(i8** %start_addr) {
-entry:
-	%tmp4 = load i8** %start_addr, align 4		; <i8*> [#uses=1]
-	%tmp5 = call i32 @memcmp( i8* %tmp4, i8* getelementptr ([5 x i8]* @_2E_str, i32 0, i32 0), i32 4 ) nounwind readonly 		; <i32> [#uses=1]
-	%tmp6 = icmp eq i32 %tmp5, 0		; <i1> [#uses=1]
-	ret i1 %tmp6
-}
diff --git a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
deleted file mode 100644
index f4c80ed13271..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -instcombine -S | grep "ret i32 -65"
-; PR4284
-
-define i32 @test() nounwind {
-entry:
-	%c0 = alloca i8, align 1		; <i8*> [#uses=2]
-	%c2 = alloca i8, align 1		; <i8*> [#uses=2]
-	store i8 64, i8* %c0
-	store i8 -127, i8* %c2
-	%call = call i32 @memcmp(i8* %c0, i8* %c2, i32 1)		; <i32> [#uses=1]
-	ret i32 %call
-}
-
-declare i32 @memcmp(i8*, i8*, i32)
diff --git a/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll b/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll
deleted file mode 100644
index 7af0a261d436..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -disable-output
-; PR4641
-
-	%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64, %struct.pthread_mutex*, %struct.pthread*, i32, i32, %union.anon }
-	%struct.__sbuf = type { i8*, i32, [4 x i8] }
-	%struct.pthread = type opaque
-	%struct.pthread_mutex = type opaque
-	%union.anon = type { i64, [120 x i8] }
-@.str13 = external constant [2 x i8]		; <[2 x i8]*> [#uses=1]
-@.str14 = external constant [2 x i8]		; <[2 x i8]*> [#uses=1]
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
-	call void @exit(i32 0) nounwind
-	%cond392 = select i1 undef, i8* getelementptr ([2 x i8]* @.str13, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str14, i32 0, i32 0)		; <i8*> [#uses=1]
-	%call393 = call %struct.__sFILE* @fopen(i8* undef, i8* %cond392) nounwind		; <%struct.__sFILE*> [#uses=0]
-	unreachable
-}
-
-declare %struct.__sFILE* @fopen(i8*, i8*)
-
-declare void @exit(i32)
diff --git a/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll b/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll
deleted file mode 100644
index b5a788e09735..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -disable-output
-; PR4645
-
-define i32 @main() {
-entry:
-	br label %if.then
-
-lor.lhs.false:		; preds = %while.body
-	br i1 undef, label %if.then, label %for.cond
-
-if.then:		; preds = %lor.lhs.false, %while.body
-	call void @exit(i32 1)
-	br label %for.cond
-
-for.cond:		; preds = %for.end, %if.then, %lor.lhs.false
-	%j.0 = phi i32 [ %inc47, %for.end ], [ 0, %if.then ], [ 0, %lor.lhs.false ]		; <i32> [#uses=1]
-	unreachable
-
-for.end:		; preds = %for.cond20
-	%inc47 = add i32 %j.0, 1		; <i32> [#uses=1]
-	br label %for.cond
-}
-
-declare void @exit(i32)
diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll
deleted file mode 100644
index 1faad036a865..000000000000
--- a/test/Transforms/SimplifyLibCalls/MemCpy.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -constprop -instcombine -S | not grep "call.*llvm.memcpy.i32"
-
-@h = constant [2 x i8] c"h\00"		; <[2 x i8]*> [#uses=1]
-@hel = constant [4 x i8] c"hel\00"		; <[4 x i8]*> [#uses=1]
-@hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
-
-define i32 @main() {
-  %h_p = getelementptr [2 x i8]* @h, i32 0, i32 0
-  %hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0
-  %hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
-  %target = alloca [1024 x i8]
-  %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
-  ret i32 0
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/SimplifyLibCalls/PR7357.ll b/test/Transforms/SimplifyLibCalls/PR7357.ll
deleted file mode 100644
index 3529a9cfb1b8..000000000000
--- a/test/Transforms/SimplifyLibCalls/PR7357.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -simplify-libcalls -S | FileCheck %s
-@.str1 = private constant [11 x i8] c"(){};[]&|:\00", align 4
-
-; check that simplify libcalls will not replace a call with one calling
-; convention with a new call with a different calling convention.
-
-; CHECK: define arm_aapcscc i32 @foo(i32 %argc)
-; CHECK: call arm_aapcscc  i8* @strchr
-define arm_aapcscc i32 @foo(i32 %argc) nounwind {
-bb.nph:
-  call arm_aapcscc  i8* @strchr(i8* getelementptr ([11 x i8]* @.str1, i32 0,
-i32 0), i32 %argc) nounwind readonly
-  ret i32 0
-}
-
-declare arm_aapcscc i8* @strchr(i8*, i32) nounwind readonly
diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
deleted file mode 100644
index ad54c3e38f13..000000000000
--- a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
+++ /dev/null
@@ -1,179 +0,0 @@
-; RUN: opt -S -simplify-libcalls -instcombine < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
-
-define i32 @test1(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @ceil(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test1
-; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
-; CHECK-NEXT: fcmp oeq float %ceilf, %y
-}
-
-define i32 @test2(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @fabs(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test2
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
-}
-
-define i32 @test3(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @floor(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test3
-; CHECK-NEXT: %floorf = call float @floorf(float %x)
-; CHECK-NEXT: fcmp oeq float %floorf, %y
-}
-
-define i32 @test4(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @nearbyint(double %1) nounwind
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test4
-; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
-; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
-}
-
-define i32 @test5(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @rint(double %1) nounwind
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test5
-; CHECK-NEXT: %rintf = call float @rintf(float %x)
-; CHECK-NEXT: fcmp oeq float %rintf, %y
-}
-
-define i32 @test6(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @round(double %1) nounwind readnone
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test6
-; CHECK-NEXT: %roundf = call float @roundf(float %x)
-; CHECK-NEXT: fcmp oeq float %roundf, %y
-}
-
-define i32 @test7(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = call double @trunc(double %1) nounwind
-  %3 = fpext float %y to double
-  %4 = fcmp oeq double %2, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test7
-; CHECK-NEXT: %truncf = call float @truncf(float %x)
-; CHECK-NEXT: fcmp oeq float %truncf, %y
-}
-
-define i32 @test8(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @ceil(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test8
-; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
-; CHECK-NEXT: fcmp oeq float %ceilf, %y
-}
-
-define i32 @test9(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @fabs(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test9
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
-}
-
-define i32 @test10(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @floor(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test10
-; CHECK-NEXT: %floorf = call float @floorf(float %x)
-; CHECK-NEXT: fcmp oeq float %floorf, %y
-}
-
-define i32 @test11(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @nearbyint(double %2) nounwind
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test11
-; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
-; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
-}
-
-define i32 @test12(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @rint(double %2) nounwind
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test12
-; CHECK-NEXT: %rintf = call float @rintf(float %x)
-; CHECK-NEXT: fcmp oeq float %rintf, %y
-}
-
-define i32 @test13(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @round(double %2) nounwind readnone
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test13
-; CHECK-NEXT: %roundf = call float @roundf(float %x)
-; CHECK-NEXT: fcmp oeq float %roundf, %y
-}
-
-define i32 @test14(float %x, float %y) nounwind uwtable {
-  %1 = fpext float %y to double
-  %2 = fpext float %x to double
-  %3 = call double @trunc(double %2) nounwind
-  %4 = fcmp oeq double %1, %3
-  %5 = zext i1 %4 to i32
-  ret i32 %5
-; CHECK: @test14
-; CHECK-NEXT: %truncf = call float @truncf(float %x)
-; CHECK-NEXT: fcmp oeq float %truncf, %y
-}
-
-declare double @fabs(double) nounwind readnone
-declare double @ceil(double) nounwind readnone
-declare double @floor(double) nounwind readnone
-declare double @nearbyint(double) nounwind readnone
-declare double @rint(double) nounwind readnone
-declare double @round(double) nounwind readnone
-declare double @trunc(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/lit.local.cfg b/test/Transforms/SimplifyLibCalls/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/SimplifyLibCalls/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyLibCalls/win-math.ll b/test/Transforms/SimplifyLibCalls/win-math.ll
deleted file mode 100644
index 367e5b80721c..000000000000
--- a/test/Transforms/SimplifyLibCalls/win-math.ll
+++ /dev/null
@@ -1,275 +0,0 @@
-; RUN: opt -O2 -S -mtriple=i386-pc-win32 < %s | FileCheck %s -check-prefix=WIN32
-; RUN: opt -O2 -S -mtriple=x86_64-pc-win32 < %s | FileCheck %s -check-prefix=WIN64
-; RUN: opt -O2 -S -mtriple=i386-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW32
-; RUN: opt -O2 -S -mtriple=x86_64-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW64
-
-; x86 win32 msvcrt does not provide entry points for single-precision libm.
-; x86-64 win32 msvcrt does (except for fabsf)
-; msvcrt does not provide C99 math, but mingw32 does.
-
-declare double @acos(double %x)
-define float @float_acos(float %x) nounwind readnone {
-; WIN32: @float_acos
-; WIN32-NOT: float @acosf
-; WIN32: double @acos
-    %1 = fpext float %x to double
-    %2 = call double @acos(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @asin(double %x)
-define float @float_asin(float %x) nounwind readnone {
-; WIN32: @float_asin
-; WIN32-NOT: float @asinf
-; WIN32: double @asin
-    %1 = fpext float %x to double
-    %2 = call double @asin(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @atan(double %x)
-define float @float_atan(float %x) nounwind readnone {
-; WIN32: @float_atan
-; WIN32-NOT: float @atanf
-; WIN32: double @atan
-    %1 = fpext float %x to double
-    %2 = call double @atan(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @atan2(double %x, double %y)
-define float @float_atan2(float %x, float %y) nounwind readnone {
-; WIN32: @float_atan2
-; WIN32-NOT: float @atan2f
-; WIN32: double @atan2
-    %1 = fpext float %x to double
-    %2 = fpext float %y to double
-    %3 = call double @atan2(double %1, double %2)
-    %4 = fptrunc double %3 to float
-    ret float %4
-}
-
-declare double @ceil(double %x)
-define float @float_ceil(float %x) nounwind readnone {
-; WIN32: @float_ceil
-; WIN32-NOT: float @ceilf
-; WIN32: double @ceil
-; WIN64: @float_ceil
-; WIN64: float @ceilf
-; WIN64-NOT: double @ceil
-; MINGW32: @float_ceil
-; MINGW32: float @ceilf
-; MINGW32-NOT: double @ceil
-; MINGW64: @float_ceil
-; MINGW64: float @ceilf
-; MINGW64-NOT: double @ceil
-    %1 = fpext float %x to double
-    %2 = call double @ceil(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @_copysign(double %x)
-define float @float_copysign(float %x) nounwind readnone {
-; WIN32: @float_copysign
-; WIN32-NOT: float @copysignf
-; WIN32-NOT: float @_copysignf
-; WIN32: double @_copysign
-    %1 = fpext float %x to double
-    %2 = call double @_copysign(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @cos(double %x)
-define float @float_cos(float %x) nounwind readnone {
-; WIN32: @float_cos
-; WIN32-NOT: float @cosf
-; WIN32: double @cos
-    %1 = fpext float %x to double
-    %2 = call double @cos(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @cosh(double %x)
-define float @float_cosh(float %x) nounwind readnone {
-; WIN32: @float_cosh
-; WIN32-NOT: float @coshf
-; WIN32: double @cosh
-    %1 = fpext float %x to double
-    %2 = call double @cosh(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @exp(double %x, double %y)
-define float @float_exp(float %x, float %y) nounwind readnone {
-; WIN32: @float_exp
-; WIN32-NOT: float @expf
-; WIN32: double @exp
-    %1 = fpext float %x to double
-    %2 = fpext float %y to double
-    %3 = call double @exp(double %1, double %2)
-    %4 = fptrunc double %3 to float
-    ret float %4
-}
-
-declare double @fabs(double %x, double %y)
-define float @float_fabs(float %x, float %y) nounwind readnone {
-; WIN32: @float_fabs
-; WIN32-NOT: float @fabsf
-; WIN32: double @fabs
-; WIN64: @float_fabs
-; WIN64-NOT: float @fabsf
-; WIN64: double @fabs
-    %1 = fpext float %x to double
-    %2 = fpext float %y to double
-    %3 = call double @fabs(double %1, double %2)
-    %4 = fptrunc double %3 to float
-    ret float %4
-}
-
-declare double @floor(double %x)
-define float @float_floor(float %x) nounwind readnone {
-; WIN32: @float_floor
-; WIN32-NOT: float @floorf
-; WIN32: double @floor
-; WIN64: @float_floor
-; WIN64: float @floorf
-; WIN64-NOT: double @floor
-; MINGW32: @float_floor
-; MINGW32: float @floorf
-; MINGW32-NOT: double @floor
-; MINGW64: @float_floor
-; MINGW64: float @floorf
-; MINGW64-NOT: double @floor
-    %1 = fpext float %x to double
-    %2 = call double @floor(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @fmod(double %x, double %y)
-define float @float_fmod(float %x, float %y) nounwind readnone {
-; WIN32: @float_fmod
-; WIN32-NOT: float @fmodf
-; WIN32: double @fmod
-    %1 = fpext float %x to double
-    %2 = fpext float %y to double
-    %3 = call double @fmod(double %1, double %2)
-    %4 = fptrunc double %3 to float
-    ret float %4
-}
-
-declare double @log(double %x)
-define float @float_log(float %x) nounwind readnone {
-; WIN32: @float_log
-; WIN32-NOT: float @logf
-; WIN32: double @log
-    %1 = fpext float %x to double
-    %2 = call double @log(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @pow(double %x, double %y)
-define float @float_pow(float %x, float %y) nounwind readnone {
-; WIN32: @float_pow
-; WIN32-NOT: float @powf
-; WIN32: double @pow
-    %1 = fpext float %x to double
-    %2 = fpext float %y to double
-    %3 = call double @pow(double %1, double %2)
-    %4 = fptrunc double %3 to float
-    ret float %4
-}
-
-declare double @sin(double %x)
-define float @float_sin(float %x) nounwind readnone {
-; WIN32: @float_sin
-; WIN32-NOT: float @sinf
-; WIN32: double @sin
-    %1 = fpext float %x to double
-    %2 = call double @sin(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @sinh(double %x)
-define float @float_sinh(float %x) nounwind readnone {
-; WIN32: @float_sinh
-; WIN32-NOT: float @sinhf
-; WIN32: double @sinh
-    %1 = fpext float %x to double
-    %2 = call double @sinh(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @sqrt(double %x)
-define float @float_sqrt(float %x) nounwind readnone {
-; WIN32: @float_sqrt
-; WIN32-NOT: float @sqrtf
-; WIN32: double @sqrt
-; WIN64: @float_sqrt
-; WIN64: float @sqrtf
-; WIN64-NOT: double @sqrt
-; MINGW32: @float_sqrt
-; MINGW32: float @sqrtf
-; MINGW32-NOT: double @sqrt
-; MINGW64: @float_sqrt
-; MINGW64: float @sqrtf
-; MINGW64-NOT: double @sqrt
-    %1 = fpext float %x to double
-    %2 = call double @sqrt(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @tan(double %x)
-define float @float_tan(float %x) nounwind readnone {
-; WIN32: @float_tan
-; WIN32-NOT: float @tanf
-; WIN32: double @tan
-    %1 = fpext float %x to double
-    %2 = call double @tan(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-declare double @tanh(double %x)
-define float @float_tanh(float %x) nounwind readnone {
-; WIN32: @float_tanh
-; WIN32-NOT: float @tanhf
-; WIN32: double @tanh
-    %1 = fpext float %x to double
-    %2 = call double @tanh(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
-; win32 does not have round; mingw32 does
-declare double @round(double %x)
-define float @float_round(float %x) nounwind readnone {
-; WIN32: @float_round
-; WIN32-NOT: float @roundf
-; WIN32: double @round
-; WIN64: @float_round
-; WIN64-NOT: float @roundf
-; WIN64: double @round
-; MINGW32: @float_round
-; MINGW32: float @roundf
-; MINGW32-NOT: double @round
-; MINGW64: @float_round
-; MINGW64: float @roundf
-; MINGW64-NOT: double @round
-    %1 = fpext float %x to double
-    %2 = call double @round(double %1)
-    %3 = fptrunc double %2 to float
-    ret float %3
-}
-
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 1d0b6b529d56..85ab3766002d 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -6,7 +6,7 @@
 ; Sink should sink the load past the store (which doesn't overlap) into
 ; the block that uses it.
 
-;      CHECK: @foo
+;      CHECK-LABEL: @foo(
 ;      CHECK: true:
 ; CHECK-NEXT: %l = load i32* @A
 ; CHECK-NEXT: ret i32 %l
@@ -23,7 +23,7 @@ false:
 
 ; But don't sink load volatiles...
 
-;      CHECK: @foo2
+;      CHECK-LABEL: @foo2(
 ;      CHECK: load volatile
 ; CHECK-NEXT: store i32
 
@@ -39,7 +39,7 @@ false:
 
 ; Sink to the nearest post-dominator
 
-;      CHECK: @diamond
+;      CHECK-LABEL: @diamond(
 ;      CHECK: X:
 ; CHECK-NEXT: phi
 ; CHECK-NEXT: mul nsw
diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/Sink/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
index 69febc35df76..438fa96b41ef 100644
--- a/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
+++ b/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll
@@ -1,5 +1,10 @@
-; RUN: opt < %s -strip -S | grep foo | count 2
-; RUN: opt < %s -strip -S | grep bar | count 2
+; RUN: opt < %s -strip -S | FileCheck %s
+
+; CHECK: foo
+; CHECK: bar
+; CHECK: foo
+; CHECK: bar
+
 @llvm.used = appending global [2 x i8*] [ i8* bitcast (i32* @foo to i8*), i8* bitcast (i32 ()* @bar to i8*) ], section "llvm.metadata"		; <[2 x i8*]*> [#uses=0]
 @foo = internal constant i32 41		; <i32*> [#uses=1]
 
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index f5899d26189d..5353744824dc 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -1,4 +1,6 @@
-; RUN: opt -strip-debug < %s | llvm-dis | grep -v llvm.dbg
+; RUN: opt -strip-debug < %s -S | FileCheck %s
+
+; CHECK-NOT: llvm.dbg
 
 @x = common global i32 0                          ; <i32*> [#uses=0]
 
@@ -10,19 +12,23 @@ entry:
 
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!13}
 !llvm.dbg.sp = !{!0}
 !llvm.dbg.lv.foo = !{!5}
 !llvm.dbg.gv = !{!8}
 
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"b.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"b.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{null}
 !5 = metadata !{i32 524544, metadata !6, metadata !"y", metadata !1, i32 3, metadata !7} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 524299, metadata !0, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524299, metadata !12, metadata !0, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 524324, metadata !12, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 524340, i32 0, metadata !1, metadata !"x", metadata !"x", metadata !"", metadata !1, i32 1, metadata !7, i1 false, i1 true, i32* @x} ; [ DW_TAG_variable ]
 !9 = metadata !{i32 0}
 !10 = metadata !{i32 3, i32 0, metadata !6, null}
 !11 = metadata !{i32 4, i32 0, metadata !6, null}
+!12 = metadata !{metadata !"b.c", metadata !"/tmp"}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll b/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll
deleted file mode 100644
index 1df0351c59ab..000000000000
--- a/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: opt -strip-dead-debug-info < %s | llvm-dis -o %t.ll
-; RUN: grep -v bar %t.ll
-; RUN: grep -v abcd %t.ll
-
-@xyz = global i32 2                               ; <i32*> [#uses=1]
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-define i32 @fn() nounwind readnone ssp {
-entry:
-  ret i32 0, !dbg !17
-}
-
-define i32 @foo(i32 %i) nounwind readonly ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !14), !dbg !19
-  %.0 = load i32* @xyz, align 4                   ; <i32> [#uses=1]
-  ret i32 %.0, !dbg !20
-}
-
-!llvm.dbg.sp = !{!0, !5, !9}
-!llvm.dbg.lv.bar = !{!12}
-!llvm.dbg.lv.foo = !{!14}
-!llvm.dbg.gv = !{!15, !16}
-
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"g.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"g.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null}
-!5 = metadata !{i32 524334, i32 0, metadata !1, metadata !"fn", metadata !"fn", metadata !"fn", metadata !1, i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!11 = metadata !{metadata !8, metadata !8}
-!12 = metadata !{i32 524544, metadata !13, metadata !"bb", metadata !1, i32 5, metadata !8} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 524299, metadata !0, i32 5, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 524545, metadata !9, metadata !"i", metadata !1, i32 7, metadata !8} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 524340, i32 0, metadata !1, metadata !"abcd", metadata !"abcd", metadata !"", metadata !1, i32 2, metadata !8, i1 true, i1 true, null} ; [ DW_TAG_variable ]
-!16 = metadata !{i32 524340, i32 0, metadata !1, metadata !"xyz", metadata !"xyz", metadata !"", metadata !1, i32 3, metadata !8, i1 false, i1 true, i32* @xyz} ; [ DW_TAG_variable ]
-!17 = metadata !{i32 6, i32 0, metadata !18, null}
-!18 = metadata !{i32 524299, metadata !5, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 7, i32 0, metadata !9, null}
-!20 = metadata !{i32 10, i32 0, metadata !21, null}
-!21 = metadata !{i32 524299, metadata !9, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index 7de5a028054a..28784686bb68 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -4,16 +4,21 @@ entry:
   ret i32 0, !dbg !8
 }
 
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.gv = !{!6}
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !"clang version 2.8 (trunk 112062)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 524334, metadata !10, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !10} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ]
-!7 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ]
+!5 = metadata !{i32 524324, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0, null} ; [ DW_TAG_variable ]
+!7 = metadata !{i32 524326, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ]
 !8 = metadata !{i32 3, i32 13, metadata !9, null}
-!9 = metadata !{i32 524299, metadata !0, i32 3, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 524299, metadata !10, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW"}
+!11 = metadata !{i32 0}
+!12 = metadata !{metadata !0}
+!13 = metadata !{metadata !6}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/StripSymbols/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
new file mode 100644
index 000000000000..2d687ae65470
--- /dev/null
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -0,0 +1,58 @@
+; RUN: opt -strip-dead-debug-info -verify %s -S | FileCheck %s
+
+; CHECK: ModuleID = '{{.*}}'
+; CHECK-NOT: bar
+; CHECK-NOT: abcd
+
+@xyz = global i32 2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #0
+
+; Function Attrs: nounwind readnone ssp
+define i32 @fn() #1 {
+entry:
+  ret i32 0, !dbg !18
+}
+
+; Function Attrs: nounwind readonly ssp
+define i32 @foo(i32 %i) #2 {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20
+  %.0 = load i32* @xyz, align 4
+  ret i32 %.0, !dbg !21
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readnone ssp }
+attributes #2 = { nounwind readonly ssp }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!25}
+
+!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
+!1 = metadata !{metadata !"g.c", metadata !"/tmp/"}
+!2 = metadata !{null}
+!3 = metadata !{i32 524334, metadata !1, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
+!4 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{i32 524329, metadata !1}          ; [ DW_TAG_file_type ] [/tmp//g.c]
+!6 = metadata !{i32 524334, metadata !1, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn]
+!7 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 524324, metadata !1, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 524334, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo]
+!11 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !9, metadata !9}
+!13 = metadata !{i32 524544, metadata !14, metadata !"bb", metadata !5, i32 5, metadata !9}
+!14 = metadata !{i32 524299, metadata !1, metadata !3, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!15 = metadata !{i32 524545, metadata !10, metadata !"i", metadata !5, i32 7, metadata !9}
+!16 = metadata !{i32 524340, i32 0, metadata !5, metadata !"abcd", metadata !"abcd", metadata !"", metadata !5, i32 2, metadata !9, i1 true, i1 true, null, null}
+!17 = metadata !{i32 524340, i32 0, metadata !5, metadata !"xyz", metadata !"xyz", metadata !"", metadata !5, i32 3, metadata !9, i1 false, i1 true, i32* @xyz, null}
+!18 = metadata !{i32 6, i32 0, metadata !19, null}
+!19 = metadata !{i32 524299, metadata !1, metadata !6, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!20 = metadata !{i32 7, i32 0, metadata !10, null}
+!21 = metadata !{i32 10, i32 0, metadata !22, null}
+!22 = metadata !{i32 524299, metadata !1, metadata !10, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!23 = metadata !{metadata !3, metadata !6, metadata !10}
+!24 = metadata !{metadata !16, metadata !17}
+!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/Transforms/StructurizeCFG/branch-on-argument.ll b/test/Transforms/StructurizeCFG/branch-on-argument.ll
new file mode 100644
index 000000000000..4eba0cd8fe15
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/branch-on-argument.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s
+
+; CHECK-LABEL: @invert_branch_on_arg_inf_loop(
+; CHECK: entry:
+; CHECK: %arg.inv = xor i1 %arg, true
+; CHECK: phi i1 [ false, %Flow1 ], [ %arg.inv, %entry ]
+define void @invert_branch_on_arg_inf_loop(i32 addrspace(1)* %out, i1 %arg) {
+entry:
+  br i1 %arg, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  store i32 999, i32 addrspace(1)* %out, align 4
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+
+; CHECK-LABEL: @invert_branch_on_arg_jump_into_loop(
+; CHECK: entry:
+; CHECK: %arg.inv = xor i1 %arg, true
+; CHECK: Flow:
+; CHECK: Flow1:
+define void @invert_branch_on_arg_jump_into_loop(i32 addrspace(1)* %out, i32 %n, i1 %arg) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %ptr = getelementptr i32 addrspace(1)* %out, i32 %i
+  store i32 %i, i32 addrspace(1)* %ptr, align 4
+  br i1 %arg, label %mid.loop, label %end.loop
+
+mid.loop:
+  store i32 333, i32 addrspace(1)* %out, align 4
+  br label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
diff --git a/test/Transforms/StructurizeCFG/loop-multiple-exits.ll b/test/Transforms/StructurizeCFG/loop-multiple-exits.ll
new file mode 100644
index 000000000000..45f3165671e1
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/loop-multiple-exits.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+;
+; void loop(int *out, int cond_a, int cond_b) {
+;
+;   unsigned i;
+;   for (i = 0; i < cond_a; i++) {
+;     out[i] = i;
+;     if (i > cond_b) {
+;       break;
+;     }
+;     out[i + cond_a] = i;
+;   }
+; }
+
+define void @loop(i32 addrspace(1)* %out, i32 %cond_a, i32 %cond_b) nounwind uwtable {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ult i32 %i.0, %cond_a
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: for.body:
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %i.0
+  store i32 %i.0, i32 addrspace(1)* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %i.0, %cond_b
+; CHECK: br i1 %{{[0-9a-zA-Z_]+}}, label %for.inc, label %[[FLOW1:[0-9a-zA-Z_]+]]
+  br i1 %cmp1, label %for.end, label %for.inc
+
+; CHECK: [[FLOW:[0-9a-zA-Z]+]]:
+; CHECK: br i1 %{{[0-9a-zA-Z_]+}}, label %for.end, label %for.cond
+
+; CHECK: for.inc:
+; CHECK: br label %[[FLOW1]]
+
+for.inc:                                          ; preds = %for.body
+  %0 = add i32 %cond_a, %i.0
+  %arrayidx3 = getelementptr inbounds i32 addrspace(1)* %out, i32 %0
+  store i32 %i.0, i32 addrspace(1)* %arrayidx3, align 4
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+; CHECK: [[FLOW1]]:
+; CHECK: br label %[[FLOW]]
+
+for.end:                                          ; preds = %for.cond, %for.body
+  ret void
+}
diff --git a/test/Transforms/StructurizeCFG/no-branch-to-entry.ll b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll
new file mode 100644
index 000000000000..2e22c8715347
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/no-branch-to-entry.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s
+
+; CHECK-LABEL: @no_branch_to_entry_undef(
+; CHECK: entry:
+; CHECK-NEXT: br label %entry.orig
+define void @no_branch_to_entry_undef(i32 addrspace(1)* %out) {
+entry:
+  br i1 undef, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  store i32 999, i32 addrspace(1)* %out, align 4
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+; CHECK-LABEL: @no_branch_to_entry_true(
+; CHECK: entry:
+; CHECK-NEXT: br label %entry.orig
+define void @no_branch_to_entry_true(i32 addrspace(1)* %out) {
+entry:
+  br i1 true, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  store i32 999, i32 addrspace(1)* %out, align 4
+  br label %for.body
+
+for.end:                                          ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/StructurizeCFG/switch.ll b/test/Transforms/StructurizeCFG/switch.ll
new file mode 100644
index 000000000000..316df57ea736
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/switch.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+; The structurizecfg pass cannot handle switch instructions, so we need to
+; make sure the lower switch pass is always run before structurizecfg.
+
+; CHECK-LABEL: @switch(
+define void @switch(i32 addrspace(1)* %out, i32 %cond) nounwind {
+entry:
+; CHECK: icmp
+  switch i32 %cond, label %done [ i32 0, label %zero]
+
+; CHECK: zero:
+zero:
+; CHECK: store i32 7, i32 addrspace(1)* %out
+  store i32 7, i32 addrspace(1)* %out
+; CHECK: br label %done
+  br label %done
+
+; CHECK: done:
+done:
+; CHECK: ret void
+  ret void
+}
diff --git a/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll b/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
index 06265926fa68..053fc9543611 100644
--- a/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
+++ b/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
@@ -2,7 +2,7 @@
 ; PR7328
 ; PR7506
 define i32 @foo(i32 %x) {
-; CHECK: define i32 @foo
+; CHECK-LABEL: define i32 @foo(
 ; CHECK: %accumulator.tr = phi i32 [ 1, %entry ], [ 0, %body ]
 entry:
   %cond = icmp ugt i32 %x, 0                      ; <i1> [#uses=1]
diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll
index 9475f87e8f5b..c95bfe6aeed1 100644
--- a/test/Transforms/TailCallElim/accum_recursion.ll
+++ b/test/Transforms/TailCallElim/accum_recursion.ll
@@ -13,7 +13,7 @@ else:		; preds = %entry
 	ret i32 1
 }
 
-; CHECK: define i32 @test1_factorial
+; CHECK-LABEL: define i32 @test1_factorial(
 ; CHECK: phi i32
 ; CHECK-NOT: call i32
 ; CHECK: else:
@@ -34,14 +34,14 @@ return:		; preds = %entry
 	ret i32 %x
 }
 
-; CHECK: define i32 @test2_mul
+; CHECK-LABEL: define i32 @test2_mul(
 ; CHECK: phi i32
 ; CHECK-NOT: call i32
 ; CHECK: return:
 
 
 define i64 @test3_fib(i64 %n) nounwind readnone {
-; CHECK: @test3_fib
+; CHECK-LABEL: @test3_fib(
 entry:
 ; CHECK: tailrecurse:
 ; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ]
diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll
new file mode 100644
index 000000000000..35420ab08c33
--- /dev/null
+++ b/test/Transforms/TailCallElim/basic.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
+
+declare void @noarg()
+declare void @use(i32*)
+declare void @use_nocapture(i32* nocapture)
+declare void @use2_nocapture(i32* nocapture, i32* nocapture)
+
+; Trivial case. Mark @noarg with tail call.
+define void @test0() {
+; CHECK: tail call void @noarg()
+	call void @noarg()
+	ret void
+}
+
+; PR615. Make sure that we do not move the alloca so that it interferes with the tail call.
+define i32 @test1() {
+; CHECK: i32 @test1()
+; CHECK-NEXT: alloca
+	%A = alloca i32		; <i32*> [#uses=2]
+	store i32 5, i32* %A
+	call void @use(i32* %A)
+; CHECK: tail call i32 @test1
+	%X = tail call i32 @test1()		; <i32> [#uses=1]
+	ret i32 %X
+}
+
+; This function contains intervening instructions which should be moved out of the way
+define i32 @test2(i32 %X) {
+; CHECK: i32 @test2
+; CHECK-NOT: call
+; CHECK: ret i32
+entry:
+	%tmp.1 = icmp eq i32 %X, 0		; <i1> [#uses=1]
+	br i1 %tmp.1, label %then.0, label %endif.0
+then.0:		; preds = %entry
+	%tmp.4 = add i32 %X, 1		; <i32> [#uses=1]
+	ret i32 %tmp.4
+endif.0:		; preds = %entry
+	%tmp.10 = add i32 %X, -1		; <i32> [#uses=1]
+	%tmp.8 = call i32 @test2(i32 %tmp.10)		; <i32> [#uses=1]
+	%DUMMY = add i32 %X, 1		; <i32> [#uses=0]
+	ret i32 %tmp.8
+}
+
+; Though this case seems to be fairly unlikely to occur in the wild, someone
+; plunked it into the demo script, so maybe they care about it.
+define i32 @test3(i32 %c) {
+; CHECK: i32 @test3
+; CHECK-NOT: call
+; CHECK: ret i32 0
+entry:
+	%tmp.1 = icmp eq i32 %c, 0		; <i1> [#uses=1]
+	br i1 %tmp.1, label %return, label %else
+else:		; preds = %entry
+	%tmp.5 = add i32 %c, -1		; <i32> [#uses=1]
+	%tmp.3 = call i32 @test3(i32 %tmp.5)		; <i32> [#uses=0]
+	ret i32 0
+return:		; preds = %entry
+	ret i32 0
+}
+
+; Make sure that a nocapture pointer does not stop adding a tail call marker to
+; an unrelated call and additionally that we do not mark the nocapture call with
+; a tail call.
+;
+; rdar://14324281
+define void @test4() {
+; CHECK: void @test4
+; CHECK-NOT: tail call void @use_nocapture
+; CHECK: tail call void @noarg()
+; CHECK: ret void
+  %a = alloca i32
+  call void @use_nocapture(i32* %a)
+  call void @noarg()
+  ret void
+}
+
+; Make sure that we do not perform TRE even with a nocapture use. This is due to
+; bad codegen caused by PR962.
+;
+; rdar://14324281.
+define i32* @test5(i32* nocapture %A, i1 %cond) {
+; CHECK: i32* @test5
+; CHECK-NOT: tailrecurse:
+; CHECK: ret i32* null
+  %B = alloca i32
+  br i1 %cond, label %cond_true, label %cond_false
+cond_true:
+  call i32* @test5(i32* %B, i1 false)
+  ret i32* null
+cond_false:
+  call void @use2_nocapture(i32* %A, i32* %B)
+  call void @noarg()
+  ret i32* null
+}
+
+; PR14143: Make sure that we do not mark functions with nocapture allocas with tail.
+;
+; rdar://14324281.
+define void @test6(i32* %a, i32* %b) {
+; CHECK-LABEL: @test6(
+; CHECK-NOT: tail call
+; CHECK: ret void
+  %c = alloca [100 x i8], align 16
+  %tmp = bitcast [100 x i8]* %c to i32*
+  call void @use2_nocapture(i32* %b, i32* %tmp)
+  ret void
+}
+
+; PR14143: Make sure that we do not mark functions with nocapture allocas with tail.
+;
+; rdar://14324281
+define void @test7(i32* %a, i32* %b) nounwind uwtable {
+entry:
+; CHECK-LABEL: @test7(
+; CHECK-NOT: tail call
+; CHECK: ret void
+  %c = alloca [100 x i8], align 16
+  %0 = bitcast [100 x i8]* %c to i32*
+  call void @use2_nocapture(i32* %0, i32* %a)
+  call void @use2_nocapture(i32* %b, i32* %0)
+  ret void
+}
+
+; If we have a mix of escaping captured/non-captured allocas, ensure that we do
+; not do anything including marking callsites with the tail call marker.
+;
+; rdar://14324281.
+define i32* @test8(i32* nocapture %A, i1 %cond) {
+; CHECK: i32* @test8
+; CHECK-NOT: tailrecurse:
+; CHECK-NOT: tail call
+; CHECK: ret i32* null
+  %B = alloca i32
+  %B2 = alloca i32
+  br i1 %cond, label %cond_true, label %cond_false
+cond_true:
+  call void @use(i32* %B2)
+  call i32* @test8(i32* %B, i1 false)
+  ret i32* null
+cond_false:
+  call void @use2_nocapture(i32* %A, i32* %B)
+  call void @noarg()
+  ret i32* null
+}
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
deleted file mode 100644
index 97e67b26424d..000000000000
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -tailcallelim -S | FileCheck %s
-
-declare void @bar(i32*)
-
-define i32 @foo(i32 %N) {
-	%A = alloca i32, i32 %N		; <i32*> [#uses=2]
-	store i32 17, i32* %A
-	call void @bar( i32* %A )
-; CHECK: tail call i32 @foo
-	%X = tail call i32 @foo( i32 %N )		; <i32> [#uses=1]
-	ret i32 %X
-}
-
diff --git a/test/Transforms/TailCallElim/inf-recursion.ll b/test/Transforms/TailCallElim/inf-recursion.ll
index c427869107c1..157226f93d3f 100644
--- a/test/Transforms/TailCallElim/inf-recursion.ll
+++ b/test/Transforms/TailCallElim/inf-recursion.ll
@@ -14,7 +14,7 @@ entry:
 
 ; Do turn other calls into infinite loops though.
 
-; CHECK: define double @foo
+; CHECK-LABEL: define double @foo(
 ; CHECK-NOT: call
 ; CHECK: }
 define double @foo(double %f) {
@@ -22,7 +22,7 @@ define double @foo(double %f) {
         ret double %t
 }
 
-; CHECK: define float @fabsf
+; CHECK-LABEL: define float @fabsf(
 ; CHECK-NOT: call
 ; CHECK: }
 define float @fabsf(float %f) {
diff --git a/test/Transforms/TailCallElim/intervening-inst.ll b/test/Transforms/TailCallElim/intervening-inst.ll
deleted file mode 100644
index 10dffbd69425..000000000000
--- a/test/Transforms/TailCallElim/intervening-inst.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; This function contains intervening instructions which should be moved out of the way
-; RUN: opt < %s -tailcallelim -S | FileCheck %s
-
-define i32 @Test(i32 %X) {
-entry:
-	%tmp.1 = icmp eq i32 %X, 0		; <i1> [#uses=1]
-	br i1 %tmp.1, label %then.0, label %endif.0
-then.0:		; preds = %entry
-	%tmp.4 = add i32 %X, 1		; <i32> [#uses=1]
-	ret i32 %tmp.4
-endif.0:		; preds = %entry
-	%tmp.10 = add i32 %X, -1		; <i32> [#uses=1]
-; CHECK-NOT: call
-	%tmp.8 = call i32 @Test( i32 %tmp.10 )		; <i32> [#uses=1]
-	%DUMMY = add i32 %X, 1		; <i32> [#uses=0]
-	ret i32 %tmp.8
-}
-
diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Transforms/TailCallElim/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
deleted file mode 100644
index 741f5848bc67..000000000000
--- a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt -tailcallelim -S < %s | FileCheck %s
-; PR615
-
-declare void @bar(i32*)
-
-define i32 @foo() {
-; CHECK: i32 @foo()
-; CHECK-NEXT: alloca
-	%A = alloca i32		; <i32*> [#uses=2]
-	store i32 17, i32* %A
-	call void @bar( i32* %A )
-	%X = tail call i32 @foo( )		; <i32> [#uses=1]
-	ret i32 %X
-}
-
diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll
deleted file mode 100644
index e49d87cc4b59..000000000000
--- a/test/Transforms/TailCallElim/nocapture.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt -tailcallelim -S < %s | FileCheck %s
-; XFAIL: *
-
-declare void @use(i8* nocapture, i8* nocapture)
-
-define i8* @foo(i8* nocapture %A, i1 %cond) {
-; CHECK: tailrecurse:
-; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ]
-; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ]
-  %B = alloca i8
-; CHECK: %B = alloca i8
-  br i1 %cond, label %cond_true, label %cond_false
-; CHECK: br i1 %cond.tr, label %cond_true, label %cond_false
-cond_true:
-; CHECK: cond_true:
-; CHECK: br label %tailrecurse
-  call i8* @foo(i8* %B, i1 false)
-  ret i8* null
-cond_false:
-; CHECK: cond_false
-  call void @use(i8* %A, i8* %B)
-; CHECK: tail call void @use(i8* %A.tr, i8* %B)
-  ret i8* null
-; CHECK: ret i8* null
-}
diff --git a/test/Transforms/TailCallElim/return_constant.ll b/test/Transforms/TailCallElim/return_constant.ll
deleted file mode 100644
index e99e57e1457d..000000000000
--- a/test/Transforms/TailCallElim/return_constant.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; Though this case seems to be fairly unlikely to occur in the wild, someone
-; plunked it into the demo script, so maybe they care about it.
-;
-; RUN: opt < %s -tailcallelim -S | FileCheck %s
-
-define i32 @aaa(i32 %c) {
-entry:
-	%tmp.1 = icmp eq i32 %c, 0		; <i1> [#uses=1]
-	br i1 %tmp.1, label %return, label %else
-else:		; preds = %entry
-	%tmp.5 = add i32 %c, -1		; <i32> [#uses=1]
-; CHECK-NOT: call
-	%tmp.3 = call i32 @aaa( i32 %tmp.5 )		; <i32> [#uses=0]
-	ret i32 0
-return:		; preds = %entry
-	ret i32 0
-}
-
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
deleted file mode 100644
index 7049e4d588d4..000000000000
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -tailcallelim -S | FileCheck %s
-
-
-declare void @foo()
-
-define void @bar() {
-; CHECK: tail call void @foo()
-	call void @foo()
-	ret void
-}
-
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
index da2db5a45f9c..ba763cf03ffc 100644
--- a/test/Transforms/TailDup/X86/lit.local.cfg
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
index 18c604aba567..19840aa7574c 100644
--- a/test/Transforms/TailDup/lit.local.cfg
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -1,5 +1,3 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 15cf626c72bf..68ba0b36c4d0 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -4,6 +4,8 @@
 
 import os
 
+import lit.formats
+
 # name: The name of this test suite.
 config.name = 'LLVM-Unit'
 
@@ -43,9 +45,9 @@ if config.test_exec_root is None:
     # out-of-tree build situation).
 
     # Check for 'llvm_unit_site_config' user parameter, and use that if available.
-    site_cfg = lit.params.get('llvm_unit_site_config', None)
+    site_cfg = lit_config.params.get('llvm_unit_site_config', None)
     if site_cfg and os.path.exists(site_cfg):
-        lit.load_config(config, site_cfg)
+        lit_config.load_config(config, site_cfg)
         raise SystemExit
 
     # Try to detect the situation where we are using an out-of-tree build by
@@ -58,7 +60,7 @@ if config.test_exec_root is None:
 
     llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
     if not llvm_config:
-        lit.fatal('No site specific configuration available!')
+        lit_config.fatal('No site specific configuration available!')
 
     # Get the source and object roots.
     llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
@@ -67,16 +69,16 @@ if config.test_exec_root is None:
     # Validate that we got a tree which points to here.
     this_src_root = os.path.join(os.path.dirname(__file__),'..','..')
     if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
-        lit.fatal('No site specific configuration available!')
+        lit_config.fatal('No site specific configuration available!')
 
     # Check that the site specific configuration exists.
     site_cfg = os.path.join(llvm_obj_root, 'test', 'Unit', 'lit.site.cfg')
     if not os.path.exists(site_cfg):
-        lit.fatal('No site specific configuration available!')
+        lit_config.fatal('No site specific configuration available!')
 
     # Okay, that worked. Notify the user of the automagic, and reconfigure.
-    lit.note('using out-of-tree build at %r' % llvm_obj_root)
-    lit.load_config(config, site_cfg)
+    lit_config.note('using out-of-tree build at %r' % llvm_obj_root)
+    lit_config.load_config(config, site_cfg)
     raise SystemExit
 
 # If necessary, point the dynamic loader at libLLVM.so.
diff --git a/test/Unit/lit.site.cfg.in b/test/Unit/lit.site.cfg.in
index 65e98d0af550..7ff8155dff60 100644
--- a/test/Unit/lit.site.cfg.in
+++ b/test/Unit/lit.site.cfg.in
@@ -1,3 +1,5 @@
+import sys
+
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
@@ -11,11 +13,12 @@ config.shlibpath_var = "@SHLIBPATH_VAR@"
 # Support substitution of the tools_dir and build_mode with user parameters.
 # This is used when we can't determine the tool dir at configuration time.
 try:
-    config.llvm_tools_dir = config.llvm_tools_dir % lit.params
-    config.llvm_build_mode = config.llvm_build_mode % lit.params
-except KeyError,e:
+    config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
+    config.llvm_build_mode = config.llvm_build_mode % lit_config.params
+except KeyError:
+    e = sys.exc_info()[1]
     key, = e.args
-    lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
+    lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
 
 # Let the main config do the real work.
-lit.load_config(config, "@LLVM_SOURCE_DIR@/test/Unit/lit.cfg")
+lit_config.load_config(config, "@LLVM_SOURCE_DIR@/test/Unit/lit.cfg")
diff --git a/test/Verifier/bitcast-address-space-nested-global-cycle.ll b/test/Verifier/bitcast-address-space-nested-global-cycle.ll
new file mode 100644
index 000000000000..0cee726a95cf
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-nested-global-cycle.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+%struct.Self1 = type { %struct.Self1 addrspace(1)* }
+
+@cycle1 = addrspace(1) constant %struct.Self1 { %struct.Self1 addrspace(1)* bitcast (%struct.Self1 addrspace(0)* @cycle0 to %struct.Self1 addrspace(1)*) }
+@cycle0 = addrspace(0) constant %struct.Self1 { %struct.Self1 addrspace(1)* @cycle1 }
diff --git a/test/Verifier/bitcast-address-space-nested-global.ll b/test/Verifier/bitcast-address-space-nested-global.ll
new file mode 100644
index 000000000000..abe9d947f7ba
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-nested-global.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+
+%struct.Self1 = type { %struct.Self1 addrspace(1)* }
+
+@nestedD = constant %struct.Self1 { %struct.Self1 addrspace(1)* bitcast (%struct.Self1 addrspace(0)* @nestedC to %struct.Self1 addrspace(1)*) }
+@nestedC = constant %struct.Self1 { %struct.Self1 addrspace(1)* bitcast (%struct.Self1 addrspace(0)* @nestedB to %struct.Self1 addrspace(1)*) }
+@nestedB = constant %struct.Self1 { %struct.Self1 addrspace(1)* bitcast (%struct.Self1 addrspace(0)* @nestedA to %struct.Self1 addrspace(1)*) }
+@nestedA = constant %struct.Self1 { %struct.Self1 addrspace(1)* null }
diff --git a/test/Verifier/bitcast-address-space-through-constant-inttoptr-inside-gep-instruction.ll b/test/Verifier/bitcast-address-space-through-constant-inttoptr-inside-gep-instruction.ll
new file mode 100644
index 000000000000..ed71afaef9a9
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-through-constant-inttoptr-inside-gep-instruction.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as -verify -disable-output < %s
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+; Check that we can find inttoptr -> illegal bitcasts when hidden
+; inside constantexpr pointer operands
+define i32 addrspace(2)* @illegal_bitcast_inttoptr_as_1_to_2_inside_gep() {
+  %cast = getelementptr i32 addrspace(2)* bitcast (i32 addrspace(1)* inttoptr (i32 1234 to i32 addrspace(1)*) to i32 addrspace(2)*), i32 3
+  ret i32 addrspace(2)* %cast
+}
+
diff --git a/test/Verifier/bitcast-address-space-through-constant-inttoptr.ll b/test/Verifier/bitcast-address-space-through-constant-inttoptr.ll
new file mode 100644
index 000000000000..e65c71e8be0e
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-through-constant-inttoptr.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+
+%struct.Foo = type { i32 addrspace(1)* }
+
+; Make sure we still reject the bitcast when the source is an inttoptr (constant int) in a global initializer
+@bitcast_after_constant_inttoptr_initializer = global %struct.Foo { i32 addrspace(1)* bitcast (i32 addrspace(2)* inttoptr (i8 7 to i32 addrspace(2)*) to i32 addrspace(1)*) }
+
+
diff --git a/test/Verifier/bitcast-address-space-through-gep-2.ll b/test/Verifier/bitcast-address-space-through-gep-2.ll
new file mode 100644
index 000000000000..3b77d9a30223
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-through-gep-2.ll
@@ -0,0 +1,17 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-p3:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+
+%struct.Foo1 = type { i32 addrspace(1)* }
+
+@as2_array = addrspace(2) global [32 x i32] zeroinitializer
+
+; gep -> legal bitcast (2 -> 3) -> gep -> illegal bitcast (3 -> 1)
+@bitcast_after_gep_bitcast_gep =
+         global %struct.Foo1 { i32 addrspace(1)* bitcast
+                                    (i32 addrspace(3)* getelementptr
+                                         (i32 addrspace(3)* bitcast
+                                              (i32 addrspace(2)* getelementptr
+                                                   ([32 x i32] addrspace(2)* @as2_array, i32 0, i32 8) to i32 addrspace(3)*), i32 3) to i32 addrspace(1)*) }
+
diff --git a/test/Verifier/bitcast-address-space-through-gep.ll b/test/Verifier/bitcast-address-space-through-gep.ll
new file mode 100644
index 000000000000..8e950dc1e6bd
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-through-gep.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+%struct.Foo = type { i32 addrspace(1)* }
+
+
+@as2_array = addrspace(2) global [32 x i32] zeroinitializer
+
+; Make sure we still reject the bitcast after the value is accessed through a GEP
+@bitcast_after_gep = global %struct.Foo { i32 addrspace(1)* bitcast (i32 addrspace(2)* getelementptr ([32 x i32] addrspace(2)* @as2_array, i32 0, i32 8) to i32 addrspace(1)*) }
+
+
diff --git a/test/Verifier/bitcast-address-space-through-inttoptr.ll b/test/Verifier/bitcast-address-space-through-inttoptr.ll
new file mode 100644
index 000000000000..bec40488a13c
--- /dev/null
+++ b/test/Verifier/bitcast-address-space-through-inttoptr.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:8:8:8-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+define i32 addrspace(2)* @illegal_bitcast_as_1_to_2_inttoptr() {
+   %cast = bitcast i32 addrspace(1)* inttoptr (i32 5 to i32 addrspace(1)*) to i32 addrspace(2)*
+   ret i32 addrspace(2)* %cast
+}
+
diff --git a/test/Verifier/bitcast-address-spaces.ll b/test/Verifier/bitcast-address-spaces.ll
new file mode 100644
index 000000000000..450841740b1d
--- /dev/null
+++ b/test/Verifier/bitcast-address-spaces.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+define i32 addrspace(1)* @illegal_bitcast_as_0_to_1(i32 addrspace(0) *%p) {
+  %cast = bitcast i32 addrspace(0)* %p to i32 addrspace(1)*
+  ret i32 addrspace(1)* %cast
+}
+
diff --git a/test/Verifier/bitcast-alias-address-space.ll b/test/Verifier/bitcast-alias-address-space.ll
new file mode 100644
index 000000000000..9cad8ab3779a
--- /dev/null
+++ b/test/Verifier/bitcast-alias-address-space.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+
+@data = addrspace(2) global i32 27
+
+@illegal_alias_data = alias bitcast (i32 addrspace(2)* @data to i32 addrspace(1)*)
diff --git a/test/Verifier/bitcast-vector-pointer-as.ll b/test/Verifier/bitcast-vector-pointer-as.ll
new file mode 100644
index 000000000000..89070e5a8900
--- /dev/null
+++ b/test/Verifier/bitcast-vector-pointer-as.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as -verify -disable-output %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-p2:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
+
+define <4 x i32 addrspace(1)*> @vector_illegal_bitcast_as_0_to_1(<4 x i32 addrspace(0)*> %p) {
+   %cast = bitcast <4 x i32 addrspace(0)*> %p to <4 x i32 addrspace(1)*>
+   ret <4 x i32 addrspace(1)*> %cast
+}
+
diff --git a/test/Verifier/ident-meta1.ll b/test/Verifier/ident-meta1.ll
new file mode 100644
index 000000000000..fb247a8c5e2e
--- /dev/null
+++ b/test/Verifier/ident-meta1.ll
@@ -0,0 +1,12 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+; Verify that llvm.ident is properly structured.
+; llvm.ident takes a list of metadata entries.
+; Each metadata entry can have only one string.
+
+!llvm.ident = !{!0, !1}
+!0 = metadata !{metadata !"version string"}
+!1 = metadata !{metadata !"string1", metadata !"string2"}
+; CHECK: assembly parsed, but does not verify as correct!
+; CHECK-NEXT: incorrect number of operands in llvm.ident metadata
+; CHECK-NEXT: metadata !1
+
diff --git a/test/Verifier/ident-meta2.ll b/test/Verifier/ident-meta2.ll
new file mode 100644
index 000000000000..e86f18adc0e8
--- /dev/null
+++ b/test/Verifier/ident-meta2.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+; Verify that llvm.ident is properly structured.
+; llvm.ident takes a list of metadata entries.
+; Each metadata entry can contain one string only.
+
+!llvm.ident = !{!0, !1, !2, !3}
+!0 = metadata !{metadata !"str1"}
+!1 = metadata !{metadata !"str2"}
+!2 = metadata !{metadata !"str3"}
+!3 = metadata !{i32 1}
+; CHECK: assembly parsed, but does not verify as correct!
+; CHECK-NEXT: invalid value for llvm.ident metadata entry operand(the operand should be a string)
+; CHECK-NEXT: i32 1
diff --git a/test/Verifier/ident-meta3.ll b/test/Verifier/ident-meta3.ll
new file mode 100644
index 000000000000..a847b462161d
--- /dev/null
+++ b/test/Verifier/ident-meta3.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+; Verify that llvm.ident is properly structured.
+; llvm.ident takes a list of metadata entries.
+; Each metadata entry can contain one string only.
+
+!llvm.ident = !{!0}
+!0 = metadata !{metadata !{metadata !"nested metadata"}}
+; CHECK: assembly parsed, but does not verify as correct!
+; CHECK-NEXT: invalid value for llvm.ident metadata entry operand(the operand should be a string)
+; CHECK-NEXT: metadata !1
diff --git a/test/Verifier/lit.local.cfg b/test/Verifier/lit.local.cfg
deleted file mode 100644
index 19eebc0ac7ac..000000000000
--- a/test/Verifier/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Verifier/llvm.compiler_used-invalid-type.ll b/test/Verifier/llvm.compiler_used-invalid-type.ll
index 0913027fb479..ef533b5bf783 100644
--- a/test/Verifier/llvm.compiler_used-invalid-type.ll
+++ b/test/Verifier/llvm.compiler_used-invalid-type.ll
@@ -1,6 +1,6 @@
 ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
 
-@llvm.compiler_used = appending global [1 x i32] [i32 0], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x i32] [i32 0], section "llvm.metadata"
 
 ; CHECK:       wrong type for intrinsic global variable
-; CHECK-NEXT: [1 x i32]* @llvm.compiler_used
+; CHECK-NEXT: [1 x i32]* @llvm.compiler.used
diff --git a/test/Verifier/llvm.used-invalid-type2.ll b/test/Verifier/llvm.used-invalid-type2.ll
index bff3f2d153a7..4bd0aa455645 100644
--- a/test/Verifier/llvm.used-invalid-type2.ll
+++ b/test/Verifier/llvm.used-invalid-type2.ll
@@ -2,4 +2,4 @@
 @llvm.used = appending global i32 0, section "llvm.metadata"
 
 ; CHECK: Only global arrays can have appending linkage!
-; CHEKC-NEXT: i32* @llvm.used
+; CHECK-NEXT: i32* @llvm.used
diff --git a/test/Verifier/varargs-intrinsic.ll b/test/Verifier/varargs-intrinsic.ll
new file mode 100644
index 000000000000..f6d0a7084c72
--- /dev/null
+++ b/test/Verifier/varargs-intrinsic.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.stackmap(i32, i32)
+declare void @llvm.donothing(...)
+
+define void @foo1() {
+  call void @llvm.experimental.stackmap(i32 0, i32 12)
+; CHECK: Callsite was not defined with variable arguments!
+  ret void
+}
+
+define void @foo2() {
+  call void (...)* @llvm.donothing(i32 0, i64 1)
+; CHECK: Intrinsic was not defined with variable arguments!
+  ret void
+}
diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data
index 01ca7f5d122a..56b25cbbd80d 100644
--- a/test/YAMLParser/spec-02-24.data
+++ b/test/YAMLParser/spec-02-24.data
@@ -1,4 +1,4 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
 
 %TAG ! tag:clarkevans.com,2002:
 --- !shape
@@ -14,3 +14,8 @@
   start: *ORIGIN
   color: 0xFFEEBB
   text: Pretty vector drawing.
+
+#CHECK: !<tag:clarkevans.com,2002:shape>
+#CHECK:   !<tag:clarkevans.com,2002:circle>
+#CHECK:   !<tag:clarkevans.com,2002:line>
+#CHECK:   !<tag:clarkevans.com,2002:label>
diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data
index beba7d06ecf4..2c8b2ec6959c 100644
--- a/test/YAMLParser/spec-07-04.data
+++ b/test/YAMLParser/spec-07-04.data
@@ -1,5 +1,7 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
 
 %TAG !yaml! tag:yaml.org,2002:
 ---
 !yaml!str "foo"
+
+#CHECK: !!str "foo"
diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data
index 3ce5e4b73e28..4f9b294f456b 100644
--- a/test/YAMLParser/yaml.data
+++ b/test/YAMLParser/yaml.data
@@ -1,5 +1,11 @@
-# RUN: yaml-bench -canonical %s
+# RUN: yaml-bench -canonical %s | FileCheck %s
 
 - !!yaml '!'
 - !!yaml '&'
 - !!yaml '*'
+
+# CHECK: !!seq [
+# CHECK:   !!yaml "!",
+# CHECK:   !!yaml "&",
+# CHECK:   !!yaml "*",
+# CHECK: ]
diff --git a/test/lit.cfg b/test/lit.cfg
index 8272e97c3856..df1f4a101965 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -7,6 +7,9 @@ import sys
 import re
 import platform
 
+import lit.util
+import lit.formats
+
 # name: The name of this test suite.
 config.name = 'LLVM'
 
@@ -14,9 +17,9 @@ config.name = 'LLVM'
 if sys.platform in ['win32']:
     # Seek sane tools in directories and set to $PATH.
     path = getattr(config, 'lit_tools_dir', None)
-    path = lit.getToolsPath(path,
-                            config.environment['PATH'],
-                            ['cmp.exe', 'grep.exe', 'sed.exe'])
+    path = lit_config.getToolsPath(path,
+                                   config.environment['PATH'],
+                                   ['cmp.exe', 'grep.exe', 'sed.exe'])
     if path is not None:
         path = os.path.pathsep.join((path,
                                      config.environment['PATH']))
@@ -36,17 +39,14 @@ else:
 # testFormat: The test format to use to interpret tests.
 config.test_format = lit.formats.ShTest(execute_external)
 
-# To ignore test output on stderr so it doesn't trigger failures uncomment this:
-#config.test_format = lit.formats.TclTest(ignoreStdErr=True)
-
-# suffixes: A list of file extensions to treat as test files, this is actually
-# set by on_clone().
-config.suffixes = []
+# suffixes: A list of file extensions to treat as test files. This is overridden
+# by individual lit.local.cfg files in the test subdirectories.
+config.suffixes = ['.ll', '.c', '.cpp', '.test', '.txt', '.s']
 
 # excludes: A list of directories to exclude from the testsuite. The 'Inputs'
 # subdirectories contain auxiliary inputs for various tests in their parent
 # directories.
-config.excludes = ['Inputs']
+config.excludes = ['Inputs', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt']
 
 # test_source_root: The root path where tests are located.
 config.test_source_root = os.path.dirname(__file__)
@@ -60,7 +60,7 @@ if llvm_obj_root is not None:
 if llvm_obj_root is not None:
     llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
     if not llvm_tools_dir:
-        lit.fatal('No LLVM tools dir set!')
+        lit_config.fatal('No LLVM tools dir set!')
     path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
     config.environment['PATH'] = path
 
@@ -107,9 +107,9 @@ if config.test_exec_root is None:
     # out-of-tree build situation).
 
     # Check for 'llvm_site_config' user parameter, and use that if available.
-    site_cfg = lit.params.get('llvm_site_config', None)
+    site_cfg = lit_config.params.get('llvm_site_config', None)
     if site_cfg and os.path.exists(site_cfg):
-        lit.load_config(config, site_cfg)
+        lit_config.load_config(config, site_cfg)
         raise SystemExit
 
     # Try to detect the situation where we are using an out-of-tree build by
@@ -122,7 +122,7 @@ if config.test_exec_root is None:
 
     llvm_config = lit.util.which('llvm-config', config.environment['PATH'])
     if not llvm_config:
-        lit.fatal('No site specific configuration available!')
+        lit_config.fatal('No site specific configuration available!')
 
     # Get the source and object roots.
     llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip()
@@ -131,16 +131,16 @@ if config.test_exec_root is None:
     # Validate that we got a tree which points to here.
     this_src_root = os.path.dirname(config.test_source_root)
     if os.path.realpath(llvm_src_root) != os.path.realpath(this_src_root):
-        lit.fatal('No site specific configuration available!')
+        lit_config.fatal('No site specific configuration available!')
 
     # Check that the site specific configuration exists.
     site_cfg = os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
     if not os.path.exists(site_cfg):
-        lit.fatal('No site specific configuration available!')
+        lit_config.fatal('No site specific configuration available!')
 
     # Okay, that worked. Notify the user of the automagic, and reconfigure.
-    lit.note('using out-of-tree build at %r' % llvm_obj_root)
-    lit.load_config(config, site_cfg)
+    lit_config.note('using out-of-tree build at %r' % llvm_obj_root)
+    lit_config.load_config(config, site_cfg)
     raise SystemExit
 
 ###
@@ -169,11 +169,11 @@ else:
 config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) )
 
 # Process jit implementation option
-jit_impl_cfg = lit.params.get('jit_impl', None)
+jit_impl_cfg = lit_config.params.get('jit_impl', None)
 if jit_impl_cfg == 'mcjit':
   # When running with mcjit, mangle -mcjit into target triple
   # and add -use-mcjit flag to lli invocation
-  if 'i686' in config.target_triple:
+  if 'i386' in config.target_triple or 'i686' in config.target_triple:
     config.target_triple += jit_impl_cfg + '-ia32'
   elif 'x86_64' in config.target_triple:
     config.target_triple += jit_impl_cfg + '-ia64'
@@ -201,30 +201,56 @@ if os.pathsep == ';':
     pathext = os.environ.get('PATHEXT', '').split(';')
 else:
     pathext = ['']
-for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
+# Regex to reject matching a hyphen
+NOHYPHEN = r"(?<!-)"
+
+for pattern in [r"\bbugpoint\b(?!-)",
+                r"(?<!/|-)\bclang\b(?!-)",
                 r"\bgold\b",
-                r"\bllc\b",             r"\blli\b",
-                r"\bllvm-ar\b",         r"\bllvm-as\b",
-                r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
-                r"\bllvm-cov\b",        r"\bllvm-diff\b",
-                r"\bllvm-dis\b",        r"\bllvm-dwarfdump\b",
-                r"\bllvm-extract\b",    r"\bllvm-jistlistener\b",
-                r"\bllvm-link\b",       r"\bllvm-mc\b",
-                r"\bllvm-nm\b",         r"\bllvm-objdump\b",
-                r"\bllvm-prof\b",       r"\bllvm-ranlib\b",
-                r"\bllvm-rtdyld\b",     r"\bllvm-shlib\b",
+                # Match llc but not -llc
+                NOHYPHEN + r"\bllc\b",
+                r"\blli\b",
+                r"\bllvm-PerfectShuffle\b",
+                r"\bllvm-ar\b",
+                r"\bllvm-as\b",
+                r"\bllvm-bcanalyzer\b",
+                r"\bllvm-config\b",
+                r"\bllvm-cov\b",
+                r"\bllvm-diff\b",
+                r"\bllvm-dis\b",
+                r"\bllvm-dwarfdump\b",
+                r"\bllvm-extract\b",
+                r"\bllvm-jistlistener\b",
+                r"\bllvm-link\b",
+                r"\bllvm-lto\b",
+                r"\bllvm-mc\b",
+                r"\bllvm-mcmarkup\b",
+                r"\bllvm-nm\b",
+                r"\bllvm-objdump\b",
+                r"\bllvm-ranlib\b",
+                r"\bllvm-readobj\b",
+                r"\bllvm-rtdyld\b",
+                r"\bllvm-shlib\b",
                 r"\bllvm-size\b",
-                # Don't match '-llvmc'.
-                r"(?<!-)\bllvmc\b",     r"\blto\b",
-                                        # Don't match '.opt', '-opt',
-                                        # '^opt' or '/opt'.
-                r"\bmacho-dump\b",      r"(?<!\.|-|\^|/)\bopt\b",
-                r"\bllvm-tblgen\b",     r"\bFileCheck\b",
-                r"\bFileUpdate\b",      r"\bc-index-test\b",
-                r"\bfpcmp\b",           r"\bllvm-PerfectShuffle\b",
+                r"\bllvm-tblgen\b",
+                r"\bllvm-c-test\b",
+                # Match llvmc but not -llvmc
+                NOHYPHEN + r"\bllvmc\b",
+                # Match lto but not -lto
+                NOHYPHEN + r"\blto\b",
+                r"\bmacho-dump\b",
+                # Don't match '.opt', '-opt', '^opt' or '/opt'.
+                r"(?<!\.|-|\^|/)\bopt\b",
+                r"\bFileCheck\b",
+                r"\bFileUpdate\b",
+                r"\bc-index-test\b",
+                r"\bfpcmp\b",
+                r"\bobj2yaml\b",
+                r"\byaml2obj\b",
                 # Handle these specially as they are strings searched
                 # for during testing.
-                r"\| \bcount\b",         r"\| \bnot\b"]:
+                r"\| \bcount\b",
+                r"\| \bnot\b"]:
     # Extract the tool name from the pattern.  This relies on the tool
     # name being surrounded by \b word match operators.  If the
     # pattern starts with "| ", include it in the string to be
@@ -245,6 +271,10 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
 if execute_external:
     config.available_features.add('shell')
 
+# Others/can-execute.txt
+if sys.platform not in ['win32']:
+    config.available_features.add('can-execute')
+
 # Loadable module
 # FIXME: This should be supplied by Makefile or autoconf.
 if sys.platform in ['win32', 'cygwin']:
@@ -255,10 +285,6 @@ else:
 if loadable_module:
     config.available_features.add('loadable_module')
 
-# LTO on OS X
-if config.lto_is_enabled == "1" and platform.system() == "Darwin":
-    config.available_features.add('lto_on_osx')
-
 # Sanitizers.
 if config.llvm_use_sanitizer == "Address":
     config.available_features.add("asan")
@@ -273,15 +299,53 @@ if not 'hexagon' in config.target_triple:
 if config.have_zlib == "1":
     config.available_features.add("zlib")
 
+# Native compilation: host arch == target arch
+# FIXME: Consider cases where the target can be executed
+# even if host_triple differs from target_triple.
+if config.host_triple == config.target_triple:
+    config.available_features.add("native")
+
 # llc knows whether he is compiled with -DNDEBUG.
 import subprocess
 try:
     llc_cmd = subprocess.Popen([os.path.join(llvm_tools_dir, 'llc'), '-version'],
                            stdout = subprocess.PIPE)
-except OSError, why:
-    print "Could not find llc in " + llvm_tools_dir
+except OSError:
+    print("Could not find llc in " + llvm_tools_dir)
     exit(42)
 
-if re.search(r'with assertions', llc_cmd.stdout.read()):
+if re.search(r'with assertions', llc_cmd.stdout.read().decode('ascii')):
     config.available_features.add('asserts')
 llc_cmd.wait()
+
+if 'darwin' == sys.platform:  # probe the host CPU for FMA3 via sysctl
+    try:
+        sysctl_cmd = subprocess.Popen(['sysctl', 'hw.optional.fma'],
+                                    stdout = subprocess.PIPE)
+    except OSError:
+        print("Could not exec sysctl")
+    else:  # only touch sysctl_cmd when Popen succeeded (it is unbound otherwise)
+        if b"hw.optional.fma: 1" in sysctl_cmd.stdout.read():
+            config.available_features.add('fma3')
+        sysctl_cmd.wait()
+
+# Check if we should use gmalloc.
+use_gmalloc_str = lit_config.params.get('use_gmalloc', None)
+if use_gmalloc_str is not None:
+    if use_gmalloc_str.lower() in ('1', 'true'):
+        use_gmalloc = True
+    elif use_gmalloc_str.lower() in ('', '0', 'false'):
+        use_gmalloc = False
+    else:
+        lit_config.fatal('user parameter use_gmalloc should be 0 or 1')
+else:
+    # Default to not using gmalloc
+    use_gmalloc = False
+
+# Allow use of an explicit path for gmalloc library.
+# Will default to '/usr/lib/libgmalloc.dylib' if not set.
+gmalloc_path_str = lit_config.params.get('gmalloc_path',
+                                         '/usr/lib/libgmalloc.dylib')
+
+if use_gmalloc:
+     config.environment.update({'DYLD_INSERT_LIBRARIES' : gmalloc_path_str})
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 1ae99eb02496..72fd9c9ff785 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -1,3 +1,5 @@
+import sys
+
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
 config.host_triple = "@LLVM_HOST_TRIPLE@"
@@ -12,7 +14,6 @@ config.python_executable = "@PYTHON_EXECUTABLE@"
 config.ocamlopt_executable = "@OCAMLOPT@"
 config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
-config.lto_is_enabled = "@LTO_IS_ENABLED@"
 config.targets_to_build = "@TARGETS_TO_BUILD@"
 config.llvm_bindings = "@LLVM_BINDINGS@"
 config.host_os = "@HOST_OS@"
@@ -24,10 +25,11 @@ config.have_zlib = "@HAVE_LIBZ@"
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
 try:
-    config.llvm_tools_dir = config.llvm_tools_dir % lit.params
-except KeyError,e:
+    config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
+except KeyError:
+    e = sys.exc_info()[1]
     key, = e.args
-    lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
+    lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
 
 # Let the main config do the real work.
-lit.load_config(config, "@LLVM_SOURCE_DIR@/test/lit.cfg")
+lit_config.load_config(config, "@LLVM_SOURCE_DIR@/test/lit.cfg")
diff --git a/test/tools/llvm-cov/Inputs/README b/test/tools/llvm-cov/Inputs/README
new file mode 100644
index 000000000000..2cfb1917965c
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/README
@@ -0,0 +1,7 @@
+These inputs were pre-generated to allow for easier testing of llvm-cov.
+
+test.gcno and test.gcda were created by running clang:
+  clang++ -g -ftest-coverage -fprofile-arcs test.cpp
+
+test.cpp.gcov was created by running gcov 4.2.1:
+  gcov test.cpp
diff --git a/test/tools/llvm-cov/Inputs/test.cpp b/test/tools/llvm-cov/Inputs/test.cpp
new file mode 100644
index 000000000000..07bc3f294c50
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test.cpp
@@ -0,0 +1,77 @@
+#include <cstdlib>
+
+bool on = false;
+int len = 42;
+double grid[10][10] = {0};
+const char * hello = "world";
+const char * world = "hello";
+
+struct A {
+  virtual void B();
+};
+
+void A::B() {}
+
+void useless() {}
+
+double more_useless() {
+  return 0;
+}
+
+int foo() {
+  on = true;
+  return 3;
+}
+
+int bar() {
+  len--;
+  return foo() + 45;
+}
+
+void assign(int ii, int jj) {
+  grid[ii][jj] = (ii+1) * (jj+1);
+}
+
+void initialize_grid() {
+  for (int ii = 0; ii < 2; ii++)
+    for (int jj = 0; jj < 2; jj++)
+      assign(ii, jj);
+}
+
+int main() {
+  initialize_grid();
+
+  int a = 2;
+  on = rand() % 2;
+  if (on) {
+    foo();
+    ++a;
+  } else {
+    bar();
+    a += rand();
+  }
+
+  for (int ii = 0; ii < 10; ++ii) {
+    switch (rand() % 5) {
+      case 0:
+        a += rand();
+        break;
+      case 1:
+      case 2:
+        a += rand() / rand();
+        break;
+      case 3:
+        a -= rand();
+        break;
+      default:
+        a = -1;
+    }
+  }
+
+  A thing;
+  for (uint64_t ii = 0; ii < 4294967296; ++ii)
+    thing.B();
+
+  return a + 8 + grid[2][3] + len;
+  return more_useless();
+}
diff --git a/test/tools/llvm-cov/Inputs/test.cpp.gcov b/test/tools/llvm-cov/Inputs/test.cpp.gcov
new file mode 100644
index 000000000000..a3dacc269ead
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test.cpp.gcov
@@ -0,0 +1,82 @@
+        -:    0:Source:test.cpp
+        -:    0:Graph:test.gcno
+        -:    0:Data:test.gcda
+        -:    0:Runs:2
+        -:    0:Programs:1
+        -:    1:#include <cstdlib>
+        -:    2:
+        -:    3:bool on = false;
+        -:    4:int len = 42;
+        -:    5:double grid[10][10] = {0};
+        -:    6:const char * hello = "world";
+        -:    7:const char * world = "hello";
+        -:    8:
+        4:    9:struct A {
+        -:   10:  virtual void B();
+        -:   11:};
+        -:   12:
+8589934592:   13:void A::B() {}
+        -:   14:
+    #####:   15:void useless() {}
+        -:   16:
+        -:   17:double more_useless() {
+    #####:   18:  return 0;
+        -:   19:}
+        -:   20:
+        -:   21:int foo() {
+        2:   22:  on = true;
+        2:   23:  return 3;
+        -:   24:}
+        -:   25:
+        -:   26:int bar() {
+    #####:   27:  len--;
+    #####:   28:  return foo() + 45;
+        -:   29:}
+        -:   30:
+        8:   31:void assign(int ii, int jj) {
+        8:   32:  grid[ii][jj] = (ii+1) * (jj+1);
+        8:   33:}
+        -:   34:
+        -:   35:void initialize_grid() {
+       12:   36:  for (int ii = 0; ii < 2; ii++)
+       24:   37:    for (int jj = 0; jj < 2; jj++)
+       12:   38:      assign(ii, jj);
+        2:   39:}
+        -:   40:
+        -:   41:int main() {
+        2:   42:  initialize_grid();
+        -:   43:
+        2:   44:  int a = 2;
+        2:   45:  on = rand() % 2;
+        2:   46:  if (on) {
+        2:   47:    foo();
+        2:   48:    ++a;
+        2:   49:  } else {
+    #####:   50:    bar();
+    #####:   51:    a += rand();
+        -:   52:  }
+        -:   53:
+       44:   54:  for (int ii = 0; ii < 10; ++ii) {
+       20:   55:    switch (rand() % 5) {
+        -:   56:      case 0:
+        4:   57:        a += rand();
+        4:   58:        break;
+        -:   59:      case 1:
+        -:   60:      case 2:
+        2:   61:        a += rand() / rand();
+        2:   62:        break;
+        -:   63:      case 3:
+        6:   64:        a -= rand();
+        6:   65:        break;
+        -:   66:      default:
+        8:   67:        a = -1;
+        8:   68:    }
+       20:   69:  }
+        -:   70:
+        2:   71:  A thing;
+17179869188:   72:  for (uint64_t ii = 0; ii < 4294967296; ++ii)
+8589934592:   73:    thing.B();
+        -:   74:
+        2:   75:  return a + 8 + grid[2][3] + len;
+        -:   76:  return more_useless();
+        -:   77:}
diff --git a/test/tools/llvm-cov/Inputs/test.gcda b/test/tools/llvm-cov/Inputs/test.gcda
new file mode 100644
index 000000000000..23d03bdd1fd2
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test.gcda
diff --git a/test/tools/llvm-cov/Inputs/test.gcno b/test/tools/llvm-cov/Inputs/test.gcno
new file mode 100644
index 000000000000..6162604e7449
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test.gcno
diff --git a/test/tools/llvm-cov/Inputs/test_read_fail.gcno b/test/tools/llvm-cov/Inputs/test_read_fail.gcno
new file mode 100644
index 000000000000..63b5d71e6951
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_read_fail.gcno
diff --git a/test/tools/llvm-readobj/lit.local.cfg b/test/tools/llvm-cov/lit.local.cfg
index df9b335dd131..df9b335dd131 100644
--- a/test/tools/llvm-readobj/lit.local.cfg
+++ b/test/tools/llvm-cov/lit.local.cfg
diff --git a/test/tools/llvm-cov/llvm-cov.test b/test/tools/llvm-cov/llvm-cov.test
new file mode 100644
index 000000000000..28738a78d160
--- /dev/null
+++ b/test/tools/llvm-cov/llvm-cov.test
@@ -0,0 +1,10 @@
+RUN: cd %p/Inputs
+# "cd" is unsupported in lit internal runner.
+REQUIRES: shell
+
+RUN: llvm-cov -gcno=test.gcno -gcda=test.gcda \
+RUN:   | diff -aub test.cpp.gcov -
+
+RUN: not llvm-cov -gcno=test_read_fail.gcno -gcda=test.gcda
+
+XFAIL: powerpc64, s390x
diff --git a/test/tools/llvm-lit/lit.local.cfg b/test/tools/llvm-lit/lit.local.cfg
deleted file mode 100644
index 856a54932f0b..000000000000
--- a/test/tools/llvm-lit/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = ['.c']
diff --git a/test/tools/llvm-objdump/Inputs/nop.exe.coff-i386 b/test/tools/llvm-objdump/Inputs/nop.exe.coff-i386
new file mode 100644
index 000000000000..68c9d3db0f8f
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/nop.exe.coff-i386
diff --git a/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386 b/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386
new file mode 100644
index 000000000000..fdc48743a886
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386
diff --git a/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64 b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64
new file mode 100644
index 000000000000..63460e7826ef
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64.asm b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64.asm
new file mode 100644
index 000000000000..4d47fa4515a3
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/win64-unwind.exe.coff-x86_64.asm
@@ -0,0 +1,53 @@
+    .text
+    .globl func
+    .def func; .scl 2; .type 32; .endef
+    .seh_proc func
+func:
+    .seh_pushframe @code
+    subq $24, %rsp
+    .seh_stackalloc 24
+    movq %rsi, 16(%rsp)
+    .seh_savereg %rsi, 16
+    movups %xmm8, (%rsp)
+    .seh_savexmm %xmm8, 0
+    pushq %rbx
+    .seh_pushreg 3
+    mov %rsp, %rbx
+    .seh_setframe 3, 0
+    .seh_endprologue
+    .seh_handler __C_specific_handler, @except
+    .seh_handlerdata
+    .long 0
+    .text
+    .seh_startchained
+    .seh_endprologue
+    .seh_endchained
+    lea (%rbx), %rsp
+    pop %rbx
+    addq $24, %rsp
+    ret
+    .seh_endproc
+
+// Test emission of small functions.
+    .globl smallFunc
+    .def smallFunc; .scl 2; .type 32; .endef
+    .seh_proc smallFunc
+smallFunc:
+    ret
+    .seh_endproc
+
+// Function with big stack allocation.
+    .globl allocFunc
+    .def allocFunc; .scl 2; .type 32; .endef
+    .seh_proc allocFunc
+allocFunc:
+    .seh_pushframe @code
+    subq $65520, %rsp
+    .seh_stackalloc 65520
+    sub $8454128, %rsp
+    .seh_stackalloc 8454128
+    .seh_endprologue
+    add $8454128, %rsp
+    addq $65520, %rsp
+    ret
+    .seh_endproc
diff --git a/test/tools/llvm-objdump/coff-private-headers.test b/test/tools/llvm-objdump/coff-private-headers.test
new file mode 100644
index 000000000000..d36c148cec6e
--- /dev/null
+++ b/test/tools/llvm-objdump/coff-private-headers.test
@@ -0,0 +1,9 @@
+// RUN: llvm-objdump -p %p/Inputs/nop.exe.coff-i386 | FileCheck %s
+
+CHECK:       The Import Tables:
+CHECK-NEXT:  lookup 00005028 time 00000000 fwd 00000000 name 00005096 addr 00005058
+CHECK:       DLL Name: KERNEL32.dll
+CHECK-NEXT:     Hint/Ord  Name
+CHECK-NEXT:          365  ExitProcess
+
+
diff --git a/test/tools/llvm-objdump/disassembly-show-raw.s b/test/tools/llvm-objdump/disassembly-show-raw.s
deleted file mode 100644
index 32fcad4a369d..000000000000
--- a/test/tools/llvm-objdump/disassembly-show-raw.s
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d - \
-// RUN:                                    | FileCheck %s -check-prefix=WITHRAW
-// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d -no-show-raw-insn - \
-// RUN:                                    | FileCheck %s -check-prefix=NORAW
-
-// Expect to find the raw incoding when run with raw output (default), but not
-// when run explicitly with -no-show-raw-insn
-
-movl 0, %eax
-// WITHRAW: a1 00 00 00 00 movl
-
-// NORAW: movl
-// NORAW-NOT: a1 00
-
-
diff --git a/test/tools/llvm-objdump/disassembly-show-raw.test b/test/tools/llvm-objdump/disassembly-show-raw.test
new file mode 100644
index 000000000000..e9956a5ebe4b
--- /dev/null
+++ b/test/tools/llvm-objdump/disassembly-show-raw.test
@@ -0,0 +1,14 @@
+// RUN: llvm-objdump -d %p/Inputs/trivial.obj.elf-i386 \
+// RUN:     | FileCheck %s -check-prefix=WITHRAW
+// RUN: llvm-objdump -d -no-show-raw-insn %p/Inputs/trivial.obj.elf-i386 \
+// RUN:     | FileCheck %s -check-prefix=NORAW
+
+// Expect to find the raw encoding when run with raw output (default), but not
+// when run explicitly with -no-show-raw-insn
+
+WITHRAW: a1 00 00 00 00 movl
+
+NORAW: movl
+NORAW-NOT: a1 00
+
+
diff --git a/test/tools/llvm-objdump/lit.local.cfg b/test/tools/llvm-objdump/lit.local.cfg
index 56bf00859572..19840aa7574c 100644
--- a/test/tools/llvm-objdump/lit.local.cfg
+++ b/test/tools/llvm-objdump/lit.local.cfg
@@ -1,6 +1,3 @@
-config.suffixes = ['.ll', '.s']
-
 targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
-
diff --git a/test/tools/llvm-objdump/win64-unwind-data.s b/test/tools/llvm-objdump/win64-unwind-data.s
deleted file mode 100644
index 1e4c7428ce32..000000000000
--- a/test/tools/llvm-objdump/win64-unwind-data.s
+++ /dev/null
@@ -1,106 +0,0 @@
-// This test checks that the unwind data is dumped by llvm-objdump.
-// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-objdump -u - | FileCheck %s
-
-// CHECK:      Unwind info:
-// CHECK:      Function Table:
-// CHECK-NEXT: Start Address: .text
-// CHECK-NEXT: End Address: .text + 0x001b
-// CHECK-NEXT: Unwind Info Address: .xdata
-// CHECK-NEXT: Version: 1
-// CHECK-NEXT: Flags: 1 UNW_ExceptionHandler
-// CHECK-NEXT: Size of prolog: 18
-// CHECK-NEXT: Number of Codes: 8
-// CHECK-NEXT: Frame register: RBX
-// CHECK-NEXT: Frame offset: 0
-// CHECK-NEXT: Unwind Codes:
-// CHECK-NEXT: 0x00: UOP_SetFPReg
-// CHECK-NEXT: 0x0f: UOP_PushNonVol RBX
-// CHECK-NEXT: 0x0e: UOP_SaveXMM128 XMM8 [0x0000]
-// CHECK-NEXT: 0x09: UOP_SaveNonVol RSI [0x0010]
-// CHECK-NEXT: 0x04: UOP_AllocSmall 24
-// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
-// CHECK:      Function Table:
-// CHECK-NEXT: Start Address: .text + 0x0012
-// CHECK-NEXT: End Address: .text + 0x0012
-// CHECK-NEXT: Unwind Info Address: .xdata + 0x001c
-// CHECK-NEXT: Version: 1
-// CHECK-NEXT: Flags: 4 UNW_ChainInfo
-// CHECK-NEXT: Size of prolog: 0
-// CHECK-NEXT: Number of Codes: 0
-// CHECK-NEXT: No frame pointer used
-// CHECK:      Function Table:
-// CHECK-NEXT: Start Address: .text + 0x001b
-// CHECK-NEXT: End Address: .text + 0x001c
-// CHECK-NEXT: Unwind Info Address: .xdata + 0x002c
-// CHECK-NEXT: Version: 1
-// CHECK-NEXT: Flags: 0
-// CHECK-NEXT: Size of prolog: 0
-// CHECK-NEXT: Number of Codes: 0
-// CHECK-NEXT: No frame pointer used
-// CHECK:      Function Table:
-// CHECK-NEXT: Start Address: .text + 0x001c
-// CHECK-NEXT: End Address: .text + 0x0039
-// CHECK-NEXT: Unwind Info Address: .xdata + 0x0034
-// CHECK-NEXT: Version: 1
-// CHECK-NEXT: Flags: 0
-// CHECK-NEXT: Size of prolog: 14
-// CHECK-NEXT: Number of Codes: 6
-// CHECK-NEXT: No frame pointer used
-// CHECK-NEXT: Unwind Codes:
-// CHECK-NEXT: 0x0e: UOP_AllocLarge 8454128
-// CHECK-NEXT: 0x07: UOP_AllocLarge 8190
-// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
-
-    .text
-    .globl func
-    .def func; .scl 2; .type 32; .endef
-    .seh_proc func
-func:
-    .seh_pushframe @code
-    subq $24, %rsp
-    .seh_stackalloc 24
-    movq %rsi, 16(%rsp)
-    .seh_savereg %rsi, 16
-    movups %xmm8, (%rsp)
-    .seh_savexmm %xmm8, 0
-    pushq %rbx
-    .seh_pushreg 3
-    mov %rsp, %rbx
-    .seh_setframe 3, 0
-    .seh_endprologue
-    .seh_handler __C_specific_handler, @except
-    .seh_handlerdata
-    .long 0
-    .text
-    .seh_startchained
-    .seh_endprologue
-    .seh_endchained
-    lea (%rbx), %rsp
-    pop %rbx
-    addq $24, %rsp
-    ret
-    .seh_endproc
-
-// Test emission of small functions.
-    .globl smallFunc
-    .def smallFunc; .scl 2; .type 32; .endef
-    .seh_proc smallFunc
-smallFunc:
-    ret
-    .seh_endproc
-
-// Function with big stack allocation.
-    .globl smallFunc
-    .def allocFunc; .scl 2; .type 32; .endef
-    .seh_proc smallFunc
-allocFunc:
-    .seh_pushframe @code
-    subq $65520, %rsp
-    .seh_stackalloc 65520
-    sub $8454128, %rsp
-    .seh_stackalloc 8454128
-    .seh_endprologue
-    add $8454128, %rsp
-    addq $65520, %rsp
-    ret
-    .seh_endproc
diff --git a/test/tools/llvm-objdump/win64-unwind-data.test b/test/tools/llvm-objdump/win64-unwind-data.test
new file mode 100644
index 000000000000..a723ffed3e2b
--- /dev/null
+++ b/test/tools/llvm-objdump/win64-unwind-data.test
@@ -0,0 +1,52 @@
+// This test checks that the unwind data is dumped by llvm-objdump.
+// RUN: llvm-objdump -u %p/Inputs/win64-unwind.exe.coff-x86_64 | FileCheck %s
+
+CHECK:      Unwind info:
+CHECK:      Function Table:
+CHECK-NEXT: Start Address: func
+CHECK-NEXT: End Address: func + 0x001b
+CHECK-NEXT: Unwind Info Address: .xdata
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Flags: 1 UNW_ExceptionHandler
+CHECK-NEXT: Size of prolog: 18
+CHECK-NEXT: Number of Codes: 8
+CHECK-NEXT: Frame register: RBX
+CHECK-NEXT: Frame offset: 0
+CHECK-NEXT: Unwind Codes:
+CHECK-NEXT: 0x12: UOP_SetFPReg
+CHECK-NEXT: 0x0f: UOP_PushNonVol RBX
+CHECK-NEXT: 0x0e: UOP_SaveXMM128 XMM8 [0x0000]
+CHECK-NEXT: 0x09: UOP_SaveNonVol RSI [0x0010]
+CHECK-NEXT: 0x04: UOP_AllocSmall 24
+CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
+CHECK:      Function Table:
+CHECK-NEXT: Start Address: func + 0x0012
+CHECK-NEXT: End Address: func + 0x0012
+CHECK-NEXT: Unwind Info Address: .xdata + 0x001c
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Flags: 4 UNW_ChainInfo
+CHECK-NEXT: Size of prolog: 0
+CHECK-NEXT: Number of Codes: 0
+CHECK-NEXT: No frame pointer used
+CHECK:      Function Table:
+CHECK-NEXT: Start Address: smallFunc
+CHECK-NEXT: End Address: smallFunc + 0x0001
+CHECK-NEXT: Unwind Info Address: .xdata + 0x002c
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Flags: 0
+CHECK-NEXT: Size of prolog: 0
+CHECK-NEXT: Number of Codes: 0
+CHECK-NEXT: No frame pointer used
+CHECK:      Function Table:
+CHECK-NEXT: Start Address: allocFunc
+CHECK-NEXT: End Address: allocFunc + 0x001d
+CHECK-NEXT: Unwind Info Address: .xdata + 0x0034
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Flags: 0
+CHECK-NEXT: Size of prolog: 14
+CHECK-NEXT: Number of Codes: 6
+CHECK-NEXT: No frame pointer used
+CHECK-NEXT: Unwind Codes:
+CHECK-NEXT: 0x0e: UOP_AllocLarge 8454128
+CHECK-NEXT: 0x07: UOP_AllocLarge 8190
+CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
diff --git a/test/tools/llvm-readobj/Inputs/dynamic-table.c b/test/tools/llvm-readobj/Inputs/dynamic-table.c
new file mode 100644
index 000000000000..6d36e8a71186
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/dynamic-table.c
@@ -0,0 +1,7 @@
+// clang -target mipsel-linux-gnu -shared -fPIC -lc dynamic-table.c \
+//       -o dynamic-table.mips
+int puts(const char *);
+
+void foo(void) {
+  puts("Hello, World");
+}
diff --git a/test/tools/llvm-readobj/Inputs/dynamic-table.mips b/test/tools/llvm-readobj/Inputs/dynamic-table.mips
new file mode 100644
index 000000000000..ab36ceeb5a00
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/dynamic-table.mips
diff --git a/test/tools/llvm-readobj/Inputs/magic.coff-importlib b/test/tools/llvm-readobj/Inputs/magic.coff-importlib
new file mode 100644
index 000000000000..b934afb41a8d
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/magic.coff-importlib
diff --git a/test/tools/llvm-readobj/Inputs/magic.coff-unknown b/test/tools/llvm-readobj/Inputs/magic.coff-unknown
new file mode 100644
index 000000000000..7b3b4619a8cd
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/magic.coff-unknown
diff --git a/test/tools/llvm-readobj/Inputs/relocs.py b/test/tools/llvm-readobj/Inputs/relocs.py
index 232d080d7d78..af9459df8ee2 100644
--- a/test/tools/llvm-readobj/Inputs/relocs.py
+++ b/test/tools/llvm-readobj/Inputs/relocs.py
@@ -533,30 +533,6 @@ class Relocs_Elf_i386(Enum):
   R_386_IRELATIVE     = 42
   R_386_NUM           = 43
 
-class Relocs_Elf_MBlaze(Enum):
-  R_MICROBLAZE_NONE           = 0
-  R_MICROBLAZE_32             = 1
-  R_MICROBLAZE_32_PCREL       = 2
-  R_MICROBLAZE_64_PCREL       = 3
-  R_MICROBLAZE_32_PCREL_LO    = 4
-  R_MICROBLAZE_64             = 5
-  R_MICROBLAZE_32_LO          = 6
-  R_MICROBLAZE_SRO32          = 7
-  R_MICROBLAZE_SRW32          = 8
-  R_MICROBLAZE_64_NONE        = 9
-  R_MICROBLAZE_32_SYM_OP_SYM  = 10
-  R_MICROBLAZE_GNU_VTINHERIT  = 11
-  R_MICROBLAZE_GNU_VTENTRY    = 12
-  R_MICROBLAZE_GOTPC_64       = 13
-  R_MICROBLAZE_GOT_64         = 14
-  R_MICROBLAZE_PLT_64         = 15
-  R_MICROBLAZE_REL            = 16
-  R_MICROBLAZE_JUMP_SLOT      = 17
-  R_MICROBLAZE_GLOB_DAT       = 18
-  R_MICROBLAZE_GOTOFF_64      = 19
-  R_MICROBLAZE_GOTOFF_32      = 20
-  R_MICROBLAZE_COPY           = 21
-
 class Relocs_Elf_PPC32(Enum):
   R_PPC_NONE                  = 0
   R_PPC_ADDR32                = 1
@@ -1071,7 +1047,6 @@ craftElf("relocs.obj.elf-aarch64",  "aarch64",                     Relocs_Elf_AA
 craftElf("relocs.obj.elf-arm",      "arm-unknown-unknown",         Relocs_Elf_ARM.entries(), "b sym")
 craftElf("relocs.obj.elf-mips",     "mips-unknown-linux",          Relocs_Elf_Mips.entries(), "lui $2, %hi(sym)")
 craftElf("relocs.obj.elf-mips64el", "mips64el-unknown-linux",        Relocs_Elf_Mips.entries(), "lui $2, %hi(sym)")
-#craftElf("relocs.obj.elf-mblaze",   "mblaze-unknown-unknown",      Relocs_Elf_MBlaze.entries(), ...)
 #craftElf("relocs.obj.elf-hexagon",  "hexagon-unknown-unknown",     Relocs_Elf_Hexagon.entries(), ...)
 
 craftCoff("relocs.obj.coff-i386",   "i386-pc-win32",   Relocs_Coff_i386.entries(),   "mov foo@imgrel(%ebx, %ecx, 4), %eax")
diff --git a/test/tools/llvm-readobj/Inputs/rpath.exe.elf-x86_64 b/test/tools/llvm-readobj/Inputs/rpath.exe.elf-x86_64
new file mode 100644
index 000000000000..8c01c502cf86
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/rpath.exe.elf-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/trivial.exe.coff-i386 b/test/tools/llvm-readobj/Inputs/trivial.exe.coff-i386
new file mode 100644
index 000000000000..1558d2452ecc
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.exe.coff-i386
diff --git a/test/tools/llvm-readobj/dynamic.test b/test/tools/llvm-readobj/dynamic.test
new file mode 100644
index 000000000000..78a9b3bd937e
--- /dev/null
+++ b/test/tools/llvm-readobj/dynamic.test
@@ -0,0 +1,33 @@
+RUN: llvm-readobj -dynamic-table %p/Inputs/dynamic-table.mips \
+RUN:     | FileCheck %s -check-prefix ELF-MIPS
+
+ELF-MIPS: Format: ELF32-mips
+ELF-MIPS: Arch: mipsel
+ELF-MIPS: AddressSize: 32bit
+ELF-MIPS: LoadName:
+ELF-MIPS: DynamicSection [ (23 entries)
+ELF-MIPS:   Tag        Type                 Name/Value
+ELF-MIPS:   0x00000001 NEEDED               SharedLibrary (libc.so.6)
+ELF-MIPS:   0x0000000C INIT                 0x528
+ELF-MIPS:   0x0000000D FINI                 0x860
+ELF-MIPS:   0x00000004 HASH                 0x210
+ELF-MIPS:   0x00000005 STRTAB               0x3D8
+ELF-MIPS:   0x00000006 SYMTAB               0x2A8
+ELF-MIPS:   0x0000000A STRSZ                231 (bytes)
+ELF-MIPS:   0x0000000B SYMENT               16 (bytes)
+ELF-MIPS:   0x00000003 PLTGOT               0x108E0
+ELF-MIPS:   0x00000011 REL                  0x518
+ELF-MIPS:   0x00000012 RELSZ                16 (bytes)
+ELF-MIPS:   0x00000013 RELENT               8 (bytes)
+ELF-MIPS:   0x70000001 MIPS_RLD_VERSION     1
+ELF-MIPS:   0x70000005 MIPS_FLAGS           0x2
+ELF-MIPS:   0x70000006 MIPS_BASE_ADDRESS    0x0
+ELF-MIPS:   0x7000000A MIPS_LOCAL_GOTNO     10
+ELF-MIPS:   0x70000011 MIPS_SYMTABNO        19
+ELF-MIPS:   0x70000012 MIPS_UNREFEXTNO      26
+ELF-MIPS:   0x70000013 MIPS_GOTSYM          0xD
+ELF-MIPS:   0x6FFFFFFE VERNEED              0x4E8
+ELF-MIPS:   0x6FFFFFFF VERNEEDNUM           1
+ELF-MIPS:   0x6FFFFFF0 VERSYM               0x4C0
+ELF-MIPS:   0x00000000 NULL                 0x0
+ELF-MIPS: ]
diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test
index 226eb9342334..b2b454772d6a 100644
--- a/test/tools/llvm-readobj/file-headers.test
+++ b/test/tools/llvm-readobj/file-headers.test
@@ -2,10 +2,16 @@ RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-i386 \
 RUN:   | FileCheck %s -check-prefix COFF32
 RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-x86-64 \
 RUN:   | FileCheck %s -check-prefix COFF64
+RUN: llvm-readobj -h %p/Inputs/trivial.exe.coff-i386 \
+RUN:   | FileCheck %s -check-prefix PE32
 RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-i386 \
 RUN:   | FileCheck %s -check-prefix ELF32
 RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-x86-64 \
 RUN:   | FileCheck %s -check-prefix ELF64
+RUN: llvm-readobj -h %p/Inputs/magic.coff-unknown \
+RUN:   | FileCheck %s -check-prefix COFF-UNKNOWN
+RUN: llvm-readobj -h %p/Inputs/magic.coff-importlib \
+RUN:   | FileCheck %s -check-prefix COFF-IMPORTLIB
 
 COFF32:      File: {{(.*[/\\])?}}trivial.obj.coff-i386
 COFF32-NEXT: Format: COFF-i386
@@ -98,3 +104,115 @@ ELF64-NEXT:   SectionHeaderEntrySize: 64
 ELF64-NEXT:   SectionHeaderCount: 10
 ELF64-NEXT:   StringTableSectionIndex: 7
 ELF64-NEXT: }
+
+PE32:      File: {{(.*[/\\])?}}trivial.exe.coff-i386
+PE32-NEXT: Format: COFF-i386
+PE32-NEXT: Arch: i386
+PE32-NEXT: AddressSize: 32bit
+PE32-NEXT: ImageFileHeader {
+PE32-NEXT:   Machine: IMAGE_FILE_MACHINE_I386 (0x14C)
+PE32-NEXT:   SectionCount: 3
+PE32-NEXT:   TimeDateStamp: 2013-07-16 00:39:15 (0x51E49633)
+PE32-NEXT:   PointerToSymbolTable: 0x0
+PE32-NEXT:   SymbolCount: 0
+PE32-NEXT:   OptionalHeaderSize: 224
+PE32-NEXT:   Characteristics [ (0x102)
+PE32-NEXT:     IMAGE_FILE_32BIT_MACHINE (0x100)
+PE32-NEXT:     IMAGE_FILE_EXECUTABLE_IMAGE (0x2)
+PE32-NEXT:   ]
+PE32-NEXT: }
+PE32-NEXT: ImageOptionalHeader {
+PE32-NEXT:   MajorLinkerVersion: 11
+PE32-NEXT:   MinorLinkerVersion: 0
+PE32-NEXT:   SizeOfCode: 512
+PE32-NEXT:   SizeOfInitializedData: 1024
+PE32-NEXT:   SizeOfUninitializedData: 0
+PE32-NEXT:   AddressOfEntryPoint: 0x1000
+PE32-NEXT:   BaseOfCode: 0x1000
+PE32-NEXT:   BaseOfData: 0x2000
+PE32-NEXT:   ImageBase: 0x400000
+PE32-NEXT:   SectionAlignment: 4096
+PE32-NEXT:   FileAlignment: 512
+PE32-NEXT:   MajorOperatingSystemVersion: 6
+PE32-NEXT:   MinorOperatingSystemVersion: 0
+PE32-NEXT:   MajorImageVersion: 0
+PE32-NEXT:   MinorImageVersion: 0
+PE32-NEXT:   MajorSubsystemVersion: 6
+PE32-NEXT:   MinorSubsystemVersion: 0
+PE32-NEXT:   SizeOfImage: 16384
+PE32-NEXT:   SizeOfHeaders: 1024
+PE32-NEXT:   Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI (0x3)
+PE32-NEXT:   Subsystem [ (0x8140)
+PE32-NEXT:     IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE (0x40)
+PE32-NEXT:     IMAGE_DLL_CHARACTERISTICS_NX_COMPAT (0x100)
+PE32-NEXT:     IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE (0x8000)
+PE32-NEXT:   ]
+PE32-NEXT:   SizeOfStackReserve: 1048576
+PE32-NEXT:   SizeOfStackCommit: 4096
+PE32-NEXT:   SizeOfHeapReserve: 1048576
+PE32-NEXT:   SizeOfHeapCommit: 4096
+PE32-NEXT:   NumberOfRvaAndSize: 16
+PE32-NEXT:   DataDirectory {
+PE32-NEXT:     ExportTableRVA: 0x0
+PE32-NEXT:     ExportTableSize: 0x0
+PE32-NEXT:     ImportTableRVA: 0x0
+PE32-NEXT:     ImportTableSize: 0x0
+PE32-NEXT:     ResourceTableRVA: 0x0
+PE32-NEXT:     ResourceTableSize: 0x0
+PE32-NEXT:     ExceptionTableRVA: 0x0
+PE32-NEXT:     ExceptionTableSize: 0x0
+PE32-NEXT:     CertificateTableRVA: 0x0
+PE32-NEXT:     CertificateTableSize: 0x0
+PE32-NEXT:     BaseRelocationTableRVA: 0x3000
+PE32-NEXT:     BaseRelocationTableSize: 0xC
+PE32-NEXT:     DebugRVA: 0x0
+PE32-NEXT:     DebugSize: 0x0
+PE32-NEXT:     ArchitectureRVA: 0x0
+PE32-NEXT:     ArchitectureSize: 0x0
+PE32-NEXT:     GlobalPtrRVA: 0x0
+PE32-NEXT:     GlobalPtrSize: 0x0
+PE32-NEXT:     TLSTableRVA: 0x0
+PE32-NEXT:     TLSTableSize: 0x0
+PE32-NEXT:     LoadConfigTableRVA: 0x0
+PE32-NEXT:     LoadConfigTableSize: 0x0
+PE32-NEXT:     BoundImportRVA: 0x0
+PE32-NEXT:     BoundImportSize: 0x0
+PE32-NEXT:     IATRVA: 0x0
+PE32-NEXT:     IATSize: 0x0
+PE32-NEXT:     DelayImportDescriptorRVA: 0x0
+PE32-NEXT:     DelayImportDescriptorSize: 0x0
+PE32-NEXT:     CLRRuntimeHeaderRVA: 0x0
+PE32-NEXT:     CLRRuntimeHeaderSize: 0x0
+PE32-NEXT:     ReservedRVA: 0x0
+PE32-NEXT:     ReservedSize: 0x0
+PE32-NEXT:   }
+PE32-NEXT: }
+
+COFF-UNKNOWN:      Format: COFF-<unknown arch>
+COFF-UNKNOWN-NEXT: Arch: unknown
+COFF-UNKNOWN-NEXT: AddressSize: 32bit
+COFF-UNKNOWN-NEXT: ImageFileHeader {
+COFF-UNKNOWN-NEXT:   Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0)
+COFF-UNKNOWN-NEXT:   SectionCount: 3
+COFF-UNKNOWN-NEXT:   TimeDateStamp: 2013-11-14 21:19:28 (0x52853E60)
+COFF-UNKNOWN-NEXT:   PointerToSymbolTable: 0xF8
+COFF-UNKNOWN-NEXT:   SymbolCount: 11
+COFF-UNKNOWN-NEXT:   OptionalHeaderSize: 0
+COFF-UNKNOWN-NEXT:   Characteristics [ (0x0)
+COFF-UNKNOWN-NEXT:   ]
+COFF-UNKNOWN-NEXT: }
+
+COFF-IMPORTLIB:      Format: COFF-<unknown arch>
+COFF-IMPORTLIB-NEXT: Arch: unknown
+COFF-IMPORTLIB-NEXT: AddressSize: 32bit
+COFF-IMPORTLIB-NEXT: ImageFileHeader {
+COFF-IMPORTLIB-NEXT:   Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0)
+COFF-IMPORTLIB-NEXT:   SectionCount: 65535
+COFF-IMPORTLIB-NEXT:   TimeDateStamp: 1970-09-09 19:52:32 (0x14C0000)
+COFF-IMPORTLIB-NEXT:   PointerToSymbolTable: 0x528542EB
+COFF-IMPORTLIB-NEXT:   SymbolCount: 20
+COFF-IMPORTLIB-NEXT:   OptionalHeaderSize: 0
+COFF-IMPORTLIB-NEXT:   Characteristics [ (0x8)
+COFF-IMPORTLIB-NEXT:     IMAGE_FILE_LOCAL_SYMS_STRIPPED (0x8)
+COFF-IMPORTLIB-NEXT:   ]
+COFF-IMPORTLIB-NEXT: }
diff --git a/test/tools/llvm-readobj/program-headers.test b/test/tools/llvm-readobj/program-headers.test
index 2a574bb2e646..7c22f2b529b1 100644
--- a/test/tools/llvm-readobj/program-headers.test
+++ b/test/tools/llvm-readobj/program-headers.test
@@ -2,6 +2,8 @@ RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-i3
 RUN:     | FileCheck %s -check-prefix ELF-I386
 RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-x86-64 \
 RUN:     | FileCheck %s -check-prefix ELF-X86-64
+RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.mips \
+RUN:     | FileCheck %s -check-prefix ELF-MIPS
 
 ELF-I386:      ProgramHeaders [
 ELF-I386-NEXT:   ProgramHeader {
@@ -72,3 +74,31 @@ ELF-X86-64-NEXT:     ]
 ELF-X86-64-NEXT:     Alignment: 8
 ELF-X86-64-NEXT:   }
 ELF-X86-64-NEXT: ]
+
+ELF-MIPS:      ProgramHeaders [
+ELF-MIPS-NEXT:   ProgramHeader {
+ELF-MIPS-NEXT:     Type: PT_MIPS_REGINFO (0x70000000)
+ELF-MIPS-NEXT:     Offset: 0x74
+ELF-MIPS-NEXT:     VirtualAddress: 0x400074
+ELF-MIPS-NEXT:     PhysicalAddress: 0x400074
+ELF-MIPS-NEXT:     FileSize: 24
+ELF-MIPS-NEXT:     MemSize: 24
+ELF-MIPS-NEXT:     Flags [ (0x4)
+ELF-MIPS-NEXT:       PF_R (0x4)
+ELF-MIPS-NEXT:     ]
+ELF-MIPS-NEXT:     Alignment: 4
+ELF-MIPS-NEXT:   }
+ELF-MIPS-NEXT:   ProgramHeader {
+ELF-MIPS-NEXT:     Type: PT_LOAD (0x1)
+ELF-MIPS-NEXT:     Offset: 0x0
+ELF-MIPS-NEXT:     VirtualAddress: 0x400000
+ELF-MIPS-NEXT:     PhysicalAddress: 0x400000
+ELF-MIPS-NEXT:     FileSize: 160
+ELF-MIPS-NEXT:     MemSize: 160
+ELF-MIPS-NEXT:     Flags [ (0x5)
+ELF-MIPS-NEXT:       PF_R (0x4)
+ELF-MIPS-NEXT:       PF_X (0x1)
+ELF-MIPS-NEXT:     ]
+ELF-MIPS-NEXT:     Alignment: 65536
+ELF-MIPS-NEXT:   }
+ELF-MIPS-NEXT: ]
diff --git a/test/tools/llvm-readobj/reloc-types.test b/test/tools/llvm-readobj/reloc-types.test
index 08603bc7fb55..0c8b54dbc6bf 100644
--- a/test/tools/llvm-readobj/reloc-types.test
+++ b/test/tools/llvm-readobj/reloc-types.test
@@ -460,29 +460,6 @@ ELF-MIPS64EL: Type: R_MIPS_COPY/R_MIPS_COPY/R_MIPS_COPY (8289918)
 ELF-MIPS64EL: Type: R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT (8355711)
 ELF-MIPS64EL: Type: R_MIPS_NUM/R_MIPS_NUM/R_MIPS_NUM (14342874)
 
-ELF-MBLAZE: Type: R_MICROBLAZE_NONE (0)
-ELF-MBLAZE: Type: R_MICROBLAZE_32 (1)
-ELF-MBLAZE: Type: R_MICROBLAZE_32_PCREL (2)
-ELF-MBLAZE: Type: R_MICROBLAZE_64_PCREL (3)
-ELF-MBLAZE: Type: R_MICROBLAZE_32_PCREL_LO (4)
-ELF-MBLAZE: Type: R_MICROBLAZE_64 (5)
-ELF-MBLAZE: Type: R_MICROBLAZE_32_LO (6)
-ELF-MBLAZE: Type: R_MICROBLAZE_SRO32 (7)
-ELF-MBLAZE: Type: R_MICROBLAZE_SRW32 (8)
-ELF-MBLAZE: Type: R_MICROBLAZE_64_NONE (9)
-ELF-MBLAZE: Type: R_MICROBLAZE_32_SYM_OP_SYM (10)
-ELF-MBLAZE: Type: R_MICROBLAZE_GNU_VTINHERIT (11)
-ELF-MBLAZE: Type: R_MICROBLAZE_GNU_VTENTRY (12)
-ELF-MBLAZE: Type: R_MICROBLAZE_GOTPC_64 (13)
-ELF-MBLAZE: Type: R_MICROBLAZE_GOT_64 (14)
-ELF-MBLAZE: Type: R_MICROBLAZE_PLT_64 (15)
-ELF-MBLAZE: Type: R_MICROBLAZE_REL (16)
-ELF-MBLAZE: Type: R_MICROBLAZE_JUMP_SLOT (17)
-ELF-MBLAZE: Type: R_MICROBLAZE_GLOB_DAT (18)
-ELF-MBLAZE: Type: R_MICROBLAZE_GOTOFF_64 (19)
-ELF-MBLAZE: Type: R_MICROBLAZE_GOTOFF_32 (20)
-ELF-MBLAZE: Type: R_MICROBLAZE_COPY (21)
-
 ELF-HEXAGON: Type: R_HEX_NONE (0)
 ELF-HEXAGON: Type: R_HEX_B22_PCREL (1)
 ELF-HEXAGON: Type: R_HEX_B15_PCREL (2)
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
index dec7f862982c..3a87ff548e39 100644
--- a/test/tools/llvm-readobj/relocations.test
+++ b/test/tools/llvm-readobj/relocations.test
@@ -22,7 +22,7 @@ COFF-NEXT:   }
 COFF-NEXT: ]
 
 ELF:      Relocations [
-ELF-NEXT:   Section (1) .text {
+ELF-NEXT:   Section (2) .rel.text {
 ELF-NEXT:     0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
 ELF-NEXT:     0x12 R_386_GOTOFF .L.str 0x0
 ELF-NEXT:     0x1A R_386_PLT32 puts 0x0
diff --git a/test/tools/llvm-readobj/rpath.test b/test/tools/llvm-readobj/rpath.test
new file mode 100644
index 000000000000..600938ee9385
--- /dev/null
+++ b/test/tools/llvm-readobj/rpath.test
@@ -0,0 +1,4 @@
+RUN: llvm-readobj -dynamic-table %p/Inputs/rpath.exe.elf-x86_64 \
+RUN:   | FileCheck %s
+
+CHECK: 0x000000000000000F RPATH /usr/local/lib
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
index 327f040854f9..e3a40c3eb192 100644
--- a/test/tools/llvm-readobj/sections-ext.test
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -108,10 +108,6 @@ ELF-NEXT:     Info: 0
 ELF-NEXT:     AddressAlignment: 16
 ELF-NEXT:     EntrySize: 0
 ELF-NEXT:     Relocations [
-ELF-NEXT:       0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
-ELF-NEXT:       0x12 R_386_GOTOFF .L.str 0x0
-ELF-NEXT:       0x1A R_386_PLT32 puts 0x0
-ELF-NEXT:       0x1F R_386_PLT32 SomeOtherFunction 0x0
 ELF-NEXT:     ]
 ELF-NEXT:     Symbols [
 ELF-NEXT:       Symbol {
@@ -139,6 +135,32 @@ ELF-NEXT:       0010: 8D830000 00008904 24E8FCFF FFFFE8FC  |........$.......|
 ELF-NEXT:       0020: FFFFFF31 C083C408 5BC3               |...1....[.|
 ELF-NEXT:     )
 ELF-NEXT:   }
+ELF-NEXT:   Section {
+ELF-NEXT:     Index: 2
+ELF-NEXT:     Name: .rel.text (1)
+ELF-NEXT:     Type: SHT_REL (0x9)
+ELF-NEXT:     Flags [ (0x0)
+ELF-NEXT:     ]
+ELF-NEXT:     Address: 0x0
+ELF-NEXT:     Offset: 0x360
+ELF-NEXT:     Size: 32
+ELF-NEXT:     Link: 8
+ELF-NEXT:     Info: 1
+ELF-NEXT:     AddressAlignment: 4
+ELF-NEXT:     EntrySize: 8
+ELF-NEXT:     Relocations [
+ELF-NEXT:       0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
+ELF-NEXT:       0x12 R_386_GOTOFF .L.str 0x0
+ELF-NEXT:       0x1A R_386_PLT32 puts 0x0
+ELF-NEXT:       0x1F R_386_PLT32 SomeOtherFunction 0x0
+ELF-NEXT:     ]
+ELF-NEXT:     Symbols [
+ELF-NEXT:     ]
+ELF-NEXT:     SectionData (
+ELF-NEXT:       0000: 0C000000 0A0A0000 12000000 09020000  |................|
+ELF-NEXT:       0010: 1A000000 040B0000 1F000000 04090000  |................|
+ELF-NEXT:     )
+ELF-NEXT:   }
 
 MACHO-I386:      Sections [
 MACHO-I386-NEXT:   Section {
diff --git a/test/tools/llvm-readobj/symbols.test b/test/tools/llvm-readobj/symbols.test
index d33bd8ed2cd0..e014377e586f 100644
--- a/test/tools/llvm-readobj/symbols.test
+++ b/test/tools/llvm-readobj/symbols.test
@@ -25,6 +25,15 @@ COFF-NEXT:   }
 
 ELF:      Symbols [
 ELF-NEXT:   Symbol {
+ELF-NEXT:     Name:  (0)
+ELF-NEXT:     Value: 0x0
+ELF-NEXT:     Size: 0
+ELF-NEXT:     Binding: Local (0x0)
+ELF-NEXT:     Type: None (0x0)
+ELF-NEXT:     Other: 0
+ELF-NEXT:     Section:  (0x0)
+ELF-NEXT:   }
+ELF-NEXT:   Symbol {
 ELF-NEXT:     Name: trivial.ll (1)
 ELF-NEXT:     Value: 0x0
 ELF-NEXT:     Size: 0